author     Dimitry Andric <dim@FreeBSD.org>  2021-06-13 19:31:46 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2021-07-31 18:56:55 +0000
commit     af732203b8f7f006927528db5497f5cbc4c4742a (patch)
tree       596f112de3b76118552871dbb6114bb7e3e17f40 /contrib/llvm-project/llvm
parent     83dea422ac8d4a8323e64203c2eadaa813768717 (diff)
download   src-af732203b8f7f006927528db5497f5cbc4c4742a.tar.gz
           src-af732203b8f7f006927528db5497f5cbc4c4742a.zip
Merge llvm-project 12.0.1 release and follow-up fixes
Merge llvm-project main llvmorg-12-init-17869-g8e464dd76bef

This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-12-init-17869-g8e464dd76bef, the last commit before the
upstream release/12.x branch was created.

PR: 255570
(cherry picked from commit e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)

Merge llvm-project 12.0.0 release

This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-12.0.0-0-gd28af7c654d8, a.k.a. 12.0.0 release.

PR: 255570
(cherry picked from commit d409305fa3838fb39b38c26fc085fb729b8766d5)

Disable strict-fp for powerpcspe, as it does not work properly yet

Merge commit 5c18d1136665 from llvm git (by Qiu Chaofan):

  [SPE] Disable strict-fp for SPE by default

  As discussed in PR50385, strict-fp on PowerPC SPE has not been handled
  well. This patch disables it by default for SPE.

  Reviewed By: nemanjai, vit9696, jhibbits

  Differential Revision: https://reviews.llvm.org/D103235

PR: 255570
(cherry picked from commit 715df83abc049b23d9acddc81f2480bd4c056d64)

Apply upstream libc++ fix to allow building with devel/xxx-xtoolchain-gcc

Merge commit 52e9d80d5db2 from llvm git (by Jason Liu):

  [libc++] add `inline` for __open's definition in ifstream and ofstream

  Summary:
  When building with gcc on AIX, it seems that gcc does not like the
  `always_inline` attribute without the `inline` keyword. So add the
  `inline` keyword to __open in ifstream and ofstream. This also makes
  them consistent with __open in basic_filebuf (it seems `inline` was
  added there before for the gcc build as well).

  Differential Revision: https://reviews.llvm.org/D99422

PR: 255570
(cherry picked from commit d099db25464b826c5724cf2fb5b22292bbe15f6e)

Undefine HAVE_(DE)REGISTER_FRAME in llvm's config.h on arm

Otherwise, the lli tool (enabled by WITH_CLANG_EXTRAS) won't link on arm,
stating that __register_frame is undefined. This function is normally
provided by libunwind, but explicitly not for the ARM Exception ABI.

Reported by: oh
PR: 255570
(cherry picked from commit f336b45e943c7f9a90ffcea1a6c4c7039e54c73c)

Merge llvm-project 12.0.1 rc2

This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-12.0.1-rc2-0-ge7dac564cd0e, a.k.a. 12.0.1 rc2.

PR: 255570
(cherry picked from commit 23408297fbf3089f0388a8873b02fa75ab3f5bb9)

Revert libunwind change to fix backtrace segfault on aarch64

Revert commit 22b615a96593 from llvm git (by Daniel Kiss):

  [libunwind] Support for leaf function unwinding.

  Unwinding a leaf function is useful in cases when the backtrace finds a
  leaf function, for example when it caused a signal. This patch also adds
  support for DW_CFA_undefined, because it marks the end of the frames.

  Ryan Prichard provided code for the tests.

  Reviewed By: #libunwind, mstorsjo

  Differential Revision: https://reviews.llvm.org/D83573

  Relanded with the test limited to the x86_64-linux target.

Bisection has shown that this particular upstream commit causes programs
using backtrace(3) on aarch64 to segfault. This affects the lang/rust port,
for instance. Until this can be fixed upstream, revert the commit for now.

Reported by: mikael
PR: 256864
(cherry picked from commit 5866c369e4fd917c0d456f0f10b92ee354b82279)
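For context on the revert above, the affected pattern is ordinary use of
backtrace(3). The sketch below is an illustration only, not code from this
commit or from PR 256864; on FreeBSD it links against libexecinfo.

    /* Minimal backtrace(3) user, for illustration only.
     * Build on FreeBSD with: cc -g -O2 -o bt bt.c -lexecinfo
     */
    #include <execinfo.h>
    #include <stdio.h>

    static void dump_stack(void) {
        void *frames[32];
        int n = backtrace(frames, 32);        /* capture return addresses */
        backtrace_symbols_fd(frames, n, 1);   /* write symbolized frames to stdout */
        printf("captured %d frames\n", n);
    }

    int main(void) {
        dump_stack();
        return 0;
    }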
Merge llvm-project 12.0.1 release

This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-12.0.1-0-gfed41342a82f, a.k.a. 12.0.1 release.

PR: 255570
(cherry picked from commit 4652422eb477731f284b1345afeefef7f269da50)

compiler-rt: build out-of-line LSE atomics helpers for aarch64

Both clang >= 12 and gcc >= 10.1 now default to -moutline-atomics for
aarch64. This requires a bunch of helper functions in libcompiler_rt.a, to
avoid link errors like "undefined symbol: __aarch64_ldadd8_acq_rel".

(Note: of course you can use -mno-outline-atomics as a workaround too, but
this would negate the potential performance benefit of the faster LSE
instructions.)

Bump __FreeBSD_version so ports maintainers can easily detect this.

PR: 257392
(cherry picked from commit cc55ee8009a550810d38777fd6ace9abf3a2f6b4)
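To illustrate why those helpers are needed (a sketch, not part of this
commit): with -moutline-atomics in effect, an 8-byte atomic read-modify-write
with acquire-release ordering such as the one below is typically compiled on
aarch64 into a call to the __aarch64_ldadd8_acq_rel helper mentioned above,
which the linker then has to find in libcompiler_rt.a.

    /* Illustrative only: with clang >= 12 or gcc >= 10.1 targeting aarch64,
     * this function is normally emitted as a call to __aarch64_ldadd8_acq_rel
     * unless -mno-outline-atomics is given.
     */
    #include <stdatomic.h>
    #include <stdint.h>

    uint64_t add_acq_rel(_Atomic uint64_t *counter, uint64_t delta) {
        /* 8-byte atomic fetch-add, acquire-release ordering */
        return atomic_fetch_add_explicit(counter, delta, memory_order_acq_rel);
    }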
Diffstat (limited to 'contrib/llvm-project/llvm')
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/Core.h60
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/DataTypes.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/DebugInfo.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/Error.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/LLJIT.h213
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/LinkTimeOptimizer.h66
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/Orc.h452
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/OrcBindings.h169
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/OrcEE.h55
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/Transforms/IPO.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm-c/Transforms/Scalar.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/APFixedPoint.h237
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/APFloat.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/APInt.h44
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/APSInt.h20
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/AllocatorList.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/Any.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h40
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/DenseMap.h35
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/DenseMapInfo.h45
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/DenseSet.h21
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/DepthFirstIterator.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/DirectedGraph.h16
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/FloatingPointMode.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/Hashing.h25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/IntervalMap.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h45
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/Optional.h165
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/PointerUnion.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/STLExtras.h104
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/Sequence.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/SetVector.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/SmallSet.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h61
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/SmallVector.h549
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/SparseSet.h9
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/Statistic.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/StringExtras.h109
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/StringMap.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/StringSet.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/Triple.h67
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/iterator.h22
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ADT/simple_ilist.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h104
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/AliasSetTracker.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/AssumptionCache.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h63
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h287
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/CFGPrinter.h22
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/CGSCCPassManager.h540
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/CallGraph.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/CaptureTracking.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/CodeMetrics.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ConstantFolding.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h88
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/DDG.h28
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/DDGPrinter.h91
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/Delinearization.h33
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/DemandedBits.h14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/DivergenceAnalysis.h85
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/DominanceFrontier.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/EHPersonalities.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h86
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h789
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/IVDescriptors.h154
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/InlineAdvisor.h82
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/InlineFeaturesAnalysis.h45
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/InstCount.h28
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/InstructionSimplify.h42
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/Interval.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/IntervalIterator.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h51
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/LazyValueInfo.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/Lint.h28
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h9
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/LoopAccessAnalysis.h17
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/LoopAnalysisManager.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/LoopCacheAnalysis.h29
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h22
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfoImpl.h22
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h17
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/MLInlineAdvisor.h13
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/MemDerefPrinter.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/MemoryLocation.h68
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSA.h61
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSAUpdater.h9
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h29
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/MustExecute.h19
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h34
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/PhiValues.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ProfileSummaryInfo.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/RegionInfo.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/RegionInfoImpl.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/RegionPass.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h41
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h279
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h135
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/SparsePropagation.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/StackLifetime.h16
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/StackSafetyAnalysis.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h42
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.def21
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h271
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h346
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h (renamed from contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h)6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/Utils/Local.h76
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/Utils/TFUtils.h203
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ValueLattice.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h107
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def146
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/VectorUtils.h56
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.def19
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.h93
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/DynamicTags.def1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h92
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def74
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/MachO.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h49
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/WasmRelocs.def5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/WasmTraits.h (renamed from contrib/llvm-project/llvm/include/llvm/Object/WasmTraits.h)6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h111
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeCommon.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeConvenience.h486
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeWriter.h16
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Bitcode/LLVMBitCodes.h25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Bitstream/BitCodes.h9
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamWriter.h103
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/Analysis.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/AntiDepBreaker.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h131
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinterHandler.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h491
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/CalcSpillWeights.h70
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h15
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h1144
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/DIE.h27
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/DebugHandlerBase.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h43
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h34
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h150
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h268
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h102
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h45
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h77
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h124
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h82
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h80
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h102
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h162
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h23
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h130
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h108
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LexicalScopes.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LiveInterval.h34
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervalUnion.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegMatrix.h17
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h54
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LowLevelType.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MBFIWrapper.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MIRYamlMapping.h51
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h31
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h23
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineConstantPool.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominators.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h58
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h40
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h41
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopInfo.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopUtils.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h27
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOutliner.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassManager.h256
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassRegistry.def197
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h17
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineRegisterInfo.h42
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSSAUpdater.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineStableHash.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineTraceMetrics.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MultiHazardRecognizer.h47
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/RDFLiveness.h38
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/RDFRegisters.h43
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h45
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/RegAllocPBQP.h28
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/Register.h32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterPressure.h34
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterScavenging.h13
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/RuntimeLibcalls.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h20
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h138
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h240
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h45
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/StableHashing.h112
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h62
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/TargetCallingConv.h62
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h121
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h130
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/TargetPassConfig.h28
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/TargetRegisterInfo.h96
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/TileShapeInfo.h97
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h96
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td209
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/VirtRegMap.h35
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h104
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h33
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h31
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFStreamer.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h17
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def36
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/RecordName.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h17
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DIContext.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h17
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h16
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h66
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h37
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h63
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h41
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h46
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h48
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h9
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h203
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h21
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITEventListener.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h33
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h13
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h91
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h19
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h13
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITSymbol.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h639
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Core.h611
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h110
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h111
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h100
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h80
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h55
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h49
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h84
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Layer.h42
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h267
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h17
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h211
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h20
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h43
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h57
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h84
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h415
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h263
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h280
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h21
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h387
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h564
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h79
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcError.h (renamed from contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h)0
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h (renamed from contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCUtils.h)252
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h (renamed from contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h)25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h (renamed from contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h)367
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h165
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h66
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h54
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h222
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h620
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h41
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h38
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h218
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/FileCheck/FileCheck.h (renamed from contrib/llvm-project/llvm/include/llvm/Support/FileCheck.h)43
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Frontend/Directive/DirectiveBase.td52
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Frontend/OpenACC/ACC.td132
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMP.td234
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h44
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPContext.h49
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h22
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h402
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def1046
-rw-r--r--contrib/llvm-project/llvm/include/llvm/FuzzMutate/IRMutator.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Argument.h27
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Assumptions.h50
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Attributes.h78
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Attributes.td32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/BasicBlock.h88
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/CallingConv.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Constant.h32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/ConstantRange.h26
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Constants.h99
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/DIBuilder.h44
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/DataLayout.h54
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/DebugInfoMetadata.h238
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/DebugLoc.h16
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/DerivedTypes.h76
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/DiagnosticInfo.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Dominators.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/FixedMetadataKinds.def5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/FixedPointBuilder.h465
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Function.h46
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/GetElementPtrTypeIterator.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/GlobalObject.h69
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/GlobalVariable.h9
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h175
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IRPrintingPasses.h23
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/InstrTypes.h149
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Instruction.h70
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Instructions.h78
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h79
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td696
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAArch64.td471
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td142
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsARM.td25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsBPF.td10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td113
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsPowerPC.td584
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsRISCV.td1024
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsVE.td35
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td1213
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsWebAssembly.td196
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsX86.td153
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/LLVMContext.h16
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/LLVMRemarkStreamer.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManagers.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/MDBuilder.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/MatrixBuilder.h39
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Metadata.def2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Metadata.h48
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Module.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/ModuleSummaryIndex.h14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Operator.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/OptBisect.h20
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/PassInstrumentation.h117
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/PassManager.h138
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/PassManagerImpl.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h22
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/PassTimingInfo.h15
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h272
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/PredIteratorCache.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/PrintPasses.h44
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/PseudoProbe.h87
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/ReplaceConstant.h28
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Statepoint.h50
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/StructuralHash.h34
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/SymbolTableListTraits.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Type.h32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/User.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/VPIntrinsics.def140
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Value.def18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Value.h104
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/ValueHandle.h26
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/Verifier.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/InitializePasses.h63
-rw-r--r--contrib/llvm-project/llvm/include/llvm/InterfaceStub/ELFObjHandler.h47
-rw-r--r--contrib/llvm-project/llvm/include/llvm/InterfaceStub/ELFStub.h (renamed from contrib/llvm-project/llvm/include/llvm/TextAPI/ELF/ELFStub.h)8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/InterfaceStub/TBEHandler.h (renamed from contrib/llvm-project/llvm/include/llvm/TextAPI/ELF/TBEHandler.h)2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/LTO/Config.h29
-rw-r--r--contrib/llvm-project/llvm/include/llvm/LTO/LTO.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/LTO/LTOBackend.h25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOModule.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCAsmBackend.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfo.h50
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCAsmMacro.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCAssembler.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCCodeView.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCContext.h31
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCDwarf.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCELFObjectWriter.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCExpr.h32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCFixup.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCFragment.h52
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCInst.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCInstPrinter.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCInstrDesc.h31
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCObjectFileInfo.h17
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCObjectStreamer.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmLexer.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCAsmLexer.h22
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCAsmParser.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h178
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCRegister.h25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCRegisterInfo.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCSchedule.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCSectionXCOFF.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCStreamer.h51
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCSubtargetInfo.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCSymbol.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCSymbolWasm.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCSymbolXCOFF.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCWasmObjectWriter.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCWasmStreamer.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCWin64EH.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCWinCOFFStreamer.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/MCWinEH.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/StringTableBuilder.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MC/SubtargetFeature.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/ArchiveWriter.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/Binary.h13
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/COFF.h23
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/ELF.h736
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h294
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/ELFTypes.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/MachO.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/MachOUniversal.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/MachOUniversalWriter.h102
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/ObjectFile.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/RelocationResolver.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/StackMapParser.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/SymbolicFile.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/Wasm.h20
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Object/XCOFFObjectFile.h101
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ObjectYAML/ArchiveYAML.h77
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFEmitter.h16
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFYAML.h197
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ObjectYAML/ELFYAML.h332
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ObjectYAML/MachOYAML.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ObjectYAML/MinidumpYAML.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ObjectYAML/ObjectYAML.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ObjectYAML/WasmYAML.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ObjectYAML/yaml2obj.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Option/ArgList.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Option/OptParser.td93
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Option/OptTable.h28
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Option/Option.h14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Pass.h20
-rw-r--r--contrib/llvm-project/llvm/include/llvm/PassAnalysisSupport.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Passes/PassBuilder.h188
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Passes/StandardInstrumentations.h232
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h70
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/GCOV.h184
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h40
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc139
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h26
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfWriter.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/ProfileCommon.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h325
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfReader.h164
-rw-r--r--contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfWriter.h197
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Remarks/BitstreamRemarkParser.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Remarks/HotnessThresholdParser.h63
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def35
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/AMDGPUMetadata.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h72
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.def15
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/ARMWinEH.h87
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/AlignOf.h35
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Allocator.h25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/AtomicOrdering.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/BinaryItemStream.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamRef.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/CFGDiff.h225
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/CFGUpdate.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/CheckedArithmetic.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/CommandLine.h32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Compiler.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/CrashRecoveryContext.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/DOTGraphTraits.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Error.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/ErrorHandling.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/ErrorOr.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/ExitCodes.h33
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/FileCollector.h78
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/FileSystem.h88
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/FileSystem/UniqueID.h52
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/FormatVariadic.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/GenericDomTree.h62
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/GenericDomTreeConstruction.h293
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/GlobPattern.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/GraphWriter.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Host.h14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/InitLLVM.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/InstructionCost.h238
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/JSON.h205
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/KnownBits.h145
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/LineIterator.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h342
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/MathExtras.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/MemoryBuffer.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/MemoryBufferRef.h56
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Parallel.h104
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Path.h38
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/PluginLoader.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Process.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Program.h16
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/RISCVTargetParser.def14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Signals.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Signposts.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/SourceMgr.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/SuffixTree.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/SwapByteOrder.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/SymbolRemappingReader.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/TargetOpcodes.def77
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/TargetParser.h25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/TargetRegistry.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/TaskQueue.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Threading.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/ToolOutputFile.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/TrigramIndex.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/TypeSize.h531
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h31
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/Win64EH.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/X86TargetParser.def9
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/X86TargetParser.h19
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/YAMLParser.h13
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/YAMLTraits.h92
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/raw_ostream.h114
-rw-r--r--contrib/llvm-project/llvm/include/llvm/TableGen/DirectiveEmitter.h211
-rw-r--r--contrib/llvm-project/llvm/include/llvm/TableGen/Error.h19
-rw-r--r--contrib/llvm-project/llvm/include/llvm/TableGen/Record.h362
-rw-r--r--contrib/llvm-project/llvm/include/llvm/TableGen/SearchableTable.td26
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/CGPassBuilderOption.h65
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/GenericOpcodes.td602
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td353
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td34
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/Target.td396
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td27
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetItinerary.td2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetLoweringObjectFile.h36
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetMachine.h70
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetOptions.h67
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetPfmCounters.td2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetSchedule.td28
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td271
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Testing/Support/SupportHelpers.h138
-rw-r--r--contrib/llvm-project/llvm/include/llvm/TextAPI/MachO/Platform.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroElide.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h23
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Annotation2Metadata.h30
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h658
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/BlockExtractor.h25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/IROutliner.h358
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Inliner.h14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/LoopExtractor.h32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h152
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleProfile.h9
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h147
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/StripSymbols.h47
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h528
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h19
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h51
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/ObjCARC.h18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h67
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h26
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstraintElimination.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/DCE.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/GVN.h15
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h27
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/JumpThreading.h71
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h32
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h13
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h414
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopReroll.h27
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopRotation.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h1
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h8
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h29
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/Reg2Mem.h27
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SROA.h5
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h29
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h27
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h10
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/StraightLineStrengthReduce.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h20
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h129
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h4
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Cloning.h43
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Debugify.h65
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/FixIrreducible.h20
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/InstructionNamer.h20
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Local.h118
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopPeel.h40
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h105
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopVersioning.h24
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LowerSwitch.h26
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MatrixUtils.h94
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MetaRenamer.h26
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MisExpect.h43
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/PredicateInfo.h34
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h792
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h77
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h23
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h2
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SizeOpts.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/StripGCRelocates.h25
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h26
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h27
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyLoopExits.h22
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnrollLoop.h17
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h74
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/module.modulemap20
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp100
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp875
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp914
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/CFG.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp558
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp105
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/CodeMetrics.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp335
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp158
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/DDG.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/DDGPrinter.cpp150
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp107
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp137
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp531
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp342
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/EHPersonalities.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp88
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp937
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp350
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ImportedFunctionsInliningStatistics.cpp (renamed from contrib/llvm-project/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp)14
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp190
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp263
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/InlineFeaturesAnalysis.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp142
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp1238
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/Interval.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp297
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp342
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/Lint.cpp399
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/Loads.cpp69
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp106
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LoopNestAnalysis.cpp136
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp65
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp86
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp248
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp102
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp98
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ObjCARCInstKind.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ReleaseModeModelRunner.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp82
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp1996
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionDivision.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/StackLifetime.cpp83
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp256
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp469
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/TFUtils.cpp338
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp168
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp82
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp1485
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp77
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/models/inliner/README.txt3
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/models/inliner/output_spec.json14
-rw-r--r--contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp3734
-rw-r--r--contrib/llvm-project/llvm/lib/AsmParser/LLParser.h444
-rw-r--r--contrib/llvm-project/llvm/lib/AsmParser/LLToken.h7
-rw-r--r--contrib/llvm-project/llvm/lib/BinaryFormat/Dwarf.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/BinaryFormat/MachO.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/BinaryFormat/MsgPackDocument.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/BinaryFormat/Wasm.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/BinaryFormat/XCOFF.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp291
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp192
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp223
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h128
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp61
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp589
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h8
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp176
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp90
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp185
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp76
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp86
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h12
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp482
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h64
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h14
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h14
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h3
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp282
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h34
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp397
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h43
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp84
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h53
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h1
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h1
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp (renamed from contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp)159
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h3
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp289
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp387
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp96
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp232
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp106
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp128
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp362
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp624
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp1983
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp249
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp893
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp1612
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp145
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp390
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp258
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp107
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h18
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp119
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp3363
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp97
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h32
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp (renamed from contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp)224
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp71
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h1
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp61
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp137
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp244
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp84
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp126
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp126
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp125
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp155
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp179
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp169
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp121
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp256
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp290
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp194
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp412
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp74
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp165
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp95
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp151
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp186
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp1486
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp537
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp117
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp270
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h14
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp80
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp2396
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp257
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp103
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h12
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp551
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp289
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp396
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h62
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp1082
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp946
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp776
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h22
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp164
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp322
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp1071
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h13
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp180
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp272
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp443
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp303
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp270
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp417
-rw-r--r--contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp80
-rw-r--r--contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/CodeView/EnumTables.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordName.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp113
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp84
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp139
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp74
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp194
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp81
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp98
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp177
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp81
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp195
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp62
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBContext.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBExtras.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/PDB/UDTLayout.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h6
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Demangle/Demangle.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp396
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h45
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp334
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h24
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF.cpp66
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp544
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp62
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h16
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO.cpp60
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h4
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp111
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp65
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h10
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp76
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp2542
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp188
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Layer.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Legacy.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp339
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp158
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h534
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp138
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h502
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp303
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp149
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp (renamed from contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/OrcError.cpp)13
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp (renamed from contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/RPCError.cpp)17
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Speculation.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp70
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCEHFrameRegistrar.cpp80
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp423
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp208
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp153
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h7
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h9
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/FileCheck/FileCheck.cpp (renamed from contrib/llvm-project/llvm/lib/Support/FileCheck.cpp)410
-rw-r--r--contrib/llvm-project/llvm/lib/FileCheck/FileCheckImpl.h (renamed from contrib/llvm-project/llvm/lib/Support/FileCheckImpl.h)55
-rw-r--r--contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPContext.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp830
-rw-r--r--contrib/llvm-project/llvm/lib/FuzzMutate/IRMutator.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp146
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Assumptions.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/IR/AttributeImpl.h7
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Attributes.cpp241
-rw-r--r--contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp490
-rw-r--r--contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp142
-rw-r--r--contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp151
-rw-r--r--contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp97
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Constants.cpp415
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Core.cpp54
-rw-r--r--contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/IR/DataLayout.cpp414
-rw-r--r--contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp142
-rw-r--r--contrib/llvm-project/llvm/lib/IR/DebugLoc.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/IR/DiagnosticInfo.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Dominators.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Function.cpp128
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Globals.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp212
-rw-r--r--contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Instruction.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Instructions.cpp338
-rw-r--r--contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h225
-rw-r--r--contrib/llvm-project/llvm/lib/IR/LLVMRemarkStreamer.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp156
-rw-r--r--contrib/llvm-project/llvm/lib/IR/MDBuilder.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Mangler.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Metadata.cpp391
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Module.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Operator.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/IR/OptBisect.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Pass.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/IR/PassInstrumentation.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/IR/PassManager.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/IR/PassRegistry.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp60
-rw-r--r--contrib/llvm-project/llvm/lib/IR/PrintPasses.cpp88
-rw-r--r--contrib/llvm-project/llvm/lib/IR/ProfileSummary.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/IR/PseudoProbe.cpp99
-rw-r--r--contrib/llvm-project/llvm/lib/IR/ReplaceConstant.cpp70
-rw-r--r--contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp84
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Type.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Use.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/IR/User.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Value.cpp86
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Verifier.cpp431
-rw-r--r--contrib/llvm-project/llvm/lib/IRReader/IRReader.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp680
-rw-r--r--contrib/llvm-project/llvm/lib/InterfaceStub/ELFStub.cpp (renamed from contrib/llvm-project/llvm/lib/TextAPI/ELF/ELFStub.cpp)2
-rw-r--r--contrib/llvm-project/llvm/lib/InterfaceStub/TBEHandler.cpp (renamed from contrib/llvm-project/llvm/lib/TextAPI/ELF/TBEHandler.cpp)33
-rw-r--r--contrib/llvm-project/llvm/lib/LTO/Caching.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/LTO/LTO.cpp75
-rw-r--r--contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp240
-rw-r--r--contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp54
-rw-r--r--contrib/llvm-project/llvm/lib/LTO/LTOModule.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/Linker/IRMover.cpp158
-rw-r--r--contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCAsmInfoXCOFF.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCAsmMacro.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp152
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp94
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCCodeView.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCContext.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCExpr.cpp142
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCFragment.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp132
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp90
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp174
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp131
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/COFFAsmParser.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/COFFMasmParser.cpp119
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/DarwinAsmParser.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp1903
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/WasmAsmParser.cpp82
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp213
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSchedule.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSection.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSectionELF.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSectionMachO.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSectionWasm.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSubtargetInfo.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSymbolXCOFF.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCWasmStreamer.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCWin64EH.cpp487
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCWinCOFFStreamer.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp936
-rw-r--r--contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Object/Archive.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Object/ArchiveWriter.cpp161
-rw-r--r--contrib/llvm-project/llvm/lib/Object/Binary.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Object/COFFObjectFile.cpp74
-rw-r--r--contrib/llvm-project/llvm/lib/Object/ELF.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp155
-rw-r--r--contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Object/MachOUniversal.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Object/MachOUniversalWriter.cpp337
-rw-r--r--contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp286
-rw-r--r--contrib/llvm-project/llvm/lib/Object/SymbolSize.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Object/SymbolicFile.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp350
-rw-r--r--contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp303
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/ArchiveEmitter.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/ArchiveYAML.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/DWARFEmitter.cpp1051
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/DWARFVisitor.cpp196
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/DWARFVisitor.h97
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/DWARFYAML.cpp168
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp677
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp478
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/MinidumpYAML.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/ObjectYAML.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/WasmEmitter.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/WasmYAML.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/yaml2obj.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Option/OptTable.cpp109
-rw-r--r--contrib/llvm-project/llvm/lib/Option/Option.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp847
-rw-r--r--contrib/llvm-project/llvm/lib/Passes/PassRegistry.def84
-rw-r--r--contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp743
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp940
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp83
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp340
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp272
-rw-r--r--contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Support/AMDGPUMetadata.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Support/APFixedPoint.cpp574
-rw-r--r--contrib/llvm-project/llvm/lib/Support/APFloat.cpp108
-rw-r--r--contrib/llvm-project/llvm/lib/Support/APInt.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ARMTargetParser.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Support/CRC.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/CachePruning.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Support/CommandLine.cpp95
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Compression.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ConvertUTFWrapper.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/DynamicLibrary.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ELFAttributeParser.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Error.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ErrorHandling.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Support/FileCollector.cpp128
-rw-r--r--contrib/llvm-project/llvm/lib/Support/FormatVariadic.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Host.cpp298
-rw-r--r--contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Support/InstructionCost.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Support/JSON.cpp219
-rw-r--r--contrib/llvm-project/llvm/lib/Support/KnownBits.cpp397
-rw-r--r--contrib/llvm-project/llvm/lib/Support/LineIterator.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Support/LowLevelType.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/MemoryBufferRef.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Path.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/Support/PrettyStackTrace.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Process.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Program.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Support/SHA1.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Signals.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Signposts.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Support/SmallVector.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Support/TargetParser.cpp111
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Timer.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Support/TrigramIndex.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Triple.cpp98
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Unicode.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Unix/Path.inc76
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Unix/Process.inc6
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Unix/Program.inc6
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc29
-rw-r--r--contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp158
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/Path.inc61
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/Process.inc3
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/Program.inc59
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc5
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc25
-rw-r--r--contrib/llvm-project/llvm/lib/Support/X86TargetParser.cpp372
-rw-r--r--contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp155
-rw-r--r--contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp75
-rw-r--r--contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/DetailedRecordsBackend.cpp203
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/Error.cpp85
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/Main.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/Record.cpp519
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/TGLexer.h28
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp722
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/TGParser.h5
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/TableGenBackendSkeleton.cpp64
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td200
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp401
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td64
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp575
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp3173
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h137
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td130
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td60
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp527
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td334
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp82
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h74
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp111
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td684
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA55.td339
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td3890
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td745
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h151
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp167
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td253
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp196
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp596
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp106
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp1677
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp336
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp523
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp704
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp187
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/select-saddo.mir158
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/select-ssubo.mir158
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td557
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h99
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td197
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp162
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp856
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp122
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp601
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp237
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp226
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp1075
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp1101
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h62
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td159
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp195
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp1259
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp93
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp62
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp120
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp154
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp281
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp372
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp172
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h1204
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp252
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp365
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h106
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp66
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp1973
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td237
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td119
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp435
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/EXPInstructions.td125
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td138
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td704
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp220
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNILPSched.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp256
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h1064
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/InstCombineTables.td (renamed from contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineTables.td)0
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp217
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp81
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td129
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Defines.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Subtarget.h174
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h257
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp144
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp239
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp283
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp406
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp2013
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp163
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp241
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1551
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h104
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td552
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td570
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp316
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp109
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h65
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp586
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegister.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp208
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp744
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp170
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp204
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td629
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp247
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp460
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h188
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td225
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td592
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td269
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCSubtarget.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCSubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARM.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARM.td109
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp721
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h221
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBlockPlacement.cpp228
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFeatures.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp190
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp737
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td801
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td147
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td51
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td51
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp449
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp1525
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSLSHardening.cpp416
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td66
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td147
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM7.td488
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp858
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp132
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp256
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredUtils.h157
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp505
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp460
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp147
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp129
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPF.h37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp323
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFAdjustOpt.cpp323
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp130
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrFormats.td11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td190
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFMIChecking.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp131
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetTransformInfo.h61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BTF.def1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp106
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKY.td32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrFormats.td528
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.td108
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYRegisterInfo.td182
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.h38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp69
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.h29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp71
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp62
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.h20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.h17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/Hexagon.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBlockRanges.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp91
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp229
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp162
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h909
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp738
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp265
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td119
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td240
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp89
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp71
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp1487
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp165
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h139
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp82
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSchedule.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp257
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp257
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp118
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp163
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h86
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp674
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp586
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp841
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp3678
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h130
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td99
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td170
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td55
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp1019
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h106
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td441
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrPrefix.td1735
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td1212
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td375
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp278
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp140
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp240
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp161
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp170
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td113
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp111
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp326
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp657
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp (renamed from contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp)65
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h406
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp (renamed from contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp)4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h (renamed from contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h)3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp154
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp65
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp145
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp1095
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp1969
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h125
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td85
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td141
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp117
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td141
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td1029
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td695
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td4416
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td643
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td371
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp98
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td389
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td233
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td227
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td228
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td222
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h223
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp158
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp77
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp88
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp101
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td54
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td104
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/TargetLoweringObjectFile.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp85
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp103
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp137
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VE.h23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VE.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td138
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp402
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp2238
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h150
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td89
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp534
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td845
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td1604
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td64
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td91
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td1510
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp105
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td104
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp64
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td71
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp140
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp116
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h95
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp133
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp91
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp676
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp329
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td212
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td45
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td1199
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td64
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp325
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp123
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h91
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp1218
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp225
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86.td868
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp70
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CondBrFolding.cpp579
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp77
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp104
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp168
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp648
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp4966
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h60
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InsertWait.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp2017
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td54
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td738
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td192
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp255
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td150
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td86
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td221
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrSVM.td28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp99
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InterleavedAccess.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp184
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp81
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXType.cpp351
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp69
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86PartialReduction.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp265
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp139
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp102
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp117
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp358
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TileConfig.cpp248
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreSubtarget.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreSubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Testing/Support/Annotations.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/MachO/InterfaceFile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/MachO/Platform.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/MachO/Target.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/MachO/TextStub.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/MachO/TextStubCommon.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp149
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp1518
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h181
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h59
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp595
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp109
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/HelloNew/HelloWorld.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/Annotation2Metadata.cpp106
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp542
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp1306
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/BlockExtractor.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp271
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp100
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp308
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp1764
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp100
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp137
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp1350
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp329
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp161
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/PruneEH.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp585
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp980
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp434
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp335
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp774
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp3334
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp376
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp502
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h407
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp232
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp209
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp149
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp253
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp421
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp266
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp604
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp434
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp422
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp166
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CFGMST.h17
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp859
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp682
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp311
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp109
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp638
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp411
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp324
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp124
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp186
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp226
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp337
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/PtrState.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp121
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp90
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantProp.cpp121
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp407
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp339
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp66
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp1066
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp432
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp417
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp1383
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp1308
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp104
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp160
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp352
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp333
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp129
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp728
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp329
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp534
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp135
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp313
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp148
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp139
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp325
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp110
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp291
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp860
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp90
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp127
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp114
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp120
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp147
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp419
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp351
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp (renamed from contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp)123
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp84
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp74
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp188
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp80
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp101
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp171
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp146
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp320
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp446
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp150
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp60
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp103
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp273
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp82
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp753
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp (renamed from contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp)108
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp108
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp129
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp485
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp150
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp403
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp104
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp235
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp178
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp225
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp118
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp753
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp1893
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp1130
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp105
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp113
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp110
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h52
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp3607
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp2251
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp302
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h606
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h214
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp207
-rw-r--r--contrib/llvm-project/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/XRay/InstrumentationMap.cpp24
-rw-r--r--contrib/llvm-project/llvm/tools/bugpoint/CrashDebugger.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/bugpoint/ExecutionDriver.cpp18
-rw-r--r--contrib/llvm-project/llvm/tools/bugpoint/ExtractFunction.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/bugpoint/OptimizerDriver.cpp3
-rw-r--r--contrib/llvm-project/llvm/tools/bugpoint/ToolRunner.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llc/llc.cpp124
-rw-r--r--contrib/llvm-project/llvm/tools/lli/ChildTarget/ChildTarget.cpp6
-rw-r--r--contrib/llvm-project/llvm/tools/lli/RemoteJITUtils.h72
-rw-r--r--contrib/llvm-project/llvm/tools/lli/lli.cpp28
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp96
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterJson.cpp100
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp99
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/CoverageReport.cpp49
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp38
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.h54
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/CoverageViewOptions.h6
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.cpp23
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.h28
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp76
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.h3
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.cpp50
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.h3
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/gcov.cpp26
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-diff/DifferenceEngine.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-dwarfdump/Statistics.cpp202
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp4
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-dwp/llvm-dwp.cpp56
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-link/llvm-link.cpp53
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-lto/llvm-lto.cpp46
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-lto2/llvm-lto2.cpp21
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp8
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/CodeRegion.cpp1
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/PipelinePrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/PipelinePrinter.h4
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp47
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h13
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.h1
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp127
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.h28
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.cpp60
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.h67
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h3
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp52
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.h10
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h3
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.h1
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp54
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.h18
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp68
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.h11
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/View.cpp3
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/View.h18
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp22
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-nm/llvm-nm.cpp1082
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/BitcodeStripOpts.td24
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp38
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.cpp69
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.h4
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.cpp130
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.h9
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp153
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp891
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.h264
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/InstallNameToolOpts.td10
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp14
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h8
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp148
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h4
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp51
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.h6
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp14
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.cpp59
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.h15
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp108
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.h34
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/wasm/Object.cpp4
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp1
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp49
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objdump/MachODump.cpp90
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp281
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.h6
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp17
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-pdbutil/FormatUtil.cpp7
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-pdbutil/FormatUtil.h4
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp13
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp4
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp1204
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/ARMEHABIPrinter.h135
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp211
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.h12
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/COFFDumper.cpp64
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h82
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp5242
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/Error.cpp56
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/Error.h40
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/MachODumper.cpp41
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp52
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h49
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/WasmDumper.cpp16
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/Win64EHDumper.cpp5
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/WindowsResourceDumper.cpp1
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/XCOFFDumper.cpp15
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp157
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp4
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-stress/llvm-stress.cpp38
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-symbolizer/Opts.td71
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp383
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp84
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-xray/xray-account.h31
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-xray/xray-graph.cpp3
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-xray/xray-stacks.cpp23
-rw-r--r--contrib/llvm-project/llvm/tools/opt/NewPMDriver.cpp158
-rw-r--r--contrib/llvm-project/llvm/tools/opt/NewPMDriver.h13
-rw-r--r--contrib/llvm-project/llvm/tools/opt/opt.cpp233
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/AsmMatcherEmitter.cpp139
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/AsmWriterEmitter.cpp113
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/AsmWriterInst.cpp7
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CallingConvEmitter.cpp18
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp8
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.cpp34
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.h8
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.cpp2
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenIntrinsics.h10
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenMapTable.cpp26
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.cpp19
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.h4
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenSchedule.cpp351
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenSchedule.h13
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp255
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.h9
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/DAGISelEmitter.cpp14
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcher.h20
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp226
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherGen.cpp10
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.cpp5
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/DFAPacketizerEmitter.cpp2
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/DirectiveEmitter.cpp760
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/ExegesisEmitter.cpp2
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp44
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GICombinerEmitter.cpp55
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp23
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp2
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp2
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp15
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.h5
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISelEmitter.cpp802
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/InstrInfoEmitter.cpp39
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/IntrinsicEmitter.cpp20
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/OptParserEmitter.cpp309
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.cpp22
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.h3
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/PseudoLoweringEmitter.cpp114
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp234
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/RegisterBankEmitter.cpp19
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/RegisterInfoEmitter.cpp47
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp229
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/SubtargetEmitter.cpp122
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/SubtargetFeatureInfo.cpp5
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/SubtargetFeatureInfo.h5
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/TableGen.cpp30
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp15
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp2
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp27
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp2
2713 files changed, 238362 insertions, 103464 deletions
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Core.h b/contrib/llvm-project/llvm/include/llvm-c/Core.h
index c8a6f970419b..2901ab715810 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Core.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Core.h
@@ -162,7 +162,8 @@ typedef enum {
LLVMX86_MMXTypeKind, /**< X86 MMX */
LLVMTokenTypeKind, /**< Tokens */
LLVMScalableVectorTypeKind, /**< Scalable SIMD vector type */
- LLVMBFloatTypeKind /**< 16 bit brain floating point type */
+ LLVMBFloatTypeKind, /**< 16 bit brain floating point type */
+ LLVMX86_AMXTypeKind /**< X86 AMX */
} LLVMTypeKind;
typedef enum {
@@ -281,6 +282,7 @@ typedef enum {
LLVMInlineAsmValueKind,
LLVMInstructionValueKind,
+ LLVMPoisonValueValueKind
} LLVMValueKind;
typedef enum {
@@ -603,6 +605,17 @@ unsigned LLVMGetEnumAttributeKind(LLVMAttributeRef A);
uint64_t LLVMGetEnumAttributeValue(LLVMAttributeRef A);
/**
+ * Create a type attribute
+ */
+LLVMAttributeRef LLVMCreateTypeAttribute(LLVMContextRef C, unsigned KindID,
+ LLVMTypeRef type_ref);
+
+/**
+ * Get the type attribute's value.
+ */
+LLVMTypeRef LLVMGetTypeAttributeValue(LLVMAttributeRef A);
+
+/**
* Create a string attribute.
*/
LLVMAttributeRef LLVMCreateStringAttribute(LLVMContextRef C,
@@ -624,6 +637,12 @@ const char *LLVMGetStringAttributeValue(LLVMAttributeRef A, unsigned *Length);
*/
LLVMBool LLVMIsEnumAttribute(LLVMAttributeRef A);
LLVMBool LLVMIsStringAttribute(LLVMAttributeRef A);
+LLVMBool LLVMIsTypeAttribute(LLVMAttributeRef A);
+
+/**
+ * Obtain a Type from a context by its registered name.
+ */
+LLVMTypeRef LLVMGetTypeByName2(LLVMContextRef C, const char *Name);
/**
* @}
@@ -866,9 +885,7 @@ LLVMValueRef LLVMGetInlineAsm(LLVMTypeRef Ty,
*/
LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M);
-/**
- * Obtain a Type from a module by its registered name.
- */
+/** Deprecated: Use LLVMGetTypeByName2 instead. */
LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
/**
@@ -1444,9 +1461,21 @@ unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy);
LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount);
/**
- * Obtain the number of elements in a vector type.
+ * Create a vector type that contains a defined type and has a scalable
+ * number of elements.
*
- * This only works on types that represent vectors.
+ * The created type will exist in the context that its element type
+ * exists in.
+ *
+ * @see llvm::ScalableVectorType::get()
+ */
+LLVMTypeRef LLVMScalableVectorType(LLVMTypeRef ElementType,
+ unsigned ElementCount);
+
+/**
+ * Obtain the (possibly scalable) number of elements in a vector type.
+ *
+ * This only works on types that represent vectors (fixed or scalable).
*
* @see llvm::VectorType::getNumElements()
*/
@@ -1478,6 +1507,11 @@ LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C);
/**
+ * Create a X86 AMX type in a context.
+ */
+LLVMTypeRef LLVMX86AMXTypeInContext(LLVMContextRef C);
+
+/**
* Create a token type in a context.
*/
LLVMTypeRef LLVMTokenTypeInContext(LLVMContextRef C);
@@ -1494,6 +1528,7 @@ LLVMTypeRef LLVMMetadataTypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMVoidType(void);
LLVMTypeRef LLVMLabelType(void);
LLVMTypeRef LLVMX86MMXType(void);
+LLVMTypeRef LLVMX86AMXType(void);
/**
* @}
@@ -1550,6 +1585,7 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(Function) \
macro(GlobalVariable) \
macro(UndefValue) \
+ macro(PoisonValue) \
macro(Instruction) \
macro(UnaryOperator) \
macro(BinaryOperator) \
@@ -1684,6 +1720,11 @@ LLVMBool LLVMIsConstant(LLVMValueRef Val);
LLVMBool LLVMIsUndef(LLVMValueRef Val);
/**
+ * Determine whether a value instance is poisonous.
+ */
+LLVMBool LLVMIsPoison(LLVMValueRef Val);
+
+/**
* Convert value instances between types.
*
* Internally, an LLVMValueRef is "pinned" to a specific type. This
@@ -1842,6 +1883,13 @@ LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty);
LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty);
/**
+ * Obtain a constant value referring to a poison value of a type.
+ *
+ * @see llvm::PoisonValue::get()
+ */
+LLVMValueRef LLVMGetPoison(LLVMTypeRef Ty);
+
+/**
* Determine whether a value instance is null.
*
* @see llvm::Constant::isNullValue()
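The Core.h additions above (poison constants, the X86 AMX type, and context-level
type lookup via LLVMGetTypeByName2) can be exercised from C roughly as follows.
This is an illustrative sketch, not part of the commit; it only calls functions
declared in this header or long-standing LLVM-C entry points.

#include <stdio.h>
#include <llvm-c/Core.h>

int main(void) {
  LLVMContextRef Ctx = LLVMContextCreate();

  /* New in this merge: poison constants alongside undef. */
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMValueRef Poison = LLVMGetPoison(I32);
  printf("is poison: %d, is undef: %d\n",
         LLVMIsPoison(Poison), LLVMIsUndef(Poison));

  /* New X86 AMX type helper, mirroring the existing X86 MMX one. */
  LLVMTypeRef Amx = LLVMX86AMXTypeInContext(Ctx);
  (void)Amx;

  /* Named struct types are now looked up on the context;
     LLVMGetTypeByName (module-based) is deprecated in favour of this. */
  LLVMTypeRef Named = LLVMStructCreateNamed(Ctx, "mystruct");
  (void)Named;
  LLVMTypeRef Found = LLVMGetTypeByName2(Ctx, "mystruct");
  printf("lookup ok: %d\n", Found != NULL);

  LLVMContextDispose(Ctx);
  return 0;
}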
diff --git a/contrib/llvm-project/llvm/include/llvm-c/DataTypes.h b/contrib/llvm-project/llvm/include/llvm-c/DataTypes.h
index 0f27ba81865e..4eb0ac97d97e 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/DataTypes.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/DataTypes.h
@@ -77,8 +77,4 @@ typedef signed int ssize_t;
# define UINT64_MAX 0xffffffffffffffffULL
#endif
-#ifndef HUGE_VALF
-#define HUGE_VALF (float)HUGE_VAL
-#endif
-
#endif /* LLVM_C_DATATYPES_H */
diff --git a/contrib/llvm-project/llvm/include/llvm-c/DebugInfo.h b/contrib/llvm-project/llvm/include/llvm-c/DebugInfo.h
index cdf5f5a0cca8..5a9cd8e2ee63 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/DebugInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/DebugInfo.h
@@ -159,7 +159,9 @@ enum {
LLVMDIImportedEntityMetadataKind,
LLVMDIMacroMetadataKind,
LLVMDIMacroFileMetadataKind,
- LLVMDICommonBlockMetadataKind
+ LLVMDICommonBlockMetadataKind,
+ LLVMDIStringTypeMetadataKind,
+ LLVMDIGenericSubrangeMetadataKind
};
typedef unsigned LLVMMetadataKind;
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Error.h b/contrib/llvm-project/llvm/include/llvm-c/Error.h
index 92f81bf38304..bc702ac7a1bf 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Error.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Error.h
@@ -62,6 +62,11 @@ void LLVMDisposeErrorMessage(char *ErrMsg);
*/
LLVMErrorTypeId LLVMGetStringErrorTypeId(void);
+/**
+ * Create a StringError.
+ */
+LLVMErrorRef LLVMCreateStringError(const char *ErrMsg);
+
LLVM_C_EXTERN_C_END
#endif
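The new LLVMCreateStringError entry point pairs with the existing Error.h
accessors; a minimal sketch (illustrative only, using functions already present
in this header):

#include <stdio.h>
#include <llvm-c/Error.h>

int main(void) {
  /* New in this merge: build a StringError directly from C. */
  LLVMErrorRef Err = LLVMCreateStringError("something went wrong");

  if (LLVMGetErrorTypeId(Err) == LLVMGetStringErrorTypeId()) {
    /* Consumes the error; the returned message must be disposed. */
    char *Msg = LLVMGetErrorMessage(Err);
    fprintf(stderr, "error: %s\n", Msg);
    LLVMDisposeErrorMessage(Msg);
  }
  return 0;
}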
diff --git a/contrib/llvm-project/llvm/include/llvm-c/LLJIT.h b/contrib/llvm-project/llvm/include/llvm-c/LLJIT.h
new file mode 100644
index 000000000000..28eb8bbff96b
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm-c/LLJIT.h
@@ -0,0 +1,213 @@
+/*===----------- llvm-c/LLJIT.h - OrcV2 LLJIT C bindings --------*- C++ -*-===*\
+|* *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to the LLJIT class in *|
+|* libLLVMOrcJIT.a, which provides a simple MCJIT-like ORC JIT. *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages. *|
+|* *|
+|* Note: This interface is experimental. It is *NOT* stable, and may be *|
+|* changed without warning. Only C API usage documentation is *|
+|* provided. See the C++ documentation for all higher level ORC API *|
+|* details. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_LLJIT_H
+#define LLVM_C_LLJIT_H
+
+#include "llvm-c/Error.h"
+#include "llvm-c/Orc.h"
+#include "llvm-c/TargetMachine.h"
+#include "llvm-c/Types.h"
+
+LLVM_C_EXTERN_C_BEGIN
+
+/**
+ * A function for constructing an ObjectLinkingLayer instance to be used
+ * by an LLJIT instance.
+ *
+ * Clients can call LLVMOrcLLJITBuilderSetObjectLinkingLayerCreator to
+ * set the creator function to use when constructing an LLJIT instance.
+ * This can be used to override the default linking layer implementation
+ * that would otherwise be chosen by LLJITBuilder.
+ *
+ * Object linking layers returned by this function will become owned by the
+ * LLJIT instance. The client is not responsible for managing their lifetimes
+ * after the function returns.
+ */
+typedef LLVMOrcObjectLayerRef (
+ *LLVMOrcLLJITBuilderObjectLinkingLayerCreatorFunction)(
+ void *Ctx, LLVMOrcExecutionSessionRef ES, const char *Triple);
+
+/**
+ * A reference to an orc::LLJITBuilder instance.
+ */
+typedef struct LLVMOrcOpaqueLLJITBuilder *LLVMOrcLLJITBuilderRef;
+
+/**
+ * A reference to an orc::LLJIT instance.
+ */
+typedef struct LLVMOrcOpaqueLLJIT *LLVMOrcLLJITRef;
+
+/**
+ * Create an LLVMOrcLLJITBuilder.
+ *
+ * The client owns the resulting LLJITBuilder and should dispose of it using
+ * LLVMOrcDisposeLLJITBuilder once they are done with it.
+ */
+LLVMOrcLLJITBuilderRef LLVMOrcCreateLLJITBuilder(void);
+
+/**
+ * Dispose of an LLVMOrcLLJITBuilderRef. This should only be called if ownership
+ * has not been passed to LLVMOrcCreateLLJIT (e.g. because some error prevented
+ * that function from being called).
+ */
+void LLVMOrcDisposeLLJITBuilder(LLVMOrcLLJITBuilderRef Builder);
+
+/**
+ * Set the JITTargetMachineBuilder to be used when constructing the LLJIT
+ * instance. Calling this function is optional: if it is not called then the
+ * LLJITBuilder will use JITTargetMachineBuilder::detectHost to construct a
+ * JITTargetMachineBuilder.
+ */
+void LLVMOrcLLJITBuilderSetJITTargetMachineBuilder(
+ LLVMOrcLLJITBuilderRef Builder, LLVMOrcJITTargetMachineBuilderRef JTMB);
+
+/**
+ * Set an ObjectLinkingLayer creator function for this LLJIT instance.
+ */
+void LLVMOrcLLJITBuilderSetObjectLinkingLayerCreator(
+ LLVMOrcLLJITBuilderRef Builder,
+ LLVMOrcLLJITBuilderObjectLinkingLayerCreatorFunction F, void *Ctx);
+
+/**
+ * Create an LLJIT instance from an LLJITBuilder.
+ *
+ * This operation takes ownership of the Builder argument: clients should not
+ * dispose of the builder after calling this function (even if the function
+ * returns an error). If a null Builder argument is provided then a
+ * default-constructed LLJITBuilder will be used.
+ *
+ * On success the resulting LLJIT instance is uniquely owned by the client and
+ * automatically manages the memory of all JIT'd code and all modules that are
+ * transferred to it (e.g. via LLVMOrcLLJITAddLLVMIRModule). Disposing of the
+ * LLJIT instance will free all memory managed by the JIT, including JIT'd code
+ * and not-yet compiled modules.
+ */
+LLVMErrorRef LLVMOrcCreateLLJIT(LLVMOrcLLJITRef *Result,
+ LLVMOrcLLJITBuilderRef Builder);
+
+/**
+ * Dispose of an LLJIT instance.
+ */
+LLVMErrorRef LLVMOrcDisposeLLJIT(LLVMOrcLLJITRef J);
+
+/**
+ * Get a reference to the ExecutionSession for this LLJIT instance.
+ *
+ * The ExecutionSession is owned by the LLJIT instance. The client is not
+ * responsible for managing its memory.
+ */
+LLVMOrcExecutionSessionRef LLVMOrcLLJITGetExecutionSession(LLVMOrcLLJITRef J);
+
+/**
+ * Return a reference to the Main JITDylib.
+ *
+ * The JITDylib is owned by the LLJIT instance. The client is not responsible
+ * for managing its memory.
+ */
+LLVMOrcJITDylibRef LLVMOrcLLJITGetMainJITDylib(LLVMOrcLLJITRef J);
+
+/**
+ * Return the target triple for this LLJIT instance. This string is owned by
+ * the LLJIT instance and should not be freed by the client.
+ */
+const char *LLVMOrcLLJITGetTripleString(LLVMOrcLLJITRef J);
+
+/**
+ * Returns the global prefix character according to the LLJIT's DataLayout.
+ */
+char LLVMOrcLLJITGetGlobalPrefix(LLVMOrcLLJITRef J);
+
+/**
+ * Mangles the given string according to the LLJIT instance's DataLayout, then
+ * interns the result in the SymbolStringPool and returns a reference to the
+ * pool entry. Clients should call LLVMOrcReleaseSymbolStringPoolEntry to
+ * decrement the ref-count on the pool entry once they are finished with this
+ * value.
+ */
+LLVMOrcSymbolStringPoolEntryRef
+LLVMOrcLLJITMangleAndIntern(LLVMOrcLLJITRef J, const char *UnmangledName);
+
+/**
+ * Add a buffer representing an object file to the given JITDylib in the given
+ * LLJIT instance. This operation transfers ownership of the buffer to the
+ * LLJIT instance. The buffer should not be disposed of or referenced once this
+ * function returns.
+ *
+ * Resources associated with the given object will be tracked by the given
+ * JITDylib's default resource tracker.
+ */
+LLVMErrorRef LLVMOrcLLJITAddObjectFile(LLVMOrcLLJITRef J, LLVMOrcJITDylibRef JD,
+ LLVMMemoryBufferRef ObjBuffer);
+
+/**
+ * Add a buffer representing an object file to the given ResourceTracker's
+ * JITDylib in the given LLJIT instance. This operation transfers ownership of
+ * the buffer to the LLJIT instance. The buffer should not be disposed of or
+ * referenced once this function returns.
+ *
+ * Resources associated with the given object will be tracked by ResourceTracker
+ * RT.
+ */
+LLVMErrorRef LLVMOrcLLJITAddObjectFileWithRT(LLVMOrcLLJITRef J,
+ LLVMOrcResourceTrackerRef RT,
+ LLVMMemoryBufferRef ObjBuffer);
+
+/**
+ * Add an IR module to the given JITDylib in the given LLJIT instance. This
+ * operation transfers ownership of the TSM argument to the LLJIT instance.
+ * The TSM argument should not be disposed of or referenced once this
+ * function returns.
+ *
+ * Resources associated with the given Module will be tracked by the given
+ * JITDylib's default resource tracker.
+ */
+LLVMErrorRef LLVMOrcLLJITAddLLVMIRModule(LLVMOrcLLJITRef J,
+ LLVMOrcJITDylibRef JD,
+ LLVMOrcThreadSafeModuleRef TSM);
+
+/**
+ * Add an IR module to the given ResourceTracker's JITDylib in the given LLJIT
+ * instance. This operation transfers ownership of the TSM argument to the LLJIT
+ * instance. The TSM argument should not be disposed of or referenced once this
+ * function returns.
+ *
+ * Resources associated with the given Module will be tracked by ResourceTracker
+ * RT.
+ */
+LLVMErrorRef LLVMOrcLLJITAddLLVMIRModuleWithRT(LLVMOrcLLJITRef J,
+ LLVMOrcResourceTrackerRef RT,
+ LLVMOrcThreadSafeModuleRef TSM);
+
+/**
+ * Look up the given symbol in the main JITDylib of the given LLJIT instance.
+ *
+ * This operation does not take ownership of the Name argument.
+ */
+LLVMErrorRef LLVMOrcLLJITLookup(LLVMOrcLLJITRef J,
+ LLVMOrcJITTargetAddress *Result,
+ const char *Name);
+
+LLVM_C_EXTERN_C_END
+
+#endif /* LLVM_C_LLJIT_H */
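
For orientation, here is a minimal sketch (written as C++ against the LLVM-C declarations added above) of the workflow these functions describe: create an LLJIT with a default builder, add one toy IR module via a ThreadSafeModule, look up a symbol, and dispose of the JIT. LLVMOrcCreateNewThreadSafeModule and LLVMOrcThreadSafeContextGetContext are assumed from the Orc.h of this same release; the module contents and helper names are illustrative only.

    // A hedged sketch: assumes LLVM 12 is installed and the native target
    // libraries are linked in. Error handling is reduced to abortOnError().
    #include "llvm-c/Core.h"
    #include "llvm-c/Error.h"
    #include "llvm-c/LLJIT.h"
    #include "llvm-c/Orc.h"
    #include "llvm-c/Target.h"
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    static void abortOnError(LLVMErrorRef Err) {
      if (!Err)
        return;
      char *Msg = LLVMGetErrorMessage(Err); // consumes Err
      std::fprintf(stderr, "ORC error: %s\n", Msg);
      LLVMDisposeErrorMessage(Msg);
      std::exit(1);
    }

    int main() {
      LLVMInitializeNativeTarget();
      LLVMInitializeNativeAsmPrinter();

      // A null builder means "default-constructed LLJITBuilder" (host target).
      LLVMOrcLLJITRef J;
      abortOnError(LLVMOrcCreateLLJIT(&J, NULL));

      // Build a toy module: i32 answer() { return 42; }
      LLVMOrcThreadSafeContextRef TSCtx = LLVMOrcCreateNewThreadSafeContext();
      LLVMContextRef Ctx = LLVMOrcThreadSafeContextGetContext(TSCtx);
      LLVMModuleRef M = LLVMModuleCreateWithNameInContext("m", Ctx);
      LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
      LLVMValueRef Fn = LLVMAddFunction(M, "answer", LLVMFunctionType(I32, NULL, 0, 0));
      LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);
      LLVMPositionBuilderAtEnd(B, LLVMAppendBasicBlockInContext(Ctx, Fn, "entry"));
      LLVMBuildRet(B, LLVMConstInt(I32, 42, 0));
      LLVMDisposeBuilder(B);

      // Hand the module to the JIT; the ThreadSafeModule keeps the context alive.
      LLVMOrcThreadSafeModuleRef TSM = LLVMOrcCreateNewThreadSafeModule(M, TSCtx);
      LLVMOrcDisposeThreadSafeContext(TSCtx);
      abortOnError(
          LLVMOrcLLJITAddLLVMIRModule(J, LLVMOrcLLJITGetMainJITDylib(J), TSM));

      // Lookup mangles and interns the name according to the JIT's DataLayout.
      LLVMOrcJITTargetAddress Addr = 0;
      abortOnError(LLVMOrcLLJITLookup(J, &Addr, "answer"));
      int (*Answer)() = (int (*)())(uintptr_t)Addr;
      std::printf("answer() = %d\n", Answer());

      abortOnError(LLVMOrcDisposeLLJIT(J)); // frees JIT'd code and owned modules
      return 0;
    }
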
diff --git a/contrib/llvm-project/llvm/include/llvm-c/LinkTimeOptimizer.h b/contrib/llvm-project/llvm/include/llvm-c/LinkTimeOptimizer.h
deleted file mode 100644
index 9ae65b8fe5e0..000000000000
--- a/contrib/llvm-project/llvm/include/llvm-c/LinkTimeOptimizer.h
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- llvm/LinkTimeOptimizer.h - LTO Public C Interface -------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This header provides a C API to use the LLVM link time optimization
-// library. This is intended to be used by linkers which are C-only in
-// their implementation for performing LTO.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_C_LINKTIMEOPTIMIZER_H
-#define LLVM_C_LINKTIMEOPTIMIZER_H
-
-#include "llvm-c/ExternC.h"
-
-LLVM_C_EXTERN_C_BEGIN
-
-/**
- * @defgroup LLVMCLinkTimeOptimizer Link Time Optimization
- * @ingroup LLVMC
- *
- * @{
- */
-
- /// This provides a dummy type for pointers to the LTO object.
- typedef void* llvm_lto_t;
-
- /// This provides a C-visible enumerator to manage status codes.
- /// This should map exactly onto the C++ enumerator LTOStatus.
- typedef enum llvm_lto_status {
- LLVM_LTO_UNKNOWN,
- LLVM_LTO_OPT_SUCCESS,
- LLVM_LTO_READ_SUCCESS,
- LLVM_LTO_READ_FAILURE,
- LLVM_LTO_WRITE_FAILURE,
- LLVM_LTO_NO_TARGET,
- LLVM_LTO_NO_WORK,
- LLVM_LTO_MODULE_MERGE_FAILURE,
- LLVM_LTO_ASM_FAILURE,
-
- // Added C-specific error codes
- LLVM_LTO_NULL_OBJECT
- } llvm_lto_status_t;
-
- /// This provides C interface to initialize link time optimizer. This allows
- /// linker to use dlopen() interface to dynamically load LinkTimeOptimizer.
- /// extern "C" helps, because dlopen() interface uses name to find the symbol.
- extern llvm_lto_t llvm_create_optimizer(void);
- extern void llvm_destroy_optimizer(llvm_lto_t lto);
-
- extern llvm_lto_status_t llvm_read_object_file
- (llvm_lto_t lto, const char* input_filename);
- extern llvm_lto_status_t llvm_optimize_modules
- (llvm_lto_t lto, const char* output_filename);
-
-/**
- * @}
- */
-
- LLVM_C_EXTERN_C_END
-
-#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Orc.h b/contrib/llvm-project/llvm/include/llvm-c/Orc.h
index 09a058846108..9beef44c89dd 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Orc.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Orc.h
@@ -39,32 +39,200 @@ LLVM_C_EXTERN_C_BEGIN
typedef uint64_t LLVMOrcJITTargetAddress;
/**
+ * Represents generic linkage flags for a symbol definition.
+ */
+typedef enum {
+ LLVMJITSymbolGenericFlagsExported = 1U << 0,
+ LLVMJITSymbolGenericFlagsWeak = 1U << 1
+} LLVMJITSymbolGenericFlags;
+
+/**
+ * Represents target specific flags for a symbol definition.
+ */
+typedef uint8_t LLVMJITTargetSymbolFlags;
+
+/**
+ * Represents the linkage flags for a symbol definition.
+ */
+typedef struct {
+ uint8_t GenericFlags;
+ uint8_t TargetFlags;
+} LLVMJITSymbolFlags;
+
+/**
+ * Represents an evaluated symbol address and flags.
+ */
+typedef struct {
+ LLVMOrcJITTargetAddress Address;
+ LLVMJITSymbolFlags Flags;
+} LLVMJITEvaluatedSymbol;
+
+/**
* A reference to an orc::ExecutionSession instance.
*/
typedef struct LLVMOrcOpaqueExecutionSession *LLVMOrcExecutionSessionRef;
/**
+ * Error reporter function.
+ */
+typedef void (*LLVMOrcErrorReporterFunction)(void *Ctx, LLVMErrorRef Err);
+
+/**
+ * A reference to an orc::SymbolStringPool.
+ */
+typedef struct LLVMOrcOpaqueSymbolStringPool *LLVMOrcSymbolStringPoolRef;
+
+/**
* A reference to an orc::SymbolStringPool table entry.
*/
-typedef struct LLVMOrcQuaqueSymbolStringPoolEntryPtr
+typedef struct LLVMOrcOpaqueSymbolStringPoolEntry
*LLVMOrcSymbolStringPoolEntryRef;
/**
+ * Represents a pair of a symbol name and an evaluated symbol.
+ */
+typedef struct {
+ LLVMOrcSymbolStringPoolEntryRef Name;
+ LLVMJITEvaluatedSymbol Sym;
+} LLVMJITCSymbolMapPair;
+
+/**
+ * Represents a list of (SymbolStringPtr, JITEvaluatedSymbol) pairs that can be
+ * used to construct a SymbolMap.
+ */
+typedef LLVMJITCSymbolMapPair *LLVMOrcCSymbolMapPairs;
+
+/**
+ * Lookup kind. This can be used by definition generators when deciding whether
+ * to produce a definition for a requested symbol.
+ *
+ * This enum should be kept in sync with llvm::orc::LookupKind.
+ */
+typedef enum {
+ LLVMOrcLookupKindStatic,
+ LLVMOrcLookupKindDLSym
+} LLVMOrcLookupKind;
+
+/**
+ * JITDylib lookup flags. This can be used by definition generators when
+ * deciding whether to produce a definition for a requested symbol.
+ *
+ * This enum should be kept in sync with llvm::orc::JITDylibLookupFlags.
+ */
+typedef enum {
+ LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly,
+ LLVMOrcJITDylibLookupFlagsMatchAllSymbols
+} LLVMOrcJITDylibLookupFlags;
+
+/**
+ * Symbol lookup flags for lookup sets. This should be kept in sync with
+ * llvm::orc::SymbolLookupFlags.
+ */
+typedef enum {
+ LLVMOrcSymbolLookupFlagsRequiredSymbol,
+ LLVMOrcSymbolLookupFlagsWeaklyReferencedSymbol
+} LLVMOrcSymbolLookupFlags;
+
+/**
+ * An element type for a symbol lookup set.
+ */
+typedef struct {
+ LLVMOrcSymbolStringPoolEntryRef Name;
+ LLVMOrcSymbolLookupFlags LookupFlags;
+} LLVMOrcCLookupSetElement;
+
+/**
+ * A set of symbols to look up / generate.
+ *
+ * The list is terminated with an element containing a null pointer for the
+ * Name field.
+ *
+ * If a client creates an instance of this type then they are responsible for
+ * freeing it, and for ensuring that all strings have been retained over the
+ * course of its life. Clients receiving a copy from a callback are not
+ * responsible for managing lifetime or retain counts.
+ */
+typedef LLVMOrcCLookupSetElement *LLVMOrcCLookupSet;
+
+/**
+ * A reference to an orc::MaterializationUnit.
+ */
+typedef struct LLVMOrcOpaqueMaterializationUnit *LLVMOrcMaterializationUnitRef;
+
+/**
* A reference to an orc::JITDylib instance.
*/
typedef struct LLVMOrcOpaqueJITDylib *LLVMOrcJITDylibRef;
/**
- * A reference to an orc::JITDylib::DefinitionGenerator.
+ * A reference to an orc::ResourceTracker instance.
+ */
+typedef struct LLVMOrcOpaqueResourceTracker *LLVMOrcResourceTrackerRef;
+
+/**
+ * A reference to an orc::DefinitionGenerator.
+ */
+typedef struct LLVMOrcOpaqueDefinitionGenerator
+ *LLVMOrcDefinitionGeneratorRef;
+
+/**
+ * An opaque lookup state object. Instances of this type can be captured to
+ * suspend a lookup while a custom generator function attempts to produce a
+ * definition.
+ *
+ * If a client captures a lookup state object then they must eventually call
+ * LLVMOrcLookupStateContinueLookup to restart the lookup. This is required
+ * in order to release memory allocated for the lookup state, even if errors
+ * have occurred while the lookup was suspended (if these errors have made the
+ * lookup impossible to complete then it will issue its own error before
+ * destruction).
+ */
+typedef struct LLVMOrcOpaqueLookupState *LLVMOrcLookupStateRef;
+
+/**
+ * A custom generator function. This can be used to create a custom generator
+ * object using LLVMOrcCreateCustomCAPIDefinitionGenerator. The resulting
+ * object can be attached to a JITDylib, via LLVMOrcJITDylibAddGenerator, to
+ * receive callbacks when lookups fail to match existing definitions.
+ *
+ * GeneratorObj will contain the address of the custom generator object.
+ *
+ * Ctx will contain the context object passed to
+ * LLVMOrcCreateCustomCAPIDefinitionGenerator.
+ *
+ * LookupState will contain a pointer to an LLVMOrcLookupStateRef object. This
+ * can optionally be modified to make the definition generation process
+ * asynchronous: If the LookupStateRef value is copied, and the original
+ * LLVMOrcLookupStateRef set to null, the lookup will be suspended. Once the
+ * asynchronous definition process has been completed clients must call
+ * LLVMOrcLookupStateContinueLookup to continue the lookup (this should be
+ * done unconditionally, even if errors have occurred in the mean time, to
+ * free the lookup state memory and notify the query object of the failures. If
+ * LookupState is captured this function must return LLVMErrorSuccess.
+ *
+ * The Kind argument can be inspected to determine the lookup kind (e.g.
+ * as-if-during-static-link, or as-if-during-dlsym).
+ *
+ * The JD argument specifies which JITDylib the definitions should be generated
+ * into.
+ *
+ * The JDLookupFlags argument can be inspected to determine whether the original
+ * lookup included non-exported symbols.
+ *
+ * Finally, the LookupSet argument contains the set of symbols that could not
+ * be found in JD already (the set of generation candidates).
*/
-typedef struct LLVMOrcOpaqueJITDylibDefinitionGenerator
- *LLVMOrcJITDylibDefinitionGeneratorRef;
+typedef LLVMErrorRef (*LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction)(
+ LLVMOrcDefinitionGeneratorRef GeneratorObj, void *Ctx,
+ LLVMOrcLookupStateRef *LookupState, LLVMOrcLookupKind Kind,
+ LLVMOrcJITDylibRef JD, LLVMOrcJITDylibLookupFlags JDLookupFlags,
+ LLVMOrcCLookupSet LookupSet, size_t LookupSetSize);
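
As a concrete reading of the callback contract described above, here is a hedged sketch of a synchronous generator: it never captures LookupState, defines whatever it recognizes as absolute symbols, and leaves everything else for other generators. The host-side table (HostCounter, lookupInHostTable) and the symbol name are purely illustrative.

    // Hedged sketch of a synchronous generator: LookupState is never captured,
    // so returning success lets the suspended lookup proceed immediately.
    #include "llvm-c/Error.h"
    #include "llvm-c/Orc.h"
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Purely illustrative host-side "table": one variable exported by name.
    static int HostCounter = 0;
    static LLVMOrcJITTargetAddress lookupInHostTable(const char *Name) {
      if (std::strcmp(Name, "host_counter") == 0)
        return (LLVMOrcJITTargetAddress)(uintptr_t)&HostCounter;
      return 0;
    }

    static LLVMErrorRef tryToGenerate(
        LLVMOrcDefinitionGeneratorRef GeneratorObj, void *Ctx,
        LLVMOrcLookupStateRef *LookupState, LLVMOrcLookupKind Kind,
        LLVMOrcJITDylibRef JD, LLVMOrcJITDylibLookupFlags JDLookupFlags,
        LLVMOrcCLookupSet LookupSet, size_t LookupSetSize) {
      (void)GeneratorObj; (void)Ctx; (void)LookupState;
      (void)Kind; (void)JDLookupFlags;

      std::vector<LLVMJITCSymbolMapPair> Pairs;
      for (size_t I = 0; I != LookupSetSize; ++I) {
        LLVMOrcSymbolStringPoolEntryRef Name = LookupSet[I].Name;
        LLVMOrcJITTargetAddress Addr =
            lookupInHostTable(LLVMOrcSymbolStringPoolEntryStr(Name));
        if (!Addr)
          continue; // leave unknown names for other generators
        LLVMOrcRetainSymbolStringPoolEntry(Name); // the unit below owns one ref
        LLVMJITCSymbolMapPair Pair;
        Pair.Name = Name;
        Pair.Sym.Address = Addr;
        Pair.Sym.Flags.GenericFlags = LLVMJITSymbolGenericFlagsExported;
        Pair.Sym.Flags.TargetFlags = 0;
        Pairs.push_back(Pair);
      }
      if (Pairs.empty())
        return LLVMErrorSuccess; // nothing to contribute

      LLVMOrcMaterializationUnitRef MU =
          LLVMOrcAbsoluteSymbols(Pairs.data(), Pairs.size());
      LLVMErrorRef Err = LLVMOrcJITDylibDefine(JD, MU);
      if (Err)
        LLVMOrcDisposeMaterializationUnit(MU); // ownership stayed with us on error
      return Err;
    }

    // Attach it to a JITDylib:
    //   LLVMOrcJITDylibAddGenerator(
    //       JD, LLVMOrcCreateCustomCAPIDefinitionGenerator(&tryToGenerate, NULL));
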
/**
* Predicate function for SymbolStringPoolEntries.
*/
-typedef int (*LLVMOrcSymbolPredicate)(LLVMOrcSymbolStringPoolEntryRef Sym,
- void *Ctx);
+typedef int (*LLVMOrcSymbolPredicate)(void *Ctx,
+ LLVMOrcSymbolStringPoolEntryRef Sym);
/**
* A reference to an orc::ThreadSafeContext instance.
@@ -83,14 +251,43 @@ typedef struct LLVMOrcOpaqueJITTargetMachineBuilder
*LLVMOrcJITTargetMachineBuilderRef;
/**
- * A reference to an orc::LLJITBuilder instance.
+ * A reference to an orc::ObjectLayer instance.
+ */
+typedef struct LLVMOrcOpaqueObjectLayer *LLVMOrcObjectLayerRef;
+
+/**
+ * Attach a custom error reporter function to the ExecutionSession.
+ *
+ * The error reporter will be called to deliver failure notices that can not be
+ * directly reported to a caller. For example, failure to resolve symbols in
+ * the JIT linker is typically reported via the error reporter (callers
+ * requesting definitions from the JIT will typically be delivered a
+ * FailureToMaterialize error instead).
+ */
+void LLVMOrcExecutionSessionSetErrorReporter(
+ LLVMOrcExecutionSessionRef ES, LLVMOrcErrorReporterFunction ReportError,
+ void *Ctx);
+
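
A small sketch of installing such a reporter for an LLJIT instance follows; the logging sink (stderr) and helper name are illustrative.

    #include "llvm-c/Error.h"
    #include "llvm-c/LLJIT.h"
    #include "llvm-c/Orc.h"
    #include <cstdio>

    // Receives errors that have no caller to return to (e.g. linker failures).
    static void reportOrcError(void *Ctx, LLVMErrorRef Err) {
      char *Msg = LLVMGetErrorMessage(Err); // takes ownership of Err
      std::fprintf(static_cast<std::FILE *>(Ctx), "ORC background error: %s\n", Msg);
      LLVMDisposeErrorMessage(Msg);
    }

    static void installErrorReporter(LLVMOrcLLJITRef J) {
      LLVMOrcExecutionSessionRef ES = LLVMOrcLLJITGetExecutionSession(J);
      LLVMOrcExecutionSessionSetErrorReporter(ES, reportOrcError, stderr);
    }
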
+/**
+ * Return a reference to the SymbolStringPool for an ExecutionSession.
+ *
+ * Ownership of the pool remains with the ExecutionSession: The caller is
+ * not required to free the pool.
*/
-typedef struct LLVMOrcOpaqueLLJITBuilder *LLVMOrcLLJITBuilderRef;
+LLVMOrcSymbolStringPoolRef
+LLVMOrcExecutionSessionGetSymbolStringPool(LLVMOrcExecutionSessionRef ES);
/**
- * A reference to an orc::LLJIT instance.
+ * Clear all unreferenced symbol string pool entries.
+ *
+ * This can be called at any time to release unused entries in the
+ * ExecutionSession's string pool. Since it locks the pool (preventing
+ * interning of any new strings) it is recommended that it only be called
+ * infrequently, ideally when the caller has reason to believe that some
+ * entries will have become unreferenced, e.g. after removing a module or
+ * closing a JITDylib.
*/
-typedef struct LLVMOrcOpaqueLLJIT *LLVMOrcLLJITRef;
+void LLVMOrcSymbolStringPoolClearDeadEntries(LLVMOrcSymbolStringPoolRef SSP);
/**
* Intern a string in the ExecutionSession's SymbolStringPool and return a
@@ -108,26 +305,138 @@ LLVMOrcSymbolStringPoolEntryRef
LLVMOrcExecutionSessionIntern(LLVMOrcExecutionSessionRef ES, const char *Name);
/**
+ * Increments the ref-count for a SymbolStringPool entry.
+ */
+void LLVMOrcRetainSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S);
+
+/**
 * Reduces the ref-count of a SymbolStringPool entry.
*/
void LLVMOrcReleaseSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S);
+const char *LLVMOrcSymbolStringPoolEntryStr(LLVMOrcSymbolStringPoolEntryRef S);
+
+/**
+ * Reduces the ref-count of a ResourceTracker.
+ */
+void LLVMOrcReleaseResourceTracker(LLVMOrcResourceTrackerRef RT);
+
+/**
+ * Transfers tracking of all resources associated with resource tracker SrcRT
+ * to resource tracker DstRT.
+ */
+void LLVMOrcResourceTrackerTransferTo(LLVMOrcResourceTrackerRef SrcRT,
+ LLVMOrcResourceTrackerRef DstRT);
+
+/**
+ * Remove all resources associated with the given tracker. See
+ * ResourceTracker::remove().
+ */
+LLVMErrorRef LLVMOrcResourceTrackerRemove(LLVMOrcResourceTrackerRef RT);
+
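
A sketch of the tracker lifecycle implied by these functions, assuming an existing LLJIT instance and ThreadSafeModule (see the LLJIT sketch earlier): the module is added under its own tracker so its resources can be removed later without clearing the whole JITDylib. Helper names are illustrative.

    #include "llvm-c/Error.h"
    #include "llvm-c/LLJIT.h"
    #include "llvm-c/Orc.h"

    // Add TSM under its own tracker so it can be removed independently later.
    static LLVMErrorRef addRemovableModule(LLVMOrcLLJITRef J,
                                           LLVMOrcThreadSafeModuleRef TSM,
                                           LLVMOrcResourceTrackerRef *RTOut) {
      LLVMOrcJITDylibRef JD = LLVMOrcLLJITGetMainJITDylib(J);
      LLVMOrcResourceTrackerRef RT = LLVMOrcJITDylibCreateResourceTracker(JD);
      LLVMErrorRef Err = LLVMOrcLLJITAddLLVMIRModuleWithRT(J, RT, TSM);
      if (Err) {
        LLVMOrcReleaseResourceTracker(RT);
        return Err;
      }
      *RTOut = RT; // released by the caller via LLVMOrcReleaseResourceTracker
      return LLVMErrorSuccess;
    }

    // Drop everything added under RT (JIT'd code, pending materializers), then
    // release our reference to the now-dead tracker.
    static LLVMErrorRef removeModule(LLVMOrcResourceTrackerRef RT) {
      LLVMErrorRef Err = LLVMOrcResourceTrackerRemove(RT);
      LLVMOrcReleaseResourceTracker(RT);
      return Err;
    }
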
/**
* Dispose of a JITDylib::DefinitionGenerator. This should only be called if
* ownership has not been passed to a JITDylib (e.g. because some error
* prevented the client from calling LLVMOrcJITDylibAddGenerator).
*/
-void LLVMOrcDisposeJITDylibDefinitionGenerator(
- LLVMOrcJITDylibDefinitionGeneratorRef DG);
+void LLVMOrcDisposeDefinitionGenerator(LLVMOrcDefinitionGeneratorRef DG);
/**
- * Add a JITDylib::DefinitionGenerator to the given JITDylib.
+ * Dispose of a MaterializationUnit.
+ */
+void LLVMOrcDisposeMaterializationUnit(LLVMOrcMaterializationUnitRef MU);
+
+/**
+ * Create a MaterializationUnit to define the given symbols as pointing to
+ * the corresponding raw addresses.
+ */
+LLVMOrcMaterializationUnitRef
+LLVMOrcAbsoluteSymbols(LLVMOrcCSymbolMapPairs Syms, size_t NumPairs);
+
+/**
+ * Create a "bare" JITDylib.
+ *
+ * The client is responsible for ensuring that the JITDylib's name is unique,
+ * e.g. by calling LLVMOrcExecutionSessionGetJITDylibByName first.
+ *
+ * This call does not install any library code or symbols into the newly
+ * created JITDylib. The client is responsible for all configuration.
+ */
+LLVMOrcJITDylibRef
+LLVMOrcExecutionSessionCreateBareJITDylib(LLVMOrcExecutionSessionRef ES,
+ const char *Name);
+
+/**
+ * Create a JITDylib.
+ *
+ * The client is responsible for ensuring that the JITDylib's name is unique,
+ * e.g. by calling LLVMOrcExecutionSessionGetJITDylibByName first.
+ *
+ * If a Platform is attached to the ExecutionSession then
+ * Platform::setupJITDylib will be called to install standard platform symbols
+ * (e.g. standard library interposes). If no Platform is installed then this
+ * call is equivalent to LLVMOrcExecutionSessionCreateBareJITDylib and will
+ * always return success.
+ */
+LLVMErrorRef
+LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES,
+ LLVMOrcJITDylibRef *Result,
+ const char *Name);
+
+/**
+ * Returns the JITDylib with the given name, or NULL if no such JITDylib
+ * exists.
+ */
+LLVMOrcJITDylibRef
+LLVMOrcExecutionSessionGetJITDylibByName(LLVMOrcExecutionSessionRef ES,
+ const char *Name);
+
+/**
+ * Return a reference to a newly created resource tracker associated with JD.
+ * The tracker is returned with an initial ref-count of 1, and must be released
+ * with LLVMOrcReleaseResourceTracker when no longer needed.
+ */
+LLVMOrcResourceTrackerRef
+LLVMOrcJITDylibCreateResourceTracker(LLVMOrcJITDylibRef JD);
+
+/**
+ * Return a reference to the default resource tracker for the given JITDylib.
+ * This operation will increase the retain count of the tracker: Clients should
+ * call LLVMOrcReleaseResourceTracker when the result is no longer needed.
+ */
+LLVMOrcResourceTrackerRef
+LLVMOrcJITDylibGetDefaultResourceTracker(LLVMOrcJITDylibRef JD);
+
+/**
+ * Add the given MaterializationUnit to the given JITDylib.
+ *
+ * If this operation succeeds then JITDylib JD will take ownership of MU.
+ * If the operation fails then ownership remains with the caller who should
+ * call LLVMOrcDisposeMaterializationUnit to destroy it.
+ */
+LLVMErrorRef LLVMOrcJITDylibDefine(LLVMOrcJITDylibRef JD,
+ LLVMOrcMaterializationUnitRef MU);
+
+/**
+ * Calls remove on all trackers associated with this JITDylib, see
+ * JITDylib::clear().
+ */
+LLVMErrorRef LLVMOrcJITDylibClear(LLVMOrcJITDylibRef JD);
+
+/**
+ * Add a DefinitionGenerator to the given JITDylib.
*
* The JITDylib will take ownership of the given generator: The client is no
* longer responsible for managing its memory.
*/
void LLVMOrcJITDylibAddGenerator(LLVMOrcJITDylibRef JD,
- LLVMOrcJITDylibDefinitionGeneratorRef DG);
+ LLVMOrcDefinitionGeneratorRef DG);
+
+/**
+ * Create a custom generator.
+ */
+LLVMOrcDefinitionGeneratorRef LLVMOrcCreateCustomCAPIDefinitionGenerator(
+ LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx);
/**
* Get a DynamicLibrarySearchGenerator that will reflect process symbols into
@@ -148,7 +457,7 @@ void LLVMOrcJITDylibAddGenerator(LLVMOrcJITDylibRef JD,
* the global prefix if present.
*/
LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
- LLVMOrcJITDylibDefinitionGeneratorRef *Result, char GlobalPrefx,
+ LLVMOrcDefinitionGeneratorRef *Result, char GlobalPrefx,
LLVMOrcSymbolPredicate Filter, void *FilterCtx);
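
A typical use, sketched against the declaration above, is to reflect process symbols (for example libc) into an LLJIT's main JITDylib, using the JIT's own global prefix and no filter predicate (a null Filter is assumed to mean "accept everything"). The helper name is illustrative.

    #include "llvm-c/Error.h"
    #include "llvm-c/LLJIT.h"
    #include "llvm-c/Orc.h"

    static LLVMErrorRef exposeProcessSymbols(LLVMOrcLLJITRef J) {
      LLVMOrcDefinitionGeneratorRef DG;
      LLVMErrorRef Err = LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
          &DG, LLVMOrcLLJITGetGlobalPrefix(J), /*Filter=*/NULL, /*FilterCtx=*/NULL);
      if (Err)
        return Err;
      // The main JITDylib takes ownership of DG; do not dispose it on success.
      LLVMOrcJITDylibAddGenerator(LLVMOrcLLJITGetMainJITDylib(J), DG);
      return LLVMErrorSuccess;
    }
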
/**
@@ -156,7 +465,7 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
*
* Ownership of the underlying ThreadSafeContext data is shared: Clients
* can and should dispose of their ThreadSafeContext as soon as they no longer
- * need to refer to it directly. Other references (e.g. from ThreadSafeModules
+ * need to refer to it directly. Other references (e.g. from ThreadSafeModules)
* will keep the data alive as long as it is needed.
*/
LLVMOrcThreadSafeContextRef LLVMOrcCreateNewThreadSafeContext(void);
@@ -178,7 +487,7 @@ void LLVMOrcDisposeThreadSafeContext(LLVMOrcThreadSafeContextRef TSCtx);
* after this function returns.
*
* Ownership of the ThreadSafeModule is unique: If it is transferred to the JIT
- * (e.g. by LLVMOrcLLJITAddLLVMIRModule), in which case the client is no longer
+ * (e.g. by LLVMOrcLLJITAddLLVMIRModule) then the client is no longer
* responsible for it. If it is not transferred to the JIT then the client
* should call LLVMOrcDisposeThreadSafeModule to dispose of it.
*/
@@ -221,114 +530,9 @@ void LLVMOrcDisposeJITTargetMachineBuilder(
LLVMOrcJITTargetMachineBuilderRef JTMB);
/**
- * Create an LLJITTargetMachineBuilder.
- *
- * The client owns the resulting LLJITBuilder and should dispose of it using
- * LLVMOrcDisposeLLJITBuilder once they are done with it.
- */
-LLVMOrcLLJITBuilderRef LLVMOrcCreateLLJITBuilder(void);
-
-/**
- * Dispose of an LLVMOrcLLJITBuilderRef. This should only be called if ownership
- * has not been passed to LLVMOrcCreateLLJIT (e.g. because some error prevented
- * that function from being called).
- */
-void LLVMOrcDisposeLLJITBuilder(LLVMOrcLLJITBuilderRef Builder);
-
-/**
- * Set the JITTargetMachineBuilder to be used when constructing the LLJIT
- * instance. Calling this function is optional: if it is not called then the
- * LLJITBuilder will use JITTargeTMachineBuilder::detectHost to construct a
- * JITTargetMachineBuilder.
- */
-void LLVMOrcLLJITBuilderSetJITTargetMachineBuilder(
- LLVMOrcLLJITBuilderRef Builder, LLVMOrcJITTargetMachineBuilderRef JTMB);
-
-/**
- * Create an LLJIT instance from an LLJITBuilder.
- *
- * This operation takes ownership of the Builder argument: clients should not
- * dispose of the builder after calling this function (even if the function
- * returns an error). If a null Builder argument is provided then a
- * default-constructed LLJITBuilder will be used.
- *
- * On success the resulting LLJIT instance is uniquely owned by the client and
- * automatically manages the memory of all JIT'd code and all modules that are
- * transferred to it (e.g. via LLVMOrcLLJITAddLLVMIRModule). Disposing of the
- * LLJIT instance will free all memory managed by the JIT, including JIT'd code
- * and not-yet compiled modules.
- */
-LLVMErrorRef LLVMOrcCreateLLJIT(LLVMOrcLLJITRef *Result,
- LLVMOrcLLJITBuilderRef Builder);
-
-/**
- * Dispose of an LLJIT instance.
- */
-LLVMErrorRef LLVMOrcDisposeLLJIT(LLVMOrcLLJITRef J);
-
-/**
- * Get a reference to the ExecutionSession for this LLJIT instance.
- *
- * The ExecutionSession is owned by the LLJIT instance. The client is not
- * responsible for managing its memory.
- */
-LLVMOrcExecutionSessionRef LLVMOrcLLJITGetExecutionSession(LLVMOrcLLJITRef J);
-
-/**
- * Return a reference to the Main JITDylib.
- *
- * The JITDylib is owned by the LLJIT instance. The client is not responsible
- * for managing its memory.
- */
-LLVMOrcJITDylibRef LLVMOrcLLJITGetMainJITDylib(LLVMOrcLLJITRef J);
-
-/**
- * Return the target triple for this LLJIT instance. This string is owned by
- * the LLJIT instance and should not be freed by the client.
- */
-const char *LLVMOrcLLJITGetTripleString(LLVMOrcLLJITRef J);
-
-/**
- * Returns the global prefix character according to the LLJIT's DataLayout.
- */
-char LLVMOrcLLJITGetGlobalPrefix(LLVMOrcLLJITRef J);
-
-/**
- * Mangles the given string according to the LLJIT instance's DataLayout, then
- * interns the result in the SymbolStringPool and returns a reference to the
- * pool entry. Clients should call LLVMOrcReleaseSymbolStringPoolEntry to
- * decrement the ref-count on the pool entry once they are finished with this
- * value.
- */
-LLVMOrcSymbolStringPoolEntryRef
-LLVMOrcLLJITMangleAndIntern(LLVMOrcLLJITRef J, const char *UnmangledName);
-
-/**
- * Add a buffer representing an object file to the given JITDylib in the given
- * LLJIT instance. This operation transfers ownership of the buffer to the
- * LLJIT instance. The buffer should not be disposed of or referenced once this
- * function returns.
- */
-LLVMErrorRef LLVMOrcLLJITAddObjectFile(LLVMOrcLLJITRef J, LLVMOrcJITDylibRef JD,
- LLVMMemoryBufferRef ObjBuffer);
-
-/**
- * Add an IR module to the given JITDylib of the given LLJIT instance. This
- * operation transfers ownership of the TSM argument to the LLJIT instance.
- * The TSM argument should not be 3disposed of or referenced once this
- * function returns.
- */
-LLVMErrorRef LLVMOrcLLJITAddLLVMIRModule(LLVMOrcLLJITRef J,
- LLVMOrcJITDylibRef JD,
- LLVMOrcThreadSafeModuleRef TSM);
-/**
- * Look up the given symbol in the main JITDylib of the given LLJIT instance.
- *
- * This operation does not take ownership of the Name argument.
+ * Dispose of an ObjectLayer.
*/
-LLVMErrorRef LLVMOrcLLJITLookup(LLVMOrcLLJITRef J,
- LLVMOrcJITTargetAddress *Result,
- const char *Name);
+void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer);
LLVM_C_EXTERN_C_END
diff --git a/contrib/llvm-project/llvm/include/llvm-c/OrcBindings.h b/contrib/llvm-project/llvm/include/llvm-c/OrcBindings.h
deleted file mode 100644
index 11cdade7c26f..000000000000
--- a/contrib/llvm-project/llvm/include/llvm-c/OrcBindings.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/*===----------- llvm-c/OrcBindings.h - Orc Lib C Iface ---------*- C++ -*-===*\
-|* *|
-|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
-|* Exceptions. *|
-|* See https://llvm.org/LICENSE.txt for license information. *|
-|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to libLLVMOrcJIT.a, which implements *|
-|* JIT compilation of LLVM IR. *|
-|* *|
-|* Many exotic languages can interoperate with C code but have a harder time *|
-|* with C++ due to name mangling. So in addition to C, this interface enables *|
-|* tools written in such languages. *|
-|* *|
-|* Note: This interface is experimental. It is *NOT* stable, and may be *|
-|* changed without warning. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_ORCBINDINGS_H
-#define LLVM_C_ORCBINDINGS_H
-
-#include "llvm-c/Error.h"
-#include "llvm-c/ExternC.h"
-#include "llvm-c/Object.h"
-#include "llvm-c/TargetMachine.h"
-
-LLVM_C_EXTERN_C_BEGIN
-
-typedef struct LLVMOrcOpaqueJITStack *LLVMOrcJITStackRef;
-typedef uint64_t LLVMOrcModuleHandle;
-typedef uint64_t LLVMOrcTargetAddress;
-typedef uint64_t (*LLVMOrcSymbolResolverFn)(const char *Name, void *LookupCtx);
-typedef uint64_t (*LLVMOrcLazyCompileCallbackFn)(LLVMOrcJITStackRef JITStack,
- void *CallbackCtx);
-
-/**
- * Create an ORC JIT stack.
- *
- * The client owns the resulting stack, and must call OrcDisposeInstance(...)
- * to destroy it and free its memory. The JIT stack will take ownership of the
- * TargetMachine, which will be destroyed when the stack is destroyed. The
- * client should not attempt to dispose of the Target Machine, or it will result
- * in a double-free.
- */
-LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM);
-
-/**
- * Get the error message for the most recent error (if any).
- *
- * This message is owned by the ORC JIT Stack and will be freed when the stack
- * is disposed of by LLVMOrcDisposeInstance.
- */
-const char *LLVMOrcGetErrorMsg(LLVMOrcJITStackRef JITStack);
-
-/**
- * Mangle the given symbol.
- * Memory will be allocated for MangledSymbol to hold the result. The client
- */
-void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledSymbol,
- const char *Symbol);
-
-/**
- * Dispose of a mangled symbol.
- */
-void LLVMOrcDisposeMangledSymbol(char *MangledSymbol);
-
-/**
- * Create a lazy compile callback.
- */
-LLVMErrorRef LLVMOrcCreateLazyCompileCallback(
- LLVMOrcJITStackRef JITStack, LLVMOrcTargetAddress *RetAddr,
- LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx);
-
-/**
- * Create a named indirect call stub.
- */
-LLVMErrorRef LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress InitAddr);
-
-/**
- * Set the pointer for the given indirect stub.
- */
-LLVMErrorRef LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress NewAddr);
-
-/**
- * Add module to be eagerly compiled.
- */
-LLVMErrorRef LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle,
- LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx);
-
-/**
- * Add module to be lazily compiled one function at a time.
- */
-LLVMErrorRef LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle,
- LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx);
-
-/**
- * Add an object file.
- *
- * This method takes ownership of the given memory buffer and attempts to add
- * it to the JIT as an object file.
- * Clients should *not* dispose of the 'Obj' argument: the JIT will manage it
- * from this call onwards.
- */
-LLVMErrorRef LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle,
- LLVMMemoryBufferRef Obj,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx);
-
-/**
- * Remove a module set from the JIT.
- *
- * This works for all modules that can be added via OrcAdd*, including object
- * files.
- */
-LLVMErrorRef LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle H);
-
-/**
- * Get symbol address from JIT instance.
- */
-LLVMErrorRef LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- const char *SymbolName);
-
-/**
- * Get symbol address from JIT instance, searching only the specified
- * handle.
- */
-LLVMErrorRef LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- LLVMOrcModuleHandle H,
- const char *SymbolName);
-
-/**
- * Dispose of an ORC JIT stack.
- */
-LLVMErrorRef LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack);
-
-/**
- * Register a JIT Event Listener.
- *
- * A NULL listener is ignored.
- */
-void LLVMOrcRegisterJITEventListener(LLVMOrcJITStackRef JITStack, LLVMJITEventListenerRef L);
-
-/**
- * Unegister a JIT Event Listener.
- *
- * A NULL listener is ignored.
- */
-void LLVMOrcUnregisterJITEventListener(LLVMOrcJITStackRef JITStack, LLVMJITEventListenerRef L);
-
-LLVM_C_EXTERN_C_END
-
-#endif /* LLVM_C_ORCBINDINGS_H */
diff --git a/contrib/llvm-project/llvm/include/llvm-c/OrcEE.h b/contrib/llvm-project/llvm/include/llvm-c/OrcEE.h
new file mode 100644
index 000000000000..2435e7421a42
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm-c/OrcEE.h
@@ -0,0 +1,55 @@
+/*===-- llvm-c/OrcEE.h - OrcV2 C bindings ExecutionEngine utils -*- C++ -*-===*\
+|* *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to ExecutionEngine based utils, e.g. *|
+|* RTDyldObjectLinkingLayer (based on RuntimeDyld) in Orc. *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages. *|
+|* *|
+|* Note: This interface is experimental. It is *NOT* stable, and may be *|
+|* changed without warning. Only C API usage documentation is *|
+|* provided. See the C++ documentation for all higher level ORC API *|
+|* details. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_ORCEE_H
+#define LLVM_C_ORCEE_H
+
+#include "llvm-c/Error.h"
+#include "llvm-c/ExecutionEngine.h"
+#include "llvm-c/Orc.h"
+#include "llvm-c/TargetMachine.h"
+#include "llvm-c/Types.h"
+
+LLVM_C_EXTERN_C_BEGIN
+
+/**
+ * Create a RTDyldObjectLinkingLayer instance using the standard
+ * SectionMemoryManager for memory management.
+ */
+LLVMOrcObjectLayerRef
+LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager(
+ LLVMOrcExecutionSessionRef ES);
+
+/**
+ * Add the given listener to the given RTDyldObjectLinkingLayer.
+ *
+ * Note: RTDyldObjLinkingLayer must be an RTDyldObjectLinkingLayer instance;
+ * otherwise the behavior is undefined.
+ */
+void LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener(
+ LLVMOrcObjectLayerRef RTDyldObjLinkingLayer,
+ LLVMJITEventListenerRef Listener);
+
+LLVM_C_EXTERN_C_END
+
+#endif /* LLVM_C_ORCEE_H */
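
The following sketch combines this layer with the object-linking-layer creator hook declared in LLJIT.h above. The creator callback signature (Ctx, ExecutionSession, Triple) is assumed from that header's LLVMOrcLLJITBuilderObjectLinkingLayerCreatorFunction typedef; helper names are illustrative.

    #include "llvm-c/Error.h"
    #include "llvm-c/LLJIT.h"
    #include "llvm-c/OrcEE.h"

    // Assumed creator signature, per LLJIT.h in this release:
    //   LLVMOrcObjectLayerRef (*)(void *Ctx, LLVMOrcExecutionSessionRef ES,
    //                             const char *Triple)
    static LLVMOrcObjectLayerRef createRTDyldLayer(void *Ctx,
                                                   LLVMOrcExecutionSessionRef ES,
                                                   const char *Triple) {
      (void)Ctx;
      (void)Triple;
      // Ownership of the returned layer passes to the LLJIT being built.
      return LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager(ES);
    }

    static LLVMErrorRef createJITWithRTDyld(LLVMOrcLLJITRef *Result) {
      LLVMOrcLLJITBuilderRef Builder = LLVMOrcCreateLLJITBuilder();
      LLVMOrcLLJITBuilderSetObjectLinkingLayerCreator(Builder, createRTDyldLayer,
                                                      /*Ctx=*/NULL);
      // LLVMOrcCreateLLJIT consumes Builder, even when it returns an error.
      return LLVMOrcCreateLLJIT(Result, Builder);
    }
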
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Transforms/IPO.h b/contrib/llvm-project/llvm/include/llvm-c/Transforms/IPO.h
index cde3d2460920..3f2cadf32366 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Transforms/IPO.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Transforms/IPO.h
@@ -57,9 +57,6 @@ void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM);
/** See llvm::createGlobalOptimizerPass function. */
void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM);
-/** See llvm::createIPConstantPropagationPass function. */
-void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM);
-
/** See llvm::createPruneEHPass function. */
void LLVMAddPruneEHPass(LLVMPassManagerRef PM);
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Transforms/Scalar.h b/contrib/llvm-project/llvm/include/llvm-c/Transforms/Scalar.h
index 93d79a205195..ba142508bbe4 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Transforms/Scalar.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Transforms/Scalar.h
@@ -67,6 +67,9 @@ void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM);
/** See llvm::createInstructionCombiningPass function. */
void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM);
+/** See llvm::createInstSimplifyLegacyPass function. */
+void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM);
+
/** See llvm::createJumpThreadingPass function. */
void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM);
@@ -125,9 +128,6 @@ void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM);
/** See llvm::createTailCallEliminationPass function. */
void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM);
-/** See llvm::createConstantPropagationPass function. */
-void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM);
-
/** See llvm::demotePromoteMemoryToRegisterPass function. */
void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM);
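
For context, the newly bound instruction-simplify pass slots into the existing legacy pass-manager C API; a short sketch, with M standing in for an existing LLVMModuleRef:

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/Scalar.h"

    // Run the newly exposed instruction-simplify pass over a module.
    static void runInstSimplify(LLVMModuleRef M) {
      LLVMPassManagerRef PM = LLVMCreatePassManager();
      LLVMAddInstructionSimplifyPass(PM); // binding added in this merge
      LLVMAddCFGSimplificationPass(PM);   // typical follow-up cleanup
      LLVMRunPassManager(PM, M);
      LLVMDisposePassManager(PM);
    }
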
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/APFixedPoint.h b/contrib/llvm-project/llvm/include/llvm/ADT/APFixedPoint.h
new file mode 100644
index 000000000000..d6349e6b2a88
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/APFixedPoint.h
@@ -0,0 +1,237 @@
+//===- APFixedPoint.h - Fixed point constant handling -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines the fixed point number interface.
+/// This is a class for abstracting various operations performed on fixed point
+/// types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_APFIXEDPOINT_H
+#define LLVM_ADT_APFIXEDPOINT_H
+
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class APFloat;
+struct fltSemantics;
+
+/// The fixed point semantics work similarly to fltSemantics. The width
+/// specifies the whole bit width of the underlying scaled integer (with padding
+/// if any). The scale represents the number of fractional bits in this type.
+/// When HasUnsignedPadding is true and this type is unsigned, the first bit
+/// in the value this represents is treated as padding.
+class FixedPointSemantics {
+public:
+ FixedPointSemantics(unsigned Width, unsigned Scale, bool IsSigned,
+ bool IsSaturated, bool HasUnsignedPadding)
+ : Width(Width), Scale(Scale), IsSigned(IsSigned),
+ IsSaturated(IsSaturated), HasUnsignedPadding(HasUnsignedPadding) {
+ assert(Width >= Scale && "Not enough room for the scale");
+ assert(!(IsSigned && HasUnsignedPadding) &&
+ "Cannot have unsigned padding on a signed type.");
+ }
+
+ unsigned getWidth() const { return Width; }
+ unsigned getScale() const { return Scale; }
+ bool isSigned() const { return IsSigned; }
+ bool isSaturated() const { return IsSaturated; }
+ bool hasUnsignedPadding() const { return HasUnsignedPadding; }
+
+ void setSaturated(bool Saturated) { IsSaturated = Saturated; }
+
+ /// Return the number of integral bits represented by these semantics. These
+ /// are separate from the fractional bits and do not include the sign or
+ /// padding bit.
+ unsigned getIntegralBits() const {
+ if (IsSigned || (!IsSigned && HasUnsignedPadding))
+ return Width - Scale - 1;
+ else
+ return Width - Scale;
+ }
+
+ /// Return the FixedPointSemantics that allows for calculating the full
+ /// precision semantic that can precisely represent the precision and ranges
+ /// of both input values. This does not compute the resulting semantics for a
+ /// given binary operation.
+ FixedPointSemantics
+ getCommonSemantics(const FixedPointSemantics &Other) const;
+
+ /// Returns true if this fixed-point semantic with its value bits interpreted
+ /// as an integer can fit in the given floating point semantic without
+ /// overflowing to infinity.
+ /// For example, a signed 8-bit fixed-point semantic has a maximum and
+ /// minimum integer representation of 127 and -128, respectively. If both of
+ /// these values can be represented (possibly inexactly) in the floating
+ /// point semantic without overflowing, this returns true.
+ bool fitsInFloatSemantics(const fltSemantics &FloatSema) const;
+
+ /// Return the FixedPointSemantics for an integer type.
+ static FixedPointSemantics GetIntegerSemantics(unsigned Width,
+ bool IsSigned) {
+ return FixedPointSemantics(Width, /*Scale=*/0, IsSigned,
+ /*IsSaturated=*/false,
+ /*HasUnsignedPadding=*/false);
+ }
+
+private:
+ unsigned Width : 16;
+ unsigned Scale : 13;
+ unsigned IsSigned : 1;
+ unsigned IsSaturated : 1;
+ unsigned HasUnsignedPadding : 1;
+};
+
+/// The APFixedPoint class works similarly to APInt/APSInt in that it is a
+/// functional replacement for a scaled integer. It is meant to replicate the
+/// fixed point types proposed in ISO/IEC JTC1 SC22 WG14 N1169. The class carries
+/// info about the fixed point type's width, sign, scale, and saturation, and
+/// provides different operations that would normally be performed on fixed point
+/// types.
+class APFixedPoint {
+public:
+ APFixedPoint(const APInt &Val, const FixedPointSemantics &Sema)
+ : Val(Val, !Sema.isSigned()), Sema(Sema) {
+ assert(Val.getBitWidth() == Sema.getWidth() &&
+ "The value should have a bit width that matches the Sema width");
+ }
+
+ APFixedPoint(uint64_t Val, const FixedPointSemantics &Sema)
+ : APFixedPoint(APInt(Sema.getWidth(), Val, Sema.isSigned()), Sema) {}
+
+ // Zero initialization.
+ APFixedPoint(const FixedPointSemantics &Sema) : APFixedPoint(0, Sema) {}
+
+ APSInt getValue() const { return APSInt(Val, !Sema.isSigned()); }
+ inline unsigned getWidth() const { return Sema.getWidth(); }
+ inline unsigned getScale() const { return Sema.getScale(); }
+ inline bool isSaturated() const { return Sema.isSaturated(); }
+ inline bool isSigned() const { return Sema.isSigned(); }
+ inline bool hasPadding() const { return Sema.hasUnsignedPadding(); }
+ FixedPointSemantics getSemantics() const { return Sema; }
+
+ bool getBoolValue() const { return Val.getBoolValue(); }
+
+ // Convert this number to match the semantics provided. If the overflow
+ // parameter is provided, set this value to true or false to indicate if this
+ // operation results in an overflow.
+ APFixedPoint convert(const FixedPointSemantics &DstSema,
+ bool *Overflow = nullptr) const;
+
+ // Perform binary operations on a fixed point type. The resulting fixed point
+ // value will be in the common, full precision semantics that can represent
+ // the precision and ranges of both input values. See convert() for an
+ // explanation of the Overflow parameter.
+ APFixedPoint add(const APFixedPoint &Other, bool *Overflow = nullptr) const;
+ APFixedPoint sub(const APFixedPoint &Other, bool *Overflow = nullptr) const;
+ APFixedPoint mul(const APFixedPoint &Other, bool *Overflow = nullptr) const;
+ APFixedPoint div(const APFixedPoint &Other, bool *Overflow = nullptr) const;
+
+ // Perform shift operations on a fixed point type. Unlike the other binary
+ // operations, the resulting fixed point value will be in the original
+ // semantic.
+ APFixedPoint shl(unsigned Amt, bool *Overflow = nullptr) const;
+ APFixedPoint shr(unsigned Amt, bool *Overflow = nullptr) const {
+ // Right shift cannot overflow.
+ if (Overflow)
+ *Overflow = false;
+ return APFixedPoint(Val >> Amt, Sema);
+ }
+
+ /// Perform a unary negation (-X) on this fixed point type, taking into
+ /// account saturation if applicable.
+ APFixedPoint negate(bool *Overflow = nullptr) const;
+
+ /// Return the integral part of this fixed point number, rounded towards
+ /// zero. (-2.5k -> -2)
+ APSInt getIntPart() const {
+ if (Val < 0 && Val != -Val) // Cover the case when we have the min val
+ return -(-Val >> getScale());
+ else
+ return Val >> getScale();
+ }
+
+ /// Return the integral part of this fixed point number, rounded towards
+ /// zero. The value is stored into an APSInt with the provided width and sign.
+ /// If the overflow parameter is provided, and the integral value is not able
+ /// to be fully stored in the provided width and sign, the overflow parameter
+ /// is set to true.
+ APSInt convertToInt(unsigned DstWidth, bool DstSign,
+ bool *Overflow = nullptr) const;
+
+ /// Convert this fixed point number to a floating point value with the
+ /// provided semantics.
+ APFloat convertToFloat(const fltSemantics &FloatSema) const;
+
+ void toString(SmallVectorImpl<char> &Str) const;
+ std::string toString() const {
+ SmallString<40> S;
+ toString(S);
+ return std::string(S.str());
+ }
+
+ // If LHS > RHS, return 1. If LHS == RHS, return 0. If LHS < RHS, return -1.
+ int compare(const APFixedPoint &Other) const;
+ bool operator==(const APFixedPoint &Other) const {
+ return compare(Other) == 0;
+ }
+ bool operator!=(const APFixedPoint &Other) const {
+ return compare(Other) != 0;
+ }
+ bool operator>(const APFixedPoint &Other) const { return compare(Other) > 0; }
+ bool operator<(const APFixedPoint &Other) const { return compare(Other) < 0; }
+ bool operator>=(const APFixedPoint &Other) const {
+ return compare(Other) >= 0;
+ }
+ bool operator<=(const APFixedPoint &Other) const {
+ return compare(Other) <= 0;
+ }
+
+ static APFixedPoint getMax(const FixedPointSemantics &Sema);
+ static APFixedPoint getMin(const FixedPointSemantics &Sema);
+
+ /// Given a floating point semantic, return the next floating point semantic
+ /// with a larger exponent and larger or equal mantissa.
+ static const fltSemantics *promoteFloatSemantics(const fltSemantics *S);
+
+ /// Create an APFixedPoint with a value equal to that of the provided integer,
+ /// and in the same semantics as the provided target semantics. If the value
+ /// is not able to fit in the specified fixed point semantics, and the
+ /// overflow parameter is provided, it is set to true.
+ static APFixedPoint getFromIntValue(const APSInt &Value,
+ const FixedPointSemantics &DstFXSema,
+ bool *Overflow = nullptr);
+
+ /// Create an APFixedPoint with a value equal to that of the provided
+ /// floating point value, in the provided target semantics. If the value is
+ /// not able to fit in the specified fixed point semantics and the overflow
+ /// parameter is specified, it is set to true.
+ /// For NaN, the Overflow flag is always set. For +inf and -inf, if the
+ /// semantic is saturating, the value saturates. Otherwise, the Overflow flag
+ /// is set.
+ static APFixedPoint getFromFloatValue(const APFloat &Value,
+ const FixedPointSemantics &DstFXSema,
+ bool *Overflow = nullptr);
+
+private:
+ APSInt Val;
+ FixedPointSemantics Sema;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const APFixedPoint &FX) {
+ OS << FX.toString();
+ return OS;
+}
+
+} // namespace llvm
+
+#endif
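
To make the semantics concrete, here is a small sketch using the class as declared above: a signed 32-bit type with 16 fractional bits (a Q15.16 layout), where the raw constructor argument is the already-scaled integer, i.e. represented value = raw / 2^Scale. The specific constants are illustrative.

    #include "llvm/ADT/APFixedPoint.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    void fixedPointDemo() {
      // 32 bits total, 16 fractional bits, signed, non-saturating, no padding.
      FixedPointSemantics Sema(/*Width=*/32, /*Scale=*/16, /*IsSigned=*/true,
                               /*IsSaturated=*/false,
                               /*HasUnsignedPadding=*/false);

      APFixedPoint A(/*Val=*/0x18000, Sema); // 1.5  == 0x18000 / 2^16
      APFixedPoint B(/*Val=*/0x24000, Sema); // 2.25 == 0x24000 / 2^16

      bool Overflow = false;
      APFixedPoint Sum = A.add(B, &Overflow); // 3.75, in the common semantics
      outs() << "sum = " << Sum << (Overflow ? " (overflowed)" : "") << "\n";
      outs() << "integral part = " << Sum.getIntPart().getSExtValue() << "\n"; // 3
    }
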
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/APFloat.h b/contrib/llvm-project/llvm/include/llvm/ADT/APFloat.h
index 876e52c150a0..1f9ac22621a6 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/APFloat.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/APFloat.h
@@ -249,7 +249,7 @@ public:
/// \name Constructors
/// @{
- IEEEFloat(const fltSemantics &); // Default construct to 0.0
+ IEEEFloat(const fltSemantics &); // Default construct to +0.0
IEEEFloat(const fltSemantics &, integerPart);
IEEEFloat(const fltSemantics &, uninitializedTag);
IEEEFloat(const fltSemantics &, const APInt &);
@@ -539,6 +539,9 @@ private:
roundingMode) const;
opStatus roundSignificandWithExponent(const integerPart *, unsigned int, int,
roundingMode);
+ ExponentType exponentNaN() const;
+ ExponentType exponentInf() const;
+ ExponentType exponentZero() const;
/// @}
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h b/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h
index f7df648d27ed..b97ea2cd9aee 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h
@@ -31,6 +31,7 @@ class raw_ostream;
template <typename T> class SmallVectorImpl;
template <typename T> class ArrayRef;
template <typename T> class Optional;
+template <typename T> struct DenseMapInfo;
class APInt;
@@ -96,7 +97,7 @@ private:
unsigned BitWidth; ///< The number of bits in this APInt.
- friend struct DenseMapAPIntKeyInfo;
+ friend struct DenseMapInfo<APInt>;
friend class APSInt;
@@ -764,8 +765,8 @@ public:
/// Move assignment operator.
APInt &operator=(APInt &&that) {
-#ifdef _MSC_VER
- // The MSVC std::shuffle implementation still does self-assignment.
+#ifdef EXPENSIVE_CHECKS
+ // Some std::shuffle implementations still do self-assignment.
if (this == &that)
return *this;
#endif
@@ -793,11 +794,10 @@ public:
APInt &operator=(uint64_t RHS) {
if (isSingleWord()) {
U.VAL = RHS;
- clearUnusedBits();
- } else {
- U.pVal[0] = RHS;
- memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
+ return clearUnusedBits();
}
+ U.pVal[0] = RHS;
+ memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
return *this;
}
@@ -854,10 +854,9 @@ public:
APInt &operator|=(uint64_t RHS) {
if (isSingleWord()) {
U.VAL |= RHS;
- clearUnusedBits();
- } else {
- U.pVal[0] |= RHS;
+ return clearUnusedBits();
}
+ U.pVal[0] |= RHS;
return *this;
}
@@ -884,10 +883,9 @@ public:
APInt &operator^=(uint64_t RHS) {
if (isSingleWord()) {
U.VAL ^= RHS;
- clearUnusedBits();
- } else {
- U.pVal[0] ^= RHS;
+ return clearUnusedBits();
}
+ U.pVal[0] ^= RHS;
return *this;
}
@@ -1405,6 +1403,12 @@ public:
/// extended, truncated, or left alone to make it that width.
APInt zextOrTrunc(unsigned width) const;
+ /// Truncate to width
+ ///
+ /// Make this APInt have the bit width given by \p width. The value is
+ /// truncated or left alone to make it that width.
+ APInt truncOrSelf(unsigned width) const;
+
/// Sign extend or truncate to width
///
/// Make this APInt have the bit width given by \p width. The value is sign
@@ -1449,6 +1453,14 @@ public:
setBit(BitWidth - 1);
}
+ /// Set a given bit to a given value.
+ void setBitVal(unsigned BitPosition, bool BitValue) {
+ if (BitValue)
+ setBit(BitPosition);
+ else
+ clearBit(BitPosition);
+ }
+
/// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
/// This function handles "wrap" case when \p loBit >= \p hiBit, and calls
/// setBits when \p loBit < \p hiBit.
@@ -1609,11 +1621,7 @@ public:
/// returns the smallest bit width that will retain the negative value. For
/// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
/// for -1, this function will always return 1.
- unsigned getMinSignedBits() const {
- if (isNegative())
- return BitWidth - countLeadingOnes() + 1;
- return getActiveBits() + 1;
- }
+ unsigned getMinSignedBits() const { return BitWidth - getNumSignBits() + 1; }
/// Get zero extended value
///
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/APSInt.h b/contrib/llvm-project/llvm/include/llvm/ADT/APSInt.h
index 0f991826c457..82e9ba81141f 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/APSInt.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/APSInt.h
@@ -18,6 +18,7 @@
namespace llvm {
+/// An arbitrary precision integer that knows its signedness.
class LLVM_NODISCARD APSInt : public APInt {
bool IsUnsigned;
@@ -25,8 +26,7 @@ public:
/// Default constructor that creates an uninitialized APInt.
explicit APSInt() : IsUnsigned(false) {}
- /// APSInt ctor - Create an APSInt with the specified width, default to
- /// unsigned.
+ /// Create an APSInt with the specified width, default to unsigned.
explicit APSInt(uint32_t BitWidth, bool isUnsigned = true)
: APInt(BitWidth, 0), IsUnsigned(isUnsigned) {}
@@ -78,11 +78,11 @@ public:
void setIsUnsigned(bool Val) { IsUnsigned = Val; }
void setIsSigned(bool Val) { IsUnsigned = !Val; }
- /// toString - Append this APSInt to the specified SmallString.
+ /// Append this APSInt to the specified SmallString.
void toString(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
APInt::toString(Str, Radix, isSigned());
}
- /// toString - Converts an APInt to a std::string. This is an inefficient
+ /// Converts an APInt to a std::string. This is an inefficient
/// method; you should prefer passing in a SmallString instead.
std::string toString(unsigned Radix) const {
return APInt::toString(Radix, isSigned());
@@ -282,15 +282,15 @@ public:
return APSInt(~static_cast<const APInt&>(*this), IsUnsigned);
}
- /// getMaxValue - Return the APSInt representing the maximum integer value
- /// with the given bit width and signedness.
+ /// Return the APSInt representing the maximum integer value with the given
+ /// bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned) {
return APSInt(Unsigned ? APInt::getMaxValue(numBits)
: APInt::getSignedMaxValue(numBits), Unsigned);
}
- /// getMinValue - Return the APSInt representing the minimum integer value
- /// with the given bit width and signedness.
+ /// Return the APSInt representing the minimum integer value with the given
+ /// bit width and signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned) {
return APSInt(Unsigned ? APInt::getMinValue(numBits)
: APInt::getSignedMinValue(numBits), Unsigned);
@@ -331,8 +331,8 @@ public:
static APSInt get(int64_t X) { return APSInt(APInt(64, X), false); }
static APSInt getUnsigned(uint64_t X) { return APSInt(APInt(64, X), true); }
- /// Profile - Used to insert APSInt objects, or objects that contain APSInt
- /// objects, into FoldingSets.
+ /// Used to insert APSInt objects, or objects that contain APSInt objects,
+ /// into FoldingSets.
void Profile(FoldingSetNodeID& ID) const;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/AllocatorList.h b/contrib/llvm-project/llvm/include/llvm/ADT/AllocatorList.h
index 447d7a7538db..404a657f27de 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/AllocatorList.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/AllocatorList.h
@@ -118,13 +118,6 @@ private:
reference operator*() const { return base_type::wrapped()->V; }
pointer operator->() const { return &operator*(); }
-
- friend bool operator==(const IteratorImpl &L, const IteratorImpl &R) {
- return L.wrapped() == R.wrapped();
- }
- friend bool operator!=(const IteratorImpl &L, const IteratorImpl &R) {
- return !(L == R);
- }
};
public:
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Any.h b/contrib/llvm-project/llvm/include/llvm/ADT/Any.h
index 0aded628cda4..1e3abca70679 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Any.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Any.h
@@ -23,7 +23,12 @@
namespace llvm {
-class Any {
+class LLVM_EXTERNAL_VISIBILITY Any {
+
+ // The `Typeid<T>::Id` static data member below is a globally unique
+ // identifier for the type `T`. It is explicitly marked with default
+ // visibility so that when `-fvisibility=hidden` is used, the loader still
+ // merges duplicate definitions across DSO boundaries.
template <typename T> struct TypeId { static const char Id; };
struct StorageBase {
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h b/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h
index a8d0f07af94a..2a857786f454 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h
@@ -203,9 +203,10 @@ public:
return !any();
}
- /// find_first_in - Returns the index of the first set bit in the range
- /// [Begin, End). Returns -1 if all bits in the range are unset.
- int find_first_in(unsigned Begin, unsigned End) const {
+ /// find_first_in - Returns the index of the first set / unset bit,
+ /// depending on \p Set, in the range [Begin, End).
+ /// Returns -1 if all bits in the range are unset / set.
+ int find_first_in(unsigned Begin, unsigned End, bool Set = true) const {
assert(Begin <= End && End <= Size);
if (Begin == End)
return -1;
@@ -214,8 +215,14 @@ public:
unsigned LastWord = (End - 1) / BITWORD_SIZE;
// Check subsequent words.
+ // The code below is based on search for the first _set_ bit. If
+ // we're searching for the first _unset_, we just take the
+ // complement of each word before we use it and apply
+ // the same method.
for (unsigned i = FirstWord; i <= LastWord; ++i) {
BitWord Copy = Bits[i];
+ if (!Set)
+ Copy = ~Copy;
if (i == FirstWord) {
unsigned FirstBit = Begin % BITWORD_SIZE;
@@ -266,32 +273,7 @@ public:
/// find_first_unset_in - Returns the index of the first unset bit in the
/// range [Begin, End). Returns -1 if all bits in the range are set.
int find_first_unset_in(unsigned Begin, unsigned End) const {
- assert(Begin <= End && End <= Size);
- if (Begin == End)
- return -1;
-
- unsigned FirstWord = Begin / BITWORD_SIZE;
- unsigned LastWord = (End - 1) / BITWORD_SIZE;
-
- // Check subsequent words.
- for (unsigned i = FirstWord; i <= LastWord; ++i) {
- BitWord Copy = Bits[i];
-
- if (i == FirstWord) {
- unsigned FirstBit = Begin % BITWORD_SIZE;
- Copy |= maskTrailingOnes<BitWord>(FirstBit);
- }
-
- if (i == LastWord) {
- unsigned LastBit = (End - 1) % BITWORD_SIZE;
- Copy |= maskTrailingZeros<BitWord>(LastBit + 1);
- }
- if (Copy != ~BitWord(0)) {
- unsigned Result = i * BITWORD_SIZE + countTrailingOnes(Copy);
- return Result < size() ? Result : -1;
- }
- }
- return -1;
+ return find_first_in(Begin, End, /* Set = */ false);
}
/// find_last_unset_in - Returns the index of the last unset bit in the
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/DenseMap.h b/contrib/llvm-project/llvm/include/llvm/ADT/DenseMap.h
index 34d397cc9793..ce0b05db840c 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/DenseMap.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/DenseMap.h
@@ -426,8 +426,8 @@ protected:
setNumEntries(other.getNumEntries());
setNumTombstones(other.getNumTombstones());
- if (is_trivially_copyable<KeyT>::value &&
- is_trivially_copyable<ValueT>::value)
+ if (std::is_trivially_copyable<KeyT>::value &&
+ std::is_trivially_copyable<ValueT>::value)
memcpy(reinterpret_cast<void *>(getBuckets()), other.getBuckets(),
getNumBuckets() * sizeof(BucketT));
else
@@ -954,7 +954,7 @@ public:
std::swap(*LHSB, *RHSB);
continue;
}
- // Swap separately and handle any assymetry.
+ // Swap separately and handle any asymmetry.
std::swap(LHSB->getFirst(), RHSB->getFirst());
if (hasLHSValue) {
::new (&RHSB->getSecond()) ValueT(std::move(LHSB->getSecond()));
@@ -1042,7 +1042,7 @@ public:
if (Small) {
// First move the inline buckets into a temporary storage.
AlignedCharArrayUnion<BucketT[InlineBuckets]> TmpStorage;
- BucketT *TmpBegin = reinterpret_cast<BucketT *>(TmpStorage.buffer);
+ BucketT *TmpBegin = reinterpret_cast<BucketT *>(&TmpStorage);
BucketT *TmpEnd = TmpBegin;
// Loop over the buckets, moving non-empty, non-tombstones into the
@@ -1132,8 +1132,8 @@ private:
assert(Small);
// Note that this cast does not violate aliasing rules as we assert that
// the memory's dynamic type is the small, inline bucket buffer, and the
- // 'storage.buffer' static type is 'char *'.
- return reinterpret_cast<const BucketT *>(storage.buffer);
+ // 'storage' is a POD containing a char buffer.
+ return reinterpret_cast<const BucketT *>(&storage);
}
BucketT *getInlineBuckets() {
@@ -1144,7 +1144,7 @@ private:
const LargeRep *getLargeRep() const {
assert(!Small);
// Note, same rule about aliasing as with getInlineBuckets.
- return reinterpret_cast<const LargeRep *>(storage.buffer);
+ return reinterpret_cast<const LargeRep *>(&storage);
}
LargeRep *getLargeRep() {
@@ -1190,8 +1190,6 @@ class DenseMapIterator : DebugEpochBase::HandleBase {
friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, true>;
friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, false>;
- using ConstIterator = DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, true>;
-
public:
using difference_type = ptrdiff_t;
using value_type =
@@ -1244,19 +1242,18 @@ public:
return Ptr;
}
- bool operator==(const ConstIterator &RHS) const {
- assert((!Ptr || isHandleInSync()) && "handle not in sync!");
+ friend bool operator==(const DenseMapIterator &LHS,
+ const DenseMapIterator &RHS) {
+ assert((!LHS.Ptr || LHS.isHandleInSync()) && "handle not in sync!");
assert((!RHS.Ptr || RHS.isHandleInSync()) && "handle not in sync!");
- assert(getEpochAddress() == RHS.getEpochAddress() &&
+ assert(LHS.getEpochAddress() == RHS.getEpochAddress() &&
"comparing incomparable iterators!");
- return Ptr == RHS.Ptr;
+ return LHS.Ptr == RHS.Ptr;
}
- bool operator!=(const ConstIterator &RHS) const {
- assert((!Ptr || isHandleInSync()) && "handle not in sync!");
- assert((!RHS.Ptr || RHS.isHandleInSync()) && "handle not in sync!");
- assert(getEpochAddress() == RHS.getEpochAddress() &&
- "comparing incomparable iterators!");
- return Ptr != RHS.Ptr;
+
+ friend bool operator!=(const DenseMapIterator &LHS,
+ const DenseMapIterator &RHS) {
+ return !(LHS == RHS);
}
inline DenseMapIterator& operator++() { // Preincrement
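The iterator comparisons above become symmetric non-member friends; existing call sites keep compiling because a non-const iterator still converts to its const counterpart before the comparison. A small sketch of the usual pattern (map contents are illustrative):

#include "llvm/ADT/DenseMap.h"

bool hasKey(const llvm::DenseMap<int, int> &M, int K) {
  // const_iterator == const_iterator now resolves to the friend operator==.
  return M.find(K) != M.end();
}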
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/DenseMapInfo.h b/contrib/llvm-project/llvm/include/llvm/ADT/DenseMapInfo.h
index e465331ac6f7..8271b9334b86 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/DenseMapInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/DenseMapInfo.h
@@ -13,6 +13,8 @@
#ifndef LLVM_ADT_DENSEMAPINFO_H
#define LLVM_ADT_DENSEMAPINFO_H
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
@@ -347,6 +349,49 @@ template <> struct DenseMapInfo<hash_code> {
static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
};
+/// Provide DenseMapInfo for APInt.
+template <> struct DenseMapInfo<APInt> {
+ static inline APInt getEmptyKey() {
+ APInt V(nullptr, 0);
+ V.U.VAL = 0;
+ return V;
+ }
+
+ static inline APInt getTombstoneKey() {
+ APInt V(nullptr, 0);
+ V.U.VAL = 1;
+ return V;
+ }
+
+ static unsigned getHashValue(const APInt &Key) {
+ return static_cast<unsigned>(hash_value(Key));
+ }
+
+ static bool isEqual(const APInt &LHS, const APInt &RHS) {
+ return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
+ }
+};
+
+/// Provide DenseMapInfo for APSInt, using the DenseMapInfo for APInt.
+template <> struct DenseMapInfo<APSInt> {
+ static inline APSInt getEmptyKey() {
+ return APSInt(DenseMapInfo<APInt>::getEmptyKey());
+ }
+
+ static inline APSInt getTombstoneKey() {
+ return APSInt(DenseMapInfo<APInt>::getTombstoneKey());
+ }
+
+ static unsigned getHashValue(const APSInt &Key) {
+ return static_cast<unsigned>(hash_value(Key));
+ }
+
+ static bool isEqual(const APSInt &LHS, const APSInt &RHS) {
+ return LHS.getBitWidth() == RHS.getBitWidth() &&
+ LHS.isUnsigned() == RHS.isUnsigned() && LHS == RHS;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_ADT_DENSEMAPINFO_H
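With the two specializations above, APInt and APSInt can be used directly as DenseMap or DenseSet keys. A minimal sketch (the counting scheme is illustrative):

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"

void countConstant(llvm::DenseMap<llvm::APInt, unsigned> &Uses,
                   const llvm::APInt &C) {
  // Keys of different bit widths hash and compare as distinct entries,
  // per DenseMapInfo<APInt>::isEqual above.
  ++Uses[C];
}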
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/DenseSet.h b/contrib/llvm-project/llvm/include/llvm/ADT/DenseSet.h
index 07edc3d8e4ec..edce7c43773c 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/DenseSet.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/DenseSet.h
@@ -130,8 +130,12 @@ public:
Iterator& operator++() { ++I; return *this; }
Iterator operator++(int) { auto T = *this; ++I; return T; }
- bool operator==(const ConstIterator& X) const { return I == X.I; }
- bool operator!=(const ConstIterator& X) const { return I != X.I; }
+ friend bool operator==(const Iterator &X, const Iterator &Y) {
+ return X.I == Y.I;
+ }
+ friend bool operator!=(const Iterator &X, const Iterator &Y) {
+ return X.I != Y.I;
+ }
};
class ConstIterator {
@@ -155,8 +159,12 @@ public:
ConstIterator& operator++() { ++I; return *this; }
ConstIterator operator++(int) { auto T = *this; ++I; return T; }
- bool operator==(const ConstIterator& X) const { return I == X.I; }
- bool operator!=(const ConstIterator& X) const { return I != X.I; }
+ friend bool operator==(const ConstIterator &X, const ConstIterator &Y) {
+ return X.I == Y.I;
+ }
+ friend bool operator!=(const ConstIterator &X, const ConstIterator &Y) {
+ return X.I != Y.I;
+ }
};
using iterator = Iterator;
@@ -173,6 +181,11 @@ public:
return ConstIterator(TheMap.find(V));
}
+ /// Check if the set contains the given element.
+ bool contains(const_arg_type_t<ValueT> V) const {
+ return TheMap.find(V) != TheMap.end();
+ }
+
/// Alternative version of find() which allows a different, and possibly less
/// expensive, key type.
/// The DenseMapInfo is responsible for supplying methods
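The contains() member added above gives the DenseSet family a direct membership test. Sketch:

#include "llvm/ADT/DenseSet.h"

bool isReserved(const llvm::DenseSet<unsigned> &Reserved, unsigned Reg) {
  // Equivalent to Reserved.find(Reg) != Reserved.end().
  return Reserved.contains(Reg);
}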
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/DepthFirstIterator.h b/contrib/llvm-project/llvm/include/llvm/ADT/DepthFirstIterator.h
index 11967f5eefcc..5bfea28332b2 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/DepthFirstIterator.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/DepthFirstIterator.h
@@ -198,7 +198,7 @@ public:
// nodes that a depth first iteration did not find: ie unreachable nodes.
//
bool nodeVisited(NodeRef Node) const {
- return this->Visited.count(Node) != 0;
+ return this->Visited.contains(Node);
}
/// getPathLength - Return the length of the path from the entry node to the
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/DirectedGraph.h b/contrib/llvm-project/llvm/include/llvm/ADT/DirectedGraph.h
index cfe98e178a91..e8bb9e6b2292 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/DirectedGraph.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/DirectedGraph.h
@@ -38,8 +38,10 @@ public:
/// Static polymorphism: delegate implementation (via isEqualTo) to the
/// derived class.
- bool operator==(const EdgeType &E) const { return getDerived().isEqualTo(E); }
- bool operator!=(const EdgeType &E) const { return !operator==(E); }
+ bool operator==(const DGEdge &E) const {
+ return getDerived().isEqualTo(E.getDerived());
+ }
+ bool operator!=(const DGEdge &E) const { return !operator==(E); }
/// Retrieve the target node this edge connects to.
const NodeType &getTargetNode() const { return TargetNode; }
@@ -91,8 +93,12 @@ public:
/// Static polymorphism: delegate implementation (via isEqualTo) to the
/// derived class.
- bool operator==(const NodeType &N) const { return getDerived().isEqualTo(N); }
- bool operator!=(const NodeType &N) const { return !operator==(N); }
+ friend bool operator==(const NodeType &M, const NodeType &N) {
+ return M.isEqualTo(N);
+ }
+ friend bool operator!=(const NodeType &M, const NodeType &N) {
+ return !(M == N);
+ }
const_iterator begin() const { return Edges.begin(); }
const_iterator end() const { return Edges.end(); }
@@ -223,7 +229,7 @@ public:
if (*Node == N)
continue;
Node->findEdgesTo(N, TempList);
- EL.insert(EL.end(), TempList.begin(), TempList.end());
+ llvm::append_range(EL, TempList);
TempList.clear();
}
return !EL.empty();
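llvm::append_range, used above, is the new STLExtras shorthand for the insert-at-end idiom it replaces. A standalone sketch (container types are illustrative):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>

void gather(llvm::SmallVector<int, 8> &Out, const std::vector<int> &Part) {
  // Same as Out.insert(Out.end(), Part.begin(), Part.end()).
  llvm::append_range(Out, Part);
}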
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/FloatingPointMode.h b/contrib/llvm-project/llvm/include/llvm/ADT/FloatingPointMode.h
index 3ba8ae1b2855..698830937870 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/FloatingPointMode.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/FloatingPointMode.h
@@ -44,6 +44,24 @@ enum class RoundingMode : int8_t {
Invalid = -1 ///< Denotes invalid value.
};
+/// Returns text representation of the given rounding mode.
+inline StringRef spell(RoundingMode RM) {
+ switch (RM) {
+ case RoundingMode::TowardZero: return "towardzero";
+ case RoundingMode::NearestTiesToEven: return "tonearest";
+ case RoundingMode::TowardPositive: return "upward";
+ case RoundingMode::TowardNegative: return "downward";
+ case RoundingMode::NearestTiesToAway: return "tonearestaway";
+ case RoundingMode::Dynamic: return "dynamic";
+ default: return "invalid";
+ }
+}
+
+inline raw_ostream &operator << (raw_ostream &OS, RoundingMode RM) {
+ OS << spell(RM);
+ return OS;
+}
+
/// Represent subnormal handling kind for floating point instruction inputs and
/// outputs.
struct DenormalMode {
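The new spell() helper and raw_ostream operator<< above make RoundingMode printable. Sketch (the choice of errs() is illustrative):

#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/Support/raw_ostream.h"

void reportMode(llvm::RoundingMode RM) {
  // Prints, e.g., "rounding: towardzero" via spell(RM).
  llvm::errs() << "rounding: " << RM << '\n';
}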
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h b/contrib/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h
index 4c75e4d2547b..7f8fb103f148 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h
@@ -64,12 +64,12 @@ template <typename ReturnT, typename... ParamTs> class UniqueFunctionBase {
protected:
static constexpr size_t InlineStorageSize = sizeof(void *) * 3;
- // MSVC has a bug and ICEs if we give it a particular dependent value
- // expression as part of the `std::conditional` below. To work around this,
- // we build that into a template struct's constexpr bool.
- template <typename T> struct IsSizeLessThanThresholdT {
- static constexpr bool value = sizeof(T) <= (2 * sizeof(void *));
- };
+ template <typename T, class = void>
+ struct IsSizeLessThanThresholdT : std::false_type {};
+
+ template <typename T>
+ struct IsSizeLessThanThresholdT<
+ T, std::enable_if_t<sizeof(T) <= 2 * sizeof(void *)>> : std::true_type {};
// Provide a type function to map parameters that won't observe extra copies
// or moves and which are small enough to likely pass in register to values
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Hashing.h b/contrib/llvm-project/llvm/include/llvm/ADT/Hashing.h
index 9ee310c879fd..cb53b7fa7469 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Hashing.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Hashing.h
@@ -52,6 +52,7 @@
#include <cassert>
#include <cstring>
#include <string>
+#include <tuple>
#include <utility>
namespace llvm {
@@ -112,6 +113,10 @@ template <typename T> hash_code hash_value(const T *ptr);
template <typename T, typename U>
hash_code hash_value(const std::pair<T, U> &arg);
+/// Compute a hash_code for a tuple.
+template <typename... Ts>
+hash_code hash_value(const std::tuple<Ts...> &arg);
+
/// Compute a hash_code for a standard string.
template <typename T>
hash_code hash_value(const std::basic_string<T> &arg);
@@ -645,6 +650,26 @@ hash_code hash_value(const std::pair<T, U> &arg) {
return hash_combine(arg.first, arg.second);
}
+// Implementation details for the hash_value overload for std::tuple<...>(...).
+namespace hashing {
+namespace detail {
+
+template <typename... Ts, std::size_t... Indices>
+hash_code hash_value_tuple_helper(const std::tuple<Ts...> &arg,
+ std::index_sequence<Indices...> indices) {
+ return hash_combine(std::get<Indices>(arg)...);
+}
+
+} // namespace detail
+} // namespace hashing
+
+template <typename... Ts>
+hash_code hash_value(const std::tuple<Ts...> &arg) {
+ // TODO: Use std::apply when LLVM starts using C++17.
+ return ::llvm::hashing::detail::hash_value_tuple_helper(
+ arg, typename std::index_sequence_for<Ts...>());
+}
+
// Declared and documented above, but defined here so that any of the hashing
// infrastructure is available.
template <typename T>
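The std::tuple overload added above hashes a tuple exactly as the corresponding hash_combine call would. Sketch:

#include "llvm/ADT/Hashing.h"
#include <tuple>

llvm::hash_code hashKey(int Line, unsigned Col) {
  auto Key = std::make_tuple(Line, Col);
  // Same result as llvm::hash_combine(Line, Col).
  return llvm::hash_value(Key);
}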
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/IntervalMap.h b/contrib/llvm-project/llvm/include/llvm/ADT/IntervalMap.h
index db7804d0a551..0b6c7d667807 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/IntervalMap.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/IntervalMap.h
@@ -963,8 +963,7 @@ public:
private:
// The root data is either a RootLeaf or a RootBranchData instance.
- alignas(RootLeaf) alignas(RootBranchData)
- AlignedCharArrayUnion<RootLeaf, RootBranchData> data;
+ AlignedCharArrayUnion<RootLeaf, RootBranchData> data;
// Tree height.
// 0: Leaves in root.
@@ -979,10 +978,7 @@ private:
Allocator &allocator;
/// Represent data as a node type without breaking aliasing rules.
- template <typename T>
- T &dataAs() const {
- return *bit_cast<T *>(const_cast<char *>(data.buffer));
- }
+ template <typename T> T &dataAs() const { return *bit_cast<T *>(&data); }
const RootLeaf &rootLeaf() const {
assert(!branched() && "Cannot access leaf data in branched root");
@@ -1040,7 +1036,7 @@ private:
public:
explicit IntervalMap(Allocator &a) : height(0), rootSize(0), allocator(a) {
- assert((uintptr_t(data.buffer) & (alignof(RootLeaf) - 1)) == 0 &&
+ assert((uintptr_t(&data) & (alignof(RootLeaf) - 1)) == 0 &&
"Insufficient alignment");
new(&rootLeaf()) RootLeaf();
}
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/contrib/llvm-project/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
index 6d97fe15db8b..ca4c40db48b9 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -58,6 +58,7 @@
#include <atomic>
#include <cassert>
#include <cstddef>
+#include <memory>
namespace llvm {
@@ -70,10 +71,23 @@ namespace llvm {
template <class Derived> class RefCountedBase {
mutable unsigned RefCount = 0;
-public:
+protected:
RefCountedBase() = default;
RefCountedBase(const RefCountedBase &) {}
+ RefCountedBase &operator=(const RefCountedBase &) = delete;
+
+#ifndef NDEBUG
+ ~RefCountedBase() {
+ assert(RefCount == 0 &&
+ "Destruction occurred when there are still references to this.");
+ }
+#else
+ // Default the destructor in release builds; a trivial destructor may enable
+ // better codegen.
+ ~RefCountedBase() = default;
+#endif
+public:
void Retain() const { ++RefCount; }
void Release() const {
@@ -85,10 +99,24 @@ public:
/// A thread-safe version of \c RefCountedBase.
template <class Derived> class ThreadSafeRefCountedBase {
- mutable std::atomic<int> RefCount;
+ mutable std::atomic<int> RefCount{0};
protected:
- ThreadSafeRefCountedBase() : RefCount(0) {}
+ ThreadSafeRefCountedBase() = default;
+ ThreadSafeRefCountedBase(const ThreadSafeRefCountedBase &) {}
+ ThreadSafeRefCountedBase &
+ operator=(const ThreadSafeRefCountedBase &) = delete;
+
+#ifndef NDEBUG
+ ~ThreadSafeRefCountedBase() {
+ assert(RefCount == 0 &&
+ "Destruction occurred when there are still references to this.");
+ }
+#else
+ // Default the destructor in release builds; a trivial destructor may enable
+ // better codegen.
+ ~ThreadSafeRefCountedBase() = default;
+#endif
public:
void Retain() const { RefCount.fetch_add(1, std::memory_order_relaxed); }
@@ -149,6 +177,11 @@ public:
}
template <class X>
+ IntrusiveRefCntPtr(std::unique_ptr<X> S) : Obj(S.release()) {
+ retain();
+ }
+
+ template <class X>
IntrusiveRefCntPtr(const IntrusiveRefCntPtr<X> &S) : Obj(S.get()) {
retain();
}
@@ -264,6 +297,12 @@ template <class T> struct simplify_type<const IntrusiveRefCntPtr<T>> {
}
};
+/// Factory function for creating intrusive ref counted pointers.
+template <typename T, typename... Args>
+IntrusiveRefCntPtr<T> makeIntrusiveRefCnt(Args &&...A) {
+ return IntrusiveRefCntPtr<T>(new T(std::forward<Args>(A)...));
+}
+
} // end namespace llvm
#endif // LLVM_ADT_INTRUSIVEREFCNTPTR_H
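Besides tightening the (now protected, ref-count-checked) destructors, the header gains a unique_ptr-taking constructor and the makeIntrusiveRefCnt factory shown above. A minimal sketch (the Config type is illustrative):

#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include <memory>

struct Config : llvm::RefCountedBase<Config> {
  int Verbosity = 0;
};

llvm::IntrusiveRefCntPtr<Config> makeDefaultConfig() {
  // Equivalent to IntrusiveRefCntPtr<Config>(new Config()).
  return llvm::makeIntrusiveRefCnt<Config>();
}

llvm::IntrusiveRefCntPtr<Config> adopt(std::unique_ptr<Config> C) {
  // New constructor: adopts ownership from a unique_ptr.
  return llvm::IntrusiveRefCntPtr<Config>(std::move(C));
}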
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Optional.h b/contrib/llvm-project/llvm/include/llvm/ADT/Optional.h
index c64b82352397..a285c81d1be8 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Optional.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Optional.h
@@ -15,6 +15,7 @@
#ifndef LLVM_ADT_OPTIONAL_H
#define LLVM_ADT_OPTIONAL_H
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/type_traits.h"
@@ -32,7 +33,30 @@ namespace optional_detail {
struct in_place_t {};
/// Storage for any type.
-template <typename T, bool = is_trivially_copyable<T>::value>
+//
+// The specialization condition intentionally uses
+// llvm::is_trivially_copy_constructible instead of
+// std::is_trivially_copy_constructible. GCC versions prior to 7.4 may
+// instantiate the copy constructor of `T` when
+// std::is_trivially_copy_constructible is instantiated. This causes
+// compilation to fail if we query the trivially copy constructible property of
+// a class which is not copy constructible.
+//
+// The current implementation of OptionalStorage insists that in order to use
+// the trivial specialization, the value_type must be trivially copy
+// constructible and trivially copy assignable due to =default implementations
+// of the copy/move constructor/assignment. It does not follow that this is
+// necessarily the case when std::is_trivially_copyable is true (hence the expanded
+// specialization condition).
+//
+// The move constructible / assignable conditions emulate the remaining behavior
+// of std::is_trivially_copyable.
+template <typename T, bool = (llvm::is_trivially_copy_constructible<T>::value &&
+ std::is_trivially_copy_assignable<T>::value &&
+ (std::is_trivially_move_constructible<T>::value ||
+ !std::is_move_constructible<T>::value) &&
+ (std::is_trivially_move_assignable<T>::value ||
+ !std::is_move_assignable<T>::value))>
class OptionalStorage {
union {
char empty;
@@ -43,21 +67,21 @@ class OptionalStorage {
public:
~OptionalStorage() { reset(); }
- OptionalStorage() noexcept : empty(), hasVal(false) {}
+ constexpr OptionalStorage() noexcept : empty(), hasVal(false) {}
- OptionalStorage(OptionalStorage const &other) : OptionalStorage() {
+ constexpr OptionalStorage(OptionalStorage const &other) : OptionalStorage() {
if (other.hasValue()) {
emplace(other.value);
}
}
- OptionalStorage(OptionalStorage &&other) : OptionalStorage() {
+ constexpr OptionalStorage(OptionalStorage &&other) : OptionalStorage() {
if (other.hasValue()) {
emplace(std::move(other.value));
}
}
template <class... Args>
- explicit OptionalStorage(in_place_t, Args &&... args)
+ constexpr explicit OptionalStorage(in_place_t, Args &&... args)
: value(std::forward<Args>(args)...), hasVal(true) {}
void reset() noexcept {
@@ -67,13 +91,13 @@ public:
}
}
- bool hasValue() const noexcept { return hasVal; }
+ constexpr bool hasValue() const noexcept { return hasVal; }
T &getValue() LLVM_LVALUE_FUNCTION noexcept {
assert(hasVal);
return value;
}
- T const &getValue() const LLVM_LVALUE_FUNCTION noexcept {
+ constexpr T const &getValue() const LLVM_LVALUE_FUNCTION noexcept {
assert(hasVal);
return value;
}
@@ -148,16 +172,16 @@ template <typename T> class OptionalStorage<T, true> {
public:
~OptionalStorage() = default;
- OptionalStorage() noexcept : empty{} {}
+ constexpr OptionalStorage() noexcept : empty{} {}
- OptionalStorage(OptionalStorage const &other) = default;
- OptionalStorage(OptionalStorage &&other) = default;
+ constexpr OptionalStorage(OptionalStorage const &other) = default;
+ constexpr OptionalStorage(OptionalStorage &&other) = default;
OptionalStorage &operator=(OptionalStorage const &other) = default;
OptionalStorage &operator=(OptionalStorage &&other) = default;
template <class... Args>
- explicit OptionalStorage(in_place_t, Args &&... args)
+ constexpr explicit OptionalStorage(in_place_t, Args &&... args)
: value(std::forward<Args>(args)...), hasVal(true) {}
void reset() noexcept {
@@ -167,13 +191,13 @@ public:
}
}
- bool hasValue() const noexcept { return hasVal; }
+ constexpr bool hasValue() const noexcept { return hasVal; }
T &getValue() LLVM_LVALUE_FUNCTION noexcept {
assert(hasVal);
return value;
}
- T const &getValue() const LLVM_LVALUE_FUNCTION noexcept {
+ constexpr T const &getValue() const LLVM_LVALUE_FUNCTION noexcept {
assert(hasVal);
return value;
}
@@ -221,11 +245,12 @@ public:
constexpr Optional() {}
constexpr Optional(NoneType) {}
- Optional(const T &y) : Storage(optional_detail::in_place_t{}, y) {}
- Optional(const Optional &O) = default;
+ constexpr Optional(const T &y) : Storage(optional_detail::in_place_t{}, y) {}
+ constexpr Optional(const Optional &O) = default;
- Optional(T &&y) : Storage(optional_detail::in_place_t{}, std::move(y)) {}
- Optional(Optional &&O) = default;
+ constexpr Optional(T &&y)
+ : Storage(optional_detail::in_place_t{}, std::move(y)) {}
+ constexpr Optional(Optional &&O) = default;
Optional &operator=(T &&y) {
Storage = std::move(y);
@@ -238,7 +263,7 @@ public:
Storage.emplace(std::forward<ArgTypes>(Args)...);
}
- static inline Optional create(const T *y) {
+ static constexpr Optional create(const T *y) {
return y ? Optional(*y) : Optional();
}
@@ -250,16 +275,20 @@ public:
void reset() { Storage.reset(); }
- const T *getPointer() const { return &Storage.getValue(); }
+ constexpr const T *getPointer() const { return &Storage.getValue(); }
T *getPointer() { return &Storage.getValue(); }
- const T &getValue() const LLVM_LVALUE_FUNCTION { return Storage.getValue(); }
+ constexpr const T &getValue() const LLVM_LVALUE_FUNCTION {
+ return Storage.getValue();
+ }
T &getValue() LLVM_LVALUE_FUNCTION { return Storage.getValue(); }
- explicit operator bool() const { return hasValue(); }
- bool hasValue() const { return Storage.hasValue(); }
- const T *operator->() const { return getPointer(); }
+ constexpr explicit operator bool() const { return hasValue(); }
+ constexpr bool hasValue() const { return Storage.hasValue(); }
+ constexpr const T *operator->() const { return getPointer(); }
T *operator->() { return getPointer(); }
- const T &operator*() const LLVM_LVALUE_FUNCTION { return getValue(); }
+ constexpr const T &operator*() const LLVM_LVALUE_FUNCTION {
+ return getValue();
+ }
T &operator*() LLVM_LVALUE_FUNCTION { return getValue(); }
template <typename U>
@@ -294,137 +323,157 @@ public:
#endif
};
+template <class T> llvm::hash_code hash_value(const Optional<T> &O) {
+ return O ? hash_combine(true, *O) : hash_value(false);
+}
+
template <typename T, typename U>
-bool operator==(const Optional<T> &X, const Optional<U> &Y) {
+constexpr bool operator==(const Optional<T> &X, const Optional<U> &Y) {
if (X && Y)
return *X == *Y;
return X.hasValue() == Y.hasValue();
}
template <typename T, typename U>
-bool operator!=(const Optional<T> &X, const Optional<U> &Y) {
+constexpr bool operator!=(const Optional<T> &X, const Optional<U> &Y) {
return !(X == Y);
}
template <typename T, typename U>
-bool operator<(const Optional<T> &X, const Optional<U> &Y) {
+constexpr bool operator<(const Optional<T> &X, const Optional<U> &Y) {
if (X && Y)
return *X < *Y;
return X.hasValue() < Y.hasValue();
}
template <typename T, typename U>
-bool operator<=(const Optional<T> &X, const Optional<U> &Y) {
+constexpr bool operator<=(const Optional<T> &X, const Optional<U> &Y) {
return !(Y < X);
}
template <typename T, typename U>
-bool operator>(const Optional<T> &X, const Optional<U> &Y) {
+constexpr bool operator>(const Optional<T> &X, const Optional<U> &Y) {
return Y < X;
}
template <typename T, typename U>
-bool operator>=(const Optional<T> &X, const Optional<U> &Y) {
+constexpr bool operator>=(const Optional<T> &X, const Optional<U> &Y) {
return !(X < Y);
}
-template<typename T>
-bool operator==(const Optional<T> &X, NoneType) {
+template <typename T>
+constexpr bool operator==(const Optional<T> &X, NoneType) {
return !X;
}
-template<typename T>
-bool operator==(NoneType, const Optional<T> &X) {
+template <typename T>
+constexpr bool operator==(NoneType, const Optional<T> &X) {
return X == None;
}
-template<typename T>
-bool operator!=(const Optional<T> &X, NoneType) {
+template <typename T>
+constexpr bool operator!=(const Optional<T> &X, NoneType) {
return !(X == None);
}
-template<typename T>
-bool operator!=(NoneType, const Optional<T> &X) {
+template <typename T>
+constexpr bool operator!=(NoneType, const Optional<T> &X) {
return X != None;
}
-template <typename T> bool operator<(const Optional<T> &X, NoneType) {
+template <typename T> constexpr bool operator<(const Optional<T> &X, NoneType) {
return false;
}
-template <typename T> bool operator<(NoneType, const Optional<T> &X) {
+template <typename T> constexpr bool operator<(NoneType, const Optional<T> &X) {
return X.hasValue();
}
-template <typename T> bool operator<=(const Optional<T> &X, NoneType) {
+template <typename T>
+constexpr bool operator<=(const Optional<T> &X, NoneType) {
return !(None < X);
}
-template <typename T> bool operator<=(NoneType, const Optional<T> &X) {
+template <typename T>
+constexpr bool operator<=(NoneType, const Optional<T> &X) {
return !(X < None);
}
-template <typename T> bool operator>(const Optional<T> &X, NoneType) {
+template <typename T> constexpr bool operator>(const Optional<T> &X, NoneType) {
return None < X;
}
-template <typename T> bool operator>(NoneType, const Optional<T> &X) {
+template <typename T> constexpr bool operator>(NoneType, const Optional<T> &X) {
return X < None;
}
-template <typename T> bool operator>=(const Optional<T> &X, NoneType) {
+template <typename T>
+constexpr bool operator>=(const Optional<T> &X, NoneType) {
return None <= X;
}
-template <typename T> bool operator>=(NoneType, const Optional<T> &X) {
+template <typename T>
+constexpr bool operator>=(NoneType, const Optional<T> &X) {
return X <= None;
}
-template <typename T> bool operator==(const Optional<T> &X, const T &Y) {
+template <typename T>
+constexpr bool operator==(const Optional<T> &X, const T &Y) {
return X && *X == Y;
}
-template <typename T> bool operator==(const T &X, const Optional<T> &Y) {
+template <typename T>
+constexpr bool operator==(const T &X, const Optional<T> &Y) {
return Y && X == *Y;
}
-template <typename T> bool operator!=(const Optional<T> &X, const T &Y) {
+template <typename T>
+constexpr bool operator!=(const Optional<T> &X, const T &Y) {
return !(X == Y);
}
-template <typename T> bool operator!=(const T &X, const Optional<T> &Y) {
+template <typename T>
+constexpr bool operator!=(const T &X, const Optional<T> &Y) {
return !(X == Y);
}
-template <typename T> bool operator<(const Optional<T> &X, const T &Y) {
+template <typename T>
+constexpr bool operator<(const Optional<T> &X, const T &Y) {
return !X || *X < Y;
}
-template <typename T> bool operator<(const T &X, const Optional<T> &Y) {
+template <typename T>
+constexpr bool operator<(const T &X, const Optional<T> &Y) {
return Y && X < *Y;
}
-template <typename T> bool operator<=(const Optional<T> &X, const T &Y) {
+template <typename T>
+constexpr bool operator<=(const Optional<T> &X, const T &Y) {
return !(Y < X);
}
-template <typename T> bool operator<=(const T &X, const Optional<T> &Y) {
+template <typename T>
+constexpr bool operator<=(const T &X, const Optional<T> &Y) {
return !(Y < X);
}
-template <typename T> bool operator>(const Optional<T> &X, const T &Y) {
+template <typename T>
+constexpr bool operator>(const Optional<T> &X, const T &Y) {
return Y < X;
}
-template <typename T> bool operator>(const T &X, const Optional<T> &Y) {
+template <typename T>
+constexpr bool operator>(const T &X, const Optional<T> &Y) {
return Y < X;
}
-template <typename T> bool operator>=(const Optional<T> &X, const T &Y) {
+template <typename T>
+constexpr bool operator>=(const Optional<T> &X, const T &Y) {
return !(X < Y);
}
-template <typename T> bool operator>=(const T &X, const Optional<T> &Y) {
+template <typename T>
+constexpr bool operator>=(const T &X, const Optional<T> &Y) {
return !(X < Y);
}
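With the constexpr qualifiers above, most of Optional's read-only surface is usable at compile time for trivially copyable payloads, and the new hash_value overload lets Optional participate in hashing. A minimal sketch, assuming a C++14 build as in this tree:

#include "llvm/ADT/Optional.h"

constexpr llvm::Optional<int> Answer(42);
static_assert(Answer.hasValue(), "constexpr construction");
static_assert(*Answer == 42, "constexpr dereference");
static_assert(Answer > llvm::None, "constexpr comparison against None");

llvm::hash_code hashAnswer() {
  return llvm::hash_value(Answer); // new Optional overload of hash_value
}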
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/PointerUnion.h b/contrib/llvm-project/llvm/include/llvm/ADT/PointerUnion.h
index 6fecff8d756f..c39691061b72 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/PointerUnion.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/PointerUnion.h
@@ -93,13 +93,6 @@ namespace pointer_union_detail {
static constexpr int NumLowBitsAvailable = lowBitsAvailable<PTs...>();
};
- /// Implement assignment in terms of construction.
- template <typename Derived, typename T> struct AssignableFrom {
- Derived &operator=(T t) {
- return static_cast<Derived &>(*this) = Derived(t);
- }
- };
-
template <typename Derived, typename ValTy, int I, typename ...Types>
class PointerUnionMembers;
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/STLExtras.h b/contrib/llvm-project/llvm/include/llvm/ADT/STLExtras.h
index 50b688b36648..63c7f48a5bd2 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/STLExtras.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/STLExtras.h
@@ -193,9 +193,15 @@ public:
template <typename Callable>
function_ref(
Callable &&callable,
+ // This is not the copy-constructor.
std::enable_if_t<
!std::is_same<std::remove_cv_t<std::remove_reference_t<Callable>>,
- function_ref>::value> * = nullptr)
+ function_ref>::value> * = nullptr,
+ // Functor must be callable and return a suitable type.
+ std::enable_if_t<std::is_void<Ret>::value ||
+ std::is_convertible<decltype(std::declval<Callable>()(
+ std::declval<Params>()...)),
+ Ret>::value> * = nullptr)
: callback(callback_fn<typename std::remove_reference<Callable>::type>),
callable(reinterpret_cast<intptr_t>(&callable)) {}
@@ -206,15 +212,6 @@ public:
explicit operator bool() const { return callback; }
};
-// deleter - Very very very simple method that is used to invoke operator
-// delete on something. It is used like this:
-//
-// for_each(V.begin(), B.end(), deleter<Interval>);
-template <class T>
-inline void deleter(T *Ptr) {
- delete Ptr;
-}
-
//===----------------------------------------------------------------------===//
// Extra additions to <iterator>
//===----------------------------------------------------------------------===//
@@ -275,7 +272,7 @@ template <typename ContainerTy> bool hasSingleElement(ContainerTy &&C) {
/// Return a range covering \p RangeOrContainer with the first N elements
/// excluded.
-template <typename T> auto drop_begin(T &&RangeOrContainer, size_t N) {
+template <typename T> auto drop_begin(T &&RangeOrContainer, size_t N = 1) {
return make_range(std::next(adl_begin(RangeOrContainer), N),
adl_end(RangeOrContainer));
}
@@ -541,7 +538,7 @@ public:
early_inc_iterator_impl(WrappedIteratorT I) : BaseT(I) {}
using BaseT::operator*;
- typename BaseT::reference operator*() {
+ decltype(*std::declval<WrappedIteratorT>()) operator*() {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
assert(!IsEarlyIncremented && "Cannot dereference twice!");
IsEarlyIncremented = true;
@@ -558,12 +555,12 @@ public:
return *this;
}
- using BaseT::operator==;
- bool operator==(const early_inc_iterator_impl &RHS) const {
+ friend bool operator==(const early_inc_iterator_impl &LHS,
+ const early_inc_iterator_impl &RHS) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
- assert(!IsEarlyIncremented && "Cannot compare after dereferencing!");
+ assert(!LHS.IsEarlyIncremented && "Cannot compare after dereferencing!");
#endif
- return BaseT::operator==(RHS);
+ return (const BaseT &)LHS == (const BaseT &)RHS;
}
};
@@ -1246,6 +1243,15 @@ public:
}
};
+/// Given a container of pairs, return a range over the first elements.
+template <typename ContainerTy> auto make_first_range(ContainerTy &&c) {
+ return llvm::map_range(
+ std::forward<ContainerTy>(c),
+ [](decltype((*std::begin(c))) elt) -> decltype((elt.first)) {
+ return elt.first;
+ });
+}
+
/// Given a container of pairs, return a range over the second elements.
template <typename ContainerTy> auto make_second_range(ContainerTy &&c) {
return llvm::map_range(
@@ -1422,7 +1428,7 @@ template <typename T>
// is trivially copyable.
using sort_trivially_copyable = conjunction<
std::is_pointer<T>,
- is_trivially_copyable<typename std::iterator_traits<T>::value_type>>;
+ std::is_trivially_copyable<typename std::iterator_traits<T>::value_type>>;
} // namespace detail
// Provide wrappers to std::sort which shuffle the elements before sorting
@@ -1471,18 +1477,19 @@ inline void sort(Container &&C, Compare Comp) {
/// which is only enabled when the operation is O(1).
template <typename R>
auto size(R &&Range,
- std::enable_if_t<std::is_same<typename std::iterator_traits<decltype(
- Range.begin())>::iterator_category,
- std::random_access_iterator_tag>::value,
- void> * = nullptr) {
+ std::enable_if_t<
+ std::is_base_of<std::random_access_iterator_tag,
+ typename std::iterator_traits<decltype(
+ Range.begin())>::iterator_category>::value,
+ void> * = nullptr) {
return std::distance(Range.begin(), Range.end());
}
/// Provide wrappers to std::for_each which take ranges instead of having to
/// pass begin/end explicitly.
-template <typename R, typename UnaryPredicate>
-UnaryPredicate for_each(R &&Range, UnaryPredicate P) {
- return std::for_each(adl_begin(Range), adl_end(Range), P);
+template <typename R, typename UnaryFunction>
+UnaryFunction for_each(R &&Range, UnaryFunction F) {
+ return std::for_each(adl_begin(Range), adl_end(Range), F);
}
/// Provide wrappers to std::all_of which take ranges instead of having to pass
@@ -1543,6 +1550,13 @@ OutputIt copy(R &&Range, OutputIt Out) {
return std::copy(adl_begin(Range), adl_end(Range), Out);
}
+/// Provide wrappers to std::move which take ranges instead of having to
+/// pass begin/end explicitly.
+template <typename R, typename OutputIt>
+OutputIt move(R &&Range, OutputIt Out) {
+ return std::move(adl_begin(Range), adl_end(Range), Out);
+}
+
/// Wrapper function around std::find to detect if an element exists
/// in a container.
template <typename R, typename E>
@@ -1577,9 +1591,9 @@ auto count_if(R &&Range, UnaryPredicate P) {
/// Wrapper function around std::transform to apply a function to a range and
/// store the result elsewhere.
-template <typename R, typename OutputIt, typename UnaryPredicate>
-OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P) {
- return std::transform(adl_begin(Range), adl_end(Range), d_first, P);
+template <typename R, typename OutputIt, typename UnaryFunction>
+OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F) {
+ return std::transform(adl_begin(Range), adl_end(Range), d_first, F);
}
/// Provide wrappers to std::partition which take ranges instead of having to
@@ -1654,6 +1668,22 @@ void erase_if(Container &C, UnaryPredicate P) {
C.erase(remove_if(C, P), C.end());
}
+/// Wrapper function to remove a value from a container:
+///
+/// C.erase(remove(C.begin(), C.end(), V), C.end());
+template <typename Container, typename ValueType>
+void erase_value(Container &C, ValueType V) {
+ C.erase(std::remove(C.begin(), C.end(), V), C.end());
+}
+
+/// Wrapper function to append a range to a container.
+///
+/// C.insert(C.end(), R.begin(), R.end());
+template <typename Container, typename Range>
+inline void append_range(Container &C, Range &&R) {
+ C.insert(C.end(), R.begin(), R.end());
+}
+
/// Given a sequence container Cont, replace the range [ContIt, ContEnd) with
/// the range [ValIt, ValEnd) (which is not from the same container).
template<typename Container, typename RandomAccessIterator>
@@ -1911,16 +1941,16 @@ decltype(auto) apply_tuple(F &&f, Tuple &&t) {
/// Return true if the sequence [Begin, End) has exactly N items. Runs in O(N)
/// time. Not meant for use with random-access iterators.
/// Can optionally take a predicate to filter lazily some items.
-template<typename IterTy,
- typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)>
+template <typename IterTy,
+ typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)>
bool hasNItems(
IterTy &&Begin, IterTy &&End, unsigned N,
Pred &&ShouldBeCounted =
[](const decltype(*std::declval<IterTy>()) &) { return true; },
std::enable_if_t<
- !std::is_same<typename std::iterator_traits<std::remove_reference_t<
- decltype(Begin)>>::iterator_category,
- std::random_access_iterator_tag>::value,
+ !std::is_base_of<std::random_access_iterator_tag,
+ typename std::iterator_traits<std::remove_reference_t<
+ decltype(Begin)>>::iterator_category>::value,
void> * = nullptr) {
for (; N; ++Begin) {
if (Begin == End)
@@ -1936,16 +1966,16 @@ bool hasNItems(
/// Return true if the sequence [Begin, End) has N or more items. Runs in O(N)
/// time. Not meant for use with random-access iterators.
/// Can optionally take a predicate to lazily filter some items.
-template<typename IterTy,
- typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)>
+template <typename IterTy,
+ typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)>
bool hasNItemsOrMore(
IterTy &&Begin, IterTy &&End, unsigned N,
Pred &&ShouldBeCounted =
[](const decltype(*std::declval<IterTy>()) &) { return true; },
std::enable_if_t<
- !std::is_same<typename std::iterator_traits<std::remove_reference_t<
- decltype(Begin)>>::iterator_category,
- std::random_access_iterator_tag>::value,
+ !std::is_base_of<std::random_access_iterator_tag,
+ typename std::iterator_traits<std::remove_reference_t<
+ decltype(Begin)>>::iterator_category>::value,
void> * = nullptr) {
for (; N; ++Begin) {
if (Begin == End)
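Beyond the iterator-category relaxations in size() and hasNItems*, the STLExtras hunks add several small range helpers: a defaulted drop_begin, make_first_range, llvm::move, erase_value, and append_range (already used in DirectedGraph.h above). A sketch of a few of them (container contents are illustrative):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <utility>

void demo() {
  llvm::SmallVector<std::pair<llvm::StringRef, int>, 4> Pairs = {{"a", 1},
                                                                 {"b", 2}};
  // Iterate just the keys of a container of pairs.
  for (llvm::StringRef Key : llvm::make_first_range(Pairs))
    (void)Key;

  llvm::SmallVector<int, 4> V = {1, 2, 2, 3};
  llvm::erase_value(V, 2);         // V == {1, 3}
  auto Tail = llvm::drop_begin(V); // N now defaults to 1: refers to {3}
  (void)Tail;
}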
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Sequence.h b/contrib/llvm-project/llvm/include/llvm/ADT/Sequence.h
index 8c505f2010dd..8a695d75f77a 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Sequence.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Sequence.h
@@ -42,6 +42,10 @@ public:
value_sequence_iterator(const value_sequence_iterator &) = default;
value_sequence_iterator(value_sequence_iterator &&Arg)
: Value(std::move(Arg.Value)) {}
+ value_sequence_iterator &operator=(const value_sequence_iterator &Arg) {
+ Value = Arg.Value;
+ return *this;
+ }
template <typename U, typename Enabler = decltype(ValueT(std::declval<U>()))>
value_sequence_iterator(U &&Value) : Value(std::forward<U>(Value)) {}
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SetVector.h b/contrib/llvm-project/llvm/include/llvm/ADT/SetVector.h
index 91ad72143ed3..32bcd50966cc 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/SetVector.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/SetVector.h
@@ -205,6 +205,11 @@ public:
return true;
}
+ /// Check if the SetVector contains the given key.
+ bool contains(const key_type &key) const {
+ return set_.find(key) != set_.end();
+ }
+
/// Count the number of elements of a given key in the SetVector.
/// \returns 0 if the element is not in the SetVector, 1 if it is.
size_type count(const key_type &key) const {
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SmallSet.h b/contrib/llvm-project/llvm/include/llvm/ADT/SmallSet.h
index a03fa7dd8423..0600e528ee69 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/SmallSet.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/SmallSet.h
@@ -232,6 +232,13 @@ public:
return {Set.end()};
}
+ /// Check if the SmallSet contains the given element.
+ bool contains(const T &V) const {
+ if (isSmall())
+ return vfind(V) != Vector.end();
+ return Set.find(V) != Set.end();
+ }
+
private:
bool isSmall() const { return Set.empty(); }
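SmallSet gains the same contains() convenience as SetVector (above) and DenseSet, working in both the small and the set-backed representation. Sketch:

#include "llvm/ADT/SmallSet.h"

bool alreadySeen(const llvm::SmallSet<unsigned, 8> &Seen, unsigned ID) {
  return Seen.contains(ID);
}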
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h b/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h
index cd6f2173d04f..5a56321ae492 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h
@@ -30,63 +30,56 @@ public:
/// Initialize from a StringRef.
SmallString(StringRef S) : SmallVector<char, InternalLen>(S.begin(), S.end()) {}
+ /// Initialize by concatenating a list of StringRefs.
+ SmallString(std::initializer_list<StringRef> Refs)
+ : SmallVector<char, InternalLen>() {
+ this->append(Refs);
+ }
+
/// Initialize with a range.
template<typename ItTy>
SmallString(ItTy S, ItTy E) : SmallVector<char, InternalLen>(S, E) {}
- // Note that in order to add new overloads for append & assign, we have to
- // duplicate the inherited versions so as not to inadvertently hide them.
-
/// @}
/// @name String Assignment
/// @{
- /// Assign from a repeated element.
- void assign(size_t NumElts, char Elt) {
- this->SmallVectorImpl<char>::assign(NumElts, Elt);
- }
-
- /// Assign from an iterator pair.
- template<typename in_iter>
- void assign(in_iter S, in_iter E) {
- this->clear();
- SmallVectorImpl<char>::append(S, E);
- }
+ using SmallVector<char, InternalLen>::assign;
/// Assign from a StringRef.
void assign(StringRef RHS) {
- this->clear();
- SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
+ SmallVectorImpl<char>::assign(RHS.begin(), RHS.end());
}
- /// Assign from a SmallVector.
- void assign(const SmallVectorImpl<char> &RHS) {
+ /// Assign from a list of StringRefs.
+ void assign(std::initializer_list<StringRef> Refs) {
this->clear();
- SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
+ append(Refs);
}
/// @}
/// @name String Concatenation
/// @{
- /// Append from an iterator pair.
- template<typename in_iter>
- void append(in_iter S, in_iter E) {
- SmallVectorImpl<char>::append(S, E);
- }
-
- void append(size_t NumInputs, char Elt) {
- SmallVectorImpl<char>::append(NumInputs, Elt);
- }
+ using SmallVector<char, InternalLen>::append;
/// Append from a StringRef.
void append(StringRef RHS) {
SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
}
- /// Append from a SmallVector.
- void append(const SmallVectorImpl<char> &RHS) {
- SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
+ /// Append from a list of StringRefs.
+ void append(std::initializer_list<StringRef> Refs) {
+ size_t SizeNeeded = this->size();
+ for (const StringRef &Ref : Refs)
+ SizeNeeded += Ref.size();
+ this->reserve(SizeNeeded);
+ auto CurEnd = this->end();
+ for (const StringRef &Ref : Refs) {
+ this->uninitialized_copy(Ref.begin(), Ref.end(), CurEnd);
+ CurEnd += Ref.size();
+ }
+ this->set_size(SizeNeeded);
}
/// @}
@@ -280,9 +273,9 @@ public:
}
// Extra operators.
- const SmallString &operator=(StringRef RHS) {
- this->clear();
- return *this += RHS;
+ SmallString &operator=(StringRef RHS) {
+ this->assign(RHS);
+ return *this;
}
SmallString &operator+=(StringRef RHS) {
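SmallString now accepts a brace-enclosed list of StringRefs in its constructor, assign(), and append(), reserving the combined size up front. Sketch (the path components are illustrative):

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"

llvm::SmallString<128> makeHeaderPath(llvm::StringRef Prefix) {
  llvm::SmallString<128> Path({Prefix, "/include/llvm/ADT"});
  Path.append({"/", "SmallString.h"});
  return Path;
}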
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SmallVector.h b/contrib/llvm-project/llvm/include/llvm/ADT/SmallVector.h
index 3ccee3d21d48..e960b272db04 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/SmallVector.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/SmallVector.h
@@ -14,7 +14,6 @@
#define LLVM_ADT_SMALLVECTOR_H
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -57,10 +56,15 @@ protected:
SmallVectorBase(void *FirstEl, size_t TotalCapacity)
: BeginX(FirstEl), Capacity(TotalCapacity) {}
+ /// This is a helper for \a grow() that's out of line to reduce code
+ /// duplication. This function will report a fatal error if it can't grow at
+ /// least to \p MinSize.
+ void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity);
+
/// This is an implementation of the grow() method which only works
/// on POD-like data types and is out of line to reduce code duplication.
/// This function will report a fatal error if it cannot increase capacity.
- void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize);
+ void grow_pod(void *FirstEl, size_t MinSize, size_t TSize);
public:
size_t size() const { return Size; }
@@ -90,8 +94,9 @@ using SmallVectorSizeType =
/// Figure out the offset of the first element.
template <class T, typename = void> struct SmallVectorAlignmentAndSize {
- AlignedCharArrayUnion<SmallVectorBase<SmallVectorSizeType<T>>> Base;
- AlignedCharArrayUnion<T> FirstEl;
+ alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof(
+ SmallVectorBase<SmallVectorSizeType<T>>)];
+ alignas(T) char FirstEl[sizeof(T)];
};
/// This is the part of SmallVectorTemplateBase which does not depend on whether
@@ -115,8 +120,8 @@ class SmallVectorTemplateCommon
protected:
SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}
- void grow_pod(size_t MinCapacity, size_t TSize) {
- Base::grow_pod(getFirstEl(), MinCapacity, TSize);
+ void grow_pod(size_t MinSize, size_t TSize) {
+ Base::grow_pod(getFirstEl(), MinSize, TSize);
}
/// Return true if this is a smallvector which has not had dynamic
@@ -129,6 +134,102 @@ protected:
this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
}
+ /// Return true if V is an internal reference to the given range.
+ bool isReferenceToRange(const void *V, const void *First, const void *Last) const {
+ // Use std::less to avoid UB.
+ std::less<> LessThan;
+ return !LessThan(V, First) && LessThan(V, Last);
+ }
+
+ /// Return true if V is an internal reference to this vector.
+ bool isReferenceToStorage(const void *V) const {
+ return isReferenceToRange(V, this->begin(), this->end());
+ }
+
+ /// Return true if First and Last form a valid (possibly empty) range in this
+ /// vector's storage.
+ bool isRangeInStorage(const void *First, const void *Last) const {
+ // Use std::less to avoid UB.
+ std::less<> LessThan;
+ return !LessThan(First, this->begin()) && !LessThan(Last, First) &&
+ !LessThan(this->end(), Last);
+ }
+
+ /// Return true unless Elt will be invalidated by resizing the vector to
+ /// NewSize.
+ bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
+ // Past the end.
+ if (LLVM_LIKELY(!isReferenceToStorage(Elt)))
+ return true;
+
+ // Return false if Elt will be destroyed by shrinking.
+ if (NewSize <= this->size())
+ return Elt < this->begin() + NewSize;
+
+ // Return false if we need to grow.
+ return NewSize <= this->capacity();
+ }
+
+ /// Check whether Elt will be invalidated by resizing the vector to NewSize.
+ void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
+ assert(isSafeToReferenceAfterResize(Elt, NewSize) &&
+ "Attempting to reference an element of the vector in an operation "
+ "that invalidates it");
+ }
+
+ /// Check whether Elt will be invalidated by increasing the size of the
+ /// vector by N.
+ void assertSafeToAdd(const void *Elt, size_t N = 1) {
+ this->assertSafeToReferenceAfterResize(Elt, this->size() + N);
+ }
+
+ /// Check whether any part of the range will be invalidated by clearing.
+ void assertSafeToReferenceAfterClear(const T *From, const T *To) {
+ if (From == To)
+ return;
+ this->assertSafeToReferenceAfterResize(From, 0);
+ this->assertSafeToReferenceAfterResize(To - 1, 0);
+ }
+ template <
+ class ItTy,
+ std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
+ bool> = false>
+ void assertSafeToReferenceAfterClear(ItTy, ItTy) {}
+
+ /// Check whether any part of the range will be invalidated by growing.
+ void assertSafeToAddRange(const T *From, const T *To) {
+ if (From == To)
+ return;
+ this->assertSafeToAdd(From, To - From);
+ this->assertSafeToAdd(To - 1, To - From);
+ }
+ template <
+ class ItTy,
+ std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
+ bool> = false>
+ void assertSafeToAddRange(ItTy, ItTy) {}
+
+ /// Reserve enough space to add one element, and return the updated element
+ /// pointer in case it was a reference to the storage.
+ template <class U>
+ static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt,
+ size_t N) {
+ size_t NewSize = This->size() + N;
+ if (LLVM_LIKELY(NewSize <= This->capacity()))
+ return &Elt;
+
+ bool ReferencesStorage = false;
+ int64_t Index = -1;
+ if (!U::TakesParamByValue) {
+ if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))) {
+ ReferencesStorage = true;
+ Index = &Elt - This->begin();
+ }
+ }
+ This->grow(NewSize);
+ return ReferencesStorage ? This->begin() + Index : &Elt;
+ }
+
public:
using size_type = size_t;
using difference_type = ptrdiff_t;
@@ -212,7 +313,12 @@ template <typename T, bool = (is_trivially_copy_constructible<T>::value) &&
(is_trivially_move_constructible<T>::value) &&
std::is_trivially_destructible<T>::value>
class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
+ friend class SmallVectorTemplateCommon<T>;
+
protected:
+ static constexpr bool TakesParamByValue = false;
+ using ValueParamT = const T &;
+
SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
static void destroy_range(T *S, T *E) {
@@ -242,18 +348,68 @@ protected:
/// element, or MinSize more elements if specified.
void grow(size_t MinSize = 0);
+ /// Create a new allocation big enough for \p MinSize and pass back its size
+ /// in \p NewCapacity. This is the first section of \a grow().
+ T *mallocForGrow(size_t MinSize, size_t &NewCapacity) {
+ return static_cast<T *>(
+ SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow(
+ MinSize, sizeof(T), NewCapacity));
+ }
+
+ /// Move existing elements over to the new allocation \p NewElts, the middle
+ /// section of \a grow().
+ void moveElementsForGrow(T *NewElts);
+
+ /// Transfer ownership of the allocation, finishing up \a grow().
+ void takeAllocationForGrow(T *NewElts, size_t NewCapacity);
+
+ /// Reserve enough space to add one element, and return the updated element
+ /// pointer in case it was a reference to the storage.
+ const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
+ return this->reserveForParamAndGetAddressImpl(this, Elt, N);
+ }
+
+ /// Reserve enough space to add one element, and return the updated element
+ /// pointer in case it was a reference to the storage.
+ T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
+ return const_cast<T *>(
+ this->reserveForParamAndGetAddressImpl(this, Elt, N));
+ }
+
+ static T &&forward_value_param(T &&V) { return std::move(V); }
+ static const T &forward_value_param(const T &V) { return V; }
+
+ void growAndAssign(size_t NumElts, const T &Elt) {
+ // Grow manually in case Elt is an internal reference.
+ size_t NewCapacity;
+ T *NewElts = mallocForGrow(NumElts, NewCapacity);
+ std::uninitialized_fill_n(NewElts, NumElts, Elt);
+ this->destroy_range(this->begin(), this->end());
+ takeAllocationForGrow(NewElts, NewCapacity);
+ this->set_size(NumElts);
+ }
+
+ template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
+ // Grow manually in case one of Args is an internal reference.
+ size_t NewCapacity;
+ T *NewElts = mallocForGrow(0, NewCapacity);
+ ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...);
+ moveElementsForGrow(NewElts);
+ takeAllocationForGrow(NewElts, NewCapacity);
+ this->set_size(this->size() + 1);
+ return this->back();
+ }
+
public:
void push_back(const T &Elt) {
- if (LLVM_UNLIKELY(this->size() >= this->capacity()))
- this->grow();
- ::new ((void*) this->end()) T(Elt);
+ const T *EltPtr = reserveForParamAndGetAddress(Elt);
+ ::new ((void *)this->end()) T(*EltPtr);
this->set_size(this->size() + 1);
}
void push_back(T &&Elt) {
- if (LLVM_UNLIKELY(this->size() >= this->capacity()))
- this->grow();
- ::new ((void*) this->end()) T(::std::move(Elt));
+ T *EltPtr = reserveForParamAndGetAddress(Elt);
+ ::new ((void *)this->end()) T(::std::move(*EltPtr));
this->set_size(this->size() + 1);
}
@@ -266,29 +422,27 @@ public:
// Define this out-of-line to dissuade the C++ compiler from inlining it.
template <typename T, bool TriviallyCopyable>
void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
- // Ensure we can fit the new capacity.
- // This is only going to be applicable when the capacity is 32 bit.
- if (MinSize > this->SizeTypeMax())
- report_bad_alloc_error("SmallVector capacity overflow during allocation");
-
- // Ensure we can meet the guarantee of space for at least one more element.
- // The above check alone will not catch the case where grow is called with a
- // default MinCapacity of 0, but the current capacity cannot be increased.
- // This is only going to be applicable when the capacity is 32 bit.
- if (this->capacity() == this->SizeTypeMax())
- report_bad_alloc_error("SmallVector capacity unable to grow");
-
- // Always grow, even from zero.
- size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2));
- NewCapacity = std::min(std::max(NewCapacity, MinSize), this->SizeTypeMax());
- T *NewElts = static_cast<T*>(llvm::safe_malloc(NewCapacity*sizeof(T)));
+ size_t NewCapacity;
+ T *NewElts = mallocForGrow(MinSize, NewCapacity);
+ moveElementsForGrow(NewElts);
+ takeAllocationForGrow(NewElts, NewCapacity);
+}
+// Define this out-of-line to dissuade the C++ compiler from inlining it.
+template <typename T, bool TriviallyCopyable>
+void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow(
+ T *NewElts) {
// Move the elements over.
this->uninitialized_move(this->begin(), this->end(), NewElts);
// Destroy the original elements.
destroy_range(this->begin(), this->end());
+}
+// Define this out-of-line to dissuade the C++ compiler from inlining it.
+template <typename T, bool TriviallyCopyable>
+void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow(
+ T *NewElts, size_t NewCapacity) {
// If this wasn't grown from the inline copy, deallocate the old space.
if (!this->isSmall())
free(this->begin());
@@ -303,7 +457,18 @@ void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
/// skipping destruction.
template <typename T>
class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
+ friend class SmallVectorTemplateCommon<T>;
+
protected:
+ /// True if it's cheap enough to take parameters by value. Doing so avoids
+ /// overhead related to mitigations for reference invalidation.
+ static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *);
+
+ /// Either const T& or T, depending on whether it's cheap enough to take
+ /// parameters by value.
+ using ValueParamT =
+ typename std::conditional<TakesParamByValue, T, const T &>::type;
+
SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
// No need to do a destroy loop for POD's.
@@ -344,11 +509,43 @@ protected:
/// least one more element or MinSize if specified.
void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); }
+ /// Reserve enough space to add one element, and return the updated element
+ /// pointer in case it was a reference to the storage.
+ const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
+ return this->reserveForParamAndGetAddressImpl(this, Elt, N);
+ }
+
+ /// Reserve enough space to add one element, and return the updated element
+ /// pointer in case it was a reference to the storage.
+ T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
+ return const_cast<T *>(
+ this->reserveForParamAndGetAddressImpl(this, Elt, N));
+ }
+
+ /// Copy \p V or return a reference, depending on \a ValueParamT.
+ static ValueParamT forward_value_param(ValueParamT V) { return V; }
+
+ void growAndAssign(size_t NumElts, T Elt) {
+ // Elt has been copied in case it's an internal reference, side-stepping
+ // reference invalidation problems without losing the realloc optimization.
+ this->set_size(0);
+ this->grow(NumElts);
+ std::uninitialized_fill_n(this->begin(), NumElts, Elt);
+ this->set_size(NumElts);
+ }
+
+ template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
+ // Use push_back with a copy in case Args has an internal reference,
+ // side-stepping reference invalidation problems without losing the realloc
+ // optimization.
+ push_back(T(std::forward<ArgTypes>(Args)...));
+ return this->back();
+ }
+
public:
- void push_back(const T &Elt) {
- if (LLVM_UNLIKELY(this->size() >= this->capacity()))
- this->grow();
- memcpy(reinterpret_cast<void *>(this->end()), &Elt, sizeof(T));
+ void push_back(ValueParamT Elt) {
+ const T *EltPtr = reserveForParamAndGetAddress(Elt);
+ memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T));
this->set_size(this->size() + 1);
}
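The reserveForParamAndGetAddress plumbing above makes push_back (and the append/insert overloads below) safe when the argument refers back into the vector and the call has to grow it. A minimal sketch of the pattern being protected:

#include "llvm/ADT/SmallVector.h"
#include <string>

void duplicateFront(llvm::SmallVector<std::string, 2> &V) {
  // Even if this push_back reallocates, the element's address is re-derived
  // after the grow, so V[0] is not read through a dangling reference.
  if (!V.empty())
    V.push_back(V[0]);
}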
@@ -368,6 +565,9 @@ public:
using size_type = typename SuperClass::size_type;
protected:
+ using SmallVectorTemplateBase<T>::TakesParamByValue;
+ using ValueParamT = typename SuperClass::ValueParamT;
+
// Default ctor - Initialize to empty.
explicit SmallVectorImpl(unsigned N)
: SmallVectorTemplateBase<T>(N) {}
@@ -387,29 +587,38 @@ public:
this->Size = 0;
}
- void resize(size_type N) {
+private:
+ template <bool ForOverwrite> void resizeImpl(size_type N) {
if (N < this->size()) {
- this->destroy_range(this->begin()+N, this->end());
- this->set_size(N);
+ this->pop_back_n(this->size() - N);
} else if (N > this->size()) {
- if (this->capacity() < N)
- this->grow(N);
+ this->reserve(N);
for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
- new (&*I) T();
+ if (ForOverwrite)
+ new (&*I) T;
+ else
+ new (&*I) T();
this->set_size(N);
}
}
- void resize(size_type N, const T &NV) {
+public:
+ void resize(size_type N) { resizeImpl<false>(N); }
+
+ /// Like resize, but when \ref T is POD the new values won't be initialized.
+ void resize_for_overwrite(size_type N) { resizeImpl<true>(N); }
+
+ void resize(size_type N, ValueParamT NV) {
+ if (N == this->size())
+ return;
+
if (N < this->size()) {
- this->destroy_range(this->begin()+N, this->end());
- this->set_size(N);
- } else if (N > this->size()) {
- if (this->capacity() < N)
- this->grow(N);
- std::uninitialized_fill(this->end(), this->begin()+N, NV);
- this->set_size(N);
+ this->pop_back_n(this->size() - N);
+ return;
}
+
+ // N > this->size(). Defer to append.
+ this->append(N - this->size(), NV);
}
void reserve(size_type N) {
@@ -417,6 +626,12 @@ public:
this->grow(N);
}
+ void pop_back_n(size_type NumItems) {
+ assert(this->size() >= NumItems);
+ this->destroy_range(this->end() - NumItems, this->end());
+ this->set_size(this->size() - NumItems);
+ }
+
LLVM_NODISCARD T pop_back_val() {
T Result = ::std::move(this->back());
this->pop_back();
@@ -431,20 +646,17 @@ public:
typename std::iterator_traits<in_iter>::iterator_category,
std::input_iterator_tag>::value>>
void append(in_iter in_start, in_iter in_end) {
+ this->assertSafeToAddRange(in_start, in_end);
size_type NumInputs = std::distance(in_start, in_end);
- if (NumInputs > this->capacity() - this->size())
- this->grow(this->size()+NumInputs);
-
+ this->reserve(this->size() + NumInputs);
this->uninitialized_copy(in_start, in_end, this->end());
this->set_size(this->size() + NumInputs);
}
/// Append \p NumInputs copies of \p Elt to the end.
- void append(size_type NumInputs, const T &Elt) {
- if (NumInputs > this->capacity() - this->size())
- this->grow(this->size()+NumInputs);
-
- std::uninitialized_fill_n(this->end(), NumInputs, Elt);
+ void append(size_type NumInputs, ValueParamT Elt) {
+ const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs);
+ std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr);
this->set_size(this->size() + NumInputs);
}
@@ -452,22 +664,33 @@ public:
append(IL.begin(), IL.end());
}
- // FIXME: Consider assigning over existing elements, rather than clearing &
- // re-initializing them - for all assign(...) variants.
+ void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); }
- void assign(size_type NumElts, const T &Elt) {
- clear();
- if (this->capacity() < NumElts)
- this->grow(NumElts);
+ void assign(size_type NumElts, ValueParamT Elt) {
+ // Note that Elt could be an internal reference.
+ if (NumElts > this->capacity()) {
+ this->growAndAssign(NumElts, Elt);
+ return;
+ }
+
+ // Assign over existing elements.
+ std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt);
+ if (NumElts > this->size())
+ std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt);
+ else if (NumElts < this->size())
+ this->destroy_range(this->begin() + NumElts, this->end());
this->set_size(NumElts);
- std::uninitialized_fill(this->begin(), this->end(), Elt);
}
+ // FIXME: Consider assigning over existing elements, rather than clearing &
+ // re-initializing them - for all assign(...) variants.
+
template <typename in_iter,
typename = std::enable_if_t<std::is_convertible<
typename std::iterator_traits<in_iter>::iterator_category,
std::input_iterator_tag>::value>>
void assign(in_iter in_start, in_iter in_end) {
+ this->assertSafeToReferenceAfterClear(in_start, in_end);
clear();
append(in_start, in_end);
}
@@ -477,12 +700,13 @@ public:
append(IL);
}
+ void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); }
+
iterator erase(const_iterator CI) {
// Just cast away constness because this is a non-const member function.
iterator I = const_cast<iterator>(CI);
- assert(I >= this->begin() && "Iterator to erase is out of bounds.");
- assert(I < this->end() && "Erasing at past-the-end iterator.");
+ assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds.");
iterator N = I;
// Shift all elts down one.
@@ -497,9 +721,7 @@ public:
iterator S = const_cast<iterator>(CS);
iterator E = const_cast<iterator>(CE);
- assert(S >= this->begin() && "Range to erase is out of bounds.");
- assert(S <= E && "Trying to erase invalid range.");
- assert(E <= this->end() && "Trying to erase past the end.");
+ assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds.");
iterator N = S;
// Shift all elts down.
@@ -510,20 +732,26 @@ public:
return(N);
}
- iterator insert(iterator I, T &&Elt) {
+private:
+ template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) {
+ // Callers ensure that ArgType is derived from T.
+ static_assert(
+ std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>,
+ T>::value,
+ "ArgType must be derived from T!");
+
if (I == this->end()) { // Important special case for empty vector.
- this->push_back(::std::move(Elt));
+ this->push_back(::std::forward<ArgType>(Elt));
return this->end()-1;
}
- assert(I >= this->begin() && "Insertion iterator is out of bounds.");
- assert(I <= this->end() && "Inserting past the end of the vector.");
+ assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.");
- if (this->size() >= this->capacity()) {
- size_t EltNo = I-this->begin();
- this->grow();
- I = this->begin()+EltNo;
- }
+ // Grow if necessary.
+ size_t Index = I - this->begin();
+ std::remove_reference_t<ArgType> *EltPtr =
+ this->reserveForParamAndGetAddress(Elt);
+ I = this->begin() + Index;
::new ((void*) this->end()) T(::std::move(this->back()));
// Push everything else over.
@@ -531,45 +759,26 @@ public:
this->set_size(this->size() + 1);
// If we just moved the element we're inserting, be sure to update
- // the reference.
- T *EltPtr = &Elt;
- if (I <= EltPtr && EltPtr < this->end())
+ // the reference (never happens if TakesParamByValue).
+ static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value,
+ "ArgType must be 'T' when taking by value!");
+ if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end()))
++EltPtr;
- *I = ::std::move(*EltPtr);
+ *I = ::std::forward<ArgType>(*EltPtr);
return I;
}
- iterator insert(iterator I, const T &Elt) {
- if (I == this->end()) { // Important special case for empty vector.
- this->push_back(Elt);
- return this->end()-1;
- }
-
- assert(I >= this->begin() && "Insertion iterator is out of bounds.");
- assert(I <= this->end() && "Inserting past the end of the vector.");
-
- if (this->size() >= this->capacity()) {
- size_t EltNo = I-this->begin();
- this->grow();
- I = this->begin()+EltNo;
- }
- ::new ((void*) this->end()) T(std::move(this->back()));
- // Push everything else over.
- std::move_backward(I, this->end()-1, this->end());
- this->set_size(this->size() + 1);
-
- // If we just moved the element we're inserting, be sure to update
- // the reference.
- const T *EltPtr = &Elt;
- if (I <= EltPtr && EltPtr < this->end())
- ++EltPtr;
+public:
+ iterator insert(iterator I, T &&Elt) {
+ return insert_one_impl(I, this->forward_value_param(std::move(Elt)));
+ }
- *I = *EltPtr;
- return I;
+ iterator insert(iterator I, const T &Elt) {
+ return insert_one_impl(I, this->forward_value_param(Elt));
}
- iterator insert(iterator I, size_type NumToInsert, const T &Elt) {
+ iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) {
// Convert iterator to elt# to avoid invalidating iterator when we reserve()
size_t InsertElt = I - this->begin();
@@ -578,11 +787,11 @@ public:
return this->begin()+InsertElt;
}
- assert(I >= this->begin() && "Insertion iterator is out of bounds.");
- assert(I <= this->end() && "Inserting past the end of the vector.");
+ assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.");
- // Ensure there is enough space.
- reserve(this->size() + NumToInsert);
+ // Ensure there is enough space, and get the (maybe updated) address of
+ // Elt.
+ const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert);
// Uninvalidate the iterator.
I = this->begin()+InsertElt;
@@ -599,7 +808,12 @@ public:
// Copy the existing elements that get replaced.
std::move_backward(I, OldEnd-NumToInsert, OldEnd);
- std::fill_n(I, NumToInsert, Elt);
+ // If we just moved the element we're inserting, be sure to update
+ // the reference (never happens if TakesParamByValue).
+ if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
+ EltPtr += NumToInsert;
+
+ std::fill_n(I, NumToInsert, *EltPtr);
return I;
}
@@ -612,11 +826,16 @@ public:
size_t NumOverwritten = OldEnd-I;
this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
+ // If we just moved the element we're inserting, be sure to update
+ // the reference (never happens if TakesParamByValue).
+ if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
+ EltPtr += NumToInsert;
+
// Replace the overwritten part.
- std::fill_n(I, NumOverwritten, Elt);
+ std::fill_n(I, NumOverwritten, *EltPtr);
// Insert the non-overwritten middle part.
- std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt);
+ std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr);
return I;
}
@@ -633,8 +852,10 @@ public:
return this->begin()+InsertElt;
}
- assert(I >= this->begin() && "Insertion iterator is out of bounds.");
- assert(I <= this->end() && "Inserting past the end of the vector.");
+ assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.");
+
+ // Check that the reserve that follows doesn't invalidate the iterators.
+ this->assertSafeToAddRange(From, To);
size_t NumToInsert = std::distance(From, To);
@@ -686,7 +907,8 @@ public:
template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
if (LLVM_UNLIKELY(this->size() >= this->capacity()))
- this->grow();
+ return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...);
+
::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
this->set_size(this->size() + 1);
return this->back();
@@ -721,10 +943,8 @@ void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
std::swap(this->Capacity, RHS.Capacity);
return;
}
- if (RHS.size() > this->capacity())
- this->grow(RHS.size());
- if (this->size() > RHS.capacity())
- RHS.grow(this->size());
+ this->reserve(RHS.size());
+ RHS.reserve(this->size());
// Swap the shared elements.
size_t NumShared = this->size();
@@ -779,8 +999,7 @@ SmallVectorImpl<T> &SmallVectorImpl<T>::
// FIXME: don't do this if they're efficiently moveable.
if (this->capacity() < RHSSize) {
// Destroy current elements.
- this->destroy_range(this->begin(), this->end());
- this->set_size(0);
+ this->clear();
CurSize = 0;
this->grow(RHSSize);
} else if (CurSize) {
@@ -839,8 +1058,7 @@ SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
// elements.
if (this->capacity() < RHSSize) {
// Destroy current elements.
- this->destroy_range(this->begin(), this->end());
- this->set_size(0);
+ this->clear();
CurSize = 0;
this->grow(RHSSize);
} else if (CurSize) {
@@ -863,13 +1081,71 @@ SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
/// to avoid allocating unnecessary storage.
template <typename T, unsigned N>
struct SmallVectorStorage {
- AlignedCharArrayUnion<T> InlineElts[N];
+ alignas(T) char InlineElts[N * sizeof(T)];
};
/// We need the storage to be properly aligned even for small-size of 0 so that
/// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is
/// well-defined.
-template <typename T> struct alignas(alignof(T)) SmallVectorStorage<T, 0> {};
+template <typename T> struct alignas(T) SmallVectorStorage<T, 0> {};
+
+/// Forward declaration of SmallVector so that
+/// calculateSmallVectorDefaultInlinedElements can reference
+/// `sizeof(SmallVector<T, 0>)`.
+template <typename T, unsigned N> class LLVM_GSL_OWNER SmallVector;
+
+/// Helper class for calculating the default number of inline elements for
+/// `SmallVector<T>`.
+///
+/// This should be migrated to a constexpr function when our minimum
+/// compiler support is enough for multi-statement constexpr functions.
+template <typename T> struct CalculateSmallVectorDefaultInlinedElements {
+ // Parameter controlling the default number of inlined elements
+ // for `SmallVector<T>`.
+ //
+ // The default number of inlined elements ensures that
+ // 1. There is at least one inlined element.
+ // 2. `sizeof(SmallVector<T>) <= kPreferredSmallVectorSizeof` unless
+ // it contradicts 1.
+ static constexpr size_t kPreferredSmallVectorSizeof = 64;
+
+ // static_assert that sizeof(T) is not "too big".
+ //
+ // Because our policy guarantees at least one inlined element, it is possible
+ // for an arbitrarily large inlined element to allocate an arbitrarily large
+ // amount of inline storage. We generally consider it an antipattern for a
+ // SmallVector to allocate an excessive amount of inline storage, so we want
+ // to call attention to these cases and make sure that users are making an
+ // intentional decision if they request a lot of inline storage.
+ //
+ // We want this assertion to trigger in pathological cases, but otherwise
+ // not be too easy to hit. To accomplish that, the cutoff is actually somewhat
+ // larger than kPreferredSmallVectorSizeof (otherwise,
+ // `SmallVector<SmallVector<T>>` would be one easy way to trip it, and that
+ // pattern seems useful in practice).
+ //
+ // One wrinkle is that this assertion is in theory non-portable, since
+ // sizeof(T) is in general platform-dependent. However, we don't expect this
+ // to be much of an issue, because most LLVM development happens on 64-bit
+ // hosts, and therefore sizeof(T) is expected to *decrease* when compiled for
+ // 32-bit hosts, dodging the issue. The reverse situation, where development
+ // happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a
+ // 64-bit host, is expected to be very rare.
+ static_assert(
+ sizeof(T) <= 256,
+ "You are trying to use a default number of inlined elements for "
+ "`SmallVector<T>` but `sizeof(T)` is really big! Please use an "
+ "explicit number of inlined elements with `SmallVector<T, N>` to make "
+ "sure you really want that much inline storage.");
+
+ // Discount the size of the header itself when calculating the maximum inline
+ // bytes.
+ static constexpr size_t PreferredInlineBytes =
+ kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>);
+ static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T);
+ static constexpr size_t value =
+ NumElementsThatFit == 0 ? 1 : NumElementsThatFit;
+};
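As a rough worked example (an illustration, not part of the diff): on a typical 64-bit host, sizeof(SmallVector<int32_t, 0>) is 16 bytes (a pointer plus 32-bit Size and Capacity fields), so PreferredInlineBytes is 64 - 16 = 48 and the default N for int32_t works out to 48 / 4 = 12. The "at least one inlined element" guarantee can be checked directly:

  static_assert(
      llvm::CalculateSmallVectorDefaultInlinedElements<int32_t>::value >= 1,
      "the policy always reserves at least one inlined element");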
/// This is a 'vector' (really, a variable-sized array), optimized
/// for the case when the array is small. It contains some number of elements
@@ -877,9 +1153,18 @@ template <typename T> struct alignas(alignof(T)) SmallVectorStorage<T, 0> {};
/// elements is below that threshold. This allows normal "small" cases to be
/// fast without losing generality for large inputs.
///
-/// Note that this does not attempt to be exception safe.
+/// \note
+/// In the absence of a well-motivated choice for the number of inlined
+/// elements \p N, it is recommended to use \c SmallVector<T> (that is,
+/// omitting the \p N). This will choose a default number of inlined elements
+/// reasonable for allocation on the stack (for example, trying to keep \c
+/// sizeof(SmallVector<T>) around 64 bytes).
///
-template <typename T, unsigned N>
+/// \warning This does not attempt to be exception safe.
+///
+/// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h
+template <typename T,
+ unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value>
class LLVM_GSL_OWNER SmallVector : public SmallVectorImpl<T>,
SmallVectorStorage<T, N> {
public:
@@ -918,7 +1203,7 @@ public:
SmallVectorImpl<T>::operator=(RHS);
}
- const SmallVector &operator=(const SmallVector &RHS) {
+ SmallVector &operator=(const SmallVector &RHS) {
SmallVectorImpl<T>::operator=(RHS);
return *this;
}
@@ -933,17 +1218,17 @@ public:
SmallVectorImpl<T>::operator=(::std::move(RHS));
}
- const SmallVector &operator=(SmallVector &&RHS) {
+ SmallVector &operator=(SmallVector &&RHS) {
SmallVectorImpl<T>::operator=(::std::move(RHS));
return *this;
}
- const SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
+ SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
SmallVectorImpl<T>::operator=(::std::move(RHS));
return *this;
}
- const SmallVector &operator=(std::initializer_list<T> IL) {
+ SmallVector &operator=(std::initializer_list<T> IL) {
this->assign(IL);
return *this;
}
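Putting the new pieces together, a small usage sketch (illustrative only, not part of the diff):

  llvm::SmallVector<int> Vec;  // inlined-element count chosen automatically
  Vec.append(3, 42);           // three copies of 42
  Vec.resize_for_overwrite(8); // grow without value-initializing the new tail
  Vec.pop_back_n(2);           // drop the last two elements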
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SparseSet.h b/contrib/llvm-project/llvm/include/llvm/ADT/SparseSet.h
index 74457d5fd679..d8acf1ee2f3a 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/SparseSet.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/SparseSet.h
@@ -229,12 +229,15 @@ public:
return const_cast<SparseSet*>(this)->findIndex(KeyIndexOf(Key));
}
+ /// Check if the set contains the given \c Key.
+ ///
+ /// @param Key A valid key to find.
+ bool contains(const KeyT &Key) const { return find(Key) != end(); }
+
/// count - Returns 1 if this set contains an element identified by Key,
/// 0 otherwise.
///
- size_type count(const KeyT &Key) const {
- return find(Key) == end() ? 0 : 1;
- }
+ size_type count(const KeyT &Key) const { return contains(Key) ? 1 : 0; }
/// insert - Attempts to insert a new element.
///
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Statistic.h b/contrib/llvm-project/llvm/include/llvm/ADT/Statistic.h
index d7aff6c5939a..aa338ccff19a 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Statistic.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Statistic.h
@@ -36,6 +36,8 @@
// configure time.
#if !defined(NDEBUG) || LLVM_FORCE_ENABLE_STATS
#define LLVM_ENABLE_STATS 1
+#else
+#define LLVM_ENABLE_STATS 0
#endif
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/StringExtras.h b/contrib/llvm-project/llvm/include/llvm/ADT/StringExtras.h
index 990a3054a9d2..68e89508cba9 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/StringExtras.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/StringExtras.h
@@ -66,17 +66,29 @@ inline ArrayRef<uint8_t> arrayRefFromStringRef(StringRef Input) {
///
/// If \p C is not a valid hex digit, -1U is returned.
inline unsigned hexDigitValue(char C) {
- if (C >= '0' && C <= '9') return C-'0';
- if (C >= 'a' && C <= 'f') return C-'a'+10U;
- if (C >= 'A' && C <= 'F') return C-'A'+10U;
- return -1U;
+ struct HexTable {
+ unsigned LUT[256] = {};
+ constexpr HexTable() {
+ // Default initialize everything to invalid. The table has one entry per
+ // possible unsigned char value (0-255), so any input byte indexes in bounds.
+ for (int i = 0; i < 256; ++i)
+ LUT[i] = ~0U;
+ // Initialize `0`-`9`.
+ for (int i = 0; i < 10; ++i)
+ LUT['0' + i] = i;
+ // Initialize `A`-`F` and `a`-`f`.
+ for (int i = 0; i < 6; ++i)
+ LUT['A' + i] = LUT['a' + i] = 10 + i;
+ }
+ };
+ constexpr HexTable Table;
+ return Table.LUT[static_cast<unsigned char>(C)];
}
/// Checks if character \p C is one of the 10 decimal digits.
inline bool isDigit(char C) { return C >= '0' && C <= '9'; }
/// Checks if character \p C is a hexadecimal numeric character.
-inline bool isHexDigit(char C) { return hexDigitValue(C) != -1U; }
+inline bool isHexDigit(char C) { return hexDigitValue(C) != ~0U; }
/// Checks if character \p C is a valid letter as classified by "C" locale.
inline bool isAlpha(char C) {
@@ -165,34 +177,68 @@ inline std::string toHex(ArrayRef<uint8_t> Input, bool LowerCase = false) {
return toHex(toStringRef(Input), LowerCase);
}
-inline uint8_t hexFromNibbles(char MSB, char LSB) {
+/// Store into \p Hex the byte whose most- and least-significant nibbles are
+/// given by the hexadecimal digits \p MSB and \p LSB. If \p MSB or \p LSB is
+/// not a valid hexadecimal digit, this method returns false. Otherwise,
+/// returns true.
+inline bool tryGetHexFromNibbles(char MSB, char LSB, uint8_t &Hex) {
unsigned U1 = hexDigitValue(MSB);
unsigned U2 = hexDigitValue(LSB);
- assert(U1 != -1U && U2 != -1U);
+ if (U1 == ~0U || U2 == ~0U)
+ return false;
- return static_cast<uint8_t>((U1 << 4) | U2);
+ Hex = static_cast<uint8_t>((U1 << 4) | U2);
+ return true;
}
-/// Convert hexadecimal string \p Input to its binary representation.
-/// The return string is half the size of \p Input.
-inline std::string fromHex(StringRef Input) {
+/// Return the byte whose most- and least-significant nibbles are given by the
+/// hexadecimal digits \p MSB and \p LSB.
+inline uint8_t hexFromNibbles(char MSB, char LSB) {
+ uint8_t Hex = 0;
+ bool GotHex = tryGetHexFromNibbles(MSB, LSB, Hex);
+ (void)GotHex;
+ assert(GotHex && "MSB and/or LSB do not correspond to hex digits");
+ return Hex;
+}
+
+/// Convert hexadecimal string \p Input to its binary representation and store
+/// the result in \p Output. Returns true if the binary representation could be
+/// converted from the hexadecimal string. Returns false if \p Input contains
+/// non-hexadecimal digits. The output string is half the size of \p Input.
+inline bool tryGetFromHex(StringRef Input, std::string &Output) {
if (Input.empty())
- return std::string();
+ return true;
- std::string Output;
Output.reserve((Input.size() + 1) / 2);
if (Input.size() % 2 == 1) {
- Output.push_back(hexFromNibbles('0', Input.front()));
+ uint8_t Hex = 0;
+ if (!tryGetHexFromNibbles('0', Input.front(), Hex))
+ return false;
+
+ Output.push_back(Hex);
Input = Input.drop_front();
}
assert(Input.size() % 2 == 0);
while (!Input.empty()) {
- uint8_t Hex = hexFromNibbles(Input[0], Input[1]);
+ uint8_t Hex = 0;
+ if (!tryGetHexFromNibbles(Input[0], Input[1], Hex))
+ return false;
+
Output.push_back(Hex);
Input = Input.drop_front(2);
}
- return Output;
+ return true;
+}
+
+/// Convert hexadecimal string \p Input to its binary representation.
+/// The return string is half the size of \p Input.
+inline std::string fromHex(StringRef Input) {
+ std::string Hex;
+ bool GotHex = tryGetFromHex(Input, Hex);
+ (void)GotHex;
+ assert(GotHex && "Input contains non hex digits");
+ return Hex;
}
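A short usage sketch of the new non-asserting helpers (illustrative, not part of the diff):

  uint8_t Byte = 0;
  bool OkNibbles = llvm::tryGetHexFromNibbles('4', 'f', Byte); // true, Byte == 0x4f
  std::string Bytes;
  bool OkString = llvm::tryGetFromHex("0a1b2c", Bytes); // true: 0x0a 0x1b 0x2c
  bool Bad = llvm::tryGetFromHex("zz", Bytes);          // false: not hex digits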
/// Convert the string \p S to an integer of the specified type using
@@ -245,7 +291,7 @@ inline std::string utostr(uint64_t X, bool isNeg = false) {
inline std::string itostr(int64_t X) {
if (X < 0)
- return utostr(-static_cast<uint64_t>(X), true);
+ return utostr(static_cast<uint64_t>(1) + ~static_cast<uint64_t>(X), true);
else
return utostr(static_cast<uint64_t>(X));
}
@@ -338,13 +384,16 @@ inline std::string join_impl(IteratorT Begin, IteratorT End,
size_t Len = (std::distance(Begin, End) - 1) * Separator.size();
for (IteratorT I = Begin; I != End; ++I)
- Len += (*Begin).size();
+ Len += (*I).size();
S.reserve(Len);
+ size_t PrevCapacity = S.capacity();
+ (void)PrevCapacity;
S += (*Begin);
while (++Begin != End) {
S += Separator;
S += (*Begin);
}
+ assert(PrevCapacity == S.capacity() && "String grew during building");
return S;
}
@@ -416,6 +465,30 @@ inline std::string join_items(Sep Separator, Args &&... Items) {
return Result;
}
+/// A helper class to return the specified delimiter string after the first
+/// invocation of operator StringRef(). Used to generate a comma-separated
+/// list from a loop like so:
+///
+/// \code
+/// ListSeparator LS;
+/// for (auto &I : C)
+/// OS << LS << I.getName();
+/// \endcode
+class ListSeparator {
+ bool First = true;
+ StringRef Separator;
+
+public:
+ ListSeparator(StringRef Separator = ", ") : Separator(Separator) {}
+ operator StringRef() {
+ if (First) {
+ First = false;
+ return {};
+ }
+ return Separator;
+ }
+};
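For example, the following sketch (not part of the diff) prints "x, y, z" with no leading or trailing separator:

  llvm::ListSeparator LS;
  for (llvm::StringRef Name : {"x", "y", "z"})
    llvm::outs() << LS << Name;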
+
} // end namespace llvm
#endif // LLVM_ADT_STRINGEXTRAS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/StringMap.h b/contrib/llvm-project/llvm/include/llvm/ADT/StringMap.h
index 840f328db796..a82afc9a817c 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/StringMap.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/StringMap.h
@@ -78,10 +78,12 @@ protected:
void init(unsigned Size);
public:
+ static constexpr uintptr_t TombstoneIntVal =
+ static_cast<uintptr_t>(-1)
+ << PointerLikeTypeTraits<StringMapEntryBase *>::NumLowBitsAvailable;
+
static StringMapEntryBase *getTombstoneVal() {
- uintptr_t Val = static_cast<uintptr_t>(-1);
- Val <<= PointerLikeTypeTraits<StringMapEntryBase *>::NumLowBitsAvailable;
- return reinterpret_cast<StringMapEntryBase *>(Val);
+ return reinterpret_cast<StringMapEntryBase *>(TombstoneIntVal);
}
unsigned getNumBuckets() const { return NumBuckets; }
@@ -387,7 +389,9 @@ public:
return static_cast<DerivedTy &>(*this);
}
- bool operator==(const DerivedTy &RHS) const { return Ptr == RHS.Ptr; }
+ friend bool operator==(const DerivedTy &LHS, const DerivedTy &RHS) {
+ return LHS.Ptr == RHS.Ptr;
+ }
DerivedTy &operator++() { // Preincrement
++Ptr;
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/StringSet.h b/contrib/llvm-project/llvm/include/llvm/ADT/StringSet.h
index 63d929399a4e..c4245175544b 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/StringSet.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/StringSet.h
@@ -45,6 +45,9 @@ public:
insert(const StringMapEntry<ValueTy> &mapEntry) {
return insert(mapEntry.getKey());
}
+
+ /// Check if the set contains the given \c key.
+ bool contains(StringRef key) const { return Base::FindKey(key) != -1; }
};
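A quick sketch of the new member (illustrative, not part of the diff):

  llvm::StringSet<> Names;
  Names.insert("clang");
  bool HasClang = Names.contains("clang"); // true
  bool HasGCC = Names.contains("gcc");     // false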
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h b/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h
index 6bad18f19244..eed315c929ad 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h
@@ -56,6 +56,7 @@ public:
avr, // AVR: Atmel AVR microcontroller
bpfel, // eBPF or extended BPF or 64-bit BPF (little endian)
bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian)
+ csky, // CSKY: csky
hexagon, // Hexagon: hexagon
mips, // MIPS: mips, mipsallegrex, mipsr6
mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el
@@ -63,6 +64,7 @@ public:
mips64el, // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el
msp430, // MSP430: msp430
ppc, // PPC: powerpc
+ ppcle, // PPCLE: powerpc (little endian)
ppc64, // PPC64: powerpc64, ppu
ppc64le, // PPC64LE: powerpc64le
r600, // R600: AMD GPUs HD2XXX - HD6XXX
@@ -103,6 +105,7 @@ public:
enum SubArchType {
NoSubArch,
+ ARMSubArch_v8_7a,
ARMSubArch_v8_6a,
ARMSubArch_v8_5a,
ARMSubArch_v8_4a,
@@ -128,6 +131,8 @@ public:
ARMSubArch_v5te,
ARMSubArch_v4t,
+ AArch64SubArch_arm64e,
+
KalimbaSubArch_v3,
KalimbaSubArch_v4,
KalimbaSubArch_v5,
@@ -142,8 +147,6 @@ public:
Apple,
PC,
SCEI,
- BGP,
- BGQ,
Freescale,
IBM,
ImaginationTechnologies,
@@ -175,11 +178,11 @@ public:
OpenBSD,
Solaris,
Win32,
+ ZOS,
Haiku,
Minix,
RTEMS,
NaCl, // Native Client
- CNK, // BG/P Compute-Node Kernel
AIX,
CUDA, // NVIDIA CUDA
NVCL, // NVIDIA OpenCL
@@ -206,6 +209,7 @@ public:
GNUEABI,
GNUEABIHF,
GNUX32,
+ GNUILP32,
CODE16,
EABI,
EABIHF,
@@ -227,6 +231,7 @@ public:
COFF,
ELF,
+ GOFF,
MachO,
Wasm,
XCOFF,
@@ -471,6 +476,8 @@ public:
return getSubArch() == Triple::ARMSubArch_v7k;
}
+ bool isOSzOS() const { return getOS() == Triple::ZOS; }
+
/// isOSDarwin - Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS).
bool isOSDarwin() const {
return isMacOSX() || isiOS() || isWatchOS();
@@ -484,6 +491,12 @@ public:
return getEnvironment() == Triple::MacABI;
}
+ /// Returns true for targets that run on a macOS machine.
+ bool isTargetMachineMac() const {
+ return isMacOSX() || (isOSDarwin() && (isSimulatorEnvironment() ||
+ isMacCatalystEnvironment()));
+ }
+
bool isOSNetBSD() const {
return getOS() == Triple::NetBSD;
}
@@ -623,6 +636,9 @@ public:
return getObjectFormat() == Triple::COFF;
}
+ /// Tests whether the OS uses the GOFF binary format.
+ bool isOSBinFormatGOFF() const { return getObjectFormat() == Triple::GOFF; }
+
/// Tests whether the environment is MachO.
bool isOSBinFormatMachO() const {
return getObjectFormat() == Triple::MachO;
@@ -703,7 +719,20 @@ public:
/// Tests whether the target is AArch64 (little and big endian).
bool isAArch64() const {
- return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be;
+ return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be ||
+ getArch() == Triple::aarch64_32;
+ }
+
+ /// Tests whether the target is AArch64 and pointers are the size specified by
+ /// \p PointerWidth.
+ bool isAArch64(int PointerWidth) const {
+ assert(PointerWidth == 64 || PointerWidth == 32);
+ if (!isAArch64())
+ return false;
+ return getArch() == Triple::aarch64_32 ||
+ getEnvironment() == Triple::GNUILP32
+ ? PointerWidth == 32
+ : PointerWidth == 64;
}
/// Tests whether the target is MIPS 32-bit (little and big endian).
@@ -721,6 +750,17 @@ public:
return isMIPS32() || isMIPS64();
}
+ /// Tests whether the target is PowerPC (32- or 64-bit LE or BE).
+ bool isPPC() const {
+ return getArch() == Triple::ppc || getArch() == Triple::ppc64 ||
+ getArch() == Triple::ppcle || getArch() == Triple::ppc64le;
+ }
+
+ /// Tests whether the target is 32-bit PowerPC (little and big endian).
+ bool isPPC32() const {
+ return getArch() == Triple::ppc || getArch() == Triple::ppcle;
+ }
+
/// Tests whether the target is 64-bit PowerPC (little and big endian).
bool isPPC64() const {
return getArch() == Triple::ppc64 || getArch() == Triple::ppc64le;
@@ -751,6 +791,17 @@ public:
return getArch() == Triple::wasm32 || getArch() == Triple::wasm64;
}
+ /// Tests whether the target is CSKY.
+ bool isCSKY() const {
+ return getArch() == Triple::csky;
+ }
+
+ /// Tests whether the target is the Apple "arm64e" AArch64 subarch.
+ bool isArm64e() const {
+ return getArch() == Triple::aarch64 &&
+ getSubArch() == Triple::AArch64SubArch_arm64e;
+ }
+
/// Tests whether the target supports comdat
bool supportsCOMDAT() const {
return !(isOSBinFormatMachO() || isOSBinFormatXCOFF());
@@ -761,6 +812,14 @@ public:
return isAndroid() || isOSOpenBSD() || isWindowsCygwinEnvironment();
}
+ /// Tests whether the target uses -data-sections as default.
+ bool hasDefaultDataSections() const {
+ return isOSBinFormatXCOFF() || isWasm();
+ }
+
+ /// Tests if the environment supports dllimport/export annotations.
+ bool hasDLLImportExport() const { return isOSWindows() || isPS4CPU(); }
+
/// @}
/// @name Mutators
/// @{
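An illustrative sketch of the new Triple predicates (not part of the diff; the triple string is just an example):

  llvm::Triple T("powerpc64le-unknown-linux-gnu");
  bool AnyPPC = T.isPPC();           // true: covers ppc, ppcle, ppc64, ppc64le
  bool PPC32 = T.isPPC32();          // false: this is a 64-bit triple
  bool DLL = T.hasDLLImportExport(); // false: neither Windows nor PS4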
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/iterator.h b/contrib/llvm-project/llvm/include/llvm/ADT/iterator.h
index 9a1f6e1511e7..6625a3f6179e 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/iterator.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/iterator.h
@@ -142,28 +142,30 @@ public:
return tmp;
}
+#ifndef __cpp_impl_three_way_comparison
bool operator!=(const DerivedT &RHS) const {
- return !static_cast<const DerivedT *>(this)->operator==(RHS);
+ return !(static_cast<const DerivedT &>(*this) == RHS);
}
+#endif
bool operator>(const DerivedT &RHS) const {
static_assert(
IsRandomAccess,
"Relational operators are only defined for random access iterators.");
- return !static_cast<const DerivedT *>(this)->operator<(RHS) &&
- !static_cast<const DerivedT *>(this)->operator==(RHS);
+ return !(static_cast<const DerivedT &>(*this) < RHS) &&
+ !(static_cast<const DerivedT &>(*this) == RHS);
}
bool operator<=(const DerivedT &RHS) const {
static_assert(
IsRandomAccess,
"Relational operators are only defined for random access iterators.");
- return !static_cast<const DerivedT *>(this)->operator>(RHS);
+ return !(static_cast<const DerivedT &>(*this) > RHS);
}
bool operator>=(const DerivedT &RHS) const {
static_assert(
IsRandomAccess,
"Relational operators are only defined for random access iterators.");
- return !static_cast<const DerivedT *>(this)->operator<(RHS);
+ return !(static_cast<const DerivedT &>(*this) < RHS);
}
PointerT operator->() { return &static_cast<DerivedT *>(this)->operator*(); }
@@ -260,12 +262,16 @@ public:
return *static_cast<DerivedT *>(this);
}
- bool operator==(const DerivedT &RHS) const { return I == RHS.I; }
- bool operator<(const DerivedT &RHS) const {
+ friend bool operator==(const iterator_adaptor_base &LHS,
+ const iterator_adaptor_base &RHS) {
+ return LHS.I == RHS.I;
+ }
+ friend bool operator<(const iterator_adaptor_base &LHS,
+ const iterator_adaptor_base &RHS) {
static_assert(
BaseT::IsRandomAccess,
"Relational operators are only defined for random access iterators.");
- return I < RHS.I;
+ return LHS.I < RHS.I;
}
ReferenceT operator*() const { return *I; }
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h b/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h
index f038f6bf2128..a9b46a3aa45b 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h
@@ -18,7 +18,6 @@
#ifndef LLVM_ADT_ITERATOR_RANGE_H
#define LLVM_ADT_ITERATOR_RANGE_H
-#include <iterator>
#include <utility>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/simple_ilist.h b/contrib/llvm-project/llvm/include/llvm/ADT/simple_ilist.h
index 9257b47b9cf8..d4b6be347219 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/simple_ilist.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/simple_ilist.h
@@ -28,8 +28,8 @@ namespace llvm {
/// This is a simple intrusive list for a \c T that inherits from \c
/// ilist_node<T>. The list never takes ownership of anything inserted in it.
///
-/// Unlike \a iplist<T> and \a ilist<T>, \a simple_ilist<T> never allocates or
-/// deletes values, and has no callback traits.
+/// Unlike \a iplist<T> and \a ilist<T>, \a simple_ilist<T> never deletes
+/// values, and has no callback traits.
///
/// The API for adding nodes include \a push_front(), \a push_back(), and \a
/// insert(). These all take values by reference (not by pointer), except for
@@ -52,7 +52,7 @@ namespace llvm {
/// to calling \a std::for_each() on the range to be discarded.
///
/// The currently available \p Options customize the nodes in the list. The
-/// same options must be specified in the \a ilist_node instantation for
+/// same options must be specified in the \a ilist_node instantiation for
/// compatibility (although the order is irrelevant).
/// \li Use \a ilist_tag to designate which ilist_node for a given \p T this
/// list should use. This is useful if a type \p T is part of multiple,
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
index c35ee2f499de..9f7461243f35 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -42,10 +42,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <cstdint>
@@ -56,9 +52,17 @@
namespace llvm {
class AnalysisUsage;
+class AtomicCmpXchgInst;
class BasicAAResult;
class BasicBlock;
+class CatchPadInst;
+class CatchReturnInst;
class DominatorTree;
+class FenceInst;
+class Function;
+class InvokeInst;
+class PreservedAnalyses;
+class TargetLibraryInfo;
class Value;
/// The possible results of an alias query.
@@ -342,12 +346,28 @@ createModRefInfo(const FunctionModRefBehavior FMRB) {
class AAQueryInfo {
public:
using LocPair = std::pair<MemoryLocation, MemoryLocation>;
- using AliasCacheT = SmallDenseMap<LocPair, AliasResult, 8>;
+ struct CacheEntry {
+ AliasResult Result;
+ /// Number of times a NoAlias assumption has been used.
+ /// 0 for assumptions that have not been used, -1 for definitive results.
+ int NumAssumptionUses;
+ /// Whether this is a definitive (non-assumption) result.
+ bool isDefinitive() const { return NumAssumptionUses < 0; }
+ };
+ using AliasCacheT = SmallDenseMap<LocPair, CacheEntry, 8>;
AliasCacheT AliasCache;
using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>;
IsCapturedCacheT IsCapturedCache;
+ /// How many active NoAlias assumption uses there are.
+ int NumAssumptionUses = 0;
+
+ /// Location pairs for which an assumption based result is currently stored.
+ /// Used to remove all potentially incorrect results from the cache if an
+ /// assumption is disproven.
+ SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults;
+
AAQueryInfo() : AliasCache(), IsCapturedCache() {}
};
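A minimal sketch of how a cache entry is interpreted (illustrative only, not part of the diff):

  llvm::AAQueryInfo::CacheEntry Definitive{llvm::MayAlias, /*NumAssumptionUses=*/-1};
  llvm::AAQueryInfo::CacheEntry Assumed{llvm::NoAlias, /*NumAssumptionUses=*/0};
  bool IsFinal = Definitive.isDefinitive(); // true: a proven result
  bool IsGuess = Assumed.isDefinitive();    // false: an unproven NoAlias assumption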
@@ -401,7 +421,8 @@ public:
/// A convenience wrapper around the primary \c alias interface.
AliasResult alias(const Value *V1, const Value *V2) {
- return alias(V1, LocationSize::unknown(), V2, LocationSize::unknown());
+ return alias(MemoryLocation::getBeforeOrAfter(V1),
+ MemoryLocation::getBeforeOrAfter(V2));
}
/// A trivial helper function to check to see if the specified pointers are
@@ -418,7 +439,8 @@ public:
/// A convenience wrapper around the \c isNoAlias helper interface.
bool isNoAlias(const Value *V1, const Value *V2) {
- return isNoAlias(MemoryLocation(V1), MemoryLocation(V2));
+ return isNoAlias(MemoryLocation::getBeforeOrAfter(V1),
+ MemoryLocation::getBeforeOrAfter(V2));
}
/// A trivial helper function to check to see if the specified pointers are
@@ -440,7 +462,7 @@ public:
/// A convenience wrapper around the primary \c pointsToConstantMemory
/// interface.
bool pointsToConstantMemory(const Value *P, bool OrLocal = false) {
- return pointsToConstantMemory(MemoryLocation(P), OrLocal);
+ return pointsToConstantMemory(MemoryLocation::getBeforeOrAfter(P), OrLocal);
}
/// @}
@@ -533,7 +555,7 @@ public:
/// write at most from objects pointed to by their pointer-typed arguments
/// (with arbitrary offsets).
static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB) {
- return !(MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees);
+ return !((unsigned)MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees);
}
/// Checks if functions with the specified behavior are known to potentially
@@ -541,26 +563,27 @@ public:
/// (with arbitrary offsets).
static bool doesAccessArgPointees(FunctionModRefBehavior MRB) {
return isModOrRefSet(createModRefInfo(MRB)) &&
- (MRB & FMRL_ArgumentPointees);
+ ((unsigned)MRB & FMRL_ArgumentPointees);
}
/// Checks if functions with the specified behavior are known to read and
/// write at most from memory that is inaccessible from LLVM IR.
static bool onlyAccessesInaccessibleMem(FunctionModRefBehavior MRB) {
- return !(MRB & FMRL_Anywhere & ~FMRL_InaccessibleMem);
+ return !((unsigned)MRB & FMRL_Anywhere & ~FMRL_InaccessibleMem);
}
/// Checks if functions with the specified behavior are known to potentially
/// read or write from memory that is inaccessible from LLVM IR.
static bool doesAccessInaccessibleMem(FunctionModRefBehavior MRB) {
- return isModOrRefSet(createModRefInfo(MRB)) && (MRB & FMRL_InaccessibleMem);
+ return isModOrRefSet(createModRefInfo(MRB)) &&
+ ((unsigned)MRB & FMRL_InaccessibleMem);
}
/// Checks if functions with the specified behavior are known to read and
/// write at most from memory that is inaccessible from LLVM IR or objects
/// pointed to by their pointer-typed arguments (with arbitrary offsets).
static bool onlyAccessesInaccessibleOrArgMem(FunctionModRefBehavior MRB) {
- return !(MRB & FMRL_Anywhere &
+ return !((unsigned)MRB & FMRL_Anywhere &
~(FMRL_InaccessibleMem | FMRL_ArgumentPointees));
}
@@ -760,40 +783,7 @@ private:
AAQueryInfo &AAQI);
ModRefInfo getModRefInfo(const Instruction *I,
const Optional<MemoryLocation> &OptLoc,
- AAQueryInfo &AAQIP) {
- if (OptLoc == None) {
- if (const auto *Call = dyn_cast<CallBase>(I)) {
- return createModRefInfo(getModRefBehavior(Call));
- }
- }
-
- const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
-
- switch (I->getOpcode()) {
- case Instruction::VAArg:
- return getModRefInfo((const VAArgInst *)I, Loc, AAQIP);
- case Instruction::Load:
- return getModRefInfo((const LoadInst *)I, Loc, AAQIP);
- case Instruction::Store:
- return getModRefInfo((const StoreInst *)I, Loc, AAQIP);
- case Instruction::Fence:
- return getModRefInfo((const FenceInst *)I, Loc, AAQIP);
- case Instruction::AtomicCmpXchg:
- return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP);
- case Instruction::AtomicRMW:
- return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP);
- case Instruction::Call:
- return getModRefInfo((const CallInst *)I, Loc, AAQIP);
- case Instruction::Invoke:
- return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
- case Instruction::CatchPad:
- return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP);
- case Instruction::CatchRet:
- return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP);
- default:
- return ModRefInfo::NoModRef;
- }
- }
+ AAQueryInfo &AAQIP);
class Concept;
@@ -807,6 +797,9 @@ private:
std::vector<AnalysisKey *> AADeps;
+ /// Query depth used to distinguish recursive queries.
+ unsigned Depth = 0;
+
friend class BatchAAResults;
};
@@ -847,6 +840,13 @@ public:
FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
return AA.getModRefBehavior(Call);
}
+ bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+ return alias(LocA, LocB) == MustAlias;
+ }
+ bool isMustAlias(const Value *V1, const Value *V2) {
+ return alias(MemoryLocation(V1, LocationSize::precise(1)),
+ MemoryLocation(V2, LocationSize::precise(1))) == MustAlias;
+ }
};
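A usage sketch for the new convenience overloads (illustrative; AA, P1 and P2 are assumed to be an existing AAResults instance and two Value pointers):

  llvm::BatchAAResults BatchAA(AA);
  if (BatchAA.isMustAlias(P1, P2)) {
    // Repeated queries through BatchAA share a single AAQueryInfo cache.
  }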
/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
@@ -1107,9 +1107,6 @@ public:
/// Return true if this pointer is returned by a noalias function.
bool isNoAliasCall(const Value *V);
-/// Return true if this is an argument with the noalias attribute.
-bool isNoAliasArgument(const Value *V);
-
/// Return true if this pointer refers to a distinct and identifiable object.
/// This returns true for:
/// Global Variables and Functions (but not Global Aliases)
@@ -1157,12 +1154,7 @@ public:
ResultGetters.push_back(&getModuleAAResultImpl<AnalysisT>);
}
- Result run(Function &F, FunctionAnalysisManager &AM) {
- Result R(AM.getResult<TargetLibraryAnalysis>(F));
- for (auto &Getter : ResultGetters)
- (*Getter)(F, AM, R);
- return R;
- }
+ Result run(Function &F, FunctionAnalysisManager &AM);
private:
friend AnalysisInfoMixin<AAManager>;
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasSetTracker.h b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasSetTracker.h
index 690a94d9cf2c..b27fd5aa92a7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -20,9 +20,10 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include <cassert>
@@ -33,6 +34,7 @@
namespace llvm {
+class AAResults;
class AliasSetTracker;
class BasicBlock;
class LoadInst;
@@ -45,6 +47,8 @@ class StoreInst;
class VAArgInst;
class Value;
+enum AliasResult : uint8_t;
+
class AliasSet : public ilist_node<AliasSet> {
friend class AliasSetTracker;
@@ -293,7 +297,7 @@ private:
void addPointer(AliasSetTracker &AST, PointerRec &Entry, LocationSize Size,
const AAMDNodes &AAInfo, bool KnownMustAlias = false,
bool SkipSizeUpdate = false);
- void addUnknownInst(Instruction *I, AliasAnalysis &AA);
+ void addUnknownInst(Instruction *I, AAResults &AA);
void removeUnknownInst(AliasSetTracker &AST, Instruction *I) {
bool WasEmpty = UnknownInsts.empty();
@@ -311,8 +315,8 @@ public:
/// If the specified pointer "may" (or must) alias one of the members in the
/// set return the appropriate AliasResult. Otherwise return NoAlias.
AliasResult aliasesPointer(const Value *Ptr, LocationSize Size,
- const AAMDNodes &AAInfo, AliasAnalysis &AA) const;
- bool aliasesUnknownInst(const Instruction *Inst, AliasAnalysis &AA) const;
+ const AAMDNodes &AAInfo, AAResults &AA) const;
+ bool aliasesUnknownInst(const Instruction *Inst, AAResults &AA) const;
};
inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) {
@@ -338,7 +342,7 @@ class AliasSetTracker {
/// handle.
struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {};
- AliasAnalysis &AA;
+ AAResults &AA;
MemorySSA *MSSA = nullptr;
Loop *L = nullptr;
ilist<AliasSet> AliasSets;
@@ -352,9 +356,9 @@ class AliasSetTracker {
public:
/// Create an empty collection of AliasSets, and use the specified alias
/// analysis object to disambiguate load and store addresses.
- explicit AliasSetTracker(AliasAnalysis &aa) : AA(aa) {}
- explicit AliasSetTracker(AliasAnalysis &aa, MemorySSA *mssa, Loop *l)
- : AA(aa), MSSA(mssa), L(l) {}
+ explicit AliasSetTracker(AAResults &AA) : AA(AA) {}
+ explicit AliasSetTracker(AAResults &AA, MemorySSA *MSSA, Loop *L)
+ : AA(AA), MSSA(MSSA), L(L) {}
~AliasSetTracker() { clear(); }
/// These methods are used to add different types of instructions to the alias
@@ -393,7 +397,7 @@ public:
AliasSet &getAliasSetFor(const MemoryLocation &MemLoc);
/// Return the underlying alias analysis object used by this tracker.
- AliasAnalysis &getAliasAnalysis() const { return AA; }
+ AAResults &getAliasAnalysis() const { return AA; }
/// This method is used to remove a pointer value from the AliasSetTracker
/// entirely. It should be used when an instruction is deleted from the
@@ -457,6 +461,14 @@ inline raw_ostream& operator<<(raw_ostream &OS, const AliasSetTracker &AST) {
return OS;
}
+class AliasSetsPrinterPass : public PassInfoMixin<AliasSetsPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit AliasSetsPrinterPass(raw_ostream &OS);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
} // end namespace llvm
#endif // LLVM_ANALYSIS_ALIASSETTRACKER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/AssumptionCache.h b/contrib/llvm-project/llvm/include/llvm/Analysis/AssumptionCache.h
index 0ef63dc68e1c..c4602d3449c0 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/AssumptionCache.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -45,7 +45,7 @@ public:
enum : unsigned { ExprResultIdx = std::numeric_limits<unsigned>::max() };
struct ResultElem {
- WeakTrackingVH Assume;
+ WeakVH Assume;
/// contains either ExprResultIdx or the index of the operand bundle
/// containing the knowledge.
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index 9214bfcd7a24..46b8cd1f3a88 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -18,9 +18,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <algorithm>
@@ -120,6 +117,9 @@ private:
APInt Scale;
+ // Context instruction to use when querying information about this index.
+ const Instruction *CxtI;
+
bool operator==(const VariableGEPIndex &Other) const {
return V == Other.V && ZExtBits == Other.ZExtBits &&
SExtBits == Other.SExtBits && Scale == Other.Scale;
@@ -128,6 +128,17 @@ private:
bool operator!=(const VariableGEPIndex &Other) const {
return !operator==(Other);
}
+
+ void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+ void print(raw_ostream &OS) const {
+ OS << "(V=" << V->getName()
+ << ", zextbits=" << ZExtBits
+ << ", sextbits=" << SExtBits
+ << ", scale=" << Scale << ")";
+ }
};
// Represents the internal structure of a GEP, decomposed into a base pointer,
@@ -135,15 +146,29 @@ private:
struct DecomposedGEP {
// Base pointer of the GEP
const Value *Base;
- // Total constant offset w.r.t the base from indexing into structs
- APInt StructOffset;
- // Total constant offset w.r.t the base from indexing through
- // pointers/arrays/vectors
- APInt OtherOffset;
+ // Total constant offset from base.
+ APInt Offset;
// Scaled variable (non-constant) indices.
SmallVector<VariableGEPIndex, 4> VarIndices;
// Is GEP index scale compile-time constant.
bool HasCompileTimeConstantScale;
+
+ void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+ void print(raw_ostream &OS) const {
+ OS << "(DecomposedGEP Base=" << Base->getName()
+ << ", Offset=" << Offset
+ << ", VarIndices=[";
+ for (size_t i = 0; i < VarIndices.size(); i++) {
+ if (i != 0)
+ OS << ", ";
+ VarIndices[i].print(OS);
+ }
+ OS << "], HasCompileTimeConstantScale=" << HasCompileTimeConstantScale
+ << ")";
+ }
};
/// Tracks phi nodes we have visited.
@@ -171,8 +196,9 @@ private:
const DataLayout &DL, unsigned Depth, AssumptionCache *AC,
DominatorTree *DT, bool &NSW, bool &NUW);
- static bool DecomposeGEPExpression(const Value *V, DecomposedGEP &Decomposed,
- const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT);
+ static DecomposedGEP
+ DecomposeGEPExpression(const Value *V, const DataLayout &DL,
+ AssumptionCache *AC, DominatorTree *DT);
static bool isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
@@ -206,18 +232,23 @@ private:
AliasResult aliasPHI(const PHINode *PN, LocationSize PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderV2, AAQueryInfo &AAQI);
+ AAQueryInfo &AAQI);
AliasResult aliasSelect(const SelectInst *SI, LocationSize SISize,
const AAMDNodes &SIAAInfo, const Value *V2,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderV2, AAQueryInfo &AAQI);
+ AAQueryInfo &AAQI);
AliasResult aliasCheck(const Value *V1, LocationSize V1Size,
- AAMDNodes V1AATag, const Value *V2,
- LocationSize V2Size, AAMDNodes V2AATag,
- AAQueryInfo &AAQI, const Value *O1 = nullptr,
- const Value *O2 = nullptr);
+ const AAMDNodes &V1AATag, const Value *V2,
+ LocationSize V2Size, const AAMDNodes &V2AATag,
+ AAQueryInfo &AAQI);
+
+ AliasResult aliasCheckRecursive(const Value *V1, LocationSize V1Size,
+ const AAMDNodes &V1AATag, const Value *V2,
+ LocationSize V2Size, const AAMDNodes &V2AATag,
+ AAQueryInfo &AAQI, const Value *O1,
+ const Value *O2);
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 868da7a64f68..c22787531117 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -169,7 +169,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) {
/// algorithms for BlockFrequencyInfoImplBase. Only algorithms that depend on
/// the block type (or that call such algorithms) are skipped here.
///
-/// Nevertheless, the majority of the overall algorithm documention lives with
+/// Nevertheless, the majority of the overall algorithm documentation lives with
/// BlockFrequencyInfoImpl. See there for details.
class BlockFrequencyInfoImplBase {
public:
@@ -458,7 +458,7 @@ public:
/// Analyze irreducible SCCs.
///
- /// Separate irreducible SCCs from \c G, which is an explict graph of \c
+ /// Separate irreducible SCCs from \c G, which is an explicit graph of \c
/// OuterLoop (or the top-level function, if \c OuterLoop is \c nullptr).
/// Insert them into \a Loops before \c Insert.
///
@@ -706,7 +706,7 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
///
/// In addition to loops, this algorithm has limited support for irreducible
/// SCCs, which are SCCs with multiple entry blocks. Irreducible SCCs are
-/// discovered on they fly, and modelled as loops with multiple headers.
+/// discovered on the fly, and modelled as loops with multiple headers.
///
/// The headers of irreducible sub-SCCs consist of its entry blocks and all
/// nodes that are targets of a backedge within it (excluding backedges within
@@ -1246,7 +1246,7 @@ bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
}
}
// As a heuristic, if some headers don't have a weight, give them the
- // minimium weight seen (not to disrupt the existing trends too much by
+ // minimum weight seen (not to disrupt the existing trends too much by
// using a weight that's in the general range of the other headers' weights,
// and the minimum seems to perform better than the average.)
// FIXME: better update in the passes that drop the header weight.
@@ -1449,8 +1449,8 @@ void BlockFrequencyInfoImpl<BT>::verifyMatch(
BlockNode Node = Entry.second;
if (OtherValidNodes.count(BB)) {
BlockNode OtherNode = OtherValidNodes[BB];
- auto Freq = Freqs[Node.Index];
- auto OtherFreq = Other.Freqs[OtherNode.Index];
+ const auto &Freq = Freqs[Node.Index];
+ const auto &OtherFreq = Other.Freqs[OtherNode.Index];
if (Freq.Integer != OtherFreq.Integer) {
Match = false;
dbgs() << "Freq mismatch: " << bfi_detail::getBlockName(BB) << " "
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index 3e72afba36c3..6a286236a80e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -27,13 +27,16 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <memory>
#include <utility>
namespace llvm {
class Function;
+class Loop;
class LoopInfo;
class raw_ostream;
+class DominatorTree;
class PostDominatorTree;
class TargetLibraryInfo;
class Value;
@@ -50,20 +53,79 @@ class Value;
/// identify an edge, since we can have multiple edges from Src to Dst.
/// As an example, we can have a switch which jumps to Dst with value 0 and
/// value 10.
+///
+/// The process of computing branch probabilities can be logically viewed as a
+/// three-step process:
+///
+/// First, if there is profile information associated with the branch, then it
+/// is trivially translated to branch probabilities. There is one exception to
+/// this rule, though. Probabilities for edges leading to "unreachable" blocks
+/// (blocks with an estimated weight not greater than UNREACHABLE_WEIGHT) are
+/// evaluated according to static estimation and override the profile
+/// information. If no branch probabilities were calculated in this step, then
+/// take the next one.
+///
+/// Second, estimate absolute execution weights for each block based on
+/// statically known information. Roots of such information are "cold",
+/// "unreachable", "noreturn" and "unwind" blocks. Those blocks get their
+/// weights set to BlockExecWeight::COLD, BlockExecWeight::UNREACHABLE,
+/// BlockExecWeight::NORETURN and BlockExecWeight::UNWIND respectively. Then the
+/// weights are propagated to the other blocks up the domination line. In
+/// addition, if all successors have estimated weights set, then the maximum of
+/// these weights is assigned to the block itself (while this is not an ideal
+/// heuristic in theory, it's simple and works reasonably well in most cases)
+/// and the process repeats. Once the weight propagation converges, branch
+/// probabilities are set for all branches that have at least one successor
+/// with its weight set. The default execution weight (BlockExecWeight::DEFAULT)
+/// is used for any successor that doesn't have its weight set. For loop back
+/// branches we use their weights scaled by a loop trip count equal to
+/// 'LBH_TAKEN_WEIGHT/LBH_NOTTAKEN_WEIGHT'.
+///
+/// Here is a simple example demonstrating how the described algorithm works.
+///
+///            BB1
+///           /   \
+///          v     v
+///        BB2     BB3
+///       /   \
+///      v     v
+///  ColdBB   UnreachBB
+///
+/// Initially, ColdBB is associated with COLD_WEIGHT and UnreachBB with
+/// UNREACHABLE_WEIGHT. COLD_WEIGHT is assigned to BB2 as the maximum of its
+/// successors' weights. BB1 and BB3 have no explicit estimated weights and are
+/// assumed to have DEFAULT_WEIGHT. Based on the assigned weights, the branches
+/// will have the following probabilities:
+/// P(BB1->BB2) = COLD_WEIGHT/(COLD_WEIGHT + DEFAULT_WEIGHT) =
+/// 0xffff / (0xffff + 0xfffff) = 0.0588(5.9%)
+/// P(BB1->BB3) = DEFAULT_WEIGHT/(COLD_WEIGHT + DEFAULT_WEIGHT) =
+/// 0xfffff / (0xffff + 0xfffff) = 0.941(94.1%)
+/// P(BB2->ColdBB) = COLD_WEIGHT/(COLD_WEIGHT + UNREACHABLE_WEIGHT) = 1(100%)
+/// P(BB2->UnreachBB) =
+/// UNREACHABLE_WEIGHT/(COLD_WEIGHT+UNREACHABLE_WEIGHT) = 0(0%)
+///
+/// If no branch probabilities were calculated in this step, then take the next
+/// one.
+///
+/// Third, apply different kinds of local heuristics for each individual
+/// branch until the first match. For example, the probability of a pointer
+/// being null is estimated as PH_TAKEN_WEIGHT/(PH_TAKEN_WEIGHT +
+/// PH_NONTAKEN_WEIGHT). If no local heuristic matches, the branch is left with
+/// no explicit probability set and is assumed to have the default probability.
class BranchProbabilityInfo {
public:
BranchProbabilityInfo() = default;
BranchProbabilityInfo(const Function &F, const LoopInfo &LI,
const TargetLibraryInfo *TLI = nullptr,
+ DominatorTree *DT = nullptr,
PostDominatorTree *PDT = nullptr) {
- calculate(F, LI, TLI, PDT);
+ calculate(F, LI, TLI, DT, PDT);
}
BranchProbabilityInfo(BranchProbabilityInfo &&Arg)
: Probs(std::move(Arg.Probs)), LastF(Arg.LastF),
- PostDominatedByUnreachable(std::move(Arg.PostDominatedByUnreachable)),
- PostDominatedByColdCall(std::move(Arg.PostDominatedByColdCall)) {}
+ EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) {}
BranchProbabilityInfo(const BranchProbabilityInfo &) = delete;
BranchProbabilityInfo &operator=(const BranchProbabilityInfo &) = delete;
@@ -71,8 +133,7 @@ public:
BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) {
releaseMemory();
Probs = std::move(RHS.Probs);
- PostDominatedByColdCall = std::move(RHS.PostDominatedByColdCall);
- PostDominatedByUnreachable = std::move(RHS.PostDominatedByUnreachable);
+ EstimatedBlockWeight = std::move(RHS.EstimatedBlockWeight);
return *this;
}
@@ -121,16 +182,6 @@ public:
raw_ostream &printEdgeProbability(raw_ostream &OS, const BasicBlock *Src,
const BasicBlock *Dst) const;
-protected:
- /// Set the raw edge probability for the given edge.
- ///
- /// This allows a pass to explicitly set the edge probability for an edge. It
- /// can be used when updating the CFG to update and preserve the branch
- /// probability information. Read the implementation of how these edge
- /// probabilities are calculated carefully before using!
- void setEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors,
- BranchProbability Prob);
-
public:
/// Set the raw probabilities for all edges from the given block.
///
@@ -140,24 +191,85 @@ public:
void setEdgeProbability(const BasicBlock *Src,
const SmallVectorImpl<BranchProbability> &Probs);
+ /// Copy outgoing edge probabilities from \p Src to \p Dst.
+ ///
+ /// This allows probabilities to be kept unset for the destination if they
+ /// were unset for the source.
+ void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst);
+
static BranchProbability getBranchProbStackProtector(bool IsLikely) {
static const BranchProbability LikelyProb((1u << 20) - 1, 1u << 20);
return IsLikely ? LikelyProb : LikelyProb.getCompl();
}
void calculate(const Function &F, const LoopInfo &LI,
- const TargetLibraryInfo *TLI, PostDominatorTree *PDT);
+ const TargetLibraryInfo *TLI, DominatorTree *DT,
+ PostDominatorTree *PDT);
/// Forget analysis results for the given basic block.
void eraseBlock(const BasicBlock *BB);
- // Use to track SCCs for handling irreducible loops.
- using SccMap = DenseMap<const BasicBlock *, int>;
- using SccHeaderMap = DenseMap<const BasicBlock *, bool>;
- using SccHeaderMaps = std::vector<SccHeaderMap>;
- struct SccInfo {
+ // Data structure to track SCCs for handling irreducible loops.
+ class SccInfo {
+ // Enum of types used to classify basic blocks in an SCC. A basic block
+ // belonging to an SCC is 'Inner' unless it is either a 'Header' or an
+ // 'Exiting' block. Note that a basic block can be a 'Header' and an
+ // 'Exiting' block at the same time.
+ enum SccBlockType {
+ Inner = 0x0,
+ Header = 0x1,
+ Exiting = 0x2,
+ };
+ // Map of basic blocks to the SCC IDs they belong to. If a basic block
+ // doesn't belong to any SCC it is not in the map.
+ using SccMap = DenseMap<const BasicBlock *, int>;
+ // Each basic block in an SCC is attributed with one or several types from
+ // SccBlockType. The map value has type uint32_t (instead of SccBlockType)
+ // since a basic block may be, for example, both "Header" and "Exiting" at
+ // the same time, so we need to be able to keep more than one value from
+ // SccBlockType.
+ using SccBlockTypeMap = DenseMap<const BasicBlock *, uint32_t>;
+ // Vector containing the classification of basic blocks for all SCCs, where
+ // the i'th vector element corresponds to the SCC with ID equal to i.
+ using SccBlockTypeMaps = std::vector<SccBlockTypeMap>;
+
SccMap SccNums;
- SccHeaderMaps SccHeaders;
+ SccBlockTypeMaps SccBlocks;
+
+ public:
+ explicit SccInfo(const Function &F);
+
+ /// If \p BB belongs to some SCC then the ID of that SCC is returned,
+ /// otherwise -1 is returned. If \p BB belongs to more than one SCC at the
+ /// same time the result is undefined.
+ int getSCCNum(const BasicBlock *BB) const;
+ /// Returns true if \p BB is a 'header' block in SCC with \p SccNum ID,
+ /// false otherwise.
+ bool isSCCHeader(const BasicBlock *BB, int SccNum) const {
+ return getSccBlockType(BB, SccNum) & Header;
+ }
+ /// Returns true if \p BB is an 'exiting' block in SCC with \p SccNum ID,
+ /// false otherwise.
+ bool isSCCExitingBlock(const BasicBlock *BB, int SccNum) const {
+ return getSccBlockType(BB, SccNum) & Exiting;
+ }
+ /// Fills in the \p Enters vector with all blocks that don't belong to the
+ /// SCC with \p SccNum ID but have an edge to a block belonging to that
+ /// SCC.
+ void getSccEnterBlocks(int SccNum,
+ SmallVectorImpl<BasicBlock *> &Enters) const;
+ /// Fills in the \p Exits vector with all blocks that don't belong to the
+ /// SCC with \p SccNum ID but have an incoming edge from a block belonging
+ /// to that SCC.
+ void getSccExitBlocks(int SccNum,
+ SmallVectorImpl<BasicBlock *> &Exits) const;
+
+ private:
+ /// Returns \p BB's type according to the classification given by the
+ /// SccBlockType enum. Please note that \p BB must belong to the SCC with
+ /// \p SccNum ID.
+ uint32_t getSccBlockType(const BasicBlock *BB, int SccNum) const;
+ /// Calculates \p BB's type and stores it in internal data structures for
+ /// future use. Please note that \p BB must belong to the SCC with \p SccNum
+ /// ID.
+ void calculateSccBlockType(const BasicBlock *BB, int SccNum);
};
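A standalone sketch (illustrative, not LLVM code) of the bit-flag classification SccInfo
keeps per block: a block may carry several SccBlockType bits at once, which is why the
map value is a uint32_t rather than the enum itself.

#include <cassert>
#include <cstdint>

enum SccBlockType : uint32_t { Inner = 0x0, Header = 0x1, Exiting = 0x2 };

int main() {
  // A block that is both a header and an exiting block of its SCC.
  uint32_t Type = Header | Exiting;
  assert((Type & Header) != 0);  // analogous to isSCCHeader()
  assert((Type & Exiting) != 0); // analogous to isSCCExitingBlock()
  // A plain inner block carries no bits at all.
  uint32_t InnerType = Inner;
  assert((InnerType & (Header | Exiting)) == 0);
  return 0;
}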
private:
@@ -169,7 +281,6 @@ private:
void deleted() override {
assert(BPI != nullptr);
BPI->eraseBlock(cast<BasicBlock>(getValPtr()));
- BPI->Handles.erase(*this);
}
public:
@@ -177,44 +288,132 @@ private:
: CallbackVH(const_cast<Value *>(V)), BPI(BPI) {}
};
+ /// Pair of Loop and SCC ID number. Used to unify handling of normal and
+ /// SCC based loop representations.
+ using LoopData = std::pair<Loop *, int>;
+ /// Helper class to keep basic block along with its loop data information.
+ class LoopBlock {
+ public:
+ explicit LoopBlock(const BasicBlock *BB, const LoopInfo &LI,
+ const SccInfo &SccI);
+
+ const BasicBlock *getBlock() const { return BB; }
+ BasicBlock *getBlock() { return const_cast<BasicBlock *>(BB); }
+ LoopData getLoopData() const { return LD; }
+ Loop *getLoop() const { return LD.first; }
+ int getSccNum() const { return LD.second; }
+
+ bool belongsToLoop() const { return getLoop() || getSccNum() != -1; }
+ bool belongsToSameLoop(const LoopBlock &LB) const {
+ return (LB.getLoop() && getLoop() == LB.getLoop()) ||
+ (LB.getSccNum() != -1 && getSccNum() == LB.getSccNum());
+ }
+
+ private:
+ const BasicBlock *const BB = nullptr;
+ LoopData LD = {nullptr, -1};
+ };
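A minimal sketch of the LoopData idea above, with placeholder types standing in for the
LLVM ones: a block is described either by the natural loop it belongs to or by an SCC ID
(for irreducible loops), and two blocks are "in the same loop" if either component matches.

#include <cassert>
#include <utility>

struct DummyLoop {}; // stand-in for llvm::Loop

using LoopData = std::pair<DummyLoop *, int>; // {loop, SCC id}; {nullptr, -1} = none

bool belongsToLoop(const LoopData &LD) { return LD.first || LD.second != -1; }

bool belongsToSameLoop(const LoopData &A, const LoopData &B) {
  return (B.first && A.first == B.first) ||
         (B.second != -1 && A.second == B.second);
}

int main() {
  DummyLoop L;
  LoopData InLoop{&L, -1}, InScc{nullptr, 3}, Free{nullptr, -1};
  assert(belongsToLoop(InLoop) && belongsToLoop(InScc) && !belongsToLoop(Free));
  assert(belongsToSameLoop(InScc, LoopData{nullptr, 3}));
  assert(!belongsToSameLoop(InLoop, InScc));
  return 0;
}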
+
+ // Pair of LoopBlocks representing an edge from first to second block.
+ using LoopEdge = std::pair<const LoopBlock &, const LoopBlock &>;
+
DenseSet<BasicBlockCallbackVH, DenseMapInfo<Value*>> Handles;
// Since we allow duplicate edges from one basic block to another, we use
// a pair (PredBlock and an index in the successors) to specify an edge.
using Edge = std::pair<const BasicBlock *, unsigned>;
- // Default weight value. Used when we don't have information about the edge.
- // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of
- // the successors have a weight yet. But it doesn't make sense when providing
- // weight to an edge that may have siblings with non-zero weights. This can
- // be handled various ways, but it's probably fine for an edge with unknown
- // weight to just "inherit" the non-zero weight of an adjacent successor.
- static const uint32_t DEFAULT_WEIGHT = 16;
-
DenseMap<Edge, BranchProbability> Probs;
/// Track the last function we run over for printing.
const Function *LastF = nullptr;
- /// Track the set of blocks directly succeeded by a returning block.
- SmallPtrSet<const BasicBlock *, 16> PostDominatedByUnreachable;
+ const LoopInfo *LI = nullptr;
+
+ /// Keeps information about all SCCs in a function.
+ std::unique_ptr<const SccInfo> SccI;
- /// Track the set of blocks that always lead to a cold call.
- SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall;
+ /// Keeps mapping of a basic block to its estimated weight.
+ SmallDenseMap<const BasicBlock *, uint32_t> EstimatedBlockWeight;
+
+ /// Keeps mapping of a loop to estimated weight to enter the loop.
+ SmallDenseMap<LoopData, uint32_t> EstimatedLoopWeight;
+
+ /// Helper to construct LoopBlock for \p BB.
+ LoopBlock getLoopBlock(const BasicBlock *BB) const {
+ return LoopBlock(BB, *LI, *SccI.get());
+ }
- void computePostDominatedByUnreachable(const Function &F,
- PostDominatorTree *PDT);
- void computePostDominatedByColdCall(const Function &F,
- PostDominatorTree *PDT);
- bool calcUnreachableHeuristics(const BasicBlock *BB);
+ /// Returns true if the destination block belongs to some loop and the source
+ /// block either doesn't belong to any loop or belongs to a loop which is not
+ /// inner relative to the destination block.
+ bool isLoopEnteringEdge(const LoopEdge &Edge) const;
+ /// Returns true if the source block belongs to some loop and the destination
+ /// block either doesn't belong to any loop or belongs to a loop which is not
+ /// inner relative to the source block.
+ bool isLoopExitingEdge(const LoopEdge &Edge) const;
+ /// Returns true if \p Edge either enters or exits some loop, false in all
+ /// other cases.
+ bool isLoopEnteringExitingEdge(const LoopEdge &Edge) const;
+ /// Returns true if the source and destination blocks belong to the same loop
+ /// and the destination block is the loop header.
+ bool isLoopBackEdge(const LoopEdge &Edge) const;
+ // Fills in \p Enters vector with all "enter" blocks of a loop \p LB belongs to.
+ void getLoopEnterBlocks(const LoopBlock &LB,
+ SmallVectorImpl<BasicBlock *> &Enters) const;
+ // Fills in \p Exits vector with all "exit" blocks from a loop \p LB belongs to.
+ void getLoopExitBlocks(const LoopBlock &LB,
+ SmallVectorImpl<BasicBlock *> &Exits) const;
+
+ /// Returns estimated weight for \p BB. None if \p BB has no estimated weight.
+ Optional<uint32_t> getEstimatedBlockWeight(const BasicBlock *BB) const;
+
+ /// Returns the estimated weight to enter \p L. In other words it is the
+ /// weight of the loop's header block, not scaled by trip count. Returns None
+ /// if \p L has no estimated weight.
+ Optional<uint32_t> getEstimatedLoopWeight(const LoopData &L) const;
+
+ /// Return estimated weight for \p Edge. Returns None if estimated weight is
+ /// unknown.
+ Optional<uint32_t> getEstimatedEdgeWeight(const LoopEdge &Edge) const;
+
+ /// Iterates over all edges leading from \p SrcBB to \p Successors and
+ /// returns the maximum of all estimated weights. If at least one edge has an
+ /// unknown estimated weight, None is returned.
+ template <class IterT>
+ Optional<uint32_t>
+ getMaxEstimatedEdgeWeight(const LoopBlock &SrcBB,
+ iterator_range<IterT> Successors) const;
+
+ /// If \p LoopBB has no estimated weight then set it to \p BBWeight and
+ /// return true. Otherwise \p LoopBB's weight remains unchanged and false is
+ /// returned. In addition, all blocks/loops that might need their weight to
+ /// be re-estimated are put into BlockWorkList/LoopWorkList.
+ bool updateEstimatedBlockWeight(LoopBlock &LoopBB, uint32_t BBWeight,
+ SmallVectorImpl<BasicBlock *> &BlockWorkList,
+ SmallVectorImpl<LoopBlock> &LoopWorkList);
+
+ /// Starting from \p LoopBB (including \p LoopBB itself) propagate \p BBWeight
+ /// up the domination tree.
+ void propagateEstimatedBlockWeight(const LoopBlock &LoopBB, DominatorTree *DT,
+ PostDominatorTree *PDT, uint32_t BBWeight,
+ SmallVectorImpl<BasicBlock *> &WorkList,
+ SmallVectorImpl<LoopBlock> &LoopWorkList);
+
+ /// Returns block's weight encoded in the IR.
+ Optional<uint32_t> getInitialEstimatedBlockWeight(const BasicBlock *BB);
+
+ // Computes estimated weights for all blocks in \p F.
+ void computeEestimateBlockWeight(const Function &F, DominatorTree *DT,
+ PostDominatorTree *PDT);
+
+ /// Based on the weights computed by \p computeEstimatedBlockWeight, set
+ /// probabilities on branches.
+ bool calcEstimatedHeuristics(const BasicBlock *BB);
bool calcMetadataWeights(const BasicBlock *BB);
- bool calcColdCallHeuristics(const BasicBlock *BB);
bool calcPointerHeuristics(const BasicBlock *BB);
- bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI,
- SccInfo &SccI);
bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI);
bool calcFloatingPointHeuristics(const BasicBlock *BB);
- bool calcInvokeHeuristics(const BasicBlock *BB);
};
/// Analysis pass which computes \c BranchProbabilityInfo.
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/CFGPrinter.h b/contrib/llvm-project/llvm/include/llvm/Analysis/CFGPrinter.h
index c4e49ce493ea..53700798b6b3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/CFGPrinter.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/CFGPrinter.h
@@ -18,6 +18,7 @@
#ifndef LLVM_ANALYSIS_CFGPRINTER_H
#define LLVM_ANALYSIS_CFGPRINTER_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/HeatUtils.h"
@@ -141,8 +142,18 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits {
return OS.str();
}
- static std::string getCompleteNodeLabel(const BasicBlock *Node,
- DOTFuncInfo *) {
+ static void eraseComment(std::string &OutStr, unsigned &I, unsigned Idx) {
+ OutStr.erase(OutStr.begin() + I, OutStr.begin() + Idx);
+ --I;
+ }
+
+ static std::string getCompleteNodeLabel(
+ const BasicBlock *Node, DOTFuncInfo *,
+ llvm::function_ref<void(raw_string_ostream &, const BasicBlock &)>
+ HandleBasicBlock = [](raw_string_ostream &OS,
+ const BasicBlock &Node) -> void { OS << Node; },
+ llvm::function_ref<void(std::string &, unsigned &, unsigned)>
+ HandleComment = eraseComment) {
enum { MaxColumns = 80 };
std::string Str;
raw_string_ostream OS(Str);
@@ -152,7 +163,7 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits {
OS << ":";
}
- OS << *Node;
+ HandleBasicBlock(OS, *Node);
std::string OutStr = OS.str();
if (OutStr[0] == '\n')
OutStr.erase(OutStr.begin());
@@ -168,8 +179,7 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits {
LastSpace = 0;
} else if (OutStr[i] == ';') { // Delete comments!
unsigned Idx = OutStr.find('\n', i + 1); // Find end of line
- OutStr.erase(OutStr.begin() + i, OutStr.begin() + Idx);
- --i;
+ HandleComment(OutStr, i, Idx);
} else if (ColNum == MaxColumns) { // Wrap lines.
// Wrap very long names even though we can't find a space.
if (!LastSpace)
@@ -285,7 +295,7 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits {
" fillcolor=\"" + Color + "70\"";
return Attrs;
}
- bool isNodeHidden(const BasicBlock *Node);
+ bool isNodeHidden(const BasicBlock *Node, const DOTFuncInfo *CFGInfo);
void computeHiddenNodes(const Function *F);
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/CGSCCPassManager.h b/contrib/llvm-project/llvm/include/llvm/Analysis/CGSCCPassManager.h
index eb0d3ae8fedf..985424a74054 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -90,6 +90,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -314,6 +315,16 @@ struct CGSCCUpdateResult {
/// for a better technique.
SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
&InlinedInternalEdges;
+
+ /// Weak VHs to keep track of indirect calls for the purposes of detecting
+ /// devirtualization.
+ ///
+ /// This is a map to avoid having duplicate entries. If a Value is
+ /// deallocated, its corresponding WeakTrackingVH will be nulled out. When
+ /// checking if a Value is in the map or not, also check if the corresponding
+ /// WeakTrackingVH is null to avoid issues with a new Value sharing the same
+ /// address as a deallocated one.
+ SmallMapVector<Value *, WeakTrackingVH, 16> IndirectVHs;
};
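A rough standalone analogue (std::weak_ptr in place of WeakTrackingVH) of the membership
check described in the comment above: an entry whose handle has been nulled out must be
treated as absent, since its key address may have been reused by a newly allocated Value.

#include <cassert>
#include <map>
#include <memory>

struct Value {}; // stand-in for llvm::Value

// Stand-in for SmallMapVector<Value *, WeakTrackingVH, 16>.
using IndirectCallMap = std::map<Value *, std::weak_ptr<Value>>;

bool isStillTracked(const IndirectCallMap &M, Value *Key) {
  auto It = M.find(Key);
  // Present only if the entry exists *and* the handle still refers to a live
  // object; an expired handle means the original call instruction went away.
  return It != M.end() && !It->second.expired();
}

int main() {
  IndirectCallMap Calls;
  auto Call = std::make_shared<Value>();
  Calls[Call.get()] = Call;
  assert(isStillTracked(Calls, Call.get()));
  Value *StaleKey = Call.get();
  Call.reset(); // the "Value" is deallocated; the handle expires
  assert(!isStillTracked(Calls, StaleKey));
  return 0;
}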
/// The core module pass which does a post-order walk of the SCCs and
@@ -325,18 +336,15 @@ struct CGSCCUpdateResult {
/// \c CGSCCAnalysisManagerModuleProxy analysis prior to running the CGSCC
/// pass over the module to enable a \c FunctionAnalysisManager to be used
/// within this run safely.
-template <typename CGSCCPassT>
class ModuleToPostOrderCGSCCPassAdaptor
- : public PassInfoMixin<ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>> {
+ : public PassInfoMixin<ModuleToPostOrderCGSCCPassAdaptor> {
public:
- explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass)
- : Pass(std::move(Pass)) {}
+ using PassConceptT =
+ detail::PassConcept<LazyCallGraph::SCC, CGSCCAnalysisManager,
+ LazyCallGraph &, CGSCCUpdateResult &>;
- // We have to explicitly define all the special member functions because MSVC
- // refuses to generate them.
- ModuleToPostOrderCGSCCPassAdaptor(
- const ModuleToPostOrderCGSCCPassAdaptor &Arg)
- : Pass(Arg.Pass) {}
+ explicit ModuleToPostOrderCGSCCPassAdaptor(std::unique_ptr<PassConceptT> Pass)
+ : Pass(std::move(Pass)) {}
ModuleToPostOrderCGSCCPassAdaptor(ModuleToPostOrderCGSCCPassAdaptor &&Arg)
: Pass(std::move(Arg.Pass)) {}
@@ -355,16 +363,22 @@ public:
/// Runs the CGSCC pass across every SCC in the module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ static bool isRequired() { return true; }
+
private:
- CGSCCPassT Pass;
+ std::unique_ptr<PassConceptT> Pass;
};
/// A function to deduce a function pass type and wrap it in the
/// templated adaptor.
template <typename CGSCCPassT>
-ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>
+ModuleToPostOrderCGSCCPassAdaptor
createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) {
- return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass));
+ using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT,
+ PreservedAnalyses, CGSCCAnalysisManager,
+ LazyCallGraph &, CGSCCUpdateResult &>;
+ return ModuleToPostOrderCGSCCPassAdaptor(
+ std::make_unique<PassModelT>(std::move(Pass)));
}
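The adaptor is no longer templated on the pass type; the concrete pass is type-erased
behind detail::PassConcept/PassModel instead. A distilled standalone sketch of that
pattern follows (the names are illustrative, not the actual LLVM detail:: classes):

#include <memory>
#include <utility>

struct PassConcept {               // analogue of detail::PassConcept
  virtual ~PassConcept() = default;
  virtual void run() = 0;
};

template <typename PassT>
struct PassModel : PassConcept {   // analogue of detail::PassModel
  explicit PassModel(PassT P) : Pass(std::move(P)) {}
  void run() override { Pass.run(); }
  PassT Pass;
};

struct Adaptor {                   // analogue of ModuleToPostOrderCGSCCPassAdaptor
  explicit Adaptor(std::unique_ptr<PassConcept> P) : Pass(std::move(P)) {}
  void run() { Pass->run(); }
  std::unique_ptr<PassConcept> Pass;
};

template <typename PassT> Adaptor createAdaptor(PassT Pass) {
  return Adaptor(std::make_unique<PassModel<PassT>>(std::move(Pass)));
}

struct MyPass { void run() {} };

int main() {
  Adaptor A = createAdaptor(MyPass()); // concrete pass type erased at the factory
  A.run();
  return 0;
}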
/// A proxy from a \c FunctionAnalysisManager to an \c SCC.
@@ -442,17 +456,13 @@ LazyCallGraph::SCC &updateCGAndAnalysisManagerForCGSCCPass(
/// \c FunctionAnalysisManagerCGSCCProxy analysis prior to running the function
/// pass over the SCC to enable a \c FunctionAnalysisManager to be used
/// within this run safely.
-template <typename FunctionPassT>
class CGSCCToFunctionPassAdaptor
- : public PassInfoMixin<CGSCCToFunctionPassAdaptor<FunctionPassT>> {
+ : public PassInfoMixin<CGSCCToFunctionPassAdaptor> {
public:
- explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass)
- : Pass(std::move(Pass)) {}
+ using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>;
- // We have to explicitly define all the special member functions because MSVC
- // refuses to generate them.
- CGSCCToFunctionPassAdaptor(const CGSCCToFunctionPassAdaptor &Arg)
- : Pass(Arg.Pass) {}
+ explicit CGSCCToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass)
+ : Pass(std::move(Pass)) {}
CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg)
: Pass(std::move(Arg.Pass)) {}
@@ -469,90 +479,24 @@ public:
/// Runs the function pass across every function in the module.
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
- LazyCallGraph &CG, CGSCCUpdateResult &UR) {
- // Setup the function analysis manager from its proxy.
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
-
- SmallVector<LazyCallGraph::Node *, 4> Nodes;
- for (LazyCallGraph::Node &N : C)
- Nodes.push_back(&N);
-
- // The SCC may get split while we are optimizing functions due to deleting
- // edges. If this happens, the current SCC can shift, so keep track of
- // a pointer we can overwrite.
- LazyCallGraph::SCC *CurrentC = &C;
-
- LLVM_DEBUG(dbgs() << "Running function passes across an SCC: " << C
- << "\n");
-
- PreservedAnalyses PA = PreservedAnalyses::all();
- for (LazyCallGraph::Node *N : Nodes) {
- // Skip nodes from other SCCs. These may have been split out during
- // processing. We'll eventually visit those SCCs and pick up the nodes
- // there.
- if (CG.lookupSCC(*N) != CurrentC)
- continue;
-
- Function &F = N->getFunction();
-
- PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F);
- if (!PI.runBeforePass<Function>(Pass, F))
- continue;
-
- PreservedAnalyses PassPA;
- {
- TimeTraceScope TimeScope(Pass.name());
- PassPA = Pass.run(F, FAM);
- }
-
- PI.runAfterPass<Function>(Pass, F);
-
- // We know that the function pass couldn't have invalidated any other
- // function's analyses (that's the contract of a function pass), so
- // directly handle the function analysis manager's invalidation here.
- FAM.invalidate(F, PassPA);
-
- // Then intersect the preserved set so that invalidation of module
- // analyses will eventually occur when the module pass completes.
- PA.intersect(std::move(PassPA));
-
- // If the call graph hasn't been preserved, update it based on this
- // function pass. This may also update the current SCC to point to
- // a smaller, more refined SCC.
- auto PAC = PA.getChecker<LazyCallGraphAnalysis>();
- if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) {
- CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N,
- AM, UR, FAM);
- assert(
- CG.lookupSCC(*N) == CurrentC &&
- "Current SCC not updated to the SCC containing the current node!");
- }
- }
+ LazyCallGraph &CG, CGSCCUpdateResult &UR);
- // By definition we preserve the proxy. And we preserve all analyses on
- // Functions. This precludes *any* invalidation of function analyses by the
- // proxy, but that's OK because we've taken care to invalidate analyses in
- // the function analysis manager incrementally above.
- PA.preserveSet<AllAnalysesOn<Function>>();
- PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
-
- // We've also ensured that we updated the call graph along the way.
- PA.preserve<LazyCallGraphAnalysis>();
-
- return PA;
- }
+ static bool isRequired() { return true; }
private:
- FunctionPassT Pass;
+ std::unique_ptr<PassConceptT> Pass;
};
/// A function to deduce a function pass type and wrap it in the
/// templated adaptor.
template <typename FunctionPassT>
-CGSCCToFunctionPassAdaptor<FunctionPassT>
+CGSCCToFunctionPassAdaptor
createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) {
- return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass));
+ using PassModelT =
+ detail::PassModel<Function, FunctionPassT, PreservedAnalyses,
+ FunctionAnalysisManager>;
+ return CGSCCToFunctionPassAdaptor(
+ std::make_unique<PassModelT>(std::move(Pass)));
}
/// A helper that repeats an SCC pass each time an indirect call is refined to
@@ -569,410 +513,36 @@ createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) {
/// This repetition has the potential to be very large however, as each one
/// might refine a single call site. As a consequence, in practice we use an
/// upper bound on the number of repetitions to limit things.
-template <typename PassT>
-class DevirtSCCRepeatedPass
- : public PassInfoMixin<DevirtSCCRepeatedPass<PassT>> {
+class DevirtSCCRepeatedPass : public PassInfoMixin<DevirtSCCRepeatedPass> {
public:
- explicit DevirtSCCRepeatedPass(PassT Pass, int MaxIterations)
+ using PassConceptT =
+ detail::PassConcept<LazyCallGraph::SCC, CGSCCAnalysisManager,
+ LazyCallGraph &, CGSCCUpdateResult &>;
+
+ explicit DevirtSCCRepeatedPass(std::unique_ptr<PassConceptT> Pass,
+ int MaxIterations)
: Pass(std::move(Pass)), MaxIterations(MaxIterations) {}
/// Runs the wrapped pass up to \c MaxIterations on the SCC, iterating
/// whenever an indirect call is refined.
PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM,
- LazyCallGraph &CG, CGSCCUpdateResult &UR) {
- PreservedAnalyses PA = PreservedAnalyses::all();
- PassInstrumentation PI =
- AM.getResult<PassInstrumentationAnalysis>(InitialC, CG);
-
- // The SCC may be refined while we are running passes over it, so set up
- // a pointer that we can update.
- LazyCallGraph::SCC *C = &InitialC;
-
- // Collect value handles for all of the indirect call sites.
- SmallVector<WeakTrackingVH, 8> CallHandles;
-
- // Struct to track the counts of direct and indirect calls in each function
- // of the SCC.
- struct CallCount {
- int Direct;
- int Indirect;
- };
-
- // Put value handles on all of the indirect calls and return the number of
- // direct calls for each function in the SCC.
- auto ScanSCC = [](LazyCallGraph::SCC &C,
- SmallVectorImpl<WeakTrackingVH> &CallHandles) {
- assert(CallHandles.empty() && "Must start with a clear set of handles.");
-
- SmallDenseMap<Function *, CallCount> CallCounts;
- CallCount CountLocal = {0, 0};
- for (LazyCallGraph::Node &N : C) {
- CallCount &Count =
- CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal))
- .first->second;
- for (Instruction &I : instructions(N.getFunction()))
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- if (CB->getCalledFunction()) {
- ++Count.Direct;
- } else {
- ++Count.Indirect;
- CallHandles.push_back(WeakTrackingVH(&I));
- }
- }
- }
-
- return CallCounts;
- };
-
- // Populate the initial call handles and get the initial call counts.
- auto CallCounts = ScanSCC(*C, CallHandles);
-
- for (int Iteration = 0;; ++Iteration) {
-
- if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
- continue;
-
- PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR);
-
- if (UR.InvalidatedSCCs.count(C))
- PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
- else
- PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
-
- // If the SCC structure has changed, bail immediately and let the outer
- // CGSCC layer handle any iteration to reflect the refined structure.
- if (UR.UpdatedC && UR.UpdatedC != C) {
- PA.intersect(std::move(PassPA));
- break;
- }
-
- // Check that we didn't miss any update scenario.
- assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
- assert(C->begin() != C->end() && "Cannot have an empty SCC!");
-
- // Check whether any of the handles were devirtualized.
- auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) {
- if (!CallH)
- return false;
- auto *CB = dyn_cast<CallBase>(CallH);
- if (!CB)
- return false;
-
- // If the call is still indirect, leave it alone.
- Function *F = CB->getCalledFunction();
- if (!F)
- return false;
-
- LLVM_DEBUG(dbgs() << "Found devirtualized call from "
- << CB->getParent()->getParent()->getName() << " to "
- << F->getName() << "\n");
-
- // We now have a direct call where previously we had an indirect call,
- // so iterate to process this devirtualization site.
- return true;
- };
- bool Devirt = llvm::any_of(CallHandles, IsDevirtualizedHandle);
-
- // Rescan to build up a new set of handles and count how many direct
- // calls remain. If we decide to iterate, this also sets up the input to
- // the next iteration.
- CallHandles.clear();
- auto NewCallCounts = ScanSCC(*C, CallHandles);
-
- // If we haven't found an explicit devirtualization already see if we
- // have decreased the number of indirect calls and increased the number
- // of direct calls for any function in the SCC. This can be fooled by all
- // manner of transformations such as DCE and other things, but seems to
- // work well in practice.
- if (!Devirt)
- // Iterate over the keys in NewCallCounts, if Function also exists in
- // CallCounts, make the check below.
- for (auto &Pair : NewCallCounts) {
- auto &CallCountNew = Pair.second;
- auto CountIt = CallCounts.find(Pair.first);
- if (CountIt != CallCounts.end()) {
- const auto &CallCountOld = CountIt->second;
- if (CallCountOld.Indirect > CallCountNew.Indirect &&
- CallCountOld.Direct < CallCountNew.Direct) {
- Devirt = true;
- break;
- }
- }
- }
-
- if (!Devirt) {
- PA.intersect(std::move(PassPA));
- break;
- }
-
- // Otherwise, if we've already hit our max, we're done.
- if (Iteration >= MaxIterations) {
- LLVM_DEBUG(
- dbgs() << "Found another devirtualization after hitting the max "
- "number of repetitions ("
- << MaxIterations << ") on SCC: " << *C << "\n");
- PA.intersect(std::move(PassPA));
- break;
- }
-
- LLVM_DEBUG(
- dbgs()
- << "Repeating an SCC pass after finding a devirtualization in: " << *C
- << "\n");
-
- // Move over the new call counts in preparation for iterating.
- CallCounts = std::move(NewCallCounts);
-
- // Update the analysis manager with each run and intersect the total set
- // of preserved analyses so we're ready to iterate.
- AM.invalidate(*C, PassPA);
-
- PA.intersect(std::move(PassPA));
- }
-
- // Note that we don't add any preserved entries here unlike a more normal
- // "pass manager" because we only handle invalidation *between* iterations,
- // not after the last iteration.
- return PA;
- }
+ LazyCallGraph &CG, CGSCCUpdateResult &UR);
private:
- PassT Pass;
+ std::unique_ptr<PassConceptT> Pass;
int MaxIterations;
};
/// A function to deduce a function pass type and wrap it in the
/// templated adaptor.
-template <typename PassT>
-DevirtSCCRepeatedPass<PassT> createDevirtSCCRepeatedPass(PassT Pass,
- int MaxIterations) {
- return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations);
-}
-
-// Out-of-line implementation details for templates below this point.
-
template <typename CGSCCPassT>
-PreservedAnalyses
-ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>::run(Module &M,
- ModuleAnalysisManager &AM) {
- // Setup the CGSCC analysis manager from its proxy.
- CGSCCAnalysisManager &CGAM =
- AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager();
-
- // Get the call graph for this module.
- LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
-
- // Get Function analysis manager from its proxy.
- FunctionAnalysisManager &FAM =
- AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager();
-
- // We keep worklists to allow us to push more work onto the pass manager as
- // the passes are run.
- SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
- SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
-
- // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
- // iterating off the worklists.
- SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
- SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
-
- SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
- InlinedInternalEdges;
-
- CGSCCUpdateResult UR = {
- RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet,
- nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges};
-
- // Request PassInstrumentation from analysis manager, will use it to run
- // instrumenting callbacks for the passes later.
- PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
-
- PreservedAnalyses PA = PreservedAnalyses::all();
- CG.buildRefSCCs();
- for (auto RCI = CG.postorder_ref_scc_begin(),
- RCE = CG.postorder_ref_scc_end();
- RCI != RCE;) {
- assert(RCWorklist.empty() &&
- "Should always start with an empty RefSCC worklist");
- // The postorder_ref_sccs range we are walking is lazily constructed, so
- // we only push the first one onto the worklist. The worklist allows us
- // to capture *new* RefSCCs created during transformations.
- //
- // We really want to form RefSCCs lazily because that makes them cheaper
- // to update as the program is simplified and allows us to have greater
- // cache locality as forming a RefSCC touches all the parts of all the
- // functions within that RefSCC.
- //
- // We also eagerly increment the iterator to the next position because
- // the CGSCC passes below may delete the current RefSCC.
- RCWorklist.insert(&*RCI++);
-
- do {
- LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
- if (InvalidRefSCCSet.count(RC)) {
- LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n");
- continue;
- }
-
- assert(CWorklist.empty() &&
- "Should always start with an empty SCC worklist");
-
- LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC
- << "\n");
-
- // The top of the worklist may *also* be the same SCC we just ran over
- // (and invalidated for). Keep track of that last SCC we processed due
- // to SCC update to avoid redundant processing when an SCC is both just
- // updated itself and at the top of the worklist.
- LazyCallGraph::SCC *LastUpdatedC = nullptr;
-
- // Push the initial SCCs in reverse post-order as we'll pop off the
- // back and so see this in post-order.
- for (LazyCallGraph::SCC &C : llvm::reverse(*RC))
- CWorklist.insert(&C);
-
- do {
- LazyCallGraph::SCC *C = CWorklist.pop_back_val();
- // Due to call graph mutations, we may have invalid SCCs or SCCs from
- // other RefSCCs in the worklist. The invalid ones are dead and the
- // other RefSCCs should be queued above, so we just need to skip both
- // scenarios here.
- if (InvalidSCCSet.count(C)) {
- LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n");
- continue;
- }
- if (LastUpdatedC == C) {
- LLVM_DEBUG(dbgs() << "Skipping redundant run on SCC: " << *C << "\n");
- continue;
- }
- if (&C->getOuterRefSCC() != RC) {
- LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other "
- "RefSCC...\n");
- continue;
- }
-
- // Ensure we can proxy analysis updates from the CGSCC analysis manager
- // into the the Function analysis manager by getting a proxy here.
- // This also needs to update the FunctionAnalysisManager, as this may be
- // the first time we see this SCC.
- CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM(
- FAM);
-
- // Each time we visit a new SCC pulled off the worklist,
- // a transformation of a child SCC may have also modified this parent
- // and invalidated analyses. So we invalidate using the update record's
- // cross-SCC preserved set. This preserved set is intersected by any
- // CGSCC pass that handles invalidation (primarily pass managers) prior
- // to marking its SCC as preserved. That lets us track everything that
- // might need invalidation across SCCs without excessive invalidations
- // on a single SCC.
- //
- // This essentially allows SCC passes to freely invalidate analyses
- // of any ancestor SCC. If this becomes detrimental to successfully
- // caching analyses, we could force each SCC pass to manually
- // invalidate the analyses for any SCCs other than themselves which
- // are mutated. However, that seems to lose the robustness of the
- // pass-manager driven invalidation scheme.
- CGAM.invalidate(*C, UR.CrossSCCPA);
-
- do {
- // Check that we didn't miss any update scenario.
- assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
- assert(C->begin() != C->end() && "Cannot have an empty SCC!");
- assert(&C->getOuterRefSCC() == RC &&
- "Processing an SCC in a different RefSCC!");
-
- LastUpdatedC = UR.UpdatedC;
- UR.UpdatedRC = nullptr;
- UR.UpdatedC = nullptr;
-
- // Check the PassInstrumentation's BeforePass callbacks before
- // running the pass, skip its execution completely if asked to
- // (callback returns false).
- if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
- continue;
-
- PreservedAnalyses PassPA;
- {
- TimeTraceScope TimeScope(Pass.name());
- PassPA = Pass.run(*C, CGAM, CG, UR);
- }
-
- if (UR.InvalidatedSCCs.count(C))
- PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
- else
- PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
-
- // Update the SCC and RefSCC if necessary.
- C = UR.UpdatedC ? UR.UpdatedC : C;
- RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
-
- if (UR.UpdatedC) {
- // If we're updating the SCC, also update the FAM inside the proxy's
- // result.
- CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM(
- FAM);
- }
-
- // If the CGSCC pass wasn't able to provide a valid updated SCC,
- // the current SCC may simply need to be skipped if invalid.
- if (UR.InvalidatedSCCs.count(C)) {
- LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
- break;
- }
- // Check that we didn't miss any update scenario.
- assert(C->begin() != C->end() && "Cannot have an empty SCC!");
-
- // We handle invalidating the CGSCC analysis manager's information
- // for the (potentially updated) SCC here. Note that any other SCCs
- // whose structure has changed should have been invalidated by
- // whatever was updating the call graph. This SCC gets invalidated
- // late as it contains the nodes that were actively being
- // processed.
- CGAM.invalidate(*C, PassPA);
-
- // Then intersect the preserved set so that invalidation of module
- // analyses will eventually occur when the module pass completes.
- // Also intersect with the cross-SCC preserved set to capture any
- // cross-SCC invalidation.
- UR.CrossSCCPA.intersect(PassPA);
- PA.intersect(std::move(PassPA));
-
- // The pass may have restructured the call graph and refined the
- // current SCC and/or RefSCC. We need to update our current SCC and
- // RefSCC pointers to follow these. Also, when the current SCC is
- // refined, re-run the SCC pass over the newly refined SCC in order
- // to observe the most precise SCC model available. This inherently
- // cannot cycle excessively as it only happens when we split SCCs
- // apart, at most converging on a DAG of single nodes.
- // FIXME: If we ever start having RefSCC passes, we'll want to
- // iterate there too.
- if (UR.UpdatedC)
- LLVM_DEBUG(dbgs()
- << "Re-running SCC passes after a refinement of the "
- "current SCC: "
- << *UR.UpdatedC << "\n");
-
- // Note that both `C` and `RC` may at this point refer to deleted,
- // invalid SCC and RefSCCs respectively. But we will short circuit
- // the processing when we check them in the loop above.
- } while (UR.UpdatedC);
- } while (!CWorklist.empty());
-
- // We only need to keep internal inlined edge information within
- // a RefSCC, clear it to save on space and let the next time we visit
- // any of these functions have a fresh start.
- InlinedInternalEdges.clear();
- } while (!RCWorklist.empty());
- }
-
- // By definition we preserve the call garph, all SCC analyses, and the
- // analysis proxies by handling them above and in any nested pass managers.
- PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
- PA.preserve<LazyCallGraphAnalysis>();
- PA.preserve<CGSCCAnalysisManagerModuleProxy>();
- PA.preserve<FunctionAnalysisManagerModuleProxy>();
- return PA;
+DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT Pass,
+ int MaxIterations) {
+ using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT,
+ PreservedAnalyses, CGSCCAnalysisManager,
+ LazyCallGraph &, CGSCCUpdateResult &>;
+ return DevirtSCCRepeatedPass(std::make_unique<PassModelT>(std::move(Pass)),
+ MaxIterations);
}
// Clear out the debug logging macro.
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/CallGraph.h b/contrib/llvm-project/llvm/include/llvm/Analysis/CallGraph.h
index 98f9b0683fd4..4da448c9900b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/CallGraph.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/CallGraph.h
@@ -87,13 +87,6 @@ class CallGraph {
/// or calling an external function.
std::unique_ptr<CallGraphNode> CallsExternalNode;
- /// Replace the function represented by this node by another.
- ///
- /// This does not rescan the body of the function, so it is suitable when
- /// splicing the body of one function to another while also updating all
- /// callers from the old function to the new.
- void spliceFunction(const Function *From, const Function *To);
-
public:
explicit CallGraph(Module &M);
CallGraph(CallGraph &&Arg);
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/CaptureTracking.h b/contrib/llvm-project/llvm/include/llvm/Analysis/CaptureTracking.h
index e68675b278f1..9da5f18e944b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/CaptureTracking.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/CaptureTracking.h
@@ -13,6 +13,8 @@
#ifndef LLVM_ANALYSIS_CAPTURETRACKING_H
#define LLVM_ANALYSIS_CAPTURETRACKING_H
+#include "llvm/ADT/DenseMap.h"
+
namespace llvm {
class Value;
@@ -94,6 +96,12 @@ namespace llvm {
/// is zero, a default value is assumed.
void PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
unsigned MaxUsesToExplore = 0);
+
+ /// Returns true if the pointer is to a function-local object that never
+ /// escapes from the function.
+ bool isNonEscapingLocalObject(
+ const Value *V,
+ SmallDenseMap<const Value *, bool, 8> *IsCapturedCache = nullptr);
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/CodeMetrics.h b/contrib/llvm-project/llvm/include/llvm/Analysis/CodeMetrics.h
index eab24c8ab179..615591aa83ad 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/CodeMetrics.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/CodeMetrics.h
@@ -75,7 +75,8 @@ struct CodeMetrics {
/// Add information about a block to the current state.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI,
- const SmallPtrSetImpl<const Value*> &EphValues);
+ const SmallPtrSetImpl<const Value *> &EphValues,
+ bool PrepareForLTO = false);
/// Collect a loop's ephemeral values (those used only by an assume
/// or similar intrinsics in the loop).
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ConstantFolding.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ConstantFolding.h
index 0ccc782ad6f5..ef6e66b2b88e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -25,6 +25,7 @@ template <typename T> class ArrayRef;
class CallBase;
class Constant;
class ConstantExpr;
+class DSOLocalEquivalent;
class DataLayout;
class Function;
class GlobalValue;
@@ -34,8 +35,11 @@ class Type;
/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
+/// If the global is part of a dso_local_equivalent constant, return it through
+/// `Equiv` if it is provided.
bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset,
- const DataLayout &DL);
+ const DataLayout &DL,
+ DSOLocalEquivalent **DSOEquiv = nullptr);
/// ConstantFoldInstruction - Try to constant fold the specified instruction.
/// If successful, the constant result is returned, if not, null is returned.
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h
new file mode 100644
index 000000000000..83c1fb4485fd
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -0,0 +1,88 @@
+//===- ConstraintSystem.h - A system of linear constraints. --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CONSTRAINTSYSTEM_H
+#define LLVM_ANALYSIS_CONSTRAINTSYSTEM_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include <string>
+
+namespace llvm {
+
+class ConstraintSystem {
+ /// Current linear constraints in the system.
+ /// An entry of the form c0, c1, ... cn represents the following constraint:
+ /// c0 >= v0 * c1 + ... + v{n-1} * cn
+ SmallVector<SmallVector<int64_t, 8>, 4> Constraints;
+
+ /// Current greatest common divisor for all coefficients in the system.
+ uint32_t GCD = 1;
+
+ // Eliminate constraints from the system using Fourier–Motzkin elimination.
+ bool eliminateUsingFM();
+
+ /// Print the constraints in the system, using \p Names as variable names.
+ void dump(ArrayRef<std::string> Names) const;
+
+ /// Print the constraints in the system, using x0...xn as variable names.
+ void dump() const;
+
+ /// Returns true if there may be a solution for the constraints in the system.
+ bool mayHaveSolutionImpl();
+
+public:
+ bool addVariableRow(const SmallVector<int64_t, 8> &R) {
+ assert(Constraints.empty() || R.size() == Constraints.back().size());
+ // If all variable coefficients are 0, the constraint does not provide any
+ // usable information.
+ if (all_of(makeArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))
+ return false;
+
+ for (const auto &C : R) {
+ auto A = std::abs(C);
+ GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD})
+ .getZExtValue();
+ }
+ Constraints.push_back(R);
+ return true;
+ }
+
+ bool addVariableRowFill(const SmallVector<int64_t, 8> &R) {
+ for (auto &CR : Constraints) {
+ while (CR.size() != R.size())
+ CR.push_back(0);
+ }
+ return addVariableRow(R);
+ }
+
+ /// Returns true if there may be a solution for the constraints in the system.
+ bool mayHaveSolution();
+
+ static SmallVector<int64_t, 8> negate(SmallVector<int64_t, 8> R) {
+ // The negated constraint of R is obtained by adding 1 to the constant and
+ // then multiplying all entries (including the constant) by -1, turning
+ // 'c0 >= X' into 'X >= c0 + 1'.
+ R[0] += 1;
+ for (auto &C : R)
+ C *= -1;
+ return R;
+ }
+
+ bool isConditionImplied(SmallVector<int64_t, 8> R);
+
+ void popLastConstraint() { Constraints.pop_back(); }
+
+ /// Returns the number of rows in the constraint system.
+ unsigned size() const { return Constraints.size(); }
+};
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_CONSTRAINTSYSTEM_H
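A hedged usage sketch of the interface declared above. Following the row encoding
documented on Constraints, a row {c0, c1, c2} states c0 >= c1*x + c2*y; the snippet
encodes x <= 10 and y <= x and then asks whether y <= 10 is implied (true is the
expected outcome, not something this header guarantees).

#include "llvm/Analysis/ConstraintSystem.h"
using namespace llvm;

bool example() {
  ConstraintSystem CS;
  CS.addVariableRow({10, 1, 0}); // 10 >= 1*x + 0*y   (x <= 10)
  CS.addVariableRow({0, -1, 1}); // 0 >= -1*x + 1*y   (y <= x)
  // Is 10 >= 0*x + 1*y (i.e. y <= 10) implied by the rows above?
  return CS.isConditionImplied({10, 0, 1});
}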
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/DDG.h b/contrib/llvm-project/llvm/include/llvm/Analysis/DDG.h
index 9e2b7907eaec..e3bef33e55c3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/DDG.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/DDG.h
@@ -152,7 +152,7 @@ private:
setKind((InstList.size() == 0 && Input.size() == 1)
? NodeKind::SingleInstruction
: NodeKind::MultiInstruction);
- InstList.insert(InstList.end(), Input.begin(), Input.end());
+ llvm::append_range(InstList, Input);
}
void appendInstructions(const SimpleDDGNode &Input) {
appendInstructions(Input.getInstructions());
@@ -290,6 +290,12 @@ public:
bool getDependencies(const NodeType &Src, const NodeType &Dst,
DependenceList &Deps) const;
+ /// Return a string representing the type of dependence that the dependence
+ /// analysis identified between the two given nodes. This function assumes
+ /// that there is a memory dependence between the given two nodes.
+ const std::string getDependenceString(const NodeType &Src,
+ const NodeType &Dst) const;
+
protected:
// Name of the graph.
std::string Name;
@@ -463,6 +469,26 @@ bool DependenceGraphInfo<NodeType>::getDependencies(
return !Deps.empty();
}
+template <typename NodeType>
+const std::string
+DependenceGraphInfo<NodeType>::getDependenceString(const NodeType &Src,
+ const NodeType &Dst) const {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ DependenceList Deps;
+ if (!getDependencies(Src, Dst, Deps))
+ return OS.str();
+ interleaveComma(Deps, OS, [&](const std::unique_ptr<Dependence> &D) {
+ D->dump(OS);
+ // Remove the extra new-line character printed by the dump
+ // method
+ if (OS.str().back() == '\n')
+ OS.str().pop_back();
+ });
+
+ return OS.str();
+}
+
//===--------------------------------------------------------------------===//
// GraphTraits specializations for the DDG
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/DDGPrinter.h b/contrib/llvm-project/llvm/include/llvm/Analysis/DDGPrinter.h
new file mode 100644
index 000000000000..4477b387fe50
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/DDGPrinter.h
@@ -0,0 +1,91 @@
+//===- llvm/Analysis/DDGPrinter.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DOT printer for the Data-Dependence Graph (DDG).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DDGPRINTER_H
+#define LLVM_ANALYSIS_DDGPRINTER_H
+
+#include "llvm/Analysis/DDG.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/DOTGraphTraits.h"
+
+namespace llvm {
+
+//===--------------------------------------------------------------------===//
+// Implementation of DDG DOT Printer for a loop.
+//===--------------------------------------------------------------------===//
+class DDGDotPrinterPass : public PassInfoMixin<DDGDotPrinterPass> {
+public:
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+//===--------------------------------------------------------------------===//
+// Specialization of DOTGraphTraits.
+//===--------------------------------------------------------------------===//
+template <>
+struct DOTGraphTraits<const DataDependenceGraph *>
+ : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
+
+ /// Generate a title for the graph in DOT format
+ std::string getGraphName(const DataDependenceGraph *G) {
+ assert(G && "expected a valid pointer to the graph.");
+ return "DDG for '" + std::string(G->getName()) + "'";
+ }
+
+ /// Print a DDG node either in concise form (-ddg-dot-only) or
+ /// verbose mode (-ddg-dot).
+ std::string getNodeLabel(const DDGNode *Node,
+ const DataDependenceGraph *Graph);
+
+ /// Print attributes of an edge in the DDG graph. If the edge
+ /// is a MemoryDependence edge, then detailed dependence info
+ /// available from DependenceAnalysis is displayed.
+ std::string
+ getEdgeAttributes(const DDGNode *Node,
+ GraphTraits<const DDGNode *>::ChildIteratorType I,
+ const DataDependenceGraph *G);
+
+ /// Do not print nodes that are part of a pi-block separately. They
+ /// will be printed when their containing pi-block is being printed.
+ bool isNodeHidden(const DDGNode *Node, const DataDependenceGraph *G);
+
+private:
+ /// Print a DDG node in concise form.
+ static std::string getSimpleNodeLabel(const DDGNode *Node,
+ const DataDependenceGraph *G);
+
+ /// Print a DDG node with more information including containing instructions
+ /// and detailed information about the dependence edges.
+ static std::string getVerboseNodeLabel(const DDGNode *Node,
+ const DataDependenceGraph *G);
+
+ /// Print a DDG edge in concise form.
+ static std::string getSimpleEdgeAttributes(const DDGNode *Src,
+ const DDGEdge *Edge,
+ const DataDependenceGraph *G);
+
+ /// Print a DDG edge with more information including detailed information
+ /// about the dependence edges.
+ static std::string getVerboseEdgeAttributes(const DDGNode *Src,
+ const DDGEdge *Edge,
+ const DataDependenceGraph *G);
+};
+
+using DDGDotGraphTraits = DOTGraphTraits<const DataDependenceGraph *>;
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DDGPRINTER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/Delinearization.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Delinearization.h
new file mode 100644
index 000000000000..2658b6bbc80c
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Delinearization.h
@@ -0,0 +1,33 @@
+//===---- Delinearization.h - MultiDimensional Index Delinearization ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements an analysis pass that tries to delinearize all GEP
+// instructions in all loops using the SCEV analysis functionality. This pass is
+// only used for testing purposes: if your pass needs delinearization, please
+// use the on-demand SCEVAddRecExpr::delinearize() function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DELINEARIZATION_H
+#define LLVM_ANALYSIS_DELINEARIZATION_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+struct DelinearizationPrinterPass
+ : public PassInfoMixin<DelinearizationPrinterPass> {
+ explicit DelinearizationPrinterPass(raw_ostream &OS);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ raw_ostream &OS;
+};
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DELINEARIZATION_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/DemandedBits.h b/contrib/llvm-project/llvm/include/llvm/Analysis/DemandedBits.h
index 04db3eb57c18..7a8618a27ce7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/DemandedBits.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/DemandedBits.h
@@ -61,6 +61,20 @@ public:
void print(raw_ostream &OS);
+ /// Compute alive bits of one addition operand from alive output and known
+ /// operand bits
+ static APInt determineLiveOperandBitsAdd(unsigned OperandNo,
+ const APInt &AOut,
+ const KnownBits &LHS,
+ const KnownBits &RHS);
+
+ /// Compute alive bits of one subtraction operand from alive output and known
+ /// operand bits
+ static APInt determineLiveOperandBitsSub(unsigned OperandNo,
+ const APInt &AOut,
+ const KnownBits &LHS,
+ const KnownBits &RHS);
+
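A standalone illustration (not the LLVM implementation) of why the alive bits of an
addition operand can be bounded by the alive output bits: carries only propagate toward
higher bit positions, so operand bits above the highest alive result bit cannot affect it.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t AliveOut = 0x0000000Fu; // only the low 4 bits of the sum are alive
  uint32_t A = 0x12345678u, B = 0x9ABCDEF0u;
  // Flip only bits of A above the alive region; the alive result bits do not change.
  uint32_t APerturbed = A ^ 0xFFFFFFF0u;
  assert(((A + B) & AliveOut) == ((APerturbed + B) & AliveOut));
  return 0;
}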
private:
void performAnalysis();
void determineLiveOperandBits(const Instruction *UserI,
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/DivergenceAnalysis.h
index a2da97bb9059..2e4ae65d0981 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -59,8 +59,10 @@ public:
/// \brief Mark \p UniVal as a value that is always uniform.
void addUniformOverride(const Value &UniVal);
- /// \brief Mark \p DivVal as a value that is always divergent.
- void markDivergent(const Value &DivVal);
+ /// \brief Mark \p DivVal as a value that is always divergent. Will not do so
+ /// if `isAlwaysUniform(DivVal)`.
+ /// \returns Whether the tracked divergence state of \p DivVal changed.
+ bool markDivergent(const Value &DivVal);
/// \brief Propagate divergence to all instructions in the region.
/// Divergence is seeded by calls to \p markDivergent.
@@ -76,45 +78,38 @@ public:
/// \brief Whether \p Val is divergent at its definition.
bool isDivergent(const Value &Val) const;
- /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent.
+ /// \brief Whether \p U is divergent. Uses of a uniform value can be
+ /// divergent.
bool isDivergentUse(const Use &U) const;
void print(raw_ostream &OS, const Module *) const;
private:
- bool updateTerminator(const Instruction &Term) const;
- bool updatePHINode(const PHINode &Phi) const;
-
- /// \brief Computes whether \p Inst is divergent based on the
- /// divergence of its operands.
- ///
- /// \returns Whether \p Inst is divergent.
- ///
- /// This should only be called for non-phi, non-terminator instructions.
- bool updateNormalInstruction(const Instruction &Inst) const;
-
- /// \brief Mark users of live-out users as divergent.
- ///
- /// \param LoopHeader the header of the divergent loop.
- ///
- /// Marks all users of live-out values of the loop headed by \p LoopHeader
- /// as divergent and puts them on the worklist.
- void taintLoopLiveOuts(const BasicBlock &LoopHeader);
-
- /// \brief Push all users of \p Val (in the region) to the worklist
+ /// \brief Mark \p Term as divergent and push all Instructions that become
+ /// divergent as a result on the worklist.
+ void analyzeControlDivergence(const Instruction &Term);
+ /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
+ /// the worklist.
+ void taintAndPushPhiNodes(const BasicBlock &JoinBlock);
+
+ /// \brief Identify all Instructions that become divergent because \p DivExit
+ /// is a divergent loop exit of \p DivLoop. Mark those instructions as
+ /// divergent and push them on the worklist.
+ void propagateLoopExitDivergence(const BasicBlock &DivExit,
+ const Loop &DivLoop);
+
+ /// \brief Internal implementation function for propagateLoopExitDivergence.
+ void analyzeLoopExitDivergence(const BasicBlock &DivExit,
+ const Loop &OuterDivLoop);
+
+ /// \brief Mark all instructions that use a value defined in \p OuterDivLoop
+ /// as divergent. Push their users on the worklist.
+ void analyzeTemporalDivergence(const Instruction &I,
+ const Loop &OuterDivLoop);
+
+ /// \brief Push all users of \p Val (in the region) to the worklist.
void pushUsers(const Value &I);
- /// \brief Push all phi nodes in @block to the worklist
- void pushPHINodes(const BasicBlock &Block);
-
- /// \brief Mark \p Block as join divergent
- ///
- /// A block is join divergent if two threads may reach it from different
- /// incoming blocks at the same time.
- void markBlockJoinDivergent(const BasicBlock &Block) {
- DivergentJoinBlocks.insert(&Block);
- }
-
/// \brief Whether \p Val is divergent when read in \p ObservingBlock.
bool isTemporalDivergent(const BasicBlock &ObservingBlock,
const Value &Val) const;
@@ -123,27 +118,9 @@ private:
///
/// (see markBlockJoinDivergent).
bool isJoinDivergent(const BasicBlock &Block) const {
- return DivergentJoinBlocks.find(&Block) != DivergentJoinBlocks.end();
+ return DivergentJoinBlocks.contains(&Block);
}
- /// \brief Propagate control-induced divergence to users (phi nodes and
- /// instructions).
- //
- // \param JoinBlock is a divergent loop exit or join point of two disjoint
- // paths.
- // \returns Whether \p JoinBlock is a divergent loop exit of \p TermLoop.
- bool propagateJoinDivergence(const BasicBlock &JoinBlock,
- const Loop *TermLoop);
-
- /// \brief Propagate induced value divergence due to control divergence in \p
- /// Term.
- void propagateBranchDivergence(const Instruction &Term);
-
- /// \brief Propagate divergent caused by a divergent loop exit.
- ///
- /// \param ExitingLoop is a divergent loop.
- void propagateLoopDivergence(const Loop &ExitingLoop);
-
private:
const Function &F;
// If regionLoop != nullptr, analysis is only performed within \p RegionLoop.
@@ -166,7 +143,7 @@ private:
DenseSet<const Value *> UniformOverrides;
// Blocks with joining divergent control from different predecessors.
- DenseSet<const BasicBlock *> DivergentJoinBlocks;
+ DenseSet<const BasicBlock *> DivergentJoinBlocks; // FIXME Deprecated
// Detected/marked divergent values.
DenseSet<const Value *> DivergentValues;
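Illustrative sketch (not part of the diff): the interface above reduces to the
isDivergent/isDivergentUse queries, so a consumer can walk a function and flag
divergent conditional branches. The enclosing analysis class name lies outside
this hunk, so it is left as a template parameter here.

// Sketch only: DivergenceAnalysisT is assumed to expose the
// isDivergent(const Value &) query declared above.
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"

template <typename DivergenceAnalysisT>
void reportDivergentBranches(const llvm::Function &F,
                             const DivergenceAnalysisT &DA) {
  for (const llvm::BasicBlock &BB : F) {
    const auto *BI = llvm::dyn_cast<llvm::BranchInst>(BB.getTerminator());
    if (BI && BI->isConditional() && DA.isDivergent(*BI->getCondition()))
      llvm::errs() << "divergent branch in " << BB.getName() << "\n";
  }
}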
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/DominanceFrontier.h b/contrib/llvm-project/llvm/include/llvm/Analysis/DominanceFrontier.h
index f67929c997f9..cef5e03b3b7a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/DominanceFrontier.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/DominanceFrontier.h
@@ -26,7 +26,6 @@
#include <map>
#include <set>
#include <utility>
-#include <vector>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/EHPersonalities.h b/contrib/llvm-project/llvm/include/llvm/Analysis/EHPersonalities.h
index c17b0b4a90d3..eaada6627494 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/EHPersonalities.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/EHPersonalities.h
@@ -28,11 +28,12 @@ enum class EHPersonality {
GNU_CXX_SjLj,
GNU_ObjC,
MSVC_X86SEH,
- MSVC_Win64SEH,
+ MSVC_TableSEH,
MSVC_CXX,
CoreCLR,
Rust,
- Wasm_CXX
+ Wasm_CXX,
+ XL_CXX
};
/// See if the given exception handling personality function is one
@@ -51,7 +52,7 @@ inline bool isAsynchronousEHPersonality(EHPersonality Pers) {
// unknown personalities don't catch asynch exceptions.
switch (Pers) {
case EHPersonality::MSVC_X86SEH:
- case EHPersonality::MSVC_Win64SEH:
+ case EHPersonality::MSVC_TableSEH:
return true;
default:
return false;
@@ -65,7 +66,7 @@ inline bool isFuncletEHPersonality(EHPersonality Pers) {
switch (Pers) {
case EHPersonality::MSVC_CXX:
case EHPersonality::MSVC_X86SEH:
- case EHPersonality::MSVC_Win64SEH:
+ case EHPersonality::MSVC_TableSEH:
case EHPersonality::CoreCLR:
return true;
default:
@@ -80,7 +81,7 @@ inline bool isScopedEHPersonality(EHPersonality Pers) {
switch (Pers) {
case EHPersonality::MSVC_CXX:
case EHPersonality::MSVC_X86SEH:
- case EHPersonality::MSVC_Win64SEH:
+ case EHPersonality::MSVC_TableSEH:
case EHPersonality::CoreCLR:
case EHPersonality::Wasm_CXX:
return true;
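Illustrative sketch (not part of the diff): the renamed MSVC_TableSEH member
flows through the three classification helpers shown above; a caller only ever
dispatches on those predicates, for example:

#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Support/raw_ostream.h"

// Prints which lowering-relevant properties a personality has, using only the
// helpers declared in EHPersonalities.h.
void describePersonality(llvm::EHPersonality Pers) {
  if (llvm::isAsynchronousEHPersonality(Pers))
    llvm::errs() << "catches asynchronous (SEH-style) exceptions\n";
  if (llvm::isFuncletEHPersonality(Pers))
    llvm::errs() << "lowered with funclets\n";
  if (llvm::isScopedEHPersonality(Pers))
    llvm::errs() << "uses scope-based EH pads\n";
}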
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
new file mode 100644
index 000000000000..a5f96e72ce97
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
@@ -0,0 +1,86 @@
+//=- FunctionPropertiesAnalysis.h - Function Properties Analysis --*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the FunctionPropertiesInfo and FunctionPropertiesAnalysis
+// classes used to extract function properties.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUNCTIONPROPERTIESANALYSIS_H_
+#define LLVM_FUNCTIONPROPERTIESANALYSIS_H_
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Function;
+
+class FunctionPropertiesInfo {
+public:
+ static FunctionPropertiesInfo getFunctionPropertiesInfo(const Function &F,
+ const LoopInfo &LI);
+
+ void print(raw_ostream &OS) const;
+
+ /// Number of basic blocks
+ int64_t BasicBlockCount = 0;
+
+ /// Number of blocks reached from a conditional instruction, or that are
+ /// 'cases' of a SwitchInstr.
+ // FIXME: We may want to replace this with a more meaningful metric, like
+ // number of conditionally executed blocks:
+ // 'if (a) s();' would be counted here as 2 blocks, just like
+ // 'if (a) s(); else s2(); s3();' would.
+ int64_t BlocksReachedFromConditionalInstruction = 0;
+
+ /// Number of uses of this function, plus 1 if the function is callable
+ /// outside the module.
+ int64_t Uses = 0;
+
+ /// Number of direct calls made from this function to other functions
+ /// defined in this module.
+ int64_t DirectCallsToDefinedFunctions = 0;
+
+ // Load Instruction Count
+ int64_t LoadInstCount = 0;
+
+ // Store Instruction Count
+ int64_t StoreInstCount = 0;
+
+ // Maximum Loop Depth in the Function
+ int64_t MaxLoopDepth = 0;
+
+ // Number of Top Level Loops in the Function
+ int64_t TopLevelLoopCount = 0;
+};
+
+// Analysis pass
+class FunctionPropertiesAnalysis
+ : public AnalysisInfoMixin<FunctionPropertiesAnalysis> {
+
+public:
+ static AnalysisKey Key;
+
+ using Result = FunctionPropertiesInfo;
+
+ Result run(Function &F, FunctionAnalysisManager &FAM);
+};
+
+/// Printer pass for the FunctionPropertiesAnalysis results.
+class FunctionPropertiesPrinterPass
+ : public PassInfoMixin<FunctionPropertiesPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit FunctionPropertiesPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // namespace llvm
+#endif // LLVM_FUNCTIONPROPERTIESANALYSIS_H_
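Illustrative sketch (not part of the diff): a new-PM function pass can pull the
cached result out of the FunctionAnalysisManager; the pass name below is
hypothetical, only FunctionPropertiesAnalysis and FunctionPropertiesInfo::print
come from the header above.

#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical pass that dumps the collected properties for each function,
// much like the in-tree FunctionPropertiesPrinterPass does.
struct DumpFunctionPropertiesPass
    : llvm::PassInfoMixin<DumpFunctionPropertiesPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &FAM) {
    const llvm::FunctionPropertiesInfo &Props =
        FAM.getResult<llvm::FunctionPropertiesAnalysis>(F);
    Props.print(llvm::errs());
    return llvm::PreservedAnalyses::all();
  }
};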
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/contrib/llvm-project/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
new file mode 100644
index 000000000000..9e97541e542b
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -0,0 +1,789 @@
+//===- IRSimilarityIdentifier.h - Find similarity in a module --------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Interface file for the IRSimilarityIdentifier for identifying similarities in
+// IR including the IRInstructionMapper, which maps an Instruction to unsigned
+// integers.
+//
+// Two sequences of instructions are called "similar" if they perform the same
+// series of operations for all inputs.
+//
+// \code
+// %1 = add i32 %a, 10
+// %2 = add i32 %a, %1
+// %3 = icmp slt i32 %1, %2
+// \endcode
+//
+// and
+//
+// \code
+// %1 = add i32 11, %a
+// %2 = sub i32 %a, %1
+// %3 = icmp sgt i32 %2, %1
+// \endcode
+//
+// ultimately have the same result, even if the inputs and structure are
+// slightly different.
+//
+// For instructions, we do not worry about operands that do not have fixed
+// semantic meaning to the program. We consider the opcode that the instruction
+// has, the types, parameters, and extra information such as the function name,
+// or comparison predicate. These are used to create a hash to map instructions
+// to integers to be used in similarity matching in sequences of instructions.
+//
+// Terminology:
+// An IRSimilarityCandidate is a region of IRInstructionData (wrapped
+// Instructions), usually used to denote a region where similarity has been found.
+//
+// A SimilarityGroup is a set of IRSimilarityCandidates that are structurally
+// similar to one another.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
+#define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
+
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+namespace IRSimilarity {
+
+struct IRInstructionDataList;
+
+/// This represents what is and is not supported when finding similarity in
+/// Instructions.
+///
+/// Legal Instructions are considered when looking at similarity between
+/// Instructions.
+///
+/// Illegal Instructions cannot be considered when looking for similarity
+/// between Instructions. They act as boundaries between similarity regions.
+///
+/// Invisible Instructions are skipped over during analysis.
+// TODO: Shared with MachineOutliner
+enum InstrType { Legal, Illegal, Invisible };
+
+/// This provides the utilities for hashing an Instruction to an unsigned
+/// integer. Two IRInstructionDatas produce the same hash value when their
+/// underlying Instructions perform the same operation (even if they don't have
+/// the same input operands.)
+/// As a more concrete example, consider the following:
+///
+/// \code
+/// %add1 = add i32 %a, %b
+/// %add2 = add i32 %c, %d
+/// %add3 = add i64 %e, %f
+/// \endcode
+///
+/// Then the IRInstructionData wrappers for these Instructions may be hashed like
+/// so:
+///
+/// \code
+/// ; These two adds have the same types and operand types, so they hash to the
+/// ; same number.
+/// %add1 = add i32 %a, %b ; Hash: 1
+/// %add2 = add i32 %c, %d ; Hash: 1
+/// ; This add produces an i64. This differentiates it from %add1 and %add2. So,
+/// ; it hashes to a different number.
+/// %add3 = add i64 %e, %f; Hash: 2
+/// \endcode
+///
+///
+/// This hashing scheme will be used to represent the program as a very long
+/// string. This string can then be placed in a data structure which can be used
+/// for similarity queries.
+///
+/// TODO: Handle types of Instructions which can be equal even with different
+/// operands. (E.g. comparisons with swapped predicates.)
+/// TODO: Handle CallInsts, which are only checked for function type
+/// by \ref isSameOperationAs.
+/// TODO: Handle GetElementPtrInsts, as some of the operands have to be the
+/// exact same, and some do not.
+struct IRInstructionData : ilist_node<IRInstructionData> {
+
+ /// The source Instruction that is being wrapped.
+ Instruction *Inst = nullptr;
+ /// The values of the operands in the Instruction.
+ SmallVector<Value *, 4> OperVals;
+ /// The legality of the wrapped instruction. This is informed by InstrType,
+  /// and is used when checking whether two instructions are considered similar.
+ /// If either instruction is not legal, the instructions are automatically not
+ /// considered similar.
+ bool Legal;
+
+ /// This is only relevant if we are wrapping a CmpInst where we needed to
+ /// change the predicate of a compare instruction from a greater than form
+ /// to a less than form. It is None otherwise.
+ Optional<CmpInst::Predicate> RevisedPredicate;
+
+ /// Gather the information that is difficult to gather for an Instruction, or
+ /// is changed. i.e. the operands of an Instruction and the Types of those
+ /// operands. This extra information allows for similarity matching to make
+ /// assertions that allow for more flexibility when checking for whether an
+ /// Instruction performs the same operation.
+ IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL);
+
+ /// Get the predicate that the compare instruction is using for hashing the
+  /// instruction. The IRInstructionData must be wrapping a CmpInst.
+ CmpInst::Predicate getPredicate() const;
+
+ /// A function that swaps the predicates to their less than form if they are
+ /// in a greater than form. Otherwise, the predicate is unchanged.
+ ///
+  /// \param CI - The comparison operation to find a consistent predicate for.
+ /// \return the consistent comparison predicate.
+ static CmpInst::Predicate predicateForConsistency(CmpInst *CI);
+
+  /// Hashes \p ID based on its instruction's opcode, types, and operand types.
+ /// Two IRInstructionData instances produce the same hash when they perform
+ /// the same operation.
+ ///
+ /// As a simple example, consider the following instructions.
+ ///
+ /// \code
+ /// %add1 = add i32 %x1, %y1
+ /// %add2 = add i32 %x2, %y2
+ ///
+ /// %sub = sub i32 %x1, %y1
+ ///
+ /// %add_i64 = add i64 %x2, %y2
+ /// \endcode
+ ///
+  /// Because the first two adds operate on the same types, and are performing the
+ /// same action, they will be hashed to the same value.
+ ///
+ /// However, the subtraction instruction is not the same as an addition, and
+ /// will be hashed to a different value.
+ ///
+ /// Finally, the last add has a different type compared to the first two add
+  /// instructions, so it will also be hashed to a different value than any of
+ /// the previous instructions.
+ ///
+ /// \param [in] ID - The IRInstructionData instance to be hashed.
+ /// \returns A hash_value of the IRInstructionData.
+ friend hash_code hash_value(const IRInstructionData &ID) {
+ SmallVector<Type *, 4> OperTypes;
+ for (Value *V : ID.OperVals)
+ OperTypes.push_back(V->getType());
+
+ if (isa<CmpInst>(ID.Inst))
+ return llvm::hash_combine(
+ llvm::hash_value(ID.Inst->getOpcode()),
+ llvm::hash_value(ID.Inst->getType()),
+ llvm::hash_value(ID.getPredicate()),
+ llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
+ else if (CallInst *CI = dyn_cast<CallInst>(ID.Inst))
+ return llvm::hash_combine(
+ llvm::hash_value(ID.Inst->getOpcode()),
+ llvm::hash_value(ID.Inst->getType()),
+ llvm::hash_value(CI->getCalledFunction()->getName().str()),
+ llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
+ return llvm::hash_combine(
+ llvm::hash_value(ID.Inst->getOpcode()),
+ llvm::hash_value(ID.Inst->getType()),
+ llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
+ }
+
+ IRInstructionDataList *IDL = nullptr;
+};
+
+struct IRInstructionDataList : simple_ilist<IRInstructionData> {};
+
+/// Compare one IRInstructionData class to another IRInstructionData class for
+/// whether they are performing the same operation, and can be mapped to the
+/// same value. For regular instructions, if the hash value is the same, then
+/// they will also be close.
+///
+/// \param A - The first IRInstructionData class to compare
+/// \param B - The second IRInstructionData class to compare
+/// \returns true if \p A and \p B are similar enough to be mapped to the same
+/// value.
+bool isClose(const IRInstructionData &A, const IRInstructionData &B);
+
+struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> {
+ static inline IRInstructionData *getEmptyKey() { return nullptr; }
+ static inline IRInstructionData *getTombstoneKey() {
+ return reinterpret_cast<IRInstructionData *>(-1);
+ }
+
+ static unsigned getHashValue(const IRInstructionData *E) {
+ using llvm::hash_value;
+ assert(E && "IRInstructionData is a nullptr?");
+ return hash_value(*E);
+ }
+
+ static bool isEqual(const IRInstructionData *LHS,
+ const IRInstructionData *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
+ LHS == getEmptyKey() || LHS == getTombstoneKey())
+ return LHS == RHS;
+
+ assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?");
+ return isClose(*LHS, *RHS);
+ }
+};
+
+/// Helper struct for converting the Instructions in a Module into a vector of
+/// unsigned integers. This vector of unsigned integers can be thought of as a
+/// "numeric string". This numeric string can then be queried by, for example,
+/// data structures that find repeated substrings.
+///
+/// This hashing is done per BasicBlock in the module. To hash Instructions
+/// based off of their operations, each Instruction is wrapped in an
+/// IRInstructionData struct. The unsigned integer for an IRInstructionData
+/// depends on:
+/// - The hash provided by the IRInstructionData.
+/// - Which member of InstrType the IRInstructionData is classified as.
+/// See InstrType for more details on the possible classifications, and how they
+/// manifest in the numeric string.
+///
+/// The numeric string for an individual BasicBlock is terminated by a unique
+/// unsigned integer. This prevents data structures which rely on repetition
+/// from matching across BasicBlocks. (For example, the SuffixTree.)
+/// As a concrete example, if we have the following two BasicBlocks:
+/// \code
+/// bb0:
+/// %add1 = add i32 %a, %b
+/// %add2 = add i32 %c, %d
+/// %add3 = add i64 %e, %f
+/// bb1:
+///    %sub = sub i32 %c, %d
+///    %add4 = add i32 %c, %d
+/// \endcode
+/// We may hash the Instructions like this (via IRInstructionData):
+/// \code
+/// bb0:
+/// %add1 = add i32 %a, %b ; Hash: 1
+/// %add2 = add i32 %c, %d; Hash: 1
+/// %add3 = add i64 %e, %f; Hash: 2
+/// bb1:
+/// %sub = sub i32 %c, %d; Hash: 3
+/// %add4 = add i32 %c, %d ; Hash: 1
+/// \endcode
+/// And produce a "numeric string representation" like so:
+/// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2
+///
+/// TODO: This is very similar to the MachineOutliner, and should be
+/// consolidated into the same interface.
+struct IRInstructionMapper {
+ /// The starting illegal instruction number to map to.
+ ///
+ /// Set to -3 for compatibility with DenseMapInfo<unsigned>.
+ unsigned IllegalInstrNumber = static_cast<unsigned>(-3);
+
+  /// The next available integer to assign to a legal Instruction.
+ unsigned LegalInstrNumber = 0;
+
+ /// Correspondence from IRInstructionData to unsigned integers.
+ DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>
+ InstructionIntegerMap;
+
+ /// Set if we added an illegal number in the previous step.
+ /// Since each illegal number is unique, we only need one of them between
+ /// each range of legal numbers. This lets us make sure we don't add more
+ /// than one illegal number per range.
+ bool AddedIllegalLastTime = false;
+
+  /// Marks whether we found an illegal instruction in the previous step.
+ bool CanCombineWithPrevInstr = false;
+
+ /// Marks whether we have found a set of instructions that is long enough
+ /// to be considered for similarity.
+ bool HaveLegalRange = false;
+
+ /// This allocator pointer is in charge of holding on to the IRInstructionData
+ /// so it is not deallocated until whatever external tool is using it is done
+ /// with the information.
+ SpecificBumpPtrAllocator<IRInstructionData> *InstDataAllocator = nullptr;
+
+ /// This allocator pointer is in charge of creating the IRInstructionDataList
+ /// so it is not deallocated until whatever external tool is using it is done
+ /// with the information.
+ SpecificBumpPtrAllocator<IRInstructionDataList> *IDLAllocator = nullptr;
+
+ /// Get an allocated IRInstructionData struct using the InstDataAllocator.
+ ///
+ /// \param I - The Instruction to wrap with IRInstructionData.
+ /// \param Legality - A boolean value that is true if the instruction is to
+ /// be considered for similarity, and false if not.
+ /// \param IDL - The InstructionDataList that the IRInstructionData is
+ /// inserted into.
+ /// \returns An allocated IRInstructionData struct.
+ IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality,
+ IRInstructionDataList &IDL);
+
+ /// Get an allocated IRInstructionDataList object using the IDLAllocator.
+ ///
+ /// \returns An allocated IRInstructionDataList object.
+ IRInstructionDataList *allocateIRInstructionDataList();
+
+ IRInstructionDataList *IDL = nullptr;
+
+ /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers
+ /// determined by \p InstrType. Two Instructions are mapped to the same value
+ /// if they are close as defined by the InstructionData class above.
+ ///
+ /// \param [in] BB - The BasicBlock to be mapped to integers.
+ /// \param [in,out] InstrList - Vector of IRInstructionData to append to.
+ /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to.
+ void convertToUnsignedVec(BasicBlock &BB,
+ std::vector<IRInstructionData *> &InstrList,
+ std::vector<unsigned> &IntegerMapping);
+
+ /// Maps an Instruction to a legal integer.
+ ///
+ /// \param [in] It - The Instruction to be mapped to an integer.
+ /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
+ /// append to.
+ /// \param [in,out] InstrListForBB - Vector of InstructionData to append to.
+ /// \returns The integer \p It was mapped to.
+ unsigned mapToLegalUnsigned(BasicBlock::iterator &It,
+ std::vector<unsigned> &IntegerMappingForBB,
+ std::vector<IRInstructionData *> &InstrListForBB);
+
+ /// Maps an Instruction to an illegal integer.
+ ///
+ /// \param [in] It - The \p Instruction to be mapped to an integer.
+ /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
+ /// append to.
+ /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to.
+ /// \param End - true if creating a dummy IRInstructionData at the end of a
+ /// basic block.
+ /// \returns The integer \p It was mapped to.
+ unsigned mapToIllegalUnsigned(
+ BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
+ std::vector<IRInstructionData *> &InstrListForBB, bool End = false);
+
+ IRInstructionMapper(SpecificBumpPtrAllocator<IRInstructionData> *IDA,
+ SpecificBumpPtrAllocator<IRInstructionDataList> *IDLA)
+ : InstDataAllocator(IDA), IDLAllocator(IDLA) {
+ // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
+ // changed.
+ assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) &&
+ "DenseMapInfo<unsigned>'s empty key isn't -1!");
+ assert(DenseMapInfo<unsigned>::getTombstoneKey() ==
+ static_cast<unsigned>(-2) &&
+ "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
+
+ IDL = new (IDLAllocator->Allocate())
+ IRInstructionDataList();
+ }
+
+  /// Custom InstVisitor to classify different instructions for whether they
+  /// can be analyzed for similarity.
+ struct InstructionClassification
+ : public InstVisitor<InstructionClassification, InstrType> {
+ InstructionClassification() {}
+
+ // TODO: Determine a scheme to resolve when the label is similar enough.
+ InstrType visitBranchInst(BranchInst &BI) { return Illegal; }
+ // TODO: Determine a scheme to resolve when the labels are similar enough.
+ InstrType visitPHINode(PHINode &PN) { return Illegal; }
+ // TODO: Handle allocas.
+ InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
+ // We exclude variable argument instructions since variable arguments
+    // require extra checking of the argument list.
+ InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; }
+ // We exclude all exception handling cases since they are so context
+ // dependent.
+ InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; }
+ InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; }
+ // DebugInfo should be included in the regions, but should not be
+ // analyzed for similarity as it has no bearing on the outcome of the
+ // program.
+ InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
+ // TODO: Handle specific intrinsics.
+ InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
+ // We only allow call instructions where the function has a name and
+ // is not an indirect call.
+ InstrType visitCallInst(CallInst &CI) {
+ Function *F = CI.getCalledFunction();
+ if (!F || CI.isIndirectCall() || !F->hasName())
+ return Illegal;
+ return Legal;
+ }
+    // TODO: We do not currently handle similarity that changes the control flow.
+ InstrType visitInvokeInst(InvokeInst &II) { return Illegal; }
+    // TODO: We do not currently handle similarity that changes the control flow.
+ InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; }
+ // TODO: Handle interblock similarity.
+ InstrType visitTerminator(Instruction &I) { return Illegal; }
+ InstrType visitInstruction(Instruction &I) { return Legal; }
+ };
+
+ /// Maps an Instruction to a member of InstrType.
+ InstructionClassification InstClassifier;
+};
+
+/// This is a class that wraps a range of IRInstructionData from one point to
+/// another in the vector of IRInstructionData, which is a region of the
+/// program. It is also responsible for defining the structure within this
+/// region of instructions.
+///
+/// The structure of a region is defined through a value numbering system
+/// assigned to each unique value in a region at the creation of the
+/// IRSimilarityCandidate.
+///
+/// For example, for each Instruction we add a mapping for each new
+/// value seen in that Instruction.
+/// IR: Mapping Added:
+/// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
+/// %add2 = add i32 %a, %1 %add2 -> 4
+/// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
+///
+/// We can compare IRSimilarityCandidates against one another.
+/// The \ref isSimilar function compares each IRInstructionData against one
+/// another and if we have the same sequences of IRInstructionData that would
+/// create the same hash, we have similar IRSimilarityCandidates.
+///
+/// We can also compare the structure of IRSimilarityCandidates. If we can
+/// create a mapping of registers in the region contained by one
+/// IRSimilarityCandidate to the region contained by different
+/// IRSimilarityCandidate, they can be considered structurally similar.
+///
+/// IRSimilarityCandidate1: IRSimilarityCandidate2:
+/// %add1 = add i32 %a, %b %add1 = add i32 %d, %e
+/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
+/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
+///
+/// Can have the following mapping from candidate to candidate of:
+/// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4
+/// and can be considered similar.
+///
+/// IRSimilarityCandidate1: IRSimilarityCandidate2:
+/// %add1 = add i32 %a, %b %add1 = add i32 %d, c4
+/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
+/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
+///
+/// We cannot create the same mapping since c4 is not used in the
+/// same way as %b or c2.
+class IRSimilarityCandidate {
+private:
+ /// The start index of this IRSimilarityCandidate in the instruction list.
+ unsigned StartIdx = 0;
+
+ /// The number of instructions in this IRSimilarityCandidate.
+ unsigned Len = 0;
+
+ /// The first instruction in this IRSimilarityCandidate.
+ IRInstructionData *FirstInst = nullptr;
+
+ /// The last instruction in this IRSimilarityCandidate.
+ IRInstructionData *LastInst = nullptr;
+
+ /// Global Value Numbering structures
+ /// @{
+ /// Stores the mapping of the value to the number assigned to it in the
+ /// IRSimilarityCandidate.
+ DenseMap<Value *, unsigned> ValueToNumber;
+ /// Stores the mapping of the number to the value assigned this number.
+ DenseMap<unsigned, Value *> NumberToValue;
+ /// @}
+
+public:
+ /// \param StartIdx - The starting location of the region.
+ /// \param Len - The length of the region.
+ /// \param FirstInstIt - The starting IRInstructionData of the region.
+ /// \param LastInstIt - The ending IRInstructionData of the region.
+ IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
+ IRInstructionData *FirstInstIt,
+ IRInstructionData *LastInstIt);
+
+ /// \param A - The first IRInstructionCandidate to compare.
+ /// \param B - The second IRInstructionCandidate to compare.
+ /// \returns True when every IRInstructionData in \p A is similar to every
+ /// IRInstructionData in \p B.
+ static bool isSimilar(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B);
+
+ /// \param A - The first IRInstructionCandidate to compare.
+ /// \param B - The second IRInstructionCandidate to compare.
+ /// \returns True when every IRInstructionData in \p A is structurally similar
+ /// to \p B.
+ static bool compareStructure(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B);
+
+ struct OperandMapping {
+ /// The IRSimilarityCandidate that holds the instruction the OperVals were
+ /// pulled from.
+ const IRSimilarityCandidate &IRSC;
+
+ /// The operand values to be analyzed.
+ ArrayRef<Value *> &OperVals;
+
+ /// The current mapping of global value numbers from one IRSimilarityCandidate
+ /// to another IRSimilarityCandidate.
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping;
+ };
+
+ /// Compare the operands in \p A and \p B and check that the current mapping
+  /// of global value numbers from \p A to \p B and \p B to \p A is consistent.
+ ///
+ /// \param A - The first IRInstructionCandidate, operand values, and current
+ /// operand mappings to compare.
+ /// \param B - The second IRInstructionCandidate, operand values, and current
+ /// operand mappings to compare.
+ /// \returns true if the IRSimilarityCandidates operands are compatible.
+ static bool compareNonCommutativeOperandMapping(OperandMapping A,
+ OperandMapping B);
+
+ /// Compare the operands in \p A and \p B and check that the current mapping
+  /// of global value numbers from \p A to \p B and \p B to \p A is consistent
+ /// given that the operands are commutative.
+ ///
+ /// \param A - The first IRInstructionCandidate, operand values, and current
+ /// operand mappings to compare.
+ /// \param B - The second IRInstructionCandidate, operand values, and current
+ /// operand mappings to compare.
+ /// \returns true if the IRSimilarityCandidates operands are compatible.
+ static bool compareCommutativeOperandMapping(OperandMapping A,
+ OperandMapping B);
+
+ /// Compare the start and end indices of the two IRSimilarityCandidates for
+ /// whether they overlap. If the start instruction of one
+ /// IRSimilarityCandidate is less than the end instruction of the other, and
+ /// the start instruction of one is greater than the start instruction of the
+ /// other, they overlap.
+ ///
+ /// \returns true if the IRSimilarityCandidates do not have overlapping
+ /// instructions.
+ static bool overlap(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B);
+
+ /// \returns the number of instructions in this Candidate.
+ unsigned getLength() const { return Len; }
+
+ /// \returns the start index of this IRSimilarityCandidate.
+ unsigned getStartIdx() const { return StartIdx; }
+
+ /// \returns the end index of this IRSimilarityCandidate.
+ unsigned getEndIdx() const { return StartIdx + Len - 1; }
+
+ /// \returns The first IRInstructionData.
+ IRInstructionData *front() const { return FirstInst; }
+ /// \returns The last IRInstructionData.
+ IRInstructionData *back() const { return LastInst; }
+
+ /// \returns The first Instruction.
+ Instruction *frontInstruction() { return FirstInst->Inst; }
+ /// \returns The last Instruction
+ Instruction *backInstruction() { return LastInst->Inst; }
+
+ /// \returns The BasicBlock the IRSimilarityCandidate starts in.
+ BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); }
+ /// \returns The BasicBlock the IRSimilarityCandidate ends in.
+ BasicBlock *getEndBB() { return LastInst->Inst->getParent(); }
+
+ /// \returns The Function that the IRSimilarityCandidate is located in.
+ Function *getFunction() { return getStartBB()->getParent(); }
+
+ /// Finds the positive number associated with \p V if it has been mapped.
+ /// \param [in] V - the Value to find.
+ /// \returns The positive number corresponding to the value.
+ /// \returns None if not present.
+ Optional<unsigned> getGVN(Value *V) {
+ assert(V != nullptr && "Value is a nullptr?");
+ DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V);
+ if (VNIt == ValueToNumber.end())
+ return None;
+ return VNIt->second;
+ }
+
+  /// Finds the Value associated with \p Num if it exists.
+ /// \param [in] Num - the number to find.
+ /// \returns The Value associated with the number.
+ /// \returns None if not present.
+ Optional<Value *> fromGVN(unsigned Num) {
+ DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num);
+ if (VNIt == NumberToValue.end())
+ return None;
+ assert(VNIt->second != nullptr && "Found value is a nullptr!");
+ return VNIt->second;
+ }
+
+  /// \param RHS - The IRSimilarityCandidate to compare against.
+  /// \returns true if this IRSimilarityCandidate occurs after \p RHS in the
+  /// program.
+ bool operator<(const IRSimilarityCandidate &RHS) const {
+ return getStartIdx() > RHS.getStartIdx();
+ }
+
+ using iterator = IRInstructionDataList::iterator;
+ iterator begin() const { return iterator(front()); }
+ iterator end() const { return std::next(iterator(back())); }
+};
+
+typedef std::vector<IRSimilarityCandidate> SimilarityGroup;
+typedef std::vector<SimilarityGroup> SimilarityGroupList;
+
+/// This class puts all the pieces of the IRInstructionData,
+/// IRInstructionMapper, IRSimilarityCandidate together.
+///
+/// It first feeds the Module or vector of Modules into the IRInstructionMapper,
+/// and puts all the mapped instructions into a single long list of
+/// IRInstructionData.
+///
+/// The list of unsigned integers is given to the Suffix Tree or similar data
+/// structure to find repeated subsequences. We construct an
+/// IRSimilarityCandidate for each instance of the subsequence. We compare them
+/// against one another since these repeated subsequences can have different
+/// structure. For each different kind of structure found, we create a
+/// similarity group.
+///
+/// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are
+/// structurally similar to one another, while C is different we would have two
+/// SimilarityGroups:
+///
+/// SimilarityGroup 1: SimilarityGroup 2
+/// A, B, D C
+///
+/// A list of the different similarity groups is then returned after
+/// analyzing the module.
+class IRSimilarityIdentifier {
+public:
+ IRSimilarityIdentifier()
+ : Mapper(&InstDataAllocator, &InstDataListAllocator) {}
+
+ /// \param M the module to find similarity in.
+ explicit IRSimilarityIdentifier(Module &M)
+ : Mapper(&InstDataAllocator, &InstDataListAllocator) {
+ findSimilarity(M);
+ }
+
+private:
+ /// Map the instructions in the module to unsigned integers, using mapping
+ /// already present in the Mapper if possible.
+ ///
+  /// \param [in] M - The module to map to integers.
+ /// \param [in,out] InstrList - The vector to append IRInstructionData to.
+ /// \param [in,out] IntegerMapping - The vector to append integers to.
+ void populateMapper(Module &M, std::vector<IRInstructionData *> &InstrList,
+ std::vector<unsigned> &IntegerMapping);
+
+ /// Map the instructions in the modules vector to unsigned integers, using
+ /// mapping already present in the mapper if possible.
+ ///
+ /// \param [in] Modules - The list of modules to use to populate the mapper
+ /// \param [in,out] InstrList - The vector to append IRInstructionData to.
+ /// \param [in,out] IntegerMapping - The vector to append integers to.
+ void populateMapper(ArrayRef<std::unique_ptr<Module>> &Modules,
+ std::vector<IRInstructionData *> &InstrList,
+ std::vector<unsigned> &IntegerMapping);
+
+  /// Find the similarity candidates in \p InstrList and the corresponding
+  /// \p IntegerMapping, and record the candidates found in the program.
+  ///
+  /// \param [in,out] InstrList - The vector to append IRInstructionData to.
+  /// \param [in,out] IntegerMapping - The vector to append integers to.
+ void findCandidates(std::vector<IRInstructionData *> &InstrList,
+ std::vector<unsigned> &IntegerMapping);
+
+public:
+ // Find the IRSimilarityCandidates in the \p Modules and group by structural
+  // similarity in a SimilarityGroup; each group is returned in a
+ // SimilarityGroupList.
+ //
+ // \param [in] Modules - the modules to analyze.
+ // \returns The groups of similarity ranges found in the modules.
+ SimilarityGroupList &
+ findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules);
+
+ // Find the IRSimilarityCandidates in the given Module grouped by structural
+ // similarity in a SimilarityGroup, contained inside a SimilarityGroupList.
+ //
+ // \param [in] M - the module to analyze.
+ // \returns The groups of similarity ranges found in the module.
+ SimilarityGroupList &findSimilarity(Module &M);
+
+ // Clears \ref SimilarityCandidates if it is already filled by a previous run.
+ void resetSimilarityCandidates() {
+    // If we've already analyzed a Module or set of Modules, we must clear
+    // the SimilarityCandidates to make sure we do not have stale values
+    // hanging around.
+ if (SimilarityCandidates.hasValue())
+ SimilarityCandidates->clear();
+ else
+ SimilarityCandidates = SimilarityGroupList();
+ }
+
+ // \returns The groups of similarity ranges found in the most recently passed
+ // set of modules.
+ Optional<SimilarityGroupList> &getSimilarity() {
+ return SimilarityCandidates;
+ }
+
+private:
+ /// The allocator for IRInstructionData.
+ SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+
+ /// The allocator for IRInstructionDataLists.
+ SpecificBumpPtrAllocator<IRInstructionDataList> InstDataListAllocator;
+
+ /// Map Instructions to unsigned integers and wraps the Instruction in an
+ /// instance of IRInstructionData.
+ IRInstructionMapper Mapper;
+
+ /// The SimilarityGroups found with the most recent run of \ref
+ /// findSimilarity. None if there is no recent run.
+ Optional<SimilarityGroupList> SimilarityCandidates;
+};
+
+} // end namespace IRSimilarity
+
+/// An analysis pass based on the legacy pass manager that runs and returns
+/// the IRSimilarityIdentifier run on the Module.
+class IRSimilarityIdentifierWrapperPass : public ModulePass {
+ std::unique_ptr<IRSimilarity::IRSimilarityIdentifier> IRSI;
+
+public:
+ static char ID;
+ IRSimilarityIdentifierWrapperPass();
+
+ IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; }
+ const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; }
+
+ bool doInitialization(Module &M) override;
+ bool doFinalization(Module &M) override;
+ bool runOnModule(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
+/// An analysis pass that runs and returns the IRSimilarityIdentifier run on the
+/// Module.
+class IRSimilarityAnalysis : public AnalysisInfoMixin<IRSimilarityAnalysis> {
+public:
+ typedef IRSimilarity::IRSimilarityIdentifier Result;
+
+ Result run(Module &M, ModuleAnalysisManager &);
+
+private:
+ friend AnalysisInfoMixin<IRSimilarityAnalysis>;
+ static AnalysisKey Key;
+};
+
+/// Printer pass that uses \c IRSimilarityAnalysis.
+class IRSimilarityAnalysisPrinterPass
+ : public PassInfoMixin<IRSimilarityAnalysisPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
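Illustrative sketch (not part of the diff): the whole public surface needed to
consume the identifier is the constructor, getSimilarity() and the per-candidate
accessors, e.g.:

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::IRSimilarity;

// Runs the identifier over M (the constructor calls findSimilarity) and prints
// one line per candidate, grouped by structural similarity.
void dumpSimilarityGroups(Module &M) {
  IRSimilarityIdentifier Identifier(M);
  Optional<SimilarityGroupList> &Groups = Identifier.getSimilarity();
  if (!Groups)
    return;
  unsigned GroupIdx = 0;
  for (const SimilarityGroup &Group : *Groups) {
    errs() << "group " << GroupIdx++ << " (" << Group.size() << " candidates)\n";
    for (const IRSimilarityCandidate &Cand : Group)
      errs() << "  [" << Cand.getStartIdx() << ", " << Cand.getEndIdx()
             << "] length " << Cand.getLength() << "\n";
  }
}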
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/IVDescriptors.h b/contrib/llvm-project/llvm/include/llvm/Analysis/IVDescriptors.h
index 1bae83d13c7a..28546110ba04 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -32,7 +32,24 @@ class PredicatedScalarEvolution;
class ScalarEvolution;
class SCEV;
class DominatorTree;
-class ICFLoopSafetyInfo;
+
+/// These are the kinds of recurrences that we support.
+enum class RecurKind {
+ None, ///< Not a recurrence.
+ Add, ///< Sum of integers.
+ Mul, ///< Product of integers.
+ Or, ///< Bitwise or logical OR of integers.
+ And, ///< Bitwise or logical AND of integers.
+ Xor, ///< Bitwise or logical XOR of integers.
+ SMin, ///< Signed integer min implemented in terms of select(cmp()).
+ SMax, ///< Signed integer max implemented in terms of select(cmp()).
+  UMin,   ///< Unsigned integer min implemented in terms of select(cmp()).
+ UMax, ///< Unsigned integer max implemented in terms of select(cmp()).
+ FAdd, ///< Sum of floats.
+ FMul, ///< Product of floats.
+ FMin, ///< FP min implemented in terms of select(cmp()).
+ FMax ///< FP max implemented in terms of select(cmp()).
+};
/// The RecurrenceDescriptor is used to identify recurrences variables in a
/// loop. Reduction is a special case of recurrence that has uses of the
@@ -48,40 +65,13 @@ class ICFLoopSafetyInfo;
/// This struct holds information about recurrence variables.
class RecurrenceDescriptor {
public:
- /// This enum represents the kinds of recurrences that we support.
- enum RecurrenceKind {
- RK_NoRecurrence, ///< Not a recurrence.
- RK_IntegerAdd, ///< Sum of integers.
- RK_IntegerMult, ///< Product of integers.
- RK_IntegerOr, ///< Bitwise or logical OR of numbers.
- RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
- RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
- RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
- RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult, ///< Product of floats.
- RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
- };
-
- // This enum represents the kind of minmax recurrence.
- enum MinMaxRecurrenceKind {
- MRK_Invalid,
- MRK_UIntMin,
- MRK_UIntMax,
- MRK_SIntMin,
- MRK_SIntMax,
- MRK_FloatMin,
- MRK_FloatMax
- };
-
RecurrenceDescriptor() = default;
- RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
- FastMathFlags FMF, MinMaxRecurrenceKind MK,
- Instruction *UAI, Type *RT, bool Signed,
- SmallPtrSetImpl<Instruction *> &CI)
+ RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K,
+ FastMathFlags FMF, Instruction *UAI, Type *RT,
+ bool Signed, SmallPtrSetImpl<Instruction *> &CI)
: StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
- MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT),
- IsSigned(Signed) {
+ UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
CastInsts.insert(CI.begin(), CI.end());
}
@@ -89,22 +79,22 @@ public:
class InstDesc {
public:
InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr)
- : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),
- UnsafeAlgebraInst(UAI) {}
+ : IsRecurrence(IsRecur), PatternLastInst(I),
+ RecKind(RecurKind::None), UnsafeAlgebraInst(UAI) {}
- InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr)
- : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K),
+ InstDesc(Instruction *I, RecurKind K, Instruction *UAI = nullptr)
+ : IsRecurrence(true), PatternLastInst(I), RecKind(K),
UnsafeAlgebraInst(UAI) {}
- bool isRecurrence() { return IsRecurrence; }
+ bool isRecurrence() const { return IsRecurrence; }
- bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
+ bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; }
- Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
+ Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; }
- MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; }
+ RecurKind getRecKind() const { return RecKind; }
- Instruction *getPatternInst() { return PatternLastInst; }
+ Instruction *getPatternInst() const { return PatternLastInst; }
private:
// Is this instruction a recurrence candidate.
@@ -112,8 +102,8 @@ public:
// The last instruction in a min/max pattern (select of the select(icmp())
// pattern), or the current recurrence instruction otherwise.
Instruction *PatternLastInst;
- // If this is a min/max pattern the comparison predicate.
- MinMaxRecurrenceKind MinMaxKind;
+ // If this is a min/max pattern.
+ RecurKind RecKind;
// Recurrence has unsafe algebra.
Instruction *UnsafeAlgebraInst;
};
@@ -123,7 +113,7 @@ public:
/// select(icmp()) this function advances the instruction pointer 'I' from the
/// compare instruction to the select instruction and stores this pointer in
/// 'PatternLastInst' member of the returned struct.
- static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+ static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind,
InstDesc &Prev, bool HasFunNoNaNAttr);
/// Returns true if instruction I has multiple uses in Insts
@@ -134,27 +124,28 @@ public:
  /// Returns true if all uses of the instruction I are within the Set.
static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set);
- /// Returns a struct describing if the instruction if the instruction is a
+ /// Returns a struct describing if the instruction is a
/// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
- /// or max(X, Y).
- static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev);
+ /// or max(X, Y). \p Prev specifies the description of an already processed
+ /// select instruction, so its corresponding cmp can be matched to it.
+ static InstDesc isMinMaxSelectCmpPattern(Instruction *I,
+ const InstDesc &Prev);
/// Returns a struct describing if the instruction is a
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
- static InstDesc isConditionalRdxPattern(RecurrenceKind Kind, Instruction *I);
+ static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I);
/// Returns identity corresponding to the RecurrenceKind.
- static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp);
+ static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp);
- /// Returns the opcode of binary operation corresponding to the
- /// RecurrenceKind.
- static unsigned getRecurrenceBinOp(RecurrenceKind Kind);
+ /// Returns the opcode corresponding to the RecurrenceKind.
+ static unsigned getOpcode(RecurKind Kind);
/// Returns true if Phi is a reduction of type Kind and adds it to the
/// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
/// non-null, the minimal bit width needed to compute the reduction will be
/// computed.
- static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
+ static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop,
bool HasFunNoNaNAttr,
RecurrenceDescriptor &RedDes,
DemandedBits *DB = nullptr,
@@ -183,42 +174,63 @@ public:
DenseMap<Instruction *, Instruction *> &SinkAfter,
DominatorTree *DT);
- RecurrenceKind getRecurrenceKind() { return Kind; }
+ RecurKind getRecurrenceKind() const { return Kind; }
- MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
+ unsigned getOpcode() const { return getOpcode(getRecurrenceKind()); }
- FastMathFlags getFastMathFlags() { return FMF; }
+ FastMathFlags getFastMathFlags() const { return FMF; }
- TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
+ TrackingVH<Value> getRecurrenceStartValue() const { return StartValue; }
- Instruction *getLoopExitInstr() { return LoopExitInstr; }
+ Instruction *getLoopExitInstr() const { return LoopExitInstr; }
/// Returns true if the recurrence has unsafe algebra which requires a relaxed
/// floating-point model.
- bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
+ bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; }
/// Returns first unsafe algebra instruction in the PHI node's use-chain.
- Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
+ Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; }
/// Returns true if the recurrence kind is an integer kind.
- static bool isIntegerRecurrenceKind(RecurrenceKind Kind);
+ static bool isIntegerRecurrenceKind(RecurKind Kind);
/// Returns true if the recurrence kind is a floating point kind.
- static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind);
+ static bool isFloatingPointRecurrenceKind(RecurKind Kind);
/// Returns true if the recurrence kind is an arithmetic kind.
- static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
+ static bool isArithmeticRecurrenceKind(RecurKind Kind);
+
+ /// Returns true if the recurrence kind is an integer min/max kind.
+ static bool isIntMinMaxRecurrenceKind(RecurKind Kind) {
+ return Kind == RecurKind::UMin || Kind == RecurKind::UMax ||
+ Kind == RecurKind::SMin || Kind == RecurKind::SMax;
+ }
+
+ /// Returns true if the recurrence kind is a floating-point min/max kind.
+ static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
+ return Kind == RecurKind::FMin || Kind == RecurKind::FMax;
+ }
+
+ /// Returns true if the recurrence kind is any min/max kind.
+ static bool isMinMaxRecurrenceKind(RecurKind Kind) {
+ return isIntMinMaxRecurrenceKind(Kind) || isFPMinMaxRecurrenceKind(Kind);
+ }
/// Returns the type of the recurrence. This type can be narrower than the
/// actual type of the Phi if the recurrence has been type-promoted.
- Type *getRecurrenceType() { return RecurrenceType; }
+ Type *getRecurrenceType() const { return RecurrenceType; }
/// Returns a reference to the instructions used for type-promoting the
/// recurrence.
- SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; }
+ const SmallPtrSet<Instruction *, 8> &getCastInsts() const { return CastInsts; }
/// Returns true if all source operands of the recurrence are SExtInsts.
- bool isSigned() { return IsSigned; }
+ bool isSigned() const { return IsSigned; }
+
+ /// Attempts to find a chain of operations from Phi to LoopExitInst that can
+ /// be treated as a set of reductions instructions for in-loop reductions.
+ SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi,
+ Loop *L) const;
private:
// The starting value of the recurrence.
@@ -227,12 +239,10 @@ private:
// The instruction who's value is used outside the loop.
Instruction *LoopExitInstr = nullptr;
// The kind of the recurrence.
- RecurrenceKind Kind = RK_NoRecurrence;
+ RecurKind Kind = RecurKind::None;
// The fast-math flags on the recurrent instructions. We propagate these
// fast-math flags into the vectorized FP instructions we generate.
FastMathFlags FMF;
- // If this a min/max recurrence the kind of recurrence.
- MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
  // First occurrence of unsafe algebra in the PHI's use-chain.
Instruction *UnsafeAlgebraInst = nullptr;
// The type of the recurrence.
@@ -258,12 +268,6 @@ public:
/// Default constructor - creates an invalid induction.
InductionDescriptor() = default;
- /// Get the consecutive direction. Returns:
- /// 0 - unknown or non-consecutive.
- /// 1 - consecutive and increasing.
- /// -1 - consecutive and decreasing.
- int getConsecutiveDirection() const;
-
Value *getStartValue() const { return StartValue; }
InductionKind getKind() const { return IK; }
const SCEV *getStep() const { return Step; }
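Illustrative sketch (not part of the diff): the new RecurKind enum folds the
old min/max sub-kind into the main kind, so the static predicates above are
enough to dispatch on a reduction, e.g.:

#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Classifies a recurrence kind with the helpers declared above; for the
// non-min/max kinds, getOpcode(RecurKind) yields the matching binary opcode.
void describeRecurrence(RecurKind K) {
  if (K == RecurKind::None) {
    errs() << "not a recurrence\n";
    return;
  }
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(K)) {
    bool IsFP = RecurrenceDescriptor::isFPMinMaxRecurrenceKind(K);
    errs() << (IsFP ? "floating-point" : "integer") << " min/max recurrence\n";
    return;
  }
  errs() << "reduction with opcode " << RecurrenceDescriptor::getOpcode(K) << "\n";
}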
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/InlineAdvisor.h b/contrib/llvm-project/llvm/include/llvm/Analysis/InlineAdvisor.h
index 3480d93385a8..c39fae13d3b8 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -9,12 +9,12 @@
#ifndef LLVM_INLINEADVISOR_H_
#define LLVM_INLINEADVISOR_H_
-#include <memory>
-#include <unordered_set>
-#include <vector>
-
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
+#include <memory>
+#include <unordered_set>
namespace llvm {
class BasicBlock;
@@ -36,7 +36,11 @@ class OptimizationRemarkEmitter;
/// requires the full C Tensorflow API library, and evaluates models
/// dynamically. This mode also permits generating training logs, for offline
/// training.
-enum class InliningAdvisorMode : int { Default, Release, Development };
+enum class InliningAdvisorMode : int {
+ Default,
+ Release,
+ Development
+};
class InlineAdvisor;
/// Capture state between an inlining decision having had been made, and
@@ -62,10 +66,7 @@ public:
/// behavior by implementing the corresponding record*Impl.
///
/// Call after inlining succeeded, and did not result in deleting the callee.
- void recordInlining() {
- markRecorded();
- recordInliningImpl();
- }
+ void recordInlining();
/// Call after inlining succeeded, and resulted in deleting the callee.
void recordInliningWithCalleeDeleted();
@@ -111,21 +112,44 @@ private:
assert(!Recorded && "Recording should happen exactly once");
Recorded = true;
}
+ void recordInlineStatsIfNeeded();
bool Recorded = false;
};
+class DefaultInlineAdvice : public InlineAdvice {
+public:
+ DefaultInlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
+ Optional<InlineCost> OIC, OptimizationRemarkEmitter &ORE,
+ bool EmitRemarks = true)
+ : InlineAdvice(Advisor, CB, ORE, OIC.hasValue()), OriginalCB(&CB),
+ OIC(OIC), EmitRemarks(EmitRemarks) {}
+
+private:
+ void recordUnsuccessfulInliningImpl(const InlineResult &Result) override;
+ void recordInliningWithCalleeDeletedImpl() override;
+ void recordInliningImpl() override;
+
+private:
+ CallBase *const OriginalCB;
+ Optional<InlineCost> OIC;
+ bool EmitRemarks;
+};
+
/// Interface for deciding whether to inline a call site or not.
class InlineAdvisor {
public:
InlineAdvisor(InlineAdvisor &&) = delete;
- virtual ~InlineAdvisor() { freeDeletedFunctions(); }
+ virtual ~InlineAdvisor();
/// Get an InlineAdvice containing a recommendation on whether to
/// inline or not. \p CB is assumed to be a direct call. \p FAM is assumed to
- /// be up-to-date wrt previous inlining decisions.
+ /// be up-to-date wrt previous inlining decisions. \p MandatoryOnly indicates
+  /// only mandatory (always-inline) call sites should be recommended; this
+  /// allows the InlineAdvisor to track such inlinings.
/// Returns an InlineAdvice with the inlining recommendation.
- virtual std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) = 0;
+ std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB,
+ bool MandatoryOnly = false);
/// This must be called when the Inliner pass is entered, to allow the
/// InlineAdvisor update internal state, as result of function passes run
@@ -138,9 +162,14 @@ public:
virtual void onPassExit() {}
protected:
- InlineAdvisor(FunctionAnalysisManager &FAM) : FAM(FAM) {}
+ InlineAdvisor(Module &M, FunctionAnalysisManager &FAM);
+ virtual std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) = 0;
+ virtual std::unique_ptr<InlineAdvice> getMandatoryAdvice(CallBase &CB,
+ bool Advice);
+ Module &M;
FunctionAnalysisManager &FAM;
+ std::unique_ptr<ImportedFunctionsInliningStatistics> ImportedFunctionsStats;
/// We may want to defer deleting functions to after the inlining for a whole
/// module has finished. This allows us to reliably use function pointers as
@@ -155,6 +184,14 @@ protected:
return DeletedFunctions.count(F);
}
+ enum class MandatoryInliningKind { NotMandatory, Always, Never };
+
+ static MandatoryInliningKind getMandatoryKind(CallBase &CB,
+ FunctionAnalysisManager &FAM,
+ OptimizationRemarkEmitter &ORE);
+
+ OptimizationRemarkEmitter &getCallerORE(CallBase &CB);
+
private:
friend class InlineAdvice;
void markFunctionAsDeleted(Function *F);
@@ -166,11 +203,12 @@ private:
/// reusable as-is for inliner pass test scenarios, as well as for regular use.
class DefaultInlineAdvisor : public InlineAdvisor {
public:
- DefaultInlineAdvisor(FunctionAnalysisManager &FAM, InlineParams Params)
- : InlineAdvisor(FAM), Params(Params) {}
+ DefaultInlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
+ InlineParams Params)
+ : InlineAdvisor(M, FAM), Params(Params) {}
private:
- std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override;
+ std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
void onPassExit() override { freeDeletedFunctions(); }
@@ -190,7 +228,8 @@ public:
// InlineAdvisor must be preserved across analysis invalidations.
return false;
}
- bool tryCreate(InlineParams Params, InliningAdvisorMode Mode);
+ bool tryCreate(InlineParams Params, InliningAdvisorMode Mode,
+ StringRef ReplayFile);
InlineAdvisor *getAdvisor() const { return Advisor.get(); }
void clear() { Advisor.reset(); }
@@ -208,6 +247,12 @@ std::unique_ptr<InlineAdvisor>
getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM);
#endif
+#ifdef LLVM_HAVE_TF_API
+std::unique_ptr<InlineAdvisor>
+getDevelopmentModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
+ std::function<bool(CallBase &)> GetDefaultAdvice);
+#endif
+
// Default (manual policy) decision making helper APIs. Shared with the legacy
// pass manager inliner.
@@ -226,6 +271,9 @@ void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
bool ForProfileContext = false,
const char *PassName = nullptr);
+/// Get the call site location as a string.
+std::string getCallSiteLocation(DebugLoc DLoc);
+
/// Add location info to ORE message.
void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc);
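Illustrative sketch (not part of the diff) of the calling convention implied by
the interface above; isInliningRecommended() and recordUnattemptedInlining()
are assumed from the wider InlineAdvice interface and are not visible in this
hunk.

#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Sketch of an inliner's interaction with the advisor; every InlineAdvice is
// expected to be told its outcome exactly once (see markRecorded above).
bool adviseAndMaybeInline(InlineAdvisor &Advisor, CallBase &CB) {
  std::unique_ptr<InlineAdvice> Advice = Advisor.getAdvice(CB);
  if (!Advice->isInliningRecommended()) { // assumed accessor, not in this hunk
    Advice->recordUnattemptedInlining();  // assumed recorder, not in this hunk
    return false;
  }
  // ... the actual call to InlineFunction() would go here ...
  Advice->recordInlining(); // or recordInliningWithCalleeDeleted()
  return true;
}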
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/InlineFeaturesAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/InlineFeaturesAnalysis.h
deleted file mode 100644
index cc3f96c424e9..000000000000
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/InlineFeaturesAnalysis.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===- InlineFeaturesAnalysis.h - ML Policy Feature extraction -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_INLINEFEATURESANALYSIS_H_
-#define LLVM_INLINEFEATURESANALYSIS_H_
-
-#include "llvm/IR/PassManager.h"
-
-namespace llvm {
-class Function;
-
-class InlineFeaturesAnalysis
- : public AnalysisInfoMixin<InlineFeaturesAnalysis> {
-public:
- static AnalysisKey Key;
- struct Result {
- /// Number of basic blocks
- int64_t BasicBlockCount = 0;
-
- /// Number of blocks reached from a conditional instruction, or that are
- /// 'cases' of a SwitchInstr.
- // FIXME: We may want to replace this with a more meaningful metric, like
- // number of conditionally executed blocks:
- // 'if (a) s();' would be counted here as 2 blocks, just like
- // 'if (a) s(); else s2(); s3();' would.
- int64_t BlocksReachedFromConditionalInstruction = 0;
-
- /// Number of uses of this function, plus 1 if the function is callable
- /// outside the module.
- int64_t Uses = 0;
-
- /// Number of direct calls made from this function to other functions
- /// defined in this module.
- int64_t DirectCallsToDefinedFunctions = 0;
- };
- Result run(const Function &F, FunctionAnalysisManager &FAM);
-};
-
-} // namespace llvm
-#endif // LLVM_INLINEFEATURESANALYSIS_H_
\ No newline at end of file
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h
index 29a6f5914674..ab2cf52494c0 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h
@@ -31,5 +31,15 @@ public:
private:
std::unique_ptr<TFModelEvaluator> Evaluator;
};
+
+class InlineSizeEstimatorAnalysisPrinterPass
+ : public PassInfoMixin<InlineSizeEstimatorAnalysisPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit InlineSizeEstimatorAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
} // namespace llvm
-#endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
\ No newline at end of file
+#endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/InstCount.h b/contrib/llvm-project/llvm/include/llvm/Analysis/InstCount.h
new file mode 100644
index 000000000000..e5ce822caf6e
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/InstCount.h
@@ -0,0 +1,28 @@
+//===- InstCount.h - Collects the count of all instructions -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass collects the count of all instructions and reports them
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_INSTCOUNT_H
+#define LLVM_ANALYSIS_INSTCOUNT_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+struct InstCountPass : PassInfoMixin<InstCountPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_INSTCOUNT_H
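A small sketch of how the new-PM printer/statistics passes introduced in this merge (InstCountPass here; the other *PrinterPass additions follow the same pattern) might be scheduled; the pipeline-building function is an assumption, not part of the patch.

  #include "llvm/Analysis/InstCount.h"
  #include "llvm/IR/PassManager.h"

  void addInstCount(llvm::FunctionPassManager &FPM) {
    // Counts every instruction in each function and reports the totals.
    FPM.addPass(llvm::InstCountPass());
  }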
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/InstructionSimplify.h b/contrib/llvm-project/llvm/include/llvm/Analysis/InstructionSimplify.h
index b5ae54fb98bc..17d6f30a35cb 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -26,6 +26,10 @@
// same call context of that function (and not split between caller and callee
// contexts of a directly recursive call, for example).
//
+// Additionally, these routines can't simplify to the instructions that are not
+// def-reachable, meaning we can't just scan the basic block for instructions
+// to simplify to.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
@@ -98,19 +102,39 @@ struct SimplifyQuery {
// be safely used.
const InstrInfoQuery IIQ;
+ /// Controls whether simplifications are allowed to constrain the range of
+ /// possible values for uses of undef. If it is false, simplifications are not
+ /// allowed to assume a particular value for a use of undef, for example.
+ bool CanUseUndef = true;
+
SimplifyQuery(const DataLayout &DL, const Instruction *CXTI = nullptr)
: DL(DL), CxtI(CXTI) {}
SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT = nullptr,
AssumptionCache *AC = nullptr,
- const Instruction *CXTI = nullptr, bool UseInstrInfo = true)
- : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI), IIQ(UseInstrInfo) {}
+ const Instruction *CXTI = nullptr, bool UseInstrInfo = true,
+ bool CanUseUndef = true)
+ : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI), IIQ(UseInstrInfo),
+ CanUseUndef(CanUseUndef) {}
SimplifyQuery getWithInstruction(Instruction *I) const {
SimplifyQuery Copy(*this);
Copy.CxtI = I;
return Copy;
}
+ SimplifyQuery getWithoutUndef() const {
+ SimplifyQuery Copy(*this);
+ Copy.CanUseUndef = false;
+ return Copy;
+ }
+
+ /// If CanUseUndef is true, returns whether \p V is undef.
+ /// Otherwise always return false.
+ bool isUndefValue(Value *V) const {
+ if (!CanUseUndef)
+ return false;
+ return isa<UndefValue>(V);
+ }
};
// NOTE: the explicit multiple argument versions of these functions are
@@ -268,7 +292,8 @@ Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q);
Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
OptimizationRemarkEmitter *ORE = nullptr);
-/// See if V simplifies when its operand Op is replaced with RepOp.
+/// See if V simplifies when its operand Op is replaced with RepOp. If not,
+/// return null.
/// AllowRefinement specifies whether the simplification can be a refinement,
/// or whether it needs to be strictly identical.
Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
@@ -288,17 +313,6 @@ bool replaceAndRecursivelySimplify(
const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr,
SmallSetVector<Instruction *, 8> *UnsimplifiedUsers = nullptr);
-/// Recursively attempt to simplify an instruction.
-///
-/// This routine uses SimplifyInstruction to simplify 'I', and if successful
-/// replaces uses of 'I' with the simplified value. It then recurses on each
-/// of the users impacted. It returns true if any simplifications were
-/// performed.
-bool recursivelySimplifyInstruction(Instruction *I,
- const TargetLibraryInfo *TLI = nullptr,
- const DominatorTree *DT = nullptr,
- AssumptionCache *AC = nullptr);
-
// These helper functions return a SimplifyQuery structure that contains as
// many of the optional analysis we use as are currently valid. This is the
// strongly preferred way of constructing SimplifyQuery in passes.
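Minimal sketch of the new CanUseUndef machinery above, assuming the opcode and operands come from the calling pass; getWithoutUndef() produces a query copy whose isUndefValue() always answers false, so folds cannot pin down a value for undef operands.

  #include "llvm/Analysis/InstructionSimplify.h"
  using namespace llvm;

  Value *tryFoldNoUndef(unsigned Opcode, Value *LHS, Value *RHS,
                        const SimplifyQuery &Q) {
    // The derived query refuses to assume a concrete value for undef.
    return SimplifyBinOp(Opcode, LHS, RHS, Q.getWithoutUndef());
  }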
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/Interval.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Interval.h
index 5c9a4535bc7f..9afe659d00dd 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/Interval.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Interval.h
@@ -89,9 +89,6 @@ public:
return HeaderNode == I.HeaderNode;
}
- /// isLoop - Find out if there is a back edge in this interval...
- bool isLoop() const;
-
/// print - Show contents in human readable format...
void print(raw_ostream &O) const;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/IntervalIterator.h b/contrib/llvm-project/llvm/include/llvm/Analysis/IntervalIterator.h
index efaaf9715b3d..8e2273618a66 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/IntervalIterator.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/IntervalIterator.h
@@ -81,7 +81,7 @@ inline void addNodeToInterval(Interval *Int, BasicBlock *BB) {
// BasicBlocks are added to the interval.
inline void addNodeToInterval(Interval *Int, Interval *I) {
// Add all of the nodes in I as new nodes in Int.
- Int->Nodes.insert(Int->Nodes.end(), I->Nodes.begin(), I->Nodes.end());
+ llvm::append_range(Int->Nodes, I->Nodes);
}
template<class NodeTy, class OrigContainer_t, class GT = GraphTraits<NodeTy *>,
@@ -227,9 +227,7 @@ private:
if (Int->isSuccessor(NodeHeader)) {
// If we were in the successor list from before... remove from succ list
- Int->Successors.erase(std::remove(Int->Successors.begin(),
- Int->Successors.end(), NodeHeader),
- Int->Successors.end());
+ llvm::erase_value(Int->Successors, NodeHeader);
}
// Now that we have discovered that Node is in the interval, perhaps some
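For reference, a generic illustration of the two STLExtras helpers the hunks above switch to; the containers are arbitrary and only serve to show the behavior.

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;

  void demoSTLExtras() {
    SmallVector<int, 8> Dst = {1, 2, 3};
    SmallVector<int, 4> Src = {4, 5};
    append_range(Dst, Src); // Dst == {1, 2, 3, 4, 5}
    erase_value(Dst, 2);    // Dst == {1, 3, 4, 5}
  }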
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/contrib/llvm-project/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
index fb6605285156..8166b52aa226 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -73,13 +73,7 @@ ChildrenGetterTy<BasicBlock, IsPostDom>::get(const NodeRef &N) {
return {Children.begin(), Children.end()};
}
- using SnapShotBBPairTy =
- std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, OrderedNodeTy>;
-
- ChildrenTy Ret;
- for (const auto &SnapShotBBPair : children<SnapShotBBPairTy>({GD, N}))
- Ret.emplace_back(SnapShotBBPair.second);
- return Ret;
+ return GD->template getChildren<IsPostDom>(N);
}
} // end of namespace IDFCalculatorDetail
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h
index f4249f74104c..3c632f02905a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h
@@ -63,7 +63,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass {
BranchProbabilityInfo &getCalculated() {
if (!Calculated) {
assert(F && LI && "call setAnalysis");
- BPI.calculate(*F, *LI, TLI, nullptr);
+ BPI.calculate(*F, *LI, TLI, nullptr, nullptr);
Calculated = true;
}
return BPI;
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h
index ea63b837ba70..f7a5adac2b43 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -258,7 +258,6 @@ public:
iterator begin() { return iterator(Edges.begin(), Edges.end()); }
iterator end() { return iterator(Edges.end(), Edges.end()); }
- Edge &operator[](int i) { return Edges[i]; }
Edge &operator[](Node &N) {
assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!");
auto &E = Edges[EdgeIndexMap.find(&N)->second];
@@ -305,13 +304,6 @@ public:
/// Internal helper to remove the edge to the given function.
bool removeEdgeInternal(Node &ChildN);
-
- /// Internal helper to replace an edge key with a new one.
- ///
- /// This should be used when the function for a particular node in the
- /// graph gets replaced and we are updating all of the edges to that node
- /// to use the new function as the key.
- void replaceEdgeKey(Function &OldTarget, Function &NewTarget);
};
/// A node in the call graph.
@@ -606,10 +598,6 @@ public:
void verify();
#endif
- /// Handle any necessary parent set updates after inserting a trivial ref
- /// or call edge.
- void handleTrivialEdgeInsertion(Node &SourceN, Node &TargetN);
-
public:
using iterator = pointee_iterator<SmallVectorImpl<SCC *>::const_iterator>;
using range = iterator_range<iterator>;
@@ -1058,12 +1046,29 @@ public:
/// fully visited by the DFS prior to calling this routine.
void removeDeadFunction(Function &F);
- /// Introduce a node for the function \p NewF in the SCC \p C.
- void addNewFunctionIntoSCC(Function &NewF, SCC &C);
+ /// Add a new function split/outlined from an existing function.
+ ///
+ /// The new function may only reference other functions that the original
+ /// function did.
+ ///
+ /// The original function must reference (either directly or indirectly) the
+ /// new function.
+ ///
+ /// The new function may also reference the original function.
+ /// It may end up in a parent SCC in the case that the original function's
+ /// edge to the new function is a ref edge, and the edge back is a call edge.
+ void addSplitFunction(Function &OriginalFunction, Function &NewFunction);
- /// Introduce a node for the function \p NewF, as a single node in a
- /// new SCC, in the RefSCC \p RC.
- void addNewFunctionIntoRefSCC(Function &NewF, RefSCC &RC);
+ /// Add new ref-recursive functions split/outlined from an existing function.
+ ///
+ /// The new functions may only reference other functions that the original
+ /// function did. The new functions may reference (not call) the original
+ /// function.
+ ///
+ /// The original function must reference (not call) all new functions.
+ /// All new functions must reference (not call) each other.
+ void addSplitRefRecursiveFunctions(Function &OriginalFunction,
+ ArrayRef<Function *> NewFunctions);
///@}
@@ -1168,16 +1173,14 @@ private:
/// the NodeMap.
Node &insertInto(Function &F, Node *&MappedN);
+ /// Helper to initialize a new node created outside of creating SCCs and add
+ /// it to the NodeMap if necessary. For example, useful when a function is
+ /// split.
+ Node &initNode(Function &F);
+
/// Helper to update pointers back to the graph object during moves.
void updateGraphPtrs();
- /// Helper to insert a new function, add it to the NodeMap, and populate its
- /// node.
- Node &createNode(Function &F);
-
- /// Helper to add the given Node \p N to the SCCMap, mapped to the SCC \p C.
- void addNodeToSCC(SCC &C, Node &N);
-
/// Allocates an SCC and constructs it using the graph allocator.
///
/// The arguments are forwarded to the constructor.
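Sketch of the intended use of addSplitFunction() above, assuming an outliner-style client (CG, OrigF and NewF are placeholders, not from this patch).

  #include "llvm/Analysis/LazyCallGraph.h"

  void registerOutlinedFunction(llvm::LazyCallGraph &CG, llvm::Function &OrigF,
                                llvm::Function &NewF) {
    // Contract from the comments above: NewF may only reference functions that
    // OrigF already referenced, and OrigF must reference NewF.
    CG.addSplitFunction(OrigF, NewF);
  }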
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyValueInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyValueInfo.h
index 1bc88235273e..363cb49af382 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyValueInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyValueInfo.h
@@ -71,20 +71,20 @@ public:
Instruction *CxtI = nullptr);
/// Determine whether the specified value comparison with a constant is known
- /// to be true or false at the specified instruction
- /// (from an assume intrinsic). Pred is a CmpInst predicate.
+ /// to be true or false at the specified instruction.
+ /// \p Pred is a CmpInst predicate. If \p UseBlockValue is true, the block
+ /// value is also taken into account.
Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C,
- Instruction *CxtI);
+ Instruction *CxtI, bool UseBlockValue = false);
- /// Determine whether the specified value is known to be a
- /// constant at the end of the specified block. Return null if not.
- Constant *getConstant(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr);
+ /// Determine whether the specified value is known to be a constant at the
+ /// specified instruction. Return null if not.
+ Constant *getConstant(Value *V, Instruction *CxtI);
/// Return the ConstantRange constraint that is known to hold for the
- /// specified value at the end of the specified block. This may only be called
+ /// specified value at the specified instruction. This may only be called
/// on integer-typed Values.
- ConstantRange getConstantRange(Value *V, BasicBlock *BB,
- Instruction *CxtI = nullptr,
+ ConstantRange getConstantRange(Value *V, Instruction *CxtI,
bool UndefAllowed = true);
/// Determine whether the specified value is known to be a
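Sketch of the updated context-instruction-based LazyValueInfo queries above; LVI, V, C and I are assumed to be supplied by the calling pass.

  #include "llvm/Analysis/LazyValueInfo.h"
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  bool isKnownEqualAt(LazyValueInfo &LVI, Value *V, Constant *C,
                      Instruction *I) {
    // UseBlockValue=true also folds in the block-entry value of V.
    LazyValueInfo::Tristate T =
        LVI.getPredicateAt(CmpInst::ICMP_EQ, V, C, I, /*UseBlockValue=*/true);
    return T == LazyValueInfo::True;
  }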
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/Lint.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Lint.h
index 0fea81e215c9..6eb637e72782 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/Lint.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Lint.h
@@ -19,30 +19,30 @@
#ifndef LLVM_ANALYSIS_LINT_H
#define LLVM_ANALYSIS_LINT_H
+#include "llvm/IR/PassManager.h"
+
namespace llvm {
class FunctionPass;
class Module;
class Function;
-/// Create a lint pass.
-///
-/// Check a module or function.
-FunctionPass *createLintPass();
+FunctionPass *createLintLegacyPassPass();
-/// Check a module.
+/// Lint a module.
///
/// This should only be used for debugging, because it plays games with
/// PassManagers and stuff.
-void lintModule(
- const Module &M ///< The module to be checked
-);
+void lintModule(const Module &M);
+
+// Lint a function.
+void lintFunction(const Function &F);
-// lintFunction - Check a function.
-void lintFunction(
- const Function &F ///< The function to be checked
-);
+class LintPass : public PassInfoMixin<LintPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
-} // End llvm namespace
+} // namespace llvm
-#endif
+#endif // LLVM_ANALYSIS_LINT_H
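Sketch of using the new-PM LintPass declared above; the pipeline helper is assumed. lintFunction(F)/lintModule(M) remain available for ad-hoc debugging.

  #include "llvm/Analysis/Lint.h"
  #include "llvm/IR/PassManager.h"

  void addLint(llvm::FunctionPassManager &FPM) {
    // Runs the IR linter over each function and emits diagnostics.
    FPM.addPass(llvm::LintPass());
  }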
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h
index 5665a802942d..24a05610e68d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h
@@ -155,6 +155,15 @@ Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan, AAResults *AA,
bool *IsLoadCSE, unsigned *NumScanedInst);
+
+/// Returns true if a pointer value \p A can be replaced with another pointer
+/// value \p B if they are deemed equal through some means (e.g. information
+/// from conditions).
+/// NOTE: the current implementation is incomplete and unsound. It does not
+/// reject all invalid cases yet, but will be made stricter in the future. In
+/// particular, returning true only means it is unknown whether the replacement
+/// is safe.
+bool canReplacePointersIfEqual(Value *A, Value *B, const DataLayout &DL,
+ Instruction *CtxI);
}
#endif
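Sketch of guarding a pointer replacement with the new query above, keeping its soundness caveat in mind; U, A, B, DL and CtxI are assumed to come from the caller.

  #include "llvm/Analysis/Loads.h"
  #include "llvm/IR/Use.h"
  using namespace llvm;

  bool tryReplacePointerUse(Use &U, Value *A, Value *B, const DataLayout &DL,
                            Instruction *CtxI) {
    // Note the header comment: a true result is not yet a full safety proof.
    if (!canReplacePointersIfEqual(A, B, DL, CtxI))
      return false;
    U.set(B);
    return true;
  }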
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index a5237e9ba59e..13fbe884eddf 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -171,7 +171,8 @@ public:
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
: PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeDepDistBytes(0),
- MaxSafeRegisterWidth(-1U), FoundNonConstantDistanceDependence(false),
+ MaxSafeVectorWidthInBits(-1U),
+ FoundNonConstantDistanceDependence(false),
Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {}
/// Register the location (instructions are given increasing numbers)
@@ -204,13 +205,21 @@ public:
return Status == VectorizationSafetyStatus::Safe;
}
+ /// Return true if the number of elements that are safe to operate on
+ /// simultaneously is not bounded.
+ bool isSafeForAnyVectorWidth() const {
+ return MaxSafeVectorWidthInBits == UINT_MAX;
+ }
+
/// The maximum number of bytes of a vector register we can vectorize
/// the accesses safely with.
uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
/// Return the number of elements that are safe to operate on
/// simultaneously, multiplied by the size of the element in bits.
- uint64_t getMaxSafeRegisterWidth() const { return MaxSafeRegisterWidth; }
+ uint64_t getMaxSafeVectorWidthInBits() const {
+ return MaxSafeVectorWidthInBits;
+ }
/// In some cases when the dependency check fails we can still
/// vectorize the loop with a dynamic array access check.
@@ -275,7 +284,7 @@ private:
/// operate on simultaneously, multiplied by the size of the element in bits.
/// The size of the element is taken from the memory access that is most
/// restrictive.
- uint64_t MaxSafeRegisterWidth;
+ uint64_t MaxSafeVectorWidthInBits;
/// If we see a non-constant dependence distance we can still try to
/// vectorize this loop with runtime checks.
@@ -418,7 +427,7 @@ public:
bool UseDependencies);
/// Returns the checks that generateChecks created.
- const SmallVector<RuntimePointerCheck, 4> &getChecks() const {
+ const SmallVectorImpl<RuntimePointerCheck> &getChecks() const {
return Checks;
}
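Sketch of how a vectorizer might consume the renamed width query above; LAI is assumed to be a LoopAccessInfo computed for the loop in question.

  #include "llvm/Analysis/LoopAccessAnalysis.h"
  #include <cstdint>

  uint64_t maxSafeWidthInBits(const llvm::LoopAccessInfo &LAI) {
    const llvm::MemoryDepChecker &DepChecker = LAI.getDepChecker();
    // Unbounded when dependences impose no limit; otherwise the bit-width cap.
    return DepChecker.isSafeForAnyVectorWidth()
               ? UINT64_MAX
               : DepChecker.getMaxSafeVectorWidthInBits();
  }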
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAnalysisManager.h
index 0e162e03bde1..11dbd15c8678 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAnalysisManager.h
@@ -57,6 +57,7 @@ struct LoopStandardAnalysisResults {
ScalarEvolution &SE;
TargetLibraryInfo &TLI;
TargetTransformInfo &TTI;
+ BlockFrequencyInfo *BFI;
MemorySSA *MSSA;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
index ffec78b6db2c..e8f2205545eb 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -14,19 +14,20 @@
#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/Pass.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class AAResults;
+class DependenceInfo;
class LPMUpdater;
+class ScalarEvolution;
+class SCEV;
+class TargetTransformInfo;
+
using CacheCostTy = int64_t;
using LoopVectorTy = SmallVector<Loop *, 8>;
@@ -70,7 +71,7 @@ public:
/// the same cache line iff the distance between them in the innermost
/// dimension is less than the cache line size. Return None if unsure.
Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
- AliasAnalysis &AA) const;
+ AAResults &AA) const;
/// Return true if the current object and the indexed reference \p Other
/// have distance smaller than \p MaxDistance in the dimension associated with
@@ -78,7 +79,7 @@ public:
/// MaxDistance and None if unsure.
Optional<bool> hasTemporalReuse(const IndexedReference &Other,
unsigned MaxDistance, const Loop &L,
- DependenceInfo &DI, AliasAnalysis &AA) const;
+ DependenceInfo &DI, AAResults &AA) const;
/// Compute the cost of the reference w.r.t. the given loop \p L when it is
/// considered in the innermost position in the loop nest.
@@ -118,7 +119,7 @@ private:
/// Return true if the given reference \p Other is definitely aliased with
/// the indexed reference represented by this class.
- bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const;
+ bool isAliased(const IndexedReference &Other, AAResults &AA) const;
private:
/// True if the reference can be delinearized, false otherwise.
@@ -183,7 +184,7 @@ public:
/// between array elements accessed in a loop so that the elements are
/// classified to have temporal reuse.
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
- TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI,
+ TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI,
Optional<unsigned> TRT = None);
/// Create a CacheCost for the loop nest rooted by \p Root.
@@ -197,9 +198,9 @@ public:
/// Return the estimated cost of loop \p L if the given loop is part of the
/// loop nest associated with this object. Return -1 otherwise.
CacheCostTy getLoopCost(const Loop &L) const {
- auto IT = std::find_if(
- LoopCosts.begin(), LoopCosts.end(),
- [&L](const LoopCacheCostTy &LCC) { return LCC.first == &L; });
+ auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) {
+ return LCC.first == &L;
+ });
return (IT != LoopCosts.end()) ? (*IT).second : -1;
}
@@ -258,7 +259,7 @@ private:
const LoopInfo &LI;
ScalarEvolution &SE;
TargetTransformInfo &TTI;
- AliasAnalysis &AA;
+ AAResults &AA;
DependenceInfo &DI;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
index 35fe2a03a2a2..a5717bae12c3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
@@ -155,7 +155,17 @@ public:
iterator end() const { return getSubLoops().end(); }
reverse_iterator rbegin() const { return getSubLoops().rbegin(); }
reverse_iterator rend() const { return getSubLoops().rend(); }
- bool empty() const { return getSubLoops().empty(); }
+
+ // LoopInfo does not detect irreducible control flow, just natural
+ // loops. That is, it is possible that there is cyclic control
+ // flow within the "innermost loop" or around the "outermost
+ // loop".
+
+ /// Return true if the loop does not contain any (natural) loops.
+ bool isInnermost() const { return getSubLoops().empty(); }
+ /// Return true if the loop does not have a parent (natural) loop
+ // (i.e. it is outermost, which is the same as top-level).
+ bool isOutermost() const { return getParentLoop() == nullptr; }
/// Get a list of the basic blocks which make up this loop.
ArrayRef<BlockT *> getBlocks() const {
@@ -292,6 +302,9 @@ public:
/// Otherwise return null.
BlockT *getUniqueExitBlock() const;
+ /// Return true if this loop does not have any exit blocks.
+ bool hasNoExitBlocks() const;
+
/// Edge type.
typedef std::pair<BlockT *, BlockT *> Edge;
@@ -830,6 +843,9 @@ public:
/// unrolling pass is run more than once (which it generally is).
void setLoopAlreadyUnrolled();
+ /// Add llvm.loop.mustprogress to this loop's loop id metadata.
+ void setLoopMustProgress();
+
void dump() const;
void dumpVerbose() const;
@@ -974,7 +990,7 @@ public:
LoopT *removeLoop(iterator I) {
assert(I != end() && "Cannot remove end iterator!");
LoopT *L = *I;
- assert(!L->getParentLoop() && "Not a top-level loop!");
+ assert(L->isOutermost() && "Not a top-level loop!");
TopLevelLoops.erase(TopLevelLoops.begin() + (I - begin()));
return L;
}
@@ -1002,7 +1018,7 @@ public:
/// This adds the specified loop to the collection of top-level loops.
void addTopLevelLoop(LoopT *New) {
- assert(!New->getParentLoop() && "Loop already in subloop!");
+ assert(New->isOutermost() && "Loop already in subloop!");
TopLevelLoops.push_back(New);
}
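Sketch of the new loop predicates added above; L is assumed to be a loop obtained from LoopInfo.

  #include "llvm/Analysis/LoopInfo.h"
  using namespace llvm;

  bool isTrivialNest(const Loop *L) {
    // Both innermost (no subloops) and outermost (no parent loop).
    return L->isInnermost() && L->isOutermost();
  }

  bool neverExits(const Loop *L) {
    // True for loops with no exit blocks at all.
    return L->hasNoExitBlocks();
  }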
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfoImpl.h
index 58a4abafcc85..426b349c6b8a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -68,6 +68,13 @@ void LoopBase<BlockT, LoopT>::getExitBlocks(
ExitBlocks.push_back(Succ);
}
+template <class BlockT, class LoopT>
+bool LoopBase<BlockT, LoopT>::hasNoExitBlocks() const {
+ SmallVector<BlockT *, 8> ExitBlocks;
+ getExitBlocks(ExitBlocks);
+ return ExitBlocks.empty();
+}
+
/// getExitBlock - If getExitBlocks would return exactly one block,
/// return that block. Otherwise return null.
template <class BlockT, class LoopT>
@@ -502,7 +509,7 @@ void PopulateLoopsDFS<BlockT, LoopT>::insertIntoLoop(BlockT *Block) {
if (Subloop && Block == Subloop->getHeader()) {
// We reach this point once per subloop after processing all the blocks in
// the subloop.
- if (Subloop->getParentLoop())
+ if (!Subloop->isOutermost())
Subloop->getParentLoop()->getSubLoopsVector().push_back(Subloop);
else
LI->addTopLevelLoop(Subloop);
@@ -666,12 +673,13 @@ static void compareLoops(const LoopT *L, const LoopT *OtherL,
"Mismatched basic blocks in the loops!");
const SmallPtrSetImpl<const BlockT *> &BlocksSet = L->getBlocksSet();
- const SmallPtrSetImpl<const BlockT *> &OtherBlocksSet = L->getBlocksSet();
+ const SmallPtrSetImpl<const BlockT *> &OtherBlocksSet =
+ OtherL->getBlocksSet();
assert(BlocksSet.size() == OtherBlocksSet.size() &&
- std::all_of(BlocksSet.begin(), BlocksSet.end(),
- [&OtherBlocksSet](const BlockT *BB) {
- return OtherBlocksSet.count(BB);
- }) &&
+ llvm::all_of(BlocksSet,
+ [&OtherBlocksSet](const BlockT *BB) {
+ return OtherBlocksSet.count(BB);
+ }) &&
"Mismatched basic blocks in BlocksSets!");
}
#endif
@@ -681,7 +689,7 @@ void LoopInfoBase<BlockT, LoopT>::verify(
const DomTreeBase<BlockT> &DomTree) const {
DenseSet<const LoopT *> Loops;
for (iterator I = begin(), E = end(); I != E; ++I) {
- assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
+ assert((*I)->isOutermost() && "Top-level loop has a parent!");
(*I)->verifyLoopNest(&Loops);
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h
index 792958a312ce..9c4fb4dbc29b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h
@@ -14,6 +14,7 @@
#ifndef LLVM_ANALYSIS_LOOPNESTANALYSIS_H
#define LLVM_ANALYSIS_LOOPNESTANALYSIS_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -59,6 +60,12 @@ public:
/// getMaxPerfectDepth(Loop_i) would return 2.
static unsigned getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE);
+ /// Recursively traverse all empty 'single successor' basic blocks of \p From
+ /// (if there are any). Return the last basic block found or \p End if it was
+ /// reached during the search.
+ static const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From,
+ const BasicBlock *End);
+
/// Return the outermost loop in the loop nest.
Loop &getOutermostLoop() const { return *Loops.front(); }
@@ -124,10 +131,16 @@ public:
/// Return true if all loops in the loop nest are in simplify form.
bool areAllLoopsSimplifyForm() const {
- return llvm::all_of(Loops,
- [](const Loop *L) { return L->isLoopSimplifyForm(); });
+ return all_of(Loops, [](const Loop *L) { return L->isLoopSimplifyForm(); });
+ }
+
+ /// Return true if all loops in the loop nest are in rotated form.
+ bool areAllLoopsRotatedForm() const {
+ return all_of(Loops, [](const Loop *L) { return L->isRotatedForm(); });
}
+ StringRef getName() const { return Loops.front()->getName(); }
+
protected:
const unsigned MaxPerfectDepth; // maximum perfect nesting depth level.
LoopVectorTy Loops; // the loops in the nest (in breadth first order).
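Sketch combining the LoopNest queries added above; LN is assumed to be a LoopNest built for some outermost loop.

  #include "llvm/Analysis/LoopNestAnalysis.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void reportNestShape(const LoopNest &LN) {
    if (LN.areAllLoopsSimplifyForm() && LN.areAllLoopsRotatedForm())
      errs() << "loop nest " << LN.getName() << " is simplified and rotated\n";
  }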
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index cbe3b1f1f4e6..54edbb823263 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/MLInlineAdvisor.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MLInlineAdvisor.h
@@ -31,8 +31,6 @@ public:
void onPassEntry() override;
- std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override;
-
int64_t getIRSize(const Function &F) const { return F.getInstructionCount(); }
void onSuccessfulInlining(const MLInlineAdvice &Advice,
bool CalleeWasDeleted);
@@ -42,13 +40,16 @@ public:
const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); }
protected:
- virtual std::unique_ptr<MLInlineAdvice>
- getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE);
+ std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
+
+ std::unique_ptr<InlineAdvice> getMandatoryAdvice(CallBase &CB,
+ bool Advice) override;
+
+ virtual std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB);
virtual std::unique_ptr<MLInlineAdvice>
getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE);
- Module &M;
std::unique_ptr<MLModelRunner> ModelRunner;
private:
@@ -104,4 +105,4 @@ private:
} // namespace llvm
-#endif // LLVM_ANALYSIS_MLINLINEADVISOR_H
\ No newline at end of file
+#endif // LLVM_ANALYSIS_MLINLINEADVISOR_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MemDerefPrinter.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MemDerefPrinter.h
new file mode 100644
index 000000000000..bafdc543eeaf
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MemDerefPrinter.h
@@ -0,0 +1,24 @@
+//===- MemDerefPrinter.h - Printer for isDereferenceablePointer -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MEMDEREFPRINTER_H
+#define LLVM_ANALYSIS_MEMDEREFPRINTER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class MemDerefPrinterPass : public PassInfoMixin<MemDerefPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ MemDerefPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_MEMDEREFPRINTER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 0777dc7d7862..efde00f82d57 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -302,7 +302,7 @@ private:
/// The maximum size of the dereferences of the pointer.
///
/// May be UnknownSize if the sizes are unknown.
- LocationSize Size = LocationSize::unknown();
+ LocationSize Size = LocationSize::afterPointer();
/// The AA tags associated with dereferences of the pointer.
///
/// The members may be null if there are no tags or conflicting tags.
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryLocation.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryLocation.h
index d01ac7da85cd..3b188d763ef2 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -64,10 +64,11 @@ class VAArgInst;
// None.
class LocationSize {
enum : uint64_t {
- Unknown = ~uint64_t(0),
+ BeforeOrAfterPointer = ~uint64_t(0),
+ AfterPointer = BeforeOrAfterPointer - 1,
+ MapEmpty = BeforeOrAfterPointer - 2,
+ MapTombstone = BeforeOrAfterPointer - 3,
ImpreciseBit = uint64_t(1) << 63,
- MapEmpty = Unknown - 1,
- MapTombstone = Unknown - 2,
// The maximum value we can represent without falling back to 'unknown'.
MaxValue = (MapTombstone - 1) & ~ImpreciseBit,
@@ -81,7 +82,11 @@ class LocationSize {
constexpr LocationSize(uint64_t Raw, DirectConstruction): Value(Raw) {}
- static_assert(Unknown & ImpreciseBit, "Unknown is imprecise by definition.");
+ static_assert(AfterPointer & ImpreciseBit,
+ "AfterPointer is imprecise by definition.");
+ static_assert(BeforeOrAfterPointer & ImpreciseBit,
+ "BeforeOrAfterPointer is imprecise by definition.");
+
public:
// FIXME: Migrate all users to construct via either `precise` or `upperBound`,
// to make it more obvious at the callsite the kind of size that they're
@@ -90,12 +95,12 @@ public:
// Since the overwhelming majority of users of this provide precise values,
// this assumes the provided value is precise.
constexpr LocationSize(uint64_t Raw)
- : Value(Raw > MaxValue ? Unknown : Raw) {}
+ : Value(Raw > MaxValue ? AfterPointer : Raw) {}
static LocationSize precise(uint64_t Value) { return LocationSize(Value); }
static LocationSize precise(TypeSize Value) {
if (Value.isScalable())
- return unknown();
+ return afterPointer();
return precise(Value.getFixedSize());
}
@@ -104,17 +109,25 @@ public:
if (LLVM_UNLIKELY(Value == 0))
return precise(0);
if (LLVM_UNLIKELY(Value > MaxValue))
- return unknown();
+ return afterPointer();
return LocationSize(Value | ImpreciseBit, Direct);
}
static LocationSize upperBound(TypeSize Value) {
if (Value.isScalable())
- return unknown();
+ return afterPointer();
return upperBound(Value.getFixedSize());
}
- constexpr static LocationSize unknown() {
- return LocationSize(Unknown, Direct);
+ /// Any location after the base pointer (but still within the underlying
+ /// object).
+ constexpr static LocationSize afterPointer() {
+ return LocationSize(AfterPointer, Direct);
+ }
+
+ /// Any location before or after the base pointer (but still within the
+ /// underlying object).
+ constexpr static LocationSize beforeOrAfterPointer() {
+ return LocationSize(BeforeOrAfterPointer, Direct);
}
// Sentinel values, generally used for maps.
@@ -131,20 +144,24 @@ public:
if (Other == *this)
return *this;
- if (!hasValue() || !Other.hasValue())
- return unknown();
+ if (Value == BeforeOrAfterPointer || Other.Value == BeforeOrAfterPointer)
+ return beforeOrAfterPointer();
+ if (Value == AfterPointer || Other.Value == AfterPointer)
+ return afterPointer();
return upperBound(std::max(getValue(), Other.getValue()));
}
- bool hasValue() const { return Value != Unknown; }
+ bool hasValue() const {
+ return Value != AfterPointer && Value != BeforeOrAfterPointer;
+ }
uint64_t getValue() const {
assert(hasValue() && "Getting value from an unknown LocationSize!");
return Value & ~ImpreciseBit;
}
// Returns whether or not this value is precise. Note that if a value is
- // precise, it's guaranteed to not be `unknown()`.
+ // precise, it's guaranteed to not be unknown.
bool isPrecise() const {
return (Value & ImpreciseBit) == 0;
}
@@ -152,6 +169,9 @@ public:
// Convenience method to check if this LocationSize's value is 0.
bool isZero() const { return hasValue() && getValue() == 0; }
+ /// Whether accesses before the base pointer are possible.
+ bool mayBeBeforePointer() const { return Value == BeforeOrAfterPointer; }
+
bool operator==(const LocationSize &Other) const {
return Value == Other.Value;
}
@@ -242,14 +262,30 @@ public:
return getForArgument(Call, ArgIdx, &TLI);
}
+ /// Return a location that may access any location after Ptr, while remaining
+ /// within the underlying object.
+ static MemoryLocation getAfter(const Value *Ptr,
+ const AAMDNodes &AATags = AAMDNodes()) {
+ return MemoryLocation(Ptr, LocationSize::afterPointer(), AATags);
+ }
+
+ /// Return a location that may access any location before or after Ptr, while
+ /// remaining within the underlying object.
+ static MemoryLocation
+ getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags = AAMDNodes()) {
+ return MemoryLocation(Ptr, LocationSize::beforeOrAfterPointer(), AATags);
+ }
+
// Return the exact size if the exact size is known at compiletime,
// otherwise return MemoryLocation::UnknownSize.
static uint64_t getSizeOrUnknown(const TypeSize &T) {
return T.isScalable() ? UnknownSize : T.getFixedSize();
}
- explicit MemoryLocation(const Value *Ptr = nullptr,
- LocationSize Size = LocationSize::unknown(),
+ MemoryLocation()
+ : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()), AATags() {}
+
+ explicit MemoryLocation(const Value *Ptr, LocationSize Size,
const AAMDNodes &AATags = AAMDNodes())
: Ptr(Ptr), Size(Size), AATags(AATags) {}
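Sketch of the new MemoryLocation helpers above in an alias query; Call and Ptr are assumed to come from the caller, and the surrounding AA setup is not part of this patch.

  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/MemoryLocation.h"
  using namespace llvm;

  bool mayWriteAtOrAfter(AAResults &AA, const CallBase *Call, const Value *Ptr) {
    // Anywhere from Ptr to the end of the underlying object.
    MemoryLocation Loc = MemoryLocation::getAfter(Ptr);
    return isModSet(AA.getModRefInfo(Call, Loc));
  }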
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSA.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSA.h
index 5ce2b3fd047f..63c031b1921f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSA.h
@@ -88,6 +88,7 @@
#include "llvm/IR/DerivedUser.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -108,6 +109,7 @@ namespace llvm {
/// Enables memory ssa as a dependency for loop passes.
extern cl::opt<bool> EnableMSSALoopDependency;
+class AllocaInst;
class Function;
class Instruction;
class MemoryAccess;
@@ -270,7 +272,7 @@ public:
// Retrieve AliasResult type of the optimized access. Ideally this would be
// returned by the caching walker and may go away in the future.
Optional<AliasResult> getOptimizedAccessType() const {
- return OptimizedAccessAlias;
+ return isOptimized() ? OptimizedAccessAlias : None;
}
/// Reset the ID of what this MemoryUse was optimized to, causing it to
@@ -840,7 +842,6 @@ private:
CachingWalker<AliasAnalysis> *getWalkerImpl();
void buildMemorySSA(BatchAAResults &BAA);
- void optimizeUses();
void prepareForMoveTo(MemoryAccess *, BasicBlock *);
void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const;
@@ -848,15 +849,11 @@ private:
using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>;
using DefsMap = DenseMap<const BasicBlock *, std::unique_ptr<DefsList>>;
- void
- determineInsertionPoint(const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks);
void markUnreachableAsLiveOnEntry(BasicBlock *BB);
- bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
MemoryPhi *createMemoryPhi(BasicBlock *BB);
template <typename AliasAnalysisType>
MemoryUseOrDef *createNewAccess(Instruction *, AliasAnalysisType *,
const MemoryUseOrDef *Template = nullptr);
- MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &);
MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool);
void renameSuccessorPhis(BasicBlock *, MemoryAccess *, bool);
@@ -1181,9 +1178,11 @@ class upward_defs_iterator
using BaseT = upward_defs_iterator::iterator_facade_base;
public:
- upward_defs_iterator(const MemoryAccessPair &Info, DominatorTree *DT)
+ upward_defs_iterator(const MemoryAccessPair &Info, DominatorTree *DT,
+ bool *PerformedPhiTranslation = nullptr)
: DefIterator(Info.first), Location(Info.second),
- OriginalAccess(Info.first), DT(DT) {
+ OriginalAccess(Info.first), DT(DT),
+ PerformedPhiTranslation(PerformedPhiTranslation) {
CurrentPair.first = nullptr;
WalkingPhi = Info.first && isa<MemoryPhi>(Info.first);
@@ -1215,38 +1214,60 @@ public:
BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); }
private:
+ /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
+ /// loop. In particular, this guarantees that it only references a single
+ /// MemoryLocation during execution of the containing function.
+ bool IsGuaranteedLoopInvariant(Value *Ptr) const;
+
void fillInCurrentPair() {
CurrentPair.first = *DefIterator;
+ CurrentPair.second = Location;
if (WalkingPhi && Location.Ptr) {
+ // Mark size as unknown, if the location is not guaranteed to be
+ // loop-invariant for any possible loop in the function. Setting the size
+ // to unknown guarantees that any memory accesses that access locations
+ // after the pointer are considered as clobbers, which is important to
+ // catch loop carried dependences.
+ if (Location.Ptr &&
+ !IsGuaranteedLoopInvariant(const_cast<Value *>(Location.Ptr)))
+ CurrentPair.second =
+ Location.getWithNewSize(LocationSize::beforeOrAfterPointer());
PHITransAddr Translator(
const_cast<Value *>(Location.Ptr),
OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr);
+
if (!Translator.PHITranslateValue(OriginalAccess->getBlock(),
DefIterator.getPhiArgBlock(), DT,
- false)) {
- if (Translator.getAddr() != Location.Ptr) {
- CurrentPair.second = Location.getWithNewPtr(Translator.getAddr());
- return;
+ true)) {
+ Value *TransAddr = Translator.getAddr();
+ if (TransAddr != Location.Ptr) {
+ CurrentPair.second = CurrentPair.second.getWithNewPtr(TransAddr);
+
+ if (TransAddr &&
+ !IsGuaranteedLoopInvariant(const_cast<Value *>(TransAddr)))
+ CurrentPair.second = CurrentPair.second.getWithNewSize(
+ LocationSize::beforeOrAfterPointer());
+
+ if (PerformedPhiTranslation)
+ *PerformedPhiTranslation = true;
}
- } else {
- CurrentPair.second = Location.getWithNewSize(LocationSize::unknown());
- return;
}
}
- CurrentPair.second = Location;
}
MemoryAccessPair CurrentPair;
memoryaccess_def_iterator DefIterator;
MemoryLocation Location;
MemoryAccess *OriginalAccess = nullptr;
- bool WalkingPhi = false;
DominatorTree *DT = nullptr;
+ bool WalkingPhi = false;
+ bool *PerformedPhiTranslation = nullptr;
};
-inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair,
- DominatorTree &DT) {
- return upward_defs_iterator(Pair, &DT);
+inline upward_defs_iterator
+upward_defs_begin(const MemoryAccessPair &Pair, DominatorTree &DT,
+ bool *PerformedPhiTranslation = nullptr) {
+ return upward_defs_iterator(Pair, &DT, PerformedPhiTranslation);
}
inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); }
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSAUpdater.h
index 20588ef083c5..b0bf2e5ead62 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -52,8 +52,6 @@ class LoopBlocksRPO;
using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
using PhiToDefMap = SmallDenseMap<MemoryPhi *, MemoryAccess *>;
using CFGUpdate = cfg::Update<BasicBlock *>;
-using GraphDiffInvBBPair =
- std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>;
class MemorySSAUpdater {
private:
@@ -121,8 +119,11 @@ public:
ArrayRef<BasicBlock *> ExitBlocks,
ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT);
- /// Apply CFG updates, analogous with the DT edge updates.
- void applyUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT);
+ /// Apply CFG updates, analogous with the DT edge updates. By default, the
+ /// DT is assumed to be already up to date. If UpdateDTFirst is true, first
+ /// update the DT with the same updates.
+ void applyUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT,
+ bool UpdateDTFirst = false);
/// Apply CFG insert updates, analogous with the DT edge updates.
void applyInsertUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT);
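Sketch of the new UpdateDTFirst flag above: when neither the dominator tree nor MemorySSA has been updated yet for a batch of CFG changes, the updater can apply them to the DT first.

  #include "llvm/Analysis/MemorySSAUpdater.h"
  #include "llvm/IR/Dominators.h"
  using namespace llvm;

  void applyAll(MemorySSAUpdater &MSSAU,
                ArrayRef<cfg::Update<BasicBlock *>> Updates,
                DominatorTree &DT) {
    MSSAU.applyUpdates(Updates, DT, /*UpdateDTFirst=*/true);
  }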
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h
new file mode 100644
index 000000000000..99aa315319b8
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h
@@ -0,0 +1,29 @@
+//===- ModuleDebugInfoPrinter.h - -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MODULEDEBUGINFOPRINTER_H
+#define LLVM_ANALYSIS_MODULEDEBUGINFOPRINTER_H
+
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class ModuleDebugInfoPrinterPass
+ : public PassInfoMixin<ModuleDebugInfoPrinterPass> {
+ DebugInfoFinder Finder;
+ raw_ostream &OS;
+
+public:
+ explicit ModuleDebugInfoPrinterPass(raw_ostream &OS);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_MODULEDEBUGINFOPRINTER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MustExecute.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MustExecute.h
index a3b7bee97808..df489aaa534d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/MustExecute.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MustExecute.h
@@ -27,6 +27,8 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionPrecedenceTracking.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -541,6 +543,23 @@ private:
MustBeExecutedIterator EndIterator;
};
+class MustExecutePrinterPass : public PassInfoMixin<MustExecutePrinterPass> {
+ raw_ostream &OS;
+
+public:
+ MustExecutePrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+class MustBeExecutedContextPrinterPass
+ : public PassInfoMixin<MustBeExecutedContextPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ MustBeExecutedContextPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
index cad1c52f7f87..16c5f6701da0 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
@@ -23,7 +23,6 @@
#define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H
#include "llvm/ADT/Optional.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
@@ -31,6 +30,9 @@
#include "llvm/IR/ValueHandle.h"
namespace llvm {
+
+class AAResults;
+
namespace objcarc {
/// A handy option to enable/disable all ARC Optimizations.
@@ -64,10 +66,9 @@ inline bool ModuleHasARC(const Module &M) {
/// This is a wrapper around getUnderlyingObject which also knows how to
/// look through objc_retain and objc_autorelease calls, which we know to return
/// their argument verbatim.
-inline const Value *GetUnderlyingObjCPtr(const Value *V,
- const DataLayout &DL) {
+inline const Value *GetUnderlyingObjCPtr(const Value *V) {
for (;;) {
- V = GetUnderlyingObject(V, DL);
+ V = getUnderlyingObject(V);
if (!IsForwarding(GetBasicARCInstKind(V)))
break;
V = cast<CallInst>(V)->getArgOperand(0);
@@ -78,12 +79,12 @@ inline const Value *GetUnderlyingObjCPtr(const Value *V,
/// A wrapper for GetUnderlyingObjCPtr used for results memoization.
inline const Value *
-GetUnderlyingObjCPtrCached(const Value *V, const DataLayout &DL,
+GetUnderlyingObjCPtrCached(const Value *V,
DenseMap<const Value *, WeakTrackingVH> &Cache) {
if (auto InCache = Cache.lookup(V))
return InCache;
- const Value *Computed = GetUnderlyingObjCPtr(V, DL);
+ const Value *Computed = GetUnderlyingObjCPtr(V);
Cache[V] = const_cast<Value *>(Computed);
return Computed;
}
@@ -146,7 +147,7 @@ inline bool IsPotentialRetainableObjPtr(const Value *Op) {
return false;
// Special arguments can not be a valid retainable object pointer.
if (const Argument *Arg = dyn_cast<Argument>(Op))
- if (Arg->hasPassPointeeByValueAttr() || Arg->hasNestAttr() ||
+ if (Arg->hasPassPointeeByValueCopyAttr() || Arg->hasNestAttr() ||
Arg->hasStructRetAttr())
return false;
// Only consider values with pointer types.
@@ -162,24 +163,7 @@ inline bool IsPotentialRetainableObjPtr(const Value *Op) {
return true;
}
-inline bool IsPotentialRetainableObjPtr(const Value *Op,
- AliasAnalysis &AA) {
- // First make the rudimentary check.
- if (!IsPotentialRetainableObjPtr(Op))
- return false;
-
- // Objects in constant memory are not reference-counted.
- if (AA.pointsToConstantMemory(Op))
- return false;
-
- // Pointers in constant memory are not pointing to reference-counted objects.
- if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
- if (AA.pointsToConstantMemory(LI->getPointerOperand()))
- return false;
-
- // Otherwise assume the worst.
- return true;
-}
+bool IsPotentialRetainableObjPtr(const Value *Op, AAResults &AA);
/// Helper for GetARCInstKind. Determines what kind of construct CS
/// is.
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h b/contrib/llvm-project/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h
index ab97d5b8504e..9815dd05cd1c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h
@@ -88,8 +88,14 @@ public:
/// provide more context so that non-trivial false positives can be quickly
/// detected by the user.
bool allowExtraAnalysis(StringRef PassName) const {
- return (F->getContext().getLLVMRemarkStreamer() ||
- F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(PassName));
+ return OptimizationRemarkEmitter::allowExtraAnalysis(*F, PassName);
+ }
+ static bool allowExtraAnalysis(const Function &F, StringRef PassName) {
+ return allowExtraAnalysis(F.getContext(), PassName);
+ }
+ static bool allowExtraAnalysis(LLVMContext &Ctx, StringRef PassName) {
+ return Ctx.getLLVMRemarkStreamer() ||
+ Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(PassName);
}
private:
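Sketch of the new static allowExtraAnalysis overloads above, which let a pass ask "is anyone listening?" before an ORE instance exists; the pass name string is a placeholder.

  #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  using namespace llvm;

  bool wantDetailedRemarks(const Function &F) {
    return OptimizationRemarkEmitter::allowExtraAnalysis(F, "my-pass");
  }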
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/PhiValues.h b/contrib/llvm-project/llvm/include/llvm/Analysis/PhiValues.h
index ea879d727282..c0e91c8b0bdf 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/PhiValues.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/PhiValues.h
@@ -21,7 +21,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
@@ -40,7 +40,7 @@ class Function;
/// it is queried.
class PhiValues {
public:
- using ValueSet = SmallPtrSet<Value *, 4>;
+ using ValueSet = SmallSetVector<Value *, 4>;
/// Construct an empty PhiValues.
PhiValues(const Function &F) : F(F) {}
@@ -70,8 +70,7 @@ public:
FunctionAnalysisManager::Invalidator &);
private:
- using PhiSet = SmallPtrSet<const PHINode *, 4>;
- using ConstValueSet = SmallPtrSet<const Value *, 4>;
+ using ConstValueSet = SmallSetVector<const Value *, 4>;
/// The next depth number to be used by processPhi.
unsigned int NextDepthNumber = 1;
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index a1fea9fefc9a..a4e6ffc3dd58 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -25,7 +25,6 @@ class BasicBlock;
class BlockFrequencyInfo;
class CallBase;
class Function;
-class ProfileSummary;
/// Analysis providing profile information.
///
@@ -39,7 +38,7 @@ class ProfileSummary;
// units. This would require making this depend on BFI.
class ProfileSummaryInfo {
private:
- Module &M;
+ const Module &M;
std::unique_ptr<ProfileSummary> Summary;
void computeThresholds();
// Count thresholds to answer isHotCount and isColdCount queries.
@@ -59,7 +58,8 @@ private:
mutable DenseMap<int, uint64_t> ThresholdCache;
public:
- ProfileSummaryInfo(Module &M) : M(M) { refresh(); }
+ ProfileSummaryInfo(const Module &M) : M(M) { refresh(); }
+
ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default;
/// If no summary is present, attempt to refresh.
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/RegionInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/RegionInfo.h
index b0336c559774..f93081d6f51d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/RegionInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/RegionInfo.h
@@ -59,7 +59,6 @@
namespace llvm {
class DominanceFrontier;
-class DominatorTree;
class Loop;
class LoopInfo;
class PostDominatorTree;
@@ -877,8 +876,6 @@ public:
void verifyAnalysis() const;
};
-class Region;
-
class RegionNode : public RegionNodeBase<RegionTraits<Function>> {
public:
inline RegionNode(Region *Parent, BasicBlock *Entry, bool isSubRegion = false)
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/RegionInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/RegionInfoImpl.h
index 8d9ec646f519..b694effb2229 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -585,10 +585,8 @@ bool RegionInfoBase<Tr>::isRegion(BlockT *entry, BlockT *exit) const {
// Exit is the header of a loop that contains the entry. In this case,
// the dominance frontier must only contain the exit.
if (!DT->dominates(entry, exit)) {
- for (typename DST::iterator SI = entrySuccs->begin(),
- SE = entrySuccs->end();
- SI != SE; ++SI) {
- if (*SI != exit && *SI != entry)
+ for (BlockT *successor : *entrySuccs) {
+ if (successor != exit && successor != entry)
return false;
}
@@ -817,8 +815,7 @@ void RegionInfoBase<Tr>::verifyAnalysis() const {
// Region pass manager support.
template <class Tr>
typename Tr::RegionT *RegionInfoBase<Tr>::getRegionFor(BlockT *BB) const {
- typename BBtoRegionMap::const_iterator I = BBtoRegion.find(BB);
- return I != BBtoRegion.end() ? I->second : nullptr;
+ return BBtoRegion.lookup(BB);
}
template <class Tr>
@@ -889,8 +886,7 @@ typename Tr::RegionT *RegionInfoBase<Tr>::getCommonRegion(RegionT *A,
template <class Tr>
typename Tr::RegionT *
RegionInfoBase<Tr>::getCommonRegion(SmallVectorImpl<RegionT *> &Regions) const {
- RegionT *ret = Regions.back();
- Regions.pop_back();
+ RegionT *ret = Regions.pop_back_val();
for (RegionT *R : Regions)
ret = getCommonRegion(ret, R);
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/RegionPass.h b/contrib/llvm-project/llvm/include/llvm/Analysis/RegionPass.h
index 995c5dca3de3..5c7fa5f56693 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/RegionPass.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/RegionPass.h
@@ -85,8 +85,6 @@ protected:
/// The pass manager to schedule RegionPasses.
class RGPassManager : public FunctionPass, public PMDataManager {
std::deque<Region*> RQ;
- bool skipThisRegion;
- bool redoThisRegion;
RegionInfo *RI;
Region *CurrentRegion;
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
new file mode 100644
index 000000000000..3018bcc241d8
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
@@ -0,0 +1,41 @@
+//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_ANALYSIS_REPLAYINLINEADVISOR_H
+#define LLVM_ANALYSIS_REPLAYINLINEADVISOR_H
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/IR/LLVMContext.h"
+
+namespace llvm {
+class BasicBlock;
+class CallBase;
+class Function;
+class Module;
+class OptimizationRemarkEmitter;
+
+/// Replay inline advisor that uses optimization remarks from the inlining of a
+/// previous build to guide the current inlining. This is useful for inliner tuning.
+class ReplayInlineAdvisor : public InlineAdvisor {
+public:
+ ReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
+ LLVMContext &Context,
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor,
+ StringRef RemarksFile, bool EmitRemarks);
+ std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
+ bool areReplayRemarksLoaded() const { return HasReplayRemarks; }
+
+private:
+ StringSet<> InlineSitesFromRemarks;
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor;
+ bool HasReplayRemarks = false;
+ bool EmitRemarks = false;
+};
+} // namespace llvm
+#endif // LLVM_ANALYSIS_REPLAYINLINEADVISOR_H
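A minimal usage sketch for the header above, assuming an existing Module M, FunctionAnalysisManager FAM, and a wrapped OriginalAdvisor; the remarks file name is a placeholder, and only the constructor signature and areReplayRemarksLoaded() come from the header itself:

    // Hypothetical wiring: replay inline decisions recorded by a previous build.
    auto Replay = std::make_unique<ReplayInlineAdvisor>(
        M, FAM, M.getContext(), std::move(OriginalAdvisor),
        /*RemarksFile=*/"inline_remarks.yaml", /*EmitRemarks=*/true);
    if (!Replay->areReplayRemarksLoaded())
      M.getContext().emitError("could not load inline replay remarks");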
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h
index 81c5fc932588..b3f199de2cfa 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -70,6 +70,7 @@ class StructType;
class TargetLibraryInfo;
class Type;
class Value;
+enum SCEVTypes : unsigned short;
/// This class represents an analyzed expression in the program. These are
/// opaque objects that the client is not allowed to do much with directly.
@@ -82,7 +83,7 @@ class SCEV : public FoldingSetNode {
FoldingSetNodeIDRef FastID;
// The SCEV baseclass this node corresponds to
- const unsigned short SCEVType;
+ const SCEVTypes SCEVType;
protected:
// Estimated complexity of this node's expression tree size.
@@ -119,13 +120,13 @@ public:
NoWrapMask = (1 << 3) - 1
};
- explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy,
+ explicit SCEV(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
unsigned short ExpressionSize)
: FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {}
SCEV(const SCEV &) = delete;
SCEV &operator=(const SCEV &) = delete;
- unsigned getSCEVType() const { return SCEVType; }
+ SCEVTypes getSCEVType() const { return SCEVType; }
/// Return the LLVM type of this SCEV expression.
Type *getType() const;
@@ -511,6 +512,7 @@ public:
const SCEV *getConstant(ConstantInt *V);
const SCEV *getConstant(const APInt &Val);
const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
+ const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
@@ -572,7 +574,9 @@ public:
/// \p IndexExprs The expressions for the indices.
const SCEV *getGEPExpr(GEPOperator *GEP,
const SmallVectorImpl<const SCEV *> &IndexExprs);
- const SCEV *getMinMaxExpr(unsigned Kind,
+ const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW);
+ const SCEV *getSignumExpr(const SCEV *Op);
+ const SCEV *getMinMaxExpr(SCEVTypes Kind,
SmallVectorImpl<const SCEV *> &Operands);
const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
@@ -591,9 +595,22 @@ public:
/// Return a SCEV for the constant 1 of a specific type.
const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); }
- /// Return an expression for sizeof AllocTy that is type IntTy
+ /// Return a SCEV for the constant -1 of a specific type.
+ const SCEV *getMinusOne(Type *Ty) {
+ return getConstant(Ty, -1, /*isSigned=*/true);
+ }
+
+ /// Return an expression for sizeof ScalableTy that is type IntTy, where
+ /// ScalableTy is a scalable vector type.
+ const SCEV *getSizeOfScalableVectorExpr(Type *IntTy,
+ ScalableVectorType *ScalableTy);
+
+ /// Return an expression for the alloc size of AllocTy that is type IntTy
const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy);
+ /// Return an expression for the store size of StoreTy that is type IntTy
+ const SCEV *getStoreSizeOfExpr(Type *IntTy, Type *StoreTy);
+
/// Return an expression for offsetof on the given field with type IntTy
const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo);
@@ -677,6 +694,12 @@ public:
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
+ /// Test whether entry to the basic block is protected by a conditional
+ /// between LHS and RHS.
+ bool isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
+ ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS);
+
/// Test whether the backedge of the loop is protected by a conditional
/// between LHS and RHS. This is used to eliminate casts.
bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
@@ -696,7 +719,8 @@ public:
/// before taking the branch. For loops with multiple exits, it may not be
/// the number of times that the loop header executes if the loop exits
/// prematurely via another branch.
- unsigned getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock);
+ unsigned getSmallConstantTripCount(const Loop *L,
+ const BasicBlock *ExitingBlock);
/// Returns the upper bound of the loop trip count as a normal unsigned
/// value.
@@ -718,8 +742,7 @@ public:
/// for getSmallConstantTripCount, this assumes that control exits the loop
/// via ExitingBlock.
unsigned getSmallConstantTripMultiple(const Loop *L,
- BasicBlock *ExitingBlock);
-
+ const BasicBlock *ExitingBlock);
/// The terms "backedge taken count" and "exit count" are used
/// interchangeably to refer to the number of times the backedge of a loop
@@ -730,6 +753,8 @@ public:
Exact,
/// A constant which provides an upper bound on the exact trip count.
ConstantMaximum,
+ /// An expression which provides an upper bound on the exact trip count.
+ SymbolicMaximum,
};
/// Return the number of times the backedge executes before the given exit
@@ -737,8 +762,8 @@ public:
/// For a single exit loop, this value is equivalent to the result of
/// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit)
/// before the backedge is executed (ExitCount + 1) times. Note that there
- /// is no guarantee about *which* exit is taken on the exiting iteration.
- const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock,
+ /// is no guarantee about *which* exit is taken on the exiting iteration.
+ const SCEV *getExitCount(const Loop *L, const BasicBlock *ExitingBlock,
ExitCountKind Kind = Exact);
/// If the specified loop has a predictable backedge-taken count, return it,
@@ -766,7 +791,15 @@ public:
/// SCEVCouldNotCompute object.
const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenCount(L, ConstantMaximum);
- }
+ }
+
+ /// When successful, this returns a SCEV that is greater than or equal
+ /// to (i.e. a "conservative over-approximation") of the value returend by
+ /// getBackedgeTakenCount. If such a value cannot be computed, it returns the
+ /// SCEVCouldNotCompute object.
+ const SCEV *getSymbolicMaxBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenCount(L, SymbolicMaximum);
+ }
/// Return true if the backedge taken count is either the value returned by
/// getConstantMaxBackedgeTakenCount or zero.
@@ -905,32 +938,61 @@ public:
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS);
+ /// Test if the given expression is known to satisfy the condition described
+ /// by Pred, LHS, and RHS in the given Context.
+ bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS, const Instruction *Context);
+
/// Test if the condition described by Pred, LHS, RHS is known to be true on
/// every iteration of the loop of the recurrency LHS.
bool isKnownOnEveryIteration(ICmpInst::Predicate Pred,
const SCEVAddRecExpr *LHS, const SCEV *RHS);
- /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X"
- /// is monotonically increasing or decreasing. In the former case set
- /// `Increasing` to true and in the latter case set `Increasing` to false.
- ///
/// A predicate is said to be monotonically increasing if it may go from being
/// false to being true as the loop iterates, but never the other way
/// around. A predicate is said to be monotonically decreasing if it may go
/// from being true to being false as the loop iterates, but never the other
/// way around.
- bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred,
- bool &Increasing);
-
- /// Return true if the result of the predicate LHS `Pred` RHS is loop
- /// invariant with respect to L. Set InvariantPred, InvariantLHS and
- /// InvariantLHS so that InvariantLHS `InvariantPred` InvariantRHS is the
- /// loop invariant form of LHS `Pred` RHS.
- bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS, const Loop *L,
- ICmpInst::Predicate &InvariantPred,
- const SCEV *&InvariantLHS,
- const SCEV *&InvariantRHS);
+ enum MonotonicPredicateType {
+ MonotonicallyIncreasing,
+ MonotonicallyDecreasing
+ };
+
+ /// If, for all loop invariant X, the predicate "LHS `Pred` X" is
+ /// monotonically increasing or decreasing, returns
+ /// Some(MonotonicallyIncreasing) and Some(MonotonicallyDecreasing)
+ /// respectively. If we could not prove either of these facts, returns None.
+ Optional<MonotonicPredicateType>
+ getMonotonicPredicateType(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred);
+
+ struct LoopInvariantPredicate {
+ ICmpInst::Predicate Pred;
+ const SCEV *LHS;
+ const SCEV *RHS;
+
+ LoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS)
+ : Pred(Pred), LHS(LHS), RHS(RHS) {}
+ };
+ /// If the result of the predicate LHS `Pred` RHS is loop invariant with
+ /// respect to L, return a LoopInvariantPredicate with LHS and RHS being
+ /// invariants, available at L's entry. Otherwise, return None.
+ Optional<LoopInvariantPredicate>
+ getLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS, const Loop *L);
+
+ /// If the result of the predicate LHS `Pred` RHS is loop invariant with
+ /// respect to L at the given Context during at least the first MaxIter iterations,
+ /// return a LoopInvariantPredicate with LHS and RHS being invariants,
+ /// available at L's entry. Otherwise, return None. The predicate should be
+ /// the loop's exit condition.
+ Optional<LoopInvariantPredicate>
+ getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS, const Loop *L,
+ const Instruction *Context,
+ const SCEV *MaxIter);
/// Simplify LHS and RHS in a comparison with predicate Pred. Return true
/// iff any changes were made. If the operands are provably equal or
@@ -1101,6 +1163,20 @@ public:
const SCEV *S, const Loop *L,
SmallPtrSetImpl<const SCEVPredicate *> &Preds);
+ /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a
+ /// constant, and None if it isn't.
+ ///
+ /// This is intended to be a cheaper version of getMinusSCEV. We can be
+ /// frugal here since we just bail out of actually constructing and
+ /// canonicalizing an expression in the cases where the result isn't going
+ /// to be a constant.
+ Optional<APInt> computeConstantDifference(const SCEV *LHS, const SCEV *RHS);
+
+ /// Update no-wrap flags of an AddRec. This may drop the cached info about
+ /// this AddRec (such as range info) in case the new flags may potentially
+ /// sharpen it.
+ void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags);
+
private:
/// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
/// Value is deleted.
@@ -1181,7 +1257,7 @@ private:
ValueExprMapType ValueExprMap;
/// Mark predicate values currently being processed by isImpliedCond.
- SmallPtrSet<Value *, 6> PendingLoopPredicates;
+ SmallPtrSet<const Value *, 6> PendingLoopPredicates;
/// Mark SCEVUnknown Phis currently being processed by getRangeRef.
SmallPtrSet<const PHINode *, 6> PendingPhiRanges;
@@ -1284,39 +1360,41 @@ private:
/// never have more than one computable exit.
SmallVector<ExitNotTakenInfo, 1> ExitNotTaken;
- /// The pointer part of \c MaxAndComplete is an expression indicating the
- /// least maximum backedge-taken count of the loop that is known, or a
- /// SCEVCouldNotCompute. This expression is only valid if the predicates
- /// associated with all loop exits are true.
- ///
- /// The integer part of \c MaxAndComplete is a boolean indicating if \c
- /// ExitNotTaken has an element for every exiting block in the loop.
- PointerIntPair<const SCEV *, 1> MaxAndComplete;
+ /// Expression indicating the least constant maximum backedge-taken count of
+ /// the loop that is known, or a SCEVCouldNotCompute. This expression is
+ /// only valid if the predicates associated with all loop exits are true.
+ const SCEV *ConstantMax;
+
+ /// Indicating if \c ExitNotTaken has an element for every exiting block in
+ /// the loop.
+ bool IsComplete;
+
+ /// Expression indicating the least maximum backedge-taken count of the loop
+ /// that is known, or a SCEVCouldNotCompute. Lazily computed on first query.
+ const SCEV *SymbolicMax = nullptr;
/// True iff the backedge is taken either exactly Max or zero times.
bool MaxOrZero = false;
- /// \name Helper projection functions on \c MaxAndComplete.
- /// @{
- bool isComplete() const { return MaxAndComplete.getInt(); }
- const SCEV *getMax() const { return MaxAndComplete.getPointer(); }
- /// @}
+ bool isComplete() const { return IsComplete; }
+ const SCEV *getConstantMax() const { return ConstantMax; }
public:
- BackedgeTakenInfo() : MaxAndComplete(nullptr, 0) {}
+ BackedgeTakenInfo() : ConstantMax(nullptr), IsComplete(false) {}
BackedgeTakenInfo(BackedgeTakenInfo &&) = default;
BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default;
using EdgeExitInfo = std::pair<BasicBlock *, ExitLimit>;
/// Initialize BackedgeTakenInfo from a list of exact exit counts.
- BackedgeTakenInfo(ArrayRef<EdgeExitInfo> ExitCounts, bool Complete,
- const SCEV *MaxCount, bool MaxOrZero);
+ BackedgeTakenInfo(ArrayRef<EdgeExitInfo> ExitCounts, bool IsComplete,
+ const SCEV *ConstantMax, bool MaxOrZero);
/// Test whether this BackedgeTakenInfo contains any computed information,
/// or whether it's all SCEVCouldNotCompute values.
bool hasAnyInfo() const {
- return !ExitNotTaken.empty() || !isa<SCEVCouldNotCompute>(getMax());
+ return !ExitNotTaken.empty() ||
+ !isa<SCEVCouldNotCompute>(getConstantMax());
}
/// Test whether this BackedgeTakenInfo contains complete information.
@@ -1347,17 +1425,22 @@ private:
/// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via
/// this block before this number of iterations, but may exit via another
/// block.
- const SCEV *getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const;
+ const SCEV *getExact(const BasicBlock *ExitingBlock,
+ ScalarEvolution *SE) const;
+
+ /// Get the constant max backedge taken count for the loop.
+ const SCEV *getConstantMax(ScalarEvolution *SE) const;
- /// Get the max backedge taken count for the loop.
- const SCEV *getMax(ScalarEvolution *SE) const;
+ /// Get the constant max backedge taken count for the particular loop exit.
+ const SCEV *getConstantMax(const BasicBlock *ExitingBlock,
+ ScalarEvolution *SE) const;
- /// Get the max backedge taken count for the particular loop exit.
- const SCEV *getMax(BasicBlock *ExitingBlock, ScalarEvolution *SE) const;
+ /// Get the symbolic max backedge taken count for the loop.
+ const SCEV *getSymbolicMax(const Loop *L, ScalarEvolution *SE);
/// Return true if the number of times this backedge is taken is either the
- /// value returned by getMax or zero.
- bool isMaxOrZero(ScalarEvolution *SE) const;
+ /// value returned by getConstantMax or zero.
+ bool isConstantMaxOrZero(ScalarEvolution *SE) const;
/// Return true if any backedge taken count expressions refer to the given
/// subexpression.
@@ -1462,6 +1545,13 @@ private:
ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
const SCEV *MaxBECount, unsigned BitWidth);
+ /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p
+ /// Start,+,\p Stop}<nw>.
+ ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec,
+ const SCEV *MaxBECount,
+ unsigned BitWidth,
+ RangeSignHint SignHint);
+
/// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
/// Stop} by "factoring out" a ternary expression from the add recurrence.
/// Helper called by \c getRange.
@@ -1507,7 +1597,7 @@ private:
/// Return the BackedgeTakenInfo for the given loop, lazily computing new
/// values if the loop hasn't been analyzed yet. The returned result is
/// guaranteed not to be predicated.
- const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L);
+ BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L);
/// Similar to getBackedgeTakenInfo, but will add predicates as required
/// with the purpose of returning complete information.
@@ -1540,6 +1630,11 @@ private:
bool ExitIfTrue, bool ControlsExit,
bool AllowPredicates = false);
+ /// Return a symbolic upper bound for the backedge taken count of the loop.
+ /// This is more general than getConstantMaxBackedgeTakenCount as it returns
+ /// an arbitrary expression as opposed to only constants.
+ const SCEV *computeSymbolicMaxBackedgeTakenCount(const Loop *L);
+
// Helper functions for computeExitLimitFromCond to avoid exponential time
// complexity.
@@ -1577,6 +1672,10 @@ private:
Value *ExitCond, bool ExitIfTrue,
bool ControlsExit,
bool AllowPredicates);
+ Optional<ScalarEvolution::ExitLimit>
+ computeExitLimitFromCondFromBinOp(ExitLimitCacheTy &Cache, const Loop *L,
+ Value *ExitCond, bool ExitIfTrue,
+ bool ControlsExit, bool AllowPredicates);
/// Compute the number of times the backedge of the specified loop will
/// execute if its exit condition were a conditional branch of the ICmpInst
@@ -1655,27 +1754,44 @@ private:
/// Return a predecessor of BB (which may not be an immediate predecessor)
/// which has exactly one successor from which BB is reachable, or null if
/// no such block is found.
- std::pair<BasicBlock *, BasicBlock *>
- getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
+ std::pair<const BasicBlock *, const BasicBlock *>
+ getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) const;
/// Test whether the condition described by Pred, LHS, and RHS is true
- /// whenever the given FoundCondValue value evaluates to true.
+ /// whenever the given FoundCondValue value evaluates to true in the given
+ /// Context. If Context is nullptr, then the found predicate is true
+ /// everywhere. LHS and FoundLHS may have different type widths.
bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
- Value *FoundCondValue, bool Inverse);
+ const Value *FoundCondValue, bool Inverse,
+ const Instruction *Context = nullptr);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the given FoundCondValue value evaluates to true in the given
+ /// Context. If Context is nullptr, then the found predicate is true
+ /// everywhere. LHS and FoundLHS must have the same type width.
+ bool isImpliedCondBalancedTypes(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS,
+ ICmpInst::Predicate FoundPred,
+ const SCEV *FoundLHS, const SCEV *FoundRHS,
+ const Instruction *Context);
/// Test whether the condition described by Pred, LHS, and RHS is true
/// whenever the condition described by FoundPred, FoundLHS, FoundRHS is
- /// true.
+ /// true in the given Context. If Context is nullptr, then the found predicate
+ /// is true everywhere.
bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
ICmpInst::Predicate FoundPred, const SCEV *FoundLHS,
- const SCEV *FoundRHS);
+ const SCEV *FoundRHS,
+ const Instruction *Context = nullptr);
/// Test whether the condition described by Pred, LHS, and RHS is true
/// whenever the condition described by Pred, FoundLHS, and FoundRHS is
- /// true.
+ /// true in the given Context. If Context is nullptr, then the found predicate
+ /// is true everywhere.
bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS, const SCEV *FoundLHS,
- const SCEV *FoundRHS);
+ const SCEV *FoundRHS,
+ const Instruction *Context = nullptr);
/// Test whether the condition described by Pred, LHS, and RHS is true
/// whenever the condition described by Pred, FoundLHS, and FoundRHS is
@@ -1708,7 +1824,7 @@ private:
/// Return true if the condition denoted by \p LHS \p Pred \p RHS is implied
/// by a call to @llvm.experimental.guard in \p BB.
- bool isImpliedViaGuard(BasicBlock *BB, ICmpInst::Predicate Pred,
+ bool isImpliedViaGuard(const BasicBlock *BB, ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
/// Test whether the condition described by Pred, LHS, and RHS is true
@@ -1726,6 +1842,18 @@ private:
/// whenever the condition described by Pred, FoundLHS, and FoundRHS is
/// true.
///
+ /// This routine tries to weaken the known condition based on the fact that
+ /// FoundLHS is an AddRec.
+ bool isImpliedCondOperandsViaAddRecStart(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS,
+ const Instruction *Context);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true.
+ ///
/// This routine tries to figure out predicate for Phis which are SCEVUnknown
/// if it is true for every possible incoming value from their respective
/// basic blocks.
@@ -1762,15 +1890,6 @@ private:
bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R,
SCEV::NoWrapFlags &Flags);
- /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a
- /// constant, and None if it isn't.
- ///
- /// This is intended to be a cheaper version of getMinusSCEV. We can be
- /// frugal here since we just bail out of actually constructing and
- /// canonicalizing an expression in the cases where the result isn't going
- /// to be a constant.
- Optional<APInt> computeConstantDifference(const SCEV *LHS, const SCEV *RHS);
-
/// Drop memoized information computed for S.
void forgetMemoizedResults(const SCEV *S);
@@ -1793,8 +1912,17 @@ private:
/// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation.
SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR);
- bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
- ICmpInst::Predicate Pred, bool &Increasing);
+ /// Try to prove NSW on \p AR by proving facts about conditions known on
+ /// entry and backedge.
+ SCEV::NoWrapFlags proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR);
+
+ /// Try to prove NUW on \p AR by proving facts about conditions known on
+ /// entry and backedge.
+ SCEV::NoWrapFlags proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR);
+
+ Optional<MonotonicPredicateType>
+ getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred);
/// Return SCEV no-wrap flags that can be proven based on reasoning about
/// how poison produced from no-wrap flags on this value (e.g. a nuw add)
@@ -1893,6 +2021,9 @@ private:
/// Assign A and B to LHS and RHS, respectively.
bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
+ /// Try to apply information from loop guards for \p L to \p Expr.
+ const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L);
+
/// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
/// `UniqueSCEVs`.
///
@@ -1901,7 +2032,7 @@ private:
/// constructed to look up the SCEV and the third component is the insertion
/// point.
std::tuple<SCEV *, FoldingSetNodeID, void *>
- findExistingSCEVInCache(int SCEVType, ArrayRef<const SCEV *> Ops);
+ findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops);
FoldingSet<SCEV> UniqueSCEVs;
FoldingSet<SCEVPredicate> UniquePreds;
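A short sketch of how the constant/symbolic maximum split introduced above might be queried, assuming an existing ScalarEvolution SE and a Loop *L; all the calls appear in the declarations in this header:

    // Prefer an exact count, fall back to the constant maximum, and finally to
    // the (possibly non-constant) symbolic upper bound.
    const SCEV *Bound = SE.getBackedgeTakenCount(L);
    if (isa<SCEVCouldNotCompute>(Bound))
      Bound = SE.getConstantMaxBackedgeTakenCount(L);
    if (isa<SCEVCouldNotCompute>(Bound))
      Bound = SE.getSymbolicMaxBackedgeTakenCount(L);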
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h
index 480f92c117a0..24f0c51487bd 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h
@@ -33,6 +33,7 @@ public:
// Except in the trivial case described above, we do not know how to divide
// Expr by Denominator for the following functions with empty implementation.
+ void visitPtrToIntExpr(const SCEVPtrToIntExpr *Numerator) {}
void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 0076e02ae1bf..37e675f08afc 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -35,12 +35,12 @@ class ConstantRange;
class Loop;
class Type;
- enum SCEVTypes {
+ enum SCEVTypes : unsigned short {
// These should be ordered in terms of increasing complexity to make the
// folders simpler.
scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr,
scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr,
- scUnknown, scCouldNotCompute
+ scPtrToInt, scUnknown, scCouldNotCompute
};
/// This class represents a constant integer value.
@@ -74,18 +74,58 @@ class Type;
/// This is the base class for unary cast operator classes.
class SCEVCastExpr : public SCEV {
protected:
- const SCEV *Op;
+ std::array<const SCEV *, 1> Operands;
Type *Ty;
- SCEVCastExpr(const FoldingSetNodeIDRef ID,
- unsigned SCEVTy, const SCEV *op, Type *ty);
+ SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, const SCEV *op,
+ Type *ty);
public:
- const SCEV *getOperand() const { return Op; }
+ const SCEV *getOperand() const { return Operands[0]; }
+ const SCEV *getOperand(unsigned i) const {
+ assert(i == 0 && "Operand index out of range!");
+ return Operands[0];
+ }
+ using op_iterator = std::array<const SCEV *, 1>::const_iterator;
+ using op_range = iterator_range<op_iterator>;
+
+ op_range operands() const {
+ return make_range(Operands.begin(), Operands.end());
+ }
+ size_t getNumOperands() const { return 1; }
Type *getType() const { return Ty; }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEV *S) {
+ return S->getSCEVType() == scPtrToInt || S->getSCEVType() == scTruncate ||
+ S->getSCEVType() == scZeroExtend ||
+ S->getSCEVType() == scSignExtend;
+ }
+ };
+
+ /// This class represents a cast from a pointer to a pointer-sized integer
+ /// value.
+ class SCEVPtrToIntExpr : public SCEVCastExpr {
+ friend class ScalarEvolution;
+
+ SCEVPtrToIntExpr(const FoldingSetNodeIDRef ID, const SCEV *Op, Type *ITy);
+
+ public:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const SCEV *S) {
+ return S->getSCEVType() == scPtrToInt;
+ }
+ };
+
+ /// This is the base class for unary integral cast operator classes.
+ class SCEVIntegralCastExpr : public SCEVCastExpr {
+ protected:
+ SCEVIntegralCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
+ const SCEV *op, Type *ty);
+
+ public:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scTruncate ||
S->getSCEVType() == scZeroExtend ||
S->getSCEVType() == scSignExtend;
@@ -94,7 +134,7 @@ class Type;
/// This class represents a truncation of an integer value to a
/// smaller integer value.
- class SCEVTruncateExpr : public SCEVCastExpr {
+ class SCEVTruncateExpr : public SCEVIntegralCastExpr {
friend class ScalarEvolution;
SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
@@ -109,7 +149,7 @@ class Type;
/// This class represents a zero extension of a small integer value
/// to a larger integer value.
- class SCEVZeroExtendExpr : public SCEVCastExpr {
+ class SCEVZeroExtendExpr : public SCEVIntegralCastExpr {
friend class ScalarEvolution;
SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
@@ -124,7 +164,7 @@ class Type;
/// This class represents a sign extension of a small integer value
/// to a larger integer value.
- class SCEVSignExtendExpr : public SCEVCastExpr {
+ class SCEVSignExtendExpr : public SCEVIntegralCastExpr {
friend class ScalarEvolution;
SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
@@ -263,16 +303,28 @@ class Type;
class SCEVUDivExpr : public SCEV {
friend class ScalarEvolution;
- const SCEV *LHS;
- const SCEV *RHS;
+ std::array<const SCEV *, 2> Operands;
SCEVUDivExpr(const FoldingSetNodeIDRef ID, const SCEV *lhs, const SCEV *rhs)
- : SCEV(ID, scUDivExpr, computeExpressionSize({lhs, rhs})), LHS(lhs),
- RHS(rhs) {}
+ : SCEV(ID, scUDivExpr, computeExpressionSize({lhs, rhs})) {
+ Operands[0] = lhs;
+ Operands[1] = rhs;
+ }
public:
- const SCEV *getLHS() const { return LHS; }
- const SCEV *getRHS() const { return RHS; }
+ const SCEV *getLHS() const { return Operands[0]; }
+ const SCEV *getRHS() const { return Operands[1]; }
+ size_t getNumOperands() const { return 2; }
+ const SCEV *getOperand(unsigned i) const {
+ assert((i == 0 || i == 1) && "Operand index out of range!");
+ return i == 0 ? getLHS() : getRHS();
+ }
+
+ using op_iterator = std::array<const SCEV *, 2>::const_iterator;
+ using op_range = iterator_range<op_iterator>;
+ op_range operands() const {
+ return make_range(Operands.begin(), Operands.end());
+ }
Type *getType() const {
// In most cases the types of LHS and RHS will be the same, but in some
@@ -389,7 +441,7 @@ class Type;
public:
static bool classof(const SCEV *S) {
- return isMinMaxType(static_cast<SCEVTypes>(S->getSCEVType()));
+ return isMinMaxType(S->getSCEVType());
}
static enum SCEVTypes negate(enum SCEVTypes T) {
@@ -518,6 +570,8 @@ class Type;
switch (S->getSCEVType()) {
case scConstant:
return ((SC*)this)->visitConstant((const SCEVConstant*)S);
+ case scPtrToInt:
+ return ((SC *)this)->visitPtrToIntExpr((const SCEVPtrToIntExpr *)S);
case scTruncate:
return ((SC*)this)->visitTruncateExpr((const SCEVTruncateExpr*)S);
case scZeroExtend:
@@ -544,9 +598,8 @@ class Type;
return ((SC*)this)->visitUnknown((const SCEVUnknown*)S);
case scCouldNotCompute:
return ((SC*)this)->visitCouldNotCompute((const SCEVCouldNotCompute*)S);
- default:
- llvm_unreachable("Unknown SCEV type!");
}
+ llvm_unreachable("Unknown SCEV kind!");
}
RetVal visitCouldNotCompute(const SCEVCouldNotCompute *S) {
@@ -583,12 +636,13 @@ class Type;
switch (S->getSCEVType()) {
case scConstant:
case scUnknown:
- break;
+ continue;
+ case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
push(cast<SCEVCastExpr>(S)->getOperand());
- break;
+ continue;
case scAddExpr:
case scMulExpr:
case scSMaxExpr:
@@ -598,18 +652,17 @@ class Type;
case scAddRecExpr:
for (const auto *Op : cast<SCEVNAryExpr>(S)->operands())
push(Op);
- break;
+ continue;
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
push(UDiv->getLHS());
push(UDiv->getRHS());
- break;
+ continue;
}
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- default:
- llvm_unreachable("Unknown SCEV kind!");
}
+ llvm_unreachable("Unknown SCEV kind!");
}
}
};
@@ -677,6 +730,13 @@ class Type;
return Constant;
}
+ const SCEV *visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) {
+ const SCEV *Operand = ((SC *)this)->visit(Expr->getOperand());
+ return Operand == Expr->getOperand()
+ ? Expr
+ : SE.getPtrToIntExpr(Operand, Expr->getType());
+ }
+
const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
return Operand == Expr->getOperand()
@@ -787,35 +847,30 @@ class Type;
};
using ValueToValueMap = DenseMap<const Value *, Value *>;
+ using ValueToSCEVMapTy = DenseMap<const Value *, const SCEV *>;
/// The SCEVParameterRewriter takes a scalar evolution expression and updates
- /// the SCEVUnknown components following the Map (Value -> Value).
+ /// the SCEVUnknown components following the Map (Value -> SCEV).
class SCEVParameterRewriter : public SCEVRewriteVisitor<SCEVParameterRewriter> {
public:
static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
- ValueToValueMap &Map,
- bool InterpretConsts = false) {
- SCEVParameterRewriter Rewriter(SE, Map, InterpretConsts);
+ ValueToSCEVMapTy &Map) {
+ SCEVParameterRewriter Rewriter(SE, Map);
return Rewriter.visit(Scev);
}
- SCEVParameterRewriter(ScalarEvolution &SE, ValueToValueMap &M, bool C)
- : SCEVRewriteVisitor(SE), Map(M), InterpretConsts(C) {}
+ SCEVParameterRewriter(ScalarEvolution &SE, ValueToSCEVMapTy &M)
+ : SCEVRewriteVisitor(SE), Map(M) {}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- Value *V = Expr->getValue();
- if (Map.count(V)) {
- Value *NV = Map[V];
- if (InterpretConsts && isa<ConstantInt>(NV))
- return SE.getConstant(cast<ConstantInt>(NV));
- return SE.getUnknown(NV);
- }
- return Expr;
+ auto I = Map.find(Expr->getValue());
+ if (I == Map.end())
+ return Expr;
+ return I->second;
}
private:
- ValueToValueMap &Map;
- bool InterpretConsts;
+ ValueToSCEVMapTy &Map;
};
using LoopToScevMapT = DenseMap<const Loop *, const SCEV *>;
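A sketch of the updated SCEVParameterRewriter interface, assuming an existing ScalarEvolution SE, a Value *Param, and a const SCEV *Expr; mapping a parameter directly to a constant SCEV replaces the old InterpretConsts flag:

    ValueToSCEVMapTy Map;
    Map[Param] = SE.getConstant(Param->getType(), 42); // any SCEV works here
    const SCEV *Rewritten = SCEVParameterRewriter::rewrite(Expr, SE, Map);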
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/SparsePropagation.h b/contrib/llvm-project/llvm/include/llvm/Analysis/SparsePropagation.h
index fac92e4a25a4..81a2533152de 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -485,8 +485,7 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::Solve() {
// Process the basic block work list.
while (!BBWorkList.empty()) {
- BasicBlock *BB = BBWorkList.back();
- BBWorkList.pop_back();
+ BasicBlock *BB = BBWorkList.pop_back_val();
LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB);
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/StackLifetime.h b/contrib/llvm-project/llvm/include/llvm/Analysis/StackLifetime.h
index 8abc6cc1ce00..df342a9533ee 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/StackLifetime.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/StackLifetime.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
@@ -121,6 +122,8 @@ private:
DenseMap<const BasicBlock *, SmallVector<std::pair<unsigned, Marker>, 4>>
BBMarkers;
+ bool HasUnknownLifetimeStartOrEnd = false;
+
void dumpAllocas() const;
void dumpBlockLiveness() const;
void dumpLiveRanges() const;
@@ -166,16 +169,9 @@ public:
static inline raw_ostream &operator<<(raw_ostream &OS, const BitVector &V) {
OS << "{";
- int Idx = V.find_first();
- bool First = true;
- while (Idx >= 0) {
- if (!First) {
- OS << ", ";
- }
- First = false;
- OS << Idx;
- Idx = V.find_next(Idx);
- }
+ ListSeparator LS;
+ for (int Idx = V.find_first(); Idx >= 0; Idx = V.find_next(Idx))
+ OS << LS << Idx;
OS << "}";
return OS;
}
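The ListSeparator helper used above (from llvm/ADT/StringExtras.h) prints nothing on its first use and the separator on every later use, which is what lets the loop drop the manual First flag. A standalone sketch, assuming any raw_ostream OS:

    ListSeparator LS;        // default separator is ", "
    for (int Idx : {3, 1, 4})
      OS << LS << Idx;       // prints "3, 1, 4"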
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
index 846c2e6f7e91..59c1e3e3bd56 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
@@ -51,7 +51,8 @@ public:
/// StackSafety assumes that missing parameter information means possibility
/// of access to the parameter with any offset, so we can correctly link
/// code without StackSafety information, e.g. non-ThinLTO.
- std::vector<FunctionSummary::ParamAccess> getParamAccesses() const;
+ std::vector<FunctionSummary::ParamAccess>
+ getParamAccesses(ModuleSummaryIndex &Index) const;
};
class StackSafetyGlobalInfo {
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
index 2f07b3135308..9838d629e93e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/LoopInfo.h"
#include <memory>
+#include <unordered_map>
namespace llvm {
@@ -30,6 +31,26 @@ class Loop;
class PostDominatorTree;
using ConstBlockSet = SmallPtrSet<const BasicBlock *, 4>;
+struct ControlDivergenceDesc {
+ // Join points of divergent disjoint paths.
+ ConstBlockSet JoinDivBlocks;
+ // Divergent loop exits
+ ConstBlockSet LoopDivBlocks;
+};
+
+struct ModifiedPO {
+ std::vector<const BasicBlock *> LoopPO;
+ std::unordered_map<const BasicBlock *, unsigned> POIndex;
+ void appendBlock(const BasicBlock &BB) {
+ POIndex[&BB] = LoopPO.size();
+ LoopPO.push_back(&BB);
+ }
+ unsigned getIndexOf(const BasicBlock &BB) const {
+ return POIndex.find(&BB)->second;
+ }
+ unsigned size() const { return LoopPO.size(); }
+ const BasicBlock *getBlockAt(unsigned Idx) const { return LoopPO[Idx]; }
+};
/// \brief Relates points of divergent control to join points in
/// reducible CFGs.
@@ -51,28 +72,19 @@ public:
/// header. Those exit blocks are added to the returned set.
/// If L is the parent loop of \p Term and an exit of L is in the returned
/// set then L is a divergent loop.
- const ConstBlockSet &join_blocks(const Instruction &Term);
-
- /// \brief Computes divergent join points and loop exits (in the surrounding
- /// loop) caused by the divergent loop exits of\p Loop.
- ///
- /// The set of blocks which are reachable by disjoint paths from the
- /// loop exits of \p Loop.
- /// This treats the loop as a single node in \p Loop's parent loop.
- /// The returned set has the same properties as for join_blocks(TermInst&).
- const ConstBlockSet &join_blocks(const Loop &Loop);
+ const ControlDivergenceDesc &getJoinBlocks(const Instruction &Term);
private:
- static ConstBlockSet EmptyBlockSet;
+ static ControlDivergenceDesc EmptyDivergenceDesc;
+
+ ModifiedPO LoopPO;
- ReversePostOrderTraversal<const Function *> FuncRPOT;
const DominatorTree &DT;
const PostDominatorTree &PDT;
const LoopInfo &LI;
- std::map<const Loop *, std::unique_ptr<ConstBlockSet>> CachedLoopExitJoins;
- std::map<const Instruction *, std::unique_ptr<ConstBlockSet>>
- CachedBranchJoins;
+ std::map<const Instruction *, std::unique_ptr<ControlDivergenceDesc>>
+ CachedControlDivDescs;
};
} // namespace llvm
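An illustrative use of the new ModifiedPO helper, with A and B standing in for BasicBlocks appended by the analysis in its modified post-order:

    ModifiedPO PO;
    PO.appendBlock(A);                           // index 0
    PO.appendBlock(B);                           // index 1
    assert(PO.getIndexOf(A) < PO.getIndexOf(B));
    const BasicBlock *First = PO.getBlockAt(0);  // == &A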
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 3864d4955104..defc95d0062a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -262,6 +262,12 @@ TLI_DEFINE_STRING_INTERNAL("__atanhf_finite")
/// long double __atanhl_finite(long double x);
TLI_DEFINE_ENUM_INTERNAL(atanhl_finite)
TLI_DEFINE_STRING_INTERNAL("__atanhl_finite")
+/// void __atomic_load(size_t size, void *mptr, void *vptr, int smodel);
+TLI_DEFINE_ENUM_INTERNAL(atomic_load)
+TLI_DEFINE_STRING_INTERNAL("__atomic_load")
+/// void __atomic_store(size_t size, void *mptr, void *vptr, int smodel);
+TLI_DEFINE_ENUM_INTERNAL(atomic_store)
+TLI_DEFINE_STRING_INTERNAL("__atomic_store")
/// double __cosh_finite(double x);
TLI_DEFINE_ENUM_INTERNAL(cosh_finite)
TLI_DEFINE_STRING_INTERNAL("__cosh_finite")
@@ -360,6 +366,9 @@ TLI_DEFINE_STRING_INTERNAL("__memcpy_chk")
/// void *__memmove_chk(void *s1, const void *s2, size_t n, size_t s1size);
TLI_DEFINE_ENUM_INTERNAL(memmove_chk)
TLI_DEFINE_STRING_INTERNAL("__memmove_chk")
+/// void *__mempcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(mempcpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__mempcpy_chk")
/// void *__memset_chk(void *s, char v, size_t n, size_t s1size);
TLI_DEFINE_ENUM_INTERNAL(memset_chk)
TLI_DEFINE_STRING_INTERNAL("__memset_chk")
@@ -1411,6 +1420,18 @@ TLI_DEFINE_STRING_INTERNAL("utimes")
/// void *valloc(size_t size);
TLI_DEFINE_ENUM_INTERNAL(valloc)
TLI_DEFINE_STRING_INTERNAL("valloc")
+/// void *vec_calloc(size_t count, size_t size);
+TLI_DEFINE_ENUM_INTERNAL(vec_calloc)
+TLI_DEFINE_STRING_INTERNAL("vec_calloc")
+/// void vec_free(void *ptr);
+TLI_DEFINE_ENUM_INTERNAL(vec_free)
+TLI_DEFINE_STRING_INTERNAL("vec_free")
+/// void *vec_malloc(size_t size);
+TLI_DEFINE_ENUM_INTERNAL(vec_malloc)
+TLI_DEFINE_STRING_INTERNAL("vec_malloc")
+/// void *vec_realloc(void *ptr, size_t size);
+TLI_DEFINE_ENUM_INTERNAL(vec_realloc)
+TLI_DEFINE_STRING_INTERNAL("vec_realloc")
/// int vfprintf(FILE *stream, const char *format, va_list ap);
TLI_DEFINE_ENUM_INTERNAL(vfprintf)
TLI_DEFINE_STRING_INTERNAL("vfprintf")
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 3a7c26e1463b..34a8a1e3407c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -88,6 +88,7 @@ public:
enum VectorLibrary {
NoLibrary, // Don't use any vector library.
Accelerate, // Use Accelerate framework.
+ LIBMVEC_X86, // GLIBC Vector Math library.
MASSV, // IBM MASS vector library.
SVML // Intel short vector math library.
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
index b6698eefdb01..cdfb04424e56 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -21,11 +21,13 @@
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/InstructionCost.h"
#include <functional>
namespace llvm {
@@ -42,6 +44,7 @@ class CallBase;
class ExtractElementInst;
class Function;
class GlobalValue;
+class InstCombiner;
class IntrinsicInst;
class LoadInst;
class LoopAccessInfo;
@@ -56,6 +59,7 @@ class TargetLibraryInfo;
class Type;
class User;
class Value;
+struct KnownBits;
template <typename T> class Optional;
/// Information about a load/store intrinsic defined by the target.
@@ -90,7 +94,7 @@ struct HardwareLoopInfo {
Loop *L = nullptr;
BasicBlock *ExitBlock = nullptr;
BranchInst *ExitBranch = nullptr;
- const SCEV *ExitCount = nullptr;
+ const SCEV *TripCount = nullptr;
IntegerType *CountType = nullptr;
Value *LoopDecrement = nullptr; // Decrement the loop counter by this
// value in every iteration.
@@ -114,7 +118,7 @@ class IntrinsicCostAttributes {
SmallVector<Type *, 4> ParamTys;
SmallVector<const Value *, 4> Arguments;
FastMathFlags FMF;
- unsigned VF = 1;
+ ElementCount VF = ElementCount::getFixed(1);
// If ScalarizationCost is UINT_MAX, the cost of scalarizing the
// arguments and the return value will be computed based on types.
unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
@@ -125,10 +129,10 @@ public:
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
- unsigned Factor);
+ ElementCount Factor);
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
- unsigned Factor, unsigned ScalarCost);
+ ElementCount Factor, unsigned ScalarCost);
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<Type *> Tys, FastMathFlags Flags);
@@ -151,7 +155,7 @@ public:
Intrinsic::ID getID() const { return IID; }
const IntrinsicInst *getInst() const { return II; }
Type *getReturnType() const { return RetTy; }
- unsigned getVectorFactor() const { return VF; }
+ ElementCount getVectorFactor() const { return VF; }
FastMathFlags getFlags() const { return FMF; }
unsigned getScalarizationCost() const { return ScalarizationCost; }
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
@@ -228,19 +232,24 @@ public:
///
/// Note, this method does not cache the cost calculation and it
/// can be expensive in some cases.
- int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
+ InstructionCost getInstructionCost(const Instruction *I,
+ enum TargetCostKind kind) const {
+ InstructionCost Cost;
switch (kind) {
case TCK_RecipThroughput:
- return getInstructionThroughput(I);
-
+ Cost = getInstructionThroughput(I);
+ break;
case TCK_Latency:
- return getInstructionLatency(I);
-
+ Cost = getInstructionLatency(I);
+ break;
case TCK_CodeSize:
case TCK_SizeAndLatency:
- return getUserCost(I, kind);
+ Cost = getUserCost(I, kind);
+ break;
}
- llvm_unreachable("Unknown instruction cost kind");
+ if (Cost == -1)
+ Cost.setInvalid();
+ return Cost;
}
/// Underlying constants for 'cost' values in this interface.
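A hedged sketch of a caller under the new InstructionCost return type, assuming an existing TargetTransformInfo TTI, an Instruction I, and a running total; an invalid cost replaces the old -1 sentinel:

    InstructionCost Cost =
        TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
    if (!Cost.isValid())
      return;                    // the target could not cost this instruction
    Total += *Cost.getValue();   // Total is an assumed running counter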
@@ -280,6 +289,9 @@ public:
/// individual classes of instructions would be better.
unsigned getInliningThresholdMultiplier() const;
+ /// \returns A value to be added to the inlining threshold.
+ unsigned adjustInliningThreshold(const CallBase *CB) const;
+
/// \returns Vector bonus in percent.
///
/// Vector bonuses: We want to more aggressively inline vector-dense kernels
@@ -323,8 +335,7 @@ public:
/// This is a helper function which calls the two-argument getUserCost
/// with \p Operands which are the current operands U has.
int getUserCost(const User *U, TargetCostKind CostKind) const {
- SmallVector<const Value *, 4> Operands(U->value_op_begin(),
- U->value_op_end());
+ SmallVector<const Value *, 4> Operands(U->operand_values());
return getUserCost(U, Operands, CostKind);
}
@@ -379,6 +390,8 @@ public:
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
+ unsigned getAssumedAddrSpace(const Value *V) const;
+
/// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
/// NewV, which has a different address space. This should happen for every
/// operand index that collectFlatAddressOperands returned for the intrinsic.
@@ -542,6 +555,29 @@ public:
/// target-independent defaults with information from \p L and \p SE.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const;
+
+ /// Targets can implement their own combinations for target-specific
+ /// intrinsics. This function will be called from the InstCombine pass every
+ /// time a target-specific intrinsic is encountered.
+ ///
+ /// \returns None to not do anything target specific or a value that will be
+ /// returned from the InstCombiner. It is also possible to stop further
+ /// processing of the intrinsic by returning nullptr.
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const;
+ /// Can be used to implement target-specific instruction combining.
+ /// \see instCombineIntrinsic
+ Optional<Value *>
+ simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+ APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) const;
+ /// Can be used to implement target-specific instruction combining.
+ /// \see instCombineIntrinsic
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const;
/// @}
/// \name Scalar Target Information
@@ -583,6 +619,11 @@ public:
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) const;
+ /// Return true if LSR's major cost is the number of registers. Targets that
+ /// implement their own isLSRCostLess and do not use the number of registers
+ /// as the major cost should return false; otherwise return true.
+ bool isNumRegsMajorCostOfLSR() const;
+
/// \returns true if LSR should not optimize a chain that includes \p I.
bool isProfitableLSRChainElement(Instruction *I) const;
@@ -672,6 +713,9 @@ public:
/// Return true if this type is legal.
bool isTypeLegal(Type *Ty) const;
+ /// Returns the estimated number of registers required to represent \p Ty.
+ unsigned getRegUsageForType(Type *Ty) const;
+
/// Return true if switches should be turned into lookup tables for the
/// target.
bool shouldBuildLookupTables() const;
@@ -780,8 +824,9 @@ public:
/// Return the expected cost of materialization for the given integer
/// immediate of the specified type for a given instruction. The cost can be
/// zero if the immediate can be folded into the specified instruction.
- int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty, TargetCostKind CostKind) const;
+ int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind,
+ Instruction *Inst = nullptr) const;
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TargetCostKind CostKind) const;
@@ -845,6 +890,10 @@ public:
static ReductionKind matchVectorSplittingReduction(
const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
+ static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, VectorType *&Ty,
+ bool &IsPairwise);
+
/// Additional information about an operand's possible values.
enum OperandValueKind {
OK_AnyValue, // Operand can have any value.
@@ -881,6 +930,10 @@ public:
/// \return The width of the smallest vector register type.
unsigned getMinVectorRegisterBitWidth() const;
+ /// \return The maximum value of vscale if the target specifies an
+ /// architectural maximum vector length, and None otherwise.
+ Optional<unsigned> getMaxVScale() const;
+
/// \return True if the vectorization factor should be chosen to
/// make the vector of the smallest element type match the size of a
/// vector register. For wider element types, this could result in
@@ -894,6 +947,11 @@ public:
/// applies when shouldMaximizeVectorBandwidth returns true.
unsigned getMinimumVF(unsigned ElemWidth) const;
+ /// \return The maximum vectorization factor for types of given element
+ /// bit width and opcode, or 0 if there is no maximum VF.
+ /// Currently only used by the SLP vectorizer.
+ unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
+
/// \return True if it should be considered for address type promotion.
/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
/// profitable without finding other extensions fed by the same input.
@@ -996,10 +1054,47 @@ public:
int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0,
VectorType *SubTp = nullptr) const;
+ /// Represents a hint about the context in which a cast is used.
+ ///
+ /// For zext/sext, the context of the cast is the operand, which must be a
+ /// load of some kind. For trunc, the context is of the cast is the single
+ /// user of the instruction, which must be a store of some kind.
+ ///
+ /// This enum allows the vectorizer to give getCastInstrCost an idea of the
+ /// type of cast it's dealing with, as not every cast is equal. For instance,
+ /// the zext of a load may be free, but the zext of an interleaving load can
+ /// be (very) expensive!
+ ///
+ /// See \c getCastContextHint to compute a CastContextHint from a cast
+ /// Instruction*. Callers can use it if they don't need to override the
+ /// context and just want it to be calculated from the instruction.
+ ///
+ /// FIXME: This handles the types of load/store that the vectorizer can
+ /// produce, which are the cases where the context instruction is most
+ /// likely to be incorrect. There are other situations where that can happen
+ /// too, which might be handled here, but in the long run a more general
+ /// solution of costing multiple instructions at the same time may be better.
+ enum class CastContextHint : uint8_t {
+ None, ///< The cast is not used with a load/store of any kind.
+ Normal, ///< The cast is used with a normal load/store.
+ Masked, ///< The cast is used with a masked load/store.
+ GatherScatter, ///< The cast is used with a gather/scatter.
+ Interleave, ///< The cast is used with an interleaved load/store.
+ Reversed, ///< The cast is used with a reversed load/store.
+ };
+
+ /// Calculates a CastContextHint from \p I.
+ /// This should be used by callers of getCastInstrCost if they wish to
+ /// determine the context from some instruction.
+ /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
+ /// or if it's another type of cast.
+ static CastContextHint getCastContextHint(const Instruction *I);
+
/// \return The expected cost of cast instructions, such as bitcast, trunc,
/// zext, etc. If there is an existing instruction that holds Opcode, it
/// may be passed in the 'I' parameter.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
const Instruction *I = nullptr) const;
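An illustrative call of the extended getCastInstrCost above, deriving the context hint from the instruction itself; Z is assumed to be an existing zext instruction and TTI a TargetTransformInfo:

    TargetTransformInfo::CastContextHint CCH =
        TargetTransformInfo::getCastContextHint(Z);
    int Cost = TTI.getCastInstrCost(Instruction::ZExt, Z->getType(),
                                    Z->getOperand(0)->getType(), CCH,
                                    TargetTransformInfo::TCK_RecipThroughput, Z);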
@@ -1015,10 +1110,14 @@ public:
/// \returns The expected cost of compare and select instructions. If there
/// is an existing instruction that holds Opcode, it may be passed in the
- /// 'I' parameter.
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
+ /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
+ /// is using a compare with the specified predicate as condition. When vector
+ /// types are passed, \p VecPred must be used for all lanes.
+ int getCmpSelInstrCost(
+ unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
+ CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ const Instruction *I = nullptr) const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value.
@@ -1086,6 +1185,16 @@ public:
VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+ /// Calculate the cost of an extended reduction pattern, similar to
+ /// getArithmeticReductionCost of an Add reduction with an extension and
+ /// optional multiply. This is the cost of as:
+ /// ResTy vecreduce.add(ext(Ty A)), or if IsMLA flag is set then:
+ /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B)). The reduction happens
+ /// on a VectorType with ResTy elements and Ty lanes.
+ InstructionCost getExtendedAddReductionCost(
+ bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
/// Three cases are handled: 1. scalar instruction 2. vector instruction
/// 3. scalar instruction which is to be vectorized.
@@ -1221,6 +1330,24 @@ public:
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const;
+ /// \returns True if the target prefers in-loop reductions.
+ bool preferInLoopReduction(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const;
+
+ /// \returns True if the target prefers the reduction select to be kept in the
+ /// loop when tail folding, i.e.
+ /// loop:
+ /// p = phi (0, s)
+ /// a = add (p, x)
+ /// s = select (mask, a, p)
+ /// vecreduce.add(s)
+ ///
+ /// This is in contrast to the normal scheme of p = phi (0, a), which allows
+ /// the select to be pulled out of the loop. If the select(.., add, ..) can be
+ /// predicated by the target, this can lead to cleaner code generation.
+ bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const;
+
/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
bool shouldExpandReduction(const IntrinsicInst *II) const;
@@ -1229,6 +1356,9 @@ public:
/// to a stack reload.
unsigned getGISelRematGlobalCost() const;
+ /// \returns True if the target supports scalable vectors.
+ bool supportsScalableVectors() const;
+
/// \name Vector Predication Information
/// @{
/// Whether the target supports the %evl parameter of VP intrinsic efficiently
@@ -1268,6 +1398,7 @@ public:
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
+ virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
virtual int getInlinerVectorBonusPercent() = 0;
virtual int getMemcpyCost(const Instruction *I) = 0;
virtual unsigned
@@ -1284,6 +1415,7 @@ public:
virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const = 0;
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
+ virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Value *OldV,
Value *NewV) const = 0;
@@ -1301,6 +1433,17 @@ public:
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
virtual bool emitGetActiveLaneMask() = 0;
+ virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) = 0;
+ virtual Optional<Value *>
+ simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+ APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) = 0;
+ virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
@@ -1309,6 +1452,7 @@ public:
Instruction *I) = 0;
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;
+ virtual bool isNumRegsMajorCostOfLSR() = 0;
virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
virtual bool canMacroFuseCmp() = 0;
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
@@ -1335,6 +1479,7 @@ public:
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool useAA() = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
+ virtual unsigned getRegUsageForType(Type *Ty) = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
@@ -1365,7 +1510,8 @@ public:
virtual int getIntImmCost(const APInt &Imm, Type *Ty,
TargetCostKind CostKind) = 0;
virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty, TargetCostKind CostKind) = 0;
+ Type *Ty, TargetCostKind CostKind,
+ Instruction *Inst = nullptr) = 0;
virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind) = 0;
@@ -1375,8 +1521,10 @@ public:
virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() = 0;
+ virtual Optional<unsigned> getMaxVScale() const = 0;
virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
+ virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
virtual unsigned getCacheLineSize() const = 0;
@@ -1418,6 +1566,7 @@ public:
virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp) = 0;
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
@@ -1425,6 +1574,7 @@ public:
virtual int getCFInstrCost(unsigned Opcode,
TTI::TargetCostKind CostKind) = 0;
virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
@@ -1452,6 +1602,9 @@ public:
virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsPairwiseForm, bool IsUnsigned,
TTI::TargetCostKind CostKind) = 0;
+ virtual InstructionCost getExtendedAddReductionCost(
+ bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) = 0;
virtual int getCallInstrCost(Function *F, Type *RetTy,
@@ -1499,8 +1652,13 @@ public:
VectorType *VecTy) const = 0;
virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
+ virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
+ ReductionFlags) const = 0;
+ virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
+ ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
virtual unsigned getGISelRematGlobalCost() const = 0;
+ virtual bool supportsScalableVectors() const = 0;
virtual bool hasActiveVectorLength() const = 0;
virtual int getInstructionLatency(const Instruction *I) = 0;
};
@@ -1525,6 +1683,9 @@ public:
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
}
+ unsigned adjustInliningThreshold(const CallBase *CB) override {
+ return Impl.adjustInliningThreshold(CB);
+ }
int getInlinerVectorBonusPercent() override {
return Impl.getInlinerVectorBonusPercent();
}
@@ -1558,6 +1719,10 @@ public:
return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
}
+ unsigned getAssumedAddrSpace(const Value *V) const override {
+ return Impl.getAssumedAddrSpace(V);
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const override {
return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
@@ -1588,6 +1753,26 @@ public:
bool emitGetActiveLaneMask() override {
return Impl.emitGetActiveLaneMask();
}
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) override {
+ return Impl.instCombineIntrinsic(IC, II);
+ }
+ Optional<Value *>
+ simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+ APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) override {
+ return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
+ KnownBitsComputed);
+ }
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) override {
+ return Impl.simplifyDemandedVectorEltsIntrinsic(
+ IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+ SimplifyAndSetOp);
+ }
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
@@ -1604,6 +1789,9 @@ public:
TargetTransformInfo::LSRCost &C2) override {
return Impl.isLSRCostLess(C1, C2);
}
+ bool isNumRegsMajorCostOfLSR() override {
+ return Impl.isNumRegsMajorCostOfLSR();
+ }
bool isProfitableLSRChainElement(Instruction *I) override {
return Impl.isProfitableLSRChainElement(I);
}
@@ -1665,6 +1853,9 @@ public:
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
+ unsigned getRegUsageForType(Type *Ty) override {
+ return Impl.getRegUsageForType(Ty);
+ }
bool shouldBuildLookupTables() override {
return Impl.shouldBuildLookupTables();
}
@@ -1729,9 +1920,10 @@ public:
TargetCostKind CostKind) override {
return Impl.getIntImmCost(Imm, Ty, CostKind);
}
- int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty, TargetCostKind CostKind) override {
- return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind);
+ int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind,
+ Instruction *Inst = nullptr) override {
+ return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
}
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TargetCostKind CostKind) override {
@@ -1753,12 +1945,18 @@ public:
unsigned getMinVectorRegisterBitWidth() override {
return Impl.getMinVectorRegisterBitWidth();
}
+ Optional<unsigned> getMaxVScale() const override {
+ return Impl.getMaxVScale();
+ }
bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
return Impl.shouldMaximizeVectorBandwidth(OptSize);
}
unsigned getMinimumVF(unsigned ElemWidth) const override {
return Impl.getMinimumVF(ElemWidth);
}
+ unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
+ return Impl.getMaximumVF(ElemWidth, Opcode);
+ }
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
return Impl.shouldConsiderAddressTypePromotion(
@@ -1826,9 +2024,9 @@ public:
return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
}
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::TargetCostKind CostKind,
+ CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I) override {
- return Impl.getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+ return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index) override {
@@ -1838,9 +2036,10 @@ public:
return Impl.getCFInstrCost(Opcode, CostKind);
}
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) override {
- return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
@@ -1886,6 +2085,12 @@ public:
return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
CostKind);
}
+ InstructionCost getExtendedAddReductionCost(
+ bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
+ return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
+ CostKind);
+ }
int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) override {
return Impl.getIntrinsicInstrCost(ICA, CostKind);
@@ -1979,6 +2184,14 @@ public:
ReductionFlags Flags) const override {
return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
}
+ bool preferInLoopReduction(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const override {
+ return Impl.preferInLoopReduction(Opcode, Ty, Flags);
+ }
+ bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const override {
+ return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
+ }
bool shouldExpandReduction(const IntrinsicInst *II) const override {
return Impl.shouldExpandReduction(II);
}
@@ -1987,6 +2200,10 @@ public:
return Impl.getGISelRematGlobalCost();
}
+ bool supportsScalableVectors() const override {
+ return Impl.supportsScalableVectors();
+ }
+
bool hasActiveVectorLength() const override {
return Impl.hasActiveVectorLength();
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 0ce975d6d4b5..7e31cb365a87 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -20,7 +20,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
@@ -46,7 +46,7 @@ public:
int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const {
// In the basic model, we just assume that all-constant GEPs will be folded
// into their uses via addressing modes.
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -59,28 +59,31 @@ public:
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize,
ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI) {
+ BlockFrequencyInfo *BFI) const {
(void)PSI;
(void)BFI;
JTSize = 0;
return SI.getNumCases();
}
- unsigned getInliningThresholdMultiplier() { return 1; }
+ unsigned getInliningThresholdMultiplier() const { return 1; }
+ unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
- int getInlinerVectorBonusPercent() { return 150; }
+ int getInlinerVectorBonusPercent() const { return 150; }
- unsigned getMemcpyCost(const Instruction *I) { return TTI::TCC_Expensive; }
+ unsigned getMemcpyCost(const Instruction *I) const {
+ return TTI::TCC_Expensive;
+ }
- bool hasBranchDivergence() { return false; }
+ bool hasBranchDivergence() const { return false; }
- bool useGPUDivergenceAnalysis() { return false; }
+ bool useGPUDivergenceAnalysis() const { return false; }
- bool isSourceOfDivergence(const Value *V) { return false; }
+ bool isSourceOfDivergence(const Value *V) const { return false; }
- bool isAlwaysUniform(const Value *V) { return false; }
+ bool isAlwaysUniform(const Value *V) const { return false; }
- unsigned getFlatAddressSpace() { return -1; }
+ unsigned getFlatAddressSpace() const { return -1; }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const {
@@ -89,12 +92,14 @@ public:
bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
+ unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const {
return nullptr;
}
- bool isLoweredToCall(const Function *F) {
+ bool isLoweredToCall(const Function *F) const {
assert(F && "A concrete function must be provided to this routine.");
// FIXME: These should almost certainly not be handled here, and instead
@@ -132,7 +137,7 @@ public:
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *LibInfo,
- HardwareLoopInfo &HWLoopInfo) {
+ HardwareLoopInfo &HWLoopInfo) const {
return false;
}
@@ -147,38 +152,60 @@ public:
return false;
}
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const {
+ return None;
+ }
+
+ Optional<Value *>
+ simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+ APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) const {
+ return None;
+ }
+
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const {
+ return None;
+ }
+
void getUnrollingPreferences(Loop *, ScalarEvolution &,
- TTI::UnrollingPreferences &) {}
+ TTI::UnrollingPreferences &) const {}
void getPeelingPreferences(Loop *, ScalarEvolution &,
- TTI::PeelingPreferences &) {}
+ TTI::PeelingPreferences &) const {}
- bool isLegalAddImmediate(int64_t Imm) { return false; }
+ bool isLegalAddImmediate(int64_t Imm) const { return false; }
- bool isLegalICmpImmediate(int64_t Imm) { return false; }
+ bool isLegalICmpImmediate(int64_t Imm) const { return false; }
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
- Instruction *I = nullptr) {
+ Instruction *I = nullptr) const {
// Guess that only reg and reg+reg addressing is allowed. This heuristic is
// taken from the implementation of LSR.
return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
}
- bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
+ bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const {
return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}
- bool isProfitableLSRChainElement(Instruction *I) { return false; }
+ bool isNumRegsMajorCostOfLSR() const { return true; }
+
+ bool isProfitableLSRChainElement(Instruction *I) const { return false; }
- bool canMacroFuseCmp() { return false; }
+ bool canMacroFuseCmp() const { return false; }
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, AssumptionCache *AC,
- TargetLibraryInfo *LibInfo) {
+ TargetLibraryInfo *LibInfo) const {
return false;
}
@@ -186,40 +213,51 @@ public:
bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
- bool isLegalMaskedStore(Type *DataType, Align Alignment) { return false; }
+ bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
+ return false;
+ }
- bool isLegalMaskedLoad(Type *DataType, Align Alignment) { return false; }
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
+ return false;
+ }
- bool isLegalNTStore(Type *DataType, Align Alignment) {
+ bool isLegalNTStore(Type *DataType, Align Alignment) const {
// By default, assume nontemporal memory stores are available for stores
// that are aligned and have a size that is a power of 2.
unsigned DataSize = DL.getTypeStoreSize(DataType);
return Alignment >= DataSize && isPowerOf2_32(DataSize);
}
- bool isLegalNTLoad(Type *DataType, Align Alignment) {
+ bool isLegalNTLoad(Type *DataType, Align Alignment) const {
// By default, assume nontemporal memory loads are available for loads that
// are aligned and have a size that is a power of 2.
unsigned DataSize = DL.getTypeStoreSize(DataType);
return Alignment >= DataSize && isPowerOf2_32(DataSize);
}
- bool isLegalMaskedScatter(Type *DataType, Align Alignment) { return false; }
+ bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
+ return false;
+ }
- bool isLegalMaskedGather(Type *DataType, Align Alignment) { return false; }
+ bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
+ return false;
+ }
- bool isLegalMaskedCompressStore(Type *DataType) { return false; }
+ bool isLegalMaskedCompressStore(Type *DataType) const { return false; }
- bool isLegalMaskedExpandLoad(Type *DataType) { return false; }
+ bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
- bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
+ bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
- bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
+ bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
+ return false;
+ }
- bool prefersVectorizedAddressing() { return true; }
+ bool prefersVectorizedAddressing() const { return true; }
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
+ bool HasBaseReg, int64_t Scale,
+ unsigned AddrSpace) const {
// Guess that all legal addressing modes are free.
if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
AddrSpace))
@@ -227,80 +265,87 @@ public:
return -1;
}
- bool LSRWithInstrQueries() { return false; }
+ bool LSRWithInstrQueries() const { return false; }
- bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
- bool isProfitableToHoist(Instruction *I) { return true; }
+ bool isProfitableToHoist(Instruction *I) const { return true; }
- bool useAA() { return false; }
+ bool useAA() const { return false; }
- bool isTypeLegal(Type *Ty) { return false; }
+ bool isTypeLegal(Type *Ty) const { return false; }
- bool shouldBuildLookupTables() { return true; }
- bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
+ unsigned getRegUsageForType(Type *Ty) const { return 1; }
- bool useColdCCForColdCall(Function &F) { return false; }
+ bool shouldBuildLookupTables() const { return true; }
+ bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
+
+ bool useColdCCForColdCall(Function &F) const { return false; }
unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
- bool Insert, bool Extract) {
+ bool Insert, bool Extract) const {
return 0;
}
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- unsigned VF) {
+ unsigned VF) const {
return 0;
}
- bool supportsEfficientVectorElementLoadStore() { return false; }
+ bool supportsEfficientVectorElementLoadStore() const { return false; }
- bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
+ bool enableAggressiveInterleaving(bool LoopHasReductions) const {
+ return false;
+ }
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const {
return {};
}
- bool enableInterleavedAccessVectorization() { return false; }
+ bool enableInterleavedAccessVectorization() const { return false; }
- bool enableMaskedInterleavedAccessVectorization() { return false; }
+ bool enableMaskedInterleavedAccessVectorization() const { return false; }
- bool isFPVectorizationPotentiallyUnsafe() { return false; }
+ bool isFPVectorizationPotentiallyUnsafe() const { return false; }
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, unsigned Alignment,
- bool *Fast) {
+ bool *Fast) const {
return false;
}
- TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
+ TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
return TTI::PSK_Software;
}
- bool haveFastSqrt(Type *Ty) { return false; }
+ bool haveFastSqrt(Type *Ty) const { return false; }
- bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }
+ bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
- unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
+ unsigned getFPOpCost(Type *Ty) const {
+ return TargetTransformInfo::TCC_Basic;
+ }
int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+ Type *Ty) const {
return 0;
}
unsigned getIntImmCost(const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind) const {
return TTI::TCC_Basic;
}
unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind) {
+ Type *Ty, TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr) const {
return TTI::TCC_Free;
}
unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind) const {
return TTI::TCC_Free;
}
@@ -323,15 +368,18 @@ public:
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
- unsigned getMinVectorRegisterBitWidth() { return 128; }
+ unsigned getMinVectorRegisterBitWidth() const { return 128; }
+
+ Optional<unsigned> getMaxVScale() const { return None; }
bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
- bool
- shouldConsiderAddressTypePromotion(const Instruction &I,
- bool &AllowPromotionWithoutCommonHeader) {
+ unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
+
+ bool shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
AllowPromotionWithoutCommonHeader = false;
return false;
}
@@ -370,7 +418,7 @@ public:
unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
bool enableWritePrefetching() const { return false; }
- unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
+ unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind,
@@ -379,7 +427,7 @@ public:
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args,
- const Instruction *CxtI = nullptr) {
+ const Instruction *CxtI = nullptr) const {
// FIXME: A number of transformation tests seem to require these values,
// which seems a little odd given how arbitrary they are.
switch (Opcode) {
@@ -398,13 +446,14 @@ public:
}
unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index,
- VectorType *SubTp) {
+ VectorType *SubTp) const {
return 1;
}
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
- const Instruction *I) {
+ const Instruction *I) const {
switch (Opcode) {
default:
break;
@@ -427,23 +476,24 @@ public:
// Identity and pointer-to-pointer casts are free.
return 0;
break;
- case Instruction::Trunc:
+ case Instruction::Trunc: {
// trunc to a native type is free (assuming the target has compare and
// shift-right of the same width).
- if (DL.isLegalInteger(DL.getTypeSizeInBits(Dst)))
+ TypeSize DstSize = DL.getTypeSizeInBits(Dst);
+ if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedSize()))
return 0;
break;
}
+ }
return 1;
}
unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
- VectorType *VecTy, unsigned Index) {
+ VectorType *VecTy, unsigned Index) const {
return 1;
}
- unsigned getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind) {
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const {
// A phi would be free, unless we're costing the throughput because it
// will require a register.
if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
@@ -452,12 +502,14 @@ public:
}
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) const {
return 1;
}
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
return 1;
}
@@ -469,32 +521,33 @@ public:
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind) const {
return 1;
}
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr) {
+ const Instruction *I = nullptr) const {
return 1;
}
unsigned getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
- bool UseMaskForCond, bool UseMaskForGaps) {
+ bool UseMaskForCond, bool UseMaskForGaps) const {
return 1;
}
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind) const {
switch (ICA.getID()) {
default:
break;
case Intrinsic::annotation:
case Intrinsic::assume:
case Intrinsic::sideeffect:
+ case Intrinsic::pseudoprobe:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::dbg_label:
@@ -505,6 +558,7 @@ public:
case Intrinsic::is_constant:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
+ case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::objectsize:
case Intrinsic::ptr_annotation:
case Intrinsic::var_annotation:
@@ -526,26 +580,38 @@ public:
}
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind) const {
return 1;
}
- unsigned getNumberOfParts(Type *Tp) { return 0; }
+ unsigned getNumberOfParts(Type *Tp) const { return 0; }
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
- const SCEV *) {
+ const SCEV *) const {
return 0;
}
unsigned getArithmeticReductionCost(unsigned, VectorType *, bool,
- TTI::TargetCostKind) { return 1; }
+ TTI::TargetCostKind) const {
+ return 1;
+ }
unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool,
- TTI::TargetCostKind) { return 1; }
+ TTI::TargetCostKind) const {
+ return 1;
+ }
+
+ InstructionCost getExtendedAddReductionCost(
+ bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const {
+ return 1;
+ }
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
+ return 0;
+ }
- bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
+ bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
return false;
}
@@ -559,7 +625,7 @@ public:
}
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
- Type *ExpectedType) {
+ Type *ExpectedType) const {
return nullptr;
}
@@ -637,22 +703,34 @@ public:
return false;
}
+ bool preferInLoopReduction(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const {
+ return false;
+ }
+
+ bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const {
+ return false;
+ }
+
bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
unsigned getGISelRematGlobalCost() const { return 1; }
+ bool supportsScalableVectors() const { return false; }
+
bool hasActiveVectorLength() const { return false; }
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
- unsigned minRequiredElementSize(const Value *Val, bool &isSigned) {
+ unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
const auto *VectorValue = cast<Constant>(Val);
// In case of a vector we need to pick the max between the min
// required size for each element
- auto *VT = cast<VectorType>(Val->getType());
+ auto *VT = cast<FixedVectorType>(Val->getType());
// Assume unsigned elements
isSigned = false;
@@ -700,12 +778,12 @@ protected:
return Val->getType()->getScalarSizeInBits();
}
- bool isStridedAccess(const SCEV *Ptr) {
+ bool isStridedAccess(const SCEV *Ptr) const {
return Ptr && isa<SCEVAddRecExpr>(Ptr);
}
const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
- const SCEV *Ptr) {
+ const SCEV *Ptr) const {
if (!isStridedAccess(Ptr))
return nullptr;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
@@ -713,7 +791,7 @@ protected:
}
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
- int64_t MergeDistance) {
+ int64_t MergeDistance) const {
const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
if (!Step)
return false;
@@ -775,7 +853,12 @@ public:
uint64_t Field = ConstIdx->getZExtValue();
BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
} else {
- int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
+ // If this operand is a scalable type, bail out early.
+ // TODO: handle scalable vectors
+ if (isa<ScalableVectorType>(TargetType))
+ return TTI::TCC_Basic;
+ int64_t ElementSize =
+ DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
if (ConstIdx) {
BaseOffset +=
ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
@@ -800,30 +883,17 @@ public:
int getUserCost(const User *U, ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
auto *TargetTTI = static_cast<T *>(this);
-
- // FIXME: We shouldn't have to special-case intrinsics here.
- if (CostKind == TTI::TCK_RecipThroughput) {
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- IntrinsicCostAttributes CostAttrs(*II);
- return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
- }
- }
-
+ // Handle non-intrinsic calls, invokes, and callbr.
// FIXME: Unlikely to be true for anything but CodeSize.
- if (const auto *CB = dyn_cast<CallBase>(U)) {
- const Function *F = CB->getCalledFunction();
- if (F) {
- FunctionType *FTy = F->getFunctionType();
- if (Intrinsic::ID IID = F->getIntrinsicID()) {
- IntrinsicCostAttributes Attrs(IID, *CB);
- return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind);
- }
-
+ auto *CB = dyn_cast<CallBase>(U);
+ if (CB && !isa<IntrinsicInst>(U)) {
+ if (const Function *F = CB->getCalledFunction()) {
if (!TargetTTI->isLoweredToCall(F))
return TTI::TCC_Basic; // Give a basic cost if it will be lowered
- return TTI::TCC_Basic * (FTy->getNumParams() + 1);
+ return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
}
+ // For indirect or other calls, scale cost by number of arguments.
return TTI::TCC_Basic * (CB->arg_size() + 1);
}
@@ -835,6 +905,12 @@ public:
switch (Opcode) {
default:
break;
+ case Instruction::Call: {
+ assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
+ auto *Intrinsic = cast<IntrinsicInst>(U);
+ IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
+ return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
+ }
case Instruction::Br:
case Instruction::Ret:
case Instruction::PHI:
@@ -895,7 +971,8 @@ public:
case Instruction::SExt:
case Instruction::ZExt:
case Instruction::AddrSpaceCast:
- return TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I);
+ return TargetTTI->getCastInstrCost(
+ Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
case Instruction::Store: {
auto *SI = cast<StoreInst>(U);
Type *ValTy = U->getOperand(0)->getType();
@@ -912,12 +989,16 @@ public:
case Instruction::Select: {
Type *CondTy = U->getOperand(0)->getType();
return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
+ CmpInst::BAD_ICMP_PREDICATE,
CostKind, I);
}
case Instruction::ICmp:
case Instruction::FCmp: {
Type *ValTy = U->getOperand(0)->getType();
+ // TODO: Also handle ICmp/FCmp constant expressions.
return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
+ I ? cast<CmpInst>(I)->getPredicate()
+ : CmpInst::BAD_ICMP_PREDICATE,
CostKind, I);
}
case Instruction::InsertElement: {
@@ -969,41 +1050,23 @@ public:
if (CI)
Idx = CI->getZExtValue();
- // Try to match a reduction sequence (series of shufflevector and
- // vector adds followed by a extractelement).
- unsigned ReduxOpCode;
- VectorType *ReduxType;
-
- switch (TTI::matchVectorSplittingReduction(EEI, ReduxOpCode,
- ReduxType)) {
- case TTI::RK_Arithmetic:
- return TargetTTI->getArithmeticReductionCost(ReduxOpCode, ReduxType,
- /*IsPairwiseForm=*/false,
- CostKind);
- case TTI::RK_MinMax:
- return TargetTTI->getMinMaxReductionCost(
- ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
- /*IsPairwiseForm=*/false, /*IsUnsigned=*/false, CostKind);
- case TTI::RK_UnsignedMinMax:
- return TargetTTI->getMinMaxReductionCost(
- ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
- /*IsPairwiseForm=*/false, /*IsUnsigned=*/true, CostKind);
- case TTI::RK_None:
- break;
- }
-
- switch (TTI::matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
+ // Try to match a reduction (a series of shufflevector and vector ops
+ // followed by an extractelement).
+ unsigned RdxOpcode;
+ VectorType *RdxType;
+ bool IsPairwise;
+ switch (TTI::matchVectorReduction(EEI, RdxOpcode, RdxType, IsPairwise)) {
case TTI::RK_Arithmetic:
- return TargetTTI->getArithmeticReductionCost(ReduxOpCode, ReduxType,
- /*IsPairwiseForm=*/true, CostKind);
+ return TargetTTI->getArithmeticReductionCost(RdxOpcode, RdxType,
+ IsPairwise, CostKind);
case TTI::RK_MinMax:
return TargetTTI->getMinMaxReductionCost(
- ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
- /*IsPairwiseForm=*/true, /*IsUnsigned=*/false, CostKind);
+ RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
+ IsPairwise, /*IsUnsigned=*/false, CostKind);
case TTI::RK_UnsignedMinMax:
return TargetTTI->getMinMaxReductionCost(
- ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
- /*IsPairwiseForm=*/true, /*IsUnsigned=*/true, CostKind);
+ RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
+ IsPairwise, /*IsUnsigned=*/true, CostKind);
case TTI::RK_None:
break;
}
@@ -1016,8 +1079,7 @@ public:
}
int getInstructionLatency(const Instruction *I) {
- SmallVector<const Value *, 4> Operands(I->value_op_begin(),
- I->value_op_end());
+ SmallVector<const Value *, 4> Operands(I->operand_values());
if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
return 0;
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h
index 033ea05b77fa..d02bcd0e335b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h
@@ -101,6 +101,12 @@ private:
StringRef ModuleName;
};
+enum class InlinerFunctionImportStatsOpts {
+ No = 0,
+ Basic = 1,
+ Verbose = 2,
+};
+
} // llvm
#endif // LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/Local.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/Local.h
index f31b56345424..bd82b34165d6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/Local.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/Local.h
@@ -30,7 +30,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
bool NoAssumptions = false) {
GEPOperator *GEPOp = cast<GEPOperator>(GEP);
Type *IntIdxTy = DL.getIndexType(GEP->getType());
- Value *Result = Constant::getNullValue(IntIdxTy);
+ Value *Result = nullptr;
// If the GEP is inbounds, we know that none of the addressing operations will
// overflow in a signed sense.
@@ -46,6 +46,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
++i, ++GTI) {
Value *Op = *i;
uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+ Value *Offset;
if (Constant *OpC = dyn_cast<Constant>(Op)) {
if (OpC->isZeroValue())
continue;
@@ -54,46 +55,47 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
if (StructType *STy = GTI.getStructTypeOrNull()) {
uint64_t OpValue = OpC->getUniqueInteger().getZExtValue();
Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
-
- if (Size)
- Result = Builder->CreateAdd(Result, ConstantInt::get(IntIdxTy, Size),
- GEP->getName().str()+".offs");
- continue;
+ if (!Size)
+ continue;
+
+ Offset = ConstantInt::get(IntIdxTy, Size);
+ } else {
+ // Splat the constant if needed.
+ if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy())
+ OpC = ConstantVector::getSplat(
+ cast<VectorType>(IntIdxTy)->getElementCount(), OpC);
+
+ Constant *Scale = ConstantInt::get(IntIdxTy, Size);
+ Constant *OC =
+ ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/);
+ Offset =
+ ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
}
-
- // Splat the constant if needed.
- if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy())
- OpC = ConstantVector::getSplat(
- cast<VectorType>(IntIdxTy)->getElementCount(), OpC);
-
- Constant *Scale = ConstantInt::get(IntIdxTy, Size);
- Constant *OC = ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/);
- Scale =
- ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
- // Emit an add instruction.
- Result = Builder->CreateAdd(Result, Scale, GEP->getName().str()+".offs");
- continue;
- }
-
- // Splat the index if needed.
- if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy())
- Op = Builder->CreateVectorSplat(
- cast<VectorType>(IntIdxTy)->getNumElements(), Op);
-
- // Convert to correct type.
- if (Op->getType() != IntIdxTy)
- Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName().str()+".c");
- if (Size != 1) {
- // We'll let instcombine(mul) convert this to a shl if possible.
- Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size),
- GEP->getName().str() + ".idx", false /*NUW*/,
- isInBounds /*NSW*/);
+ } else {
+ // Splat the index if needed.
+ if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy())
+ Op = Builder->CreateVectorSplat(
+ cast<FixedVectorType>(IntIdxTy)->getNumElements(), Op);
+
+ // Convert to correct type.
+ if (Op->getType() != IntIdxTy)
+ Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName().str()+".c");
+ if (Size != 1) {
+ // We'll let instcombine(mul) convert this to a shl if possible.
+ Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size),
+ GEP->getName().str() + ".idx", false /*NUW*/,
+ isInBounds /*NSW*/);
+ }
+ Offset = Op;
}
- // Emit an add instruction.
- Result = Builder->CreateAdd(Op, Result, GEP->getName().str()+".offs");
+ if (Result)
+ Result = Builder->CreateAdd(Result, Offset, GEP->getName().str()+".offs",
+ false /*NUW*/, isInBounds /*NSW*/);
+ else
+ Result = Offset;
}
- return Result;
+ return Result ? Result : Constant::getNullValue(IntIdxTy);
}
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/TFUtils.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/TFUtils.h
index 2ab2c7a57d94..ea6bc2cf19ee 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/TFUtils.h
@@ -9,10 +9,11 @@
#ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H
#define LLVM_ANALYSIS_UTILS_TFUTILS_H
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
#ifdef LLVM_HAVE_TF_API
#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/JSON.h"
#include <memory>
#include <vector>
@@ -36,6 +37,141 @@ namespace llvm {
class TFModelEvaluatorImpl;
class EvaluationResultImpl;
+/// TensorSpec encapsulates the specification of a tensor: its dimensions, or
+/// "shape" (row-major), its type (see TensorSpec::getDataType specializations
+/// for supported types), its name and port (see "TensorFlow: Large-Scale
+/// Machine Learning on Heterogeneous Distributed Systems", section 4.2, para 2:
+/// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf)
+///
+/// TensorSpec is used to set up a TFModelEvaluator by describing the expected
+/// inputs and outputs.
+class TensorSpec final {
+public:
+ template <typename T>
+ static TensorSpec createSpec(const std::string &Name,
+ const std::vector<int64_t> &Shape,
+ int Port = 0) {
+ return TensorSpec(Name, Port, getDataType<T>(), Shape);
+ }
+
+ const std::string &name() const { return Name; }
+ int port() const { return Port; }
+ int typeIndex() const { return TypeIndex; }
+ const std::vector<int64_t> &shape() const { return Shape; }
+
+ bool operator==(const TensorSpec &Other) const {
+ return Name == Other.Name && Port == Other.Port &&
+ TypeIndex == Other.TypeIndex && Shape == Other.Shape;
+ }
+
+ bool operator!=(const TensorSpec &Other) const { return !(*this == Other); }
+
+ /// Get the number of elements in a tensor with this shape.
+ size_t getElementCount() const { return ElementCount; }
+ /// Get the size, in bytes, of one element.
+ size_t getElementByteSize() const;
+
+ template <typename T> bool isElementType() const {
+ return getDataType<T>() == TypeIndex;
+ }
+
+private:
+ TensorSpec(const std::string &Name, int Port, int TypeIndex,
+ const std::vector<int64_t> &Shape);
+
+ template <typename T> static int getDataType() {
+ llvm_unreachable("Undefined tensor type");
+ }
+
+ std::string Name;
+ int Port = 0;
+ int TypeIndex = 0;
+ std::vector<int64_t> Shape;
+ size_t ElementCount = 0;
+};
+
+/// Construct a TensorSpec from a JSON dictionary of the form:
+/// { "name": <string>,
+/// "port": <int>,
+/// "type": <string. Use LLVM's types, e.g. float, double, int64_t>,
+/// "shape": <array of ints> }
+/// For the "type" field, see the C++ primitive types used in
+/// TFUTILS_SUPPORTED_TYPES.
+Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
+ const json::Value &Value);
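+
+ // Editorial note (not part of the upstream patch): a sketch of parsing a spec
+ // in the JSON form documented above, assuming LLVM was configured with
+ // LLVM_HAVE_TF_API; the field values ("reward", float, shape [1]) are made up.
+ //
+ //   #include "llvm/Analysis/Utils/TFUtils.h"
+ //   #include "llvm/Support/Error.h"
+ //   #include "llvm/Support/JSON.h"
+ //   using namespace llvm;
+ //
+ //   static Optional<TensorSpec> parseExampleSpec(LLVMContext &Ctx) {
+ //     Expected<json::Value> Parsed = json::parse(
+ //         R"({"name": "reward", "port": 0, "type": "float", "shape": [1]})");
+ //     if (!Parsed) {
+ //       consumeError(Parsed.takeError());
+ //       return None;
+ //     }
+ //     return getTensorSpecFromJSON(Ctx, *Parsed);
+ //   }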
+
+struct LoggedFeatureSpec {
+ TensorSpec Spec;
+ Optional<std::string> LoggingName;
+};
+
+/// Load the output specs. If SpecFileOverride is not empty, that path is used.
+/// Otherwise, the file is assumed to be called 'output_spec.json' and be found
+/// under ModelPath (the model directory).
+/// The first output tensor name must match ExpectedDecisionName.
+/// In case of error, the return is None and the error is logged.
+Optional<std::vector<LoggedFeatureSpec>>
+loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
+ StringRef ModelPath, StringRef SpecFileOverride = StringRef());
+
+/// Logging utility - given an ordered specification of features, and assuming
+/// a scalar reward, allow logging feature values and rewards, and then print
+/// as tf.train.SequenceExample text protobuf.
+/// The assumption is that, for an event to be logged (i.e. a set of feature
+/// values and a reward), the user calls the log* API for each feature exactly
+/// once, providing the index matching the position in the feature spec list
+/// provided at construction:
+/// event 0:
+/// logTensorValue(0, ...)
+/// logTensorValue(1, ...)
+/// ...
+/// logReward(...)
+/// event 1:
+/// logTensorValue(0, ...)
+/// logTensorValue(1, ...)
+/// ...
+/// logReward(...)
+///
+/// At the end, call print to generate the protobuf.
+class Logger final {
+public:
+ /// Construct a Logger. If IncludeReward is false, then logReward shouldn't
+ /// be called, and the reward feature won't be printed out.
+ Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
+ const TensorSpec &RewardSpec, bool IncludeReward)
+ : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
+ RawLogData(FeatureSpecs.size() + IncludeReward),
+ IncludeReward(IncludeReward) {}
+
+ template <typename T> void logReward(T Value) {
+ assert(IncludeReward);
+ logTensorValue(RawLogData.size() - 1, &Value);
+ }
+
+ template <typename T> void logFinalReward(T Value) {
+ assert(RawLogData.back().empty());
+ logReward(Value);
+ }
+
+ template <typename T>
+ void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
+ const char *Start = reinterpret_cast<const char *>(Value);
+ const char *End = Start + sizeof(T) * Size;
+ RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End);
+ }
+
+ void print(raw_ostream &OS);
+
+private:
+ std::vector<LoggedFeatureSpec> FeatureSpecs;
+ TensorSpec RewardSpec;
+ /// RawLogData has one entry per feature, plus one more for the reward.
+ /// Each feature's values are then stored in a vector, in succession.
+ /// This means the ith event is stored at [*][i].
+ std::vector<std::vector<char>> RawLogData;
+ const bool IncludeReward;
+};
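+
+ // Editorial note (not part of the upstream patch): a usage sketch of the
+ // logging protocol described in the class comment, with one hypothetical
+ // scalar feature plus a reward, again assuming LLVM_HAVE_TF_API.
+ //
+ //   // Log two events, each with one int64 feature value and a float reward,
+ //   // then emit the tf.train.SequenceExample text protobuf.
+ //   static void logTwoEvents(raw_ostream &OS) {
+ //     std::vector<LoggedFeatureSpec> Features{
+ //         {TensorSpec::createSpec<int64_t>("instruction_count", {1}), None}};
+ //     TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
+ //     Logger Log(Features, Reward, /*IncludeReward=*/true);
+ //     for (int64_t Count : {42, 57}) {
+ //       Log.logTensorValue(0, &Count); // index 0 matches the spec order
+ //       float R = 1.0f;
+ //       Log.logReward(R);
+ //     }
+ //     Log.print(OS);
+ //   }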
+
class TFModelEvaluator final {
public:
/// The result of a model evaluation. Handles the lifetime of the output
@@ -44,25 +180,41 @@ public:
class EvaluationResult {
public:
EvaluationResult(const EvaluationResult &) = delete;
+ EvaluationResult &operator=(const EvaluationResult &Other) = delete;
+
EvaluationResult(EvaluationResult &&Other);
+ EvaluationResult &operator=(EvaluationResult &&Other);
+
~EvaluationResult();
- /// Get a pointer to the first element of the tensor at Index.
+ /// Get a (const) pointer to the first element of the tensor at Index.
template <typename T> T *getTensorValue(size_t Index) {
return static_cast<T *>(getUntypedTensorValue(Index));
}
+ template <typename T> const T *getTensorValue(size_t Index) const {
+ return static_cast<T *>(getUntypedTensorValue(Index));
+ }
+
+ /// Get a (const) pointer to the untyped data of the tensor.
+ void *getUntypedTensorValue(size_t Index);
+ const void *getUntypedTensorValue(size_t Index) const;
+
private:
friend class TFModelEvaluator;
EvaluationResult(std::unique_ptr<EvaluationResultImpl> Impl);
- void *getUntypedTensorValue(size_t Index);
std::unique_ptr<EvaluationResultImpl> Impl;
};
TFModelEvaluator(StringRef SavedModelPath,
- const std::vector<std::string> &InputNames,
- const std::vector<std::string> &OutputNames,
+ const std::vector<TensorSpec> &InputSpecs,
+ const std::vector<TensorSpec> &OutputSpecs,
const char *Tags = "serve");
+ TFModelEvaluator(StringRef SavedModelPath,
+ const std::vector<TensorSpec> &InputSpecs,
+ function_ref<TensorSpec(size_t)> GetOutputSpecs,
+ size_t OutputSpecsSize, const char *Tags = "serve");
+
~TFModelEvaluator();
TFModelEvaluator(const TFModelEvaluator &) = delete;
TFModelEvaluator(TFModelEvaluator &&) = delete;
@@ -82,33 +234,32 @@ public:
/// otherwise.
bool isValid() const { return !!Impl; }
- /// Initialize the input at Index as a tensor of the given type and
- /// dimensions.
- template <typename T>
- void initInput(size_t Index, const std::vector<int64_t> &Dimensions) {
- return initInput(Index, getModelTypeIndex<T>(), Dimensions);
- }
-
private:
void *getUntypedInput(size_t Index);
- template <typename T> int getModelTypeIndex();
- void initInput(size_t Index, int TypeIndex,
- const std::vector<int64_t> &Dimensions);
-
std::unique_ptr<TFModelEvaluatorImpl> Impl;
};
-template <> int TFModelEvaluator::getModelTypeIndex<float>();
-template <> int TFModelEvaluator::getModelTypeIndex<double>();
-template <> int TFModelEvaluator::getModelTypeIndex<int8_t>();
-template <> int TFModelEvaluator::getModelTypeIndex<uint8_t>();
-template <> int TFModelEvaluator::getModelTypeIndex<int16_t>();
-template <> int TFModelEvaluator::getModelTypeIndex<uint16_t>();
-template <> int TFModelEvaluator::getModelTypeIndex<int32_t>();
-template <> int TFModelEvaluator::getModelTypeIndex<uint32_t>();
-template <> int TFModelEvaluator::getModelTypeIndex<int64_t>();
-template <> int TFModelEvaluator::getModelTypeIndex<uint64_t>();
+/// List of supported types, as a pair:
+/// - C++ type
+/// - enum name (implementation-specific)
+#define TFUTILS_SUPPORTED_TYPES(M) \
+ M(float, TF_FLOAT) \
+ M(double, TF_DOUBLE) \
+ M(int8_t, TF_INT8) \
+ M(uint8_t, TF_UINT8) \
+ M(int16_t, TF_INT16) \
+ M(uint16_t, TF_UINT16) \
+ M(int32_t, TF_INT32) \
+ M(uint32_t, TF_UINT32) \
+ M(int64_t, TF_INT64) \
+ M(uint64_t, TF_UINT64)
+
+#define TFUTILS_GETDATATYPE_DEF(T, E) \
+ template <> int TensorSpec::getDataType<T>();
+
+TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_DEF)
+#undef TFUTILS_GETDATATYPE_DEF
} // namespace llvm
#endif // LLVM_HAVE_TF_API
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ValueLattice.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ValueLattice.h
index bf5bab9ced22..108d08033ac3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ValueLattice.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ValueLattice.h
@@ -11,6 +11,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
//
//===----------------------------------------------------------------------===//
// ValueLatticeElement
@@ -456,6 +457,16 @@ public:
if (isConstant() && Other.isConstant())
return ConstantExpr::getCompare(Pred, getConstant(), Other.getConstant());
+ if (ICmpInst::isEquality(Pred)) {
+ // not(C) != C => true, not(C) == C => false.
+ if ((isNotConstant() && Other.isConstant() &&
+ getNotConstant() == Other.getConstant()) ||
+ (isConstant() && Other.isNotConstant() &&
+ getConstant() == Other.getNotConstant()))
+ return Pred == ICmpInst::ICMP_NE
+ ? ConstantInt::getTrue(Ty) : ConstantInt::getFalse(Ty);
+ }
+
// Integer constants are represented as ConstantRanges with single
// elements.
if (!isConstantRange() || !Other.isConstantRange())
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h
index 9510739ef5ab..86c0991451c5 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h
@@ -21,12 +21,14 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
#include <cassert>
#include <cstdint>
namespace llvm {
class AddOperator;
+class AllocaInst;
class APInt;
class AssumptionCache;
class DominatorTree;
@@ -43,6 +45,8 @@ class StringRef;
class TargetLibraryInfo;
class Value;
+constexpr unsigned MaxAnalysisRecursionDepth = 6;
+
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
///
@@ -366,14 +370,13 @@ class Value;
/// that the returned value has pointer type if the specified value does. If
/// the MaxLookup value is non-zero, it limits the number of instructions to
/// be stripped off.
- Value *GetUnderlyingObject(Value *V, const DataLayout &DL,
- unsigned MaxLookup = 6);
- inline const Value *GetUnderlyingObject(const Value *V, const DataLayout &DL,
+ Value *getUnderlyingObject(Value *V, unsigned MaxLookup = 6);
+ inline const Value *getUnderlyingObject(const Value *V,
unsigned MaxLookup = 6) {
- return GetUnderlyingObject(const_cast<Value *>(V), DL, MaxLookup);
+ return getUnderlyingObject(const_cast<Value *>(V), MaxLookup);
}
- /// This method is similar to GetUnderlyingObject except that it can
+ /// This method is similar to getUnderlyingObject except that it can
/// look through phi and select instructions and return multiple objects.
///
/// If LoopInfo is passed, loop phis are further analyzed. If a pointer
@@ -401,20 +404,30 @@ class Value;
/// Since A[i] and A[i-1] are independent pointers, getUnderlyingObjects
/// should not assume that Curr and Prev share the same underlying object thus
/// it shouldn't look through the phi above.
- void GetUnderlyingObjects(const Value *V,
+ void getUnderlyingObjects(const Value *V,
SmallVectorImpl<const Value *> &Objects,
- const DataLayout &DL, LoopInfo *LI = nullptr,
- unsigned MaxLookup = 6);
+ LoopInfo *LI = nullptr, unsigned MaxLookup = 6);
- /// This is a wrapper around GetUnderlyingObjects and adds support for basic
+ /// This is a wrapper around getUnderlyingObjects and adds support for basic
/// ptrtoint+arithmetic+inttoptr sequences.
bool getUnderlyingObjectsForCodeGen(const Value *V,
- SmallVectorImpl<Value *> &Objects,
- const DataLayout &DL);
+ SmallVectorImpl<Value *> &Objects);
+
+ /// Returns the unique alloca that the value comes from, or nullptr.
+ /// If OffsetZero is true, check that V points to the beginning of the alloca.
+ AllocaInst *findAllocaForValue(Value *V, bool OffsetZero = false);
+ inline const AllocaInst *findAllocaForValue(const Value *V,
+ bool OffsetZero = false) {
+ return findAllocaForValue(const_cast<Value *>(V), OffsetZero);
+ }
/// Return true if the only users of this pointer are lifetime markers.
bool onlyUsedByLifetimeMarkers(const Value *V);
+ /// Return true if the only users of this pointer are lifetime markers or
+ /// droppable instructions.
+ bool onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V);
+
/// Return true if speculation of the given load must be suppressed to avoid
/// ordering or interfering with an active sanitizer. If not suppressed,
/// dereferenceability and alignment must be proven separately. Note: This
@@ -571,45 +584,65 @@ class Value;
/// if, for all i, r is evaluated to poison or op raises UB if vi = poison.
/// To filter out operands that raise UB on poison, you can use
/// getGuaranteedNonPoisonOp.
- bool propagatesPoison(const Instruction *I);
+ bool propagatesPoison(const Operator *I);
- /// Return either nullptr or an operand of I such that I will trigger
- /// undefined behavior if I is executed and that operand has a poison
- /// value.
- const Value *getGuaranteedNonPoisonOp(const Instruction *I);
+ /// Insert operands of I into Ops such that I will trigger undefined behavior
+ /// if I is executed and that operand has a poison value.
+ void getGuaranteedNonPoisonOps(const Instruction *I,
+ SmallPtrSetImpl<const Value *> &Ops);
- /// Return true if the given instruction must trigger undefined behavior.
+ /// Return true if the given instruction must trigger undefined behavior
/// when I is executed with any operands which appear in KnownPoison holding
/// a poison value at the point of execution.
bool mustTriggerUB(const Instruction *I,
const SmallSet<const Value *, 16>& KnownPoison);
- /// Return true if this function can prove that if PoisonI is executed
- /// and yields a poison value, then that will trigger undefined behavior.
+ /// Return true if this function can prove that if Inst is executed
+ /// and yields a poison value or undef bits, then that will trigger
+ /// undefined behavior.
///
/// Note that this currently only considers the basic block that is
- /// the parent of I.
- bool programUndefinedIfPoison(const Instruction *PoisonI);
-
- /// Return true if I can create poison from non-poison operands.
- /// For vectors, canCreatePoison returns true if there is potential poison in
- /// any element of the result when vectors without poison are given as
+ /// the parent of Inst.
+ bool programUndefinedIfUndefOrPoison(const Instruction *Inst);
+ bool programUndefinedIfPoison(const Instruction *Inst);
+
+ /// canCreateUndefOrPoison returns true if Op can create undef or poison from
+ /// non-undef & non-poison operands.
+ /// For vectors, canCreateUndefOrPoison returns true if there is potential
+ /// poison or undef in any element of the result when vectors without
+ /// undef/poison are given as operands.
+ /// For example, given `Op = shl <2 x i32> %x, <0, 32>`, this function returns
+ /// true. If Op raises immediate UB but never creates poison or undef
+ /// (e.g. sdiv I, 0), canCreatePoison returns false.
+ ///
+ /// canCreatePoison returns true if Op can create poison from non-poison
/// operands.
- /// For example, given `I = shl <2 x i32> %x, <0, 32>`, this function returns
- /// true. If I raises immediate UB but never creates poison (e.g. sdiv I, 0),
- /// canCreatePoison returns false.
- bool canCreatePoison(const Instruction *I);
-
- /// Return true if this function can prove that V is never undef value
- /// or poison value.
- //
+ bool canCreateUndefOrPoison(const Operator *Op);
+ bool canCreatePoison(const Operator *Op);
+
+ /// Return true if V is poison given that ValAssumedPoison is already poison.
+ /// For example, if ValAssumedPoison is `icmp X, 10` and V is `icmp X, 5`,
+ /// impliesPoison returns true.
+ bool impliesPoison(const Value *ValAssumedPoison, const Value *V);
+
+ /// Return true if this function can prove that V does not have undef bits
+ /// and is never poison. If V is an aggregate value or vector, check whether
+ /// all elements (except padding) are not undef or poison.
+ /// Note that this is different from canCreateUndefOrPoison because the
+ /// function assumes Op's operands are not poison/undef.
+ ///
/// If CtxI and DT are specified this method performs flow-sensitive analysis
/// and returns true if it is guaranteed to be never undef or poison
/// immediately before the CtxI.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
+ AssumptionCache *AC = nullptr,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr,
unsigned Depth = 0);
+ bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC = nullptr,
+ const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr,
+ unsigned Depth = 0);
/// Specific patterns of select instructions we can match.
enum SelectPatternFlavor {
@@ -700,6 +733,14 @@ class Value;
/// minimum/maximum flavor.
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF);
+ /// Check if the values in \p VL are select instructions that can be converted
+ /// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a
+ /// conversion is possible, together with a bool indicating whether all select
+ /// conditions are only used by the selects. Otherwise return
+ /// Intrinsic::not_intrinsic.
+ std::pair<Intrinsic::ID, bool>
+ canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL);
+
/// Return true if RHS is known to be implied true by LHS. Return false if
/// RHS is known to be implied false by LHS. Otherwise, return None if no
/// implication can be made.
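
[Usage sketch, not part of the diff] The hunks above drop the DataLayout parameter from getUnderlyingObject/getUnderlyingObjects and add findAllocaForValue to ValueTracking.h. A minimal caller against the new signatures could look like the following; the function name isStoreToLocalAlloca is illustrative, not LLVM API.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool isStoreToLocalAlloca(StoreInst &SI) {
  // No DataLayout argument anymore; MaxLookup keeps its default of 6.
  const Value *Base = getUnderlyingObject(SI.getPointerOperand());
  // Per the comment above: returns the unique alloca V comes from, or nullptr.
  return findAllocaForValue(Base) != nullptr;
}
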
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def b/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
index 2f64b0fedc7a..cfc3d6115866 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
@@ -62,6 +62,87 @@ TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4)
TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4)
+#elif defined(TLI_DEFINE_LIBMVEC_X86_VECFUNCS)
+// GLIBC Vector math Functions
+
+TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", 2)
+TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", 4)
+
+TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", 4)
+TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", 8)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", 2)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", 4)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", 8)
+
+TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", 2)
+TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", 4)
+
+TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", 4)
+TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", 8)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", 2)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", 4)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", 8)
+
+TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", 2)
+TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", 4)
+
+TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", 4)
+TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", 8)
+
+TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", 2)
+TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", 4)
+
+TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", 4)
+TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", 8)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", 2)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", 4)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", 8)
+
+TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", 2)
+TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", 4)
+
+TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", 4)
+TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", 8)
+
+TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", 2)
+TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", 4)
+
+TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", 4)
+TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", 8)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", 2)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", 4)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", 8)
+
+TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", 2)
+TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", 4)
+
+TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", 4)
+TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", 8)
+
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", 2)
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", 4)
+
+TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", 4)
+TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", 8)
+
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", 2)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", 4)
+
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", 8)
+
#elif defined(TLI_DEFINE_MASSV_VECFUNCS)
// IBM MASS library's vector Functions
@@ -245,6 +326,70 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
+TLI_DEFINE_VECFUNC("log2", "__svml_log22", 2)
+TLI_DEFINE_VECFUNC("log2", "__svml_log24", 4)
+TLI_DEFINE_VECFUNC("log2", "__svml_log28", 8)
+
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", 4)
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", 8)
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", 16)
+
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", 2)
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", 4)
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", 8)
+
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", 4)
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", 8)
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", 2)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", 4)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", 8)
+
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16)
+
+TLI_DEFINE_VECFUNC("log10", "__svml_log102", 2)
+TLI_DEFINE_VECFUNC("log10", "__svml_log104", 4)
+TLI_DEFINE_VECFUNC("log10", "__svml_log108", 8)
+
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", 4)
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", 8)
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", 16)
+
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", 2)
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", 4)
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", 8)
+
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", 4)
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", 8)
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", 2)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", 8)
+
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", 8)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", 16)
+
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", 2)
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", 4)
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", 8)
+
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", 4)
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", 8)
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", 16)
+
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", 2)
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", 4)
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", 8)
+
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4)
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8)
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16)
+
TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
@@ -275,6 +420,7 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", 16)
#undef TLI_DEFINE_VECFUNC
#undef TLI_DEFINE_ACCELERATE_VECFUNCS
+#undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS
#undef TLI_DEFINE_MASSV_VECFUNCS
#undef TLI_DEFINE_SVML_VECFUNCS
#undef TLI_DEFINE_MASSV_VECFUNCS_NAMES
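
[Usage sketch, not part of the diff] VecFuncs.def is an X-macro table: a consumer selects the new libmvec block by defining TLI_DEFINE_LIBMVEC_X86_VECFUNCS (plus TLI_DEFINE_VECFUNC) before including the file, and the #undef block at the end resets the guards. The VecEntry struct and LibmvecFuncs array below are illustrative, not LLVM API.

struct VecEntry { const char *ScalarName; const char *VectorName; unsigned VF; };

static const VecEntry LibmvecFuncs[] = {
#define TLI_DEFINE_LIBMVEC_X86_VECFUNCS
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
#include "llvm/Analysis/VecFuncs.def"
};
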
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/VectorUtils.h b/contrib/llvm-project/llvm/include/llvm/Analysis/VectorUtils.h
index b1d7850442fb..26cb0e456ed4 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/VectorUtils.h
@@ -14,12 +14,12 @@
#define LLVM_ANALYSIS_VECTORUTILS_H
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Support/CheckedArithmetic.h"
namespace llvm {
+class TargetLibraryInfo;
/// Describes the type of Parameters
enum class VFParamKind {
@@ -99,7 +99,8 @@ struct VFShape {
// Retrieve the VFShape that can be used to map a (scalar) function to itself,
// with VF = 1.
static VFShape getScalarShape(const CallInst &CI) {
- return VFShape::get(CI, /*EC*/ {1, false}, /*HasGlobalPredicate*/ false);
+ return VFShape::get(CI, ElementCount::getFixed(1),
+ /*HasGlobalPredicate*/ false);
}
// Retrieve the basic vectorization shape of the function, where all
@@ -114,7 +115,7 @@ struct VFShape {
Parameters.push_back(
VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));
- return {EC.Min, EC.Scalable, Parameters};
+ return {EC.getKnownMinValue(), EC.isScalable(), Parameters};
}
/// Sanity check on the Parameters in the VFShape.
bool hasValidParameterList() const;
@@ -299,13 +300,17 @@ namespace Intrinsic {
typedef unsigned ID;
}
-/// A helper function for converting Scalar types to vector types.
-/// If the incoming type is void, we return void. If the VF is 1, we return
-/// the scalar type.
-inline Type *ToVectorTy(Type *Scalar, unsigned VF, bool isScalable = false) {
- if (Scalar->isVoidTy() || VF == 1)
+/// A helper function for converting Scalar types to vector types. If
+/// the incoming type is void, we return void. If the EC represents a
+/// scalar, we return the scalar type.
+inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
+ if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
return Scalar;
- return VectorType::get(Scalar, {VF, isScalable});
+ return VectorType::get(Scalar, EC);
+}
+
+inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
+ return ToVectorTy(Scalar, ElementCount::getFixed(VF));
}
/// Identify if the intrinsic is trivially vectorizable.
@@ -353,7 +358,7 @@ int getSplatIndex(ArrayRef<int> Mask);
/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
-const Value *getSplatValue(const Value *V);
+Value *getSplatValue(const Value *V);
/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
@@ -539,20 +544,20 @@ createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);
-/// Given a mask vector of the form <Y x i1>, Return true if all of the
-/// elements of this predicate mask are false or undef. That is, return true
-/// if all lanes can be assumed inactive.
+/// Given a mask vector of i1, Return true if all of the elements of this
+/// predicate mask are known to be false or undef. That is, return true if all
+/// lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);
-/// Given a mask vector of the form <Y x i1>, Return true if all of the
-/// elements of this predicate mask are true or undef. That is, return true
-/// if all lanes can be assumed active.
+/// Given a mask vector of i1, Return true if all of the elements of this
+/// predicate mask are known to be true or undef. That is, return true if all
+/// lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);
/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);
-
+
/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
@@ -615,6 +620,11 @@ public:
return false;
int32_t Key = *MaybeKey;
+ // Skip if the key is used for either the tombstone or empty special values.
+ if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
+ DenseMapInfo<int32_t>::getEmptyKey() == Key)
+ return false;
+
// Skip if there is already a member with the same index.
if (Members.find(Key) != Members.end())
return false;
@@ -650,11 +660,7 @@ public:
/// \returns nullptr if contains no such member.
InstTy *getMember(uint32_t Index) const {
int32_t Key = SmallestKey + Index;
- auto Member = Members.find(Key);
- if (Member == Members.end())
- return nullptr;
-
- return Member->second;
+ return Members.lookup(Key);
}
/// Get the index for the given member. Unlike the key in the member
@@ -772,9 +778,7 @@ public:
/// \returns nullptr if doesn't have such group.
InterleaveGroup<Instruction> *
getInterleaveGroup(const Instruction *Instr) const {
- if (InterleaveGroupMap.count(Instr))
- return InterleaveGroupMap.find(Instr)->second;
- return nullptr;
+ return InterleaveGroupMap.lookup(Instr);
}
iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
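
[Usage sketch, not part of the diff] ToVectorTy now takes an ElementCount, and the unsigned-VF overload above simply wraps ElementCount::getFixed. Assuming an existing LLVMContext:

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void toVectorTyExamples(LLVMContext &Ctx) {
  Type *F32 = Type::getFloatTy(Ctx);
  Type *V4F32 = ToVectorTy(F32, ElementCount::getFixed(4));      // <4 x float>
  Type *NxV4F32 = ToVectorTy(F32, ElementCount::getScalable(4)); // <vscale x 4 x float>
  Type *Scalar = ToVectorTy(F32, ElementCount::getFixed(1));     // scalar passthrough
  (void)V4F32; (void)NxV4F32; (void)Scalar;
}
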
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h
index 1919d7f0dece..716d649f7c51 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h
@@ -311,6 +311,7 @@ enum SectionCharacteristics : uint32_t {
IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000,
IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000,
IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000,
+ IMAGE_SCN_ALIGN_MASK = 0x00F00000,
IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000,
IMAGE_SCN_MEM_DISCARDABLE = 0x02000000,
IMAGE_SCN_MEM_NOT_CACHED = 0x04000000,
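
[Usage sketch, not part of the diff] IMAGE_SCN_ALIGN_MASK covers the alignment selector in bits 20-23 of the section characteristics; COFF encodes an alignment of 2^(n-1) bytes for selector n, with 0 meaning "unspecified". A small decoder, assuming that encoding:

#include "llvm/BinaryFormat/COFF.h"
#include <cstdint>

static uint32_t getSectionAlignment(uint32_t Characteristics) {
  uint32_t Sel = (Characteristics & llvm::COFF::IMAGE_SCN_ALIGN_MASK) >> 20;
  return Sel == 0 ? 0 : (1u << (Sel - 1));
}
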
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.def b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.def
index f0337ef4fb54..f69877bb50df 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -17,7 +17,7 @@
defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \
defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE || \
defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO || \
- defined HANDLE_MACRO_FLAG || \
+ defined HANDLE_DW_MACRO_GNU || defined HANDLE_MACRO_FLAG || \
defined HANDLE_DW_RLE || defined HANDLE_DW_LLE || \
(defined HANDLE_DW_CFA && defined HANDLE_DW_CFA_PRED) || \
defined HANDLE_DW_APPLE_PROPERTY || defined HANDLE_DW_UT || \
@@ -88,6 +88,10 @@
#define HANDLE_DW_MACRO(ID, NAME)
#endif
+#ifndef HANDLE_DW_MACRO_GNU
+#define HANDLE_DW_MACRO_GNU(ID, NAME)
+#endif
+
#ifndef HANDLE_MACRO_FLAG
#define HANDLE_MACRO_FLAG(ID, NAME)
#endif
@@ -837,6 +841,18 @@ HANDLE_DW_MACRO(0x0a, import_sup)
HANDLE_DW_MACRO(0x0b, define_strx)
HANDLE_DW_MACRO(0x0c, undef_strx)
+// GNU .debug_macro extension.
+HANDLE_DW_MACRO_GNU(0x01, define)
+HANDLE_DW_MACRO_GNU(0x02, undef)
+HANDLE_DW_MACRO_GNU(0x03, start_file)
+HANDLE_DW_MACRO_GNU(0x04, end_file)
+HANDLE_DW_MACRO_GNU(0x05, define_indirect)
+HANDLE_DW_MACRO_GNU(0x06, undef_indirect)
+HANDLE_DW_MACRO_GNU(0x07, transparent_include)
+HANDLE_DW_MACRO_GNU(0x08, define_indirect_alt)
+HANDLE_DW_MACRO_GNU(0x09, undef_indirect_alt)
+HANDLE_DW_MACRO_GNU(0x0a, transparent_include_alt)
+
// DWARF v5 Macro header flags.
HANDLE_MACRO_FLAG(0x01, OFFSET_SIZE)
HANDLE_MACRO_FLAG(0x02, DEBUG_LINE_OFFSET)
@@ -986,6 +1002,7 @@ HANDLE_DW_SECT(8, RNGLISTS)
#undef HANDLE_DW_LNE
#undef HANDLE_DW_LNCT
#undef HANDLE_DW_MACRO
+#undef HANDLE_DW_MACRO_GNU
#undef HANDLE_MACRO_FLAG
#undef HANDLE_DW_RLE
#undef HANDLE_DW_LLE
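
[Usage sketch, not part of the diff] HANDLE_DW_MACRO_GNU follows the same X-macro protocol as the other HANDLE_* hooks: define it, include Dwarf.def, and the trailing #undef above cleans it up. The function name gnuMacroName is illustrative.

static const char *gnuMacroName(unsigned Encoding) {
  switch (Encoding) {
#define HANDLE_DW_MACRO_GNU(ID, NAME)                                          \
  case ID:                                                                     \
    return "DW_MACRO_GNU_" #NAME;
#include "llvm/BinaryFormat/Dwarf.def"
  default:
    return nullptr;
  }
}
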
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.h
index 4e8b708f39bb..cafc5be686bc 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -27,6 +27,8 @@
#include "llvm/Support/FormatVariadicDetails.h"
#include "llvm/ADT/Triple.h"
+#include <limits>
+
namespace llvm {
class StringRef;
@@ -118,10 +120,11 @@ enum LocationAtom {
#include "llvm/BinaryFormat/Dwarf.def"
DW_OP_lo_user = 0xe0,
DW_OP_hi_user = 0xff,
- DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata.
- DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata.
- DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata.
- DW_OP_LLVM_entry_value = 0x1003, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_entry_value = 0x1003, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_implicit_pointer = 0x1004, ///< Only used in LLVM metadata.
};
enum TypeKind : uint8_t {
@@ -183,6 +186,7 @@ enum SourceLanguage {
};
inline bool isCPlusPlus(SourceLanguage S) {
+ bool result = false;
// Deliberately enumerate all the language options so we get a warning when
// new language options are added (-Wswitch) that'll hopefully help keep this
// switch up-to-date when new C++ versions are added.
@@ -191,7 +195,8 @@ inline bool isCPlusPlus(SourceLanguage S) {
case DW_LANG_C_plus_plus_03:
case DW_LANG_C_plus_plus_11:
case DW_LANG_C_plus_plus_14:
- return true;
+ result = true;
+ break;
case DW_LANG_C89:
case DW_LANG_C:
case DW_LANG_Ada83:
@@ -230,9 +235,68 @@ inline bool isCPlusPlus(SourceLanguage S) {
case DW_LANG_BORLAND_Delphi:
case DW_LANG_lo_user:
case DW_LANG_hi_user:
- return false;
+ result = false;
+ break;
}
- llvm_unreachable("Invalid source language");
+
+ return result;
+}
+
+inline bool isFortran(SourceLanguage S) {
+ bool result = false;
+ // Deliberately enumerate all the language options so we get a warning when
+ // new language options are added (-Wswitch) that'll hopefully help keep this
+ // switch up-to-date when new Fortran versions are added.
+ switch (S) {
+ case DW_LANG_Fortran77:
+ case DW_LANG_Fortran90:
+ case DW_LANG_Fortran95:
+ case DW_LANG_Fortran03:
+ case DW_LANG_Fortran08:
+ result = true;
+ break;
+ case DW_LANG_C89:
+ case DW_LANG_C:
+ case DW_LANG_Ada83:
+ case DW_LANG_C_plus_plus:
+ case DW_LANG_Cobol74:
+ case DW_LANG_Cobol85:
+ case DW_LANG_Pascal83:
+ case DW_LANG_Modula2:
+ case DW_LANG_Java:
+ case DW_LANG_C99:
+ case DW_LANG_Ada95:
+ case DW_LANG_PLI:
+ case DW_LANG_ObjC:
+ case DW_LANG_ObjC_plus_plus:
+ case DW_LANG_UPC:
+ case DW_LANG_D:
+ case DW_LANG_Python:
+ case DW_LANG_OpenCL:
+ case DW_LANG_Go:
+ case DW_LANG_Modula3:
+ case DW_LANG_Haskell:
+ case DW_LANG_C_plus_plus_03:
+ case DW_LANG_C_plus_plus_11:
+ case DW_LANG_OCaml:
+ case DW_LANG_Rust:
+ case DW_LANG_C11:
+ case DW_LANG_Swift:
+ case DW_LANG_Julia:
+ case DW_LANG_Dylan:
+ case DW_LANG_C_plus_plus_14:
+ case DW_LANG_RenderScript:
+ case DW_LANG_BLISS:
+ case DW_LANG_Mips_Assembler:
+ case DW_LANG_GOOGLE_RenderScript:
+ case DW_LANG_BORLAND_Delphi:
+ case DW_LANG_lo_user:
+ case DW_LANG_hi_user:
+ result = false;
+ break;
+ }
+
+ return result;
}
enum CaseSensitivity {
@@ -309,6 +373,14 @@ enum MacroEntryType {
DW_MACRO_hi_user = 0xff
};
+/// GNU .debug_macro macro information entry type encodings.
+enum GnuMacroEntryType {
+#define HANDLE_DW_MACRO_GNU(ID, NAME) DW_MACRO_GNU_##NAME = ID,
+#include "llvm/BinaryFormat/Dwarf.def"
+ DW_MACRO_GNU_lo_user = 0xe0,
+ DW_MACRO_GNU_hi_user = 0xff
+};
+
/// DWARF v5 range list entry encoding values.
enum RnglistEntries {
#define HANDLE_DW_RLE(ID, NAME) DW_RLE_##NAME = ID,
@@ -472,6 +544,7 @@ StringRef LNStandardString(unsigned Standard);
StringRef LNExtendedString(unsigned Encoding);
StringRef MacinfoString(unsigned Encoding);
StringRef MacroString(unsigned Encoding);
+StringRef GnuMacroString(unsigned Encoding);
StringRef RangeListEncodingString(unsigned Encoding);
StringRef LocListEncodingString(unsigned Encoding);
StringRef CallFrameString(unsigned Encoding, Triple::ArchType Arch);
@@ -483,6 +556,7 @@ StringRef GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage);
StringRef IndexString(unsigned Idx);
StringRef FormatString(DwarfFormat Format);
StringRef FormatString(bool IsDWARF64);
+StringRef RLEString(unsigned RLE);
/// @}
/// \defgroup DwarfConstantsParsing Dwarf constants parsing functions
@@ -674,6 +748,11 @@ template <> struct EnumTraits<LocationAtom> : public std::true_type {
static constexpr char Type[3] = "OP";
static constexpr StringRef (*StringFn)(unsigned) = &OperationEncodingString;
};
+
+inline uint64_t computeTombstoneAddress(uint8_t AddressByteSize) {
+ return std::numeric_limits<uint64_t>::max() >> (8 - AddressByteSize) * 8;
+}
+
} // End of namespace dwarf
/// Dwarf constants format_provider
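
[Usage sketch, not part of the diff] computeTombstoneAddress simply yields the all-ones value for the given address byte size, which DWARF consumers can compare parsed addresses against; its call sites are outside this hunk.

#include "llvm/BinaryFormat/Dwarf.h"
#include <cstdint>

static void tombstoneExamples() {
  uint64_t T4 = llvm::dwarf::computeTombstoneAddress(4); // 0x00000000ffffffff
  uint64_t T8 = llvm::dwarf::computeTombstoneAddress(8); // 0xffffffffffffffff
  (void)T4; (void)T8;
}
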
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/DynamicTags.def b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/DynamicTags.def
index aec408bd2d72..c08f8a53bdb5 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/DynamicTags.def
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/DynamicTags.def
@@ -120,6 +120,7 @@ DYNAMIC_TAG(VERNEEDNUM, 0X6FFFFFFF) // The number of entries in DT_VERNEED.
// AArch64 specific dynamic table entries
AARCH64_DYNAMIC_TAG(AARCH64_BTI_PLT, 0x70000001)
AARCH64_DYNAMIC_TAG(AARCH64_PAC_PLT, 0x70000003)
+AARCH64_DYNAMIC_TAG(AARCH64_VARIANT_PCS, 0x70000005)
// Hexagon specific dynamic table entries
HEXAGON_DYNAMIC_TAG(HEXAGON_SYMSZ, 0x70000000)
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h
index 21a5c26883cd..1552303b610c 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h
@@ -107,13 +107,17 @@ struct Elf64_Ehdr {
unsigned char getDataEncoding() const { return e_ident[EI_DATA]; }
};
-// File types
+// File types.
+// See current registered ELF types at:
+// http://www.sco.com/developers/gabi/latest/ch4.eheader.html
enum {
ET_NONE = 0, // No file type
ET_REL = 1, // Relocatable file
ET_EXEC = 2, // Executable file
ET_DYN = 3, // Shared object file
ET_CORE = 4, // Core file
+ ET_LOOS = 0xfe00, // Beginning of operating system-specific codes
+ ET_HIOS = 0xfeff, // Operating system-specific
ET_LOPROC = 0xff00, // Beginning of processor-specific codes
ET_HIPROC = 0xffff // Processor-specific
};
@@ -312,6 +316,7 @@ enum {
EM_LANAI = 244, // Lanai 32-bit processor
EM_BPF = 247, // Linux kernel bpf virtual machine
EM_VE = 251, // NEC SX-Aurora VE
+ EM_CSKY = 252, // C-SKY 32-bit processor
};
// Object file classes.
@@ -359,6 +364,14 @@ enum {
ELFOSABI_LAST_ARCH = 255 // Last Architecture-specific OS ABI
};
+// AMDGPU OS ABI Version identification.
+enum {
+ // ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification
+ // was never defined for V1.
+ ELFABIVERSION_AMDGPU_HSA_V2 = 0,
+ ELFABIVERSION_AMDGPU_HSA_V3 = 1,
+};
+
#define ELF_RELOC(name, value) name = value,
// X86_64 relocations.
@@ -686,41 +699,39 @@ enum : unsigned {
EF_AMDGPU_MACH_R600_LAST = EF_AMDGPU_MACH_R600_TURKS,
// AMDGCN-based processors.
-
- // AMDGCN GFX6.
- EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
- EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
- // AMDGCN GFX7.
- EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
- EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023,
- EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024,
- EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025,
- EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026,
- // AMDGCN GFX8.
- EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
- EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029,
- EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
- EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b,
- // AMDGCN GFX9.
- EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
- EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
- EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
- EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
- EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
- EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
- // AMDGCN GFX10.
- EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
- EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
- EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
- EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
-
- // Reserved for AMDGCN-based processors.
- EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
- EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x032,
+ EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
+ EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
+ EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
+ EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023,
+ EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024,
+ EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025,
+ EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026,
+ EF_AMDGPU_MACH_AMDGCN_RESERVED_0X27 = 0x027,
+ EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
+ EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029,
+ EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
+ EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b,
+ EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
+ EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
+ EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
+ EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
+ EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
+ EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
+ EF_AMDGPU_MACH_AMDGCN_GFX90C = 0x032,
+ EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
+ EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
+ EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
+ EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
+ EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037,
+ EF_AMDGPU_MACH_AMDGCN_GFX1032 = 0x038,
+ EF_AMDGPU_MACH_AMDGCN_GFX1033 = 0x039,
+ EF_AMDGPU_MACH_AMDGCN_GFX602 = 0x03a,
+ EF_AMDGPU_MACH_AMDGCN_GFX705 = 0x03b,
+ EF_AMDGPU_MACH_AMDGCN_GFX805 = 0x03c,
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1030,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX805,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
@@ -777,6 +788,12 @@ enum {
#include "ELFRelocs/VE.def"
};
+
+// ELF Relocation types for CSKY
+enum {
+#include "ELFRelocs/CSKY.def"
+};
+
#undef ELF_RELOC
// Section header.
@@ -854,10 +871,11 @@ enum : unsigned {
SHT_LLVM_ADDRSIG = 0x6fff4c03, // List of address-significant symbols
// for safe ICF.
SHT_LLVM_DEPENDENT_LIBRARIES =
- 0x6fff4c04, // LLVM Dependent Library Specifiers.
- SHT_LLVM_SYMPART = 0x6fff4c05, // Symbol partition specification.
- SHT_LLVM_PART_EHDR = 0x6fff4c06, // ELF header for loadable partition.
- SHT_LLVM_PART_PHDR = 0x6fff4c07, // Phdrs for loadable partition.
+ 0x6fff4c04, // LLVM Dependent Library Specifiers.
+ SHT_LLVM_SYMPART = 0x6fff4c05, // Symbol partition specification.
+ SHT_LLVM_PART_EHDR = 0x6fff4c06, // ELF header for loadable partition.
+ SHT_LLVM_PART_PHDR = 0x6fff4c07, // Phdrs for loadable partition.
+ SHT_LLVM_BB_ADDR_MAP = 0x6fff4c08, // LLVM Basic Block Address Map.
// Android's experimental support for SHT_RELR sections.
// https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512
SHT_ANDROID_RELR = 0x6fffff00, // Relocation entries; only offsets.
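
[Usage sketch, not part of the diff] ET_LOOS/ET_HIOS bound the OS-specific e_type range, and the rewritten AMDGCN list keeps FIRST/LAST aliases spanning GFX600..GFX805. Two range checks built only on the enumerators above:

#include "llvm/BinaryFormat/ELF.h"

static bool isOSSpecificFileType(unsigned Type) {
  return Type >= llvm::ELF::ET_LOOS && Type <= llvm::ELF::ET_HIOS;
}

static bool isKnownAMDGCNMach(unsigned Mach) {
  // Mach is assumed to be already extracted from e_flags.
  return Mach >= llvm::ELF::EF_AMDGPU_MACH_AMDGCN_FIRST &&
         Mach <= llvm::ELF::EF_AMDGPU_MACH_AMDGCN_LAST;
}
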
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def
new file mode 100644
index 000000000000..c5f2dbae8033
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def
@@ -0,0 +1,74 @@
+
+#ifndef ELF_RELOC
+#error "ELF_RELOC must be defined"
+#endif
+
+ELF_RELOC(R_CKCORE_NONE, 0)
+ELF_RELOC(R_CKCORE_ADDR32, 1)
+ELF_RELOC(R_CKCORE_PCREL_IMM8_4, 2)
+ELF_RELOC(R_CKCORE_PCREL_IMM11_2, 3)
+ELF_RELOC(R_CKCORE_PCREL_IMM4_2, 4)
+ELF_RELOC(R_CKCORE_PCREL32, 5)
+ELF_RELOC(R_CKCORE_PCREL_JSR_IMM11_2, 6)
+ELF_RELOC(R_CKCORE_GNU_VTINHERIT, 7)
+ELF_RELOC(R_CKCORE_GNU_VTENTRY, 8)
+ELF_RELOC(R_CKCORE_RELATIVE, 9)
+ELF_RELOC(R_CKCORE_COPY, 10)
+ELF_RELOC(R_CKCORE_GLOB_DAT, 11)
+ELF_RELOC(R_CKCORE_JUMP_SLOT, 12)
+ELF_RELOC(R_CKCORE_GOTOFF, 13)
+ELF_RELOC(R_CKCORE_GOTPC, 14)
+ELF_RELOC(R_CKCORE_GOT32, 15)
+ELF_RELOC(R_CKCORE_PLT32, 16)
+ELF_RELOC(R_CKCORE_ADDRGOT, 17)
+ELF_RELOC(R_CKCORE_ADDRPLT, 18)
+ELF_RELOC(R_CKCORE_PCREL_IMM26_2, 19)
+ELF_RELOC(R_CKCORE_PCREL_IMM16_2, 20)
+ELF_RELOC(R_CKCORE_PCREL_IMM16_4, 21)
+ELF_RELOC(R_CKCORE_PCREL_IMM10_2, 22)
+ELF_RELOC(R_CKCORE_PCREL_IMM10_4, 23)
+ELF_RELOC(R_CKCORE_ADDR_HI16, 24)
+ELF_RELOC(R_CKCORE_ADDR_LO16, 25)
+ELF_RELOC(R_CKCORE_GOTPC_HI16, 26)
+ELF_RELOC(R_CKCORE_GOTPC_LO16, 27)
+ELF_RELOC(R_CKCORE_GOTOFF_HI16, 28)
+ELF_RELOC(R_CKCORE_GOTOFF_LO16, 29)
+ELF_RELOC(R_CKCORE_GOT12, 30)
+ELF_RELOC(R_CKCORE_GOT_HI16, 31)
+ELF_RELOC(R_CKCORE_GOT_LO16, 32)
+ELF_RELOC(R_CKCORE_PLT12, 33)
+ELF_RELOC(R_CKCORE_PLT_HI16, 34)
+ELF_RELOC(R_CKCORE_PLT_LO16, 35)
+ELF_RELOC(R_CKCORE_ADDRGOT_HI16, 36)
+ELF_RELOC(R_CKCORE_ADDRGOT_LO16, 37)
+ELF_RELOC(R_CKCORE_ADDRPLT_HI16, 38)
+ELF_RELOC(R_CKCORE_ADDRPLT_LO16, 39)
+ELF_RELOC(R_CKCORE_PCREL_JSR_IMM26_2, 40)
+ELF_RELOC(R_CKCORE_TOFFSET_LO16, 41)
+ELF_RELOC(R_CKCORE_DOFFSET_LO16, 42)
+ELF_RELOC(R_CKCORE_PCREL_IMM18_2, 43)
+ELF_RELOC(R_CKCORE_DOFFSET_IMM18, 44)
+ELF_RELOC(R_CKCORE_DOFFSET_IMM18_2, 45)
+ELF_RELOC(R_CKCORE_DOFFSET_IMM18_4, 46)
+ELF_RELOC(R_CKCORE_GOTOFF_IMM18, 47)
+ELF_RELOC(R_CKCORE_GOT_IMM18_4, 48)
+ELF_RELOC(R_CKCORE_PLT_IMM18_4, 49)
+ELF_RELOC(R_CKCORE_PCREL_IMM7_4, 50)
+ELF_RELOC(R_CKCORE_TLS_LE32, 51)
+ELF_RELOC(R_CKCORE_TLS_IE32, 52)
+ELF_RELOC(R_CKCORE_TLS_GD32, 53)
+ELF_RELOC(R_CKCORE_TLS_LDM32, 54)
+ELF_RELOC(R_CKCORE_TLS_LDO32, 55)
+ELF_RELOC(R_CKCORE_TLS_DTPMOD32, 56)
+ELF_RELOC(R_CKCORE_TLS_DTPOFF32, 57)
+ELF_RELOC(R_CKCORE_TLS_TPOFF32, 58)
+ELF_RELOC(R_CKCORE_PCREL_FLRW_IMM8_4, 59)
+ELF_RELOC(R_CKCORE_NOJSRI, 60)
+ELF_RELOC(R_CKCORE_CALLGRAPH, 61)
+ELF_RELOC(R_CKCORE_IRELATIVE, 62)
+ELF_RELOC(R_CKCORE_PCREL_BLOOP_IMM4_4, 63)
+ELF_RELOC(R_CKCORE_PCREL_BLOOP_IMM12_4, 64)
+ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_1, 65)
+ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_2, 66)
+ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_4, 67)
+ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_8, 68)
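
[Usage sketch, not part of the diff] Like the other ELFRelocs/*.def tables, CSKY.def expands through ELF_RELOC, so a consumer can stamp out a name lookup; the function name getCSKYRelocName is illustrative.

#include "llvm/ADT/StringRef.h"
#include <cstdint>

static llvm::StringRef getCSKYRelocName(uint32_t Type) {
  switch (Type) {
#define ELF_RELOC(NAME, VALUE)                                                 \
  case VALUE:                                                                  \
    return #NAME;
#include "llvm/BinaryFormat/ELFRelocs/CSKY.def"
#undef ELF_RELOC
  default:
    return "unknown";
  }
}
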
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
index e28c9caaefaf..0422aa0606d7 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
@@ -97,8 +97,14 @@
#undef R_PPC64_DTPREL16_HIGH
#undef R_PPC64_DTPREL16_HIGHA
#undef R_PPC64_REL24_NOTOC
+#undef R_PPC64_PCREL_OPT
#undef R_PPC64_PCREL34
#undef R_PPC64_GOT_PCREL34
+#undef R_PPC64_TPREL34
+#undef R_PPC64_DTPREL34
+#undef R_PPC64_GOT_TLSGD_PCREL34
+#undef R_PPC64_GOT_TLSLD_PCREL34
+#undef R_PPC64_GOT_TPREL_PCREL34
#undef R_PPC64_IRELATIVE
#undef R_PPC64_REL16
#undef R_PPC64_REL16_LO
@@ -194,8 +200,14 @@ ELF_RELOC(R_PPC64_TPREL16_HIGHA, 113)
ELF_RELOC(R_PPC64_DTPREL16_HIGH, 114)
ELF_RELOC(R_PPC64_DTPREL16_HIGHA, 115)
ELF_RELOC(R_PPC64_REL24_NOTOC, 116)
+ELF_RELOC(R_PPC64_PCREL_OPT, 123)
ELF_RELOC(R_PPC64_PCREL34, 132)
ELF_RELOC(R_PPC64_GOT_PCREL34, 133)
+ELF_RELOC(R_PPC64_TPREL34, 146)
+ELF_RELOC(R_PPC64_DTPREL34, 147)
+ELF_RELOC(R_PPC64_GOT_TLSGD_PCREL34, 148)
+ELF_RELOC(R_PPC64_GOT_TLSLD_PCREL34, 149)
+ELF_RELOC(R_PPC64_GOT_TPREL_PCREL34, 150)
ELF_RELOC(R_PPC64_IRELATIVE, 248)
ELF_RELOC(R_PPC64_REL16, 249)
ELF_RELOC(R_PPC64_REL16_LO, 250)
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/MachO.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/MachO.h
index e43fea0a2465..f5d5ec328b5e 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/MachO.h
@@ -83,6 +83,7 @@ enum {
MH_NO_HEAP_EXECUTION = 0x01000000u,
MH_APP_EXTENSION_SAFE = 0x02000000u,
MH_NLIST_OUTOFSYNC_WITH_DYLDINFO = 0x04000000u,
+ MH_SIM_SUPPORT = 0x08000000u,
MH_DYLIB_IN_CACHE = 0x80000000u,
};
@@ -495,7 +496,8 @@ enum PlatformType {
PLATFORM_MACCATALYST = 6,
PLATFORM_IOSSIMULATOR = 7,
PLATFORM_TVOSSIMULATOR = 8,
- PLATFORM_WATCHOSSIMULATOR = 9
+ PLATFORM_WATCHOSSIMULATOR = 9,
+ PLATFORM_DRIVERKIT = 10,
};
// Values for tools enum in build_tool_version.
@@ -1492,6 +1494,7 @@ enum CPUSubTypeARM {
enum CPUSubTypeARM64 {
CPU_SUBTYPE_ARM64_ALL = 0,
+ CPU_SUBTYPE_ARM64_V8 = 1,
CPU_SUBTYPE_ARM64E = 2,
};
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h
index 1aca692e30a7..063c6a3f9449 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -41,7 +41,7 @@ struct WasmDylinkInfo {
uint32_t MemoryAlignment; // P2 alignment of memory
uint32_t TableSize; // Table size in elements
uint32_t TableAlignment; // P2 alignment of table
- std::vector<StringRef> Needed; // Shared library depenedencies
+ std::vector<StringRef> Needed; // Shared library dependencies
};
struct WasmProducerInfo {
@@ -67,11 +67,17 @@ struct WasmLimits {
uint64_t Maximum;
};
-struct WasmTable {
+struct WasmTableType {
uint8_t ElemType;
WasmLimits Limits;
};
+struct WasmTable {
+ uint32_t Index;
+ WasmTableType Type;
+ StringRef SymbolName; // from the "linking" section
+};
+
struct WasmInitExpr {
uint8_t Opcode;
union {
@@ -114,7 +120,7 @@ struct WasmImport {
union {
uint32_t SigIndex;
WasmGlobalType Global;
- WasmTable Table;
+ WasmTableType Table;
WasmLimits Memory;
WasmEventType Event;
};
@@ -140,8 +146,11 @@ struct WasmFunction {
struct WasmDataSegment {
uint32_t InitFlags;
- uint32_t MemoryIndex; // present if InitFlags & WASM_SEGMENT_HAS_MEMINDEX
- WasmInitExpr Offset; // present if InitFlags & WASM_SEGMENT_IS_PASSIVE == 0
+ // Present if InitFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX.
+ uint32_t MemoryIndex;
+ // Present if InitFlags & WASM_DATA_SEGMENT_IS_PASSIVE == 0.
+ WasmInitExpr Offset;
+
ArrayRef<uint8_t> Content;
StringRef Name; // from the "segment info" section
uint32_t Alignment;
@@ -186,15 +195,22 @@ struct WasmSymbolInfo {
// For symbols to be exported from the final module
Optional<StringRef> ExportName;
union {
- // For function or global symbols, the index in function or global index
- // space.
+ // For function, table, or global symbols, the index in function, table, or
+ // global index space.
uint32_t ElementIndex;
// For a data symbols, the address of the data relative to segment.
WasmDataReference DataRef;
};
};
-struct WasmFunctionName {
+enum class NameType {
+ FUNCTION,
+ GLOBAL,
+ DATA_SEGMENT,
+};
+
+struct WasmDebugName {
+ NameType Type;
uint32_t Index;
StringRef Name;
};
@@ -231,7 +247,6 @@ enum : unsigned {
WASM_TYPE_F64 = 0x7C,
WASM_TYPE_V128 = 0x7B,
WASM_TYPE_FUNCREF = 0x70,
- WASM_TYPE_EXNREF = 0x68,
WASM_TYPE_EXTERNREF = 0x6F,
WASM_TYPE_FUNC = 0x60,
WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
@@ -251,6 +266,7 @@ enum : unsigned {
WASM_OPCODE_END = 0x0b,
WASM_OPCODE_CALL = 0x10,
WASM_OPCODE_LOCAL_GET = 0x20,
+ WASM_OPCODE_LOCAL_SET = 0x21,
WASM_OPCODE_GLOBAL_GET = 0x23,
WASM_OPCODE_GLOBAL_SET = 0x24,
WASM_OPCODE_I32_STORE = 0x36,
@@ -287,8 +303,8 @@ enum : unsigned {
};
enum : unsigned {
- WASM_SEGMENT_IS_PASSIVE = 0x01,
- WASM_SEGMENT_HAS_MEMINDEX = 0x02,
+ WASM_DATA_SEGMENT_IS_PASSIVE = 0x01,
+ WASM_DATA_SEGMENT_HAS_MEMINDEX = 0x02,
};
// Feature policy prefixes used in the custom "target_features" section
@@ -300,8 +316,10 @@ enum : uint8_t {
// Kind codes used in the custom "name" section
enum : unsigned {
- WASM_NAMES_FUNCTION = 0x1,
- WASM_NAMES_LOCAL = 0x2,
+ WASM_NAMES_FUNCTION = 1,
+ WASM_NAMES_LOCAL = 2,
+ WASM_NAMES_GLOBAL = 7,
+ WASM_NAMES_DATA_SEGMENT = 9,
};
// Kind codes used in the custom "linking" section
@@ -316,6 +334,8 @@ enum : unsigned {
enum : unsigned {
WASM_COMDAT_DATA = 0x0,
WASM_COMDAT_FUNCTION = 0x1,
+ // GLOBAL, EVENT, and TABLE are in here but LLVM doesn't use them yet.
+ WASM_COMDAT_SECTION = 0x5,
};
// Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE
@@ -325,6 +345,7 @@ enum WasmSymbolType : unsigned {
WASM_SYMBOL_TYPE_GLOBAL = 0x2,
WASM_SYMBOL_TYPE_SECTION = 0x3,
WASM_SYMBOL_TYPE_EVENT = 0x4,
+ WASM_SYMBOL_TYPE_TABLE = 0x5,
};
// Kinds of event attributes.
@@ -360,7 +381,7 @@ enum class ValType {
F32 = WASM_TYPE_F32,
F64 = WASM_TYPE_F64,
V128 = WASM_TYPE_V128,
- EXNREF = WASM_TYPE_EXNREF,
+ FUNCREF = WASM_TYPE_FUNCREF,
EXTERNREF = WASM_TYPE_EXTERNREF,
};
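
[Usage sketch, not part of the diff] The data-segment flags were renamed to WASM_DATA_SEGMENT_*, and the WasmDataSegment comments above now state when MemoryIndex and Offset are meaningful. Assuming an already-parsed segment:

#include "llvm/BinaryFormat/Wasm.h"
#include <cstdint>

static uint32_t segmentMemoryIndex(const llvm::wasm::WasmDataSegment &Seg) {
  // MemoryIndex is only present when the HAS_MEMINDEX flag is set; otherwise
  // the segment implicitly targets memory 0.
  if (Seg.InitFlags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX)
    return Seg.MemoryIndex;
  return 0;
}
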
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/WasmRelocs.def b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/WasmRelocs.def
index 05c5147e6314..dca63eca9455 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/WasmRelocs.def
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/WasmRelocs.def
@@ -20,3 +20,8 @@ WASM_RELOC(R_WASM_MEMORY_ADDR_LEB64, 14)
WASM_RELOC(R_WASM_MEMORY_ADDR_SLEB64, 15)
WASM_RELOC(R_WASM_MEMORY_ADDR_I64, 16)
WASM_RELOC(R_WASM_MEMORY_ADDR_REL_SLEB64, 17)
+WASM_RELOC(R_WASM_TABLE_INDEX_SLEB64, 18)
+WASM_RELOC(R_WASM_TABLE_INDEX_I64, 19)
+WASM_RELOC(R_WASM_TABLE_NUMBER_LEB, 20)
+WASM_RELOC(R_WASM_MEMORY_ADDR_TLS_SLEB, 21)
+WASM_RELOC(R_WASM_FUNCTION_OFFSET_I64, 22)
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/WasmTraits.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/WasmTraits.h
index 3eee8e71b187..e34182499187 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/WasmTraits.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/WasmTraits.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_OBJECT_WASMTRAITS_H
-#define LLVM_OBJECT_WASMTRAITS_H
+#ifndef LLVM_BINARYFORMAT_WASMTRAITS_H
+#define LLVM_BINARYFORMAT_WASMTRAITS_H
#include "llvm/ADT/Hashing.h"
#include "llvm/BinaryFormat/Wasm.h"
@@ -65,4 +65,4 @@ template <> struct DenseMapInfo<wasm::WasmGlobalType> {
} // end namespace llvm
-#endif // LLVM_OBJECT_WASMTRAITS_H
+#endif // LLVM_BINARYFORMAT_WASMTRAITS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h
index 5a7ce80a2f62..48e1baf72689 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h
@@ -18,6 +18,7 @@
namespace llvm {
class StringRef;
+template <unsigned> class SmallString;
namespace XCOFF {
@@ -28,6 +29,7 @@ constexpr size_t NameSize = 8;
constexpr size_t SymbolTableEntrySize = 18;
constexpr size_t RelocationSerializationSize32 = 10;
constexpr uint16_t RelocOverflow = 65535;
+constexpr uint8_t AllocRegNo = 31;
enum ReservedSectionNum : int16_t { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 };
@@ -294,6 +296,115 @@ enum CFileCpuId : uint8_t {
StringRef getMappingClassString(XCOFF::StorageMappingClass SMC);
StringRef getRelocationTypeString(XCOFF::RelocationType Type);
+SmallString<32> parseParmsType(uint32_t Value, unsigned ParmsNum);
+
+struct TracebackTable {
+ enum LanguageID : uint8_t {
+ C,
+ Fortran,
+ Pascal,
+ Ada,
+ PL1,
+ Basic,
+ Lisp,
+ Cobol,
+ Modula2,
+ CPlusPlus,
+ Rpg,
+ PL8,
+ PLIX = PL8,
+ Assembly,
+ Java,
+ ObjectiveC
+ };
+ // Byte 1
+ static constexpr uint32_t VersionMask = 0xFF00'0000;
+ static constexpr uint8_t VersionShift = 24;
+
+ // Byte 2
+ static constexpr uint32_t LanguageIdMask = 0x00FF'0000;
+ static constexpr uint8_t LanguageIdShift = 16;
+
+ // Byte 3
+ static constexpr uint32_t IsGlobaLinkageMask = 0x0000'8000;
+ static constexpr uint32_t IsOutOfLineEpilogOrPrologueMask = 0x0000'4000;
+ static constexpr uint32_t HasTraceBackTableOffsetMask = 0x0000'2000;
+ static constexpr uint32_t IsInternalProcedureMask = 0x0000'1000;
+ static constexpr uint32_t HasControlledStorageMask = 0x0000'0800;
+ static constexpr uint32_t IsTOClessMask = 0x0000'0400;
+ static constexpr uint32_t IsFloatingPointPresentMask = 0x0000'0200;
+ static constexpr uint32_t IsFloatingPointOperationLogOrAbortEnabledMask =
+ 0x0000'0100;
+
+ // Byte 4
+ static constexpr uint32_t IsInterruptHandlerMask = 0x0000'0080;
+ static constexpr uint32_t IsFunctionNamePresentMask = 0x0000'0040;
+ static constexpr uint32_t IsAllocaUsedMask = 0x0000'0020;
+ static constexpr uint32_t OnConditionDirectiveMask = 0x0000'001C;
+ static constexpr uint32_t IsCRSavedMask = 0x0000'0002;
+ static constexpr uint32_t IsLRSavedMask = 0x0000'0001;
+ static constexpr uint8_t OnConditionDirectiveShift = 2;
+
+ // Byte 5
+ static constexpr uint32_t IsBackChainStoredMask = 0x8000'0000;
+ static constexpr uint32_t IsFixupMask = 0x4000'0000;
+ static constexpr uint32_t FPRSavedMask = 0x3F00'0000;
+ static constexpr uint32_t FPRSavedShift = 24;
+
+ // Byte 6
+ static constexpr uint32_t HasVectorInfoMask = 0x0080'0000;
+ static constexpr uint32_t HasExtensionTableMask = 0x0040'0000;
+ static constexpr uint32_t GPRSavedMask = 0x003F'0000;
+ static constexpr uint32_t GPRSavedShift = 16;
+
+ // Byte 7
+ static constexpr uint32_t NumberOfFixedParmsMask = 0x0000'FF00;
+ static constexpr uint8_t NumberOfFixedParmsShift = 8;
+
+ // Byte 8
+ static constexpr uint32_t NumberOfFloatingPointParmsMask = 0x0000'00FE;
+ static constexpr uint32_t HasParmsOnStackMask = 0x0000'0001;
+ static constexpr uint8_t NumberOfFloatingPointParmsShift = 1;
+
+ // Masks to select leftmost bits for decoding parameter type information.
+  // Bit to use when vector info is not present.
+ static constexpr uint32_t ParmTypeIsFloatingBit = 0x8000'0000;
+ static constexpr uint32_t ParmTypeFloatingIsDoubleBit = 0x4000'0000;
+  // Bits to use when vector info is present.
+ static constexpr uint32_t ParmTypeIsFixedBits = 0x0000'0000;
+ static constexpr uint32_t ParmTypeIsVectorBits = 0x4000'0000;
+ static constexpr uint32_t ParmTypeIsFloatingBits = 0x8000'0000;
+ static constexpr uint32_t ParmTypeIsDoubleBits = 0xC000'0000;
+ static constexpr uint32_t ParmTypeMask = 0xC000'0000;
+
+ // Vector extension
+ static constexpr uint16_t NumberOfVRSavedMask = 0xFC00;
+ static constexpr uint16_t IsVRSavedOnStackMask = 0x0200;
+ static constexpr uint16_t HasVarArgsMask = 0x0100;
+ static constexpr uint8_t NumberOfVRSavedShift = 10;
+
+ static constexpr uint16_t NumberOfVectorParmsMask = 0x00FE;
+ static constexpr uint16_t HasVMXInstructionMask = 0x0001;
+ static constexpr uint8_t NumberOfVectorParmsShift = 1;
+
+ static constexpr uint32_t ParmTypeIsVectorCharBit = 0x0000'0000;
+ static constexpr uint32_t ParmTypeIsVectorShortBit = 0x4000'0000;
+ static constexpr uint32_t ParmTypeIsVectorIntBit = 0x8000'0000;
+ static constexpr uint32_t ParmTypeIsVectorFloatBit = 0xC000'0000;
+};
+
+// Extended Traceback table flags.
+enum ExtendedTBTableFlag : uint8_t {
+ TB_OS1 = 0x80, ///< Reserved for OS use.
+ TB_RESERVED = 0x40, ///< Reserved for compiler.
+ TB_SSP_CANARY = 0x20, ///< stack smasher canary present on stack.
+ TB_OS2 = 0x10, ///< Reserved for OS use.
+ TB_EH_INFO = 0x08, ///< Exception handling info present.
+ TB_LONGTBTABLE2 = 0x01 ///< Additional tbtable extension exists.
+};
+
+StringRef getNameForTracebackTableLanguageId(TracebackTable::LanguageID LangId);
+SmallString<32> getExtendedTBTableFlagString(uint8_t Flag);
} // end namespace XCOFF
} // end namespace llvm
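
[Usage sketch, not part of the diff] The TracebackTable masks and shifts above decode the first 32-bit word of an XCOFF traceback table; Word1 below is assumed to already hold that word.

#include "llvm/BinaryFormat/XCOFF.h"
#include <cstdint>

static void decodeFirstTracebackWord(uint32_t Word1) {
  using TT = llvm::XCOFF::TracebackTable;
  uint8_t Version = (Word1 & TT::VersionMask) >> TT::VersionShift;
  uint8_t LangId = (Word1 & TT::LanguageIdMask) >> TT::LanguageIdShift;
  bool HasTBOffset = Word1 & TT::HasTraceBackTableOffsetMask;
  uint8_t FixedParms =
      (Word1 & TT::NumberOfFixedParmsMask) >> TT::NumberOfFixedParmsShift;
  (void)Version; (void)LangId; (void)HasTBOffset; (void)FixedParms;
}
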
diff --git a/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeCommon.h b/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeCommon.h
new file mode 100644
index 000000000000..6a3e74550bc4
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeCommon.h
@@ -0,0 +1,30 @@
+//===- BitcodeCommon.h - Common code for encode/decode --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines common code to be used by BitcodeWriter and
+// BitcodeReader.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_BITCODECOMMON_H
+#define LLVM_BITCODE_BITCODECOMMON_H
+
+#include "llvm/ADT/Bitfields.h"
+
+namespace llvm {
+
+struct AllocaPackedValues {
+ using Align = Bitfield::Element<unsigned, 0, 5>;
+ using UsedWithInAlloca = Bitfield::Element<bool, Align::NextBit, 1>;
+ using ExplicitType = Bitfield::Element<bool, UsedWithInAlloca::NextBit, 1>;
+ using SwiftError = Bitfield::Element<bool, ExplicitType::NextBit, 1>;
+};
+
+} // namespace llvm
+
+#endif // LLVM_BITCODE_BITCODECOMMON_H
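
[Usage sketch, not part of the diff] AllocaPackedValues only names the bit layout of the packed alloca record operand; llvm::Bitfield from llvm/ADT/Bitfields.h (included above) does the packing. Assuming Bitfield's usual get/set interface:

#include "llvm/Bitcode/BitcodeCommon.h"
#include <cstdint>

static void allocaPackedRoundTrip() {
  using APV = llvm::AllocaPackedValues;
  uint64_t Packed = 0;
  llvm::Bitfield::set<APV::Align>(Packed, 4);         // writer-defined align code
  llvm::Bitfield::set<APV::SwiftError>(Packed, true);
  unsigned AlignCode = llvm::Bitfield::get<APV::Align>(Packed);
  bool IsSwiftError = llvm::Bitfield::get<APV::SwiftError>(Packed);
  (void)AlignCode; (void)IsSwiftError;
}
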
diff --git a/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeConvenience.h b/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeConvenience.h
new file mode 100644
index 000000000000..0060d014ba82
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeConvenience.h
@@ -0,0 +1,486 @@
+//===- llvm/Bitcode/BitcodeConvenience.h - Convenience Wrappers -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file Convenience wrappers for the LLVM bitcode format and bitstream APIs.
+///
+/// This allows you to use a sort of DSL to declare and use bitcode
+/// abbreviations and records. Example:
+///
+/// \code
+/// using Metadata = BCRecordLayout<
+/// METADATA_ID, // ID
+/// BCFixed<16>, // Module format major version
+/// BCFixed<16>, // Module format minor version
+/// BCBlob // misc. version information
+/// >;
+/// Metadata metadata(Out);
+/// metadata.emit(ScratchRecord, VERSION_MAJOR, VERSION_MINOR, Data);
+/// \endcode
+///
+/// For details on the bitcode format, see
+/// http://llvm.org/docs/BitCodeFormat.html
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_BITCODECONVENIENCE_H
+#define LLVM_BITCODE_BITCODECONVENIENCE_H
+
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Bitstream/BitstreamWriter.h"
+#include <cstdint>
+
+namespace llvm {
+namespace detail {
+/// Convenience base for all kinds of bitcode abbreviation fields.
+///
+/// This just defines common properties queried by the metaprogramming.
+template <bool Compound = false> class BCField {
+public:
+ static const bool IsCompound = Compound;
+
+ /// Asserts that the given data is a valid value for this field.
+ template <typename T> static void assertValid(const T &data) {}
+
+ /// Converts a raw numeric representation of this value to its preferred
+ /// type.
+ template <typename T> static T convert(T rawValue) { return rawValue; }
+};
+} // namespace detail
+
+/// Represents a literal operand in a bitcode record.
+///
+/// The value of a literal operand is the same for all instances of the record,
+/// so it is only emitted in the abbreviation definition.
+///
+/// Note that because this uses a compile-time template, you cannot have a
+/// literal operand that is fixed at run-time without dropping down to the
+/// raw LLVM APIs.
+template <uint64_t Value> class BCLiteral : public detail::BCField<> {
+public:
+ static void emitOp(llvm::BitCodeAbbrev &abbrev) {
+ abbrev.Add(llvm::BitCodeAbbrevOp(Value));
+ }
+
+ template <typename T> static void assertValid(const T &data) {
+ assert(data == Value && "data value does not match declared literal value");
+ }
+};
+
+/// Represents a fixed-width value in a bitcode record.
+///
+/// Note that the LLVM bitcode format only supports unsigned values.
+template <unsigned Width> class BCFixed : public detail::BCField<> {
+public:
+ static_assert(Width <= 64, "fixed-width field is too large");
+
+ static void emitOp(llvm::BitCodeAbbrev &abbrev) {
+ abbrev.Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, Width));
+ }
+
+ static void assertValid(const bool &data) {
+ assert(llvm::isUInt<Width>(data) &&
+ "data value does not fit in the given bit width");
+ }
+
+ template <typename T> static void assertValid(const T &data) {
+ assert(data >= 0 && "cannot encode signed integers");
+ assert(llvm::isUInt<Width>(data) &&
+ "data value does not fit in the given bit width");
+ }
+};
+
+/// Represents a variable-width value in a bitcode record.
+///
+/// The \p Width parameter should include the continuation bit.
+///
+/// Note that the LLVM bitcode format only supports unsigned values.
+template <unsigned Width> class BCVBR : public detail::BCField<> {
+ static_assert(Width >= 2, "width does not have room for continuation bit");
+
+public:
+ static void emitOp(llvm::BitCodeAbbrev &abbrev) {
+ abbrev.Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, Width));
+ }
+
+ template <typename T> static void assertValid(const T &data) {
+ assert(data >= 0 && "cannot encode signed integers");
+ }
+};
+
+/// Represents a character encoded in LLVM's Char6 encoding.
+///
+/// This format is suitable for encoding decimal numbers (without signs or
+/// exponents) and C identifiers (without dollar signs), but not much else.
+///
+/// \sa http://llvm.org/docs/BitCodeFormat.html#char6-encoded-value
+class BCChar6 : public detail::BCField<> {
+public:
+ static void emitOp(llvm::BitCodeAbbrev &abbrev) {
+ abbrev.Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Char6));
+ }
+
+ template <typename T> static void assertValid(const T &data) {
+ assert(llvm::BitCodeAbbrevOp::isChar6(data) && "invalid Char6 data");
+ }
+
+ template <typename T> char convert(T rawValue) {
+ return static_cast<char>(rawValue);
+ }
+};
+
+/// Represents an untyped blob of bytes.
+///
+/// If present, this must be the last field in a record.
+class BCBlob : public detail::BCField<true> {
+public:
+ static void emitOp(llvm::BitCodeAbbrev &abbrev) {
+ abbrev.Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
+ }
+};
+
+/// Represents an array of some other type.
+///
+/// If present, this must be the last field in a record.
+template <typename ElementTy> class BCArray : public detail::BCField<true> {
+ static_assert(!ElementTy::IsCompound, "arrays can only contain scalar types");
+
+public:
+ static void emitOp(llvm::BitCodeAbbrev &abbrev) {
+ abbrev.Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
+ ElementTy::emitOp(abbrev);
+ }
+};
+
+namespace detail {
+/// Attaches the last field to an abbreviation.
+///
+/// This is the base case for \c emitOps.
+///
+/// \sa BCRecordLayout::emitAbbrev
+template <typename FieldTy> static void emitOps(llvm::BitCodeAbbrev &abbrev) {
+ FieldTy::emitOp(abbrev);
+}
+
+/// Attaches fields to an abbreviation.
+///
+/// This is the recursive case for \c emitOps.
+///
+/// \sa BCRecordLayout::emitAbbrev
+template <typename FieldTy, typename Next, typename... Rest>
+static void emitOps(llvm::BitCodeAbbrev &abbrev) {
+ static_assert(!FieldTy::IsCompound,
+ "arrays and blobs may not appear in the middle of a record");
+ FieldTy::emitOp(abbrev);
+ emitOps<Next, Rest...>(abbrev);
+}
+
+/// Helper class for dealing with a scalar element in the middle of a record.
+///
+/// \sa BCRecordLayout
+template <typename ElementTy, typename... Fields> class BCRecordCoding {
+public:
+ template <typename BufferTy, typename ElementDataTy, typename... DataTy>
+ static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer,
+ unsigned code, ElementDataTy element, DataTy &&...data) {
+ static_assert(!ElementTy::IsCompound,
+ "arrays and blobs may not appear in the middle of a record");
+ ElementTy::assertValid(element);
+ buffer.push_back(element);
+ BCRecordCoding<Fields...>::emit(Stream, buffer, code,
+ std::forward<DataTy>(data)...);
+ }
+
+ template <typename T, typename ElementDataTy, typename... DataTy>
+ static void read(ArrayRef<T> buffer, ElementDataTy &element,
+ DataTy &&...data) {
+ assert(!buffer.empty() && "too few elements in buffer");
+ element = ElementTy::convert(buffer.front());
+ BCRecordCoding<Fields...>::read(buffer.slice(1),
+ std::forward<DataTy>(data)...);
+ }
+
+ template <typename T, typename... DataTy>
+ static void read(ArrayRef<T> buffer, NoneType, DataTy &&...data) {
+ assert(!buffer.empty() && "too few elements in buffer");
+ BCRecordCoding<Fields...>::read(buffer.slice(1),
+ std::forward<DataTy>(data)...);
+ }
+};
+
+/// Helper class for dealing with a scalar element at the end of a record.
+///
+/// This has a separate implementation because up until now we've only been
+/// \em building the record (into a data buffer), and now we need to hand it
+/// off to the BitstreamWriter to be emitted.
+///
+/// \sa BCRecordLayout
+template <typename ElementTy> class BCRecordCoding<ElementTy> {
+public:
+ template <typename BufferTy, typename DataTy>
+ static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer,
+ unsigned code, const DataTy &data) {
+ static_assert(!ElementTy::IsCompound,
+ "arrays and blobs need special handling");
+ ElementTy::assertValid(data);
+ buffer.push_back(data);
+ Stream.EmitRecordWithAbbrev(code, buffer);
+ }
+
+ template <typename T, typename DataTy>
+ static void read(ArrayRef<T> buffer, DataTy &data) {
+ assert(buffer.size() == 1 && "record data does not match layout");
+ data = ElementTy::convert(buffer.front());
+ }
+
+ template <typename T> static void read(ArrayRef<T> buffer, NoneType) {
+ assert(buffer.size() == 1 && "record data does not match layout");
+ (void)buffer;
+ }
+
+ template <typename T> static void read(ArrayRef<T> buffer) = delete;
+};
+
+/// Helper class for dealing with an array at the end of a record.
+///
+/// \sa BCRecordLayout::emitRecord
+template <typename ElementTy> class BCRecordCoding<BCArray<ElementTy>> {
+public:
+ template <typename BufferTy>
+ static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer,
+ unsigned code, StringRef data) {
+ // TODO: validate array data.
+ Stream.EmitRecordWithArray(code, buffer, data);
+ }
+
+ template <typename BufferTy, typename ArrayTy>
+ static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer,
+ unsigned code, const ArrayTy &array) {
+#ifndef NDEBUG
+ for (auto &element : array)
+ ElementTy::assertValid(element);
+#endif
+ buffer.reserve(buffer.size() + std::distance(array.begin(), array.end()));
+ std::copy(array.begin(), array.end(), std::back_inserter(buffer));
+ Stream.EmitRecordWithAbbrev(code, buffer);
+ }
+
+ template <typename BufferTy, typename ElementDataTy, typename... DataTy>
+ static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer,
+ unsigned code, ElementDataTy element, DataTy... data) {
+ std::array<ElementDataTy, 1 + sizeof...(data)> array{{element, data...}};
+ emit(Stream, buffer, code, array);
+ }
+
+ template <typename BufferTy>
+ static void emit(llvm::BitstreamWriter &Stream, BufferTy &Buffer,
+ unsigned code, NoneType) {
+ Stream.EmitRecordWithAbbrev(code, Buffer);
+ }
+
+ template <typename T>
+ static void read(ArrayRef<T> Buffer, ArrayRef<T> &rawData) {
+ rawData = Buffer;
+ }
+
+ template <typename T, typename ArrayTy>
+ static void read(ArrayRef<T> buffer, ArrayTy &array) {
+ array.append(llvm::map_iterator(buffer.begin(), T::convert),
+ llvm::map_iterator(buffer.end(), T::convert));
+ }
+
+ template <typename T> static void read(ArrayRef<T> buffer, NoneType) {
+ (void)buffer;
+ }
+
+ template <typename T> static void read(ArrayRef<T> buffer) = delete;
+};
+
+/// Helper class for dealing with a blob at the end of a record.
+///
+/// \sa BCRecordLayout
+template <> class BCRecordCoding<BCBlob> {
+public:
+ template <typename BufferTy>
+ static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer,
+ unsigned code, StringRef data) {
+ Stream.EmitRecordWithBlob(code, buffer, data);
+ }
+
+ template <typename T> static void read(ArrayRef<T> buffer) { (void)buffer; }
+
+ /// Blob data is not stored in the buffer if you are using the correct
+ /// accessor; this method should not be used.
+ template <typename T, typename DataTy>
+ static void read(ArrayRef<T> buffer, DataTy &data) = delete;
+};
+
+/// A type trait whose \c type field is the last of its template parameters.
+template <typename Head, typename... Tail> struct last_type {
+ using type = typename last_type<Tail...>::type;
+};
+
+template <typename Head> struct last_type<Head> { using type = Head; };
+
+/// A type trait whose \c value field is \c true if the last type is BCBlob.
+template <typename... Types>
+using has_blob = std::is_same<BCBlob, typename last_type<int, Types...>::type>;
+
+/// A type trait whose \c value field is \c true if the given type is a
+/// BCArray (of any element kind).
+template <typename T> struct is_array {
+private:
+ template <typename E> static bool check(BCArray<E> *);
+ static int check(...);
+
+public:
+ typedef bool value_type;
+ static constexpr bool value = !std::is_same<decltype(check((T *)nullptr)),
+ decltype(check(false))>::value;
+};
+
+/// A type trait whose \c value field is \c true if the last type is a
+/// BCArray (of any element kind).
+template <typename... Types>
+using has_array = is_array<typename last_type<int, Types...>::type>;
+} // namespace detail
+
+/// Represents a single bitcode record type.
+///
+/// This class template is meant to be instantiated and then given a name,
+/// so that from then on that name can be used.
+template <typename IDField, typename... Fields> class BCGenericRecordLayout {
+ llvm::BitstreamWriter &Stream;
+
+public:
+ /// The abbreviation code used for this record in the current block.
+ ///
+ /// Note that this is not the same as the semantic record code, which is the
+ /// first field of the record.
+ const unsigned AbbrevCode;
+
+ /// Create a layout and register it with the given bitstream writer.
+ explicit BCGenericRecordLayout(llvm::BitstreamWriter &Stream)
+ : Stream(Stream), AbbrevCode(emitAbbrev(Stream)) {}
+
+ /// Emit a record to the bitstream writer, using the given buffer for scratch
+ /// space.
+ ///
+ /// Note that even fixed arguments must be specified here.
+ template <typename BufferTy, typename... Data>
+ void emit(BufferTy &buffer, unsigned id, Data &&...data) const {
+ emitRecord(Stream, buffer, AbbrevCode, id, std::forward<Data>(data)...);
+ }
+
+ /// Registers this record's layout with the bitstream writer.
+ ///
+ /// \returns The abbreviation code for the newly-registered record type.
+ static unsigned emitAbbrev(llvm::BitstreamWriter &Stream) {
+ auto Abbrev = std::make_shared<llvm::BitCodeAbbrev>();
+ detail::emitOps<IDField, Fields...>(*Abbrev);
+ return Stream.EmitAbbrev(std::move(Abbrev));
+ }
+
+ /// Emit a record identified by \p abbrCode to bitstream writer \p Stream,
+ /// using \p buffer for scratch space.
+ ///
+ /// Note that even fixed arguments must be specified here. Blobs are passed
+ /// as StringRefs, while arrays can be passed inline, as aggregates, or as
+ /// pre-encoded StringRef data. Skipped values and empty arrays should use
+ /// the special Nothing value.
+ template <typename BufferTy, typename... Data>
+ static void emitRecord(llvm::BitstreamWriter &Stream, BufferTy &buffer,
+ unsigned abbrCode, unsigned recordID, Data &&...data) {
+ static_assert(sizeof...(data) <= sizeof...(Fields) ||
+ detail::has_array<Fields...>::value,
+ "Too many record elements");
+ static_assert(sizeof...(data) >= sizeof...(Fields),
+ "Too few record elements");
+ buffer.clear();
+ detail::BCRecordCoding<IDField, Fields...>::emit(
+ Stream, buffer, abbrCode, recordID, std::forward<Data>(data)...);
+ }
+
+ /// Extract record data from \p buffer into the given data fields.
+ ///
+ /// Note that even fixed arguments must be specified here. Pass \c Nothing
+ /// if you don't care about a particular parameter. Blob data is not included
+ /// in the buffer and should be handled separately by the caller.
+ template <typename ElementTy, typename... Data>
+ static void readRecord(ArrayRef<ElementTy> buffer, Data &&...data) {
+ static_assert(sizeof...(data) <= sizeof...(Fields),
+ "Too many record elements");
+ static_assert(sizeof...(Fields) <=
+ sizeof...(data) + detail::has_blob<Fields...>::value,
+ "Too few record elements");
+ return detail::BCRecordCoding<Fields...>::read(buffer,
+ std::forward<Data>(data)...);
+ }
+
+ /// Extract record data from \p buffer into the given data fields.
+ ///
+ /// Note that even fixed arguments must be specified here. Pass \c Nothing
+ /// if you don't care about a particular parameter. Blob data is not included
+ /// in the buffer and should be handled separately by the caller.
+ template <typename BufferTy, typename... Data>
+ static void readRecord(BufferTy &buffer, Data &&...data) {
+ return readRecord(llvm::makeArrayRef(buffer), std::forward<Data>(data)...);
+ }
+};
+
+/// A record with a fixed record code.
+template <unsigned RecordCode, typename... Fields>
+class BCRecordLayout
+ : public BCGenericRecordLayout<BCLiteral<RecordCode>, Fields...> {
+ using Base = BCGenericRecordLayout<BCLiteral<RecordCode>, Fields...>;
+
+public:
+ enum : unsigned {
+ /// The record code associated with this layout.
+ Code = RecordCode
+ };
+
+ /// Create a layout and register it with the given bitstream writer.
+ explicit BCRecordLayout(llvm::BitstreamWriter &Stream) : Base(Stream) {}
+
+ /// Emit a record to the bitstream writer, using the given buffer for scratch
+ /// space.
+ ///
+ /// Note that even fixed arguments must be specified here.
+ template <typename BufferTy, typename... Data>
+ void emit(BufferTy &buffer, Data &&...data) const {
+ Base::emit(buffer, RecordCode, std::forward<Data>(data)...);
+ }
+
+ /// Emit a record identified by \p abbrCode to bitstream writer \p Stream,
+ /// using \p buffer for scratch space.
+ ///
+ /// Note that even fixed arguments must be specified here. Currently, arrays
+ /// and blobs can only be passed as StringRefs.
+ template <typename BufferTy, typename... Data>
+ static void emitRecord(llvm::BitstreamWriter &Stream, BufferTy &buffer,
+ unsigned abbrCode, Data &&...data) {
+ Base::emitRecord(Stream, buffer, abbrCode, RecordCode,
+ std::forward<Data>(data)...);
+ }
+};
+
+/// RAII object to pair entering and exiting a sub-block.
+class BCBlockRAII {
+ llvm::BitstreamWriter &Stream;
+
+public:
+ BCBlockRAII(llvm::BitstreamWriter &Stream, unsigned block, unsigned abbrev)
+ : Stream(Stream) {
+ Stream.EnterSubblock(block, abbrev);
+ }
+
+ ~BCBlockRAII() { Stream.ExitBlock(); }
+};
+} // namespace llvm
+
+#endif
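
For orientation, here is a hedged usage sketch (not part of this diff): assuming the BCVBR field helper declared earlier in this header, a layout is declared once, registered against a writer, and then used to emit records through its abbreviation. The block ID, abbreviation width, record code and field widths below are illustrative only.

  // Illustrative only: block ID 8, abbrev width 3, record code 1 and the
  // VBR width are assumptions, not taken from any real bitcode consumer.
  llvm::SmallVector<char, 0> Bytes;
  llvm::BitstreamWriter Stream(Bytes);
  llvm::SmallVector<uint64_t, 8> Scratch;

  using VersionLayout = llvm::BCRecordLayout<1, llvm::BCVBR<6>, llvm::BCBlob>;

  llvm::BCBlockRAII Block(Stream, /*block=*/8, /*abbrev=*/3);
  VersionLayout Version(Stream);                  // registers the abbreviation
  Version.emit(Scratch, 12, llvm::StringRef("12.0.1")); // blob as a StringRef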
diff --git a/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeWriter.h b/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeWriter.h
index 4beb89d30e00..7ad2d37a2a35 100644
--- a/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeWriter.h
@@ -47,7 +47,7 @@ class raw_ostream;
public:
/// Create a BitcodeWriter that writes to Buffer.
- BitcodeWriter(SmallVectorImpl<char> &Buffer);
+ BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS = nullptr);
~BitcodeWriter();
@@ -152,10 +152,18 @@ class raw_ostream;
const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr);
- /// Save a copy of the llvm IR as data in the __LLVM,__bitcode section.
+ /// If EmbedBitcode is set, save a copy of the llvm IR as data in the
+ /// __LLVM,__bitcode section (.llvmbc on non-MacOS).
+ /// If available, pass the serialized module via the Buf parameter. If not,
+ /// pass an empty (default-initialized) MemoryBufferRef, and the serialization
+ /// will be handled by this API. The same behavior happens if the provided Buf
+ /// is not bitcode (i.e. if it's invalid data or even textual LLVM assembly).
+ /// If EmbedCmdline is set, the command line is also exported in
+ /// the corresponding section (__LLVM,_cmdline / .llvmcmd) - even if CmdArgs
+ /// were empty.
void EmbedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode,
- bool EmbedMarker,
- const std::vector<uint8_t> *CmdArgs);
+ bool EmbedCmdline,
+ const std::vector<uint8_t> &CmdArgs);
} // end namespace llvm
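
As a hedged illustration of the updated interface (assuming a Module M is already in scope): passing an empty MemoryBufferRef lets the API serialize the module itself, and CmdArgs may legitimately stay empty.

  // Sketch only: embed the module's own bitcode, skip the command-line section.
  std::vector<uint8_t> CmdArgs;
  llvm::EmbedBitcodeInModule(M, llvm::MemoryBufferRef(),
                             /*EmbedBitcode=*/true,
                             /*EmbedCmdline=*/false, CmdArgs);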
diff --git a/contrib/llvm-project/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/contrib/llvm-project/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index de4fe6630324..5b4854d6c95e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/contrib/llvm-project/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -168,7 +168,8 @@ enum TypeCodes {
TYPE_CODE_TOKEN = 22, // TOKEN
- TYPE_CODE_BFLOAT = 23 // BRAIN FLOATING POINT
+ TYPE_CODE_BFLOAT = 23, // BRAIN FLOATING POINT
+ TYPE_CODE_X86_AMX = 24 // X86 AMX
};
enum OperandBundleTagCode {
@@ -338,7 +339,11 @@ enum MetadataCodes {
METADATA_INDEX_OFFSET = 38, // [offset]
METADATA_INDEX = 39, // [bitpos]
METADATA_LABEL = 40, // [distinct, scope, name, file, line]
- METADATA_COMMON_BLOCK = 44, // [distinct, scope, name, variable,...]
+ METADATA_STRING_TYPE = 41, // [distinct, name, size, align,...]
+ // Codes 42 and 43 are reserved to support Fortran array-specific debug
+ // info.
+ METADATA_COMMON_BLOCK = 44, // [distinct, scope, name, variable,...]
+ METADATA_GENERIC_SUBRANGE = 45 // [distinct, count, lo, up, stride]
};
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
@@ -371,6 +376,7 @@ enum ConstantsCodes {
// asmdialect,asmstr,conststr]
CST_CODE_CE_GEP_WITH_INRANGE_INDEX = 24, // [opty, flags, n x operands]
CST_CODE_CE_UNOP = 25, // CE_UNOP: [opcode, opval]
+ CST_CODE_POISON = 26, // POISON
};
/// CastOpcodes - These are values used in the bitcode files to encode which
@@ -536,8 +542,9 @@ enum FunctionCodes {
FUNC_CODE_DEBUG_LOC = 35, // DEBUG_LOC: [Line,Col,ScopeVal, IAVal]
FUNC_CODE_INST_FENCE = 36, // FENCE: [ordering, synchscope]
- FUNC_CODE_INST_CMPXCHG_OLD = 37, // CMPXCHG: [ptrty,ptr,cmp,new, align, vol,
- // ordering, synchscope]
+ FUNC_CODE_INST_CMPXCHG_OLD = 37, // CMPXCHG: [ptrty, ptr, cmp, val, vol,
+ // ordering, synchscope,
+ // failure_ordering?, weak?]
FUNC_CODE_INST_ATOMICRMW = 38, // ATOMICRMW: [ptrty,ptr,val, operation,
// align, vol,
// ordering, synchscope]
@@ -551,8 +558,9 @@ enum FunctionCodes {
FUNC_CODE_INST_GEP = 43, // GEP: [inbounds, n x operands]
FUNC_CODE_INST_STORE = 44, // STORE: [ptrty,ptr,valty,val, align, vol]
FUNC_CODE_INST_STOREATOMIC = 45, // STORE: [ptrty,ptr,val, align, vol
- FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty,ptr,valty,cmp,new, align,
- // vol,ordering,synchscope]
+ FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty, ptr, cmp, val, vol,
+ // success_ordering, synchscope,
+ // failure_ordering, weak]
FUNC_CODE_INST_LANDINGPAD = 47, // LANDINGPAD: [ty,val,num,id0,val0...]
FUNC_CODE_INST_CLEANUPRET = 48, // CLEANUPRET: [val] or [val,bb#]
FUNC_CODE_INST_CATCHRET = 49, // CATCHRET: [val,bb#]
@@ -644,6 +652,11 @@ enum AttributeKindCodes {
ATTR_KIND_NO_MERGE = 66,
ATTR_KIND_NULL_POINTER_IS_VALID = 67,
ATTR_KIND_NOUNDEF = 68,
+ ATTR_KIND_BYREF = 69,
+ ATTR_KIND_MUSTPROGRESS = 70,
+ ATTR_KIND_NO_CALLBACK = 71,
+ ATTR_KIND_HOT = 72,
+ ATTR_KIND_NO_PROFILE = 73,
};
enum ComdatSelectionKindCodes {
diff --git a/contrib/llvm-project/llvm/include/llvm/Bitstream/BitCodes.h b/contrib/llvm-project/llvm/include/llvm/Bitstream/BitCodes.h
index 41a3de3b20ef..9cd4e535a470 100644
--- a/contrib/llvm-project/llvm/include/llvm/Bitstream/BitCodes.h
+++ b/contrib/llvm-project/llvm/include/llvm/Bitstream/BitCodes.h
@@ -18,6 +18,7 @@
#define LLVM_BITSTREAM_BITCODES_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
@@ -137,13 +138,7 @@ public:
}
/// isChar6 - Return true if this character is legal in the Char6 encoding.
- static bool isChar6(char C) {
- if (C >= 'a' && C <= 'z') return true;
- if (C >= 'A' && C <= 'Z') return true;
- if (C >= '0' && C <= '9') return true;
- if (C == '.' || C == '_') return true;
- return false;
- }
+ static bool isChar6(char C) { return isAlnum(C) || C == '.' || C == '_'; }
static unsigned EncodeChar6(char C) {
if (C >= 'a' && C <= 'z') return C-'a';
if (C >= 'A' && C <= 'Z') return C-'A'+26;
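
A small hedged sketch of what the simplified predicate accepts: the Char6 alphabet is a-z, A-Z, 0-9, '.' and '_', mapped onto the values 0-63 by EncodeChar6/DecodeChar6.

  // Sanity checks, for illustration only.
  assert(llvm::BitCodeAbbrevOp::isChar6('_') &&
         !llvm::BitCodeAbbrevOp::isChar6('-'));
  assert(llvm::BitCodeAbbrevOp::EncodeChar6('a') == 0);
  assert(llvm::BitCodeAbbrevOp::EncodeChar6('A') == 26);
  assert(llvm::BitCodeAbbrevOp::DecodeChar6(
             llvm::BitCodeAbbrevOp::EncodeChar6('.')) == '.');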
diff --git a/contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamWriter.h b/contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamWriter.h
index c0ead19dc71d..3954df4897ae 100644
--- a/contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamWriter.h
@@ -20,17 +20,28 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitstream/BitCodes.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <vector>
namespace llvm {
class BitstreamWriter {
+ /// Out - The buffer that keeps unflushed bytes.
SmallVectorImpl<char> &Out;
+ /// FS - The file stream that Out flushes to. If FS is nullptr (no stream
+ /// that supports read and seek is available), Out cannot be flushed until
+ /// all data have been written.
+ raw_fd_stream *FS;
+
+ /// FlushThreshold - If FS is valid, this is the threshold (in bytes) at
+ /// which Out is flushed to FS.
+ const uint64_t FlushThreshold;
+
/// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
unsigned CurBit;
- /// CurValue - The current value. Only bits < CurBit are valid.
+ /// CurValue - The current value. Only bits < CurBit are valid.
uint32_t CurValue;
/// CurCodeSize - This is the declared size of code values used for the
@@ -64,15 +75,19 @@ class BitstreamWriter {
void WriteByte(unsigned char Value) {
Out.push_back(Value);
+ FlushToFile();
}
void WriteWord(unsigned Value) {
Value = support::endian::byte_swap<uint32_t, support::little>(Value);
Out.append(reinterpret_cast<const char *>(&Value),
reinterpret_cast<const char *>(&Value + 1));
+ FlushToFile();
}
- size_t GetBufferOffset() const { return Out.size(); }
+ uint64_t GetNumOfFlushedBytes() const { return FS ? FS->tell() : 0; }
+
+ size_t GetBufferOffset() const { return Out.size() + GetNumOfFlushedBytes(); }
size_t GetWordIndex() const {
size_t Offset = GetBufferOffset();
@@ -80,9 +95,29 @@ class BitstreamWriter {
return Offset / 4;
}
+ /// If the related file stream supports reading, seeking and writing, flush
+ /// the buffer if its size is above a threshold.
+ void FlushToFile() {
+ if (!FS)
+ return;
+ if (Out.size() < FlushThreshold)
+ return;
+ FS->write((char *)&Out.front(), Out.size());
+ Out.clear();
+ }
+
public:
- explicit BitstreamWriter(SmallVectorImpl<char> &O)
- : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
+ /// Create a BitstreamWriter that writes to Buffer \p O.
+ ///
+ /// \p FS is the file stream that \p O flushes to incrementally. If \p FS is
+ /// null, \p O does not flush incrementally and is only written out in full
+ /// at the end.
+ ///
+ /// \p FlushThreshold is the threshold (in MiB) at which \p O is flushed to
+ /// \p FS, if \p FS is valid.
+ BitstreamWriter(SmallVectorImpl<char> &O, raw_fd_stream *FS = nullptr,
+ uint32_t FlushThreshold = 512)
+ : Out(O), FS(FS), FlushThreshold(FlushThreshold << 20), CurBit(0),
+ CurValue(0), CurCodeSize(2) {}
~BitstreamWriter() {
assert(CurBit == 0 && "Unflushed data remaining");
@@ -103,12 +138,60 @@ public:
/// with the specified value.
void BackpatchWord(uint64_t BitNo, unsigned NewWord) {
using namespace llvm::support;
- unsigned ByteNo = BitNo / 8;
- assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
- &Out[ByteNo], BitNo & 7)) &&
- "Expected to be patching over 0-value placeholders");
- endian::writeAtBitAlignment<uint32_t, little, unaligned>(
- &Out[ByteNo], NewWord, BitNo & 7);
+ uint64_t ByteNo = BitNo / 8;
+ uint64_t StartBit = BitNo & 7;
+ uint64_t NumOfFlushedBytes = GetNumOfFlushedBytes();
+
+ if (ByteNo >= NumOfFlushedBytes) {
+ assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
+ &Out[ByteNo - NumOfFlushedBytes], StartBit)) &&
+ "Expected to be patching over 0-value placeholders");
+ endian::writeAtBitAlignment<uint32_t, little, unaligned>(
+ &Out[ByteNo - NumOfFlushedBytes], NewWord, StartBit);
+ return;
+ }
+
+ // If the byte offset to backpatch is flushed, use seek to backfill data.
+ // First, save the file position to restore later.
+ uint64_t CurPos = FS->tell();
+
+ // Copy data to update into Bytes from the file FS and the buffer Out.
+ char Bytes[9]; // Use one more byte to silence a warning from Visual C++.
+ size_t BytesNum = StartBit ? 8 : 4;
+ size_t BytesFromDisk = std::min(static_cast<uint64_t>(BytesNum), NumOfFlushedBytes - ByteNo);
+ size_t BytesFromBuffer = BytesNum - BytesFromDisk;
+
+ // When unaligned, copy existing data into Bytes from the file FS and the
+ // buffer Out so that it can be updated before writing. For debug builds
+ // read bytes unconditionally in order to check that the existing value is 0
+ // as expected.
+#ifdef NDEBUG
+ if (StartBit)
+#endif
+ {
+ FS->seek(ByteNo);
+ ssize_t BytesRead = FS->read(Bytes, BytesFromDisk);
+ (void)BytesRead; // silence warning
+ assert(BytesRead >= 0 && static_cast<size_t>(BytesRead) == BytesFromDisk);
+ for (size_t i = 0; i < BytesFromBuffer; ++i)
+ Bytes[BytesFromDisk + i] = Out[i];
+ assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
+ Bytes, StartBit)) &&
+ "Expected to be patching over 0-value placeholders");
+ }
+
+ // Update Bytes in terms of bit offset and value.
+ endian::writeAtBitAlignment<uint32_t, little, unaligned>(Bytes, NewWord,
+ StartBit);
+
+ // Copy updated data back to the file FS and the buffer Out.
+ FS->seek(ByteNo);
+ FS->write(Bytes, BytesFromDisk);
+ for (size_t i = 0; i < BytesFromBuffer; ++i)
+ Out[i] = Bytes[BytesFromDisk + i];
+
+ // Restore the file position.
+ FS->seek(CurPos);
}
void BackpatchWord64(uint64_t BitNo, uint64_t Val) {
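
To make the new incremental-flush mode concrete, a hedged setup sketch (not part of this diff); the output path is illustrative and error handling is minimal. With an FS attached, Out only holds unflushed bytes, and BackpatchWord transparently patches bytes that have already reached the file.

  std::error_code EC;
  llvm::raw_fd_stream FS("out.bc", EC);       // supports read, seek and write
  assert(!EC && "could not open output file");
  llvm::SmallVector<char, 0> Buffer;
  llvm::BitstreamWriter Writer(Buffer, &FS, /*FlushThreshold=*/512); // 512 MiB
  // ... emit blocks and records as usual ...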
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/Analysis.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/Analysis.h
index fe610b5bdc8d..bdfb416d9bd9 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/Analysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/Analysis.h
@@ -92,11 +92,6 @@ void computeValueLLTs(const DataLayout &DL, Type &Ty,
/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
GlobalValue *ExtractTypeInfo(Value *V);
-/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
-/// processed uses a memory 'm' constraint.
-bool hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
- const TargetLowering &TLI);
-
/// getFCmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR floating-point condition code. This includes
/// consideration of global floating-point math flags.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/AntiDepBreaker.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/AntiDepBreaker.h
index d75c13e2dd75..0553d7d452a4 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/AntiDepBreaker.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/AntiDepBreaker.h
@@ -17,7 +17,6 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h
index 0eb950861af6..76486b0b48ce 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -66,6 +66,7 @@ class MCSymbol;
class MCTargetOptions;
class MDNode;
class Module;
+class PseudoProbeHandler;
class raw_ostream;
class StackMaps;
class StringRef;
@@ -139,9 +140,30 @@ public:
using GOTEquivUsePair = std::pair<const GlobalVariable *, unsigned>;
MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
+ /// struct HandlerInfo and Handlers permit users or target-extended
+ /// AsmPrinters to add their own handlers.
+ struct HandlerInfo {
+ std::unique_ptr<AsmPrinterHandler> Handler;
+ const char *TimerName;
+ const char *TimerDescription;
+ const char *TimerGroupName;
+ const char *TimerGroupDescription;
+
+ HandlerInfo(std::unique_ptr<AsmPrinterHandler> Handler,
+ const char *TimerName, const char *TimerDescription,
+ const char *TimerGroupName, const char *TimerGroupDescription)
+ : Handler(std::move(Handler)), TimerName(TimerName),
+ TimerDescription(TimerDescription), TimerGroupName(TimerGroupName),
+ TimerGroupDescription(TimerGroupDescription) {}
+ };
+
private:
MCSymbol *CurrentFnEnd = nullptr;
- MCSymbol *CurExceptionSym = nullptr;
+
+ /// Map a basic block section ID to the exception symbol associated with that
+ /// section. Map entries are assigned and looked up via
+ /// AsmPrinter::getMBBExceptionSym.
+ DenseMap<unsigned, MCSymbol *> MBBSectionExceptionSyms;
// The symbol used to represent the start of the current BB section of the
// function. This is used to calculate the size of the BB section.
@@ -158,26 +180,10 @@ private:
protected:
MCSymbol *CurrentFnBegin = nullptr;
- /// Protected struct HandlerInfo and Handlers permit target extended
- /// AsmPrinter adds their own handlers.
- struct HandlerInfo {
- std::unique_ptr<AsmPrinterHandler> Handler;
- const char *TimerName;
- const char *TimerDescription;
- const char *TimerGroupName;
- const char *TimerGroupDescription;
-
- HandlerInfo(std::unique_ptr<AsmPrinterHandler> Handler,
- const char *TimerName, const char *TimerDescription,
- const char *TimerGroupName, const char *TimerGroupDescription)
- : Handler(std::move(Handler)), TimerName(TimerName),
- TimerDescription(TimerDescription), TimerGroupName(TimerGroupName),
- TimerGroupDescription(TimerGroupDescription) {}
- };
-
/// A vector of all debug/EH info emitters we should use. This vector
/// maintains ownership of the emitters.
- SmallVector<HandlerInfo, 1> Handlers;
+ std::vector<HandlerInfo> Handlers;
+ size_t NumUserHandlers = 0;
public:
struct SrcMgrDiagInfo {
@@ -201,6 +207,10 @@ private:
/// If the target supports dwarf debug info, this pointer is non-null.
DwarfDebug *DD = nullptr;
+ /// A handler that supports pseudo probe emission with embedded inline
+ /// context.
+ PseudoProbeHandler *PP = nullptr;
+
/// If the current module uses dwarf CFI annotations strictly for debugging.
bool isCFIMoveForDebugging = false;
@@ -216,6 +226,14 @@ public:
uint16_t getDwarfVersion() const;
void setDwarfVersion(uint16_t Version);
+ bool isDwarf64() const;
+
+ /// Returns 4 for DWARF32 and 8 for DWARF64.
+ unsigned int getDwarfOffsetByteSize() const;
+
+ /// Returns 4 for DWARF32 and 12 for DWARF64.
+ unsigned int getUnitLengthFieldByteSize() const;
+
bool isPositionIndependent() const;
/// Return true if assembly output should contain comments.
@@ -230,7 +248,10 @@ public:
MCSymbol *getFunctionBegin() const { return CurrentFnBegin; }
MCSymbol *getFunctionEnd() const { return CurrentFnEnd; }
- MCSymbol *getCurExceptionSym();
+
+ /// Return the exception symbol associated with the MBB section containing a
+ /// given basic block.
+ MCSymbol *getMBBExceptionSym(const MachineBasicBlock &MBB);
/// Return information about object file lowering.
const TargetLoweringObjectFile &getObjFileLowering() const;
@@ -342,6 +363,10 @@ public:
void emitStackSizeSection(const MachineFunction &MF);
+ void emitBBAddrMapSection(const MachineFunction &MF);
+
+ void emitPseudoProbe(const MachineInstr &MI);
+
void emitRemarksSection(remarks::RemarkStreamer &RS);
enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
@@ -369,6 +394,32 @@ public:
/// so, emit it and return true, otherwise do nothing and return false.
bool emitSpecialLLVMGlobal(const GlobalVariable *GV);
+ /// `llvm.global_ctors` and `llvm.global_dtors` are arrays of Structor
+ /// structs.
+ ///
+ /// Priority - init priority
+ /// Func - global initialization or global clean-up function
+ /// ComdatKey - associated data
+ struct Structor {
+ int Priority = 0;
+ Constant *Func = nullptr;
+ GlobalValue *ComdatKey = nullptr;
+
+ Structor() = default;
+ };
+
+ /// This method gathers an array of Structors and then sorts them by
+ /// Priority.
+ /// @param List The initializer of the `llvm.global_ctors` or
+ /// `llvm.global_dtors` array.
+ /// @param[out] Structors The gathered Structor structs, sorted by Priority.
+ void preprocessXXStructorList(const DataLayout &DL, const Constant *List,
+ SmallVector<Structor, 8> &Structors);
+
+ /// This method emits `llvm.global_ctors` or `llvm.global_dtors` list.
+ virtual void emitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool IsCtor);
+
/// Emit an alignment directive to the specified power of two boundary. If a
/// global value is specified, and if that global has an explicit alignment
/// requested, it will override the alignment request if required for
@@ -403,6 +454,11 @@ public:
// Overridable Hooks
//===------------------------------------------------------------------===//
+ void addAsmPrinterHandler(HandlerInfo Handler) {
+ Handlers.insert(Handlers.begin(), std::move(Handler));
+ NumUserHandlers++;
+ }
+
// Targets can, or in the case of EmitInstruction, must implement these to
// customize output.
@@ -534,9 +590,6 @@ public:
emitLabelPlusOffset(Label, 0, Size, IsSectionRelative);
}
- /// Emit something like ".long Label + Offset".
- void emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const;
-
//===------------------------------------------------------------------===//
// Dwarf Emission Helper Routines
//===------------------------------------------------------------------===//
@@ -557,7 +610,7 @@ public:
unsigned GetSizeOfEncodedValue(unsigned Encoding) const;
/// Emit reference to a ttype global with a specified encoding.
- void emitTTypeReference(const GlobalValue *GV, unsigned Encoding) const;
+ virtual void emitTTypeReference(const GlobalValue *GV, unsigned Encoding);
/// Emit a reference to a symbol for use in dwarf. Different object formats
/// represent this in different ways. Some use a relocation others encode
@@ -565,18 +618,39 @@ public:
void emitDwarfSymbolReference(const MCSymbol *Label,
bool ForceOffset = false) const;
- /// Emit the 4-byte offset of a string from the start of its section.
+ /// Emit the 4- or 8-byte offset of a string from the start of its section.
///
/// When possible, emit a DwarfStringPool section offset without any
/// relocations, and without using the symbol. Otherwise, defers to \a
/// emitDwarfSymbolReference().
+ ///
+ /// The length of the emitted value depends on the DWARF format.
void emitDwarfStringOffset(DwarfStringPoolEntry S) const;
- /// Emit the 4-byte offset of a string from the start of its section.
+ /// Emit the 4- or 8-byte offset of a string from the start of its section.
void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const {
emitDwarfStringOffset(S.getEntry());
}
+ /// Emit something like ".long Label + Offset" or ".quad Label + Offset"
+ /// depending on the DWARF format.
+ void emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const;
+
+ /// Emit 32- or 64-bit value depending on the DWARF format.
+ void emitDwarfLengthOrOffset(uint64_t Value) const;
+
+ /// Emit a special value of 0xffffffff if producing 64-bit debugging info.
+ void maybeEmitDwarf64Mark() const;
+
+ /// Emit a unit length field. The actual format, DWARF32 or DWARF64, is chosen
+ /// according to the settings.
+ void emitDwarfUnitLength(uint64_t Length, const Twine &Comment) const;
+
+ /// Emit a unit length field. The actual format, DWARF32 or DWARF64, is chosen
+ /// according to the settings.
+ void emitDwarfUnitLength(const MCSymbol *Hi, const MCSymbol *Lo,
+ const Twine &Comment) const;
+
/// Emit reference to a call site with a specified encoding
void emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Encoding) const;
@@ -713,12 +787,13 @@ private:
void emitModuleIdents(Module &M);
/// Emit bytes for llvm.commandline metadata.
void emitModuleCommandLines(Module &M);
- void emitXXStructorList(const DataLayout &DL, const Constant *List,
- bool isCtor);
GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S);
/// Emit GlobalAlias or GlobalIFunc.
void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS);
+
+ /// This method decides whether the specified basic block requires a label.
+ bool shouldEmitLabelForBasicBlock(const MachineBasicBlock &MBB) const;
};
} // end namespace llvm
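
Since HandlerInfo is now public and addAsmPrinterHandler is exposed, a target-specific AsmPrinter subclass could register its own handler roughly as below; MyDebugHandler and the timer strings are hypothetical.

  // Sketch, assumed to run inside a target AsmPrinter subclass (e.g. its
  // constructor), where MyDebugHandler derives from AsmPrinterHandler.
  addAsmPrinterHandler(AsmPrinter::HandlerInfo(
      std::make_unique<MyDebugHandler>(this), "my-debug", "My Debug Emission",
      "my-group", "My Debug Group Description"));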
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinterHandler.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinterHandler.h
index 899d067d03f0..dc81a3040097 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinterHandler.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinterHandler.h
@@ -23,8 +23,10 @@ class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
class MCSymbol;
+class Module;
-typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm);
+typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm,
+ const MachineBasicBlock *MBB);
/// Collects and handles AsmPrinter objects required to build debug
/// or EH information.
@@ -36,6 +38,8 @@ public:
/// this tracks that size.
virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0;
+ virtual void beginModule(Module *M) {}
+
/// Emit all sections that should come after the content.
virtual void endModule() = 0;
@@ -74,6 +78,7 @@ public:
/// Process end of a basic block during basic block sections.
virtual void endBasicBlock(const MachineBasicBlock &MBB) {}
};
+
} // End of namespace llvm
#endif
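
A hedged sketch of a provider matching the updated typedef, which can now resolve the per-section exception symbol through AsmPrinter::getMBBExceptionSym:

  static llvm::MCSymbol *provideExceptionSym(llvm::AsmPrinter *Asm,
                                             const llvm::MachineBasicBlock *MBB) {
    return Asm->getMBBExceptionSym(*MBB);
  }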
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h
new file mode 100644
index 000000000000..d8da3be0cd4c
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h
@@ -0,0 +1,30 @@
+//===- BasicBlockSectionUtils.h - Utilities for basic block sections --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
+#define LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+
+extern cl::opt<std::string> BBSectionsColdTextPrefix;
+
+class MachineFunction;
+class MachineBasicBlock;
+
+using MachineBasicBlockComparator =
+ function_ref<bool(const MachineBasicBlock &, const MachineBasicBlock &)>;
+
+void sortBasicBlocksAndUpdateBranches(MachineFunction &MF,
+ MachineBasicBlockComparator MBBCmp);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
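
A hedged usage sketch of the new utility (not part of this diff): the comparator decides the final block order, and the isColdBlock predicate below is purely hypothetical.

  // Reorder the blocks of MF so that (hypothetically) cold blocks come last.
  llvm::sortBasicBlocksAndUpdateBranches(
      MF, [](const llvm::MachineBasicBlock &A,
             const llvm::MachineBasicBlock &B) {
        return !isColdBlock(A) && isColdBlock(B);
      });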
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 407f09063dce..9514dd22be80 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -40,7 +40,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -115,12 +114,14 @@ private:
/// Estimate a cost of subvector extraction as a sequence of extract and
/// insert operations.
- unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index,
+ unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
FixedVectorType *SubVTy) {
assert(VTy && SubVTy &&
"Can only extract subvectors from vectors");
int NumSubElts = SubVTy->getNumElements();
- assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
+ assert((!isa<FixedVectorType>(VTy) ||
+ (Index + NumSubElts) <=
+ (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
"SK_ExtractSubvector index out of range");
unsigned Cost = 0;
@@ -138,12 +139,14 @@ private:
/// Estimate a cost of subvector insertion as a sequence of extract and
/// insert operations.
- unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index,
+ unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
FixedVectorType *SubVTy) {
assert(VTy && SubVTy &&
"Can only insert subvectors into vectors");
int NumSubElts = SubVTy->getNumElements();
- assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
+ assert((!isa<FixedVectorType>(VTy) ||
+ (Index + NumSubElts) <=
+ (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
"SK_InsertSubvector index out of range");
unsigned Cost = 0;
@@ -222,7 +225,11 @@ public:
}
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
- return getTLI()->isNoopAddrSpaceCast(FromAS, ToAS);
+ return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
+ }
+
+ unsigned getAssumedAddrSpace(const Value *V) const {
+ return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
}
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
@@ -265,6 +272,10 @@ public:
return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}
+ bool isNumRegsMajorCostOfLSR() {
+ return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
+ }
+
bool isProfitableLSRChainElement(Instruction *I) {
return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
}
@@ -294,6 +305,10 @@ public:
return getTLI()->isTypeLegal(VT);
}
+ unsigned getRegUsageForType(Type *Ty) {
+ return getTLI()->getTypeLegalizationCost(DL, Ty).first;
+ }
+
int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) {
return BaseT::getGEPCost(PointeeType, Ptr, Operands);
@@ -386,6 +401,7 @@ public:
}
unsigned getInliningThresholdMultiplier() { return 1; }
+ unsigned adjustInliningThreshold(const CallBase *CB) { return 0; }
int getInlinerVectorBonusPercent() { return 150; }
@@ -477,6 +493,30 @@ public:
return BaseT::emitGetActiveLaneMask();
}
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) {
+ return BaseT::instCombineIntrinsic(IC, II);
+ }
+
+ Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II,
+ APInt DemandedMask,
+ KnownBits &Known,
+ bool &KnownBitsComputed) {
+ return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
+ KnownBitsComputed);
+ }
+
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) {
+ return BaseT::simplifyDemandedVectorEltsIntrinsic(
+ IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+ SimplifyAndSetOp);
+ }
+
int getInstructionLatency(const Instruction *I) {
if (isa<LoadInst>(I))
return getST()->getSchedModel().DefaultLoadLatency;
@@ -532,6 +572,8 @@ public:
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
+ Optional<unsigned> getMaxVScale() const { return None; }
+
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the demanded result elements need to be inserted and/or
/// extracted from vectors.
@@ -567,7 +609,7 @@ public:
return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}
- /// Estimate the overhead of scalarizing an instructions unique
+ /// Estimate the overhead of scalarizing an instruction's unique
/// non-constant operands. The types of the arguments are ordinarily
/// scalar, in which case the costs are multiplied with VF.
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
@@ -575,8 +617,14 @@ public:
unsigned Cost = 0;
SmallPtrSet<const Value*, 4> UniqueOperands;
for (const Value *A : Args) {
+ // Disregard things like metadata arguments.
+ Type *Ty = A->getType();
+ if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
+ !Ty->isPtrOrPtrVectorTy())
+ continue;
+
if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
- auto *VecTy = dyn_cast<VectorType>(A->getType());
+ auto *VecTy = dyn_cast<VectorType>(Ty);
if (VecTy) {
// If A is a vector operand, VF should be 1 or correspond to A.
assert((VF == 1 ||
@@ -584,7 +632,7 @@ public:
"Vector argument does not match VF");
}
else
- VecTy = FixedVectorType::get(A->getType(), VF);
+ VecTy = FixedVectorType::get(Ty, VF);
Cost += getScalarizationOverhead(VecTy, false, true);
}
@@ -658,7 +706,8 @@ public:
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
unsigned Cost = thisT()->getArithmeticInstrCost(
- Opcode, VTy->getScalarType(), CostKind);
+ Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo, Args, CxtI);
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
return getScalarizationOverhead(VTy, Args) + Num * Cost;
@@ -681,19 +730,20 @@ public:
case TTI::SK_PermuteTwoSrc:
return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp));
case TTI::SK_ExtractSubvector:
- return getExtractSubvectorOverhead(cast<FixedVectorType>(Tp), Index,
+ return getExtractSubvectorOverhead(Tp, Index,
cast<FixedVectorType>(SubTp));
case TTI::SK_InsertSubvector:
- return getInsertSubvectorOverhead(cast<FixedVectorType>(Tp), Index,
+ return getInsertSubvectorOverhead(Tp, Index,
cast<FixedVectorType>(SubTp));
}
llvm_unreachable("Unknown TTI::ShuffleKind");
}
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
- if (BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I) == 0)
+ if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
return 0;
const TargetLoweringBase *TLI = getTLI();
@@ -731,15 +781,12 @@ public:
return 0;
LLVM_FALLTHROUGH;
case Instruction::SExt:
- if (!I)
- break;
-
- if (getTLI()->isExtFree(I))
+ if (I && getTLI()->isExtFree(I))
return 0;
// If this is a zext/sext of a load, return 0 if the corresponding
// extending load exists on target.
- if (I && isa<LoadInst>(I->getOperand(0))) {
+ if (CCH == TTI::CastContextHint::Normal) {
EVT ExtVT = EVT::getEVT(Dst);
EVT LoadVT = EVT::getEVT(Src);
unsigned LType =
@@ -814,7 +861,7 @@ public:
unsigned SplitCost =
(!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
return SplitCost +
- (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy,
+ (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
CostKind, I));
}
@@ -822,7 +869,7 @@ public:
// the operation will get scalarized.
unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
unsigned Cost = thisT()->getCastInstrCost(
- Opcode, Dst->getScalarType(), Src->getScalarType(), CostKind, I);
+ Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
@@ -847,7 +894,7 @@ public:
return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
Index) +
thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
- TTI::TCK_RecipThroughput);
+ TTI::CastContextHint::None, TTI::TCK_RecipThroughput);
}
unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
@@ -855,6 +902,7 @@ public:
}
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
const TargetLoweringBase *TLI = getTLI();
@@ -863,7 +911,8 @@ public:
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+ I);
// Selects on vectors are actually vector selects.
if (ISD == ISD::SELECT) {
@@ -888,7 +937,7 @@ public:
if (CondTy)
CondTy = CondTy->getScalarType();
unsigned Cost = thisT()->getCmpSelInstrCost(
- Opcode, ValVTy->getScalarType(), CondTy, CostKind, I);
+ Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
@@ -922,7 +971,11 @@ public:
return Cost;
if (Src->isVectorTy() &&
- Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
+ // In practice it's not currently possible to have a change in lane
+ // length for extending loads or truncating stores so both types should
+ // have the same scalable property.
+ TypeSize::isKnownLT(Src->getPrimitiveSizeInBits(),
+ LT.second.getSizeInBits())) {
// This is a vector load that legalizes to a larger type than the vector
// itself. Unless the corresponding extending load or truncating store is
// legal, then this will scalarize.
@@ -945,6 +998,51 @@ public:
return Cost;
}
+ unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) {
+ auto *VT = cast<FixedVectorType>(DataTy);
+ // Assume the target does not have support for gather/scatter operations
+ // and provide a rough estimate.
+ //
+ // First, compute the cost of extracting the individual addresses and the
+ // individual memory operations.
+ int LoadCost =
+ VT->getNumElements() *
+ (getVectorInstrCost(
+ Instruction::ExtractElement,
+ FixedVectorType::get(PointerType::get(VT->getElementType(), 0),
+ VT->getNumElements()),
+ -1) +
+ getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind));
+
+ // Next, compute the cost of packing the result in a vector.
+ int PackingCost = getScalarizationOverhead(VT, Opcode != Instruction::Store,
+ Opcode == Instruction::Store);
+
+ int ConditionalCost = 0;
+ if (VariableMask) {
+ // Compute the cost of conditionally executing the memory operations with
+ // variable masks. This includes extracting the individual conditions, and
+ // the branches and PHIs needed to combine the results.
+ // NOTE: Estimating the cost of conditionally executing the memory
+ // operations accurately is quite difficult and the current solution
+ // provides a very rough estimate only.
+ ConditionalCost =
+ VT->getNumElements() *
+ (getVectorInstrCost(
+ Instruction::ExtractElement,
+ FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
+ VT->getNumElements()),
+ -1) +
+ getCFInstrCost(Instruction::Br, CostKind) +
+ getCFInstrCost(Instruction::PHI, CostKind));
+ }
+
+ return LoadCost + PackingCost + ConditionalCost;
+ }
+
unsigned getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1099,76 +1197,52 @@ public:
/// Get intrinsic cost based on arguments.
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
- Intrinsic::ID IID = ICA.getID();
-
- // Special case some scalar intrinsics.
- if (CostKind != TTI::TCK_RecipThroughput) {
- switch (IID) {
- default:
- break;
- case Intrinsic::cttz:
- if (getTLI()->isCheapToSpeculateCttz())
- return TargetTransformInfo::TCC_Basic;
- break;
- case Intrinsic::ctlz:
- if (getTLI()->isCheapToSpeculateCtlz())
- return TargetTransformInfo::TCC_Basic;
- break;
- case Intrinsic::memcpy:
- return thisT()->getMemcpyCost(ICA.getInst());
- // TODO: other libc intrinsics.
- }
- return BaseT::getIntrinsicInstrCost(ICA, CostKind);
- }
-
+ // Check for generically free intrinsics.
if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
return 0;
- // TODO: Combine these two logic paths.
+ // Assume that target intrinsics are cheap.
+ Intrinsic::ID IID = ICA.getID();
+ if (Function::isTargetIntrinsic(IID))
+ return TargetTransformInfo::TCC_Basic;
+
if (ICA.isTypeBasedOnly())
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
Type *RetTy = ICA.getReturnType();
- unsigned VF = ICA.getVectorFactor();
- unsigned RetVF =
- (RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getNumElements()
- : 1);
- assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
+
+ ElementCount VF = ICA.getVectorFactor();
+ ElementCount RetVF =
+ (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
+ : ElementCount::getFixed(1));
+ assert((RetVF.isScalar() || VF.isScalar()) &&
+ "VF > 1 and RetVF is a vector type");
const IntrinsicInst *I = ICA.getInst();
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
FastMathFlags FMF = ICA.getFlags();
-
switch (IID) {
- default: {
- // Assume that we need to scalarize this intrinsic.
- SmallVector<Type *, 4> Types;
- for (const Value *Op : Args) {
- Type *OpTy = Op->getType();
- assert(VF == 1 || !OpTy->isVectorTy());
- Types.push_back(VF == 1 ? OpTy : FixedVectorType::get(OpTy, VF));
- }
+ default:
+ break;
- if (VF > 1 && !RetTy->isVoidTy())
- RetTy = FixedVectorType::get(RetTy, VF);
-
- // Compute the scalarization overhead based on Args for a vector
- // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
- // CostModel will pass a vector RetTy and VF is 1.
- unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
- if (RetVF > 1 || VF > 1) {
- ScalarizationCost = 0;
- if (!RetTy->isVoidTy())
- ScalarizationCost +=
- getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
- ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
- }
+ case Intrinsic::cttz:
+ // FIXME: If necessary, this should go in target-specific overrides.
+ if (VF.isScalar() && RetVF.isScalar() &&
+ getTLI()->isCheapToSpeculateCttz())
+ return TargetTransformInfo::TCC_Basic;
+ break;
+
+ case Intrinsic::ctlz:
+ // FIXME: If necessary, this should go in target-specific overrides.
+ if (VF.isScalar() && RetVF.isScalar() &&
+ getTLI()->isCheapToSpeculateCtlz())
+ return TargetTransformInfo::TCC_Basic;
+ break;
+
+ case Intrinsic::memcpy:
+ return thisT()->getMemcpyCost(ICA.getInst());
- IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF,
- ScalarizationCost, I);
- return thisT()->getIntrinsicInstrCost(Attrs, CostKind);
- }
case Intrinsic::masked_scatter: {
- assert(VF == 1 && "Can't vectorize types here.");
+ assert(VF.isScalar() && "Can't vectorize types here.");
const Value *Mask = Args[3];
bool VarMask = !isa<Constant>(Mask);
Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
@@ -1177,31 +1251,57 @@ public:
VarMask, Alignment, CostKind, I);
}
case Intrinsic::masked_gather: {
- assert(VF == 1 && "Can't vectorize types here.");
+ assert(VF.isScalar() && "Can't vectorize types here.");
const Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);
Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
VarMask, Alignment, CostKind, I);
}
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin: {
+ case Intrinsic::experimental_vector_extract: {
+ // FIXME: Handle case where a scalable vector is extracted from a scalable
+ // vector
+ if (isa<ScalableVectorType>(RetTy))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+ unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
+ return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
+ cast<VectorType>(Args[0]->getType()),
+ Index, cast<VectorType>(RetTy));
+ }
+ case Intrinsic::experimental_vector_insert: {
+ // FIXME: Handle case where a scalable vector is inserted into a scalable
+ // vector
+ if (isa<ScalableVectorType>(Args[1]->getType()))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+ unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
+ return thisT()->getShuffleCost(
+ TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index,
+ cast<VectorType>(Args[1]->getType()));
+ }
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin: {
IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
- return getIntrinsicInstrCost(Attrs, CostKind);
+ return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
+ }
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
+ IntrinsicCostAttributes Attrs(
+ IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
+ return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
+ if (isa<ScalableVectorType>(RetTy))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
const Value *X = Args[0];
const Value *Y = Args[1];
const Value *Z = Args[2];
@@ -1232,14 +1332,48 @@ public:
// For non-rotates (X != Y) we must add shift-by-zero handling costs.
if (X != Y) {
Type *CondTy = RetTy->getWithNewBitWidth(1);
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
- CostKind);
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
- CondTy, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
}
return Cost;
}
}
+ // TODO: Handle the remaining intrinsic with scalable vector type
+ if (isa<ScalableVectorType>(RetTy))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+
+ // Assume that we need to scalarize this intrinsic.
+ SmallVector<Type *, 4> Types;
+ for (const Value *Op : Args) {
+ Type *OpTy = Op->getType();
+ assert(VF.isScalar() || !OpTy->isVectorTy());
+ Types.push_back(VF.isScalar()
+ ? OpTy
+ : FixedVectorType::get(OpTy, VF.getKnownMinValue()));
+ }
+
+ if (VF.isVector() && !RetTy->isVoidTy())
+ RetTy = FixedVectorType::get(RetTy, VF.getKnownMinValue());
+
+ // Compute the scalarization overhead based on Args for a vector
+ // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
+ // the CostModel will pass a vector RetTy and a VF of 1.
+ unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
+ if (RetVF.isVector() || VF.isVector()) {
+ ScalarizationCost = 0;
+ if (!RetTy->isVoidTy())
+ ScalarizationCost +=
+ getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
+ ScalarizationCost +=
+ getOperandsScalarizationOverhead(Args, VF.getKnownMinValue());
+ }
+
+ IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF, ScalarizationCost, I);
+ return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
}
/// Get intrinsic cost based on argument types.
@@ -1255,10 +1389,21 @@ public:
unsigned ScalarizationCostPassed = ICA.getScalarizationCost();
bool SkipScalarizationCost = ICA.skipScalarizationCost();
- auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]);
+ VectorType *VecOpTy = nullptr;
+ if (!Tys.empty()) {
+ // The vector reduction operand is operand 0 except for fadd/fmul.
+ // Their operand 0 is a scalar start value, so the vector op is operand 1.
+ unsigned VecTyIndex = 0;
+ if (IID == Intrinsic::vector_reduce_fadd ||
+ IID == Intrinsic::vector_reduce_fmul)
+ VecTyIndex = 1;
+ assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
+ VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
+ }
+ // Library call cost - other than size, make it expensive.
+ unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
SmallVector<unsigned, 2> ISDs;
- unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
switch (IID) {
default: {
// Assume that we need to scalarize this intrinsic.
@@ -1327,13 +1472,15 @@ public:
break;
case Intrinsic::minnum:
ISDs.push_back(ISD::FMINNUM);
- if (FMF.noNaNs())
- ISDs.push_back(ISD::FMINIMUM);
break;
case Intrinsic::maxnum:
ISDs.push_back(ISD::FMAXNUM);
- if (FMF.noNaNs())
- ISDs.push_back(ISD::FMAXIMUM);
+ break;
+ case Intrinsic::minimum:
+ ISDs.push_back(ISD::FMINIMUM);
+ break;
+ case Intrinsic::maximum:
+ ISDs.push_back(ISD::FMAXIMUM);
break;
case Intrinsic::copysign:
ISDs.push_back(ISD::FCOPYSIGN);
@@ -1375,6 +1522,7 @@ public:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::sideeffect:
+ case Intrinsic::pseudoprobe:
return 0;
case Intrinsic::masked_store: {
Type *Ty = Tys[0];
@@ -1388,50 +1536,72 @@ public:
return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
CostKind);
}
- case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::vector_reduce_add:
return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
/*IsPairwiseForm=*/false,
CostKind);
- case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::vector_reduce_mul:
return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
/*IsPairwiseForm=*/false,
CostKind);
- case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::vector_reduce_and:
return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
/*IsPairwiseForm=*/false,
CostKind);
- case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::vector_reduce_or:
return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy,
/*IsPairwiseForm=*/false,
CostKind);
- case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::vector_reduce_xor:
return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
/*IsPairwiseForm=*/false,
CostKind);
- case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::vector_reduce_fadd:
// FIXME: Add new flag for cost of strict reductions.
return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
/*IsPairwiseForm=*/false,
CostKind);
- case Intrinsic::experimental_vector_reduce_v2_fmul:
+ case Intrinsic::vector_reduce_fmul:
// FIXME: Add new flag for cost of strict reductions.
return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
/*IsPairwiseForm=*/false,
CostKind);
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
return thisT()->getMinMaxReductionCost(
VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
/*IsPairwiseForm=*/false,
/*IsUnsigned=*/false, CostKind);
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
return thisT()->getMinMaxReductionCost(
VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
/*IsPairwiseForm=*/false,
/*IsUnsigned=*/true, CostKind);
+ case Intrinsic::abs:
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin: {
+ // abs(X) = select(icmp(X,0),X,sub(0,X))
+ // minmax(X,Y) = select(icmp(X,Y),X,Y)
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ unsigned Cost = 0;
+ // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code.
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ // TODO: Should we add an OperandValueProperties::OP_Zero property?
+ if (IID == Intrinsic::abs)
+ Cost += thisT()->getArithmeticInstrCost(
+ BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
+ return Cost;
+ }
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat: {
Type *CondTy = RetTy->getWithNewBitWidth(1);
@@ -1447,10 +1617,12 @@ public:
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
ScalarizationCostPassed);
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
- CostKind);
- Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
- CondTy, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost += 2 * thisT()->getCmpSelInstrCost(
+ BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
return Cost;
}
case Intrinsic::uadd_sat:
@@ -1466,8 +1638,9 @@ public:
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
ScalarizationCostPassed);
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
- CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
return Cost;
}
case Intrinsic::smul_fix:
@@ -1477,13 +1650,14 @@ public:
unsigned ExtOp =
IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+ TTI::CastContextHint CCH = TTI::CastContextHint::None;
unsigned Cost = 0;
- Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CostKind);
+ Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
Cost +=
thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
- CostKind);
+ CCH, CostKind);
Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
CostKind, TTI::OK_AnyValue,
TTI::OK_UniformConstantValue);
@@ -1511,10 +1685,12 @@ public:
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
unsigned Cost = 0;
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
- Cost += 3 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
- OverflowTy, CostKind);
- Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, OverflowTy,
- OverflowTy, CostKind);
+ Cost += 3 * thisT()->getCmpSelInstrCost(
+ Instruction::ICmp, SumTy, OverflowTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost += 2 * thisT()->getCmpSelInstrCost(
+ Instruction::Select, OverflowTy, OverflowTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
CostKind);
return Cost;
@@ -1529,8 +1705,9 @@ public:
unsigned Cost = 0;
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
- OverflowTy, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
return Cost;
}
case Intrinsic::smul_with_overflow:
@@ -1542,13 +1719,14 @@ public:
unsigned ExtOp =
IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+ TTI::CastContextHint CCH = TTI::CastContextHint::None;
unsigned Cost = 0;
- Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CostKind);
+ Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
Cost +=
thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
- CostKind);
+ CCH, CostKind);
Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy,
CostKind, TTI::OK_AnyValue,
TTI::OK_UniformConstantValue);
@@ -1558,8 +1736,9 @@ public:
CostKind, TTI::OK_AnyValue,
TTI::OK_UniformConstantValue);
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
- OverflowTy, CostKind);
+ Cost +=
+ thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
return Cost;
}
case Intrinsic::ctpop:
@@ -1568,7 +1747,12 @@ public:
// library call but still not a cheap instruction.
SingleCallCost = TargetTransformInfo::TCC_Expensive;
break;
- // FIXME: ctlz, cttz, ...
+ case Intrinsic::ctlz:
+ ISDs.push_back(ISD::CTLZ);
+ break;
+ case Intrinsic::cttz:
+ ISDs.push_back(ISD::CTTZ);
+ break;
case Intrinsic::bswap:
ISDs.push_back(ISD::BSWAP);
break;
@@ -1604,7 +1788,7 @@ public:
}
}
- auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
+ auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
if (MinLegalCostI != LegalCost.end())
return *MinLegalCostI;
@@ -1801,9 +1985,10 @@ public:
(IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
Ty, NumVecElts, SubTy);
MinMaxCost +=
- thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) +
+ thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind) +
thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
- CostKind);
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
Ty = SubTy;
++LongVectorCount;
}
@@ -1825,15 +2010,37 @@ public:
thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
MinMaxCost +=
NumReduxLevels *
- (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
+ (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind) +
thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
- CostKind));
+ CmpInst::BAD_ICMP_PREDICATE, CostKind));
// The last min/max should be in vector registers and we counted it above.
// So just need a single extractelement.
return ShuffleCost + MinMaxCost +
thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
+ InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
+ Type *ResTy, VectorType *Ty,
+ TTI::TargetCostKind CostKind) {
+ // Without any native support, this is equivalent to the cost of
+ // vecreduce.add(ext) or if IsMLA vecreduce.add(mul(ext, ext))
+ VectorType *ExtTy = VectorType::get(ResTy, Ty);
+ unsigned RedCost = thisT()->getArithmeticReductionCost(
+ Instruction::Add, ExtTy, false, CostKind);
+ unsigned MulCost = 0;
+ unsigned ExtCost = thisT()->getCastInstrCost(
+ IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
+ TTI::CastContextHint::None, CostKind);
+ if (IsMLA) {
+ MulCost =
+ thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+ ExtCost *= 2;
+ }
+
+ return RedCost + MulCost + ExtCost;
+ }
+
unsigned getVectorSplitCost() { return 1; }
/// @}
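
The BasicTTIImpl.h changes above price the new integer abs/min/max intrinsics by summing the costs of their generic expansion (an icmp plus a select, with an extra sub for abs), and the new getExtendedAddReductionCost prices vecreduce.add(ext(...)) or vecreduce.add(mul(ext(...), ext(...))) the same way. A minimal standalone C++ sketch of that accumulation, using made-up unit costs rather than values queried from a real TTI:

// Illustration only: mirrors how the hunks above sum component costs.
// The unit costs are placeholders, not real target numbers.
#include <cstdio>

struct ToyCosts {
  unsigned Cmp = 1, Select = 1, Sub = 1, Mul = 1, Ext = 1, RedAdd = 2;
};

// abs(X) = select(icmp(X, 0), X, sub(0, X))
unsigned absCost(const ToyCosts &C) { return C.Cmp + C.Select + C.Sub; }

// smax/smin/umax/umin(X, Y) = select(icmp(X, Y), X, Y)
unsigned minMaxCost(const ToyCosts &C) { return C.Cmp + C.Select; }

// vecreduce.add(ext(X))              -> RedAdd + Ext
// vecreduce.add(mul(ext(X), ext(Y))) -> RedAdd + Mul + 2 * Ext
unsigned extAddReductionCost(const ToyCosts &C, bool IsMLA) {
  return C.RedAdd + (IsMLA ? C.Mul + 2 * C.Ext : C.Ext);
}

int main() {
  ToyCosts C;
  std::printf("abs: %u, minmax: %u, red(ext): %u, red(mla): %u\n",
              absCost(C), minMaxCost(C), extAddReductionCost(C, false),
              extAddReductionCost(C, true));
}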
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/CalcSpillWeights.h
index 9b8b7324f30a..78dae81f596e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CalcSpillWeights.h
@@ -44,64 +44,60 @@ class VirtRegMap;
/// Calculate auxiliary information for a virtual register such as its
/// spill weight and allocation hint.
class VirtRegAuxInfo {
- public:
- using NormalizingFn = float (*)(float, unsigned, unsigned);
-
- private:
MachineFunction &MF;
LiveIntervals &LIS;
- VirtRegMap *VRM;
+ const VirtRegMap &VRM;
const MachineLoopInfo &Loops;
const MachineBlockFrequencyInfo &MBFI;
- DenseMap<unsigned, float> Hint;
- NormalizingFn normalize;
public:
- VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis,
- VirtRegMap *vrm, const MachineLoopInfo &loops,
- const MachineBlockFrequencyInfo &mbfi,
- NormalizingFn norm = normalizeSpillWeight)
- : MF(mf), LIS(lis), VRM(vrm), Loops(loops), MBFI(mbfi), normalize(norm) {}
+ VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS,
+ const VirtRegMap &VRM, const MachineLoopInfo &Loops,
+ const MachineBlockFrequencyInfo &MBFI)
+ : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {}
+
+ virtual ~VirtRegAuxInfo() = default;
/// (re)compute li's spill weight and allocation hint.
- void calculateSpillWeightAndHint(LiveInterval &li);
+ void calculateSpillWeightAndHint(LiveInterval &LI);
- /// Compute future expected spill weight of a split artifact of li
+ /// Compute future expected spill weight of a split artifact of LI
/// that will span between start and end slot indexes.
- /// \param li The live interval to be split.
- /// \param start The expected begining of the split artifact. Instructions
+ /// \param LI The live interval to be split.
+ /// \param Start The expected beginning of the split artifact. Instructions
/// before start will not affect the weight.
- /// \param end The expected end of the split artifact. Instructions
+ /// \param End The expected end of the split artifact. Instructions
/// after end will not affect the weight.
/// \return The expected spill weight of the split artifact. Returns
- /// negative weight for unspillable li.
- float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end);
+ /// negative weight for unspillable LI.
+ float futureWeight(LiveInterval &LI, SlotIndex Start, SlotIndex End);
+
+ /// Compute spill weights and allocation hints for all virtual register
+ /// live intervals.
+ void calculateSpillWeightsAndHints();
+ protected:
/// Helper function for weight calculations.
- /// (Re)compute li's spill weight and allocation hint, or, for non null
+ /// (Re)compute LI's spill weight and allocation hint, or, for non null
/// start and end - compute future expected spill weight of a split
- /// artifact of li that will span between start and end slot indexes.
- /// \param li The live interval for which to compute the weight.
- /// \param start The expected begining of the split artifact. Instructions
+ /// artifact of LI that will span between start and end slot indexes.
+ /// \param LI The live interval for which to compute the weight.
+ /// \param Start The expected beginning of the split artifact. Instructions
/// before start will not affect the weight. Relevant for
/// weight calculation of future split artifact.
- /// \param end The expected end of the split artifact. Instructions
+ /// \param End The expected end of the split artifact. Instructions
/// after end will not affect the weight. Relevant for
/// weight calculation of future split artifact.
- /// \return The spill weight. Returns negative weight for unspillable li.
- float weightCalcHelper(LiveInterval &li, SlotIndex *start = nullptr,
- SlotIndex *end = nullptr);
- };
-
- /// Compute spill weights and allocation hints for all virtual register
- /// live intervals.
- void calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF,
- VirtRegMap *VRM,
- const MachineLoopInfo &MLI,
- const MachineBlockFrequencyInfo &MBFI,
- VirtRegAuxInfo::NormalizingFn norm =
- normalizeSpillWeight);
+ /// \return The spill weight. Returns negative weight for unspillable LI.
+ float weightCalcHelper(LiveInterval &LI, SlotIndex *Start = nullptr,
+ SlotIndex *End = nullptr);
+ /// Weight normalization function.
+ virtual float normalize(float UseDefFreq, unsigned Size,
+ unsigned NumInstr) {
+ return normalizeSpillWeight(UseDefFreq, Size, NumInstr);
+ }
+ };
} // end namespace llvm
#endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H
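
With the refactoring above, the spill-weight normalization hook becomes a protected virtual on VirtRegAuxInfo instead of a function-pointer parameter. A hedged sketch of how a client could customize it, based only on the interface shown in this hunk (the subclass name and its policy are hypothetical):

#include "llvm/CodeGen/CalcSpillWeights.h"

// Hypothetical subclass that ignores the instruction count when normalizing
// spill weights; everything else is inherited from VirtRegAuxInfo.
class FlatWeightAuxInfo : public llvm::VirtRegAuxInfo {
public:
  using VirtRegAuxInfo::VirtRegAuxInfo; // forward the (MF, LIS, VRM, ...) ctor

protected:
  float normalize(float UseDefFreq, unsigned Size,
                  unsigned NumInstr) override {
    (void)NumInstr; // deliberately dropped in this sketch
    return llvm::normalizeSpillWeight(UseDefFreq, Size, /*NumInstr=*/0);
  }
};

Calling calculateSpillWeightsAndHints() on such an instance would then compute all weights through the overridden normalization.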
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h
index 8ebe788ac360..2fe4e371263b 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -25,6 +25,7 @@
namespace llvm {
class CCState;
+class MachineFunction;
class MVT;
class TargetRegisterInfo;
@@ -339,6 +340,11 @@ public:
return Regs.size();
}
+ void DeallocateReg(MCPhysReg Reg) {
+ assert(isAllocated(Reg) && "Trying to deallocate an unallocated register");
+ MarkUnallocated(Reg);
+ }
+
/// AllocateReg - Attempt to allocate one register. If it is not available,
/// return zero. Otherwise, return the register, marking it and any aliases
/// as allocated.
@@ -432,10 +438,7 @@ public:
return AllocateStack(Size, Align(Alignment));
}
- void ensureMaxAlignment(Align Alignment) {
- if (!AnalyzingMustTailForwardedRegs)
- MF.getFrameInfo().ensureMaxAlignment(Alignment);
- }
+ void ensureMaxAlignment(Align Alignment);
/// Version of AllocateStack with extra register to be shadowed.
LLVM_ATTRIBUTE_DEPRECATED(unsigned AllocateStack(unsigned Size,
@@ -572,6 +575,8 @@ public:
private:
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void MarkAllocated(MCPhysReg Reg);
+
+ void MarkUnallocated(MCPhysReg Reg);
};
} // end namespace llvm
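
The new DeallocateReg hook above lets calling-convention code back out of a tentative register assignment. A hedged sketch of that pattern (the surrounding logic, the candidate register, and the failure check are invented for illustration):

#include "llvm/CodeGen/CallingConvLower.h"

// Illustration only: tentatively grab a register, then release it again if a
// later constraint check fails, so it stays available for other arguments.
static void tentativelyAssign(llvm::CCState &State, llvm::MCPhysReg Candidate,
                              bool LaterCheckFailed) {
  auto Reg = State.AllocateReg(Candidate); // zero if unavailable
  if (Reg && LaterCheckFailed)
    State.DeallocateReg(Candidate);
}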
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
new file mode 100644
index 000000000000..893bc6e013f4
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -0,0 +1,1144 @@
+//===- Construction of codegen pass pipelines ------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Interfaces for registering analysis passes, producing common pass manager
+/// configurations, and parsing of pass pipelines.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CODEGENPASSBUILDER_H
+#define LLVM_CODEGEN_CODEGENPASSBUILDER_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
+#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePassManager.h"
+#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/CodeGen/UnreachableBlockElim.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/CGPassBuilderOption.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/ConstantHoisting.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
+#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
+#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
+#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Transforms/Utils/LowerInvoke.h"
+#include <cassert>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+namespace llvm {
+
+// FIXME: Dummy definitions for target-independent passes that have not yet
+// been ported to the new pass manager. Once they are, remove these.
+#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+ template <typename... Ts> PASS_NAME(Ts &&...) {} \
+ PreservedAnalyses run(Function &, FunctionAnalysisManager &) { \
+ return PreservedAnalyses::all(); \
+ } \
+ };
+#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+ template <typename... Ts> PASS_NAME(Ts &&...) {} \
+ PreservedAnalyses run(Module &, ModuleAnalysisManager &) { \
+ return PreservedAnalyses::all(); \
+ } \
+ };
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+ template <typename... Ts> PASS_NAME(Ts &&...) {} \
+ Error run(Module &, MachineFunctionAnalysisManager &) { \
+ return Error::success(); \
+ } \
+ PreservedAnalyses run(MachineFunction &, \
+ MachineFunctionAnalysisManager &) { \
+ llvm_unreachable("this api is to make new PM api happy"); \
+ } \
+ static AnalysisKey Key; \
+ };
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \
+ template <typename... Ts> PASS_NAME(Ts &&...) {} \
+ PreservedAnalyses run(MachineFunction &, \
+ MachineFunctionAnalysisManager &) { \
+ return PreservedAnalyses::all(); \
+ } \
+ static AnalysisKey Key; \
+ };
+#include "MachinePassRegistry.def"
+
+/// This class provides access to building LLVM's passes.
+///
+/// Its members provide the baseline state available to passes during their
+/// construction. The \c MachinePassRegistry.def file specifies how to construct
+/// all of the built-in passes, and those may reference these members during
+/// construction.
+template <typename DerivedT> class CodeGenPassBuilder {
+public:
+ explicit CodeGenPassBuilder(LLVMTargetMachine &TM, CGPassBuilderOption Opts,
+ PassInstrumentationCallbacks *PIC)
+ : TM(TM), Opt(Opts), PIC(PIC) {
+ // Target could set CGPassBuilderOption::MISchedPostRA to true to achieve
+ // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID)
+
+    // Targets should override TM.Options.EnableIPRA in their target-specific
+    // LLVMTM ctor. See TargetMachine::setGlobalISel for an example.
+ if (Opt.EnableIPRA)
+ TM.Options.EnableIPRA = *Opt.EnableIPRA;
+
+ if (Opt.EnableGlobalISelAbort)
+ TM.Options.GlobalISelAbort = *Opt.EnableGlobalISelAbort;
+
+ if (!Opt.OptimizeRegAlloc)
+ Opt.OptimizeRegAlloc = getOptLevel() != CodeGenOpt::None;
+ }
+
+ Error buildPipeline(ModulePassManager &MPM, MachineFunctionPassManager &MFPM,
+ raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType) const;
+
+ void registerModuleAnalyses(ModuleAnalysisManager &) const;
+ void registerFunctionAnalyses(FunctionAnalysisManager &) const;
+ void registerMachineFunctionAnalyses(MachineFunctionAnalysisManager &) const;
+ std::pair<StringRef, bool> getPassNameFromLegacyName(StringRef) const;
+
+ void registerAnalyses(MachineFunctionAnalysisManager &MFAM) const {
+ registerModuleAnalyses(*MFAM.MAM);
+ registerFunctionAnalyses(*MFAM.FAM);
+ registerMachineFunctionAnalyses(MFAM);
+ }
+
+ PassInstrumentationCallbacks *getPassInstrumentationCallbacks() const {
+ return PIC;
+ }
+
+protected:
+ template <typename PassT> using has_key_t = decltype(PassT::Key);
+
+ template <typename PassT>
+ using is_module_pass_t = decltype(std::declval<PassT &>().run(
+ std::declval<Module &>(), std::declval<ModuleAnalysisManager &>()));
+
+ template <typename PassT>
+ using is_function_pass_t = decltype(std::declval<PassT &>().run(
+ std::declval<Function &>(), std::declval<FunctionAnalysisManager &>()));
+
+ // Function object to maintain state while adding codegen IR passes.
+ class AddIRPass {
+ public:
+ AddIRPass(ModulePassManager &MPM, bool DebugPM, bool Check = true)
+ : MPM(MPM), FPM(DebugPM) {
+ if (Check)
+ AddingFunctionPasses = false;
+ }
+ ~AddIRPass() {
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ }
+
+ // Add Function Pass
+ template <typename PassT>
+ std::enable_if_t<is_detected<is_function_pass_t, PassT>::value>
+ operator()(PassT &&Pass) {
+ if (AddingFunctionPasses && !*AddingFunctionPasses)
+ AddingFunctionPasses = true;
+ FPM.addPass(std::forward<PassT>(Pass));
+ }
+
+ // Add Module Pass
+ template <typename PassT>
+ std::enable_if_t<is_detected<is_module_pass_t, PassT>::value &&
+ !is_detected<is_function_pass_t, PassT>::value>
+ operator()(PassT &&Pass) {
+ assert((!AddingFunctionPasses || !*AddingFunctionPasses) &&
+ "could not add module pass after adding function pass");
+ MPM.addPass(std::forward<PassT>(Pass));
+ }
+
+ private:
+ ModulePassManager &MPM;
+ FunctionPassManager FPM;
+    // The codegen IR pipeline consists mostly of function passes, with the
+    // exception of a few loop and module passes. `AddingFunctionPasses` makes
+    // sure that module passes can only be added at the beginning of the
+    // pipeline; once we begin adding function passes, no more module passes
+    // can be added. This special-casing introduces fewer adaptor passes. If
+    // we ever need to add module passes after function passes, the
+    // implementation can be changed to accommodate that.
+ Optional<bool> AddingFunctionPasses;
+ };
+
+ // Function object to maintain state while adding codegen machine passes.
+ class AddMachinePass {
+ public:
+ AddMachinePass(MachineFunctionPassManager &PM) : PM(PM) {}
+
+ template <typename PassT> void operator()(PassT &&Pass) {
+ static_assert(
+ is_detected<has_key_t, PassT>::value,
+ "Machine function pass must define a static member variable `Key`.");
+ for (auto &C : BeforeCallbacks)
+ if (!C(&PassT::Key))
+ return;
+ PM.addPass(std::forward<PassT>(Pass));
+ for (auto &C : AfterCallbacks)
+ C(&PassT::Key);
+ }
+
+ template <typename PassT> void insertPass(AnalysisKey *ID, PassT Pass) {
+ AfterCallbacks.emplace_back(
+ [this, ID, Pass = std::move(Pass)](AnalysisKey *PassID) {
+ if (PassID == ID)
+ this->PM.addPass(std::move(Pass));
+ });
+ }
+
+ void disablePass(AnalysisKey *ID) {
+ BeforeCallbacks.emplace_back(
+ [ID](AnalysisKey *PassID) { return PassID != ID; });
+ }
+
+ MachineFunctionPassManager releasePM() { return std::move(PM); }
+
+ private:
+ MachineFunctionPassManager &PM;
+ SmallVector<llvm::unique_function<bool(AnalysisKey *)>, 4> BeforeCallbacks;
+ SmallVector<llvm::unique_function<void(AnalysisKey *)>, 4> AfterCallbacks;
+ };
+
+ LLVMTargetMachine &TM;
+ CGPassBuilderOption Opt;
+ PassInstrumentationCallbacks *PIC;
+
+  /// Targets override these hooks to register target-specific analyses.
+ void registerTargetAnalysis(ModuleAnalysisManager &) const {}
+ void registerTargetAnalysis(FunctionAnalysisManager &) const {}
+ void registerTargetAnalysis(MachineFunctionAnalysisManager &) const {}
+ std::pair<StringRef, bool> getTargetPassNameFromLegacyName(StringRef) const {
+ return {"", false};
+ }
+
+ template <typename TMC> TMC &getTM() const { return static_cast<TMC &>(TM); }
+ CodeGenOpt::Level getOptLevel() const { return TM.getOptLevel(); }
+
+ /// Check whether or not GlobalISel should abort on error.
+ /// When this is disabled, GlobalISel will fall back on SDISel instead of
+ /// erroring out.
+ bool isGlobalISelAbortEnabled() const {
+ return TM.Options.GlobalISelAbort == GlobalISelAbortMode::Enable;
+ }
+
+ /// Check whether or not a diagnostic should be emitted when GlobalISel
+ /// uses the fallback path. In other words, it will emit a diagnostic
+ /// when GlobalISel failed and isGlobalISelAbortEnabled is false.
+ bool reportDiagnosticWhenGlobalISelFallback() const {
+ return TM.Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag;
+ }
+
+ /// addInstSelector - This method should install an instruction selector pass,
+ /// which converts from LLVM code to machine instructions.
+ Error addInstSelector(AddMachinePass &) const {
+ return make_error<StringError>("addInstSelector is not overridden",
+ inconvertibleErrorCode());
+ }
+
+ /// Add passes that optimize instruction level parallelism for out-of-order
+ /// targets. These passes are run while the machine code is still in SSA
+ /// form, so they can use MachineTraceMetrics to control their heuristics.
+ ///
+ /// All passes added here should preserve the MachineDominatorTree,
+ /// MachineLoopInfo, and MachineTraceMetrics analyses.
+ void addILPOpts(AddMachinePass &) const {}
+
+ /// This method may be implemented by targets that want to run passes
+ /// immediately before register allocation.
+ void addPreRegAlloc(AddMachinePass &) const {}
+
+ /// addPreRewrite - Add passes to the optimized register allocation pipeline
+ /// after register allocation is complete, but before virtual registers are
+ /// rewritten to physical registers.
+ ///
+ /// These passes must preserve VirtRegMap and LiveIntervals, and when running
+ /// after RABasic or RAGreedy, they should take advantage of LiveRegMatrix.
+ /// When these passes run, VirtRegMap contains legal physreg assignments for
+ /// all virtual registers.
+ ///
+  /// Note that if the target overloads addRegAssignAndRewriteOptimized, this
+  /// may not be honored. This is also not generally used for the fast variant,
+ /// where the allocation and rewriting are done in one pass.
+ void addPreRewrite(AddMachinePass &) const {}
+
+ /// Add passes to be run immediately after virtual registers are rewritten
+ /// to physical registers.
+ void addPostRewrite(AddMachinePass &) const {}
+
+ /// This method may be implemented by targets that want to run passes after
+ /// register allocation pass pipeline but before prolog-epilog insertion.
+ void addPostRegAlloc(AddMachinePass &) const {}
+
+ /// This method may be implemented by targets that want to run passes after
+ /// prolog-epilog insertion and before the second instruction scheduling pass.
+ void addPreSched2(AddMachinePass &) const {}
+
+ /// This pass may be implemented by targets that want to run passes
+ /// immediately before machine code is emitted.
+ void addPreEmitPass(AddMachinePass &) const {}
+
+ /// Targets may add passes immediately before machine code is emitted in this
+ /// callback. This is called even later than `addPreEmitPass`.
+ // FIXME: Rename `addPreEmitPass` to something more sensible given its actual
+ // position and remove the `2` suffix here as this callback is what
+ // `addPreEmitPass` *should* be but in reality isn't.
+ void addPreEmitPass2(AddMachinePass &) const {}
+
+ /// {{@ For GlobalISel
+ ///
+
+ /// addPreISel - This method should add any "last minute" LLVM->LLVM
+ /// passes (which are run just before instruction selector).
+ void addPreISel(AddIRPass &) const {
+ llvm_unreachable("addPreISel is not overridden");
+ }
+
+ /// This method should install an IR translator pass, which converts from
+ /// LLVM code to machine instructions with possibly generic opcodes.
+ Error addIRTranslator(AddMachinePass &) const {
+ return make_error<StringError>("addIRTranslator is not overridden",
+ inconvertibleErrorCode());
+ }
+
+ /// This method may be implemented by targets that want to run passes
+ /// immediately before legalization.
+ void addPreLegalizeMachineIR(AddMachinePass &) const {}
+
+ /// This method should install a legalize pass, which converts the instruction
+ /// sequence into one that can be selected by the target.
+ Error addLegalizeMachineIR(AddMachinePass &) const {
+ return make_error<StringError>("addLegalizeMachineIR is not overridden",
+ inconvertibleErrorCode());
+ }
+
+ /// This method may be implemented by targets that want to run passes
+ /// immediately before the register bank selection.
+ void addPreRegBankSelect(AddMachinePass &) const {}
+
+ /// This method should install a register bank selector pass, which
+ /// assigns register banks to virtual registers without a register
+ /// class or register banks.
+ Error addRegBankSelect(AddMachinePass &) const {
+ return make_error<StringError>("addRegBankSelect is not overridden",
+ inconvertibleErrorCode());
+ }
+
+ /// This method may be implemented by targets that want to run passes
+ /// immediately before the (global) instruction selection.
+ void addPreGlobalInstructionSelect(AddMachinePass &) const {}
+
+ /// This method should install a (global) instruction selector pass, which
+ /// converts possibly generic instructions to fully target-specific
+ /// instructions, thereby constraining all generic virtual registers to
+ /// register classes.
+ Error addGlobalInstructionSelect(AddMachinePass &) const {
+ return make_error<StringError>(
+ "addGlobalInstructionSelect is not overridden",
+ inconvertibleErrorCode());
+ }
+ /// @}}
+
+ /// High level function that adds all passes necessary to go from llvm IR
+ /// representation to the MI representation.
+ /// Adds IR based lowering and target specific optimization passes and finally
+ /// the core instruction selection passes.
+  /// (The core instruction selection passes are added by addCoreISelPasses.)
+ void addISelPasses(AddIRPass &) const;
+
+ /// Add the actual instruction selection passes. This does not include
+ /// preparation passes on IR.
+ Error addCoreISelPasses(AddMachinePass &) const;
+
+ /// Add the complete, standard set of LLVM CodeGen passes.
+ /// Fully developed targets will not generally override this.
+ Error addMachinePasses(AddMachinePass &) const;
+
+ /// Add passes to lower exception handling for the code generator.
+ void addPassesToHandleExceptions(AddIRPass &) const;
+
+ /// Add common target configurable passes that perform LLVM IR to IR
+ /// transforms following machine independent optimization.
+ void addIRPasses(AddIRPass &) const;
+
+ /// Add pass to prepare the LLVM IR for code generation. This should be done
+ /// before exception handling preparation passes.
+ void addCodeGenPrepare(AddIRPass &) const;
+
+ /// Add common passes that perform LLVM IR to IR transforms in preparation for
+ /// instruction selection.
+ void addISelPrepare(AddIRPass &) const;
+
+ /// Methods with trivial inline returns are convenient points in the common
+ /// codegen pass pipeline where targets may insert passes. Methods with
+ /// out-of-line standard implementations are major CodeGen stages called by
+ /// addMachinePasses. Some targets may override major stages when inserting
+  /// passes is insufficient, but maintaining overridden stages is more work.
+ ///
+
+ /// addMachineSSAOptimization - Add standard passes that optimize machine
+ /// instructions in SSA form.
+ void addMachineSSAOptimization(AddMachinePass &) const;
+
+ /// addFastRegAlloc - Add the minimum set of target-independent passes that
+ /// are required for fast register allocation.
+ Error addFastRegAlloc(AddMachinePass &) const;
+
+ /// addOptimizedRegAlloc - Add passes related to register allocation.
+ /// LLVMTargetMachine provides standard regalloc passes for most targets.
+ void addOptimizedRegAlloc(AddMachinePass &) const;
+
+ /// Add passes that optimize machine instructions after register allocation.
+ void addMachineLateOptimization(AddMachinePass &) const;
+
+  /// addGCPasses - Add late codegen passes that analyze code for garbage
+  /// collection. The default implementation adds nothing; targets override
+  /// this hook to insert their own GC passes.
+ void addGCPasses(AddMachinePass &) const {}
+
+ /// Add standard basic block placement passes.
+ void addBlockPlacement(AddMachinePass &) const;
+
+ using CreateMCStreamer =
+ std::function<Expected<std::unique_ptr<MCStreamer>>(MCContext &)>;
+ void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const {
+ llvm_unreachable("addAsmPrinter is not overridden");
+ }
+
+ /// Utilities for targets to add passes to the pass manager.
+ ///
+
+ /// createTargetRegisterAllocator - Create the register allocator pass for
+ /// this target at the current optimization level.
+ void addTargetRegisterAllocator(AddMachinePass &, bool Optimized) const;
+
+  /// addMachinePasses helper to create the target-selected or overridden
+ /// regalloc pass.
+ void addRegAllocPass(AddMachinePass &, bool Optimized) const;
+
+  /// Add core register allocator passes which do the actual register
+  /// assignment and rewriting. \returns an Error on failure.
+ Error addRegAssignmentFast(AddMachinePass &) const;
+ Error addRegAssignmentOptimized(AddMachinePass &) const;
+
+private:
+ DerivedT &derived() { return static_cast<DerivedT &>(*this); }
+ const DerivedT &derived() const {
+ return static_cast<const DerivedT &>(*this);
+ }
+};
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::buildPipeline(
+ ModulePassManager &MPM, MachineFunctionPassManager &MFPM,
+ raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType) const {
+ AddIRPass addIRPass(MPM, Opt.DebugPM);
+ addISelPasses(addIRPass);
+
+ AddMachinePass addPass(MFPM);
+ if (auto Err = addCoreISelPasses(addPass))
+ return std::move(Err);
+
+ if (auto Err = derived().addMachinePasses(addPass))
+ return std::move(Err);
+
+ derived().addAsmPrinter(
+ addPass, [this, &Out, DwoOut, FileType](MCContext &Ctx) {
+ return this->TM.createMCStreamer(Out, DwoOut, FileType, Ctx);
+ });
+
+ addPass(FreeMachineFunctionPass());
+ return Error::success();
+}
+
+static inline AAManager registerAAAnalyses(CFLAAType UseCFLAA) {
+ AAManager AA;
+
+ // The order in which these are registered determines their priority when
+ // being queried.
+
+ switch (UseCFLAA) {
+ case CFLAAType::Steensgaard:
+ AA.registerFunctionAnalysis<CFLSteensAA>();
+ break;
+ case CFLAAType::Andersen:
+ AA.registerFunctionAnalysis<CFLAndersAA>();
+ break;
+ case CFLAAType::Both:
+ AA.registerFunctionAnalysis<CFLAndersAA>();
+ AA.registerFunctionAnalysis<CFLSteensAA>();
+ break;
+ default:
+ break;
+ }
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ AA.registerFunctionAnalysis<TypeBasedAA>();
+ AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+ AA.registerFunctionAnalysis<BasicAA>();
+
+ return AA;
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerModuleAnalyses(
+ ModuleAnalysisManager &MAM) const {
+#define MODULE_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
+ MAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+ derived().registerTargetAnalysis(MAM);
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerFunctionAnalyses(
+ FunctionAnalysisManager &FAM) const {
+ FAM.registerPass([this] { return registerAAAnalyses(this->Opt.UseCFLAA); });
+
+#define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
+ FAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+ derived().registerTargetAnalysis(FAM);
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerMachineFunctionAnalyses(
+ MachineFunctionAnalysisManager &MFAM) const {
+#define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
+ MFAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+ derived().registerTargetAnalysis(MFAM);
+}
+
+// FIXME: For the new PM, using the pass name directly on the command line
+// seems preferable. Translates a stringified pass name to its old command-line
+// name. Returns the matching legacy name and a boolean value indicating
+// whether the pass is a machine pass.
+template <typename Derived>
+std::pair<StringRef, bool>
+CodeGenPassBuilder<Derived>::getPassNameFromLegacyName(StringRef Name) const {
+ std::pair<StringRef, bool> Ret;
+ if (Name.empty())
+ return Ret;
+
+#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, false};
+#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, false};
+#define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, false};
+#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, false};
+#define MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, true};
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, true};
+#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, true};
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ if (Name == NAME) \
+ Ret = {#PASS_NAME, true};
+#include "llvm/CodeGen/MachinePassRegistry.def"
+
+ if (Ret.first.empty())
+ Ret = derived().getTargetPassNameFromLegacyName(Name);
+
+ if (Ret.first.empty())
+ report_fatal_error(Twine('\"') + Twine(Name) +
+ Twine("\" pass could not be found."));
+
+ return Ret;
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addISelPasses(AddIRPass &addPass) const {
+ if (TM.useEmulatedTLS())
+ addPass(LowerEmuTLSPass());
+
+ addPass(PreISelIntrinsicLoweringPass());
+
+ derived().addIRPasses(addPass);
+ derived().addCodeGenPrepare(addPass);
+ addPassesToHandleExceptions(addPass);
+ derived().addISelPrepare(addPass);
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR transforms
+/// following machine independent optimization.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!Opt.DisableVerify)
+ addPass(VerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (getOptLevel() != CodeGenOpt::None && !Opt.DisableLSR) {
+ addPass(createFunctionToLoopPassAdaptor(
+ LoopStrengthReducePass(), /*UseMemorySSA*/ true, Opt.DebugPM));
+ // FIXME: use -stop-after so we could remove PrintLSR
+ if (Opt.PrintLSR)
+ addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
+ }
+
+ if (getOptLevel() != CodeGenOpt::None) {
+ // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
+ // loads and compares. ExpandMemCmpPass then tries to expand those calls
+ // into optimally-sized loads and compares. The transforms are enabled by a
+ // target lowering hook.
+ if (!Opt.DisableMergeICmps)
+ addPass(MergeICmpsPass());
+ addPass(ExpandMemCmpPass());
+ }
+
+ // Run GC lowering passes for builtin collectors
+ // TODO: add a pass insertion point here
+ addPass(GCLoweringPass());
+ addPass(ShadowStackGCLoweringPass());
+ addPass(LowerConstantIntrinsicsPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ addPass(UnreachableBlockElimPass());
+
+ // Prepare expensive constants for SelectionDAG.
+ if (getOptLevel() != CodeGenOpt::None && !Opt.DisableConstantHoisting)
+ addPass(ConstantHoistingPass());
+
+ if (getOptLevel() != CodeGenOpt::None && !Opt.DisablePartialLibcallInlining)
+ addPass(PartiallyInlineLibCallsPass());
+
+ // Instrument function entry and exit, e.g. with calls to mcount().
+ addPass(EntryExitInstrumenterPass(/*PostInlining=*/true));
+
+  // Add the pass that scalarizes masked memory intrinsics the target does not
+  // support: each unsupported intrinsic is replaced with a chain of basic
+  // blocks that store/load the elements one by one if the mask bit is set.
+ addPass(ScalarizeMaskedMemIntrinPass());
+
+ // Expand reduction intrinsics into shuffle sequences if the target wants to.
+ addPass(ExpandReductionsPass());
+}
+
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addPassesToHandleExceptions(
+ AddIRPass &addPass) const {
+ const MCAsmInfo *MCAI = TM.getMCAsmInfo();
+ assert(MCAI && "No MCAsmInfo");
+ switch (MCAI->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ addPass(SjLjEHPreparePass());
+ LLVM_FALLTHROUGH;
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::AIX:
+ addPass(DwarfEHPass(getOptLevel()));
+ break;
+ case ExceptionHandling::WinEH:
+ // We support using both GCC-style and MSVC-style exceptions on Windows, so
+ // add both preparation passes. Each pass will only actually run if it
+ // recognizes the personality function.
+ addPass(WinEHPass());
+ addPass(DwarfEHPass(getOptLevel()));
+ break;
+ case ExceptionHandling::Wasm:
+ // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs
+ // on catchpads and cleanuppads because it does not outline them into
+ // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we
+ // should remove PHIs there.
+ addPass(WinEHPass(/*DemoteCatchSwitchPHIOnly=*/false));
+ addPass(WasmEHPass());
+ break;
+ case ExceptionHandling::None:
+ addPass(LowerInvokePass());
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ addPass(UnreachableBlockElimPass());
+ break;
+ }
+}
+
+/// Add pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addCodeGenPrepare(AddIRPass &addPass) const {
+ if (getOptLevel() != CodeGenOpt::None && !Opt.DisableCGP)
+ addPass(CodeGenPreparePass());
+ // TODO: Default ctor'd RewriteSymbolPass is no-op.
+ // addPass(RewriteSymbolPass());
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addISelPrepare(AddIRPass &addPass) const {
+ derived().addPreISel(addPass);
+
+ // Add both the safe stack and the stack protection passes: each of them will
+ // only protect functions that have corresponding attributes.
+ addPass(SafeStackPass());
+ addPass(StackProtectorPass());
+
+ if (Opt.PrintISelInput)
+ addPass(PrintFunctionPass(dbgs(),
+ "\n\n*** Final LLVM Code input to ISel ***\n"));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!Opt.DisableVerify)
+ addPass(VerifierPass());
+}
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addCoreISelPasses(
+ AddMachinePass &addPass) const {
+ // Enable FastISel with -fast-isel, but allow that to be overridden.
+ TM.setO0WantsFastISel(Opt.EnableFastISelOption.getValueOr(true));
+
+ // Determine an instruction selector.
+ enum class SelectorType { SelectionDAG, FastISel, GlobalISel };
+ SelectorType Selector;
+
+ if (Opt.EnableFastISelOption && *Opt.EnableFastISelOption == true)
+ Selector = SelectorType::FastISel;
+ else if ((Opt.EnableGlobalISelOption &&
+ *Opt.EnableGlobalISelOption == true) ||
+ (TM.Options.EnableGlobalISel &&
+ (!Opt.EnableGlobalISelOption ||
+ *Opt.EnableGlobalISelOption == false)))
+ Selector = SelectorType::GlobalISel;
+ else if (TM.getOptLevel() == CodeGenOpt::None && TM.getO0WantsFastISel())
+ Selector = SelectorType::FastISel;
+ else
+ Selector = SelectorType::SelectionDAG;
+
+ // Set consistently TM.Options.EnableFastISel and EnableGlobalISel.
+ if (Selector == SelectorType::FastISel) {
+ TM.setFastISel(true);
+ TM.setGlobalISel(false);
+ } else if (Selector == SelectorType::GlobalISel) {
+ TM.setFastISel(false);
+ TM.setGlobalISel(true);
+ }
+
+ // Add instruction selector passes.
+ if (Selector == SelectorType::GlobalISel) {
+ if (auto Err = derived().addIRTranslator(addPass))
+ return std::move(Err);
+
+ derived().addPreLegalizeMachineIR(addPass);
+
+ if (auto Err = derived().addLegalizeMachineIR(addPass))
+ return std::move(Err);
+
+ // Before running the register bank selector, ask the target if it
+ // wants to run some passes.
+ derived().addPreRegBankSelect(addPass);
+
+ if (auto Err = derived().addRegBankSelect(addPass))
+ return std::move(Err);
+
+ derived().addPreGlobalInstructionSelect(addPass);
+
+ if (auto Err = derived().addGlobalInstructionSelect(addPass))
+ return std::move(Err);
+
+ // Pass to reset the MachineFunction if the ISel failed.
+ addPass(ResetMachineFunctionPass(reportDiagnosticWhenGlobalISelFallback(),
+ isGlobalISelAbortEnabled()));
+
+ // Provide a fallback path when we do not want to abort on
+ // not-yet-supported input.
+ if (!isGlobalISelAbortEnabled())
+ if (auto Err = derived().addInstSelector(addPass))
+ return std::move(Err);
+
+ } else if (auto Err = derived().addInstSelector(addPass))
+ return std::move(Err);
+
+ // Expand pseudo-instructions emitted by ISel. Don't run the verifier before
+ // FinalizeISel.
+ addPass(FinalizeISelPass());
+
+ // // Print the instruction selected machine code...
+ // printAndVerify("After Instruction Selection");
+
+ return Error::success();
+}
+
+/// Add the complete set of target-independent postISel code generator passes.
+///
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
+///
+/// Any CodeGenPassBuilder<Derived>::addXX routine may be overridden by the
+/// Target. The addPre/Post methods with empty header implementations allow
+/// injecting target-specific fixups just before or after major stages.
+/// Additionally, targets have the flexibility to change pass order within a
+/// stage by overriding default implementation of add%Stage routines below. Each
+/// technique has maintainability tradeoffs because alternate pass orders are
+/// not well supported. addPre/Post works better if the target pass is easily
+/// tied to a common pass. But if it has subtle dependencies on multiple passes,
+/// the target should override the stage instead.
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addMachinePasses(
+ AddMachinePass &addPass) const {
+ // Add passes that optimize machine instructions in SSA form.
+ if (getOptLevel() != CodeGenOpt::None) {
+ derived().addMachineSSAOptimization(addPass);
+ } else {
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(LocalStackSlotPass());
+ }
+
+ if (TM.Options.EnableIPRA)
+ addPass(RegUsageInfoPropagationPass());
+
+ // Run pre-ra passes.
+ derived().addPreRegAlloc(addPass);
+
+ // Run register allocation and passes that are tightly coupled with it,
+ // including phi elimination and scheduling.
+ if (*Opt.OptimizeRegAlloc) {
+ derived().addOptimizedRegAlloc(addPass);
+ } else {
+ if (auto Err = derived().addFastRegAlloc(addPass))
+ return Err;
+ }
+
+ // Run post-ra passes.
+ derived().addPostRegAlloc(addPass);
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(PostRAMachineSinkingPass());
+ addPass(ShrinkWrapPass());
+ }
+
+ addPass(PrologEpilogInserterPass());
+
+ /// Add passes that optimize machine instructions after register allocation.
+ if (getOptLevel() != CodeGenOpt::None)
+ derived().addMachineLateOptimization(addPass);
+
+ // Expand pseudo instructions before second scheduling pass.
+ addPass(ExpandPostRAPseudosPass());
+
+ // Run pre-sched2 passes.
+ derived().addPreSched2(addPass);
+
+ if (Opt.EnableImplicitNullChecks)
+ addPass(ImplicitNullChecksPass());
+
+ // Second pass scheduler.
+ // Let Target optionally insert this pass by itself at some other
+ // point.
+ if (getOptLevel() != CodeGenOpt::None &&
+ !TM.targetSchedulesPostRAScheduling()) {
+ if (Opt.MISchedPostRA)
+ addPass(PostMachineSchedulerPass());
+ else
+ addPass(PostRASchedulerPass());
+ }
+
+ // GC
+ derived().addGCPasses(addPass);
+
+ // Basic block placement.
+ if (getOptLevel() != CodeGenOpt::None)
+ derived().addBlockPlacement(addPass);
+
+ // Insert before XRay Instrumentation.
+ addPass(FEntryInserterPass());
+
+ addPass(XRayInstrumentationPass());
+ addPass(PatchableFunctionPass());
+
+ derived().addPreEmitPass(addPass);
+
+ if (TM.Options.EnableIPRA)
+ // Collect register usage information and produce a register mask of
+ // clobbered registers, to be used to optimize call sites.
+ addPass(RegUsageInfoCollectorPass());
+
+ addPass(FuncletLayoutPass());
+
+ addPass(StackMapLivenessPass());
+ addPass(LiveDebugValuesPass());
+
+ if (TM.Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
+ Opt.EnableMachineOutliner != RunOutliner::NeverOutline) {
+ bool RunOnAllFunctions =
+ (Opt.EnableMachineOutliner == RunOutliner::AlwaysOutline);
+ bool AddOutliner = RunOnAllFunctions || TM.Options.SupportsDefaultOutlining;
+ if (AddOutliner)
+ addPass(MachineOutlinerPass(RunOnAllFunctions));
+ }
+
+ // Add passes that directly emit MI after all other MI passes.
+ derived().addPreEmitPass2(addPass);
+
+ return Error::success();
+}
+
+/// Add passes that optimize machine instructions in SSA form.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addMachineSSAOptimization(
+ AddMachinePass &addPass) const {
+ // Pre-ra tail duplication.
+ addPass(EarlyTailDuplicatePass());
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(OptimizePHIsPass());
+
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(StackColoringPass());
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(LocalStackSlotPass());
+
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(DeadMachineInstructionElimPass());
+
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ derived().addILPOpts(addPass);
+
+ addPass(EarlyMachineLICMPass());
+ addPass(MachineCSEPass());
+
+ addPass(MachineSinkingPass());
+
+ addPass(PeepholeOptimizerPass());
+ // Clean-up the dead code that may have been generated by peephole
+ // rewriting.
+ addPass(DeadMachineInstructionElimPass());
+}
+
+//===---------------------------------------------------------------------===//
+/// Register Allocation Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Instantiate the default register allocator pass for this target for either
+/// the optimized or unoptimized allocation path. This will be added to the pass
+/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
+/// in the optimized case.
+///
+/// A target that uses the standard regalloc pass order for fast or optimized
+/// allocation may still override this for per-target regalloc
+/// selection. But -regalloc=... always takes precedence.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addTargetRegisterAllocator(
+ AddMachinePass &addPass, bool Optimized) const {
+ if (Optimized)
+ addPass(RAGreedyPass());
+ else
+ addPass(RAFastPass());
+}
+
+/// Find and instantiate the register allocation pass requested by this target
+/// at the current optimization level. Different register allocators are
+/// defined as separate passes because they may require different analysis.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addRegAllocPass(AddMachinePass &addPass,
+ bool Optimized) const {
+ if (Opt.RegAlloc == RegAllocType::Default)
+ // With no -regalloc= override, ask the target for a regalloc pass.
+ derived().addTargetRegisterAllocator(addPass, Optimized);
+ else if (Opt.RegAlloc == RegAllocType::Basic)
+ addPass(RABasicPass());
+ else if (Opt.RegAlloc == RegAllocType::Fast)
+ addPass(RAFastPass());
+ else if (Opt.RegAlloc == RegAllocType::Greedy)
+ addPass(RAGreedyPass());
+ else if (Opt.RegAlloc == RegAllocType::PBQP)
+ addPass(RAPBQPPass());
+ else
+    llvm_unreachable("unknown register allocator type");
+}
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addRegAssignmentFast(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Default &&
+ Opt.RegAlloc != RegAllocType::Fast)
+ return make_error<StringError>(
+ "Must use fast (default) register allocator for unoptimized regalloc.",
+ inconvertibleErrorCode());
+
+ addRegAllocPass(addPass, false);
+ return Error::success();
+}
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addRegAssignmentOptimized(
+ AddMachinePass &addPass) const {
+ // Add the selected register allocation pass.
+ addRegAllocPass(addPass, true);
+
+ // Allow targets to change the register assignments before rewriting.
+ derived().addPreRewrite(addPass);
+
+ // Finally rewrite virtual registers.
+ addPass(VirtRegRewriterPass());
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(StackSlotColoringPass());
+
+ return Error::success();
+}
+
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addFastRegAlloc(
+ AddMachinePass &addPass) const {
+ addPass(PHIEliminationPass());
+ addPass(TwoAddressInstructionPass());
+ return derived().addRegAssignmentFast(addPass);
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addOptimizedRegAlloc(
+ AddMachinePass &addPass) const {
+ addPass(DetectDeadLanesPass());
+
+ addPass(ProcessImplicitDefsPass());
+
+ // Edge splitting is smarter with machine loop info.
+ addPass(PHIEliminationPass());
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (Opt.EarlyLiveIntervals)
+ addPass(LiveIntervalsPass());
+
+ addPass(TwoAddressInstructionPass());
+ addPass(RegisterCoalescerPass());
+
+  // The machine scheduler may accidentally create disconnected components
+  // when moving subregister definitions around; avoid this by splitting them
+  // into separate vregs beforehand. Splitting can also improve allocation quality.
+ addPass(RenameIndependentSubregsPass());
+
+ // PreRA instruction scheduling.
+ addPass(MachineSchedulerPass());
+
+ if (derived().addRegAssignmentOptimized(addPass)) {
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ derived().addPostRewrite(addPass);
+
+ // Copy propagate to forward register uses and try to eliminate COPYs that
+ // were not coalesced.
+ addPass(MachineCopyPropagationPass());
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(MachineLICMPass());
+ }
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addMachineLateOptimization(
+ AddMachinePass &addPass) const {
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ addPass(BranchFolderPass());
+
+ // Tail duplication.
+  // Note that tail duplication just increases code size and degrades
+  // performance for targets that require structured control flow.
+  // In addition it can also make the CFG irreducible. Thus we disable it.
+ if (!TM.requiresStructuredCFG())
+ addPass(TailDuplicatePass());
+
+ // Copy propagation.
+ addPass(MachineCopyPropagationPass());
+}
+
+/// Add standard basic block placement passes.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addBlockPlacement(
+ AddMachinePass &addPass) const {
+ addPass(MachineBlockPlacementPass());
+ // Run a separate pass to collect block placement statistics.
+ if (Opt.EnableBlockPlacementStats)
+ addPass(MachineBlockPlacementStatsPass());
+}
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CODEGENPASSBUILDER_H
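
For context only (not part of the patch): the derived().addRegAssignmentFast(), addPreRewrite() and addPostRewrite() calls above are CRTP hooks supplied by a target-specific builder. A minimal sketch under that assumption; MyTargetCodeGenPassBuilder and MyRegAllocFastPass are invented names, and constructor plumbing is omitted:

class MyTargetCodeGenPassBuilder final
    : public CodeGenPassBuilder<MyTargetCodeGenPassBuilder> {
public:
  // Called from addFastRegAlloc() above through derived().
  Error addRegAssignmentFast(AddMachinePass &addPass) const {
    addPass(MyRegAllocFastPass()); // hypothetical fast allocator pass
    return Error::success();
  }
  // addRegAssignmentOptimized(), addPreRewrite() and addPostRewrite() can be
  // overridden the same way to customize the optimized pipeline shown above.
};
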
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.h
index 1b77556dcbb1..e6c64cd4dd8e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -14,6 +14,7 @@
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
@@ -74,6 +75,8 @@ bool getDontPlaceZerosInBSS();
bool getEnableGuaranteedTailCallOpt();
+bool getEnableAIXExtendedAltivecABI();
+
bool getDisableTailCalls();
bool getStackSymbolOrdering();
@@ -94,8 +97,16 @@ Optional<bool> getExplicitDataSections();
bool getFunctionSections();
Optional<bool> getExplicitFunctionSections();
+bool getIgnoreXCOFFVisibility();
+
+bool getXCOFFTracebackTable();
+
std::string getBBSections();
+std::string getStackProtectorGuard();
+unsigned getStackProtectorGuardOffset();
+std::string getStackProtectorGuardReg();
+
unsigned getTLSSize();
bool getEmulatedTLS();
@@ -114,8 +125,14 @@ bool getEnableAddrsig();
bool getEmitCallSiteInfo();
+bool getEnableMachineFunctionSplitter();
+
bool getEnableDebugEntryValues();
+bool getPseudoProbeForProfiling();
+
+bool getValueTrackingVariableLocations();
+
bool getForceDwarfFrameSection();
bool getXRayOmitFunctionIndex();
@@ -128,9 +145,16 @@ struct RegisterCodeGenFlags {
llvm::BasicBlockSection getBBSectionsMode(llvm::TargetOptions &Options);
-// Common utility function tightly tied to the options listed here. Initializes
-// a TargetOptions object with CodeGen flags and returns it.
-TargetOptions InitTargetOptionsFromCodeGenFlags();
+llvm::StackProtectorGuards
+getStackProtectorGuardMode(llvm::TargetOptions &Options);
+
+/// Common utility function tightly tied to the options listed here. Initializes
+/// a TargetOptions object with CodeGen flags and returns it.
+/// \p TheTriple is used to determine default values for options that are not
+/// explicitly specified. If those triple-dependent option values have no
+/// effect on your component, a default-constructed Triple() can be passed
+/// in.
+TargetOptions InitTargetOptionsFromCodeGenFlags(const llvm::Triple &TheTriple);
std::string getCPUStr();
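
A minimal sketch (not part of the patch) of adapting a caller to the new Triple parameter declared above; the codegen namespace and sys::getDefaultTargetTriple() are assumptions based on how these flags are commonly consumed:

#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/Support/Host.h"
using namespace llvm;

TargetOptions makeOptionsForHost() {
  // Previously: codegen::InitTargetOptionsFromCodeGenFlags();
  Triple TheTriple(sys::getDefaultTargetTriple());
  return codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
  // Components with no triple-dependent defaults may pass Triple() instead.
}
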
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/DIE.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/DIE.h
index c7baaf6aef3d..3efef6ec0acd 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/DIE.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/DIE.h
@@ -247,6 +247,7 @@ public:
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
+ uint64_t getIndex() const { return Index; }
};
//===--------------------------------------------------------------------===//
@@ -382,12 +383,12 @@ private:
static_assert(std::is_standard_layout<T>::value ||
std::is_pointer<T>::value,
"Expected standard layout or pointer");
- new (reinterpret_cast<void *>(Val.buffer)) T(V);
+ new (reinterpret_cast<void *>(&Val)) T(V);
}
- template <class T> T *get() { return reinterpret_cast<T *>(Val.buffer); }
+ template <class T> T *get() { return reinterpret_cast<T *>(&Val); }
template <class T> const T *get() const {
- return reinterpret_cast<const T *>(Val.buffer);
+ return reinterpret_cast<const T *>(&Val);
}
template <class T> void destruct() { get<T>()->~T(); }
@@ -589,7 +590,6 @@ public:
T &operator*() const { return *static_cast<T *>(N); }
bool operator==(const iterator &X) const { return N == X.N; }
- bool operator!=(const iterator &X) const { return N != X.N; }
};
class const_iterator
@@ -612,7 +612,6 @@ public:
const T &operator*() const { return *static_cast<const T *>(N); }
bool operator==(const const_iterator &X) const { return N == X.N; }
- bool operator!=(const const_iterator &X) const { return N != X.N; }
};
iterator begin() {
@@ -788,7 +787,7 @@ public:
/// Get the absolute offset within the .debug_info or .debug_types section
/// for this DIE.
- unsigned getDebugSectionOffset() const;
+ uint64_t getDebugSectionOffset() const;
/// Compute the offset of this DIE and all its children.
///
@@ -864,14 +863,11 @@ class DIEUnit {
/// a valid section depending on the client that is emitting DWARF.
MCSection *Section;
uint64_t Offset; /// .debug_info or .debug_types absolute section offset.
- uint32_t Length; /// The length in bytes of all of the DIEs in this unit.
- const uint16_t Version; /// The Dwarf version number for this unit.
- const uint8_t AddrSize; /// The size in bytes of an address for this unit.
protected:
virtual ~DIEUnit() = default;
public:
- DIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag);
+ explicit DIEUnit(dwarf::Tag UnitTag);
DIEUnit(const DIEUnit &RHS) = delete;
DIEUnit(DIEUnit &&RHS) = delete;
void operator=(const DIEUnit &RHS) = delete;
@@ -893,19 +889,14 @@ public:
///
/// \returns Section pointer which can be NULL.
MCSection *getSection() const { return Section; }
- void setDebugSectionOffset(unsigned O) { Offset = O; }
- unsigned getDebugSectionOffset() const { return Offset; }
- void setLength(uint64_t L) { Length = L; }
- uint64_t getLength() const { return Length; }
- uint16_t getDwarfVersion() const { return Version; }
- uint16_t getAddressSize() const { return AddrSize; }
+ void setDebugSectionOffset(uint64_t O) { Offset = O; }
+ uint64_t getDebugSectionOffset() const { return Offset; }
DIE &getUnitDie() { return Die; }
const DIE &getUnitDie() const { return Die; }
};
struct BasicDIEUnit final : DIEUnit {
- BasicDIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag)
- : DIEUnit(Version, AddrSize, UnitTag) {}
+ explicit BasicDIEUnit(dwarf::Tag UnitTag) : DIEUnit(UnitTag) {}
};
//===--------------------------------------------------------------------===//
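
Purely as an illustration of the DIEUnit simplification above (version and address size now live with the emitting client rather than the unit), a construction sketch; the DWARF tag and offset are example values:

void buildUnit() {
  BasicDIEUnit CU(dwarf::DW_TAG_compile_unit); // old: (Version, AddrSize, Tag)
  CU.setDebugSectionOffset(0x1000);            // section offsets are now 64-bit
  uint64_t Off = CU.getDebugSectionOffset();
  DIE &UnitDie = CU.getUnitDie();
  (void)Off;
  (void)UnitDie;
}
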
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
index f7fc74a27fca..bca6065b1643 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
#include <utility>
namespace llvm {
@@ -23,6 +24,24 @@ class MachineFunction;
class MachineInstr;
class TargetRegisterInfo;
+/// Record instruction ordering so we can query their relative positions within
+/// a function. Meta instructions are given the same ordinal as the preceding
+/// non-meta instruction. Class state is invalid if MF is modified after
+/// calling initialize.
+class InstructionOrdering {
+public:
+ void initialize(const MachineFunction &MF);
+ void clear() { InstNumberMap.clear(); }
+
+ /// Check if instruction \p A comes before \p B, where \p A and \p B both
+ /// belong to the MachineFunction passed to initialize().
+ bool isBefore(const MachineInstr *A, const MachineInstr *B) const;
+
+private:
+ /// Each instruction is assigned an order number.
+ DenseMap<const MachineInstr *, unsigned> InstNumberMap;
+};
+
/// For each user variable, keep a list of instruction ranges where this
/// variable is accessible. The variables are listed in order of appearance.
class DbgValueHistoryMap {
@@ -52,6 +71,8 @@ public:
/// register-described debug values that have their end index
/// set to this entry's position in the entry vector.
class Entry {
+ friend DbgValueHistoryMap;
+
public:
enum EntryKind { DbgValue, Clobber };
@@ -89,6 +110,9 @@ public:
return Entries[Index];
}
+ /// Drop location ranges which exist entirely outside each variable's scope.
+ void trimLocationRanges(const MachineFunction &MF, LexicalScopes &LScopes,
+ const InstructionOrdering &Ordering);
bool empty() const { return VarEntries.empty(); }
void clear() { VarEntries.clear(); }
EntriesMap::const_iterator begin() const { return VarEntries.begin(); }
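
A small usage sketch of the InstructionOrdering class introduced above; MF, A and B stand in for a MachineFunction and two of its instructions:

void example(const MachineFunction &MF, const MachineInstr *A,
             const MachineInstr *B) {
  InstructionOrdering Ordering;
  Ordering.initialize(MF); // assign an ordinal to every instruction in MF
  if (Ordering.isBefore(A, B)) {
    // A precedes B; meta instructions share the ordinal of the preceding
    // non-meta instruction, and MF must not be modified afterwards.
  }
  Ordering.clear();
}
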
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/DebugHandlerBase.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/DebugHandlerBase.h
index 4ff0fdea36ae..45823b2ba349 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/DebugHandlerBase.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/DebugHandlerBase.h
@@ -110,8 +110,13 @@ protected:
virtual void endFunctionImpl(const MachineFunction *MF) = 0;
virtual void skippedNonDebugFunction() {}
+private:
+ InstructionOrdering InstOrdering;
+
// AsmPrinterHandler overrides.
public:
+ void beginModule(Module *M) override;
+
void beginInstruction(const MachineInstr *MI) override;
void endInstruction() override;
@@ -129,8 +134,13 @@ public:
/// If this type is derived from a base type then return base type size.
static uint64_t getBaseTypeSize(const DIType *Ty);
+
+ /// Return true if type encoding is unsigned.
+ static bool isUnsignedDIType(const DIType *Ty);
+
+ const InstructionOrdering &getInstOrdering() const { return InstOrdering; }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
index e189352a7b2d..abeba62707c1 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
@@ -21,7 +21,7 @@ struct DwarfStringPoolEntry {
static constexpr unsigned NotIndexed = -1;
MCSymbol *Symbol;
- unsigned Offset;
+ uint64_t Offset;
unsigned Index;
bool isIndexed() const { return Index != NotIndexed; }
@@ -47,7 +47,7 @@ public:
assert(getMapEntry()->second.Symbol && "No symbol available!");
return getMapEntry()->second.Symbol;
}
- unsigned getOffset() const { return getMapEntry()->second.Offset; }
+ uint64_t getOffset() const { return getMapEntry()->second.Offset; }
bool isIndexed() const { return MapEntryAndIndexed.getInt(); }
unsigned getIndex() const {
assert(isIndexed());
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h
index 7662179db44d..26bf4ab2618c 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h
@@ -224,10 +224,6 @@ protected:
/// makes sense (for example, on function calls)
MachineInstr *EmitStartPt;
- /// Last local value flush point. On a subsequent flush, no local value will
- /// sink past this point.
- MachineBasicBlock::iterator LastFlushPoint;
-
public:
virtual ~FastISel();
@@ -246,7 +242,7 @@ public:
/// be appended.
void startNewBlock();
- /// Flush the local value map and sink local values if possible.
+ /// Flush the local value map.
void finishBasicBlock();
/// Return current debug location information.
@@ -313,10 +309,7 @@ public:
void removeDeadCode(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E);
- struct SavePoint {
- MachineBasicBlock::iterator InsertPt;
- DebugLoc DL;
- };
+ using SavePoint = MachineBasicBlock::iterator;
/// Prepare InsertPt to begin inserting instructions into the local
/// value area and return the old insert position.
@@ -497,7 +490,10 @@ protected:
/// - \c Add has a constant operand.
bool canFoldAddIntoGEP(const User *GEP, const Value *Add);
- /// Test whether the given value has exactly one use.
+ /// Test whether the register associated with this value has exactly one use,
+ /// in which case that single use is killing. Note that multiple IR values
+ /// may map onto the same register, in which case this is not the same as
+ /// checking that an IR value has one use.
bool hasTrivialKill(const Value *V);
/// Create a machine mem operand from the given instruction.
@@ -510,18 +506,6 @@ protected:
unsigned NumArgs);
bool lowerCallTo(CallLoweringInfo &CLI);
- bool isCommutativeIntrinsic(IntrinsicInst const *II) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- return true;
- default:
- return false;
- }
- }
-
bool lowerCall(const CallInst *I);
/// Select and emit code for a binary operator instruction, which has
/// an opcode which directly corresponds to the given ISD opcode.
@@ -536,7 +520,6 @@ protected:
bool selectFreeze(const User *I);
bool selectCast(const User *I, unsigned Opcode);
bool selectExtractValue(const User *U);
- bool selectInsertValue(const User *I);
bool selectXRayCustomEvent(const CallInst *II);
bool selectXRayTypedEvent(const CallInst *II);
@@ -572,20 +555,6 @@ private:
/// Removes dead local value instructions after SavedLastLocalvalue.
void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue);
- struct InstOrderMap {
- DenseMap<MachineInstr *, unsigned> Orders;
- MachineInstr *FirstTerminator = nullptr;
- unsigned FirstTerminatorOrder = std::numeric_limits<unsigned>::max();
-
- void initialize(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator LastFlushPoint);
- };
-
- /// Sinks the local value materialization instruction LocalMI to its first use
- /// in the basic block, or deletes it if it is not used.
- void sinkLocalValueMaterialization(MachineInstr &LocalMI, Register DefReg,
- InstOrderMap &OrderMap);
-
/// Insertion point before trying to select the current instruction.
MachineBasicBlock::iterator SavedInsertPt;
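
With SavePoint reduced to a plain iterator, the save/restore idiom around the local value area becomes a sketch like the following; enterLocalValueArea() and leaveLocalValueArea() are assumed from the surrounding header, and FastIS is a placeholder FastISel pointer:

FastISel::SavePoint SP = FastIS->enterLocalValueArea(); // just an iterator now
// ... materialize a local value at the saved position ...
FastIS->leaveLocalValueArea(SP); // no separate DebugLoc to restore anymore
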
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index c99ca00eac29..b6bde0249f88 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -91,13 +91,33 @@ public:
/// Track virtual registers created for exception pointers.
DenseMap<const Value *, Register> CatchPadExceptionPointers;
- /// Keep track of frame indices allocated for statepoints as they could be
- /// used across basic block boundaries (e.g. for an invoke). For each
- /// gc.statepoint instruction, maps uniqued llvm IR values to the slots they
- /// were spilled in. If a value is mapped to None it means we visited the
- /// value but didn't spill it (because it was a constant, for instance).
- using StatepointSpillMapTy = DenseMap<const Value *, Optional<int>>;
- DenseMap<const Instruction *, StatepointSpillMapTy> StatepointSpillMaps;
+ /// Helper object to track which of three possible relocation mechanisms are
+ /// used for a particular value being relocated over a statepoint.
+ struct StatepointRelocationRecord {
+ enum RelocType {
+ // Value did not need to be relocated and can be used directly.
+ NoRelocate,
+ // Value was spilled to the stack and needs to be filled in at the gc.relocate.
+ Spill,
+ // Value was lowered to tied def and gc.relocate should be replaced with
+ // copy from vreg.
+ VReg,
+ } type = NoRelocate;
+ // Payload contains either frame index of the stack slot in which the value
+ // was spilled, or virtual register which contains the re-definition.
+ union payload_t {
+ payload_t() : FI(-1) {}
+ int FI;
+ Register Reg;
+ } payload;
+ };
+
+ /// Keep track of each value which was relocated and the strategy used to
+ /// relocate that value. This information is required when visiting
+ /// gc.relocates which may appear in following blocks.
+ using StatepointSpillMapTy =
+ DenseMap<const Value *, StatepointRelocationRecord>;
+ DenseMap<const Instruction *, StatepointSpillMapTy> StatepointRelocationMaps;
/// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
/// the entry block. This allows the allocas to be efficiently referenced
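
To make the new record concrete, a hypothetical sketch of filling in and later consulting one entry; FuncInfo, StatepointInst, DerivedPtr, ReDefVReg and useFrameIndex are placeholders, not names from the patch:

FunctionLoweringInfo::StatepointRelocationRecord Rec;
Rec.type = FunctionLoweringInfo::StatepointRelocationRecord::VReg;
Rec.payload.Reg = ReDefVReg; // vreg carrying the tied re-definition
FuncInfo.StatepointRelocationMaps[StatepointInst][DerivedPtr] = Rec;

// A gc.relocate visited in a following block consults the same map:
const auto &R = FuncInfo.StatepointRelocationMaps[StatepointInst][DerivedPtr];
if (R.type == FunctionLoweringInfo::StatepointRelocationRecord::Spill)
  useFrameIndex(R.payload.FI); // hypothetical helper for the spill case
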
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
index 8bd9e9443552..f76dec57c840 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
@@ -16,14 +16,12 @@
#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CodeGen.h"
namespace llvm {
+class MachineBasicBlock;
/// A class that wraps MachineInstrs and derives from FoldingSetNode in order to
/// be uniqued in a CSEMap. The tradeoff here is extra memory allocations for
@@ -184,6 +182,8 @@ public:
const GISelInstProfileBuilder &addNodeIDRegNum(Register Reg) const;
+ const GISelInstProfileBuilder &addNodeIDReg(Register Reg) const;
+
const GISelInstProfileBuilder &addNodeIDImmediate(int64_t Imm) const;
const GISelInstProfileBuilder &
addNodeIDMBB(const MachineBasicBlock *MBB) const;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 4d60dffb91db..26ae7129f04a 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -17,8 +17,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetCallingConv.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include <cstdint>
@@ -26,16 +29,14 @@
namespace llvm {
-class CCState;
class CallBase;
class DataLayout;
class Function;
+class FunctionLoweringInfo;
class MachineIRBuilder;
-class MachineOperand;
struct MachinePointerInfo;
class MachineRegisterInfo;
class TargetLowering;
-class Type;
class Value;
class CallLowering {
@@ -43,21 +44,30 @@ class CallLowering {
virtual void anchor();
public:
- struct ArgInfo {
+ struct BaseArgInfo {
+ Type *Ty;
+ SmallVector<ISD::ArgFlagsTy, 4> Flags;
+ bool IsFixed;
+
+ BaseArgInfo(Type *Ty,
+ ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
+ bool IsFixed = true)
+ : Ty(Ty), Flags(Flags.begin(), Flags.end()), IsFixed(IsFixed) {}
+
+ BaseArgInfo() : Ty(nullptr), IsFixed(false) {}
+ };
+
+ struct ArgInfo : public BaseArgInfo {
SmallVector<Register, 4> Regs;
// If the argument had to be split into multiple parts according to the
// target calling convention, then this contains the original vregs
// if the argument was an incoming arg.
SmallVector<Register, 2> OrigRegs;
- Type *Ty;
- SmallVector<ISD::ArgFlagsTy, 4> Flags;
- bool IsFixed;
ArgInfo(ArrayRef<Register> Regs, Type *Ty,
ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
bool IsFixed = true)
- : Regs(Regs.begin(), Regs.end()), Ty(Ty),
- Flags(Flags.begin(), Flags.end()), IsFixed(IsFixed) {
+ : BaseArgInfo(Ty, Flags, IsFixed), Regs(Regs.begin(), Regs.end()) {
if (!Regs.empty() && Flags.empty())
this->Flags.push_back(ISD::ArgFlagsTy());
// FIXME: We should have just one way of saying "no register".
@@ -66,7 +76,7 @@ public:
"only void types should have no register");
}
- ArgInfo() : Ty(nullptr), IsFixed(false) {}
+ ArgInfo() : BaseArgInfo() {}
};
struct CallLoweringInfo {
@@ -102,6 +112,15 @@ public:
/// True if the call is to a vararg function.
bool IsVarArg = false;
+
+ /// True if the function's return value can be lowered to registers.
+ bool CanLowerReturn = true;
+
+ /// VReg to hold the hidden sret parameter.
+ Register DemoteRegister;
+
+ /// The stack index for sret demotion.
+ int DemoteStackIndex;
};
/// Argument handling is mostly uniform between the four places that
@@ -111,15 +130,18 @@ public:
/// argument should go, exactly what happens can vary slightly. This
/// class abstracts the differences.
struct ValueHandler {
- ValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- CCAssignFn *AssignFn)
- : MIRBuilder(MIRBuilder), MRI(MRI), AssignFn(AssignFn) {}
+ ValueHandler(bool IsIncoming, MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, CCAssignFn *AssignFn)
+ : MIRBuilder(MIRBuilder), MRI(MRI), AssignFn(AssignFn),
+ IsIncomingArgumentHandler(IsIncoming) {}
virtual ~ValueHandler() = default;
/// Returns true if the handler is dealing with incoming arguments,
/// i.e. those that move values from some physical location to vregs.
- virtual bool isIncomingArgumentHandler() const = 0;
+ bool isIncomingArgumentHandler() const {
+ return IsIncomingArgumentHandler;
+ }
/// Materialize a VReg containing the address of the specified
/// stack-based object. This is either based on a FrameIndex or
@@ -147,6 +169,7 @@ public:
virtual void assignValueToAddress(const ArgInfo &Arg, Register Addr,
uint64_t Size, MachinePointerInfo &MPO,
CCValAssign &VA) {
+ assert(Arg.Regs.size() == 1);
assignValueToAddress(Arg.Regs[0], Addr, Size, MPO, VA);
}
@@ -177,9 +200,22 @@ public:
CCAssignFn *AssignFn;
private:
+ bool IsIncomingArgumentHandler;
virtual void anchor();
};
+ struct IncomingValueHandler : public ValueHandler {
+ IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : ValueHandler(true, MIRBuilder, MRI, AssignFn) {}
+ };
+
+ struct OutgoingValueHandler : public ValueHandler {
+ OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : ValueHandler(false, MIRBuilder, MRI, AssignFn) {}
+ };
+
protected:
/// Getter for generic TargetLowering class.
const TargetLowering *getTLI() const {
@@ -192,6 +228,17 @@ protected:
return static_cast<const XXXTargetLowering *>(TLI);
}
+ /// \returns Flags corresponding to the attributes on the \p ArgIdx-th
+ /// parameter of \p Call.
+ ISD::ArgFlagsTy getAttributesForArgIdx(const CallBase &Call,
+ unsigned ArgIdx) const;
+
+ /// Adds flags to \p Flags based off of the attributes in \p Attrs.
+ /// \p OpIdx is the index in \p Attrs to add flags from.
+ void addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
+ const AttributeList &Attrs,
+ unsigned OpIdx) const;
+
template <typename FuncInfoTy>
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL,
const FuncInfoTy &FuncInfo) const;
@@ -215,7 +262,7 @@ protected:
MachineIRBuilder &MIRBuilder) const;
/// Invoke Handler::assignArg on each of the given \p Args and then use
- /// \p Callback to move them to the assigned locations.
+ /// \p Handler to move them to the assigned locations.
///
/// \return True if everything has succeeded, false otherwise.
bool handleAssignments(MachineIRBuilder &MIRBuilder,
@@ -235,6 +282,14 @@ protected:
CCAssignFn &AssignFnFixed,
CCAssignFn &AssignFnVarArg) const;
+ /// Check whether parameters to a call that are passed in callee saved
+ /// registers are the same as from the calling function. This needs to be
+ /// checked for tail call eligibility.
+ bool parametersInCSRMatch(const MachineRegisterInfo &MRI,
+ const uint32_t *CallerPreservedMask,
+ const SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<ArgInfo> &OutVals) const;
+
/// \returns True if the calling convention for a callee and its caller pass
/// results in the same way. Typically used for tail call eligibility checks.
///
@@ -265,20 +320,73 @@ public:
return false;
}
+ /// Load the returned value from the stack into virtual registers in \p VRegs.
+ /// It uses the frame index \p FI and the start offset from \p DemoteReg.
+ /// The loaded data size will be determined from \p RetTy.
+ void insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs, Register DemoteReg,
+ int FI) const;
+
+ /// Store the return value given by \p VRegs into stack starting at the offset
+ /// specified in \p DemoteReg.
+ void insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs, Register DemoteReg) const;
+
+ /// Insert the hidden sret ArgInfo to the beginning of \p SplitArgs.
+ /// This function should be called from the target specific
+ /// lowerFormalArguments when \p F requires the sret demotion.
+ void insertSRetIncomingArgument(const Function &F,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ Register &DemoteReg, MachineRegisterInfo &MRI,
+ const DataLayout &DL) const;
+
+ /// For the call-base described by \p CB, insert the hidden sret ArgInfo to
+ /// the OrigArgs field of \p Info.
+ void insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
+ const CallBase &CB,
+ CallLoweringInfo &Info) const;
+
+ /// \return True if the return type described by \p Outs can be returned
+ /// without performing sret demotion.
+ bool checkReturn(CCState &CCInfo, SmallVectorImpl<BaseArgInfo> &Outs,
+ CCAssignFn *Fn) const;
+
+ /// Get the type and the ArgFlags for the split components of \p RetTy as
+ /// returned by \c ComputeValueVTs.
+ void getReturnInfo(CallingConv::ID CallConv, Type *RetTy, AttributeList Attrs,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ const DataLayout &DL) const;
+
+ /// Toplevel function to check the return type based on the target calling
+ /// convention. \return True if the return value of \p MF can be returned
+ /// without performing sret demotion.
+ bool checkReturnTypeForCallConv(MachineFunction &MF) const;
+
+ /// This hook must be implemented to check whether the return values
+ /// described by \p Outs can fit into the return registers. If false
+ /// is returned, an sret-demotion is performed.
+ virtual bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ bool IsVarArg) const {
+ return true;
+ }
+
/// This hook must be implemented to lower outgoing return values, described
/// by \p Val, into the specified virtual registers \p VRegs.
/// This hook is used by GlobalISel.
///
+ /// \p FLI is required for sret demotion.
+ ///
/// \p SwiftErrorVReg is non-zero if the function has a swifterror parameter
/// that needs to be implicitly returned.
///
/// \return True if the lowering succeeds, false otherwise.
virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs,
+ ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
Register SwiftErrorVReg) const {
if (!supportSwiftError()) {
assert(SwiftErrorVReg == 0 && "attempt to use unsupported swifterror");
- return lowerReturn(MIRBuilder, Val, VRegs);
+ return lowerReturn(MIRBuilder, Val, VRegs, FLI);
}
return false;
}
@@ -286,7 +394,8 @@ public:
/// This hook behaves as the extended lowerReturn function, but for targets
/// that do not support swifterror value promotion.
virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs) const {
+ ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const {
return false;
}
@@ -299,12 +408,13 @@ public:
/// the second in \c VRegs[1], and so on. For each argument, there will be one
/// register for each non-aggregate type, as returned by \c computeValueLLTs.
/// \p MIRBuilder is set to the proper insertion for the argument
- /// lowering.
+ /// lowering. \p FLI is required for sret demotion.
///
/// \return True if the lowering succeeded, false otherwise.
virtual bool lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const {
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
return false;
}
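
A sketch of what the added FunctionLoweringInfo parameter enables in a target override; MyTargetCallLowering and MyTarget::RET are invented, the CanLowerReturn/DemoteRegister fields are assumed to be the corresponding FunctionLoweringInfo members, and register-return handling via handleAssignments() is omitted:

bool MyTargetCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                       const Value *Val,
                                       ArrayRef<Register> VRegs,
                                       FunctionLoweringInfo &FLI) const {
  auto Ret = MIRBuilder.buildInstrNoInsert(MyTarget::RET);
  if (Val && !FLI.CanLowerReturn) {
    // The return value was sret-demoted: store it through the hidden pointer
    // instead of assigning it to return registers.
    insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
  }
  MIRBuilder.insertInstr(Ret);
  return true;
}
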
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index c317b7ed4c54..8570f5ca5dd5 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -17,6 +17,8 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H
#define LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Alignment.h"
@@ -25,12 +27,15 @@ namespace llvm {
class GISelChangeObserver;
class MachineIRBuilder;
+class MachineInstrBuilder;
class MachineRegisterInfo;
class MachineInstr;
class MachineOperand;
class GISelKnownBits;
class MachineDominatorTree;
class LegalizerInfo;
+struct LegalityQuery;
+class TargetLowering;
struct PreferredTuple {
LLT Ty; // The result type of the extend.
@@ -50,6 +55,37 @@ struct PtrAddChain {
Register Base;
};
+struct RegisterImmPair {
+ Register Reg;
+ int64_t Imm;
+};
+
+struct ShiftOfShiftedLogic {
+ MachineInstr *Logic;
+ MachineInstr *Shift2;
+ Register LogicNonShiftReg;
+ uint64_t ValSum;
+};
+
+using OperandBuildSteps =
+ SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
+struct InstructionBuildSteps {
+ unsigned Opcode = 0; /// The opcode for the produced instruction.
+ OperandBuildSteps OperandFns; /// Operands to be added to the instruction.
+ InstructionBuildSteps() = default;
+ InstructionBuildSteps(unsigned Opcode, const OperandBuildSteps &OperandFns)
+ : Opcode(Opcode), OperandFns(OperandFns) {}
+};
+
+struct InstructionStepsMatchInfo {
+ /// Describes instructions to be built during a combine.
+ SmallVector<InstructionBuildSteps, 2> InstrsToBuild;
+ InstructionStepsMatchInfo() = default;
+ InstructionStepsMatchInfo(
+ std::initializer_list<InstructionBuildSteps> InstrsToBuild)
+ : InstrsToBuild(InstrsToBuild) {}
+};
+
class CombinerHelper {
protected:
MachineIRBuilder &Builder;
@@ -69,6 +105,12 @@ public:
return KB;
}
+ const TargetLowering &getTargetLowering() const;
+
+ /// \return true if the combine is running prior to legalization, or if \p
+ /// Query is legal on the target.
+ bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const;
+
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
@@ -107,12 +149,17 @@ public:
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo);
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo);
- bool matchSextAlreadyExtended(MachineInstr &MI);
- bool applySextAlreadyExtended(MachineInstr &MI);
+ bool matchSextTruncSextLoad(MachineInstr &MI);
+ bool applySextTruncSextLoad(MachineInstr &MI);
- bool matchElideBrByInvertingCond(MachineInstr &MI);
- void applyElideBrByInvertingCond(MachineInstr &MI);
- bool tryElideBrByInvertingCond(MachineInstr &MI);
+ /// Match sext_inreg(load p), imm -> sextload p
+ bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
+ bool applySextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
+
+ /// If a brcond's true block is not the fallthrough, make it so by inverting
+ /// the condition and swapping operands.
+ bool matchOptBrCondByInvertingCond(MachineInstr &MI);
+ void applyOptBrCondByInvertingCond(MachineInstr &MI);
/// If \p MI is G_CONCAT_VECTORS, try to combine it.
/// Returns true if MI changed.
@@ -189,10 +236,28 @@ public:
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
bool applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
+ /// Fold (shift (shift base, x), y) -> (shift base (x+y))
+ bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo);
+ bool applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo);
+
+ /// If we have a shift-by-constant of a bitwise logic op that itself has a
+ /// shift-by-constant operand with identical opcode, we may be able to convert
+ /// that into 2 independent shifts followed by the logic op.
+ bool matchShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo);
+ bool applyShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo);
+
/// Transform a multiply by a power-of-2 value to a left shift.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
bool applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
+ // Transform a G_SHL with an extended source into a narrower shift if
+ // possible.
+ bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);
+ bool applyCombineShlOfExtend(MachineInstr &MI,
+ const RegisterImmPair &MatchData);
+
/// Reduce a shift by a constant to an unmerge and a shift on a half sized
/// type. This will not produce a shift smaller than \p TargetShiftSize.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize,
@@ -200,6 +265,86 @@ public:
bool applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal);
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount);
+ /// Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
+ bool
+ matchCombineUnmergeMergeToPlainValues(MachineInstr &MI,
+ SmallVectorImpl<Register> &Operands);
+ bool
+ applyCombineUnmergeMergeToPlainValues(MachineInstr &MI,
+ SmallVectorImpl<Register> &Operands);
+
+ /// Transform G_UNMERGE Constant -> Constant1, Constant2, ...
+ bool matchCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts);
+ bool applyCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts);
+
+ /// Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
+ bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
+ bool applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
+
+ /// Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0
+ bool matchCombineUnmergeZExtToZExt(MachineInstr &MI);
+ bool applyCombineUnmergeZExtToZExt(MachineInstr &MI);
+
+ /// Transform fp_instr(cst) to constant result of the fp operation.
+ bool matchCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst);
+ bool applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst);
+
+ /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
+ bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg);
+ bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg);
+
+ /// Transform PtrToInt(IntToPtr(x)) to x.
+ bool matchCombineP2IToI2P(MachineInstr &MI, Register &Reg);
+ bool applyCombineP2IToI2P(MachineInstr &MI, Register &Reg);
+
+ /// Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y)
+ /// Transform G_ADD y, (G_PTRTOINT x) -> G_PTRTOINT (G_PTR_ADD x, y)
+ bool matchCombineAddP2IToPtrAdd(MachineInstr &MI,
+ std::pair<Register, bool> &PtrRegAndCommute);
+ bool applyCombineAddP2IToPtrAdd(MachineInstr &MI,
+ std::pair<Register, bool> &PtrRegAndCommute);
+
+ // Transform G_PTR_ADD (G_PTRTOINT C1), C2 -> C1 + C2
+ bool matchCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst);
+ bool applyCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst);
+
+ /// Transform anyext(trunc(x)) to x.
+ bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg);
+ bool applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg);
+
+ /// Transform [asz]ext([asz]ext(x)) to [asz]ext x.
+ bool matchCombineExtOfExt(MachineInstr &MI,
+ std::tuple<Register, unsigned> &MatchInfo);
+ bool applyCombineExtOfExt(MachineInstr &MI,
+ std::tuple<Register, unsigned> &MatchInfo);
+
+ /// Transform fneg(fneg(x)) to x.
+ bool matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg);
+
+ /// Match fabs(fabs(x)) to fabs(x).
+ bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
+ bool applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
+
+ /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
+ bool matchCombineTruncOfExt(MachineInstr &MI,
+ std::pair<Register, unsigned> &MatchInfo);
+ bool applyCombineTruncOfExt(MachineInstr &MI,
+ std::pair<Register, unsigned> &MatchInfo);
+
+ /// Transform trunc (shl x, K) to shl (trunc x),
+ /// K => K < VT.getScalarSizeInBits().
+ bool matchCombineTruncOfShl(MachineInstr &MI,
+ std::pair<Register, Register> &MatchInfo);
+ bool applyCombineTruncOfShl(MachineInstr &MI,
+ std::pair<Register, Register> &MatchInfo);
+
+ /// Transform G_MUL(x, -1) to G_SUB(0, x)
+ bool applyCombineMulByNegativeOne(MachineInstr &MI);
+
/// Return true if any explicit use operand on \p MI is defined by a
/// G_IMPLICIT_DEF.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI);
@@ -214,6 +359,13 @@ public:
/// Return true if a G_STORE instruction \p MI is storing an undef value.
bool matchUndefStore(MachineInstr &MI);
+ /// Return true if a G_SELECT instruction \p MI has an undef comparison.
+ bool matchUndefSelectCmp(MachineInstr &MI);
+
+ /// Return true if a G_SELECT instruction \p MI has a constant comparison. If
+ /// true, \p OpIdx will store the operand index of the known selected value.
+ bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx);
+
/// Replace an instruction with a G_FCONSTANT with value \p C.
bool replaceInstWithFConstant(MachineInstr &MI, double C);
@@ -226,6 +378,9 @@ public:
/// Delete \p MI and replace all of its uses with its \p OpIdx-th operand.
bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
+ /// Delete \p MI and replace all of its uses with \p Replacement.
+ bool replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement);
+
/// Return true if \p MOP1 and \p MOP2 are register operands are defined by
/// equivalent instructions.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
@@ -243,6 +398,12 @@ public:
/// Check if operand \p OpIdx is zero.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx);
+ /// Check if operand \p OpIdx is undef.
+ bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx);
+
+ /// Check if operand \p OpIdx is known to be a power of 2.
+ bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx);
+
/// Erase \p MI
bool eraseInst(MachineInstr &MI);
@@ -252,6 +413,79 @@ public:
bool applySimplifyAddToSub(MachineInstr &MI,
std::tuple<Register, Register> &MatchInfo);
+ /// Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
+ bool
+ matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI,
+ InstructionStepsMatchInfo &MatchInfo);
+
+ /// Replace \p MI with a series of instructions described in \p MatchInfo.
+ bool applyBuildInstructionSteps(MachineInstr &MI,
+ InstructionStepsMatchInfo &MatchInfo);
+
+ /// Match ashr (shl x, C), C -> sext_inreg (C)
+ bool matchAshrShlToSextInreg(MachineInstr &MI,
+ std::tuple<Register, int64_t> &MatchInfo);
+ bool applyAshShlToSextInreg(MachineInstr &MI,
+ std::tuple<Register, int64_t> &MatchInfo);
+ /// \return true if \p MI is a G_AND instruction whose operands are x and y
+ /// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.)
+ ///
+ /// \param [in] MI - The G_AND instruction.
+ /// \param [out] Replacement - A register the G_AND should be replaced with on
+ /// success.
+ bool matchRedundantAnd(MachineInstr &MI, Register &Replacement);
+
+ /// \return true if \p MI is a G_OR instruction whose operands are x and y
+ /// where x | y == x or x | y == y. (E.g., one of operands is all-zeros
+ /// value.)
+ ///
+ /// \param [in] MI - The G_OR instruction.
+ /// \param [out] Replacement - A register the G_OR should be replaced with on
+ /// success.
+ bool matchRedundantOr(MachineInstr &MI, Register &Replacement);
+
+ /// \return true if \p MI is a G_SEXT_INREG that can be erased.
+ bool matchRedundantSExtInReg(MachineInstr &MI);
+
+ /// Combine inverting a result of a compare into the opposite cond code.
+ bool matchNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate);
+ bool applyNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate);
+
+ /// Fold (xor (and x, y), y) -> (and (not x), y)
+ ///{
+ bool matchXorOfAndWithSameReg(MachineInstr &MI,
+ std::pair<Register, Register> &MatchInfo);
+ bool applyXorOfAndWithSameReg(MachineInstr &MI,
+ std::pair<Register, Register> &MatchInfo);
+ ///}
+
+ /// Combine G_PTR_ADD with nullptr to G_INTTOPTR
+ bool matchPtrAddZero(MachineInstr &MI);
+ bool applyPtrAddZero(MachineInstr &MI);
+
+ /// Combine G_UREM x, (known power of 2) to an add and bitmasking.
+ bool applySimplifyURemByPow2(MachineInstr &MI);
+
+ bool matchCombineInsertVecElts(MachineInstr &MI,
+ SmallVectorImpl<Register> &MatchInfo);
+
+ bool applyCombineInsertVecElts(MachineInstr &MI,
+ SmallVectorImpl<Register> &MatchInfo);
+
+ /// Match expression trees of the form
+ ///
+ /// \code
+ /// sN *a = ...
+ /// sM val = a[0] | (a[1] << N) | (a[2] << 2N) | (a[3] << 3N) ...
+ /// \endcode
+ ///
+ /// And check if the tree can be replaced with a M-bit load + possibly a
+ /// bswap.
+ bool matchLoadOrCombine(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+ bool applyLoadOrCombine(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
@@ -280,6 +514,30 @@ private:
/// \returns true if a candidate is found.
bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
Register &Offset);
+
+ /// Helper function for matchLoadOrCombine. Searches for Registers
+ /// which may have been produced by a load instruction + some arithmetic.
+ ///
+ /// \param [in] Root - The search root.
+ ///
+ /// \returns The Registers found during the search.
+ Optional<SmallVector<Register, 8>>
+ findCandidatesForLoadOrCombine(const MachineInstr *Root) const;
+
+ /// Helper function for matchLoadOrCombine.
+ ///
+ /// Checks if every register in \p RegsToVisit is defined by a load
+ /// instruction + some arithmetic.
+ ///
+ /// \param [out] MemOffset2Idx - Maps the byte positions each load ends up
+ /// at to the index of the load.
+ /// \param [in] MemSizeInBits - The number of bits each load should produce.
+ ///
+ /// \returns The lowest-index load found and the lowest index on success.
+ Optional<std::pair<MachineInstr *, int64_t>> findLoadOffsetsForLoadOrCombine(
+ SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ const SmallVector<Register, 8> &RegsToVisit,
+ const unsigned MemSizeInBits);
};
} // namespace llvm
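
The match*/apply* pairs above all follow the same two-phase protocol that the (typically TableGen-generated) combine rules drive; a minimal hand-written sketch, with Helper and MI standing in for a configured CombinerHelper and a candidate instruction:

// Phase 1: match performs analysis only and fills the MatchInfo out-parameter.
// Phase 2: apply performs the rewrite using that MatchInfo.
Register Replacement;
if (Helper.matchRedundantOr(MI, Replacement))
  Helper.replaceSingleDefInstWithReg(MI, Replacement);

PtrAddChain Chain;
if (Helper.matchPtrAddImmedChain(MI, Chain))
  Helper.applyPtrAddImmedChain(MI, Chain);
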
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
index d8fe4b3103db..dd7f04a33f4b 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
@@ -51,7 +51,7 @@ public:
/// For convenience, finishedChangingAllUsesOfReg() will report the completion
/// of the changes. The use list may change between this call and
/// finishedChangingAllUsesOfReg().
- void changingAllUsesOfReg(const MachineRegisterInfo &MRI, unsigned Reg);
+ void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg);
/// All instructions reported as changing by changingAllUsesOfReg() have
/// finished being changed.
void finishedChangingAllUsesOfReg();
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
index 55cf54d6e946..eafed3760738 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@@ -13,13 +13,11 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
#define LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Register.h"
-#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
namespace llvm {
@@ -36,10 +34,16 @@ class GISelKnownBits : public GISelChangeObserver {
/// Cache maintained during a computeKnownBits request.
SmallDenseMap<Register, KnownBits, 16> ComputeKnownBitsCache;
+ void computeKnownBitsMin(Register Src0, Register Src1, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth = 0);
+
+ unsigned computeNumSignBitsMin(Register Src0, Register Src1,
+ const APInt &DemandedElts, unsigned Depth = 0);
+
public:
GISelKnownBits(MachineFunction &MF, unsigned MaxDepth = 6);
virtual ~GISelKnownBits() = default;
- void setMF(MachineFunction &MF);
const MachineFunction &getMachineFunction() const {
return MF;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
index b0bb519283b1..9e7ade3ee329 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
@@ -11,9 +11,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Support/Debug.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 751ab67c4e97..8eab8a5846a7 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -20,12 +20,14 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
-#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CodeGen.h"
#include <memory>
#include <utility>
@@ -36,8 +38,8 @@ class BasicBlock;
class CallInst;
class CallLowering;
class Constant;
+class ConstrainedFPIntrinsic;
class DataLayout;
-class FunctionLoweringInfo;
class Instruction;
class MachineBasicBlock;
class MachineFunction;
@@ -217,12 +219,14 @@ private:
/// Translate an LLVM string intrinsic (memcpy, memset, ...).
bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
- Intrinsic::ID ID);
+ unsigned Opcode);
void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);
bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder);
+ bool translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
+ MachineIRBuilder &MIRBuilder);
/// Helper function for translateSimpleIntrinsic.
/// \return The generic opcode for \p IntrinsicID if \p IntrinsicID is a
@@ -256,6 +260,19 @@ private:
/// \pre \p U is a call instruction.
bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
+ /// When an invoke or a cleanupret unwinds to the next EH pad, there are
+ /// many places it could ultimately go. In the IR, we have a single unwind
+ /// destination, but in the machine CFG, we enumerate all the possible blocks.
+ /// This function skips over imaginary basic blocks that hold catchswitch
+ /// instructions, and finds all the "real" machine
+ /// basic block destinations. As those destinations may not be successors of
+ /// EHPadBB, here we also calculate the edge probability to those
+ /// destinations. The passed-in Prob is the edge probability to EHPadBB.
+ bool findUnwindDestinations(
+ const BasicBlock *EHPadBB, BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests);
+
bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
@@ -287,11 +304,37 @@ private:
/// MachineBasicBlocks for the function have been created.
void finishPendingPhis();
+ /// Translate \p Inst into a unary operation \p Opcode.
+ /// \pre \p U is a unary operation.
+ bool translateUnaryOp(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder);
+
/// Translate \p Inst into a binary operation \p Opcode.
/// \pre \p U is a binary operation.
bool translateBinaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder);
+ /// If the set of cases should be emitted as a series of branches, return
+ /// true. If we should emit this as a bunch of and/or'd together conditions,
+ /// return false.
+ bool shouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases);
+ /// Helper method for findMergedConditions.
+ /// This function emits a branch and is used at the leaves of an OR or an
+ /// AND operator tree.
+ void emitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond);
+ /// Used during condbr translation to find trees of conditions that can be
+ /// optimized.
+ void findMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond);
+
/// Translate branch (br) instruction.
/// \pre \p U is a branch instruction.
bool translateBr(const User &U, MachineIRBuilder &MIRBuilder);
@@ -305,19 +348,23 @@ private:
void emitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB,
MachineIRBuilder &MIB);
- bool lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
- MachineBasicBlock *SwitchMBB,
- MachineBasicBlock *CurMBB,
- MachineBasicBlock *DefaultMBB,
- MachineIRBuilder &MIB,
- MachineFunction::iterator BBI,
- BranchProbability UnhandledProbs,
- SwitchCG::CaseClusterIt I,
- MachineBasicBlock *Fallthrough,
- bool FallthroughUnreachable);
-
- bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
- Value *Cond,
+ /// Generate code for the BitTest header block, which precedes each sequence
+ /// of BitTestCases.
+ void emitBitTestHeader(SwitchCG::BitTestBlock &BTB,
+ MachineBasicBlock *SwitchMBB);
+ /// Generate code to produce one "bit test" for a given BitTestCase \p B.
+ void emitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB,
+ BranchProbability BranchProbToNext, Register Reg,
+ SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
+
+ bool lowerJumpTableWorkItem(
+ SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+ BranchProbability UnhandledProbs, SwitchCG::CaseClusterIt I,
+ MachineBasicBlock *Fallthrough, bool FallthroughUnreachable);
+
+ bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, Value *Cond,
MachineBasicBlock *Fallthrough,
bool FallthroughUnreachable,
BranchProbability UnhandledProbs,
@@ -325,6 +372,14 @@ private:
MachineIRBuilder &MIB,
MachineBasicBlock *SwitchMBB);
+ bool lowerBitTestWorkItem(
+ SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+ BranchProbability DefaultProb, BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable);
+
bool lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB,
@@ -351,8 +406,6 @@ private:
/// \pre \p U is a return instruction.
bool translateRet(const User &U, MachineIRBuilder &MIRBuilder);
- bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder);
-
bool translateFNeg(const User &U, MachineIRBuilder &MIRBuilder);
bool translateAdd(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -437,6 +490,9 @@ private:
bool translateFAdd(const User &U, MachineIRBuilder &MIRBuilder) {
return translateBinaryOp(TargetOpcode::G_FADD, U, MIRBuilder);
}
+ bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
+ }
bool translateFMul(const User &U, MachineIRBuilder &MIRBuilder) {
return translateBinaryOp(TargetOpcode::G_FMUL, U, MIRBuilder);
}
@@ -515,6 +571,8 @@ private:
/// Current target configuration. Controls how the pass handles errors.
const TargetPassConfig *TPC;
+ CodeGenOpt::Level OptLevel;
+
/// Current optimization remark emitter. Used to report failures.
std::unique_ptr<OptimizationRemarkEmitter> ORE;
@@ -614,12 +672,12 @@ private:
BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const;
- void addSuccessorWithProb(MachineBasicBlock *Src, MachineBasicBlock *Dst,
- BranchProbability Prob);
+ void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown());
public:
- // Ctor, nothing fancy.
- IRTranslator();
+ IRTranslator(CodeGenOpt::Level OptLevel = CodeGenOpt::None);
StringRef getPassName() const override { return "IRTranslator"; }
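
A short sketch of constructing the pass with the new optimization-level argument, in the usual TargetPassConfig hook; MyPassConfig is an invented target pass-config class:

bool MyPassConfig::addIRTranslator() {
  addPass(new IRTranslator(getOptLevel())); // default remains CodeGenOpt::None
  return false;
}
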
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index 1af96cb4a9ee..5b8243a93e7f 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -112,6 +112,14 @@ enum {
/// - InsnID - Instruction ID
/// - Expected opcode
GIM_CheckOpcode,
+
+ /// Check the opcode on the specified instruction, checking 2 acceptable
+ /// alternatives.
+ /// - InsnID - Instruction ID
+ /// - Expected opcode
+ /// - Alternative expected opcode
+ GIM_CheckOpcodeIsEither,
+
/// Check the instruction has the right number of operands
/// - InsnID - Instruction ID
/// - Expected number of operands
@@ -164,6 +172,15 @@ enum {
GIM_CheckMemorySizeEqualToLLT,
GIM_CheckMemorySizeLessThanLLT,
GIM_CheckMemorySizeGreaterThanLLT,
+
+ /// Check if this is a vector that can be treated as a vector splat
+ /// constant. This is valid for both G_BUILD_VECTOR and
+ /// G_BUILD_VECTOR_TRUNC. AllOnes refers to the individual bits, i.e. every
+ /// element must be -1.
+ /// - InsnID - Instruction ID
+ GIM_CheckIsBuildVectorAllOnes,
+ GIM_CheckIsBuildVectorAllZeros,
+
/// Check a generic C++ instruction predicate
/// - InsnID - Instruction ID
/// - PredicateID - The ID of the predicate function to call
@@ -237,6 +254,15 @@ enum {
/// - OtherOpIdx - Other operand index
GIM_CheckIsSameOperand,
+ /// Predicates with 'let PredicateCodeUsesOperands = 1' need to examine some
+ /// named operands that will be recorded in RecordedOperands. Names of these
+ /// operands are referenced in the predicate argument list. The emitter
+ /// determines StoreIdx (the order in which names appear in that list).
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - StoreIdx - Store location in RecordedOperands.
+ GIM_RecordNamedOperand,
+
/// Fail the current try-block, or completely fail to match if there is no
/// current try-block.
GIM_Reject,
@@ -429,6 +455,11 @@ protected:
std::vector<ComplexRendererFns::value_type> Renderers;
RecordedMIVector MIs;
DenseMap<unsigned, unsigned> TempRegisters;
+ /// Named operands that a predicate with 'let PredicateCodeUsesOperands = 1'
+ /// references in its argument list. Operands are inserted at the index set by
+ /// the emitter, which corresponds to the order in which names appear in the
+ /// argument list. Currently such predicates don't have more than 3 arguments.
+ std::array<const MachineOperand *, 3> RecordedOperands;
MatcherState(unsigned MaxRenderers);
};
@@ -489,21 +520,13 @@ protected:
llvm_unreachable(
"Subclasses must override this with a tablegen-erated function");
}
- virtual bool testMIPredicate_MI(unsigned, const MachineInstr &) const {
+ virtual bool testMIPredicate_MI(
+ unsigned, const MachineInstr &,
+ const std::array<const MachineOperand *, 3> &Operands) const {
llvm_unreachable(
"Subclasses must override this with a tablegen-erated function");
}
- /// Constrain a register operand of an instruction \p I to a specified
- /// register class. This could involve inserting COPYs before (for uses) or
- /// after (for defs) and may replace the operand of \p I.
- /// \returns whether operand regclass constraining succeeded.
- bool constrainOperandRegToRegClass(MachineInstr &I, unsigned OpIdx,
- const TargetRegisterClass &RC,
- const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const;
-
bool isOperandImmEqual(const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const;
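
To illustrate the encodings documented above, a hand-written fragment of what a generated entry could look like; the operand indices and the surrounding rule are invented for the example, and real tables are emitted by TableGen:

const int64_t MatchTable[] = {
    // Opcode check with two accepted alternatives.
    GIM_CheckOpcodeIsEither, /*InsnID*/ 0, TargetOpcode::G_BUILD_VECTOR,
    TargetOpcode::G_BUILD_VECTOR_TRUNC,
    // Splat-constant check on the same instruction.
    GIM_CheckIsBuildVectorAllZeros, /*InsnID*/ 0,
    // Record operand 1 into RecordedOperands[0] for a C++ predicate.
    GIM_RecordNamedOperand, /*InsnID*/ 0, /*OpIdx*/ 1, /*StoreIdx*/ 0,
    GIM_Reject,
};
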
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index 73ac578d61be..82e26b0bc355 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -154,24 +154,31 @@ bool InstructionSelector::executeMatchTable(
break;
}
- case GIM_CheckOpcode: {
+ case GIM_CheckOpcode:
+ case GIM_CheckOpcodeIsEither: {
int64_t InsnID = MatchTable[CurrentIdx++];
- int64_t Expected = MatchTable[CurrentIdx++];
+ int64_t Expected0 = MatchTable[CurrentIdx++];
+ int64_t Expected1 = -1;
+ if (MatcherOpcode == GIM_CheckOpcodeIsEither)
+ Expected1 = MatchTable[CurrentIdx++];
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
unsigned Opcode = State.MIs[InsnID]->getOpcode();
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
- dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID
- << "], ExpectedOpcode=" << Expected
- << ") // Got=" << Opcode << "\n");
- if (Opcode != Expected) {
+ dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID
+ << "], ExpectedOpcode=" << Expected0;
+ if (MatcherOpcode == GIM_CheckOpcodeIsEither)
+ dbgs() << " || " << Expected1;
+ dbgs() << ") // Got=" << Opcode << "\n";
+ );
+
+ if (Opcode != Expected0 && Opcode != Expected1) {
if (handleReject() == RejectAndGiveUp)
return false;
}
break;
}
-
case GIM_SwitchOpcode: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t LowerBound = MatchTable[CurrentIdx++];
@@ -193,7 +200,7 @@ bool InstructionSelector::executeMatchTable(
CurrentIdx = MatchTable[CurrentIdx + (Opcode - LowerBound)];
if (!CurrentIdx) {
CurrentIdx = Default;
- break;
+ break;
}
OnFailResumeAt.push_back(Default);
break;
@@ -321,6 +328,35 @@ bool InstructionSelector::executeMatchTable(
return false;
break;
}
+ case GIM_CheckIsBuildVectorAllOnes:
+ case GIM_CheckIsBuildVectorAllZeros: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+
+ DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx
+ << ": GIM_CheckBuildVectorAll{Zeros|Ones}(MIs["
+ << InsnID << "])\n");
+ assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+
+ const MachineInstr *MI = State.MIs[InsnID];
+ assert((MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR ||
+ MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR_TRUNC) &&
+ "Expected G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC");
+
+ if (MatcherOpcode == GIM_CheckIsBuildVectorAllOnes) {
+ if (!isBuildVectorAllOnes(*MI, MRI)) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ }
+ } else {
+ if (!isBuildVectorAllZeros(*MI, MRI)) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ }
+ }
+
+ break;
+ }
case GIM_CheckCxxInsnPredicate: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t Predicate = MatchTable[CurrentIdx++];
@@ -331,7 +367,8 @@ bool InstructionSelector::executeMatchTable(
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
assert(Predicate > GIPFP_MI_Invalid && "Expected a valid predicate");
- if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID]))
+ if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID],
+ State.RecordedOperands))
if (handleReject() == RejectAndGiveUp)
return false;
break;
@@ -581,6 +618,20 @@ bool InstructionSelector::executeMatchTable(
break;
}
+ case GIM_RecordNamedOperand: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t OpIdx = MatchTable[CurrentIdx++];
+ uint64_t StoreIdx = MatchTable[CurrentIdx++];
+
+ DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIM_RecordNamedOperand(MIs["
+ << InsnID << "]->getOperand(" << OpIdx
+ << "), StoreIdx=" << StoreIdx << ")\n");
+ assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+ assert(StoreIdx < State.RecordedOperands.size() && "Index out of range");
+ State.RecordedOperands[StoreIdx] = &State.MIs[InsnID]->getOperand(OpIdx);
+ break;
+ }
case GIM_CheckRegBankForClass: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
@@ -1007,8 +1058,12 @@ bool InstructionSelector::executeMatchTable(
int64_t OpIdx = MatchTable[CurrentIdx++];
int64_t RCEnum = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
- constrainOperandRegToRegClass(*OutMIs[InsnID].getInstr(), OpIdx,
- *TRI.getRegClass(RCEnum), TII, TRI, RBI);
+ MachineInstr &I = *OutMIs[InsnID].getInstr();
+ MachineFunction &MF = *I.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterClass &RC = *TRI.getRegClass(RCEnum);
+ MachineOperand &MO = I.getOperand(OpIdx);
+ constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC, MO);
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
dbgs() << CurrentIdx << ": GIR_ConstrainOperandRC(OutMIs["
<< InsnID << "], " << OpIdx << ", " << RCEnum
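As a hand-written illustration (not TableGen output) of how the interpreter above walks the new entries, a match-table fragment could look like the following; the opcodes and indices are assumptions.

    static const int64_t ExampleTable[] = {
        // Two acceptable opcodes follow the instruction ID.
        GIM_CheckOpcodeIsEither, /*InsnID*/ 0,
        TargetOpcode::G_BUILD_VECTOR, TargetOpcode::G_BUILD_VECTOR_TRUNC,
        // Only matches if every element of MIs[0] is -1 or undef.
        GIM_CheckIsBuildVectorAllOnes, /*InsnID*/ 0,
        // Stash operand 1 of MIs[0] into State.RecordedOperands[0] for a
        // PredicateCodeUsesOperands predicate to inspect later.
        GIM_RecordNamedOperand, /*InsnID*/ 0, /*OpIdx*/ 1, /*StoreIdx*/ 0,
        GIM_Reject, // fragment truncated
    };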
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 016b0bacab85..e7bda3b4bd97 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -105,19 +105,23 @@ public:
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
// zext(trunc x) - > and (aext/copy/trunc x), mask
+ // zext(sext x) -> and (sext x), mask
Register TruncSrc;
- if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
+ Register SextSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
+ mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
LLT DstTy = MRI.getType(DstReg);
if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
isConstantUnsupported(DstTy))
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
LLT SrcTy = MRI.getType(SrcReg);
- APInt Mask = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits());
- auto MIBMask = Builder.buildConstant(
- DstTy, Mask.zext(DstTy.getScalarSizeInBits()));
- Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc),
- MIBMask);
+ APInt MaskVal = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits());
+ auto Mask = Builder.buildConstant(
+ DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
+ auto Extended = SextSrc ? Builder.buildSExtOrTrunc(DstTy, SextSrc) :
+ Builder.buildAnyExtOrTrunc(DstTy, TruncSrc);
+ Builder.buildAnd(DstReg, Extended, Mask);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
@@ -482,7 +486,7 @@ public:
MachineRegisterInfo &MRI,
MachineIRBuilder &Builder,
SmallVectorImpl<Register> &UpdatedDefs,
- GISelObserverWrapper &Observer) {
+ GISelChangeObserver &Observer) {
if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
Builder.buildCopy(DstReg, SrcReg);
UpdatedDefs.push_back(DstReg);
@@ -502,20 +506,78 @@ public:
Observer.changedInstr(*UseMI);
}
- bool tryCombineMerges(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts,
- SmallVectorImpl<Register> &UpdatedDefs,
- GISelObserverWrapper &Observer) {
+ /// Return the operand index in \p MI that defines \p Def
+ static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
+ unsigned DefIdx = 0;
+ for (const MachineOperand &Def : MI.defs()) {
+ if (Def.getReg() == SearchDef)
+ break;
+ ++DefIdx;
+ }
+
+ return DefIdx;
+ }
+
+ bool tryCombineUnmergeValues(MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs,
+ GISelChangeObserver &Observer) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
unsigned NumDefs = MI.getNumOperands() - 1;
- MachineInstr *SrcDef =
- getDefIgnoringCopies(MI.getOperand(NumDefs).getReg(), MRI);
+ Register SrcReg = MI.getOperand(NumDefs).getReg();
+ MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
if (!SrcDef)
return false;
LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg());
LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
+
+ if (SrcDef->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
+ // %0:_(<4 x s16>) = G_FOO
+ // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
+ // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
+ //
+ // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
+ const unsigned NumSrcOps = SrcDef->getNumOperands();
+ Register SrcUnmergeSrc = SrcDef->getOperand(NumSrcOps - 1).getReg();
+ LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);
+
+ // If we need to decrease the number of vector elements in the result type
+ // of an unmerge, this would involve the creation of an equivalent unmerge
+ // to copy back to the original result registers.
+ LegalizeActionStep ActionStep = LI.getAction(
+ {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
+ switch (ActionStep.Action) {
+ case LegalizeActions::Lower:
+ case LegalizeActions::Unsupported:
+ break;
+ case LegalizeActions::FewerElements:
+ case LegalizeActions::NarrowScalar:
+ if (ActionStep.TypeIdx == 1)
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);
+
+ // TODO: Should we try to process out the other defs now? If the other
+ // defs of the source unmerge are also unmerged, we end up with a separate
+ // unmerge for each one.
+ unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);
+ for (unsigned I = 0; I != NumDefs; ++I) {
+ Register Def = MI.getOperand(I).getReg();
+ replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
+ MRI, Builder, UpdatedDefs, Observer);
+ }
+
+ markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
+ return true;
+ }
+
MachineInstr *MergeI = SrcDef;
unsigned ConvertOp = 0;
@@ -743,9 +805,12 @@ public:
Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
break;
case TargetOpcode::G_UNMERGE_VALUES:
- Changed = tryCombineMerges(MI, DeadInsts, UpdatedDefs, WrapperObserver);
+ Changed =
+ tryCombineUnmergeValues(MI, DeadInsts, UpdatedDefs, WrapperObserver);
break;
case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_CONCAT_VECTORS:
// If any of the users of this merge are an unmerge, then add them to the
// artifact worklist in case there's folding that can be done looking up.
for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
@@ -829,7 +894,8 @@ private:
/// dead.
/// MI is not marked dead.
void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ unsigned DefIdx = 0) {
// Collect all the copy instructions that are made dead, due to deleting
// this instruction. Collect all of them until the Trunc(DefMI).
// Eg,
@@ -856,8 +922,27 @@ private:
break;
PrevMI = TmpDef;
}
- if (PrevMI == &DefMI && MRI.hasOneUse(DefMI.getOperand(0).getReg()))
- DeadInsts.push_back(&DefMI);
+
+ if (PrevMI == &DefMI) {
+ unsigned I = 0;
+ bool IsDead = true;
+ for (MachineOperand &Def : DefMI.defs()) {
+ if (I != DefIdx) {
+ if (!MRI.use_empty(Def.getReg())) {
+ IsDead = false;
+ break;
+ }
+ } else {
+ if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
+ break;
+ }
+
+ ++I;
+ }
+
+ if (IsDead)
+ DeadInsts.push_back(&DefMI);
+ }
}
/// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
@@ -866,9 +951,10 @@ private:
/// copies in between the extends and the truncs, and this attempts to collect
/// the in between copies if they're dead.
void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ unsigned DefIdx = 0) {
DeadInsts.push_back(&MI);
- markDefDead(MI, DefMI, DeadInsts);
+ markDefDead(MI, DefMI, DeadInsts, DefIdx);
}
/// Erase the dead instructions in the list and call the observer hooks.
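To make the new zext(sext x) fold concrete, a worked sketch with assumed types: the inner G_SEXT goes s8 to s32 and the outer G_ZEXT goes s32 to s64, so SrcTy (the type of the zext's operand) is s32. Builder, SextSrc and DstReg stand in for the names used in the combine above.

    LLT SrcTy = LLT::scalar(32);  // type of the zext's operand (the sext result)
    LLT DstTy = LLT::scalar(64);  // type of the zext's result
    // All-ones mask of the operand width, zero-extended to the result width:
    // 0x00000000FFFFFFFF for this choice of types.
    APInt MaskVal = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits())
                        .zext(DstTy.getScalarSizeInBits());
    auto Mask = Builder.buildConstant(DstTy, MaskVal);
    auto Ext = Builder.buildSExtOrTrunc(DstTy, SextSrc); // s8 value sign-extended to s64
    Builder.buildAnd(DstReg, Ext, Mask);                 // equals zext(sext x)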
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
index e59bf1b91262..690e84f79a6b 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
@@ -64,9 +64,6 @@ public:
MachineFunctionProperties::Property::NoPHIs);
}
- bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII);
-
bool runOnMachineFunction(MachineFunction &MF) override;
static MFResult
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 058aacf38634..4a982b00125d 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -32,6 +32,7 @@ class LegalizerInfo;
class Legalizer;
class MachineRegisterInfo;
class GISelChangeObserver;
+class TargetLowering;
class LegalizerHelper {
public:
@@ -45,6 +46,7 @@ public:
private:
MachineRegisterInfo &MRI;
const LegalizerInfo &LI;
+ const TargetLowering &TLI;
public:
enum LegalizeResult {
@@ -62,6 +64,7 @@ public:
/// Expose LegalizerInfo so the clients can re-use.
const LegalizerInfo &getLegalizerInfo() const { return LI; }
+ const TargetLowering &getTargetLowering() const { return TLI; }
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer,
MachineIRBuilder &B);
@@ -154,6 +157,10 @@ public:
/// def by inserting a G_BITCAST from \p CastTy
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx);
+ /// Widen \p OrigReg to \p WideTy by merging to a wider type, padding with
+ /// G_IMPLICIT_DEF, and producing dead results.
+ Register widenWithUnmerge(LLT WideTy, Register OrigReg);
+
private:
LegalizeResult
widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
@@ -163,8 +170,10 @@ private:
widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
LegalizeResult
widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
- LegalizeResult widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy);
+ LegalizeResult widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy);
+ LegalizeResult widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy);
/// Helper function to split a wide generic register into bitwise blocks with
/// the given Type (which implies the number of blocks needed). The generic
@@ -191,11 +200,19 @@ private:
LLT PartTy, ArrayRef<Register> PartRegs,
LLT LeftoverTy = LLT(), ArrayRef<Register> LeftoverRegs = {});
- /// Unmerge \p SrcReg into \p Parts with the greatest common divisor type with
- /// \p DstTy and \p NarrowTy. Returns the GCD type.
+ /// Unmerge \p SrcReg into smaller sized values, and append them to \p
+ /// Parts. The elements of \p Parts will be the greatest common divisor type
+ /// of \p DstTy, \p NarrowTy and the type of \p SrcReg. This will compute and
+ /// return the GCD type.
LLT extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
LLT NarrowTy, Register SrcReg);
+ /// Unmerge \p SrcReg into \p GCDTy typed registers. This will append all of
+ /// the unpacked registers to \p Parts. This version is if the common unmerge
+ /// type is already known.
+ void extractGCDType(SmallVectorImpl<Register> &Parts, LLT GCDTy,
+ Register SrcReg);
+
/// Produce a merge of values in \p VRegs to define \p DstReg. Perform a merge
/// from the least common multiple type, and convert as appropriate to \p
/// DstReg.
@@ -228,7 +245,23 @@ private:
ArrayRef<Register> Src1Regs,
ArrayRef<Register> Src2Regs, LLT NarrowTy);
+ void changeOpcode(MachineInstr &MI, unsigned NewOpcode);
+
public:
+ /// Return the alignment to use for a stack temporary object with the given
+ /// type.
+ Align getStackTemporaryAlignment(LLT Type, Align MinAlign = Align()) const;
+
+ /// Create a stack temporary based on the size in bytes and the alignment
+ MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment,
+ MachinePointerInfo &PtrInfo);
+
+ /// Get a pointer to vector element \p Index located in memory for a vector of
+ /// type \p VecTy starting at a base address of \p VecPtr. If \p Index is out
+ /// of bounds the returned pointer is unspecified, but will be within the
+ /// vector bounds.
+ Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index);
+
LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
unsigned TypeIdx, LLT NarrowTy);
@@ -256,9 +289,11 @@ public:
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy);
- LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI,
- unsigned TypeIdx,
- LLT NarrowTy);
+ LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy);
+ LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy);
LegalizeResult
reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
@@ -281,6 +316,7 @@ public:
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty);
+ LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
@@ -291,34 +327,52 @@ public:
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ /// Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
+ LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy);
+
+ /// Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
+ LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy);
+
LegalizeResult lowerBitcast(MachineInstr &MI);
- LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerLoad(MachineInstr &MI);
+ LegalizeResult lowerStore(MachineInstr &MI);
+ LegalizeResult lowerBitCount(MachineInstr &MI);
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
- LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
- LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
- LegalizeResult lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerUITOFP(MachineInstr &MI);
+ LegalizeResult lowerSITOFP(MachineInstr &MI);
+ LegalizeResult lowerFPTOUI(MachineInstr &MI);
LegalizeResult lowerFPTOSI(MachineInstr &MI);
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
- LegalizeResult lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerFPTRUNC(MachineInstr &MI);
+ LegalizeResult lowerFPOWI(MachineInstr &MI);
- LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
- LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerMinMax(MachineInstr &MI);
+ LegalizeResult lowerFCopySign(MachineInstr &MI);
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
LegalizeResult lowerFMad(MachineInstr &MI);
LegalizeResult lowerIntrinsicRound(MachineInstr &MI);
LegalizeResult lowerFFloor(MachineInstr &MI);
LegalizeResult lowerMergeValues(MachineInstr &MI);
LegalizeResult lowerUnmergeValues(MachineInstr &MI);
+ LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI);
LegalizeResult lowerShuffleVector(MachineInstr &MI);
LegalizeResult lowerDynStackAlloc(MachineInstr &MI);
LegalizeResult lowerExtract(MachineInstr &MI);
LegalizeResult lowerInsert(MachineInstr &MI);
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI);
+ LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI);
+ LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI);
+ LegalizeResult lowerShlSat(MachineInstr &MI);
LegalizeResult lowerBswap(MachineInstr &MI);
LegalizeResult lowerBitreverse(MachineInstr &MI);
LegalizeResult lowerReadWriteRegister(MachineInstr &MI);
+ LegalizeResult lowerSMULH_UMULH(MachineInstr &MI);
+ LegalizeResult lowerSelect(MachineInstr &MI);
+
};
/// Helper function that creates a libcall to the given \p Name using the given
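A rough sketch of how the new stack-temporary helpers might be combined to read one vector element through memory; Helper (a LegalizerHelper), MIRBuilder, VecReg, IdxReg, VecTy and EltTy are assumed to be in scope, and this is not the actual G_EXTRACT_VECTOR_ELT lowering.

    MachinePointerInfo PtrInfo;
    Align VecAlign = Helper.getStackTemporaryAlignment(VecTy);
    auto StackTemp = Helper.createStackTemporary(
        TypeSize::Fixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
    // Spill the whole vector, then load the (possibly dynamically indexed) element.
    MIRBuilder.buildStore(VecReg, StackTemp, PtrInfo, VecAlign);
    Register EltPtr =
        Helper.getVectorElementPointer(StackTemp.getReg(0), VecTy, IdxReg);
    MIRBuilder.buildLoad(EltTy, EltPtr, PtrInfo,
                         commonAlignment(VecAlign, EltTy.getSizeInBytes()));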
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 61e0418757bc..c0a89b6ae619 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -37,7 +37,6 @@ extern cl::opt<bool> DisableGISelLegalityCheck;
class LegalizerHelper;
class MachineInstr;
-class MachineIRBuilder;
class MachineRegisterInfo;
class MCInstrInfo;
class GISelChangeObserver;
@@ -183,7 +182,7 @@ struct TypePairAndMemDesc {
MemSize == Other.MemSize;
}
- /// \returns true if this memory access is legal with for the acecss described
+ /// \returns true if this memory access is legal for the access described
/// by \p Other (The alignment is sufficient for the size and result type).
bool isCompatible(const TypePairAndMemDesc &Other) const {
return Type0 == Other.Type0 && Type1 == Other.Type1 &&
@@ -218,11 +217,19 @@ Predicate any(Predicate P0, Predicate P1, Args... args) {
return any(any(P0, P1), args...);
}
-/// True iff the given type index is the specified types.
+/// True iff the given type index is the specified type.
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit);
/// True iff the given type index is one of the specified types.
LegalityPredicate typeInSet(unsigned TypeIdx,
std::initializer_list<LLT> TypesInit);
+
+/// True iff the given type index is not the specified type.
+inline LegalityPredicate typeIsNot(unsigned TypeIdx, LLT Type) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx] != Type;
+ };
+}
+
/// True iff the given types for the given pair of type indexes is one of the
/// specified type pairs.
LegalityPredicate
@@ -308,6 +315,11 @@ LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx);
/// Keep the same scalar or element type as the given type.
LegalizeMutation changeElementTo(unsigned TypeIdx, LLT Ty);
+/// Change the scalar size or element size to have the same scalar size as type
+/// index \p FromIndex. Unlike changeElementTo, this discards pointer types and
+/// only changes the size.
+LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx);
+
/// Widen the scalar type or vector element type for the given type index to the
/// next power of 2.
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0);
@@ -616,8 +628,7 @@ public:
/// The instruction is lowered when type index 0 is any type in the given
/// list. Keep type index 0 as the same type.
LegalizeRuleSet &lowerFor(std::initializer_list<LLT> Types) {
- return actionFor(LegalizeAction::Lower, Types,
- LegalizeMutations::changeTo(0, 0));
+ return actionFor(LegalizeAction::Lower, Types);
}
/// The instruction is lowered when type index 0 is any type in the given
/// list.
@@ -628,8 +639,7 @@ public:
/// The instruction is lowered when type indexes 0 and 1 is any type pair in
/// the given list. Keep type index 0 as the same type.
LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
- return actionFor(LegalizeAction::Lower, Types,
- LegalizeMutations::changeTo(0, 0));
+ return actionFor(LegalizeAction::Lower, Types);
}
/// The instruction is lowered when type indexes 0 and 1 is any type pair in
/// the given list.
@@ -654,6 +664,15 @@ public:
Types2);
}
+ /// The instruction is emitted as a library call.
+ LegalizeRuleSet &libcall() {
+ using namespace LegalizeMutations;
+ // We have no choice but conservatively assume that predicate-less lowering
+ // properly handles all type indices by design:
+ markAllIdxsAsCovered();
+ return actionIf(LegalizeAction::Libcall, always);
+ }
+
/// Like legalIf, but for the Libcall action.
LegalizeRuleSet &libcallIf(LegalityPredicate Predicate) {
// We have no choice but conservatively assume that a libcall with a
@@ -696,6 +715,13 @@ public:
markAllIdxsAsCovered();
return actionIf(LegalizeAction::NarrowScalar, Predicate, Mutation);
}
+ /// Narrow the scalar, specified in mutation, when type indexes 0 and 1 is any
+ /// type pair in the given list.
+ LegalizeRuleSet &
+ narrowScalarFor(std::initializer_list<std::pair<LLT, LLT>> Types,
+ LegalizeMutation Mutation) {
+ return actionFor(LegalizeAction::NarrowScalar, Types, Mutation);
+ }
/// Add more elements to reach the type selected by the mutation if the
/// predicate is true.
@@ -800,6 +826,13 @@ public:
LegalizeMutations::scalarize(TypeIdx));
}
+ LegalizeRuleSet &scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx) {
+ using namespace LegalityPredicates;
+ return actionIf(LegalizeAction::FewerElements,
+ all(Predicate, isVector(typeIdx(TypeIdx))),
+ LegalizeMutations::scalarize(TypeIdx));
+ }
+
/// Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet &minScalarOrElt(unsigned TypeIdx, const LLT Ty) {
using namespace LegalityPredicates;
@@ -857,7 +890,10 @@ public:
return actionIf(
LegalizeAction::NarrowScalar,
[=](const LegalityQuery &Query) {
- return scalarWiderThan(TypeIdx, Ty.getSizeInBits()) && Predicate(Query);
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() &&
+ QueryTy.getSizeInBits() > Ty.getSizeInBits() &&
+ Predicate(Query);
},
changeElementTo(typeIdx(TypeIdx), Ty));
}
@@ -883,11 +919,25 @@ public:
return Query.Types[LargeTypeIdx].getScalarSizeInBits() >
Query.Types[TypeIdx].getSizeInBits();
},
+ LegalizeMutations::changeElementSizeTo(TypeIdx, LargeTypeIdx));
+ }
+
+ /// Narrow the scalar to match the size of another.
+ LegalizeRuleSet &maxScalarSameAs(unsigned TypeIdx, unsigned NarrowTypeIdx) {
+ typeIdx(TypeIdx);
+ return narrowScalarIf(
[=](const LegalityQuery &Query) {
- LLT T = Query.Types[LargeTypeIdx];
- return std::make_pair(TypeIdx,
- T.isVector() ? T.getElementType() : T);
- });
+ return Query.Types[NarrowTypeIdx].getScalarSizeInBits() <
+ Query.Types[TypeIdx].getSizeInBits();
+ },
+ LegalizeMutations::changeElementSizeTo(TypeIdx, NarrowTypeIdx));
+ }
+
+ /// Change the type \p TypeIdx to have the same scalar size as type \p
+ /// SameSizeIdx.
+ LegalizeRuleSet &scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx) {
+ return minScalarSameAs(TypeIdx, SameSizeIdx)
+ .maxScalarSameAs(TypeIdx, SameSizeIdx);
}
/// Conditionally widen the scalar or elt to match the size of another.
@@ -1207,6 +1257,12 @@ public:
bool isLegal(const LegalityQuery &Query) const {
return getAction(Query).Action == LegalizeAction::Legal;
}
+
+ bool isLegalOrCustom(const LegalityQuery &Query) const {
+ auto Action = getAction(Query).Action;
+ return Action == LegalizeAction::Legal || Action == LegalizeAction::Custom;
+ }
+
bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
bool isLegalOrCustom(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const;
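A hypothetical target rule set exercising the new LegalizeRuleSet entry points declared above; the opcodes and type choices are illustrative only.

    // Inside a target's LegalizerInfo constructor (assumed context).
    getActionDefinitionsBuilder(TargetOpcode::G_FREM)
        .libcall();                       // always emit a library call
    getActionDefinitionsBuilder(TargetOpcode::G_SELECT)
        .scalarSameSizeAs(1, 0);          // condition follows the result's scalar size
    getActionDefinitionsBuilder(TargetOpcode::G_PHI)
        .scalarizeIf(LegalityPredicates::typeIsNot(0, LLT::scalar(32)), 0);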
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
index 67e450641eaf..1d1afff7f934 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
@@ -52,9 +52,6 @@ private:
/// TTI used for getting remat costs for instructions.
TargetTransformInfo *TTI;
- /// Check whether or not \p MI needs to be moved close to its uses.
- bool shouldLocalize(const MachineInstr &MI);
-
/// Check if \p MOUse is used in the same basic block as \p Def.
/// If the use is in the same block, we say it is local.
/// When the use is not local, \p InsertMBB will contain the basic
@@ -67,6 +64,11 @@ private:
typedef SmallSetVector<MachineInstr *, 32> LocalizedSetVecT;
+ /// If \p Op is a phi operand and not unique in that phi, that is,
+ /// there are other operands in the phi with the same register,
+ /// return true.
+ bool isNonUniquePhiValue(MachineOperand &Op) const;
+
/// Do inter-block localization from the entry block.
bool localizeInterBlock(MachineFunction &MF,
LocalizedSetVecT &LocalizedInstrs);
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index 043be086ff41..55d6d365fbb4 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -39,11 +39,25 @@ inline OneUse_match<SubPat> m_OneUse(const SubPat &SP) {
return SP;
}
+template <typename SubPatternT> struct OneNonDBGUse_match {
+ SubPatternT SubPat;
+ OneNonDBGUse_match(const SubPatternT &SP) : SubPat(SP) {}
+
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ return MRI.hasOneNonDBGUse(Reg) && SubPat.match(MRI, Reg);
+ }
+};
+
+template <typename SubPat>
+inline OneNonDBGUse_match<SubPat> m_OneNonDBGUse(const SubPat &SP) {
+ return SP;
+}
+
struct ConstantMatch {
int64_t &CR;
ConstantMatch(int64_t &C) : CR(C) {}
bool match(const MachineRegisterInfo &MRI, Register Reg) {
- if (auto MaybeCst = getConstantVRegVal(Reg, MRI)) {
+ if (auto MaybeCst = getConstantVRegSExtVal(Reg, MRI)) {
CR = *MaybeCst;
return true;
}
@@ -53,6 +67,29 @@ struct ConstantMatch {
inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); }
+/// Matcher for a specific constant value.
+struct SpecificConstantMatch {
+ int64_t RequestedVal;
+ SpecificConstantMatch(int64_t RequestedVal) : RequestedVal(RequestedVal) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ int64_t MatchedVal;
+ return mi_match(Reg, MRI, m_ICst(MatchedVal)) && MatchedVal == RequestedVal;
+ }
+};
+
+/// Matches a constant equal to \p RequestedValue.
+inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) {
+ return SpecificConstantMatch(RequestedValue);
+}
+
+///{
+/// Convenience matchers for specific integer values.
+inline SpecificConstantMatch m_ZeroInt() { return SpecificConstantMatch(0); }
+inline SpecificConstantMatch m_AllOnesInt() {
+ return SpecificConstantMatch(-1);
+}
+///}
+
// TODO: Rework this for different kinds of MachineOperand.
// Currently assumes the Src for a match is a register.
// We might want to support taking in some MachineOperands and call getReg on
@@ -198,6 +235,12 @@ m_GAdd(const LHS &L, const RHS &R) {
}
template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_PTR_ADD, true>
+m_GPtrAdd(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_PTR_ADD, true>(L, R);
+}
+
+template <typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SUB> m_GSub(const LHS &L,
const RHS &R) {
return BinaryOp_match<LHS, RHS, TargetOpcode::G_SUB>(L, R);
@@ -234,6 +277,12 @@ m_GAnd(const LHS &L, const RHS &R) {
}
template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_XOR, true>
+m_GXor(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_XOR, true>(L, R);
+}
+
+template <typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true> m_GOr(const LHS &L,
const RHS &R) {
return BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true>(L, R);
@@ -251,6 +300,12 @@ m_GLShr(const LHS &L, const RHS &R) {
return BinaryOp_match<LHS, RHS, TargetOpcode::G_LSHR, false>(L, R);
}
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_ASHR, false>
+m_GAShr(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_ASHR, false>(L, R);
+}
+
// Helper for unary instructions (G_[ZSA]EXT/G_TRUNC) etc
template <typename SrcTy, unsigned Opcode> struct UnaryOp_match {
SrcTy L;
@@ -384,6 +439,51 @@ struct CheckType {
inline CheckType m_SpecificType(LLT Ty) { return Ty; }
+template <typename Src0Ty, typename Src1Ty, typename Src2Ty, unsigned Opcode>
+struct TernaryOp_match {
+ Src0Ty Src0;
+ Src1Ty Src1;
+ Src2Ty Src2;
+
+ TernaryOp_match(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
+ : Src0(Src0), Src1(Src1), Src2(Src2) {}
+ template <typename OpTy>
+ bool match(const MachineRegisterInfo &MRI, OpTy &&Op) {
+ MachineInstr *TmpMI;
+ if (mi_match(Op, MRI, m_MInstr(TmpMI))) {
+ if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 4) {
+ return (Src0.match(MRI, TmpMI->getOperand(1).getReg()) &&
+ Src1.match(MRI, TmpMI->getOperand(2).getReg()) &&
+ Src2.match(MRI, TmpMI->getOperand(3).getReg()));
+ }
+ }
+ return false;
+ }
+};
+template <typename Src0Ty, typename Src1Ty, typename Src2Ty>
+inline TernaryOp_match<Src0Ty, Src1Ty, Src2Ty,
+ TargetOpcode::G_INSERT_VECTOR_ELT>
+m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2) {
+ return TernaryOp_match<Src0Ty, Src1Ty, Src2Ty,
+ TargetOpcode::G_INSERT_VECTOR_ELT>(Src0, Src1, Src2);
+}
+
+/// Matches a register negated by a G_SUB.
+/// G_SUB 0, %negated_reg
+template <typename SrcTy>
+inline BinaryOp_match<SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB>
+m_Neg(const SrcTy &&Src) {
+ return m_GSub(m_ZeroInt(), Src);
+}
+
+/// Matches a register not-ed by a G_XOR.
+/// G_XOR %not_reg, -1
+template <typename SrcTy>
+inline BinaryOp_match<SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true>
+m_Not(const SrcTy &&Src) {
+ return m_GXor(Src, m_AllOnesInt());
+}
+
} // namespace GMIPatternMatch
} // namespace llvm
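A short usage sketch of the matchers added above, assuming MRI and Reg are in scope and the usual using namespace MIPatternMatch; is in effect.

    Register Src;
    int64_t Off;
    if (mi_match(Reg, MRI, m_Not(m_Reg(Src)))) {
      // Reg is defined by G_XOR %Src, -1.
    }
    if (mi_match(Reg, MRI, m_Neg(m_Reg(Src)))) {
      // Reg is defined by G_SUB 0, %Src.
    }
    if (mi_match(Reg, MRI, m_GPtrAdd(m_Reg(Src), m_ICst(Off)))) {
      // Reg is a G_PTR_ADD of Src and a constant offset Off.
    }
    if (mi_match(Reg, MRI, m_OneNonDBGUse(m_SpecificICst(0)))) {
      // Reg is a zero constant with exactly one non-debug use.
    }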
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index d6498345f25c..1ab4cd704824 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -18,9 +18,10 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
-
+#include "llvm/IR/Module.h"
namespace llvm {
@@ -223,6 +224,7 @@ class MachineIRBuilder {
protected:
void validateTruncExt(const LLT Dst, const LLT Src, bool IsExtend);
+ void validateUnaryOp(const LLT Res, const LLT Op0);
void validateBinaryOp(const LLT Res, const LLT Op0, const LLT Op1);
void validateShiftOp(const LLT Res, const LLT Op0, const LLT Op1);
@@ -250,6 +252,11 @@ public:
setDebugLoc(MI.getDebugLoc());
}
+ MachineIRBuilder(MachineInstr &MI, GISelChangeObserver &Observer) :
+ MachineIRBuilder(MI) {
+ setChangeObserver(Observer);
+ }
+
virtual ~MachineIRBuilder() = default;
MachineIRBuilder(const MachineIRBuilderState &BState) : State(BState) {}
@@ -729,7 +736,7 @@ public:
/// depend on bit 0 (for now).
///
/// \return The newly created instruction.
- MachineInstrBuilder buildBrCond(Register Tst, MachineBasicBlock &Dest);
+ MachineInstrBuilder buildBrCond(const SrcOp &Tst, MachineBasicBlock &Dest);
/// Build and insert G_BRINDIRECT \p Tgt
///
@@ -813,7 +820,17 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr,
- MachineMemOperand &MMO);
+ MachineMemOperand &MMO) {
+ return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
+ }
+
+ /// Build and insert a G_LOAD instruction, while constructing the
+ /// MachineMemOperand.
+ MachineInstrBuilder
+ buildLoad(const DstOp &Res, const SrcOp &Addr, MachinePointerInfo PtrInfo,
+ Align Alignment,
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes());
/// Build and insert `Res = <opcode> Addr, MMO`.
///
@@ -847,6 +864,14 @@ public:
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr,
MachineMemOperand &MMO);
+ /// Build and insert a G_STORE instruction, while constructing the
+ /// MachineMemOperand.
+ MachineInstrBuilder
+ buildStore(const SrcOp &Val, const SrcOp &Addr, MachinePointerInfo PtrInfo,
+ Align Alignment,
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes());
+
/// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`.
///
/// \pre setBasicBlock or setMI must have been called.
@@ -938,6 +963,23 @@ public:
MachineInstrBuilder buildBuildVectorTrunc(const DstOp &Res,
ArrayRef<Register> Ops);
+ /// Build and insert a vector splat of a scalar \p Src using a
+ /// G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idiom.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Src must have the same type as the element type of \p Dst
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src);
+
+ /// Build and insert \p Res = G_SHUFFLE_VECTOR \p Src1, \p Src2, \p Mask
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1,
+ const SrcOp &Src2, ArrayRef<int> Mask);
+
/// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ...
///
/// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more
@@ -1521,6 +1563,13 @@ public:
return buildInstr(TargetOpcode::G_FSUB, {Dst}, {Src0, Src1}, Flags);
}
+ /// Build and insert \p Res = G_FDIV \p Op0, \p Op1
+ MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FDIV, {Dst}, {Src0, Src1}, Flags);
+ }
+
/// Build and insert \p Res = G_FMA \p Op0, \p Op1, \p Op2
MachineInstrBuilder buildFMA(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1, const SrcOp &Src2,
@@ -1583,6 +1632,13 @@ public:
return buildInstr(TargetOpcode::G_FEXP2, {Dst}, {Src}, Flags);
}
+ /// Build and insert \p Dst = G_FPOW \p Src0, \p Src1
+ MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FPOW, {Dst}, {Src0, Src1}, Flags);
+ }
+
/// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1) {
@@ -1633,6 +1689,11 @@ public:
return buildInstr(TargetOpcode::G_UMAX, {Dst}, {Src0, Src1});
}
+ /// Build and insert \p Dst = G_ABS \p Src
+ MachineInstrBuilder buildAbs(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_ABS, {Dst}, {Src});
+ }
+
/// Build and insert \p Res = G_JUMP_TABLE \p JTI
///
/// G_JUMP_TABLE sets \p Res to the address of the jump table specified by
@@ -1641,6 +1702,101 @@ public:
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildJumpTable(const LLT PtrTy, unsigned JTI);
+ /// Build and insert \p Res = G_VECREDUCE_SEQ_FADD \p ScalarIn, \p VecIn
+ ///
+ /// \p ScalarIn is the scalar accumulator input to start the sequential
+ /// reduction operation of \p VecIn.
+ MachineInstrBuilder buildVecReduceSeqFAdd(const DstOp &Dst,
+ const SrcOp &ScalarIn,
+ const SrcOp &VecIn) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_SEQ_FADD, {Dst},
+ {ScalarIn, {VecIn}});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_SEQ_FMUL \p ScalarIn, \p VecIn
+ ///
+ /// \p ScalarIn is the scalar accumulator input to start the sequential
+ /// reduction operation of \p VecIn.
+ MachineInstrBuilder buildVecReduceSeqFMul(const DstOp &Dst,
+ const SrcOp &ScalarIn,
+ const SrcOp &VecIn) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_SEQ_FMUL, {Dst},
+ {ScalarIn, {VecIn}});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_FADD \p ScalarIn, \p VecIn
+ ///
+ /// \p ScalarIn is the scalar accumulator input to the reduction operation of
+ /// \p VecIn.
+ MachineInstrBuilder buildVecReduceFAdd(const DstOp &Dst,
+ const SrcOp &ScalarIn,
+ const SrcOp &VecIn) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_FADD, {Dst}, {ScalarIn, VecIn});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_FMUL \p ScalarIn, \p VecIn
+ ///
+ /// \p ScalarIn is the scalar accumulator input to the reduction operation of
+ /// \p VecIn.
+ MachineInstrBuilder buildVecReduceFMul(const DstOp &Dst,
+ const SrcOp &ScalarIn,
+ const SrcOp &VecIn) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_FMUL, {Dst}, {ScalarIn, VecIn});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_FMAX \p Src
+ MachineInstrBuilder buildVecReduceFMax(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_FMAX, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_FMIN \p Src
+ MachineInstrBuilder buildVecReduceFMin(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_FMIN, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_ADD \p Src
+ MachineInstrBuilder buildVecReduceAdd(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_ADD, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_MUL \p Src
+ MachineInstrBuilder buildVecReduceMul(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_MUL, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_AND \p Src
+ MachineInstrBuilder buildVecReduceAnd(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_AND, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_OR \p Src
+ MachineInstrBuilder buildVecReduceOr(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_OR, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_XOR \p Src
+ MachineInstrBuilder buildVecReduceXor(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_XOR, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_SMAX \p Src
+ MachineInstrBuilder buildVecReduceSMax(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_SMAX, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_SMIN \p Src
+ MachineInstrBuilder buildVecReduceSMin(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_SMIN, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_UMAX \p Src
+ MachineInstrBuilder buildVecReduceUMax(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_UMAX, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Res = G_VECREDUCE_UMIN \p Src
+ MachineInstrBuilder buildVecReduceUMin(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_VECREDUCE_UMIN, {Dst}, {Src});
+ }
virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
ArrayRef<SrcOp> SrcOps,
Optional<unsigned> Flags = None);
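An illustrative use of the new builder entry points; MI, Observer, ScalarReg, SrcReg, PtrReg and PtrInfo are placeholders, and ScalarReg is assumed to be s32 so it matches the splat's element type.

    MachineIRBuilder MIB(MI, Observer);            // observer-aware constructor
    LLT S32 = LLT::scalar(32);
    LLT V4S32 = LLT::vector(4, 32);
    auto Splat = MIB.buildShuffleSplat(V4S32, ScalarReg);    // insert-elt + shuffle idiom
    auto Abs = MIB.buildAbs(S32, SrcReg);                    // G_ABS
    auto Sum = MIB.buildVecReduceAdd(S32, Splat);            // G_VECREDUCE_ADD
    auto Ld = MIB.buildLoad(S32, PtrReg, PtrInfo, Align(4)); // MMO built internally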
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
index 8725d96efd82..da785406bc31 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -104,36 +104,37 @@ public:
/// Currently the TableGen-like file would look like:
/// \code
/// PartialMapping[] = {
- /// /*32-bit add*/ {0, 32, GPR}, // Scalar entry repeated for first vec elt.
- /// /*2x32-bit add*/ {0, 32, GPR}, {32, 32, GPR},
- /// /*<2x32-bit> vadd {0, 64, VPR}
+ /// /*32-bit add*/ {0, 32, GPR}, // Scalar entry repeated for first
+ /// // vec elt.
+ /// /*2x32-bit add*/ {0, 32, GPR}, {32, 32, GPR},
+ /// /*<2x32-bit> vadd*/ {0, 64, VPR}
/// }; // PartialMapping duplicated.
///
/// ValueMapping[] {
- /// /*plain 32-bit add*/ {&PartialMapping[0], 1},
+ /// /*plain 32-bit add*/ {&PartialMapping[0], 1},
/// /*expanded vadd on 2xadd*/ {&PartialMapping[1], 2},
- /// /*plain <2x32-bit> vadd*/ {&PartialMapping[3], 1}
+ /// /*plain <2x32-bit> vadd*/ {&PartialMapping[3], 1}
/// };
/// \endcode
///
/// With the array of pointer, we would have:
/// \code
/// PartialMapping[] = {
- /// /*32-bit add lower */ {0, 32, GPR},
+ /// /*32-bit add lower */ { 0, 32, GPR},
/// /*32-bit add upper */ {32, 32, GPR},
- /// /*<2x32-bit> vadd {0, 64, VPR}
+ /// /*<2x32-bit> vadd */ { 0, 64, VPR}
/// }; // No more duplication.
///
/// BreakDowns[] = {
- /// /*AddBreakDown*/ &PartialMapping[0],
+ /// /*AddBreakDown*/ &PartialMapping[0],
/// /*2xAddBreakDown*/ &PartialMapping[0], &PartialMapping[1],
- /// /*VAddBreakDown*/ &PartialMapping[2]
+ /// /*VAddBreakDown*/ &PartialMapping[2]
/// }; // Addresses of PartialMapping duplicated (smaller).
///
/// ValueMapping[] {
- /// /*plain 32-bit add*/ {&BreakDowns[0], 1},
+ /// /*plain 32-bit add*/ {&BreakDowns[0], 1},
/// /*expanded vadd on 2xadd*/ {&BreakDowns[1], 2},
- /// /*plain <2x32-bit> vadd*/ {&BreakDowns[3], 1}
+ /// /*plain <2x32-bit> vadd*/ {&BreakDowns[3], 1}
/// };
/// \endcode
///
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 42d86917721a..68553ab5b1a8 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -18,11 +18,12 @@
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MachineValueType.h"
+#include <cstdint>
namespace llvm {
class AnalysisUsage;
+class GISelKnownBits;
class MachineFunction;
class MachineInstr;
class MachineOperand;
@@ -33,10 +34,10 @@ class MachineRegisterInfo;
class MCInstrDesc;
class RegisterBankInfo;
class TargetInstrInfo;
+class TargetLowering;
class TargetPassConfig;
class TargetRegisterInfo;
class TargetRegisterClass;
-class Twine;
class ConstantFP;
class APFloat;
@@ -51,9 +52,10 @@ Register constrainRegToClass(MachineRegisterInfo &MRI,
/// Constrain the Register operand OpIdx, so that it is now constrained to the
/// TargetRegisterClass passed as an argument (RegClass).
-/// If this fails, create a new virtual register in the correct class and
-/// insert a COPY before \p InsertPt if it is a use or after if it is a
-/// definition. The debug location of \p InsertPt is used for the new copy.
+/// If this fails, create a new virtual register in the correct class and insert
+/// a COPY before \p InsertPt if it is a use or after if it is a definition.
+/// In both cases, the function also updates the register of \p RegMO. The debug
+/// location of \p InsertPt is used for the new copy.
///
/// \return The virtual register constrained to the right register class.
Register constrainOperandRegClass(const MachineFunction &MF,
@@ -63,12 +65,13 @@ Register constrainOperandRegClass(const MachineFunction &MF,
const RegisterBankInfo &RBI,
MachineInstr &InsertPt,
const TargetRegisterClass &RegClass,
- const MachineOperand &RegMO);
+ MachineOperand &RegMO);
-/// Try to constrain Reg so that it is usable by argument OpIdx of the
-/// provided MCInstrDesc \p II. If this fails, create a new virtual
-/// register in the correct class and insert a COPY before \p InsertPt
-/// if it is a use or after if it is a definition.
+/// Try to constrain Reg so that it is usable by argument OpIdx of the provided
+/// MCInstrDesc \p II. If this fails, create a new virtual register in the
+/// correct class and insert a COPY before \p InsertPt if it is a use or after
+/// if it is a definition. In both cases, the function also updates the register
+/// of \p RegMO.
/// This is equivalent to constrainOperandRegClass(..., RegClass, ...)
/// with RegClass obtained from the MCInstrDesc. The debug location of \p
/// InsertPt is used for the new copy.
@@ -80,7 +83,7 @@ Register constrainOperandRegClass(const MachineFunction &MF,
const TargetInstrInfo &TII,
const RegisterBankInfo &RBI,
MachineInstr &InsertPt, const MCInstrDesc &II,
- const MachineOperand &RegMO, unsigned OpIdx);
+ MachineOperand &RegMO, unsigned OpIdx);
/// Mutate the newly-selected instruction \p I to constrain its (possibly
/// generic) virtual register operands to the instruction's register class.
@@ -121,14 +124,19 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
MachineOptimizationRemarkEmitter &MORE,
MachineOptimizationRemarkMissed &R);
+/// If \p VReg is defined by a G_CONSTANT, return the corresponding value.
+Optional<APInt> getConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI);
+
/// If \p VReg is defined by a G_CONSTANT fits in int64_t
/// returns it.
-Optional<int64_t> getConstantVRegVal(Register VReg,
- const MachineRegisterInfo &MRI);
+Optional<int64_t> getConstantVRegSExtVal(Register VReg,
+ const MachineRegisterInfo &MRI);
+
/// Simple struct used to hold a constant integer value and a virtual
/// register.
struct ValueAndVReg {
- int64_t Value;
+ APInt Value;
Register VReg;
};
/// If \p VReg is defined by a statically evaluable chain of
@@ -138,10 +146,13 @@ struct ValueAndVReg {
/// When \p LookThroughInstrs == false this function behaves like
/// getConstantVRegVal.
/// When \p HandleFConstants == false the function bails on G_FCONSTANTs.
+/// When \p LookThroughAnyExt == true the function treats G_ANYEXT the same as
+/// G_SEXT.
Optional<ValueAndVReg>
getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
bool LookThroughInstrs = true,
- bool HandleFConstants = true);
+ bool HandleFConstants = true,
+ bool LookThroughAnyExt = false);
const ConstantFP* getConstantFPVRegVal(Register VReg,
const MachineRegisterInfo &MRI);
@@ -151,9 +162,20 @@ const ConstantFP* getConstantFPVRegVal(Register VReg,
MachineInstr *getOpcodeDef(unsigned Opcode, Register Reg,
const MachineRegisterInfo &MRI);
-/// Find the def instruction for \p Reg, folding away any trivial copies. Note
-/// it may still return a COPY, if it changes the type. May return nullptr if \p
-/// Reg is not a generic virtual register.
+/// Simple struct used to hold a Register value and the instruction which
+/// defines it.
+struct DefinitionAndSourceRegister {
+ MachineInstr *MI;
+ Register Reg;
+};
+
+/// Find the def instruction for \p Reg and the underlying value Register,
+/// folding away any copies.
+Optional<DefinitionAndSourceRegister>
+getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI);
+
+/// Find the def instruction for \p Reg, folding away any trivial copies. May
+/// return nullptr if \p Reg is not a generic virtual register.
MachineInstr *getDefIgnoringCopies(Register Reg,
const MachineRegisterInfo &MRI);
@@ -178,6 +200,12 @@ Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const Register Op1,
Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm, const MachineRegisterInfo &MRI);
+/// Test if the given value is known to have exactly one bit set. This differs
+/// from computeKnownBits in that it doesn't necessarily determine which bit is
+/// set.
+bool isKnownToBeAPowerOfTwo(Register Val, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KnownBits = nullptr);
+
/// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
/// this returns if \p Val can be assumed to never be a signaling NaN.
bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
@@ -190,17 +218,65 @@ inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO);
-/// Return the least common multiple type of \p Ty0 and \p Ty1, by changing
-/// the number of vector elements or scalar bitwidth. The intent is a
-/// G_MERGE_VALUES can be constructed from \p Ty0 elements, and unmerged into
-/// \p Ty1.
-LLT getLCMType(LLT Ty0, LLT Ty1);
+/// Return a virtual register corresponding to the incoming argument register \p
+/// PhysReg. This register is expected to have class \p RC, and optional type \p
+/// RegTy. This assumes all references to the register will use the same type.
+///
+/// If there is an existing live-in argument register, it will be returned.
+/// This will also ensure there is a valid copy
+Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII,
+ MCRegister PhysReg,
+ const TargetRegisterClass &RC,
+ LLT RegTy = LLT());
+
+/// Return the least common multiple type of \p OrigTy and \p TargetTy, by changing the
+/// number of vector elements or scalar bitwidth. The intent is a
+/// G_MERGE_VALUES, G_BUILD_VECTOR, or G_CONCAT_VECTORS can be constructed from
+/// \p OrigTy elements, and unmerged into \p TargetTy
+LLVM_READNONE
+LLT getLCMType(LLT OrigTy, LLT TargetTy);
-/// Return a type that is greatest common divisor of \p OrigTy and \p
-/// TargetTy. This will either change the number of vector elements, or
-/// bitwidth of scalars. The intent is the result type can be used as the
-/// result of a G_UNMERGE_VALUES from \p OrigTy.
+/// Return a type where the total size is the greatest common divisor of \p
+/// OrigTy and \p TargetTy. This will try to either change the number of vector
+/// elements, or bitwidth of scalars. The intent is the result type can be used
+/// as the result of a G_UNMERGE_VALUES from \p OrigTy, and then some
+/// combination of G_MERGE_VALUES, G_BUILD_VECTOR and G_CONCAT_VECTORS (possibly
+/// with intermediate casts) can re-form \p TargetTy.
+///
+/// If these are vectors with different element types, this will try to produce
+/// a vector with a compatible total size, but the element type of \p OrigTy. If
+/// this can't be satisfied, this will produce a scalar smaller than the
+/// original vector elements.
+///
+/// In the worst case, this returns LLT::scalar(1)
+LLVM_READNONE
LLT getGCDType(LLT OrigTy, LLT TargetTy);
+/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat.
+/// If \p MI is not a splat, returns None.
+Optional<int> getSplatIndex(MachineInstr &MI);
+
+/// Returns a scalar constant of a G_BUILD_VECTOR splat if it exists.
+Optional<int64_t> getBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
+/// Return true if the specified instruction is a G_BUILD_VECTOR or
+/// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef.
+bool isBuildVectorAllZeros(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
+/// Return true if the specified instruction is a G_BUILD_VECTOR or
+/// G_BUILD_VECTOR_TRUNC where all of the elements are ~0 or undef.
+bool isBuildVectorAllOnes(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
+/// Returns true if given the TargetLowering's boolean contents information,
+/// the value \p Val contains a true value.
+bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
+ bool IsFP);
+
+/// Returns an integer representing true, as defined by the
+/// TargetBooleanContents.
+int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
} // End namespace llvm.
#endif
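A quick sketch of the reshaped constant helpers, with VReg and MRI assumed in scope: getConstantVRegVal now returns the full APInt, getConstantVRegSExtVal keeps the old int64_t behaviour, and getDefSrcRegIgnoringCopies pairs the defining instruction with the looked-through register.

    if (Optional<APInt> Cst = getConstantVRegVal(VReg, MRI)) {
      // Full-width constant; no implicit truncation to 64 bits.
      unsigned Width = Cst->getBitWidth();
      (void)Width;
    }
    if (Optional<int64_t> SCst = getConstantVRegSExtVal(VReg, MRI)) {
      // Sign-extended 64-bit view, matching the old getConstantVRegVal.
      (void)*SCst;
    }
    if (auto DefSrc = getDefSrcRegIgnoringCopies(VReg, MRI)) {
      MachineInstr *Def = DefSrc->MI;     // defining instruction
      Register Underlying = DefSrc->Reg;  // value register behind any copies
      (void)Def;
      (void)Underlying;
    }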
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 534f988c5e96..1974e2f842c9 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -86,7 +86,16 @@ enum NodeType {
/// the parent's frame or return address, and so on.
FRAMEADDR,
RETURNADDR,
+
+ /// ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
+ /// This node takes no operand, returns a target-specific pointer to the
+ /// place in the stack frame where the return address of the current
+ /// function is stored.
ADDROFRETURNADDR,
+
+ /// SPONENTRY - Represents the llvm.sponentry intrinsic. Takes no argument
+ /// and returns the stack pointer value at the entry of the current
+ /// function calling this intrinsic.
SPONENTRY,
/// LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
@@ -274,6 +283,16 @@ enum NodeType {
ADDCARRY,
SUBCARRY,
+ /// Carry-using overflow-aware nodes for multiple precision addition and
+ /// subtraction. These nodes take three operands: The first two are normal lhs
+ /// and rhs to the add or sub, and the third is a boolean indicating if there
+ /// is an incoming carry. They produce two results: the normal result of the
+ /// add or sub, and a boolean that indicates if an overflow occurred (*not*
+ /// flag, because it may be a store to memory, etc.). If the type of the
+ /// boolean is not i1 then the high bits conform to getBooleanContents.
+ SADDO_CARRY,
+ SSUBO_CARRY,
+
/// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
/// These nodes take two operands: the normal LHS and RHS to the add. They
/// produce two results: the normal result of the add, and a boolean that
@@ -310,6 +329,16 @@ enum NodeType {
SSUBSAT,
USUBSAT,
+ /// RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift. The first
+ /// operand is the value to be shifted, and the second argument is the amount
+ /// to shift by. Both must be integers of the same bit width (W). If the true
+ /// value of LHS << RHS exceeds the largest value that can be represented by
+ /// W bits, the resulting value is this maximum value. Otherwise, if this
+ /// value is less than the smallest value that can be represented by W bits,
+ /// the resulting value is this minimum value.
+ SSHLSAT,
+ USHLSAT,
+
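A small scalar model of the unsigned saturating shift (USHLSAT) for W = 16, assuming the shift amount is already known to be less than W; the function name is illustrative:

#include <cstdint>

uint16_t ushlSat16(uint16_t LHS, uint16_t RHS) {
  // Widen so the true value of LHS << RHS is representable, then clamp.
  uint32_t Wide = uint32_t(LHS) << RHS;
  return Wide > 0xFFFFu ? uint16_t(0xFFFFu) : uint16_t(Wide);
}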
/// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication
/// on
/// 2 integers with the same width and scale. SCALE represents the scale of
@@ -504,7 +533,8 @@ enum NodeType {
/// IDX is first scaled by the runtime scaling factor of T. Elements IDX
/// through (IDX + num_elements(T) - 1) must be valid VECTOR indices. If this
/// condition cannot be determined statically but is false at runtime, then
- /// the result vector is undefined.
+ /// the result vector is undefined. The IDX parameter must be a vector index
+ /// constant type, which for most targets will be an integer pointer type.
///
/// This operation supports extracting a fixed-width vector from a scalable
/// vector, but not the other way around.
@@ -587,6 +617,7 @@ enum NodeType {
CTLZ,
CTPOP,
BITREVERSE,
+ PARITY,
/// Bit counting operators with an undefined result for zero inputs.
CTTZ_ZERO_UNDEF,
@@ -703,6 +734,21 @@ enum NodeType {
FP_TO_SINT,
FP_TO_UINT,
+ /// FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a
+ /// signed or unsigned integer type with the bit width given in operand 1 with
+ /// the following semantics:
+ ///
+ /// * If the value is NaN, zero is returned.
+ /// * If the value is larger/smaller than the largest/smallest integer,
+ /// the largest/smallest integer is returned (saturation).
+ /// * Otherwise the result of rounding the value towards zero is returned.
+ ///
+ /// The width given in operand 1 must be equal to, or smaller than, the scalar
+ /// result type width. It may end up being smaller than the result width as a
+ /// result of integer type legalization.
+ FP_TO_SINT_SAT,
+ FP_TO_UINT_SAT,
+
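The saturating conversion semantics spelled out above can be modelled in plain C++ for a 32-bit signed result roughly as follows (sketch only, not the actual lowering):

#include <cmath>
#include <cstdint>
#include <limits>

int32_t fpToSIntSat32(double V) {
  if (std::isnan(V))
    return 0;                                      // NaN maps to zero
  if (V <= double(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();    // saturate at the minimum
  if (V >= double(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();    // saturate at the maximum
  return int32_t(std::trunc(V));                   // otherwise round toward zero
}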
/// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type
/// down to the precision of the destination VT. TRUNC is a flag, which is
/// always an integer that is zero or one. If TRUNC is 0, this is a
@@ -844,13 +890,18 @@ enum NodeType {
/// BRCOND - Conditional branch. The first operand is the chain, the
/// second is the condition, the third is the block to branch to if the
/// condition is true. If the type of the condition is not i1, then the
- /// high bits must conform to getBooleanContents.
+ /// high bits must conform to getBooleanContents. If the condition is undef,
+ /// it nondeterministically jumps to the block.
+ /// TODO: Its semantics w.r.t undef requires further discussion; we need to
+ /// make sure that it is consistent with optimizations in MIR & the
+ /// meaning of IMPLICIT_DEF. See https://reviews.llvm.org/D92015
BRCOND,
/// BR_CC - Conditional branch. The behavior is like that of SELECT_CC, in
/// that the condition is represented as condition code, and two nodes to
/// compare, rather than as a combined SetCC node. The operands in order
- /// are chain, cc, lhs, rhs, block to branch to if condition is true.
+ /// are chain, cc, lhs, rhs, block to branch to if condition is true. If
+ /// the condition is undef, it nondeterministically jumps to the block.
BR_CC,
/// INLINEASM - Represents an inline asm block. This node always has two
@@ -981,6 +1032,9 @@ enum NodeType {
/// DEBUGTRAP - Trap intended to get the attention of a debugger.
DEBUGTRAP,
+ /// UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure.
+ UBSANTRAP,
+
/// PREFETCH - This corresponds to a prefetch intrinsic. The first operand
/// is the chain. The other operands are the address to prefetch,
/// read / write specifier, locality specifier and instruction / data cache
@@ -1075,6 +1129,10 @@ enum NodeType {
/// known nonzero constant. The only operand here is the chain.
GET_DYNAMIC_AREA_OFFSET,
+ /// Pseudo probe for AutoFDO, used as a placeholder in a basic block to
+ /// improve the quality of sample counts.
+ PSEUDO_PROBE,
+
/// VSCALE(IMM) - Returns the runtime scaling factor used to calculate the
/// number of elements within a scalable vector. IMM is a constant integer
/// multiplier that is applied to the runtime value.
@@ -1082,12 +1140,25 @@ enum NodeType {
/// Generic reduction nodes. These nodes represent horizontal vector
/// reduction operations, producing a scalar result.
- /// The STRICT variants perform reductions in sequential order. The first
+ /// The SEQ variants perform reductions in sequential order. The first
/// operand is an initial scalar accumulator value, and the second operand
/// is the vector to reduce.
- VECREDUCE_STRICT_FADD,
- VECREDUCE_STRICT_FMUL,
- /// These reductions are non-strict, and have a single vector operand.
+ /// E.g. RES = VECREDUCE_SEQ_FADD f32 ACC, <4 x f32> SRC_VEC
+ /// ... is equivalent to
+ /// RES = (((ACC + SRC_VEC[0]) + SRC_VEC[1]) + SRC_VEC[2]) + SRC_VEC[3]
+ VECREDUCE_SEQ_FADD,
+ VECREDUCE_SEQ_FMUL,
+
+ /// These reductions have relaxed evaluation order semantics, and have a
+ /// single vector operand. The order of evaluation is unspecified. For
+ /// pow-of-2 vectors, one valid legalizer expansion is to use a tree
+ /// reduction, i.e.:
+ /// For RES = VECREDUCE_FADD <8 x f16> SRC_VEC
+ /// PART_RDX = FADD SRC_VEC[0:3], SRC_VEC[4:7]
+ /// PART_RDX2 = FADD PART_RDX[0:1], PART_RDX[2:3]
+ /// RES = FADD PART_RDX2[0], PART_RDX2[1]
+ /// For non-pow-2 vectors, this can be computed by extracting each element
+ /// and performing the operation as if it were scalarized.
VECREDUCE_FADD,
VECREDUCE_FMUL,
/// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@@ -1106,6 +1177,10 @@ enum NodeType {
VECREDUCE_UMAX,
VECREDUCE_UMIN,
+// Vector Predication
+#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) VPSDID,
+#include "llvm/IR/VPIntrinsics.def"
+
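The include above uses the common X-macro pattern: VPIntrinsics.def invokes BEGIN_REGISTER_VP_SDNODE once per vector-predicated node, and the one-line #define turns each invocation into an enum entry. A self-contained illustration of the same pattern with made-up names:

// Made-up example of the X-macro pattern; the real list lives in
// llvm/IR/VPIntrinsics.def.
#define MY_VP_NODES(X) X(MY_VP_ADD) X(MY_VP_AND)

enum MyNodeType {
#define HANDLE_NODE(ID) ID,
  MY_VP_NODES(HANDLE_NODE)
#undef HANDLE_NODE
  MY_BUILTIN_OP_END
};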
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific pre-isel opcode values start here.
BUILTIN_OP_END
@@ -1122,6 +1197,19 @@ static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END + 400;
/// be used with SelectionDAG::getMemIntrinsicNode.
static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500;
+/// Get underlying scalar opcode for VECREDUCE opcode.
+/// For example ISD::AND for ISD::VECREDUCE_AND.
+NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
+
+/// Whether this is a vector-predicated Opcode.
+bool isVPOpcode(unsigned Opcode);
+
+/// The operand position of the vector mask.
+Optional<unsigned> getVPMaskIdx(unsigned Opcode);
+
+/// The operand position of the explicit vector length parameter.
+Optional<unsigned> getVPExplicitVectorLengthIdx(unsigned Opcode);
+
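A sketch of how these queries might be combined for a node's opcode; Opcode and the way the results are consumed are hypothetical, the functions are the ones declared above:

// Sketch: locating the mask and explicit-vector-length operands of a VP node.
if (llvm::ISD::isVPOpcode(Opcode)) {
  if (llvm::Optional<unsigned> MaskIdx = llvm::ISD::getVPMaskIdx(Opcode)) {
    // Operand *MaskIdx is the per-lane mask.
  }
  if (llvm::Optional<unsigned> EVLIdx =
          llvm::ISD::getVPExplicitVectorLengthIdx(Opcode)) {
    // Operand *EVLIdx is the explicit vector length.
  }
}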
//===--------------------------------------------------------------------===//
/// MemIndexedMode enum - This enum defines the load / store indexed
/// addressing modes.
@@ -1244,6 +1332,12 @@ inline bool isUnsignedIntSetCC(CondCode Code) {
return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE;
}
+/// Return true if this is a setcc instruction that performs an equality
+/// comparison when used with integer operands.
+inline bool isIntEqualitySetCC(CondCode Code) {
+ return Code == SETEQ || Code == SETNE;
+}
+
/// Return true if the specified condition returns true if the two operands to
/// the condition are equal. Note that if one of the two operands is a NaN,
/// this value is meaningless.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LexicalScopes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LexicalScopes.h
index bac850d327ef..9617ba80c138 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LexicalScopes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LexicalScopes.h
@@ -194,9 +194,6 @@ public:
return I != LexicalScopeMap.end() ? &I->second : nullptr;
}
- /// dump - Print data structures to dbgs().
- void dump() const;
-
/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
LexicalScope *getOrCreateAbstractScope(const DILocalScope *Scope);
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveInterval.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveInterval.h
index 0764257125e6..c2b158ac1b7f 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Allocator.h"
@@ -597,10 +598,9 @@ namespace llvm {
/// @p End.
bool isUndefIn(ArrayRef<SlotIndex> Undefs, SlotIndex Begin,
SlotIndex End) const {
- return std::any_of(Undefs.begin(), Undefs.end(),
- [Begin,End] (SlotIndex Idx) -> bool {
- return Begin <= Idx && Idx < End;
- });
+ return llvm::any_of(Undefs, [Begin, End](SlotIndex Idx) -> bool {
+ return Begin <= Idx && Idx < End;
+ });
}
/// Flush segment set into the regular segment vector.
@@ -704,12 +704,16 @@ namespace llvm {
private:
SubRange *SubRanges = nullptr; ///< Single linked list of subregister live
/// ranges.
+ const Register Reg; // the register or stack slot of this interval.
+ float Weight = 0.0; // weight of this interval
public:
- const unsigned reg; // the register or stack slot of this interval.
- float weight; // weight of this interval
+ Register reg() const { return Reg; }
+ float weight() const { return Weight; }
+ void incrementWeight(float Inc) { Weight += Inc; }
+ void setWeight(float Value) { Weight = Value; }
- LiveInterval(unsigned Reg, float Weight) : reg(Reg), weight(Weight) {}
+ LiveInterval(unsigned Reg, float Weight) : Reg(Reg), Weight(Weight) {}
~LiveInterval() {
clearSubRanges();
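Because this turns the public reg/weight members into accessors, out-of-tree callers need a mechanical update; a before/after sketch for a hypothetical caller holding a LiveInterval &LI and a float Delta:

// Before this change:
//   unsigned R = LI.reg;
//   LI.weight += Delta;
// After this change:
llvm::Register R = LI.reg();
LI.incrementWeight(Delta);   // or LI.setWeight(NewWeight) to overwrite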
@@ -731,10 +735,10 @@ namespace llvm {
++*this;
return res;
}
- bool operator!=(const SingleLinkedListIterator<T> &Other) {
+ bool operator!=(const SingleLinkedListIterator<T> &Other) const {
return P != Other.operator->();
}
- bool operator==(const SingleLinkedListIterator<T> &Other) {
+ bool operator==(const SingleLinkedListIterator<T> &Other) const {
return P == Other.operator->();
}
T &operator*() const {
@@ -806,14 +810,10 @@ namespace llvm {
unsigned getSize() const;
/// isSpillable - Can this interval be spilled?
- bool isSpillable() const {
- return weight != huge_valf;
- }
+ bool isSpillable() const { return Weight != huge_valf; }
/// markNotSpillable - Mark interval as not spillable
- void markNotSpillable() {
- weight = huge_valf;
- }
+ void markNotSpillable() { Weight = huge_valf; }
/// For a given lane mask @p LaneMask, compute indexes at which the
/// lane is marked undefined by subregister <def,read-undef> definitions.
@@ -834,7 +834,7 @@ namespace llvm {
/// function will be applied to the L0010 and L0008 subranges.
///
/// \p Indexes and \p TRI are required to clean up the VNIs that
- /// don't defne the related lane masks after they get shrunk. E.g.,
+ /// don't define the related lane masks after they get shrunk. E.g.,
/// when L000F gets split into L0007 and L0008 maybe only a subset
/// of the VNIs that defined L000F defines L0007.
///
@@ -870,7 +870,7 @@ namespace llvm {
bool operator<(const LiveInterval& other) const {
const SlotIndex &thisIndex = beginIndex();
const SlotIndex &otherIndex = other.beginIndex();
- return std::tie(thisIndex, reg) < std::tie(otherIndex, other.reg);
+ return std::tie(thisIndex, Reg) < std::tie(otherIndex, other.Reg);
}
void print(raw_ostream &OS) const;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
index c555763a4ec2..ad9e06d2bcf0 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -104,6 +104,9 @@ public:
void verify(LiveVirtRegBitSet& VisitedVRegs);
#endif
+ // Get any virtual register that is assigned to this physical unit.
+ LiveInterval *getOneVReg() const;
+
/// Query interferences between a single live virtual register and a live
/// interval union.
class Query {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h
index 945a40829714..fa08166791b0 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h
@@ -114,8 +114,8 @@ class VirtRegMap;
LiveInterval &getInterval(Register Reg) {
if (hasInterval(Reg))
return *VirtRegIntervals[Reg.id()];
- else
- return createAndComputeVirtRegInterval(Reg);
+
+ return createAndComputeVirtRegInterval(Reg);
}
const LiveInterval &getInterval(Register Reg) const {
@@ -142,14 +142,14 @@ class VirtRegMap;
}
/// Interval removal.
- void removeInterval(unsigned Reg) {
+ void removeInterval(Register Reg) {
delete VirtRegIntervals[Reg];
VirtRegIntervals[Reg] = nullptr;
}
/// Given a register and an instruction, adds a live segment from that
/// instruction to the end of its MBB.
- LiveInterval::Segment addSegmentToEndOfBlock(unsigned reg,
+ LiveInterval::Segment addSegmentToEndOfBlock(Register Reg,
MachineInstr &startInst);
/// After removing some uses of a register, shrink its live range to just
@@ -167,7 +167,7 @@ class VirtRegMap;
/// the lane mask of the subregister range.
/// This may leave the subrange empty which needs to be cleaned up with
/// LiveInterval::removeEmptySubranges() afterwards.
- void shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg);
+ void shrinkToUses(LiveInterval::SubRange &SR, Register Reg);
/// Extend the live range \p LR to reach all points in \p Indices. The
/// points in the \p Indices array must be jointly dominated by the union
@@ -256,9 +256,8 @@ class VirtRegMap;
return Indexes->getMBBFromIndex(index);
}
- void insertMBBInMaps(MachineBasicBlock *MBB,
- MachineInstr *InsertionPoint = nullptr) {
- Indexes->insertMBBInMaps(MBB, InsertionPoint);
+ void insertMBBInMaps(MachineBasicBlock *MBB) {
+ Indexes->insertMBBInMaps(MBB);
assert(unsigned(MBB->getNumber()) == RegMaskBlocks.size() &&
"Blocks must be added in order.");
RegMaskBlocks.push_back(std::make_pair(RegMaskSlots.size(), 0));
@@ -423,7 +422,7 @@ class VirtRegMap;
/// Reg. Subsequent uses should rely on on-demand recomputation. \note This
/// method can result in inconsistent liveness tracking if multiple physical
/// registers share a regunit, and should be used cautiously.
- void removeAllRegUnitsForPhysReg(unsigned Reg) {
+ void removeAllRegUnitsForPhysReg(MCRegister Reg) {
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
removeRegUnit(*Units);
}
@@ -431,7 +430,7 @@ class VirtRegMap;
/// Remove value numbers and related live segments starting at position
/// \p Pos that are part of any liverange of physical register \p Reg or one
/// of its subregisters.
- void removePhysRegDefAt(unsigned Reg, SlotIndex Pos);
+ void removePhysRegDefAt(MCRegister Reg, SlotIndex Pos);
/// Remove value number and related live segments of \p LI and its subranges
/// that start at position \p Pos.
@@ -463,7 +462,7 @@ class VirtRegMap;
bool computeDeadValues(LiveInterval &LI,
SmallVectorImpl<MachineInstr*> *dead);
- static LiveInterval* createInterval(unsigned Reg);
+ static LiveInterval *createInterval(Register Reg);
void printInstrs(raw_ostream &O) const;
void dumpInstrs() const;
@@ -474,7 +473,7 @@ class VirtRegMap;
using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>;
void extendSegmentsToUses(LiveRange &Segments,
- ShrinkToUsesWorkList &WorkList, unsigned Reg,
+ ShrinkToUsesWorkList &WorkList, Register Reg,
LaneBitmask LaneMask);
/// Helper function for repairIntervalsInRange(), walks backwards and
@@ -484,7 +483,7 @@ class VirtRegMap;
void repairOldRegInRange(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
const SlotIndex endIdx, LiveRange &LR,
- unsigned Reg,
+ Register Reg,
LaneBitmask LaneMask = LaneBitmask::getAll());
class HMEditor;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index 3c4273130ab2..87d48adc7f27 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -56,14 +56,14 @@ public:
/// Called when a virtual register is no longer used. Return false to defer
/// its deletion from LiveIntervals.
- virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
+ virtual bool LRE_CanEraseVirtReg(Register) { return true; }
/// Called before shrinking the live range of a virtual register.
- virtual void LRE_WillShrinkVirtReg(unsigned) {}
+ virtual void LRE_WillShrinkVirtReg(Register) {}
/// Called after cloning a virtual register.
/// This is used for new registers representing connected components of Old.
- virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
+ virtual void LRE_DidCloneVirtReg(Register New, Register Old) {}
};
private:
@@ -152,7 +152,7 @@ public:
return *Parent;
}
- Register getReg() const { return getParent().reg; }
+ Register getReg() const { return getParent().reg(); }
/// Iterator for accessing the new registers added by this edit.
using iterator = SmallVectorImpl<Register>::const_iterator;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegMatrix.h
index ab4d44f9a611..fc67bce329ab 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegMatrix.h
@@ -104,19 +104,19 @@ public:
/// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg).
/// When there is more than one kind of interference, the InterferenceKind
/// with the highest enum value is returned.
- InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg);
+ InterferenceKind checkInterference(LiveInterval &VirtReg, MCRegister PhysReg);
/// Check for interference in the segment [Start, End) that may prevent
/// assignment to PhysReg. If this function returns true, there is
/// interference in the segment [Start, End) of some other interval already
/// assigned to PhysReg. If this function returns false, PhysReg is free at
/// the segment [Start, End).
- bool checkInterference(SlotIndex Start, SlotIndex End, unsigned PhysReg);
+ bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg);
/// Assign VirtReg to PhysReg.
/// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
/// update VirtRegMap. The live range is expected to be available in PhysReg.
- void assign(LiveInterval &VirtReg, unsigned PhysReg);
+ void assign(LiveInterval &VirtReg, MCRegister PhysReg);
/// Unassign VirtReg from its PhysReg.
/// Assuming that VirtReg was previously assigned to a PhysReg, this undoes
@@ -124,7 +124,7 @@ public:
void unassign(LiveInterval &VirtReg);
/// Returns true if the given \p PhysReg has any live intervals assigned.
- bool isPhysRegUsed(unsigned PhysReg) const;
+ bool isPhysRegUsed(MCRegister PhysReg) const;
//===--------------------------------------------------------------------===//
// Low-level interface.
@@ -136,22 +136,25 @@ public:
/// Check for regmask interference only.
/// Return true if VirtReg crosses a regmask operand that clobbers PhysReg.
/// If PhysReg is null, check if VirtReg crosses any regmask operands.
- bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0);
+ bool checkRegMaskInterference(LiveInterval &VirtReg,
+ MCRegister PhysReg = MCRegister::NoRegister);
/// Check for regunit interference only.
/// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's
/// register units.
- bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg);
+ bool checkRegUnitInterference(LiveInterval &VirtReg, MCRegister PhysReg);
/// Query a line of the assigned virtual register matrix directly.
/// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
/// This returns a reference to an internal Query data structure that is only
/// valid until the next query() call.
- LiveIntervalUnion::Query &query(const LiveRange &LR, unsigned RegUnit);
+ LiveIntervalUnion::Query &query(const LiveRange &LR, MCRegister RegUnit);
/// Directly access the live interval unions per regunit.
/// This returns an array indexed by the regunit number.
LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; }
+
+ Register getOneVReg(unsigned PhysReg) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h
index 1ed091e3bb5e..39a1ec461ef6 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h
@@ -15,7 +15,7 @@
#define LLVM_CODEGEN_LIVEREGUNITS_H
#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -67,7 +67,6 @@ public:
UsedRegUnits.addReg(Reg);
}
}
- return;
}
/// Initialize and clear the set.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h
index efb0fa85a0fe..9b0667bbbeb0 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h
@@ -105,8 +105,7 @@ public:
/// isLiveIn - Is Reg live in to MBB? This means that Reg is live through
/// MBB, or it is killed in MBB. If Reg is only used by PHI instructions in
/// MBB, it is not considered live in.
- bool isLiveIn(const MachineBasicBlock &MBB,
- unsigned Reg,
+ bool isLiveIn(const MachineBasicBlock &MBB, Register Reg,
MachineRegisterInfo &MRI);
void dump() const;
@@ -149,25 +148,25 @@ private: // Intermediate data structures
/// HandlePhysRegKill - Add kills of Reg and its sub-registers to the
/// uses. Pay special attention to the sub-register uses which may come below
/// the last use of the whole register.
- bool HandlePhysRegKill(unsigned Reg, MachineInstr *MI);
+ bool HandlePhysRegKill(Register Reg, MachineInstr *MI);
/// HandleRegMask - Call HandlePhysRegKill for all registers clobbered by Mask.
void HandleRegMask(const MachineOperand&);
- void HandlePhysRegUse(unsigned Reg, MachineInstr &MI);
- void HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+ void HandlePhysRegUse(Register Reg, MachineInstr &MI);
+ void HandlePhysRegDef(Register Reg, MachineInstr *MI,
SmallVectorImpl<unsigned> &Defs);
void UpdatePhysRegDefs(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs);
/// FindLastRefOrPartRef - Return the last reference or partial reference of
/// the specified register.
- MachineInstr *FindLastRefOrPartRef(unsigned Reg);
+ MachineInstr *FindLastRefOrPartRef(Register Reg);
/// FindLastPartialDef - Return the last partial def of the specified
/// register. Also returns the sub-registers that are defined by the
/// instruction.
- MachineInstr *FindLastPartialDef(unsigned Reg,
- SmallSet<unsigned,4> &PartDefRegs);
+ MachineInstr *FindLastPartialDef(Register Reg,
+ SmallSet<unsigned, 4> &PartDefRegs);
/// analyzePHINodes - Gather information about the PHI nodes in here. In
/// particular, we want to map the variable information of a virtual
@@ -184,21 +183,21 @@ public:
/// RegisterDefIsDead - Return true if the specified instruction defines the
/// specified register, but that definition is dead.
- bool RegisterDefIsDead(MachineInstr &MI, unsigned Reg) const;
+ bool RegisterDefIsDead(MachineInstr &MI, Register Reg) const;
//===--------------------------------------------------------------------===//
// API to update live variable information
/// replaceKillInstruction - Update register kill info by replacing a kill
/// instruction with a new one.
- void replaceKillInstruction(unsigned Reg, MachineInstr &OldMI,
+ void replaceKillInstruction(Register Reg, MachineInstr &OldMI,
MachineInstr &NewMI);
/// addVirtualRegisterKilled - Add information about the fact that the
/// specified register is killed after being used by the specified
/// instruction. If AddIfNotFound is true, add an implicit operand if it's
/// not found.
- void addVirtualRegisterKilled(unsigned IncomingReg, MachineInstr &MI,
+ void addVirtualRegisterKilled(Register IncomingReg, MachineInstr &MI,
bool AddIfNotFound = false) {
if (MI.addRegisterKilled(IncomingReg, TRI, AddIfNotFound))
getVarInfo(IncomingReg).Kills.push_back(&MI);
@@ -208,14 +207,14 @@ public:
/// register from the live variable information. Returns true if the
/// variable was marked as killed by the specified instruction,
/// false otherwise.
- bool removeVirtualRegisterKilled(unsigned reg, MachineInstr &MI) {
- if (!getVarInfo(reg).removeKill(MI))
+ bool removeVirtualRegisterKilled(Register Reg, MachineInstr &MI) {
+ if (!getVarInfo(Reg).removeKill(MI))
return false;
bool Removed = false;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.isKill() && MO.getReg() == reg) {
+ if (MO.isReg() && MO.isKill() && MO.getReg() == Reg) {
MO.setIsKill(false);
Removed = true;
break;
@@ -234,7 +233,7 @@ public:
/// addVirtualRegisterDead - Add information about the fact that the specified
/// register is dead after being used by the specified instruction. If
/// AddIfNotFound is true, add an implicit operand if it's not found.
- void addVirtualRegisterDead(unsigned IncomingReg, MachineInstr &MI,
+ void addVirtualRegisterDead(Register IncomingReg, MachineInstr &MI,
bool AddIfNotFound = false) {
if (MI.addRegisterDead(IncomingReg, TRI, AddIfNotFound))
getVarInfo(IncomingReg).Kills.push_back(&MI);
@@ -244,14 +243,14 @@ public:
/// register from the live variable information. Returns true if the
/// variable was marked dead at the specified instruction, false
/// otherwise.
- bool removeVirtualRegisterDead(unsigned reg, MachineInstr &MI) {
- if (!getVarInfo(reg).removeKill(MI))
+ bool removeVirtualRegisterDead(Register Reg, MachineInstr &MI) {
+ if (!getVarInfo(Reg).removeKill(MI))
return false;
bool Removed = false;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.isDef() && MO.getReg() == reg) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
MO.setIsDead(false);
Removed = true;
break;
@@ -270,24 +269,25 @@ public:
/// getVarInfo - Return the VarInfo structure for the specified VIRTUAL
/// register.
- VarInfo &getVarInfo(unsigned RegIdx);
+ VarInfo &getVarInfo(Register Reg);
void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock,
MachineBasicBlock *BB);
- void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock,
+ void MarkVirtRegAliveInBlock(VarInfo &VRInfo, MachineBasicBlock *DefBlock,
MachineBasicBlock *BB,
- std::vector<MachineBasicBlock*> &WorkList);
- void HandleVirtRegDef(unsigned reg, MachineInstr &MI);
- void HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, MachineInstr &MI);
+ SmallVectorImpl<MachineBasicBlock *> &WorkList);
+
+ void HandleVirtRegDef(Register reg, MachineInstr &MI);
+ void HandleVirtRegUse(Register reg, MachineBasicBlock *MBB, MachineInstr &MI);
- bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB) {
+ bool isLiveIn(Register Reg, const MachineBasicBlock &MBB) {
return getVarInfo(Reg).isLiveIn(MBB, Reg, *MRI);
}
/// isLiveOut - Determine if Reg is live out from MBB, when not considering
/// PHI nodes. This means that Reg is either killed by a successor block or
/// passed through one.
- bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB);
+ bool isLiveOut(Register Reg, const MachineBasicBlock &MBB);
/// addNewBlock - Add a new basic block BB between DomBB and SuccBB. All
/// variables that are live out of DomBB and live into SuccBB will be marked
@@ -303,10 +303,10 @@ public:
std::vector<SparseBitVector<>> &LiveInSets);
/// isPHIJoin - Return true if Reg is a phi join register.
- bool isPHIJoin(unsigned Reg) { return PHIJoins.test(Reg); }
+ bool isPHIJoin(Register Reg) { return PHIJoins.test(Reg.id()); }
/// setPHIJoin - Mark Reg as a phi join register.
- void setPHIJoin(unsigned Reg) { PHIJoins.set(Reg); }
+ void setPHIJoin(Register Reg) { PHIJoins.set(Reg.id()); }
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LowLevelType.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LowLevelType.h
index 6295d86f749c..402fa2ce61e7 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LowLevelType.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LowLevelType.h
@@ -23,6 +23,7 @@ namespace llvm {
class DataLayout;
class Type;
+struct fltSemantics;
/// Construct a low-level type based on an LLVM type.
LLT getLLTForType(Type &Ty, const DataLayout &DL);
@@ -35,6 +36,9 @@ MVT getMVTForLLT(LLT Ty);
/// scalarable vector types, and will assert if used.
LLT getLLTForMVT(MVT Ty);
+/// Get the appropriate floating point arithmetic semantics based on the bit size
+/// of the given scalar LLT.
+const llvm::fltSemantics &getFltSemanticForLLT(LLT Ty);
}
#endif // LLVM_CODEGEN_LOWLEVELTYPE_H
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MBFIWrapper.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MBFIWrapper.h
index 062431a6f96b..bcbf3eedf59d 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MBFIWrapper.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MBFIWrapper.h
@@ -28,6 +28,8 @@ class MBFIWrapper {
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F);
+ Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const;
+
raw_ostream &printBlockFreq(raw_ostream &OS,
const MachineBasicBlock *MBB) const;
raw_ostream &printBlockFreq(raw_ostream &OS,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h
index e57c32c5ae61..9cb92091db50 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h
@@ -14,11 +14,15 @@
#ifndef LLVM_CODEGEN_MIRFORMATTER_H
#define LLVM_CODEGEN_MIRFORMATTER_H
-#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
namespace llvm {
+class MachineFunction;
+class MachineInstr;
struct PerFunctionMIParsingState;
struct SlotMapping;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index c68b073ebb8c..4a7406473b11 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -159,6 +159,22 @@ template <> struct ScalarTraits<MaybeAlign> {
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
};
+template <> struct ScalarTraits<Align> {
+ static void output(const Align &Alignment, void *, llvm::raw_ostream &OS) {
+ OS << Alignment.value();
+ }
+ static StringRef input(StringRef Scalar, void *, Align &Alignment) {
+ unsigned long long N;
+ if (getAsUnsignedInteger(Scalar, 10, N))
+ return "invalid number";
+ if (!isPowerOf2_64(N))
+ return "must be a power of two";
+ Alignment = Align(N);
+ return StringRef();
+ }
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
} // end namespace yaml
} // end namespace llvm
@@ -331,7 +347,7 @@ struct ScalarEnumerationTraits<TargetStackID::Value> {
static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) {
IO.enumCase(ID, "default", TargetStackID::Default);
IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
- IO.enumCase(ID, "sve-vec", TargetStackID::SVEVector);
+ IO.enumCase(ID, "scalable-vector", TargetStackID::ScalableVector);
IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
}
};
@@ -425,6 +441,36 @@ template <> struct MappingTraits<CallSiteInfo> {
static const bool flow = true;
};
+/// Serializable representation of debug value substitutions.
+struct DebugValueSubstitution {
+ unsigned SrcInst;
+ unsigned SrcOp;
+ unsigned DstInst;
+ unsigned DstOp;
+
+ bool operator==(const DebugValueSubstitution &Other) const {
+ return std::tie(SrcInst, SrcOp, DstInst, DstOp) ==
+ std::tie(Other.SrcInst, Other.SrcOp, Other.DstInst, Other.DstOp);
+ }
+};
+
+template <> struct MappingTraits<DebugValueSubstitution> {
+ static void mapping(IO &YamlIO, DebugValueSubstitution &Sub) {
+ YamlIO.mapRequired("srcinst", Sub.SrcInst);
+ YamlIO.mapRequired("srcop", Sub.SrcOp);
+ YamlIO.mapRequired("dstinst", Sub.DstInst);
+ YamlIO.mapRequired("dstop", Sub.DstOp);
+ }
+
+ static const bool flow = true;
+};
+} // namespace yaml
+} // namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::DebugValueSubstitution)
+
+namespace llvm {
+namespace yaml {
struct MachineConstantPoolValue {
UnsignedValue ID;
StringValue Value;
@@ -609,6 +655,7 @@ struct MachineFunction {
std::vector<MachineConstantPoolValue> Constants; /// Constant pool.
std::unique_ptr<MachineFunctionInfo> MachineFuncInfo;
std::vector<CallSiteInfo> CallSitesInfo;
+ std::vector<DebugValueSubstitution> DebugValueSubstitutions;
MachineJumpTable JumpTableInfo;
BlockStringValue Body;
};
@@ -637,6 +684,8 @@ template <> struct MappingTraits<MachineFunction> {
std::vector<MachineStackObject>());
YamlIO.mapOptional("callSites", MF.CallSitesInfo,
std::vector<CallSiteInfo>());
+ YamlIO.mapOptional("debugValueSubstitutions", MF.DebugValueSubstitutions,
+ std::vector<DebugValueSubstitution>());
YamlIO.mapOptional("constants", MF.Constants,
std::vector<MachineConstantPoolValue>());
YamlIO.mapOptional("machineFunctionInfo", MF.MachineFuncInfo);
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index d6cb7211cf70..2bad64c6cc2e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -40,6 +40,7 @@ class Printable;
class SlotIndexes;
class StringRef;
class raw_ostream;
+class LiveIntervals;
class TargetRegisterClass;
class TargetRegisterInfo;
@@ -174,8 +175,9 @@ private:
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
- /// Used during basic block sections to mark the end of a basic block.
- MCSymbol *EndMCSymbol = nullptr;
+ /// Marks the end of the basic block. Used during basic block sections to
+ /// calculate the size of the basic block, or the BB section ending with it.
+ mutable MCSymbol *CachedEndMCSymbol = nullptr;
// Intrusive list support
MachineBasicBlock() = default;
@@ -432,6 +434,9 @@ public:
bool hasEHPadSuccessor() const;
+ /// Returns true if this is the entry block of the function.
+ bool isEntryBlock() const;
+
/// Returns true if this is the entry block of an EH scope, i.e., the block
/// that used to have a catchpad or cleanuppad instruction in the LLVM IR.
bool isEHScopeEntry() const { return IsEHScopeEntry; }
@@ -474,6 +479,9 @@ public:
/// Sets the section ID for this basic block.
void setSectionID(MBBSectionID V) { SectionID = V; }
+ /// Returns the MCSymbol marking the end of this basic block.
+ MCSymbol *getEndSymbol() const;
+
/// Returns true if this block may have an INLINEASM_BR (overestimate, by
/// checking if any of the successors are indirect targets of any inlineasm_br
/// in the function).
@@ -671,6 +679,17 @@ public:
return !empty() && back().isEHScopeReturn();
}
+ /// Split a basic block into 2 pieces at \p SplitPoint. A new block will be
+ /// inserted after this block, and all instructions after \p SplitInst moved
+ /// to it (\p SplitInst will be in the original block). If \p LIS is provided,
+ /// LiveIntervals will be appropriately updated. \return the newly inserted
+ /// block.
+ ///
+ /// If \p UpdateLiveIns is true, this will ensure the live ins list is
+ /// accurate, including for physreg uses/defs in the original block.
+ MachineBasicBlock *splitAt(MachineInstr &SplitInst, bool UpdateLiveIns = true,
+ LiveIntervals *LIS = nullptr);
+
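A usage sketch for the new splitAt helper, with MBB, SplitInst, and LIS (a LiveIntervals *) assumed to be in scope:

// Everything after SplitInst moves to Tail; SplitInst stays in MBB.
llvm::MachineBasicBlock *Tail =
    MBB.splitAt(SplitInst, /*UpdateLiveIns=*/true, LIS);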
/// Split the critical edge from this block to the given successor block, and
/// return the newly created block, or null if splitting is not possible.
///
@@ -872,6 +891,14 @@ public:
void print(raw_ostream &OS, ModuleSlotTracker &MST,
const SlotIndexes * = nullptr, bool IsStandalone = true) const;
+ enum PrintNameFlag {
+ PrintNameIr = (1 << 0), ///< Add IR name where available
+ PrintNameAttributes = (1 << 1), ///< Print attributes
+ };
+
+ void printName(raw_ostream &os, unsigned printNameFlags = PrintNameIr,
+ ModuleSlotTracker *moduleSlotTracker = nullptr) const;
+
// Printing method used by LoopInfo.
void printAsOperand(raw_ostream &OS, bool PrintType = true) const;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index 0f8d69ebd7da..6c442d3d07bd 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -58,18 +58,33 @@ public:
/// information. Please note that initial frequency is equal to 1024. It means
/// that we should not rely on the value itself, but only on the comparison to
/// the other block frequencies. We do this to avoid using of floating points.
- ///
+ /// For example, to get the frequency of a block relative to the entry block,
+ /// divide the integral value returned by this function (the
+ /// BlockFrequency::getFrequency() value) by getEntryFreq().
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
+ /// Compute the frequency of the block, relative to the entry block.
+ /// This API assumes getEntryFreq() is non-zero.
+ float getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const {
+ return getBlockFreq(MBB).getFrequency() * (1.0f / getEntryFreq());
+ }
+
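A sketch of the intended use of the relative-frequency helper, with MBFI and two blocks A and B assumed to be in scope:

// Entry-relative hotness; the entry block itself yields 1.0.
float HotA = MBFI.getBlockFreqRelativeToEntryBlock(A);
float HotB = MBFI.getBlockFreqRelativeToEntryBlock(B);
bool AIsHotter = HotA > HotB;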
Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const;
Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const;
- bool isIrrLoopHeader(const MachineBasicBlock *MBB);
+ bool isIrrLoopHeader(const MachineBasicBlock *MBB) const;
- void setBlockFreq(const MachineBasicBlock *MBB, uint64_t Freq);
+ /// Incrementally calculate block frequencies when we split edges, to avoid
+ /// full CFG traversal.
+ void onEdgeSplit(const MachineBasicBlock &NewPredecessor,
+ const MachineBasicBlock &NewSuccessor,
+ const MachineBranchProbabilityInfo &MBPI);
const MachineFunction *getFunction() const;
const MachineBranchProbabilityInfo *getMBPI() const;
+
+ /// Pop up a ghostview window with the current block frequency propagation
+ /// rendered using dot.
void view(const Twine &Name, bool isSimple = true) const;
// Print the block frequency Freq to OS using the current functions entry
@@ -81,6 +96,8 @@ public:
raw_ostream &printBlockFreq(raw_ostream &OS,
const MachineBasicBlock *MBB) const;
+ /// Divide a block's BlockFrequency::getFrequency() value by this value to
+ /// obtain the entry-block-relative frequency of said block.
uint64_t getEntryFreq() const;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index e9f52fb064e1..ac0cc70744d1 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -29,6 +29,11 @@ enum class MachineCombinerPattern {
REASSOC_XY_AMM_BMM,
REASSOC_XMM_AMM_BMM,
+ // These are patterns matched by the PowerPC backend to reassociate FMA and FSUB to
+ // reduce register pressure.
+ REASSOC_XY_BCA,
+ REASSOC_XY_BAC,
+
// These are multiply-add patterns matched by the AArch64 machine combiner.
MULADDW_OP1,
MULADDW_OP2,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineConstantPool.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineConstantPool.h
index cfc9ca88c976..a9bc0ce300b2 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineConstantPool.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineConstantPool.h
@@ -41,10 +41,10 @@ public:
explicit MachineConstantPoolValue(Type *ty) : Ty(ty) {}
virtual ~MachineConstantPoolValue() = default;
- /// getType - get type of this MachineConstantPoolValue.
- ///
Type *getType() const { return Ty; }
+ virtual unsigned getSizeInBytes(const DataLayout &DL) const;
+
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
Align Alignment) = 0;
@@ -94,7 +94,7 @@ public:
Align getAlign() const { return Alignment; }
- Type *getType() const;
+ unsigned getSizeInBytes(const DataLayout &DL) const;
/// This method classifies the entry according to whether or not it may
/// generate a relocation entry. This must be conservative, so if it might
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h
index f7bbd07a63ab..e3e679608784 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/GenericDomTree.h"
-#include <vector>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominators.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominators.h
index cf3af4d38223..46bf73cdd7b6 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -23,7 +23,6 @@
#include "llvm/Support/GenericDomTreeConstruction.h"
#include <cassert>
#include <memory>
-#include <vector>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 5cd7f9cde674..7f0ec0df57c5 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -14,6 +14,7 @@
#define LLVM_CODEGEN_MACHINEFRAMEINFO_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
@@ -31,7 +32,7 @@ class AllocaInst;
/// Callee saved reg can also be saved to a different register rather than
/// on the stack by setting DstReg instead of FrameIdx.
class CalleeSavedInfo {
- unsigned Reg;
+ Register Reg;
union {
int FrameIdx;
unsigned DstReg;
@@ -58,14 +59,14 @@ public:
: Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {}
// Accessors.
- unsigned getReg() const { return Reg; }
+ Register getReg() const { return Reg; }
int getFrameIdx() const { return FrameIdx; }
unsigned getDstReg() const { return DstReg; }
void setFrameIdx(int FI) {
FrameIdx = FI;
SpilledToReg = false;
}
- void setDstReg(unsigned SpillReg) {
+ void setDstReg(Register SpillReg) {
DstReg = SpillReg;
SpilledToReg = true;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
index 809c21dd26fc..e9979c788ce0 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -431,6 +431,39 @@ public:
using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>;
VariableDbgInfoMapTy VariableDbgInfos;
+ /// A count of how many instructions in the function have had numbers
+ /// assigned to them. Used for debug value tracking, to determine the
+ /// next instruction number.
+ unsigned DebugInstrNumberingCount = 0;
+
+ /// Set value of DebugInstrNumberingCount field. Avoid using this unless
+ /// you're deserializing this data.
+ void setDebugInstrNumberingCount(unsigned Num);
+
+ /// Pair of instruction number and operand number.
+ using DebugInstrOperandPair = std::pair<unsigned, unsigned>;
+
+ /// Substitution map: from one <inst,operand> pair to another. Used to
+ /// record changes in where a value is defined, so that debug variable
+ /// locations can find it later.
+ std::map<DebugInstrOperandPair, DebugInstrOperandPair>
+ DebugValueSubstitutions;
+
+ /// Create a substitution from one <instr,operand> value to a different,
+ /// new value.
+ void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair);
+
+ /// Create substitutions for any tracked values in \p Old, to point at
+ /// \p New. Needed when we re-create an instruction during optimization,
+ /// which has the same signature (i.e., def operands in the same place) but
+ /// a modified instruction type, flags, or otherwise. An example: X86 moves
+ /// are sometimes transformed into equivalent LEAs.
+ /// If the two instructions are not the same opcode, limit which operands to
+ /// examine for substitutions to the first N operands by setting
+ /// \p MaxOperand.
+ void substituteDebugValuesForInst(const MachineInstr &Old, MachineInstr &New,
+ unsigned MaxOperand = UINT_MAX);
+
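A sketch of how a pass might keep instruction-referencing debug-info intact when it replaces one instruction with another; MF, Old, and New are assumed to be in scope:

// Point any tracked debug values at the replacement instruction's defs.
MF.substituteDebugValuesForInst(Old, New);
// A fresh number can also be requested explicitly for the new instruction.
unsigned InstrNum = New.getDebugInstrNum();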
MachineFunction(Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI, unsigned FunctionNum,
MachineModuleInfo &MMI);
@@ -494,7 +527,8 @@ public:
/// Returns true if this function has basic block sections enabled.
bool hasBBSections() const {
return (BBSectionsType == BasicBlockSection::All ||
- BBSectionsType == BasicBlockSection::List);
+ BBSectionsType == BasicBlockSection::List ||
+ BBSectionsType == BasicBlockSection::Preset);
}
/// Returns true if basic block labels are to be generated for this function.
@@ -504,9 +538,6 @@ public:
void setBBSectionsType(BasicBlockSection V) { BBSectionsType = V; }
- /// Creates basic block Labels for this function.
- void createBBLabels();
-
/// Assign IsBeginSection IsEndSection fields for basic blocks in this
/// function.
void assignBeginEndSections();
@@ -769,7 +800,7 @@ public:
/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
/// of `new MachineInstr'.
MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL,
- bool NoImp = false);
+ bool NoImplicit = false);
/// Create a new MachineInstr which is a copy of \p Orig, identical in all
/// ways except the instruction has no parent, prev, or next. Bundling flags
@@ -815,6 +846,14 @@ public:
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size);
+ /// getMachineMemOperand - Allocate a new MachineMemOperand by copying
+ /// an existing one, replacing only the MachinePointerInfo and size.
+ /// MachineMemOperands are owned by the MachineFunction and need not be
+ /// explicitly deallocated.
+ MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
+ MachinePointerInfo &PtrInfo,
+ uint64_t Size);
+
/// Allocate a new MachineMemOperand by copying an existing one,
/// replacing only AliasAnalysis information. MachineMemOperands are owned
/// by the MachineFunction and need not be explicitly deallocated.
@@ -1067,6 +1106,10 @@ public:
/// the same callee.
void moveCallSiteInfo(const MachineInstr *Old,
const MachineInstr *New);
+
+ unsigned getNewDebugInstrNum() {
+ return ++DebugInstrNumberingCount;
+ }
};
//===--------------------------------------------------------------------===//
@@ -1133,6 +1176,11 @@ template <> struct GraphTraits<Inverse<const MachineFunction*>> :
}
};
+class MachineFunctionAnalysisManager;
+void verifyMachineFunction(MachineFunctionAnalysisManager *,
+ const std::string &Banner,
+ const MachineFunction &MF);
+
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEFUNCTION_H
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h
index 970d6d7db334..f8d97c2c07a6 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -249,6 +249,10 @@ private:
DebugLoc debugLoc; // Source line information.
+ /// Unique instruction number. Used by DBG_INSTR_REFs to refer to the values
+ /// defined by this instruction.
+ unsigned DebugInstrNum;
+
// Intrusive list support
friend struct ilist_traits<MachineInstr>;
friend struct ilist_callback_traits<MachineBasicBlock>;
@@ -280,6 +284,9 @@ public:
const MachineBasicBlock* getParent() const { return Parent; }
MachineBasicBlock* getParent() { return Parent; }
+ /// Move the instruction before \p MovePos.
+ void moveBefore(MachineInstr *MovePos);
+
/// Return the function that contains the basic block that this instruction
/// belongs to.
///
@@ -441,6 +448,18 @@ public:
/// this DBG_LABEL instruction.
const DILabel *getDebugLabel() const;
+ /// Fetch the instruction number of this MachineInstr. If it does not have
+ /// one already, a new and unique number will be assigned.
+ unsigned getDebugInstrNum();
+
+ /// Examine the instruction number of this MachineInstr. May be zero if
+ /// it hasn't been assigned a number yet.
+ unsigned peekDebugInstrNum() const { return DebugInstrNum; }
+
+ /// Set instruction number of this MachineInstr. Avoid using unless you're
+ /// deserializing this information.
+ void setDebugInstrNum(unsigned Num) { DebugInstrNum = Num; }
+
/// Emit an error referring to the source location of this instruction.
/// This should only be used for inline assembly that is somehow
/// impossible to compile. Other errors should have been handled much
@@ -1137,12 +1156,22 @@ public:
return getOpcode() == TargetOpcode::CFI_INSTRUCTION;
}
+ bool isPseudoProbe() const {
+ return getOpcode() == TargetOpcode::PSEUDO_PROBE;
+ }
+
// True if the instruction represents a position in the function.
bool isPosition() const { return isLabel() || isCFIInstruction(); }
bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; }
bool isDebugLabel() const { return getOpcode() == TargetOpcode::DBG_LABEL; }
- bool isDebugInstr() const { return isDebugValue() || isDebugLabel(); }
+ bool isDebugRef() const { return getOpcode() == TargetOpcode::DBG_INSTR_REF; }
+ bool isDebugInstr() const {
+ return isDebugValue() || isDebugLabel() || isDebugRef();
+ }
+ bool isDebugOrPseudoInstr() const {
+ return isDebugInstr() || isPseudoProbe();
+ }
bool isDebugOffsetImm() const { return getDebugOffset().isImm(); }
@@ -1235,9 +1264,11 @@ public:
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
case TargetOpcode::DBG_VALUE:
+ case TargetOpcode::DBG_INSTR_REF:
case TargetOpcode::DBG_LABEL:
case TargetOpcode::LIFETIME_START:
case TargetOpcode::LIFETIME_END:
+ case TargetOpcode::PSEUDO_PROBE:
return true;
}
}
@@ -1310,7 +1341,8 @@ public:
/// Return true if the MachineInstr modifies (fully define or partially
/// define) the specified register.
/// NOTE: It's ignoring subreg indices on virtual registers.
- bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const {
+ bool modifiesRegister(Register Reg,
+ const TargetRegisterInfo *TRI = nullptr) const {
return findRegisterDefOperandIdx(Reg, false, true, TRI) != -1;
}
@@ -1761,8 +1793,10 @@ public:
void setDebugValueUndef() {
assert(isDebugValue() && "Must be a debug value instruction.");
for (MachineOperand &MO : debug_operands()) {
- if (MO.isReg())
+ if (MO.isReg()) {
MO.setReg(0);
+ MO.setSubReg(0);
+ }
}
}
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index cabb9f1c97c9..115c50175604 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -40,20 +40,30 @@ class MDNode;
namespace RegState {
- enum {
- Define = 0x2,
- Implicit = 0x4,
- Kill = 0x8,
- Dead = 0x10,
- Undef = 0x20,
- EarlyClobber = 0x40,
- Debug = 0x80,
- InternalRead = 0x100,
- Renamable = 0x200,
- DefineNoRead = Define | Undef,
- ImplicitDefine = Implicit | Define,
- ImplicitKill = Implicit | Kill
- };
+enum {
+ /// Register definition.
+ Define = 0x2,
+ /// Not emitted register (e.g. carry, or temporary result).
+ Implicit = 0x4,
+ /// The last use of a register.
+ Kill = 0x8,
+ /// Unused definition.
+ Dead = 0x10,
+ /// Value of the register doesn't matter.
+ Undef = 0x20,
+ /// Register definition happens before uses.
+ EarlyClobber = 0x40,
+ /// Register 'use' is for debugging purpose.
+ Debug = 0x80,
+ /// Register reads a value that is defined inside the same instruction or
+ /// bundle.
+ InternalRead = 0x100,
+ /// Register that may be renamed.
+ Renamable = 0x200,
+ DefineNoRead = Define | Undef,
+ ImplicitDefine = Implicit | Define,
+ ImplicitKill = Implicit | Kill
+};
} // end namespace RegState
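A sketch showing the documented flags in a typical BuildMI call; MBB, InsertPt, DL, TII, DstReg, and SrcReg are hypothetical:

// COPY DstReg <- SrcReg, where this is the last use of SrcReg.
llvm::BuildMI(MBB, InsertPt, DL, TII->get(llvm::TargetOpcode::COPY), DstReg)
    .addReg(SrcReg, llvm::RegState::Kill);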
@@ -295,6 +305,9 @@ public:
case MachineOperand::MO_BlockAddress:
return addBlockAddress(Disp.getBlockAddress(), Disp.getOffset() + off,
TargetFlags);
+ case MachineOperand::MO_JumpTableIndex:
+ assert(off == 0 && "cannot create offset into jump tables");
+ return addJumpTableIndex(Disp.getIndex(), TargetFlags);
}
}
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
index 11781145b378..1d082bd03e5b 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -106,6 +106,9 @@ public:
JumpTables[Idx].MBBs.clear();
}
+ /// RemoveMBBFromJumpTables - If MBB is present in any jump tables, remove it.
+ bool RemoveMBBFromJumpTables(MachineBasicBlock *MBB);
+
/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
/// the jump tables to branch to New instead.
bool ReplaceMBBInJumpTables(MachineBasicBlock *Old, MachineBasicBlock *New);
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index 8a93f91ae54d..c7491d4191de 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -67,6 +67,12 @@ public:
/// it returns an unknown location.
DebugLoc getStartLoc() const;
+ /// Returns true if the instruction is loop invariant.
+ /// I.e., all virtual register operands are defined outside of the loop,
+ /// physical registers aren't accessed explicitly, and there are no side
+ /// effects that aren't captured by the operands or other flags.
+ bool isLoopInvariant(MachineInstr &I) const;
+
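A sketch of how a hoisting pass might consult the new query; ML (a MachineLoop *) and MI are assumed to be in scope:

// Only instructions the loop reports as invariant are hoist candidates.
if (ML->isLoopInvariant(MI)) {
  // ... consider moving MI to the loop preheader ...
}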
void dump() const;
private:
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopUtils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopUtils.h
index 2cb0134ca848..ec0b3529c0d6 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopUtils.h
@@ -37,10 +37,6 @@ MachineBasicBlock *PeelSingleBlockLoop(LoopPeelDirection Direction,
MachineRegisterInfo &MRI,
const TargetInstrInfo *TII);
-/// Return true if PhysReg is live outside the loop, i.e. determine if it
-/// is live in the loop exit blocks, and false otherwise.
-bool isRegLiveInExitBlocks(MachineLoop *Loop, int PhysReg);
-
} // namespace llvm
#endif // LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index 0ee595b5b5ce..fa900affb214 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -54,8 +54,8 @@ class Module;
//===----------------------------------------------------------------------===//
/// This class can be derived from and used by targets to hold private
/// target-specific information for each Module. Objects of this type are
-/// accessed/created with MMI::getInfo and destroyed when the MachineModuleInfo
-/// is destroyed.
+/// accessed/created with MachineModuleInfo::getObjFileInfo and destroyed when
+/// the MachineModuleInfo is destroyed.
///
class MachineModuleInfoImpl {
public:
@@ -83,6 +83,9 @@ class MachineModuleInfo {
/// This is the MCContext used for the entire code generator.
MCContext Context;
+ // This is an external context that, if assigned, will be used instead of the
+ // internal context.
+ MCContext *ExternalContext = nullptr;
/// This is the LLVM Module being worked on.
const Module *TheModule;
@@ -149,6 +152,9 @@ class MachineModuleInfo {
public:
explicit MachineModuleInfo(const LLVMTargetMachine *TM = nullptr);
+ explicit MachineModuleInfo(const LLVMTargetMachine *TM,
+ MCContext *ExtContext);
+
MachineModuleInfo(MachineModuleInfo &&MMII);
~MachineModuleInfo();
@@ -158,8 +164,12 @@ public:
const LLVMTargetMachine &getTarget() const { return TM; }
- const MCContext &getContext() const { return Context; }
- MCContext &getContext() { return Context; }
+ const MCContext &getContext() const {
+ return ExternalContext ? *ExternalContext : Context;
+ }
+ MCContext &getContext() {
+ return ExternalContext ? *ExternalContext : Context;
+ }
const Module *getModule() const { return TheModule; }
@@ -251,6 +261,12 @@ public:
return Personalities;
}
/// \}
+
+ // MMI owns the MCContext. It should never be invalidated.
+ bool invalidate(Module &, const PreservedAnalyses &,
+ ModuleAnalysisManager::Invalidator &) {
+ return false;
+ }
}; // End class MachineModuleInfo
class MachineModuleInfoWrapperPass : public ImmutablePass {
@@ -260,6 +276,9 @@ public:
static char ID; // Pass identification, replacement for typeid
explicit MachineModuleInfoWrapperPass(const LLVMTargetMachine *TM = nullptr);
+ explicit MachineModuleInfoWrapperPass(const LLVMTargetMachine *TM,
+ MCContext *ExtContext);
+
// Initialization and Finalization
bool doInitialization(Module &) override;
bool doFinalization(Module &) override;
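
The ExternalContext plumbing added above lets a client hand MachineModuleInfo a caller-owned MCContext, which getContext() then returns in place of the internal one. A hedged sketch of the intended construction pattern; the function and variable names are placeholders, not taken from the patch.

#include "llvm/CodeGen/MachineModuleInfo.h"
#include <cassert>

llvm::MachineModuleInfo
makeMMIWithSharedContext(const llvm::LLVMTargetMachine *TM,
                         llvm::MCContext *Ctx) {
  llvm::MachineModuleInfo MMI(TM, Ctx); // two-arg ctor stores Ctx as ExternalContext
  assert(&MMI.getContext() == Ctx && "external context takes precedence");
  return MMI;                           // uses the move constructor declared above
}
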
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h
index 0f252137364c..b12351b8a702 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h
@@ -33,7 +33,6 @@ class MachineRegisterInfo;
class MCCFIInstruction;
class MDNode;
class ModuleSlotTracker;
-class TargetMachine;
class TargetIntrinsicInfo;
class TargetRegisterInfo;
class hash_code;
@@ -728,12 +727,12 @@ public:
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
- void ChangeToImmediate(int64_t ImmVal);
+ void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags = 0);
/// ChangeToFPImmediate - Replace this operand with a new FP immediate operand
/// of the specified value. If an operand is known to be an FP immediate
/// already, the setFPImm method should be used.
- void ChangeToFPImmediate(const ConstantFP *FPImm);
+ void ChangeToFPImmediate(const ConstantFP *FPImm, unsigned TargetFlags = 0);
/// ChangeToES - Replace this operand with a new external symbol operand.
void ChangeToES(const char *SymName, unsigned TargetFlags = 0);
@@ -743,10 +742,10 @@ public:
unsigned TargetFlags = 0);
/// ChangeToMCSymbol - Replace this operand with a new MC symbol operand.
- void ChangeToMCSymbol(MCSymbol *Sym);
+ void ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags = 0);
/// Replace this operand with a frame index.
- void ChangeToFrameIndex(int Idx);
+ void ChangeToFrameIndex(int Idx, unsigned TargetFlags = 0);
/// Replace this operand with a target index.
void ChangeToTargetIndex(unsigned Idx, int64_t Offset,
@@ -759,6 +758,11 @@ public:
bool isKill = false, bool isDead = false,
bool isUndef = false, bool isDebug = false);
+ /// getTargetIndexName - If this MachineOperand is a TargetIndex that has a
+ /// name, attempt to get the name. Returns nullptr if the TargetIndex does not
+ /// have a name. Asserts if MO is not a TargetIndex.
+ const char *getTargetIndexName() const;
+
//===--------------------------------------------------------------------===//
// Construction methods.
//===--------------------------------------------------------------------===//
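
The ChangeTo* overloads above now accept a TargetFlags argument so a rewritten operand need not silently drop its flags. A small illustrative helper (not in the patch) showing the intended call pattern:

#include "llvm/CodeGen/MachineOperand.h"

// Fold an operand to a plain immediate while carrying over whatever target
// flags (e.g. lo/hi relocation modifiers) it already had.
void foldToImmediate(llvm::MachineOperand &MO, int64_t Value) {
  unsigned Flags = MO.getTargetFlags();
  MO.ChangeToImmediate(Value, Flags);
}
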
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOutliner.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOutliner.h
index 4a1b04ab3e88..a5dbbdb4fdcd 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -15,10 +15,11 @@
#ifndef LLVM_MACHINEOUTLINER_H
#define LLVM_MACHINEOUTLINER_H
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
namespace llvm {
namespace outliner {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassManager.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassManager.h
new file mode 100644
index 000000000000..1489177d9668
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassManager.h
@@ -0,0 +1,256 @@
+//===- PassManager.h --- Pass management for CodeGen ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the pass manager interface for codegen. The codegen
+// pipeline consists of only machine function passes. There is no container
+// relationship between IR module/function and machine function in terms of pass
+// manager organization. So there is no need for adaptor classes (for example
+// ModuleToMachineFunctionAdaptor). Since invalidation could only happen among
+// machine function passes, there is no proxy classes to handle cross-IR-unit
+// invalidation. IR analysis results are provided for machine function passes by
+// their respective analysis managers such as ModuleAnalysisManager and
+// FunctionAnalysisManager.
+//
+// TODO: Add MachineFunctionProperties support.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEPASSMANAGER_H
+#define LLVM_CODEGEN_MACHINEPASSMANAGER_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/type_traits.h"
+
+namespace llvm {
+class Module;
+
+extern template class AnalysisManager<MachineFunction>;
+
+/// An AnalysisManager<MachineFunction> that also exposes IR analysis results.
+class MachineFunctionAnalysisManager : public AnalysisManager<MachineFunction> {
+public:
+ using Base = AnalysisManager<MachineFunction>;
+
+ MachineFunctionAnalysisManager() : Base(false), FAM(nullptr), MAM(nullptr) {}
+ MachineFunctionAnalysisManager(FunctionAnalysisManager &FAM,
+ ModuleAnalysisManager &MAM,
+ bool DebugLogging = false)
+ : Base(DebugLogging), FAM(&FAM), MAM(&MAM) {}
+ MachineFunctionAnalysisManager(MachineFunctionAnalysisManager &&) = default;
+ MachineFunctionAnalysisManager &
+ operator=(MachineFunctionAnalysisManager &&) = default;
+
+ /// Get the result of an analysis pass for a Function.
+ ///
+ /// Runs the analysis if a cached result is not available.
+ template <typename PassT> typename PassT::Result &getResult(Function &F) {
+ return FAM->getResult<PassT>(F);
+ }
+
+ /// Get the cached result of an analysis pass for a Function.
+ ///
+ /// This method never runs the analysis.
+ ///
+ /// \returns null if there is no cached result.
+ template <typename PassT>
+ typename PassT::Result *getCachedResult(Function &F) {
+ return FAM->getCachedResult<PassT>(F);
+ }
+
+ /// Get the result of an analysis pass for a Module.
+ ///
+ /// Runs the analysis if a cached result is not available.
+ template <typename PassT> typename PassT::Result &getResult(Module &M) {
+ return MAM->getResult<PassT>(M);
+ }
+
+ /// Get the cached result of an analysis pass for a Module.
+ ///
+ /// This method never runs the analysis.
+ ///
+ /// \returns null if there is no cached result.
+ template <typename PassT> typename PassT::Result *getCachedResult(Module &M) {
+ return MAM->getCachedResult<PassT>(M);
+ }
+
+ /// Get the result of an analysis pass for a MachineFunction.
+ ///
+ /// Runs the analysis if a cached result is not available.
+ using Base::getResult;
+
+ /// Get the cached result of an analysis pass for a MachineFunction.
+ ///
+ /// This method never runs the analysis.
+ ///
+ /// \returns null if there is no cached result.
+ using Base::getCachedResult;
+
+ // FIXME: Add LoopAnalysisManager or CGSCCAnalysisManager if needed.
+ FunctionAnalysisManager *FAM;
+ ModuleAnalysisManager *MAM;
+};
+
+extern template class PassManager<MachineFunction>;
+
+/// MachineFunctionPassManager adds/removes below features to/from the base
+/// PassManager template instantiation.
+///
+/// - Support passes that implement doInitialization/doFinalization. This is for
+/// machine function passes to work on module level constructs. One such pass
+/// is AsmPrinter.
+///
+/// - Support machine module pass which runs over the module (for example,
+/// MachineOutliner). A machine module pass needs to define the method:
+///
+/// ```Error run(Module &, MachineFunctionAnalysisManager &)```
+///
+/// FIXME: machine module passes still need to define the usual machine
+/// function pass interface, namely,
+/// `PreservedAnalyses run(MachineFunction &,
+/// MachineFunctionAnalysisManager &)`
+/// But this interface wouldn't be executed. It is just a placeholder
+/// to satisfy the pass manager type-erased interface. This
+/// special-casing of machine module pass is due to its limited use
+/// cases and the unnecessary complexity it may bring to the machine
+/// pass manager.
+///
+/// - The base class `run` method is replaced by an alternative `run` method.
+/// See details below.
+///
+/// - Support codegening in the SCC order. Users include interprocedural
+/// register allocation (IPRA).
+class MachineFunctionPassManager
+ : public PassManager<MachineFunction, MachineFunctionAnalysisManager> {
+ using Base = PassManager<MachineFunction, MachineFunctionAnalysisManager>;
+
+public:
+ MachineFunctionPassManager(bool DebugLogging = false,
+ bool RequireCodeGenSCCOrder = false,
+ bool VerifyMachineFunction = false)
+ : Base(DebugLogging), RequireCodeGenSCCOrder(RequireCodeGenSCCOrder),
+ VerifyMachineFunction(VerifyMachineFunction) {}
+ MachineFunctionPassManager(MachineFunctionPassManager &&) = default;
+ MachineFunctionPassManager &
+ operator=(MachineFunctionPassManager &&) = default;
+
+ /// Run machine passes for a Module.
+ ///
+ /// The intended use is to start the codegen pipeline for a Module. The base
+ /// class's `run` method is deliberately hidden by this due to the observation
+/// that we don't yet have the use cases of composing two instances of
+/// machine pass managers, or composing machine pass managers with other
+ /// types of pass managers.
+ Error run(Module &M, MachineFunctionAnalysisManager &MFAM);
+
+ template <typename PassT> void addPass(PassT &&Pass) {
+ Base::addPass(std::forward<PassT>(Pass));
+ PassConceptT *P = Passes.back().get();
+ addDoInitialization<PassT>(P);
+ addDoFinalization<PassT>(P);
+
+ // Add machine module pass.
+ addRunOnModule<PassT>(P);
+ }
+
+private:
+ template <typename PassT>
+ using has_init_t = decltype(std::declval<PassT &>().doInitialization(
+ std::declval<Module &>(),
+ std::declval<MachineFunctionAnalysisManager &>()));
+
+ template <typename PassT>
+ std::enable_if_t<!is_detected<has_init_t, PassT>::value>
+ addDoInitialization(PassConceptT *Pass) {}
+
+ template <typename PassT>
+ std::enable_if_t<is_detected<has_init_t, PassT>::value>
+ addDoInitialization(PassConceptT *Pass) {
+ using PassModelT =
+ detail::PassModel<MachineFunction, PassT, PreservedAnalyses,
+ MachineFunctionAnalysisManager>;
+ auto *P = static_cast<PassModelT *>(Pass);
+ InitializationFuncs.emplace_back(
+ [=](Module &M, MachineFunctionAnalysisManager &MFAM) {
+ return P->Pass.doInitialization(M, MFAM);
+ });
+ }
+
+ template <typename PassT>
+ using has_fini_t = decltype(std::declval<PassT &>().doFinalization(
+ std::declval<Module &>(),
+ std::declval<MachineFunctionAnalysisManager &>()));
+
+ template <typename PassT>
+ std::enable_if_t<!is_detected<has_fini_t, PassT>::value>
+ addDoFinalization(PassConceptT *Pass) {}
+
+ template <typename PassT>
+ std::enable_if_t<is_detected<has_fini_t, PassT>::value>
+ addDoFinalization(PassConceptT *Pass) {
+ using PassModelT =
+ detail::PassModel<MachineFunction, PassT, PreservedAnalyses,
+ MachineFunctionAnalysisManager>;
+ auto *P = static_cast<PassModelT *>(Pass);
+ FinalizationFuncs.emplace_back(
+ [=](Module &M, MachineFunctionAnalysisManager &MFAM) {
+ return P->Pass.doFinalization(M, MFAM);
+ });
+ }
+
+ template <typename PassT>
+ using is_machine_module_pass_t = decltype(std::declval<PassT &>().run(
+ std::declval<Module &>(),
+ std::declval<MachineFunctionAnalysisManager &>()));
+
+ template <typename PassT>
+ using is_machine_function_pass_t = decltype(std::declval<PassT &>().run(
+ std::declval<MachineFunction &>(),
+ std::declval<MachineFunctionAnalysisManager &>()));
+
+ template <typename PassT>
+ std::enable_if_t<!is_detected<is_machine_module_pass_t, PassT>::value>
+ addRunOnModule(PassConceptT *Pass) {}
+
+ template <typename PassT>
+ std::enable_if_t<is_detected<is_machine_module_pass_t, PassT>::value>
+ addRunOnModule(PassConceptT *Pass) {
+ static_assert(is_detected<is_machine_function_pass_t, PassT>::value,
+ "machine module pass needs to define machine function pass "
+ "api. sorry.");
+
+ using PassModelT =
+ detail::PassModel<MachineFunction, PassT, PreservedAnalyses,
+ MachineFunctionAnalysisManager>;
+ auto *P = static_cast<PassModelT *>(Pass);
+ MachineModulePasses.emplace(
+ Passes.size() - 1,
+ [=](Module &M, MachineFunctionAnalysisManager &MFAM) {
+ return P->Pass.run(M, MFAM);
+ });
+ }
+
+ using FuncTy = Error(Module &, MachineFunctionAnalysisManager &);
+ SmallVector<llvm::unique_function<FuncTy>, 4> InitializationFuncs;
+ SmallVector<llvm::unique_function<FuncTy>, 4> FinalizationFuncs;
+
+ using PassIndex = decltype(Passes)::size_type;
+ std::map<PassIndex, llvm::unique_function<FuncTy>> MachineModulePasses;
+
+ // Run codegen in the SCC order.
+ bool RequireCodeGenSCCOrder;
+
+ bool VerifyMachineFunction;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINEPASSMANAGER_H
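
To make the addPass() detection machinery above concrete, here is a hedged sketch of a hypothetical machine function pass that opts into the optional doInitialization/doFinalization hooks; is_detected picks the hooks up at addPass() time, and passes without them are added unchanged. Everything named here (CountFunctionsPass, buildPipeline) is illustrative, not part of the header.

#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

struct CountFunctionsPass : PassInfoMixin<CountFunctionsPass> {
  unsigned NumFns = 0;
  Error doInitialization(Module &, MachineFunctionAnalysisManager &) {
    NumFns = 0;                          // module-level setup, runs once
    return Error::success();
  }
  PreservedAnalyses run(MachineFunction &, MachineFunctionAnalysisManager &) {
    ++NumFns;                            // per-function work
    return PreservedAnalyses::all();
  }
  Error doFinalization(Module &, MachineFunctionAnalysisManager &) {
    errs() << "saw " << NumFns << " machine functions\n";
    return Error::success();
  }
};

void buildPipeline(Module &M, MachineFunctionAnalysisManager &MFAM) {
  MachineFunctionPassManager MFPM;
  MFPM.addPass(CountFunctionsPass());    // hooks are detected here
  if (Error E = MFPM.run(M, MFAM))
    report_fatal_error(std::move(E));
}
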
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassRegistry.def
new file mode 100644
index 000000000000..e9eaa5f77000
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -0,0 +1,197 @@
+//===- MachinePassRegistry.def - Registry of passes -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used as the registry of passes that are for target-independent
+// code generator.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef MODULE_ANALYSIS
+#define MODULE_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC))
+#undef MODULE_ANALYSIS
+
+#ifndef MODULE_PASS
+#define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass, ())
+#undef MODULE_PASS
+
+#ifndef FUNCTION_ANALYSIS
+#define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC))
+FUNCTION_ANALYSIS("targetir", TargetIRAnalysis, (std::move(TM.getTargetIRAnalysis())))
+#undef FUNCTION_ANALYSIS
+
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+FUNCTION_PASS("mergeicmps", MergeICmpsPass, ())
+FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ())
+FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ())
+FUNCTION_PASS("consthoist", ConstantHoistingPass, ())
+FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
+FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
+FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
+FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
+FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
+FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ())
+FUNCTION_PASS("verify", VerifierPass, ())
+#undef FUNCTION_PASS
+
+#ifndef LOOP_PASS
+#define LOOP_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+LOOP_PASS("loop-reduce", LoopStrengthReducePass, ())
+#undef LOOP_PASS
+
+#ifndef MACHINE_MODULE_PASS
+#define MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+#undef MACHINE_MODULE_PASS
+
+#ifndef MACHINE_FUNCTION_ANALYSIS
+#define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC))
+// LiveVariables currently requires pure SSA form.
+// FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
+// LiveVariables can be removed completely, and LiveIntervals can be directly
+// computed. (We still either need to regenerate kill flags after regalloc, or
+// preferably fix the scavenger to not depend on them).
+// MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis())
+
+// MACHINE_FUNCTION_ANALYSIS("live-stacks", LiveStacksPass())
+// MACHINE_FUNCTION_ANALYSIS("slot-indexes", SlotIndexesAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("edge-bundles", EdgeBundlesAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("lazy-machine-bfi", LazyMachineBlockFrequencyInfoAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-bfi", MachineBlockFrequencyInfoAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-loops", MachineLoopInfoAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-dom-frontier", MachineDominanceFrontierAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-dom-tree", MachineDominatorTreeAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-ore", MachineOptimizationRemarkEmitterPassAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree", MachinePostDominatorTreeAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-region-info", MachineRegionInfoPassAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", MachineTraceMetricsAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("reaching-def", ReachingDefAnalysisAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis())
+// MACHINE_FUNCTION_ANALYSIS("gc-analysis", GCMachineCodeAnalysisPass())
+#undef MACHINE_FUNCTION_ANALYSIS
+
+#ifndef MACHINE_FUNCTION_PASS
+#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+// MACHINE_FUNCTION_PASS("mir-printer", PrintMIRPass, ())
+// MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass, ())
+#undef MACHINE_FUNCTION_PASS
+
+// After a pass is converted to the new pass manager, its entry should be moved from
+// dummy table to the normal one. For example, for a machine function pass,
+// DUMMY_MACHINE_FUNCTION_PASS to MACHINE_FUNCTION_PASS.
+
+#ifndef DUMMY_FUNCTION_PASS
+#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ())
+DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ())
+DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ())
+DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ())
+DUMMY_FUNCTION_PASS("dwarfehprepare", DwarfEHPass, ())
+DUMMY_FUNCTION_PASS("winehprepare", WinEHPass, ())
+DUMMY_FUNCTION_PASS("wasmehprepare", WasmEHPass, ())
+DUMMY_FUNCTION_PASS("codegenprepare", CodeGenPreparePass, ())
+DUMMY_FUNCTION_PASS("safe-stack", SafeStackPass, ())
+DUMMY_FUNCTION_PASS("stack-protector", StackProtectorPass, ())
+DUMMY_FUNCTION_PASS("atomic-expand", AtomicExpandPass, ())
+DUMMY_FUNCTION_PASS("interleaved-access", InterleavedAccessPass, ())
+DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ())
+DUMMY_FUNCTION_PASS("cfguard-dispatch", CFGuardDispatchPass, ())
+DUMMY_FUNCTION_PASS("cfguard-check", CFGuardCheckPass, ())
+DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ())
+#undef DUMMY_FUNCTION_PASS
+
+#ifndef DUMMY_MODULE_PASS
+#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_MODULE_PASS("lower-emutls", LowerEmuTLSPass, ())
+#undef DUMMY_MODULE_PASS
+
+#ifndef DUMMY_MACHINE_MODULE_PASS
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_MACHINE_MODULE_PASS("machine-outliner", MachineOutlinerPass, ())
+#undef DUMMY_MACHINE_MODULE_PASS
+
+#ifndef DUMMY_MACHINE_FUNCTION_PASS
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
+#endif
+DUMMY_MACHINE_FUNCTION_PASS("mir-printer", PrintMIRPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("postrapseudos", ExpandPostRAPseudosPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass, ())
+DUMMY_MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-sink", MachineSinkingPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("twoaddressinstruction", TwoAddressInstructionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("liveintervals", LiveIntervalsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("simple-register-coalescing", RegisterCoalescerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass, ())
+DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("ra-fast", RAFastPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("legalizer", LegalizerPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ())
+DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ())
+#undef DUMMY_MACHINE_FUNCTION_PASS
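
This .def file follows LLVM's usual X-macro registry pattern: a consumer defines only the macro categories it cares about, includes the file, and the preprocessor stamps out one expansion per registered pass (the #ifndef fallbacks above keep the other categories inert, and the trailing #undef cleans up). A self-contained sketch of that expansion pattern; the real PassBuilder plumbing is more involved, this only shows the mechanism.

#include "llvm/ADT/StringRef.h"

bool isRegisteredFunctionPassName(llvm::StringRef Name) {
#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)                            \
  if (Name == NAME)                                                            \
    return true;
#include "llvm/CodeGen/MachinePassRegistry.def"
  return false;   // FUNCTION_PASS is #undef'd again inside the .def
}
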
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 8b2c27e7b888..f89a453749e8 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -40,8 +40,6 @@
#ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
#define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
-#include "llvm/Analysis/AliasAnalysis.h"
-
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
@@ -51,6 +49,7 @@
namespace llvm {
+class AAResults;
class NodeSet;
class SMSchedule;
@@ -92,15 +91,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<LiveIntervals>();
- AU.addRequired<MachineOptimizationRemarkEmitterPass>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
void preprocessPhiNodes(MachineBasicBlock &B);
@@ -285,7 +276,7 @@ public:
static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
private:
- void addLoopCarriedDependences(AliasAnalysis *AA);
+ void addLoopCarriedDependences(AAResults *AA);
void updatePhiDependences();
void changeDependences();
unsigned calculateResMII();
@@ -304,7 +295,7 @@ private:
void checkValidNodeOrder(const NodeSetType &Circuits) const;
bool schedulePipeline(SMSchedule &Schedule);
bool computeDelta(MachineInstr &MI, unsigned &Delta);
- MachineInstr *findDefInLoop(unsigned Reg);
+ MachineInstr *findDefInLoop(Register Reg);
bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
unsigned &OffsetPos, unsigned &NewBase,
int64_t &NewOffset);
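
The MachinePipeliner hunk above trades the heavyweight AliasAnalysis.h include for a forward declaration of AAResults and moves getAnalysisUsage out of line, a standard header-hygiene move. A generic sketch of the same technique with made-up class names:

// Widget.h -- only pointers/references to Gadget appear, so a forward
// declaration suffices and the full Gadget.h include moves to Widget.cpp.
class Gadget;

class Widget {
public:
  void attach(Gadget *G);        // pointer parameter: declaration is enough
private:
  Gadget *Attached = nullptr;    // pointer member: declaration is enough
};

// Widget.cpp
// #include "Gadget.h"           // full definition needed only here
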
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 35aab5018fa4..57086b4eebd6 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -442,10 +442,20 @@ public:
/// Return true if there is exactly one operand defining the specified
/// register.
bool hasOneDef(Register RegNo) const {
- def_iterator DI = def_begin(RegNo);
- if (DI == def_end())
- return false;
- return ++DI == def_end();
+ return hasSingleElement(def_operands(RegNo));
+ }
+
+ /// Returns the defining operand if there is exactly one operand defining the
+ /// specified register, otherwise nullptr.
+ MachineOperand *getOneDef(Register Reg) const {
+ def_iterator DI = def_begin(Reg);
+ if (DI == def_end()) // No defs.
+ return nullptr;
+
+ def_iterator OneDef = DI;
+ if (++DI == def_end())
+ return &*OneDef;
+ return nullptr; // Multiple defs.
}
/// use_iterator/use_begin/use_end - Walk all uses of the specified register.
@@ -498,10 +508,7 @@ public:
/// hasOneUse - Return true if there is exactly one instruction using the
/// specified register.
bool hasOneUse(Register RegNo) const {
- use_iterator UI = use_begin(RegNo);
- if (UI == use_end())
- return false;
- return ++UI == use_end();
+ return hasSingleElement(use_operands(RegNo));
}
/// use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the
@@ -612,14 +619,10 @@ public:
/// function. Writing to a constant register has no effect.
bool isConstantPhysReg(MCRegister PhysReg) const;
- /// Returns true if either isConstantPhysReg or TRI->isCallerPreservedPhysReg
- /// returns true. This is a utility member function.
- bool isCallerPreservedOrConstPhysReg(MCRegister PhysReg) const;
-
/// Get an iterator over the pressure sets affected by the given physical or
/// virtual register. If RegUnit is physical, it must be a register unit (from
/// MCRegUnitIterator).
- PSetIterator getPressureSets(unsigned RegUnit) const;
+ PSetIterator getPressureSets(Register RegUnit) const;
//===--------------------------------------------------------------------===//
// Virtual Register Info
@@ -894,7 +897,7 @@ public:
///
/// Reserved registers may belong to an allocatable register class, but the
/// target has explicitly requested that they are not used.
- bool isReserved(Register PhysReg) const {
+ bool isReserved(MCRegister PhysReg) const {
return getReservedRegs().test(PhysReg.id());
}
@@ -1174,14 +1177,13 @@ class PSetIterator {
public:
PSetIterator() = default;
- PSetIterator(unsigned RegUnit, const MachineRegisterInfo *MRI) {
+ PSetIterator(Register RegUnit, const MachineRegisterInfo *MRI) {
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- if (Register::isVirtualRegister(RegUnit)) {
+ if (RegUnit.isVirtual()) {
const TargetRegisterClass *RC = MRI->getRegClass(RegUnit);
PSet = TRI->getRegClassPressureSets(RC);
Weight = TRI->getRegClassWeight(RC).RegWeight;
- }
- else {
+ } else {
PSet = TRI->getRegUnitPressureSets(RegUnit);
Weight = TRI->getRegUnitWeight(RegUnit);
}
@@ -1203,8 +1205,8 @@ public:
}
};
-inline PSetIterator MachineRegisterInfo::
-getPressureSets(unsigned RegUnit) const {
+inline PSetIterator
+MachineRegisterInfo::getPressureSets(Register RegUnit) const {
return PSetIterator(RegUnit, this);
}
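
The new MachineRegisterInfo::getOneDef() above complements hasOneDef() by also handing back the defining operand. A minimal sketch (helper name invented) of the expected use, walking from a register to its unique defining instruction:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

llvm::MachineInstr *
getUniqueDefiningInstr(const llvm::MachineRegisterInfo &MRI, llvm::Register Reg) {
  if (llvm::MachineOperand *Def = MRI.getOneDef(Reg))
    return Def->getParent();  // the MachineInstr owning the single def operand
  return nullptr;             // zero defs, or more than one
}
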
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSSAUpdater.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSSAUpdater.h
index df972e12d461..0af356e376ab 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSSAUpdater.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -40,9 +40,6 @@ private:
//typedef DenseMap<MachineBasicBlock*, Register> AvailableValsTy;
void *AV = nullptr;
- /// VR - Current virtual register whose uses are being updated.
- Register VR;
-
/// VRC - Register class of the current virtual register.
const TargetRegisterClass *VRC;
@@ -65,6 +62,7 @@ public:
/// Initialize - Reset this object to get ready for a new set of SSA
/// updates.
void Initialize(Register V);
+ void Initialize(const TargetRegisterClass *RC);
/// AddAvailableValue - Indicate that a rewritten value is available at the
/// end of the specified block with the specified value.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineStableHash.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineStableHash.h
new file mode 100644
index 000000000000..8423b2da1c78
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineStableHash.h
@@ -0,0 +1,30 @@
+//===------------ MachineStableHash.h - MIR Stable Hashing Utilities ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Stable hashing for MachineInstr and MachineOperand. Useful for getting a
+// hash across runs, modules, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINESTABLEHASH_H
+#define LLVM_CODEGEN_MACHINESTABLEHASH_H
+
+#include "llvm/CodeGen/StableHashing.h"
+
+namespace llvm {
+class MachineInstr;
+class MachineOperand;
+
+stable_hash stableHashValue(const MachineOperand &MO);
+stable_hash stableHashValue(const MachineInstr &MI, bool HashVRegs = false,
+ bool HashConstantPoolIndices = false,
+ bool HashMemOperands = false);
+
+} // namespace llvm
+
+#endif
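
A hedged usage sketch for the declarations above; the printing helper is illustrative, not part of the header. stable_hash is the run-to-run reproducible hash type from StableHashing.h, which this header pulls in.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineStableHash.h"
#include "llvm/Support/raw_ostream.h"

void printStableHash(const llvm::MachineInstr &MI) {
  // Include virtual register operands in the hash; constant-pool indices and
  // memory operands stay excluded (the defaults above).
  llvm::stable_hash H = llvm::stableHashValue(MI, /*HashVRegs=*/true);
  llvm::errs() << "stable hash: " << H << "\n";
}
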
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineTraceMetrics.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineTraceMetrics.h
index 025989504177..46b57365e653 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineTraceMetrics.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineTraceMetrics.h
@@ -140,13 +140,13 @@ public:
/// successors.
struct LiveInReg {
/// The virtual register required, or a register unit.
- unsigned Reg;
+ Register Reg;
/// For virtual registers: Minimum height of the defining instruction.
/// For regunits: Height of the highest user in the trace.
unsigned Height;
- LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
+ LiveInReg(Register Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
};
/// Per-basic block information that relates to a specific trace through the
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MultiHazardRecognizer.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MultiHazardRecognizer.h
new file mode 100644
index 000000000000..9846045ff014
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MultiHazardRecognizer.h
@@ -0,0 +1,47 @@
+//=- llvm/CodeGen/MultiHazardRecognizer.h - Scheduling Support ----*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MultiHazardRecognizer class, which is a wrapper
+// for a set of ScheduleHazardRecognizer instances
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MULTIHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_MULTIHAZARDRECOGNIZER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+
+namespace llvm {
+
+class MachineInstr;
+class SUnit;
+
+class MultiHazardRecognizer : public ScheduleHazardRecognizer {
+ SmallVector<std::unique_ptr<ScheduleHazardRecognizer>, 4> Recognizers;
+
+public:
+ MultiHazardRecognizer() = default;
+ void AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer> &&);
+
+ bool atIssueLimit() const override;
+ HazardType getHazardType(SUnit *, int Stalls = 0) override;
+ void Reset() override;
+ void EmitInstruction(SUnit *) override;
+ void EmitInstruction(MachineInstr *) override;
+ unsigned PreEmitNoops(SUnit *) override;
+ unsigned PreEmitNoops(MachineInstr *) override;
+ bool ShouldPreferAnother(SUnit *) override;
+ void AdvanceCycle() override;
+ void RecedeCycle() override;
+ void EmitNoop() override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MULTIHAZARDRECOGNIZER_H
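
A short sketch of the composition pattern this class enables; the helper below is illustrative, and the two recognizers passed in stand for whatever a target already builds (e.g. its scoreboard recognizer plus a custom one).

#include "llvm/CodeGen/MultiHazardRecognizer.h"
#include <memory>
#include <utility>

std::unique_ptr<llvm::ScheduleHazardRecognizer>
combineRecognizers(std::unique_ptr<llvm::ScheduleHazardRecognizer> A,
                   std::unique_ptr<llvm::ScheduleHazardRecognizer> B) {
  auto MHR = std::make_unique<llvm::MultiHazardRecognizer>();
  MHR->AddHazardRecognizer(std::move(A));
  MHR->AddHazardRecognizer(std::move(B));
  return MHR;  // getHazardType(), EmitInstruction() etc. now fan out to A and B
}
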
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h
index 56db30ff7d6d..fe07c70d85c5 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h
@@ -39,7 +39,7 @@ public:
/// Get the offset of string \p S in the string table. This can insert a new
/// element or return the offset of a pre-existing one.
- uint32_t getStringOffset(StringRef S) { return getEntry(S).getOffset(); }
+ uint64_t getStringOffset(StringRef S) { return getEntry(S).getOffset(); }
/// Get permanent storage for \p S (but do not necessarily emit \p S in the
/// output section). A later call to getStringOffset() with the same string
@@ -57,7 +57,7 @@ public:
private:
MapTy Strings;
- uint32_t CurrentEndOffset = 0;
+ uint64_t CurrentEndOffset = 0;
unsigned NumEntries = 0;
DwarfStringPoolEntryRef EmptyString;
std::function<StringRef(StringRef Input)> Translator;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h
index 9e5b4446c195..676ed2c65eb1 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h
@@ -44,11 +44,15 @@ namespace llvm {
/// the entry block.
FunctionPass *createUnreachableBlockEliminationPass();
- /// createBBSectionsPrepare Pass - This pass assigns sections to machine basic
- /// blocks and is enabled with -fbasic-block-sections.
- /// Buf is a memory buffer that contains the list of functions and basic
- /// block ids to selectively enable basic block sections.
- MachineFunctionPass *createBBSectionsPreparePass(const MemoryBuffer *Buf);
+ /// createBasicBlockSections Pass - This pass assigns sections to machine
+ /// basic blocks and is enabled with -fbasic-block-sections. Buf is a memory
+ /// buffer that contains the list of functions and basic block ids to
+ /// selectively enable basic block sections.
+ MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf);
+
+ /// createMachineFunctionSplitterPass - This pass splits machine functions
+ /// using profile information.
+ MachineFunctionPass *createMachineFunctionSplitterPass();
/// MachineFunctionPrinter pass - This pass prints out the machine function to
/// the given stream as a debugging tool.
@@ -72,10 +76,6 @@ namespace llvm {
/// matching during instruction selection.
FunctionPass *createCodeGenPreparePass();
- /// createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather
- /// and scatter intrinsics with scalar code when target doesn't support them.
- FunctionPass *createScalarizeMaskedMemIntrinPass();
-
/// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg
/// load-linked/store-conditional loops.
extern char &AtomicExpandID;
@@ -387,10 +387,6 @@ namespace llvm {
/// createJumpInstrTables - This pass creates jump-instruction tables.
ModulePass *createJumpInstrTablesPass();
- /// createForwardControlFlowIntegrityPass - This pass adds control-flow
- /// integrity.
- ModulePass *createForwardControlFlowIntegrityPass();
-
/// InterleavedAccess Pass - This pass identifies and matches interleaved
/// memory accesses to target specific intrinsics.
///
@@ -471,6 +467,9 @@ namespace llvm {
/// Create Hardware Loop pass. \see HardwareLoops.cpp
FunctionPass *createHardwareLoopsPass();
+ /// This pass inserts pseudo probe annotation for callsite profiling.
+ FunctionPass *createPseudoProbeInserter();
+
/// Create IR Type Promotion pass. \see TypePromotion.cpp
FunctionPass *createTypePromotionPass();
@@ -483,9 +482,16 @@ namespace llvm {
/// info was generated by another source such as clang.
ModulePass *createStripDebugMachineModulePass(bool OnlyDebugified);
+ /// Creates MIR Check Debug pass. \see MachineCheckDebugify.cpp
+ ModulePass *createCheckDebugMachineModulePass();
+
/// The pass fixups statepoint machine instruction to replace usage of
/// caller saved registers with stack slots.
extern char &FixupStatepointCallerSavedID;
+
+ /// The pass transform load/store <256 x i32> to AMX load/store intrinsics
+ /// or split the data to two <128 x i32>.
+ FunctionPass *createX86LowerAMXTypePass();
} // End llvm namespace
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h
index 585f43e116f9..00d6ec93d555 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h
@@ -753,10 +753,6 @@ namespace rdf {
NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
NodeAddr<RefNode*> RA) const;
- NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA, bool Create);
- NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
NodeAddr<RefNode*> RA, bool Create);
NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFLiveness.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFLiveness.h
index ea4890271726..d39d3585e7bd 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFLiveness.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFLiveness.h
@@ -18,6 +18,8 @@
#include "llvm/MC/LaneBitmask.h"
#include <map>
#include <set>
+#include <unordered_map>
+#include <unordered_set>
#include <utility>
namespace llvm {
@@ -28,6 +30,30 @@ class MachineDominatorTree;
class MachineRegisterInfo;
class TargetRegisterInfo;
+} // namespace llvm
+
+namespace llvm {
+namespace rdf {
+namespace detail {
+
+using NodeRef = std::pair<NodeId, LaneBitmask>;
+
+} // namespace detail
+} // namespace rdf
+} // namespace llvm
+
+namespace std {
+
+template <> struct hash<llvm::rdf::detail::NodeRef> {
+ std::size_t operator()(llvm::rdf::detail::NodeRef R) const {
+ return std::hash<llvm::rdf::NodeId>{}(R.first) ^
+ std::hash<llvm::LaneBitmask::Type>{}(R.second.getAsInteger());
+ }
+};
+
+} // namespace std
+
+namespace llvm {
namespace rdf {
struct Liveness {
@@ -46,10 +72,9 @@ namespace rdf {
std::map<MachineBasicBlock*,RegisterAggr> Map;
};
- using NodeRef = std::pair<NodeId, LaneBitmask>;
- using NodeRefSet = std::set<NodeRef>;
- // RegisterId in RefMap must be normalized.
- using RefMap = std::map<RegisterId, NodeRefSet>;
+ using NodeRef = detail::NodeRef;
+ using NodeRefSet = std::unordered_set<NodeRef>;
+ using RefMap = std::unordered_map<RegisterId, NodeRefSet>;
Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
: DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()),
@@ -110,15 +135,14 @@ namespace rdf {
// Cache of mapping from node ids (for RefNodes) to the containing
// basic blocks. Not computing it each time for each node reduces
// the liveness calculation time by a large fraction.
- using NodeBlockMap = DenseMap<NodeId, MachineBasicBlock *>;
- NodeBlockMap NBMap;
+ DenseMap<NodeId, MachineBasicBlock *> NBMap;
// Phi information:
//
// RealUseMap
// map: NodeId -> (map: RegisterId -> NodeRefSet)
// phi id -> (map: register -> set of reached non-phi uses)
- std::map<NodeId, RefMap> RealUseMap;
+ DenseMap<NodeId, RefMap> RealUseMap;
// Inverse iterated dominance frontier.
std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF;
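
Switching RealUseMap and NodeRefSet to unordered containers is what forces the std::hash<NodeRef> specialization earlier in this hunk. A standalone illustration of the same technique, with generic names rather than the RDF types, combining member hashes by XOR exactly as done above:

#include <cstdint>
#include <functional>
#include <unordered_set>

struct UnitLaneKey {
  uint32_t Unit;
  uint32_t Lanes;
  bool operator==(const UnitLaneKey &O) const {
    return Unit == O.Unit && Lanes == O.Lanes;
  }
};

namespace std {
template <> struct hash<UnitLaneKey> {
  size_t operator()(const UnitLaneKey &K) const {
    return std::hash<uint32_t>{}(K.Unit) ^ std::hash<uint32_t>{}(K.Lanes);
  }
};
} // namespace std

std::unordered_set<UnitLaneKey> LiveKeys;  // now well-formed
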
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFRegisters.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFRegisters.h
index 4afaf80e4659..c49b4883e1c1 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFRegisters.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFRegisters.h
@@ -91,6 +91,11 @@ namespace rdf {
bool operator< (const RegisterRef &RR) const {
return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
}
+
+ size_t hash() const {
+ return std::hash<RegisterId>{}(Reg) ^
+ std::hash<LaneBitmask::Type>{}(Mask.getAsInteger());
+ }
};
@@ -110,8 +115,6 @@ namespace rdf {
return RegMasks.get(Register::stackSlot2Index(R));
}
- RegisterRef normalize(RegisterRef RR) const;
-
bool alias(RegisterRef RA, RegisterRef RB) const {
if (!isRegMaskId(RA.Reg))
return !isRegMaskId(RB.Reg) ? aliasRR(RA, RB) : aliasRM(RA, RB);
@@ -128,6 +131,10 @@ namespace rdf {
return MaskInfos[Register::stackSlot2Index(MaskId)].Units;
}
+ const BitVector &getUnitAliases(uint32_t U) const {
+ return AliasInfos[U].Regs;
+ }
+
RegisterRef mapTo(RegisterRef RR, unsigned R) const;
const TargetRegisterInfo &getTRI() const { return TRI; }
@@ -142,12 +149,16 @@ namespace rdf {
struct MaskInfo {
BitVector Units;
};
+ struct AliasInfo {
+ BitVector Regs;
+ };
const TargetRegisterInfo &TRI;
IndexedSet<const uint32_t*> RegMasks;
std::vector<RegInfo> RegInfos;
std::vector<UnitInfo> UnitInfos;
std::vector<MaskInfo> MaskInfos;
+ std::vector<AliasInfo> AliasInfos;
bool aliasRR(RegisterRef RA, RegisterRef RB) const;
bool aliasRM(RegisterRef RR, RegisterRef RM) const;
@@ -159,10 +170,15 @@ namespace rdf {
: Units(pri.getTRI().getNumRegUnits()), PRI(pri) {}
RegisterAggr(const RegisterAggr &RG) = default;
+ unsigned count() const { return Units.count(); }
bool empty() const { return Units.none(); }
bool hasAliasOf(RegisterRef RR) const;
bool hasCoverOf(RegisterRef RR) const;
+ bool operator==(const RegisterAggr &A) const {
+ return DenseMapInfo<BitVector>::isEqual(Units, A.Units);
+ }
+
static bool isCoverOf(RegisterRef RA, RegisterRef RB,
const PhysicalRegisterInfo &PRI) {
return RegisterAggr(PRI).insert(RA).hasCoverOf(RB);
@@ -179,6 +195,10 @@ namespace rdf {
RegisterRef clearIn(RegisterRef RR) const;
RegisterRef makeRegRef() const;
+ size_t hash() const {
+ return DenseMapInfo<BitVector>::getHashValue(Units);
+ }
+
void print(raw_ostream &OS) const;
struct rr_iterator {
@@ -233,8 +253,27 @@ namespace rdf {
};
raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
+ raw_ostream &operator<< (raw_ostream &OS, const RegisterAggr &A);
} // end namespace rdf
} // end namespace llvm
+namespace std {
+ template <> struct hash<llvm::rdf::RegisterRef> {
+ size_t operator()(llvm::rdf::RegisterRef A) const {
+ return A.hash();
+ }
+ };
+ template <> struct hash<llvm::rdf::RegisterAggr> {
+ size_t operator()(const llvm::rdf::RegisterAggr &A) const {
+ return A.hash();
+ }
+ };
+ template <> struct equal_to<llvm::rdf::RegisterAggr> {
+ bool operator()(const llvm::rdf::RegisterAggr &A,
+ const llvm::rdf::RegisterAggr &B) const {
+ return A == B;
+ }
+ };
+}
#endif // LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
index a8a436337e07..bcb48de2fe5a 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -139,23 +139,25 @@ public:
/// Provides the instruction id of the closest reaching def instruction of
/// PhysReg that reaches MI, relative to the beginning of MI's basic block.
- int getReachingDef(MachineInstr *MI, int PhysReg) const;
+ int getReachingDef(MachineInstr *MI, MCRegister PhysReg) const;
/// Return whether A and B use the same def of PhysReg.
- bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, int PhysReg) const;
+ bool hasSameReachingDef(MachineInstr *A, MachineInstr *B,
+ MCRegister PhysReg) const;
/// Return whether the reaching def for MI also is live out of its parent
/// block.
- bool isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const;
+ bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const;
/// Return the local MI that produces the live out value for PhysReg, or
/// nullptr for a non-live out or non-local def.
MachineInstr *getLocalLiveOutMIDef(MachineBasicBlock *MBB,
- int PhysReg) const;
+ MCRegister PhysReg) const;
/// If a single MachineInstr creates the reaching definition, then return it.
/// Otherwise return null.
- MachineInstr *getUniqueReachingMIDef(MachineInstr *MI, int PhysReg) const;
+ MachineInstr *getUniqueReachingMIDef(MachineInstr *MI,
+ MCRegister PhysReg) const;
/// If a single MachineInstr creates the reaching definition, for MIs operand
/// at Idx, then return it. Otherwise return null.
@@ -167,40 +169,45 @@ public:
/// Provide whether the register has been defined in the same basic block as,
/// and before, MI.
- bool hasLocalDefBefore(MachineInstr *MI, int PhysReg) const;
+ bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const;
/// Return whether the given register is used after MI, whether it's a local
/// use or a live out.
- bool isRegUsedAfter(MachineInstr *MI, int PhysReg) const;
+ bool isRegUsedAfter(MachineInstr *MI, MCRegister PhysReg) const;
/// Return whether the given register is defined after MI.
- bool isRegDefinedAfter(MachineInstr *MI, int PhysReg) const;
+ bool isRegDefinedAfter(MachineInstr *MI, MCRegister PhysReg) const;
/// Provides the clearance - the number of instructions since the closest
/// reaching def instruction of PhysReg that reaches MI.
- int getClearance(MachineInstr *MI, MCPhysReg PhysReg) const;
+ int getClearance(MachineInstr *MI, MCRegister PhysReg) const;
/// Provides the uses, in the same block as MI, of register that MI defines.
/// This does not consider live-outs.
- void getReachingLocalUses(MachineInstr *MI, int PhysReg,
+ void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg,
InstSet &Uses) const;
/// Search MBB for a definition of PhysReg and insert it into Defs. If no
/// definition is found, recursively search the predecessor blocks for them.
- void getLiveOuts(MachineBasicBlock *MBB, int PhysReg, InstSet &Defs,
+ void getLiveOuts(MachineBasicBlock *MBB, MCRegister PhysReg, InstSet &Defs,
BlockSet &VisitedBBs) const;
- void getLiveOuts(MachineBasicBlock *MBB, int PhysReg, InstSet &Defs) const;
+ void getLiveOuts(MachineBasicBlock *MBB, MCRegister PhysReg,
+ InstSet &Defs) const;
/// For the given block, collect the instructions that use the live-in
/// value of the provided register. Return whether the value is still
/// live on exit.
- bool getLiveInUses(MachineBasicBlock *MBB, int PhysReg,
+ bool getLiveInUses(MachineBasicBlock *MBB, MCRegister PhysReg,
InstSet &Uses) const;
/// Collect the users of the value stored in PhysReg, which is defined
/// by MI.
- void getGlobalUses(MachineInstr *MI, int PhysReg,
- InstSet &Uses) const;
+ void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const;
+
+ /// Collect all possible definitions of the value stored in PhysReg, which is
+ /// used by MI.
+ void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg,
+ InstSet &Defs) const;
/// Return whether From can be moved forwards to just before To.
bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const;
@@ -224,12 +231,13 @@ public:
/// Return whether a MachineInstr could be inserted at MI and safely define
/// the given register without affecting the program.
- bool isSafeToDefRegAt(MachineInstr *MI, int PhysReg) const;
+ bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const;
/// Return whether a MachineInstr could be inserted at MI and safely define
/// the given register without affecting the program, ignoring any effects
/// on the provided instructions.
- bool isSafeToDefRegAt(MachineInstr *MI, int PhysReg, InstSet &Ignore) const;
+ bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
+ InstSet &Ignore) const;
private:
/// Set up LiveRegs by merging predecessor live-out values.
@@ -264,7 +272,8 @@ private:
/// Provides the instruction of the closest reaching def instruction of
/// PhysReg that reaches MI, relative to the beginning of MI's basic block.
- MachineInstr *getReachingLocalMIDef(MachineInstr *MI, int PhysReg) const;
+ MachineInstr *getReachingLocalMIDef(MachineInstr *MI,
+ MCRegister PhysReg) const;
};
} // namespace llvm
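
The queries above now take MCRegister instead of a plain int, which keeps callers honest about passing a physical register. A hedged sketch of a typical lookup; the wrapper name is made up.

#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/MC/MCRegister.h"

llvm::MachineInstr *findSingleDef(const llvm::ReachingDefAnalysis &RDA,
                                  llvm::MachineInstr *Use,
                                  llvm::MCRegister PhysReg) {
  // Null when no definition, or more than one, can reach Use.
  return RDA.getUniqueReachingMIDef(Use, PhysReg);
}
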
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegAllocPBQP.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegAllocPBQP.h
index f7f92248f4ce..1ed55082e32c 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegAllocPBQP.h
@@ -22,6 +22,8 @@
#include "llvm/CodeGen/PBQP/Math.h"
#include "llvm/CodeGen/PBQP/ReductionRules.h"
#include "llvm/CodeGen/PBQP/Solution.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
@@ -96,13 +98,13 @@ public:
AllowedRegVector() = default;
AllowedRegVector(AllowedRegVector &&) = default;
- AllowedRegVector(const std::vector<unsigned> &OptVec)
- : NumOpts(OptVec.size()), Opts(new unsigned[NumOpts]) {
+ AllowedRegVector(const std::vector<MCRegister> &OptVec)
+ : NumOpts(OptVec.size()), Opts(new MCRegister[NumOpts]) {
std::copy(OptVec.begin(), OptVec.end(), Opts.get());
}
unsigned size() const { return NumOpts; }
- unsigned operator[](size_t I) const { return Opts[I]; }
+ MCRegister operator[](size_t I) const { return Opts[I]; }
bool operator==(const AllowedRegVector &Other) const {
if (NumOpts != Other.NumOpts)
@@ -116,12 +118,12 @@ public:
private:
unsigned NumOpts = 0;
- std::unique_ptr<unsigned[]> Opts;
+ std::unique_ptr<MCRegister[]> Opts;
};
inline hash_code hash_value(const AllowedRegVector &OptRegs) {
- unsigned *OStart = OptRegs.Opts.get();
- unsigned *OEnd = OptRegs.Opts.get() + OptRegs.NumOpts;
+ MCRegister *OStart = OptRegs.Opts.get();
+ MCRegister *OEnd = OptRegs.Opts.get() + OptRegs.NumOpts;
return hash_combine(OptRegs.NumOpts,
hash_combine_range(OStart, OEnd));
}
@@ -143,11 +145,11 @@ public:
LiveIntervals &LIS;
MachineBlockFrequencyInfo &MBFI;
- void setNodeIdForVReg(unsigned VReg, GraphBase::NodeId NId) {
- VRegToNodeId[VReg] = NId;
+ void setNodeIdForVReg(Register VReg, GraphBase::NodeId NId) {
+ VRegToNodeId[VReg.id()] = NId;
}
- GraphBase::NodeId getNodeIdForVReg(unsigned VReg) const {
+ GraphBase::NodeId getNodeIdForVReg(Register VReg) const {
auto VRegItr = VRegToNodeId.find(VReg);
if (VRegItr == VRegToNodeId.end())
return GraphBase::invalidNodeId();
@@ -159,7 +161,7 @@ public:
}
private:
- DenseMap<unsigned, GraphBase::NodeId> VRegToNodeId;
+ DenseMap<Register, GraphBase::NodeId> VRegToNodeId;
AllowedRegVecPool AllowedRegVecs;
};
@@ -197,8 +199,8 @@ public:
NodeMetadata(NodeMetadata &&) = default;
NodeMetadata& operator=(NodeMetadata &&) = default;
- void setVReg(unsigned VReg) { this->VReg = VReg; }
- unsigned getVReg() const { return VReg; }
+ void setVReg(Register VReg) { this->VReg = VReg; }
+ Register getVReg() const { return VReg; }
void setAllowedRegs(GraphMetadata::AllowedRegVecRef AllowedRegs) {
this->AllowedRegs = std::move(AllowedRegs);
@@ -256,7 +258,7 @@ private:
unsigned NumOpts = 0;
unsigned DeniedOpts = 0;
std::unique_ptr<unsigned[]> OptUnsafeEdges;
- unsigned VReg = 0;
+ Register VReg;
GraphMetadata::AllowedRegVecRef AllowedRegs;
#ifndef NDEBUG
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/Register.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/Register.h
index 054040cd29a1..d7057cfb76e0 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/Register.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/Register.h
@@ -40,24 +40,24 @@ public:
/// frame index in a variable that normally holds a register. isStackSlot()
/// returns true if Reg is in the range used for stack slots.
///
- /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack
- /// slots, so if a variable may contains a stack slot, always check
- /// isStackSlot() first.
- ///
+ /// FIXME: remove in favor of member.
static bool isStackSlot(unsigned Reg) {
return MCRegister::isStackSlot(Reg);
}
+ /// Return true if this is a stack slot.
+ bool isStack() const { return MCRegister::isStackSlot(Reg); }
+
/// Compute the frame index from a register value representing a stack slot.
- static int stackSlot2Index(unsigned Reg) {
- assert(isStackSlot(Reg) && "Not a stack slot");
+ static int stackSlot2Index(Register Reg) {
+ assert(Reg.isStack() && "Not a stack slot");
return int(Reg - MCRegister::FirstStackSlot);
}
/// Convert a non-negative frame index to a stack slot register value.
- static unsigned index2StackSlot(int FI) {
+ static Register index2StackSlot(int FI) {
assert(FI >= 0 && "Cannot hold a negative frame index.");
- return FI + MCRegister::FirstStackSlot;
+ return Register(FI + MCRegister::FirstStackSlot);
}
/// Return true if the specified register number is in
@@ -69,20 +69,19 @@ public:
/// Return true if the specified register number is in
/// the virtual register namespace.
static bool isVirtualRegister(unsigned Reg) {
- assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
- return Reg & MCRegister::VirtualRegFlag;
+ return Reg & MCRegister::VirtualRegFlag && !isStackSlot(Reg);
}
/// Convert a virtual register number to a 0-based index.
/// The first virtual register in a function will get the index 0.
- static unsigned virtReg2Index(unsigned Reg) {
+ static unsigned virtReg2Index(Register Reg) {
assert(isVirtualRegister(Reg) && "Not a virtual register");
return Reg & ~MCRegister::VirtualRegFlag;
}
/// Convert a 0-based index to a virtual register number.
/// This is the inverse operation of VirtReg2IndexFunctor below.
- static unsigned index2VirtReg(unsigned Index) {
+ static Register index2VirtReg(unsigned Index) {
assert(Index < (1u << 31) && "Index too large for virtual register range.");
return Index | MCRegister::VirtualRegFlag;
}
@@ -115,6 +114,15 @@ public:
return MCRegister(Reg);
}
+ /// Utility to check-convert this value to a MCRegister. The caller is
+ /// expected to have already validated that this Register is, indeed,
+ /// physical.
+ MCRegister asMCReg() const {
+ assert(Reg == MCRegister::NoRegister ||
+ MCRegister::isPhysicalRegister(Reg));
+ return MCRegister(Reg);
+ }
+
bool isValid() const { return Reg != MCRegister::NoRegister; }
/// Comparisons between register objects
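For illustration, a minimal sketch (not part of the patch) of how the updated Register helpers above compose; classify() is an invented name and the logic simply mirrors the accessors declared in this header.

#include "llvm/CodeGen/Register.h"
using namespace llvm;

// Map any Register-typed value to a plain index, using the new member
// helpers instead of the old static unsigned-based predicates.
static int classify(Register Reg) {
  if (!Reg.isValid())
    return -1;                                 // NoRegister
  if (Reg.isStack())
    return Register::stackSlot2Index(Reg);     // frame index of a stack slot
  if (Reg.isVirtual())
    return int(Register::virtReg2Index(Reg));  // 0-based virtual index
  return int(Reg.asMCReg().id());              // physical register number
}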
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterPressure.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterPressure.h
index 92333b859f1b..1deeb4d41511 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -37,10 +37,10 @@ class MachineRegisterInfo;
class RegisterClassInfo;
struct RegisterMaskPair {
- unsigned RegUnit; ///< Virtual register or register unit.
+ Register RegUnit; ///< Virtual register or register unit.
LaneBitmask LaneMask;
- RegisterMaskPair(unsigned RegUnit, LaneBitmask LaneMask)
+ RegisterMaskPair(Register RegUnit, LaneBitmask LaneMask)
: RegUnit(RegUnit), LaneMask(LaneMask) {}
};
@@ -157,7 +157,7 @@ public:
const_iterator begin() const { return &PressureChanges[0]; }
const_iterator end() const { return &PressureChanges[MaxPSets]; }
- void addPressureChange(unsigned RegUnit, bool IsDec,
+ void addPressureChange(Register RegUnit, bool IsDec,
const MachineRegisterInfo *MRI);
void dump(const TargetRegisterInfo &TRI) const;
@@ -275,24 +275,24 @@ private:
RegSet Regs;
unsigned NumRegUnits;
- unsigned getSparseIndexFromReg(unsigned Reg) const {
- if (Register::isVirtualRegister(Reg))
+ unsigned getSparseIndexFromReg(Register Reg) const {
+ if (Reg.isVirtual())
return Register::virtReg2Index(Reg) + NumRegUnits;
assert(Reg < NumRegUnits);
return Reg;
}
- unsigned getRegFromSparseIndex(unsigned SparseIndex) const {
+ Register getRegFromSparseIndex(unsigned SparseIndex) const {
if (SparseIndex >= NumRegUnits)
- return Register::index2VirtReg(SparseIndex-NumRegUnits);
- return SparseIndex;
+ return Register::index2VirtReg(SparseIndex - NumRegUnits);
+ return Register(SparseIndex);
}
public:
void clear();
void init(const MachineRegisterInfo &MRI);
- LaneBitmask contains(unsigned Reg) const {
+ LaneBitmask contains(Register Reg) const {
unsigned SparseIndex = getSparseIndexFromReg(Reg);
RegSet::const_iterator I = Regs.find(SparseIndex);
if (I == Regs.end())
@@ -332,7 +332,7 @@ public:
template<typename ContainerT>
void appendTo(ContainerT &To) const {
for (const IndexMaskPair &P : Regs) {
- unsigned Reg = getRegFromSparseIndex(P.Index);
+ Register Reg = getRegFromSparseIndex(P.Index);
if (P.LaneMask.any())
To.push_back(RegisterMaskPair(Reg, P.LaneMask));
}
@@ -390,7 +390,7 @@ class RegPressureTracker {
LiveRegSet LiveRegs;
/// Set of vreg defs that start a live range.
- SparseSet<unsigned, VirtReg2IndexFunctor> UntiedDefs;
+ SparseSet<Register, VirtReg2IndexFunctor> UntiedDefs;
/// Live-through pressure.
std::vector<unsigned> LiveThruPressure;
@@ -532,7 +532,7 @@ public:
return getDownwardPressure(MI, PressureResult, MaxPressureResult);
}
- bool hasUntiedDef(unsigned VirtReg) const {
+ bool hasUntiedDef(Register VirtReg) const {
return UntiedDefs.count(VirtReg);
}
@@ -548,9 +548,9 @@ protected:
/// after the current position.
SlotIndex getCurrSlot() const;
- void increaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask,
+ void increaseRegPressure(Register RegUnit, LaneBitmask PreviousMask,
LaneBitmask NewMask);
- void decreaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask,
+ void decreaseRegPressure(Register RegUnit, LaneBitmask PreviousMask,
LaneBitmask NewMask);
void bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs);
@@ -561,9 +561,9 @@ protected:
void discoverLiveInOrOut(RegisterMaskPair Pair,
SmallVectorImpl<RegisterMaskPair> &LiveInOrOut);
- LaneBitmask getLastUsedLanes(unsigned RegUnit, SlotIndex Pos) const;
- LaneBitmask getLiveLanesAt(unsigned RegUnit, SlotIndex Pos) const;
- LaneBitmask getLiveThroughAt(unsigned RegUnit, SlotIndex Pos) const;
+ LaneBitmask getLastUsedLanes(Register RegUnit, SlotIndex Pos) const;
+ LaneBitmask getLiveLanesAt(Register RegUnit, SlotIndex Pos) const;
+ LaneBitmask getLiveThroughAt(Register RegUnit, SlotIndex Pos) const;
};
void dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterScavenging.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterScavenging.h
index 5b5a80a67e7f..4f48ea2dc8e8 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterScavenging.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterScavenging.h
@@ -89,15 +89,6 @@ public:
while (MBBI != I) forward();
}
- /// Invert the behavior of forward() on the current instruction (undo the
- /// changes to the available registers made by forward()).
- void unprocess();
-
- /// Unprocess instructions until you reach the provided iterator.
- void unprocess(MachineBasicBlock::iterator I) {
- while (MBBI != I) unprocess();
- }
-
/// Update internal register state and move MBB iterator backwards.
/// Contrary to unprocess() this method gives precise results even in the
/// absence of kill flags.
@@ -203,10 +194,10 @@ private:
void determineKillsAndDefs();
/// Add all Reg Units that Reg contains to BV.
- void addRegUnits(BitVector &BV, Register Reg);
+ void addRegUnits(BitVector &BV, MCRegister Reg);
/// Remove all Reg Units that \p Reg contains from \p BV.
- void removeRegUnits(BitVector &BV, Register Reg);
+ void removeRegUnits(BitVector &BV, MCRegister Reg);
/// Return the candidate register that is unused for the longest after
/// StartMI. UseMI is set to the instruction where the search stopped.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h
index b38cd4924174..bd63dd875621 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h
@@ -107,7 +107,6 @@ namespace llvm {
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
///
void initNumRegDefsLeft(SUnit *SU);
- void updateNumRegDefsLeft(SUnit *SU);
int regPressureDelta(SUnit *SU, bool RawPressure = false);
int rawRegPressureDelta (SUnit *SU, unsigned RCId);
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
index f71f39e5bf03..86e24cab76f6 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_RUNTIMELIBCALLS_H
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/AtomicOrdering.h"
namespace llvm {
namespace RTLIB {
@@ -60,6 +61,10 @@ namespace RTLIB {
/// UNKNOWN_LIBCALL if there is none.
Libcall getSYNC(unsigned Opc, MVT VT);
+ /// Return the outline atomics value for the given opcode, atomic ordering
+ /// and type, or UNKNOWN_LIBCALL if there is none.
+ Libcall getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT);
+
/// getMEMCPY_ELEMENT_UNORDERED_ATOMIC - Return
/// MEMCPY_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
/// UNKNOW_LIBCALL if there is none.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 1eb9b9f322ba..50b186de2b05 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -268,6 +268,11 @@ namespace llvm {
return SU->SchedClass;
}
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(SUnit *SU, SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
/// Returns an iterator to the top of the current scheduling region.
MachineBasicBlock::iterator begin() const { return RegionBegin; }
@@ -362,16 +367,6 @@ namespace llvm {
void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
- /// Initializes register live-range state for updating kills.
- /// PostRA helper for rewriting kill flags.
- void startBlockForKills(MachineBasicBlock *BB);
-
- /// Toggles a register operand kill flag.
- ///
- /// Other adjustments may be made to the instruction if necessary. Return
- /// true if the operand has been deleted, false if not.
- void toggleKillFlag(MachineInstr &MI, MachineOperand &MO);
-
/// Returns a mask for which lanes get read/written by the given (register)
/// machine operand.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
@@ -393,10 +388,7 @@ namespace llvm {
/// Returns an existing SUnit for this MI, or nullptr.
inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
- DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
- if (I == MISUnitMap.end())
- return nullptr;
- return I->second;
+ return MISUnitMap.lookup(MI);
}
} // end namespace llvm
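For illustration, a hypothetical ScheduleDAGMutation (not part of the patch) that uses the IsReachable() accessor added above to avoid forming a cycle before adding an artificial edge. The ScheduleDAGMutation base, the public SUnits vector and addEdge() are assumed to be available from the surrounding scheduler infrastructure; the mutation itself is invented for the sketch.

#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
using namespace llvm;

struct SerializeFirstTwo : ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAG) override {
    if (DAG->SUnits.size() < 2)
      return;
    SUnit *A = &DAG->SUnits[0];
    SUnit *B = &DAG->SUnits[1];
    // Making A a predecessor of B would form a cycle exactly when A is
    // already reachable from B, which is what IsReachable(A, B) reports.
    if (!DAG->IsReachable(A, B))
      DAG->addEdge(B, SDep(A, SDep::Artificial));
  }
};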
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
index 37590f496ca2..9f1101b658d0 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -57,7 +57,7 @@ public:
/// other instruction is available, issue it first.
/// * NoopHazard: issuing this instruction would break the program. If
/// some other instruction can be issued, do so, otherwise issue a noop.
- virtual HazardType getHazardType(SUnit *m, int Stalls = 0) {
+ virtual HazardType getHazardType(SUnit *, int Stalls = 0) {
return NoHazard;
}
@@ -114,6 +114,14 @@ public:
// Default implementation: count it as a cycle.
AdvanceCycle();
}
+
+ /// EmitNoops - This callback is invoked when noops were added to the
+ /// instruction stream.
+ virtual void EmitNoops(unsigned Quantity) {
+ // Default implementation: count it as a cycle.
+ for (unsigned i = 0; i < Quantity; ++i)
+ EmitNoop();
+ }
};
} // end namespace llvm
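As a sketch only, a toy hazard recognizer overriding the new EmitNoops() hook so a burst of noops is accounted for in a single step rather than through the default per-noop loop; the class and its cycle counter are invented for the example.

#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
using namespace llvm;

struct ToyHazardRecognizer : ScheduleHazardRecognizer {
  unsigned CyclesLeft = 0;   // pending stall cycles (invented model)
  HazardType getHazardType(SUnit *, int) override {
    return CyclesLeft ? Hazard : NoHazard;
  }
  void AdvanceCycle() override {
    if (CyclesLeft)
      --CyclesLeft;
  }
  void EmitNoops(unsigned Quantity) override {
    // Consume Quantity cycles at once instead of Quantity EmitNoop() calls.
    CyclesLeft = CyclesLeft > Quantity ? CyclesLeft - Quantity : 0;
  }
};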
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h
index f26ab6f287a0..aeb488dd6c83 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -27,7 +27,6 @@
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/DAGCombine.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -64,6 +63,7 @@ class ConstantFP;
class ConstantInt;
class DataLayout;
struct fltSemantics;
+class FunctionLoweringInfo;
class GlobalValue;
struct KnownBits;
class LegacyDivergenceAnalysis;
@@ -331,6 +331,29 @@ public:
virtual void anchor();
};
+ /// Helper to insert SDNodeFlags automatically during transformations. Uses
+ /// RAII to save and restore the flags of the surrounding scope.
+ class FlagInserter {
+ SelectionDAG &DAG;
+ SDNodeFlags Flags;
+ FlagInserter *LastInserter;
+
+ public:
+ FlagInserter(SelectionDAG &SDAG, SDNodeFlags Flags)
+ : DAG(SDAG), Flags(Flags),
+ LastInserter(SDAG.getFlagInserter()) {
+ SDAG.setFlagInserter(this);
+ }
+ FlagInserter(SelectionDAG &SDAG, SDNode *N)
+ : FlagInserter(SDAG, N->getFlags()) {}
+
+ FlagInserter(const FlagInserter &) = delete;
+ FlagInserter &operator=(const FlagInserter &) = delete;
+ ~FlagInserter() { DAG.setFlagInserter(LastInserter); }
+
+ const SDNodeFlags getFlags() const { return Flags; }
+ };
+
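A minimal usage sketch (not part of the patch): while a FlagInserter is in scope, the getNode() overloads that take no explicit flags pick up the saved flags automatically, as noted by the "Use flags from current flag inserter" overloads below. The reassociation helper and its names are invented.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue reassociateAdds(SelectionDAG &DAG, SDNode *N, const SDLoc &DL,
                               EVT VT, SDValue A, SDValue B, SDValue C) {
  // Reuse N's SDNodeFlags for every node built in this scope.
  SelectionDAG::FlagInserter Inserter(DAG, N);
  SDValue AB = DAG.getNode(ISD::ADD, DL, VT, A, B); // flags applied implicitly
  return DAG.getNode(ISD::ADD, DL, VT, AB, C);      // same flags here
}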
/// When true, additional steps are taken to
/// ensure that getConstant() and similar functions return DAG nodes that
/// have legal types. This is important after type legalization since
@@ -433,6 +456,9 @@ public:
ProfileSummaryInfo *getPSI() const { return PSI; }
BlockFrequencyInfo *getBFI() const { return BFI; }
+ FlagInserter *getFlagInserter() { return Inserter; }
+ void setFlagInserter(FlagInserter *FI) { Inserter = FI; }
+
/// Just dump dot graph to a user-provided path and title.
/// This doesn't open the dot viewer program and
/// helps visualization when outside debugging session.
@@ -695,9 +721,7 @@ public:
// When generating a branch to a BB, we don't in general know enough
// to provide debug info for the BB at that time, so keep this one around.
SDValue getBasicBlock(MachineBasicBlock *MBB);
- SDValue getBasicBlock(MachineBasicBlock *MBB, SDLoc dl);
SDValue getExternalSymbol(const char *Sym, EVT VT);
- SDValue getExternalSymbol(const char *Sym, const SDLoc &dl, EVT VT);
SDValue getTargetExternalSymbol(const char *Sym, EVT VT,
unsigned TargetFlags = 0);
SDValue getMCSymbol(MCSymbol *Sym, EVT VT);
@@ -870,7 +894,7 @@ public:
/// Returns sum of the base pointer and offset.
/// Unlike getObjectPtrOffset this does not set NoUnsignedWrap by default.
- SDValue getMemBasePlusOffset(SDValue Base, int64_t Offset, const SDLoc &DL,
+ SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL,
const SDNodeFlags Flags = SDNodeFlags());
SDValue getMemBasePlusOffset(SDValue Base, SDValue Offset, const SDLoc &DL,
const SDNodeFlags Flags = SDNodeFlags());
@@ -878,7 +902,7 @@ public:
/// Create an add instruction with appropriate flags when used for
/// addressing some offset of an object. i.e. if a load is split into multiple
/// components, create an add nuw from the base pointer to the offset.
- SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, int64_t Offset) {
+ SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset) {
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
return getMemBasePlusOffset(Ptr, Offset, SL, Flags);
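For illustration, a sketch (assuming the TypeSize-based overload above) of forming the pointer for the high half of a split access; the helper name and the fixed byte offset are invented. The add carries the nuw flag, as the comment above describes.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/TypeSize.h"
using namespace llvm;

static SDValue getHighHalfPtr(SelectionDAG &DAG, const SDLoc &DL,
                              SDValue BasePtr, uint64_t HalfBytes) {
  return DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(HalfBytes));
}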
@@ -945,21 +969,31 @@ public:
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDUse> Ops);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops, const SDNodeFlags Flags = SDNodeFlags());
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
SDValue getNode(unsigned Opcode, const SDLoc &DL, ArrayRef<EVT> ResultTys,
ArrayRef<SDValue> Ops);
SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
- ArrayRef<SDValue> Ops, const SDNodeFlags Flags = SDNodeFlags());
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
+
+ // Use flags from current flag inserter.
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops);
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ ArrayRef<SDValue> Ops);
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand);
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2);
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDValue N3);
// Specialize based on number of operands.
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
- const SDNodeFlags Flags = SDNodeFlags());
+ const SDNodeFlags Flags);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
- SDValue N2, const SDNodeFlags Flags = SDNodeFlags());
+ SDValue N2, const SDNodeFlags Flags);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
- SDValue N2, SDValue N3,
- const SDNodeFlags Flags = SDNodeFlags());
+ SDValue N2, SDValue N3, const SDNodeFlags Flags);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
SDValue N2, SDValue N3, SDValue N4);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
@@ -1169,6 +1203,12 @@ public:
SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain,
int FrameIndex, int64_t Size, int64_t Offset = -1);
+ /// Creates a PseudoProbeSDNode with function GUID `Guid` and
+ /// the index of the block `Index` it is probing, as well as the attributes
+ /// `attr` of the probe.
+ SDValue getPseudoProbeNode(const SDLoc &Dl, SDValue Chain, uint64_t Guid,
+ uint64_t Index, uint32_t Attr);
+
/// Create a MERGE_VALUES node from the given operands.
SDValue getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl);
@@ -1178,14 +1218,15 @@ public:
/// This function will set the MOLoad flag on MMOFlags, but you can set it if
/// you want. The MOStore flag must not be set.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
- MachinePointerInfo PtrInfo, MaybeAlign Alignment,
+ MachinePointerInfo PtrInfo,
+ MaybeAlign Alignment = MaybeAlign(),
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr);
/// FIXME: Remove once transition to Align is over.
inline SDValue
getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
- MachinePointerInfo PtrInfo, unsigned Alignment = 0,
+ MachinePointerInfo PtrInfo, unsigned Alignment,
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr) {
@@ -1197,14 +1238,14 @@ public:
SDValue
getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain,
SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT,
- MaybeAlign Alignment,
+ MaybeAlign Alignment = MaybeAlign(),
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes());
/// FIXME: Remove once transition to Align is over.
inline SDValue
getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain,
SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT,
- unsigned Alignment = 0,
+ unsigned Alignment,
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes()) {
return getExtLoad(ExtType, dl, VT, Chain, Ptr, PtrInfo, MemVT,
@@ -1221,13 +1262,12 @@ public:
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr);
- inline SDValue
- getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
- const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
- MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment,
- MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
- const AAMDNodes &AAInfo = AAMDNodes(),
- const MDNode *Ranges = nullptr) {
+ inline SDValue getLoad(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl,
+ SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo,
+ EVT MemVT, MaybeAlign Alignment = MaybeAlign(),
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) {
// Ensures that codegen never sees a None Alignment.
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, PtrInfo, MemVT,
Alignment.getValueOr(getEVTAlign(MemVT)), MMOFlags, AAInfo,
@@ -1237,7 +1277,7 @@ public:
inline SDValue
getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
- MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment = 0,
+ MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment,
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr) {
@@ -1260,7 +1300,7 @@ public:
const AAMDNodes &AAInfo = AAMDNodes());
inline SDValue
getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
- MachinePointerInfo PtrInfo, MaybeAlign Alignment,
+ MachinePointerInfo PtrInfo, MaybeAlign Alignment = MaybeAlign(),
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes()) {
return getStore(Chain, dl, Val, Ptr, PtrInfo,
@@ -1270,7 +1310,7 @@ public:
/// FIXME: Remove once transition to Align is over.
inline SDValue
getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
- MachinePointerInfo PtrInfo, unsigned Alignment = 0,
+ MachinePointerInfo PtrInfo, unsigned Alignment,
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes()) {
return getStore(Chain, dl, Val, Ptr, PtrInfo, MaybeAlign(Alignment),
@@ -1285,7 +1325,8 @@ public:
const AAMDNodes &AAInfo = AAMDNodes());
inline SDValue
getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
- MachinePointerInfo PtrInfo, EVT SVT, MaybeAlign Alignment,
+ MachinePointerInfo PtrInfo, EVT SVT,
+ MaybeAlign Alignment = MaybeAlign(),
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes()) {
return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT,
@@ -1295,7 +1336,7 @@ public:
/// FIXME: Remove once transition to Align is over.
inline SDValue
getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
- MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment = 0,
+ MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment,
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes()) {
return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT,
@@ -1321,10 +1362,11 @@ public:
ISD::MemIndexedMode AM);
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
- ISD::MemIndexType IndexType);
+ ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy);
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
- ISD::MemIndexType IndexType);
+ ISD::MemIndexType IndexType,
+ bool IsTruncating = false);
/// Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);
@@ -1389,6 +1431,9 @@ public:
void setNodeMemRefs(MachineSDNode *N,
ArrayRef<MachineMemOperand *> NewMemRefs);
+ // Calculate divergence of node \p N based on its operands.
+ bool calculateDivergence(SDNode *N);
+
// Propagates the change in divergence to users
void updateDivergence(SDNode * N);
@@ -1409,8 +1454,6 @@ public:
EVT VT2, ArrayRef<SDValue> Ops);
SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1,
EVT VT2, EVT VT3, ArrayRef<SDValue> Ops);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
- EVT VT2, SDValue Op1);
SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1,
EVT VT2, SDValue Op1, SDValue Op2);
SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs,
@@ -1468,8 +1511,13 @@ public:
SDValue Operand, SDValue Subreg);
/// Get the specified node if it's already available, or else return NULL.
- SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops,
- const SDNodeFlags Flags = SDNodeFlags());
+ SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
+ SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops);
+
+ /// Check if a node exists without modifying its flags.
+ bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops);
/// Creates a SDDbgValue node.
SDDbgValue *getDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N,
@@ -1543,7 +1591,14 @@ public:
/// chain to the token factor. This ensures that the new memory node will have
/// the same relative memory dependency position as the old load. Returns the
/// new merged load chain.
- SDValue makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New);
+ SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain);
+
+ /// If an existing load has uses of its chain, create a token factor node with
+ /// that chain and the new memory node's chain and update users of the old
+ /// chain to the token factor. This ensures that the new memory node will have
+ /// the same relative memory dependency position as the old load. Returns the
+ /// new merged load chain.
+ SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
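A sketch of the intended use (the rewrite itself, the widened type, and the helper name are invented): after building a replacement load, merge its chain with the old load's chain so every user of the old chain stays correctly ordered.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue replaceWithWiderLoad(SelectionDAG &DAG, LoadSDNode *Ld,
                                    EVT WideVT, const SDLoc &DL) {
  SDValue NewLd = DAG.getLoad(WideVT, DL, Ld->getChain(), Ld->getBasePtr(),
                              Ld->getPointerInfo());
  // Token-factor the old and new chains; users of Ld's chain are updated.
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
  return NewLd;
}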
/// Topological-sort the AllNodes list and
/// assign a unique node id for each node in the DAG based on their
@@ -1781,7 +1836,8 @@ public:
/// for \p DemandedElts.
///
/// NOTE: The function will return true for a demanded splat of UNDEF values.
- bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts);
+ bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts,
+ unsigned Depth = 0);
/// Test whether \p V has a splatted value.
bool isSplatValue(SDValue V, bool AllowUndefs = false);
@@ -1903,14 +1959,14 @@ public:
}
/// Test whether the given value is a constant int or similar node.
- SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N);
+ SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) const;
/// Test whether the given value is a constant FP or similar node.
- SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N);
+ SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) const;
/// \returns true if \p N is any kind of constant or build_vector of
/// constants, int or float. If a vector, it may not necessarily be a splat.
- inline bool isConstantValueOfAnyType(SDValue N) {
+ inline bool isConstantValueOfAnyType(SDValue N) const {
return isConstantIntBuildVectorOrConstantInt(N) ||
isConstantFPBuildVectorOrConstantFP(N);
}
@@ -1958,6 +2014,10 @@ public:
bool shouldOptForSize() const;
+ /// Get the (commutative) neutral element for the given opcode, if it exists.
+ SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDNodeFlags Flags);
+
private:
void InsertNode(SDNode *N);
bool RemoveNodeFromCSEMaps(SDNode *N);
@@ -1998,6 +2058,8 @@ private:
std::map<std::pair<std::string, unsigned>, SDNode *> TargetExternalSymbols;
DenseMap<MCSymbol *, SDNode *> MCSymbols;
+
+ FlagInserter *Inserter = nullptr;
};
template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 3bfbf3765e4f..84bb11edd715 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -323,8 +323,6 @@ private:
SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
ArrayRef<SDValue> Ops, unsigned EmitNodeInfo);
- SDNode *MutateStrictFPToFP(SDNode *Node, unsigned NewOpc);
-
/// Prepares the landing pad to take incoming values or do other EH
/// personality specific tasks. Returns true if the block should be
/// instruction selected, false if no code should be emitted for it.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 7c2b49087edd..000e383b71eb 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -85,29 +85,42 @@ namespace ISD {
/// Node predicates
- /// If N is a BUILD_VECTOR node whose elements are all the same constant or
- /// undefined, return true and return the constant value in \p SplatValue.
- bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
-
- /// Return true if the specified node is a BUILD_VECTOR where all of the
- /// elements are ~0 or undef.
- bool isBuildVectorAllOnes(const SDNode *N);
-
- /// Return true if the specified node is a BUILD_VECTOR where all of the
- /// elements are 0 or undef.
- bool isBuildVectorAllZeros(const SDNode *N);
-
- /// Return true if the specified node is a BUILD_VECTOR node of all
- /// ConstantSDNode or undef.
- bool isBuildVectorOfConstantSDNodes(const SDNode *N);
-
- /// Return true if the specified node is a BUILD_VECTOR node of all
- /// ConstantFPSDNode or undef.
- bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
-
- /// Return true if the node has at least one operand and all operands of the
- /// specified node are ISD::UNDEF.
- bool allOperandsUndef(const SDNode *N);
+/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
+/// same constant or undefined, return true and return the constant value in
+/// \p SplatValue.
+bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
+
+/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
+/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to
+/// true, it only checks BUILD_VECTOR.
+bool isConstantSplatVectorAllOnes(const SDNode *N,
+ bool BuildVectorOnly = false);
+
+/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
+/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it
+/// only checks BUILD_VECTOR.
+bool isConstantSplatVectorAllZeros(const SDNode *N,
+ bool BuildVectorOnly = false);
+
+/// Return true if the specified node is a BUILD_VECTOR where all of the
+/// elements are ~0 or undef.
+bool isBuildVectorAllOnes(const SDNode *N);
+
+/// Return true if the specified node is a BUILD_VECTOR where all of the
+/// elements are 0 or undef.
+bool isBuildVectorAllZeros(const SDNode *N);
+
+/// Return true if the specified node is a BUILD_VECTOR node of all
+/// ConstantSDNode or undef.
+bool isBuildVectorOfConstantSDNodes(const SDNode *N);
+
+/// Return true if the specified node is a BUILD_VECTOR node of all
+/// ConstantFPSDNode or undef.
+bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
+
+/// Return true if the node has at least one operand and all operands of the
+/// specified node are ISD::UNDEF.
+bool allOperandsUndef(const SDNode *N);
} // end namespace ISD
@@ -180,8 +193,8 @@ public:
return getValueType().getSizeInBits();
}
- TypeSize getScalarValueSizeInBits() const {
- return getValueType().getScalarType().getSizeInBits();
+ uint64_t getScalarValueSizeInBits() const {
+ return getValueType().getScalarType().getFixedSizeInBits();
}
// Forwarding methods - These forward to the corresponding methods in SDNode.
@@ -357,11 +370,6 @@ template<> struct simplify_type<SDUse> {
/// the backend.
struct SDNodeFlags {
private:
- // This bit is used to determine if the flags are in a defined state.
- // Flag bits can only be masked out during intersection if the masking flags
- // are defined.
- bool AnyDefined : 1;
-
bool NoUnsignedWrap : 1;
bool NoSignedWrap : 1;
bool Exact : 1;
@@ -383,9 +391,8 @@ private:
public:
/// Default constructor turns off all optimization flags.
SDNodeFlags()
- : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),
- Exact(false), NoNaNs(false), NoInfs(false),
- NoSignedZeros(false), AllowReciprocal(false),
+ : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
+ NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
AllowContract(false), ApproximateFuncs(false),
AllowReassociation(false), NoFPExcept(false) {}
@@ -400,56 +407,18 @@ public:
setAllowReassociation(FPMO.hasAllowReassoc());
}
- /// Sets the state of the flags to the defined state.
- void setDefined() { AnyDefined = true; }
- /// Returns true if the flags are in a defined state.
- bool isDefined() const { return AnyDefined; }
-
// These are mutators for each flag.
- void setNoUnsignedWrap(bool b) {
- setDefined();
- NoUnsignedWrap = b;
- }
- void setNoSignedWrap(bool b) {
- setDefined();
- NoSignedWrap = b;
- }
- void setExact(bool b) {
- setDefined();
- Exact = b;
- }
- void setNoNaNs(bool b) {
- setDefined();
- NoNaNs = b;
- }
- void setNoInfs(bool b) {
- setDefined();
- NoInfs = b;
- }
- void setNoSignedZeros(bool b) {
- setDefined();
- NoSignedZeros = b;
- }
- void setAllowReciprocal(bool b) {
- setDefined();
- AllowReciprocal = b;
- }
- void setAllowContract(bool b) {
- setDefined();
- AllowContract = b;
- }
- void setApproximateFuncs(bool b) {
- setDefined();
- ApproximateFuncs = b;
- }
- void setAllowReassociation(bool b) {
- setDefined();
- AllowReassociation = b;
- }
- void setNoFPExcept(bool b) {
- setDefined();
- NoFPExcept = b;
- }
+ void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
+ void setNoSignedWrap(bool b) { NoSignedWrap = b; }
+ void setExact(bool b) { Exact = b; }
+ void setNoNaNs(bool b) { NoNaNs = b; }
+ void setNoInfs(bool b) { NoInfs = b; }
+ void setNoSignedZeros(bool b) { NoSignedZeros = b; }
+ void setAllowReciprocal(bool b) { AllowReciprocal = b; }
+ void setAllowContract(bool b) { AllowContract = b; }
+ void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
+ void setAllowReassociation(bool b) { AllowReassociation = b; }
+ void setNoFPExcept(bool b) { NoFPExcept = b; }
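With the AnyDefined state removed, SDNodeFlags is a plain bag of bits; a minimal sketch (invented helper name) of setting the wrap flags and handing them to getNode():

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue buildNoWrapAdd(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                              SDValue A, SDValue B) {
  SDNodeFlags Flags;
  Flags.setNoSignedWrap(true);    // no hidden "defined" bit to flip anymore
  Flags.setNoUnsignedWrap(true);
  return DAG.getNode(ISD::ADD, DL, VT, A, B, Flags);
}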
// These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
@@ -464,11 +433,9 @@ public:
bool hasAllowReassociation() const { return AllowReassociation; }
bool hasNoFPExcept() const { return NoFPExcept; }
- /// Clear any flags in this flag set that aren't also set in Flags.
- /// If the given Flags are undefined then don't do anything.
+ /// Clear any flags in this flag set that aren't also set in Flags. All
+ /// flags will be cleared if Flags are undefined.
void intersectWith(const SDNodeFlags Flags) {
- if (!Flags.isDefined())
- return;
NoUnsignedWrap &= Flags.NoUnsignedWrap;
NoSignedWrap &= Flags.NoSignedWrap;
Exact &= Flags.Exact;
@@ -559,6 +526,7 @@ BEGIN_TWO_BYTE_PACK()
class LoadSDNodeBitfields {
friend class LoadSDNode;
friend class MaskedLoadSDNode;
+ friend class MaskedGatherSDNode;
uint16_t : NumLSBaseSDNodeBits;
@@ -569,6 +537,7 @@ BEGIN_TWO_BYTE_PACK()
class StoreSDNodeBitfields {
friend class StoreSDNode;
friend class MaskedStoreSDNode;
+ friend class MaskedScatterSDNode;
uint16_t : NumLSBaseSDNodeBits;
@@ -720,9 +689,7 @@ public:
bool use_empty() const { return UseList == nullptr; }
/// Return true if there is exactly one use of this node.
- bool hasOneUse() const {
- return !use_empty() && std::next(use_begin()) == use_end();
- }
+ bool hasOneUse() const { return hasSingleElement(uses()); }
/// Return the number of uses of this node. This method takes
/// time proportional to the number of uses.
@@ -1379,8 +1346,18 @@ public:
}
const SDValue &getChain() const { return getOperand(0); }
+
const SDValue &getBasePtr() const {
- return getOperand(getOpcode() == ISD::STORE ? 2 : 1);
+ switch (getOpcode()) {
+ case ISD::STORE:
+ case ISD::MSTORE:
+ return getOperand(2);
+ case ISD::MGATHER:
+ case ISD::MSCATTER:
+ return getOperand(3);
+ default:
+ return getOperand(1);
+ }
}
// Methods to support isa and dyn_cast
@@ -1784,6 +1761,32 @@ public:
}
};
+/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
+/// the index of the basic block being probed. A pseudo probe serves as a
+/// placeholder and will be removed at the end of compilation. It has no
+/// operands, because we do not want instruction selection to deal with any.
+class PseudoProbeSDNode : public SDNode {
+ friend class SelectionDAG;
+ uint64_t Guid;
+ uint64_t Index;
+ uint32_t Attributes;
+
+ PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
+ SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
+ : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
+ Attributes(Attr) {}
+
+public:
+ uint64_t getGuid() const { return Guid; }
+ uint64_t getIndex() const { return Index; }
+ uint32_t getAttributes() const { return Attributes; }
+
+ // Methods to support isa and dyn_cast
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::PSEUDO_PROBE;
+ }
+};
+
class JumpTableSDNode : public SDNode {
friend class SelectionDAG;
@@ -1944,6 +1947,33 @@ public:
/// the vector width and set the bits where elements are undef.
SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
+ /// Find the shortest repeating sequence of values in the build vector.
+ ///
+ /// e.g. { u, X, u, X, u, u, X, u } -> { X }
+ /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
+ ///
+ /// Currently this must be a power-of-2 build vector.
+ /// The DemandedElts mask indicates the elements that must be present,
+ /// undemanded elements in Sequence may be null (SDValue()). If passed a
+ /// non-null UndefElements bitvector, it will resize it to match the original
+ /// vector width and set the bits where elements are undef. If result is
+ /// false, Sequence will be empty.
+ bool getRepeatedSequence(const APInt &DemandedElts,
+ SmallVectorImpl<SDValue> &Sequence,
+ BitVector *UndefElements = nullptr) const;
+
+ /// Find the shortest repeating sequence of values in the build vector.
+ ///
+ /// e.g. { u, X, u, X, u, u, X, u } -> { X }
+ /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
+ ///
+ /// Currently this must be a power-of-2 build vector.
+ /// If passed a non-null UndefElements bitvector, it will resize it to match
+ /// the original vector width and set the bits where elements are undef.
+ /// If result is false, Sequence will be empty.
+ bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
+ BitVector *UndefElements = nullptr) const;
+
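For illustration, a small query sketch (the predicate name is invented) showing how the non-demanded overload above might be used on a BUILD_VECTOR node:

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool hasTwoElementPattern(SDNode *N) {
  auto *BV = cast<BuildVectorSDNode>(N);
  SmallVector<SDValue, 8> Sequence;
  BitVector Undefs;
  // For <X, Y, u, Y, u, u, X, u> this fills Sequence with {X, Y}.
  return BV->getRepeatedSequence(Sequence, &Undefs) && Sequence.size() == 2;
}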
/// Returns the demanded splatted constant or null if this is not a constant
/// splat.
///
@@ -2292,9 +2322,6 @@ public:
// MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
// MaskedStoreSDNode (Chain, data, ptr, offset, mask)
// Mask is a vector of i1 elements
- const SDValue &getBasePtr() const {
- return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2);
- }
const SDValue &getOffset() const {
return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
}
@@ -2402,6 +2429,9 @@ public:
ISD::MemIndexType getIndexType() const {
return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
}
+ void setIndexType(ISD::MemIndexType IndexType) {
+ LSBaseSDNodeBits.AddressingMode = IndexType;
+ }
bool isIndexScaled() const {
return (getIndexType() == ISD::SIGNED_SCALED) ||
(getIndexType() == ISD::UNSIGNED_SCALED);
@@ -2434,12 +2464,18 @@ public:
MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
EVT MemVT, MachineMemOperand *MMO,
- ISD::MemIndexType IndexType)
+ ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
: MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
- IndexType) {}
+ IndexType) {
+ LoadSDNodeBits.ExtTy = ETy;
+ }
const SDValue &getPassThru() const { return getOperand(1); }
+ ISD::LoadExtType getExtensionType() const {
+ return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
+ }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MGATHER;
}
@@ -2453,9 +2489,16 @@ public:
MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
EVT MemVT, MachineMemOperand *MMO,
- ISD::MemIndexType IndexType)
+ ISD::MemIndexType IndexType, bool IsTrunc)
: MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
- IndexType) {}
+ IndexType) {
+ StoreSDNodeBits.IsTruncating = IsTrunc;
+ }
+
+ /// Return true if the op does a truncation before store.
+ /// For integers this is the same as doing a TRUNCATE and storing the result.
+ /// For floats, it is the same as doing an FP_ROUND and storing the result.
+ bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
const SDValue &getValue() const { return getOperand(1); }
@@ -2605,7 +2648,8 @@ template <> struct GraphTraits<SDNode*> {
/// with 4 and 8 byte pointer alignment, respectively.
using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
BlockAddressSDNode,
- GlobalAddressSDNode>;
+ GlobalAddressSDNode,
+ PseudoProbeSDNode>;
/// The SDNode class with the greatest alignment requirement.
using MostAlignedSDNode = GlobalAddressSDNode;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
index 014523f1af6a..78f6fc6656fa 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -85,7 +85,7 @@ public:
return SDValue();
}
- /// Emit target-specific code that performs a memcmp, in cases where that is
- /// Emit target-specific code that performs a memcmp, in cases where that is
+ /// Emit target-specific code that performs a memcmp/bcmp, in cases where that is
/// faster than a libcall. The first returned SDValue is the result of the
/// memcmp and the second is the chain. Both SDValues can be null if a normal
/// libcall should be used.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h
index 19eab7ae5e35..b2133de93ea2 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -604,38 +604,27 @@ class raw_ostream;
}
/// Add the given MachineBasicBlock into the maps.
- /// If \p InsertionPoint is specified then the block will be placed
- /// before the given machine instr, otherwise it will be placed
- /// before the next block in MachineFunction insertion order.
- void insertMBBInMaps(MachineBasicBlock *mbb,
- MachineInstr *InsertionPoint = nullptr) {
- MachineFunction::iterator nextMBB =
- std::next(MachineFunction::iterator(mbb));
-
- IndexListEntry *startEntry = nullptr;
- IndexListEntry *endEntry = nullptr;
- IndexList::iterator newItr;
- if (InsertionPoint) {
- startEntry = createEntry(nullptr, 0);
- endEntry = getInstructionIndex(*InsertionPoint).listEntry();
- newItr = indexList.insert(endEntry->getIterator(), startEntry);
- } else if (nextMBB == mbb->getParent()->end()) {
- startEntry = &indexList.back();
- endEntry = createEntry(nullptr, 0);
- newItr = indexList.insertAfter(startEntry->getIterator(), endEntry);
- } else {
- startEntry = createEntry(nullptr, 0);
- endEntry = getMBBStartIdx(&*nextMBB).listEntry();
- newItr = indexList.insert(endEntry->getIterator(), startEntry);
- }
+ /// If it contains any instructions then they must already be in the maps.
+ /// This is used after a block has been split by moving some suffix of its
+ /// instructions into a newly created block.
+ void insertMBBInMaps(MachineBasicBlock *mbb) {
+ assert(mbb != &mbb->getParent()->front() &&
+ "Can't insert a new block at the beginning of a function.");
+ auto prevMBB = std::prev(MachineFunction::iterator(mbb));
+
+ // Create a new entry to be used for the start of mbb and the end of
+ // prevMBB.
+ IndexListEntry *startEntry = createEntry(nullptr, 0);
+ IndexListEntry *endEntry = getMBBEndIdx(&*prevMBB).listEntry();
+ IndexListEntry *insEntry =
+ mbb->empty() ? endEntry
+ : getInstructionIndex(mbb->front()).listEntry();
+ IndexList::iterator newItr =
+ indexList.insert(insEntry->getIterator(), startEntry);
SlotIndex startIdx(startEntry, SlotIndex::Slot_Block);
SlotIndex endIdx(endEntry, SlotIndex::Slot_Block);
- MachineFunction::iterator prevMBB(mbb);
- assert(prevMBB != mbb->getParent()->end() &&
- "Can't insert a new block at the beginning of a function.");
- --prevMBB;
MBBRanges[prevMBB->getNumber()].second = startIdx;
assert(unsigned(mbb->getNumber()) == MBBRanges.size() &&
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/StableHashing.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/StableHashing.h
new file mode 100644
index 000000000000..caf27e152e78
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/StableHashing.h
@@ -0,0 +1,112 @@
+//===- llvm/CodeGen/StableHashing.h - Utilities for stable hashing * C++ *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides types and functions for computing and combining stable
+// hashes. Stable hashes can be useful for hashing across different modules,
+// processes, or compiler runs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_STABLEHASHING_H
+#define LLVM_CODEGEN_STABLEHASHING_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+/// An opaque object representing a stable hash code. It can be serialized,
+/// deserialized, and is stable across processes and executions.
+using stable_hash = uint64_t;
+
+// Implementation details
+namespace hashing {
+namespace detail {
+
+// Stable hashes are based on the 64-bit FNV-1 hash:
+// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function
+
+const uint64_t FNV_PRIME_64 = 1099511628211u;
+const uint64_t FNV_OFFSET_64 = 14695981039346656037u;
+
+inline void stable_hash_append(stable_hash &Hash, const char Value) {
+ Hash = Hash ^ (Value & 0xFF);
+ Hash = Hash * FNV_PRIME_64;
+}
+
+inline void stable_hash_append(stable_hash &Hash, stable_hash Value) {
+ for (unsigned I = 0; I < 8; ++I) {
+ stable_hash_append(Hash, static_cast<char>(Value));
+ Value >>= 8;
+ }
+}
+
+} // namespace detail
+} // namespace hashing
+
+inline stable_hash stable_hash_combine(stable_hash A, stable_hash B) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ hashing::detail::stable_hash_append(Hash, A);
+ hashing::detail::stable_hash_append(Hash, B);
+ return Hash;
+}
+
+inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
+ stable_hash C) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ hashing::detail::stable_hash_append(Hash, A);
+ hashing::detail::stable_hash_append(Hash, B);
+ hashing::detail::stable_hash_append(Hash, C);
+ return Hash;
+}
+
+inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
+ stable_hash C, stable_hash D) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ hashing::detail::stable_hash_append(Hash, A);
+ hashing::detail::stable_hash_append(Hash, B);
+ hashing::detail::stable_hash_append(Hash, C);
+ hashing::detail::stable_hash_append(Hash, D);
+ return Hash;
+}
+
+/// Compute a stable_hash for a sequence of values.
+///
+/// This hashes a sequence of values. It produces the same stable_hash as
+/// 'stable_hash_combine(a, b, c, ...)', but can run over arbitrary sized
+/// sequences and is significantly faster given pointers and types which
+/// can be hashed as a sequence of bytes.
+template <typename InputIteratorT>
+stable_hash stable_hash_combine_range(InputIteratorT First,
+ InputIteratorT Last) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ for (auto I = First; I != Last; ++I)
+ hashing::detail::stable_hash_append(Hash, *I);
+ return Hash;
+}
+
+inline stable_hash stable_hash_combine_array(const stable_hash *P, size_t C) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ for (size_t I = 0; I < C; ++I)
+ hashing::detail::stable_hash_append(Hash, P[I]);
+ return Hash;
+}
+
+inline stable_hash stable_hash_combine_string(const StringRef &S) {
+ return stable_hash_combine_range(S.begin(), S.end());
+}
+
+inline stable_hash stable_hash_combine_string(const char *C) {
+ stable_hash Hash = hashing::detail::FNV_OFFSET_64;
+ while (*C)
+ hashing::detail::stable_hash_append(Hash, *(C++));
+ return Hash;
+}
+
+} // namespace llvm
+
+#endif
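A small standalone sketch (the string and values are arbitrary) showing that the helpers above are plain FNV-1 folds, so the array and range forms agree on the same data and the result is reproducible across runs:

#include "llvm/CodeGen/StableHashing.h"
#include <cassert>
using namespace llvm;

int main() {
  stable_hash A = stable_hash_combine_string("some-arbitrary-key");
  stable_hash B = stable_hash_combine(A, 42u);
  const stable_hash Parts[] = {A, B};
  // The same inputs always produce the same value, process to process.
  assert(stable_hash_combine_array(Parts, 2) ==
         stable_hash_combine_range(Parts, Parts + 2));
  return 0;
}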
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h
index e33ee226e41a..928d7cc6cc04 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h
@@ -148,9 +148,13 @@ public:
/// <StackMaps::ConstantOp>, <calling convention>,
/// <StackMaps::ConstantOp>, <statepoint flags>,
/// <StackMaps::ConstantOp>, <num deopt args>, [deopt args...],
-/// <gc base/derived pairs...> <gc allocas...>
-/// Note that the last two sets of arguments are not currently length
-/// prefixed.
+/// <StackMaps::ConstantOp>, <num gc pointer args>, [gc pointer args...],
+/// <StackMaps::ConstantOp>, <num gc allocas>, [gc allocas args...],
+/// <StackMaps::ConstantOp>, <num entries in gc map>, [base/derived pairs]
+/// Base/derived pairs in the gc map are logical indices into the
+/// <gc pointer args> section.
+/// Each gc pointer assigned to a VReg produces a new value (in the form of an
+/// MI Def operand) and is tied to it.
class StatepointOpers {
// TODO:: we should change the STATEPOINT representation so that CC and
// Flags should be part of meta operands, with args and deopt operands, and
@@ -166,21 +170,23 @@ class StatepointOpers {
enum { CCOffset = 1, FlagsOffset = 3, NumDeoptOperandsOffset = 5 };
public:
- explicit StatepointOpers(const MachineInstr *MI) : MI(MI) {}
+ explicit StatepointOpers(const MachineInstr *MI) : MI(MI) {
+ NumDefs = MI->getNumDefs();
+ }
/// Get index of statepoint ID operand.
- unsigned getIDPos() const { return IDPos; }
+ unsigned getIDPos() const { return NumDefs + IDPos; }
/// Get index of Num Patch Bytes operand.
- unsigned getNBytesPos() const { return NBytesPos; }
+ unsigned getNBytesPos() const { return NumDefs + NBytesPos; }
/// Get index of Num Call Arguments operand.
- unsigned getNCallArgsPos() const { return NCallArgsPos; }
+ unsigned getNCallArgsPos() const { return NumDefs + NCallArgsPos; }
/// Get starting index of non call related arguments
/// (calling convention, statepoint flags, vm state and gc state).
unsigned getVarIdx() const {
- return MI->getOperand(NCallArgsPos).getImm() + MetaEnd;
+ return MI->getOperand(NumDefs + NCallArgsPos).getImm() + MetaEnd + NumDefs;
}
/// Get index of Calling Convention operand.
@@ -195,16 +201,16 @@ public:
}
/// Return the ID for the given statepoint.
- uint64_t getID() const { return MI->getOperand(IDPos).getImm(); }
+ uint64_t getID() const { return MI->getOperand(NumDefs + IDPos).getImm(); }
/// Return the number of patchable bytes the given statepoint should emit.
uint32_t getNumPatchBytes() const {
- return MI->getOperand(NBytesPos).getImm();
+ return MI->getOperand(NumDefs + NBytesPos).getImm();
}
/// Return the target of the underlying call.
const MachineOperand &getCallTarget() const {
- return MI->getOperand(CallTargetPos);
+ return MI->getOperand(NumDefs + CallTargetPos);
}
/// Return the calling convention.
@@ -215,8 +221,31 @@ public:
/// Return the statepoint flags.
uint64_t getFlags() const { return MI->getOperand(getFlagsIdx()).getImm(); }
+ uint64_t getNumDeoptArgs() const {
+ return MI->getOperand(getNumDeoptArgsIdx()).getImm();
+ }
+
+ /// Get index of number of gc map entries.
+ unsigned getNumGcMapEntriesIdx();
+
+ /// Get index of number of gc allocas.
+ unsigned getNumAllocaIdx();
+
+ /// Get index of number of GC pointers.
+ unsigned getNumGCPtrIdx();
+
+ /// Get index of first GC pointer operand, or -1 if there are none.
+ int getFirstGCPtrIdx();
+
+ /// Get vector of base/derived pairs from statepoint.
+ /// Elements are indices into GC Pointer operand list (logical).
+ /// Returns number of elements in GCMap.
+ unsigned
+ getGCPointerMap(SmallVectorImpl<std::pair<unsigned, unsigned>> &GCMap);
+
private:
const MachineInstr *MI;
+ unsigned NumDefs;
};
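A reading sketch (the function name is invented; the MachineInstr is assumed to be a STATEPOINT) showing that callers go through the accessors, which now transparently skip the leading tied GC-pointer defs via NumDefs:

#include "llvm/CodeGen/StackMaps.h"
using namespace llvm;

static uint64_t describeStatepoint(const MachineInstr &MI) {
  StatepointOpers Opers(&MI);
  uint64_t ID = Opers.getID();              // operand indices already shifted
  uint64_t Flags = Opers.getFlags();
  unsigned FirstVarOp = Opers.getVarIdx();  // first non-call meta operand
  (void)Flags;
  (void)FirstVarOp;
  return ID;
}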
class StackMaps {
@@ -258,6 +287,10 @@ public:
StackMaps(AsmPrinter &AP);
+ /// Get index of next meta operand.
+ /// Similar to parseOperand, but does not actually parse the operand's meaning.
+ static unsigned getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx);
+
void reset() {
CSInfos.clear();
ConstPool.clear();
@@ -330,6 +363,13 @@ private:
MachineInstr::const_mop_iterator MOE, LocationVec &Locs,
LiveOutVec &LiveOuts) const;
+ /// Specialized parser of statepoint operands.
+ /// They do not directly correspond to StackMap record entries.
+ void parseStatepointOpers(const MachineInstr &MI,
+ MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ LocationVec &Locations, LiveOutVec &LiveOuts);
+
/// Create a live-out register record for the given register @p Reg.
LiveOutReg createLiveOutReg(unsigned Reg,
const TargetRegisterInfo *TRI) const;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index 4d6afa617d3a..51f1d7d6fd21 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -10,16 +10,21 @@
#define LLVM_CODEGEN_SWITCHLOWERINGUTILS_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/BranchProbability.h"
+#include <vector>
namespace llvm {
+class BlockFrequencyInfo;
+class ConstantInt;
class FunctionLoweringInfo;
class MachineBasicBlock;
-class BlockFrequencyInfo;
+class ProfileSummaryInfo;
+class TargetLowering;
+class TargetMachine;
namespace SwitchCG {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetCallingConv.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 347d7ff40404..df974b499851 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -31,6 +31,7 @@ namespace ISD {
unsigned IsInReg : 1; ///< Passed in register
unsigned IsSRet : 1; ///< Hidden struct-ret ptr
unsigned IsByVal : 1; ///< Struct passed by value
+ unsigned IsByRef : 1; ///< Passed in memory
unsigned IsNest : 1; ///< Nested fn static chain
unsigned IsReturned : 1; ///< Always returned
unsigned IsSplit : 1;
@@ -43,25 +44,31 @@ namespace ISD {
unsigned IsHva : 1; ///< HVA field for
unsigned IsHvaStart : 1; ///< HVA structure start
unsigned IsSecArgPass : 1; ///< Second argument
- unsigned ByValAlign : 4; ///< Log 2 of byval alignment
+ unsigned ByValOrByRefAlign : 4; ///< Log 2 of byval/byref alignment
unsigned OrigAlign : 5; ///< Log 2 of original alignment
unsigned IsInConsecutiveRegsLast : 1;
unsigned IsInConsecutiveRegs : 1;
unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate
unsigned IsPointer : 1;
- unsigned ByValSize; ///< Byval struct size
+ unsigned ByValOrByRefSize; ///< Byval or byref struct size
unsigned PointerAddrSpace; ///< Address space of pointer argument
+ /// Set the alignment used by byref or byval parameters.
+ void setAlignImpl(Align A) {
+ ByValOrByRefAlign = encode(A);
+ assert(getNonZeroByValAlign() == A && "bitfield overflow");
+ }
+
public:
ArgFlagsTy()
- : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
- IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
+ : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsByRef(0),
+ IsNest(0), IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0),
- IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
- IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
- IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
+ IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValOrByRefAlign(0),
+ OrigAlign(0), IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
+ IsCopyElisionCandidate(0), IsPointer(0), ByValOrByRefSize(0),
PointerAddrSpace(0) {
static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big");
}
@@ -81,6 +88,9 @@ namespace ISD {
bool isByVal() const { return IsByVal; }
void setByVal() { IsByVal = 1; }
+ bool isByRef() const { return IsByRef; }
+ void setByRef() { IsByRef = 1; }
+
bool isInAlloca() const { return IsInAlloca; }
void setInAlloca() { IsInAlloca = 1; }
@@ -112,10 +122,12 @@ namespace ISD {
void setReturned() { IsReturned = 1; }
bool isInConsecutiveRegs() const { return IsInConsecutiveRegs; }
- void setInConsecutiveRegs() { IsInConsecutiveRegs = 1; }
+ void setInConsecutiveRegs(bool Flag = true) { IsInConsecutiveRegs = Flag; }
bool isInConsecutiveRegsLast() const { return IsInConsecutiveRegsLast; }
- void setInConsecutiveRegsLast() { IsInConsecutiveRegsLast = 1; }
+ void setInConsecutiveRegsLast(bool Flag = true) {
+ IsInConsecutiveRegsLast = Flag;
+ }
bool isSplit() const { return IsSplit; }
void setSplit() { IsSplit = 1; }
@@ -131,17 +143,22 @@ namespace ISD {
LLVM_ATTRIBUTE_DEPRECATED(unsigned getByValAlign() const,
"Use getNonZeroByValAlign() instead") {
- MaybeAlign A = decodeMaybeAlign(ByValAlign);
+ MaybeAlign A = decodeMaybeAlign(ByValOrByRefAlign);
return A ? A->value() : 0;
}
Align getNonZeroByValAlign() const {
- MaybeAlign A = decodeMaybeAlign(ByValAlign);
+ MaybeAlign A = decodeMaybeAlign(ByValOrByRefAlign);
assert(A && "ByValAlign must be defined");
return *A;
}
void setByValAlign(Align A) {
- ByValAlign = encode(A);
- assert(getNonZeroByValAlign() == A && "bitfield overflow");
+ assert(isByVal() && !isByRef());
+ setAlignImpl(A);
+ }
+
+ void setByRefAlign(Align A) {
+ assert(!isByVal() && isByRef());
+ setAlignImpl(A);
}
LLVM_ATTRIBUTE_DEPRECATED(unsigned getOrigAlign() const,
@@ -157,8 +174,23 @@ namespace ISD {
assert(getNonZeroOrigAlign() == A && "bitfield overflow");
}
- unsigned getByValSize() const { return ByValSize; }
- void setByValSize(unsigned S) { ByValSize = S; }
+ unsigned getByValSize() const {
+ assert(isByVal() && !isByRef());
+ return ByValOrByRefSize;
+ }
+ void setByValSize(unsigned S) {
+ assert(isByVal() && !isByRef());
+ ByValOrByRefSize = S;
+ }
+
+ unsigned getByRefSize() const {
+ assert(!isByVal() && isByRef());
+ return ByValOrByRefSize;
+ }
+ void setByRefSize(unsigned S) {
+ assert(!isByVal() && isByRef());
+ ByValOrByRefSize = S;
+ }
unsigned getPointerAddrSpace() const { return PointerAddrSpace; }
void setPointerAddrSpace(unsigned AS) { PointerAddrSpace = AS; }
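The byval/byref split above keeps one shared size/alignment field but guards it with asserts. A minimal sketch (not part of this patch) of how a calling-convention lowering might populate the new byref flags; makeByRefFlags and its parameters are hypothetical:

#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/Support/Alignment.h"

// Hypothetical helper: describe an argument passed indirectly (byref).
static llvm::ISD::ArgFlagsTy makeByRefFlags(unsigned SizeInBytes,
                                            llvm::Align A) {
  llvm::ISD::ArgFlagsTy Flags;
  Flags.setByRef();                 // must precede the setters below, which
  Flags.setByRefSize(SizeInBytes);  // assert isByRef() && !isByVal()
  Flags.setByRefAlign(A);
  return Flags;
}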
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index d6580430daf7..792452f6e81d 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -14,6 +14,7 @@
#define LLVM_CODEGEN_TARGETFRAMELOWERING_H
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/TypeSize.h"
#include <vector>
namespace llvm {
@@ -26,7 +27,7 @@ namespace TargetStackID {
enum Value {
Default = 0,
SGPRSpill = 1,
- SVEVector = 2,
+ ScalableVector = 2,
NoAlloc = 255
};
}
@@ -297,8 +298,8 @@ public:
/// getFrameIndexReference - This method should return the base register
/// and offset used to reference a frame index location. The offset is
/// returned directly, and the base register is returned via FrameReg.
- virtual int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const;
+ virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const;
/// Same as \c getFrameIndexReference, except that the stack pointer (as
/// opposed to the frame pointer) will be the preferred value for \p
@@ -306,9 +307,10 @@ public:
/// use offsets from RSP. If \p IgnoreSPUpdates is true, the returned
/// offset is only guaranteed to be valid with respect to the value of SP at
/// the end of the prologue.
- virtual int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
- Register &FrameReg,
- bool IgnoreSPUpdates) const {
+ virtual StackOffset
+ getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
+ Register &FrameReg,
+ bool IgnoreSPUpdates) const {
// Always safe to dispatch to getFrameIndexReference.
return getFrameIndexReference(MF, FI, FrameReg);
}
@@ -316,8 +318,8 @@ public:
/// getNonLocalFrameIndexReference - This method returns the offset used to
/// reference a frame index location. The offset can be from either FP/BP/SP
/// based on which base register is returned by llvm.localaddress.
- virtual int getNonLocalFrameIndexReference(const MachineFunction &MF,
- int FI) const {
+ virtual StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF,
+ int FI) const {
// By default, dispatch to getFrameIndexReference. Interested targets can
// override this.
Register FrameReg;
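getFrameIndexReference and friends now return a StackOffset instead of a plain int, so a frame offset can carry both a fixed and a vscale-scaled component. A minimal caller-side sketch, assuming the StackOffset accessors from llvm/Support/TypeSize.h; describeFrameIndex is a hypothetical helper, not part of this patch:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Support/TypeSize.h"

static void describeFrameIndex(const llvm::TargetFrameLowering &TFL,
                               const llvm::MachineFunction &MF, int FI) {
  llvm::Register FrameReg;
  llvm::StackOffset Off = TFL.getFrameIndexReference(MF, FI, FrameReg);
  int64_t FixedBytes = Off.getFixed();       // compile-time-known part
  int64_t ScalableBytes = Off.getScalable(); // multiplied by vscale at runtime
  (void)FixedBytes;
  (void)ScalableBytes;
}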
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index b3b2fa218627..36afdefd27b2 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOutliner.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/BranchProbability.h"
@@ -80,6 +81,15 @@ struct RegImmPair {
RegImmPair(Register Reg, int64_t Imm) : Reg(Reg), Imm(Imm) {}
};
+/// Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
+/// It holds the register values, the scale value and the displacement.
+struct ExtAddrMode {
+ Register BaseReg;
+ Register ScaledReg;
+ int64_t Scale;
+ int64_t Displacement;
+};
+
//---------------------------------------------------------------------------
///
/// TargetInstrInfo - Interface to description of machine instruction set
@@ -339,6 +349,12 @@ public:
unsigned &Size, unsigned &Offset,
const MachineFunction &MF) const;
+ /// Return true if the given instruction is a terminator that is unspillable,
+ /// according to isUnspillableTerminatorImpl.
+ bool isUnspillableTerminator(const MachineInstr *MI) const {
+ return MI->isTerminator() && isUnspillableTerminatorImpl(MI);
+ }
+
/// Returns the size in bytes of the specified MachineInstr, or ~0U
/// when this function is not implemented by a target.
virtual unsigned getInstSizeInBytes(const MachineInstr &MI) const {
@@ -724,7 +740,7 @@ public:
return nullptr;
}
- /// Analyze the loop code, return true if it cannot be understoo. Upon
+ /// Analyze the loop code, return true if it cannot be understood. Upon
/// success, this function returns false and returns information about the
/// induction variable and compare instruction used at the end.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
@@ -771,7 +787,7 @@ public:
/// Second variant of isProfitableToIfCvt. This one
/// checks for the case where two basic blocks from true and false path
- /// of a if-then-else (diamond) are predicated on mutally exclusive
+ /// of a if-then-else (diamond) are predicated on mutually exclusive
/// predicates, where the probability of the true path being taken is given
/// by Probability, and Confidence is a measure of our confidence that it
/// will be properly predicted.
@@ -945,6 +961,17 @@ protected:
return None;
}
+ /// Return true if the given terminator MI is not expected to spill. This
+ /// sets the live interval as not spillable and adjusts phi node lowering to
+ /// not introduce copies after the terminator. Use with care, these are
+ /// currently used for hardware loop intrinsics in very controlled situations,
+ /// created prior to register allocation in loops that only have single phi
+ /// users for the terminator's value. They may run out of registers if not used
+ /// carefully.
+ virtual bool isUnspillableTerminatorImpl(const MachineInstr *MI) const {
+ return false;
+ }
+
public:
/// If the specific machine instruction is a instruction that moves/copies
/// value from one register to another register return destination and source
@@ -968,6 +995,15 @@ public:
return None;
}
+ /// Returns true if MI is an instruction that defines Reg to have a constant
+ /// value and the value is recorded in ImmVal. The ImmVal is a result that
+ /// should be interpreted modulo the size of Reg.
+ virtual bool getConstValDefinedInReg(const MachineInstr &MI,
+ const Register Reg,
+ int64_t &ImmVal) const {
+ return false;
+ }
+
/// Store the specified register of the given register class to the specified
/// stack frame index. The store instruction is to be added to the given
/// machine basic block before the specified machine instruction. If isKill
@@ -1041,9 +1077,23 @@ public:
/// faster sequence.
/// \param Root - Instruction that could be combined with one of its operands
/// \param Patterns - Vector of possible combination patterns
- virtual bool getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const;
+ virtual bool
+ getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const;
+
+ /// Return true if target supports reassociation of instructions in machine
+ /// combiner pass to reduce register pressure for a given BB.
+ virtual bool
+ shouldReduceRegisterPressure(MachineBasicBlock *MBB,
+ RegisterClassInfo *RegClassInfo) const {
+ return false;
+ }
+
+ /// Fix up the placeholder we may add in genAlternativeCodeSequence().
+ virtual void
+ finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) const {}
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
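A minimal sketch of how a target could opt in to the new register-pressure-driven machine-combiner path; MyTargetInstrInfo and the block-size threshold are hypothetical, not part of this patch:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

class MyTargetInstrInfo : public llvm::TargetInstrInfo { // hypothetical target
public:
  bool shouldReduceRegisterPressure(
      llvm::MachineBasicBlock *MBB,
      llvm::RegisterClassInfo *RegClassInfo) const override {
    // Only chase pressure-reducing reassociations in larger blocks; the
    // threshold is made up for the example.
    return MBB->size() > 100;
  }
};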
@@ -1248,10 +1298,11 @@ public:
bool &OffsetIsScalable,
const TargetRegisterInfo *TRI) const;
- /// Get the base operands and byte offset of an instruction that reads/writes
- /// memory.
+ /// Get zero or more base operands and the byte offset of an instruction that
+ /// reads/writes memory. Note that there may be zero base operands if the
+ /// instruction accesses a constant address.
/// It returns false if MI does not read/write memory.
- /// It returns false if no base operands and offset was found.
+ /// It returns false if base operands and offset could not be determined.
/// It is not guaranteed to always recognize base operands and offsets in all
/// cases.
virtual bool getMemOperandsWithOffsetWidth(
@@ -1270,6 +1321,27 @@ public:
return false;
}
+ /// Target-dependent implementation to get the values constituting the address
+ /// used by the MachineInstr that is accessing memory. These values are returned as a
+ /// struct ExtAddrMode which contains all relevant information to make up the
+ /// address.
+ virtual Optional<ExtAddrMode>
+ getAddrModeFromMemoryOp(const MachineInstr &MemI,
+ const TargetRegisterInfo *TRI) const {
+ return None;
+ }
+
+ /// Returns true if MI's Def is NullValueReg, and the MI
+ /// does not change the Zero value. i.e. cases such as rax = shr rax, X where
+ /// NullValueReg = rax. Note that if the NullValueReg is non-zero, this
+ /// function can return true even if it becomes zero. Specifically, cases such as
+ /// NullValueReg = shl NullValueReg, 63.
+ virtual bool preservesZeroValueInReg(const MachineInstr *MI,
+ const Register NullValueReg,
+ const TargetRegisterInfo *TRI) const {
+ return false;
+ }
+
/// If the instruction is an increment of a constant value, return the amount.
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const {
return false;
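A minimal usage sketch for the new getAddrModeFromMemoryOp/ExtAddrMode query; printAddrMode is a hypothetical helper and only shows how the Optional result is unpacked:

#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void printAddrMode(const TargetInstrInfo &TII, const MachineInstr &MemI,
                          const TargetRegisterInfo *TRI) {
  // BaseReg + ScaledReg * Scale + Displacement, if the target can decompose it.
  if (Optional<ExtAddrMode> AM = TII.getAddrModeFromMemoryOp(MemI, TRI))
    errs() << "base=" << printReg(AM->BaseReg, TRI)
           << " scaled=" << printReg(AM->ScaledReg, TRI)
           << " scale=" << AM->Scale << " disp=" << AM->Displacement << "\n";
}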
@@ -1304,6 +1376,11 @@ public:
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
+ /// Insert noops into the instruction stream at the specified point.
+ virtual void insertNoops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned Quantity) const;
+
/// Return the noop instruction to use for a noop.
virtual void getNoop(MCInst &NopInst) const;
@@ -1355,8 +1432,13 @@ public:
/// If the specified instruction defines any predicate
/// or condition code register(s) used for predication, returns true as well
/// as the definition predicate(s) by reference.
- virtual bool DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const {
+ /// SkipDead should be set to false at any point that dead
+ /// predicate instructions should be considered as being defined.
+ /// A dead predicate instruction is one that is guaranteed to be removed
+ /// after a call to PredicateInstruction.
+ virtual bool ClobbersPredicate(MachineInstr &MI,
+ std::vector<MachineOperand> &Pred,
+ bool SkipDead) const {
return false;
}
@@ -1442,7 +1524,7 @@ public:
/// the machine instruction generated due to folding.
virtual MachineInstr *optimizeLoadInstr(MachineInstr &MI,
const MachineRegisterInfo *MRI,
- unsigned &FoldAsLoadDefReg,
+ Register &FoldAsLoadDefReg,
MachineInstr *&DefMI) const {
return nullptr;
}
@@ -1627,7 +1709,7 @@ public:
/// This hook works similarly to getPartialRegUpdateClearance, except that it
/// does not take an operand index. Instead sets \p OpNum to the index of the
/// unused register.
- virtual unsigned getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
+ virtual unsigned getUndefRegClearance(const MachineInstr &MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const {
// The default implementation returns 0 for no undef register dependency.
return 0;
@@ -1688,6 +1770,21 @@ public:
return 5;
}
+ /// Return the maximal number of alias checks on memory operands. For
+ /// instructions with more than one memory operands, the alias check on a
+ /// single MachineInstr pair has quadratic overhead and results in
+ /// unacceptable performance in the worst case. The limit here is to clamp
+ /// that maximal checks performed. Usually, that's the product of memory
+ /// operand numbers from that pair of MachineInstr to be checked. For
+ /// instance, with two MachineInstrs with 4 and 5 memory operands
+ /// correspondingly, a total of 20 checks are required. With this limit set to
+ /// 16, their alias check is skipped. We choose to limit the product instead
+ /// of the individual instruction as targets may have special MachineInstrs
+ /// with a considerably high number of memory operands, such as `ldm` in ARM.
+ /// Setting this limit per MachineInstr would result in either too high
+ /// overhead or too rigid restriction.
+ virtual unsigned getMemOperandAACheckLimit() const { return 16; }
+
/// Return an array that contains the ids of the target indices (used for the
/// TargetIndex machine operand) and their names.
///
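To make the limit above concrete: a pair of instructions with 4 and 5 memory operands yields 4 * 5 = 20 candidate checks, which exceeds the default of 16, so the detailed check is skipped and the pair is presumably treated conservatively. A target willing to pay more compile time could raise the bound; the class below is the same hypothetical MyTargetInstrInfo as in the earlier sketch, and the value 64 is illustrative only:

#include "llvm/CodeGen/TargetInstrInfo.h"

class MyTargetInstrInfo : public llvm::TargetInstrInfo { // hypothetical target
public:
  unsigned getMemOperandAACheckLimit() const override { return 64; }
};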
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h
index 06f2b3ca38ea..40115fbd2f15 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -278,6 +278,7 @@ public:
bool IsSRet : 1;
bool IsNest : 1;
bool IsByVal : 1;
+ bool IsByRef : 1;
bool IsInAlloca : 1;
bool IsPreallocated : 1;
bool IsReturned : 1;
@@ -290,7 +291,7 @@ public:
ArgListEntry()
: IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
- IsNest(false), IsByVal(false), IsInAlloca(false),
+ IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false),
IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
IsSwiftError(false), IsCFGuardTarget(false) {}
@@ -374,6 +375,13 @@ public:
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
bool LegalTypes = true) const;
+ /// Return the preferred type to use for a shift opcode, given the shifted
+ /// amount type is \p ShiftValueTy.
+ LLVM_READONLY
+ virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const {
+ return ShiftValueTy;
+ }
+
/// Returns the type to be used for the index operand of:
/// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
/// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
@@ -419,7 +427,7 @@ public:
virtual TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const {
// The default action for one element vectors is to scalarize
- if (VT.getVectorElementCount() == 1)
+ if (VT.getVectorElementCount().isScalar())
return TypeScalarizeVector;
// The default action for an odd-width vector is to widen.
if (!VT.isPow2VectorType())
@@ -597,6 +605,12 @@ public:
return false;
}
+ /// Return the maximum number of "x & (x - 1)" operations that can be done
+ /// instead of deferring to a custom CTPOP.
+ virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const {
+ return 1;
+ }
+
/// Return true if instruction generated for equality comparison is folded
/// with instruction generated for signed comparison.
virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
@@ -1085,8 +1099,13 @@ public:
/// Return true if the specified operation is legal on this target or can be
/// made legal with custom lowering. This is used to help guide high-level
- /// lowering decisions.
- bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
+ /// lowering decisions. LegalOnly is an optional convenience for code paths
+ /// traversed pre and post legalisation.
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ if (LegalOnly)
+ return isOperationLegal(Op, VT);
+
return (VT == MVT::Other || isTypeLegal(VT)) &&
(getOperationAction(Op, VT) == Legal ||
getOperationAction(Op, VT) == Custom);
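A minimal sketch of what the new LegalOnly parameter buys: a combine that runs both before and after legalization can issue one query instead of branching on the phase. tryFormABS and the LegalOperations flag are assumed DAGCombiner-style context, not part of this patch:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

static SDValue tryFormABS(const TargetLowering &TLI, SelectionDAG &DAG,
                          const SDLoc &DL, EVT VT, SDValue X,
                          bool LegalOperations) {
  // Before legalization a Custom-lowered ABS is fine; afterwards
  // (LegalOperations == true) only a truly Legal one is.
  if (!TLI.isOperationLegalOrCustom(ISD::ABS, VT, /*LegalOnly=*/LegalOperations))
    return SDValue();
  return DAG.getNode(ISD::ABS, DL, VT, X);
}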
@@ -1094,8 +1113,13 @@ public:
/// Return true if the specified operation is legal on this target or can be
/// made legal using promotion. This is used to help guide high-level lowering
- /// decisions.
- bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
+ /// decisions. LegalOnly is an optional convenience for code paths traversed
+ /// pre and post legalisation.
+ bool isOperationLegalOrPromote(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ if (LegalOnly)
+ return isOperationLegal(Op, VT);
+
return (VT == MVT::Other || isTypeLegal(VT)) &&
(getOperationAction(Op, VT) == Legal ||
getOperationAction(Op, VT) == Promote);
@@ -1103,8 +1127,13 @@ public:
/// Return true if the specified operation is legal on this target or can be
/// made legal with custom lowering or using promotion. This is used to help
- /// guide high-level lowering decisions.
- bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
+ /// guide high-level lowering decisions. LegalOnly is an optional convenience
+ /// for code paths traversed pre and post legalisation.
+ bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ if (LegalOnly)
+ return isOperationLegal(Op, VT);
+
return (VT == MVT::Other || isTypeLegal(VT)) &&
(getOperationAction(Op, VT) == Legal ||
getOperationAction(Op, VT) == Custom ||
@@ -1289,6 +1318,10 @@ public:
getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
}
+ // Returns true if VT is a legal index type for masked gathers/scatters
+ // on this target
+ virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const { return false; }
+
/// Return how the condition code should be treated: either it is legal, needs
/// to be expanded to some other code sequence, or the target has a custom
/// expander for it.
@@ -1625,6 +1658,11 @@ public:
const MachineMemOperand &MMO,
bool *Fast = nullptr) const;
+ /// LLT handling variant.
+ bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty,
+ const MachineMemOperand &MMO,
+ bool *Fast = nullptr) const;
+
/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.
/// It returns EVT::Other if the type should be determined using generic
@@ -1663,13 +1701,9 @@ public:
virtual bool isJumpTableRelative() const;
- /// Return true if a mulh[s|u] node for a specific type is cheaper than
- /// a multiply followed by a shift. This is false by default.
- virtual bool isMulhCheaperThanMulShift(EVT Type) const { return false; }
-
/// If a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
- unsigned getStackPointerRegisterToSaveRestore() const {
+ Register getStackPointerRegisterToSaveRestore() const {
return StackPointerRegisterToSaveRestore;
}
@@ -1758,17 +1792,10 @@ public:
return "";
}
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
- return false;
- }
-
/// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
/// are happy to sink it into basic blocks. A cast may be free, but not
/// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
- virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
- return isNoopAddrSpaceCast(SrcAS, DestAS);
- }
+ virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const;
/// Return true if the pointer arguments to CI should be aligned by aligning
/// the object whose address is being passed. If so then MinSize is set to the
@@ -2758,6 +2785,10 @@ public:
return false;
}
+ /// Does this target require the clearing of high-order bits in a register
+ /// passed to the fp16 to fp conversion library function.
+ virtual bool shouldKeepZExtForFP16Conv() const { return false; }
+
//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
@@ -3090,16 +3121,6 @@ protected:
MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- /// Replace/modify the XRay custom event operands with target-dependent
- /// details.
- MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
- MachineBasicBlock *MBB) const;
-
- /// Replace/modify the XRay typed event operands with target-dependent
- /// details.
- MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
- MachineBasicBlock *MBB) const;
-
bool IsStrictFPEnabled;
};
@@ -4188,7 +4209,7 @@ public:
// Lower custom output constraints. If invalid, return SDValue().
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
- SDLoc DL,
+ const SDLoc &DL,
const AsmOperandInfo &OpInfo,
SelectionDAG &DAG) const;
@@ -4255,6 +4276,20 @@ public:
return SDValue();
}
+ /// Return a target-dependent comparison result if the input operand is
+ /// suitable for use with a square root estimate calculation. For example, the
+ /// comparison may check if the operand is NAN, INF, zero, normal, etc. The
+ /// result should be used as the condition operand for a select or branch.
+ virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
+ const DenormalMode &Mode) const;
+
+ /// Return a target-dependent result if the input operand is not suitable for
+ /// use with a square root estimate calculation.
+ virtual SDValue getSqrtResultForDenormInput(SDValue Operand,
+ SelectionDAG &DAG) const {
+ return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType());
+ }
+
//===--------------------------------------------------------------------===//
// Legalization utility functions
//
@@ -4269,7 +4304,7 @@ public:
/// \param RL Low bits of the RHS of the MUL. See LL for meaning
/// \param RH High bits of the RHS of the MUL. See LL for meaning.
/// \returns true if the node has been expanded, false if it has not
- bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS,
+ bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS,
SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
SelectionDAG &DAG, MulExpansionKind Kind,
SDValue LL = SDValue(), SDValue LH = SDValue(),
@@ -4297,9 +4332,12 @@ public:
/// Expand rotations.
/// \param N Node to expand
+ /// \param AllowVectorOps expand vector rotate; this should only be performed
+ /// if the legalization is happening outside of LegalizeVectorOps
/// \param Result output after conversion
/// \returns True, if the expansion was successful, false otherwise
- bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ bool expandROT(SDNode *N, bool AllowVectorOps, SDValue &Result,
+ SelectionDAG &DAG) const;
/// Expand float(f32) to SINT(i64) conversion
/// \param N Node to expand
@@ -4326,6 +4364,11 @@ public:
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
+ /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
+ /// \param N Node to expand
+ /// \returns The expansion result
+ SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const;
+
/// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
@@ -4352,8 +4395,10 @@ public:
/// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
/// \param N Node to expand
/// \param Result output after conversion
+ /// \param IsNegative indicates a negated abs
/// \returns True, if the expansion was successful, false otherwise
- bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG,
+ bool IsNegative = false) const;
/// Turn load of vector type into a load of the individual elements.
/// \param LD load to expand
@@ -4393,10 +4438,18 @@ public:
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
SDValue Index) const;
+ /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This
+ /// method accepts integers as its arguments.
+ SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const;
+
/// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
/// method accepts integers as its arguments.
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
+ /// Method for building the DAG expansion of ISD::[US]SHLSAT. This
+ /// method accepts integers as its arguments.
+ SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const;
+
/// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
/// method accepts integers as its arguments.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
@@ -4428,6 +4481,9 @@ public:
/// only the first Count elements of the vector are used.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
+ /// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
+ SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;
+
/// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
/// Returns true if the expansion was successful.
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
@@ -4482,6 +4538,10 @@ public:
// combiner can fold the new nodes.
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
+ /// Give targets the chance to reduce the number of distinct addressing modes.
+ ISD::MemIndexType getCanonicalIndexType(ISD::MemIndexType IndexType,
+ EVT MemVT, SDValue Offsets) const;
+
private:
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, DAGCombinerInfo &DCI) const;
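A minimal sketch of how a target might reach the new expansion helpers (expandIntMINMAX, expandShlSat, expandVecReduceSeq, ...) from its LowerOperation hook; MyTargetLowering is hypothetical and only the integer min/max case is shown:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

class MyTargetLowering : public TargetLowering { // hypothetical target
public:
  using TargetLowering::TargetLowering;

  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override {
    switch (Op.getOpcode()) {
    case ISD::SMIN:
    case ISD::SMAX:
    case ISD::UMIN:
    case ISD::UMAX:
      // Assumed to be marked Custom for this VT in the constructor (not
      // shown); fall back to the generic setcc/select based expansion.
      return expandIntMINMAX(Op.getNode(), DAG);
    default:
      return SDValue();
    }
  }
};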
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 6e2c0973e354..31e08b7d1e63 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -21,6 +21,7 @@ namespace llvm {
class GlobalValue;
class MachineModuleInfo;
+class MachineFunction;
class MCContext;
class MCExpr;
class MCSection;
@@ -35,10 +36,9 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
protected:
MCSymbolRefExpr::VariantKind PLTRelativeVariantKind =
MCSymbolRefExpr::VK_None;
- const TargetMachine *TM = nullptr;
public:
- TargetLoweringObjectFileELF() = default;
+ TargetLoweringObjectFileELF();
~TargetLoweringObjectFileELF() override = default;
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
@@ -63,6 +63,8 @@ public:
MCSection *getSectionForJumpTable(const Function &F,
const TargetMachine &TM) const override;
+ MCSection *getSectionForLSDA(const Function &F,
+ const TargetMachine &TM) const override;
MCSection *
getSectionForMachineBasicBlock(const Function &F,
@@ -95,6 +97,9 @@ public:
const GlobalValue *RHS,
const TargetMachine &TM) const override;
+ const MCExpr *lowerDSOLocalEquivalent(const DSOLocalEquivalent *Equiv,
+ const TargetMachine &TM) const override;
+
MCSection *getSectionForCommandLines() const override;
};
@@ -143,6 +148,7 @@ public:
class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
mutable unsigned NextUniqueID = 0;
+ const TargetMachine *TM = nullptr;
public:
~TargetLoweringObjectFileCOFF() override = default;
@@ -168,12 +174,6 @@ public:
MCSection *getStaticDtorSection(unsigned Priority,
const MCSymbol *KeySym) const override;
- void emitLinkerFlagsForGlobal(raw_ostream &OS,
- const GlobalValue *GV) const override;
-
- void emitLinkerFlagsForUsed(raw_ostream &OS,
- const GlobalValue *GV) const override;
-
const MCExpr *lowerRelativeReference(const GlobalValue *LHS,
const GlobalValue *RHS,
const TargetMachine &TM) const override;
@@ -183,6 +183,9 @@ public:
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
Align &Alignment) const override;
+
+private:
+ void emitLinkerDirectives(MCStreamer &Streamer, Module &M) const;
};
class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile {
@@ -217,6 +220,10 @@ public:
TargetLoweringObjectFileXCOFF() = default;
~TargetLoweringObjectFileXCOFF() override = default;
+ static bool ShouldEmitEHBlock(const MachineFunction *MF);
+
+ static MCSymbol *getEHInfoTableSymbol(const MachineFunction *MF);
+
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
@@ -246,12 +253,13 @@ public:
const Constant *C,
Align &Alignment) const override;
- static XCOFF::StorageClass getStorageClassForGlobal(const GlobalObject *GO);
+ static XCOFF::StorageClass getStorageClassForGlobal(const GlobalValue *GV);
MCSection *
getSectionForFunctionDescriptor(const Function *F,
const TargetMachine &TM) const override;
- MCSection *getSectionForTOCEntry(const MCSymbol *Sym) const override;
+ MCSection *getSectionForTOCEntry(const MCSymbol *Sym,
+ const TargetMachine &TM) const override;
/// For external functions, this will always return a function descriptor
/// csect.
@@ -263,7 +271,7 @@ public:
MCSymbol *getTargetSymbol(const GlobalValue *GV,
const TargetMachine &TM) const override;
- MCSymbol *getFunctionEntryPointSymbol(const Function *F,
+ MCSymbol *getFunctionEntryPointSymbol(const GlobalValue *Func,
const TargetMachine &TM) const override;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetPassConfig.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetPassConfig.h
index a18c8b16bf1c..b4787710379f 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -25,6 +25,7 @@ struct MachineSchedContext;
class PassConfigImpl;
class ScheduleDAGInstrs;
class CSEConfigBase;
+class PassInstrumentationCallbacks;
// The old pass manager infrastructure is hidden in a legacy namespace now.
namespace legacy {
@@ -187,7 +188,7 @@ public:
/// Insert InsertedPassID pass after TargetPassID pass.
void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter = true, bool PrintAfter = true);
+ bool VerifyAfter = true);
/// Allow the target to enable a specific standard pass by default.
void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); }
@@ -313,14 +314,17 @@ public:
/// Add a pass to remove debug info from the MIR.
void addStripDebugPass();
+ /// Add a pass to check synthesized debug info for MIR.
+ void addCheckDebugPass();
+
/// Add standard passes before a pass that's about to be added. For example,
/// the DebugifyMachineModulePass if it is enabled.
void addMachinePrePasses(bool AllowDebugify = true);
/// Add standard passes after a pass that has just been added. For example,
/// the MachineVerifier if it is enabled.
- void addMachinePostPasses(const std::string &Banner, bool AllowPrint = true,
- bool AllowVerify = true, bool AllowStrip = true);
+ void addMachinePostPasses(const std::string &Banner, bool AllowVerify = true,
+ bool AllowStrip = true);
/// Check whether or not GlobalISel should abort on error.
/// When this is disabled, GlobalISel will fall back on SDISel instead of
@@ -441,32 +445,30 @@ protected:
/// Add a CodeGen pass at this point in the pipeline after checking overrides.
/// Return the pass that was added, or zero if no pass was added.
- /// @p printAfter if true and adding a machine function pass add an extra
- /// machine printer pass afterwards
/// @p verifyAfter if true and adding a machine function pass add an extra
/// machine verification pass afterwards.
- AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true,
- bool printAfter = true);
+ AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true);
/// Add a pass to the PassManager if that pass is supposed to be run, as
/// determined by the StartAfter and StopAfter options. Takes ownership of the
/// pass.
- /// @p printAfter if true and adding a machine function pass add an extra
- /// machine printer pass afterwards
/// @p verifyAfter if true and adding a machine function pass add an extra
/// machine verification pass afterwards.
- void addPass(Pass *P, bool verifyAfter = true, bool printAfter = true);
+ void addPass(Pass *P, bool verifyAfter = true);
/// addMachinePasses helper to create the target-selected or overriden
/// regalloc pass.
virtual FunctionPass *createRegAllocPass(bool Optimized);
- /// Add core register alloator passes which do the actual register assignment
+ /// Add core register allocator passes which do the actual register assignment
/// and rewriting. \returns true if any passes were added.
- virtual bool addRegAssignmentFast();
- virtual bool addRegAssignmentOptimized();
+ virtual bool addRegAssignAndRewriteFast();
+ virtual bool addRegAssignAndRewriteOptimized();
};
+void registerCodeGenCallback(PassInstrumentationCallbacks &PIC,
+ LLVMTargetMachine &);
+
} // end namespace llvm
#endif // LLVM_CODEGEN_TARGETPASSCONFIG_H
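A minimal sketch of the renamed register-assignment hooks from a target's pass config; MyTargetPassConfig and the direct use of the fast register allocator are hypothetical, not part of this patch:

#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"

using namespace llvm;

class MyTargetPassConfig : public TargetPassConfig { // hypothetical target
public:
  using TargetPassConfig::TargetPassConfig;

  bool addRegAssignAndRewriteFast() override {
    // Formerly addRegAssignmentFast(); note that addPass() no longer takes
    // a printAfter parameter in this change.
    addPass(createFastRegisterAllocator());
    return true;
  }
};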
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index d921c4c9028b..8790e2f09eb6 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -34,6 +34,7 @@
namespace llvm {
class BitVector;
+class DIExpression;
class LiveRegMatrix;
class MachineFunction;
class MachineInstr;
@@ -87,22 +88,21 @@ public:
/// Return true if the specified register is included in this register class.
/// This does not include virtual registers.
- bool contains(unsigned Reg) const {
+ bool contains(Register Reg) const {
/// FIXME: Historically this function has returned false when given vregs
/// but it should probably only receive physical registers
- if (!Register::isPhysicalRegister(Reg))
+ if (!Reg.isPhysical())
return false;
- return MC->contains(Reg);
+ return MC->contains(Reg.asMCReg());
}
/// Return true if both registers are in this class.
- bool contains(unsigned Reg1, unsigned Reg2) const {
+ bool contains(Register Reg1, Register Reg2) const {
/// FIXME: Historically this function has returned false when given a vregs
/// but it should probably only receive physical registers
- if (!Register::isPhysicalRegister(Reg1) ||
- !Register::isPhysicalRegister(Reg2))
+ if (!Reg1.isPhysical() || !Reg2.isPhysical())
return false;
- return MC->contains(Reg1, Reg2);
+ return MC->contains(Reg1.asMCReg(), Reg2.asMCReg());
}
/// Return the cost of copying a value between two registers in this class.
@@ -386,12 +386,12 @@ public:
/// The registers may be virtual registers.
bool regsOverlap(Register regA, Register regB) const {
if (regA == regB) return true;
- if (regA.isVirtual() || regB.isVirtual())
+ if (!regA.isPhysical() || !regB.isPhysical())
return false;
// Regunits are numerically ordered. Find a common unit.
- MCRegUnitIterator RUA(regA, this);
- MCRegUnitIterator RUB(regB, this);
+ MCRegUnitIterator RUA(regA.asMCReg(), this);
+ MCRegUnitIterator RUB(regB.asMCReg(), this);
do {
if (*RUA == *RUB) return true;
if (*RUA < *RUB) ++RUA;
@@ -401,9 +401,9 @@ public:
}
/// Returns true if Reg contains RegUnit.
- bool hasRegUnit(MCRegister Reg, unsigned RegUnit) const {
+ bool hasRegUnit(MCRegister Reg, Register RegUnit) const {
for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units)
- if (*Units == RegUnit)
+ if (Register(*Units) == RegUnit)
return true;
return false;
}
@@ -415,6 +415,16 @@ public:
virtual Register lookThruCopyLike(Register SrcReg,
const MachineRegisterInfo *MRI) const;
+ /// Find the original SrcReg unless it is the target of a copy-like operation,
+ /// in which case we chain backwards through all such operations to the
+ /// ultimate source register. If a physical register is encountered, we stop
+ /// the search.
+ /// Return the original SrcReg if all the definitions in the chain only have
+ /// one user and not a physical register.
+ virtual Register
+ lookThruSingleUseCopyChain(Register SrcReg,
+ const MachineRegisterInfo *MRI) const;
+
/// Return a null-terminated list of all of the callee-saved registers on
/// this target. The register should be in the order of desired callee-save
/// stack frame offset. The first register is closest to the incoming stack
@@ -449,6 +459,13 @@ public:
return nullptr;
}
+ /// Return a register mask for the registers preserved by the unwinder,
+ /// or nullptr if no custom mask is needed.
+ virtual const uint32_t *
+ getCustomEHPadPreservedMask(const MachineFunction &MF) const {
+ return nullptr;
+ }
+
/// Return a register mask that clobbers everything.
virtual const uint32_t *getNoPreservedMask() const {
llvm_unreachable("target does not provide no preserved mask");
@@ -894,11 +911,11 @@ public:
return false;
}
- /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
- /// before insertion point I.
- virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB,
- Register BaseReg, int FrameIdx,
- int64_t Offset) const {
+ /// Insert defining instruction(s) for a pointer to FrameIdx before
+ /// insertion point I. Return materialized frame pointer.
+ virtual Register materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ int FrameIdx,
+ int64_t Offset) const {
llvm_unreachable("materializeFrameBaseRegister does not exist on this "
"target");
}
@@ -917,6 +934,15 @@ public:
llvm_unreachable("isFrameOffsetLegal does not exist on this target");
}
+ /// Gets the DWARF expression opcodes for \p Offset.
+ virtual void getOffsetOpcodes(const StackOffset &Offset,
+ SmallVectorImpl<uint64_t> &Ops) const;
+
+ /// Prepends a DWARF expression for \p Offset to DIExpression \p Expr.
+ DIExpression *
+ prependOffsetExpression(const DIExpression *Expr, unsigned PrependFlags,
+ const StackOffset &Offset) const;
+
/// Spill the register so it can be used by the register scavenger.
/// Return true if the register was spilled, false otherwise.
/// If this function does not spill the register, the scavenger
@@ -970,6 +996,36 @@ public:
virtual bool shouldRegionSplitForVirtReg(const MachineFunction &MF,
const LiveInterval &VirtReg) const;
+ /// Last chance recoloring has a high compile time cost especially for
+ /// targets with a lot of registers.
+ /// This method is used to decide whether or not \p VirtReg should
+ /// go through this expensive heuristic.
+ /// If this hook returns false, there is a high
+ /// chance that the register allocation will fail altogether (usually with
+ /// "ran out of registers").
+ /// That said, this error usually points to another problem in the
+ /// optimization pipeline.
+ virtual bool
+ shouldUseLastChanceRecoloringForVirtReg(const MachineFunction &MF,
+ const LiveInterval &VirtReg) const {
+ return true;
+ }
+
+ /// Deferred spilling delays the spill insertion of a virtual register
+ /// after every other allocation. By deferring the spilling, it is
+ /// sometimes possible to eliminate that spilling altogether because
+ /// something else could have been eliminated, thus leaving some space
+ /// for the virtual register.
+ /// However, this comes with a compile time impact because it adds one
+ /// more stage to the greedy register allocator.
+ /// This method is used to decide whether \p VirtReg should use the deferred
+ /// spilling stage instead of being spilled right away.
+ virtual bool
+ shouldUseDeferredSpillingForVirtReg(const MachineFunction &MF,
+ const LiveInterval &VirtReg) const {
+ return false;
+ }
+
//===--------------------------------------------------------------------===//
/// Debug information queries.
@@ -994,7 +1050,7 @@ public:
/// Returns the physical register number of sub-register "Index"
/// for physical register RegNo. Return zero if the sub-register does not
/// exist.
- inline Register getSubReg(MCRegister Reg, unsigned Idx) const {
+ inline MCRegister getSubReg(MCRegister Reg, unsigned Idx) const {
return static_cast<const MCRegisterInfo *>(this)->getSubReg(Reg, Idx);
}
};
@@ -1146,8 +1202,8 @@ public:
// This is useful when building IndexedMaps keyed on virtual registers
struct VirtReg2IndexFunctor {
- using argument_type = unsigned;
- unsigned operator()(unsigned Reg) const {
+ using argument_type = Register;
+ unsigned operator()(Register Reg) const {
return Register::virtReg2Index(Reg);
}
};
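A minimal sketch of the new DWARF-expression helpers in the register-info changes above: prepending a fixed 16-byte offset to an existing DIExpression. addFixedOffset is a hypothetical helper, and DIExpression::ApplyOffset plus StackOffset::getFixed are assumed to be the right knobs for a purely fixed offset:

#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

static DIExpression *addFixedOffset(const TargetRegisterInfo &TRI,
                                    const DIExpression *Expr) {
  // For scalable (SVE-style) stack offsets a target would pass a StackOffset
  // with a non-zero scalable part instead.
  return TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset,
                                     StackOffset::getFixed(16));
}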
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index e0dfd9c8bbc5..3fac2f688dd8 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -58,8 +58,8 @@ class Triple;
///
class TargetSubtargetInfo : public MCSubtargetInfo {
protected: // Can only create subclasses...
- TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS,
- ArrayRef<SubtargetFeatureKV> PF,
+ TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU,
+ StringRef FS, ArrayRef<SubtargetFeatureKV> PF,
ArrayRef<SubtargetSubTypeKV> PD,
const MCWriteProcResEntry *WPR,
const MCWriteLatencyEntry *WL,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TileShapeInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TileShapeInfo.h
new file mode 100644
index 000000000000..031d23555b7e
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TileShapeInfo.h
@@ -0,0 +1,97 @@
+//===- llvm/CodeGen/TileShapeInfo.h - ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Shape utility for AMX.
+/// AMX hardware requires the shape of a tile data register to be configured
+/// before use. The 2D shape consists of a row and a column. In the AMX
+/// intrinsics interface the shape is passed as the 1st and 2nd parameters,
+/// and it is lowered to the 1st and 2nd machine operands of the AMX pseudo
+/// instructions. The ShapeT class facilitates tile configuration and register
+/// allocation; the row and column are machine operands of AMX pseudo instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_TILESHAPEINFO_H
+#define LLVM_CODEGEN_TILESHAPEINFO_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include <utility>
+
+namespace llvm {
+
+class ShapeT {
+public:
+ ShapeT(MachineOperand *Row, MachineOperand *Col,
+ const MachineRegisterInfo *MRI = nullptr)
+ : Row(Row), Col(Col) {
+ if (MRI)
+ deduceImm(MRI);
+ }
+ ShapeT()
+ : Row(nullptr), Col(nullptr), RowImm(InvalidImmShape),
+ ColImm(InvalidImmShape) {}
+ bool operator==(const ShapeT &Shape) {
+ MachineOperand *R = Shape.Row;
+ MachineOperand *C = Shape.Col;
+ if (!R || !C)
+ return false;
+ if (!Row || !Col)
+ return false;
+ if (Row->getReg() == R->getReg() && Col->getReg() == C->getReg())
+ return true;
+ if ((RowImm != InvalidImmShape) && (ColImm != InvalidImmShape))
+ return RowImm == Shape.getRowImm() && ColImm == Shape.getColImm();
+ return false;
+ }
+
+ bool operator!=(const ShapeT &Shape) { return !(*this == Shape); }
+
+ MachineOperand *getRow() const { return Row; }
+
+ MachineOperand *getCol() const { return Col; }
+
+ int64_t getRowImm() const { return RowImm; }
+
+ int64_t getColImm() const { return ColImm; }
+
+ bool isValid() { return (Row != nullptr) && (Col != nullptr); }
+
+ void deduceImm(const MachineRegisterInfo *MRI) {
+ // All defs must have the same value; otherwise the MIs are invalid.
+ // Find the immediate.
+ // TODO copy propagation.
+ auto GetImm = [&](Register Reg) {
+ int64_t Imm = InvalidImmShape;
+ for (const MachineOperand &DefMO : MRI->def_operands(Reg)) {
+ const auto *MI = DefMO.getParent();
+ if (MI->isMoveImmediate()) {
+ Imm = MI->getOperand(1).getImm();
+ break;
+ }
+ }
+ return Imm;
+ };
+ RowImm = GetImm(Row->getReg());
+ ColImm = GetImm(Col->getReg());
+ }
+
+private:
+ static constexpr int64_t InvalidImmShape = -1;
+ MachineOperand *Row;
+ MachineOperand *Col;
+ int64_t RowImm;
+ int64_t ColImm;
+};
+
+} // namespace llvm
+
+#endif
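A minimal usage sketch for the new ShapeT helper; sameShape is hypothetical, and the row/column are assumed here to sit at operand indices 1 and 2 of the AMX pseudo (right after the tile def), per the header comment above:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"

using namespace llvm;

static bool sameShape(MachineInstr &MI, ShapeT &Prev,
                      const MachineRegisterInfo *MRI) {
  ShapeT Cur(&MI.getOperand(1), &MI.getOperand(2), MRI); // row, col
  // Passing MRI lets deduceImm() fold move-immediate defs into RowImm/ColImm,
  // so equal shapes compare equal even through different virtual registers.
  return Cur.isValid() && Cur == Prev;
}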
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h
index db8161caf7d2..888b83d6f736 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -92,26 +92,17 @@ namespace llvm {
/// with the element type converted to an integer type with the same
/// bitwidth.
EVT changeVectorElementTypeToInteger() const {
- if (!isSimple())
- return changeExtendedVectorElementTypeToInteger();
- MVT EltTy = getSimpleVT().getVectorElementType();
- unsigned BitWidth = EltTy.getSizeInBits();
- MVT IntTy = MVT::getIntegerVT(BitWidth);
- MVT VecTy = MVT::getVectorVT(IntTy, getVectorElementCount());
- assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
- "Simple vector VT not representable by simple integer vector VT!");
- return VecTy;
+ if (isSimple())
+ return getSimpleVT().changeVectorElementTypeToInteger();
+ return changeExtendedVectorElementTypeToInteger();
}
/// Return a VT for a vector type whose attributes match ourselves
/// with the exception of the element type that is chosen by the caller.
EVT changeVectorElementType(EVT EltVT) const {
- if (!isSimple())
- return changeExtendedVectorElementType(EltVT);
- MVT VecTy = MVT::getVectorVT(EltVT.V, getVectorElementCount());
- assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
- "Simple vector VT not representable by simple integer vector VT!");
- return VecTy;
+ if (isSimple() && EltVT.isSimple())
+ return getSimpleVT().changeVectorElementType(EltVT.getSimpleVT());
+ return changeExtendedVectorElementType(EltVT);
}
/// Return the type converted to an equivalently sized integer or vector
@@ -122,8 +113,7 @@ namespace llvm {
return changeVectorElementTypeToInteger();
if (isSimple())
- return MVT::getIntegerVT(getSizeInBits());
-
+ return getSimpleVT().changeTypeToInteger();
return changeExtendedTypeToInteger();
}
@@ -214,9 +204,7 @@ namespace llvm {
}
/// Return true if the bit size is a multiple of 8.
- bool isByteSized() const {
- return getSizeInBits().isByteSized();
- }
+ bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); }
/// Return true if the size is a power-of-two number of bytes.
bool isRound() const {
@@ -232,28 +220,58 @@ namespace llvm {
return getSizeInBits() == VT.getSizeInBits();
}
+ /// Return true if we know at compile time this has more bits than VT.
+ bool knownBitsGT(EVT VT) const {
+ return TypeSize::isKnownGT(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has more than or the same
+ /// bits as VT.
+ bool knownBitsGE(EVT VT) const {
+ return TypeSize::isKnownGE(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has fewer bits than VT.
+ bool knownBitsLT(EVT VT) const {
+ return TypeSize::isKnownLT(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has fewer than or the same
+ /// bits as VT.
+ bool knownBitsLE(EVT VT) const {
+ return TypeSize::isKnownLE(getSizeInBits(), VT.getSizeInBits());
+ }
+
/// Return true if this has more bits than VT.
bool bitsGT(EVT VT) const {
if (EVT::operator==(VT)) return false;
- return getSizeInBits() > VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsGT(VT);
}
/// Return true if this has no less bits than VT.
bool bitsGE(EVT VT) const {
if (EVT::operator==(VT)) return true;
- return getSizeInBits() >= VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsGE(VT);
}
/// Return true if this has less bits than VT.
bool bitsLT(EVT VT) const {
if (EVT::operator==(VT)) return false;
- return getSizeInBits() < VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsLT(VT);
}
/// Return true if this has no more bits than VT.
bool bitsLE(EVT VT) const {
if (EVT::operator==(VT)) return true;
- return getSizeInBits() <= VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsLE(VT);
}
/// Return the SimpleValueType held in the specified simple EVT.
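A minimal sketch of why the knownBits* predicates above exist: once scalable vectors are involved, a size comparison may be unknowable at compile time, and the plain bitsGT/bitsLT forms now assert that both types are of the same (fixed or scalable) kind. canUseTruncate is a hypothetical helper:

#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

static bool canUseTruncate(EVT SrcVT, EVT DstVT) {
  // For DstVT = nxv4i32 and SrcVT = nxv4i64 this is provably true for every
  // vscale. For DstVT = nxv2i32 and SrcVT = v4i32 the answer depends on the
  // runtime vscale, so this conservatively returns false instead of asserting
  // the way a mixed fixed/scalable bitsLT comparison now would.
  return DstVT.knownBitsLT(SrcVT);
}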
@@ -285,7 +303,7 @@ namespace llvm {
if (isScalableVector())
WithColor::warning()
<< "Possible incorrect use of EVT::getVectorNumElements() for "
- "scalable vector. Scalable flag may be dropped, use"
+ "scalable vector. Scalable flag may be dropped, use "
"EVT::getVectorElementCount() instead\n";
#endif
if (isSimple())
@@ -304,7 +322,7 @@ namespace llvm {
/// Given a vector type, return the minimum number of elements it contains.
unsigned getVectorMinNumElements() const {
- return getVectorElementCount().Min;
+ return getVectorElementCount().getKnownMinValue();
}
/// Return the size of the specified value type in bits.
@@ -318,8 +336,14 @@ namespace llvm {
return getExtendedSizeInBits();
}
- TypeSize getScalarSizeInBits() const {
- return getScalarType().getSizeInBits();
+ /// Return the size of the specified fixed width value type in bits. The
+ /// function will assert if the type is scalable.
+ uint64_t getFixedSizeInBits() const {
+ return getSizeInBits().getFixedSize();
+ }
+
+ uint64_t getScalarSizeInBits() const {
+ return getScalarType().getSizeInBits().getFixedSize();
}
/// Return the number of bytes overwritten by a store of the specified value
@@ -383,8 +407,17 @@ namespace llvm {
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const {
EVT EltVT = getVectorElementType();
auto EltCnt = getVectorElementCount();
- assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!");
- return EVT::getVectorVT(Context, EltVT, EltCnt / 2);
+ assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
+ return EVT::getVectorVT(Context, EltVT, EltCnt.divideCoefficientBy(2));
+ }
+
+ // Return a VT for a vector type with the same element type but
+ // double the number of elements. The type returned may be an
+ // extended type.
+ EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const {
+ EVT EltVT = getVectorElementType();
+ auto EltCnt = getVectorElementCount();
+ return EVT::getVectorVT(Context, EltVT, EltCnt * 2);
}
/// Returns true if the given vector is a power of 2.
@@ -398,7 +431,8 @@ namespace llvm {
EVT getPow2VectorType(LLVMContext &Context) const {
if (!isPow2VectorType()) {
ElementCount NElts = getVectorElementCount();
- NElts.Min = 1 << Log2_32_Ceil(NElts.Min);
+ unsigned NewMinCount = 1 << Log2_32_Ceil(NElts.getKnownMinValue());
+ NElts = ElementCount::get(NewMinCount, NElts.isScalable());
return EVT::getVectorVT(Context, getVectorElementType(), NElts);
}
else {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td
index c5eb87cf1d34..d13d0a7772e9 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -87,107 +87,116 @@ def v4i64 : ValueType<256, 60>; // 4 x i64 vector value
def v8i64 : ValueType<512, 61>; // 8 x i64 vector value
def v16i64 : ValueType<1024,62>; // 16 x i64 vector value
def v32i64 : ValueType<2048,63>; // 32 x i64 vector value
+def v64i64 : ValueType<4096,64>; // 64 x i64 vector value
+def v128i64: ValueType<8192,65>; // 128 x i64 vector value
+def v256i64: ValueType<16384,66>; // 256 x i64 vector value
+
+def v1i128 : ValueType<128, 67>; // 1 x i128 vector value
+
+def v2f16 : ValueType<32 , 68>; // 2 x f16 vector value
+def v3f16 : ValueType<48 , 69>; // 3 x f16 vector value
+def v4f16 : ValueType<64 , 70>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 71>; // 8 x f16 vector value
+def v16f16 : ValueType<256, 72>; // 16 x f16 vector value
+def v32f16 : ValueType<512, 73>; // 32 x f16 vector value
+def v64f16 : ValueType<1024, 74>; // 64 x f16 vector value
+def v128f16 : ValueType<2048, 75>; // 128 x f16 vector value
+def v2bf16 : ValueType<32 , 76>; // 2 x bf16 vector value
+def v3bf16 : ValueType<48 , 77>; // 3 x bf16 vector value
+def v4bf16 : ValueType<64 , 78>; // 4 x bf16 vector value
+def v8bf16 : ValueType<128, 79>; // 8 x bf16 vector value
+def v16bf16 : ValueType<256, 80>; // 16 x bf16 vector value
+def v32bf16 : ValueType<512, 81>; // 32 x bf16 vector value
+def v64bf16 : ValueType<1024, 82>; // 64 x bf16 vector value
+def v128bf16 : ValueType<2048, 83>; // 128 x bf16 vector value
+def v1f32 : ValueType<32 , 84>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 85>; // 2 x f32 vector value
+def v3f32 : ValueType<96 , 86>; // 3 x f32 vector value
+def v4f32 : ValueType<128, 87>; // 4 x f32 vector value
+def v5f32 : ValueType<160, 88>; // 5 x f32 vector value
+def v8f32 : ValueType<256, 89>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 90>; // 16 x f32 vector value
+def v32f32 : ValueType<1024, 91>; // 32 x f32 vector value
+def v64f32 : ValueType<2048, 92>; // 64 x f32 vector value
+def v128f32 : ValueType<4096, 93>; // 128 x f32 vector value
+def v256f32 : ValueType<8182, 94>; // 256 x f32 vector value
+def v512f32 : ValueType<16384, 95>; // 512 x f32 vector value
+def v1024f32 : ValueType<32768, 96>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 97>; // 2048 x f32 vector value
+def v1f64 : ValueType<64, 98>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 99>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 100>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 101>; // 8 x f64 vector value
+def v16f64 : ValueType<1024, 102>; // 16 x f64 vector value
+def v32f64 : ValueType<2048, 103>; // 32 x f64 vector value
+def v64f64 : ValueType<4096, 104>; // 64 x f64 vector value
+def v128f64 : ValueType<8192, 105>; // 128 x f64 vector value
+def v256f64 : ValueType<16384, 106>; // 256 x f64 vector value
+
+def nxv1i1 : ValueType<1, 107>; // n x 1 x i1 vector value
+def nxv2i1 : ValueType<2, 108>; // n x 2 x i1 vector value
+def nxv4i1 : ValueType<4, 109>; // n x 4 x i1 vector value
+def nxv8i1 : ValueType<8, 110>; // n x 8 x i1 vector value
+def nxv16i1 : ValueType<16, 111>; // n x 16 x i1 vector value
+def nxv32i1 : ValueType<32, 112>; // n x 32 x i1 vector value
+def nxv64i1 : ValueType<64,113>; // n x 64 x i1 vector value
+
+def nxv1i8 : ValueType<8, 114>; // n x 1 x i8 vector value
+def nxv2i8 : ValueType<16, 115>; // n x 2 x i8 vector value
+def nxv4i8 : ValueType<32, 116>; // n x 4 x i8 vector value
+def nxv8i8 : ValueType<64, 117>; // n x 8 x i8 vector value
+def nxv16i8 : ValueType<128, 118>; // n x 16 x i8 vector value
+def nxv32i8 : ValueType<256, 119>; // n x 32 x i8 vector value
+def nxv64i8 : ValueType<512, 120>; // n x 64 x i8 vector value
+
+def nxv1i16 : ValueType<16, 121>; // n x 1 x i16 vector value
+def nxv2i16 : ValueType<32, 122>; // n x 2 x i16 vector value
+def nxv4i16 : ValueType<64, 123>; // n x 4 x i16 vector value
+def nxv8i16 : ValueType<128, 124>; // n x 8 x i16 vector value
+def nxv16i16: ValueType<256, 125>; // n x 16 x i16 vector value
+def nxv32i16: ValueType<512, 126>; // n x 32 x i16 vector value
+
+def nxv1i32 : ValueType<32, 127>; // n x 1 x i32 vector value
+def nxv2i32 : ValueType<64, 128>; // n x 2 x i32 vector value
+def nxv4i32 : ValueType<128, 129>; // n x 4 x i32 vector value
+def nxv8i32 : ValueType<256, 130>; // n x 8 x i32 vector value
+def nxv16i32: ValueType<512, 131>; // n x 16 x i32 vector value
+def nxv32i32: ValueType<1024,132>; // n x 32 x i32 vector value
+
+def nxv1i64 : ValueType<64, 133>; // n x 1 x i64 vector value
+def nxv2i64 : ValueType<128, 134>; // n x 2 x i64 vector value
+def nxv4i64 : ValueType<256, 135>; // n x 4 x i64 vector value
+def nxv8i64 : ValueType<512, 136>; // n x 8 x i64 vector value
+def nxv16i64: ValueType<1024,137>; // n x 16 x i64 vector value
+def nxv32i64: ValueType<2048,138>; // n x 32 x i64 vector value
+
+def nxv1f16 : ValueType<32, 139>; // n x 1 x f16 vector value
+def nxv2f16 : ValueType<32 , 140>; // n x 2 x f16 vector value
+def nxv4f16 : ValueType<64 , 141>; // n x 4 x f16 vector value
+def nxv8f16 : ValueType<128, 142>; // n x 8 x f16 vector value
+def nxv16f16 : ValueType<256,143>; // n x 16 x f16 vector value
+def nxv32f16 : ValueType<512,144>; // n x 32 x f16 vector value
+def nxv2bf16 : ValueType<32 , 145>; // n x 2 x bf16 vector value
+def nxv4bf16 : ValueType<64 , 146>; // n x 4 x bf16 vector value
+def nxv8bf16 : ValueType<128, 147>; // n x 8 x bf16 vector value
+def nxv1f32 : ValueType<32 , 148>; // n x 1 x f32 vector value
+def nxv2f32 : ValueType<64 , 149>; // n x 2 x f32 vector value
+def nxv4f32 : ValueType<128, 150>; // n x 4 x f32 vector value
+def nxv8f32 : ValueType<256, 151>; // n x 8 x f32 vector value
+def nxv16f32 : ValueType<512, 152>; // n x 16 x f32 vector value
+def nxv1f64 : ValueType<64, 153>; // n x 1 x f64 vector value
+def nxv2f64 : ValueType<128, 154>; // n x 2 x f64 vector value
+def nxv4f64 : ValueType<256, 155>; // n x 4 x f64 vector value
+def nxv8f64 : ValueType<512, 156>; // n x 8 x f64 vector value
+
+def x86mmx : ValueType<64 , 157>; // X86 MMX value
+def FlagVT : ValueType<0 , 158>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 159>; // Produces no value
+def untyped: ValueType<8 , 160>; // Produces an untyped value
+def funcref : ValueType<0 , 161>; // WebAssembly's funcref type
+def externref : ValueType<0 , 162>; // WebAssembly's externref type
+def x86amx : ValueType<8192, 163>; // X86 AMX value
-def v1i128 : ValueType<128, 64>; // 1 x i128 vector value
-
-def v2f16 : ValueType<32 , 65>; // 2 x f16 vector value
-def v3f16 : ValueType<48 , 66>; // 3 x f16 vector value
-def v4f16 : ValueType<64 , 67>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 68>; // 8 x f16 vector value
-def v16f16 : ValueType<256, 69>; // 16 x f16 vector value
-def v32f16 : ValueType<512, 70>; // 32 x f16 vector value
-def v64f16 : ValueType<1024, 71>; // 64 x f16 vector value
-def v128f16 : ValueType<2048, 72>; // 128 x f16 vector value
-def v2bf16 : ValueType<32 , 73>; // 2 x bf16 vector value
-def v3bf16 : ValueType<48 , 74>; // 3 x bf16 vector value
-def v4bf16 : ValueType<64 , 75>; // 4 x bf16 vector value
-def v8bf16 : ValueType<128, 76>; // 8 x bf16 vector value
-def v16bf16 : ValueType<256, 77>; // 16 x bf16 vector value
-def v32bf16 : ValueType<512, 78>; // 32 x bf16 vector value
-def v64bf16 : ValueType<1024, 79>; // 64 x bf16 vector value
-def v128bf16 : ValueType<2048, 80>; // 128 x bf16 vector value
-def v1f32 : ValueType<32 , 81>; // 1 x f32 vector value
-def v2f32 : ValueType<64 , 82>; // 2 x f32 vector value
-def v3f32 : ValueType<96 , 83>; // 3 x f32 vector value
-def v4f32 : ValueType<128, 84>; // 4 x f32 vector value
-def v5f32 : ValueType<160, 85>; // 5 x f32 vector value
-def v8f32 : ValueType<256, 86>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 87>; // 16 x f32 vector value
-def v32f32 : ValueType<1024, 88>; // 32 x f32 vector value
-def v64f32 : ValueType<2048, 89>; // 64 x f32 vector value
-def v128f32 : ValueType<4096, 90>; // 128 x f32 vector value
-def v256f32 : ValueType<8182, 91>; // 256 x f32 vector value
-def v512f32 : ValueType<16384, 92>; // 512 x f32 vector value
-def v1024f32 : ValueType<32768, 93>; // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 94>; // 2048 x f32 vector value
-def v1f64 : ValueType<64, 95>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 96>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 97>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 98>; // 8 x f64 vector value
-def v16f64 : ValueType<1024, 99>; // 16 x f64 vector value
-def v32f64 : ValueType<2048, 100>; // 32 x f64 vector value
-
-def nxv1i1 : ValueType<1, 101>; // n x 1 x i1 vector value
-def nxv2i1 : ValueType<2, 102>; // n x 2 x i1 vector value
-def nxv4i1 : ValueType<4, 103>; // n x 4 x i1 vector value
-def nxv8i1 : ValueType<8, 104>; // n x 8 x i1 vector value
-def nxv16i1 : ValueType<16, 105>; // n x 16 x i1 vector value
-def nxv32i1 : ValueType<32, 106>; // n x 32 x i1 vector value
-def nxv64i1 : ValueType<64,107>; // n x 64 x i1 vector value
-
-def nxv1i8 : ValueType<8, 108>; // n x 1 x i8 vector value
-def nxv2i8 : ValueType<16, 109>; // n x 2 x i8 vector value
-def nxv4i8 : ValueType<32, 110>; // n x 4 x i8 vector value
-def nxv8i8 : ValueType<64, 111>; // n x 8 x i8 vector value
-def nxv16i8 : ValueType<128, 112>; // n x 16 x i8 vector value
-def nxv32i8 : ValueType<256, 113>; // n x 32 x i8 vector value
-def nxv64i8 : ValueType<512, 114>; // n x 64 x i8 vector value
-
-def nxv1i16 : ValueType<16, 115>; // n x 1 x i16 vector value
-def nxv2i16 : ValueType<32, 116>; // n x 2 x i16 vector value
-def nxv4i16 : ValueType<64, 117>; // n x 4 x i16 vector value
-def nxv8i16 : ValueType<128, 118>; // n x 8 x i16 vector value
-def nxv16i16: ValueType<256, 119>; // n x 16 x i16 vector value
-def nxv32i16: ValueType<512, 120>; // n x 32 x i16 vector value
-
-def nxv1i32 : ValueType<32, 121>; // n x 1 x i32 vector value
-def nxv2i32 : ValueType<64, 122>; // n x 2 x i32 vector value
-def nxv4i32 : ValueType<128, 123>; // n x 4 x i32 vector value
-def nxv8i32 : ValueType<256, 124>; // n x 8 x i32 vector value
-def nxv16i32: ValueType<512, 125>; // n x 16 x i32 vector value
-def nxv32i32: ValueType<1024,126>; // n x 32 x i32 vector value
-
-def nxv1i64 : ValueType<64, 127>; // n x 1 x i64 vector value
-def nxv2i64 : ValueType<128, 128>; // n x 2 x i64 vector value
-def nxv4i64 : ValueType<256, 129>; // n x 4 x i64 vector value
-def nxv8i64 : ValueType<512, 130>; // n x 8 x i64 vector value
-def nxv16i64: ValueType<1024,131>; // n x 16 x i64 vector value
-def nxv32i64: ValueType<2048,132>; // n x 32 x i64 vector value
-
-def nxv1f16 : ValueType<32, 133>; // n x 1 x f16 vector value
-def nxv2f16 : ValueType<32 , 134>; // n x 2 x f16 vector value
-def nxv4f16 : ValueType<64 , 135>; // n x 4 x f16 vector value
-def nxv8f16 : ValueType<128, 136>; // n x 8 x f16 vector value
-def nxv16f16 : ValueType<256,137>; // n x 16 x f16 vector value
-def nxv32f16 : ValueType<512,138>; // n x 32 x f16 vector value
-def nxv2bf16 : ValueType<32 , 139>; // n x 2 x bf16 vector value
-def nxv4bf16 : ValueType<64 , 140>; // n x 4 x bf16 vector value
-def nxv8bf16 : ValueType<128, 141>; // n x 8 x bf16 vector value
-def nxv1f32 : ValueType<32 , 142>; // n x 1 x f32 vector value
-def nxv2f32 : ValueType<64 , 143>; // n x 2 x f32 vector value
-def nxv4f32 : ValueType<128, 144>; // n x 4 x f32 vector value
-def nxv8f32 : ValueType<256, 145>; // n x 8 x f32 vector value
-def nxv16f32 : ValueType<512, 146>; // n x 16 x f32 vector value
-def nxv1f64 : ValueType<64, 147>; // n x 1 x f64 vector value
-def nxv2f64 : ValueType<128, 148>; // n x 2 x f64 vector value
-def nxv4f64 : ValueType<256, 149>; // n x 4 x f64 vector value
-def nxv8f64 : ValueType<512, 150>; // n x 8 x f64 vector value
-
-def x86mmx : ValueType<64 , 151>; // X86 MMX value
-def FlagVT : ValueType<0 , 152>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 153>; // Produces no value
-def untyped: ValueType<8 , 154>; // Produces an untyped value
-def exnref : ValueType<0 , 155>; // WebAssembly's exnref type
def token : ValueType<0 , 248>; // TokenTy
def MetadataVT: ValueType<0, 249>; // Metadata
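As elsewhere in ValueTypes.td, the first ValueType parameter is the type's size in bits (the known-minimum size for the scalable nxv* types) and the second is its MVT enum value, so most of the hunk above is a renumbering to make room for new entries (the wider f64 vectors plus the funcref, externref and x86amx types, with exnref dropped). A quick sanity check of the size column, written as a standalone C++ sketch rather than TableGen:

    // Illustrative only: a fixed-length vector VT's size is NumElts * EltBits.
    #include <cstdint>

    constexpr uint64_t vectorBits(uint64_t NumElts, uint64_t EltBits) {
      return NumElts * EltBits;
    }

    static_assert(vectorBits(128, 64) == 8192, "v128f64 is 8192 bits");
    static_assert(vectorBits(256, 64) == 16384, "v256f64 is 16384 bits");
    static_assert(vectorBits(2048, 32) == 65536, "v2048f32 is 65536 bits");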
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/VirtRegMap.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/VirtRegMap.h
index 823154318eb7..deef4b90279a 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/Pass.h"
#include <cassert>
@@ -60,6 +61,10 @@ class TargetInstrInfo;
/// mapping.
IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
+ /// Virt2ShapeMap - For X86 AMX register whose register is bound shape
+ /// information.
+ DenseMap<unsigned, ShapeT> Virt2ShapeMap;
+
/// createSpillSlot - Allocate a spill slot for RC from MFI.
unsigned createSpillSlot(const TargetRegisterClass *RC);
@@ -98,15 +103,30 @@ class TargetInstrInfo;
/// returns the physical register mapped to the specified
/// virtual register
- Register getPhys(Register virtReg) const {
+ MCRegister getPhys(Register virtReg) const {
assert(virtReg.isVirtual());
- return Virt2PhysMap[virtReg.id()];
+ return MCRegister::from(Virt2PhysMap[virtReg.id()]);
}
/// creates a mapping for the specified virtual register to
/// the specified physical register
void assignVirt2Phys(Register virtReg, MCPhysReg physReg);
+ bool isShapeMapEmpty() const { return Virt2ShapeMap.empty(); }
+
+ bool hasShape(Register virtReg) const {
+ return getShape(virtReg).isValid();
+ }
+
+ ShapeT getShape(Register virtReg) const {
+ assert(virtReg.isVirtual());
+ return Virt2ShapeMap.lookup(virtReg);
+ }
+
+ void assignVirt2Shape(Register virtReg, ShapeT shape) {
+ Virt2ShapeMap[virtReg.id()] = shape;
+ }
+
/// clears the specified virtual register's, physical
/// register mapping
void clearVirt(Register virtReg) {
@@ -131,12 +151,15 @@ class TargetInstrInfo;
bool hasKnownPreference(Register VirtReg);
/// records virtReg is a split live interval from SReg.
- void setIsSplitFromReg(Register virtReg, unsigned SReg) {
+ void setIsSplitFromReg(Register virtReg, Register SReg) {
Virt2SplitMap[virtReg.id()] = SReg;
+ if (hasShape(SReg)) {
+ Virt2ShapeMap[virtReg.id()] = getShape(SReg);
+ }
}
/// returns the live interval virtReg is split from.
- unsigned getPreSplitReg(Register virtReg) const {
+ Register getPreSplitReg(Register virtReg) const {
return Virt2SplitMap[virtReg.id()];
}
@@ -144,8 +167,8 @@ class TargetInstrInfo;
/// from through splitting.
/// A register that was not created by splitting is its own original.
/// This operation is idempotent.
- unsigned getOriginal(unsigned VirtReg) const {
- unsigned Orig = getPreSplitReg(VirtReg);
+ Register getOriginal(Register VirtReg) const {
+ Register Orig = getPreSplitReg(VirtReg);
return Orig ? Orig : VirtReg;
}
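The VirtRegMap changes above add a per-virtual-register shape map for X86 AMX tile registers, thread it through live-range splitting, and tighten getPhys() to return an MCRegister. A minimal sketch of the new accessors from a caller's point of view; VRM, NewReg and OldReg are assumed to come from the surrounding pass, and this is illustrative rather than code from the patch:

    #include <cassert>
    #include "llvm/CodeGen/VirtRegMap.h"

    // Record that NewReg was split from OldReg. With this patch,
    // setIsSplitFromReg() also copies any AMX shape recorded for OldReg,
    // so the split interval keeps the tile's row/column information.
    void noteSplit(llvm::VirtRegMap &VRM, llvm::Register NewReg,
                   llvm::Register OldReg) {
      VRM.setIsSplitFromReg(NewReg, OldReg);
      assert(!VRM.hasShape(OldReg) || VRM.hasShape(NewReg));
    }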
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
index 41f8856f31f2..54e8c40a9e72 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
@@ -22,7 +22,9 @@ class BasicBlock;
class Function;
class MachineBasicBlock;
+namespace WebAssembly {
enum EventTag { CPP_EXCEPTION = 0, C_LONGJMP = 1 };
+}
using BBOrMBB = PointerUnion<const BasicBlock *, MachineBasicBlock *>;
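The only functional change in WasmEHFuncInfo.h is that EventTag now lives inside the WebAssembly namespace, so callers qualify the enumerators explicitly. A one-line illustration (not from the patch):

    #include "llvm/CodeGen/WasmEHFuncInfo.h"

    // Previously spelled plain CPP_EXCEPTION.
    constexpr int CppExceptionTag = llvm::WebAssembly::CPP_EXCEPTION;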
diff --git a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h
index be3c5ebcadae..7281966fc608 100644
--- a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h
+++ b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h
@@ -64,14 +64,17 @@ public:
/// section. Reset current relocation pointer if neccessary.
virtual bool hasValidRelocs(bool ResetRelocsPtr = true) = 0;
- /// Checks that there is a relocation against .debug_info
- /// table between \p StartOffset and \p NextOffset.
- ///
- /// This function must be called with offsets in strictly ascending
- /// order because it never looks back at relocations it already 'went past'.
- /// \returns true and sets Info.InDebugMap if it is the case.
- virtual bool hasValidRelocationAt(uint64_t StartOffset, uint64_t EndOffset,
- CompileUnit::DIEInfo &Info) = 0;
+ /// Checks that the specified DIE has a DW_AT_Location attribute
+ /// that references into a live code section. This function
+ /// must be called with DIE offsets in strictly ascending order.
+ virtual bool hasLiveMemoryLocation(const DWARFDie &DIE,
+ CompileUnit::DIEInfo &Info) = 0;
+
+ /// Checks that the specified DIE has a DW_AT_Low_pc attribute
+ /// that references into a live code section. This function
+ /// must be called with DIE offsets in strictly ascending order.
+ virtual bool hasLiveAddressRange(const DWARFDie &DIE,
+ CompileUnit::DIEInfo &Info) = 0;
/// Apply the valid relocations to the buffer \p Data, taking into
/// account that Data is at \p BaseOffset in the debug_info section.
@@ -82,6 +85,9 @@ public:
virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset,
bool IsLittleEndian) = 0;
+ /// Relocate the given address offset if a valid relocation exists.
+ virtual llvm::Expected<uint64_t> relocateIndexedAddr(uint64_t Offset) = 0;
+
/// Returns all valid functions address ranges(i.e., those ranges
/// which points to sections with code).
virtual RangesTy &getValidAddressRanges() = 0;
@@ -180,7 +186,8 @@ public:
///
/// As a side effect, this also switches the current Dwarf version
/// of the MC layer to the one of U.getOrigUnit().
- virtual void emitCompileUnitHeader(CompileUnit &Unit) = 0;
+ virtual void emitCompileUnitHeader(CompileUnit &Unit,
+ unsigned DwarfVersion) = 0;
/// Recursively emit the DIE tree rooted at \p Die.
virtual void emitDIE(DIE &Die) = 0;
@@ -202,9 +209,9 @@ using UnitListTy = std::vector<std::unique_ptr<CompileUnit>>;
/// this class represents DWARF information for source file
/// and it`s address map.
-class DwarfFile {
+class DWARFFile {
public:
- DwarfFile(StringRef Name, DWARFContext *Dwarf, AddressesMap *Addresses,
+ DWARFFile(StringRef Name, DWARFContext *Dwarf, AddressesMap *Addresses,
const std::vector<std::string> &Warnings)
: FileName(Name), Dwarf(Dwarf), Addresses(Addresses), Warnings(Warnings) {
}
@@ -222,7 +229,7 @@ public:
typedef std::function<void(const Twine &Warning, StringRef Context,
const DWARFDie *DIE)>
messageHandler;
-typedef std::function<ErrorOr<DwarfFile &>(StringRef ContainerName,
+typedef std::function<ErrorOr<DWARFFile &>(StringRef ContainerName,
StringRef Path)>
objFileLoader;
typedef std::map<std::string, std::string> swiftInterfacesMap;
@@ -249,7 +256,7 @@ public:
: TheDwarfEmitter(Emitter), DwarfLinkerClientID(ClientID) {}
/// Add object file to be linked.
- void addObjectFile(DwarfFile &File);
+ void addObjectFile(DWARFFile &File);
/// Link debug info for added objFiles. Object
/// files are linked all together.
@@ -353,36 +360,38 @@ private:
/// of work needs to be performed when processing the current item. The flags
/// and info fields are optional based on the type.
struct WorklistItem {
- WorklistItemType Type;
DWARFDie Die;
+ WorklistItemType Type;
CompileUnit &CU;
unsigned Flags;
- unsigned AncestorIdx = 0;
- CompileUnit::DIEInfo *OtherInfo = nullptr;
+ union {
+ const unsigned AncestorIdx;
+ CompileUnit::DIEInfo *OtherInfo;
+ };
WorklistItem(DWARFDie Die, CompileUnit &CU, unsigned Flags,
WorklistItemType T = WorklistItemType::LookForDIEsToKeep)
- : Type(T), Die(Die), CU(CU), Flags(Flags) {}
+ : Die(Die), Type(T), CU(CU), Flags(Flags), AncestorIdx(0) {}
WorklistItem(DWARFDie Die, CompileUnit &CU, WorklistItemType T,
CompileUnit::DIEInfo *OtherInfo = nullptr)
- : Type(T), Die(Die), CU(CU), OtherInfo(OtherInfo) {}
+ : Die(Die), Type(T), CU(CU), Flags(0), OtherInfo(OtherInfo) {}
WorklistItem(unsigned AncestorIdx, CompileUnit &CU, unsigned Flags)
- : Type(WorklistItemType::LookForParentDIEsToKeep), CU(CU), Flags(Flags),
- AncestorIdx(AncestorIdx) {}
+ : Die(), Type(WorklistItemType::LookForParentDIEsToKeep), CU(CU),
+ Flags(Flags), AncestorIdx(AncestorIdx) {}
};
/// returns true if we need to translate strings.
bool needToTranslateStrings() { return StringsTranslator != nullptr; }
- void reportWarning(const Twine &Warning, const DwarfFile &File,
+ void reportWarning(const Twine &Warning, const DWARFFile &File,
const DWARFDie *DIE = nullptr) const {
if (Options.WarningHandler != nullptr)
Options.WarningHandler(Warning, File.FileName, DIE);
}
- void reportError(const Twine &Warning, const DwarfFile &File,
+ void reportError(const Twine &Warning, const DWARFFile &File,
const DWARFDie *DIE = nullptr) const {
if (Options.ErrorHandler != nullptr)
Options.ErrorHandler(Warning, File.FileName, DIE);
@@ -398,18 +407,18 @@ private:
void updateAccelKind(DWARFContext &Dwarf);
/// Emit warnings as Dwarf compile units to leave a trail after linking.
- bool emitPaperTrailWarnings(const DwarfFile &File,
+ bool emitPaperTrailWarnings(const DWARFFile &File,
OffsetsStringPool &StringPool);
void copyInvariantDebugSection(DWARFContext &Dwarf);
/// Keeps track of data associated with one object during linking.
struct LinkContext {
- DwarfFile &File;
+ DWARFFile &File;
UnitListTy CompileUnits;
bool Skip = false;
- LinkContext(DwarfFile &File) : File(File) {}
+ LinkContext(DWARFFile &File) : File(File) {}
/// Clear part of the context that's no longer needed when we're done with
/// the debug object.
@@ -438,7 +447,7 @@ private:
/// kept. All DIEs referenced though attributes should be kept.
void lookForRefDIEsToKeep(const DWARFDie &Die, CompileUnit &CU,
unsigned Flags, const UnitListTy &Units,
- const DwarfFile &File,
+ const DWARFFile &File,
SmallVectorImpl<WorklistItem> &Worklist);
/// \defgroup FindRootDIEs Find DIEs corresponding to Address map entries.
@@ -450,7 +459,7 @@ private:
/// The return value indicates whether the DIE is incomplete.
void lookForDIEsToKeep(AddressesMap &RelocMgr, RangesTy &Ranges,
const UnitListTy &Units, const DWARFDie &DIE,
- const DwarfFile &File, CompileUnit &CU,
+ const DWARFFile &File, CompileUnit &CU,
unsigned Flags);
/// If this compile unit is really a skeleton CU that points to a
@@ -460,9 +469,8 @@ private:
/// pointing to the module, and a DW_AT_gnu_dwo_id with the module
/// hash.
bool registerModuleReference(DWARFDie CUDie, const DWARFUnit &Unit,
- const DwarfFile &File,
+ const DWARFFile &File,
OffsetsStringPool &OffsetsStringPool,
- UniquingStringPool &UniquingStringPoolStringPool,
DeclContextTree &ODRContexts,
uint64_t ModulesEndOffset, unsigned &UnitID,
bool IsLittleEndian, unsigned Indent = 0,
@@ -473,9 +481,8 @@ private:
/// to Units.
Error loadClangModule(DWARFDie CUDie, StringRef FilePath,
StringRef ModuleName, uint64_t DwoId,
- const DwarfFile &File,
+ const DWARFFile &File,
OffsetsStringPool &OffsetsStringPool,
- UniquingStringPool &UniquingStringPool,
DeclContextTree &ODRContexts, uint64_t ModulesEndOffset,
unsigned &UnitID, bool IsLittleEndian,
unsigned Indent = 0, bool Quiet = false);
@@ -484,22 +491,21 @@ private:
void keepDIEAndDependencies(AddressesMap &RelocMgr, RangesTy &Ranges,
const UnitListTy &Units, const DWARFDie &DIE,
CompileUnit::DIEInfo &MyInfo,
- const DwarfFile &File, CompileUnit &CU,
+ const DWARFFile &File, CompileUnit &CU,
bool UseODR);
unsigned shouldKeepDIE(AddressesMap &RelocMgr, RangesTy &Ranges,
- const DWARFDie &DIE, const DwarfFile &File,
+ const DWARFDie &DIE, const DWARFFile &File,
CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo,
unsigned Flags);
/// Check if a variable describing DIE should be kept.
/// \returns updated TraversalFlags.
unsigned shouldKeepVariableDIE(AddressesMap &RelocMgr, const DWARFDie &DIE,
- CompileUnit &Unit,
CompileUnit::DIEInfo &MyInfo, unsigned Flags);
unsigned shouldKeepSubprogramDIE(AddressesMap &RelocMgr, RangesTy &Ranges,
- const DWARFDie &DIE, const DwarfFile &File,
+ const DWARFDie &DIE, const DWARFFile &File,
CompileUnit &Unit,
CompileUnit::DIEInfo &MyInfo,
unsigned Flags);
@@ -508,7 +514,7 @@ private:
/// RefValue. The resulting DIE might be in another CompileUnit which is
/// stored into \p ReferencedCU. \returns null if resolving fails for any
/// reason.
- DWARFDie resolveDIEReference(const DwarfFile &File, const UnitListTy &Units,
+ DWARFDie resolveDIEReference(const DWARFFile &File, const UnitListTy &Units,
const DWARFFormValue &RefValue,
const DWARFDie &DIE, CompileUnit *&RefCU);
@@ -523,7 +529,7 @@ private:
class DIECloner {
DWARFLinker &Linker;
DwarfEmitter *Emitter;
- DwarfFile &ObjFile;
+ DWARFFile &ObjFile;
/// Allocator used for all the DIEValue objects.
BumpPtrAllocator &DIEAlloc;
@@ -533,7 +539,7 @@ private:
bool Update;
public:
- DIECloner(DWARFLinker &Linker, DwarfEmitter *Emitter, DwarfFile &ObjFile,
+ DIECloner(DWARFLinker &Linker, DwarfEmitter *Emitter, DWARFFile &ObjFile,
BumpPtrAllocator &DIEAlloc,
std::vector<std::unique_ptr<CompileUnit>> &CompileUnits,
bool Update)
@@ -551,7 +557,7 @@ private:
/// applied to the entry point of the function to get the linked address.
/// \param Die the output DIE to use, pass NULL to create one.
/// \returns the root of the cloned tree or null if nothing was selected.
- DIE *cloneDIE(const DWARFDie &InputDIE, const DwarfFile &File,
+ DIE *cloneDIE(const DWARFDie &InputDIE, const DWARFFile &File,
CompileUnit &U, OffsetsStringPool &StringPool,
int64_t PCOffset, uint32_t OutOffset, unsigned Flags,
bool IsLittleEndian, DIE *Die = nullptr);
@@ -560,7 +566,7 @@ private:
/// chose to keep above. If there are no valid relocs, then there's
/// nothing to clone/emit.
uint64_t cloneAllCompileUnits(DWARFContext &DwarfContext,
- const DwarfFile &File,
+ const DWARFFile &File,
OffsetsStringPool &StringPool,
bool IsLittleEndian);
@@ -606,7 +612,7 @@ private:
/// Helper for cloneDIE.
unsigned cloneAttribute(DIE &Die, const DWARFDie &InputDIE,
- const DwarfFile &File, CompileUnit &U,
+ const DWARFFile &File, CompileUnit &U,
OffsetsStringPool &StringPool,
const DWARFFormValue &Val,
const AttributeSpec AttrSpec, unsigned AttrSize,
@@ -627,18 +633,18 @@ private:
AttributeSpec AttrSpec,
unsigned AttrSize,
const DWARFFormValue &Val,
- const DwarfFile &File,
+ const DWARFFile &File,
CompileUnit &Unit);
/// Clone a DWARF expression that may be referencing another DIE.
void cloneExpression(DataExtractor &Data, DWARFExpression Expression,
- const DwarfFile &File, CompileUnit &Unit,
+ const DWARFFile &File, CompileUnit &Unit,
SmallVectorImpl<uint8_t> &OutputBuffer);
/// Clone an attribute referencing another DIE and add
/// it to \p Die.
/// \returns the size of the new attribute.
- unsigned cloneBlockAttribute(DIE &Die, const DwarfFile &File,
+ unsigned cloneBlockAttribute(DIE &Die, const DWARFFile &File,
CompileUnit &Unit, AttributeSpec AttrSpec,
const DWARFFormValue &Val, unsigned AttrSize,
bool IsLittleEndian);
@@ -654,7 +660,7 @@ private:
/// Clone a scalar attribute and add it to \p Die.
/// \returns the size of the new attribute.
unsigned cloneScalarAttribute(DIE &Die, const DWARFDie &InputDIE,
- const DwarfFile &File, CompileUnit &U,
+ const DWARFFile &File, CompileUnit &U,
AttributeSpec AttrSpec,
const DWARFFormValue &Val, unsigned AttrSize,
AttributesInfo &Info);
@@ -670,7 +676,7 @@ private:
void copyAbbrev(const DWARFAbbreviationDeclaration &Abbrev, bool hasODR);
uint32_t hashFullyQualifiedName(DWARFDie DIE, CompileUnit &U,
- const DwarfFile &File,
+ const DWARFFile &File,
int RecurseDepth = 0);
/// Helper for cloneDIE.
@@ -685,7 +691,7 @@ private:
/// Compute and emit debug_ranges section for \p Unit, and
/// patch the attributes referencing it.
void patchRangesForUnit(const CompileUnit &Unit, DWARFContext &Dwarf,
- const DwarfFile &File) const;
+ const DWARFFile &File) const;
/// Generate and emit the DW_AT_ranges attribute for a compile_unit if it had
/// one.
@@ -695,7 +701,7 @@ private:
/// parts according to the linked function ranges and emit the result in the
/// debug_line section.
void patchLineTableForUnit(CompileUnit &Unit, DWARFContext &OrigDwarf,
- const DwarfFile &File);
+ const DWARFFile &File);
/// Emit the accelerator entries for \p Unit.
void emitAcceleratorEntriesForUnit(CompileUnit &Unit);
@@ -703,7 +709,7 @@ private:
void emitAppleAcceleratorEntriesForUnit(CompileUnit &Unit);
/// Patch the frame info for an object file and emit it.
- void patchFrameInfoForObject(const DwarfFile &, RangesTy &Ranges,
+ void patchFrameInfoForObject(const DWARFFile &, RangesTy &Ranges,
DWARFContext &, unsigned AddressSize);
/// FoldingSet that uniques the abbreviations.
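Most of the DWARFLinker.h hunk is the mechanical DwarfFile -> DWARFFile rename, plus the reworked AddressesMap hooks and the DwarfVersion parameter on emitCompileUnitHeader(). A hedged sketch of feeding an object to the linker after the rename; the name, context, address map and warning list are assumed to be provided by a dsymutil-like caller, which must keep the DWARFFile alive until it calls link():

    #include <memory>
    #include <string>
    #include <vector>
    #include "llvm/DWARFLinker/DWARFLinker.h"

    // Build the renamed wrapper; the constructor matches the hunk above.
    std::unique_ptr<llvm::DWARFFile>
    makeDWARFFile(llvm::StringRef Name, llvm::DWARFContext *Ctx,
                  llvm::AddressesMap *Addresses,
                  const std::vector<std::string> &Warnings) {
      return std::make_unique<llvm::DWARFFile>(Name, Ctx, Addresses, Warnings);
    }

    // Later: Linker.addObjectFile(*File); followed by Linker.link().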
diff --git a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
index 944e7e3501c9..a6310bcb5df1 100644
--- a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
+++ b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
@@ -101,10 +101,7 @@ public:
unsigned getUniqueID() const { return ID; }
- void createOutputDIE() {
- NewUnit.emplace(OrigUnit.getVersion(), OrigUnit.getAddressByteSize(),
- OrigUnit.getUnitDIE().getTag());
- }
+ void createOutputDIE() { NewUnit.emplace(OrigUnit.getUnitDIE().getTag()); }
DIE *getOutputUnitDIE() const {
if (NewUnit)
@@ -123,6 +120,11 @@ public:
DIEInfo &getInfo(unsigned Idx) { return Info[Idx]; }
const DIEInfo &getInfo(unsigned Idx) const { return Info[Idx]; }
+ DIEInfo &getInfo(const DWARFDie &Die) {
+ unsigned Idx = getOrigUnit().getDIEIndex(Die);
+ return Info[Idx];
+ }
+
uint64_t getStartOffset() const { return StartOffset; }
uint64_t getNextUnitOffset() const { return NextUnitOffset; }
void setStartOffset(uint64_t DebugInfoSize) { StartOffset = DebugInfoSize; }
@@ -157,7 +159,7 @@ public:
/// Compute the end offset for this unit. Must be called after the CU's DIEs
/// have been cloned. \returns the next unit offset (which is also the
/// current debug_info section size).
- uint64_t computeNextUnitOffset();
+ uint64_t computeNextUnitOffset(uint16_t DwarfVersion);
/// Keep track of a forward reference to DIE \p Die in \p RefUnit by \p
/// Attr. The attribute should be fixed up later to point to the absolute
@@ -235,21 +237,6 @@ public:
const std::vector<AccelInfo> &getNamespaces() const { return Namespaces; }
const std::vector<AccelInfo> &getObjC() const { return ObjC; }
- /// Get the full path for file \a FileNum in the line table
- StringRef getResolvedPath(unsigned FileNum) {
- if (FileNum >= ResolvedPaths.size())
- return StringRef();
- return ResolvedPaths[FileNum];
- }
-
- /// Set the fully resolved path for the line-table's file \a FileNum
- /// to \a Path.
- void setResolvedPath(unsigned FileNum, StringRef Path) {
- if (ResolvedPaths.size() <= FileNum)
- ResolvedPaths.resize(FileNum + 1);
- ResolvedPaths[FileNum] = Path;
- }
-
MCSymbol *getLabelBegin() { return LabelBegin; }
void setLabelBegin(MCSymbol *S) { LabelBegin = S; }
@@ -308,12 +295,6 @@ private:
std::vector<AccelInfo> ObjC;
/// @}
- /// Cached resolved paths from the line table.
- /// Note, the StringRefs here point in to the intern (uniquing) string pool.
- /// This means that a StringRef returned here doesn't need to then be uniqued
- /// for the purposes of getting a unique address for each string.
- std::vector<StringRef> ResolvedPaths;
-
/// Is this unit subject to the ODR rule?
bool HasODR;
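CompileUnit::createOutputDIE() no longer re-derives version and address size (computeNextUnitOffset() now takes the DWARF version instead), and the new getInfo(const DWARFDie &) overload spares callers the manual index lookup. Minimal sketch of the overload; CU and Die are assumed to exist in the caller:

    #include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"

    // Equivalent to CU.getInfo(CU.getOrigUnit().getDIEIndex(Die)), which is
    // what callers wrote before this change.
    llvm::CompileUnit::DIEInfo &lookupInfo(llvm::CompileUnit &CU,
                                           const llvm::DWARFDie &Die) {
      return CU.getInfo(Die);
    }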
diff --git a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
index e59e15f00a7e..d2274488e85f 100644
--- a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
+++ b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/NonRelocatableStringpool.h"
#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
@@ -31,16 +32,18 @@ class CachedPathResolver {
public:
/// Resolve a path by calling realpath and cache its result. The returned
/// StringRef is interned in the given \p StringPool.
- StringRef resolve(std::string Path, NonRelocatableStringpool &StringPool) {
+ StringRef resolve(const std::string &Path,
+ NonRelocatableStringpool &StringPool) {
StringRef FileName = sys::path::filename(Path);
- SmallString<256> ParentPath = sys::path::parent_path(Path);
+ StringRef ParentPath = sys::path::parent_path(Path);
// If the ParentPath has not yet been resolved, resolve and cache it for
// future look-ups.
if (!ResolvedPaths.count(ParentPath)) {
SmallString<256> RealPath;
sys::fs::real_path(ParentPath, RealPath);
- ResolvedPaths.insert({ParentPath, StringRef(RealPath).str()});
+ ResolvedPaths.insert(
+ {ParentPath, std::string(RealPath.c_str(), RealPath.size())});
}
// Join the file name again with the resolved path.
@@ -95,7 +98,6 @@ public:
void setDefinedInClangModule(bool Val) { DefinedInClangModule = Val; }
uint16_t getTag() const { return Tag; }
- StringRef getName() const { return Name; }
private:
friend DeclMapInfo;
@@ -129,10 +131,10 @@ public:
///
/// FIXME: The invalid bit along the return value is to emulate some
/// dsymutil-classic functionality.
- PointerIntPair<DeclContext *, 1>
- getChildDeclContext(DeclContext &Context, const DWARFDie &DIE,
- CompileUnit &Unit, UniquingStringPool &StringPool,
- bool InClangModule);
+ PointerIntPair<DeclContext *, 1> getChildDeclContext(DeclContext &Context,
+ const DWARFDie &DIE,
+ CompileUnit &Unit,
+ bool InClangModule);
DeclContext &getRoot() { return Root; }
@@ -141,8 +143,19 @@ private:
DeclContext Root;
DeclContext::Map Contexts;
- /// Cache resolved paths from the line table.
+ /// Cached resolved paths from the line table.
+ /// The key is <UniqueUnitID, FileIdx>.
+ using ResolvedPathsMap = DenseMap<std::pair<unsigned, unsigned>, StringRef>;
+ ResolvedPathsMap ResolvedPaths;
+
+ /// Helper that resolves and caches fragments of file paths.
CachedPathResolver PathResolver;
+
+ /// String pool keeping real path bodies.
+ NonRelocatableStringpool StringPool;
+
+ StringRef getResolvedPath(CompileUnit &CU, unsigned FileNum,
+ const DWARFDebugLine::LineTable &LineTable);
};
/// Info type for the DenseMap storing the DeclContext pointers.
diff --git a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
index de58f5dedf24..7b0851159252 100644
--- a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
@@ -64,7 +64,7 @@ public:
///
/// As a side effect, this also switches the current Dwarf version
/// of the MC layer to the one of U.getOrigUnit().
- void emitCompileUnitHeader(CompileUnit &Unit) override;
+ void emitCompileUnitHeader(CompileUnit &Unit, unsigned DwarfVersion) override;
/// Recursively emit the DIE tree rooted at \p Die.
void emitDIE(DIE &Die) override;
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
index 784c47e3bf5d..bb29ef5f2ce8 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -11,9 +11,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
@@ -61,12 +61,9 @@ public:
ArrayRef<uint8_t> RecordData;
};
-template <typename Kind> struct RemappedRecord {
- explicit RemappedRecord(const CVRecord<Kind> &R) : OriginalRecord(R) {}
-
- CVRecord<Kind> OriginalRecord;
- SmallVector<std::pair<uint32_t, TypeIndex>, 8> Mappings;
-};
+// There are two kinds of codeview records: type and symbol records.
+using CVType = CVRecord<TypeLeafKind>;
+using CVSymbol = CVRecord<SymbolKind>;
template <typename Record, typename Func>
Error forEachCodeViewRecord(ArrayRef<uint8_t> StreamBuffer, Func F) {
@@ -126,6 +123,12 @@ struct VarStreamArrayExtractor<codeview::CVRecord<Kind>> {
}
};
+namespace codeview {
+using CVSymbolArray = VarStreamArray<CVSymbol>;
+using CVTypeArray = VarStreamArray<CVType>;
+using CVTypeRange = iterator_range<CVTypeArray::Iterator>;
+} // namespace codeview
+
} // end namespace llvm
#endif // LLVM_DEBUGINFO_CODEVIEW_RECORDITERATOR_H
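With CVType, CVSymbol and the array aliases now defined in CVRecord.h (and RemappedRecord gone), this header alone is enough for generic record traversal. A sketch using the forEachCodeViewRecord helper visible above; the buffer is assumed to hold a well-formed CodeView type stream:

    #include "llvm/DebugInfo/CodeView/CVRecord.h"

    // Count the records in a raw CodeView type stream.
    llvm::Error countTypeRecords(llvm::ArrayRef<uint8_t> Stream, unsigned &N) {
      N = 0;
      return llvm::codeview::forEachCodeViewRecord<llvm::codeview::CVType>(
          Stream, [&N](const llvm::codeview::CVType &) -> llvm::Error {
            ++N;
            return llvm::Error::success();
          });
    }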
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
index 1615ff41df12..82ef8c173bee 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
@@ -10,9 +10,6 @@
#define LLVM_DEBUGINFO_CODEVIEW_CVSYMBOLVISITOR_H
#include "llvm/DebugInfo/CodeView/CVRecord.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h"
#include "llvm/Support/ErrorOr.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
index f26e80ebe2a9..d851dea0a27f 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
@@ -15,7 +15,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/GUID.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Error.h"
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
index ed5c143818e6..48ea7e52c172 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
@@ -15,6 +15,7 @@
#endif
#if !defined(CV_REGISTERS_ALL) && !defined(CV_REGISTERS_X86) && \
+ !defined(CV_REGISTERS_ARM) && \
!defined(CV_REGISTERS_ARM64)
#error Need include at least one register set.
#endif
@@ -393,13 +394,46 @@ CV_REGISTER(ARM_PC, 25)
// Status register
-CV_REGISTER(ARM_CPSR, 25)
+CV_REGISTER(ARM_CPSR, 26)
// ARM VFPv1 registers
CV_REGISTER(ARM_FPSCR, 40)
CV_REGISTER(ARM_FPEXC, 41)
+CV_REGISTER(ARM_FS0, 50)
+CV_REGISTER(ARM_FS1, 51)
+CV_REGISTER(ARM_FS2, 52)
+CV_REGISTER(ARM_FS3, 53)
+CV_REGISTER(ARM_FS4, 54)
+CV_REGISTER(ARM_FS5, 55)
+CV_REGISTER(ARM_FS6, 56)
+CV_REGISTER(ARM_FS7, 57)
+CV_REGISTER(ARM_FS8, 58)
+CV_REGISTER(ARM_FS9, 59)
+CV_REGISTER(ARM_FS10, 60)
+CV_REGISTER(ARM_FS11, 61)
+CV_REGISTER(ARM_FS12, 62)
+CV_REGISTER(ARM_FS13, 63)
+CV_REGISTER(ARM_FS14, 64)
+CV_REGISTER(ARM_FS15, 65)
+CV_REGISTER(ARM_FS16, 66)
+CV_REGISTER(ARM_FS17, 67)
+CV_REGISTER(ARM_FS18, 68)
+CV_REGISTER(ARM_FS19, 69)
+CV_REGISTER(ARM_FS20, 70)
+CV_REGISTER(ARM_FS21, 71)
+CV_REGISTER(ARM_FS22, 72)
+CV_REGISTER(ARM_FS23, 73)
+CV_REGISTER(ARM_FS24, 74)
+CV_REGISTER(ARM_FS25, 75)
+CV_REGISTER(ARM_FS26, 76)
+CV_REGISTER(ARM_FS27, 77)
+CV_REGISTER(ARM_FS28, 78)
+CV_REGISTER(ARM_FS29, 79)
+CV_REGISTER(ARM_FS30, 80)
+CV_REGISTER(ARM_FS31, 81)
+
// ARM VFPv3/NEON registers
CV_REGISTER(ARM_FS32, 200)
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
index 720b1b49581f..624a623e75b8 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
@@ -10,10 +10,8 @@
#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H
#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/Support/Error.h"
-#include <cstdint>
namespace llvm {
@@ -30,7 +28,6 @@ class DebugStringTableSubsectionRef;
class DebugSymbolRVASubsectionRef;
class DebugSymbolsSubsectionRef;
class DebugUnknownSubsectionRef;
-class StringsAndChecksumsRef;
class DebugSubsectionVisitor {
public:
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
index 784fc59484b9..51b8523ed969 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
@@ -9,8 +9,8 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H
#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/Support/Error.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
index 35eeef5a327e..ddbb4e3c5e6c 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
@@ -14,7 +14,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Error.h"
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/RecordName.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/RecordName.h
index cc09db8933bd..8e06be9e41e8 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/RecordName.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/RecordName.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_RECORDNAME_H
#define LLVM_DEBUGINFO_CODEVIEW_RECORDNAME_H
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
index d832a48b1265..aaeffb2446ad 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
@@ -11,8 +11,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringSet.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index 4383534b0db2..c37f6b4d5fa7 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -1003,9 +1003,6 @@ public:
uint32_t RecordOffset = 0;
};
-using CVSymbol = CVRecord<SymbolKind>;
-using CVSymbolArray = VarStreamArray<CVSymbol>;
-
Expected<CVSymbol> readSymbolFromStream(BinaryStreamRef Stream,
uint32_t Offset);
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
index 57dbc56c0769..71bc70dde6ed 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
@@ -9,7 +9,8 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDHELPERS_H
#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDHELPERS_H
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
namespace llvm {
namespace codeview {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h
index 102d68c3fb2a..bde5a8b3ab2f 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h
@@ -10,9 +10,8 @@
#define LLVM_DEBUGINFO_CODEVIEW_TYPECOLLECTION_H
#include "llvm/ADT/StringRef.h"
-
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
namespace llvm {
namespace codeview {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h
index b0a16cccbff3..9f34d026b1ba 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h
@@ -86,6 +86,16 @@ struct GloballyHashedType {
bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
+ friend inline bool operator==(const GloballyHashedType &L,
+ const GloballyHashedType &R) {
+ return L.Hash == R.Hash;
+ }
+
+ friend inline bool operator!=(const GloballyHashedType &L,
+ const GloballyHashedType &R) {
+ return !(L.Hash == R.Hash);
+ }
+
/// Given a sequence of bytes representing a record, compute a global hash for
/// this record. Due to the nature of global hashes incorporating the hashes
/// of referenced records, this function requires a list of types and ids
@@ -161,15 +171,10 @@ struct GloballyHashedType {
return Hashes;
}
};
-#if defined(_MSC_VER)
-// is_trivially_copyable is not available in older versions of libc++, but it is
-// available in all supported versions of MSVC, so at least this gives us some
-// coverage.
static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
"GloballyHashedType must be trivially copyable so that we can "
"reinterpret_cast arrays of hash data to arrays of "
"GloballyHashedType");
-#endif
} // namespace codeview
template <> struct DenseMapInfo<codeview::LocallyHashedType> {
@@ -206,7 +211,7 @@ template <> struct DenseMapInfo<codeview::GloballyHashedType> {
static bool isEqual(codeview::GloballyHashedType LHS,
codeview::GloballyHashedType RHS) {
- return LHS.Hash == RHS.Hash;
+ return LHS == RHS;
}
};
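GloballyHashedType gains real ==/!= operators (and the static_assert on trivial copyability is now unconditional), so clients no longer poke at the Hash member directly. Trivial illustration:

    #include "llvm/DebugInfo/CodeView/TypeHashing.h"

    bool sameGlobalHash(const llvm::codeview::GloballyHashedType &A,
                        const llvm::codeview::GloballyHashedType &B) {
      return A == B;  // previously spelled A.Hash == B.Hash
    }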
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
index b9e2562bfc2b..bdc6cf46509b 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -116,13 +116,22 @@ public:
uint32_t toArrayIndex() const {
assert(!isSimple());
- return getIndex() - FirstNonSimpleIndex;
+ return (getIndex() & ~DecoratedItemIdMask) - FirstNonSimpleIndex;
}
static TypeIndex fromArrayIndex(uint32_t Index) {
return TypeIndex(Index + FirstNonSimpleIndex);
}
+ static TypeIndex fromDecoratedArrayIndex(bool IsItem, uint32_t Index) {
+ return TypeIndex((Index + FirstNonSimpleIndex) |
+ (IsItem ? DecoratedItemIdMask : 0));
+ }
+
+ TypeIndex removeDecoration() {
+ return TypeIndex(Index & ~DecoratedItemIdMask);
+ }
+
SimpleTypeKind getSimpleKind() const {
assert(isSimple());
return static_cast<SimpleTypeKind>(Index & SimpleKindMask);
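The decorated-index helpers let a single 32-bit index remember whether it refers to the item (IPI) or type (TPI) stream, and toArrayIndex() now strips that tag itself. Round-trip sketch using only the members shown above (DecoratedItemIdMask is a pre-existing constant in this header):

    #include <cassert>
    #include <cstdint>
    #include "llvm/DebugInfo/CodeView/TypeIndex.h"

    uint32_t roundTrip(uint32_t ArrayIdx) {
      using llvm::codeview::TypeIndex;
      TypeIndex TI = TypeIndex::fromDecoratedArrayIndex(/*IsItem=*/true, ArrayIdx);
      assert(TI.removeDecoration().toArrayIndex() == ArrayIdx);
      return TI.toArrayIndex();  // the decoration bit is masked off here too
    }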
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
index 469768787274..f4f5835d8b57 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
@@ -10,8 +10,8 @@
#define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/Error.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
index 35f5c0561138..3b6d1b0b1a70 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
@@ -14,7 +14,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/GUID.h"
@@ -32,15 +31,10 @@ using support::little32_t;
using support::ulittle16_t;
using support::ulittle32_t;
-using CVType = CVRecord<TypeLeafKind>;
-using RemappedType = RemappedRecord<TypeLeafKind>;
-
struct CVMemberRecord {
TypeLeafKind Kind;
ArrayRef<uint8_t> Data;
};
-using CVTypeArray = VarStreamArray<CVType>;
-using CVTypeRange = iterator_range<CVTypeArray::Iterator>;
/// Equvalent to CV_fldattr_t in cvinfo.h.
struct MemberAttributes {
@@ -703,7 +697,7 @@ public:
: TypeRecord(TypeRecordKind::VFTable), CompleteClass(CompleteClass),
OverriddenVFTable(OverriddenVFTable), VFPtrOffset(VFPtrOffset) {
MethodNames.push_back(Name);
- MethodNames.insert(MethodNames.end(), Methods.begin(), Methods.end());
+ llvm::append_range(MethodNames, Methods);
}
TypeIndex getCompleteClass() const { return CompleteClass; }
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
index 19492b93681c..041f5214967c 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
@@ -9,7 +9,8 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORDHELPERS_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDHELPERS_H
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
namespace llvm {
namespace codeview {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
index d0506cce8176..04d7c7b0420a 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
@@ -11,7 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/Support/Error.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DIContext.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DIContext.h
index 661d30d04c94..ae78fe912188 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DIContext.h
@@ -35,6 +35,7 @@ struct DILineInfo {
static constexpr const char *const Addr2LineBadString = "??";
std::string FileName;
std::string FunctionName;
+ std::string StartFileName;
Optional<StringRef> Source;
uint32_t Line = 0;
uint32_t Column = 0;
@@ -43,12 +44,15 @@ struct DILineInfo {
// DWARF-specific.
uint32_t Discriminator = 0;
- DILineInfo() : FileName(BadString), FunctionName(BadString) {}
+ DILineInfo()
+ : FileName(BadString), FunctionName(BadString), StartFileName(BadString) {
+ }
bool operator==(const DILineInfo &RHS) const {
return Line == RHS.Line && Column == RHS.Column &&
FileName == RHS.FileName && FunctionName == RHS.FunctionName &&
- StartLine == RHS.StartLine && Discriminator == RHS.Discriminator;
+ StartFileName == RHS.StartFileName && StartLine == RHS.StartLine &&
+ Discriminator == RHS.Discriminator;
}
bool operator!=(const DILineInfo &RHS) const {
@@ -56,10 +60,10 @@ struct DILineInfo {
}
bool operator<(const DILineInfo &RHS) const {
- return std::tie(FileName, FunctionName, Line, Column, StartLine,
- Discriminator) <
- std::tie(RHS.FileName, RHS.FunctionName, RHS.Line, RHS.Column,
- RHS.StartLine, RHS.Discriminator);
+ return std::tie(FileName, FunctionName, StartFileName, Line, Column,
+ StartLine, Discriminator) <
+ std::tie(RHS.FileName, RHS.FunctionName, RHS.StartFileName, RHS.Line,
+ RHS.Column, RHS.StartLine, RHS.Discriminator);
}
explicit operator bool() const { return *this != DILineInfo(); }
@@ -72,6 +76,8 @@ struct DILineInfo {
OS << "function '" << FunctionName << "', ";
OS << "line " << Line << ", ";
OS << "column " << Column << ", ";
+ if (StartFileName != BadString)
+ OS << "start file '" << StartFileName << "', ";
OS << "start line " << StartLine << '\n';
}
};
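DILineInfo now records the file in which the enclosing function starts, and the new field participates in ==, < and dump(). Sketch of reading it; Info is assumed to come from a DIContext line-info query:

    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/Support/raw_ostream.h"

    void printStartFile(const llvm::DILineInfo &Info) {
      // StartFileName defaults to DILineInfo::BadString when the debug info
      // does not provide it.
      if (Info.StartFileName != llvm::DILineInfo::BadString)
        llvm::outs() << "function starts in " << Info.StartFileName << "\n";
    }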
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index 39ae53c4e7fe..cf4c827b9267 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -111,6 +111,16 @@ public:
return AttributeSpecs[idx].Attr;
}
+ bool getAttrIsImplicitConstByIndex(uint32_t idx) const {
+ assert(idx < AttributeSpecs.size());
+ return AttributeSpecs[idx].isImplicitConst();
+ }
+
+ int64_t getAttrImplicitConstValueByIndex(uint32_t idx) const {
+ assert(idx < AttributeSpecs.size());
+ return AttributeSpecs[idx].getImplicitConstValue();
+ }
+
/// Get the index of the specified attribute.
///
/// Searches the this abbreviation declaration for the index of the specified
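The two new accessors expose DW_FORM_implicit_const attributes by index. Sketch of walking them; getNumAttributes() is assumed to be the existing attribute-count accessor on this class:

    #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpImplicitConsts(const llvm::DWARFAbbreviationDeclaration &Abbrev) {
      for (uint32_t I = 0, E = (uint32_t)Abbrev.getNumAttributes(); I != E; ++I)
        if (Abbrev.getAttrIsImplicitConstByIndex(I))
          llvm::outs() << Abbrev.getAttrImplicitConstValueByIndex(I) << "\n";
    }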
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 97903a96b3fc..7d88e1447dca 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -146,6 +146,7 @@ public:
bool verify(raw_ostream &OS, DIDumpOptions DumpOpts = {}) override;
using unit_iterator_range = DWARFUnitVector::iterator_range;
+ using compile_unit_range = DWARFUnitVector::compile_unit_range;
/// Get units from .debug_info in this context.
unit_iterator_range info_section_units() {
@@ -163,10 +164,12 @@ public:
}
/// Get compile units in this context.
- unit_iterator_range compile_units() { return info_section_units(); }
+ compile_unit_range compile_units() {
+ return make_filter_range(info_section_units(), isCompileUnit);
+ }
- /// Get type units in this context.
- unit_iterator_range type_units() { return types_section_units(); }
+ // If you want type_units(), it'll need to be a concat iterator of a filter of
+ // TUs in info_section + all the (all type) units in types_section
/// Get all normal compile/type units in this context.
unit_iterator_range normal_units() {
@@ -189,10 +192,13 @@ public:
}
/// Get compile units in the DWO context.
- unit_iterator_range dwo_compile_units() { return dwo_info_section_units(); }
+ compile_unit_range dwo_compile_units() {
+ return make_filter_range(dwo_info_section_units(), isCompileUnit);
+ }
- /// Get type units in the DWO context.
- unit_iterator_range dwo_type_units() { return dwo_types_section_units(); }
+ // If you want dwo_type_units(), it'll need to be a concat iterator of a
+ // filter of TUs in dwo_info_section + all the (all type) units in
+ // dwo_types_section.
/// Get all units in the DWO context.
unit_iterator_range dwo_units() {
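compile_units() and dwo_compile_units() now filter out the type units that live in .debug_info instead of returning every unit, and type_units()/dwo_type_units() are dropped until a proper concatenating iterator exists. Minimal sketch of iterating the filtered range:

    #include "llvm/DebugInfo/DWARF/DWARFContext.h"
    #include "llvm/Support/raw_ostream.h"

    void listCompileUnits(llvm::DWARFContext &Ctx) {
      // Each element is still the owning std::unique_ptr<DWARFUnit>.
      for (const auto &CU : Ctx.compile_units())
        llvm::outs() << "CU at offset " << CU->getOffset() << "\n";
    }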
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
index 32844ffd570f..69e67866946c 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
@@ -74,6 +74,24 @@ public:
/// Return the full length of this table, including the length field.
/// Return None if the length cannot be identified reliably.
Optional<uint64_t> getFullLength() const;
+
+ /// Return the DWARF format of this table.
+ dwarf::DwarfFormat getFormat() const { return Format; }
+
+ /// Return the length of this table.
+ uint64_t getLength() const { return Length; }
+
+ /// Return the version of this table.
+ uint16_t getVersion() const { return Version; }
+
+ /// Return the address size of this table.
+ uint8_t getAddressSize() const { return AddrSize; }
+
+ /// Return the segment selector size of this table.
+ uint8_t getSegmentSelectorSize() const { return SegSize; }
+
+ /// Return the parsed addresses of this table.
+ ArrayRef<uint64_t> getAddressEntries() const { return Addrs; }
};
} // end namespace llvm
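The address table grows read accessors for its parsed header fields and entries. Sketch of dumping them; the class name DWARFDebugAddrTable is assumed from the surrounding header, since the hunk only shows its members:

    #include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpAddrTable(const llvm::DWARFDebugAddrTable &Table) {
      llvm::outs() << "version " << Table.getVersion() << ", address size "
                   << unsigned(Table.getAddressSize()) << "\n";
      for (uint64_t Addr : Table.getAddressEntries())
        llvm::outs() << llvm::format_hex(Addr, 18) << "\n";
    }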
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index 0681a2e33a50..3d5852ee1518 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -60,7 +60,8 @@ public:
DWARFDebugArangeSet() { clear(); }
void clear();
- Error extract(DWARFDataExtractor data, uint64_t *offset_ptr);
+ Error extract(DWARFDataExtractor data, uint64_t *offset_ptr,
+ function_ref<void(Error)> WarningHandler);
void dump(raw_ostream &OS) const;
uint64_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index 233b55cc55c1..af87811f5d7d 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -71,8 +71,8 @@ public:
/// where a problem occurred in case an error is returned.
Error parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset);
- void dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH,
- unsigned IndentLevel = 1) const;
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts, const MCRegisterInfo *MRI,
+ bool IsEH, unsigned IndentLevel = 1) const;
private:
std::vector<Instruction> Instructions;
@@ -121,7 +121,8 @@ private:
static ArrayRef<OperandType[2]> getOperandTypes();
/// Print \p Opcode's operand number \p OperandIdx which has value \p Operand.
- void printOperand(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH,
+ void printOperand(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const MCRegisterInfo *MRI, bool IsEH,
const Instruction &Instr, unsigned OperandIdx,
uint64_t Operand) const;
};
@@ -146,8 +147,8 @@ public:
CFIProgram &cfis() { return CFIs; }
/// Dump the instructions in this CFI fragment
- virtual void dump(raw_ostream &OS, const MCRegisterInfo *MRI,
- bool IsEH) const = 0;
+ virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const MCRegisterInfo *MRI, bool IsEH) const = 0;
protected:
const FrameKind Kind;
@@ -201,7 +202,7 @@ public:
uint32_t getLSDAPointerEncoding() const { return LSDAPointerEncoding; }
- void dump(raw_ostream &OS, const MCRegisterInfo *MRI,
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts, const MCRegisterInfo *MRI,
bool IsEH) const override;
private:
@@ -242,7 +243,7 @@ public:
uint64_t getAddressRange() const { return AddressRange; }
Optional<uint64_t> getLSDAAddress() const { return LSDAAddress; }
- void dump(raw_ostream &OS, const MCRegisterInfo *MRI,
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts, const MCRegisterInfo *MRI,
bool IsEH) const override;
static bool classof(const FrameEntry *FE) { return FE->getKind() == FK_FDE; }
@@ -285,7 +286,7 @@ public:
~DWARFDebugFrame();
/// Dump the section data into the given stream.
- void dump(raw_ostream &OS, const MCRegisterInfo *MRI,
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts, const MCRegisterInfo *MRI,
Optional<uint64_t> Offset) const;
/// Parse the section from raw data. \p Data is assumed to contain the whole
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index fe46d613aedd..bc6c67ae6c5d 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -121,6 +121,8 @@ public:
bool hasFileAtIndex(uint64_t FileIndex) const;
+ Optional<uint64_t> getLastValidFileIndex() const;
+
bool
getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
DILineInfoSpecifier::FileLineInfoKind Kind,
@@ -251,6 +253,10 @@ public:
return Prologue.hasFileAtIndex(FileIndex);
}
+ Optional<uint64_t> getLastValidFileIndex() const {
+ return Prologue.getLastValidFileIndex();
+ }
+
/// Extracts filename by its index in filename table in prologue.
/// In Dwarf 4, the files are 1-indexed and the current compilation file
/// name is not represented in the list. In DWARF v5, the files are
@@ -309,12 +315,10 @@ public:
/// Helper to allow for parsing of an entire .debug_line section in sequence.
class SectionParser {
public:
- using cu_range = DWARFUnitVector::iterator_range;
- using tu_range = DWARFUnitVector::iterator_range;
using LineToUnitMap = std::map<uint64_t, DWARFUnit *>;
- SectionParser(DWARFDataExtractor &Data, const DWARFContext &C, cu_range CUs,
- tu_range TUs);
+ SectionParser(DWARFDataExtractor &Data, const DWARFContext &C,
+ DWARFUnitVector::iterator_range Units);
/// Get the next line table from the section. Report any issues via the
/// handlers.
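With the separate compile-unit and type-unit ranges collapsed into one, a caller now hands the parser a single unit range. A rough sketch, assuming a DWARFContext named Ctx exposing its units via normal_units() and a DWARFDataExtractor LineData over .debug_line; the variable names are illustrative:

DWARFDebugLine::SectionParser Parser(LineData, Ctx, Ctx.normal_units());
while (!Parser.done()) {
  DWARFDebugLine::LineTable LT = Parser.parseNext(
      [](Error E) { consumeError(std::move(E)); },  // recoverable issues
      [](Error E) { consumeError(std::move(E)); }); // unrecoverable issues
  (void)LT;
}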
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index 3b141304f85f..dbc11c51a789 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -72,6 +72,8 @@ public:
std::function<Optional<object::SectionedAddress>(uint32_t)> LookupAddr,
function_ref<bool(Expected<DWARFLocationExpression>)> Callback) const;
+ const DWARFDataExtractor &getData() { return Data; }
+
protected:
DWARFDataExtractor Data;
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
index 4d463d8fe6f5..f1768a1ddab5 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
@@ -96,6 +96,9 @@ class DWARFDebugMacro {
MacroHeader Header;
SmallVector<Entry, 4> Macros;
uint64_t Offset;
+
+ /// Whether or not this is a .debug_macro section.
+ bool IsDebugMacro;
};
/// A list of all the macro entries in the debug_macinfo section.
@@ -107,7 +110,7 @@ public:
/// Print the macro list found within the debug_macinfo/debug_macro section.
void dump(raw_ostream &OS) const;
- Error parseMacro(DWARFUnitVector::iterator_range Units,
+ Error parseMacro(DWARFUnitVector::compile_unit_range Units,
DataExtractor StringExtractor,
DWARFDataExtractor MacroData) {
return parseImpl(Units, StringExtractor, MacroData, /*IsMacro=*/true);
@@ -123,7 +126,7 @@ public:
private:
/// Parse the debug_macinfo/debug_macro section accessible via the 'MacroData'
/// parameter.
- Error parseImpl(Optional<DWARFUnitVector::iterator_range> Units,
+ Error parseImpl(Optional<DWARFUnitVector::compile_unit_range> Units,
Optional<DataExtractor> StringExtractor,
DWARFDataExtractor Data, bool IsMacro);
};
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
index 88e5432851d6..4d28bdcde2e4 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
@@ -34,7 +34,7 @@ struct RangeListEntry : public DWARFListEntryBase {
uint64_t Value0;
uint64_t Value1;
- Error extract(DWARFDataExtractor Data, uint64_t End, uint64_t *OffsetPtr);
+ Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr);
void dump(raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength,
uint64_t &CurrentBase, DIDumpOptions DumpOpts,
llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
@@ -48,6 +48,7 @@ public:
/// Build a DWARFAddressRangesVector from a rangelist.
DWARFAddressRangesVector
getAbsoluteRanges(Optional<object::SectionedAddress> BaseAddr,
+ uint8_t AddressByteSize,
function_ref<Optional<object::SectionedAddress>(uint32_t)>
LookupPooledAddress) const;
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
index 05a6056e8e21..0f76d7f1b31c 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -262,6 +262,7 @@ public:
/// for this subprogram by resolving DW_AT_specification or
/// DW_AT_abstract_origin references if necessary.
uint64_t getDeclLine() const;
+ std::string getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const;
/// Retrieves values of DW_AT_call_file, DW_AT_call_line and DW_AT_call_column
/// from DIE (or zeroes if they are missing). This function looks for
@@ -381,11 +382,6 @@ inline bool operator==(const DWARFDie::iterator &LHS,
return LHS.Die == RHS.Die;
}
-inline bool operator!=(const DWARFDie::iterator &LHS,
- const DWARFDie::iterator &RHS) {
- return !(LHS == RHS);
-}
-
// These inline functions must follow the DWARFDie::iterator definition above
// as they use functions from that class.
inline DWARFDie::iterator DWARFDie::begin() const {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
index edfa68d49a60..447ad66b9352 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -10,10 +10,11 @@
#define LLVM_DEBUGINFO_DWARFEXPRESSION_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DIContext.h"
#include "llvm/Support/DataExtractor.h"
namespace llvm {
@@ -93,8 +94,9 @@ public:
bool extract(DataExtractor Data, uint8_t AddressSize, uint64_t Offset,
Optional<dwarf::DwarfFormat> Format);
bool isError() { return Error; }
- bool print(raw_ostream &OS, const DWARFExpression *Expr,
- const MCRegisterInfo *RegInfo, DWARFUnit *U, bool isEH);
+ bool print(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const DWARFExpression *Expr, const MCRegisterInfo *RegInfo,
+ DWARFUnit *U, bool isEH);
bool verify(DWARFUnit *U);
};
@@ -143,7 +145,8 @@ public:
iterator begin() const { return iterator(this, 0); }
iterator end() const { return iterator(this, Data.getData().size()); }
- void print(raw_ostream &OS, const MCRegisterInfo *RegInfo, DWARFUnit *U,
+ void print(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const MCRegisterInfo *RegInfo, DWARFUnit *U,
bool IsEH = false) const;
/// Print the expression in a format intended to be compact and useful to a
@@ -164,10 +167,5 @@ inline bool operator==(const DWARFExpression::iterator &LHS,
const DWARFExpression::iterator &RHS) {
return LHS.Expr == RHS.Expr && LHS.Offset == RHS.Offset;
}
-
-inline bool operator!=(const DWARFExpression::iterator &LHS,
- const DWARFExpression::iterator &RHS) {
- return !(LHS == RHS);
-}
}
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 3f1be4e5a592..1342e645934c 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -82,6 +82,9 @@ public:
void dump(raw_ostream &OS, DIDumpOptions DumpOpts = DIDumpOptions()) const;
void dumpSectionedAddress(raw_ostream &OS, DIDumpOptions DumpOpts,
object::SectionedAddress SA) const;
+ void dumpAddress(raw_ostream &OS, uint64_t Address) const;
+ static void dumpAddress(raw_ostream &OS, uint8_t AddressSize,
+ uint64_t Address);
static void dumpAddressSection(const DWARFObject &Obj, raw_ostream &OS,
DIDumpOptions DumpOpts, uint64_t SectionIndex);
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
index 496fdb2477f9..8f58b4e6458e 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
@@ -46,7 +46,7 @@ public:
const ListEntries &getEntries() const { return Entries; }
bool empty() const { return Entries.empty(); }
void clear() { Entries.clear(); }
- Error extract(DWARFDataExtractor Data, uint64_t HeaderOffset, uint64_t End,
+ Error extract(DWARFDataExtractor Data, uint64_t HeaderOffset,
uint64_t *OffsetPtr, StringRef SectionName,
StringRef ListStringName);
};
@@ -72,10 +72,6 @@ class DWARFListTableHeader {
};
Header HeaderData;
- /// The offset table, which contains offsets to the individual list entries.
- /// It is used by forms such as DW_FORM_rnglistx.
- /// FIXME: Generate the table and use the appropriate forms.
- std::vector<uint64_t> Offsets;
/// The table's format, either DWARF32 or DWARF64.
dwarf::DwarfFormat Format;
/// The offset at which the header (and hence the table) is located within
@@ -93,7 +89,6 @@ public:
void clear() {
HeaderData = {};
- Offsets.clear();
}
uint64_t getHeaderOffset() const { return HeaderOffset; }
uint8_t getAddrSize() const { return HeaderData.AddrSize; }
@@ -115,11 +110,23 @@ public:
llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64)");
}
- void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const;
- Optional<uint64_t> getOffsetEntry(uint32_t Index) const {
- if (Index < Offsets.size())
- return Offsets[Index];
- return None;
+ void dump(DataExtractor Data, raw_ostream &OS,
+ DIDumpOptions DumpOpts = {}) const;
+ Optional<uint64_t> getOffsetEntry(DataExtractor Data, uint32_t Index) const {
+ if (Index > HeaderData.OffsetEntryCount)
+ return None;
+
+ return getOffsetEntry(Data, getHeaderOffset() + getHeaderSize(Format), Format, Index);
+ }
+
+ static Optional<uint64_t> getOffsetEntry(DataExtractor Data,
+ uint64_t OffsetTableOffset,
+ dwarf::DwarfFormat Format,
+ uint32_t Index) {
+ uint8_t OffsetByteSize = Format == dwarf::DWARF64 ? 8 : 4;
+ uint64_t Offset = OffsetTableOffset + OffsetByteSize * Index;
+ auto R = Data.getUnsigned(&Offset, OffsetByteSize);
+ return R;
}
/// Extract the table header and the array of offsets.
@@ -169,14 +176,14 @@ public:
uint8_t getAddrSize() const { return Header.getAddrSize(); }
dwarf::DwarfFormat getFormat() const { return Header.getFormat(); }
- void dump(raw_ostream &OS,
+ void dump(DWARFDataExtractor Data, raw_ostream &OS,
llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
LookupPooledAddress,
DIDumpOptions DumpOpts = {}) const;
/// Return the contents of the offset entry designated by a given index.
- Optional<uint64_t> getOffsetEntry(uint32_t Index) const {
- return Header.getOffsetEntry(Index);
+ Optional<uint64_t> getOffsetEntry(DataExtractor Data, uint32_t Index) const {
+ return Header.getOffsetEntry(Data, Index);
}
/// Return the size of the table header including the length but not including
/// the offsets. This is dependent on the table format, which is unambiguously
@@ -196,18 +203,18 @@ Error DWARFListTableBase<DWARFListType>::extract(DWARFDataExtractor Data,
return E;
Data.setAddressSize(Header.getAddrSize());
- uint64_t End = getHeaderOffset() + Header.length();
- while (*OffsetPtr < End) {
+ Data = DWARFDataExtractor(Data, getHeaderOffset() + Header.length());
+ while (Data.isValidOffset(*OffsetPtr)) {
DWARFListType CurrentList;
uint64_t Off = *OffsetPtr;
- if (Error E = CurrentList.extract(Data, getHeaderOffset(), End, OffsetPtr,
+ if (Error E = CurrentList.extract(Data, getHeaderOffset(), OffsetPtr,
Header.getSectionName(),
Header.getListTypeString()))
return E;
ListMap[Off] = CurrentList;
}
- assert(*OffsetPtr == End &&
+ assert(*OffsetPtr == Data.size() &&
"mismatch between expected length of table and length "
"of extracted data");
return Error::success();
@@ -215,18 +222,18 @@ Error DWARFListTableBase<DWARFListType>::extract(DWARFDataExtractor Data,
template <typename ListEntryType>
Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
- uint64_t HeaderOffset, uint64_t End,
+ uint64_t HeaderOffset,
uint64_t *OffsetPtr,
StringRef SectionName,
StringRef ListTypeString) {
- if (*OffsetPtr < HeaderOffset || *OffsetPtr >= End)
+ if (*OffsetPtr < HeaderOffset || *OffsetPtr >= Data.size())
return createStringError(errc::invalid_argument,
"invalid %s list offset 0x%" PRIx64,
ListTypeString.data(), *OffsetPtr);
Entries.clear();
- while (*OffsetPtr < End) {
+ while (Data.isValidOffset(*OffsetPtr)) {
ListEntryType Entry;
- if (Error E = Entry.extract(Data, End, OffsetPtr))
+ if (Error E = Entry.extract(Data, OffsetPtr))
return E;
Entries.push_back(Entry);
if (Entry.isSentinel())
@@ -240,11 +247,11 @@ Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
template <typename DWARFListType>
void DWARFListTableBase<DWARFListType>::dump(
- raw_ostream &OS,
+ DWARFDataExtractor Data, raw_ostream &OS,
llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
LookupPooledAddress,
DIDumpOptions DumpOpts) const {
- Header.dump(OS, DumpOpts);
+ Header.dump(Data, OS, DumpOpts);
OS << HeaderString << "\n";
// Determine the length of the longest encoding string we have in the table,
@@ -269,19 +276,14 @@ template <typename DWARFListType>
Expected<DWARFListType>
DWARFListTableBase<DWARFListType>::findList(DWARFDataExtractor Data,
uint64_t Offset) {
- auto Entry = ListMap.find(Offset);
- if (Entry != ListMap.end())
- return Entry->second;
-
// Extract the list from the section and enter it into the list map.
DWARFListType List;
- uint64_t End = getHeaderOffset() + Header.length();
- uint64_t StartingOffset = Offset;
+ if (Header.length())
+ Data = DWARFDataExtractor(Data, getHeaderOffset() + Header.length());
if (Error E =
- List.extract(Data, getHeaderOffset(), End, &Offset,
+ List.extract(Data, Header.length() ? getHeaderOffset() : 0, &Offset,
Header.getSectionName(), Header.getListTypeString()))
return std::move(E);
- ListMap[StartingOffset] = List;
return List;
}
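The offset array is no longer cached in a vector; each entry is read on demand at OffsetTableOffset + Index * (4 or 8) bytes, depending on the DWARF format. A small worked example, assuming a DataExtractor over a DWARF32 .debug_rnglists contribution whose header starts at offset 0 (the index value is made up):

// A DWARF32 rnglists header is 12 bytes, so the offset array begins right after it.
uint64_t OffsetTableOffset = /*HeaderOffset=*/0 + /*getHeaderSize(DWARF32)=*/12;
Optional<uint64_t> Entry = DWARFListTableHeader::getOffsetEntry(
    Data, OffsetTableOffset, dwarf::DWARF32, /*Index=*/2);
// If present, Entry holds the 4-byte value stored at section offset 12 + 4 * 2 == 20.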
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 5b3b46626059..369cbdc28c2e 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -113,6 +113,8 @@ public:
const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
DWARFSectionKind Kind);
+bool isCompileUnit(const std::unique_ptr<DWARFUnit> &U);
+
/// Describe a collection of units. Intended to hold all units either from
/// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo.
class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1> {
@@ -127,6 +129,9 @@ public:
using iterator = typename UnitVector::iterator;
using iterator_range = llvm::iterator_range<typename UnitVector::iterator>;
+ using compile_unit_range =
+ decltype(make_filter_range(std::declval<iterator_range>(), isCompileUnit));
+
DWARFUnit *getUnitForOffset(uint64_t Offset) const;
DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
@@ -204,7 +209,6 @@ class DWARFUnit {
const DWARFDebugAbbrev *Abbrev;
const DWARFSection *RangeSection;
uint64_t RangeSectionBase;
- const DWARFSection *LocSection;
uint64_t LocSectionBase;
/// Location table of this unit.
@@ -223,10 +227,6 @@ class DWARFUnit {
/// offsets table (DWARF v5).
Optional<StrOffsetsContributionDescriptor> StringOffsetsTableContribution;
- /// A table of range lists (DWARF v5 and later).
- Optional<DWARFDebugRnglistTable> RngListTable;
- Optional<DWARFListTableHeader> LoclistTableHeader;
-
mutable const DWARFAbbreviationDeclarationSet *Abbrevs;
llvm::Optional<object::SectionedAddress> BaseAddr;
/// The compile unit debug information entry items.
@@ -294,6 +294,7 @@ public:
dwarf::DwarfFormat getFormat() const { return Header.getFormat(); }
uint8_t getUnitType() const { return Header.getUnitType(); }
bool isTypeUnit() const { return Header.isTypeUnit(); }
+ uint64_t getAbbrOffset() const { return Header.getAbbrOffset(); }
uint64_t getNextUnitOffset() const { return Header.getNextUnitOffset(); }
const DWARFSection &getLineSection() const { return LineSection; }
StringRef getStringSection() const { return StringSection; }
@@ -313,10 +314,6 @@ public:
RangeSection = RS;
RangeSectionBase = Base;
}
- void setLocSection(const DWARFSection *LS, uint64_t Base) {
- LocSection = LS;
- LocSectionBase = Base;
- }
uint64_t getLocSectionBase() const {
return LocSectionBase;
@@ -411,21 +408,10 @@ public:
/// Return a rangelist's offset based on an index. The index designates
/// an entry in the rangelist table's offset array and is supplied by
/// DW_FORM_rnglistx.
- Optional<uint64_t> getRnglistOffset(uint32_t Index) {
- if (!RngListTable)
- return None;
- if (Optional<uint64_t> Off = RngListTable->getOffsetEntry(Index))
- return *Off + RangeSectionBase;
- return None;
- }
+ Optional<uint64_t> getRnglistOffset(uint32_t Index);
+
+ Optional<uint64_t> getLoclistOffset(uint32_t Index);
- Optional<uint64_t> getLoclistOffset(uint32_t Index) {
- if (!LoclistTableHeader)
- return None;
- if (Optional<uint64_t> Off = LoclistTableHeader->getOffsetEntry(Index))
- return *Off + getLocSectionBase();
- return None;
- }
Expected<DWARFAddressRangesVector> collectAddressRanges();
Expected<DWARFLocationExpressionsVector>
@@ -480,7 +466,6 @@ public:
/// The unit needs to have its DIEs extracted for this method to work.
DWARFDie getDIEForOffset(uint64_t Offset) {
extractDIEsIfNeeded(false);
- assert(!DieArray.empty());
auto It =
llvm::partition_point(DieArray, [=](const DWARFDebugInfoEntry &DIE) {
return DIE.getOffset() < Offset;
@@ -529,6 +514,10 @@ private:
bool parseDWO();
};
+inline bool isCompileUnit(const std::unique_ptr<DWARFUnit> &U) {
+ return !U->isTypeUnit();
+}
+
} // end namespace llvm
#endif // LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
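compile_unit_range is simply a filtered view of the unit vector that skips type units. A minimal sketch of forming such a range by hand, assuming a populated DWARFUnitVector named Units (the local variable is illustrative):

DWARFUnitVector::compile_unit_range CUs =
    make_filter_range(make_range(Units.begin(), Units.end()), isCompileUnit);
for (const std::unique_ptr<DWARFUnit> &U : CUs) {
  // Only non-type units (i.e. compile units) are visited here.
  (void)U;
}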
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index 22b1d722fc89..18d889f5cadb 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -12,25 +12,22 @@
#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
-#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
-
#include <cstdint>
#include <map>
#include <set>
namespace llvm {
class raw_ostream;
+struct DWARFAddressRange;
struct DWARFAttribute;
class DWARFContext;
-class DWARFDie;
-class DWARFUnit;
-class DWARFCompileUnit;
class DWARFDataExtractor;
class DWARFDebugAbbrev;
class DataExtractor;
struct DWARFSection;
+class DWARFUnit;
/// A class that verifies DWARF debug information given a DWARF Context.
class DWARFVerifier {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
index 593d781b990e..473c89e8106f 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
@@ -24,8 +24,6 @@
namespace llvm {
namespace msf {
-struct MSFLayout;
-
/// MappedBlockStream represents data stored in an MSF file into chunks of a
/// particular size (called the Block Size), and whose chunks may not be
/// necessarily contiguous. The arrangement of these chunks within the MSF file
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
index beaaef0c5a6c..82b63d729454 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
@@ -34,6 +34,34 @@ struct MSFLayout;
}
namespace pdb {
+// Represents merged or unmerged symbols. Merged symbols can be written to the
+// output file as is, but unmerged symbols must be rewritten first. In either
+// case, the size must be known up front.
+struct SymbolListWrapper {
+ explicit SymbolListWrapper(ArrayRef<uint8_t> Syms)
+ : SymPtr(const_cast<uint8_t *>(Syms.data())), SymSize(Syms.size()),
+ NeedsToBeMerged(false) {}
+ explicit SymbolListWrapper(void *SymSrc, uint32_t Length)
+ : SymPtr(SymSrc), SymSize(Length), NeedsToBeMerged(true) {}
+
+ ArrayRef<uint8_t> asArray() const {
+ return ArrayRef<uint8_t>(static_cast<const uint8_t *>(SymPtr), SymSize);
+ }
+
+ uint32_t size() const { return SymSize; }
+
+ void *SymPtr = nullptr;
+ uint32_t SymSize = 0;
+ bool NeedsToBeMerged = false;
+};
+
+/// Represents a string table reference at some offset in the module symbol
+/// stream.
+struct StringTableFixup {
+ uint32_t StrTabOffset = 0;
+ uint32_t SymOffsetOfReference = 0;
+};
+
class DbiModuleDescriptorBuilder {
friend class DbiStreamBuilder;
@@ -48,10 +76,28 @@ public:
void setPdbFilePathNI(uint32_t NI);
void setObjFileName(StringRef Name);
+
+ // Callback to merge one source of unmerged symbols.
+ using MergeSymbolsCallback = Error (*)(void *Ctx, void *Symbols,
+ BinaryStreamWriter &Writer);
+
+ void setMergeSymbolsCallback(void *Ctx, MergeSymbolsCallback Callback) {
+ MergeSymsCtx = Ctx;
+ MergeSymsCallback = Callback;
+ }
+
+ void setStringTableFixups(std::vector<StringTableFixup> &&Fixups) {
+ StringTableFixups = std::move(Fixups);
+ }
+
void setFirstSectionContrib(const SectionContrib &SC);
void addSymbol(codeview::CVSymbol Symbol);
void addSymbolsInBulk(ArrayRef<uint8_t> BulkSymbols);
+ // Add symbols of known size which will be merged (rewritten) when committing
+ // the PDB to disk.
+ void addUnmergedSymbols(void *SymSrc, uint32_t SymLength);
+
void
addDebugSubsection(std::shared_ptr<codeview::DebugSubsection> Subsection);
@@ -77,8 +123,14 @@ public:
void finalize();
Error finalizeMsfLayout();
- Error commit(BinaryStreamWriter &ModiWriter, const msf::MSFLayout &MsfLayout,
- WritableBinaryStreamRef MsfBuffer);
+ /// Commit the DBI descriptor to the DBI stream.
+ Error commit(BinaryStreamWriter &ModiWriter);
+
+ /// Commit the accumulated symbols to the module symbol stream. Safe to call
+ /// in parallel on different DbiModuleDescriptorBuilder objects. Only modifies
+ /// the pre-allocated stream in question.
+ Error commitSymbolStream(const msf::MSFLayout &MsfLayout,
+ WritableBinaryStreamRef MsfBuffer);
private:
uint32_t calculateC13DebugInfoSize() const;
@@ -91,7 +143,12 @@ private:
std::string ModuleName;
std::string ObjFileName;
std::vector<std::string> SourceFiles;
- std::vector<ArrayRef<uint8_t>> Symbols;
+ std::vector<SymbolListWrapper> Symbols;
+
+ void *MergeSymsCtx = nullptr;
+ MergeSymbolsCallback MergeSymsCallback = nullptr;
+
+ std::vector<StringTableFixup> StringTableFixups;
std::vector<codeview::DebugSubsectionRecordBuilder> C13Builders;
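The callback design lets a client such as a PDB-producing linker defer symbol rewriting to commit time and run commitSymbolStream() per module in parallel. A rough wiring sketch; the context struct, mergeOneSource, RawRecords and RawSize are all hypothetical stand-ins for whatever the caller owns:

struct ModuleMergeCtx { /* per-module state needed to rewrite symbols */ };

static Error mergeOneSource(void *Ctx, void *Symbols, BinaryStreamWriter &Writer) {
  // Rewrite the unmerged records referenced by Symbols into Writer, using Ctx.
  return Error::success();
}

ModuleMergeCtx MergeCtx;
Mod.setMergeSymbolsCallback(&MergeCtx, mergeOneSource);
Mod.addUnmergedSymbols(/*SymSrc=*/RawRecords, /*SymLength=*/RawSize);
// After finalizeMsfLayout(), each module's commitSymbolStream(Layout, MsfBuffer)
// may safely run on its own thread.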
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h
new file mode 100644
index 000000000000..480b3fb11419
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h
@@ -0,0 +1,41 @@
+//==- NativeEnumSymbols.h - Native Symbols Enumerator impl -------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOLS_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOLS_H
+
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+
+#include <vector>
+
+namespace llvm {
+namespace pdb {
+
+class NativeSession;
+
+class NativeEnumSymbols : public IPDBEnumChildren<PDBSymbol> {
+public:
+ NativeEnumSymbols(NativeSession &Session, std::vector<SymIndexId> Symbols);
+
+ uint32_t getChildCount() const override;
+ std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
+ std::unique_ptr<PDBSymbol> getNext() override;
+ void reset() override;
+
+private:
+ std::vector<SymIndexId> Symbols;
+ uint32_t Index;
+ NativeSession &Session;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
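Like the other Native* enumerators, this one is consumed through the generic IPDBEnumChildren interface. A short usage sketch, assuming some query (for example findInlineFramesByVA) returned a std::unique_ptr<IPDBEnumSymbols> named Frames:

while (std::unique_ptr<PDBSymbol> Frame = Frames->getNext()) {
  // Each child wraps one of the SymIndexIds held by NativeEnumSymbols.
  outs() << Frame->getRawSymbol().getName() << '\n';
}
Frames->reset(); // rewind for a second pass if needed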
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h
index 4adf89f0d69a..b219055d2153 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h
@@ -20,7 +20,7 @@ namespace pdb {
class NativeFunctionSymbol : public NativeRawSymbol {
public:
NativeFunctionSymbol(NativeSession &Session, SymIndexId Id,
- const codeview::ProcSym &Sym);
+ const codeview::ProcSym &Sym, uint32_t RecordOffset);
~NativeFunctionSymbol() override;
@@ -30,13 +30,15 @@ public:
uint32_t getAddressOffset() const override;
uint32_t getAddressSection() const override;
std::string getName() const override;
- PDB_SymType getSymTag() const override;
uint64_t getLength() const override;
uint32_t getRelativeVirtualAddress() const override;
uint64_t getVirtualAddress() const override;
+ std::unique_ptr<IPDBEnumSymbols>
+ findInlineFramesByVA(uint64_t VA) const override;
protected:
const codeview::ProcSym Sym;
+ uint32_t RecordOffset = 0;
};
} // namespace pdb
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h
new file mode 100644
index 000000000000..2f6aba038ae8
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h
@@ -0,0 +1,46 @@
+//===- NativeInlineSiteSymbol.h - info about inline sites -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEINLINESITESYMBOL_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEINLINESITESYMBOL_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeInlineSiteSymbol : public NativeRawSymbol {
+public:
+ NativeInlineSiteSymbol(NativeSession &Session, SymIndexId Id,
+ const codeview::InlineSiteSym &Sym,
+ uint64_t ParentAddr);
+
+ ~NativeInlineSiteSymbol() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ std::string getName() const override;
+ std::unique_ptr<IPDBEnumLineNumbers>
+ findInlineeLinesByVA(uint64_t VA, uint32_t Length) const override;
+
+private:
+ const codeview::InlineSiteSym Sym;
+ uint64_t ParentAddr;
+
+ void getLineOffset(uint32_t OffsetInFunc, uint32_t &LineOffset,
+ uint32_t &FileOffset) const;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVEINLINESITESYMBOL_H
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
index a7ce82c70b08..5dedc70f11ba 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
@@ -22,7 +22,7 @@ public:
const codeview::LineInfo Line,
uint32_t ColumnNumber, uint32_t Length,
uint32_t Section, uint32_t Offset,
- uint32_t SrcFileId);
+ uint32_t SrcFileId, uint32_t CompilandId);
uint32_t getLineNumber() const override;
uint32_t getLineNumberEnd() const override;
@@ -45,6 +45,7 @@ private:
uint32_t Offset;
uint32_t Length;
uint32_t SrcFileId;
+ uint32_t CompilandId;
};
} // namespace pdb
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h
index 0a1451530f18..9f410e27f4cb 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h
@@ -30,7 +30,6 @@ public:
uint32_t getAddressOffset() const override;
uint32_t getAddressSection() const override;
std::string getName() const override;
- PDB_SymType getSymTag() const override;
uint32_t getRelativeVirtualAddress() const override;
uint64_t getVirtualAddress() const override;
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
index 342e63599e66..5f8fc587e546 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
@@ -110,9 +110,14 @@ public:
const SymbolCache &getSymbolCache() const { return Cache; }
uint32_t getRVAFromSectOffset(uint32_t Section, uint32_t Offset) const;
uint64_t getVAFromSectOffset(uint32_t Section, uint32_t Offset) const;
+ bool moduleIndexForVA(uint64_t VA, uint16_t &ModuleIndex) const;
+ bool moduleIndexForSectOffset(uint32_t Sect, uint32_t Offset,
+ uint16_t &ModuleIndex) const;
+ Expected<ModuleDebugStreamRef> getModuleDebugStream(uint32_t Index) const;
private:
void initializeExeSymbol();
+ void parseSectionContribs();
std::unique_ptr<PDBFile> Pdb;
std::unique_ptr<BumpPtrAllocator> Allocator;
@@ -120,6 +125,12 @@ private:
SymbolCache Cache;
SymIndexId ExeSymbol = 0;
uint64_t LoadAddress = 0;
+
+ /// Map from virtual address to module index.
+ using IMap =
+ IntervalMap<uint64_t, uint16_t, 8, IntervalMapHalfOpenInfo<uint64_t>>;
+ IMap::Allocator IMapAllocator;
+ IMap AddrToModuleIndex;
};
} // namespace pdb
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
index 90fd19a7a2fb..1ff6ca173b2b 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
@@ -37,40 +37,40 @@ class SymbolCache {
/// an Id. Id allocation is an implementation detail, with the only guarantee
/// being that once an Id is allocated, the symbol can be assumed to be
/// cached.
- std::vector<std::unique_ptr<NativeRawSymbol>> Cache;
+ mutable std::vector<std::unique_ptr<NativeRawSymbol>> Cache;
/// For type records from the TPI stream which have been parsed and cached,
/// stores a mapping to SymIndexId of the cached symbol.
- DenseMap<codeview::TypeIndex, SymIndexId> TypeIndexToSymbolId;
+ mutable DenseMap<codeview::TypeIndex, SymIndexId> TypeIndexToSymbolId;
/// For field list members which have been parsed and cached, stores a mapping
/// from (IndexOfClass, MemberIndex) to the corresponding SymIndexId of the
/// cached symbol.
- DenseMap<std::pair<codeview::TypeIndex, uint32_t>, SymIndexId>
+ mutable DenseMap<std::pair<codeview::TypeIndex, uint32_t>, SymIndexId>
FieldListMembersToSymbolId;
/// List of SymIndexIds for each compiland, indexed by compiland index as they
/// appear in the PDB file.
- std::vector<SymIndexId> Compilands;
+ mutable std::vector<SymIndexId> Compilands;
/// List of source files, indexed by unique source file index.
mutable std::vector<std::unique_ptr<NativeSourceFile>> SourceFiles;
+
+ /// Map from string table offset to source file Id.
mutable DenseMap<uint32_t, SymIndexId> FileNameOffsetToId;
/// Map from global symbol offset to SymIndexId.
- DenseMap<uint32_t, SymIndexId> GlobalOffsetToSymbolId;
-
- /// Map from segment and code offset to SymIndexId.
- DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId> AddressToFunctionSymId;
- DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId> AddressToPublicSymId;
+ mutable DenseMap<uint32_t, SymIndexId> GlobalOffsetToSymbolId;
- /// Map from virtual address to module index.
- using IMap =
- IntervalMap<uint64_t, uint16_t, 8, IntervalMapHalfOpenInfo<uint64_t>>;
- IMap::Allocator IMapAllocator;
- IMap AddrToModuleIndex;
+ /// Map from segment and code offset to function symbols.
+ mutable DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId> AddressToSymbolId;
+ /// Map from segment and code offset to public symbols.
+ mutable DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId>
+ AddressToPublicSymId;
- Expected<ModuleDebugStreamRef> getModuleDebugStream(uint32_t Index) const;
+ /// Map from module index and symbol table offset to SymIndexId.
+ mutable DenseMap<std::pair<uint16_t, uint32_t>, SymIndexId>
+ SymTabOffsetToSymbolId;
struct LineTableEntry {
uint64_t Addr;
@@ -83,7 +83,7 @@ class SymbolCache {
std::vector<LineTableEntry> findLineTable(uint16_t Modi) const;
mutable DenseMap<uint16_t, std::vector<LineTableEntry>> LineTable;
- SymIndexId createSymbolPlaceholder() {
+ SymIndexId createSymbolPlaceholder() const {
SymIndexId Id = Cache.size();
Cache.push_back(nullptr);
return Id;
@@ -91,7 +91,7 @@ class SymbolCache {
template <typename ConcreteSymbolT, typename CVRecordT, typename... Args>
SymIndexId createSymbolForType(codeview::TypeIndex TI, codeview::CVType CVT,
- Args &&... ConstructorArgs) {
+ Args &&...ConstructorArgs) const {
CVRecordT Record;
if (auto EC =
codeview::TypeDeserializer::deserializeAs<CVRecordT>(CVT, Record)) {
@@ -104,10 +104,10 @@ class SymbolCache {
}
SymIndexId createSymbolForModifiedType(codeview::TypeIndex ModifierTI,
- codeview::CVType CVT);
+ codeview::CVType CVT) const;
SymIndexId createSimpleType(codeview::TypeIndex TI,
- codeview::ModifierOptions Mods);
+ codeview::ModifierOptions Mods) const;
std::unique_ptr<PDBSymbol> findFunctionSymbolBySectOffset(uint32_t Sect,
uint32_t Offset);
@@ -118,7 +118,7 @@ public:
SymbolCache(NativeSession &Session, DbiStream *Dbi);
template <typename ConcreteSymbolT, typename... Args>
- SymIndexId createSymbol(Args &&... ConstructorArgs) {
+ SymIndexId createSymbol(Args &&...ConstructorArgs) const {
SymIndexId Id = Cache.size();
// Initial construction must not access the cache, since it must be done
@@ -145,7 +145,7 @@ public:
std::unique_ptr<IPDBEnumSymbols>
createGlobalsEnumerator(codeview::SymbolKind Kind);
- SymIndexId findSymbolByTypeIndex(codeview::TypeIndex TI);
+ SymIndexId findSymbolByTypeIndex(codeview::TypeIndex TI) const;
template <typename ConcreteSymbolT, typename... Args>
SymIndexId getOrCreateFieldListMember(codeview::TypeIndex FieldListTI,
@@ -163,6 +163,9 @@ public:
}
SymIndexId getOrCreateGlobalSymbolByOffset(uint32_t Offset);
+ SymIndexId getOrCreateInlineSymbol(codeview::InlineSiteSym Sym,
+ uint64_t ParentAddr, uint16_t Modi,
+ uint32_t RecordOffset) const;
std::unique_ptr<PDBSymbol>
findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, PDB_SymType Type);
@@ -185,9 +188,6 @@ public:
std::unique_ptr<IPDBSourceFile> getSourceFileById(SymIndexId FileId) const;
SymIndexId
getOrCreateSourceFile(const codeview::FileChecksumEntry &Checksum) const;
-
- void parseSectionContribs();
- Optional<uint16_t> getModuleIndexForAddr(uint64_t Addr) const;
};
} // namespace pdb
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
index 1b7fd2d54cb2..70288868ca21 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
@@ -9,7 +9,7 @@
#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H
#define LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
index 72d98e9c2c4d..9ef2ee6a9307 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
@@ -54,16 +54,20 @@ public:
void setVersionHeader(PdbRaw_TpiVer Version);
void addTypeRecord(ArrayRef<uint8_t> Type, Optional<uint32_t> Hash);
+ void addTypeRecords(ArrayRef<uint8_t> Types, ArrayRef<uint16_t> Sizes,
+ ArrayRef<uint32_t> Hashes);
Error finalizeMsfLayout();
- uint32_t getRecordCount() const { return TypeRecords.size(); }
+ uint32_t getRecordCount() const { return TypeRecordCount; }
Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer);
uint32_t calculateSerializedLength();
private:
+ void updateTypeIndexOffsets(ArrayRef<uint16_t> Sizes);
+
uint32_t calculateHashBufferSize() const;
uint32_t calculateIndexOffsetSize() const;
Error finalize();
@@ -71,10 +75,11 @@ private:
msf::MSFBuilder &Msf;
BumpPtrAllocator &Allocator;
+ uint32_t TypeRecordCount = 0;
size_t TypeRecordBytes = 0;
PdbRaw_TpiVer VerHeader = PdbRaw_TpiVer::PdbTpiV80;
- std::vector<ArrayRef<uint8_t>> TypeRecords;
+ std::vector<ArrayRef<uint8_t>> TypeRecBuffers;
std::vector<uint32_t> TypeHashes;
std::vector<codeview::TypeIndexOffset> TypeIndexOffsets;
uint32_t HashStreamIndex = kInvalidStreamIndex;
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
index 45aba013e7c8..802d18a069ee 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
@@ -9,16 +9,15 @@
#ifndef LLVM_DEBUGINFO_PDB_PDBEXTRAS_H
#define LLVM_DEBUGINFO_PDB_PDBEXTRAS_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/raw_ostream.h"
-
+#include <cstdint>
#include <unordered_map>
namespace llvm {
-class raw_ostream;
-
namespace pdb {
using TagStats = std::unordered_map<PDB_SymType, int>;
@@ -51,7 +50,6 @@ void dumpSymbolField(raw_ostream &OS, StringRef Name, T Value, int Indent) {
OS << Name << ": " << Value;
}
-
} // end namespace pdb
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
index 2982146f960c..24cf1e459f92 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -42,7 +42,6 @@ class StringRef;
class raw_ostream;
namespace pdb {
-class IPDBRawSymbol;
class IPDBSession;
#define DECLARE_PDB_SYMBOL_CONCRETE_TYPE(TagValue) \
@@ -141,7 +140,14 @@ public:
StringRef Name,
PDB_NameSearchFlags Flags,
uint32_t RVA) const;
+ std::unique_ptr<IPDBEnumSymbols> findInlineFramesByVA(uint64_t VA) const;
std::unique_ptr<IPDBEnumSymbols> findInlineFramesByRVA(uint32_t RVA) const;
+ std::unique_ptr<IPDBEnumLineNumbers>
+ findInlineeLinesByVA(uint64_t VA, uint32_t Length) const;
+ std::unique_ptr<IPDBEnumLineNumbers>
+ findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const;
+
+ std::string getName() const;
const IPDBRawSymbol &getRawSymbol() const { return *RawSymbol; }
IPDBRawSymbol &getRawSymbol() { return *RawSymbol; }
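The VA-based queries mirror the existing RVA ones and are what an address-based consumer (for example a symbolizer) uses to resolve inline frames. A hedged sketch, assuming an IPDBSession named Session and an absolute address VA; the surrounding lookup via findSymbolByAddress is illustrative:

std::unique_ptr<PDBSymbol> Func =
    Session->findSymbolByAddress(VA, PDB_SymType::Function);
if (Func) {
  if (auto Frames = Func->findInlineFramesByVA(VA)) {
    while (auto Frame = Frames->getNext()) {
      // Line info for the inlined call at VA; a length of 1 covers one address.
      auto Lines = Frame->findInlineeLinesByVA(VA, 1);
      (void)Lines;
    }
  }
}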
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 085e4bb4ccb8..1c8fa11660af 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -43,7 +43,7 @@ public:
bool Demangle = true;
bool RelativeAddresses = false;
bool UntagAddresses = false;
- bool UseNativePDBReader = false;
+ bool UseDIA = false;
std::string DefaultArch;
std::vector<std::string> DsymHints;
std::string FallbackDebugPath;
diff --git a/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h b/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 6ab873218386..e5fca98f9271 100644
--- a/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -82,6 +82,7 @@
X(PostfixExpr) \
X(ConditionalExpr) \
X(MemberExpr) \
+ X(SubobjectExpr) \
X(EnclosingExpr) \
X(CastExpr) \
X(SizeofParamPackExpr) \
@@ -91,10 +92,10 @@
X(PrefixExpr) \
X(FunctionParam) \
X(ConversionExpr) \
+ X(PointerToMemberConversionExpr) \
X(InitListExpr) \
X(FoldExpr) \
X(ThrowExpr) \
- X(UUIDOfExpr) \
X(BoolExpr) \
X(StringLiteral) \
X(LambdaExpr) \
@@ -1656,6 +1657,40 @@ public:
}
};
+class SubobjectExpr : public Node {
+ const Node *Type;
+ const Node *SubExpr;
+ StringView Offset;
+ NodeArray UnionSelectors;
+ bool OnePastTheEnd;
+
+public:
+ SubobjectExpr(const Node *Type_, const Node *SubExpr_, StringView Offset_,
+ NodeArray UnionSelectors_, bool OnePastTheEnd_)
+ : Node(KSubobjectExpr), Type(Type_), SubExpr(SubExpr_), Offset(Offset_),
+ UnionSelectors(UnionSelectors_), OnePastTheEnd(OnePastTheEnd_) {}
+
+ template<typename Fn> void match(Fn F) const {
+ F(Type, SubExpr, Offset, UnionSelectors, OnePastTheEnd);
+ }
+
+ void printLeft(OutputStream &S) const override {
+ SubExpr->print(S);
+ S += ".<";
+ Type->print(S);
+ S += " at offset ";
+ if (Offset.empty()) {
+ S += "0";
+ } else if (Offset[0] == 'n') {
+ S += "-";
+ S += Offset.dropFront();
+ } else {
+ S += Offset;
+ }
+ S += ">";
+ }
+};
+
class EnclosingExpr : public Node {
const StringView Prefix;
const Node *Infix;
@@ -1843,6 +1878,28 @@ public:
}
};
+class PointerToMemberConversionExpr : public Node {
+ const Node *Type;
+ const Node *SubExpr;
+ StringView Offset;
+
+public:
+ PointerToMemberConversionExpr(const Node *Type_, const Node *SubExpr_,
+ StringView Offset_)
+ : Node(KPointerToMemberConversionExpr), Type(Type_), SubExpr(SubExpr_),
+ Offset(Offset_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Type, SubExpr, Offset); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Type->print(S);
+ S += ")(";
+ SubExpr->print(S);
+ S += ")";
+ }
+};
+
class InitListExpr : public Node {
const Node *Ty;
NodeArray Inits;
@@ -1977,21 +2034,6 @@ public:
}
};
-// MSVC __uuidof extension, generated by clang in -fms-extensions mode.
-class UUIDOfExpr : public Node {
- Node *Operand;
-public:
- UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {}
-
- template<typename Fn> void match(Fn F) const { F(Operand); }
-
- void printLeft(OutputStream &S) const override {
- S << "__uuidof(";
- Operand->print(S);
- S << ")";
- }
-};
-
class BoolExpr : public Node {
bool Value;
@@ -2313,9 +2355,9 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
TemplateParamList Params;
public:
- ScopedTemplateParamList(AbstractManglingParser *Parser)
- : Parser(Parser),
- OldNumTemplateParamLists(Parser->TemplateParams.size()) {
+ ScopedTemplateParamList(AbstractManglingParser *TheParser)
+ : Parser(TheParser),
+ OldNumTemplateParamLists(TheParser->TemplateParams.size()) {
Parser->TemplateParams.push_back(&Params);
}
~ScopedTemplateParamList() {
@@ -2437,6 +2479,8 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
Node *parseConversionExpr();
Node *parseBracedExpr();
Node *parseFoldExpr();
+ Node *parsePointerToMemberConversionExpr();
+ Node *parseSubobjectExpr();
/// Parse the <type> production.
Node *parseType();
@@ -4404,6 +4448,50 @@ Node *AbstractManglingParser<Derived, Alloc>::parseFoldExpr() {
return make<FoldExpr>(IsLeftFold, OperatorName, Pack, Init);
}
+// <expression> ::= mc <parameter type> <expr> [<offset number>] E
+//
+// Not yet in the spec: https://github.com/itanium-cxx-abi/cxx-abi/issues/47
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parsePointerToMemberConversionExpr() {
+ Node *Ty = getDerived().parseType();
+ if (!Ty)
+ return nullptr;
+ Node *Expr = getDerived().parseExpr();
+ if (!Expr)
+ return nullptr;
+ StringView Offset = getDerived().parseNumber(true);
+ if (!consumeIf('E'))
+ return nullptr;
+ return make<PointerToMemberConversionExpr>(Ty, Expr, Offset);
+}
+
+// <expression> ::= so <referent type> <expr> [<offset number>] <union-selector>* [p] E
+// <union-selector> ::= _ [<number>]
+//
+// Not yet in the spec: https://github.com/itanium-cxx-abi/cxx-abi/issues/47
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseSubobjectExpr() {
+ Node *Ty = getDerived().parseType();
+ if (!Ty)
+ return nullptr;
+ Node *Expr = getDerived().parseExpr();
+ if (!Expr)
+ return nullptr;
+ StringView Offset = getDerived().parseNumber(true);
+ size_t SelectorsBegin = Names.size();
+ while (consumeIf('_')) {
+ Node *Selector = make<NameType>(parseNumber());
+ if (!Selector)
+ return nullptr;
+ Names.push_back(Selector);
+ }
+ bool OnePastTheEnd = consumeIf('p');
+ if (!consumeIf('E'))
+ return nullptr;
+ return make<SubobjectExpr>(
+ Ty, Expr, Offset, popTrailingNodeArray(SelectorsBegin), OnePastTheEnd);
+}
+
// <expression> ::= <unary operator-name> <expression>
// ::= <binary operator-name> <expression> <expression>
// ::= <ternary operator-name> <expression> <expression> <expression>
@@ -4661,6 +4749,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
return nullptr;
case 'm':
switch (First[1]) {
+ case 'c':
+ First += 2;
+ return parsePointerToMemberConversionExpr();
case 'i':
First += 2;
return getDerived().parseBinaryExpr("-");
@@ -4808,6 +4899,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
return Ex;
return make<CastExpr>("static_cast", T, Ex);
}
+ case 'o':
+ First += 2;
+ return parseSubobjectExpr();
case 'p': {
First += 2;
Node *Child = getDerived().parseExpr();
@@ -4903,6 +4997,43 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
}
}
return nullptr;
+ case 'u': {
+ ++First;
+ Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr);
+ if (!Name)
+ return nullptr;
+ // Special case legacy __uuidof mangling. The 't' and 'z' appear where the
+ // standard encoding expects a <template-arg>, and would be otherwise be
+ // interpreted as <type> node 'short' or 'ellipsis'. However, neither
+ // __uuidof(short) nor __uuidof(...) can actually appear, so there is no
+ // actual conflict here.
+ if (Name->getBaseName() == "__uuidof") {
+ if (numLeft() < 2)
+ return nullptr;
+ if (*First == 't') {
+ ++First;
+ Node *Ty = getDerived().parseType();
+ if (!Ty)
+ return nullptr;
+ return make<CallExpr>(Name, makeNodeArray(&Ty, &Ty + 1));
+ }
+ if (*First == 'z') {
+ ++First;
+ Node *Ex = getDerived().parseExpr();
+ if (!Ex)
+ return nullptr;
+ return make<CallExpr>(Name, makeNodeArray(&Ex, &Ex + 1));
+ }
+ }
+ size_t ExprsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseTemplateArg();
+ if (E == nullptr)
+ return E;
+ Names.push_back(E);
+ }
+ return make<CallExpr>(Name, popTrailingNodeArray(ExprsBegin));
+ }
case '1':
case '2':
case '3':
@@ -4914,21 +5045,6 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
case '9':
return getDerived().parseUnresolvedName();
}
-
- if (consumeIf("u8__uuidoft")) {
- Node *Ty = getDerived().parseType();
- if (!Ty)
- return nullptr;
- return make<UUIDOfExpr>(Ty);
- }
-
- if (consumeIf("u8__uuidofz")) {
- Node *Ex = getDerived().parseExpr();
- if (!Ex)
- return nullptr;
- return make<UUIDOfExpr>(Ex);
- }
-
return nullptr;
}
@@ -4975,6 +5091,16 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSpecialName() {
switch (look()) {
case 'T':
switch (look(1)) {
+ // TA <template-arg> # template parameter object
+ //
+ // Not yet in the spec: https://github.com/itanium-cxx-abi/cxx-abi/issues/63
+ case 'A': {
+ First += 2;
+ Node *Arg = getDerived().parseTemplateArg();
+ if (Arg == nullptr)
+ return nullptr;
+ return make<SpecialName>("template parameter object for ", Arg);
+ }
// TV <type> # virtual table
case 'V': {
First += 2;
@@ -5103,7 +5229,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseEncoding() {
decltype(TemplateParams) OldParams;
public:
- SaveTemplateParams(AbstractManglingParser *Parser) : Parser(Parser) {
+ SaveTemplateParams(AbstractManglingParser *TheParser) : Parser(TheParser) {
OldParams = std::move(Parser->TemplateParams);
Parser->TemplateParams.clear();
}
@@ -5203,7 +5329,12 @@ struct FloatData<long double>
#else
static const size_t mangled_size = 20; // May need to be adjusted to 16 or 24 on other platforms
#endif
- static const size_t max_demangled_size = 40;
+ // `-0x1.ffffffffffffffffffffffffffffp+16383` + 'L' + '\0' == 42 bytes.
+ // 28 'f's * 4 bits == 112 bits, which is the number of mantissa bits.
+ // Negatives are one character longer than positives.
+ // `0x1.` and `p` are constant, and exponents `+16383` and `-16382` are the
+ // same length. 1 sign bit, 112 mantissa bits, and 15 exponent bits == 128.
+ static const size_t max_demangled_size = 42;
static constexpr const char *spec = "%LaL";
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h b/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h
index 04e1936ebbe7..846a5f0818e7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h
+++ b/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h
@@ -52,7 +52,7 @@ class OutputStream {
char *TempPtr = std::end(Temp);
while (N) {
- *--TempPtr = '0' + char(N % 10);
+ *--TempPtr = char('0' + N % 10);
N /= 10;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
index 2562da7cf60b..2e386518f0bf 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -142,11 +142,6 @@ protected:
std::shared_ptr<LegacyJITSymbolResolver> SR,
std::unique_ptr<TargetMachine> TM);
- static ExecutionEngine *(*OrcMCJITReplacementCtor)(
- std::string *ErrorStr, std::shared_ptr<MCJITMemoryManager> MM,
- std::shared_ptr<LegacyJITSymbolResolver> SR,
- std::unique_ptr<TargetMachine> TM);
-
static ExecutionEngine *(*InterpCtor)(std::unique_ptr<Module> M,
std::string *ErrorStr);
@@ -552,7 +547,6 @@ private:
std::string MCPU;
SmallVector<std::string, 4> MAttrs;
bool VerifyModules;
- bool UseOrcMCJITReplacement;
bool EmulatedTLS = true;
public:
@@ -648,17 +642,6 @@ public:
return *this;
}
- // Use OrcMCJITReplacement instead of MCJIT. Off by default.
- LLVM_ATTRIBUTE_DEPRECATED(
- inline void setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement),
- "ORCv1 utilities (including OrcMCJITReplacement) are deprecated. Please "
- "use ORCv2/LLJIT instead (see docs/ORCv2.rst)");
-
- void setUseOrcMCJITReplacement(ORCv1DeprecationAcknowledgement,
- bool UseOrcMCJITReplacement) {
- this->UseOrcMCJITReplacement = UseOrcMCJITReplacement;
- }
-
void setEmulatedTLS(bool EmulatedTLS) {
this->EmulatedTLS = EmulatedTLS;
}
@@ -679,10 +662,6 @@ public:
ExecutionEngine *create(TargetMachine *TM);
};
-void EngineBuilder::setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement) {
- this->UseOrcMCJITReplacement = UseOrcMCJITReplacement;
-}
-
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef)
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITEventListener.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITEventListener.h
index 606b6f7cc128..4eefd993de2b 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITEventListener.h
@@ -20,7 +20,6 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/CBindingWrapping.h"
#include <cstdint>
-#include <vector>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
index 72687682f606..ec78d9db40b6 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
@@ -21,14 +21,6 @@
namespace llvm {
namespace jitlink {
-/// Registers all FDEs in the given eh-frame section with the current process.
-Error registerEHFrameSection(const void *EHFrameSectionAddr,
- size_t EHFrameSectionSize);
-
-/// Deregisters all FDEs in the given eh-frame section with the current process.
-Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
- size_t EHFrameSectionSize);
-
/// Supports registration/deregistration of EH-frames in a target process.
class EHFrameRegistrar {
public:
@@ -42,32 +34,11 @@ public:
/// Registers / Deregisters EH-frames in the current process.
class InProcessEHFrameRegistrar final : public EHFrameRegistrar {
public:
- /// Get a reference to the InProcessEHFrameRegistrar singleton.
- static InProcessEHFrameRegistrar &getInstance();
-
- InProcessEHFrameRegistrar(const InProcessEHFrameRegistrar &) = delete;
- InProcessEHFrameRegistrar &
- operator=(const InProcessEHFrameRegistrar &) = delete;
-
- InProcessEHFrameRegistrar(InProcessEHFrameRegistrar &&) = delete;
- InProcessEHFrameRegistrar &operator=(InProcessEHFrameRegistrar &&) = delete;
-
Error registerEHFrames(JITTargetAddress EHFrameSectionAddr,
- size_t EHFrameSectionSize) override {
- return registerEHFrameSection(
- jitTargetAddressToPointer<void *>(EHFrameSectionAddr),
- EHFrameSectionSize);
- }
+ size_t EHFrameSectionSize) override;
Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr,
- size_t EHFrameSectionSize) override {
- return deregisterEHFrameSection(
- jitTargetAddressToPointer<void *>(EHFrameSectionAddr),
- EHFrameSectionSize);
- }
-
-private:
- InProcessEHFrameRegistrar();
+ size_t EHFrameSectionSize) override;
};
using StoreFrameRangeFunction =
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h
index 9f6ea5271f4b..8912f3a2db45 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h
@@ -19,11 +19,20 @@
namespace llvm {
namespace jitlink {
-/// jit-link the given ObjBuffer, which must be a ELF object file.
+/// Create a LinkGraph from an ELF relocatable object.
+///
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer);
+
+/// Link the given graph.
///
/// Uses conservative defaults for GOT and stub handling based on the target
/// platform.
-void jitLink_ELF(std::unique_ptr<JITLinkContext> Ctx);
+void link_ELF(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
} // end namespace jitlink
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h
index 7860088f3569..1423b0c30b2a 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h
@@ -44,8 +44,20 @@ enum ELFX86RelocationKind : Edge::Kind {
} // end namespace ELF_x86_64_Edges
+/// Create a LinkGraph from an ELF/x86-64 relocatable object.
+///
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_x86_64(MemoryBufferRef ObjectBuffer);
+
/// jit-link the given object buffer, which must be a ELF x86-64 object file.
-void jitLink_ELF_x86_64(std::unique_ptr<JITLinkContext> Ctx);
+void link_ELF_x86_64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
+
+/// Return the string name of the given ELF x86-64 edge kind.
+StringRef getELFX86RelocationKindName(Edge::Kind R);
} // end namespace jitlink
} // end namespace llvm
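A rough usage sketch of the split ELF/x86-64 entry points: the LinkGraph is now built separately and can be inspected before being handed to link_ELF_x86_64 together with a JITLinkContext. The function and variable names here are illustrative only.

#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: create a graph from an ELF/x86-64 relocatable object and dump a
// few of its properties. ObjBuffer must outlive the graph (see note above).
static llvm::Error inspectELFObject(llvm::MemoryBufferRef ObjBuffer) {
  auto G = llvm::jitlink::createLinkGraphFromELFObject_x86_64(ObjBuffer);
  if (!G)
    return G.takeError();
  llvm::errs() << "graph " << (*G)->getName() << ", pointer size "
               << (*G)->getPointerSize() << "\n";
  // The graph could now be passed to link_ELF_x86_64 with a JITLinkContext.
  return llvm::Error::success();
}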
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index 76f9dea4160f..e8c0e28b83aa 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -395,6 +395,10 @@ public:
return Name;
}
+ /// Rename this symbol. The client is responsible for updating scope and
+ /// linkage if this name-change requires it.
+ void setName(StringRef Name) { this->Name = Name; }
+
/// Returns true if this Symbol has content (potentially) defined within this
/// object file (i.e. is anything but an external or absolute symbol).
bool isDefined() const {
@@ -782,21 +786,48 @@ public:
Section::const_block_iterator, const Block *,
getSectionConstBlocks>;
- LinkGraph(std::string Name, unsigned PointerSize,
+ LinkGraph(std::string Name, const Triple &TT, unsigned PointerSize,
support::endianness Endianness)
- : Name(std::move(Name)), PointerSize(PointerSize),
+ : Name(std::move(Name)), TT(TT), PointerSize(PointerSize),
Endianness(Endianness) {}
/// Returns the name of this graph (usually the name of the original
/// underlying MemoryBuffer).
const std::string &getName() { return Name; }
+ /// Returns the target triple for this Graph.
+ const Triple &getTargetTriple() const { return TT; }
+
/// Returns the pointer size for use in this graph.
unsigned getPointerSize() const { return PointerSize; }
/// Returns the endianness of content in this graph.
support::endianness getEndianness() const { return Endianness; }
+ /// Allocate a copy of the given string using the LinkGraph's allocator.
+ /// This can be useful when renaming symbols or adding new content to the
+ /// graph.
+ StringRef allocateString(StringRef Source) {
+ auto *AllocatedBuffer = Allocator.Allocate<char>(Source.size());
+ llvm::copy(Source, AllocatedBuffer);
+ return StringRef(AllocatedBuffer, Source.size());
+ }
+
+ /// Allocate a copy of the given string using the LinkGraph's allocator.
+ /// This can be useful when renaming symbols or adding new content to the
+ /// graph.
+ ///
+ /// Note: This Twine-based overload requires an extra string copy and an
+ /// extra heap allocation for large strings. The StringRef overload should
+ /// be preferred where possible.
+ StringRef allocateString(Twine Source) {
+ SmallString<256> TmpBuffer;
+ auto SourceStr = Source.toStringRef(TmpBuffer);
+ auto *AllocatedBuffer = Allocator.Allocate<char>(SourceStr.size());
+ llvm::copy(SourceStr, AllocatedBuffer);
+ return StringRef(AllocatedBuffer, SourceStr.size());
+ }
+
/// Create a section with the given name, protection flags, and alignment.
Section &createSection(StringRef Name, sys::Memory::ProtectionFlags Prot) {
std::unique_ptr<Section> Sec(new Section(Name, Prot, Sections.size()));
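A hedged sketch of how the new Symbol::setName and LinkGraph::allocateString APIs might be combined in a graph pass; the "myjit$" prefix and pass name are invented. Allocating the new name via the graph keeps it valid for the graph's lifetime.

#include "llvm/ADT/Twine.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"

// Hypothetical pass: prefix every named defined symbol. Would typically be
// registered from modifyPassConfig, e.g.
//   Config.PostPrunePasses.push_back(prefixDefinedSymbols);
static llvm::Error prefixDefinedSymbols(llvm::jitlink::LinkGraph &G) {
  for (auto *Sym : G.defined_symbols()) {
    if (!Sym->hasName())
      continue;
    auto NewName = G.allocateString(llvm::Twine("myjit$") + Sym->getName());
    Sym->setName(NewName);
  }
  return llvm::Error::success();
}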
@@ -959,7 +990,7 @@ public:
Section &Sec = Sym.getBlock().getSection();
Sec.removeSymbol(Sym);
}
- Sym.makeExternal(createAddressable(false));
+ Sym.makeExternal(createAddressable(0, false));
ExternalSymbols.insert(&Sym);
}
@@ -1019,6 +1050,7 @@ private:
BumpPtrAllocator Allocator;
std::string Name;
+ Triple TT;
unsigned PointerSize;
support::endianness Endianness;
SectionList Sections;
@@ -1191,15 +1223,31 @@ struct PassConfiguration {
/// Notable use cases: Building GOT, stub, and TLV symbols.
LinkGraphPassList PostPrunePasses;
+ /// Post-allocation passes.
+ ///
+ /// These passes are called on the graph after memory has been allocated and
+ /// defined nodes have been assigned their final addresses, but before the
+ /// context has been notified of these addresses. At this point externals
+ /// have not been resolved, and symbol content has not yet been copied into
+ /// working memory.
+ ///
+ /// Notable use cases: Setting up data structures associated with addresses
+ /// of defined symbols (e.g. a mapping of __dso_handle to JITDylib* for the
+ /// JIT runtime) -- using a PostAllocationPass for this ensures that the
+ /// data structures are in-place before any query for resolved symbols
+ /// can complete.
+ LinkGraphPassList PostAllocationPasses;
+
/// Pre-fixup passes.
///
/// These passes are called on the graph after memory has been allocated,
- /// content copied into working memory, and nodes have been assigned their
- /// final addresses.
+ /// content copied into working memory, and all nodes (including externals)
+ /// have been assigned their final addresses, but before any fixups have been
+ /// applied.
///
/// Notable use cases: Late link-time optimizations like GOT and stub
/// elimination.
- LinkGraphPassList PostAllocationPasses;
+ LinkGraphPassList PreFixupPasses;
/// Post-fixup passes.
///
@@ -1255,16 +1303,18 @@ class JITLinkContext {
public:
using LookupMap = DenseMap<StringRef, SymbolLookupFlags>;
+ /// Create a JITLinkContext.
+ JITLinkContext(const JITLinkDylib *JD) : JD(JD) {}
+
/// Destroy a JITLinkContext.
virtual ~JITLinkContext();
+ /// Return the JITLinkDylib that this link is targeting, if any.
+ const JITLinkDylib *getJITLinkDylib() const { return JD; }
+
/// Return the MemoryManager to be used for this link.
virtual JITLinkMemoryManager &getMemoryManager() = 0;
- /// Returns a StringRef for the object buffer.
- /// This method can not be called once takeObjectBuffer has been called.
- virtual MemoryBufferRef getObjectBuffer() const = 0;
-
/// Notify this context that linking failed.
/// Called by JITLink if linking cannot be completed.
virtual void notifyFailed(Error Err) = 0;
@@ -1279,7 +1329,11 @@ public:
/// their final memory locations in the target process. At this point the
/// LinkGraph can be inspected to build a symbol table, however the block
/// content will not generally have been copied to the target location yet.
- virtual void notifyResolved(LinkGraph &G) = 0;
+ ///
+ /// If the client detects an error in the LinkGraph state (e.g. unexpected or
+ /// missing symbols) they may return an error here. The error will be
+ /// propagated to notifyFailed and the linker will bail out.
+ virtual Error notifyResolved(LinkGraph &G) = 0;
/// Called by JITLink to notify the context that the object has been
/// finalized (i.e. emitted to memory and memory permissions set). If all of
@@ -1305,16 +1359,25 @@ public:
/// Called by JITLink to modify the pass pipeline prior to linking.
/// The default version performs no modification.
virtual Error modifyPassConfig(const Triple &TT, PassConfiguration &Config);
+
+private:
+ const JITLinkDylib *JD = nullptr;
};
/// Marks all symbols in a graph live. This can be used as a default,
/// conservative mark-live implementation.
Error markAllSymbolsLive(LinkGraph &G);
-/// Basic JITLink implementation.
+/// Create a LinkGraph from the given object buffer.
///
-/// This function will use sensible defaults for GOT and Stub handling.
-void jitLink(std::unique_ptr<JITLinkContext> Ctx);
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromObject(MemoryBufferRef ObjectBuffer);
+
+/// Link the given graph.
+void link(std::unique_ptr<LinkGraph> G, std::unique_ptr<JITLinkContext> Ctx);
} // end namespace jitlink
} // end namespace llvm
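Taken together, the replacement for the old single-shot jitLink() entry point is a two-phase flow; a minimal sketch, assuming the object buffer and a concrete JITLinkContext are supplied by the caller:

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include <memory>

// Sketch: format-agnostic graph construction followed by linking. Ownership
// of the graph and the context passes to link(); results are reported through
// the context's notifyResolved/notifyFinalized/notifyFailed callbacks
// (notifyResolved now returns Error, as shown above).
static llvm::Error
linkObject(llvm::MemoryBufferRef ObjBuffer,
           std::unique_ptr<llvm::jitlink::JITLinkContext> Ctx) {
  auto G = llvm::jitlink::createLinkGraphFromObject(ObjBuffer);
  if (!G)
    return G.takeError();
  llvm::jitlink::link(std::move(*G), std::move(Ctx));
  return llvm::Error::success();
}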
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h
new file mode 100644
index 000000000000..2aa88cb50074
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h
@@ -0,0 +1,24 @@
+//===-- JITLinkDylib.h - JITLink Dylib type ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the JITLinkDylib API.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKDYLIB_H
+#define LLVM_EXECUTIONENGINE_JITLINK_JITLINKDYLIB_H
+
+namespace llvm {
+namespace jitlink {
+
+class JITLinkDylib {};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINKDYLIB_H
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
index 0c8514a60a50..cee7d6b09c48 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -14,10 +14,11 @@
#define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/Memory.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
+#include "llvm/Support/Memory.h"
#include <cstdint>
#include <future>
@@ -93,18 +94,28 @@ public:
virtual ~JITLinkMemoryManager();
/// Create an Allocation object.
+ ///
+ /// The JD argument represents the target JITLinkDylib, and can be used by
+ /// JITLinkMemoryManager implementers to manage per-dylib allocation pools
+ /// (e.g. one pre-reserved address space slab per dylib to ensure that all
+ /// allocations for the dylib are within a certain range). The JD argument
+ /// may be null (representing an allocation not associated with any
+ /// JITDylib).
+ ///
+ /// The request argument describes the segment sizes and permissions being
+ /// requested.
virtual Expected<std::unique_ptr<Allocation>>
- allocate(const SegmentsRequestMap &Request) = 0;
+ allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) = 0;
};
/// A JITLinkMemoryManager that allocates in-process memory.
class InProcessMemoryManager : public JITLinkMemoryManager {
public:
Expected<std::unique_ptr<Allocation>>
- allocate(const SegmentsRequestMap &Request) override;
+ allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) override;
};
} // end namespace jitlink
} // end namespace llvm
-#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
+#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
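A short sketch of calling the updated allocate() signature; populating the SegmentsRequestMap is unchanged and elided here, and passing a null JITLinkDylib keeps the previous behaviour of an allocation not tied to any dylib.

#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"

// Sketch only: request memory for a link that is not associated with a dylib.
static llvm::Error allocateForLink(
    llvm::jitlink::JITLinkMemoryManager &MemMgr,
    const llvm::jitlink::JITLinkMemoryManager::SegmentsRequestMap &Request) {
  auto Alloc = MemMgr.allocate(/*JD=*/nullptr, Request);
  if (!Alloc)
    return Alloc.takeError();
  // (*Alloc) can now be queried for working/target memory per segment and
  // finalized once content has been copied in.
  return llvm::Error::success();
}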
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h
index 7facb657a51c..b8432c4d26c6 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h
@@ -18,11 +18,20 @@
namespace llvm {
namespace jitlink {
+/// Create a LinkGraph from a MachO relocatable object.
+///
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromMachOObject(MemoryBufferRef ObjectBuffer);
+
/// jit-link the given ObjBuffer, which must be a MachO object file.
///
/// Uses conservative defaults for GOT and stub handling based on the target
/// platform.
-void jitLink_MachO(std::unique_ptr<JITLinkContext> Ctx);
+void link_MachO(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
} // end namespace jitlink
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
index d70b545fff86..c6aed2b60eac 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
@@ -40,6 +40,14 @@ enum MachOARM64RelocationKind : Edge::Kind {
} // namespace MachO_arm64_Edges
+/// Create a LinkGraph from a MachO/arm64 relocatable object.
+///
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromMachOObject_arm64(MemoryBufferRef ObjectBuffer);
+
/// jit-link the given object buffer, which must be a MachO arm64 object file.
///
/// If PrePrunePasses is empty then a default mark-live pass will be inserted
@@ -49,7 +57,8 @@ enum MachOARM64RelocationKind : Edge::Kind {
/// If PostPrunePasses is empty then a default GOT-and-stubs insertion pass will
/// be inserted. If PostPrunePasses is not empty then the caller is responsible
/// for including a pass to insert GOT and stub edges.
-void jitLink_MachO_arm64(std::unique_ptr<JITLinkContext> Ctx);
+void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
/// Return the string name of the given MachO arm64 edge kind.
StringRef getMachOARM64RelocationKindName(Edge::Kind R);
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
index 27fcdf4fa990..66c53d8c8291 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
@@ -45,7 +45,15 @@ enum MachOX86RelocationKind : Edge::Kind {
} // namespace MachO_x86_64_Edges
-/// jit-link the given object buffer, which must be a MachO x86-64 object file.
+/// Create a LinkGraph from a MachO/x86-64 relocatable object.
+///
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromMachOObject_x86_64(MemoryBufferRef ObjectBuffer);
+
+/// jit-link the given LinkGraph.
///
/// If PrePrunePasses is empty then a default mark-live pass will be inserted
/// that will mark all exported atoms live. If PrePrunePasses is not empty, the
@@ -54,7 +62,8 @@ enum MachOX86RelocationKind : Edge::Kind {
/// If PostPrunePasses is empty then a default GOT-and-stubs insertion pass will
/// be inserted. If PostPrunePasses is not empty then the caller is responsible
/// for including a pass to insert GOT and stub edges.
-void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx);
+void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
/// Return the string name of the given MachO x86-64 edge kind.
StringRef getMachOX86RelocationKindName(Edge::Kind R);
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITSymbol.h
index 6f0030a18f47..9bbdd21f77de 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITSymbol.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITSymbol.h
@@ -429,7 +429,7 @@ public:
virtual JITSymbol findSymbol(const std::string &Name) = 0;
private:
- virtual void anchor();
+ void anchor() override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 9ecc0464dec1..91b12fd2277a 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -20,12 +20,10 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
-#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/Layer.h"
#include "llvm/ExecutionEngine/Orc/LazyReexports.h"
-#include "llvm/ExecutionEngine/Orc/Legacy.h"
-#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/Orc/Speculation.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
@@ -96,7 +94,8 @@ public:
/// Emits the given module. This should not be called by clients: it will be
/// called by the JIT when a definition added via the add method is requested.
- void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
+ void emit(std::unique_ptr<MaterializationResponsibility> R,
+ ThreadSafeModule TSM) override;
private:
struct PerDylibResources {
@@ -120,7 +119,8 @@ private:
void expandPartition(GlobalValueSet &Partition);
- void emitPartition(MaterializationResponsibility R, ThreadSafeModule TSM,
+ void emitPartition(std::unique_ptr<MaterializationResponsibility> R,
+ ThreadSafeModule TSM,
IRMaterializationUnit::SymbolNameToDefinitionMap Defs);
mutable std::mutex CODLayerMutex;
@@ -134,635 +134,6 @@ private:
ImplSymbolMap *AliaseeImpls = nullptr;
};
-/// Compile-on-demand layer.
-///
-/// When a module is added to this layer a stub is created for each of its
-/// function definitions. The stubs and other global values are immediately
-/// added to the layer below. When a stub is called it triggers the extraction
-/// of the function body from the original module. The extracted body is then
-/// compiled and executed.
-template <typename BaseLayerT,
- typename CompileCallbackMgrT = JITCompileCallbackManager,
- typename IndirectStubsMgrT = IndirectStubsManager>
-class LegacyCompileOnDemandLayer {
-private:
- template <typename MaterializerFtor>
- class LambdaMaterializer final : public ValueMaterializer {
- public:
- LambdaMaterializer(MaterializerFtor M) : M(std::move(M)) {}
-
- Value *materialize(Value *V) final { return M(V); }
-
- private:
- MaterializerFtor M;
- };
-
- template <typename MaterializerFtor>
- LambdaMaterializer<MaterializerFtor>
- createLambdaMaterializer(MaterializerFtor M) {
- return LambdaMaterializer<MaterializerFtor>(std::move(M));
- }
-
- // Provide type-erasure for the Modules and MemoryManagers.
- template <typename ResourceT>
- class ResourceOwner {
- public:
- ResourceOwner() = default;
- ResourceOwner(const ResourceOwner &) = delete;
- ResourceOwner &operator=(const ResourceOwner &) = delete;
- virtual ~ResourceOwner() = default;
-
- virtual ResourceT& getResource() const = 0;
- };
-
- template <typename ResourceT, typename ResourcePtrT>
- class ResourceOwnerImpl : public ResourceOwner<ResourceT> {
- public:
- ResourceOwnerImpl(ResourcePtrT ResourcePtr)
- : ResourcePtr(std::move(ResourcePtr)) {}
-
- ResourceT& getResource() const override { return *ResourcePtr; }
-
- private:
- ResourcePtrT ResourcePtr;
- };
-
- template <typename ResourceT, typename ResourcePtrT>
- std::unique_ptr<ResourceOwner<ResourceT>>
- wrapOwnership(ResourcePtrT ResourcePtr) {
- using RO = ResourceOwnerImpl<ResourceT, ResourcePtrT>;
- return std::make_unique<RO>(std::move(ResourcePtr));
- }
-
- struct LogicalDylib {
- struct SourceModuleEntry {
- std::unique_ptr<Module> SourceMod;
- std::set<Function*> StubsToClone;
- };
-
- using SourceModulesList = std::vector<SourceModuleEntry>;
- using SourceModuleHandle = typename SourceModulesList::size_type;
-
- LogicalDylib() = default;
-
- LogicalDylib(VModuleKey K, std::shared_ptr<SymbolResolver> BackingResolver,
- std::unique_ptr<IndirectStubsMgrT> StubsMgr)
- : K(std::move(K)), BackingResolver(std::move(BackingResolver)),
- StubsMgr(std::move(StubsMgr)) {}
-
- SourceModuleHandle addSourceModule(std::unique_ptr<Module> M) {
- SourceModuleHandle H = SourceModules.size();
- SourceModules.push_back(SourceModuleEntry());
- SourceModules.back().SourceMod = std::move(M);
- return H;
- }
-
- Module& getSourceModule(SourceModuleHandle H) {
- return *SourceModules[H].SourceMod;
- }
-
- std::set<Function*>& getStubsToClone(SourceModuleHandle H) {
- return SourceModules[H].StubsToClone;
- }
-
- JITSymbol findSymbol(BaseLayerT &BaseLayer, const std::string &Name,
- bool ExportedSymbolsOnly) {
- if (auto Sym = StubsMgr->findStub(Name, ExportedSymbolsOnly))
- return Sym;
- for (auto BLK : BaseLayerVModuleKeys)
- if (auto Sym = BaseLayer.findSymbolIn(BLK, Name, ExportedSymbolsOnly))
- return Sym;
- else if (auto Err = Sym.takeError())
- return std::move(Err);
- return nullptr;
- }
-
- Error removeModulesFromBaseLayer(BaseLayerT &BaseLayer) {
- for (auto &BLK : BaseLayerVModuleKeys)
- if (auto Err = BaseLayer.removeModule(BLK))
- return Err;
- return Error::success();
- }
-
- VModuleKey K;
- std::shared_ptr<SymbolResolver> BackingResolver;
- std::unique_ptr<IndirectStubsMgrT> StubsMgr;
- SymbolLinkagePromoter PromoteSymbols;
- SourceModulesList SourceModules;
- std::vector<VModuleKey> BaseLayerVModuleKeys;
- };
-
-public:
-
- /// Module partitioning functor.
- using PartitioningFtor = std::function<std::set<Function*>(Function&)>;
-
- /// Builder for IndirectStubsManagers.
- using IndirectStubsManagerBuilderT =
- std::function<std::unique_ptr<IndirectStubsMgrT>()>;
-
- using SymbolResolverGetter =
- std::function<std::shared_ptr<SymbolResolver>(VModuleKey K)>;
-
- using SymbolResolverSetter =
- std::function<void(VModuleKey K, std::shared_ptr<SymbolResolver> R)>;
-
- /// Construct a compile-on-demand layer instance.
- LLVM_ATTRIBUTE_DEPRECATED(
- LegacyCompileOnDemandLayer(
- ExecutionSession &ES, BaseLayerT &BaseLayer,
- SymbolResolverGetter GetSymbolResolver,
- SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
- CompileCallbackMgrT &CallbackMgr,
- IndirectStubsManagerBuilderT CreateIndirectStubsManager,
- bool CloneStubsIntoPartitions = true),
- "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
- "use "
- "the ORCv2 LegacyCompileOnDemandLayer instead");
-
- /// Legacy layer constructor with deprecation acknowledgement.
- LegacyCompileOnDemandLayer(
- ORCv1DeprecationAcknowledgement, ExecutionSession &ES,
- BaseLayerT &BaseLayer, SymbolResolverGetter GetSymbolResolver,
- SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
- CompileCallbackMgrT &CallbackMgr,
- IndirectStubsManagerBuilderT CreateIndirectStubsManager,
- bool CloneStubsIntoPartitions = true)
- : ES(ES), BaseLayer(BaseLayer),
- GetSymbolResolver(std::move(GetSymbolResolver)),
- SetSymbolResolver(std::move(SetSymbolResolver)),
- Partition(std::move(Partition)), CompileCallbackMgr(CallbackMgr),
- CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)),
- CloneStubsIntoPartitions(CloneStubsIntoPartitions) {}
-
- ~LegacyCompileOnDemandLayer() {
- // FIXME: Report error on log.
- while (!LogicalDylibs.empty())
- consumeError(removeModule(LogicalDylibs.begin()->first));
- }
-
- /// Add a module to the compile-on-demand layer.
- Error addModule(VModuleKey K, std::unique_ptr<Module> M) {
-
- assert(!LogicalDylibs.count(K) && "VModuleKey K already in use");
- auto I = LogicalDylibs.insert(
- LogicalDylibs.end(),
- std::make_pair(K, LogicalDylib(K, GetSymbolResolver(K),
- CreateIndirectStubsManager())));
-
- return addLogicalModule(I->second, std::move(M));
- }
-
- /// Add extra modules to an existing logical module.
- Error addExtraModule(VModuleKey K, std::unique_ptr<Module> M) {
- return addLogicalModule(LogicalDylibs[K], std::move(M));
- }
-
- /// Remove the module represented by the given key.
- ///
- /// This will remove all modules in the layers below that were derived from
- /// the module represented by K.
- Error removeModule(VModuleKey K) {
- auto I = LogicalDylibs.find(K);
- assert(I != LogicalDylibs.end() && "VModuleKey K not valid here");
- auto Err = I->second.removeModulesFromBaseLayer(BaseLayer);
- LogicalDylibs.erase(I);
- return Err;
- }
-
- /// Search for the given named symbol.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it exists.
- JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) {
- for (auto &KV : LogicalDylibs) {
- if (auto Sym = KV.second.StubsMgr->findStub(Name, ExportedSymbolsOnly))
- return Sym;
- if (auto Sym =
- findSymbolIn(KV.first, std::string(Name), ExportedSymbolsOnly))
- return Sym;
- else if (auto Err = Sym.takeError())
- return std::move(Err);
- }
- return BaseLayer.findSymbol(std::string(Name), ExportedSymbolsOnly);
- }
-
- /// Get the address of a symbol provided by this layer, or some layer
- /// below this one.
- JITSymbol findSymbolIn(VModuleKey K, const std::string &Name,
- bool ExportedSymbolsOnly) {
- assert(LogicalDylibs.count(K) && "VModuleKey K is not valid here");
- return LogicalDylibs[K].findSymbol(BaseLayer, Name, ExportedSymbolsOnly);
- }
-
- /// Update the stub for the given function to point at FnBodyAddr.
- /// This can be used to support re-optimization.
- /// @return true if the function exists and the stub is updated, false
- /// otherwise.
- //
- // FIXME: We should track and free associated resources (unused compile
- // callbacks, uncompiled IR, and no-longer-needed/reachable function
- // implementations).
- Error updatePointer(std::string FuncName, JITTargetAddress FnBodyAddr) {
- //Find out which logical dylib contains our symbol
- auto LDI = LogicalDylibs.begin();
- for (auto LDE = LogicalDylibs.end(); LDI != LDE; ++LDI) {
- if (auto LMResources =
- LDI->getLogicalModuleResourcesForSymbol(FuncName, false)) {
- Module &SrcM = LMResources->SourceModule->getResource();
- std::string CalledFnName = mangle(FuncName, SrcM.getDataLayout());
- if (auto Err = LMResources->StubsMgr->updatePointer(CalledFnName,
- FnBodyAddr))
- return Err;
- return Error::success();
- }
- }
- return make_error<JITSymbolNotFound>(FuncName);
- }
-
-private:
- Error addLogicalModule(LogicalDylib &LD, std::unique_ptr<Module> SrcMPtr) {
-
- // Rename anonymous globals and promote linkage to ensure that everything
- // will resolve properly after we partition SrcM.
- LD.PromoteSymbols(*SrcMPtr);
-
- // Create a logical module handle for SrcM within the logical dylib.
- Module &SrcM = *SrcMPtr;
- auto LMId = LD.addSourceModule(std::move(SrcMPtr));
-
- // Create stub functions.
- const DataLayout &DL = SrcM.getDataLayout();
-
- typename IndirectStubsMgrT::StubInitsMap StubInits;
- for (auto &F : SrcM) {
- // Skip declarations.
- if (F.isDeclaration())
- continue;
-
- // Skip weak functions for which we already have definitions.
- auto MangledName = mangle(F.getName(), DL);
- if (F.hasWeakLinkage() || F.hasLinkOnceLinkage()) {
- if (auto Sym = LD.findSymbol(BaseLayer, MangledName, false))
- continue;
- else if (auto Err = Sym.takeError())
- return Err;
- }
-
- // Record all functions defined by this module.
- if (CloneStubsIntoPartitions)
- LD.getStubsToClone(LMId).insert(&F);
-
- // Create a callback, associate it with the stub for the function,
- // and set the compile action to compile the partition containing the
- // function.
- auto CompileAction = [this, &LD, LMId, &F]() -> JITTargetAddress {
- if (auto FnImplAddrOrErr = this->extractAndCompile(LD, LMId, F))
- return *FnImplAddrOrErr;
- else {
- // FIXME: Report error, return to 'abort' or something similar.
- consumeError(FnImplAddrOrErr.takeError());
- return 0;
- }
- };
- if (auto CCAddr =
- CompileCallbackMgr.getCompileCallback(std::move(CompileAction)))
- StubInits[MangledName] =
- std::make_pair(*CCAddr, JITSymbolFlags::fromGlobalValue(F));
- else
- return CCAddr.takeError();
- }
-
- if (auto Err = LD.StubsMgr->createStubs(StubInits))
- return Err;
-
- // If this module doesn't contain any globals, aliases, or module flags then
- // we can bail out early and avoid the overhead of creating and managing an
- // empty globals module.
- if (SrcM.global_empty() && SrcM.alias_empty() &&
- !SrcM.getModuleFlagsMetadata())
- return Error::success();
-
- // Create the GlobalValues module.
- auto GVsM = std::make_unique<Module>((SrcM.getName() + ".globals").str(),
- SrcM.getContext());
- GVsM->setDataLayout(DL);
-
- ValueToValueMapTy VMap;
-
- // Clone global variable decls.
- for (auto &GV : SrcM.globals())
- if (!GV.isDeclaration() && !VMap.count(&GV))
- cloneGlobalVariableDecl(*GVsM, GV, &VMap);
-
- // And the aliases.
- for (auto &A : SrcM.aliases())
- if (!VMap.count(&A))
- cloneGlobalAliasDecl(*GVsM, A, VMap);
-
- // Clone the module flags.
- cloneModuleFlagsMetadata(*GVsM, SrcM, VMap);
-
- // Now we need to clone the GV and alias initializers.
-
- // Initializers may refer to functions declared (but not defined) in this
- // module. Build a materializer to clone decls on demand.
- auto Materializer = createLambdaMaterializer(
- [&LD, &GVsM](Value *V) -> Value* {
- if (auto *F = dyn_cast<Function>(V)) {
- // Decls in the original module just get cloned.
- if (F->isDeclaration())
- return cloneFunctionDecl(*GVsM, *F);
-
- // Definitions in the original module (which we have emitted stubs
- // for at this point) get turned into a constant alias to the stub
- // instead.
- const DataLayout &DL = GVsM->getDataLayout();
- std::string FName = mangle(F->getName(), DL);
- unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(F->getType());
- JITTargetAddress StubAddr =
- LD.StubsMgr->findStub(FName, false).getAddress();
-
- ConstantInt *StubAddrCI =
- ConstantInt::get(GVsM->getContext(), APInt(PtrBitWidth, StubAddr));
- Constant *Init = ConstantExpr::getCast(Instruction::IntToPtr,
- StubAddrCI, F->getType());
- return GlobalAlias::create(F->getFunctionType(),
- F->getType()->getAddressSpace(),
- F->getLinkage(), F->getName(),
- Init, GVsM.get());
- }
- // else....
- return nullptr;
- });
-
- // Clone the global variable initializers.
- for (auto &GV : SrcM.globals())
- if (!GV.isDeclaration())
- moveGlobalVariableInitializer(GV, VMap, &Materializer);
-
- // Clone the global alias initializers.
- for (auto &A : SrcM.aliases()) {
- auto *NewA = cast<GlobalAlias>(VMap[&A]);
- assert(NewA && "Alias not cloned?");
- Value *Init = MapValue(A.getAliasee(), VMap, RF_None, nullptr,
- &Materializer);
- NewA->setAliasee(cast<Constant>(Init));
- }
-
- // Build a resolver for the globals module and add it to the base layer.
- auto LegacyLookup = [this, &LD](StringRef Name) -> JITSymbol {
- if (auto Sym = LD.StubsMgr->findStub(Name, false))
- return Sym;
-
- if (auto Sym = LD.findSymbol(BaseLayer, std::string(Name), false))
- return Sym;
- else if (auto Err = Sym.takeError())
- return std::move(Err);
-
- return nullptr;
- };
-
- auto GVsResolver = createSymbolResolver(
- [&LD, LegacyLookup](const SymbolNameSet &Symbols) {
- auto RS = getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup);
-
- if (!RS) {
- logAllUnhandledErrors(
- RS.takeError(), errs(),
- "CODLayer/GVsResolver responsibility set lookup failed: ");
- return SymbolNameSet();
- }
-
- if (RS->size() == Symbols.size())
- return *RS;
-
- SymbolNameSet NotFoundViaLegacyLookup;
- for (auto &S : Symbols)
- if (!RS->count(S))
- NotFoundViaLegacyLookup.insert(S);
- auto RS2 =
- LD.BackingResolver->getResponsibilitySet(NotFoundViaLegacyLookup);
-
- for (auto &S : RS2)
- (*RS).insert(S);
-
- return *RS;
- },
- [this, &LD,
- LegacyLookup](std::shared_ptr<AsynchronousSymbolQuery> Query,
- SymbolNameSet Symbols) {
- auto NotFoundViaLegacyLookup =
- lookupWithLegacyFn(ES, *Query, Symbols, LegacyLookup);
- return LD.BackingResolver->lookup(Query, NotFoundViaLegacyLookup);
- });
-
- SetSymbolResolver(LD.K, std::move(GVsResolver));
-
- if (auto Err = BaseLayer.addModule(LD.K, std::move(GVsM)))
- return Err;
-
- LD.BaseLayerVModuleKeys.push_back(LD.K);
-
- return Error::success();
- }
-
- static std::string mangle(StringRef Name, const DataLayout &DL) {
- std::string MangledName;
- {
- raw_string_ostream MangledNameStream(MangledName);
- Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
- }
- return MangledName;
- }
-
- Expected<JITTargetAddress>
- extractAndCompile(LogicalDylib &LD,
- typename LogicalDylib::SourceModuleHandle LMId,
- Function &F) {
- Module &SrcM = LD.getSourceModule(LMId);
-
- // If F is a declaration we must already have compiled it.
- if (F.isDeclaration())
- return 0;
-
- // Grab the name of the function being called here.
- std::string CalledFnName = mangle(F.getName(), SrcM.getDataLayout());
-
- JITTargetAddress CalledAddr = 0;
- auto Part = Partition(F);
- if (auto PartKeyOrErr = emitPartition(LD, LMId, Part)) {
- auto &PartKey = *PartKeyOrErr;
- for (auto *SubF : Part) {
- std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout());
- if (auto FnBodySym = BaseLayer.findSymbolIn(PartKey, FnName, false)) {
- if (auto FnBodyAddrOrErr = FnBodySym.getAddress()) {
- JITTargetAddress FnBodyAddr = *FnBodyAddrOrErr;
-
- // If this is the function we're calling record the address so we can
- // return it from this function.
- if (SubF == &F)
- CalledAddr = FnBodyAddr;
-
- // Update the function body pointer for the stub.
- if (auto EC = LD.StubsMgr->updatePointer(FnName, FnBodyAddr))
- return 0;
-
- } else
- return FnBodyAddrOrErr.takeError();
- } else if (auto Err = FnBodySym.takeError())
- return std::move(Err);
- else
- llvm_unreachable("Function not emitted for partition");
- }
-
- LD.BaseLayerVModuleKeys.push_back(PartKey);
- } else
- return PartKeyOrErr.takeError();
-
- return CalledAddr;
- }
-
- template <typename PartitionT>
- Expected<VModuleKey>
- emitPartition(LogicalDylib &LD,
- typename LogicalDylib::SourceModuleHandle LMId,
- const PartitionT &Part) {
- Module &SrcM = LD.getSourceModule(LMId);
-
- // Create the module.
- std::string NewName(SrcM.getName());
- for (auto *F : Part) {
- NewName += ".";
- NewName += F->getName();
- }
-
- auto M = std::make_unique<Module>(NewName, SrcM.getContext());
- M->setDataLayout(SrcM.getDataLayout());
- ValueToValueMapTy VMap;
-
- auto Materializer = createLambdaMaterializer([&LD, &LMId,
- &M](Value *V) -> Value * {
- if (auto *GV = dyn_cast<GlobalVariable>(V))
- return cloneGlobalVariableDecl(*M, *GV);
-
- if (auto *F = dyn_cast<Function>(V)) {
- // Check whether we want to clone an available_externally definition.
- if (!LD.getStubsToClone(LMId).count(F))
- return cloneFunctionDecl(*M, *F);
-
- // Ok - we want an inlinable stub. For that to work we need a decl
- // for the stub pointer.
- auto *StubPtr = createImplPointer(*F->getType(), *M,
- F->getName() + "$stub_ptr", nullptr);
- auto *ClonedF = cloneFunctionDecl(*M, *F);
- makeStub(*ClonedF, *StubPtr);
- ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage);
- ClonedF->addFnAttr(Attribute::AlwaysInline);
- return ClonedF;
- }
-
- if (auto *A = dyn_cast<GlobalAlias>(V)) {
- auto *Ty = A->getValueType();
- if (Ty->isFunctionTy())
- return Function::Create(cast<FunctionType>(Ty),
- GlobalValue::ExternalLinkage, A->getName(),
- M.get());
-
- return new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
- nullptr, A->getName(), nullptr,
- GlobalValue::NotThreadLocal,
- A->getType()->getAddressSpace());
- }
-
- return nullptr;
- });
-
- // Create decls in the new module.
- for (auto *F : Part)
- cloneFunctionDecl(*M, *F, &VMap);
-
- // Move the function bodies.
- for (auto *F : Part)
- moveFunctionBody(*F, VMap, &Materializer);
-
- auto K = ES.allocateVModule();
-
- auto LegacyLookup = [this, &LD](StringRef Name) -> JITSymbol {
- return LD.findSymbol(BaseLayer, std::string(Name), false);
- };
-
- // Create memory manager and symbol resolver.
- auto Resolver = createSymbolResolver(
- [&LD, LegacyLookup](const SymbolNameSet &Symbols) {
- auto RS = getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup);
- if (!RS) {
- logAllUnhandledErrors(
- RS.takeError(), errs(),
- "CODLayer/SubResolver responsibility set lookup failed: ");
- return SymbolNameSet();
- }
-
- if (RS->size() == Symbols.size())
- return *RS;
-
- SymbolNameSet NotFoundViaLegacyLookup;
- for (auto &S : Symbols)
- if (!RS->count(S))
- NotFoundViaLegacyLookup.insert(S);
-
- auto RS2 =
- LD.BackingResolver->getResponsibilitySet(NotFoundViaLegacyLookup);
-
- for (auto &S : RS2)
- (*RS).insert(S);
-
- return *RS;
- },
- [this, &LD, LegacyLookup](std::shared_ptr<AsynchronousSymbolQuery> Q,
- SymbolNameSet Symbols) {
- auto NotFoundViaLegacyLookup =
- lookupWithLegacyFn(ES, *Q, Symbols, LegacyLookup);
- return LD.BackingResolver->lookup(Q,
- std::move(NotFoundViaLegacyLookup));
- });
- SetSymbolResolver(K, std::move(Resolver));
-
- if (auto Err = BaseLayer.addModule(std::move(K), std::move(M)))
- return std::move(Err);
-
- return K;
- }
-
- ExecutionSession &ES;
- BaseLayerT &BaseLayer;
- SymbolResolverGetter GetSymbolResolver;
- SymbolResolverSetter SetSymbolResolver;
- PartitioningFtor Partition;
- CompileCallbackMgrT &CompileCallbackMgr;
- IndirectStubsManagerBuilderT CreateIndirectStubsManager;
-
- std::map<VModuleKey, LogicalDylib> LogicalDylibs;
- bool CloneStubsIntoPartitions;
-};
-
-template <typename BaseLayerT, typename CompileCallbackMgrT,
- typename IndirectStubsMgrT>
-LegacyCompileOnDemandLayer<BaseLayerT, CompileCallbackMgrT, IndirectStubsMgrT>::
- LegacyCompileOnDemandLayer(
- ExecutionSession &ES, BaseLayerT &BaseLayer,
- SymbolResolverGetter GetSymbolResolver,
- SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
- CompileCallbackMgrT &CallbackMgr,
- IndirectStubsManagerBuilderT CreateIndirectStubsManager,
- bool CloneStubsIntoPartitions)
- : ES(ES), BaseLayer(BaseLayer),
- GetSymbolResolver(std::move(GetSymbolResolver)),
- SetSymbolResolver(std::move(SetSymbolResolver)),
- Partition(std::move(Partition)), CompileCallbackMgr(CallbackMgr),
- CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)),
- CloneStubsIntoPartitions(CloneStubsIntoPartitions) {}
-
} // end namespace orc
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
index 8376d163d57a..c7ba57228ab7 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
@@ -28,8 +28,6 @@ class TargetMachine;
namespace orc {
-class JITTargetMachineBuilder;
-
IRSymbolMapper::ManglingOptions
irManglingOptionsFromTargetOptions(const TargetOptions &Opts);
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index a117acefd2d3..4a4b58ed32e3 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -16,11 +16,14 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
#include "llvm/Support/Debug.h"
+#include <atomic>
#include <memory>
#include <vector>
@@ -33,11 +36,67 @@ class ExecutionSession;
class MaterializationUnit;
class MaterializationResponsibility;
class JITDylib;
+class ResourceTracker;
+class InProgressLookupState;
+
enum class SymbolState : uint8_t;
-/// VModuleKey provides a unique identifier (allocated and managed by
-/// ExecutionSessions) for a module added to the JIT.
-using VModuleKey = uint64_t;
+using ResourceTrackerSP = IntrusiveRefCntPtr<ResourceTracker>;
+using JITDylibSP = IntrusiveRefCntPtr<JITDylib>;
+
+using ResourceKey = uintptr_t;
+
+/// API to remove / transfer ownership of JIT resources.
+class ResourceTracker : public ThreadSafeRefCountedBase<ResourceTracker> {
+private:
+ friend class ExecutionSession;
+ friend class JITDylib;
+ friend class MaterializationResponsibility;
+
+public:
+ ResourceTracker(const ResourceTracker &) = delete;
+ ResourceTracker &operator=(const ResourceTracker &) = delete;
+ ResourceTracker(ResourceTracker &&) = delete;
+ ResourceTracker &operator=(ResourceTracker &&) = delete;
+
+ ~ResourceTracker();
+
+ /// Return the JITDylib targeted by this tracker.
+ JITDylib &getJITDylib() const {
+ return *reinterpret_cast<JITDylib *>(JDAndFlag.load() &
+ ~static_cast<uintptr_t>(1));
+ }
+
+ /// Remove all resources associated with this key.
+ Error remove();
+
+ /// Transfer all resources associated with this key to the given
+ /// tracker, which must target the same JITDylib as this one.
+ void transferTo(ResourceTracker &DstRT);
+
+ /// Return true if this tracker has become defunct.
+ bool isDefunct() const { return JDAndFlag.load() & 0x1; }
+
+ /// Returns the key associated with this tracker.
+ /// This method should not be used except for debug logging: there is no
+ /// guarantee that the returned value will remain valid.
+ ResourceKey getKeyUnsafe() const { return reinterpret_cast<uintptr_t>(this); }
+
+private:
+ ResourceTracker(JITDylibSP JD);
+
+ void makeDefunct();
+
+ std::atomic_uintptr_t JDAndFlag;
+};
+
+/// Listens for ResourceTracker operations.
+class ResourceManager {
+public:
+ virtual ~ResourceManager();
+ virtual Error handleRemoveResources(ResourceKey K) = 0;
+ virtual void handleTransferResources(ResourceKey DstK, ResourceKey SrcK) = 0;
+};
/// A set of symbol names (represented by SymbolStringPtrs for
// efficiency).
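A hedged sketch of the intended replacement for VModuleKey-based removal: definitions are attached to a ResourceTracker at define time (see the ResourceTrackerSP parameter on JITDylib::define further below) and removed or transferred through the tracker. The symbol name and address here are placeholders.

#include "llvm/ExecutionEngine/Orc/Core.h"

// Sketch: define an absolute symbol against a fresh tracker, then remove it.
static llvm::Error defineAndRemove(llvm::orc::JITDylib &JD,
                                   llvm::JITTargetAddress FooAddr) {
  auto &ES = JD.getExecutionSession();
  auto RT = JD.createResourceTracker();

  llvm::orc::SymbolMap Syms;
  Syms[ES.intern("foo")] =
      llvm::JITEvaluatedSymbol(FooAddr, llvm::JITSymbolFlags::Exported);

  // absoluteSymbols() no longer takes a VModuleKey; the tracker identifies
  // the resources instead.
  if (auto Err = JD.define(llvm::orc::absoluteSymbols(std::move(Syms)), RT))
    return Err;

  // Removing the tracker removes everything that was defined against it.
  return RT->remove();
}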
@@ -158,9 +217,19 @@ public:
/// Add an element to the set. The client is responsible for checking that
/// duplicates are not added.
- void add(SymbolStringPtr Name,
- SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) {
+ SymbolLookupSet &
+ add(SymbolStringPtr Name,
+ SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) {
Symbols.push_back(std::make_pair(std::move(Name), Flags));
+ return *this;
+ }
+
+ /// Quickly append one lookup set to another.
+ SymbolLookupSet &append(SymbolLookupSet Other) {
+ Symbols.reserve(Symbols.size() + Other.size());
+ for (auto &KV : Other)
+ Symbols.push_back(std::move(KV));
+ return *this;
}
bool empty() const { return Symbols.empty(); }
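Since add() now returns a reference and append() splices one set into another, lookup sets can be built fluently; a small sketch with arbitrary symbol names:

// Sketch: building a lookup set with the chaining add() and append().
static llvm::orc::SymbolLookupSet
buildLookupSet(llvm::orc::ExecutionSession &ES) {
  llvm::orc::SymbolLookupSet Set;
  Set.add(ES.intern("main"))
      .add(ES.intern("__optional_init"),
           llvm::orc::SymbolLookupFlags::WeaklyReferencedSymbol);

  llvm::orc::SymbolLookupSet Extra;
  Extra.add(ES.intern("helper"));
  Set.append(std::move(Extra));
  return Set;
}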
@@ -287,7 +356,7 @@ public:
for (UnderlyingVector::size_type I = 1; I != Symbols.size(); ++I)
if (Symbols[I].first == Symbols[I - 1].first)
return true;
- return true;
+ return false;
}
#endif
@@ -318,6 +387,18 @@ using RegisterDependenciesFunction =
/// are no dependants to register with.
extern RegisterDependenciesFunction NoDependenciesToRegister;
+class ResourceTrackerDefunct : public ErrorInfo<ResourceTrackerDefunct> {
+public:
+ static char ID;
+
+ ResourceTrackerDefunct(ResourceTrackerSP RT);
+ std::error_code convertToErrorCode() const override;
+ void log(raw_ostream &OS) const override;
+
+private:
+ ResourceTrackerSP RT;
+};
+
/// Used to notify a JITDylib that the given set of symbols failed to
/// materialize.
class FailedToMaterialize : public ErrorInfo<FailedToMaterialize> {
@@ -408,9 +489,10 @@ private:
/// emit symbols, or abandon materialization by notifying any unmaterialized
/// symbols of an error.
class MaterializationResponsibility {
- friend class MaterializationUnit;
+ friend class ExecutionSession;
+
public:
- MaterializationResponsibility(MaterializationResponsibility &&) = default;
+ MaterializationResponsibility(MaterializationResponsibility &&) = delete;
MaterializationResponsibility &
operator=(MaterializationResponsibility &&) = delete;
@@ -419,12 +501,15 @@ public:
/// emitted or notified of an error.
~MaterializationResponsibility();
+ /// Returns the ResourceTracker for this instance.
+ template <typename Func> Error withResourceKeyDo(Func &&F) const;
+
/// Returns the target JITDylib that these symbols are being materialized
/// into.
JITDylib &getTargetJITDylib() const { return *JD; }
- /// Returns the VModuleKey for this instance.
- VModuleKey getVModuleKey() const { return K; }
+ /// Returns the ExecutionSession for this instance.
+ ExecutionSession &getExecutionSession();
/// Returns the symbol flags map for this responsibility instance.
/// Note: The returned flags may have transient flags (Lazy, Materializing)
@@ -509,13 +594,13 @@ public:
/// materializers to break up work based on run-time information (e.g.
/// by introspecting which symbols have actually been looked up and
/// materializing only those).
- void replace(std::unique_ptr<MaterializationUnit> MU);
+ Error replace(std::unique_ptr<MaterializationUnit> MU);
/// Delegates responsibility for the given symbols to the returned
/// materialization responsibility. Useful for breaking up work between
/// threads, or different kinds of materialization processes.
- MaterializationResponsibility delegate(const SymbolNameSet &Symbols,
- VModuleKey NewKey = VModuleKey());
+ Expected<std::unique_ptr<MaterializationResponsibility>>
+ delegate(const SymbolNameSet &Symbols);
void addDependencies(const SymbolStringPtr &Name,
const SymbolDependenceMap &Dependencies);
@@ -526,19 +611,17 @@ public:
private:
/// Create a MaterializationResponsibility for the given JITDylib and
/// initial symbols.
- MaterializationResponsibility(std::shared_ptr<JITDylib> JD,
- SymbolFlagsMap SymbolFlags,
- SymbolStringPtr InitSymbol, VModuleKey K)
+ MaterializationResponsibility(JITDylibSP JD, SymbolFlagsMap SymbolFlags,
+ SymbolStringPtr InitSymbol)
: JD(std::move(JD)), SymbolFlags(std::move(SymbolFlags)),
- InitSymbol(std::move(InitSymbol)), K(std::move(K)) {
- assert(this->JD && "Cannot initialize with null JD");
+ InitSymbol(std::move(InitSymbol)) {
+ assert(this->JD && "Cannot initialize with null JITDylib");
assert(!this->SymbolFlags.empty() && "Materializing nothing?");
}
- std::shared_ptr<JITDylib> JD;
+ JITDylibSP JD;
SymbolFlagsMap SymbolFlags;
SymbolStringPtr InitSymbol;
- VModuleKey K;
};
/// A MaterializationUnit represents a set of symbol definitions that can
@@ -555,9 +638,9 @@ class MaterializationUnit {
public:
MaterializationUnit(SymbolFlagsMap InitalSymbolFlags,
- SymbolStringPtr InitSymbol, VModuleKey K)
+ SymbolStringPtr InitSymbol)
: SymbolFlags(std::move(InitalSymbolFlags)),
- InitSymbol(std::move(InitSymbol)), K(std::move(K)) {
+ InitSymbol(std::move(InitSymbol)) {
assert((!this->InitSymbol || this->SymbolFlags.count(this->InitSymbol)) &&
"If set, InitSymbol should appear in InitialSymbolFlags map");
}
@@ -577,7 +660,8 @@ public:
/// Implementations of this method should materialize all symbols
/// in the materialization unit, except for those that have been
/// previously discarded.
- virtual void materialize(MaterializationResponsibility R) = 0;
+ virtual void
+ materialize(std::unique_ptr<MaterializationResponsibility> R) = 0;
/// Called by JITDylibs to notify MaterializationUnits that the given symbol
/// has been overridden.
@@ -589,17 +673,10 @@ public:
protected:
SymbolFlagsMap SymbolFlags;
SymbolStringPtr InitSymbol;
- VModuleKey K;
private:
virtual void anchor();
- MaterializationResponsibility
- createMaterializationResponsibility(std::shared_ptr<JITDylib> JD) {
- return MaterializationResponsibility(std::move(JD), std::move(SymbolFlags),
- std::move(InitSymbol), K);
- }
-
/// Implementations of this method should discard the given symbol
/// from the source (e.g. if the source is an LLVM IR Module and the
/// symbol is a function, delete the function body or mark it available
@@ -607,21 +684,18 @@ private:
virtual void discard(const JITDylib &JD, const SymbolStringPtr &Name) = 0;
};
-using MaterializationUnitList =
- std::vector<std::unique_ptr<MaterializationUnit>>;
-
/// A MaterializationUnit implementation for pre-existing absolute symbols.
///
/// All symbols will be resolved and marked ready as soon as the unit is
/// materialized.
class AbsoluteSymbolsMaterializationUnit : public MaterializationUnit {
public:
- AbsoluteSymbolsMaterializationUnit(SymbolMap Symbols, VModuleKey K);
+ AbsoluteSymbolsMaterializationUnit(SymbolMap Symbols);
StringRef getName() const override;
private:
- void materialize(MaterializationResponsibility R) override;
+ void materialize(std::unique_ptr<MaterializationResponsibility> R) override;
void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
static SymbolFlagsMap extractFlags(const SymbolMap &Symbols);
@@ -639,9 +713,9 @@ private:
/// \endcode
///
inline std::unique_ptr<AbsoluteSymbolsMaterializationUnit>
-absoluteSymbols(SymbolMap Symbols, VModuleKey K = VModuleKey()) {
+absoluteSymbols(SymbolMap Symbols) {
return std::make_unique<AbsoluteSymbolsMaterializationUnit>(
- std::move(Symbols), std::move(K));
+ std::move(Symbols));
}
/// A materialization unit for symbol aliases. Allows existing symbols to be
@@ -658,12 +732,12 @@ public:
/// resolved.
ReExportsMaterializationUnit(JITDylib *SourceJD,
JITDylibLookupFlags SourceJDLookupFlags,
- SymbolAliasMap Aliases, VModuleKey K);
+ SymbolAliasMap Aliases);
StringRef getName() const override;
private:
- void materialize(MaterializationResponsibility R) override;
+ void materialize(std::unique_ptr<MaterializationResponsibility> R) override;
void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases);
@@ -684,10 +758,9 @@ private:
/// return Err;
/// \endcode
inline std::unique_ptr<ReExportsMaterializationUnit>
-symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) {
+symbolAliases(SymbolAliasMap Aliases) {
return std::make_unique<ReExportsMaterializationUnit>(
- nullptr, JITDylibLookupFlags::MatchAllSymbols, std::move(Aliases),
- std::move(K));
+ nullptr, JITDylibLookupFlags::MatchAllSymbols, std::move(Aliases));
}
/// Create a materialization unit for re-exporting symbols from another JITDylib
@@ -696,10 +769,9 @@ symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) {
inline std::unique_ptr<ReExportsMaterializationUnit>
reexports(JITDylib &SourceJD, SymbolAliasMap Aliases,
JITDylibLookupFlags SourceJDLookupFlags =
- JITDylibLookupFlags::MatchExportedSymbolsOnly,
- VModuleKey K = VModuleKey()) {
+ JITDylibLookupFlags::MatchExportedSymbolsOnly) {
return std::make_unique<ReExportsMaterializationUnit>(
- &SourceJD, SourceJDLookupFlags, std::move(Aliases), std::move(K));
+ &SourceJD, SourceJDLookupFlags, std::move(Aliases));
}
/// Build a SymbolAliasMap for the common case where you want to re-export
@@ -723,8 +795,10 @@ enum class SymbolState : uint8_t {
/// makes a callback when all symbols are available.
class AsynchronousSymbolQuery {
friend class ExecutionSession;
+ friend class InProgressFullLookupState;
friend class JITDylib;
friend class JITSymbolResolverAdapter;
+ friend class MaterializationResponsibility;
public:
/// Create a query for the given symbols. The NotifyComplete
@@ -757,8 +831,6 @@ private:
void dropSymbol(const SymbolStringPtr &Name);
- bool canStillFail();
-
void handleFailed(Error Err);
void detach();
@@ -770,34 +842,62 @@ private:
SymbolState RequiredState;
};
+/// Wraps state for a lookup-in-progress.
+/// DefinitionGenerators can optionally take ownership of a LookupState object
+/// to suspend a lookup-in-progress while they search for definitions.
+class LookupState {
+ friend class OrcV2CAPIHelper;
+ friend class ExecutionSession;
+
+public:
+ LookupState();
+ LookupState(LookupState &&);
+ LookupState &operator=(LookupState &&);
+ ~LookupState();
+
+ /// Continue the lookup. This can be called by DefinitionGenerators
+ /// to re-start a captured query-application operation.
+ void continueLookup(Error Err);
+
+private:
+ LookupState(std::unique_ptr<InProgressLookupState> IPLS);
+
+ // For C API.
+ void reset(InProgressLookupState *IPLS);
+
+ std::unique_ptr<InProgressLookupState> IPLS;
+};
+
+/// Definition generators can be attached to JITDylibs to generate new
+/// definitions for otherwise unresolved symbols during lookup.
+class DefinitionGenerator {
+public:
+ virtual ~DefinitionGenerator();
+
+ /// DefinitionGenerators should override this method to insert new
+ /// definitions into the parent JITDylib. K specifies the kind of this
+ /// lookup. JD specifies the target JITDylib being searched, and
+ /// JDLookupFlags specifies whether the search should match against
+ /// hidden symbols. Finally, Symbols describes the set of unresolved
+ /// symbols and their associated lookup flags.
+ virtual Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &LookupSet) = 0;
+};
+
/// A symbol table that supports asynchronous symbol queries.
///
/// Represents a virtual shared object. Instances can not be copied or moved, so
/// their addresses may be used as keys for resource management.
/// JITDylib state changes must be made via an ExecutionSession to guarantee
/// that they are synchronized with respect to other JITDylib operations.
-class JITDylib : public std::enable_shared_from_this<JITDylib> {
+class JITDylib : public ThreadSafeRefCountedBase<JITDylib>,
+ public jitlink::JITLinkDylib {
friend class AsynchronousSymbolQuery;
friend class ExecutionSession;
friend class Platform;
friend class MaterializationResponsibility;
public:
- /// Definition generators can be attached to JITDylibs to generate new
- /// definitions for otherwise unresolved symbols during lookup.
- class DefinitionGenerator {
- public:
- virtual ~DefinitionGenerator();
-
- /// DefinitionGenerators should override this method to insert new
- /// definitions into the parent JITDylib. K specifies the kind of this
- /// lookup. JD specifies the target JITDylib being searched, and
- /// JDLookupFlags specifies whether the search should match against
- /// hidden symbols. Finally, Symbols describes the set of unresolved
- /// symbols and their associated lookup flags.
- virtual Error tryToGenerate(LookupKind K, JITDylib &JD,
- JITDylibLookupFlags JDLookupFlags,
- const SymbolLookupSet &LookupSet) = 0;
- };
using AsynchronousSymbolQuerySet =
std::set<std::shared_ptr<AsynchronousSymbolQuery>>;
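To illustrate the relocated DefinitionGenerator interface and its new LookupState parameter, a hedged sketch of a generator that supplies a fixed absolute address for anything it is asked about; the class name and address are invented. A real generator could instead capture the LookupState and call continueLookup() later to answer asynchronously.

#include "llvm/ExecutionEngine/Orc/Core.h"

// Sketch: a generator that "resolves" every missing symbol to a dummy address.
class DummyGenerator : public llvm::orc::DefinitionGenerator {
public:
  llvm::Error
  tryToGenerate(llvm::orc::LookupState &LS, llvm::orc::LookupKind K,
                llvm::orc::JITDylib &JD,
                llvm::orc::JITDylibLookupFlags JDLookupFlags,
                const llvm::orc::SymbolLookupSet &LookupSet) override {
    llvm::orc::SymbolMap NewDefs;
    for (auto &KV : LookupSet)
      NewDefs[KV.first] =
          llvm::JITEvaluatedSymbol(0x1000, llvm::JITSymbolFlags::Exported);
    if (NewDefs.empty())
      return llvm::Error::success();
    return JD.define(llvm::orc::absoluteSymbols(std::move(NewDefs)));
  }
};

// Attached via JD.addGenerator(std::make_unique<DummyGenerator>()).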
@@ -813,6 +913,21 @@ public:
/// Get a reference to the ExecutionSession for this JITDylib.
ExecutionSession &getExecutionSession() const { return ES; }
+ /// Calls remove on all trackers currently associated with this JITDylib.
+ /// Does not run static deinits.
+ ///
+ /// Note that removal happens outside the session lock, so new code may be
+ /// added concurrently while the clear is underway, and the newly added
+ /// code will *not* be cleared. Adding new code concurrently with a clear
+ /// is usually a bug and should be avoided.
+ Error clear();
+
+ /// Get the default resource tracker for this JITDylib.
+ ResourceTrackerSP getDefaultResourceTracker();
+
+ /// Create a resource tracker for this JITDylib.
+ ResourceTrackerSP createResourceTracker();
+
/// Adds a definition generator to this JITDylib and returns a reference to
/// it.
///
@@ -873,10 +988,13 @@ public:
/// Define all symbols provided by the materialization unit to be part of this
/// JITDylib.
///
+ /// If RT is not specified then the default resource tracker will be used.
+ ///
/// This overload always takes ownership of the MaterializationUnit. If any
/// errors occur, the MaterializationUnit is consumed.
template <typename MaterializationUnitType>
- Error define(std::unique_ptr<MaterializationUnitType> &&MU);
+ Error define(std::unique_ptr<MaterializationUnitType> &&MU,
+ ResourceTrackerSP RT = nullptr);
/// Define all symbols provided by the materialization unit to be part of this
/// JITDylib.
@@ -886,7 +1004,8 @@ public:
/// may allow the caller to modify the MaterializationUnit to correct the
/// issue, then re-call define.
template <typename MaterializationUnitType>
- Error define(std::unique_ptr<MaterializationUnitType> &MU);
+ Error define(std::unique_ptr<MaterializationUnitType> &MU,
+ ResourceTrackerSP RT = nullptr);
/// Tries to remove the given symbols.
///
@@ -900,41 +1019,47 @@ public:
/// left unmodified (no symbols are removed).
Error remove(const SymbolNameSet &Names);
- /// Search the given JITDylib for the symbols in Symbols. If found, store
- /// the flags for each symbol in Flags. If any required symbols are not found
- /// then an error will be returned.
- Expected<SymbolFlagsMap> lookupFlags(LookupKind K,
- JITDylibLookupFlags JDLookupFlags,
- SymbolLookupSet LookupSet);
-
/// Dump current JITDylib state to OS.
void dump(raw_ostream &OS);
- /// FIXME: Remove this when we remove the old ORC layers.
- /// Search the given JITDylibs in order for the symbols in Symbols. Results
- /// (once they become available) will be returned via the given Query.
- ///
- /// If any symbol is not found then the unresolved symbols will be returned,
- /// and the query will not be applied. The Query is not failed and can be
- /// re-used in a subsequent lookup once the symbols have been added, or
- /// manually failed.
- Expected<SymbolNameSet>
- legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q, SymbolNameSet Names);
+ /// Returns the given JITDylibs and all of their transitive dependencies in
+ /// DFS order (based on linkage relationships). Each JITDylib will appear
+ /// only once.
+ static std::vector<JITDylibSP> getDFSLinkOrder(ArrayRef<JITDylibSP> JDs);
+
+  /// Returns the given JITDylibs and all of their transitive dependencies in
+ /// reverse DFS order (based on linkage relationships). Each JITDylib will
+ /// appear only once.
+ static std::vector<JITDylibSP>
+ getReverseDFSLinkOrder(ArrayRef<JITDylibSP> JDs);
+
+ /// Return this JITDylib and its transitive dependencies in DFS order
+ /// based on linkage relationships.
+ std::vector<JITDylibSP> getDFSLinkOrder();
+
+  /// Return this JITDylib and its transitive dependencies in reverse DFS order
+ /// based on linkage relationships.
+ std::vector<JITDylibSP> getReverseDFSLinkOrder();
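A brief, illustrative sketch of the link-order helpers above (assumes an existing JITDylib):

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/Support/raw_ostream.h"

// Print this JITDylib and everything it links against, in DFS order.
static void dumpLinkOrder(llvm::orc::JITDylib &JD, llvm::raw_ostream &OS) {
  for (auto &NextJD : JD.getDFSLinkOrder())
    OS << NextJD->getName() << "\n";
}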
private:
using AsynchronousSymbolQueryList =
std::vector<std::shared_ptr<AsynchronousSymbolQuery>>;
struct UnmaterializedInfo {
- UnmaterializedInfo(std::unique_ptr<MaterializationUnit> MU)
- : MU(std::move(MU)) {}
+ UnmaterializedInfo(std::unique_ptr<MaterializationUnit> MU,
+ ResourceTracker *RT)
+ : MU(std::move(MU)), RT(RT) {}
std::unique_ptr<MaterializationUnit> MU;
+ ResourceTracker *RT;
};
using UnmaterializedInfosMap =
DenseMap<SymbolStringPtr, std::shared_ptr<UnmaterializedInfo>>;
+ using UnmaterializedInfosList =
+ std::vector<std::shared_ptr<UnmaterializedInfo>>;
+
struct MaterializingInfo {
SymbolDependenceMap Dependants;
SymbolDependenceMap UnemittedDependencies;
@@ -1001,25 +1126,16 @@ private:
JITDylib(ExecutionSession &ES, std::string Name);
- Error defineImpl(MaterializationUnit &MU);
-
- void lookupFlagsImpl(SymbolFlagsMap &Result, LookupKind K,
- JITDylibLookupFlags JDLookupFlags,
- SymbolLookupSet &Unresolved);
+ ResourceTrackerSP getTracker(MaterializationResponsibility &MR);
+ std::pair<AsynchronousSymbolQuerySet, std::shared_ptr<SymbolDependenceMap>>
+ removeTracker(ResourceTracker &RT);
- Error lodgeQuery(MaterializationUnitList &MUs,
- std::shared_ptr<AsynchronousSymbolQuery> &Q, LookupKind K,
- JITDylibLookupFlags JDLookupFlags,
- SymbolLookupSet &Unresolved);
+ void transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT);
- Error lodgeQueryImpl(MaterializationUnitList &MUs,
- std::shared_ptr<AsynchronousSymbolQuery> &Q,
- LookupKind K, JITDylibLookupFlags JDLookupFlags,
- SymbolLookupSet &Unresolved);
+ Error defineImpl(MaterializationUnit &MU);
- bool lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
- SymbolLookupSet &Unresolved);
+ void installMaterializationUnit(std::unique_ptr<MaterializationUnit> MU,
+ ResourceTracker &RT);
void detachQueryHelper(AsynchronousSymbolQuery &Q,
const SymbolNameSet &QuerySymbols);
@@ -1030,29 +1146,45 @@ private:
Expected<SymbolFlagsMap> defineMaterializing(SymbolFlagsMap SymbolFlags);
- void replace(std::unique_ptr<MaterializationUnit> MU);
+ Error replace(MaterializationResponsibility &FromMR,
+ std::unique_ptr<MaterializationUnit> MU);
+
+ Expected<std::unique_ptr<MaterializationResponsibility>>
+ delegate(MaterializationResponsibility &FromMR, SymbolFlagsMap SymbolFlags,
+ SymbolStringPtr InitSymbol);
SymbolNameSet getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const;
void addDependencies(const SymbolStringPtr &Name,
const SymbolDependenceMap &Dependants);
- Error resolve(const SymbolMap &Resolved);
+ Error resolve(MaterializationResponsibility &MR, const SymbolMap &Resolved);
+
+ Error emit(MaterializationResponsibility &MR, const SymbolFlagsMap &Emitted);
- Error emit(const SymbolFlagsMap &Emitted);
+ void unlinkMaterializationResponsibility(MaterializationResponsibility &MR);
using FailedSymbolsWorklist =
std::vector<std::pair<JITDylib *, SymbolStringPtr>>;
- static void notifyFailed(FailedSymbolsWorklist FailedSymbols);
+
+ static std::pair<AsynchronousSymbolQuerySet,
+ std::shared_ptr<SymbolDependenceMap>>
+ failSymbols(FailedSymbolsWorklist);
ExecutionSession &ES;
std::string JITDylibName;
+ std::mutex GeneratorsMutex;
bool Open = true;
SymbolTable Symbols;
UnmaterializedInfosMap UnmaterializedInfos;
MaterializingInfosMap MaterializingInfos;
- std::vector<std::unique_ptr<DefinitionGenerator>> DefGenerators;
+ std::vector<std::shared_ptr<DefinitionGenerator>> DefGenerators;
JITDylibSearchOrder LinkOrder;
+ ResourceTrackerSP DefaultTracker;
+
+ // Map trackers to sets of symbols tracked.
+ DenseMap<ResourceTracker *, SymbolNameVector> TrackerSymbols;
+ DenseMap<MaterializationResponsibility *, ResourceTracker *> MRTrackers;
};
/// Platforms set up standard symbols and mediate interactions between dynamic
@@ -1071,11 +1203,12 @@ public:
/// This method will be called under the ExecutionSession lock each time a
/// MaterializationUnit is added to a JITDylib.
- virtual Error notifyAdding(JITDylib &JD, const MaterializationUnit &MU) = 0;
+ virtual Error notifyAdding(ResourceTracker &RT,
+ const MaterializationUnit &MU) = 0;
/// This method will be called under the ExecutionSession lock when a
- /// VModuleKey is removed.
- virtual Error notifyRemoving(JITDylib &JD, VModuleKey K) = 0;
+ /// ResourceTracker is removed.
+ virtual Error notifyRemoving(ResourceTracker &RT) = 0;
/// A utility function for looking up initializer symbols. Performs a blocking
/// lookup for the given symbols in each of the given JITDylibs.
@@ -1086,8 +1219,12 @@ public:
/// An ExecutionSession represents a running JIT program.
class ExecutionSession {
- // FIXME: Remove this when we remove the old ORC layers.
+ friend class InProgressLookupFlagsState;
+ friend class InProgressFullLookupState;
friend class JITDylib;
+ friend class LookupState;
+ friend class MaterializationResponsibility;
+ friend class ResourceTracker;
public:
/// For reporting errors.
@@ -1096,13 +1233,16 @@ public:
/// For dispatching MaterializationUnit::materialize calls.
using DispatchMaterializationFunction =
std::function<void(std::unique_ptr<MaterializationUnit> MU,
- MaterializationResponsibility MR)>;
+ std::unique_ptr<MaterializationResponsibility> MR)>;
/// Construct an ExecutionSession.
///
/// SymbolStringPools may be shared between ExecutionSessions.
ExecutionSession(std::shared_ptr<SymbolStringPool> SSP = nullptr);
+ /// End the session. Closes all JITDylibs.
+ Error endSession();
+
/// Add a symbol name to the SymbolStringPool and return a pointer to it.
SymbolStringPtr intern(StringRef SymName) { return SSP->intern(SymName); }
@@ -1122,6 +1262,14 @@ public:
return F();
}
+ /// Register the given ResourceManager with this ExecutionSession.
+ /// Managers will be notified of events in reverse order of registration.
+ void registerResourceManager(ResourceManager &RM);
+
+  /// Deregister the given ResourceManager from this ExecutionSession. The
+  /// manager must have been previously registered.
+ void deregisterResourceManager(ResourceManager &RM);
+
/// Return a pointer to the "name" JITDylib.
 /// Ownership of the JITDylib remains with the ExecutionSession.
JITDylib *getJITDylibByName(StringRef Name);
@@ -1147,17 +1295,6 @@ public:
/// If no Platform is attached this call is equivalent to createBareJITDylib.
Expected<JITDylib &> createJITDylib(std::string Name);
- /// Allocate a module key for a new module to add to the JIT.
- VModuleKey allocateVModule() {
- return runSessionLocked([this]() { return ++LastKey; });
- }
-
- /// Return a module key to the ExecutionSession so that it can be
- /// re-used. This should only be done once all resources associated
- /// with the original key have been released.
- void releaseVModule(VModuleKey Key) { /* FIXME: Recycle keys */
- }
-
/// Set the error reporter function.
ExecutionSession &setErrorReporter(ErrorReporter ReportError) {
this->ReportError = std::move(ReportError);
@@ -1176,19 +1313,18 @@ public:
return *this;
}
- void legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err);
+ /// Search the given JITDylibs to find the flags associated with each of the
+ /// given symbols.
+ void lookupFlags(LookupKind K, JITDylibSearchOrder SearchOrder,
+ SymbolLookupSet Symbols,
+ unique_function<void(Expected<SymbolFlagsMap>)> OnComplete);
- using LegacyAsyncLookupFunction = std::function<SymbolNameSet(
- std::shared_ptr<AsynchronousSymbolQuery> Q, SymbolNameSet Names)>;
-
- /// A legacy lookup function for JITSymbolResolverAdapter.
- /// Do not use -- this will be removed soon.
- Expected<SymbolMap>
- legacyLookup(LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names,
- SymbolState RequiredState,
- RegisterDependenciesFunction RegisterDependencies);
+ /// Blocking version of lookupFlags.
+ Expected<SymbolFlagsMap> lookupFlags(LookupKind K,
+ JITDylibSearchOrder SearchOrder,
+ SymbolLookupSet Symbols);
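An illustrative call to the new blocking lookupFlags overload ("main" is just an example symbol name):

#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

// Ask for the flags of a single symbol in one JITDylib.
static Expected<SymbolFlagsMap> getMainFlags(ExecutionSession &ES,
                                             JITDylib &JD) {
  return ES.lookupFlags(LookupKind::Static, makeJITDylibSearchOrder(&JD),
                        SymbolLookupSet(ES.intern("main")));
}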
- /// Search the given JITDylib list for the given symbols.
+ /// Search the given JITDylibs for the given symbols.
///
/// SearchOrder lists the JITDylibs to search. For each dylib, the associated
/// boolean indicates whether the search should match against non-exported
@@ -1248,10 +1384,11 @@ public:
SymbolState RequiredState = SymbolState::Ready);
/// Materialize the given unit.
- void dispatchMaterialization(std::unique_ptr<MaterializationUnit> MU,
- MaterializationResponsibility MR) {
+ void
+ dispatchMaterialization(std::unique_ptr<MaterializationUnit> MU,
+ std::unique_ptr<MaterializationResponsibility> MR) {
assert(MU && "MU must be non-null");
- DEBUG_WITH_TYPE("orc", dumpDispatchInfo(MR.getTargetJITDylib(), *MU));
+ DEBUG_WITH_TYPE("orc", dumpDispatchInfo(MR->getTargetJITDylib(), *MU));
DispatchMaterialization(std::move(MU), std::move(MR));
}
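A sketch of routing materialization onto a ThreadPool with the new unique_ptr-based signature. This assumes the corresponding setter is setDispatchMaterialization (not shown in this hunk); the release/re-wrap dance is only needed because ThreadPool tasks must be copyable.

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/Support/ThreadPool.h"

using namespace llvm;
using namespace llvm::orc;

// Dispatch each MaterializationUnit onto a thread pool.
static void dispatchOnPool(ExecutionSession &ES, ThreadPool &Pool) {
  ES.setDispatchMaterialization(
      [&Pool](std::unique_ptr<MaterializationUnit> MU,
              std::unique_ptr<MaterializationResponsibility> MR) {
        Pool.async([RawMU = MU.release(), RawMR = MR.release()]() {
          std::unique_ptr<MaterializationUnit> OwnedMU(RawMU);
          std::unique_ptr<MaterializationResponsibility> OwnedMR(RawMR);
          OwnedMU->materialize(std::move(OwnedMR));
        });
      });
}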
@@ -1263,41 +1400,124 @@ private:
logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: ");
}
- static void
- materializeOnCurrentThread(std::unique_ptr<MaterializationUnit> MU,
- MaterializationResponsibility MR) {
+ static void materializeOnCurrentThread(
+ std::unique_ptr<MaterializationUnit> MU,
+ std::unique_ptr<MaterializationResponsibility> MR) {
MU->materialize(std::move(MR));
}
- void runOutstandingMUs();
+ void dispatchOutstandingMUs();
+
+ static std::unique_ptr<MaterializationResponsibility>
+ createMaterializationResponsibility(ResourceTracker &RT,
+ SymbolFlagsMap Symbols,
+ SymbolStringPtr InitSymbol) {
+ auto &JD = RT.getJITDylib();
+ std::unique_ptr<MaterializationResponsibility> MR(
+ new MaterializationResponsibility(&JD, std::move(Symbols),
+ std::move(InitSymbol)));
+ JD.MRTrackers[MR.get()] = &RT;
+ return MR;
+ }
+
+ Error removeResourceTracker(ResourceTracker &RT);
+ void transferResourceTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT);
+ void destroyResourceTracker(ResourceTracker &RT);
+
+  // State machine functions for query application.
+
+ /// IL_updateCandidatesFor is called to remove already-defined symbols that
+ /// match a given query from the set of candidate symbols to generate
+ /// definitions for (no need to generate a definition if one already exists).
+ Error IL_updateCandidatesFor(JITDylib &JD, JITDylibLookupFlags JDLookupFlags,
+ SymbolLookupSet &Candidates,
+ SymbolLookupSet *NonCandidates);
+
+ /// OL_applyQueryPhase1 is an optionally re-startable loop for triggering
+ /// definition generation. It is called when a lookup is performed, and again
+ /// each time that LookupState::continueLookup is called.
+ void OL_applyQueryPhase1(std::unique_ptr<InProgressLookupState> IPLS,
+ Error Err);
+
+ /// OL_completeLookup is run once phase 1 successfully completes for a lookup
+ /// call. It attempts to attach the symbol to all symbol table entries and
+ /// collect all MaterializationUnits to dispatch. If this method fails then
+ /// all MaterializationUnits will be left un-materialized.
+ void OL_completeLookup(std::unique_ptr<InProgressLookupState> IPLS,
+ std::shared_ptr<AsynchronousSymbolQuery> Q,
+ RegisterDependenciesFunction RegisterDependencies);
+
+ /// OL_completeLookupFlags is run once phase 1 successfully completes for a
+ /// lookupFlags call.
+ void OL_completeLookupFlags(
+ std::unique_ptr<InProgressLookupState> IPLS,
+ unique_function<void(Expected<SymbolFlagsMap>)> OnComplete);
+
+ // State machine functions for MaterializationResponsibility.
+ void OL_destroyMaterializationResponsibility(
+ MaterializationResponsibility &MR);
+ SymbolNameSet OL_getRequestedSymbols(const MaterializationResponsibility &MR);
+ Error OL_notifyResolved(MaterializationResponsibility &MR,
+ const SymbolMap &Symbols);
+ Error OL_notifyEmitted(MaterializationResponsibility &MR);
+ Error OL_defineMaterializing(MaterializationResponsibility &MR,
+ SymbolFlagsMap SymbolFlags);
+ void OL_notifyFailed(MaterializationResponsibility &MR);
+ Error OL_replace(MaterializationResponsibility &MR,
+ std::unique_ptr<MaterializationUnit> MU);
+ Expected<std::unique_ptr<MaterializationResponsibility>>
+ OL_delegate(MaterializationResponsibility &MR, const SymbolNameSet &Symbols);
+ void OL_addDependencies(MaterializationResponsibility &MR,
+ const SymbolStringPtr &Name,
+ const SymbolDependenceMap &Dependencies);
+ void OL_addDependenciesForAll(MaterializationResponsibility &MR,
+ const SymbolDependenceMap &Dependencies);
#ifndef NDEBUG
void dumpDispatchInfo(JITDylib &JD, MaterializationUnit &MU);
#endif // NDEBUG
mutable std::recursive_mutex SessionMutex;
+ bool SessionOpen = true;
std::shared_ptr<SymbolStringPool> SSP;
std::unique_ptr<Platform> P;
- VModuleKey LastKey = 0;
ErrorReporter ReportError = logErrorsToStdErr;
DispatchMaterializationFunction DispatchMaterialization =
materializeOnCurrentThread;
- std::vector<std::shared_ptr<JITDylib>> JDs;
+ std::vector<ResourceManager *> ResourceManagers;
+
+ std::vector<JITDylibSP> JDs;
// FIXME: Remove this (and runOutstandingMUs) once the linking layer works
// with callbacks from asynchronous queries.
mutable std::recursive_mutex OutstandingMUsMutex;
std::vector<std::pair<std::unique_ptr<MaterializationUnit>,
- MaterializationResponsibility>>
+ std::unique_ptr<MaterializationResponsibility>>>
OutstandingMUs;
};
+inline ExecutionSession &MaterializationResponsibility::getExecutionSession() {
+ return JD->getExecutionSession();
+}
+
+template <typename Func>
+Error MaterializationResponsibility::withResourceKeyDo(Func &&F) const {
+ return JD->getExecutionSession().runSessionLocked([&]() -> Error {
+ auto I = JD->MRTrackers.find(this);
+ assert(I != JD->MRTrackers.end() && "No tracker for this MR");
+ if (I->second->isDefunct())
+ return make_error<ResourceTrackerDefunct>(I->second);
+ F(I->second->getKeyUnsafe());
+ return Error::success();
+ });
+}
+
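The withResourceKeyDo helper above is how layers typically tie per-MR resources to the MR's current ResourceKey. A hedged sketch follows; the map and handle type are placeholders, not part of the patch.

#include "llvm/ADT/DenseMap.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
#include <vector>

using namespace llvm;
using namespace llvm::orc;

using AllocHandle = void *; // placeholder for a real allocation handle type
static DenseMap<ResourceKey, std::vector<AllocHandle>> AllocsByKey;

// Record an allocation against the key currently backing MR; fails if the
// tracker behind MR has already been removed (defunct).
static Error recordAlloc(MaterializationResponsibility &MR, AllocHandle A) {
  return MR.withResourceKeyDo(
      [&](ResourceKey K) { AllocsByKey[K].push_back(A); });
}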
template <typename GeneratorT>
GeneratorT &JITDylib::addGenerator(std::unique_ptr<GeneratorT> DefGenerator) {
auto &G = *DefGenerator;
- ES.runSessionLocked(
- [&]() { DefGenerators.push_back(std::move(DefGenerator)); });
+ std::lock_guard<std::mutex> Lock(GeneratorsMutex);
+ DefGenerators.push_back(std::move(DefGenerator));
return G;
}
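Typical addGenerator usage, sketched with the process-symbol generator from ExecutionUtils.h (assumes a DataLayout is available for the global prefix):

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;
using namespace llvm::orc;

// Let unresolved symbols in JD fall back to the host process's own symbols.
static Error addProcessSymbols(JITDylib &JD, const DataLayout &DL) {
  auto G = DynamicLibrarySearchGenerator::GetForCurrentProcess(
      DL.getGlobalPrefix());
  if (!G)
    return G.takeError();
  JD.addGenerator(std::move(*G));
  return Error::success();
}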
@@ -1308,7 +1528,8 @@ auto JITDylib::withLinkOrderDo(Func &&F)
}
template <typename MaterializationUnitType>
-Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU) {
+Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU,
+ ResourceTrackerSP RT) {
assert(MU && "Can not define with a null MU");
if (MU->getSymbols().empty()) {
@@ -1320,29 +1541,36 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU) {
return Error::success();
} else
DEBUG_WITH_TYPE("orc", {
- dbgs() << "Defining MU " << MU->getName() << " for " << getName() << "\n";
+ dbgs() << "Defining MU " << MU->getName() << " for " << getName()
+ << " (tracker: ";
+ if (RT == getDefaultResourceTracker())
+ dbgs() << "default)";
+ else if (RT)
+ dbgs() << RT.get() << ")\n";
+ else
+ dbgs() << "0x0, default will be used)\n";
});
return ES.runSessionLocked([&, this]() -> Error {
if (auto Err = defineImpl(*MU))
return Err;
+ if (!RT)
+ RT = getDefaultResourceTracker();
+
if (auto *P = ES.getPlatform()) {
- if (auto Err = P->notifyAdding(*this, *MU))
+ if (auto Err = P->notifyAdding(*RT, *MU))
return Err;
}
- /// defineImpl succeeded.
- auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
- for (auto &KV : UMI->MU->getSymbols())
- UnmaterializedInfos[KV.first] = UMI;
-
+ installMaterializationUnit(std::move(MU), *RT);
return Error::success();
});
}
template <typename MaterializationUnitType>
-Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU) {
+Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU,
+ ResourceTrackerSP RT) {
assert(MU && "Can not define with a null MU");
if (MU->getSymbols().empty()) {
@@ -1354,30 +1582,36 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU) {
return Error::success();
} else
DEBUG_WITH_TYPE("orc", {
- dbgs() << "Defining MU " << MU->getName() << " for " << getName() << "\n";
+ dbgs() << "Defining MU " << MU->getName() << " for " << getName()
+ << " (tracker: ";
+ if (RT == getDefaultResourceTracker())
+ dbgs() << "default)";
+ else if (RT)
+ dbgs() << RT.get() << ")\n";
+ else
+ dbgs() << "0x0, default will be used)\n";
});
return ES.runSessionLocked([&, this]() -> Error {
if (auto Err = defineImpl(*MU))
return Err;
+ if (!RT)
+ RT = getDefaultResourceTracker();
+
if (auto *P = ES.getPlatform()) {
- if (auto Err = P->notifyAdding(*this, *MU))
+ if (auto Err = P->notifyAdding(*RT, *MU))
return Err;
}
- /// defineImpl succeeded.
- auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
- for (auto &KV : UMI->MU->getSymbols())
- UnmaterializedInfos[KV.first] = UMI;
-
+ installMaterializationUnit(std::move(MU), *RT);
return Error::success();
});
}
/// ReexportsGenerator can be used with JITDylib::addGenerator to automatically
/// re-export a subset of the source JITDylib's symbols in the target.
-class ReexportsGenerator : public JITDylib::DefinitionGenerator {
+class ReexportsGenerator : public DefinitionGenerator {
public:
using SymbolPredicate = std::function<bool(SymbolStringPtr)>;
@@ -1388,7 +1622,7 @@ public:
JITDylibLookupFlags SourceJDLookupFlags,
SymbolPredicate Allow = SymbolPredicate());
- Error tryToGenerate(LookupKind K, JITDylib &JD,
+ Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
JITDylibLookupFlags JDLookupFlags,
const SymbolLookupSet &LookupSet) override;
@@ -1398,6 +1632,57 @@ private:
SymbolPredicate Allow;
};
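For comparison with ReexportsGenerator above, a minimal custom generator against the new tryToGenerate signature might look like the sketch below. This is illustrative only: the address is a placeholder, and LS is ignored, which is fine for generators that can answer synchronously.

#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

// Defines every symbol it is asked about as an exported absolute symbol at a
// dummy address. Real generators would consult an actual symbol source.
class NullGenerator : public DefinitionGenerator {
public:
  Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
                      JITDylibLookupFlags JDLookupFlags,
                      const SymbolLookupSet &Symbols) override {
    SymbolMap NewDefs;
    for (auto &KV : Symbols)
      NewDefs[KV.first] = JITEvaluatedSymbol(0x1000, JITSymbolFlags::Exported);
    return JD.define(absoluteSymbols(std::move(NewDefs)));
  }
};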
+// --------------- IMPLEMENTATION --------------
+// Implementations for inline functions/methods.
+// ---------------------------------------------
+
+inline MaterializationResponsibility::~MaterializationResponsibility() {
+ JD->getExecutionSession().OL_destroyMaterializationResponsibility(*this);
+}
+
+inline SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const {
+ return JD->getExecutionSession().OL_getRequestedSymbols(*this);
+}
+
+inline Error MaterializationResponsibility::notifyResolved(
+ const SymbolMap &Symbols) {
+ return JD->getExecutionSession().OL_notifyResolved(*this, Symbols);
+}
+
+inline Error MaterializationResponsibility::notifyEmitted() {
+ return JD->getExecutionSession().OL_notifyEmitted(*this);
+}
+
+inline Error MaterializationResponsibility::defineMaterializing(
+ SymbolFlagsMap SymbolFlags) {
+ return JD->getExecutionSession().OL_defineMaterializing(
+ *this, std::move(SymbolFlags));
+}
+
+inline void MaterializationResponsibility::failMaterialization() {
+ JD->getExecutionSession().OL_notifyFailed(*this);
+}
+
+inline Error MaterializationResponsibility::replace(
+ std::unique_ptr<MaterializationUnit> MU) {
+ return JD->getExecutionSession().OL_replace(*this, std::move(MU));
+}
+
+inline Expected<std::unique_ptr<MaterializationResponsibility>>
+MaterializationResponsibility::delegate(const SymbolNameSet &Symbols) {
+ return JD->getExecutionSession().OL_delegate(*this, Symbols);
+}
+
+inline void MaterializationResponsibility::addDependencies(
+ const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies) {
+ JD->getExecutionSession().OL_addDependencies(*this, Name, Dependencies);
+}
+
+inline void MaterializationResponsibility::addDependenciesForAll(
+ const SymbolDependenceMap &Dependencies) {
+ JD->getExecutionSession().OL_addDependenciesForAll(*this, Dependencies);
+}
+
} // End namespace orc
} // End namespace llvm
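To see the inline MaterializationResponsibility methods above in context, here is a hedged sketch of a trivial MaterializationUnit that materializes a single precomputed symbol; the class, symbol, and error handling are illustrative only.

#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

// Materializes one already-known symbol by resolving and then emitting it.
class OneSymbolMU : public MaterializationUnit {
public:
  OneSymbolMU(SymbolStringPtr Name, JITEvaluatedSymbol Sym)
      : MaterializationUnit(SymbolFlagsMap({{Name, Sym.getFlags()}}),
                            /*InitSymbol=*/nullptr),
        Name(std::move(Name)), Sym(Sym) {}

  StringRef getName() const override { return "OneSymbolMU"; }

  void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
    if (auto Err = R->notifyResolved({{Name, Sym}})) {
      R->getExecutionSession().reportError(std::move(Err));
      R->failMaterialization();
      return;
    }
    if (auto Err = R->notifyEmitted()) {
      R->getExecutionSession().reportError(std::move(Err));
      R->failMaterialization();
    }
  }

private:
  void discard(const JITDylib &JD, const SymbolStringPtr &Symbol) override {}

  SymbolStringPtr Name;
  JITEvaluatedSymbol Sym;
};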
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index 3b824b83b052..fdddc9694d0b 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -18,7 +18,7 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/Mangling.h"
-#include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/DynamicLibrary.h"
@@ -41,17 +41,6 @@ namespace orc {
class ObjectLayer;
-/// Run a main function, returning the result.
-///
-/// If the optional ProgramName argument is given then it will be inserted
-/// before the strings in Args as the first argument to the called function.
-///
-/// It is legal to have an empty argument list and no program name, however
-/// many main functions will expect a name argument at least, and will fail
-/// if none is provided.
-int runAsMain(int (*Main)(int, char *[]), ArrayRef<std::string> Args,
- Optional<StringRef> ProgramName = None);
-
/// This iterator provides a convenient way to iterate over the elements
/// of an llvm.global_ctors/llvm.global_dtors instance.
///
@@ -152,56 +141,6 @@ inline iterator_range<StaticInitGVIterator> getStaticInitGVs(Module &M) {
return make_range(StaticInitGVIterator(M), StaticInitGVIterator());
}
-/// Convenience class for recording constructor/destructor names for
-/// later execution.
-template <typename JITLayerT>
-class LegacyCtorDtorRunner {
-public:
- /// Construct a CtorDtorRunner for the given range using the given
- /// name mangling function.
- LLVM_ATTRIBUTE_DEPRECATED(
- LegacyCtorDtorRunner(std::vector<std::string> CtorDtorNames,
- VModuleKey K),
- "ORCv1 utilities (utilities with the 'Legacy' prefix) are deprecated. "
- "Please use the ORCv2 CtorDtorRunner utility instead");
-
- LegacyCtorDtorRunner(ORCv1DeprecationAcknowledgement,
- std::vector<std::string> CtorDtorNames, VModuleKey K)
- : CtorDtorNames(std::move(CtorDtorNames)), K(K) {}
-
- /// Run the recorded constructors/destructors through the given JIT
- /// layer.
- Error runViaLayer(JITLayerT &JITLayer) const {
- using CtorDtorTy = void (*)();
-
- for (const auto &CtorDtorName : CtorDtorNames) {
- if (auto CtorDtorSym = JITLayer.findSymbolIn(K, CtorDtorName, false)) {
- if (auto AddrOrErr = CtorDtorSym.getAddress()) {
- CtorDtorTy CtorDtor =
- reinterpret_cast<CtorDtorTy>(static_cast<uintptr_t>(*AddrOrErr));
- CtorDtor();
- } else
- return AddrOrErr.takeError();
- } else {
- if (auto Err = CtorDtorSym.takeError())
- return Err;
- else
- return make_error<JITSymbolNotFound>(CtorDtorName);
- }
- }
- return Error::success();
- }
-
-private:
- std::vector<std::string> CtorDtorNames;
- orc::VModuleKey K;
-};
-
-template <typename JITLayerT>
-LegacyCtorDtorRunner<JITLayerT>::LegacyCtorDtorRunner(
- std::vector<std::string> CtorDtorNames, VModuleKey K)
- : CtorDtorNames(std::move(CtorDtorNames)), K(K) {}
-
class CtorDtorRunner {
public:
CtorDtorRunner(JITDylib &JD) : JD(JD) {}
@@ -250,45 +189,6 @@ protected:
void *DSOHandle);
};
-class LegacyLocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
-public:
- /// Create a runtime-overrides class.
- template <typename MangleFtorT>
- LLVM_ATTRIBUTE_DEPRECATED(
- LegacyLocalCXXRuntimeOverrides(const MangleFtorT &Mangle),
- "ORCv1 utilities (utilities with the 'Legacy' prefix) are deprecated. "
- "Please use the ORCv2 LocalCXXRuntimeOverrides utility instead");
-
- template <typename MangleFtorT>
- LegacyLocalCXXRuntimeOverrides(ORCv1DeprecationAcknowledgement,
- const MangleFtorT &Mangle) {
- addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
- addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
- }
-
- /// Search overrided symbols.
- JITEvaluatedSymbol searchOverrides(const std::string &Name) {
- auto I = CXXRuntimeOverrides.find(Name);
- if (I != CXXRuntimeOverrides.end())
- return JITEvaluatedSymbol(I->second, JITSymbolFlags::Exported);
- return nullptr;
- }
-
-private:
- void addOverride(const std::string &Name, JITTargetAddress Addr) {
- CXXRuntimeOverrides.insert(std::make_pair(Name, Addr));
- }
-
- StringMap<JITTargetAddress> CXXRuntimeOverrides;
-};
-
-template <typename MangleFtorT>
-LegacyLocalCXXRuntimeOverrides::LegacyLocalCXXRuntimeOverrides(
- const MangleFtorT &Mangle) {
- addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
- addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
-}
-
class LocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
public:
Error enable(JITDylib &JD, MangleAndInterner &Mangler);
@@ -315,7 +215,7 @@ private:
/// If an instance of this class is attached to a JITDylib as a fallback
/// definition generator, then any symbol found in the given DynamicLibrary that
/// passes the 'Allow' predicate will be added to the JITDylib.
-class DynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator {
+class DynamicLibrarySearchGenerator : public DefinitionGenerator {
public:
using SymbolPredicate = std::function<bool(const SymbolStringPtr &)>;
@@ -343,7 +243,7 @@ public:
return Load(nullptr, GlobalPrefix, std::move(Allow));
}
- Error tryToGenerate(LookupKind K, JITDylib &JD,
+ Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
JITDylibLookupFlags JDLookupFlags,
const SymbolLookupSet &Symbols) override;
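An illustrative use of the Load factory and the 'Allow' predicate described above (the library name is just an example):

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"

using namespace llvm;
using namespace llvm::orc;

// Expose only "sin" from libm to lookups in JD.
static Error addSinOnly(ExecutionSession &ES, JITDylib &JD,
                        char GlobalPrefix) {
  auto Sin = ES.intern("sin");
  auto G = DynamicLibrarySearchGenerator::Load(
      "libm.so.6", GlobalPrefix,
      [Sin](const SymbolStringPtr &Name) { return Name == Sin; });
  if (!G)
    return G.takeError();
  JD.addGenerator(std::move(*G));
  return Error::success();
}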
@@ -358,7 +258,7 @@ private:
/// If an instance of this class is attached to a JITDylib as a fallback
/// definition generator, then any symbol found in the archive will result in
/// the containing object being added to the JITDylib.
-class StaticLibraryDefinitionGenerator : public JITDylib::DefinitionGenerator {
+class StaticLibraryDefinitionGenerator : public DefinitionGenerator {
public:
/// Try to create a StaticLibraryDefinitionGenerator from the given path.
///
@@ -381,7 +281,7 @@ public:
static Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
Create(ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer);
- Error tryToGenerate(LookupKind K, JITDylib &JD,
+ Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
JITDylibLookupFlags JDLookupFlags,
const SymbolLookupSet &Symbols) override;
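A hedged sketch of wiring up StaticLibraryDefinitionGenerator via the Create factory above (the path and error handling are illustrative):

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/Support/MemoryBuffer.h"

using namespace llvm;
using namespace llvm::orc;

// Pull objects out of an archive lazily, as their symbols are first looked up.
static Error addArchive(JITDylib &JD, ObjectLayer &ObjLayer,
                        const char *Path) {
  auto Buf = MemoryBuffer::getFile(Path);
  if (!Buf)
    return errorCodeToError(Buf.getError());
  auto G = StaticLibraryDefinitionGenerator::Create(ObjLayer, std::move(*Buf));
  if (!G)
    return G.takeError();
  JD.addGenerator(std::move(*G));
  return Error::success();
}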
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
deleted file mode 100644
index a4e43d4e1c9c..000000000000
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
+++ /dev/null
@@ -1,111 +0,0 @@
-//===- GlobalMappingLayer.h - Run all IR through a functor ------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Convenience layer for injecting symbols that will appear in calls to
-// findSymbol.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H
-#define LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H
-
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include <map>
-#include <memory>
-#include <string>
-
-namespace llvm {
-
-class Module;
-class JITSymbolResolver;
-
-namespace orc {
-
-/// Global mapping layer.
-///
-/// This layer overrides the findSymbol method to first search a local symbol
-/// table that the client can define. It can be used to inject new symbol
-/// mappings into the JIT. Beware, however: symbols within a single IR module or
-/// object file will still resolve locally (via RuntimeDyld's symbol table) -
-/// such internal references cannot be overriden via this layer.
-template <typename BaseLayerT>
-class GlobalMappingLayer {
-public:
-
- /// Handle to an added module.
- using ModuleHandleT = typename BaseLayerT::ModuleHandleT;
-
- /// Construct an GlobalMappingLayer with the given BaseLayer
- GlobalMappingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {}
-
- /// Add the given module to the JIT.
- /// @return A handle for the added modules.
- Expected<ModuleHandleT>
- addModule(std::shared_ptr<Module> M,
- std::shared_ptr<JITSymbolResolver> Resolver) {
- return BaseLayer.addModule(std::move(M), std::move(Resolver));
- }
-
- /// Remove the module set associated with the handle H.
- Error removeModule(ModuleHandleT H) { return BaseLayer.removeModule(H); }
-
- /// Manually set the address to return for the given symbol.
- void setGlobalMapping(const std::string &Name, JITTargetAddress Addr) {
- SymbolTable[Name] = Addr;
- }
-
- /// Remove the given symbol from the global mapping.
- void eraseGlobalMapping(const std::string &Name) {
- SymbolTable.erase(Name);
- }
-
- /// Search for the given named symbol.
- ///
- /// This method will first search the local symbol table, returning
- /// any symbol found there. If the symbol is not found in the local
- /// table then this call will be passed through to the base layer.
- ///
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it exists.
- JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
- auto I = SymbolTable.find(Name);
- if (I != SymbolTable.end())
- return JITSymbol(I->second, JITSymbolFlags::Exported);
- return BaseLayer.findSymbol(Name, ExportedSymbolsOnly);
- }
-
- /// Get the address of the given symbol in the context of the of the
- /// module represented by the handle H. This call is forwarded to the
- /// base layer's implementation.
- /// @param H The handle for the module to search in.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it is found in the
- /// given module.
- JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name,
- bool ExportedSymbolsOnly) {
- return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly);
- }
-
- /// Immediately emit and finalize the module set represented by the
- /// given handle.
- /// @param H Handle for module set to emit/finalize.
- Error emitAndFinalize(ModuleHandleT H) {
- return BaseLayer.emitAndFinalize(H);
- }
-
-private:
- BaseLayerT &BaseLayer;
- std::map<std::string, JITTargetAddress> SymbolTable;
-};
-
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index eb74d283f043..f8fdb171bbf9 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -45,8 +45,8 @@ public:
IRSymbolMapper::ManglingOptions MO;
};
- using NotifyCompiledFunction =
- std::function<void(VModuleKey K, ThreadSafeModule TSM)>;
+ using NotifyCompiledFunction = std::function<void(
+ MaterializationResponsibility &R, ThreadSafeModule TSM)>;
IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
std::unique_ptr<IRCompiler> Compile);
@@ -55,7 +55,8 @@ public:
void setNotifyCompiled(NotifyCompiledFunction NotifyCompiled);
- void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
+ void emit(std::unique_ptr<MaterializationResponsibility> R,
+ ThreadSafeModule TSM) override;
private:
mutable std::mutex IRLayerMutex;
@@ -65,99 +66,6 @@ private:
NotifyCompiledFunction NotifyCompiled = NotifyCompiledFunction();
};
-/// Eager IR compiling layer.
-///
-/// This layer immediately compiles each IR module added via addModule to an
-/// object file and adds this module file to the layer below, which must
-/// implement the object layer concept.
-template <typename BaseLayerT, typename CompileFtor>
-class LegacyIRCompileLayer {
-public:
- /// Callback type for notifications when modules are compiled.
- using NotifyCompiledCallback =
- std::function<void(VModuleKey K, std::unique_ptr<Module>)>;
-
- /// Construct an LegacyIRCompileLayer with the given BaseLayer, which must
- /// implement the ObjectLayer concept.
- LLVM_ATTRIBUTE_DEPRECATED(
- LegacyIRCompileLayer(
- BaseLayerT &BaseLayer, CompileFtor Compile,
- NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback()),
- "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
- "use "
- "the ORCv2 IRCompileLayer instead");
-
- /// Legacy layer constructor with deprecation acknowledgement.
- LegacyIRCompileLayer(
- ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer,
- CompileFtor Compile,
- NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback())
- : BaseLayer(BaseLayer), Compile(std::move(Compile)),
- NotifyCompiled(std::move(NotifyCompiled)) {}
-
- /// Get a reference to the compiler functor.
- CompileFtor& getCompiler() { return Compile; }
-
- /// (Re)set the NotifyCompiled callback.
- void setNotifyCompiled(NotifyCompiledCallback NotifyCompiled) {
- this->NotifyCompiled = std::move(NotifyCompiled);
- }
-
- /// Compile the module, and add the resulting object to the base layer
- /// along with the given memory manager and symbol resolver.
- Error addModule(VModuleKey K, std::unique_ptr<Module> M) {
- auto Obj = Compile(*M);
- if (!Obj)
- return Obj.takeError();
- if (auto Err = BaseLayer.addObject(std::move(K), std::move(*Obj)))
- return Err;
- if (NotifyCompiled)
- NotifyCompiled(std::move(K), std::move(M));
- return Error::success();
- }
-
- /// Remove the module associated with the VModuleKey K.
- Error removeModule(VModuleKey K) { return BaseLayer.removeObject(K); }
-
- /// Search for the given named symbol.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it exists.
- JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
- return BaseLayer.findSymbol(Name, ExportedSymbolsOnly);
- }
-
- /// Get the address of the given symbol in compiled module represented
- /// by the handle H. This call is forwarded to the base layer's
- /// implementation.
- /// @param K The VModuleKey for the module to search in.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it is found in the
- /// given module.
- JITSymbol findSymbolIn(VModuleKey K, const std::string &Name,
- bool ExportedSymbolsOnly) {
- return BaseLayer.findSymbolIn(K, Name, ExportedSymbolsOnly);
- }
-
- /// Immediately emit and finalize the module represented by the given
- /// handle.
- /// @param K The VModuleKey for the module to emit/finalize.
- Error emitAndFinalize(VModuleKey K) { return BaseLayer.emitAndFinalize(K); }
-
-private:
- BaseLayerT &BaseLayer;
- CompileFtor Compile;
- NotifyCompiledCallback NotifyCompiled;
-};
-
-template <typename BaseLayerT, typename CompileFtor>
-LegacyIRCompileLayer<BaseLayerT, CompileFtor>::LegacyIRCompileLayer(
- BaseLayerT &BaseLayer, CompileFtor Compile,
- NotifyCompiledCallback NotifyCompiled)
- : BaseLayer(BaseLayer), Compile(std::move(Compile)),
- NotifyCompiled(std::move(NotifyCompiled)) {}
-
} // end namespace orc
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
index 296d74ae6b86..66966a0f8762 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
@@ -13,6 +13,7 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H
#define LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H
+#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/Layer.h"
#include <memory>
@@ -27,7 +28,7 @@ namespace orc {
/// before operating on the module.
class IRTransformLayer : public IRLayer {
public:
- using TransformFunction = std::function<Expected<ThreadSafeModule>(
+ using TransformFunction = unique_function<Expected<ThreadSafeModule>(
ThreadSafeModule, MaterializationResponsibility &R)>;
IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer,
@@ -37,7 +38,8 @@ public:
this->Transform = std::move(Transform);
}
- void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
+ void emit(std::unique_ptr<MaterializationResponsibility> R,
+ ThreadSafeModule TSM) override;
static ThreadSafeModule identityTransform(ThreadSafeModule TSM,
MaterializationResponsibility &R) {
@@ -49,80 +51,6 @@ private:
TransformFunction Transform;
};
-/// IR mutating layer.
-///
-/// This layer applies a user supplied transform to each module that is added,
-/// then adds the transformed module to the layer below.
-template <typename BaseLayerT, typename TransformFtor>
-class LegacyIRTransformLayer {
-public:
-
- /// Construct an LegacyIRTransformLayer with the given BaseLayer
- LLVM_ATTRIBUTE_DEPRECATED(
- LegacyIRTransformLayer(BaseLayerT &BaseLayer,
- TransformFtor Transform = TransformFtor()),
- "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
- "use "
- "the ORCv2 IRTransformLayer instead");
-
- /// Legacy layer constructor with deprecation acknowledgement.
- LegacyIRTransformLayer(ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer,
- TransformFtor Transform = TransformFtor())
- : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
-
- /// Apply the transform functor to the module, then add the module to
- /// the layer below, along with the memory manager and symbol resolver.
- ///
- /// @return A handle for the added modules.
- Error addModule(VModuleKey K, std::unique_ptr<Module> M) {
- return BaseLayer.addModule(std::move(K), Transform(std::move(M)));
- }
-
- /// Remove the module associated with the VModuleKey K.
- Error removeModule(VModuleKey K) { return BaseLayer.removeModule(K); }
-
- /// Search for the given named symbol.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it exists.
- JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
- return BaseLayer.findSymbol(Name, ExportedSymbolsOnly);
- }
-
- /// Get the address of the given symbol in the context of the module
- /// represented by the VModuleKey K. This call is forwarded to the base
- /// layer's implementation.
- /// @param K The VModuleKey for the module to search in.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it is found in the
- /// given module.
- JITSymbol findSymbolIn(VModuleKey K, const std::string &Name,
- bool ExportedSymbolsOnly) {
- return BaseLayer.findSymbolIn(K, Name, ExportedSymbolsOnly);
- }
-
- /// Immediately emit and finalize the module represented by the given
- /// VModuleKey.
- /// @param K The VModuleKey for the module to emit/finalize.
- Error emitAndFinalize(VModuleKey K) { return BaseLayer.emitAndFinalize(K); }
-
- /// Access the transform functor directly.
- TransformFtor& getTransform() { return Transform; }
-
- /// Access the mumate functor directly.
- const TransformFtor& getTransform() const { return Transform; }
-
-private:
- BaseLayerT &BaseLayer;
- TransformFtor Transform;
-};
-
-template <typename BaseLayerT, typename TransformFtor>
-LegacyIRTransformLayer<BaseLayerT, TransformFtor>::LegacyIRTransformLayer(
- BaseLayerT &BaseLayer, TransformFtor Transform)
- : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
-
} // end namespace orc
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index e0cfd8bf2409..78e3ceef50e2 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -62,14 +62,33 @@ public:
JITTargetAddress TrampolineAddr,
NotifyLandingResolvedFunction OnLandingResolved) const>;
- virtual ~TrampolinePool() {}
+ virtual ~TrampolinePool();
/// Get an available trampoline address.
/// Returns an error if no trampoline can be created.
- virtual Expected<JITTargetAddress> getTrampoline() = 0;
+ Expected<JITTargetAddress> getTrampoline() {
+ std::lock_guard<std::mutex> Lock(TPMutex);
+ if (AvailableTrampolines.empty()) {
+ if (auto Err = grow())
+ return std::move(Err);
+ }
+ assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool");
+ auto TrampolineAddr = AvailableTrampolines.back();
+ AvailableTrampolines.pop_back();
+ return TrampolineAddr;
+ }
-private:
- virtual void anchor();
+ /// Returns the given trampoline to the pool for re-use.
+ void releaseTrampoline(JITTargetAddress TrampolineAddr) {
+ std::lock_guard<std::mutex> Lock(TPMutex);
+ AvailableTrampolines.push_back(TrampolineAddr);
+ }
+
+protected:
+ virtual Error grow() = 0;
+
+ std::mutex TPMutex;
+ std::vector<JITTargetAddress> AvailableTrampolines;
};
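With getTrampoline and releaseTrampoline hoisted into the base class, all TrampolinePool subclasses are used the same way; a small illustrative sketch:

#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"

// Borrow a trampoline from the pool and hand it back when done with it.
static llvm::Error useOneTrampoline(llvm::orc::TrampolinePool &TP) {
  auto TrampolineAddr = TP.getTrampoline(); // grows the pool if it is empty
  if (!TrampolineAddr)
    return TrampolineAddr.takeError();
  // ... point a stub or call site at *TrampolineAddr and run through it ...
  TP.releaseTrampoline(*TrampolineAddr);    // make it available for re-use
  return llvm::Error::success();
}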
/// A trampoline pool for trampolines within the current process.
@@ -90,26 +109,6 @@ public:
return std::move(LTP);
}
- /// Get a free trampoline. Returns an error if one can not be provided (e.g.
- /// because the pool is empty and can not be grown).
- Expected<JITTargetAddress> getTrampoline() override {
- std::lock_guard<std::mutex> Lock(LTPMutex);
- if (AvailableTrampolines.empty()) {
- if (auto Err = grow())
- return std::move(Err);
- }
- assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool");
- auto TrampolineAddr = AvailableTrampolines.back();
- AvailableTrampolines.pop_back();
- return TrampolineAddr;
- }
-
- /// Returns the given trampoline to the pool for re-use.
- void releaseTrampoline(JITTargetAddress TrampolineAddr) {
- std::lock_guard<std::mutex> Lock(LTPMutex);
- AvailableTrampolines.push_back(TrampolineAddr);
- }
-
private:
static JITTargetAddress reenter(void *TrampolinePoolPtr, void *TrampolineId) {
LocalTrampolinePool<ORCABI> *TrampolinePool =
@@ -154,8 +153,8 @@ private:
}
}
- Error grow() {
- assert(this->AvailableTrampolines.empty() && "Growing prematurely?");
+ Error grow() override {
+ assert(AvailableTrampolines.empty() && "Growing prematurely?");
std::error_code EC;
auto TrampolineBlock =
@@ -175,7 +174,7 @@ private:
pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines);
for (unsigned I = 0; I < NumTrampolines; ++I)
- this->AvailableTrampolines.push_back(pointerToJITTargetAddress(
+ AvailableTrampolines.push_back(pointerToJITTargetAddress(
TrampolineMem + (I * ORCABI::TrampolineSize)));
if (auto EC = sys::Memory::protectMappedMemory(
@@ -189,10 +188,8 @@ private:
ResolveLandingFunction ResolveLanding;
- std::mutex LTPMutex;
sys::OwningMemoryBlock ResolverBlock;
std::vector<sys::OwningMemoryBlock> TrampolineBlocks;
- std::vector<JITTargetAddress> AvailableTrampolines;
};
/// Target-independent base class for compile callback management.
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
index 96f8e169e7dc..ff0aa0238523 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -19,7 +19,6 @@
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
-#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ThreadPool.h"
@@ -29,6 +28,8 @@ namespace orc {
class LLJITBuilderState;
class LLLazyJITBuilderState;
+class ObjectTransformLayer;
+class TargetProcessControl;
/// A pre-fabricated ORC JIT stack that can serve as an alternative to MCJIT.
///
@@ -85,21 +86,8 @@ public:
return ES->createJITDylib(std::move(Name));
}
- /// A convenience method for defining MUs in LLJIT's Main JITDylib. This can
- /// be useful for succinctly defining absolute symbols, aliases and
- /// re-exports.
- template <typename MUType>
- Error define(std::unique_ptr<MUType> &&MU) {
- return Main->define(std::move(MU));
- }
-
- /// A convenience method for defining MUs in LLJIT's Main JITDylib. This can
- /// be usedful for succinctly defining absolute symbols, aliases and
- /// re-exports.
- template <typename MUType>
- Error define(std::unique_ptr<MUType> &MU) {
- return Main->define(MU);
- }
+ /// Adds an IR module with the given ResourceTracker.
+ Error addIRModule(ResourceTrackerSP RT, ThreadSafeModule TSM);
/// Adds an IR module to the given JITDylib.
Error addIRModule(JITDylib &JD, ThreadSafeModule TSM);
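A hedged usage sketch for the ResourceTracker-based addIRModule overload above (assumes an already-constructed LLJIT instance and module):

#include "llvm/ExecutionEngine/Orc/LLJIT.h"

using namespace llvm;
using namespace llvm::orc;

// Add a module under its own tracker so it can be removed independently.
static Error addThenRemove(LLJIT &J, ThreadSafeModule TSM) {
  JITDylib &JD = J.getMainJITDylib();
  ResourceTrackerSP RT = JD.createResourceTracker();

  if (auto Err = J.addIRModule(RT, std::move(TSM)))
    return Err;

  // ... look up and run symbols from the module via J.lookup(...) ...

  // Remove everything that was added under RT.
  return RT->remove();
}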
@@ -110,6 +98,9 @@ public:
}
/// Adds an object file to the given JITDylib.
+ Error addObjectFile(ResourceTrackerSP RT, std::unique_ptr<MemoryBuffer> Obj);
+
+ /// Adds an object file to the given JITDylib.
Error addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj);
/// Adds an object file to the given JITDylib.
@@ -178,7 +169,7 @@ public:
ObjectLayer &getObjLinkingLayer() { return *ObjLinkingLayer; }
/// Returns a reference to the object transform layer.
- ObjectTransformLayer &getObjTransformLayer() { return ObjTransformLayer; }
+ ObjectTransformLayer &getObjTransformLayer() { return *ObjTransformLayer; }
/// Returns a reference to the IR transform layer.
IRTransformLayer &getIRTransformLayer() { return *TransformLayer; }
@@ -195,7 +186,7 @@ public:
}
protected:
- static std::unique_ptr<ObjectLayer>
+ static Expected<std::unique_ptr<ObjectLayer>>
createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES);
static Expected<std::unique_ptr<IRCompileLayer::IRCompiler>>
@@ -218,7 +209,7 @@ protected:
std::unique_ptr<ThreadPool> CompileThreads;
std::unique_ptr<ObjectLayer> ObjLinkingLayer;
- ObjectTransformLayer ObjTransformLayer;
+ std::unique_ptr<ObjectTransformLayer> ObjTransformLayer;
std::unique_ptr<IRCompileLayer> CompileLayer;
std::unique_ptr<IRTransformLayer> TransformLayer;
std::unique_ptr<IRTransformLayer> InitHelperTransformLayer;
@@ -237,6 +228,9 @@ public:
CODLayer->setPartitionFunction(std::move(Partition));
}
+ /// Returns a reference to the on-demand layer.
+ CompileOnDemandLayer &getCompileOnDemandLayer() { return *CODLayer; }
+
/// Add a module to be lazily compiled to JITDylib JD.
Error addLazyIRModule(JITDylib &JD, ThreadSafeModule M);
@@ -256,8 +250,9 @@ private:
class LLJITBuilderState {
public:
- using ObjectLinkingLayerCreator = std::function<std::unique_ptr<ObjectLayer>(
- ExecutionSession &, const Triple &TT)>;
+ using ObjectLinkingLayerCreator =
+ std::function<Expected<std::unique_ptr<ObjectLayer>>(ExecutionSession &,
+ const Triple &)>;
using CompileFunctionCreator =
std::function<Expected<std::unique_ptr<IRCompileLayer::IRCompiler>>(
@@ -272,6 +267,7 @@ public:
CompileFunctionCreator CreateCompileFunction;
PlatformSetupFunction SetUpPlatform;
unsigned NumCompileThreads = 0;
+ TargetProcessControl *TPC = nullptr;
 /// Called prior to JIT class construction to fix up defaults.
Error prepareForConstruction();
@@ -354,6 +350,17 @@ public:
return impl();
}
+ /// Set a TargetProcessControl object.
+ ///
+ /// If the platform uses ObjectLinkingLayer by default and no
+ /// ObjectLinkingLayerCreator has been set then the TargetProcessControl
+ /// object will be used to supply the memory manager for the
+ /// ObjectLinkingLayer.
+ SetterImpl &setTargetProcessControl(TargetProcessControl &TPC) {
+ impl().TPC = &TPC;
+ return impl();
+ }
+
/// Create an instance of the JIT.
Expected<std::unique_ptr<JITType>> create() {
if (auto Err = impl().prepareForConstruction())
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
deleted file mode 100644
index b31914f12a0d..000000000000
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
+++ /dev/null
@@ -1,84 +0,0 @@
-//===- LambdaResolverMM - Redirect symbol lookup via a functor --*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Defines a RuntimeDyld::SymbolResolver subclass that uses a user-supplied
-// functor for symbol resolution.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H
-#define LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
-#include <memory>
-
-namespace llvm {
-namespace orc {
-
-template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
-class LambdaResolver : public LegacyJITSymbolResolver {
-public:
- LLVM_ATTRIBUTE_DEPRECATED(
- LambdaResolver(DylibLookupFtorT DylibLookupFtor,
- ExternalLookupFtorT ExternalLookupFtor),
- "ORCv1 utilities (including resolvers) are deprecated and will be "
- "removed "
- "in the next release. Please use ORCv2 (see docs/ORCv2.rst)");
-
- LambdaResolver(ORCv1DeprecationAcknowledgement,
- DylibLookupFtorT DylibLookupFtor,
- ExternalLookupFtorT ExternalLookupFtor)
- : DylibLookupFtor(DylibLookupFtor),
- ExternalLookupFtor(ExternalLookupFtor) {}
-
- JITSymbol findSymbolInLogicalDylib(const std::string &Name) final {
- return DylibLookupFtor(Name);
- }
-
- JITSymbol findSymbol(const std::string &Name) final {
- return ExternalLookupFtor(Name);
- }
-
-private:
- DylibLookupFtorT DylibLookupFtor;
- ExternalLookupFtorT ExternalLookupFtor;
-};
-
-template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
-LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>::LambdaResolver(
- DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor)
- : DylibLookupFtor(DylibLookupFtor), ExternalLookupFtor(ExternalLookupFtor) {
-}
-
-template <typename DylibLookupFtorT,
- typename ExternalLookupFtorT>
-std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>>
-createLambdaResolver(DylibLookupFtorT DylibLookupFtor,
- ExternalLookupFtorT ExternalLookupFtor) {
- using LR = LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>;
- return std::make_unique<LR>(std::move(DylibLookupFtor),
- std::move(ExternalLookupFtor));
-}
-
-template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
-std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>>
-createLambdaResolver(ORCv1DeprecationAcknowledgement,
- DylibLookupFtorT DylibLookupFtor,
- ExternalLookupFtorT ExternalLookupFtor) {
- using LR = LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>;
- return std::make_unique<LR>(AcknowledgeORCv1Deprecation,
- std::move(DylibLookupFtor),
- std::move(ExternalLookupFtor));
-}
-
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Layer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
index e843d0f56245..f9cc15583b42 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
@@ -34,15 +34,15 @@ public:
/// SymbolFlags and SymbolToDefinition maps.
IRMaterializationUnit(ExecutionSession &ES,
const IRSymbolMapper::ManglingOptions &MO,
- ThreadSafeModule TSM, VModuleKey K);
+ ThreadSafeModule TSM);
 /// Create an IRMaterializationUnit from a module, and pre-existing
/// SymbolFlags and SymbolToDefinition maps. The maps must provide
/// entries for each definition in M.
/// This constructor is useful for delegating work from one
/// IRMaterializationUnit to another.
- IRMaterializationUnit(ThreadSafeModule TSM, VModuleKey K,
- SymbolFlagsMap SymbolFlags, SymbolStringPtr InitSymbol,
+ IRMaterializationUnit(ThreadSafeModule TSM, SymbolFlagsMap SymbolFlags,
+ SymbolStringPtr InitSymbol,
SymbolNameToDefinitionMap SymbolToDefinition);
/// Return the ModuleIdentifier as the name for this MaterializationUnit.
@@ -94,13 +94,19 @@ public:
/// Returns the current value of the CloneToNewContextOnEmit flag.
bool getCloneToNewContextOnEmit() const { return CloneToNewContextOnEmit; }
+  /// Add a MaterializationUnit representing the given IR to the JITDylib
+ /// targeted by the given tracker.
+ virtual Error add(ResourceTrackerSP RT, ThreadSafeModule TSM);
+
/// Adds a MaterializationUnit representing the given IR to the given
- /// JITDylib.
- virtual Error add(JITDylib &JD, ThreadSafeModule TSM,
- VModuleKey K = VModuleKey());
+  /// JITDylib, using the JITDylib's default resource tracker.
+ Error add(JITDylib &JD, ThreadSafeModule TSM) {
+ return add(JD.getDefaultResourceTracker(), std::move(TSM));
+ }
/// Emit should materialize the given IR.
- virtual void emit(MaterializationResponsibility R, ThreadSafeModule TSM) = 0;
+ virtual void emit(std::unique_ptr<MaterializationResponsibility> R,
+ ThreadSafeModule TSM) = 0;
private:
bool CloneToNewContextOnEmit = false;
@@ -114,14 +120,12 @@ class BasicIRLayerMaterializationUnit : public IRMaterializationUnit {
public:
BasicIRLayerMaterializationUnit(IRLayer &L,
const IRSymbolMapper::ManglingOptions &MO,
- ThreadSafeModule TSM, VModuleKey K);
+ ThreadSafeModule TSM);
private:
-
- void materialize(MaterializationResponsibility R) override;
+ void materialize(std::unique_ptr<MaterializationResponsibility> R) override;
IRLayer &L;
- VModuleKey K;
};
/// Interface for Layers that accept object files.
@@ -135,11 +139,14 @@ public:
 /// Adds a MaterializationUnit representing the given object to the given
/// JITDylib.
- virtual Error add(JITDylib &JD, std::unique_ptr<MemoryBuffer> O,
- VModuleKey K = VModuleKey());
+ virtual Error add(ResourceTrackerSP RT, std::unique_ptr<MemoryBuffer> O);
+
+ Error add(JITDylib &JD, std::unique_ptr<MemoryBuffer> O) {
+ return add(JD.getDefaultResourceTracker(), std::move(O));
+ }
 /// Emit should materialize the given object.
- virtual void emit(MaterializationResponsibility R,
+ virtual void emit(std::unique_ptr<MaterializationResponsibility> R,
std::unique_ptr<MemoryBuffer> O) = 0;
private:
@@ -151,9 +158,9 @@ private:
class BasicObjectLayerMaterializationUnit : public MaterializationUnit {
public:
static Expected<std::unique_ptr<BasicObjectLayerMaterializationUnit>>
- Create(ObjectLayer &L, VModuleKey K, std::unique_ptr<MemoryBuffer> O);
+ Create(ObjectLayer &L, std::unique_ptr<MemoryBuffer> O);
- BasicObjectLayerMaterializationUnit(ObjectLayer &L, VModuleKey K,
+ BasicObjectLayerMaterializationUnit(ObjectLayer &L,
std::unique_ptr<MemoryBuffer> O,
SymbolFlagsMap SymbolFlags,
SymbolStringPtr InitSymbol);
@@ -162,8 +169,7 @@ public:
StringRef getName() const override;
private:
-
- void materialize(MaterializationResponsibility R) override;
+ void materialize(std::unique_ptr<MaterializationResponsibility> R) override;
void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
ObjectLayer &L;
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
deleted file mode 100644
index 84f5e0350c2e..000000000000
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
+++ /dev/null
@@ -1,267 +0,0 @@
-//===- LazyEmittingLayer.h - Lazily emit IR to lower JIT layers -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Contains the definition for a lazy-emitting layer for the JIT.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_LAZYEMITTINGLAYER_H
-#define LLVM_EXECUTIONENGINE_ORC_LAZYEMITTINGLAYER_H
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/Orc/Core.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <list>
-#include <memory>
-#include <string>
-
-namespace llvm {
-namespace orc {
-
-/// Lazy-emitting IR layer.
-///
-/// This layer accepts LLVM IR Modules (via addModule) but does not
-/// immediately emit them to the layer below. Instead, emission to the base layer
-/// is deferred until the first time the client requests the address (via
-/// JITSymbol::getAddress) for a symbol contained in this layer.
-template <typename BaseLayerT> class LazyEmittingLayer {
-private:
- class EmissionDeferredModule {
- public:
- EmissionDeferredModule(VModuleKey K, std::unique_ptr<Module> M)
- : K(std::move(K)), M(std::move(M)) {}
-
- JITSymbol find(StringRef Name, bool ExportedSymbolsOnly, BaseLayerT &B) {
- switch (EmitState) {
- case NotEmitted:
- if (auto GV = searchGVs(Name, ExportedSymbolsOnly)) {
- JITSymbolFlags Flags = JITSymbolFlags::fromGlobalValue(*GV);
- auto GetAddress = [this, ExportedSymbolsOnly, Name = Name.str(),
- &B]() -> Expected<JITTargetAddress> {
- if (this->EmitState == Emitting)
- return 0;
- else if (this->EmitState == NotEmitted) {
- this->EmitState = Emitting;
- if (auto Err = this->emitToBaseLayer(B))
- return std::move(Err);
- this->EmitState = Emitted;
- }
- if (auto Sym = B.findSymbolIn(K, Name, ExportedSymbolsOnly))
- return Sym.getAddress();
- else if (auto Err = Sym.takeError())
- return std::move(Err);
- else
- llvm_unreachable("Successful symbol lookup should return "
- "definition address here");
- };
- return JITSymbol(std::move(GetAddress), Flags);
- } else
- return nullptr;
- case Emitting:
- // Calling "emit" can trigger a recursive call to 'find' (e.g. to check
- // for pre-existing definitions of common-symbol), but any symbol in
- // this module would already have been found internally (in the
- // RuntimeDyld that did the lookup), so just return a nullptr here.
- return nullptr;
- case Emitted:
- return B.findSymbolIn(K, std::string(Name), ExportedSymbolsOnly);
- }
- llvm_unreachable("Invalid emit-state.");
- }
-
- Error removeModuleFromBaseLayer(BaseLayerT& BaseLayer) {
- return EmitState != NotEmitted ? BaseLayer.removeModule(K)
- : Error::success();
- }
-
- void emitAndFinalize(BaseLayerT &BaseLayer) {
- assert(EmitState != Emitting &&
- "Cannot emitAndFinalize while already emitting");
- if (EmitState == NotEmitted) {
- EmitState = Emitting;
- emitToBaseLayer(BaseLayer);
- EmitState = Emitted;
- }
- BaseLayer.emitAndFinalize(K);
- }
-
- private:
-
- const GlobalValue* searchGVs(StringRef Name,
- bool ExportedSymbolsOnly) const {
- // FIXME: We could clean all this up if we had a way to reliably demangle
- // names: We could just demangle name and search, rather than
- // mangling everything else.
-
- // If we have already built the mangled name set then just search it.
- if (MangledSymbols) {
- auto VI = MangledSymbols->find(Name);
- if (VI == MangledSymbols->end())
- return nullptr;
- auto GV = VI->second;
- if (!ExportedSymbolsOnly || GV->hasDefaultVisibility())
- return GV;
- return nullptr;
- }
-
- // If we haven't built the mangled name set yet, try to build it. As an
- // optimization this will leave MangledSymbols set to nullptr if we find
- // Name in the process of building the set.
- return buildMangledSymbols(Name, ExportedSymbolsOnly);
- }
-
- Error emitToBaseLayer(BaseLayerT &BaseLayer) {
- // We don't need the mangled names set any more: Once we've emitted this
- // to the base layer we'll just look for symbols there.
- MangledSymbols.reset();
- return BaseLayer.addModule(std::move(K), std::move(M));
- }
-
- // If the mangled name of the given GlobalValue matches the given search
- // name (and its visibility conforms to the ExportedSymbolsOnly flag) then
- // return the symbol. Otherwise, add the mangled name to the Names map and
- // return nullptr.
- const GlobalValue* addGlobalValue(StringMap<const GlobalValue*> &Names,
- const GlobalValue &GV,
- const Mangler &Mang, StringRef SearchName,
- bool ExportedSymbolsOnly) const {
- // Modules don't "provide" decls or common symbols.
- if (GV.isDeclaration() || GV.hasCommonLinkage())
- return nullptr;
-
- // Mangle the GV name.
- std::string MangledName;
- {
- raw_string_ostream MangledNameStream(MangledName);
- Mang.getNameWithPrefix(MangledNameStream, &GV, false);
- }
-
- // Check whether this is the name we were searching for, and if it is then
- // bail out early.
- if (MangledName == SearchName)
- if (!ExportedSymbolsOnly || GV.hasDefaultVisibility())
- return &GV;
-
- // Otherwise add this to the map for later.
- Names[MangledName] = &GV;
- return nullptr;
- }
-
- // Build the MangledSymbols map. Bails out early (with MangledSymbols left set
- // to nullptr) if the given SearchName is found while building the map.
- const GlobalValue* buildMangledSymbols(StringRef SearchName,
- bool ExportedSymbolsOnly) const {
- assert(!MangledSymbols && "Mangled symbols map already exists?");
-
- auto Symbols = std::make_unique<StringMap<const GlobalValue*>>();
-
- Mangler Mang;
-
- for (const auto &GO : M->global_objects())
- if (auto GV = addGlobalValue(*Symbols, GO, Mang, SearchName,
- ExportedSymbolsOnly))
- return GV;
-
- MangledSymbols = std::move(Symbols);
- return nullptr;
- }
-
- enum { NotEmitted, Emitting, Emitted } EmitState = NotEmitted;
- VModuleKey K;
- std::unique_ptr<Module> M;
- mutable std::unique_ptr<StringMap<const GlobalValue*>> MangledSymbols;
- };
-
- BaseLayerT &BaseLayer;
- std::map<VModuleKey, std::unique_ptr<EmissionDeferredModule>> ModuleMap;
-
-public:
-
- /// Construct a lazy emitting layer.
- LLVM_ATTRIBUTE_DEPRECATED(
- LazyEmittingLayer(BaseLayerT &BaseLayer),
- "ORCv1 layers (including LazyEmittingLayer) are deprecated. Please use "
- "ORCv2, where lazy emission is the default");
-
- /// Construct a lazy emitting layer.
- LazyEmittingLayer(ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer)
- : BaseLayer(BaseLayer) {}
-
- /// Add the given module to the lazy emitting layer.
- Error addModule(VModuleKey K, std::unique_ptr<Module> M) {
- assert(!ModuleMap.count(K) && "VModuleKey K already in use");
- ModuleMap[K] =
- std::make_unique<EmissionDeferredModule>(std::move(K), std::move(M));
- return Error::success();
- }
-
- /// Remove the module represented by the given handle.
- ///
- /// This method will free the memory associated with the given module, both
- /// in this layer, and the base layer.
- Error removeModule(VModuleKey K) {
- auto I = ModuleMap.find(K);
- assert(I != ModuleMap.end() && "VModuleKey K not valid here");
- auto EDM = std::move(I.second);
- ModuleMap.erase(I);
- return EDM->removeModuleFromBaseLayer(BaseLayer);
- }
-
- /// Search for the given named symbol.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it exists.
- JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
- // Look for the symbol among existing definitions.
- if (auto Symbol = BaseLayer.findSymbol(Name, ExportedSymbolsOnly))
- return Symbol;
-
- // If not found then search the deferred modules. If any of these contain a
- // definition of 'Name' then they will return a JITSymbol that will emit
- // the corresponding module when the symbol address is requested.
- for (auto &KV : ModuleMap)
- if (auto Symbol = KV.second->find(Name, ExportedSymbolsOnly, BaseLayer))
- return Symbol;
-
- // If no definition found anywhere return a null symbol.
- return nullptr;
- }
-
- /// Get the address of the given symbol in the context of the set of
- /// compiled modules represented by the key K.
- JITSymbol findSymbolIn(VModuleKey K, const std::string &Name,
- bool ExportedSymbolsOnly) {
- assert(ModuleMap.count(K) && "VModuleKey K not valid here");
- return ModuleMap[K]->find(Name, ExportedSymbolsOnly, BaseLayer);
- }
-
- /// Immediately emit and finalize the module represented by the given
- /// key.
- Error emitAndFinalize(VModuleKey K) {
- assert(ModuleMap.count(K) && "VModuleKey K not valid here");
- return ModuleMap[K]->emitAndFinalize(BaseLayer);
- }
-};
-
-template <typename BaseLayerT>
-LazyEmittingLayer<BaseLayerT>::LazyEmittingLayer(BaseLayerT &BaseLayer)
- : BaseLayer(BaseLayer) {}
-
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_LAZYEMITTINGLAYER_H
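
As a hedged pointer, not part of the diff: in ORCv2 the role of the deleted LazyEmittingLayer is filled by per-function lazy compilation via LLLazyJIT. The sketch below assumes LLLazyJIT, addLazyIRModule and lookup from the broader LLVM 12 API.

    // Hedged sketch: symbols resolve to lazy stubs; bodies are compiled on
    // first call rather than at add time.
    #include "llvm/ExecutionEngine/Orc/LLJIT.h"
    using namespace llvm;
    using namespace llvm::orc;

    Expected<JITTargetAddress> addLazily(LLLazyJIT &J, ThreadSafeModule TSM,
                                         StringRef Name) {
      if (Error Err = J.addLazyIRModule(std::move(TSM)))
        return std::move(Err);
      auto Sym = J.lookup(Name); // returns the address of a lazy stub
      if (!Sym)
        return Sym.takeError();
      return Sym->getAddress();  // the body is compiled on the first call
    }
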
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
index 0d3ccecdf121..e6a9d8945285 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
@@ -40,6 +40,9 @@ public:
using NotifyResolvedFunction =
unique_function<Error(JITTargetAddress ResolvedAddr)>;
+ LazyCallThroughManager(ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddr, TrampolinePool *TP);
+
// Return a free call-through trampoline and bind it to look up and call
// through to the given symbol.
Expected<JITTargetAddress>
@@ -56,9 +59,6 @@ protected:
using NotifyLandingResolvedFunction =
TrampolinePool::NotifyLandingResolvedFunction;
- LazyCallThroughManager(ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddr, TrampolinePool *TP);
-
struct ReexportsEntry {
JITDylib *SourceJD;
SymbolStringPtr SymbolName;
@@ -144,12 +144,12 @@ public:
IndirectStubsManager &ISManager,
JITDylib &SourceJD,
SymbolAliasMap CallableAliases,
- ImplSymbolMap *SrcJDLoc, VModuleKey K);
+ ImplSymbolMap *SrcJDLoc);
StringRef getName() const override;
private:
- void materialize(MaterializationResponsibility R) override;
+ void materialize(std::unique_ptr<MaterializationResponsibility> R) override;
void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases);
@@ -166,11 +166,10 @@ private:
inline std::unique_ptr<LazyReexportsMaterializationUnit>
lazyReexports(LazyCallThroughManager &LCTManager,
IndirectStubsManager &ISManager, JITDylib &SourceJD,
- SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc = nullptr,
- VModuleKey K = VModuleKey()) {
+ SymbolAliasMap CallableAliases,
+ ImplSymbolMap *SrcJDLoc = nullptr) {
return std::make_unique<LazyReexportsMaterializationUnit>(
- LCTManager, ISManager, SourceJD, std::move(CallableAliases), SrcJDLoc,
- std::move(K));
+ LCTManager, ISManager, SourceJD, std::move(CallableAliases), SrcJDLoc);
}
} // End namespace orc
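
A hedged sketch of the updated lazyReexports() signature above, now without a VModuleKey parameter. Not part of the diff; createLocalLazyCallThroughManager, createLocalIndirectStubsManagerBuilder, MangleAndInterner, JD.define and the "foo"/"foo_body" names are assumptions drawn from the wider ORCv2 API.

    // Expose "foo" as a lazy call-through to the hidden body "foo_body".
    #include "llvm/ExecutionEngine/Orc/Core.h"
    #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
    #include "llvm/ExecutionEngine/Orc/LazyReexports.h"
    #include "llvm/ExecutionEngine/Orc/Mangling.h"
    #include "llvm/IR/DataLayout.h"
    using namespace llvm;
    using namespace llvm::orc;

    Error addLazyFoo(ExecutionSession &ES, const DataLayout &DL, JITDylib &JD,
                     LazyCallThroughManager &LCTM, IndirectStubsManager &ISM) {
      MangleAndInterner Mangle(ES, DL);
      SymbolAliasMap Aliases;
      Aliases[Mangle("foo")] = SymbolAliasMapEntry(
          Mangle("foo_body"),
          JITSymbolFlags::Exported | JITSymbolFlags::Callable);
      // Note: no VModuleKey argument any more; resources are tracked by the
      // JITDylib's (or a caller-supplied) ResourceTracker.
      return JD.define(lazyReexports(LCTM, ISM, JD, std::move(Aliases)));
    }
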
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h
deleted file mode 100644
index b20202a49ef6..000000000000
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h
+++ /dev/null
@@ -1,211 +0,0 @@
-//===--- Legacy.h -- Adapters for ExecutionEngine API interop ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Contains core ORC APIs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_LEGACY_H
-#define LLVM_EXECUTIONENGINE_ORC_LEGACY_H
-
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/Orc/Core.h"
-
-namespace llvm {
-namespace orc {
-
-/// SymbolResolver is a composable interface for looking up symbol flags
-/// and addresses using the AsynchronousSymbolQuery type. It will
-/// eventually replace the LegacyJITSymbolResolver interface as the
-/// standard ORC symbol resolver type.
-///
-/// FIXME: SymbolResolvers should go away and be replaced with VSOs with
-/// definition generators.
-class SymbolResolver {
-public:
- virtual ~SymbolResolver() = default;
-
- /// Returns the subset of the given symbols that the caller is responsible for
- /// materializing.
- virtual SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) = 0;
-
- /// For each symbol in Symbols that can be found, assigns that symbols
- /// value in Query. Returns the set of symbols that could not be found.
- virtual SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
- SymbolNameSet Symbols) = 0;
-
-private:
- virtual void anchor();
-};
-
-/// Implements SymbolResolver with a pair of supplied function objects
-/// for convenience. See createSymbolResolver.
-template <typename GetResponsibilitySetFn, typename LookupFn>
-class LambdaSymbolResolver final : public SymbolResolver {
-public:
- template <typename GetResponsibilitySetFnRef, typename LookupFnRef>
- LambdaSymbolResolver(GetResponsibilitySetFnRef &&GetResponsibilitySet,
- LookupFnRef &&Lookup)
- : GetResponsibilitySet(
- std::forward<GetResponsibilitySetFnRef>(GetResponsibilitySet)),
- Lookup(std::forward<LookupFnRef>(Lookup)) {}
-
- SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final {
- return GetResponsibilitySet(Symbols);
- }
-
- SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
- SymbolNameSet Symbols) final {
- return Lookup(std::move(Query), std::move(Symbols));
- }
-
-private:
- GetResponsibilitySetFn GetResponsibilitySet;
- LookupFn Lookup;
-};
-
-/// Creates a SymbolResolver implementation from the pair of supplied
-/// function objects.
-template <typename GetResponsibilitySetFn, typename LookupFn>
-std::unique_ptr<LambdaSymbolResolver<
- std::remove_cv_t<std::remove_reference_t<GetResponsibilitySetFn>>,
- std::remove_cv_t<std::remove_reference_t<LookupFn>>>>
-createSymbolResolver(GetResponsibilitySetFn &&GetResponsibilitySet,
- LookupFn &&Lookup) {
- using LambdaSymbolResolverImpl = LambdaSymbolResolver<
- std::remove_cv_t<std::remove_reference_t<GetResponsibilitySetFn>>,
- std::remove_cv_t<std::remove_reference_t<LookupFn>>>;
- return std::make_unique<LambdaSymbolResolverImpl>(
- std::forward<GetResponsibilitySetFn>(GetResponsibilitySet),
- std::forward<LookupFn>(Lookup));
-}
-
-/// Legacy adapter. Remove once we kill off the old ORC layers.
-class JITSymbolResolverAdapter : public JITSymbolResolver {
-public:
- JITSymbolResolverAdapter(ExecutionSession &ES, SymbolResolver &R,
- MaterializationResponsibility *MR);
- Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override;
- void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) override;
-
-private:
- ExecutionSession &ES;
- std::set<SymbolStringPtr> ResolvedStrings;
- SymbolResolver &R;
- MaterializationResponsibility *MR;
-};
-
-/// Use the given legacy-style FindSymbol function (i.e. a function that takes
-/// a const std::string& or StringRef and returns a JITSymbol) to get the
-/// subset of symbols that the caller is responsible for materializing. If any
-/// JITSymbol returned by FindSymbol is in an error state the function returns
-/// immediately with that error.
-///
-/// Useful for implementing getResponsibilitySet bodies that query legacy
-/// resolvers.
-template <typename FindSymbolFn>
-Expected<SymbolNameSet>
-getResponsibilitySetWithLegacyFn(const SymbolNameSet &Symbols,
- FindSymbolFn FindSymbol) {
- SymbolNameSet Result;
-
- for (auto &S : Symbols) {
- if (JITSymbol Sym = FindSymbol(*S)) {
- if (!Sym.getFlags().isStrong())
- Result.insert(S);
- } else if (auto Err = Sym.takeError())
- return std::move(Err);
- }
-
- return Result;
-}
-
-/// Use the given legacy-style FindSymbol function (i.e. a function that
-/// takes a const std::string& or StringRef and returns a JITSymbol) to
-/// find the address and flags for each symbol in Symbols and store the
-/// result in Query. If any JITSymbol returned by FindSymbol is in an
-/// error then Query.notifyFailed(...) is called with that error and the
-/// function returns immediately. On success, returns the set of symbols
-/// not found.
-///
-/// Useful for implementing lookup bodies that query legacy resolvers.
-template <typename FindSymbolFn>
-SymbolNameSet
-lookupWithLegacyFn(ExecutionSession &ES, AsynchronousSymbolQuery &Query,
- const SymbolNameSet &Symbols, FindSymbolFn FindSymbol) {
- SymbolNameSet SymbolsNotFound;
- bool NewSymbolsResolved = false;
-
- for (auto &S : Symbols) {
- if (JITSymbol Sym = FindSymbol(*S)) {
- if (auto Addr = Sym.getAddress()) {
- Query.notifySymbolMetRequiredState(
- S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
- NewSymbolsResolved = true;
- } else {
- ES.legacyFailQuery(Query, Addr.takeError());
- return SymbolNameSet();
- }
- } else if (auto Err = Sym.takeError()) {
- ES.legacyFailQuery(Query, std::move(Err));
- return SymbolNameSet();
- } else
- SymbolsNotFound.insert(S);
- }
-
- if (NewSymbolsResolved && Query.isComplete())
- Query.handleComplete();
-
- return SymbolsNotFound;
-}
-
-/// An ORC SymbolResolver implementation that uses a legacy
-/// findSymbol-like function to perform lookup.
-template <typename LegacyLookupFn>
-class LegacyLookupFnResolver final : public SymbolResolver {
-public:
- using ErrorReporter = std::function<void(Error)>;
-
- LegacyLookupFnResolver(ExecutionSession &ES, LegacyLookupFn LegacyLookup,
- ErrorReporter ReportError)
- : ES(ES), LegacyLookup(std::move(LegacyLookup)),
- ReportError(std::move(ReportError)) {}
-
- SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final {
- if (auto ResponsibilitySet =
- getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup))
- return std::move(*ResponsibilitySet);
- else {
- ReportError(ResponsibilitySet.takeError());
- return SymbolNameSet();
- }
- }
-
- SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
- SymbolNameSet Symbols) final {
- return lookupWithLegacyFn(ES, *Query, Symbols, LegacyLookup);
- }
-
-private:
- ExecutionSession &ES;
- LegacyLookupFn LegacyLookup;
- ErrorReporter ReportError;
-};
-
-template <typename LegacyLookupFn>
-std::shared_ptr<LegacyLookupFnResolver<LegacyLookupFn>>
-createLegacyLookupResolver(ExecutionSession &ES, LegacyLookupFn LegacyLookup,
- std::function<void(Error)> ErrorReporter) {
- return std::make_shared<LegacyLookupFnResolver<LegacyLookupFn>>(
- ES, std::move(LegacyLookup), std::move(ErrorReporter));
-}
-
-} // End namespace orc
-} // End namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_LEGACY_H
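
A hedged illustration, not part of the diff: the usual ORCv2 replacement for the legacy process-symbol resolvers removed above is a definition generator attached to a JITDylib. DynamicLibrarySearchGenerator and addGenerator are assumed from ExecutionUtils.h and Core.h.

    // Fall back to symbols exported by the host process for unresolved names.
    #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
    #include "llvm/IR/DataLayout.h"
    using namespace llvm;
    using namespace llvm::orc;

    Error addProcessSymbols(JITDylib &JD, const DataLayout &DL) {
      auto G = DynamicLibrarySearchGenerator::GetForCurrentProcess(
          DL.getGlobalPrefix());
      if (!G)
        return G.takeError();
      JD.addGenerator(std::move(*G));
      return Error::success();
    }
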
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
index 15fe079eccaf..90e1d4704f34 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
@@ -98,8 +98,9 @@ public:
ExecutionSession &getExecutionSession() const { return ES; }
Error setupJITDylib(JITDylib &JD) override;
- Error notifyAdding(JITDylib &JD, const MaterializationUnit &MU) override;
- Error notifyRemoving(JITDylib &JD, VModuleKey K) override;
+ Error notifyAdding(ResourceTracker &RT,
+ const MaterializationUnit &MU) override;
+ Error notifyRemoving(ResourceTracker &RT) override;
Expected<InitializerSequence> getInitializerSequence(JITDylib &JD);
@@ -119,6 +120,19 @@ private:
LocalDependenciesMap getSyntheticSymbolLocalDependencies(
MaterializationResponsibility &MR) override;
+ // FIXME: We should be tentatively tracking scraped sections and discarding
+ // if the MR fails.
+ Error notifyFailed(MaterializationResponsibility &MR) override {
+ return Error::success();
+ }
+
+ Error notifyRemovingResources(ResourceKey K) override {
+ return Error::success();
+ }
+
+ void notifyTransferringResources(ResourceKey DstKey,
+ ResourceKey SrcKey) override {}
+
private:
using InitSymbolDepMap =
DenseMap<MaterializationResponsibility *, JITLinkSymbolVector>;
@@ -136,8 +150,6 @@ private:
InitSymbolDepMap InitSymbolDeps;
};
- static std::vector<JITDylib *> getDFSLinkOrder(JITDylib &JD);
-
void registerInitInfo(JITDylib &JD, JITTargetAddress ObjCImageInfoAddr,
MachOJITDylibInitializers::SectionExtent ModInits,
MachOJITDylibInitializers::SectionExtent ObjCSelRefs,
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
deleted file mode 100644
index ffa37a13d064..000000000000
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===------ NullResolver.h - Reject symbol lookup requests ------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Defines a RuntimeDyld::SymbolResolver subclass that rejects all symbol
-// resolution requests, for clients that have no cross-object fixups.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_NULLRESOLVER_H
-#define LLVM_EXECUTIONENGINE_ORC_NULLRESOLVER_H
-
-#include "llvm/ExecutionEngine/Orc/Legacy.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-
-namespace llvm {
-namespace orc {
-
-class NullResolver : public SymbolResolver {
-public:
- SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final;
-
- SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
- SymbolNameSet Symbols) final;
-};
-
-/// SymbolResolver implementation that rejects all resolution requests.
-/// Useful for clients that have no cross-object fixups.
-class NullLegacyResolver : public LegacyJITSymbolResolver {
-public:
- JITSymbol findSymbol(const std::string &Name) final;
-
- JITSymbol findSymbolInLogicalDylib(const std::string &Name) final;
-};
-
-} // End namespace orc.
-} // End namespace llvm.
-
-#endif // LLVM_EXECUTIONENGINE_ORC_NULLRESOLVER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
index 2bfe3b001709..f2975e29fcd6 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -35,6 +35,7 @@ namespace llvm {
namespace jitlink {
class EHFrameRegistrar;
+class LinkGraph;
class Symbol;
} // namespace jitlink
@@ -51,7 +52,7 @@ class ObjectLinkingLayerJITLinkContext;
/// Clients can use this class to add relocatable object files to an
/// ExecutionSession, and it typically serves as the base layer (underneath
/// a compiling layer like IRCompileLayer) for the rest of the JIT.
-class ObjectLinkingLayer : public ObjectLayer {
+class ObjectLinkingLayer : public ObjectLayer, private ResourceManager {
friend class ObjectLinkingLayerJITLinkContext;
public:
@@ -72,10 +73,10 @@ public:
virtual Error notifyEmitted(MaterializationResponsibility &MR) {
return Error::success();
}
- virtual Error notifyRemovingModule(VModuleKey K) {
- return Error::success();
- }
- virtual Error notifyRemovingAllModules() { return Error::success(); }
+ virtual Error notifyFailed(MaterializationResponsibility &MR) = 0;
+ virtual Error notifyRemovingResources(ResourceKey K) = 0;
+ virtual void notifyTransferringResources(ResourceKey DstKey,
+ ResourceKey SrcKey) = 0;
/// Return any dependencies that synthetic symbols (e.g. init symbols)
/// have on locally scoped jitlink::Symbols. This is used by the
@@ -90,8 +91,14 @@ public:
using ReturnObjectBufferFunction =
std::function<void(std::unique_ptr<MemoryBuffer>)>;
- /// Construct an ObjectLinkingLayer with the given NotifyLoaded,
- /// and NotifyEmitted functors.
+ /// Construct an ObjectLinkingLayer.
+ ObjectLinkingLayer(ExecutionSession &ES,
+ jitlink::JITLinkMemoryManager &MemMgr);
+
+ /// Construct an ObjectLinkingLayer. Takes ownership of the given
+ /// JITLinkMemoryManager. This method is a temporary hack to simplify
+ /// co-existence with RTDyldObjectLinkingLayer (which also owns its
+ /// allocators).
ObjectLinkingLayer(ExecutionSession &ES,
std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr);
@@ -112,10 +119,14 @@ public:
return *this;
}
- /// Emit the object.
- void emit(MaterializationResponsibility R,
+ /// Emit an object file.
+ void emit(std::unique_ptr<MaterializationResponsibility> R,
std::unique_ptr<MemoryBuffer> O) override;
+ /// Emit a LinkGraph.
+ void emit(std::unique_ptr<MaterializationResponsibility> R,
+ std::unique_ptr<jitlink::LinkGraph> G);
+
/// Instructs this ObjectLinkingLayer instance to override the symbol flags
/// found in the AtomGraph with the flags supplied by the
/// MaterializationResponsibility instance. This is a workaround to support
@@ -155,27 +166,31 @@ private:
void notifyLoaded(MaterializationResponsibility &MR);
Error notifyEmitted(MaterializationResponsibility &MR, AllocPtr Alloc);
- Error removeModule(VModuleKey K);
- Error removeAllModules();
+ Error handleRemoveResources(ResourceKey K) override;
+ void handleTransferResources(ResourceKey DstKey, ResourceKey SrcKey) override;
mutable std::mutex LayerMutex;
- std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr;
+ jitlink::JITLinkMemoryManager &MemMgr;
+ std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgrOwnership;
bool OverrideObjectFlags = false;
bool AutoClaimObjectSymbols = false;
ReturnObjectBufferFunction ReturnObjectBuffer;
- DenseMap<VModuleKey, AllocPtr> TrackedAllocs;
- std::vector<AllocPtr> UntrackedAllocs;
+ DenseMap<ResourceKey, std::vector<AllocPtr>> Allocs;
std::vector<std::unique_ptr<Plugin>> Plugins;
};
class EHFrameRegistrationPlugin : public ObjectLinkingLayer::Plugin {
public:
- EHFrameRegistrationPlugin(jitlink::EHFrameRegistrar &Registrar);
- Error notifyEmitted(MaterializationResponsibility &MR) override;
+ EHFrameRegistrationPlugin(
+ ExecutionSession &ES,
+ std::unique_ptr<jitlink::EHFrameRegistrar> Registrar);
void modifyPassConfig(MaterializationResponsibility &MR, const Triple &TT,
jitlink::PassConfiguration &PassConfig) override;
- Error notifyRemovingModule(VModuleKey K) override;
- Error notifyRemovingAllModules() override;
+ Error notifyEmitted(MaterializationResponsibility &MR) override;
+ Error notifyFailed(MaterializationResponsibility &MR) override;
+ Error notifyRemovingResources(ResourceKey K) override;
+ void notifyTransferringResources(ResourceKey DstKey,
+ ResourceKey SrcKey) override;
private:
@@ -185,10 +200,10 @@ private:
};
std::mutex EHFramePluginMutex;
- jitlink::EHFrameRegistrar &Registrar;
+ ExecutionSession &ES;
+ std::unique_ptr<jitlink::EHFrameRegistrar> Registrar;
DenseMap<MaterializationResponsibility *, EHFrameRange> InProcessLinks;
- DenseMap<VModuleKey, EHFrameRange> TrackedEHFrameRanges;
- std::vector<EHFrameRange> UntrackedEHFrameRanges;
+ DenseMap<ResourceKey, std::vector<EHFrameRange>> EHFrameRanges;
};
} // end namespace orc
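
A hedged setup sketch for the reworked EHFrameRegistrationPlugin constructor above, which now takes the ExecutionSession and owns its registrar. Not part of the diff; addPlugin, InProcessMemoryManager and InProcessEHFrameRegistrar are assumed from the wider JITLink/ORC API.

    // In-process object linking with eh-frame registration so that C++
    // exceptions propagate through JIT'd frames.
    #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
    #include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
    #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
    using namespace llvm;
    using namespace llvm::orc;

    std::unique_ptr<ObjectLinkingLayer> makeObjLinkingLayer(ExecutionSession &ES) {
      auto OLL = std::make_unique<ObjectLinkingLayer>(
          ES, std::make_unique<jitlink::InProcessMemoryManager>());
      OLL->addPlugin(std::make_unique<EHFrameRegistrationPlugin>(
          ES, std::make_unique<jitlink::InProcessEHFrameRegistrar>()));
      return OLL;
    }
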
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
index bf989cc8677c..d8395ab34e47 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -31,7 +31,7 @@ public:
ObjectTransformLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
TransformFunction Transform = TransformFunction());
- void emit(MaterializationResponsibility R,
+ void emit(std::unique_ptr<MaterializationResponsibility> R,
std::unique_ptr<MemoryBuffer> O) override;
void setTransform(TransformFunction Transform) {
@@ -43,88 +43,6 @@ private:
TransformFunction Transform;
};
-/// Object mutating layer.
-///
-/// This layer accepts sets of ObjectFiles (via addObject). It
-/// immediately applies the user supplied functor to each object, then adds
-/// the set of transformed objects to the layer below.
-template <typename BaseLayerT, typename TransformFtor>
-class LegacyObjectTransformLayer {
-public:
- /// Construct an ObjectTransformLayer with the given BaseLayer
- LLVM_ATTRIBUTE_DEPRECATED(
- LegacyObjectTransformLayer(BaseLayerT &BaseLayer,
- TransformFtor Transform = TransformFtor()),
- "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
- "use "
- "the ORCv2 ObjectTransformLayer instead");
-
- /// Legacy layer constructor with deprecation acknowledgement.
- LegacyObjectTransformLayer(ORCv1DeprecationAcknowledgement,
- BaseLayerT &BaseLayer,
- TransformFtor Transform = TransformFtor())
- : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
-
- /// Apply the transform functor to each object in the object set, then
- /// add the resulting set of objects to the base layer, along with the
- /// memory manager and symbol resolver.
- ///
- /// @return A handle for the added objects.
- template <typename ObjectPtr> Error addObject(VModuleKey K, ObjectPtr Obj) {
- return BaseLayer.addObject(std::move(K), Transform(std::move(Obj)));
- }
-
- /// Remove the object set associated with the VModuleKey K.
- Error removeObject(VModuleKey K) { return BaseLayer.removeObject(K); }
-
- /// Search for the given named symbol.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it exists.
- JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
- return BaseLayer.findSymbol(Name, ExportedSymbolsOnly);
- }
-
- /// Get the address of the given symbol in the context of the set of
- /// objects represented by the VModuleKey K. This call is forwarded to
- /// the base layer's implementation.
- /// @param K The VModuleKey associated with the object set to search in.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it is found in the
- /// given object set.
- JITSymbol findSymbolIn(VModuleKey K, const std::string &Name,
- bool ExportedSymbolsOnly) {
- return BaseLayer.findSymbolIn(K, Name, ExportedSymbolsOnly);
- }
-
- /// Immediately emit and finalize the object set represented by the
- /// given VModuleKey K.
- Error emitAndFinalize(VModuleKey K) { return BaseLayer.emitAndFinalize(K); }
-
- /// Map section addresses for the objects associated with the
- /// VModuleKey K.
- void mapSectionAddress(VModuleKey K, const void *LocalAddress,
- JITTargetAddress TargetAddr) {
- BaseLayer.mapSectionAddress(K, LocalAddress, TargetAddr);
- }
-
- /// Access the transform functor directly.
- TransformFtor &getTransform() { return Transform; }
-
- /// Access the transform functor directly.
- const TransformFtor &getTransform() const { return Transform; }
-
-private:
- BaseLayerT &BaseLayer;
- TransformFtor Transform;
-};
-
-template <typename BaseLayerT, typename TransformFtor>
-LegacyObjectTransformLayer<BaseLayerT, TransformFtor>::
- LegacyObjectTransformLayer(BaseLayerT &BaseLayer, TransformFtor Transform)
- : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
-
} // end namespace orc
} // end namespace llvm
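
A hedged example of wiring a transform into the remaining ORCv2 ObjectTransformLayer, not part of the diff; getObjTransformLayer and DumpObjects are assumed from LLJIT and Orc/DebugUtils.h respectively.

    // Dump each object passing through the JIT to the working directory
    // before it reaches the linking layer.
    #include "llvm/ExecutionEngine/Orc/DebugUtils.h"
    #include "llvm/ExecutionEngine/Orc/LLJIT.h"
    using namespace llvm;
    using namespace llvm::orc;

    void enableObjectDumps(LLJIT &J) {
      J.getObjTransformLayer().setTransform(
          DumpObjects(/*DumpDir=*/"", /*IdentifierOverride=*/""));
    }
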
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h
new file mode 100644
index 000000000000..a8aa42799115
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h
@@ -0,0 +1,415 @@
+//===--- OrcRPCTargetProcessControl.h - Remote target control ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for interacting with target processes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_ORCRPCTARGETPROCESSCONTROL_H
+#define LLVM_EXECUTIONENGINE_ORC_ORCRPCTARGETPROCESSCONTROL_H
+
+#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
+
+namespace llvm {
+namespace orc {
+
+/// JITLinkMemoryManager implementation for a process connected via an ORC RPC
+/// endpoint.
+template <typename OrcRPCTPCImplT>
+class OrcRPCTPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
+private:
+ struct HostAlloc {
+ std::unique_ptr<char[]> Mem;
+ uint64_t Size;
+ };
+
+ struct TargetAlloc {
+ JITTargetAddress Address = 0;
+ uint64_t AllocatedSize = 0;
+ };
+
+ using HostAllocMap = DenseMap<int, HostAlloc>;
+ using TargetAllocMap = DenseMap<int, TargetAlloc>;
+
+public:
+ class OrcRPCAllocation : public Allocation {
+ public:
+ OrcRPCAllocation(OrcRPCTPCJITLinkMemoryManager<OrcRPCTPCImplT> &Parent,
+ HostAllocMap HostAllocs, TargetAllocMap TargetAllocs)
+ : Parent(Parent), HostAllocs(std::move(HostAllocs)),
+ TargetAllocs(std::move(TargetAllocs)) {
+ assert(HostAllocs.size() == TargetAllocs.size() &&
+ "HostAllocs size should match TargetAllocs");
+ }
+
+ ~OrcRPCAllocation() override {
+ assert(TargetAllocs.empty() && "failed to deallocate");
+ }
+
+ MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override {
+ auto I = HostAllocs.find(Seg);
+ assert(I != HostAllocs.end() && "No host allocation for segment");
+ auto &HA = I->second;
+ return {HA.Mem.get(), static_cast<size_t>(HA.Size)};
+ }
+
+ JITTargetAddress getTargetMemory(ProtectionFlags Seg) override {
+ auto I = TargetAllocs.find(Seg);
+ assert(I != TargetAllocs.end() && "No target allocation for segment");
+ return I->second.Address;
+ }
+
+ void finalizeAsync(FinalizeContinuation OnFinalize) override {
+
+ std::vector<tpctypes::BufferWrite> BufferWrites;
+ orcrpctpc::ReleaseOrFinalizeMemRequest FMR;
+
+ for (auto &KV : HostAllocs) {
+ assert(TargetAllocs.count(KV.first) &&
+ "No target allocation for buffer");
+ auto &HA = KV.second;
+ auto &TA = TargetAllocs[KV.first];
+ BufferWrites.push_back({TA.Address, StringRef(HA.Mem.get(), HA.Size)});
+ FMR.push_back({orcrpctpc::toWireProtectionFlags(
+ static_cast<sys::Memory::ProtectionFlags>(KV.first)),
+ TA.Address, TA.AllocatedSize});
+ }
+
+ DEBUG_WITH_TYPE("orc", {
+ dbgs() << "finalizeAsync " << (void *)this << ":\n";
+ auto FMRI = FMR.begin();
+ for (auto &B : BufferWrites) {
+ auto Prot = FMRI->Prot;
+ ++FMRI;
+ dbgs() << " Writing " << formatv("{0:x16}", B.Buffer.size())
+ << " bytes to " << ((Prot & orcrpctpc::WPF_Read) ? 'R' : '-')
+ << ((Prot & orcrpctpc::WPF_Write) ? 'W' : '-')
+ << ((Prot & orcrpctpc::WPF_Exec) ? 'X' : '-')
+ << " segment: local " << (const void *)B.Buffer.data()
+ << " -> target " << formatv("{0:x16}", B.Address) << "\n";
+ }
+ });
+ if (auto Err =
+ Parent.Parent.getMemoryAccess().writeBuffers(BufferWrites)) {
+ OnFinalize(std::move(Err));
+ return;
+ }
+
+ DEBUG_WITH_TYPE("orc", dbgs() << " Applying permissions...\n");
+ if (auto Err =
+ Parent.getEndpoint().template callAsync<orcrpctpc::FinalizeMem>(
+ [OF = std::move(OnFinalize)](Error Err2) {
+ // FIXME: Dispatch to work queue.
+ std::thread([OF = std::move(OF),
+ Err3 = std::move(Err2)]() mutable {
+ DEBUG_WITH_TYPE(
+ "orc", { dbgs() << " finalizeAsync complete\n"; });
+ OF(std::move(Err3));
+ }).detach();
+ return Error::success();
+ },
+ FMR)) {
+ DEBUG_WITH_TYPE("orc", dbgs() << " failed.\n");
+ Parent.getEndpoint().abandonPendingResponses();
+ Parent.reportError(std::move(Err));
+ }
+ DEBUG_WITH_TYPE("orc", {
+ dbgs() << "Leaving finalizeAsync (finalization may continue in "
+ "background)\n";
+ });
+ }
+
+ Error deallocate() override {
+ orcrpctpc::ReleaseOrFinalizeMemRequest RMR;
+ for (auto &KV : TargetAllocs)
+ RMR.push_back({orcrpctpc::toWireProtectionFlags(
+ static_cast<sys::Memory::ProtectionFlags>(KV.first)),
+ KV.second.Address, KV.second.AllocatedSize});
+ TargetAllocs.clear();
+
+ return Parent.getEndpoint().template callB<orcrpctpc::ReleaseMem>(RMR);
+ }
+
+ private:
+ OrcRPCTPCJITLinkMemoryManager<OrcRPCTPCImplT> &Parent;
+ HostAllocMap HostAllocs;
+ TargetAllocMap TargetAllocs;
+ };
+
+ OrcRPCTPCJITLinkMemoryManager(OrcRPCTPCImplT &Parent) : Parent(Parent) {}
+
+ Expected<std::unique_ptr<Allocation>>
+ allocate(const jitlink::JITLinkDylib *JD,
+ const SegmentsRequestMap &Request) override {
+ orcrpctpc::ReserveMemRequest RMR;
+ HostAllocMap HostAllocs;
+
+ for (auto &KV : Request) {
+ assert(KV.second.getContentSize() <= std::numeric_limits<size_t>::max() &&
+ "Content size is out-of-range for host");
+
+ RMR.push_back({orcrpctpc::toWireProtectionFlags(
+ static_cast<sys::Memory::ProtectionFlags>(KV.first)),
+ KV.second.getContentSize() + KV.second.getZeroFillSize(),
+ KV.second.getAlignment()});
+ HostAllocs[KV.first] = {
+ std::make_unique<char[]>(KV.second.getContentSize()),
+ KV.second.getContentSize()};
+ }
+
+ DEBUG_WITH_TYPE("orc", {
+ dbgs() << "Orc remote memmgr got request:\n";
+ for (auto &KV : Request)
+ dbgs() << " permissions: "
+ << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-')
+ << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-')
+ << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-')
+ << ", content size: "
+ << formatv("{0:x16}", KV.second.getContentSize())
+ << " + zero-fill-size: "
+ << formatv("{0:x16}", KV.second.getZeroFillSize())
+ << ", align: " << KV.second.getAlignment() << "\n";
+ });
+
+ // FIXME: LLVM RPC needs to be fixed to support alt
+ // serialization/deserialization on return types. For now just
+ // translate from std::map to DenseMap manually.
+ auto TmpTargetAllocs =
+ Parent.getEndpoint().template callB<orcrpctpc::ReserveMem>(RMR);
+ if (!TmpTargetAllocs)
+ return TmpTargetAllocs.takeError();
+
+ if (TmpTargetAllocs->size() != RMR.size())
+ return make_error<StringError>(
+ "Number of target allocations does not match request",
+ inconvertibleErrorCode());
+
+ TargetAllocMap TargetAllocs;
+ for (auto &E : *TmpTargetAllocs)
+ TargetAllocs[orcrpctpc::fromWireProtectionFlags(E.Prot)] = {
+ E.Address, E.AllocatedSize};
+
+ DEBUG_WITH_TYPE("orc", {
+ auto HAI = HostAllocs.begin();
+ for (auto &KV : TargetAllocs)
+ dbgs() << " permissions: "
+ << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-')
+ << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-')
+ << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-')
+ << " assigned local " << (void *)HAI->second.Mem.get()
+ << ", target " << formatv("{0:x16}", KV.second.Address) << "\n";
+ });
+
+ return std::make_unique<OrcRPCAllocation>(*this, std::move(HostAllocs),
+ std::move(TargetAllocs));
+ }
+
+private:
+ void reportError(Error Err) { Parent.reportError(std::move(Err)); }
+
+ decltype(std::declval<OrcRPCTPCImplT>().getEndpoint()) getEndpoint() {
+ return Parent.getEndpoint();
+ }
+
+ OrcRPCTPCImplT &Parent;
+};
+
+/// TargetProcessControl::MemoryAccess implementation for a process connected
+/// via an ORC RPC endpoint.
+template <typename OrcRPCTPCImplT>
+class OrcRPCTPCMemoryAccess : public TargetProcessControl::MemoryAccess {
+public:
+ OrcRPCTPCMemoryAccess(OrcRPCTPCImplT &Parent) : Parent(Parent) {}
+
+ void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ writeViaRPC<orcrpctpc::WriteUInt8s>(Ws, std::move(OnWriteComplete));
+ }
+
+ void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ writeViaRPC<orcrpctpc::WriteUInt16s>(Ws, std::move(OnWriteComplete));
+ }
+
+ void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ writeViaRPC<orcrpctpc::WriteUInt32s>(Ws, std::move(OnWriteComplete));
+ }
+
+ void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ writeViaRPC<orcrpctpc::WriteUInt64s>(Ws, std::move(OnWriteComplete));
+ }
+
+ void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws,
+ WriteResultFn OnWriteComplete) override {
+ writeViaRPC<orcrpctpc::WriteBuffers>(Ws, std::move(OnWriteComplete));
+ }
+
+private:
+ template <typename WriteRPCFunction, typename WriteElementT>
+ void writeViaRPC(ArrayRef<WriteElementT> Ws, WriteResultFn OnWriteComplete) {
+ if (auto Err = Parent.getEndpoint().template callAsync<WriteRPCFunction>(
+ [OWC = std::move(OnWriteComplete)](Error Err2) mutable -> Error {
+ OWC(std::move(Err2));
+ return Error::success();
+ },
+ Ws)) {
+ Parent.reportError(std::move(Err));
+ Parent.getEndpoint().abandonPendingResponses();
+ }
+ }
+
+ OrcRPCTPCImplT &Parent;
+};
+
+// TargetProcessControl for a process connected via an ORC RPC Endpoint.
+template <typename RPCEndpointT>
+class OrcRPCTargetProcessControlBase : public TargetProcessControl {
+public:
+ using ErrorReporter = unique_function<void(Error)>;
+
+ using OnCloseConnectionFunction = unique_function<Error(Error)>;
+
+ OrcRPCTargetProcessControlBase(std::shared_ptr<SymbolStringPool> SSP,
+ RPCEndpointT &EP, ErrorReporter ReportError)
+ : TargetProcessControl(std::move(SSP)),
+ ReportError(std::move(ReportError)), EP(EP) {}
+
+ void reportError(Error Err) { ReportError(std::move(Err)); }
+
+ RPCEndpointT &getEndpoint() { return EP; }
+
+ Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override {
+ DEBUG_WITH_TYPE("orc", {
+ dbgs() << "Loading dylib \"" << (DylibPath ? DylibPath : "") << "\" ";
+ if (!DylibPath)
+ dbgs() << "(process symbols)";
+ dbgs() << "\n";
+ });
+ if (!DylibPath)
+ DylibPath = "";
+ auto H = EP.template callB<orcrpctpc::LoadDylib>(DylibPath);
+ DEBUG_WITH_TYPE("orc", {
+ if (H)
+ dbgs() << " got handle " << formatv("{0:x16}", *H) << "\n";
+ else
+ dbgs() << " error, unable to load\n";
+ });
+ return H;
+ }
+
+ Expected<std::vector<tpctypes::LookupResult>>
+ lookupSymbols(ArrayRef<LookupRequest> Request) override {
+ std::vector<orcrpctpc::RemoteLookupRequest> RR;
+ for (auto &E : Request) {
+ RR.push_back({});
+ RR.back().first = E.Handle;
+ for (auto &KV : E.Symbols)
+ RR.back().second.push_back(
+ {(*KV.first).str(),
+ KV.second == SymbolLookupFlags::WeaklyReferencedSymbol});
+ }
+ DEBUG_WITH_TYPE("orc", {
+ dbgs() << "Compound lookup:\n";
+ for (auto &R : Request) {
+ dbgs() << " In " << formatv("{0:x16}", R.Handle) << ": {";
+ bool First = true;
+ for (auto &KV : R.Symbols) {
+ dbgs() << (First ? "" : ",") << " " << *KV.first;
+ First = false;
+ }
+ dbgs() << " }\n";
+ }
+ });
+ return EP.template callB<orcrpctpc::LookupSymbols>(RR);
+ }
+
+ Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
+ ArrayRef<std::string> Args) override {
+ DEBUG_WITH_TYPE("orc", {
+ dbgs() << "Running as main: " << formatv("{0:x16}", MainFnAddr)
+ << ", args = [";
+ for (unsigned I = 0; I != Args.size(); ++I)
+ dbgs() << (I ? "," : "") << " \"" << Args[I] << "\"";
+ dbgs() << "]\n";
+ });
+ auto Result = EP.template callB<orcrpctpc::RunMain>(MainFnAddr, Args);
+ DEBUG_WITH_TYPE("orc", {
+ dbgs() << " call to " << formatv("{0:x16}", MainFnAddr);
+ if (Result)
+ dbgs() << " returned result " << *Result << "\n";
+ else
+ dbgs() << " failed\n";
+ });
+ return Result;
+ }
+
+ Expected<tpctypes::WrapperFunctionResult>
+ runWrapper(JITTargetAddress WrapperFnAddr,
+ ArrayRef<uint8_t> ArgBuffer) override {
+ DEBUG_WITH_TYPE("orc", {
+ dbgs() << "Running as wrapper function "
+ << formatv("{0:x16}", WrapperFnAddr) << " with "
+ << formatv("{0:x16}", ArgBuffer.size()) << " argument buffer\n";
+ });
+ auto Result =
+ EP.template callB<orcrpctpc::RunWrapper>(WrapperFnAddr, ArgBuffer);
+ // dbgs() << "Returned from runWrapper...\n";
+ return Result;
+ }
+
+ Error closeConnection(OnCloseConnectionFunction OnCloseConnection) {
+ DEBUG_WITH_TYPE("orc", dbgs() << "Closing connection to remote\n");
+ return EP.template callAsync<orcrpctpc::CloseConnection>(
+ std::move(OnCloseConnection));
+ }
+
+ Error closeConnectionAndWait() {
+ std::promise<MSVCPError> P;
+ auto F = P.get_future();
+ if (auto Err = closeConnection([&](Error Err2) -> Error {
+ P.set_value(std::move(Err2));
+ return Error::success();
+ })) {
+ EP.abandonAllPendingResponses();
+ return joinErrors(std::move(Err), F.get());
+ }
+ return F.get();
+ }
+
+protected:
+ /// Subclasses must call this during construction to initialize the
+ /// TargetTriple and PageSize members.
+ Error initializeORCRPCTPCBase() {
+ if (auto TripleOrErr = EP.template callB<orcrpctpc::GetTargetTriple>())
+ TargetTriple = Triple(*TripleOrErr);
+ else
+ return TripleOrErr.takeError();
+
+ if (auto PageSizeOrErr = EP.template callB<orcrpctpc::GetPageSize>())
+ PageSize = *PageSizeOrErr;
+ else
+ return PageSizeOrErr.takeError();
+
+ return Error::success();
+ }
+
+private:
+ ErrorReporter ReportError;
+ RPCEndpointT &EP;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_ORCRPCTARGETPROCESSCONTROL_H
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
index 86e8d5df3ad9..3d139740d677 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
@@ -53,7 +54,7 @@ namespace remote {
/// OrcRemoteTargetServer class) via an RPC system (see RPCUtils.h) to carry out
/// its actions.
class OrcRemoteTargetClient
- : public rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel> {
+ : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> {
public:
/// Remote-mapped RuntimeDyld-compatible memory manager.
class RemoteRTDyldMemoryManager : public RuntimeDyld::MemoryManager {
@@ -329,6 +330,221 @@ public:
std::vector<EHFrame> RegisteredEHFrames;
};
+ class RPCMMAlloc : public jitlink::JITLinkMemoryManager::Allocation {
+ using AllocationMap = DenseMap<unsigned, sys::MemoryBlock>;
+ using FinalizeContinuation =
+ jitlink::JITLinkMemoryManager::Allocation::FinalizeContinuation;
+ using ProtectionFlags = sys::Memory::ProtectionFlags;
+ using SegmentsRequestMap =
+ DenseMap<unsigned, jitlink::JITLinkMemoryManager::SegmentRequest>;
+
+ RPCMMAlloc(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id)
+ : Client(Client), Id(Id) {}
+
+ public:
+ static Expected<std::unique_ptr<RPCMMAlloc>>
+ Create(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id,
+ const SegmentsRequestMap &Request) {
+ auto *MM = new RPCMMAlloc(Client, Id);
+
+ if (Error Err = MM->allocateHostBlocks(Request))
+ return std::move(Err);
+
+ if (Error Err = MM->allocateTargetBlocks())
+ return std::move(Err);
+
+ return std::unique_ptr<RPCMMAlloc>(MM);
+ }
+
+ MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override {
+ assert(HostSegBlocks.count(Seg) && "No allocation for segment");
+ return {static_cast<char *>(HostSegBlocks[Seg].base()),
+ HostSegBlocks[Seg].allocatedSize()};
+ }
+
+ JITTargetAddress getTargetMemory(ProtectionFlags Seg) override {
+ assert(TargetSegBlocks.count(Seg) && "No allocation for segment");
+ return pointerToJITTargetAddress(TargetSegBlocks[Seg].base());
+ }
+
+ void finalizeAsync(FinalizeContinuation OnFinalize) override {
+ // Host allocations (working memory) remain ReadWrite.
+ OnFinalize(copyAndProtect());
+ }
+
+ Error deallocate() override {
+ // TODO: Cannot release target allocation. RPCAPI has no function
+ // symmetric to reserveMem(). Add RPC call like freeMem()?
+ return errorCodeToError(sys::Memory::releaseMappedMemory(HostAllocation));
+ }
+
+ private:
+ OrcRemoteTargetClient &Client;
+ ResourceIdMgr::ResourceId Id;
+ AllocationMap HostSegBlocks;
+ AllocationMap TargetSegBlocks;
+ JITTargetAddress TargetSegmentAddr;
+ sys::MemoryBlock HostAllocation;
+
+ Error allocateHostBlocks(const SegmentsRequestMap &Request) {
+ unsigned TargetPageSize = Client.getPageSize();
+
+ if (!isPowerOf2_64(static_cast<uint64_t>(TargetPageSize)))
+ return make_error<StringError>("Host page size is not a power of 2",
+ inconvertibleErrorCode());
+
+ auto TotalSize = calcTotalAllocSize(Request, TargetPageSize);
+ if (!TotalSize)
+ return TotalSize.takeError();
+
+ // Allocate one slab to cover all the segments.
+ const sys::Memory::ProtectionFlags ReadWrite =
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE);
+ std::error_code EC;
+ HostAllocation =
+ sys::Memory::allocateMappedMemory(*TotalSize, nullptr, ReadWrite, EC);
+ if (EC)
+ return errorCodeToError(EC);
+
+ char *SlabAddr = static_cast<char *>(HostAllocation.base());
+#ifndef NDEBUG
+ char *SlabAddrEnd = SlabAddr + HostAllocation.allocatedSize();
+#endif
+
+ // Allocate segment memory from the slab.
+ for (auto &KV : Request) {
+ const auto &Seg = KV.second;
+
+ uint64_t SegmentSize = Seg.getContentSize() + Seg.getZeroFillSize();
+ uint64_t AlignedSegmentSize = alignTo(SegmentSize, TargetPageSize);
+
+ // Zero out zero-fill memory.
+ char *ZeroFillBegin = SlabAddr + Seg.getContentSize();
+ memset(ZeroFillBegin, 0, Seg.getZeroFillSize());
+
+ // Record the block for this segment.
+ HostSegBlocks[KV.first] =
+ sys::MemoryBlock(SlabAddr, AlignedSegmentSize);
+
+ SlabAddr += AlignedSegmentSize;
+ assert(SlabAddr <= SlabAddrEnd && "Out of range");
+ }
+
+ return Error::success();
+ }
+
+ Error allocateTargetBlocks() {
+ // Reserve memory for all blocks on the target. We need as much space on
+ // the target as we allocated on the host.
+ TargetSegmentAddr = Client.reserveMem(Id, HostAllocation.allocatedSize(),
+ Client.getPageSize());
+ if (!TargetSegmentAddr)
+ return make_error<StringError>("Failed to reserve memory on the target",
+ inconvertibleErrorCode());
+
+ // Map memory blocks into the allocation that match the host allocation.
+ JITTargetAddress TargetAllocAddr = TargetSegmentAddr;
+ for (const auto &KV : HostSegBlocks) {
+ size_t TargetAllocSize = KV.second.allocatedSize();
+
+ TargetSegBlocks[KV.first] =
+ sys::MemoryBlock(jitTargetAddressToPointer<void *>(TargetAllocAddr),
+ TargetAllocSize);
+
+ TargetAllocAddr += TargetAllocSize;
+ assert(TargetAllocAddr - TargetSegmentAddr <=
+ HostAllocation.allocatedSize() &&
+ "Out of range on target");
+ }
+
+ return Error::success();
+ }
+
+ Error copyAndProtect() {
+ unsigned Permissions = 0u;
+
+ // Copy segments one by one.
+ for (auto &KV : TargetSegBlocks) {
+ Permissions |= KV.first;
+
+ const sys::MemoryBlock &TargetBlock = KV.second;
+ const sys::MemoryBlock &HostBlock = HostSegBlocks.lookup(KV.first);
+
+ size_t TargetAllocSize = TargetBlock.allocatedSize();
+ auto TargetAllocAddr = pointerToJITTargetAddress(TargetBlock.base());
+ auto *HostAllocBegin = static_cast<const char *>(HostBlock.base());
+
+ bool CopyErr =
+ Client.writeMem(TargetAllocAddr, HostAllocBegin, TargetAllocSize);
+ if (CopyErr)
+ return createStringError(inconvertibleErrorCode(),
+ "Failed to copy %d segment to the target",
+ KV.first);
+ }
+
+ // Set permission flags for all segments at once.
+ bool ProtectErr =
+ Client.setProtections(Id, TargetSegmentAddr, Permissions);
+ if (ProtectErr)
+ return createStringError(inconvertibleErrorCode(),
+ "Failed to apply permissions for %d segment "
+ "on the target",
+ Permissions);
+ return Error::success();
+ }
+
+ static Expected<size_t>
+ calcTotalAllocSize(const SegmentsRequestMap &Request,
+ unsigned TargetPageSize) {
+ size_t TotalSize = 0;
+ for (const auto &KV : Request) {
+ const auto &Seg = KV.second;
+
+ if (Seg.getAlignment() > TargetPageSize)
+ return make_error<StringError>("Cannot request alignment higher than "
+ "page alignment on target",
+ inconvertibleErrorCode());
+
+ TotalSize = alignTo(TotalSize, TargetPageSize);
+ TotalSize += Seg.getContentSize();
+ TotalSize += Seg.getZeroFillSize();
+ }
+
+ return TotalSize;
+ }
+ };
+
+ class RemoteJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
+ public:
+ RemoteJITLinkMemoryManager(OrcRemoteTargetClient &Client,
+ ResourceIdMgr::ResourceId Id)
+ : Client(Client), Id(Id) {}
+
+ RemoteJITLinkMemoryManager(const RemoteJITLinkMemoryManager &) = delete;
+ RemoteJITLinkMemoryManager(RemoteJITLinkMemoryManager &&) = default;
+
+ RemoteJITLinkMemoryManager &
+ operator=(const RemoteJITLinkMemoryManager &) = delete;
+ RemoteJITLinkMemoryManager &
+ operator=(RemoteJITLinkMemoryManager &&) = delete;
+
+ ~RemoteJITLinkMemoryManager() {
+ Client.destroyRemoteAllocator(Id);
+ LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n");
+ }
+
+ Expected<std::unique_ptr<Allocation>>
+ allocate(const jitlink::JITLinkDylib *JD,
+ const SegmentsRequestMap &Request) override {
+ return RPCMMAlloc::Create(Client, Id, Request);
+ }
+
+ private:
+ OrcRemoteTargetClient &Client;
+ ResourceIdMgr::ResourceId Id;
+ };
+
/// Remote indirect stubs manager.
class RemoteIndirectStubsManager : public IndirectStubsManager {
public:
@@ -453,20 +669,8 @@ public:
public:
RemoteTrampolinePool(OrcRemoteTargetClient &Client) : Client(Client) {}
- Expected<JITTargetAddress> getTrampoline() override {
- std::lock_guard<std::mutex> Lock(RTPMutex);
- if (AvailableTrampolines.empty()) {
- if (auto Err = grow())
- return std::move(Err);
- }
- assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool");
- auto TrampolineAddr = AvailableTrampolines.back();
- AvailableTrampolines.pop_back();
- return TrampolineAddr;
- }
-
private:
- Error grow() {
+ Error grow() override {
JITTargetAddress BlockAddr = 0;
uint32_t NumTrampolines = 0;
if (auto TrampolineInfoOrErr = Client.emitTrampolineBlock())
@@ -476,14 +680,12 @@ public:
uint32_t TrampolineSize = Client.getTrampolineSize();
for (unsigned I = 0; I < NumTrampolines; ++I)
- this->AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize));
+ AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize));
return Error::success();
}
- std::mutex RTPMutex;
OrcRemoteTargetClient &Client;
- std::vector<JITTargetAddress> AvailableTrampolines;
};
/// Remote compile callback manager.
@@ -501,7 +703,7 @@ public:
/// Channel is the ChannelT instance to communicate on. It is assumed that
/// the channel is ready to be read from and written to.
static Expected<std::unique_ptr<OrcRemoteTargetClient>>
- Create(rpc::RawByteChannel &Channel, ExecutionSession &ES) {
+ Create(shared::RawByteChannel &Channel, ExecutionSession &ES) {
Error Err = Error::success();
auto Client = std::unique_ptr<OrcRemoteTargetClient>(
new OrcRemoteTargetClient(Channel, ES, Err));
@@ -518,6 +720,14 @@ public:
return callB<exec::CallIntVoid>(Addr);
}
+ /// Call the int(int) function at the given address in the target and return
+ /// its result.
+ Expected<int> callIntInt(JITTargetAddress Addr, int Arg) {
+ LLVM_DEBUG(dbgs() << "Calling int(*)(int) " << format("0x%016" PRIx64, Addr)
+ << "\n");
+ return callB<exec::CallIntInt>(Addr, Arg);
+ }
+
/// Call the int(int, char*[]) function at the given address in the target and
/// return its result.
Expected<int> callMain(JITTargetAddress Addr,
@@ -546,6 +756,18 @@ public:
new RemoteRTDyldMemoryManager(*this, Id));
}
+ /// Create a JITLink-compatible memory manager which will allocate working
+ /// memory on the host and target memory on the remote target.
+ Expected<std::unique_ptr<RemoteJITLinkMemoryManager>>
+ createRemoteJITLinkMemoryManager() {
+ auto Id = AllocatorIds.getNext();
+ if (auto Err = callB<mem::CreateRemoteAllocator>(Id))
+ return std::move(Err);
+ LLVM_DEBUG(dbgs() << "Created remote allocator " << Id << "\n");
+ return std::unique_ptr<RemoteJITLinkMemoryManager>(
+ new RemoteJITLinkMemoryManager(*this, Id));
+ }
+
/// Create an RCIndirectStubsManager that will allocate stubs on the remote
/// target.
Expected<std::unique_ptr<RemoteIndirectStubsManager>>
@@ -583,9 +805,10 @@ public:
Error terminateSession() { return callB<utils::TerminateSession>(); }
private:
- OrcRemoteTargetClient(rpc::RawByteChannel &Channel, ExecutionSession &ES,
+ OrcRemoteTargetClient(shared::RawByteChannel &Channel, ExecutionSession &ES,
Error &Err)
- : rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel>(Channel, true),
+ : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(Channel,
+ true),
ES(ES) {
ErrorAsOutParameter EAO(&Err);
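The hunks above give the client two new entry points: callIntInt, a thin wrapper over the new exec::CallIntInt wire function, and createRemoteJITLinkMemoryManager for JITLink-based linking. The sketch below shows how a client might use the first of these; it is illustrative only and not part of the patch. The getSymbolAddress helper and the FDRawByteChannel transport (added later in this commit) are assumed from the surrounding LLVM 12 API, and the symbol name "square" is hypothetical.

// Illustrative only: invoke a JIT'd int(int) function on the remote through
// the new CallIntInt RPC. "square" is a hypothetical symbol name.
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
#include "llvm/Support/Error.h"

using namespace llvm;
using namespace llvm::orc;

int callSquareOnRemote(ExecutionSession &ES, int InFD, int OutFD, int Arg) {
  ExitOnError ExitOnErr;
  shared::FDRawByteChannel Channel(InFD, OutFD); // assumed transport
  auto Client = ExitOnErr(remote::OrcRemoteTargetClient::Create(Channel, ES));
  // Resolve the symbol on the remote, then call it via CallIntInt.
  JITTargetAddress Addr = ExitOnErr(Client->getSymbolAddress("square"));
  int Result = ExitOnErr(Client->callIntInt(Addr, Arg));
  ExitOnErr(Client->terminateSession());
  return Result;
}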
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
index 52a328165240..367bfb369191 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
@@ -16,8 +16,8 @@
#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/Orc/RPC/RPCUtils.h"
-#include "llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h"
+#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
namespace llvm {
namespace orc {
@@ -73,10 +73,9 @@ private:
} // end namespace remote
-namespace rpc {
+namespace shared {
-template <>
-class RPCTypeName<JITSymbolFlags> {
+template <> class SerializationTypeName<JITSymbolFlags> {
public:
static const char *getName() { return "JITSymbolFlags"; }
};
@@ -100,7 +99,7 @@ public:
}
};
-template <> class RPCTypeName<remote::DirectBufferWriter> {
+template <> class SerializationTypeName<remote::DirectBufferWriter> {
public:
static const char *getName() { return "DirectBufferWriter"; }
};
@@ -133,7 +132,7 @@ public:
}
};
-} // end namespace rpc
+} // end namespace shared
namespace remote {
@@ -167,20 +166,20 @@ private:
namespace eh {
/// Registers EH frames on the remote.
- class RegisterEHFrames
- : public rpc::Function<RegisterEHFrames,
- void(JITTargetAddress Addr, uint32_t Size)> {
- public:
- static const char *getName() { return "RegisterEHFrames"; }
- };
+class RegisterEHFrames
+ : public shared::RPCFunction<RegisterEHFrames,
+ void(JITTargetAddress Addr, uint32_t Size)> {
+public:
+ static const char *getName() { return "RegisterEHFrames"; }
+};
/// Deregisters EH frames on the remote.
- class DeregisterEHFrames
- : public rpc::Function<DeregisterEHFrames,
- void(JITTargetAddress Addr, uint32_t Size)> {
- public:
- static const char *getName() { return "DeregisterEHFrames"; }
- };
+class DeregisterEHFrames
+ : public shared::RPCFunction<DeregisterEHFrames,
+ void(JITTargetAddress Addr, uint32_t Size)> {
+public:
+ static const char *getName() { return "DeregisterEHFrames"; }
+};
} // end namespace eh
@@ -189,28 +188,38 @@ namespace exec {
/// Call an 'int32_t()'-type function on the remote, returns the called
/// function's return value.
- class CallIntVoid
- : public rpc::Function<CallIntVoid, int32_t(JITTargetAddress Addr)> {
- public:
- static const char *getName() { return "CallIntVoid"; }
- };
+class CallIntVoid
+ : public shared::RPCFunction<CallIntVoid, int32_t(JITTargetAddress Addr)> {
+public:
+ static const char *getName() { return "CallIntVoid"; }
+};
+
+ /// Call an 'int32_t(int32_t)'-type function on the remote, returns the called
+ /// function's return value.
+class CallIntInt
+ : public shared::RPCFunction<CallIntInt,
+ int32_t(JITTargetAddress Addr, int)> {
+public:
+ static const char *getName() { return "CallIntInt"; }
+};
/// Call an 'int32_t(int32_t, char**)'-type function on the remote, returns the
/// called function's return value.
- class CallMain
- : public rpc::Function<CallMain, int32_t(JITTargetAddress Addr,
- std::vector<std::string> Args)> {
- public:
- static const char *getName() { return "CallMain"; }
- };
+class CallMain
+ : public shared::RPCFunction<CallMain,
+ int32_t(JITTargetAddress Addr,
+ std::vector<std::string> Args)> {
+public:
+ static const char *getName() { return "CallMain"; }
+};
/// Calls a 'void()'-type function on the remote, returns when the called
/// function completes.
- class CallVoidVoid
- : public rpc::Function<CallVoidVoid, void(JITTargetAddress FnAddr)> {
- public:
- static const char *getName() { return "CallVoidVoid"; }
- };
+class CallVoidVoid
+ : public shared::RPCFunction<CallVoidVoid, void(JITTargetAddress FnAddr)> {
+public:
+ static const char *getName() { return "CallVoidVoid"; }
+};
} // end namespace exec
@@ -218,60 +227,62 @@ namespace exec {
namespace mem {
/// Creates a memory allocator on the remote.
- class CreateRemoteAllocator
- : public rpc::Function<CreateRemoteAllocator,
- void(ResourceIdMgr::ResourceId AllocatorID)> {
- public:
- static const char *getName() { return "CreateRemoteAllocator"; }
- };
+class CreateRemoteAllocator
+ : public shared::RPCFunction<CreateRemoteAllocator,
+ void(ResourceIdMgr::ResourceId AllocatorID)> {
+public:
+ static const char *getName() { return "CreateRemoteAllocator"; }
+};
/// Destroys a remote allocator, freeing any memory allocated by it.
- class DestroyRemoteAllocator
- : public rpc::Function<DestroyRemoteAllocator,
- void(ResourceIdMgr::ResourceId AllocatorID)> {
- public:
- static const char *getName() { return "DestroyRemoteAllocator"; }
- };
+class DestroyRemoteAllocator
+ : public shared::RPCFunction<DestroyRemoteAllocator,
+ void(ResourceIdMgr::ResourceId AllocatorID)> {
+public:
+ static const char *getName() { return "DestroyRemoteAllocator"; }
+};
/// Read a remote memory block.
- class ReadMem
- : public rpc::Function<ReadMem, std::vector<uint8_t>(JITTargetAddress Src,
- uint64_t Size)> {
- public:
- static const char *getName() { return "ReadMem"; }
- };
+class ReadMem
+ : public shared::RPCFunction<
+ ReadMem, std::vector<uint8_t>(JITTargetAddress Src, uint64_t Size)> {
+public:
+ static const char *getName() { return "ReadMem"; }
+};
/// Reserve a block of memory on the remote via the given allocator.
- class ReserveMem
- : public rpc::Function<ReserveMem,
- JITTargetAddress(ResourceIdMgr::ResourceId AllocID,
- uint64_t Size, uint32_t Align)> {
- public:
- static const char *getName() { return "ReserveMem"; }
- };
+class ReserveMem
+ : public shared::RPCFunction<
+ ReserveMem, JITTargetAddress(ResourceIdMgr::ResourceId AllocID,
+ uint64_t Size, uint32_t Align)> {
+public:
+ static const char *getName() { return "ReserveMem"; }
+};
/// Set the memory protection on a memory block.
- class SetProtections
- : public rpc::Function<SetProtections,
- void(ResourceIdMgr::ResourceId AllocID,
- JITTargetAddress Dst, uint32_t ProtFlags)> {
- public:
- static const char *getName() { return "SetProtections"; }
- };
+class SetProtections
+ : public shared::RPCFunction<
+ SetProtections, void(ResourceIdMgr::ResourceId AllocID,
+ JITTargetAddress Dst, uint32_t ProtFlags)> {
+public:
+ static const char *getName() { return "SetProtections"; }
+};
/// Write to a remote memory block.
- class WriteMem
- : public rpc::Function<WriteMem, void(remote::DirectBufferWriter DB)> {
- public:
- static const char *getName() { return "WriteMem"; }
- };
+class WriteMem
+ : public shared::RPCFunction<WriteMem,
+ void(remote::DirectBufferWriter DB)> {
+public:
+ static const char *getName() { return "WriteMem"; }
+};
/// Write to a remote pointer.
- class WritePtr : public rpc::Function<WritePtr, void(JITTargetAddress Dst,
- JITTargetAddress Val)> {
- public:
- static const char *getName() { return "WritePtr"; }
- };
+class WritePtr
+ : public shared::RPCFunction<WritePtr, void(JITTargetAddress Dst,
+ JITTargetAddress Val)> {
+public:
+ static const char *getName() { return "WritePtr"; }
+};
} // end namespace mem
@@ -279,45 +290,46 @@ namespace mem {
namespace stubs {
/// Creates an indirect stub owner on the remote.
- class CreateIndirectStubsOwner
- : public rpc::Function<CreateIndirectStubsOwner,
- void(ResourceIdMgr::ResourceId StubOwnerID)> {
- public:
- static const char *getName() { return "CreateIndirectStubsOwner"; }
- };
+class CreateIndirectStubsOwner
+ : public shared::RPCFunction<CreateIndirectStubsOwner,
+ void(ResourceIdMgr::ResourceId StubOwnerID)> {
+public:
+ static const char *getName() { return "CreateIndirectStubsOwner"; }
+};
/// RPC function for destroying an indirect stubs owner.
- class DestroyIndirectStubsOwner
- : public rpc::Function<DestroyIndirectStubsOwner,
- void(ResourceIdMgr::ResourceId StubsOwnerID)> {
- public:
- static const char *getName() { return "DestroyIndirectStubsOwner"; }
- };
+class DestroyIndirectStubsOwner
+ : public shared::RPCFunction<DestroyIndirectStubsOwner,
+ void(ResourceIdMgr::ResourceId StubsOwnerID)> {
+public:
+ static const char *getName() { return "DestroyIndirectStubsOwner"; }
+};
/// EmitIndirectStubs result is (StubsBase, PtrsBase, NumStubsEmitted).
- class EmitIndirectStubs
- : public rpc::Function<
- EmitIndirectStubs,
- std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>(
- ResourceIdMgr::ResourceId StubsOwnerID,
- uint32_t NumStubsRequired)> {
- public:
- static const char *getName() { return "EmitIndirectStubs"; }
- };
+class EmitIndirectStubs
+ : public shared::RPCFunction<
+ EmitIndirectStubs,
+ std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>(
+ ResourceIdMgr::ResourceId StubsOwnerID,
+ uint32_t NumStubsRequired)> {
+public:
+ static const char *getName() { return "EmitIndirectStubs"; }
+};
/// RPC function to emit the resolver block and return its address.
- class EmitResolverBlock : public rpc::Function<EmitResolverBlock, void()> {
- public:
- static const char *getName() { return "EmitResolverBlock"; }
- };
+class EmitResolverBlock
+ : public shared::RPCFunction<EmitResolverBlock, void()> {
+public:
+ static const char *getName() { return "EmitResolverBlock"; }
+};
/// EmitTrampolineBlock result is (BlockAddr, NumTrampolines).
- class EmitTrampolineBlock
- : public rpc::Function<EmitTrampolineBlock,
- std::tuple<JITTargetAddress, uint32_t>()> {
- public:
- static const char *getName() { return "EmitTrampolineBlock"; }
- };
+class EmitTrampolineBlock
+ : public shared::RPCFunction<EmitTrampolineBlock,
+ std::tuple<JITTargetAddress, uint32_t>()> {
+public:
+ static const char *getName() { return "EmitTrampolineBlock"; }
+};
} // end namespace stubs
@@ -326,44 +338,44 @@ namespace utils {
/// GetRemoteInfo result is (Triple, PointerSize, PageSize, TrampolineSize,
/// IndirectStubsSize).
- class GetRemoteInfo
- : public rpc::Function<
- GetRemoteInfo,
- std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>()> {
- public:
- static const char *getName() { return "GetRemoteInfo"; }
- };
+class GetRemoteInfo
+ : public shared::RPCFunction<
+ GetRemoteInfo,
+ std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>()> {
+public:
+ static const char *getName() { return "GetRemoteInfo"; }
+};
/// Get the address of a remote symbol.
- class GetSymbolAddress
- : public rpc::Function<GetSymbolAddress,
- JITTargetAddress(std::string SymbolName)> {
- public:
- static const char *getName() { return "GetSymbolAddress"; }
- };
+class GetSymbolAddress
+ : public shared::RPCFunction<GetSymbolAddress,
+ JITTargetAddress(std::string SymbolName)> {
+public:
+ static const char *getName() { return "GetSymbolAddress"; }
+};
/// Request that the host execute a compile callback.
- class RequestCompile
- : public rpc::Function<
- RequestCompile, JITTargetAddress(JITTargetAddress TrampolineAddr)> {
- public:
- static const char *getName() { return "RequestCompile"; }
- };
+class RequestCompile
+ : public shared::RPCFunction<
+ RequestCompile, JITTargetAddress(JITTargetAddress TrampolineAddr)> {
+public:
+ static const char *getName() { return "RequestCompile"; }
+};
/// Notify the remote and terminate the session.
- class TerminateSession : public rpc::Function<TerminateSession, void()> {
- public:
- static const char *getName() { return "TerminateSession"; }
- };
+class TerminateSession : public shared::RPCFunction<TerminateSession, void()> {
+public:
+ static const char *getName() { return "TerminateSession"; }
+};
} // namespace utils
class OrcRemoteTargetRPCAPI
- : public rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel> {
+ : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> {
public:
// FIXME: Remove constructors once MSVC supports synthesizing move-ops.
- OrcRemoteTargetRPCAPI(rpc::RawByteChannel &C)
- : rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel>(C, true) {}
+ OrcRemoteTargetRPCAPI(shared::RawByteChannel &C)
+ : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(C, true) {}
};
} // end namespace remote
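All of the wire functions above keep their declaration pattern; only the base template is renamed from rpc::Function to shared::RPCFunction. As a hedged illustration (not part of the patch), an additional call type would be declared the same way; CallAddTwoInts below is hypothetical.

// Hypothetical wire-function declaration against the renamed template.
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"

namespace llvm {
namespace orc {
namespace remote {
namespace exec {

class CallAddTwoInts
    : public shared::RPCFunction<CallAddTwoInts,
                                 int32_t(JITTargetAddress Addr, int, int)> {
public:
  static const char *getName() { return "CallAddTwoInts"; }
};

} // end namespace exec
} // end namespace remote
} // end namespace orc
} // end namespace llvm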
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
index 50c155d77db1..ce9bf064303d 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
@@ -16,8 +16,8 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
-#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
@@ -46,7 +46,7 @@ namespace remote {
template <typename ChannelT, typename TargetT>
class OrcRemoteTargetServer
- : public rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel> {
+ : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> {
public:
using SymbolLookupFtor =
std::function<JITTargetAddress(const std::string &Name)>;
@@ -57,12 +57,14 @@ public:
OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup,
EHFrameRegistrationFtor EHFramesRegister,
EHFrameRegistrationFtor EHFramesDeregister)
- : rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel>(Channel, true),
+ : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(Channel,
+ true),
SymbolLookup(std::move(SymbolLookup)),
EHFramesRegister(std::move(EHFramesRegister)),
EHFramesDeregister(std::move(EHFramesDeregister)) {
using ThisT = std::remove_reference_t<decltype(*this)>;
addHandler<exec::CallIntVoid>(*this, &ThisT::handleCallIntVoid);
+ addHandler<exec::CallIntInt>(*this, &ThisT::handleCallIntInt);
addHandler<exec::CallMain>(*this, &ThisT::handleCallMain);
addHandler<exec::CallVoidVoid>(*this, &ThisT::handleCallVoidVoid);
addHandler<mem::CreateRemoteAllocator>(*this,
@@ -168,6 +170,19 @@ private:
return Result;
}
+ Expected<int32_t> handleCallIntInt(JITTargetAddress Addr, int Arg) {
+ using IntIntFnTy = int (*)(int);
+
+ IntIntFnTy Fn = reinterpret_cast<IntIntFnTy>(static_cast<uintptr_t>(Addr));
+
+ LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr)
+ << " with argument " << Arg << "\n");
+ int Result = Fn(Arg);
+ LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
+
+ return Result;
+ }
+
Expected<int32_t> handleCallMain(JITTargetAddress Addr,
std::vector<std::string> Args) {
using MainFnTy = int (*)(int, const char *[]);
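The server side registers a handler for the new call with addHandler<exec::CallIntInt> and implements it in handleCallIntInt above. A serving loop in the style of lli-child-target might look like the sketch below; the OrcX86_64_SysV ABI type, the in-process symbol lookup, and the EH-frame callbacks are assumptions made for the example only.

// Illustrative server loop; not part of the patch.
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/Support/Error.h"

using namespace llvm;
using namespace llvm::orc;

int serveJITRequests(int InFD, int OutFD) {
  ExitOnError ExitOnErr;
  shared::FDRawByteChannel Channel(InFD, OutFD);

  auto SymbolLookup = [](const std::string &Name) {
    return RTDyldMemoryManager::getSymbolAddressInProcess(Name);
  };
  auto RegisterEHFrames = [](uint8_t *Addr, uint32_t Size) {
    RTDyldMemoryManager::registerEHFramesInProcess(Addr, Size);
  };
  auto DeregisterEHFrames = [](uint8_t *Addr, uint32_t Size) {
    RTDyldMemoryManager::deregisterEHFramesInProcess(Addr, Size);
  };

  remote::OrcRemoteTargetServer<shared::FDRawByteChannel, OrcX86_64_SysV>
      Server(Channel, SymbolLookup, RegisterEHFrames, DeregisterEHFrames);

  // Service incoming calls (including the new CallIntInt) until the client
  // sends TerminateSession.
  while (!Server.receivedTerminate())
    ExitOnErr(Server.handleOne());
  return 0;
}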
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index 9ada0871cf0c..7dfbf32b1ffa 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -20,7 +20,6 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/Layer.h"
-#include "llvm/ExecutionEngine/Orc/Legacy.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
@@ -36,16 +35,16 @@
namespace llvm {
namespace orc {
-class RTDyldObjectLinkingLayer : public ObjectLayer {
+class RTDyldObjectLinkingLayer : public ObjectLayer, private ResourceManager {
public:
/// Functor for receiving object-loaded notifications.
- using NotifyLoadedFunction =
- std::function<void(VModuleKey, const object::ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &)>;
+ using NotifyLoadedFunction = std::function<void(
+ MaterializationResponsibility &R, const object::ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &)>;
/// Functor for receiving finalization notifications.
- using NotifyEmittedFunction =
- std::function<void(VModuleKey, std::unique_ptr<MemoryBuffer>)>;
+ using NotifyEmittedFunction = std::function<void(
+ MaterializationResponsibility &R, std::unique_ptr<MemoryBuffer>)>;
using GetMemoryManagerFunction =
std::function<std::unique_ptr<RuntimeDyld::MemoryManager>()>;
@@ -58,7 +57,7 @@ public:
~RTDyldObjectLinkingLayer();
/// Emit the object.
- void emit(MaterializationResponsibility R,
+ void emit(std::unique_ptr<MaterializationResponsibility> R,
std::unique_ptr<MemoryBuffer> O) override;
/// Set the NotifyLoaded callback.
@@ -123,16 +122,23 @@ public:
void unregisterJITEventListener(JITEventListener &L);
private:
- Error onObjLoad(VModuleKey K, MaterializationResponsibility &R,
+ using MemoryManagerUP = std::unique_ptr<RuntimeDyld::MemoryManager>;
+
+ Error onObjLoad(MaterializationResponsibility &R,
const object::ObjectFile &Obj,
- RuntimeDyld::MemoryManager *MemMgr,
- std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::LoadedObjectInfo &LoadedObjInfo,
std::map<StringRef, JITEvaluatedSymbol> Resolved,
std::set<StringRef> &InternalSymbols);
- void onObjEmit(VModuleKey K, MaterializationResponsibility &R,
+ void onObjEmit(MaterializationResponsibility &R,
object::OwningBinary<object::ObjectFile> O,
- RuntimeDyld::MemoryManager *MemMgr, Error Err);
+ std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ Error Err);
+
+ Error handleRemoveResources(ResourceKey K) override;
+ void handleTransferResources(ResourceKey DstKey, ResourceKey SrcKey) override;
mutable std::mutex RTDyldLayerMutex;
GetMemoryManagerFunction GetMemoryManager;
@@ -141,361 +147,8 @@ private:
bool ProcessAllSections = false;
bool OverrideObjectFlags = false;
bool AutoClaimObjectSymbols = false;
- std::vector<std::unique_ptr<RuntimeDyld::MemoryManager>> MemMgrs;
+ DenseMap<ResourceKey, std::vector<MemoryManagerUP>> MemMgrs;
std::vector<JITEventListener *> EventListeners;
- DenseMap<RuntimeDyld::MemoryManager *,
- std::unique_ptr<RuntimeDyld::LoadedObjectInfo>>
- LoadedObjInfos;
-};
-
-class LegacyRTDyldObjectLinkingLayerBase {
-public:
- using ObjectPtr = std::unique_ptr<MemoryBuffer>;
-
-protected:
-
- /// Holds an object to be allocated/linked as a unit in the JIT.
- ///
- /// An instance of this class will be created for each object added
- /// via JITObjectLayer::addObject. Deleting the instance (via
- /// removeObject) frees its memory, removing all symbol definitions that
- /// had been provided by this instance. Higher level layers are responsible
- /// for taking any action required to handle the missing symbols.
- class LinkedObject {
- public:
- LinkedObject() = default;
- LinkedObject(const LinkedObject&) = delete;
- void operator=(const LinkedObject&) = delete;
- virtual ~LinkedObject() = default;
-
- virtual Error finalize() = 0;
-
- virtual JITSymbol::GetAddressFtor
- getSymbolMaterializer(std::string Name) = 0;
-
- virtual void mapSectionAddress(const void *LocalAddress,
- JITTargetAddress TargetAddr) const = 0;
-
- JITSymbol getSymbol(StringRef Name, bool ExportedSymbolsOnly) {
- auto SymEntry = SymbolTable.find(Name);
- if (SymEntry == SymbolTable.end())
- return nullptr;
- if (!SymEntry->second.getFlags().isExported() && ExportedSymbolsOnly)
- return nullptr;
- if (!Finalized)
- return JITSymbol(getSymbolMaterializer(std::string(Name)),
- SymEntry->second.getFlags());
- return JITSymbol(SymEntry->second);
- }
-
- protected:
- StringMap<JITEvaluatedSymbol> SymbolTable;
- bool Finalized = false;
- };
-};
-
-/// Bare bones object linking layer.
-///
-/// This class is intended to be used as the base layer for a JIT. It allows
-/// object files to be loaded into memory, linked, and the addresses of their
-/// symbols queried. All objects added to this layer can see each other's
-/// symbols.
-class LegacyRTDyldObjectLinkingLayer : public LegacyRTDyldObjectLinkingLayerBase {
-public:
-
- using LegacyRTDyldObjectLinkingLayerBase::ObjectPtr;
-
- /// Functor for receiving object-loaded notifications.
- using NotifyLoadedFtor =
- std::function<void(VModuleKey, const object::ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &)>;
-
- /// Functor for receiving finalization notifications.
- using NotifyFinalizedFtor =
- std::function<void(VModuleKey, const object::ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &)>;
-
- /// Functor for receiving deallocation notifications.
- using NotifyFreedFtor = std::function<void(VModuleKey, const object::ObjectFile &Obj)>;
-
-private:
- using OwnedObject = object::OwningBinary<object::ObjectFile>;
-
- template <typename MemoryManagerPtrT>
- class ConcreteLinkedObject : public LinkedObject {
- public:
- ConcreteLinkedObject(LegacyRTDyldObjectLinkingLayer &Parent, VModuleKey K,
- OwnedObject Obj, MemoryManagerPtrT MemMgr,
- std::shared_ptr<SymbolResolver> Resolver,
- bool ProcessAllSections)
- : K(std::move(K)),
- Parent(Parent),
- MemMgr(std::move(MemMgr)),
- PFC(std::make_unique<PreFinalizeContents>(
- std::move(Obj), std::move(Resolver),
- ProcessAllSections)) {
- buildInitialSymbolTable(PFC->Obj);
- }
-
- ~ConcreteLinkedObject() override {
- if (this->Parent.NotifyFreed && ObjForNotify.getBinary())
- this->Parent.NotifyFreed(K, *ObjForNotify.getBinary());
-
- MemMgr->deregisterEHFrames();
- }
-
- Error finalize() override {
- assert(PFC && "mapSectionAddress called on finalized LinkedObject");
-
- JITSymbolResolverAdapter ResolverAdapter(Parent.ES, *PFC->Resolver,
- nullptr);
- PFC->RTDyld = std::make_unique<RuntimeDyld>(*MemMgr, ResolverAdapter);
- PFC->RTDyld->setProcessAllSections(PFC->ProcessAllSections);
-
- Finalized = true;
-
- std::unique_ptr<RuntimeDyld::LoadedObjectInfo> Info =
- PFC->RTDyld->loadObject(*PFC->Obj.getBinary());
-
- // Copy the symbol table out of the RuntimeDyld instance.
- {
- auto SymTab = PFC->RTDyld->getSymbolTable();
- for (auto &KV : SymTab)
- SymbolTable[KV.first] = KV.second;
- }
-
- if (Parent.NotifyLoaded)
- Parent.NotifyLoaded(K, *PFC->Obj.getBinary(), *Info);
-
- PFC->RTDyld->finalizeWithMemoryManagerLocking();
-
- if (PFC->RTDyld->hasError())
- return make_error<StringError>(PFC->RTDyld->getErrorString(),
- inconvertibleErrorCode());
-
- if (Parent.NotifyFinalized)
- Parent.NotifyFinalized(K, *PFC->Obj.getBinary(), *Info);
-
- // Release resources.
- if (this->Parent.NotifyFreed)
- ObjForNotify = std::move(PFC->Obj); // needed for callback
- PFC = nullptr;
- return Error::success();
- }
-
- JITSymbol::GetAddressFtor getSymbolMaterializer(std::string Name) override {
- return [this, Name]() -> Expected<JITTargetAddress> {
- // The symbol may be materialized between the creation of this lambda
- // and its execution, so we need to double check.
- if (!this->Finalized)
- if (auto Err = this->finalize())
- return std::move(Err);
- return this->getSymbol(Name, false).getAddress();
- };
- }
-
- void mapSectionAddress(const void *LocalAddress,
- JITTargetAddress TargetAddr) const override {
- assert(PFC && "mapSectionAddress called on finalized LinkedObject");
- assert(PFC->RTDyld && "mapSectionAddress called on raw LinkedObject");
- PFC->RTDyld->mapSectionAddress(LocalAddress, TargetAddr);
- }
-
- private:
- void buildInitialSymbolTable(const OwnedObject &Obj) {
- for (auto &Symbol : Obj.getBinary()->symbols()) {
- if (Expected<uint32_t> SymbolFlagsOrErr = Symbol.getFlags()) {
- if (*SymbolFlagsOrErr & object::SymbolRef::SF_Undefined)
- continue;
- } else {
- // FIXME: Raise an error for bad symbols.
- consumeError(SymbolFlagsOrErr.takeError());
- continue;
- }
-
- Expected<StringRef> SymbolName = Symbol.getName();
- // FIXME: Raise an error for bad symbols.
- if (!SymbolName) {
- consumeError(SymbolName.takeError());
- continue;
- }
- // FIXME: Raise an error for bad symbols.
- auto Flags = JITSymbolFlags::fromObjectSymbol(Symbol);
- if (!Flags) {
- consumeError(Flags.takeError());
- continue;
- }
- SymbolTable.insert(
- std::make_pair(*SymbolName, JITEvaluatedSymbol(0, *Flags)));
- }
- }
-
- // Contains the information needed prior to finalization: the object files,
- // memory manager, resolver, and flags needed for RuntimeDyld.
- struct PreFinalizeContents {
- PreFinalizeContents(OwnedObject Obj,
- std::shared_ptr<SymbolResolver> Resolver,
- bool ProcessAllSections)
- : Obj(std::move(Obj)),
- Resolver(std::move(Resolver)),
- ProcessAllSections(ProcessAllSections) {}
-
- OwnedObject Obj;
- std::shared_ptr<SymbolResolver> Resolver;
- bool ProcessAllSections;
- std::unique_ptr<RuntimeDyld> RTDyld;
- };
-
- VModuleKey K;
- LegacyRTDyldObjectLinkingLayer &Parent;
- MemoryManagerPtrT MemMgr;
- OwnedObject ObjForNotify;
- std::unique_ptr<PreFinalizeContents> PFC;
- };
-
- template <typename MemoryManagerPtrT>
- std::unique_ptr<ConcreteLinkedObject<MemoryManagerPtrT>>
- createLinkedObject(LegacyRTDyldObjectLinkingLayer &Parent, VModuleKey K,
- OwnedObject Obj, MemoryManagerPtrT MemMgr,
- std::shared_ptr<SymbolResolver> Resolver,
- bool ProcessAllSections) {
- using LOS = ConcreteLinkedObject<MemoryManagerPtrT>;
- return std::make_unique<LOS>(Parent, std::move(K), std::move(Obj),
- std::move(MemMgr), std::move(Resolver),
- ProcessAllSections);
- }
-
-public:
- struct Resources {
- std::shared_ptr<RuntimeDyld::MemoryManager> MemMgr;
- std::shared_ptr<SymbolResolver> Resolver;
- };
-
- using ResourcesGetter = std::function<Resources(VModuleKey)>;
-
- /// Construct an ObjectLinkingLayer with the given NotifyLoaded,
- /// and NotifyFinalized functors.
- LLVM_ATTRIBUTE_DEPRECATED(
- LegacyRTDyldObjectLinkingLayer(
- ExecutionSession &ES, ResourcesGetter GetResources,
- NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
- NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(),
- NotifyFreedFtor NotifyFreed = NotifyFreedFtor()),
- "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
- "use "
- "ORCv2 (see docs/ORCv2.rst)");
-
- // Legacy layer constructor with deprecation acknowledgement.
- LegacyRTDyldObjectLinkingLayer(
- ORCv1DeprecationAcknowledgement, ExecutionSession &ES,
- ResourcesGetter GetResources,
- NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
- NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(),
- NotifyFreedFtor NotifyFreed = NotifyFreedFtor())
- : ES(ES), GetResources(std::move(GetResources)),
- NotifyLoaded(std::move(NotifyLoaded)),
- NotifyFinalized(std::move(NotifyFinalized)),
- NotifyFreed(std::move(NotifyFreed)), ProcessAllSections(false) {}
-
- /// Set the 'ProcessAllSections' flag.
- ///
- /// If set to true, all sections in each object file will be allocated using
- /// the memory manager, rather than just the sections required for execution.
- ///
- /// This is kludgy, and may be removed in the future.
- void setProcessAllSections(bool ProcessAllSections) {
- this->ProcessAllSections = ProcessAllSections;
- }
-
- /// Add an object to the JIT.
- Error addObject(VModuleKey K, ObjectPtr ObjBuffer) {
-
- auto Obj =
- object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
- if (!Obj)
- return Obj.takeError();
-
- assert(!LinkedObjects.count(K) && "VModuleKey already in use");
-
- auto R = GetResources(K);
-
- LinkedObjects[K] = createLinkedObject(
- *this, K, OwnedObject(std::move(*Obj), std::move(ObjBuffer)),
- std::move(R.MemMgr), std::move(R.Resolver), ProcessAllSections);
-
- return Error::success();
- }
-
- /// Remove the object associated with VModuleKey K.
- ///
- /// All memory allocated for the object will be freed, and the sections and
- /// symbols it provided will no longer be available. No attempt is made to
- /// re-emit the missing symbols, and any use of these symbols (directly or
- /// indirectly) will result in undefined behavior. If dependence tracking is
- /// required to detect or resolve such issues it should be added at a higher
- /// layer.
- Error removeObject(VModuleKey K) {
- assert(LinkedObjects.count(K) && "VModuleKey not associated with object");
- // How do we invalidate the symbols in H?
- LinkedObjects.erase(K);
- return Error::success();
- }
-
- /// Search for the given named symbol.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it exists.
- JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) {
- for (auto &KV : LinkedObjects)
- if (auto Sym = KV.second->getSymbol(Name, ExportedSymbolsOnly))
- return Sym;
- else if (auto Err = Sym.takeError())
- return std::move(Err);
-
- return nullptr;
- }
-
- /// Search for the given named symbol in the context of the loaded
- /// object represented by the VModuleKey K.
- /// @param K The VModuleKey for the object to search in.
- /// @param Name The name of the symbol to search for.
- /// @param ExportedSymbolsOnly If true, search only for exported symbols.
- /// @return A handle for the given named symbol, if it is found in the
- /// given object.
- JITSymbol findSymbolIn(VModuleKey K, StringRef Name,
- bool ExportedSymbolsOnly) {
- assert(LinkedObjects.count(K) && "VModuleKey not associated with object");
- return LinkedObjects[K]->getSymbol(Name, ExportedSymbolsOnly);
- }
-
- /// Map section addresses for the object associated with the
- /// VModuleKey K.
- void mapSectionAddress(VModuleKey K, const void *LocalAddress,
- JITTargetAddress TargetAddr) {
- assert(LinkedObjects.count(K) && "VModuleKey not associated with object");
- LinkedObjects[K]->mapSectionAddress(LocalAddress, TargetAddr);
- }
-
- /// Immediately emit and finalize the object represented by the given
- /// VModuleKey.
- /// @param K VModuleKey for object to emit/finalize.
- Error emitAndFinalize(VModuleKey K) {
- assert(LinkedObjects.count(K) && "VModuleKey not associated with object");
- return LinkedObjects[K]->finalize();
- }
-
-private:
- ExecutionSession &ES;
-
- ResourcesGetter GetResources;
- NotifyLoadedFtor NotifyLoaded;
- NotifyFinalizedFtor NotifyFinalized;
- NotifyFreedFtor NotifyFreed;
-
- // NB! `LinkedObjects` needs to be destroyed before `NotifyFreed` because
- // `~ConcreteLinkedObject` calls `NotifyFreed`
- std::map<VModuleKey, std::unique_ptr<LinkedObject>> LinkedObjects;
- bool ProcessAllSections = false;
};
} // end namespace orc
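With the ORCv1 'Legacy' layer removed, RTDyldObjectLinkingLayer's notify callbacks now receive the MaterializationResponsibility rather than a VModuleKey, and per-object memory managers are keyed by ResourceKey. A hedged sketch of a callback written against the new signature follows; the SectionMemoryManager factory and the logging are illustrative only.

// Illustrative only: register a notify-loaded callback with the new
// MaterializationResponsibility-based signature.
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::orc;

std::unique_ptr<RTDyldObjectLinkingLayer>
makeObjectLayer(ExecutionSession &ES) {
  auto Layer = std::make_unique<RTDyldObjectLinkingLayer>(
      ES, [] { return std::make_unique<SectionMemoryManager>(); });

  // The responsibility object identifies what is being materialized; the
  // VModuleKey parameter is gone.
  Layer->setNotifyLoaded([](MaterializationResponsibility &R,
                            const object::ObjectFile &Obj,
                            const RuntimeDyld::LoadedObjectInfo &) {
    errs() << "Loaded " << Obj.getFileName() << " for "
           << R.getTargetJITDylib().getName() << "\n";
  });
  return Layer;
}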
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
deleted file mode 100644
index d7304cfcf931..000000000000
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
+++ /dev/null
@@ -1,564 +0,0 @@
-//===------ RemoteObjectLayer.h - Forwards objs to a remote -----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Forwards objects to a remote object layer via RPC.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H
-#define LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H
-
-#include "llvm/ExecutionEngine/Orc/Core.h"
-#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
-#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
-#include "llvm/Object/ObjectFile.h"
-#include <map>
-
-namespace llvm {
-namespace orc {
-
-/// RPC API needed by RemoteObjectClientLayer and RemoteObjectServerLayer.
-class RemoteObjectLayerAPI {
-public:
-
- using ObjHandleT = remote::ResourceIdMgr::ResourceId;
-
-protected:
-
- using RemoteSymbolId = remote::ResourceIdMgr::ResourceId;
- using RemoteSymbol = std::pair<RemoteSymbolId, JITSymbolFlags>;
-
-public:
-
- using BadSymbolHandleError = remote::ResourceNotFound<RemoteSymbolId>;
- using BadObjectHandleError = remote::ResourceNotFound<ObjHandleT>;
-
-protected:
-
- static const ObjHandleT InvalidObjectHandleId = 0;
- static const RemoteSymbolId NullSymbolId = 0;
-
- class AddObject
- : public rpc::Function<AddObject, Expected<ObjHandleT>(std::string)> {
- public:
- static const char *getName() { return "AddObject"; }
- };
-
- class RemoveObject
- : public rpc::Function<RemoveObject, Error(ObjHandleT)> {
- public:
- static const char *getName() { return "RemoveObject"; }
- };
-
- class FindSymbol
- : public rpc::Function<FindSymbol, Expected<RemoteSymbol>(std::string,
- bool)> {
- public:
- static const char *getName() { return "FindSymbol"; }
- };
-
- class FindSymbolIn
- : public rpc::Function<FindSymbolIn,
- Expected<RemoteSymbol>(ObjHandleT, std::string,
- bool)> {
- public:
- static const char *getName() { return "FindSymbolIn"; }
- };
-
- class EmitAndFinalize
- : public rpc::Function<EmitAndFinalize,
- Error(ObjHandleT)> {
- public:
- static const char *getName() { return "EmitAndFinalize"; }
- };
-
- class Lookup
- : public rpc::Function<Lookup,
- Expected<RemoteSymbol>(ObjHandleT, std::string)> {
- public:
- static const char *getName() { return "Lookup"; }
- };
-
- class LookupInLogicalDylib
- : public rpc::Function<LookupInLogicalDylib,
- Expected<RemoteSymbol>(ObjHandleT, std::string)> {
- public:
- static const char *getName() { return "LookupInLogicalDylib"; }
- };
-
- class ReleaseRemoteSymbol
- : public rpc::Function<ReleaseRemoteSymbol, Error(RemoteSymbolId)> {
- public:
- static const char *getName() { return "ReleaseRemoteSymbol"; }
- };
-
- class MaterializeRemoteSymbol
- : public rpc::Function<MaterializeRemoteSymbol,
- Expected<JITTargetAddress>(RemoteSymbolId)> {
- public:
- static const char *getName() { return "MaterializeRemoteSymbol"; }
- };
-};
-
-/// Base class containing common utilities for RemoteObjectClientLayer and
-/// RemoteObjectServerLayer.
-template <typename RPCEndpoint>
-class RemoteObjectLayer : public RemoteObjectLayerAPI {
-public:
-
- RemoteObjectLayer(RPCEndpoint &Remote,
- std::function<void(Error)> ReportError)
- : Remote(Remote), ReportError(std::move(ReportError)),
- SymbolIdMgr(NullSymbolId + 1) {
- using ThisT = RemoteObjectLayer<RPCEndpoint>;
- Remote.template addHandler<ReleaseRemoteSymbol>(
- *this, &ThisT::handleReleaseRemoteSymbol);
- Remote.template addHandler<MaterializeRemoteSymbol>(
- *this, &ThisT::handleMaterializeRemoteSymbol);
- }
-
-protected:
-
- /// This class is used as the symbol materializer for JITSymbols returned by
- /// RemoteObjectLayerClient/RemoteObjectLayerServer -- the materializer knows
- /// how to call back to the other RPC endpoint to get the address when
- /// requested.
- class RemoteSymbolMaterializer {
- public:
-
- /// Construct a RemoteSymbolMaterializer for the given RemoteObjectLayer
- /// with the given Id.
- RemoteSymbolMaterializer(RemoteObjectLayer &C,
- RemoteSymbolId Id)
- : C(C), Id(Id) {}
-
- RemoteSymbolMaterializer(RemoteSymbolMaterializer &&Other)
- : C(Other.C), Id(Other.Id) {
- Other.Id = 0;
- }
-
- RemoteSymbolMaterializer &operator=(RemoteSymbolMaterializer &&) = delete;
-
- /// Release the remote symbol.
- ~RemoteSymbolMaterializer() {
- if (Id)
- C.releaseRemoteSymbol(Id);
- }
-
- /// Materialize the symbol on the remote and get its address.
- Expected<JITTargetAddress> materialize() {
- auto Addr = C.materializeRemoteSymbol(Id);
- Id = 0;
- return Addr;
- }
-
- private:
- RemoteObjectLayer &C;
- RemoteSymbolId Id;
- };
-
- /// Convenience function for getting a null remote symbol value.
- RemoteSymbol nullRemoteSymbol() {
- return RemoteSymbol(0, JITSymbolFlags());
- }
-
- /// Creates a StringError that contains a copy of Err's log message, then
- /// sends that StringError to ReportError.
- ///
- /// This allows us to locally log error messages for errors that will actually
- /// be delivered to the remote.
- Error teeLog(Error Err) {
- return handleErrors(std::move(Err),
- [this](std::unique_ptr<ErrorInfoBase> EIB) {
- ReportError(make_error<StringError>(
- EIB->message(),
- EIB->convertToErrorCode()));
- return Error(std::move(EIB));
- });
- }
-
- Error badRemoteSymbolIdError(RemoteSymbolId Id) {
- return make_error<BadSymbolHandleError>(Id, "Remote JIT Symbol");
- }
-
- Error badObjectHandleError(ObjHandleT H) {
- return make_error<RemoteObjectLayerAPI::BadObjectHandleError>(
- H, "Bad object handle");
- }
-
- /// Create a RemoteSymbol wrapping the given JITSymbol.
- Expected<RemoteSymbol> jitSymbolToRemote(JITSymbol Sym) {
- if (Sym) {
- auto Id = SymbolIdMgr.getNext();
- auto Flags = Sym.getFlags();
- assert(!InUseSymbols.count(Id) && "Symbol id already in use");
- InUseSymbols.insert(std::make_pair(Id, std::move(Sym)));
- return RemoteSymbol(Id, Flags);
- } else if (auto Err = Sym.takeError())
- return teeLog(std::move(Err));
- // else...
- return nullRemoteSymbol();
- }
-
- /// Convert an Expected<RemoteSymbol> to a JITSymbol.
- JITSymbol remoteToJITSymbol(Expected<RemoteSymbol> RemoteSymOrErr) {
- if (RemoteSymOrErr) {
- auto &RemoteSym = *RemoteSymOrErr;
- if (RemoteSym == nullRemoteSymbol())
- return nullptr;
- // else...
- RemoteSymbolMaterializer RSM(*this, RemoteSym.first);
- auto Sym = JITSymbol(
- [RSM = std::move(RSM)]() mutable { return RSM.materialize(); },
- RemoteSym.second);
- return Sym;
- } else
- return RemoteSymOrErr.takeError();
- }
-
- RPCEndpoint &Remote;
- std::function<void(Error)> ReportError;
-
-private:
-
- /// Notify the remote to release the given JITSymbol.
- void releaseRemoteSymbol(RemoteSymbolId Id) {
- if (auto Err = Remote.template callB<ReleaseRemoteSymbol>(Id))
- ReportError(std::move(Err));
- }
-
- /// Notify the remote to materialize the JITSymbol with the given Id and
- /// return its address.
- Expected<JITTargetAddress> materializeRemoteSymbol(RemoteSymbolId Id) {
- return Remote.template callB<MaterializeRemoteSymbol>(Id);
- }
-
- /// Release the JITSymbol with the given Id.
- Error handleReleaseRemoteSymbol(RemoteSymbolId Id) {
- auto SI = InUseSymbols.find(Id);
- if (SI != InUseSymbols.end()) {
- InUseSymbols.erase(SI);
- return Error::success();
- } else
- return teeLog(badRemoteSymbolIdError(Id));
- }
-
- /// Run the materializer for the JITSymbol with the given Id and return its
- /// address.
- Expected<JITTargetAddress> handleMaterializeRemoteSymbol(RemoteSymbolId Id) {
- auto SI = InUseSymbols.find(Id);
- if (SI != InUseSymbols.end()) {
- auto AddrOrErr = SI->second.getAddress();
- InUseSymbols.erase(SI);
- SymbolIdMgr.release(Id);
- if (AddrOrErr)
- return *AddrOrErr;
- else
- return teeLog(AddrOrErr.takeError());
- } else {
- return teeLog(badRemoteSymbolIdError(Id));
- }
- }
-
- remote::ResourceIdMgr SymbolIdMgr;
- std::map<RemoteSymbolId, JITSymbol> InUseSymbols;
-};
-
-/// RemoteObjectClientLayer forwards the ORC Object Layer API over an RPC
-/// connection.
-///
-/// This class can be used as the base layer of a JIT stack on the client and
-/// will forward operations to a corresponding RemoteObjectServerLayer on the
-/// server (which can be composed on top of a "real" object layer like
-/// RTDyldObjectLinkingLayer to actually carry out the operations).
-///
-/// Sending relocatable objects to the server (rather than fully relocated
-/// bits) allows JIT'd code to be cached on the server side and re-used in
-/// subsequent JIT sessions.
-template <typename RPCEndpoint>
-class RemoteObjectClientLayer : public RemoteObjectLayer<RPCEndpoint> {
-private:
-
- using AddObject = RemoteObjectLayerAPI::AddObject;
- using RemoveObject = RemoteObjectLayerAPI::RemoveObject;
- using FindSymbol = RemoteObjectLayerAPI::FindSymbol;
- using FindSymbolIn = RemoteObjectLayerAPI::FindSymbolIn;
- using EmitAndFinalize = RemoteObjectLayerAPI::EmitAndFinalize;
- using Lookup = RemoteObjectLayerAPI::Lookup;
- using LookupInLogicalDylib = RemoteObjectLayerAPI::LookupInLogicalDylib;
-
- using RemoteObjectLayer<RPCEndpoint>::teeLog;
- using RemoteObjectLayer<RPCEndpoint>::badObjectHandleError;
- using RemoteObjectLayer<RPCEndpoint>::remoteToJITSymbol;
-
-public:
-
- using ObjHandleT = RemoteObjectLayerAPI::ObjHandleT;
- using RemoteSymbol = RemoteObjectLayerAPI::RemoteSymbol;
-
- using ObjectPtr = std::unique_ptr<MemoryBuffer>;
-
- /// Create a RemoteObjectClientLayer that communicates with a
- /// RemoteObjectServerLayer instance via the given RPCEndpoint.
- ///
- /// The ReportError functor can be used to locally log errors that are
- /// intended to be sent to the remote.
- LLVM_ATTRIBUTE_DEPRECATED(
- RemoteObjectClientLayer(RPCEndpoint &Remote,
- std::function<void(Error)> ReportError),
- "ORCv1 layers (including RemoteObjectClientLayer) are deprecated. Please "
- "use "
- "ORCv2 (see docs/ORCv2.rst)");
-
- RemoteObjectClientLayer(ORCv1DeprecationAcknowledgement, RPCEndpoint &Remote,
- std::function<void(Error)> ReportError)
- : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)) {
- using ThisT = RemoteObjectClientLayer<RPCEndpoint>;
- Remote.template addHandler<Lookup>(*this, &ThisT::lookup);
- Remote.template addHandler<LookupInLogicalDylib>(
- *this, &ThisT::lookupInLogicalDylib);
- }
-
- /// Add an object to the JIT.
- ///
- /// @return A handle that can be used to refer to the loaded object (for
- /// symbol searching, finalization, freeing memory, etc.).
- Expected<ObjHandleT>
- addObject(ObjectPtr ObjBuffer,
- std::shared_ptr<LegacyJITSymbolResolver> Resolver) {
- if (auto HandleOrErr =
- this->Remote.template callB<AddObject>(ObjBuffer->getBuffer())) {
- auto &Handle = *HandleOrErr;
- // FIXME: Return an error for this:
- assert(!Resolvers.count(Handle) && "Handle already in use?");
- Resolvers[Handle] = std::move(Resolver);
- return Handle;
- } else
- return HandleOrErr.takeError();
- }
-
- /// Remove the given object from the JIT.
- Error removeObject(ObjHandleT H) {
- return this->Remote.template callB<RemoveObject>(H);
- }
-
- /// Search for the given named symbol.
- JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) {
- return remoteToJITSymbol(
- this->Remote.template callB<FindSymbol>(Name,
- ExportedSymbolsOnly));
- }
-
- /// Search for the given named symbol within the given context.
- JITSymbol findSymbolIn(ObjHandleT H, StringRef Name, bool ExportedSymbolsOnly) {
- return remoteToJITSymbol(
- this->Remote.template callB<FindSymbolIn>(H, Name,
- ExportedSymbolsOnly));
- }
-
- /// Immediately emit and finalize the object with the given handle.
- Error emitAndFinalize(ObjHandleT H) {
- return this->Remote.template callB<EmitAndFinalize>(H);
- }
-
-private:
-
- Expected<RemoteSymbol> lookup(ObjHandleT H, const std::string &Name) {
- auto RI = Resolvers.find(H);
- if (RI != Resolvers.end()) {
- return this->jitSymbolToRemote(RI->second->findSymbol(Name));
- } else
- return teeLog(badObjectHandleError(H));
- }
-
- Expected<RemoteSymbol> lookupInLogicalDylib(ObjHandleT H,
- const std::string &Name) {
- auto RI = Resolvers.find(H);
- if (RI != Resolvers.end())
- return this->jitSymbolToRemote(
- RI->second->findSymbolInLogicalDylib(Name));
- else
- return teeLog(badObjectHandleError(H));
- }
-
- std::map<remote::ResourceIdMgr::ResourceId,
- std::shared_ptr<LegacyJITSymbolResolver>>
- Resolvers;
-};
-
-/// RemoteObjectServerLayer acts as a server, handling RPC calls for the
-/// object layer API from the given RPC connection.
-///
-/// This class can be composed on top of a 'real' object layer (e.g.
-/// RTDyldObjectLinkingLayer) to do the actual work of relocating objects
-/// and making them executable.
-template <typename BaseLayerT, typename RPCEndpoint>
-class RemoteObjectServerLayer : public RemoteObjectLayer<RPCEndpoint> {
-private:
-
- using ObjHandleT = RemoteObjectLayerAPI::ObjHandleT;
- using RemoteSymbol = RemoteObjectLayerAPI::RemoteSymbol;
-
- using AddObject = RemoteObjectLayerAPI::AddObject;
- using RemoveObject = RemoteObjectLayerAPI::RemoveObject;
- using FindSymbol = RemoteObjectLayerAPI::FindSymbol;
- using FindSymbolIn = RemoteObjectLayerAPI::FindSymbolIn;
- using EmitAndFinalize = RemoteObjectLayerAPI::EmitAndFinalize;
- using Lookup = RemoteObjectLayerAPI::Lookup;
- using LookupInLogicalDylib = RemoteObjectLayerAPI::LookupInLogicalDylib;
-
- using RemoteObjectLayer<RPCEndpoint>::teeLog;
- using RemoteObjectLayer<RPCEndpoint>::badObjectHandleError;
- using RemoteObjectLayer<RPCEndpoint>::remoteToJITSymbol;
-
-public:
-
- /// Create a RemoteObjectServerLayer with the given base layer (which must be
- /// an object layer), RPC endpoint, and error reporter function.
- LLVM_ATTRIBUTE_DEPRECATED(
- RemoteObjectServerLayer(BaseLayerT &BaseLayer, RPCEndpoint &Remote,
- std::function<void(Error)> ReportError),
- "ORCv1 layers (including RemoteObjectServerLayer) are deprecated. Please "
- "use "
- "ORCv2 (see docs/ORCv2.rst)");
-
- RemoteObjectServerLayer(ORCv1DeprecationAcknowledgement,
- BaseLayerT &BaseLayer, RPCEndpoint &Remote,
- std::function<void(Error)> ReportError)
- : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
- BaseLayer(BaseLayer), HandleIdMgr(1) {
- using ThisT = RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>;
-
- Remote.template addHandler<AddObject>(*this, &ThisT::addObject);
- Remote.template addHandler<RemoveObject>(*this, &ThisT::removeObject);
- Remote.template addHandler<FindSymbol>(*this, &ThisT::findSymbol);
- Remote.template addHandler<FindSymbolIn>(*this, &ThisT::findSymbolIn);
- Remote.template addHandler<EmitAndFinalize>(*this, &ThisT::emitAndFinalize);
- }
-
-private:
-
- class StringMemoryBuffer : public MemoryBuffer {
- public:
- StringMemoryBuffer(std::string Buffer)
- : Buffer(std::move(Buffer)) {
- init(this->Buffer.data(), this->Buffer.data() + this->Buffer.size(),
- false);
- }
-
- BufferKind getBufferKind() const override { return MemoryBuffer_Malloc; }
- private:
- std::string Buffer;
- };
-
- JITSymbol lookup(ObjHandleT Id, const std::string &Name) {
- return remoteToJITSymbol(
- this->Remote.template callB<Lookup>(Id, Name));
- }
-
- JITSymbol lookupInLogicalDylib(ObjHandleT Id, const std::string &Name) {
- return remoteToJITSymbol(
- this->Remote.template callB<LookupInLogicalDylib>(Id, Name));
- }
-
- Expected<ObjHandleT> addObject(std::string ObjBuffer) {
- auto Buffer = std::make_unique<StringMemoryBuffer>(std::move(ObjBuffer));
- auto Id = HandleIdMgr.getNext();
- assert(!BaseLayerHandles.count(Id) && "Id already in use?");
-
- auto Resolver = createLambdaResolver(
- AcknowledgeORCv1Deprecation,
- [this, Id](const std::string &Name) { return lookup(Id, Name); },
- [this, Id](const std::string &Name) {
- return lookupInLogicalDylib(Id, Name);
- });
-
- if (auto HandleOrErr =
- BaseLayer.addObject(std::move(Buffer), std::move(Resolver))) {
- BaseLayerHandles[Id] = std::move(*HandleOrErr);
- return Id;
- } else
- return teeLog(HandleOrErr.takeError());
- }
-
- Error removeObject(ObjHandleT H) {
- auto HI = BaseLayerHandles.find(H);
- if (HI != BaseLayerHandles.end()) {
- if (auto Err = BaseLayer.removeObject(HI->second))
- return teeLog(std::move(Err));
- return Error::success();
- } else
- return teeLog(badObjectHandleError(H));
- }
-
- Expected<RemoteSymbol> findSymbol(const std::string &Name,
- bool ExportedSymbolsOnly) {
- if (auto Sym = BaseLayer.findSymbol(Name, ExportedSymbolsOnly))
- return this->jitSymbolToRemote(std::move(Sym));
- else if (auto Err = Sym.takeError())
- return teeLog(std::move(Err));
- return this->nullRemoteSymbol();
- }
-
- Expected<RemoteSymbol> findSymbolIn(ObjHandleT H, const std::string &Name,
- bool ExportedSymbolsOnly) {
- auto HI = BaseLayerHandles.find(H);
- if (HI != BaseLayerHandles.end()) {
- if (auto Sym = BaseLayer.findSymbolIn(HI->second, Name, ExportedSymbolsOnly))
- return this->jitSymbolToRemote(std::move(Sym));
- else if (auto Err = Sym.takeError())
- return teeLog(std::move(Err));
- return this->nullRemoteSymbol();
- } else
- return teeLog(badObjectHandleError(H));
- }
-
- Error emitAndFinalize(ObjHandleT H) {
- auto HI = BaseLayerHandles.find(H);
- if (HI != BaseLayerHandles.end()) {
- if (auto Err = BaseLayer.emitAndFinalize(HI->second))
- return teeLog(std::move(Err));
- return Error::success();
- } else
- return teeLog(badObjectHandleError(H));
- }
-
- BaseLayerT &BaseLayer;
- remote::ResourceIdMgr HandleIdMgr;
- std::map<ObjHandleT, typename BaseLayerT::ObjHandleT> BaseLayerHandles;
-};
-
-template <typename RPCEndpoint>
-RemoteObjectClientLayer<RPCEndpoint>::RemoteObjectClientLayer(
- RPCEndpoint &Remote, std::function<void(Error)> ReportError)
- : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)) {
- using ThisT = RemoteObjectClientLayer<RPCEndpoint>;
- Remote.template addHandler<Lookup>(*this, &ThisT::lookup);
- Remote.template addHandler<LookupInLogicalDylib>(
- *this, &ThisT::lookupInLogicalDylib);
-}
-
-template <typename BaseLayerT, typename RPCEndpoint>
-RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>::RemoteObjectServerLayer(
- BaseLayerT &BaseLayer, RPCEndpoint &Remote,
- std::function<void(Error)> ReportError)
- : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
- BaseLayer(BaseLayer), HandleIdMgr(1) {
- using ThisT = RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>;
-
- Remote.template addHandler<AddObject>(*this, &ThisT::addObject);
- Remote.template addHandler<RemoveObject>(*this, &ThisT::removeObject);
- Remote.template addHandler<FindSymbol>(*this, &ThisT::findSymbol);
- Remote.template addHandler<FindSymbolIn>(*this, &ThisT::findSymbolIn);
- Remote.template addHandler<EmitAndFinalize>(*this, &ThisT::emitAndFinalize);
-}
-
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h
new file mode 100644
index 000000000000..3f96fe3da49d
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h
@@ -0,0 +1,79 @@
+//===- FDRawByteChannel.h - File descriptor based byte-channel -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// File descriptor based RawByteChannel.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H
+
+#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
+
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
+namespace llvm {
+namespace orc {
+namespace shared {
+
+/// Serialization channel that reads from and writes to file descriptors.
+class FDRawByteChannel final : public RawByteChannel {
+public:
+ FDRawByteChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {}
+
+ llvm::Error readBytes(char *Dst, unsigned Size) override {
+ assert(Dst && "Attempt to read into null.");
+ ssize_t Completed = 0;
+ while (Completed < static_cast<ssize_t>(Size)) {
+ ssize_t Read = ::read(InFD, Dst + Completed, Size - Completed);
+ if (Read <= 0) {
+ auto ErrNo = errno;
+ if (ErrNo == EAGAIN || ErrNo == EINTR)
+ continue;
+ else
+ return llvm::errorCodeToError(
+ std::error_code(errno, std::generic_category()));
+ }
+ Completed += Read;
+ }
+ return llvm::Error::success();
+ }
+
+ llvm::Error appendBytes(const char *Src, unsigned Size) override {
+ assert(Src && "Attempt to append from null.");
+ ssize_t Completed = 0;
+ while (Completed < static_cast<ssize_t>(Size)) {
+ ssize_t Written = ::write(OutFD, Src + Completed, Size - Completed);
+ if (Written < 0) {
+ auto ErrNo = errno;
+ if (ErrNo == EAGAIN || ErrNo == EINTR)
+ continue;
+ else
+ return llvm::errorCodeToError(
+ std::error_code(errno, std::generic_category()));
+ }
+ Completed += Written;
+ }
+ return llvm::Error::success();
+ }
+
+ llvm::Error send() override { return llvm::Error::success(); }
+
+private:
+ int InFD, OutFD;
+};
+
+} // namespace shared
+} // namespace orc
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H
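A brief usage note, as an assumption rather than anything stated in the patch: the channel simply wraps two already-connected descriptors, so wiring it to a pair of POSIX pipes (or a socketpair) is enough for local testing.

// Minimal sketch: connect FDRawByteChannel to a pair of pipes. The other
// pipe ends would be handed to the remote process in a real setup.
#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
#include <unistd.h>

int main() {
  int ToRemote[2], FromRemote[2];
  if (pipe(ToRemote) != 0 || pipe(FromRemote) != 0)
    return 1;

  // Read replies from the remote, write requests to it.
  llvm::orc::shared::FDRawByteChannel Channel(FromRemote[0], ToRemote[1]);
  (void)Channel; // pass to OrcRemoteTargetClient::Create(...) in real code
  return 0;
}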
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcError.h
index 9b0d941f5459..9b0d941f5459 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcError.h
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h
index f348844f39ce..e0ac640ebcdd 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h
@@ -14,23 +14,23 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H
-#define LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H
#include <map>
#include <thread>
#include <vector>
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ExecutionEngine/Orc/OrcError.h"
-#include "llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
+#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
#include <future>
namespace llvm {
namespace orc {
-namespace rpc {
+namespace shared {
/// Base class of all fatal RPC errors (those that necessarily result in the
/// termination of the RPC session).
@@ -56,7 +56,7 @@ public:
/// function id it cannot parse the call.
template <typename FnIdT, typename SeqNoT>
class BadFunctionCall
- : public ErrorInfo<BadFunctionCall<FnIdT, SeqNoT>, RPCFatalError> {
+ : public ErrorInfo<BadFunctionCall<FnIdT, SeqNoT>, RPCFatalError> {
public:
static char ID;
@@ -68,8 +68,10 @@ public:
}
void log(raw_ostream &OS) const override {
- OS << "Call to invalid RPC function id '" << FnId << "' with "
- "sequence number " << SeqNo;
+ OS << "Call to invalid RPC function id '" << FnId
+ << "' with "
+ "sequence number "
+ << SeqNo;
}
private:
@@ -89,12 +91,12 @@ char BadFunctionCall<FnIdT, SeqNoT>::ID = 0;
/// a result parser for this sequence number it can't do that.
template <typename SeqNoT>
class InvalidSequenceNumberForResponse
- : public ErrorInfo<InvalidSequenceNumberForResponse<SeqNoT>, RPCFatalError> {
+ : public ErrorInfo<InvalidSequenceNumberForResponse<SeqNoT>,
+ RPCFatalError> {
public:
static char ID;
- InvalidSequenceNumberForResponse(SeqNoT SeqNo)
- : SeqNo(std::move(SeqNo)) {}
+ InvalidSequenceNumberForResponse(SeqNoT SeqNo) : SeqNo(std::move(SeqNo)) {}
std::error_code convertToErrorCode() const override {
return orcError(OrcErrorCode::UnexpectedRPCCall);
@@ -103,6 +105,7 @@ public:
void log(raw_ostream &OS) const override {
OS << "Response has unknown sequence number " << SeqNo;
}
+
private:
SeqNoT SeqNo;
};
@@ -131,17 +134,18 @@ public:
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
const std::string &getSignature() const { return Signature; }
+
private:
std::string Signature;
};
-template <typename DerivedFunc, typename FnT> class Function;
+template <typename DerivedFunc, typename FnT> class RPCFunction;
// RPC Function class.
// DerivedFunc should be a user defined class with a static 'getName()' method
// returning a const char* representing the function's name.
template <typename DerivedFunc, typename RetT, typename... ArgTs>
-class Function<DerivedFunc, RetT(ArgTs...)> {
+class RPCFunction<DerivedFunc, RetT(ArgTs...)> {
public:
/// User defined function type.
using Type = RetT(ArgTs...);
@@ -154,8 +158,9 @@ public:
static std::string Name = [] {
std::string Name;
raw_string_ostream(Name)
- << RPCTypeName<RetT>::getName() << " " << DerivedFunc::getName()
- << "(" << llvm::orc::rpc::RPCTypeNameSequence<ArgTs...>() << ")";
+ << SerializationTypeName<RetT>::getName() << " "
+ << DerivedFunc::getName() << "("
+ << SerializationTypeNameSequence<ArgTs...>() << ")";
return Name;
}();
return Name.data();
@@ -199,10 +204,10 @@ private:
namespace detail {
/// Provides a typedef for a tuple containing the decayed argument types.
-template <typename T> class FunctionArgsTuple;
+template <typename T> class RPCFunctionArgsTuple;
template <typename RetT, typename... ArgTs>
-class FunctionArgsTuple<RetT(ArgTs...)> {
+class RPCFunctionArgsTuple<RetT(ArgTs...)> {
public:
using Type = std::tuple<std::decay_t<std::remove_reference_t<ArgTs>>...>;
};
@@ -287,34 +292,28 @@ class ResultTraits<Expected<RetT>> : public ResultTraits<RetT> {};
// Determines whether an RPC function's defined error return type supports
// error return value.
-template <typename T>
-class SupportsErrorReturn {
+template <typename T> class SupportsErrorReturn {
public:
static const bool value = false;
};
-template <>
-class SupportsErrorReturn<Error> {
+template <> class SupportsErrorReturn<Error> {
public:
static const bool value = true;
};
-template <typename T>
-class SupportsErrorReturn<Expected<T>> {
+template <typename T> class SupportsErrorReturn<Expected<T>> {
public:
static const bool value = true;
};
// RespondHelper packages return values based on whether or not the declared
// RPC function return type supports error returns.
-template <bool FuncSupportsErrorReturn>
-class RespondHelper;
+template <bool FuncSupportsErrorReturn> class RespondHelper;
// RespondHelper specialization for functions that support error returns.
-template <>
-class RespondHelper<true> {
+template <> class RespondHelper<true> {
public:
-
// Send Expected<T>.
template <typename WireRetT, typename HandlerRetT, typename ChannelT,
typename FunctionIdT, typename SequenceNumberT>
@@ -330,9 +329,8 @@ public:
// Serialize the result.
if (auto Err =
- SerializationTraits<ChannelT, WireRetT,
- Expected<HandlerRetT>>::serialize(
- C, std::move(ResultOrErr)))
+ SerializationTraits<ChannelT, WireRetT, Expected<HandlerRetT>>::
+ serialize(C, std::move(ResultOrErr)))
return Err;
// Close the response message.
@@ -354,14 +352,11 @@ public:
return Err2;
return C.send();
}
-
};
// RespondHelper specialization for functions that do not support error returns.
-template <>
-class RespondHelper<false> {
+template <> class RespondHelper<false> {
public:
-
template <typename WireRetT, typename HandlerRetT, typename ChannelT,
typename FunctionIdT, typename SequenceNumberT>
static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
@@ -376,8 +371,8 @@ public:
// Serialize the result.
if (auto Err =
- SerializationTraits<ChannelT, WireRetT, HandlerRetT>::serialize(
- C, *ResultOrErr))
+ SerializationTraits<ChannelT, WireRetT, HandlerRetT>::serialize(
+ C, *ResultOrErr))
return Err;
// End the response message.
@@ -398,18 +393,17 @@ public:
return Err2;
return C.send();
}
-
};
-
// Send a response of the given wire return type (WireRetT) over the
// channel, with the given sequence number.
template <typename WireRetT, typename HandlerRetT, typename ChannelT,
typename FunctionIdT, typename SequenceNumberT>
-Error respond(ChannelT &C, const FunctionIdT &ResponseId,
- SequenceNumberT SeqNo, Expected<HandlerRetT> ResultOrErr) {
+Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo,
+ Expected<HandlerRetT> ResultOrErr) {
return RespondHelper<SupportsErrorReturn<WireRetT>::value>::
- template sendResult<WireRetT>(C, ResponseId, SeqNo, std::move(ResultOrErr));
+ template sendResult<WireRetT>(C, ResponseId, SeqNo,
+ std::move(ResultOrErr));
}
// Send an empty response message on the given channel to indicate that
@@ -418,8 +412,8 @@ template <typename WireRetT, typename ChannelT, typename FunctionIdT,
typename SequenceNumberT>
Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo,
Error Err) {
- return RespondHelper<SupportsErrorReturn<WireRetT>::value>::
- sendResult(C, ResponseId, SeqNo, std::move(Err));
+ return RespondHelper<SupportsErrorReturn<WireRetT>::value>::sendResult(
+ C, ResponseId, SeqNo, std::move(Err));
}
// Converts a given type to the equivalent error return type.
@@ -453,7 +447,8 @@ public:
template <typename FnT> class AsyncHandlerTraits;
template <typename ResultT, typename... ArgTs>
-class AsyncHandlerTraits<Error(std::function<Error(Expected<ResultT>)>, ArgTs...)> {
+class AsyncHandlerTraits<Error(std::function<Error(Expected<ResultT>)>,
+ ArgTs...)> {
public:
using Type = Error(ArgTs...);
using ResultType = Expected<ResultT>;
@@ -490,9 +485,9 @@ class AsyncHandlerTraits<Error(ResponseHandlerT, ArgTs...)>
// specialized for function types) and inherits from the appropriate
// specialization for the given non-function type's call operator.
template <typename HandlerT>
-class HandlerTraits : public HandlerTraits<decltype(
- &std::remove_reference<HandlerT>::type::operator())> {
-};
+class HandlerTraits
+ : public HandlerTraits<
+ decltype(&std::remove_reference<HandlerT>::type::operator())> {};
// Traits for handlers with a given function type.
template <typename RetT, typename... ArgTs>
@@ -524,7 +519,7 @@ public:
template <typename HandlerT>
static std::enable_if_t<
std::is_void<typename HandlerTraits<HandlerT>::ReturnType>::value, Error>
- run(HandlerT &Handler, ArgTs &&... Args) {
+ run(HandlerT &Handler, ArgTs &&...Args) {
Handler(std::move(Args)...);
return Error::success();
}
@@ -577,8 +572,8 @@ private:
// Handler traits for free functions.
template <typename RetT, typename... ArgTs>
-class HandlerTraits<RetT(*)(ArgTs...)>
- : public HandlerTraits<RetT(ArgTs...)> {};
+class HandlerTraits<RetT (*)(ArgTs...)> : public HandlerTraits<RetT(ArgTs...)> {
+};
// Handler traits for class methods (especially call operators for lambdas).
template <typename Class, typename RetT, typename... ArgTs>
@@ -714,9 +709,8 @@ public:
typename HandlerTraits<HandlerT>::Type>::ArgType;
HandlerArgType Result((typename HandlerArgType::value_type()));
- if (auto Err =
- SerializationTraits<ChannelT, Expected<FuncRetT>,
- HandlerArgType>::deserialize(C, Result))
+ if (auto Err = SerializationTraits<ChannelT, Expected<FuncRetT>,
+ HandlerArgType>::deserialize(C, Result))
return Err;
if (auto Err = C.endReceiveMessage())
return Err;
@@ -786,7 +780,7 @@ public:
using MethodT = RetT (ClassT::*)(ArgTs...);
MemberFnWrapper(ClassT &Instance, MethodT Method)
: Instance(Instance), Method(Method) {}
- RetT operator()(ArgTs &&... Args) {
+ RetT operator()(ArgTs &&...Args) {
return (Instance.*Method)(std::move(Args)...);
}
@@ -804,10 +798,9 @@ public:
template <typename ArgT, typename... ArgTs>
class ReadArgs<ArgT, ArgTs...> : public ReadArgs<ArgTs...> {
public:
- ReadArgs(ArgT &Arg, ArgTs &... Args)
- : ReadArgs<ArgTs...>(Args...), Arg(Arg) {}
+ ReadArgs(ArgT &Arg, ArgTs &...Args) : ReadArgs<ArgTs...>(Args...), Arg(Arg) {}
- Error operator()(ArgT &ArgVal, ArgTs &... ArgVals) {
+ Error operator()(ArgT &ArgVal, ArgTs &...ArgVals) {
this->Arg = std::move(ArgVal);
return ReadArgs<ArgTs...>::operator()(ArgVals...);
}
@@ -872,8 +865,8 @@ public:
template <template <class, class> class P, typename T1Sig, typename T2Sig>
class RPCArgTypeCheck {
public:
- using T1Tuple = typename FunctionArgsTuple<T1Sig>::Type;
- using T2Tuple = typename FunctionArgsTuple<T2Sig>::Type;
+ using T1Tuple = typename RPCFunctionArgsTuple<T1Sig>::Type;
+ using T2Tuple = typename RPCFunctionArgsTuple<T2Sig>::Type;
static_assert(std::tuple_size<T1Tuple>::value >=
std::tuple_size<T2Tuple>::value,
@@ -937,18 +930,18 @@ template <typename ImplT, typename ChannelT, typename FunctionIdT,
typename SequenceNumberT>
class RPCEndpointBase {
protected:
- class OrcRPCInvalid : public Function<OrcRPCInvalid, void()> {
+ class OrcRPCInvalid : public RPCFunction<OrcRPCInvalid, void()> {
public:
static const char *getName() { return "__orc_rpc$invalid"; }
};
- class OrcRPCResponse : public Function<OrcRPCResponse, void()> {
+ class OrcRPCResponse : public RPCFunction<OrcRPCResponse, void()> {
public:
static const char *getName() { return "__orc_rpc$response"; }
};
class OrcRPCNegotiate
- : public Function<OrcRPCNegotiate, FunctionIdT(std::string)> {
+ : public RPCFunction<OrcRPCNegotiate, FunctionIdT(std::string)> {
public:
static const char *getName() { return "__orc_rpc$negotiate"; }
};
@@ -994,7 +987,6 @@ public:
[this](const std::string &Name) { return handleNegotiate(Name); });
}
-
/// Negotiate a function id for Func with the other end of the channel.
template <typename Func> Error negotiateFunction(bool Retry = false) {
return getRemoteFunctionId<Func>(true, Retry).takeError();
@@ -1006,7 +998,7 @@ public:
/// or an Error (if Func::ReturnType is void). The handler will be called
/// with an error if the return value is abandoned due to a channel error.
template <typename Func, typename HandlerT, typename... ArgTs>
- Error appendCallAsync(HandlerT Handler, const ArgTs &... Args) {
+ Error appendCallAsync(HandlerT Handler, const ArgTs &...Args) {
static_assert(
detail::RPCArgTypeCheck<CanSerializeCheck, typename Func::Type,
@@ -1036,8 +1028,8 @@ public:
// Install the user handler.
PendingResponses[SeqNo] =
- detail::createResponseHandler<ChannelT, typename Func::ReturnType>(
- std::move(Handler));
+ detail::createResponseHandler<ChannelT, typename Func::ReturnType>(
+ std::move(Handler));
}
// Open the function call message.
@@ -1065,7 +1057,7 @@ public:
Error sendAppendedCalls() { return C.send(); };
template <typename Func, typename HandlerT, typename... ArgTs>
- Error callAsync(HandlerT Handler, const ArgTs &... Args) {
+ Error callAsync(HandlerT Handler, const ArgTs &...Args) {
if (auto Err = appendCallAsync<Func>(std::move(Handler), Args...))
return Err;
return C.send();
@@ -1104,7 +1096,7 @@ public:
/// /* Handle Args */ ;
///
template <typename... ArgTs>
- static detail::ReadArgs<ArgTs...> readArgs(ArgTs &... Args) {
+ static detail::ReadArgs<ArgTs...> readArgs(ArgTs &...Args) {
return detail::ReadArgs<ArgTs...>(Args...);
}
@@ -1128,8 +1120,7 @@ public:
/// Remove the handler for the given function.
/// A handler must currently be registered for this function.
- template <typename Func>
- void removeHandler() {
+ template <typename Func> void removeHandler() {
auto IdItr = LocalFunctionIds.find(Func::getPrototype());
assert(IdItr != LocalFunctionIds.end() &&
"Function does not have a registered handler");
@@ -1140,12 +1131,9 @@ public:
}
/// Clear all handlers.
- void clearHandlers() {
- Handlers.clear();
- }
+ void clearHandlers() { Handlers.clear(); }
protected:
-
FunctionIdT getInvalidFunctionId() const {
return FnIdAllocator.getInvalidId();
}
@@ -1168,12 +1156,12 @@ protected:
template <typename Func, typename HandlerT>
void addAsyncHandlerImpl(HandlerT Handler) {
- static_assert(detail::RPCArgTypeCheck<
- CanDeserializeCheck, typename Func::Type,
- typename detail::AsyncHandlerTraits<
- typename detail::HandlerTraits<HandlerT>::Type
- >::Type>::value,
- "");
+ static_assert(
+ detail::RPCArgTypeCheck<
+ CanDeserializeCheck, typename Func::Type,
+ typename detail::AsyncHandlerTraits<
+ typename detail::HandlerTraits<HandlerT>::Type>::Type>::value,
+ "");
FunctionIdT NewFnId = FnIdAllocator.template allocate<Func>();
LocalFunctionIds[Func::getPrototype()] = NewFnId;
@@ -1197,8 +1185,8 @@ protected:
// Unlock the pending results map to prevent recursive lock.
Lock.unlock();
abandonPendingResponses();
- return make_error<
- InvalidSequenceNumberForResponse<SequenceNumberT>>(SeqNo);
+ return make_error<InvalidSequenceNumberForResponse<SequenceNumberT>>(
+ SeqNo);
}
}
@@ -1241,7 +1229,7 @@ protected:
if (DoNegotiate) {
auto &Impl = static_cast<ImplT &>(*this);
if (auto RemoteIdOrErr =
- Impl.template callB<OrcRPCNegotiate>(Func::getPrototype())) {
+ Impl.template callB<OrcRPCNegotiate>(Func::getPrototype())) {
RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
if (*RemoteIdOrErr == getInvalidFunctionId())
return make_error<CouldNotNegotiate>(Func::getPrototype());
@@ -1264,9 +1252,8 @@ protected:
return [this, Handler](ChannelT &Channel,
SequenceNumberT SeqNo) mutable -> Error {
// Start by deserializing the arguments.
- using ArgsTuple =
- typename detail::FunctionArgsTuple<
- typename detail::HandlerTraits<HandlerT>::Type>::Type;
+ using ArgsTuple = typename detail::RPCFunctionArgsTuple<
+ typename detail::HandlerTraits<HandlerT>::Type>::Type;
auto Args = std::make_shared<ArgsTuple>();
if (auto Err =
@@ -1298,9 +1285,9 @@ protected:
SequenceNumberT SeqNo) mutable -> Error {
// Start by deserializing the arguments.
using AHTraits = detail::AsyncHandlerTraits<
- typename detail::HandlerTraits<HandlerT>::Type>;
+ typename detail::HandlerTraits<HandlerT>::Type>;
using ArgsTuple =
- typename detail::FunctionArgsTuple<typename AHTraits::Type>::Type;
+ typename detail::RPCFunctionArgsTuple<typename AHTraits::Type>::Type;
auto Args = std::make_shared<ArgsTuple>();
if (auto Err =
@@ -1319,11 +1306,11 @@ protected:
using HTraits = detail::HandlerTraits<HandlerT>;
using FuncReturn = typename Func::ReturnType;
- auto Responder =
- [this, SeqNo](typename AHTraits::ResultType RetVal) -> Error {
- return detail::respond<FuncReturn>(C, ResponseId, SeqNo,
- std::move(RetVal));
- };
+ auto Responder = [this,
+ SeqNo](typename AHTraits::ResultType RetVal) -> Error {
+ return detail::respond<FuncReturn>(C, ResponseId, SeqNo,
+ std::move(RetVal));
+ };
return HTraits::unpackAndRunAsync(Handler, Responder, *Args);
};
@@ -1356,17 +1343,16 @@ class MultiThreadedRPCEndpoint
MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT> {
private:
- using BaseClass =
- detail::RPCEndpointBase<
- MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
- ChannelT, FunctionIdT, SequenceNumberT>;
+ using BaseClass = detail::RPCEndpointBase<
+ MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
+ ChannelT, FunctionIdT, SequenceNumberT>;
public:
MultiThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
: BaseClass(C, LazyAutoNegotiation) {}
/// Add a handler for the given RPC function.
- /// This installs the given handler functor for the given RPC Function, and
+ /// This installs the given handler functor for the given RPCFunction, and
/// makes the RPC function available for negotiation/calling from the remote.
template <typename Func, typename HandlerT>
void addHandler(HandlerT Handler) {
@@ -1377,7 +1363,7 @@ public:
template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
addHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
+ detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
}
template <typename Func, typename HandlerT>
@@ -1389,7 +1375,7 @@ public:
template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
void addAsyncHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
addAsyncHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
+ detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
}
/// Return type for non-blocking call primitives.
@@ -1405,7 +1391,7 @@ public:
/// result. In multi-threaded mode the appendCallNB method, which does not
/// return the sequence number, should be preferred.
template <typename Func, typename... ArgTs>
- Expected<NonBlockingCallResult<Func>> appendCallNB(const ArgTs &... Args) {
+ Expected<NonBlockingCallResult<Func>> appendCallNB(const ArgTs &...Args) {
using RTraits = detail::ResultTraits<typename Func::ReturnType>;
using ErrorReturn = typename RTraits::ErrorReturnType;
using ErrorReturnPromise = typename RTraits::ReturnPromiseType;
@@ -1428,7 +1414,7 @@ public:
/// The same as appendCallNBWithSeq, except that it calls C.send() to
/// flush the channel after serializing the call.
template <typename Func, typename... ArgTs>
- Expected<NonBlockingCallResult<Func>> callNB(const ArgTs &... Args) {
+ Expected<NonBlockingCallResult<Func>> callNB(const ArgTs &...Args) {
auto Result = appendCallNB<Func>(Args...);
if (!Result)
return Result;
@@ -1449,7 +1435,7 @@ public:
template <typename Func, typename... ArgTs,
typename AltRetT = typename Func::ReturnType>
typename detail::ResultTraits<AltRetT>::ErrorReturnType
- callB(const ArgTs &... Args) {
+ callB(const ArgTs &...Args) {
if (auto FutureResOrErr = callNB<Func>(Args...))
return FutureResOrErr->get();
else
@@ -1472,10 +1458,9 @@ class SingleThreadedRPCEndpoint
SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT> {
private:
- using BaseClass =
- detail::RPCEndpointBase<
- SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
- ChannelT, FunctionIdT, SequenceNumberT>;
+ using BaseClass = detail::RPCEndpointBase<
+ SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
+ ChannelT, FunctionIdT, SequenceNumberT>;
public:
SingleThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
@@ -1501,13 +1486,13 @@ public:
template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
void addAsyncHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
addAsyncHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
+ detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
}
template <typename Func, typename... ArgTs,
typename AltRetT = typename Func::ReturnType>
typename detail::ResultTraits<AltRetT>::ErrorReturnType
- callB(const ArgTs &... Args) {
+ callB(const ArgTs &...Args) {
bool ReceivedResponse = false;
using ResultType = typename detail::ResultTraits<AltRetT>::ErrorReturnType;
auto Result = detail::ResultTraits<AltRetT>::createBlankErrorReturnValue();
@@ -1547,13 +1532,12 @@ public:
};
/// Asynchronous dispatch for a function on an RPC endpoint.
-template <typename RPCClass, typename Func>
-class RPCAsyncDispatch {
+template <typename RPCClass, typename Func> class RPCAsyncDispatch {
public:
RPCAsyncDispatch(RPCClass &Endpoint) : Endpoint(Endpoint) {}
template <typename HandlerT, typename... ArgTs>
- Error operator()(HandlerT Handler, const ArgTs &... Args) const {
+ Error operator()(HandlerT Handler, const ArgTs &...Args) const {
return Endpoint.template appendCallAsync<Func>(std::move(Handler), Args...);
}
@@ -1571,7 +1555,6 @@ RPCAsyncDispatch<RPCEndpointT, Func> rpcAsyncDispatch(RPCEndpointT &Endpoint) {
/// waited on as a group.
class ParallelCallGroup {
public:
-
ParallelCallGroup() = default;
ParallelCallGroup(const ParallelCallGroup &) = delete;
ParallelCallGroup &operator=(const ParallelCallGroup &) = delete;
@@ -1579,7 +1562,7 @@ public:
/// Make an asynchronous call.
template <typename AsyncDispatcher, typename HandlerT, typename... ArgTs>
Error call(const AsyncDispatcher &AsyncDispatch, HandlerT Handler,
- const ArgTs &... Args) {
+ const ArgTs &...Args) {
// Increment the count of outstanding calls. This has to happen before
// we invoke the call, as the handler may (depending on scheduling)
// be run immediately on another thread, and we don't want the decrement
@@ -1618,70 +1601,57 @@ private:
uint32_t NumOutstandingCalls = 0;
};
-/// Convenience class for grouping RPC Functions into APIs that can be
+/// Convenience class for grouping RPCFunctions into APIs that can be
/// negotiated as a block.
///
-template <typename... Funcs>
-class APICalls {
+template <typename... Funcs> class APICalls {
public:
-
/// Test whether this API contains Function F.
- template <typename F>
- class Contains {
+ template <typename F> class Contains {
public:
static const bool value = false;
};
/// Negotiate all functions in this API.
- template <typename RPCEndpoint>
- static Error negotiate(RPCEndpoint &R) {
+ template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) {
return Error::success();
}
};
-template <typename Func, typename... Funcs>
-class APICalls<Func, Funcs...> {
+template <typename Func, typename... Funcs> class APICalls<Func, Funcs...> {
public:
-
- template <typename F>
- class Contains {
+ template <typename F> class Contains {
public:
static const bool value = std::is_same<F, Func>::value |
APICalls<Funcs...>::template Contains<F>::value;
};
- template <typename RPCEndpoint>
- static Error negotiate(RPCEndpoint &R) {
+ template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) {
if (auto Err = R.template negotiateFunction<Func>())
return Err;
return APICalls<Funcs...>::negotiate(R);
}
-
};
template <typename... InnerFuncs, typename... Funcs>
class APICalls<APICalls<InnerFuncs...>, Funcs...> {
public:
-
- template <typename F>
- class Contains {
+ template <typename F> class Contains {
public:
static const bool value =
- APICalls<InnerFuncs...>::template Contains<F>::value |
- APICalls<Funcs...>::template Contains<F>::value;
+ APICalls<InnerFuncs...>::template Contains<F>::value |
+ APICalls<Funcs...>::template Contains<F>::value;
};
- template <typename RPCEndpoint>
- static Error negotiate(RPCEndpoint &R) {
+ template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) {
if (auto Err = APICalls<InnerFuncs...>::negotiate(R))
return Err;
return APICalls<Funcs...>::negotiate(R);
}
-
};
-} // end namespace rpc
+} // end namespace shared
} // end namespace orc
} // end namespace llvm
-#endif
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H
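Usage sketch (illustrative, not part of the patch): after this rename, user code derives from shared::RPCFunction instead of rpc::Function, but the endpoint API is otherwise unchanged. The uint32_t function-id/sequence-number parameters and the ChannelT placeholder below are assumptions for the example.
#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
#include <cstdint>
using namespace llvm::orc::shared;
// One RPC function: int32_t Add1(int32_t).
class Add1 : public RPCFunction<Add1, int32_t(int32_t)> {
public:
  static const char *getName() { return "Add1"; }
};
// Server side: install a handler on a single-threaded endpoint.
template <typename ChannelT>
void registerHandlers(
    SingleThreadedRPCEndpoint<ChannelT, uint32_t, uint32_t> &EP) {
  EP.template addHandler<Add1>([](int32_t X) -> int32_t { return X + 1; });
}
// Client side: callB blocks until the remote result (or an error) arrives.
template <typename ChannelT>
llvm::Expected<int32_t>
callAdd1(SingleThreadedRPCEndpoint<ChannelT, uint32_t, uint32_t> &Client) {
  return Client.template callB<Add1>(int32_t(41));
}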
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h
index 35745993248c..2ee471939251 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h
@@ -1,4 +1,4 @@
-//===- llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h ----------------*- C++ -*-===//
+//===- RawByteChannel.h -----------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H
-#define LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h"
+#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
@@ -20,9 +20,9 @@
namespace llvm {
namespace orc {
-namespace rpc {
+namespace shared {
-/// Interface for byte-streams to be used with RPC.
+/// Interface for byte-streams to be used with ORC Serialization.
class RawByteChannel {
public:
virtual ~RawByteChannel() = default;
@@ -115,8 +115,7 @@ class SerializationTraits<
public:
static Error serialize(ChannelT &C, bool V) {
uint8_t Tmp = V ? 1 : 0;
- if (auto Err =
- C.appendBytes(reinterpret_cast<const char *>(&Tmp), 1))
+ if (auto Err = C.appendBytes(reinterpret_cast<const char *>(&Tmp), 1))
return Err;
return Error::success();
}
@@ -135,7 +134,7 @@ class SerializationTraits<
ChannelT, std::string, StringRef,
std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
public:
- /// RPC channel serialization for std::strings.
+ /// Serialization channel serialization for std::strings.
static Error serialize(RawByteChannel &C, StringRef S) {
if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size())))
return Err;
@@ -161,13 +160,13 @@ class SerializationTraits<
ChannelT, std::string, std::string,
std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
public:
- /// RPC channel serialization for std::strings.
+ /// Serialization channel serialization for std::strings.
static Error serialize(RawByteChannel &C, const std::string &S) {
return SerializationTraits<ChannelT, std::string, StringRef>::serialize(C,
S);
}
- /// RPC channel deserialization for std::strings.
+ /// Serialization channel deserialization for std::strings.
static Error deserialize(RawByteChannel &C, std::string &S) {
uint64_t Count = 0;
if (auto Err = deserializeSeq(C, Count))
@@ -177,8 +176,8 @@ public:
}
};
-} // end namespace rpc
+} // end namespace shared
} // end namespace orc
} // end namespace llvm
-#endif // LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H
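A short round-trip sketch for the traits above. ChannelT stands for any RawByteChannel-derived channel type (the concrete channel is an assumption, not part of this patch), and send() is assumed to flush buffered bytes as it does in the RPC code.
#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
#include <string>
// Writer: each value is serialized via its SerializationTraits specialization.
template <typename ChannelT>
llvm::Error sendGreeting(ChannelT &C, const std::string &Greeting, bool Urgent) {
  if (auto Err = llvm::orc::shared::serializeSeq(C, Greeting, Urgent))
    return Err;
  return C.send(); // flush to the other end of the channel
}
// Reader: deserializes the same sequence in the same order.
template <typename ChannelT>
llvm::Error recvGreeting(ChannelT &C, std::string &Greeting, bool &Urgent) {
  return llvm::orc::shared::deserializeSeq(C, Greeting, Urgent);
}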
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h
index 2f37ab40c7f8..f2d07632bd5d 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h
@@ -1,4 +1,4 @@
-//===- llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h --------------*- C++ -*-===//
+//===- Serialization.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,10 +6,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H
-#define LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H
-#include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
#include "llvm/Support/thread.h"
#include <map>
#include <mutex>
@@ -20,118 +22,104 @@
namespace llvm {
namespace orc {
-namespace rpc {
+namespace shared {
-template <typename T>
-class RPCTypeName;
+template <typename T> class SerializationTypeName;
/// TypeNameSequence is a utility for rendering sequences of types to a string
/// by rendering each type, separated by ", ".
-template <typename... ArgTs> class RPCTypeNameSequence {};
+template <typename... ArgTs> class SerializationTypeNameSequence {};
/// Render an empty TypeNameSequence to an ostream.
template <typename OStream>
-OStream &operator<<(OStream &OS, const RPCTypeNameSequence<> &V) {
+OStream &operator<<(OStream &OS, const SerializationTypeNameSequence<> &V) {
return OS;
}
/// Render a TypeNameSequence of a single type to an ostream.
template <typename OStream, typename ArgT>
-OStream &operator<<(OStream &OS, const RPCTypeNameSequence<ArgT> &V) {
- OS << RPCTypeName<ArgT>::getName();
+OStream &operator<<(OStream &OS, const SerializationTypeNameSequence<ArgT> &V) {
+ OS << SerializationTypeName<ArgT>::getName();
return OS;
}
/// Render a TypeNameSequence of more than one type to an ostream.
template <typename OStream, typename ArgT1, typename ArgT2, typename... ArgTs>
-OStream&
-operator<<(OStream &OS, const RPCTypeNameSequence<ArgT1, ArgT2, ArgTs...> &V) {
- OS << RPCTypeName<ArgT1>::getName() << ", "
- << RPCTypeNameSequence<ArgT2, ArgTs...>();
+OStream &
+operator<<(OStream &OS,
+ const SerializationTypeNameSequence<ArgT1, ArgT2, ArgTs...> &V) {
+ OS << SerializationTypeName<ArgT1>::getName() << ", "
+ << SerializationTypeNameSequence<ArgT2, ArgTs...>();
return OS;
}
-template <>
-class RPCTypeName<void> {
+template <> class SerializationTypeName<void> {
public:
- static const char* getName() { return "void"; }
+ static const char *getName() { return "void"; }
};
-template <>
-class RPCTypeName<int8_t> {
+template <> class SerializationTypeName<int8_t> {
public:
- static const char* getName() { return "int8_t"; }
+ static const char *getName() { return "int8_t"; }
};
-template <>
-class RPCTypeName<uint8_t> {
+template <> class SerializationTypeName<uint8_t> {
public:
- static const char* getName() { return "uint8_t"; }
+ static const char *getName() { return "uint8_t"; }
};
-template <>
-class RPCTypeName<int16_t> {
+template <> class SerializationTypeName<int16_t> {
public:
- static const char* getName() { return "int16_t"; }
+ static const char *getName() { return "int16_t"; }
};
-template <>
-class RPCTypeName<uint16_t> {
+template <> class SerializationTypeName<uint16_t> {
public:
- static const char* getName() { return "uint16_t"; }
+ static const char *getName() { return "uint16_t"; }
};
-template <>
-class RPCTypeName<int32_t> {
+template <> class SerializationTypeName<int32_t> {
public:
- static const char* getName() { return "int32_t"; }
+ static const char *getName() { return "int32_t"; }
};
-template <>
-class RPCTypeName<uint32_t> {
+template <> class SerializationTypeName<uint32_t> {
public:
- static const char* getName() { return "uint32_t"; }
+ static const char *getName() { return "uint32_t"; }
};
-template <>
-class RPCTypeName<int64_t> {
+template <> class SerializationTypeName<int64_t> {
public:
- static const char* getName() { return "int64_t"; }
+ static const char *getName() { return "int64_t"; }
};
-template <>
-class RPCTypeName<uint64_t> {
+template <> class SerializationTypeName<uint64_t> {
public:
- static const char* getName() { return "uint64_t"; }
+ static const char *getName() { return "uint64_t"; }
};
-template <>
-class RPCTypeName<bool> {
+template <> class SerializationTypeName<bool> {
public:
- static const char* getName() { return "bool"; }
+ static const char *getName() { return "bool"; }
};
-template <>
-class RPCTypeName<std::string> {
+template <> class SerializationTypeName<std::string> {
public:
- static const char* getName() { return "std::string"; }
+ static const char *getName() { return "std::string"; }
};
-template <>
-class RPCTypeName<Error> {
+template <> class SerializationTypeName<Error> {
public:
- static const char* getName() { return "Error"; }
+ static const char *getName() { return "Error"; }
};
-template <typename T>
-class RPCTypeName<Expected<T>> {
+template <typename T> class SerializationTypeName<Expected<T>> {
public:
- static const char* getName() {
+ static const char *getName() {
static std::string Name = [] {
std::string Name;
- raw_string_ostream(Name) << "Expected<"
- << RPCTypeNameSequence<T>()
- << ">";
+ raw_string_ostream(Name)
+ << "Expected<" << SerializationTypeNameSequence<T>() << ">";
return Name;
}();
return Name.data();
@@ -139,67 +127,78 @@ public:
};
template <typename T1, typename T2>
-class RPCTypeName<std::pair<T1, T2>> {
+class SerializationTypeName<std::pair<T1, T2>> {
+public:
+ static const char *getName() {
+ static std::string Name = [] {
+ std::string Name;
+ raw_string_ostream(Name)
+ << "std::pair<" << SerializationTypeNameSequence<T1, T2>() << ">";
+ return Name;
+ }();
+ return Name.data();
+ }
+};
+
+template <typename... ArgTs> class SerializationTypeName<std::tuple<ArgTs...>> {
public:
- static const char* getName() {
+ static const char *getName() {
static std::string Name = [] {
std::string Name;
- raw_string_ostream(Name) << "std::pair<" << RPCTypeNameSequence<T1, T2>()
- << ">";
+ raw_string_ostream(Name)
+ << "std::tuple<" << SerializationTypeNameSequence<ArgTs...>() << ">";
return Name;
}();
return Name.data();
}
};
-template <typename... ArgTs>
-class RPCTypeName<std::tuple<ArgTs...>> {
+template <typename T> class SerializationTypeName<Optional<T>> {
public:
- static const char* getName() {
+ static const char *getName() {
static std::string Name = [] {
std::string Name;
- raw_string_ostream(Name) << "std::tuple<"
- << RPCTypeNameSequence<ArgTs...>() << ">";
+ raw_string_ostream(Name)
+ << "Optional<" << SerializationTypeName<T>::getName() << ">";
return Name;
}();
return Name.data();
}
};
-template <typename T>
-class RPCTypeName<std::vector<T>> {
+template <typename T> class SerializationTypeName<std::vector<T>> {
public:
- static const char*getName() {
+ static const char *getName() {
static std::string Name = [] {
std::string Name;
- raw_string_ostream(Name) << "std::vector<" << RPCTypeName<T>::getName()
- << ">";
+ raw_string_ostream(Name)
+ << "std::vector<" << SerializationTypeName<T>::getName() << ">";
return Name;
}();
return Name.data();
}
};
-template <typename T> class RPCTypeName<std::set<T>> {
+template <typename T> class SerializationTypeName<std::set<T>> {
public:
static const char *getName() {
static std::string Name = [] {
std::string Name;
raw_string_ostream(Name)
- << "std::set<" << RPCTypeName<T>::getName() << ">";
+ << "std::set<" << SerializationTypeName<T>::getName() << ">";
return Name;
}();
return Name.data();
}
};
-template <typename K, typename V> class RPCTypeName<std::map<K, V>> {
+template <typename K, typename V> class SerializationTypeName<std::map<K, V>> {
public:
static const char *getName() {
static std::string Name = [] {
std::string Name;
raw_string_ostream(Name)
- << "std::map<" << RPCTypeNameSequence<K, V>() << ">";
+ << "std::map<" << SerializationTypeNameSequence<K, V>() << ">";
return Name;
}();
return Name.data();
@@ -242,8 +241,7 @@ template <typename ChannelT, typename WireType,
typename ConcreteType = WireType, typename = void>
class SerializationTraits;
-template <typename ChannelT>
-class SequenceTraits {
+template <typename ChannelT> class SequenceTraits {
public:
static Error emitSeparator(ChannelT &C) { return Error::success(); }
static Error consumeSeparator(ChannelT &C) { return Error::success(); }
@@ -258,11 +256,9 @@ public:
/// is a SerializationTraits specialization
/// SerializeTraits<ChannelT, ArgT, CArgT> with methods that can serialize the
/// caller argument to over-the-wire value.
-template <typename ChannelT, typename... ArgTs>
-class SequenceSerialization;
+template <typename ChannelT, typename... ArgTs> class SequenceSerialization;
-template <typename ChannelT>
-class SequenceSerialization<ChannelT> {
+template <typename ChannelT> class SequenceSerialization<ChannelT> {
public:
static Error serialize(ChannelT &C) { return Error::success(); }
static Error deserialize(ChannelT &C) { return Error::success(); }
@@ -271,15 +267,12 @@ public:
template <typename ChannelT, typename ArgT>
class SequenceSerialization<ChannelT, ArgT> {
public:
-
- template <typename CArgT>
- static Error serialize(ChannelT &C, CArgT &&CArg) {
+ template <typename CArgT> static Error serialize(ChannelT &C, CArgT &&CArg) {
return SerializationTraits<ChannelT, ArgT, std::decay_t<CArgT>>::serialize(
C, std::forward<CArgT>(CArg));
}
- template <typename CArgT>
- static Error deserialize(ChannelT &C, CArgT &CArg) {
+ template <typename CArgT> static Error deserialize(ChannelT &C, CArgT &CArg) {
return SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg);
}
};
@@ -287,25 +280,22 @@ public:
template <typename ChannelT, typename ArgT, typename... ArgTs>
class SequenceSerialization<ChannelT, ArgT, ArgTs...> {
public:
-
template <typename CArgT, typename... CArgTs>
- static Error serialize(ChannelT &C, CArgT &&CArg,
- CArgTs &&... CArgs) {
+ static Error serialize(ChannelT &C, CArgT &&CArg, CArgTs &&...CArgs) {
if (auto Err =
SerializationTraits<ChannelT, ArgT, std::decay_t<CArgT>>::serialize(
C, std::forward<CArgT>(CArg)))
return Err;
if (auto Err = SequenceTraits<ChannelT>::emitSeparator(C))
return Err;
- return SequenceSerialization<ChannelT, ArgTs...>::
- serialize(C, std::forward<CArgTs>(CArgs)...);
+ return SequenceSerialization<ChannelT, ArgTs...>::serialize(
+ C, std::forward<CArgTs>(CArgs)...);
}
template <typename CArgT, typename... CArgTs>
- static Error deserialize(ChannelT &C, CArgT &CArg,
- CArgTs &... CArgs) {
+ static Error deserialize(ChannelT &C, CArgT &CArg, CArgTs &...CArgs) {
if (auto Err =
- SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg))
+ SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg))
return Err;
if (auto Err = SequenceTraits<ChannelT>::consumeSeparator(C))
return Err;
@@ -314,25 +304,23 @@ public:
};
template <typename ChannelT, typename... ArgTs>
-Error serializeSeq(ChannelT &C, ArgTs &&... Args) {
+Error serializeSeq(ChannelT &C, ArgTs &&...Args) {
return SequenceSerialization<ChannelT, std::decay_t<ArgTs>...>::serialize(
C, std::forward<ArgTs>(Args)...);
}
template <typename ChannelT, typename... ArgTs>
-Error deserializeSeq(ChannelT &C, ArgTs &... Args) {
+Error deserializeSeq(ChannelT &C, ArgTs &...Args) {
return SequenceSerialization<ChannelT, ArgTs...>::deserialize(C, Args...);
}
-template <typename ChannelT>
-class SerializationTraits<ChannelT, Error> {
+template <typename ChannelT> class SerializationTraits<ChannelT, Error> {
public:
-
using WrappedErrorSerializer =
- std::function<Error(ChannelT &C, const ErrorInfoBase&)>;
+ std::function<Error(ChannelT &C, const ErrorInfoBase &)>;
using WrappedErrorDeserializer =
- std::function<Error(ChannelT &C, Error &Err)>;
+ std::function<Error(ChannelT &C, Error &Err)>;
template <typename ErrorInfoT, typename SerializeFtor,
typename DeserializeFtor>
@@ -343,15 +331,14 @@ public:
const std::string *KeyName = nullptr;
{
- // We're abusing the stability of std::map here: We take a reference to the
- // key of the deserializers map to save us from duplicating the string in
- // the serializer. This should be changed to use a stringpool if we switch
- // to a map type that may move keys in memory.
+ // We're abusing the stability of std::map here: We take a reference to
+ // the key of the deserializers map to save us from duplicating the string
+ // in the serializer. This should be changed to use a stringpool if we
+ // switch to a map type that may move keys in memory.
std::lock_guard<std::recursive_mutex> Lock(DeserializersMutex);
- auto I =
- Deserializers.insert(Deserializers.begin(),
- std::make_pair(std::move(Name),
- std::move(Deserialize)));
+ auto I = Deserializers.insert(
+ Deserializers.begin(),
+ std::make_pair(std::move(Name), std::move(Deserialize)));
KeyName = &I->first;
}
@@ -376,13 +363,12 @@ public:
if (!Err)
return serializeSeq(C, std::string());
- return handleErrors(std::move(Err),
- [&C](const ErrorInfoBase &EIB) {
- auto SI = Serializers.find(EIB.dynamicClassID());
- if (SI == Serializers.end())
- return serializeAsStringError(C, EIB);
- return (SI->second)(C, EIB);
- });
+ return handleErrors(std::move(Err), [&C](const ErrorInfoBase &EIB) {
+ auto SI = Serializers.find(EIB.dynamicClassID());
+ if (SI == Serializers.end())
+ return serializeAsStringError(C, EIB);
+ return (SI->second)(C, EIB);
+ });
}
static Error deserialize(ChannelT &C, Error &Err) {
@@ -404,7 +390,6 @@ public:
}
private:
-
static Error serializeAsStringError(ChannelT &C, const ErrorInfoBase &EIB) {
std::string ErrMsg;
{
@@ -417,7 +402,7 @@ private:
static std::recursive_mutex SerializersMutex;
static std::recursive_mutex DeserializersMutex;
- static std::map<const void*, WrappedErrorSerializer> Serializers;
+ static std::map<const void *, WrappedErrorSerializer> Serializers;
static std::map<std::string, WrappedErrorDeserializer> Deserializers;
};
@@ -428,14 +413,14 @@ template <typename ChannelT>
std::recursive_mutex SerializationTraits<ChannelT, Error>::DeserializersMutex;
template <typename ChannelT>
-std::map<const void*,
+std::map<const void *,
typename SerializationTraits<ChannelT, Error>::WrappedErrorSerializer>
-SerializationTraits<ChannelT, Error>::Serializers;
+ SerializationTraits<ChannelT, Error>::Serializers;
template <typename ChannelT>
-std::map<std::string,
- typename SerializationTraits<ChannelT, Error>::WrappedErrorDeserializer>
-SerializationTraits<ChannelT, Error>::Deserializers;
+std::map<std::string, typename SerializationTraits<
+ ChannelT, Error>::WrappedErrorDeserializer>
+ SerializationTraits<ChannelT, Error>::Deserializers;
/// Registers a serializer and deserializer for the given error type on the
/// given channel type.
@@ -444,32 +429,29 @@ template <typename ChannelT, typename ErrorInfoT, typename SerializeFtor,
void registerErrorSerialization(std::string Name, SerializeFtor &&Serialize,
DeserializeFtor &&Deserialize) {
SerializationTraits<ChannelT, Error>::template registerErrorType<ErrorInfoT>(
- std::move(Name),
- std::forward<SerializeFtor>(Serialize),
- std::forward<DeserializeFtor>(Deserialize));
+ std::move(Name), std::forward<SerializeFtor>(Serialize),
+ std::forward<DeserializeFtor>(Deserialize));
}
/// Registers serialization/deserialization for StringError.
-template <typename ChannelT>
-void registerStringError() {
+template <typename ChannelT> void registerStringError() {
static bool AlreadyRegistered = false;
if (!AlreadyRegistered) {
registerErrorSerialization<ChannelT, StringError>(
- "StringError",
- [](ChannelT &C, const StringError &SE) {
- return serializeSeq(C, SE.getMessage());
- },
- [](ChannelT &C, Error &Err) -> Error {
- ErrorAsOutParameter EAO(&Err);
- std::string Msg;
- if (auto E2 = deserializeSeq(C, Msg))
- return E2;
- Err =
- make_error<StringError>(std::move(Msg),
- orcError(
- OrcErrorCode::UnknownErrorCodeFromRemote));
- return Error::success();
- });
+ "StringError",
+ [](ChannelT &C, const StringError &SE) {
+ return serializeSeq(C, SE.getMessage());
+ },
+ [](ChannelT &C, Error &Err) -> Error {
+ ErrorAsOutParameter EAO(&Err);
+ std::string Msg;
+ if (auto E2 = deserializeSeq(C, Msg))
+ return E2;
+ Err = make_error<StringError>(
+ std::move(Msg),
+ orcError(OrcErrorCode::UnknownErrorCodeFromRemote));
+ return Error::success();
+ });
AlreadyRegistered = true;
}
}
@@ -478,7 +460,6 @@ void registerStringError() {
template <typename ChannelT, typename T1, typename T2>
class SerializationTraits<ChannelT, Expected<T1>, Expected<T2>> {
public:
-
static Error serialize(ChannelT &C, Expected<T2> &&ValOrErr) {
if (ValOrErr) {
if (auto Err = serializeSeq(C, true))
@@ -509,7 +490,6 @@ public:
template <typename ChannelT, typename T1, typename T2>
class SerializationTraits<ChannelT, Expected<T1>, T2> {
public:
-
static Error serialize(ChannelT &C, T2 &&Val) {
return serializeSeq(C, Expected<T2>(std::forward<T2>(Val)));
}
@@ -519,7 +499,6 @@ public:
template <typename ChannelT, typename T>
class SerializationTraits<ChannelT, Expected<T>, Error> {
public:
-
static Error serialize(ChannelT &C, Error &&Err) {
return serializeSeq(C, Expected<T>(std::move(Err)));
}
@@ -547,7 +526,6 @@ public:
template <typename ChannelT, typename... ArgTs>
class SerializationTraits<ChannelT, std::tuple<ArgTs...>> {
public:
-
/// RPC channel serialization for std::tuple.
static Error serialize(ChannelT &C, const std::tuple<ArgTs...> &V) {
return serializeTupleHelper(C, V, std::index_sequence_for<ArgTs...>());
@@ -574,11 +552,35 @@ private:
}
};
+template <typename ChannelT, typename T>
+class SerializationTraits<ChannelT, Optional<T>> {
+public:
+ /// Serialize an Optional<T>.
+ static Error serialize(ChannelT &C, const Optional<T> &O) {
+ if (auto Err = serializeSeq(C, O != None))
+ return Err;
+ if (O)
+ if (auto Err = serializeSeq(C, *O))
+ return Err;
+ return Error::success();
+ }
+
+ /// Deserialize an Optional<T>.
+ static Error deserialize(ChannelT &C, Optional<T> &O) {
+ bool HasValue = false;
+ if (auto Err = deserializeSeq(C, HasValue))
+ return Err;
+ if (HasValue) {
+ // Construct the contained value before deserializing into it.
+ O = T();
+ if (auto Err = deserializeSeq(C, *O))
+ return Err;
+ }
+ return Error::success();
+ };
+};
+
/// SerializationTraits default specialization for std::vector.
template <typename ChannelT, typename T>
class SerializationTraits<ChannelT, std::vector<T>> {
public:
-
/// Serialize a std::vector<T> from std::vector<T>.
static Error serialize(ChannelT &C, const std::vector<T> &V) {
if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size())))
@@ -609,6 +611,22 @@ public:
}
};
+/// Enable vector serialization from an ArrayRef.
+template <typename ChannelT, typename T>
+class SerializationTraits<ChannelT, std::vector<T>, ArrayRef<T>> {
+public:
+ static Error serialize(ChannelT &C, ArrayRef<T> V) {
+ if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size())))
+ return Err;
+
+ for (const auto &E : V)
+ if (auto Err = serializeSeq(C, E))
+ return Err;
+
+ return Error::success();
+ }
+};
+
template <typename ChannelT, typename T, typename T2>
class SerializationTraits<ChannelT, std::set<T>, std::set<T2>> {
public:
@@ -695,8 +713,57 @@ public:
}
};
-} // end namespace rpc
+template <typename ChannelT, typename K, typename V, typename K2, typename V2>
+class SerializationTraits<ChannelT, std::map<K, V>, DenseMap<K2, V2>> {
+public:
+ /// Serialize a std::map<K, V> from DenseMap<K2, V2>.
+ static Error serialize(ChannelT &C, const DenseMap<K2, V2> &M) {
+ if (auto Err = serializeSeq(C, static_cast<uint64_t>(M.size())))
+ return Err;
+
+ for (auto &E : M) {
+ if (auto Err =
+ SerializationTraits<ChannelT, K, K2>::serialize(C, E.first))
+ return Err;
+
+ if (auto Err =
+ SerializationTraits<ChannelT, V, V2>::serialize(C, E.second))
+ return Err;
+ }
+
+ return Error::success();
+ }
+
+ /// Deserialize a DenseMap<K2, V2> from a std::map<K, V>.
+ static Error deserialize(ChannelT &C, DenseMap<K2, V2> &M) {
+ assert(M.empty() && "Expected default-constructed map to deserialize into");
+
+ uint64_t Count = 0;
+ if (auto Err = deserializeSeq(C, Count))
+ return Err;
+
+ while (Count-- != 0) {
+ std::pair<K2, V2> Val;
+ if (auto Err =
+ SerializationTraits<ChannelT, K, K2>::deserialize(C, Val.first))
+ return Err;
+
+ if (auto Err =
+ SerializationTraits<ChannelT, V, V2>::deserialize(C, Val.second))
+ return Err;
+
+ auto Added = M.insert(Val).second;
+ if (!Added)
+ return make_error<StringError>("Duplicate element in deserialized map",
+ orcError(OrcErrorCode::UnknownORCError));
+ }
+
+ return Error::success();
+ }
+};
+
+} // namespace shared
} // end namespace orc
} // end namespace llvm
-#endif // LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H
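Extending the renamed templates to a user type follows the same shape as the std::pair/std::vector specializations above. A hypothetical Point type, assuming the channel already provides the scalar integer traits (as RawByteChannel-derived channels do):
#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h"
#include <cstdint>
struct Point { int32_t X = 0; int32_t Y = 0; };
namespace llvm {
namespace orc {
namespace shared {
template <> class SerializationTypeName<Point> {
public:
  static const char *getName() { return "Point"; }
};
template <typename ChannelT> class SerializationTraits<ChannelT, Point> {
public:
  // Reuse the scalar traits by serializing the members as a sequence.
  static Error serialize(ChannelT &C, const Point &P) {
    return serializeSeq(C, P.X, P.Y);
  }
  static Error deserialize(ChannelT &C, Point &P) {
    return deserializeSeq(C, P.X, P.Y);
  }
};
} // namespace shared
} // namespace orc
} // namespace llvm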
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
new file mode 100644
index 000000000000..d01b3ef21f80
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
@@ -0,0 +1,165 @@
+//===--- TargetProcessControlTypes.h -- Shared Core/TPC types ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// TargetProcessControl types that are used by both the Orc and
+// OrcTargetProcess libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_TARGETPROCESSCONTROLTYPES_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_TARGETPROCESSCONTROLTYPES_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+
+#include <vector>
+
+namespace llvm {
+namespace orc {
+namespace tpctypes {
+
+template <typename T> struct UIntWrite {
+ UIntWrite() = default;
+ UIntWrite(JITTargetAddress Address, T Value)
+ : Address(Address), Value(Value) {}
+
+ JITTargetAddress Address = 0;
+ T Value = 0;
+};
+
+/// Describes a write to a uint8_t.
+using UInt8Write = UIntWrite<uint8_t>;
+
+/// Describes a write to a uint16_t.
+using UInt16Write = UIntWrite<uint16_t>;
+
+/// Describes a write to a uint32_t.
+using UInt32Write = UIntWrite<uint32_t>;
+
+/// Describes a write to a uint64_t.
+using UInt64Write = UIntWrite<uint64_t>;
+
+/// Describes a write to a buffer.
+/// For use with TargetProcessControl::MemoryAccess objects.
+struct BufferWrite {
+ BufferWrite() = default;
+ BufferWrite(JITTargetAddress Address, StringRef Buffer)
+ : Address(Address), Buffer(Buffer) {}
+
+ JITTargetAddress Address = 0;
+ StringRef Buffer;
+};
+
+/// A handle used to represent a loaded dylib in the target process.
+using DylibHandle = JITTargetAddress;
+
+using LookupResult = std::vector<JITTargetAddress>;
+
+/// Either a uint8_t array or a uint8_t*.
+union CWrapperFunctionResultData {
+ uint8_t Value[8];
+ uint8_t *ValuePtr;
+};
+
+/// C ABI compatible wrapper function result.
+///
+/// This can be safely returned from extern "C" functions, but should be used
+/// to construct a WrapperFunctionResult for safety.
+struct CWrapperFunctionResult {
+ uint64_t Size;
+ CWrapperFunctionResultData Data;
+ void (*Destroy)(CWrapperFunctionResultData Data, uint64_t Size);
+};
+
+/// C++ wrapper function result: Same as CWrapperFunctionResult but
+/// auto-releases memory.
+class WrapperFunctionResult {
+public:
+ /// Create a default WrapperFunctionResult.
+ WrapperFunctionResult() { zeroInit(R); }
+
+ /// Create a WrapperFunctionResult from a CWrapperFunctionResult. This
+ /// instance takes ownership of the result object and will automatically
+ /// call the Destroy member upon destruction.
+ WrapperFunctionResult(CWrapperFunctionResult R) : R(R) {}
+
+ WrapperFunctionResult(const WrapperFunctionResult &) = delete;
+ WrapperFunctionResult &operator=(const WrapperFunctionResult &) = delete;
+
+ WrapperFunctionResult(WrapperFunctionResult &&Other) {
+ zeroInit(R);
+ std::swap(R, Other.R);
+ }
+
+ WrapperFunctionResult &operator=(WrapperFunctionResult &&Other) {
+ CWrapperFunctionResult Tmp;
+ zeroInit(Tmp);
+ std::swap(Tmp, Other.R);
+ std::swap(R, Tmp);
+ return *this;
+ }
+
+ ~WrapperFunctionResult() {
+ if (R.Destroy)
+ R.Destroy(R.Data, R.Size);
+ }
+
+ /// Relinquish ownership of and return the CWrapperFunctionResult.
+ CWrapperFunctionResult release() {
+ CWrapperFunctionResult Tmp;
+ zeroInit(Tmp);
+ std::swap(R, Tmp);
+ return Tmp;
+ }
+
+ /// Get an ArrayRef covering the data in the result.
+ ArrayRef<uint8_t> getData() const {
+ if (R.Size <= 8)
+ return ArrayRef<uint8_t>(R.Data.Value, R.Size);
+ return ArrayRef<uint8_t>(R.Data.ValuePtr, R.Size);
+ }
+
+ /// Create a WrapperFunctionResult from the given integer, provided its
+ /// size is no greater than 64 bits.
+ template <typename T,
+ typename _ = std::enable_if_t<std::is_integral<T>::value &&
+ sizeof(T) <= sizeof(uint64_t)>>
+ static WrapperFunctionResult from(T Value) {
+ CWrapperFunctionResult R;
+ R.Size = sizeof(T);
+ memcpy(&R.Data.Value, &Value, R.Size);
+ R.Destroy = nullptr;
+ return R;
+ }
+
+ /// Create a WrapperFunctionResult from the given string.
+ static WrapperFunctionResult from(StringRef S);
+
+ /// Always free Data.ValuePtr by calling free on it.
+ static void destroyWithFree(CWrapperFunctionResultData Data, uint64_t Size);
+
+ /// Always free Data.ValuePtr by calling delete[] on it.
+ static void destroyWithDeleteArray(CWrapperFunctionResultData Data,
+ uint64_t Size);
+
+private:
+ static void zeroInit(CWrapperFunctionResult &R) {
+ R.Size = 0;
+ R.Data.ValuePtr = nullptr;
+ R.Destroy = nullptr;
+ }
+
+ CWrapperFunctionResult R;
+};
+
+} // end namespace tpctypes
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_TARGETPROCESSCONTROLTYPES_H
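A small sketch of how the result type above is intended to be used; myWrapper and readResult are hypothetical names, and the round trip relies only on the from/getData/release members declared in this header.
#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
#include <cstdint>
#include <cstring>
// A wrapper function returning a small integer result. Values that fit in the
// 8-byte inline buffer need no Destroy function.
extern "C" llvm::orc::tpctypes::CWrapperFunctionResult myWrapper() {
  return llvm::orc::tpctypes::WrapperFunctionResult::from(uint32_t(42))
      .release();
}
// Caller side: adopt the C result so any Destroy function runs automatically.
static uint32_t readResult() {
  llvm::orc::tpctypes::WrapperFunctionResult R(myWrapper());
  uint32_t V = 0;
  std::memcpy(&V, R.getData().data(), sizeof(V));
  return V;
}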
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
index d8213d3b35e8..a138f60a7756 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
@@ -18,14 +18,10 @@
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Debug.h"
-
#include <mutex>
#include <type_traits>
#include <utility>
-#include <vector>
namespace llvm {
namespace orc {
@@ -185,7 +181,8 @@ public:
: IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer),
S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {}
- void emit(MaterializationResponsibility R, ThreadSafeModule TSM);
+ void emit(std::unique_ptr<MaterializationResponsibility> R,
+ ThreadSafeModule TSM) override;
private:
TargetAndLikelies
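The hunk above tracks the LLVM 12 IRLayer interface, where MaterializationResponsibility is now passed by unique_ptr. A minimal pass-through layer showing the new emit shape (a sketch assuming the usual Layer.h interface; PassThroughLayer is illustrative only):
#include "llvm/ExecutionEngine/Orc/Layer.h"
#include <memory>
namespace {
class PassThroughLayer : public llvm::orc::IRLayer {
public:
  PassThroughLayer(llvm::orc::ExecutionSession &ES, llvm::orc::IRLayer &Base)
      : IRLayer(ES, Base.getManglingOptions()), Base(Base) {}
  void emit(std::unique_ptr<llvm::orc::MaterializationResponsibility> R,
            llvm::orc::ThreadSafeModule TSM) override {
    // Hand ownership of the responsibility straight to the next layer.
    Base.emit(std::move(R), std::move(TSM));
  }
private:
  llvm::orc::IRLayer &Base;
};
} // namespace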
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h
new file mode 100644
index 000000000000..ed4f6080bb4e
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h
@@ -0,0 +1,66 @@
+//===------------ TPCDynamicLibrarySearchGenerator.h ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Support loading and searching of dynamic libraries in a target process via
+// the TargetProcessControl class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H
+#define LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h"
+
+namespace llvm {
+namespace orc {
+
+class TPCDynamicLibrarySearchGenerator : public DefinitionGenerator {
+public:
+ using SymbolPredicate = unique_function<bool(const SymbolStringPtr &)>;
+
+ /// Create a DynamicLibrarySearchGenerator that searches for symbols in the
+ /// library with the given handle.
+ ///
+ /// If the Allow predicate is given then only symbols matching the predicate
+ /// will be searched for. If the predicate is not given then all symbols will
+ /// be searched for.
+ TPCDynamicLibrarySearchGenerator(TargetProcessControl &TPC,
+ tpctypes::DylibHandle H,
+ SymbolPredicate Allow = SymbolPredicate())
+ : TPC(TPC), H(H), Allow(std::move(Allow)) {}
+
+ /// Permanently loads the library at the given path and, on success, returns
+ /// a TPCDynamicLibrarySearchGenerator that will search the library for symbol
+ /// definitions. On failure returns the reason the library failed to load.
+ static Expected<std::unique_ptr<TPCDynamicLibrarySearchGenerator>>
+ Load(TargetProcessControl &TPC, const char *LibraryPath,
+ SymbolPredicate Allow = SymbolPredicate());
+
+ /// Creates a TPCDynamicLibrarySearchGenerator that searches for symbols in
+ /// the target process.
+ static Expected<std::unique_ptr<TPCDynamicLibrarySearchGenerator>>
+ GetForTargetProcess(TargetProcessControl &TPC,
+ SymbolPredicate Allow = SymbolPredicate()) {
+ return Load(TPC, nullptr, std::move(Allow));
+ }
+
+ Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &Symbols) override;
+
+private:
+ TargetProcessControl &TPC;
+ tpctypes::DylibHandle H;
+ SymbolPredicate Allow;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H
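Typical use is to attach the generator to a JITDylib so that unresolved lookups fall back to symbols in the target process. A sketch assuming the usual JITDylib::addGenerator API (the surrounding session setup is omitted):
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h"
llvm::Error addProcessSymbols(llvm::orc::JITDylib &MainJD,
                              llvm::orc::TargetProcessControl &TPC) {
  auto G =
      llvm::orc::TPCDynamicLibrarySearchGenerator::GetForTargetProcess(TPC);
  if (!G)
    return G.takeError();
  // Lookups that miss in MainJD will now consult the target process.
  MainJD.addGenerator(std::move(*G));
  return llvm::Error::success();
}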
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h
new file mode 100644
index 000000000000..519f818907f9
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h
@@ -0,0 +1,54 @@
+//===-- TPCEHFrameRegistrar.h - TPC based eh-frame registration -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// TargetProcessControl based eh-frame registration.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TPCEHFRAMEREGISTRAR_H
+#define LLVM_EXECUTIONENGINE_ORC_TPCEHFRAMEREGISTRAR_H
+
+#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h"
+
+namespace llvm {
+namespace orc {
+
+/// Register/Deregisters EH frames in a remote process via a
+/// TargetProcessControl instance.
+class TPCEHFrameRegistrar : public jitlink::EHFrameRegistrar {
+public:
+ /// Create from a TargetProcessControl instance alone. This will use
+ /// the TPC's lookupSymbols method to find the registration/deregistration
+ /// function addresses by name.
+ static Expected<std::unique_ptr<TPCEHFrameRegistrar>>
+ Create(TargetProcessControl &TPC);
+
+ /// Create a TPCEHFrameRegistrar with the given TargetProcessControl
+ /// object and registration/deregistration function addresses.
+ TPCEHFrameRegistrar(TargetProcessControl &TPC,
+ JITTargetAddress RegisterEHFrameWrapperFnAddr,
+ JITTargetAddress DeregisterEHFRameWrapperFnAddr)
+ : TPC(TPC), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr),
+ DeregisterEHFrameWrapperFnAddr(DeregisterEHFRameWrapperFnAddr) {}
+
+ Error registerEHFrames(JITTargetAddress EHFrameSectionAddr,
+ size_t EHFrameSectionSize) override;
+ Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr,
+ size_t EHFrameSectionSize) override;
+
+private:
+ TargetProcessControl &TPC;
+ JITTargetAddress RegisterEHFrameWrapperFnAddr;
+ JITTargetAddress DeregisterEHFrameWrapperFnAddr;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TPCEHFRAMEREGISTRAR_H
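// Editor's sketch (not part of the upstream patch): a TPCEHFrameRegistrar is
// commonly handed to ObjectLinkingLayer's EHFrameRegistrationPlugin so that
// eh-frame sections of JIT'd objects get registered in the target process.
// The plugin constructor signature is assumed from the same LLVM version.
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h"
#include <memory>

llvm::Error installEHFrameSupport(llvm::orc::ExecutionSession &ES,
                                  llvm::orc::ObjectLinkingLayer &OLL,
                                  llvm::orc::TargetProcessControl &TPC) {
  using namespace llvm::orc;
  // Resolve the register/deregister wrapper functions via TPC symbol lookup.
  auto Registrar = TPCEHFrameRegistrar::Create(TPC);
  if (!Registrar)
    return Registrar.takeError();
  // The plugin calls registerEHFrames/deregisterEHFrames as objects are
  // emitted and removed.
  OLL.addPlugin(std::make_unique<EHFrameRegistrationPlugin>(
      ES, std::move(*Registrar)));
  return llvm::Error::success();
}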
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h
new file mode 100644
index 000000000000..e7abd7fb90df
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h
@@ -0,0 +1,222 @@
+//===--- TPCIndirectionUtils.h - TPC based indirection utils ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Indirection utilities (stubs, trampolines, lazy call-throughs) that use the
+// TargetProcessControl API to interact with the target process.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H
+#define LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
+#include "llvm/ExecutionEngine/Orc/LazyReexports.h"
+
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+
+class TargetProcessControl;
+
+/// Provides TargetProcessControl based indirect stubs, trampoline pool and
+/// lazy call through manager.
+class TPCIndirectionUtils {
+ friend class TPCIndirectionUtilsAccess;
+
+public:
+ /// ABI support base class. Used to write resolver, stub, and trampoline
+ /// blocks.
+ class ABISupport {
+ protected:
+ ABISupport(unsigned PointerSize, unsigned TrampolineSize, unsigned StubSize,
+ unsigned StubToPointerMaxDisplacement, unsigned ResolverCodeSize)
+ : PointerSize(PointerSize), TrampolineSize(TrampolineSize),
+ StubSize(StubSize),
+ StubToPointerMaxDisplacement(StubToPointerMaxDisplacement),
+ ResolverCodeSize(ResolverCodeSize) {}
+
+ public:
+ virtual ~ABISupport();
+
+ unsigned getPointerSize() const { return PointerSize; }
+ unsigned getTrampolineSize() const { return TrampolineSize; }
+ unsigned getStubSize() const { return StubSize; }
+ unsigned getStubToPointerMaxDisplacement() const {
+ return StubToPointerMaxDisplacement;
+ }
+ unsigned getResolverCodeSize() const { return ResolverCodeSize; }
+
+ virtual void writeResolverCode(char *ResolverWorkingMem,
+ JITTargetAddress ResolverTargetAddr,
+ JITTargetAddress ReentryFnAddr,
+ JITTargetAddress ReentryCtxAddr) const = 0;
+
+ virtual void writeTrampolines(char *TrampolineBlockWorkingMem,
+ JITTargetAddress TrampolineBlockTargetAddr,
+ JITTargetAddress ResolverAddr,
+ unsigned NumTrampolines) const = 0;
+
+ virtual void
+ writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ JITTargetAddress StubsBlockTargetAddress,
+ JITTargetAddress PointersBlockTargetAddress,
+ unsigned NumStubs) const = 0;
+
+ private:
+ unsigned PointerSize = 0;
+ unsigned TrampolineSize = 0;
+ unsigned StubSize = 0;
+ unsigned StubToPointerMaxDisplacement = 0;
+ unsigned ResolverCodeSize = 0;
+ };
+
+ /// Create using the given ABI class.
+ template <typename ORCABI>
+ static std::unique_ptr<TPCIndirectionUtils>
+ CreateWithABI(TargetProcessControl &TPC);
+
+ /// Create based on the TargetProcessControl triple.
+ static Expected<std::unique_ptr<TPCIndirectionUtils>>
+ Create(TargetProcessControl &TPC);
+
+ /// Return a reference to the TargetProcessControl object.
+ TargetProcessControl &getTargetProcessControl() const { return TPC; }
+
+ /// Return a reference to the ABISupport object for this instance.
+ ABISupport &getABISupport() const { return *ABI; }
+
+ /// Release memory for resources held by this instance. This *must* be called
+ /// prior to destruction of this object.
+ Error cleanup();
+
+ /// Write resolver code to the target process and return its address.
+ /// This must be called before any call to getTrampolinePool or
+ /// createLazyCallThroughManager.
+ Expected<JITTargetAddress>
+ writeResolverBlock(JITTargetAddress ReentryFnAddr,
+ JITTargetAddress ReentryCtxAddr);
+
+ /// Returns the address of the Resolver block. Returns zero if the
+ /// writeResolverBlock method has not previously been called.
+ JITTargetAddress getResolverBlockAddress() const { return ResolverBlockAddr; }
+
+ /// Create an IndirectStubsManager for the target process.
+ std::unique_ptr<IndirectStubsManager> createIndirectStubsManager();
+
+ /// Get the TrampolinePool for the target process.
+ TrampolinePool &getTrampolinePool();
+
+ /// Create a LazyCallThroughManager.
+ /// This function should only be called once.
+ LazyCallThroughManager &
+ createLazyCallThroughManager(ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddr);
+
+ /// Create a LazyCallThroughManager for the target process.
+ LazyCallThroughManager &getLazyCallThroughManager() {
+ assert(LCTM && "createLazyCallThroughManager must be called first");
+ return *LCTM;
+ }
+
+private:
+ using Allocation = jitlink::JITLinkMemoryManager::Allocation;
+
+ struct IndirectStubInfo {
+ IndirectStubInfo() = default;
+ IndirectStubInfo(JITTargetAddress StubAddress,
+ JITTargetAddress PointerAddress)
+ : StubAddress(StubAddress), PointerAddress(PointerAddress) {}
+ JITTargetAddress StubAddress = 0;
+ JITTargetAddress PointerAddress = 0;
+ };
+
+ using IndirectStubInfoVector = std::vector<IndirectStubInfo>;
+
+ /// Create a TPCIndirectionUtils instance.
+ TPCIndirectionUtils(TargetProcessControl &TPC,
+ std::unique_ptr<ABISupport> ABI);
+
+ Expected<IndirectStubInfoVector> getIndirectStubs(unsigned NumStubs);
+
+ std::mutex TPCUIMutex;
+ TargetProcessControl &TPC;
+ std::unique_ptr<ABISupport> ABI;
+ JITTargetAddress ResolverBlockAddr;
+ std::unique_ptr<jitlink::JITLinkMemoryManager::Allocation> ResolverBlock;
+ std::unique_ptr<TrampolinePool> TP;
+ std::unique_ptr<LazyCallThroughManager> LCTM;
+
+ std::vector<IndirectStubInfo> AvailableIndirectStubs;
+ std::vector<std::unique_ptr<Allocation>> IndirectStubAllocs;
+};
+
+/// This will call writeResolverBlock on the given TPCIndirectionUtils instance
+/// to set up re-entry via a function that will directly return the trampoline
+/// landing address.
+///
+/// The TPCIndirectionUtils' LazyCallThroughManager must have been previously
+/// created via TPCIndirectionUtils::createLazyCallThroughManager.
+///
+/// The TPCIndirectionUtils' writeResolverBlock method must not have been previously
+/// called.
+///
+/// This function is experimental and likely subject to revision.
+Error setUpInProcessLCTMReentryViaTPCIU(TPCIndirectionUtils &TPCIU);
+
+namespace detail {
+
+template <typename ORCABI>
+class ABISupportImpl : public TPCIndirectionUtils::ABISupport {
+public:
+ ABISupportImpl()
+ : ABISupport(ORCABI::PointerSize, ORCABI::TrampolineSize,
+ ORCABI::StubSize, ORCABI::StubToPointerMaxDisplacement,
+ ORCABI::ResolverCodeSize) {}
+
+ void writeResolverCode(char *ResolverWorkingMem,
+ JITTargetAddress ResolverTargetAddr,
+ JITTargetAddress ReentryFnAddr,
+ JITTargetAddress ReentryCtxAddr) const override {
+ ORCABI::writeResolverCode(ResolverWorkingMem, ResolverTargetAddr,
+ ReentryFnAddr, ReentryCtxAddr);
+ }
+
+ void writeTrampolines(char *TrampolineBlockWorkingMem,
+ JITTargetAddress TrampolineBlockTargetAddr,
+ JITTargetAddress ResolverAddr,
+ unsigned NumTrampolines) const override {
+ ORCABI::writeTrampolines(TrampolineBlockWorkingMem,
+ TrampolineBlockTargetAddr, ResolverAddr,
+ NumTrampolines);
+ }
+
+ void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ JITTargetAddress StubsBlockTargetAddress,
+ JITTargetAddress PointersBlockTargetAddress,
+ unsigned NumStubs) const override {
+ ORCABI::writeIndirectStubsBlock(StubsBlockWorkingMem,
+ StubsBlockTargetAddress,
+ PointersBlockTargetAddress, NumStubs);
+ }
+};
+
+} // end namespace detail
+
+template <typename ORCABI>
+std::unique_ptr<TPCIndirectionUtils>
+TPCIndirectionUtils::CreateWithABI(TargetProcessControl &TPC) {
+ return std::unique_ptr<TPCIndirectionUtils>(new TPCIndirectionUtils(
+ TPC, std::make_unique<detail::ABISupportImpl<ORCABI>>()));
+}
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H
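// Editor's sketch (not part of the upstream patch): the setup order implied by
// the comments above -- create the utils, create the lazy call-through
// manager, then configure re-entry (which also writes the resolver block).
// The helper name setUpIndirection and its parameters are illustrative only,
// and the caller remains responsible for calling cleanup() before destruction.
#include "llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h"

llvm::Expected<std::unique_ptr<llvm::orc::TPCIndirectionUtils>>
setUpIndirection(llvm::orc::ExecutionSession &ES,
                 llvm::orc::TargetProcessControl &TPC,
                 llvm::JITTargetAddress ErrorHandlerAddr) {
  using namespace llvm::orc;
  auto TPCIU = TPCIndirectionUtils::Create(TPC);
  if (!TPCIU)
    return TPCIU.takeError();
  // The LazyCallThroughManager must exist before re-entry is configured.
  (*TPCIU)->createLazyCallThroughManager(ES, ErrorHandlerAddr);
  // Experimental helper declared above: sets up in-process re-entry and
  // writes the resolver block.
  if (auto Err = setUpInProcessLCTMReentryViaTPCIU(**TPCIU))
    return std::move(Err);
  return std::move(*TPCIU);
}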
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h
new file mode 100644
index 000000000000..253e06ba0ba1
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h
@@ -0,0 +1,620 @@
+//===-- OrcRPCTPCServer.h -- OrcRPCTargetProcessControl Server --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// OrcRPCTargetProcessControl server class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Process.h"
+
+#include <atomic>
+
+namespace llvm {
+namespace orc {
+
+namespace orcrpctpc {
+
+enum WireProtectionFlags : uint8_t {
+ WPF_None = 0,
+ WPF_Read = 1U << 0,
+ WPF_Write = 1U << 1,
+ WPF_Exec = 1U << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(WPF_Exec)
+};
+
+/// Convert from sys::Memory::ProtectionFlags
+inline WireProtectionFlags
+toWireProtectionFlags(sys::Memory::ProtectionFlags PF) {
+ WireProtectionFlags WPF = WPF_None;
+ if (PF & sys::Memory::MF_READ)
+ WPF |= WPF_Read;
+ if (PF & sys::Memory::MF_WRITE)
+ WPF |= WPF_Write;
+ if (PF & sys::Memory::MF_EXEC)
+ WPF |= WPF_Exec;
+ return WPF;
+}
+
+inline sys::Memory::ProtectionFlags
+fromWireProtectionFlags(WireProtectionFlags WPF) {
+ int PF = 0;
+ if (WPF & WPF_Read)
+ PF |= sys::Memory::MF_READ;
+ if (WPF & WPF_Write)
+ PF |= sys::Memory::MF_WRITE;
+ if (WPF & WPF_Exec)
+ PF |= sys::Memory::MF_EXEC;
+ return static_cast<sys::Memory::ProtectionFlags>(PF);
+}
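// Editor's note (not part of the upstream patch): the two helpers above are
// inverses over the read/write/exec bits, so a protection value round-trips:
//
//   auto PF = static_cast<llvm::sys::Memory::ProtectionFlags>(
//       llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_EXEC);
//   auto WPF = toWireProtectionFlags(PF);        // WPF_Read | WPF_Exec
//   assert(fromWireProtectionFlags(WPF) == PF);  // round-trip preserved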
+
+struct ReserveMemRequestElement {
+ WireProtectionFlags Prot = WPF_None;
+ uint64_t Size = 0;
+ uint64_t Alignment = 0;
+};
+
+using ReserveMemRequest = std::vector<ReserveMemRequestElement>;
+
+struct ReserveMemResultElement {
+ WireProtectionFlags Prot = WPF_None;
+ JITTargetAddress Address = 0;
+ uint64_t AllocatedSize = 0;
+};
+
+using ReserveMemResult = std::vector<ReserveMemResultElement>;
+
+struct ReleaseOrFinalizeMemRequestElement {
+ WireProtectionFlags Prot = WPF_None;
+ JITTargetAddress Address = 0;
+ uint64_t Size = 0;
+};
+
+using ReleaseOrFinalizeMemRequest =
+ std::vector<ReleaseOrFinalizeMemRequestElement>;
+
+} // end namespace orcrpctpc
+
+namespace shared {
+
+template <> class SerializationTypeName<tpctypes::UInt8Write> {
+public:
+ static const char *getName() { return "UInt8Write"; }
+};
+
+template <> class SerializationTypeName<tpctypes::UInt16Write> {
+public:
+ static const char *getName() { return "UInt16Write"; }
+};
+
+template <> class SerializationTypeName<tpctypes::UInt32Write> {
+public:
+ static const char *getName() { return "UInt32Write"; }
+};
+
+template <> class SerializationTypeName<tpctypes::UInt64Write> {
+public:
+ static const char *getName() { return "UInt64Write"; }
+};
+
+template <> class SerializationTypeName<tpctypes::BufferWrite> {
+public:
+ static const char *getName() { return "BufferWrite"; }
+};
+
+template <> class SerializationTypeName<orcrpctpc::ReserveMemRequestElement> {
+public:
+ static const char *getName() { return "ReserveMemRequestElement"; }
+};
+
+template <> class SerializationTypeName<orcrpctpc::ReserveMemResultElement> {
+public:
+ static const char *getName() { return "ReserveMemResultElement"; }
+};
+
+template <>
+class SerializationTypeName<orcrpctpc::ReleaseOrFinalizeMemRequestElement> {
+public:
+ static const char *getName() { return "ReleaseOrFinalizeMemRequestElement"; }
+};
+
+template <> class SerializationTypeName<tpctypes::WrapperFunctionResult> {
+public:
+ static const char *getName() { return "WrapperFunctionResult"; }
+};
+
+template <typename ChannelT, typename WriteT>
+class SerializationTraits<
+ ChannelT, WriteT, WriteT,
+ std::enable_if_t<std::is_same<WriteT, tpctypes::UInt8Write>::value ||
+ std::is_same<WriteT, tpctypes::UInt16Write>::value ||
+ std::is_same<WriteT, tpctypes::UInt32Write>::value ||
+ std::is_same<WriteT, tpctypes::UInt64Write>::value>> {
+public:
+ static Error serialize(ChannelT &C, const WriteT &W) {
+ return serializeSeq(C, W.Address, W.Value);
+ }
+ static Error deserialize(ChannelT &C, WriteT &W) {
+ return deserializeSeq(C, W.Address, W.Value);
+ }
+};
+
+template <typename ChannelT>
+class SerializationTraits<
+ ChannelT, tpctypes::BufferWrite, tpctypes::BufferWrite,
+ std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
+public:
+ static Error serialize(ChannelT &C, const tpctypes::BufferWrite &W) {
+ uint64_t Size = W.Buffer.size();
+ if (auto Err = serializeSeq(C, W.Address, Size))
+ return Err;
+
+ return C.appendBytes(W.Buffer.data(), Size);
+ }
+ static Error deserialize(ChannelT &C, tpctypes::BufferWrite &W) {
+ JITTargetAddress Address;
+ uint64_t Size;
+
+ if (auto Err = deserializeSeq(C, Address, Size))
+ return Err;
+
+ char *Buffer = jitTargetAddressToPointer<char *>(Address);
+
+ if (auto Err = C.readBytes(Buffer, Size))
+ return Err;
+
+ W = {Address, StringRef(Buffer, Size)};
+ return Error::success();
+ }
+};
+
+template <typename ChannelT>
+class SerializationTraits<ChannelT, orcrpctpc::ReserveMemRequestElement> {
+public:
+ static Error serialize(ChannelT &C,
+ const orcrpctpc::ReserveMemRequestElement &E) {
+ return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Size, E.Alignment);
+ }
+
+ static Error deserialize(ChannelT &C,
+ orcrpctpc::ReserveMemRequestElement &E) {
+ return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Size,
+ E.Alignment);
+ }
+};
+
+template <typename ChannelT>
+class SerializationTraits<ChannelT, orcrpctpc::ReserveMemResultElement> {
+public:
+ static Error serialize(ChannelT &C,
+ const orcrpctpc::ReserveMemResultElement &E) {
+ return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Address,
+ E.AllocatedSize);
+ }
+
+ static Error deserialize(ChannelT &C, orcrpctpc::ReserveMemResultElement &E) {
+ return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Address,
+ E.AllocatedSize);
+ }
+};
+
+template <typename ChannelT>
+class SerializationTraits<ChannelT,
+ orcrpctpc::ReleaseOrFinalizeMemRequestElement> {
+public:
+ static Error
+ serialize(ChannelT &C,
+ const orcrpctpc::ReleaseOrFinalizeMemRequestElement &E) {
+ return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Address, E.Size);
+ }
+
+ static Error deserialize(ChannelT &C,
+ orcrpctpc::ReleaseOrFinalizeMemRequestElement &E) {
+ return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Address,
+ E.Size);
+ }
+};
+
+template <typename ChannelT>
+class SerializationTraits<
+ ChannelT, tpctypes::WrapperFunctionResult, tpctypes::WrapperFunctionResult,
+ std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
+public:
+ static Error serialize(ChannelT &C,
+ const tpctypes::WrapperFunctionResult &E) {
+ auto Data = E.getData();
+ if (auto Err = serializeSeq(C, static_cast<uint64_t>(Data.size())))
+ return Err;
+ if (Data.size() == 0)
+ return Error::success();
+ return C.appendBytes(reinterpret_cast<const char *>(Data.data()),
+ Data.size());
+ }
+
+ static Error deserialize(ChannelT &C, tpctypes::WrapperFunctionResult &E) {
+ tpctypes::CWrapperFunctionResult R;
+
+ R.Size = 0;
+ R.Data.ValuePtr = nullptr;
+ R.Destroy = nullptr;
+
+ if (auto Err = deserializeSeq(C, R.Size))
+ return Err;
+ if (R.Size == 0)
+ return Error::success();
+ R.Data.ValuePtr = new uint8_t[R.Size];
+ if (auto Err =
+ C.readBytes(reinterpret_cast<char *>(R.Data.ValuePtr), R.Size)) {
+ R.Destroy = tpctypes::WrapperFunctionResult::destroyWithDeleteArray;
+ return Err;
+ }
+
+ E = tpctypes::WrapperFunctionResult(R);
+ return Error::success();
+ }
+};
+
+} // end namespace shared
+
+namespace orcrpctpc {
+
+using RemoteSymbolLookupSet = std::vector<std::pair<std::string, bool>>;
+using RemoteLookupRequest =
+ std::pair<tpctypes::DylibHandle, RemoteSymbolLookupSet>;
+
+class GetTargetTriple
+ : public shared::RPCFunction<GetTargetTriple, std::string()> {
+public:
+ static const char *getName() { return "GetTargetTriple"; }
+};
+
+class GetPageSize : public shared::RPCFunction<GetPageSize, uint64_t()> {
+public:
+ static const char *getName() { return "GetPageSize"; }
+};
+
+class ReserveMem
+ : public shared::RPCFunction<ReserveMem, Expected<ReserveMemResult>(
+ ReserveMemRequest)> {
+public:
+ static const char *getName() { return "ReserveMem"; }
+};
+
+class FinalizeMem
+ : public shared::RPCFunction<FinalizeMem,
+ Error(ReleaseOrFinalizeMemRequest)> {
+public:
+ static const char *getName() { return "FinalizeMem"; }
+};
+
+class ReleaseMem
+ : public shared::RPCFunction<ReleaseMem,
+ Error(ReleaseOrFinalizeMemRequest)> {
+public:
+ static const char *getName() { return "ReleaseMem"; }
+};
+
+class WriteUInt8s
+ : public shared::RPCFunction<WriteUInt8s,
+ Error(std::vector<tpctypes::UInt8Write>)> {
+public:
+ static const char *getName() { return "WriteUInt8s"; }
+};
+
+class WriteUInt16s
+ : public shared::RPCFunction<WriteUInt16s,
+ Error(std::vector<tpctypes::UInt16Write>)> {
+public:
+ static const char *getName() { return "WriteUInt16s"; }
+};
+
+class WriteUInt32s
+ : public shared::RPCFunction<WriteUInt32s,
+ Error(std::vector<tpctypes::UInt32Write>)> {
+public:
+ static const char *getName() { return "WriteUInt32s"; }
+};
+
+class WriteUInt64s
+ : public shared::RPCFunction<WriteUInt64s,
+ Error(std::vector<tpctypes::UInt64Write>)> {
+public:
+ static const char *getName() { return "WriteUInt64s"; }
+};
+
+class WriteBuffers
+ : public shared::RPCFunction<WriteBuffers,
+ Error(std::vector<tpctypes::BufferWrite>)> {
+public:
+ static const char *getName() { return "WriteBuffers"; }
+};
+
+class LoadDylib
+ : public shared::RPCFunction<LoadDylib, Expected<tpctypes::DylibHandle>(
+ std::string DylibPath)> {
+public:
+ static const char *getName() { return "LoadDylib"; }
+};
+
+class LookupSymbols
+ : public shared::RPCFunction<LookupSymbols,
+ Expected<std::vector<tpctypes::LookupResult>>(
+ std::vector<RemoteLookupRequest>)> {
+public:
+ static const char *getName() { return "LookupSymbols"; }
+};
+
+class RunMain
+ : public shared::RPCFunction<RunMain,
+ int32_t(JITTargetAddress MainAddr,
+ std::vector<std::string> Args)> {
+public:
+ static const char *getName() { return "RunMain"; }
+};
+
+class RunWrapper
+ : public shared::RPCFunction<RunWrapper,
+ tpctypes::WrapperFunctionResult(
+ JITTargetAddress, std::vector<uint8_t>)> {
+public:
+ static const char *getName() { return "RunWrapper"; }
+};
+
+class CloseConnection : public shared::RPCFunction<CloseConnection, void()> {
+public:
+ static const char *getName() { return "CloseConnection"; }
+};
+
+} // end namespace orcrpctpc
+
+/// Server that handles TargetProcessControl requests received over an ORC RPC endpoint.
+template <typename RPCEndpointT> class OrcRPCTPCServer {
+public:
+ /// Create an OrcRPCTPCServer from the given endpoint.
+ OrcRPCTPCServer(RPCEndpointT &EP) : EP(EP) {
+ using ThisT = OrcRPCTPCServer<RPCEndpointT>;
+
+ TripleStr = sys::getProcessTriple();
+ PageSize = sys::Process::getPageSizeEstimate();
+
+ EP.template addHandler<orcrpctpc::GetTargetTriple>(*this,
+ &ThisT::getTargetTriple);
+ EP.template addHandler<orcrpctpc::GetPageSize>(*this, &ThisT::getPageSize);
+
+ EP.template addHandler<orcrpctpc::ReserveMem>(*this, &ThisT::reserveMemory);
+ EP.template addHandler<orcrpctpc::FinalizeMem>(*this,
+ &ThisT::finalizeMemory);
+ EP.template addHandler<orcrpctpc::ReleaseMem>(*this, &ThisT::releaseMemory);
+
+ EP.template addHandler<orcrpctpc::WriteUInt8s>(
+ handleWriteUInt<tpctypes::UInt8Write>);
+ EP.template addHandler<orcrpctpc::WriteUInt16s>(
+ handleWriteUInt<tpctypes::UInt16Write>);
+ EP.template addHandler<orcrpctpc::WriteUInt32s>(
+ handleWriteUInt<tpctypes::UInt32Write>);
+ EP.template addHandler<orcrpctpc::WriteUInt64s>(
+ handleWriteUInt<tpctypes::UInt64Write>);
+ EP.template addHandler<orcrpctpc::WriteBuffers>(handleWriteBuffer);
+
+ EP.template addHandler<orcrpctpc::LoadDylib>(*this, &ThisT::loadDylib);
+ EP.template addHandler<orcrpctpc::LookupSymbols>(*this,
+ &ThisT::lookupSymbols);
+
+ EP.template addHandler<orcrpctpc::RunMain>(*this, &ThisT::runMain);
+ EP.template addHandler<orcrpctpc::RunWrapper>(*this, &ThisT::runWrapper);
+
+ EP.template addHandler<orcrpctpc::CloseConnection>(*this,
+ &ThisT::closeConnection);
+ }
+
+ /// Set the ProgramName to be used as the first argv element when running
+ /// functions via runAsMain.
+ void setProgramName(Optional<std::string> ProgramName = None) {
+ this->ProgramName = std::move(ProgramName);
+ }
+
+ /// Get the RPC endpoint for this server.
+ RPCEndpointT &getEndpoint() { return EP; }
+
+ /// Run the server loop.
+ Error run() {
+ while (!Finished) {
+ if (auto Err = EP.handleOne())
+ return Err;
+ }
+ return Error::success();
+ }
+
+private:
+ std::string getTargetTriple() { return TripleStr; }
+ uint64_t getPageSize() { return PageSize; }
+
+ template <typename WriteT>
+ static void handleWriteUInt(const std::vector<WriteT> &Ws) {
+ using ValueT = decltype(std::declval<WriteT>().Value);
+ for (auto &W : Ws)
+ *jitTargetAddressToPointer<ValueT *>(W.Address) = W.Value;
+ }
+
+ std::string getProtStr(orcrpctpc::WireProtectionFlags WPF) {
+ std::string Result;
+ Result += (WPF & orcrpctpc::WPF_Read) ? 'R' : '-';
+ Result += (WPF & orcrpctpc::WPF_Write) ? 'W' : '-';
+ Result += (WPF & orcrpctpc::WPF_Exec) ? 'X' : '-';
+ return Result;
+ }
+
+ static void handleWriteBuffer(const std::vector<tpctypes::BufferWrite> &Ws) {
+ for (auto &W : Ws) {
+ memcpy(jitTargetAddressToPointer<char *>(W.Address), W.Buffer.data(),
+ W.Buffer.size());
+ }
+ }
+
+ Expected<orcrpctpc::ReserveMemResult>
+ reserveMemory(const orcrpctpc::ReserveMemRequest &Request) {
+ orcrpctpc::ReserveMemResult Allocs;
+ auto PF = sys::Memory::MF_READ | sys::Memory::MF_WRITE;
+
+ uint64_t TotalSize = 0;
+
+ for (const auto &E : Request) {
+ uint64_t Size = alignTo(E.Size, PageSize);
+ uint16_t Align = E.Alignment;
+
+ if ((Align > PageSize) || (PageSize % Align))
+ return make_error<StringError>(
+ "Page alignmen does not satisfy requested alignment",
+ inconvertibleErrorCode());
+
+ TotalSize += Size;
+ }
+
+ // Allocate memory slab.
+ std::error_code EC;
+ auto MB = sys::Memory::allocateMappedMemory(TotalSize, nullptr, PF, EC);
+ if (EC)
+ return make_error<StringError>("Unable to allocate memory: " +
+ EC.message(),
+ inconvertibleErrorCode());
+
+ // Zero-fill the whole thing.
+ memset(MB.base(), 0, MB.allocatedSize());
+
+ // Carve up sections to return.
+ uint64_t SectionBase = 0;
+ for (const auto &E : Request) {
+ uint64_t SectionSize = alignTo(E.Size, PageSize);
+ Allocs.push_back({E.Prot,
+ pointerToJITTargetAddress(MB.base()) + SectionBase,
+ SectionSize});
+ SectionBase += SectionSize;
+ }
+
+ return Allocs;
+ }
+
+ Error finalizeMemory(const orcrpctpc::ReleaseOrFinalizeMemRequest &FMR) {
+ for (const auto &E : FMR) {
+ sys::MemoryBlock MB(jitTargetAddressToPointer<void *>(E.Address), E.Size);
+
+ auto PF = orcrpctpc::fromWireProtectionFlags(E.Prot);
+ if (auto EC =
+ sys::Memory::protectMappedMemory(MB, static_cast<unsigned>(PF)))
+ return make_error<StringError>("error protecting memory: " +
+ EC.message(),
+ inconvertibleErrorCode());
+ }
+ return Error::success();
+ }
+
+ Error releaseMemory(const orcrpctpc::ReleaseOrFinalizeMemRequest &RMR) {
+ for (const auto &E : RMR) {
+ sys::MemoryBlock MB(jitTargetAddressToPointer<void *>(E.Address), E.Size);
+
+ if (auto EC = sys::Memory::releaseMappedMemory(MB))
+ return make_error<StringError>("error release memory: " + EC.message(),
+ inconvertibleErrorCode());
+ }
+ return Error::success();
+ }
+
+ Expected<tpctypes::DylibHandle> loadDylib(const std::string &Path) {
+ std::string ErrMsg;
+ const char *DLPath = !Path.empty() ? Path.c_str() : nullptr;
+ auto DL = sys::DynamicLibrary::getPermanentLibrary(DLPath, &ErrMsg);
+ if (!DL.isValid())
+ return make_error<StringError>(std::move(ErrMsg),
+ inconvertibleErrorCode());
+
+ tpctypes::DylibHandle H = Dylibs.size();
+ Dylibs[H] = std::move(DL);
+ return H;
+ }
+
+ Expected<std::vector<tpctypes::LookupResult>>
+ lookupSymbols(const std::vector<orcrpctpc::RemoteLookupRequest> &Request) {
+ std::vector<tpctypes::LookupResult> Result;
+
+ for (const auto &E : Request) {
+ auto I = Dylibs.find(E.first);
+ if (I == Dylibs.end())
+ return make_error<StringError>("Unrecognized handle",
+ inconvertibleErrorCode());
+ auto &DL = I->second;
+ Result.push_back({});
+
+ for (const auto &KV : E.second) {
+ auto &SymString = KV.first;
+ bool WeakReference = KV.second;
+
+ const char *Sym = SymString.c_str();
+#ifdef __APPLE__
+ if (*Sym == '_')
+ ++Sym;
+#endif
+
+ void *Addr = DL.getAddressOfSymbol(Sym);
+ if (!Addr && !WeakReference)
+ return make_error<StringError>(Twine("Missing definition for ") + Sym,
+ inconvertibleErrorCode());
+
+ Result.back().push_back(pointerToJITTargetAddress(Addr));
+ }
+ }
+
+ return Result;
+ }
+
+ int32_t runMain(JITTargetAddress MainFnAddr,
+ const std::vector<std::string> &Args) {
+ Optional<StringRef> ProgramNameOverride;
+ if (ProgramName)
+ ProgramNameOverride = *ProgramName;
+
+ return runAsMain(
+ jitTargetAddressToFunction<int (*)(int, char *[])>(MainFnAddr), Args,
+ ProgramNameOverride);
+ }
+
+ tpctypes::WrapperFunctionResult
+ runWrapper(JITTargetAddress WrapperFnAddr,
+ const std::vector<uint8_t> &ArgBuffer) {
+ using WrapperFnTy = tpctypes::CWrapperFunctionResult (*)(
+ const uint8_t *Data, uint64_t Size);
+ auto *WrapperFn = jitTargetAddressToFunction<WrapperFnTy>(WrapperFnAddr);
+ return WrapperFn(ArgBuffer.data(), ArgBuffer.size());
+ }
+
+ void closeConnection() { Finished = true; }
+
+ std::string TripleStr;
+ uint64_t PageSize = 0;
+ Optional<std::string> ProgramName;
+ RPCEndpointT &EP;
+ std::atomic<bool> Finished{false};
+ DenseMap<tpctypes::DylibHandle, sys::DynamicLibrary> Dylibs;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H
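// Editor's sketch (not part of the upstream patch): a minimal server loop over
// a file-descriptor channel, roughly what an executor binary would run. The
// FDRawByteChannel and MultiThreadedRPCEndpoint names are assumptions about
// the companion Shared/ RPC headers in this version; substitute whatever
// endpoint type the controlling side uses.
#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

int serveTargetProcessControl(int InFD, int OutFD) {
  using namespace llvm::orc;
  using Endpoint = shared::MultiThreadedRPCEndpoint<shared::FDRawByteChannel>;

  shared::FDRawByteChannel C(InFD, OutFD);
  Endpoint EP(C, /*LazyAutoNegotiation=*/true);

  // Handle TargetProcessControl requests until CloseConnection is received.
  OrcRPCTPCServer<Endpoint> Server(EP);
  Server.setProgramName(std::string("tpc-server"));
  if (llvm::Error Err = Server.run()) {
    llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "tpc-server: ");
    return 1;
  }
  return 0;
}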
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h
new file mode 100644
index 000000000000..811c50e3ce4d
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h
@@ -0,0 +1,41 @@
+//===----- RegisterEHFrames.h -- Register EH frame sections -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Support for dynamically registering and deregistering eh-frame sections
+// in-process via libunwind.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_REGISTEREHFRAMES_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_REGISTEREHFRAMES_H
+
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+/// Register frames in the given eh-frame section with libunwind.
+Error registerEHFrameSection(const void *EHFrameSectionAddr,
+ size_t EHFrameSectionSize);
+
+/// Unregister frames in the given eh-frame section with libunwind.
+Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
+ size_t EHFrameSectionSize);
+
+} // end namespace orc
+} // end namespace llvm
+
+extern "C" llvm::orc::tpctypes::CWrapperFunctionResult
+llvm_orc_registerEHFrameSectionWrapper(uint8_t *Data, uint64_t Size);
+
+extern "C" llvm::orc::tpctypes::CWrapperFunctionResult
+llvm_orc_deregisterEHFrameSectionWrapper(uint8_t *Data, uint64_t Size);
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_REGISTEREHFRAMES_H
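// Editor's sketch (not part of the upstream patch): registering and later
// deregistering an in-memory eh-frame section, assuming its address and size
// are already known to the caller.
#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"

llvm::Error withEHFrames(const void *EHFrameAddr, size_t EHFrameSize) {
  if (auto Err = llvm::orc::registerEHFrameSection(EHFrameAddr, EHFrameSize))
    return Err;
  // ... run JIT'd code that may unwind through this section ...
  return llvm::orc::deregisterEHFrameSection(EHFrameAddr, EHFrameSize);
}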
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h
new file mode 100644
index 000000000000..1d2f6d2be089
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h
@@ -0,0 +1,38 @@
+//===-- TargetExecutionUtils.h - Utils for execution in target --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for execution in the target process.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_TARGETEXECUTIONUTILS_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_TARGETEXECUTIONUTILS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+
+namespace llvm {
+namespace orc {
+
+/// Run a main function, returning the result.
+///
+/// If the optional ProgramName argument is given then it will be inserted
+/// before the strings in Args as the first argument to the called function.
+///
+/// It is legal to have an empty argument list and no program name; however,
+/// many main functions expect at least a program name and will fail if none
+/// is provided.
+int runAsMain(int (*Main)(int, char *[]), ArrayRef<std::string> Args,
+ Optional<StringRef> ProgramName = None);
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_TARGETEXECUTIONUTILS_H
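// Editor's sketch (not part of the upstream patch): invoking a main-like
// function through runAsMain. When ProgramName is provided it becomes argv[0]
// and the Args strings follow it; myMain and "demo" are illustrative only.
#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
#include <string>
#include <vector>

static int myMain(int argc, char *[]) { return argc - 1; }

int demoRunAsMain() {
  std::vector<std::string> Args = {"--flag", "input.txt"};
  // myMain sees argc == 3 (program name plus two arguments) and returns 2.
  return llvm::orc::runAsMain(&myMain, Args, llvm::StringRef("demo"));
}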
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h
new file mode 100644
index 000000000000..b60b1ca6e372
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h
@@ -0,0 +1,218 @@
+//===--- TargetProcessControl.h - Target process control APIs ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for interacting with target processes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
+
+#include <future>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+/// TargetProcessControl supports interaction with a JIT target process.
+class TargetProcessControl {
+public:
+ /// APIs for manipulating memory in the target process.
+ class MemoryAccess {
+ public:
+ /// Callback function for asynchronous writes.
+ using WriteResultFn = unique_function<void(Error)>;
+
+ virtual ~MemoryAccess();
+
+ virtual void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
+
+ virtual void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
+
+ virtual void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
+
+ virtual void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
+
+ virtual void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws,
+ WriteResultFn OnWriteComplete) = 0;
+
+ Error writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws) {
+ std::promise<MSVCPError> ResultP;
+ auto ResultF = ResultP.get_future();
+ writeUInt8s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ return ResultF.get();
+ }
+
+ Error writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws) {
+ std::promise<MSVCPError> ResultP;
+ auto ResultF = ResultP.get_future();
+ writeUInt16s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ return ResultF.get();
+ }
+
+ Error writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws) {
+ std::promise<MSVCPError> ResultP;
+ auto ResultF = ResultP.get_future();
+ writeUInt32s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ return ResultF.get();
+ }
+
+ Error writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws) {
+ std::promise<MSVCPError> ResultP;
+ auto ResultF = ResultP.get_future();
+ writeUInt64s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ return ResultF.get();
+ }
+
+ Error writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws) {
+ std::promise<MSVCPError> ResultP;
+ auto ResultF = ResultP.get_future();
+ writeBuffers(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ return ResultF.get();
+ }
+ };
+
+ /// A pair of a dylib and a set of symbols to be looked up.
+ struct LookupRequest {
+ LookupRequest(tpctypes::DylibHandle Handle, const SymbolLookupSet &Symbols)
+ : Handle(Handle), Symbols(Symbols) {}
+ tpctypes::DylibHandle Handle;
+ const SymbolLookupSet &Symbols;
+ };
+
+ virtual ~TargetProcessControl();
+
+ /// Intern a symbol name in the SymbolStringPool.
+ SymbolStringPtr intern(StringRef SymName) { return SSP->intern(SymName); }
+
+ /// Return a shared pointer to the SymbolStringPool for this instance.
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() const { return SSP; }
+
+ /// Return the Triple for the target process.
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ /// Get the page size for the target process.
+ unsigned getPageSize() const { return PageSize; }
+
+ /// Return a MemoryAccess object for the target process.
+ MemoryAccess &getMemoryAccess() const { return *MemAccess; }
+
+ /// Return a JITLinkMemoryManager for the target process.
+ jitlink::JITLinkMemoryManager &getMemMgr() const { return *MemMgr; }
+
+ /// Load the dynamic library at the given path and return a handle to it.
+ /// If DylibPath is null this function will return the global handle for
+ /// the target process.
+ virtual Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) = 0;
+
+ /// Search for symbols in the target process.
+ ///
+ /// The result of the lookup is a 2-dimensional array of target addresses
+ /// that correspond to the lookup order. If a required symbol is not
+ /// found then this method will return an error. If a weakly referenced
+ /// symbol is not found then it will be assigned a '0' value in the
+ /// result.
+ virtual Expected<std::vector<tpctypes::LookupResult>>
+ lookupSymbols(ArrayRef<LookupRequest> Request) = 0;
+
+ /// Run function with a main-like signature.
+ virtual Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
+ ArrayRef<std::string> Args) = 0;
+
+ /// Run a wrapper function with signature:
+ ///
+ /// \code{.cpp}
+ /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size);
+ /// \endcode
+ ///
+ virtual Expected<tpctypes::WrapperFunctionResult>
+ runWrapper(JITTargetAddress WrapperFnAddr, ArrayRef<uint8_t> ArgBuffer) = 0;
+
+ /// Disconnect from the target process.
+ ///
+ /// This should be called after the JIT session is shut down.
+ virtual Error disconnect() = 0;
+
+protected:
+ TargetProcessControl(std::shared_ptr<SymbolStringPool> SSP)
+ : SSP(std::move(SSP)) {}
+
+ std::shared_ptr<SymbolStringPool> SSP;
+ Triple TargetTriple;
+ unsigned PageSize = 0;
+ MemoryAccess *MemAccess = nullptr;
+ jitlink::JITLinkMemoryManager *MemMgr = nullptr;
+};
+
+/// A TargetProcessControl implementation targeting the current process.
+class SelfTargetProcessControl : public TargetProcessControl,
+ private TargetProcessControl::MemoryAccess {
+public:
+ SelfTargetProcessControl(
+ std::shared_ptr<SymbolStringPool> SSP, Triple TargetTriple,
+ unsigned PageSize, std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr);
+
+ /// Create a SelfTargetProcessControl with the given memory manager.
+ /// If no memory manager is given, a jitlink::InProcessMemoryManager will
+ /// be used by default.
+ static Expected<std::unique_ptr<SelfTargetProcessControl>>
+ Create(std::shared_ptr<SymbolStringPool> SSP,
+ std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr = nullptr);
+
+ Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override;
+
+ Expected<std::vector<tpctypes::LookupResult>>
+ lookupSymbols(ArrayRef<LookupRequest> Request) override;
+
+ Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
+ ArrayRef<std::string> Args) override;
+
+ Expected<tpctypes::WrapperFunctionResult>
+ runWrapper(JITTargetAddress WrapperFnAddr,
+ ArrayRef<uint8_t> ArgBuffer) override;
+
+ Error disconnect() override;
+
+private:
+ void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) override;
+
+ void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws,
+ WriteResultFn OnWriteComplete) override;
+
+ void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws,
+ WriteResultFn OnWriteComplete) override;
+
+ void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws,
+ WriteResultFn OnWriteComplete) override;
+
+ void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws,
+ WriteResultFn OnWriteComplete) override;
+
+ std::unique_ptr<jitlink::JITLinkMemoryManager> OwnedMemMgr;
+ char GlobalManglingPrefix = 0;
+ std::vector<std::unique_ptr<sys::DynamicLibrary>> DynamicLibraries;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H
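// Editor's sketch (not part of the upstream patch): creating an in-process
// TargetProcessControl and looking up one symbol through the process's global
// handle. findInProcess is an illustrative helper; error handling is minimal.
#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h"

llvm::Expected<llvm::JITTargetAddress> findInProcess(llvm::StringRef Name) {
  using namespace llvm::orc;
  auto TPC =
      SelfTargetProcessControl::Create(std::make_shared<SymbolStringPool>());
  if (!TPC)
    return TPC.takeError();

  // A null path requests the global handle for the target process itself.
  auto H = (*TPC)->loadDylib(nullptr);
  if (!H)
    return H.takeError();

  SymbolLookupSet Symbols((*TPC)->intern(Name));
  TargetProcessControl::LookupRequest Request(*H, Symbols);
  auto Result = (*TPC)->lookupSymbols(Request);
  if (!Result)
    return Result.takeError();

  // One result vector per request, one address per requested symbol.
  return Result->front().front();
}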
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
index 58c96737e580..82f2b7464953 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
@@ -162,7 +162,7 @@ using GVModifier = std::function<void(GlobalValue &)>;
/// Clones the given module on to a new context.
ThreadSafeModule
-cloneToNewContext(ThreadSafeModule &TSMW,
+cloneToNewContext(const ThreadSafeModule &TSMW,
GVPredicate ShouldCloneDef = GVPredicate(),
GVModifier UpdateClonedDefSource = GVModifier());
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
index 1b3ce1127e4a..9b83092e653f 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -271,11 +271,11 @@ private:
object::OwningBinary<object::ObjectFile> O,
RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver,
bool ProcessAllSections,
- unique_function<Error(const object::ObjectFile &Obj,
- std::unique_ptr<LoadedObjectInfo>,
+ unique_function<Error(const object::ObjectFile &Obj, LoadedObjectInfo &,
std::map<StringRef, JITEvaluatedSymbol>)>
OnLoaded,
- unique_function<void(object::OwningBinary<object::ObjectFile> O, Error)>
+ unique_function<void(object::OwningBinary<object::ObjectFile> O,
+ std::unique_ptr<LoadedObjectInfo>, Error)>
OnEmitted);
// RuntimeDyldImpl is the actual class. RuntimeDyld is just the public
@@ -298,10 +298,11 @@ void jitLinkForORC(
RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver,
bool ProcessAllSections,
unique_function<Error(const object::ObjectFile &Obj,
- std::unique_ptr<RuntimeDyld::LoadedObjectInfo>,
+ RuntimeDyld::LoadedObjectInfo &,
std::map<StringRef, JITEvaluatedSymbol>)>
OnLoaded,
- unique_function<void(object::OwningBinary<object::ObjectFile>, Error)>
+ unique_function<void(object::OwningBinary<object::ObjectFile>,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>, Error)>
OnEmitted);
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/FileCheck.h b/contrib/llvm-project/llvm/include/llvm/FileCheck/FileCheck.h
index 2f0e641394d5..b44ab025694b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/FileCheck.h
+++ b/contrib/llvm-project/llvm/include/llvm/FileCheck/FileCheck.h
@@ -1,4 +1,4 @@
-//==-- llvm/Support/FileCheck.h ---------------------------*- C++ -*-==//
+//==-- llvm/FileCheck/FileCheck.h --------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,13 +10,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_FILECHECK_H
-#define LLVM_SUPPORT_FILECHECK_H
+#ifndef LLVM_FILECHECK_FILECHECK_H
+#define LLVM_FILECHECK_FILECHECK_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"
+#include <bitset>
#include <string>
#include <vector>
@@ -30,6 +31,7 @@ struct FileCheckRequest {
std::vector<StringRef> ImplicitCheckNot;
std::vector<StringRef> GlobalDefines;
bool AllowEmptyInput = false;
+ bool AllowUnusedPrefixes = false;
bool MatchFullLines = false;
bool IgnoreCase = false;
bool IsDefaultCheckPrefix = false;
@@ -39,10 +41,6 @@ struct FileCheckRequest {
bool VerboseVerbose = false;
};
-//===----------------------------------------------------------------------===//
-// Summary of a FileCheck diagnostic.
-//===----------------------------------------------------------------------===//
-
namespace Check {
enum FileCheckKind {
@@ -67,12 +65,23 @@ enum FileCheckKind {
CheckBadCount
};
+enum FileCheckKindModifier {
+ /// Modifies directive to perform literal match.
+ ModifierLiteral = 0,
+
+ // The number of modifiers.
+ Size
+};
+
class FileCheckType {
FileCheckKind Kind;
int Count; ///< optional Count for some checks
+ /// Modifiers for the check directive.
+ std::bitset<FileCheckKindModifier::Size> Modifiers;
public:
- FileCheckType(FileCheckKind Kind = CheckNone) : Kind(Kind), Count(1) {}
+ FileCheckType(FileCheckKind Kind = CheckNone)
+ : Kind(Kind), Count(1), Modifiers() {}
FileCheckType(const FileCheckType &) = default;
FileCheckType &operator=(const FileCheckType &) = default;
@@ -81,11 +90,23 @@ public:
int getCount() const { return Count; }
FileCheckType &setCount(int C);
+ bool isLiteralMatch() const {
+ return Modifiers[FileCheckKindModifier::ModifierLiteral];
+ }
+ FileCheckType &setLiteralMatch(bool Literal = true) {
+ Modifiers.set(FileCheckKindModifier::ModifierLiteral, Literal);
+ return *this;
+ }
+
// \returns a description of \p Prefix.
std::string getDescription(StringRef Prefix) const;
+
+ // \returns a description of \p Modifiers.
+ std::string getModifiersDescription() const;
};
} // namespace Check
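// Editor's note (not part of the upstream patch): the new modifier bitset is
// driven through the fluent setter above; a CHECK{LITERAL:} directive would be
// modeled roughly as below (Check::CheckPlain is assumed from the portion of
// the FileCheckKind enum elided in this hunk):
//
//   Check::FileCheckType Ty(Check::CheckPlain);
//   Ty.setLiteralMatch();            // sets ModifierLiteral
//   bool Lit = Ty.isLiteralMatch();  // true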
+/// Summary of a FileCheck diagnostic.
struct FileCheckDiag {
/// What is the FileCheck directive for this diagnostic?
Check::FileCheckType CheckTy;
@@ -131,8 +152,12 @@ struct FileCheckDiag {
unsigned InputStartCol;
unsigned InputEndLine;
unsigned InputEndCol;
+ /// A note to replace the one normally indicated by MatchTy, or the empty
+ /// string if none.
+ std::string Note;
FileCheckDiag(const SourceMgr &SM, const Check::FileCheckType &CheckTy,
- SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange);
+ SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange,
+ StringRef Note = "");
};
class FileCheckPatternContext;
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/Directive/DirectiveBase.td b/contrib/llvm-project/llvm/include/llvm/Frontend/Directive/DirectiveBase.td
index 26049ca60db3..e40f40f74c73 100644
--- a/contrib/llvm-project/llvm/include/llvm/Frontend/Directive/DirectiveBase.td
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/Directive/DirectiveBase.td
@@ -35,10 +35,10 @@ class DirectiveLanguage {
// Make the enum values available in the namespace. This allows us to
// write something like Enum_X if we have a `using namespace cppNamespace`.
- bit makeEnumAvailableInNamespace = 0;
+ bit makeEnumAvailableInNamespace = false;
// Generate include and macro to enable LLVM BitmaskEnum.
- bit enableBitmaskEnumInNamespace = 0;
+ bit enableBitmaskEnumInNamespace = false;
 // Header file included in the generated implementation code. Usually the
// output file of the declaration code generation. Can be left blank.
@@ -46,6 +46,24 @@ class DirectiveLanguage {
// EnumSet class name used for clauses to generated the allowed clauses map.
string clauseEnumSetClass = "";
+
+ // Class holding the clauses in the flang parse-tree.
+ string flangClauseBaseClass = "";
+}
+
+// Information about values accepted by enum-like clauses
+class ClauseVal<string n, int v, bit uv> {
+ // Name of the clause value.
+ string name = n;
+
+ // Integer value of the clause.
+ int value = v;
+
+ // Can user specify this value?
+ bit isUserValue = uv;
+
+ // Set clause value used by default when unknown.
+ bit isDefault = false;
}
// Information about a specific clause.
@@ -57,18 +75,32 @@ class Clause<string c> {
string alternativeName = "";
// Optional class holding value of the clause in clang AST.
- string clangClass = ?;
+ string clangClass = "";
// Optional class holding value of the clause in flang AST.
- string flangClass = ?;
+ string flangClass = "";
+
+ // If set to true, value is optional. Not optional by default.
+ bit isValueOptional = false;
+
+ // Name of enum when there is a list of allowed clause values.
+ string enumClauseValue = "";
+
+ // List of allowed clause values
+ list<ClauseVal> allowedClauseValues = [];
+ // If set to true, value class is part of a list. Single class by default.
+ bit isValueList = false;
+
+ // Define a default value such as "*".
+ string defaultValue = "";
// Is clause implicit? If clause is set as implicit, the default kind will
// be return in get<LanguageName>ClauseKind instead of their own kind.
- bit isImplicit = 0;
+ bit isImplicit = false;
- // Set directive used by default when unknown. Function returning the kind
+ // Set clause used by default when unknown. Function returning the kind
// of enumeration will use this clause as the default.
- bit isDefault = 0;
+ bit isDefault = false;
}
// Hold information about clause validity by version.
@@ -92,6 +124,10 @@ class Directive<string d> {
// function.
string alternativeName = "";
+ // Clauses cannot appear twice in the three allowed lists below. Also, since
+ // required implies allowed, the same clause cannot appear in both the
+ // allowedClauses and requiredClauses lists.
+
// List of allowed clauses for the directive.
list<VersionedClause> allowedClauses = [];
@@ -105,5 +141,5 @@ class Directive<string d> {
list<VersionedClause> requiredClauses = [];
// Set directive used by default when unknown.
- bit isDefault = 0;
+ bit isDefault = false;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenACC/ACC.td b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenACC/ACC.td
index e96b7e846662..6045a9ac2af0 100644
--- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenACC/ACC.td
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenACC/ACC.td
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This is the definition file for OpenACC directives and clauses.
+// This is the definition file for OpenACC 3.1 directives and clauses.
//
//===----------------------------------------------------------------------===//
@@ -21,32 +21,34 @@ def OpenACC : DirectiveLanguage {
let cppNamespace = "acc"; // final namespace will be llvm::acc
let directivePrefix = "ACCD_";
let clausePrefix = "ACCC_";
- let makeEnumAvailableInNamespace = 1;
- let enableBitmaskEnumInNamespace = 1;
+ let makeEnumAvailableInNamespace = true;
+ let enableBitmaskEnumInNamespace = true;
let includeHeader = "llvm/Frontend/OpenACC/ACC.h.inc";
let clauseEnumSetClass = "AccClauseSet";
+ let flangClauseBaseClass = "AccClause";
}
//===----------------------------------------------------------------------===//
// Definition of OpenACC clauses
//===----------------------------------------------------------------------===//
-// 2.9.6
-def ACCC_Auto : Clause<"auto"> {}
-
// 2.16.1
def ACCC_Async : Clause<"async"> {
- let flangClass = "std::optional<ScalarIntExpr>";
+ let flangClass = "ScalarIntExpr";
+ let isValueOptional = true;
}
-// 2.7.11
+// 2.9.7
+def ACCC_Auto : Clause<"auto"> {}
+
+// 2.7.12
def ACCC_Attach : Clause<"attach"> {
let flangClass = "AccObjectList";
}
// 2.15.1
def ACCC_Bind : Clause<"bind"> {
- let flangClass = "Name";
+ let flangClass = "AccBindClause";
}
// 2.12
@@ -58,41 +60,49 @@ def ACCC_Collapse : Clause<"collapse"> {
let flangClass = "ScalarIntConstantExpr";
}
-// 2.7.5
+// 2.7.6
def ACCC_Copy : Clause<"copy"> {
let flangClass = "AccObjectList";
}
-// 2.7.6
+// 2.7.7
def ACCC_Copyin : Clause<"copyin"> {
let flangClass = "AccObjectListWithModifier";
}
-// 2.7.7
+// 2.7.8
def ACCC_Copyout : Clause<"copyout"> {
let flangClass = "AccObjectListWithModifier";
}
-// 2.7.8
+// 2.7.9
def ACCC_Create : Clause<"create"> {
let flangClass = "AccObjectListWithModifier";
}
-// 2.5.14
+// 2.5.15
+def ACC_Default_none : ClauseVal<"none", 1, 1> { let isDefault = 1; }
+def ACC_Default_present : ClauseVal<"present", 0, 1> {}
+
def ACCC_Default : Clause<"default"> {
let flangClass = "AccDefaultClause";
+ let enumClauseValue = "DefaultValue";
+ let allowedClauseValues = [
+ ACC_Default_present,
+ ACC_Default_none
+ ];
}
-// 2.4.12
+// 2.14.3
def ACCC_DefaultAsync : Clause<"default_async"> {
let flangClass = "ScalarIntExpr";
}
-// 2.7.10
+// 2.7.11
def ACCC_Delete : Clause<"delete"> {
let flangClass = "AccObjectList";
}
-// 2.7.12
+// 2.7.13
def ACCC_Detach : Clause<"detach"> {
let flangClass = "AccObjectList";
}
@@ -102,38 +112,41 @@ def ACCC_Device : Clause<"device"> {
let flangClass = "AccObjectList";
}
-// 2.14.1
+// 2.14.1 - 2.14.2
def ACCC_DeviceNum : Clause<"device_num"> {
- let flangClass = "ScalarIntConstantExpr";
+ let flangClass = "ScalarIntExpr";
}
-// 2.7.3
+// 2.7.4
def ACCC_DevicePtr : Clause<"deviceptr"> {
let flangClass = "AccObjectList";
}
-// 2.13
+// 2.13.1
def ACCC_DeviceResident : Clause<"device_resident"> {
let flangClass = "AccObjectList";
}
// 2.4
def ACCC_DeviceType : Clause<"device_type"> {
- // (DeviceType, "*"
- let flangClass = "std::optional<std::list<Name>>";
+ let flangClass = "ScalarIntExpr";
+ let defaultValue = "*";
+ let isValueOptional = true;
+ let isValueList = true;
}
// 2.6.6
def ACCC_Finalize : Clause<"finalize"> {}
-// 2.5.12
+// 2.5.13
def ACCC_FirstPrivate : Clause<"firstprivate"> {
let flangClass = "AccObjectList";
}
// 2.9.2
def ACCC_Gang : Clause<"gang"> {
- let flangClass = "std::optional<AccGangArgument>";
+ let flangClass = "AccGangArgument";
+ let isValueOptional = true;
}
// 2.14.4
@@ -141,7 +154,7 @@ def ACCC_Host : Clause<"host"> {
let flangClass = "AccObjectList";
}
-// 2.5.4
+// 2.5.5
def ACCC_If : Clause <"if"> {
let flangClass = "ScalarLogicalExpr";
}
@@ -149,15 +162,15 @@ def ACCC_If : Clause <"if"> {
// 2.14.4
def ACCC_IfPresent : Clause<"if_present"> {}
-// 2.9.9
+// 2.9.6
def ACCC_Independent : Clause<"independent"> {}
-// 2.13
+// 2.13.3
def ACCC_Link : Clause<"link"> {
let flangClass = "AccObjectList";
}
-// 2.7.9
+// 2.7.10
def ACCC_NoCreate : Clause<"no_create"> {
let flangClass = "AccObjectList";
}
@@ -165,29 +178,29 @@ def ACCC_NoCreate : Clause<"no_create"> {
// 2.15.1
def ACCC_NoHost : Clause<"nohost"> {}
-// 2.5.8
+// 2.5.9
def ACCC_NumGangs : Clause<"num_gangs"> {
let flangClass = "ScalarIntExpr";
}
-// 2.5.9
+// 2.5.10
def ACCC_NumWorkers : Clause<"num_workers"> {
let flangClass = "ScalarIntExpr";
}
-// 2.7.4
+// 2.7.5
def ACCC_Present : Clause<"present"> {
let flangClass = "AccObjectList";
}
-// 2.5.11
+// 2.5.12
def ACCC_Private : Clause<"private"> {
let flangClass = "AccObjectList";
}
-// 2.9.7
+// 2.9.8
def ACCC_Tile : Clause <"tile"> {
- let flangClass = "AccSizeExprList";
+ let flangClass = "AccTileExprList";
}
// 2.8.1
@@ -198,14 +211,14 @@ def ACCC_UseDevice : Clause <"use_device"> {
// 2.12
def ACCC_Read : Clause<"read"> {}
-// 2.5.13
+// 2.5.14
def ACCC_Reduction : Clause<"reduction"> {
let flangClass = "AccObjectListWithReduction";
}
-// 2.5.5
+// 2.5.6
def ACCC_Self : Clause<"self"> {
- let flangClass = "std::optional<ScalarLogicalExpr>";
+ let flangClass = "AccSelfClause";
}
// 2.9.5
@@ -213,29 +226,32 @@ def ACCC_Seq : Clause<"seq"> {}
// 2.9.4
def ACCC_Vector : Clause<"vector"> {
- let flangClass = "std::optional<ScalarIntExpr>";
+ let flangClass = "ScalarIntExpr";
+ let isValueOptional = true;
}
-// 2.5.10
+// 2.5.11
def ACCC_VectorLength : Clause<"vector_length"> {
let flangClass = "ScalarIntExpr";
}
// 2.16.2
def ACCC_Wait : Clause<"wait"> {
- let flangClass = "std::optional<AccWaitArgument>";
+ let flangClass = "AccWaitArgument";
+ let isValueOptional = true;
}
// 2.9.3
def ACCC_Worker: Clause<"worker"> {
- let flangClass = "std::optional<ScalarIntExpr>";
+ let flangClass = "ScalarIntExpr";
+ let isValueOptional = true;
}
// 2.12
def ACCC_Write : Clause<"write"> {}
def ACCC_Unknown : Clause<"unknown"> {
- let isDefault = 1;
+ let isDefault = true;
}
//===----------------------------------------------------------------------===//
@@ -248,7 +264,8 @@ def ACC_Atomic : Directive<"atomic"> {}
// 2.6.5
def ACC_Data : Directive<"data"> {
let allowedOnceClauses = [
- VersionedClause<ACCC_If>
+ VersionedClause<ACCC_If>,
+ VersionedClause<ACCC_Default>
];
let requiredClauses = [
VersionedClause<ACCC_Attach>,
@@ -277,7 +294,7 @@ def ACC_Declare : Directive<"declare"> {
];
}
-// 2.5.2
+// 2.5.3
def ACC_Kernels : Directive<"kernels"> {
let allowedClauses = [
VersionedClause<ACCC_Attach>,
@@ -288,7 +305,8 @@ def ACC_Kernels : Directive<"kernels"> {
VersionedClause<ACCC_DeviceType>,
VersionedClause<ACCC_NoCreate>,
VersionedClause<ACCC_Present>,
- VersionedClause<ACCC_DevicePtr>
+ VersionedClause<ACCC_DevicePtr>,
+ VersionedClause<ACCC_Wait>
];
let allowedOnceClauses = [
VersionedClause<ACCC_Async>,
@@ -297,8 +315,7 @@ def ACC_Kernels : Directive<"kernels"> {
VersionedClause<ACCC_NumGangs>,
VersionedClause<ACCC_NumWorkers>,
VersionedClause<ACCC_Self>,
- VersionedClause<ACCC_VectorLength>,
- VersionedClause<ACCC_Wait>
+ VersionedClause<ACCC_VectorLength>
];
}
@@ -330,8 +347,10 @@ def ACC_Parallel : Directive<"parallel"> {
];
}
-// 2.5.3
+// 2.5.2
def ACC_Serial : Directive<"serial"> {
+ // Spec line 950-951: clauses are as for the parallel construct except that the
+ // num_gangs, num_workers, and vector_length clauses are not permitted.
let allowedClauses = [
VersionedClause<ACCC_Attach>,
VersionedClause<ACCC_Copy>,
@@ -340,10 +359,10 @@ def ACC_Serial : Directive<"serial"> {
VersionedClause<ACCC_Create>,
VersionedClause<ACCC_DevicePtr>,
VersionedClause<ACCC_DeviceType>,
- VersionedClause<ACCC_FirstPrivate>,
VersionedClause<ACCC_NoCreate>,
VersionedClause<ACCC_Present>,
VersionedClause<ACCC_Private>,
+ VersionedClause<ACCC_FirstPrivate>,
VersionedClause<ACCC_Wait>
];
let allowedOnceClauses = [
@@ -406,9 +425,15 @@ def ACC_Routine : Directive<"routine"> {
// 2.14.3
def ACC_Set : Directive<"set"> {
let allowedOnceClauses = [
+ VersionedClause<ACCC_DefaultAsync>,
+ VersionedClause<ACCC_DeviceNum>,
+ VersionedClause<ACCC_DeviceType>,
VersionedClause<ACCC_If>
];
let requiredClauses = [
+ // The three following clauses are also in allowedOnceClauses list due to
+ // restriction 2255 - Two instances of the same clause may not appear on the
+ // same directive.
VersionedClause<ACCC_DefaultAsync>,
VersionedClause<ACCC_DeviceNum>,
VersionedClause<ACCC_DeviceType>
@@ -478,6 +503,8 @@ def ACC_ExitData : Directive<"exit data"> {
VersionedClause<ACCC_Detach>
];
}
+
+// 2.8
def ACC_HostData : Directive<"host_data"> {
let allowedClauses = [
VersionedClause<ACCC_If>,
@@ -508,7 +535,6 @@ def ACC_KernelsLoop : Directive<"kernels loop"> {
VersionedClause<ACCC_Default>,
VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_If>,
- VersionedClause<ACCC_Independent>,
VersionedClause<ACCC_NumGangs>,
VersionedClause<ACCC_NumWorkers>,
VersionedClause<ACCC_Reduction>,
@@ -600,5 +626,5 @@ def ACC_SerialLoop : Directive<"serial loop"> {
}
def ACC_Unknown : Directive<"unknown"> {
- let isDefault = 1;
-}
\ No newline at end of file
+ let isDefault = true;
+}
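The OpenACC records above feed the same DirectiveEmitter TableGen backend as the OpenMP ones later in this patch, so the generated ACC.h.inc exposes llvm::acc::Directive/Clause enums plus an isAllowedClauseForDirective() helper. A minimal sketch of how the new default clause on the data construct becomes queryable; the include set below is an assumption about how a standalone consumer would pull in the generated header, not something taken from this diff:

  #include "llvm/ADT/BitmaskEnum.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Frontend/OpenACC/ACC.h.inc"  // generated from ACC.td

  // Reflects the new VersionedClause<ACCC_Default> entry on ACC_Data.
  bool dataAllowsDefault(unsigned Version) {
    return llvm::acc::isAllowedClauseForDirective(
        llvm::acc::Directive::ACCD_data, llvm::acc::Clause::ACCC_default,
        Version);
  }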
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMP.td b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMP.td
index a565bdf90b3f..10fa5a37b891 100644
--- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -21,10 +21,11 @@ def OpenMP : DirectiveLanguage {
let cppNamespace = "omp"; // final namespace will be llvm::omp
let directivePrefix = "OMPD_";
let clausePrefix = "OMPC_";
- let makeEnumAvailableInNamespace = 1;
- let enableBitmaskEnumInNamespace = 1;
+ let makeEnumAvailableInNamespace = true;
+ let enableBitmaskEnumInNamespace = true;
let includeHeader = "llvm/Frontend/OpenMP/OMP.h.inc";
let clauseEnumSetClass = "OmpClauseSet";
+ let flangClauseBaseClass = "OmpClause";
}
//===----------------------------------------------------------------------===//
@@ -33,39 +34,120 @@ def OpenMP : DirectiveLanguage {
def OMPC_Allocator : Clause<"allocator"> {
let clangClass = "OMPAllocatorClause";
+ let flangClass = "ScalarIntExpr";
+}
+def OMPC_If : Clause<"if"> {
+ let clangClass = "OMPIfClause";
+ let flangClass = "OmpIfClause";
+}
+def OMPC_Final : Clause<"final"> {
+ let clangClass = "OMPFinalClause";
+ let flangClass = "ScalarLogicalExpr";
}
-def OMPC_If : Clause<"if"> { let clangClass = "OMPIfClause"; }
-def OMPC_Final : Clause<"final"> { let clangClass = "OMPFinalClause"; }
def OMPC_NumThreads : Clause<"num_threads"> {
let clangClass = "OMPNumThreadsClause";
+ let flangClass = "ScalarIntExpr";
+}
+def OMPC_SafeLen : Clause<"safelen"> {
+ let clangClass = "OMPSafelenClause";
+ let flangClass = "ScalarIntConstantExpr";
+}
+def OMPC_SimdLen : Clause<"simdlen"> {
+ let clangClass = "OMPSimdlenClause";
+ let flangClass = "ScalarIntConstantExpr";
+}
+def OMPC_Collapse : Clause<"collapse"> {
+ let clangClass = "OMPCollapseClause";
+ let flangClass = "ScalarIntConstantExpr";
+}
+def OMPC_Default : Clause<"default"> {
+ let clangClass = "OMPDefaultClause";
+ let flangClass = "OmpDefaultClause";
+}
+def OMPC_Private : Clause<"private"> {
+ let clangClass = "OMPPrivateClause";
+ let flangClass = "OmpObjectList";
}
-def OMPC_SafeLen : Clause<"safelen"> { let clangClass = "OMPSafelenClause"; }
-def OMPC_SimdLen : Clause<"simdlen"> { let clangClass = "OMPSimdlenClause"; }
-def OMPC_Collapse : Clause<"collapse"> { let clangClass = "OMPCollapseClause"; }
-def OMPC_Default : Clause<"default"> { let clangClass = "OMPDefaultClause"; }
-def OMPC_Private : Clause<"private"> { let clangClass = "OMPPrivateClause"; }
def OMPC_FirstPrivate : Clause<"firstprivate"> {
let clangClass = "OMPFirstprivateClause";
+ let flangClass = "OmpObjectList";
}
def OMPC_LastPrivate : Clause<"lastprivate"> {
let clangClass = "OMPLastprivateClause";
+ let flangClass = "OmpObjectList";
+}
+def OMPC_Shared : Clause<"shared"> {
+ let clangClass = "OMPSharedClause";
+ let flangClass = "OmpObjectList";
}
-def OMPC_Shared : Clause<"shared"> { let clangClass = "OMPSharedClause"; }
def OMPC_Reduction : Clause<"reduction"> {
let clangClass = "OMPReductionClause";
+ let flangClass = "OmpReductionClause";
+}
+def OMPC_Linear : Clause<"linear"> {
+ let clangClass = "OMPLinearClause";
+ let flangClass = "OmpLinearClause";
+}
+def OMPC_Aligned : Clause<"aligned"> {
+ let clangClass = "OMPAlignedClause";
+ let flangClass = "OmpAlignedClause";
+}
+def OMPC_Copyin : Clause<"copyin"> {
+ let clangClass = "OMPCopyinClause";
+ let flangClass = "OmpObjectList";
}
-def OMPC_Linear : Clause<"linear"> { let clangClass = "OMPLinearClause"; }
-def OMPC_Aligned : Clause<"aligned"> { let clangClass = "OMPAlignedClause"; }
-def OMPC_Copyin : Clause<"copyin"> { let clangClass = "OMPCopyinClause"; }
def OMPC_CopyPrivate : Clause<"copyprivate"> {
let clangClass = "OMPCopyprivateClause";
+ let flangClass = "OmpObjectList";
}
+def OMP_PROC_BIND_master : ClauseVal<"master",2,1> {}
+def OMP_PROC_BIND_close : ClauseVal<"close",3,1> {}
+def OMP_PROC_BIND_spread : ClauseVal<"spread",4,1> {}
+def OMP_PROC_BIND_default : ClauseVal<"default",5,0> {}
+def OMP_PROC_BIND_unknown : ClauseVal<"unknown",6,0> { let isDefault = true; }
def OMPC_ProcBind : Clause<"proc_bind"> {
let clangClass = "OMPProcBindClause";
+ let flangClass = "OmpProcBindClause";
+ let enumClauseValue = "ProcBindKind";
+ let allowedClauseValues = [
+ OMP_PROC_BIND_master,
+ OMP_PROC_BIND_close,
+ OMP_PROC_BIND_spread,
+ OMP_PROC_BIND_default,
+ OMP_PROC_BIND_unknown
+ ];
+}
+
+// static and auto are C++ keywords so need a capital to disambiguate.
+def OMP_SCHEDULE_Static : ClauseVal<"Static", 2, 1> {}
+def OMP_SCHEDULE_Dynamic : ClauseVal<"Dynamic", 3, 1> {}
+def OMP_SCHEDULE_Guided : ClauseVal<"Guided", 4, 1> {}
+def OMP_SCHEDULE_Auto : ClauseVal<"Auto", 5, 1> {}
+def OMP_SCHEDULE_Runtime : ClauseVal<"Runtime", 6, 1> {}
+def OMP_SCHEDULE_Default : ClauseVal<"Default", 7, 0> { let isDefault = 1; }
+
+def OMPC_Schedule : Clause<"schedule"> {
+ let clangClass = "OMPScheduleClause";
+ let flangClass = "OmpScheduleClause";
+ let enumClauseValue = "ScheduleKind";
+ let allowedClauseValues = [
+ OMP_SCHEDULE_Static,
+ OMP_SCHEDULE_Dynamic,
+ OMP_SCHEDULE_Guided,
+ OMP_SCHEDULE_Auto,
+ OMP_SCHEDULE_Runtime,
+ OMP_SCHEDULE_Default
+ ];
+}
+
+def OMPC_Ordered : Clause<"ordered"> {
+ let clangClass = "OMPOrderedClause";
+ let flangClass = "ScalarIntConstantExpr";
+ let isValueOptional = true;
+}
+def OMPC_NoWait : Clause<"nowait"> {
+ let clangClass = "OMPNowaitClause";
}
-def OMPC_Schedule : Clause<"schedule"> { let clangClass = "OMPScheduleClause"; }
-def OMPC_Ordered : Clause<"ordered"> { let clangClass = "OMPOrderedClause"; }
-def OMPC_NoWait : Clause<"nowait"> { let clangClass = "OMPNowaitClause"; }
def OMPC_Untied : Clause<"untied"> { let clangClass = "OMPUntiedClause"; }
def OMPC_Mergeable : Clause<"mergeable"> {
let clangClass = "OMPMergeableClause";
@@ -79,50 +161,77 @@ def OMPC_AcqRel : Clause<"acq_rel"> { let clangClass = "OMPAcqRelClause"; }
def OMPC_Acquire : Clause<"acquire"> { let clangClass = "OMPAcquireClause"; }
def OMPC_Release : Clause<"release"> { let clangClass = "OMPReleaseClause"; }
def OMPC_Relaxed : Clause<"relaxed"> { let clangClass = "OMPRelaxedClause"; }
-def OMPC_Depend : Clause<"depend"> { let clangClass = "OMPDependClause"; }
-def OMPC_Device : Clause<"device"> { let clangClass = "OMPDeviceClause"; }
+def OMPC_Depend : Clause<"depend"> {
+ let clangClass = "OMPDependClause";
+ let flangClass = "OmpDependClause";
+}
+def OMPC_Device : Clause<"device"> {
+ let clangClass = "OMPDeviceClause";
+ let flangClass = "ScalarIntExpr";
+}
def OMPC_Threads : Clause<"threads"> { let clangClass = "OMPThreadsClause"; }
def OMPC_Simd : Clause<"simd"> { let clangClass = "OMPSIMDClause"; }
-def OMPC_Map : Clause<"map"> { let clangClass = "OMPMapClause"; }
+def OMPC_Map : Clause<"map"> {
+ let clangClass = "OMPMapClause";
+ let flangClass = "OmpMapClause";
+}
def OMPC_NumTeams : Clause<"num_teams"> {
let clangClass = "OMPNumTeamsClause";
+ let flangClass = "ScalarIntExpr";
}
def OMPC_ThreadLimit : Clause<"thread_limit"> {
let clangClass = "OMPThreadLimitClause";
+ let flangClass = "ScalarIntExpr";
}
def OMPC_Priority : Clause<"priority"> {
let clangClass = "OMPPriorityClause";
+ let flangClass = "ScalarIntExpr";
}
def OMPC_GrainSize : Clause<"grainsize"> {
let clangClass = "OMPGrainsizeClause";
+ let flangClass = "ScalarIntExpr";
}
def OMPC_NoGroup : Clause<"nogroup"> {
let clangClass = "OMPNogroupClause";
}
def OMPC_NumTasks : Clause<"num_tasks"> {
let clangClass = "OMPNumTasksClause";
+ let flangClass = "ScalarIntExpr";
}
def OMPC_Hint : Clause<"hint"> {
let clangClass = "OMPHintClause";
+ let flangClass = "ConstantExpr";
}
def OMPC_DistSchedule : Clause<"dist_schedule"> {
let clangClass = "OMPDistScheduleClause";
+ let flangClass = "ScalarIntExpr";
+ let isValueOptional = true;
}
def OMPC_DefaultMap : Clause<"defaultmap"> {
let clangClass = "OMPDefaultmapClause";
+ let flangClass = "OmpDefaultmapClause";
}
def OMPC_To : Clause<"to"> {
let clangClass = "OMPToClause";
+ let flangClass = "OmpObjectList";
+}
+def OMPC_From : Clause<"from"> {
+ let clangClass = "OMPFromClause";
+ let flangClass = "OmpObjectList";
}
-def OMPC_From : Clause<"from"> { let clangClass = "OMPFromClause"; }
def OMPC_UseDevicePtr : Clause<"use_device_ptr"> {
let clangClass = "OMPUseDevicePtrClause";
+ let flangClass = "Name";
+ let isValueList = true;
}
def OMPC_IsDevicePtr : Clause<"is_device_ptr"> {
let clangClass = "OMPIsDevicePtrClause";
+ let flangClass = "Name";
+ let isValueList = true;
}
def OMPC_TaskReduction : Clause<"task_reduction"> {
let clangClass = "OMPTaskReductionClause";
+ let flangClass = "OmpReductionClause";
}
def OMPC_InReduction : Clause<"in_reduction"> {
let clangClass = "OMPInReductionClause";
@@ -144,12 +253,19 @@ def OMPC_AtomicDefaultMemOrder : Clause<"atomic_default_mem_order"> {
}
def OMPC_Allocate : Clause<"allocate"> {
let clangClass = "OMPAllocateClause";
+ let flangClass = "OmpAllocateClause";
}
def OMPC_NonTemporal : Clause<"nontemporal"> {
let clangClass = "OMPNontemporalClause";
}
+
+def OMP_ORDER_concurrent : ClauseVal<"default",2,0> { let isDefault = 1; }
def OMPC_Order : Clause<"order"> {
let clangClass = "OMPOrderClause";
+ let enumClauseValue = "OrderKind";
+ let allowedClauseValues = [
+ OMP_ORDER_concurrent
+ ];
}
def OMPC_Destroy : Clause<"destroy"> {
let clangClass = "OMPDestroyClause";
@@ -172,26 +288,31 @@ def OMPC_Affinity : Clause<"affinity"> {
def OMPC_UseDeviceAddr : Clause<"use_device_addr"> {
let clangClass = "OMPUseDeviceAddrClause";
}
-def OMPC_Uniform : Clause<"uniform"> {}
+def OMPC_Uniform : Clause<"uniform"> {
+ let flangClass = "Name";
+ let isValueList = true;
+}
def OMPC_DeviceType : Clause<"device_type"> {}
def OMPC_Match : Clause<"match"> {}
def OMPC_Depobj : Clause<"depobj"> {
let clangClass = "OMPDepobjClause";
- let isImplicit = 1;
+ let isImplicit = true;
}
def OMPC_Flush : Clause<"flush"> {
let clangClass = "OMPFlushClause";
- let isImplicit = 1;
+ let isImplicit = true;
}
def OMPC_ThreadPrivate : Clause<"threadprivate"> {
let alternativeName = "threadprivate or thread local";
- let isImplicit = 1;
+ let isImplicit = true;
}
def OMPC_Unknown : Clause<"unknown"> {
- let isImplicit = 1;
- let isDefault = 1;
+ let isImplicit = true;
+ let isDefault = true;
+}
+def OMPC_Link : Clause<"link"> {
+ let flangClass = "OmpObjectList";
}
-def OMPC_Link : Clause<"link"> {}
def OMPC_Inbranch : Clause<"inbranch"> {}
def OMPC_Notinbranch : Clause<"notinbranch"> {}
@@ -202,7 +323,6 @@ def OMPC_Notinbranch : Clause<"notinbranch"> {}
def OMP_ThreadPrivate : Directive<"threadprivate"> {}
def OMP_Parallel : Directive<"parallel"> {
let allowedClauses = [
- VersionedClause<OMPC_Default>,
VersionedClause<OMPC_Private>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_Shared>,
@@ -211,6 +331,7 @@ def OMP_Parallel : Directive<"parallel"> {
VersionedClause<OMPC_Allocate>
];
let allowedOnceClauses = [
+ VersionedClause<OMPC_Default>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_NumThreads>,
VersionedClause<OMPC_ProcBind>,
@@ -218,7 +339,6 @@ def OMP_Parallel : Directive<"parallel"> {
}
def OMP_Task : Directive<"task"> {
let allowedClauses = [
- VersionedClause<OMPC_Default>,
VersionedClause<OMPC_Private>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_Shared>,
@@ -231,6 +351,7 @@ def OMP_Task : Directive<"task"> {
VersionedClause<OMPC_Affinity, 50>
];
let allowedOnceClauses = [
+ VersionedClause<OMPC_Default>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_Final>,
VersionedClause<OMPC_Priority>
@@ -312,7 +433,11 @@ def OMP_Critical : Directive<"critical"> {
}
def OMP_TaskYield : Directive<"taskyield"> {}
def OMP_Barrier : Directive<"barrier"> {}
-def OMP_TaskWait : Directive<"taskwait"> {}
+def OMP_TaskWait : Directive<"taskwait"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_Depend, 50>
+ ];
+}
def OMP_TaskGroup : Directive<"taskgroup"> {
let allowedClauses = [
VersionedClause<OMPC_TaskReduction>,
@@ -320,7 +445,7 @@ def OMP_TaskGroup : Directive<"taskgroup"> {
];
}
def OMP_Flush : Directive<"flush"> {
- let allowedClauses = [
+ let allowedOnceClauses = [
VersionedClause<OMPC_AcqRel, 50>,
VersionedClause<OMPC_Acquire, 50>,
VersionedClause<OMPC_Release, 50>,
@@ -342,6 +467,8 @@ def OMP_Atomic : Directive<"atomic"> {
VersionedClause<OMPC_Write>,
VersionedClause<OMPC_Update>,
VersionedClause<OMPC_Capture>,
+ ];
+ let allowedOnceClauses = [
VersionedClause<OMPC_SeqCst>,
VersionedClause<OMPC_AcqRel, 50>,
VersionedClause<OMPC_Acquire, 50>,
@@ -398,7 +525,6 @@ def OMP_Requires : Directive<"requires"> {
}
def OMP_TargetData : Directive<"target data"> {
let allowedClauses = [
- VersionedClause<OMPC_Map>,
VersionedClause<OMPC_UseDevicePtr>,
VersionedClause<OMPC_UseDeviceAddr, 50>
];
@@ -412,19 +538,20 @@ def OMP_TargetData : Directive<"target data"> {
}
def OMP_TargetEnterData : Directive<"target enter data"> {
let allowedClauses = [
- VersionedClause<OMPC_Depend>,
- VersionedClause<OMPC_Map>
+ VersionedClause<OMPC_Depend>
];
let allowedOnceClauses = [
VersionedClause<OMPC_If>,
VersionedClause<OMPC_Device>,
VersionedClause<OMPC_NoWait>
];
+ let requiredClauses = [
+ VersionedClause<OMPC_Map>
+ ];
}
def OMP_TargetExitData : Directive<"target exit data"> {
let allowedClauses = [
- VersionedClause<OMPC_Depend>,
- VersionedClause<OMPC_Map>
+ VersionedClause<OMPC_Depend>
];
let allowedOnceClauses = [
VersionedClause<OMPC_Device>,
@@ -708,7 +835,6 @@ def OMP_TaskLoop : Directive<"taskloop"> {
VersionedClause<OMPC_Private>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_LastPrivate>,
- VersionedClause<OMPC_Default>,
VersionedClause<OMPC_Untied>,
VersionedClause<OMPC_Mergeable>,
VersionedClause<OMPC_NoGroup>,
@@ -717,6 +843,7 @@ def OMP_TaskLoop : Directive<"taskloop"> {
VersionedClause<OMPC_Allocate>
];
let allowedOnceClauses = [
+ VersionedClause<OMPC_Default>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_Collapse>,
VersionedClause<OMPC_Final>,
@@ -770,7 +897,12 @@ def OMP_Distribute : Directive<"distribute"> {
VersionedClause<OMPC_DistSchedule>
];
}
-def OMP_DeclareTarget : Directive<"declare target"> {}
+def OMP_DeclareTarget : Directive<"declare target"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_To>,
+ VersionedClause<OMPC_Link>
+ ];
+}
def OMP_EndDeclareTarget : Directive<"end declare target"> {}
def OMP_DistributeParallelFor : Directive<"distribute parallel for"> {
let allowedClauses = [
@@ -1460,6 +1592,9 @@ def OMP_Scan : Directive<"scan"> {
VersionedClause<OMPC_Exclusive, 50>
];
}
+def OMP_Assumes : Directive<"assumes"> {}
+def OMP_BeginAssumes : Directive<"begin assumes"> {}
+def OMP_EndAssumes : Directive<"end assumes"> {}
def OMP_BeginDeclareVariant : Directive<"begin declare variant"> {}
def OMP_EndDeclareVariant : Directive<"end declare variant"> {}
def OMP_ParallelWorkshare : Directive<"parallel workshare"> {
@@ -1481,9 +1616,24 @@ def OMP_ParallelWorkshare : Directive<"parallel workshare"> {
def OMP_Workshare : Directive<"workshare"> {}
def OMP_EndDo : Directive<"end do"> {}
def OMP_EndDoSimd : Directive<"end do simd"> {}
-def OMP_EndSections : Directive<"end sections"> {}
-def OMP_EndSingle : Directive<"end single"> {}
-def OMP_EndWorkshare : Directive<"end workshare"> {}
+def OMP_EndSections : Directive<"end sections"> {
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_NoWait>
+ ];
+}
+def OMP_EndSingle : Directive<"end single"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_CopyPrivate>
+ ];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_NoWait>
+ ];
+}
+def OMP_EndWorkshare : Directive<"end workshare"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_NoWait>
+ ];
+}
def OMP_Unknown : Directive<"unknown"> {
- let isDefault = 1;
+ let isDefault = true;
}
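TableGen turns these records into the llvm::omp enums and lookup helpers declared in the generated OMP.h.inc, which OMPConstants.h pulls in. A short sketch, not taken from this patch, of how the new taskwait/depend pairing and the proc_bind ClauseVal records surface to C++ consumers; the printed text is illustrative only:

  #include "llvm/Frontend/OpenMP/OMPConstants.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm::omp;

  void queryOpenMPTables() {
    // taskwait accepts the depend clause only from OpenMP 5.0 onwards,
    // mirroring the VersionedClause<OMPC_Depend, 50> record above.
    bool On50 = isAllowedClauseForDirective(OMPD_taskwait, OMPC_depend, 50);
    bool On45 = isAllowedClauseForDirective(OMPD_taskwait, OMPC_depend, 45);
    llvm::errs() << getOpenMPDirectiveName(OMPD_taskwait) << " + "
                 << getOpenMPClauseName(OMPC_depend)
                 << ": 5.0=" << (On50 ? "yes" : "no")
                 << " 4.5=" << (On45 ? "yes" : "no") << "\n";

    // The proc_bind ClauseVal records become the ProcBindKind enum, with the
    // listed integer values (close == 3).
    llvm::errs() << "close = "
                 << static_cast<unsigned>(ProcBindKind::OMP_PROC_BIND_close)
                 << "\n";
  }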
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index d171d0a2b6c4..36ce3fc0f66f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Frontend/OpenMP/OMP.h.inc"
namespace llvm {
@@ -41,12 +42,12 @@ enum class InternalControlVar {
#include "llvm/Frontend/OpenMP/OMPKinds.def"
enum class ICVInitValue {
-#define ICV_DATA_ENV(Enum, Name, EnvVar, Init) Init,
+#define ICV_INIT_VALUE(Enum, Name) Enum,
#include "llvm/Frontend/OpenMP/OMPKinds.def"
};
-#define ICV_DATA_ENV(Enum, Name, EnvVar, Init) \
- constexpr auto Init = omp::ICVInitValue::Init;
+#define ICV_INIT_VALUE(Enum, Name) \
+ constexpr auto Enum = omp::ICVInitValue::Enum;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
/// IDs for all omp runtime library (RTL) functions.
@@ -68,16 +69,6 @@ enum class DefaultKind {
constexpr auto Enum = omp::DefaultKind::Enum;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
-/// IDs for the different proc bind kinds.
-enum class ProcBindKind {
-#define OMP_PROC_BIND_KIND(Enum, Str, Value) Enum = Value,
-#include "llvm/Frontend/OpenMP/OMPKinds.def"
-};
-
-#define OMP_PROC_BIND_KIND(Enum, ...) \
- constexpr auto Enum = omp::ProcBindKind::Enum;
-#include "llvm/Frontend/OpenMP/OMPKinds.def"
-
/// IDs for all omp runtime library ident_t flag encodings (see
/// their definition in openmp/runtime/src/kmp.h).
enum class IdentFlag {
@@ -89,6 +80,33 @@ enum class IdentFlag {
#define OMP_IDENT_FLAG(Enum, ...) constexpr auto Enum = omp::IdentFlag::Enum;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
+/// Helper to describe assume clauses.
+struct AssumptionClauseMappingInfo {
+ /// The identifier describing the (beginning of the) clause.
+ llvm::StringLiteral Identifier;
+ /// Flag to determine if the identifier is a full name or the start of a name.
+ bool StartsWith;
+ /// Flag to determine if a directive list follows.
+ bool HasDirectiveList;
+ /// Flag to determine if an expression follows.
+ bool HasExpression;
+};
+
+/// All known assume clauses.
+static constexpr AssumptionClauseMappingInfo AssumptionClauseMappings[] = {
+#define OMP_ASSUME_CLAUSE(Identifier, StartsWith, HasDirectiveList, \
+ HasExpression) \
+ {Identifier, StartsWith, HasDirectiveList, HasExpression},
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+};
+
+inline std::string getAllAssumeClauseOptions() {
+ std::string S;
+ for (const AssumptionClauseMappingInfo &ACMI : AssumptionClauseMappings)
+ S += (S.empty() ? "'" : "', '") + ACMI.Identifier.str();
+ return S + "'";
+}
+
} // end namespace omp
} // end namespace llvm
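The new assumption-clause table is meant to be scanned when parsing assume/assumes directives, with getAllAssumeClauseOptions() providing the diagnostic fallback. A small sketch of how a front end might match a clause spelling against AssumptionClauseMappings; the lookup and diagnostic helpers themselves are hypothetical, only the table and getAllAssumeClauseOptions() come from the header above:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Frontend/OpenMP/OMPConstants.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;
  using namespace llvm::omp;

  // Hypothetical helper: find the mapping entry that matches Spelling.
  static const AssumptionClauseMappingInfo *lookupAssumeClause(StringRef Spelling) {
    for (const AssumptionClauseMappingInfo &ACMI : AssumptionClauseMappings) {
      bool Matches = ACMI.StartsWith ? Spelling.startswith(ACMI.Identifier)
                                     : Spelling == ACMI.Identifier;
      if (Matches)
        return &ACMI;
    }
    return nullptr;
  }

  void diagnoseAssumeClause(StringRef Spelling) {
    if (const AssumptionClauseMappingInfo *ACMI = lookupAssumeClause(Spelling)) {
      errs() << "clause '" << ACMI->Identifier << "' expects "
             << (ACMI->HasExpression ? "an expression\n" : "no expression\n");
      return;
    }
    errs() << "unknown assumption clause, expected one of "
           << getAllAssumeClauseOptions() << "\n";
  }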
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPContext.h b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
index 1a42d189db44..8a4179167c89 100644
--- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
@@ -70,15 +70,20 @@ TraitSelector getOpenMPContextTraitSelectorForProperty(TraitProperty Property);
/// Return a textual representation of the trait selector \p Kind.
StringRef getOpenMPContextTraitSelectorName(TraitSelector Kind);
-/// Parse \p Str and return the trait set it matches or
-/// TraitProperty::invalid.
-TraitProperty getOpenMPContextTraitPropertyKind(TraitSet Set, StringRef Str);
+/// Parse \p Str and return the trait property it matches in the set \p Set and
+/// selector \p Selector or TraitProperty::invalid.
+TraitProperty getOpenMPContextTraitPropertyKind(TraitSet Set,
+ TraitSelector Selector,
+ StringRef Str);
/// Return the trait property for a singleton selector \p Selector.
TraitProperty getOpenMPContextTraitPropertyForSelector(TraitSelector Selector);
-/// Return a textual representation of the trait property \p Kind.
-StringRef getOpenMPContextTraitPropertyName(TraitProperty Kind);
+/// Return a textual representation of the trait property \p Kind, which might
+/// be the raw string we parsed (\p RawString) if we do not translate the
+/// property into a (distinct) enum.
+StringRef getOpenMPContextTraitPropertyName(TraitProperty Kind,
+ StringRef RawString);
/// Return a textual representation of the trait property \p Kind with selector
/// and set name included.
@@ -112,24 +117,36 @@ bool isValidTraitPropertyForTraitSetAndSelector(TraitProperty Property,
/// scored (via the ScoresMap). In addition, the required construct nesting is
/// described as well.
struct VariantMatchInfo {
- /// Add the trait \p Property to the required trait set. If \p Score is not
- /// null, it recorded as well. If \p Property is in the `construct` set it
- /// is recorded in-order in the ConstructTraits as well.
- void addTrait(TraitProperty Property, APInt *Score = nullptr) {
- addTrait(getOpenMPContextTraitSetForProperty(Property), Property, Score);
+ /// Add the trait \p Property to the required trait set. \p RawString is the
+ /// string we parsed and derived \p Property from. If \p Score is not null, it
+ /// recorded as well. If \p Property is in the `construct` set it is recorded
+ /// in-order in the ConstructTraits as well.
+ void addTrait(TraitProperty Property, StringRef RawString,
+ APInt *Score = nullptr) {
+ addTrait(getOpenMPContextTraitSetForProperty(Property), Property, RawString,
+ Score);
}
/// Add the trait \p Property which is in set \p Set to the required trait
- /// set. If \p Score is not null, it recorded as well. If \p Set is the
- /// `construct` set it is recorded in-order in the ConstructTraits as well.
- void addTrait(TraitSet Set, TraitProperty Property, APInt *Score = nullptr) {
+ /// set. \p RawString is the string we parsed and derived \p Property from. If
+ /// \p Score is not null, it recorded as well. If \p Set is the `construct`
+ /// set it is recorded in-order in the ConstructTraits as well.
+ void addTrait(TraitSet Set, TraitProperty Property, StringRef RawString,
+ APInt *Score = nullptr) {
if (Score)
ScoreMap[Property] = *Score;
+
+ // Special handling for `device={isa(...)}` as we do not match the enum but
+ // the raw string.
+ if (Property == TraitProperty::device_isa___ANY)
+ ISATraits.push_back(RawString);
+
RequiredTraits.set(unsigned(Property));
if (Set == TraitSet::construct)
ConstructTraits.push_back(Property);
}
BitVector RequiredTraits = BitVector(unsigned(TraitProperty::Last) + 1);
+ SmallVector<StringRef, 8> ISATraits;
SmallVector<TraitProperty, 8> ConstructTraits;
SmallDenseMap<TraitProperty, APInt> ScoreMap;
};
@@ -139,6 +156,7 @@ struct VariantMatchInfo {
/// in OpenMP constructs at the location.
struct OMPContext {
OMPContext(bool IsDeviceCompilation, Triple TargetTriple);
+ virtual ~OMPContext() = default;
void addTrait(TraitProperty Property) {
addTrait(getOpenMPContextTraitSetForProperty(Property), Property);
@@ -149,6 +167,11 @@ struct OMPContext {
ConstructTraits.push_back(Property);
}
+ /// Hook for users to check if an ISA trait matches. The trait is described as
+ /// the string that got parsed and it depends on the target and context if
+ /// this matches or not.
+ virtual bool matchesISATrait(StringRef) const { return false; }
+
BitVector ActiveTraits = BitVector(unsigned(TraitProperty::Last) + 1);
SmallVector<TraitProperty, 8> ConstructTraits;
};
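The extra StringRef parameter exists because device={isa(...)} traits are matched as raw strings rather than enum values, and the new virtual hook lets a target-aware context answer them. A rough sketch of recording such a trait and overriding the hook, under the assumption that the TraitSet/TraitSelector enumerators follow the usual set_selector naming (TraitSet::device, TraitSelector::device_isa); the supported ISA strings are made up:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/Triple.h"
  #include "llvm/Frontend/OpenMP/OMPContext.h"

  using namespace llvm;
  using namespace llvm::omp;

  // Context that claims to support a couple of ISA strings (illustrative only).
  struct MyOMPContext : OMPContext {
    MyOMPContext(bool IsDevice, Triple TT) : OMPContext(IsDevice, TT) {}
    bool matchesISATrait(StringRef ISA) const override {
      return ISA == "sse4.2" || ISA == "avx2";
    }
  };

  void recordISATrait(VariantMatchInfo &VMI, StringRef RawISA) {
    // Resolve the parsed string; for isa(...) every spelling maps to the
    // catch-all device_isa property and the raw string is kept alongside it.
    TraitProperty Prop = getOpenMPContextTraitPropertyKind(
        TraitSet::device, TraitSelector::device_isa, RawISA);
    VMI.addTrait(TraitSet::device, Prop, RawISA);
  }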
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
index 3ae4a2edbf96..6b48cc447e13 100644
--- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -1,9 +1,8 @@
//====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -29,29 +28,30 @@ namespace omp {
/// use the new array name.
///
/// Example usage in clang:
-/// const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size);
+/// const unsigned slot_size =
+/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
///
/// Example usage in libomptarget/deviceRTLs:
-/// #include "OMPGridValues.h"
+/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
/// #ifdef __AMDGPU__
/// #define GRIDVAL AMDGPUGpuGridValues
/// #else
/// #define GRIDVAL NVPTXGpuGridValues
/// #endif
/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
-/// GRIDVAL[GV_Warp_Size]
+/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
///
/// Example usage in libomptarget hsa plugin:
-/// #include "OMPGridValues.h"
+/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
/// #define GRIDVAL AMDGPUGpuGridValues
/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
-/// GRIDVAL[GV_Warp_Size]
+/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
///
/// Example usage in libomptarget cuda plugin:
-/// #include "OMPGridValues.h"
+/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
/// #define GRIDVAL NVPTXGpuGridValues
/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
-/// GRIDVAL[GV_Warp_Size]
+/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
///
enum GVIDX {
/// The maximum number of workers in a kernel.
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a2a440d65fd8..22204d9a9ccb 100644
--- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -18,8 +18,10 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
+#include <forward_list>
namespace llvm {
+class CanonicalLoopInfo;
/// An interface to create LLVM-IR for OpenMP directives.
///
@@ -36,7 +38,10 @@ public:
void initialize();
/// Finalize the underlying module, e.g., by outlining regions.
- void finalize();
+ /// \param AllowExtractorSinking Flag to include sinking instructions,
+ /// emitted by CodeExtractor, in the
+ /// outlined region. Default is false.
+ void finalize(bool AllowExtractorSinking = false);
/// Add attributes known for \p FnID to \p Fn.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
@@ -56,7 +61,7 @@ public:
struct FinalizationInfo {
/// The finalization callback provided by the last in-flight invocation of
- /// CreateXXXX for the directive of kind DK.
+ /// createXXXX for the directive of kind DK.
FinalizeCallbackTy FiniCB;
/// The directive kind of the innermost directive that has an associated
@@ -96,6 +101,17 @@ public:
function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
BasicBlock &ContinuationBB)>;
+ /// Callback type for loop body code generation.
+ ///
+ /// \param CodeGenIP is the insertion point where the loop's body code must be
+ /// placed. This will be a dedicated BasicBlock with a
+ /// conditional branch from the loop condition check and
+ /// terminated with an unconditional branch to the loop
+ /// latch.
+ /// \param IndVar is the induction variable usable at the insertion point.
+ using LoopBodyGenCallbackTy =
+ function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
+
/// Callback type for variable privatization (think copy & default
/// constructor).
///
@@ -103,15 +119,20 @@ public:
/// should be placed.
/// \param CodeGenIP is the insertion point at which the privatization code
/// should be placed.
- /// \param Val The value beeing copied/created.
+ /// \param Original The value being copied/created, should not be used in the
+ /// generated IR.
+ /// \param Inner The equivalent of \p Original that should be used in the
+ /// generated IR; this is equal to \p Original if the value is
+ /// a pointer and can thus be passed directly, otherwise it is
+ /// an equivalent but different value.
/// \param ReplVal The replacement value, thus a copy or new created version
- /// of \p Val.
+ /// of \p Inner.
///
/// \returns The new insertion point where code generation continues and
- /// \p ReplVal the replacement of \p Val.
+ /// \p ReplVal the replacement value.
using PrivatizeCallbackTy = function_ref<InsertPointTy(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Val,
- Value *&ReplVal)>;
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
+ Value &Inner, Value *&ReplVal)>;
/// Description of a LLVM-IR insertion point (IP) and a debug/source location
/// (filename, line, column, ...).
@@ -139,7 +160,7 @@ public:
/// should be checked and acted upon.
///
/// \returns The insertion point after the barrier.
- InsertPointTy CreateBarrier(const LocationDescription &Loc, omp::Directive DK,
+ InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
bool ForceSimpleCall = false,
bool CheckCancelFlag = true);
@@ -150,12 +171,13 @@ public:
/// \param CanceledDirective The kind of directive that is canceled.
///
/// \returns The insertion point after the barrier.
- InsertPointTy CreateCancel(const LocationDescription &Loc, Value *IfCondition,
+ InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
omp::Directive CanceledDirective);
/// Generator for '#omp parallel'
///
/// \param Loc The insert and source location description.
+ /// \param AllocaIP The insertion points to be used for alloca instructions.
/// \param BodyGenCB Callback that will generate the region code.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
@@ -166,25 +188,179 @@ public:
///
/// \returns The insertion position *after* the parallel.
IRBuilder<>::InsertPoint
- CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
- PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
- Value *IfCondition, Value *NumThreads,
- omp::ProcBindKind ProcBind, bool IsCancellable);
+ createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
+ BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
+ FinalizeCallbackTy FiniCB, Value *IfCondition,
+ Value *NumThreads, omp::ProcBindKind ProcBind,
+ bool IsCancellable);
+
+ /// Generator for the control flow structure of an OpenMP canonical loop.
+ ///
+ /// This generator operates on the logical iteration space of the loop, i.e.
+ /// the caller only has to provide a loop trip count of the loop as defined by
+ /// base language semantics. The trip count is interpreted as an unsigned
+ /// integer. The induction variable passed to \p BodyGenCB will be of the same
+ /// type and run from 0 to \p TripCount - 1. It is up to the callback to
+ /// convert the logical iteration variable to the loop counter variable in the
+ /// loop body.
+ ///
+ /// \param Loc The insert and source location description. The insert
+ /// location can be between two instructions or the end of a
+ /// degenerate block (e.g. a BB under construction).
+ /// \param BodyGenCB Callback that will generate the loop body code.
+ /// \param TripCount Number of iterations the loop body is executed.
+ /// \param Name Base name used to derive BB and instruction names.
+ ///
+ /// \returns An object representing the created control flow structure which
+ /// can be used for loop-associated directives.
+ CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
+ LoopBodyGenCallbackTy BodyGenCB,
+ Value *TripCount,
+ const Twine &Name = "loop");
+
+ /// Generator for the control flow structure of an OpenMP canonical loop.
+ ///
+ /// Instead of a logical iteration space, this allows specifying user-defined
+ /// loop counter values using increment, upper- and lower bounds. To
+ /// disambiguate the terminology when counting downwards, instead of lower
+ /// bounds we use \p Start for the loop counter value in the first body
+ /// iteration.
+ ///
+ /// Consider the following limitations:
+ ///
+ /// * A loop counter space over all integer values of its bit-width cannot be
+ /// represented. E.g., using uint8_t, a loop trip count of 256 cannot be
+ /// stored into an 8-bit integer:
+ ///
+ /// DO I = 0, 255, 1
+ ///
+ /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
+ /// effectively counting downwards:
+ ///
+ /// for (uint8_t i = 100u; i > 0; i += 127u)
+ ///
+ ///
+ /// TODO: May need to add additional parameters to represent:
+ ///
+ /// * Allow representing downcounting with unsigned integers.
+ ///
+ /// * Sign of the step and the comparison operator might disagree:
+ ///
+ /// for (int i = 0; i < 42; --i)
+ ///
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param BodyGenCB Callback that will generate the loop body code.
+ /// \param Start Value of the loop counter for the first iterations.
+ /// \param Stop Loop counter values past this will stop the
+ /// iterations.
+ /// \param Step Loop counter increment after each iteration; negative
+ /// means counting down.
+ /// \param IsSigned Whether Start, Stop and Step are signed integers.
+ /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
+ /// counter.
+ /// \param ComputeIP Insertion point for instructions computing the trip
+ /// count. Can be used to ensure the trip count is available
+ /// at the outermost loop of a loop nest. If not set,
+ /// defaults to the preheader of the generated loop.
+ /// \param Name Base name used to derive BB and instruction names.
+ ///
+ /// \returns An object representing the created control flow structure which
+ /// can be used for loop-associated directives.
+ CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
+ LoopBodyGenCallbackTy BodyGenCB,
+ Value *Start, Value *Stop, Value *Step,
+ bool IsSigned, bool InclusiveStop,
+ InsertPointTy ComputeIP = {},
+ const Twine &Name = "loop");
+
+ /// Modifies the canonical loop to be a statically-scheduled workshare loop.
+ ///
+ /// This takes a \p LoopInfo representing a canonical loop, such as the one
+ /// created by \p createCanonicalLoop and emits additional instructions to
+ /// turn it into a workshare loop. In particular, it calls to an OpenMP
+ /// runtime function in the preheader to obtain the loop bounds to be used in
+ /// the current thread, updates the relevant instructions in the canonical
+ /// loop and calls to an OpenMP runtime finalization function after the loop.
+ ///
+ /// \param Loc The source location description, the insertion location
+ /// is not used.
+ /// \param CLI A descriptor of the canonical loop to workshare.
+ /// \param AllocaIP An insertion point for Alloca instructions usable in the
+ /// preheader of the loop.
+ /// \param NeedsBarrier Indicates whether a barrier must be inserted after
+ /// the loop.
+ /// \param Chunk The size of loop chunk considered as a unit when
+ /// scheduling. If \p nullptr, defaults to 1.
+ ///
+ /// \returns Updated CanonicalLoopInfo.
+ CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc,
+ CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP,
+ bool NeedsBarrier,
+ Value *Chunk = nullptr);
+
+ /// Tile a loop nest.
+ ///
+ /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
+ /// \p Loops must be perfectly nested, from outermost to innermost loop
+ /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
+ /// of every loop and every tile size must be usable in the outermost
+ /// loop's preheader. This implies that the loop nest is rectangular.
+ ///
+ /// Example:
+ /// \code
+ /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
+ /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
+ /// body(i, j);
+ /// \endcode
+ ///
+ /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
+ /// \code
+ /// for (int i1 = 0; i1 < 3; ++i1)
+ /// for (int j1 = 0; j1 < 2; ++j1)
+ /// for (int i2 = 0; i2 < 5; ++i2)
+ /// for (int j2 = 0; j2 < 7; ++j2)
+ /// body(i1*5+i2, j1*7+j2);
+ /// \endcode
+ ///
+ /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
+ /// are referred to as the floor loops, and the loops i2 and j2 are the tile
+ /// loops. Tiling also
+ /// handles non-constant trip counts, non-constant tile sizes and trip counts
+ /// that are not multiples of the tile size. In the latter case the tile loop
+ /// of the last floor-loop iteration will have fewer iterations than specified
+ /// as its tile size.
+ ///
+ ///
+ /// @param DL Debug location for instructions added by tiling, for
+ /// instance the floor- and tile trip count computation.
+ /// @param Loops Loops to tile. The CanonicalLoopInfo objects are
+ /// invalidated by this method, i.e. should not be used after
+ /// tiling.
+ /// @param TileSizes For each loop in \p Loops, the tile size for that
+ /// dimension.
+ ///
+ /// \returns A list of generated loops. Contains twice as many loops as the
+ /// input loop nest; the first half are the floor loops and the
+ /// second half are the tile loops.
+ std::vector<CanonicalLoopInfo *>
+ tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
+ ArrayRef<Value *> TileSizes);
/// Generator for '#omp flush'
///
/// \param Loc The location where the flush directive was encountered
- void CreateFlush(const LocationDescription &Loc);
+ void createFlush(const LocationDescription &Loc);
/// Generator for '#omp taskwait'
///
/// \param Loc The location where the taskwait directive was encountered.
- void CreateTaskwait(const LocationDescription &Loc);
+ void createTaskwait(const LocationDescription &Loc);
/// Generator for '#omp taskyield'
///
/// \param Loc The location where the taskyield directive was encountered.
- void CreateTaskyield(const LocationDescription &Loc);
+ void createTaskyield(const LocationDescription &Loc);
///}
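A condensed sketch of how the new entry points compose: createCanonicalLoop builds the loop skeleton around a body callback, and tileLoops then rewrites it. It assumes an already-initialized OpenMPIRBuilder, an IRBuilder positioned where the loop belongs, and an integer pointer Out to store into; the store-into-Out body, the "demo" name and the tile factor of 4 are illustrative only:

  #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"

  using namespace llvm;

  // Emit: for (i = 0; i < TripCount; ++i) Out[i] = i; then tile the loop by 4.
  void emitTiledLoop(OpenMPIRBuilder &OMPB, IRBuilder<> &Builder,
                     Value *TripCount, Value *Out) {
    OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DebugLoc()});

    auto BodyGen = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP, Value *IV) {
      // The callback only sees the body block and the logical induction
      // variable, which runs from 0 to TripCount - 1.
      Builder.restoreIP(CodeGenIP);
      Value *Slot = Builder.CreateGEP(IV->getType(), Out, IV, "slot");
      Builder.CreateStore(IV, Slot);
    };

    CanonicalLoopInfo *CLI =
        OMPB.createCanonicalLoop(Loc, BodyGen, TripCount, "demo");

    // Tile by a constant factor of 4; tileLoops returns the floor loops
    // followed by the tile loops and invalidates CLI.
    Value *Four = ConstantInt::get(CLI->getIndVarType(), 4);
    auto Tiled = OMPB.tileLoops(DebugLoc(), {CLI}, {Four});
    (void)Tiled;
  }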
@@ -224,6 +400,9 @@ public:
omp::IdentFlag Flags = omp::IdentFlag(0),
unsigned Reserve2Flags = 0);
+ // Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL
+ Type *getLanemaskType();
+
/// Generate control flow and cleanup for cancellation.
///
/// \param CancelFlag Flag indicating if the cancellation is performed.
@@ -305,6 +484,10 @@ public:
/// Collection of regions that need to be outlined during finalization.
SmallVector<OutlineInfo, 16> OutlineInfos;
+ /// Collection of owned canonical loop objects that eventually need to be
+ /// free'd.
+ std::forward_list<CanonicalLoopInfo> LoopInfos;
+
/// Add a new region that will be outlined later.
void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
@@ -316,6 +499,32 @@ public:
StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars;
public:
+ /// Generator for __kmpc_copyprivate
+ ///
+ /// \param Loc The source location description.
+ /// \param BufSize Number of elements in the buffer.
+ /// \param CpyBuf List of pointers to data to be copied.
+ /// \param CpyFn function to call for copying data.
+ /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
+ ///
+ /// \return The insertion position *after* the CopyPrivate call.
+
+ InsertPointTy createCopyPrivate(const LocationDescription &Loc,
+ llvm::Value *BufSize, llvm::Value *CpyBuf,
+ llvm::Value *CpyFn, llvm::Value *DidIt);
+
+ /// Generator for '#omp single'
+ ///
+ /// \param Loc The source location description.
+ /// \param BodyGenCB Callback that will generate the region code.
+ /// \param FiniCB Callback to finalize variable copies.
+ /// \param DidIt Local variable used as a flag to indicate 'single' thread
+ ///
+ /// \returns The insertion position *after* the single call.
+ InsertPointTy createSingle(const LocationDescription &Loc,
+ BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB, llvm::Value *DidIt);
+
/// Generator for '#omp master'
///
/// \param Loc The insert and source location description.
@@ -323,7 +532,7 @@ public:
/// \param FiniCB Callback to finalize variable copies.
///
/// \returns The insertion position *after* the master.
- InsertPointTy CreateMaster(const LocationDescription &Loc,
+ InsertPointTy createMaster(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
FinalizeCallbackTy FiniCB);
@@ -336,7 +545,7 @@ public:
/// \param HintInst Hint Instruction for hint clause associated with critical
///
/// \returns The insertion position *after* the master.
- InsertPointTy CreateCritical(const LocationDescription &Loc,
+ InsertPointTy createCritical(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
FinalizeCallbackTy FiniCB,
StringRef CriticalName, Value *HintInst);
@@ -353,7 +562,7 @@ public:
// and copy.in.end block
///
/// \returns The insertion point where copying operation to be emitted.
- InsertPointTy CreateCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
+ InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
Value *PrivateAddr,
llvm::IntegerType *IntPtrTy,
bool BranchtoEnd = true);
@@ -366,7 +575,7 @@ public:
/// \param Name Name of call Instruction for OMP_alloc
///
/// \returns CallInst to the OMP_Alloc call
- CallInst *CreateOMPAlloc(const LocationDescription &Loc, Value *Size,
+ CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
Value *Allocator, std::string Name = "");
/// Create a runtime call for kmpc_free
@@ -377,7 +586,7 @@ public:
/// \param Name Name of call Instruction for OMP_Free
///
/// \returns CallInst to the OMP_Free call
- CallInst *CreateOMPFree(const LocationDescription &Loc, Value *Addr,
+ CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
Value *Allocator, std::string Name = "");
/// Create a runtime call for kmpc_threadprivate_cached
@@ -388,7 +597,7 @@ public:
/// \param Name Name of call Instruction for callinst
///
/// \returns CallInst to the thread private cache call.
- CallInst *CreateCachedThreadPrivate(const LocationDescription &Loc,
+ CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
llvm::Value *Pointer,
llvm::ConstantInt *Size,
const llvm::Twine &Name = Twine(""));
@@ -496,6 +705,155 @@ private:
/// \param CriticalName Name of the critical region.
///
Value *getOMPCriticalRegionLock(StringRef CriticalName);
+
+ /// Create the control flow structure of a canonical OpenMP loop.
+ ///
+ /// The emitted loop will be disconnected, i.e. no edge to the loop's
+ /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
+ /// IRBuilder location is not preserved.
+ ///
+ /// \param DL DebugLoc used for the instructions in the skeleton.
+ /// \param TripCount Value to be used for the trip count.
+ /// \param F Function in which to insert the BasicBlocks.
+ /// \param PreInsertBefore Where to insert BBs that execute before the body,
+ /// typically the body itself.
+ /// \param PostInsertBefore Where to insert BBs that execute after the body.
+ /// \param Name Base name used to derive BB
+ /// and instruction names.
+ ///
+ /// \returns The CanonicalLoopInfo that represents the emitted loop.
+ CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
+ Function *F,
+ BasicBlock *PreInsertBefore,
+ BasicBlock *PostInsertBefore,
+ const Twine &Name = {});
+};
+
+/// Class to represent the control flow structure of an OpenMP canonical loop.
+///
+/// The control-flow structure is standardized for easy consumption by
+/// directives associated with loops. For instance, the worksharing-loop
+/// construct may change this control flow such that each loop iteration is
+/// executed on only one thread.
+///
+/// The control flow can be described as follows:
+///
+/// Preheader
+/// |
+/// /-> Header
+/// | |
+/// | Cond---\
+/// | | |
+/// | Body |
+/// | | | |
+/// | <...> |
+/// | | | |
+/// \--Latch |
+/// |
+/// Exit
+/// |
+/// After
+///
+/// Code in the header, condition block, latch and exit block must not have any
+/// side-effect. The body block is the single entry point into the loop body,
+/// which may contain arbitrary control flow as long as all control paths
+/// eventually branch to the latch block.
+///
+/// Defined outside OpenMPIRBuilder because one cannot forward-declare nested
+/// classes.
+class CanonicalLoopInfo {
+ friend class OpenMPIRBuilder;
+
+private:
+ /// Whether this object currently represents a loop.
+ bool IsValid = false;
+
+ BasicBlock *Preheader;
+ BasicBlock *Header;
+ BasicBlock *Cond;
+ BasicBlock *Body;
+ BasicBlock *Latch;
+ BasicBlock *Exit;
+ BasicBlock *After;
+
+ /// Add the control blocks of this loop to \p BBs.
+ ///
+ /// This does not include any block from the body, including the one returned
+ /// by getBody().
+ void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
+
+public:
+ /// The preheader ensures that there is only a single edge entering the loop.
+ /// Code that must be executed before any loop iteration can be emitted here,
+ /// such as computing the loop trip count and begin lifetime markers. Code in
+ /// the preheader is not considered part of the canonical loop.
+ BasicBlock *getPreheader() const { return Preheader; }
+
+ /// The header is the entry for each iteration. In the canonical control flow,
+ /// it only contains the PHINode for the induction variable.
+ BasicBlock *getHeader() const { return Header; }
+
+ /// The condition block computes whether there is another loop iteration. If
+ /// yes, branches to the body; otherwise to the exit block.
+ BasicBlock *getCond() const { return Cond; }
+
+ /// The body block is the single entry for a loop iteration and not controlled
+ /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
+ /// eventually branch to the \p Latch block.
+ BasicBlock *getBody() const { return Body; }
+
+ /// Reaching the latch indicates the end of the loop body code. In the
+ /// canonical control flow, it only contains the increment of the induction
+ /// variable.
+ BasicBlock *getLatch() const { return Latch; }
+
+ /// Reaching the exit indicates no more iterations are being executed.
+ BasicBlock *getExit() const { return Exit; }
+
+ /// The after block is intended for clean-up code such as lifetime end
+ /// markers. It is separate from the exit block to ensure that, analogous to
+ /// the preheader, it has just a single entry edge and is free of PHI
+ /// nodes should there be multiple loop exits (such as from break
+ /// statements/cancellations).
+ BasicBlock *getAfter() const { return After; }
+
+ /// Returns the llvm::Value containing the number of loop iterations. It must
+ /// be valid in the preheader and always interpreted as an unsigned integer of
+ /// any bit-width.
+ Value *getTripCount() const {
+ Instruction *CmpI = &Cond->front();
+ assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
+ return CmpI->getOperand(1);
+ }
+
+ /// Returns the instruction representing the current logical induction
+ /// variable. Always unsigned, always starting at 0 with an increment of one.
+ Instruction *getIndVar() const {
+ Instruction *IndVarPHI = &Header->front();
+ assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
+ return IndVarPHI;
+ }
+
+ /// Return the type of the induction variable (and the trip count).
+ Type *getIndVarType() const { return getIndVar()->getType(); }
+
+ /// Return the insertion point for user code before the loop.
+ OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
+ return {Preheader, std::prev(Preheader->end())};
+ };
+
+ /// Return the insertion point for user code in the body.
+ OpenMPIRBuilder::InsertPointTy getBodyIP() const {
+ return {Body, Body->begin()};
+ };
+
+ /// Return the insertion point for user code after the loop.
+ OpenMPIRBuilder::InsertPointTy getAfterIP() const {
+ return {After, After->begin()};
+ };
+
+ /// Consistency self-check.
+ void assertOK() const;
};
} // end namespace llvm
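A small sketch of how the accessor surface above is typically consumed once a loop exists, e.g. one produced by createCanonicalLoop as in the earlier sketch: take the induction variable at the body insertion point and put once-after-the-loop work in the after block. The callee function and its expected argument types are assumptions:

  #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
  #include "llvm/IR/IRBuilder.h"

  using namespace llvm;

  void annotateLoop(CanonicalLoopInfo *CLI, IRBuilder<> &Builder,
                    Function *Callee) {
    // Work in the single-entry body block; the induction variable counts
    // 0 .. TripCount-1 as an unsigned integer.
    Builder.restoreIP(CLI->getBodyIP());
    Builder.CreateCall(Callee, {CLI->getIndVar()});

    // Anything that must run exactly once after the last iteration belongs in
    // the after block, not the exit block.
    Builder.restoreIP(CLI->getAfterIP());
    Builder.CreateCall(Callee, {CLI->getTripCount()});

    CLI->assertOK();
  }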
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 93ea63c1c2e6..75d360bf4237 100644
--- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -7,217 +7,15 @@
//===----------------------------------------------------------------------===//
/// \file
///
-/// This file defines the list of supported OpenMP directives, clauses, runtime
+/// This file defines the list of supported OpenMP runtime
/// calls, and other things that need to be listed in enums.
///
-//===----------------------------------------------------------------------===//
-
-/// OpenMP Directives and combined directives
+/// This file is under transition to OMP.td with TableGen code generation.
///
-///{
-
-#ifndef OMP_DIRECTIVE
-#define OMP_DIRECTIVE(Enum, Str)
-#endif
-
-#define __OMP_DIRECTIVE_EXT(Name, Str) OMP_DIRECTIVE(OMPD_##Name, Str)
-#define __OMP_DIRECTIVE(Name) __OMP_DIRECTIVE_EXT(Name, #Name)
-
-__OMP_DIRECTIVE(threadprivate)
-__OMP_DIRECTIVE(parallel)
-__OMP_DIRECTIVE(task)
-__OMP_DIRECTIVE(simd)
-__OMP_DIRECTIVE(for)
-__OMP_DIRECTIVE(sections)
-__OMP_DIRECTIVE(section)
-__OMP_DIRECTIVE(single)
-__OMP_DIRECTIVE(master)
-__OMP_DIRECTIVE(critical)
-__OMP_DIRECTIVE(taskyield)
-__OMP_DIRECTIVE(barrier)
-__OMP_DIRECTIVE(taskwait)
-__OMP_DIRECTIVE(taskgroup)
-__OMP_DIRECTIVE(flush)
-__OMP_DIRECTIVE(ordered)
-__OMP_DIRECTIVE(atomic)
-__OMP_DIRECTIVE(target)
-__OMP_DIRECTIVE(teams)
-__OMP_DIRECTIVE(cancel)
-__OMP_DIRECTIVE(requires)
-__OMP_DIRECTIVE_EXT(target_data, "target data")
-__OMP_DIRECTIVE_EXT(target_enter_data, "target enter data")
-__OMP_DIRECTIVE_EXT(target_exit_data, "target exit data")
-__OMP_DIRECTIVE_EXT(target_parallel, "target parallel")
-__OMP_DIRECTIVE_EXT(target_parallel_for, "target parallel for")
-__OMP_DIRECTIVE_EXT(target_update, "target update")
-__OMP_DIRECTIVE_EXT(parallel_for, "parallel for")
-__OMP_DIRECTIVE_EXT(parallel_for_simd, "parallel for simd")
-__OMP_DIRECTIVE_EXT(parallel_master, "parallel master")
-__OMP_DIRECTIVE_EXT(parallel_sections, "parallel sections")
-__OMP_DIRECTIVE_EXT(for_simd, "for simd")
-__OMP_DIRECTIVE_EXT(cancellation_point, "cancellation point")
-__OMP_DIRECTIVE_EXT(declare_reduction, "declare reduction")
-__OMP_DIRECTIVE_EXT(declare_mapper, "declare mapper")
-__OMP_DIRECTIVE_EXT(declare_simd, "declare simd")
-__OMP_DIRECTIVE(taskloop)
-__OMP_DIRECTIVE_EXT(taskloop_simd, "taskloop simd")
-__OMP_DIRECTIVE(distribute)
-__OMP_DIRECTIVE_EXT(declare_target, "declare target")
-__OMP_DIRECTIVE_EXT(end_declare_target, "end declare target")
-__OMP_DIRECTIVE_EXT(distribute_parallel_for, "distribute parallel for")
-__OMP_DIRECTIVE_EXT(distribute_parallel_for_simd,
- "distribute parallel for simd")
-__OMP_DIRECTIVE_EXT(distribute_simd, "distribute simd")
-__OMP_DIRECTIVE_EXT(target_parallel_for_simd, "target parallel for simd")
-__OMP_DIRECTIVE_EXT(target_simd, "target simd")
-__OMP_DIRECTIVE_EXT(teams_distribute, "teams distribute")
-__OMP_DIRECTIVE_EXT(teams_distribute_simd, "teams distribute simd")
-__OMP_DIRECTIVE_EXT(teams_distribute_parallel_for_simd,
- "teams distribute parallel for simd")
-__OMP_DIRECTIVE_EXT(teams_distribute_parallel_for,
- "teams distribute parallel for")
-__OMP_DIRECTIVE_EXT(target_teams, "target teams")
-__OMP_DIRECTIVE_EXT(target_teams_distribute, "target teams distribute")
-__OMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for,
- "target teams distribute parallel for")
-__OMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for_simd,
- "target teams distribute parallel for simd")
-__OMP_DIRECTIVE_EXT(target_teams_distribute_simd,
- "target teams distribute simd")
-__OMP_DIRECTIVE(allocate)
-__OMP_DIRECTIVE_EXT(declare_variant, "declare variant")
-__OMP_DIRECTIVE_EXT(master_taskloop, "master taskloop")
-__OMP_DIRECTIVE_EXT(parallel_master_taskloop, "parallel master taskloop")
-__OMP_DIRECTIVE_EXT(master_taskloop_simd, "master taskloop simd")
-__OMP_DIRECTIVE_EXT(parallel_master_taskloop_simd,
- "parallel master taskloop simd")
-__OMP_DIRECTIVE(depobj)
-__OMP_DIRECTIVE(scan)
-__OMP_DIRECTIVE_EXT(begin_declare_variant, "begin declare variant")
-__OMP_DIRECTIVE_EXT(end_declare_variant, "end declare variant")
-
-// Has to be the last because Clang implicitly expects it to be.
-__OMP_DIRECTIVE(unknown)
-
-#undef __OMP_DIRECTIVE_EXT
-#undef __OMP_DIRECTIVE
-#undef OMP_DIRECTIVE
-
-///}
-
-/// OpenMP Clauses
-///
-///{
-
-#ifndef OMP_CLAUSE
-#define OMP_CLAUSE(Enum, Str, Implicit)
-#endif
-#ifndef OMP_CLAUSE_CLASS
-#define OMP_CLAUSE_CLASS(Enum, Str, Class)
-#endif
-#ifndef OMP_CLAUSE_NO_CLASS
-#define OMP_CLAUSE_NO_CLASS(Enum, Str)
-#endif
-
-#define __OMP_CLAUSE(Name, Class) \
- OMP_CLAUSE(OMPC_##Name, #Name, /* Implicit */ false) \
- OMP_CLAUSE_CLASS(OMPC_##Name, #Name, Class)
-#define __OMP_CLAUSE_NO_CLASS(Name) \
- OMP_CLAUSE(OMPC_##Name, #Name, /* Implicit */ false) \
- OMP_CLAUSE_NO_CLASS(OMPC_##Name, #Name)
-#define __OMP_IMPLICIT_CLAUSE_CLASS(Name, Str, Class) \
- OMP_CLAUSE(OMPC_##Name, Str, /* Implicit */ true) \
- OMP_CLAUSE_CLASS(OMPC_##Name, Str, Class)
-#define __OMP_IMPLICIT_CLAUSE_NO_CLASS(Name, Str) \
- OMP_CLAUSE(OMPC_##Name, Str, /* Implicit */ true) \
- OMP_CLAUSE_NO_CLASS(OMPC_##Name, Str)
-
-__OMP_CLAUSE(allocator, OMPAllocatorClause)
-__OMP_CLAUSE(if, OMPIfClause)
-__OMP_CLAUSE(final, OMPFinalClause)
-__OMP_CLAUSE(num_threads, OMPNumThreadsClause)
-__OMP_CLAUSE(safelen, OMPSafelenClause)
-__OMP_CLAUSE(simdlen, OMPSimdlenClause)
-__OMP_CLAUSE(collapse, OMPCollapseClause)
-__OMP_CLAUSE(default, OMPDefaultClause)
-__OMP_CLAUSE(private, OMPPrivateClause)
-__OMP_CLAUSE(firstprivate, OMPFirstprivateClause)
-__OMP_CLAUSE(lastprivate, OMPLastprivateClause)
-__OMP_CLAUSE(shared, OMPSharedClause)
-__OMP_CLAUSE(reduction, OMPReductionClause)
-__OMP_CLAUSE(linear, OMPLinearClause)
-__OMP_CLAUSE(aligned, OMPAlignedClause)
-__OMP_CLAUSE(copyin, OMPCopyinClause)
-__OMP_CLAUSE(copyprivate, OMPCopyprivateClause)
-__OMP_CLAUSE(proc_bind, OMPProcBindClause)
-__OMP_CLAUSE(schedule, OMPScheduleClause)
-__OMP_CLAUSE(ordered, OMPOrderedClause)
-__OMP_CLAUSE(nowait, OMPNowaitClause)
-__OMP_CLAUSE(untied, OMPUntiedClause)
-__OMP_CLAUSE(mergeable, OMPMergeableClause)
-__OMP_CLAUSE(read, OMPReadClause)
-__OMP_CLAUSE(write, OMPWriteClause)
-__OMP_CLAUSE(update, OMPUpdateClause)
-__OMP_CLAUSE(capture, OMPCaptureClause)
-__OMP_CLAUSE(seq_cst, OMPSeqCstClause)
-__OMP_CLAUSE(acq_rel, OMPAcqRelClause)
-__OMP_CLAUSE(acquire, OMPAcquireClause)
-__OMP_CLAUSE(release, OMPReleaseClause)
-__OMP_CLAUSE(relaxed, OMPRelaxedClause)
-__OMP_CLAUSE(depend, OMPDependClause)
-__OMP_CLAUSE(device, OMPDeviceClause)
-__OMP_CLAUSE(threads, OMPThreadsClause)
-__OMP_CLAUSE(simd, OMPSIMDClause)
-__OMP_CLAUSE(map, OMPMapClause)
-__OMP_CLAUSE(num_teams, OMPNumTeamsClause)
-__OMP_CLAUSE(thread_limit, OMPThreadLimitClause)
-__OMP_CLAUSE(priority, OMPPriorityClause)
-__OMP_CLAUSE(grainsize, OMPGrainsizeClause)
-__OMP_CLAUSE(nogroup, OMPNogroupClause)
-__OMP_CLAUSE(num_tasks, OMPNumTasksClause)
-__OMP_CLAUSE(hint, OMPHintClause)
-__OMP_CLAUSE(dist_schedule, OMPDistScheduleClause)
-__OMP_CLAUSE(defaultmap, OMPDefaultmapClause)
-__OMP_CLAUSE(to, OMPToClause)
-__OMP_CLAUSE(from, OMPFromClause)
-__OMP_CLAUSE(use_device_ptr, OMPUseDevicePtrClause)
-__OMP_CLAUSE(is_device_ptr, OMPIsDevicePtrClause)
-__OMP_CLAUSE(task_reduction, OMPTaskReductionClause)
-__OMP_CLAUSE(in_reduction, OMPInReductionClause)
-__OMP_CLAUSE(unified_address, OMPUnifiedAddressClause)
-__OMP_CLAUSE(unified_shared_memory, OMPUnifiedSharedMemoryClause)
-__OMP_CLAUSE(reverse_offload, OMPReverseOffloadClause)
-__OMP_CLAUSE(dynamic_allocators, OMPDynamicAllocatorsClause)
-__OMP_CLAUSE(atomic_default_mem_order, OMPAtomicDefaultMemOrderClause)
-__OMP_CLAUSE(allocate, OMPAllocateClause)
-__OMP_CLAUSE(nontemporal, OMPNontemporalClause)
-__OMP_CLAUSE(order, OMPOrderClause)
-__OMP_CLAUSE(destroy, OMPDestroyClause)
-__OMP_CLAUSE(detach, OMPDetachClause)
-__OMP_CLAUSE(inclusive, OMPInclusiveClause)
-__OMP_CLAUSE(exclusive, OMPExclusiveClause)
-__OMP_CLAUSE(uses_allocators, OMPUsesAllocatorsClause)
-__OMP_CLAUSE(affinity, OMPAffinityClause)
-__OMP_CLAUSE(use_device_addr, OMPUseDeviceAddrClause)
-
-__OMP_CLAUSE_NO_CLASS(uniform)
-__OMP_CLAUSE_NO_CLASS(device_type)
-__OMP_CLAUSE_NO_CLASS(match)
-
-__OMP_IMPLICIT_CLAUSE_CLASS(depobj, "depobj", OMPDepobjClause)
-__OMP_IMPLICIT_CLAUSE_CLASS(flush, "flush", OMPFlushClause)
-
-__OMP_IMPLICIT_CLAUSE_NO_CLASS(threadprivate, "threadprivate or thread local")
-__OMP_IMPLICIT_CLAUSE_NO_CLASS(unknown, "unknown")
-
-#undef __OMP_IMPLICIT_CLAUSE_NO_CLASS
-#undef __OMP_IMPLICIT_CLAUSE_CLASS
-#undef __OMP_CLAUSE
-#undef OMP_CLAUSE_NO_CLASS
-#undef OMP_CLAUSE_CLASS
-#undef OMP_CLAUSE
+//===----------------------------------------------------------------------===//
-///}
+/// OpenMP Directives, combined directives and Clauses
+/// - Moved to OMP.td
/// Types used in runtime structs or runtime functions
///
@@ -232,13 +30,16 @@ __OMP_IMPLICIT_CLAUSE_NO_CLASS(unknown, "unknown")
__OMP_TYPE(Void)
__OMP_TYPE(Int1)
__OMP_TYPE(Int8)
+__OMP_TYPE(Int16)
__OMP_TYPE(Int32)
__OMP_TYPE(Int64)
__OMP_TYPE(Int8Ptr)
+__OMP_TYPE(Int16Ptr)
__OMP_TYPE(Int32Ptr)
__OMP_TYPE(Int64Ptr)
OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx))
+OMP_TYPE(LanemaskTy, getLanemaskType())
#define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo())
@@ -286,6 +87,7 @@ __OMP_ARRAY_TYPE(KmpCriticalName, Int32, 8)
OMP_STRUCT_TYPE(VarName, "struct." #Name, __VA_ARGS__)
__OMP_STRUCT_TYPE(Ident, ident_t, Int32, Int32, Int32, Int32, Int8Ptr)
+__OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, Int8Ptr)
#undef __OMP_STRUCT_TYPE
#undef OMP_STRUCT_TYPE
@@ -305,6 +107,9 @@ __OMP_FUNCTION_TYPE(KmpcDtor, false, Void, VoidPtr)
__OMP_FUNCTION_TYPE(KmpcCopyCtor, false, VoidPtr, VoidPtr, VoidPtr)
__OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32,
/* kmp_task_t */ VoidPtr)
+__OMP_FUNCTION_TYPE(ShuffleReduce, false, Void, VoidPtr, Int16, Int16, Int16)
+__OMP_FUNCTION_TYPE(InterWarpCopy, false, Void, VoidPtr, Int32)
+__OMP_FUNCTION_TYPE(GlobalList, false, Void, VoidPtr, Int32, VoidPtr)
#undef __OMP_FUNCTION_TYPE
#undef OMP_FUNCTION_TYPE
@@ -315,6 +120,20 @@ __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32,
///
///{
+#ifndef ICV_INIT_VALUE
+#define ICV_INIT_VALUE(Enum, Name)
+#endif
+
+#define __ICV_INIT_VALUE(Name) ICV_INIT_VALUE(ICV_##Name, #Name)
+
+__ICV_INIT_VALUE(ZERO)
+__ICV_INIT_VALUE(FALSE)
+__ICV_INIT_VALUE(IMPLEMENTATION_DEFINED)
+__ICV_INIT_VALUE(LAST)
+
+#undef __ICV_INIT_VALUE
+#undef ICV_INIT_VALUE
+
#ifndef ICV_DATA_ENV
#define ICV_DATA_ENV(Enum, Name, EnvVarName, Init)
#endif
@@ -325,6 +144,7 @@ __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32,
__ICV_DATA_ENV(nthreads, OMP_NUM_THREADS, ICV_IMPLEMENTATION_DEFINED)
__ICV_DATA_ENV(active_levels, NONE, ICV_ZERO)
__ICV_DATA_ENV(cancel, OMP_CANCELLATION, ICV_FALSE)
+__ICV_DATA_ENV(proc_bind, OMP_PROC_BIND, ICV_IMPLEMENTATION_DEFINED)
__ICV_DATA_ENV(__last, last, ICV_LAST)
#undef __ICV_DATA_ENV
@@ -350,6 +170,7 @@ __ICV_RT_SET(nthreads, omp_set_num_threads)
__ICV_RT_GET(nthreads, omp_get_max_threads)
__ICV_RT_GET(active_levels, omp_get_active_level)
__ICV_RT_GET(cancel, omp_get_cancellation)
+__ICV_RT_GET(proc_bind, omp_get_proc_bind)
#undef __ICV_RT_GET
#undef ICV_RT_GET
@@ -380,10 +201,9 @@ __OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, /* Int */ Int32)
__OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32,
/* Int */ Int32)
__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32)
-__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
-__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32,
- Int8Ptr, Int32, Int8Ptr)
+ /* kmp_task_t */ VoidPtr, Int32,
+ /* kmp_task_affinity_info_t */ VoidPtr)
__OMP_RTL(omp_get_thread_num, false, Int32, )
__OMP_RTL(omp_get_num_threads, false, Int32, )
@@ -430,8 +250,7 @@ __OMP_RTL(__kmpc_reduce, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr,
ReduceFunctionPtr, KmpCriticalNamePtrTy)
__OMP_RTL(__kmpc_reduce_nowait, false, Int32, IdentPtr, Int32, Int32, SizeTy,
VoidPtr, ReduceFunctionPtr, KmpCriticalNamePtrTy)
-__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32,
- KmpCriticalNamePtrTy)
+__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy)
__OMP_RTL(__kmpc_end_reduce_nowait, false, Void, IdentPtr, Int32,
KmpCriticalNamePtrTy)
@@ -514,10 +333,10 @@ __OMP_RTL(__kmpc_taskloop, false, Void, IdentPtr, /* Int */ Int32, VoidPtr,
/* Int */ Int32, Int64, VoidPtr)
__OMP_RTL(__kmpc_omp_target_task_alloc, false, /* kmp_task_t */ VoidPtr,
IdentPtr, Int32, Int32, SizeTy, SizeTy, TaskRoutineEntryPtr, Int64)
-__OMP_RTL(__kmpc_taskred_modifier_init, false, VoidPtr, IdentPtr,
- /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr)
-__OMP_RTL(__kmpc_taskred_init, false, VoidPtr, /* Int */ Int32,
- /* Int */ Int32, VoidPtr)
+__OMP_RTL(__kmpc_taskred_modifier_init, false, /* kmp_taskgroup */ VoidPtr,
+ IdentPtr, /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr)
+__OMP_RTL(__kmpc_taskred_init, false, /* kmp_taskgroup */ VoidPtr,
+ /* Int */ Int32, /* Int */ Int32, VoidPtr)
__OMP_RTL(__kmpc_task_reduction_modifier_fini, false, Void, IdentPtr,
/* Int */ Int32, /* Int */ Int32)
__OMP_RTL(__kmpc_task_reduction_get_th_data, false, VoidPtr, Int32, VoidPtr,
@@ -556,45 +375,83 @@ __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr,
__OMP_RTL(__kmpc_destroy_allocator, false, Void, /* Int */ Int32,
/* omp_allocator_handle_t */ VoidPtr)
-__OMP_RTL(__kmpc_push_target_tripcount, false, Void, Int64, Int64)
-__OMP_RTL(__tgt_target, false, Int32, Int64, VoidPtr, Int32, VoidPtrPtr,
- VoidPtrPtr, Int64Ptr, Int64Ptr)
-__OMP_RTL(__tgt_target_nowait, false, Int32, Int64, VoidPtr, Int32, VoidPtrPtr,
- VoidPtrPtr, Int64Ptr, Int64Ptr)
-__OMP_RTL(__tgt_target_teams, false, Int32, Int64, VoidPtr, Int32, VoidPtrPtr,
- VoidPtrPtr, Int64Ptr, Int64Ptr, Int32, Int32)
-__OMP_RTL(__tgt_target_teams_nowait, false, Int32, Int64, VoidPtr, Int32,
- VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, Int32, Int32)
+__OMP_RTL(__kmpc_push_target_tripcount_mapper, false, Void, IdentPtr, Int64, Int64)
+__OMP_RTL(__tgt_target_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, VoidPtrPtr,
+ VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32,
+ VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_teams_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32,
+ VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, Int32, Int32)
+__OMP_RTL(__tgt_target_teams_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32,
+ VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, Int32, Int32)
__OMP_RTL(__tgt_register_requires, false, Void, Int64)
-__OMP_RTL(__tgt_target_data_begin, false, Void, Int64, Int32, VoidPtrPtr,
- VoidPtrPtr, Int64Ptr, Int64Ptr)
-__OMP_RTL(__tgt_target_data_begin_nowait, false, Void, Int64, Int32, VoidPtrPtr,
- VoidPtrPtr, Int64Ptr, Int64Ptr)
-__OMP_RTL(__tgt_target_data_end, false, Void, Int64, Int32, VoidPtrPtr,
- VoidPtrPtr, Int64Ptr, Int64Ptr)
-__OMP_RTL(__tgt_target_data_end_nowait, false, Void, Int64, Int32, VoidPtrPtr,
- VoidPtrPtr, Int64Ptr, Int64Ptr)
-__OMP_RTL(__tgt_target_data_update, false, Void, Int64, Int32, VoidPtrPtr,
- VoidPtrPtr, Int64Ptr, Int64Ptr)
-__OMP_RTL(__tgt_target_data_update_nowait, false, Void, Int64, Int32,
- VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr)
+__OMP_RTL(__tgt_target_data_begin_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr,
+ VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
+ VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_begin_mapper_issue, false, Void, IdentPtr, Int64, Int32,
+ VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, AsyncInfoPtr)
+__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfoPtr)
+__OMP_RTL(__tgt_target_data_end_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr,
+ VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
+ VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_update_mapper, false, Void, IdentPtr, Int64, Int32,
+ VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
+ VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
__OMP_RTL(__tgt_mapper_num_components, false, Int64, VoidPtr)
__OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr,
- Int64, Int64)
+ Int64, Int64, VoidPtr)
__OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
/* Int */ Int32, /* kmp_task_t */ VoidPtr)
-/// Note that device runtime functions (in the following) do not necessarily
-/// need attributes as we expect to see the definitions.
-__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr)
+/// OpenMP Device runtime functions
+__OMP_RTL(__kmpc_kernel_init, false, Void, Int32, Int16)
+__OMP_RTL(__kmpc_kernel_deinit, false, Void, Int16)
+__OMP_RTL(__kmpc_spmd_kernel_init, false, Void, Int32, Int16)
+__OMP_RTL(__kmpc_spmd_kernel_deinit_v2, false, Void, Int16)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
+__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr)
+__OMP_RTL(__kmpc_kernel_end_parallel, false, Void, )
+__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_shuffle_int32, false, Int32, Int32, Int16, Int16)
+__OMP_RTL(__kmpc_nvptx_parallel_reduce_nowait_v2, false, Int32, IdentPtr, Int32,
+ Int32, SizeTy, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr)
+__OMP_RTL(__kmpc_nvptx_end_reduce_nowait, false, Void, Int32)
+__OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, Int32,
+ VoidPtr, Int32, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr,
+ GlobalListPtr, GlobalListPtr, GlobalListPtr, GlobalListPtr)
+
+__OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16)
+__OMP_RTL(__kmpc_data_sharing_init_stack, false, Void, )
+__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, )
+
+__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, Int16)
+__OMP_RTL(__kmpc_data_sharing_push_stack, false, VoidPtr, SizeTy, Int16)
+__OMP_RTL(__kmpc_data_sharing_pop_stack, false, Void, VoidPtr)
+__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy)
+__OMP_RTL(__kmpc_end_sharing_variables, false, Void, )
+__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr)
+__OMP_RTL(__kmpc_parallel_level, false, Int16, IdentPtr, Int32)
+__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, )
+__OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy,
+ Int16, VoidPtrPtr)
+__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16)
+__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32)
+
+__OMP_RTL(__kmpc_warp_active_thread_mask, false, LanemaskTy,)
+__OMP_RTL(__kmpc_syncwarp, false, Void, LanemaskTy)
__OMP_RTL(__last, false, Void, )
#undef __OMP_RTL
#undef OMP_RTL
+#define ParamAttrs(...) ArrayRef<AttributeSet>({__VA_ARGS__})
#define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind)
+#define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N)
#define AttributeSet(...) \
AttributeSet::get(Ctx, ArrayRef<Attribute>({__VA_ARGS__}))
@@ -607,19 +464,94 @@ __OMP_RTL(__last, false, Void, )
__OMP_ATTRS_SET(GetterAttrs,
OptimisticAttributes
? AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly),
- EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly))
+ EnumAttr(NoSync), EnumAttr(NoFree),
+ EnumAttr(InaccessibleMemOnly),
+ EnumAttr(WillReturn))
: AttributeSet(EnumAttr(NoUnwind)))
__OMP_ATTRS_SET(GetterArgWriteAttrs,
OptimisticAttributes
? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync),
- EnumAttr(NoFree), EnumAttr(InaccessibleMemOrArgMemOnly))
+ EnumAttr(NoFree),
+ EnumAttr(InaccessibleMemOrArgMemOnly),
+ EnumAttr(WillReturn))
: AttributeSet(EnumAttr(NoUnwind)))
__OMP_ATTRS_SET(SetterAttrs,
OptimisticAttributes
? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly),
- EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly))
+ EnumAttr(NoSync), EnumAttr(NoFree),
+ EnumAttr(InaccessibleMemOnly),
+ EnumAttr(WillReturn))
+ : AttributeSet(EnumAttr(NoUnwind)))
+
+__OMP_ATTRS_SET(DefaultAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync),
+ EnumAttr(WillReturn), EnumAttr(NoFree))
+ : AttributeSet(EnumAttr(NoUnwind)))
+
+__OMP_ATTRS_SET(BarrierAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))
+ : AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)))
+
+__OMP_ATTRS_SET(InaccessibleArgOnlyAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync),
+ EnumAttr(InaccessibleMemOrArgMemOnly),
+ EnumAttr(WillReturn), EnumAttr(NoFree))
: AttributeSet(EnumAttr(NoUnwind)))
+#if 0
+__OMP_ATTRS_SET(InaccessibleOnlyAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync),
+ EnumAttr(InaccessibleMemOnly),
+ EnumAttr(WillReturn), EnumAttr(NoFree))
+ : AttributeSet(EnumAttr(NoUnwind)))
+#endif
+
+__OMP_ATTRS_SET(AllocAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync),
+ EnumAttr(WillReturn))
+ : AttributeSet(EnumAttr(NoUnwind)))
+
+__OMP_ATTRS_SET(ForkAttrs, OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoUnwind))
+ : AttributeSet(EnumAttr(NoUnwind)))
+
+__OMP_ATTRS_SET(ReadOnlyPtrAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoFree),
+ EnumAttr(NoCapture))
+ : AttributeSet())
+
+#if 0
+__OMP_ATTRS_SET(WriteOnlyPtrAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoFree),
+ EnumAttr(NoCapture))
+ : AttributeSet())
+#endif
+
+__OMP_ATTRS_SET(ArgPtrAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoCapture), EnumAttr(NoFree))
+ : AttributeSet())
+
+__OMP_ATTRS_SET(ReturnPtrAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoAlias))
+ : AttributeSet())
+
+#if 0
+__OMP_ATTRS_SET(ReturnAlignedPtrAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoAlias), EnumAttrInt(Alignment, 8),
+ EnumAttrInt(DereferenceableOrNull, 8))
+ : AttributeSet())
+#endif
+
#undef __OMP_ATTRS_SET
#undef OMP_ATTRS_SET
@@ -630,295 +562,314 @@ __OMP_ATTRS_SET(SetterAttrs,
#define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \
OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets)
-__OMP_RTL_ATTRS(__kmpc_barrier, AttributeSet(), AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_cancel,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_cancel_barrier, AttributeSet(), AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_flush, AttributeSet(), AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_omp_taskwait, AttributeSet(), AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_omp_taskyield,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_push_num_threads,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_push_proc_bind,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_serialized_parallel,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_schedule, GetterArgWriteAttrs, AttributeSet(),
- ArrayRef<AttributeSet>(
- {AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)),
- AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))}))
-__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(),
+ ParamAttrs())
+__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_flush, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_fork_call, ForkAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_omp_taskwait, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_omp_taskyield, InaccessibleArgOnlyAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_push_num_threads, InaccessibleArgOnlyAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_push_proc_bind, InaccessibleArgOnlyAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_omp_reg_task_with_affinity, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs,
+ AttributeSet(), ReadOnlyPtrAttrs))
+
+__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(
+ omp_get_schedule, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)),
+ AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))))
+__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), ParamAttrs())
__OMP_RTL_ATTRS(omp_get_supported_active_levels, GetterAttrs, AttributeSet(),
- {})
-__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), {})
+ ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(),
+ ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(),
+ ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), ParamAttrs())
__OMP_RTL_ATTRS(omp_get_place_proc_ids, GetterArgWriteAttrs, AttributeSet(),
- ArrayRef<AttributeSet>({AttributeSet(),
- AttributeSet(EnumAttr(NoCapture),
- EnumAttr(WriteOnly))}))
-__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), {})
-
-__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), {})
-__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_master,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_end_master,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_critical,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_critical_with_hint,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_end_critical,
- AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_begin, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_end, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_reduce, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_end_reduce, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_ordered, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_end_ordered, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_for_static_fini, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_team_static_init_4, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_team_static_init_8, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_single, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_end_single, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_omp_task, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_end_taskgroup, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_taskgroup, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_taskloop, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_taskred_init,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_task_reduction_init,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_cancellationpoint, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_fork_teams, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_push_num_teams, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_copyprivate, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_threadprivate_register, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_doacross_init, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_doacross_post, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_doacross_wait, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_doacross_fini, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_alloc, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_free, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_init_allocator, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-
-__OMP_RTL_ATTRS(__kmpc_push_target_tripcount,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_target,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_target_nowait,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_target_teams,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_target_teams_nowait,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_register_requires,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_target_data_begin,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_target_data_end,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_target_data_end_nowait,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_target_data_update,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_target_data_update_nowait,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_mapper_num_components,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__tgt_push_mapper_component,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
-__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event,
- AttributeSet(EnumAttr(NoUnwind)),
- AttributeSet(), {})
+ ParamAttrs(AttributeSet(), AttributeSet(EnumAttr(NoCapture),
+ EnumAttr(WriteOnly))))
+__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(),
+ ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(),
+ ParamAttrs())
+
+__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(),
+ ParamAttrs())
+
+__OMP_RTL_ATTRS(__kmpc_master, InaccessibleArgOnlyAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_end_master, InaccessibleArgOnlyAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_critical, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_critical_with_hint, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_end_critical, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet()))
+
+__OMP_RTL_ATTRS(__kmpc_begin, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_end, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+
+__OMP_RTL_ATTRS(__kmpc_reduce, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ AttributeSet(), ReadOnlyPtrAttrs, AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_reduce_nowait, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ AttributeSet(), ReadOnlyPtrAttrs, AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_end_reduce, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet()))
+
+__OMP_RTL_ATTRS(__kmpc_ordered, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_end_ordered, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+
+__OMP_RTL_ATTRS(__kmpc_for_static_init_4, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_for_static_init_8, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_for_static_fini, InaccessibleArgOnlyAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs,
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs,
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs,
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs,
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, InaccessibleArgOnlyAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, InaccessibleArgOnlyAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, InaccessibleArgOnlyAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, InaccessibleArgOnlyAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_team_static_init_4, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs,
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs,
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_team_static_init_8, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs,
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, GetterArgWriteAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs,
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ ArgPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ ArgPtrAttrs))
+
+__OMP_RTL_ATTRS(__kmpc_single, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_end_single, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+
+__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, DefaultAttrs, ReturnPtrAttrs,
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_omp_task, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_end_taskgroup, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_taskgroup, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ AttributeSet(), ReadOnlyPtrAttrs, AttributeSet(),
+ ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_taskloop, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ AttributeSet(), ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet(), AttributeSet(),
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, DefaultAttrs, ReturnPtrAttrs,
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs,
+ AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, DefaultAttrs, ReturnPtrAttrs,
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_taskred_init, DefaultAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, BarrierAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, DefaultAttrs, ReturnPtrAttrs,
+ ParamAttrs())
+__OMP_RTL_ATTRS(__kmpc_task_reduction_init, DefaultAttrs, ReturnPtrAttrs,
+ ParamAttrs())
+__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, DefaultAttrs,
+ ReturnPtrAttrs, ParamAttrs())
+__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, DefaultAttrs, AttributeSet(),
+ ParamAttrs())
+
+__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_cancellationpoint, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+
+__OMP_RTL_ATTRS(__kmpc_fork_teams, ForkAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_push_num_teams, InaccessibleArgOnlyAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+
+__OMP_RTL_ATTRS(__kmpc_copyprivate, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, DefaultAttrs, ReturnPtrAttrs,
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_threadprivate_register, DefaultAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs,
+ ReadOnlyPtrAttrs, ReadOnlyPtrAttrs))
+
+__OMP_RTL_ATTRS(__kmpc_doacross_init, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_doacross_post, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
+
+__OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, {})
+__OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {})
+
+__OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {})
+__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {})
+
+__OMP_RTL_ATTRS(__kmpc_push_target_tripcount_mapper, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_target_teams_nowait_mapper, ForkAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_register_requires, ForkAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs,
+ AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_target_data_end_mapper, ForkAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, ForkAttrs,
+ AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, ForkAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs,
+ AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_mapper_num_components, ForkAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs,
+ ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs))
#undef __OMP_RTL_ATTRS
#undef OMP_RTL_ATTRS
#undef AttributeSet
#undef EnumAttr
+#undef EnumAttrInt
+#undef ParamAttrs
///}
@@ -1066,10 +1017,6 @@ __OMP_TRAIT_PROPERTY(device, kind, gpu)
__OMP_TRAIT_PROPERTY(device, kind, fpga)
__OMP_TRAIT_PROPERTY(device, kind, any)
-__OMP_TRAIT_SELECTOR(device, isa, true)
-
-// TODO: What do we want for ISA?
-
__OMP_TRAIT_SELECTOR(device, arch, true)
__OMP_TRAIT_PROPERTY(device, arch, arm)
@@ -1078,6 +1025,7 @@ __OMP_TRAIT_PROPERTY(device, arch, aarch64)
__OMP_TRAIT_PROPERTY(device, arch, aarch64_be)
__OMP_TRAIT_PROPERTY(device, arch, aarch64_32)
__OMP_TRAIT_PROPERTY(device, arch, ppc)
+__OMP_TRAIT_PROPERTY(device, arch, ppcle)
__OMP_TRAIT_PROPERTY(device, arch, ppc64)
__OMP_TRAIT_PROPERTY(device, arch, ppc64le)
__OMP_TRAIT_PROPERTY(device, arch, x86)
@@ -1107,6 +1055,8 @@ __OMP_TRAIT_SELECTOR(implementation, extension, true)
__OMP_TRAIT_PROPERTY(implementation, extension, match_all)
__OMP_TRAIT_PROPERTY(implementation, extension, match_any)
__OMP_TRAIT_PROPERTY(implementation, extension, match_none)
+__OMP_TRAIT_PROPERTY(implementation, extension, disable_implicit_base)
+__OMP_TRAIT_PROPERTY(implementation, extension, allow_templates)
__OMP_TRAIT_SET(user)
@@ -1116,6 +1066,18 @@ __OMP_TRAIT_PROPERTY(user, condition, true)
__OMP_TRAIT_PROPERTY(user, condition, false)
__OMP_TRAIT_PROPERTY(user, condition, unknown)
+
+// Note that we put isa last so that the other conditions are checked first.
+// This allows us to issue warnings wrt. isa only if we match otherwise.
+__OMP_TRAIT_SELECTOR(device, isa, true)
+
+// We use "__ANY" as a placeholder in the isa property to denote the
+// conceptual "any", not the literal `any` used in kind. The string we
+// use is not important except that it will show up in diagnostics.
+OMP_TRAIT_PROPERTY(device_isa___ANY, device, device_isa,
+ "<any, entirely target dependent>")
+
+
#undef OMP_TRAIT_SET
#undef __OMP_TRAIT_SET
///}
@@ -1153,3 +1115,27 @@ OMP_LAST_TRAIT_PROPERTY(
#undef __OMP_REQUIRES_TRAIT
#undef OMP_REQUIRES_TRAIT
///}
+
+
+/// Assumption clauses
+///
+///{
+
+#ifdef OMP_ASSUME_CLAUSE
+#define __OMP_ASSUME_CLAUSE(Identifier, StartsWith, HasDirectiveList, HasExpression) \
+OMP_ASSUME_CLAUSE(Identifier, StartsWith, HasDirectiveList, HasExpression)
+#else
+#define __OMP_ASSUME_CLAUSE(...)
+#endif
+
+__OMP_ASSUME_CLAUSE(llvm::StringLiteral("ext_"), true, false, false)
+__OMP_ASSUME_CLAUSE(llvm::StringLiteral("absent"), false, true, false)
+__OMP_ASSUME_CLAUSE(llvm::StringLiteral("contains"), false, true, false)
+__OMP_ASSUME_CLAUSE(llvm::StringLiteral("holds"), false, false, true)
+__OMP_ASSUME_CLAUSE(llvm::StringLiteral("no_openmp"), false, false, false)
+__OMP_ASSUME_CLAUSE(llvm::StringLiteral("no_openmp_routines"), false, false, false)
+__OMP_ASSUME_CLAUSE(llvm::StringLiteral("no_parallelism"), false, false, false)
+
+#undef __OMP_ASSUME_CLAUSE
+#undef OMP_ASSUME_CLAUSE
+///}
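
For context, OMPKinds.def is an X-macro header: a consumer defines the OMP_* macros it needs, includes the file, and every __OMP_RTL / __OMP_RTL_ATTRS entry above expands into an enumerator, declaration, or table row. The minimal sketch below illustrates that consumption pattern; it mirrors how llvm/Frontend/OpenMP/OMPConstants.h builds its RuntimeFunction enum, but is an illustrative snippet rather than a verbatim copy of that header.

// Each __OMP_RTL(Name, ...) line above becomes one OMPRTL_##Name enumerator.
enum class RuntimeFunction {
#define OMP_RTL(Enum, ...) Enum,
#include "llvm/Frontend/OpenMP/OMPKinds.def"
};
// OMPKinds.def #undefs OMP_RTL at the end of its runtime-function block,
// so the consumer does not need to clean the macro up afterwards.
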
diff --git a/contrib/llvm-project/llvm/include/llvm/FuzzMutate/IRMutator.h b/contrib/llvm-project/llvm/include/llvm/FuzzMutate/IRMutator.h
index 40a1ce8aeec9..423582eace9b 100644
--- a/contrib/llvm-project/llvm/include/llvm/FuzzMutate/IRMutator.h
+++ b/contrib/llvm-project/llvm/include/llvm/FuzzMutate/IRMutator.h
@@ -102,6 +102,17 @@ public:
void mutate(Instruction &Inst, RandomIRBuilder &IB) override;
};
+class InstModificationIRStrategy : public IRMutationStrategy {
+public:
+ uint64_t getWeight(size_t CurrentSize, size_t MaxSize,
+ uint64_t CurrentWeight) override {
+ return 4;
+ }
+
+ using IRMutationStrategy::mutate;
+ void mutate(Instruction &Inst, RandomIRBuilder &IB) override;
+};
+
} // end llvm namespace
#endif // LLVM_FUZZMUTATE_IRMUTATOR_H
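
The new InstModificationIRStrategy is meant to be mixed with the existing strategies when an IRMutator is built. The sketch below shows one plausible wiring, assuming the IRMutator, TypeGetter and InjectorIRStrategy interfaces declared earlier in this header and a `using namespace llvm;` context; the type list is arbitrary.

std::unique_ptr<IRMutator> buildMutator() {
  // Types the mutator may materialize; the selection here is arbitrary.
  std::vector<TypeGetter> Types{Type::getInt1Ty, Type::getInt32Ty,
                                Type::getInt64Ty, Type::getFloatTy};
  std::vector<std::unique_ptr<IRMutationStrategy>> Strategies;
  Strategies.push_back(std::make_unique<InjectorIRStrategy>(
      InjectorIRStrategy::getDefaultOps()));
  // getWeight() above returns 4, so this strategy is chosen fairly often.
  Strategies.push_back(std::make_unique<InstModificationIRStrategy>());
  return std::make_unique<IRMutator>(std::move(Types), std::move(Strategies));
}
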
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Argument.h b/contrib/llvm-project/llvm/include/llvm/IR/Argument.h
index af469e8a5d1a..76d780485ea0 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Argument.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Argument.h
@@ -52,7 +52,9 @@ public:
/// Return true if this argument has the nonnull attribute. Also returns true
/// if at least one byte is known to be dereferenceable and the pointer is in
/// addrspace(0).
- bool hasNonNullAttr() const;
+ /// If AllowUndefOrPoison is true, respect the semantics of nonnull attribute
+ /// and return true even if the argument can be undef or poison.
+ bool hasNonNullAttr(bool AllowUndefOrPoison = true) const;
/// If this argument has the dereferenceable attribute, return the number of
/// bytes known to be dereferenceable. Otherwise, zero is returned.
@@ -65,6 +67,9 @@ public:
/// Return true if this argument has the byval attribute.
bool hasByValAttr() const;
+ /// Return true if this argument has the byref attribute.
+ bool hasByRefAttr() const;
+
/// Return true if this argument has the swiftself attribute.
bool hasSwiftSelfAttr() const;
@@ -72,13 +77,23 @@ public:
bool hasSwiftErrorAttr() const;
/// Return true if this argument has the byval, inalloca, or preallocated
- /// attribute. These attributes represent arguments being passed by value.
- bool hasPassPointeeByValueAttr() const;
+ /// attribute. These attributes represent arguments being passed by value,
+ /// with an associated copy between the caller and callee
+ bool hasPassPointeeByValueCopyAttr() const;
 /// If this argument satisfies hasPassPointeeByValueAttr, return the
/// in-memory ABI size copied to the stack for the call. Otherwise, return 0.
uint64_t getPassPointeeByValueCopySize(const DataLayout &DL) const;
+ /// Return true if this argument has the byval, sret, inalloca, preallocated,
+ /// or byref attribute. These attributes represent arguments being passed by
+ /// value (which may or may not involve a stack copy)
+ bool hasPointeeInMemoryValueAttr() const;
+
+ /// If hasPointeeInMemoryValueAttr returns true, the in-memory ABI type is
+ /// returned. Otherwise, nullptr.
+ Type *getPointeeInMemoryValueType() const;
+
/// If this is a byval or inalloca argument, return its alignment.
/// FIXME: Remove this function once transition to Align is over.
/// Use getParamAlign() instead.
@@ -90,6 +105,12 @@ public:
/// If this is a byval argument, return its type.
Type *getParamByValType() const;
+ /// If this is an sret argument, return its type.
+ Type *getParamStructRetType() const;
+
+ /// If this is a byref argument, return its type.
+ Type *getParamByRefType() const;
+
/// Return true if this argument has the nest attribute.
bool hasNestAttr() const;
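
The renamed hasPassPointeeByValueCopyAttr and the new hasPointeeInMemoryValueAttr separate "passed by value with an implied copy" (byval, inalloca, preallocated) from the broader "pointee has an in-memory ABI type" set that also covers sret and byref. A small, hedged sketch of using the new queries together with DataLayout; the helper name is invented for illustration.

#include "llvm/IR/Argument.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

// Returns the allocation size of the argument's in-memory ABI type, or 0
// if the argument is a plain pointer with no such type attached.
static uint64_t pointeeABISize(const Argument &Arg, const DataLayout &DL) {
  if (!Arg.hasPointeeInMemoryValueAttr())
    return 0;
  if (Type *MemTy = Arg.getPointeeInMemoryValueType())
    return DL.getTypeAllocSize(MemTy);
  return 0; // Attribute present but no element type recorded.
}
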
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Assumptions.h b/contrib/llvm-project/llvm/include/llvm/IR/Assumptions.h
new file mode 100644
index 000000000000..f64616c25d87
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Assumptions.h
@@ -0,0 +1,50 @@
+//===--- Assumptions.h - Assumption handling and organization ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// String assumptions that are known to optimization passes should be placed in
+// the KnownAssumptionStrings set. This can be done in various ways, i.a.,
+// via a static KnownAssumptionString object.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_ASSUMPTIONS_H
+#define LLVM_IR_ASSUMPTIONS_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+
+namespace llvm {
+
+class Function;
+
+/// The key we use for assumption attributes.
+constexpr StringRef AssumptionAttrKey = "llvm.assume";
+
+/// A set of known assumption strings that are accepted without warning and
+/// which can be recommended as typo correction.
+extern StringSet<> KnownAssumptionStrings;
+
+/// Helper that allows inserting a new assumption string into the known
+/// assumption set by creating a (static) object.
+struct KnownAssumptionString {
+ KnownAssumptionString(StringRef AssumptionStr)
+ : AssumptionStr(AssumptionStr) {
+ KnownAssumptionStrings.insert(AssumptionStr);
+ }
+ operator StringRef() const { return AssumptionStr; }
+
+private:
+ StringRef AssumptionStr;
+};
+
+/// Return true if \p F has the assumption \p AssumptionStr attached.
+bool hasAssumption(Function &F, const KnownAssumptionString &AssumptionStr);
+
+} // namespace llvm
+
+#endif
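
The intended pattern, per the header comment above, is that a pass registers the strings it understands once via a static KnownAssumptionString object and then queries functions with hasAssumption(). A minimal sketch; the string "my_pass_no_sideeffects" is invented for this example.

#include "llvm/IR/Assumptions.h"
using namespace llvm;

// The static constructor inserts the string into KnownAssumptionStrings,
// so it is accepted without a warning and offered as a typo correction.
static KnownAssumptionString NoSideEffects("my_pass_no_sideeffects");

static bool canSkip(Function &F) {
  // True iff F carries the string in its "llvm.assume" attribute.
  return hasAssumption(F, NoSideEffects);
}
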
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Attributes.h b/contrib/llvm-project/llvm/include/llvm/IR/Attributes.h
index 58365aa2b764..b4056540663f 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Attributes.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Attributes.h
@@ -108,8 +108,17 @@ public:
unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg);
static Attribute getWithByValType(LLVMContext &Context, Type *Ty);
+ static Attribute getWithStructRetType(LLVMContext &Context, Type *Ty);
+ static Attribute getWithByRefType(LLVMContext &Context, Type *Ty);
static Attribute getWithPreallocatedType(LLVMContext &Context, Type *Ty);
+ /// For a typed attribute, return the equivalent attribute with the type
+ /// changed to \p ReplacementTy.
+ Attribute getWithNewType(LLVMContext &Context, Type *ReplacementTy) {
+ assert(isTypeAttribute() && "this requires a typed attribute");
+ return get(Context, getKindAsEnum(), ReplacementTy);
+ }
+
static Attribute::AttrKind getAttrKindFromName(StringRef AttrName);
static StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind);
@@ -138,6 +147,9 @@ public:
/// Return true if the attribute is a type attribute.
bool isTypeAttribute() const;
+ /// Return true if the attribute is any kind of attribute.
+ bool isValid() const { return pImpl; }
+
/// Return true if the attribute is present.
bool hasAttribute(AttrKind Val) const;
@@ -303,6 +315,8 @@ public:
uint64_t getDereferenceableBytes() const;
uint64_t getDereferenceableOrNullBytes() const;
Type *getByValType() const;
+ Type *getStructRetType() const;
+ Type *getByRefType() const;
Type *getPreallocatedType() const;
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
std::string getAsString(bool InAttrGrp = false) const;
@@ -385,6 +399,9 @@ private:
static AttributeList getImpl(LLVMContext &C, ArrayRef<AttributeSet> AttrSets);
+ AttributeList setAttributes(LLVMContext &C, unsigned Index,
+ AttributeSet Attrs) const;
+
public:
AttributeList() = default;
@@ -503,6 +520,17 @@ public:
return removeAttributes(C, ArgNo + FirstArgIndex);
}
+  /// Replace the type contained by attribute \p AttrKind at index \p ArgNo with
+ /// \p ReplacementTy, preserving all other attributes.
+ LLVM_NODISCARD AttributeList replaceAttributeType(LLVMContext &C,
+ unsigned ArgNo,
+ Attribute::AttrKind Kind,
+ Type *ReplacementTy) const {
+ Attribute Attr = getAttribute(ArgNo, Kind);
+ auto Attrs = removeAttribute(C, ArgNo, Kind);
+ return Attrs.addAttribute(C, ArgNo, Attr.getWithNewType(C, ReplacementTy));
+ }
+
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// index. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addDereferenceableAttr(LLVMContext &C,
@@ -626,6 +654,12 @@ public:
/// Return the byval type for the specified function parameter.
Type *getParamByValType(unsigned ArgNo) const;
+ /// Return the sret type for the specified function parameter.
+ Type *getParamStructRetType(unsigned ArgNo) const;
+
+ /// Return the byref type for the specified function parameter.
+ Type *getParamByRefType(unsigned ArgNo) const;
+
/// Return the preallocated type for the specified function parameter.
Type *getParamPreallocatedType(unsigned ArgNo) const;
@@ -729,6 +763,8 @@ class AttrBuilder {
uint64_t DerefOrNullBytes = 0;
uint64_t AllocSizeArgs = 0;
Type *ByValType = nullptr;
+ Type *StructRetType = nullptr;
+ Type *ByRefType = nullptr;
Type *PreallocatedType = nullptr;
public:
@@ -744,7 +780,14 @@ public:
void clear();
/// Add an attribute to the builder.
- AttrBuilder &addAttribute(Attribute::AttrKind Val);
+ AttrBuilder &addAttribute(Attribute::AttrKind Val) {
+ assert((unsigned)Val < Attribute::EndAttrKinds &&
+ "Attribute out of range!");
+ assert(!Attribute::doesAttrKindHaveArgument(Val) &&
+ "Adding integer attribute without adding a value!");
+ Attrs[Val] = true;
+ return *this;
+ }
/// Add the Attribute object to the builder.
AttrBuilder &addAttribute(Attribute A);
@@ -808,6 +851,12 @@ public:
/// Retrieve the byval type.
Type *getByValType() const { return ByValType; }
+ /// Retrieve the sret type.
+ Type *getStructRetType() const { return StructRetType; }
+
+ /// Retrieve the byref type.
+ Type *getByRefType() const { return ByRefType; }
+
/// Retrieve the preallocated type.
Type *getPreallocatedType() const { return PreallocatedType; }
@@ -854,6 +903,12 @@ public:
/// This turns a byval type into the form used internally in Attribute.
AttrBuilder &addByValAttr(Type *Ty);
+ /// This turns a sret type into the form used internally in Attribute.
+ AttrBuilder &addStructRetAttr(Type *Ty);
+
+ /// This turns a byref type into the form used internally in Attribute.
+ AttrBuilder &addByRefAttr(Type *Ty);
+
/// This turns a preallocated type into the form used internally in Attribute.
AttrBuilder &addPreallocatedAttr(Type *Ty);
@@ -886,10 +941,8 @@ public:
bool td_empty() const { return TargetDepAttrs.empty(); }
- bool operator==(const AttrBuilder &B);
- bool operator!=(const AttrBuilder &B) {
- return !(*this == B);
- }
+ bool operator==(const AttrBuilder &B) const;
+ bool operator!=(const AttrBuilder &B) const { return !(*this == B); }
};
namespace AttributeFuncs {
@@ -901,9 +954,24 @@ AttrBuilder typeIncompatible(Type *Ty);
/// attributes for inlining purposes.
bool areInlineCompatible(const Function &Caller, const Function &Callee);
+
+/// Checks if there are any incompatible function attributes between
+/// \p A and \p B.
+///
+/// \param [in] A - The first function to be compared with.
+/// \param [in] B - The second function to be compared with.
+/// \returns true if the functions have compatible attributes.
+bool areOutlineCompatible(const Function &A, const Function &B);
+
/// Merge caller's and callee's attributes.
void mergeAttributesForInlining(Function &Caller, const Function &Callee);
+/// Merges the function attributes from \p ToMerge into function \p Base.
+///
+/// \param [in,out] Base - The function being merged into.
+/// \param [in] ToMerge - The function to merge attributes from.
+void mergeAttributesForOutlining(Function &Base, const Function &ToMerge);
+
} // end namespace AttributeFuncs
} // end namespace llvm
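To illustrate the new type-carrying parameter attributes declared above, here is a minimal C++ sketch (not part of the diff); the helper name tagFirstParamByRef and its parameters are illustrative only:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Mark parameter 0 of F as byref(EltTy) via the new AttrBuilder hook, then
    // read the element type back through the new AttributeList accessor.
    void tagFirstParamByRef(Function &F, Type *EltTy) {
      AttrBuilder B;
      B.addByRefAttr(EltTy);
      F.addParamAttrs(0, B);
      Type *Readback = F.getAttributes().getParamByRefType(0);
      (void)Readback;
    }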
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td b/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td
index 395f9dbfb176..f7ffc888c65a 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td
@@ -1,3 +1,15 @@
+//===- Attributes.td - Defines all LLVM attributes ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all the LLVM attributes.
+//
+//===----------------------------------------------------------------------===//
+
/// Attribute base class.
class Attr<string S> {
// String representation of this attribute in the IR.
@@ -39,6 +51,9 @@ def Builtin : EnumAttr<"builtin">;
/// Pass structure by value.
def ByVal : TypeAttr<"byval">;
+/// Mark in-memory ABI type.
+def ByRef : TypeAttr<"byref">;
+
/// Parameter or return value may not contain uninitialized or poison bits.
def NoUndef : EnumAttr<"noundef">;
@@ -48,6 +63,9 @@ def Cold : EnumAttr<"cold">;
/// Can only be moved to control-equivalent blocks.
def Convergent : EnumAttr<"convergent">;
+/// Marks function as being in a hot path and frequently called.
+def Hot: EnumAttr<"hot">;
+
/// Pointer is known to be dereferenceable.
def Dereferenceable : IntAttr<"dereferenceable">;
@@ -88,6 +106,9 @@ def NoAlias : EnumAttr<"noalias">;
/// Callee isn't recognized as a builtin.
def NoBuiltin : EnumAttr<"nobuiltin">;
+/// Function cannot enter into caller's translation unit.
+def NoCallback : EnumAttr<"nocallback">;
+
/// Function creates no aliases of pointer.
def NoCapture : EnumAttr<"nocapture">;
@@ -106,7 +127,7 @@ def NoInline : EnumAttr<"noinline">;
/// Function is called early and/or often, so lazy binding isn't worthwhile.
def NonLazyBind : EnumAttr<"nonlazybind">;
-/// Disable merging for call sites
+/// Disable merging for specified functions or call sites.
def NoMerge : EnumAttr<"nomerge">;
/// Pointer is known to be not null.
@@ -127,6 +148,9 @@ def NoSync : EnumAttr<"nosync">;
/// Disable Indirect Branch Tracking.
def NoCfCheck : EnumAttr<"nocf_check">;
+/// Function should not be instrumented.
+def NoProfile : EnumAttr<"noprofile">;
+
/// Function doesn't unwind stack.
def NoUnwind : EnumAttr<"nounwind">;
@@ -189,7 +213,7 @@ def StackProtectStrong : EnumAttr<"sspstrong">;
def StrictFP : EnumAttr<"strictfp">;
/// Hidden pointer to structure to return.
-def StructRet : EnumAttr<"sret">;
+def StructRet : TypeAttr<"sret">;
/// AddressSanitizer is on.
def SanitizeAddress : EnumAttr<"sanitize_address">;
@@ -232,6 +256,9 @@ def WriteOnly : EnumAttr<"writeonly">;
/// Zero extended before/after call.
def ZExt : EnumAttr<"zeroext">;
+/// Function is required to make Forward Progress.
+def MustProgress : TypeAttr<"mustprogress">;
+
/// Target-independent string attributes.
def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">;
def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">;
@@ -285,3 +312,4 @@ def : MergeRule<"adjustCallerStackProbes">;
def : MergeRule<"adjustCallerStackProbeSize">;
def : MergeRule<"adjustMinLegalVectorWidth">;
def : MergeRule<"adjustNullPointerValidAttr">;
+def : MergeRule<"setAND<MustProgressAttr>">;
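The new enum attributes defined above surface in C++ as Attribute::AttrKind values; a hedged sketch of attaching them to a function follows (markHotButUnprofiled is a made-up helper):

    #include "llvm/IR/Function.h"
    using namespace llvm;

    void markHotButUnprofiled(Function &F) {
      F.addFnAttr(Attribute::Hot);       // frequently executed
      F.addFnAttr(Attribute::NoProfile); // keep profiling instrumentation out
      if (F.hasFnAttribute(Attribute::Hot))
        F.removeFnAttr(Attribute::Cold); // hot and cold contradict each other
    }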
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/BasicBlock.h b/contrib/llvm-project/llvm/include/llvm/IR/BasicBlock.h
index 24d568a728c6..b86bb16e1239 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/BasicBlock.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/BasicBlock.h
@@ -165,19 +165,24 @@ public:
}
/// Returns a pointer to the first instruction in this block that is not a
- /// PHINode or a debug intrinsic.
- const Instruction* getFirstNonPHIOrDbg() const;
- Instruction* getFirstNonPHIOrDbg() {
+ /// PHINode or a debug intrinsic, or any pseudo operation if \c SkipPseudoOp
+ /// is true.
+ const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) const;
+ Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) {
return const_cast<Instruction *>(
- static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbg());
+ static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbg(
+ SkipPseudoOp));
}
/// Returns a pointer to the first instruction in this block that is not a
- /// PHINode, a debug intrinsic, or a lifetime intrinsic.
- const Instruction* getFirstNonPHIOrDbgOrLifetime() const;
- Instruction* getFirstNonPHIOrDbgOrLifetime() {
+ /// PHINode, a debug intrinsic, or a lifetime intrinsic, or any pseudo
+ /// operation if \c SkipPseudoOp is true.
+ const Instruction *
+ getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) const;
+ Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) {
return const_cast<Instruction *>(
- static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbgOrLifetime());
+ static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbgOrLifetime(
+ SkipPseudoOp));
}
/// Returns an iterator to the first instruction in this block that is
@@ -191,16 +196,18 @@ public:
}
/// Return a const iterator range over the instructions in the block, skipping
- /// any debug instructions.
+ /// any debug instructions. Skip any pseudo operations as well if \c
+ /// SkipPseudoOp is true.
iterator_range<filter_iterator<BasicBlock::const_iterator,
std::function<bool(const Instruction &)>>>
- instructionsWithoutDebug() const;
+ instructionsWithoutDebug(bool SkipPseudoOp = false) const;
/// Return an iterator range over the instructions in the block, skipping any
- /// debug instructions.
- iterator_range<filter_iterator<BasicBlock::iterator,
- std::function<bool(Instruction &)>>>
- instructionsWithoutDebug();
+ /// debug instructions. Skip any pseudo operations as well if \c
+ /// SkipPseudoOp is true.
+ iterator_range<
+ filter_iterator<BasicBlock::iterator, std::function<bool(Instruction &)>>>
+ instructionsWithoutDebug(bool SkipPseudoOp = false);
/// Return the size of the basic block ignoring debug instructions
filter_iterator<BasicBlock::const_iterator,
@@ -320,7 +327,9 @@ public:
phi_iterator_impl() = default;
// Allow conversion between instantiations where valid.
- template <typename PHINodeU, typename BBIteratorU>
+ template <typename PHINodeU, typename BBIteratorU,
+ typename = std::enable_if_t<
+ std::is_convertible<PHINodeU *, PHINodeT *>::value>>
phi_iterator_impl(const phi_iterator_impl<PHINodeU, BBIteratorU> &Arg)
: PN(Arg.PN) {}
@@ -389,22 +398,49 @@ public:
/// Split the basic block into two basic blocks at the specified instruction.
///
- /// Note that all instructions BEFORE the specified iterator stay as part of
- /// the original basic block, an unconditional branch is added to the original
- /// BB, and the rest of the instructions in the BB are moved to the new BB,
- /// including the old terminator. The newly formed BasicBlock is returned.
- /// This function invalidates the specified iterator.
+ /// If \p Before is true, splitBasicBlockBefore handles the
+ /// block splitting. Otherwise, execution proceeds as described below.
+ ///
+ /// Note that all instructions BEFORE the specified iterator
+ /// stay as part of the original basic block, an unconditional branch is added
+ /// to the original BB, and the rest of the instructions in the BB are moved
+ /// to the new BB, including the old terminator. The newly formed basic block
+ /// is returned. This function invalidates the specified iterator.
///
/// Note that this only works on well formed basic blocks (must have a
- /// terminator), and 'I' must not be the end of instruction list (which would
- /// cause a degenerate basic block to be formed, having a terminator inside of
- /// the basic block).
+ /// terminator), and \p 'I' must not be the end of instruction list (which
+ /// would cause a degenerate basic block to be formed, having a terminator
+ /// inside of the basic block).
///
/// Also note that this doesn't preserve any passes. To split blocks while
/// keeping loop information consistent, use the SplitBlock utility function.
- BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = "");
- BasicBlock *splitBasicBlock(Instruction *I, const Twine &BBName = "") {
- return splitBasicBlock(I->getIterator(), BBName);
+ BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = "",
+ bool Before = false);
+ BasicBlock *splitBasicBlock(Instruction *I, const Twine &BBName = "",
+ bool Before = false) {
+ return splitBasicBlock(I->getIterator(), BBName, Before);
+ }
+
+ /// Split the basic block into two basic blocks at the specified instruction
+ /// and insert the new basic blocks as the predecessor of the current block.
+ ///
+ /// This function ensures all instructions AFTER and including the specified
+ /// iterator \p I are part of the original basic block. All instructions
+ /// BEFORE the iterator \p I are moved to the new BB and an unconditional
+ /// branch is added to the new BB. The new basic block is returned.
+ ///
+ /// Note that this only works on well formed basic blocks (must have a
+ /// terminator), and \p 'I' must not be the end of instruction list (which
+ /// would cause a degenerate basic block to be formed, having a terminator
+ /// inside of the basic block). \p 'I' cannot be an iterator for a PHINode
+ /// with multiple incoming blocks.
+ ///
+ /// Also note that this doesn't preserve any passes. To split blocks while
+ /// keeping loop information consistent, use the SplitBlockBefore utility
+ /// function.
+ BasicBlock *splitBasicBlockBefore(iterator I, const Twine &BBName = "");
+ BasicBlock *splitBasicBlockBefore(Instruction *I, const Twine &BBName = "") {
+ return splitBasicBlockBefore(I->getIterator(), BBName);
}
/// Returns true if there are any uses of this basic block other than
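A short sketch of the new splitBasicBlockBefore() entry point (the hoistPrefix helper is hypothetical); splitBasicBlock(I, Name, /*Before=*/true) forwards to the same code path:

    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Everything before I moves into a new predecessor block that ends with an
    // unconditional branch back to I's block; the new block is returned.
    BasicBlock *hoistPrefix(Instruction *I) {
      BasicBlock *BB = I->getParent();
      return BB->splitBasicBlockBefore(I, BB->getName() + ".pre");
    }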
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/CallingConv.h b/contrib/llvm-project/llvm/include/llvm/IR/CallingConv.h
index d0906de3ea4e..6a4e368b2e9d 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/CallingConv.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/CallingConv.h
@@ -241,6 +241,9 @@ namespace CallingConv {
/// The remainder matches the regular calling convention.
WASM_EmscriptenInvoke = 99,
+ /// Calling convention used for AMD graphics targets.
+ AMDGPU_Gfx = 100,
+
/// The highest possible calling convention ID. Must be some 2^k - 1.
MaxID = 1023
};
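Using the new calling-convention ID is a one-liner; sketch only, useGfxCC is not a real API:

    #include "llvm/IR/Function.h"
    using namespace llvm;

    void useGfxCC(Function &F) {
      // Tag F with the AMD graphics calling convention added above.
      F.setCallingConv(CallingConv::AMDGPU_Gfx);
    }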
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Constant.h b/contrib/llvm-project/llvm/include/llvm/IR/Constant.h
index 9a1d2b80c48e..71692c746015 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Constant.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Constant.h
@@ -78,11 +78,13 @@ public:
bool isMinSignedValue() const;
/// Return true if this is a finite and non-zero floating-point scalar
- /// constant or a vector constant with all finite and non-zero elements.
+ /// constant or a fixed width vector constant with all finite and non-zero
+ /// elements.
bool isFiniteNonZeroFP() const;
- /// Return true if this is a normal (as opposed to denormal) floating-point
- /// scalar constant or a vector constant with all normal elements.
+ /// Return true if this is a normal (as opposed to denormal, infinity, nan,
+ /// or zero) floating-point scalar constant or a vector constant with all
+ /// normal elements. See APFloat::isNormal.
bool isNormalFP() const;
/// Return true if this scalar has an exact multiplicative inverse or this
@@ -99,12 +101,18 @@ public:
/// lane, the constants still match.
bool isElementWiseEqual(Value *Y) const;
- /// Return true if this is a vector constant that includes any undefined
+ /// Return true if this is a vector constant that includes any undef or
+ /// poison elements. Since it is impossible to inspect a scalable vector
+ /// element-wise at compile time, this function returns true only if the
+ /// entire vector is undef or poison.
+ bool containsUndefOrPoisonElement() const;
+
+ /// Return true if this is a vector constant that includes any poison
/// elements.
- bool containsUndefElement() const;
+ bool containsPoisonElement() const;
- /// Return true if this is a vector constant that includes any constant
- /// expressions.
+ /// Return true if this is a fixed width vector constant that includes
+ /// any constant expressions.
bool containsConstantExpression() const;
/// Return true if evaluation of this constant could trap. This is true for
@@ -200,6 +208,16 @@ public:
/// Try to replace undefined constant C or undefined elements in C with
/// Replacement. If no changes are made, the constant C is returned.
static Constant *replaceUndefsWith(Constant *C, Constant *Replacement);
+
+ /// Merges undefs of a Constant with another Constant, along with the
+ /// undefs already present. Other doesn't have to be the same type as C, but
+ /// both must either be scalars or vectors with the same element count. If no
+ /// changes are made, the constant C is returned.
+ static Constant *mergeUndefsWith(Constant *C, Constant *Other);
+
+ /// Return true if a constant is ConstantData or a ConstantAggregate or
+ /// ConstantExpr that contain only ConstantData.
+ bool isManifestConstant() const;
};
} // end namespace llvm
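A hedged sketch of the new undef/poison helpers; foldUndefLanes and its give-up policy are invented for illustration:

    #include "llvm/IR/Constant.h"
    using namespace llvm;

    // Merge the undef lanes of Other into C (both must be scalars or vectors
    // with the same element count), then refuse to proceed if any lane is
    // still undef or poison.
    Constant *foldUndefLanes(Constant *C, Constant *Other) {
      Constant *Merged = Constant::mergeUndefsWith(C, Other);
      return Merged->containsUndefOrPoisonElement() ? nullptr : Merged;
    }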
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/ConstantRange.h b/contrib/llvm-project/llvm/include/llvm/IR/ConstantRange.h
index 8ecb9aa0ce02..20e8e67436a4 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/ConstantRange.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/ConstantRange.h
@@ -150,6 +150,14 @@ public:
const APInt &Other,
unsigned NoWrapKind);
+ /// Returns true if ConstantRange calculations are supported for intrinsic
+ /// with \p IntrinsicID.
+ static bool isIntrinsicSupported(Intrinsic::ID IntrinsicID);
+
+ /// Compute range of intrinsic result for the given operand ranges.
+ static ConstantRange intrinsic(Intrinsic::ID IntrinsicID,
+ ArrayRef<ConstantRange> Ops);
+
/// Set up \p Pred and \p RHS such that
/// ConstantRange::makeExactICmpRegion(Pred, RHS) == *this. Return true if
/// successful.
@@ -253,6 +261,14 @@ public:
return !operator==(CR);
}
+ /// Compute the maximal number of active bits needed to represent every value
+ /// in this range.
+ unsigned getActiveBits() const;
+
+ /// Compute the maximal number of bits needed to represent every value
+ /// in this signed range.
+ unsigned getMinSignedBits() const;
+
/// Subtract the specified constant from the endpoints of this constant range.
ConstantRange subtract(const APInt &CI) const;
@@ -401,6 +417,11 @@ public:
/// value in \p Other.
ConstantRange srem(const ConstantRange &Other) const;
+ /// Return a new range representing the possible values resulting from
+ /// a binary-xor of a value in this range by an all-one value,
+ /// aka bitwise complement operation.
+ ConstantRange binaryNot() const;
+
/// Return a new range representing the possible values resulting
/// from a binary-and of a value in this range by a value in \p Other.
ConstantRange binaryAnd(const ConstantRange &Other) const;
@@ -456,8 +477,9 @@ public:
ConstantRange inverse() const;
/// Calculate absolute value range. If the original range contains signed
- /// min, then the resulting range will also contain signed min.
- ConstantRange abs() const;
+ /// min, then the resulting range will contain signed min if and only if
+ /// \p IntMinIsPoison is false.
+ ConstantRange abs(bool IntMinIsPoison = false) const;
/// Represents whether an operation on the given constant range is known to
/// always or never overflow.
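The new ConstantRange queries in use, as a self-contained sketch (values in the comments assume an 8-bit signed range [-4, 3)):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    void rangeQueries() {
      ConstantRange CR(APInt(8, -4, /*isSigned=*/true), APInt(8, 3));
      ConstantRange Abs = CR.abs(/*IntMinIsPoison=*/true); // [0, 5)
      ConstantRange Not = CR.binaryNot();                  // bitwise complement
      unsigned ActiveBits = CR.getActiveBits();    // bits for unsigned values
      unsigned SignedBits = CR.getMinSignedBits(); // bits for signed values
      (void)Abs; (void)Not; (void)ActiveBits; (void)SignedBits;
    }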
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Constants.h b/contrib/llvm-project/llvm/include/llvm/IR/Constants.h
index 8e2dba9b2417..ac802232c23d 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Constants.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Constants.h
@@ -88,8 +88,10 @@ public:
static ConstantInt *getTrue(LLVMContext &Context);
static ConstantInt *getFalse(LLVMContext &Context);
+ static ConstantInt *getBool(LLVMContext &Context, bool V);
static Constant *getTrue(Type *Ty);
static Constant *getFalse(Type *Ty);
+ static Constant *getBool(Type *Ty, bool V);
/// If Ty is a vector type, return a Constant with a splat of the given
/// value. Otherwise return a ConstantInt for the given value.
@@ -592,14 +594,13 @@ class ConstantDataSequential : public ConstantData {
/// the same value but different type. For example, 0,0,0,1 could be a 4
/// element array of i8, or a 1-element array of i32. They'll both end up in
/// the same StringMap bucket, linked up.
- ConstantDataSequential *Next;
+ std::unique_ptr<ConstantDataSequential> Next;
void destroyConstantImpl();
protected:
explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data)
- : ConstantData(ty, VT), DataElements(Data), Next(nullptr) {}
- ~ConstantDataSequential() { delete Next; }
+ : ConstantData(ty, VT), DataElements(Data) {}
static Constant *getImpl(StringRef Bytes, Type *Ty);
@@ -889,6 +890,42 @@ struct OperandTraits<BlockAddress> :
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BlockAddress, Value)
+/// Wrapper for a value that functionally represents the original function.
+/// This can be a function, global alias to a function, or an ifunc.
+class DSOLocalEquivalent final : public Constant {
+ friend class Constant;
+
+ DSOLocalEquivalent(GlobalValue *GV);
+
+ void *operator new(size_t s) { return User::operator new(s, 1); }
+
+ void destroyConstantImpl();
+ Value *handleOperandChangeImpl(Value *From, Value *To);
+
+public:
+ /// Return a DSOLocalEquivalent for the specified global value.
+ static DSOLocalEquivalent *get(GlobalValue *GV);
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ GlobalValue *getGlobalValue() const {
+ return cast<GlobalValue>(Op<0>().get());
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == DSOLocalEquivalentVal;
+ }
+};
+
+template <>
+struct OperandTraits<DSOLocalEquivalent>
+ : public FixedNumOperandTraits<DSOLocalEquivalent, 1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(DSOLocalEquivalent, Value)
+
//===----------------------------------------------------------------------===//
/// A constant value that is initialized with an expression using
/// other constant values.
@@ -959,6 +996,7 @@ public:
static Constant *getAnd(Constant *C1, Constant *C2);
static Constant *getOr(Constant *C1, Constant *C2);
static Constant *getXor(Constant *C1, Constant *C2);
+ static Constant *getUMin(Constant *C1, Constant *C2);
static Constant *getShl(Constant *C1, Constant *C2,
bool HasNUW = false, bool HasNSW = false);
static Constant *getLShr(Constant *C1, Constant *C2, bool isExact = false);
@@ -1034,6 +1072,12 @@ public:
return getLShr(C1, C2, true);
}
+ /// If C is a scalar/fixed width vector of known powers of 2, then this
+ /// function returns a new scalar/fixed width vector obtained from logBase2
+ /// of C. Undef vector elements are set to zero.
+ /// Return a null pointer otherwise.
+ static Constant *getExactLogBase2(Constant *C);
+
/// Return the identity constant for a binary opcode.
/// The identity constant C is defined as X op C = X and C op X = X for every
/// X when the binary operation is commutative. If the binop is not
@@ -1306,13 +1350,16 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantExpr, Constant)
/// can appear to have different bit patterns at each use. See
/// LangRef.html#undefvalues for details.
///
-class UndefValue final : public ConstantData {
+class UndefValue : public ConstantData {
friend class Constant;
explicit UndefValue(Type *T) : ConstantData(T, UndefValueVal) {}
void destroyConstantImpl();
+protected:
+ explicit UndefValue(Type *T, ValueTy vty) : ConstantData(T, vty) {}
+
public:
UndefValue(const UndefValue &) = delete;
@@ -1339,7 +1386,49 @@ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
- return V->getValueID() == UndefValueVal;
+ return V->getValueID() == UndefValueVal ||
+ V->getValueID() == PoisonValueVal;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// In order to facilitate speculative execution, many instructions do not
+/// invoke immediate undefined behavior when provided with illegal operands,
+/// and return a poison value instead.
+///
+/// see LangRef.html#poisonvalues for details.
+///
+class PoisonValue final : public UndefValue {
+ friend class Constant;
+
+ explicit PoisonValue(Type *T) : UndefValue(T, PoisonValueVal) {}
+
+ void destroyConstantImpl();
+
+public:
+ PoisonValue(const PoisonValue &) = delete;
+
+ /// Static factory methods - Return a 'poison' object of the specified type.
+ static PoisonValue *get(Type *T);
+
+ /// If this poison has array or vector type, return a poison with the right
+ /// element type.
+ PoisonValue *getSequentialElement() const;
+
+ /// If this poison has struct type, return a poison with the right element
+ /// type for the specified element.
+ PoisonValue *getStructElement(unsigned Elt) const;
+
+ /// Return a poison of the right value for the specified GEP index if we can,
+ /// otherwise return null (e.g. if C is a ConstantExpr).
+ PoisonValue *getElementValue(Constant *C) const;
+
+ /// Return a poison of the right value for the specified GEP index.
+ PoisonValue *getElementValue(unsigned Idx) const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == PoisonValueVal;
}
};
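A minimal sketch showing that the new PoisonValue still behaves as an UndefValue for isa<> queries, which is what the relaxed UndefValue::classof above enables (makePoison is illustrative):

    #include <cassert>
    #include "llvm/IR/Constants.h"
    using namespace llvm;

    Constant *makePoison(Type *Ty) {
      PoisonValue *P = PoisonValue::get(Ty);
      assert(isa<UndefValue>(P) && "poison is a (more constrained) undef");
      return P;
    }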
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/DIBuilder.h b/contrib/llvm-project/llvm/include/llvm/IR/DIBuilder.h
index d1c7d126b5a9..e0238567f251 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/DIBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/DIBuilder.h
@@ -199,6 +199,12 @@ namespace llvm {
unsigned Encoding,
DINode::DIFlags Flags = DINode::FlagZero);
+ /// Create debugging information entry for a string
+ /// type.
+ /// \param Name Type name.
+ /// \param SizeInBits Size of the type.
+ DIStringType *createStringType(StringRef Name, uint64_t SizeInBits);
+
/// Create debugging information entry for a qualified
/// type, e.g. 'const int'.
/// \param Tag Tag identifying type, e.g. dwarf::TAG_volatile_type
@@ -488,8 +494,24 @@ namespace llvm {
/// \param AlignInBits Alignment.
/// \param Ty Element type.
/// \param Subscripts Subscripts.
- DICompositeType *createArrayType(uint64_t Size, uint32_t AlignInBits,
- DIType *Ty, DINodeArray Subscripts);
+ /// \param DataLocation The location of the raw data of a descriptor-based
+ /// Fortran array, either a DIExpression* or
+ /// a DIVariable*.
+ /// \param Associated The associated attribute of a descriptor-based
+ /// Fortran array, either a DIExpression* or
+ /// a DIVariable*.
+ /// \param Allocated The allocated attribute of a descriptor-based
+ /// Fortran array, either a DIExpression* or
+ /// a DIVariable*.
+ /// \param Rank The rank attribute of a descriptor-based
+ /// Fortran array, either a DIExpression* or
+ /// a DIVariable*.
+ DICompositeType *createArrayType(
+ uint64_t Size, uint32_t AlignInBits, DIType *Ty, DINodeArray Subscripts,
+ PointerUnion<DIExpression *, DIVariable *> DataLocation = nullptr,
+ PointerUnion<DIExpression *, DIVariable *> Associated = nullptr,
+ PointerUnion<DIExpression *, DIVariable *> Allocated = nullptr,
+ PointerUnion<DIExpression *, DIVariable *> Rank = nullptr);
/// Create debugging information entry for a vector type.
/// \param Size Array size.
@@ -576,6 +598,12 @@ namespace llvm {
DISubrange *getOrCreateSubrange(Metadata *Count, Metadata *LowerBound,
Metadata *UpperBound, Metadata *Stride);
+ DIGenericSubrange *
+ getOrCreateGenericSubrange(DIGenericSubrange::BoundType Count,
+ DIGenericSubrange::BoundType LowerBound,
+ DIGenericSubrange::BoundType UpperBound,
+ DIGenericSubrange::BoundType Stride);
+
/// Create a new descriptor for the specified variable.
/// \param Context Variable scope.
/// \param Name Name of the variable.
@@ -744,14 +772,18 @@ namespace llvm {
/// definitions as they would appear on a command line.
/// \param IncludePath The path to the module map file.
/// \param APINotesFile The path to an API notes file for this module.
- /// \param File Source file of the module declaration. Used for
- /// Fortran modules.
- /// \param LineNo Source line number of the module declaration.
+ /// \param File Source file of the module.
+ /// Used for Fortran modules.
+ /// \param LineNo Source line number of the module.
/// Used for Fortran modules.
+ /// \param IsDecl This is a module declaration; defaults to false;
+ /// when set to true, only Scope and Name are required
+ /// as this entry is just a hint for the debugger to find
+ /// the corresponding definition in the global scope.
DIModule *createModule(DIScope *Scope, StringRef Name,
StringRef ConfigurationMacros, StringRef IncludePath,
StringRef APINotesFile = {}, DIFile *File = nullptr,
- unsigned LineNo = 0);
+ unsigned LineNo = 0, bool IsDecl = false);
/// This creates a descriptor for a lexical block with a new file
/// attached. This merely extends the existing
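A hedged DIBuilder sketch for the Fortran-oriented additions; emitFortranTypes, its arguments, and the size constants are illustrative:

    #include "llvm/IR/DIBuilder.h"
    using namespace llvm;

    // Emit a CHARACTER type plus a descriptor-based array type whose data
    // location is carried by a DIVariable.
    void emitFortranTypes(DIBuilder &DIB, DIType *EltTy, DINodeArray Subscripts,
                          DIVariable *DataLoc) {
      DIStringType *CharTy =
          DIB.createStringType("character(1)", /*SizeInBits=*/8);
      DICompositeType *ArrTy = DIB.createArrayType(
          /*Size=*/256, /*AlignInBits=*/32, EltTy, Subscripts,
          /*DataLocation=*/DataLoc);
      (void)CharTy; (void)ArrTy;
    }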
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/DataLayout.h b/contrib/llvm-project/llvm/include/llvm/IR/DataLayout.h
index 17297bb8b309..eb031613a935 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/DataLayout.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/DataLayout.h
@@ -123,6 +123,7 @@ private:
unsigned AllocaAddrSpace;
MaybeAlign StackNaturalAlign;
unsigned ProgramAddrSpace;
+ unsigned DefaultGlobalsAddrSpace;
MaybeAlign FunctionPtrAlign;
FunctionPtrAlignType TheFunctionPtrAlignType;
@@ -160,12 +161,7 @@ private:
using PointersTy = SmallVector<PointerAlignElem, 8>;
PointersTy Pointers;
- PointersTy::const_iterator
- findPointerLowerBound(uint32_t AddressSpace) const {
- return const_cast<DataLayout *>(this)->findPointerLowerBound(AddressSpace);
- }
-
- PointersTy::iterator findPointerLowerBound(uint32_t AddressSpace);
+ const PointerAlignElem &getPointerAlignElem(uint32_t AddressSpace) const;
// The StructType -> StructLayout map.
mutable void *LayoutMap = nullptr;
@@ -174,19 +170,25 @@ private:
/// well-defined bitwise representation.
SmallVector<unsigned, 8> NonIntegralAddressSpaces;
- void setAlignment(AlignTypeEnum align_type, Align abi_align, Align pref_align,
- uint32_t bit_width);
- Align getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width,
- bool ABIAlign, Type *Ty) const;
- void setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, Align PrefAlign,
- uint32_t TypeByteWidth, uint32_t IndexWidth);
+ /// Attempts to set the alignment of the given type. Returns an error
+ /// description on failure.
+ Error setAlignment(AlignTypeEnum align_type, Align abi_align,
+ Align pref_align, uint32_t bit_width);
+
+ /// Attempts to set the alignment of a pointer in the given address space.
+ /// Returns an error description on failure.
+ Error setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, Align PrefAlign,
+ uint32_t TypeByteWidth, uint32_t IndexWidth);
+
+ /// Internal helper to get alignment for integer of given bitwidth.
+ Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const;
/// Internal helper method that returns requested alignment for type.
Align getAlignment(Type *Ty, bool abi_or_pref) const;
- /// Parses a target data specification string. Assert if the string is
- /// malformed.
- void parseSpecifier(StringRef LayoutDescription);
+ /// Attempts to parse a target data specification string and reports an error
+ /// if the string is malformed.
+ Error parseSpecifier(StringRef Desc);
// Free all internal data structures.
void clear();
@@ -213,6 +215,7 @@ public:
FunctionPtrAlign = DL.FunctionPtrAlign;
TheFunctionPtrAlignType = DL.TheFunctionPtrAlignType;
ProgramAddrSpace = DL.ProgramAddrSpace;
+ DefaultGlobalsAddrSpace = DL.DefaultGlobalsAddrSpace;
ManglingMode = DL.ManglingMode;
LegalIntWidths = DL.LegalIntWidths;
Alignments = DL.Alignments;
@@ -229,6 +232,10 @@ public:
/// Parse a data layout string (with fallback to default values).
void reset(StringRef LayoutDescription);
+ /// Parse a data layout string and return the layout. Return an error
+ /// description on failure.
+ static Expected<DataLayout> parse(StringRef LayoutDescription);
+
/// Layout endianness...
bool isLittleEndian() const { return !BigEndian; }
bool isBigEndian() const { return BigEndian; }
@@ -285,6 +292,9 @@ public:
}
unsigned getProgramAddressSpace() const { return ProgramAddrSpace; }
+ unsigned getDefaultGlobalsAddressSpace() const {
+ return DefaultGlobalsAddrSpace;
+ }
bool hasMicrosoftFastStdCallMangling() const {
return ManglingMode == MM_WinCOFFX86;
@@ -378,7 +388,7 @@ public:
bool isNonIntegralAddressSpace(unsigned AddrSpace) const {
ArrayRef<unsigned> NonIntegralSpaces = getNonIntegralAddressSpaces();
- return find(NonIntegralSpaces, AddrSpace) != NonIntegralSpaces.end();
+ return is_contained(NonIntegralSpaces, AddrSpace);
}
bool isNonIntegralPointerType(PointerType *PT) const {
@@ -520,7 +530,9 @@ public:
/// Returns the minimum ABI-required alignment for an integer type of
/// the specified bitwidth.
- Align getABIIntegerTypeAlignment(unsigned BitWidth) const;
+ Align getABIIntegerTypeAlignment(unsigned BitWidth) const {
+ return getIntegerAlignment(BitWidth, /* abi_or_pref */ true);
+ }
/// Returns the preferred stack/global alignment for the specified
/// type.
@@ -678,6 +690,8 @@ inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const {
case Type::PPC_FP128TyID:
case Type::FP128TyID:
return TypeSize::Fixed(128);
+ case Type::X86_AMXTyID:
+ return TypeSize::Fixed(8192);
// In memory objects this is always aligned to a higher boundary, but
// only 80 bits contain information.
case Type::X86_FP80TyID:
@@ -686,9 +700,9 @@ inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const {
case Type::ScalableVectorTyID: {
VectorType *VTy = cast<VectorType>(Ty);
auto EltCnt = VTy->getElementCount();
- uint64_t MinBits = EltCnt.Min *
- getTypeSizeInBits(VTy->getElementType()).getFixedSize();
- return TypeSize(MinBits, EltCnt.Scalable);
+ uint64_t MinBits = EltCnt.getKnownMinValue() *
+ getTypeSizeInBits(VTy->getElementType()).getFixedSize();
+ return TypeSize(MinBits, EltCnt.isScalable());
}
default:
llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
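With parse() returning Expected<DataLayout>, malformed layout strings can be reported instead of asserting; parseOrDefault and its fallback policy are made up for this sketch:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    DataLayout parseOrDefault(StringRef Desc) {
      Expected<DataLayout> DL = DataLayout::parse(Desc);
      if (!DL) {
        errs() << "invalid datalayout: " << toString(DL.takeError()) << "\n";
        return DataLayout("");   // fall back to the default layout
      }
      return *DL;
    }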
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoMetadata.h b/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoMetadata.h
index 7d7cc4de7937..22dd5ee6efac 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -182,6 +182,7 @@ public:
case DISubrangeKind:
case DIEnumeratorKind:
case DIBasicTypeKind:
+ case DIStringTypeKind:
case DIDerivedTypeKind:
case DICompositeTypeKind:
case DISubroutineTypeKind:
@@ -200,6 +201,7 @@ public:
case DIObjCPropertyKind:
case DIImportedEntityKind:
case DIModuleKind:
+ case DIGenericSubrangeKind:
return true;
}
}
@@ -238,9 +240,8 @@ class GenericDINode : public DINode {
StorageType Storage, bool ShouldCreate = true);
TempGenericDINode cloneImpl() const {
- return getTemporary(
- getContext(), getTag(), getHeader(),
- SmallVector<Metadata *, 4>(dwarf_op_begin(), dwarf_op_end()));
+ return getTemporary(getContext(), getTag(), getHeader(),
+ SmallVector<Metadata *, 4>(dwarf_operands()));
}
public:
@@ -350,6 +351,52 @@ public:
}
};
+class DIGenericSubrange : public DINode {
+ friend class LLVMContextImpl;
+ friend class MDNode;
+
+ DIGenericSubrange(LLVMContext &C, StorageType Storage,
+ ArrayRef<Metadata *> Ops)
+ : DINode(C, DIGenericSubrangeKind, Storage,
+ dwarf::DW_TAG_generic_subrange, Ops) {}
+
+ ~DIGenericSubrange() = default;
+
+ static DIGenericSubrange *getImpl(LLVMContext &Context, Metadata *CountNode,
+ Metadata *LowerBound, Metadata *UpperBound,
+ Metadata *Stride, StorageType Storage,
+ bool ShouldCreate = true);
+
+ TempDIGenericSubrange cloneImpl() const {
+ return getTemporary(getContext(), getRawCountNode(), getRawLowerBound(),
+ getRawUpperBound(), getRawStride());
+ }
+
+public:
+ DEFINE_MDNODE_GET(DIGenericSubrange,
+ (Metadata * CountNode, Metadata *LowerBound,
+ Metadata *UpperBound, Metadata *Stride),
+ (CountNode, LowerBound, UpperBound, Stride))
+
+ TempDIGenericSubrange clone() const { return cloneImpl(); }
+
+ Metadata *getRawCountNode() const { return getOperand(0).get(); }
+ Metadata *getRawLowerBound() const { return getOperand(1).get(); }
+ Metadata *getRawUpperBound() const { return getOperand(2).get(); }
+ Metadata *getRawStride() const { return getOperand(3).get(); }
+
+ using BoundType = PointerUnion<DIVariable *, DIExpression *>;
+
+ BoundType getCount() const;
+ BoundType getLowerBound() const;
+ BoundType getUpperBound() const;
+ BoundType getStride() const;
+
+ static bool classof(const Metadata *MD) {
+ return MD->getMetadataID() == DIGenericSubrangeKind;
+ }
+};
+
/// Enumeration value.
///
/// TODO: Add a pointer to the context (DW_TAG_enumeration_type) once that no
@@ -451,6 +498,7 @@ public:
default:
return false;
case DIBasicTypeKind:
+ case DIStringTypeKind:
case DIDerivedTypeKind:
case DICompositeTypeKind:
case DISubroutineTypeKind:
@@ -697,6 +745,7 @@ public:
default:
return false;
case DIBasicTypeKind:
+ case DIStringTypeKind:
case DIDerivedTypeKind:
case DICompositeTypeKind:
case DISubroutineTypeKind:
@@ -747,6 +796,12 @@ public:
DEFINE_MDNODE_GET(DIBasicType, (unsigned Tag, StringRef Name),
(Tag, Name, 0, 0, 0, FlagZero))
DEFINE_MDNODE_GET(DIBasicType,
+ (unsigned Tag, StringRef Name, uint64_t SizeInBits),
+ (Tag, Name, SizeInBits, 0, 0, FlagZero))
+ DEFINE_MDNODE_GET(DIBasicType,
+ (unsigned Tag, MDString *Name, uint64_t SizeInBits),
+ (Tag, Name, SizeInBits, 0, 0, FlagZero))
+ DEFINE_MDNODE_GET(DIBasicType,
(unsigned Tag, StringRef Name, uint64_t SizeInBits,
uint32_t AlignInBits, unsigned Encoding, DIFlags Flags),
(Tag, Name, SizeInBits, AlignInBits, Encoding, Flags))
@@ -770,6 +825,81 @@ public:
}
};
+/// String type, Fortran CHARACTER(n)
+class DIStringType : public DIType {
+ friend class LLVMContextImpl;
+ friend class MDNode;
+
+ unsigned Encoding;
+
+ DIStringType(LLVMContext &C, StorageType Storage, unsigned Tag,
+ uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding,
+ ArrayRef<Metadata *> Ops)
+ : DIType(C, DIStringTypeKind, Storage, Tag, 0, SizeInBits, AlignInBits, 0,
+ FlagZero, Ops),
+ Encoding(Encoding) {}
+ ~DIStringType() = default;
+
+ static DIStringType *getImpl(LLVMContext &Context, unsigned Tag,
+ StringRef Name, Metadata *StringLength,
+ Metadata *StrLenExp, uint64_t SizeInBits,
+ uint32_t AlignInBits, unsigned Encoding,
+ StorageType Storage, bool ShouldCreate = true) {
+ return getImpl(Context, Tag, getCanonicalMDString(Context, Name),
+ StringLength, StrLenExp, SizeInBits, AlignInBits, Encoding,
+ Storage, ShouldCreate);
+ }
+ static DIStringType *getImpl(LLVMContext &Context, unsigned Tag,
+ MDString *Name, Metadata *StringLength,
+ Metadata *StrLenExp, uint64_t SizeInBits,
+ uint32_t AlignInBits, unsigned Encoding,
+ StorageType Storage, bool ShouldCreate = true);
+
+ TempDIStringType cloneImpl() const {
+ return getTemporary(getContext(), getTag(), getRawName(),
+ getRawStringLength(), getRawStringLengthExp(),
+ getSizeInBits(), getAlignInBits(), getEncoding());
+ }
+
+public:
+ DEFINE_MDNODE_GET(DIStringType,
+ (unsigned Tag, StringRef Name, uint64_t SizeInBits,
+ uint32_t AlignInBits),
+ (Tag, Name, nullptr, nullptr, SizeInBits, AlignInBits, 0))
+ DEFINE_MDNODE_GET(DIStringType,
+ (unsigned Tag, MDString *Name, Metadata *StringLength,
+ Metadata *StringLengthExp, uint64_t SizeInBits,
+ uint32_t AlignInBits, unsigned Encoding),
+ (Tag, Name, StringLength, StringLengthExp, SizeInBits,
+ AlignInBits, Encoding))
+ DEFINE_MDNODE_GET(DIStringType,
+ (unsigned Tag, StringRef Name, Metadata *StringLength,
+ Metadata *StringLengthExp, uint64_t SizeInBits,
+ uint32_t AlignInBits, unsigned Encoding),
+ (Tag, Name, StringLength, StringLengthExp, SizeInBits,
+ AlignInBits, Encoding))
+
+ TempDIStringType clone() const { return cloneImpl(); }
+
+ static bool classof(const Metadata *MD) {
+ return MD->getMetadataID() == DIStringTypeKind;
+ }
+
+ DIVariable *getStringLength() const {
+ return cast_or_null<DIVariable>(getRawStringLength());
+ }
+
+ DIExpression *getStringLengthExp() const {
+ return cast_or_null<DIExpression>(getRawStringLengthExp());
+ }
+
+ unsigned getEncoding() const { return Encoding; }
+
+ Metadata *getRawStringLength() const { return getOperand(3); }
+
+ Metadata *getRawStringLengthExp() const { return getOperand(4); }
+};
+
/// Derived types.
///
/// This includes qualified types, pointers, references, friends, typedefs, and
@@ -942,13 +1072,14 @@ class DICompositeType : public DIType {
DINodeArray Elements, unsigned RuntimeLang, DIType *VTableHolder,
DITemplateParameterArray TemplateParams, StringRef Identifier,
DIDerivedType *Discriminator, Metadata *DataLocation,
+ Metadata *Associated, Metadata *Allocated, Metadata *Rank,
StorageType Storage, bool ShouldCreate = true) {
- return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File,
- Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits,
- Flags, Elements.get(), RuntimeLang, VTableHolder,
- TemplateParams.get(),
- getCanonicalMDString(Context, Identifier), Discriminator,
- DataLocation, Storage, ShouldCreate);
+ return getImpl(
+ Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope,
+ BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements.get(),
+ RuntimeLang, VTableHolder, TemplateParams.get(),
+ getCanonicalMDString(Context, Identifier), Discriminator, DataLocation,
+ Associated, Allocated, Rank, Storage, ShouldCreate);
}
static DICompositeType *
getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
@@ -957,6 +1088,7 @@ class DICompositeType : public DIType {
DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams,
MDString *Identifier, Metadata *Discriminator, Metadata *DataLocation,
+ Metadata *Associated, Metadata *Allocated, Metadata *Rank,
StorageType Storage, bool ShouldCreate = true);
TempDICompositeType cloneImpl() const {
@@ -965,7 +1097,8 @@ class DICompositeType : public DIType {
getAlignInBits(), getOffsetInBits(), getFlags(),
getElements(), getRuntimeLang(), getVTableHolder(),
getTemplateParams(), getIdentifier(),
- getDiscriminator(), getRawDataLocation());
+ getDiscriminator(), getRawDataLocation(),
+ getRawAssociated(), getRawAllocated(), getRawRank());
}
public:
@@ -977,10 +1110,11 @@ public:
DINodeArray Elements, unsigned RuntimeLang, DIType *VTableHolder,
DITemplateParameterArray TemplateParams = nullptr,
StringRef Identifier = "", DIDerivedType *Discriminator = nullptr,
- Metadata *DataLocation = nullptr),
+ Metadata *DataLocation = nullptr, Metadata *Associated = nullptr,
+ Metadata *Allocated = nullptr, Metadata *Rank = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier, Discriminator, DataLocation))
+ Identifier, Discriminator, DataLocation, Associated, Allocated, Rank))
DEFINE_MDNODE_GET(
DICompositeType,
(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
@@ -988,10 +1122,12 @@ public:
uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags,
Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams = nullptr, MDString *Identifier = nullptr,
- Metadata *Discriminator = nullptr, Metadata *DataLocation = nullptr),
+ Metadata *Discriminator = nullptr, Metadata *DataLocation = nullptr,
+ Metadata *Associated = nullptr, Metadata *Allocated = nullptr,
+ Metadata *Rank = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier, Discriminator, DataLocation))
+ Identifier, Discriminator, DataLocation, Associated, Allocated, Rank))
TempDICompositeType clone() const { return cloneImpl(); }
@@ -1009,7 +1145,8 @@ public:
uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements,
unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, Metadata *Discriminator,
- Metadata *DataLocation);
+ Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
+ Metadata *Rank);
static DICompositeType *getODRTypeIfExists(LLVMContext &Context,
MDString &Identifier);
@@ -1029,7 +1166,8 @@ public:
uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements,
unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, Metadata *Discriminator,
- Metadata *DataLocation);
+ Metadata *DataLocation, Metadata *Associated,
+ Metadata *Allocated, Metadata *Rank);
DIType *getBaseType() const { return cast_or_null<DIType>(getRawBaseType()); }
DINodeArray getElements() const {
@@ -1058,6 +1196,29 @@ public:
DIExpression *getDataLocationExp() const {
return dyn_cast_or_null<DIExpression>(getRawDataLocation());
}
+ Metadata *getRawAssociated() const { return getOperand(10); }
+ DIVariable *getAssociated() const {
+ return dyn_cast_or_null<DIVariable>(getRawAssociated());
+ }
+ DIExpression *getAssociatedExp() const {
+ return dyn_cast_or_null<DIExpression>(getRawAssociated());
+ }
+ Metadata *getRawAllocated() const { return getOperand(11); }
+ DIVariable *getAllocated() const {
+ return dyn_cast_or_null<DIVariable>(getRawAllocated());
+ }
+ DIExpression *getAllocatedExp() const {
+ return dyn_cast_or_null<DIExpression>(getRawAllocated());
+ }
+ Metadata *getRawRank() const { return getOperand(12); }
+ ConstantInt *getRankConst() const {
+ if (auto *MD = dyn_cast_or_null<ConstantAsMetadata>(getRawRank()))
+ return dyn_cast_or_null<ConstantInt>(MD->getValue());
+ return nullptr;
+ }
+ DIExpression *getRankExp() const {
+ return dyn_cast_or_null<DIExpression>(getRawRank());
+ }
/// Replace operands.
///
@@ -1536,6 +1697,18 @@ public:
inline unsigned getDiscriminator() const;
+ // For the regular discriminator, a value with all of the lowest 3 bits set
+ // stands for all empty components, and the higher 29 bits are unused (zero
+ // by default). Here we fully leverage those higher 29 bits for pseudo probe
+ // use. This is the format:
+ // [2:0] - 0x7
+ // [31:3] - pseudo probe fields, guaranteed to be non-zero as a whole
+ // So if the lower 3 bits are all set and the higher bits contain at least
+ // one non-zero bit, the value is guaranteed to be a pseudo probe
+ // discriminator.
+ inline static bool isPseudoProbeDiscriminator(unsigned Discriminator) {
+ return ((Discriminator & 0x7) == 0x7) && (Discriminator & 0xFFFFFFF8);
+ }
+
/// Returns a new DILocation with updated \p Discriminator.
inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
@@ -1879,6 +2052,10 @@ public:
return getNumOperands() > 10 ? getOperandAs<Metadata>(10) : nullptr;
}
+ void replaceRawLinkageName(MDString *LinkageName) {
+ replaceOperandWith(3, LinkageName);
+ }
+
/// Check if this subprogram describes the given function.
///
/// FIXME: Should this be looking through bitcasts?
@@ -2123,49 +2300,52 @@ class DIModule : public DIScope {
friend class LLVMContextImpl;
friend class MDNode;
unsigned LineNo;
+ bool IsDecl;
DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo,
- ArrayRef<Metadata *> Ops)
+ bool IsDecl, ArrayRef<Metadata *> Ops)
: DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops),
- LineNo(LineNo) {}
+ LineNo(LineNo), IsDecl(IsDecl) {}
~DIModule() = default;
static DIModule *getImpl(LLVMContext &Context, DIFile *File, DIScope *Scope,
StringRef Name, StringRef ConfigurationMacros,
StringRef IncludePath, StringRef APINotesFile,
- unsigned LineNo, StorageType Storage,
+ unsigned LineNo, bool IsDecl, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, File, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, ConfigurationMacros),
getCanonicalMDString(Context, IncludePath),
- getCanonicalMDString(Context, APINotesFile), LineNo, Storage,
- ShouldCreate);
+ getCanonicalMDString(Context, APINotesFile), LineNo, IsDecl,
+ Storage, ShouldCreate);
}
static DIModule *getImpl(LLVMContext &Context, Metadata *File,
Metadata *Scope, MDString *Name,
MDString *ConfigurationMacros, MDString *IncludePath,
- MDString *APINotesFile, unsigned LineNo,
+ MDString *APINotesFile, unsigned LineNo, bool IsDecl,
StorageType Storage, bool ShouldCreate = true);
TempDIModule cloneImpl() const {
return getTemporary(getContext(), getFile(), getScope(), getName(),
getConfigurationMacros(), getIncludePath(),
- getAPINotesFile(), getLineNo());
+ getAPINotesFile(), getLineNo(), getIsDecl());
}
public:
DEFINE_MDNODE_GET(DIModule,
(DIFile * File, DIScope *Scope, StringRef Name,
StringRef ConfigurationMacros, StringRef IncludePath,
- StringRef APINotesFile, unsigned LineNo),
+ StringRef APINotesFile, unsigned LineNo,
+ bool IsDecl = false),
(File, Scope, Name, ConfigurationMacros, IncludePath,
- APINotesFile, LineNo))
+ APINotesFile, LineNo, IsDecl))
DEFINE_MDNODE_GET(DIModule,
(Metadata * File, Metadata *Scope, MDString *Name,
MDString *ConfigurationMacros, MDString *IncludePath,
- MDString *APINotesFile, unsigned LineNo),
+ MDString *APINotesFile, unsigned LineNo,
+ bool IsDecl = false),
(File, Scope, Name, ConfigurationMacros, IncludePath,
- APINotesFile, LineNo))
+ APINotesFile, LineNo, IsDecl))
TempDIModule clone() const { return cloneImpl(); }
@@ -2175,6 +2355,7 @@ public:
StringRef getIncludePath() const { return getStringOperand(4); }
StringRef getAPINotesFile() const { return getStringOperand(5); }
unsigned getLineNo() const { return LineNo; }
+ bool getIsDecl() const { return IsDecl; }
Metadata *getRawScope() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
@@ -2409,6 +2590,9 @@ public:
/// Determine whether this represents a standalone constant value.
bool isConstant() const;
+ /// Determine whether this represents a standalone signed constant value.
+ bool isSignedConstant() const;
+
using element_iterator = ArrayRef<uint64_t>::iterator;
element_iterator elements_begin() const { return getElements().begin(); }
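The new DIStringType node can also be built directly, bypassing DIBuilder; makeCharType is an illustrative helper:

    #include "llvm/BinaryFormat/Dwarf.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    DIStringType *makeCharType(LLVMContext &Ctx) {
      // Fortran CHARACTER(1): one 8-bit storage unit.
      return DIStringType::get(Ctx, dwarf::DW_TAG_string_type, "character(1)",
                               /*SizeInBits=*/8, /*AlignInBits=*/8);
    }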
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/DebugLoc.h b/contrib/llvm-project/llvm/include/llvm/IR/DebugLoc.h
index 4914d733fe0d..4824f2e9f2fd 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/DebugLoc.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/DebugLoc.h
@@ -68,27 +68,13 @@ namespace llvm {
/// Check whether this has a trivial destructor.
bool hasTrivialDestructor() const { return Loc.hasTrivialDestructor(); }
- /// Create a new DebugLoc.
- ///
- /// Create a new DebugLoc at the specified line/col and scope/inline. This
- /// forwards to \a DILocation::get().
- ///
- /// If \c !Scope, returns a default-constructed \a DebugLoc.
- ///
- /// FIXME: Remove this. Users should use DILocation::get().
- static DebugLoc get(unsigned Line, unsigned Col, const MDNode *Scope,
- const MDNode *InlinedAt = nullptr,
- bool ImplicitCode = false);
-
enum { ReplaceLastInlinedAt = true };
/// Rebuild the entire inlined-at chain for this instruction so that the top of
/// the chain now is inlined-at the new call site.
/// \param InlinedAt The new outermost inlined-at in the chain.
- /// \param ReplaceLast Replace the last location in the inlined-at chain.
static DebugLoc appendInlinedAt(const DebugLoc &DL, DILocation *InlinedAt,
LLVMContext &Ctx,
- DenseMap<const MDNode *, MDNode *> &Cache,
- bool ReplaceLast = false);
+ DenseMap<const MDNode *, MDNode *> &Cache);
unsigned getLine() const;
unsigned getCol() const;
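Since DebugLoc::get() is removed, callers construct locations through DILocation::get() and rely on DebugLoc's conversion from DILocation*; makeLoc is a stand-in name:

    #include "llvm/IR/DebugInfoMetadata.h"
    #include "llvm/IR/DebugLoc.h"
    using namespace llvm;

    DebugLoc makeLoc(LLVMContext &Ctx, unsigned Line, unsigned Col,
                     DILocalScope *Scope) {
      return DILocation::get(Ctx, Line, Col, Scope);
    }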
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/DerivedTypes.h b/contrib/llvm-project/llvm/include/llvm/IR/DerivedTypes.h
index 3618447168be..c3d97f4520e1 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/DerivedTypes.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/DerivedTypes.h
@@ -87,12 +87,6 @@ public:
/// Get a bit mask for this type.
APInt getMask() const;
- /// This method determines if the width of this IntegerType is a power-of-2
- /// in terms of 8 bit bytes.
- /// @returns true if this is a power-of-2 byte width.
- /// Is this a power-of-2 byte-width IntegerType ?
- bool isPowerOf2ByteWidth() const;
-
/// Methods for support type inquiry through isa, cast, and dyn_cast.
static bool classof(const Type *T) {
return T->getTypeID() == IntegerTyID;
@@ -273,6 +267,10 @@ public:
return llvm::StructType::get(Ctx, StructFields);
}
+ /// Return the type with the specified name, or null if there is none by that
+ /// name.
+ static StructType *getTypeByName(LLVMContext &C, StringRef Name);
+
bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; }
/// Return true if this type is uniqued by structural equivalence, false if it
@@ -286,6 +284,9 @@ public:
/// isSized - Return true if this is a sized type.
bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const;
+ /// Returns true if this struct contains a scalable vector.
+ bool containsScalableVectorType() const;
+
/// Return true if this is a named struct that has a non-empty name.
bool hasName() const { return SymbolTableEntry != nullptr; }
@@ -423,41 +424,21 @@ public:
/// Get the number of elements in this vector. It does not make sense to call
/// this function on a scalable vector, and this will be moved into
/// FixedVectorType in a future commit
- unsigned getNumElements() const {
- ElementCount EC = getElementCount();
-#ifdef STRICT_FIXED_SIZE_VECTORS
- assert(!EC.Scalable &&
- "Request for fixed number of elements from scalable vector");
- return EC.Min;
-#else
- if (EC.Scalable)
- WithColor::warning()
- << "The code that requested the fixed number of elements has made "
- "the assumption that this vector is not scalable. This assumption "
- "was not correct, and this may lead to broken code\n";
- return EC.Min;
-#endif
- }
+ LLVM_ATTRIBUTE_DEPRECATED(
+ inline unsigned getNumElements() const,
+ "Calling this function via a base VectorType is deprecated. Either call "
+ "getElementCount() and handle the case where Scalable is true or cast to "
+ "FixedVectorType.");
Type *getElementType() const { return ContainedType; }
/// This static method is the primary way to construct an VectorType.
static VectorType *get(Type *ElementType, ElementCount EC);
- /// Base class getter that specifically constructs a FixedVectorType. This
- /// function is deprecated, and will be removed after LLVM 11 ships. Since
- /// this always returns a FixedVectorType via a base VectorType pointer,
- /// FixedVectorType::get(Type *, unsigned) is strictly better since no cast is
- /// required to call getNumElements() on the result.
- LLVM_ATTRIBUTE_DEPRECATED(
- inline static VectorType *get(Type *ElementType, unsigned NumElements),
- "The base class version of get with the scalable argument defaulted to "
- "false is deprecated. Either call VectorType::get(Type *, unsigned, "
- "bool) and pass false, or call FixedVectorType::get(Type *, unsigned).");
-
static VectorType *get(Type *ElementType, unsigned NumElements,
bool Scalable) {
- return VectorType::get(ElementType, {NumElements, Scalable});
+ return VectorType::get(ElementType,
+ ElementCount::get(NumElements, Scalable));
}
static VectorType *get(Type *ElementType, const VectorType *Other) {
@@ -522,16 +503,18 @@ public:
/// input type and the same element type.
static VectorType *getHalfElementsVectorType(VectorType *VTy) {
auto EltCnt = VTy->getElementCount();
- assert ((EltCnt.Min & 1) == 0 &&
- "Cannot halve vector with odd number of elements.");
- return VectorType::get(VTy->getElementType(), EltCnt/2);
+ assert(EltCnt.isKnownEven() &&
+ "Cannot halve vector with odd number of elements.");
+ return VectorType::get(VTy->getElementType(),
+ EltCnt.divideCoefficientBy(2));
}
/// This static method returns a VectorType with twice as many elements as the
/// input type and the same element type.
static VectorType *getDoubleElementsVectorType(VectorType *VTy) {
auto EltCnt = VTy->getElementCount();
- assert((EltCnt.Min * 2ull) <= UINT_MAX && "Too many elements in vector");
+ assert((EltCnt.getKnownMinValue() * 2ull) <= UINT_MAX &&
+ "Too many elements in vector");
return VectorType::get(VTy->getElementType(), EltCnt * 2);
}
@@ -549,8 +532,19 @@ public:
}
};
-inline VectorType *VectorType::get(Type *ElementType, unsigned NumElements) {
- return VectorType::get(ElementType, NumElements, false);
+unsigned VectorType::getNumElements() const {
+ ElementCount EC = getElementCount();
+#ifdef STRICT_FIXED_SIZE_VECTORS
+ assert(!EC.isScalable() &&
+ "Request for fixed number of elements from scalable vector");
+#else
+ if (EC.isScalable())
+ WithColor::warning()
+ << "The code that requested the fixed number of elements has made the "
+ "assumption that this vector is not scalable. This assumption was "
+ "not correct, and this may lead to broken code\n";
+#endif
+ return EC.getKnownMinValue();
}
/// Class to represent fixed width SIMD vectors
@@ -596,6 +590,8 @@ public:
static bool classof(const Type *T) {
return T->getTypeID() == FixedVectorTyID;
}
+
+ unsigned getNumElements() const { return ElementQuantity; }
};
/// Class to represent scalable SIMD vectors
@@ -655,7 +651,7 @@ public:
};
inline ElementCount VectorType::getElementCount() const {
- return ElementCount(ElementQuantity, isa<ScalableVectorType>(this));
+ return ElementCount::get(ElementQuantity, isa<ScalableVectorType>(this));
}
/// Class to represent pointers.
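A sketch of the ElementCount changes: construction goes through ElementCount::get(), and element counts are read from FixedVectorType rather than the now-deprecated base-class getNumElements() (fixedElementCount is illustrative):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    unsigned fixedElementCount(LLVMContext &Ctx) {
      VectorType *VT = VectorType::get(Type::getInt32Ty(Ctx),
                                       ElementCount::get(4, /*Scalable=*/false));
      if (auto *FVT = dyn_cast<FixedVectorType>(VT))
        return FVT->getNumElements(); // not deprecated on FixedVectorType
      return 0;                       // scalable: no fixed element count
    }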
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/DiagnosticInfo.h b/contrib/llvm-project/llvm/include/llvm/IR/DiagnosticInfo.h
index b7e0ecde8629..c457072d50f1 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <cstdint>
@@ -34,6 +35,7 @@ namespace llvm {
class DiagnosticPrinter;
class Function;
class Instruction;
+class InstructionCost;
class LLVMContext;
class Module;
class SMDiagnostic;
@@ -75,7 +77,6 @@ enum DiagnosticKind {
DK_LastMachineRemark = DK_MachineOptimizationRemarkAnalysis,
DK_MIRParser,
DK_PGOProfile,
- DK_MisExpect,
DK_Unsupported,
DK_FirstPluginKind // Must be last value to work with
// getNextAvailablePluginDiagnosticKind
@@ -434,8 +435,10 @@ public:
Argument(StringRef Key, unsigned N);
Argument(StringRef Key, unsigned long N);
Argument(StringRef Key, unsigned long long N);
+ Argument(StringRef Key, ElementCount EC);
Argument(StringRef Key, bool B) : Key(Key), Val(B ? "true" : "false") {}
Argument(StringRef Key, DebugLoc dl);
+ Argument(StringRef Key, InstructionCost C);
};
/// \p PassName is the name of the pass emitting this diagnostic. \p
@@ -1012,25 +1015,6 @@ public:
void print(DiagnosticPrinter &DP) const override;
};
-/// Diagnostic information for MisExpect analysis.
-class DiagnosticInfoMisExpect : public DiagnosticInfoWithLocationBase {
-public:
- DiagnosticInfoMisExpect(const Instruction *Inst, Twine &Msg);
-
- /// \see DiagnosticInfo::print.
- void print(DiagnosticPrinter &DP) const override;
-
- static bool classof(const DiagnosticInfo *DI) {
- return DI->getKind() == DK_MisExpect;
- }
-
- const Twine &getMsg() const { return Msg; }
-
-private:
- /// Message to report.
- const Twine &Msg;
-};
-
} // end namespace llvm
#endif // LLVM_IR_DIAGNOSTICINFO_H
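Editorial sketch (not part of the patch): the new Argument(StringRef, ElementCount) overload lets remark streaming take an ElementCount directly, for example a vectorization factor. The pass name, remark name and variable names below are made-up placeholders:

#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Support/TypeSize.h"
using namespace llvm;

void reportVF(OptimizationRemarkEmitter &ORE, Instruction *I, ElementCount VF) {
  ORE.emit([&]() {
    // ore::NV wraps DiagnosticInfoOptimizationBase::Argument.
    return OptimizationRemark("my-pass", "Vectorized", I)
           << "vectorized loop with VF=" << ore::NV("VectorizationFactor", VF);
  });
}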
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Dominators.h b/contrib/llvm-project/llvm/include/llvm/IR/Dominators.h
index 71595cb15df4..08dbccaf2c01 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Dominators.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Dominators.h
@@ -44,6 +44,9 @@ using BBPostDomTree = PostDomTreeBase<BasicBlock>;
using BBUpdates = ArrayRef<llvm::cfg::Update<BasicBlock *>>;
+using BBDomTreeGraphDiff = GraphDiff<BasicBlock *, false>;
+using BBPostDomTreeGraphDiff = GraphDiff<BasicBlock *, true>;
+
extern template void Calculate<BBDomTree>(BBDomTree &DT);
extern template void CalculateWithUpdates<BBDomTree>(BBDomTree &DT,
BBUpdates U);
@@ -62,8 +65,12 @@ extern template void DeleteEdge<BBPostDomTree>(BBPostDomTree &DT,
BasicBlock *From,
BasicBlock *To);
-extern template void ApplyUpdates<BBDomTree>(BBDomTree &DT, BBUpdates);
-extern template void ApplyUpdates<BBPostDomTree>(BBPostDomTree &DT, BBUpdates);
+extern template void ApplyUpdates<BBDomTree>(BBDomTree &DT,
+ BBDomTreeGraphDiff &,
+ BBDomTreeGraphDiff *);
+extern template void ApplyUpdates<BBPostDomTree>(BBPostDomTree &DT,
+ BBPostDomTreeGraphDiff &,
+ BBPostDomTreeGraphDiff *);
extern template bool Verify<BBDomTree>(const BBDomTree &DT,
BBDomTree::VerificationLevel VL);
@@ -158,12 +165,21 @@ class DominatorTree : public DominatorTreeBase<BasicBlock, false> {
// Ensure base-class overloads are visible.
using Base::dominates;
- /// Return true if Def dominates a use in User.
+ /// Return true if value Def dominates use U, in the sense that Def is
+ /// available at U, and could be substituted as the used value without
+ /// violating the SSA dominance requirement.
///
- /// This performs the special checks necessary if Def and User are in the same
- /// basic block. Note that Def doesn't dominate a use in Def itself!
- bool dominates(const Instruction *Def, const Use &U) const;
- bool dominates(const Instruction *Def, const Instruction *User) const;
+ /// In particular, it is worth noting that:
+ /// * Non-instruction Defs dominate everything.
+ /// * Def does not dominate a use in Def itself (outside of degenerate cases
+ /// like unreachable code or trivial phi cycles).
+ /// * Invoke/callbr Defs only dominate uses in their default destination.
+ bool dominates(const Value *Def, const Use &U) const;
+ /// Return true if value Def dominates all possible uses inside instruction
+ /// User. Same comments as for the Use-based API apply.
+ bool dominates(const Value *Def, const Instruction *User) const;
+ // Does not accept Value to avoid ambiguity with dominance checks between
+ // two basic blocks.
bool dominates(const Instruction *Def, const BasicBlock *BB) const;
/// Return true if an edge dominates a use.
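Editorial sketch (not part of the patch): with the overloads above now taking a Value *Def, dominance queries work uniformly for arguments, constants and instructions. A minimal illustration, assuming an up-to-date DominatorTree:

#include "llvm/IR/Dominators.h"
using namespace llvm;

static bool canReuseDefAt(const DominatorTree &DT, const Value *Def,
                          const Use &U) {
  // Non-instruction defs dominate everything; instructions get the usual
  // same-block and invoke/callbr handling described above.
  return DT.dominates(Def, U);
}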
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/FixedMetadataKinds.def b/contrib/llvm-project/llvm/include/llvm/IR/FixedMetadataKinds.def
index 0e1ffef58672..31979cd2f9db 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/contrib/llvm-project/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -39,5 +39,6 @@ LLVM_FIXED_MD_KIND(MD_irr_loop, "irr_loop", 24)
LLVM_FIXED_MD_KIND(MD_access_group, "llvm.access.group", 25)
LLVM_FIXED_MD_KIND(MD_callback, "callback", 26)
LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27)
-LLVM_FIXED_MD_KIND(MD_misexpect, "misexpect", 28)
-LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 29)
+LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 28)
+LLVM_FIXED_MD_KIND(MD_noundef, "noundef", 29)
+LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30)
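Editorial sketch (not part of the patch): the renumbered kinds surface as LLVMContext::MD_noundef and LLVMContext::MD_annotation, so passes can attach them without a string lookup (the Instruction.h hunk further down also adds an addAnnotationMetadata() convenience). Illustration only:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

void tagAutoInit(Instruction &I) {
  LLVMContext &Ctx = I.getContext();
  // Attach !annotation !{!"auto-init"} using the fixed kind ID.
  MDNode *N = MDNode::get(Ctx, MDString::get(Ctx, "auto-init"));
  I.setMetadata(LLVMContext::MD_annotation, N);
}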
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/FixedPointBuilder.h b/contrib/llvm-project/llvm/include/llvm/IR/FixedPointBuilder.h
new file mode 100644
index 000000000000..a99c761ad3e9
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/IR/FixedPointBuilder.h
@@ -0,0 +1,465 @@
+//===- llvm/FixedPointBuilder.h - Builder for fixed-point ops ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the FixedPointBuilder class, which is used as a convenient
+// way to lower fixed-point arithmetic operations to LLVM IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_FIXEDPOINTBUILDER_H
+#define LLVM_IR_FIXEDPOINTBUILDER_H
+
+#include "llvm/ADT/APFixedPoint.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+
+namespace llvm {
+
+template <class IRBuilderTy> class FixedPointBuilder {
+ IRBuilderTy &B;
+
+ Value *Convert(Value *Src, const FixedPointSemantics &SrcSema,
+ const FixedPointSemantics &DstSema, bool DstIsInteger) {
+ unsigned SrcWidth = SrcSema.getWidth();
+ unsigned DstWidth = DstSema.getWidth();
+ unsigned SrcScale = SrcSema.getScale();
+ unsigned DstScale = DstSema.getScale();
+ bool SrcIsSigned = SrcSema.isSigned();
+ bool DstIsSigned = DstSema.isSigned();
+
+ Type *DstIntTy = B.getIntNTy(DstWidth);
+
+ Value *Result = Src;
+ unsigned ResultWidth = SrcWidth;
+
+ // Downscale.
+ if (DstScale < SrcScale) {
+ // When converting to integers, we round towards zero. For negative
+ // numbers, right shifting rounds towards negative infinity. In this case,
+ // we can just round up before shifting.
+ if (DstIsInteger && SrcIsSigned) {
+ Value *Zero = Constant::getNullValue(Result->getType());
+ Value *IsNegative = B.CreateICmpSLT(Result, Zero);
+ Value *LowBits = ConstantInt::get(
+ B.getContext(), APInt::getLowBitsSet(ResultWidth, SrcScale));
+ Value *Rounded = B.CreateAdd(Result, LowBits);
+ Result = B.CreateSelect(IsNegative, Rounded, Result);
+ }
+
+ Result = SrcIsSigned
+ ? B.CreateAShr(Result, SrcScale - DstScale, "downscale")
+ : B.CreateLShr(Result, SrcScale - DstScale, "downscale");
+ }
+
+ if (!DstSema.isSaturated()) {
+ // Resize.
+ Result = B.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize");
+
+ // Upscale.
+ if (DstScale > SrcScale)
+ Result = B.CreateShl(Result, DstScale - SrcScale, "upscale");
+ } else {
+ // Adjust the number of fractional bits.
+ if (DstScale > SrcScale) {
+ // Compare to DstWidth to prevent resizing twice.
+ ResultWidth = std::max(SrcWidth + DstScale - SrcScale, DstWidth);
+ Type *UpscaledTy = B.getIntNTy(ResultWidth);
+ Result = B.CreateIntCast(Result, UpscaledTy, SrcIsSigned, "resize");
+ Result = B.CreateShl(Result, DstScale - SrcScale, "upscale");
+ }
+
+ // Handle saturation.
+ bool LessIntBits = DstSema.getIntegralBits() < SrcSema.getIntegralBits();
+ if (LessIntBits) {
+ Value *Max = ConstantInt::get(
+ B.getContext(),
+ APFixedPoint::getMax(DstSema).getValue().extOrTrunc(ResultWidth));
+ Value *TooHigh = SrcIsSigned ? B.CreateICmpSGT(Result, Max)
+ : B.CreateICmpUGT(Result, Max);
+ Result = B.CreateSelect(TooHigh, Max, Result, "satmax");
+ }
+ // Cannot overflow min to dest type if src is unsigned since all fixed
+ // point types can cover the unsigned min of 0.
+ if (SrcIsSigned && (LessIntBits || !DstIsSigned)) {
+ Value *Min = ConstantInt::get(
+ B.getContext(),
+ APFixedPoint::getMin(DstSema).getValue().extOrTrunc(ResultWidth));
+ Value *TooLow = B.CreateICmpSLT(Result, Min);
+ Result = B.CreateSelect(TooLow, Min, Result, "satmin");
+ }
+
+ // Resize the integer part to get the final destination size.
+ if (ResultWidth != DstWidth)
+ Result = B.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize");
+ }
+ return Result;
+ }
+
+ /// Get the common semantic for two semantics, with the added imposition that
+ /// saturated padded types retain the padding bit.
+ FixedPointSemantics
+ getCommonBinopSemantic(const FixedPointSemantics &LHSSema,
+ const FixedPointSemantics &RHSSema) {
+ auto C = LHSSema.getCommonSemantics(RHSSema);
+ bool BothPadded =
+ LHSSema.hasUnsignedPadding() && RHSSema.hasUnsignedPadding();
+ return FixedPointSemantics(
+ C.getWidth() + (unsigned)(BothPadded && C.isSaturated()), C.getScale(),
+ C.isSigned(), C.isSaturated(), BothPadded);
+ }
+
+ /// Given a floating point type and a fixed-point semantic, return a floating
+ /// point type which can accommodate the fixed-point semantic. This is either
+ /// \p Ty, or a floating point type with a larger exponent than Ty.
+ Type *getAccommodatingFloatType(Type *Ty, const FixedPointSemantics &Sema) {
+ const fltSemantics *FloatSema = &Ty->getFltSemantics();
+ while (!Sema.fitsInFloatSemantics(*FloatSema))
+ FloatSema = APFixedPoint::promoteFloatSemantics(FloatSema);
+ return Type::getFloatingPointTy(Ty->getContext(), *FloatSema);
+ }
+
+public:
+ FixedPointBuilder(IRBuilderTy &Builder) : B(Builder) {}
+
+ /// Convert an integer value representing a fixed-point number from one
+ /// fixed-point semantic to another fixed-point semantic.
+ /// \p Src - The source value
+ /// \p SrcSema - The fixed-point semantic of the source value
+ /// \p DstSema - The resulting fixed-point semantic
+ Value *CreateFixedToFixed(Value *Src, const FixedPointSemantics &SrcSema,
+ const FixedPointSemantics &DstSema) {
+ return Convert(Src, SrcSema, DstSema, false);
+ }
+
+ /// Convert an integer value representing a fixed-point number to an integer
+ /// with the given bit width and signedness.
+ /// \p Src - The source value
+ /// \p SrcSema - The fixed-point semantic of the source value
+ /// \p DstWidth - The bit width of the result value
+ /// \p DstIsSigned - The signedness of the result value
+ Value *CreateFixedToInteger(Value *Src, const FixedPointSemantics &SrcSema,
+ unsigned DstWidth, bool DstIsSigned) {
+ return Convert(
+ Src, SrcSema,
+ FixedPointSemantics::GetIntegerSemantics(DstWidth, DstIsSigned), true);
+ }
+
+ /// Convert an integer value with the given signedness to an integer value
+ /// representing the given fixed-point semantic.
+ /// \p Src - The source value
+ /// \p SrcIsSigned - The signedness of the source value
+ /// \p DstSema - The resulting fixed-point semantic
+ Value *CreateIntegerToFixed(Value *Src, unsigned SrcIsSigned,
+ const FixedPointSemantics &DstSema) {
+ return Convert(Src,
+ FixedPointSemantics::GetIntegerSemantics(
+ Src->getType()->getScalarSizeInBits(), SrcIsSigned),
+ DstSema, false);
+ }
+
+ Value *CreateFixedToFloating(Value *Src, const FixedPointSemantics &SrcSema,
+ Type *DstTy) {
+ Value *Result;
+ Type *OpTy = getAccommodatingFloatType(DstTy, SrcSema);
+ // Convert the raw fixed-point value directly to floating point. If the
+ // value is too large to fit, it will be rounded, not truncated.
+ Result = SrcSema.isSigned() ? B.CreateSIToFP(Src, OpTy)
+ : B.CreateUIToFP(Src, OpTy);
+ // Rescale the integral-in-floating point by the scaling factor. This is
+ // lossless, except for overflow to infinity which is unlikely.
+ Result = B.CreateFMul(Result,
+ ConstantFP::get(OpTy, std::pow(2, -(int)SrcSema.getScale())));
+ if (OpTy != DstTy)
+ Result = B.CreateFPTrunc(Result, DstTy);
+ return Result;
+ }
+
+ Value *CreateFloatingToFixed(Value *Src, const FixedPointSemantics &DstSema) {
+ bool UseSigned = DstSema.isSigned() || DstSema.hasUnsignedPadding();
+ Value *Result = Src;
+ Type *OpTy = getAccommodatingFloatType(Src->getType(), DstSema);
+ if (OpTy != Src->getType())
+ Result = B.CreateFPExt(Result, OpTy);
+ // Rescale the floating point value so that its significant bits (for the
+ // purposes of the conversion) are in the integral range.
+ Result = B.CreateFMul(Result,
+ ConstantFP::get(OpTy, std::pow(2, DstSema.getScale())));
+
+ Type *ResultTy = B.getIntNTy(DstSema.getWidth());
+ if (DstSema.isSaturated()) {
+ Intrinsic::ID IID =
+ UseSigned ? Intrinsic::fptosi_sat : Intrinsic::fptoui_sat;
+ Result = B.CreateIntrinsic(IID, {ResultTy, OpTy}, {Result});
+ } else {
+ Result = UseSigned ? B.CreateFPToSI(Result, ResultTy)
+ : B.CreateFPToUI(Result, ResultTy);
+ }
+
+ // When saturating unsigned-with-padding using signed operations, we may
+ // get negative values. Emit an extra clamp to zero.
+ if (DstSema.isSaturated() && DstSema.hasUnsignedPadding()) {
+ Constant *Zero = Constant::getNullValue(Result->getType());
+ Result =
+ B.CreateSelect(B.CreateICmpSLT(Result, Zero), Zero, Result, "satmin");
+ }
+
+ return Result;
+ }
+
+ /// Add two fixed-point values and return the result in their common semantic.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ /// \p RHSSema - The semantic of the right hand side
+ Value *CreateAdd(Value *LHS, const FixedPointSemantics &LHSSema,
+ Value *RHS, const FixedPointSemantics &RHSSema) {
+ auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema);
+ bool UseSigned = CommonSema.isSigned() || CommonSema.hasUnsignedPadding();
+
+ Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema);
+ Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema);
+
+ Value *Result;
+ if (CommonSema.isSaturated()) {
+ Intrinsic::ID IID = UseSigned ? Intrinsic::sadd_sat : Intrinsic::uadd_sat;
+ Result = B.CreateBinaryIntrinsic(IID, WideLHS, WideRHS);
+ } else {
+ Result = B.CreateAdd(WideLHS, WideRHS);
+ }
+
+ return CreateFixedToFixed(Result, CommonSema,
+ LHSSema.getCommonSemantics(RHSSema));
+ }
+
+ /// Subtract two fixed-point values and return the result in their common
+ /// semantic.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ /// \p RHSSema - The semantic of the right hand side
+ Value *CreateSub(Value *LHS, const FixedPointSemantics &LHSSema,
+ Value *RHS, const FixedPointSemantics &RHSSema) {
+ auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema);
+ bool UseSigned = CommonSema.isSigned() || CommonSema.hasUnsignedPadding();
+
+ Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema);
+ Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema);
+
+ Value *Result;
+ if (CommonSema.isSaturated()) {
+ Intrinsic::ID IID = UseSigned ? Intrinsic::ssub_sat : Intrinsic::usub_sat;
+ Result = B.CreateBinaryIntrinsic(IID, WideLHS, WideRHS);
+ } else {
+ Result = B.CreateSub(WideLHS, WideRHS);
+ }
+
+ // Subtraction can end up below 0 for padded unsigned operations, so emit
+ // an extra clamp in that case.
+ if (CommonSema.isSaturated() && CommonSema.hasUnsignedPadding()) {
+ Constant *Zero = Constant::getNullValue(Result->getType());
+ Result =
+ B.CreateSelect(B.CreateICmpSLT(Result, Zero), Zero, Result, "satmin");
+ }
+
+ return CreateFixedToFixed(Result, CommonSema,
+ LHSSema.getCommonSemantics(RHSSema));
+ }
+
+ /// Multiply two fixed-point values and return the result in their common
+ /// semantic.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ /// \p RHSSema - The semantic of the right hand side
+ Value *CreateMul(Value *LHS, const FixedPointSemantics &LHSSema,
+ Value *RHS, const FixedPointSemantics &RHSSema) {
+ auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema);
+ bool UseSigned = CommonSema.isSigned() || CommonSema.hasUnsignedPadding();
+
+ Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema);
+ Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema);
+
+ Intrinsic::ID IID;
+ if (CommonSema.isSaturated()) {
+ IID = UseSigned ? Intrinsic::smul_fix_sat : Intrinsic::umul_fix_sat;
+ } else {
+ IID = UseSigned ? Intrinsic::smul_fix : Intrinsic::umul_fix;
+ }
+ Value *Result = B.CreateIntrinsic(
+ IID, {WideLHS->getType()},
+ {WideLHS, WideRHS, B.getInt32(CommonSema.getScale())});
+
+ return CreateFixedToFixed(Result, CommonSema,
+ LHSSema.getCommonSemantics(RHSSema));
+ }
+
+ /// Divide two fixed-point values and return the result in their common
+ /// semantic.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ /// \p RHSSema - The semantic of the right hand side
+ Value *CreateDiv(Value *LHS, const FixedPointSemantics &LHSSema,
+ Value *RHS, const FixedPointSemantics &RHSSema) {
+ auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema);
+ bool UseSigned = CommonSema.isSigned() || CommonSema.hasUnsignedPadding();
+
+ Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema);
+ Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema);
+
+ Intrinsic::ID IID;
+ if (CommonSema.isSaturated()) {
+ IID = UseSigned ? Intrinsic::sdiv_fix_sat : Intrinsic::udiv_fix_sat;
+ } else {
+ IID = UseSigned ? Intrinsic::sdiv_fix : Intrinsic::udiv_fix;
+ }
+ Value *Result = B.CreateIntrinsic(
+ IID, {WideLHS->getType()},
+ {WideLHS, WideRHS, B.getInt32(CommonSema.getScale())});
+
+ return CreateFixedToFixed(Result, CommonSema,
+ LHSSema.getCommonSemantics(RHSSema));
+ }
+
+ /// Left shift a fixed-point value by an unsigned integer value. The integer
+ /// value can be any bit width.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ Value *CreateShl(Value *LHS, const FixedPointSemantics &LHSSema, Value *RHS) {
+ bool UseSigned = LHSSema.isSigned() || LHSSema.hasUnsignedPadding();
+
+ RHS = B.CreateIntCast(RHS, LHS->getType(), /*IsSigned=*/false);
+
+ Value *Result;
+ if (LHSSema.isSaturated()) {
+ Intrinsic::ID IID = UseSigned ? Intrinsic::sshl_sat : Intrinsic::ushl_sat;
+ Result = B.CreateBinaryIntrinsic(IID, LHS, RHS);
+ } else {
+ Result = B.CreateShl(LHS, RHS);
+ }
+
+ return Result;
+ }
+
+ /// Right shift a fixed-point value by an unsigned integer value. The integer
+ /// value can be any bit width.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ Value *CreateShr(Value *LHS, const FixedPointSemantics &LHSSema, Value *RHS) {
+ RHS = B.CreateIntCast(RHS, LHS->getType(), false);
+
+ return LHSSema.isSigned() ? B.CreateAShr(LHS, RHS) : B.CreateLShr(LHS, RHS);
+ }
+
+ /// Compare two fixed-point values for equality.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ /// \p RHSSema - The semantic of the right hand side
+ Value *CreateEQ(Value *LHS, const FixedPointSemantics &LHSSema,
+ Value *RHS, const FixedPointSemantics &RHSSema) {
+ auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema);
+
+ Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema);
+ Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema);
+
+ return B.CreateICmpEQ(WideLHS, WideRHS);
+ }
+
+ /// Compare two fixed-point values for inequality.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ /// \p RHSSema - The semantic of the right hand side
+ Value *CreateNE(Value *LHS, const FixedPointSemantics &LHSSema,
+ Value *RHS, const FixedPointSemantics &RHSSema) {
+ auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema);
+
+ Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema);
+ Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema);
+
+ return B.CreateICmpNE(WideLHS, WideRHS);
+ }
+
+ /// Compare two fixed-point values as LHS < RHS.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ /// \p RHSSema - The semantic of the right hand side
+ Value *CreateLT(Value *LHS, const FixedPointSemantics &LHSSema,
+ Value *RHS, const FixedPointSemantics &RHSSema) {
+ auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema);
+
+ Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema);
+ Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema);
+
+ return CommonSema.isSigned() ? B.CreateICmpSLT(WideLHS, WideRHS)
+ : B.CreateICmpULT(WideLHS, WideRHS);
+ }
+
+ /// Compare two fixed-point values as LHS <= RHS.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ /// \p RHSSema - The semantic of the right hand side
+ Value *CreateLE(Value *LHS, const FixedPointSemantics &LHSSema,
+ Value *RHS, const FixedPointSemantics &RHSSema) {
+ auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema);
+
+ Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema);
+ Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema);
+
+ return CommonSema.isSigned() ? B.CreateICmpSLE(WideLHS, WideRHS)
+ : B.CreateICmpULE(WideLHS, WideRHS);
+ }
+
+ /// Compare two fixed-point values as LHS > RHS.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ /// \p RHSSema - The semantic of the right hand side
+ Value *CreateGT(Value *LHS, const FixedPointSemantics &LHSSema,
+ Value *RHS, const FixedPointSemantics &RHSSema) {
+ auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema);
+
+ Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema);
+ Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema);
+
+ return CommonSema.isSigned() ? B.CreateICmpSGT(WideLHS, WideRHS)
+ : B.CreateICmpUGT(WideLHS, WideRHS);
+ }
+
+ /// Compare two fixed-point values as LHS >= RHS.
+ /// \p LHS - The left hand side
+ /// \p LHSSema - The semantic of the left hand side
+ /// \p RHS - The right hand side
+ /// \p RHSSema - The semantic of the right hand side
+ Value *CreateGE(Value *LHS, const FixedPointSemantics &LHSSema,
+ Value *RHS, const FixedPointSemantics &RHSSema) {
+ auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema);
+
+ Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema);
+ Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema);
+
+ return CommonSema.isSigned() ? B.CreateICmpSGE(WideLHS, WideRHS)
+ : B.CreateICmpUGE(WideLHS, WideRHS);
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_IR_FIXEDPOINTBUILDER_H
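Editorial sketch (not part of the patch): minimal use of the new FixedPointBuilder for a saturating signed addition. The 16-bit, scale-15 semantics are an illustrative assumption, not anything mandated by the header:

#include "llvm/ADT/APFixedPoint.h"
#include "llvm/IR/FixedPointBuilder.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

Value *emitSatAdd(IRBuilder<> &B, Value *LHS, Value *RHS) {
  // Assumed semantics: 16 bits, 15 fractional bits, signed, saturating.
  FixedPointSemantics Sema(/*Width=*/16, /*Scale=*/15, /*IsSigned=*/true,
                           /*IsSaturated=*/true, /*HasUnsignedPadding=*/false);
  FixedPointBuilder<IRBuilder<>> FPB(B);
  // Converts both sides to the common semantic, emits llvm.sadd.sat, and
  // converts the result back.
  return FPB.CreateAdd(LHS, Sema, RHS, Sema);
}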
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Function.h b/contrib/llvm-project/llvm/include/llvm/IR/Function.h
index bb4ec13c7610..7e209bb3769b 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Function.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Function.h
@@ -199,6 +199,15 @@ public:
/// returns Intrinsic::not_intrinsic!
bool isIntrinsic() const { return HasLLVMReservedName; }
+ /// isTargetIntrinsic - Returns true if IID is an intrinsic specific to a
+ /// certain target. If it is a generic intrinsic, false is returned.
+ static bool isTargetIntrinsic(Intrinsic::ID IID);
+
+ /// isTargetIntrinsic - Returns true if this function is an intrinsic and the
+ /// intrinsic is specific to a certain target. If this is not an intrinsic
+ /// or a generic intrinsic, false is returned.
+ bool isTargetIntrinsic() const;
+
/// Returns true if the function is one of the "Constrained Floating-Point
/// Intrinsics". Returns false if not, and returns false when
/// getIntrinsicID() returns Intrinsic::not_intrinsic.
@@ -259,6 +268,12 @@ public:
getContext(), AttributeList::FunctionIndex, Kind));
}
+ /// A function will have the "coroutine.presplit" attribute if it's
+ /// a coroutine and has not yet gone through the full CoroSplit pass.
+ bool isPresplitCoroutine() const {
+ return hasFnAttribute("coroutine.presplit");
+ }
+
enum ProfileCountType { PCT_Invalid, PCT_Real, PCT_Synthetic };
/// Class to represent profile counts.
@@ -372,6 +387,9 @@ public:
void setGC(std::string Str);
void clearGC();
+ /// Returns true if the function has ssp, sspstrong, or sspreq fn attrs.
+ bool hasStackProtectorFnAttr() const;
+
/// adds the attribute to the list of attributes.
void addAttribute(unsigned i, Attribute::AttrKind Kind);
@@ -463,8 +481,17 @@ public:
/// Extract the byval type for a parameter.
Type *getParamByValType(unsigned ArgNo) const {
- Type *Ty = AttributeSets.getParamByValType(ArgNo);
- return Ty ? Ty : (arg_begin() + ArgNo)->getType()->getPointerElementType();
+ return AttributeSets.getParamByValType(ArgNo);
+ }
+
+ /// Extract the sret type for a parameter.
+ Type *getParamStructRetType(unsigned ArgNo) const {
+ return AttributeSets.getParamStructRetType(ArgNo);
+ }
+
+ /// Extract the byref type for a parameter.
+ Type *getParamByRefType(unsigned ArgNo) const {
+ return AttributeSets.getParamByRefType(ArgNo);
}
/// Extract the number of dereferenceable bytes for a call or
@@ -606,6 +633,17 @@ public:
addFnAttr(Attribute::NoRecurse);
}
+ /// Determine if the function is required to make forward progress.
+ bool mustProgress() const {
+ return hasFnAttribute(Attribute::MustProgress) ||
+ hasFnAttribute(Attribute::WillReturn);
+ }
+ void setMustProgress() { addFnAttr(Attribute::MustProgress); }
+
+ /// Determine if the function will return.
+ bool willReturn() const { return hasFnAttribute(Attribute::WillReturn); }
+ void setWillReturn() { addFnAttr(Attribute::WillReturn); }
+
/// True if the ABI mandates (or the user requested) that this
/// function be in an unwind table.
bool hasUWTable() const {
@@ -648,6 +686,10 @@ public:
return hasFnAttribute(Attribute::OptimizeForSize) || hasMinSize();
}
+ /// Returns the denormal handling type for the default rounding mode of the
+ /// function.
+ DenormalMode getDenormalMode(const fltSemantics &FPType) const;
+
/// copyAttributesFrom - copy all additional attributes (those not needed to
/// create a Function) from the Function Src to this one.
void copyAttributesFrom(const Function *Src);
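Editorial sketch (not part of the patch): how a transform might consume the new forward-progress helpers; note also that getParamByValType() no longer falls back to the pointee type, so callers must now tolerate a null result. Illustration only:

#include "llvm/IR/Function.h"
using namespace llvm;

static bool mayDeleteSideEffectFreeInfiniteLoop(const Function &F) {
  // mustProgress() also returns true for willreturn functions (see above),
  // so a single query covers both attributes.
  return F.mustProgress();
}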
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/contrib/llvm-project/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
index 79ea5791b2fd..6293305a2639 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -83,7 +83,7 @@ namespace llvm {
if (isa<ScalableVectorType>(VTy))
NumElements = Unbounded;
else
- NumElements = VTy->getNumElements();
+ NumElements = cast<FixedVectorType>(VTy)->getNumElements();
} else
CurTy = dyn_cast<StructType>(Ty);
++OpIt;
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/GlobalObject.h b/contrib/llvm-project/llvm/include/llvm/IR/GlobalObject.h
index 3a7b718845cb..d01abdc3b625 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/GlobalObject.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/GlobalObject.h
@@ -54,7 +54,6 @@ protected:
Comdat *ObjComdat;
enum {
LastAlignmentBit = 4,
- HasMetadataHashEntryBit,
HasSectionHashEntryBit,
GlobalObjectBits,
@@ -127,58 +126,13 @@ public:
Comdat *getComdat() { return ObjComdat; }
void setComdat(Comdat *C) { ObjComdat = C; }
- /// Check if this has any metadata.
- bool hasMetadata() const { return hasMetadataHashEntry(); }
-
- /// Check if this has any metadata of the given kind.
- bool hasMetadata(unsigned KindID) const {
- return getMetadata(KindID) != nullptr;
- }
- bool hasMetadata(StringRef Kind) const {
- return getMetadata(Kind) != nullptr;
- }
-
- /// Get the current metadata attachments for the given kind, if any.
- ///
- /// These functions require that the function have at most a single attachment
- /// of the given kind, and return \c nullptr if such an attachment is missing.
- /// @{
- MDNode *getMetadata(unsigned KindID) const;
- MDNode *getMetadata(StringRef Kind) const;
- /// @}
-
- /// Appends all attachments with the given ID to \c MDs in insertion order.
- /// If the global has no attachments with the given ID, or if ID is invalid,
- /// leaves MDs unchanged.
- /// @{
- void getMetadata(unsigned KindID, SmallVectorImpl<MDNode *> &MDs) const;
- void getMetadata(StringRef Kind, SmallVectorImpl<MDNode *> &MDs) const;
- /// @}
-
- /// Set a particular kind of metadata attachment.
- ///
- /// Sets the given attachment to \c MD, erasing it if \c MD is \c nullptr or
- /// replacing it if it already exists.
- /// @{
- void setMetadata(unsigned KindID, MDNode *MD);
- void setMetadata(StringRef Kind, MDNode *MD);
- /// @}
-
- /// Add a metadata attachment.
- /// @{
- void addMetadata(unsigned KindID, MDNode &MD);
- void addMetadata(StringRef Kind, MDNode &MD);
- /// @}
-
- /// Appends all attachments for the global to \c MDs, sorting by attachment
- /// ID. Attachments with the same ID appear in insertion order.
- void
- getAllMetadata(SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const;
-
- /// Erase all metadata attachments with the given kind.
- ///
- /// \returns true if any metadata was removed.
- bool eraseMetadata(unsigned KindID);
+ using Value::addMetadata;
+ using Value::clearMetadata;
+ using Value::eraseMetadata;
+ using Value::getAllMetadata;
+ using Value::getMetadata;
+ using Value::hasMetadata;
+ using Value::setMetadata;
/// Copy metadata from Src, adjusting offsets by Offset.
void copyMetadata(const GlobalObject *Src, unsigned Offset);
@@ -204,8 +158,6 @@ public:
V->getValueID() == Value::GlobalVariableVal;
}
- void clearMetadata();
-
private:
void setGlobalObjectFlag(unsigned Bit, bool Val) {
unsigned Mask = 1 << Bit;
@@ -213,13 +165,6 @@ private:
(Val ? Mask : 0u));
}
- bool hasMetadataHashEntry() const {
- return getGlobalValueSubClassData() & (1 << HasMetadataHashEntryBit);
- }
- void setHasMetadataHashEntry(bool HasEntry) {
- setGlobalObjectFlag(HasMetadataHashEntryBit, HasEntry);
- }
-
StringRef getSectionImpl() const;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/GlobalVariable.h b/contrib/llvm-project/llvm/include/llvm/IR/GlobalVariable.h
index 12093e337d6e..674d49eb9de6 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/GlobalVariable.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/GlobalVariable.h
@@ -56,10 +56,11 @@ public:
bool isExternallyInitialized = false);
/// GlobalVariable ctor - This creates a global and inserts it before the
/// specified other global.
- GlobalVariable(Module &M, Type *Ty, bool isConstant,
- LinkageTypes Linkage, Constant *Initializer,
- const Twine &Name = "", GlobalVariable *InsertBefore = nullptr,
- ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0,
+ GlobalVariable(Module &M, Type *Ty, bool isConstant, LinkageTypes Linkage,
+ Constant *Initializer, const Twine &Name = "",
+ GlobalVariable *InsertBefore = nullptr,
+ ThreadLocalMode = NotThreadLocal,
+ Optional<unsigned> AddressSpace = None,
bool isExternallyInitialized = false);
GlobalVariable(const GlobalVariable &) = delete;
GlobalVariable &operator=(const GlobalVariable &) = delete;
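Editorial sketch (not part of the patch): the AddressSpace parameter is now Optional<unsigned>; passing None is intended to let the constructor fall back to the module's default globals address space rather than hard-coding 0. The helper below is a hypothetical illustration:

#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

GlobalVariable *makeInternalGlobal(Module &M, Type *Ty, Constant *Init) {
  return new GlobalVariable(M, Ty, /*isConstant=*/false,
                            GlobalValue::InternalLinkage, Init, "g",
                            /*InsertBefore=*/nullptr,
                            GlobalValue::NotThreadLocal,
                            /*AddressSpace=*/None);
}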
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h
index b90480ebc59e..9cefc9aa764c 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h
@@ -17,6 +17,7 @@
#include "llvm-c/Types.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/BasicBlock.h"
@@ -24,6 +25,7 @@
#include "llvm/IR/ConstantFolder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -91,7 +93,28 @@ public:
/// Common base class shared among various IRBuilders.
class IRBuilderBase {
- DebugLoc CurDbgLocation;
+ /// Pairs of (metadata kind, MDNode *) that should be added to all newly
+ /// created instructions, like !dbg metadata.
+ SmallVector<std::pair<unsigned, MDNode *>, 2> MetadataToCopy;
+
+ /// Add or update an entry (Kind, MD) in MetadataToCopy, if \p MD is not
+ /// null. If \p MD is null, remove the entry with \p Kind.
+ void AddOrRemoveMetadataToCopy(unsigned Kind, MDNode *MD) {
+ if (!MD) {
+ erase_if(MetadataToCopy, [Kind](const std::pair<unsigned, MDNode *> &KV) {
+ return KV.first == Kind;
+ });
+ return;
+ }
+
+ for (auto &KV : MetadataToCopy)
+ if (KV.first == Kind) {
+ KV.second = MD;
+ return;
+ }
+
+ MetadataToCopy.emplace_back(Kind, MD);
+ }
protected:
BasicBlock *BB;
@@ -125,7 +148,7 @@ public:
template<typename InstTy>
InstTy *Insert(InstTy *I, const Twine &Name = "") const {
Inserter.InsertHelper(I, Name, BB, InsertPt);
- SetInstDebugLocation(I);
+ AddMetadataToInst(I);
return I;
}
@@ -182,16 +205,42 @@ public:
}
/// Set location information used by debugging information.
- void SetCurrentDebugLocation(DebugLoc L) { CurDbgLocation = std::move(L); }
+ void SetCurrentDebugLocation(DebugLoc L) {
+ AddOrRemoveMetadataToCopy(LLVMContext::MD_dbg, L.getAsMDNode());
+ }
+
+ /// Collect metadata with IDs \p MetadataKinds from \p Src which should be
+ /// added to all created instructions. Entries present in MetadataToCopy but
+ /// not on \p Src will be dropped from MetadataToCopy.
+ void CollectMetadataToCopy(Instruction *Src,
+ ArrayRef<unsigned> MetadataKinds) {
+ for (unsigned K : MetadataKinds)
+ AddOrRemoveMetadataToCopy(K, Src->getMetadata(K));
+ }
/// Get location information used by debugging information.
- const DebugLoc &getCurrentDebugLocation() const { return CurDbgLocation; }
+ DebugLoc getCurrentDebugLocation() const {
+ for (auto &KV : MetadataToCopy)
+ if (KV.first == LLVMContext::MD_dbg)
+ return {cast<DILocation>(KV.second)};
+
+ return {};
+ }
/// If this builder has a current debug location, set it on the
/// specified instruction.
void SetInstDebugLocation(Instruction *I) const {
- if (CurDbgLocation)
- I->setDebugLoc(CurDbgLocation);
+ for (const auto &KV : MetadataToCopy)
+ if (KV.first == LLVMContext::MD_dbg) {
+ I->setDebugLoc(DebugLoc(KV.second));
+ return;
+ }
+ }
+
+ /// Add all entries in MetadataToCopy to \p I.
+ void AddMetadataToInst(Instruction *I) const {
+ for (auto &KV : MetadataToCopy)
+ I->setMetadata(KV.first, KV.second);
}
/// Get the return type of the current function that we're emitting
@@ -266,11 +315,19 @@ public:
/// Set the exception handling to be used with constrained floating point
void setDefaultConstrainedExcept(fp::ExceptionBehavior NewExcept) {
+#ifndef NDEBUG
+ Optional<StringRef> ExceptStr = ExceptionBehaviorToStr(NewExcept);
+ assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
+#endif
DefaultConstrainedExcept = NewExcept;
}
/// Set the rounding mode handling to be used with constrained floating point
void setDefaultConstrainedRounding(RoundingMode NewRounding) {
+#ifndef NDEBUG
+ Optional<StringRef> RoundingStr = RoundingModeToStr(NewRounding);
+ assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
+#endif
DefaultConstrainedRounding = NewRounding;
}
@@ -293,9 +350,8 @@ public:
}
}
- void setConstrainedFPCallAttr(CallInst *I) {
- if (!I->hasFnAttr(Attribute::StrictFP))
- I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
+ void setConstrainedFPCallAttr(CallBase *I) {
+ I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
}
void setDefaultOperandBundles(ArrayRef<OperandBundleDef> OpBundles) {
@@ -574,12 +630,22 @@ public:
NoAliasTag);
}
+ CallInst *CreateMemTransferInst(
+ Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src,
+ MaybeAlign SrcAlign, Value *Size, bool isVolatile = false,
+ MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr,
+ MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr);
+
CallInst *CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src,
MaybeAlign SrcAlign, Value *Size,
bool isVolatile = false, MDNode *TBAATag = nullptr,
MDNode *TBAAStructTag = nullptr,
MDNode *ScopeTag = nullptr,
- MDNode *NoAliasTag = nullptr);
+ MDNode *NoAliasTag = nullptr) {
+ return CreateMemTransferInst(Intrinsic::memcpy, Dst, DstAlign, Src,
+ SrcAlign, Size, isVolatile, TBAATag,
+ TBAAStructTag, ScopeTag, NoAliasTag);
+ }
CallInst *CreateMemCpyInline(Value *Dst, MaybeAlign DstAlign, Value *Src,
MaybeAlign SrcAlign, Value *Size);
@@ -713,11 +779,11 @@ public:
/// Create a vector float max reduction intrinsic of the source
/// vector.
- CallInst *CreateFPMaxReduce(Value *Src, bool NoNaN = false);
+ CallInst *CreateFPMaxReduce(Value *Src);
/// Create a vector float min reduction intrinsic of the source
/// vector.
- CallInst *CreateFPMinReduce(Value *Src, bool NoNaN = false);
+ CallInst *CreateFPMinReduce(Value *Src);
/// Create a lifetime.start intrinsic.
///
@@ -786,7 +852,18 @@ public:
/// Create an assume intrinsic call that allows the optimizer to
/// assume that the provided condition will be true.
- CallInst *CreateAssumption(Value *Cond);
+ ///
+ /// The optional argument \p OpBundles specifies operand bundles that are
+ /// added to the call instruction.
+ CallInst *CreateAssumption(Value *Cond,
+ ArrayRef<OperandBundleDef> OpBundles = llvm::None);
+
+ /// Create a llvm.experimental.noalias.scope.decl intrinsic call.
+ Instruction *CreateNoAliasScopeDeclaration(Value *Scope);
+ Instruction *CreateNoAliasScopeDeclaration(MDNode *ScopeTag) {
+ return CreateNoAliasScopeDeclaration(
+ MetadataAsValue::get(Context, ScopeTag));
+ }
/// Create a call to the experimental.gc.statepoint intrinsic to
/// start a new statepoint sequence.
@@ -801,7 +878,7 @@ public:
/// start a new statepoint sequence.
CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes,
Value *ActualCallee, uint32_t Flags,
- ArrayRef<Use> CallArgs,
+ ArrayRef<Value *> CallArgs,
Optional<ArrayRef<Use>> TransitionArgs,
Optional<ArrayRef<Use>> DeoptArgs,
ArrayRef<Value *> GCArgs,
@@ -830,7 +907,7 @@ public:
InvokeInst *CreateGCStatepointInvoke(
uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags,
- ArrayRef<Use> InvokeArgs, Optional<ArrayRef<Use>> TransitionArgs,
+ ArrayRef<Value *> InvokeArgs, Optional<ArrayRef<Use>> TransitionArgs,
Optional<ArrayRef<Use>> DeoptArgs, ArrayRef<Value *> GCArgs,
const Twine &Name = "");
@@ -858,6 +935,10 @@ public:
Type *ResultType,
const Twine &Name = "");
+ /// Create a call to llvm.vscale, multiplied by \p Scaling. The type of VScale
+ /// will be the same type as that of \p Scaling.
+ Value *CreateVScale(Constant *Scaling, const Twine &Name = "");
+
/// Create a call to intrinsic \p ID with 1 operand which is mangled on its
/// type.
CallInst *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
@@ -898,6 +979,22 @@ public:
return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name);
}
+ /// Create a call to the experimental.vector.extract intrinsic.
+ CallInst *CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx,
+ const Twine &Name = "") {
+ return CreateIntrinsic(Intrinsic::experimental_vector_extract,
+ {DstType, SrcVec->getType()}, {SrcVec, Idx}, nullptr,
+ Name);
+ }
+
+ /// Create a call to the experimental.vector.insert intrinsic.
+ CallInst *CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
+ Value *Idx, const Twine &Name = "") {
+ return CreateIntrinsic(Intrinsic::experimental_vector_insert,
+ {DstType, SubVec->getType()}, {SrcVec, SubVec, Idx},
+ nullptr, Name);
+ }
+
private:
/// Create a call to a masked intrinsic with given Id.
CallInst *CreateMaskedIntrinsic(Intrinsic::ID Id, ArrayRef<Value *> Ops,
@@ -998,16 +1095,21 @@ public:
ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> OpBundles,
const Twine &Name = "") {
- return Insert(
- InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args, OpBundles),
- Name);
+ InvokeInst *II =
+ InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args, OpBundles);
+ if (IsFPConstrained)
+ setConstrainedFPCallAttr(II);
+ return Insert(II, Name);
}
InvokeInst *CreateInvoke(FunctionType *Ty, Value *Callee,
BasicBlock *NormalDest, BasicBlock *UnwindDest,
ArrayRef<Value *> Args = None,
const Twine &Name = "") {
- return Insert(InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args),
- Name);
+ InvokeInst *II =
+ InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args);
+ if (IsFPConstrained)
+ setConstrainedFPCallAttr(II);
+ return Insert(II, Name);
}
InvokeInst *CreateInvoke(FunctionCallee Callee, BasicBlock *NormalDest,
@@ -2428,6 +2530,13 @@ public:
return Insert(new ShuffleVectorInst(V1, V2, Mask), Name);
}
+ /// Create a unary shuffle. The second vector operand of the IR instruction
+ /// is poison.
+ Value *CreateShuffleVector(Value *V, ArrayRef<int> Mask,
+ const Twine &Name = "") {
+ return CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, Name);
+ }
+
Value *CreateExtractValue(Value *Agg,
ArrayRef<unsigned> Idxs,
const Twine &Name = "") {
@@ -2492,6 +2601,10 @@ public:
/// NumElts elements.
Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "");
+ /// Return a vector value that contains \arg V broadcasted to \p
+ /// EC elements.
+ Value *CreateVectorSplat(ElementCount EC, Value *V, const Twine &Name = "");
+
/// Return a value that has been extracted from a larger integer type.
Value *CreateExtractInteger(const DataLayout &DL, Value *From,
IntegerType *ExtractedTy, uint64_t Offset,
@@ -2510,13 +2623,11 @@ public:
private:
/// Helper function that creates an assume intrinsic call that
- /// represents an alignment assumption on the provided Ptr, Mask, Type
- /// and Offset. It may be sometimes useful to do some other logic
- /// based on this alignment check, thus it can be stored into 'TheCheck'.
+ /// represents an alignment assumption on the provided pointer \p PtrValue
+ /// with offset \p OffsetValue and alignment value \p AlignValue.
CallInst *CreateAlignmentAssumptionHelper(const DataLayout &DL,
- Value *PtrValue, Value *Mask,
- Type *IntPtrTy, Value *OffsetValue,
- Value **TheCheck);
+ Value *PtrValue, Value *AlignValue,
+ Value *OffsetValue);
public:
/// Create an assume intrinsic call that represents an alignment
@@ -2525,13 +2636,9 @@ public:
/// An optional offset can be provided, and if it is provided, the offset
/// must be subtracted from the provided pointer to get the pointer with the
/// specified alignment.
- ///
- /// It may be sometimes useful to do some other logic
- /// based on this alignment check, thus it can be stored into 'TheCheck'.
CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue,
unsigned Alignment,
- Value *OffsetValue = nullptr,
- Value **TheCheck = nullptr);
+ Value *OffsetValue = nullptr);
/// Create an assume intrinsic call that represents an alignment
/// assumption on the provided pointer.
@@ -2540,15 +2647,11 @@ public:
/// must be subtracted from the provided pointer to get the pointer with the
/// specified alignment.
///
- /// It may be sometimes useful to do some other logic
- /// based on this alignment check, thus it can be stored into 'TheCheck'.
- ///
/// This overload handles the condition where the Alignment is dependent
/// on an existing value rather than a static value.
CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue,
Value *Alignment,
- Value *OffsetValue = nullptr,
- Value **TheCheck = nullptr);
+ Value *OffsetValue = nullptr);
};
/// This provides a uniform API for creating instructions and inserting
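Editorial sketch (not part of the patch): two of the IRBuilder additions above in action, carrying !dbg and !annotation from a replaced instruction onto newly created ones and using the unary shuffle overload (the second vector operand is implicitly poison). Names are placeholders:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

Value *splatLaneZero(IRBuilder<> &B, Instruction *Orig, Value *Vec4) {
  // Every instruction the builder creates from now on gets these attachments.
  B.CollectMetadataToCopy(Orig,
                          {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
  // Broadcast element 0 of a <4 x T> vector; the second operand is poison.
  return B.CreateShuffleVector(Vec4, {0, 0, 0, 0});
}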
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IRPrintingPasses.h b/contrib/llvm-project/llvm/include/llvm/IR/IRPrintingPasses.h
index 3a1c489ee09f..2e62be7cd1ec 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IRPrintingPasses.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IRPrintingPasses.h
@@ -18,11 +18,12 @@
#ifndef LLVM_IR_IRPRINTINGPASSES_H
#define LLVM_IR_IRPRINTINGPASSES_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/IR/PassManager.h"
#include <string>
namespace llvm {
+class raw_ostream;
+class StringRef;
/// Create and return a pass that writes the module to the specified
/// \c raw_ostream.
@@ -44,22 +45,6 @@ void printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name);
/// Return true if a pass is for IR printing.
bool isIRPrintingPass(Pass *P);
-/// isFunctionInPrintList - returns true if a function should be printed via
-// debugging options like -print-after-all/-print-before-all.
-// Tells if the function IR should be printed by PrinterPass.
-extern bool isFunctionInPrintList(StringRef FunctionName);
-
-/// forcePrintModuleIR - returns true if IR printing passes should
-// be printing module IR (even for local-pass printers e.g. function-pass)
-// to provide more context, as enabled by debugging option -print-module-scope
-// Tells if IR printer should be printing module IR
-extern bool forcePrintModuleIR();
-
-extern bool shouldPrintBeforePass();
-extern bool shouldPrintBeforePass(StringRef);
-extern bool shouldPrintAfterPass();
-extern bool shouldPrintAfterPass(StringRef);
-
/// Pass for printing a Module as LLVM's text IR assembly.
///
/// Note: This pass is for use with the new pass manager. Use the create...Pass
@@ -75,6 +60,7 @@ public:
bool ShouldPreserveUseListOrder = false);
PreservedAnalyses run(Module &M, AnalysisManager<Module> &);
+ static bool isRequired() { return true; }
};
/// Pass for printing a Function as LLVM's text IR assembly.
@@ -90,8 +76,9 @@ public:
PrintFunctionPass(raw_ostream &OS, const std::string &Banner = "");
PreservedAnalyses run(Function &F, AnalysisManager<Function> &);
+ static bool isRequired() { return true; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
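Editorial sketch (not part of the patch): with isRequired() returning true, the printing passes can no longer be skipped by pass instrumentation (e.g. opt-bisect). New-PM usage itself is unchanged; a minimal illustration:

#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void addIRDump(ModulePassManager &MPM) {
  MPM.addPass(PrintModulePass(errs(), "; module after my-pass"));
}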
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/InstrTypes.h b/contrib/llvm-project/llvm/include/llvm/IR/InstrTypes.h
index 07af00ec9240..955ac8e537fe 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/InstrTypes.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/InstrTypes.h
@@ -599,12 +599,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// Check whether it is valid to call getCastOpcode for these types.
- static bool isCastable(
- Type *SrcTy, ///< The Type from which the value should be cast.
- Type *DestTy ///< The Type to which the value should be cast.
- );
-
/// Check whether a bitcast between these types is valid
static bool isBitCastable(
Type *SrcTy, ///< The Type from which the value should be cast.
@@ -650,8 +644,8 @@ public:
/// DataLayout argument is to determine the pointer size when examining casts
/// involving Integer and Pointer types. They are no-op casts if the integer
/// is the same size as the pointer. However, pointer size varies with
- /// platform.
- /// Determine if the described cast is a no-op cast.
+ /// platform. Note that a precondition of this method is that the cast is
+ /// legal - i.e. the instruction formed with these operands would verify.
static bool isNoopCast(
Instruction::CastOps Opcode, ///< Opcode of cast
Type *SrcTy, ///< SrcTy of cast
@@ -691,11 +685,14 @@ public:
/// Return the destination type, as a convenience
Type* getDestTy() const { return getType(); }
- /// This method can be used to determine if a cast from S to DstTy using
+ /// This method can be used to determine if a cast from SrcTy to DstTy using
/// Opcode op is valid or not.
/// @returns true iff the proposed cast is valid.
/// Determine if a cast is valid without creating one.
- static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy);
+ static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy);
+ static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
+ return castIsValid(op, S->getType(), DstTy);
+ }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
@@ -805,8 +802,8 @@ public:
void setPredicate(Predicate P) { setSubclassData<PredicateField>(P); }
static bool isFPPredicate(Predicate P) {
- assert(FIRST_FCMP_PREDICATE == 0 &&
- "FIRST_FCMP_PREDICATE is required to be 0");
+ static_assert(FIRST_FCMP_PREDICATE == 0,
+ "FIRST_FCMP_PREDICATE is required to be 0");
return P <= LAST_FCMP_PREDICATE;
}
@@ -848,20 +845,38 @@ public:
/// Return the predicate as if the operands were swapped.
static Predicate getSwappedPredicate(Predicate pred);
- /// For predicate of kind "is X or equal to 0" returns the predicate "is X".
- /// For predicate of kind "is X" returns the predicate "is X or equal to 0".
- /// does not support other kind of predicates.
- /// @returns the predicate that does not contains is equal to zero if
- /// it had and vice versa.
- /// Return the flipped strictness of predicate
- Predicate getFlippedStrictnessPredicate() const {
- return getFlippedStrictnessPredicate(getPredicate());
+ /// This is a static version that you can use without an instruction
+ /// available.
+ /// @returns true if the comparison predicate is strict, false otherwise.
+ static bool isStrictPredicate(Predicate predicate);
+
+ /// @returns true if the comparison predicate is strict, false otherwise.
+ /// Determine if this instruction is using a strict comparison predicate.
+ bool isStrictPredicate() const { return isStrictPredicate(getPredicate()); }
+
+ /// This is a static version that you can use without an instruction
+ /// available.
+ /// @returns true if the comparison predicate is non-strict, false otherwise.
+ static bool isNonStrictPredicate(Predicate predicate);
+
+ /// @returns true if the comparison predicate is non-strict, false otherwise.
+ /// Determine if this instruction is using a non-strict comparison predicate.
+ bool isNonStrictPredicate() const {
+ return isNonStrictPredicate(getPredicate());
+ }
+
+ /// For example, SGE -> SGT, SLE -> SLT, ULE -> ULT, UGE -> UGT.
+ /// Returns the strict version of non-strict comparisons.
+ Predicate getStrictPredicate() const {
+ return getStrictPredicate(getPredicate());
}
/// This is a static version that you can use without an instruction
/// available.
- /// Return the flipped strictness of predicate
- static Predicate getFlippedStrictnessPredicate(Predicate pred);
+ /// @returns the strict version of comparison provided in \p pred.
+ /// If \p pred is not a strict comparison predicate, returns \p pred.
+ /// Returns the strict version of non-strict comparisons.
+ static Predicate getStrictPredicate(Predicate pred);
/// For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
/// Returns the non-strict version of strict comparisons.
@@ -876,6 +891,21 @@ public:
/// Returns the non-strict version of strict comparisons.
static Predicate getNonStrictPredicate(Predicate pred);
+ /// This is a static version that you can use without an instruction
+ /// available.
+ /// Return the flipped strictness of the predicate.
+ static Predicate getFlippedStrictnessPredicate(Predicate pred);
+
+ /// For predicate of kind "is X or equal to 0" returns the predicate "is X".
+ /// For predicate of kind "is X" returns the predicate "is X or equal to 0".
+ /// Does not support other kinds of predicates.
+ /// @returns the predicate without the "or equal to 0" component if it had
+ /// one, and vice versa.
+ /// Return the flipped strictness of the predicate.
+ Predicate getFlippedStrictnessPredicate() const {
+ return getFlippedStrictnessPredicate(getPredicate());
+ }
+
/// Provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -888,9 +918,19 @@ public:
/// Determine if this CmpInst is commutative.
bool isCommutative() const;
- /// This is just a convenience that dispatches to the subclasses.
/// Determine if this is an equals/not equals predicate.
- bool isEquality() const;
+ /// This is a static version that you can use without an instruction
+ /// available.
+ static bool isEquality(Predicate pred);
+
+ /// Determine if this is an equals/not equals predicate.
+ bool isEquality() const { return isEquality(getPredicate()); }
+
+ /// Return true if the predicate is relational (not EQ or NE).
+ static bool isRelational(Predicate P) { return !isEquality(P); }
+
+ /// Return true if the predicate is relational (not EQ or NE).
+ bool isRelational() const { return !isEquality(); }
/// @returns true if the comparison is signed, false otherwise.
/// Determine if this instruction is using a signed comparison.
@@ -917,6 +957,30 @@ public:
return getSignedPredicate(getPredicate());
}
+ /// For example, SLT->ULT, SLE->ULE, SGT->UGT, SGE->UGE, ULT->Failed assert
+ /// @returns the unsigned version of the signed predicate pred.
+ static Predicate getUnsignedPredicate(Predicate pred);
+
+ /// For example, SLT->ULT, SLE->ULE, SGT->UGT, SGE->UGE, ULT->Failed assert
+ /// @returns the unsigned version of the predicate for this instruction (which
+ /// has to be a signed predicate).
+ /// return the unsigned version of a predicate
+ Predicate getUnsignedPredicate() {
+ return getUnsignedPredicate(getPredicate());
+ }
+
+ /// For example, SLT->ULT, ULT->SLT, SLE->ULE, ULE->SLE, EQ->Failed assert
+ /// @returns the unsigned version of the signed predicate pred or
+ /// the signed version of the unsigned predicate pred.
+ static Predicate getFlippedSignednessPredicate(Predicate pred);
+
+ /// For example, SLT->ULT, ULT->SLT, SLE->ULE, ULE->SLE, EQ->Failed assert
+ /// @returns the unsigned version of the signed predicate pred or
+ /// the signed version of the unsigned predicate pred.
+ Predicate getFlippedSignednessPredicate() {
+ return getFlippedSignednessPredicate(getPredicate());
+ }
+
/// This is just a convenience.
/// Determine if this is true when both operands are the same.
bool isTrueWhenEqual() const {
@@ -1062,7 +1126,7 @@ public:
explicit OperandBundleDefT(const OperandBundleUse &OBU) {
Tag = std::string(OBU.getTagName());
- Inputs.insert(Inputs.end(), OBU.Inputs.begin(), OBU.Inputs.end());
+ llvm::append_range(Inputs, OBU.Inputs);
}
ArrayRef<InputTy> inputs() const { return Inputs; }
@@ -1301,7 +1365,7 @@ public:
/// Returns true if this CallSite passes the given Value* as an argument to
/// the called function.
bool hasArgument(const Value *V) const {
- return llvm::any_of(args(), [V](const Value *Arg) { return Arg == V; });
+ return llvm::is_contained(args(), V);
}
Value *getCalledOperand() const { return Op<CalledOperandOpEndIdx>(); }
@@ -1393,14 +1457,18 @@ public:
///
void setAttributes(AttributeList A) { Attrs = A; }
- /// Determine whether this call has the given attribute.
+ /// Determine whether this call has the given attribute. If it does not
+ /// then determine if the called function has the attribute, but only if
+ /// the attribute is allowed for the call.
bool hasFnAttr(Attribute::AttrKind Kind) const {
assert(Kind != Attribute::NoBuiltin &&
"Use CallBase::isNoBuiltin() to check for Attribute::NoBuiltin");
return hasFnAttrImpl(Kind);
}
- /// Determine whether this call has the given attribute.
+ /// Determine whether this call has the given attribute. If it does not
+ /// then determine if the called function has the attribute, but only if
+ /// the attribute is allowed for the call.
bool hasFnAttr(StringRef Kind) const { return hasFnAttrImpl(Kind); }
/// adds the attribute to the list of attributes.
@@ -1447,6 +1515,12 @@ public:
setAttributes(PAL);
}
+ void removeAttributes(unsigned i, const AttrBuilder &Attrs) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.removeAttributes(getContext(), i, Attrs);
+ setAttributes(PAL);
+ }
+
/// Removes the attribute from the given argument
void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
assert(ArgNo < getNumArgOperands() && "Out of bounds");
@@ -1479,7 +1553,11 @@ public:
}
/// Determine whether the return value has the given attribute.
- bool hasRetAttr(Attribute::AttrKind Kind) const;
+ bool hasRetAttr(Attribute::AttrKind Kind) const {
+ return hasRetAttrImpl(Kind);
+ }
+ /// Determine whether the return value has the given attribute.
+ bool hasRetAttr(StringRef Kind) const { return hasRetAttrImpl(Kind); }
/// Determine whether the argument or parameter has the given attribute.
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const;
@@ -1678,6 +1756,7 @@ public:
bool onlyReadsMemory() const {
return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
}
+
void setOnlyReadsMemory() {
addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
}
@@ -2158,6 +2237,18 @@ private:
return hasFnAttrOnCalledFunction(Kind);
}
+
+ /// Determine whether the return value has the given attribute. Supports
+ /// Attribute::AttrKind and StringRef as \p AttrKind types.
+ template <typename AttrKind> bool hasRetAttrImpl(AttrKind Kind) const {
+ if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
+ return true;
+
+ // Look at the callee, if available.
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
+ return false;
+ }
};
template <>
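Editorial sketch (not part of the patch): the new static predicate helpers in CmpInst allow predicate reasoning without an instruction in hand. A small, hypothetical canonicalization:

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

static CmpInst::Predicate preferStrictForm(CmpInst::Predicate P) {
  // Relational, non-strict predicates (e.g. SGE) map to their strict
  // counterparts (SGT); equality predicates are left untouched.
  if (CmpInst::isRelational(P) && CmpInst::isNonStrictPredicate(P))
    return CmpInst::getStrictPredicate(P);
  return P;
}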
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h b/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h
index a03eac0ad40d..b99dc62bbb9d 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h
@@ -256,13 +256,11 @@ public:
//===--------------------------------------------------------------------===//
/// Return true if this instruction has any metadata attached to it.
- bool hasMetadata() const { return DbgLoc || hasMetadataHashEntry(); }
+ bool hasMetadata() const { return DbgLoc || Value::hasMetadata(); }
/// Return true if this instruction has metadata attached to it other than a
/// debug location.
- bool hasMetadataOtherThanDebugLoc() const {
- return hasMetadataHashEntry();
- }
+ bool hasMetadataOtherThanDebugLoc() const { return Value::hasMetadata(); }
/// Return true if this instruction has the given type of metadata attached.
bool hasMetadata(unsigned KindID) const {
@@ -301,8 +299,7 @@ public:
/// debug location.
void getAllMetadataOtherThanDebugLoc(
SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const {
- if (hasMetadataOtherThanDebugLoc())
- getAllMetadataOtherThanDebugLocImpl(MDs);
+ Value::getAllMetadata(MDs);
}
/// Fills the AAMDNodes structure with AA metadata from this instruction.
@@ -343,6 +340,11 @@ public:
}
/// @}
+ /// Adds an !annotation metadata node with \p Annotation to this instruction.
+ /// If this instruction already has !annotation metadata, append \p Annotation
+ /// to the existing node.
+ void addAnnotationMetadata(StringRef Annotation);
+
/// Sets the metadata on this instruction from the AAMDNodes structure.
void setAAMetadata(const AAMDNodes &N);
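A sketch of how the new addAnnotationMetadata hook might be used (LLVM 12 API; the annotation string is arbitrary):

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    static void tagInstruction(Instruction &I) {
      // Appends to any existing !annotation node instead of overwriting it.
      I.addAnnotationMetadata("example-tag");
      // The attachment can be read back like any other named metadata kind.
      if (MDNode *MD = I.getMetadata("annotation"))
        (void)MD; // e.g. hand it to a remarks consumer
    }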
@@ -492,21 +494,26 @@ public:
/// merged DebugLoc.
void applyMergedLocation(const DILocation *LocA, const DILocation *LocB);
-private:
- /// Return true if we have an entry in the on-the-side metadata hash.
- bool hasMetadataHashEntry() const {
- return Bitfield::test<HasMetadataField>(getSubclassDataFromValue());
- }
+ /// Updates the debug location given that the instruction has been hoisted
+ /// from a block to a predecessor of that block.
+ /// Note: it is undefined behavior to call this on an instruction not
+ /// currently inserted into a function.
+ void updateLocationAfterHoist();
+ /// Drop the instruction's debug location. This does not guarantee removal
+ /// of the !dbg source location attachment, as it must set a line 0 location
+ /// with scope information attached on call instructions. To guarantee
+ /// removal of the !dbg attachment, use the \ref setDebugLoc() API.
+ /// Note: it is undefined behavior to call this on an instruction not
+ /// currently inserted into a function.
+ void dropLocation();
+
+private:
// These are all implemented in Metadata.cpp.
MDNode *getMetadataImpl(unsigned KindID) const;
MDNode *getMetadataImpl(StringRef Kind) const;
void
getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned, MDNode *>> &) const;
- void getAllMetadataOtherThanDebugLocImpl(
- SmallVectorImpl<std::pair<unsigned, MDNode *>> &) const;
- /// Clear all hashtable-based metadata from this instruction.
- void clearMetadataHashEntries();
public:
//===--------------------------------------------------------------------===//
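A sketch of the intended use of the debug-location helpers added above when hoisting an instruction (LLVM 12 API; the function is illustrative only):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Hoist I to the end of Pred (before its terminator) and fix up its !dbg
    // attachment as described in updateLocationAfterHoist().
    static void hoistInto(Instruction &I, BasicBlock &Pred) {
      I.moveBefore(Pred.getTerminator());
      I.updateLocationAfterHoist();
    }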
@@ -532,7 +539,7 @@ public:
/// In LLVM, these are the commutative operators, plus SetEQ and SetNE, when
/// applied to any type.
///
- bool isCommutative() const { return isCommutative(getOpcode()); }
+ bool isCommutative() const LLVM_READONLY;
static bool isCommutative(unsigned Opcode) {
switch (Opcode) {
case Add: case FAdd:
@@ -626,6 +633,10 @@ public:
/// generated program.
bool isSafeToRemove() const;
+ /// Return true if the instruction will return (unwinding is considered as
+ /// a form of returning control flow here).
+ bool willReturn() const;
+
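A conservative check built on the new query might look like this (sketch, LLVM 12 API; the helper name is made up):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // True if every instruction in BB is known to transfer control onward
    // (normally or by unwinding), so execution cannot get stuck in BB.
    static bool allInstructionsWillReturn(const BasicBlock &BB) {
      return llvm::all_of(BB, [](const Instruction &I) { return I.willReturn(); });
    }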
/// Return true if the instruction is a variety of EH-block.
bool isEHPad() const {
switch (getOpcode()) {
@@ -643,20 +654,29 @@ public:
/// llvm.lifetime.end marker.
bool isLifetimeStartOrEnd() const;
+ /// Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
+ bool isDebugOrPseudoInst() const;
+
/// Return a pointer to the next non-debug instruction in the same basic
- /// block as 'this', or nullptr if no such instruction exists.
- const Instruction *getNextNonDebugInstruction() const;
- Instruction *getNextNonDebugInstruction() {
+ /// block as 'this', or nullptr if no such instruction exists. Skip any pseudo
+ /// operations if \c SkipPseudoOp is true.
+ const Instruction *
+ getNextNonDebugInstruction(bool SkipPseudoOp = false) const;
+ Instruction *getNextNonDebugInstruction(bool SkipPseudoOp = false) {
return const_cast<Instruction *>(
- static_cast<const Instruction *>(this)->getNextNonDebugInstruction());
+ static_cast<const Instruction *>(this)->getNextNonDebugInstruction(
+ SkipPseudoOp));
}
/// Return a pointer to the previous non-debug instruction in the same basic
- /// block as 'this', or nullptr if no such instruction exists.
- const Instruction *getPrevNonDebugInstruction() const;
- Instruction *getPrevNonDebugInstruction() {
+ /// block as 'this', or nullptr if no such instruction exists. Skip any pseudo
+ /// operations if \c SkipPseudoOp is true.
+ const Instruction *
+ getPrevNonDebugInstruction(bool SkipPseudoOp = false) const;
+ Instruction *getPrevNonDebugInstruction(bool SkipPseudoOp = false) {
return const_cast<Instruction *>(
- static_cast<const Instruction *>(this)->getPrevNonDebugInstruction());
+ static_cast<const Instruction *>(this)->getPrevNonDebugInstruction(
+ SkipPseudoOp));
}
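A sketch of walking a block while skipping both debug intrinsics and pseudo probes via the new SkipPseudoOp flag (LLVM 12 API; the visitor helper is hypothetical):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    template <typename VisitorT>
    static void visitRealInstructions(BasicBlock &BB, VisitorT Visit) {
      if (BB.empty())
        return;
      Instruction *I = &BB.front();
      if (I->isDebugOrPseudoInst())
        I = I->getNextNonDebugInstruction(/*SkipPseudoOp=*/true);
      for (; I; I = I->getNextNonDebugInstruction(/*SkipPseudoOp=*/true))
        Visit(*I);
    }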
/// Create a copy of 'this' instruction that is identical in all ways except
@@ -787,8 +807,6 @@ private:
return Value::getSubclassDataFromValue();
}
- void setHasMetadataHashEntry(bool V) { setSubclassData<HasMetadataField>(V); }
-
void setParent(BasicBlock *P);
protected:
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h b/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h
index 0afc585dfbe5..00ecc2aa7f37 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h
@@ -27,6 +27,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -105,7 +106,7 @@ public:
/// Get allocation size in bits. Returns None if size can't be determined,
/// e.g. in case of a VLA.
- Optional<uint64_t> getAllocationSizeInBits(const DataLayout &DL) const;
+ Optional<TypeSize> getAllocationSizeInBits(const DataLayout &DL) const;
/// Return the type that is being allocated by the instruction.
Type *getAllocatedType() const { return AllocatedType; }
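With the return type now Optional<TypeSize>, callers have to handle scalable allocation sizes explicitly. A sketch (LLVM 12 API; assumes TypeSize's isScalable/getKnownMinSize/getFixedSize accessors):

    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void printAllocaSize(const AllocaInst &AI, const DataLayout &DL) {
      if (Optional<TypeSize> Size = AI.getAllocationSizeInBits(DL)) {
        if (Size->isScalable())
          errs() << "at least " << Size->getKnownMinSize() << " bits (scalable)\n";
        else
          errs() << Size->getFixedSize() << " bits\n";
      } else {
        errs() << "size unknown (VLA)\n"; // e.g. a runtime array size
      }
    }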
@@ -1289,6 +1290,30 @@ public:
return !isEquality(P);
}
+ /// Return true if the predicate is SGT or UGT.
+ ///
+ static bool isGT(Predicate P) {
+ return P == ICMP_SGT || P == ICMP_UGT;
+ }
+
+ /// Return true if the predicate is SLT or ULT.
+ ///
+ static bool isLT(Predicate P) {
+ return P == ICMP_SLT || P == ICMP_ULT;
+ }
+
+ /// Return true if the predicate is SGE or UGE.
+ ///
+ static bool isGE(Predicate P) {
+ return P == ICMP_SGE || P == ICMP_UGE;
+ }
+
+ /// Return true if the predicate is SLE or ULE.
+ ///
+ static bool isLE(Predicate P) {
+ return P == ICMP_SLE || P == ICMP_ULE;
+ }
+
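The four new helpers classify a predicate without regard to signedness; a tiny usage sketch (LLVM 12 API; helper name made up):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // True for sgt/ugt/slt/ult, false for the non-strict and equality forms.
    static bool isStrictOrdering(ICmpInst::Predicate P) {
      return ICmpInst::isGT(P) || ICmpInst::isLT(P);
    }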
/// Exchange the two operands to this instruction in such a way that it does
/// not modify the semantics of the instruction. The predicate value may be
/// changed to retain the same result if the predicate is order dependent
@@ -1560,6 +1585,16 @@ public:
static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles,
Instruction *InsertPt = nullptr);
+ /// Create a clone of \p CI with a different set of operand bundles and
+ /// insert it before \p InsertPt.
+ ///
+ /// The returned call instruction is identical to \p CI in every way except that
+ /// the operand bundle for the new instruction is set to the operand bundle
+ /// in \p Bundle.
+ static CallInst *CreateWithReplacedBundle(CallInst *CI,
+ OperandBundleDef Bundle,
+ Instruction *InsertPt = nullptr);
+
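A sketch of swapping out a single operand bundle with the new helper (LLVM 12 API; the bundle tag, helper name and replacement behaviour shown here are illustrative, not prescribed by this patch):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Rebuild CI with a "deopt" bundle carrying NewState, then retire the
    // original call.
    static CallInst *replaceDeoptBundle(CallInst *CI, ArrayRef<Value *> NewState) {
      OperandBundleDef NewBundle("deopt", NewState);
      CallInst *NewCI = CallInst::CreateWithReplacedBundle(CI, NewBundle, CI);
      NewCI->takeName(CI);
      CI->replaceAllUsesWith(NewCI);
      CI->eraseFromParent();
      return NewCI;
    }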
/// Generate the IR for a call to malloc:
/// 1. Compute the malloc call's argument as the specified type's size,
/// possibly multiplied by the array size if the array size is not
@@ -2035,8 +2070,9 @@ public:
/// Examples: shufflevector <4 x n> A, <4 x n> B, <1,2,3>
/// shufflevector <4 x n> A, <4 x n> B, <1,2,3,4,5>
bool changesLength() const {
- unsigned NumSourceElts =
- cast<VectorType>(Op<0>()->getType())->getElementCount().Min;
+ unsigned NumSourceElts = cast<VectorType>(Op<0>()->getType())
+ ->getElementCount()
+ .getKnownMinValue();
unsigned NumMaskElts = ShuffleMask.size();
return NumSourceElts != NumMaskElts;
}
@@ -2045,8 +2081,9 @@ public:
/// elements than its source vectors.
/// Example: shufflevector <2 x n> A, <2 x n> B, <1,2,3>
bool increasesLength() const {
- unsigned NumSourceElts =
- cast<VectorType>(Op<0>()->getType())->getNumElements();
+ unsigned NumSourceElts = cast<VectorType>(Op<0>()->getType())
+ ->getElementCount()
+ .getKnownMinValue();
unsigned NumMaskElts = ShuffleMask.size();
return NumSourceElts < NumMaskElts;
}
@@ -2232,6 +2269,10 @@ public:
static bool isExtractSubvectorMask(const Constant *Mask, int NumSrcElts,
int &Index) {
assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(Mask->getType()))
+ return false;
SmallVector<int, 16> MaskAsInts;
getShuffleMask(Mask, MaskAsInts);
return isExtractSubvectorMask(MaskAsInts, NumSrcElts, Index);
@@ -2239,7 +2280,13 @@ public:
/// Return true if this shuffle mask is an extract subvector mask.
bool isExtractSubvectorMask(int &Index) const {
- int NumSrcElts = cast<VectorType>(Op<0>()->getType())->getNumElements();
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(getType()))
+ return false;
+
+ int NumSrcElts =
+ cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
return isExtractSubvectorMask(ShuffleMask, NumSrcElts, Index);
}
@@ -2743,6 +2790,15 @@ public:
/// non-undef value.
bool hasConstantOrUndefValue() const;
+ /// Return true if the PHI node is complete, i.e. all of its parent's
+ /// predecessors have an incoming value in this PHI; otherwise return false.
+ bool isComplete() const {
+ return llvm::all_of(predecessors(getParent()),
+ [this](const BasicBlock *Pred) {
+ return getBasicBlockIndex(Pred) >= 0;
+ });
+ }
+
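A sketch of the kind of check the new isComplete helper enables (LLVM 12 API; the wrapper is illustrative):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // True if every PHI in BB has an incoming value for each predecessor.
    static bool allPHIsComplete(const BasicBlock &BB) {
      return llvm::all_of(BB.phis(),
                          [](const PHINode &PN) { return PN.isComplete(); });
    }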
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::PHI;
@@ -3768,6 +3824,16 @@ public:
static InvokeInst *Create(InvokeInst *II, ArrayRef<OperandBundleDef> Bundles,
Instruction *InsertPt = nullptr);
+ /// Create a clone of \p II with a different set of operand bundles and
+ /// insert it before \p InsertPt.
+ ///
+ /// The returned invoke instruction is identical to \p II in every way except
+ /// that the operand bundle for the new instruction is set to the operand
+ /// bundle in \p Bundle.
+ static InvokeInst *CreateWithReplacedBundle(InvokeInst *II,
+ OperandBundleDef Bundles,
+ Instruction *InsertPt = nullptr);
+
// get*Dest - Return the destination basic blocks...
BasicBlock *getNormalDest() const {
return cast<BasicBlock>(Op<NormalDestOpEndIdx>());
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h
index 7a8898464e66..df3a1d568756 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h
@@ -52,6 +52,36 @@ public:
return getCalledFunction()->getIntrinsicID();
}
+ /// Return true if swapping the first two arguments to the intrinsic produces
+ /// the same result.
+ bool isCommutative() const {
+ switch (getIntrinsicID()) {
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ case Intrinsic::maximum:
+ case Intrinsic::minimum:
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_fix:
+ case Intrinsic::umul_fix:
+ case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix_sat:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ return true;
+ default:
+ return false;
+ }
+ }
+
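A sketch showing where the intrinsic-level query slots in next to Instruction::isCommutative (LLVM 12 API; helper name made up):

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // For intrinsic calls this asks about the first two call arguments; for
    // ordinary instructions it falls back to the opcode-based query.
    static bool firstTwoOperandsCommute(const Instruction &I) {
      if (const auto *II = dyn_cast<IntrinsicInst>(&I))
        return II->isCommutative();
      return I.isCommutative();
    }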
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const CallInst *I) {
if (const Function *CF = I->getCalledFunction())
@@ -937,6 +967,55 @@ public:
}
};
+class PseudoProbeInst : public IntrinsicInst {
+public:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::pseudoprobe;
+ }
+
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+
+ ConstantInt *getFuncGuid() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(0)));
+ }
+
+ ConstantInt *getIndex() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+ }
+
+ ConstantInt *getAttributes() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2)));
+ }
+
+ ConstantInt *getFactor() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
+ }
+};
+
+class NoAliasScopeDeclInst : public IntrinsicInst {
+public:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl;
+ }
+
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+
+ MDNode *getScopeList() const {
+ auto *MV =
+ cast<MetadataAsValue>(getOperand(Intrinsic::NoAliasScopeDeclScopeArg));
+ return cast<MDNode>(MV->getMetadata());
+ }
+
+ void setScopeList(MDNode *ScopeList) {
+ setOperand(Intrinsic::NoAliasScopeDeclScopeArg,
+ MetadataAsValue::get(getContext(), ScopeList));
+ }
+};
+
} // end namespace llvm
#endif // LLVM_IR_INTRINSICINST_H
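A sketch exercising the two new intrinsic wrappers declared above (LLVM 12 API; the printing helper is illustrative):

    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void describeIntrinsic(const IntrinsicInst &II) {
      if (const auto *PP = dyn_cast<PseudoProbeInst>(&II))
        errs() << "probe " << PP->getIndex()->getZExtValue()
               << " in function GUID " << PP->getFuncGuid()->getZExtValue()
               << "\n";
      else if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&II))
        errs() << "noalias scope list with "
               << Decl->getScopeList()->getNumOperands() << " scope(s)\n";
    }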
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.h b/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.h
index a9e6525e2f3d..f9b6c098a3f2 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.h
@@ -34,6 +34,9 @@ class AttributeList;
/// function known by LLVM. The enum values are returned by
/// Function::getIntrinsicID().
namespace Intrinsic {
+ // Abstraction for the arguments of the noalias intrinsics
+ static const int NoAliasScopeDeclScopeArg = 0;
+
// Intrinsic ID type. This is an opaque typedef to facilitate splitting up
// the enum into target-specific enums.
typedef unsigned ID;
@@ -125,7 +128,8 @@ namespace Intrinsic {
VecElementArgument,
Subdivide2Argument,
Subdivide4Argument,
- VecOfBitcastsToInt
+ VecOfBitcastsToInt,
+ AMX
} Kind;
union {
@@ -188,10 +192,8 @@ namespace Intrinsic {
}
static IITDescriptor getVector(unsigned Width, bool IsScalable) {
- IITDescriptor Result;
- Result.Kind = Vector;
- Result.Vector_Width.Min = Width;
- Result.Vector_Width.Scalable = IsScalable;
+ IITDescriptor Result = {Vector, {0}};
+ Result.Vector_Width = ElementCount::get(Width, IsScalable);
return Result;
}
};
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td
index 4918ea876df6..21307ed1bd91 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td
@@ -17,7 +17,9 @@ include "llvm/CodeGen/SDNodeProperties.td"
// Properties we keep track of for intrinsics.
//===----------------------------------------------------------------------===//
-class IntrinsicProperty;
+class IntrinsicProperty<bit is_default = false> {
+ bit IsDefault = is_default;
+}
// Intr*Mem - Memory properties. If no property is set, the worst case
// is assumed (it may read and write any memory it can get access to and it may
@@ -27,10 +29,6 @@ class IntrinsicProperty;
// effects. It may be CSE'd deleted if dead, etc.
def IntrNoMem : IntrinsicProperty;
-// IntrNoSync - Threads executing the intrinsic will not synchronize using
-// memory or other means.
-def IntrNoSync : IntrinsicProperty;
-
// IntrReadMem - This intrinsic only reads from memory. It does not write to
// memory and has no other side effects. Therefore, it cannot be moved across
// potentially aliasing stores. However, it can be reordered otherwise and can
@@ -81,6 +79,11 @@ class NoAlias<AttrIndex idx> : IntrinsicProperty {
int ArgNo = idx.Value;
}
+// NoUndef - The specified argument is neither undef nor poison.
+class NoUndef<AttrIndex idx> : IntrinsicProperty {
+ int ArgNo = idx.Value;
+}
+
class Align<AttrIndex idx, int align> : IntrinsicProperty {
int ArgNo = idx.Value;
int Align = align;
@@ -117,9 +120,15 @@ class ReadNone<AttrIndex idx> : IntrinsicProperty {
def IntrNoReturn : IntrinsicProperty;
-def IntrNoFree : IntrinsicProperty;
+// IntrNoSync - Threads executing the intrinsic will not synchronize using
+// memory or other means. Applied by default.
+def IntrNoSync : IntrinsicProperty<1>;
+
+// Applied by default.
+def IntrNoFree : IntrinsicProperty<1>;
-def IntrWillReturn : IntrinsicProperty;
+// Applied by default.
+def IntrWillReturn : IntrinsicProperty<1>;
// IntrCold - Calls to this intrinsic are cold.
// Parallels the cold attribute on LLVM IR functions.
@@ -152,7 +161,7 @@ def IntrHasSideEffects : IntrinsicProperty;
class LLVMType<ValueType vt> {
ValueType VT = vt;
- int isAny = 0;
+ int isAny = false;
}
class LLVMQualPointerType<LLVMType elty, int addrspace>
@@ -168,7 +177,7 @@ class LLVMAnyPointerType<LLVMType elty>
: LLVMType<iPTRAny>{
LLVMType ElTy = elty;
- let isAny = 1;
+ let isAny = true;
}
// Match the type of another intrinsic parameter. Number is an index into the
@@ -217,7 +226,7 @@ class LLVMSubdivide4VectorType<int num> : LLVMMatchType<num>;
class LLVMVectorOfBitcastsToInt<int num> : LLVMMatchType<num>;
def llvm_void_ty : LLVMType<isVoid>;
-let isAny = 1 in {
+let isAny = true in {
def llvm_any_ty : LLVMType<Any>;
def llvm_anyint_ty : LLVMType<iAny>;
def llvm_anyfloat_ty : LLVMType<fAny>;
@@ -246,6 +255,8 @@ def llvm_token_ty : LLVMType<token>; // token
def llvm_x86mmx_ty : LLVMType<x86mmx>;
def llvm_ptrx86mmx_ty : LLVMPointerType<llvm_x86mmx_ty>; // <1 x i64>*
+def llvm_x86amx_ty : LLVMType<x86amx>;
+
def llvm_v2i1_ty : LLVMType<v2i1>; // 2 x i1
def llvm_v4i1_ty : LLVMType<v4i1>; // 4 x i1
def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1
@@ -253,6 +264,7 @@ def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
def llvm_v128i1_ty : LLVMType<v128i1>; // 128 x i1
+def llvm_v256i1_ty : LLVMType<v256i1>; // 256 x i1
def llvm_v512i1_ty : LLVMType<v512i1>; // 512 x i1
def llvm_v1024i1_ty : LLVMType<v1024i1>; //1024 x i1
@@ -282,6 +294,7 @@ def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32
def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32
def llvm_v32i32_ty : LLVMType<v32i32>; // 32 x i32
def llvm_v64i32_ty : LLVMType<v64i32>; // 64 x i32
+def llvm_v256i32_ty : LLVMType<v256i32>; //256 x i32
def llvm_v1i64_ty : LLVMType<v1i64>; // 1 x i64
def llvm_v2i64_ty : LLVMType<v2i64>; // 2 x i64
@@ -331,7 +344,8 @@ class Intrinsic<list<LLVMType> ret_types,
list<LLVMType> param_types = [],
list<IntrinsicProperty> intr_properties = [],
string name = "",
- list<SDNodeProperty> sd_properties = []> : SDPatternOperator {
+ list<SDNodeProperty> sd_properties = [],
+ bit disable_default_attributes = true> : SDPatternOperator {
string LLVMName = name;
string TargetPrefix = ""; // Set to a prefix for target-specific intrinsics.
list<LLVMType> RetTypes = ret_types;
@@ -339,9 +353,23 @@ class Intrinsic<list<LLVMType> ret_types,
list<IntrinsicProperty> IntrProperties = intr_properties;
let Properties = sd_properties;
- bit isTarget = 0;
+ // Disable applying IntrinsicProperties that are marked default with
+ // IntrinsicProperty<1>
+ bit DisableDefaultAttributes = disable_default_attributes;
+
+ bit isTarget = false;
}
+// Intrinsic with default attributes (disable_default_attributes = false).
+class DefaultAttrsIntrinsic<list<LLVMType> ret_types,
+ list<LLVMType> param_types = [],
+ list<IntrinsicProperty> intr_properties = [],
+ string name = "",
+ list<SDNodeProperty> sd_properties = []>
+ : Intrinsic<ret_types, param_types,
+ intr_properties, name,
+ sd_properties, /*disable_default_attributes*/ 0> {}
+
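The practical effect is that intrinsics defined via DefaultAttrsIntrinsic should come out of Intrinsic::getDeclaration with nosync, nofree and willreturn already attached. A sketch of checking that from C++ (LLVM 12 API; which intrinsics actually opt in is determined entirely by the .td files):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // True if the declaration of the given intrinsic carries the 'nosync'
    // default attribute for the supplied overload types.
    static bool intrinsicIsNoSync(Module &M, Intrinsic::ID ID,
                                  ArrayRef<Type *> Tys) {
      Function *F = Intrinsic::getDeclaration(&M, ID, Tys);
      return F->hasFnAttribute(Attribute::NoSync);
    }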
/// GCCBuiltin - If this intrinsic exactly corresponds to a GCC builtin, this
/// specifies the name of the builtin. This provides automatic CBE and CFE
/// support.
@@ -357,10 +385,10 @@ class MSBuiltin<string name> {
//===--------------- Variable Argument Handling Intrinsics ----------------===//
//
-def int_vastart : Intrinsic<[], [llvm_ptr_ty], [], "llvm.va_start">;
-def int_vacopy : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [],
+def int_vastart : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_start">;
+def int_vacopy : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [],
"llvm.va_copy">;
-def int_vaend : Intrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">;
+def int_vaend : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">;
//===------------------- Garbage Collection Intrinsics --------------------===//
//
@@ -448,12 +476,12 @@ def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[],
//===--------------------- Code Generator Intrinsics ----------------------===//
//
-def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty],
+def int_returnaddress : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<0>>]>;
-def int_addressofreturnaddress : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>;
-def int_frameaddress : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty],
+def int_addressofreturnaddress : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>;
+def int_frameaddress : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<0>>]>;
-def int_sponentry : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>;
+def int_sponentry : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>;
def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
[IntrReadMem], "llvm.read_register">;
def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty],
@@ -464,33 +492,33 @@ def int_read_volatile_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty]
// Gets the address of the local variable area. This is typically a copy of the
// stack, frame, or base pointer depending on the type of prologue.
-def int_localaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
+def int_localaddress : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
// Escapes local variables to allow access from other functions.
-def int_localescape : Intrinsic<[], [llvm_vararg_ty]>;
+def int_localescape : DefaultAttrsIntrinsic<[], [llvm_vararg_ty]>;
// Given a function and the localaddress of a parent frame, returns a pointer
// to an escaped allocation indicated by the index.
-def int_localrecover : Intrinsic<[llvm_ptr_ty],
+def int_localrecover : DefaultAttrsIntrinsic<[llvm_ptr_ty],
[llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
// Given the frame pointer passed into an SEH filter function, returns a
// pointer to the local variable area suitable for use with llvm.localrecover.
-def int_eh_recoverfp : Intrinsic<[llvm_ptr_ty],
+def int_eh_recoverfp : DefaultAttrsIntrinsic<[llvm_ptr_ty],
[llvm_ptr_ty, llvm_ptr_ty],
[IntrNoMem]>;
// Note: we treat stacksave/stackrestore as writemem because we don't otherwise
// model their dependencies on allocas.
-def int_stacksave : Intrinsic<[llvm_ptr_ty]>,
+def int_stacksave : DefaultAttrsIntrinsic<[llvm_ptr_ty]>,
GCCBuiltin<"__builtin_stack_save">;
-def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>,
+def int_stackrestore : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>,
GCCBuiltin<"__builtin_stack_restore">;
-def int_get_dynamic_area_offset : Intrinsic<[llvm_anyint_ty]>;
+def int_get_dynamic_area_offset : DefaultAttrsIntrinsic<[llvm_anyint_ty]>;
-def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>,
+def int_thread_pointer : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], [IntrNoMem]>,
GCCBuiltin<"__builtin_thread_pointer">;
// IntrInaccessibleMemOrArgMemOnly is a little more pessimistic than strictly
@@ -498,51 +526,59 @@ def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>,
// from being reordered overly much with respect to nearby access to the same
// memory while not impeding optimization.
def int_prefetch
- : Intrinsic<[], [ llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ],
+ : DefaultAttrsIntrinsic<[], [ llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ],
[IntrInaccessibleMemOrArgMemOnly, IntrWillReturn,
ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
-def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;
+def int_pcmarker : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
-def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
+def int_readcyclecounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>;
// The assume intrinsic is marked as arbitrarily writing so that proper
// control dependencies will be maintained.
-def int_assume : Intrinsic<[], [llvm_i1_ty], [IntrWillReturn]>;
+def int_assume : DefaultAttrsIntrinsic<[], [llvm_i1_ty], [IntrWillReturn,
+ NoUndef<ArgIndex<0>>]>;
+
+// 'llvm.experimental.noalias.scope.decl' intrinsic: Inserted at the location of
+// noalias scope declaration. Makes it possible to identify that a noalias scope
+// is only valid inside the body of a loop.
+//
+// Purpose of the different arguments:
+// - arg0: id.scope: metadata representing the scope declaration.
+def int_experimental_noalias_scope_decl
+ : DefaultAttrsIntrinsic<[], [llvm_metadata_ty],
+ [IntrInaccessibleMemOnly]>; // blocks LICM and some more
// Stack Protector Intrinsic - The stackprotector intrinsic writes the stack
// guard to the correct place on the stack frame.
-def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
-def int_stackguard : Intrinsic<[llvm_ptr_ty], [], []>;
+def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
+def int_stackguard : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], []>;
// A counter increment for instrumentation based profiling.
def int_instrprof_increment : Intrinsic<[],
[llvm_ptr_ty, llvm_i64_ty,
- llvm_i32_ty, llvm_i32_ty],
- []>;
+ llvm_i32_ty, llvm_i32_ty]>;
// A counter increment with step for instrumentation based profiling.
def int_instrprof_increment_step : Intrinsic<[],
[llvm_ptr_ty, llvm_i64_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
- []>;
+ llvm_i32_ty, llvm_i32_ty, llvm_i64_ty]>;
// A call to profile runtime for value profiling of target expressions
// through instrumentation based profiling.
def int_instrprof_value_profile : Intrinsic<[],
[llvm_ptr_ty, llvm_i64_ty,
llvm_i64_ty, llvm_i32_ty,
- llvm_i32_ty],
- []>;
+ llvm_i32_ty]>;
-def int_call_preallocated_setup : Intrinsic<[llvm_token_ty], [llvm_i32_ty]>;
-def int_call_preallocated_arg : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_i32_ty]>;
-def int_call_preallocated_teardown : Intrinsic<[], [llvm_token_ty]>;
+def int_call_preallocated_setup : DefaultAttrsIntrinsic<[llvm_token_ty], [llvm_i32_ty]>;
+def int_call_preallocated_arg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_i32_ty]>;
+def int_call_preallocated_teardown : DefaultAttrsIntrinsic<[], [llvm_token_ty]>;
//===------------------- Standard C Library Intrinsics --------------------===//
//
-def int_memcpy : Intrinsic<[],
+def int_memcpy : DefaultAttrsIntrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
llvm_i1_ty],
[IntrArgMemOnly, IntrWillReturn,
@@ -556,7 +592,7 @@ def int_memcpy : Intrinsic<[],
// external function.
// The third argument (specifying the size) must be a constant.
def int_memcpy_inline
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty],
[IntrArgMemOnly, IntrWillReturn,
NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
@@ -564,13 +600,14 @@ def int_memcpy_inline
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
-def int_memmove : Intrinsic<[],
+def int_memmove : DefaultAttrsIntrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
llvm_i1_ty],
[IntrArgMemOnly, IntrWillReturn,
NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
- ReadOnly<ArgIndex<1>>, ImmArg<ArgIndex<3>>]>;
-def int_memset : Intrinsic<[],
+ WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
+ ImmArg<ArgIndex<3>>]>;
+def int_memset : DefaultAttrsIntrinsic<[],
[llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
llvm_i1_ty],
[IntrWriteMem, IntrArgMemOnly, IntrWillReturn,
@@ -581,65 +618,65 @@ def int_memset : Intrinsic<[],
// rounding modes and FP exception handling.
let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
- def int_fma : Intrinsic<[llvm_anyfloat_ty],
+ def int_fma : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>]>;
- def int_fmuladd : Intrinsic<[llvm_anyfloat_ty],
+ def int_fmuladd : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>]>;
// These functions do not read memory, but are sensitive to the
// rounding mode. LLVM purposely does not model changes to the FP
// environment so they can be treated as readnone.
- def int_sqrt : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_powi : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>;
- def int_sin : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_cos : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_pow : Intrinsic<[llvm_anyfloat_ty],
+ def int_sqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_powi : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>;
+ def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_pow : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
- def int_log : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_log10: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_log2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_copysign : Intrinsic<[llvm_anyfloat_ty],
+ def int_log : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_log10: DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_log2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_exp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_exp2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_fabs : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_copysign : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
- def int_floor : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_ceil : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_trunc : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_rint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_nearbyint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_round : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_roundeven : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_canonicalize : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>],
+ def int_floor : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_ceil : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_trunc : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_rint : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_nearbyint : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_round : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_roundeven : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_canonicalize : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>],
[IntrNoMem]>;
- def int_lround : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
- def int_llround : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
- def int_lrint : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
- def int_llrint : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+ def int_lround : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+ def int_llround : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+ def int_lrint : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+ def int_llrint : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
}
-def int_minnum : Intrinsic<[llvm_anyfloat_ty],
+def int_minnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
-def int_maxnum : Intrinsic<[llvm_anyfloat_ty],
+def int_maxnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
-def int_minimum : Intrinsic<[llvm_anyfloat_ty],
+def int_minimum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
-def int_maximum : Intrinsic<[llvm_anyfloat_ty],
+def int_maximum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
// Internal interface for object size checking
-def int_objectsize : Intrinsic<[llvm_anyint_ty],
+def int_objectsize : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty, llvm_i1_ty,
llvm_i1_ty, llvm_i1_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
@@ -651,77 +688,77 @@ def int_objectsize : Intrinsic<[llvm_anyint_ty],
//
let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
- def int_flt_rounds : Intrinsic<[llvm_i32_ty], []>;
+ def int_flt_rounds : DefaultAttrsIntrinsic<[llvm_i32_ty], []>;
}
//===--------------- Constrained Floating Point Intrinsics ----------------===//
//
let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
- def int_experimental_constrained_fadd : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_fadd : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_fsub : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_fsub : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_fmul : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_fmul : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_fdiv : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_fdiv : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_frem : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_frem : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_fma : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_fma : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_fmuladd : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_fmuladd : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_fptosi : Intrinsic<[ llvm_anyint_ty ],
+ def int_experimental_constrained_fptosi : DefaultAttrsIntrinsic<[ llvm_anyint_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_fptoui : Intrinsic<[ llvm_anyint_ty ],
+ def int_experimental_constrained_fptoui : DefaultAttrsIntrinsic<[ llvm_anyint_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_sitofp : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_sitofp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyint_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_uitofp : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_uitofp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyint_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_fpext : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_fpext : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
@@ -729,110 +766,110 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
// versions of each of them. When strict rounding and exception control are
// not required the non-constrained versions of these intrinsics should be
// used.
- def int_experimental_constrained_sqrt : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_sqrt : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_powi : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_powi : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_i32_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_sin : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_cos : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_cos : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_pow : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_pow : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_log : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_log : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_log10: Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_log10: DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_log2 : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_log2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_exp : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_exp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_exp2 : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_exp2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_rint : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_rint : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_nearbyint : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_nearbyint : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_lrint : Intrinsic<[ llvm_anyint_ty ],
+ def int_experimental_constrained_lrint : DefaultAttrsIntrinsic<[ llvm_anyint_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_llrint : Intrinsic<[ llvm_anyint_ty ],
+ def int_experimental_constrained_llrint : DefaultAttrsIntrinsic<[ llvm_anyint_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_maxnum : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty ]>;
- def int_experimental_constrained_minnum : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_minnum : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty ]>;
- def int_experimental_constrained_maximum : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_maximum : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty ]>;
- def int_experimental_constrained_minimum : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_minimum : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty ]>;
- def int_experimental_constrained_ceil : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_ceil : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty ]>;
- def int_experimental_constrained_floor : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_floor : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty ]>;
- def int_experimental_constrained_lround : Intrinsic<[ llvm_anyint_ty ],
+ def int_experimental_constrained_lround : DefaultAttrsIntrinsic<[ llvm_anyint_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_llround : Intrinsic<[ llvm_anyint_ty ],
+ def int_experimental_constrained_llround : DefaultAttrsIntrinsic<[ llvm_anyint_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty ]>;
- def int_experimental_constrained_roundeven : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_roundeven : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty ]>;
- def int_experimental_constrained_trunc : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_trunc : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty ]>;
// Constrained floating-point comparison (quiet and signaling variants).
// Third operand is the predicate represented as a metadata string.
def int_experimental_constrained_fcmp
- : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+ : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
[ llvm_anyfloat_ty, LLVMMatchType<0>,
llvm_metadata_ty, llvm_metadata_ty ]>;
def int_experimental_constrained_fcmps
- : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+ : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
[ llvm_anyfloat_ty, LLVMMatchType<0>,
llvm_metadata_ty, llvm_metadata_ty ]>;
}
@@ -840,10 +877,10 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
//===------------------------- Expect Intrinsics --------------------------===//
//
-def int_expect : Intrinsic<[llvm_anyint_ty],
+def int_expect : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrWillReturn]>;
-def int_expect_with_probability : Intrinsic<[llvm_anyint_ty],
+def int_expect_with_probability : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_double_ty],
[IntrNoMem, IntrWillReturn]>;
@@ -852,19 +889,19 @@ def int_expect_with_probability : Intrinsic<[llvm_anyint_ty],
// None of these intrinsics accesses memory at all.
let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
- def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_fshl : Intrinsic<[llvm_anyint_ty],
+ def int_bswap: DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+ def int_ctpop: DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+ def int_bitreverse : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+ def int_fshl : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
- def int_fshr : Intrinsic<[llvm_anyint_ty],
+ def int_fshr : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
}
let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn,
ImmArg<ArgIndex<1>>] in {
- def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
- def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+ def int_ctlz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+ def int_cttz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
}
//===------------------------ Debugger Intrinsics -------------------------===//
@@ -875,19 +912,19 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn,
// needed in a few places. These synthetic intrinsics have no
// side-effects and just mark information about their operands.
let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
- def int_dbg_declare : Intrinsic<[],
+ def int_dbg_declare : DefaultAttrsIntrinsic<[],
[llvm_metadata_ty,
llvm_metadata_ty,
llvm_metadata_ty]>;
- def int_dbg_value : Intrinsic<[],
+ def int_dbg_value : DefaultAttrsIntrinsic<[],
[llvm_metadata_ty,
llvm_metadata_ty,
llvm_metadata_ty]>;
- def int_dbg_addr : Intrinsic<[],
+ def int_dbg_addr : DefaultAttrsIntrinsic<[],
[llvm_metadata_ty,
llvm_metadata_ty,
llvm_metadata_ty]>;
- def int_dbg_label : Intrinsic<[],
+ def int_dbg_label : DefaultAttrsIntrinsic<[],
[llvm_metadata_ty]>;
}
@@ -917,10 +954,9 @@ def int_eh_unwind_init: Intrinsic<[]>,
def int_eh_dwarf_cfa : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty]>;
-let IntrProperties = [IntrNoMem] in {
- def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty]>;
- def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty]>;
-}
+def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
+def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+
def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>;
def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty], [IntrNoReturn]>;
@@ -928,15 +964,15 @@ def int_eh_sjlj_setup_dispatch : Intrinsic<[], []>;
//===---------------- Generic Variable Attribute Intrinsics----------------===//
//
-def int_var_annotation : Intrinsic<[],
+def int_var_annotation : DefaultAttrsIntrinsic<[],
[llvm_ptr_ty, llvm_ptr_ty,
- llvm_ptr_ty, llvm_i32_ty],
+ llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty],
[IntrWillReturn], "llvm.var.annotation">;
-def int_ptr_annotation : Intrinsic<[LLVMAnyPointerType<llvm_anyint_ty>],
+def int_ptr_annotation : DefaultAttrsIntrinsic<[LLVMAnyPointerType<llvm_anyint_ty>],
[LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty,
- llvm_i32_ty],
+ llvm_i32_ty, llvm_ptr_ty],
[IntrWillReturn], "llvm.ptr.annotation">;
-def int_annotation : Intrinsic<[llvm_anyint_ty],
+def int_annotation : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, llvm_ptr_ty,
llvm_ptr_ty, llvm_i32_ty],
[IntrWillReturn], "llvm.annotation">;
@@ -944,7 +980,7 @@ def int_annotation : Intrinsic<[llvm_anyint_ty],
// Annotates the current program point with metadata strings which are emitted
// as CodeView debug info records. This is expensive, as it disables inlining
// and is modelled as having side effects.
-def int_codeview_annotation : Intrinsic<[], [llvm_metadata_ty],
+def int_codeview_annotation : DefaultAttrsIntrinsic<[], [llvm_metadata_ty],
[IntrInaccessibleMemOnly, IntrNoDuplicate, IntrWillReturn],
"llvm.codeview.annotation">;
@@ -964,99 +1000,124 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
// Expose the carry flag from add operations on two integrals.
let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
- def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty,
+ def int_sadd_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
- def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty,
+ def int_uadd_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
- def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty,
+ def int_ssub_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
- def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty,
+ def int_usub_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
- def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty,
+ def int_smul_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
- def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty,
+ def int_umul_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
}
//===------------------------- Saturation Arithmetic Intrinsics ---------------------===//
//
-def int_sadd_sat : Intrinsic<[llvm_anyint_ty],
+def int_sadd_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]>;
-def int_uadd_sat : Intrinsic<[llvm_anyint_ty],
+def int_uadd_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]>;
-def int_ssub_sat : Intrinsic<[llvm_anyint_ty],
+def int_ssub_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+def int_usub_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+def int_sshl_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
-def int_usub_sat : Intrinsic<[llvm_anyint_ty],
+def int_ushl_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
//===------------------------- Fixed Point Arithmetic Intrinsics ---------------------===//
//
-def int_smul_fix : Intrinsic<[llvm_anyint_ty],
+def int_smul_fix : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
Commutative, ImmArg<ArgIndex<2>>]>;
-def int_umul_fix : Intrinsic<[llvm_anyint_ty],
+def int_umul_fix : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
Commutative, ImmArg<ArgIndex<2>>]>;
-def int_sdiv_fix : Intrinsic<[llvm_anyint_ty],
+def int_sdiv_fix : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_udiv_fix : Intrinsic<[llvm_anyint_ty],
+def int_udiv_fix : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
//===------------------- Fixed Point Saturation Arithmetic Intrinsics ----------------===//
//
-def int_smul_fix_sat : Intrinsic<[llvm_anyint_ty],
+def int_smul_fix_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
Commutative, ImmArg<ArgIndex<2>>]>;
-def int_umul_fix_sat : Intrinsic<[llvm_anyint_ty],
+def int_umul_fix_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
Commutative, ImmArg<ArgIndex<2>>]>;
-def int_sdiv_fix_sat : Intrinsic<[llvm_anyint_ty],
+def int_sdiv_fix_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_udiv_fix_sat : Intrinsic<[llvm_anyint_ty],
+def int_udiv_fix_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
+//===------------------ Integer Min/Max/Abs Intrinsics --------------------===//
+//
+def int_abs : DefaultAttrsIntrinsic<
+ [llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
+def int_smax : DefaultAttrsIntrinsic<
+ [llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+def int_smin : DefaultAttrsIntrinsic<
+ [llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+def int_umax : DefaultAttrsIntrinsic<
+ [llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+def int_umin : DefaultAttrsIntrinsic<
+ [llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
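A sketch of emitting the new integer min/max/abs intrinsics through IRBuilder (LLVM 12 API; the wrapper functions are illustrative):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    using namespace llvm;

    static Value *emitSMax(IRBuilder<> &B, Value *X, Value *Y) {
      return B.CreateBinaryIntrinsic(Intrinsic::smax, X, Y);
    }

    static Value *emitAbs(IRBuilder<> &B, Value *X, bool IntMinIsPoison) {
      // Second operand is the i1 immediate selecting poison on INT_MIN.
      return B.CreateIntrinsic(Intrinsic::abs, {X->getType()},
                               {X, B.getInt1(IntMinIsPoison)});
    }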
//===------------------------- Memory Use Markers -------------------------===//
//
-def int_lifetime_start : Intrinsic<[],
+def int_lifetime_start : DefaultAttrsIntrinsic<[],
[llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, IntrWillReturn,
NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<0>>]>;
-def int_lifetime_end : Intrinsic<[],
+def int_lifetime_end : DefaultAttrsIntrinsic<[],
[llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, IntrWillReturn,
NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<0>>]>;
-def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
+def int_invariant_start : DefaultAttrsIntrinsic<[llvm_descriptor_ty],
[llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, IntrWillReturn,
NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<0>>]>;
-def int_invariant_end : Intrinsic<[],
+def int_invariant_end : DefaultAttrsIntrinsic<[],
[llvm_descriptor_ty, llvm_i64_ty,
llvm_anyptr_ty],
[IntrArgMemOnly, IntrWillReturn,
@@ -1075,26 +1136,26 @@ def int_invariant_end : Intrinsic<[],
// it would remove barrier.
// Note that it is still experimental, which means that its semantics
// might change in the future.
-def int_launder_invariant_group : Intrinsic<[llvm_anyptr_ty],
+def int_launder_invariant_group : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
[LLVMMatchType<0>],
[IntrInaccessibleMemOnly, IntrSpeculatable, IntrWillReturn]>;
-def int_strip_invariant_group : Intrinsic<[llvm_anyptr_ty],
+def int_strip_invariant_group : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
[LLVMMatchType<0>],
[IntrSpeculatable, IntrNoMem, IntrWillReturn]>;
//===------------------------ Stackmap Intrinsics -------------------------===//
//
-def int_experimental_stackmap : Intrinsic<[],
+def int_experimental_stackmap : DefaultAttrsIntrinsic<[],
[llvm_i64_ty, llvm_i32_ty, llvm_vararg_ty],
[Throws]>;
-def int_experimental_patchpoint_void : Intrinsic<[],
+def int_experimental_patchpoint_void : DefaultAttrsIntrinsic<[],
[llvm_i64_ty, llvm_i32_ty,
llvm_ptr_ty, llvm_i32_ty,
llvm_vararg_ty],
[Throws]>;
-def int_experimental_patchpoint_i64 : Intrinsic<[llvm_i64_ty],
+def int_experimental_patchpoint_i64 : DefaultAttrsIntrinsic<[llvm_i64_ty],
[llvm_i64_ty, llvm_i32_ty,
llvm_ptr_ty, llvm_i32_ty,
llvm_vararg_ty],
@@ -1139,6 +1200,23 @@ def int_coro_id_retcon_once : Intrinsic<[llvm_token_ty],
llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
[]>;
def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>;
+def int_coro_id_async : Intrinsic<[llvm_token_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
+ []>;
+def int_coro_async_context_alloc : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_ptr_ty],
+ []>;
+def int_coro_async_context_dealloc : Intrinsic<[],
+ [llvm_ptr_ty],
+ []>;
+def int_coro_async_resume : Intrinsic<[llvm_ptr_ty],
+ [],
+ []>;
+def int_coro_suspend_async : Intrinsic<[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty],
+ []>;
+def int_coro_prepare_async : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
+ [IntrNoMem]>;
def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
[WriteOnly<ArgIndex<1>>]>;
@@ -1147,6 +1225,8 @@ def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
ReadOnly<ArgIndex<1>>,
NoCapture<ArgIndex<1>>]>;
def int_coro_end : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i1_ty], []>;
+def int_coro_end_async
+ : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i1_ty, llvm_vararg_ty], []>;
def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
@@ -1190,32 +1270,47 @@ def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold]>,
GCCBuiltin<"__builtin_trap">;
def int_debugtrap : Intrinsic<[]>,
GCCBuiltin<"__builtin_debugtrap">;
+def int_ubsantrap : Intrinsic<[], [llvm_i8_ty],
+ [IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;
// Support for dynamic deoptimization (or de-specialization)
def int_experimental_deoptimize : Intrinsic<[llvm_any_ty], [llvm_vararg_ty],
[Throws]>;
// Support for speculative runtime guards
-def int_experimental_guard : Intrinsic<[], [llvm_i1_ty, llvm_vararg_ty],
+def int_experimental_guard : DefaultAttrsIntrinsic<[], [llvm_i1_ty, llvm_vararg_ty],
[Throws]>;
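A sketch of a guard call as it typically appears in IR (the condition and the deopt state are placeholders):

  ; Deoptimizes through the attached "deopt" state whenever %cond is false.
  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"(i32 %state) ]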
// Supports widenable conditions for guards represented as explicit branches.
-def int_experimental_widenable_condition : Intrinsic<[llvm_i1_ty], [],
+def int_experimental_widenable_condition : DefaultAttrsIntrinsic<[llvm_i1_ty], [],
[IntrInaccessibleMemOnly, IntrWillReturn, IntrSpeculatable]>;
// NOP: calls/invokes to this intrinsic are removed by codegen
-def int_donothing : Intrinsic<[], [], [IntrNoMem, IntrWillReturn]>;
+def int_donothing : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrWillReturn]>;
// This instruction has no actual effect, though it is treated by the optimizer
// as having opaque side effects. This may be inserted into loops to ensure
// that they are not removed even if they turn out to be empty, for languages
// which specify that infinite loops must be preserved.
-def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>;
+def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>;
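A minimal sketch of the pattern the comment describes, keeping an otherwise empty loop from being deleted:

loop:
  call void @llvm.sideeffect()
  br label %loop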
+
+// The pseudoprobe intrinsic works as a placeholder for the block it probes.
+// Like the sideeffect intrinsic defined above, this intrinsic is treated by the
+// optimizer as having opaque side effects so that it won't be removed from, or
+// moved out of, the block it probes.
+def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrInaccessibleMemOnly, IntrWillReturn]>;
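A hedged usage sketch of the new pseudoprobe intrinsic; the operand roles (function GUID, probe index, flags, scale factor) follow my reading of the definition, and the constants are placeholders:

  call void @llvm.pseudoprobe(i64 1234, i64 1, i32 0, i64 -1)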
// Intrinsics to support half precision floating point format
let IntrProperties = [IntrNoMem, IntrWillReturn] in {
-def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>;
-def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>;
+def int_convert_to_fp16 : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>;
+def int_convert_from_fp16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>;
+}
+
+// Saturating floating point to integer intrinsics
+let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
+def int_fptoui_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+def int_fptosi_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
}
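A minimal sketch of the new saturating conversions in scalar and vector form (placeholder operands):

  ; Saturates to the limits of the destination integer type instead of producing poison.
  %i = call i32 @llvm.fptosi.sat.i32.f32(float %x)
  %u = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> %v)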
// Clear cache intrinsic, default to ignore (ie. emit nothing)
@@ -1224,144 +1319,147 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
[], "llvm.clear_cache">;
// Intrinsic to detect whether its argument is a constant.
-def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty],
+def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty],
[IntrNoMem, IntrWillReturn, IntrConvergent],
"llvm.is.constant">;
// Intrinsic to mask out bits of a pointer.
-def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty],
+def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
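A minimal sketch (placeholder %p), assuming the usual mangling on the pointer and mask types:

  ; Clears the low four address bits of %p, e.g. to round down to a 16-byte boundary.
  %aligned = call i8* @llvm.ptrmask.p0i8.i64(i8* %p, i64 -16)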
//===---------------- Vector Predication Intrinsics --------------===//
-// Binary operators
-let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
- def int_vp_add : Intrinsic<[ llvm_anyvector_ty ],
+// Speculatable Binary operators
+let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
- def int_vp_sub : Intrinsic<[ llvm_anyvector_ty ],
+ def int_vp_sub : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
- def int_vp_mul : Intrinsic<[ llvm_anyvector_ty ],
+ def int_vp_mul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
- def int_vp_sdiv : Intrinsic<[ llvm_anyvector_ty ],
+ def int_vp_ashr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
- def int_vp_udiv : Intrinsic<[ llvm_anyvector_ty ],
+ def int_vp_lshr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
- def int_vp_srem : Intrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
- def int_vp_urem : Intrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
- def int_vp_ashr : Intrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
- def int_vp_lshr : Intrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
- def int_vp_shl : Intrinsic<[ llvm_anyvector_ty ],
+ def int_vp_shl : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
- def int_vp_or : Intrinsic<[ llvm_anyvector_ty ],
+ def int_vp_or : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
- def int_vp_and : Intrinsic<[ llvm_anyvector_ty ],
+ def int_vp_and : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
- def int_vp_xor : Intrinsic<[ llvm_anyvector_ty ],
+ def int_vp_xor : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
+}
+// Non-speculatable binary operators.
+let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ def int_vp_sdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_udiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_srem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_urem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
}
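A minimal sketch of one vector-predicated operation from each of the groups above (mask and explicit vector length are placeholders):

  ; Lanes that are masked off, or at or beyond %evl, yield unspecified results.
  %sum = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
  %quo = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)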
def int_get_active_lane_mask:
- Intrinsic<[llvm_anyvector_ty],
+ DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyint_ty, LLVMMatchType<1>],
[IntrNoMem, IntrNoSync, IntrWillReturn]>;
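A minimal sketch (placeholders %base, %n):

  ; Yields <%base+0 < %n, %base+1 < %n, ...> as an i1 vector, e.g. for tail-folded loops.
  %m = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %base, i64 %n)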
//===-------------------------- Masked Intrinsics -------------------------===//
//
-def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
- LLVMAnyPointerType<LLVMMatchType<0>>,
- llvm_i32_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [IntrArgMemOnly, IntrWillReturn, ImmArg<ArgIndex<2>>]>;
-
-def int_masked_load : Intrinsic<[llvm_anyvector_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
- [IntrReadMem, IntrArgMemOnly, IntrWillReturn,
- ImmArg<ArgIndex<1>>]>;
-
-def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
- [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<0>],
- [IntrReadMem, IntrWillReturn,
- ImmArg<ArgIndex<1>>]>;
-
-def int_masked_scatter: Intrinsic<[],
- [llvm_anyvector_ty,
- LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [IntrWillReturn, ImmArg<ArgIndex<2>>]>;
-
-def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
- [LLVMPointerToElt<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<0>],
- [IntrReadMem, IntrWillReturn]>;
-
-def int_masked_compressstore: Intrinsic<[],
- [llvm_anyvector_ty,
- LLVMPointerToElt<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [IntrArgMemOnly, IntrWillReturn]>;
+def int_masked_load:
+ DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
+ [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
+def int_masked_store:
+ DefaultAttrsIntrinsic<[],
+ [llvm_anyvector_ty, LLVMAnyPointerType<LLVMMatchType<0>>,
+ llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [IntrWriteMem, IntrArgMemOnly, IntrWillReturn,
+ ImmArg<ArgIndex<2>>]>;
+
+def int_masked_gather:
+ DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
+ [IntrReadMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
+def int_masked_scatter:
+ DefaultAttrsIntrinsic<[],
+ [llvm_anyvector_ty, LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<2>>]>;
+
+def int_masked_expandload:
+ DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>],
+ [IntrReadMem, IntrWillReturn]>;
+
+def int_masked_compressstore:
+ DefaultAttrsIntrinsic<[],
+ [llvm_anyvector_ty, LLVMPointerToElt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [IntrWriteMem, IntrArgMemOnly, IntrWillReturn]>;
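A minimal sketch of the masked load/store pair as reworked above; the pointer, mask and pass-through value are placeholders, and i32 4 is the alignment immediate:

  %v = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %ptr, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v, <4 x i32>* %ptr, i32 4, <4 x i1> %mask)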
// Test whether a pointer is associated with a type metadata identifier.
-def int_type_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
+def int_type_test : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
[IntrNoMem, IntrWillReturn]>;
// Safely loads a function pointer from a virtual table pointer using type metadata.
-def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty],
+def int_type_checked_load : DefaultAttrsIntrinsic<[llvm_ptr_ty, llvm_i1_ty],
[llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty],
[IntrNoMem, IntrWillReturn]>;
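A minimal sketch of a type test; the type identifier string is a placeholder:

  ; True if %vtable carries the "_ZTS1A" type id; used by CFI and whole-program devirtualization.
  %ok = call i1 @llvm.type.test(i8* %vtable, metadata !"_ZTS1A")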
// Create a branch funnel that implements an indirect call to a limited set of
// callees. This needs to be a musttail call.
-def int_icall_branch_funnel : Intrinsic<[], [llvm_vararg_ty], []>;
+def int_icall_branch_funnel : DefaultAttrsIntrinsic<[], [llvm_vararg_ty], []>;
-def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
+def int_load_relative: DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
[IntrReadMem, IntrArgMemOnly]>;
def int_hwasan_check_memaccess :
@@ -1413,54 +1511,55 @@ def int_memset_element_unordered_atomic
//===------------------------ Reduction Intrinsics ------------------------===//
//
-let IntrProperties = [IntrNoMem, IntrWillReturn] in {
- def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>,
- llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>,
- llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
- def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty]>;
+let IntrProperties = [IntrNoMem] in {
+
+ def int_vector_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty]>;
+ def int_vector_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty]>;
+ def int_vector_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_and : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_xor : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_umax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
}
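A minimal sketch of the renamed (no longer "experimental") reductions, with placeholder vectors:

  %s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
  ; fadd/fmul take an explicit start value; without reassociation flags the reduction is ordered.
  %f = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> %fv)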
//===----- Matrix intrinsics ---------------------------------------------===//
def int_matrix_transpose
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
[ IntrNoSync, IntrWillReturn, IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
def int_matrix_multiply
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, llvm_anyvector_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty],
[IntrNoSync, IntrWillReturn, IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>,
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
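A minimal sketch of a 2x4 by 4x2 single-precision multiply; the matrices are flattened column-major into flat vectors, and the trailing immediates give the dimensions (rows of A, shared dimension, columns of B). Operands are placeholders:

  %c = call <4 x float> @llvm.matrix.multiply.v4f32.v8f32.v8f32(<8 x float> %a, <8 x float> %b, i32 2, i32 4, i32 2)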
def int_matrix_column_major_load
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMPointerToElt<0>, llvm_i64_ty, llvm_i1_ty,
llvm_i32_ty, llvm_i32_ty],
[IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrReadMem,
@@ -1468,7 +1567,7 @@ def int_matrix_column_major_load
ImmArg<ArgIndex<4>>]>;
def int_matrix_column_major_store
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty, LLVMPointerToElt<0>,
llvm_i64_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrWriteMem,
@@ -1480,18 +1579,23 @@ def int_matrix_column_major_store
// Specify that the value given is the number of iterations that the next loop
// will execute.
def int_set_loop_iterations :
- Intrinsic<[], [llvm_anyint_ty], [IntrNoDuplicate]>;
+ DefaultAttrsIntrinsic<[], [llvm_anyint_ty], [IntrNoDuplicate]>;
+
+// Same as the above, but produces a value (the same as the input operand) to
+// be fed into the loop.
+def int_start_loop_iterations :
+ DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoDuplicate]>;
// Specify that the value given is the number of iterations that the next loop
// will execute. Also test that the given count is not zero, allowing it to
// control entry to a 'while' loop.
def int_test_set_loop_iterations :
- Intrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>;
+ DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>;
// Decrement loop counter by the given argument. Return false if the loop
// should exit.
def int_loop_decrement :
- Intrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>;
+ DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>;
// Decrement the first operand (the loop counter) by the second operand (the
// maximum number of elements processed in an iteration). Return the remaining
@@ -1501,27 +1605,27 @@ def int_loop_decrement :
// it's scevable, so it's the backend's responsibility to handle cases where it
// may be optimised.
def int_loop_decrement_reg :
- Intrinsic<[llvm_anyint_ty],
+ DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoDuplicate]>;
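A hedged sketch of how these hardware-loop intrinsics are typically chained; block names and %n are placeholders:

entry:
  %enter = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
  br i1 %enter, label %loop, label %exit
loop:
  ; ... loop body ...
  %cont = call i1 @llvm.loop.decrement.i32(i32 1)
  br i1 %cont, label %loop, label %exit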
//===----- Intrinsics that are used to provide predicate information -----===//
-def int_ssa_copy : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>],
+def int_ssa_copy : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>],
[IntrNoMem, Returned<ArgIndex<0>>]>;
//===------- Intrinsics that are used to preserve debug information -------===//
-def int_preserve_array_access_index : Intrinsic<[llvm_anyptr_ty],
+def int_preserve_array_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty, llvm_i32_ty,
llvm_i32_ty],
[IntrNoMem,
ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
-def int_preserve_union_access_index : Intrinsic<[llvm_anyptr_ty],
+def int_preserve_union_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty, llvm_i32_ty],
[IntrNoMem,
ImmArg<ArgIndex<1>>]>;
-def int_preserve_struct_access_index : Intrinsic<[llvm_anyptr_ty],
+def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty, llvm_i32_ty,
llvm_i32_ty],
[IntrNoMem,
@@ -1529,7 +1633,16 @@ def int_preserve_struct_access_index : Intrinsic<[llvm_anyptr_ty],
ImmArg<ArgIndex<2>>]>;
//===---------- Intrinsics to query properties of scalable vectors --------===//
-def int_vscale : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
+def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
+
+//===---------- Intrinsics to perform subvector insertion/extraction ------===//
+def int_experimental_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+def int_experimental_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
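A minimal sketch of the new subvector intrinsics, inserting a fixed-width vector into a scalable one at element offset 0 and reading it back (placeholder operands):

  %v = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %acc, <4 x i32> %sub, i64 0)
  %s = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %v, i64 0)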
//===----------------------------------------------------------------------===//
@@ -1550,3 +1663,4 @@ include "llvm/IR/IntrinsicsBPF.td"
include "llvm/IR/IntrinsicsSystemZ.td"
include "llvm/IR/IntrinsicsWebAssembly.td"
include "llvm/IR/IntrinsicsRISCV.td"
+include "llvm/IR/IntrinsicsVE.td"
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAArch64.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 3f71f644f9a1..da3085171b19 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -12,47 +12,58 @@
let TargetPrefix = "aarch64" in {
-def int_aarch64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
-def int_aarch64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
-def int_aarch64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>;
-def int_aarch64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>;
-
-def int_aarch64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>;
-def int_aarch64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>;
+def int_aarch64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty],
+ [IntrNoFree, IntrWillReturn]>;
+
+def int_aarch64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty],
+ [IntrNoFree, IntrWillReturn]>;
def int_aarch64_stxp : Intrinsic<[llvm_i32_ty],
- [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>;
+ [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
+ [IntrNoFree, IntrWillReturn]>;
def int_aarch64_stlxp : Intrinsic<[llvm_i32_ty],
- [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>;
+ [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
+ [IntrNoFree, IntrWillReturn]>;
def int_aarch64_clrex : Intrinsic<[]>;
-def int_aarch64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
-def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
-def int_aarch64_fjcvtzs : Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+def int_aarch64_fjcvtzs : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
-def int_aarch64_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_aarch64_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+def int_aarch64_cls: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_aarch64_cls64: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
// HINT
-def int_aarch64_hint : Intrinsic<[], [llvm_i32_ty]>;
+def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
//===----------------------------------------------------------------------===//
// Data Barrier Instructions
-def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, Intrinsic<[], [llvm_i32_ty]>;
-def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, Intrinsic<[], [llvm_i32_ty]>;
-def int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, Intrinsic<[], [llvm_i32_ty]>;
+def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
+ Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
+ Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
+ Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>;
// A space-consuming intrinsic primarily for testing block and jump table
// placements. The first argument is the number of bytes this "instruction"
// takes up, the second and return value are essentially chains, used to force
// ordering during ISel.
-def int_aarch64_space : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>;
+def int_aarch64_space : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>;
}
@@ -61,129 +72,133 @@ def int_aarch64_space : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_2Scalar_Float_Intrinsic
- : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_FPToIntRounding_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
class AdvSIMD_1IntArg_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_1FloatArg_Intrinsic
- : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Intrinsic
- : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Expand_Intrinsic
- : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Long_Intrinsic
- : Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>;
class AdvSIMD_1IntArg_Narrow_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Narrow_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Int_Across_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyvector_ty], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Float_Across_Intrinsic
- : Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
class AdvSIMD_2IntArg_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_2FloatArg_Intrinsic
- : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Intrinsic
- : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Compare_Intrinsic
- : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem]>;
class AdvSIMD_2Arg_FloatCompare_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>],
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Long_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>, LLVMTruncatedType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Wide_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMTruncatedType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Narrow_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMExtendedType<0>, LLVMExtendedType<0>],
[IntrNoMem]>;
class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMExtendedType<0>, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Scalar_Expand_BySize_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Tied_Narrow_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMHalfElementsVectorType<0>, llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Lane_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, llvm_anyint_ty, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_3VectorArg_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_3VectorArg_Scalar_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_3VectorArg_Tied_Narrow_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMHalfElementsVectorType<0>, llvm_anyvector_ty,
LLVMMatchType<1>], [IntrNoMem]>;
class AdvSIMD_3VectorArg_Scalar_Tied_Narrow_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMHalfElementsVectorType<0>, llvm_anyvector_ty, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_CvtFxToFP_Intrinsic
- : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_CvtFPToFx_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_1Arg_Intrinsic
- : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_Dot_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem]>;
class AdvSIMD_FP16FML_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem]>;
class AdvSIMD_MatMul_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem]>;
class AdvSIMD_FML_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem]>;
+ class AdvSIMD_BF16FML_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
}
// Arithmetic ops
@@ -241,7 +256,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
// 64-bit polynomial multiply really returns an i128, which is not legal. Fake
// it with a v16i8.
def int_aarch64_neon_pmull64 :
- Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
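A minimal sketch matching the comment above (placeholder operands); the 128-bit polynomial product comes back re-interpreted as <16 x i8>:

  %prod = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)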
// Vector Extending Multiply
def int_aarch64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic {
@@ -251,7 +266,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
// Vector Saturating Doubling Long Multiply
def int_aarch64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic;
def int_aarch64_neon_sqdmulls_scalar
- : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Vector Halving Subtract
def int_aarch64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic;
@@ -421,9 +436,9 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
// Vector Conversions Between Half-Precision and Single-Precision.
def int_aarch64_neon_vcvtfp2hf
- : Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvthf2fp
- : Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
// Vector Conversions Between Floating-point and Fixed-point.
def int_aarch64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic;
@@ -453,7 +468,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
def int_aarch64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic;
// Scalar FP Inexact Narrowing
- def int_aarch64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty],
+ def int_aarch64_sisd_fcvtxn : DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty],
[IntrNoMem]>;
// v8.2-A Dot Product
@@ -466,18 +481,21 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
def int_aarch64_neon_usmmla : AdvSIMD_MatMul_Intrinsic;
def int_aarch64_neon_usdot : AdvSIMD_Dot_Intrinsic;
def int_aarch64_neon_bfdot : AdvSIMD_Dot_Intrinsic;
- def int_aarch64_neon_bfmmla : AdvSIMD_MatMul_Intrinsic;
- def int_aarch64_neon_bfmlalb : AdvSIMD_FML_Intrinsic;
- def int_aarch64_neon_bfmlalt : AdvSIMD_FML_Intrinsic;
+ def int_aarch64_neon_bfmmla
+ : DefaultAttrsIntrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_aarch64_neon_bfmlalb : AdvSIMD_BF16FML_Intrinsic;
+ def int_aarch64_neon_bfmlalt : AdvSIMD_BF16FML_Intrinsic;
// v8.6-A Bfloat Intrinsics
def int_aarch64_neon_bfcvt
- : Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>;
def int_aarch64_neon_bfcvtn
- : Intrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_aarch64_neon_bfcvtn2
- : Intrinsic<[llvm_v8bf16_ty],
+ : DefaultAttrsIntrinsic<[llvm_v8bf16_ty],
[llvm_v8bf16_ty, llvm_v4f32_ty],
[IntrNoMem]>;
@@ -490,11 +508,16 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
// v8.3-A Floating-point complex add
def int_aarch64_neon_vcadd_rot90 : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_neon_vcadd_rot270 : AdvSIMD_2VectorArg_Intrinsic;
+
+ def int_aarch64_neon_vcmla_rot0 : AdvSIMD_3VectorArg_Intrinsic;
+ def int_aarch64_neon_vcmla_rot90 : AdvSIMD_3VectorArg_Intrinsic;
+ def int_aarch64_neon_vcmla_rot180 : AdvSIMD_3VectorArg_Intrinsic;
+ def int_aarch64_neon_vcmla_rot270 : AdvSIMD_3VectorArg_Intrinsic;
}
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_2Vector2Index_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, llvm_i64_ty, LLVMMatchType<0>, llvm_i64_ty],
[IntrNoMem]>;
}
@@ -504,68 +527,68 @@ def int_aarch64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic;
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_1Vec_Load_Intrinsic
- : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_1Vec_Store_Lane_Intrinsic
- : Intrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
+ : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
class AdvSIMD_2Vec_Load_Intrinsic
- : Intrinsic<[LLVMMatchType<0>, llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[LLVMMatchType<0>, llvm_anyvector_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_2Vec_Load_Lane_Intrinsic
- : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
+ : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
[LLVMMatchType<0>, llvm_anyvector_ty,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_2Vec_Store_Intrinsic
- : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
+ : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
class AdvSIMD_2Vec_Store_Lane_Intrinsic
- : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
+ : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
class AdvSIMD_3Vec_Load_Intrinsic
- : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_3Vec_Load_Lane_Intrinsic
- : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_3Vec_Store_Intrinsic
- : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
+ : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
class AdvSIMD_3Vec_Store_Lane_Intrinsic
- : Intrinsic<[], [llvm_anyvector_ty,
+ : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
class AdvSIMD_4Vec_Load_Intrinsic
- : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
+ : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>, llvm_anyvector_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_4Vec_Load_Lane_Intrinsic
- : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
+ : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>, llvm_anyvector_ty,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_4Vec_Store_Intrinsic
- : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
+ : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
class AdvSIMD_4Vec_Store_Lane_Intrinsic
- : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
+ : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<5>>]>;
@@ -603,38 +626,38 @@ def int_aarch64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic;
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_Tbl1_Intrinsic
- : Intrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbl2_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_Tbl3_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbl4_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbx1_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbx2_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbx3_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbx4_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
[IntrNoMem]>;
@@ -651,7 +674,7 @@ def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
let TargetPrefix = "aarch64" in {
class FPCR_Get_Intrinsic
- : Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>;
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>;
}
// FPCR
@@ -659,34 +682,34 @@ def int_aarch64_get_fpcr : FPCR_Get_Intrinsic;
let TargetPrefix = "aarch64" in {
class Crypto_AES_DataKey_Intrinsic
- : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
class Crypto_AES_Data_Intrinsic
- : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
// SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule
// (v4i32).
class Crypto_SHA_5Hash4Schedule_Intrinsic
- : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
+ : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
// SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule
// (v4i32).
class Crypto_SHA_1Hash_Intrinsic
- : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
// SHA intrinsic taking 8 words of the schedule
class Crypto_SHA_8Schedule_Intrinsic
- : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
// SHA intrinsic taking 12 words of the schedule
class Crypto_SHA_12Schedule_Intrinsic
- : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
// SHA intrinsic taking 8 words of the hash and 4 of the schedule.
class Crypto_SHA_8Hash4Schedule_Intrinsic
- : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
}
@@ -716,84 +739,96 @@ def int_aarch64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;
let TargetPrefix = "aarch64" in {
-def int_aarch64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+def int_aarch64_crc32b : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_aarch64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+def int_aarch64_crc32cb : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_aarch64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+def int_aarch64_crc32h : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_aarch64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+def int_aarch64_crc32ch : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_aarch64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+def int_aarch64_crc32w : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_aarch64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+def int_aarch64_crc32cw : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_aarch64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
+def int_aarch64_crc32x : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
-def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
+def int_aarch64_crc32cx : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// Memory Tagging Extensions (MTE) Intrinsics
let TargetPrefix = "aarch64" in {
-def int_aarch64_irg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
+def int_aarch64_irg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
[IntrNoMem, IntrHasSideEffects]>;
-def int_aarch64_addg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
+def int_aarch64_addg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
[IntrNoMem]>;
-def int_aarch64_gmi : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty],
+def int_aarch64_gmi : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty],
[IntrNoMem]>;
-def int_aarch64_ldg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty],
+def int_aarch64_ldg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty],
[IntrReadMem]>;
-def int_aarch64_stg : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
+def int_aarch64_stg : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
[IntrWriteMem]>;
-def int_aarch64_subp : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
+def int_aarch64_subp : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
[IntrNoMem]>;
// The following are codegen-only intrinsics for stack instrumentation.
// Generate a randomly tagged stack base pointer.
-def int_aarch64_irg_sp : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty],
+def int_aarch64_irg_sp : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_i64_ty],
[IntrNoMem, IntrHasSideEffects]>;
// Transfer pointer tag with offset.
// ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where
// * address is the address in ptr0
// * tag is a function of (tag in baseptr, tag_offset).
+// ** Beware, this is not the same function as implemented by the ADDG instruction!
+// Backend optimizations may change tag_offset; the only guarantee is that calls
+// to tagp with the same pair of (baseptr, tag_offset) will produce pointers
+// with the same tag value, assuming the set of excluded tags has not changed.
// Address bits in baseptr and tag bits in ptr0 are ignored.
// When offset between ptr0 and baseptr is a compile time constant, this can be emitted as
// ADDG ptr1, baseptr, (ptr0 - baseptr), tag_offset
// It is intended that ptr0 is an alloca address, and baseptr is the direct output of llvm.aarch64.irg.sp.
-def int_aarch64_tagp : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty],
+def int_aarch64_tagp : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
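A hedged stack-tagging sketch using the intrinsics above; %obj stands for an alloca address, i64 0 is an empty exclusion mask, and i64 1 an arbitrary tag offset:

  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %tagged = call i8* @llvm.aarch64.tagp.p0i8(i8* %obj, i8* %base, i64 1)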
// Update allocation tags for the memory range to match the tag in the pointer argument.
-def int_aarch64_settag : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+def int_aarch64_settag : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
// Update allocation tags for the memory range to match the tag in the pointer argument,
// and set memory contents to zero.
-def int_aarch64_settag_zero : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+def int_aarch64_settag_zero : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
// Update allocation tags for 16-aligned, 16-sized memory region, and store a pair 8-byte values.
-def int_aarch64_stgp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+def int_aarch64_stgp : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
}
// Transactional Memory Extension (TME) Intrinsics
let TargetPrefix = "aarch64" in {
def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">,
- Intrinsic<[llvm_i64_ty]>;
+ Intrinsic<[llvm_i64_ty], [], [IntrWillReturn]>;
-def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[]>;
+def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[], [], [IntrWillReturn]>;
def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">,
- Intrinsic<[], [llvm_i64_ty], [ImmArg<ArgIndex<0>>]>;
+ Intrinsic<[], [llvm_i64_ty], [IntrWillReturn, ImmArg<ArgIndex<0>>]>;
def int_aarch64_ttest : GCCBuiltin<"__builtin_arm_ttest">,
Intrinsic<[llvm_i64_ty], [],
- [IntrNoMem, IntrHasSideEffects]>;
+ [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
+
+// Armv8.7-A load/store 64-byte intrinsics
+defvar data512 = !listsplat(llvm_i64_ty, 8);
+def int_aarch64_ld64b: Intrinsic<data512, [llvm_ptr_ty]>;
+def int_aarch64_st64b: Intrinsic<[], !listconcat([llvm_ptr_ty], data512)>;
+def int_aarch64_st64bv: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], data512)>;
+def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], data512)>;
+
}
def llvm_nxv2i1_ty : LLVMType<nxv2i1>;
@@ -811,88 +846,88 @@ def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_SVE_Create_2Vector_Tuple
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>],
[IntrReadMem]>;
class AdvSIMD_SVE_Create_3Vector_Tuple
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
[IntrReadMem]>;
class AdvSIMD_SVE_Create_4Vector_Tuple
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
LLVMMatchType<1>],
[IntrReadMem]>;
class AdvSIMD_SVE_Set_Vector_Tuple
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
[IntrReadMem, ImmArg<ArgIndex<1>>]>;
class AdvSIMD_SVE_Get_Vector_Tuple
- : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
class AdvSIMD_ManyVec_PredLoad_Intrinsic
- : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMPointerToElt<0>],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMPointerToElt<0>],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_1Vec_PredLoad_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMPointerToElt<0>],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_1Vec_PredStore_Intrinsic
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMPointerToElt<0>],
[IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
class AdvSIMD_2Vec_PredStore_Intrinsic
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>],
[IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
class AdvSIMD_3Vec_PredStore_Intrinsic
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>],
[IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
class AdvSIMD_4Vec_PredStore_Intrinsic
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>],
[IntrArgMemOnly, NoCapture<ArgIndex<5>>]>;
class AdvSIMD_SVE_Index_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMVectorElementType<0>,
LLVMVectorElementType<0>],
[IntrNoMem]>;
class AdvSIMD_Merged1VectorArg_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArgIndexed_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
class AdvSIMD_3VectorArgIndexed_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -900,20 +935,20 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
class AdvSIMD_Pred1VectorArg_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Pred2VectorArg_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Pred3VectorArg_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -921,77 +956,77 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrNoMem]>;
class AdvSIMD_SVE_Compare_Intrinsic
- : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_CompareWide_Intrinsic
- : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty,
llvm_nxv2i64_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_Saturating_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[IntrNoMem]>;
class AdvSIMD_SVE_SaturatingWithPattern_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
llvm_i32_ty,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
class AdvSIMD_SVE_Saturating_N_Intrinsic<LLVMType T>
- : Intrinsic<[T],
+ : DefaultAttrsIntrinsic<[T],
[T, llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<LLVMType T>
- : Intrinsic<[T],
+ : DefaultAttrsIntrinsic<[T],
[T, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
class AdvSIMD_SVE_CNT_Intrinsic
- : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
+ : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>],
[LLVMVectorOfBitcastsToInt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_ReduceWithInit_Intrinsic
- : Intrinsic<[LLVMVectorElementType<0>],
+ : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMVectorElementType<0>,
llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_ShiftByImm_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
class AdvSIMD_SVE_ShiftWide_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
llvm_nxv2i64_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_Unpack_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_CADD_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -999,7 +1034,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
class AdvSIMD_SVE_CMLA_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -1008,7 +1043,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
class AdvSIMD_SVE_CMLA_LANE_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -1017,96 +1052,96 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
class AdvSIMD_SVE_DUP_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMVectorElementType<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_DUP_Unpred_Intrinsic
- : Intrinsic<[llvm_anyvector_ty], [LLVMVectorElementType<0>],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMVectorElementType<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_DUPQ_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
llvm_i64_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_EXPA_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMVectorOfBitcastsToInt<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_FCVT_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_FCVTZS_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMVectorOfBitcastsToInt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_INSR_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMVectorElementType<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_PTRUE_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<0>>]>;
class AdvSIMD_SVE_PUNPKHI_Intrinsic
- : Intrinsic<[LLVMHalfElementsVectorType<0>],
+ : DefaultAttrsIntrinsic<[LLVMHalfElementsVectorType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_SCALE_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
LLVMVectorOfBitcastsToInt<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_SCVTF_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_TSMUL_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMVectorOfBitcastsToInt<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_CNTB_Intrinsic
- : Intrinsic<[llvm_i64_ty],
+ : DefaultAttrsIntrinsic<[llvm_i64_ty],
[llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<0>>]>;
class AdvSIMD_SVE_CNTP_Intrinsic
- : Intrinsic<[llvm_i64_ty],
+ : DefaultAttrsIntrinsic<[llvm_i64_ty],
[llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_DOT_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMSubdivide4VectorType<0>,
LLVMSubdivide4VectorType<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_DOT_Indexed_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMSubdivide4VectorType<0>,
LLVMSubdivide4VectorType<0>,
@@ -1114,65 +1149,65 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
class AdvSIMD_SVE_PTEST_Intrinsic
- : Intrinsic<[llvm_i1_ty],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty],
[llvm_anyvector_ty,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_TBL_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMVectorOfBitcastsToInt<0>],
[IntrNoMem]>;
class AdvSIMD_SVE2_TBX_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMVectorOfBitcastsToInt<0>],
[IntrNoMem]>;
class SVE2_1VectorArg_Long_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
class SVE2_2VectorArg_Long_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>,
LLVMSubdivide2VectorType<0>],
[IntrNoMem]>;
class SVE2_2VectorArgIndexed_Long_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>,
LLVMSubdivide2VectorType<0>,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
class SVE2_2VectorArg_Wide_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMSubdivide2VectorType<0>],
[IntrNoMem]>;
class SVE2_2VectorArg_Pred_Long_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
LLVMSubdivide2VectorType<0>],
[IntrNoMem]>;
class SVE2_3VectorArg_Long_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMSubdivide2VectorType<0>,
LLVMSubdivide2VectorType<0>],
[IntrNoMem]>;
class SVE2_3VectorArgIndexed_Long_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMSubdivide2VectorType<0>,
LLVMSubdivide2VectorType<0>,
@@ -1180,45 +1215,45 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
class SVE2_1VectorArg_Narrowing_Intrinsic
- : Intrinsic<[LLVMSubdivide2VectorType<0>],
+ : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
class SVE2_Merged1VectorArg_Narrowing_Intrinsic
- : Intrinsic<[LLVMSubdivide2VectorType<0>],
+ : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
[LLVMSubdivide2VectorType<0>,
llvm_anyvector_ty],
[IntrNoMem]>;
class SVE2_2VectorArg_Narrowing_Intrinsic
- : Intrinsic<
+ : DefaultAttrsIntrinsic<
[LLVMSubdivide2VectorType<0>],
[llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
class SVE2_Merged2VectorArg_Narrowing_Intrinsic
- : Intrinsic<
+ : DefaultAttrsIntrinsic<
[LLVMSubdivide2VectorType<0>],
[LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
class SVE2_1VectorArg_Imm_Narrowing_Intrinsic
- : Intrinsic<[LLVMSubdivide2VectorType<0>],
+ : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
[llvm_anyvector_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
class SVE2_2VectorArg_Imm_Narrowing_Intrinsic
- : Intrinsic<[LLVMSubdivide2VectorType<0>],
+ : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
[LLVMSubdivide2VectorType<0>, llvm_anyvector_ty,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
class SVE2_CONFLICT_DETECT_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMAnyPointerType<llvm_any_ty>,
LLVMMatchType<1>]>;
class SVE2_3VectorArg_Indexed_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMSubdivide2VectorType<0>,
LLVMSubdivide2VectorType<0>,
@@ -1226,7 +1261,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
class AdvSIMD_SVE_CDOT_LANE_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMSubdivide4VectorType<0>,
LLVMSubdivide4VectorType<0>,
@@ -1243,7 +1278,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
// This class of intrinsics is not intended to be useful within LLVM IR but
// is instead here to support some of the more rigid parts of the ACLE.
class Builtin_SVCVT<string name, LLVMType OUT, LLVMType PRED, LLVMType IN>
- : Intrinsic<[OUT], [OUT, PRED, IN], [IntrNoMem]>;
+ : DefaultAttrsIntrinsic<[OUT], [OUT, PRED, IN], [IntrNoMem]>;
}
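For orientation, Builtin_SVCVT is instantiated once per conversion form, passing a builtin-style name string plus the result, predicate and input vector types. A minimal sketch with a hypothetical name and type combination (the real defs live further down in this file):

def int_aarch64_sve_example_cvt  // hypothetical, for illustration only
    : Builtin_SVCVT<"svcvt_example_m", llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;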
//===----------------------------------------------------------------------===//
@@ -1252,24 +1287,24 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_SVE_Reduce_Intrinsic
- : Intrinsic<[LLVMVectorElementType<0>],
+ : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_SADDV_Reduce_Intrinsic
- : Intrinsic<[llvm_i64_ty],
+ : DefaultAttrsIntrinsic<[llvm_i64_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_WHILE_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyint_ty, LLVMMatchType<1>],
[IntrNoMem]>;
class AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMPointerToElt<0>,
@@ -1278,7 +1313,7 @@ class AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMPointerToElt<0>,
@@ -1287,16 +1322,16 @@ class AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_GatherLoad_VS_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty,
llvm_i64_ty
],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem]>;
class AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1306,7 +1341,7 @@ class AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic
[IntrWriteMem, IntrArgMemOnly]>;
class AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1316,17 +1351,17 @@ class AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic
[IntrWriteMem, IntrArgMemOnly]>;
class AdvSIMD_ScatterStore_VS_Intrinsic
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty, llvm_i64_ty
],
- [IntrWriteMem, IntrArgMemOnly]>;
+ [IntrWriteMem]>;
class SVE_gather_prf_SV
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Predicate
llvm_ptr_ty, // Base address
@@ -1336,7 +1371,7 @@ class SVE_gather_prf_SV
[IntrInaccessibleMemOrArgMemOnly, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<3>>]>;
class SVE_gather_prf_VS
- : Intrinsic<[],
+ : DefaultAttrsIntrinsic<[],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Predicate
llvm_anyvector_ty, // Base addresses
@@ -1346,17 +1381,17 @@ class SVE_gather_prf_VS
[IntrInaccessibleMemOrArgMemOnly, ImmArg<ArgIndex<3>>]>;
class SVE_MatMul_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMSubdivide4VectorType<0>, LLVMSubdivide4VectorType<0>],
[IntrNoMem]>;
class SVE_4Vec_BF16
- : Intrinsic<[llvm_nxv4f32_ty],
+ : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
[llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty],
[IntrNoMem]>;
class SVE_4Vec_BF16_Indexed
- : Intrinsic<[llvm_nxv4f32_ty],
+ : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
[llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i64_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
@@ -1408,7 +1443,7 @@ def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic;
//
def int_aarch64_sve_prf
- : Intrinsic<[], [llvm_anyvector_ty, llvm_ptr_ty, llvm_i32_ty],
+ : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrArgMemOnly, ImmArg<ArgIndex<2>>]>;
// Scalar + 32-bit scaled offset vector, zero extend, packed and
@@ -1572,10 +1607,10 @@ def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic;
// FFR manipulation
//
-def int_aarch64_sve_rdffr : GCCBuiltin<"__builtin_sve_svrdffr">, Intrinsic<[llvm_nxv16i1_ty], []>;
-def int_aarch64_sve_rdffr_z : GCCBuiltin<"__builtin_sve_svrdffr_z">, Intrinsic<[llvm_nxv16i1_ty], [llvm_nxv16i1_ty]>;
-def int_aarch64_sve_setffr : GCCBuiltin<"__builtin_sve_svsetffr">, Intrinsic<[], []>;
-def int_aarch64_sve_wrffr : GCCBuiltin<"__builtin_sve_svwrffr">, Intrinsic<[], [llvm_nxv16i1_ty]>;
+def int_aarch64_sve_rdffr : GCCBuiltin<"__builtin_sve_svrdffr">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], []>;
+def int_aarch64_sve_rdffr_z : GCCBuiltin<"__builtin_sve_svrdffr_z">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [llvm_nxv16i1_ty]>;
+def int_aarch64_sve_setffr : GCCBuiltin<"__builtin_sve_svsetffr">, DefaultAttrsIntrinsic<[], []>;
+def int_aarch64_sve_wrffr : GCCBuiltin<"__builtin_sve_svwrffr">, DefaultAttrsIntrinsic<[], [llvm_nxv16i1_ty]>;
//
// Saturating scalar arithmetic
@@ -1888,11 +1923,11 @@ def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic;
// Reinterpreting data
//
-def int_aarch64_sve_convert_from_svbool : Intrinsic<[llvm_anyvector_ty],
+def int_aarch64_sve_convert_from_svbool : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_nxv16i1_ty],
[IntrNoMem]>;
-def int_aarch64_sve_convert_to_svbool : Intrinsic<[llvm_nxv16i1_ty],
+def int_aarch64_sve_convert_to_svbool : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
[llvm_anyvector_ty],
[IntrNoMem]>;
@@ -2307,31 +2342,31 @@ def int_aarch64_sve_xar : AdvSIMD_2VectorArgIndexed_Intrinsic;
//
def int_aarch64_sve_aesd : GCCBuiltin<"__builtin_sve_svaesd_u8">,
- Intrinsic<[llvm_nxv16i8_ty],
+ DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
[IntrNoMem]>;
def int_aarch64_sve_aesimc : GCCBuiltin<"__builtin_sve_svaesimc_u8">,
- Intrinsic<[llvm_nxv16i8_ty],
+ DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv16i8_ty],
[IntrNoMem]>;
def int_aarch64_sve_aese : GCCBuiltin<"__builtin_sve_svaese_u8">,
- Intrinsic<[llvm_nxv16i8_ty],
+ DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
[IntrNoMem]>;
def int_aarch64_sve_aesmc : GCCBuiltin<"__builtin_sve_svaesmc_u8">,
- Intrinsic<[llvm_nxv16i8_ty],
+ DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv16i8_ty],
[IntrNoMem]>;
def int_aarch64_sve_rax1 : GCCBuiltin<"__builtin_sve_svrax1_u64">,
- Intrinsic<[llvm_nxv2i64_ty],
+ DefaultAttrsIntrinsic<[llvm_nxv2i64_ty],
[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
[IntrNoMem]>;
def int_aarch64_sve_sm4e : GCCBuiltin<"__builtin_sve_svsm4e_u32">,
- Intrinsic<[llvm_nxv4i32_ty],
+ DefaultAttrsIntrinsic<[llvm_nxv4i32_ty],
[llvm_nxv4i32_ty, llvm_nxv4i32_ty],
[IntrNoMem]>;
def int_aarch64_sve_sm4ekey : GCCBuiltin<"__builtin_sve_svsm4ekey_u32">,
- Intrinsic<[llvm_nxv4i32_ty],
+ DefaultAttrsIntrinsic<[llvm_nxv4i32_ty],
[llvm_nxv4i32_ty, llvm_nxv4i32_ty],
[IntrNoMem]>;
//
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 01380afae006..ac2291f9d43b 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -18,7 +18,7 @@ class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
// Used to tag image and resource intrinsics with information used to generate
// mem operands.
-class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = 0> {
+class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = false> {
int RsrcArg = rsrcarg;
bit IsImage = isimage;
}
@@ -182,6 +182,8 @@ def int_amdgcn_init_exec : Intrinsic<[],
// Set EXEC according to a thread count packed in an SGPR input:
// thread_count = (input >> bitoffset) & 0x7f;
// This is always moved to the beginning of the basic block.
+// Note: only inreg arguments to the parent function are valid as
+// inputs to this intrinsic, computed values cannot be used.
def int_amdgcn_init_exec_from_input : Intrinsic<[],
[llvm_i32_ty, // 32-bit SGPR input
llvm_i32_ty], // bit offset of the thread count
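// Editorial worked example of the formula above (assuming EXEC then enables
// the first thread_count lanes): with input = 0x00001400 and bitoffset = 8,
// thread_count = (0x00001400 >> 8) & 0x7f = 0x14 = 20, so lanes 0..19 become
// active.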
@@ -255,7 +257,17 @@ def int_amdgcn_log_clamp : Intrinsic<
def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable, IntrWillReturn]
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
+>;
+
+// Fused single-precision multiply-add with legacy behaviour for the multiply,
+// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is
+// intended for use on subtargets that have the v_fma_legacy_f32 and/or
+// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and
+// has a completely different kind of legacy behaviour.)
+def int_amdgcn_fma_legacy :
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
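// Editorial worked example of the legacy-multiply rule: fma.legacy(+0.0, inf, 3.0)
// multiplies +0.0 * inf under legacy rules to +0.0 and then adds 3.0, giving 3.0,
// whereas an IEEE-conforming fma would produce NaN for the same inputs.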
def int_amdgcn_rcp : Intrinsic<
@@ -397,11 +409,10 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
-class AMDGPULDSF32Intrin<string clang_builtin> :
- GCCBuiltin<clang_builtin>,
- Intrinsic<[llvm_float_ty],
- [LLVMQualPointerType<llvm_float_ty, 3>,
- llvm_float_ty,
+class AMDGPULDSIntrin :
+ Intrinsic<[llvm_any_ty],
+ [LLVMQualPointerType<LLVMMatchType<0>, 3>,
+ LLVMMatchType<0>,
llvm_i32_ty, // ordering
llvm_i32_ty, // scope
llvm_i1_ty], // isVolatile
@@ -446,9 +457,9 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
-def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
-def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
-def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;
+def int_amdgcn_ds_fadd : AMDGPULDSIntrin;
+def int_amdgcn_ds_fmin : AMDGPULDSIntrin;
+def int_amdgcn_ds_fmax : AMDGPULDSIntrin;
} // TargetPrefix = "amdgcn"
@@ -545,7 +556,7 @@ class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr
// {offset} {bias} {z-compare}
list<AMDGPUArg> ExtraAddrArgs = extra_addr;
- bit Gradients = 0;
+ bit Gradients = false;
// Name of the {lod} or {clamp} argument that is appended to the coordinates,
// if any.
@@ -585,7 +596,7 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_LZ", "_lz", []>;
}
- let Gradients = 1 in {
+ let Gradients = true in {
defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_D", "_d", []>;
defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_CD", "_cd", []>;
}
@@ -600,12 +611,12 @@ class AMDGPUDimProfile<string opmod,
string OpMod = opmod; // the corresponding instruction is named IMAGE_OpMod
// These are intended to be overwritten by subclasses
- bit IsSample = 0;
- bit IsAtomic = 0;
+ bit IsSample = false;
+ bit IsAtomic = false;
list<LLVMType> RetTypes = [];
list<AMDGPUArg> DataArgs = [];
list<AMDGPUArg> ExtraAddrArgs = [];
- bit Gradients = 0;
+ bit Gradients = false;
string LodClampMip = "";
int NumRetAndDataAnyTypes =
@@ -616,7 +627,7 @@ class AMDGPUDimProfile<string opmod,
arglistconcat<[ExtraAddrArgs,
!if(Gradients, dim.GradientArgs, []),
!listconcat(!if(IsSample, dim.CoordSliceArgs, dim.CoordSliceIntArgs),
- !if(!eq(LodClampMip, ""),
+ !if(!empty(LodClampMip),
[]<AMDGPUArg>,
[AMDGPUArg<LLVMMatchType<0>, LodClampMip>]))],
NumRetAndDataAnyTypes>.ret;
@@ -646,7 +657,7 @@ class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod,
class AMDGPUDimSampleProfile<string opmod,
AMDGPUDimProps dim,
AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
- let IsSample = 1;
+ let IsSample = true;
let RetTypes = [llvm_any_ty];
let ExtraAddrArgs = sample.ExtraAddrArgs;
let Gradients = sample.Gradients;
@@ -657,7 +668,7 @@ class AMDGPUDimNoSampleProfile<string opmod,
AMDGPUDimProps dim,
list<LLVMType> retty,
list<AMDGPUArg> dataargs,
- bit Mip = 0> : AMDGPUDimProfile<opmod, dim> {
+ bit Mip = false> : AMDGPUDimProfile<opmod, dim> {
let RetTypes = retty;
let DataArgs = dataargs;
let LodClampMip = !if(Mip, "mip", "");
@@ -668,7 +679,7 @@ class AMDGPUDimAtomicProfile<string opmod,
list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> {
let RetTypes = [llvm_anyint_ty];
let DataArgs = dataargs;
- let IsAtomic = 1;
+ let IsAtomic = true;
}
class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> {
@@ -681,13 +692,23 @@ class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RES
// Helper class for figuring out image intrinsic argument indexes.
class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
int NumDataArgs = !size(P_.DataArgs);
- int NumDmaskArgs = !if(P_.IsAtomic, 0, 1);
+ int NumDmaskArgs = !not(P_.IsAtomic);
+ int NumExtraAddrArgs = !size(P_.ExtraAddrArgs);
int NumVAddrArgs = !size(P_.AddrArgs);
+ int NumGradientArgs = !if(P_.Gradients, !size(P_.Dim.GradientArgs), 0);
+ int NumCoordArgs = !if(P_.IsSample, !size(P_.Dim.CoordSliceArgs), !size(P_.Dim.CoordSliceIntArgs));
int NumRSrcArgs = 1;
int NumSampArgs = !if(P_.IsSample, 2, 0);
int DmaskArgIndex = NumDataArgs;
- int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1);
- int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs);
+ int VAddrArgIndex = !add(DmaskArgIndex, NumDmaskArgs);
+ int GradientArgIndex = !add(VAddrArgIndex, NumExtraAddrArgs);
+ int CoordArgIndex = !add(GradientArgIndex, NumGradientArgs);
+ int LodArgIndex = !add(VAddrArgIndex, NumVAddrArgs, -1);
+ int MipArgIndex = LodArgIndex;
+ int RsrcArgIndex = !add(VAddrArgIndex, NumVAddrArgs);
+ int SampArgIndex = !add(RsrcArgIndex, NumRSrcArgs);
+ int UnormArgIndex = !add(SampArgIndex, 1);
+ int TexFailCtrlArgIndex = !add(SampArgIndex, NumSampArgs);
int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
}
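// Editorial worked example of the index arithmetic above, assuming a
// hypothetical 2D sample-with-bias profile (no data args, one extra "bias"
// address arg, two coordinates, no gradients): NumDataArgs = 0,
// NumDmaskArgs = 1, NumExtraAddrArgs = 1, NumVAddrArgs = 3, so
// DmaskArgIndex = 0, VAddrArgIndex = 1, CoordArgIndex = 2, RsrcArgIndex = 4,
// SampArgIndex = 5, UnormArgIndex = 6, TexFailCtrlArgIndex = 7 and
// CachePolicyArgIndex = 8.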
@@ -738,7 +759,7 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
list<AMDGPUArg> dataargs,
list<IntrinsicProperty> props,
list<SDNodeProperty> sdnodeprops,
- bit Mip = 0> {
+ bit Mip = false> {
foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name)
: AMDGPUImageDimIntrinsic<
@@ -752,7 +773,7 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
list<AMDGPUArg> dataargs,
list<IntrinsicProperty> props,
list<SDNodeProperty> sdnodeprops,
- bit Mip = 0> {
+ bit Mip = false> {
foreach dim = AMDGPUDims.All in {
def !strconcat(NAME, "_", dim.Name)
: AMDGPUImageDimIntrinsic<
@@ -787,7 +808,7 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
//////////////////////////////////////////////////////////////////////////
multiclass AMDGPUImageDimSampleDims<string opmod,
AMDGPUSampleVariant sample,
- bit NoMem = 0> {
+ bit NoMem = false> {
foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
AMDGPUDimSampleProfile<opmod, dim, sample>,
@@ -973,9 +994,9 @@ class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
-class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
- [data_ty],
- [LLVMMatchType<0>, // vdata(VGPR)
+class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
+ !if(NoRtn, [], [data_ty]),
+ [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
@@ -1005,9 +1026,12 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
[ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;
-class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
- [data_ty],
- [LLVMMatchType<0>, // vdata(VGPR)
+// gfx908 intrinsic
+def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
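The NoRtn parameter added above selects between the value-returning and the void form of the atomic; a sketch with hypothetical names (only the defaulted form mirrors the real def just above):

def int_amdgcn_raw_buffer_atomic_example       : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;     // returns the pre-op value
def int_amdgcn_raw_buffer_atomic_example_noret : AMDGPURawBufferAtomic<llvm_anyfloat_ty, 1>;  // NoRtn = true, no return value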
+
+class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
+ !if(NoRtn, [], [data_ty]),
+ [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
@@ -1039,6 +1063,10 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
[ImmArg<ArgIndex<6>>, IntrWillReturn], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;
+// gfx908 intrinsic
+def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
+
+
// Obsolescent tbuffer intrinsics.
def int_amdgcn_tbuffer_load : Intrinsic <
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
@@ -1168,6 +1196,19 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
AMDGPURsrcIntrinsic<2, 0>;
def int_amdgcn_buffer_atomic_csub : AMDGPUBufferAtomic;
+
+class AMDGPUBufferAtomicFP : Intrinsic <
+ [llvm_anyfloat_ty],
+ [LLVMMatchType<0>, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(SGPR/VGPR/imm)
+ llvm_i1_ty], // slc(imm)
+ [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1, 0>;
+
+// Legacy form of the intrinsic. The raw and struct forms should be preferred.
+def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP;
} // defset AMDGPUBufferIntrinsics
// Uses that do not set the done bit should set IntrWriteMem on the
@@ -1248,7 +1289,7 @@ def int_amdgcn_s_getreg :
def int_amdgcn_s_setreg :
GCCBuiltin<"__builtin_amdgcn_s_setreg">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]
+ [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg<ArgIndex<0>>]
>;
// int_amdgcn_s_getpc is provided to allow a specific style of position
@@ -1291,6 +1332,7 @@ def int_amdgcn_interp_p2 :
// See int_amdgcn_v_interp_p1 for why this is IntrNoMem.
// __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
+// high selects whether high or low 16-bits are loaded from LDS
def int_amdgcn_interp_p1_f16 :
GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
Intrinsic<[llvm_float_ty],
@@ -1299,6 +1341,7 @@ def int_amdgcn_interp_p1_f16 :
ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
// __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
+// high selects whether high or low 16-bits are loaded from LDS
def int_amdgcn_interp_p2_f16 :
GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
Intrinsic<[llvm_half_ty],
@@ -1538,6 +1581,10 @@ def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
// FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn?
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;
+def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">,
+ Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects]
+>;
+
// Copies the active channels of the source value to the destination value,
// with the guarantee that the source value is computed as if the entire
// program were executed in Whole Wavefront Mode, i.e. with all channels
@@ -1667,10 +1714,19 @@ class AMDGPUGlobalAtomicRtn<LLVMType vt> : Intrinsic <
[vt],
[llvm_anyptr_ty, // vaddr
vt], // vdata(VGPR)
- [IntrArgMemOnly, NoCapture<ArgIndex<0>>], "", [SDNPMemOperand]>;
+ [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>], "",
+ [SDNPMemOperand]>;
def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn<llvm_i32_ty>;
+// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
+// <ray_dir>, <ray_inv_dir>, <texture_descr>
+def int_amdgcn_image_bvh_intersect_ray :
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_anyint_ty, llvm_float_ty, llvm_v4f32_ty, llvm_anyvector_ty,
+ LLVMMatchType<1>, llvm_v4i32_ty],
+ [IntrReadMem, IntrWillReturn]>;
+
//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
@@ -1786,25 +1842,7 @@ def int_amdgcn_udot8 :
// gfx908 intrinsics
// ===----------------------------------------------------------------------===//
-class AMDGPUBufferAtomicNoRtn : Intrinsic <
- [],
- [llvm_anyfloat_ty, // vdata(VGPR)
- llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // offset(SGPR/VGPR/imm)
- llvm_i1_ty], // slc(imm)
- [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
- AMDGPURsrcIntrinsic<1, 0>;
-
-class AMDGPUGlobalAtomicNoRtn : Intrinsic <
- [],
- [llvm_anyptr_ty, // vaddr
- llvm_anyfloat_ty], // vdata(VGPR)
- [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>], "",
- [SDNPMemOperand]>;
-
-def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicNoRtn;
-def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn;
+def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">,
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsARM.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsARM.td
index df74e446b965..0eb27cc34462 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -791,14 +791,17 @@ def int_arm_neon_vcvtbfp2bf
: Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>;
def int_arm_neon_bfdot : Neon_Dot_Intrinsic;
-def int_arm_neon_bfmmla : Neon_MatMul_Intrinsic;
-
-class Neon_FML_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
- [IntrNoMem]>;
-def int_arm_neon_bfmlalb : Neon_FML_Intrinsic;
-def int_arm_neon_bfmlalt : Neon_FML_Intrinsic;
+def int_arm_neon_bfmmla
+ : Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+
+class Neon_BF16FML_Intrinsic
+ : Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_arm_neon_bfmlalb : Neon_BF16FML_Intrinsic;
+def int_arm_neon_bfmlalt : Neon_BF16FML_Intrinsic;
def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
@@ -814,9 +817,7 @@ def int_arm_neon_vcadd_rot90 : Neon_2Arg_Intrinsic;
def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic;
// GNU eabi mcount
-def int_arm_gnu_eabi_mcount : Intrinsic<[],
- [],
- [IntrReadMem, IntrWriteMem]>;
+def int_arm_gnu_eabi_mcount : Intrinsic<[], [], []>;
def int_arm_mve_pred_i2v : Intrinsic<
[llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>;
@@ -921,7 +922,7 @@ multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,
list<IntrinsicProperty> props = [IntrNoMem]> {
def "": Intrinsic<rets, params, props>;
def _predicated: Intrinsic<rets, params # [pred,
- !if(!eq(!cast<string>(rets[0]), "llvm_anyvector_ty"),
+ !if(!eq(rets[0], llvm_anyvector_ty),
LLVMMatchType<0>, rets[0])], props>;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsBPF.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsBPF.td
index c4d35b2a0a88..4b4dd94b1599 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsBPF.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -24,6 +24,14 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf."
Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i64_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_bpf_btf_type_id : GCCBuiltin<"__builtin_bpf_btf_type_id">,
- Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_any_ty, llvm_i64_ty],
+ Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
+ def int_bpf_preserve_type_info : GCCBuiltin<"__builtin_bpf_preserve_type_info">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+ def int_bpf_preserve_enum_value : GCCBuiltin<"__builtin_bpf_preserve_enum_value">,
+ Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_ptr_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+ def int_bpf_passthrough : GCCBuiltin<"__builtin_bpf_passthrough">,
+ Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_any_ty], [IntrNoMem]>;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 61293418ec41..2ab48cfc4bb7 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -37,11 +37,6 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
// MISC
//
-// Helper class for construction of n-element list<LLVMtype> [t,t,...,t]
-class RepLLVMType<int N, LLVMType T> {
- list<LLVMType> ret = !if(N, !listconcat(RepLLVMType<!add(N,-1), T>.ret, [T]), []);
-}
-
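// For reference, the TableGen built-in used below produces the same result as
// the helper deleted above; e.g. both of these evaluate to a list of four
// llvm_i32_ty entries:
//   RepLLVMType<4, llvm_i32_ty>.ret   (old helper, removed here)
//   !listsplat(llvm_i32_ty, 4)        (built-in replacement)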
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// Geom: m<M>n<N>k<K>. E.g. m8n32k16
// Frag: [abcd]
@@ -54,40 +49,40 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
string ft = frag#":"#ptx_elt_type;
list<LLVMType> regs = !cond(
// mma.sync.m8n8k4 uses smaller a/b fragments than wmma fp ops
- !eq(gft,"m8n8k4:a:f16") : RepLLVMType<2, llvm_v2f16_ty>.ret,
- !eq(gft,"m8n8k4:b:f16") : RepLLVMType<2, llvm_v2f16_ty>.ret,
+ !eq(gft,"m8n8k4:a:f16") : !listsplat(llvm_v2f16_ty, 2),
+ !eq(gft,"m8n8k4:b:f16") : !listsplat(llvm_v2f16_ty, 2),
// fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16
// All currently supported geometries use the same fragment format,
// so we only need to consider {fragment, type}.
- !eq(ft,"a:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret,
- !eq(ft,"b:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret,
- !eq(ft,"c:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret,
- !eq(ft,"d:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret,
- !eq(ft,"c:f32") : RepLLVMType<8, llvm_float_ty>.ret,
- !eq(ft,"d:f32") : RepLLVMType<8, llvm_float_ty>.ret,
+ !eq(ft,"a:f16") : !listsplat(llvm_v2f16_ty, 8),
+ !eq(ft,"b:f16") : !listsplat(llvm_v2f16_ty, 8),
+ !eq(ft,"c:f16") : !listsplat(llvm_v2f16_ty, 4),
+ !eq(ft,"d:f16") : !listsplat(llvm_v2f16_ty, 4),
+ !eq(ft,"c:f32") : !listsplat(llvm_float_ty, 8),
+ !eq(ft,"d:f32") : !listsplat(llvm_float_ty, 8),
// u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
- !eq(gft,"m16n16k16:a:u8") : RepLLVMType<2, llvm_i32_ty>.ret,
- !eq(gft,"m16n16k16:a:s8") : RepLLVMType<2, llvm_i32_ty>.ret,
- !eq(gft,"m16n16k16:b:u8") : RepLLVMType<2, llvm_i32_ty>.ret,
- !eq(gft,"m16n16k16:b:s8") : RepLLVMType<2, llvm_i32_ty>.ret,
- !eq(gft,"m16n16k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
- !eq(gft,"m16n16k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+ !eq(gft,"m16n16k16:a:u8") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m16n16k16:a:s8") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m16n16k16:b:u8") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m16n16k16:b:s8") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m16n16k16:c:s32") : !listsplat(llvm_i32_ty, 8),
+ !eq(gft,"m16n16k16:d:s32") : !listsplat(llvm_i32_ty, 8),
!eq(gft,"m8n32k16:a:u8") : [llvm_i32_ty],
!eq(gft,"m8n32k16:a:s8") : [llvm_i32_ty],
- !eq(gft,"m8n32k16:b:u8") : RepLLVMType<4, llvm_i32_ty>.ret,
- !eq(gft,"m8n32k16:b:s8") : RepLLVMType<4, llvm_i32_ty>.ret,
- !eq(gft,"m8n32k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
- !eq(gft,"m8n32k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+ !eq(gft,"m8n32k16:b:u8") : !listsplat(llvm_i32_ty, 4),
+ !eq(gft,"m8n32k16:b:s8") : !listsplat(llvm_i32_ty, 4),
+ !eq(gft,"m8n32k16:c:s32") : !listsplat(llvm_i32_ty, 8),
+ !eq(gft,"m8n32k16:d:s32") : !listsplat(llvm_i32_ty, 8),
- !eq(gft,"m32n8k16:a:u8") : RepLLVMType<4, llvm_i32_ty>.ret,
- !eq(gft,"m32n8k16:a:s8") : RepLLVMType<4, llvm_i32_ty>.ret,
+ !eq(gft,"m32n8k16:a:u8") : !listsplat(llvm_i32_ty, 4),
+ !eq(gft,"m32n8k16:a:s8") : !listsplat(llvm_i32_ty, 4),
!eq(gft,"m32n8k16:b:u8") : [llvm_i32_ty],
!eq(gft,"m32n8k16:b:s8") : [llvm_i32_ty],
- !eq(gft,"m32n8k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
- !eq(gft,"m32n8k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+ !eq(gft,"m32n8k16:c:s32") : !listsplat(llvm_i32_ty, 8),
+ !eq(gft,"m32n8k16:d:s32") : !listsplat(llvm_i32_ty, 8),
// u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
!eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty],
@@ -96,10 +91,10 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
!eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty],
!eq(gft,"m8n8k32:b:u4") : [llvm_i32_ty],
!eq(gft,"m8n8k32:b:s4") : [llvm_i32_ty],
- !eq(gft,"m8n8k128:c:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
- !eq(gft,"m8n8k128:d:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
- !eq(gft,"m8n8k32:c:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
- !eq(gft,"m8n8k32:d:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
+ !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m8n8k32:c:s32") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m8n8k32:d:s32") : !listsplat(llvm_i32_ty, 2),
);
}
@@ -133,7 +128,7 @@ class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
!eq(A.ptx_elt_type, "u4") : [A],
!eq(A.ptx_elt_type, "b1") : [A],
// the rest are FP ops identified by accumulator & result type.
- 1: [D, C]
+ true: [D, C]
);
string ret = !foldl("", id_frags, a, b, !strconcat(a, ".", b.ptx_elt_type));
}
@@ -230,19 +225,17 @@ class NVVM_MMA_OPS<int _ = 0> {
ldst_bit_ab_ops,
ldst_subint_cd_ops);
// Separate A/B/C fragments (loads) from D (stores).
- list<WMMA_REGS> all_ld_ops = !foldl([]<WMMA_REGS>, all_ldst_ops, a, b,
- !listconcat(a, !if(!eq(b.frag,"d"), [],[b])));
- list<WMMA_REGS> all_st_ops = !foldl([]<WMMA_REGS>, all_ldst_ops, a, b,
- !listconcat(a, !if(!eq(b.frag,"d"), [b],[])));
+ list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d"));
+ list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d"));
}
def NVVM_MMA_OPS : NVVM_MMA_OPS;
-// Returns [1] if this combination of layout/satf is supported, [] otherwise.
+// Returns true if this combination of layout/satf is supported; false otherwise.
// MMA ops must provide all parameters. Loads and stores -- only frags and layout_a.
// The class is used to prevent generation of records for the unsupported variants.
// E.g.
-// foreach _ = NVVM_MMA_SUPPORTED<...>.ret in =
+// if NVVM_MMA_SUPPORTED<...>.ret then
// def : FOO<>; // The record will only be defined for supported ops.
//
class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b="-", int satf=-1> {
@@ -268,20 +261,20 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b
# !if(!eq(!size(frags), 4),
frags[2].ptx_elt_type # frags[3].ptx_elt_type,
"?");
- list<int> ret = !cond(
+ bit ret = !cond(
// Sub-int MMA only supports fixed A/B layout.
// b1 does not support .satf.
- !eq(mma#":"#satf, "b1:row:col:0") : [1],
+ !eq(mma#":"#satf, "b1:row:col:0") : true,
// mma.m8n8k4 has no .satf modifier.
!and(!eq(frags[0].geom, "m8n8k4"),
- !ne(satf, 0)): [],
+ !ne(satf, 0)): false,
// mma.m8n8k4 has no C=f32 D=f16 variant.
- !eq(gcd, "m8n8k4:f32f16"): [],
- !eq(mma, "s4:row:col") : [1],
- !eq(mma, "u4:row:col") : [1],
- !eq(mma, "s4:row:col") : [1],
- !eq(mma, "u4:row:col") : [1],
+ !eq(gcd, "m8n8k4:f32f16"): false,
+ !eq(mma, "s4:row:col") : true,
+ !eq(mma, "u4:row:col") : true,
+ !eq(mma, "s4:row:col") : true,
+ !eq(mma, "u4:row:col") : true,
// Sub-int load/stores have fixed layout for A and B.
!and(!eq(layout_b, "-"), // It's a Load or Store op
!or(!eq(ld, "b1:a:row"),
@@ -295,13 +288,13 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b
!eq(ld, "u4:a:row"),
!eq(ld, "u4:b:col"),
!eq(ldf, "u4:c"),
- !eq(ldf, "u4:d"))) : [1],
+ !eq(ldf, "u4:d"))) : true,
// All other sub-int ops are not supported.
- !eq(t, "b1") : [],
- !eq(t, "s4") : [],
- !eq(t, "u4") : [],
+ !eq(t, "b1") : false,
+ !eq(t, "s4") : false,
+ !eq(t, "u4") : false,
// All other (non sub-int) are OK.
- 1: [1]
+ true: true
);
}
@@ -314,8 +307,8 @@ class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
string Name = "int_nvvm_shfl_" # Suffix;
string Builtin = "__nvvm_shfl_" # Suffix;
string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix);
- list<int> withGccBuiltin = !if(return_pred, [], [1]);
- list<int> withoutGccBuiltin = !if(return_pred, [1], []);
+ bit withGccBuiltin = !not(return_pred);
+ bit withoutGccBuiltin = return_pred;
LLVMType OpType = !cond(
!eq(type,"i32"): llvm_i32_ty,
!eq(type,"f32"): llvm_float_ty);
@@ -4005,18 +3998,18 @@ def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
// SHUFFLE
//
// Generate intrinsics for all variants of shfl instruction.
-foreach sync = [0, 1] in {
+foreach sync = [false, true] in {
foreach mode = ["up", "down", "bfly", "idx"] in {
foreach type = ["i32", "f32"] in {
- foreach return_pred = [0, 1] in {
+ foreach return_pred = [false, true] in {
foreach i = [SHFL_INFO<sync, mode, type, return_pred>] in {
- foreach _ = i.withGccBuiltin in {
+ if i.withGccBuiltin then {
def i.Name : GCCBuiltin<i.Builtin>,
Intrinsic<i.RetTy, i.ArgsTy,
[IntrInaccessibleMemOnly, IntrConvergent],
i.IntrName>;
}
- foreach _ = i.withoutGccBuiltin in {
+ if i.withoutGccBuiltin then {
def i.Name : Intrinsic<i.RetTy, i.ArgsTy,
[IntrInaccessibleMemOnly, IntrConvergent], i.IntrName>;
}
@@ -4127,11 +4120,11 @@ class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
foreach layout = ["row", "col"] in {
foreach stride = [0, 1] in {
foreach frag = NVVM_MMA_OPS.all_ld_ops in
- foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+ if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
def WMMA_NAME_LDST<"load", frag, layout, stride>.record
: NVVM_WMMA_LD<frag, layout, stride>;
foreach frag = NVVM_MMA_OPS.all_st_ops in
- foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+ if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
def WMMA_NAME_LDST<"store", frag, layout, stride>.record
: NVVM_WMMA_ST<frag, layout, stride>;
}
@@ -4150,7 +4143,7 @@ foreach layout_a = ["row", "col"] in {
foreach layout_b = ["row", "col"] in {
foreach satf = [0, 1] in {
foreach op = NVVM_MMA_OPS.all_mma_ops in {
- foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
+ if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
def WMMA_NAME_MMA<layout_a, layout_b, satf,
op[0], op[1], op[2], op[3]>.record
: NVVM_WMMA_MMA<layout_a, layout_b, satf,
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 614a29049686..075b6252d9a5 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -18,10 +18,12 @@
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// dcba/dcbf/dcbi/dcbst/dcbt/dcbz/dcbzl(PPC970) instructions.
def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbf : GCCBuiltin<"__builtin_dcbf">,
- Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbfl : Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbflp: Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbf : GCCBuiltin<"__builtin_dcbf">,
+ Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
+ def int_ppc_dcbfl : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
+ def int_ppc_dcbflp : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
+ def int_ppc_dcbfps : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
+ def int_ppc_dcbstps : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty],
@@ -47,6 +49,13 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// eieio instruction
def int_ppc_eieio : Intrinsic<[],[],[]>;
+ // Get content from current FPSCR register
+ def int_ppc_readflm : GCCBuiltin<"__builtin_readflm">,
+ Intrinsic<[llvm_double_ty], [], [IntrNoMem]>;
+ // Set FPSCR register, and return previous content
+ def int_ppc_setflm : GCCBuiltin<"__builtin_setflm">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+
// Intrinsics for [double]word extended forms of divide instructions
def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
@@ -61,6 +70,14 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
+ // Generate a random number
+ def int_ppc_darn : GCCBuiltin<"__builtin_darn">,
+ Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
+ def int_ppc_darnraw : GCCBuiltin<"__builtin_darn_raw">,
+ Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
+ def int_ppc_darn32 : GCCBuiltin<"__builtin_darn_32">,
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+
// Bit permute doubleword
def int_ppc_bpermd : GCCBuiltin<"__builtin_bpermd">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
@@ -135,6 +152,28 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
}
//===----------------------------------------------------------------------===//
+// PowerPC MMA Intrinsic Multi Class Definitions.
+//
+
+multiclass PowerPC_MMA_ACC_Intrinsic<list<LLVMType> args> {
+ def NAME: Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>;
+ def pp : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
+ [IntrNoMem]>;
+ def pn : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
+ [IntrNoMem]>;
+ def np : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
+ [IntrNoMem]>;
+ def nn : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
+ [IntrNoMem]>;
+}
+
+multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> {
+ def NAME: Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>;
+ def pp : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
+ [IntrNoMem]>;
+}
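A minimal usage sketch (hypothetical mnemonic; the real defms using these multiclasses live further down in IntrinsicsPowerPC.td): each defm expands into the base accumulating intrinsic plus its pp/pn/np/nn accumulate variants (just pp for the _PP multiclass).

defm int_ppc_mma_example_ger : PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;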
+
+//===----------------------------------------------------------------------===//
// PowerPC Altivec Intrinsic Class Definitions.
//
@@ -186,6 +225,13 @@ class PowerPC_Vec_QQQ_Intrinsic<string GCCIntSuffix>
[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
+/// PowerPC_Vec_QDD_Intrinsic - A PowerPC intrinsic that takes two v2i64
+/// vectors and returns one v1i128. These intrinsics have no side effects.
+class PowerPC_Vec_QDD_Intrinsic<string GCCIntSuffix>
+ : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+ [llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Class Definitions.
//
@@ -239,9 +285,9 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// VSCR access.
def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">,
- Intrinsic<[llvm_v8i16_ty], [], [IntrReadMem]>;
+ Intrinsic<[llvm_v8i16_ty], [], [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">,
- Intrinsic<[], [llvm_v4i32_ty], []>;
+ Intrinsic<[], [llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>;
// Loads. These don't map directly to GCC builtins because they represent the
@@ -347,6 +393,28 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vcmpequq : GCCBuiltin<"__builtin_altivec_vcmpequq">,
+ Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtsq : GCCBuiltin<"__builtin_altivec_vcmpgtsq">,
+ Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtuq : GCCBuiltin<"__builtin_altivec_vcmpgtuq">,
+ Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpequq_p : GCCBuiltin<"__builtin_altivec_vcmpequq_p">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtsq_p : GCCBuiltin<"__builtin_altivec_vcmpgtsq_p">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtuq_p : GCCBuiltin<"__builtin_altivec_vcmpgtuq_p">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty],
+ [IntrNoMem]>;
+
// Predicate Comparisons. The first operand specifies interpretation of CR6.
def int_ppc_altivec_vcmpbfp_p : GCCBuiltin<"__builtin_altivec_vcmpbfp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
@@ -429,6 +497,56 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">,
Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>;
+ // P10 Vector Extract with Mask
+ def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">,
+ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vextracthm : GCCBuiltin<"__builtin_altivec_vextracthm">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vextractwm : GCCBuiltin<"__builtin_altivec_vextractwm">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vextractdm : GCCBuiltin<"__builtin_altivec_vextractdm">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">,
+ Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>;
+
+ // P10 Vector Expand with Mask
+ def int_ppc_altivec_vexpandbm : GCCBuiltin<"__builtin_altivec_vexpandbm">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vexpandhm : GCCBuiltin<"__builtin_altivec_vexpandhm">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vexpandwm : GCCBuiltin<"__builtin_altivec_vexpandwm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vexpanddm : GCCBuiltin<"__builtin_altivec_vexpanddm">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vexpandqm : GCCBuiltin<"__builtin_altivec_vexpandqm">,
+ Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty], [IntrNoMem]>;
+
+ // P10 Vector Count with Mask intrinsics.
+ def int_ppc_altivec_vcntmbb : GCCBuiltin<"__builtin_altivec_vcntmbb">,
+ Intrinsic<[llvm_i64_ty], [llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_ppc_altivec_vcntmbh : GCCBuiltin<"__builtin_altivec_vcntmbh">,
+ Intrinsic<[llvm_i64_ty], [llvm_v8i16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_ppc_altivec_vcntmbw : GCCBuiltin<"__builtin_altivec_vcntmbw">,
+ Intrinsic<[llvm_i64_ty], [llvm_v4i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_ppc_altivec_vcntmbd : GCCBuiltin<"__builtin_altivec_vcntmbd">,
+ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+ // P10 Move to VSR with Mask Intrinsics.
+ def int_ppc_altivec_mtvsrbm : GCCBuiltin<"__builtin_altivec_mtvsrbm">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_ppc_altivec_mtvsrhm : GCCBuiltin<"__builtin_altivec_mtvsrhm">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_ppc_altivec_mtvsrwm : GCCBuiltin<"__builtin_altivec_mtvsrwm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_ppc_altivec_mtvsrdm : GCCBuiltin<"__builtin_altivec_mtvsrdm">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_ppc_altivec_mtvsrqm : GCCBuiltin<"__builtin_altivec_mtvsrqm">,
+ Intrinsic<[llvm_v1i128_ty], [llvm_i64_ty], [IntrNoMem]>;
+
// P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
@@ -437,6 +555,25 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
+ // P10 Vector String Isolate Intrinsics.
+ def int_ppc_altivec_vstribr : GCCBuiltin<"__builtin_altivec_vstribr">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vstribl : GCCBuiltin<"__builtin_altivec_vstribl">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vstrihr : GCCBuiltin<"__builtin_altivec_vstrihr">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vstrihl : GCCBuiltin<"__builtin_altivec_vstrihl">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ // Predicate Intrinsics: The first operand specifies interpretation of CR6.
+ def int_ppc_altivec_vstribr_p : GCCBuiltin<"__builtin_altivec_vstribr_p">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vstribl_p : GCCBuiltin<"__builtin_altivec_vstribl_p">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vstrihr_p : GCCBuiltin<"__builtin_altivec_vstrihr_p">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vstrihl_p : GCCBuiltin<"__builtin_altivec_vstrihl_p">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>;
+
// P10 Vector Centrifuge Builtin.
def int_ppc_altivec_vcfuged : GCCBuiltin<"__builtin_altivec_vcfuged">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
@@ -468,27 +605,27 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// P10 Vector Insert.
def int_ppc_altivec_vinsblx : GCCBuiltin<"__builtin_altivec_vinsblx">,
Intrinsic<[llvm_v16i8_ty],
- [llvm_v16i8_ty, llvm_i64_ty, llvm_i64_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinsbrx : GCCBuiltin<"__builtin_altivec_vinsbrx">,
Intrinsic<[llvm_v16i8_ty],
- [llvm_v16i8_ty, llvm_i64_ty, llvm_i64_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinshlx : GCCBuiltin<"__builtin_altivec_vinshlx">,
Intrinsic<[llvm_v8i16_ty],
- [llvm_v8i16_ty, llvm_i64_ty, llvm_i64_ty],
+ [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinshrx : GCCBuiltin<"__builtin_altivec_vinshrx">,
Intrinsic<[llvm_v8i16_ty],
- [llvm_v8i16_ty, llvm_i64_ty, llvm_i64_ty],
+ [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinswlx : GCCBuiltin<"__builtin_altivec_vinswlx">,
Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_i64_ty, llvm_i64_ty],
+ [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinswrx : GCCBuiltin<"__builtin_altivec_vinswrx">,
Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_i64_ty, llvm_i64_ty],
+ [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinsdlx : GCCBuiltin<"__builtin_altivec_vinsdlx">,
Intrinsic<[llvm_v2i64_ty],
@@ -500,37 +637,70 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
[IntrNoMem]>;
def int_ppc_altivec_vinsbvlx : GCCBuiltin<"__builtin_altivec_vinsbvlx">,
Intrinsic<[llvm_v16i8_ty],
- [llvm_v16i8_ty, llvm_i64_ty, llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinsbvrx : GCCBuiltin<"__builtin_altivec_vinsbvrx">,
Intrinsic<[llvm_v16i8_ty],
- [llvm_v16i8_ty, llvm_i64_ty, llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinshvlx : GCCBuiltin<"__builtin_altivec_vinshvlx">,
Intrinsic<[llvm_v8i16_ty],
- [llvm_v8i16_ty, llvm_i64_ty, llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinshvrx : GCCBuiltin<"__builtin_altivec_vinshvrx">,
Intrinsic<[llvm_v8i16_ty],
- [llvm_v8i16_ty, llvm_i64_ty, llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinswvlx : GCCBuiltin<"__builtin_altivec_vinswvlx">,
Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_i64_ty, llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vinswvrx : GCCBuiltin<"__builtin_altivec_vinswvrx">,
Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_i64_ty, llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
// P10 Vector Insert with immediate.
def int_ppc_altivec_vinsw :
Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_i64_ty, llvm_i32_ty],
+ [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_ppc_altivec_vinsd :
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ // P10 Vector Extract.
+ def int_ppc_altivec_vextdubvlx : GCCBuiltin<"__builtin_altivec_vextdubvlx">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vextdubvrx : GCCBuiltin<"__builtin_altivec_vextdubvrx">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vextduhvlx : GCCBuiltin<"__builtin_altivec_vextduhvlx">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vextduhvrx : GCCBuiltin<"__builtin_altivec_vextduhvrx">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vextduwvlx : GCCBuiltin<"__builtin_altivec_vextduwvlx">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vextduwvrx : GCCBuiltin<"__builtin_altivec_vextduwvrx">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vextddvlx : GCCBuiltin<"__builtin_altivec_vextddvlx">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vextddvrx : GCCBuiltin<"__builtin_altivec_vextddvrx">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
}
// Vector average.
@@ -587,10 +757,12 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
// Saturating multiply-adds.
def int_ppc_altivec_vmhaddshs : GCCBuiltin<"__builtin_altivec_vmhaddshs">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+ llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vmhraddshs : GCCBuiltin<"__builtin_altivec_vmhraddshs">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+ llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vmaddfp : GCCBuiltin<"__builtin_altivec_vmaddfp">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
@@ -608,7 +780,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
+ llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v4i32_ty], [IntrNoMem]>;
@@ -620,7 +792,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
llvm_v1i128_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
+ llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>;
+ def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">,
+ Intrinsic<[llvm_v1i128_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>;
// Vector Multiply Instructions.
def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">,
@@ -632,6 +807,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vmulesd : PowerPC_Vec_QDD_Intrinsic<"vmulesd">;
def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
@@ -641,6 +817,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vmuleud : PowerPC_Vec_QDD_Intrinsic<"vmuleud">;
def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
@@ -651,6 +828,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vmulosd : PowerPC_Vec_QDD_Intrinsic<"vmulosd">;
def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
@@ -660,23 +838,38 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vmuloud : PowerPC_Vec_QDD_Intrinsic<"vmuloud">;
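// For reference: PowerPC_Vec_QDD_Intrinsic is defined near the top of this
// file, outside this hunk. Judging from the vmul*d uses above, it presumably
// maps each "__builtin_altivec_" builtin to an intrinsic producing one v1i128
// result from two v2i64 operands, along the lines of this sketch (assumed
// shape, not shown in this diff):
class PowerPC_Vec_QDD_Intrinsic<string GCCIntSuffix>
  : PowerPC_Vec_Intrinsic<GCCIntSuffix, [llvm_v1i128_ty],
                          [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;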
// Vector Sum Instructions.
def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vsum2sws : GCCBuiltin<"__builtin_altivec_vsum2sws">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vsum4sbs : GCCBuiltin<"__builtin_altivec_vsum4sbs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vsum4shs : GCCBuiltin<"__builtin_altivec_vsum4shs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vsum4ubs : GCCBuiltin<"__builtin_altivec_vsum4ubs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
+
+ // Vector Sign Extension Instructions
+ def int_ppc_altivec_vextsb2w : GCCBuiltin<"__builtin_altivec_vextsb2w">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vextsb2d : GCCBuiltin<"__builtin_altivec_vextsb2d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vextsh2w : GCCBuiltin<"__builtin_altivec_vextsh2w">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vextsh2d : GCCBuiltin<"__builtin_altivec_vextsh2d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vextsw2d : GCCBuiltin<"__builtin_altivec_vextsw2d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vextsd2q : GCCBuiltin<"__builtin_altivec_vextsd2q">,
+ Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty], [IntrNoMem]>;
// Other multiplies.
def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">,
@@ -689,34 +882,34 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
[IntrNoMem]>;
def int_ppc_altivec_vpkshss : GCCBuiltin<"__builtin_altivec_vpkshss">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vpkshus : GCCBuiltin<"__builtin_altivec_vpkshus">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vpksdss : GCCBuiltin<"__builtin_altivec_vpksdss">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
def int_ppc_altivec_vpksdus : GCCBuiltin<"__builtin_altivec_vpksdus">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
// vpkuhum is lowered to a shuffle.
def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
// vpkuwum is lowered to a shuffle.
def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
// vpkudum is lowered to a shuffle.
def int_ppc_altivec_vpkudus : GCCBuiltin<"__builtin_altivec_vpkudus">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrHasSideEffects]>;
// Unpacks.
def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">,
@@ -898,6 +1091,29 @@ def int_ppc_altivec_vrldmi :
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
+def int_ppc_altivec_vrlqnm :
+ PowerPC_Vec_Intrinsic<"vrlqnm", [llvm_v1i128_ty],
+ [llvm_v1i128_ty, llvm_v1i128_ty],
+ [IntrNoMem]>;
+def int_ppc_altivec_vrlqmi :
+ PowerPC_Vec_Intrinsic<"vrlqmi", [llvm_v1i128_ty],
+ [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
+ [IntrNoMem]>;
+
+// Vector Divide Extended Intrinsics.
+def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
+def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;
+def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">;
+def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">;
+def int_ppc_altivec_vdivesq : PowerPC_Vec_QQQ_Intrinsic<"vdivesq">;
+def int_ppc_altivec_vdiveuq : PowerPC_Vec_QQQ_Intrinsic<"vdiveuq">;
+
+// Vector Multiply High Intrinsics.
+def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;
+def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">;
+def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">;
+def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">;
+
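// For reference: the PowerPC_Vec_WWW/DDD/QQQ_Intrinsic helpers used above are
// defined earlier in this file, outside this hunk. Assuming the usual letter
// convention (W = v4i32 word, D = v2i64 doubleword, Q = v1i128 quadword), the
// word form is presumably shaped like the sketch below, with the DDD and QQQ
// forms differing only in the vector types:
class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
  : PowerPC_Vec_Intrinsic<GCCIntSuffix, [llvm_v4i32_ty],
                          [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;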
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Definitions.
@@ -918,12 +1134,8 @@ def int_ppc_vsx_lxvl :
def int_ppc_vsx_lxvll :
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem,
IntrArgMemOnly]>;
-def int_ppc_vsx_stxvl :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
- [IntrWriteMem, IntrArgMemOnly]>;
-def int_ppc_vsx_stxvll :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
- [IntrWriteMem, IntrArgMemOnly]>;
+def int_ppc_vsx_lxvp :
+ Intrinsic<[llvm_v256i1_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
// Vector store.
def int_ppc_vsx_stxvw4x : Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
@@ -934,6 +1146,15 @@ def int_ppc_vsx_stxvw4x_be : Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_vsx_stxvd2x_be : Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty],
[IntrWriteMem, IntrArgMemOnly]>;
+def int_ppc_vsx_stxvl :
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
+ [IntrWriteMem, IntrArgMemOnly]>;
+def int_ppc_vsx_stxvll :
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
+ [IntrWriteMem, IntrArgMemOnly]>;
+def int_ppc_vsx_stxvp :
+ Intrinsic<[], [llvm_v256i1_ty, llvm_ptr_ty], [IntrWriteMem,
+ IntrArgMemOnly]>;
// Vector and scalar maximum.
def int_ppc_vsx_xvmaxdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmaxdp">;
def int_ppc_vsx_xvmaxsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvmaxsp">;
@@ -1060,6 +1281,12 @@ def int_ppc_vsx_xvtstdcsp :
def int_ppc_vsx_xvcvhpsp :
PowerPC_VSX_Intrinsic<"xvcvhpsp", [llvm_v4f32_ty],
[llvm_v8i16_ty],[IntrNoMem]>;
+def int_ppc_vsx_xvcvspbf16 :
+ PowerPC_VSX_Intrinsic<"xvcvspbf16", [llvm_v16i8_ty],
+ [llvm_v16i8_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvbf16spn :
+ PowerPC_VSX_Intrinsic<"xvcvbf16spn", [llvm_v16i8_ty],
+ [llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_vsx_xxextractuw :
PowerPC_VSX_Intrinsic<"xxextractuw",[llvm_v2i64_ty],
[llvm_v2i64_ty,llvm_i32_ty], [IntrNoMem]>;
@@ -1069,7 +1296,17 @@ def int_ppc_vsx_xxinsertw :
[IntrNoMem]>;
def int_ppc_vsx_xvtlsbb :
PowerPC_VSX_Intrinsic<"xvtlsbb", [llvm_i32_ty],
- [llvm_v16i8_ty, llvm_i1_ty], [IntrNoMem]>;
+ [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtdivdp :
+ PowerPC_VSX_Intrinsic<"xvtdivdp", [llvm_i32_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtdivsp :
+ PowerPC_VSX_Intrinsic<"xvtdivsp", [llvm_i32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtsqrtdp :
+ PowerPC_VSX_Intrinsic<"xvtsqrtdp", [llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtsqrtsp :
+ PowerPC_VSX_Intrinsic<"xvtsqrtsp", [llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_ppc_vsx_xxeval :
PowerPC_VSX_Intrinsic<"xxeval", [llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty,
@@ -1110,182 +1347,6 @@ def int_ppc_vsx_xxblendvd: GCCBuiltin<"__builtin_vsx_xxblendvd">,
}
//===----------------------------------------------------------------------===//
-// PowerPC QPX Intrinsics.
-//
-
-let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
- /// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics.
- class PowerPC_QPX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
- list<LLVMType> param_types,
- list<IntrinsicProperty> properties>
- : GCCBuiltin<!strconcat("__builtin_qpx_", GCCIntSuffix)>,
- Intrinsic<ret_types, param_types, properties>;
-}
-
-//===----------------------------------------------------------------------===//
-// PowerPC QPX Intrinsic Class Definitions.
-//
-
-/// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64
-/// vector and returns one. These intrinsics have no side effects.
-class PowerPC_QPX_FF_Intrinsic<string GCCIntSuffix>
- : PowerPC_QPX_Intrinsic<GCCIntSuffix,
- [llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
-
-/// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64
-/// vectors and returns one. These intrinsics have no side effects.
-class PowerPC_QPX_FFF_Intrinsic<string GCCIntSuffix>
- : PowerPC_QPX_Intrinsic<GCCIntSuffix,
- [llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty],
- [IntrNoMem]>;
-
-/// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64
-/// vectors and returns one. These intrinsics have no side effects.
-class PowerPC_QPX_FFFF_Intrinsic<string GCCIntSuffix>
- : PowerPC_QPX_Intrinsic<GCCIntSuffix,
- [llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
- [IntrNoMem]>;
-
-/// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer
-/// and returns a v4f64.
-class PowerPC_QPX_Load_Intrinsic<string GCCIntSuffix>
- : PowerPC_QPX_Intrinsic<GCCIntSuffix,
- [llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
-
-/// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer
-/// and returns a v4f64 permutation.
-class PowerPC_QPX_LoadPerm_Intrinsic<string GCCIntSuffix>
- : PowerPC_QPX_Intrinsic<GCCIntSuffix,
- [llvm_v4f64_ty], [llvm_ptr_ty], [IntrNoMem]>;
-
-/// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that takes a pointer
-/// and stores a v4f64.
-class PowerPC_QPX_Store_Intrinsic<string GCCIntSuffix>
- : PowerPC_QPX_Intrinsic<GCCIntSuffix,
- [], [llvm_v4f64_ty, llvm_ptr_ty],
- [IntrWriteMem, IntrArgMemOnly]>;
-
-//===----------------------------------------------------------------------===//
-// PowerPC QPX Intrinsic Definitions.
-
-let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
- // Add Instructions
- def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">;
- def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">;
- def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">;
- def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">;
-
- // Estimate Instructions
- def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">;
- def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">;
- def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">;
- def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">;
-
- // Multiply Instructions
- def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">;
- def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">;
- def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">;
- def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">;
-
- // Multiply-add instructions
- def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">;
- def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">;
- def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">;
- def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">;
- def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">;
- def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">;
- def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">;
- def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">;
- def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">;
- def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">;
- def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">;
- def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">;
- def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">;
- def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">;
- def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">;
- def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">;
-
- // Select Instruction
- def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">;
-
- // Permute Instruction
- def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">;
-
- // Convert and Round Instructions
- def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">;
- def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">;
- def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">;
- def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">;
- def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">;
- def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">;
- def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">;
- def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">;
- def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">;
- def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">;
- def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">;
- def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">;
- def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">;
- def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">;
- def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">;
- def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">;
- def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">;
-
- // Move Instructions
- def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">;
- def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">;
- def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">;
- def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">;
-
- // Compare Instructions
- def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">;
- def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">;
- def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">;
- def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">;
-
- // Load instructions
- def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">;
- def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">;
- def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">;
- def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">;
-
- def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">;
- def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">;
- def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">;
- def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">;
- def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">;
- def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">;
- def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">;
- def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">;
-
- def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">;
- def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">;
- def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">;
- def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">;
-
- // Store instructions
- def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">;
- def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">;
- def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">;
- def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">;
-
- def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">;
- def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">;
- def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">;
- def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">;
- def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">;
- def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">;
-
- // Logical and permutation formation
- def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical",
- [llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci",
- [llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>;
-}
-
-//===----------------------------------------------------------------------===//
// PowerPC HTM Intrinsic Definitions.
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
@@ -1349,5 +1410,88 @@ def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>;
// PowerPC set FPSCR Intrinsic Definitions.
def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;
+}
+let TargetPrefix = "ppc" in {
+ def int_ppc_vsx_assemble_pair :
+ Intrinsic<[llvm_v256i1_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+ def int_ppc_vsx_disassemble_pair :
+ Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty],
+ [llvm_v256i1_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_assemble_acc :
+ Intrinsic<[llvm_v512i1_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_mma_disassemble_acc :
+ Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [llvm_v512i1_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_xxmtacc :
+ Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_xxmfacc :
+ Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_xxsetaccz :
+ Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
+
+ // MMA Reduced-Precision: Outer Product Intrinsic Definitions.
+ defm int_ppc_mma_xvi4ger8 :
+ PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
+ defm int_ppc_mma_pmxvi4ger8 :
+ PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty]>;
+
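// For reference: PowerPC_MMA_ACC_PP_Intrinsic and PowerPC_MMA_ACC_Intrinsic
// are multiclasses defined earlier in this file, outside this hunk. Assuming
// they follow the obvious pattern, each defm yields a base outer-product
// intrinsic plus variants taking the v512i1 accumulator as an extra first
// operand ("pp" only for the _PP form; presumably "pp"/"pn"/"np"/"nn" for the
// full ACC form). A sketch of the assumed _PP form:
multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> {
  def NAME : Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>;
  def pp   : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
                       [IntrNoMem]>;
}
// Under that reading, "defm int_ppc_mma_xvi4ger8" above defines
// int_ppc_mma_xvi4ger8 and int_ppc_mma_xvi4ger8pp.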
+ defm int_ppc_mma_xvi8ger4 :
+ PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
+ defm int_ppc_mma_pmxvi8ger4 :
+ PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty]>;
+
+ defm int_ppc_mma_xvi16ger2s :
+ PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
+ defm int_ppc_mma_pmxvi16ger2s :
+ PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty]>;
+
+ defm int_ppc_mma_xvf16ger2 :
+ PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
+ defm int_ppc_mma_pmxvf16ger2 :
+ PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty]>;
+ defm int_ppc_mma_xvf32ger :
+ PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
+ defm int_ppc_mma_pmxvf32ger :
+ PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i32_ty]>;
+ defm int_ppc_mma_xvf64ger :
+ PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
+ defm int_ppc_mma_pmxvf64ger :
+ PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i32_ty]>;
+
+ // MMA Reduced-Precision: bfloat16 Outer Product Intrinsic Definitions.
+ defm int_ppc_mma_xvbf16ger2 :
+ PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
+ defm int_ppc_mma_pmxvbf16ger2 :
+ PowerPC_MMA_ACC_Intrinsic<
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+
+ // MMA Reduced-Precision: Missing Integer-based Outer Product Operations.
+ defm int_ppc_mma_xvi16ger2 :
+ PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
+ defm int_ppc_mma_pmxvi16ger2 :
+ PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty]>;
+ def int_ppc_mma_xvi8ger4spp :
+ Intrinsic<[llvm_v512i1_ty],
+ [llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_mma_pmxvi8ger4spp :
+ Intrinsic<[llvm_v512i1_ty],
+ [llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsRISCV.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 7590b568c367..c4056895f68e 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -66,3 +66,1027 @@ let TargetPrefix = "riscv" in {
defm int_riscv_masked_cmpxchg : MaskedAtomicRMWFiveArgIntrinsics;
} // TargetPrefix = "riscv"
+
+//===----------------------------------------------------------------------===//
+// Vectors
+
+class RISCVVIntrinsic {
+ // These intrinsics may accept illegal integer values in their llvm_any_ty
+ // operand, so they have to be extended. If set to zero then the intrinsic
+ // does not have any operand that must be extended.
+ Intrinsic IntrinsicID = !cast<Intrinsic>(NAME);
+ bits<4> ExtendOperand = 0;
+}
+
+let TargetPrefix = "riscv" in {
+ // We use anyint here but we only support XLen.
+ def int_riscv_vsetvli : Intrinsic<[llvm_anyint_ty],
+ /* AVL */ [LLVMMatchType<0>,
+ /* VSEW */ LLVMMatchType<0>,
+ /* VLMUL */ LLVMMatchType<0>],
+ [IntrNoMem, IntrHasSideEffects,
+ ImmArg<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>]>;
+ def int_riscv_vsetvlimax : Intrinsic<[llvm_anyint_ty],
+ /* VSEW */ [LLVMMatchType<0>,
+ /* VLMUL */ LLVMMatchType<0>],
+ [IntrNoMem, IntrHasSideEffects,
+ ImmArg<ArgIndex<0>>,
+ ImmArg<ArgIndex<1>>]>;
+
+ // For unit stride load
+ // Input: (pointer, vl)
+ class RISCVUSLoad
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMPointerType<LLVMMatchType<0>>,
+ llvm_anyint_ty],
+ [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
+ // For unit stride fault-only-first load
+ // Input: (pointer, vl)
+ // Output: (data, vl)
+ // NOTE: We model this with default memory properties since we model writing
+ // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
+ class RISCVUSLoadFF
+ : Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
+ [LLVMPointerType<LLVMMatchType<0>>, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<0>>]>,
+ RISCVVIntrinsic;
+ // For unit stride load with mask
+ // Input: (maskedoff, pointer, mask, vl)
+ class RISCVUSLoadMask
+ : Intrinsic<[llvm_anyvector_ty ],
+ [LLVMMatchType<0>,
+ LLVMPointerType<LLVMMatchType<0>>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty],
+ [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
+ // For unit stride fault-only-first load with mask
+ // Input: (maskedoff, pointer, mask, vl)
+ // Output: (data, vl)
+ // NOTE: We model this with default memory properties since we model writing
+ // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
+ class RISCVUSLoadFFMask
+ : Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
+ [LLVMMatchType<0>,
+ LLVMPointerType<LLVMMatchType<0>>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<1>],
+ [NoCapture<ArgIndex<1>>]>, RISCVVIntrinsic;
+ // For strided load
+ // Input: (pointer, stride, vl)
+ class RISCVSLoad
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMPointerType<LLVMMatchType<0>>,
+ llvm_anyint_ty, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
+ // For strided load with mask
+ // Input: (maskedoff, pointer, stride, mask, vl)
+ class RISCVSLoadMask
+ : Intrinsic<[llvm_anyvector_ty ],
+ [LLVMMatchType<0>,
+ LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
+ // For indexed load
+ // Input: (pointer, index, vl)
+ class RISCVILoad
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMPointerType<LLVMMatchType<0>>,
+ llvm_anyvector_ty, llvm_anyint_ty],
+ [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
+ // For indexed load with mask
+ // Input: (maskedoff, pointer, index, mask, vl)
+ class RISCVILoadMask
+ : Intrinsic<[llvm_anyvector_ty ],
+ [LLVMMatchType<0>,
+ LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
+ // For unit stride store
+ // Input: (vector_in, pointer, vl)
+ class RISCVUSStore
+ : Intrinsic<[],
+ [llvm_anyvector_ty,
+ LLVMPointerType<LLVMMatchType<0>>,
+ llvm_anyint_ty],
+ [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
+ // For unit stride store with mask
+ // Input: (vector_in, pointer, mask, vl)
+ class RISCVUSStoreMask
+ : Intrinsic<[],
+ [llvm_anyvector_ty,
+ LLVMPointerType<LLVMMatchType<0>>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty],
+ [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
+ // For strided store
+ // Input: (vector_in, pointer, stride, vl)
+ class RISCVSStore
+ : Intrinsic<[],
+ [llvm_anyvector_ty,
+ LLVMPointerType<LLVMMatchType<0>>,
+ llvm_anyint_ty, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
+  // For strided store with mask
+  // Input: (vector_in, pointer, stride, mask, vl)
+ class RISCVSStoreMask
+ : Intrinsic<[],
+ [llvm_anyvector_ty,
+ LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
+ // For indexed store
+ // Input: (vector_in, pointer, index, vl)
+ class RISCVIStore
+ : Intrinsic<[],
+ [llvm_anyvector_ty,
+ LLVMPointerType<LLVMMatchType<0>>,
+ llvm_anyint_ty, llvm_anyint_ty],
+ [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
+ // For indexed store with mask
+ // Input: (vector_in, pointer, index, mask, vl)
+ class RISCVIStoreMask
+ : Intrinsic<[],
+ [llvm_anyvector_ty,
+ LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
+ // For destination vector type is the same as source vector.
+ // Input: (vector_in, vl)
+ class RISCVUnaryAANoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For destination vector type is the same as first source vector (with mask).
+ // Input: (vector_in, mask, vl)
+ class RISCVUnaryAAMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For destination vector type is the same as first and second source vector.
+ // Input: (vector_in, vector_in, vl)
+ class RISCVBinaryAAANoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+  // For destination vector type is the same as first and second source vector (with mask).
+  // Input: (vector_in, vector_in, mask, vl)
+ class RISCVBinaryAAAMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For destination vector type is the same as first source vector.
+ // Input: (vector_in, vector_in/scalar_in, vl)
+ class RISCVBinaryAAXNoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ // For destination vector type is the same as first source vector (with mask).
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ class RISCVBinaryAAXMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 3;
+ }
+ // For destination vector type is NOT the same as first source vector.
+ // Input: (vector_in, vector_in/scalar_in, vl)
+ class RISCVBinaryABXNoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ // For destination vector type is NOT the same as first source vector (with mask).
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ class RISCVBinaryABXMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 3;
+ }
+ // For binary operations with V0 as input.
+ // Input: (vector_in, vector_in/scalar_in, V0, vl)
+ class RISCVBinaryWithV0
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ // For binary operations with mask type output and V0 as input.
+ // Output: (mask type output)
+ // Input: (vector_in, vector_in/scalar_in, V0, vl)
+ class RISCVBinaryMOutWithV0
+ :Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [llvm_anyvector_ty, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ // For binary operations with mask type output.
+ // Output: (mask type output)
+ // Input: (vector_in, vector_in/scalar_in, vl)
+ class RISCVBinaryMOut
+ : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ // For binary operations with mask type output without mask.
+ // Output: (mask type output)
+ // Input: (vector_in, vector_in/scalar_in, vl)
+ class RISCVCompareNoMask
+ : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ // For binary operations with mask type output with mask.
+ // Output: (mask type output)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ class RISCVCompareMask
+ : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 3;
+ }
+ // For FP classify operations.
+ // Output: (bit mask type output)
+ // Input: (vector_in, vl)
+ class RISCVClassifyNoMask
+ : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
+ [llvm_anyvector_ty, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For FP classify operations with mask.
+ // Output: (bit mask type output)
+ // Input: (maskedoff, vector_in, mask, vl)
+ class RISCVClassifyMask
+ : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
+ [LLVMVectorOfBitcastsToInt<0>, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For Saturating binary operations.
+ // The destination vector type is the same as first source vector.
+ // Input: (vector_in, vector_in/scalar_in, vl)
+ class RISCVSaturatingBinaryAAXNoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty],
+ [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ // For Saturating binary operations with mask.
+ // The destination vector type is the same as first source vector.
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ class RISCVSaturatingBinaryAAXMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
+ let ExtendOperand = 3;
+ }
+ // For Saturating binary operations.
+ // The destination vector type is NOT the same as first source vector.
+ // Input: (vector_in, vector_in/scalar_in, vl)
+ class RISCVSaturatingBinaryABXNoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty],
+ [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ // For Saturating binary operations with mask.
+ // The destination vector type is NOT the same as first source vector (with mask).
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ class RISCVSaturatingBinaryABXMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
+ let ExtendOperand = 3;
+ }
+ class RISCVTernaryAAAXNoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
+ LLVMMatchType<1>],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ class RISCVTernaryAAAXMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ class RISCVTernaryAAXANoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>,
+ llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ class RISCVTernaryAAXAMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ class RISCVTernaryWideNoMask
+ : Intrinsic< [llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty,
+ llvm_anyint_ty],
+ [IntrNoMem] >, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ class RISCVTernaryWideMask
+ : Intrinsic< [llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+ // For Reduction ternary operations.
+ // For destination vector type is the same as first and third source vector.
+ // Input: (vector_in, vector_in, vector_in, vl)
+ class RISCVReductionNoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>,
+ llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For Reduction ternary operations with mask.
+ // For destination vector type is the same as first and third source vector.
+  // The mask type comes from the second source vector.
+ // Input: (maskedoff, vector_in, vector_in, vector_in, mask, vl)
+ class RISCVReductionMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For unary operations with scalar type output without mask
+ // Output: (scalar type)
+ // Input: (vector_in, vl)
+ class RISCVMaskUnarySOutNoMask
+ : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For unary operations with scalar type output with mask
+ // Output: (scalar type)
+ // Input: (vector_in, mask, vl)
+ class RISCVMaskUnarySOutMask
+ : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<0>],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For destination vector type is NOT the same as source vector.
+ // Input: (vector_in, vl)
+ class RISCVUnaryABNoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For destination vector type is NOT the same as source vector (with mask).
+ // Input: (maskedoff, vector_in, mask, vl)
+ class RISCVUnaryABMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
+ llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For unary operations with the same vector type in/out without mask
+ // Output: (vector)
+ // Input: (vector_in, vl)
+ class RISCVUnaryNoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For mask unary operations with mask type in/out with mask
+ // Output: (mask type output)
+ // Input: (mask type maskedoff, mask type vector_in, mask, vl)
+ class RISCVMaskUnaryMOutMask
+ : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // Output: (vector)
+ // Input: (vl)
+ class RISCVNullaryIntrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For Conversion unary operations.
+ // Input: (vector_in, vl)
+ class RISCVConversionNoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For Conversion unary operations with mask.
+ // Input: (maskedoff, vector_in, mask, vl)
+ class RISCVConversionMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For atomic operations without mask
+ // Input: (base, index, value, vl)
+ class RISCVAMONoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty, LLVMMatchType<0>,
+ llvm_anyint_ty],
+ [NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic;
+ // For atomic operations with mask
+ // Input: (base, index, value, mask, vl)
+ class RISCVAMOMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic;
+
+ // For unit stride segment load
+ // Input: (pointer, vl)
+ class RISCVUSSegLoad<int nf>
+ : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
+ !add(nf, -1))),
+ [LLVMPointerToElt<0>, llvm_anyint_ty],
+ [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
+ // For unit stride segment load with mask
+ // Input: (maskedoff, pointer, mask, vl)
+ class RISCVUSSegLoadMask<int nf>
+ : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
+ !add(nf, -1))),
+ !listconcat(!listsplat(LLVMMatchType<0>, nf),
+ [LLVMPointerToElt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty]),
+ [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic;
+
+ // For unit stride fault-only-first segment load
+ // Input: (pointer, vl)
+ // Output: (data, vl)
+ // NOTE: We model this with default memory properties since we model writing
+ // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
+ class RISCVUSSegLoadFF<int nf>
+ : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
+ !add(nf, -1)), [llvm_anyint_ty]),
+ [LLVMPointerToElt<0>, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic;
+ // For unit stride fault-only-first segment load with mask
+ // Input: (maskedoff, pointer, mask, vl)
+ // Output: (data, vl)
+ // NOTE: We model this with default memory properties since we model writing
+ // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
+ class RISCVUSSegLoadFFMask<int nf>
+ : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
+ !add(nf, -1)), [llvm_anyint_ty]),
+ !listconcat(!listsplat(LLVMMatchType<0>, nf),
+ [LLVMPointerToElt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<1>]),
+ [NoCapture<ArgIndex<nf>>]>, RISCVVIntrinsic;
+
+ // For stride segment load
+ // Input: (pointer, offset, vl)
+ class RISCVSSegLoad<int nf>
+ : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
+ !add(nf, -1))),
+ [LLVMPointerToElt<0>, llvm_anyint_ty, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
+ // For stride segment load with mask
+ // Input: (maskedoff, pointer, offset, mask, vl)
+ class RISCVSSegLoadMask<int nf>
+ : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
+ !add(nf, -1))),
+ !listconcat(!listsplat(LLVMMatchType<0>, nf),
+ [LLVMPointerToElt<0>,
+ llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<1>]),
+ [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic;
+
+ // For indexed segment load
+ // Input: (pointer, index, vl)
+ class RISCVISegLoad<int nf>
+ : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
+ !add(nf, -1))),
+ [LLVMPointerToElt<0>, llvm_anyvector_ty, llvm_anyint_ty],
+ [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
+ // For indexed segment load with mask
+ // Input: (maskedoff, pointer, index, mask, vl)
+ class RISCVISegLoadMask<int nf>
+ : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
+ !add(nf, -1))),
+ !listconcat(!listsplat(LLVMMatchType<0>, nf),
+ [LLVMPointerToElt<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty]),
+ [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic;
+
+ // For unit stride segment store
+ // Input: (value, pointer, vl)
+ class RISCVUSSegStore<int nf>
+ : Intrinsic<[],
+ !listconcat([llvm_anyvector_ty],
+ !listsplat(LLVMMatchType<0>, !add(nf, -1)),
+ [LLVMPointerToElt<0>, llvm_anyint_ty]),
+ [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic;
+ // For unit stride segment store with mask
+ // Input: (value, pointer, mask, vl)
+ class RISCVUSSegStoreMask<int nf>
+ : Intrinsic<[],
+ !listconcat([llvm_anyvector_ty],
+ !listsplat(LLVMMatchType<0>, !add(nf, -1)),
+ [LLVMPointerToElt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty]),
+ [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic;
+
+ // For stride segment store
+ // Input: (value, pointer, offset, vl)
+ class RISCVSSegStore<int nf>
+ : Intrinsic<[],
+ !listconcat([llvm_anyvector_ty],
+ !listsplat(LLVMMatchType<0>, !add(nf, -1)),
+ [LLVMPointerToElt<0>, llvm_anyint_ty,
+ LLVMMatchType<1>]),
+ [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic;
+ // For stride segment store with mask
+ // Input: (value, pointer, offset, mask, vl)
+ class RISCVSSegStoreMask<int nf>
+ : Intrinsic<[],
+ !listconcat([llvm_anyvector_ty],
+ !listsplat(LLVMMatchType<0>, !add(nf, -1)),
+ [LLVMPointerToElt<0>, llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<1>]),
+ [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic;
+
+ // For indexed segment store
+ // Input: (value, pointer, offset, vl)
+ class RISCVISegStore<int nf>
+ : Intrinsic<[],
+ !listconcat([llvm_anyvector_ty],
+ !listsplat(LLVMMatchType<0>, !add(nf, -1)),
+ [LLVMPointerToElt<0>, llvm_anyvector_ty,
+ llvm_anyint_ty]),
+ [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic;
+ // For indexed segment store with mask
+ // Input: (value, pointer, offset, mask, vl)
+ class RISCVISegStoreMask<int nf>
+ : Intrinsic<[],
+ !listconcat([llvm_anyvector_ty],
+ !listsplat(LLVMMatchType<0>, !add(nf, -1)),
+ [LLVMPointerToElt<0>, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty]),
+ [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic;
+
+ multiclass RISCVUSLoad {
+ def "int_riscv_" # NAME : RISCVUSLoad;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSLoadMask;
+ }
+ multiclass RISCVUSLoadFF {
+ def "int_riscv_" # NAME : RISCVUSLoadFF;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSLoadFFMask;
+ }
+ multiclass RISCVSLoad {
+ def "int_riscv_" # NAME : RISCVSLoad;
+ def "int_riscv_" # NAME # "_mask" : RISCVSLoadMask;
+ }
+ multiclass RISCVILoad {
+ def "int_riscv_" # NAME : RISCVILoad;
+ def "int_riscv_" # NAME # "_mask" : RISCVILoadMask;
+ }
+ multiclass RISCVUSStore {
+ def "int_riscv_" # NAME : RISCVUSStore;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSStoreMask;
+ }
+ multiclass RISCVSStore {
+ def "int_riscv_" # NAME : RISCVSStore;
+ def "int_riscv_" # NAME # "_mask" : RISCVSStoreMask;
+ }
+
+ multiclass RISCVIStore {
+ def "int_riscv_" # NAME : RISCVIStore;
+ def "int_riscv_" # NAME # "_mask" : RISCVIStoreMask;
+ }
+ multiclass RISCVUnaryAA {
+ def "int_riscv_" # NAME : RISCVUnaryAANoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMask;
+ }
+ multiclass RISCVUnaryAB {
+ def "int_riscv_" # NAME : RISCVUnaryABNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVUnaryABMask;
+ }
+ // AAX means the destination type(A) is the same as the first source
+ // type(A). X means any type for the second source operand.
+ multiclass RISCVBinaryAAX {
+ def "int_riscv_" # NAME : RISCVBinaryAAXNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAXMask;
+ }
+ // ABX means the destination type(A) is different from the first source
+ // type(B). X means any type for the second source operand.
+ multiclass RISCVBinaryABX {
+ def "int_riscv_" # NAME : RISCVBinaryABXNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVBinaryABXMask;
+ }
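  // As a concrete example of the AAX scheme, the later "defm vadd :
  // RISCVBinaryAAX;" is equivalent to writing out:
  def int_riscv_vadd      : RISCVBinaryAAXNoMask; // (vector, vector/scalar, vl)
  def int_riscv_vadd_mask : RISCVBinaryAAXMask;   // (maskedoff, vector,
                                                  //  vector/scalar, mask, vl)
  // i.e. the IR intrinsics llvm.riscv.vadd and llvm.riscv.vadd.mask.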
+ multiclass RISCVBinaryWithV0 {
+ def "int_riscv_" # NAME : RISCVBinaryWithV0;
+ }
+ multiclass RISCVBinaryMaskOutWithV0 {
+ def "int_riscv_" # NAME : RISCVBinaryMOutWithV0;
+ }
+ multiclass RISCVBinaryMaskOut {
+ def "int_riscv_" # NAME : RISCVBinaryMOut;
+ }
+ multiclass RISCVSaturatingBinaryAAX {
+ def "int_riscv_" # NAME : RISCVSaturatingBinaryAAXNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAXMask;
+ }
+ multiclass RISCVSaturatingBinaryABX {
+ def "int_riscv_" # NAME : RISCVSaturatingBinaryABXNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryABXMask;
+ }
+ multiclass RISCVTernaryAAAX {
+ def "int_riscv_" # NAME : RISCVTernaryAAAXNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAAXMask;
+ }
+ multiclass RISCVTernaryAAXA {
+ def "int_riscv_" # NAME : RISCVTernaryAAXANoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMask;
+ }
+ multiclass RISCVCompare {
+ def "int_riscv_" # NAME : RISCVCompareNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVCompareMask;
+ }
+ multiclass RISCVClassify {
+ def "int_riscv_" # NAME : RISCVClassifyNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVClassifyMask;
+ }
+ multiclass RISCVTernaryWide {
+ def "int_riscv_" # NAME : RISCVTernaryWideNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVTernaryWideMask;
+ }
+ multiclass RISCVReduction {
+ def "int_riscv_" # NAME : RISCVReductionNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVReductionMask;
+ }
+ multiclass RISCVMaskUnarySOut {
+ def "int_riscv_" # NAME : RISCVMaskUnarySOutNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVMaskUnarySOutMask;
+ }
+ multiclass RISCVMaskUnaryMOut {
+ def "int_riscv_" # NAME : RISCVUnaryNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVMaskUnaryMOutMask;
+ }
+ multiclass RISCVConversion {
+    def "int_riscv_" # NAME : RISCVConversionNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVConversionMask;
+ }
+ multiclass RISCVAMO {
+ def "int_riscv_" # NAME : RISCVAMONoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVAMOMask;
+ }
+ multiclass RISCVUSSegLoad<int nf> {
+ def "int_riscv_" # NAME : RISCVUSSegLoad<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadMask<nf>;
+ }
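  // For example, "defm vlseg2 : RISCVUSSegLoad<2>;" (instantiated by the
  // foreach near the end of this block) is equivalent to:
  def int_riscv_vlseg2      : RISCVUSSegLoad<2>;     // 2 result vectors; (pointer, vl)
  def int_riscv_vlseg2_mask : RISCVUSSegLoadMask<2>; // (maskedoff x2, pointer, mask, vl)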
+ multiclass RISCVUSSegLoadFF<int nf> {
+ def "int_riscv_" # NAME : RISCVUSSegLoadFF<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadFFMask<nf>;
+ }
+ multiclass RISCVSSegLoad<int nf> {
+ def "int_riscv_" # NAME : RISCVSSegLoad<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVSSegLoadMask<nf>;
+ }
+ multiclass RISCVISegLoad<int nf> {
+ def "int_riscv_" # NAME : RISCVISegLoad<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVISegLoadMask<nf>;
+ }
+ multiclass RISCVUSSegStore<int nf> {
+ def "int_riscv_" # NAME : RISCVUSSegStore<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSSegStoreMask<nf>;
+ }
+ multiclass RISCVSSegStore<int nf> {
+ def "int_riscv_" # NAME : RISCVSSegStore<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVSSegStoreMask<nf>;
+ }
+ multiclass RISCVISegStore<int nf> {
+ def "int_riscv_" # NAME : RISCVISegStore<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVISegStoreMask<nf>;
+ }
+
+ defm vle : RISCVUSLoad;
+ defm vleff : RISCVUSLoadFF;
+ defm vse : RISCVUSStore;
+  defm vlse : RISCVSLoad;
+  defm vsse : RISCVSStore;
+ defm vluxei : RISCVILoad;
+ defm vloxei : RISCVILoad;
+ defm vsoxei : RISCVIStore;
+ defm vsuxei : RISCVIStore;
+
+ def int_riscv_vle1 : RISCVUSLoad;
+ def int_riscv_vse1 : RISCVUSStore;
+
+ defm vamoswap : RISCVAMO;
+ defm vamoadd : RISCVAMO;
+ defm vamoxor : RISCVAMO;
+ defm vamoand : RISCVAMO;
+ defm vamoor : RISCVAMO;
+ defm vamomin : RISCVAMO;
+ defm vamomax : RISCVAMO;
+ defm vamominu : RISCVAMO;
+ defm vamomaxu : RISCVAMO;
+
+ defm vadd : RISCVBinaryAAX;
+ defm vsub : RISCVBinaryAAX;
+ defm vrsub : RISCVBinaryAAX;
+
+ defm vwaddu : RISCVBinaryABX;
+ defm vwadd : RISCVBinaryABX;
+ defm vwaddu_w : RISCVBinaryAAX;
+ defm vwadd_w : RISCVBinaryAAX;
+ defm vwsubu : RISCVBinaryABX;
+ defm vwsub : RISCVBinaryABX;
+ defm vwsubu_w : RISCVBinaryAAX;
+ defm vwsub_w : RISCVBinaryAAX;
+
+ defm vzext : RISCVUnaryAB;
+ defm vsext : RISCVUnaryAB;
+
+ defm vadc : RISCVBinaryWithV0;
+ defm vmadc_carry_in : RISCVBinaryMaskOutWithV0;
+ defm vmadc : RISCVBinaryMaskOut;
+
+ defm vsbc : RISCVBinaryWithV0;
+ defm vmsbc_borrow_in : RISCVBinaryMaskOutWithV0;
+ defm vmsbc : RISCVBinaryMaskOut;
+
+ defm vand : RISCVBinaryAAX;
+ defm vor : RISCVBinaryAAX;
+ defm vxor : RISCVBinaryAAX;
+
+ defm vsll : RISCVBinaryAAX;
+ defm vsrl : RISCVBinaryAAX;
+ defm vsra : RISCVBinaryAAX;
+
+ defm vnsrl : RISCVBinaryABX;
+ defm vnsra : RISCVBinaryABX;
+
+ defm vmseq : RISCVCompare;
+ defm vmsne : RISCVCompare;
+ defm vmsltu : RISCVCompare;
+ defm vmslt : RISCVCompare;
+ defm vmsleu : RISCVCompare;
+ defm vmsle : RISCVCompare;
+ defm vmsgtu : RISCVCompare;
+ defm vmsgt : RISCVCompare;
+
+ defm vminu : RISCVBinaryAAX;
+ defm vmin : RISCVBinaryAAX;
+ defm vmaxu : RISCVBinaryAAX;
+ defm vmax : RISCVBinaryAAX;
+
+ defm vmul : RISCVBinaryAAX;
+ defm vmulh : RISCVBinaryAAX;
+ defm vmulhu : RISCVBinaryAAX;
+ defm vmulhsu : RISCVBinaryAAX;
+
+ defm vdivu : RISCVBinaryAAX;
+ defm vdiv : RISCVBinaryAAX;
+ defm vremu : RISCVBinaryAAX;
+ defm vrem : RISCVBinaryAAX;
+
+ defm vwmul : RISCVBinaryABX;
+ defm vwmulu : RISCVBinaryABX;
+ defm vwmulsu : RISCVBinaryABX;
+
+ defm vmacc : RISCVTernaryAAXA;
+ defm vnmsac : RISCVTernaryAAXA;
+ defm vmadd : RISCVTernaryAAXA;
+ defm vnmsub : RISCVTernaryAAXA;
+
+ defm vwmaccu : RISCVTernaryWide;
+ defm vwmacc : RISCVTernaryWide;
+ defm vwmaccus : RISCVTernaryWide;
+ defm vwmaccsu : RISCVTernaryWide;
+
+ defm vfadd : RISCVBinaryAAX;
+ defm vfsub : RISCVBinaryAAX;
+ defm vfrsub : RISCVBinaryAAX;
+
+ defm vfwadd : RISCVBinaryABX;
+ defm vfwsub : RISCVBinaryABX;
+ defm vfwadd_w : RISCVBinaryAAX;
+ defm vfwsub_w : RISCVBinaryAAX;
+
+ defm vsaddu : RISCVSaturatingBinaryAAX;
+ defm vsadd : RISCVSaturatingBinaryAAX;
+ defm vssubu : RISCVSaturatingBinaryAAX;
+ defm vssub : RISCVSaturatingBinaryAAX;
+
+ def int_riscv_vmerge : RISCVBinaryWithV0;
+
+ def int_riscv_vmv_v_v : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ def int_riscv_vmv_v_x : Intrinsic<[llvm_anyint_ty],
+ [LLVMVectorElementType<0>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 1;
+ }
+ def int_riscv_vfmv_v_f : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMVectorElementType<0>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+
+ def int_riscv_vmv_x_s : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ def int_riscv_vmv_s_x : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMVectorElementType<0>,
+ llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic {
+ let ExtendOperand = 2;
+ }
+
+ def int_riscv_vfmv_f_s : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyfloat_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ def int_riscv_vfmv_s_f : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMVectorElementType<0>,
+ llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+
+ defm vfmul : RISCVBinaryAAX;
+ defm vfdiv : RISCVBinaryAAX;
+ defm vfrdiv : RISCVBinaryAAX;
+
+ defm vfwmul : RISCVBinaryABX;
+
+ defm vfmacc : RISCVTernaryAAXA;
+ defm vfnmacc : RISCVTernaryAAXA;
+ defm vfmsac : RISCVTernaryAAXA;
+ defm vfnmsac : RISCVTernaryAAXA;
+ defm vfmadd : RISCVTernaryAAXA;
+ defm vfnmadd : RISCVTernaryAAXA;
+ defm vfmsub : RISCVTernaryAAXA;
+ defm vfnmsub : RISCVTernaryAAXA;
+
+ defm vfwmacc : RISCVTernaryWide;
+ defm vfwnmacc : RISCVTernaryWide;
+ defm vfwmsac : RISCVTernaryWide;
+ defm vfwnmsac : RISCVTernaryWide;
+
+ defm vfsqrt : RISCVUnaryAA;
+ defm vfrsqrt7 : RISCVUnaryAA;
+ defm vfrec7 : RISCVUnaryAA;
+
+ defm vfmin : RISCVBinaryAAX;
+ defm vfmax : RISCVBinaryAAX;
+
+ defm vfsgnj : RISCVBinaryAAX;
+ defm vfsgnjn : RISCVBinaryAAX;
+ defm vfsgnjx : RISCVBinaryAAX;
+
+ defm vfclass : RISCVClassify;
+
+ defm vfmerge : RISCVBinaryWithV0;
+
+ defm vslideup : RISCVTernaryAAAX;
+ defm vslidedown : RISCVTernaryAAAX;
+
+ defm vslide1up : RISCVBinaryAAX;
+ defm vslide1down : RISCVBinaryAAX;
+ defm vfslide1up : RISCVBinaryAAX;
+ defm vfslide1down : RISCVBinaryAAX;
+
+ defm vrgather : RISCVBinaryAAX;
+ defm vrgatherei16 : RISCVBinaryAAX;
+
+ def "int_riscv_vcompress" : RISCVBinaryAAAMask;
+
+ defm vaaddu : RISCVSaturatingBinaryAAX;
+ defm vaadd : RISCVSaturatingBinaryAAX;
+ defm vasubu : RISCVSaturatingBinaryAAX;
+ defm vasub : RISCVSaturatingBinaryAAX;
+
+ defm vsmul : RISCVSaturatingBinaryAAX;
+
+ defm vssrl : RISCVSaturatingBinaryAAX;
+ defm vssra : RISCVSaturatingBinaryAAX;
+
+ defm vnclipu : RISCVSaturatingBinaryABX;
+ defm vnclip : RISCVSaturatingBinaryABX;
+
+ defm vmfeq : RISCVCompare;
+ defm vmfne : RISCVCompare;
+ defm vmflt : RISCVCompare;
+ defm vmfle : RISCVCompare;
+ defm vmfgt : RISCVCompare;
+ defm vmfge : RISCVCompare;
+
+ defm vredsum : RISCVReduction;
+ defm vredand : RISCVReduction;
+ defm vredor : RISCVReduction;
+ defm vredxor : RISCVReduction;
+ defm vredminu : RISCVReduction;
+ defm vredmin : RISCVReduction;
+ defm vredmaxu : RISCVReduction;
+ defm vredmax : RISCVReduction;
+
+ defm vwredsumu : RISCVReduction;
+ defm vwredsum : RISCVReduction;
+
+ defm vfredosum : RISCVReduction;
+ defm vfredsum : RISCVReduction;
+ defm vfredmin : RISCVReduction;
+ defm vfredmax : RISCVReduction;
+
+ defm vfwredsum : RISCVReduction;
+ defm vfwredosum : RISCVReduction;
+
+ def int_riscv_vmand: RISCVBinaryAAANoMask;
+ def int_riscv_vmnand: RISCVBinaryAAANoMask;
+ def int_riscv_vmandnot: RISCVBinaryAAANoMask;
+ def int_riscv_vmxor: RISCVBinaryAAANoMask;
+ def int_riscv_vmor: RISCVBinaryAAANoMask;
+ def int_riscv_vmnor: RISCVBinaryAAANoMask;
+ def int_riscv_vmornot: RISCVBinaryAAANoMask;
+ def int_riscv_vmxnor: RISCVBinaryAAANoMask;
+ def int_riscv_vmclr : RISCVNullaryIntrinsic;
+ def int_riscv_vmset : RISCVNullaryIntrinsic;
+
+ defm vpopc : RISCVMaskUnarySOut;
+ defm vfirst : RISCVMaskUnarySOut;
+ defm vmsbf : RISCVMaskUnaryMOut;
+ defm vmsof : RISCVMaskUnaryMOut;
+ defm vmsif : RISCVMaskUnaryMOut;
+
+ defm vfcvt_xu_f_v : RISCVConversion;
+ defm vfcvt_x_f_v : RISCVConversion;
+ defm vfcvt_rtz_xu_f_v : RISCVConversion;
+ defm vfcvt_rtz_x_f_v : RISCVConversion;
+ defm vfcvt_f_xu_v : RISCVConversion;
+ defm vfcvt_f_x_v : RISCVConversion;
+
+ defm vfwcvt_f_xu_v : RISCVConversion;
+ defm vfwcvt_f_x_v : RISCVConversion;
+ defm vfwcvt_xu_f_v : RISCVConversion;
+ defm vfwcvt_x_f_v : RISCVConversion;
+ defm vfwcvt_rtz_xu_f_v : RISCVConversion;
+ defm vfwcvt_rtz_x_f_v : RISCVConversion;
+ defm vfwcvt_f_f_v : RISCVConversion;
+
+ defm vfncvt_f_xu_w : RISCVConversion;
+ defm vfncvt_f_x_w : RISCVConversion;
+ defm vfncvt_xu_f_w : RISCVConversion;
+ defm vfncvt_x_f_w : RISCVConversion;
+ defm vfncvt_rtz_xu_f_w : RISCVConversion;
+ defm vfncvt_rtz_x_f_w : RISCVConversion;
+ defm vfncvt_f_f_w : RISCVConversion;
+ defm vfncvt_rod_f_f_w : RISCVConversion;
+
+ // Output: (vector)
+ // Input: (mask type input, vl)
+ def int_riscv_viota : Intrinsic<[llvm_anyvector_ty],
+                                 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                  llvm_anyint_ty],
+                                 [IntrNoMem]>, RISCVVIntrinsic;
+ // Output: (vector)
+ // Input: (maskedoff, mask type vector_in, mask, vl)
+ def int_riscv_viota_mask : Intrinsic<[llvm_anyvector_ty],
+                                      [LLVMMatchType<0>,
+                                       LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                       LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                       llvm_anyint_ty],
+                                      [IntrNoMem]>, RISCVVIntrinsic;
+ // Output: (vector)
+ // Input: (vl)
+ def int_riscv_vid : RISCVNullaryIntrinsic;
+
+ // Output: (vector)
+ // Input: (maskedoff, mask, vl)
+ def int_riscv_vid_mask : Intrinsic<[llvm_anyvector_ty],
+                                    [LLVMMatchType<0>,
+                                     LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                     llvm_anyint_ty],
+                                    [IntrNoMem]>, RISCVVIntrinsic;
+
+ foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
+ defm vlseg # nf : RISCVUSSegLoad<nf>;
+ defm vlseg # nf # ff : RISCVUSSegLoadFF<nf>;
+ defm vlsseg # nf : RISCVSSegLoad<nf>;
+ defm vloxseg # nf : RISCVISegLoad<nf>;
+ defm vluxseg # nf : RISCVISegLoad<nf>;
+ defm vsseg # nf : RISCVUSSegStore<nf>;
+ defm vssseg # nf : RISCVSSegStore<nf>;
+ defm vsoxseg # nf : RISCVISegStore<nf>;
+ defm vsuxseg # nf : RISCVISegStore<nf>;
+ }
+
+} // TargetPrefix = "riscv"
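
A note on how the records above are consumed, as a minimal sketch rather than part of the patch itself: TableGen turns each def int_riscv_* / defm v* declaration into an Intrinsic::riscv_* enumerator in the generated header llvm/IR/IntrinsicsRISCV.h, and optimizer or backend code materializes calls to it through the usual intrinsic-building APIs. The C++ snippet below builds a call to llvm.riscv.vmv.x.s with IRBuilder; the helper name is made up for illustration, and the overload type is simply whatever legal integer vector the caller passes in (llvm_anyint_ty above).

    // Sketch only: emit llvm.riscv.vmv.x.s for an arbitrary integer vector value.
    // The intrinsic is overloaded on the vector operand type; the scalar result
    // type follows from LLVMVectorElementType<0> in the declaration above.
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicsRISCV.h"

    using namespace llvm;

    // Hypothetical helper, not taken from the patch.
    static Value *emitVmvXS(IRBuilder<> &Builder, Value *Vec) {
      return Builder.CreateIntrinsic(Intrinsic::riscv_vmv_x_s,
                                     {Vec->getType()}, {Vec});
    }

The ExtendOperand lets above appear to steer how the backend widens the matching scalar operand during lowering; they do not change the IR-level call built here.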
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsVE.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsVE.td
new file mode 100644
index 000000000000..be4bccef0cc1
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsVE.td
@@ -0,0 +1,35 @@
+// Define intrinsics written by hand
+
+// VEL Intrinsic instructions.
+let TargetPrefix = "ve" in {
+ def int_ve_vl_svob : GCCBuiltin<"__builtin_ve_vl_svob">,
+                      Intrinsic<[], [], [IntrHasSideEffects]>;
+
+ def int_ve_vl_pack_f32p : GCCBuiltin<"__builtin_ve_vl_pack_f32p">,
+                           Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
+                                     [IntrReadMem]>;
+ def int_ve_vl_pack_f32a : GCCBuiltin<"__builtin_ve_vl_pack_f32a">,
+                           Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
+                                     [IntrReadMem]>;
+
+ def int_ve_vl_extract_vm512u :
+     GCCBuiltin<"__builtin_ve_vl_extract_vm512u">,
+     Intrinsic<[LLVMType<v256i1>], [LLVMType<v512i1>], [IntrNoMem]>;
+
+ def int_ve_vl_extract_vm512l :
+     GCCBuiltin<"__builtin_ve_vl_extract_vm512l">,
+     Intrinsic<[LLVMType<v256i1>], [LLVMType<v512i1>], [IntrNoMem]>;
+
+ def int_ve_vl_insert_vm512u :
+     GCCBuiltin<"__builtin_ve_vl_insert_vm512u">,
+     Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v256i1>],
+               [IntrNoMem]>;
+
+ def int_ve_vl_insert_vm512l :
+     GCCBuiltin<"__builtin_ve_vl_insert_vm512l">,
+     Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v256i1>],
+               [IntrNoMem]>;
+}
+
+// Define intrinsics automatically generated
+include "llvm/IR/IntrinsicsVEVL.gen.td"
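
A note on usage, again as a rough sketch rather than part of the patch: every VE intrinsic in this file (and in the generated IntrinsicsVEVL.gen.td included above, whose diff follows) is bound to a Clang builtin through GCCBuiltin<...>, so it is normally reached from C/C++ source rather than constructed in IR by hand. The prototype in the example is inferred from the TableGen signature of int_ve_vl_pack_f32p above (i64 result, two pointer operands, IntrReadMem) and may not match the official VE intrinsics header exactly.

    // Sketch only: read two 32-bit floats and pack them into one 64-bit value
    // through the builtin that GCCBuiltin<"__builtin_ve_vl_pack_f32p"> binds
    // to int_ve_vl_pack_f32p above.  Wrapper name, parameter names, and the
    // C-level result type are assumptions, not taken from the patch.
    long pack_f32_pair(float *a, float *b) {
      return __builtin_ve_vl_pack_f32p(a, b);
    }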
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
new file mode 100644
index 000000000000..67cbd307903d
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
@@ -0,0 +1,1213 @@
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : GCCBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : GCCBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : GCCBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : GCCBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : GCCBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : GCCBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : GCCBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : GCCBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : GCCBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst_vssl : GCCBuiltin<"__builtin_ve_vl_vst_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst_vssml : GCCBuiltin<"__builtin_ve_vl_vst_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstot_vssl : GCCBuiltin<"__builtin_ve_vl_vstot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstot_vssml : GCCBuiltin<"__builtin_ve_vl_vstot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu_vssl : GCCBuiltin<"__builtin_ve_vl_vstu_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu_vssml : GCCBuiltin<"__builtin_ve_vl_vstu_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssl : GCCBuiltin<"__builtin_ve_vl_vstunc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssml : GCCBuiltin<"__builtin_ve_vl_vstunc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssl : GCCBuiltin<"__builtin_ve_vl_vstuot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssml : GCCBuiltin<"__builtin_ve_vl_vstuot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstuncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstuncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl_vssl : GCCBuiltin<"__builtin_ve_vl_vstl_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl_vssml : GCCBuiltin<"__builtin_ve_vl_vstl_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstlnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstlnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssl : GCCBuiltin<"__builtin_ve_vl_vstlot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssml : GCCBuiltin<"__builtin_ve_vl_vstlot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstlncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstlncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssl : GCCBuiltin<"__builtin_ve_vl_vst2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssml : GCCBuiltin<"__builtin_ve_vl_vst2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pfchv_ssl : GCCBuiltin<"__builtin_ve_vl_pfchv_ssl">, Intrinsic<[], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrInaccessibleMemOrArgMemOnly]>;
+let TargetPrefix = "ve" in def int_ve_vl_pfchvnc_ssl : GCCBuiltin<"__builtin_ve_vl_pfchvnc_ssl">, Intrinsic<[], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrInaccessibleMemOrArgMemOnly]>;
+let TargetPrefix = "ve" in def int_ve_vl_lsv_vvss : GCCBuiltin<"__builtin_ve_vl_lsv_vvss">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvsl_svs : GCCBuiltin<"__builtin_ve_vl_lvsl_svs">, Intrinsic<[LLVMType<i64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvsd_svs : GCCBuiltin<"__builtin_ve_vl_lvsd_svs">, Intrinsic<[LLVMType<f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvss_svs : GCCBuiltin<"__builtin_ve_vl_lvss_svs">, Intrinsic<[LLVMType<f32>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvm_mmss : GCCBuiltin<"__builtin_ve_vl_lvm_mmss">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<i64>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvm_MMss : GCCBuiltin<"__builtin_ve_vl_lvm_MMss">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<i64>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_svm_sms : GCCBuiltin<"__builtin_ve_vl_svm_sms">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_svm_sMs : GCCBuiltin<"__builtin_ve_vl_svm_sMs">, Intrinsic<[LLVMType<i64>], [LLVMType<v512i1>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsvl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsMvl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vvvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vvvvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vsvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vsvvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vvvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vvvvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vsvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vsvvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvmvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvmvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vseq_vl : GCCBuiltin<"__builtin_ve_vl_vseq_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vseq_vvl : GCCBuiltin<"__builtin_ve_vl_vseq_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vl : GCCBuiltin<"__builtin_ve_vl_pvseqlo_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vvl : GCCBuiltin<"__builtin_ve_vl_pvseqlo_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vl : GCCBuiltin<"__builtin_ve_vl_pvsequp_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vvl : GCCBuiltin<"__builtin_ve_vl_pvsequp_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseq_vl : GCCBuiltin<"__builtin_ve_vl_pvseq_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseq_vvl : GCCBuiltin<"__builtin_ve_vl_pvseq_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssvl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsqrtd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsqrtd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvl : GCCBuiltin<"__builtin_ve_vl_vfsqrts_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsqrts_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvl : GCCBuiltin<"__builtin_ve_vl_vrcpd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvvl : GCCBuiltin<"__builtin_ve_vl_vrcpd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvl : GCCBuiltin<"__builtin_ve_vl_vrcps_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvvl : GCCBuiltin<"__builtin_ve_vl_vrcps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvl : GCCBuiltin<"__builtin_ve_vl_pvrcp_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrcp_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrts_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrts_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrt_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrt_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtdw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtdw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtsw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtsw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtsw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtsw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtdl_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtdl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtds_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtsd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtsd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvml : GCCBuiltin<"__builtin_ve_vl_vmrg_vvvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmrg_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvml : GCCBuiltin<"__builtin_ve_vl_vmrg_vsvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmrg_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vvvMl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMvl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vsvMl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMvl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsl : GCCBuiltin<"__builtin_ve_vl_vshf_vvvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsvl : GCCBuiltin<"__builtin_ve_vl_vshf_vvvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcp_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcp_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vex_vvmvl : GCCBuiltin<"__builtin_ve_vl_vex_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklat_ml : GCCBuiltin<"__builtin_ve_vl_vfmklat_ml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklaf_ml : GCCBuiltin<"__builtin_ve_vl_vfmklaf_ml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkat_Ml : GCCBuiltin<"__builtin_ve_vl_pvfmkat_Ml">, Intrinsic<[LLVMType<v512i1>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkaf_Ml : GCCBuiltin<"__builtin_ve_vl_pvfmkaf_Ml">, Intrinsic<[LLVMType<v512i1>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkllt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkllt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkleq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkleq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkleqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkleqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkllenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkllenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwlt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwlt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkweq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkweq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkweqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkweqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwlenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwlenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlone_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlone_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlole_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuple_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlole_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuple_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwne_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwne_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkweq_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkweq_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwge_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwge_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwle_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwle_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnum_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnum_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgtnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgtnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwltnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwltnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkweqnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkweqnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdlt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdlt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdlenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdlenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkslt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkslt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkseq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkseq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkseqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkseqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkslenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkslenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslolt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuplt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslolt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuplt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslone_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslone_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslole_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuple_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslole_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuple_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkslt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksne_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksne_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkseq_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkseq_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksge_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksge_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksle_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksle_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnum_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnum_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgtnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgtnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksltnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksltnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkseqnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkseqnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvl : GCCBuiltin<"__builtin_ve_vl_vsuml_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvml : GCCBuiltin<"__builtin_ve_vl_vsuml_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvml : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvl : GCCBuiltin<"__builtin_ve_vl_vfsums_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvml : GCCBuiltin<"__builtin_ve_vl_vfsums_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxslfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxslfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxsllst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxsllst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvl : GCCBuiltin<"__builtin_ve_vl_vrminslfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminslfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvl : GCCBuiltin<"__builtin_ve_vl_vrminsllst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminsllst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxslst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxslst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmindfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmindfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmindlst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmindlst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrminsfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrminsfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrminslst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrminslst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrand_vvl : GCCBuiltin<"__builtin_ve_vl_vrand_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrand_vvml : GCCBuiltin<"__builtin_ve_vl_vrand_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vror_vvl : GCCBuiltin<"__builtin_ve_vl_vror_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vror_vvml : GCCBuiltin<"__builtin_ve_vl_vror_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvl : GCCBuiltin<"__builtin_ve_vl_vrxor_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvml : GCCBuiltin<"__builtin_ve_vl_vrxor_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : GCCBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssl : GCCBuiltin<"__builtin_ve_vl_vsc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssml : GCCBuiltin<"__builtin_ve_vl_vsc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vscnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vscnc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssl : GCCBuiltin<"__builtin_ve_vl_vscu_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssml : GCCBuiltin<"__builtin_ve_vl_vscu_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssl : GCCBuiltin<"__builtin_ve_vl_vscunc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssml : GCCBuiltin<"__builtin_ve_vl_vscunc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscuot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscuot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscuncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscuncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssl : GCCBuiltin<"__builtin_ve_vl_vscl_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssml : GCCBuiltin<"__builtin_ve_vl_vscl_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclnc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_andm_mmm : GCCBuiltin<"__builtin_ve_vl_andm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_andm_MMM : GCCBuiltin<"__builtin_ve_vl_andm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_orm_mmm : GCCBuiltin<"__builtin_ve_vl_orm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_orm_MMM : GCCBuiltin<"__builtin_ve_vl_orm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_xorm_mmm : GCCBuiltin<"__builtin_ve_vl_xorm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_xorm_MMM : GCCBuiltin<"__builtin_ve_vl_xorm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_eqvm_mmm : GCCBuiltin<"__builtin_ve_vl_eqvm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_eqvm_MMM : GCCBuiltin<"__builtin_ve_vl_eqvm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_nndm_mmm : GCCBuiltin<"__builtin_ve_vl_nndm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_nndm_MMM : GCCBuiltin<"__builtin_ve_vl_nndm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_negm_mm : GCCBuiltin<"__builtin_ve_vl_negm_mm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_negm_MM : GCCBuiltin<"__builtin_ve_vl_negm_MM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pcvm_sml : GCCBuiltin<"__builtin_ve_vl_pcvm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lzvm_sml : GCCBuiltin<"__builtin_ve_vl_lzvm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_tovm_sml : GCCBuiltin<"__builtin_ve_vl_tovm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
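Each record above defines an llvm.ve.vl.* intrinsic together with its __builtin_ve_vl_* Clang builtin; the bracketed LLVMType lists give the result and parameter types (for example, int_ve_vl_vfsumd_vvl takes a <256 x double> vector plus an i32 vector-length operand and returns <256 x double>). A minimal sketch, not part of this patch, of emitting a call to one of these intrinsics from C++; the helper name and the surrounding Module/IRBuilder setup are assumptions:

    // Sketch: emit a call to llvm.ve.vl.vfsumd.vvl.
    // Assumes M and B are an already-configured Module and IRBuilder<>,
    // V is a <256 x double> value and VL is an i32 vector length.
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicsVE.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static Value *emitVFSumD(Module &M, IRBuilder<> &B, Value *V, Value *VL) {
      // int_ve_vl_vfsumd_vvl: [v256f64] result, [v256f64, i32] operands.
      Function *F = Intrinsic::getDeclaration(&M, Intrinsic::ve_vl_vfsumd_vvl);
      return B.CreateCall(F, {V, VL});
    }

Because the definition carries IntrNoMem, identical calls should be eligible for CSE and reordering by the optimizer.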
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 7c9ceb148a47..d306d0ccb90d 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -50,9 +50,10 @@ def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty],
//===----------------------------------------------------------------------===//
// throw / rethrow
+// The immediate argument is an index to a tag, which is 0 for C++.
def int_wasm_throw : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty],
[Throws, IntrNoReturn, ImmArg<ArgIndex<0>>]>;
-def int_wasm_rethrow_in_catch : Intrinsic<[], [], [Throws, IntrNoReturn]>;
+def int_wasm_rethrow : Intrinsic<[], [], [Throws, IntrNoReturn]>;
// Since wasm does not use landingpad instructions, these instructions return
// exception pointer and selector values until we lower them in WasmEHPrepare.
@@ -60,10 +61,12 @@ def int_wasm_get_exception : Intrinsic<[llvm_ptr_ty], [llvm_token_ty],
[IntrHasSideEffects]>;
def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [llvm_token_ty],
[IntrHasSideEffects]>;
-// This is the same as llvm.wasm.get.exception except that it does not take a
-// token operand. This is only for instruction selection purpose.
-def int_wasm_extract_exception : Intrinsic<[llvm_ptr_ty], [],
- [IntrHasSideEffects]>;
+
+// wasm.catch returns the pointer to the exception object caught by wasm 'catch'
+// instruction. This returns a single pointer, which is sufficient for C++
+// support. The immediate argument is an index to a tag, which is 0 for C++.
+def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty],
+ [IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
// WebAssembly EH must maintain the landingpads in the order assigned to them
// by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is
@@ -79,22 +82,23 @@ def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
// wait / notify
-def int_wasm_atomic_wait_i32 :
+def int_wasm_memory_atomic_wait32 :
Intrinsic<[llvm_i32_ty],
[LLVMPointerType<llvm_i32_ty>, llvm_i32_ty, llvm_i64_ty],
- [IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
- IntrHasSideEffects],
- "", [SDNPMemOperand]>;
-def int_wasm_atomic_wait_i64 :
+ [IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>,
+ NoCapture<ArgIndex<0>>, IntrHasSideEffects],
+ "", [SDNPMemOperand]>;
+def int_wasm_memory_atomic_wait64 :
Intrinsic<[llvm_i32_ty],
[LLVMPointerType<llvm_i64_ty>, llvm_i64_ty, llvm_i64_ty],
- [IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
- IntrHasSideEffects],
- "", [SDNPMemOperand]>;
-def int_wasm_atomic_notify:
+ [IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>,
+ NoCapture<ArgIndex<0>>, IntrHasSideEffects],
+ "", [SDNPMemOperand]>;
+def int_wasm_memory_atomic_notify:
Intrinsic<[llvm_i32_ty], [LLVMPointerType<llvm_i32_ty>, llvm_i32_ty],
- [IntrInaccessibleMemOnly, NoCapture<ArgIndex<0>>, IntrHasSideEffects], "",
- [SDNPMemOperand]>;
+ [IntrInaccessibleMemOnly, NoCapture<ArgIndex<0>>,
+ IntrHasSideEffects],
+ "", [SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// SIMD intrinsics
@@ -151,6 +155,7 @@ def int_wasm_dot :
Intrinsic<[llvm_v4i32_ty],
[llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, IntrSpeculatable]>;
+
def int_wasm_narrow_signed :
Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>],
@@ -159,21 +164,21 @@ def int_wasm_narrow_unsigned :
Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem, IntrSpeculatable]>;
+
+// TODO: Replace these intrinsics with normal ISel patterns once i32x4 to i64x2
+// widening is merged to the proposal.
def int_wasm_widen_low_signed :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_wasm_widen_high_signed :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_wasm_widen_low_unsigned :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_wasm_widen_high_unsigned :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_q15mulr_saturate_signed :
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, IntrSpeculatable]>;
// TODO: Replace these intrinsics with normal ISel patterns
@@ -206,6 +211,143 @@ def int_wasm_nearest :
[LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
+// TODO: Replace these intrinsics with normal ISel patterns once the
+// load_zero instructions are merged to the proposal.
+def int_wasm_load32_zero :
+ Intrinsic<[llvm_v4i32_ty],
+ [LLVMPointerType<llvm_i32_ty>],
+ [IntrReadMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
+
+def int_wasm_load64_zero :
+ Intrinsic<[llvm_v2i64_ty],
+ [LLVMPointerType<llvm_i64_ty>],
+ [IntrReadMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
+
+// These intrinsics do not mark their lane index arguments as immediate because
+// that changes the corresponding SDNode from ISD::Constant to
+// ISD::TargetConstant, which would require extra complications in the ISel
+// tablegen patterns. TODO: Replace these intrinsics with normal ISel patterns
+// once the load_lane instructions are merged to the proposal.
+def int_wasm_load8_lane :
+ Intrinsic<[llvm_v16i8_ty],
+ [LLVMPointerType<llvm_i8_ty>, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
+def int_wasm_load16_lane :
+ Intrinsic<[llvm_v8i16_ty],
+ [LLVMPointerType<llvm_i16_ty>, llvm_v8i16_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
+def int_wasm_load32_lane :
+ Intrinsic<[llvm_v4i32_ty],
+ [LLVMPointerType<llvm_i32_ty>, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
+def int_wasm_load64_lane :
+ Intrinsic<[llvm_v2i64_ty],
+ [LLVMPointerType<llvm_i64_ty>, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
+def int_wasm_store8_lane :
+ Intrinsic<[],
+ [LLVMPointerType<llvm_i8_ty>, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrWriteMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
+def int_wasm_store16_lane :
+ Intrinsic<[],
+ [LLVMPointerType<llvm_i16_ty>, llvm_v8i16_ty, llvm_i32_ty],
+ [IntrWriteMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
+def int_wasm_store32_lane :
+ Intrinsic<[],
+ [LLVMPointerType<llvm_i32_ty>, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrWriteMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
+def int_wasm_store64_lane :
+ Intrinsic<[],
+ [LLVMPointerType<llvm_i64_ty>, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrWriteMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
+
+// TODO: Replace this intrinsic with normal ISel patterns once popcnt is merged
+// to the proposal.
+def int_wasm_popcnt :
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_extmul_low_signed :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_extmul_high_signed :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_extmul_low_unsigned :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_extmul_high_unsigned :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_extadd_pairwise_signed :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMSubdivide2VectorType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_extadd_pairwise_unsigned :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMSubdivide2VectorType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_signselect :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
+// TODO: Remove this intrinsic and the associated builtin if i64x2.eq gets
+// merged to the proposal.
+def int_wasm_eq :
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+// TODO: Remove this after experiments have been run. Use the target-agnostic
+// int_prefetch if this becomes specified at some point.
+def int_wasm_prefetch_t :
+ Intrinsic<[], [llvm_ptr_ty],
+ [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn,
+ ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
+ "", [SDNPMemOperand]>;
+
+def int_wasm_prefetch_nt :
+ Intrinsic<[], [llvm_ptr_ty],
+ [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn,
+ ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
+ "", [SDNPMemOperand]>;
+
+// TODO: Remove these, if possible, if they are merged to the spec.
+def int_wasm_convert_low_signed :
+ Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_convert_low_unsigned :
+ Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_trunc_saturate_zero_signed :
+ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_trunc_saturate_zero_unsigned :
+ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_demote_zero :
+ Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_promote_low :
+ Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// Thread-local storage intrinsics
//===----------------------------------------------------------------------===//
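The hunks above rename llvm.wasm.rethrow.in.catch to llvm.wasm.rethrow, replace llvm.wasm.extract.exception with llvm.wasm.catch, and move the atomic wait/notify intrinsics to their memory.atomic.* names. Since int_wasm_catch marks its tag index as ImmArg<ArgIndex<0>>, that operand has to be a compile-time constant. A minimal sketch, not part of this patch, with the helper name and setup assumed:

    // Sketch: emit llvm.wasm.catch with tag index 0 (the C++ tag, per the
    // comment above). Assumes M and B are an existing Module and IRBuilder<>.
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicsWebAssembly.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static Value *emitWasmCatch(Module &M, IRBuilder<> &B) {
      Function *Catch = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
      // ImmArg<ArgIndex<0>>: the tag index must be an immediate constant.
      return B.CreateCall(Catch, {B.getInt32(0)}); // returns the exception ptr
    }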
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsX86.td
index 3f86fd075d3a..bba12139976e 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -283,11 +283,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
IntrHasSideEffects]>;
def int_x86_sse_ldmxcsr :
Intrinsic<[], [llvm_ptr_ty],
- [IntrReadMem, IntrArgMemOnly, IntrHasSideEffects,
// FIXME: LDMXCSR does not actually write to memory,
- // but Fast and DAG Isel both use writing to memory
- // as a proxy for having side effects.
- IntrWriteMem]>;
+ // but intrinsic properties are generated incorrectly
+ // for IntrReadMem+IntrHasSideEffects.
+ [/*IntrReadMem, IntrArgMemOnly,*/ IntrHasSideEffects]>;
}
// Misc.
@@ -4749,26 +4748,26 @@ let TargetPrefix = "x86" in {
let TargetPrefix = "x86" in {
// NOTE: These comparison intrinsics are not used by clang as long as the
// distinction in signaling behaviour is not implemented.
- def int_x86_avx512_cmp_ps_512 :
+ def int_x86_avx512_mask_cmp_ps_512 :
Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_cmp_pd_512 :
+ llvm_i32_ty, llvm_v16i1_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>]>;
+ def int_x86_avx512_mask_cmp_pd_512 :
Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_cmp_ps_256 :
+ llvm_i32_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>]>;
+ def int_x86_avx512_mask_cmp_ps_256 :
Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_cmp_pd_256 :
+ llvm_i32_ty, llvm_v8i1_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_x86_avx512_mask_cmp_pd_256 :
Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_cmp_ps_128 :
+ llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_x86_avx512_mask_cmp_ps_128 :
Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_cmp_pd_128 :
+ llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_x86_avx512_mask_cmp_pd_128 :
Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ llvm_i32_ty, llvm_v2i1_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_avx512_mask_cmp_ss :
GCCBuiltin<"__builtin_ia32_cmpss_mask">,
@@ -4948,6 +4947,59 @@ let TargetPrefix = "x86" in {
def int_x86_xresldtrk : GCCBuiltin<"__builtin_ia32_xresldtrk">,
Intrinsic<[], [], []>;
}
+
+//===----------------------------------------------------------------------===//
+// Key Locker
+let TargetPrefix = "x86" in {
+ def int_x86_loadiwkey : GCCBuiltin<"__builtin_ia32_loadiwkey">,
+ Intrinsic<[], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+ []>;
+ def int_x86_encodekey128 :
+ Intrinsic<[llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [llvm_i32_ty, llvm_v2i64_ty], []>;
+ def int_x86_encodekey256 :
+ Intrinsic<[llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>;
+ def int_x86_aesenc128kl :
+ Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>;
+ def int_x86_aesdec128kl :
+ Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>;
+ def int_x86_aesenc256kl :
+ Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>;
+ def int_x86_aesdec256kl :
+ Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>;
+ def int_x86_aesencwide128kl :
+ Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>;
+ def int_x86_aesdecwide128kl :
+ Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>;
+ def int_x86_aesencwide256kl :
+ Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>;
+ def int_x86_aesdecwide256kl :
+ Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>;
+}
+
//===----------------------------------------------------------------------===//
// AMX - Intel AMX extensions
@@ -4959,21 +5011,68 @@ let TargetPrefix = "x86" in {
def int_x86_tilerelease : GCCBuiltin<"__builtin_ia32_tilerelease">,
Intrinsic<[], [], []>;
def int_x86_tilezero : GCCBuiltin<"__builtin_ia32_tilezero">,
- Intrinsic<[], [llvm_i8_ty], []>;
+ Intrinsic<[], [llvm_i8_ty], [ImmArg<ArgIndex<0>>]>;
def int_x86_tileloadd64 : GCCBuiltin<"__builtin_ia32_tileloadd64">,
- Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], []>;
+ Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
+ [ImmArg<ArgIndex<0>>]>;
def int_x86_tileloaddt164 : GCCBuiltin<"__builtin_ia32_tileloaddt164">,
- Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], []>;
+ Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
+ [ImmArg<ArgIndex<0>>]>;
def int_x86_tilestored64 : GCCBuiltin<"__builtin_ia32_tilestored64">,
- Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], []>;
+ Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
+ [ImmArg<ArgIndex<0>>]>;
def int_x86_tdpbssd : GCCBuiltin<"__builtin_ia32_tdpbssd">,
- Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], []>;
+ Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>]>;
def int_x86_tdpbsud : GCCBuiltin<"__builtin_ia32_tdpbsud">,
- Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], []>;
+ Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>]>;
def int_x86_tdpbusd : GCCBuiltin<"__builtin_ia32_tdpbusd">,
- Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], []>;
+ Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>]>;
def int_x86_tdpbuud : GCCBuiltin<"__builtin_ia32_tdpbuud">,
- Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], []>;
+ Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>]>;
def int_x86_tdpbf16ps : GCCBuiltin<"__builtin_ia32_tdpbf16ps">,
- Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], []>;
+ Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>]>;
+ // AMX - internal intrinsics
+ def int_x86_tileloadd64_internal :
+ GCCBuiltin<"__builtin_ia32_tileloadd64_internal">,
+ Intrinsic<[llvm_x86amx_ty],
+ [llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
+ []>;
+ def int_x86_tdpbssd_internal :
+ GCCBuiltin<"__builtin_ia32_tdpbssd_internal">,
+ Intrinsic<[llvm_x86amx_ty],
+ [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
+ llvm_x86amx_ty, llvm_x86amx_ty,
+ llvm_x86amx_ty], []>;
+ def int_x86_tilestored64_internal :
+ GCCBuiltin<"__builtin_ia32_tilestored64_internal">,
+ Intrinsic<[], [llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty,
+ llvm_i64_ty, llvm_x86amx_ty], []>;
+ def int_x86_tilezero_internal :
+ GCCBuiltin<"__builtin_ia32_tilezero_internal">,
+ Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty],
+ []>;
+}
+
+//===----------------------------------------------------------------------===//
+// UINTR - User Level Interrupt
+
+let TargetPrefix = "x86" in {
+ def int_x86_clui : GCCBuiltin<"__builtin_ia32_clui">,
+ Intrinsic<[], [], []>;
+ def int_x86_stui : GCCBuiltin<"__builtin_ia32_stui">,
+ Intrinsic<[], [], []>;
+ def int_x86_testui : GCCBuiltin<"__builtin_ia32_testui">,
+ Intrinsic<[llvm_i8_ty], [], []>;
+ def int_x86_senduipi : GCCBuiltin<"__builtin_ia32_senduipi">,
+ Intrinsic<[], [llvm_i64_ty], []>;
}
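The AMX hunk above adds ImmArg annotations to the tile-register operands and introduces the *_internal variants plus the UINTR and Key Locker intrinsics. With ImmArg in place, the IR verifier should reject calls whose tile operands are not constants. A minimal sketch, not part of this patch, with the helper name and setup assumed:

    // Sketch: emit llvm.x86.tilezero for a fixed tile number.
    // Assumes M and B are an existing Module and IRBuilder<>.
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicsX86.h"
    #include "llvm/IR/Module.h"
    #include <cstdint>

    using namespace llvm;

    static void emitTileZero(Module &M, IRBuilder<> &B, uint8_t TileNo) {
      Function *TZ = Intrinsic::getDeclaration(&M, Intrinsic::x86_tilezero);
      // ImmArg<ArgIndex<0>>: TileNo becomes a ConstantInt here; a runtime i8
      // value would no longer verify.
      B.CreateCall(TZ, {B.getInt8(TileNo)});
    }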
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/LLVMContext.h b/contrib/llvm-project/llvm/include/llvm/IR/LLVMContext.h
index c465e02c2fc5..8f8a35d07c64 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/LLVMContext.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/LLVMContext.h
@@ -222,13 +222,23 @@ public:
void setDiagnosticsHotnessRequested(bool Requested);
/// Return the minimum hotness value a diagnostic would need in order
- /// to be included in optimization diagnostics. If there is no minimum, this
- /// returns None.
+ /// to be included in optimization diagnostics.
+ ///
+ /// Three possible return values:
+ /// 0 - threshold is disabled. Everything will be printed out.
+ /// positive int - threshold is set.
+ /// UINT64_MAX - threshold is not yet set, and needs to be synced from
+ /// profile summary. Note that in case of missing profile
+  ///                summary, threshold will be kept at "MAX", effectively
+  ///                suppressing all remarks output.
uint64_t getDiagnosticsHotnessThreshold() const;
/// Set the minimum hotness value a diagnostic needs in order to be
/// included in optimization diagnostics.
- void setDiagnosticsHotnessThreshold(uint64_t Threshold);
+ void setDiagnosticsHotnessThreshold(Optional<uint64_t> Threshold);
+
+ /// Return if hotness threshold is requested from PSI.
+ bool isDiagnosticsHotnessThresholdSetFromPSI() const;
/// The "main remark streamer" used by all the specialized remark streamers.
/// This streamer keeps generic remark metadata in memory throughout the life
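The new comment spells out a three-way encoding for the hotness threshold. A minimal sketch, not part of this patch, of how a remark consumer might interpret it; the helper and its policy are assumptions based only on the documented values:

    // Sketch: decide whether a remark with a given hotness should be emitted.
    #include "llvm/IR/LLVMContext.h"
    #include <cstdint>

    using namespace llvm;

    static bool shouldEmitRemark(LLVMContext &Ctx, uint64_t Hotness) {
      uint64_t T = Ctx.getDiagnosticsHotnessThreshold();
      if (T == 0)          // threshold disabled: everything is printed
        return true;
      if (T == UINT64_MAX) // not yet synced from the profile summary: suppress
        return false;
      return Hotness >= T; // positive threshold is set
    }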
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/LLVMRemarkStreamer.h b/contrib/llvm-project/llvm/include/llvm/IR/LLVMRemarkStreamer.h
index 97082a44e62f..e7627e993370 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/LLVMRemarkStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/LLVMRemarkStreamer.h
@@ -79,16 +79,15 @@ Expected<std::unique_ptr<ToolOutputFile>>
setupLLVMOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
StringRef RemarksPasses, StringRef RemarksFormat,
bool RemarksWithHotness,
- unsigned RemarksHotnessThreshold = 0);
+ Optional<uint64_t> RemarksHotnessThreshold = 0);
/// Setup optimization remarks that output directly to a raw_ostream.
/// \p OS is managed by the caller and should be open for writing as long as \p
/// Context is streaming remarks to it.
-Error setupLLVMOptimizationRemarks(LLVMContext &Context, raw_ostream &OS,
- StringRef RemarksPasses,
- StringRef RemarksFormat,
- bool RemarksWithHotness,
- unsigned RemarksHotnessThreshold = 0);
+Error setupLLVMOptimizationRemarks(
+ LLVMContext &Context, raw_ostream &OS, StringRef RemarksPasses,
+ StringRef RemarksFormat, bool RemarksWithHotness,
+ Optional<uint64_t> RemarksHotnessThreshold = 0);
} // end namespace llvm
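Both overloads now take the hotness threshold as Optional<uint64_t> rather than unsigned, which widens the value range and appears intended to let callers pass None when the threshold should instead be synced from the profile summary (compare the UINT64_MAX state documented for LLVMContext above). A minimal sketch of calling the raw_ostream overload, not part of this patch, with the pass filter and format strings as placeholder assumptions:

    // Sketch: enable YAML optimization remarks on an already-open stream.
    #include "llvm/IR/LLVMRemarkStreamer.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static Error enableRemarks(LLVMContext &Ctx, raw_ostream &OS) {
      return setupLLVMOptimizationRemarks(Ctx, OS,
                                          /*RemarksPasses=*/"",
                                          /*RemarksFormat=*/"yaml",
                                          /*RemarksWithHotness=*/true,
                                          /*RemarksHotnessThreshold=*/None);
    }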
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManagers.h b/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManagers.h
index 6b1ddd4d79f8..f4fae184e428 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManagers.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManagers.h
@@ -88,7 +88,6 @@
namespace llvm {
template <typename T> class ArrayRef;
class Module;
-class Pass;
class StringRef;
class Value;
class Timer;
@@ -231,11 +230,11 @@ private:
// Map to keep track of last user of the analysis pass.
// LastUser->second is the last user of Lastuser->first.
+ // This is kept in sync with InversedLastUser.
DenseMap<Pass *, Pass *> LastUser;
// Map to keep track of passes that are last used by a pass.
- // This inverse map is initialized at PM->run() based on
- // LastUser map.
+ // This is kept in sync with LastUser.
DenseMap<Pass *, SmallPtrSet<Pass *, 8> > InversedLastUser;
/// Immutable passes are managed by top level manager.
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/MDBuilder.h b/contrib/llvm-project/llvm/include/llvm/IR/MDBuilder.h
index 11e2e2623257..51be8667f1c1 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/MDBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/MDBuilder.h
@@ -76,9 +76,8 @@ public:
/// Return metadata containing the section prefix for a function.
MDNode *createFunctionSectionPrefix(StringRef Prefix);
- /// return metadata containing expected value
- MDNode *createMisExpect(uint64_t Index, uint64_t LikelyWeight,
- uint64_t UnlikelyWeight);
+ /// Return metadata containing the pseudo probe descriptor for a function.
+ MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, Function *F);
//===------------------------------------------------------------------===//
// Range metadata.
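createMisExpect is dropped and a createPseudoProbeDesc helper is added. A minimal sketch of constructing such a descriptor node, not part of this patch; where the GUID and Hash come from, and how the returned node is attached, are left out as assumptions:

    // Sketch: build a pseudo probe descriptor MDNode for a function.
    #include "llvm/IR/Function.h"
    #include "llvm/IR/MDBuilder.h"

    using namespace llvm;

    static MDNode *makeProbeDesc(Function &F, uint64_t GUID, uint64_t Hash) {
      MDBuilder MDB(F.getContext());
      return MDB.createPseudoProbeDesc(GUID, Hash, &F);
    }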
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/MatrixBuilder.h b/contrib/llvm-project/llvm/include/llvm/IR/MatrixBuilder.h
index 5d04b3563dd5..084b1d49569e 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/MatrixBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/MatrixBuilder.h
@@ -38,14 +38,19 @@ template <class IRBuilderTy> class MatrixBuilder {
Value *RHS) {
assert((LHS->getType()->isVectorTy() || RHS->getType()->isVectorTy()) &&
"One of the operands must be a matrix (embedded in a vector)");
- if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy())
+ if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) {
+ assert(!isa<ScalableVectorType>(LHS->getType()) &&
+ "LHS Assumed to be fixed width");
RHS = B.CreateVectorSplat(
- cast<VectorType>(LHS->getType())->getNumElements(), RHS,
+ cast<VectorType>(LHS->getType())->getElementCount(), RHS,
"scalar.splat");
- else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy())
+ } else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) {
+ assert(!isa<ScalableVectorType>(RHS->getType()) &&
+ "RHS Assumed to be fixed width");
LHS = B.CreateVectorSplat(
- cast<VectorType>(RHS->getType())->getNumElements(), LHS,
+ cast<VectorType>(RHS->getType())->getElementCount(), LHS,
"scalar.splat");
+ }
return {LHS, RHS};
}
@@ -155,14 +160,19 @@ public:
/// matrixes.
Value *CreateAdd(Value *LHS, Value *RHS) {
assert(LHS->getType()->isVectorTy() || RHS->getType()->isVectorTy());
- if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy())
+ if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) {
+ assert(!isa<ScalableVectorType>(LHS->getType()) &&
+ "LHS Assumed to be fixed width");
RHS = B.CreateVectorSplat(
- cast<VectorType>(LHS->getType())->getNumElements(), RHS,
+ cast<VectorType>(LHS->getType())->getElementCount(), RHS,
"scalar.splat");
- else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy())
+ } else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) {
+ assert(!isa<ScalableVectorType>(RHS->getType()) &&
+ "RHS Assumed to be fixed width");
LHS = B.CreateVectorSplat(
- cast<VectorType>(RHS->getType())->getNumElements(), LHS,
+ cast<VectorType>(RHS->getType())->getElementCount(), LHS,
"scalar.splat");
+ }
return cast<VectorType>(LHS->getType())
->getElementType()
@@ -175,14 +185,19 @@ public:
/// point matrixes.
Value *CreateSub(Value *LHS, Value *RHS) {
assert(LHS->getType()->isVectorTy() || RHS->getType()->isVectorTy());
- if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy())
+ if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) {
+ assert(!isa<ScalableVectorType>(LHS->getType()) &&
+ "LHS Assumed to be fixed width");
RHS = B.CreateVectorSplat(
- cast<VectorType>(LHS->getType())->getNumElements(), RHS,
+ cast<VectorType>(LHS->getType())->getElementCount(), RHS,
"scalar.splat");
- else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy())
+ } else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) {
+ assert(!isa<ScalableVectorType>(RHS->getType()) &&
+ "RHS Assumed to be fixed width");
LHS = B.CreateVectorSplat(
- cast<VectorType>(RHS->getType())->getNumElements(), LHS,
+ cast<VectorType>(RHS->getType())->getElementCount(), LHS,
"scalar.splat");
+ }
return cast<VectorType>(LHS->getType())
->getElementType()
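A minimal sketch of the pattern the MatrixBuilder hunks above switch to: splatting a scalar with the vector operand's ElementCount instead of a raw element count, guarded by the same fixed-width assertion. The helper name is illustrative.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include <cassert>

llvm::Value *splatToMatch(llvm::IRBuilder<> &B, llvm::Value *VecOperand,
                          llvm::Value *Scalar) {
  auto *VTy = llvm::cast<llvm::VectorType>(VecOperand->getType());
  assert(!llvm::isa<llvm::ScalableVectorType>(VTy) &&
         "operand assumed to be fixed width");
  // ElementCount carries the element count plus the scalable flag, so the
  // splat's shape stays tied to the vector operand's type.
  return B.CreateVectorSplat(VTy->getElementCount(), Scalar, "scalar.splat");
}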
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Metadata.def b/contrib/llvm-project/llvm/include/llvm/IR/Metadata.def
index 1df60cadac08..f31be8d1bc0c 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Metadata.def
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Metadata.def
@@ -114,6 +114,8 @@ HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode)
HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro)
HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile)
HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICommonBlock)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIStringType)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGenericSubrange)
#undef HANDLE_METADATA
#undef HANDLE_METADATA_LEAF
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h b/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h
index 46526c70ea3b..9a4480b75a30 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h
@@ -667,6 +667,12 @@ struct AAMDNodes {
/// The tag specifying the noalias scope.
MDNode *NoAlias = nullptr;
+ // Shift tbaa Metadata node to start off bytes later
+ static MDNode *ShiftTBAA(MDNode *M, size_t off);
+
+ // Shift tbaa.struct Metadata node to start off bytes later
+ static MDNode *ShiftTBAAStruct(MDNode *M, size_t off);
+
/// Given two sets of AAMDNodes that apply to the same pointer,
/// give the best AAMDNodes that are compatible with both (i.e. a set of
/// nodes whose allowable aliasing conclusions are a subset of those
@@ -680,6 +686,18 @@ struct AAMDNodes {
Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr;
return Result;
}
+
+ /// Create a new AAMDNode that describes this AAMDNode after applying a
+ /// constant offset to the start of the pointer
+ AAMDNodes shift(size_t Offset) {
+ AAMDNodes Result;
+ Result.TBAA = TBAA ? ShiftTBAA(TBAA, Offset) : nullptr;
+ Result.TBAAStruct =
+ TBAAStruct ? ShiftTBAAStruct(TBAAStruct, Offset) : nullptr;
+ Result.Scope = Scope;
+ Result.NoAlias = NoAlias;
+ return Result;
+ }
};
// Specialize DenseMapInfo for AAMDNodes.
@@ -1128,8 +1146,7 @@ class MDTuple : public MDNode {
StorageType Storage, bool ShouldCreate = true);
TempMDTuple cloneImpl() const {
- return getTemporary(getContext(),
- SmallVector<Metadata *, 4>(op_begin(), op_end()));
+ return getTemporary(getContext(), SmallVector<Metadata *, 4>(operands()));
}
public:
@@ -1190,6 +1207,33 @@ void TempMDNodeDeleter::operator()(MDNode *Node) const {
MDNode::deleteTemporary(Node);
}
+/// This is a simple wrapper around an MDNode which provides a higher-level
+/// interface by hiding the details of how alias analysis information is encoded
+/// in its operands.
+class AliasScopeNode {
+ const MDNode *Node = nullptr;
+
+public:
+ AliasScopeNode() = default;
+ explicit AliasScopeNode(const MDNode *N) : Node(N) {}
+
+ /// Get the MDNode for this AliasScopeNode.
+ const MDNode *getNode() const { return Node; }
+
+ /// Get the MDNode for this AliasScopeNode's domain.
+ const MDNode *getDomain() const {
+ if (Node->getNumOperands() < 2)
+ return nullptr;
+ return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ }
+ StringRef getName() const {
+ if (Node->getNumOperands() > 2)
+ if (MDString *N = dyn_cast_or_null<MDString>(Node->getOperand(2)))
+ return N->getString();
+ return StringRef();
+ }
+};
+
/// Typed iterator through MDNode operands.
///
/// An iterator that transforms an \a MDNode::iterator into an iterator over a
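A hedged sketch of reading an instruction's !alias.scope list through the new AliasScopeNode wrapper above; the function name is illustrative and the metadata-kind lookup uses the standard LLVMContext kind IDs.

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/raw_ostream.h"

void dumpAliasScopes(const llvm::Instruction &I) {
  llvm::MDNode *ScopeList = I.getMetadata(llvm::LLVMContext::MD_alias_scope);
  if (!ScopeList)
    return;
  for (const llvm::MDOperand &Op : ScopeList->operands()) {
    llvm::AliasScopeNode Scope(llvm::cast<llvm::MDNode>(Op.get()));
    llvm::errs() << "scope '" << Scope.getName() << "'"
                 << (Scope.getDomain() ? " (has domain)" : " (no domain)")
                 << "\n";
  }
}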
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Module.h b/contrib/llvm-project/llvm/include/llvm/IR/Module.h
index 3f97d048f862..3664b275114d 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Module.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Module.h
@@ -329,10 +329,6 @@ public:
/// \see LLVMContext::getOperandBundleTagID
void getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const;
- /// Return the type with the specified name, or null if there is none by that
- /// name.
- StructType *getTypeByName(StringRef Name) const;
-
std::vector<StructType *> getIdentifiedStructTypes() const;
/// @}
@@ -854,12 +850,11 @@ public:
/// Returns profile summary metadata. When IsCS is true, use the context
/// sensitive profile summary.
- Metadata *getProfileSummary(bool IsCS);
+ Metadata *getProfileSummary(bool IsCS) const;
/// @}
/// Returns whether semantic interposition is to be respected.
bool getSemanticInterposition() const;
- bool noSemanticInterposition() const;
/// Set whether semantic interposition is to be respected.
void setSemanticInterposition(bool);
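Since the hunk above drops Module::getTypeByName(), a brief hedged note: in this LLVM version the lookup is expected to live on StructType instead (declared in DerivedTypes.h, not shown in this patch excerpt), roughly as follows.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"

llvm::StructType *findNamedStruct(llvm::Module &M, llvm::StringRef Name) {
  // Was: M.getTypeByName(Name). The struct-name table is per-context, so the
  // replacement takes the LLVMContext rather than a Module.
  return llvm::StructType::getTypeByName(M.getContext(), Name);
}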
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/ModuleSummaryIndex.h b/contrib/llvm-project/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 12a829b14e36..d5a7ad63737a 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -562,12 +562,11 @@ public:
/// offsets from the beginning of the value that are passed.
struct Call {
uint64_t ParamNo = 0;
- GlobalValue::GUID Callee = 0;
+ ValueInfo Callee;
ConstantRange Offsets{/*BitWidth=*/RangeWidth, /*isFullSet=*/true};
Call() = default;
- Call(uint64_t ParamNo, GlobalValue::GUID Callee,
- const ConstantRange &Offsets)
+ Call(uint64_t ParamNo, ValueInfo Callee, const ConstantRange &Offsets)
: ParamNo(ParamNo), Callee(Callee), Offsets(Offsets) {}
};
@@ -597,7 +596,7 @@ public:
GlobalValue::LinkageTypes::AvailableExternallyLinkage,
/*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false,
/*CanAutoHide=*/false),
- /*InsCount=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
+ /*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
std::vector<ValueInfo>(), std::move(Edges),
std::vector<GlobalValue::GUID>(),
std::vector<FunctionSummary::VFuncId>(),
@@ -1061,6 +1060,9 @@ private:
// some were not. Set when the combined index is created during the thin link.
bool PartiallySplitLTOUnits = false;
+ /// True if some of the FunctionSummary contains a ParamAccess.
+ bool HasParamAccess = false;
+
std::set<std::string> CfiFunctionDefs;
std::set<std::string> CfiFunctionDecls;
@@ -1213,6 +1215,8 @@ public:
bool partiallySplitLTOUnits() const { return PartiallySplitLTOUnits; }
void setPartiallySplitLTOUnits() { PartiallySplitLTOUnits = true; }
+ bool hasParamAccess() const { return HasParamAccess; }
+
bool isGlobalValueLive(const GlobalValueSummary *GVS) const {
return !WithGlobalValueDeadStripping || GVS->isLive();
}
@@ -1284,6 +1288,8 @@ public:
/// Add a global value summary for the given ValueInfo.
void addGlobalValueSummary(ValueInfo VI,
std::unique_ptr<GlobalValueSummary> Summary) {
+ if (const FunctionSummary *FS = dyn_cast<FunctionSummary>(Summary.get()))
+ HasParamAccess |= !FS->paramAccesses().empty();
addOriginalName(VI.getGUID(), Summary->getOriginalName());
// Here we have a notionally const VI, but the value it points to is owned
// by the non-const *this.
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Operator.h b/contrib/llvm-project/llvm/include/llvm/IR/Operator.h
index acfacbd6c74e..945f7e46e142 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Operator.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Operator.h
@@ -568,6 +568,11 @@ public:
bool accumulateConstantOffset(
const DataLayout &DL, APInt &Offset,
function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr) const;
+
+ static bool accumulateConstantOffset(
+ Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL,
+ APInt &Offset,
+ function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr);
};
class PtrToIntOperator
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/OptBisect.h b/contrib/llvm-project/llvm/include/llvm/IR/OptBisect.h
index 1b2b0bd7acaa..6c2a1b01d897 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/OptBisect.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/OptBisect.h
@@ -15,6 +15,7 @@
#define LLVM_IR_OPTBISECT_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ManagedStatic.h"
namespace llvm {
@@ -32,7 +33,7 @@ public:
return true;
}
- /// isEnabled should return true before calling shouldRunPass
+ /// isEnabled() should return true before calling shouldRunPass().
virtual bool isEnabled() const { return false; }
};
@@ -55,6 +56,14 @@ public:
/// Checks the bisect limit to determine if the specified pass should run.
///
+ /// This forwards to checkPass().
+ bool shouldRunPass(const Pass *P, StringRef IRDescription) override;
+
+ /// isEnabled() should return true before calling shouldRunPass().
+ bool isEnabled() const override { return BisectEnabled; }
+
+ /// Checks the bisect limit to determine if the specified pass should run.
+ ///
/// If the bisect limit is set to -1, the function prints a message describing
/// the pass and the bisect number assigned to it and return true. Otherwise,
/// the function prints a message with the bisect number assigned to the
@@ -64,17 +73,16 @@ public:
/// Most passes should not call this routine directly. Instead, they are
/// called through helper routines provided by the pass base classes. For
/// instance, function passes should call FunctionPass::skipFunction().
- bool shouldRunPass(const Pass *P, StringRef IRDescription) override;
-
- /// isEnabled should return true before calling shouldRunPass
- bool isEnabled() const override { return BisectEnabled; }
-private:
bool checkPass(const StringRef PassName, const StringRef TargetDesc);
+private:
bool BisectEnabled = false;
unsigned LastBisectNum = 0;
};
+/// Singleton instance of the OptBisect class, so multiple pass managers don't
+/// need to coordinate their uses of OptBisect.
+extern ManagedStatic<OptBisect> OptBisector;
} // end namespace llvm
#endif // LLVM_IR_OPTBISECT_H
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PassInstrumentation.h b/contrib/llvm-project/llvm/include/llvm/IR/PassInstrumentation.h
index bcc434548e67..291f324b159a 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/PassInstrumentation.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PassInstrumentation.h
@@ -44,10 +44,6 @@
/// of a pass. For those callbacks returning false means pass will not be
/// executed.
///
-/// TODO: currently there is no way for a pass to opt-out of execution control
-/// (e.g. become unskippable). PassManager is the only entity that determines
-/// how pass instrumentation affects pass execution.
-///
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_PASSINSTRUMENTATION_H
@@ -56,6 +52,7 @@
#include "llvm/ADT/Any.h"
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
#include <type_traits>
namespace llvm {
@@ -71,13 +68,17 @@ public:
// to take them as constant pointers, wrapped with llvm::Any.
// For the case when IRUnit has been invalidated there is a different
// callback to use - AfterPassInvalidated.
+ // We call all BeforePassFuncs to determine if a pass should run or not.
+ // BeforeNonSkippedPassFuncs are called only if the pass should run.
// TODO: currently AfterPassInvalidated does not accept IRUnit, since passing
- // already invalidated IRUnit is unsafe. There are ways to handle invalidated IRUnits
- // in a safe way, and we might pursue that as soon as there is a useful instrumentation
- // that needs it.
+ // already invalidated IRUnit is unsafe. There are ways to handle invalidated
+ // IRUnits in a safe way, and we might pursue that as soon as there is a
+ // useful instrumentation that needs it.
using BeforePassFunc = bool(StringRef, Any);
- using AfterPassFunc = void(StringRef, Any);
- using AfterPassInvalidatedFunc = void(StringRef);
+ using BeforeSkippedPassFunc = void(StringRef, Any);
+ using BeforeNonSkippedPassFunc = void(StringRef, Any);
+ using AfterPassFunc = void(StringRef, Any, const PreservedAnalyses &);
+ using AfterPassInvalidatedFunc = void(StringRef, const PreservedAnalyses &);
using BeforeAnalysisFunc = void(StringRef, Any);
using AfterAnalysisFunc = void(StringRef, Any);
@@ -88,8 +89,19 @@ public:
PassInstrumentationCallbacks(const PassInstrumentationCallbacks &) = delete;
void operator=(const PassInstrumentationCallbacks &) = delete;
- template <typename CallableT> void registerBeforePassCallback(CallableT C) {
- BeforePassCallbacks.emplace_back(std::move(C));
+ template <typename CallableT>
+ void registerShouldRunOptionalPassCallback(CallableT C) {
+ ShouldRunOptionalPassCallbacks.emplace_back(std::move(C));
+ }
+
+ template <typename CallableT>
+ void registerBeforeSkippedPassCallback(CallableT C) {
+ BeforeSkippedPassCallbacks.emplace_back(std::move(C));
+ }
+
+ template <typename CallableT>
+ void registerBeforeNonSkippedPassCallback(CallableT C) {
+ BeforeNonSkippedPassCallbacks.emplace_back(std::move(C));
}
template <typename CallableT> void registerAfterPassCallback(CallableT C) {
@@ -111,17 +123,37 @@ public:
AfterAnalysisCallbacks.emplace_back(std::move(C));
}
+ /// Add a class name to pass name mapping for use by pass instrumentation.
+ void addClassToPassName(StringRef ClassName, StringRef PassName);
+ /// Get the pass name for a given pass class name.
+ StringRef getPassNameForClassName(StringRef ClassName);
+
private:
friend class PassInstrumentation;
- SmallVector<llvm::unique_function<BeforePassFunc>, 4> BeforePassCallbacks;
+ /// These are only run on passes that are not required. They return false when
+ /// an optional pass should be skipped.
+ SmallVector<llvm::unique_function<BeforePassFunc>, 4>
+ ShouldRunOptionalPassCallbacks;
+ /// These are run on passes that are skipped.
+ SmallVector<llvm::unique_function<BeforeSkippedPassFunc>, 4>
+ BeforeSkippedPassCallbacks;
+ /// These are run on passes that are about to be run.
+ SmallVector<llvm::unique_function<BeforeNonSkippedPassFunc>, 4>
+ BeforeNonSkippedPassCallbacks;
+ /// These are run on passes that have just run.
SmallVector<llvm::unique_function<AfterPassFunc>, 4> AfterPassCallbacks;
+ /// These are run on passes that have just run on invalidated IR.
SmallVector<llvm::unique_function<AfterPassInvalidatedFunc>, 4>
AfterPassInvalidatedCallbacks;
+ /// These are run on analyses that are about to be run.
SmallVector<llvm::unique_function<BeforeAnalysisFunc>, 4>
BeforeAnalysisCallbacks;
+ /// These are run on analyses that have been run.
SmallVector<llvm::unique_function<AfterAnalysisFunc>, 4>
AfterAnalysisCallbacks;
+
+ StringMap<std::string> ClassToPassName;
};
/// This class provides instrumentation entry points for the Pass Manager,
@@ -129,6 +161,26 @@ private:
class PassInstrumentation {
PassInstrumentationCallbacks *Callbacks;
+ // The template argument PassT of PassInstrumentation::runBeforePass can be
+ // one of two kinds: (1) a regular pass inherited from PassInfoMixin (this
+ // happens when creating an adaptor pass for a regular pass); (2) a
+ // type-erased PassConcept created from (1). Here we want to make case (1)
+ // skippable unconditionally since those are regular passes. We call
+ // PassConcept::isRequired to decide for case (2).
+ template <typename PassT>
+ using has_required_t = decltype(std::declval<PassT &>().isRequired());
+
+ template <typename PassT>
+ static std::enable_if_t<is_detected<has_required_t, PassT>::value, bool>
+ isRequired(const PassT &Pass) {
+ return Pass.isRequired();
+ }
+ template <typename PassT>
+ static std::enable_if_t<!is_detected<has_required_t, PassT>::value, bool>
+ isRequired(const PassT &Pass) {
+ return false;
+ }
+
public:
/// Callbacks object is not owned by PassInstrumentation, its life-time
/// should at least match the life-time of corresponding
@@ -139,15 +191,28 @@ public:
/// BeforePass instrumentation point - takes \p Pass instance to be executed
/// and constant reference to IR it operates on. \Returns true if pass is
- /// allowed to be executed.
+ /// allowed to be executed. These are only run on optional passes since required
+ /// passes must always be run. This allows these callbacks to print info when
+ /// they want to skip a pass.
template <typename IRUnitT, typename PassT>
bool runBeforePass(const PassT &Pass, const IRUnitT &IR) const {
if (!Callbacks)
return true;
bool ShouldRun = true;
- for (auto &C : Callbacks->BeforePassCallbacks)
- ShouldRun &= C(Pass.name(), llvm::Any(&IR));
+ if (!isRequired(Pass)) {
+ for (auto &C : Callbacks->ShouldRunOptionalPassCallbacks)
+ ShouldRun &= C(Pass.name(), llvm::Any(&IR));
+ }
+
+ if (ShouldRun) {
+ for (auto &C : Callbacks->BeforeNonSkippedPassCallbacks)
+ C(Pass.name(), llvm::Any(&IR));
+ } else {
+ for (auto &C : Callbacks->BeforeSkippedPassCallbacks)
+ C(Pass.name(), llvm::Any(&IR));
+ }
+
return ShouldRun;
}
@@ -155,20 +220,22 @@ public:
/// just been executed and constant reference to \p IR it operates on.
/// \p IR is guaranteed to be valid at this point.
template <typename IRUnitT, typename PassT>
- void runAfterPass(const PassT &Pass, const IRUnitT &IR) const {
+ void runAfterPass(const PassT &Pass, const IRUnitT &IR,
+ const PreservedAnalyses &PA) const {
if (Callbacks)
for (auto &C : Callbacks->AfterPassCallbacks)
- C(Pass.name(), llvm::Any(&IR));
+ C(Pass.name(), llvm::Any(&IR), PA);
}
/// AfterPassInvalidated instrumentation point - takes \p Pass instance
/// that has just been executed. For use when IR has been invalidated
/// by \p Pass execution.
template <typename IRUnitT, typename PassT>
- void runAfterPassInvalidated(const PassT &Pass) const {
+ void runAfterPassInvalidated(const PassT &Pass,
+ const PreservedAnalyses &PA) const {
if (Callbacks)
for (auto &C : Callbacks->AfterPassInvalidatedCallbacks)
- C(Pass.name());
+ C(Pass.name(), PA);
}
/// BeforeAnalysis instrumentation point - takes \p Analysis instance
@@ -199,8 +266,20 @@ public:
ExtraArgsT...) {
return false;
}
+
+ template <typename CallableT>
+ void pushBeforeNonSkippedPassCallback(CallableT C) {
+ if (Callbacks)
+ Callbacks->BeforeNonSkippedPassCallbacks.emplace_back(std::move(C));
+ }
+ void popBeforeNonSkippedPassCallback() {
+ if (Callbacks)
+ Callbacks->BeforeNonSkippedPassCallbacks.pop_back();
+ }
};
+bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials);
+
} // namespace llvm
#endif
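A minimal sketch of registering the reworked instrumentation callbacks declared above. The lambdas only log; obtaining the PassInstrumentationCallbacks object (normally via PassBuilder/StandardInstrumentations) is outside this snippet.

#include "llvm/IR/PassInstrumentation.h"
#include "llvm/Support/raw_ostream.h"

void installLoggingCallbacks(llvm::PassInstrumentationCallbacks &PIC) {
  // Runs only for passes that will actually execute.
  PIC.registerBeforeNonSkippedPassCallback(
      [](llvm::StringRef PassID, llvm::Any) {
        llvm::errs() << "running: " << PassID << "\n";
      });
  // AfterPass callbacks now also receive the PreservedAnalyses of the run.
  PIC.registerAfterPassCallback(
      [](llvm::StringRef PassID, llvm::Any, const llvm::PreservedAnalyses &) {
        llvm::errs() << "finished: " << PassID << "\n";
      });
}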
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PassManager.h b/contrib/llvm-project/llvm/include/llvm/IR/PassManager.h
index 4d5f292ba9a1..c669565aa33b 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/PassManager.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PassManager.h
@@ -38,6 +38,7 @@
#define LLVM_IR_PASSMANAGER_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
@@ -510,10 +511,6 @@ public:
if (!PI.runBeforePass<IRUnitT>(*P, IR))
continue;
- if (DebugLogging)
- dbgs() << "Running pass: " << P->name() << " on " << IR.getName()
- << "\n";
-
PreservedAnalyses PassPA;
{
TimeTraceScope TimeScope(P->name(), IR.getName());
@@ -522,7 +519,7 @@ public:
// Call onto PassInstrumentation's AfterPass callbacks immediately after
// running the pass.
- PI.runAfterPass<IRUnitT>(*P, IR);
+ PI.runAfterPass<IRUnitT>(*P, IR, PassPA);
// Update the analysis manager as each pass runs and potentially
// invalidates analyses.
@@ -551,7 +548,9 @@ public:
return PA;
}
- template <typename PassT> void addPass(PassT Pass) {
+ template <typename PassT>
+ std::enable_if_t<!std::is_same<PassT, PassManager>::value>
+ addPass(PassT Pass) {
using PassModelT =
detail::PassModel<IRUnitT, PassT, PreservedAnalyses, AnalysisManagerT,
ExtraArgTs...>;
@@ -559,7 +558,24 @@ public:
Passes.emplace_back(new PassModelT(std::move(Pass)));
}
-private:
+ /// When adding a pass manager pass that has the same type as this pass
+ /// manager, simply move the passes over. This is because we don't have use
+ /// cases that rely on executing nested pass managers. Doing this could reduce
+ /// implementation complexity and avoid potential invalidation issues that may
+ /// happen with nested pass managers of the same type.
+ template <typename PassT>
+ std::enable_if_t<std::is_same<PassT, PassManager>::value>
+ addPass(PassT &&Pass) {
+ for (auto &P : Pass.Passes)
+ Passes.emplace_back(std::move(P));
+ }
+
+ /// Returns if the pass manager contains any passes.
+ bool isEmpty() const { return Passes.empty(); }
+
+ static bool isRequired() { return true; }
+
+protected:
using PassConceptT =
detail::PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...>;
@@ -649,7 +665,7 @@ public:
/// when any of its embedded analysis results end up invalidated. We pass an
/// \c Invalidator object as an argument to \c invalidate() in order to let
/// the analysis results themselves define the dependency graph on the fly.
- /// This lets us avoid building building an explicit representation of the
+ /// This lets us avoid building an explicit representation of the
/// dependencies between analysis results.
class Invalidator {
public:
@@ -844,7 +860,7 @@ public:
return true;
}
- /// Invalidate a specific analysis pass for an IR module.
+ /// Invalidate a specific analysis pass for an IR unit.
///
/// Note that the analysis result can disregard invalidation, if it determines
/// it is in fact still valid.
@@ -888,7 +904,7 @@ private:
return RI == AnalysisResults.end() ? nullptr : &*RI->second->second;
}
- /// Invalidate a function pass result.
+ /// Invalidate a pass result for an IR unit.
void invalidateImpl(AnalysisKey *ID, IRUnitT &IR) {
typename AnalysisResultMapT::iterator RI =
AnalysisResults.find({ID, &IR});
@@ -902,20 +918,20 @@ private:
AnalysisResults.erase(RI);
}
- /// Map type from module analysis pass ID to pass concept pointer.
+ /// Map type from analysis pass ID to pass concept pointer.
using AnalysisPassMapT =
DenseMap<AnalysisKey *, std::unique_ptr<PassConceptT>>;
- /// Collection of module analysis passes, indexed by ID.
+ /// Collection of analysis passes, indexed by ID.
AnalysisPassMapT AnalysisPasses;
- /// Map from function to a list of function analysis results.
+ /// Map from IR unit to a list of analysis results.
///
- /// Provides linear time removal of all analysis results for a function and
+ /// Provides linear time removal of all analysis results for an IR unit and
/// the ultimate storage for a particular cached analysis result.
AnalysisResultListMapT AnalysisResultLists;
- /// Map from an analysis ID and function to a particular cached
+ /// Map from an analysis ID and IR unit to a particular cached
/// analysis result.
AnalysisResultMapT AnalysisResults;
@@ -1059,7 +1075,16 @@ extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
///
/// This proxy only exposes the const interface of the outer analysis manager,
/// to indicate that you cannot cause an outer analysis to run from within an
-/// inner pass. Instead, you must rely on the \c getCachedResult API.
+/// inner pass. Instead, you must rely on the \c getCachedResult API. This is
+/// due to keeping potential future concurrency in mind. To give an example,
+/// running a module analysis before any function passes may give a different
+/// result than running it in a function pass. Both may be valid, but it would
+/// produce non-deterministic results. GlobalsAA is a good analysis example,
+/// because the cached information has the mod/ref info for all memory for each
+/// function at the time the analysis was computed. The information is still
+/// valid after a function transformation, but it may be *different* if
+/// recomputed after that transform. GlobalsAA is never invalidated.
+
///
/// This proxy doesn't manage invalidation in any way -- that is handled by the
/// recursive return path of each layer of the pass manager. A consequence of
@@ -1104,9 +1129,9 @@ public:
for (auto &KeyValuePair : OuterAnalysisInvalidationMap) {
AnalysisKey *OuterID = KeyValuePair.first;
auto &InnerIDs = KeyValuePair.second;
- InnerIDs.erase(llvm::remove_if(InnerIDs, [&](AnalysisKey *InnerID) {
- return Inv.invalidate(InnerID, IRUnit, PA); }),
- InnerIDs.end());
+ llvm::erase_if(InnerIDs, [&](AnalysisKey *InnerID) {
+ return Inv.invalidate(InnerID, IRUnit, PA);
+ });
if (InnerIDs.empty())
DeadKeys.push_back(OuterID);
}
@@ -1130,9 +1155,7 @@ public:
// analyses that all trigger invalidation on the same outer analysis,
// this entire system should be changed to some other deterministic
// data structure such as a `SetVector` of a pair of pointers.
- auto InvalidatedIt = std::find(InvalidatedIDList.begin(),
- InvalidatedIDList.end(), InvalidatedID);
- if (InvalidatedIt == InvalidatedIDList.end())
+ if (!llvm::is_contained(InvalidatedIDList, InvalidatedID))
InvalidatedIDList.push_back(InvalidatedID);
}
@@ -1205,71 +1228,34 @@ using ModuleAnalysisManagerFunctionProxy =
/// Note that although function passes can access module analyses, module
/// analyses are not invalidated while the function passes are running, so they
/// may be stale. Function analyses will not be stale.
-template <typename FunctionPassT>
class ModuleToFunctionPassAdaptor
- : public PassInfoMixin<ModuleToFunctionPassAdaptor<FunctionPassT>> {
+ : public PassInfoMixin<ModuleToFunctionPassAdaptor> {
public:
- explicit ModuleToFunctionPassAdaptor(FunctionPassT Pass)
+ using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>;
+
+ explicit ModuleToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass)
: Pass(std::move(Pass)) {}
/// Runs the function pass across every function in the module.
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
-
- // Request PassInstrumentation from analysis manager, will use it to run
- // instrumenting callbacks for the passes later.
- PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
-
- PreservedAnalyses PA = PreservedAnalyses::all();
- for (Function &F : M) {
- if (F.isDeclaration())
- continue;
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
- // Check the PassInstrumentation's BeforePass callbacks before running the
- // pass, skip its execution completely if asked to (callback returns
- // false).
- if (!PI.runBeforePass<Function>(Pass, F))
- continue;
-
- PreservedAnalyses PassPA;
- {
- TimeTraceScope TimeScope(Pass.name(), F.getName());
- PassPA = Pass.run(F, FAM);
- }
-
- PI.runAfterPass(Pass, F);
-
- // We know that the function pass couldn't have invalidated any other
- // function's analyses (that's the contract of a function pass), so
- // directly handle the function analysis manager's invalidation here.
- FAM.invalidate(F, PassPA);
-
- // Then intersect the preserved set so that invalidation of module
- // analyses will eventually occur when the module pass completes.
- PA.intersect(std::move(PassPA));
- }
-
- // The FunctionAnalysisManagerModuleProxy is preserved because (we assume)
- // the function passes we ran didn't add or remove any functions.
- //
- // We also preserve all analyses on Functions, because we did all the
- // invalidation we needed to do above.
- PA.preserveSet<AllAnalysesOn<Function>>();
- PA.preserve<FunctionAnalysisManagerModuleProxy>();
- return PA;
- }
+ static bool isRequired() { return true; }
private:
- FunctionPassT Pass;
+ std::unique_ptr<PassConceptT> Pass;
};
/// A function to deduce a function pass type and wrap it in the
/// templated adaptor.
template <typename FunctionPassT>
-ModuleToFunctionPassAdaptor<FunctionPassT>
+ModuleToFunctionPassAdaptor
createModuleToFunctionPassAdaptor(FunctionPassT Pass) {
- return ModuleToFunctionPassAdaptor<FunctionPassT>(std::move(Pass));
+ using PassModelT =
+ detail::PassModel<Function, FunctionPassT, PreservedAnalyses,
+ FunctionAnalysisManager>;
+
+ return ModuleToFunctionPassAdaptor(
+ std::make_unique<PassModelT>(std::move(Pass)));
}
/// A utility pass template to force an analysis result to be available.
@@ -1300,6 +1286,7 @@ struct RequireAnalysisPass
return PreservedAnalyses::all();
}
+ static bool isRequired() { return true; }
};
/// A no-op pass template which simply forces a specific analysis result
@@ -1360,8 +1347,9 @@ public:
// false).
if (!PI.runBeforePass<IRUnitT>(P, IR))
continue;
- PA.intersect(P.run(IR, AM, std::forward<Ts>(Args)...));
- PI.runAfterPass(P, IR);
+ PreservedAnalyses IterPA = P.run(IR, AM, std::forward<Ts>(Args)...);
+ PA.intersect(IterPA);
+ PI.runAfterPass(P, IR, IterPA);
}
return PA;
}
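A sketch, using a made-up no-op pass, of how the type-erasing adaptor factory above is used to nest function passes inside a module pipeline.

#include "llvm/IR/PassManager.h"
#include <utility>

struct NoOpFunctionPass : llvm::PassInfoMixin<NoOpFunctionPass> {
  llvm::PreservedAnalyses run(llvm::Function &,
                              llvm::FunctionAnalysisManager &) {
    return llvm::PreservedAnalyses::all();
  }
};

void buildPipeline(llvm::ModulePassManager &MPM) {
  llvm::FunctionPassManager FPM;
  FPM.addPass(NoOpFunctionPass());
  // The adaptor now stores a type-erased PassConcept (see the diff above), so
  // every instantiation shares the single out-of-line run() implementation.
  MPM.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(FPM)));
}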
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PassManagerImpl.h b/contrib/llvm-project/llvm/include/llvm/IR/PassManagerImpl.h
index 978655ac69c4..71a86d1efb15 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/PassManagerImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PassManagerImpl.h
@@ -64,9 +64,6 @@ AnalysisManager<IRUnitT, ExtraArgTs...>::getResultImpl(
// run it to produce a result, which we then add to the cache.
if (Inserted) {
auto &P = this->lookUpPass(ID);
- if (DebugLogging)
- dbgs() << "Running analysis: " << P.name() << " on " << IR.getName()
- << "\n";
PassInstrumentation PI;
if (ID != PassInstrumentationAnalysis::ID()) {
@@ -97,10 +94,6 @@ inline void AnalysisManager<IRUnitT, ExtraArgTs...>::invalidate(
if (PA.allAnalysesInSetPreserved<AllAnalysesOn<IRUnitT>>())
return;
- if (DebugLogging)
- dbgs() << "Invalidating all non-preserved analyses for: " << IR.getName()
- << "\n";
-
// Track whether each analysis's result is invalidated in
// IsResultInvalidated.
SmallDenseMap<AnalysisKey *, bool, 8> IsResultInvalidated;
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h b/contrib/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h
index c602c0b5cc20..986ed0b5a7ac 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h
@@ -48,6 +48,12 @@ struct PassConcept {
/// Polymorphic method to access the name of a pass.
virtual StringRef name() const = 0;
+
+ /// Polymorphic method to let a pass be optionally exempted from skipping by
+ /// PassInstrumentation.
+ /// To opt in, a pass should implement `static bool isRequired()`. Having
+ /// `isRequired` always return false is a no-op since that is the default.
+ virtual bool isRequired() const = 0;
};
/// A template wrapper used to implement the polymorphic API.
@@ -81,6 +87,22 @@ struct PassModel : PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...> {
StringRef name() const override { return PassT::name(); }
+ template <typename T>
+ using has_required_t = decltype(std::declval<T &>().isRequired());
+
+ template <typename T>
+ static std::enable_if_t<is_detected<has_required_t, T>::value, bool>
+ passIsRequiredImpl() {
+ return T::isRequired();
+ }
+ template <typename T>
+ static std::enable_if_t<!is_detected<has_required_t, T>::value, bool>
+ passIsRequiredImpl() {
+ return false;
+ }
+
+ bool isRequired() const override { return passIsRequiredImpl<PassT>(); }
+
PassT Pass;
};
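A short sketch of the opt-in this detection enables: a pass that defines the static isRequired() hook is reported as required through PassModel::isRequired() and is therefore never skipped by instrumentation. The pass itself is a made-up example.

#include "llvm/IR/PassManager.h"

struct AlwaysRunPass : llvm::PassInfoMixin<AlwaysRunPass> {
  llvm::PreservedAnalyses run(llvm::Function &,
                              llvm::FunctionAnalysisManager &) {
    return llvm::PreservedAnalyses::all();
  }
  // Picked up via has_required_t above; without it, isRequired() defaults to
  // returning false and the pass stays skippable.
  static bool isRequired() { return true; }
};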
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PassTimingInfo.h b/contrib/llvm-project/llvm/include/llvm/IR/PassTimingInfo.h
index b70850fd64d7..e44321b4af66 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/PassTimingInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PassTimingInfo.h
@@ -17,11 +17,13 @@
#include "llvm/ADT/Any.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Timer.h"
-#include "llvm/Support/TypeName.h"
#include <memory>
+#include <utility>
+
namespace llvm {
class Pass;
@@ -36,11 +38,6 @@ void reportAndResetTimings(raw_ostream *OutStream = nullptr);
/// Request the timer for this legacy-pass-manager's pass instance.
Timer *getPassTimer(Pass *);
-/// If the user specifies the -time-passes argument on an LLVM tool command line
-/// then the value of this boolean will be true, otherwise false.
-/// This is the storage for the -time-passes option.
-extern bool TimePassesIsEnabled;
-
/// This class implements -time-passes functionality for new pass manager.
/// It provides the pass-instrumentation callbacks that measure the pass
/// execution time. They collect timing info into individual timers as
@@ -68,9 +65,11 @@ class TimePassesHandler {
raw_ostream *OutStream = nullptr;
bool Enabled;
+ bool PerRun;
public:
- TimePassesHandler(bool Enabled = TimePassesIsEnabled);
+ TimePassesHandler();
+ TimePassesHandler(bool Enabled, bool PerRun = false);
/// Destructor handles the print action if it has not been handled before.
~TimePassesHandler() { print(); }
@@ -98,7 +97,7 @@ private:
void stopTimer(StringRef PassID);
// Implementation of pass instrumentation callbacks.
- bool runBeforePass(StringRef PassID);
+ void runBeforePass(StringRef PassID);
void runAfterPass(StringRef PassID);
};
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h b/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h
index 4c11bc82510b..166ad23de969 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h
@@ -88,16 +88,29 @@ inline class_match<BinaryOperator> m_BinOp() {
/// Matches any compare instruction and ignore it.
inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); }
+/// Match an arbitrary undef constant.
+inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
+
+/// Match an arbitrary poison constant.
+inline class_match<PoisonValue> m_Poison() { return class_match<PoisonValue>(); }
+
+/// Match an arbitrary Constant and ignore it.
+inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
+
/// Match an arbitrary ConstantInt and ignore it.
inline class_match<ConstantInt> m_ConstantInt() {
return class_match<ConstantInt>();
}
-/// Match an arbitrary undef constant.
-inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
+/// Match an arbitrary ConstantFP and ignore it.
+inline class_match<ConstantFP> m_ConstantFP() {
+ return class_match<ConstantFP>();
+}
-/// Match an arbitrary Constant and ignore it.
-inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
+/// Match an arbitrary ConstantExpr and ignore it.
+inline class_match<ConstantExpr> m_ConstantExpr() {
+ return class_match<ConstantExpr>();
+}
/// Match an arbitrary basic block value and ignore it.
inline class_match<BasicBlock> m_BasicBlock() {
@@ -335,6 +348,33 @@ template <typename Predicate> struct api_pred_ty : public Predicate {
}
};
+/// This helper class is used to match scalar and vector constants that
+/// satisfy a specified predicate, and bind them to an APFloat.
+/// Undefs are allowed in splat vector constants.
+template <typename Predicate> struct apf_pred_ty : public Predicate {
+ const APFloat *&Res;
+
+ apf_pred_ty(const APFloat *&R) : Res(R) {}
+
+ template <typename ITy> bool match(ITy *V) {
+ if (const auto *CI = dyn_cast<ConstantFP>(V))
+ if (this->isValue(CI->getValue())) {
+ Res = &CI->getValue();
+ return true;
+ }
+ if (V->getType()->isVectorTy())
+ if (const auto *C = dyn_cast<Constant>(V))
+ if (auto *CI = dyn_cast_or_null<ConstantFP>(
+ C->getSplatValue(/* AllowUndef */ true)))
+ if (this->isValue(CI->getValue())) {
+ Res = &CI->getValue();
+ return true;
+ }
+
+ return false;
+ }
+};
+
///////////////////////////////////////////////////////////////////////////////
//
// Encapsulate constant value queries for use in templated predicate matchers.
@@ -555,6 +595,15 @@ inline cstfp_pred_ty<is_nan> m_NaN() {
return cstfp_pred_ty<is_nan>();
}
+struct is_nonnan {
+ bool isValue(const APFloat &C) { return !C.isNaN(); }
+};
+/// Match a non-NaN FP constant.
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_nonnan> m_NonNaN() {
+ return cstfp_pred_ty<is_nonnan>();
+}
+
struct is_inf {
bool isValue(const APFloat &C) { return C.isInfinity(); }
};
@@ -564,6 +613,37 @@ inline cstfp_pred_ty<is_inf> m_Inf() {
return cstfp_pred_ty<is_inf>();
}
+struct is_noninf {
+ bool isValue(const APFloat &C) { return !C.isInfinity(); }
+};
+/// Match a non-infinity FP constant, i.e. finite or NaN.
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_noninf> m_NonInf() {
+ return cstfp_pred_ty<is_noninf>();
+}
+
+struct is_finite {
+ bool isValue(const APFloat &C) { return C.isFinite(); }
+};
+/// Match a finite FP constant, i.e. not infinity or NaN.
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_finite> m_Finite() {
+ return cstfp_pred_ty<is_finite>();
+}
+inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; }
+
+struct is_finitenonzero {
+ bool isValue(const APFloat &C) { return C.isFiniteNonZero(); }
+};
+/// Match a finite non-zero FP constant.
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() {
+ return cstfp_pred_ty<is_finitenonzero>();
+}
+inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) {
+ return V;
+}
+
struct is_any_zero_fp {
bool isValue(const APFloat &C) { return C.isZero(); }
};
@@ -591,6 +671,15 @@ inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() {
return cstfp_pred_ty<is_neg_zero_fp>();
}
+struct is_non_zero_fp {
+ bool isValue(const APFloat &C) { return C.isNonZero(); }
+};
+/// Match a floating-point non-zero.
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_non_zero_fp> m_NonZeroFP() {
+ return cstfp_pred_ty<is_non_zero_fp>();
+}
+
///////////////////////////////////////////////////////////////////////////////
template <typename Class> struct bind_ty {
@@ -620,21 +709,38 @@ inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
/// Match a with overflow intrinsic, capturing it if we match.
inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; }
-/// Match a ConstantInt, capturing the value if we match.
-inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
-
/// Match a Constant, capturing the value if we match.
inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
+/// Match a ConstantInt, capturing the value if we match.
+inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
+
/// Match a ConstantFP, capturing the value if we match.
inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
+/// Match a ConstantExpr, capturing the value if we match.
+inline bind_ty<ConstantExpr> m_ConstantExpr(ConstantExpr *&C) { return C; }
+
/// Match a basic block value, capturing it if we match.
inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; }
inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) {
return V;
}
+/// Match an arbitrary immediate Constant and ignore it.
+inline match_combine_and<class_match<Constant>,
+ match_unless<class_match<ConstantExpr>>>
+m_ImmConstant() {
+ return m_CombineAnd(m_Constant(), m_Unless(m_ConstantExpr()));
+}
+
+/// Match an immediate Constant, capturing the value if we match.
+inline match_combine_and<bind_ty<Constant>,
+ match_unless<class_match<ConstantExpr>>>
+m_ImmConstant(Constant *&C) {
+ return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr()));
+}
+
/// Match a specified Value*.
struct specificval_ty {
const Value *Val;
@@ -705,6 +811,7 @@ struct bind_const_intval_ty {
/// Match a specified integer value or vector of all elements of that
/// value.
+template <bool AllowUndefs>
struct specific_intval {
APInt Val;
@@ -714,7 +821,7 @@ struct specific_intval {
const auto *CI = dyn_cast<ConstantInt>(V);
if (!CI && V->getType()->isVectorTy())
if (const auto *C = dyn_cast<Constant>(V))
- CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue());
+ CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndefs));
return CI && APInt::isSameValue(CI->getValue(), Val);
}
@@ -722,14 +829,22 @@ struct specific_intval {
/// Match a specific integer value or vector with all elements equal to
/// the value.
-inline specific_intval m_SpecificInt(APInt V) {
- return specific_intval(std::move(V));
+inline specific_intval<false> m_SpecificInt(APInt V) {
+ return specific_intval<false>(std::move(V));
}
-inline specific_intval m_SpecificInt(uint64_t V) {
+inline specific_intval<false> m_SpecificInt(uint64_t V) {
return m_SpecificInt(APInt(64, V));
}
+inline specific_intval<true> m_SpecificIntAllowUndef(APInt V) {
+ return specific_intval<true>(std::move(V));
+}
+
+inline specific_intval<true> m_SpecificIntAllowUndef(uint64_t V) {
+ return m_SpecificIntAllowUndef(APInt(64, V));
+}
+
/// Match a ConstantInt and bind to its value. This does not match
/// ConstantInts wider than 64-bits.
inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
@@ -1442,6 +1557,12 @@ inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) {
return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
}
+/// Matches IntToPtr.
+template <typename OpTy>
+inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) {
+ return CastClass_match<OpTy, Instruction::IntToPtr>(Op);
+}
+
/// Matches Trunc.
template <typename OpTy>
inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
@@ -1590,6 +1711,17 @@ struct MaxMin_match {
MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
template <typename OpTy> bool match(OpTy *V) {
+ if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+ Intrinsic::ID IID = II->getIntrinsicID();
+ if ((IID == Intrinsic::smax && Pred_t::match(ICmpInst::ICMP_SGT)) ||
+ (IID == Intrinsic::smin && Pred_t::match(ICmpInst::ICMP_SLT)) ||
+ (IID == Intrinsic::umax && Pred_t::match(ICmpInst::ICMP_UGT)) ||
+ (IID == Intrinsic::umin && Pred_t::match(ICmpInst::ICMP_ULT))) {
+ Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
+ return (L.match(LHS) && R.match(RHS)) ||
+ (Commutable && L.match(RHS) && R.match(LHS));
+ }
+ }
// Look for "(x pred y) ? x : y" or "(x pred y) ? y : x".
auto *SI = dyn_cast<SelectInst>(V);
if (!SI)
@@ -1697,6 +1829,17 @@ inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L,
return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R);
}
+template <typename LHS, typename RHS>
+inline match_combine_or<
+ match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>,
+ MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>>,
+ match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>,
+ MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>>>
+m_MaxOrMin(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_CombineOr(m_SMax(L, R), m_SMin(L, R)),
+ m_CombineOr(m_UMax(L, R), m_UMin(L, R)));
+}
+
/// Match an 'ordered' floating point maximum function.
/// Floating point has one special value 'NaN'. Therefore, there is no total
/// order. However, if we can ignore the 'NaN' value (for example, because of a
@@ -1987,6 +2130,18 @@ inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0,
return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1);
}
+template <typename Opnd0, typename Opnd1, typename Opnd2>
+inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
+m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
+ return m_Intrinsic<Intrinsic::fshl>(Op0, Op1, Op2);
+}
+
+template <typename Opnd0, typename Opnd1, typename Opnd2>
+inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
+m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
+ return m_Intrinsic<Intrinsic::fshr>(Op0, Op1, Op2);
+}
+
//===----------------------------------------------------------------------===//
// Matchers for two-operands operators with the operators in either order
//
@@ -2048,6 +2203,15 @@ m_Neg(const ValTy &V) {
return m_Sub(m_ZeroInt(), V);
}
+/// Matches a 'Neg' as 'sub nsw 0, V'.
+template <typename ValTy>
+inline OverflowingBinaryOp_match<cst_pred_ty<is_zero_int>, ValTy,
+ Instruction::Sub,
+ OverflowingBinaryOperator::NoSignedWrap>
+m_NSWNeg(const ValTy &V) {
+ return m_NSWSub(m_ZeroInt(), V);
+}
+
/// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
template <typename ValTy>
inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true>
@@ -2080,6 +2244,17 @@ m_c_UMax(const LHS &L, const RHS &R) {
return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R);
}
+template <typename LHS, typename RHS>
+inline match_combine_or<
+ match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>,
+ MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>>,
+ match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>,
+ MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>>>
+m_c_MaxOrMin(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_CombineOr(m_c_SMax(L, R), m_c_SMin(L, R)),
+ m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R)));
+}
+
/// Matches FAdd with LHS and RHS in either order.
template <typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true>
@@ -2153,6 +2328,29 @@ inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) {
return ExtractValue_match<Ind, Val_t>(V);
}
+/// Matcher for a single index InsertValue instruction.
+template <int Ind, typename T0, typename T1> struct InsertValue_match {
+ T0 Op0;
+ T1 Op1;
+
+ InsertValue_match(const T0 &Op0, const T1 &Op1) : Op0(Op0), Op1(Op1) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ if (auto *I = dyn_cast<InsertValueInst>(V)) {
+ return Op0.match(I->getOperand(0)) && Op1.match(I->getOperand(1)) &&
+ I->getNumIndices() == 1 && Ind == I->getIndices()[0];
+ }
+ return false;
+ }
+};
+
+/// Matches a single index InsertValue instruction.
+template <int Ind, typename Val_t, typename Elt_t>
+inline InsertValue_match<Ind, Val_t, Elt_t> m_InsertValue(const Val_t &Val,
+ const Elt_t &Elt) {
+ return InsertValue_match<Ind, Val_t, Elt_t>(Val, Elt);
+}
+
/// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or
/// the constant expression
/// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1>`
@@ -2189,6 +2387,58 @@ inline VScaleVal_match m_VScale(const DataLayout &DL) {
return VScaleVal_match(DL);
}
+template <typename LHS, typename RHS, unsigned Opcode>
+struct LogicalOp_match {
+ LHS L;
+ RHS R;
+
+ LogicalOp_match(const LHS &L, const RHS &R) : L(L), R(R) {}
+
+ template <typename T> bool match(T *V) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (!I->getType()->isIntOrIntVectorTy(1))
+ return false;
+
+ if (I->getOpcode() == Opcode && L.match(I->getOperand(0)) &&
+ R.match(I->getOperand(1)))
+ return true;
+
+ if (auto *SI = dyn_cast<SelectInst>(I)) {
+ if (Opcode == Instruction::And) {
+ if (const auto *C = dyn_cast<Constant>(SI->getFalseValue()))
+ if (C->isNullValue() && L.match(SI->getCondition()) &&
+ R.match(SI->getTrueValue()))
+ return true;
+ } else {
+ assert(Opcode == Instruction::Or);
+ if (const auto *C = dyn_cast<Constant>(SI->getTrueValue()))
+ if (C->isOneValue() && L.match(SI->getCondition()) &&
+ R.match(SI->getFalseValue()))
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+};
+
+/// Matches L && R either in the form of L & R or L ? R : false.
+/// Note that the latter form is poison-blocking.
+template <typename LHS, typename RHS>
+inline LogicalOp_match<LHS, RHS, Instruction::And>
+m_LogicalAnd(const LHS &L, const RHS &R) {
+ return LogicalOp_match<LHS, RHS, Instruction::And>(L, R);
+}
+
+/// Matches L || R either in the form of L | R or L ? true : R.
+/// Note that the latter form is poison-blocking.
+template <typename LHS, typename RHS>
+inline LogicalOp_match<LHS, RHS, Instruction::Or>
+m_LogicalOr(const LHS &L, const RHS &R) {
+ return LogicalOp_match<LHS, RHS, Instruction::Or>(L, R);
+}
+
} // end namespace PatternMatch
} // end namespace llvm
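An illustrative sketch exercising a few of the matchers added above (m_LogicalAnd and m_FShl); the classification function itself is made up.

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

bool classifyValue(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  llvm::Value *A, *B, *C;
  // Matches both `and i1 A, B` and the poison-blocking `select i1 A, i1 B, false`.
  if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
    return true;
  // Matches a call to llvm.fshl(A, B, C).
  if (match(V, m_FShl(m_Value(A), m_Value(B), m_Value(C))))
    return true;
  return false;
}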
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PredIteratorCache.h b/contrib/llvm-project/llvm/include/llvm/IR/PredIteratorCache.h
index cc835277910b..6bbd7e5e87a0 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/PredIteratorCache.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PredIteratorCache.h
@@ -44,7 +44,7 @@ private:
if (Entry)
return Entry;
- SmallVector<BasicBlock *, 32> PredCache(pred_begin(BB), pred_end(BB));
+ SmallVector<BasicBlock *, 32> PredCache(predecessors(BB));
PredCache.push_back(nullptr); // null terminator.
BlockToPredCountMap[BB] = PredCache.size() - 1;
@@ -58,7 +58,7 @@ private:
auto Result = BlockToPredCountMap.find(BB);
if (Result != BlockToPredCountMap.end())
return Result->second;
- return BlockToPredCountMap[BB] = std::distance(pred_begin(BB), pred_end(BB));
+ return BlockToPredCountMap[BB] = pred_size(BB);
}
public:
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PrintPasses.h b/contrib/llvm-project/llvm/include/llvm/IR/PrintPasses.h
new file mode 100644
index 000000000000..1fa7c1893e20
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PrintPasses.h
@@ -0,0 +1,44 @@
+//===- PrintPasses.h - Determining whether/when to print IR ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_PRINTPASSES_H
+#define LLVM_IR_PRINTPASSES_H
+
+#include "llvm/ADT/StringRef.h"
+#include <vector>
+
+namespace llvm {
+
+// Returns true if printing before/after some pass is enabled, whether all
+// passes or a specific pass.
+bool shouldPrintBeforeSomePass();
+bool shouldPrintAfterSomePass();
+
+// Returns true if we should print before/after a specific pass. The argument
+// should be the pass ID, e.g. "instcombine".
+bool shouldPrintBeforePass(StringRef PassID);
+bool shouldPrintAfterPass(StringRef PassID);
+
+// Returns true if we should print before/after all passes.
+bool shouldPrintBeforeAll();
+bool shouldPrintAfterAll();
+
+// The list of passes to print before/after, if we only want to print
+// before/after specific passes.
+std::vector<std::string> printBeforePasses();
+std::vector<std::string> printAfterPasses();
+
+// Returns true if we should always print the entire module.
+bool forcePrintModuleIR();
+
+// Returns true if we should print the function.
+bool isFunctionInPrintList(StringRef FunctionName);
+
+} // namespace llvm
+
+#endif // LLVM_IR_PRINTPASSES_H
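A small hedged sketch of how a printing hook might consult the helpers declared in this new header; the function and message are illustrative.

#include "llvm/IR/PrintPasses.h"
#include "llvm/Support/raw_ostream.h"

void maybeAnnounceDump(llvm::StringRef PassID, llvm::StringRef FnName) {
  if (llvm::shouldPrintBeforePass(PassID) &&
      llvm::isFunctionInPrintList(FnName))
    llvm::errs() << "*** IR dump before " << PassID << " on " << FnName
                 << " ***\n";
}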
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PseudoProbe.h b/contrib/llvm-project/llvm/include/llvm/IR/PseudoProbe.h
new file mode 100644
index 000000000000..5165e80caa2d
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PseudoProbe.h
@@ -0,0 +1,87 @@
+//===- PseudoProbe.h - Pseudo Probe IR Helpers ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Pseudo probe IR intrinsic and dwarf discriminator manipulation routines.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_PSEUDOPROBE_H
+#define LLVM_IR_PSEUDOPROBE_H
+
+#include "llvm/ADT/Optional.h"
+#include <cassert>
+#include <cstdint>
+#include <limits>
+
+namespace llvm {
+
+class Instruction;
+class BasicBlock;
+
+constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
+
+enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
+
+// The saturated distribution factor representing 100% for block probes.
+constexpr static uint64_t PseudoProbeFullDistributionFactor =
+ std::numeric_limits<uint64_t>::max();
+
+struct PseudoProbeDwarfDiscriminator {
+public:
+ // The following APIs encode/decode per-probe information to/from a
+ // 32-bit integer which is organized as:
+ // [2:0] - 0x7, this is reserved for regular discriminator,
+ // see DWARF discriminator encoding rule
+ // [18:3] - probe id
+ // [25:19] - probe distribution factor
+ // [28:26] - probe type, see PseudoProbeType
+ // [31:29] - reserved for probe attributes
+ static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags,
+ uint32_t Factor) {
+ assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16");
+ assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7");
+ assert(Flags <= 0x7);
+ assert(Factor <= 100 &&
+ "Probe distribution factor too big to encode, exceeding 100");
+ return (Index << 3) | (Factor << 19) | (Type << 26) | 0x7;
+ }
+
+ static uint32_t extractProbeIndex(uint32_t Value) {
+ return (Value >> 3) & 0xFFFF;
+ }
+
+ static uint32_t extractProbeType(uint32_t Value) {
+ return (Value >> 26) & 0x7;
+ }
+
+ static uint32_t extractProbeAttributes(uint32_t Value) {
+ return (Value >> 29) & 0x7;
+ }
+
+ static uint32_t extractProbeFactor(uint32_t Value) {
+ return (Value >> 19) & 0x7F;
+ }
+
+ // The saturated distribution factor representing 100% for callsites.
+ constexpr static uint8_t FullDistributionFactor = 100;
+};
+
+struct PseudoProbe {
+ uint32_t Id;
+ uint32_t Type;
+ uint32_t Attr;
+ float Factor;
+};
+
+Optional<PseudoProbe> extractProbe(const Instruction &Inst);
+
+void setProbeDistributionFactor(Instruction &Inst, float Factor);
+
+} // end namespace llvm
+
+#endif // LLVM_IR_PSEUDOPROBE_H
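A sketch that round-trips the packed discriminator layout documented above; the index/type/factor values are arbitrary examples.

#include "llvm/IR/PseudoProbe.h"
#include <cassert>
#include <cstdint>

void discriminatorRoundTrip() {
  using D = llvm::PseudoProbeDwarfDiscriminator;
  uint32_t Packed = D::packProbeData(
      /*Index=*/42, /*Type=*/uint32_t(llvm::PseudoProbeType::Block),
      /*Flags=*/0, /*Factor=*/100);
  // Every field comes back out of the 32-bit encoding unchanged.
  assert(D::extractProbeIndex(Packed) == 42);
  assert(D::extractProbeType(Packed) == uint32_t(llvm::PseudoProbeType::Block));
  assert(D::extractProbeFactor(Packed) == 100);
  (void)Packed;
}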
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/ReplaceConstant.h b/contrib/llvm-project/llvm/include/llvm/IR/ReplaceConstant.h
new file mode 100644
index 000000000000..753f6d558ef8
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/IR/ReplaceConstant.h
@@ -0,0 +1,28 @@
+//===- ReplaceConstant.h - Replacing LLVM constant expressions --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the utility function for replacing LLVM constant
+// expressions by instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_REPLACECONSTANT_H
+#define LLVM_IR_REPLACECONSTANT_H
+
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instruction.h"
+
+namespace llvm {
+
+/// Create a replacement instruction for constant expression \p CE and insert
+/// it before \p Instr.
+Instruction *createReplacementInstr(ConstantExpr *CE, Instruction *Instr);
+
+} // end namespace llvm
+
+#endif // LLVM_IR_REPLACECONSTANT_H
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def b/contrib/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def
index 903db6c70498..c73172612b1e 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/contrib/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -286,7 +286,9 @@ HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq")
HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2")
HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2")
HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2")
+HANDLE_LIBCALL(FPEXT_F16_F128, "__extendhftf2")
HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2")
+HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2")
HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee")
HANDLE_LIBCALL(FPROUND_F32_F16, "__gnu_f2h_ieee")
HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2")
@@ -301,6 +303,9 @@ HANDLE_LIBCALL(FPROUND_F80_F64, "__truncxfdf2")
HANDLE_LIBCALL(FPROUND_F128_F64, "__trunctfdf2")
HANDLE_LIBCALL(FPROUND_PPCF128_F64, "__gcc_qtod")
HANDLE_LIBCALL(FPROUND_F128_F80, "__trunctfxf2")
+HANDLE_LIBCALL(FPTOSINT_F16_I32, "__fixhfsi")
+HANDLE_LIBCALL(FPTOSINT_F16_I64, "__fixhfdi")
+HANDLE_LIBCALL(FPTOSINT_F16_I128, "__fixhfti")
HANDLE_LIBCALL(FPTOSINT_F32_I32, "__fixsfsi")
HANDLE_LIBCALL(FPTOSINT_F32_I64, "__fixsfdi")
HANDLE_LIBCALL(FPTOSINT_F32_I128, "__fixsfti")
@@ -316,6 +321,9 @@ HANDLE_LIBCALL(FPTOSINT_F128_I128, "__fixtfti")
HANDLE_LIBCALL(FPTOSINT_PPCF128_I32, "__gcc_qtou")
HANDLE_LIBCALL(FPTOSINT_PPCF128_I64, "__fixtfdi")
HANDLE_LIBCALL(FPTOSINT_PPCF128_I128, "__fixtfti")
+HANDLE_LIBCALL(FPTOUINT_F16_I32, "__fixunshfsi")
+HANDLE_LIBCALL(FPTOUINT_F16_I64, "__fixunshfdi")
+HANDLE_LIBCALL(FPTOUINT_F16_I128, "__fixunshfti")
HANDLE_LIBCALL(FPTOUINT_F32_I32, "__fixunssfsi")
HANDLE_LIBCALL(FPTOUINT_F32_I64, "__fixunssfdi")
HANDLE_LIBCALL(FPTOUINT_F32_I128, "__fixunssfti")
@@ -331,31 +339,37 @@ HANDLE_LIBCALL(FPTOUINT_F128_I128, "__fixunstfti")
HANDLE_LIBCALL(FPTOUINT_PPCF128_I32, "__fixunstfsi")
HANDLE_LIBCALL(FPTOUINT_PPCF128_I64, "__fixunstfdi")
HANDLE_LIBCALL(FPTOUINT_PPCF128_I128, "__fixunstfti")
+HANDLE_LIBCALL(SINTTOFP_I32_F16, "__floatsihf")
HANDLE_LIBCALL(SINTTOFP_I32_F32, "__floatsisf")
HANDLE_LIBCALL(SINTTOFP_I32_F64, "__floatsidf")
HANDLE_LIBCALL(SINTTOFP_I32_F80, "__floatsixf")
HANDLE_LIBCALL(SINTTOFP_I32_F128, "__floatsitf")
HANDLE_LIBCALL(SINTTOFP_I32_PPCF128, "__gcc_itoq")
+HANDLE_LIBCALL(SINTTOFP_I64_F16, "__floatdihf")
HANDLE_LIBCALL(SINTTOFP_I64_F32, "__floatdisf")
HANDLE_LIBCALL(SINTTOFP_I64_F64, "__floatdidf")
HANDLE_LIBCALL(SINTTOFP_I64_F80, "__floatdixf")
HANDLE_LIBCALL(SINTTOFP_I64_F128, "__floatditf")
HANDLE_LIBCALL(SINTTOFP_I64_PPCF128, "__floatditf")
+HANDLE_LIBCALL(SINTTOFP_I128_F16, "__floattihf")
HANDLE_LIBCALL(SINTTOFP_I128_F32, "__floattisf")
HANDLE_LIBCALL(SINTTOFP_I128_F64, "__floattidf")
HANDLE_LIBCALL(SINTTOFP_I128_F80, "__floattixf")
HANDLE_LIBCALL(SINTTOFP_I128_F128, "__floattitf")
HANDLE_LIBCALL(SINTTOFP_I128_PPCF128, "__floattitf")
+HANDLE_LIBCALL(UINTTOFP_I32_F16, "__floatunsihf")
HANDLE_LIBCALL(UINTTOFP_I32_F32, "__floatunsisf")
HANDLE_LIBCALL(UINTTOFP_I32_F64, "__floatunsidf")
HANDLE_LIBCALL(UINTTOFP_I32_F80, "__floatunsixf")
HANDLE_LIBCALL(UINTTOFP_I32_F128, "__floatunsitf")
HANDLE_LIBCALL(UINTTOFP_I32_PPCF128, "__gcc_utoq")
+HANDLE_LIBCALL(UINTTOFP_I64_F16, "__floatundihf")
HANDLE_LIBCALL(UINTTOFP_I64_F32, "__floatundisf")
HANDLE_LIBCALL(UINTTOFP_I64_F64, "__floatundidf")
HANDLE_LIBCALL(UINTTOFP_I64_F80, "__floatundixf")
HANDLE_LIBCALL(UINTTOFP_I64_F128, "__floatunditf")
HANDLE_LIBCALL(UINTTOFP_I64_PPCF128, "__floatunditf")
+HANDLE_LIBCALL(UINTTOFP_I128_F16, "__floatuntihf")
HANDLE_LIBCALL(UINTTOFP_I128_F32, "__floatuntisf")
HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf")
HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf")
@@ -544,6 +558,23 @@ HANDLE_LIBCALL(ATOMIC_FETCH_NAND_4, "__atomic_fetch_nand_4")
HANDLE_LIBCALL(ATOMIC_FETCH_NAND_8, "__atomic_fetch_nand_8")
HANDLE_LIBCALL(ATOMIC_FETCH_NAND_16, "__atomic_fetch_nand_16")
+// Out-of-line atomics libcalls
+#define HLCALLS(A, N) \
+ HANDLE_LIBCALL(A##N##_RELAX, nullptr) \
+ HANDLE_LIBCALL(A##N##_ACQ, nullptr) \
+ HANDLE_LIBCALL(A##N##_REL, nullptr) \
+ HANDLE_LIBCALL(A##N##_ACQ_REL, nullptr)
+#define HLCALL5(A) \
+ HLCALLS(A, 1) HLCALLS(A, 2) HLCALLS(A, 4) HLCALLS(A, 8) HLCALLS(A, 16)
+HLCALL5(OUTLINE_ATOMIC_CAS)
+HLCALL5(OUTLINE_ATOMIC_SWP)
+HLCALL5(OUTLINE_ATOMIC_LDADD)
+HLCALL5(OUTLINE_ATOMIC_LDSET)
+HLCALL5(OUTLINE_ATOMIC_LDCLR)
+HLCALL5(OUTLINE_ATOMIC_LDEOR)
+#undef HLCALLS
+#undef HLCALL5
+
// Stack Protector Fail
HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail")
@@ -555,4 +586,3 @@ HANDLE_LIBCALL(RETURN_ADDRESS, nullptr)
HANDLE_LIBCALL(UNKNOWN_LIBCALL, nullptr)
-#undef HANDLE_LIBCALL
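The HANDLE_LIBCALL rows above follow the usual X-macro pattern: a consumer defines HANDLE_LIBCALL and includes the .def file to stamp out a table. A hedged, self-contained sketch of that pattern, with a few entries copied by hand instead of including the real file:

    #include <cstdio>

    struct Libcall { const char *Code; const char *Name; };

    #define HANDLE_LIBCALL(code, name) {#code, name},
    static const Libcall Calls[] = {
        // In LLVM the rows would come from including RuntimeLibcalls.def.
        HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2")
        HANDLE_LIBCALL(FPTOSINT_F16_I32, "__fixhfsi")
        HANDLE_LIBCALL(OUTLINE_ATOMIC_CAS4_ACQ, nullptr) // resolved per target
    };
    #undef HANDLE_LIBCALL

    int main() {
      for (const Libcall &LC : Calls)
        std::printf("%s -> %s\n", LC.Code, LC.Name ? LC.Name : "(target-specific)");
      return 0;
    }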
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Statepoint.h b/contrib/llvm-project/llvm/include/llvm/IR/Statepoint.h
index 1ace39c10701..6ce15839df46 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Statepoint.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Statepoint.h
@@ -136,7 +136,7 @@ public:
/// Return an end iterator of the arguments to the underlying call
const_op_iterator actual_arg_end() const {
auto I = actual_arg_begin() + actual_arg_size();
- assert((arg_end() - I) >= 0);
+ assert((arg_end() - I) == 2);
return I;
}
/// range adapter for actual call arguments
@@ -147,16 +147,12 @@ public:
const_op_iterator gc_transition_args_begin() const {
if (auto Opt = getOperandBundle(LLVMContext::OB_gc_transition))
return Opt->Inputs.begin();
- auto I = actual_arg_end() + 1;
- assert((arg_end() - I) >= 0);
- return I;
+ return arg_end();
}
const_op_iterator gc_transition_args_end() const {
if (auto Opt = getOperandBundle(LLVMContext::OB_gc_transition))
return Opt->Inputs.end();
- auto I = gc_transition_args_begin() + getNumDeoptArgs();
- assert((arg_end() - I) >= 0);
- return I;
+ return arg_end();
}
/// range adapter for GC transition arguments
@@ -167,19 +163,12 @@ public:
const_op_iterator deopt_begin() const {
if (auto Opt = getOperandBundle(LLVMContext::OB_deopt))
return Opt->Inputs.begin();
- // The current format has two length prefix bundles between call args and
- // start of gc args. This will be removed in the near future.
- uint64_t NumTrans = getNumGCTransitionArgs();
- const_op_iterator I = actual_arg_end() + 2 + NumTrans;
- assert((arg_end() - I) >= 0);
- return I;
+ return arg_end();
}
const_op_iterator deopt_end() const {
if (auto Opt = getOperandBundle(LLVMContext::OB_deopt))
return Opt->Inputs.end();
- auto I = deopt_begin() + getNumDeoptArgs();
- assert((arg_end() - I) >= 0);
- return I;
+ return arg_end();
}
/// range adapter for vm state arguments
@@ -192,30 +181,16 @@ public:
const_op_iterator gc_args_begin() const {
if (auto Opt = getOperandBundle(LLVMContext::OB_gc_live))
return Opt->Inputs.begin();
-
- // The current format has two length prefix bundles between call args and
- // start of gc args. This will be removed in the near future.
- uint64_t NumTrans = getNumGCTransitionArgs();
- uint64_t NumDeopt = getNumDeoptArgs();
- auto I = actual_arg_end() + 2 + NumTrans + NumDeopt;
- assert((arg_end() - I) >= 0);
- return I;
+ return arg_end();
}
/// Return an end iterator for the gc argument range
const_op_iterator gc_args_end() const {
if (auto Opt = getOperandBundle(LLVMContext::OB_gc_live))
return Opt->Inputs.end();
-
return arg_end();
}
- /// Return the operand index at which the gc args begin
- unsigned gcArgsStartIdx() const {
- assert(!getOperandBundle(LLVMContext::OB_gc_live));
- return gc_args_begin() - op_begin();
- }
-
/// range adapter for gc arguments
iterator_range<const_op_iterator> gc_args() const {
return make_range(gc_args_begin(), gc_args_end());
@@ -236,19 +211,6 @@ public:
return GRI;
return nullptr;
}
-
-private:
- int getNumGCTransitionArgs() const {
- const Value *NumGCTransitionArgs = *actual_arg_end();
- return cast<ConstantInt>(NumGCTransitionArgs)->getZExtValue();
- }
-
- int getNumDeoptArgs() const {
- uint64_t NumTrans = getNumGCTransitionArgs();
- const_op_iterator trans_end = actual_arg_end() + 1 + NumTrans;
- const Value *NumDeoptArgs = *trans_end;
- return cast<ConstantInt>(NumDeoptArgs)->getZExtValue();
- }
};
/// Common base class for representing values projected from a statepoint.
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/StructuralHash.h b/contrib/llvm-project/llvm/include/llvm/IR/StructuralHash.h
new file mode 100644
index 000000000000..eb63a2140310
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/IR/StructuralHash.h
@@ -0,0 +1,34 @@
+//===- llvm/IR/StructuralHash.h - IR Hash for expensive checks --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides hashing of the LLVM IR structure to be used to check
+// Passes modification status.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_STRUCTURALHASH_H
+#define LLVM_IR_STRUCTURALHASH_H
+
+#ifdef EXPENSIVE_CHECKS
+
+#include <cstdint>
+
+// This header is only meant to be used when -DEXPENSIVE_CHECKS is set
+namespace llvm {
+
+class Function;
+class Module;
+
+uint64_t StructuralHash(const Function &F);
+uint64_t StructuralHash(const Module &M);
+
+} // end namespace llvm
+
+#endif
+
+#endif // LLVM_IR_STRUCTURALHASH_H
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/SymbolTableListTraits.h b/contrib/llvm-project/llvm/include/llvm/IR/SymbolTableListTraits.h
index 5b793e5dbf28..8af712374bfa 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/SymbolTableListTraits.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/SymbolTableListTraits.h
@@ -76,9 +76,11 @@ private:
/// getListOwner - Return the object that owns this list. If this is a list
/// of instructions, it returns the BasicBlock that owns them.
ItemParentClass *getListOwner() {
- size_t Offset(size_t(&((ItemParentClass*)nullptr->*ItemParentClass::
- getSublistAccess(static_cast<ValueSubClass*>(nullptr)))));
- ListTy *Anchor(static_cast<ListTy *>(this));
+ size_t Offset = reinterpret_cast<size_t>(
+ &((ItemParentClass *)nullptr->*ItemParentClass::getSublistAccess(
+ static_cast<ValueSubClass *>(
+ nullptr))));
+ ListTy *Anchor = static_cast<ListTy *>(this);
return reinterpret_cast<ItemParentClass*>(reinterpret_cast<char*>(Anchor)-
Offset);
}
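The getListOwner() rewrite above is the classic container_of idiom: subtract the byte offset of the embedded sublist anchor from the anchor's address to recover the owning object. A simplified sketch using plain offsetof (LLVM uses a pointer-to-member expression so it works with the templated sublist accessor); the types here are invented for illustration:

    #include <cstddef>
    #include <cstdio>

    struct List { void *Head; };

    struct Owner {
      int Id;
      List Sublist; // embedded anchor, analogous to a BasicBlock's instruction list
    };

    static Owner *getListOwner(List *Anchor) {
      return reinterpret_cast<Owner *>(reinterpret_cast<char *>(Anchor) -
                                       offsetof(Owner, Sublist));
    }

    int main() {
      Owner O{42, {nullptr}};
      std::printf("owner id = %d\n", getListOwner(&O.Sublist)->Id);
      return 0;
    }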
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Type.h b/contrib/llvm-project/llvm/include/llvm/IR/Type.h
index 1f546884b924..756c69dd6ae9 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Type.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Type.h
@@ -65,6 +65,7 @@ public:
LabelTyID, ///< Labels
MetadataTyID, ///< Metadata
X86_MMXTyID, ///< MMX vectors (64 bits, X86 specific)
+ X86_AMXTyID, ///< AMX vectors (8192 bits, X86 specific)
TokenTyID, ///< Tokens
// Derived types... see DerivedTypes.h file.
@@ -182,6 +183,9 @@ public:
/// Return true if this is X86 MMX.
bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
+ /// Return true if this is X86 AMX.
+ bool isX86_AMXTy() const { return getTypeID() == X86_AMXTyID; }
+
/// Return true if this is a FP type or a vector of FP.
bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
@@ -252,7 +256,7 @@ public:
/// includes all first-class types except struct and array types.
bool isSingleValueType() const {
return isFloatingPointTy() || isX86_MMXTy() || isIntegerTy() ||
- isPointerTy() || isVectorTy();
+ isPointerTy() || isVectorTy() || isX86_AMXTy();
}
/// Return true if the type is an aggregate type. This means it is valid as
@@ -268,8 +272,8 @@ public:
bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const {
// If it's a primitive, it is always sized.
if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
- getTypeID() == PointerTyID ||
- getTypeID() == X86_MMXTyID)
+ getTypeID() == PointerTyID || getTypeID() == X86_MMXTyID ||
+ getTypeID() == X86_AMXTyID)
return true;
// If it is not something that can have a size (e.g. a function or label),
// it doesn't have a size.
@@ -405,6 +409,7 @@ public:
static Type *getFP128Ty(LLVMContext &C);
static Type *getPPC_FP128Ty(LLVMContext &C);
static Type *getX86_MMXTy(LLVMContext &C);
+ static Type *getX86_AMXTy(LLVMContext &C);
static Type *getTokenTy(LLVMContext &C);
static IntegerType *getIntNTy(LLVMContext &C, unsigned N);
static IntegerType *getInt1Ty(LLVMContext &C);
@@ -427,6 +432,26 @@ public:
}
llvm_unreachable("Unsupported type in Type::getScalarTy");
}
+ static Type *getFloatingPointTy(LLVMContext &C, const fltSemantics &S) {
+ Type *Ty;
+ if (&S == &APFloat::IEEEhalf())
+ Ty = Type::getHalfTy(C);
+ else if (&S == &APFloat::BFloat())
+ Ty = Type::getBFloatTy(C);
+ else if (&S == &APFloat::IEEEsingle())
+ Ty = Type::getFloatTy(C);
+ else if (&S == &APFloat::IEEEdouble())
+ Ty = Type::getDoubleTy(C);
+ else if (&S == &APFloat::x87DoubleExtended())
+ Ty = Type::getX86_FP80Ty(C);
+ else if (&S == &APFloat::IEEEquad())
+ Ty = Type::getFP128Ty(C);
+ else {
+ assert(&S == &APFloat::PPCDoubleDouble() && "Unknown FP format");
+ Ty = Type::getPPC_FP128Ty(C);
+ }
+ return Ty;
+ }
//===--------------------------------------------------------------------===//
// Convenience methods for getting pointer types with one of the above builtin
@@ -440,6 +465,7 @@ public:
static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getX86_AMXPtrTy(LLVMContext &C, unsigned AS = 0);
static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0);
static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
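A small usage sketch for the Type::getFloatingPointTy() helper added above, mapping an APFloat semantics object back to the matching IR type; it assumes an LLVM 12 development tree linked against LLVMCore and only demonstrates the round trip:

    #include "llvm/ADT/APFloat.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"

    int main() {
      llvm::LLVMContext Ctx;
      // fltSemantics -> Type, the inverse direction of Type::getFltSemantics().
      llvm::Type *T =
          llvm::Type::getFloatingPointTy(Ctx, llvm::APFloat::IEEEdouble());
      return T == llvm::Type::getDoubleTy(Ctx) ? 0 : 1;
    }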
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/User.h b/contrib/llvm-project/llvm/include/llvm/IR/User.h
index ebfae1db2980..221bb5b2cb1c 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/User.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/User.h
@@ -45,7 +45,7 @@ class User : public Value {
template <unsigned>
friend struct HungoffOperandTraits;
- LLVM_ATTRIBUTE_ALWAYS_INLINE inline static void *
+ LLVM_ATTRIBUTE_ALWAYS_INLINE static void *
allocateFixedOperandUser(size_t, unsigned, unsigned);
protected:
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/VPIntrinsics.def b/contrib/llvm-project/llvm/include/llvm/IR/VPIntrinsics.def
index d3e1fc854373..981548c6dde9 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/contrib/llvm-project/llvm/include/llvm/IR/VPIntrinsics.def
@@ -17,68 +17,140 @@
// Provide definitions of macros so that users of this file do not have to
// define everything to use it...
//
-#ifndef REGISTER_VP_INTRINSIC
-#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)
+// Register a VP intrinsic and begin its property scope.
+// All VP intrinsic scopes are top level, ie it is illegal to place a
+// BEGIN_REGISTER_VP_INTRINSIC within a VP intrinsic scope.
+// \p VPID The VP intrinsic id.
+// \p MASKPOS The mask operand position.
+// \p EVLPOS The explicit vector length operand position.
+#ifndef BEGIN_REGISTER_VP_INTRINSIC
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS)
#endif
-// Map this VP intrinsic to its functional Opcode
-#ifndef HANDLE_VP_TO_OC
-#define HANDLE_VP_TO_OC(VPID, OC)
+// End the property scope of a VP intrinsic.
+#ifndef END_REGISTER_VP_INTRINSIC
+#define END_REGISTER_VP_INTRINSIC(VPID)
#endif
-///// Integer Arithmetic /////
+// Register a new VP SDNode and begin its property scope.
+// When the SDNode scope is nested within a VP intrinsic scope, it is
+// implicitly registered as the canonical SDNode for this VP intrinsic.
+// There is one VP intrinsic that maps directly to one SDNode that goes by the
+// same name. Since the operands are also the same, we open the property
+// scopes for both the VPIntrinsic and the SDNode at once.
+// \p SDOPC The SelectionDAG Node id (eg VP_ADD).
+// \p LEGALPOS The operand position of the SDNode that is used for legalizing
+// this SDNode. This can be `-1`, in which case the return type of
+// the SDNode is used.
+// \p TDNAME The name of the TableGen definition of this SDNode.
+// \p MASKPOS The mask operand position.
+// \p EVLPOS The explicit vector length operand position.
+#ifndef BEGIN_REGISTER_VP_SDNODE
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS)
+#endif
+
+// End the property scope of a new VP SDNode.
+#ifndef END_REGISTER_VP_SDNODE
+#define END_REGISTER_VP_SDNODE(SDOPC)
+#endif
+
+// Helper macros for the common "1:1 - Intrinsic : SDNode" case.
+//
+// There is one VP intrinsic that maps directly to one SDNode that goes by the
+// same name. Since the operands are also the same, we open the property
+// scopes for both the VPIntrinsic and the SDNode at once.
+//
+// \p INTRIN The canonical name (eg `vp_add`, which at the same time is the
+// name of the intrinsic and the TableGen def of the SDNode).
+// \p MASKPOS The mask operand position.
+// \p EVLPOS The explicit vector length operand position.
+// \p SDOPC The SelectionDAG Node id (eg VP_ADD).
+// \p LEGALPOS The operand position of the SDNode that is used for legalizing
+// this SDNode. This can be `-1`, in which case the return type of
+// the SDNode is used.
+#define BEGIN_REGISTER_VP(INTRIN, MASKPOS, EVLPOS, SDOPC, LEGALPOS) \
+BEGIN_REGISTER_VP_INTRINSIC(INTRIN, MASKPOS, EVLPOS) \
+BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, INTRIN, MASKPOS, EVLPOS)
+
+#define END_REGISTER_VP(INTRIN, SDOPC) \
+END_REGISTER_VP_INTRINSIC(INTRIN) \
+END_REGISTER_VP_SDNODE(SDOPC)
+
+
+// The following macros attach properties to the scope they are placed in. This
+// assigns the property to the VP Intrinsic and/or SDNode that belongs to the
+// scope.
+//
+// Property Macros {
+
+// The intrinsic and/or SDNode has the same function as this LLVM IR Opcode.
+// \p OPC The standard IR opcode.
+#ifndef HANDLE_VP_TO_OPC
+#define HANDLE_VP_TO_OPC(OPC)
+#endif
+
+/// } Property Macros
+
+///// Integer Arithmetic {
+
+// Specialized helper macro for integer binary operators (%x, %y, %mask, %evl).
+#ifdef HELPER_REGISTER_BINARY_INT_VP
+#error "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!"
+#endif
+#define HELPER_REGISTER_BINARY_INT_VP(INTRIN, SDOPC, OPC) \
+BEGIN_REGISTER_VP(INTRIN, 2, 3, SDOPC, -1) \
+HANDLE_VP_TO_OPC(OPC) \
+END_REGISTER_VP(INTRIN, SDOPC)
+
+
// llvm.vp.add(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_add, 2, 3)
-HANDLE_VP_TO_OC(vp_add, Add)
+HELPER_REGISTER_BINARY_INT_VP(vp_add, VP_ADD, Add)
// llvm.vp.and(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_and, 2, 3)
-HANDLE_VP_TO_OC(vp_and, And)
+HELPER_REGISTER_BINARY_INT_VP(vp_and, VP_AND, And)
// llvm.vp.ashr(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_ashr, 2, 3)
-HANDLE_VP_TO_OC(vp_ashr, AShr)
+HELPER_REGISTER_BINARY_INT_VP(vp_ashr, VP_ASHR, AShr)
// llvm.vp.lshr(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_lshr, 2, 3)
-HANDLE_VP_TO_OC(vp_lshr, LShr)
+HELPER_REGISTER_BINARY_INT_VP(vp_lshr, VP_LSHR, LShr)
// llvm.vp.mul(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_mul, 2, 3)
-HANDLE_VP_TO_OC(vp_mul, Mul)
+HELPER_REGISTER_BINARY_INT_VP(vp_mul, VP_MUL, Mul)
// llvm.vp.or(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_or, 2, 3)
-HANDLE_VP_TO_OC(vp_or, Or)
+HELPER_REGISTER_BINARY_INT_VP(vp_or, VP_OR, Or)
// llvm.vp.sdiv(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_sdiv, 2, 3)
-HANDLE_VP_TO_OC(vp_sdiv, SDiv)
+HELPER_REGISTER_BINARY_INT_VP(vp_sdiv, VP_SDIV, SDiv)
// llvm.vp.shl(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_shl, 2, 3)
-HANDLE_VP_TO_OC(vp_shl, Shl)
+HELPER_REGISTER_BINARY_INT_VP(vp_shl, VP_SHL, Shl)
// llvm.vp.srem(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_srem, 2, 3)
-HANDLE_VP_TO_OC(vp_srem, SRem)
+HELPER_REGISTER_BINARY_INT_VP(vp_srem, VP_SREM, SRem)
// llvm.vp.sub(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_sub, 2, 3)
-HANDLE_VP_TO_OC(vp_sub, Sub)
+HELPER_REGISTER_BINARY_INT_VP(vp_sub, VP_SUB, Sub)
// llvm.vp.udiv(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_udiv, 2, 3)
-HANDLE_VP_TO_OC(vp_udiv, UDiv)
+HELPER_REGISTER_BINARY_INT_VP(vp_udiv, VP_UDIV, UDiv)
// llvm.vp.urem(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_urem, 2, 3)
-HANDLE_VP_TO_OC(vp_urem, URem)
+HELPER_REGISTER_BINARY_INT_VP(vp_urem, VP_UREM, URem)
// llvm.vp.xor(x,y,mask,vlen)
-REGISTER_VP_INTRINSIC(vp_xor, 2, 3)
-HANDLE_VP_TO_OC(vp_xor, Xor)
+HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor)
+
+#undef HELPER_REGISTER_BINARY_INT_VP
+
+///// } Integer Arithmetic
+
-#undef REGISTER_VP_INTRINSIC
-#undef HANDLE_VP_TO_OC
+#undef BEGIN_REGISTER_VP
+#undef BEGIN_REGISTER_VP_INTRINSIC
+#undef BEGIN_REGISTER_VP_SDNODE
+#undef END_REGISTER_VP
+#undef END_REGISTER_VP_INTRINSIC
+#undef END_REGISTER_VP_SDNODE
+#undef HANDLE_VP_TO_OPC
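A hedged sketch of how the new scoped VP macros are typically consumed: a client defines only the macros it needs, and the #ifndef defaults above turn the rest into no-ops. Two entries are expanded by hand here rather than including the real file, so the include step mentioned in the comment is illustrative only:

    #include <cstdio>

    #define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS) \
      std::printf("%s: mask operand %d, EVL operand %d\n", #VPID, MASKPOS, EVLPOS);
    #define HANDLE_VP_TO_OPC(OPC) std::printf("  functional opcode: %s\n", #OPC);
    #define END_REGISTER_VP_INTRINSIC(VPID)

    int main() {
      // With the real header this would be a single include of VPIntrinsics.def.
      BEGIN_REGISTER_VP_INTRINSIC(vp_add, 2, 3)
      HANDLE_VP_TO_OPC(Add)
      END_REGISTER_VP_INTRINSIC(vp_add)
      BEGIN_REGISTER_VP_INTRINSIC(vp_xor, 2, 3)
      HANDLE_VP_TO_OPC(Xor)
      END_REGISTER_VP_INTRINSIC(vp_xor)
      return 0;
    }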
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Value.def b/contrib/llvm-project/llvm/include/llvm/IR/Value.def
index aaf1651979a9..0a0125d319c3 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Value.def
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Value.def
@@ -23,6 +23,11 @@
#error "Missing macro definition of HANDLE_VALUE*"
#endif
+// If the LLVM_C_API macro is set, then values handled via
+// HANDLE_*_EXCLUDE_LLVM_C_API will not be expanded in areas the HANDLE_*
+// macro is used. If it is not set, then HANDLE_*_EXCLUDE_LLVM_C_API values
+// are handled normally as their HANDLE_* counterparts.
+#ifndef LLVM_C_API
+#define LLVM_C_API 0
+#endif
+
#ifndef HANDLE_MEMORY_VALUE
#define HANDLE_MEMORY_VALUE(ValueName) HANDLE_VALUE(ValueName)
#endif
@@ -55,6 +60,15 @@
#define HANDLE_CONSTANT_MARKER(MarkerName, ValueName)
#endif
+#ifndef HANDLE_CONSTANT_EXCLUDE_LLVM_C_API
+#define HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(ValueName) HANDLE_CONSTANT(ValueName)
+#endif
+
+#if LLVM_C_API
+#undef HANDLE_CONSTANT_EXCLUDE_LLVM_C_API
+#define HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(ValueName)
+#endif
+
// Having constant first makes the range check for isa<Constant> faster
// and smaller by one operation.
@@ -65,6 +79,7 @@ HANDLE_GLOBAL_VALUE(GlobalIFunc)
HANDLE_GLOBAL_VALUE(GlobalVariable)
HANDLE_CONSTANT(BlockAddress)
HANDLE_CONSTANT(ConstantExpr)
+HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(DSOLocalEquivalent)
// ConstantAggregate.
HANDLE_CONSTANT(ConstantArray)
@@ -73,6 +88,7 @@ HANDLE_CONSTANT(ConstantVector)
// ConstantData.
HANDLE_CONSTANT(UndefValue)
+HANDLE_CONSTANT(PoisonValue)
HANDLE_CONSTANT(ConstantAggregateZero)
HANDLE_CONSTANT(ConstantDataArray)
HANDLE_CONSTANT(ConstantDataVector)
@@ -114,3 +130,5 @@ HANDLE_INSTRUCTION(Instruction)
#undef HANDLE_INLINE_ASM_VALUE
#undef HANDLE_VALUE
#undef HANDLE_CONSTANT_MARKER
+#undef HANDLE_CONSTANT_EXCLUDE_LLVM_C_API
+#undef LLVM_C_API
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Value.h b/contrib/llvm-project/llvm/include/llvm/IR/Value.h
index 04ca68274626..2a9912d46c89 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Value.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Value.h
@@ -15,6 +15,7 @@
#include "llvm-c/Types.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Use.h"
#include "llvm/Support/Alignment.h"
@@ -43,11 +44,11 @@ class GlobalVariable;
class InlineAsm;
class Instruction;
class LLVMContext;
+class MDNode;
class Module;
class ModuleSlotTracker;
class raw_ostream;
template<typename ValueTy> class StringMapEntry;
-class StringRef;
class Twine;
class Type;
class User;
@@ -110,12 +111,13 @@ protected:
///
/// Note, this should *NOT* be used directly by any class other than User.
/// User uses this value to find the Use list.
- enum : unsigned { NumUserOperandsBits = 28 };
+ enum : unsigned { NumUserOperandsBits = 27 };
unsigned NumUserOperands : NumUserOperandsBits;
// Use the same type as the bitfield above so that MSVC will pack them.
unsigned IsUsedByMD : 1;
unsigned HasName : 1;
+ unsigned HasMetadata : 1; // Has metadata attached to this?
unsigned HasHungOffUses : 1;
unsigned HasDescriptor : 1;
@@ -279,6 +281,10 @@ public:
/// \note It is an error to call V->takeName(V).
void takeName(Value *V);
+#ifndef NDEBUG
+ std::string getNameOrAsOperand() const;
+#endif
+
/// Change all uses of this to point to a new Value.
///
/// Go through the uses list for this definition and make each use point to
@@ -424,25 +430,31 @@ public:
return materialized_users();
}
- /// Return true if there is exactly one user of this value.
+ /// Return true if there is exactly one use of this value.
///
/// This is specialized because it is a common request and does not require
/// traversing the whole use list.
- bool hasOneUse() const {
- const_use_iterator I = use_begin(), E = use_end();
- if (I == E) return false;
- return ++I == E;
- }
+ bool hasOneUse() const { return hasSingleElement(uses()); }
- /// Return true if this Value has exactly N users.
+ /// Return true if this Value has exactly N uses.
bool hasNUses(unsigned N) const;
- /// Return true if this value has N users or more.
+ /// Return true if this value has N uses or more.
///
/// This is logically equivalent to getNumUses() >= N.
bool hasNUsesOrMore(unsigned N) const;
- /// Return true if there is exactly one user of this value that cannot be
+ /// Return true if there is exactly one user of this value.
+ ///
+ /// Note that this is not the same as "has one use". If a value has one use,
+ /// then there certainly is a single user. But if a value has several uses,
+ /// those uses may or may not all belong to a single user.
+ ///
+ /// This check is potentially costly, since it requires traversing,
+ /// in the worst case, the whole use list of a value.
+ bool hasOneUser() const;
+
+ /// Return true if there is exactly one use of this value that cannot be
/// dropped.
///
/// This is specialized because it is a common request and does not require
@@ -455,7 +467,7 @@ public:
/// traversing the whole use list.
bool hasNUndroppableUses(unsigned N) const;
- /// Return true if this value has N users or more.
+ /// Return true if this value has N uses or more.
///
/// This is logically equivalent to getNumUses() >= N.
bool hasNUndroppableUsesOrMore(unsigned N) const;
@@ -470,6 +482,12 @@ public:
void dropDroppableUses(llvm::function_ref<bool(const Use *)> ShouldDrop =
[](const Use *) { return true; });
+ /// Remove every use of this value in \p User that can safely be removed.
+ void dropDroppableUsesIn(User &Usr);
+
+ /// Remove the droppable use \p U.
+ static void dropDroppableUse(Use &U);
+
/// Check if this value is used in the specified basic block.
bool isUsedInBasicBlock(const BasicBlock *BB) const;
@@ -534,6 +552,68 @@ public:
/// Return true if there is metadata referencing this value.
bool isUsedByMetadata() const { return IsUsedByMD; }
+protected:
+ /// Get the current metadata attachments for the given kind, if any.
+ ///
+ /// These functions require that the value have at most a single attachment
+ /// of the given kind, and return \c nullptr if such an attachment is missing.
+ /// @{
+ MDNode *getMetadata(unsigned KindID) const;
+ MDNode *getMetadata(StringRef Kind) const;
+ /// @}
+
+ /// Appends all attachments with the given ID to \c MDs in insertion order.
+ /// If the Value has no attachments with the given ID, or if ID is invalid,
+ /// leaves MDs unchanged.
+ /// @{
+ void getMetadata(unsigned KindID, SmallVectorImpl<MDNode *> &MDs) const;
+ void getMetadata(StringRef Kind, SmallVectorImpl<MDNode *> &MDs) const;
+ /// @}
+
+ /// Appends all metadata attached to this value to \c MDs, sorting by
+ /// KindID. The first element of each pair returned is the KindID, the second
+ /// element is the metadata value. Attachments with the same ID appear in
+ /// insertion order.
+ void
+ getAllMetadata(SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const;
+
+ /// Return true if this value has any metadata attached to it.
+ bool hasMetadata() const { return (bool)HasMetadata; }
+
+ /// Return true if this value has the given type of metadata attached.
+ /// @{
+ bool hasMetadata(unsigned KindID) const {
+ return getMetadata(KindID) != nullptr;
+ }
+ bool hasMetadata(StringRef Kind) const {
+ return getMetadata(Kind) != nullptr;
+ }
+ /// @}
+
+ /// Set a particular kind of metadata attachment.
+ ///
+ /// Sets the given attachment to \c MD, erasing it if \c MD is \c nullptr or
+ /// replacing it if it already exists.
+ /// @{
+ void setMetadata(unsigned KindID, MDNode *Node);
+ void setMetadata(StringRef Kind, MDNode *Node);
+ /// @}
+
+ /// Add a metadata attachment.
+ /// @{
+ void addMetadata(unsigned KindID, MDNode &MD);
+ void addMetadata(StringRef Kind, MDNode &MD);
+ /// @}
+
+ /// Erase all metadata attachments with the given kind.
+ ///
+ /// \returns true if any metadata was removed.
+ bool eraseMetadata(unsigned KindID);
+
+ /// Erase all metadata attached to this Value.
+ void clearMetadata();
+
+public:
/// Return true if this value is a swifterror value.
///
/// swifterror values can be either a function argument or an alloca with a
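The hasOneUse()/hasOneUser() distinction documented above is easy to model outside LLVM, since a single user may contribute several uses of the same value (think of "%s = add i32 %x, %x"). A toy sketch, not the real Use/User machinery, that counts the same way:

    #include <cstdio>
    #include <vector>

    struct Use { int UserId; };

    static bool hasOneUse(const std::vector<Use> &Uses) { return Uses.size() == 1; }

    static bool hasOneUser(const std::vector<Use> &Uses) {
      if (Uses.empty())
        return false;
      for (const Use &U : Uses)
        if (U.UserId != Uses.front().UserId)
          return false;
      return true;
    }

    int main() {
      // Two uses of the same value held by the same user.
      std::vector<Use> UsesOfX = {{1}, {1}};
      std::printf("hasOneUse=%d hasOneUser=%d\n", hasOneUse(UsesOfX),
                  hasOneUser(UsesOfX));
      return 0;
    }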
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/ValueHandle.h b/contrib/llvm-project/llvm/include/llvm/IR/ValueHandle.h
index badc1ca8d1f6..29560815ea55 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/ValueHandle.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/ValueHandle.h
@@ -258,13 +258,13 @@ template <> struct simplify_type<const WeakTrackingVH> {
/// class turns into a trivial wrapper around a pointer.
template <typename ValueTy>
class AssertingVH
-#ifndef NDEBUG
- : public ValueHandleBase
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ : public ValueHandleBase
#endif
- {
+{
friend struct DenseMapInfo<AssertingVH<ValueTy>>;
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
Value *getRawValPtr() const { return ValueHandleBase::getValPtr(); }
void setRawValPtr(Value *P) { ValueHandleBase::operator=(P); }
#else
@@ -280,14 +280,14 @@ class AssertingVH
void setValPtr(ValueTy *P) { setRawValPtr(GetAsValue(P)); }
public:
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
AssertingVH() : ValueHandleBase(Assert) {}
AssertingVH(ValueTy *P) : ValueHandleBase(Assert, GetAsValue(P)) {}
AssertingVH(const AssertingVH &RHS) : ValueHandleBase(Assert, RHS) {}
#else
AssertingVH() : ThePtr(nullptr) {}
AssertingVH(ValueTy *P) : ThePtr(GetAsValue(P)) {}
- AssertingVH(const AssertingVH<ValueTy> &) = default;
+ AssertingVH(const AssertingVH &) = default;
#endif
operator ValueTy*() const {
@@ -442,9 +442,9 @@ public:
/// PoisoningVH's as it moves. This is required because in non-assert mode this
/// class turns into a trivial wrapper around a pointer.
template <typename ValueTy>
-class PoisoningVH
-#ifndef NDEBUG
- final : public CallbackVH
+class PoisoningVH final
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ : public CallbackVH
#endif
{
friend struct DenseMapInfo<PoisoningVH<ValueTy>>;
@@ -453,7 +453,7 @@ class PoisoningVH
static Value *GetAsValue(Value *V) { return V; }
static Value *GetAsValue(const Value *V) { return const_cast<Value *>(V); }
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
/// A flag tracking whether this value has been poisoned.
///
/// On delete and RAUW, we leave the value pointer alone so that as a raw
@@ -478,7 +478,7 @@ class PoisoningVH
Poisoned = true;
RemoveFromUseList();
}
-#else // NDEBUG
+#else // LLVM_ENABLE_ABI_BREAKING_CHECKS
Value *ThePtr = nullptr;
Value *getRawValPtr() const { return ThePtr; }
@@ -486,14 +486,16 @@ class PoisoningVH
#endif
ValueTy *getValPtr() const {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
assert(!Poisoned && "Accessed a poisoned value handle!");
+#endif
return static_cast<ValueTy *>(getRawValPtr());
}
void setValPtr(ValueTy *P) { setRawValPtr(GetAsValue(P)); }
public:
PoisoningVH() = default;
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
PoisoningVH(ValueTy *P) : CallbackVH(GetAsValue(P)) {}
PoisoningVH(const PoisoningVH &RHS)
: CallbackVH(RHS), Poisoned(RHS.Poisoned) {}
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Verifier.h b/contrib/llvm-project/llvm/include/llvm/IR/Verifier.h
index 62c33c8325eb..f4381d2ae4a9 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Verifier.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Verifier.h
@@ -116,6 +116,7 @@ public:
Result run(Module &M, ModuleAnalysisManager &);
Result run(Function &F, FunctionAnalysisManager &);
+ static bool isRequired() { return true; }
};
/// Check a module for errors, but report debug info errors separately.
@@ -141,6 +142,7 @@ public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/InitializePasses.h b/contrib/llvm-project/llvm/include/llvm/InitializePasses.h
index 06e8507036ac..4f89179a03de 100644
--- a/contrib/llvm-project/llvm/include/llvm/InitializePasses.h
+++ b/contrib/llvm-project/llvm/include/llvm/InitializePasses.h
@@ -73,17 +73,19 @@ void initializeAlignmentFromAssumptionsPass(PassRegistry&);
void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
void initializeAssumeSimplifyPassLegacyPassPass(PassRegistry &);
void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &);
+void initializeAnnotation2MetadataLegacyPass(PassRegistry &);
+void initializeAnnotationRemarksLegacyPass(PassRegistry &);
void initializeOpenMPOptLegacyPassPass(PassRegistry &);
void initializeArgPromotionPass(PassRegistry&);
void initializeAssumptionCacheTrackerPass(PassRegistry&);
void initializeAtomicExpandPass(PassRegistry&);
void initializeAttributorLegacyPassPass(PassRegistry&);
void initializeAttributorCGSCCLegacyPassPass(PassRegistry &);
-void initializeBBSectionsPreparePass(PassRegistry &);
+void initializeBasicBlockSectionsPass(PassRegistry &);
void initializeBDCELegacyPassPass(PassRegistry&);
void initializeBarrierNoopPass(PassRegistry&);
void initializeBasicAAWrapperPassPass(PassRegistry&);
-void initializeBlockExtractorPass(PassRegistry &);
+void initializeBlockExtractorLegacyPassPass(PassRegistry &);
void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&);
void initializeBoundsCheckingLegacyPassPass(PassRegistry&);
void initializeBranchFolderPassPass(PassRegistry&);
@@ -110,10 +112,11 @@ void initializeCallGraphViewerPass(PassRegistry&);
void initializeCallGraphWrapperPassPass(PassRegistry&);
void initializeCallSiteSplittingLegacyPassPass(PassRegistry&);
void initializeCalledValuePropagationLegacyPassPass(PassRegistry &);
+void initializeCheckDebugMachineModulePass(PassRegistry &);
void initializeCodeGenPreparePass(PassRegistry&);
void initializeConstantHoistingLegacyPassPass(PassRegistry&);
void initializeConstantMergeLegacyPassPass(PassRegistry&);
-void initializeConstantPropagationPass(PassRegistry&);
+void initializeConstraintEliminationPass(PassRegistry &);
void initializeControlHeightReductionLegacyPassPass(PassRegistry&);
void initializeCorrelatedValuePropagationPass(PassRegistry&);
void initializeCostModelAnalysisPass(PassRegistry&);
@@ -122,8 +125,7 @@ void initializeDAEPass(PassRegistry&);
void initializeDAHPass(PassRegistry&);
void initializeDCELegacyPassPass(PassRegistry&);
void initializeDSELegacyPassPass(PassRegistry&);
-void initializeDataFlowSanitizerPass(PassRegistry&);
-void initializeDeadInstEliminationPass(PassRegistry&);
+void initializeDataFlowSanitizerLegacyPassPass(PassRegistry &);
void initializeDeadMachineInstructionElimPass(PassRegistry&);
void initializeDebugifyMachineModulePass(PassRegistry &);
void initializeDelinearizationPass(PassRegistry&);
@@ -138,7 +140,7 @@ void initializeDomPrinterPass(PassRegistry&);
void initializeDomViewerPass(PassRegistry&);
void initializeDominanceFrontierWrapperPassPass(PassRegistry&);
void initializeDominatorTreeWrapperPassPass(PassRegistry&);
-void initializeDwarfEHPreparePass(PassRegistry&);
+void initializeDwarfEHPrepareLegacyPassPass(PassRegistry &);
void initializeEarlyCSELegacyPassPass(PassRegistry&);
void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry&);
void initializeEarlyIfConverterPass(PassRegistry&);
@@ -177,11 +179,13 @@ void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
void initializeHardwareLoopsPass(PassRegistry&);
+void initializeMemProfilerLegacyPassPass(PassRegistry &);
void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
-void initializeIPCPPass(PassRegistry&);
void initializeIPSCCPLegacyPassPass(PassRegistry&);
void initializeIRCELegacyPassPass(PassRegistry&);
+void initializeIROutlinerLegacyPassPass(PassRegistry&);
+void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry&);
void initializeIRTranslatorPass(PassRegistry&);
void initializeIVUsersWrapperPassPass(PassRegistry&);
void initializeIfConverterPass(PassRegistry&);
@@ -193,7 +197,7 @@ void initializeInferAddressSpacesPass(PassRegistry&);
void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&);
void initializeInjectTLIMappingsLegacyPass(PassRegistry &);
void initializeInlineCostAnalysisPass(PassRegistry&);
-void initializeInstCountPass(PassRegistry&);
+void initializeInstCountLegacyPassPass(PassRegistry &);
void initializeInstNamerPass(PassRegistry&);
void initializeInstSimplifyLegacyPassPass(PassRegistry &);
void initializeInstrProfilingLegacyPassPass(PassRegistry&);
@@ -219,7 +223,7 @@ void initializeLegalizerPass(PassRegistry&);
void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &);
void initializeGISelKnownBitsAnalysisPass(PassRegistry &);
void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&);
-void initializeLintPass(PassRegistry&);
+void initializeLintLegacyPassPass(PassRegistry &);
void initializeLiveDebugValuesPass(PassRegistry&);
void initializeLiveDebugVariablesPass(PassRegistry&);
void initializeLiveIntervalsPass(PassRegistry&);
@@ -235,17 +239,18 @@ void initializeLoopAccessLegacyAnalysisPass(PassRegistry&);
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry&);
void initializeLoopDeletionLegacyPassPass(PassRegistry&);
void initializeLoopDistributeLegacyPass(PassRegistry&);
-void initializeLoopExtractorPass(PassRegistry&);
+void initializeLoopExtractorLegacyPassPass(PassRegistry &);
void initializeLoopGuardWideningLegacyPassPass(PassRegistry&);
void initializeLoopFuseLegacyPass(PassRegistry&);
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
void initializeLoopInfoWrapperPassPass(PassRegistry&);
void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&);
-void initializeLoopInterchangePass(PassRegistry&);
+void initializeLoopInterchangeLegacyPassPass(PassRegistry &);
+void initializeLoopFlattenLegacyPassPass(PassRegistry&);
void initializeLoopLoadEliminationPass(PassRegistry&);
void initializeLoopPassPass(PassRegistry&);
void initializeLoopPredicationLegacyPassPass(PassRegistry&);
-void initializeLoopRerollPass(PassRegistry&);
+void initializeLoopRerollLegacyPassPass(PassRegistry &);
void initializeLoopRotateLegacyPassPass(PassRegistry&);
void initializeLoopSimplifyCFGLegacyPassPass(PassRegistry&);
void initializeLoopSimplifyPass(PassRegistry&);
@@ -254,8 +259,8 @@ void initializeLoopUnrollAndJamPass(PassRegistry&);
void initializeLoopUnrollPass(PassRegistry&);
void initializeLoopUnswitchPass(PassRegistry&);
void initializeLoopVectorizePass(PassRegistry&);
-void initializeLoopVersioningLICMPass(PassRegistry&);
-void initializeLoopVersioningPassPass(PassRegistry&);
+void initializeLoopVersioningLICMLegacyPassPass(PassRegistry &);
+void initializeLoopVersioningLegacyPassPass(PassRegistry &);
void initializeLowerAtomicLegacyPassPass(PassRegistry&);
void initializeLowerConstantIntrinsicsPass(PassRegistry&);
void initializeLowerEmuTLSPass(PassRegistry&);
@@ -264,9 +269,10 @@ void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&);
void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&);
void initializeLowerIntrinsicsPass(PassRegistry&);
void initializeLowerInvokeLegacyPassPass(PassRegistry&);
-void initializeLowerSwitchPass(PassRegistry&);
+void initializeLowerSwitchLegacyPassPass(PassRegistry &);
void initializeLowerTypeTestsPass(PassRegistry&);
void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &);
+void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &);
void initializeMIRCanonicalizerPass(PassRegistry &);
void initializeMIRNamerPass(PassRegistry &);
void initializeMIRPrintingPassPass(PassRegistry&);
@@ -280,6 +286,7 @@ void initializeMachineCopyPropagationPass(PassRegistry&);
void initializeMachineDominanceFrontierPass(PassRegistry&);
void initializeMachineDominatorTreePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
+void initializeMachineFunctionSplitterPass(PassRegistry &);
void initializeMachineLICMPass(PassRegistry&);
void initializeMachineLoopInfoPass(PassRegistry&);
void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);
@@ -303,7 +310,8 @@ void initializeMergeFunctionsLegacyPassPass(PassRegistry&);
void initializeMergeICmpsLegacyPassPass(PassRegistry &);
void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
void initializeMetaRenamerPass(PassRegistry&);
-void initializeModuleDebugInfoPrinterPass(PassRegistry&);
+void initializeModuleDebugInfoLegacyPrinterPass(PassRegistry &);
+void initializeModuleMemProfilerLegacyPassPass(PassRegistry &);
void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&);
void initializeModuloScheduleTestPass(PassRegistry&);
void initializeMustExecutePrinterPass(PassRegistry&);
@@ -314,9 +322,9 @@ void initializeNaryReassociateLegacyPassPass(PassRegistry&);
void initializeNewGVNLegacyPassPass(PassRegistry&);
void initializeObjCARCAAWrapperPassPass(PassRegistry&);
void initializeObjCARCAPElimPass(PassRegistry&);
-void initializeObjCARCContractPass(PassRegistry&);
+void initializeObjCARCContractLegacyPassPass(PassRegistry &);
void initializeObjCARCExpandPass(PassRegistry&);
-void initializeObjCARCOptPass(PassRegistry&);
+void initializeObjCARCOptLegacyPassPass(PassRegistry &);
void initializeOptimizationRemarkEmitterWrapperPassPass(PassRegistry&);
void initializeOptimizePHIsPass(PassRegistry&);
void initializePAEvalPass(PassRegistry&);
@@ -355,13 +363,14 @@ void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&);
void initializePromoteLegacyPassPass(PassRegistry&);
void initializePruneEHPass(PassRegistry&);
void initializeRABasicPass(PassRegistry&);
+void initializePseudoProbeInserterPass(PassRegistry &);
void initializeRAGreedyPass(PassRegistry&);
void initializeReachingDefAnalysisPass(PassRegistry&);
void initializeReassociateLegacyPassPass(PassRegistry&);
void initializeRedundantDbgInstEliminationPass(PassRegistry&);
void initializeRegAllocFastPass(PassRegistry&);
void initializeRegBankSelectPass(PassRegistry&);
-void initializeRegToMemPass(PassRegistry&);
+void initializeRegToMemLegacyPass(PassRegistry&);
void initializeRegUsageInfoCollectorPass(PassRegistry&);
void initializeRegUsageInfoPropagationPass(PassRegistry&);
void initializeRegionInfoPassPass(PassRegistry&);
@@ -384,11 +393,11 @@ void initializeSafepointIRVerifierPass(PassRegistry&);
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&);
void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &);
void initializeScalarEvolutionWrapperPassPass(PassRegistry&);
-void initializeScalarizeMaskedMemIntrinPass(PassRegistry&);
+void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &);
void initializeScalarizerLegacyPassPass(PassRegistry&);
void initializeScavengerTestPass(PassRegistry&);
void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&);
-void initializeSeparateConstOffsetFromGEPPass(PassRegistry&);
+void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);
void initializeShadowStackGCLoweringPass(PassRegistry&);
void initializeShrinkWrapPass(PassRegistry&);
void initializeSimpleInlinerPass(PassRegistry&);
@@ -405,16 +414,16 @@ void initializeStackProtectorPass(PassRegistry&);
void initializeStackSafetyGlobalInfoWrapperPassPass(PassRegistry &);
void initializeStackSafetyInfoWrapperPassPass(PassRegistry &);
void initializeStackSlotColoringPass(PassRegistry&);
-void initializeStraightLineStrengthReducePass(PassRegistry&);
+void initializeStraightLineStrengthReduceLegacyPassPass(PassRegistry &);
void initializeStripDeadDebugInfoPass(PassRegistry&);
void initializeStripDeadPrototypesLegacyPassPass(PassRegistry&);
void initializeStripDebugDeclarePass(PassRegistry&);
void initializeStripDebugMachineModulePass(PassRegistry &);
-void initializeStripGCRelocatesPass(PassRegistry&);
+void initializeStripGCRelocatesLegacyPass(PassRegistry &);
void initializeStripNonDebugSymbolsPass(PassRegistry&);
-void initializeStripNonLineTableDebugInfoPass(PassRegistry&);
+void initializeStripNonLineTableDebugLegacyPassPass(PassRegistry &);
void initializeStripSymbolsPass(PassRegistry&);
-void initializeStructurizeCFGPass(PassRegistry&);
+void initializeStructurizeCFGLegacyPassPass(PassRegistry &);
void initializeTailCallElimPass(PassRegistry&);
void initializeTailDuplicatePass(PassRegistry&);
void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&);
@@ -424,8 +433,8 @@ void initializeThreadSanitizerLegacyPassPass(PassRegistry&);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
void initializeTypeBasedAAWrapperPassPass(PassRegistry&);
void initializeTypePromotionPass(PassRegistry&);
-void initializeUnifyFunctionExitNodesPass(PassRegistry&);
-void initializeUnifyLoopExitsPass(PassRegistry &);
+void initializeUnifyFunctionExitNodesLegacyPassPass(PassRegistry &);
+void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &);
void initializeUnpackMachineBundlesPass(PassRegistry&);
void initializeUnreachableBlockElimLegacyPassPass(PassRegistry&);
void initializeUnreachableMachineBlockElimPass(PassRegistry&);
diff --git a/contrib/llvm-project/llvm/include/llvm/InterfaceStub/ELFObjHandler.h b/contrib/llvm-project/llvm/include/llvm/InterfaceStub/ELFObjHandler.h
new file mode 100644
index 000000000000..4ec158c1405f
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/InterfaceStub/ELFObjHandler.h
@@ -0,0 +1,47 @@
+//===- ELFObjHandler.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------------===/
+///
+/// This supports reading and writing of ELF dynamic shared objects.
+///
+//===-----------------------------------------------------------------------===/
+
+#ifndef LLVM_TOOLS_ELFABI_ELFOBJHANDLER_H
+#define LLVM_TOOLS_ELFABI_ELFOBJHANDLER_H
+
+#include "llvm/InterfaceStub/ELFStub.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Support/FileSystem.h"
+
+namespace llvm {
+
+class MemoryBuffer;
+
+namespace elfabi {
+
+enum class ELFTarget { ELF32LE, ELF32BE, ELF64LE, ELF64BE };
+
+/// Attempt to read a binary ELF file from a MemoryBuffer.
+Expected<std::unique_ptr<ELFStub>> readELFFile(MemoryBufferRef Buf);
+
+/// Attempt to write a binary ELF stub.
+/// This function determines appropriate ELFType using the passed ELFTarget and
+/// then writes a binary ELF stub to a specified file path.
+///
+/// @param FilePath File path for writing the ELF binary.
+/// @param Stub Source ELFStub to generate a binary ELF stub from.
+/// @param OutputFormat Target ELFType to write binary as.
+/// @param WriteIfChanged Whether or not to preserve timestamp if
+/// the output stays the same.
+Error writeBinaryStub(StringRef FilePath, const ELFStub &Stub,
+ ELFTarget OutputFormat, bool WriteIfChanged = false);
+
+} // end namespace elfabi
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_ELFABI_ELFOBJHANDLER_H
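A hedged usage sketch for the interface declared above, reading a shared object and re-emitting it as a 64-bit little-endian binary stub. The file names are placeholders, it assumes an LLVM 12 tree with this header in place, and error handling is kept minimal:

    #include "llvm/InterfaceStub/ELFObjHandler.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      auto BufOrErr = MemoryBuffer::getFile("libfoo.so"); // placeholder input
      if (!BufOrErr)
        return 1;
      Expected<std::unique_ptr<elfabi::ELFStub>> Stub =
          elfabi::readELFFile((*BufOrErr)->getMemBufferRef());
      if (!Stub) {
        logAllUnhandledErrors(Stub.takeError(), errs(), "readELFFile: ");
        return 1;
      }
      if (Error E = elfabi::writeBinaryStub("libfoo_stub.so", **Stub,
                                            elfabi::ELFTarget::ELF64LE)) {
        logAllUnhandledErrors(std::move(E), errs(), "writeBinaryStub: ");
        return 1;
      }
      return 0;
    }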
diff --git a/contrib/llvm-project/llvm/include/llvm/TextAPI/ELF/ELFStub.h b/contrib/llvm-project/llvm/include/llvm/InterfaceStub/ELFStub.h
index 76b2af121662..7832c1c7413b 100644
--- a/contrib/llvm-project/llvm/include/llvm/TextAPI/ELF/ELFStub.h
+++ b/contrib/llvm-project/llvm/include/llvm/InterfaceStub/ELFStub.h
@@ -16,8 +16,8 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/VersionTuple.h"
-#include <vector>
#include <set>
+#include <vector>
namespace llvm {
namespace elfabi {
@@ -42,15 +42,13 @@ struct ELFSymbol {
bool Undefined;
bool Weak;
Optional<std::string> Warning;
- bool operator<(const ELFSymbol &RHS) const {
- return Name < RHS.Name;
- }
+ bool operator<(const ELFSymbol &RHS) const { return Name < RHS.Name; }
};
// A cumulative representation of ELF stubs.
// Both textual and binary stubs will read into and write from this object.
class ELFStub {
-// TODO: Add support for symbol versioning.
+ // TODO: Add support for symbol versioning.
public:
VersionTuple TbeVersion;
Optional<std::string> SoName;
diff --git a/contrib/llvm-project/llvm/include/llvm/TextAPI/ELF/TBEHandler.h b/contrib/llvm-project/llvm/include/llvm/InterfaceStub/TBEHandler.h
index 76484410987f..5c523eba037e 100644
--- a/contrib/llvm-project/llvm/include/llvm/TextAPI/ELF/TBEHandler.h
+++ b/contrib/llvm-project/llvm/include/llvm/InterfaceStub/TBEHandler.h
@@ -15,8 +15,8 @@
#ifndef LLVM_TEXTAPI_ELF_TBEHANDLER_H
#define LLVM_TEXTAPI_ELF_TBEHANDLER_H
-#include "llvm/Support/VersionTuple.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/VersionTuple.h"
#include <memory>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/LTO/Config.h b/contrib/llvm-project/llvm/include/llvm/LTO/Config.h
index 0a3e52316460..88c1452e5aa9 100644
--- a/contrib/llvm-project/llvm/include/llvm/LTO/Config.h
+++ b/contrib/llvm-project/llvm/include/llvm/LTO/Config.h
@@ -1,4 +1,4 @@
-//===-Config.h - LLVM Link Time Optimizer Configuration -------------------===//
+//===-Config.h - LLVM Link Time Optimizer Configuration ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,9 +15,11 @@
#define LLVM_LTO_CONFIG_H
#include "llvm/ADT/DenseSet.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetOptions.h"
@@ -42,6 +44,8 @@ struct Config {
TargetOptions Options;
std::vector<std::string> MAttrs;
std::vector<std::string> PassPlugins;
+ /// For adding passes that run right before codegen.
+ std::function<void(legacy::PassManager &)> PreCodeGenPassesHook;
Optional<Reloc::Model> RelocModel = Reloc::PIC_;
Optional<CodeModel::Model> CodeModel = None;
CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
@@ -50,7 +54,7 @@ struct Config {
bool DisableVerify = false;
/// Use the new pass manager
- bool UseNewPM = false;
+ bool UseNewPM = LLVM_ENABLE_NEW_PASS_MANAGER;
/// Flag to indicate that the optimizer should not assume builtins are present
/// on the target.
@@ -113,16 +117,31 @@ struct Config {
std::string SplitDwarfOutput;
/// Optimization remarks file path.
- std::string RemarksFilename = "";
+ std::string RemarksFilename;
/// Optimization remarks pass filter.
- std::string RemarksPasses = "";
+ std::string RemarksPasses;
/// Whether to emit optimization remarks with hotness information.
bool RemarksWithHotness = false;
+ /// The minimum hotness value a diagnostic needs in order to be included in
+ /// optimization diagnostics.
+ ///
+ /// The threshold is an Optional value, which maps to one of the 3 states:
+ /// 1. 0 => threshold disabled. All remarks will be printed.
+ /// 2. positive int => manual threshold by user. Remarks with hotness exceed
+ /// threshold will be printed.
+ /// 3. None => 'auto' threshold by user. The actual value is not
+ /// available at command line, but will be synced with
+ /// hotness threshold from profile summary during
+ /// compilation.
+ ///
+ /// If threshold option is not specified, it is disabled by default.
+ llvm::Optional<uint64_t> RemarksHotnessThreshold = 0;
+
/// The format used for serializing remarks (default: YAML).
- std::string RemarksFormat = "";
+ std::string RemarksFormat;
/// Whether to emit the pass manager debugging information.
bool DebugPassManager = false;
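The tri-state threshold described above can be summarized with a short sketch; the function below is a simplified stand-in, not LTO's actual remark filtering, and uses std::optional in place of llvm::Optional:

    #include <cstdint>
    #include <cstdio>
    #include <optional>

    static bool shouldEmitRemark(std::optional<uint64_t> Threshold,
                                 uint64_t Hotness,
                                 uint64_t ProfileSummaryHotness) {
      if (!Threshold)               // 'auto': sync with the profile summary
        return Hotness >= ProfileSummaryHotness;
      if (*Threshold == 0)          // disabled: emit everything
        return true;
      return Hotness >= *Threshold; // manual threshold set by the user
    }

    int main() {
      std::printf("%d %d %d\n",
                  shouldEmitRemark(std::nullopt, 50, 100), // auto, below summary
                  shouldEmitRemark(0, 1, 100),             // disabled
                  shouldEmitRemark(75, 80, 100));          // manual, above 75
      return 0;
    }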
diff --git a/contrib/llvm-project/llvm/include/llvm/LTO/LTO.h b/contrib/llvm-project/llvm/include/llvm/LTO/LTO.h
index 93456c0ae7ae..4f169137ee85 100644
--- a/contrib/llvm-project/llvm/include/llvm/LTO/LTO.h
+++ b/contrib/llvm-project/llvm/include/llvm/LTO/LTO.h
@@ -82,15 +82,19 @@ std::string getThinLTOOutputFile(const std::string &Path,
const std::string &NewPrefix);
/// Setup optimization remarks.
-Expected<std::unique_ptr<ToolOutputFile>>
-setupLLVMOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
- StringRef RemarksPasses, StringRef RemarksFormat,
- bool RemarksWithHotness, int Count = -1);
+Expected<std::unique_ptr<ToolOutputFile>> setupLLVMOptimizationRemarks(
+ LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses,
+ StringRef RemarksFormat, bool RemarksWithHotness,
+ Optional<uint64_t> RemarksHotnessThreshold = 0, int Count = -1);
/// Setups the output file for saving statistics.
Expected<std::unique_ptr<ToolOutputFile>>
setupStatsFile(StringRef StatsFilename);
+/// Produces a container ordering for optimal multi-threaded processing. Returns
+/// ordered indices to elements in the input array.
+std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R);
+
class LTO;
struct SymbolResolution;
class ThinBackendProc;
diff --git a/contrib/llvm-project/llvm/include/llvm/LTO/LTOBackend.h b/contrib/llvm-project/llvm/include/llvm/LTO/LTOBackend.h
index 0226e4a3fbf5..824c7d143854 100644
--- a/contrib/llvm-project/llvm/include/llvm/LTO/LTOBackend.h
+++ b/contrib/llvm-project/llvm/include/llvm/LTO/LTOBackend.h
@@ -33,6 +33,12 @@ class Target;
namespace lto {
+/// Runs middle-end LTO optimizations on \p Mod.
+bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
+ bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary,
+ const std::vector<uint8_t> &CmdArgs);
+
/// Runs a regular LTO backend. The regular LTO backend can also act as the
/// regular LTO phase of ThinLTO, which may need to access the combined index.
Error backend(const Config &C, AddStreamFn AddStream,
@@ -44,10 +50,27 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
Module &M, const ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
- MapVector<StringRef, BitcodeModule> &ModuleMap);
+ MapVector<StringRef, BitcodeModule> &ModuleMap,
+ const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>());
Error finalizeOptimizationRemarks(
std::unique_ptr<ToolOutputFile> DiagOutputFile);
+
+/// Returns the BitcodeModule in \p BMs that carries a ThinLTO summary, if any.
+BitcodeModule *findThinLTOModule(MutableArrayRef<BitcodeModule> BMs);
+
+/// Variant of the above.
+Expected<BitcodeModule> findThinLTOModule(MemoryBufferRef MBRef);
+
+/// Distributed ThinLTO: load the referenced modules, keeping their buffers
+/// alive in the provided OwnedImportLifetimeManager. Returns false if the
+/// operation failed.
+bool loadReferencedModules(
+ const Module &M, const ModuleSummaryIndex &CombinedIndex,
+ FunctionImporter::ImportMapTy &ImportList,
+ MapVector<llvm::StringRef, llvm::BitcodeModule> &ModuleMap,
+ std::vector<std::unique_ptr<llvm::MemoryBuffer>>
+ &OwnedImportsLifetimeManager);
}
}
diff --git a/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
index d7ccc0d5a6c5..fc7b8fc25bd9 100644
--- a/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -93,7 +93,7 @@ struct LTOCodeGenerator {
void setFileType(CodeGenFileType FT) { FileType = FT; }
void setCpu(StringRef MCpu) { this->MCpu = std::string(MCpu); }
- void setAttr(StringRef MAttr) { this->MAttr = std::string(MAttr); }
+ void setAttrs(std::vector<std::string> MAttrs) { this->MAttrs = MAttrs; }
void setOptLevel(unsigned OptLevel);
void setShouldInternalize(bool Value) { ShouldInternalize = Value; }
@@ -145,9 +145,7 @@ struct LTOCodeGenerator {
/// \note It is up to the linker to remove the intermediate output file. Do
/// not try to remove the object file in LTOCodeGenerator's destructor as we
/// don't who (LTOCodeGenerator or the output file) will last longer.
- bool compile_to_file(const char **Name, bool DisableVerify,
- bool DisableInline, bool DisableGVNLoadPRE,
- bool DisableVectorization);
+ bool compile_to_file(const char **Name);
/// As with compile_to_file(), this function compiles the merged module into
a single output file. Instead of returning the output file path to the
@@ -155,15 +153,12 @@ struct LTOCodeGenerator {
/// to the caller. This function should delete the intermediate file once
/// its content is brought to memory. Return NULL if the compilation was not
/// successful.
- std::unique_ptr<MemoryBuffer> compile(bool DisableVerify, bool DisableInline,
- bool DisableGVNLoadPRE,
- bool DisableVectorization);
+ std::unique_ptr<MemoryBuffer> compile();
/// Optimizes the merged module. Returns true on success.
///
/// Calls \a verifyMergedModuleOnce().
- bool optimize(bool DisableVerify, bool DisableInline, bool DisableGVNLoadPRE,
- bool DisableVectorization);
+ bool optimize();
/// Compiles the merged optimized module into a single output file. It brings
/// the output to a buffer, and returns the buffer to the caller. Return NULL
@@ -183,6 +178,8 @@ struct LTOCodeGenerator {
/// assume builtins are present on the target.
void setFreestanding(bool Enabled) { Freestanding = Enabled; }
+ void setDisableVerify(bool Value) { DisableVerify = Value; }
+
void setDiagnosticHandler(lto_diagnostic_handler_t, void *);
LLVMContext &getContext() { return Context; }
@@ -228,7 +225,7 @@ private:
std::vector<std::string> CodegenOptions;
std::string FeatureStr;
std::string MCpu;
- std::string MAttr;
+ std::vector<std::string> MAttrs;
std::string NativeObjectPath;
TargetOptions Options;
CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
@@ -244,6 +241,7 @@ private:
std::unique_ptr<ToolOutputFile> DiagnosticOutputFile;
bool Freestanding = false;
std::unique_ptr<ToolOutputFile> StatsFile = nullptr;
+ bool DisableVerify = false;
};
}
#endif
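
Putting the legacy-interface changes in this file together, a caller migrating from the old boolean-laden entry points would now look roughly like the sketch below; the LLVMContext, module setup and the reportError() helper are assumptions.

    LTOCodeGenerator CodeGen(Context);
    CodeGen.setAttrs({"+sse4.2"});    // example -mattr strings, replaces setAttr()
    CodeGen.setDisableVerify(false);  // replaces the old DisableVerify argument
    if (!CodeGen.optimize())          // no per-call flags anymore
      reportError("LTO optimization failed"); // hypothetical diagnostic helper
    std::unique_ptr<MemoryBuffer> Obj = CodeGen.compile();
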
diff --git a/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOModule.h b/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOModule.h
index 998a4557dd22..310447d615f9 100644
--- a/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOModule.h
+++ b/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOModule.h
@@ -48,8 +48,6 @@ private:
std::string LinkerOpts;
- std::string DependentLibraries;
-
std::unique_ptr<Module> Mod;
MemoryBufferRef MBRef;
ModuleSymbolTable SymTab;
diff --git a/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h b/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h
index 90e2e24294d4..891d534b4fa6 100644
--- a/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h
+++ b/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h
@@ -89,12 +89,10 @@ namespace {
(void) llvm::createLibCallsShrinkWrapPass();
(void) llvm::createCalledValuePropagationPass();
(void) llvm::createConstantMergePass();
- (void) llvm::createConstantPropagationPass();
(void) llvm::createControlHeightReductionLegacyPass();
(void) llvm::createCostModelAnalysisPass();
(void) llvm::createDeadArgEliminationPass();
(void) llvm::createDeadCodeEliminationPass();
- (void) llvm::createDeadInstEliminationPass();
(void) llvm::createDeadStoreEliminationPass();
(void) llvm::createDependenceAnalysisWrapperPass();
(void) llvm::createDomOnlyPrinterPass();
@@ -116,7 +114,6 @@ namespace {
(void) llvm::createGlobalsAAWrapperPass();
(void) llvm::createGuardWideningPass();
(void) llvm::createLoopGuardWideningPass();
- (void) llvm::createIPConstantPropagationPass();
(void) llvm::createIPSCCPPass();
(void) llvm::createInductiveRangeCheckEliminationPass();
(void) llvm::createIndVarSimplifyPass();
@@ -130,6 +127,7 @@ namespace {
(void) llvm::createLazyValueInfoPass();
(void) llvm::createLoopExtractorPass();
(void) llvm::createLoopInterchangePass();
+ (void) llvm::createLoopFlattenPass();
(void) llvm::createLoopPredicationPass();
(void) llvm::createLoopSimplifyPass();
(void) llvm::createLoopSimplifyCFGPass();
@@ -205,7 +203,7 @@ namespace {
(void) llvm::createPrintFunctionPass(os);
(void) llvm::createModuleDebugInfoPrinterPass();
(void) llvm::createPartialInliningPass();
- (void) llvm::createLintPass();
+ (void) llvm::createLintLegacyPassPass();
(void) llvm::createSinkingPass();
(void) llvm::createLowerAtomicPass();
(void) llvm::createCorrelatedValuePropagationPass();
@@ -226,7 +224,7 @@ namespace {
(void) llvm::createMustBeExecutedContextPrinter();
(void) llvm::createFloat2IntPass();
(void) llvm::createEliminateAvailableExternallyPass();
- (void) llvm::createScalarizeMaskedMemIntrinPass();
+ (void) llvm::createScalarizeMaskedMemIntrinLegacyPass();
(void) llvm::createWarnMissedTransformationsPass();
(void) llvm::createHardwareLoopsPass();
(void) llvm::createInjectTLIMappingsLegacyPass();
@@ -241,7 +239,7 @@ namespace {
llvm::TargetLibraryInfo TLI(TLII);
llvm::AliasAnalysis AA(TLI);
llvm::AliasSetTracker X(AA);
- X.add(nullptr, llvm::LocationSize::unknown(),
+ X.add(nullptr, llvm::LocationSize::beforeOrAfterPointer(),
llvm::AAMDNodes()); // for -print-alias-sets
(void) llvm::AreStatisticsEnabled();
(void) llvm::sys::RunningOnValgrind();
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmBackend.h b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmBackend.h
index cc9f42023bc2..94ed3d27e785 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmBackend.h
@@ -144,7 +144,9 @@ public:
/// \param STI - The MCSubtargetInfo in effect when the instruction was
/// encoded.
virtual bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const = 0;
+ const MCSubtargetInfo &STI) const {
+ return false;
+ }
/// Target specific predicate for whether a given fixup requires the
/// associated instruction to be relaxed.
@@ -175,6 +177,10 @@ public:
///
virtual unsigned getMinimumNopSize() const { return 1; }
+ /// Returns the maximum size of a nop in bytes on this target.
+ ///
+ virtual unsigned getMaximumNopSize() const { return 0; }
+
/// Write an (optimal) nop sequence of Count bytes to the given output. If the
/// target cannot generate such a sequence, it should return an error.
///
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfo.h b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfo.h
index 46c5a111c891..9b2ac558756e 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfo.h
@@ -54,6 +54,15 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment };
/// This class is intended to be used as a base class for asm
/// properties and features specific to the target.
class MCAsmInfo {
+public:
+ /// Assembly character literal syntax types.
+ enum AsmCharLiteralSyntax {
+ ACLS_Unknown, /// Unknown; character literals not used by LLVM for this
+ /// target.
+ ACLS_SingleQuotePrefix, /// The desired character is prefixed by a single
+ /// quote, e.g., `'A`.
+ };
+
protected:
//===------------------------------------------------------------------===//
// Properties to be set by the target writer, used to configure asm printer.
@@ -177,6 +186,9 @@ protected:
/// alignment is supported.
bool UseDotAlignForAlignment = false;
+ /// True if the target supports LEB128 directives.
+ bool HasLEB128Directives = true;
+
//===--- Data Emission Directives -------------------------------------===//
/// This should be set to the directive used to get some number of zero (and
@@ -200,6 +212,16 @@ protected:
/// doesn't support this, it can be set to null. Defaults to "\t.asciz\t"
const char *AscizDirective;
+ /// This directive accepts a comma-separated list of bytes for emission as a
+ /// string of bytes. For targets that do not support this, it shall be set to
+ /// null. Defaults to null.
+ const char *ByteListDirective = nullptr;
+
+ /// Form used for character literals in the assembly syntax. Useful for
+ /// producing strings as byte lists. If a target does not use or support
+ /// this, it shall be set to ACLS_Unknown. Defaults to ACLS_Unknown.
+ AsmCharLiteralSyntax CharacterLiteralSyntax = ACLS_Unknown;
+
/// These directives are used to output some unit of integer data to the
/// current section. If a data directive is set to null, smaller data
/// directives will be used to emit the large sizes. Defaults to "\t.byte\t",
@@ -209,6 +231,9 @@ protected:
const char *Data32bitsDirective;
const char *Data64bitsDirective;
+ /// True if data directives support signed values
+ bool SupportsSignedData = true;
+
/// If non-null, a directive that is used to emit a word which should be
/// relocated as a 64-bit GP-relative offset, e.g. .gpdword on Mips. Defaults
/// to nullptr.
@@ -381,6 +406,12 @@ protected:
//===--- Integrated Assembler Information ----------------------------===//
+ // Generated object files can use all ELF features supported by GNU ld of
+ // this binutils version and later. INT_MAX means all features can be used,
+ // regardless of GNU ld support. The default value is referenced by
+ // clang/Driver/Options.td.
+ std::pair<int, int> BinutilsVersion = {2, 26};
+
/// Should we use the integrated assembler?
/// The integrated assembler should be enabled by default (by the
/// constructors) when failing to parse a valid piece of assembly (inline
@@ -436,6 +467,7 @@ public:
const char *getData16bitsDirective() const { return Data16bitsDirective; }
const char *getData32bitsDirective() const { return Data32bitsDirective; }
const char *getData64bitsDirective() const { return Data64bitsDirective; }
+ bool supportsSignedData() const { return SupportsSignedData; }
const char *getGPRel64Directive() const { return GPRel64Directive; }
const char *getGPRel32Directive() const { return GPRel32Directive; }
const char *getDTPRel64Directive() const { return DTPRel64Directive; }
@@ -552,12 +584,18 @@ public:
return UseDotAlignForAlignment;
}
+ bool hasLEB128Directives() const { return HasLEB128Directives; }
+
const char *getZeroDirective() const { return ZeroDirective; }
bool doesZeroDirectiveSupportNonZeroValue() const {
return ZeroDirectiveSupportsNonZeroValue;
}
const char *getAsciiDirective() const { return AsciiDirective; }
const char *getAscizDirective() const { return AscizDirective; }
+ const char *getByteListDirective() const { return ByteListDirective; }
+ AsmCharLiteralSyntax characterLiteralSyntax() const {
+ return CharacterLiteralSyntax;
+ }
bool getAlignmentIsInBytes() const { return AlignmentIsInBytes; }
unsigned getTextAlignFillValue() const { return TextAlignFillValue; }
const char *getGlobalDirective() const { return GlobalDirective; }
@@ -604,10 +642,6 @@ public:
bool doesSupportDebugInformation() const { return SupportsDebugInformation; }
- bool doesSupportExceptionHandling() const {
- return ExceptionsType != ExceptionHandling::None;
- }
-
ExceptionHandling getExceptionHandlingType() const { return ExceptionsType; }
WinEH::EncodingType getWinEHEncodingType() const { return WinEHEncodingType; }
@@ -645,9 +679,17 @@ public:
return InitialFrameState;
}
+ void setBinutilsVersion(std::pair<int, int> Value) {
+ BinutilsVersion = Value;
+ }
+
/// Return true if assembly (inline or otherwise) should be parsed.
bool useIntegratedAssembler() const { return UseIntegratedAssembler; }
+ bool binutilsIsAtLeast(int Major, int Minor) const {
+ return BinutilsVersion >= std::make_pair(Major, Minor);
+ }
+
/// Set whether assembly (inline or otherwise) should be parsed.
virtual void setUseIntegratedAssembler(bool Value) {
UseIntegratedAssembler = Value;
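
To illustrate the intent of the new BinutilsVersion knob, a backend could gate an assembler feature on the assumed minimum GNU binutils version, as in this sketch; the directive name and fallback helper are placeholders, not taken from this patch.

    if (MAI->binutilsIsAtLeast(2, 35))
      OS << "\t.newer_directive\n";  // hypothetical feature needing newer binutils
    else
      emitCompatibleFallback(OS);    // hypothetical fallback path
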
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmMacro.h b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmMacro.h
index 7eecce0faf64..e3d6a858132d 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmMacro.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmMacro.h
@@ -143,10 +143,16 @@ struct MCAsmMacro {
StringRef Name;
StringRef Body;
MCAsmMacroParameters Parameters;
+ std::vector<std::string> Locals;
+ bool IsFunction = false;
public:
MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P)
: Name(N), Body(B), Parameters(std::move(P)) {}
+ MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P,
+ std::vector<std::string> L, bool F)
+ : Name(N), Body(B), Parameters(std::move(P)), Locals(std::move(L)),
+ IsFunction(F) {}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() const { dump(dbgs()); }
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCAssembler.h b/contrib/llvm-project/llvm/include/llvm/MC/MCAssembler.h
index b57439f02ca5..1b76559d33b3 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCAssembler.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCAssembler.h
@@ -202,6 +202,7 @@ private:
bool relaxCVInlineLineTable(MCAsmLayout &Layout,
MCCVInlineLineTableFragment &DF);
bool relaxCVDefRange(MCAsmLayout &Layout, MCCVDefRangeFragment &DF);
+ bool relaxPseudoProbeAddr(MCAsmLayout &Layout, MCPseudoProbeAddrFragment &DF);
/// finishLayout - Finalize a layout, including fragment lowering.
void finishLayout(MCAsmLayout &Layout);
@@ -210,7 +211,12 @@ private:
handleFixup(const MCAsmLayout &Layout, MCFragment &F, const MCFixup &Fixup);
public:
- std::vector<std::pair<StringRef, const MCSymbol *>> Symvers;
+ struct Symver {
+ StringRef Name;
+ const MCSymbol *Sym;
+ SMLoc Loc;
+ };
+ std::vector<Symver> Symvers;
/// Construct a new assembler instance.
//
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCCodeView.h b/contrib/llvm-project/llvm/include/llvm/MC/MCCodeView.h
index 2126354cded6..5770f370341d 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCCodeView.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCCodeView.h
@@ -166,8 +166,6 @@ public:
unsigned FileNo, unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt);
- bool isValidCVFileNumber(unsigned FileNumber);
-
/// Add a line entry.
void addLineEntry(const MCCVLoc &LineEntry);
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h b/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h
index 45be9bb3d225..49ab0ce8d6fd 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h
@@ -22,6 +22,7 @@
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCAsmMacro.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SectionKind.h"
@@ -97,6 +98,7 @@ namespace llvm {
SpecificBumpPtrAllocator<MCSectionMachO> MachOAllocator;
SpecificBumpPtrAllocator<MCSectionWasm> WasmAllocator;
SpecificBumpPtrAllocator<MCSectionXCOFF> XCOFFAllocator;
+ SpecificBumpPtrAllocator<MCInst> MCInstAllocator;
/// Bindings of names to symbols.
SymbolTable Symbols;
@@ -198,6 +200,9 @@ namespace llvm {
/// The Compile Unit ID that we are currently processing.
unsigned DwarfCompileUnitID = 0;
+ /// A collection of MCPseudoProbe in the current module
+ MCPseudoProbeTable PseudoProbeTable;
+
// Sections are differentiated by the quadruple (section_name, group_name,
// unique_id, link_to_symbol_name). Sections sharing the same quadruple are
// combined into one section.
@@ -380,6 +385,11 @@ namespace llvm {
/// @}
+ /// \name MCInst Management
+
+ /// Create and return a new MC instruction.
+ MCInst *createMCInst();
+
/// \name Symbol Management
/// @{
@@ -387,12 +397,16 @@ namespace llvm {
/// unspecified name.
MCSymbol *createLinkerPrivateTempSymbol();
- /// Create and return a new assembler temporary symbol with a unique but
- /// unspecified name.
- MCSymbol *createTempSymbol(bool CanBeUnnamed = true);
+ /// Create a temporary symbol with a unique name. The name will be omitted
+ /// in the symbol table if UseNamesOnTempLabels is false (default except
+ /// MCAsmStreamer). The overload without Name uses an unspecified name.
+ MCSymbol *createTempSymbol();
+ MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix = true);
- MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix,
- bool CanBeUnnamed = true);
+ /// Create a temporary symbol with a unique name whose name cannot be
+ /// omitted in the symbol table. This is rarely used.
+ MCSymbol *createNamedTempSymbol();
+ MCSymbol *createNamedTempSymbol(const Twine &Name);
/// Create the definition of a directional local symbol for numbered label
/// (used for "1:" definitions).
@@ -558,9 +572,8 @@ namespace llvm {
MCSectionXCOFF *getXCOFFSection(StringRef Section,
XCOFF::StorageMappingClass MappingClass,
- XCOFF::SymbolType CSectType,
- XCOFF::StorageClass StorageClass,
- SectionKind K,
+ XCOFF::SymbolType CSectType, SectionKind K,
+ bool MultiSymbolsAllowed = false,
const char *BeginSymName = nullptr);
// Create and save a copy of STI and return a reference to the copy.
@@ -744,6 +757,8 @@ namespace llvm {
}
void undefineMacro(StringRef Name) { MacroMap.erase(Name); }
+
+ MCPseudoProbeTable &getMCPseudoProbeTable() { return PseudoProbeTable; }
};
} // end namespace llvm
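
A small sketch against the revised MCContext surface above; Ctx is assumed to be an existing MCContext.

    MCSymbol *Tmp   = Ctx.createTempSymbol();             // name may be dropped from the symbol table
    MCSymbol *Named = Ctx.createNamedTempSymbol("probe"); // name is always kept
    MCInst *Inst    = Ctx.createMCInst();                 // allocated from the context's own allocator
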
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCDwarf.h b/contrib/llvm-project/llvm/include/llvm/MC/MCDwarf.h
index 70da5f76e766..5bf6496806d8 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCDwarf.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCDwarf.h
@@ -25,6 +25,7 @@
#include <cassert>
#include <cstdint>
#include <string>
+#include <tuple>
#include <utility>
#include <vector>
@@ -387,11 +388,11 @@ public:
int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS);
/// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas using
- /// fixed length operands.
- static bool FixedEncode(MCContext &Context,
- MCDwarfLineTableParams Params,
- int64_t LineDelta, uint64_t AddrDelta,
- raw_ostream &OS, uint32_t *Offset, uint32_t *Size);
+ /// fixed length operands. Returns (Offset, Size, SetDelta).
+ static std::tuple<uint32_t, uint32_t, bool> fixedEncode(MCContext &Context,
+ int64_t LineDelta,
+ uint64_t AddrDelta,
+ raw_ostream &OS);
/// Utility function to emit the encoding to a streamer.
static void Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCELFObjectWriter.h b/contrib/llvm-project/llvm/include/llvm/MC/MCELFObjectWriter.h
index 8f78b99d3794..5d99c494b11e 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCELFObjectWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -23,7 +23,6 @@ namespace llvm {
class MCAssembler;
class MCContext;
class MCFixup;
-class MCObjectWriter;
class MCSymbol;
class MCSymbolELF;
class MCValue;
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCExpr.h b/contrib/llvm-project/llvm/include/llvm/MC/MCExpr.h
index 803c0d443bee..3ffc845e75c0 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCExpr.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCExpr.h
@@ -224,6 +224,7 @@ public:
VK_WEAKREF, // The link between the symbols in .weakref foo, bar
VK_X86_ABS8,
+ VK_X86_PLTOFF,
VK_ARM_NONE,
VK_ARM_GOT_PREL,
@@ -299,9 +300,14 @@ public:
VK_PPC_GOT_TLSLD_HI, // symbol@got@tlsld@h
VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha
VK_PPC_GOT_PCREL, // symbol@got@pcrel
+ VK_PPC_GOT_TLSGD_PCREL, // symbol@got@tlsgd@pcrel
+ VK_PPC_GOT_TLSLD_PCREL, // symbol@got@tlsld@pcrel
+ VK_PPC_GOT_TPREL_PCREL, // symbol@got@tprel@pcrel
+ VK_PPC_TLS_PCREL, // symbol@tls@pcrel
VK_PPC_TLSLD, // symbol@tlsld
VK_PPC_LOCAL, // symbol@local
VK_PPC_NOTOC, // symbol@notoc
+ VK_PPC_PCREL_OPT, // .reloc expr, R_PPC64_PCREL_OPT, expr
VK_COFF_IMGREL32, // symbol@imgrel (image-relative)
@@ -316,8 +322,9 @@ public:
VK_Hexagon_IE_GOT,
VK_WASM_TYPEINDEX, // Reference to a symbol's type (signature)
- VK_WASM_MBREL, // Memory address relative to memory base
- VK_WASM_TBREL, // Table index relative to table bare
+ VK_WASM_TLSREL, // Memory address relative to __tls_base
+ VK_WASM_MBREL, // Memory address relative to __memory_base
+ VK_WASM_TBREL, // Table index relative to __table_base
VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo
VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi
@@ -350,30 +357,20 @@ private:
/// The symbol being referenced.
const MCSymbol *Symbol;
- // Subclass data stores VariantKind in bits 0..15, UseParensForSymbolVariant
- // in bit 16 and HasSubsectionsViaSymbols in bit 17.
+ // Subclass data stores VariantKind in bits 0..15 and HasSubsectionsViaSymbols
+ // in bit 16.
static const unsigned VariantKindBits = 16;
static const unsigned VariantKindMask = (1 << VariantKindBits) - 1;
- /// Specifies how the variant kind should be printed.
- static const unsigned UseParensForSymbolVariantBit = 1 << VariantKindBits;
-
// FIXME: Remove this bit.
- static const unsigned HasSubsectionsViaSymbolsBit =
- 1 << (VariantKindBits + 1);
+ static const unsigned HasSubsectionsViaSymbolsBit = 1 << VariantKindBits;
static unsigned encodeSubclassData(VariantKind Kind,
- bool UseParensForSymbolVariant,
- bool HasSubsectionsViaSymbols) {
+ bool HasSubsectionsViaSymbols) {
return (unsigned)Kind |
- (UseParensForSymbolVariant ? UseParensForSymbolVariantBit : 0) |
(HasSubsectionsViaSymbols ? HasSubsectionsViaSymbolsBit : 0);
}
- bool useParensForSymbolVariant() const {
- return (getSubclassData() & UseParensForSymbolVariantBit) != 0;
- }
-
explicit MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind,
const MCAsmInfo *MAI, SMLoc Loc = SMLoc());
@@ -400,8 +397,6 @@ public:
return (VariantKind)(getSubclassData() & VariantKindMask);
}
- void printVariantKind(raw_ostream &OS) const;
-
bool hasSubsectionsViaSymbols() const {
return (getSubclassData() & HasSubsectionsViaSymbolsBit) != 0;
}
@@ -499,6 +494,7 @@ public:
Mul, ///< Multiplication.
NE, ///< Inequality comparison.
Or, ///< Bitwise or.
+ OrNot, ///< Bitwise or not.
Shl, ///< Shift left.
AShr, ///< Arithmetic shift right.
LShr, ///< Logical shift right.
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCFixup.h b/contrib/llvm-project/llvm/include/llvm/MC/MCFixup.h
index affc846cbdd4..b3a23911d636 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCFixup.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCFixup.h
@@ -9,7 +9,6 @@
#ifndef LLVM_MC_MCFIXUP_H
#define LLVM_MC_MCFIXUP_H
-#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCFragment.h b/contrib/llvm-project/llvm/include/llvm/MC/MCFragment.h
index fb7166e82c09..000b0e33e117 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCFragment.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCFragment.h
@@ -37,6 +37,7 @@ public:
FT_Data,
FT_CompactEncodedInst,
FT_Fill,
+ FT_Nops,
FT_Relaxable,
FT_Org,
FT_Dwarf,
@@ -46,6 +47,7 @@ public:
FT_SymbolId,
FT_CVInlineLines,
FT_CVDefRange,
+ FT_PseudoProbe,
FT_Dummy
};
@@ -63,6 +65,10 @@ private:
/// The layout order of this fragment.
unsigned LayoutOrder;
+ /// The subsection this fragment belongs to. This is 0 if the fragment is not
+ /// in any subsection.
+ unsigned SubsectionNumber = 0;
+
FragmentType Kind;
/// Whether fragment is being laid out.
@@ -101,6 +107,9 @@ public:
bool hasInstructions() const { return HasInstructions; }
void dump() const;
+
+ void setSubsectionNumber(unsigned Value) { SubsectionNumber = Value; }
+ unsigned getSubsectionNumber() const { return SubsectionNumber; }
};
class MCDummyFragment : public MCFragment {
@@ -139,6 +148,7 @@ public:
case MCFragment::FT_Data:
case MCFragment::FT_Dwarf:
case MCFragment::FT_DwarfFrame:
+ case MCFragment::FT_PseudoProbe:
return true;
}
}
@@ -350,6 +360,31 @@ public:
}
};
+class MCNopsFragment : public MCFragment {
+ /// The number of bytes to insert.
+ int64_t Size;
+ /// Maximum number of bytes allowed in each NOP instruction.
+ int64_t ControlledNopLength;
+
+ /// Source location of the directive that this fragment was created for.
+ SMLoc Loc;
+
+public:
+ MCNopsFragment(int64_t NumBytes, int64_t ControlledNopLength, SMLoc L,
+ MCSection *Sec = nullptr)
+ : MCFragment(FT_Nops, false, Sec), Size(NumBytes),
+ ControlledNopLength(ControlledNopLength), Loc(L) {}
+
+ int64_t getNumBytes() const { return Size; }
+ int64_t getControlledNopLength() const { return ControlledNopLength; }
+
+ SMLoc getLoc() const { return Loc; }
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Nops;
+ }
+};
+
class MCOrgFragment : public MCFragment {
/// Value to use for filling bytes.
int8_t Value;
@@ -558,6 +593,23 @@ public:
return F->getKind() == MCFragment::FT_BoundaryAlign;
}
};
+
+class MCPseudoProbeAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
+ /// The expression for the difference of the two symbols that
+ /// make up the address delta between two .pseudoprobe directives.
+ const MCExpr *AddrDelta;
+
+public:
+ MCPseudoProbeAddrFragment(const MCExpr *AddrDelta, MCSection *Sec = nullptr)
+ : MCEncodedFragmentWithFixups<8, 1>(FT_PseudoProbe, false, Sec),
+ AddrDelta(AddrDelta) {}
+
+ const MCExpr &getAddrDelta() const { return *AddrDelta; }
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_PseudoProbe;
+ }
+};
} // end namespace llvm
#endif // LLVM_MC_MCFRAGMENT_H
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCInst.h b/contrib/llvm-project/llvm/include/llvm/MC/MCInst.h
index 360dbda58fcb..2ce2ee063daa 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCInst.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCInst.h
@@ -181,7 +181,7 @@ public:
MCOperand &getOperand(unsigned i) { return Operands[i]; }
unsigned getNumOperands() const { return Operands.size(); }
- void addOperand(const MCOperand &Op) { Operands.push_back(Op); }
+ void addOperand(const MCOperand Op) { Operands.push_back(Op); }
using iterator = SmallVectorImpl<MCOperand>::iterator;
using const_iterator = SmallVectorImpl<MCOperand>::const_iterator;
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCInstPrinter.h b/contrib/llvm-project/llvm/include/llvm/MC/MCInstPrinter.h
index 71e049b92455..8b9ef178e33c 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCInstPrinter.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCInstPrinter.h
@@ -18,6 +18,7 @@ class MCAsmInfo;
class MCInst;
class MCOperand;
class MCInstrInfo;
+class MCInstrAnalysis;
class MCRegisterInfo;
class MCSubtargetInfo;
class raw_ostream;
@@ -48,6 +49,7 @@ protected:
const MCAsmInfo &MAI;
const MCInstrInfo &MII;
const MCRegisterInfo &MRI;
+ const MCInstrAnalysis *MIA = nullptr;
/// True if we are printing marked up assembly.
bool UseMarkup = false;
@@ -63,6 +65,9 @@ protected:
/// (llvm-objdump -d).
bool PrintBranchImmAsAddress = false;
+ /// If true, symbolize branch target and memory reference operands.
+ bool SymbolizeOperands = false;
+
/// Utility function for printing annotations.
void printAnnotation(raw_ostream &OS, StringRef Annot);
@@ -83,6 +88,10 @@ public:
/// Specify a stream to emit comments to.
void setCommentStream(raw_ostream &OS) { CommentStream = &OS; }
+ /// Returns a pair containing the mnemonic for \p MI and the number of bits
+ /// left for further processing by printInstruction (generated by tablegen).
+ virtual std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) = 0;
+
/// Print the specified MCInst to the specified raw_ostream.
///
/// \p Address the address of current instruction on most targets, used to
@@ -115,6 +124,9 @@ public:
PrintBranchImmAsAddress = Value;
}
+ void setSymbolizeOperands(bool Value) { SymbolizeOperands = Value; }
+ void setMCInstrAnalysis(const MCInstrAnalysis *Value) { MIA = Value; }
+
/// Utility function to print immediates in decimal or hex.
format_object<int64_t> formatImm(int64_t Value) const {
return PrintImmHex ? formatHex(Value) : formatDec(Value);
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCInstrDesc.h b/contrib/llvm-project/llvm/include/llvm/MC/MCInstrDesc.h
index 17454e3134a2..cbb061fc6456 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCInstrDesc.h
@@ -27,12 +27,22 @@ class MCInst;
//===----------------------------------------------------------------------===//
namespace MCOI {
-// Operand constraints
+/// Operand constraints. These are encoded in 16 bits with one of the
+/// low-order 3 bits specifying that a constraint is present and the
+/// corresponding high-order hex digit specifying the constraint value.
+/// This allows for a maximum of 3 constraints.
enum OperandConstraint {
- TIED_TO = 0, // Must be allocated the same register as.
- EARLY_CLOBBER // Operand is an early clobber register operand
+ TIED_TO = 0, // Must be allocated the same register as specified value.
+ EARLY_CLOBBER // If present, operand is an early clobber register.
};
+// Define a macro to produce each constraint value.
+#define MCOI_TIED_TO(op) \
+ ((1 << MCOI::TIED_TO) | ((op) << (4 + MCOI::TIED_TO * 4)))
+
+#define MCOI_EARLY_CLOBBER \
+ (1 << MCOI::EARLY_CLOBBER)
+
/// These are flags set on operands, but should be considered
/// private, all access should go through the MCOperandInfo accessors.
/// See the accessors for a description of what these are.
@@ -84,10 +94,9 @@ public:
/// Information about the type of the operand.
uint8_t OperandType;
- /// The lower 16 bits are used to specify which constraints are set.
- /// The higher 16 bits are used to specify the value of constraints (4 bits
- /// each).
- uint32_t Constraints;
+
+ /// Operand constraints (see OperandConstraint enum).
+ uint16_t Constraints;
/// Set if this operand is a pointer value and it requires a callback
/// to look up its register class.
@@ -197,14 +206,14 @@ public:
const MCPhysReg *ImplicitDefs; // Registers implicitly defined by this instr
const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands
- /// Returns the value of the specific constraint if
- /// it is set. Returns -1 if it is not set.
+ /// Returns the value of the specified operand constraint if
+ /// it is present. Returns -1 if it is not present.
int getOperandConstraint(unsigned OpNum,
MCOI::OperandConstraint Constraint) const {
if (OpNum < NumOperands &&
(OpInfo[OpNum].Constraints & (1 << Constraint))) {
- unsigned Pos = 16 + Constraint * 4;
- return (int)(OpInfo[OpNum].Constraints >> Pos) & 0xf;
+ unsigned ValuePos = 4 + Constraint * 4;
+ return (int)(OpInfo[OpNum].Constraints >> ValuePos) & 0x0f;
}
return -1;
}
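
The constraint encoding described above can be checked with a few lines of standalone arithmetic; this mirrors the MCOI_TIED_TO macro and getOperandConstraint() rather than reusing LLVM's headers.

    #include <cstdint>

    constexpr unsigned TIED_TO = 0; // mirrors MCOI::TIED_TO
    constexpr uint16_t encodeTiedTo(unsigned Op) {
      return (1u << TIED_TO) | (Op << (4 + TIED_TO * 4)); // presence bit + value nibble
    }
    static_assert(encodeTiedTo(2) == 0x21, "bit 0 set, operand index in bits 4-7");
    static_assert(((encodeTiedTo(2) >> (4 + TIED_TO * 4)) & 0xf) == 2,
                  "getOperandConstraint() would recover operand index 2");
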
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h b/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h
index 38ba68b78fe1..f4f9c474cdcd 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h
@@ -114,7 +114,7 @@ class MachObjectWriter : public MCObjectWriter {
/// \name Symbol Table Data
/// @{
- StringTableBuilder StringTable{StringTableBuilder::MachO};
+ StringTableBuilder StringTable;
std::vector<MachSymbolData> LocalSymbolData;
std::vector<MachSymbolData> ExternalSymbolData;
std::vector<MachSymbolData> UndefinedSymbolData;
@@ -129,6 +129,8 @@ public:
MachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, bool IsLittleEndian)
: TargetObjectWriter(std::move(MOTW)),
+ StringTable(TargetObjectWriter->is64Bit() ? StringTableBuilder::MachO64
+ : StringTableBuilder::MachO),
W(OS, IsLittleEndian ? support::little : support::big) {}
support::endian::Writer W;
@@ -233,16 +235,6 @@ public:
Relocations[Sec].push_back(P);
}
- void recordScatteredRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- unsigned Log2Size, uint64_t &FixedValue);
-
- void recordTLVPRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue);
-
void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) override;
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCObjectFileInfo.h b/contrib/llvm-project/llvm/include/llvm/MC/MCObjectFileInfo.h
index ca04d8e8d3b6..2e6a84b6861f 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -174,6 +174,10 @@ protected:
/// Section containing metadata on function stack sizes.
MCSection *StackSizesSection = nullptr;
+ /// Section for pseudo probe information used by AutoFDO
+ MCSection *PseudoProbeSection = nullptr;
+ MCSection *PseudoProbeDescSection = nullptr;
+
// ELF specific sections.
MCSection *DataRelROSection = nullptr;
MCSection *MergeableConst4Section = nullptr;
@@ -215,6 +219,7 @@ protected:
MCSection *XDataSection = nullptr;
MCSection *SXDataSection = nullptr;
MCSection *GFIDsSection = nullptr;
+ MCSection *GIATsSection = nullptr;
MCSection *GLJMPSection = nullptr;
// XCOFF specific sections
@@ -249,7 +254,6 @@ public:
MCSection *getDataSection() const { return DataSection; }
MCSection *getBSSSection() const { return BSSSection; }
MCSection *getReadOnlySection() const { return ReadOnlySection; }
- MCSection *getLSDASection() const { return LSDASection; }
MCSection *getCompactUnwindSection() const { return CompactUnwindSection; }
MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
@@ -338,6 +342,12 @@ public:
MCSection *getStackSizesSection(const MCSection &TextSec) const;
+ MCSection *getBBAddrMapSection(const MCSection &TextSec) const;
+
+ MCSection *getPseudoProbeSection(const MCSection *TextSec) const;
+
+ MCSection *getPseudoProbeDescSection(StringRef FuncName) const;
+
// ELF specific sections.
MCSection *getDataRelROSection() const { return DataRelROSection; }
const MCSection *getMergeableConst4Section() const {
@@ -396,14 +406,13 @@ public:
MCSection *getXDataSection() const { return XDataSection; }
MCSection *getSXDataSection() const { return SXDataSection; }
MCSection *getGFIDsSection() const { return GFIDsSection; }
+ MCSection *getGIATsSection() const { return GIATsSection; }
MCSection *getGLJMPSection() const { return GLJMPSection; }
// XCOFF specific sections
MCSection *getTOCBaseSection() const { return TOCBaseSection; }
- MCSection *getEHFrameSection() {
- return EHFrameSection;
- }
+ MCSection *getEHFrameSection() const { return EHFrameSection; }
enum Environment { IsMachO, IsELF, IsCOFF, IsWasm, IsXCOFF };
Environment getObjectFileType() const { return Env; }
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCObjectStreamer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCObjectStreamer.h
index c3f3ae5de921..a00000bc11b6 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -179,6 +179,8 @@ public:
SMLoc Loc = SMLoc()) override;
void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr,
SMLoc Loc = SMLoc()) override;
+ void emitNops(int64_t NumBytes, int64_t ControlledNopLength,
+ SMLoc Loc) override;
void emitFileDirective(StringRef Filename) override;
void emitAddrsig() override;
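
The new emitNops() hook carries the two operands of a .nops-style padding request; a caller would invoke it roughly as below (the streamer object and the byte counts are assumptions).

    // Pad with 16 bytes of nops, none longer than 4 bytes each.
    Streamer.emitNops(/*NumBytes=*/16, /*ControlledNopLength=*/4, SMLoc());
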
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmLexer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmLexer.h
index 05b3695bc7a0..e187a28f267d 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmLexer.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmLexer.h
@@ -56,6 +56,7 @@ private:
bool isAtStartOfComment(const char *Ptr);
bool isAtStatementSeparator(const char *Ptr);
int getNextChar();
+ int peekNextChar();
AsmToken ReturnError(const char *Loc, const std::string &Msg);
AsmToken LexIdentifier();
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index e89abeaac94c..21966d1c742d 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -49,7 +49,11 @@ protected: // Can only create subclasses.
bool SkipSpace = true;
bool AllowAtInIdentifier;
bool IsAtStartOfStatement = true;
+ bool LexMasmHexFloats = false;
bool LexMasmIntegers = false;
+ bool LexMasmStrings = false;
+ bool UseMasmDefaultRadix = false;
+ unsigned DefaultRadix = 10;
AsmCommentConsumer *CommentConsumer = nullptr;
MCAsmLexer();
@@ -147,9 +151,23 @@ public:
this->CommentConsumer = CommentConsumer;
}
- /// Set whether to lex masm-style binary and hex literals. They look like
- /// 0b1101 and 0ABCh respectively.
+ /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified
+ /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
+
+ /// Set whether to use masm-style default-radix integer literals. If disabled,
+ /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]).
+ void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; }
+
+ unsigned getMasmDefaultRadix() const { return DefaultRadix; }
+ void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
+
+ /// Set whether to lex masm-style hex float literals, such as 3f800000r.
+ void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; }
+
+ /// Set whether to lex masm-style string literals, such as 'Can''t find file'
+ /// and "This ""value"" not found".
+ void setLexMasmStrings(bool V) { LexMasmStrings = V; }
};
} // end namespace llvm
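
For orientation, this is roughly how a MASM-oriented front end might flip the new lexer switches; the Lexer object and the chosen radix are assumptions, only the setter names come from this header.

    Lexer.setLexMasmIntegers(true);   // 0ABCh, 576t, 77o, 1101y
    Lexer.setLexMasmHexFloats(true);  // 3f800000r
    Lexer.setLexMasmStrings(true);    // 'Can''t find file', "This ""value"" not found"
    Lexer.useMasmDefaultRadix(true);
    Lexer.setMasmDefaultRadix(16);    // e.g. after a .radix 16 directive
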
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index a68066e0f50b..391a6b0b575e 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -90,6 +90,20 @@ private:
IdKind Kind;
};
+// Generic type information for an assembly object.
+// All sizes measured in bytes.
+struct AsmTypeInfo {
+ StringRef Name;
+ unsigned Size = 0;
+ unsigned ElementSize = 0;
+ unsigned Length = 0;
+};
+
+struct AsmFieldInfo {
+ AsmTypeInfo Type;
+ unsigned Offset = 0;
+};
+
/// Generic Sema callback for assembly parser.
class MCAsmParserSemaCallback {
public:
@@ -170,12 +184,17 @@ public:
virtual bool isParsingMasm() const { return false; }
- virtual bool lookUpField(StringRef Name, StringRef &Type,
- unsigned &Offset) const {
+ virtual bool defineMacro(StringRef Name, StringRef Value) { return true; }
+
+ virtual bool lookUpField(StringRef Name, AsmFieldInfo &Info) const {
+ return true;
+ }
+ virtual bool lookUpField(StringRef Base, StringRef Member,
+ AsmFieldInfo &Info) const {
return true;
}
- virtual bool lookUpField(StringRef Base, StringRef Member, StringRef &Type,
- unsigned &Offset) const {
+
+ virtual bool lookUpType(StringRef Name, AsmTypeInfo &Info) const {
return true;
}
@@ -281,7 +300,8 @@ public:
/// \param Res - The value of the expression. The result is undefined
/// on error.
/// \return - False on success.
- virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) = 0;
+ virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
+ AsmTypeInfo *TypeInfo) = 0;
/// Parse an arbitrary expression, assuming that an initial '(' has
/// already been consumed.
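
A sketch of a caller consuming the new AsmTypeInfo query; it follows the convention suggested by the defaults above (returning true means the lookup failed). The type name and the emitSizeComment() helper are illustrative only.

    AsmTypeInfo TI;
    if (!Parser.lookUpType("DWORD", TI)) // a masm parser override may fill TI
      emitSizeComment(TI.Size);          // hypothetical consumer of the size in bytes
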
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
index 1d10c66b4201..0a1e50d501e9 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -24,7 +24,6 @@
namespace llvm {
class MCInst;
-class MCParsedAsmOperand;
class MCStreamer;
class MCSubtargetInfo;
template <typename T> class SmallVectorImpl;
@@ -370,7 +369,7 @@ public:
// Target-specific parsing of expression.
virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
- return getParser().parsePrimaryExpr(Res, EndLoc);
+ return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
}
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h b/contrib/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h
new file mode 100644
index 000000000000..b9a6196777de
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -0,0 +1,178 @@
+//===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCPseudoProbe to support the pseudo
+// probe encoding for AutoFDO. Pseudo probes together with their inline context
+// are encoded in a DFS recursive way in the .pseudoprobe sections. For each
+// .pseudoprobe section, the encoded binary data consists of one or more
+// function records, each for one outlined function. A function record has the
+// following format:
+//
+// FUNCTION BODY (one for each outlined function present in the text section)
+// GUID (uint64)
+// GUID of the function
+// NPROBES (ULEB128)
+// Number of probes originating from this function.
+// NUM_INLINED_FUNCTIONS (ULEB128)
+// Number of callees inlined into this function, aka number of
+// first-level inlinees
+// PROBE RECORDS
+// A list of NPROBES entries. Each entry contains:
+// INDEX (ULEB128)
+// TYPE (uint4)
+// 0 - block probe, 1 - indirect call, 2 - direct call
+// ATTRIBUTE (uint3)
+// reserved
+// ADDRESS_TYPE (uint1)
+// 0 - code address, 1 - address delta
+// CODE_ADDRESS (uint64 or ULEB128)
+// code address or address delta, depending on ADDRESS_TYPE
+// INLINED FUNCTION RECORDS
+// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
+// callees. Each record contains:
+// INLINE SITE
+// GUID of the inlinee (uint64)
+// ID of the callsite probe (ULEB128)
+// FUNCTION BODY
+// A FUNCTION BODY entry describing the inlined function.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCPSEUDOPROBE_H
+#define LLVM_MC_MCPSEUDOPROBE_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/MCSection.h"
+#include <functional>
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+class MCStreamer;
+class MCSymbol;
+class MCObjectStreamer;
+
+enum class MCPseudoProbeFlag {
+ // If set, indicates that the probe is encoded as an address delta
+ // instead of a real code address.
+ AddressDelta = 0x1,
+};
+
+/// Instances of this class represent a pseudo probe instance for a pseudo probe
+/// table entry, which is created during a machine instruction is assembled and
+/// uses an address from a temporary label created at the current address in the
+/// current section.
+class MCPseudoProbe {
+ MCSymbol *Label;
+ uint64_t Guid;
+ uint64_t Index;
+ uint8_t Type;
+ uint8_t Attributes;
+
+public:
+ MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
+ uint64_t Attributes)
+ : Label(Label), Guid(Guid), Index(Index), Type(Type),
+ Attributes(Attributes) {
+ assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
+ assert(Attributes <= 0xFF &&
+ "Probe attributes too big to encode, exceeding 2^16");
+ }
+
+ MCSymbol *getLabel() const { return Label; }
+
+ uint64_t getGuid() const { return Guid; }
+
+ uint64_t getIndex() const { return Index; }
+
+ uint8_t getType() const { return Type; }
+
+ uint8_t getAttributes() const { return Attributes; }
+
+ void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
+};
+
+// An inline frame has the form <Guid, ProbeID>
+using InlineSite = std::tuple<uint64_t, uint32_t>;
+using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
+
+// A trie-based data structure to group probes by inline stack.
+// A tree is allocated for a standalone .text section. A fake
+// instance is created as the root of a tree.
+// A real instance of this class is created for each function, either an
+// outlined function that has code in the .text section or an inlined function.
+class MCPseudoProbeInlineTree {
+ uint64_t Guid;
+ // Set of probes that come with the function.
+ std::vector<MCPseudoProbe> Probes;
+ // Use std::map for a deterministic output.
+ std::map<InlineSite, MCPseudoProbeInlineTree *> Inlinees;
+
+ // Root node has a GUID 0.
+ bool isRoot() { return Guid == 0; }
+ MCPseudoProbeInlineTree *getOrAddNode(InlineSite Site);
+
+public:
+ MCPseudoProbeInlineTree() = default;
+ MCPseudoProbeInlineTree(uint64_t Guid) : Guid(Guid) {}
+ ~MCPseudoProbeInlineTree();
+ void addPseudoProbe(const MCPseudoProbe &Probe,
+ const MCPseudoProbeInlineStack &InlineStack);
+ void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe);
+};
+
+/// Instances of this class represent the pseudo probes inserted into a compile
+/// unit.
+class MCPseudoProbeSection {
+public:
+ void addPseudoProbe(MCSection *Sec, const MCPseudoProbe &Probe,
+ const MCPseudoProbeInlineStack &InlineStack) {
+ MCProbeDivisions[Sec].addPseudoProbe(Probe, InlineStack);
+ }
+
+ // TODO: Sort by getOrdinal to ensure a deterministic section order
+ using MCProbeDivisionMap = std::map<MCSection *, MCPseudoProbeInlineTree>;
+
+private:
+ // A collection of MCPseudoProbe for each text section. The MCPseudoProbes
+ // are grouped by GUID of the functions where they are from and will be
+ // encoded by groups. In the comdat scenario, where a text section contains
+ // the code of only a single function, the probes associated with a comdat
+ // function are still grouped by GUIDs due to inlining that can bring probes
+ // from different functions into one function.
+ MCProbeDivisionMap MCProbeDivisions;
+
+public:
+ const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; }
+
+ bool empty() const { return MCProbeDivisions.empty(); }
+
+ void emit(MCObjectStreamer *MCOS);
+};
+
+class MCPseudoProbeTable {
+ // A collection of MCPseudoProbe in the current module grouped by text
+ // sections. MCPseudoProbes will be encoded into a corresponding
+ // .pseudoprobe section. With functions emitted as separate comdats,
+ // a text section contains the code of only a single function, and the
+ // probes associated with the text section will be emitted into a standalone
+ // .pseudoprobe section that shares the same comdat group with the function.
+ MCPseudoProbeSection MCProbeSections;
+
+public:
+ static void emit(MCObjectStreamer *MCOS);
+
+ MCPseudoProbeSection &getProbeSections() { return MCProbeSections; }
+
+#ifndef NDEBUG
+ static int DdgPrintIndent;
+#endif
+};
+} // end namespace llvm
+
+#endif // LLVM_MC_MCPSEUDOPROBE_H
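
To connect this new header with the streamer-side changes later in this patch, here is a rough sketch of recording one probe into the per-context table; every variable used (Ctx, TextSection, ProbeLabel, the GUIDs and IDs) is assumed to exist already.

    MCPseudoProbeInlineStack InlineStack;
    InlineStack.push_back(InlineSite(CallerGuid, CallSiteProbeId)); // one inline frame
    MCPseudoProbe Probe(ProbeLabel, CalleeGuid, /*Index=*/1,
                        /*Type=*/0 /* block probe */, /*Attributes=*/0);
    Ctx.getMCPseudoProbeTable().getProbeSections().addPseudoProbe(
        TextSection, Probe, InlineStack);
    // MCPseudoProbeTable::emit() later walks each inline tree depth-first and
    // writes the FUNCTION BODY records in the format documented at the top of
    // this file.
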
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCRegister.h b/contrib/llvm-project/llvm/include/llvm/MC/MCRegister.h
index 1f3c4b8494cc..8bbeab5bef43 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCRegister.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCRegister.h
@@ -20,6 +20,7 @@ using MCPhysReg = uint16_t;
/// Wrapper class representing physical registers. Should be passed by value.
class MCRegister {
+ friend hash_code hash_value(const MCRegister &);
unsigned Reg;
public:
@@ -46,31 +47,26 @@ public:
/// register. StackSlot values do not exist in the MC layer, see
/// Register::isStackSlot() for the more information on them.
///
- /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack
- /// slots, so if a variable may contains a stack slot, always check
- /// isStackSlot() first.
static bool isStackSlot(unsigned Reg) {
- return !(Reg & VirtualRegFlag) &&
- uint32_t(Reg & ~VirtualRegFlag) >= FirstStackSlot;
+ return FirstStackSlot <= Reg && Reg < VirtualRegFlag;
}
/// Return true if the specified register number is in
/// the physical register namespace.
static bool isPhysicalRegister(unsigned Reg) {
- assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
- return Reg >= FirstPhysicalReg && !(Reg & VirtualRegFlag);
- }
-
- /// Return true if the specified register number is in the physical register
- /// namespace.
- bool isPhysical() const {
- return isPhysicalRegister(Reg);
+ return FirstPhysicalReg <= Reg && Reg < FirstStackSlot;
}
constexpr operator unsigned() const {
return Reg;
}
+ /// Check that the provided unsigned value is a valid MCRegister.
+ static MCRegister from(unsigned Val) {
+ assert(Val == NoRegister || isPhysicalRegister(Val));
+ return MCRegister(Val);
+ }
+
unsigned id() const {
return Reg;
}
@@ -110,6 +106,9 @@ template<> struct DenseMapInfo<MCRegister> {
}
};
+inline hash_code hash_value(const MCRegister &Reg) {
+ return hash_value(Reg.id());
+}
}
#endif // ifndef LLVM_MC_REGISTER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCRegisterInfo.h b/contrib/llvm-project/llvm/include/llvm/MC/MCRegisterInfo.h
index 9864d95d19e0..0c1ac6254ec1 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -675,6 +675,7 @@ public:
MCRegUnitIterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
assert(Reg && "Null register has no regunits");
+ assert(MCRegister::isPhysicalRegister(Reg.id()));
// Decode the RegUnits MCRegisterDesc field.
unsigned RU = MCRI->get(Reg).RegUnits;
unsigned Scale = RU & 15;
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSchedule.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSchedule.h
index 66c5659af3a7..ee0e5b4df9f0 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSchedule.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSchedule.h
@@ -205,7 +205,7 @@ struct MCExtraProcessorInfo {
/// subtargets can't be done. Nonetheless, the abstract model is
/// useful. Furthermore, subtargets typically extend this model with processor
/// specific resources to model any hardware features that can be exploited by
-/// sceduling heuristics and aren't sufficiently represented in the abstract.
+/// scheduling heuristics and aren't sufficiently represented in the abstract.
///
/// The abstract pipeline is built around the notion of an "issue point". This
/// is merely a reference point for counting machine cycles. The physical
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSectionXCOFF.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSectionXCOFF.h
index eed6b9c2609c..aa39dff07180 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSectionXCOFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSectionXCOFF.h
@@ -34,22 +34,23 @@ class MCSectionXCOFF final : public MCSection {
XCOFF::StorageMappingClass MappingClass;
XCOFF::SymbolType Type;
- XCOFF::StorageClass StorageClass;
MCSymbolXCOFF *const QualName;
StringRef SymbolTableName;
+ bool MultiSymbolsAllowed;
static constexpr unsigned DefaultAlignVal = 4;
MCSectionXCOFF(StringRef Name, XCOFF::StorageMappingClass SMC,
- XCOFF::SymbolType ST, XCOFF::StorageClass SC, SectionKind K,
- MCSymbolXCOFF *QualName, MCSymbol *Begin,
- StringRef SymbolTableName)
+ XCOFF::SymbolType ST, SectionKind K, MCSymbolXCOFF *QualName,
+ MCSymbol *Begin, StringRef SymbolTableName,
+ bool MultiSymbolsAllowed)
: MCSection(SV_XCOFF, Name, K, Begin), MappingClass(SMC), Type(ST),
- StorageClass(SC), QualName(QualName), SymbolTableName(SymbolTableName) {
+ QualName(QualName), SymbolTableName(SymbolTableName),
+ MultiSymbolsAllowed(MultiSymbolsAllowed) {
assert((ST == XCOFF::XTY_SD || ST == XCOFF::XTY_CM || ST == XCOFF::XTY_ER) &&
"Invalid or unhandled type for csect.");
assert(QualName != nullptr && "QualName is needed.");
- QualName->setStorageClass(SC);
QualName->setRepresentedCsect(this);
+ QualName->setStorageClass(XCOFF::C_HIDEXT);
// A csect is 4 byte aligned by default, except for undefined symbol csects.
if (Type != XCOFF::XTY_ER)
setAlignment(Align(DefaultAlignVal));
@@ -65,7 +66,9 @@ public:
}
XCOFF::StorageMappingClass getMappingClass() const { return MappingClass; }
- XCOFF::StorageClass getStorageClass() const { return StorageClass; }
+ XCOFF::StorageClass getStorageClass() const {
+ return QualName->getStorageClass();
+ }
XCOFF::SymbolType getCSectType() const { return Type; }
MCSymbolXCOFF *getQualNameSymbol() const { return QualName; }
@@ -75,6 +78,7 @@ public:
bool UseCodeAlign() const override;
bool isVirtualSection() const override;
StringRef getSymbolTableName() const { return SymbolTableName; }
+ bool isMultiSymbolsAllowed() const { return MultiSymbolsAllowed; }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCStreamer.h
index 484c62538366..cdc728f73772 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCStreamer.h
@@ -13,6 +13,7 @@
#ifndef LLVM_MC_MCSTREAMER_H
#define LLVM_MC_MCSTREAMER_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
@@ -20,6 +21,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWinEH.h"
#include "llvm/Support/Error.h"
@@ -205,6 +207,7 @@ class MCStreamer {
std::vector<std::unique_ptr<WinEH::FrameInfo>> WinFrameInfos;
WinEH::FrameInfo *CurrentWinFrameInfo;
+ size_t CurrentProcWinFrameInfoStartIndex;
/// Tracks an index to represent the order a symbol was emitted in.
/// Zero means we did not emit that symbol.
@@ -214,6 +217,10 @@ class MCStreamer {
/// PushSection.
SmallVector<std::pair<MCSectionSubPair, MCSectionSubPair>, 4> SectionStack;
+ /// Pointer to the parser's SMLoc if available. This is used to provide
+ /// locations for diagnostics.
+ const SMLoc *StartTokLocPtr = nullptr;
+
/// The next unique ID to use when creating a WinCFI-related section (.pdata
/// or .xdata). This ID ensures that we have a one-to-one mapping from
/// code section to unwind info section, which MSVC's incremental linker
@@ -239,6 +246,8 @@ protected:
return CurrentWinFrameInfo;
}
+ virtual void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame);
+
virtual void EmitWindowsUnwindTables();
virtual void emitRawTextImpl(StringRef String);
@@ -258,6 +267,11 @@ public:
TargetStreamer.reset(TS);
}
+ void setStartTokLocPtr(const SMLoc *Loc) { StartTokLocPtr = Loc; }
+ SMLoc getStartTokLoc() const {
+ return StartTokLocPtr ? *StartTokLocPtr : SMLoc();
+ }
+
/// State management
///
virtual void reset();
@@ -442,6 +456,10 @@ public:
/// so we can sort on them later.
void AssignFragment(MCSymbol *Symbol, MCFragment *Fragment);
+ /// Returns the mnemonic for \p MI if the streamer has access to an
+ /// instruction printer, and returns an empty string otherwise.
+ virtual StringRef getMnemonic(MCInst &MI) { return ""; }
+
/// Emit a label for \p Symbol into the current section.
///
/// This corresponds to an assembler statement such as:
@@ -673,6 +691,7 @@ public:
/// Special case of EmitValue that avoids the client having
/// to pass in a MCExpr for constant integers.
virtual void emitIntValue(uint64_t Value, unsigned Size);
+ virtual void emitIntValue(APInt Value);
/// Special case of EmitValue that avoids the client having to pass
/// in a MCExpr for constant integers & prints in Hex format for certain
@@ -777,6 +796,9 @@ public:
virtual void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr,
SMLoc Loc = SMLoc());
+ virtual void emitNops(int64_t NumBytes, int64_t ControlledNopLength,
+ SMLoc Loc);
+
/// Emit NumBytes worth of zeros.
/// This function properly handles data in virtual sections.
void emitZeros(uint64_t NumBytes);
@@ -1028,6 +1050,11 @@ public:
/// Emit the given \p Instruction into the current section.
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
+ /// Emit a pseudo probe into the current section.
+ virtual void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
+ uint64_t Attr,
+ const MCPseudoProbeInlineStack &InlineStack);
+
/// Set the bundle alignment mode from now on in the section.
/// The argument is the power of 2 to which the alignment is set. The
/// value 0 means turn the bundle alignment off.
@@ -1050,7 +1077,7 @@ public:
/// Streamer specific finalization.
virtual void finishImpl();
/// Finish emission of machine code.
- void Finish();
+ void Finish(SMLoc EndLoc = SMLoc());
virtual bool mayHaveInstructions(MCSection &Sec) const { return true; }
};
@@ -1059,28 +1086,6 @@ public:
/// timing the assembler front end.
MCStreamer *createNullStreamer(MCContext &Ctx);
-/// Create a machine code streamer which will print out assembly for the native
-/// target, suitable for compiling with a native assembler.
-///
-/// \param InstPrint - If given, the instruction printer to use. If not given
-/// the MCInst representation will be printed. This method takes ownership of
-/// InstPrint.
-///
-/// \param CE - If given, a code emitter to use to show the instruction
-/// encoding inline with the assembly. This method takes ownership of \p CE.
-///
-/// \param TAB - If given, a target asm backend to use to show the fixup
-/// information in conjunction with encoding information. This method takes
-/// ownership of \p TAB.
-///
-/// \param ShowInst - Whether to show the MCInst representation inline with
-/// the assembly.
-MCStreamer *createAsmStreamer(MCContext &Ctx,
- std::unique_ptr<formatted_raw_ostream> OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst);
-
} // end namespace llvm
#endif // LLVM_MC_MCSTREAMER_H
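Editor's note: as a quick illustration of the new location plumbing in MCStreamer above, here is a minimal hedged sketch, not part of the patch. It assumes Str is an already-constructed streamer and that the SMLoc values come from a real lexer; the function name is invented.

    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/SMLoc.h"

    using namespace llvm;

    static void runHypotheticalParse(MCStreamer &Str, SMLoc StartTok, SMLoc EndTok) {
      // The streamer stores only a pointer, so StartTok must outlive the parse.
      Str.setStartTokLocPtr(&StartTok);

      // Diagnostics emitted later can be anchored at the starting token.
      SMLoc DiagLoc = Str.getStartTokLoc();
      (void)DiagLoc;

      // Finish() now optionally carries the end-of-input location.
      Str.Finish(EndTok);

      // Clear the pointer so the streamer never dangles after the parse.
      Str.setStartTokLocPtr(nullptr);
    }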
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSubtargetInfo.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSubtargetInfo.h
index 61cbb842502e..2c1072d833fb 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -54,6 +54,7 @@ struct SubtargetFeatureKV {
struct SubtargetSubTypeKV {
const char *Key; ///< K-V key string
FeatureBitArray Implies; ///< K-V bit mask
+ FeatureBitArray TuneImplies; ///< K-V bit mask
const MCSchedModel *SchedModel;
/// Compare routine for std::lower_bound
@@ -74,6 +75,7 @@ struct SubtargetSubTypeKV {
class MCSubtargetInfo {
Triple TargetTriple;
std::string CPU; // CPU being targeted.
+ std::string TuneCPU; // CPU being tuned for.
ArrayRef<SubtargetFeatureKV> ProcFeatures; // Processor feature list
ArrayRef<SubtargetSubTypeKV> ProcDesc; // Processor descriptions
@@ -90,8 +92,8 @@ class MCSubtargetInfo {
public:
MCSubtargetInfo(const MCSubtargetInfo &) = default;
- MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS,
- ArrayRef<SubtargetFeatureKV> PF,
+ MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU,
+ StringRef FS, ArrayRef<SubtargetFeatureKV> PF,
ArrayRef<SubtargetSubTypeKV> PD,
const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL,
const MCReadAdvanceEntry *RA, const InstrStage *IS,
@@ -103,6 +105,7 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
StringRef getCPU() const { return CPU; }
+ StringRef getTuneCPU() const { return TuneCPU; }
const FeatureBitset& getFeatureBits() const { return FeatureBits; }
void setFeatureBits(const FeatureBitset &FeatureBits_) {
@@ -118,12 +121,12 @@ protected:
///
/// FIXME: Find a way to stick this in the constructor, since it should only
/// be called during initialization.
- void InitMCProcessorInfo(StringRef CPU, StringRef FS);
+ void InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU, StringRef FS);
public:
- /// Set the features to the default for the given CPU with an appended feature
- /// string.
- void setDefaultFeatures(StringRef CPU, StringRef FS);
+ /// Set the features to the default for the given CPU and TuneCPU, with an
+ /// appended feature string.
+ void setDefaultFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
/// Toggle a feature and return the re-computed feature bits.
/// This version does not change the implied bits.
@@ -210,15 +213,16 @@ public:
void initInstrItins(InstrItineraryData &InstrItins) const;
/// Resolve a variant scheduling class for the given MCInst and CPU.
- virtual unsigned
- resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI,
- unsigned CPUID) const {
+ virtual unsigned resolveVariantSchedClass(unsigned SchedClass,
+ const MCInst *MI,
+ const MCInstrInfo *MCII,
+ unsigned CPUID) const {
return 0;
}
/// Check whether the CPU string is valid.
bool isCPUStringValid(StringRef CPU) const {
- auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
+ auto Found = llvm::lower_bound(ProcDesc, CPU);
return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
}
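Editor's note: a hedged sketch of how a consumer might read the new TuneCPU accessor alongside getCPU(). STI is assumed to be initialized elsewhere, and the fallback to the target CPU is an assumption for the example, not something the header mandates.

    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void printCPUs(const MCSubtargetInfo &STI) {
      StringRef TargetCPU = STI.getCPU();   // ISA the code is generated for
      StringRef TuneCPU = STI.getTuneCPU(); // microarchitecture to schedule for
      if (TuneCPU.empty())
        TuneCPU = TargetCPU;                // assumed fallback for this sketch
      errs() << "target=" << TargetCPU << " tune=" << TuneCPU << "\n";
    }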
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbol.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbol.h
index 84263bf94035..a83781f5c586 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbol.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbol.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFragment.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -27,7 +28,6 @@ namespace llvm {
class MCAsmInfo;
class MCContext;
-class MCExpr;
class MCSection;
class raw_ostream;
@@ -94,7 +94,8 @@ protected:
mutable unsigned IsRegistered : 1;
- /// This symbol is visible outside this translation unit.
+ /// True if this symbol is visible outside this translation unit. Note: ELF
+ /// uses binding instead of this bit.
mutable unsigned IsExternal : 1;
/// This symbol is private extern.
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolWasm.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolWasm.h
index ffd8a7aad312..ae512fd27be2 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -25,6 +25,7 @@ class MCSymbolWasm : public MCSymbol {
Optional<StringRef> ExportName;
wasm::WasmSignature *Signature = nullptr;
Optional<wasm::WasmGlobalType> GlobalType;
+ Optional<wasm::ValType> TableType;
Optional<wasm::WasmEventType> EventType;
/// An expression describing how to calculate the size of a symbol. If a
@@ -42,6 +43,7 @@ public:
bool isFunction() const { return Type == wasm::WASM_SYMBOL_TYPE_FUNCTION; }
bool isData() const { return Type == wasm::WASM_SYMBOL_TYPE_DATA; }
bool isGlobal() const { return Type == wasm::WASM_SYMBOL_TYPE_GLOBAL; }
+ bool isTable() const { return Type == wasm::WASM_SYMBOL_TYPE_TABLE; }
bool isSection() const { return Type == wasm::WASM_SYMBOL_TYPE_SECTION; }
bool isEvent() const { return Type == wasm::WASM_SYMBOL_TYPE_EVENT; }
wasm::WasmSymbolType getType() const { return Type; }
@@ -94,6 +96,15 @@ public:
StringRef getExportName() const { return ExportName.getValue(); }
void setExportName(StringRef Name) { ExportName = Name; }
+ bool isFunctionTable() const {
+ return isTable() && hasTableType() &&
+ getTableType() == wasm::ValType::FUNCREF;
+ }
+ void setFunctionTable() {
+ setType(wasm::WASM_SYMBOL_TYPE_TABLE);
+ setTableType(wasm::ValType::FUNCREF);
+ }
+
void setUsedInGOT() const { IsUsedInGOT = true; }
bool isUsedInGOT() const { return IsUsedInGOT; }
@@ -109,6 +120,13 @@ public:
}
void setGlobalType(wasm::WasmGlobalType GT) { GlobalType = GT; }
+ bool hasTableType() const { return TableType.hasValue(); }
+ wasm::ValType getTableType() const {
+ assert(hasTableType());
+ return TableType.getValue();
+ }
+ void setTableType(wasm::ValType TT) { TableType = TT; }
+
const wasm::WasmEventType &getEventType() const {
assert(EventType.hasValue());
return EventType.getValue();
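Editor's note: a small hedged sketch of the new table-type helpers on MCSymbolWasm; the symbol is assumed to come from an MCContext elsewhere, and the function name is invented.

    #include "llvm/MC/MCSymbolWasm.h"
    #include <cassert>

    using namespace llvm;

    static void markAsFunctionTable(MCSymbolWasm &Sym) {
      // setFunctionTable() both sets the symbol type to TABLE and records the
      // FUNCREF value type, so isFunctionTable() becomes true.
      Sym.setFunctionTable();
      assert(Sym.isTable() && Sym.hasTableType());
      assert(Sym.getTableType() == wasm::ValType::FUNCREF);
      assert(Sym.isFunctionTable());
    }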
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolXCOFF.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolXCOFF.h
index d0379ec08b7d..752e1e7bba0f 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolXCOFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolXCOFF.h
@@ -35,8 +35,6 @@ public:
}
void setStorageClass(XCOFF::StorageClass SC) {
- assert((!StorageClass.hasValue() || StorageClass.getValue() == SC) &&
- "Redefining StorageClass of XCOFF MCSymbol.");
StorageClass = SC;
};
@@ -48,8 +46,6 @@ public:
StringRef getUnqualifiedName() const { return getUnqualifiedName(getName()); }
- bool hasRepresentedCsectSet() const { return RepresentedCsect != nullptr; }
-
MCSectionXCOFF *getRepresentedCsect() const;
void setRepresentedCsect(MCSectionXCOFF *C);
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h b/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h
index 4b786751dbd1..d29a74905ebf 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h
@@ -22,6 +22,7 @@ enum class ExceptionHandling {
ARM, ///< ARM EHABI
WinEH, ///< Windows Exception Handling
Wasm, ///< WebAssembly Exception Handling
+ AIX, ///< AIX Exception Handling
};
enum class DebugCompressionType {
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCWasmObjectWriter.h b/contrib/llvm-project/llvm/include/llvm/MC/MCWasmObjectWriter.h
index 382818ad6867..00da632bbcc6 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCWasmObjectWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCWasmObjectWriter.h
@@ -52,6 +52,10 @@ std::unique_ptr<MCObjectWriter>
createWasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW,
raw_pwrite_stream &OS);
+std::unique_ptr<MCObjectWriter>
+createWasmDwoObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS);
+
} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCWasmStreamer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCWasmStreamer.h
index 61075e7a5732..6651f071f799 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCWasmStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCWasmStreamer.h
@@ -59,13 +59,9 @@ public:
SMLoc Loc = SMLoc()) override;
void emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment = 0) override;
- void emitValueImpl(const MCExpr *Value, unsigned Size,
- SMLoc Loc = SMLoc()) override;
void emitIdent(StringRef IdentString) override;
- void emitValueToAlignment(unsigned, int64_t, unsigned, unsigned) override;
-
void finishImpl() override;
private:
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCWin64EH.h b/contrib/llvm-project/llvm/include/llvm/MC/MCWin64EH.h
index 60ec06e61b7c..065161d1759e 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCWin64EH.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCWin64EH.h
@@ -53,14 +53,15 @@ struct Instruction {
class UnwindEmitter : public WinEH::UnwindEmitter {
public:
void Emit(MCStreamer &Streamer) const override;
- void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI) const override;
+ void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI,
+ bool HandlerData) const override;
};
class ARM64UnwindEmitter : public WinEH::UnwindEmitter {
public:
void Emit(MCStreamer &Streamer) const override;
- void EmitUnwindInfo(MCStreamer &Streamer,
- WinEH::FrameInfo *FI) const override;
+ void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI,
+ bool HandlerData) const override;
};
}
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCWinCOFFStreamer.h
index 1236304b9e5d..53b2ef0bd96e 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCWinCOFFStreamer.h
@@ -58,6 +58,7 @@ public:
unsigned ByteAlignment) override;
void emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
+ void emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
void emitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment, SMLoc Loc = SMLoc()) override;
void emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCWinEH.h b/contrib/llvm-project/llvm/include/llvm/MC/MCWinEH.h
index b1c28c0ecae7..5688255810d0 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCWinEH.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCWinEH.h
@@ -26,6 +26,14 @@ struct Instruction {
Instruction(unsigned Op, MCSymbol *L, unsigned Reg, unsigned Off)
: Label(L), Offset(Off), Register(Reg), Operation(Op) {}
+
+ bool operator==(const Instruction &I) const {
+ // Check whether two instructions refer to the same operation
+ // applied at a different spot (i.e. pointing at a different label).
+ return Offset == I.Offset && Register == I.Register &&
+ Operation == I.Operation;
+ }
+ bool operator!=(const Instruction &I) const { return !(*this == I); }
};
struct FrameInfo {
@@ -36,10 +44,12 @@ struct FrameInfo {
const MCSymbol *Function = nullptr;
const MCSymbol *PrologEnd = nullptr;
const MCSymbol *Symbol = nullptr;
- const MCSection *TextSection = nullptr;
+ MCSection *TextSection = nullptr;
+ uint32_t PackedInfo = 0;
bool HandlesUnwind = false;
bool HandlesExceptions = false;
+ bool EmitAttempted = false;
int LastFrameInst = -1;
const FrameInfo *ChainedParent = nullptr;
@@ -53,6 +63,15 @@ struct FrameInfo {
const FrameInfo *ChainedParent)
: Begin(BeginFuncEHLabel), Function(Function),
ChainedParent(ChainedParent) {}
+
+ bool empty() const {
+ if (!Instructions.empty())
+ return false;
+ for (const auto &E : EpilogMap)
+ if (!E.second.empty())
+ return false;
+ return true;
+ }
};
class UnwindEmitter {
@@ -61,7 +80,8 @@ public:
/// This emits the unwind info sections (.pdata and .xdata in PE/COFF).
virtual void Emit(MCStreamer &Streamer) const = 0;
- virtual void EmitUnwindInfo(MCStreamer &Streamer, FrameInfo *FI) const = 0;
+ virtual void EmitUnwindInfo(MCStreamer &Streamer, FrameInfo *FI,
+ bool HandlerData) const = 0;
};
}
}
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/StringTableBuilder.h b/contrib/llvm-project/llvm/include/llvm/MC/StringTableBuilder.h
index d8bfac03f7f2..3f9c91be05d3 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/StringTableBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/StringTableBuilder.h
@@ -22,7 +22,17 @@ class raw_ostream;
/// Utility for building string tables with deduplicated suffixes.
class StringTableBuilder {
public:
- enum Kind { ELF, WinCOFF, MachO, RAW, DWARF, XCOFF };
+ enum Kind {
+ ELF,
+ WinCOFF,
+ MachO,
+ MachO64,
+ MachOLinked,
+ MachO64Linked,
+ RAW,
+ DWARF,
+ XCOFF
+ };
private:
DenseMap<CachedHashStringRef, size_t> StringIndexMap;
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/SubtargetFeature.h b/contrib/llvm-project/llvm/include/llvm/MC/SubtargetFeature.h
index 01ea794a4bc3..cc36b25a4965 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/SubtargetFeature.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/SubtargetFeature.h
@@ -30,7 +30,7 @@ namespace llvm {
class raw_ostream;
class Triple;
-const unsigned MAX_SUBTARGET_WORDS = 3;
+const unsigned MAX_SUBTARGET_WORDS = 4;
const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64;
/// Container class for subtarget features.
diff --git a/contrib/llvm-project/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/contrib/llvm-project/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
index 6c196757e571..0293364e26ef 100644
--- a/contrib/llvm-project/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
+++ b/contrib/llvm-project/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -267,9 +267,9 @@ public:
// This routine performs a sanity check. This routine should only be called
// when we know that 'IR' is not in the scheduler's instruction queues.
void sanityCheck(const InstRef &IR) const {
- assert(find(WaitSet, IR) == WaitSet.end() && "Already in the wait set!");
- assert(find(ReadySet, IR) == ReadySet.end() && "Already in the ready set!");
- assert(find(IssuedSet, IR) == IssuedSet.end() && "Already executing!");
+ assert(!is_contained(WaitSet, IR) && "Already in the wait set!");
+ assert(!is_contained(ReadySet, IR) && "Already in the ready set!");
+ assert(!is_contained(IssuedSet, IR) && "Already executing!");
}
#endif // !NDEBUG
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/ArchiveWriter.h b/contrib/llvm-project/llvm/include/llvm/Object/ArchiveWriter.h
index 274ffd90c05a..7eaf13e8fb22 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/ArchiveWriter.h
@@ -39,6 +39,12 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
bool WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr);
+
+// writeArchiveToBuffer is similar to writeArchive but returns the Archive in a
+// buffer instead of writing it out to a file.
+Expected<std::unique_ptr<MemoryBuffer>>
+writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab,
+ object::Archive::Kind Kind, bool Deterministic, bool Thin);
}
#endif
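Editor's note: a hedged sketch of driving the new in-memory archive writer. Building the NewArchiveMember list and handling the returned Expected are left to the caller; the kind and flags used here are example values only.

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Object/ArchiveWriter.h"
    #include "llvm/Support/MemoryBuffer.h"

    using namespace llvm;
    using namespace llvm::object;

    static Expected<std::unique_ptr<MemoryBuffer>>
    buildArchiveInMemory(ArrayRef<NewArchiveMember> Members) {
      // Same parameters as writeArchive(), but the result stays in memory.
      return writeArchiveToBuffer(Members, /*WriteSymtab=*/true, Archive::K_GNU,
                                  /*Deterministic=*/true, /*Thin=*/false);
    }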
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/Binary.h b/contrib/llvm-project/llvm/include/llvm/Object/Binary.h
index e95516f30a40..dd98e1143e25 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/Binary.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/Binary.h
@@ -91,6 +91,8 @@ public:
Binary(const Binary &other) = delete;
virtual ~Binary();
+ virtual Error initContent() { return Error::success(); };
+
StringRef getData() const;
StringRef getFileName() const;
MemoryBufferRef getMemoryBufferRef() const;
@@ -163,8 +165,8 @@ public:
static Error checkOffset(MemoryBufferRef M, uintptr_t Addr,
const uint64_t Size) {
if (Addr + Size < Addr || Addr + Size < Size ||
- Addr + Size > uintptr_t(M.getBufferEnd()) ||
- Addr < uintptr_t(M.getBufferStart())) {
+ Addr + Size > reinterpret_cast<uintptr_t>(M.getBufferEnd()) ||
+ Addr < reinterpret_cast<uintptr_t>(M.getBufferStart())) {
return errorCodeToError(object_error::unexpected_eof);
}
return Error::success();
@@ -178,7 +180,8 @@ DEFINE_ISA_CONVERSION_FUNCTIONS(Binary, LLVMBinaryRef)
///
/// @param Source The data to create the Binary from.
Expected<std::unique_ptr<Binary>> createBinary(MemoryBufferRef Source,
- LLVMContext *Context = nullptr);
+ LLVMContext *Context = nullptr,
+ bool InitContent = true);
template <typename T> class OwningBinary {
std::unique_ptr<T> Bin;
@@ -228,7 +231,9 @@ template <typename T> const T* OwningBinary<T>::getBinary() const {
return Bin.get();
}
-Expected<OwningBinary<Binary>> createBinary(StringRef Path);
+Expected<OwningBinary<Binary>> createBinary(StringRef Path,
+ LLVMContext *Context = nullptr,
+ bool InitContent = true);
} // end namespace object
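Editor's note: a hedged sketch of the new lazy-initialization path in Binary: pass InitContent=false to createBinary() and call initContent() once the content is actually needed. Error handling is deliberately minimal and the helper name is invented.

    #include "llvm/Object/Binary.h"
    #include "llvm/Support/Error.h"

    using namespace llvm;
    using namespace llvm::object;

    static Error openLazily(StringRef Path) {
      Expected<OwningBinary<Binary>> BinOrErr =
          createBinary(Path, /*Context=*/nullptr, /*InitContent=*/false);
      if (!BinOrErr)
        return BinOrErr.takeError();
      // Heavier parsing is deferred until the content is actually needed.
      return BinOrErr->getBinary()->initContent();
    }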
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/COFF.h b/contrib/llvm-project/llvm/include/llvm/Object/COFF.h
index 8aef00a8809d..e7cf1b5495c6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/COFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/COFF.h
@@ -576,11 +576,22 @@ struct coff_tls_directory {
uint32_t getAlignment() const {
// Bit [20:24] contains section alignment.
- uint32_t Shift = (Characteristics & 0x00F00000) >> 20;
+ uint32_t Shift = (Characteristics & COFF::IMAGE_SCN_ALIGN_MASK) >> 20;
if (Shift > 0)
return 1U << (Shift - 1);
return 0;
}
+
+ void setAlignment(uint32_t Align) {
+ uint32_t AlignBits = 0;
+ if (Align) {
+ assert(llvm::isPowerOf2_32(Align) && "alignment is not a power of 2");
+ assert(llvm::Log2_32(Align) <= 13 && "alignment requested is too large");
+ AlignBits = (llvm::Log2_32(Align) + 1) << 20;
+ }
+ Characteristics =
+ (Characteristics & ~COFF::IMAGE_SCN_ALIGN_MASK) | AlignBits;
+ }
};
using coff_tls_directory32 = coff_tls_directory<support::little32_t>;
@@ -786,6 +797,8 @@ private:
const coff_base_reloc_block_header *BaseRelocEnd;
const debug_directory *DebugDirectoryBegin;
const debug_directory *DebugDirectoryEnd;
+ const coff_tls_directory32 *TLSDirectory32;
+ const coff_tls_directory64 *TLSDirectory64;
// Either coff_load_configuration32 or coff_load_configuration64.
const void *LoadConfig = nullptr;
@@ -805,6 +818,7 @@ private:
Error initExportTablePtr();
Error initBaseRelocPtr();
Error initDebugDirectoryPtr();
+ Error initTLSDirectoryPtr();
Error initLoadConfigPtr();
public:
@@ -976,6 +990,13 @@ public:
return make_range(debug_directory_begin(), debug_directory_end());
}
+ const coff_tls_directory32 *getTLSDirectory32() const {
+ return TLSDirectory32;
+ }
+ const coff_tls_directory64 *getTLSDirectory64() const {
+ return TLSDirectory64;
+ }
+
const dos_header *getDOSHeader() const {
if (!PE32Header && !PE32PlusHeader)
return nullptr;
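Editor's note: the setAlignment() helper added above stores log2(Align) + 1 in bits [20:24] of Characteristics, and getAlignment() reverses that encoding. A brief hedged sketch of the round trip; the helper name is invented.

    #include "llvm/Object/COFF.h"
    #include <cassert>

    using namespace llvm::object;

    // For Align = 16: log2(16) + 1 == 5 is stored in bits [20:24], and
    // getAlignment() recovers 1 << (5 - 1) == 16 again.
    static void roundTripAlignment(coff_tls_directory32 &Dir) {
      Dir.setAlignment(16);            // must be a power of two, at most 8192
      assert(Dir.getAlignment() == 16);
      Dir.setAlignment(0);             // 0 clears the alignment bits entirely
      assert(Dir.getAlignment() == 0);
    }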
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/ELF.h b/contrib/llvm-project/llvm/include/llvm/Object/ELF.h
index b44dd3f48661..447b4c25ce81 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/ELF.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/ELF.h
@@ -30,6 +30,43 @@
namespace llvm {
namespace object {
+struct VerdAux {
+ unsigned Offset;
+ std::string Name;
+};
+
+struct VerDef {
+ unsigned Offset;
+ unsigned Version;
+ unsigned Flags;
+ unsigned Ndx;
+ unsigned Cnt;
+ unsigned Hash;
+ std::string Name;
+ std::vector<VerdAux> AuxV;
+};
+
+struct VernAux {
+ unsigned Hash;
+ unsigned Flags;
+ unsigned Other;
+ unsigned Offset;
+ std::string Name;
+};
+
+struct VerNeed {
+ unsigned Version;
+ unsigned Cnt;
+ unsigned Offset;
+ std::string File;
+ std::vector<VernAux> AuxV;
+};
+
+struct VersionEntry {
+ std::string Name;
+ bool IsVerDef;
+};
+
StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type);
uint32_t getELFRelativeRelocationType(uint32_t Machine);
StringRef getELFSectionTypeName(uint32_t Machine, uint32_t Type);
@@ -48,14 +85,51 @@ static inline Error createError(const Twine &Err) {
return make_error<StringError>(Err, object_error::parse_failed);
}
+enum PPCInstrMasks : uint64_t {
+ PADDI_R12_NO_DISP = 0x0610000039800000,
+ PLD_R12_NO_DISP = 0x04100000E5800000,
+ MTCTR_R12 = 0x7D8903A6,
+ BCTR = 0x4E800420,
+};
+
template <class ELFT> class ELFFile;
+template <class T> struct DataRegion {
+ // This constructor is used when we know the start and the size of a data
+ // region. We assume that Arr does not go past the end of the file.
+ DataRegion(ArrayRef<T> Arr) : First(Arr.data()), Size(Arr.size()) {}
+
+ // Sometimes we only know the start of a data region. We still don't want to
+ // read past the end of the file, so we provide the end of a buffer.
+ DataRegion(const T *Data, const uint8_t *BufferEnd)
+ : First(Data), BufEnd(BufferEnd) {}
+
+ Expected<T> operator[](uint64_t N) {
+ assert(Size || BufEnd);
+ if (Size) {
+ if (N >= *Size)
+ return createError(
+ "the index is greater than or equal to the number of entries (" +
+ Twine(*Size) + ")");
+ } else {
+ const uint8_t *EntryStart = (const uint8_t *)First + N * sizeof(T);
+ if (EntryStart + sizeof(T) > BufEnd)
+ return createError("can't read past the end of the file");
+ }
+ return *(First + N);
+ }
+
+ const T *First;
+ Optional<uint64_t> Size = None;
+ const uint8_t *BufEnd = nullptr;
+};
+
template <class ELFT>
-std::string getSecIndexForError(const ELFFile<ELFT> *Obj,
- const typename ELFT::Shdr *Sec) {
- auto TableOrErr = Obj->sections();
+std::string getSecIndexForError(const ELFFile<ELFT> &Obj,
+ const typename ELFT::Shdr &Sec) {
+ auto TableOrErr = Obj.sections();
if (TableOrErr)
- return "[index " + std::to_string(Sec - &TableOrErr->front()) + "]";
+ return "[index " + std::to_string(&Sec - &TableOrErr->front()) + "]";
// To make this helper be more convenient for error reporting purposes we
// drop the error. But really it should never be triggered. Before this point,
// our code should have called 'sections()' and reported a proper error on
@@ -65,11 +139,21 @@ std::string getSecIndexForError(const ELFFile<ELFT> *Obj,
}
template <class ELFT>
-std::string getPhdrIndexForError(const ELFFile<ELFT> *Obj,
- const typename ELFT::Phdr *Phdr) {
- auto Headers = Obj->program_headers();
+static std::string describe(const ELFFile<ELFT> &Obj,
+ const typename ELFT::Shdr &Sec) {
+ unsigned SecNdx = &Sec - &cantFail(Obj.sections()).front();
+ return (object::getELFSectionTypeName(Obj.getHeader().e_machine,
+ Sec.sh_type) +
+ " section with index " + Twine(SecNdx))
+ .str();
+}
+
+template <class ELFT>
+std::string getPhdrIndexForError(const ELFFile<ELFT> &Obj,
+ const typename ELFT::Phdr &Phdr) {
+ auto Headers = Obj.program_headers();
if (Headers)
- return ("[index " + Twine(Phdr - &Headers->front()) + "]").str();
+ return ("[index " + Twine(&Phdr - &Headers->front()) + "]").str();
// See comment in the getSecIndexForError() above.
llvm::consumeError(Headers.takeError());
return "[unknown index]";
@@ -83,32 +167,6 @@ template <class ELFT>
class ELFFile {
public:
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
- using uintX_t = typename ELFT::uint;
- using Elf_Ehdr = typename ELFT::Ehdr;
- using Elf_Shdr = typename ELFT::Shdr;
- using Elf_Sym = typename ELFT::Sym;
- using Elf_Dyn = typename ELFT::Dyn;
- using Elf_Phdr = typename ELFT::Phdr;
- using Elf_Rel = typename ELFT::Rel;
- using Elf_Rela = typename ELFT::Rela;
- using Elf_Relr = typename ELFT::Relr;
- using Elf_Verdef = typename ELFT::Verdef;
- using Elf_Verdaux = typename ELFT::Verdaux;
- using Elf_Verneed = typename ELFT::Verneed;
- using Elf_Vernaux = typename ELFT::Vernaux;
- using Elf_Versym = typename ELFT::Versym;
- using Elf_Hash = typename ELFT::Hash;
- using Elf_GnuHash = typename ELFT::GnuHash;
- using Elf_Nhdr = typename ELFT::Nhdr;
- using Elf_Note = typename ELFT::Note;
- using Elf_Note_Iterator = typename ELFT::NoteIterator;
- using Elf_Dyn_Range = typename ELFT::DynRange;
- using Elf_Shdr_Range = typename ELFT::ShdrRange;
- using Elf_Sym_Range = typename ELFT::SymRange;
- using Elf_Rel_Range = typename ELFT::RelRange;
- using Elf_Rela_Range = typename ELFT::RelaRange;
- using Elf_Relr_Range = typename ELFT::RelrRange;
- using Elf_Phdr_Range = typename ELFT::PhdrRange;
// This is a callback that can be passed to a number of functions.
// It can be used to ignore non-critical errors (warnings), which is
@@ -118,6 +176,7 @@ public:
using WarningHandler = llvm::function_ref<Error(const Twine &Msg)>;
const uint8_t *base() const { return Buf.bytes_begin(); }
+ const uint8_t *end() const { return base() + getBufSize(); }
size_t getBufSize() const { return Buf.size(); }
@@ -127,26 +186,39 @@ private:
ELFFile(StringRef Object);
public:
- const Elf_Ehdr *getHeader() const {
- return reinterpret_cast<const Elf_Ehdr *>(base());
+ const Elf_Ehdr &getHeader() const {
+ return *reinterpret_cast<const Elf_Ehdr *>(base());
}
template <typename T>
Expected<const T *> getEntry(uint32_t Section, uint32_t Entry) const;
template <typename T>
- Expected<const T *> getEntry(const Elf_Shdr *Section, uint32_t Entry) const;
+ Expected<const T *> getEntry(const Elf_Shdr &Section, uint32_t Entry) const;
+
+ Expected<std::vector<VerDef>>
+ getVersionDefinitions(const Elf_Shdr &Sec) const;
+ Expected<std::vector<VerNeed>> getVersionDependencies(
+ const Elf_Shdr &Sec,
+ WarningHandler WarnHandler = &defaultWarningHandler) const;
+ Expected<StringRef>
+ getSymbolVersionByIndex(uint32_t SymbolVersionIndex, bool &IsDefault,
+ SmallVector<Optional<VersionEntry>, 0> &VersionMap,
+ Optional<bool> IsSymHidden) const;
Expected<StringRef>
- getStringTable(const Elf_Shdr *Section,
+ getStringTable(const Elf_Shdr &Section,
WarningHandler WarnHandler = &defaultWarningHandler) const;
Expected<StringRef> getStringTableForSymtab(const Elf_Shdr &Section) const;
Expected<StringRef> getStringTableForSymtab(const Elf_Shdr &Section,
Elf_Shdr_Range Sections) const;
+ Expected<StringRef> getLinkAsStrtab(const typename ELFT::Shdr &Sec) const;
Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section) const;
Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section,
Elf_Shdr_Range Sections) const;
+ Expected<uint64_t> getDynSymtabSize() const;
+
StringRef getRelocationTypeName(uint32_t Type) const;
void getRelocationTypeName(uint32_t Type,
SmallVectorImpl<char> &Result) const;
@@ -156,18 +228,21 @@ public:
std::string getDynamicTagAsString(uint64_t Type) const;
/// Get the symbol for a given relocation.
- Expected<const Elf_Sym *> getRelocationSymbol(const Elf_Rel *Rel,
+ Expected<const Elf_Sym *> getRelocationSymbol(const Elf_Rel &Rel,
const Elf_Shdr *SymTab) const;
+ Expected<SmallVector<Optional<VersionEntry>, 0>>
+ loadVersionMap(const Elf_Shdr *VerNeedSec, const Elf_Shdr *VerDefSec) const;
+
static Expected<ELFFile> create(StringRef Object);
bool isLE() const {
- return getHeader()->getDataEncoding() == ELF::ELFDATA2LSB;
+ return getHeader().getDataEncoding() == ELF::ELFDATA2LSB;
}
bool isMipsELF64() const {
- return getHeader()->e_machine == ELF::EM_MIPS &&
- getHeader()->getFileClass() == ELF::ELFCLASS64;
+ return getHeader().e_machine == ELF::EM_MIPS &&
+ getHeader().getFileClass() == ELF::ELFCLASS64;
}
bool isMips64EL() const { return isMipsELF64() && isLE(); }
@@ -176,48 +251,50 @@ public:
Expected<Elf_Dyn_Range> dynamicEntries() const;
- Expected<const uint8_t *> toMappedAddr(uint64_t VAddr) const;
+ Expected<const uint8_t *>
+ toMappedAddr(uint64_t VAddr,
+ WarningHandler WarnHandler = &defaultWarningHandler) const;
Expected<Elf_Sym_Range> symbols(const Elf_Shdr *Sec) const {
if (!Sec)
return makeArrayRef<Elf_Sym>(nullptr, nullptr);
- return getSectionContentsAsArray<Elf_Sym>(Sec);
+ return getSectionContentsAsArray<Elf_Sym>(*Sec);
}
- Expected<Elf_Rela_Range> relas(const Elf_Shdr *Sec) const {
+ Expected<Elf_Rela_Range> relas(const Elf_Shdr &Sec) const {
return getSectionContentsAsArray<Elf_Rela>(Sec);
}
- Expected<Elf_Rel_Range> rels(const Elf_Shdr *Sec) const {
+ Expected<Elf_Rel_Range> rels(const Elf_Shdr &Sec) const {
return getSectionContentsAsArray<Elf_Rel>(Sec);
}
- Expected<Elf_Relr_Range> relrs(const Elf_Shdr *Sec) const {
+ Expected<Elf_Relr_Range> relrs(const Elf_Shdr &Sec) const {
return getSectionContentsAsArray<Elf_Relr>(Sec);
}
- Expected<std::vector<Elf_Rela>> decode_relrs(Elf_Relr_Range relrs) const;
+ std::vector<Elf_Rel> decode_relrs(Elf_Relr_Range relrs) const;
- Expected<std::vector<Elf_Rela>> android_relas(const Elf_Shdr *Sec) const;
+ Expected<std::vector<Elf_Rela>> android_relas(const Elf_Shdr &Sec) const;
/// Iterate over program header table.
Expected<Elf_Phdr_Range> program_headers() const {
- if (getHeader()->e_phnum && getHeader()->e_phentsize != sizeof(Elf_Phdr))
+ if (getHeader().e_phnum && getHeader().e_phentsize != sizeof(Elf_Phdr))
return createError("invalid e_phentsize: " +
- Twine(getHeader()->e_phentsize));
+ Twine(getHeader().e_phentsize));
uint64_t HeadersSize =
- (uint64_t)getHeader()->e_phnum * getHeader()->e_phentsize;
- uint64_t PhOff = getHeader()->e_phoff;
+ (uint64_t)getHeader().e_phnum * getHeader().e_phentsize;
+ uint64_t PhOff = getHeader().e_phoff;
if (PhOff + HeadersSize < PhOff || PhOff + HeadersSize > getBufSize())
return createError("program headers are longer than binary of size " +
Twine(getBufSize()) + ": e_phoff = 0x" +
- Twine::utohexstr(getHeader()->e_phoff) +
- ", e_phnum = " + Twine(getHeader()->e_phnum) +
- ", e_phentsize = " + Twine(getHeader()->e_phentsize));
+ Twine::utohexstr(getHeader().e_phoff) +
+ ", e_phnum = " + Twine(getHeader().e_phnum) +
+ ", e_phentsize = " + Twine(getHeader().e_phentsize));
auto *Begin = reinterpret_cast<const Elf_Phdr *>(base() + PhOff);
- return makeArrayRef(Begin, Begin + getHeader()->e_phnum);
+ return makeArrayRef(Begin, Begin + getHeader().e_phnum);
}
/// Get an iterator over notes in a program header.
@@ -231,9 +308,9 @@ public:
assert(Phdr.p_type == ELF::PT_NOTE && "Phdr is not of type PT_NOTE");
ErrorAsOutParameter ErrAsOutParam(&Err);
if (Phdr.p_offset + Phdr.p_filesz > getBufSize()) {
- Err = createError("PT_NOTE header has invalid offset (0x" +
- Twine::utohexstr(Phdr.p_offset) + ") or size (0x" +
- Twine::utohexstr(Phdr.p_filesz) + ")");
+ Err =
+ createError("invalid offset (0x" + Twine::utohexstr(Phdr.p_offset) +
+ ") or size (0x" + Twine::utohexstr(Phdr.p_filesz) + ")");
return Elf_Note_Iterator(Err);
}
return Elf_Note_Iterator(base() + Phdr.p_offset, Phdr.p_filesz, Err);
@@ -250,10 +327,9 @@ public:
assert(Shdr.sh_type == ELF::SHT_NOTE && "Shdr is not of type SHT_NOTE");
ErrorAsOutParameter ErrAsOutParam(&Err);
if (Shdr.sh_offset + Shdr.sh_size > getBufSize()) {
- Err = createError("SHT_NOTE section " + getSecIndexForError(this, &Shdr) +
- " has invalid offset (0x" +
- Twine::utohexstr(Shdr.sh_offset) + ") or size (0x" +
- Twine::utohexstr(Shdr.sh_size) + ")");
+ Err =
+ createError("invalid offset (0x" + Twine::utohexstr(Shdr.sh_offset) +
+ ") or size (0x" + Twine::utohexstr(Shdr.sh_size) + ")");
return Elf_Note_Iterator(Err);
}
return Elf_Note_Iterator(base() + Shdr.sh_offset, Shdr.sh_size, Err);
@@ -291,28 +367,28 @@ public:
Expected<StringRef> getSectionStringTable(
Elf_Shdr_Range Sections,
WarningHandler WarnHandler = &defaultWarningHandler) const;
- Expected<uint32_t> getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms,
- ArrayRef<Elf_Word> ShndxTable) const;
- Expected<const Elf_Shdr *> getSection(const Elf_Sym *Sym,
+ Expected<uint32_t> getSectionIndex(const Elf_Sym &Sym, Elf_Sym_Range Syms,
+ DataRegion<Elf_Word> ShndxTable) const;
+ Expected<const Elf_Shdr *> getSection(const Elf_Sym &Sym,
const Elf_Shdr *SymTab,
- ArrayRef<Elf_Word> ShndxTable) const;
- Expected<const Elf_Shdr *> getSection(const Elf_Sym *Sym,
+ DataRegion<Elf_Word> ShndxTable) const;
+ Expected<const Elf_Shdr *> getSection(const Elf_Sym &Sym,
Elf_Sym_Range Symtab,
- ArrayRef<Elf_Word> ShndxTable) const;
+ DataRegion<Elf_Word> ShndxTable) const;
Expected<const Elf_Shdr *> getSection(uint32_t Index) const;
Expected<const Elf_Sym *> getSymbol(const Elf_Shdr *Sec,
uint32_t Index) const;
Expected<StringRef>
- getSectionName(const Elf_Shdr *Section,
+ getSectionName(const Elf_Shdr &Section,
WarningHandler WarnHandler = &defaultWarningHandler) const;
- Expected<StringRef> getSectionName(const Elf_Shdr *Section,
+ Expected<StringRef> getSectionName(const Elf_Shdr &Section,
StringRef DotShstrtab) const;
template <typename T>
- Expected<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr *Sec) const;
- Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr *Sec) const;
- Expected<ArrayRef<uint8_t>> getSegmentContents(const Elf_Phdr *Phdr) const;
+ Expected<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr &Sec) const;
+ Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr &Sec) const;
+ Expected<ArrayRef<uint8_t>> getSegmentContents(const Elf_Phdr &Phdr) const;
};
using ELF32LEFile = ELFFile<ELF32LE>;
@@ -330,29 +406,30 @@ getSection(typename ELFT::ShdrRange Sections, uint32_t Index) {
template <class ELFT>
inline Expected<uint32_t>
-getExtendedSymbolTableIndex(const typename ELFT::Sym *Sym,
- const typename ELFT::Sym *FirstSym,
- ArrayRef<typename ELFT::Word> ShndxTable) {
- assert(Sym->st_shndx == ELF::SHN_XINDEX);
- unsigned Index = Sym - FirstSym;
- if (Index >= ShndxTable.size())
+getExtendedSymbolTableIndex(const typename ELFT::Sym &Sym, unsigned SymIndex,
+ DataRegion<typename ELFT::Word> ShndxTable) {
+ assert(Sym.st_shndx == ELF::SHN_XINDEX);
+ if (!ShndxTable.First)
return createError(
- "extended symbol index (" + Twine(Index) +
- ") is past the end of the SHT_SYMTAB_SHNDX section of size " +
- Twine(ShndxTable.size()));
+ "found an extended symbol index (" + Twine(SymIndex) +
+ "), but unable to locate the extended symbol index table");
- // The size of the table was checked in getSHNDXTable.
- return ShndxTable[Index];
+ Expected<typename ELFT::Word> TableOrErr = ShndxTable[SymIndex];
+ if (!TableOrErr)
+ return createError("unable to read an extended symbol table at index " +
+ Twine(SymIndex) + ": " +
+ toString(TableOrErr.takeError()));
+ return *TableOrErr;
}
template <class ELFT>
Expected<uint32_t>
-ELFFile<ELFT>::getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms,
- ArrayRef<Elf_Word> ShndxTable) const {
- uint32_t Index = Sym->st_shndx;
+ELFFile<ELFT>::getSectionIndex(const Elf_Sym &Sym, Elf_Sym_Range Syms,
+ DataRegion<Elf_Word> ShndxTable) const {
+ uint32_t Index = Sym.st_shndx;
if (Index == ELF::SHN_XINDEX) {
- auto ErrorOrIndex = getExtendedSymbolTableIndex<ELFT>(
- Sym, Syms.begin(), ShndxTable);
+ Expected<uint32_t> ErrorOrIndex =
+ getExtendedSymbolTableIndex<ELFT>(Sym, &Sym - Syms.begin(), ShndxTable);
if (!ErrorOrIndex)
return ErrorOrIndex.takeError();
return *ErrorOrIndex;
@@ -364,8 +441,8 @@ ELFFile<ELFT>::getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms,
template <class ELFT>
Expected<const typename ELFT::Shdr *>
-ELFFile<ELFT>::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
- ArrayRef<Elf_Word> ShndxTable) const {
+ELFFile<ELFT>::getSection(const Elf_Sym &Sym, const Elf_Shdr *SymTab,
+ DataRegion<Elf_Word> ShndxTable) const {
auto SymsOrErr = symbols(SymTab);
if (!SymsOrErr)
return SymsOrErr.takeError();
@@ -374,8 +451,8 @@ ELFFile<ELFT>::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
template <class ELFT>
Expected<const typename ELFT::Shdr *>
-ELFFile<ELFT>::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols,
- ArrayRef<Elf_Word> ShndxTable) const {
+ELFFile<ELFT>::getSection(const Elf_Sym &Sym, Elf_Sym_Range Symbols,
+ DataRegion<Elf_Word> ShndxTable) const {
auto IndexOrErr = getSectionIndex(Sym, Symbols, ShndxTable);
if (!IndexOrErr)
return IndexOrErr.takeError();
@@ -395,7 +472,7 @@ ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
Elf_Sym_Range Symbols = *SymsOrErr;
if (Index >= Symbols.size())
return createError("unable to get symbol from section " +
- getSecIndexForError(this, Sec) +
+ getSecIndexForError(*this, *Sec) +
": invalid symbol index (" + Twine(Index) + ")");
return &Symbols[Index];
}
@@ -403,26 +480,27 @@ ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
template <class ELFT>
template <typename T>
Expected<ArrayRef<T>>
-ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr *Sec) const {
- if (Sec->sh_entsize != sizeof(T) && sizeof(T) != 1)
- return createError("section " + getSecIndexForError(this, Sec) +
- " has an invalid sh_entsize: " + Twine(Sec->sh_entsize));
+ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr &Sec) const {
+ if (Sec.sh_entsize != sizeof(T) && sizeof(T) != 1)
+ return createError("section " + getSecIndexForError(*this, Sec) +
+ " has invalid sh_entsize: expected " + Twine(sizeof(T)) +
+ ", but got " + Twine(Sec.sh_entsize));
- uintX_t Offset = Sec->sh_offset;
- uintX_t Size = Sec->sh_size;
+ uintX_t Offset = Sec.sh_offset;
+ uintX_t Size = Sec.sh_size;
if (Size % sizeof(T))
- return createError("section " + getSecIndexForError(this, Sec) +
+ return createError("section " + getSecIndexForError(*this, Sec) +
" has an invalid sh_size (" + Twine(Size) +
") which is not a multiple of its sh_entsize (" +
- Twine(Sec->sh_entsize) + ")");
+ Twine(Sec.sh_entsize) + ")");
if (std::numeric_limits<uintX_t>::max() - Offset < Size)
- return createError("section " + getSecIndexForError(this, Sec) +
+ return createError("section " + getSecIndexForError(*this, Sec) +
" has a sh_offset (0x" + Twine::utohexstr(Offset) +
") + sh_size (0x" + Twine::utohexstr(Size) +
") that cannot be represented");
if (Offset + Size > Buf.size())
- return createError("section " + getSecIndexForError(this, Sec) +
+ return createError("section " + getSecIndexForError(*this, Sec) +
" has a sh_offset (0x" + Twine::utohexstr(Offset) +
") + sh_size (0x" + Twine::utohexstr(Size) +
") that is greater than the file size (0x" +
@@ -438,17 +516,17 @@ ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr *Sec) const {
template <class ELFT>
Expected<ArrayRef<uint8_t>>
-ELFFile<ELFT>::getSegmentContents(const Elf_Phdr *Phdr) const {
- uintX_t Offset = Phdr->p_offset;
- uintX_t Size = Phdr->p_filesz;
+ELFFile<ELFT>::getSegmentContents(const Elf_Phdr &Phdr) const {
+ uintX_t Offset = Phdr.p_offset;
+ uintX_t Size = Phdr.p_filesz;
if (std::numeric_limits<uintX_t>::max() - Offset < Size)
- return createError("program header " + getPhdrIndexForError(this, Phdr) +
+ return createError("program header " + getPhdrIndexForError(*this, Phdr) +
" has a p_offset (0x" + Twine::utohexstr(Offset) +
") + p_filesz (0x" + Twine::utohexstr(Size) +
") that cannot be represented");
if (Offset + Size > Buf.size())
- return createError("program header " + getPhdrIndexForError(this, Phdr) +
+ return createError("program header " + getPhdrIndexForError(*this, Phdr) +
" has a p_offset (0x" + Twine::utohexstr(Offset) +
") + p_filesz (0x" + Twine::utohexstr(Size) +
") that is greater than the file size (0x" +
@@ -458,13 +536,13 @@ ELFFile<ELFT>::getSegmentContents(const Elf_Phdr *Phdr) const {
template <class ELFT>
Expected<ArrayRef<uint8_t>>
-ELFFile<ELFT>::getSectionContents(const Elf_Shdr *Sec) const {
+ELFFile<ELFT>::getSectionContents(const Elf_Shdr &Sec) const {
return getSectionContentsAsArray<uint8_t>(Sec);
}
template <class ELFT>
StringRef ELFFile<ELFT>::getRelocationTypeName(uint32_t Type) const {
- return getELFRelocationTypeName(getHeader()->e_machine, Type);
+ return getELFRelocationTypeName(getHeader().e_machine, Type);
}
template <class ELFT>
@@ -500,24 +578,61 @@ void ELFFile<ELFT>::getRelocationTypeName(uint32_t Type,
template <class ELFT>
uint32_t ELFFile<ELFT>::getRelativeRelocationType() const {
- return getELFRelativeRelocationType(getHeader()->e_machine);
+ return getELFRelativeRelocationType(getHeader().e_machine);
+}
+
+template <class ELFT>
+Expected<SmallVector<Optional<VersionEntry>, 0>>
+ELFFile<ELFT>::loadVersionMap(const Elf_Shdr *VerNeedSec,
+ const Elf_Shdr *VerDefSec) const {
+ SmallVector<Optional<VersionEntry>, 0> VersionMap;
+
+ // The first two version indexes are reserved.
+ // Index 0 is VER_NDX_LOCAL, index 1 is VER_NDX_GLOBAL.
+ VersionMap.push_back(VersionEntry());
+ VersionMap.push_back(VersionEntry());
+
+ auto InsertEntry = [&](unsigned N, StringRef Version, bool IsVerdef) {
+ if (N >= VersionMap.size())
+ VersionMap.resize(N + 1);
+ VersionMap[N] = {std::string(Version), IsVerdef};
+ };
+
+ if (VerDefSec) {
+ Expected<std::vector<VerDef>> Defs = getVersionDefinitions(*VerDefSec);
+ if (!Defs)
+ return Defs.takeError();
+ for (const VerDef &Def : *Defs)
+ InsertEntry(Def.Ndx & ELF::VERSYM_VERSION, Def.Name, true);
+ }
+
+ if (VerNeedSec) {
+ Expected<std::vector<VerNeed>> Deps = getVersionDependencies(*VerNeedSec);
+ if (!Deps)
+ return Deps.takeError();
+ for (const VerNeed &Dep : *Deps)
+ for (const VernAux &Aux : Dep.AuxV)
+ InsertEntry(Aux.Other & ELF::VERSYM_VERSION, Aux.Name, false);
+ }
+
+ return VersionMap;
}
template <class ELFT>
Expected<const typename ELFT::Sym *>
-ELFFile<ELFT>::getRelocationSymbol(const Elf_Rel *Rel,
+ELFFile<ELFT>::getRelocationSymbol(const Elf_Rel &Rel,
const Elf_Shdr *SymTab) const {
- uint32_t Index = Rel->getSymbol(isMips64EL());
+ uint32_t Index = Rel.getSymbol(isMips64EL());
if (Index == 0)
return nullptr;
- return getEntry<Elf_Sym>(SymTab, Index);
+ return getEntry<Elf_Sym>(*SymTab, Index);
}
template <class ELFT>
Expected<StringRef>
ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
WarningHandler WarnHandler) const {
- uint32_t Index = getHeader()->e_shstrndx;
+ uint32_t Index = getHeader().e_shstrndx;
if (Index == ELF::SHN_XINDEX) {
// If the section name string table section index is greater than
// or equal to SHN_LORESERVE, then the actual index of the section name
@@ -535,7 +650,100 @@ ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
if (Index >= Sections.size())
return createError("section header string table index " + Twine(Index) +
" does not exist");
- return getStringTable(&Sections[Index], WarnHandler);
+ return getStringTable(Sections[Index], WarnHandler);
+}
+
+/// This function finds the number of dynamic symbols using a GNU hash table.
+///
+/// @param Table The GNU hash table for .dynsym.
+template <class ELFT>
+static Expected<uint64_t>
+getDynSymtabSizeFromGnuHash(const typename ELFT::GnuHash &Table,
+ const void *BufEnd) {
+ using Elf_Word = typename ELFT::Word;
+ if (Table.nbuckets == 0)
+ return Table.symndx + 1;
+ uint64_t LastSymIdx = 0;
+ // Find the index of the first symbol in the last chain.
+ for (Elf_Word Val : Table.buckets())
+ LastSymIdx = std::max(LastSymIdx, (uint64_t)Val);
+ const Elf_Word *It =
+ reinterpret_cast<const Elf_Word *>(Table.values(LastSymIdx).end());
+ // Locate the end of the chain to find the last symbol index.
+ while (It < BufEnd && (*It & 1) == 0) {
+ ++LastSymIdx;
+ ++It;
+ }
+ if (It >= BufEnd) {
+ return createStringError(
+ object_error::parse_failed,
+ "no terminator found for GNU hash section before buffer end");
+ }
+ return LastSymIdx + 1;
+}
+
+/// This function determines the number of dynamic symbols. It reads section
+/// headers first. If section headers are not available, the number of
+/// symbols will be inferred by parsing dynamic hash tables.
+template <class ELFT>
+Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {
+ // Read .dynsym section header first if available.
+ Expected<Elf_Shdr_Range> SectionsOrError = sections();
+ if (!SectionsOrError)
+ return SectionsOrError.takeError();
+ for (const Elf_Shdr &Sec : *SectionsOrError) {
+ if (Sec.sh_type == ELF::SHT_DYNSYM) {
+ if (Sec.sh_size % Sec.sh_entsize != 0) {
+ return createStringError(object_error::parse_failed,
+ "SHT_DYNSYM section has sh_size (" +
+ Twine(Sec.sh_size) + ") % sh_entsize (" +
+ Twine(Sec.sh_entsize) + ") that is not 0");
+ }
+ return Sec.sh_size / Sec.sh_entsize;
+ }
+ }
+
+ if (!SectionsOrError->empty()) {
+ // Section headers are available but .dynsym header is not found.
+ // Return 0 as .dynsym does not exist.
+ return 0;
+ }
+
+ // Section headers do not exist. Falling back to infer
+ // upper bound of .dynsym from .gnu.hash and .hash.
+ Expected<Elf_Dyn_Range> DynTable = dynamicEntries();
+ if (!DynTable)
+ return DynTable.takeError();
+ llvm::Optional<uint64_t> ElfHash;
+ llvm::Optional<uint64_t> ElfGnuHash;
+ for (const Elf_Dyn &Entry : *DynTable) {
+ switch (Entry.d_tag) {
+ case ELF::DT_HASH:
+ ElfHash = Entry.d_un.d_ptr;
+ break;
+ case ELF::DT_GNU_HASH:
+ ElfGnuHash = Entry.d_un.d_ptr;
+ break;
+ }
+ }
+ if (ElfGnuHash) {
+ Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfGnuHash);
+ if (!TablePtr)
+ return TablePtr.takeError();
+ const Elf_GnuHash *Table =
+ reinterpret_cast<const Elf_GnuHash *>(TablePtr.get());
+ return getDynSymtabSizeFromGnuHash<ELFT>(*Table, this->Buf.bytes_end());
+ }
+
+ // Search SYSV hash table to try to find the upper bound of dynsym.
+ if (ElfHash) {
+ Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfHash);
+ if (!TablePtr)
+ return TablePtr.takeError();
+ const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get());
+ return Table->nchain;
+ }
+ return 0;
}
template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
@@ -551,13 +759,13 @@ Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) {
template <class ELFT>
Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
- const uintX_t SectionTableOffset = getHeader()->e_shoff;
+ const uintX_t SectionTableOffset = getHeader().e_shoff;
if (SectionTableOffset == 0)
return ArrayRef<Elf_Shdr>();
- if (getHeader()->e_shentsize != sizeof(Elf_Shdr))
+ if (getHeader().e_shentsize != sizeof(Elf_Shdr))
return createError("invalid e_shentsize in ELF header: " +
- Twine(getHeader()->e_shentsize));
+ Twine(getHeader().e_shentsize));
const uint64_t FileSize = Buf.size();
if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize ||
@@ -574,7 +782,7 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
const Elf_Shdr *First =
reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset);
- uintX_t NumSections = getHeader()->e_shnum;
+ uintX_t NumSections = getHeader().e_shnum;
if (NumSections == 0)
NumSections = First->sh_size;
@@ -605,24 +813,228 @@ Expected<const T *> ELFFile<ELFT>::getEntry(uint32_t Section,
auto SecOrErr = getSection(Section);
if (!SecOrErr)
return SecOrErr.takeError();
- return getEntry<T>(*SecOrErr, Entry);
+ return getEntry<T>(**SecOrErr, Entry);
}
template <class ELFT>
template <typename T>
-Expected<const T *> ELFFile<ELFT>::getEntry(const Elf_Shdr *Section,
+Expected<const T *> ELFFile<ELFT>::getEntry(const Elf_Shdr &Section,
uint32_t Entry) const {
- if (sizeof(T) != Section->sh_entsize)
- return createError("section " + getSecIndexForError(this, Section) +
- " has invalid sh_entsize: expected " + Twine(sizeof(T)) +
- ", but got " + Twine(Section->sh_entsize));
- uint64_t Pos = Section->sh_offset + (uint64_t)Entry * sizeof(T);
- if (Pos + sizeof(T) > Buf.size())
- return createError("unable to access section " +
- getSecIndexForError(this, Section) + " data at 0x" +
- Twine::utohexstr(Pos) +
- ": offset goes past the end of file");
- return reinterpret_cast<const T *>(base() + Pos);
+ Expected<ArrayRef<T>> EntriesOrErr = getSectionContentsAsArray<T>(Section);
+ if (!EntriesOrErr)
+ return EntriesOrErr.takeError();
+
+ ArrayRef<T> Arr = *EntriesOrErr;
+ if (Entry >= Arr.size())
+ return createError(
+ "can't read an entry at 0x" +
+ Twine::utohexstr(Entry * static_cast<uint64_t>(sizeof(T))) +
+ ": it goes past the end of the section (0x" +
+ Twine::utohexstr(Section.sh_size) + ")");
+ return &Arr[Entry];
+}
+
+template <typename ELFT>
+Expected<StringRef> ELFFile<ELFT>::getSymbolVersionByIndex(
+ uint32_t SymbolVersionIndex, bool &IsDefault,
+ SmallVector<Optional<VersionEntry>, 0> &VersionMap,
+ Optional<bool> IsSymHidden) const {
+ size_t VersionIndex = SymbolVersionIndex & llvm::ELF::VERSYM_VERSION;
+
+ // Special markers for unversioned symbols.
+ if (VersionIndex == llvm::ELF::VER_NDX_LOCAL ||
+ VersionIndex == llvm::ELF::VER_NDX_GLOBAL) {
+ IsDefault = false;
+ return "";
+ }
+
+ // Lookup this symbol in the version table.
+ if (VersionIndex >= VersionMap.size() || !VersionMap[VersionIndex])
+ return createError("SHT_GNU_versym section refers to a version index " +
+ Twine(VersionIndex) + " which is missing");
+
+ const VersionEntry &Entry = *VersionMap[VersionIndex];
+ // A default version (@@) is only available for defined symbols.
+ if (!Entry.IsVerDef || IsSymHidden.getValueOr(false))
+ IsDefault = false;
+ else
+ IsDefault = !(SymbolVersionIndex & llvm::ELF::VERSYM_HIDDEN);
+ return Entry.Name.c_str();
+}
+
+template <class ELFT>
+Expected<std::vector<VerDef>>
+ELFFile<ELFT>::getVersionDefinitions(const Elf_Shdr &Sec) const {
+ Expected<StringRef> StrTabOrErr = getLinkAsStrtab(Sec);
+ if (!StrTabOrErr)
+ return StrTabOrErr.takeError();
+
+ Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec);
+ if (!ContentsOrErr)
+ return createError("cannot read content of " + describe(*this, Sec) + ": " +
+ toString(ContentsOrErr.takeError()));
+
+ const uint8_t *Start = ContentsOrErr->data();
+ const uint8_t *End = Start + ContentsOrErr->size();
+
+ auto ExtractNextAux = [&](const uint8_t *&VerdauxBuf,
+ unsigned VerDefNdx) -> Expected<VerdAux> {
+ if (VerdauxBuf + sizeof(Elf_Verdaux) > End)
+ return createError("invalid " + describe(*this, Sec) +
+ ": version definition " + Twine(VerDefNdx) +
+ " refers to an auxiliary entry that goes past the end "
+ "of the section");
+
+ auto *Verdaux = reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
+ VerdauxBuf += Verdaux->vda_next;
+
+ VerdAux Aux;
+ Aux.Offset = VerdauxBuf - Start;
+ if (Verdaux->vda_name <= StrTabOrErr->size())
+ Aux.Name = std::string(StrTabOrErr->drop_front(Verdaux->vda_name));
+ else
+ Aux.Name = ("<invalid vda_name: " + Twine(Verdaux->vda_name) + ">").str();
+ return Aux;
+ };
+
+ std::vector<VerDef> Ret;
+ const uint8_t *VerdefBuf = Start;
+ for (unsigned I = 1; I <= /*VerDefsNum=*/Sec.sh_info; ++I) {
+ if (VerdefBuf + sizeof(Elf_Verdef) > End)
+ return createError("invalid " + describe(*this, Sec) +
+ ": version definition " + Twine(I) +
+ " goes past the end of the section");
+
+ if (reinterpret_cast<uintptr_t>(VerdefBuf) % sizeof(uint32_t) != 0)
+ return createError(
+ "invalid " + describe(*this, Sec) +
+ ": found a misaligned version definition entry at offset 0x" +
+ Twine::utohexstr(VerdefBuf - Start));
+
+ unsigned Version = *reinterpret_cast<const Elf_Half *>(VerdefBuf);
+ if (Version != 1)
+ return createError("unable to dump " + describe(*this, Sec) +
+ ": version " + Twine(Version) +
+ " is not yet supported");
+
+ const Elf_Verdef *D = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
+ VerDef &VD = *Ret.emplace(Ret.end());
+ VD.Offset = VerdefBuf - Start;
+ VD.Version = D->vd_version;
+ VD.Flags = D->vd_flags;
+ VD.Ndx = D->vd_ndx;
+ VD.Cnt = D->vd_cnt;
+ VD.Hash = D->vd_hash;
+
+ const uint8_t *VerdauxBuf = VerdefBuf + D->vd_aux;
+ for (unsigned J = 0; J < D->vd_cnt; ++J) {
+ if (reinterpret_cast<uintptr_t>(VerdauxBuf) % sizeof(uint32_t) != 0)
+ return createError("invalid " + describe(*this, Sec) +
+ ": found a misaligned auxiliary entry at offset 0x" +
+ Twine::utohexstr(VerdauxBuf - Start));
+
+ Expected<VerdAux> AuxOrErr = ExtractNextAux(VerdauxBuf, I);
+ if (!AuxOrErr)
+ return AuxOrErr.takeError();
+
+ if (J == 0)
+ VD.Name = AuxOrErr->Name;
+ else
+ VD.AuxV.push_back(*AuxOrErr);
+ }
+
+ VerdefBuf += D->vd_next;
+ }
+
+ return Ret;
+}
+
+template <class ELFT>
+Expected<std::vector<VerNeed>>
+ELFFile<ELFT>::getVersionDependencies(const Elf_Shdr &Sec,
+ WarningHandler WarnHandler) const {
+ StringRef StrTab;
+ Expected<StringRef> StrTabOrErr = getLinkAsStrtab(Sec);
+ if (!StrTabOrErr) {
+ if (Error E = WarnHandler(toString(StrTabOrErr.takeError())))
+ return std::move(E);
+ } else {
+ StrTab = *StrTabOrErr;
+ }
+
+ Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec);
+ if (!ContentsOrErr)
+ return createError("cannot read content of " + describe(*this, Sec) + ": " +
+ toString(ContentsOrErr.takeError()));
+
+ const uint8_t *Start = ContentsOrErr->data();
+ const uint8_t *End = Start + ContentsOrErr->size();
+ const uint8_t *VerneedBuf = Start;
+
+ std::vector<VerNeed> Ret;
+ for (unsigned I = 1; I <= /*VerneedNum=*/Sec.sh_info; ++I) {
+ if (VerneedBuf + sizeof(Elf_Verdef) > End)
+ return createError("invalid " + describe(*this, Sec) +
+ ": version dependency " + Twine(I) +
+ " goes past the end of the section");
+
+ if (reinterpret_cast<uintptr_t>(VerneedBuf) % sizeof(uint32_t) != 0)
+ return createError(
+ "invalid " + describe(*this, Sec) +
+ ": found a misaligned version dependency entry at offset 0x" +
+ Twine::utohexstr(VerneedBuf - Start));
+
+ unsigned Version = *reinterpret_cast<const Elf_Half *>(VerneedBuf);
+ if (Version != 1)
+ return createError("unable to dump " + describe(*this, Sec) +
+ ": version " + Twine(Version) +
+ " is not yet supported");
+
+ const Elf_Verneed *Verneed =
+ reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+
+ VerNeed &VN = *Ret.emplace(Ret.end());
+ VN.Version = Verneed->vn_version;
+ VN.Cnt = Verneed->vn_cnt;
+ VN.Offset = VerneedBuf - Start;
+
+ if (Verneed->vn_file < StrTab.size())
+ VN.File = std::string(StrTab.drop_front(Verneed->vn_file));
+ else
+ VN.File = ("<corrupt vn_file: " + Twine(Verneed->vn_file) + ">").str();
+
+ const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
+ for (unsigned J = 0; J < Verneed->vn_cnt; ++J) {
+ if (reinterpret_cast<uintptr_t>(VernauxBuf) % sizeof(uint32_t) != 0)
+ return createError("invalid " + describe(*this, Sec) +
+ ": found a misaligned auxiliary entry at offset 0x" +
+ Twine::utohexstr(VernauxBuf - Start));
+
+ if (VernauxBuf + sizeof(Elf_Vernaux) > End)
+ return createError(
+ "invalid " + describe(*this, Sec) + ": version dependency " +
+ Twine(I) +
+ " refers to an auxiliary entry that goes past the end "
+ "of the section");
+
+ const Elf_Vernaux *Vernaux =
+ reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+
+ VernAux &Aux = *VN.AuxV.emplace(VN.AuxV.end());
+ Aux.Hash = Vernaux->vna_hash;
+ Aux.Flags = Vernaux->vna_flags;
+ Aux.Other = Vernaux->vna_other;
+ Aux.Offset = VernauxBuf - Start;
+ if (StrTab.size() <= Vernaux->vna_name)
+ Aux.Name = "<corrupt>";
+ else
+ Aux.Name = std::string(StrTab.drop_front(Vernaux->vna_name));
+
+ VernauxBuf += Vernaux->vna_next;
+ }
+ VerneedBuf += Verneed->vn_next;
+ }
+ return Ret;
}
template <class ELFT>
@@ -636,14 +1048,14 @@ ELFFile<ELFT>::getSection(uint32_t Index) const {
template <class ELFT>
Expected<StringRef>
-ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section,
+ELFFile<ELFT>::getStringTable(const Elf_Shdr &Section,
WarningHandler WarnHandler) const {
- if (Section->sh_type != ELF::SHT_STRTAB)
+ if (Section.sh_type != ELF::SHT_STRTAB)
if (Error E = WarnHandler("invalid sh_type for string table section " +
- getSecIndexForError(this, Section) +
+ getSecIndexForError(*this, Section) +
": expected SHT_STRTAB, but got " +
object::getELFSectionTypeName(
- getHeader()->e_machine, Section->sh_type)))
+ getHeader().e_machine, Section.sh_type)))
return std::move(E);
auto V = getSectionContentsAsArray<char>(Section);
@@ -652,10 +1064,10 @@ ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section,
ArrayRef<char> Data = *V;
if (Data.empty())
return createError("SHT_STRTAB string table section " +
- getSecIndexForError(this, Section) + " is empty");
+ getSecIndexForError(*this, Section) + " is empty");
if (Data.back() != '\0')
return createError("SHT_STRTAB string table section " +
- getSecIndexForError(this, Section) +
+ getSecIndexForError(*this, Section) +
" is non-null terminated");
return StringRef(Data.begin(), Data.size());
}
@@ -674,7 +1086,7 @@ Expected<ArrayRef<typename ELFT::Word>>
ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section,
Elf_Shdr_Range Sections) const {
assert(Section.sh_type == ELF::SHT_SYMTAB_SHNDX);
- auto VOrErr = getSectionContentsAsArray<Elf_Word>(&Section);
+ auto VOrErr = getSectionContentsAsArray<Elf_Word>(Section);
if (!VOrErr)
return VOrErr.takeError();
ArrayRef<Elf_Word> V = *VOrErr;
@@ -684,10 +1096,10 @@ ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section,
const Elf_Shdr &SymTable = **SymTableOrErr;
if (SymTable.sh_type != ELF::SHT_SYMTAB &&
SymTable.sh_type != ELF::SHT_DYNSYM)
- return createError("SHT_SYMTAB_SHNDX section is linked with " +
- object::getELFSectionTypeName(getHeader()->e_machine,
- SymTable.sh_type) +
- " section (expected SHT_SYMTAB/SHT_DYNSYM)");
+ return createError(
+ "SHT_SYMTAB_SHNDX section is linked with " +
+ object::getELFSectionTypeName(getHeader().e_machine, SymTable.sh_type) +
+ " section (expected SHT_SYMTAB/SHT_DYNSYM)");
uint64_t Syms = SymTable.sh_size / sizeof(Elf_Sym);
if (V.size() != Syms)
@@ -715,15 +1127,33 @@ ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec,
if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM)
return createError(
"invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM");
- auto SectionOrErr = object::getSection<ELFT>(Sections, Sec.sh_link);
+ Expected<const Elf_Shdr *> SectionOrErr =
+ object::getSection<ELFT>(Sections, Sec.sh_link);
if (!SectionOrErr)
return SectionOrErr.takeError();
- return getStringTable(*SectionOrErr);
+ return getStringTable(**SectionOrErr);
+}
+
+template <class ELFT>
+Expected<StringRef>
+ELFFile<ELFT>::getLinkAsStrtab(const typename ELFT::Shdr &Sec) const {
+ Expected<const typename ELFT::Shdr *> StrTabSecOrErr =
+ getSection(Sec.sh_link);
+ if (!StrTabSecOrErr)
+ return createError("invalid section linked to " + describe(*this, Sec) +
+ ": " + toString(StrTabSecOrErr.takeError()));
+
+ Expected<StringRef> StrTabOrErr = getStringTable(**StrTabSecOrErr);
+ if (!StrTabOrErr)
+ return createError("invalid string table linked to " +
+ describe(*this, Sec) + ": " +
+ toString(StrTabOrErr.takeError()));
+ return *StrTabOrErr;
}
template <class ELFT>
Expected<StringRef>
-ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section,
+ELFFile<ELFT>::getSectionName(const Elf_Shdr &Section,
WarningHandler WarnHandler) const {
auto SectionsOrErr = sections();
if (!SectionsOrErr)
@@ -735,13 +1165,13 @@ ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section,
}
template <class ELFT>
-Expected<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section,
+Expected<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr &Section,
StringRef DotShstrtab) const {
- uint32_t Offset = Section->sh_name;
+ uint32_t Offset = Section.sh_name;
if (Offset == 0)
return StringRef();
if (Offset >= DotShstrtab.size())
- return createError("a section " + getSecIndexForError(this, Section) +
+ return createError("a section " + getSecIndexForError(*this, Section) +
" has an invalid sh_name (0x" +
Twine::utohexstr(Offset) +
") offset which goes past the end of the "
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h
index 62ecd8b5a7e5..fed53eef68c3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h
@@ -51,6 +51,12 @@ class ELFObjectFileBase : public ObjectFile {
friend class ELFSectionRef;
friend class ELFSymbolRef;
+ SubtargetFeatures getMIPSFeatures() const;
+ SubtargetFeatures getARMFeatures() const;
+ SubtargetFeatures getRISCVFeatures() const;
+
+ StringRef getAMDGPUCPUName() const;
+
protected:
ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
@@ -80,11 +86,7 @@ public:
SubtargetFeatures getFeatures() const override;
- SubtargetFeatures getMIPSFeatures() const;
-
- SubtargetFeatures getARMFeatures() const;
-
- SubtargetFeatures getRISCVFeatures() const;
+ Optional<StringRef> tryGetCPUName() const override;
void setARMSubArch(Triple &TheTriple) const override;
@@ -92,7 +94,8 @@ public:
virtual uint16_t getEMachine() const = 0;
- std::vector<std::pair<DataRefImpl, uint64_t>> getPltAddresses() const;
+ std::vector<std::pair<Optional<DataRefImpl>, uint64_t>>
+ getPltAddresses() const;
};
class ELFSectionRef : public SectionRef {
@@ -230,30 +233,31 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase {
public:
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
- using uintX_t = typename ELFT::uint;
-
- using Elf_Sym = typename ELFT::Sym;
- using Elf_Shdr = typename ELFT::Shdr;
- using Elf_Ehdr = typename ELFT::Ehdr;
- using Elf_Rel = typename ELFT::Rel;
- using Elf_Rela = typename ELFT::Rela;
- using Elf_Dyn = typename ELFT::Dyn;
-
SectionRef toSectionRef(const Elf_Shdr *Sec) const {
return SectionRef(toDRI(Sec), this);
}
+ ELFSymbolRef toSymbolRef(const Elf_Shdr *SymTable, unsigned SymbolNum) const {
+ return ELFSymbolRef({toDRI(SymTable, SymbolNum), this});
+ }
+
+ bool IsContentValid() const { return ContentValid; }
+
private:
ELFObjectFile(MemoryBufferRef Object, ELFFile<ELFT> EF,
const Elf_Shdr *DotDynSymSec, const Elf_Shdr *DotSymtabSec,
- ArrayRef<Elf_Word> ShndxTable);
+ const Elf_Shdr *DotSymtabShndxSec);
+
+ bool ContentValid = false;
protected:
ELFFile<ELFT> EF;
const Elf_Shdr *DotDynSymSec = nullptr; // Dynamic symbol table section.
const Elf_Shdr *DotSymtabSec = nullptr; // Symbol table section.
- ArrayRef<Elf_Word> ShndxTable;
+ const Elf_Shdr *DotSymtabShndxSec = nullptr; // SHT_SYMTAB_SHNDX section.
+
+ Error initContent() override;
void moveSymbolNext(DataRefImpl &Symb) const override;
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
@@ -304,14 +308,6 @@ protected:
uint64_t getSectionOffset(DataRefImpl Sec) const override;
StringRef getRelocationTypeName(uint32_t Type) const;
- /// Get the relocation section that contains \a Rel.
- const Elf_Shdr *getRelSection(DataRefImpl Rel) const {
- auto RelSecOrErr = EF.getSection(Rel.d.a);
- if (!RelSecOrErr)
- report_fatal_error(errorToErrorCode(RelSecOrErr.takeError()).message());
- return *RelSecOrErr;
- }
-
DataRefImpl toDRI(const Elf_Shdr *SymTable, unsigned SymbolNum) const {
DataRefImpl DRI;
if (!SymTable) {
@@ -374,7 +370,7 @@ protected:
for (const Elf_Shdr &Sec : *SectionsOrErr) {
if (Sec.sh_type == ELF::SHT_ARM_ATTRIBUTES ||
Sec.sh_type == ELF::SHT_RISCV_ATTRIBUTES) {
- auto ErrorOrContents = EF.getSectionContents(&Sec);
+ auto ErrorOrContents = EF.getSectionContents(Sec);
if (!ErrorOrContents)
return ErrorOrContents.takeError();
@@ -397,16 +393,22 @@ protected:
public:
ELFObjectFile(ELFObjectFile<ELFT> &&Other);
- static Expected<ELFObjectFile<ELFT>> create(MemoryBufferRef Object);
+ static Expected<ELFObjectFile<ELFT>> create(MemoryBufferRef Object,
+ bool InitContent = true);
const Elf_Rel *getRel(DataRefImpl Rel) const;
const Elf_Rela *getRela(DataRefImpl Rela) const;
- const Elf_Sym *getSymbol(DataRefImpl Sym) const {
- auto Ret = EF.template getEntry<Elf_Sym>(Sym.d.a, Sym.d.b);
- if (!Ret)
- report_fatal_error(errorToErrorCode(Ret.takeError()).message());
- return *Ret;
+ Expected<const Elf_Sym *> getSymbol(DataRefImpl Sym) const {
+ return EF.template getEntry<Elf_Sym>(Sym.d.a, Sym.d.b);
+ }
+
+ /// Get the relocation section that contains \a Rel.
+ const Elf_Shdr *getRelSection(DataRefImpl Rel) const {
+ auto RelSecOrErr = EF.getSection(Rel.d.a);
+ if (!RelSecOrErr)
+ report_fatal_error(errorToErrorCode(RelSecOrErr.takeError()).message());
+ return *RelSecOrErr;
}
const Elf_Shdr *getSection(DataRefImpl Sec) const {
@@ -429,9 +431,9 @@ public:
Triple::ArchType getArch() const override;
Expected<uint64_t> getStartAddress() const override;
- unsigned getPlatformFlags() const override { return EF.getHeader()->e_flags; }
+ unsigned getPlatformFlags() const override { return EF.getHeader().e_flags; }
- const ELFFile<ELFT> *getELFFile() const { return &EF; }
+ const ELFFile<ELFT> &getELFFile() const { return EF; }
bool isDyldType() const { return isDyldELFObject; }
static bool classof(const Binary *v) {
@@ -454,9 +456,40 @@ void ELFObjectFile<ELFT>::moveSymbolNext(DataRefImpl &Sym) const {
++Sym.d.b;
}
+template <class ELFT> Error ELFObjectFile<ELFT>::initContent() {
+ auto SectionsOrErr = EF.sections();
+ if (!SectionsOrErr)
+ return SectionsOrErr.takeError();
+
+ for (const Elf_Shdr &Sec : *SectionsOrErr) {
+ switch (Sec.sh_type) {
+ case ELF::SHT_DYNSYM: {
+ if (!DotDynSymSec)
+ DotDynSymSec = &Sec;
+ break;
+ }
+ case ELF::SHT_SYMTAB: {
+ if (!DotSymtabSec)
+ DotSymtabSec = &Sec;
+ break;
+ }
+ case ELF::SHT_SYMTAB_SHNDX: {
+ if (!DotSymtabShndxSec)
+ DotSymtabShndxSec = &Sec;
+ break;
+ }
+ }
+ }
+
+ ContentValid = true;
+ return Error::success();
+}
+
template <class ELFT>
Expected<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const {
- const Elf_Sym *ESym = getSymbol(Sym);
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Sym);
+ if (!SymOrErr)
+ return SymOrErr.takeError();
auto SymTabOrErr = EF.getSection(Sym.d.a);
if (!SymTabOrErr)
return SymTabOrErr.takeError();
@@ -465,15 +498,15 @@ Expected<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const {
if (!StrTabOrErr)
return StrTabOrErr.takeError();
const Elf_Shdr *StringTableSec = *StrTabOrErr;
- auto SymStrTabOrErr = EF.getStringTable(StringTableSec);
+ auto SymStrTabOrErr = EF.getStringTable(*StringTableSec);
if (!SymStrTabOrErr)
return SymStrTabOrErr.takeError();
- Expected<StringRef> Name = ESym->getName(*SymStrTabOrErr);
+ Expected<StringRef> Name = (*SymOrErr)->getName(*SymStrTabOrErr);
if (Name && !Name->empty())
return Name;
// If the symbol name is empty use the section name.
- if (ESym->getType() == ELF::STT_SECTION) {
+ if ((*SymOrErr)->getType() == ELF::STT_SECTION) {
if (Expected<section_iterator> SecOrErr = getSymbolSection(Sym)) {
consumeError(Name.takeError());
return (*SecOrErr)->getName();
@@ -499,15 +532,18 @@ uint64_t ELFObjectFile<ELFT>::getSectionOffset(DataRefImpl Sec) const {
template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getSymbolValueImpl(DataRefImpl Symb) const {
- const Elf_Sym *ESym = getSymbol(Symb);
- uint64_t Ret = ESym->st_value;
- if (ESym->st_shndx == ELF::SHN_ABS)
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
+ if (!SymOrErr)
+ report_fatal_error(SymOrErr.takeError());
+
+ uint64_t Ret = (*SymOrErr)->st_value;
+ if ((*SymOrErr)->st_shndx == ELF::SHN_ABS)
return Ret;
- const Elf_Ehdr *Header = EF.getHeader();
+ const Elf_Ehdr &Header = EF.getHeader();
// Clear the ARM/Thumb or microMIPS indicator flag.
- if ((Header->e_machine == ELF::EM_ARM || Header->e_machine == ELF::EM_MIPS) &&
- ESym->getType() == ELF::STT_FUNC)
+ if ((Header.e_machine == ELF::EM_ARM || Header.e_machine == ELF::EM_MIPS) &&
+ (*SymOrErr)->getType() == ELF::STT_FUNC)
Ret &= ~1;
return Ret;
@@ -522,22 +558,34 @@ ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb) const {
return SymbolValueOrErr.takeError();
uint64_t Result = *SymbolValueOrErr;
- const Elf_Sym *ESym = getSymbol(Symb);
- switch (ESym->st_shndx) {
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
+ if (!SymOrErr)
+ return SymOrErr.takeError();
+
+ switch ((*SymOrErr)->st_shndx) {
case ELF::SHN_COMMON:
case ELF::SHN_UNDEF:
case ELF::SHN_ABS:
return Result;
}
- const Elf_Ehdr *Header = EF.getHeader();
auto SymTabOrErr = EF.getSection(Symb.d.a);
if (!SymTabOrErr)
return SymTabOrErr.takeError();
- const Elf_Shdr *SymTab = *SymTabOrErr;
- if (Header->e_type == ELF::ET_REL) {
- auto SectionOrErr = EF.getSection(ESym, SymTab, ShndxTable);
+ if (EF.getHeader().e_type == ELF::ET_REL) {
+ ArrayRef<Elf_Word> ShndxTable;
+ if (DotSymtabShndxSec) {
+ // TODO: Test this error.
+ if (Expected<ArrayRef<Elf_Word>> ShndxTableOrErr =
+ EF.getSHNDXTable(*DotSymtabShndxSec))
+ ShndxTable = *ShndxTableOrErr;
+ else
+ return ShndxTableOrErr.takeError();
+ }
+
+ Expected<const Elf_Shdr *> SectionOrErr =
+ EF.getSection(**SymOrErr, *SymTabOrErr, ShndxTable);
if (!SectionOrErr)
return SectionOrErr.takeError();
const Elf_Shdr *Section = *SectionOrErr;
@@ -550,52 +598,68 @@ ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb) const {
template <class ELFT>
uint32_t ELFObjectFile<ELFT>::getSymbolAlignment(DataRefImpl Symb) const {
- const Elf_Sym *Sym = getSymbol(Symb);
- if (Sym->st_shndx == ELF::SHN_COMMON)
- return Sym->st_value;
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
+ if (!SymOrErr)
+ report_fatal_error(SymOrErr.takeError());
+ if ((*SymOrErr)->st_shndx == ELF::SHN_COMMON)
+ return (*SymOrErr)->st_value;
return 0;
}
template <class ELFT>
uint16_t ELFObjectFile<ELFT>::getEMachine() const {
- return EF.getHeader()->e_machine;
+ return EF.getHeader().e_machine;
}
template <class ELFT> uint16_t ELFObjectFile<ELFT>::getEType() const {
- return EF.getHeader()->e_type;
+ return EF.getHeader().e_type;
}
template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getSymbolSize(DataRefImpl Sym) const {
- return getSymbol(Sym)->st_size;
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Sym);
+ if (!SymOrErr)
+ report_fatal_error(SymOrErr.takeError());
+ return (*SymOrErr)->st_size;
}
template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
- return getSymbol(Symb)->st_size;
+ return getSymbolSize(Symb);
}
template <class ELFT>
uint8_t ELFObjectFile<ELFT>::getSymbolBinding(DataRefImpl Symb) const {
- return getSymbol(Symb)->getBinding();
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
+ if (!SymOrErr)
+ report_fatal_error(SymOrErr.takeError());
+ return (*SymOrErr)->getBinding();
}
template <class ELFT>
uint8_t ELFObjectFile<ELFT>::getSymbolOther(DataRefImpl Symb) const {
- return getSymbol(Symb)->st_other;
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
+ if (!SymOrErr)
+ report_fatal_error(SymOrErr.takeError());
+ return (*SymOrErr)->st_other;
}
template <class ELFT>
uint8_t ELFObjectFile<ELFT>::getSymbolELFType(DataRefImpl Symb) const {
- return getSymbol(Symb)->getType();
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
+ if (!SymOrErr)
+ report_fatal_error(SymOrErr.takeError());
+ return (*SymOrErr)->getType();
}
template <class ELFT>
Expected<SymbolRef::Type>
ELFObjectFile<ELFT>::getSymbolType(DataRefImpl Symb) const {
- const Elf_Sym *ESym = getSymbol(Symb);
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
+ if (!SymOrErr)
+ return SymOrErr.takeError();
- switch (ESym->getType()) {
+ switch ((*SymOrErr)->getType()) {
case ELF::STT_NOTYPE:
return SymbolRef::ST_Unknown;
case ELF::STT_SECTION:
@@ -615,8 +679,11 @@ ELFObjectFile<ELFT>::getSymbolType(DataRefImpl Symb) const {
template <class ELFT>
Expected<uint32_t> ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
- const Elf_Sym *ESym = getSymbol(Sym);
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Sym);
+ if (!SymOrErr)
+ return SymOrErr.takeError();
+ const Elf_Sym *ESym = *SymOrErr;
uint32_t Result = SymbolRef::SF_None;
if (ESym->getBinding() != ELF::STB_LOCAL)
@@ -649,7 +716,7 @@ Expected<uint32_t> ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
// TODO: Test this error.
return SymbolsOrErr.takeError();
- if (EF.getHeader()->e_machine == ELF::EM_ARM) {
+ if (EF.getHeader().e_machine == ELF::EM_ARM) {
if (Expected<StringRef> NameOrErr = getSymbolName(Sym)) {
StringRef Name = *NameOrErr;
if (Name.startswith("$d") || Name.startswith("$t") ||
@@ -682,7 +749,17 @@ template <class ELFT>
Expected<section_iterator>
ELFObjectFile<ELFT>::getSymbolSection(const Elf_Sym *ESym,
const Elf_Shdr *SymTab) const {
- auto ESecOrErr = EF.getSection(ESym, SymTab, ShndxTable);
+ ArrayRef<Elf_Word> ShndxTable;
+ if (DotSymtabShndxSec) {
+ // TODO: Test this error.
+ Expected<ArrayRef<Elf_Word>> ShndxTableOrErr =
+ EF.getSHNDXTable(*DotSymtabShndxSec);
+ if (!ShndxTableOrErr)
+ return ShndxTableOrErr.takeError();
+ ShndxTable = *ShndxTableOrErr;
+ }
+
+ auto ESecOrErr = EF.getSection(*ESym, SymTab, ShndxTable);
if (!ESecOrErr)
return ESecOrErr.takeError();
@@ -698,12 +775,14 @@ ELFObjectFile<ELFT>::getSymbolSection(const Elf_Sym *ESym,
template <class ELFT>
Expected<section_iterator>
ELFObjectFile<ELFT>::getSymbolSection(DataRefImpl Symb) const {
- const Elf_Sym *Sym = getSymbol(Symb);
+ Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
+ if (!SymOrErr)
+ return SymOrErr.takeError();
+
auto SymTabOrErr = EF.getSection(Symb.d.a);
if (!SymTabOrErr)
return SymTabOrErr.takeError();
- const Elf_Shdr *SymTab = *SymTabOrErr;
- return getSymbolSection(Sym, SymTab);
+ return getSymbolSection(*SymOrErr, *SymTabOrErr);
}
template <class ELFT>
@@ -714,7 +793,7 @@ void ELFObjectFile<ELFT>::moveSectionNext(DataRefImpl &Sec) const {
template <class ELFT>
Expected<StringRef> ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec) const {
- return EF.getSectionName(&*getSection(Sec));
+ return EF.getSectionName(*getSection(Sec));
}
template <class ELFT>
@@ -844,7 +923,7 @@ ELFObjectFile<ELFT>::section_rel_begin(DataRefImpl Sec) const {
if (!SectionsOrErr)
return relocation_iterator(RelocationRef());
uintptr_t SHT = reinterpret_cast<uintptr_t>((*SectionsOrErr).begin());
- RelData.d.a = (Sec.p - SHT) / EF.getHeader()->e_shentsize;
+ RelData.d.a = (Sec.p - SHT) / EF.getHeader().e_shentsize;
RelData.d.b = 0;
return relocation_iterator(RelocationRef(RelData, this));
}
@@ -871,7 +950,7 @@ ELFObjectFile<ELFT>::section_rel_end(DataRefImpl Sec) const {
template <class ELFT>
Expected<section_iterator>
ELFObjectFile<ELFT>::getRelocatedSection(DataRefImpl Sec) const {
- if (EF.getHeader()->e_type != ELF::ET_REL)
+ if (EF.getHeader().e_type != ELF::ET_REL)
return section_end();
const Elf_Shdr *EShdr = getSection(Sec);
@@ -930,7 +1009,7 @@ uint64_t ELFObjectFile<ELFT>::getRelocationType(DataRefImpl Rel) const {
template <class ELFT>
StringRef ELFObjectFile<ELFT>::getRelocationTypeName(uint32_t Type) const {
- return getELFRelocationTypeName(EF.getHeader()->e_machine, Type);
+ return getELFRelocationTypeName(EF.getHeader().e_machine, Type);
}
template <class ELFT>
@@ -970,59 +1049,34 @@ ELFObjectFile<ELFT>::getRela(DataRefImpl Rela) const {
template <class ELFT>
Expected<ELFObjectFile<ELFT>>
-ELFObjectFile<ELFT>::create(MemoryBufferRef Object) {
+ELFObjectFile<ELFT>::create(MemoryBufferRef Object, bool InitContent) {
auto EFOrErr = ELFFile<ELFT>::create(Object.getBuffer());
if (Error E = EFOrErr.takeError())
return std::move(E);
- auto EF = std::move(*EFOrErr);
- auto SectionsOrErr = EF.sections();
- if (!SectionsOrErr)
- return SectionsOrErr.takeError();
-
- const Elf_Shdr *DotDynSymSec = nullptr;
- const Elf_Shdr *DotSymtabSec = nullptr;
- ArrayRef<Elf_Word> ShndxTable;
- for (const Elf_Shdr &Sec : *SectionsOrErr) {
- switch (Sec.sh_type) {
- case ELF::SHT_DYNSYM: {
- if (!DotDynSymSec)
- DotDynSymSec = &Sec;
- break;
- }
- case ELF::SHT_SYMTAB: {
- if (!DotSymtabSec)
- DotSymtabSec = &Sec;
- break;
- }
- case ELF::SHT_SYMTAB_SHNDX: {
- auto TableOrErr = EF.getSHNDXTable(Sec);
- if (!TableOrErr)
- return TableOrErr.takeError();
- ShndxTable = *TableOrErr;
- break;
- }
- }
- }
- return ELFObjectFile<ELFT>(Object, EF, DotDynSymSec, DotSymtabSec,
- ShndxTable);
+ ELFObjectFile<ELFT> Obj = {Object, std::move(*EFOrErr), nullptr, nullptr,
+ nullptr};
+ if (InitContent)
+ if (Error E = Obj.initContent())
+ return std::move(E);
+ return std::move(Obj);
}
template <class ELFT>
ELFObjectFile<ELFT>::ELFObjectFile(MemoryBufferRef Object, ELFFile<ELFT> EF,
const Elf_Shdr *DotDynSymSec,
const Elf_Shdr *DotSymtabSec,
- ArrayRef<Elf_Word> ShndxTable)
+ const Elf_Shdr *DotSymtabShndx)
: ELFObjectFileBase(
getELFType(ELFT::TargetEndianness == support::little, ELFT::Is64Bits),
Object),
EF(EF), DotDynSymSec(DotDynSymSec), DotSymtabSec(DotSymtabSec),
- ShndxTable(ShndxTable) {}
+ DotSymtabShndxSec(DotSymtabShndx) {}
template <class ELFT>
ELFObjectFile<ELFT>::ELFObjectFile(ELFObjectFile<ELFT> &&Other)
: ELFObjectFile(Other.Data, Other.EF, Other.DotDynSymSec,
- Other.DotSymtabSec, Other.ShndxTable) {}
+ Other.DotSymtabSec, Other.DotSymtabShndxSec) {}
template <class ELFT>
basic_symbol_iterator ELFObjectFile<ELFT>::symbol_begin() const {
@@ -1084,9 +1138,9 @@ uint8_t ELFObjectFile<ELFT>::getBytesInAddress() const {
template <class ELFT>
StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
bool IsLittleEndian = ELFT::TargetEndianness == support::little;
- switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) {
+ switch (EF.getHeader().e_ident[ELF::EI_CLASS]) {
case ELF::ELFCLASS32:
- switch (EF.getHeader()->e_machine) {
+ switch (EF.getHeader().e_machine) {
case ELF::EM_386:
return "elf32-i386";
case ELF::EM_IAMCU:
@@ -1106,9 +1160,11 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
case ELF::EM_MSP430:
return "elf32-msp430";
case ELF::EM_PPC:
- return "elf32-powerpc";
+ return (IsLittleEndian ? "elf32-powerpcle" : "elf32-powerpc");
case ELF::EM_RISCV:
return "elf32-littleriscv";
+ case ELF::EM_CSKY:
+ return "elf32-csky";
case ELF::EM_SPARC:
case ELF::EM_SPARC32PLUS:
return "elf32-sparc";
@@ -1118,7 +1174,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
return "elf32-unknown";
}
case ELF::ELFCLASS64:
- switch (EF.getHeader()->e_machine) {
+ switch (EF.getHeader().e_machine) {
case ELF::EM_386:
return "elf64-i386";
case ELF::EM_X86_64:
@@ -1152,7 +1208,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
bool IsLittleEndian = ELFT::TargetEndianness == support::little;
- switch (EF.getHeader()->e_machine) {
+ switch (EF.getHeader().e_machine) {
case ELF::EM_386:
case ELF::EM_IAMCU:
return Triple::x86;
@@ -1169,7 +1225,7 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
case ELF::EM_LANAI:
return Triple::lanai;
case ELF::EM_MIPS:
- switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) {
+ switch (EF.getHeader().e_ident[ELF::EI_CLASS]) {
case ELF::ELFCLASS32:
return IsLittleEndian ? Triple::mipsel : Triple::mips;
case ELF::ELFCLASS64:
@@ -1180,11 +1236,11 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
case ELF::EM_MSP430:
return Triple::msp430;
case ELF::EM_PPC:
- return Triple::ppc;
+ return IsLittleEndian ? Triple::ppcle : Triple::ppc;
case ELF::EM_PPC64:
return IsLittleEndian ? Triple::ppc64le : Triple::ppc64;
case ELF::EM_RISCV:
- switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) {
+ switch (EF.getHeader().e_ident[ELF::EI_CLASS]) {
case ELF::ELFCLASS32:
return Triple::riscv32;
case ELF::ELFCLASS64:
@@ -1205,7 +1261,7 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
if (!IsLittleEndian)
return Triple::UnknownArch;
- unsigned MACH = EF.getHeader()->e_flags & ELF::EF_AMDGPU_MACH;
+ unsigned MACH = EF.getHeader().e_flags & ELF::EF_AMDGPU_MACH;
if (MACH >= ELF::EF_AMDGPU_MACH_R600_FIRST &&
MACH <= ELF::EF_AMDGPU_MACH_R600_LAST)
return Triple::r600;
@@ -1221,6 +1277,8 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
case ELF::EM_VE:
return Triple::ve;
+ case ELF::EM_CSKY:
+ return Triple::csky;
default:
return Triple::UnknownArch;
}
@@ -1228,7 +1286,7 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
template <class ELFT>
Expected<uint64_t> ELFObjectFile<ELFT>::getStartAddress() const {
- return EF.getHeader()->e_entry;
+ return EF.getHeader().e_entry;
}
template <class ELFT>
@@ -1238,7 +1296,7 @@ ELFObjectFile<ELFT>::getDynamicSymbolIterators() const {
}
template <class ELFT> bool ELFObjectFile<ELFT>::isRelocatableObject() const {
- return EF.getHeader()->e_type == ELF::ET_REL;
+ return EF.getHeader().e_type == ELF::ET_REL;
}
} // end namespace object
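The ELFObjectFile.h hunks above make getELFFile() and ELFFile<ELFT>::getHeader() return references instead of pointers, turn getSymbol(DataRefImpl) into an Expected-returning call, and move the section-table scan into the new initContent()/ContentValid machinery. A minimal sketch of a caller updated for the reference-returning accessors, assuming Obj points to an already-created ELFObjectFile<ELF64LE> (a placeholder, not part of the patch):

    // Old: const ELFFile<ELF64LE> *EF = Obj->getELFFile();
    //      unsigned Machine = EF->getHeader()->e_machine;
    const ELFFile<ELF64LE> &EF = Obj->getELFFile();
    unsigned Machine = EF.getHeader().e_machine;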
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/ELFTypes.h b/contrib/llvm-project/llvm/include/llvm/Object/ELFTypes.h
index 5e85e6cc4653..f64e7c06e03b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/ELFTypes.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/ELFTypes.h
@@ -107,7 +107,34 @@ using ELF64BE = ELFType<support::big, true>;
using Elf_Word = typename ELFT::Word; \
using Elf_Sword = typename ELFT::Sword; \
using Elf_Xword = typename ELFT::Xword; \
- using Elf_Sxword = typename ELFT::Sxword;
+ using Elf_Sxword = typename ELFT::Sxword; \
+ using uintX_t = typename ELFT::uint; \
+ using Elf_Ehdr = typename ELFT::Ehdr; \
+ using Elf_Shdr = typename ELFT::Shdr; \
+ using Elf_Sym = typename ELFT::Sym; \
+ using Elf_Dyn = typename ELFT::Dyn; \
+ using Elf_Phdr = typename ELFT::Phdr; \
+ using Elf_Rel = typename ELFT::Rel; \
+ using Elf_Rela = typename ELFT::Rela; \
+ using Elf_Relr = typename ELFT::Relr; \
+ using Elf_Verdef = typename ELFT::Verdef; \
+ using Elf_Verdaux = typename ELFT::Verdaux; \
+ using Elf_Verneed = typename ELFT::Verneed; \
+ using Elf_Vernaux = typename ELFT::Vernaux; \
+ using Elf_Versym = typename ELFT::Versym; \
+ using Elf_Hash = typename ELFT::Hash; \
+ using Elf_GnuHash = typename ELFT::GnuHash; \
+ using Elf_Nhdr = typename ELFT::Nhdr; \
+ using Elf_Note = typename ELFT::Note; \
+ using Elf_Note_Iterator = typename ELFT::NoteIterator; \
+ using Elf_CGProfile = typename ELFT::CGProfile; \
+ using Elf_Dyn_Range = typename ELFT::DynRange; \
+ using Elf_Shdr_Range = typename ELFT::ShdrRange; \
+ using Elf_Sym_Range = typename ELFT::SymRange; \
+ using Elf_Rel_Range = typename ELFT::RelRange; \
+ using Elf_Rela_Range = typename ELFT::RelaRange; \
+ using Elf_Relr_Range = typename ELFT::RelrRange; \
+ using Elf_Phdr_Range = typename ELFT::PhdrRange; \
#define LLVM_ELF_COMMA ,
#define LLVM_ELF_IMPORT_TYPES(E, W) \
@@ -269,7 +296,6 @@ struct Elf_Versym_Impl {
template <class ELFT>
struct Elf_Verdef_Impl {
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
- using Elf_Verdaux = Elf_Verdaux_Impl<ELFT>;
Elf_Half vd_version; // Version of this structure (e.g. VER_DEF_CURRENT)
Elf_Half vd_flags; // Bitwise flags (VER_DEF_*)
Elf_Half vd_ndx; // Version index, used in .gnu.version entries
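The ELFTypes.h hunk above widens LLVM_ELF_IMPORT_TYPES_ELFT so it also imports the header, section, symbol, versioning and range aliases, which is what lets the per-class using lists (removed from ELFObjectFile earlier in this diff) go away. A rough sketch of a hypothetical ELFT-templated helper leaning on the widened macro (SectionPrinter is illustrative only, not part of the patch):

    template <class ELFT> struct SectionPrinter {
      LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
      // Elf_Shdr, Elf_Sym, Elf_Shdr_Range, ... are now in scope without
      // spelling out "typename ELFT::Shdr" for each one.
      Error printName(const ELFFile<ELFT> &EF, const Elf_Shdr &Sec);
    };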
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/MachO.h b/contrib/llvm-project/llvm/include/llvm/Object/MachO.h
index f48e0f1dcd58..7eb017397846 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/MachO.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/MachO.h
@@ -615,6 +615,7 @@ public:
case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator";
case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator";
case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
+ case MachO::PLATFORM_DRIVERKIT: return "driverkit";
default:
std::string ret;
raw_string_ostream ss(ret);
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/MachOUniversal.h b/contrib/llvm-project/llvm/include/llvm/Object/MachOUniversal.h
index 5e006fd87318..9bcacb510108 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/MachOUniversal.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/MachOUniversal.h
@@ -22,8 +22,11 @@
namespace llvm {
class StringRef;
+class Module;
+class LLVMContext;
namespace object {
+class IRObjectFile;
class MachOUniversalBinary : public Binary {
virtual void anchor();
@@ -101,6 +104,8 @@ public:
}
Expected<std::unique_ptr<MachOObjectFile>> getAsObjectFile() const;
+ Expected<std::unique_ptr<IRObjectFile>>
+ getAsIRObject(LLVMContext &Ctx) const;
Expected<std::unique_ptr<Archive>> getAsArchive() const;
};
@@ -154,6 +159,9 @@ public:
Expected<std::unique_ptr<MachOObjectFile>>
getMachOObjectForArch(StringRef ArchName) const;
+ Expected<std::unique_ptr<IRObjectFile>>
+ getIRObjectForArch(StringRef ArchName, LLVMContext &Ctx) const;
+
Expected<std::unique_ptr<Archive>>
getArchiveForArch(StringRef ArchName) const;
};
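The MachOUniversal.h hunks above let a universal binary hand back bitcode members as IRObjectFile instances, mirroring the existing getAsObjectFile/getMachOObjectForArch entry points. A minimal sketch, assuming UB is a loaded MachOUniversalBinary and Ctx an LLVMContext (placeholder names, not part of the patch):

    Expected<std::unique_ptr<object::IRObjectFile>> IROrErr =
        UB.getIRObjectForArch("arm64", Ctx);
    if (!IROrErr)
      return IROrErr.takeError();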
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/MachOUniversalWriter.h b/contrib/llvm-project/llvm/include/llvm/Object/MachOUniversalWriter.h
new file mode 100644
index 000000000000..cdfedcf0379e
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Object/MachOUniversalWriter.h
@@ -0,0 +1,102 @@
+//===- MachOUniversalWriter.h - MachO universal binary writer----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Declares the Slice class and writeUniversalBinary function for writing a
+// MachO universal binary file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_MACHOUNIVERSALWRITER_H
+#define LLVM_OBJECT_MACHOUNIVERSALWRITER_H
+
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/MachO.h"
+
+namespace llvm {
+class LLVMContext;
+
+namespace object {
+class IRObjectFile;
+
+class Slice {
+ const Binary *B;
+ uint32_t CPUType;
+ uint32_t CPUSubType;
+ std::string ArchName;
+
+ // P2Alignment field stores slice alignment values from universal
+ // binaries. This is also needed to order the slices so the total
+ // file size can be calculated before creating the output buffer.
+ uint32_t P2Alignment;
+
+ Slice(const IRObjectFile &IRO, uint32_t CPUType, uint32_t CPUSubType,
+ std::string ArchName, uint32_t Align);
+
+public:
+ explicit Slice(const MachOObjectFile &O);
+
+ Slice(const MachOObjectFile &O, uint32_t Align);
+
+ /// This constructor takes pre-specified \param CPUType , \param CPUSubType ,
+ /// \param ArchName , \param Align instead of inferring them from the archive
+ /// members.
+ Slice(const Archive &A, uint32_t CPUType, uint32_t CPUSubType,
+ std::string ArchName, uint32_t Align);
+
+ static Expected<Slice> create(const Archive &A,
+ LLVMContext *LLVMCtx = nullptr);
+
+ static Expected<Slice> create(const IRObjectFile &IRO, uint32_t Align);
+
+ void setP2Alignment(uint32_t Align) { P2Alignment = Align; }
+
+ const Binary *getBinary() const { return B; }
+
+ uint32_t getCPUType() const { return CPUType; }
+
+ uint32_t getCPUSubType() const { return CPUSubType; }
+
+ uint32_t getP2Alignment() const { return P2Alignment; }
+
+ uint64_t getCPUID() const {
+ return static_cast<uint64_t>(CPUType) << 32 | CPUSubType;
+ }
+
+ std::string getArchString() const {
+ if (!ArchName.empty())
+ return ArchName;
+ return ("unknown(" + Twine(CPUType) + "," +
+ Twine(CPUSubType & ~MachO::CPU_SUBTYPE_MASK) + ")")
+ .str();
+ }
+
+ friend bool operator<(const Slice &Lhs, const Slice &Rhs) {
+ if (Lhs.CPUType == Rhs.CPUType)
+ return Lhs.CPUSubType < Rhs.CPUSubType;
+ // force arm64-family to follow after all other slices for
+ // compatibility with cctools lipo
+ if (Lhs.CPUType == MachO::CPU_TYPE_ARM64)
+ return false;
+ if (Rhs.CPUType == MachO::CPU_TYPE_ARM64)
+ return true;
+ // Sort by alignment to minimize file size
+ return Lhs.P2Alignment < Rhs.P2Alignment;
+ }
+};
+
+Error writeUniversalBinary(ArrayRef<Slice> Slices, StringRef OutputFileName);
+
+Expected<std::unique_ptr<MemoryBuffer>>
+writeUniversalBinaryToBuffer(ArrayRef<Slice> Slices);
+
+} // end namespace object
+
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_MACHOUNIVERSALWRITER_H
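The new MachOUniversalWriter.h above introduces a Slice abstraction plus writeUniversalBinary/writeUniversalBinaryToBuffer for emitting Mach-O fat files; its operator< keeps arm64-family slices last for cctools lipo compatibility. A minimal sketch of combining two thin objects, assuming ObjA and ObjB are MachOObjectFile references obtained elsewhere and the caller returns llvm::Error (all placeholders, not part of the patch):

    SmallVector<object::Slice, 2> Slices;
    Slices.emplace_back(object::Slice(ObjA)); // one slice per input object
    Slices.emplace_back(object::Slice(ObjB));
    if (Error E = object::writeUniversalBinary(Slices, "fat.out"))
      return E;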
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/ObjectFile.h b/contrib/llvm-project/llvm/include/llvm/Object/ObjectFile.h
index 8e8937201716..27e40cbdbece 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/ObjectFile.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/ObjectFile.h
@@ -327,6 +327,7 @@ public:
virtual StringRef getFileFormatName() const = 0;
virtual Triple::ArchType getArch() const = 0;
virtual SubtargetFeatures getFeatures() const = 0;
+ virtual Optional<StringRef> tryGetCPUName() const { return None; };
virtual void setARMSubArch(Triple &TheTriple) const { }
virtual Expected<uint64_t> getStartAddress() const {
return errorCodeToError(object_error::parse_failed);
@@ -349,7 +350,8 @@ public:
createObjectFile(StringRef ObjectPath);
static Expected<std::unique_ptr<ObjectFile>>
- createObjectFile(MemoryBufferRef Object, llvm::file_magic Type);
+ createObjectFile(MemoryBufferRef Object, llvm::file_magic Type,
+ bool InitContent = true);
static Expected<std::unique_ptr<ObjectFile>>
createObjectFile(MemoryBufferRef Object) {
return createObjectFile(Object, llvm::file_magic::unknown);
@@ -366,7 +368,7 @@ public:
createXCOFFObjectFile(MemoryBufferRef Object, unsigned FileType);
static Expected<std::unique_ptr<ObjectFile>>
- createELFObjectFile(MemoryBufferRef Object);
+ createELFObjectFile(MemoryBufferRef Object, bool InitContent = true);
static Expected<std::unique_ptr<MachOObjectFile>>
createMachOObjectFile(MemoryBufferRef Object,
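The ObjectFile.h hunks above thread an InitContent flag through createObjectFile/createELFObjectFile (feeding the ELFObjectFile::initContent() shown earlier) and add the tryGetCPUName() hook next to getFeatures(). A minimal sketch, assuming Buffer is a MemoryBufferRef over an ELF object (a placeholder, not part of the patch):

    // Defer the section-table scan; handy when only file-level queries are needed.
    Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
        ObjectFile::createELFObjectFile(Buffer, /*InitContent=*/false);
    if (!ObjOrErr)
      return ObjOrErr.takeError();
    if (Optional<StringRef> CPU = (*ObjOrErr)->tryGetCPUName())
      outs() << "cpu: " << *CPU << "\n";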
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/RelocationResolver.h b/contrib/llvm-project/llvm/include/llvm/Object/RelocationResolver.h
index 1246dcc5ec73..46f74e90a91b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/RelocationResolver.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/RelocationResolver.h
@@ -31,11 +31,17 @@
namespace llvm {
namespace object {
-using RelocationResolver = uint64_t (*)(RelocationRef R, uint64_t S, uint64_t A);
+using SupportsRelocation = bool (*)(uint64_t);
+using RelocationResolver = uint64_t (*)(uint64_t Type, uint64_t Offset,
+ uint64_t S, uint64_t LocData,
+ int64_t Addend);
-std::pair<bool (*)(uint64_t), RelocationResolver>
+std::pair<SupportsRelocation, RelocationResolver>
getRelocationResolver(const ObjectFile &Obj);
+uint64_t resolveRelocation(RelocationResolver Resolver, const RelocationRef &R,
+ uint64_t S, uint64_t LocData);
+
} // end namespace object
} // end namespace llvm
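The RelocationResolver.h hunks above decouple the resolver callback from RelocationRef, passing raw relocation fields (type, offset, symbol value, in-place data, addend) instead, with resolveRelocation() as a shim for RelocationRef-based call sites. A minimal sketch, assuming Obj is an ObjectFile, R a RelocationRef inside it, and S/LocData the resolved symbol value and the bytes at the relocated location (all placeholders, not part of the patch):

    std::pair<object::SupportsRelocation, object::RelocationResolver> RR =
        object::getRelocationResolver(Obj);
    uint64_t Value = LocData;
    if (RR.first && RR.first(R.getType()))
      Value = object::resolveRelocation(RR.second, R, S, LocData);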
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/StackMapParser.h b/contrib/llvm-project/llvm/include/llvm/Object/StackMapParser.h
index b408f4041034..4ee67112ea5e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/StackMapParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/StackMapParser.h
@@ -11,6 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Object/ELF.h"
#include "llvm/Support/Endian.h"
#include <cassert>
#include <cstddef>
@@ -35,11 +36,13 @@ public:
return tmp;
}
- bool operator==(const AccessorIterator &Other) {
+ bool operator==(const AccessorIterator &Other) const {
return A.P == Other.A.P;
}
- bool operator!=(const AccessorIterator &Other) { return !(*this == Other); }
+ bool operator!=(const AccessorIterator &Other) const {
+ return !(*this == Other);
+ }
AccessorT& operator*() { return A; }
AccessorT* operator->() { return &A; }
@@ -318,6 +321,23 @@ public:
}
}
+ /// Validates the header of the specified stack map section.
+ static Error validateHeader(ArrayRef<uint8_t> StackMapSection) {
+ // See the comment for StackMaps::emitStackmapHeader().
+ if (StackMapSection.size() < 16)
+ return object::createError(
+ "the stack map section size (" + Twine(StackMapSection.size()) +
+ ") is less than the minimum possible size of its header (16)");
+
+ unsigned Version = StackMapSection[0];
+ if (Version != 3)
+ return object::createError(
+ "the version (" + Twine(Version) +
+ ") of the stack map section is unsupported, the "
+ "supported version is 3");
+ return Error::success();
+ }
+
using function_iterator = AccessorIterator<FunctionAccessor>;
using constant_iterator = AccessorIterator<ConstantAccessor>;
using record_iterator = AccessorIterator<RecordAccessor>;
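The StackMapParser.h hunks above make the accessor-iterator comparisons const and add a static validateHeader() so a caller can reject a truncated or unsupported-version stack map section up front. A minimal sketch, assuming StackMapSection is an ArrayRef<uint8_t> holding the raw section bytes and the parser's constructor signature is unchanged (placeholders/assumptions, not part of the patch):

    if (Error E =
            StackMapParser<support::little>::validateHeader(StackMapSection))
      return E;
    StackMapParser<support::little> Parser(StackMapSection); // safe to parse now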
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/SymbolicFile.h b/contrib/llvm-project/llvm/include/llvm/Object/SymbolicFile.h
index a0d8b7225598..012f9f7fad07 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/SymbolicFile.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/SymbolicFile.h
@@ -161,18 +161,18 @@ public:
// construction aux.
static Expected<std::unique_ptr<SymbolicFile>>
createSymbolicFile(MemoryBufferRef Object, llvm::file_magic Type,
- LLVMContext *Context);
+ LLVMContext *Context, bool InitContent = true);
static Expected<std::unique_ptr<SymbolicFile>>
createSymbolicFile(MemoryBufferRef Object) {
return createSymbolicFile(Object, llvm::file_magic::unknown, nullptr);
}
- static Expected<OwningBinary<SymbolicFile>>
- createSymbolicFile(StringRef ObjectPath);
static bool classof(const Binary *v) {
return v->isSymbolic();
}
+
+ static bool isSymbolicFile(file_magic Type, const LLVMContext *Context);
};
inline BasicSymbolRef::BasicSymbolRef(DataRefImpl SymbolP,
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/Wasm.h b/contrib/llvm-project/llvm/include/llvm/Object/Wasm.h
index dc90c891ab95..f7cd2e622ae3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/Wasm.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/Wasm.h
@@ -36,13 +36,15 @@ class WasmSymbol {
public:
WasmSymbol(const wasm::WasmSymbolInfo &Info,
const wasm::WasmGlobalType *GlobalType,
+ const wasm::WasmTableType *TableType,
const wasm::WasmEventType *EventType,
const wasm::WasmSignature *Signature)
- : Info(Info), GlobalType(GlobalType), EventType(EventType),
- Signature(Signature) {}
+ : Info(Info), GlobalType(GlobalType), TableType(TableType),
+ EventType(EventType), Signature(Signature) {}
const wasm::WasmSymbolInfo &Info;
const wasm::WasmGlobalType *GlobalType;
+ const wasm::WasmTableType *TableType;
const wasm::WasmEventType *EventType;
const wasm::WasmSignature *Signature;
@@ -50,6 +52,8 @@ public:
return Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION;
}
+ bool isTypeTable() const { return Info.Kind == wasm::WASM_SYMBOL_TYPE_TABLE; }
+
bool isTypeData() const { return Info.Kind == wasm::WASM_SYMBOL_TYPE_DATA; }
bool isTypeGlobal() const {
@@ -105,6 +109,7 @@ struct WasmSection {
uint32_t Type = 0; // Section type (See below)
uint32_t Offset = 0; // Offset with in the file
StringRef Name; // Section name (User-defined sections only)
+ uint32_t Comdat = UINT32_MAX; // From the "comdat info" section
ArrayRef<uint8_t> Content; // Section content
std::vector<wasm::WasmRelocation> Relocations; // Relocations for this section
};
@@ -146,9 +151,10 @@ public:
ArrayRef<wasm::WasmElemSegment> elements() const { return ElemSegments; }
ArrayRef<WasmSegment> dataSegments() const { return DataSegments; }
ArrayRef<wasm::WasmFunction> functions() const { return Functions; }
- ArrayRef<wasm::WasmFunctionName> debugNames() const { return DebugNames; }
+ ArrayRef<wasm::WasmDebugName> debugNames() const { return DebugNames; }
uint32_t startFunction() const { return StartFunction; }
uint32_t getNumImportedGlobals() const { return NumImportedGlobals; }
+ uint32_t getNumImportedTables() const { return NumImportedTables; }
uint32_t getNumImportedFunctions() const { return NumImportedFunctions; }
uint32_t getNumImportedEvents() const { return NumImportedEvents; }
uint32_t getNumSections() const { return Sections.size(); }
@@ -214,10 +220,13 @@ private:
bool isValidFunctionIndex(uint32_t Index) const;
bool isDefinedFunctionIndex(uint32_t Index) const;
bool isValidGlobalIndex(uint32_t Index) const;
+ bool isValidTableIndex(uint32_t Index) const;
bool isDefinedGlobalIndex(uint32_t Index) const;
+ bool isDefinedTableIndex(uint32_t Index) const;
bool isValidEventIndex(uint32_t Index) const;
bool isDefinedEventIndex(uint32_t Index) const;
bool isValidFunctionSymbol(uint32_t Index) const;
+ bool isValidTableSymbol(uint32_t Index) const;
bool isValidGlobalSymbol(uint32_t Index) const;
bool isValidEventSymbol(uint32_t Index) const;
bool isValidDataSymbol(uint32_t Index) const;
@@ -277,19 +286,22 @@ private:
llvm::Optional<size_t> DataCount;
std::vector<wasm::WasmFunction> Functions;
std::vector<WasmSymbol> Symbols;
- std::vector<wasm::WasmFunctionName> DebugNames;
+ std::vector<wasm::WasmDebugName> DebugNames;
uint32_t StartFunction = -1;
bool HasLinkingSection = false;
bool HasDylinkSection = false;
bool SeenCodeSection = false;
+ bool HasMemory64 = false;
wasm::WasmLinkingData LinkingData;
uint32_t NumImportedGlobals = 0;
+ uint32_t NumImportedTables = 0;
uint32_t NumImportedFunctions = 0;
uint32_t NumImportedEvents = 0;
uint32_t CodeSection = 0;
uint32_t DataSection = 0;
uint32_t EventSection = 0;
uint32_t GlobalSection = 0;
+ uint32_t TableSection = 0;
};
class WasmSectionOrderChecker {
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/XCOFFObjectFile.h b/contrib/llvm-project/llvm/include/llvm/Object/XCOFFObjectFile.h
index 9c2470736023..1ac00ed5e2c7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/XCOFFObjectFile.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/XCOFFObjectFile.h
@@ -13,6 +13,8 @@
#ifndef LLVM_OBJECT_XCOFFOBJECTFILE_H
#define LLVM_OBJECT_XCOFFOBJECTFILE_H
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Endian.h"
@@ -370,6 +372,8 @@ public:
Expected<ArrayRef<XCOFFRelocation32>>
relocations(const XCOFFSectionHeader32 &) const;
+
+ static bool classof(const Binary *B) { return B->isXCOFF(); }
}; // XCOFFObjectFile
class XCOFFSymbolRef {
@@ -391,6 +395,103 @@ public:
bool isFunction() const;
};
+class TBVectorExt {
+ friend class XCOFFTracebackTable;
+
+ uint16_t Data;
+ uint32_t VecParmsInfo;
+
+ TBVectorExt(StringRef TBvectorStrRef);
+
+public:
+ uint8_t getNumberOfVRSaved() const;
+ bool isVRSavedOnStack() const;
+ bool hasVarArgs() const;
+ uint8_t getNumberOfVectorParms() const;
+ bool hasVMXInstruction() const;
+ SmallString<32> getVectorParmsInfoString() const;
+};
+
+/// This class provides methods to extract traceback table data from a buffer.
+/// The various accessors may reference the buffer provided via the constructor.
+
+class XCOFFTracebackTable {
+ const uint8_t *const TBPtr;
+ Optional<SmallString<32>> ParmsType;
+ Optional<uint32_t> TraceBackTableOffset;
+ Optional<uint32_t> HandlerMask;
+ Optional<uint32_t> NumOfCtlAnchors;
+ Optional<SmallVector<uint32_t, 8>> ControlledStorageInfoDisp;
+ Optional<StringRef> FunctionName;
+ Optional<uint8_t> AllocaRegister;
+ Optional<TBVectorExt> VecExt;
+ Optional<uint8_t> ExtensionTable;
+
+ XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size, Error &Err);
+public:
+ /// Parse an XCOFF Traceback Table from \a Ptr with \a Size bytes.
+ /// Returns an XCOFFTracebackTable upon successful parsing, otherwise an
+ /// Error is returned.
+ ///
+ /// \param[in] Ptr
+ /// A pointer that points just past the initial 4 bytes of zeros at the
+ /// beginning of an XCOFF Traceback Table.
+ ///
+ /// \param[in, out] Size
+ /// A pointer that points to the length of the XCOFF Traceback Table.
+ /// If the XCOFF Traceback Table is not parsed successfully or there are
+ /// extra bytes that are not recognized, \a Size will be updated to be the
+ /// size up to the end of the last successfully parsed field of the table.
+ static Expected<XCOFFTracebackTable> create(const uint8_t *Ptr,
+ uint64_t &Size);
+ uint8_t getVersion() const;
+ uint8_t getLanguageID() const;
+
+ bool isGlobalLinkage() const;
+ bool isOutOfLineEpilogOrPrologue() const;
+ bool hasTraceBackTableOffset() const;
+ bool isInternalProcedure() const;
+ bool hasControlledStorage() const;
+ bool isTOCless() const;
+ bool isFloatingPointPresent() const;
+ bool isFloatingPointOperationLogOrAbortEnabled() const;
+
+ bool isInterruptHandler() const;
+ bool isFuncNamePresent() const;
+ bool isAllocaUsed() const;
+ uint8_t getOnConditionDirective() const;
+ bool isCRSaved() const;
+ bool isLRSaved() const;
+
+ bool isBackChainStored() const;
+ bool isFixup() const;
+ uint8_t getNumOfFPRsSaved() const;
+
+ bool hasVectorInfo() const;
+ bool hasExtensionTable() const;
+ uint8_t getNumOfGPRsSaved() const;
+
+ uint8_t getNumberOfFixedParms() const;
+
+ uint8_t getNumberOfFPParms() const;
+ bool hasParmsOnStack() const;
+
+ const Optional<SmallString<32>> &getParmsType() const { return ParmsType; }
+ const Optional<uint32_t> &getTraceBackTableOffset() const {
+ return TraceBackTableOffset;
+ }
+ const Optional<uint32_t> &getHandlerMask() const { return HandlerMask; }
+ const Optional<uint32_t> &getNumOfCtlAnchors() { return NumOfCtlAnchors; }
+ const Optional<SmallVector<uint32_t, 8>> &getControlledStorageInfoDisp() {
+ return ControlledStorageInfoDisp;
+ }
+ const Optional<StringRef> &getFunctionName() const { return FunctionName; }
+ const Optional<uint8_t> &getAllocaRegister() const { return AllocaRegister; }
+ const Optional<TBVectorExt> &getVectorExt() const { return VecExt; }
+ const Optional<uint8_t> &getExtensionTable() const { return ExtensionTable; }
+};
+
+bool doesXCOFFTracebackTableBegin(ArrayRef<uint8_t> Bytes);
} // namespace object
} // namespace llvm
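The XCOFFObjectFile.h hunks above add a traceback-table reader (XCOFFTracebackTable with a TBVectorExt helper) plus a classof() so dyn_cast<XCOFFObjectFile> works on a Binary. A minimal sketch of parsing one table, assuming Bytes is an ArrayRef<uint8_t> that starts just past the four zero bytes preceding a traceback table (a placeholder, not part of the patch):

    uint64_t Size = Bytes.size();
    Expected<object::XCOFFTracebackTable> TTOrErr =
        object::XCOFFTracebackTable::create(Bytes.data(), Size);
    if (!TTOrErr)
      return TTOrErr.takeError();
    if (TTOrErr->isFuncNamePresent())
      outs() << "function: " << *TTOrErr->getFunctionName() << "\n";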
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ArchiveYAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ArchiveYAML.h
new file mode 100644
index 000000000000..8d05feedcc62
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ArchiveYAML.h
@@ -0,0 +1,77 @@
+//===- ArchiveYAML.h - Archive YAMLIO implementation ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares classes for handling the YAML representation of archives.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECTYAML_ARCHIVEYAML_H
+#define LLVM_OBJECTYAML_ARCHIVEYAML_H
+
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/ADT/MapVector.h"
+
+namespace llvm {
+namespace ArchYAML {
+
+struct Archive {
+ struct Child {
+ struct Field {
+ Field() = default;
+ Field(StringRef Default, unsigned Length)
+ : DefaultValue(Default), MaxLength(Length) {}
+ StringRef Value;
+ StringRef DefaultValue;
+ unsigned MaxLength;
+ };
+
+ Child() {
+ Fields["Name"] = {"", 16};
+ Fields["LastModified"] = {"0", 12};
+ Fields["UID"] = {"0", 6};
+ Fields["GID"] = {"0", 6};
+ Fields["AccessMode"] = {"0", 8};
+ Fields["Size"] = {"0", 10};
+ Fields["Terminator"] = {"`\n", 2};
+ }
+
+ MapVector<StringRef, Field> Fields;
+
+ Optional<yaml::BinaryRef> Content;
+ Optional<llvm::yaml::Hex8> PaddingByte;
+ };
+
+ StringRef Magic;
+ Optional<std::vector<Child>> Members;
+ Optional<yaml::BinaryRef> Content;
+};
+
+} // end namespace ArchYAML
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ArchYAML::Archive::Child)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<ArchYAML::Archive> {
+ static void mapping(IO &IO, ArchYAML::Archive &A);
+ static std::string validate(IO &, ArchYAML::Archive &A);
+};
+
+template <> struct MappingTraits<ArchYAML::Archive::Child> {
+ static void mapping(IO &IO, ArchYAML::Archive::Child &C);
+ static std::string validate(IO &, ArchYAML::Archive::Child &C);
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+#endif // LLVM_OBJECTYAML_ARCHIVEYAML_H
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFEmitter.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
index 0ec3f90e1686..eb56d1e29326 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
@@ -33,15 +33,23 @@ Error emitDebugStr(raw_ostream &OS, const Data &DI);
Error emitDebugAranges(raw_ostream &OS, const Data &DI);
Error emitDebugRanges(raw_ostream &OS, const Data &DI);
-Error emitPubSection(raw_ostream &OS, const PubSection &Sect,
- bool IsLittleEndian, bool IsGNUPubSec = false);
+Error emitDebugPubnames(raw_ostream &OS, const Data &DI);
+Error emitDebugPubtypes(raw_ostream &OS, const Data &DI);
+Error emitDebugGNUPubnames(raw_ostream &OS, const Data &DI);
+Error emitDebugGNUPubtypes(raw_ostream &OS, const Data &DI);
Error emitDebugInfo(raw_ostream &OS, const Data &DI);
Error emitDebugLine(raw_ostream &OS, const Data &DI);
Error emitDebugAddr(raw_ostream &OS, const Data &DI);
+Error emitDebugStrOffsets(raw_ostream &OS, const Data &DI);
+Error emitDebugRnglists(raw_ostream &OS, const Data &DI);
+Error emitDebugLoclists(raw_ostream &OS, const Data &DI);
+std::function<Error(raw_ostream &, const Data &)>
+getDWARFEmitterByName(StringRef SecName);
Expected<StringMap<std::unique_ptr<MemoryBuffer>>>
-emitDebugSections(StringRef YAMLString, bool ApplyFixups = false,
- bool IsLittleEndian = sys::IsLittleEndianHost);
+emitDebugSections(StringRef YAMLString,
+ bool IsLittleEndian = sys::IsLittleEndianHost,
+ bool Is64BitAddrSize = true);
} // end namespace DWARFYAML
} // end namespace llvm
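The DWARFEmitter.h hunks above replace emitPubSection with per-section debug_pubnames/pubtypes emitters (plus GNU variants), add str_offsets/rnglists/loclists emitters and a getDWARFEmitterByName lookup, and change emitDebugSections to take an Is64BitAddrSize flag instead of ApplyFixups. A minimal sketch of the batch entry point, assuming Yaml is a StringRef holding a DWARF YAML description (a placeholder, not part of the patch):

    Expected<StringMap<std::unique_ptr<MemoryBuffer>>> SectionsOrErr =
        DWARFYAML::emitDebugSections(Yaml, /*IsLittleEndian=*/true,
                                     /*Is64BitAddrSize=*/true);
    if (!SectionsOrErr)
      return SectionsOrErr.takeError();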
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFYAML.h
index 9f62a4a2be57..856cea9a1535 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFYAML.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFYAML.h
@@ -18,33 +18,15 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/ObjectYAML/YAML.h"
#include "llvm/Support/YAMLTraits.h"
#include <cstdint>
+#include <unordered_map>
#include <vector>
namespace llvm {
namespace DWARFYAML {
-struct InitialLength {
- uint32_t TotalLength;
- uint64_t TotalLength64;
-
- bool isDWARF64() const { return TotalLength == UINT32_MAX; }
-
- uint64_t getLength() const {
- return isDWARF64() ? TotalLength64 : TotalLength;
- }
-
- void setLength(uint64_t Len) {
- if (Len >= (uint64_t)UINT32_MAX) {
- TotalLength64 = Len;
- TotalLength = UINT32_MAX;
- } else {
- TotalLength = Len;
- }
- }
-};
-
struct AttributeAbbrev {
llvm::dwarf::Attribute Attribute;
llvm::dwarf::Form Form;
@@ -58,18 +40,23 @@ struct Abbrev {
std::vector<AttributeAbbrev> Attributes;
};
+struct AbbrevTable {
+ Optional<uint64_t> ID;
+ std::vector<Abbrev> Table;
+};
+
struct ARangeDescriptor {
llvm::yaml::Hex64 Address;
- uint64_t Length;
+ yaml::Hex64 Length;
};
struct ARange {
dwarf::DwarfFormat Format;
- uint64_t Length;
+ Optional<yaml::Hex64> Length;
uint16_t Version;
- uint32_t CuOffset;
- uint8_t AddrSize;
- uint8_t SegSize;
+ yaml::Hex64 CuOffset;
+ Optional<yaml::Hex8> AddrSize;
+ yaml::Hex8 SegSize;
std::vector<ARangeDescriptor> Descriptors;
};
@@ -94,7 +81,8 @@ struct PubEntry {
};
struct PubSection {
- InitialLength Length;
+ dwarf::DwarfFormat Format;
+ yaml::Hex64 Length;
uint16_t Version;
uint32_t UnitOffset;
uint32_t UnitSize;
@@ -120,11 +108,12 @@ struct DWARFContext {
struct Unit {
dwarf::DwarfFormat Format;
- uint64_t Length;
+ Optional<yaml::Hex64> Length;
uint16_t Version;
+ Optional<uint8_t> AddrSize;
llvm::dwarf::UnitType Type; // Added in DWARF 5
- yaml::Hex64 AbbrOffset;
- uint8_t AddrSize;
+ Optional<uint64_t> AbbrevTableID;
+ Optional<yaml::Hex64> AbbrOffset;
std::vector<Entry> Entries;
};
@@ -137,7 +126,7 @@ struct File {
struct LineTableOpcode {
dwarf::LineNumberOps Opcode;
- uint64_t ExtLen;
+ Optional<uint64_t> ExtLen;
dwarf::LineNumberExtendedOps SubOpcode;
uint64_t Data;
int64_t SData;
@@ -148,16 +137,16 @@ struct LineTableOpcode {
struct LineTable {
dwarf::DwarfFormat Format;
- uint64_t Length;
+ Optional<uint64_t> Length;
uint16_t Version;
- uint64_t PrologueLength;
+ Optional<uint64_t> PrologueLength;
uint8_t MinInstLength;
uint8_t MaxOpsPerInst;
uint8_t DefaultIsStmt;
uint8_t LineBase;
uint8_t LineRange;
- uint8_t OpcodeBase;
- std::vector<uint8_t> StandardOpcodeLengths;
+ Optional<uint8_t> OpcodeBase;
+ Optional<std::vector<uint8_t>> StandardOpcodeLengths;
std::vector<StringRef> IncludeDirs;
std::vector<File> Files;
std::vector<LineTableOpcode> Opcodes;
@@ -177,14 +166,56 @@ struct AddrTableEntry {
std::vector<SegAddrPair> SegAddrPairs;
};
+struct StringOffsetsTable {
+ dwarf::DwarfFormat Format;
+ Optional<yaml::Hex64> Length;
+ yaml::Hex16 Version;
+ yaml::Hex16 Padding;
+ std::vector<yaml::Hex64> Offsets;
+};
+
+struct DWARFOperation {
+ dwarf::LocationAtom Operator;
+ std::vector<yaml::Hex64> Values;
+};
+
+struct RnglistEntry {
+ dwarf::RnglistEntries Operator;
+ std::vector<yaml::Hex64> Values;
+};
+
+struct LoclistEntry {
+ dwarf::LoclistEntries Operator;
+ std::vector<yaml::Hex64> Values;
+ Optional<yaml::Hex64> DescriptionsLength;
+ std::vector<DWARFOperation> Descriptions;
+};
+
+template <typename EntryType> struct ListEntries {
+ Optional<std::vector<EntryType>> Entries;
+ Optional<yaml::BinaryRef> Content;
+};
+
+template <typename EntryType> struct ListTable {
+ dwarf::DwarfFormat Format;
+ Optional<yaml::Hex64> Length;
+ yaml::Hex16 Version;
+ Optional<yaml::Hex8> AddrSize;
+ yaml::Hex8 SegSelectorSize;
+ Optional<uint32_t> OffsetEntryCount;
+ Optional<std::vector<yaml::Hex64>> Offsets;
+ std::vector<ListEntries<EntryType>> Lists;
+};
+
struct Data {
bool IsLittleEndian;
bool Is64BitAddrSize;
- std::vector<Abbrev> AbbrevDecls;
- std::vector<StringRef> DebugStrings;
- std::vector<ARange> ARanges;
- std::vector<Ranges> DebugRanges;
- std::vector<AddrTableEntry> DebugAddr;
+ std::vector<AbbrevTable> DebugAbbrev;
+ Optional<std::vector<StringRef>> DebugStrings;
+ Optional<std::vector<StringOffsetsTable>> DebugStrOffsets;
+ Optional<std::vector<ARange>> DebugAranges;
+ Optional<std::vector<Ranges>> DebugRanges;
+ Optional<std::vector<AddrTableEntry>> DebugAddr;
Optional<PubSection> PubNames;
Optional<PubSection> PubTypes;
@@ -194,10 +225,23 @@ struct Data {
std::vector<Unit> CompileUnits;
std::vector<LineTable> DebugLines;
+ Optional<std::vector<ListTable<RnglistEntry>>> DebugRnglists;
+ Optional<std::vector<ListTable<LoclistEntry>>> DebugLoclists;
bool isEmpty() const;
- SetVector<StringRef> getUsedSectionNames() const;
+ SetVector<StringRef> getNonEmptySectionNames() const;
+
+ struct AbbrevTableInfo {
+ uint64_t Index;
+ uint64_t Offset;
+ };
+ Expected<AbbrevTableInfo> getAbbrevTableInfoByID(uint64_t ID) const;
+ StringRef getAbbrevTableContentByIndex(uint64_t Index) const;
+
+private:
+ mutable std::unordered_map<uint64_t, AbbrevTableInfo> AbbrevTableInfoMap;
+ mutable std::unordered_map<uint64_t, std::string> AbbrevTableContents;
};
} // end namespace DWARFYAML
@@ -205,6 +249,7 @@ struct Data {
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AttributeAbbrev)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::Abbrev)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AbbrevTable)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::ARangeDescriptor)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::ARange)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::RangeEntry)
@@ -218,6 +263,18 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::LineTable)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::LineTableOpcode)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::SegAddrPair)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AddrTableEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::StringOffsetsTable)
+LLVM_YAML_IS_SEQUENCE_VECTOR(
+ llvm::DWARFYAML::ListTable<DWARFYAML::RnglistEntry>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(
+ llvm::DWARFYAML::ListEntries<DWARFYAML::RnglistEntry>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::RnglistEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(
+ llvm::DWARFYAML::ListTable<DWARFYAML::LoclistEntry>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(
+ llvm::DWARFYAML::ListEntries<DWARFYAML::LoclistEntry>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::LoclistEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::DWARFOperation)
namespace llvm {
namespace yaml {
@@ -226,6 +283,10 @@ template <> struct MappingTraits<DWARFYAML::Data> {
static void mapping(IO &IO, DWARFYAML::Data &DWARF);
};
+template <> struct MappingTraits<DWARFYAML::AbbrevTable> {
+ static void mapping(IO &IO, DWARFYAML::AbbrevTable &AbbrevTable);
+};
+
template <> struct MappingTraits<DWARFYAML::Abbrev> {
static void mapping(IO &IO, DWARFYAML::Abbrev &Abbrev);
};
@@ -286,12 +347,36 @@ template <> struct MappingTraits<DWARFYAML::SegAddrPair> {
static void mapping(IO &IO, DWARFYAML::SegAddrPair &SegAddrPair);
};
+template <> struct MappingTraits<DWARFYAML::DWARFOperation> {
+ static void mapping(IO &IO, DWARFYAML::DWARFOperation &DWARFOperation);
+};
+
+template <typename EntryType>
+struct MappingTraits<DWARFYAML::ListTable<EntryType>> {
+ static void mapping(IO &IO, DWARFYAML::ListTable<EntryType> &ListTable);
+};
+
+template <typename EntryType>
+struct MappingTraits<DWARFYAML::ListEntries<EntryType>> {
+ static void mapping(IO &IO, DWARFYAML::ListEntries<EntryType> &ListEntries);
+ static std::string validate(IO &IO,
+ DWARFYAML::ListEntries<EntryType> &ListEntries);
+};
+
+template <> struct MappingTraits<DWARFYAML::RnglistEntry> {
+ static void mapping(IO &IO, DWARFYAML::RnglistEntry &RnglistEntry);
+};
+
+template <> struct MappingTraits<DWARFYAML::LoclistEntry> {
+ static void mapping(IO &IO, DWARFYAML::LoclistEntry &LoclistEntry);
+};
+
template <> struct MappingTraits<DWARFYAML::AddrTableEntry> {
static void mapping(IO &IO, DWARFYAML::AddrTableEntry &AddrTable);
};
-template <> struct MappingTraits<DWARFYAML::InitialLength> {
- static void mapping(IO &IO, DWARFYAML::InitialLength &DWARF);
+template <> struct MappingTraits<DWARFYAML::StringOffsetsTable> {
+ static void mapping(IO &IO, DWARFYAML::StringOffsetsTable &StrOffsetsTable);
};
template <> struct ScalarEnumerationTraits<dwarf::DwarfFormat> {
@@ -369,6 +454,34 @@ template <> struct ScalarEnumerationTraits<dwarf::Constants> {
}
};
+#define HANDLE_DW_RLE(unused, name) \
+ io.enumCase(value, "DW_RLE_" #name, dwarf::DW_RLE_##name);
+
+template <> struct ScalarEnumerationTraits<dwarf::RnglistEntries> {
+ static void enumeration(IO &io, dwarf::RnglistEntries &value) {
+#include "llvm/BinaryFormat/Dwarf.def"
+ }
+};
+
+#define HANDLE_DW_LLE(unused, name) \
+ io.enumCase(value, "DW_LLE_" #name, dwarf::DW_LLE_##name);
+
+template <> struct ScalarEnumerationTraits<dwarf::LoclistEntries> {
+ static void enumeration(IO &io, dwarf::LoclistEntries &value) {
+#include "llvm/BinaryFormat/Dwarf.def"
+ }
+};
+
+#define HANDLE_DW_OP(id, name, version, vendor) \
+ io.enumCase(value, "DW_OP_" #name, dwarf::DW_OP_##name);
+
+template <> struct ScalarEnumerationTraits<dwarf::LocationAtom> {
+ static void enumeration(IO &io, dwarf::LocationAtom &value) {
+#include "llvm/BinaryFormat/Dwarf.def"
+ io.enumFallback<yaml::Hex8>(value);
+ }
+};
+
} // end namespace yaml
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ELFYAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ELFYAML.h
index b1ffb20681ea..4f3c76bbd82c 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ELFYAML.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -16,6 +16,8 @@
#define LLVM_OBJECTYAML_ELFYAML_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Object/ELFTypes.h"
#include "llvm/ObjectYAML/DWARFYAML.h"
#include "llvm/ObjectYAML/YAML.h"
#include "llvm/Support/YAMLTraits.h"
@@ -69,6 +71,41 @@ LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_ISA)
LLVM_YAML_STRONG_TYPEDEF(StringRef, YAMLFlowString)
LLVM_YAML_STRONG_TYPEDEF(int64_t, YAMLIntUInt)
+template <class ELFT>
+unsigned getDefaultShEntSize(unsigned EMachine, ELF_SHT SecType,
+ StringRef SecName) {
+ if (EMachine == ELF::EM_MIPS && SecType == ELF::SHT_MIPS_ABIFLAGS)
+ return sizeof(object::Elf_Mips_ABIFlags<ELFT>);
+
+ switch (SecType) {
+ case ELF::SHT_SYMTAB:
+ case ELF::SHT_DYNSYM:
+ return sizeof(typename ELFT::Sym);
+ case ELF::SHT_GROUP:
+ return sizeof(typename ELFT::Word);
+ case ELF::SHT_REL:
+ return sizeof(typename ELFT::Rel);
+ case ELF::SHT_RELA:
+ return sizeof(typename ELFT::Rela);
+ case ELF::SHT_RELR:
+ return sizeof(typename ELFT::Relr);
+ case ELF::SHT_DYNAMIC:
+ return sizeof(typename ELFT::Dyn);
+ case ELF::SHT_HASH:
+ return sizeof(typename ELFT::Word);
+ case ELF::SHT_SYMTAB_SHNDX:
+ return sizeof(typename ELFT::Word);
+ case ELF::SHT_GNU_versym:
+ return sizeof(typename ELFT::Half);
+ case ELF::SHT_LLVM_CALL_GRAPH_PROFILE:
+ return sizeof(object::Elf_CGProfile_Impl<ELFT>);
+ default:
+ if (SecName == ".debug_str")
+ return 1;
+ return 0;
+ }
+}
+
// For now, hardcode 64 bits everywhere that 32 or 64 would be needed
// since 64-bit can hold 32-bit values too.
struct FileHeader {
@@ -77,7 +114,7 @@ struct FileHeader {
ELF_ELFOSABI OSABI;
llvm::yaml::Hex8 ABIVersion;
ELF_ET Type;
- ELF_EM Machine;
+ Optional<ELF_EM> Machine;
ELF_EF Flags;
llvm::yaml::Hex64 Entry;
@@ -94,24 +131,14 @@ struct SectionHeader {
StringRef Name;
};
-struct SectionHeaderTable {
- Optional<std::vector<SectionHeader>> Sections;
- Optional<std::vector<SectionHeader>> Excluded;
- Optional<bool> NoHeaders;
-};
-
-struct SectionName {
- StringRef Section;
-};
-
struct Symbol {
StringRef Name;
ELF_STT Type;
- StringRef Section;
+ Optional<StringRef> Section;
Optional<ELF_SHN> Index;
ELF_STB Binding;
- llvm::yaml::Hex64 Value;
- llvm::yaml::Hex64 Size;
+ Optional<llvm::yaml::Hex64> Value;
+ Optional<llvm::yaml::Hex64> Size;
Optional<uint8_t> Other;
Optional<uint32_t> StName;
@@ -126,6 +153,16 @@ struct DynamicEntry {
llvm::yaml::Hex64 Val;
};
+struct BBAddrMapEntry {
+ struct BBEntry {
+ llvm::yaml::Hex32 AddressOffset;
+ llvm::yaml::Hex32 Size;
+ llvm::yaml::Hex32 Metadata;
+ };
+ llvm::yaml::Hex64 Address;
+ Optional<std::vector<BBEntry>> BBEntries;
+};
+
struct StackSizeEntry {
llvm::yaml::Hex64 Address;
llvm::yaml::Hex64 Size;
@@ -153,19 +190,29 @@ struct Chunk {
StackSizes,
SymtabShndxSection,
Symver,
+ ARMIndexTable,
MipsABIFlags,
Addrsig,
- Fill,
LinkerOptions,
DependentLibraries,
- CallGraphProfile
+ CallGraphProfile,
+ BBAddrMap,
+
+ // Special chunks.
+ SpecialChunksStart,
+ Fill = SpecialChunksStart,
+ SectionHeaderTable,
};
ChunkKind Kind;
StringRef Name;
Optional<llvm::yaml::Hex64> Offset;
- Chunk(ChunkKind K) : Kind(K) {}
+ // Usually chunks are not created implicitly, but rather loaded from YAML.
+ // This flag is used to signal whether this is the case or not.
+ bool IsImplicit;
+
+ Chunk(ChunkKind K, bool Implicit) : Kind(K), IsImplicit(Implicit) {}
virtual ~Chunk();
};
@@ -173,25 +220,35 @@ struct Section : public Chunk {
ELF_SHT Type;
Optional<ELF_SHF> Flags;
Optional<llvm::yaml::Hex64> Address;
- StringRef Link;
+ Optional<StringRef> Link;
llvm::yaml::Hex64 AddressAlign;
Optional<llvm::yaml::Hex64> EntSize;
- // Usually sections are not created implicitly, but loaded from YAML.
- // When they are, this flag is used to signal about that.
- bool IsImplicit;
+ Optional<yaml::BinaryRef> Content;
+ Optional<llvm::yaml::Hex64> Size;
// Holds the original section index.
unsigned OriginalSecNdx;
- Section(ChunkKind Kind, bool IsImplicit = false)
- : Chunk(Kind), IsImplicit(IsImplicit) {}
+ Section(ChunkKind Kind, bool IsImplicit = false) : Chunk(Kind, IsImplicit) {}
- static bool classof(const Chunk *S) { return S->Kind != ChunkKind::Fill; }
+ static bool classof(const Chunk *S) {
+ return S->Kind < ChunkKind::SpecialChunksStart;
+ }
+
+ // Some derived sections might have their own special entries. This method
+ // returns a vector of <entry name, is used> pairs. It is used for section
+ // validation.
+ virtual std::vector<std::pair<StringRef, bool>> getEntries() const {
+ return {};
+ };
// The following members are used to override section fields which is
// useful for creating invalid objects.
+ // This can be used to override the sh_addralign field.
+ Optional<llvm::yaml::Hex64> ShAddrAlign;
+
// This can be used to override the offset stored in the sh_name field.
// It does not affect the name stored in the string table.
Optional<llvm::yaml::Hex64> ShName;
@@ -206,6 +263,12 @@ struct Section : public Chunk {
// This can be used to override the sh_flags field.
Optional<llvm::yaml::Hex64> ShFlags;
+
+ // This can be used to override the sh_type field. It is useful when we
+ // want to use specific YAML keys for a section of a particular type to
+ // describe the content, but still want to have a different final type
+ // for the section.
+ Optional<ELF_SHT> ShType;
};
// Fill is a block of data which is placed outside of sections. It is
@@ -215,18 +278,57 @@ struct Fill : Chunk {
Optional<yaml::BinaryRef> Pattern;
llvm::yaml::Hex64 Size;
- Fill() : Chunk(ChunkKind::Fill) {}
+ Fill() : Chunk(ChunkKind::Fill, /*Implicit=*/false) {}
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Fill; }
};
+struct SectionHeaderTable : Chunk {
+ SectionHeaderTable(bool IsImplicit)
+ : Chunk(ChunkKind::SectionHeaderTable, IsImplicit) {}
+
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::SectionHeaderTable;
+ }
+
+ Optional<std::vector<SectionHeader>> Sections;
+ Optional<std::vector<SectionHeader>> Excluded;
+ Optional<bool> NoHeaders;
+
+ size_t getNumHeaders(size_t SectionsNum) const {
+ if (IsImplicit)
+ return SectionsNum;
+ if (NoHeaders)
+ return (*NoHeaders) ? 0 : SectionsNum;
+ return (Sections ? Sections->size() : 0) + /*Null section*/ 1;
+ }
+
+ static constexpr StringRef TypeStr = "SectionHeaderTable";
+};
+
+struct BBAddrMapSection : Section {
+ Optional<std::vector<BBAddrMapEntry>> Entries;
+
+ BBAddrMapSection() : Section(ChunkKind::BBAddrMap) {}
+
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Entries", Entries.hasValue()}};
+ };
+
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::BBAddrMap;
+ }
+};
+
struct StackSizesSection : Section {
- Optional<yaml::BinaryRef> Content;
- Optional<llvm::yaml::Hex64> Size;
Optional<std::vector<StackSizeEntry>> Entries;
StackSizesSection() : Section(ChunkKind::StackSizes) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Entries", Entries.hasValue()}};
+ };
+
static bool classof(const Chunk *S) {
return S->Kind == ChunkKind::StackSizes;
}
@@ -237,17 +339,18 @@ struct StackSizesSection : Section {
};
struct DynamicSection : Section {
- std::vector<DynamicEntry> Entries;
- Optional<yaml::BinaryRef> Content;
+ Optional<std::vector<DynamicEntry>> Entries;
DynamicSection() : Section(ChunkKind::Dynamic) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Entries", Entries.hasValue()}};
+ };
+
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Dynamic; }
};
struct RawContentSection : Section {
- Optional<yaml::BinaryRef> Content;
- Optional<llvm::yaml::Hex64> Size;
Optional<llvm::yaml::Hex64> Info;
RawContentSection() : Section(ChunkKind::RawContent) {}
@@ -261,29 +364,31 @@ struct RawContentSection : Section {
};
struct NoBitsSection : Section {
- llvm::yaml::Hex64 Size;
-
NoBitsSection() : Section(ChunkKind::NoBits) {}
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::NoBits; }
};
struct NoteSection : Section {
- Optional<yaml::BinaryRef> Content;
- Optional<llvm::yaml::Hex64> Size;
Optional<std::vector<ELFYAML::NoteEntry>> Notes;
NoteSection() : Section(ChunkKind::Note) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Notes", Notes.hasValue()}};
+ };
+
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Note; }
};
struct HashSection : Section {
- Optional<yaml::BinaryRef> Content;
- Optional<llvm::yaml::Hex64> Size;
Optional<std::vector<uint32_t>> Bucket;
Optional<std::vector<uint32_t>> Chain;
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Bucket", Bucket.hasValue()}, {"Chain", Chain.hasValue()}};
+ };
+
// The following members are used to override section fields.
// This is useful for creating invalid objects.
Optional<llvm::yaml::Hex64> NBucket;
@@ -315,8 +420,6 @@ struct GnuHashHeader {
};
struct GnuHashSection : Section {
- Optional<yaml::BinaryRef> Content;
-
Optional<GnuHashHeader> Header;
Optional<std::vector<llvm::yaml::Hex64>> BloomFilter;
Optional<std::vector<llvm::yaml::Hex32>> HashBuckets;
@@ -324,6 +427,13 @@ struct GnuHashSection : Section {
GnuHashSection() : Section(ChunkKind::GnuHash) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Header", Header.hasValue()},
+ {"BloomFilter", BloomFilter.hasValue()},
+ {"HashBuckets", HashBuckets.hasValue()},
+ {"HashValues", HashValues.hasValue()}};
+ };
+
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::GnuHash; }
};
@@ -341,24 +451,29 @@ struct VerneedEntry {
};
struct VerneedSection : Section {
- Optional<yaml::BinaryRef> Content;
Optional<std::vector<VerneedEntry>> VerneedV;
- llvm::yaml::Hex64 Info;
+ Optional<llvm::yaml::Hex64> Info;
VerneedSection() : Section(ChunkKind::Verneed) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Dependencies", VerneedV.hasValue()}};
+ };
+
static bool classof(const Chunk *S) {
return S->Kind == ChunkKind::Verneed;
}
};
struct AddrsigSection : Section {
- Optional<yaml::BinaryRef> Content;
- Optional<llvm::yaml::Hex64> Size;
Optional<std::vector<YAMLFlowString>> Symbols;
AddrsigSection() : Section(ChunkKind::Addrsig) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Symbols", Symbols.hasValue()}};
+ };
+
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Addrsig; }
};
@@ -369,10 +484,13 @@ struct LinkerOption {
struct LinkerOptionsSection : Section {
Optional<std::vector<LinkerOption>> Options;
- Optional<yaml::BinaryRef> Content;
LinkerOptionsSection() : Section(ChunkKind::LinkerOptions) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Options", Options.hasValue()}};
+ };
+
static bool classof(const Chunk *S) {
return S->Kind == ChunkKind::LinkerOptions;
}
@@ -380,10 +498,13 @@ struct LinkerOptionsSection : Section {
struct DependentLibrariesSection : Section {
Optional<std::vector<YAMLFlowString>> Libs;
- Optional<yaml::BinaryRef> Content;
DependentLibrariesSection() : Section(ChunkKind::DependentLibraries) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Libraries", Libs.hasValue()}};
+ };
+
static bool classof(const Chunk *S) {
return S->Kind == ChunkKind::DependentLibraries;
}
@@ -401,49 +522,62 @@ struct CallGraphEntry {
struct CallGraphProfileSection : Section {
Optional<std::vector<CallGraphEntry>> Entries;
- Optional<yaml::BinaryRef> Content;
CallGraphProfileSection() : Section(ChunkKind::CallGraphProfile) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Entries", Entries.hasValue()}};
+ };
+
static bool classof(const Chunk *S) {
return S->Kind == ChunkKind::CallGraphProfile;
}
};
struct SymverSection : Section {
- std::vector<uint16_t> Entries;
+ Optional<std::vector<uint16_t>> Entries;
SymverSection() : Section(ChunkKind::Symver) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Entries", Entries.hasValue()}};
+ };
+
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Symver; }
};
struct VerdefEntry {
- uint16_t Version;
- uint16_t Flags;
- uint16_t VersionNdx;
- uint32_t Hash;
+ Optional<uint16_t> Version;
+ Optional<uint16_t> Flags;
+ Optional<uint16_t> VersionNdx;
+ Optional<uint32_t> Hash;
std::vector<StringRef> VerNames;
};
struct VerdefSection : Section {
Optional<std::vector<VerdefEntry>> Entries;
- Optional<yaml::BinaryRef> Content;
-
- llvm::yaml::Hex64 Info;
+ Optional<llvm::yaml::Hex64> Info;
VerdefSection() : Section(ChunkKind::Verdef) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Entries", Entries.hasValue()}};
+ };
+
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Verdef; }
};
-struct Group : Section {
+struct GroupSection : Section {
// Members of a group contain a flag and a list of section indices
// that are part of the group.
- std::vector<SectionOrType> Members;
+ Optional<std::vector<SectionOrType>> Members;
Optional<StringRef> Signature; /* Info */
- Group() : Section(ChunkKind::Group) {}
+ GroupSection() : Section(ChunkKind::Group) {}
+
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Members", Members.hasValue()}};
+ };
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Group; }
};
@@ -456,11 +590,15 @@ struct Relocation {
};
struct RelocationSection : Section {
- std::vector<Relocation> Relocations;
+ Optional<std::vector<Relocation>> Relocations;
StringRef RelocatableSec; /* Info */
RelocationSection() : Section(ChunkKind::Relocation) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Relocations", Relocations.hasValue()}};
+ };
+
static bool classof(const Chunk *S) {
return S->Kind == ChunkKind::Relocation;
}
@@ -468,25 +606,51 @@ struct RelocationSection : Section {
struct RelrSection : Section {
Optional<std::vector<llvm::yaml::Hex64>> Entries;
- Optional<yaml::BinaryRef> Content;
RelrSection() : Section(ChunkKind::Relr) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Entries", Entries.hasValue()}};
+ };
+
static bool classof(const Chunk *S) {
return S->Kind == ChunkKind::Relr;
}
};
struct SymtabShndxSection : Section {
- std::vector<uint32_t> Entries;
+ Optional<std::vector<uint32_t>> Entries;
SymtabShndxSection() : Section(ChunkKind::SymtabShndxSection) {}
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Entries", Entries.hasValue()}};
+ };
+
static bool classof(const Chunk *S) {
return S->Kind == ChunkKind::SymtabShndxSection;
}
};
+struct ARMIndexTableEntry {
+ llvm::yaml::Hex32 Offset;
+ llvm::yaml::Hex32 Value;
+};
+
+struct ARMIndexTableSection : Section {
+ Optional<std::vector<ARMIndexTableEntry>> Entries;
+
+ ARMIndexTableSection() : Section(ChunkKind::ARMIndexTable) {}
+
+ std::vector<std::pair<StringRef, bool>> getEntries() const override {
+ return {{"Entries", Entries.hasValue()}};
+ };
+
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::ARMIndexTable;
+ }
+};
+
// Represents .MIPS.abiflags section
struct MipsABIFlags : Section {
llvm::yaml::Hex16 Version;
@@ -517,15 +681,15 @@ struct ProgramHeader {
Optional<llvm::yaml::Hex64> FileSize;
Optional<llvm::yaml::Hex64> MemSize;
Optional<llvm::yaml::Hex64> Offset;
+ Optional<StringRef> FirstSec;
+ Optional<StringRef> LastSec;
- std::vector<SectionName> Sections;
- // This vector is parallel to Sections and contains corresponding chunks.
+ // This vector contains all chunks from [FirstSec, LastSec].
std::vector<Chunk *> Chunks;
};
struct Object {
FileHeader Header;
- Optional<SectionHeaderTable> SectionHeaders;
std::vector<ProgramHeader> ProgramHeaders;
// An object might contain output section descriptions as well as
@@ -547,12 +711,26 @@ struct Object {
Ret.push_back(S);
return Ret;
}
+
+ const SectionHeaderTable &getSectionHeaderTable() const {
+ for (const std::unique_ptr<Chunk> &C : Chunks)
+ if (auto *S = dyn_cast<ELFYAML::SectionHeaderTable>(C.get()))
+ return *S;
+ llvm_unreachable("the section header table chunk must always be present");
+ }
+
+ unsigned getMachine() const;
};
+bool shouldAllocateFileSpace(ArrayRef<ProgramHeader> Phdrs,
+ const NoBitsSection &S);
+
} // end namespace ELFYAML
} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::StackSizeEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::BBAddrMapEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::BBAddrMapEntry::BBEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::DynamicEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::LinkerOption)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::CallGraphEntry)
@@ -566,7 +744,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VernauxEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VerneedEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::Relocation)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::SectionOrType)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::SectionName)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::ARMIndexTableEntry)
namespace llvm {
namespace yaml {
@@ -690,29 +868,33 @@ struct MappingTraits<ELFYAML::FileHeader> {
static void mapping(IO &IO, ELFYAML::FileHeader &FileHdr);
};
-template <> struct MappingTraits<ELFYAML::SectionHeaderTable> {
- static void mapping(IO &IO, ELFYAML::SectionHeaderTable &SecHdrTable);
- static StringRef validate(IO &IO, ELFYAML::SectionHeaderTable &SecHdrTable);
-};
-
template <> struct MappingTraits<ELFYAML::SectionHeader> {
static void mapping(IO &IO, ELFYAML::SectionHeader &SHdr);
};
template <> struct MappingTraits<ELFYAML::ProgramHeader> {
static void mapping(IO &IO, ELFYAML::ProgramHeader &FileHdr);
+ static std::string validate(IO &IO, ELFYAML::ProgramHeader &FileHdr);
};
template <>
struct MappingTraits<ELFYAML::Symbol> {
static void mapping(IO &IO, ELFYAML::Symbol &Symbol);
- static StringRef validate(IO &IO, ELFYAML::Symbol &Symbol);
+ static std::string validate(IO &IO, ELFYAML::Symbol &Symbol);
};
template <> struct MappingTraits<ELFYAML::StackSizeEntry> {
static void mapping(IO &IO, ELFYAML::StackSizeEntry &Rel);
};
+template <> struct MappingTraits<ELFYAML::BBAddrMapEntry> {
+ static void mapping(IO &IO, ELFYAML::BBAddrMapEntry &Rel);
+};
+
+template <> struct MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry> {
+ static void mapping(IO &IO, ELFYAML::BBAddrMapEntry::BBEntry &Rel);
+};
+
template <> struct MappingTraits<ELFYAML::GnuHashHeader> {
static void mapping(IO &IO, ELFYAML::GnuHashHeader &Rel);
};
@@ -749,9 +931,13 @@ template <> struct MappingTraits<ELFYAML::Relocation> {
static void mapping(IO &IO, ELFYAML::Relocation &Rel);
};
+template <> struct MappingTraits<ELFYAML::ARMIndexTableEntry> {
+ static void mapping(IO &IO, ELFYAML::ARMIndexTableEntry &E);
+};
+
template <> struct MappingTraits<std::unique_ptr<ELFYAML::Chunk>> {
static void mapping(IO &IO, std::unique_ptr<ELFYAML::Chunk> &C);
- static StringRef validate(IO &io, std::unique_ptr<ELFYAML::Chunk> &C);
+ static std::string validate(IO &io, std::unique_ptr<ELFYAML::Chunk> &C);
};
template <>
@@ -763,10 +949,6 @@ template <> struct MappingTraits<ELFYAML::SectionOrType> {
static void mapping(IO &IO, ELFYAML::SectionOrType &sectionOrType);
};
-template <> struct MappingTraits<ELFYAML::SectionName> {
- static void mapping(IO &IO, ELFYAML::SectionName &sectionName);
-};
-
} // end namespace yaml
} // end namespace llvm
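
The new getDefaultShEntSize<ELFT>() helper introduced above in the ELFYAML.h hunk picks a default sh_entsize purely from the machine, section type and section name. A minimal sketch of how it resolves for a 64-bit little-endian object; the wrapper function name is illustrative and the expected values follow directly from the switch shown in the hunk:

#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/ObjectYAML/ELFYAML.h"

using namespace llvm;

// Default sh_entsize values as yaml2obj would pick them for ELF64LE:
// 24 for .symtab (sizeof(Elf64_Sym)) and 1 for the special-cased .debug_str.
void printDefaultEntSizes() {
  unsigned SymtabEntSize = ELFYAML::getDefaultShEntSize<object::ELF64LE>(
      ELF::EM_X86_64, ELF::SHT_SYMTAB, ".symtab");      // == 24
  unsigned DebugStrEntSize = ELFYAML::getDefaultShEntSize<object::ELF64LE>(
      ELF::EM_X86_64, ELF::SHT_PROGBITS, ".debug_str"); // == 1
  (void)SymtabEntSize;
  (void)DebugStrEntSize;
}
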
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/MachOYAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/MachOYAML.h
index fb6780b6d0ed..94e66c5ae787 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/MachOYAML.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/MachOYAML.h
@@ -220,7 +220,7 @@ template <> struct MappingTraits<MachOYAML::Relocation> {
template <> struct MappingTraits<MachOYAML::Section> {
static void mapping(IO &IO, MachOYAML::Section &Section);
- static StringRef validate(IO &io, MachOYAML::Section &Section);
+ static std::string validate(IO &io, MachOYAML::Section &Section);
};
template <> struct MappingTraits<MachOYAML::NListEntry> {
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/MinidumpYAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/MinidumpYAML.h
index c1711a28dd84..b0cee541cef2 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/MinidumpYAML.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/MinidumpYAML.h
@@ -236,7 +236,7 @@ template <> struct BlockScalarTraits<MinidumpYAML::BlockStringRef> {
template <> struct MappingTraits<std::unique_ptr<MinidumpYAML::Stream>> {
static void mapping(IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S);
- static StringRef validate(IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S);
+ static std::string validate(IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S);
};
template <> struct MappingContextTraits<minidump::MemoryDescriptor, BinaryRef> {
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ObjectYAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ObjectYAML.h
index 0015fd3dc501..dd26ce3e9703 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ObjectYAML.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ObjectYAML.h
@@ -9,6 +9,7 @@
#ifndef LLVM_OBJECTYAML_OBJECTYAML_H
#define LLVM_OBJECTYAML_OBJECTYAML_H
+#include "llvm/ObjectYAML/ArchiveYAML.h"
#include "llvm/ObjectYAML/COFFYAML.h"
#include "llvm/ObjectYAML/ELFYAML.h"
#include "llvm/ObjectYAML/MachOYAML.h"
@@ -23,6 +24,7 @@ namespace yaml {
class IO;
struct YamlObjectFile {
+ std::unique_ptr<ArchYAML::Archive> Arch;
std::unique_ptr<ELFYAML::Object> Elf;
std::unique_ptr<COFFYAML::Object> Coff;
std::unique_ptr<MachOYAML::Object> MachO;
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/WasmYAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/WasmYAML.h
index bffb314e2d3b..80f1b4006205 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/WasmYAML.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/WasmYAML.h
@@ -53,6 +53,7 @@ struct Limits {
struct Table {
TableType ElemType;
Limits TableLimits;
+ uint32_t Index;
};
struct Export {
@@ -220,6 +221,8 @@ struct NameSection : CustomSection {
}
std::vector<NameEntry> FunctionNames;
+ std::vector<NameEntry> GlobalNames;
+ std::vector<NameEntry> DataSegmentNames;
};
struct LinkingSection : CustomSection {
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/yaml2obj.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/yaml2obj.h
index 34def363a55b..1f693475c946 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/yaml2obj.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/yaml2obj.h
@@ -40,12 +40,17 @@ namespace WasmYAML {
struct Object;
}
+namespace ArchYAML {
+struct Archive;
+}
+
namespace yaml {
class Input;
struct YamlObjectFile;
using ErrorHandler = llvm::function_ref<void(const Twine &Msg)>;
+bool yaml2archive(ArchYAML::Archive &Doc, raw_ostream &Out, ErrorHandler EH);
bool yaml2coff(COFFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH);
bool yaml2elf(ELFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH,
uint64_t MaxSize);
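
With yaml2archive added alongside yaml2elf/yaml2coff above, the usual convertYAML() entry point from this header (declared outside the hunk shown here) can also emit archives when the YAML document describes one. A hedged sketch of driving it programmatically; the wrapper function name and error prefix are illustrative:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ObjectYAML/yaml2obj.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Parse a YAML description and write the corresponding binary to Out.
bool emitObjectFromYAML(StringRef Yaml, raw_ostream &Out) {
  yaml::Input YIn(Yaml);
  return yaml::convertYAML(YIn, Out, [](const Twine &Msg) {
    errs() << "yaml2obj: " << Msg << "\n";
  });
}
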
diff --git a/contrib/llvm-project/llvm/include/llvm/Option/ArgList.h b/contrib/llvm-project/llvm/include/llvm/Option/ArgList.h
index 74bfadcba726..9ce783978185 100644
--- a/contrib/llvm-project/llvm/include/llvm/Option/ArgList.h
+++ b/contrib/llvm-project/llvm/include/llvm/Option/ArgList.h
@@ -412,6 +412,10 @@ public:
return ArgStrings[Index];
}
+ void replaceArgString(unsigned Index, const Twine &S) {
+ ArgStrings[Index] = MakeArgString(S);
+ }
+
unsigned getNumInputArgStrings() const override {
return NumInputArgStrings;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Option/OptParser.td b/contrib/llvm-project/llvm/include/llvm/Option/OptParser.td
index e32355444d7b..9a179c511bd6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Option/OptParser.td
+++ b/contrib/llvm-project/llvm/include/llvm/Option/OptParser.td
@@ -13,7 +13,7 @@
// Define the kinds of options.
-class OptionKind<string name, int precedence = 0, bit sentinel = 0> {
+class OptionKind<string name, int precedence = 0, bit sentinel = false> {
string Name = name;
// The kind precedence, kinds with lower precedence are matched first.
int Precedence = precedence;
@@ -24,9 +24,9 @@ class OptionKind<string name, int precedence = 0, bit sentinel = 0> {
// An option group.
def KIND_GROUP : OptionKind<"Group">;
// The input option kind.
-def KIND_INPUT : OptionKind<"Input", 1, 1>;
+def KIND_INPUT : OptionKind<"Input", 1, true>;
// The unknown option kind.
-def KIND_UNKNOWN : OptionKind<"Unknown", 2, 1>;
+def KIND_UNKNOWN : OptionKind<"Unknown", 2, true>;
// A flag with no values.
def KIND_FLAG : OptionKind<"Flag">;
// An option which prefixes its (single) value.
@@ -97,17 +97,19 @@ class Option<list<string> prefixes, string name, OptionKind kind> {
OptionGroup Group = ?;
Option Alias = ?;
list<string> AliasArgs = [];
- string MarshallingKind = ?;
+ code MacroPrefix = "";
code KeyPath = ?;
code DefaultValue = ?;
- bit ShouldAlwaysEmit = 0;
- // Used by the Flag option kind.
- bit IsPositive = 1;
- // Used by the String option kind.
+ code ImpliedValue = ?;
+ code ImpliedCheck = "false";
+ code ShouldParse = "true";
+ bit ShouldAlwaysEmit = false;
code NormalizerRetTy = ?;
code NormalizedValuesScope = "";
code Normalizer = "";
code Denormalizer = "";
+ code ValueMerger = "mergeForwardValue";
+ code ValueExtractor = "extractForwardValue";
list<code> NormalizedValues = ?;
}
@@ -144,34 +146,85 @@ class ValuesCode<code valuecode> { code ValuesCode = valuecode; }
// Helpers for defining marshalling information.
-class MarshallingInfo<code keypath, code defaultvalue> {
- code KeyPath = keypath;
+class KeyPathAndMacro<string key_path_prefix, string key_path_base,
+ string macro_prefix = ""> {
+ code KeyPath = !strconcat(key_path_prefix, key_path_base);
+ code MacroPrefix = macro_prefix;
+}
+
+def EmptyKPM : KeyPathAndMacro<"", "">;
+
+class ImpliedByAnyOf<list<string> key_paths, code value = "true"> {
+ code ImpliedCheck = !foldl("false", key_paths, accumulator, key_path,
+ !strconcat(accumulator, " || ", key_path));
+ code ImpliedValue = value;
+}
+
+class MarshallingInfo<KeyPathAndMacro kpm, code defaultvalue> {
+ code KeyPath = kpm.KeyPath;
+ code MacroPrefix = kpm.MacroPrefix;
code DefaultValue = defaultvalue;
}
-class MarshallingInfoString<code keypath, code defaultvalue, code normalizerretty>
- : MarshallingInfo<keypath, defaultvalue> {
- string MarshallingKind = "string";
- code NormalizerRetTy = normalizerretty;
+
+class MarshallingInfoString<KeyPathAndMacro kpm, code defaultvalue="std::string()">
+ : MarshallingInfo<kpm, defaultvalue> {
+ code Normalizer = "normalizeString";
+ code Denormalizer = "denormalizeString";
+}
+
+class MarshallingInfoStringInt<KeyPathAndMacro kpm, code defaultvalue="0", code type="unsigned">
+ : MarshallingInfo<kpm, defaultvalue> {
+ code Normalizer = "normalizeStringIntegral<"#type#">";
+ code Denormalizer = "denormalizeString";
+}
+
+class MarshallingInfoStringVector<KeyPathAndMacro kpm>
+ : MarshallingInfo<kpm, "std::vector<std::string>({})"> {
+ code Normalizer = "normalizeStringVector";
+ code Denormalizer = "denormalizeStringVector";
+}
+
+class MarshallingInfoFlag<KeyPathAndMacro kpm, code defaultvalue = "false">
+ : MarshallingInfo<kpm, defaultvalue> {
+ code Normalizer = "normalizeSimpleFlag";
+ code Denormalizer = "denormalizeSimpleFlag";
+}
+
+class MarshallingInfoNegativeFlag<KeyPathAndMacro kpm, code defaultvalue = "true">
+ : MarshallingInfo<kpm, defaultvalue> {
+ code Normalizer = "normalizeSimpleNegativeFlag";
+ code Denormalizer = "denormalizeSimpleFlag";
+}
+
+class MarshallingInfoBitfieldFlag<KeyPathAndMacro kpm, code value>
+ : MarshallingInfoFlag<kpm, "0u"> {
+ code Normalizer = "makeFlagToValueNormalizer("#value#")";
+ code ValueMerger = "mergeMaskValue";
+ code ValueExtractor = "(extractMaskValue<unsigned, decltype("#value#"), "#value#">)";
}
-class MarshallingInfoFlag<code keypath, code defaultvalue>
- : MarshallingInfo<keypath, defaultvalue> {
- string MarshallingKind = "flag";
+// Marshalling info for booleans. Applied to the flag setting keypath to false.
+class MarshallingInfoBooleanFlag<KeyPathAndMacro kpm, code defaultvalue, code value, code name,
+ code other_value, code other_name>
+ : MarshallingInfoFlag<kpm, defaultvalue> {
+ code Normalizer = "makeBooleanOptionNormalizer("#value#", "#other_value#", OPT_"#other_name#")";
+ code Denormalizer = "makeBooleanOptionDenormalizer("#value#")";
}
// Mixins for additional marshalling attributes.
-class IsNegative { bit IsPositive = 0; }
-class AlwaysEmit { bit ShouldAlwaysEmit = 1; }
+class ShouldParseIf<code condition> { code ShouldParse = condition; }
+class AlwaysEmit { bit ShouldAlwaysEmit = true; }
class Normalizer<code normalizer> { code Normalizer = normalizer; }
class Denormalizer<code denormalizer> { code Denormalizer = denormalizer; }
class NormalizedValuesScope<code scope> { code NormalizedValuesScope = scope; }
class NormalizedValues<list<code> definitions> { list<code> NormalizedValues = definitions; }
-class DenormalizeString { code Denormalizer = "denormalizeString"; }
class AutoNormalizeEnum {
code Normalizer = "normalizeSimpleEnum";
code Denormalizer = "denormalizeSimpleEnum";
}
+class ValueMerger<code merger> { code ValueMerger = merger; }
+class ValueExtractor<code extractor> { code ValueExtractor = extractor; }
// Predefined options.
diff --git a/contrib/llvm-project/llvm/include/llvm/Option/OptTable.h b/contrib/llvm-project/llvm/include/llvm/Option/OptTable.h
index 5db30436069d..58c09b23d237 100644
--- a/contrib/llvm-project/llvm/include/llvm/Option/OptTable.h
+++ b/contrib/llvm-project/llvm/include/llvm/Option/OptTable.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Option/OptSpecifier.h"
+#include "llvm/Support/StringSaver.h"
#include <cassert>
#include <string>
#include <vector>
@@ -20,6 +21,7 @@
namespace llvm {
class raw_ostream;
+template <typename Fn> class function_ref;
namespace opt {
@@ -48,7 +50,7 @@ public:
unsigned ID;
unsigned char Kind;
unsigned char Param;
- unsigned short Flags;
+ unsigned int Flags;
unsigned short GroupID;
unsigned short AliasID;
const char *AliasArgs;
@@ -59,6 +61,8 @@ private:
/// The option information table.
std::vector<Info> OptionInfos;
bool IgnoreCase;
+ bool GroupedShortOptions = false;
+ const char *EnvVar = nullptr;
unsigned TheInputOptionID = 0;
unsigned TheUnknownOptionID = 0;
@@ -79,6 +83,8 @@ private:
return OptionInfos[id - 1];
}
+ Arg *parseOneArgGrouped(InputArgList &Args, unsigned &Index) const;
+
protected:
OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase = false);
@@ -120,6 +126,12 @@ public:
return getInfo(id).MetaVar;
}
+ /// Specify the environment variable where initial options should be read.
+ void setInitialOptionsFromEnvironment(const char *E) { EnvVar = E; }
+
+ /// Support grouped short options. e.g. -ab represents -a -b.
+ void setGroupedShortOptions(bool Value) { GroupedShortOptions = Value; }
+
/// Find possible value for given flags. This is used for shell
/// autocompletion.
///
@@ -140,7 +152,7 @@ public:
///
/// \return The vector of flags which start with Cur.
std::vector<std::string> findByPrefix(StringRef Cur,
- unsigned short DisableFlags) const;
+ unsigned int DisableFlags) const;
/// Find the OptTable option that most closely matches the given string.
///
@@ -213,6 +225,18 @@ public:
unsigned &MissingArgCount, unsigned FlagsToInclude = 0,
unsigned FlagsToExclude = 0) const;
+ /// A convenience helper which handles optional initial options populated from
+ /// an environment variable, expands response files recursively and parses
+ /// options.
+ ///
+ /// \param ErrorFn - Called on a formatted error message for missing arguments
+ /// or unknown options.
+ /// \return An InputArgList; on error this will contain all the options which
+ /// could be parsed.
+ InputArgList parseArgs(int Argc, char *const *Argv, OptSpecifier Unknown,
+ StringSaver &Saver,
+ function_ref<void(StringRef)> ErrorFn) const;
+
/// Render the help text for an option table.
///
/// \param OS - The stream to write the help text to.
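
A sketch of the new OptTable::parseArgs() convenience path together with the grouped-short-option and environment-variable hooks added in this hunk. The environment variable name and the wrapper function are illustrative; a real tool would pass its TableGen-generated table and its generated OPT_UNKNOWN specifier:

#include "llvm/ADT/STLExtras.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::opt;

// T is any concrete OptTable (normally TableGen-generated); UnknownID is the
// tool's OPT_UNKNOWN specifier. Saver must outlive the returned arg list.
InputArgList parseCommandLine(OptTable &T, OptSpecifier UnknownID,
                              StringSaver &Saver, int Argc,
                              char *const *Argv) {
  T.setInitialOptionsFromEnvironment("MYTOOL_OPTS"); // illustrative env var
  T.setGroupedShortOptions(true);                    // -ab parses as -a -b
  return T.parseArgs(Argc, Argv, UnknownID, Saver, [](StringRef Msg) {
    errs() << "error: " << Msg << "\n";
  });
}
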
diff --git a/contrib/llvm-project/llvm/include/llvm/Option/Option.h b/contrib/llvm-project/llvm/include/llvm/Option/Option.h
index 73ee8e0073b8..196cf656355d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Option/Option.h
+++ b/contrib/llvm-project/llvm/include/llvm/Option/Option.h
@@ -213,14 +213,16 @@ public:
/// Index to the position where argument parsing should resume
/// (even if the argument is missing values).
///
- /// \param ArgSize The number of bytes taken up by the matched Option prefix
- /// and name. This is used to determine where joined values
- /// start.
- Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const;
+ /// \p CurArg The argument to be matched. It may be shorter than the
+ /// underlying storage to represent a Joined argument.
+ /// \p GroupedShortOption If true, we are handling the fallback case of
+ /// parsing a prefix of the current argument as a short option.
+ Arg *accept(const ArgList &Args, StringRef CurArg, bool GroupedShortOption,
+ unsigned &Index) const;
private:
- Arg *acceptInternal(const ArgList &Args, unsigned &Index,
- unsigned ArgSize) const;
+ Arg *acceptInternal(const ArgList &Args, StringRef CurArg,
+ unsigned &Index) const;
public:
void print(raw_ostream &O) const;
diff --git a/contrib/llvm-project/llvm/include/llvm/Pass.h b/contrib/llvm-project/llvm/include/llvm/Pass.h
index 2fe7aee2e37e..8aa9ba90a9ca 100644
--- a/contrib/llvm-project/llvm/include/llvm/Pass.h
+++ b/contrib/llvm-project/llvm/include/llvm/Pass.h
@@ -69,6 +69,20 @@ enum PassKind {
PT_PassManager
};
+/// This enumerates the LLVM full LTO or ThinLTO optimization phases.
+enum class ThinOrFullLTOPhase {
+ /// No LTO/ThinLTO behavior needed.
+ None,
+ /// ThinLTO prelink (summary) phase.
+ ThinLTOPreLink,
+ /// ThinLTO postlink (backend compile) phase.
+ ThinLTOPostLink,
+ /// Full LTO prelink phase.
+ FullLTOPreLink,
+ /// Full LTO postlink (backend compile) phase.
+ FullLTOPostLink
+};
+
//===----------------------------------------------------------------------===//
/// Pass interface - Implemented by all 'passes'. Subclass this if you are an
/// interprocedural optimization or you do not fit into any of the more
@@ -309,6 +323,12 @@ protected:
/// then the value of this boolean will be true, otherwise false.
/// This is the storage for the -time-passes option.
extern bool TimePassesIsEnabled;
+/// If TimePassesPerRun is true, there is one report line for each pass
+/// invocation.
+/// If TimePassesPerRun is false, there is only one report line for each
+/// pass (even if there are multiple pass objects).
+/// (For the new pass manager only.)
+extern bool TimePassesPerRun;
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/PassAnalysisSupport.h b/contrib/llvm-project/llvm/include/llvm/PassAnalysisSupport.h
index 84df171d38d8..4bed3cb55a90 100644
--- a/contrib/llvm-project/llvm/include/llvm/PassAnalysisSupport.h
+++ b/contrib/llvm-project/llvm/include/llvm/PassAnalysisSupport.h
@@ -17,11 +17,12 @@
#if !defined(LLVM_PASS_H) || defined(LLVM_PASSANALYSISSUPPORT_H)
#error "Do not include <PassAnalysisSupport.h>; include <Pass.h> instead"
-#endif
+#endif
#ifndef LLVM_PASSANALYSISSUPPORT_H
#define LLVM_PASSANALYSISSUPPORT_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>
#include <tuple>
@@ -58,6 +59,11 @@ private:
SmallVector<AnalysisID, 0> Used;
bool PreservesAll = false;
+ void pushUnique(VectorType &Set, AnalysisID ID) {
+ if (!llvm::is_contained(Set, ID))
+ Set.push_back(ID);
+ }
+
public:
AnalysisUsage() = default;
@@ -80,17 +86,17 @@ public:
///@{
/// Add the specified ID to the set of analyses preserved by this pass.
AnalysisUsage &addPreservedID(const void *ID) {
- Preserved.push_back(ID);
+ pushUnique(Preserved, ID);
return *this;
}
AnalysisUsage &addPreservedID(char &ID) {
- Preserved.push_back(&ID);
+ pushUnique(Preserved, &ID);
return *this;
}
/// Add the specified Pass class to the set of analyses preserved by this pass.
template<class PassClass>
AnalysisUsage &addPreserved() {
- Preserved.push_back(&PassClass::ID);
+ pushUnique(Preserved, &PassClass::ID);
return *this;
}
///@}
@@ -99,17 +105,17 @@ public:
/// Add the specified ID to the set of analyses used by this pass if they are
/// available.
AnalysisUsage &addUsedIfAvailableID(const void *ID) {
- Used.push_back(ID);
+ pushUnique(Used, ID);
return *this;
}
AnalysisUsage &addUsedIfAvailableID(char &ID) {
- Used.push_back(&ID);
+ pushUnique(Used, &ID);
return *this;
}
/// Add the specified Pass class to the set of analyses used by this pass.
template<class PassClass>
AnalysisUsage &addUsedIfAvailable() {
- Used.push_back(&PassClass::ID);
+ pushUnique(Used, &PassClass::ID);
return *this;
}
///@}
@@ -183,7 +189,7 @@ public:
}
/// Return analysis result or null if it doesn't exist.
- Pass *getAnalysisIfAvailable(AnalysisID ID, bool Direction) const;
+ Pass *getAnalysisIfAvailable(AnalysisID ID) const;
private:
/// This keeps track of which passes implements the interfaces that are
@@ -207,7 +213,7 @@ AnalysisType *Pass::getAnalysisIfAvailable() const {
const void *PI = &AnalysisType::ID;
- Pass *ResultPass = Resolver->getAnalysisIfAvailable(PI, true);
+ Pass *ResultPass = Resolver->getAnalysisIfAvailable(PI);
if (!ResultPass) return nullptr;
// Because the AnalysisType may not be a subclass of pass (for
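
The pushUnique() change above means repeated addPreserved/addUsedIfAvailable registrations no longer create duplicate entries in AnalysisUsage. A minimal sketch of a legacy pass where an accidental double registration is now harmless; the pass itself is illustrative:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
struct MyPass : public FunctionPass {
  static char ID;
  MyPass() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override { return false; }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<LoopInfoWrapperPass>();
    // A second registration is now deduplicated by pushUnique().
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};
} // namespace
char MyPass::ID = 0;
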
diff --git a/contrib/llvm-project/llvm/include/llvm/Passes/PassBuilder.h b/contrib/llvm-project/llvm/include/llvm/Passes/PassBuilder.h
index 0357e4a2fc05..28f9e83bf76a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Passes/PassBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/Passes/PassBuilder.h
@@ -36,11 +36,15 @@ struct PGOOptions {
enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
- CSPGOAction CSAction = NoCSAction, bool SamplePGOSupport = false)
+ CSPGOAction CSAction = NoCSAction,
+ bool DebugInfoForProfiling = false,
+ bool PseudoProbeForProfiling = false)
: ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
ProfileRemappingFile(ProfileRemappingFile), Action(Action),
- CSAction(CSAction),
- SamplePGOSupport(SamplePGOSupport || Action == SampleUse) {
+ CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
+ (Action == SampleUse &&
+ !PseudoProbeForProfiling)),
+ PseudoProbeForProfiling(PseudoProbeForProfiling) {
// Note, we do allow ProfileFile.empty() for Action=IRUse LTO can
// callback with IRUse action without ProfileFile.
@@ -55,16 +59,26 @@ struct PGOOptions {
// a profile.
assert(this->CSAction != CSIRUse || this->Action == IRUse);
- // If neither Action nor CSAction, SamplePGOSupport needs to be true.
+ // If neither Action nor CSAction, DebugInfoForProfiling or
+ // PseudoProbeForProfiling needs to be true.
assert(this->Action != NoAction || this->CSAction != NoCSAction ||
- this->SamplePGOSupport);
+ this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
+
+ // Pseudo probe emission does not work with -fdebug-info-for-profiling since
+ // they both use the discriminator field of debug lines but for different
+ // purposes.
+ if (this->DebugInfoForProfiling && this->PseudoProbeForProfiling) {
+ report_fatal_error(
+ "Pseudo probes cannot be used with -debug-info-for-profiling", false);
+ }
}
std::string ProfileFile;
std::string CSProfileGenFile;
std::string ProfileRemappingFile;
PGOAction Action;
CSPGOAction CSAction;
- bool SamplePGOSupport;
+ bool DebugInfoForProfiling;
+ bool PseudoProbeForProfiling;
};
/// Tunable parameters for passes in the default pipelines.
@@ -109,6 +123,13 @@ public:
/// Tuning option to enable/disable call graph profile. Its default value is
/// that of the flag: `-enable-npm-call-graph-profile`.
bool CallGraphProfile;
+
+ /// Tuning option to enable/disable function merging. Its default value is
+ /// false.
+ bool MergeFunctions;
+
+ /// Uniquify function linkage names. Its default value is false.
+ bool UniqueLinkageNames;
};
/// This class provides access to building LLVM's passes.
@@ -118,6 +139,7 @@ public:
/// of the built-in passes, and those may reference these members during
/// construction.
class PassBuilder {
+ bool DebugLogging;
TargetMachine *TM;
PipelineTuningOptions PTO;
Optional<PGOOptions> PGOOpt;
@@ -137,18 +159,6 @@ public:
std::vector<PipelineElement> InnerPipeline;
};
- /// ThinLTO phase.
- ///
- /// This enumerates the LLVM ThinLTO optimization phases.
- enum class ThinLTOPhase {
- /// No ThinLTO behavior needed.
- None,
- /// ThinLTO prelink (summary) phase.
- PreLink,
- /// ThinLTO postlink (backend compile) phase.
- PostLink
- };
-
/// LLVM-provided high-level optimization levels.
///
/// This enumerates the LLVM-provided high-level optimization levels. Each
@@ -259,11 +269,10 @@ public:
unsigned getSizeLevel() const { return SizeLevel; }
};
- explicit PassBuilder(TargetMachine *TM = nullptr,
+ explicit PassBuilder(bool DebugLogging = false, TargetMachine *TM = nullptr,
PipelineTuningOptions PTO = PipelineTuningOptions(),
Optional<PGOOptions> PGOOpt = None,
- PassInstrumentationCallbacks *PIC = nullptr)
- : TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) {}
+ PassInstrumentationCallbacks *PIC = nullptr);
/// Cross register the analysis managers through their proxies.
///
@@ -321,8 +330,7 @@ public:
/// \p Phase indicates the current ThinLTO phase.
FunctionPassManager
buildFunctionSimplificationPipeline(OptimizationLevel Level,
- ThinLTOPhase Phase,
- bool DebugLogging = false);
+ ThinOrFullLTOPhase Phase);
/// Construct the core LLVM module canonicalization and simplification
/// pipeline.
@@ -339,16 +347,13 @@ public:
/// build them.
///
/// \p Phase indicates the current ThinLTO phase.
- ModulePassManager
- buildModuleSimplificationPipeline(OptimizationLevel Level,
- ThinLTOPhase Phase,
- bool DebugLogging = false);
+ ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase);
/// Construct the module pipeline that performs inlining as well as
/// the inlining-driven cleanups.
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level,
- ThinLTOPhase Phase,
- bool DebugLogging = false);
+ ThinOrFullLTOPhase Phase);
/// Construct the core LLVM module optimization pipeline.
///
@@ -364,7 +369,6 @@ public:
/// require some transformations for semantic reasons, they should explicitly
/// build them.
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level,
- bool DebugLogging = false,
bool LTOPreLink = false);
/// Build a per-module default optimization pipeline.
@@ -379,7 +383,6 @@ public:
/// require some transformations for semantic reasons, they should explicitly
/// build them.
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level,
- bool DebugLogging = false,
bool LTOPreLink = false);
/// Build a pre-link, ThinLTO-targeting default optimization pipeline to
@@ -394,9 +397,7 @@ public:
/// only intended for use when attempting to optimize code. If frontends
/// require some transformations for semantic reasons, they should explicitly
/// build them.
- ModulePassManager
- buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
- bool DebugLogging = false);
+ ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level);
/// Build an ThinLTO default optimization pipeline to a pass manager.
///
@@ -410,7 +411,7 @@ public:
/// require some transformations for semantic reasons, they should explicitly
/// build them.
ModulePassManager
- buildThinLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
+ buildThinLTODefaultPipeline(OptimizationLevel Level,
const ModuleSummaryIndex *ImportSummary);
/// Build a pre-link, LTO-targeting default optimization pipeline to a pass
@@ -425,8 +426,7 @@ public:
/// only intended for use when attempting to optimize code. If frontends
/// require some transformations for semantic reasons, they should explicitly
/// build them.
- ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
- bool DebugLogging = false);
+ ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level);
/// Build an LTO default optimization pipeline to a pass manager.
///
@@ -440,11 +440,19 @@ public:
/// require some transformations for semantic reasons, they should explicitly
/// build them.
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level,
- bool DebugLogging,
ModuleSummaryIndex *ExportSummary);
+ /// Build an O0 pipeline with the minimal semantically required passes.
+ ///
+ /// This should only be used for non-LTO and LTO pre-link pipelines.
+ ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level,
+ bool LTOPreLink = false);
+
/// Build the default `AAManager` with the default alias analysis pipeline
/// registered.
+ ///
+ /// This also adds target-specific alias analyses registered via
+ /// TargetMachine::registerDefaultAliasAnalyses().
AAManager buildDefaultAAPipeline();
/// Parse a textual pass pipeline description into a \c
@@ -472,13 +480,22 @@ public:
/// module(function(loop(lpass1,lpass2,lpass3)))
///
/// This shortcut is especially useful for debugging and testing small pass
- /// combinations. Note that these shortcuts don't introduce any other magic.
- /// If the sequence of passes aren't all the exact same kind of pass, it will
- /// be an error. You cannot mix different levels implicitly, you must
- /// explicitly form a pass manager in which to nest passes.
- Error parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText,
- bool VerifyEachPass = true,
- bool DebugLogging = false);
+ /// combinations.
+ ///
+ /// The passes in the sequence need not all be of the same kind. You can mix
+ /// different levels implicitly if adaptor passes are defined to make them
+ /// work. For example,
+ ///
+ /// mpass1,fpass1,fpass2,mpass2,lpass1
+ ///
+ /// This pipeline uses only one pass manager: the top-level module manager.
+ /// fpass1, fpass2 and lpass1 are added into the top-level module manager
+ /// using only adaptor passes. No nested function/loop pass managers are
+ /// added. The purpose is to allow easy pass testing when the user
+ /// specifically wants the pass to run under an adaptor directly. This is
+ /// preferred when a pipeline is largely of one type, but one or just a few
+ /// passes are of different types (see PassBuilder.cpp for examples).
+ Error parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText);
/// {{@ Parse a textual pass pipeline description into a specific PassManager
///
@@ -487,15 +504,9 @@ public:
/// this is the valid pipeline text:
///
/// function(lpass)
- Error parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText,
- bool VerifyEachPass = true,
- bool DebugLogging = false);
- Error parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText,
- bool VerifyEachPass = true,
- bool DebugLogging = false);
- Error parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText,
- bool VerifyEachPass = true,
- bool DebugLogging = false);
+ Error parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText);
+ Error parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText);
+ Error parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText);
/// @}}
/// Parse a textual alias analysis pipeline into the provided AA manager.
@@ -594,17 +605,23 @@ public:
/// pipeline. This does not apply to 'backend' compiles (LTO and ThinLTO
/// link-time pipelines).
void registerPipelineStartEPCallback(
- const std::function<void(ModulePassManager &)> &C) {
+ const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
PipelineStartEPCallbacks.push_back(C);
}
+ /// Register a callback for a default optimizer pipeline extension point.
+ ///
+ /// This extension point allows adding optimizations right after passes that
+ /// do basic simplification of the input IR.
+ void registerPipelineEarlySimplificationEPCallback(
+ const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+ PipelineEarlySimplificationEPCallbacks.push_back(C);
+ }
+
/// Register a callback for a default optimizer pipeline extension point
///
/// This extension point allows adding optimizations at the very end of the
- /// function optimization pipeline. A key difference between this and the
- /// legacy PassManager's OptimizerLast callback is that this extension point
- /// is not triggered at O0. Extensions to the O0 pipeline should append their
- /// passes to the end of the overall pipeline.
+ /// function optimization pipeline.
void registerOptimizerLastEPCallback(
const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
OptimizerLastEPCallbacks.push_back(C);
@@ -671,17 +688,13 @@ public:
/// PassManagers and populate the passed ModulePassManager.
void registerParseTopLevelPipelineCallback(
const std::function<bool(ModulePassManager &, ArrayRef<PipelineElement>,
- bool VerifyEachPass, bool DebugLogging)> &C) {
- TopLevelPipelineParsingCallbacks.push_back(C);
- }
+ bool DebugLogging)> &C);
/// Add PGOInstrumenation passes for O0 only.
- void addPGOInstrPassesForO0(ModulePassManager &MPM, bool DebugLogging,
- bool RunProfileGen, bool IsCS,
- std::string ProfileFile,
+ void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen,
+ bool IsCS, std::string ProfileFile,
std::string ProfileRemappingFile);
-
/// Returns PIC. External libraries can use this to register pass
/// instrumentation callbacks.
PassInstrumentationCallbacks *getPassInstrumentationCallbacks() const {
@@ -690,38 +703,32 @@ public:
private:
// O1 pass pipeline
- FunctionPassManager buildO1FunctionSimplificationPipeline(
- OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging = false);
+ FunctionPassManager
+ buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase);
+
+ void addRequiredLTOPreLinkPasses(ModulePassManager &MPM);
static Optional<std::vector<PipelineElement>>
parsePipelineText(StringRef Text);
- Error parseModulePass(ModulePassManager &MPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging);
- Error parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging);
- Error parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging);
- Error parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging);
+ Error parseModulePass(ModulePassManager &MPM, const PipelineElement &E);
+ Error parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E);
+ Error parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E);
+ Error parseLoopPass(LoopPassManager &LPM, const PipelineElement &E);
bool parseAAPassName(AAManager &AA, StringRef Name);
Error parseLoopPassPipeline(LoopPassManager &LPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass, bool DebugLogging);
+ ArrayRef<PipelineElement> Pipeline);
Error parseFunctionPassPipeline(FunctionPassManager &FPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass, bool DebugLogging);
+ ArrayRef<PipelineElement> Pipeline);
Error parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass, bool DebugLogging);
+ ArrayRef<PipelineElement> Pipeline);
Error parseModulePassPipeline(ModulePassManager &MPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass, bool DebugLogging);
+ ArrayRef<PipelineElement> Pipeline);
- void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
- OptimizationLevel Level, bool RunProfileGen, bool IsCS,
- std::string ProfileFile,
+ void addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level,
+ bool RunProfileGen, bool IsCS, std::string ProfileFile,
std::string ProfileRemappingFile);
void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel);
@@ -741,8 +748,11 @@ private:
SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
OptimizerLastEPCallbacks;
// Module callbacks
- SmallVector<std::function<void(ModulePassManager &)>, 2>
+ SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
PipelineStartEPCallbacks;
+ SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+ PipelineEarlySimplificationEPCallbacks;
+
SmallVector<std::function<void(ModuleAnalysisManager &)>, 2>
ModuleAnalysisRegistrationCallbacks;
SmallVector<std::function<bool(StringRef, ModulePassManager &,
@@ -750,7 +760,7 @@ private:
2>
ModulePipelineParsingCallbacks;
SmallVector<std::function<bool(ModulePassManager &, ArrayRef<PipelineElement>,
- bool VerifyEachPass, bool DebugLogging)>,
+ bool DebugLogging)>,
2>
TopLevelPipelineParsingCallbacks;
// CGSCC callbacks
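
With DebugLogging moved into the PassBuilder constructor and the VerifyEachPass/DebugLogging parameters dropped from parsePassPipeline(), a textual-pipeline driver now looks roughly like the sketch below. The wrapper name is illustrative and error handling is kept minimal:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"

using namespace llvm;

Error parseAndRun(Module &M, StringRef PipelineText) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB(/*DebugLogging=*/false); // DebugLogging is now a ctor argument
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM;
  // Mixed-level text is now accepted and adaptors are inserted implicitly,
  // e.g. "instcombine,loop-rotate" as well as "module(function(instcombine))".
  if (Error Err = PB.parsePassPipeline(MPM, PipelineText))
    return Err;
  MPM.run(M, MAM);
  return Error::success();
}
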
diff --git a/contrib/llvm-project/llvm/include/llvm/Passes/StandardInstrumentations.h b/contrib/llvm-project/llvm/include/llvm/Passes/StandardInstrumentations.h
index 3d3002eecce9..61c86b0468f2 100644
--- a/contrib/llvm-project/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/contrib/llvm-project/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -16,8 +16,13 @@
#define LLVM_PASSES_STANDARDINSTRUMENTATIONS_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include <string>
#include <utility>
@@ -25,6 +30,8 @@
namespace llvm {
class Module;
+class Function;
+class PassInstrumentationCallbacks;
/// Instrumentation to print IR before/after passes.
///
@@ -32,40 +39,257 @@ class Module;
/// (typically Loop or SCC).
class PrintIRInstrumentation {
public:
- PrintIRInstrumentation() = default;
~PrintIRInstrumentation();
void registerCallbacks(PassInstrumentationCallbacks &PIC);
private:
- bool printBeforePass(StringRef PassID, Any IR);
+ void printBeforePass(StringRef PassID, Any IR);
void printAfterPass(StringRef PassID, Any IR);
void printAfterPassInvalidated(StringRef PassID);
+ bool shouldPrintBeforePass(StringRef PassID);
+ bool shouldPrintAfterPass(StringRef PassID);
+
using PrintModuleDesc = std::tuple<const Module *, std::string, StringRef>;
void pushModuleDesc(StringRef PassID, Any IR);
PrintModuleDesc popModuleDesc(StringRef PassID);
+ PassInstrumentationCallbacks *PIC;
/// Stack of Module description, enough to print the module after a given
/// pass.
SmallVector<PrintModuleDesc, 2> ModuleDescStack;
bool StoreModuleDesc = false;
};
+class OptNoneInstrumentation {
+public:
+ OptNoneInstrumentation(bool DebugLogging) : DebugLogging(DebugLogging) {}
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+private:
+ bool DebugLogging;
+ bool shouldRun(StringRef PassID, Any IR);
+};
+
+class OptBisectInstrumentation {
+public:
+ OptBisectInstrumentation() {}
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+};
+
+// Debug logging for transformation and analysis passes.
+class PrintPassInstrumentation {
+public:
+ PrintPassInstrumentation(bool DebugLogging) : DebugLogging(DebugLogging) {}
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+private:
+ bool DebugLogging;
+};
+
+class PreservedCFGCheckerInstrumentation {
+private:
+ // CFG is a map BB -> {(Succ, Multiplicity)}, where BB is a non-leaf basic
+ // block and {(Succ, Multiplicity)} is the set of all pairs of the block's
+ // successors and the multiplicity of the edge (BB->Succ). As the mapped sets
+ // are unordered, the order of successors is not tracked by the CFG. In other
+ // words, this allows basic block successors to be swapped by a pass without
+ // reporting a CFG change. The CFG can be guarded by basic block tracking
+ // pointers in the Graph (BBGuard). That is, if any of the blocks is deleted
+ // or RAUWed, the CFG is treated as poisoned and no block pointer of the
+ // Graph is used.
+ struct CFG {
+ struct BBGuard final : public CallbackVH {
+ BBGuard(const BasicBlock *BB) : CallbackVH(BB) {}
+ void deleted() override { CallbackVH::deleted(); }
+ void allUsesReplacedWith(Value *) override { CallbackVH::deleted(); }
+ bool isPoisoned() const { return !getValPtr(); }
+ };
+
+ Optional<DenseMap<intptr_t, BBGuard>> BBGuards;
+ DenseMap<const BasicBlock *, DenseMap<const BasicBlock *, unsigned>> Graph;
+
+ CFG(const Function *F, bool TrackBBLifetime = false);
+
+ bool operator==(const CFG &G) const {
+ return !isPoisoned() && !G.isPoisoned() && Graph == G.Graph;
+ }
+
+ bool isPoisoned() const {
+ if (BBGuards)
+ for (auto &BB : *BBGuards) {
+ if (BB.second.isPoisoned())
+ return true;
+ }
+ return false;
+ }
+
+ static void printDiff(raw_ostream &out, const CFG &Before,
+ const CFG &After);
+ };
+
+ SmallVector<std::pair<StringRef, Optional<CFG>>, 8> GraphStackBefore;
+
+public:
+ static cl::opt<bool> VerifyPreservedCFG;
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+};
+
+// Base class for classes that report changes to the IR.
+// It presents an interface for such classes and provides calls
+// on various events as the new pass manager transforms the IR.
+// It also provides filtering of information based on hidden options
+// specifying which functions are interesting.
+// Calls are made for the following events/queries:
+// 1. The initial IR processed.
+// 2. To get the representation of the IR (of type \p T).
+// 3. When a pass does not change the IR.
+// 4. When a pass changes the IR (given both before and after representations
+// of type \p T).
+// 5. When an IR is invalidated.
+// 6. When a pass is run on an IR that is not interesting (based on options).
+// 7. When a pass is ignored (pass manager or adapter pass).
+// 8. To compare two IR representations (of type \p T).
+template <typename IRUnitT> class ChangeReporter {
+protected:
+ ChangeReporter(bool RunInVerboseMode) : VerboseMode(RunInVerboseMode) {}
+
+public:
+ virtual ~ChangeReporter();
+
+ // Determine if this pass/IR is interesting and, if so, save the IR;
+ // otherwise it is left on the stack without data.
+ void saveIRBeforePass(Any IR, StringRef PassID);
+ // Compare the IR from before the pass with the IR after the pass.
+ void handleIRAfterPass(Any IR, StringRef PassID);
+ // Handle the situation where a pass is invalidated.
+ void handleInvalidatedPass(StringRef PassID);
+
+protected:
+ // Register required callbacks.
+ void registerRequiredCallbacks(PassInstrumentationCallbacks &PIC);
+
+ // Return true when this is a defined function for which printing
+ // of changes is desired.
+ bool isInterestingFunction(const Function &F);
+
+ // Return true when this is a pass for which printing of changes is desired.
+ bool isInterestingPass(StringRef PassID);
+
+ // Return true when this is a pass on IR for which printing
+ // of changes is desired.
+ bool isInteresting(Any IR, StringRef PassID);
+
+ // Called on the first IR processed.
+ virtual void handleInitialIR(Any IR) = 0;
+ // Called before and after a pass to get the representation of the IR.
+ virtual void generateIRRepresentation(Any IR, StringRef PassID,
+ IRUnitT &Output) = 0;
+ // Called when the pass is not interesting.
+ virtual void omitAfter(StringRef PassID, std::string &Name) = 0;
+ // Called when an interesting IR has changed.
+ virtual void handleAfter(StringRef PassID, std::string &Name,
+ const IRUnitT &Before, const IRUnitT &After,
+ Any) = 0;
+ // Called when an interesting pass is invalidated.
+ virtual void handleInvalidated(StringRef PassID) = 0;
+ // Called when the IR or pass is not interesting.
+ virtual void handleFiltered(StringRef PassID, std::string &Name) = 0;
+ // Called when an ignored pass is encountered.
+ virtual void handleIgnored(StringRef PassID, std::string &Name) = 0;
+ // Called to compare the before and after representations of the IR.
+ virtual bool same(const IRUnitT &Before, const IRUnitT &After) = 0;
+
+ // Stack of IRs before passes.
+ std::vector<IRUnitT> BeforeStack;
+ // Is this the first IR seen?
+ bool InitialIR = true;
+
+ // Run in verbose mode, printing everything?
+ const bool VerboseMode;
+};
+
+// An abstract template base class that handles printing banners and
+// reporting when things have not changed or are filtered out.
+template <typename IRUnitT>
+class TextChangeReporter : public ChangeReporter<IRUnitT> {
+protected:
+ TextChangeReporter(bool Verbose);
+
+ // Print a module dump of the first IR that is changed.
+ void handleInitialIR(Any IR) override;
+ // Report that the IR was omitted because it did not change.
+ void omitAfter(StringRef PassID, std::string &Name) override;
+ // Report that the pass was invalidated.
+ void handleInvalidated(StringRef PassID) override;
+ // Report that the IR was filtered out.
+ void handleFiltered(StringRef PassID, std::string &Name) override;
+ // Report that the pass was ignored.
+ void handleIgnored(StringRef PassID, std::string &Name) override;
+ // Make substitutions in \p S suitable for reporting changes
+ // after the pass and then print it.
+
+ raw_ostream &Out;
+};
+
+// A change printer based on the string representation of the IR as created
+// by unwrapAndPrint. The string representation is stored in a std::string
+// to preserve it as the IR changes in each pass. Note that the banner is
+// included in this representation but it is massaged before reporting.
+class IRChangedPrinter : public TextChangeReporter<std::string> {
+public:
+ IRChangedPrinter(bool VerboseMode)
+ : TextChangeReporter<std::string>(VerboseMode) {}
+ ~IRChangedPrinter() override;
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+protected:
+ // Called before and after a pass to get the representation of the IR.
+ void generateIRRepresentation(Any IR, StringRef PassID,
+ std::string &Output) override;
+ // Called when an interesting IR has changed.
+ void handleAfter(StringRef PassID, std::string &Name,
+ const std::string &Before, const std::string &After,
+ Any) override;
+ // Called to compare the before and after representations of the IR.
+ bool same(const std::string &Before, const std::string &After) override;
+};
+
+class VerifyInstrumentation {
+ bool DebugLogging;
+
+public:
+ VerifyInstrumentation(bool DebugLogging) : DebugLogging(DebugLogging) {}
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+};
+
/// This class provides an interface to register all the standard pass
/// instrumentations and manages their state (if any).
class StandardInstrumentations {
PrintIRInstrumentation PrintIR;
+ PrintPassInstrumentation PrintPass;
TimePassesHandler TimePasses;
+ OptNoneInstrumentation OptNone;
+ OptBisectInstrumentation OptBisect;
+ PreservedCFGCheckerInstrumentation PreservedCFGChecker;
+ IRChangedPrinter PrintChangedIR;
+ PseudoProbeVerifier PseudoProbeVerification;
+ VerifyInstrumentation Verify;
+
+ bool VerifyEach;
public:
- StandardInstrumentations() = default;
+ StandardInstrumentations(bool DebugLogging, bool VerifyEach = false);
void registerCallbacks(PassInstrumentationCallbacks &PIC);
TimePassesHandler &getTimePasses() { return TimePasses; }
};
+
+extern template class ChangeReporter<std::string>;
+extern template class TextChangeReporter<std::string>;
+
} // namespace llvm
#endif
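The constructor change above means callers now pass DebugLogging and an optional VerifyEach flag when setting up the standard instrumentations. A minimal sketch of how that might be wired up with a PassInstrumentationCallbacks object; the surrounding pipeline setup and the flag values are illustrative, not taken from this tree:

#include "llvm/IR/PassInstrumentation.h"
#include "llvm/Passes/StandardInstrumentations.h"

void runWithInstrumentation() {
  llvm::PassInstrumentationCallbacks PIC;
  // DebugLogging feeds PrintPassInstrumentation and friends; VerifyEach asks
  // VerifyInstrumentation to verify the IR after every pass.
  llvm::StandardInstrumentations SI(/*DebugLogging=*/true,
                                    /*VerifyEach=*/true);
  SI.registerCallbacks(PIC);
  // ... hand &PIC to a PassBuilder and keep SI alive for the whole run ...
}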
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index bf0dffc9653c..09f21677ec54 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -55,7 +55,8 @@ enum class coveragemap_error {
unsupported_version,
truncated,
malformed,
- decompression_failed
+ decompression_failed,
+ invalid_or_missing_arch_specifier
};
const std::error_category &coveragemap_category();
@@ -89,6 +90,8 @@ private:
/// A Counter is an abstract value that describes how to compute the
/// execution count for a region of code using the collected profile count data.
struct Counter {
+ /// The CounterExpression kind (Add or Subtract) is encoded in bit 0 next to
+ /// the CounterKind. This means CounterKind has to leave bit 0 free.
enum CounterKind { Zero, CounterValueReference, Expression };
static const unsigned EncodingTagBits = 2;
static const unsigned EncodingTagMask = 0x3;
@@ -218,10 +221,20 @@ struct CounterMappingRegion {
/// A GapRegion is like a CodeRegion, but its count is only set as the
/// line execution count when it's the only region in the line.
- GapRegion
+ GapRegion,
+
+ /// A BranchRegion represents leaf-level boolean expressions and is
+ /// associated with two counters, each representing the number of times the
+ /// expression evaluates to true or false.
+ BranchRegion
};
+ /// Primary Counter that is also used for Branch Regions (TrueCount).
Counter Count;
+
+ /// Secondary Counter used for Branch Regions (FalseCount).
+ Counter FalseCount;
+
unsigned FileID, ExpandedFileID;
unsigned LineStart, ColumnStart, LineEnd, ColumnEnd;
RegionKind Kind;
@@ -233,6 +246,15 @@ struct CounterMappingRegion {
LineStart(LineStart), ColumnStart(ColumnStart), LineEnd(LineEnd),
ColumnEnd(ColumnEnd), Kind(Kind) {}
+ CounterMappingRegion(Counter Count, Counter FalseCount, unsigned FileID,
+ unsigned ExpandedFileID, unsigned LineStart,
+ unsigned ColumnStart, unsigned LineEnd,
+ unsigned ColumnEnd, RegionKind Kind)
+ : Count(Count), FalseCount(FalseCount), FileID(FileID),
+ ExpandedFileID(ExpandedFileID), LineStart(LineStart),
+ ColumnStart(ColumnStart), LineEnd(LineEnd), ColumnEnd(ColumnEnd),
+ Kind(Kind) {}
+
static CounterMappingRegion
makeRegion(Counter Count, unsigned FileID, unsigned LineStart,
unsigned ColumnStart, unsigned LineEnd, unsigned ColumnEnd) {
@@ -262,6 +284,14 @@ struct CounterMappingRegion {
LineEnd, (1U << 31) | ColumnEnd, GapRegion);
}
+ static CounterMappingRegion
+ makeBranchRegion(Counter Count, Counter FalseCount, unsigned FileID,
+ unsigned LineStart, unsigned ColumnStart, unsigned LineEnd,
+ unsigned ColumnEnd) {
+ return CounterMappingRegion(Count, FalseCount, FileID, 0, LineStart,
+ ColumnStart, LineEnd, ColumnEnd, BranchRegion);
+ }
+
inline LineColPair startLoc() const {
return LineColPair(LineStart, ColumnStart);
}
@@ -272,9 +302,17 @@ struct CounterMappingRegion {
/// Associates a source range with an execution count.
struct CountedRegion : public CounterMappingRegion {
uint64_t ExecutionCount;
+ uint64_t FalseExecutionCount;
+ bool Folded;
CountedRegion(const CounterMappingRegion &R, uint64_t ExecutionCount)
- : CounterMappingRegion(R), ExecutionCount(ExecutionCount) {}
+ : CounterMappingRegion(R), ExecutionCount(ExecutionCount),
+ FalseExecutionCount(0), Folded(false) {}
+
+ CountedRegion(const CounterMappingRegion &R, uint64_t ExecutionCount,
+ uint64_t FalseExecutionCount)
+ : CounterMappingRegion(R), ExecutionCount(ExecutionCount),
+ FalseExecutionCount(FalseExecutionCount), Folded(false) {}
};
/// A Counter mapping context is used to connect the counters, expressions
@@ -311,6 +349,8 @@ struct FunctionRecord {
std::vector<std::string> Filenames;
/// Regions in the function along with their counts.
std::vector<CountedRegion> CountedRegions;
+ /// Branch Regions in the function along with their counts.
+ std::vector<CountedRegion> CountedBranchRegions;
/// The number of times this function was executed.
uint64_t ExecutionCount = 0;
@@ -320,10 +360,19 @@ struct FunctionRecord {
FunctionRecord(FunctionRecord &&FR) = default;
FunctionRecord &operator=(FunctionRecord &&) = default;
- void pushRegion(CounterMappingRegion Region, uint64_t Count) {
+ void pushRegion(CounterMappingRegion Region, uint64_t Count,
+ uint64_t FalseCount) {
+ if (Region.Kind == CounterMappingRegion::BranchRegion) {
+ CountedBranchRegions.emplace_back(Region, Count, FalseCount);
+ // If both counters are hard-coded to zero, then this region represents a
+ // constant-folded branch.
+ if (Region.Count.isZero() && Region.FalseCount.isZero())
+ CountedBranchRegions.back().Folded = true;
+ return;
+ }
if (CountedRegions.empty())
ExecutionCount = Count;
- CountedRegions.emplace_back(Region, Count);
+ CountedRegions.emplace_back(Region, Count, FalseCount);
}
};
@@ -402,7 +451,8 @@ struct CoverageSegment {
IsRegionEntry(IsRegionEntry), IsGapRegion(false) {}
CoverageSegment(unsigned Line, unsigned Col, uint64_t Count,
- bool IsRegionEntry, bool IsGapRegion = false)
+ bool IsRegionEntry, bool IsGapRegion = false,
+ bool IsBranchRegion = false)
: Line(Line), Col(Col), Count(Count), HasCount(true),
IsRegionEntry(IsRegionEntry), IsGapRegion(IsGapRegion) {}
@@ -482,6 +532,7 @@ class CoverageData {
std::string Filename;
std::vector<CoverageSegment> Segments;
std::vector<ExpansionRecord> Expansions;
+ std::vector<CountedRegion> BranchRegions;
public:
CoverageData() = default;
@@ -505,6 +556,9 @@ public:
/// Expansions that can be further processed.
ArrayRef<ExpansionRecord> getExpansions() const { return Expansions; }
+
+ /// Branches that can be further processed.
+ ArrayRef<CountedRegion> getBranches() const { return BranchRegions; }
};
/// The mapping of profile information to coverage data.
@@ -940,7 +994,9 @@ enum CovMapVersion {
Version3 = 2,
// Function records are named, uniqued, and moved to a dedicated section.
Version4 = 3,
- // The current version is Version4.
+ // Branch regions referring to two counters are added
+ Version5 = 4,
+ // The current version is Version5.
CurrentVersion = INSTR_PROF_COVMAP_VERSION
};
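The BranchRegion kind and the second (FalseCount) counter introduced above are easiest to see with a small sketch. The makeBranchRegion() factory and the Folded handling in pushRegion() come from the hunks above; Counter::getCounter() and Counter::getZero() are existing helpers, and the counter IDs and source coordinates are made up:

#include "llvm/ProfileData/Coverage/CoverageMapping.h"

using namespace llvm::coverage;

// A branch region carrying a true-count counter and a false-count counter
// for a leaf-level boolean expression at (hypothetical) 10:5-10:12.
CounterMappingRegion makeExampleBranch() {
  return CounterMappingRegion::makeBranchRegion(
      Counter::getCounter(0), Counter::getCounter(1),
      /*FileID=*/0, /*LineStart=*/10, /*ColumnStart=*/5,
      /*LineEnd=*/10, /*ColumnEnd=*/12);
}

// Both counters hard-coded to zero: FunctionRecord::pushRegion() will mark
// the resulting CountedRegion as Folded (a constant-folded branch).
CounterMappingRegion makeFoldedBranch() {
  return CounterMappingRegion::makeBranchRegion(
      Counter::getZero(), Counter::getZero(), 0, 12, 7, 12, 14);
}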
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
index 97f4c32eb035..3a611bcb8cd1 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
@@ -67,10 +67,10 @@ public:
increment();
return *this;
}
- bool operator==(const CoverageMappingIterator &RHS) {
+ bool operator==(const CoverageMappingIterator &RHS) const {
return Reader == RHS.Reader;
}
- bool operator!=(const CoverageMappingIterator &RHS) {
+ bool operator!=(const CoverageMappingIterator &RHS) const {
return Reader != RHS.Reader;
}
Expected<CoverageMappingRecord &> operator*() {
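The only change to this header is const-qualifying the iterator comparison operators. The point is that comparisons made through const references, for example inside generic helpers, otherwise fail to compile. A standalone illustration, not LLVM code:

struct Iter {
  const void *Reader;
  bool operator==(const Iter &RHS) const { return Reader == RHS.Reader; }
  bool operator!=(const Iter &RHS) const { return Reader != RHS.Reader; }
};

bool atEnd(const Iter &I, const Iter &End) {
  return I == End; // needs the const-qualified operator==
}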
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/GCOV.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/GCOV.h
index 7b9ba4410b65..d4f0b9120577 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/GCOV.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/GCOV.h
@@ -15,6 +15,7 @@
#define LLVM_PROFILEDATA_GCOV_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
@@ -38,7 +39,6 @@ namespace llvm {
class GCOVFunction;
class GCOVBlock;
-class FileInfo;
namespace GCOV {
@@ -47,10 +47,11 @@ enum GCOVVersion { V304, V407, V408, V800, V900 };
/// A struct for passing gcov options between functions.
struct Options {
Options(bool A, bool B, bool C, bool F, bool P, bool U, bool I, bool L,
- bool N, bool T, bool X)
+ bool M, bool N, bool R, bool T, bool X, std::string SourcePrefix)
: AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F),
PreservePaths(P), UncondBranch(U), Intermediate(I), LongFileNames(L),
- NoOutput(N), UseStdout(T), HashFilenames(X) {}
+ Demangle(M), NoOutput(N), RelativeOnly(R), UseStdout(T),
+ HashFilenames(X), SourcePrefix(std::move(SourcePrefix)) {}
bool AllBlocks;
bool BranchInfo;
@@ -60,9 +61,12 @@ struct Options {
bool UncondBranch;
bool Intermediate;
bool LongFileNames;
+ bool Demangle;
bool NoOutput;
+ bool RelativeOnly;
bool UseStdout;
bool HashFilenames;
+ std::string SourcePrefix;
};
} // end namespace GCOV
@@ -187,39 +191,38 @@ public:
bool readGCNO(GCOVBuffer &Buffer);
bool readGCDA(GCOVBuffer &Buffer);
GCOV::GCOVVersion getVersion() const { return Version; }
- uint32_t getChecksum() const { return Checksum; }
void print(raw_ostream &OS) const;
void dump() const;
- void collectLineCounts(FileInfo &FI);
std::vector<std::string> filenames;
StringMap<unsigned> filenameToIdx;
-private:
+public:
bool GCNOInitialized = false;
GCOV::GCOVVersion Version;
uint32_t Checksum = 0;
StringRef cwd;
- SmallVector<std::unique_ptr<GCOVFunction>, 16> Functions;
+ SmallVector<std::unique_ptr<GCOVFunction>, 16> functions;
std::map<uint32_t, GCOVFunction *> IdentToFunction;
uint32_t RunCount = 0;
uint32_t ProgramCount = 0;
using iterator = pointee_iterator<
SmallVectorImpl<std::unique_ptr<GCOVFunction>>::const_iterator>;
- iterator begin() const { return iterator(Functions.begin()); }
- iterator end() const { return iterator(Functions.end()); }
+ iterator begin() const { return iterator(functions.begin()); }
+ iterator end() const { return iterator(functions.end()); }
};
struct GCOVArc {
- GCOVArc(GCOVBlock &src, GCOVBlock &dst, bool fallthrough)
- : src(src), dst(dst), fallthrough(fallthrough) {}
+ GCOVArc(GCOVBlock &src, GCOVBlock &dst, uint32_t flags)
+ : src(src), dst(dst), flags(flags) {}
+ bool onTree() const;
GCOVBlock &src;
GCOVBlock &dst;
- bool fallthrough;
- uint64_t Count = 0;
- uint64_t CyclesCount = 0;
+ uint32_t flags;
+ uint64_t count = 0;
+ uint64_t cycleCount = 0;
};
/// GCOVFunction - Collects function information.
@@ -230,21 +233,18 @@ public:
GCOVFunction(GCOVFile &file) : file(file) {}
- StringRef getName() const { return Name; }
+ StringRef getName(bool demangle) const;
StringRef getFilename() const;
- size_t getNumBlocks() const { return Blocks.size(); }
uint64_t getEntryCount() const;
- uint64_t getExitCount() const;
+ GCOVBlock &getExitBlock() const;
- BlockIterator block_begin() const { return Blocks.begin(); }
- BlockIterator block_end() const { return Blocks.end(); }
- iterator_range<BlockIterator> blocks() const {
- return make_range(block_begin(), block_end());
+ iterator_range<BlockIterator> blocksRange() const {
+ return make_range(blocks.begin(), blocks.end());
}
+ uint64_t propagateCounts(const GCOVBlock &v, GCOVArc *pred);
void print(raw_ostream &OS) const;
void dump() const;
- void collectLineCounts(FileInfo &FI);
GCOVFile &file;
uint32_t ident = 0;
@@ -256,40 +256,31 @@ public:
uint32_t endColumn = 0;
uint8_t artificial = 0;
StringRef Name;
+ mutable SmallString<0> demangled;
unsigned srcIdx;
- SmallVector<std::unique_ptr<GCOVBlock>, 0> Blocks;
+ SmallVector<std::unique_ptr<GCOVBlock>, 0> blocks;
SmallVector<std::unique_ptr<GCOVArc>, 0> arcs, treeArcs;
+ DenseSet<const GCOVBlock *> visited;
};
/// GCOVBlock - Collects block information.
class GCOVBlock {
- struct EdgeWeight {
- EdgeWeight(GCOVBlock *D) : Dst(D) {}
-
- GCOVBlock *Dst;
- uint64_t Count = 0;
- };
-
public:
using EdgeIterator = SmallVectorImpl<GCOVArc *>::const_iterator;
- using BlockVector = SmallVector<const GCOVBlock *, 4>;
+ using BlockVector = SmallVector<const GCOVBlock *, 1>;
using BlockVectorLists = SmallVector<BlockVector, 4>;
using Edges = SmallVector<GCOVArc *, 4>;
- GCOVBlock(GCOVFunction &P, uint32_t N) : Parent(P), Number(N) {}
+ GCOVBlock(uint32_t N) : number(N) {}
- const GCOVFunction &getParent() const { return Parent; }
- void addLine(uint32_t N) { Lines.push_back(N); }
- uint32_t getLastLine() const { return Lines.back(); }
- uint64_t getCount() const { return Counter; }
+ void addLine(uint32_t N) { lines.push_back(N); }
+ uint32_t getLastLine() const { return lines.back(); }
+ uint64_t getCount() const { return count; }
void addSrcEdge(GCOVArc *Edge) { pred.push_back(Edge); }
void addDstEdge(GCOVArc *Edge) { succ.push_back(Edge); }
- size_t getNumSrcEdges() const { return pred.size(); }
- size_t getNumDstEdges() const { return succ.size(); }
-
iterator_range<EdgeIterator> srcs() const {
return make_range(pred.begin(), pred.end());
}
@@ -300,116 +291,25 @@ public:
void print(raw_ostream &OS) const;
void dump() const;
- void collectLineCounts(FileInfo &FI);
-
- static uint64_t getCycleCount(const Edges &Path);
- static void unblock(const GCOVBlock *U, BlockVector &Blocked,
- BlockVectorLists &BlockLists);
- static bool lookForCircuit(const GCOVBlock *V, const GCOVBlock *Start,
- Edges &Path, BlockVector &Blocked,
- BlockVectorLists &BlockLists,
- const BlockVector &Blocks, uint64_t &Count);
- static void getCyclesCount(const BlockVector &Blocks, uint64_t &Count);
+
+ static uint64_t
+ augmentOneCycle(GCOVBlock *src,
+ std::vector<std::pair<GCOVBlock *, size_t>> &stack);
+ static uint64_t getCyclesCount(const BlockVector &blocks);
static uint64_t getLineCount(const BlockVector &Blocks);
public:
- GCOVFunction &Parent;
- uint32_t Number;
- uint64_t Counter = 0;
+ uint32_t number;
+ uint64_t count = 0;
SmallVector<GCOVArc *, 2> pred;
SmallVector<GCOVArc *, 2> succ;
- SmallVector<uint32_t, 16> Lines;
-};
-
-struct GCOVCoverage {
- GCOVCoverage() = default;
- GCOVCoverage(StringRef Name) : Name(Name) {}
-
- StringRef Name;
-
- uint32_t LogicalLines = 0;
- uint32_t LinesExec = 0;
-
- uint32_t Branches = 0;
- uint32_t BranchesExec = 0;
- uint32_t BranchesTaken = 0;
-};
-
-struct SourceInfo {
- StringRef filename;
- std::string name;
- std::vector<GCOVFunction *> functions;
- GCOVCoverage coverage;
- SourceInfo(StringRef filename) : filename(filename) {}
+ SmallVector<uint32_t, 4> lines;
+ bool traversable = false;
+ GCOVArc *incoming = nullptr;
};
-class FileInfo {
-protected:
- // It is unlikely--but possible--for multiple functions to be on the same
- // line.
- // Therefore this typedef allows LineData.Functions to store multiple
- // functions
- // per instance. This is rare, however, so optimize for the common case.
- using FunctionVector = SmallVector<const GCOVFunction *, 1>;
- using FunctionLines = DenseMap<uint32_t, FunctionVector>;
- using BlockVector = SmallVector<const GCOVBlock *, 4>;
- using BlockLines = DenseMap<uint32_t, BlockVector>;
-
- struct LineData {
- LineData() = default;
-
- BlockLines Blocks;
- FunctionLines Functions;
- uint32_t LastLine = 0;
- };
-
-public:
- friend class GCOVFile;
- FileInfo(const GCOV::Options &Options) : Options(Options) {}
-
- void addBlockLine(StringRef Filename, uint32_t Line, const GCOVBlock *Block) {
- if (Line > LineInfo[Filename].LastLine)
- LineInfo[Filename].LastLine = Line;
- LineInfo[Filename].Blocks[Line - 1].push_back(Block);
- }
-
- void addFunctionLine(StringRef Filename, uint32_t Line,
- const GCOVFunction *Function) {
- if (Line > LineInfo[Filename].LastLine)
- LineInfo[Filename].LastLine = Line;
- LineInfo[Filename].Functions[Line - 1].push_back(Function);
- }
-
- void setRunCount(uint32_t Runs) { RunCount = Runs; }
- void setProgramCount(uint32_t Programs) { ProgramCount = Programs; }
- void print(raw_ostream &OS, StringRef MainFilename, StringRef GCNOFile,
- StringRef GCDAFile, GCOVFile &file);
-
-protected:
- std::string getCoveragePath(StringRef Filename, StringRef MainFilename);
- std::unique_ptr<raw_ostream> openCoveragePath(StringRef CoveragePath);
- void printFunctionSummary(raw_ostream &OS, const FunctionVector &Funcs) const;
- void printBlockInfo(raw_ostream &OS, const GCOVBlock &Block,
- uint32_t LineIndex, uint32_t &BlockNo) const;
- void printBranchInfo(raw_ostream &OS, const GCOVBlock &Block,
- GCOVCoverage &Coverage, uint32_t &EdgeNo);
- void printUncondBranchInfo(raw_ostream &OS, uint32_t &EdgeNo,
- uint64_t Count) const;
-
- void printCoverage(raw_ostream &OS, const GCOVCoverage &Coverage) const;
- void printFuncCoverage(raw_ostream &OS) const;
- void printFileCoverage(raw_ostream &OS) const;
-
- const GCOV::Options &Options;
- StringMap<LineData> LineInfo;
- uint32_t RunCount = 0;
- uint32_t ProgramCount = 0;
-
- using FuncCoverageMap = MapVector<const GCOVFunction *, GCOVCoverage>;
-
- FuncCoverageMap FuncCoverages;
- std::vector<SourceInfo> sources;
-};
+void gcovOneInput(const GCOV::Options &options, StringRef filename,
+ StringRef gcno, StringRef gcda, GCOVFile &file);
} // end namespace llvm
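GCOV::Options above gains Demangle, RelativeOnly and SourcePrefix, all passed positionally. A sketch of filling in the new constructor, with each argument annotated with the member it initializes according to the initializer list in this hunk; the chosen values are arbitrary:

#include "llvm/ProfileData/GCOV.h"

llvm::GCOV::Options makeGcovOptions() {
  return llvm::GCOV::Options(
      /*A=*/false,          // AllBlocks
      /*B=*/true,           // BranchInfo
      /*C=*/true,           // BranchCount
      /*F=*/false,          // FuncCoverage
      /*P=*/false,          // PreservePaths
      /*U=*/false,          // UncondBranch
      /*I=*/false,          // Intermediate
      /*L=*/false,          // LongFileNames
      /*M=*/true,           // Demangle (new)
      /*N=*/false,          // NoOutput
      /*R=*/false,          // RelativeOnly (new)
      /*T=*/false,          // UseStdout
      /*X=*/false,          // HashFilenames
      /*SourcePrefix=*/""); // new; prefix to strip from source paths
}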
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h
index 62a0c6955708..9c16c353843d 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h
@@ -74,9 +74,10 @@ inline StringRef getInstrProfValueProfFuncName() {
return INSTR_PROF_VALUE_PROF_FUNC_STR;
}
-/// Return the name profile runtime entry point to do value range profiling.
-inline StringRef getInstrProfValueRangeProfFuncName() {
- return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR;
+/// Return the name profile runtime entry point to do memop size value
+/// profiling.
+inline StringRef getInstrProfValueProfMemOpFuncName() {
+ return INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR;
}
/// Return the name prefix of variables containing instrumented function names.
@@ -561,10 +562,9 @@ StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) {
StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) {
finalizeSymtab();
- auto Result =
- std::lower_bound(MD5NameMap.begin(), MD5NameMap.end(), FuncMD5Hash,
- [](const std::pair<uint64_t, StringRef> &LHS,
- uint64_t RHS) { return LHS.first < RHS; });
+ auto Result = llvm::lower_bound(MD5NameMap, FuncMD5Hash,
+ [](const std::pair<uint64_t, StringRef> &LHS,
+ uint64_t RHS) { return LHS.first < RHS; });
if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash)
return Result->second;
return StringRef();
@@ -572,10 +572,9 @@ StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) {
Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
finalizeSymtab();
- auto Result =
- std::lower_bound(MD5FuncMap.begin(), MD5FuncMap.end(), FuncMD5Hash,
- [](const std::pair<uint64_t, Function*> &LHS,
- uint64_t RHS) { return LHS.first < RHS; });
+ auto Result = llvm::lower_bound(MD5FuncMap, FuncMD5Hash,
+ [](const std::pair<uint64_t, Function *> &LHS,
+ uint64_t RHS) { return LHS.first < RHS; });
if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash)
return Result->second;
return nullptr;
@@ -678,8 +677,8 @@ struct InstrProfValueSiteRecord {
/// Optionally scale merged counts by \p Weight.
void merge(InstrProfValueSiteRecord &Input, uint64_t Weight,
function_ref<void(instrprof_error)> Warn);
- /// Scale up value profile data counts.
- void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
+ /// Scale up value profile data counts by N (Numerator) / D (Denominator).
+ void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
/// Compute the overlap b/w this record and Input record.
void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind,
@@ -753,8 +752,8 @@ struct InstrProfRecord {
function_ref<void(instrprof_error)> Warn);
/// Scale up profile counts (including value profile data) by
- /// \p Weight.
- void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
+ /// a factor of (N / D).
+ void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
/// Sort value profile data (per site) by count.
void sortValueData() {
@@ -839,8 +838,8 @@ private:
uint64_t Weight,
function_ref<void(instrprof_error)> Warn);
- // Scale up value profile data count.
- void scaleValueProfData(uint32_t ValueKind, uint64_t Weight,
+ // Scale up value profile data count by N (Numerator) / D (Denominator).
+ void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn);
};
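The scale() and scaleValueProfData() interfaces above now take a rational factor N/D rather than a whole-number Weight, so conceptually every counter C becomes C * N / D. A standalone sketch of that arithmetic only; the saturation and warning behaviour of the real implementation is omitted, and the helper name is made up:

#include <cstdint>

// floor(C * N / D) computed as q*N + floor(r*N / D) with C = q*D + r, which
// reduces the risk of overflowing C * N for large counts (as long as r * N
// still fits in 64 bits).
uint64_t scaleCount(uint64_t C, uint64_t N, uint64_t D) {
  return (C / D) * N + (C % D) * N / D;
}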
@@ -982,7 +981,9 @@ enum ProfVersion {
// In this version, the frontend PGO stable hash algorithm got fixed and
// may produce hashes different from Version5.
Version6 = 6,
- // The current version is 5.
+ // An additional counter is added around logical operators.
+ Version7 = 7,
+ // The current version is 7.
CurrentVersion = INSTR_PROF_INDEX_VERSION
};
const uint64_t Version = ProfVersion::CurrentVersion;
@@ -1138,7 +1139,8 @@ void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
// aware this is an ir_level profile so it can set the version flag.
-void createIRLevelProfileFlagVar(Module &M, bool IsCS);
+void createIRLevelProfileFlagVar(Module &M, bool IsCS,
+ bool InstrEntryBBEnabled);
// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
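Earlier hunks in this file also swap std::lower_bound(begin, end, ...) for the range-based llvm::lower_bound from ADT/STLExtras.h. A minimal sketch of the same pattern over a sorted vector keyed by MD5 hash; the container and function names here are made up:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <utility>
#include <vector>

llvm::StringRef lookupName(
    const std::vector<std::pair<uint64_t, llvm::StringRef>> &MD5NameMap,
    uint64_t FuncMD5Hash) {
  auto It = llvm::lower_bound(
      MD5NameMap, FuncMD5Hash,
      [](const std::pair<uint64_t, llvm::StringRef> &LHS, uint64_t RHS) {
        return LHS.first < RHS;
      });
  if (It != MD5NameMap.end() && It->first == FuncMD5Hash)
    return It->second;
  return llvm::StringRef();
}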
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc
index a6913527e67f..f715505ba5e1 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -154,17 +154,7 @@ INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
VALUE_PROF_FUNC_PARAM(uint64_t, TargetValue, Type::getInt64Ty(Ctx)) \
INSTR_PROF_COMMA
VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA
-#ifndef VALUE_RANGE_PROF
VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
-#else /* VALUE_RANGE_PROF */
-VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) \
- INSTR_PROF_COMMA
-VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeStart, Type::getInt64Ty(Ctx)) \
- INSTR_PROF_COMMA
-VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeLast, Type::getInt64Ty(Ctx)) \
- INSTR_PROF_COMMA
-VALUE_PROF_FUNC_PARAM(uint64_t, LargeValue, Type::getInt64Ty(Ctx))
-#endif /*VALUE_RANGE_PROF */
#undef VALUE_PROF_FUNC_PARAM
#undef INSTR_PROF_COMMA
/* VALUE_PROF_FUNC_PARAM end */
@@ -657,9 +647,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
/* Raw profile format version (start from 1). */
#define INSTR_PROF_RAW_VERSION 5
/* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 6
+#define INSTR_PROF_INDEX_VERSION 7
/* Coverage mapping format version (start from 0). */
-#define INSTR_PROF_COVMAP_VERSION 3
+#define INSTR_PROF_COVMAP_VERSION 4
/* Profile version is always of type uint64_t. Reserve the upper 8 bits in the
* version for other variants of profile. We set the lowest bit of the upper 8
@@ -671,6 +661,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
#define VARIANT_MASK_IR_PROF (0x1ULL << 56)
#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
+#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58)
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
@@ -753,9 +744,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target
#define INSTR_PROF_VALUE_PROF_FUNC_STR \
INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC)
-#define INSTR_PROF_VALUE_RANGE_PROF_FUNC __llvm_profile_instrument_range
-#define INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR \
- INSTR_PROF_QUOTE(INSTR_PROF_VALUE_RANGE_PROF_FUNC)
+#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC __llvm_profile_instrument_memop
+#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_MEMOP_FUNC)
/* InstrProfile per-function control data alignment. */
#define INSTR_PROF_DATA_ALIGNMENT 8
@@ -783,3 +774,121 @@ typedef struct InstrProfValueData {
#endif
#undef COVMAP_V2_OR_V3
+
+#ifdef INSTR_PROF_VALUE_PROF_MEMOP_API
+
+#ifdef __cplusplus
+#define INSTR_PROF_INLINE inline
+#else
+#define INSTR_PROF_INLINE
+#endif
+
+/* The value range buckets (22 buckets) for the memop size value profiling look
+ * like:
+ *
+ * [0, 0]
+ * [1, 1]
+ * [2, 2]
+ * [3, 3]
+ * [4, 4]
+ * [5, 5]
+ * [6, 6]
+ * [7, 7]
+ * [8, 8]
+ * [9, 15]
+ * [16, 16]
+ * [17, 31]
+ * [32, 32]
+ * [33, 63]
+ * [64, 64]
+ * [65, 127]
+ * [128, 128]
+ * [129, 255]
+ * [256, 256]
+ * [257, 511]
+ * [512, 512]
+ * [513, UINT64_MAX]
+ *
+ * Each range has a 'representative value', which is the lower end value of
+ * the range and is the value stored in the runtime profile data records and
+ * the VP metadata. For example, it's 2 for [2, 2] and 65 for [65, 127].
+ */
+
+/*
+ * Clz and Popcount. This code was copied from
+ * compiler-rt/lib/fuzzer/{FuzzerBuiltins.h,FuzzerBuiltinsMsvc.h} and
+ * llvm/include/llvm/Support/MathExtras.h.
+ */
+#if defined(_MSC_VER) && !defined(__clang__)
+
+#include <intrin.h>
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
+int InstProfClzll(unsigned long long X) {
+ unsigned long LeadZeroIdx = 0;
+#if !defined(_M_ARM64) && !defined(_M_X64)
+ // Scan the high 32 bits.
+ if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X >> 32)))
+ return (int)(63 - (LeadZeroIdx + 32)); // Create a bit offset
+ // from the MSB.
+ // Scan the low 32 bits.
+ if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X)))
+ return (int)(63 - LeadZeroIdx);
+#else
+ if (_BitScanReverse64(&LeadZeroIdx, X)) return 63 - LeadZeroIdx;
+#endif
+ return 64;
+}
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
+int InstProfPopcountll(unsigned long long X) {
+ // This code originates from https://reviews.llvm.org/rG30626254510f.
+ unsigned long long v = X;
+ v = v - ((v >> 1) & 0x5555555555555555ULL);
+ v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
+ v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
+ return (int)((unsigned long long)(v * 0x0101010101010101ULL) >> 56);
+}
+
+#else
+
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
+int InstProfClzll(unsigned long long X) { return __builtin_clzll(X); }
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
+int InstProfPopcountll(unsigned long long X) { return __builtin_popcountll(X); }
+
+#endif /* defined(_MSC_VER) && !defined(__clang__) */
+
+/* Map an (observed) memop size value to the representative value of its range.
+ * For example, 5 -> 5, 22 -> 17, 99 -> 65, 256 -> 256, 1001 -> 513. */
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE uint64_t
+InstrProfGetRangeRepValue(uint64_t Value) {
+ if (Value <= 8)
+ // The first ranges are individually tracked. Use the value as is.
+ return Value;
+ else if (Value >= 513)
+ // The last range is mapped to its lowest value.
+ return 513;
+ else if (InstProfPopcountll(Value) == 1)
+ // If it's a power of two, use it as is.
+ return Value;
+ else
+ // Otherwise, map it to the previous power of two plus 1.
+ return (1 << (64 - InstProfClzll(Value) - 1)) + 1;
+}
+
+/* Return true if the range that an (observed) memop size value belongs to has
+ * only a single value in the range. For example, 0 -> true, 8 -> true, 10 ->
+ * false, 64 -> true, 100 -> false, 513 -> false. */
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE unsigned
+InstrProfIsSingleValRange(uint64_t Value) {
+ if (Value <= 8)
+ // The first ranges are individually tracked.
+ return 1;
+ else if (InstProfPopcountll(Value) == 1)
+ // If it's a power of two, there's only one value.
+ return 1;
+ else
+ // Otherwise, there's more than one value in the range.
+ return 0;
+}
+
+#endif /* INSTR_PROF_VALUE_PROF_MEMOP_API */
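The bucketing implemented by InstrProfGetRangeRepValue above can be restated compactly: values 0-8 map to themselves, powers of two map to themselves, 513 and above collapse to 513, and everything else maps to the previous power of two plus one. A standalone C++20 sketch that mirrors that logic with <bit> helpers instead of the MSVC/GCC intrinsics in the header; the assertions restate the examples from the comments:

#include <bit>
#include <cassert>
#include <cstdint>

uint64_t memopRangeRep(uint64_t Value) {
  if (Value <= 8)
    return Value;                 // individually tracked buckets
  if (Value >= 513)
    return 513;                   // final catch-all bucket [513, UINT64_MAX]
  if (std::popcount(Value) == 1)
    return Value;                 // exact power of two
  return (uint64_t{1} << (63 - std::countl_zero(Value))) + 1;
}

int main() {
  assert(memopRangeRep(5) == 5);
  assert(memopRangeRep(22) == 17);
  assert(memopRangeRep(99) == 65);
  assert(memopRangeRep(256) == 256);
  assert(memopRangeRep(1001) == 513);
  return 0;
}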
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h
index f5f552672bf0..2c2cfb90d4fa 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -50,8 +50,12 @@ public:
InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
InstrProfIterator &operator++() { Increment(); return *this; }
- bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
- bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
+ bool operator==(const InstrProfIterator &RHS) const {
+ return Reader == RHS.Reader;
+ }
+ bool operator!=(const InstrProfIterator &RHS) const {
+ return Reader != RHS.Reader;
+ }
value_type &operator*() { return Record; }
value_type *operator->() { return &Record; }
};
@@ -79,6 +83,8 @@ public:
virtual bool hasCSIRLevelProfile() const = 0;
+ virtual bool instrEntryBBEnabled() const = 0;
+
/// Return the PGO symtab. There are three different readers:
/// Raw, Text, and Indexed profile readers. The first two types
/// of readers are used only by llvm-profdata tool, while the indexed
@@ -148,6 +154,7 @@ private:
line_iterator Line;
bool IsIRLevelProfile = false;
bool HasCSIRLevelProfile = false;
+ bool InstrEntryBBEnabled = false;
Error readValueProfileData(InstrProfRecord &Record);
@@ -164,6 +171,8 @@ public:
bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
+ bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; }
+
/// Read the header.
Error readHeader() override;
@@ -224,6 +233,10 @@ public:
return (Version & VARIANT_MASK_CSIR_PROF) != 0;
}
+ bool instrEntryBBEnabled() const override {
+ return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
+ }
+
InstrProfSymtab &getSymtab() override {
assert(Symtab.get());
return *Symtab.get();
@@ -360,6 +373,7 @@ struct InstrProfReaderIndexBase {
virtual uint64_t getVersion() const = 0;
virtual bool isIRLevelProfile() const = 0;
virtual bool hasCSIRLevelProfile() const = 0;
+ virtual bool instrEntryBBEnabled() const = 0;
virtual Error populateSymtab(InstrProfSymtab &) = 0;
};
@@ -408,6 +422,10 @@ public:
return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
}
+ bool instrEntryBBEnabled() const override {
+ return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
+ }
+
Error populateSymtab(InstrProfSymtab &Symtab) override {
return Symtab.create(HashTable->keys());
}
@@ -462,6 +480,10 @@ public:
return Index->hasCSIRLevelProfile();
}
+ bool instrEntryBBEnabled() const override {
+ return Index->instrEntryBBEnabled();
+ }
+
/// Return true if the given buffer is in an indexed instrprof format.
static bool hasFormat(const MemoryBuffer &DataBuffer);
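instrEntryBBEnabled() is added next to isIRLevelProfile() and hasCSIRLevelProfile() on every reader kind, so a caller can query it without knowing whether the profile is text, raw or indexed. A hedged sketch; InstrProfReader::create() is the existing factory, the rest is illustrative and the error handling is minimal:

#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Error.h"

bool profileUsesEntryInstrumentation(const char *Path) {
  auto ReaderOrErr = llvm::InstrProfReader::create(Path);
  if (!ReaderOrErr) {
    llvm::consumeError(ReaderOrErr.takeError());
    return false;
  }
  // Reflects the VARIANT_MASK_INSTR_ENTRY bit recorded in the profile header.
  return (*ReaderOrErr)->instrEntryBBEnabled();
}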
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfWriter.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 5882fa2781e2..35c2669d55a6 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -40,13 +40,16 @@ private:
bool Sparse;
StringMap<ProfilingData> FunctionData;
ProfKind ProfileKind = PF_Unknown;
+ bool InstrEntryBBEnabled;
// Use raw pointer here for the incomplete type object.
InstrProfRecordWriterTrait *InfoObj;
public:
- InstrProfWriter(bool Sparse = false);
+ InstrProfWriter(bool Sparse = false, bool InstrEntryBBEnabled = false);
~InstrProfWriter();
+ StringMap<ProfilingData> &getProfileData() { return FunctionData; }
+
/// Add function counts for the given function. If there are already counts
/// for this function and the hash and number of counts match, each counter is
/// summed. Optionally scale counts by \p Weight.
@@ -97,6 +100,7 @@ public:
return Error::success();
}
+ void setInstrEntryBBEnabled(bool Enabled) { InstrEntryBBEnabled = Enabled; }
// Internal interface for testing purpose only.
void setValueProfDataEndianness(support::endianness Endianness);
void setOutputSparse(bool Sparse);
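On the writer side the same flag is threaded through the constructor, or set later, so the emitted profile header carries the entry-instrumentation bit. A small sketch grounded in the constructor and setter shown above; the wrapper function itself is illustrative:

#include "llvm/ProfileData/InstrProfWriter.h"

void buildWriter(bool EntryInstrumented) {
  llvm::InstrProfWriter Writer(/*Sparse=*/false,
                               /*InstrEntryBBEnabled=*/false);
  // The flag can also be flipped afterwards, e.g. to mirror an input
  // reader's instrEntryBBEnabled() bit before records are added.
  Writer.setInstrEntryBBEnabled(EntryInstrumented);
  // ... addRecord() calls and write() would follow here ...
}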
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/ProfileCommon.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/ProfileCommon.h
index 14c305b3d0c0..55b94b2e690d 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/ProfileCommon.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/ProfileCommon.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cstdint>
@@ -33,8 +34,8 @@ class FunctionSamples;
} // end namespace sampleprof
-inline const char *getHotSectionPrefix() { return ".hot"; }
-inline const char *getUnlikelySectionPrefix() { return ".unlikely"; }
+inline const char *getHotSectionPrefix() { return "hot"; }
+inline const char *getUnlikelySectionPrefix() { return "unlikely"; }
class ProfileSummaryBuilder {
private:
@@ -89,6 +90,8 @@ public:
void addRecord(const sampleprof::FunctionSamples &FS,
bool isCallsiteSample = false);
+ std::unique_ptr<ProfileSummary> computeSummaryForProfiles(
+ const StringMap<sampleprof::FunctionSamples> &Profiles);
std::unique_ptr<ProfileSummary> getSummary();
};
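computeSummaryForProfiles() lets a caller build a ProfileSummary straight from a StringMap of FunctionSamples instead of feeding records through addRecord() one by one. A hedged sketch; SampleProfileSummaryBuilder and ProfileSummaryBuilder::DefaultCutoffs are assumed to be the existing builder class and default cutoff list, neither of which appears in this diff:

#include "llvm/ADT/StringMap.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include <memory>

std::unique_ptr<llvm::ProfileSummary>
summarize(const llvm::StringMap<llvm::sampleprof::FunctionSamples> &Profiles) {
  llvm::SampleProfileSummaryBuilder Builder(
      llvm::ProfileSummaryBuilder::DefaultCutoffs);
  return Builder.computeSummaryForProfiles(Profiles);
}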
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h
index 562468333ef4..25d5b2376c11 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h
@@ -37,8 +37,6 @@
namespace llvm {
-class raw_ostream;
-
const std::error_category &sampleprof_category();
enum class sampleprof_error {
@@ -56,7 +54,8 @@ enum class sampleprof_error {
ostream_seek_unsupported,
compress_failed,
uncompress_failed,
- zlib_unavailable
+ zlib_unavailable,
+ hash_mismatch
};
inline std::error_code make_error_code(sampleprof_error E) {
@@ -122,6 +121,7 @@ enum SecType {
SecNameTable = 2,
SecProfileSymbolList = 3,
SecFuncOffsetTable = 4,
+ SecFuncMetadata = 5,
// marker for the first type of profile.
SecFuncProfileFirst = 32,
SecLBRProfile = SecFuncProfileFirst
@@ -139,6 +139,8 @@ static inline std::string getSecName(SecType Type) {
return "ProfileSymbolListSection";
case SecFuncOffsetTable:
return "FuncOffsetTableSection";
+ case SecFuncMetadata:
+ return "FunctionMetadata";
case SecLBRProfile:
return "LBRProfileSection";
}
@@ -152,6 +154,9 @@ struct SecHdrTableEntry {
uint64_t Flags;
uint64_t Offset;
uint64_t Size;
+ // The index indicating the location of the current entry in
+ // SectionHdrLayout table.
+ uint32_t LayoutIndex;
};
// Flags common for all sections are defined here. In SecHdrTableEntry::Flags,
@@ -159,7 +164,9 @@ struct SecHdrTableEntry {
// will be saved in the higher 32 bits.
enum class SecCommonFlags : uint32_t {
SecFlagInValid = 0,
- SecFlagCompress = (1 << 0)
+ SecFlagCompress = (1 << 0),
+ // Indicate the section contains only profile without context.
+ SecFlagFlat = (1 << 1)
};
// Section specific flags are defined here.
@@ -167,7 +174,10 @@ enum class SecCommonFlags : uint32_t {
// a new check in verifySecFlag.
enum class SecNameTableFlags : uint32_t {
SecFlagInValid = 0,
- SecFlagMD5Name = (1 << 0)
+ SecFlagMD5Name = (1 << 0),
+ // Store MD5 in fixed length instead of ULEB128 so NameTable can be
+ // accessed like an array.
+ SecFlagFixedLengthMD5 = (1 << 1)
};
enum class SecProfSummaryFlags : uint32_t {
SecFlagInValid = 0,
@@ -177,6 +187,11 @@ enum class SecProfSummaryFlags : uint32_t {
SecFlagPartial = (1 << 0)
};
+enum class SecFuncMetadataFlags : uint32_t {
+ SecFlagInvalid = 0,
+ SecFlagIsProbeBased = (1 << 0),
+};
+
// Verify section specific flag is used for the correct section.
template <class SecFlagType>
static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
@@ -193,6 +208,9 @@ static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
case SecProfSummary:
IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>();
break;
+ case SecFuncMetadata:
+ IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>();
+ break;
default:
break;
}
@@ -244,6 +262,14 @@ struct LineLocation {
(LineOffset == O.LineOffset && Discriminator < O.Discriminator);
}
+ bool operator==(const LineLocation &O) const {
+ return LineOffset == O.LineOffset && Discriminator == O.Discriminator;
+ }
+
+ bool operator!=(const LineLocation &O) const {
+ return LineOffset != O.LineOffset || Discriminator != O.Discriminator;
+ }
+
uint32_t LineOffset;
uint32_t Discriminator;
};
@@ -321,6 +347,16 @@ public:
return SortedTargets;
}
+ /// Prorate call targets by a distribution factor.
+ static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
+ float DistributionFactor) {
+ CallTargetMap AdjustedTargets;
+ for (const auto &I : Targets) {
+ AdjustedTargets[I.first()] = I.second * DistributionFactor;
+ }
+ return AdjustedTargets;
+ }
+
/// Merge the samples in \p Other into this record.
/// Optionally scale sample counts by \p Weight.
sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {
@@ -341,7 +377,137 @@ private:
raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
+// State of context associated with FunctionSamples
+enum ContextStateMask {
+ UnknownContext = 0x0, // Profile without context
+ RawContext = 0x1, // Full context profile from input profile
+ SyntheticContext = 0x2, // Synthetic context created for context promotion
+ InlinedContext = 0x4, // Profile for context that is inlined into caller
+ MergedContext = 0x8 // Profile for context merged into base profile
+};
+
+// Sample context for FunctionSamples. It consists of the calling context,
+// the function name and context state. Internally sample context is represented
+// using StringRef, which is also the input for constructing a `SampleContext`.
+// It can accept and represent both full context string as well as context-less
+// function name.
+// Example of full context string (note the wrapping `[]`):
+// `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
+// Example of context-less function name (same as AutoFDO):
+// `_Z8funcLeafi`
+class SampleContext {
+public:
+ SampleContext() : State(UnknownContext) {}
+ SampleContext(StringRef ContextStr,
+ ContextStateMask CState = UnknownContext) {
+ setContext(ContextStr, CState);
+ }
+
+ // Promote context by removing top frames (represented by `ContextStrToRemove`).
+ // Note that with the string representation of the context, the promotion is
+ // effectively a substr operation with `ContextStrToRemove` stripped from the
+ // left.
+ void promoteOnPath(StringRef ContextStrToRemove) {
+ assert(FullContext.startswith(ContextStrToRemove));
+
+ // Remove leading context and frame separator " @ ".
+ FullContext = FullContext.substr(ContextStrToRemove.size() + 3);
+ CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3);
+ }
+
+ // Split the top context frame (left-most substr) from context.
+ static std::pair<StringRef, StringRef>
+ splitContextString(StringRef ContextStr) {
+ return ContextStr.split(" @ ");
+ }
+
+ // Decode context string for a frame to get function name and location.
+ // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
+ static void decodeContextString(StringRef ContextStr, StringRef &FName,
+ LineLocation &LineLoc) {
+ // Get function name
+ auto EntrySplit = ContextStr.split(':');
+ FName = EntrySplit.first;
+
+ LineLoc = {0, 0};
+ if (!EntrySplit.second.empty()) {
+ // Get line offset, use signed int for getAsInteger so string will
+ // be parsed as signed.
+ int LineOffset = 0;
+ auto LocSplit = EntrySplit.second.split('.');
+ LocSplit.first.getAsInteger(10, LineOffset);
+ LineLoc.LineOffset = LineOffset;
+
+ // Get discriminator
+ if (!LocSplit.second.empty())
+ LocSplit.second.getAsInteger(10, LineLoc.Discriminator);
+ }
+ }
+
+ operator StringRef() const { return FullContext; }
+ bool hasState(ContextStateMask S) { return State & (uint32_t)S; }
+ void setState(ContextStateMask S) { State |= (uint32_t)S; }
+ void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
+ bool hasContext() const { return State != UnknownContext; }
+ bool isBaseContext() const { return CallingContext.empty(); }
+ StringRef getNameWithoutContext() const { return Name; }
+ StringRef getCallingContext() const { return CallingContext; }
+ StringRef getNameWithContext(bool WithBracket = false) const {
+ return WithBracket ? InputContext : FullContext;
+ }
+
+private:
+ // Given a context string, decode and populate internal states like
+ // Function name, Calling context and context state. Example of input
+ // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
+ void setContext(StringRef ContextStr, ContextStateMask CState) {
+ assert(!ContextStr.empty());
+ InputContext = ContextStr;
+ // Note that `[]` wrapped input indicates a full context string, otherwise
+ // it's treated as context-less function name only.
+ bool HasContext = ContextStr.startswith("[");
+ if (!HasContext && CState == UnknownContext) {
+ State = UnknownContext;
+ Name = FullContext = ContextStr;
+ } else {
+ // Assume raw context profile if unspecified
+ if (CState == UnknownContext)
+ State = RawContext;
+ else
+ State = CState;
+
+ // Remove encapsulating '[' and ']' if any
+ if (HasContext)
+ FullContext = ContextStr.substr(1, ContextStr.size() - 2);
+ else
+ FullContext = ContextStr;
+
+ // Caller is to the left of callee in context string
+ auto NameContext = FullContext.rsplit(" @ ");
+ if (NameContext.second.empty()) {
+ Name = NameContext.first;
+ CallingContext = NameContext.second;
+ } else {
+ Name = NameContext.second;
+ CallingContext = NameContext.first;
+ }
+ }
+ }
+
+ // Input context string including bracketed calling context and leaf function
+ // name
+ StringRef InputContext;
+ // Full context string including calling context and leaf function name
+ StringRef FullContext;
+ // Function name for the associated sample profile
+ StringRef Name;
+ // Calling context (leaf function excluded) for the associated sample profile
+ StringRef CallingContext;
+ // State of the associated sample profile
+ uint32_t State;
+};
+
class FunctionSamples;
+class SampleProfileReaderItaniumRemapper;
using BodySampleMap = std::map<LineLocation, SampleRecord>;
// NOTE: Using a StringMap here makes parsed profiles consume around 17% more
@@ -369,6 +535,8 @@ public:
: sampleprof_error::success;
}
+ void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
+
sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
bool Overflowed;
TotalHeadSamples =
@@ -397,10 +565,22 @@ public:
ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
uint32_t Discriminator) const {
const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
- if (ret == BodySamples.end())
+ if (ret == BodySamples.end()) {
+ // For CSSPGO, in order to conserve profile size, we no longer write out
+ // locations profile for those not hit during training, so we need to
+ // treat them as zero instead of error here.
+ if (ProfileIsCS)
+ return 0;
+
+ // A missing counter for a probe likely means the probe was not executed.
+ // Treat it as a zero count instead of an unknown count to help edge
+ // weight inference.
+ if (FunctionSamples::ProfileIsProbeBased)
+ return 0;
return std::error_code();
- else
+ } else {
return ret->second.getSamples();
+ }
}
/// Returns the call target map collected at a given location.
@@ -414,6 +594,16 @@ public:
return ret->second.getCallTargets();
}
+ /// Returns the call target map collected at a given location specified by \p
+ /// CallSite. If the location is not found in profile, return error.
+ ErrorOr<SampleRecord::CallTargetMap>
+ findCallTargetMapAt(const LineLocation &CallSite) const {
+ const auto &Ret = BodySamples.find(CallSite);
+ if (Ret == BodySamples.end())
+ return std::error_code();
+ return Ret->second.getCallTargets();
+ }
+
/// Return the function samples at the given callsite location.
FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
return CallsiteSamples[Loc];
@@ -428,35 +618,15 @@ public:
return &iter->second;
}
- /// Returns a pointer to FunctionSamples at the given callsite location \p Loc
- /// with callee \p CalleeName. If no callsite can be found, relax the
- /// restriction to return the FunctionSamples at callsite location \p Loc
- /// with the maximum total sample count.
- const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc,
- StringRef CalleeName) const {
- std::string CalleeGUID;
- CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID);
-
- auto iter = CallsiteSamples.find(Loc);
- if (iter == CallsiteSamples.end())
- return nullptr;
- auto FS = iter->second.find(CalleeName);
- if (FS != iter->second.end())
- return &FS->second;
- // If we cannot find exact match of the callee name, return the FS with
- // the max total count. Only do this when CalleeName is not provided,
- // i.e., only for indirect calls.
- if (!CalleeName.empty())
- return nullptr;
- uint64_t MaxTotalSamples = 0;
- const FunctionSamples *R = nullptr;
- for (const auto &NameFS : iter->second)
- if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
- MaxTotalSamples = NameFS.second.getTotalSamples();
- R = &NameFS.second;
- }
- return R;
- }
+ /// Returns a pointer to FunctionSamples at the given callsite location
+ /// \p Loc with callee \p CalleeName. If no callsite can be found, relax
+ /// the restriction to return the FunctionSamples at callsite location
+ /// \p Loc with the maximum total sample count. If \p Remapper is not
+ /// nullptr, use \p Remapper to find FunctionSamples with equivalent name
+ /// as \p CalleeName.
+ const FunctionSamples *
+ findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName,
+ SampleProfileReaderItaniumRemapper *Remapper) const;
bool empty() const { return TotalSamples == 0; }
@@ -473,6 +643,11 @@ public:
/// Return the sample count of the first instruction of the function.
/// The function can be either a standalone symbol or an inlined function.
uint64_t getEntrySamples() const {
+ if (FunctionSamples::ProfileIsCS && getHeadSamples()) {
+ // For CS profile, if we already have more accurate head samples
+ // counted by branch sample from caller, use them as entry samples.
+ return getHeadSamples();
+ }
uint64_t Count = 0;
// Use either BodySamples or CallsiteSamples which ever has the smaller
// lineno.
@@ -515,6 +690,24 @@ public:
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
sampleprof_error Result = sampleprof_error::success;
Name = Other.getName();
+ if (!GUIDToFuncNameMap)
+ GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
+ if (Context.getNameWithContext(true).empty())
+ Context = Other.getContext();
+ if (FunctionHash == 0) {
+ // Set the function hash code for the target profile.
+ FunctionHash = Other.getFunctionHash();
+ } else if (FunctionHash != Other.getFunctionHash()) {
+ // The two profiles coming with different valid hash codes indicates
+ // either:
+ // 1. They are same-named static functions from different compilation
+ // units (without using -unique-internal-linkage-names), or
+ // 2. They are really the same function but from different compilations.
+ // Let's bail out in either case for now, which means one profile is
+ // dropped.
+ return sampleprof_error::hash_mismatch;
+ }
+
MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight));
MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight));
for (const auto &I : Other.getBodySamples()) {
@@ -566,19 +759,34 @@ public:
/// Return the function name.
StringRef getName() const { return Name; }
+ /// Return function name with context.
+ StringRef getNameWithContext(bool WithBracket = false) const {
+ return FunctionSamples::ProfileIsCS
+ ? Context.getNameWithContext(WithBracket)
+ : Name;
+ }
+
/// Return the original function name.
StringRef getFuncName() const { return getFuncName(Name); }
+ void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; }
+
+ uint64_t getFunctionHash() const { return FunctionHash; }
+
/// Return the canonical name for a function, taking into account
/// suffix elision policy attributes.
static StringRef getCanonicalFnName(const Function &F) {
- static const char *knownSuffixes[] = { ".llvm.", ".part." };
auto AttrName = "sample-profile-suffix-elision-policy";
auto Attr = F.getFnAttribute(AttrName).getValueAsString();
+ return getCanonicalFnName(F.getName(), Attr);
+ }
+
+ static StringRef getCanonicalFnName(StringRef FnName, StringRef Attr = "") {
+ static const char *knownSuffixes[] = { ".llvm.", ".part." };
if (Attr == "" || Attr == "all") {
- return F.getName().split('.').first;
+ return FnName.split('.').first;
} else if (Attr == "selected") {
- StringRef Cand(F.getName());
+ StringRef Cand(FnName);
for (const auto &Suf : knownSuffixes) {
StringRef Suffix(Suf);
auto It = Cand.rfind(Suffix);
@@ -590,11 +798,11 @@ public:
}
return Cand;
} else if (Attr == "none") {
- return F.getName();
+ return FnName;
} else {
assert(false && "internal error: unknown suffix elision policy");
}
- return F.getName();
+ return FnName;
}
/// Translate \p Name into its original name.
@@ -609,16 +817,19 @@ public:
return Name;
assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first");
- auto iter = GUIDToFuncNameMap->find(std::stoull(Name.data()));
- if (iter == GUIDToFuncNameMap->end())
- return StringRef();
- return iter->second;
+ return GUIDToFuncNameMap->lookup(std::stoull(Name.data()));
}
/// Returns the line offset to the start line of the subprogram.
/// We assume that a single function will not exceed 65535 LOC.
static unsigned getOffset(const DILocation *DIL);
+ /// Returns a unique call site identifier for a given debug location of a call
+ /// instruction. It wraps two scenarios, the probe-based profile and the
+ /// regular profile, to hide implementation details from the sample loader and
+ /// the context tracker.
+ static LineLocation getCallSiteIdentifier(const DILocation *DIL);
+
/// Get the FunctionSamples of the inline instance where DIL originates
/// from.
///
@@ -628,7 +839,19 @@ public:
/// tree nodes in the profile.
///
/// \returns the FunctionSamples pointer to the inlined instance.
- const FunctionSamples *findFunctionSamples(const DILocation *DIL) const;
+ /// If \p Remapper is not nullptr, it will be used to find matching
+ /// FunctionSamples with not exactly the same but equivalent name.
+ const FunctionSamples *findFunctionSamples(
+ const DILocation *DIL,
+ SampleProfileReaderItaniumRemapper *Remapper = nullptr) const;
+
+ static bool ProfileIsProbeBased;
+
+ static bool ProfileIsCS;
+
+ SampleContext &getContext() const { return Context; }
+
+ void setContext(const SampleContext &FContext) { Context = FContext; }
static SampleProfileFormat Format;
@@ -646,10 +869,20 @@ public:
return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name);
}
+ // Find all the names in the current FunctionSamples including names in
+ // all the inline instances and names of call targets.
+ void findAllNames(DenseSet<StringRef> &NameSet) const;
+
private:
/// Mangled name of the function.
StringRef Name;
+ /// CFG hash value for the function.
+ uint64_t FunctionHash = 0;
+
+ /// Calling context for function profile
+ mutable SampleContext Context;
+
/// Total number of samples collected inside this function.
///
/// Samples are cumulative, they include all the samples collected
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfReader.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfReader.h
index 0e8ee7696c54..999e75eddffa 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -27,8 +27,9 @@
// offsetA[.discriminator]: fnA:num_of_total_samples
// offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
// ...
+// !CFGChecksum: num
//
-// This is a nested tree in which the identation represents the nesting level
+// This is a nested tree in which the indentation represents the nesting level
// of the inline stack. There are no blank lines in the file. And the spacing
// within a single line is fixed. Additional spaces will result in an error
// while reading the file.
@@ -47,10 +48,11 @@
// in the prologue of the function (second number). This head sample
// count provides an indicator of how frequently the function is invoked.
//
-// There are two types of lines in the function body.
+// There are three types of lines in the function body.
//
// * Sampled line represents the profile information of a source location.
// * Callsite line represents the profile information of a callsite.
+// * Metadata line represents extra metadata of the function.
//
// Each sampled line may contain several items. Some are optional (marked
// below):
@@ -114,6 +116,18 @@
// total number of samples collected for the inlined instance at this
// callsite
//
+// A metadata line can occur only at a single level of indentation and carries
+// extra information for the top-level function. Furthermore, metadata can only
+// occur after all the body samples and callsite samples.
+// Each metadata line may contain a particular type of metadata, marked by
+// a leading keyword prefixed with '!'. We process each metadata line
+// independently, hence each metadata line has to form an independent piece
+// of information that does not require cross-line reference.
+// We support the following types of metadata:
+//
+// a. CFG Checksum (a.k.a. function hash):
+// !CFGChecksum: 12345
+//
//
// Binary format
// -------------
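As a concrete illustration of the metadata line described above (all names and counts below are invented), a top-level function's text profile may now end with a checksum line at one level of indentation, after every body and callsite sample:

  main:18462:4
   1: 200
   3: 180 foo:150
   5: foo:1200
    1: 1000
   !CFGChecksum: 563022570642068

Only the outermost function carries the !CFGChecksum line here; the inlined instance at offset 5 keeps its usual two-space indentation.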
@@ -208,10 +222,10 @@
#ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H
#define LLVM_PROFILEDATA_SAMPLEPROFREADER_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -232,6 +246,7 @@
namespace llvm {
class raw_ostream;
+class Twine;
namespace sampleprof {
@@ -275,15 +290,18 @@ public:
return Remappings->lookup(FunctionName);
}
- /// Return the samples collected for function \p F if remapper knows
- /// it is present in SampleMap.
- FunctionSamples *getSamplesFor(StringRef FunctionName);
+ /// Return the equivalent name in the profile for \p FunctionName if
+ /// it exists.
+ Optional<StringRef> lookUpNameInProfile(StringRef FunctionName);
private:
// The buffer holding the content read from remapping file.
std::unique_ptr<MemoryBuffer> Buffer;
std::unique_ptr<SymbolRemappingReader> Remappings;
- DenseMap<SymbolRemappingReader::Key, FunctionSamples *> SampleMap;
+ // Map remapping key to the name in the profile. By looking up the
+ // key in the remapper, a given new name can be mapped to the
+ // canonical name using the NameMap.
+ DenseMap<SymbolRemappingReader::Key, StringRef> NameMap;
// The Reader the remapper is servicing.
SampleProfileReader &Reader;
// Indicate whether remapping has been applied to the profile read
@@ -370,15 +388,19 @@ public:
/// Return the samples collected for function \p F.
virtual FunctionSamples *getSamplesFor(StringRef Fname) {
- if (Remapper) {
- if (auto FS = Remapper->getSamplesFor(Fname))
- return FS;
- }
std::string FGUID;
Fname = getRepInFormat(Fname, useMD5(), FGUID);
auto It = Profiles.find(Fname);
if (It != Profiles.end())
return &It->second;
+
+ if (Remapper) {
+ if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) {
+ auto It = Profiles.find(*NameInProfile);
+ if (It != Profiles.end())
+ return &It->second;
+ }
+ }
return nullptr;
}
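With the reordering above, getSamplesFor() prefers an exact (possibly MD5) hit and only then asks the remapper for an equivalent name. A hedged usage sketch; the mangled name is invented:

  #include "llvm/ProfileData/SampleProfReader.h"

  uint64_t lookupTotalSamples(llvm::sampleprof::SampleProfileReader &Reader) {
    if (llvm::sampleprof::FunctionSamples *FS =
            Reader.getSamplesFor(llvm::StringRef("_ZN3foo3barEi")))
      // Exact profile hit, or an equivalent name found through the
      // remapper's lookUpNameInProfile().
      return FS->getTotalSamples();
    return 0;
  }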
@@ -386,7 +408,7 @@ public:
StringMap<FunctionSamples> &getProfiles() { return Profiles; }
/// Report a parse error message.
- void reportError(int64_t LineNumber, Twine Msg) const {
+ void reportError(int64_t LineNumber, const Twine &Msg) const {
Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(),
LineNumber, Msg));
}
@@ -411,6 +433,12 @@ public:
/// \brief Return the profile format.
SampleProfileFormat getFormat() const { return Format; }
+ /// Whether input profile is based on pseudo probes.
+ bool profileIsProbeBased() const { return ProfileIsProbeBased; }
+
+ /// Whether input profile is fully context-sensitive
+ bool profileIsCS() const { return ProfileIsCS; }
+
virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() {
return nullptr;
};
@@ -423,6 +451,12 @@ public:
/// Return whether names in the profile are all MD5 numbers.
virtual bool useMD5() { return false; }
+ /// If the flag is set, don't read profiles without context. This is only
+ /// meaningful for the ExtBinary format.
+ virtual void setSkipFlatProf(bool Skip) {}
+
+ SampleProfileReaderItaniumRemapper *getRemapper() { return Remapper.get(); }
+
protected:
/// Map every function to its associated profile.
///
@@ -451,6 +485,15 @@ protected:
std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
+ /// \brief Whether samples are collected based on pseudo probes.
+ bool ProfileIsProbeBased = false;
+
+ /// Whether function profiles are context-sensitive.
+ bool ProfileIsCS = false;
+
+ /// Number of context-sensitive profiles.
+ uint32_t CSProfileCount = 0;
+
/// \brief The format of sample.
SampleProfileFormat Format = SPF_None;
};
@@ -588,40 +631,25 @@ private:
protected:
std::vector<SecHdrTableEntry> SecHdrTable;
- std::unique_ptr<ProfileSymbolList> ProfSymList;
- std::error_code readSecHdrTableEntry();
+ std::error_code readSecHdrTableEntry(uint32_t Idx);
std::error_code readSecHdrTable();
- virtual std::error_code readHeader() override;
- virtual std::error_code verifySPMagic(uint64_t Magic) override = 0;
- virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
- const SecHdrTableEntry &Entry) = 0;
-
-public:
- SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
- LLVMContext &C, SampleProfileFormat Format)
- : SampleProfileReaderBinary(std::move(B), C, Format) {}
-
- /// Read sample profiles in extensible format from the associated file.
- std::error_code readImpl() override;
-
- /// Get the total size of all \p Type sections.
- uint64_t getSectionSize(SecType Type);
- /// Get the total size of header and all sections.
- uint64_t getFileSize();
- virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) override;
-};
-class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
-private:
- virtual std::error_code verifySPMagic(uint64_t Magic) override;
- virtual std::error_code
- readOneSection(const uint8_t *Start, uint64_t Size,
- const SecHdrTableEntry &Entry) override;
- std::error_code readProfileSymbolList();
+ std::error_code readFuncMetadata();
std::error_code readFuncOffsetTable();
std::error_code readFuncProfiles();
std::error_code readMD5NameTable();
std::error_code readNameTableSec(bool IsMD5);
+ std::error_code readProfileSymbolList();
+
+ virtual std::error_code readHeader() override;
+ virtual std::error_code verifySPMagic(uint64_t Magic) override = 0;
+ virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
+ const SecHdrTableEntry &Entry);
+ // placeholder for subclasses to dispatch their own section readers.
+ virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0;
+ virtual ErrorOr<StringRef> readStringFromTable() override;
+
+ std::unique_ptr<ProfileSymbolList> ProfSymList;
/// The table mapping from function name to the offset of its FunctionSample
/// towards file start.
@@ -631,6 +659,12 @@ private:
/// Use all functions from the input profile.
bool UseAllFuncs = true;
+ /// Use fixed length MD5 instead of ULEB128 encoding so NameTable doesn't
+ /// need to be read in up front and can be directly accessed using index.
+ bool FixedLengthMD5 = false;
+ /// The starting address of NameTable containing fixed length MD5.
+ const uint8_t *MD5NameMemStart = nullptr;
+
/// If MD5 is used in NameTable section, the section saves uint64_t data.
/// The uint64_t data has to be converted to a string and then the string
/// will be used to initialize StringRef in NameTable.
@@ -640,26 +674,52 @@ private:
/// the lifetime of MD5StringBuf is not shorter than that of NameTable.
std::unique_ptr<std::vector<std::string>> MD5StringBuf;
+ /// If SkipFlatProf is true, skip the sections with
+ /// SecFlagFlat flag.
+ bool SkipFlatProf = false;
+
public:
- SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
- SampleProfileFormat Format = SPF_Ext_Binary)
- : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {}
+ SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
+ LLVMContext &C, SampleProfileFormat Format)
+ : SampleProfileReaderBinary(std::move(B), C, Format) {}
- /// \brief Return true if \p Buffer is in the format supported by this class.
- static bool hasFormat(const MemoryBuffer &Buffer);
+ /// Read sample profiles in extensible format from the associated file.
+ std::error_code readImpl() override;
- virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
- return std::move(ProfSymList);
- };
+ /// Get the total size of all \p Type sections.
+ uint64_t getSectionSize(SecType Type);
+ /// Get the total size of header and all sections.
+ uint64_t getFileSize();
+ virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) override;
/// Collect functions with definitions in Module \p M.
void collectFuncsFrom(const Module &M) override;
/// Return whether names in the profile are all MD5 numbers.
- virtual bool useMD5() override {
- assert(!NameTable.empty() && "NameTable should have been initialized");
- return MD5StringBuf && !MD5StringBuf->empty();
- }
+ virtual bool useMD5() override { return MD5StringBuf.get(); }
+
+ virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
+ return std::move(ProfSymList);
+ };
+
+ virtual void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; }
+};
+
+class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
+private:
+ virtual std::error_code verifySPMagic(uint64_t Magic) override;
+ virtual std::error_code
+ readCustomSection(const SecHdrTableEntry &Entry) override {
+ return sampleprof_error::success;
+ };
+
+public:
+ SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
+ SampleProfileFormat Format = SPF_Ext_Binary)
+ : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {}
+
+ /// \brief Return true if \p Buffer is in the format supported by this class.
+ static bool hasFormat(const MemoryBuffer &Buffer);
};
class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
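A hedged sketch of how a consumer might drive the new reader hooks (setSkipFlatProf, profileIsCS, profileIsProbeBased). The create() call is assumed to be the usual SampleProfileReader factory; it is not part of this hunk:

  #include "llvm/ProfileData/SampleProfReader.h"

  void loadProfile(const std::string &Path, llvm::LLVMContext &Ctx) {
    auto ReaderOrErr = llvm::sampleprof::SampleProfileReader::create(Path, Ctx);
    if (!ReaderOrErr)
      return; // diagnose ReaderOrErr.getError() in real code
    llvm::sampleprof::SampleProfileReader &Reader = *ReaderOrErr.get();
    // Only meaningful for ExtBinary profiles: skip flat (context-less)
    // sections, e.g. in a ThinLTO-postlink style consumer.
    Reader.setSkipFlatProf(true);
    if (Reader.read())
      return; // diagnose the error code in real code
    bool IsCS = Reader.profileIsCS();            // fully context-sensitive?
    bool IsProbe = Reader.profileIsProbeBased(); // pseudo-probe based?
    (void)IsCS;
    (void)IsProbe;
  }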
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfWriter.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfWriter.h
index 7d0df9e44f58..419ebd6eb7ae 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/ErrorOr.h"
@@ -28,6 +29,15 @@
namespace llvm {
namespace sampleprof {
+enum SectionLayout {
+ DefaultLayout,
+ // The layout splits profile with context information from profile without
+ // context information. When Thinlto is enabled, ThinLTO postlink phase only
+ // has to load profile with context information and can skip the other part.
+ CtxSplitLayout,
+ NumOfLayout,
+};
+
/// Sample-based profile writer. Base class.
class SampleProfileWriter {
public:
@@ -60,6 +70,7 @@ public:
virtual void setToCompressAllSections() {}
virtual void setUseMD5() {}
virtual void setPartialProfile() {}
+ virtual void resetSecLayout(SectionLayout SL) {}
protected:
SampleProfileWriter(std::unique_ptr<raw_ostream> &OS)
@@ -144,6 +155,36 @@ class SampleProfileWriterRawBinary : public SampleProfileWriterBinary {
using SampleProfileWriterBinary::SampleProfileWriterBinary;
};
+const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout>
+ ExtBinaryHdrLayoutTable = {
+ // Note that SecFuncOffsetTable section is written after SecLBRProfile
+ // in the profile, but is put before SecLBRProfile in SectionHdrLayout.
+ // This is because the sample reader follows the order in SectionHdrLayout
+ // to read each section. To read function profiles on demand, the sample
+ // reader needs to get the offset of each function profile first.
+ //
+ // DefaultLayout
+ SmallVector<SecHdrTableEntry, 8>({{SecProfSummary, 0, 0, 0, 0},
+ {SecNameTable, 0, 0, 0, 0},
+ {SecFuncOffsetTable, 0, 0, 0, 0},
+ {SecLBRProfile, 0, 0, 0, 0},
+ {SecProfileSymbolList, 0, 0, 0, 0},
+ {SecFuncMetadata, 0, 0, 0, 0}}),
+ // CtxSplitLayout
+ SmallVector<SecHdrTableEntry, 8>({{SecProfSummary, 0, 0, 0, 0},
+ {SecNameTable, 0, 0, 0, 0},
+ // profile with context
+ // for next two sections
+ {SecFuncOffsetTable, 0, 0, 0, 0},
+ {SecLBRProfile, 0, 0, 0, 0},
+ // profile without context
+ // for next two sections
+ {SecFuncOffsetTable, 0, 0, 0, 0},
+ {SecLBRProfile, 0, 0, 0, 0},
+ {SecProfileSymbolList, 0, 0, 0, 0},
+ {SecFuncMetadata, 0, 0, 0, 0}}),
+};
+
class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
using SampleProfileWriterBinary::SampleProfileWriterBinary;
public:
@@ -152,10 +193,45 @@ public:
virtual void setToCompressAllSections() override;
void setToCompressSection(SecType Type);
+ virtual std::error_code writeSample(const FunctionSamples &S) override;
+
+ // Set to use MD5 to represent string in NameTable.
+ virtual void setUseMD5() override {
+ UseMD5 = true;
+ addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagMD5Name);
+ // MD5 will be stored as plain uint64_t instead of variable-length
+ // quantity format in NameTable section.
+ addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagFixedLengthMD5);
+ }
+
+ // Set the profile to be partial. It means the profile is for
+ // common/shared code. The common profile is usually merged from
+ // profiles collected from running other targets.
+ virtual void setPartialProfile() override {
+ addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagPartial);
+ }
+
+ virtual void setProfileSymbolList(ProfileSymbolList *PSL) override {
+ ProfSymList = PSL;
+ };
+
+ virtual void resetSecLayout(SectionLayout SL) override {
+ verifySecLayout(SL);
+#ifndef NDEBUG
+ // Make sure resetSecLayout is called before any flag setting.
+ for (auto &Entry : SectionHdrLayout) {
+ assert(Entry.Flags == 0 &&
+ "resetSecLayout has to be called before any flag setting");
+ }
+#endif
+ SecLayout = SL;
+ SectionHdrLayout = ExtBinaryHdrLayoutTable[SL];
+ }
protected:
- uint64_t markSectionStart(SecType Type);
- std::error_code addNewSection(SecType Sec, uint64_t SectionStart);
+ uint64_t markSectionStart(SecType Type, uint32_t LayoutIdx);
+ std::error_code addNewSection(SecType Sec, uint32_t LayoutIdx,
+ uint64_t SectionStart);
template <class SecFlagType>
void addSectionFlag(SecType Type, SecFlagType Flag) {
for (auto &Entry : SectionHdrLayout) {
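The hoisted setUseMD5()/resetSecLayout() above come with an ordering constraint: the layout must be chosen before any section flag is set. A hedged sketch, assuming the usual SampleProfileWriter::create factory:

  #include "llvm/ProfileData/SampleProfWriter.h"

  std::error_code
  writeExtBinary(llvm::StringRef OutputPath,
                 const llvm::StringMap<llvm::sampleprof::FunctionSamples> &Profiles) {
    auto WriterOrErr = llvm::sampleprof::SampleProfileWriter::create(
        OutputPath, llvm::sampleprof::SPF_Ext_Binary);
    if (!WriterOrErr)
      return WriterOrErr.getError();
    llvm::sampleprof::SampleProfileWriter &Writer = *WriterOrErr.get();
    // Pick the layout first; the assert above fires if a flag is already set.
    Writer.resetSecLayout(llvm::sampleprof::CtxSplitLayout);
    // MD5 names are now stored fixed-length, so the reader can index the
    // name table directly instead of decoding ULEB128 entries up front.
    Writer.setUseMD5();
    return Writer.write(Profiles);
  }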
@@ -163,23 +239,55 @@ protected:
addSecFlag(Entry, Flag);
}
}
+ template <class SecFlagType>
+ void addSectionFlag(uint32_t SectionIdx, SecFlagType Flag) {
+ addSecFlag(SectionHdrLayout[SectionIdx], Flag);
+ }
+
+ // placeholder for subclasses to dispatch their own section writers.
+ virtual std::error_code writeCustomSection(SecType Type) = 0;
+ // Verify the SecLayout is supported by the format.
+ virtual void verifySecLayout(SectionLayout SL) = 0;
- virtual void initSectionHdrLayout() = 0;
+ // Specify the order in which to write sections.
virtual std::error_code
writeSections(const StringMap<FunctionSamples> &ProfileMap) = 0;
+ // Dispatch section writer for each section. \p LayoutIdx is the sequence
+ // number indicating where the section is located in SectionHdrLayout.
+ virtual std::error_code
+ writeOneSection(SecType Type, uint32_t LayoutIdx,
+ const StringMap<FunctionSamples> &ProfileMap);
+
+ // Helper function to write name table.
+ virtual std::error_code writeNameTable() override;
+
+ std::error_code writeFuncMetadata(const StringMap<FunctionSamples> &Profiles);
+
+ // Functions to write various kinds of sections.
+ std::error_code
+ writeNameTableSection(const StringMap<FunctionSamples> &ProfileMap);
+ std::error_code writeFuncOffsetTable();
+ std::error_code writeProfileSymbolListSection();
+
+ SectionLayout SecLayout = DefaultLayout;
// Specify the order of sections in section header table. Note
- // the order of sections in the profile may be different that the
+ // the order of sections in SecHdrTable may be different from the
// order in SectionHdrLayout. sample Reader will follow the order
// in SectionHdrLayout to read each section.
- SmallVector<SecHdrTableEntry, 8> SectionHdrLayout;
+ SmallVector<SecHdrTableEntry, 8> SectionHdrLayout =
+ ExtBinaryHdrLayoutTable[DefaultLayout];
+
+ // Save the start of SecLBRProfile so we can compute the offset to the
+ // start of SecLBRProfile for each Function's Profile and will keep it
+ // in FuncOffsetTable.
+ uint64_t SecLBRProfileStart = 0;
private:
void allocSecHdrTable();
std::error_code writeSecHdrTable();
virtual std::error_code
writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
- SecHdrTableEntry &getEntryInLayout(SecType Type);
std::error_code compressAndOutput();
// We will swap the raw_ostream held by LocalBufStream and that
@@ -196,70 +304,43 @@ private:
// The location in the output stream where the SecHdrTable should be
// written to.
uint64_t SecHdrTableOffset;
- // Initial Section Flags setting.
+ // The table contains SecHdrTableEntry entries in order of how they are
+ // populated in the writer. It may be different from the order in
+ // SectionHdrLayout which specifies the sequence in which sections will
+ // be read.
std::vector<SecHdrTableEntry> SecHdrTable;
+
+ // FuncOffsetTable maps function name to its profile offset in SecLBRProfile
+ // section. It is used to load function profile on demand.
+ MapVector<StringRef, uint64_t> FuncOffsetTable;
+ // Whether to use MD5 to represent string.
+ bool UseMD5 = false;
+
+ ProfileSymbolList *ProfSymList = nullptr;
};
class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase {
public:
SampleProfileWriterExtBinary(std::unique_ptr<raw_ostream> &OS)
- : SampleProfileWriterExtBinaryBase(OS) {
- initSectionHdrLayout();
- }
-
- virtual std::error_code writeSample(const FunctionSamples &S) override;
- virtual void setProfileSymbolList(ProfileSymbolList *PSL) override {
- ProfSymList = PSL;
- };
-
- // Set to use MD5 to represent string in NameTable.
- virtual void setUseMD5() override {
- UseMD5 = true;
- addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagMD5Name);
- }
-
- // Set the profile to be partial. It means the profile is for
- // common/shared code. The common profile is usually merged from
- // profiles collected from running other targets.
- virtual void setPartialProfile() override {
- addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagPartial);
- }
+ : SampleProfileWriterExtBinaryBase(OS) {}
private:
- virtual void initSectionHdrLayout() override {
- // Note that SecFuncOffsetTable section is written after SecLBRProfile
- // in the profile, but is put before SecLBRProfile in SectionHdrLayout.
- //
- // This is because sample reader follows the order of SectionHdrLayout to
- // read each section, to read function profiles on demand sample reader
- // need to get the offset of each function profile first.
- //
- // SecFuncOffsetTable section is written after SecLBRProfile in the
- // profile because FuncOffsetTable needs to be populated while section
- // SecLBRProfile is written.
- SectionHdrLayout = {{SecProfSummary, 0, 0, 0},
- {SecNameTable, 0, 0, 0},
- {SecFuncOffsetTable, 0, 0, 0},
- {SecLBRProfile, 0, 0, 0},
- {SecProfileSymbolList, 0, 0, 0}};
- };
+ std::error_code
+ writeDefaultLayout(const StringMap<FunctionSamples> &ProfileMap);
+ std::error_code
+ writeCtxSplitLayout(const StringMap<FunctionSamples> &ProfileMap);
+
virtual std::error_code
writeSections(const StringMap<FunctionSamples> &ProfileMap) override;
- std::error_code writeFuncOffsetTable();
- virtual std::error_code writeNameTable() override;
-
- ProfileSymbolList *ProfSymList = nullptr;
+ virtual std::error_code writeCustomSection(SecType Type) override {
+ return sampleprof_error::success;
+ };
- // Save the start of SecLBRProfile so we can compute the offset to the
- // start of SecLBRProfile for each Function's Profile and will keep it
- // in FuncOffsetTable.
- uint64_t SecLBRProfileStart = 0;
- // FuncOffsetTable maps function name to its profile offset in SecLBRProfile
- // section. It is used to load function profile on demand.
- MapVector<StringRef, uint64_t> FuncOffsetTable;
- // Whether to use MD5 to represent string.
- bool UseMD5 = false;
+ virtual void verifySecLayout(SectionLayout SL) override {
+ assert((SL == DefaultLayout || SL == CtxSplitLayout) &&
+ "Unsupported layout");
+ }
};
// CompactBinary is a compact format of binary profile which both reduces
diff --git a/contrib/llvm-project/llvm/include/llvm/Remarks/BitstreamRemarkParser.h b/contrib/llvm-project/llvm/include/llvm/Remarks/BitstreamRemarkParser.h
index 7ebd731693b2..f7553ba53958 100644
--- a/contrib/llvm-project/llvm/include/llvm/Remarks/BitstreamRemarkParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/Remarks/BitstreamRemarkParser.h
@@ -14,13 +14,13 @@
#ifndef LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H
#define LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitstream/BitstreamReader.h"
-#include "llvm/Remarks/BitstreamRemarkContainer.h"
-#include "llvm/Remarks/Remark.h"
-#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Support/Error.h"
#include <array>
+#include <cstdint>
namespace llvm {
namespace remarks {
diff --git a/contrib/llvm-project/llvm/include/llvm/Remarks/HotnessThresholdParser.h b/contrib/llvm-project/llvm/include/llvm/Remarks/HotnessThresholdParser.h
new file mode 100644
index 000000000000..08bbf5f70b81
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Remarks/HotnessThresholdParser.h
@@ -0,0 +1,63 @@
+//===- HotnessThresholdParser.h - Parser for hotness threshold --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a simple parser for the remarks hotness threshold
+/// command-line option, which accepts either an integer or the special 'auto' value.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_HOTNESSTHRESHOLDPARSER_H
+#define LLVM_REMARKS_HOTNESSTHRESHOLDPARSER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+namespace remarks {
+
+// Parse remarks hotness threshold argument value.
+// Valid option values are
+// 1. integer: manually specified threshold; or
+// 2. string 'auto': automatically get threshold from profile summary.
+//
+// Returns None if 'auto' is specified, indicating the value will
+// be filled in later from the profile summary (PSI).
+inline Expected<Optional<uint64_t>> parseHotnessThresholdOption(StringRef Arg) {
+ if (Arg == "auto")
+ return None;
+
+ int64_t Val;
+ if (Arg.getAsInteger(10, Val))
+ return createStringError(llvm::inconvertibleErrorCode(),
+ "Not an integer: %s", Arg.data());
+
+ // Negative integer effectively means no threshold
+ return Val < 0 ? 0 : Val;
+}
+
+// A simple CL parser for '*-remarks-hotness-threshold='
+class HotnessThresholdParser : public cl::parser<Optional<uint64_t>> {
+public:
+ HotnessThresholdParser(cl::Option &O) : cl::parser<Optional<uint64_t>>(O) {}
+
+ bool parse(cl::Option &O, StringRef ArgName, StringRef Arg,
+ Optional<uint64_t> &V) {
+ auto ResultOrErr = parseHotnessThresholdOption(Arg);
+ if (!ResultOrErr)
+ return O.error("Invalid argument '" + Arg +
+ "', only integer or 'auto' is supported.");
+
+ V = *ResultOrErr;
+ return false;
+ }
+};
+
+} // namespace remarks
+} // namespace llvm
+#endif // LLVM_REMARKS_HOTNESSTHRESHOLDPARSER_H
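A hedged example of wiring the new parser into cl::opt; the option name and description are invented for illustration:

  #include "llvm/Remarks/HotnessThresholdParser.h"

  static llvm::cl::opt<llvm::Optional<uint64_t>, false,
                       llvm::remarks::HotnessThresholdParser>
      ExampleRemarksHotnessThreshold(
          "example-remarks-hotness-threshold",
          llvm::cl::desc("Minimum profile count for a remark to be emitted; "
                         "pass 'auto' to take the cutoff from the profile summary"),
          llvm::cl::Optional);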
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def
index 13b7cfc4b5cd..332fb555e824 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -51,6 +51,21 @@ AARCH64_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a",
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 |
AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM))
+AARCH64_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_FP |
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
+ AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 |
+ AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM))
+// For v8-R, we do not enable crypto, matching GCC, which enables a smaller
+// set of optional architecture extensions.
+AARCH64_ARCH("armv8-r", ARMV8R, "8-R", "v8r",
+ ARMBuildAttrs::CPUArch::v8_R, FK_CRYPTO_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_RDM | AArch64::AEK_SSBS |
+ AArch64::AEK_DOTPROD | AArch64::AEK_FP | AArch64::AEK_SIMD |
+ AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_RAS |
+ AArch64::AEK_RCPC | AArch64::AEK_SB))
#undef AARCH64_ARCH
#ifndef AARCH64_ARCH_EXT_NAME
@@ -91,6 +106,10 @@ AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm
AARCH64_ARCH_EXT_NAME("f32mm", AArch64::AEK_F32MM, "+f32mm", "-f32mm")
AARCH64_ARCH_EXT_NAME("f64mm", AArch64::AEK_F64MM, "+f64mm", "-f64mm")
AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme")
+AARCH64_ARCH_EXT_NAME("ls64", AArch64::AEK_LS64, "+ls64", "-ls64")
+AARCH64_ARCH_EXT_NAME("brbe", AArch64::AEK_BRBE, "+brbe", "-brbe")
+AARCH64_ARCH_EXT_NAME("pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth")
+AARCH64_ARCH_EXT_NAME("flagm", AArch64::AEK_FLAGM, "+flagm", "-flagm")
#undef AARCH64_ARCH_EXT_NAME
#ifndef AARCH64_CPU_NAME
@@ -130,6 +149,11 @@ AARCH64_CPU_NAME("cortex-a77", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
+ AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_LSE))
AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
@@ -140,6 +164,15 @@ AARCH64_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_BF16 | AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
+ AArch64::AEK_I8MM | AArch64::AEK_MTE | AArch64::AEK_RAS |
+ AArch64::AEK_RCPC | AArch64::AEK_SB | AArch64::AEK_SSBS |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM))
+AARCH64_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS |
+ AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 |
+ AArch64::AEK_DOTPROD ))
AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_NONE))
AARCH64_CPU_NAME("apple-a7", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -156,6 +189,8 @@ AARCH64_CPU_NAME("apple-a12", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16))
AARCH64_CPU_NAME("apple-a13", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML))
+AARCH64_CPU_NAME("apple-a14", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16 | AArch64::AEK_FP16FML))
AARCH64_CPU_NAME("apple-s4", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16))
AARCH64_CPU_NAME("apple-s5", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.h b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.h
index b045e31bc92a..7c9e245e3889 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.h
@@ -62,6 +62,10 @@ enum ArchExtKind : uint64_t {
AEK_I8MM = 1 << 30,
AEK_F32MM = 1ULL << 31,
AEK_F64MM = 1ULL << 32,
+ AEK_LS64 = 1ULL << 33,
+ AEK_BRBE = 1ULL << 34,
+ AEK_PAUTH = 1ULL << 35,
+ AEK_FLAGM = 1ULL << 36,
};
enum class ArchKind {
@@ -104,7 +108,7 @@ const ArchKind ArchKinds[] = {
};
// FIXME: These should be moved to TargetTuple once it exists
-bool getExtensionFeatures(unsigned Extensions,
+bool getExtensionFeatures(uint64_t Extensions,
std::vector<StringRef> &Features);
bool getArchFeatures(ArchKind AK, std::vector<StringRef> &Features);
@@ -117,7 +121,7 @@ StringRef getArchExtFeature(StringRef ArchExt);
// Information by Name
unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
-unsigned getDefaultExtensions(StringRef CPU, ArchKind AK);
+uint64_t getDefaultExtensions(StringRef CPU, ArchKind AK);
StringRef getDefaultCPU(StringRef Arch);
ArchKind getCPUArchKind(StringRef CPU);
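The unsigned-to-uint64_t widening above matters because the newly added extension bits (AEK_LS64 and friends) sit above bit 31. A small sketch of the truncation a 32-bit mask would reintroduce; the chosen extensions are arbitrary:

  #include "llvm/Support/AArch64TargetParser.h"
  #include <vector>

  void collectFeatures() {
    uint64_t Ext = llvm::AArch64::AEK_CRC | llvm::AArch64::AEK_LS64 |
                   llvm::AArch64::AEK_PAUTH; // AEK_LS64/AEK_PAUTH are bits 33/35
    std::vector<llvm::StringRef> Features;
    llvm::AArch64::getExtensionFeatures(Ext, Features); // "+crc", "+ls64", "+pauth"
    // An 'unsigned' mask (the old signature) would silently drop both new bits.
  }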
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AMDGPUMetadata.h b/contrib/llvm-project/llvm/include/llvm/Support/AMDGPUMetadata.h
index 920c97f7e112..eadc25870096 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -15,6 +15,7 @@
#ifndef LLVM_SUPPORT_AMDGPUMETADATA_H
#define LLVM_SUPPORT_AMDGPUMETADATA_H
+#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <string>
#include <system_error>
@@ -430,7 +431,7 @@ struct Metadata final {
};
/// Converts \p String to \p HSAMetadata.
-std::error_code fromString(std::string String, Metadata &HSAMetadata);
+std::error_code fromString(StringRef String, Metadata &HSAMetadata);
/// Converts \p HSAMetadata to \p String.
std::error_code toString(Metadata HSAMetadata, std::string &String);
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index d1c2147536a7..bd84da43dff7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -100,7 +100,7 @@ enum : int32_t {
#define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
- COMPUTE_PGM_RSRC2(ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, 0, 1),
+ COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
@@ -162,39 +162,49 @@ struct kernel_descriptor_t {
uint8_t reserved2[6];
};
+enum : uint32_t {
+ GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
+ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
+ RESERVED0_OFFSET = 8,
+ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
+ RESERVED1_OFFSET = 24,
+ COMPUTE_PGM_RSRC3_OFFSET = 44,
+ COMPUTE_PGM_RSRC1_OFFSET = 48,
+ COMPUTE_PGM_RSRC2_OFFSET = 52,
+ KERNEL_CODE_PROPERTIES_OFFSET = 56,
+ RESERVED2_OFFSET = 58,
+};
+
static_assert(
sizeof(kernel_descriptor_t) == 64,
"invalid size for kernel_descriptor_t");
-static_assert(
- offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0,
- "invalid offset for group_segment_fixed_size");
-static_assert(
- offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4,
- "invalid offset for private_segment_fixed_size");
-static_assert(
- offsetof(kernel_descriptor_t, reserved0) == 8,
- "invalid offset for reserved0");
-static_assert(
- offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16,
- "invalid offset for kernel_code_entry_byte_offset");
-static_assert(
- offsetof(kernel_descriptor_t, reserved1) == 24,
- "invalid offset for reserved1");
-static_assert(
- offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44,
- "invalid offset for compute_pgm_rsrc3");
-static_assert(
- offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48,
- "invalid offset for compute_pgm_rsrc1");
-static_assert(
- offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52,
- "invalid offset for compute_pgm_rsrc2");
-static_assert(
- offsetof(kernel_descriptor_t, kernel_code_properties) == 56,
- "invalid offset for kernel_code_properties");
-static_assert(
- offsetof(kernel_descriptor_t, reserved2) == 58,
- "invalid offset for reserved2");
+static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
+ GROUP_SEGMENT_FIXED_SIZE_OFFSET,
+ "invalid offset for group_segment_fixed_size");
+static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
+ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
+ "invalid offset for private_segment_fixed_size");
+static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
+ "invalid offset for reserved0");
+static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
+ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
+ "invalid offset for kernel_code_entry_byte_offset");
+static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
+ "invalid offset for reserved1");
+static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
+ COMPUTE_PGM_RSRC3_OFFSET,
+ "invalid offset for compute_pgm_rsrc3");
+static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
+ COMPUTE_PGM_RSRC1_OFFSET,
+ "invalid offset for compute_pgm_rsrc1");
+static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
+ COMPUTE_PGM_RSRC2_OFFSET,
+ "invalid offset for compute_pgm_rsrc2");
+static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
+ KERNEL_CODE_PROPERTIES_OFFSET,
+ "invalid offset for kernel_code_properties");
+static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET,
+ "invalid offset for reserved2");
} // end namespace amdhsa
} // end namespace llvm
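The named byte offsets above let low-level readers poke at a raw 64-byte kernel descriptor blob without offsetof; a hedged sketch (real consumers go through the MC layer, and Blob is assumed to point at a complete little-endian descriptor):

  #include "llvm/Support/AMDHSAKernelDescriptor.h"
  #include "llvm/Support/Endian.h"

  uint32_t readRsrc2(const uint8_t *Blob) {
    return llvm::support::endian::read32le(
        Blob + llvm::amdhsa::COMPUTE_PGM_RSRC2_OFFSET);
  }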
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.def b/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.def
index 9f51c841e429..37cf0a93bb04 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.def
@@ -118,6 +118,12 @@ ARM_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a",
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES |
ARM::AEK_I8MM))
+ARM_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
+ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
+ ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES |
+ ARM::AEK_I8MM))
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
FK_NEON_FP_ARMV8,
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
@@ -294,12 +300,19 @@ ARM_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("cortex-a77", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
-ARM_CPU_NAME("cortex-a78",ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ARM_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ ARM::AEK_FP16 | ARM::AEK_DOTPROD)
ARM_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (ARM::AEK_BF16 | ARM::AEK_DOTPROD | ARM::AEK_I8MM | ARM::AEK_RAS |
+ ARM::AEK_SB))
+ARM_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_BF16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.h b/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.h
index 4e76b3c4b83e..7dd2abd29212 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.h
@@ -250,7 +250,8 @@ StringRef getSubArch(ArchKind AK);
StringRef getArchExtName(uint64_t ArchExtKind);
StringRef getArchExtFeature(StringRef ArchExt);
bool appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
- std::vector<StringRef> &Features);
+ std::vector<StringRef> &Features,
+ unsigned &ArgFPUKind);
StringRef getHWDivName(uint64_t HWDivKind);
// Information by Name
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/ARMWinEH.h b/contrib/llvm-project/llvm/include/llvm/Support/ARMWinEH.h
index 857a0d3814a8..327aa9804849 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/ARMWinEH.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/ARMWinEH.h
@@ -31,6 +31,9 @@ enum class ReturnType {
/// RuntimeFunction - An entry in the table of procedure data (.pdata)
///
+/// This is ARM specific, but the Function Start RVA, Flag and
+/// ExceptionInformationRVA fields work identically for ARM64.
+///
/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
/// +---------------------------------------------------------------+
@@ -204,6 +207,85 @@ inline uint16_t StackAdjustment(const RuntimeFunction &RF) {
/// purpose (r0-r15) and VFP (d0-d31) registers.
std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
+/// RuntimeFunctionARM64 - An entry in the table of procedure data (.pdata)
+///
+/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
+/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+/// +---------------------------------------------------------------+
+/// | Function Start RVA |
+/// +-----------------+---+-+-------+-----+---------------------+---+
+/// | Frame Size |CR |H| RegI |RegF | Function Length |Flg|
+/// +-----------------+---+-+-------+-----+---------------------+---+
+///
+/// See https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+/// for the full reference for this struct.
+
+class RuntimeFunctionARM64 {
+public:
+ const support::ulittle32_t BeginAddress;
+ const support::ulittle32_t UnwindData;
+
+ RuntimeFunctionARM64(const support::ulittle32_t *Data)
+ : BeginAddress(Data[0]), UnwindData(Data[1]) {}
+
+ RuntimeFunctionARM64(const support::ulittle32_t BeginAddress,
+ const support::ulittle32_t UnwindData)
+ : BeginAddress(BeginAddress), UnwindData(UnwindData) {}
+
+ RuntimeFunctionFlag Flag() const {
+ return RuntimeFunctionFlag(UnwindData & 0x3);
+ }
+
+ uint32_t ExceptionInformationRVA() const {
+ assert(Flag() == RuntimeFunctionFlag::RFF_Unpacked &&
+ "unpacked form required for this operation");
+ return (UnwindData & ~0x3);
+ }
+
+ uint32_t PackedUnwindData() const {
+ assert((Flag() == RuntimeFunctionFlag::RFF_Packed ||
+ Flag() == RuntimeFunctionFlag::RFF_PackedFragment) &&
+ "packed form required for this operation");
+ return (UnwindData & ~0x3);
+ }
+ uint32_t FunctionLength() const {
+ assert((Flag() == RuntimeFunctionFlag::RFF_Packed ||
+ Flag() == RuntimeFunctionFlag::RFF_PackedFragment) &&
+ "packed form required for this operation");
+ return (((UnwindData & 0x00001ffc) >> 2) << 2);
+ }
+ uint8_t RegF() const {
+ assert((Flag() == RuntimeFunctionFlag::RFF_Packed ||
+ Flag() == RuntimeFunctionFlag::RFF_PackedFragment) &&
+ "packed form required for this operation");
+ return ((UnwindData & 0x0000e000) >> 13);
+ }
+ uint8_t RegI() const {
+ assert((Flag() == RuntimeFunctionFlag::RFF_Packed ||
+ Flag() == RuntimeFunctionFlag::RFF_PackedFragment) &&
+ "packed form required for this operation");
+ return ((UnwindData & 0x000f0000) >> 16);
+ }
+ bool H() const {
+ assert((Flag() == RuntimeFunctionFlag::RFF_Packed ||
+ Flag() == RuntimeFunctionFlag::RFF_PackedFragment) &&
+ "packed form required for this operation");
+ return ((UnwindData & 0x00100000) >> 20);
+ }
+ uint8_t CR() const {
+ assert((Flag() == RuntimeFunctionFlag::RFF_Packed ||
+ Flag() == RuntimeFunctionFlag::RFF_PackedFragment) &&
+ "packed form required for this operation");
+ return ((UnwindData & 0x600000) >> 21);
+ }
+ uint16_t FrameSize() const {
+ assert((Flag() == RuntimeFunctionFlag::RFF_Packed ||
+ Flag() == RuntimeFunctionFlag::RFF_PackedFragment) &&
+ "packed form required for this operation");
+ return ((UnwindData & 0xff800000) >> 23);
+ }
+};
+
/// ExceptionDataRecord - An entry in the table of exception data (.xdata)
///
/// The format on ARM is:
@@ -416,12 +498,13 @@ struct ExceptionDataRecord {
uint32_t ExceptionHandlerRVA() const {
assert(X() && "Exception Handler RVA is only valid if the X bit is set");
- return Data[HeaderWords(*this) + EpilogueCount() + CodeWords()];
+ return Data[HeaderWords(*this) + (E() ? 0 : EpilogueCount()) + CodeWords()];
}
uint32_t ExceptionHandlerParameter() const {
assert(X() && "Exception Handler RVA is only valid if the X bit is set");
- return Data[HeaderWords(*this) + EpilogueCount() + CodeWords() + 1];
+ return Data[HeaderWords(*this) + (E() ? 0 : EpilogueCount()) + CodeWords() +
+ 1];
}
};
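A hedged sketch of decoding a packed ARM64 .pdata entry with the new RuntimeFunctionARM64 class; the two little-endian words are invented, not taken from a real image:

  #include "llvm/Support/ARMWinEH.h"

  void decodePackedEntry() {
    llvm::support::ulittle32_t Begin, Unwind;
    Begin = 0x00001000;
    Unwind = 0x012d4031; // low two bits == 01, i.e. the packed form
    llvm::ARM::WinEH::RuntimeFunctionARM64 RF(Begin, Unwind);
    if (RF.Flag() == llvm::ARM::WinEH::RuntimeFunctionFlag::RFF_Packed) {
      uint32_t Length = RF.FunctionLength(); // bytes covered by this record
      uint8_t IntRegs = RF.RegI();           // saved non-volatile integer regs
      uint16_t Frame = RF.FrameSize();       // scaled as in the linked MS docs
      (void)Length;
      (void)IntRegs;
      (void)Frame;
    }
  }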
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AlignOf.h b/contrib/llvm-project/llvm/include/llvm/Support/AlignOf.h
index eb42542b777f..f586d7f182aa 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/AlignOf.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/AlignOf.h
@@ -13,41 +13,20 @@
#ifndef LLVM_SUPPORT_ALIGNOF_H
#define LLVM_SUPPORT_ALIGNOF_H
-#include "llvm/Support/Compiler.h"
-#include <cstddef>
+#include <type_traits>
namespace llvm {
-namespace detail {
-
-template <typename T, typename... Ts> class AlignerImpl {
- T t;
- AlignerImpl<Ts...> rest;
- AlignerImpl() = delete;
-};
-
-template <typename T> class AlignerImpl<T> {
- T t;
- AlignerImpl() = delete;
-};
-
-template <typename T, typename... Ts> union SizerImpl {
- char arr[sizeof(T)];
- SizerImpl<Ts...> rest;
-};
-
-template <typename T> union SizerImpl<T> { char arr[sizeof(T)]; };
-} // end namespace detail
-
/// A suitably aligned and sized character array member which can hold elements
/// of any type.
///
-/// These types may be arrays, structs, or any other types. This exposes a
-/// `buffer` member which can be used as suitable storage for a placement new of
-/// any of these types.
+/// This template is equivalent to std::aligned_union_t<1, ...>, but we cannot
+/// use it due to a bug in the MSVC x86 compiler:
+/// https://github.com/microsoft/STL/issues/1533
+/// Using `alignas` here works around the bug.
template <typename T, typename... Ts> struct AlignedCharArrayUnion {
- alignas(::llvm::detail::AlignerImpl<T, Ts...>) char buffer[sizeof(
- llvm::detail::SizerImpl<T, Ts...>)];
+ using AlignedUnion = std::aligned_union_t<1, T, Ts...>;
+ alignas(alignof(AlignedUnion)) char buffer[sizeof(AlignedUnion)];
};
} // end namespace llvm
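Usage is unchanged by the simplification above: the buffer member still provides raw storage suitable for placement new of any of the listed types. A short illustration:

  #include "llvm/Support/AlignOf.h"
  #include <new>

  void storageExample() {
    llvm::AlignedCharArrayUnion<int, double, void *> Storage;
    double *D = new (Storage.buffer) double(3.14); // construct in place
    *D += 1.0; // trivially destructible, so no explicit destructor call needed
  }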
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Allocator.h b/contrib/llvm-project/llvm/include/llvm/Support/Allocator.h
index 40c967ccc485..245432debce6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Allocator.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Allocator.h
@@ -66,7 +66,8 @@ template <typename AllocatorT = MallocAllocator, size_t SlabSize = 4096,
size_t SizeThreshold = SlabSize, size_t GrowthDelay = 128>
class BumpPtrAllocatorImpl
: public AllocatorBase<BumpPtrAllocatorImpl<AllocatorT, SlabSize,
- SizeThreshold, GrowthDelay>> {
+ SizeThreshold, GrowthDelay>>,
+ private AllocatorT {
public:
static_assert(SizeThreshold <= SlabSize,
"The SizeThreshold must be at most the SlabSize to ensure "
@@ -80,15 +81,15 @@ public:
template <typename T>
BumpPtrAllocatorImpl(T &&Allocator)
- : Allocator(std::forward<T &&>(Allocator)) {}
+ : AllocatorT(std::forward<T &&>(Allocator)) {}
// Manually implement a move constructor as we must clear the old allocator's
// slabs as a matter of correctness.
BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old)
- : CurPtr(Old.CurPtr), End(Old.End), Slabs(std::move(Old.Slabs)),
+ : AllocatorT(static_cast<AllocatorT &&>(Old)), CurPtr(Old.CurPtr),
+ End(Old.End), Slabs(std::move(Old.Slabs)),
CustomSizedSlabs(std::move(Old.CustomSizedSlabs)),
- BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize),
- Allocator(std::move(Old.Allocator)) {
+ BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize) {
Old.CurPtr = Old.End = nullptr;
Old.BytesAllocated = 0;
Old.Slabs.clear();
@@ -110,7 +111,7 @@ public:
RedZoneSize = RHS.RedZoneSize;
Slabs = std::move(RHS.Slabs);
CustomSizedSlabs = std::move(RHS.CustomSizedSlabs);
- Allocator = std::move(RHS.Allocator);
+ AllocatorT::operator=(static_cast<AllocatorT &&>(RHS));
RHS.CurPtr = RHS.End = nullptr;
RHS.BytesAllocated = 0;
@@ -170,7 +171,8 @@ public:
// If Size is really big, allocate a separate slab for it.
size_t PaddedSize = SizeToAllocate + Alignment.value() - 1;
if (PaddedSize > SizeThreshold) {
- void *NewSlab = Allocator.Allocate(PaddedSize, alignof(std::max_align_t));
+ void *NewSlab =
+ AllocatorT::Allocate(PaddedSize, alignof(std::max_align_t));
// We own the new slab and don't want anyone reading anything other than
// pieces returned from this method. So poison the whole slab.
__asan_poison_memory_region(NewSlab, PaddedSize);
@@ -315,9 +317,6 @@ private:
/// a sanitizer.
size_t RedZoneSize = 1;
- /// The allocator instance we use to get slabs of memory.
- AllocatorT Allocator;
-
static size_t computeSlabSize(unsigned SlabIdx) {
// Scale the actual allocated slab size based on the number of slabs
// allocated. Every GrowthDelay slabs allocated, we double
@@ -333,7 +332,7 @@ private:
size_t AllocatedSlabSize = computeSlabSize(Slabs.size());
void *NewSlab =
- Allocator.Allocate(AllocatedSlabSize, alignof(std::max_align_t));
+ AllocatorT::Allocate(AllocatedSlabSize, alignof(std::max_align_t));
// We own the new slab and don't want anyone reading anything other than
// pieces returned from this method. So poison the whole slab.
__asan_poison_memory_region(NewSlab, AllocatedSlabSize);
@@ -349,7 +348,7 @@ private:
for (; I != E; ++I) {
size_t AllocatedSlabSize =
computeSlabSize(std::distance(Slabs.begin(), I));
- Allocator.Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t));
+ AllocatorT::Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t));
}
}
@@ -358,7 +357,7 @@ private:
for (auto &PtrAndSize : CustomSizedSlabs) {
void *Ptr = PtrAndSize.first;
size_t Size = PtrAndSize.second;
- Allocator.Deallocate(Ptr, Size, alignof(std::max_align_t));
+ AllocatorT::Deallocate(Ptr, Size, alignof(std::max_align_t));
}
}
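Making AllocatorT a private base instead of a member lets the empty base optimization kick in for stateless allocators such as the default MallocAllocator. A stand-alone sketch of the size effect; the struct names are stand-ins and exact sizes are implementation-defined:

  struct StatelessAlloc {}; // stand-in for an empty allocator like MallocAllocator
  struct AsMember { StatelessAlloc A; char *CurPtr = nullptr; };
  struct AsBase : private StatelessAlloc { char *CurPtr = nullptr; };
  static_assert(sizeof(AsBase) <= sizeof(AsMember),
                "the empty base needs no dedicated storage");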
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AtomicOrdering.h b/contrib/llvm-project/llvm/include/llvm/Support/AtomicOrdering.h
index a8d89955fa2b..27ca825cef46 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/AtomicOrdering.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/AtomicOrdering.h
@@ -21,7 +21,7 @@
namespace llvm {
-/// Atomic ordering for C11 / C++11's memody models.
+/// Atomic ordering for C11 / C++11's memory models.
///
/// These values cannot change because they are shared with standard library
/// implementations as well as with other compilers.
@@ -87,7 +87,7 @@ inline const char *toIRString(AtomicOrdering ao) {
/// Returns true if ao is stronger than other as defined by the AtomicOrdering
/// lattice, which is based on C++'s definition.
-inline bool isStrongerThan(AtomicOrdering ao, AtomicOrdering other) {
+inline bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other) {
static const bool lookup[8][8] = {
// NA UN RX CO AC RE AR SC
/* NotAtomic */ {false, false, false, false, false, false, false, false},
@@ -99,10 +99,10 @@ inline bool isStrongerThan(AtomicOrdering ao, AtomicOrdering other) {
/* acq_rel */ { true, true, true, true, true, true, false, false},
/* seq_cst */ { true, true, true, true, true, true, true, false},
};
- return lookup[static_cast<size_t>(ao)][static_cast<size_t>(other)];
+ return lookup[static_cast<size_t>(AO)][static_cast<size_t>(Other)];
}
-inline bool isAtLeastOrStrongerThan(AtomicOrdering ao, AtomicOrdering other) {
+inline bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other) {
static const bool lookup[8][8] = {
// NA UN RX CO AC RE AR SC
/* NotAtomic */ { true, false, false, false, false, false, false, false},
@@ -114,26 +114,26 @@ inline bool isAtLeastOrStrongerThan(AtomicOrdering ao, AtomicOrdering other) {
/* acq_rel */ { true, true, true, true, true, true, true, false},
/* seq_cst */ { true, true, true, true, true, true, true, true},
};
- return lookup[static_cast<size_t>(ao)][static_cast<size_t>(other)];
+ return lookup[static_cast<size_t>(AO)][static_cast<size_t>(Other)];
}
-inline bool isStrongerThanUnordered(AtomicOrdering ao) {
- return isStrongerThan(ao, AtomicOrdering::Unordered);
+inline bool isStrongerThanUnordered(AtomicOrdering AO) {
+ return isStrongerThan(AO, AtomicOrdering::Unordered);
}
-inline bool isStrongerThanMonotonic(AtomicOrdering ao) {
- return isStrongerThan(ao, AtomicOrdering::Monotonic);
+inline bool isStrongerThanMonotonic(AtomicOrdering AO) {
+ return isStrongerThan(AO, AtomicOrdering::Monotonic);
}
-inline bool isAcquireOrStronger(AtomicOrdering ao) {
- return isAtLeastOrStrongerThan(ao, AtomicOrdering::Acquire);
+inline bool isAcquireOrStronger(AtomicOrdering AO) {
+ return isAtLeastOrStrongerThan(AO, AtomicOrdering::Acquire);
}
-inline bool isReleaseOrStronger(AtomicOrdering ao) {
- return isAtLeastOrStrongerThan(ao, AtomicOrdering::Release);
+inline bool isReleaseOrStronger(AtomicOrdering AO) {
+ return isAtLeastOrStrongerThan(AO, AtomicOrdering::Release);
}
-inline AtomicOrderingCABI toCABI(AtomicOrdering ao) {
+inline AtomicOrderingCABI toCABI(AtomicOrdering AO) {
static const AtomicOrderingCABI lookup[8] = {
/* NotAtomic */ AtomicOrderingCABI::relaxed,
/* Unordered */ AtomicOrderingCABI::relaxed,
@@ -144,7 +144,7 @@ inline AtomicOrderingCABI toCABI(AtomicOrdering ao) {
/* acq_rel */ AtomicOrderingCABI::acq_rel,
/* seq_cst */ AtomicOrderingCABI::seq_cst,
};
- return lookup[static_cast<size_t>(ao)];
+ return lookup[static_cast<size_t>(AO)];
}
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/BinaryItemStream.h b/contrib/llvm-project/llvm/include/llvm/Support/BinaryItemStream.h
index 4cd66adcc01a..4d27013ce368 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/BinaryItemStream.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/BinaryItemStream.h
@@ -88,8 +88,7 @@ private:
if (Offset >= getLength())
return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
++Offset;
- auto Iter =
- std::lower_bound(ItemEndOffsets.begin(), ItemEndOffsets.end(), Offset);
+ auto Iter = llvm::lower_bound(ItemEndOffsets, Offset);
size_t Idx = std::distance(ItemEndOffsets.begin(), Iter);
assert(Idx < Items.size() && "binary search for offset failed");
return Idx;
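llvm::lower_bound is just the range form of std::lower_bound used above; the two calls land on the same element. A tiny equivalence check:

  #include "llvm/ADT/STLExtras.h"
  #include <algorithm>
  #include <cassert>
  #include <vector>

  void lowerBoundEquivalence() {
    std::vector<uint64_t> ItemEndOffsets = {8, 24, 40};
    auto A = std::lower_bound(ItemEndOffsets.begin(), ItemEndOffsets.end(),
                              uint64_t(25));
    auto B = llvm::lower_bound(ItemEndOffsets, uint64_t(25));
    assert(A == B && "the range wrapper finds the same position");
  }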
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamRef.h b/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamRef.h
index 5375d6a3a761..ba4c3873586d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamRef.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamRef.h
@@ -121,12 +121,12 @@ public:
bool valid() const { return BorrowedImpl != nullptr; }
- bool operator==(const RefType &Other) const {
- if (BorrowedImpl != Other.BorrowedImpl)
+ friend bool operator==(const RefType &LHS, const RefType &RHS) {
+ if (LHS.BorrowedImpl != RHS.BorrowedImpl)
return false;
- if (ViewOffset != Other.ViewOffset)
+ if (LHS.ViewOffset != RHS.ViewOffset)
return false;
- if (Length != Other.Length)
+ if (LHS.Length != RHS.Length)
return false;
return true;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/CFGDiff.h b/contrib/llvm-project/llvm/include/llvm/Support/CFGDiff.h
index 94734ce70e02..c90b9aca78b5 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/CFGDiff.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/CFGDiff.h
@@ -30,67 +30,43 @@
// a non-inversed graph, the children are naturally the successors when
// InverseEdge is false and the predecessors when InverseEdge is true.
-// We define two base clases that call into GraphDiff, one for successors
-// (CFGSuccessors), where InverseEdge is false, and one for predecessors
-// (CFGPredecessors), where InverseEdge is true.
-// FIXME: Further refactoring may merge the two base classes into a single one
-// templated / parametrized on using succ_iterator/pred_iterator and false/true
-// for the InverseEdge.
-
-// CFGViewChildren and CFGViewPredecessors, both can be parametrized to
-// consider the graph inverted or not (i.e. InverseGraph). Successors
-// implicitly has InverseEdge = false and Predecessors implicitly has
-// InverseEdge = true (see calls to GraphDiff methods in there). The GraphTraits
-// instantiations that follow define the value of InverseGraph.
-
-// GraphTraits instantiations:
-// - GraphDiff<BasicBlock *> is equivalent to InverseGraph = false
-// - GraphDiff<Inverse<BasicBlock *>> is equivalent to InverseGraph = true
-// - second pair item is BasicBlock *, then InverseEdge = false (so it inherits
-// from CFGViewChildren).
-// - second pair item is Inverse<BasicBlock *>, then InverseEdge = true (so it
-// inherits from CFGViewPredecessors).
-
-// The 4 GraphTraits are as follows:
-// 1. std::pair<const GraphDiff<BasicBlock *> *, BasicBlock *>> :
-// CFGViewChildren<false>
-// Regular CFG, children means successors, InverseGraph = false,
-// InverseEdge = false.
-// 2. std::pair<const GraphDiff<Inverse<BasicBlock *>> *, BasicBlock *>> :
-// CFGViewChildren<true>
-// Reverse the graph, get successors but reverse-apply updates,
-// InverseGraph = true, InverseEdge = false.
-// 3. std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>> :
-// CFGViewPredecessors<false>
-// Regular CFG, reverse edges, so children mean predecessors,
-// InverseGraph = false, InverseEdge = true.
-// 4. std::pair<const GraphDiff<Inverse<BasicBlock *>> *, Inverse<BasicBlock *>>
-// : CFGViewPredecessors<true>
-// Reverse the graph and the edges, InverseGraph = true, InverseEdge = true.
-
namespace llvm {
-// GraphDiff defines a CFG snapshot: given a set of Update<NodePtr>, provide
-// utilities to skip edges marked as deleted and return a set of edges marked as
-// newly inserted. The current diff treats the CFG as a graph rather than a
+namespace detail {
+template <typename Range>
+auto reverse_if_helper(Range &&R, std::integral_constant<bool, false>) {
+ return std::forward<Range>(R);
+}
+
+template <typename Range>
+auto reverse_if_helper(Range &&R, std::integral_constant<bool, true>) {
+ return llvm::reverse(std::forward<Range>(R));
+}
+
+template <bool B, typename Range> auto reverse_if(Range &&R) {
+ return reverse_if_helper(std::forward<Range>(R),
+ std::integral_constant<bool, B>{});
+}
+} // namespace detail
+
+// GraphDiff defines a CFG snapshot: given a set of Update<NodePtr>, provides
+// a getChildren method to get a Node's children based on the additional updates
+// in the snapshot. The current diff treats the CFG as a graph rather than a
// multigraph. Added edges are pruned to be unique, and deleted edges will
// remove all existing edges between two blocks.
template <typename NodePtr, bool InverseGraph = false> class GraphDiff {
- using UpdateMapType = SmallDenseMap<NodePtr, SmallVector<NodePtr, 2>>;
- struct EdgesInsertedDeleted {
- UpdateMapType Succ;
- UpdateMapType Pred;
+ struct DeletesInserts {
+ SmallVector<NodePtr, 2> DI[2];
};
- // Store Deleted edges on position 0, and Inserted edges on position 1.
- EdgesInsertedDeleted Edges[2];
+ using UpdateMapType = SmallDenseMap<NodePtr, DeletesInserts>;
+ UpdateMapType Succ;
+ UpdateMapType Pred;
+
// By default, it is assumed that, given a CFG and a set of updates, we wish
// to apply these updates as given. If UpdatedAreReverseApplied is set, the
// updates will be applied in reverse: deleted edges are considered re-added
// and inserted edges are considered deleted when returning children.
bool UpdatedAreReverseApplied;
- // Using a singleton empty vector for all node requests with no
- // children.
- SmallVector<NodePtr, 0> Empty;
// Keep the list of legalized updates for a deterministic order of updates
// when using a GraphDiff for incremental updates in the DominatorTree.
@@ -98,14 +74,19 @@ template <typename NodePtr, bool InverseGraph = false> class GraphDiff {
SmallVector<cfg::Update<NodePtr>, 4> LegalizedUpdates;
void printMap(raw_ostream &OS, const UpdateMapType &M) const {
- for (auto Pair : M)
- for (auto Child : Pair.second) {
- OS << "(";
- Pair.first->printAsOperand(OS, false);
- OS << ", ";
- Child->printAsOperand(OS, false);
- OS << ") ";
+ StringRef DIText[2] = {"Delete", "Insert"};
+ for (auto Pair : M) {
+ for (unsigned IsInsert = 0; IsInsert <= 1; ++IsInsert) {
+ OS << DIText[IsInsert] << " edges: \n";
+ for (auto Child : Pair.second.DI[IsInsert]) {
+ OS << "(";
+ Pair.first->printAsOperand(OS, false);
+ OS << ", ";
+ Child->printAsOperand(OS, false);
+ OS << ") ";
+ }
}
+ }
OS << "\n";
}
@@ -113,15 +94,12 @@ public:
GraphDiff() : UpdatedAreReverseApplied(false) {}
GraphDiff(ArrayRef<cfg::Update<NodePtr>> Updates,
bool ReverseApplyUpdates = false) {
- cfg::LegalizeUpdates<NodePtr>(Updates, LegalizedUpdates, InverseGraph,
- /*ReverseResultOrder=*/true);
- // The legalized updates are stored in reverse so we can pop_back when doing
- // incremental updates.
+ cfg::LegalizeUpdates<NodePtr>(Updates, LegalizedUpdates, InverseGraph);
for (auto U : LegalizedUpdates) {
unsigned IsInsert =
(U.getKind() == cfg::UpdateKind::Insert) == !ReverseApplyUpdates;
- Edges[IsInsert].Succ[U.getFrom()].push_back(U.getTo());
- Edges[IsInsert].Pred[U.getTo()].push_back(U.getFrom());
+ Succ[U.getFrom()].DI[IsInsert].push_back(U.getTo());
+ Pred[U.getTo()].DI[IsInsert].push_back(U.getFrom());
}
UpdatedAreReverseApplied = ReverseApplyUpdates;
}
@@ -137,55 +115,56 @@ public:
auto U = LegalizedUpdates.pop_back_val();
unsigned IsInsert =
(U.getKind() == cfg::UpdateKind::Insert) == !UpdatedAreReverseApplied;
- auto &SuccList = Edges[IsInsert].Succ[U.getFrom()];
+ auto &SuccDIList = Succ[U.getFrom()];
+ auto &SuccList = SuccDIList.DI[IsInsert];
assert(SuccList.back() == U.getTo());
SuccList.pop_back();
- if (SuccList.empty())
- Edges[IsInsert].Succ.erase(U.getFrom());
+ if (SuccList.empty() && SuccDIList.DI[!IsInsert].empty())
+ Succ.erase(U.getFrom());
- auto &PredList = Edges[IsInsert].Pred[U.getTo()];
+ auto &PredDIList = Pred[U.getTo()];
+ auto &PredList = PredDIList.DI[IsInsert];
assert(PredList.back() == U.getFrom());
PredList.pop_back();
- if (PredList.empty())
- Edges[IsInsert].Pred.erase(U.getTo());
+ if (PredList.empty() && PredDIList.DI[!IsInsert].empty())
+ Pred.erase(U.getTo());
return U;
}
- bool ignoreChild(const NodePtr BB, NodePtr EdgeEnd, bool InverseEdge) const {
- // Used to filter nullptr in clang.
- if (EdgeEnd == nullptr)
- return true;
- auto &DeleteChildren =
- (InverseEdge != InverseGraph) ? Edges[0].Pred : Edges[0].Succ;
- auto It = DeleteChildren.find(BB);
- if (It == DeleteChildren.end())
- return false;
- auto &EdgesForBB = It->second;
- return llvm::find(EdgesForBB, EdgeEnd) != EdgesForBB.end();
- }
+ using VectRet = SmallVector<NodePtr, 8>;
+ template <bool InverseEdge> VectRet getChildren(NodePtr N) const {
+ using DirectedNodeT =
+ std::conditional_t<InverseEdge, Inverse<NodePtr>, NodePtr>;
+ auto R = children<DirectedNodeT>(N);
+ VectRet Res = VectRet(detail::reverse_if<!InverseEdge>(R));
+
+ // Remove nullptr children for clang.
+ llvm::erase_value(Res, nullptr);
+
+ auto &Children = (InverseEdge != InverseGraph) ? Pred : Succ;
+ auto It = Children.find(N);
+ if (It == Children.end())
+ return Res;
- iterator_range<typename SmallVectorImpl<NodePtr>::const_iterator>
- getAddedChildren(const NodePtr BB, bool InverseEdge) const {
- auto &InsertChildren =
- (InverseEdge != InverseGraph) ? Edges[1].Pred : Edges[1].Succ;
- auto It = InsertChildren.find(BB);
- if (It == InsertChildren.end())
- return make_range(Empty.begin(), Empty.end());
- return make_range(It->second.begin(), It->second.end());
+ // Remove children present in the CFG but not in the snapshot.
+ for (auto *Child : It->second.DI[0])
+ llvm::erase_value(Res, Child);
+
+ // Add children present in the snapshot but not in the real CFG.
+ // Add children present in the snapshot but not in the real CFG.
+ auto &AddedChildren = It->second.DI[1];
+ llvm::append_range(Res, AddedChildren);
+
+ return Res;
}
void print(raw_ostream &OS) const {
OS << "===== GraphDiff: CFG edge changes to create a CFG snapshot. \n"
"===== (Note: notion of children/inverse_children depends on "
"the direction of edges and the graph.)\n";
- OS << "Children to insert:\n\t";
- printMap(OS, Edges[1].Succ);
- OS << "Children to delete:\n\t";
- printMap(OS, Edges[0].Succ);
- OS << "Inverse_children to insert:\n\t";
- printMap(OS, Edges[1].Pred);
- OS << "Inverse_children to delete:\n\t";
- printMap(OS, Edges[0].Pred);
+ OS << "Children to delete/insert:\n\t";
+ printMap(OS, Succ);
+ OS << "Inverse_children to delete/insert:\n\t";
+ printMap(OS, Pred);
OS << "\n";
}
@@ -193,58 +172,6 @@ public:
LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif
};
-
-template <typename GraphT, bool InverseGraph = false, bool InverseEdge = false,
- typename GT = GraphTraits<GraphT>>
-struct CFGViewChildren {
- using DataRef = const GraphDiff<typename GT::NodeRef, InverseGraph> *;
- using NodeRef = std::pair<DataRef, typename GT::NodeRef>;
-
- template<typename Range>
- static auto makeChildRange(Range &&R, DataRef DR) {
- using Iter = WrappedPairNodeDataIterator<decltype(std::forward<Range>(R).begin()), NodeRef, DataRef>;
- return make_range(Iter(R.begin(), DR), Iter(R.end(), DR));
- }
-
- static auto children(NodeRef N) {
-
- // filter iterator init:
- auto R = make_range(GT::child_begin(N.second), GT::child_end(N.second));
- // This lambda is copied into the iterators and persists to callers, ensure
- // captures are by value or otherwise have sufficient lifetime.
- auto First = make_filter_range(makeChildRange(R, N.first), [N](NodeRef C) {
- return !C.first->ignoreChild(N.second, C.second, InverseEdge);
- });
-
- // new inserts iterator init:
- auto InsertVec = N.first->getAddedChildren(N.second, InverseEdge);
- auto Second = makeChildRange(InsertVec, N.first);
-
- auto CR = concat<NodeRef>(First, Second);
-
- // concat_range contains references to other ranges, returning it would
- // leave those references dangling - the iterators contain
- // other iterators by value so they're safe to return.
- return make_range(CR.begin(), CR.end());
- }
-
- static auto child_begin(NodeRef N) {
- return children(N).begin();
- }
-
- static auto child_end(NodeRef N) {
- return children(N).end();
- }
-
- using ChildIteratorType = decltype(child_end(std::declval<NodeRef>()));
-};
-
-template <typename T, bool B>
-struct GraphTraits<std::pair<const GraphDiff<T, B> *, T>>
- : CFGViewChildren<T, B> {};
-template <typename T, bool B>
-struct GraphTraits<std::pair<const GraphDiff<T, B> *, Inverse<T>>>
- : CFGViewChildren<Inverse<T>, B, true> {};
} // end namespace llvm
#endif // LLVM_SUPPORT_CFGDIFF_H
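
The rewritten GraphDiff above replaces the old ignoreChild/getAddedChildren pair with a single getChildren(): start from the real CFG children, drop edges recorded as deleted, append edges recorded as inserted. A simplified standalone sketch of that filtering over plain containers; the types and main() are illustrative only and not the LLVM implementation.

#include <algorithm>
#include <cassert>
#include <map>
#include <vector>

using Node = int;
struct DeletesInserts {
  std::vector<Node> DI[2]; // DI[0] = deleted edges, DI[1] = inserted edges
};

// Children of N in the snapshot: CFG children minus deletions plus insertions.
std::vector<Node> getChildren(const std::vector<Node> &CFGChildren, Node N,
                              const std::map<Node, DeletesInserts> &Succ) {
  std::vector<Node> Res = CFGChildren;
  auto It = Succ.find(N);
  if (It == Succ.end())
    return Res;
  for (Node Child : It->second.DI[0]) // remove edges marked deleted
    Res.erase(std::remove(Res.begin(), Res.end(), Child), Res.end());
  for (Node Child : It->second.DI[1]) // append edges marked inserted
    Res.push_back(Child);
  return Res;
}

int main() {
  std::map<Node, DeletesInserts> Succ;
  Succ[1].DI[0] = {2}; // delete edge 1 -> 2
  Succ[1].DI[1] = {4}; // insert edge 1 -> 4
  std::vector<Node> Children = getChildren({2, 3}, 1, Succ);
  assert((Children == std::vector<Node>{3, 4}));
  return 0;
}
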
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/CFGUpdate.h b/contrib/llvm-project/llvm/include/llvm/Support/CFGUpdate.h
index af4cd6ed1f1d..3a12b9d86c18 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/CFGUpdate.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/CFGUpdate.h
@@ -14,7 +14,6 @@
#ifndef LLVM_SUPPORT_CFGUPDATE_H
#define LLVM_SUPPORT_CFGUPDATE_H
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/Compiler.h"
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/CheckedArithmetic.h b/contrib/llvm-project/llvm/include/llvm/Support/CheckedArithmetic.h
index 035e4533322c..09e6d7ec95dc 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/CheckedArithmetic.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/CheckedArithmetic.h
@@ -28,8 +28,8 @@ template <typename T, typename F>
std::enable_if_t<std::is_integral<T>::value && sizeof(T) * 8 <= 64,
llvm::Optional<T>>
checkedOp(T LHS, T RHS, F Op, bool Signed = true) {
- llvm::APInt ALHS(/*BitSize=*/sizeof(T) * 8, LHS, Signed);
- llvm::APInt ARHS(/*BitSize=*/sizeof(T) * 8, RHS, Signed);
+ llvm::APInt ALHS(sizeof(T) * 8, LHS, Signed);
+ llvm::APInt ARHS(sizeof(T) * 8, RHS, Signed);
bool Overflow;
llvm::APInt Out = (ALHS.*Op)(ARHS, Overflow);
if (Overflow)
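
checkedOp() above widens both operands into an APInt of the operand's bit width and signals overflow through an empty Optional. A hedged usage sketch of the public wrappers declared in this header (checkedAdd and checkedMul); the surrounding main() is illustrative.

#include "llvm/Support/CheckedArithmetic.h"
#include <cstdint>
#include <cstdio>

int main() {
  // Overflows int32_t, so the Optional comes back empty.
  if (llvm::Optional<int32_t> Sum = llvm::checkedAdd<int32_t>(INT32_MAX, 1))
    std::printf("sum = %d\n", *Sum);
  else
    std::printf("checkedAdd overflowed\n");

  // Fits in int64_t, so the value is returned.
  if (llvm::Optional<int64_t> Prod = llvm::checkedMul<int64_t>(1 << 20, 3))
    std::printf("prod = %lld\n", static_cast<long long>(*Prod));
  return 0;
}
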
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/CommandLine.h b/contrib/llvm-project/llvm/include/llvm/Support/CommandLine.h
index 466945e40a9c..0706aa226c0e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/CommandLine.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/CommandLine.h
@@ -71,13 +71,6 @@ bool ParseCommandLineOptions(int argc, const char *const *argv,
const char *EnvVar = nullptr,
bool LongOptionsUseDoubleDash = false);
-//===----------------------------------------------------------------------===//
-// ParseEnvironmentOptions - Environment variable option processing alternate
-// entry point.
-//
-void ParseEnvironmentOptions(const char *progName, const char *envvar,
- const char *Overview = "");
-
// Function pointer type for printing version information.
using VersionPrinterTy = std::function<void(raw_ostream &)>;
@@ -376,9 +369,22 @@ public:
virtual void setDefault() = 0;
+ // Prints the help string for an option.
+ //
+ // This maintains the Indent for multi-line descriptions.
+ // FirstLineIndentedBy is the count of chars of the first line
+ // i.e. the one containing the --<option name>.
static void printHelpStr(StringRef HelpStr, size_t Indent,
size_t FirstLineIndentedBy);
+ // Prints the help string for an enum value.
+ //
+ // This maintains the Indent for multi-line descriptions.
+ // FirstLineIndentedBy is the count of chars of the first line
+ // i.e. the one containing the =<value>.
+ static void printEnumValHelpStr(StringRef HelpStr, size_t Indent,
+ size_t FirstLineIndentedBy);
+
virtual void getExtraOptionNames(SmallVectorImpl<StringRef> &) {}
// addOccurrence - Wrapper around handleOccurrence that enforces Flags.
@@ -679,7 +685,7 @@ public:
: Values(Options) {}
template <class Opt> void apply(Opt &O) const {
- for (auto Value : Values)
+ for (const auto &Value : Values)
O.getParser().addLiteralOption(Value.Name, Value.Value,
Value.Description);
}
@@ -1488,7 +1494,7 @@ public:
template <class... Mods>
explicit opt(const Mods &... Ms)
- : Option(Optional, NotHidden), Parser(*this) {
+ : Option(llvm::cl::Optional, NotHidden), Parser(*this) {
apply(this, Ms...);
done();
}
@@ -2092,6 +2098,14 @@ bool ExpandResponseFiles(
llvm::vfs::FileSystem &FS = *llvm::vfs::getRealFileSystem(),
llvm::Optional<llvm::StringRef> CurrentDir = llvm::None);
+/// A convenience helper which concatenates the options specified by the
+/// environment variable EnvVar and command line options, then expands response
+/// files recursively. The tokenizer is a predefined GNU or Windows one.
+/// \return true if all @files were expanded successfully or there were none.
+bool expandResponseFiles(int Argc, const char *const *Argv, const char *EnvVar,
+ StringSaver &Saver,
+ SmallVectorImpl<const char *> &NewArgv);
+
/// Mark all options not part of this category as cl::ReallyHidden.
///
/// \param Category the category of options to keep displaying
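
ParseEnvironmentOptions is removed above, and the new cl::expandResponseFiles overload declared in this hunk covers the environment-variable case as part of ordinary response-file expansion. A hedged sketch of a driver calling it, using only the signature shown; the MYTOOL_OPTIONS variable name is made up, and how NewArgv relates to argv[0] is not shown here.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"

int main(int argc, const char **argv) {
  llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver Saver(Alloc);
  llvm::SmallVector<const char *, 32> NewArgv;

  // Concatenate options from MYTOOL_OPTIONS (hypothetical variable name) with
  // the command line, expanding any @response files along the way.
  if (!llvm::cl::expandResponseFiles(argc, argv, "MYTOOL_OPTIONS", Saver,
                                     NewArgv)) {
    llvm::errs() << "response file expansion failed\n";
    return 1;
  }

  for (const char *Arg : NewArgv)
    llvm::outs() << Arg << "\n";
  return 0;
}
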
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Compiler.h b/contrib/llvm-project/llvm/include/llvm/Support/Compiler.h
index 80ea76240d6c..9348ada91325 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Compiler.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Compiler.h
@@ -146,7 +146,7 @@
/// LLVM_NODISCARD - Warn if a type or return value is discarded.
// Use the 'nodiscard' attribute in C++17 or newer mode.
-#if __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(nodiscard)
+#if defined(__cplusplus) && __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(nodiscard)
#define LLVM_NODISCARD [[nodiscard]]
#elif LLVM_HAS_CPP_ATTRIBUTE(clang::warn_unused_result)
#define LLVM_NODISCARD [[clang::warn_unused_result]]
@@ -234,11 +234,11 @@
/// 3.4 supported this but is buggy in various cases and produces unimplemented
/// errors, just use it in GCC 4.0 and later.
#if __has_attribute(always_inline) || LLVM_GNUC_PREREQ(4, 0, 0)
-#define LLVM_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
+#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define LLVM_ATTRIBUTE_ALWAYS_INLINE __forceinline
#else
-#define LLVM_ATTRIBUTE_ALWAYS_INLINE
+#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline
#endif
#ifdef __GNUC__
@@ -268,7 +268,7 @@
#endif
/// LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
-#if __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(fallthrough)
+#if defined(__cplusplus) && __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(fallthrough)
#define LLVM_FALLTHROUGH [[fallthrough]]
#elif LLVM_HAS_CPP_ATTRIBUTE(gnu::fallthrough)
#define LLVM_FALLTHROUGH [[gnu::fallthrough]]
@@ -314,19 +314,9 @@
#endif
// LLVM_ATTRIBUTE_DEPRECATED(decl, "message")
-#if __has_feature(attribute_deprecated_with_message)
-# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \
- decl __attribute__((deprecated(message)))
-#elif defined(__GNUC__)
-# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \
- decl __attribute__((deprecated))
-#elif defined(_MSC_VER)
-# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \
- __declspec(deprecated(message)) decl
-#else
-# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \
- decl
-#endif
+// This macro will be removed.
+// Use C++14's attribute instead: [[deprecated("message")]]
+#define LLVM_ATTRIBUTE_DEPRECATED(decl, message) [[deprecated(message)]] decl
/// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands
/// to an expression which states that it is undefined behavior for the
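
LLVM_ATTRIBUTE_DEPRECATED now expands straight to the standard C++14 attribute, so the per-compiler branches above disappear. A tiny standalone illustration of a use site; the function names are made up.

#include <cstdio>

// The old macro call LLVM_ATTRIBUTE_DEPRECATED(void legacyApi(), "use newApi")
// now expands to the declaration below; new code can write the attribute
// directly.
[[deprecated("use newApi instead")]] void legacyApi() { std::puts("legacy"); }

void newApi() { std::puts("new"); }

int main() {
  newApi();       // fine
  // legacyApi(); // would compile, but with a deprecation warning
  return 0;
}
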
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/CrashRecoveryContext.h b/contrib/llvm-project/llvm/include/llvm/Support/CrashRecoveryContext.h
index 61a1bd405a4d..f756635ee1f9 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -44,11 +44,11 @@ class CrashRecoveryContextCleanup;
/// executed in any case, whether crash occurs or not. These actions may be used
/// to reclaim resources in the case of crash.
class CrashRecoveryContext {
- void *Impl;
- CrashRecoveryContextCleanup *head;
+ void *Impl = nullptr;
+ CrashRecoveryContextCleanup *head = nullptr;
public:
- CrashRecoveryContext() : Impl(nullptr), head(nullptr) {}
+ CrashRecoveryContext();
~CrashRecoveryContext();
/// Register cleanup handler, which is used when the recovery context is
@@ -102,6 +102,10 @@ public:
LLVM_ATTRIBUTE_NORETURN
void HandleExit(int RetCode);
+ /// Rethrow a signal or an exception after it was caught once by a
+ /// Rethrow a signal or an exception after it was caught once by a
+ /// CrashRecoveryContext.
+ static bool throwIfCrash(int RetCode);
+
/// In case of a crash, this is the crash identifier.
int RetCode = 0;
@@ -181,7 +185,7 @@ public:
: CrashRecoveryContextCleanupBase<
CrashRecoveryContextDestructorCleanup<T>, T>(context, resource) {}
- virtual void recoverResources() {
+ void recoverResources() override {
this->resource->~T();
}
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/DOTGraphTraits.h b/contrib/llvm-project/llvm/include/llvm/Support/DOTGraphTraits.h
index ec01b7d9576a..a73538fa1462 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/DOTGraphTraits.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/DOTGraphTraits.h
@@ -60,7 +60,8 @@ public:
/// isNodeHidden - If the function returns true, the given node is not
/// displayed in the graph.
- static bool isNodeHidden(const void *) {
+ template <typename GraphType>
+ static bool isNodeHidden(const void *, const GraphType &) {
return false;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Error.h b/contrib/llvm-project/llvm/include/llvm/Support/Error.h
index 9dd1bb7cb96d..c0f7c10aefb4 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Error.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Error.h
@@ -629,22 +629,22 @@ private:
storage_type *getStorage() {
assert(!HasError && "Cannot get value when an error exists!");
- return reinterpret_cast<storage_type *>(TStorage.buffer);
+ return reinterpret_cast<storage_type *>(&TStorage);
}
const storage_type *getStorage() const {
assert(!HasError && "Cannot get value when an error exists!");
- return reinterpret_cast<const storage_type *>(TStorage.buffer);
+ return reinterpret_cast<const storage_type *>(&TStorage);
}
error_type *getErrorStorage() {
assert(HasError && "Cannot get error when a value exists!");
- return reinterpret_cast<error_type *>(ErrorStorage.buffer);
+ return reinterpret_cast<error_type *>(&ErrorStorage);
}
const error_type *getErrorStorage() const {
assert(HasError && "Cannot get error when a value exists!");
- return reinterpret_cast<const error_type *>(ErrorStorage.buffer);
+ return reinterpret_cast<const error_type *>(&ErrorStorage);
}
// Used by ExpectedAsOutParameter to reset the checked flag.
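
This hunk and the matching ErrorOr.h change below reflect the storage members no longer exposing a .buffer array (upstream AlignedCharArrayUnion became a plain aligned type), so the address of the storage object itself is cast. A standalone sketch of the underlying lazily-constructed-storage pattern with std::aligned_storage; it is illustrative only and is not LLVM's Expected.

#include <cassert>
#include <new>
#include <string>
#include <type_traits>
#include <utility>

template <typename T> class Slot {
  // Raw, suitably aligned bytes; the T is constructed lazily in place.
  std::aligned_storage_t<sizeof(T), alignof(T)> Storage;
  bool Constructed = false;

  // Cast the storage's own address; there is no .buffer member to go through.
  T *get() { return reinterpret_cast<T *>(&Storage); }

public:
  template <typename... Args> void emplace(Args &&... A) {
    new (&Storage) T(std::forward<Args>(A)...);
    Constructed = true;
  }
  T &value() {
    assert(Constructed && "no value constructed");
    return *get();
  }
  ~Slot() {
    if (Constructed)
      get()->~T();
  }
};

int main() {
  Slot<std::string> S;
  S.emplace("hello");
  assert(S.value() == "hello");
  return 0;
}
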
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/ErrorHandling.h b/contrib/llvm-project/llvm/include/llvm/Support/ErrorHandling.h
index 7cbc668b3a0e..0ec0242d569d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/ErrorHandling.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/ErrorHandling.h
@@ -110,9 +110,9 @@ void install_out_of_memory_new_handler();
/// the following unwind succeeds, e.g. do not trigger additional allocations
/// in the unwind chain.
///
-/// If no error handler is installed (default), then a bad_alloc exception
-/// is thrown, if LLVM is compiled with exception support, otherwise an
-/// assertion is called.
+/// If no error handler is installed (default), throws a bad_alloc exception
+/// if LLVM is compiled with exception support. Otherwise prints the error
+/// to standard error and calls abort().
LLVM_ATTRIBUTE_NORETURN void report_bad_alloc_error(const char *Reason,
bool GenCrashDiag = true);
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/ErrorOr.h b/contrib/llvm-project/llvm/include/llvm/Support/ErrorOr.h
index 1fbccc1d1e26..b654c9c9c43b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/ErrorOr.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/ErrorOr.h
@@ -235,17 +235,17 @@ private:
storage_type *getStorage() {
assert(!HasError && "Cannot get value when an error exists!");
- return reinterpret_cast<storage_type*>(TStorage.buffer);
+ return reinterpret_cast<storage_type *>(&TStorage);
}
const storage_type *getStorage() const {
assert(!HasError && "Cannot get value when an error exists!");
- return reinterpret_cast<const storage_type*>(TStorage.buffer);
+ return reinterpret_cast<const storage_type *>(&TStorage);
}
std::error_code *getErrorStorage() {
assert(HasError && "Cannot get error when a value exists!");
- return reinterpret_cast<std::error_code *>(ErrorStorage.buffer);
+ return reinterpret_cast<std::error_code *>(&ErrorStorage);
}
const std::error_code *getErrorStorage() const {
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/ExitCodes.h b/contrib/llvm-project/llvm/include/llvm/Support/ExitCodes.h
new file mode 100644
index 000000000000..b9041f5557d5
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Support/ExitCodes.h
@@ -0,0 +1,33 @@
+//===-- llvm/Support/ExitCodes.h - Exit codes for exit() -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains definitions of exit codes for exit() function. They are
+/// either defined by sysexits.h if it is supported, or defined here if
+/// sysexits.h is not supported.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_EXITCODES_H
+#define LLVM_SUPPORT_EXITCODES_H
+
+#include "llvm/Config/llvm-config.h"
+
+#if HAVE_SYSEXITS_H
+#include <sysexits.h>
+#elif __MVS__
+// <sysexits.h> does not exist on z/OS. The only value used in LLVM is
+// EX_IOERR, which is used to signal a special error condition (broken pipe).
+// Define the macro with its usual value from BSD systems, which is chosen to
+// not clash with more standard exit codes like 1.
+#define EX_IOERR 74
+#elif LLVM_ON_UNIX
+#error Exit code EX_IOERR not available
+#endif
+
+#endif
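
The new header exists so code can exit with EX_IOERR (the sysexits.h convention used for the broken-pipe case) without caring whether sysexits.h is available. A hedged sketch of that use; the write and the errno check are illustrative.

#include "llvm/Support/ExitCodes.h"
#include <cerrno>
#include <cstdio>
#include <cstdlib>

int main() {
  // If stdout is a pipe whose reader has gone away, writes fail with EPIPE.
  if (std::fputs("hello\n", stdout) == EOF && errno == EPIPE) {
    // Exit with the BSD "I/O error" code rather than a generic failure.
    std::exit(EX_IOERR);
  }
  return EXIT_SUCCESS;
}
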
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/FileCollector.h b/contrib/llvm-project/llvm/include/llvm/Support/FileCollector.h
index 2b5e9c669b68..8ea344a347d3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/FileCollector.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/FileCollector.h
@@ -20,6 +20,35 @@ namespace llvm {
class FileCollectorFileSystem;
class Twine;
+class FileCollectorBase {
+public:
+ FileCollectorBase();
+ virtual ~FileCollectorBase();
+
+ void addFile(const Twine &file);
+ void addDirectory(const Twine &Dir);
+
+protected:
+ bool markAsSeen(StringRef Path) {
+ if (Path.empty())
+ return false;
+ return Seen.insert(Path).second;
+ }
+
+ virtual void addFileImpl(StringRef SrcPath) = 0;
+
+ virtual llvm::vfs::directory_iterator
+ addDirectoryImpl(const llvm::Twine &Dir,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS,
+ std::error_code &EC) = 0;
+
+ /// Synchronizes access to internal data structures.
+ std::mutex Mutex;
+
+ /// Tracks already seen files so they can be skipped.
+ StringSet<> Seen;
+};
+
/// Captures file system interaction and generates data to be later replayed
/// with the RedirectingFileSystem.
///
@@ -38,16 +67,34 @@ class Twine;
///
/// In order to preserve the relative topology of files we use their real paths
/// as relative paths inside of the Root.
-class FileCollector {
+class FileCollector : public FileCollectorBase {
public:
+ /// Helper utility that encapsulates the logic for canonicalizing a virtual
+ /// path and a path to copy from.
+ class PathCanonicalizer {
+ public:
+ struct PathStorage {
+ SmallString<256> CopyFrom;
+ SmallString<256> VirtualPath;
+ };
+
+ /// Canonicalize a pair of virtual and real paths.
+ PathStorage canonicalize(StringRef SrcPath);
+
+ private:
+ /// Replace with a (mostly) real path, or don't modify. Resolves symlinks
+ /// in the directory, using \a CachedDirs to avoid redundant lookups, but
+ /// leaves the filename as a possible symlink.
+ void updateWithRealPath(SmallVectorImpl<char> &Path);
+
+ StringMap<std::string> CachedDirs;
+ };
+
/// \p Root is the directory where collected files will be stored.
/// \p OverlayRoot is VFS mapping root.
/// \p Root directory gets created in copyFiles unless it already exists.
FileCollector(std::string Root, std::string OverlayRoot);
- void addFile(const Twine &file);
- void addDirectory(const Twine &Dir);
-
/// Write the yaml mapping (for the VFS) to the given file.
std::error_code writeMapping(StringRef MappingFile);
@@ -67,14 +114,6 @@ public:
private:
friend FileCollectorFileSystem;
- bool markAsSeen(StringRef Path) {
- if (Path.empty())
- return false;
- return Seen.insert(Path).second;
- }
-
- bool getRealPath(StringRef SrcPath, SmallVectorImpl<char> &Result);
-
void addFileToMapping(StringRef VirtualPath, StringRef RealPath) {
if (sys::fs::is_directory(VirtualPath))
VFSWriter.addDirectoryMapping(VirtualPath, RealPath);
@@ -83,14 +122,12 @@ private:
}
protected:
- void addFileImpl(StringRef SrcPath);
+ void addFileImpl(StringRef SrcPath) override;
llvm::vfs::directory_iterator
addDirectoryImpl(const llvm::Twine &Dir,
- IntrusiveRefCntPtr<vfs::FileSystem> FS, std::error_code &EC);
-
- /// Synchronizes access to Seen, VFSWriter and SymlinkMap.
- std::mutex Mutex;
+ IntrusiveRefCntPtr<vfs::FileSystem> FS,
+ std::error_code &EC) override;
/// The directory where collected files are copied to in copyFiles().
const std::string Root;
@@ -98,14 +135,11 @@ protected:
/// The root directory where the VFS overlay lives.
const std::string OverlayRoot;
- /// Tracks already seen files so they can be skipped.
- StringSet<> Seen;
-
/// The yaml mapping writer.
vfs::YAMLVFSWriter VFSWriter;
- /// Caches RealPath calls when resolving symlinks.
- StringMap<std::string> SymlinkMap;
+ /// Helper utility for canonicalizing paths.
+ PathCanonicalizer Canonicalizer;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/FileSystem.h b/contrib/llvm-project/llvm/include/llvm/Support/FileSystem.h
index a29a9d787947..2483aae046f5 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/FileSystem.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/FileSystem.h
@@ -34,6 +34,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem/UniqueID.h"
#include "llvm/Support/MD5.h"
#include <cassert>
#include <cstdint>
@@ -42,7 +43,6 @@
#include <stack>
#include <string>
#include <system_error>
-#include <tuple>
#include <vector>
#ifdef HAVE_SYS_STAT_H
@@ -131,26 +131,6 @@ inline perms operator~(perms x) {
static_cast<unsigned short>(~static_cast<unsigned short>(x)));
}
-class UniqueID {
- uint64_t Device;
- uint64_t File;
-
-public:
- UniqueID() = default;
- UniqueID(uint64_t Device, uint64_t File) : Device(Device), File(File) {}
-
- bool operator==(const UniqueID &Other) const {
- return Device == Other.Device && File == Other.File;
- }
- bool operator!=(const UniqueID &Other) const { return !(*this == Other); }
- bool operator<(const UniqueID &Other) const {
- return std::tie(Device, File) < std::tie(Other.Device, Other.File);
- }
-
- uint64_t getDevice() const { return Device; }
- uint64_t getFile() const { return File; }
-};
-
/// Represents the result of a call to directory_iterator::status(). This is a
/// subset of the information returned by a regular sys::fs::status() call, and
/// represents the information provided by Windows FileFirstFile/FindNextFile.
@@ -1131,6 +1111,43 @@ Expected<file_t>
openNativeFileForRead(const Twine &Name, OpenFlags Flags = OF_None,
SmallVectorImpl<char> *RealPath = nullptr);
+/// Tries to lock the file within the specified timeout.
+///
+/// This function implements advisory locking on the entire file. If it returns
+/// <em>errc::success</em>, the file is locked by the calling process. Until the
+/// process unlocks the file by calling \a unlockFile, all attempts to lock the
+/// same file will fail/block. The process that locked the file may assume that
+/// no other process reads or writes this file, provided that all processes
+/// lock the file prior to accessing its content.
+///
+/// @param FD The descriptor representing the file to lock.
+/// @param Timeout Time in milliseconds that the process should wait before
+/// reporting lock failure. Zero value means try to get lock only
+/// once.
+/// @returns errc::success if lock is successfully obtained,
+/// errc::no_lock_available if the file cannot be locked, or platform-specific
+/// error_code otherwise.
+///
+/// @note Care should be taken when using this function in a multithreaded
+/// context, as it may not prevent other threads in the same process from
+/// obtaining a lock on the same file, even if they are using a different file
+/// descriptor.
+std::error_code
+tryLockFile(int FD,
+ std::chrono::milliseconds Timeout = std::chrono::milliseconds(0));
+
+/// Lock the file.
+///
+/// This function acts like @ref tryLockFile but waits indefinitely.
+std::error_code lockFile(int FD);
+
+/// Unlock the file.
+///
+/// @param FD The descriptor representing the file to unlock.
+/// @returns errc::success if lock is successfully released or platform-specific
+/// error_code otherwise.
+std::error_code unlockFile(int FD);
+
/// @brief Close the file object. This should be used instead of ::close for
/// portability. On error, the caller should assume the file is closed, as is
/// the case for Process::SafelyCloseFileDescriptor
@@ -1142,6 +1159,35 @@ openNativeFileForRead(const Twine &Name, OpenFlags Flags = OF_None,
/// means that the filesystem may have failed to perform some buffered writes.
std::error_code closeFile(file_t &F);
+/// RAII class that facilitates file locking.
+class FileLocker {
+ int FD; ///< Locked file handle.
+ FileLocker(int FD) : FD(FD) {}
+ friend class llvm::raw_fd_ostream;
+
+public:
+ FileLocker(const FileLocker &L) = delete;
+ FileLocker(FileLocker &&L) : FD(L.FD) { L.FD = -1; }
+ ~FileLocker() {
+ if (FD != -1)
+ unlockFile(FD);
+ }
+ FileLocker &operator=(FileLocker &&L) {
+ FD = L.FD;
+ L.FD = -1;
+ return *this;
+ }
+ FileLocker &operator=(const FileLocker &L) = delete;
+ std::error_code unlock() {
+ if (FD != -1) {
+ std::error_code Result = unlockFile(FD);
+ FD = -1;
+ return Result;
+ }
+ return std::error_code();
+ }
+};
+
std::error_code getUniqueID(const Twine Path, UniqueID &Result);
/// Get disk space usage information.
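
tryLockFile, lockFile and unlockFile above add advisory whole-file locking on an already-open descriptor, with FileLocker as an RAII wrapper (its constructor is private to raw_fd_ostream). A hedged sketch using only the free functions declared here; opening the descriptor is left out and the 500 ms timeout is just an example.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <chrono>
#include <system_error>

// Append a line to a shared file whose descriptor FD is already open for
// writing, holding the advisory lock while the file is touched.
static std::error_code appendLocked(int FD, llvm::StringRef Line) {
  // Wait up to 500 ms for the lock instead of failing immediately.
  if (std::error_code EC =
          llvm::sys::fs::tryLockFile(FD, std::chrono::milliseconds(500)))
    return EC; // e.g. errc::no_lock_available if another process holds it

  llvm::raw_fd_ostream OS(FD, /*shouldClose=*/false);
  OS << Line << "\n";
  OS.flush();

  return llvm::sys::fs::unlockFile(FD);
}
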
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/FileSystem/UniqueID.h b/contrib/llvm-project/llvm/include/llvm/Support/FileSystem/UniqueID.h
new file mode 100644
index 000000000000..229410c8292e
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Support/FileSystem/UniqueID.h
@@ -0,0 +1,52 @@
+//===- llvm/Support/FileSystem/UniqueID.h - UniqueID for files --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is cut out of llvm/Support/FileSystem.h to allow UniqueID to be
+// reused without bloating the includes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H
+#define LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H
+
+#include <cstdint>
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+class UniqueID {
+ uint64_t Device;
+ uint64_t File;
+
+public:
+ UniqueID() = default;
+ UniqueID(uint64_t Device, uint64_t File) : Device(Device), File(File) {}
+
+ bool operator==(const UniqueID &Other) const {
+ return Device == Other.Device && File == Other.File;
+ }
+ bool operator!=(const UniqueID &Other) const { return !(*this == Other); }
+ bool operator<(const UniqueID &Other) const {
+ /// Don't use std::tie since it bloats the compile time of this header.
+ if (Device < Other.Device)
+ return true;
+ if (Other.Device < Device)
+ return false;
+ return File < Other.File;
+ }
+
+ uint64_t getDevice() const { return Device; }
+ uint64_t getFile() const { return File; }
+};
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/FormatVariadic.h b/contrib/llvm-project/llvm/include/llvm/Support/FormatVariadic.h
index dfafc3ccb44e..094b054f773f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/FormatVariadic.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/FormatVariadic.h
@@ -205,10 +205,10 @@ public:
//
// The characters '{' and '}' are reserved and cannot appear anywhere within a
// replacement sequence. Outside of a replacement sequence, in order to print
-// a literal '{' or '}' it must be doubled -- "{{" to print a literal '{' and
-// "}}" to print a literal '}'.
+// a literal '{' it must be doubled as "{{".
//
// ===Parameter Indexing===
+//
// `index` specifies the index of the parameter in the parameter pack to format
// into the output. Note that it is possible to refer to the same parameter
// index multiple times in a given format string. This makes it possible to
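
The comment fix above narrows the escaping rule for formatv strings: only '{' has to be doubled, while a stray '}' outside a replacement sequence is emitted as-is. A hedged usage sketch; the expected outputs in the comments follow the documented rule.

#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // "{{" escapes a literal '{'; '}' outside a replacement needs no escape.
  llvm::outs() << llvm::formatv("set = {{{0}, {1}}\n", 1, 2); // set = {1, 2}
  llvm::outs() << llvm::formatv("{0} refs {0}\n", "x");       // x refs x
  return 0;
}
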
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTree.h
index 10e591a69d36..18e08dbcd175 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTree.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTree.h
@@ -28,6 +28,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CFGDiff.h"
#include "llvm/Support/CFGUpdate.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -37,7 +38,6 @@
#include <memory>
#include <type_traits>
#include <utility>
-#include <vector>
namespace llvm {
@@ -60,7 +60,7 @@ template <class NodeT> class DomTreeNodeBase {
NodeT *TheBB;
DomTreeNodeBase *IDom;
unsigned Level;
- std::vector<DomTreeNodeBase *> Children;
+ SmallVector<DomTreeNodeBase *, 4> Children;
mutable unsigned DFSNumIn = ~0;
mutable unsigned DFSNumOut = ~0;
@@ -68,9 +68,9 @@ template <class NodeT> class DomTreeNodeBase {
DomTreeNodeBase(NodeT *BB, DomTreeNodeBase *iDom)
: TheBB(BB), IDom(iDom), Level(IDom ? IDom->Level + 1 : 0) {}
- using iterator = typename std::vector<DomTreeNodeBase *>::iterator;
+ using iterator = typename SmallVector<DomTreeNodeBase *, 4>::iterator;
using const_iterator =
- typename std::vector<DomTreeNodeBase *>::const_iterator;
+ typename SmallVector<DomTreeNodeBase *, 4>::const_iterator;
iterator begin() { return Children.begin(); }
iterator end() { return Children.end(); }
@@ -211,7 +211,10 @@ void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
template <typename DomTreeT>
void ApplyUpdates(DomTreeT &DT,
- ArrayRef<typename DomTreeT::UpdateType> Updates);
+ GraphDiff<typename DomTreeT::NodePtr,
+ DomTreeT::IsPostDominator> &PreViewCFG,
+ GraphDiff<typename DomTreeT::NodePtr,
+ DomTreeT::IsPostDominator> *PostViewCFG);
template <typename DomTreeT>
bool Verify(const DomTreeT &DT, typename DomTreeT::VerificationLevel VL);
@@ -460,8 +463,8 @@ protected:
return this->Roots[0];
}
- /// findNearestCommonDominator - Find nearest common dominator basic block
- /// for basic block A and B. If there is no such block then return nullptr.
+ /// Find nearest common dominator basic block for basic block A and B. A and B
+ /// must have tree nodes.
NodeT *findNearestCommonDominator(NodeT *A, NodeT *B) const {
assert(A && B && "Pointers are not valid");
assert(A->getParent() == B->getParent() &&
@@ -477,18 +480,18 @@ protected:
DomTreeNodeBase<NodeT> *NodeA = getNode(A);
DomTreeNodeBase<NodeT> *NodeB = getNode(B);
-
- if (!NodeA || !NodeB) return nullptr;
+ assert(NodeA && "A must be in the tree");
+ assert(NodeB && "B must be in the tree");
// Use level information to go up the tree until the levels match. Then
// continue going up til we arrive at the same node.
- while (NodeA && NodeA != NodeB) {
+ while (NodeA != NodeB) {
if (NodeA->getLevel() < NodeB->getLevel()) std::swap(NodeA, NodeB);
NodeA = NodeA->IDom;
}
- return NodeA ? NodeA->getBlock() : nullptr;
+ return NodeA->getBlock();
}
const NodeT *findNearestCommonDominator(const NodeT *A,
@@ -535,10 +538,39 @@ protected:
/// The type of updates is the same for DomTreeBase<T> and PostDomTreeBase<T>
/// with the same template parameter T.
///
- /// \param Updates An unordered sequence of updates to perform.
+ /// \param Updates An unordered sequence of updates to perform. The current
+ /// CFG and the reverse of these updates provides the pre-view of the CFG.
///
void applyUpdates(ArrayRef<UpdateType> Updates) {
- DomTreeBuilder::ApplyUpdates(*this, Updates);
+ GraphDiff<NodePtr, IsPostDominator> PreViewCFG(
+ Updates, /*ReverseApplyUpdates=*/true);
+ DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, nullptr);
+ }
+
+ /// \param Updates An unordered sequence of updates to perform. The current
+ /// CFG and the reverse of these updates provides the pre-view of the CFG.
+ /// \param PostViewUpdates An unordered sequence of updates to perform in order
+ /// to obtain a post-view of the CFG. The DT will be updated assuming the
+ /// obtained PostViewCFG is the desired end state.
+ void applyUpdates(ArrayRef<UpdateType> Updates,
+ ArrayRef<UpdateType> PostViewUpdates) {
+ if (Updates.empty()) {
+ GraphDiff<NodePtr, IsPostDom> PostViewCFG(PostViewUpdates);
+ DomTreeBuilder::ApplyUpdates(*this, PostViewCFG, &PostViewCFG);
+ } else {
+ // PreViewCFG needs to merge Updates and PostViewCFG. The updates in
+ // Updates need to be reversed, and match the direction in PostViewCFG.
+ // The PostViewCFG is created with updates reversed (equivalent to changes
+ // made to the CFG), so the PreViewCFG needs all the updates reverse
+ // applied.
+ SmallVector<UpdateType> AllUpdates(Updates.begin(), Updates.end());
+ for (auto &Update : PostViewUpdates)
+ AllUpdates.push_back(Update);
+ GraphDiff<NodePtr, IsPostDom> PreViewCFG(AllUpdates,
+ /*ReverseApplyUpdates=*/true);
+ GraphDiff<NodePtr, IsPostDom> PostViewCFG(PostViewUpdates);
+ DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, &PostViewCFG);
+ }
}
/// Inform the dominator tree about a CFG edge insertion and update the tree.
@@ -807,9 +839,7 @@ protected:
"NewBB should have a single successor!");
NodeRef NewBBSucc = *GraphT::child_begin(NewBB);
- std::vector<NodeRef> PredBlocks;
- for (auto Pred : children<Inverse<N>>(NewBB))
- PredBlocks.push_back(Pred);
+ SmallVector<NodeRef, 4> PredBlocks(children<Inverse<N>>(NewBB));
assert(!PredBlocks.empty() && "No predblocks?");
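
The applyUpdates overloads above take either one unordered batch of updates or an additional post-view batch. A hedged sketch of the common single-batch call against a function's DominatorTree; the block names and the surrounding transform are illustrative.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"

// After rewiring edges Entry->Dead (deleted) and Entry->Cont (inserted) in the
// IR, tell the dominator tree about both changes in one unordered batch.
void updateDomTree(llvm::DominatorTree &DT, llvm::BasicBlock *Entry,
                   llvm::BasicBlock *Dead, llvm::BasicBlock *Cont) {
  llvm::SmallVector<llvm::DominatorTree::UpdateType, 2> Updates;
  Updates.push_back({llvm::DominatorTree::Delete, Entry, Dead});
  Updates.push_back({llvm::DominatorTree::Insert, Entry, Cont});
  DT.applyUpdates(Updates); // updates describe edges already changed in the IR
}
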
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index 464de4e2b3ba..4b59ad1f017f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -58,6 +58,7 @@ struct SemiNCAInfo {
using TreeNodePtr = DomTreeNodeBase<NodeT> *;
using RootsT = decltype(DomTreeT::Roots);
static constexpr bool IsPostDom = DomTreeT::IsPostDominator;
+ using GraphDiffT = GraphDiff<NodePtr, IsPostDom>;
// Information record used by Semi-NCA during tree construction.
struct InfoRec {
@@ -77,21 +78,17 @@ struct SemiNCAInfo {
using UpdateT = typename DomTreeT::UpdateType;
using UpdateKind = typename DomTreeT::UpdateKind;
struct BatchUpdateInfo {
- SmallVector<UpdateT, 4> Updates;
- using NodePtrAndKind = PointerIntPair<NodePtr, 1, UpdateKind>;
-
- // In order to be able to walk a CFG that is out of sync with the CFG
- // DominatorTree last knew about, use the list of updates to reconstruct
- // previous CFG versions of the current CFG. For each node, we store a set
- // of its virtually added/deleted future successors and predecessors.
- // Note that these children are from the future relative to what the
- // DominatorTree knows about -- using them to gets us some snapshot of the
- // CFG from the past (relative to the state of the CFG).
- DenseMap<NodePtr, SmallVector<NodePtrAndKind, 4>> FutureSuccessors;
- DenseMap<NodePtr, SmallVector<NodePtrAndKind, 4>> FuturePredecessors;
+ // Note: Updates inside PreViewCFG are already legalized.
+ BatchUpdateInfo(GraphDiffT &PreViewCFG, GraphDiffT *PostViewCFG = nullptr)
+ : PreViewCFG(PreViewCFG), PostViewCFG(PostViewCFG),
+ NumLegalized(PreViewCFG.getNumLegalizedUpdates()) {}
+
// Remembers if the whole tree was recalculated at some point during the
// current batch update.
bool IsRecalculated = false;
+ GraphDiffT &PreViewCFG;
+ GraphDiffT *PostViewCFG;
+ const size_t NumLegalized;
};
BatchUpdateInfo *BatchUpdates;
@@ -107,66 +104,24 @@ struct SemiNCAInfo {
// in progress, we need this information to continue it.
}
- template <bool Inverse>
- struct ChildrenGetter {
- using ResultTy = SmallVector<NodePtr, 8>;
-
- static ResultTy Get(NodePtr N, std::integral_constant<bool, false>) {
- auto RChildren = reverse(children<NodePtr>(N));
- return ResultTy(RChildren.begin(), RChildren.end());
- }
-
- static ResultTy Get(NodePtr N, std::integral_constant<bool, true>) {
- auto IChildren = inverse_children<NodePtr>(N);
- return ResultTy(IChildren.begin(), IChildren.end());
- }
+ template <bool Inversed>
+ static SmallVector<NodePtr, 8> getChildren(NodePtr N, BatchUpdatePtr BUI) {
+ if (BUI)
+ return BUI->PreViewCFG.template getChildren<Inversed>(N);
+ return getChildren<Inversed>(N);
+ }
- using Tag = std::integral_constant<bool, Inverse>;
-
- // The function below is the core part of the batch updater. It allows the
- // Depth Based Search algorithm to perform incremental updates in lockstep
- // with updates to the CFG. We emulated lockstep CFG updates by getting its
- // next snapshots by reverse-applying future updates.
- static ResultTy Get(NodePtr N, BatchUpdatePtr BUI) {
- ResultTy Res = Get(N, Tag());
- // If there's no batch update in progress, simply return node's children.
- if (!BUI) return Res;
-
- // CFG children are actually its *most current* children, and we have to
- // reverse-apply the future updates to get the node's children at the
- // point in time the update was performed.
- auto &FutureChildren = (Inverse != IsPostDom) ? BUI->FuturePredecessors
- : BUI->FutureSuccessors;
- auto FCIt = FutureChildren.find(N);
- if (FCIt == FutureChildren.end()) return Res;
-
- for (auto ChildAndKind : FCIt->second) {
- const NodePtr Child = ChildAndKind.getPointer();
- const UpdateKind UK = ChildAndKind.getInt();
-
- // Reverse-apply the future update.
- if (UK == UpdateKind::Insert) {
- // If there's an insertion in the future, it means that the edge must
- // exist in the current CFG, but was not present in it before.
- assert(llvm::find(Res, Child) != Res.end()
- && "Expected child not found in the CFG");
- Res.erase(std::remove(Res.begin(), Res.end(), Child), Res.end());
- LLVM_DEBUG(dbgs() << "\tHiding edge " << BlockNamePrinter(N) << " -> "
- << BlockNamePrinter(Child) << "\n");
- } else {
- // If there's an deletion in the future, it means that the edge cannot
- // exist in the current CFG, but existed in it before.
- assert(llvm::find(Res, Child) == Res.end() &&
- "Unexpected child found in the CFG");
- LLVM_DEBUG(dbgs() << "\tShowing virtual edge " << BlockNamePrinter(N)
- << " -> " << BlockNamePrinter(Child) << "\n");
- Res.push_back(Child);
- }
- }
+ template <bool Inversed>
+ static SmallVector<NodePtr, 8> getChildren(NodePtr N) {
+ using DirectedNodeT =
+ std::conditional_t<Inversed, Inverse<NodePtr>, NodePtr>;
+ auto R = children<DirectedNodeT>(N);
+ SmallVector<NodePtr, 8> Res(detail::reverse_if<!Inversed>(R));
- return Res;
- }
- };
+ // Remove nullptr children for clang.
+ llvm::erase_value(Res, nullptr);
+ return Res;
+ }
NodePtr getIDom(NodePtr BB) const {
auto InfoIt = NodeToInfo.find(BB);
@@ -208,6 +163,8 @@ struct SemiNCAInfo {
}
};
+ using NodeOrderMap = DenseMap<NodePtr, unsigned>;
+
// Custom DFS implementation which can skip nodes based on a provided
// predicate. It also collects ReverseChildren so that we don't have to spend
// time getting predecessors in SemiNCA.
@@ -215,9 +172,13 @@ struct SemiNCAInfo {
// If IsReverse is set to true, the DFS walk will be performed backwards
// relative to IsPostDom -- using reverse edges for dominators and forward
// edges for postdominators.
+ //
+ // If SuccOrder is specified, the DFS traverses the children in that order;
+ // otherwise the order is implied by the results of getChildren().
template <bool IsReverse = false, typename DescendCondition>
unsigned runDFS(NodePtr V, unsigned LastNum, DescendCondition Condition,
- unsigned AttachToNum) {
+ unsigned AttachToNum,
+ const NodeOrderMap *SuccOrder = nullptr) {
assert(V);
SmallVector<NodePtr, 64> WorkList = {V};
if (NodeToInfo.count(V) != 0) NodeToInfo[V].Parent = AttachToNum;
@@ -233,8 +194,14 @@ struct SemiNCAInfo {
NumToNode.push_back(BB);
constexpr bool Direction = IsReverse != IsPostDom; // XOR.
- for (const NodePtr Succ :
- ChildrenGetter<Direction>::Get(BB, BatchUpdates)) {
+ auto Successors = getChildren<Direction>(BB, BatchUpdates);
+ if (SuccOrder && Successors.size() > 1)
+ llvm::sort(
+ Successors.begin(), Successors.end(), [=](NodePtr A, NodePtr B) {
+ return SuccOrder->find(A)->second < SuccOrder->find(B)->second;
+ });
+
+ for (const NodePtr Succ : Successors) {
const auto SIT = NodeToInfo.find(Succ);
// Don't visit nodes more than once but remember to collect
// ReverseChildren.
@@ -369,7 +336,7 @@ struct SemiNCAInfo {
// to CFG nodes within infinite loops.
static bool HasForwardSuccessors(const NodePtr N, BatchUpdatePtr BUI) {
assert(N && "N must be a valid node");
- return !ChildrenGetter<false>::Get(N, BUI).empty();
+ return !getChildren<false>(N, BUI).empty();
}
static NodePtr GetEntryNode(const DomTreeT &DT) {
@@ -430,6 +397,32 @@ struct SemiNCAInfo {
// nodes.
if (Total + 1 != Num) {
HasNonTrivialRoots = true;
+
+ // SuccOrder is the order of blocks in the function. It is needed to make
+ // the calculation of the FurthestAway node and the whole PostDomTree
+ // immune to swap successors transformation (e.g. canonicalizing branch
+ // predicates). SuccOrder is initialized lazily only for successors of
+ // reverse unreachable nodes.
+ Optional<NodeOrderMap> SuccOrder;
+ auto InitSuccOrderOnce = [&]() {
+ SuccOrder = NodeOrderMap();
+ for (const auto Node : nodes(DT.Parent))
+ if (SNCA.NodeToInfo.count(Node) == 0)
+ for (const auto Succ : getChildren<false>(Node, SNCA.BatchUpdates))
+ SuccOrder->try_emplace(Succ, 0);
+
+ // Add mapping for all entries of SuccOrder.
+ unsigned NodeNum = 0;
+ for (const auto Node : nodes(DT.Parent)) {
+ ++NodeNum;
+ auto Order = SuccOrder->find(Node);
+ if (Order != SuccOrder->end()) {
+ assert(Order->second == 0);
+ Order->second = NodeNum;
+ }
+ }
+ };
+
// Make another DFS pass over all other nodes to find the
// reverse-unreachable blocks, and find the furthest paths we'll be able
// to make.
@@ -454,7 +447,12 @@ struct SemiNCAInfo {
// expensive and does not always lead to a minimal set of roots.
LLVM_DEBUG(dbgs() << "\t\t\tRunning forward DFS\n");
- const unsigned NewNum = SNCA.runDFS<true>(I, Num, AlwaysDescend, Num);
+ if (!SuccOrder)
+ InitSuccOrderOnce();
+ assert(SuccOrder);
+
+ const unsigned NewNum =
+ SNCA.runDFS<true>(I, Num, AlwaysDescend, Num, &*SuccOrder);
const NodePtr FurthestAway = SNCA.NumToNode[NewNum];
LLVM_DEBUG(dbgs() << "\t\t\tFound a new furthest away node "
<< "(non-trivial root): "
@@ -530,7 +528,7 @@ struct SemiNCAInfo {
// If we found another root in a (forward) DFS walk, remove the current
// root from the set of roots, as it is reverse-reachable from the other
// one.
- if (llvm::find(Roots, N) != Roots.end()) {
+ if (llvm::is_contained(Roots, N)) {
LLVM_DEBUG(dbgs() << "\tForward DFS walk found another root "
<< BlockNamePrinter(N) << "\n\tRemoving root "
<< BlockNamePrinter(Root) << "\n");
@@ -563,12 +561,21 @@ struct SemiNCAInfo {
auto *Parent = DT.Parent;
DT.reset();
DT.Parent = Parent;
- SemiNCAInfo SNCA(nullptr); // Since we are rebuilding the whole tree,
- // there's no point doing it incrementally.
+ // If the update is using the actual CFG, BUI is null. If it's using a view,
+ // BUI is non-null and the PreCFGView is used. When calculating from
+ // scratch, make the PreViewCFG equal to the PostCFGView, so Post is used.
+ BatchUpdatePtr PostViewBUI = nullptr;
+ if (BUI && BUI->PostViewCFG) {
+ BUI->PreViewCFG = *BUI->PostViewCFG;
+ PostViewBUI = BUI;
+ }
+ // This is rebuilding the whole tree, not incrementally, but PostViewBUI is
+ // used in case the caller needs a DT update with a CFGView.
+ SemiNCAInfo SNCA(PostViewBUI);
// Step #0: Number blocks in depth-first order and initialize variables used
// in later stages of the algorithm.
- DT.Roots = FindRoots(DT, nullptr);
+ DT.Roots = FindRoots(DT, PostViewBUI);
SNCA.doFullDFSWalk(DT, AlwaysDescend);
SNCA.runSemiNCA(DT);
@@ -679,8 +686,7 @@ struct SemiNCAInfo {
// root.
if (!DT.isVirtualRoot(To->getIDom())) return false;
- auto RIt = llvm::find(DT.Roots, To->getBlock());
- if (RIt == DT.Roots.end())
+ if (!llvm::is_contained(DT.Roots, To->getBlock()))
return false; // To is not a root, nothing to update.
LLVM_DEBUG(dbgs() << "\t\tAfter the insertion, " << BlockNamePrinter(To)
@@ -787,8 +793,7 @@ struct SemiNCAInfo {
//
// Invariant: there is an optimal path from `To` to TN with the minimum
// depth being CurrentLevel.
- for (const NodePtr Succ :
- ChildrenGetter<IsPostDom>::Get(TN->getBlock(), BUI)) {
+ for (const NodePtr Succ : getChildren<IsPostDom>(TN->getBlock(), BUI)) {
const TreeNodePtr SuccTN = DT.getNode(Succ);
assert(SuccTN &&
"Unreachable successor found at reachable insertion");
@@ -918,8 +923,8 @@ struct SemiNCAInfo {
// the DomTree about it.
// The check is O(N), so run it only in debug configuration.
auto IsSuccessor = [BUI](const NodePtr SuccCandidate, const NodePtr Of) {
- auto Successors = ChildrenGetter<IsPostDom>::Get(Of, BUI);
- return llvm::find(Successors, SuccCandidate) != Successors.end();
+ auto Successors = getChildren<IsPostDom>(Of, BUI);
+ return llvm::is_contained(Successors, SuccCandidate);
};
(void)IsSuccessor;
assert(!IsSuccessor(To, From) && "Deleted edge still exists in the CFG!");
@@ -1005,15 +1010,14 @@ struct SemiNCAInfo {
const TreeNodePtr TN) {
LLVM_DEBUG(dbgs() << "IsReachableFromIDom " << BlockNamePrinter(TN)
<< "\n");
- for (const NodePtr Pred :
- ChildrenGetter<!IsPostDom>::Get(TN->getBlock(), BUI)) {
+ auto TNB = TN->getBlock();
+ for (const NodePtr Pred : getChildren<!IsPostDom>(TNB, BUI)) {
LLVM_DEBUG(dbgs() << "\tPred " << BlockNamePrinter(Pred) << "\n");
if (!DT.getNode(Pred)) continue;
- const NodePtr Support =
- DT.findNearestCommonDominator(TN->getBlock(), Pred);
+ const NodePtr Support = DT.findNearestCommonDominator(TNB, Pred);
LLVM_DEBUG(dbgs() << "\tSupport " << BlockNamePrinter(Support) << "\n");
- if (Support != TN->getBlock()) {
+ if (Support != TNB) {
LLVM_DEBUG(dbgs() << "\t" << BlockNamePrinter(TN)
<< " is reachable from support "
<< BlockNamePrinter(Support) << "\n");
@@ -1054,7 +1058,7 @@ struct SemiNCAInfo {
const TreeNodePtr TN = DT.getNode(To);
assert(TN);
if (TN->getLevel() > Level) return true;
- if (llvm::find(AffectedQueue, To) == AffectedQueue.end())
+ if (!llvm::is_contained(AffectedQueue, To))
AffectedQueue.push_back(To);
return false;
@@ -1144,53 +1148,34 @@ struct SemiNCAInfo {
//===--------------------- DomTree Batch Updater --------------------------===
//~~
- static void ApplyUpdates(DomTreeT &DT, ArrayRef<UpdateT> Updates) {
- const size_t NumUpdates = Updates.size();
+ static void ApplyUpdates(DomTreeT &DT, GraphDiffT &PreViewCFG,
+ GraphDiffT *PostViewCFG) {
+ // Note: the PostViewCFG is only used when computing from scratch. Its data
+ // should already be included in the PreViewCFG for incremental updates.
+ const size_t NumUpdates = PreViewCFG.getNumLegalizedUpdates();
if (NumUpdates == 0)
return;
// Take the fast path for a single update and avoid running the batch update
// machinery.
if (NumUpdates == 1) {
- const auto &Update = Updates.front();
- if (Update.getKind() == UpdateKind::Insert)
- DT.insertEdge(Update.getFrom(), Update.getTo());
- else
- DT.deleteEdge(Update.getFrom(), Update.getTo());
-
+ UpdateT Update = PreViewCFG.popUpdateForIncrementalUpdates();
+ if (!PostViewCFG) {
+ if (Update.getKind() == UpdateKind::Insert)
+ InsertEdge(DT, /*BUI=*/nullptr, Update.getFrom(), Update.getTo());
+ else
+ DeleteEdge(DT, /*BUI=*/nullptr, Update.getFrom(), Update.getTo());
+ } else {
+ BatchUpdateInfo BUI(*PostViewCFG, PostViewCFG);
+ if (Update.getKind() == UpdateKind::Insert)
+ InsertEdge(DT, &BUI, Update.getFrom(), Update.getTo());
+ else
+ DeleteEdge(DT, &BUI, Update.getFrom(), Update.getTo());
+ }
return;
}
- BatchUpdateInfo BUI;
- LLVM_DEBUG(dbgs() << "Legalizing " << BUI.Updates.size() << " updates\n");
- cfg::LegalizeUpdates<NodePtr>(Updates, BUI.Updates, IsPostDom);
-
- const size_t NumLegalized = BUI.Updates.size();
- BUI.FutureSuccessors.reserve(NumLegalized);
- BUI.FuturePredecessors.reserve(NumLegalized);
-
- // Use the legalized future updates to initialize future successors and
- // predecessors. Note that these sets will only decrease size over time, as
- // the next CFG snapshots slowly approach the actual (current) CFG.
- for (UpdateT &U : BUI.Updates) {
- BUI.FutureSuccessors[U.getFrom()].push_back({U.getTo(), U.getKind()});
- BUI.FuturePredecessors[U.getTo()].push_back({U.getFrom(), U.getKind()});
- }
-
-#if 0
- // FIXME: The LLVM_DEBUG macro only plays well with a modular
- // build of LLVM when the header is marked as textual, but doing
- // so causes redefinition errors.
- LLVM_DEBUG(dbgs() << "About to apply " << NumLegalized << " updates\n");
- LLVM_DEBUG(if (NumLegalized < 32) for (const auto &U
- : reverse(BUI.Updates)) {
- dbgs() << "\t";
- U.dump();
- dbgs() << "\n";
- });
- LLVM_DEBUG(dbgs() << "\n");
-#endif
-
+ BatchUpdateInfo BUI(PreViewCFG, PostViewCFG);
// Recalculate the DominatorTree when the number of updates
// exceeds a threshold, which usually makes direct updating slower than
// recalculation. We select this threshold proportional to the
@@ -1200,21 +1185,21 @@ struct SemiNCAInfo {
// Make unittests of the incremental algorithm work
if (DT.DomTreeNodes.size() <= 100) {
- if (NumLegalized > DT.DomTreeNodes.size())
+ if (BUI.NumLegalized > DT.DomTreeNodes.size())
CalculateFromScratch(DT, &BUI);
- } else if (NumLegalized > DT.DomTreeNodes.size() / 40)
+ } else if (BUI.NumLegalized > DT.DomTreeNodes.size() / 40)
CalculateFromScratch(DT, &BUI);
// If the DominatorTree was recalculated at some point, stop the batch
// updates. Full recalculations ignore batch updates and look at the actual
// CFG.
- for (size_t i = 0; i < NumLegalized && !BUI.IsRecalculated; ++i)
+ for (size_t i = 0; i < BUI.NumLegalized && !BUI.IsRecalculated; ++i)
ApplyNextUpdate(DT, BUI);
}
static void ApplyNextUpdate(DomTreeT &DT, BatchUpdateInfo &BUI) {
- assert(!BUI.Updates.empty() && "No updates to apply!");
- UpdateT CurrentUpdate = BUI.Updates.pop_back_val();
+ // Popping the next update will move the PreViewCFG to the next snapshot.
+ UpdateT CurrentUpdate = BUI.PreViewCFG.popUpdateForIncrementalUpdates();
#if 0
// FIXME: The LLVM_DEBUG macro only plays well with a modular
// build of LLVM when the header is marked as textual, but doing
@@ -1223,21 +1208,6 @@ struct SemiNCAInfo {
LLVM_DEBUG(CurrentUpdate.dump(); dbgs() << "\n");
#endif
- // Move to the next snapshot of the CFG by removing the reverse-applied
- // current update. Since updates are performed in the same order they are
- // legalized it's sufficient to pop the last item here.
- auto &FS = BUI.FutureSuccessors[CurrentUpdate.getFrom()];
- assert(FS.back().getPointer() == CurrentUpdate.getTo() &&
- FS.back().getInt() == CurrentUpdate.getKind());
- FS.pop_back();
- if (FS.empty()) BUI.FutureSuccessors.erase(CurrentUpdate.getFrom());
-
- auto &FP = BUI.FuturePredecessors[CurrentUpdate.getTo()];
- assert(FP.back().getPointer() == CurrentUpdate.getFrom() &&
- FP.back().getInt() == CurrentUpdate.getKind());
- FP.pop_back();
- if (FP.empty()) BUI.FuturePredecessors.erase(CurrentUpdate.getTo());
-
if (CurrentUpdate.getKind() == UpdateKind::Insert)
InsertEdge(DT, &BUI, CurrentUpdate.getFrom(), CurrentUpdate.getTo());
else
@@ -1596,19 +1566,11 @@ void Calculate(DomTreeT &DT) {
template <typename DomTreeT>
void CalculateWithUpdates(DomTreeT &DT,
ArrayRef<typename DomTreeT::UpdateType> Updates) {
- // TODO: Move BUI creation in common method, reuse in ApplyUpdates.
- typename SemiNCAInfo<DomTreeT>::BatchUpdateInfo BUI;
- LLVM_DEBUG(dbgs() << "Legalizing " << BUI.Updates.size() << " updates\n");
- cfg::LegalizeUpdates<typename DomTreeT::NodePtr>(Updates, BUI.Updates,
- DomTreeT::IsPostDominator);
- const size_t NumLegalized = BUI.Updates.size();
- BUI.FutureSuccessors.reserve(NumLegalized);
- BUI.FuturePredecessors.reserve(NumLegalized);
- for (auto &U : BUI.Updates) {
- BUI.FutureSuccessors[U.getFrom()].push_back({U.getTo(), U.getKind()});
- BUI.FuturePredecessors[U.getTo()].push_back({U.getFrom(), U.getKind()});
- }
-
+ // FIXME: Updated to use the PreViewCFG and behave the same as before.
+ // This behavior is, however, incorrect; it actually needs the PostViewCFG.
+ GraphDiff<typename DomTreeT::NodePtr, DomTreeT::IsPostDominator> PreViewCFG(
+ Updates, /*ReverseApplyUpdates=*/true);
+ typename SemiNCAInfo<DomTreeT>::BatchUpdateInfo BUI(PreViewCFG);
SemiNCAInfo<DomTreeT>::CalculateFromScratch(DT, &BUI);
}
@@ -1628,8 +1590,11 @@ void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
template <class DomTreeT>
void ApplyUpdates(DomTreeT &DT,
- ArrayRef<typename DomTreeT::UpdateType> Updates) {
- SemiNCAInfo<DomTreeT>::ApplyUpdates(DT, Updates);
+ GraphDiff<typename DomTreeT::NodePtr,
+ DomTreeT::IsPostDominator> &PreViewCFG,
+ GraphDiff<typename DomTreeT::NodePtr,
+ DomTreeT::IsPostDominator> *PostViewCFG) {
+ SemiNCAInfo<DomTreeT>::ApplyUpdates(DT, PreViewCFG, PostViewCFG);
}
template <class DomTreeT>
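
For context, a minimal sketch (not part of the upstream change) of how a caller might drive the new GraphDiff-based entry point for a forward dominator tree. It assumes GraphDiff and DomTreeBuilder::ApplyUpdates are reachable through the usual Dominators.h include chain; in-tree callers normally go through DominatorTreeBase::applyUpdates, which builds the pre-view snapshot the same way.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

// Sketch: batch-apply CFG updates to an existing DominatorTree. The pre-view
// CFG is the current CFG with the updates reverse-applied, so the batch
// updater can walk towards the real (post-update) CFG one snapshot at a time.
static void applyBatch(DominatorTree &DT,
                       ArrayRef<DominatorTree::UpdateType> Updates) {
  GraphDiff<BasicBlock *, /*IsPostDominator=*/false> PreViewCFG(
      Updates, /*ReverseApplyUpdates=*/true);
  // Passing a null PostViewCFG selects the incremental path, unless the
  // update count triggers a recalculation from scratch.
  DomTreeBuilder::ApplyUpdates(DT, PreViewCFG, /*PostViewCFG=*/nullptr);
}
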
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/GlobPattern.h b/contrib/llvm-project/llvm/include/llvm/Support/GlobPattern.h
index 3e5989d02500..b79de6f41c49 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/GlobPattern.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/GlobPattern.h
@@ -31,6 +31,16 @@ public:
static Expected<GlobPattern> create(StringRef Pat);
bool match(StringRef S) const;
+ // Returns true for glob pattern "*". Can be used to avoid expensive
+ // preparation/acquisition of the input for match().
+ bool isTrivialMatchAll() const {
+ if (Prefix && Prefix->empty()) {
+ assert(!Suffix);
+ return true;
+ }
+ return false;
+ }
+
private:
bool matchOne(ArrayRef<BitVector> Pat, StringRef S) const;
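
As an illustration (not part of the diff), isTrivialMatchAll() lets a caller skip preparing the match input entirely when the pattern is just "*". The symbol list and surrounding logic below are hypothetical.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/GlobPattern.h"

using namespace llvm;

// Sketch: avoid loading or demangling symbol names when the glob is "*".
static bool anySymbolMatches(const GlobPattern &Pat,
                             ArrayRef<StringRef> Symbols) {
  if (Pat.isTrivialMatchAll())
    return !Symbols.empty(); // No need to inspect individual names.
  return any_of(Symbols, [&](StringRef S) { return Pat.match(S); });
}
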
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/GraphWriter.h b/contrib/llvm-project/llvm/include/llvm/Support/GraphWriter.h
index f9241b1e8081..1f60fbc35126 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/GraphWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/GraphWriter.h
@@ -158,9 +158,7 @@ public:
writeNode(Node);
}
- bool isNodeHidden(NodeRef Node) {
- return DTraits.isNodeHidden(Node);
- }
+ bool isNodeHidden(NodeRef Node) { return DTraits.isNodeHidden(Node, G); }
void writeNode(NodeRef Node) {
std::string NodeAttributes = DTraits.getNodeAttributes(Node, G);
@@ -228,10 +226,10 @@ public:
child_iterator EI = GTraits::child_begin(Node);
child_iterator EE = GTraits::child_end(Node);
for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i)
- if (!DTraits.isNodeHidden(*EI))
+ if (!DTraits.isNodeHidden(*EI, G))
writeEdge(Node, i, EI);
for (; EI != EE; ++EI)
- if (!DTraits.isNodeHidden(*EI))
+ if (!DTraits.isNodeHidden(*EI, G))
writeEdge(Node, 64, EI);
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Host.h b/contrib/llvm-project/llvm/include/llvm/Support/Host.h
index d4ef389450cc..b3c15f0683b9 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Host.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Host.h
@@ -65,6 +65,20 @@ namespace sys {
StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent);
StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent);
StringRef getHostCPUNameForBPF();
+
+ /// Helper functions to extract CPU details from CPUID on x86.
+ namespace x86 {
+ enum class VendorSignatures {
+ UNKNOWN,
+ GENUINE_INTEL,
+ AUTHENTIC_AMD,
+ };
+
+ /// Returns the host CPU's vendor.
+ /// MaxLeaf: if a non-nullptr pointer is specified, the EAX value will be
+ /// assigned to its pointee.
+ VendorSignatures getVendorSignature(unsigned *MaxLeaf = nullptr);
+ } // namespace x86
}
}
}
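
A small usage sketch (not part of the diff); the enclosing llvm::sys::detail namespace is assumed from the surrounding header context.

#include "llvm/Support/Host.h"

using namespace llvm;

// Sketch: query the host CPU vendor via CPUID. MaxLeaf receives the highest
// supported standard CPUID leaf (EAX of leaf 0) when a pointer is supplied.
static bool hostIsGenuineIntel() {
  unsigned MaxLeaf = 0;
  return sys::detail::x86::getVendorSignature(&MaxLeaf) ==
         sys::detail::x86::VendorSignatures::GENUINE_INTEL;
}
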
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/InitLLVM.h b/contrib/llvm-project/llvm/include/llvm/Support/InitLLVM.h
index 3be8d6b6d2e0..879dc1514d10 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/InitLLVM.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/InitLLVM.h
@@ -9,6 +9,7 @@
#ifndef LLVM_SUPPORT_LLVM_H
#define LLVM_SUPPORT_LLVM_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/PrettyStackTrace.h"
@@ -44,7 +45,7 @@ public:
private:
BumpPtrAllocator Alloc;
SmallVector<const char *, 0> Args;
- PrettyStackTraceProgram StackPrinter;
+ Optional<PrettyStackTraceProgram> StackPrinter;
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/InstructionCost.h b/contrib/llvm-project/llvm/include/llvm/Support/InstructionCost.h
new file mode 100644
index 000000000000..fbc898b878bb
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Support/InstructionCost.h
@@ -0,0 +1,238 @@
+//===- InstructionCost.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines an InstructionCost class that is used when calculating
+/// the cost of an instruction, or a group of instructions. In addition to a
+/// numeric value representing the cost, the class also contains a state that
+/// can be used to encode particular properties, e.g. a cost being invalid or
+/// unknown.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_INSTRUCTIONCOST_H
+#define LLVM_SUPPORT_INSTRUCTIONCOST_H
+
+#include "llvm/ADT/Optional.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+class InstructionCost {
+public:
+ using CostType = int;
+
+ /// These states can currently be used to indicate whether a cost is valid or
+ /// invalid. Examples of an invalid cost might be where the cost is
+ /// prohibitively expensive and the user wants to prevent certain
+ /// optimizations being performed. Or perhaps the cost is simply unknown
+ /// because the operation makes no sense in certain circumstances. These
+ /// states can be expanded in future to support other cases if necessary.
+ enum CostState { Valid, Invalid };
+
+private:
+ CostType Value;
+ CostState State;
+
+ void propagateState(const InstructionCost &RHS) {
+ if (RHS.State == Invalid)
+ State = Invalid;
+ }
+
+public:
+ InstructionCost() = default;
+
+ InstructionCost(CostState) = delete;
+ InstructionCost(CostType Val) : Value(Val), State(Valid) {}
+
+ static InstructionCost getInvalid(CostType Val = 0) {
+ InstructionCost Tmp(Val);
+ Tmp.setInvalid();
+ return Tmp;
+ }
+
+ bool isValid() const { return State == Valid; }
+ void setValid() { State = Valid; }
+ void setInvalid() { State = Invalid; }
+ CostState getState() const { return State; }
+
+ /// This function is intended to be used as sparingly as possible, since the
+ /// class provides the full range of operator support required for arithmetic
+ /// and comparisons.
+ Optional<CostType> getValue() const {
+ if (isValid())
+ return Value;
+ return None;
+ }
+
+ /// For all of the arithmetic operators provided here any invalid state is
+ /// perpetuated and cannot be removed. Once a cost becomes invalid it stays
+ /// invalid, and it also inherits any invalid state from the RHS. Regardless
+ /// of the state, arithmetic and comparisons work on the actual values in the
+ /// same way as they would on a basic type, such as integer.
+
+ InstructionCost &operator+=(const InstructionCost &RHS) {
+ propagateState(RHS);
+ Value += RHS.Value;
+ return *this;
+ }
+
+ InstructionCost &operator+=(const CostType RHS) {
+ InstructionCost RHS2(RHS);
+ *this += RHS2;
+ return *this;
+ }
+
+ InstructionCost &operator-=(const InstructionCost &RHS) {
+ propagateState(RHS);
+ Value -= RHS.Value;
+ return *this;
+ }
+
+ InstructionCost &operator-=(const CostType RHS) {
+ InstructionCost RHS2(RHS);
+ *this -= RHS2;
+ return *this;
+ }
+
+ InstructionCost &operator*=(const InstructionCost &RHS) {
+ propagateState(RHS);
+ Value *= RHS.Value;
+ return *this;
+ }
+
+ InstructionCost &operator*=(const CostType RHS) {
+ InstructionCost RHS2(RHS);
+ *this *= RHS2;
+ return *this;
+ }
+
+ InstructionCost &operator/=(const InstructionCost &RHS) {
+ propagateState(RHS);
+ Value /= RHS.Value;
+ return *this;
+ }
+
+ InstructionCost &operator/=(const CostType RHS) {
+ InstructionCost RHS2(RHS);
+ *this /= RHS2;
+ return *this;
+ }
+
+ InstructionCost &operator++() {
+ *this += 1;
+ return *this;
+ }
+
+ InstructionCost operator++(int) {
+ InstructionCost Copy = *this;
+ ++*this;
+ return Copy;
+ }
+
+ InstructionCost &operator--() {
+ *this -= 1;
+ return *this;
+ }
+
+ InstructionCost operator--(int) {
+ InstructionCost Copy = *this;
+ --*this;
+ return Copy;
+ }
+
+ bool operator==(const InstructionCost &RHS) const {
+ return State == RHS.State && Value == RHS.Value;
+ }
+
+ bool operator!=(const InstructionCost &RHS) const { return !(*this == RHS); }
+
+ bool operator==(const CostType RHS) const {
+ return State == Valid && Value == RHS;
+ }
+
+ bool operator!=(const CostType RHS) const { return !(*this == RHS); }
+
+ /// For the comparison operators we have chosen to use total ordering with
+ /// the following rules:
+ /// 1. If either of the states != Valid then a lexicographical order is
+ /// applied based upon the state.
+ /// 2. If both states are valid then order based upon value.
+ /// This avoids having to add asserts to the comparison operators that the
+ /// states are valid, and users can test for validity of the cost explicitly.
+ bool operator<(const InstructionCost &RHS) const {
+ if (State != Valid || RHS.State != Valid)
+ return State < RHS.State;
+ return Value < RHS.Value;
+ }
+
+ bool operator>(const InstructionCost &RHS) const { return RHS < *this; }
+
+ bool operator<=(const InstructionCost &RHS) const { return !(RHS < *this); }
+
+ bool operator>=(const InstructionCost &RHS) const { return !(*this < RHS); }
+
+ bool operator<(const CostType RHS) const {
+ InstructionCost RHS2(RHS);
+ return *this < RHS2;
+ }
+
+ bool operator>(const CostType RHS) const {
+ InstructionCost RHS2(RHS);
+ return *this > RHS2;
+ }
+
+ bool operator<=(const CostType RHS) const {
+ InstructionCost RHS2(RHS);
+ return *this <= RHS2;
+ }
+
+ bool operator>=(const CostType RHS) const {
+ InstructionCost RHS2(RHS);
+ return *this >= RHS2;
+ }
+
+ void print(raw_ostream &OS) const;
+};
+
+inline InstructionCost operator+(const InstructionCost &LHS,
+ const InstructionCost &RHS) {
+ InstructionCost LHS2(LHS);
+ LHS2 += RHS;
+ return LHS2;
+}
+
+inline InstructionCost operator-(const InstructionCost &LHS,
+ const InstructionCost &RHS) {
+ InstructionCost LHS2(LHS);
+ LHS2 -= RHS;
+ return LHS2;
+}
+
+inline InstructionCost operator*(const InstructionCost &LHS,
+ const InstructionCost &RHS) {
+ InstructionCost LHS2(LHS);
+ LHS2 *= RHS;
+ return LHS2;
+}
+
+inline InstructionCost operator/(const InstructionCost &LHS,
+ const InstructionCost &RHS) {
+ InstructionCost LHS2(LHS);
+ LHS2 /= RHS;
+ return LHS2;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS, const InstructionCost &V) {
+ V.print(OS);
+ return OS;
+}
+
+} // namespace llvm
+
+#endif
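
To illustrate the intended use of the new class (illustrative only, not part of the diff): arithmetic behaves like plain integers, an invalid operand poisons the result, and getValue() forces callers to acknowledge validity.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/InstructionCost.h"

using namespace llvm;

// Sketch: sum per-instruction costs for a hypothetical candidate region.
static Optional<int> totalCost(ArrayRef<InstructionCost> Costs) {
  InstructionCost Total = 0;
  for (const InstructionCost &C : Costs)
    Total += C;            // An invalid C makes Total invalid for good.
  return Total.getValue(); // None if any operand was invalid.
}
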
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/JSON.h b/contrib/llvm-project/llvm/include/llvm/Support/JSON.h
index 8b1c66234fe8..c753cee60ec1 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/JSON.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/JSON.h
@@ -253,7 +253,14 @@ inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
/// === Converting JSON values to C++ types ===
///
/// The convention is to have a deserializer function findable via ADL:
-/// fromJSON(const json::Value&, T&)->bool
+/// fromJSON(const json::Value&, T&, Path) -> bool
+///
+/// The return value indicates overall success, and Path is used for precise
+/// error reporting. (The Path::Root passed in at the top level fromJSON call
+/// captures any nested error and can render it in context).
+/// If conversion fails, fromJSON calls Path::report() and immediately returns.
+/// This ensures that the first fatal error survives.
+///
/// Deserializers are provided for:
/// - bool
/// - int and int64_t
@@ -449,12 +456,12 @@ private:
friend class Object;
template <typename T, typename... U> void create(U &&... V) {
- new (reinterpret_cast<T *>(Union.buffer)) T(std::forward<U>(V)...);
+ new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...);
}
template <typename T> T &as() const {
// Using this two-step static_cast via void * instead of reinterpret_cast
// silences a -Wstrict-aliasing false positive from GCC6 and earlier.
- void *Storage = static_cast<void *>(Union.buffer);
+ void *Storage = static_cast<void *>(&Union);
return *static_cast<T *>(Storage);
}
@@ -557,81 +564,169 @@ inline bool Object::erase(StringRef K) {
return M.erase(ObjectKey(K));
}
+/// A "cursor" marking a position within a Value.
+/// The Value is a tree, and this is the path from the root to the current node.
+/// This is used to associate errors with particular subobjects.
+class Path {
+public:
+ class Root;
+
+ /// Records that the value at the current path is invalid.
+ /// Message is e.g. "expected number" and becomes part of the final error.
+ /// This overwrites any previously written error message in the root.
+ void report(llvm::StringLiteral Message);
+
+ /// The root may be treated as a Path.
+ Path(Root &R) : Parent(nullptr), Seg(&R) {}
+ /// Derives a path for an array element: this[Index]
+ Path index(unsigned Index) const { return Path(this, Segment(Index)); }
+ /// Derives a path for an object field: this.Field
+ Path field(StringRef Field) const { return Path(this, Segment(Field)); }
+
+private:
+ /// One element in a JSON path: an object field (.foo) or array index [27].
+ /// Exception: the root Path encodes a pointer to the Path::Root.
+ class Segment {
+ uintptr_t Pointer;
+ unsigned Offset;
+
+ public:
+ Segment() = default;
+ Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
+ Segment(llvm::StringRef Field)
+ : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
+ Offset(static_cast<unsigned>(Field.size())) {}
+ Segment(unsigned Index) : Pointer(0), Offset(Index) {}
+
+ bool isField() const { return Pointer != 0; }
+ StringRef field() const {
+ return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
+ }
+ unsigned index() const { return Offset; }
+ Root *root() const { return reinterpret_cast<Root *>(Pointer); }
+ };
+
+ const Path *Parent;
+ Segment Seg;
+
+ Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
+};
+
+/// The root is the trivial Path to the root value.
+/// It also stores the latest reported error and the path where it occurred.
+class Path::Root {
+ llvm::StringRef Name;
+ llvm::StringLiteral ErrorMessage;
+ std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.
+
+ friend void Path::report(llvm::StringLiteral Message);
+
+public:
+ Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
+ // No copy/move allowed as there are incoming pointers.
+ Root(Root &&) = delete;
+ Root &operator=(Root &&) = delete;
+ Root(const Root &) = delete;
+ Root &operator=(const Root &) = delete;
+
+ /// Returns the last error reported, or else a generic error.
+ Error getError() const;
+ /// Print the root value with the error shown inline as a comment.
+ /// Unrelated parts of the value are elided for brevity, e.g.
+ /// {
+ /// "id": 42,
+ /// "name": /* expected string */ null,
+ /// "properties": { ... }
+ /// }
+ void printErrorContext(const Value &, llvm::raw_ostream &) const;
+};
+
// Standard deserializers are provided for primitive types.
// See comments on Value.
-inline bool fromJSON(const Value &E, std::string &Out) {
+inline bool fromJSON(const Value &E, std::string &Out, Path P) {
if (auto S = E.getAsString()) {
Out = std::string(*S);
return true;
}
+ P.report("expected string");
return false;
}
-inline bool fromJSON(const Value &E, int &Out) {
+inline bool fromJSON(const Value &E, int &Out, Path P) {
if (auto S = E.getAsInteger()) {
Out = *S;
return true;
}
+ P.report("expected integer");
return false;
}
-inline bool fromJSON(const Value &E, int64_t &Out) {
+inline bool fromJSON(const Value &E, int64_t &Out, Path P) {
if (auto S = E.getAsInteger()) {
Out = *S;
return true;
}
+ P.report("expected integer");
return false;
}
-inline bool fromJSON(const Value &E, double &Out) {
+inline bool fromJSON(const Value &E, double &Out, Path P) {
if (auto S = E.getAsNumber()) {
Out = *S;
return true;
}
+ P.report("expected number");
return false;
}
-inline bool fromJSON(const Value &E, bool &Out) {
+inline bool fromJSON(const Value &E, bool &Out, Path P) {
if (auto S = E.getAsBoolean()) {
Out = *S;
return true;
}
+ P.report("expected boolean");
return false;
}
-inline bool fromJSON(const Value &E, std::nullptr_t &Out) {
+inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
if (auto S = E.getAsNull()) {
Out = *S;
return true;
}
+ P.report("expected null");
return false;
}
-template <typename T> bool fromJSON(const Value &E, llvm::Optional<T> &Out) {
+template <typename T>
+bool fromJSON(const Value &E, llvm::Optional<T> &Out, Path P) {
if (E.getAsNull()) {
Out = llvm::None;
return true;
}
T Result;
- if (!fromJSON(E, Result))
+ if (!fromJSON(E, Result, P))
return false;
Out = std::move(Result);
return true;
}
-template <typename T> bool fromJSON(const Value &E, std::vector<T> &Out) {
+template <typename T>
+bool fromJSON(const Value &E, std::vector<T> &Out, Path P) {
if (auto *A = E.getAsArray()) {
Out.clear();
Out.resize(A->size());
for (size_t I = 0; I < A->size(); ++I)
- if (!fromJSON((*A)[I], Out[I]))
+ if (!fromJSON((*A)[I], Out[I], P.index(I)))
return false;
return true;
}
+ P.report("expected array");
return false;
}
template <typename T>
-bool fromJSON(const Value &E, std::map<std::string, T> &Out) {
+bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) {
if (auto *O = E.getAsObject()) {
Out.clear();
for (const auto &KV : *O)
- if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))]))
+ if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))],
+ P.field(KV.first)))
return false;
return true;
}
+ P.report("expected object");
return false;
}
@@ -644,42 +739,59 @@ template <typename T> Value toJSON(const llvm::Optional<T> &Opt) {
///
/// Example:
/// \code
-/// bool fromJSON(const Value &E, MyStruct &R) {
-/// ObjectMapper O(E);
-/// if (!O || !O.map("mandatory_field", R.MandatoryField))
-/// return false;
-/// O.map("optional_field", R.OptionalField);
-/// return true;
+/// bool fromJSON(const Value &E, MyStruct &R, Path P) {
+/// ObjectMapper O(E, P);
+/// // When returning false, error details were already reported.
+/// return O && O.map("mandatory_field", R.MandatoryField) &&
+/// O.mapOptional("optional_field", R.OptionalField);
/// }
/// \endcode
class ObjectMapper {
public:
- ObjectMapper(const Value &E) : O(E.getAsObject()) {}
+ /// If O is not an object, this mapper is invalid and an error is reported.
+ ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) {
+ if (!O)
+ P.report("expected object");
+ }
/// True if the expression is an object.
/// Must be checked before calling map().
- operator bool() { return O; }
+ operator bool() const { return O; }
- /// Maps a property to a field, if it exists.
- template <typename T> bool map(StringRef Prop, T &Out) {
+ /// Maps a property to a field.
+ /// If the property is missing or invalid, reports an error.
+ template <typename T> bool map(StringLiteral Prop, T &Out) {
assert(*this && "Must check this is an object before calling map()");
if (const Value *E = O->get(Prop))
- return fromJSON(*E, Out);
+ return fromJSON(*E, Out, P.field(Prop));
+ P.field(Prop).report("missing value");
return false;
}
/// Maps a property to a field, if it exists.
+ /// If the property exists and is invalid, reports an error.
/// (Optional requires special handling, because missing keys are OK).
- template <typename T> bool map(StringRef Prop, llvm::Optional<T> &Out) {
+ template <typename T> bool map(StringLiteral Prop, llvm::Optional<T> &Out) {
assert(*this && "Must check this is an object before calling map()");
if (const Value *E = O->get(Prop))
- return fromJSON(*E, Out);
+ return fromJSON(*E, Out, P.field(Prop));
Out = llvm::None;
return true;
}
+ /// Maps a property to a field, if it exists.
+ /// If the property exists and is invalid, reports an error.
+ /// If the property does not exist, Out is unchanged.
+ template <typename T> bool mapOptional(StringLiteral Prop, T &Out) {
+ assert(*this && "Must check this is an object before calling map()");
+ if (const Value *E = O->get(Prop))
+ return fromJSON(*E, Out, P.field(Prop));
+ return true;
+ }
+
private:
const Object *O;
+ Path P;
};
/// Parses the provided JSON source, or returns a ParseError.
@@ -703,9 +815,24 @@ public:
}
};
+/// Version of parse() that converts the parsed value to the type T.
+/// RootName describes the root object and is used in error messages.
+template <typename T>
+Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") {
+ auto V = parse(JSON);
+ if (!V)
+ return V.takeError();
+ Path::Root R(RootName);
+ T Result;
+ if (fromJSON(*V, Result, R))
+ return std::move(Result);
+ return R.getError();
+}
+
/// json::OStream allows writing well-formed JSON without materializing
/// all structures as json::Value ahead of time.
/// It's faster, lower-level, and less safe than OS << json::Value.
+/// It also allows emitting more constructs, such as comments.
///
/// Only one "top-level" object can be written to a stream.
/// Simplest usage involves passing lambdas (Blocks) to fill in containers:
@@ -791,6 +918,21 @@ class OStream {
Contents();
objectEnd();
}
+ /// Emit an externally-serialized value.
+ /// The caller must write exactly one valid JSON value to the provided stream.
+ /// No validation or formatting of this value occurs.
+ void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) {
+ rawValueBegin();
+ Contents(OS);
+ rawValueEnd();
+ }
+ void rawValue(llvm::StringRef Contents) {
+ rawValue([&](raw_ostream &OS) { OS << Contents; });
+ }
+ /// Emit a JavaScript comment associated with the next printed value.
+ /// The string must be valid until the next attribute or value is emitted.
+ /// Comments are not part of standard JSON, and many parsers reject them!
+ void comment(llvm::StringRef);
// High level functions to output object attributes.
// Valid only within an object (any number of times).
@@ -817,8 +959,10 @@ class OStream {
void objectEnd();
void attributeBegin(llvm::StringRef Key);
void attributeEnd();
+ raw_ostream &rawValueBegin();
+ void rawValueEnd();
- private:
+private:
void attributeImpl(llvm::StringRef Key, Block Contents) {
attributeBegin(Key);
Contents();
@@ -826,18 +970,21 @@ class OStream {
}
void valueBegin();
+ void flushComment();
void newline();
enum Context {
Singleton, // Top level, or object attribute.
Array,
Object,
+ RawValue, // External code writing a value to OS directly.
};
struct State {
Context Ctx = Singleton;
bool HasValue = false;
};
llvm::SmallVector<State, 16> Stack; // Never empty.
+ llvm::StringRef PendingComment;
llvm::raw_ostream &OS;
unsigned IndentSize;
unsigned Indent = 0;
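
An end-to-end sketch of the new Path/ObjectMapper/parse<T> plumbing (illustrative; the Config struct and its field names are hypothetical).

#include "llvm/Support/JSON.h"

using namespace llvm;

struct Config {
  std::string Name;
  Optional<int64_t> Retries;
};

// Deserializer found via ADL, following the convention documented above.
// Errors reported through P carry the exact path, e.g. "config.Retries".
static bool fromJSON(const json::Value &E, Config &C, json::Path P) {
  json::ObjectMapper O(E, P);
  return O && O.map("Name", C.Name) && O.mapOptional("Retries", C.Retries);
}

static Expected<Config> parseConfig(StringRef Text) {
  // parse<T>() parses, runs fromJSON, and converts Path errors to llvm::Error.
  return json::parse<Config>(Text, /*RootName=*/"config");
}
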
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/KnownBits.h b/contrib/llvm-project/llvm/include/llvm/Support/KnownBits.h
index 69040cd23f03..d854aadbd430 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/KnownBits.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/KnownBits.h
@@ -15,6 +15,7 @@
#define LLVM_SUPPORT_KNOWNBITS_H
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Optional.h"
namespace llvm {
@@ -97,6 +98,9 @@ public:
/// Returns true if this value is known to be non-negative.
bool isNonNegative() const { return Zero.isSignBitSet(); }
+ /// Returns true if this value is known to be non-zero.
+ bool isNonZero() const { return !One.isNullValue(); }
+
/// Returns true if this value is known to be positive.
bool isStrictlyPositive() const { return Zero.isSignBitSet() && !One.isNullValue(); }
@@ -110,18 +114,38 @@ public:
Zero.setSignBit();
}
- /// Return the minimal value possible given these KnownBits.
+ /// Return the minimal unsigned value possible given these KnownBits.
APInt getMinValue() const {
// Assume that all bits that aren't known-ones are zeros.
return One;
}
- /// Return the maximal value possible given these KnownBits.
+ /// Return the minimal signed value possible given these KnownBits.
+ APInt getSignedMinValue() const {
+ // Assume that all bits that aren't known-ones are zeros.
+ APInt Min = One;
+ // Sign bit is unknown.
+ if (Zero.isSignBitClear())
+ Min.setSignBit();
+ return Min;
+ }
+
+ /// Return the maximal unsigned value possible given these KnownBits.
APInt getMaxValue() const {
// Assume that all bits that aren't known-zeros are ones.
return ~Zero;
}
+ /// Return the maximal signed value possible given these KnownBits.
+ APInt getSignedMaxValue() const {
+ // Assume that all bits that aren't known-zeros are ones.
+ APInt Max = ~Zero;
+ // Sign bit is unknown.
+ if (One.isSignBitClear())
+ Max.clearSignBit();
+ return Max;
+ }
+
/// Return known bits for a truncation of the value we're tracking.
KnownBits trunc(unsigned BitWidth) const {
return KnownBits(Zero.trunc(BitWidth), One.trunc(BitWidth));
@@ -166,6 +190,20 @@ public:
return *this;
}
+ /// Return known bits for a sign extension or truncation of the value we're
+ /// tracking.
+ KnownBits sextOrTrunc(unsigned BitWidth) const {
+ if (BitWidth > getBitWidth())
+ return sext(BitWidth);
+ if (BitWidth < getBitWidth())
+ return trunc(BitWidth);
+ return *this;
+ }
+
+ /// Return known bits for an in-register sign extension of the value we're
+ /// tracking.
+ KnownBits sextInReg(unsigned SrcBitWidth) const;
+
/// Return a KnownBits with the extracted bits
/// [bitPosition,bitPosition+numBits).
KnownBits extractBits(unsigned NumBits, unsigned BitPosition) const {
@@ -173,6 +211,10 @@ public:
One.extractBits(NumBits, BitPosition));
}
+ /// Return KnownBits based on this, but updated given that the underlying
+ /// value is known to be greater than or equal to Val.
+ KnownBits makeGE(const APInt &Val) const;
+
/// Returns the minimum number of trailing zero bits.
unsigned countMinTrailingZeros() const {
return Zero.countTrailingOnes();
@@ -233,6 +275,16 @@ public:
return getBitWidth() - Zero.countPopulation();
}
+ /// Create known bits from a known constant.
+ static KnownBits makeConstant(const APInt &C) {
+ return KnownBits(~C, C);
+ }
+
+ /// Compute known bits common to LHS and RHS.
+ static KnownBits commonBits(const KnownBits &LHS, const KnownBits &RHS) {
+ return KnownBits(LHS.Zero & RHS.Zero, LHS.One & RHS.One);
+ }
+
/// Compute known bits resulting from adding LHS, RHS and a 1-bit Carry.
static KnownBits computeForAddCarry(
const KnownBits &LHS, const KnownBits &RHS, const KnownBits &Carry);
@@ -241,6 +293,84 @@ public:
static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS,
KnownBits RHS);
+ /// Compute known bits resulting from multiplying LHS and RHS.
+ static KnownBits computeForMul(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute known bits for udiv(LHS, RHS).
+ static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute known bits for urem(LHS, RHS).
+ static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute known bits for srem(LHS, RHS).
+ static KnownBits srem(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute known bits for umax(LHS, RHS).
+ static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute known bits for umin(LHS, RHS).
+ static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute known bits for smax(LHS, RHS).
+ static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute known bits for smin(LHS, RHS).
+ static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute known bits for shl(LHS, RHS).
+ /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
+ static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute known bits for lshr(LHS, RHS).
+ /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
+ static KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute known bits for ashr(LHS, RHS).
+ /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
+ static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Determine if these known bits always give the same ICMP_EQ result.
+ static Optional<bool> eq(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Determine if these known bits always give the same ICMP_NE result.
+ static Optional<bool> ne(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Determine if these known bits always give the same ICMP_UGT result.
+ static Optional<bool> ugt(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Determine if these known bits always give the same ICMP_UGE result.
+ static Optional<bool> uge(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Determine if these known bits always give the same ICMP_ULT result.
+ static Optional<bool> ult(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Determine if these known bits always give the same ICMP_ULE result.
+ static Optional<bool> ule(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Determine if these known bits always give the same ICMP_SGT result.
+ static Optional<bool> sgt(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Determine if these known bits always give the same ICMP_SGE result.
+ static Optional<bool> sge(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Determine if these known bits always give the same ICMP_SLT result.
+ static Optional<bool> slt(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Determine if these known bits always give the same ICMP_SLE result.
+ static Optional<bool> sle(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Insert the bits from a smaller known bits starting at bitPosition.
+ void insertBits(const KnownBits &SubBits, unsigned BitPosition) {
+ Zero.insertBits(SubBits.Zero, BitPosition);
+ One.insertBits(SubBits.One, BitPosition);
+ }
+
+ /// Return a subset of the known bits from [bitPosition,bitPosition+numBits).
+ KnownBits extractBits(unsigned NumBits, unsigned BitPosition) {
+ return KnownBits(Zero.extractBits(NumBits, BitPosition),
+ One.extractBits(NumBits, BitPosition));
+ }
+
/// Update known bits based on ANDing with RHS.
KnownBits &operator&=(const KnownBits &RHS);
@@ -249,6 +379,17 @@ public:
/// Update known bits based on XORing with RHS.
KnownBits &operator^=(const KnownBits &RHS);
+
+ /// Compute known bits for the absolute value.
+ KnownBits abs(bool IntMinIsPoison = false) const;
+
+ KnownBits byteSwap() {
+ return KnownBits(Zero.byteSwap(), One.byteSwap());
+ }
+
+ KnownBits reverseBits() {
+ return KnownBits(Zero.reverseBits(), One.reverseBits());
+ }
};
inline KnownBits operator&(KnownBits LHS, const KnownBits &RHS) {
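
A brief sketch of the new helpers (illustrative, not part of the diff): makeConstant/commonBits intersect knowledge from two sources, and the signed min/max queries account for an unknown sign bit.

#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Sketch: merge knowledge about an 8-bit value from two analyses.
static void knownBitsDemo() {
  KnownBits A = KnownBits::makeConstant(APInt(8, 0x0F)); // exactly 0x0F
  KnownBits B(8);
  B.Zero.setHighBits(4);                          // top nibble known zero
  KnownBits Common = KnownBits::commonBits(A, B); // top nibble zero,
                                                  // low nibble unknown
  APInt SMin = Common.getSignedMinValue();        // 0 (sign bit known zero)
  APInt SMax = Common.getSignedMaxValue();        // 0x0F
  (void)SMin;
  (void)SMax;
}
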
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/LineIterator.h b/contrib/llvm-project/llvm/include/llvm/Support/LineIterator.h
index 2a1e47bfe5b7..b391412685c4 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/LineIterator.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/LineIterator.h
@@ -9,8 +9,10 @@
#ifndef LLVM_SUPPORT_LINEITERATOR_H
#define LLVM_SUPPORT_LINEITERATOR_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include <iterator>
namespace llvm {
@@ -30,7 +32,7 @@ class MemoryBuffer;
/// Note that this iterator requires the buffer to be nul terminated.
class line_iterator
: public std::iterator<std::forward_iterator_tag, StringRef> {
- const MemoryBuffer *Buffer = nullptr;
+ Optional<MemoryBufferRef> Buffer;
char CommentMarker = '\0';
bool SkipBlanks = true;
@@ -41,6 +43,10 @@ public:
/// Default construct an "end" iterator.
line_iterator() = default;
+ /// Construct a new iterator around an unowned memory buffer.
+ explicit line_iterator(const MemoryBufferRef &Buffer, bool SkipBlanks = true,
+ char CommentMarker = '\0');
+
/// Construct a new iterator around some memory buffer.
explicit line_iterator(const MemoryBuffer &Buffer, bool SkipBlanks = true,
char CommentMarker = '\0');
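
Illustrative use of the new MemoryBufferRef constructor (not part of the diff); the buffer must still be nul-terminated.

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBufferRef.h"

using namespace llvm;

// Sketch: walk the lines of an unowned buffer, skipping blanks and '#'
// comments, without wrapping it in a MemoryBuffer first.
static void forEachLine(MemoryBufferRef Buf,
                        function_ref<void(StringRef)> Callback) {
  for (line_iterator I(Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'),
                     E = line_iterator();
       I != E; ++I)
    Callback(*I);
}
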
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h b/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h
index 3bb8220e72e5..1c600d0108b6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h
@@ -111,113 +111,119 @@ namespace llvm {
v8i64 = 61, // 8 x i64
v16i64 = 62, // 16 x i64
v32i64 = 63, // 32 x i64
+ v64i64 = 64, // 64 x i64
+ v128i64 = 65, // 128 x i64
+ v256i64 = 66, // 256 x i64
- v1i128 = 64, // 1 x i128
+ v1i128 = 67, // 1 x i128
FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128,
- v2f16 = 65, // 2 x f16
- v3f16 = 66, // 3 x f16
- v4f16 = 67, // 4 x f16
- v8f16 = 68, // 8 x f16
- v16f16 = 69, // 16 x f16
- v32f16 = 70, // 32 x f16
- v64f16 = 71, // 64 x f16
- v128f16 = 72, // 128 x f16
- v2bf16 = 73, // 2 x bf16
- v3bf16 = 74, // 3 x bf16
- v4bf16 = 75, // 4 x bf16
- v8bf16 = 76, // 8 x bf16
- v16bf16 = 77, // 16 x bf16
- v32bf16 = 78, // 32 x bf16
- v64bf16 = 79, // 64 x bf16
- v128bf16 = 80, // 128 x bf16
- v1f32 = 81, // 1 x f32
- v2f32 = 82, // 2 x f32
- v3f32 = 83, // 3 x f32
- v4f32 = 84, // 4 x f32
- v5f32 = 85, // 5 x f32
- v8f32 = 86, // 8 x f32
- v16f32 = 87, // 16 x f32
- v32f32 = 88, // 32 x f32
- v64f32 = 89, // 64 x f32
- v128f32 = 90, // 128 x f32
- v256f32 = 91, // 256 x f32
- v512f32 = 92, // 512 x f32
- v1024f32 = 93, // 1024 x f32
- v2048f32 = 94, // 2048 x f32
- v1f64 = 95, // 1 x f64
- v2f64 = 96, // 2 x f64
- v4f64 = 97, // 4 x f64
- v8f64 = 98, // 8 x f64
- v16f64 = 99, // 16 x f64
- v32f64 = 100, // 32 x f64
+ v2f16 = 68, // 2 x f16
+ v3f16 = 69, // 3 x f16
+ v4f16 = 70, // 4 x f16
+ v8f16 = 71, // 8 x f16
+ v16f16 = 72, // 16 x f16
+ v32f16 = 73, // 32 x f16
+ v64f16 = 74, // 64 x f16
+ v128f16 = 75, // 128 x f16
+ v2bf16 = 76, // 2 x bf16
+ v3bf16 = 77, // 3 x bf16
+ v4bf16 = 78, // 4 x bf16
+ v8bf16 = 79, // 8 x bf16
+ v16bf16 = 80, // 16 x bf16
+ v32bf16 = 81, // 32 x bf16
+ v64bf16 = 82, // 64 x bf16
+ v128bf16 = 83, // 128 x bf16
+ v1f32 = 84, // 1 x f32
+ v2f32 = 85, // 2 x f32
+ v3f32 = 86, // 3 x f32
+ v4f32 = 87, // 4 x f32
+ v5f32 = 88, // 5 x f32
+ v8f32 = 89, // 8 x f32
+ v16f32 = 90, // 16 x f32
+ v32f32 = 91, // 32 x f32
+ v64f32 = 92, // 64 x f32
+ v128f32 = 93, // 128 x f32
+ v256f32 = 94, // 256 x f32
+ v512f32 = 95, // 512 x f32
+ v1024f32 = 96, // 1024 x f32
+ v2048f32 = 97, // 2048 x f32
+ v1f64 = 98, // 1 x f64
+ v2f64 = 99, // 2 x f64
+ v4f64 = 100, // 4 x f64
+ v8f64 = 101, // 8 x f64
+ v16f64 = 102, // 16 x f64
+ v32f64 = 103, // 32 x f64
+ v64f64 = 104, // 64 x f64
+ v128f64 = 105, // 128 x f64
+ v256f64 = 106, // 256 x f64
FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v2f16,
- LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v32f64,
+ LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64,
FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
- LAST_FIXEDLEN_VECTOR_VALUETYPE = v32f64,
-
- nxv1i1 = 101, // n x 1 x i1
- nxv2i1 = 102, // n x 2 x i1
- nxv4i1 = 103, // n x 4 x i1
- nxv8i1 = 104, // n x 8 x i1
- nxv16i1 = 105, // n x 16 x i1
- nxv32i1 = 106, // n x 32 x i1
- nxv64i1 = 107, // n x 64 x i1
-
- nxv1i8 = 108, // n x 1 x i8
- nxv2i8 = 109, // n x 2 x i8
- nxv4i8 = 110, // n x 4 x i8
- nxv8i8 = 111, // n x 8 x i8
- nxv16i8 = 112, // n x 16 x i8
- nxv32i8 = 113, // n x 32 x i8
- nxv64i8 = 114, // n x 64 x i8
-
- nxv1i16 = 115, // n x 1 x i16
- nxv2i16 = 116, // n x 2 x i16
- nxv4i16 = 117, // n x 4 x i16
- nxv8i16 = 118, // n x 8 x i16
- nxv16i16 = 119, // n x 16 x i16
- nxv32i16 = 120, // n x 32 x i16
-
- nxv1i32 = 121, // n x 1 x i32
- nxv2i32 = 122, // n x 2 x i32
- nxv4i32 = 123, // n x 4 x i32
- nxv8i32 = 124, // n x 8 x i32
- nxv16i32 = 125, // n x 16 x i32
- nxv32i32 = 126, // n x 32 x i32
-
- nxv1i64 = 127, // n x 1 x i64
- nxv2i64 = 128, // n x 2 x i64
- nxv4i64 = 129, // n x 4 x i64
- nxv8i64 = 130, // n x 8 x i64
- nxv16i64 = 131, // n x 16 x i64
- nxv32i64 = 132, // n x 32 x i64
+ LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64,
+
+ nxv1i1 = 107, // n x 1 x i1
+ nxv2i1 = 108, // n x 2 x i1
+ nxv4i1 = 109, // n x 4 x i1
+ nxv8i1 = 110, // n x 8 x i1
+ nxv16i1 = 111, // n x 16 x i1
+ nxv32i1 = 112, // n x 32 x i1
+ nxv64i1 = 113, // n x 64 x i1
+
+ nxv1i8 = 114, // n x 1 x i8
+ nxv2i8 = 115, // n x 2 x i8
+ nxv4i8 = 116, // n x 4 x i8
+ nxv8i8 = 117, // n x 8 x i8
+ nxv16i8 = 118, // n x 16 x i8
+ nxv32i8 = 119, // n x 32 x i8
+ nxv64i8 = 120, // n x 64 x i8
+
+ nxv1i16 = 121, // n x 1 x i16
+ nxv2i16 = 122, // n x 2 x i16
+ nxv4i16 = 123, // n x 4 x i16
+ nxv8i16 = 124, // n x 8 x i16
+ nxv16i16 = 125, // n x 16 x i16
+ nxv32i16 = 126, // n x 32 x i16
+
+ nxv1i32 = 127, // n x 1 x i32
+ nxv2i32 = 128, // n x 2 x i32
+ nxv4i32 = 129, // n x 4 x i32
+ nxv8i32 = 130, // n x 8 x i32
+ nxv16i32 = 131, // n x 16 x i32
+ nxv32i32 = 132, // n x 32 x i32
+
+ nxv1i64 = 133, // n x 1 x i64
+ nxv2i64 = 134, // n x 2 x i64
+ nxv4i64 = 135, // n x 4 x i64
+ nxv8i64 = 136, // n x 8 x i64
+ nxv16i64 = 137, // n x 16 x i64
+ nxv32i64 = 138, // n x 32 x i64
FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64,
- nxv1f16 = 133, // n x 1 x f16
- nxv2f16 = 134, // n x 2 x f16
- nxv4f16 = 135, // n x 4 x f16
- nxv8f16 = 136, // n x 8 x f16
- nxv16f16 = 137, // n x 16 x f16
- nxv32f16 = 138, // n x 32 x f16
- nxv2bf16 = 139, // n x 2 x bf16
- nxv4bf16 = 140, // n x 4 x bf16
- nxv8bf16 = 141, // n x 8 x bf16
- nxv1f32 = 142, // n x 1 x f32
- nxv2f32 = 143, // n x 2 x f32
- nxv4f32 = 144, // n x 4 x f32
- nxv8f32 = 145, // n x 8 x f32
- nxv16f32 = 146, // n x 16 x f32
- nxv1f64 = 147, // n x 1 x f64
- nxv2f64 = 148, // n x 2 x f64
- nxv4f64 = 149, // n x 4 x f64
- nxv8f64 = 150, // n x 8 x f64
+ nxv1f16 = 139, // n x 1 x f16
+ nxv2f16 = 140, // n x 2 x f16
+ nxv4f16 = 141, // n x 4 x f16
+ nxv8f16 = 142, // n x 8 x f16
+ nxv16f16 = 143, // n x 16 x f16
+ nxv32f16 = 144, // n x 32 x f16
+ nxv2bf16 = 145, // n x 2 x bf16
+ nxv4bf16 = 146, // n x 4 x bf16
+ nxv8bf16 = 147, // n x 8 x bf16
+ nxv1f32 = 148, // n x 1 x f32
+ nxv2f32 = 149, // n x 2 x f32
+ nxv4f32 = 150, // n x 4 x f32
+ nxv8f32 = 151, // n x 8 x f32
+ nxv16f32 = 152, // n x 16 x f32
+ nxv1f64 = 153, // n x 1 x f64
+ nxv2f64 = 154, // n x 2 x f64
+ nxv4f64 = 155, // n x 4 x f64
+ nxv8f64 = 156, // n x 8 x f64
FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16,
LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
@@ -228,25 +234,27 @@ namespace llvm {
FIRST_VECTOR_VALUETYPE = v1i1,
LAST_VECTOR_VALUETYPE = nxv8f64,
- x86mmx = 151, // This is an X86 MMX value
+ x86mmx = 157, // This is an X86 MMX value
- Glue = 152, // This glues nodes together during pre-RA sched
+ Glue = 158, // This glues nodes together during pre-RA sched
- isVoid = 153, // This has no value
+ isVoid = 159, // This has no value
- Untyped = 154, // This value takes a register, but has
+ Untyped = 160, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
- exnref = 155, // WebAssembly's exnref type
+ funcref = 161, // WebAssembly's funcref type
+ externref = 162, // WebAssembly's externref type
+ x86amx = 163, // This is an X86 AMX value
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
- LAST_VALUETYPE = 156, // This always remains at the end of the list.
+ LAST_VALUETYPE = 164, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
// This value must be a multiple of 32.
- MAX_ALLOWED_VALUETYPE = 160,
+ MAX_ALLOWED_VALUETYPE = 192,
// A value of type llvm::TokenTy
token = 248,
@@ -419,13 +427,43 @@ namespace llvm {
SimpleTy == MVT::iPTRAny);
}
+ /// Return a vector with the same number of elements as this vector, but
+ /// with the element type converted to an integer type with the same
+ /// bitwidth.
+ MVT changeVectorElementTypeToInteger() const {
+ MVT EltTy = getVectorElementType();
+ MVT IntTy = MVT::getIntegerVT(EltTy.getSizeInBits());
+ MVT VecTy = MVT::getVectorVT(IntTy, getVectorElementCount());
+ assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
+ "Simple vector VT not representable by simple integer vector VT!");
+ return VecTy;
+ }
+
+ /// Return a VT for a vector type whose attributes match ourselves
+ /// with the exception of the element type that is chosen by the caller.
+ MVT changeVectorElementType(MVT EltVT) const {
+ MVT VecTy = MVT::getVectorVT(EltVT, getVectorElementCount());
+ assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
+ "Simple vector VT not representable by simple integer vector VT!");
+ return VecTy;
+ }
+
+ /// Return the type converted to an equivalently sized integer or vector
+ /// with integer element type. Similar to changeVectorElementTypeToInteger,
+ /// but also handles scalars.
+ MVT changeTypeToInteger() {
+ if (isVector())
+ return changeVectorElementTypeToInteger();
+ return MVT::getIntegerVT(getSizeInBits());
+ }
+
/// Return a VT for a vector type with the same element type but
/// half the number of elements.
MVT getHalfNumVectorElementsVT() const {
MVT EltVT = getVectorElementType();
auto EltCnt = getVectorElementCount();
- assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!");
- return getVectorVT(EltVT, EltCnt / 2);
+ assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
+ return getVectorVT(EltVT, EltCnt.divideCoefficientBy(2));
}
/// Returns true if the given vector is a power of 2.
@@ -529,6 +567,9 @@ namespace llvm {
case v8i64:
case v16i64:
case v32i64:
+ case v64i64:
+ case v128i64:
+ case v256i64:
case nxv1i64:
case nxv2i64:
case nxv4i64:
@@ -586,6 +627,9 @@ namespace llvm {
case v8f64:
case v16f64:
case v32f64:
+ case v64f64:
+ case v128f64:
+ case v256f64:
case nxv1f64:
case nxv2f64:
case nxv4f64:
@@ -608,21 +652,27 @@ namespace llvm {
case v256i1:
case v256i8:
case v256i32:
- case v256f32: return 256;
+ case v256i64:
+ case v256f32:
+ case v256f64: return 256;
case v128i1:
case v128i8:
case v128i16:
case v128i32:
+ case v128i64:
case v128f16:
case v128bf16:
- case v128f32: return 128;
+ case v128f32:
+ case v128f64: return 128;
case v64i1:
case v64i8:
case v64i16:
case v64i32:
+ case v64i64:
case v64f16:
case v64bf16:
case v64f32:
+ case v64f64:
case nxv64i1:
case nxv64i8: return 64;
case v32i1:
@@ -737,12 +787,12 @@ namespace llvm {
}
ElementCount getVectorElementCount() const {
- return { getVectorNumElements(), isScalableVector() };
+ return ElementCount::get(getVectorNumElements(), isScalableVector());
}
/// Given a vector type, return the minimum number of elements it contains.
unsigned getVectorMinNumElements() const {
- return getVectorElementCount().Min;
+ return getVectorElementCount().getKnownMinValue();
}
/// Returns the size of the specified MVT in bits.
@@ -910,21 +960,35 @@ namespace llvm {
case v32f64: return TypeSize::Fixed(2048);
case nxv32i64: return TypeSize::Scalable(2048);
case v128i32:
- case v128f32: return TypeSize::Fixed(4096);
+ case v64i64:
+ case v128f32:
+ case v64f64: return TypeSize::Fixed(4096);
case v256i32:
- case v256f32: return TypeSize::Fixed(8192);
+ case v128i64:
+ case v256f32:
+ case x86amx:
+ case v128f64: return TypeSize::Fixed(8192);
case v512i32:
- case v512f32: return TypeSize::Fixed(16384);
+ case v256i64:
+ case v512f32:
+ case v256f64: return TypeSize::Fixed(16384);
case v1024i32:
case v1024f32: return TypeSize::Fixed(32768);
case v2048i32:
case v2048f32: return TypeSize::Fixed(65536);
- case exnref: return TypeSize::Fixed(0); // opaque type
+ case funcref:
+ case externref: return TypeSize::Fixed(0); // opaque type
}
}
- TypeSize getScalarSizeInBits() const {
- return getScalarType().getSizeInBits();
+ /// Return the size of the specified fixed width value type in bits. The
+ /// function will assert if the type is scalable.
+ uint64_t getFixedSizeInBits() const {
+ return getSizeInBits().getFixedSize();
+ }
+
+ uint64_t getScalarSizeInBits() const {
+ return getScalarType().getSizeInBits().getFixedSize();
}
/// Return the number of bytes overwritten by a store of the specified value
@@ -950,28 +1014,56 @@ namespace llvm {
/// Returns true if the number of bits for the type is a multiple of an
/// 8-bit byte.
- bool isByteSized() const {
- return getSizeInBits().isByteSized();
+ bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); }
+
+ /// Return true if we know at compile time this has more bits than VT.
+ bool knownBitsGT(MVT VT) const {
+ return TypeSize::isKnownGT(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has more than or the same
+ /// bits as VT.
+ bool knownBitsGE(MVT VT) const {
+ return TypeSize::isKnownGE(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has fewer bits than VT.
+ bool knownBitsLT(MVT VT) const {
+ return TypeSize::isKnownLT(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has fewer than or the same
+ /// bits as VT.
+ bool knownBitsLE(MVT VT) const {
+ return TypeSize::isKnownLE(getSizeInBits(), VT.getSizeInBits());
}
/// Return true if this has more bits than VT.
bool bitsGT(MVT VT) const {
- return getSizeInBits() > VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsGT(VT);
}
/// Return true if this has no less bits than VT.
bool bitsGE(MVT VT) const {
- return getSizeInBits() >= VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsGE(VT);
}
/// Return true if this has less bits than VT.
bool bitsLT(MVT VT) const {
- return getSizeInBits() < VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsLT(VT);
}
/// Return true if this has no more bits than VT.
bool bitsLE(MVT VT) const {
- return getSizeInBits() <= VT.getSizeInBits();
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsLE(VT);
}
static MVT getFloatingPointVT(unsigned BitWidth) {
@@ -1072,6 +1164,9 @@ namespace llvm {
if (NumElements == 8) return MVT::v8i64;
if (NumElements == 16) return MVT::v16i64;
if (NumElements == 32) return MVT::v32i64;
+ if (NumElements == 64) return MVT::v64i64;
+ if (NumElements == 128) return MVT::v128i64;
+ if (NumElements == 256) return MVT::v256i64;
break;
case MVT::i128:
if (NumElements == 1) return MVT::v1i128;
@@ -1119,6 +1214,9 @@ namespace llvm {
if (NumElements == 8) return MVT::v8f64;
if (NumElements == 16) return MVT::v16f64;
if (NumElements == 32) return MVT::v32f64;
+ if (NumElements == 64) return MVT::v64f64;
+ if (NumElements == 128) return MVT::v128f64;
+ if (NumElements == 256) return MVT::v256f64;
break;
}
return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
@@ -1207,9 +1305,9 @@ namespace llvm {
}
static MVT getVectorVT(MVT VT, ElementCount EC) {
- if (EC.Scalable)
- return getScalableVectorVT(VT, EC.Min);
- return getVectorVT(VT, EC.Min);
+ if (EC.isScalable())
+ return getScalableVectorVT(VT, EC.getKnownMinValue());
+ return getVectorVT(VT, EC.getKnownMinValue());
}
/// Return the value type corresponding to the specified type. This returns
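
A quick sketch of the new MVT helpers (illustrative only).

#include "llvm/Support/MachineValueType.h"

using namespace llvm;

// Sketch: the element-type helpers preserve the (fixed or scalable) element
// count and only change the element type; halving now goes through
// ElementCount and asserts on an odd count.
static void mvtDemo() {
  MVT V = MVT::v4f32;
  MVT AsInt = V.changeVectorElementTypeToInteger(); // v4i32
  MVT AsF16 = V.changeVectorElementType(MVT::f16);  // v4f16
  MVT Half = V.getHalfNumVectorElementsVT();        // v2f32
  (void)AsInt;
  (void)AsF16;
  (void)Half;
}
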
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/MathExtras.h b/contrib/llvm-project/llvm/include/llvm/Support/MathExtras.h
index 16da3046c8ce..33b9065261e8 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/MathExtras.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/MathExtras.h
@@ -440,7 +440,7 @@ inline uint64_t maxUIntN(uint64_t N) {
inline int64_t minIntN(int64_t N) {
assert(N > 0 && N <= 64 && "integer width out of range");
- return -(UINT64_C(1)<<(N-1));
+ return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
}
/// Gets the maximum value for a N-bit signed integer.
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/MemoryBuffer.h b/contrib/llvm-project/llvm/include/llvm/Support/MemoryBuffer.h
index f47a8d2d334b..9e6ee2536c5e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/MemoryBuffer.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/MemoryBuffer.h
@@ -19,14 +19,12 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include <cstddef>
#include <cstdint>
#include <memory>
namespace llvm {
-
-class MemoryBufferRef;
-
namespace sys {
namespace fs {
// Duplicated from FileSystem.h to avoid a dependency.
@@ -260,26 +258,6 @@ private:
using MemoryBuffer::getSTDIN;
};
-class MemoryBufferRef {
- StringRef Buffer;
- StringRef Identifier;
-
-public:
- MemoryBufferRef() = default;
- MemoryBufferRef(const MemoryBuffer& Buffer)
- : Buffer(Buffer.getBuffer()), Identifier(Buffer.getBufferIdentifier()) {}
- MemoryBufferRef(StringRef Buffer, StringRef Identifier)
- : Buffer(Buffer), Identifier(Identifier) {}
-
- StringRef getBuffer() const { return Buffer; }
-
- StringRef getBufferIdentifier() const { return Identifier; }
-
- const char *getBufferStart() const { return Buffer.begin(); }
- const char *getBufferEnd() const { return Buffer.end(); }
- size_t getBufferSize() const { return Buffer.size(); }
-};
-
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MemoryBuffer, LLVMMemoryBufferRef)
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/MemoryBufferRef.h b/contrib/llvm-project/llvm/include/llvm/Support/MemoryBufferRef.h
new file mode 100644
index 000000000000..b38a1f3b6565
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Support/MemoryBufferRef.h
@@ -0,0 +1,56 @@
+//===- MemoryBufferRef.h - Memory Buffer Reference --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MemoryBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MEMORYBUFFERREF_H
+#define LLVM_SUPPORT_MEMORYBUFFERREF_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+class MemoryBuffer;
+
+class MemoryBufferRef {
+ StringRef Buffer;
+ StringRef Identifier;
+
+public:
+ MemoryBufferRef() = default;
+ MemoryBufferRef(const MemoryBuffer &Buffer);
+ MemoryBufferRef(StringRef Buffer, StringRef Identifier)
+ : Buffer(Buffer), Identifier(Identifier) {}
+
+ StringRef getBuffer() const { return Buffer; }
+ StringRef getBufferIdentifier() const { return Identifier; }
+
+ const char *getBufferStart() const { return Buffer.begin(); }
+ const char *getBufferEnd() const { return Buffer.end(); }
+ size_t getBufferSize() const { return Buffer.size(); }
+
+ /// Check pointer identity (not value) of identifier and data.
+ friend bool operator==(const MemoryBufferRef &LHS,
+ const MemoryBufferRef &RHS) {
+ return LHS.Buffer.begin() == RHS.Buffer.begin() &&
+ LHS.Buffer.end() == RHS.Buffer.end() &&
+ LHS.Identifier.begin() == RHS.Identifier.begin() &&
+ LHS.Identifier.end() == RHS.Identifier.end();
+ }
+
+ friend bool operator!=(const MemoryBufferRef &LHS,
+ const MemoryBufferRef &RHS) {
+ return !(LHS == RHS);
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_MEMORYBUFFERREF_H
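
For illustration (not part of the diff), the new comparison is pointer identity, not content equality.

#include "llvm/Support/MemoryBuffer.h"

using namespace llvm;

// Sketch: two refs into the same buffer compare equal; a byte-for-byte copy
// in a different allocation does not, because only pointers are compared.
static bool memoryBufferRefDemo(const MemoryBuffer &MB) {
  MemoryBufferRef A(MB), B(MB);
  std::unique_ptr<MemoryBuffer> Copy =
      MemoryBuffer::getMemBufferCopy(MB.getBuffer(), MB.getBufferIdentifier());
  return A == B && A != MemoryBufferRef(*Copy);
}
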
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Parallel.h b/contrib/llvm-project/llvm/include/llvm/Support/Parallel.h
index 2c0edfbb1db5..d2f006773836 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Parallel.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Parallel.h
@@ -11,6 +11,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Threading.h"
@@ -120,13 +121,17 @@ void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
llvm::Log2_64(std::distance(Start, End)) + 1);
}
+// TaskGroup has a relatively high overhead, so we want to reduce
+// the number of spawn() calls. We'll create up to 1024 tasks here.
+// (Note that 1024 is an arbitrary number. This code probably needs
+// improving to take the number of available cores into account.)
+enum { MaxTasksPerGroup = 1024 };
+
template <class IterTy, class FuncTy>
void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
- // TaskGroup has a relatively high overhead, so we want to reduce
- // the number of spawn() calls. We'll create up to 1024 tasks here.
- // (Note that 1024 is an arbitrary number. This code probably needs
- // improving to take the number of available cores into account.)
- ptrdiff_t TaskSize = std::distance(Begin, End) / 1024;
+ // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+ // overhead on large inputs.
+ ptrdiff_t TaskSize = std::distance(Begin, End) / MaxTasksPerGroup;
if (TaskSize == 0)
TaskSize = 1;
@@ -140,7 +145,9 @@ void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
template <class IndexTy, class FuncTy>
void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
- ptrdiff_t TaskSize = (End - Begin) / 1024;
+ // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+ // overhead on large inputs.
+ ptrdiff_t TaskSize = (End - Begin) / MaxTasksPerGroup;
if (TaskSize == 0)
TaskSize = 1;
@@ -156,6 +163,50 @@ void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
Fn(J);
}
+template <class IterTy, class ResultTy, class ReduceFuncTy,
+ class TransformFuncTy>
+ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init,
+ ReduceFuncTy Reduce,
+ TransformFuncTy Transform) {
+ // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+ // overhead on large inputs.
+ size_t NumInputs = std::distance(Begin, End);
+ if (NumInputs == 0)
+ return std::move(Init);
+ size_t NumTasks = std::min(static_cast<size_t>(MaxTasksPerGroup), NumInputs);
+ std::vector<ResultTy> Results(NumTasks, Init);
+ {
+ // Each task processes either TaskSize or TaskSize+1 inputs. Any inputs
+ // remaining after dividing them equally amongst tasks are distributed as
+ // one extra input over the first tasks.
+ TaskGroup TG;
+ size_t TaskSize = NumInputs / NumTasks;
+ size_t RemainingInputs = NumInputs % NumTasks;
+ IterTy TBegin = Begin;
+ for (size_t TaskId = 0; TaskId < NumTasks; ++TaskId) {
+ IterTy TEnd = TBegin + TaskSize + (TaskId < RemainingInputs ? 1 : 0);
+ TG.spawn([=, &Transform, &Reduce, &Results] {
+ // Reduce the result of transformation eagerly within each task.
+ ResultTy R = Init;
+ for (IterTy It = TBegin; It != TEnd; ++It)
+ R = Reduce(R, Transform(*It));
+ Results[TaskId] = R;
+ });
+ TBegin = TEnd;
+ }
+ assert(TBegin == End);
+ }
+
+ // Do a final reduction. There are at most 1024 tasks, so this only adds
+ // constant single-threaded overhead for large inputs. Hopefully most
+ // reductions are cheaper than the transformation.
+ ResultTy FinalResult = std::move(Results.front());
+ for (ResultTy &PartialResult :
+ makeMutableArrayRef(Results.data() + 1, Results.size() - 1))
+ FinalResult = Reduce(FinalResult, std::move(PartialResult));
+ return std::move(FinalResult);
+}
+
#endif
} // namespace detail
@@ -198,6 +249,22 @@ void parallelForEachN(size_t Begin, size_t End, FuncTy Fn) {
Fn(I);
}
+template <class IterTy, class ResultTy, class ReduceFuncTy,
+ class TransformFuncTy>
+ResultTy parallelTransformReduce(IterTy Begin, IterTy End, ResultTy Init,
+ ReduceFuncTy Reduce,
+ TransformFuncTy Transform) {
+#if LLVM_ENABLE_THREADS
+ if (parallel::strategy.ThreadsRequested != 1) {
+ return parallel::detail::parallel_transform_reduce(Begin, End, Init, Reduce,
+ Transform);
+ }
+#endif
+ for (IterTy I = Begin; I != End; ++I)
+ Init = Reduce(std::move(Init), Transform(*I));
+ return std::move(Init);
+}
+
// Range wrappers.
template <class RangeTy,
class Comparator = std::less<decltype(*std::begin(RangeTy()))>>
@@ -210,6 +277,31 @@ void parallelForEach(RangeTy &&R, FuncTy Fn) {
parallelForEach(std::begin(R), std::end(R), Fn);
}
+template <class RangeTy, class ResultTy, class ReduceFuncTy,
+ class TransformFuncTy>
+ResultTy parallelTransformReduce(RangeTy &&R, ResultTy Init,
+ ReduceFuncTy Reduce,
+ TransformFuncTy Transform) {
+ return parallelTransformReduce(std::begin(R), std::end(R), Init, Reduce,
+ Transform);
+}
+
+// Parallel for-each, but with error handling.
+template <class RangeTy, class FuncTy>
+Error parallelForEachError(RangeTy &&R, FuncTy Fn) {
+ // The transform_reduce algorithm requires that the initial value be copyable.
+ // Error objects are uncopyable. We only need to copy initial success values,
+ // so work around this mismatch via the C API. The C API represents success
+ // values with a null pointer. joinErrors discards null values and joins
+ // multiple errors into an ErrorList.
+ return unwrap(parallelTransformReduce(
+ std::begin(R), std::end(R), wrap(Error::success()),
+ [](LLVMErrorRef Lhs, LLVMErrorRef Rhs) {
+ return wrap(joinErrors(unwrap(Lhs), unwrap(Rhs)));
+ },
+ [&Fn](auto &&V) { return wrap(Fn(V)); }));
+}
+
} // namespace llvm
#endif // LLVM_SUPPORT_PARALLEL_H
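
A minimal usage sketch of the parallelTransformReduce and parallelForEachError helpers added above; the reduction operator is assumed to be associative, since partial results are combined in task order:

  #include "llvm/Support/Error.h"
  #include "llvm/Support/Parallel.h"
  #include <cstdint>
  #include <vector>

  // Sum the squares of the inputs, transforming and reducing in parallel.
  uint64_t sumOfSquares(const std::vector<unsigned> &Values) {
    return llvm::parallelTransformReduce(
        Values, uint64_t(0),
        [](uint64_t A, uint64_t B) { return A + B; },  // reduce
        [](unsigned V) { return uint64_t(V) * V; });   // transform
  }

  // Run a fallible action on every element; all failures are joined into a
  // single Error, success being the null error value described above.
  llvm::Error processAll(const std::vector<unsigned> &Values) {
    return llvm::parallelForEachError(Values, [](unsigned V) -> llvm::Error {
      if (V == 0)
        return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                       "zero is not allowed");
      return llvm::Error::success();
    });
  }
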
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Path.h b/contrib/llvm-project/llvm/include/llvm/Support/Path.h
index 83bca5b70bc2..af70e086a1b6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Path.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Path.h
@@ -451,10 +451,48 @@ bool has_extension(const Twine &path, Style style = Style::native);
/// Is path absolute?
///
+/// According to cppreference.com, C++17 states: "An absolute path is a path
+/// that unambiguously identifies the location of a file without reference to
+/// an additional starting location."
+///
+/// In other words, the rules are:
+/// 1) POSIX style paths with nonempty root directory are absolute.
+/// 2) Windows style paths with nonempty root name and root directory are
+/// absolute.
+/// 3) No other paths are absolute.
+///
+/// \see has_root_name
+/// \see has_root_directory
+///
/// @param path Input path.
/// @result True if the path is absolute, false if it is not.
bool is_absolute(const Twine &path, Style style = Style::native);
+/// Is path absolute using GNU rules?
+///
+/// GNU rules are:
+/// 1) Paths starting with a path separator are absolute.
+/// 2) Windows style paths are also absolute if they start with a character
+/// followed by ':'.
+/// 3) No other paths are absolute.
+///
+/// In Windows style, the path "C:\Users\Default" has "C:" as root name and "\"
+/// as root directory.
+///
+/// Hence "C:" on Windows is absolute under GNU rules and not absolute under
+/// C++17 because it has no root directory. Likewise "/" and "\" on Windows are
+/// absolute under GNU and are not absolute under C++17 due to empty root name.
+///
+/// \see has_root_name
+/// \see has_root_directory
+///
+/// @param path Input path.
+/// @param style The style of \p path (e.g. Windows or POSIX). "native" style
+/// means to derive the style from the host.
+/// @result True if the path is absolute following GNU rules, false if it is
+/// not.
+bool is_absolute_gnu(const Twine &path, Style style = Style::native);
+
/// Is path relative?
///
/// @param path Input path.
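
The difference between the two rule sets documented above can be sketched as follows (C++17 rules via is_absolute, GNU rules via the new is_absolute_gnu):

  #include "llvm/Support/Path.h"
  #include <cassert>

  void absolutePathRules() {
    using namespace llvm::sys::path;
    // "C:" has a root name but no root directory: not absolute under C++17
    // rules, absolute under GNU rules.
    assert(!is_absolute("C:", Style::windows));
    assert(is_absolute_gnu("C:", Style::windows));
    // "\" has a root directory but no root name: again only GNU-absolute.
    assert(!is_absolute("\\", Style::windows));
    assert(is_absolute_gnu("\\", Style::windows));
    // A path with both components is absolute under either rule set.
    assert(is_absolute("C:\\Users\\Default", Style::windows));
    assert(is_absolute_gnu("C:\\Users\\Default", Style::windows));
  }
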
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/PluginLoader.h b/contrib/llvm-project/llvm/include/llvm/Support/PluginLoader.h
index c0c516bdae03..95c087f03d9b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/PluginLoader.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/PluginLoader.h
@@ -16,7 +16,11 @@
#ifndef LLVM_SUPPORT_PLUGINLOADER_H
#define LLVM_SUPPORT_PLUGINLOADER_H
+#ifndef DONT_GET_PLUGIN_LOADER_OPTION
#include "llvm/Support/CommandLine.h"
+#endif
+
+#include <string>
namespace llvm {
struct PluginLoader {
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Process.h b/contrib/llvm-project/llvm/include/llvm/Support/Process.h
index 0ba6d58ba287..729917bb41f4 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Process.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Process.h
@@ -29,6 +29,7 @@
#include "llvm/Support/Chrono.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/Program.h"
#include <system_error>
namespace llvm {
@@ -107,10 +108,12 @@ public:
/// considered.
static Optional<std::string> FindInEnvPath(StringRef EnvName,
StringRef FileName,
- ArrayRef<std::string> IgnoreList);
+ ArrayRef<std::string> IgnoreList,
+ char Separator = EnvPathSeparator);
static Optional<std::string> FindInEnvPath(StringRef EnvName,
- StringRef FileName);
+ StringRef FileName,
+ char Separator = EnvPathSeparator);
// This functions ensures that the standard file descriptors (input, output,
// and error) are properly mapped to a file descriptor before we use any of
@@ -210,8 +213,9 @@ public:
/// Equivalent to ::exit(), except when running inside a CrashRecoveryContext.
/// In that case, the control flow will resume after RunSafely(), like for a
/// crash, rather than exiting the current process.
+ /// Use \p NoCleanup to call _exit() instead of exit().
LLVM_ATTRIBUTE_NORETURN
- static void Exit(int RetCode);
+ static void Exit(int RetCode, bool NoCleanup = false);
};
}
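
A small sketch of the extended FindInEnvPath interface added above; the environment variable name here is hypothetical:

  #include "llvm/Support/Process.h"

  // Search a custom path-like variable that uses ';' as its separator even on
  // POSIX hosts, instead of the platform default EnvPathSeparator.
  llvm::Optional<std::string> findTool() {
    return llvm::sys::Process::FindInEnvPath("MY_TOOL_PATH", "clang",
                                             /*Separator=*/';');
  }
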
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Program.h b/contrib/llvm-project/llvm/include/llvm/Support/Program.h
index dbda064cda05..bfd271958788 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Program.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Program.h
@@ -14,6 +14,7 @@
#define LLVM_SUPPORT_PROGRAM_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
@@ -36,7 +37,7 @@ namespace sys {
typedef unsigned long procid_t; // Must match the type of DWORD on Windows.
typedef void *process_t; // Must match the type of HANDLE on Windows.
#else
- typedef pid_t procid_t;
+ typedef ::pid_t procid_t;
typedef procid_t process_t;
#endif
@@ -125,9 +126,11 @@ namespace sys {
///< string is non-empty upon return an error occurred while invoking the
///< program.
bool *ExecutionFailed = nullptr,
- Optional<ProcessStatistics> *ProcStat = nullptr ///< If non-zero, provides
- /// a pointer to a structure in which process execution statistics will be
- /// stored.
+ Optional<ProcessStatistics> *ProcStat = nullptr, ///< If non-zero,
+ /// provides a pointer to a structure in which process execution
+ /// statistics will be stored.
+ BitVector *AffinityMask = nullptr ///< CPUs or processors the new
+ /// program shall run on.
);
/// Similar to ExecuteAndWait, but returns immediately.
@@ -140,7 +143,8 @@ namespace sys {
ArrayRef<Optional<StringRef>> Redirects = {},
unsigned MemoryLimit = 0,
std::string *ErrMsg = nullptr,
- bool *ExecutionFailed = nullptr);
+ bool *ExecutionFailed = nullptr,
+ BitVector *AffinityMask = nullptr);
/// Return true if the given arguments fit within system-specific
/// argument length limits.
@@ -218,7 +222,7 @@ namespace sys {
/// to build a single flat command line appropriate for calling CreateProcess
/// on
/// Windows.
- std::string flattenWindowsCommandLine(ArrayRef<StringRef> Args);
+ ErrorOr<std::wstring> flattenWindowsCommandLine(ArrayRef<StringRef> Args);
#endif
}
}
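
The new AffinityMask argument slots in after ProcStat. A sketch of pinning a child process to the first two logical CPUs, assuming the leading ExecuteAndWait parameters (environment, redirects, timeout, memory limit) declared earlier in this header:

  #include "llvm/ADT/BitVector.h"
  #include "llvm/Support/Program.h"
  #include <string>

  int runPinned(llvm::StringRef Prog, llvm::ArrayRef<llvm::StringRef> Args) {
    llvm::BitVector Mask(2, true); // allow CPUs 0 and 1 only
    std::string Err;
    bool Failed = false;
    return llvm::sys::ExecuteAndWait(Prog, Args, /*Env=*/llvm::None,
                                     /*Redirects=*/{}, /*SecondsToWait=*/0,
                                     /*MemoryLimit=*/0, &Err, &Failed,
                                     /*ProcStat=*/nullptr, &Mask);
  }
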
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/RISCVTargetParser.def b/contrib/llvm-project/llvm/include/llvm/Support/RISCVTargetParser.def
index 28de6cd40132..6a06f9258105 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/RISCVTargetParser.def
+++ b/contrib/llvm-project/llvm/include/llvm/Support/RISCVTargetParser.def
@@ -1,3 +1,13 @@
+#ifndef PROC_ALIAS
+#define PROC_ALIAS(NAME, RV32, RV64)
+#endif
+
+PROC_ALIAS("generic", "generic-rv32", "generic-rv64")
+PROC_ALIAS("rocket", "rocket-rv32", "rocket-rv64")
+PROC_ALIAS("sifive-7-series", "sifive-7-rv32", "sifive-7-rv64")
+
+#undef PROC_ALIAS
+
#ifndef PROC
#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH)
#endif
@@ -7,7 +17,11 @@ PROC(GENERIC_RV32, {"generic-rv32"}, FK_NONE, {""})
PROC(GENERIC_RV64, {"generic-rv64"}, FK_64BIT, {""})
PROC(ROCKET_RV32, {"rocket-rv32"}, FK_NONE, {""})
PROC(ROCKET_RV64, {"rocket-rv64"}, FK_64BIT, {""})
+PROC(SIFIVE_732, {"sifive-7-rv32"}, FK_NONE, {""})
+PROC(SIFIVE_764, {"sifive-7-rv64"}, FK_64BIT, {""})
PROC(SIFIVE_E31, {"sifive-e31"}, FK_NONE, {"rv32imac"})
PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"})
+PROC(SIFIVE_E76, {"sifive-e76"}, FK_NONE, {"rv32imafc"})
+PROC(SIFIVE_U74, {"sifive-u74"}, FK_64BIT, {"rv64gc"})
#undef PROC
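
Consumers of this .def file expand the table by defining the macro before including it; a hypothetical sketch of how the new PROC_ALIAS entries could be consumed (the real consumer lives in the RISC-V target parser):

  struct RISCVProcAlias {
    const char *Name;
    const char *RV32CPU;
    const char *RV64CPU;
  };

  static const RISCVProcAlias RISCVAliases[] = {
  #define PROC_ALIAS(NAME, RV32, RV64) {NAME, RV32, RV64},
  #include "llvm/Support/RISCVTargetParser.def"
  };
  // "sifive-7-series" then resolves to "sifive-7-rv32" or "sifive-7-rv64"
  // depending on whether the target is 32- or 64-bit.
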
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Signals.h b/contrib/llvm-project/llvm/include/llvm/Support/Signals.h
index e0a18e72f2a7..44f5a750ff5c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Signals.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Signals.h
@@ -50,7 +50,9 @@ namespace sys {
void DisableSystemDialogsOnCrash();
/// Print the stack trace using the given \c raw_ostream object.
- void PrintStackTrace(raw_ostream &OS);
+ /// \param Depth refers to the number of stack frames to print. If not
+ /// specified, the entire stack is printed.
+ void PrintStackTrace(raw_ostream &OS, int Depth = 0);
// Run all registered signal handlers.
void RunSignalHandlers();
@@ -115,6 +117,8 @@ namespace sys {
/// Context is a system-specific failure context: it is the signal type on
/// Unix; the ExceptionContext on Windows.
void CleanupOnSignal(uintptr_t Context);
+
+ void unregisterHandlers();
} // End sys namespace
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Signposts.h b/contrib/llvm-project/llvm/include/llvm/Support/Signposts.h
index b5a8c3d61e3e..8036b1f53663 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Signposts.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Signposts.h
@@ -17,9 +17,10 @@
#ifndef LLVM_SUPPORT_SIGNPOSTS_H
#define LLVM_SUPPORT_SIGNPOSTS_H
+#include "llvm/ADT/StringRef.h"
+
namespace llvm {
class SignpostEmitterImpl;
-class Timer;
/// Manages the emission of signposts into the recording method supported by
/// the OS.
@@ -32,10 +33,10 @@ public:
bool isEnabled() const;
- /// Begin a signposted interval for the given timer.
- void startTimerInterval(Timer *T);
- /// End a signposted interval for the given timer.
- void endTimerInterval(Timer *T);
+ /// Begin a signposted interval for a given object.
+ void startInterval(const void *O, StringRef Name);
+ /// End a signposted interval for a given object.
+ void endInterval(const void *O, StringRef Name);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/SourceMgr.h b/contrib/llvm-project/llvm/include/llvm/Support/SourceMgr.h
index a0bd3ca2e0c1..28716b42f4ab 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/SourceMgr.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/SourceMgr.h
@@ -172,6 +172,11 @@ public:
std::pair<unsigned, unsigned> getLineAndColumn(SMLoc Loc,
unsigned BufferID = 0) const;
+ /// Get a string with the \p SMLoc filename and line number
+ /// formatted in the standard style.
+ std::string getFormattedLocationNoOffset(SMLoc Loc,
+ bool IncludePath = false) const;
+
/// Given a line and column number in a mapped buffer, turn it into an SMLoc.
/// This will return a null SMLoc if the line/column location is invalid.
SMLoc FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo,
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/SuffixTree.h b/contrib/llvm-project/llvm/include/llvm/Support/SuffixTree.h
index 67d513d032ce..352fba511937 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/SuffixTree.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/SuffixTree.h
@@ -322,10 +322,10 @@ public:
return It;
}
- bool operator==(const RepeatedSubstringIterator &Other) {
+ bool operator==(const RepeatedSubstringIterator &Other) const {
return N == Other.N;
}
- bool operator!=(const RepeatedSubstringIterator &Other) {
+ bool operator!=(const RepeatedSubstringIterator &Other) const {
return !(*this == Other);
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/SwapByteOrder.h b/contrib/llvm-project/llvm/include/llvm/Support/SwapByteOrder.h
index 0e544fc7e71e..e8612ba6654b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/SwapByteOrder.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/SwapByteOrder.h
@@ -22,7 +22,7 @@
#endif
#if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__) || \
- defined(__EMSCRIPTEN__)
+ defined(__Fuchsia__) || defined(__EMSCRIPTEN__)
#include <endian.h>
#elif defined(_AIX)
#include <sys/machine.h>
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/SymbolRemappingReader.h b/contrib/llvm-project/llvm/include/llvm/Support/SymbolRemappingReader.h
index 2b9ab570eb8b..820cf9e02192 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/SymbolRemappingReader.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/SymbolRemappingReader.h
@@ -68,7 +68,7 @@ namespace llvm {
class SymbolRemappingParseError : public ErrorInfo<SymbolRemappingParseError> {
public:
- SymbolRemappingParseError(StringRef File, int64_t Line, Twine Message)
+ SymbolRemappingParseError(StringRef File, int64_t Line, const Twine &Message)
: File(File), Line(Line), Message(Message.str()) {}
void log(llvm::raw_ostream &OS) const override {
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/TargetOpcodes.def b/contrib/llvm-project/llvm/include/llvm/Support/TargetOpcodes.def
index c069f5d22ba8..a63d40484089 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/contrib/llvm-project/llvm/include/llvm/Support/TargetOpcodes.def
@@ -77,6 +77,10 @@ HANDLE_TARGET_OPCODE(SUBREG_TO_REG)
/// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic
HANDLE_TARGET_OPCODE(DBG_VALUE)
+/// DBG_INSTR_REF - A mapping of llvm.dbg.value referring to the instruction
+/// that defines the value, rather than a virtual register.
+HANDLE_TARGET_OPCODE(DBG_INSTR_REF)
+
/// DBG_LABEL - a mapping of the llvm.dbg.label intrinsic
HANDLE_TARGET_OPCODE(DBG_LABEL)
@@ -106,6 +110,9 @@ HANDLE_TARGET_OPCODE(BUNDLE)
HANDLE_TARGET_OPCODE(LIFETIME_START)
HANDLE_TARGET_OPCODE(LIFETIME_END)
+/// Pseudo probe
+HANDLE_TARGET_OPCODE(PSEUDO_PROBE)
+
/// A Stackmap instruction captures the location of live variables at its
/// position in the instruction stream. It is followed by a shadow of bytes
/// that must lie within the function and not contain another stackmap.
@@ -294,6 +301,12 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC)
/// INTRINSIC round intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUND)
+/// INTRINSIC round to integer intrinsic.
+HANDLE_TARGET_OPCODE(G_INTRINSIC_LRINT)
+
+/// INTRINSIC roundeven intrinsic.
+HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUNDEVEN)
+
/// INTRINSIC readcyclecounter
HANDLE_TARGET_OPCODE(G_READCYCLECOUNTER)
@@ -469,6 +482,36 @@ HANDLE_TARGET_OPCODE(G_USUBSAT)
/// Generic saturating signed subtraction.
HANDLE_TARGET_OPCODE(G_SSUBSAT)
+/// Generic saturating unsigned left shift.
+HANDLE_TARGET_OPCODE(G_USHLSAT)
+
+/// Generic saturating signed left shift.
+HANDLE_TARGET_OPCODE(G_SSHLSAT)
+
+// Perform signed fixed point multiplication
+HANDLE_TARGET_OPCODE(G_SMULFIX)
+
+// Perform unsigned fixed point multiplication
+HANDLE_TARGET_OPCODE(G_UMULFIX)
+
+// Perform signed, saturating fixed point multiplication
+HANDLE_TARGET_OPCODE(G_SMULFIXSAT)
+
+// Perform unsigned, saturating fixed point multiplication
+HANDLE_TARGET_OPCODE(G_UMULFIXSAT)
+
+// Perform signed fixed point division
+HANDLE_TARGET_OPCODE(G_SDIVFIX)
+
+// Perform unsigned fixed point division
+HANDLE_TARGET_OPCODE(G_UDIVFIX)
+
+// Perform signed, saturating fixed point division
+HANDLE_TARGET_OPCODE(G_SDIVFIXSAT)
+
+// Perform unsigned, saturating fixed point division
+HANDLE_TARGET_OPCODE(G_UDIVFIXSAT)
+
/// Generic FP addition.
HANDLE_TARGET_OPCODE(G_FADD)
@@ -493,6 +536,9 @@ HANDLE_TARGET_OPCODE(G_FREM)
/// Generic FP exponentiation.
HANDLE_TARGET_OPCODE(G_FPOW)
+/// Generic FP exponentiation, with an integer exponent.
+HANDLE_TARGET_OPCODE(G_FPOWI)
+
/// Generic base-e exponential of a value.
HANDLE_TARGET_OPCODE(G_FEXP)
@@ -571,6 +617,9 @@ HANDLE_TARGET_OPCODE(G_UMIN)
/// Generic unsigned integer maximum.
HANDLE_TARGET_OPCODE(G_UMAX)
+/// Generic integer absolute value.
+HANDLE_TARGET_OPCODE(G_ABS)
+
/// Generic BRANCH instruction. This is an unconditional branch.
HANDLE_TARGET_OPCODE(G_BR)
@@ -655,10 +704,36 @@ HANDLE_TARGET_OPCODE(G_READ_REGISTER)
/// write_register intrinsic
HANDLE_TARGET_OPCODE(G_WRITE_REGISTER)
+/// llvm.memcpy intrinsic
+HANDLE_TARGET_OPCODE(G_MEMCPY)
+
+/// llvm.memmove intrinsic
+HANDLE_TARGET_OPCODE(G_MEMMOVE)
+
+/// llvm.memset intrinsic
+HANDLE_TARGET_OPCODE(G_MEMSET)
+
+/// Vector reductions
+HANDLE_TARGET_OPCODE(G_VECREDUCE_SEQ_FADD)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_SEQ_FMUL)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_FADD)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_FMUL)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_FMAX)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_FMIN)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_ADD)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_MUL)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_AND)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_OR)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_XOR)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_SMAX)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_SMIN)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_UMAX)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_UMIN)
+
/// Marker for the end of the generic opcode.
/// This is used to check if an opcode is in the range of the
/// generic opcodes.
-HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_WRITE_REGISTER)
+HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_VECREDUCE_UMIN)
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific post-isel opcode values start here.
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/TargetParser.h b/contrib/llvm-project/llvm/include/llvm/Support/TargetParser.h
index f521d8f836b4..450e713f27f2 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/TargetParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/TargetParser.h
@@ -62,17 +62,20 @@ enum GPUKind : uint32_t {
// AMDGCN-based processors.
GK_GFX600 = 32,
GK_GFX601 = 33,
+ GK_GFX602 = 34,
GK_GFX700 = 40,
GK_GFX701 = 41,
GK_GFX702 = 42,
GK_GFX703 = 43,
GK_GFX704 = 44,
+ GK_GFX705 = 45,
GK_GFX801 = 50,
GK_GFX802 = 51,
GK_GFX803 = 52,
- GK_GFX810 = 53,
+ GK_GFX805 = 53,
+ GK_GFX810 = 54,
GK_GFX900 = 60,
GK_GFX902 = 61,
@@ -80,14 +83,18 @@ enum GPUKind : uint32_t {
GK_GFX906 = 63,
GK_GFX908 = 64,
GK_GFX909 = 65,
+ GK_GFX90C = 66,
GK_GFX1010 = 71,
GK_GFX1011 = 72,
GK_GFX1012 = 73,
GK_GFX1030 = 75,
+ GK_GFX1031 = 76,
+ GK_GFX1032 = 77,
+ GK_GFX1033 = 78,
GK_AMDGCN_FIRST = GK_GFX600,
- GK_AMDGCN_LAST = GK_GFX1030,
+ GK_AMDGCN_LAST = GK_GFX1033,
};
/// Instruction set architecture version.
@@ -112,12 +119,18 @@ enum ArchFeatureKind : uint32_t {
FEATURE_FAST_DENORMAL_F32 = 1 << 5,
// Wavefront 32 is available.
- FEATURE_WAVE32 = 1 << 6
+ FEATURE_WAVE32 = 1 << 6,
+
+ // Xnack is available.
+ FEATURE_XNACK = 1 << 7,
+
+ // Sram-ecc is available.
+ FEATURE_SRAMECC = 1 << 8,
};
StringRef getArchNameAMDGCN(GPUKind AK);
StringRef getArchNameR600(GPUKind AK);
-StringRef getCanonicalArchName(StringRef Arch);
+StringRef getCanonicalArchName(const Triple &T, StringRef Arch);
GPUKind parseArchAMDGCN(StringRef CPU);
GPUKind parseArchR600(StringRef CPU);
unsigned getArchAttrAMDGCN(GPUKind AK);
@@ -149,10 +162,14 @@ enum FeatureKind : unsigned {
};
bool checkCPUKind(CPUKind Kind, bool IsRV64);
+bool checkTuneCPUKind(CPUKind Kind, bool IsRV64);
CPUKind parseCPUKind(StringRef CPU);
+CPUKind parseTuneCPUKind(StringRef CPU, bool IsRV64);
StringRef getMArchFromMcpu(StringRef CPU);
void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64);
+void fillValidTuneCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64);
bool getCPUFeaturesExceptStdExt(CPUKind Kind, std::vector<StringRef> &Features);
+StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsRV64);
} // namespace RISCV
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/TargetRegistry.h b/contrib/llvm-project/llvm/include/llvm/Support/TargetRegistry.h
index d91eabae8235..2c65eb60f910 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/TargetRegistry.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/TargetRegistry.h
@@ -510,6 +510,8 @@ public:
S = createWasmStreamer(Ctx, std::move(TAB), std::move(OW),
std::move(Emitter), RelaxAll);
break;
+ case Triple::GOFF:
+ report_fatal_error("GOFF MCObjectStreamer not implemented yet");
case Triple::XCOFF:
S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW),
std::move(Emitter), RelaxAll);
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/TaskQueue.h b/contrib/llvm-project/llvm/include/llvm/Support/TaskQueue.h
index 4ceb056391af..6901a550b62f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/TaskQueue.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/TaskQueue.h
@@ -98,7 +98,7 @@ public:
IsTaskInFlight = true;
}
}
- return std::move(F);
+ return F;
}
private:
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Threading.h b/contrib/llvm-project/llvm/include/llvm/Support/Threading.h
index 13000575f270..46cf82524e57 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Threading.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Threading.h
@@ -210,7 +210,7 @@ void llvm_execute_on_thread_async(
return heavyweight_hardware_concurrency();
}
- /// Returns a default thread strategy where all available hardware ressources
+ /// Returns a default thread strategy where all available hardware resources
/// are to be used, except for those initially excluded by an affinity mask.
/// This function takes affinity into consideration. Returns 1 when LLVM is
/// configured with LLVM_ENABLE_THREADS=OFF.
@@ -220,6 +220,16 @@ void llvm_execute_on_thread_async(
return S;
}
+ /// Returns an optimal thread strategy to execute the specified number of
+ /// tasks. This strategy should prevent us from creating too many threads if
+ /// we occasionally have an unexpectedly small number of tasks.
+ inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
+ ThreadPoolStrategy S;
+ S.Limit = true;
+ S.ThreadsRequested = TaskCount;
+ return S;
+ }
+
/// Return the current thread id, as used in various OS system calls.
/// Note that not all platforms guarantee that the value returned will be
/// unique across the entire system, so portable code should not assume
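
A sketch of the new optimal_concurrency() strategy in use; ThreadPool is assumed to be the standard llvm::ThreadPool from llvm/Support/ThreadPool.h:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/Support/ThreadPool.h"
  #include "llvm/Support/Threading.h"
  #include <functional>

  // Spawn at most one worker per task, so a small batch of work does not
  // fire up a whole machine's worth of threads.
  void runTasks(llvm::ArrayRef<std::function<void()>> Tasks) {
    llvm::ThreadPool Pool(llvm::optimal_concurrency(Tasks.size()));
    for (const auto &T : Tasks)
      Pool.async(T);
    Pool.wait();
  }
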
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/ToolOutputFile.h b/contrib/llvm-project/llvm/include/llvm/Support/ToolOutputFile.h
index cf01b9ecefc5..ec1d6ae52268 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/ToolOutputFile.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/ToolOutputFile.h
@@ -35,6 +35,7 @@ class ToolOutputFile {
/// The flag which indicates whether we should not delete the file.
bool Keep;
+ StringRef getFilename() { return Filename; }
explicit CleanupInstaller(StringRef Filename);
~CleanupInstaller();
} Installer;
@@ -57,6 +58,9 @@ public:
/// Return the contained raw_fd_ostream.
raw_fd_ostream &os() { return *OS; }
+ /// Return the filename this object was initialized with.
+ StringRef getFilename() { return Installer.getFilename(); }
+
/// Indicate that the tool's job wrt this output file has been successful and
/// the file should not be deleted.
void keep() { Installer.Keep = true; }
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/TrigramIndex.h b/contrib/llvm-project/llvm/include/llvm/Support/TrigramIndex.h
index d635694eb5fd..0be6a1012718 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/TrigramIndex.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/TrigramIndex.h
@@ -27,7 +27,7 @@
#define LLVM_SUPPORT_TRIGRAMINDEX_H
#include "llvm/ADT/SmallVector.h"
-
+#include "llvm/ADT/StringRef.h"
#include <string>
#include <unordered_map>
#include <vector>
@@ -38,7 +38,7 @@ class StringRef;
class TrigramIndex {
public:
/// Inserts a new Regex into the index.
- void insert(std::string Regex);
+ void insert(const std::string &Regex);
/// Returns true, if special case list definitely does not have a line
/// that matches the query. Returns false, if it's not sure.
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/TypeSize.h b/contrib/llvm-project/llvm/include/llvm/Support/TypeSize.h
index 76564c401e8e..d277affdbb23 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/TypeSize.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/TypeSize.h
@@ -15,152 +15,417 @@
#ifndef LLVM_SUPPORT_TYPESIZE_H
#define LLVM_SUPPORT_TYPESIZE_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/WithColor.h"
-#include <cstdint>
+#include <algorithm>
+#include <array>
#include <cassert>
+#include <cstdint>
+#include <type_traits>
namespace llvm {
-template <typename T> struct DenseMapInfo;
+template <typename LeafTy> struct LinearPolyBaseTypeTraits {};
+
+//===----------------------------------------------------------------------===//
+// LinearPolyBase - a base class for linear polynomials with multiple
+// dimensions. This can e.g. be used to describe offsets that have both a
+// fixed and scalable component.
+//===----------------------------------------------------------------------===//
+
+/// LinearPolyBase describes a linear polynomial:
+/// c0 * scale0 + c1 * scale1 + ... + cK * scaleK
+/// where the scale is implicit, so only the coefficients are encoded.
+template <typename LeafTy>
+class LinearPolyBase {
+public:
+ using ScalarTy = typename LinearPolyBaseTypeTraits<LeafTy>::ScalarTy;
+ static constexpr auto Dimensions = LinearPolyBaseTypeTraits<LeafTy>::Dimensions;
+ static_assert(Dimensions != std::numeric_limits<unsigned>::max(),
+ "Dimensions out of range");
+
+private:
+ std::array<ScalarTy, Dimensions> Coefficients;
+
+protected:
+ LinearPolyBase(ArrayRef<ScalarTy> Values) {
+ std::copy(Values.begin(), Values.end(), Coefficients.begin());
+ }
-class ElementCount {
public:
- unsigned Min; // Minimum number of vector elements.
- bool Scalable; // If true, NumElements is a multiple of 'Min' determined
- // at runtime rather than compile time.
+ friend LeafTy &operator+=(LeafTy &LHS, const LeafTy &RHS) {
+ for (unsigned I=0; I<Dimensions; ++I)
+ LHS.Coefficients[I] += RHS.Coefficients[I];
+ return LHS;
+ }
- ElementCount() = default;
+ friend LeafTy &operator-=(LeafTy &LHS, const LeafTy &RHS) {
+ for (unsigned I=0; I<Dimensions; ++I)
+ LHS.Coefficients[I] -= RHS.Coefficients[I];
+ return LHS;
+ }
- ElementCount(unsigned Min, bool Scalable)
- : Min(Min), Scalable(Scalable) {}
+ friend LeafTy &operator*=(LeafTy &LHS, ScalarTy RHS) {
+ for (auto &C : LHS.Coefficients)
+ C *= RHS;
+ return LHS;
+ }
- ElementCount operator*(unsigned RHS) {
- return { Min * RHS, Scalable };
+ friend LeafTy operator+(const LeafTy &LHS, const LeafTy &RHS) {
+ LeafTy Copy = LHS;
+ return Copy += RHS;
}
- ElementCount operator/(unsigned RHS) {
- assert(Min % RHS == 0 && "Min is not a multiple of RHS.");
- return { Min / RHS, Scalable };
+
+ friend LeafTy operator-(const LeafTy &LHS, const LeafTy &RHS) {
+ LeafTy Copy = LHS;
+ return Copy -= RHS;
}
- bool operator==(const ElementCount& RHS) const {
- return Min == RHS.Min && Scalable == RHS.Scalable;
+ friend LeafTy operator*(const LeafTy &LHS, ScalarTy RHS) {
+ LeafTy Copy = LHS;
+ return Copy *= RHS;
}
- bool operator!=(const ElementCount& RHS) const {
+
+ template <typename U = ScalarTy>
+ friend typename std::enable_if_t<std::is_signed<U>::value, LeafTy>
+ operator-(const LeafTy &LHS) {
+ LeafTy Copy = LHS;
+ return Copy *= -1;
+ }
+
+ bool operator==(const LinearPolyBase &RHS) const {
+ return std::equal(Coefficients.begin(), Coefficients.end(),
+ RHS.Coefficients.begin());
+ }
+
+ bool operator!=(const LinearPolyBase &RHS) const {
return !(*this == RHS);
}
- bool operator==(unsigned RHS) const { return Min == RHS && !Scalable; }
- bool operator!=(unsigned RHS) const { return !(*this == RHS); }
- ElementCount NextPowerOf2() const {
- return ElementCount(llvm::NextPowerOf2(Min), Scalable);
+ bool isZero() const {
+ return all_of(Coefficients, [](const ScalarTy &C) { return C == 0; });
}
+ bool isNonZero() const { return !isZero(); }
+ explicit operator bool() const { return isNonZero(); }
+
+ ScalarTy getValue(unsigned Dim) const { return Coefficients[Dim]; }
};
-// This class is used to represent the size of types. If the type is of fixed
-// size, it will represent the exact size. If the type is a scalable vector,
-// it will represent the known minimum size.
-class TypeSize {
- uint64_t MinSize; // The known minimum size.
- bool IsScalable; // If true, then the runtime size is an integer multiple
- // of MinSize.
+//===----------------------------------------------------------------------===//
+// StackOffset - Represent an offset with named fixed and scalable components.
+//===----------------------------------------------------------------------===//
+
+class StackOffset;
+template <> struct LinearPolyBaseTypeTraits<StackOffset> {
+ using ScalarTy = int64_t;
+ static constexpr unsigned Dimensions = 2;
+};
+
+/// StackOffset is a class to represent an offset with 2 dimensions,
+/// named fixed and scalable, respectively. This class allows a value for both
+/// dimensions to depict e.g. "8 bytes and 16 scalable bytes", which is needed
+/// to represent stack offsets.
+class StackOffset : public LinearPolyBase<StackOffset> {
+protected:
+ StackOffset(ScalarTy Fixed, ScalarTy Scalable)
+ : LinearPolyBase<StackOffset>({Fixed, Scalable}) {}
+
+public:
+ StackOffset() : StackOffset({0, 0}) {}
+ StackOffset(const LinearPolyBase<StackOffset> &Other)
+ : LinearPolyBase<StackOffset>(Other) {}
+ static StackOffset getFixed(ScalarTy Fixed) { return {Fixed, 0}; }
+ static StackOffset getScalable(ScalarTy Scalable) { return {0, Scalable}; }
+ static StackOffset get(ScalarTy Fixed, ScalarTy Scalable) {
+ return {Fixed, Scalable};
+ }
+
+ ScalarTy getFixed() const { return this->getValue(0); }
+ ScalarTy getScalable() const { return this->getValue(1); }
+};
+
+//===----------------------------------------------------------------------===//
+// UnivariateLinearPolyBase - a base class for linear polynomials with multiple
+// dimensions, but where only one dimension can be set at any time.
+// This can e.g. be used to describe sizes that are either fixed or scalable.
+//===----------------------------------------------------------------------===//
+/// UnivariateLinearPolyBase is a base class for ElementCount and TypeSize.
+/// Like LinearPolyBase it tries to represent a linear polynomial
+/// where only one dimension can be set at any time, e.g.
+/// 0 * scale0 + 0 * scale1 + ... + cJ * scaleJ + ... + 0 * scaleK
+/// The dimension that is set is the univariate dimension.
+template <typename LeafTy>
+class UnivariateLinearPolyBase {
public:
- constexpr TypeSize(uint64_t MinSize, bool Scalable)
- : MinSize(MinSize), IsScalable(Scalable) {}
+ using ScalarTy = typename LinearPolyBaseTypeTraits<LeafTy>::ScalarTy;
+ static constexpr auto Dimensions = LinearPolyBaseTypeTraits<LeafTy>::Dimensions;
+ static_assert(Dimensions != std::numeric_limits<unsigned>::max(),
+ "Dimensions out of range");
+
+protected:
+ ScalarTy Value; // The value at the univariate dimension.
+ unsigned UnivariateDim; // The univariate dimension.
+
+ UnivariateLinearPolyBase(ScalarTy Val, unsigned UnivariateDim)
+ : Value(Val), UnivariateDim(UnivariateDim) {
+ assert(UnivariateDim < Dimensions && "Dimension out of range");
+ }
+
+ friend LeafTy &operator+=(LeafTy &LHS, const LeafTy &RHS) {
+ assert(LHS.UnivariateDim == RHS.UnivariateDim && "Invalid dimensions");
+ LHS.Value += RHS.Value;
+ return LHS;
+ }
+
+ friend LeafTy &operator-=(LeafTy &LHS, const LeafTy &RHS) {
+ assert(LHS.UnivariateDim == RHS.UnivariateDim && "Invalid dimensions");
+ LHS.Value -= RHS.Value;
+ return LHS;
+ }
+
+ friend LeafTy &operator*=(LeafTy &LHS, ScalarTy RHS) {
+ LHS.Value *= RHS;
+ return LHS;
+ }
- static constexpr TypeSize Fixed(uint64_t Size) {
- return TypeSize(Size, /*IsScalable=*/false);
+ friend LeafTy operator+(const LeafTy &LHS, const LeafTy &RHS) {
+ LeafTy Copy = LHS;
+ return Copy += RHS;
}
- static constexpr TypeSize Scalable(uint64_t MinSize) {
- return TypeSize(MinSize, /*IsScalable=*/true);
+ friend LeafTy operator-(const LeafTy &LHS, const LeafTy &RHS) {
+ LeafTy Copy = LHS;
+ return Copy -= RHS;
}
- // Scalable vector types with the same minimum size as a fixed size type are
- // not guaranteed to be the same size at runtime, so they are never
- // considered to be equal.
- friend bool operator==(const TypeSize &LHS, const TypeSize &RHS) {
- return LHS.MinSize == RHS.MinSize && LHS.IsScalable == RHS.IsScalable;
+ friend LeafTy operator*(const LeafTy &LHS, ScalarTy RHS) {
+ LeafTy Copy = LHS;
+ return Copy *= RHS;
}
- friend bool operator!=(const TypeSize &LHS, const TypeSize &RHS) {
- return !(LHS == RHS);
+ template <typename U = ScalarTy>
+ friend typename std::enable_if<std::is_signed<U>::value, LeafTy>::type
+ operator-(const LeafTy &LHS) {
+ LeafTy Copy = LHS;
+ return Copy *= -1;
}
- // For many cases, size ordering between scalable and fixed size types cannot
+public:
+ bool operator==(const UnivariateLinearPolyBase &RHS) const {
+ return Value == RHS.Value && UnivariateDim == RHS.UnivariateDim;
+ }
+
+ bool operator!=(const UnivariateLinearPolyBase &RHS) const {
+ return !(*this == RHS);
+ }
+
+ bool isZero() const { return !Value; }
+ bool isNonZero() const { return !isZero(); }
+ explicit operator bool() const { return isNonZero(); }
+ ScalarTy getValue() const { return Value; }
+ ScalarTy getValue(unsigned Dim) const {
+ return Dim == UnivariateDim ? Value : 0;
+ }
+
+ /// Add \p RHS to the value at the univariate dimension.
+ LeafTy getWithIncrement(ScalarTy RHS) {
+ return static_cast<LeafTy>(
+ UnivariateLinearPolyBase(Value + RHS, UnivariateDim));
+ }
+
+ /// Subtract \p RHS from the value at the univariate dimension.
+ LeafTy getWithDecrement(ScalarTy RHS) {
+ return static_cast<LeafTy>(
+ UnivariateLinearPolyBase(Value - RHS, UnivariateDim));
+ }
+};
+
+
+//===----------------------------------------------------------------------===//
+// LinearPolySize - base class for fixed- or scalable sizes.
+// ^ ^
+// | |
+// | +----- ElementCount - Leaf class to represent an element count
+// | (vscale x unsigned)
+// |
+// +-------- TypeSize - Leaf class to represent a type size
+// (vscale x uint64_t)
+//===----------------------------------------------------------------------===//
+
+/// LinearPolySize is a base class to represent sizes. It is either
+/// fixed-sized or it is scalable-sized, but it cannot be both.
+template <typename LeafTy>
+class LinearPolySize : public UnivariateLinearPolyBase<LeafTy> {
+ // Make the parent class a friend, so that it can access the protected
+ // conversion/copy-constructor for UnivariateLinearPolyBase<LeafTy> ->
+ // LinearPolySize<LeafTy>.
+ friend class UnivariateLinearPolyBase<LeafTy>;
+
+public:
+ using ScalarTy = typename UnivariateLinearPolyBase<LeafTy>::ScalarTy;
+ enum Dims : unsigned { FixedDim = 0, ScalableDim = 1 };
+
+protected:
+ LinearPolySize(ScalarTy MinVal, Dims D)
+ : UnivariateLinearPolyBase<LeafTy>(MinVal, D) {}
+
+ LinearPolySize(const UnivariateLinearPolyBase<LeafTy> &V)
+ : UnivariateLinearPolyBase<LeafTy>(V) {}
+
+public:
+
+ static LeafTy getFixed(ScalarTy MinVal) {
+ return static_cast<LeafTy>(LinearPolySize(MinVal, FixedDim));
+ }
+ static LeafTy getScalable(ScalarTy MinVal) {
+ return static_cast<LeafTy>(LinearPolySize(MinVal, ScalableDim));
+ }
+ static LeafTy get(ScalarTy MinVal, bool Scalable) {
+ return static_cast<LeafTy>(
+ LinearPolySize(MinVal, Scalable ? ScalableDim : FixedDim));
+ }
+ static LeafTy getNull() { return get(0, false); }
+
+ /// Returns the minimum value this size can represent.
+ ScalarTy getKnownMinValue() const { return this->getValue(); }
+ /// Returns whether the size is scaled by a runtime quantity (vscale).
+ bool isScalable() const { return this->UnivariateDim == ScalableDim; }
+ /// A return value of true indicates we know at compile time that the number
+ /// of elements (vscale * Min) is definitely even. However, returning false
+ /// does not guarantee that the total number of elements is odd.
+ bool isKnownEven() const { return (getKnownMinValue() & 0x1) == 0; }
+ /// This function tells the caller whether the element count is known at
+ /// compile time to be a multiple of the scalar value RHS.
+ bool isKnownMultipleOf(ScalarTy RHS) const {
+ return getKnownMinValue() % RHS == 0;
+ }
+
+ // Return the minimum value with the assumption that the count is exact.
+ // Use in places where a scalable count doesn't make sense (e.g. non-vector
+ // types, or vectors in backends which don't support scalable vectors).
+ ScalarTy getFixedValue() const {
+ assert(!isScalable() &&
+ "Request for a fixed element count on a scalable object");
+ return getKnownMinValue();
+ }
+
+ // For some cases, size ordering between scalable and fixed size types cannot
// be determined at compile time, so such comparisons aren't allowed.
//
// e.g. <vscale x 2 x i16> could be bigger than <4 x i32> with a runtime
// vscale >= 5, equal sized with a vscale of 4, and smaller with
// a vscale <= 3.
//
- // If the scalable flags match, just perform the requested comparison
- // between the minimum sizes.
- friend bool operator<(const TypeSize &LHS, const TypeSize &RHS) {
- assert(LHS.IsScalable == RHS.IsScalable &&
- "Ordering comparison of scalable and fixed types");
+ // All the functions below make use of the fact vscale is always >= 1, which
+ // means that <vscale x 4 x i32> is guaranteed to be >= <4 x i32>, etc.
- return LHS.MinSize < RHS.MinSize;
+ static bool isKnownLT(const LinearPolySize &LHS, const LinearPolySize &RHS) {
+ if (!LHS.isScalable() || RHS.isScalable())
+ return LHS.getKnownMinValue() < RHS.getKnownMinValue();
+ return false;
}
- friend bool operator>(const TypeSize &LHS, const TypeSize &RHS) {
- return RHS < LHS;
+ static bool isKnownGT(const LinearPolySize &LHS, const LinearPolySize &RHS) {
+ if (LHS.isScalable() || !RHS.isScalable())
+ return LHS.getKnownMinValue() > RHS.getKnownMinValue();
+ return false;
}
- friend bool operator<=(const TypeSize &LHS, const TypeSize &RHS) {
- return !(RHS < LHS);
+ static bool isKnownLE(const LinearPolySize &LHS, const LinearPolySize &RHS) {
+ if (!LHS.isScalable() || RHS.isScalable())
+ return LHS.getKnownMinValue() <= RHS.getKnownMinValue();
+ return false;
}
- friend bool operator>=(const TypeSize &LHS, const TypeSize& RHS) {
- return !(LHS < RHS);
+ static bool isKnownGE(const LinearPolySize &LHS, const LinearPolySize &RHS) {
+ if (LHS.isScalable() || !RHS.isScalable())
+ return LHS.getKnownMinValue() >= RHS.getKnownMinValue();
+ return false;
}
- // Convenience operators to obtain relative sizes independently of
- // the scalable flag.
- TypeSize operator*(unsigned RHS) const {
- return { MinSize * RHS, IsScalable };
+ /// We do not provide the '/' operator here because division for polynomial
+ /// types does not work in the same way as for normal integer types. We can
+ /// only divide the minimum value (or coefficient) by RHS, which is not the
+ /// same as
+ /// (Min * Vscale) / RHS
+ /// The caller is recommended to use this function in combination with
+ /// isKnownMultipleOf(RHS), which lets the caller know if it's possible to
+ /// perform a lossless divide by RHS.
+ LeafTy divideCoefficientBy(ScalarTy RHS) const {
+ return static_cast<LeafTy>(
+ LinearPolySize::get(getKnownMinValue() / RHS, isScalable()));
}
- friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) {
- return { LHS * RHS.MinSize, RHS.IsScalable };
+ LeafTy coefficientNextPowerOf2() const {
+ return static_cast<LeafTy>(LinearPolySize::get(
+ static_cast<ScalarTy>(llvm::NextPowerOf2(getKnownMinValue())),
+ isScalable()));
}
- TypeSize operator/(unsigned RHS) const {
- return { MinSize / RHS, IsScalable };
+ /// Printing function.
+ void print(raw_ostream &OS) const {
+ if (isScalable())
+ OS << "vscale x ";
+ OS << getKnownMinValue();
}
+};
- // Return the minimum size with the assumption that the size is exact.
- // Use in places where a scalable size doesn't make sense (e.g. non-vector
- // types, or vectors in backends which don't support scalable vectors).
- uint64_t getFixedSize() const {
- assert(!IsScalable && "Request for a fixed size on a scalable object");
- return MinSize;
- }
+class ElementCount;
+template <> struct LinearPolyBaseTypeTraits<ElementCount> {
+ using ScalarTy = unsigned;
+ static constexpr unsigned Dimensions = 2;
+};
- // Return the known minimum size. Use in places where the scalable property
- // doesn't matter (e.g. determining alignment) or in conjunction with the
- // isScalable method below.
- uint64_t getKnownMinSize() const {
- return MinSize;
- }
+class ElementCount : public LinearPolySize<ElementCount> {
+public:
- // Return whether or not the size is scalable.
- bool isScalable() const {
- return IsScalable;
- }
+ ElementCount(const LinearPolySize<ElementCount> &V) : LinearPolySize(V) {}
- // Returns true if the number of bits is a multiple of an 8-bit byte.
- bool isByteSized() const {
- return (MinSize & 7) == 0;
+ /// Counting predicates.
+ ///
+ ///@{ Number of elements.
+ /// Exactly one element.
+ bool isScalar() const { return !isScalable() && getKnownMinValue() == 1; }
+ /// One or more elements.
+ bool isVector() const {
+ return (isScalable() && getKnownMinValue() != 0) || getKnownMinValue() > 1;
}
+ ///@}
+};
- // Returns true if the type size is non-zero.
- bool isNonZero() const { return MinSize != 0; }
+// This class is used to represent the size of types. If the type is of fixed
+class TypeSize;
+template <> struct LinearPolyBaseTypeTraits<TypeSize> {
+ using ScalarTy = uint64_t;
+ static constexpr unsigned Dimensions = 2;
+};
- // Returns true if the type size is zero.
- bool isZero() const { return MinSize == 0; }
+// TODO: Most functionality in this class will gradually be phased out
+// so it will resemble LinearPolySize as much as possible.
+//
+// TypeSize is used to represent the size of types. If the type is of fixed
+// size, it will represent the exact size. If the type is a scalable vector,
+// it will represent the known minimum size.
+class TypeSize : public LinearPolySize<TypeSize> {
+public:
+ TypeSize(const LinearPolySize<TypeSize> &V) : LinearPolySize(V) {}
+ TypeSize(ScalarTy MinVal, bool IsScalable)
+ : LinearPolySize(LinearPolySize::get(MinVal, IsScalable)) {}
+
+ static TypeSize Fixed(ScalarTy MinVal) { return TypeSize(MinVal, false); }
+ static TypeSize Scalable(ScalarTy MinVal) { return TypeSize(MinVal, true); }
+
+ ScalarTy getFixedSize() const { return getFixedValue(); }
+ ScalarTy getKnownMinSize() const { return getKnownMinValue(); }
+
+ // All code for this class below this point is needed because of the
+ // temporary implicit conversion to uint64_t. The operator overloads are
+ // needed because otherwise the conversion of the parent class
+ // UnivariateLinearPolyBase -> TypeSize is ambiguous.
+ // TODO: Remove the implicit conversion.
// Casts to a uint64_t if this is a fixed-width size.
//
@@ -173,68 +438,54 @@ public:
// To determine how to upgrade the code:
//
// if (<algorithm works for both scalable and fixed-width vectors>)
- // use getKnownMinSize()
+ // use getKnownMinValue()
// else if (<algorithm works only for fixed-width vectors>) {
// if <algorithm can be adapted for both scalable and fixed-width vectors>
- // update the algorithm and use getKnownMinSize()
+ // update the algorithm and use getKnownMinValue()
// else
- // bail out early for scalable vectors and use getFixedSize()
+ // bail out early for scalable vectors and use getFixedValue()
// }
- operator uint64_t() const {
+ operator ScalarTy() const {
#ifdef STRICT_FIXED_SIZE_VECTORS
- return getFixedSize();
+ return getFixedValue();
#else
if (isScalable())
WithColor::warning() << "Compiler has made implicit assumption that "
"TypeSize is not scalable. This may or may not "
"lead to broken code.\n";
- return getKnownMinSize();
+ return getKnownMinValue();
#endif
}
- // Additional convenience operators needed to avoid ambiguous parses.
- // TODO: Make uint64_t the default operator?
- TypeSize operator*(uint64_t RHS) const {
- return { MinSize * RHS, IsScalable };
+ // Additional operators needed to avoid ambiguous parses
+ // because of the implicit conversion hack.
+ friend TypeSize operator*(const TypeSize &LHS, const int RHS) {
+ return LHS * (ScalarTy)RHS;
}
-
- TypeSize operator*(int RHS) const {
- return { MinSize * RHS, IsScalable };
+ friend TypeSize operator*(const TypeSize &LHS, const unsigned RHS) {
+ return LHS * (ScalarTy)RHS;
}
-
- TypeSize operator*(int64_t RHS) const {
- return { MinSize * RHS, IsScalable };
+ friend TypeSize operator*(const TypeSize &LHS, const int64_t RHS) {
+ return LHS * (ScalarTy)RHS;
}
-
- friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) {
- return { LHS * RHS.MinSize, RHS.IsScalable };
- }
-
friend TypeSize operator*(const int LHS, const TypeSize &RHS) {
- return { LHS * RHS.MinSize, RHS.IsScalable };
- }
-
- friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) {
- return { LHS * RHS.MinSize, RHS.IsScalable };
- }
-
- TypeSize operator/(uint64_t RHS) const {
- return { MinSize / RHS, IsScalable };
+ return RHS * LHS;
}
-
- TypeSize operator/(int RHS) const {
- return { MinSize / RHS, IsScalable };
+ friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) {
+ return RHS * LHS;
}
-
- TypeSize operator/(int64_t RHS) const {
- return { MinSize / RHS, IsScalable };
+ friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) {
+ return RHS * LHS;
}
-
- TypeSize NextPowerOf2() const {
- return TypeSize(llvm::NextPowerOf2(MinSize), IsScalable);
+ friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) {
+ return RHS * LHS;
}
};
+//===----------------------------------------------------------------------===//
+// Utilities
+//===----------------------------------------------------------------------===//
+
/// Returns a TypeSize with a known minimum size that is the next integer
/// (mod 2**64) that is greater than or equal to \p Value and is a multiple
/// of \p Align. \p Align must be non-zero.
@@ -242,21 +493,35 @@ public:
/// Similar to the alignTo functions in MathExtras.h
inline TypeSize alignTo(TypeSize Size, uint64_t Align) {
assert(Align != 0u && "Align must be non-zero");
- return {(Size.getKnownMinSize() + Align - 1) / Align * Align,
+ return {(Size.getKnownMinValue() + Align - 1) / Align * Align,
Size.isScalable()};
}
+/// Stream operator function for `LinearPolySize`.
+template <typename LeafTy>
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const LinearPolySize<LeafTy> &PS) {
+ PS.print(OS);
+ return OS;
+}
+
+template <typename T> struct DenseMapInfo;
template <> struct DenseMapInfo<ElementCount> {
- static inline ElementCount getEmptyKey() { return {~0U, true}; }
- static inline ElementCount getTombstoneKey() { return {~0U - 1, false}; }
- static unsigned getHashValue(const ElementCount& EltCnt) {
- if (EltCnt.Scalable)
- return (EltCnt.Min * 37U) - 1U;
+ static inline ElementCount getEmptyKey() {
+ return ElementCount::getScalable(~0U);
+ }
+ static inline ElementCount getTombstoneKey() {
+ return ElementCount::getFixed(~0U - 1);
+ }
+ static unsigned getHashValue(const ElementCount &EltCnt) {
+ unsigned HashVal = EltCnt.getKnownMinValue() * 37U;
+ if (EltCnt.isScalable())
+ return (HashVal - 1U);
- return EltCnt.Min * 37U;
+ return HashVal;
}
- static bool isEqual(const ElementCount& LHS, const ElementCount& RHS) {
+ static bool isEqual(const ElementCount &LHS, const ElementCount &RHS) {
return LHS == RHS;
}
};
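
A sketch of the reworked size classes above, showing the explicit isKnown* comparisons and coefficient division:

  #include "llvm/Support/TypeSize.h"
  #include <cassert>

  void typeSizeExamples() {
    using llvm::ElementCount;
    using llvm::TypeSize;

    ElementCount FixedEC = ElementCount::getFixed(4);       // <4 x i32>
    ElementCount ScalableEC = ElementCount::getScalable(2); // <vscale x 2 x i32>
    assert(FixedEC.isVector() && !FixedEC.isScalable());
    assert(ScalableEC.isScalable());

    TypeSize FixedTS = TypeSize::Fixed(128);
    TypeSize ScalableTS = TypeSize::Scalable(128);
    // Ordering between fixed and scalable sizes is only "known" when
    // vscale >= 1 settles the question; here it does not, so both
    // directions report false.
    assert(!TypeSize::isKnownLT(FixedTS, ScalableTS) &&
           !TypeSize::isKnownGT(FixedTS, ScalableTS));
    // Dividing the coefficient halves the known minimum size.
    assert(ScalableTS.divideCoefficientBy(2).getKnownMinValue() == 64);
  }
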
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h b/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h
index af09c21085c5..c6ddbf60efdf 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -37,6 +37,7 @@
namespace llvm {
class MemoryBuffer;
+class MemoryBufferRef;
class Twine;
namespace vfs {
@@ -463,7 +464,8 @@ public:
/// false if the file or directory already exists in the file system with
/// different contents.
bool addFileNoOwn(const Twine &Path, time_t ModificationTime,
- llvm::MemoryBuffer *Buffer, Optional<uint32_t> User = None,
+ const llvm::MemoryBufferRef &Buffer,
+ Optional<uint32_t> User = None,
Optional<uint32_t> Group = None,
Optional<llvm::sys::fs::file_type> Type = None,
Optional<llvm::sys::fs::perms> Perms = None);
@@ -498,7 +500,7 @@ llvm::sys::fs::UniqueID getNextVirtualUniqueID();
/// Gets a \p FileSystem for a virtual file system described in YAML
/// format.
-IntrusiveRefCntPtr<FileSystem>
+std::unique_ptr<FileSystem>
getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer,
llvm::SourceMgr::DiagHandlerTy DiagHandler,
StringRef YAMLFilePath, void *DiagContext = nullptr,
@@ -649,9 +651,12 @@ private:
friend class VFSFromYamlDirIterImpl;
friend class RedirectingFileSystemParser;
- bool shouldUseExternalFS() const {
- return ExternalFSValidWD && IsFallthrough;
- }
+ bool shouldUseExternalFS() const { return IsFallthrough; }
+
+ /// Canonicalize \p Path by removing ".", "..", and "./" components. This is
+ /// a VFS request; symlinks in the path components are not resolved, but the
+ /// path is canonicalized so that the correct entry search can be performed.
+ std::error_code makeCanonical(SmallVectorImpl<char> &Path) const;
// In a RedirectingFileSystem, keys can be specified in Posix or Windows
// style (or even a mixture of both), so this comparison helper allows
@@ -670,9 +675,6 @@ private:
/// The current working directory of the file system.
std::string WorkingDirectory;
- /// Whether the current working directory is valid for the external FS.
- bool ExternalFSValidWD = false;
-
/// The file system to use for external references.
IntrusiveRefCntPtr<FileSystem> ExternalFS;
@@ -720,15 +722,20 @@ private:
public:
/// Looks up \p Path in \c Roots.
- ErrorOr<Entry *> lookupPath(const Twine &Path) const;
+ ErrorOr<Entry *> lookupPath(StringRef Path) const;
/// Parses \p Buffer, which is expected to be in YAML format and
/// returns a virtual file system representing its contents.
- static RedirectingFileSystem *
+ static std::unique_ptr<RedirectingFileSystem>
create(std::unique_ptr<MemoryBuffer> Buffer,
SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS);
+ /// Redirect each of the remapped files from first to second.
+ static std::unique_ptr<RedirectingFileSystem>
+ create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles,
+ bool UseExternalNames, FileSystem &ExternalFS);
+
ErrorOr<Status> status(const Twine &Path) override;
ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
@@ -749,6 +756,10 @@ public:
StringRef getExternalContentsPrefixDir() const;
+ void setFallthrough(bool Fallthrough);
+
+ std::vector<llvm::StringRef> getRoots() const;
+
void dump(raw_ostream &OS) const;
void dumpEntry(raw_ostream &OS, Entry *E, int NumSpaces = 0) const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
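
A sketch of the new RedirectingFileSystem::create() overload above, which builds a VFS directly from (virtual, real) path pairs; the paths shown are hypothetical:

  #include "llvm/Support/VirtualFileSystem.h"
  #include <memory>
  #include <string>
  #include <utility>

  std::unique_ptr<llvm::vfs::RedirectingFileSystem> makeRemappedFS() {
    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> Real =
        llvm::vfs::getRealFileSystem();
    // Requests for /virtual/config.h are redirected to the real file; all
    // other paths fall through to the underlying file system.
    std::pair<std::string, std::string> Mapping = {"/virtual/config.h",
                                                   "/tmp/real-config.h"};
    return llvm::vfs::RedirectingFileSystem::create(
        Mapping, /*UseExternalNames=*/true, *Real);
  }
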
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Win64EH.h b/contrib/llvm-project/llvm/include/llvm/Support/Win64EH.h
index 8220131e5be9..9359fcb4286a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Win64EH.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Win64EH.h
@@ -38,12 +38,14 @@ enum UnwindOpcodes {
// The following set of unwind opcodes is for ARM64. They are documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
UOP_AllocMedium,
+ UOP_SaveR19R20X,
UOP_SaveFPLRX,
UOP_SaveFPLR,
UOP_SaveReg,
UOP_SaveRegX,
UOP_SaveRegP,
UOP_SaveRegPX,
+ UOP_SaveLRPair,
UOP_SaveFReg,
UOP_SaveFRegX,
UOP_SaveFRegP,
@@ -51,7 +53,11 @@ enum UnwindOpcodes {
UOP_SetFP,
UOP_AddFP,
UOP_Nop,
- UOP_End
+ UOP_End,
+ UOP_SaveNext,
+ UOP_TrapFrame,
+ UOP_Context,
+ UOP_ClearUnwoundToCall
};
/// UnwindCode - This union describes a single operation in a function prolog,
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/X86TargetParser.def b/contrib/llvm-project/llvm/include/llvm/Support/X86TargetParser.def
index 697f8c70f962..ec19ce4e7cdd 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/X86TargetParser.def
+++ b/contrib/llvm-project/llvm/include/llvm/Support/X86TargetParser.def
@@ -44,6 +44,7 @@ X86_CPU_TYPE(INTEL_KNM, "knm")
X86_CPU_TYPE(INTEL_GOLDMONT, "goldmont")
X86_CPU_TYPE(INTEL_GOLDMONT_PLUS, "goldmont-plus")
X86_CPU_TYPE(INTEL_TREMONT, "tremont")
+X86_CPU_TYPE(AMDFAM19H, "amdfam19h")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom")
@@ -84,6 +85,9 @@ X86_CPU_SUBTYPE(AMDFAM17H_ZNVER2, "znver2")
X86_CPU_SUBTYPE(INTEL_COREI7_CASCADELAKE, "cascadelake")
X86_CPU_SUBTYPE(INTEL_COREI7_TIGERLAKE, "tigerlake")
X86_CPU_SUBTYPE(INTEL_COREI7_COOPERLAKE, "cooperlake")
+X86_CPU_SUBTYPE(INTEL_COREI7_SAPPHIRERAPIDS, "sapphirerapids")
+X86_CPU_SUBTYPE(INTEL_COREI7_ALDERLAKE, "alderlake")
+X86_CPU_SUBTYPE(AMDFAM19H_ZNVER3, "znver3")
#undef X86_CPU_SUBTYPE
@@ -153,6 +157,8 @@ X86_FEATURE (F16C, "f16c")
X86_FEATURE (FSGSBASE, "fsgsbase")
X86_FEATURE (FXSR, "fxsr")
X86_FEATURE (INVPCID, "invpcid")
+X86_FEATURE (KL, "kl")
+X86_FEATURE (WIDEKL, "widekl")
X86_FEATURE (LWP, "lwp")
X86_FEATURE (LZCNT, "lzcnt")
X86_FEATURE (MOVBE, "movbe")
@@ -175,6 +181,7 @@ X86_FEATURE (SHA, "sha")
X86_FEATURE (SHSTK, "shstk")
X86_FEATURE (TBM, "tbm")
X86_FEATURE (TSXLDTRK, "tsxldtrk")
+X86_FEATURE (UINTR, "uintr")
X86_FEATURE (VAES, "vaes")
X86_FEATURE (VZEROUPPER, "vzeroupper")
X86_FEATURE (WAITPKG, "waitpkg")
@@ -184,6 +191,8 @@ X86_FEATURE (XSAVE, "xsave")
X86_FEATURE (XSAVEC, "xsavec")
X86_FEATURE (XSAVEOPT, "xsaveopt")
X86_FEATURE (XSAVES, "xsaves")
+X86_FEATURE (HRESET, "hreset")
+X86_FEATURE (AVXVNNI, "avxvnni")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/X86TargetParser.h b/contrib/llvm-project/llvm/include/llvm/Support/X86TargetParser.h
index 66c474b5c275..2d5083023a11 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/X86TargetParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/X86TargetParser.h
@@ -14,6 +14,7 @@
#define LLVM_SUPPORT_X86TARGETPARSERCOMMON_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
namespace llvm {
class StringRef;
@@ -99,6 +100,8 @@ enum CPUKind {
CK_IcelakeClient,
CK_IcelakeServer,
CK_Tigerlake,
+ CK_SapphireRapids,
+ CK_Alderlake,
CK_KNL,
CK_KNM,
CK_Lakemont,
@@ -118,18 +121,26 @@ enum CPUKind {
CK_BDVER4,
CK_ZNVER1,
CK_ZNVER2,
+ CK_ZNVER3,
CK_x86_64,
+ CK_x86_64_v2,
+ CK_x86_64_v3,
+ CK_x86_64_v4,
CK_Geode,
};
/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if
/// \p Only64Bit is true.
CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false);
+CPUKind parseTuneCPU(StringRef CPU, bool Only64Bit = false);
/// Provide a list of valid CPU names. If \p Only64Bit is true, the list will
/// only contain 64-bit capable CPUs.
void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
- bool ArchIs32Bit);
+ bool Only64Bit = false);
+/// Provide a list of valid -mtune names.
+void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
+ bool Only64Bit = false);
/// Get the key feature prioritizing target multiversioning.
ProcessorFeatures getKeyFeature(CPUKind Kind);
@@ -137,10 +148,10 @@ ProcessorFeatures getKeyFeature(CPUKind Kind);
/// Fill in the features that \p CPU supports into \p Features.
void getFeaturesForCPU(StringRef CPU, SmallVectorImpl<StringRef> &Features);
-/// Fill \p Features with the features that are implied to be enabled/disabled
+/// Set or clear entries in \p Features that are implied to be enabled/disabled
/// by the provided \p Feature.
-void getImpliedFeatures(StringRef Feature, bool Enabled,
- SmallVectorImpl<StringRef> &Features);
+void updateImpliedFeatures(StringRef Feature, bool Enabled,
+ StringMap<bool> &Features);
} // namespace X86
} // namespace llvm
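A minimal sketch of the revised llvm::X86 interface declared above; the feature name "avx512bw" and the specific CPU strings are illustrative assumptions, not taken from this hunk:

#include "llvm/ADT/StringMap.h"
#include "llvm/Support/X86TargetParser.h"
#include "llvm/Support/raw_ostream.h"

void demoX86Parser() {
  using namespace llvm;
  // parseTuneCPU mirrors parseArchX86 but is intended for -mtune values.
  X86::CPUKind Arch = X86::parseArchX86("znver3", /*Only64Bit=*/true);
  X86::CPUKind Tune = X86::parseTuneCPU("alderlake", /*Only64Bit=*/true);
  (void)Arch;
  (void)Tune;

  // updateImpliedFeatures now records implied features in a StringMap<bool>.
  StringMap<bool> Features;
  X86::updateImpliedFeatures("avx512bw", /*Enabled=*/true, Features);
  for (const auto &Entry : Features)
    outs() << Entry.getKey() << " = "
           << (Entry.getValue() ? "true" : "false") << "\n";
}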
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/YAMLParser.h b/contrib/llvm-project/llvm/include/llvm/Support/YAMLParser.h
index 53009d7ff4aa..759e11afd447 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/YAMLParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/YAMLParser.h
@@ -40,6 +40,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
#include <cassert>
#include <cstddef>
#include <iterator>
@@ -51,7 +52,6 @@
namespace llvm {
class MemoryBufferRef;
-class SourceMgr;
class raw_ostream;
class Twine;
@@ -78,6 +78,9 @@ bool scanTokens(StringRef Input);
/// escaped, but emitted verbatim.
std::string escape(StringRef Input, bool EscapePrintable = true);
+/// Parse \p S as a bool according to https://yaml.org/type/bool.html.
+llvm::Optional<bool> parseBool(StringRef S);
+
/// This class represents a YAML stream potentially containing multiple
/// documents.
class Stream {
@@ -100,7 +103,10 @@ public:
return !failed();
}
- void printError(Node *N, const Twine &Msg);
+ void printError(Node *N, const Twine &Msg,
+ SourceMgr::DiagKind Kind = SourceMgr::DK_Error);
+ void printError(const SMRange &Range, const Twine &Msg,
+ SourceMgr::DiagKind Kind = SourceMgr::DK_Error);
private:
friend class Document;
@@ -222,7 +228,7 @@ public:
/// Gets the value of this node as a StringRef.
///
- /// \param Storage is used to store the content of the returned StringRef iff
+ /// \param Storage is used to store the content of the returned StringRef if
/// it requires any modification from how it appeared in the source.
/// This happens with escaped characters and multi-line literals.
StringRef getValue(SmallVectorImpl<char> &Storage) const;
@@ -509,7 +515,6 @@ public:
: Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
StringRef getName() const { return Name; }
- Node *getTarget();
static bool classof(const Node *N) { return N->getType() == NK_Alias; }
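A small sketch of the parseBool helper introduced above; the driver function is invented:

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"

void classifyScalar(llvm::StringRef Scalar) {
  // parseBool accepts the spellings from https://yaml.org/type/bool.html.
  if (llvm::Optional<bool> B = llvm::yaml::parseBool(Scalar))
    llvm::outs() << Scalar << " -> " << (*B ? "true" : "false") << "\n";
  else
    llvm::outs() << Scalar << " is not a YAML bool\n";
}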
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/YAMLTraits.h b/contrib/llvm-project/llvm/include/llvm/Support/YAMLTraits.h
index 44e34a4a09b4..9ac9eb300983 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/YAMLTraits.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/YAMLTraits.h
@@ -19,7 +19,9 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Regex.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/VersionTuple.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -61,7 +63,7 @@ struct MappingTraits {
// Must provide:
// static void mapping(IO &io, T &fields);
// Optionally may provide:
- // static StringRef validate(IO &io, T &fields);
+ // static std::string validate(IO &io, T &fields);
//
// The optional flow flag will cause generated YAML to use a flow mapping
// (e.g. { a: 0, b: 1 }):
@@ -83,7 +85,7 @@ template <class T, class Context> struct MappingContextTraits {
// Must provide:
// static void mapping(IO &io, T &fields, Context &Ctx);
// Optionally may provide:
- // static StringRef validate(IO &io, T &fields, Context &Ctx);
+ // static std::string validate(IO &io, T &fields, Context &Ctx);
//
// The optional flow flag will cause generated YAML to use a flow mapping
// (e.g. { a: 0, b: 1 }):
@@ -421,7 +423,7 @@ template <class T> struct has_MappingTraits<T, EmptyContext> {
// Test if MappingContextTraits<T>::validate() is defined on type T.
template <class T, class Context> struct has_MappingValidateTraits {
- using Signature_validate = StringRef (*)(class IO &, T &, Context &);
+ using Signature_validate = std::string (*)(class IO &, T &, Context &);
template <typename U>
static char test(SameType<Signature_validate, &U::validate>*);
@@ -435,7 +437,7 @@ template <class T, class Context> struct has_MappingValidateTraits {
// Test if MappingTraits<T>::validate() is defined on type T.
template <class T> struct has_MappingValidateTraits<T, EmptyContext> {
- using Signature_validate = StringRef (*)(class IO &, T &);
+ using Signature_validate = std::string (*)(class IO &, T &);
template <typename U>
static char test(SameType<Signature_validate, &U::validate> *);
@@ -637,6 +639,7 @@ inline bool isNull(StringRef S) {
}
inline bool isBool(StringRef S) {
+ // FIXME: using parseBool is causing multiple tests to fail.
return S.equals("true") || S.equals("True") || S.equals("TRUE") ||
S.equals("false") || S.equals("False") || S.equals("FALSE");
}
@@ -789,6 +792,7 @@ public:
virtual NodeKind getNodeKind() = 0;
virtual void setError(const Twine &) = 0;
+ virtual void setAllowUnknownKeys(bool Allow);
template <typename T>
void enumCase(T &Val, const char* Str, const T ConstVal) {
@@ -902,24 +906,7 @@ private:
template <typename T, typename Context>
void processKeyWithDefault(const char *Key, Optional<T> &Val,
const Optional<T> &DefaultValue, bool Required,
- Context &Ctx) {
- assert(DefaultValue.hasValue() == false &&
- "Optional<T> shouldn't have a value!");
- void *SaveInfo;
- bool UseDefault = true;
- const bool sameAsDefault = outputting() && !Val.hasValue();
- if (!outputting() && !Val.hasValue())
- Val = T();
- if (Val.hasValue() &&
- this->preflightKey(Key, Required, sameAsDefault, UseDefault,
- SaveInfo)) {
- yamlize(*this, Val.getValue(), Required, Ctx);
- this->postflightKey(SaveInfo);
- } else {
- if (UseDefault)
- Val = DefaultValue;
- }
- }
+ Context &Ctx);
template <typename T, typename Context>
void processKeyWithDefault(const char *Key, T &Val, const T &DefaultValue,
@@ -1057,7 +1044,7 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) {
else
io.beginMapping();
if (io.outputting()) {
- StringRef Err = MappingTraits<T>::validate(io, Val);
+ std::string Err = MappingTraits<T>::validate(io, Val);
if (!Err.empty()) {
errs() << Err << "\n";
assert(Err.empty() && "invalid struct trying to be written as yaml");
@@ -1065,7 +1052,7 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) {
}
detail::doMapping(io, Val, Ctx);
if (!io.outputting()) {
- StringRef Err = MappingTraits<T>::validate(io, Val);
+ std::string Err = MappingTraits<T>::validate(io, Val);
if (!Err.empty())
io.setError(Err);
}
@@ -1487,9 +1474,10 @@ private:
static bool classof(const MapHNode *) { return true; }
- using NameToNode = StringMap<std::unique_ptr<HNode>>;
+ using NameToNodeAndLoc =
+ StringMap<std::pair<std::unique_ptr<HNode>, SMRange>>;
- NameToNode Mapping;
+ NameToNodeAndLoc Mapping;
SmallVector<std::string, 6> ValidKeys;
};
@@ -1511,6 +1499,11 @@ private:
std::unique_ptr<Input::HNode> createHNodes(Node *node);
void setError(HNode *hnode, const Twine &message);
void setError(Node *node, const Twine &message);
+ void setError(const SMRange &Range, const Twine &message);
+
+ void reportWarning(HNode *hnode, const Twine &message);
+ void reportWarning(Node *hnode, const Twine &message);
+ void reportWarning(const SMRange &Range, const Twine &message);
public:
// These are only used by operator>>. They could be private
@@ -1521,6 +1514,8 @@ public:
/// Returns the current node that's being parsed by the YAML Parser.
const Node *getCurrentNode() const;
+ void setAllowUnknownKeys(bool Allow) override;
+
private:
SourceMgr SrcMgr; // must be before Strm
std::unique_ptr<llvm::yaml::Stream> Strm;
@@ -1531,6 +1526,7 @@ private:
std::vector<bool> BitValuesUsed;
HNode *CurrentNode = nullptr;
bool ScalarMatchFound = false;
+ bool AllowUnknownKeys = false;
};
///
@@ -1590,7 +1586,7 @@ public:
private:
void output(StringRef s);
void outputUpToEndOfLine(StringRef s);
- void newLineCheck();
+ void newLineCheck(bool EmptySequence = false);
void outputNewLine();
void paddedKey(StringRef key);
void flowKey(StringRef Key);
@@ -1625,6 +1621,42 @@ private:
StringRef PaddingBeforeContainer;
};
+template <typename T, typename Context>
+void IO::processKeyWithDefault(const char *Key, Optional<T> &Val,
+ const Optional<T> &DefaultValue, bool Required,
+ Context &Ctx) {
+ assert(DefaultValue.hasValue() == false &&
+ "Optional<T> shouldn't have a value!");
+ void *SaveInfo;
+ bool UseDefault = true;
+ const bool sameAsDefault = outputting() && !Val.hasValue();
+ if (!outputting() && !Val.hasValue())
+ Val = T();
+ if (Val.hasValue() &&
+ this->preflightKey(Key, Required, sameAsDefault, UseDefault, SaveInfo)) {
+
+ // When reading an Optional<X> key from a YAML description, we allow the
+ // special "<none>" value, which can be used to specify that no value was
+ // requested, i.e. the DefaultValue will be assigned. The DefaultValue is
+ // usually None.
+ bool IsNone = false;
+ if (!outputting())
+ if (auto *Node = dyn_cast<ScalarNode>(((Input *)this)->getCurrentNode()))
+ // We use rtrim to ignore possible white spaces that might exist when a
+ // comment is present on the same line.
+ IsNone = Node->getRawValue().rtrim(' ') == "<none>";
+
+ if (IsNone)
+ Val = DefaultValue;
+ else
+ yamlize(*this, Val.getValue(), Required, Ctx);
+ this->postflightKey(SaveInfo);
+ } else {
+ if (UseDefault)
+ Val = DefaultValue;
+ }
+}
+
/// YAML I/O does conversion based on types. But often native data types
/// are just a typedef of built-in integral types (e.g. int). But the C++
/// type matching system sees through the typedef and all the typedefed types
@@ -1685,6 +1717,12 @@ struct ScalarTraits<Hex64> {
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
};
+template <> struct ScalarTraits<VersionTuple> {
+ static void output(const VersionTuple &Value, void *, llvm::raw_ostream &Out);
+ static StringRef input(StringRef, void *, VersionTuple &);
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
// Define non-member operator>> so that Input can stream in a document list.
template <typename T>
inline std::enable_if_t<has_DocumentListTraits<T>::value, Input &>
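A sketch of a mapping that uses the new std::string-returning validate() shown above; the Polygon type and its field names are invented:

#include "llvm/Support/YAMLTraits.h"
#include <string>

struct Polygon {
  int Sides = 0;
  double SideLength = 0.0;
};

namespace llvm {
namespace yaml {
template <> struct MappingTraits<Polygon> {
  static void mapping(IO &Io, Polygon &P) {
    Io.mapRequired("sides", P.Sides);
    Io.mapOptional("side-length", P.SideLength, 1.0);
  }
  // An empty string means "valid"; anything else is reported as an error.
  static std::string validate(IO &Io, Polygon &P) {
    if (P.Sides < 3)
      return "a polygon needs at least three sides";
    return "";
  }
};
} // namespace yaml
} // namespace llvm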
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/raw_ostream.h b/contrib/llvm-project/llvm/include/llvm/Support/raw_ostream.h
index 8d289f7c765f..7d572fe06f6f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/raw_ostream.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/raw_ostream.h
@@ -15,7 +15,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
#include <cassert>
+#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
@@ -30,12 +32,14 @@ class format_object_base;
class FormattedString;
class FormattedNumber;
class FormattedBytes;
+template <class T> class LLVM_NODISCARD Expected;
namespace sys {
namespace fs {
enum FileAccess : unsigned;
enum OpenFlags : unsigned;
enum CreationDisposition : unsigned;
+class FileLocker;
} // end namespace fs
} // end namespace sys
@@ -44,7 +48,16 @@ enum CreationDisposition : unsigned;
/// buffered disciplines etc. It is a simple buffer that outputs
/// a chunk at a time.
class raw_ostream {
+public:
+ // Class kinds to support LLVM-style RTTI.
+ enum class OStreamKind {
+ OK_OStream,
+ OK_FDStream,
+ };
+
private:
+ OStreamKind Kind;
+
/// The buffer is handled in such a way that the buffer is
/// uninitialized, unbuffered, or out of space when OutBufCur >=
/// OutBufEnd. Thus a single comparison suffices to determine if we
@@ -102,9 +115,10 @@ public:
static constexpr Colors SAVEDCOLOR = Colors::SAVEDCOLOR;
static constexpr Colors RESET = Colors::RESET;
- explicit raw_ostream(bool unbuffered = false)
- : BufferMode(unbuffered ? BufferKind::Unbuffered
- : BufferKind::InternalBuffer) {
+ explicit raw_ostream(bool unbuffered = false,
+ OStreamKind K = OStreamKind::OK_OStream)
+ : Kind(K), BufferMode(unbuffered ? BufferKind::Unbuffered
+ : BufferKind::InternalBuffer) {
// Start out ready to flush.
OutBufStart = OutBufEnd = OutBufCur = nullptr;
}
@@ -117,6 +131,8 @@ public:
/// tell - Return the current offset with the file.
uint64_t tell() const { return current_pos() + GetNumBytesInBuffer(); }
+ OStreamKind get_kind() const { return Kind; }
+
//===--------------------------------------------------------------------===//
// Configuration Interface
//===--------------------------------------------------------------------===//
@@ -385,8 +401,9 @@ class raw_pwrite_stream : public raw_ostream {
void anchor() override;
public:
- explicit raw_pwrite_stream(bool Unbuffered = false)
- : raw_ostream(Unbuffered) {}
+ explicit raw_pwrite_stream(bool Unbuffered = false,
+ OStreamKind K = OStreamKind::OK_OStream)
+ : raw_ostream(Unbuffered, K) {}
void pwrite(const char *Ptr, size_t Size, uint64_t Offset) {
#ifndef NDEBUG
uint64_t Pos = tell();
@@ -409,6 +426,7 @@ class raw_fd_ostream : public raw_pwrite_stream {
int FD;
bool ShouldClose;
bool SupportsSeeking = false;
+ mutable Optional<bool> HasColors;
#ifdef _WIN32
/// True if this fd refers to a Windows console device. Mintty and other
@@ -432,10 +450,17 @@ class raw_fd_ostream : public raw_pwrite_stream {
/// Determine an efficient buffer size.
size_t preferred_buffer_size() const override;
+ void anchor() override;
+
+protected:
/// Set the flag indicating that an output error has been encountered.
void error_detected(std::error_code EC) { this->EC = EC; }
- void anchor() override;
+ /// Return the file descriptor.
+ int get_fd() const { return FD; }
+
+ // Update the file position by increasing \p Delta.
+ void inc_pos(uint64_t Delta) { pos += Delta; }
public:
/// Open the specified file for writing. If an error occurs, information
@@ -460,7 +485,8 @@ public:
/// FD is the file descriptor that this writes to. If ShouldClose is true,
/// this closes the file when the stream is destroyed. If FD is for stdout or
/// stderr, it will not be closed.
- raw_fd_ostream(int fd, bool shouldClose, bool unbuffered=false);
+ raw_fd_ostream(int fd, bool shouldClose, bool unbuffered = false,
+ OStreamKind K = OStreamKind::OK_OStream);
~raw_fd_ostream() override;
@@ -468,7 +494,7 @@ public:
/// fsync.
void close();
- bool supportsSeeking() { return SupportsSeeking; }
+ bool supportsSeeking() const { return SupportsSeeking; }
/// Flushes the stream and repositions the underlying file descriptor position
/// to the offset specified from the beginning of the file.
@@ -496,6 +522,38 @@ public:
/// - from The Zen of Python, by Tim Peters
///
void clear_error() { EC = std::error_code(); }
+
+ /// Locks the underlying file.
+ ///
+ /// @returns RAII object that releases the lock upon leaving the scope, if the
+ /// locking was successful. Otherwise returns corresponding
+ /// error code.
+ ///
+ /// The function blocks the current thread until the lock becomes available or
+ /// an error occurs.
+ ///
+ /// Possible use of this function may be as follows:
+ ///
+ /// @code{.cpp}
+ /// if (auto L = stream.lock()) {
+ /// // ... do actions that require the file to be locked.
+ /// } else {
+ /// handleAllErrors(std::move(L.takeError()), [&](ErrorInfoBase &EIB) {
+ /// // ... handle lock error.
+ /// });
+ /// }
+ /// @endcode
+ LLVM_NODISCARD Expected<sys::fs::FileLocker> lock();
+
+ /// Tries to lock the underlying file within the specified period.
+ ///
+ /// @returns RAII object that releases the lock upon leaving the scope, if the
+ /// locking was successful. Otherwise returns corresponding
+ /// error code.
+ ///
+ /// It is used in the same way as @ref lock.
+ LLVM_NODISCARD
+ Expected<sys::fs::FileLocker> tryLockFor(std::chrono::milliseconds Timeout);
};
/// This returns a reference to a raw_fd_ostream for standard output. Use it
@@ -513,6 +571,34 @@ raw_fd_ostream &errs();
raw_ostream &nulls();
//===----------------------------------------------------------------------===//
+// File Streams
+//===----------------------------------------------------------------------===//
+
+/// A raw_ostream of a file for reading/writing/seeking.
+///
+class raw_fd_stream : public raw_fd_ostream {
+public:
+ /// Open the specified file for reading/writing/seeking. If an error occurs,
+ /// information about the error is put into EC, and the stream should be
+ /// immediately destroyed.
+ raw_fd_stream(StringRef Filename, std::error_code &EC);
+
+ /// This reads \p Size bytes into the buffer pointed to by \p Ptr.
+ ///
+ /// \param Ptr The start of the buffer to hold data to be read.
+ ///
+ /// \param Size The number of bytes to be read.
+ ///
+ /// On success, the number of bytes read is returned, and the file position is
+ /// advanced by this number. On error, -1 is returned; use error() to get the
+ /// error code.
+ ssize_t read(char *Ptr, size_t Size);
+
+ /// Check if \p OS is a pointer of type raw_fd_stream*.
+ static bool classof(const raw_ostream *OS);
+};
+
+//===----------------------------------------------------------------------===//
// Output Stream Adaptors
//===----------------------------------------------------------------------===//
@@ -601,6 +687,18 @@ public:
~buffer_ostream() override { OS << str(); }
};
+class buffer_unique_ostream : public raw_svector_ostream {
+ std::unique_ptr<raw_ostream> OS;
+ SmallVector<char, 0> Buffer;
+
+ virtual void anchor() override;
+
+public:
+ buffer_unique_ostream(std::unique_ptr<raw_ostream> OS)
+ : raw_svector_ostream(Buffer), OS(std::move(OS)) {}
+ ~buffer_unique_ostream() override { *OS << str(); }
+};
+
} // end namespace llvm
#endif // LLVM_SUPPORT_RAW_OSTREAM_H
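A sketch of the locking interface added above, following the pattern from its own doc comment; the file path and message are arbitrary:

#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

void appendLineLocked(llvm::StringRef Path, llvm::StringRef Line) {
  std::error_code EC;
  llvm::raw_fd_ostream OS(Path, EC, llvm::sys::fs::OF_Append);
  if (EC)
    return;
  if (auto Lock = OS.lock()) {
    // The FileLocker releases the lock when it goes out of scope.
    OS << Line << "\n";
  } else {
    llvm::handleAllErrors(Lock.takeError(), [&](llvm::ErrorInfoBase &EIB) {
      llvm::errs() << "cannot lock " << Path << ": " << EIB.message() << "\n";
    });
  }
}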
diff --git a/contrib/llvm-project/llvm/include/llvm/TableGen/DirectiveEmitter.h b/contrib/llvm-project/llvm/include/llvm/TableGen/DirectiveEmitter.h
new file mode 100644
index 000000000000..27ad0665a0e8
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/TableGen/DirectiveEmitter.h
@@ -0,0 +1,211 @@
+#ifndef LLVM_TABLEGEN_DIRECTIVEEMITTER_H
+#define LLVM_TABLEGEN_DIRECTIVEEMITTER_H
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/TableGen/Record.h"
+
+namespace llvm {
+
+// Wrapper class that contains DirectiveLanguage's information defined in
+// DirectiveBase.td and provides helper methods for accessing it.
+class DirectiveLanguage {
+public:
+ explicit DirectiveLanguage(const llvm::RecordKeeper &Records)
+ : Records(Records) {
+ const auto &DirectiveLanguages = getDirectiveLanguages();
+ Def = DirectiveLanguages[0];
+ }
+
+ StringRef getName() const { return Def->getValueAsString("name"); }
+
+ StringRef getCppNamespace() const {
+ return Def->getValueAsString("cppNamespace");
+ }
+
+ StringRef getDirectivePrefix() const {
+ return Def->getValueAsString("directivePrefix");
+ }
+
+ StringRef getClausePrefix() const {
+ return Def->getValueAsString("clausePrefix");
+ }
+
+ StringRef getIncludeHeader() const {
+ return Def->getValueAsString("includeHeader");
+ }
+
+ StringRef getClauseEnumSetClass() const {
+ return Def->getValueAsString("clauseEnumSetClass");
+ }
+
+ StringRef getFlangClauseBaseClass() const {
+ return Def->getValueAsString("flangClauseBaseClass");
+ }
+
+ bool hasMakeEnumAvailableInNamespace() const {
+ return Def->getValueAsBit("makeEnumAvailableInNamespace");
+ }
+
+ bool hasEnableBitmaskEnumInNamespace() const {
+ return Def->getValueAsBit("enableBitmaskEnumInNamespace");
+ }
+
+ const std::vector<Record *> getDirectives() const {
+ return Records.getAllDerivedDefinitions("Directive");
+ }
+
+ const std::vector<Record *> getClauses() const {
+ return Records.getAllDerivedDefinitions("Clause");
+ }
+
+ bool HasValidityErrors() const;
+
+private:
+ const llvm::Record *Def;
+ const llvm::RecordKeeper &Records;
+
+ const std::vector<Record *> getDirectiveLanguages() const {
+ return Records.getAllDerivedDefinitions("DirectiveLanguage");
+ }
+};
+
+// Base record class used for Directive and Clause class defined in
+// DirectiveBase.td.
+class BaseRecord {
+public:
+ explicit BaseRecord(const llvm::Record *Def) : Def(Def) {}
+
+ StringRef getName() const { return Def->getValueAsString("name"); }
+
+ StringRef getAlternativeName() const {
+ return Def->getValueAsString("alternativeName");
+ }
+
+ // Returns the name of the directive formatted for output. Whitespace is
+ // replaced with underscores.
+ std::string getFormattedName() {
+ StringRef Name = Def->getValueAsString("name");
+ std::string N = Name.str();
+ std::replace(N.begin(), N.end(), ' ', '_');
+ return N;
+ }
+
+ bool isDefault() const { return Def->getValueAsBit("isDefault"); }
+
+ // Returns the record name.
+ const StringRef getRecordName() const { return Def->getName(); }
+
+protected:
+ const llvm::Record *Def;
+};
+
+// Wrapper class that contains a Directive's information defined in
+// DirectiveBase.td and provides helper methods for accessing it.
+class Directive : public BaseRecord {
+public:
+ explicit Directive(const llvm::Record *Def) : BaseRecord(Def) {}
+
+ std::vector<Record *> getAllowedClauses() const {
+ return Def->getValueAsListOfDefs("allowedClauses");
+ }
+
+ std::vector<Record *> getAllowedOnceClauses() const {
+ return Def->getValueAsListOfDefs("allowedOnceClauses");
+ }
+
+ std::vector<Record *> getAllowedExclusiveClauses() const {
+ return Def->getValueAsListOfDefs("allowedExclusiveClauses");
+ }
+
+ std::vector<Record *> getRequiredClauses() const {
+ return Def->getValueAsListOfDefs("requiredClauses");
+ }
+};
+
+// Wrapper class that contains Clause's information defined in DirectiveBase.td
+// and provides helper methods for accessing it.
+class Clause : public BaseRecord {
+public:
+ explicit Clause(const llvm::Record *Def) : BaseRecord(Def) {}
+
+ // Optional field.
+ StringRef getClangClass() const {
+ return Def->getValueAsString("clangClass");
+ }
+
+ // Optional field.
+ StringRef getFlangClass() const {
+ return Def->getValueAsString("flangClass");
+ }
+
+ // Get the formatted name for Flang parser class. The generic formatted class
+ // name is constructed from the name where the first letter of each word is
+ // capitalized and the underscores are removed.
+ // ex: async -> Async
+ // num_threads -> NumThreads
+ std::string getFormattedParserClassName() {
+ StringRef Name = Def->getValueAsString("name");
+ std::string N = Name.str();
+ bool Cap = true;
+ std::transform(N.begin(), N.end(), N.begin(), [&Cap](unsigned char C) {
+ if (Cap == true) {
+ C = llvm::toUpper(C);
+ Cap = false;
+ } else if (C == '_') {
+ Cap = true;
+ }
+ return C;
+ });
+ N.erase(std::remove(N.begin(), N.end(), '_'), N.end());
+ return N;
+ }
+
+ // Optional field.
+ StringRef getEnumName() const {
+ return Def->getValueAsString("enumClauseValue");
+ }
+
+ std::vector<Record *> getClauseVals() const {
+ return Def->getValueAsListOfDefs("allowedClauseValues");
+ }
+
+ bool isValueOptional() const { return Def->getValueAsBit("isValueOptional"); }
+
+ bool isValueList() const { return Def->getValueAsBit("isValueList"); }
+
+ StringRef getDefaultValue() const {
+ return Def->getValueAsString("defaultValue");
+ }
+
+ bool isImplicit() const { return Def->getValueAsBit("isImplicit"); }
+};
+
+// Wrapper class that contains VersionedClause's information defined in
+// DirectiveBase.td and provides helper methods for accessing it.
+class VersionedClause {
+public:
+ explicit VersionedClause(const llvm::Record *Def) : Def(Def) {}
+
+ // Return the specific clause record wrapped in the Clause class.
+ Clause getClause() const { return Clause{Def->getValueAsDef("clause")}; }
+
+ int64_t getMinVersion() const { return Def->getValueAsInt("minVersion"); }
+
+ int64_t getMaxVersion() const { return Def->getValueAsInt("maxVersion"); }
+
+private:
+ const llvm::Record *Def;
+};
+
+class ClauseVal : public BaseRecord {
+public:
+ explicit ClauseVal(const llvm::Record *Def) : BaseRecord(Def) {}
+
+ int getValue() const { return Def->getValueAsInt("value"); }
+
+ bool isUserVisible() const { return Def->getValueAsBit("isUserValue"); }
+};
+
+} // namespace llvm
+
+#endif
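A hedged sketch of how a TableGen backend might consume these wrappers; the entry-point name is invented, and treating allowedClauses entries as VersionedClause records follows the DirectiveBase.td convention:

#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/DirectiveEmitter.h"
#include "llvm/TableGen/Record.h"

void EmitDirectiveSummary(llvm::RecordKeeper &Records, llvm::raw_ostream &OS) {
  llvm::DirectiveLanguage Lang(Records);
  OS << "// Directives for " << Lang.getName() << "\n";
  for (llvm::Record *R : Lang.getDirectives()) {
    llvm::Directive Dir(R);
    OS << Lang.getDirectivePrefix() << Dir.getFormattedName() << "\n";
    for (llvm::Record *VC : Dir.getAllowedClauses())
      OS << "  clause: " << llvm::VersionedClause(VC).getClause().getName()
         << "\n";
  }
}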
diff --git a/contrib/llvm-project/llvm/include/llvm/TableGen/Error.h b/contrib/llvm-project/llvm/include/llvm/TableGen/Error.h
index cf990427f577..f63b50ad786c 100644
--- a/contrib/llvm-project/llvm/include/llvm/TableGen/Error.h
+++ b/contrib/llvm-project/llvm/include/llvm/TableGen/Error.h
@@ -15,23 +15,38 @@
#define LLVM_TABLEGEN_ERROR_H
#include "llvm/Support/SourceMgr.h"
+#include "llvm/TableGen/Record.h"
namespace llvm {
void PrintNote(const Twine &Msg);
void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg);
+LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const Twine &Msg);
+LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(ArrayRef<SMLoc> ErrorLoc,
+ const Twine &Msg);
+LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const Record *Rec,
+ const Twine &Msg);
+LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const RecordVal *RecVal,
+ const Twine &Msg);
+
+void PrintWarning(const Twine &Msg);
void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg);
void PrintWarning(const char *Loc, const Twine &Msg);
-void PrintWarning(const Twine &Msg);
+void PrintError(const Twine &Msg);
void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg);
void PrintError(const char *Loc, const Twine &Msg);
-void PrintError(const Twine &Msg);
+void PrintError(const Record *Rec, const Twine &Msg);
+void PrintError(const RecordVal *RecVal, const Twine &Msg);
LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Twine &Msg);
LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc,
const Twine &Msg);
+LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Record *Rec,
+ const Twine &Msg);
+LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const RecordVal *RecVal,
+ const Twine &Msg);
extern SourceMgr SrcMgr;
extern unsigned ErrorsPrinted;
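A short sketch of the new record-aware diagnostics declared above; the field name "Size" is invented:

#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"

void checkSizeField(const llvm::Record *Rec) {
  // Points the error at the record's definition instead of a bare SMLoc.
  if (Rec->getValueAsInt("Size") <= 0)
    llvm::PrintFatalError(Rec, "field 'Size' must be a positive integer");
}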
diff --git a/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h b/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h
index a082fe5d74a1..b71aa0a89056 100644
--- a/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h
+++ b/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h
@@ -20,10 +20,12 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -57,7 +59,6 @@ public:
enum RecTyKind {
BitRecTyKind,
BitsRecTyKind,
- CodeRecTyKind,
IntRecTyKind,
StringRecTyKind,
ListRecTyKind,
@@ -67,6 +68,7 @@ public:
private:
RecTyKind Kind;
+ /// ListRecTy of the list that has elements of this type.
ListRecTy *ListTy = nullptr;
public:
@@ -87,7 +89,7 @@ public:
/// a bit set is not an int, but they are convertible.
virtual bool typeIsA(const RecTy *RHS) const;
- /// Returns the type representing list<this>.
+ /// Returns the type representing list<thistype>.
ListRecTy *getListTy();
};
@@ -136,24 +138,6 @@ public:
bool typeIsA(const RecTy *RHS) const override;
};
-/// 'code' - Represent a code fragment
-class CodeRecTy : public RecTy {
- static CodeRecTy Shared;
-
- CodeRecTy() : RecTy(CodeRecTyKind) {}
-
-public:
- static bool classof(const RecTy *RT) {
- return RT->getRecTyKind() == CodeRecTyKind;
- }
-
- static CodeRecTy *get() { return &Shared; }
-
- std::string getAsString() const override { return "code"; }
-
- bool typeIsConvertibleTo(const RecTy *RHS) const override;
-};
-
/// 'int' - Represent an integer value of no particular size
class IntRecTy : public RecTy {
static IntRecTy Shared;
@@ -190,14 +174,14 @@ public:
bool typeIsConvertibleTo(const RecTy *RHS) const override;
};
-/// 'list<Ty>' - Represent a list of values, all of which must be of
-/// the specified type.
+/// 'list<Ty>' - Represent a list of element values, all of which must be of
+/// the specified type. The type is stored in ElementTy.
class ListRecTy : public RecTy {
friend ListRecTy *RecTy::getListTy();
- RecTy *Ty;
+ RecTy *ElementTy;
- explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), Ty(T) {}
+ explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), ElementTy(T) {}
public:
static bool classof(const RecTy *RT) {
@@ -205,7 +189,7 @@ public:
}
static ListRecTy *get(RecTy *T) { return T->getListTy(); }
- RecTy *getElementType() const { return Ty; }
+ RecTy *getElementType() const { return ElementTy; }
std::string getAsString() const override;
@@ -304,7 +288,6 @@ protected:
IK_FirstTypedInit,
IK_BitInit,
IK_BitsInit,
- IK_CodeInit,
IK_DagInit,
IK_DefInit,
IK_FieldInit,
@@ -337,6 +320,7 @@ private:
virtual void anchor();
public:
+ /// Get the kind (type) of the value.
InitKind getKind() const { return Kind; }
protected:
@@ -347,63 +331,61 @@ public:
Init &operator=(const Init &) = delete;
virtual ~Init() = default;
- /// This virtual method should be overridden by values that may
- /// not be completely specified yet.
+ /// Is this a complete value with no unset (uninitialized) subvalues?
virtual bool isComplete() const { return true; }
/// Is this a concrete and fully resolved value without any references or
/// stuck operations? Unset values are concrete.
virtual bool isConcrete() const { return false; }
- /// Print out this value.
+ /// Print this value.
void print(raw_ostream &OS) const { OS << getAsString(); }
- /// Convert this value to a string form.
+ /// Convert this value to a literal form.
virtual std::string getAsString() const = 0;
- /// Convert this value to a string form,
- /// without adding quote markers. This primaruly affects
- /// StringInits where we will not surround the string value with
- /// quotes.
+
+ /// Convert this value to a literal form,
+ /// without adding quotes around a string.
virtual std::string getAsUnquotedString() const { return getAsString(); }
- /// Debugging method that may be called through a debugger, just
+ /// Debugging method that may be called through a debugger; just
/// invokes print on stderr.
void dump() const;
- /// If this initializer is convertible to Ty, return an initializer whose
- /// type is-a Ty, generating a !cast operation if required. Otherwise, return
- /// nullptr.
+ /// If this value is convertible to type \p Ty, return a value whose
+ /// type is \p Ty, generating a !cast operation if required.
+ /// Otherwise, return null.
virtual Init *getCastTo(RecTy *Ty) const = 0;
- /// Convert to an initializer whose type is-a Ty, or return nullptr if this
- /// is not possible (this can happen if the initializer's type is convertible
- /// to Ty, but there are unresolved references).
+ /// Convert to a value whose type is \p Ty, or return null if this
+ /// is not possible. This can happen if the value's type is convertible
+ /// to \p Ty, but there are unresolved references.
virtual Init *convertInitializerTo(RecTy *Ty) const = 0;
- /// This method is used to implement the bitrange
- /// selection operator. Given an initializer, it selects the specified bits
- /// out, returning them as a new init of bits type. If it is not legal to use
- /// the bit subscript operator on this initializer, return null.
+ /// This function is used to implement the bit range
+ /// selection operator. Given a value, it selects the specified bits,
+ /// returning them as a new \p Init of type \p bits. If it is not legal
+ /// to use the bit selection operator on this value, null is returned.
virtual Init *convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
return nullptr;
}
- /// This method is used to implement the list slice
- /// selection operator. Given an initializer, it selects the specified list
- /// elements, returning them as a new init of list type. If it is not legal
- /// to take a slice of this, return null.
+ /// This function is used to implement the list slice
+ /// selection operator. Given a value, it selects the specified list
+ /// elements, returning them as a new \p Init of type \p list. If it
+ /// is not legal to use the slice operator, null is returned.
virtual Init *convertInitListSlice(ArrayRef<unsigned> Elements) const {
return nullptr;
}
- /// This method is used to implement the FieldInit class.
- /// Implementors of this method should return the type of the named field if
- /// they are of record type.
+ /// This function is used to implement the FieldInit class.
+ /// Implementors of this method should return the type of the named
+ /// field if they are of type record.
virtual RecTy *getFieldType(StringInit *FieldName) const {
return nullptr;
}
- /// This method is used by classes that refer to other
+ /// This function is used by classes that refer to other
/// variables which may not be defined at the time the expression is formed.
/// If a value is set for the variable later, this method will be called on
/// users of the value to allow the value to propagate out.
@@ -411,8 +393,7 @@ public:
return const_cast<Init *>(this);
}
- /// This method is used to return the initializer for the specified
- /// bit.
+ /// Get the \p Init value of the specified bit.
virtual Init *getBit(unsigned Bit) const = 0;
};
@@ -420,14 +401,14 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Init &I) {
I.print(OS); return OS;
}
-/// This is the common super-class of types that have a specific,
-/// explicit, type.
+/// This is the common superclass of types that have a specific,
+/// explicit type, stored in ValueTy.
class TypedInit : public Init {
- RecTy *Ty;
+ RecTy *ValueTy;
protected:
explicit TypedInit(InitKind K, RecTy *T, uint8_t Opc = 0)
- : Init(K, Opc), Ty(T) {}
+ : Init(K, Opc), ValueTy(T) {}
public:
TypedInit(const TypedInit &) = delete;
@@ -438,7 +419,8 @@ public:
I->getKind() <= IK_LastTypedInit;
}
- RecTy *getType() const { return Ty; }
+ /// Get the type of the Init as a RecTy.
+ RecTy *getType() const { return ValueTy; }
Init *getCastTo(RecTy *Ty) const override;
Init *convertInitializerTo(RecTy *Ty) const override;
@@ -448,12 +430,11 @@ public:
/// This method is used to implement the FieldInit class.
/// Implementors of this method should return the type of the named field if
- /// they are of record type.
- ///
+ /// they are of type record.
RecTy *getFieldType(StringInit *FieldName) const override;
};
-/// '?' - Represents an uninitialized value
+/// '?' - Represents an uninitialized value.
class UnsetInit : public Init {
UnsetInit() : Init(IK_UnsetInit) {}
@@ -465,6 +446,7 @@ public:
return I->getKind() == IK_UnsetInit;
}
+ /// Get the singleton unset Init.
static UnsetInit *get();
Init *getCastTo(RecTy *Ty) const override;
@@ -474,8 +456,12 @@ public:
return const_cast<UnsetInit*>(this);
}
+ /// Is this a complete value with no unset (uninitialized) subvalues?
bool isComplete() const override { return false; }
+
bool isConcrete() const override { return true; }
+
+ /// Get the string representation of the Init.
std::string getAsString() const override { return "?"; }
};
@@ -592,10 +578,18 @@ public:
/// "foo" - Represent an initialization by a string value.
class StringInit : public TypedInit {
+public:
+ enum StringFormat {
+ SF_String, // Format as "text"
+ SF_Code, // Format as [{text}]
+ };
+
+private:
StringRef Value;
+ StringFormat Format;
- explicit StringInit(StringRef V)
- : TypedInit(IK_StringInit, StringRecTy::get()), Value(V) {}
+ explicit StringInit(StringRef V, StringFormat Fmt)
+ : TypedInit(IK_StringInit, StringRecTy::get()), Value(V), Format(Fmt) {}
public:
StringInit(const StringInit &) = delete;
@@ -605,50 +599,25 @@ public:
return I->getKind() == IK_StringInit;
}
- static StringInit *get(StringRef);
-
- StringRef getValue() const { return Value; }
-
- Init *convertInitializerTo(RecTy *Ty) const override;
-
- bool isConcrete() const override { return true; }
- std::string getAsString() const override { return "\"" + Value.str() + "\""; }
-
- std::string getAsUnquotedString() const override {
- return std::string(Value);
- }
-
- Init *getBit(unsigned Bit) const override {
- llvm_unreachable("Illegal bit reference off string");
- }
-};
-
-class CodeInit : public TypedInit {
- StringRef Value;
- SMLoc Loc;
-
- explicit CodeInit(StringRef V, const SMLoc &Loc)
- : TypedInit(IK_CodeInit, static_cast<RecTy *>(CodeRecTy::get())),
- Value(V), Loc(Loc) {}
-
-public:
- CodeInit(const StringInit &) = delete;
- CodeInit &operator=(const StringInit &) = delete;
+ static StringInit *get(StringRef, StringFormat Fmt = SF_String);
- static bool classof(const Init *I) {
- return I->getKind() == IK_CodeInit;
+ static StringFormat determineFormat(StringFormat Fmt1, StringFormat Fmt2) {
+ return (Fmt1 == SF_Code || Fmt2 == SF_Code) ? SF_Code : SF_String;
}
- static CodeInit *get(StringRef, const SMLoc &Loc);
-
StringRef getValue() const { return Value; }
- const SMLoc &getLoc() const { return Loc; }
+ StringFormat getFormat() const { return Format; }
+ bool hasCodeFormat() const { return Format == SF_Code; }
Init *convertInitializerTo(RecTy *Ty) const override;
bool isConcrete() const override { return true; }
+
std::string getAsString() const override {
- return "[{" + Value.str() + "}]";
+ if (Format == SF_String)
+ return "\"" + Value.str() + "\"";
+ else
+ return "[{" + Value.str() + "}]";
}
std::string getAsUnquotedString() const override {
@@ -755,7 +724,7 @@ public:
///
class UnOpInit : public OpInit, public FoldingSetNode {
public:
- enum UnaryOp : uint8_t { CAST, HEAD, TAIL, SIZE, EMPTY, GETOP };
+ enum UnaryOp : uint8_t { CAST, NOT, HEAD, TAIL, SIZE, EMPTY, GETDAGOP };
private:
Init *LHS;
@@ -804,9 +773,9 @@ public:
/// !op (X, Y) - Combine two inits.
class BinOpInit : public OpInit, public FoldingSetNode {
public:
- enum BinaryOp : uint8_t { ADD, MUL, AND, OR, SHL, SRA, SRL, LISTCONCAT,
- LISTSPLAT, STRCONCAT, CONCAT, EQ, NE, LE, LT, GE,
- GT, SETOP };
+ enum BinaryOp : uint8_t { ADD, SUB, MUL, AND, OR, XOR, SHL, SRA, SRL, LISTCONCAT,
+ LISTSPLAT, STRCONCAT, INTERLEAVE, CONCAT, EQ,
+ NE, LE, LT, GE, GT, SETDAGOP };
private:
Init *LHS, *RHS;
@@ -826,7 +795,6 @@ public:
RecTy *Type);
static Init *getStrConcat(Init *lhs, Init *rhs);
static Init *getListConcat(TypedInit *lhs, Init *rhs);
- static Init *getListSplat(TypedInit *lhs, Init *rhs);
void Profile(FoldingSetNodeID &ID) const;
@@ -862,7 +830,7 @@ public:
/// !op (X, Y, Z) - Combine two inits.
class TernOpInit : public OpInit, public FoldingSetNode {
public:
- enum TernaryOp : uint8_t { SUBST, FOREACH, IF, DAG };
+ enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR };
private:
Init *LHS, *MHS, *RHS;
@@ -1397,30 +1365,70 @@ public:
// High-Level Classes
//===----------------------------------------------------------------------===//
+/// This class represents a field in a record, including its name, type,
+/// value, and source location.
class RecordVal {
friend class Record;
+public:
+ enum FieldKind {
+ FK_Normal, // A normal record field.
+ FK_NonconcreteOK, // A field that can be nonconcrete ('field' keyword).
+ FK_TemplateArg, // A template argument.
+ };
+
+private:
Init *Name;
- PointerIntPair<RecTy *, 1, bool> TyAndPrefix;
+ SMLoc Loc; // Source location of definition of name.
+ PointerIntPair<RecTy *, 2, FieldKind> TyAndKind;
Init *Value;
public:
- RecordVal(Init *N, RecTy *T, bool P);
+ RecordVal(Init *N, RecTy *T, FieldKind K);
+ RecordVal(Init *N, SMLoc Loc, RecTy *T, FieldKind K);
+ /// Get the name of the field as a StringRef.
StringRef getName() const;
+
+ /// Get the name of the field as an Init.
Init *getNameInit() const { return Name; }
+ /// Get the name of the field as a std::string.
std::string getNameInitAsString() const {
return getNameInit()->getAsUnquotedString();
}
- bool getPrefix() const { return TyAndPrefix.getInt(); }
- RecTy *getType() const { return TyAndPrefix.getPointer(); }
+ /// Get the source location of the point where the field was defined.
+ const SMLoc &getLoc() const { return Loc; }
+
+ /// Is this a field where nonconcrete values are okay?
+ bool isNonconcreteOK() const {
+ return TyAndKind.getInt() == FK_NonconcreteOK;
+ }
+
+ /// Is this a template argument?
+ bool isTemplateArg() const {
+ return TyAndKind.getInt() == FK_TemplateArg;
+ }
+
+ /// Get the type of the field value as a RecTy.
+ RecTy *getType() const { return TyAndKind.getPointer(); }
+
+ /// Get the type of the field for printing purposes.
+ std::string getPrintType() const;
+
+ /// Get the value of the field as an Init.
Init *getValue() const { return Value; }
+ /// Set the value of the field from an Init.
bool setValue(Init *V);
+ /// Set the value and source location of the field.
+ bool setValue(Init *V, SMLoc NewLoc);
+
void dump() const;
+
+ /// Print the value to an output stream, possibly with a semicolon.
void print(raw_ostream &OS, bool PrintSem = true) const;
};
@@ -1438,15 +1446,18 @@ class Record {
SmallVector<SMLoc, 4> Locs;
SmallVector<Init *, 0> TemplateArgs;
SmallVector<RecordVal, 0> Values;
+ // Vector of [source location, condition Init, message Init].
+ SmallVector<std::tuple<SMLoc, Init *, Init *>, 0> Assertions;
- // All superclasses in the inheritance forest in reverse preorder (yes, it
+ // All superclasses in the inheritance forest in post-order (yes, it
// must be a forest; diamond-shaped inheritance is not allowed).
SmallVector<std::pair<Record *, SMRange>, 0> SuperClasses;
// Tracks Record instances. Not owned by Record.
RecordKeeper &TrackedRecords;
- DefInit *TheInit = nullptr;
+ // The DefInit corresponding to this record.
+ DefInit *CorrespondingDefInit = nullptr;
// Unique record ID.
unsigned ID;
@@ -1470,8 +1481,8 @@ public:
: Record(StringInit::get(N), locs, records, false, Class) {}
// When copy-constructing a Record, we must still guarantee a globally unique
- // ID number. Don't copy TheInit either since it's owned by the original
- // record. All other fields can be copied normally.
+ // ID number. Don't copy CorrespondingDefInit either, since it's owned by the
+ // original record. All other fields can be copied normally.
Record(const Record &O)
: Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs),
Values(O.Values), SuperClasses(O.SuperClasses),
@@ -1511,11 +1522,18 @@ public:
ArrayRef<RecordVal> getValues() const { return Values; }
+ ArrayRef<std::tuple<SMLoc, Init *, Init *>> getAssertions() const {
+ return Assertions;
+ }
+
ArrayRef<std::pair<Record *, SMRange>> getSuperClasses() const {
return SuperClasses;
}
- /// Append the direct super classes of this record to Classes.
+ /// Determine whether this record has the specified direct superclass.
+ bool hasDirectSuperClass(const Record *SuperClass) const;
+
+ /// Append the direct superclasses of this record to Classes.
void getDirectSuperClasses(SmallVectorImpl<Record *> &Classes) const;
bool isTemplateArg(Init *Name) const {
@@ -1565,6 +1583,10 @@ public:
removeValue(StringInit::get(Name));
}
+ void addAssertion(SMLoc Loc, Init *Condition, Init *Message) {
+ Assertions.push_back(std::make_tuple(Loc, Condition, Message));
+ }
+
bool isSubClassOf(const Record *R) const {
for (const auto &SCPair : SuperClasses)
if (SCPair.first == R)
@@ -1585,7 +1607,8 @@ public:
}
void addSuperClass(Record *R, SMRange Range) {
- assert(!TheInit && "changing type of record after it has been referenced");
+ assert(!CorrespondingDefInit &&
+ "changing type of record after it has been referenced");
assert(!isSubClassOf(R) && "Already subclassing record!");
SuperClasses.push_back(std::make_pair(R, Range));
}
@@ -1612,13 +1635,15 @@ public:
return IsAnonymous;
}
- void print(raw_ostream &OS) const;
void dump() const;
//===--------------------------------------------------------------------===//
// High-level methods useful to tablegen back-ends
//
+ /// Return the source location for the named field.
+ SMLoc getFieldLoc(StringRef FieldName) const;
+
/// Return the initializer for a value with the specified name,
/// or throw an exception if the field does not exist.
Init *getValueInit(StringRef FieldName) const;
@@ -1634,6 +1659,11 @@ public:
StringRef getValueAsString(StringRef FieldName) const;
/// This method looks up the specified field and returns
+ /// its value as a string, throwing an exception if the value is
+ /// not a string, and llvm::Optional() if the field does not exist.
+ llvm::Optional<StringRef> getValueAsOptionalString(StringRef FieldName) const;
+
+ /// This method looks up the specified field and returns
/// its value as a BitsInit, throwing an exception if the field does not exist
/// or if the value is not the right type.
BitsInit *getValueAsBitsInit(StringRef FieldName) const;
@@ -1694,26 +1724,50 @@ raw_ostream &operator<<(raw_ostream &OS, const Record &R);
class RecordKeeper {
friend class RecordRecTy;
+
using RecordMap = std::map<std::string, std::unique_ptr<Record>, std::less<>>;
+ using GlobalMap = std::map<std::string, Init *, std::less<>>;
+
+ std::string InputFilename;
RecordMap Classes, Defs;
+ mutable StringMap<std::vector<Record *>> ClassRecordsMap;
FoldingSet<RecordRecTy> RecordTypePool;
std::map<std::string, Init *, std::less<>> ExtraGlobals;
unsigned AnonCounter = 0;
+ // These members are for the phase timing feature. We need a timer group,
+ // the last timer started, and a flag to say whether the last timer
+ // is the special "backend overall timer."
+ TimerGroup *TimingGroup = nullptr;
+ Timer *LastTimer = nullptr;
+ bool BackendTimer = false;
+
public:
+ /// Get the main TableGen input file's name.
+ const std::string getInputFilename() const { return InputFilename; }
+
+ /// Get the map of classes.
const RecordMap &getClasses() const { return Classes; }
+
+ /// Get the map of records (defs).
const RecordMap &getDefs() const { return Defs; }
+ /// Get the map of global variables.
+ const GlobalMap &getGlobals() const { return ExtraGlobals; }
+
+ /// Get the class with the specified name.
Record *getClass(StringRef Name) const {
auto I = Classes.find(Name);
return I == Classes.end() ? nullptr : I->second.get();
}
+ /// Get the concrete record with the specified name.
Record *getDef(StringRef Name) const {
auto I = Defs.find(Name);
return I == Defs.end() ? nullptr : I->second.get();
}
+ /// Get the \p Init value of the specified global variable.
Init *getGlobal(StringRef Name) const {
if (Record *R = getDef(Name))
return R->getDefInit();
@@ -1721,6 +1775,10 @@ public:
return It == ExtraGlobals.end() ? nullptr : It->second;
}
+ void saveInputFilename(std::string Filename) {
+ InputFilename = Filename;
+ }
+
void addClass(std::unique_ptr<Record> R) {
bool Ins = Classes.insert(std::make_pair(std::string(R->getName()),
std::move(R))).second;
@@ -1744,14 +1802,42 @@ public:
Init *getNewAnonymousName();
+ /// Start phase timing; called if the --time-phases option is specified.
+ void startPhaseTiming() {
+ TimingGroup = new TimerGroup("TableGen", "TableGen Phase Timing");
+ }
+
+ /// Start timing a phase. Automatically stops any previous phase timer.
+ void startTimer(StringRef Name);
+
+ /// Stop timing a phase.
+ void stopTimer();
+
+ /// Start timing the overall backend. If the backend itself starts a timer,
+ /// then this timer is cleared.
+ void startBackendTimer(StringRef Name);
+
+ /// Stop timing the overall backend.
+ void stopBackendTimer();
+
+ /// Stop phase timing and print the report.
+ void stopPhaseTiming() {
+ if (TimingGroup)
+ delete TimingGroup;
+ }
+
//===--------------------------------------------------------------------===//
- // High-level helper methods, useful for tablegen backends...
+ // High-level helper methods, useful for tablegen backends.
- /// This method returns all concrete definitions
- /// that derive from the specified class name. A class with the specified
- /// name must exist.
+ /// Get all the concrete records that inherit from the one specified
+ /// class. The class must be defined.
std::vector<Record *> getAllDerivedDefinitions(StringRef ClassName) const;
+ /// Get all the concrete records that inherit from all the specified
+ /// classes. The classes must be defined.
+ std::vector<Record *> getAllDerivedDefinitions(
+ ArrayRef<StringRef> ClassNames) const;
+
void dump() const;
};
@@ -1781,8 +1867,6 @@ struct LessRecordFieldName {
};
struct LessRecordRegister {
- static bool ascii_isdigit(char x) { return x >= '0' && x <= '9'; }
-
struct RecordParts {
SmallVector<std::pair< bool, StringRef>, 4> Parts;
@@ -1793,18 +1877,18 @@ struct LessRecordRegister {
size_t Len = 0;
const char *Start = Rec.data();
const char *Curr = Start;
- bool isDigitPart = ascii_isdigit(Curr[0]);
+ bool IsDigitPart = isDigit(Curr[0]);
for (size_t I = 0, E = Rec.size(); I != E; ++I, ++Len) {
- bool isDigit = ascii_isdigit(Curr[I]);
- if (isDigit != isDigitPart) {
- Parts.push_back(std::make_pair(isDigitPart, StringRef(Start, Len)));
+ bool IsDigit = isDigit(Curr[I]);
+ if (IsDigit != IsDigitPart) {
+ Parts.push_back(std::make_pair(IsDigitPart, StringRef(Start, Len)));
Len = 0;
Start = &Curr[I];
- isDigitPart = ascii_isdigit(Curr[I]);
+ IsDigitPart = isDigit(Curr[I]);
}
}
// Push the last part.
- Parts.push_back(std::make_pair(isDigitPart, StringRef(Start, Len)));
+ Parts.push_back(std::make_pair(IsDigitPart, StringRef(Start, Len)));
}
size_t size() { return Parts.size(); }
@@ -1927,25 +2011,6 @@ public:
bool keepUnsetBits() const override { return true; }
};
-/// Resolve all references to a specific RecordVal.
-//
-// TODO: This is used for resolving references to template arguments, in a
-// rather inefficient way. Change those uses to resolve all template
-// arguments simultaneously and get rid of this class.
-class RecordValResolver final : public Resolver {
- const RecordVal *RV;
-
-public:
- explicit RecordValResolver(Record &R, const RecordVal *RV)
- : Resolver(&R), RV(RV) {}
-
- Init *resolve(Init *VarName) override {
- if (VarName == RV->getNameInit())
- return RV->getValue();
- return nullptr;
- }
-};
-
/// Delegate resolving to a sub-resolver, but shadow some variable names.
class ShadowResolver final : public Resolver {
Resolver &R;
@@ -1996,6 +2061,7 @@ public:
Init *resolve(Init *VarName) override;
};
+void EmitDetailedRecords(RecordKeeper &RK, raw_ostream &OS);
void EmitJSON(RecordKeeper &RK, raw_ostream &OS);
} // end namespace llvm
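A sketch of a backend exercising the RecordKeeper additions above (phase timers and the multi-class getAllDerivedDefinitions overload); the class names are placeholders:

#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
#include <vector>

void EmitMyBackend(llvm::RecordKeeper &Records, llvm::raw_ostream &OS) {
  Records.startTimer("Collect records");
  std::vector<llvm::Record *> Defs =
      Records.getAllDerivedDefinitions({"MyBase", "MySearchable"});

  Records.startTimer("Emit output");
  OS << "// Generated from " << Records.getInputFilename() << "\n";
  for (const llvm::Record *R : Defs)
    OS << R->getName() << "\n";
  Records.stopTimer();
}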
diff --git a/contrib/llvm-project/llvm/include/llvm/TableGen/SearchableTable.td b/contrib/llvm-project/llvm/include/llvm/TableGen/SearchableTable.td
index 2680c71218ea..61dfa5c70706 100644
--- a/contrib/llvm-project/llvm/include/llvm/TableGen/SearchableTable.td
+++ b/contrib/llvm-project/llvm/include/llvm/TableGen/SearchableTable.td
@@ -67,9 +67,13 @@ class GenericTable {
// List of the names of fields of collected records that contain the data for
// table entries, in the order that is used for initialization in C++.
//
- // For each field of the table named XXX, TableGen will look for a value
- // called TypeOf_XXX and use that as a more detailed description of the
- // type of the field if present. This is required for fields whose type
+ // TableGen needs to know the type of the fields so that it can format
+ // the initializers correctly. It can infer the type of bit, bits, string,
+ // Intrinsic, and Instruction values.
+ //
+ // For each field of the table named xxx, TableGen will look for a field
+ // named TypeOf_xxx and use that as a more detailed description of the
+ // type of the field. This is required for fields whose type
// cannot be deduced automatically, such as enum fields. For example:
//
// def MyEnum : GenericEnum {
@@ -85,15 +89,15 @@ class GenericTable {
// def MyTable : GenericTable {
// let FilterClass = "MyTableEntry";
// let Fields = ["V", ...];
- // GenericEnum TypeOf_V = MyEnum;
+ // string TypeOf_V = "MyEnum";
// }
//
- // Fields of type bit, bits<N>, string, Intrinsic, and Instruction (or
- // derived classes of those) are supported natively.
+ // If a string field was initialized with a code literal, TableGen will
+ // emit the code verbatim. However, if a string field was initialized
+ // in some other way, but should be interpreted as code, then a TypeOf_xxx
+ // field is necessary, with a value of "code":
//
- // Additionally, fields of type `code` can appear, where the value is used
- // verbatim as an initializer. However, these fields cannot be used as
- // search keys.
+ // string TypeOf_Predicate = "code";
list<string> Fields;
// (Optional) List of fields that make up the primary key.
@@ -103,7 +107,7 @@ class GenericTable {
string PrimaryKeyName;
// See SearchIndex.EarlyOut
- bit PrimaryKeyEarlyOut = 0;
+ bit PrimaryKeyEarlyOut = false;
}
// Define a record derived from this class to generate an additional search
@@ -124,7 +128,7 @@ class SearchIndex {
// instructions.
//
// Can only be used when the first field is an integral (non-string) type.
- bit EarlyOut = 0;
+ bit EarlyOut = false;
}
// Legacy table type with integrated enum.
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/CGPassBuilderOption.h b/contrib/llvm-project/llvm/include/llvm/Target/CGPassBuilderOption.h
new file mode 100644
index 000000000000..c3a221e01ceb
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -0,0 +1,65 @@
+//===- CGPassBuilderOption.h - Options for pass builder ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CGPassBuilderOption struct, which carries the options
+// used to configure the codegen pass builder.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PASSBUILDER_OPTION_H
+#define LLVM_CODEGEN_PASSBUILDER_OPTION_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Target/TargetOptions.h"
+#include <vector>
+
+namespace llvm {
+class TargetMachine;
+
+enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
+enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP };
+enum class CFLAAType { None, Steensgaard, Andersen, Both };
+
+// Not a one-to-one mapping, but mostly corresponding to command-line options in
+// TargetPassConfig.cpp.
+struct CGPassBuilderOption {
+ Optional<bool> OptimizeRegAlloc;
+ Optional<bool> EnableIPRA;
+ bool DebugPM = false;
+ bool DisableVerify = false;
+ bool EnableImplicitNullChecks = false;
+ bool EnableBlockPlacementStats = false;
+ bool MISchedPostRA = false;
+ bool EarlyLiveIntervals = false;
+
+ bool DisableLSR = false;
+ bool DisableCGP = false;
+ bool PrintLSR = false;
+ bool DisableMergeICmps = false;
+ bool DisablePartialLibcallInlining = false;
+ bool DisableConstantHoisting = false;
+ bool PrintISelInput = false;
+ bool PrintGCInfo = false;
+ bool RequiresCodeGenSCCOrder = false;
+
+ RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
+ RegAllocType RegAlloc = RegAllocType::Default;
+ CFLAAType UseCFLAA = CFLAAType::None;
+ Optional<GlobalISelAbortMode> EnableGlobalISelAbort;
+
+ Optional<bool> VerifyMachineCode;
+ Optional<bool> EnableFastISelOption;
+ Optional<bool> EnableGlobalISelOption;
+};
+
+CGPassBuilderOption getCGPassBuilderOption();
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_PASSBUILDER_OPTION_H
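A sketch that reads the option struct declared above; the fields printed are chosen arbitrarily:

#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/CGPassBuilderOption.h"

void reportCodeGenOptions() {
  llvm::CGPassBuilderOption Opts = llvm::getCGPassBuilderOption();
  llvm::outs() << "IPRA: "
               << (Opts.EnableIPRA.getValueOr(false) ? "on" : "off") << "\n";
  if (Opts.EnableMachineOutliner == llvm::RunOutliner::AlwaysOutline)
    llvm::outs() << "machine outliner forced on\n";
}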
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/GenericOpcodes.td b/contrib/llvm-project/llvm/include/llvm/Target/GenericOpcodes.td
index 3d8262b2404f..209925969df3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/GenericOpcodes.td
@@ -16,7 +16,7 @@
//------------------------------------------------------------------------------
class GenericInstruction : StandardPseudoInstruction {
- let isPreISelOpcode = 1;
+ let isPreISelOpcode = true;
}
// Provide a variant of an instruction with the same operands, but
@@ -31,8 +31,8 @@ class ConstrainedIntruction<GenericInstruction baseInst> :
// TODO: Do we need a better way to mark reads from FP mode than
// hasSideEffects?
- let hasSideEffects = 1;
- let mayRaiseFPException = 1;
+ let hasSideEffects = true;
+ let mayRaiseFPException = true;
}
// Extend the underlying scalar type of an operation, leaving the high bits
@@ -40,7 +40,7 @@ class ConstrainedIntruction<GenericInstruction baseInst> :
def G_ANYEXT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Sign extend the underlying scalar type of an operation, copying the sign bit
@@ -48,7 +48,7 @@ def G_ANYEXT : GenericInstruction {
def G_SEXT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Sign extend a value from an arbitrary bit position, copying the sign bit
@@ -62,7 +62,7 @@ def G_SEXT : GenericInstruction {
def G_SEXT_INREG : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src, untyped_imm_0:$sz);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
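
For intuition, a scalar model of what G_SEXT_INREG computes, as a small C++ sketch (illustrative only; the real opcode is type-parametric and carries the size as an immediate):

#include <cstdint>

// Sign-extend the low Sz bits of X to a full 64-bit value.
// Assumes 0 < Sz < 64.
static int64_t SExtInReg(uint64_t X, unsigned Sz) {
  uint64_t Low = X & ((uint64_t(1) << Sz) - 1);      // keep the low Sz bits
  uint64_t SignBit = uint64_t(1) << (Sz - 1);
  return int64_t(Low ^ SignBit) - int64_t(SignBit);  // classic sign-extension trick
}
// Example: SExtInReg(0b1010, 4) == -6.
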
// Zero extend the underlying scalar type of an operation, putting zero bits
@@ -70,7 +70,7 @@ def G_SEXT_INREG : GenericInstruction {
def G_ZEXT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
@@ -79,150 +79,150 @@ def G_ZEXT : GenericInstruction {
def G_TRUNC : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_IMPLICIT_DEF : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_PHI : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins variable_ops);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_FRAME_INDEX : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_GLOBAL_VALUE : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_INTTOPTR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_PTRTOINT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_BITCAST : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Only supports scalar result types
def G_CONSTANT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$imm);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Only supports scalar result types
def G_FCONSTANT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$imm);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_VASTART : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins type0:$list);
- let hasSideEffects = 0;
- let mayStore = 1;
+ let hasSideEffects = false;
+ let mayStore = true;
}
def G_VAARG : GenericInstruction {
let OutOperandList = (outs type0:$val);
let InOperandList = (ins type1:$list, unknown:$align);
- let hasSideEffects = 0;
- let mayLoad = 1;
- let mayStore = 1;
+ let hasSideEffects = false;
+ let mayLoad = true;
+ let mayStore = true;
}
def G_CTLZ : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_CTLZ_ZERO_UNDEF : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_CTTZ : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_CTTZ_ZERO_UNDEF : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_CTPOP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_BSWAP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_BITREVERSE : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_ADDRSPACE_CAST : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_BLOCK_ADDR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$ba);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_JUMP_TABLE : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$jti);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_DYN_STACKALLOC : GenericInstruction {
let OutOperandList = (outs ptype0:$dst);
let InOperandList = (ins type1:$size, i32imm:$align);
- let hasSideEffects = 1;
+ let hasSideEffects = true;
}
def G_FREEZE : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
//------------------------------------------------------------------------------
@@ -233,101 +233,101 @@ def G_FREEZE : GenericInstruction {
def G_ADD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic subtraction.
def G_SUB : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 0;
+ let hasSideEffects = false;
+ let isCommutable = false;
}
// Generic multiplication.
def G_MUL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic signed division.
def G_SDIV : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 0;
+ let hasSideEffects = false;
+ let isCommutable = false;
}
// Generic unsigned division.
def G_UDIV : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 0;
+ let hasSideEffects = false;
+ let isCommutable = false;
}
// Generic signed remainder.
def G_SREM : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 0;
+ let hasSideEffects = false;
+ let isCommutable = false;
}
// Generic unsigned remainder.
def G_UREM : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 0;
+ let hasSideEffects = false;
+ let isCommutable = false;
}
// Generic bitwise and.
def G_AND : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic bitwise or.
def G_OR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic bitwise xor.
def G_XOR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic left-shift.
def G_SHL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic logical right-shift.
def G_LSHR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic arithmetic right-shift.
def G_ASHR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
/// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount.
@@ -335,7 +335,7 @@ def G_ASHR : GenericInstruction {
def G_FSHL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2, type1:$src3);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
/// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount.
@@ -343,35 +343,35 @@ def G_FSHL : GenericInstruction {
def G_FSHR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2, type1:$src3);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
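
A scalar model of the two funnel shifts above for 32-bit values (sketch only; the shift amount is taken modulo the bit width, and the zero case is handled separately to avoid an out-of-range C++ shift):

#include <cstdint>

static uint32_t FunnelShl(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt %= 32;
  if (Amt == 0)
    return Hi;                               // shifting by a multiple of the width returns the first operand
  return (Hi << Amt) | (Lo >> (32 - Amt));
}

static uint32_t FunnelShr(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt %= 32;
  if (Amt == 0)
    return Lo;                               // ...and the second operand for the right-shift form
  return (Hi << (32 - Amt)) | (Lo >> Amt);
}
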
// Generic integer comparison.
def G_ICMP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$tst, type1:$src1, type1:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic floating-point comparison.
def G_FCMP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$tst, type1:$src1, type1:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic select
def G_SELECT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$tst, type0:$src1, type0:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic pointer offset.
def G_PTR_ADD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic pointer mask. type1 should be an integer with the same
@@ -379,39 +379,46 @@ def G_PTR_ADD : GenericInstruction {
def G_PTRMASK : GenericInstruction {
let OutOperandList = (outs ptype0:$dst);
let InOperandList = (ins ptype0:$src, type1:$bits);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
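
In scalar terms, G_PTRMASK clears the address bits that are zero in the mask, e.g. to round a pointer down to an alignment boundary; a minimal sketch:

#include <cstdint>

static uintptr_t PtrMask(uintptr_t Ptr, uintptr_t Mask) {
  return Ptr & Mask;  // keep only the bits set in the mask
}
// Example: PtrMask(P, ~uintptr_t(15)) rounds P down to a 16-byte boundary.
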
// Generic signed integer minimum.
def G_SMIN : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic signed integer maximum.
def G_SMAX : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic unsigned integer minimum.
def G_UMIN : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic unsigned integer maximum.
def G_UMAX : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
+}
+
+// Generic integer absolute value.
+def G_ABS : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = false;
}
//------------------------------------------------------------------------------
@@ -422,73 +429,73 @@ def G_UMAX : GenericInstruction {
def G_UADDO : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
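
A scalar model of G_UADDO for 32-bit values (sketch): the second result reports whether the unsigned addition wrapped.

#include <cstdint>

struct UAddOResult {
  uint32_t Sum;
  bool CarryOut;
};

static UAddOResult UAddO(uint32_t A, uint32_t B) {
  uint32_t S = A + B;   // well-defined modulo-2^32 addition
  return {S, S < A};    // wrap-around means the carry is set
}
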
// Generic unsigned addition consuming and producing a carry flag.
def G_UADDE : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic signed addition producing a carry flag.
def G_SADDO : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic signed addition consuming and producing a carry flag.
def G_SADDE : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic unsigned subtraction producing a carry flag.
def G_USUBO : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic unsigned subtraction consuming and producing a carry flag.
def G_USUBE : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic signed subtraction producing a carry flag.
def G_SSUBO : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic signed subtraction consuming and producing a carry flag.
def G_SSUBE : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic unsigned multiplication producing a carry flag.
def G_UMULO : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic signed multiplication producing a carry flag.
def G_SMULO : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Multiply two numbers at twice the incoming bit width (unsigned) and return
@@ -496,8 +503,8 @@ def G_SMULO : GenericInstruction {
def G_UMULH : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
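
For intuition, a scalar model of G_UMULH on 32-bit operands (sketch): widen, multiply at double width, and keep the high half of the product.

#include <cstdint>

static uint32_t UMulH(uint32_t A, uint32_t B) {
  return uint32_t((uint64_t(A) * uint64_t(B)) >> 32);
}
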
// Multiply two numbers at twice the incoming bit width (signed) and return
@@ -505,8 +512,8 @@ def G_UMULH : GenericInstruction {
def G_SMULH : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
//------------------------------------------------------------------------------
@@ -517,32 +524,119 @@ def G_SMULH : GenericInstruction {
def G_UADDSAT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic saturating signed addition.
def G_SADDSAT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic saturating unsigned subtraction.
def G_USUBSAT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 0;
+ let hasSideEffects = false;
+ let isCommutable = false;
}
// Generic saturating signed subtraction.
def G_SSUBSAT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 0;
+ let hasSideEffects = false;
+ let isCommutable = false;
+}
+
+// Generic saturating unsigned left shift.
+def G_USHLSAT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = false;
+ let isCommutable = false;
+}
+
+// Generic saturating signed left shift.
+def G_SSHLSAT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = false;
+ let isCommutable = false;
+}
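
A scalar model of the unsigned saturating add and subtract above, for 32-bit values (sketch only; the signed and shift variants clamp analogously at the signed limits):

#include <cstdint>

static uint32_t UAddSat(uint32_t A, uint32_t B) {
  uint32_t S = A + B;
  return S < A ? UINT32_MAX : S;  // clamp to the maximum on wrap-around
}

static uint32_t USubSat(uint32_t A, uint32_t B) {
  return A > B ? A - B : 0;       // clamp at zero
}
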
+
+/// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point
+/// multiplication on 2 integers with the same width and scale. SCALE
+/// represents the scale of both operands as fixed point numbers. This
+/// SCALE parameter must be a constant integer. A scale of zero is
+/// effectively performing multiplication on 2 integers.
+def G_SMULFIX : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale);
+ let hasSideEffects = false;
+ let isCommutable = true;
+}
+
+def G_UMULFIX : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale);
+ let hasSideEffects = false;
+ let isCommutable = true;
+}
+
+/// Same as the corresponding unsaturated fixed point instructions, but the
+/// result is clamped between the min and max values representable by the
+/// bits of the first 2 operands.
+def G_SMULFIXSAT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale);
+ let hasSideEffects = false;
+ let isCommutable = true;
+}
+
+def G_UMULFIXSAT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale);
+ let hasSideEffects = false;
+ let isCommutable = true;
+}
+
+/// RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on
+/// 2 integers with the same width and scale. SCALE represents the scale
+/// of both operands as fixed point numbers. This SCALE parameter must be a
+/// constant integer.
+def G_SDIVFIX : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale);
+ let hasSideEffects = false;
+ let isCommutable = false;
+}
+
+def G_UDIVFIX : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale);
+ let hasSideEffects = false;
+ let isCommutable = false;
+}
+
+/// Same as the corresponding unsaturated fixed point instructions,
+/// but the result is clamped between the min and max values
+/// representable by the bits of the first 2 operands.
+def G_SDIVFIXSAT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale);
+ let hasSideEffects = false;
+ let isCommutable = false;
+}
+
+def G_UDIVFIXSAT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale);
+ let hasSideEffects = false;
+ let isCommutable = false;
}
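
A scalar model of G_UMULFIX for 32-bit operands (sketch only, ignoring overflow of the scaled result; the real opcodes are type-parametric and the SAT forms additionally clamp): multiply at double width, then drop SCALE fractional bits.

#include <cstdint>

static uint32_t UMulFix(uint32_t A, uint32_t B, unsigned Scale) {
  uint64_t Prod = uint64_t(A) * uint64_t(B);  // full 64-bit product
  return uint32_t(Prod >> Scale);             // rescale back to the operand format
}
// Example with Scale = 8: 2.5 is encoded as 0x280, and
// UMulFix(0x280, 0x280, 8) == 0x640, i.e. 6.25.
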
//------------------------------------------------------------------------------
@@ -552,61 +646,61 @@ def G_SSUBSAT : GenericInstruction {
def G_FNEG : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_FPEXT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_FPTRUNC : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_FPTOSI : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_FPTOUI : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_SITOFP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_UITOFP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_FABS : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_FCOPYSIGN : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0, type1:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_FCANONICALIZE : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
@@ -619,15 +713,15 @@ def G_FCANONICALIZE : GenericInstruction {
def G_FMINNUM : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
def G_FMAXNUM : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on
@@ -637,15 +731,15 @@ def G_FMAXNUM : GenericInstruction {
def G_FMINNUM_IEEE : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
def G_FMAXNUM_IEEE : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0
@@ -654,15 +748,15 @@ def G_FMAXNUM_IEEE : GenericInstruction {
def G_FMINIMUM : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
def G_FMAXIMUM : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
//------------------------------------------------------------------------------
@@ -673,24 +767,24 @@ def G_FMAXIMUM : GenericInstruction {
def G_FADD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic FP subtraction.
def G_FSUB : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 0;
+ let hasSideEffects = false;
+ let isCommutable = false;
}
// Generic FP multiplication.
def G_FMUL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
- let isCommutable = 1;
+ let hasSideEffects = false;
+ let isCommutable = true;
}
// Generic fused multiply-add instruction.
@@ -698,8 +792,8 @@ def G_FMUL : GenericInstruction {
def G_FMA : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
- let hasSideEffects = 0;
- let isCommutable = 0;
+ let hasSideEffects = false;
+ let isCommutable = false;
}
/// Generic FP multiply and add. Perform a * b + c, while getting the
@@ -707,85 +801,92 @@ def G_FMA : GenericInstruction {
def G_FMAD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
- let hasSideEffects = 0;
- let isCommutable = 0;
+ let hasSideEffects = false;
+ let isCommutable = false;
}
// Generic FP division.
def G_FDIV : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic FP remainder.
def G_FREM : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point exponentiation.
def G_FPOW : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
+}
+
+// Floating point exponentiation, with an integer power.
+def G_FPOWI : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type1:$src1);
+ let hasSideEffects = false;
}
// Floating point base-e exponential of a value.
def G_FEXP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point base-2 exponential of a value.
def G_FEXP2 : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point base-e logarithm of a value.
def G_FLOG : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point base-2 logarithm of a value.
def G_FLOG2 : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point base-10 logarithm of a value.
def G_FLOG10 : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point ceiling of a value.
def G_FCEIL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point cosine of a value.
def G_FCOS : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point sine of a value.
def G_FSIN : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point square root of a value.
@@ -795,28 +896,28 @@ def G_FSIN : GenericInstruction {
def G_FSQRT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point floor of a value.
def G_FFLOOR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point round to next integer.
def G_FRINT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Floating point round to the nearest integer.
def G_FNEARBYINT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
//------------------------------------------------------------------------------
@@ -825,19 +926,31 @@ def G_FNEARBYINT : GenericInstruction {
def G_INTRINSIC_TRUNC : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def G_INTRINSIC_ROUND : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
+}
+
+def G_INTRINSIC_LRINT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+
+def G_INTRINSIC_ROUNDEVEN : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = false;
}
def G_READCYCLECOUNTER : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins);
- let hasSideEffects = 1;
+ let hasSideEffects = true;
}
//------------------------------------------------------------------------------
@@ -852,24 +965,24 @@ def G_READCYCLECOUNTER : GenericInstruction {
def G_LOAD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins ptype1:$addr);
- let hasSideEffects = 0;
- let mayLoad = 1;
+ let hasSideEffects = false;
+ let mayLoad = true;
}
// Generic sign-extended load. Expects a MachineMemOperand in addition to explicit operands.
def G_SEXTLOAD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins ptype1:$addr);
- let hasSideEffects = 0;
- let mayLoad = 1;
+ let hasSideEffects = false;
+ let mayLoad = true;
}
// Generic zero-extended load. Expects a MachineMemOperand in addition to explicit operands.
def G_ZEXTLOAD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins ptype1:$addr);
- let hasSideEffects = 0;
- let mayLoad = 1;
+ let hasSideEffects = false;
+ let mayLoad = true;
}
// Generic indexed load. Combines a GEP with a load. $newaddr is set to $base + $offset.
@@ -878,32 +991,32 @@ def G_ZEXTLOAD : GenericInstruction {
def G_INDEXED_LOAD : GenericInstruction {
let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
- let hasSideEffects = 0;
- let mayLoad = 1;
+ let hasSideEffects = false;
+ let mayLoad = true;
}
// Same as G_INDEXED_LOAD except that the load performed is sign-extending, as with G_SEXTLOAD.
def G_INDEXED_SEXTLOAD : GenericInstruction {
let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
- let hasSideEffects = 0;
- let mayLoad = 1;
+ let hasSideEffects = false;
+ let mayLoad = true;
}
// Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_ZEXTLOAD.
def G_INDEXED_ZEXTLOAD : GenericInstruction {
let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
- let hasSideEffects = 0;
- let mayLoad = 1;
+ let hasSideEffects = false;
+ let mayLoad = true;
}
// Generic store. Expects a MachineMemOperand in addition to explicit operands.
def G_STORE : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins type0:$src, ptype1:$addr);
- let hasSideEffects = 0;
- let mayStore = 1;
+ let hasSideEffects = false;
+ let mayStore = true;
}
// Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing behaviour.
@@ -911,8 +1024,8 @@ def G_INDEXED_STORE : GenericInstruction {
let OutOperandList = (outs ptype0:$newaddr);
let InOperandList = (ins type1:$src, ptype0:$base, ptype2:$offset,
unknown:$am);
- let hasSideEffects = 0;
- let mayStore = 1;
+ let hasSideEffects = false;
+ let mayStore = true;
}
// Generic atomic cmpxchg with internal success check. Expects a
@@ -920,9 +1033,9 @@ def G_INDEXED_STORE : GenericInstruction {
def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction {
let OutOperandList = (outs type0:$oldval, type1:$success);
let InOperandList = (ins type2:$addr, type0:$cmpval, type0:$newval);
- let hasSideEffects = 0;
- let mayLoad = 1;
- let mayStore = 1;
+ let hasSideEffects = false;
+ let mayLoad = true;
+ let mayStore = true;
}
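
In scalar (and deliberately non-atomic) terms, the pair of results produced by G_ATOMIC_CMPXCHG_WITH_SUCCESS can be modelled as below; the memory ordering lives in the MachineMemOperand and is ignored in this sketch.

#include <cstdint>

struct CmpXchgResult {
  uint32_t OldVal;
  bool Success;
};

static CmpXchgResult CmpXchg(uint32_t &Mem, uint32_t Cmp, uint32_t New) {
  CmpXchgResult R{Mem, Mem == Cmp};
  if (R.Success)
    Mem = New;  // store only when the comparison succeeded
  return R;
}
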
// Generic atomic cmpxchg. Expects a MachineMemOperand in addition to explicit
@@ -930,9 +1043,9 @@ def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction {
def G_ATOMIC_CMPXCHG : GenericInstruction {
let OutOperandList = (outs type0:$oldval);
let InOperandList = (ins ptype1:$addr, type0:$cmpval, type0:$newval);
- let hasSideEffects = 0;
- let mayLoad = 1;
- let mayStore = 1;
+ let hasSideEffects = false;
+ let mayLoad = true;
+ let mayStore = true;
}
// Generic atomicrmw. Expects a MachineMemOperand in addition to explicit
@@ -940,9 +1053,9 @@ def G_ATOMIC_CMPXCHG : GenericInstruction {
class G_ATOMICRMW_OP : GenericInstruction {
let OutOperandList = (outs type0:$oldval);
let InOperandList = (ins ptype1:$addr, type0:$val);
- let hasSideEffects = 0;
- let mayLoad = 1;
- let mayStore = 1;
+ let hasSideEffects = false;
+ let mayLoad = true;
+ let mayStore = true;
}
def G_ATOMICRMW_XCHG : G_ATOMICRMW_OP;
@@ -962,7 +1075,7 @@ def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP;
def G_FENCE : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$ordering, i32imm:$scope);
- let hasSideEffects = 1;
+ let hasSideEffects = true;
}
//------------------------------------------------------------------------------
@@ -975,7 +1088,7 @@ def G_FENCE : GenericInstruction {
def G_EXTRACT : GenericInstruction {
let OutOperandList = (outs type0:$res);
let InOperandList = (ins type1:$src, untyped_imm_0:$offset);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Extract multiple registers specified size, starting from blocks given by
@@ -987,14 +1100,14 @@ def G_EXTRACT : GenericInstruction {
def G_UNMERGE_VALUES : GenericInstruction {
let OutOperandList = (outs type0:$dst0, variable_ops);
let InOperandList = (ins type1:$src);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Insert a smaller register into a larger one at the specified bit-index.
def G_INSERT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src, type1:$op, untyped_imm_0:$offset);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Concatenate multiple registers of the same size into a wider register.
@@ -1004,7 +1117,7 @@ def G_INSERT : GenericInstruction {
def G_MERGE_VALUES : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src0, variable_ops);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
/// Create a vector from multiple scalar registers. No implicit
@@ -1013,7 +1126,7 @@ def G_MERGE_VALUES : GenericInstruction {
def G_BUILD_VECTOR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src0, variable_ops);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
/// Like G_BUILD_VECTOR, but truncates the larger operand types to fit the
@@ -1021,24 +1134,24 @@ def G_BUILD_VECTOR : GenericInstruction {
def G_BUILD_VECTOR_TRUNC : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src0, variable_ops);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
/// Create a vector by concatenating vectors together.
def G_CONCAT_VECTORS : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src0, variable_ops);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Intrinsic without side effects.
def G_INTRINSIC : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$intrin, variable_ops);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
// Conservatively assume this is convergent. If there turns out to
- // be a need, there should be separate convergent intrinsic opcode.s
+ // be a need, there should be separate convergent intrinsic opcodes.
let isConvergent = 1;
}
@@ -1046,13 +1159,13 @@ def G_INTRINSIC : GenericInstruction {
def G_INTRINSIC_W_SIDE_EFFECTS : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$intrin, variable_ops);
- let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let hasSideEffects = true;
+ let mayLoad = true;
+ let mayStore = true;
// Conservatively assume this is convergent. If there turns out to
- // be a need, there should be separate convergent intrinsic opcode.s
- let isConvergent = 1;
+ // be a need, there should be separate convergent intrinsic opcodes.
+ let isConvergent = true;
}
//------------------------------------------------------------------------------
@@ -1063,61 +1176,61 @@ def G_INTRINSIC_W_SIDE_EFFECTS : GenericInstruction {
def G_BR : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$src1);
- let hasSideEffects = 0;
- let isBranch = 1;
- let isTerminator = 1;
- let isBarrier = 1;
+ let hasSideEffects = false;
+ let isBranch = true;
+ let isTerminator = true;
+ let isBarrier = true;
}
// Generic conditional branch.
def G_BRCOND : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins type0:$tst, unknown:$truebb);
- let hasSideEffects = 0;
- let isBranch = 1;
- let isTerminator = 1;
+ let hasSideEffects = false;
+ let isBranch = true;
+ let isTerminator = true;
}
// Generic indirect branch.
def G_BRINDIRECT : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins type0:$src1);
- let hasSideEffects = 0;
- let isBranch = 1;
- let isTerminator = 1;
- let isBarrier = 1;
- let isIndirectBranch = 1;
+ let hasSideEffects = false;
+ let isBranch = true;
+ let isTerminator = true;
+ let isBarrier = true;
+ let isIndirectBranch = true;
}
// Generic branch to jump table entry
def G_BRJT : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins ptype0:$tbl, unknown:$jti, type1:$idx);
- let hasSideEffects = 0;
- let isBranch = 1;
- let isTerminator = 1;
- let isBarrier = 1;
- let isIndirectBranch = 1;
+ let hasSideEffects = false;
+ let isBranch = true;
+ let isTerminator = true;
+ let isBarrier = true;
+ let isIndirectBranch = true;
}
def G_READ_REGISTER : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$register);
- let hasSideEffects = 1;
+ let hasSideEffects = true;
// Assume convergent. It's probably not worth the effort of somehow
// modeling convergent and nonconvergent register accesses.
- let isConvergent = 1;
+ let isConvergent = true;
}
def G_WRITE_REGISTER : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$register, type0:$value);
- let hasSideEffects = 1;
+ let hasSideEffects = true;
// Assume convergent. It's probably not worth the effort of somehow
// modeling convergent and nonconvergent register accesses.
- let isConvergent = 1;
+ let isConvergent = true;
}
//------------------------------------------------------------------------------
@@ -1128,14 +1241,14 @@ def G_WRITE_REGISTER : GenericInstruction {
def G_INSERT_VECTOR_ELT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src, type1:$elt, type2:$idx);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic extractelement.
def G_EXTRACT_VECTOR_ELT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src, type2:$idx);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
// Generic shufflevector.
@@ -1145,10 +1258,48 @@ def G_EXTRACT_VECTOR_ELT : GenericInstruction {
def G_SHUFFLE_VECTOR: GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$v1, type1:$v2, unknown:$mask);
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
//------------------------------------------------------------------------------
+// Vector reductions
+//------------------------------------------------------------------------------
+
+class VectorReduction : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$v);
+ let hasSideEffects = false;
+}
+
+def G_VECREDUCE_SEQ_FADD : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$acc, type2:$v);
+ let hasSideEffects = false;
+}
+
+def G_VECREDUCE_SEQ_FMUL : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$acc, type2:$v);
+ let hasSideEffects = false;
+}
+
+def G_VECREDUCE_FADD : VectorReduction;
+def G_VECREDUCE_FMUL : VectorReduction;
+
+def G_VECREDUCE_FMAX : VectorReduction;
+def G_VECREDUCE_FMIN : VectorReduction;
+
+def G_VECREDUCE_ADD : VectorReduction;
+def G_VECREDUCE_MUL : VectorReduction;
+def G_VECREDUCE_AND : VectorReduction;
+def G_VECREDUCE_OR : VectorReduction;
+def G_VECREDUCE_XOR : VectorReduction;
+def G_VECREDUCE_SMAX : VectorReduction;
+def G_VECREDUCE_SMIN : VectorReduction;
+def G_VECREDUCE_UMAX : VectorReduction;
+def G_VECREDUCE_UMIN : VectorReduction;
+
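
A scalar model of the difference between the sequential and the plain floating-point reductions above (sketch): the SEQ form threads the accumulator through the elements strictly in order, which matters because FP addition is not associative.

#include <vector>

static double VecReduceSeqFAdd(double Acc, const std::vector<double> &V) {
  for (double E : V)
    Acc += E;   // strictly in-order accumulation
  return Acc;
}
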
+//------------------------------------------------------------------------------
// Constrained floating point ops
//------------------------------------------------------------------------------
@@ -1159,3 +1310,30 @@ def G_STRICT_FDIV : ConstrainedIntruction<G_FDIV>;
def G_STRICT_FREM : ConstrainedIntruction<G_FREM>;
def G_STRICT_FMA : ConstrainedIntruction<G_FMA>;
def G_STRICT_FSQRT : ConstrainedIntruction<G_FSQRT>;
+
+//------------------------------------------------------------------------------
+// Memory intrinsics
+//------------------------------------------------------------------------------
+
+def G_MEMCPY : GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall);
+ let hasSideEffects = false;
+ let mayLoad = true;
+ let mayStore = true;
+}
+
+def G_MEMMOVE : GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall);
+ let hasSideEffects = false;
+ let mayLoad = true;
+ let mayStore = true;
+}
+
+def G_MEMSET : GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins ptype0:$dst_addr, type1:$value, type2:$size, untyped_imm_0:$tailcall);
+ let hasSideEffects = false;
+ let mayStore = true;
+}
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td
index 1dd3e374b524..e2c7a90a1b16 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -85,6 +85,7 @@ class GIDefMatchData<string type> : GIDefKind {
def extending_load_matchdata : GIDefMatchData<"PreferredTuple">;
def indexed_load_store_matchdata : GIDefMatchData<"IndexedLoadStoreMatchInfo">;
+def instruction_steps_matchdata: GIDefMatchData<"InstructionStepsMatchInfo">;
/// The operator at the root of a GICombineRule.Match dag.
def match;
@@ -125,11 +126,18 @@ def extending_loads : GICombineRule<
(apply [{ Helper.applyCombineExtendingLoads(*${root}, ${matchinfo}); }])>;
def combines_for_extload: GICombineGroup<[extending_loads]>;
-def sext_already_extended : GICombineRule<
+def sext_trunc_sextload : GICombineRule<
(defs root:$d),
(match (wip_match_opcode G_SEXT_INREG):$d,
- [{ return Helper.matchSextAlreadyExtended(*${d}); }]),
- (apply [{ Helper.applySextAlreadyExtended(*${d}); }])>;
+ [{ return Helper.matchSextTruncSextLoad(*${d}); }]),
+ (apply [{ Helper.applySextTruncSextLoad(*${d}); }])>;
+
+def sext_inreg_of_load_matchdata : GIDefMatchData<"std::tuple<Register, unsigned>">;
+def sext_inreg_of_load : GICombineRule<
+ (defs root:$root, sext_inreg_of_load_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SEXT_INREG):$root,
+ [{ return Helper.matchSextInRegOfLoad(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.applySextInRegOfLoad(*${root}, ${matchinfo}); }])>;
def combine_indexed_load_store : GICombineRule<
(defs root:$root, indexed_load_store_matchdata:$matchinfo),
@@ -137,13 +145,11 @@ def combine_indexed_load_store : GICombineRule<
[{ return Helper.matchCombineIndexedLoadStore(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineIndexedLoadStore(*${root}, ${matchinfo}); }])>;
-// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of
-// all_combines because it wasn't there.
-def elide_br_by_inverting_cond : GICombineRule<
+def opt_brcond_by_inverting_cond : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_BR):$root,
- [{ return Helper.matchElideBrByInvertingCond(*${root}); }]),
- (apply [{ Helper.applyElideBrByInvertingCond(*${root}); }])>;
+ [{ return Helper.matchOptBrCondByInvertingCond(*${root}); }]),
+ (apply [{ Helper.applyOptBrCondByInvertingCond(*${root}); }])>;
def ptr_add_immed_matchdata : GIDefMatchData<"PtrAddChain">;
def ptr_add_immed_chain : GICombineRule<
@@ -152,6 +158,23 @@ def ptr_add_immed_chain : GICombineRule<
[{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]),
(apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>;
+// Fold shift (shift base, x), y -> shift base, (x + y), if the shifts are the same kind
+def shift_immed_matchdata : GIDefMatchData<"RegisterImmPair">;
+def shift_immed_chain : GICombineRule<
+ (defs root:$d, shift_immed_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR, G_SSHLSAT, G_USHLSAT):$d,
+ [{ return Helper.matchShiftImmedChain(*${d}, ${matchinfo}); }]),
+ (apply [{ Helper.applyShiftImmedChain(*${d}, ${matchinfo}); }])>;
+
+// Transform shift (logic (shift X, C0), Y), C1
+// -> logic (shift X, (C0+C1)), (shift Y, C1), if shifts are same
+def shift_of_shifted_logic_matchdata : GIDefMatchData<"ShiftOfShiftedLogic">;
+def shift_of_shifted_logic_chain : GICombineRule<
+ (defs root:$d, shift_of_shifted_logic_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR, G_USHLSAT, G_SSHLSAT):$d,
+ [{ return Helper.matchShiftOfShiftedLogic(*${d}, ${matchinfo}); }]),
+ (apply [{ Helper.applyShiftOfShiftedLogic(*${d}, ${matchinfo}); }])>;
+
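
A worked instance of the shift_immed_chain fold above, as a small C++ sketch (the Before/After helpers are hypothetical; both compute the same value for any X):

#include <cstdint>

static uint32_t Before(uint32_t X) { return (X << 2) << 3; }  // shift of a shift
static uint32_t After(uint32_t X)  { return X << 5; }         // folded: 2 + 3 = 5
// For example, X = 1 gives 32 in both cases.
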
def mul_to_shl_matchdata : GIDefMatchData<"unsigned">;
def mul_to_shl : GICombineRule<
(defs root:$d, mul_to_shl_matchdata:$matchinfo),
@@ -159,6 +182,14 @@ def mul_to_shl : GICombineRule<
[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;
+// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
+def reduce_shl_of_extend_matchdata : GIDefMatchData<"RegisterImmPair">;
+def reduce_shl_of_extend : GICombineRule<
+ (defs root:$dst, reduce_shl_of_extend_matchdata:$matchinfo),
+ (match (G_SHL $dst, $src0, $src1):$mi,
+ [{ return Helper.matchCombineShlOfExtend(*${mi}, ${matchinfo}); }]),
+ (apply [{ Helper.applyCombineShlOfExtend(*${mi}, ${matchinfo}); }])>;
+
// [us]itofp(undef) = 0, because the result value is bounded.
def undef_to_fp_zero : GICombineRule<
(defs root:$root),
@@ -178,11 +209,17 @@ def undef_to_negative_one: GICombineRule<
[{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
(apply [{ Helper.replaceInstWithConstant(*${root}, -1); }])>;
+def binop_left_undef_to_zero: GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_SHL):$root,
+ [{ return Helper.matchOperandIsUndef(*${root}, 1); }]),
+ (apply [{ Helper.replaceInstWithConstant(*${root}, 0); }])>;
+
// Instructions where if any source operand is undef, the instruction can be
// replaced with undef.
def propagate_undef_any_op: GICombineRule<
(defs root:$root),
- (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR):$root,
+ (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC):$root,
[{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
(apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
@@ -209,6 +246,24 @@ def select_same_val: GICombineRule<
(apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
>;
+// Fold (undef ? x : y) -> y
+def select_undef_cmp: GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_SELECT):$root,
+ [{ return Helper.matchUndefSelectCmp(*${root}); }]),
+ (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
+>;
+
+// Fold (true ? x : y) -> x
+// Fold (false ? x : y) -> y
+def select_constant_cmp_matchdata : GIDefMatchData<"unsigned">;
+def select_constant_cmp: GICombineRule<
+ (defs root:$root, select_constant_cmp_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SELECT):$root,
+ [{ return Helper.matchConstantSelectCmp(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }])
+>;
+
// Fold x op 0 -> x
def right_identity_zero: GICombineRule<
(defs root:$root),
@@ -217,6 +272,14 @@ def right_identity_zero: GICombineRule<
(apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
>;
+// Fold x op 1 -> x
+def right_identity_one: GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_MUL):$root,
+ [{ return Helper.matchConstantOp(${root}->getOperand(2), 1); }]),
+ (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+>;
+
// Fold (x op x) - > x
def binop_same_val: GICombineRule<
(defs root:$root),
@@ -233,6 +296,13 @@ def binop_left_to_zero: GICombineRule<
(apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
>;
+def urem_pow2_to_mask : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_UREM):$root,
+ [{ return Helper.matchOperandIsKnownToBeAPowerOfTwo(*${root}, 2); }]),
+ (apply [{ return Helper.applySimplifyURemByPow2(*${root}); }])
+>;
+
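
A worked instance of urem_pow2_to_mask (sketch): when the divisor is known to be a power of two, the unsigned remainder is just the low bits, so x urem 8 becomes x & 7.

#include <cstdint>

static uint32_t RemByEight(uint32_t X)  { return X % 8u; }  // before the combine
static uint32_t MaskLowBits(uint32_t X) { return X & 7u; }  // after the combine
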
// Fold (x op 0) - > 0
def binop_right_to_zero: GICombineRule<
(defs root:$root),
@@ -257,9 +327,240 @@ def simplify_add_to_sub: GICombineRule <
(apply [{ return Helper.applySimplifyAddToSub(*${root}, ${info});}])
>;
+// Fold fp_op(cst) to the constant result of the floating point operation.
+def constant_fp_op_matchinfo: GIDefMatchData<"Optional<APFloat>">;
+def constant_fp_op: GICombineRule <
+ (defs root:$root, constant_fp_op_matchinfo:$info),
+ (match (wip_match_opcode G_FNEG, G_FABS, G_FPTRUNC, G_FSQRT, G_FLOG2):$root,
+ [{ return Helper.matchCombineConstantFoldFpUnary(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyCombineConstantFoldFpUnary(*${root}, ${info}); }])
+>;
+
+// Fold int2ptr(ptr2int(x)) -> x
+def p2i_to_i2p_matchinfo: GIDefMatchData<"Register">;
+def p2i_to_i2p: GICombineRule<
+ (defs root:$root, p2i_to_i2p_matchinfo:$info),
+ (match (wip_match_opcode G_INTTOPTR):$root,
+ [{ return Helper.matchCombineI2PToP2I(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyCombineI2PToP2I(*${root}, ${info}); }])
+>;
+
+// Fold ptr2int(int2ptr(x)) -> x
+def i2p_to_p2i_matchinfo: GIDefMatchData<"Register">;
+def i2p_to_p2i: GICombineRule<
+ (defs root:$root, i2p_to_p2i_matchinfo:$info),
+ (match (wip_match_opcode G_PTRTOINT):$root,
+ [{ return Helper.matchCombineP2IToI2P(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyCombineP2IToI2P(*${root}, ${info}); }])
+>;
+
+// Fold add ptrtoint(x), y -> ptrtoint (ptr_add x), y
+def add_p2i_to_ptradd_matchinfo : GIDefMatchData<"std::pair<Register, bool>">;
+def add_p2i_to_ptradd : GICombineRule<
+ (defs root:$root, add_p2i_to_ptradd_matchinfo:$info),
+ (match (wip_match_opcode G_ADD):$root,
+ [{ return Helper.matchCombineAddP2IToPtrAdd(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyCombineAddP2IToPtrAdd(*${root}, ${info}); }])
+>;
+
+// Fold (ptr_add (int2ptr C1), C2) -> C1 + C2
+def const_ptradd_to_i2p_matchinfo : GIDefMatchData<"int64_t">;
+def const_ptradd_to_i2p: GICombineRule<
+ (defs root:$root, const_ptradd_to_i2p_matchinfo:$info),
+ (match (wip_match_opcode G_PTR_ADD):$root,
+ [{ return Helper.matchCombineConstPtrAddToI2P(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyCombineConstPtrAddToI2P(*${root}, ${info}); }])
+>;
+
+// Simplify: (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
+def hoist_logic_op_with_same_opcode_hands: GICombineRule <
+ (defs root:$root, instruction_steps_matchdata:$info),
+ (match (wip_match_opcode G_AND, G_OR, G_XOR):$root,
+ [{ return Helper.matchHoistLogicOpWithSameOpcodeHands(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyBuildInstructionSteps(*${root}, ${info});}])
+>;
+
+// Fold ashr (shl x, C), C -> sext_inreg (C)
+def shl_ashr_to_sext_inreg_matchinfo : GIDefMatchData<"std::tuple<Register, int64_t>">;
+def shl_ashr_to_sext_inreg : GICombineRule<
+ (defs root:$root, shl_ashr_to_sext_inreg_matchinfo:$info),
+ (match (wip_match_opcode G_ASHR): $root,
+ [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}])
+>;
+// Fold (x & y) -> x or (x & y) -> y when (x & y) is known to equal x or equal y.
+def redundant_and_matchinfo : GIDefMatchData<"Register">;
+def redundant_and: GICombineRule <
+ (defs root:$root, redundant_and_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_AND):$root,
+ [{ return Helper.matchRedundantAnd(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+>;
+
+// Fold (x | y) -> x or (x | y) -> y when (x | y) is known to equal x or equal y.
+def redundant_or_matchinfo : GIDefMatchData<"Register">;
+def redundant_or: GICombineRule <
+ (defs root:$root, redundant_or_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_OR):$root,
+ [{ return Helper.matchRedundantOr(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+>;
+
+// If the input is already sign extended, just drop the extension:
+// sext_inreg x, K -> x,
+// if computeNumSignBits(x) >= (x.getScalarSizeInBits() - K + 1)
+def redundant_sext_inreg: GICombineRule <
+ (defs root:$root),
+ (match (wip_match_opcode G_SEXT_INREG):$root,
+ [{ return Helper.matchRedundantSExtInReg(*${root}); }]),
+ (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+>;
+
+// Fold (anyext (trunc x)) -> x if the source type is the same as
+// the destination type.
+def anyext_trunc_fold_matchinfo : GIDefMatchData<"Register">;
+def anyext_trunc_fold: GICombineRule <
+ (defs root:$root, anyext_trunc_fold_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_ANYEXT):$root,
+ [{ return Helper.matchCombineAnyExtTrunc(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.applyCombineAnyExtTrunc(*${root}, ${matchinfo}); }])
+>;
+
+// Fold ([asz]ext ([asz]ext x)) -> ([asz]ext x).
+def ext_ext_fold_matchinfo : GIDefMatchData<"std::tuple<Register, unsigned>">;
+def ext_ext_fold: GICombineRule <
+ (defs root:$root, ext_ext_fold_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_ANYEXT, G_SEXT, G_ZEXT):$root,
+ [{ return Helper.matchCombineExtOfExt(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.applyCombineExtOfExt(*${root}, ${matchinfo}); }])
+>;
+
+def not_cmp_fold_matchinfo : GIDefMatchData<"SmallVector<Register, 4>">;
+def not_cmp_fold : GICombineRule<
+ (defs root:$d, not_cmp_fold_matchinfo:$info),
+ (match (wip_match_opcode G_XOR): $d,
+ [{ return Helper.matchNotCmp(*${d}, ${info}); }]),
+ (apply [{ return Helper.applyNotCmp(*${d}, ${info}); }])
+>;
+
+// Fold (fneg (fneg x)) -> x.
+def fneg_fneg_fold_matchinfo : GIDefMatchData<"Register">;
+def fneg_fneg_fold: GICombineRule <
+ (defs root:$root, fneg_fneg_fold_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_FNEG):$root,
+ [{ return Helper.matchCombineFNegOfFNeg(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+>;
+
+// Fold (unmerge(merge x, y, z)) -> x, y, z.
+def unmerge_merge_matchinfo : GIDefMatchData<"SmallVector<Register, 8>">;
+def unmerge_merge : GICombineRule<
+ (defs root:$d, unmerge_merge_matchinfo:$info),
+ (match (wip_match_opcode G_UNMERGE_VALUES): $d,
+ [{ return Helper.matchCombineUnmergeMergeToPlainValues(*${d}, ${info}); }]),
+ (apply [{ return Helper.applyCombineUnmergeMergeToPlainValues(*${d}, ${info}); }])
+>;
+
+// Fold (fabs (fabs x)) -> (fabs x).
+def fabs_fabs_fold_matchinfo : GIDefMatchData<"Register">;
+def fabs_fabs_fold: GICombineRule<
+ (defs root:$root, fabs_fabs_fold_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_FABS):$root,
+ [{ return Helper.matchCombineFAbsOfFAbs(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.applyCombineFAbsOfFAbs(*${root}, ${matchinfo}); }])
+>;
+
+// Fold (unmerge cst) -> cst1, cst2, ...
+def unmerge_cst_matchinfo : GIDefMatchData<"SmallVector<APInt, 8>">;
+def unmerge_cst : GICombineRule<
+ (defs root:$d, unmerge_cst_matchinfo:$info),
+ (match (wip_match_opcode G_UNMERGE_VALUES): $d,
+ [{ return Helper.matchCombineUnmergeConstant(*${d}, ${info}); }]),
+ (apply [{ return Helper.applyCombineUnmergeConstant(*${d}, ${info}); }])
+>;
+
+// Transform x,y<dead> = unmerge z -> x = trunc z.
+def unmerge_dead_to_trunc : GICombineRule<
+ (defs root:$d),
+ (match (wip_match_opcode G_UNMERGE_VALUES): $d,
+ [{ return Helper.matchCombineUnmergeWithDeadLanesToTrunc(*${d}); }]),
+ (apply [{ return Helper.applyCombineUnmergeWithDeadLanesToTrunc(*${d}); }])
+>;
+
+// Transform x,y = unmerge(zext(z)) -> x = zext z; y = 0.
+def unmerge_zext_to_zext : GICombineRule<
+ (defs root:$d),
+ (match (wip_match_opcode G_UNMERGE_VALUES): $d,
+ [{ return Helper.matchCombineUnmergeZExtToZExt(*${d}); }]),
+ (apply [{ return Helper.applyCombineUnmergeZExtToZExt(*${d}); }])
+>;
+
+// Fold trunc ([asz]ext x) -> x or ([asz]ext x) or (trunc x).
+def trunc_ext_fold_matchinfo : GIDefMatchData<"std::pair<Register, unsigned>">;
+def trunc_ext_fold: GICombineRule <
+ (defs root:$root, trunc_ext_fold_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_TRUNC):$root,
+ [{ return Helper.matchCombineTruncOfExt(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }])
+>;
+
+// Fold trunc (shl x, K) -> shl (trunc x), K when K < VT.getScalarSizeInBits().
+def trunc_shl_matchinfo : GIDefMatchData<"std::pair<Register, Register>">;
+def trunc_shl: GICombineRule <
+ (defs root:$root, trunc_shl_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_TRUNC):$root,
+ [{ return Helper.matchCombineTruncOfShl(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.applyCombineTruncOfShl(*${root}, ${matchinfo}); }])
+>;
+
+// Transform (mul x, -1) -> (sub 0, x)
+def mul_by_neg_one: GICombineRule <
+ (defs root:$root),
+ (match (wip_match_opcode G_MUL):$root,
+ [{ return Helper.matchConstantOp(${root}->getOperand(2), -1); }]),
+ (apply [{ return Helper.applyCombineMulByNegativeOne(*${root}); }])
+>;
+
+// Fold (xor (and x, y), y) -> (and (not x), y)
+def xor_of_and_with_same_reg_matchinfo :
+ GIDefMatchData<"std::pair<Register, Register>">;
+def xor_of_and_with_same_reg: GICombineRule <
+ (defs root:$root, xor_of_and_with_same_reg_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_XOR):$root,
+ [{ return Helper.matchXorOfAndWithSameReg(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.applyXorOfAndWithSameReg(*${root}, ${matchinfo}); }])
+>;
+
+// Transform (ptr_add 0, x) -> (int_to_ptr x)
+def ptr_add_with_zero: GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_PTR_ADD):$root,
+ [{ return Helper.matchPtrAddZero(*${root}); }]),
+ (apply [{ return Helper.applyPtrAddZero(*${root}); }])>;
+
+def regs_small_vec : GIDefMatchData<"SmallVector<Register, 4>">;
+def combine_insert_vec_elts_build_vector : GICombineRule<
+ (defs root:$root, regs_small_vec:$info),
+ (match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
+ [{ return Helper.matchCombineInsertVecElts(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyCombineInsertVecElts(*${root}, ${info}); }])>;
+
+def load_or_combine_matchdata :
+GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
+def load_or_combine : GICombineRule<
+ (defs root:$root, load_or_combine_matchdata:$info),
+ (match (wip_match_opcode G_OR):$root,
+ [{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyLoadOrCombine(*${root}, ${info}); }])>;
+
+// Currently this group contains only the single insert-vector-element combine above.
+def insert_vec_elt_combines : GICombineGroup<
+ [combine_insert_vec_elts_build_vector]>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
+ binop_left_undef_to_zero,
propagate_undef_any_op,
propagate_undef_all_ops,
propagate_undef_shuffle_mask,
@@ -267,9 +568,31 @@ def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
binop_same_val, binop_left_to_zero,
- binop_right_to_zero]>;
-
-def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl]>;
-def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
- combines_for_extload, combine_indexed_load_store, undef_combines,
- identity_combines, simplify_add_to_sub]>;
+ binop_right_to_zero, p2i_to_i2p,
+ i2p_to_p2i, anyext_trunc_fold,
+ fneg_fneg_fold, right_identity_one]>;
+
+def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p]>;
+
+def known_bits_simplifications : GICombineGroup<[
+ redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask]>;
+
+def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
+
+def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
+
+def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
+ mul_by_neg_one]>;
+
+def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
+ ptr_add_immed_chain, combines_for_extload, combine_indexed_load_store,
+ undef_combines, identity_combines, simplify_add_to_sub,
+ hoist_logic_op_with_same_opcode_hands,
+ shl_ashr_to_sext_inreg, sext_inreg_of_load,
+ width_reduction_combines, select_combines,
+ known_bits_simplifications, ext_ext_fold,
+ not_cmp_fold, opt_brcond_by_inverting_cond,
+ unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
+ unmerge_zext_to_zext, trunc_ext_fold, trunc_shl,
+ const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
+ shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine]>;
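
Every GICombineRule in this file follows the same three-part shape: a defs list naming the root instruction (plus any match data), a match step that calls a CombinerHelper predicate, and an apply step that performs the rewrite. As a rough sketch of that shape only (the rule name and the helpers matchFoo/applyFoo below are hypothetical, not part of this patch):

def example_matchinfo : GIDefMatchData<"Register">;
def example_combine : GICombineRule<
  (defs root:$root, example_matchinfo:$matchinfo),
  (match (wip_match_opcode G_SUB):$root,
    // Hypothetical predicate: records the replacement register in $matchinfo
    // when the fold applies.
    [{ return Helper.matchFoo(*${root}, ${matchinfo}); }]),
  (apply [{ return Helper.applyFoo(*${root}, ${matchinfo}); }])>;

A rule written this way would still need to be added to one of the GICombineGroup lists above (for example all_combines) before a combiner built from that group would run it.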
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 150834e65b2d..6fb8a6b15dd7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -26,8 +26,8 @@ class GINodeEquiv<Instruction i, SDNode node> {
// SelectionDAG has separate nodes for atomic and non-atomic memory operations
// (ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE) but GlobalISel
// stores this information in the MachineMemoryOperand.
- bit CheckMMOIsNonAtomic = 0;
- bit CheckMMOIsAtomic = 0;
+ bit CheckMMOIsNonAtomic = false;
+ bit CheckMMOIsAtomic = false;
// SelectionDAG has one node for all loads and uses predicates to
// differentiate them. GlobalISel on the other hand uses separate opcodes.
@@ -52,6 +52,8 @@ def : GINodeEquiv<G_BITCAST, bitconvert>;
def : GINodeEquiv<G_CONSTANT, imm>;
def : GINodeEquiv<G_FCONSTANT, fpimm>;
def : GINodeEquiv<G_IMPLICIT_DEF, undef>;
+def : GINodeEquiv<G_FRAME_INDEX, frameindex>;
+def : GINodeEquiv<G_BLOCK_ADDR, blockaddress>;
def : GINodeEquiv<G_ADD, add>;
def : GINodeEquiv<G_SUB, sub>;
def : GINodeEquiv<G_MUL, mul>;
@@ -71,6 +73,16 @@ def : GINodeEquiv<G_SADDSAT, saddsat>;
def : GINodeEquiv<G_UADDSAT, uaddsat>;
def : GINodeEquiv<G_SSUBSAT, ssubsat>;
def : GINodeEquiv<G_USUBSAT, usubsat>;
+def : GINodeEquiv<G_SSHLSAT, sshlsat>;
+def : GINodeEquiv<G_USHLSAT, ushlsat>;
+def : GINodeEquiv<G_SMULFIX, smulfix>;
+def : GINodeEquiv<G_UMULFIX, umulfix>;
+def : GINodeEquiv<G_SMULFIXSAT, smulfixsat>;
+def : GINodeEquiv<G_UMULFIXSAT, umulfixsat>;
+def : GINodeEquiv<G_SDIVFIX, sdivfix>;
+def : GINodeEquiv<G_UDIVFIX, udivfix>;
+def : GINodeEquiv<G_SDIVFIXSAT, sdivfixsat>;
+def : GINodeEquiv<G_UDIVFIXSAT, udivfixsat>;
def : GINodeEquiv<G_SELECT, select>;
def : GINodeEquiv<G_FNEG, fneg>;
def : GINodeEquiv<G_FPEXT, fpextend>;
@@ -104,7 +116,7 @@ def : GINodeEquiv<G_CTTZ, cttz>;
def : GINodeEquiv<G_CTLZ_ZERO_UNDEF, ctlz_zero_undef>;
def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
def : GINodeEquiv<G_CTPOP, ctpop>;
-def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, extractelt>;
def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
def : GINodeEquiv<G_FCEIL, fceil>;
@@ -117,11 +129,13 @@ def : GINodeEquiv<G_FRINT, frint>;
def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
def : GINodeEquiv<G_INTRINSIC_TRUNC, ftrunc>;
def : GINodeEquiv<G_INTRINSIC_ROUND, fround>;
+def : GINodeEquiv<G_INTRINSIC_LRINT, lrint>;
def : GINodeEquiv<G_FCOPYSIGN, fcopysign>;
def : GINodeEquiv<G_SMIN, smin>;
def : GINodeEquiv<G_SMAX, smax>;
def : GINodeEquiv<G_UMIN, umin>;
def : GINodeEquiv<G_UMAX, umax>;
+def : GINodeEquiv<G_ABS, abs>;
def : GINodeEquiv<G_FMINNUM, fminnum>;
def : GINodeEquiv<G_FMAXNUM, fmaxnum>;
def : GINodeEquiv<G_FMINNUM_IEEE, fminnum_ieee>;
@@ -144,7 +158,7 @@ def : GINodeEquiv<G_STRICT_FSQRT, strict_fsqrt>;
// separate nodes for them. This GINodeEquiv maps the non-atomic loads to
// G_LOAD with a non-atomic MachineMemOperand.
def : GINodeEquiv<G_LOAD, ld> {
- let CheckMMOIsNonAtomic = 1;
+ let CheckMMOIsNonAtomic = true;
let IfSignExtend = G_SEXTLOAD;
let IfZeroExtend = G_ZEXTLOAD;
}
@@ -160,11 +174,17 @@ def : GINodeEquiv<G_ICMP, setcc> {
 // G_STORE handles both atomic and non-atomic stores whereas SelectionDAG had
// separate nodes for them. This GINodeEquiv maps the non-atomic stores to
// G_STORE with a non-atomic MachineMemOperand.
-def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = 1; }
+def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = true; }
def : GINodeEquiv<G_LOAD, atomic_load> {
- let CheckMMOIsNonAtomic = 0;
- let CheckMMOIsAtomic = 1;
+ let CheckMMOIsNonAtomic = false;
+ let CheckMMOIsAtomic = true;
+}
+
+// Operands are swapped for atomic_store vs. regular store
+def : GINodeEquiv<G_STORE, atomic_store> {
+ let CheckMMOIsNonAtomic = false;
+ let CheckMMOIsAtomic = true;
}
def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap>;
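
Each GINodeEquiv line declares an equivalence that is used when importing SelectionDAG patterns for GlobalISel, and the CheckMMOIs* bits changed above let memory mappings be narrowed to atomic or non-atomic MachineMemOperands. A sketch of the two forms, using placeholder names (G_EXAMPLE_OP, example_node and example_load_node are not real records):

// Plain opcode <-> node equivalence.
def : GINodeEquiv<G_EXAMPLE_OP, example_node>;
// Memory equivalence restricted to non-atomic accesses, mirroring the
// G_LOAD/ld mapping above.
def : GINodeEquiv<G_EXAMPLE_LOAD, example_load_node> {
  let CheckMMOIsNonAtomic = true;
}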
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/Target.td b/contrib/llvm-project/llvm/include/llvm/Target/Target.td
index aab5376db453..1c97d70a477f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/Target.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/Target.td
@@ -110,9 +110,9 @@ class SubRegIndex<int size, int offset = 0> {
// ComposedSubRegIndex - A sub-register that is the result of composing A and B.
// Offset is set to the sum of A and B's Offsets. Size is set to B's Size.
class ComposedSubRegIndex<SubRegIndex A, SubRegIndex B>
- : SubRegIndex<B.Size, !if(!eq(A.Offset, -1), -1,
- !if(!eq(B.Offset, -1), -1,
- !add(A.Offset, B.Offset)))> {
+ : SubRegIndex<B.Size, !cond(!eq(A.Offset, -1): -1,
+ !eq(B.Offset, -1): -1,
+ true: !add(A.Offset, B.Offset))> {
// See SubRegIndex.
let ComposedOf = [A, B];
}
@@ -175,12 +175,12 @@ class Register<string n, list<string> altNames = []> {
// completely determined by the value of its sub-registers. For example, the
// x86 register AX is covered by its sub-registers AL and AH, but EAX is not
// covered by its sub-register AX.
- bit CoveredBySubRegs = 0;
+ bit CoveredBySubRegs = false;
// HWEncoding - The target specific hardware encoding for this register.
bits<16> HWEncoding = 0;
- bit isArtificial = 0;
+ bit isArtificial = false;
}
// RegisterWithSubRegs - This can be used to define instances of Register which
@@ -252,7 +252,7 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
// isAllocatable - Specify that the register class can be used for virtual
// registers and register allocation. Some register classes are only used to
// model instruction operand constraints, and should have isAllocatable = 0.
- bit isAllocatable = 1;
+ bit isAllocatable = true;
// AltOrders - List of alternative allocation orders. The default order is
// MemberList itself, and that is good enough for most targets since the
@@ -278,7 +278,7 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
// Generate register pressure set for this register class and any class
// synthesized from it. Set to 0 to inhibit unneeded pressure sets.
- bit GeneratePressureSet = 1;
+ bit GeneratePressureSet = true;
// Weight override for register pressure calculation. This is the value
// TargetRegisterClass::getRegClassWeight() will return. The weight is in
@@ -452,7 +452,7 @@ class InstructionEncoding {
// DecodeInstB() is not able to determine if all possible values of ?? are
// valid or not. If DecodeInstB() returns Fail the decoder will attempt to
// decode the bitpattern as InstA too.
- bit hasCompleteDecoder = 1;
+ bit hasCompleteDecoder = true;
}
// Allows specifying an InstructionEncoding by HwMode. If an Instruction specifies
@@ -506,59 +506,59 @@ class Instruction : InstructionEncoding {
// Indicates if this is a pre-isel opcode that should be
// legalized/regbankselected/selected.
- bit isPreISelOpcode = 0;
+ bit isPreISelOpcode = false;
// These bits capture information about the high-level semantics of the
// instruction.
- bit isReturn = 0; // Is this instruction a return instruction?
- bit isBranch = 0; // Is this instruction a branch instruction?
- bit isEHScopeReturn = 0; // Does this instruction end an EH scope?
- bit isIndirectBranch = 0; // Is this instruction an indirect branch?
- bit isCompare = 0; // Is this instruction a comparison instruction?
- bit isMoveImm = 0; // Is this instruction a move immediate instruction?
- bit isMoveReg = 0; // Is this instruction a move register instruction?
- bit isBitcast = 0; // Is this instruction a bitcast instruction?
- bit isSelect = 0; // Is this instruction a select instruction?
- bit isBarrier = 0; // Can control flow fall through this instruction?
- bit isCall = 0; // Is this instruction a call instruction?
- bit isAdd = 0; // Is this instruction an add instruction?
- bit isTrap = 0; // Is this instruction a trap instruction?
- bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?
- bit mayLoad = ?; // Is it possible for this inst to read memory?
- bit mayStore = ?; // Is it possible for this inst to write memory?
- bit mayRaiseFPException = 0; // Can this raise a floating-point exception?
- bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote?
- bit isCommutable = 0; // Is this 3 operand instruction commutable?
- bit isTerminator = 0; // Is this part of the terminator for a basic block?
- bit isReMaterializable = 0; // Is this instruction re-materializable?
- bit isPredicable = 0; // 1 means this instruction is predicable
- // even if it does not have any operand
- // tablegen can identify as a predicate
- bit isUnpredicable = 0; // 1 means this instruction is not predicable
- // even if it _does_ have a predicate operand
- bit hasDelaySlot = 0; // Does this instruction have an delay slot?
- bit usesCustomInserter = 0; // Pseudo instr needing special help.
- bit hasPostISelHook = 0; // To be *adjusted* after isel by target hook.
- bit hasCtrlDep = 0; // Does this instruction r/w ctrl-flow chains?
- bit isNotDuplicable = 0; // Is it unsafe to duplicate this instruction?
- bit isConvergent = 0; // Is this instruction convergent?
- bit isAuthenticated = 0; // Does this instruction authenticate a pointer?
- bit isAsCheapAsAMove = 0; // As cheap (or cheaper) than a move instruction.
- bit hasExtraSrcRegAllocReq = 0; // Sources have special regalloc requirement?
- bit hasExtraDefRegAllocReq = 0; // Defs have special regalloc requirement?
- bit isRegSequence = 0; // Is this instruction a kind of reg sequence?
- // If so, make sure to override
- // TargetInstrInfo::getRegSequenceLikeInputs.
- bit isPseudo = 0; // Is this instruction a pseudo-instruction?
- // If so, won't have encoding information for
- // the [MC]CodeEmitter stuff.
- bit isExtractSubreg = 0; // Is this instruction a kind of extract subreg?
- // If so, make sure to override
- // TargetInstrInfo::getExtractSubregLikeInputs.
- bit isInsertSubreg = 0; // Is this instruction a kind of insert subreg?
- // If so, make sure to override
- // TargetInstrInfo::getInsertSubregLikeInputs.
- bit variadicOpsAreDefs = 0; // Are variadic operands definitions?
+ bit isReturn = false; // Is this instruction a return instruction?
+ bit isBranch = false; // Is this instruction a branch instruction?
+ bit isEHScopeReturn = false; // Does this instruction end an EH scope?
+ bit isIndirectBranch = false; // Is this instruction an indirect branch?
+ bit isCompare = false; // Is this instruction a comparison instruction?
+ bit isMoveImm = false; // Is this instruction a move immediate instruction?
+ bit isMoveReg = false; // Is this instruction a move register instruction?
+ bit isBitcast = false; // Is this instruction a bitcast instruction?
+ bit isSelect = false; // Is this instruction a select instruction?
+ bit isBarrier = false; // Can control flow fall through this instruction?
+ bit isCall = false; // Is this instruction a call instruction?
+ bit isAdd = false; // Is this instruction an add instruction?
+ bit isTrap = false; // Is this instruction a trap instruction?
+ bit canFoldAsLoad = false; // Can this be folded as a simple memory operand?
+ bit mayLoad = ?; // Is it possible for this inst to read memory?
+ bit mayStore = ?; // Is it possible for this inst to write memory?
+ bit mayRaiseFPException = false; // Can this raise a floating-point exception?
+ bit isConvertibleToThreeAddress = false; // Can this 2-addr instruction promote?
+ bit isCommutable = false; // Is this 3 operand instruction commutable?
+ bit isTerminator = false; // Is this part of the terminator for a basic block?
+ bit isReMaterializable = false; // Is this instruction re-materializable?
+ bit isPredicable = false; // 1 means this instruction is predicable
+ // even if it does not have any operand
+ // tablegen can identify as a predicate
+ bit isUnpredicable = false; // 1 means this instruction is not predicable
+ // even if it _does_ have a predicate operand
+ bit hasDelaySlot = false; // Does this instruction have a delay slot?
+ bit usesCustomInserter = false; // Pseudo instr needing special help.
+ bit hasPostISelHook = false; // To be *adjusted* after isel by target hook.
+ bit hasCtrlDep = false; // Does this instruction r/w ctrl-flow chains?
+ bit isNotDuplicable = false; // Is it unsafe to duplicate this instruction?
+ bit isConvergent = false; // Is this instruction convergent?
+ bit isAuthenticated = false; // Does this instruction authenticate a pointer?
+ bit isAsCheapAsAMove = false; // As cheap (or cheaper) than a move instruction.
+ bit hasExtraSrcRegAllocReq = false; // Sources have special regalloc requirement?
+ bit hasExtraDefRegAllocReq = false; // Defs have special regalloc requirement?
+ bit isRegSequence = false; // Is this instruction a kind of reg sequence?
+ // If so, make sure to override
+ // TargetInstrInfo::getRegSequenceLikeInputs.
+ bit isPseudo = false; // Is this instruction a pseudo-instruction?
+ // If so, won't have encoding information for
+ // the [MC]CodeEmitter stuff.
+ bit isExtractSubreg = false; // Is this instruction a kind of extract subreg?
+ // If so, make sure to override
+ // TargetInstrInfo::getExtractSubregLikeInputs.
+ bit isInsertSubreg = false; // Is this instruction a kind of insert subreg?
+ // If so, make sure to override
+ // TargetInstrInfo::getInsertSubregLikeInputs.
+ bit variadicOpsAreDefs = false; // Are variadic operands definitions?
// Does the instruction have side effects that are not captured by any
// operands of the instruction or other flags?
@@ -581,15 +581,15 @@ class Instruction : InstructionEncoding {
// CodeEmitter unchanged, but duplicates a canonical instruction
// definition's encoding and should be ignored when constructing the
// assembler match tables.
- bit isCodeGenOnly = 0;
+ bit isCodeGenOnly = false;
// Is this instruction a pseudo instruction for use by the assembler parser.
- bit isAsmParserOnly = 0;
+ bit isAsmParserOnly = false;
// This instruction is not expected to be queried for scheduling latencies
// and therefore needs no scheduling information even for a complete
// scheduling model.
- bit hasNoSchedulingInfo = 0;
+ bit hasNoSchedulingInfo = false;
InstrItinClass Itinerary = NoItinerary;// Execution steps used for scheduling.
@@ -630,13 +630,13 @@ class Instruction : InstructionEncoding {
/// UseNamedOperandTable - If set, the operand indices of this instruction
/// can be queried via the getNamedOperandIdx() function which is generated
/// by TableGen.
- bit UseNamedOperandTable = 0;
+ bit UseNamedOperandTable = false;
/// Should FastISel ignore this instruction. For certain ISAs, they have
/// instructions which map to the same ISD Opcode, value type operands and
/// instruction selection predicates. FastISel cannot handle such cases, but
/// SelectionDAG can.
- bit FastISelShouldIgnore = 0;
+ bit FastISelShouldIgnore = false;
}
/// Defines an additional encoding that disassembles to the given instruction
@@ -651,7 +651,7 @@ class AdditionalEncoding<Instruction I> : InstructionEncoding {
/// pseudo.
class PseudoInstExpansion<dag Result> {
dag ResultInst = Result; // The instruction to generate.
- bit isPseudo = 1;
+ bit isPseudo = true;
}
/// Predicates - These are extra conditionals which are turned into instruction
@@ -662,7 +662,7 @@ class Predicate<string cond> {
/// AssemblerMatcherPredicate - If this feature can be used by the assembler
/// matcher, this is true. Targets should set this by inheriting their
/// feature from the AssemblerPredicate class in addition to Predicate.
- bit AssemblerMatcherPredicate = 0;
+ bit AssemblerMatcherPredicate = false;
/// AssemblerCondDag - Set of subtarget features being tested used
/// as alternative condition string used for assembler matcher. Must be used
@@ -688,7 +688,7 @@ class Predicate<string cond> {
/// every function change. Most predicates can leave this at '0'.
///
/// Ignored by SelectionDAG, it always recomputes the predicate on every use.
- bit RecomputePerFunction = 0;
+ bit RecomputePerFunction = false;
}
/// NoHonorSignDependentRounding - This predicate is true if support for
@@ -788,7 +788,7 @@ class AsmOperandClass {
/// marked as IsOptional.
///
/// Optional arguments must be at the end of the operand list.
- bit IsOptional = 0;
+ bit IsOptional = false;
/// The name of the method on the target specific asm parser that returns the
/// default operand for this optional operand. This method is only used if
@@ -809,7 +809,7 @@ class Operand<ValueType ty> : DAGOperand {
ValueType Type = ty;
string PrintMethod = "printOperand";
string EncoderMethod = "";
- bit hasCompleteDecoder = 1;
+ bit hasCompleteDecoder = true;
string OperandType = "OPERAND_UNKNOWN";
dag MIOperandInfo = (ops);
@@ -877,8 +877,8 @@ def f64imm : Operand<f64>;
// have the same LLT).
class TypedOperand<string Ty> : Operand<untyped> {
let OperandType = Ty;
- bit IsPointer = 0;
- bit IsImmediate = 0;
+ bit IsPointer = false;
+ bit IsImmediate = false;
}
def type0 : TypedOperand<"OPERAND_GENERIC_0">;
@@ -888,7 +888,7 @@ def type3 : TypedOperand<"OPERAND_GENERIC_3">;
def type4 : TypedOperand<"OPERAND_GENERIC_4">;
def type5 : TypedOperand<"OPERAND_GENERIC_5">;
-let IsPointer = 1 in {
+let IsPointer = true in {
def ptype0 : TypedOperand<"OPERAND_GENERIC_0">;
def ptype1 : TypedOperand<"OPERAND_GENERIC_1">;
def ptype2 : TypedOperand<"OPERAND_GENERIC_2">;
@@ -900,7 +900,7 @@ let IsPointer = 1 in {
// untyped_imm is for operands where isImm() will be true. It currently has no
// special behaviour and is only used for clarity.
def untyped_imm_0 : TypedOperand<"OPERAND_GENERIC_IMM_0"> {
- let IsImmediate = 1;
+ let IsImmediate = true;
}
/// zero_reg definition - Special node to stand for the zero register.
@@ -952,7 +952,7 @@ class InstrInfo {
// For instance, while both Sparc and PowerPC are big-endian platforms, the
// Sparc manual specifies its instructions in the format [31..0] (big), while
// PowerPC specifies them using the format [0..31] (little).
- bit isLittleEndianEncoding = 0;
+ bit isLittleEndianEncoding = false;
// The instruction properties mayLoad, mayStore, and hasSideEffects are unset
// by default, and TableGen will infer their value from the instruction
@@ -963,7 +963,7 @@ class InstrInfo {
// is set, it will guess a safe value instead.
//
// This option is a temporary migration help. It will go away.
- bit guessInstructionProperties = 1;
+ bit guessInstructionProperties = true;
// TableGen's instruction encoder generator has support for matching operands
// to bit-field variables both by name and by position. While matching by
@@ -975,7 +975,7 @@ class InstrInfo {
// This option is temporary; it will go away once the TableGen decoder
// generator has better support for complex operands and targets have
// migrated away from using positionally encoded operands.
- bit decodePositionallyEncodedOperands = 0;
+ bit decodePositionallyEncodedOperands = false;
// When set, this indicates that there will be no overlap between those
// operands that are matched by ordering (positional operands) and those
@@ -984,7 +984,7 @@ class InstrInfo {
// This option is temporary; it will go away once the TableGen decoder
// generator has better support for complex operands and targets have
// migrated away from using positionally encoded operands.
- bit noNamedPositionallyEncodedOperands = 0;
+ bit noNamedPositionallyEncodedOperands = false;
}
// Standard Pseudo Instructions.
@@ -994,31 +994,31 @@ class InstrInfo {
// targets that set guessInstructionProperties=0. Any local definition of
// mayLoad/mayStore takes precedence over these default values.
class StandardPseudoInstruction : Instruction {
- let mayLoad = 0;
- let mayStore = 0;
- let isCodeGenOnly = 1;
- let isPseudo = 1;
- let hasNoSchedulingInfo = 1;
+ let mayLoad = false;
+ let mayStore = false;
+ let isCodeGenOnly = true;
+ let isPseudo = true;
+ let hasNoSchedulingInfo = true;
let Namespace = "TargetOpcode";
}
def PHI : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins variable_ops);
let AsmString = "PHINODE";
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def INLINEASM : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "";
- let hasSideEffects = 0; // Note side effect is encoded in an operand.
+ let hasSideEffects = false; // Note side effect is encoded in an operand.
}
def INLINEASM_BR : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "";
// Unlike INLINEASM, this is always treated as having side-effects.
- let hasSideEffects = 1;
+ let hasSideEffects = true;
// Despite potentially branching, this instruction is intentionally _not_
// marked as a terminator or a branch.
}
@@ -1026,164 +1026,177 @@ def CFI_INSTRUCTION : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
let AsmString = "";
- let hasCtrlDep = 1;
- let hasSideEffects = 0;
- let isNotDuplicable = 1;
+ let hasCtrlDep = true;
+ let hasSideEffects = false;
+ let isNotDuplicable = true;
}
def EH_LABEL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
let AsmString = "";
- let hasCtrlDep = 1;
- let hasSideEffects = 0;
- let isNotDuplicable = 1;
+ let hasCtrlDep = true;
+ let hasSideEffects = false;
+ let isNotDuplicable = true;
}
def GC_LABEL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
let AsmString = "";
- let hasCtrlDep = 1;
- let hasSideEffects = 0;
- let isNotDuplicable = 1;
+ let hasCtrlDep = true;
+ let hasSideEffects = false;
+ let isNotDuplicable = true;
}
def ANNOTATION_LABEL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
let AsmString = "";
- let hasCtrlDep = 1;
- let hasSideEffects = 0;
- let isNotDuplicable = 1;
+ let hasCtrlDep = true;
+ let hasSideEffects = false;
+ let isNotDuplicable = true;
}
def KILL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "";
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def EXTRACT_SUBREG : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins unknown:$supersrc, i32imm:$subidx);
let AsmString = "";
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def INSERT_SUBREG : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins unknown:$supersrc, unknown:$subsrc, i32imm:$subidx);
let AsmString = "";
- let hasSideEffects = 0;
+ let hasSideEffects = false;
let Constraints = "$supersrc = $dst";
}
def IMPLICIT_DEF : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins);
let AsmString = "";
- let hasSideEffects = 0;
- let isReMaterializable = 1;
- let isAsCheapAsAMove = 1;
+ let hasSideEffects = false;
+ let isReMaterializable = true;
+ let isAsCheapAsAMove = true;
}
def SUBREG_TO_REG : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins unknown:$implsrc, unknown:$subsrc, i32imm:$subidx);
let AsmString = "";
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def COPY_TO_REGCLASS : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins unknown:$src, i32imm:$regclass);
let AsmString = "";
- let hasSideEffects = 0;
- let isAsCheapAsAMove = 1;
+ let hasSideEffects = false;
+ let isAsCheapAsAMove = true;
}
def DBG_VALUE : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "DBG_VALUE";
- let hasSideEffects = 0;
+ let hasSideEffects = false;
+}
+def DBG_INSTR_REF : StandardPseudoInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins variable_ops);
+ let AsmString = "DBG_INSTR_REF";
+ let hasSideEffects = false;
}
def DBG_LABEL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$label);
let AsmString = "DBG_LABEL";
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def REG_SEQUENCE : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins unknown:$supersrc, variable_ops);
let AsmString = "";
- let hasSideEffects = 0;
- let isAsCheapAsAMove = 1;
+ let hasSideEffects = false;
+ let isAsCheapAsAMove = true;
}
def COPY : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins unknown:$src);
let AsmString = "";
- let hasSideEffects = 0;
- let isAsCheapAsAMove = 1;
- let hasNoSchedulingInfo = 0;
+ let hasSideEffects = false;
+ let isAsCheapAsAMove = true;
+ let hasNoSchedulingInfo = false;
}
def BUNDLE : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "BUNDLE";
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def LIFETIME_START : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
let AsmString = "LIFETIME_START";
- let hasSideEffects = 0;
+ let hasSideEffects = false;
}
def LIFETIME_END : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
let AsmString = "LIFETIME_END";
- let hasSideEffects = 0;
+ let hasSideEffects = false;
+}
+def PSEUDO_PROBE : StandardPseudoInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins i64imm:$guid, i64imm:$index, i8imm:$type, i32imm:$attr);
+ let AsmString = "PSEUDO_PROBE";
+ let hasSideEffects = true;
}
+
def STACKMAP : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i64imm:$id, i32imm:$nbytes, variable_ops);
- let hasSideEffects = 1;
- let isCall = 1;
- let mayLoad = 1;
- let usesCustomInserter = 1;
+ let hasSideEffects = true;
+ let isCall = true;
+ let mayLoad = true;
+ let usesCustomInserter = true;
}
def PATCHPOINT : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins i64imm:$id, i32imm:$nbytes, unknown:$callee,
i32imm:$nargs, i32imm:$cc, variable_ops);
- let hasSideEffects = 1;
- let isCall = 1;
- let mayLoad = 1;
- let usesCustomInserter = 1;
+ let hasSideEffects = true;
+ let isCall = true;
+ let mayLoad = true;
+ let usesCustomInserter = true;
}
def STATEPOINT : StandardPseudoInstruction {
- let OutOperandList = (outs);
+ let OutOperandList = (outs variable_ops);
let InOperandList = (ins variable_ops);
- let usesCustomInserter = 1;
- let mayLoad = 1;
- let mayStore = 1;
- let hasSideEffects = 1;
- let isCall = 1;
+ let usesCustomInserter = true;
+ let mayLoad = true;
+ let mayStore = true;
+ let hasSideEffects = true;
+ let isCall = true;
}
def LOAD_STACK_GUARD : StandardPseudoInstruction {
let OutOperandList = (outs ptr_rc:$dst);
let InOperandList = (ins);
- let mayLoad = 1;
- bit isReMaterializable = 1;
- let hasSideEffects = 0;
- bit isPseudo = 1;
+ let mayLoad = true;
+ bit isReMaterializable = true;
+ let hasSideEffects = false;
+ bit isPseudo = true;
}
def PREALLOCATED_SETUP : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$a);
- let usesCustomInserter = 1;
- let hasSideEffects = 1;
+ let usesCustomInserter = true;
+ let hasSideEffects = true;
}
def PREALLOCATED_ARG : StandardPseudoInstruction {
let OutOperandList = (outs ptr_rc:$loc);
let InOperandList = (ins i32imm:$a, i32imm:$b);
- let usesCustomInserter = 1;
- let hasSideEffects = 1;
+ let usesCustomInserter = true;
+ let hasSideEffects = true;
}
def LOCAL_ESCAPE : StandardPseudoInstruction {
// This instruction is really just a label. It has to be part of the chain so
@@ -1191,93 +1204,94 @@ def LOCAL_ESCAPE : StandardPseudoInstruction {
// no side effects.
let OutOperandList = (outs);
let InOperandList = (ins ptr_rc:$symbol, i32imm:$id);
- let hasSideEffects = 0;
- let hasCtrlDep = 1;
+ let hasSideEffects = false;
+ let hasCtrlDep = true;
}
def FAULTING_OP : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins variable_ops);
- let usesCustomInserter = 1;
- let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
- let isTerminator = 1;
- let isBranch = 1;
+ let usesCustomInserter = true;
+ let hasSideEffects = true;
+ let mayLoad = true;
+ let mayStore = true;
+ let isTerminator = true;
+ let isBranch = true;
}
def PATCHABLE_OP : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
- let usesCustomInserter = 1;
- let mayLoad = 1;
- let mayStore = 1;
- let hasSideEffects = 1;
+ let usesCustomInserter = true;
+ let mayLoad = true;
+ let mayStore = true;
+ let hasSideEffects = true;
}
def PATCHABLE_FUNCTION_ENTER : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins);
let AsmString = "# XRay Function Enter.";
- let usesCustomInserter = 1;
- let hasSideEffects = 1;
+ let usesCustomInserter = true;
+ let hasSideEffects = true;
}
def PATCHABLE_RET : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "# XRay Function Patchable RET.";
- let usesCustomInserter = 1;
- let hasSideEffects = 1;
- let isTerminator = 1;
- let isReturn = 1;
+ let usesCustomInserter = true;
+ let hasSideEffects = true;
+ let isTerminator = true;
+ let isReturn = true;
}
def PATCHABLE_FUNCTION_EXIT : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins);
let AsmString = "# XRay Function Exit.";
- let usesCustomInserter = 1;
- let hasSideEffects = 1;
- let isReturn = 0; // Original return instruction will follow
+ let usesCustomInserter = true;
+ let hasSideEffects = true;
+ let isReturn = false; // Original return instruction will follow
}
def PATCHABLE_TAIL_CALL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "# XRay Tail Call Exit.";
- let usesCustomInserter = 1;
- let hasSideEffects = 1;
- let isReturn = 1;
+ let usesCustomInserter = true;
+ let hasSideEffects = true;
+ let isReturn = true;
}
def PATCHABLE_EVENT_CALL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins ptr_rc:$event, unknown:$size);
let AsmString = "# XRay Custom Event Log.";
- let usesCustomInserter = 1;
- let isCall = 1;
- let mayLoad = 1;
- let mayStore = 1;
- let hasSideEffects = 1;
+ let usesCustomInserter = true;
+ let isCall = true;
+ let mayLoad = true;
+ let mayStore = true;
+ let hasSideEffects = true;
}
def PATCHABLE_TYPED_EVENT_CALL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$type, ptr_rc:$event, unknown:$size);
let AsmString = "# XRay Typed Event Log.";
- let usesCustomInserter = 1;
- let isCall = 1;
- let mayLoad = 1;
- let mayStore = 1;
- let hasSideEffects = 1;
+ let usesCustomInserter = true;
+ let isCall = true;
+ let mayLoad = true;
+ let mayStore = true;
+ let hasSideEffects = true;
}
def FENTRY_CALL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins);
let AsmString = "# FEntry call";
- let usesCustomInserter = 1;
- let mayLoad = 1;
- let mayStore = 1;
- let hasSideEffects = 1;
+ let usesCustomInserter = true;
+ let isCall = true;
+ let mayLoad = true;
+ let mayStore = true;
+ let hasSideEffects = true;
}
def ICALL_BRANCH_FUNNEL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "";
- let hasSideEffects = 1;
+ let hasSideEffects = true;
}
// Generic opcodes used in GlobalISel.
@@ -1303,7 +1317,7 @@ class AsmParser {
// ShouldEmitMatchRegisterName - Set to false if the target needs a hand
// written register name matcher
- bit ShouldEmitMatchRegisterName = 1;
+ bit ShouldEmitMatchRegisterName = true;
// Set to true if the target needs a generated 'alternative register name'
// matcher.
@@ -1311,7 +1325,7 @@ class AsmParser {
// This generates a function which can be used to lookup registers from
// their aliases. This function will fail when called on targets where
// several registers share the same alias (i.e. not a 1:1 mapping).
- bit ShouldEmitMatchRegisterAltName = 0;
+ bit ShouldEmitMatchRegisterAltName = false;
// Set to true if MatchRegisterName and MatchRegisterAltName functions
// should be generated even if there are duplicate register names. The
@@ -1319,11 +1333,11 @@ class AsmParser {
// (e.g. in validateTargetOperandClass), and there are no guarantees about
// which numeric register identifier will be returned in the case of
// multiple matches.
- bit AllowDuplicateRegisterNames = 0;
+ bit AllowDuplicateRegisterNames = false;
// HasMnemonicFirst - Set to false if target instructions don't always
// start with a mnemonic as the first token.
- bit HasMnemonicFirst = 1;
+ bit HasMnemonicFirst = true;
// ReportMultipleNearMisses -
// When 0, the assembly matcher reports an error for one encoding or operand
@@ -1331,7 +1345,7 @@ class AsmParser {
// When 1, the assembly matcher returns a list of encodings that were close
// to matching the parsed instruction, so to allow more detailed error
// messages.
- bit ReportMultipleNearMisses = 0;
+ bit ReportMultipleNearMisses = false;
}
def DefaultAsmParser : AsmParser;
@@ -1342,7 +1356,7 @@ def DefaultAsmParser : AsmParser;
//
class AsmParserVariant {
// Variant - AsmParsers can be of multiple different variants. Variants are
- // used to support targets that need to parser multiple formats for the
+ // used to support targets that need to parse multiple formats for the
// assembly language.
int Variant = 0;
@@ -1378,7 +1392,7 @@ def all_of;
/// AssemblerPredicate - This is a Predicate that can be used when the assembler
/// matches instructions and aliases.
class AssemblerPredicate<dag cond, string name = ""> {
- bit AssemblerMatcherPredicate = 1;
+ bit AssemblerMatcherPredicate = true;
dag AssemblerCondDag = cond;
string PredicateName = name;
}
@@ -1453,7 +1467,7 @@ class InstAlias<string Asm, dag Result, int Emit = 1, string VariantName = ""> {
// Setting this to 0 will cause the alias to ignore the Result instruction's
// defined AsmMatchConverter and instead use the function generated by the
// dag Result.
- bit UseInstAsmMatchConverter = 1;
+ bit UseInstAsmMatchConverter = true;
// Assembler variant name to use for this alias. If not specified then
// assembler variants will be determined based on AsmString
@@ -1558,7 +1572,8 @@ class ComplexDeprecationPredicate<string dep> {
// by the scheduler. Each Processor definition requires corresponding
// instruction itineraries.
//
-class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f> {
+class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []> {
// Name - Chip set name. Used by command line (-mcpu=) to determine the
// appropriate target chip.
//
@@ -1574,6 +1589,12 @@ class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f> {
// Features - list of
list<SubtargetFeature> Features = f;
+
+ // TuneFeatures - list of features for tuning for this CPU. If the target
+ // supports -mtune, this should contain the list of features used to make
+ // microarchitectural optimization decisions for a given processor, while
+ // Features should contain the architectural features for the processor.
+ list<SubtargetFeature> TuneFeatures = tunef;
}
// ProcessorModel allows subtargets to specify the more general
@@ -1582,8 +1603,9 @@ class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f> {
//
// Although this class always passes NoItineraries to the Processor
// class, the SchedMachineModel may still define valid Itineraries.
-class ProcessorModel<string n, SchedMachineModel m, list<SubtargetFeature> f>
- : Processor<n, NoItineraries, f> {
+class ProcessorModel<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : Processor<n, NoItineraries, f, tunef> {
let SchedModel = m;
}
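
With the extra template parameter added above, a backend can keep tuning-only features (the -mtune set) separate from the architectural feature list. A sketch of a processor definition using it; the CPU name, scheduling model, and feature records below are hypothetical:

def : ProcessorModel<"example-cpu", ExampleSchedModel,
                     // Architectural features: what the CPU can execute.
                     [FeatureA, FeatureB],
                     // TuneFeatures: only steer optimization heuristics.
                     [TuneSlowUnalignedAccess]>;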
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td
index 057f33083e08..b3d4fe9d0dbb 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td
@@ -187,15 +187,15 @@ class CallingConv<list<CCAction> actions> {
/// If true, this calling convention will be emitted as externally visible in
/// the llvm namespaces instead of as a static function.
- bit Entry = 0;
+ bit Entry = false;
- bit Custom = 0;
+ bit Custom = false;
}
/// CustomCallingConv - An instance of this is used to declare calling
/// conventions that are implemented using a custom function of the same name.
class CustomCallingConv : CallingConv<[]> {
- let Custom = 1;
+ let Custom = true;
}
/// CalleeSavedRegs - A list of callee saved registers for a given calling
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td
index 5623461c648d..9f2cde9d9230 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td
@@ -11,7 +11,7 @@
// MCInstPredicate definitions are used by target scheduling models to describe
// constraints on instructions.
//
-// Here is an example of an MCInstPredicate definition in tablegen:
+// Here is an example of an MCInstPredicate definition in TableGen:
//
// def MCInstPredicateExample : CheckAll<[
// CheckOpcode<[BLR]>,
@@ -126,6 +126,11 @@ class CheckRegOperand<int Index, Register R> : CheckOperandBase<Index> {
// Check if register operand at index `Index` is the invalid register.
class CheckInvalidRegOperand<int Index> : CheckOperandBase<Index>;
+// Return true if machine operand at position `Index` is a valid
+// register operand.
+class CheckValidRegOperand<int Index> :
+ CheckNot<CheckInvalidRegOperand<Index>>;
+
// Check that the operand at position `Index` is immediate `Imm`.
// If field `FunctionMapper` is a non-empty string, then function
// `FunctionMapper` is applied to the operand value, and the return value is then
@@ -254,6 +259,20 @@ class CheckFunctionPredicate<string MCInstFn, string MachineInstrFn> : MCInstPre
string MachineInstrFnName = MachineInstrFn;
}
+// Similar to CheckFunctionPredicate. However, it assumes that MachineInstrFn is
+// a method in TargetInstrInfo, and MCInstrFn takes an extra pointer to
+// MCInstrInfo.
+//
+// It expands to:
+// - TIIPointer->MachineInstrFn(MI)
+// - MCInstrFn(MI, MCII);
+class CheckFunctionPredicateWithTII<string MCInstFn, string MachineInstrFn,
+                                    string TIIPointer = "TII"> : MCInstPredicate {
+ string MCInstFnName = MCInstFn;
+ string TIIPtrName = TIIPointer;
+ string MachineInstrFnName = MachineInstrFn;
+}
+
// Used to classify machine instructions based on a machine instruction
// predicate.
//
@@ -300,8 +319,8 @@ class DepBreakingClass<list<Instruction> opcodes, MCInstPredicate pred,
// - A list of subtarget hooks (Delegates) that are called from this function.
//
class STIPredicateDecl<string name, MCInstPredicate default = FalsePred,
- bit overrides = 1, bit expandForMC = 1,
- bit updatesOpcodeMask = 0,
+ bit overrides = true, bit expandForMC = true,
+ bit updatesOpcodeMask = false,
list<STIPredicateDecl> delegates = []> {
string Name = name;
@@ -336,7 +355,7 @@ class STIPredicate<STIPredicateDecl declaration,
// Convenience classes and definitions used by processor scheduling models to
// describe dependency breaking instructions and move elimination candidates.
-let UpdatesOpcodeMask = 1 in {
+let UpdatesOpcodeMask = true in {
def IsZeroIdiomDecl : STIPredicateDecl<"isZeroIdiom">;
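
Unlike CheckFunctionPredicate, the new CheckFunctionPredicateWithTII class expands the MachineInstr-level check into a call through the TargetInstrInfo pointer (TII by default) and passes an extra MCInstrInfo pointer to the MCInst-level check. A sketch of a declaration, with hypothetical hook names:

def ExampleMovElimPred : CheckFunctionPredicateWithTII<
  // MCInst-level check: hypothetical free function taking (MCInst, MCInstrInfo).
  "Example_MC::isExampleMoveElimCandidate",
  // MachineInstr-level check: hypothetical TargetInstrInfo method.
  "isExampleMoveElimCandidate"
>;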
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetItinerary.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetItinerary.td
index d364fab038b5..a432d4e42b61 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetItinerary.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetItinerary.td
@@ -8,7 +8,7 @@
//
// This file defines the target-independent scheduling interfaces
// which should be implemented by each target that uses instruction
-// itineraries for scheduling. Itineraries are details reservation
+// itineraries for scheduling. Itineraries are detailed reservation
// tables for each instruction class. They are most appropriate for
// in-order machine with complicated scheduling or bundling constraints.
//
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/contrib/llvm-project/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index cc6c93b6ee2b..ff27ceaeac35 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -38,6 +38,7 @@ class Module;
class SectionKind;
class StringRef;
class TargetMachine;
+class DSOLocalEquivalent;
class TargetLoweringObjectFile : public MCObjectFileInfo {
/// Name-mangler for global names.
@@ -47,6 +48,7 @@ protected:
bool SupportIndirectSymViaGOTPCRel = false;
bool SupportGOTPCRelWithOffset = true;
bool SupportDebugThreadLocalLocation = true;
+ bool SupportDSOLocalEquivalentLowering = false;
/// PersonalityEncoding, LSDAEncoding, TTypeEncoding - Some encoding values
/// for EH.
@@ -61,6 +63,8 @@ protected:
/// This section contains the static destructor pointer list.
MCSection *StaticDtorSection = nullptr;
+ const TargetMachine *TM = nullptr;
+
public:
TargetLoweringObjectFile() = default;
TargetLoweringObjectFile(const TargetLoweringObjectFile &) = delete;
@@ -81,6 +85,9 @@ public:
/// Emit the module-level metadata that the platform cares about.
virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M) const {}
+ /// Emit Call Graph Profile metadata.
+ void emitCGProfileMetadata(MCStreamer &Streamer, Module &M) const;
+
/// Get the module-level metadata that the platform cares about.
virtual void getModuleMetadata(Module &M) {}
@@ -118,6 +125,10 @@ public:
virtual MCSection *getSectionForJumpTable(const Function &F,
const TargetMachine &TM) const;
+ virtual MCSection *getSectionForLSDA(const Function &F,
+ const TargetMachine &TM) const {
+ return LSDASection;
+ }
virtual bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
const Function &F) const;
@@ -151,7 +162,7 @@ public:
unsigned getPersonalityEncoding() const { return PersonalityEncoding; }
unsigned getLSDAEncoding() const { return LSDAEncoding; }
unsigned getTTypeEncoding() const { return TTypeEncoding; }
- unsigned getCallSiteEncoding() const { return CallSiteEncoding; }
+ unsigned getCallSiteEncoding() const;
const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
MCStreamer &Streamer) const;
@@ -176,6 +187,17 @@ public:
return nullptr;
}
+ /// Target supports a native lowering of a dso_local_equivalent constant
+ /// without needing to replace it with equivalent IR.
+ bool supportDSOLocalEquivalentLowering() const {
+ return SupportDSOLocalEquivalentLowering;
+ }
+
+ virtual const MCExpr *lowerDSOLocalEquivalent(const DSOLocalEquivalent *Equiv,
+ const TargetMachine &TM) const {
+ return nullptr;
+ }
+
/// Target supports replacing a data "PC"-relative access to a symbol
 /// through another symbol, by accessing the latter via a GOT entry instead.
bool supportIndirectSymViaGOTPCRel() const {
@@ -203,12 +225,6 @@ public:
return nullptr;
}
- virtual void emitLinkerFlagsForGlobal(raw_ostream &OS,
- const GlobalValue *GV) const {}
-
- virtual void emitLinkerFlagsForUsed(raw_ostream &OS,
- const GlobalValue *GV) const {}
-
/// If supported, return the section to use for the llvm.commandline
/// metadata. Otherwise, return nullptr.
virtual MCSection *getSectionForCommandLines() const {
@@ -226,7 +242,8 @@ public:
/// On targets that support TOC entries, return a section for the entry given
/// the symbol it refers to.
/// TODO: Implement this interface for existing ELF targets.
- virtual MCSection *getSectionForTOCEntry(const MCSymbol *S) const {
+ virtual MCSection *getSectionForTOCEntry(const MCSymbol *S,
+ const TargetMachine &TM) const {
return nullptr;
}
@@ -247,7 +264,8 @@ public:
/// If supported, return the function entry point symbol.
 /// Otherwise, returns nullptr.
- virtual MCSymbol *getFunctionEntryPointSymbol(const Function *F,
+ /// Func must be a function or an alias which has a function as base object.
+ virtual MCSymbol *getFunctionEntryPointSymbol(const GlobalValue *Func,
const TargetMachine &TM) const {
return nullptr;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetMachine.h b/contrib/llvm-project/llvm/include/llvm/Target/TargetMachine.h
index 6d539f1145ee..f9a054dbed3d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetMachine.h
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetMachine.h
@@ -16,24 +16,36 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetOptions.h"
#include <string>
namespace llvm {
+class AAManager;
+template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>
+class PassManager;
+using ModulePassManager = PassManager<Module>;
+
class Function;
class GlobalValue;
+class MachineFunctionPassManager;
+class MachineFunctionAnalysisManager;
class MachineModuleInfoWrapperPass;
class Mangler;
class MCAsmInfo;
class MCContext;
class MCInstrInfo;
class MCRegisterInfo;
+class MCStreamer;
class MCSubtargetInfo;
class MCSymbol;
class raw_pwrite_stream;
+class PassBuilder;
class PassManagerBuilder;
struct PerFunctionMIParsingState;
class SMDiagnostic;
@@ -111,6 +123,7 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
StringRef getTargetCPU() const { return TargetCPU; }
StringRef getTargetFeatureString() const { return TargetFS; }
+ void setTargetFeatureString(StringRef FS) { TargetFS = std::string(FS); }
/// Virtual method implemented by subclasses that returns a reference to that
/// target's TargetSubtargetInfo-derived member variable.
@@ -241,7 +254,9 @@ public:
Options.SupportsDebugEntryValues = Enable;
}
- bool shouldPrintMachineCode() const { return Options.PrintMachineCode; }
+ bool getAIXExtendedAltivecABI() const {
+ return Options.EnableAIXExtendedAltivecABI;
+ }
bool getUniqueSectionNames() const { return Options.UniqueSectionNames; }
@@ -262,6 +277,16 @@ public:
return Options.FunctionSections;
}
+ /// Return true if visibility attribute should not be emitted in XCOFF,
+ /// corresponding to -mignore-xcoff-visibility.
+ bool getIgnoreXCOFFVisibility() const {
+ return Options.IgnoreXCOFFVisibility;
+ }
+
+ /// Return true if XCOFF traceback table should be emitted,
+ /// corresponding to -xcoff-traceback-table.
+ bool getXCOFFTracebackTable() const { return Options.XCOFFTracebackTable; }
+
/// If basic blocks should be emitted into their own section,
/// corresponding to -fbasic-block-sections.
llvm::BasicBlockSection getBBSectionsType() const {
@@ -273,6 +298,19 @@ public:
return Options.BBSectionsFuncListBuf.get();
}
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
+ return false;
+ }
+
+ /// If the specified generic pointer could be assumed as a pointer to a
+ /// specific address space, return that address space.
+ ///
+ /// In offloading programming models, the offloading target may be passed
+ /// values that are only prepared on the host side and can therefore assume
+ /// certain properties.
+ virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+
/// Get a \c TargetIRAnalysis appropriate for the target.
///
/// This is used to construct the new pass manager's target IR analysis pass,
@@ -290,6 +328,15 @@ public:
/// PassManagerBuilder::addExtension.
virtual void adjustPassManager(PassManagerBuilder &) {}
+ /// Allow the target to modify the pass pipeline with New Pass Manager
+ /// (similar to adjustPassManager for Legacy Pass manager).
+ virtual void registerPassBuilderCallbacks(PassBuilder &,
+ bool DebugPassManager) {}
+
+ /// Allow the target to register alias analyses with the AAManager for use
+ /// with the new pass manager. Only affects the "default" AAManager.
+ virtual void registerDefaultAliasAnalyses(AAManager &) {}
+
/// Add passes to the specified pass manager to get the specified file
/// emitted. Typically this will involve several steps of code generation.
/// This method should return true if emission of this file type is not
@@ -329,6 +376,8 @@ public:
/// The integer bit size to use for SjLj based exception handling.
static constexpr unsigned DefaultSjLjDataSize = 32;
virtual unsigned getSjLjDataSize() const { return DefaultSjLjDataSize; }
+
+ static std::pair<int, int> parseBinutilsVersion(StringRef Version);
};
/// This class describes a target machine that is implemented with the LLVM
@@ -364,6 +413,21 @@ public:
bool DisableVerify = true,
MachineModuleInfoWrapperPass *MMIWP = nullptr) override;
+ virtual Error buildCodeGenPipeline(ModulePassManager &,
+ MachineFunctionPassManager &,
+ MachineFunctionAnalysisManager &,
+ raw_pwrite_stream &, raw_pwrite_stream *,
+ CodeGenFileType, CGPassBuilderOption,
+ PassInstrumentationCallbacks *) {
+ return make_error<StringError>("buildCodeGenPipeline is not overriden",
+ inconvertibleErrorCode());
+ }
+
+ virtual std::pair<StringRef, bool> getPassNameFromLegacyName(StringRef) {
+ llvm_unreachable(
+ "getPassNameFromLegacyName parseMIRPipeline is not overriden");
+ }
+
/// Add passes to the specified pass manager to get machine code emitted with
/// the MCJIT. This method returns true if machine code is not supported. It
/// fills the MCContext Ctx pointer which can be used to build custom
@@ -384,6 +448,10 @@ public:
raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
MCContext &Context);
+ Expected<std::unique_ptr<MCStreamer>>
+ createMCStreamer(raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType, MCContext &Ctx);
+
/// True if the target uses physical regs (as nearly all targets do). False
/// for stack machines such as WebAssembly and other virtual-register
/// machines. If true, all vregs must be allocated before PEI. If false, then
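The hunk above adds several virtual hooks to TargetMachine with conservative defaults (isNoopAddrSpaceCast returns false, getAssumedAddrSpace returns -1). A minimal sketch of how client code might consult them; the helper functions below are hypothetical and not part of this merge:

#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetMachine.h"

// Returns true when the target reports the cast as a pure no-op.
static bool canDropAddrSpaceCast(const llvm::TargetMachine &TM,
                                 const llvm::AddrSpaceCastInst &ASC) {
  return TM.isNoopAddrSpaceCast(ASC.getSrcAddressSpace(),
                                ASC.getDestAddressSpace());
}

// Prefer the target's assumed address space for a pointer value, if any.
static unsigned refinedAddrSpace(const llvm::TargetMachine &TM,
                                 const llvm::Value *Ptr) {
  unsigned AS = TM.getAssumedAddrSpace(Ptr);
  return AS == unsigned(-1) ? Ptr->getType()->getPointerAddressSpace() : AS;
}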
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetOptions.h b/contrib/llvm-project/llvm/include/llvm/Target/TargetOptions.h
index d73686b2bdd8..fd014d46e758 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetOptions.h
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetOptions.h
@@ -67,9 +67,18 @@ namespace llvm {
Labels, // Do not use Basic Block Sections but label basic blocks. This
// is useful when associating profile counts from virtual addresses
// to basic blocks.
+ Preset, // Similar to list but the blocks are identified by passes which
+ // seek to use Basic Block Sections, e.g. MachineFunctionSplitter.
+ // This option cannot be set via the command line.
None // Do not use Basic Block Sections.
};
+ enum class StackProtectorGuards {
+ None,
+ TLS,
+ Global
+ };
+
enum class EABI {
Unknown,
Default, // Default means not specified
@@ -113,33 +122,34 @@ namespace llvm {
class TargetOptions {
public:
TargetOptions()
- : PrintMachineCode(false), UnsafeFPMath(false), NoInfsFPMath(false),
- NoNaNsFPMath(false), NoTrappingFPMath(true),
- NoSignedZerosFPMath(false),
+ : UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false),
+ NoTrappingFPMath(true), NoSignedZerosFPMath(false),
+ EnableAIXExtendedAltivecABI(false),
HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
GuaranteedTailCallOpt(false), StackSymbolOrdering(true),
EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
DisableIntegratedAS(false), RelaxELFRelocations(false),
FunctionSections(false), DataSections(false),
+ IgnoreXCOFFVisibility(false), XCOFFTracebackTable(true),
UniqueSectionNames(true), UniqueBasicBlockSectionNames(false),
TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0),
EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
EmitStackSizeSection(false), EnableMachineOutliner(false),
- SupportsDefaultOutlining(false), EmitAddrsig(false),
- EmitCallSiteInfo(false), SupportsDebugEntryValues(false),
- EnableDebugEntryValues(false), ForceDwarfFrameSection(false),
- XRayOmitFunctionIndex(false),
+ EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
+ EmitAddrsig(false), EmitCallSiteInfo(false),
+ SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
+ PseudoProbeForProfiling(false), ValueTrackingVariableLocations(false),
+ ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false),
FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
- /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
- /// option is specified on the command line, and should enable debugging
- /// output from the code generator.
- unsigned PrintMachineCode : 1;
-
/// DisableFramePointerElim - This returns true if frame pointer elimination
/// optimization should be disabled for the given machine function.
bool DisableFramePointerElim(const MachineFunction &MF) const;
+ /// If greater than 0, override the default value of
+ /// MCAsmInfo::BinutilsVersion.
+ std::pair<int, int> BinutilsVersion{0, 0};
+
/// UnsafeFPMath - This flag is enabled when the
/// -enable-unsafe-fp-math flag is specified on the command line. When
/// this flag is off (the default), the code generator is not allowed to
@@ -170,6 +180,12 @@ namespace llvm {
/// argument or result as insignificant.
unsigned NoSignedZerosFPMath : 1;
+ /// EnableAIXExtendedAltivecABI - This flag returns true when -vec-extabi is
+ /// specified. The code generator is then able to use both volatile and
+ /// nonvolatile vector registers. When false, the code generator only uses
+ /// volatile vector registers, which is the default setting on AIX.
+ unsigned EnableAIXExtendedAltivecABI : 1;
+
/// HonorSignDependentRoundingFPMath - This returns true when the
/// -enable-sign-dependent-rounding-fp-math is specified. If this returns
/// false (the default), the code generator is allowed to assume that the
@@ -232,6 +248,12 @@ namespace llvm {
/// Emit data into separate sections.
unsigned DataSections : 1;
+ /// Do not emit the visibility attribute for XCOFF.
+ unsigned IgnoreXCOFFVisibility : 1;
+
+ /// Emit XCOFF traceback table.
+ unsigned XCOFFTracebackTable : 1;
+
unsigned UniqueSectionNames : 1;
/// Use unique names for basic block sections.
@@ -263,6 +285,9 @@ namespace llvm {
/// Enables the MachineOutliner pass.
unsigned EnableMachineOutliner : 1;
+ /// Enables the MachineFunctionSplitter pass.
+ unsigned EnableMachineFunctionSplitter : 1;
+
/// Set if the target supports default outlining behaviour.
unsigned SupportsDefaultOutlining : 1;
@@ -291,12 +316,30 @@ namespace llvm {
/// production.
bool ShouldEmitDebugEntryValues() const;
+ /// Emit pseudo probes into the binary for sample profiling
+ unsigned PseudoProbeForProfiling : 1;
+
+ // When set to true, use experimental new debug variable location tracking,
+ // which seeks to follow the values of variables rather than their locations,
+ // after instruction selection (post-ISel).
+ unsigned ValueTrackingVariableLocations : 1;
+
/// Emit DWARF debug frame section.
unsigned ForceDwarfFrameSection : 1;
/// Whether to omit the XRay function index section.
unsigned XRayOmitFunctionIndex : 1;
+ /// Stack protector guard offset to use.
+ unsigned StackProtectorGuardOffset : 32;
+
+ /// Stack protector guard mode to use, e.g. tls, global.
+ StackProtectorGuards StackProtectorGuard =
+ StackProtectorGuards::None;
+
+ /// Stack protector guard register to use, e.g. fs or gs on X86.
+ std::string StackProtectorGuardReg = "None";
+
/// FloatABIType - This setting is set when the -float-abi=xxx option is
/// specified on the command line. This setting may either be Default, Soft, or Hard.
/// Default selects the target's default behavior. Soft selects the ABI for
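The new TargetOptions fields above are plain members a front end sets before constructing the TargetMachine. A brief sketch; the combination of flags below is illustrative only and does not describe any real target configuration:

#include "llvm/Target/TargetOptions.h"

static llvm::TargetOptions buildExampleTargetOptions() {
  llvm::TargetOptions Opts;
  Opts.EnableAIXExtendedAltivecABI = 1;   // -vec-extabi
  Opts.IgnoreXCOFFVisibility = 1;         // -mignore-xcoff-visibility
  Opts.XCOFFTracebackTable = 1;           // already the default, shown for clarity
  Opts.BinutilsVersion = {2, 35};         // overrides MCAsmInfo::BinutilsVersion
  Opts.StackProtectorGuard = llvm::StackProtectorGuards::TLS;
  Opts.StackProtectorGuardReg = "fs";     // hypothetical, X86-style
  Opts.StackProtectorGuardOffset = 0x28;  // hypothetical offset
  return Opts;
}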
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetPfmCounters.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetPfmCounters.td
index e1d5013c1291..b00f3e19c35f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetPfmCounters.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetPfmCounters.td
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
//
// This file defines the target-independent interfaces for performance counters.
+//
+//===----------------------------------------------------------------------===//
// Definition of a hardware counter from libpfm identifiers.
class PfmCounter<string counter> {
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetSchedule.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetSchedule.td
index 9f2f27ddcb25..a822878ead7f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetSchedule.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetSchedule.td
@@ -87,7 +87,7 @@ class SchedMachineModel {
// Per-cycle resources tables.
ProcessorItineraries Itineraries = NoItineraries;
- bit PostRAScheduler = 0; // Enable Post RegAlloc Scheduler pass.
+ bit PostRAScheduler = false; // Enable Post RegAlloc Scheduler pass.
// Subtargets that define a model for only a subset of instructions
// that have a scheduling class (itinerary class or SchedRW list)
@@ -96,13 +96,13 @@ class SchedMachineModel {
// be an error. This should only be set during initial bringup,
// or there will be no way to catch simple errors in the model
// resulting from changes to the instruction definitions.
- bit CompleteModel = 1;
+ bit CompleteModel = true;
// Indicates that we should do full overlap checking for multiple InstrRWs
// defining the same instructions within the same SchedMachineModel.
// FIXME: Remove when all in tree targets are clean with the full check
// enabled.
- bit FullInstRWOverlapCheck = 1;
+ bit FullInstRWOverlapCheck = true;
// A processor may only implement part of published ISA, due to either new ISA
// extensions, (e.g. Pentium 4 doesn't have AVX) or implementation
@@ -118,12 +118,12 @@ class SchedMachineModel {
// field.
list<Predicate> UnsupportedFeatures = [];
- bit NoModel = 0; // Special tag to indicate missing machine model.
+ bit NoModel = false; // Special tag to indicate missing machine model.
}
def NoSchedModel : SchedMachineModel {
- let NoModel = 1;
- let CompleteModel = 0;
+ let NoModel = true;
+ let CompleteModel = false;
}
// Define a kind of processor resource that may be common across
@@ -254,14 +254,14 @@ class ProcWriteResources<list<ProcResourceKind> resources> {
list<int> ResourceCycles = [];
int Latency = 1;
int NumMicroOps = 1;
- bit BeginGroup = 0;
- bit EndGroup = 0;
+ bit BeginGroup = false;
+ bit EndGroup = false;
// Allow a processor to mark some scheduling classes as unsupported
// for stronger verification.
- bit Unsupported = 0;
+ bit Unsupported = false;
// Allow a processor to mark some scheduling classes as single-issue.
// SingleIssue is an alias for Begin/End Group.
- bit SingleIssue = 0;
+ bit SingleIssue = false;
SchedMachineModel SchedModel = ?;
}
@@ -317,7 +317,7 @@ class ProcReadAdvance<int cycles, list<SchedWrite> writes = []> {
list<SchedWrite> ValidWrites = writes;
// Allow a processor to mark some scheduling classes as unsupported
// for stronger verification.
- bit Unsupported = 0;
+ bit Unsupported = false;
SchedMachineModel SchedModel = ?;
}
@@ -395,7 +395,7 @@ class SchedVar<SchedPredicateBase pred, list<SchedReadWrite> selected> {
// SchedModel silences warnings but is ignored.
class SchedVariant<list<SchedVar> variants> {
list<SchedVar> Variants = variants;
- bit Variadic = 0;
+ bit Variadic = false;
SchedMachineModel SchedModel = ?;
}
@@ -428,7 +428,7 @@ class InstRW<list<SchedReadWrite> rw, dag instrlist> {
dag Instrs = instrlist;
SchedMachineModel SchedModel = ?;
// Allow a subtarget to mark some instructions as unsupported.
- bit Unsupported = 0;
+ bit Unsupported = false;
}
// Map a set of itinerary classes to SchedReadWrite resources. This is
@@ -535,7 +535,7 @@ class SchedAlias<SchedReadWrite match, SchedReadWrite alias> {
class RegisterFile<int numPhysRegs, list<RegisterClass> Classes = [],
list<int> Costs = [], list<bit> AllowMoveElim = [],
- int MaxMoveElimPerCy = 0, bit AllowZeroMoveElimOnly = 0> {
+ int MaxMoveElimPerCy = 0, bit AllowZeroMoveElimOnly = false> {
list<RegisterClass> RegClasses = Classes;
list<int> RegCosts = Costs;
list<bit> AllowMoveElimination = AllowMoveElim;
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td
index de809bb10d49..a09feca6ca9b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -164,6 +164,9 @@ def SDTIntToFPOp : SDTypeProfile<1, 1, [ // [su]int_to_fp
def SDTFPToIntOp : SDTypeProfile<1, 1, [ // fp_to_[su]int
SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>
]>;
+def SDTFPToIntSatOp : SDTypeProfile<1, 2, [ // fp_to_[su]int_sat
+ SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisSameNumEltsAs<0, 1>
+]>;
def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg
SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>,
SDTCisVTSmallerThanOp<2, 1>
@@ -212,6 +215,8 @@ def SDTCatchret : SDTypeProfile<0, 2, [ // catchret
def SDTNone : SDTypeProfile<0, 0, []>; // ret, trap
+def SDTUBSANTrap : SDTypeProfile<0, 1, []>; // ubsantrap
+
def SDTLoad : SDTypeProfile<1, 1, [ // load
SDTCisPtrTy<1>
]>;
@@ -245,6 +250,10 @@ def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert
def SDTVecReduce : SDTypeProfile<1, 1, [ // vector reduction
SDTCisInt<0>, SDTCisVec<1>
]>;
+def SDTFPVecReduce : SDTypeProfile<1, 1, [ // FP vector reduction
+ SDTCisFP<0>, SDTCisVec<1>
+]>;
+
def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract
SDTCisSubVecOfVec<0,1>, SDTCisInt<2>
@@ -396,6 +405,8 @@ def saddsat : SDNode<"ISD::SADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>;
def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>;
+def sshlsat : SDNode<"ISD::SSHLSAT" , SDTIntBinOp>;
+def ushlsat : SDNode<"ISD::USHLSAT" , SDTIntBinOp>;
def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>;
@@ -432,14 +443,15 @@ def vecreduce_smax : SDNode<"ISD::VECREDUCE_SMAX", SDTVecReduce>;
def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>;
def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>;
def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>;
+def vecreduce_fadd : SDNode<"ISD::VECREDUCE_FADD", SDTFPVecReduce>;
def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;
def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;
def frem : SDNode<"ISD::FREM" , SDTFPBinOp>;
-def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp>;
-def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp>;
+def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp, [SDNPCommutative]>;
+def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp, [SDNPCommutative]>;
def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>;
def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
@@ -482,6 +494,8 @@ def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , SDTIntToFPOp>;
def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>;
def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
+def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>;
+def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>;
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
@@ -496,7 +510,7 @@ def strict_fdiv : SDNode<"ISD::STRICT_FDIV",
def strict_frem : SDNode<"ISD::STRICT_FREM",
SDTFPBinOp, [SDNPHasChain]>;
def strict_fma : SDNode<"ISD::STRICT_FMA",
- SDTFPTernaryOp, [SDNPHasChain]>;
+ SDTFPTernaryOp, [SDNPHasChain, SDNPCommutative]>;
def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fsin : SDNode<"ISD::STRICT_FSIN",
@@ -553,6 +567,8 @@ def strict_sint_to_fp : SDNode<"ISD::STRICT_SINT_TO_FP",
SDTIntToFPOp, [SDNPHasChain]>;
def strict_uint_to_fp : SDNode<"ISD::STRICT_UINT_TO_FP",
SDTIntToFPOp, [SDNPHasChain]>;
+def strict_fsetcc : SDNode<"ISD::STRICT_FSETCC", SDTSetCC, [SDNPHasChain]>;
+def strict_fsetccs : SDNode<"ISD::STRICT_FSETCCS", SDTSetCC, [SDNPHasChain]>;
def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
def select : SDNode<"ISD::SELECT" , SDTSelect>;
@@ -571,6 +587,8 @@ def trap : SDNode<"ISD::TRAP" , SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
def debugtrap : SDNode<"ISD::DEBUGTRAP" , SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
+def ubsantrap : SDNode<"ISD::UBSANTRAP" , SDTUBSANTrap,
+ [SDNPHasChain, SDNPSideEffect]>;
def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
@@ -634,6 +652,7 @@ def ist : SDNode<"ISD::STORE" , SDTIStore,
def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>;
def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
+def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
[]>;
@@ -749,7 +768,7 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
// This is useful when Fragments involves associative / commutative
// operators: a single piece of code can easily refer to all operands even
// when re-associated / commuted variants of the fragment are matched.
- bit PredicateCodeUsesOperands = 0;
+ bit PredicateCodeUsesOperands = false;
// Define a few pre-packaged predicates. This helps GlobalISel import
// existing rules from SelectionDAG for many common cases.
@@ -848,13 +867,13 @@ class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
SDNode ImmNode = imm>
: PatFrag<(ops), (vt ImmNode), [{}], xform> {
let ImmediateCode = pred;
- bit FastIselShouldIgnore = 0;
+ bit FastIselShouldIgnore = false;
// Is the data type of the immediate an APInt?
- bit IsAPInt = 0;
+ bit IsAPInt = false;
// Is the data type of the immediate an APFloat?
- bit IsAPFloat = 0;
+ bit IsAPFloat = false;
}
// Convenience wrapper for ImmLeaf to use timm/TargetConstant instead
@@ -871,8 +890,8 @@ class TImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
// IntImmLeaf will allow GlobalISel to import the rule.
class IntImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm>
: ImmLeaf<vt, pred, xform> {
- let IsAPInt = 1;
- let FastIselShouldIgnore = 1;
+ let IsAPInt = true;
+ let FastIselShouldIgnore = true;
}
// An ImmLeaf except that Imm is an APFloat.
@@ -881,8 +900,8 @@ class IntImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm>
// generate code for rules that make use of it.
class FPImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm>
: ImmLeaf<vt, pred, xform, fpimm> {
- let IsAPFloat = 1;
- let FastIselShouldIgnore = 1;
+ let IsAPFloat = true;
+ let FastIselShouldIgnore = true;
}
// Leaf fragments.
@@ -890,17 +909,23 @@ class FPImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm>
def vtInt : PatLeaf<(vt), [{ return N->getVT().isInteger(); }]>;
def vtFP : PatLeaf<(vt), [{ return N->getVT().isFloatingPoint(); }]>;
-// Use ISD::isBuildVectorAllOnes or ISD::isBuildVectorAllZeros to look for
-// the corresponding build_vector. Will look through bitcasts except when used
-// as a pattern root.
-def immAllOnesV; // ISD::isBuildVectorAllOnes
-def immAllZerosV; // ISD::isBuildVectorAllZeros
+// Use ISD::isConstantSplatVectorAllOnes or ISD::isConstantSplatVectorAllZeros
+// to look for the corresponding build_vector or splat_vector. Will look through
+// bitcasts and check for either opcode, except when used as a pattern root.
+// When used as a pattern root, only fixed-length build_vector and scalable
+// splat_vector are supported.
+def immAllOnesV; // ISD::isConstantSplatVectorAllOnes
+def immAllZerosV; // ISD::isConstantSplatVectorAllZeros
// Other helper fragments.
def not : PatFrag<(ops node:$in), (xor node:$in, -1)>;
def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>;
def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>;
+def zanyext : PatFrags<(ops node:$op),
+ [(zext node:$op),
+ (anyext node:$op)]>;
+
// null_frag - The null pattern operator is used in multiclass instantiations
// which accept an SDPatternOperator for use in matching patterns for internal
// definitions. When expanding a pattern, if the null fragment is referenced
@@ -910,222 +935,222 @@ def null_frag : SDPatternOperator;
// load fragments.
def unindexedload : PatFrag<(ops node:$ptr), (ld node:$ptr)> {
- let IsLoad = 1;
- let IsUnindexed = 1;
+ let IsLoad = true;
+ let IsUnindexed = true;
}
def load : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
- let IsLoad = 1;
- let IsNonExtLoad = 1;
+ let IsLoad = true;
+ let IsNonExtLoad = true;
}
// extending load fragments.
def extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
- let IsLoad = 1;
- let IsAnyExtLoad = 1;
+ let IsLoad = true;
+ let IsAnyExtLoad = true;
}
def sextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
- let IsLoad = 1;
- let IsSignExtLoad = 1;
+ let IsLoad = true;
+ let IsSignExtLoad = true;
}
def zextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
- let IsLoad = 1;
- let IsZeroExtLoad = 1;
+ let IsLoad = true;
+ let IsZeroExtLoad = true;
}
def extloadi1 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i1;
}
def extloadi8 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i8;
}
def extloadi16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i16;
}
def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i32;
}
def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = f16;
}
def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = f32;
}
def extloadf64 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = f64;
}
def sextloadi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i1;
}
def sextloadi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i8;
}
def sextloadi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i16;
}
def sextloadi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i32;
}
def zextloadi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i1;
}
def zextloadi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i8;
}
def zextloadi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i16;
}
def zextloadi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let MemoryVT = i32;
}
def extloadvi1 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i1;
}
def extloadvi8 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i8;
}
def extloadvi16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i16;
}
def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i32;
}
def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = f32;
}
def extloadvf64 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = f64;
}
def sextloadvi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i1;
}
def sextloadvi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i8;
}
def sextloadvi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i16;
}
def sextloadvi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i32;
}
def zextloadvi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i1;
}
def zextloadvi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i8;
}
def zextloadvi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i16;
}
def zextloadvi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
- let IsLoad = 1;
+ let IsLoad = true;
let ScalarMemoryVT = i32;
}
// store fragments.
def unindexedstore : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr)> {
- let IsStore = 1;
- let IsUnindexed = 1;
+ let IsStore = true;
+ let IsUnindexed = true;
}
def store : PatFrag<(ops node:$val, node:$ptr),
(unindexedstore node:$val, node:$ptr)> {
- let IsStore = 1;
- let IsTruncStore = 0;
+ let IsStore = true;
+ let IsTruncStore = false;
}
// truncstore fragments.
def truncstore : PatFrag<(ops node:$val, node:$ptr),
(unindexedstore node:$val, node:$ptr)> {
- let IsStore = 1;
- let IsTruncStore = 1;
+ let IsStore = true;
+ let IsTruncStore = true;
}
def truncstorei8 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i8;
}
def truncstorei16 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i16;
}
def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i32;
}
def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = f16;
}
def truncstoref32 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = f32;
}
def truncstoref64 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = f64;
}
def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
+ let IsStore = true;
let ScalarMemoryVT = i8;
}
def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
+ let IsStore = true;
let ScalarMemoryVT = i16;
}
def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
+ let IsStore = true;
let ScalarMemoryVT = i32;
}
// indexed store fragments.
def istore : PatFrag<(ops node:$val, node:$base, node:$offset),
(ist node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
- let IsTruncStore = 0;
+ let IsStore = true;
+ let IsTruncStore = false;
}
def pre_store : PatFrag<(ops node:$val, node:$base, node:$offset),
@@ -1136,8 +1161,8 @@ def pre_store : PatFrag<(ops node:$val, node:$base, node:$offset),
def itruncstore : PatFrag<(ops node:$val, node:$base, node:$offset),
(ist node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
- let IsTruncStore = 1;
+ let IsStore = true;
+ let IsTruncStore = true;
}
def pre_truncst : PatFrag<(ops node:$val, node:$base, node:$offset),
(itruncstore node:$val, node:$base, node:$offset), [{
@@ -1146,37 +1171,37 @@ def pre_truncst : PatFrag<(ops node:$val, node:$base, node:$offset),
}]>;
def pre_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset),
(pre_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i1;
}
def pre_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset),
(pre_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i8;
}
def pre_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset),
(pre_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i16;
}
def pre_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset),
(pre_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i32;
}
def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
(pre_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = f32;
}
def pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
(pre_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let ScalarMemoryVT = i8;
}
def pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
(pre_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let ScalarMemoryVT = i16;
}
@@ -1193,37 +1218,37 @@ def post_truncst : PatFrag<(ops node:$val, node:$base, node:$offset),
}]>;
def post_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset),
(post_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i1;
}
def post_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset),
(post_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i8;
}
def post_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset),
(post_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i16;
}
def post_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset),
(post_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = i32;
}
def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
(post_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let MemoryVT = f32;
}
def post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
(post_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let ScalarMemoryVT = i8;
}
def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
(post_truncst node:$val, node:$base, node:$offset)> {
- let IsStore = 1;
+ let IsStore = true;
let ScalarMemoryVT = i16;
}
@@ -1420,82 +1445,88 @@ def any_sint_to_fp : PatFrags<(ops node:$src),
def any_uint_to_fp : PatFrags<(ops node:$src),
[(strict_uint_to_fp node:$src),
(uint_to_fp node:$src)]>;
+def any_fsetcc : PatFrags<(ops node:$lhs, node:$rhs, node:$pred),
+ [(strict_fsetcc node:$lhs, node:$rhs, node:$pred),
+ (setcc node:$lhs, node:$rhs, node:$pred)]>;
+def any_fsetccs : PatFrags<(ops node:$lhs, node:$rhs, node:$pred),
+ [(strict_fsetccs node:$lhs, node:$rhs, node:$pred),
+ (setcc node:$lhs, node:$rhs, node:$pred)]>;
multiclass binary_atomic_op_ord<SDNode atomic_op> {
def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingMonotonic = 1;
+ let IsAtomic = true;
+ let IsAtomicOrderingMonotonic = true;
}
def NAME#_acquire : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingAcquire = 1;
+ let IsAtomic = true;
+ let IsAtomicOrderingAcquire = true;
}
def NAME#_release : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingRelease = 1;
+ let IsAtomic = true;
+ let IsAtomicOrderingRelease = true;
}
def NAME#_acq_rel : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingAcquireRelease = 1;
+ let IsAtomic = true;
+ let IsAtomicOrderingAcquireRelease = true;
}
def NAME#_seq_cst : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingSequentiallyConsistent = 1;
+ let IsAtomic = true;
+ let IsAtomicOrderingSequentiallyConsistent = true;
}
}
multiclass ternary_atomic_op_ord<SDNode atomic_op> {
def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingMonotonic = 1;
+ let IsAtomic = true;
+ let IsAtomicOrderingMonotonic = true;
}
def NAME#_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingAcquire = 1;
+ let IsAtomic = true;
+ let IsAtomicOrderingAcquire = true;
}
def NAME#_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingRelease = 1;
+ let IsAtomic = true;
+ let IsAtomicOrderingRelease = true;
}
def NAME#_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingAcquireRelease = 1;
+ let IsAtomic = true;
+ let IsAtomicOrderingAcquireRelease = true;
}
def NAME#_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingSequentiallyConsistent = 1;
+ let IsAtomic = true;
+ let IsAtomicOrderingSequentiallyConsistent = true;
}
}
multiclass binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
def _8 : PatFrag<(ops node:$ptr, node:$val),
(atomic_op node:$ptr, node:$val)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = !if(IsInt, i8, ?);
}
def _16 : PatFrag<(ops node:$ptr, node:$val),
(atomic_op node:$ptr, node:$val)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = !if(IsInt, i16, f16);
}
def _32 : PatFrag<(ops node:$ptr, node:$val),
(atomic_op node:$ptr, node:$val)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = !if(IsInt, i32, f32);
}
def _64 : PatFrag<(ops node:$ptr, node:$val),
(atomic_op node:$ptr, node:$val)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = !if(IsInt, i64, f64);
}
@@ -1508,22 +1539,22 @@ multiclass binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
multiclass ternary_atomic_op<SDNode atomic_op> {
def _8 : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(atomic_op node:$ptr, node:$cmp, node:$val)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = i8;
}
def _16 : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(atomic_op node:$ptr, node:$cmp, node:$val)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = i16;
}
def _32 : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(atomic_op node:$ptr, node:$cmp, node:$val)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = i32;
}
def _64 : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(atomic_op node:$ptr, node:$cmp, node:$val)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = i64;
}
@@ -1551,25 +1582,25 @@ defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>;
def atomic_load_8 :
PatFrag<(ops node:$ptr),
(atomic_load node:$ptr)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = i8;
}
def atomic_load_16 :
PatFrag<(ops node:$ptr),
(atomic_load node:$ptr)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = i16;
}
def atomic_load_32 :
PatFrag<(ops node:$ptr),
(atomic_load node:$ptr)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = i32;
}
def atomic_load_64 :
PatFrag<(ops node:$ptr),
(atomic_load node:$ptr)> {
- let IsAtomic = 1;
+ let IsAtomic = true;
let MemoryVT = i64;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Testing/Support/SupportHelpers.h b/contrib/llvm-project/llvm/include/llvm/Testing/Support/SupportHelpers.h
index 38726b1cfaf7..2419fc95d817 100644
--- a/contrib/llvm-project/llvm/include/llvm/Testing/Support/SupportHelpers.h
+++ b/contrib/llvm-project/llvm/include/llvm/Testing/Support/SupportHelpers.h
@@ -12,6 +12,8 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_os_ostream.h"
#include "gmock/gmock-matchers.h"
#include "gtest/gtest-printers.h"
@@ -103,7 +105,143 @@ detail::ValueIsMatcher<InnerMatcher> ValueIs(const InnerMatcher &ValueMatcher) {
return detail::ValueIsMatcher<InnerMatcher>(ValueMatcher);
}
namespace unittest {
+
SmallString<128> getInputFileDirectory(const char *Argv0);
+
+/// A RAII object that creates a temporary directory upon initialization and
+/// removes it upon destruction.
+class TempDir {
+ SmallString<128> Path;
+
+public:
+ /// Creates a managed temporary directory.
+ ///
+ /// @param Name The name of the directory to create.
+ /// @param Unique If true, the directory will be created using
+ /// llvm::sys::fs::createUniqueDirectory.
+ explicit TempDir(StringRef Name, bool Unique = false) {
+ std::error_code EC;
+ if (Unique) {
+ EC = llvm::sys::fs::createUniqueDirectory(Name, Path);
+ if (!EC) {
+ // Resolve any symlinks in the new directory.
+ std::string UnresolvedPath(Path.str());
+ EC = llvm::sys::fs::real_path(UnresolvedPath, Path);
+ }
+ } else {
+ Path = Name;
+ EC = llvm::sys::fs::create_directory(Path);
+ }
+ if (EC)
+ Path.clear();
+ EXPECT_FALSE(EC) << EC.message();
+ }
+
+ ~TempDir() {
+ if (!Path.empty()) {
+ EXPECT_FALSE(llvm::sys::fs::remove_directories(Path.str()));
+ }
+ }
+
+ TempDir(const TempDir &) = delete;
+ TempDir &operator=(const TempDir &) = delete;
+
+ TempDir(TempDir &&) = default;
+ TempDir &operator=(TempDir &&) = default;
+
+ /// The path to the temporary directory.
+ StringRef path() const { return Path; }
+
+ /// The null-terminated C string pointing to the path.
+ const char *c_str() { return Path.c_str(); }
+
+ /// Creates a new path by appending the argument to the path of the managed
+ /// directory using the native path separator.
+ SmallString<128> path(StringRef component) const {
+ SmallString<128> Result(Path);
+ SmallString<128> ComponentToAppend(component);
+ llvm::sys::path::native(ComponentToAppend);
+ llvm::sys::path::append(Result, Twine(ComponentToAppend));
+ return Result;
+ }
+};
+
+/// A RAII object that creates a link upon initialization and
+/// removes it upon destruction.
+///
+/// The link may be a soft or a hard link, depending on the platform.
+class TempLink {
+ SmallString<128> Path;
+
+public:
+ /// Creates a managed link at path Link pointing to Target.
+ TempLink(StringRef Target, StringRef Link) {
+ Path = Link;
+ std::error_code EC = sys::fs::create_link(Target, Link);
+ if (EC)
+ Path.clear();
+ EXPECT_FALSE(EC);
+ }
+ ~TempLink() {
+ if (!Path.empty()) {
+ EXPECT_FALSE(llvm::sys::fs::remove(Path.str()));
+ }
+ }
+
+ TempLink(const TempLink &) = delete;
+ TempLink &operator=(const TempLink &) = delete;
+
+ TempLink(TempLink &&) = default;
+ TempLink &operator=(TempLink &&) = default;
+
+ /// The path to the link.
+ StringRef path() const { return Path; }
+};
+
+/// A RAII object that creates a file upon initialization and
+/// removes it upon destruction.
+class TempFile {
+ SmallString<128> Path;
+
+public:
+ /// Creates a managed file.
+ ///
+ /// @param Name The name of the file to create.
+ /// @param Contents The string to write to the file.
+ /// @param Unique If true, the file will be created using
+ /// llvm::sys::fs::createTemporaryFile.
+ TempFile(StringRef Name, StringRef Suffix = "", StringRef Contents = "",
+ bool Unique = false) {
+ std::error_code EC;
+ int fd;
+ if (Unique) {
+ EC = llvm::sys::fs::createTemporaryFile(Name, Suffix, fd, Path);
+ } else {
+ Path = Name;
+ if (!Suffix.empty()) {
+ Path.append(".");
+ Path.append(Suffix);
+ }
+ EC = llvm::sys::fs::openFileForWrite(Path, fd);
+ }
+ EXPECT_FALSE(EC);
+ raw_fd_ostream OS(fd, /*shouldClose*/ true);
+ OS << Contents;
+ OS.flush();
+ EXPECT_FALSE(OS.error());
+ if (EC || OS.error())
+ Path.clear();
+ }
+ ~TempFile() {
+ if (!Path.empty()) {
+ EXPECT_FALSE(llvm::sys::fs::remove(Path.str()));
+ }
+ }
+
+ /// The path to the file.
+ StringRef path() const { return Path; }
+};
+
} // namespace unittest
} // namespace llvm
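The TempDir/TempFile/TempLink helpers above are RAII wrappers intended for unit tests: the constructor creates the entity and records its path, the destructor removes it, and a failed creation clears the path and fails the surrounding test through EXPECT_FALSE. A minimal gtest-style usage sketch; the test name and file contents are illustrative:

#include "llvm/Support/FileSystem.h"
#include "llvm/Testing/Support/SupportHelpers.h"
#include "gtest/gtest.h"

TEST(SupportHelpersExample, CreatesAndCleansUp) {
  using namespace llvm::unittest;
  // Unique directory, removed automatically when TestDir goes out of scope.
  TempDir TestDir("support-helpers-example", /*Unique=*/true);
  // Creates <TestDir>/data.txt containing "hello"; removed in ~TempFile().
  TempFile Data(TestDir.path("data").str(), "txt", "hello");
  EXPECT_TRUE(llvm::sys::fs::exists(Data.path()));
}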
diff --git a/contrib/llvm-project/llvm/include/llvm/TextAPI/MachO/Platform.h b/contrib/llvm-project/llvm/include/llvm/TextAPI/MachO/Platform.h
index a22aae9b7dce..fc59b8678af7 100644
--- a/contrib/llvm-project/llvm/include/llvm/TextAPI/MachO/Platform.h
+++ b/contrib/llvm-project/llvm/include/llvm/TextAPI/MachO/Platform.h
@@ -29,7 +29,8 @@ enum class PlatformKind : unsigned {
macCatalyst = MachO::PLATFORM_MACCATALYST,
iOSSimulator = MachO::PLATFORM_IOSSIMULATOR,
tvOSSimulator = MachO::PLATFORM_TVOSSIMULATOR,
- watchOSSimulator = MachO::PLATFORM_WATCHOSSIMULATOR
+ watchOSSimulator = MachO::PLATFORM_WATCHOSSIMULATOR,
+ driverKit = MachO::PLATFORM_DRIVERKIT,
};
using PlatformSet = SmallSet<PlatformKind, 3>;
@@ -42,4 +43,4 @@ StringRef getPlatformName(PlatformKind Platform);
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_PLATFORM_H
\ No newline at end of file
+#endif // LLVM_TEXTAPI_MACHO_PLATFORM_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/contrib/llvm-project/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
index 887c8807904e..e5e24e0b6311 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -17,7 +17,6 @@
#ifndef LLVM_TRANSFORMS_AGGRESSIVE_INSTCOMBINE_INSTCOMBINE_H
#define LLVM_TRANSFORMS_AGGRESSIVE_INSTCOMBINE_INSTCOMBINE_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines.h
index ef05f549fbc1..204359254d4e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines.h
@@ -23,7 +23,7 @@ void addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder);
Pass *createCoroEarlyLegacyPass();
/// Split up coroutines into multiple functions driving their state machines.
-Pass *createCoroSplitLegacyPass();
+Pass *createCoroSplitLegacyPass(bool ReuseFrameSlot = false);
/// Analyze coroutines use sites, devirtualize resume/destroy calls and elide
/// heap allocation for coroutine frame where possible.
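With the ReuseFrameSlot parameter added above, legacy pass manager clients can opt into reusing coroutine frame slots when they schedule the split pass. A hypothetical snippet, not taken from this merge:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Coroutines.h"

static void addCoroSplitLegacy(llvm::legacy::PassManager &PM) {
  // Reuse frame slots across suspend points where the pass deems it safe.
  PM.add(llvm::createCoroSplitLegacyPass(/*ReuseFrameSlot=*/true));
}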
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h
index c3caa55c25ce..7ecdc050335d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h
@@ -22,6 +22,7 @@ class Function;
struct CoroCleanupPass : PassInfoMixin<CoroCleanupPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h
index 0f5d1e40eb17..3f5ec2abd172 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h
@@ -25,6 +25,7 @@ class Function;
struct CoroEarlyPass : PassInfoMixin<CoroEarlyPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroElide.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroElide.h
index 348e8e355ea0..ff73cf20c5bf 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroElide.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroElide.h
@@ -24,6 +24,7 @@ class Function;
struct CoroElidePass : PassInfoMixin<CoroElidePass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h
index 40424e5a7e6a..f4eef19b20e5 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h
@@ -22,8 +22,13 @@
namespace llvm {
struct CoroSplitPass : PassInfoMixin<CoroSplitPass> {
+ CoroSplitPass(bool ReuseFrameSlot = false) : ReuseFrameSlot(ReuseFrameSlot) {}
+
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
+ static bool isRequired() { return true; }
+
+ bool ReuseFrameSlot;
};
} // end namespace llvm
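Under the new pass manager the same knob is the CoroSplitPass constructor argument introduced above; CoroSplit runs on SCCs, so it goes through the CGSCC adaptor. A sketch assuming a ModulePassManager built elsewhere:

#include <utility>
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"

static void addCoroSplit(llvm::ModulePassManager &MPM) {
  llvm::CGSCCPassManager CGPM;
  CGPM.addPass(llvm::CoroSplitPass(/*ReuseFrameSlot=*/true));
  MPM.addPass(llvm::createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
}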
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h b/contrib/llvm-project/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h
new file mode 100644
index 000000000000..6c753032f913
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h
@@ -0,0 +1,23 @@
+//===-- HelloWorld.h - Example Transformations ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+#define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class HelloWorldPass : public PassInfoMixin<HelloWorldPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
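The new header only declares HelloWorldPass; the definition is expected to live in a matching .cpp file. A rough sketch of such a definition follows; printing each visited function is the conventional behaviour of this example pass, but the body below is illustrative rather than quoted from the merge:

#include "llvm/Transforms/HelloNew/HelloWorld.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

PreservedAnalyses HelloWorldPass::run(Function &F, FunctionAnalysisManager &AM) {
  errs() << F.getName() << "\n";
  // The pass only inspects the IR, so all analyses remain valid.
  return PreservedAnalyses::all();
}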
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO.h
index 28e454d3b0fc..af357181597a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO.h
@@ -31,6 +31,13 @@ class raw_ostream;
//===----------------------------------------------------------------------===//
//
+// This pass adds !annotation metadata to entries in the
+// @llvm.global.annotations global constant.
+//
+ModulePass *createAnnotation2MetadataLegacyPass();
+
+//===----------------------------------------------------------------------===//
+//
// These functions removes symbols from functions and modules. If OnlyDebugInfo
// is true, only debugging information is removed from the module.
//
@@ -156,12 +163,6 @@ Pass *createArgumentPromotionPass(unsigned maxElements = 3);
Pass *createOpenMPOptLegacyPass();
//===----------------------------------------------------------------------===//
-/// createIPConstantPropagationPass - This pass propagates constants from call
-/// sites into the bodies of functions.
-///
-ModulePass *createIPConstantPropagationPass();
-
-//===----------------------------------------------------------------------===//
/// createIPSCCPPass - This pass propagates constants from call sites into the
/// bodies of functions, and keeps track of whether basic blocks are executable
/// in the process.
@@ -215,6 +216,11 @@ ModulePass *createMergeFunctionsPass();
ModulePass *createHotColdSplittingPass();
//===----------------------------------------------------------------------===//
+/// createIROutlinerPass - This pass finds similar code regions and factors
+/// those regions out into functions.
+ModulePass *createIROutlinerPass();
+
+//===----------------------------------------------------------------------===//
/// createPartialInliningPass - This pass inlines parts of functions.
///
ModulePass *createPartialInliningPass();
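Both creator functions added above return ordinary ModulePass instances for the legacy pipeline. A hypothetical usage sketch:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"

static void addNewIPOPasses(llvm::legacy::PassManager &PM) {
  // Convert @llvm.global.annotations entries into !annotation metadata first,
  // then factor similar code regions out into functions.
  PM.add(llvm::createAnnotation2MetadataLegacyPass());
  PM.add(llvm::createIROutlinerPass());
}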
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
index 64e25230f6da..6a208dfa6a25 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
@@ -34,6 +34,7 @@ public:
: InsertLifetime(InsertLifetime) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
+ static bool isRequired() { return true; }
};
/// Create a legacy pass manager instance of a pass to inline and remove
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Annotation2Metadata.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Annotation2Metadata.h
new file mode 100644
index 000000000000..cf7137b088c5
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Annotation2Metadata.h
@@ -0,0 +1,30 @@
+//===- Annotation2Metadata.h - Add !annotation metadata. --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// New pass manager pass to convert @llvm.global.annotations to !annotation
+// metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_ANNOTATION2METADATA_H
+#define LLVM_TRANSFORMS_IPO_ANNOTATION2METADATA_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+/// Pass to convert @llvm.global.annotations to !annotation metadata.
+struct Annotation2MetadataPass : public PassInfoMixin<Annotation2MetadataPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_ANNOTATION2METADATA_H
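The new-pass-manager counterpart declared above is a plain module pass; a minimal sketch of registering it, assuming an existing ModulePassManager:

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/Annotation2Metadata.h"

static void addAnnotation2Metadata(llvm::ModulePassManager &MPM) {
  // Runs once over the module and attaches !annotation metadata.
  MPM.addPass(llvm::Annotation2MetadataPass());
}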
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h
index bed180e6717a..dbaf945986e4 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -97,38 +97,44 @@
#ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
#define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/AbstractCallSite.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/TimeProfiler.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
namespace llvm {
+struct AADepGraphNode;
+struct AADepGraph;
struct Attributor;
struct AbstractAttribute;
struct InformationCache;
struct AAIsDead;
+class AAManager;
+class AAResults;
class Function;
-/// Simple enum classes that forces properties to be spelled out explicitly.
-///
+/// The value passed to the command line option that defines the maximal
+/// initialization chain length.
+extern unsigned MaxInitializationChainLength;
+
///{
enum class ChangeStatus {
CHANGED,
@@ -144,6 +150,74 @@ enum class DepClassTy {
};
///}
+/// The data structure for the nodes of a dependency graph
+struct AADepGraphNode {
+public:
+ virtual ~AADepGraphNode(){};
+ using DepTy = PointerIntPair<AADepGraphNode *, 1>;
+
+protected:
+ /// Set of dependency graph nodes which should be updated if this one
+ /// is updated. The bit encodes if it is optional.
+ TinyPtrVector<DepTy> Deps;
+
+ static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
+ static AbstractAttribute *DepGetValAA(DepTy &DT) {
+ return cast<AbstractAttribute>(DT.getPointer());
+ }
+
+ operator AbstractAttribute *() { return cast<AbstractAttribute>(this); }
+
+public:
+ using iterator =
+ mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
+ using aaiterator =
+ mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetValAA)>;
+
+ aaiterator begin() { return aaiterator(Deps.begin(), &DepGetValAA); }
+ aaiterator end() { return aaiterator(Deps.end(), &DepGetValAA); }
+ iterator child_begin() { return iterator(Deps.begin(), &DepGetVal); }
+ iterator child_end() { return iterator(Deps.end(), &DepGetVal); }
+
+ virtual void print(raw_ostream &OS) const { OS << "AADepNode Impl\n"; }
+ TinyPtrVector<DepTy> &getDeps() { return Deps; }
+
+ friend struct Attributor;
+ friend struct AADepGraph;
+};
+
+/// The data structure for the dependency graph
+///
+/// Note that in this graph an edge from A to B (A -> B) means that B depends
+/// on A; when the state of A is updated, node B should also be updated.
+struct AADepGraph {
+ AADepGraph() {}
+ ~AADepGraph() {}
+
+ using DepTy = AADepGraphNode::DepTy;
+ static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
+ using iterator =
+ mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
+
+ /// There is no root node for the dependency graph. But the SCCIterator
+ /// requires a single entry point, so we maintain a fake ("synthetic") root
+ /// node that depends on every node.
+ AADepGraphNode SyntheticRoot;
+ AADepGraphNode *GetEntryNode() { return &SyntheticRoot; }
+
+ iterator begin() { return SyntheticRoot.child_begin(); }
+ iterator end() { return SyntheticRoot.child_end(); }
+
+ void viewGraph();
+
+ /// Dump graph to file
+ void dumpGraph();
+
+ /// Print dependency graph
+ void print();
+};
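Besides viewGraph/dumpGraph/print, the graph exposes ordinary iterators rooted at the synthetic node, so every abstract attribute can be walked directly. A minimal sketch, assuming G is the dependency graph owned by an Attributor run:

#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/Attributor.h"

static void printAllDepNodes(llvm::AADepGraph &G) {
  // Every node hangs off the synthetic root, so begin()/end() cover them all.
  for (llvm::AADepGraphNode *N : llvm::make_range(G.begin(), G.end()))
    N->print(llvm::errs());
}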
+
/// Helper to describe and deal with positions in the LLVM-IR.
///
/// A position in the IR is described by an anchor value and an "offset" that
@@ -263,8 +337,14 @@ struct IRPosition {
/// Return the associated function, if any.
Function *getAssociatedFunction() const {
- if (auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
+ if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
+ // We reuse the logic that associates callback callees to arguments of a
+ // call site here to identify the callback callee as the associated
+ // function.
+ if (Argument *Arg = getAssociatedArgument())
+ return Arg->getParent();
return CB->getCalledFunction();
+ }
return getAnchorScope();
}
@@ -312,10 +392,11 @@ struct IRPosition {
/// Return the value this abstract attribute is associated with.
Value &getAssociatedValue() const {
- if (getArgNo() < 0 || isa<Argument>(&getAnchorValue()))
+ if (getCallSiteArgNo() < 0 || isa<Argument>(&getAnchorValue()))
return getAnchorValue();
assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!");
- return *cast<CallBase>(&getAnchorValue())->getArgOperand(getArgNo());
+ return *cast<CallBase>(&getAnchorValue())
+ ->getArgOperand(getCallSiteArgNo());
}
/// Return the type this abstract attribute is associated with.
@@ -325,19 +406,22 @@ struct IRPosition {
return getAssociatedValue().getType();
}
- /// Return the argument number of the associated value if it is an argument or
- /// call site argument, otherwise a negative value.
- int getArgNo() const {
- switch (getPositionKind()) {
- case IRPosition::IRP_ARGUMENT:
- return cast<Argument>(getAsValuePtr())->getArgNo();
- case IRPosition::IRP_CALL_SITE_ARGUMENT: {
- Use &U = *getAsUsePtr();
- return cast<CallBase>(U.getUser())->getArgOperandNo(&U);
- }
- default:
- return -1;
- }
+ /// Return the callee argument number of the associated value if it is an
+ /// argument or call site argument, otherwise a negative value. In contrast to
+ /// `getCallSiteArgNo` this method will always return the "argument number"
+ /// from the perspective of the callee. This may not be the same as the call site
+ /// if this is a callback call.
+ int getCalleeArgNo() const {
+ return getArgNo(/* CallbackCalleeArgIfApplicable */ true);
+ }
+
+ /// Return the call site argument number of the associated value if it is an
+ /// argument or call site argument, otherwise a negative value. In contrast to
+ /// `getCalleeArgNo` this method will always return the "operand number" from
+ /// the perspective of the call site. This may not be the same as the callee
+ /// perspective if this is a callback call.
+ int getCallSiteArgNo() const {
+ return getArgNo(/* CallbackCalleeArgIfApplicable */ false);
}
/// Return the index in the attribute list for this position.
@@ -354,7 +438,7 @@ struct IRPosition {
return AttributeList::ReturnIndex;
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
- return getArgNo() + AttributeList::FirstArgIndex;
+ return getCallSiteArgNo() + AttributeList::FirstArgIndex;
}
llvm_unreachable(
"There is no attribute index for a floating or invalid position!");
@@ -439,6 +523,17 @@ struct IRPosition {
}
}
+ /// Return true if the position is an argument or call site argument.
+ bool isArgumentPosition() const {
+ switch (getPositionKind()) {
+ case IRPosition::IRP_ARGUMENT:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
/// Special DenseMap key values.
///
///{
@@ -485,6 +580,25 @@ private:
verify();
}
+ /// Return the argument number of the associated value if it is an argument or
+ /// call site argument, otherwise a negative value. Depending on
+ /// \p CallbackCalleeArgIfApplicable the number is either the callee argument
+ /// number or the call site operand number. See also `getCalleeArgNo` and
+ /// `getCallSiteArgNo`.
+ int getArgNo(bool CallbackCalleeArgIfApplicable) const {
+ if (CallbackCalleeArgIfApplicable)
+ if (Argument *Arg = getAssociatedArgument())
+ return Arg->getArgNo();
+ switch (getPositionKind()) {
+ case IRPosition::IRP_ARGUMENT:
+ return cast<Argument>(getAsValuePtr())->getArgNo();
+ case IRPosition::IRP_CALL_SITE_ARGUMENT: {
+ Use &U = *getAsUsePtr();
+ return cast<CallBase>(U.getUser())->getArgOperandNo(&U);
+ }
+ default:
+ return -1;
+ }
+ }
+
/// IRPosition for the use \p U. The position kind \p PK needs to be
/// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value
/// the used value.
@@ -648,7 +762,10 @@ struct InformationCache {
[&](const Function &F) {
return AG.getAnalysis<PostDominatorTreeAnalysis>(F);
}),
- AG(AG), CGSCC(CGSCC) {}
+ AG(AG), CGSCC(CGSCC) {
+ if (CGSCC)
+ initializeModuleSlice(*CGSCC);
+ }
~InformationCache() {
// The FunctionInfo objects are allocated via a BumpPtrAllocator, we call
@@ -657,6 +774,68 @@ struct InformationCache {
It.getSecond()->~FunctionInfo();
}
+ /// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is
+ /// true, constant expression users are not given to \p CB but their uses are
+ /// traversed transitively.
+ template <typename CBTy>
+ static void foreachUse(Function &F, CBTy CB,
+ bool LookThroughConstantExprUses = true) {
+ SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses()));
+
+ for (unsigned Idx = 0; Idx < Worklist.size(); ++Idx) {
+ Use &U = *Worklist[Idx];
+
+ // Allow uses in constant expressions, e.g. bitcasts, and simply look through them.
+ if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) {
+ for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses())
+ Worklist.push_back(&CEU);
+ continue;
+ }
+
+ CB(U);
+ }
+ }
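+
+ // A minimal usage sketch: count all uses of a function, looking through
+ // constant expressions (the default), assuming a `Function &F` is at hand:
+ //
+ //   unsigned NumUses = 0;
+ //   InformationCache::foreachUse(F, [&](Use &U) { ++NumUses; });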
+
+ /// Initialize the ModuleSlice member based on \p SCC. ModuleSlice contains
+ /// (a subset of) all functions that we can look at during this SCC traversal.
+ /// This includes functions (transitively) called from the SCC and the
+ /// (transitive) callers of SCC functions. We can also look at a function if
+ /// there is a "reference edge", i.e., if the function somehow uses (!= calls)
+ /// a function in the SCC or a caller of a function in the SCC.
+ void initializeModuleSlice(SetVector<Function *> &SCC) {
+ ModuleSlice.insert(SCC.begin(), SCC.end());
+
+ SmallPtrSet<Function *, 16> Seen;
+ SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end());
+ while (!Worklist.empty()) {
+ Function *F = Worklist.pop_back_val();
+ ModuleSlice.insert(F);
+
+ for (Instruction &I : instructions(*F))
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *Callee = CB->getCalledFunction())
+ if (Seen.insert(Callee).second)
+ Worklist.push_back(Callee);
+ }
+
+ Seen.clear();
+ Worklist.append(SCC.begin(), SCC.end());
+ while (!Worklist.empty()) {
+ Function *F = Worklist.pop_back_val();
+ ModuleSlice.insert(F);
+
+ // Traverse all transitive uses.
+ foreachUse(*F, [&](Use &U) {
+ if (auto *UsrI = dyn_cast<Instruction>(U.getUser()))
+ if (Seen.insert(UsrI->getFunction()).second)
+ Worklist.push_back(UsrI->getFunction());
+ });
+ }
+ }
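+
+ // A worked example (hypothetical module, C-like pseudo code) for SCC = {f}:
+ //
+ //   void g() {}              // called from the SCC      -> in the slice
+ //   void f() { g(); }        // the SCC itself           -> in the slice
+ //   void h() { f(); }        // caller of the SCC        -> in the slice
+ //   void k() { take(&h); }   // uses a caller of the SCC -> in the slice
+ //   void other() {}          // no edge to the SCC       -> not included
+ //
+ // The resulting ModuleSlice is {f, g, h, k}.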
+
+ /// The slice of the module we are allowed to look at.
+ SmallPtrSet<Function *, 8> ModuleSlice;
+
/// A vector type to hold instructions.
using InstructionVectorTy = SmallVector<Instruction *, 8>;
@@ -685,9 +864,7 @@ struct InformationCache {
}
/// Return AliasAnalysis Result for function \p F.
- AAResults *getAAResultsForFunction(const Function &F) {
- return AG.getAnalysis<AAManager>(F);
- }
+ AAResults *getAAResultsForFunction(const Function &F);
/// Return true if \p Arg is involved in a must-tail call, thus the argument
/// of the caller or callee.
@@ -715,6 +892,26 @@ struct InformationCache {
/// Return the map containing all the knowledge we have from `llvm.assume`s.
const RetainedKnowledgeMap &getKnowledgeMap() const { return KnowledgeMap; }
+ /// Return whether \p To is potentially reachable from \p From or not.
+ /// If the same query was answered before, return the cached result.
+ bool getPotentiallyReachable(const Instruction &From, const Instruction &To) {
+ auto KeyPair = std::make_pair(&From, &To);
+ auto Iter = PotentiallyReachableMap.find(KeyPair);
+ if (Iter != PotentiallyReachableMap.end())
+ return Iter->second;
+ const Function &F = *From.getFunction();
+ bool Result = isPotentiallyReachable(
+ &From, &To, nullptr, AG.getAnalysis<DominatorTreeAnalysis>(F),
+ AG.getAnalysis<LoopAnalysis>(F));
+ PotentiallyReachableMap.insert(std::make_pair(KeyPair, Result));
+ return Result;
+ }
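+
+ // A minimal usage sketch (illustrative; assumes an Attributor `A` and two
+ // hypothetical instructions `Store` and `Load` in the same function, since
+ // the underlying reachability query is function-local):
+ //
+ //   if (A.getInfoCache().getPotentiallyReachable(Store, Load))
+ //     ; // the load may execute after the store
+ //
+ // Repeated queries for the same (From, To) pair are served from the cache.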
+
+ /// Check whether \p F is part of the module slice.
+ bool isInModuleSlice(const Function &F) {
+ return ModuleSlice.count(const_cast<Function *>(&F));
+ }
+
private:
struct FunctionInfo {
~FunctionInfo();
@@ -774,6 +971,10 @@ private:
/// Set of inlineable functions
SmallPtrSet<const Function *, 8> InlineableFunctions;
+ /// A map for caching results of queries for isPotentiallyReachable
+ DenseMap<std::pair<const Instruction *, const Instruction *>, bool>
+ PotentiallyReachableMap;
+
/// Give the Attributor access to the members so
/// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
friend struct Attributor;
@@ -876,6 +1077,7 @@ struct Attributor {
/// attribute. Using this after Attributor started running is restricted to
/// only the Attributor itself. Initial seeding of AAs can be done via this
/// function.
+ /// NOTE: ForceUpdate is ignored in any stage other than the update stage.
template <typename AAType>
const AAType &getOrCreateAAFor(const IRPosition &IRP,
const AbstractAttribute *QueryingAA = nullptr,
@@ -883,7 +1085,7 @@ struct Attributor {
DepClassTy DepClass = DepClassTy::OPTIONAL,
bool ForceUpdate = false) {
if (AAType *AAPtr = lookupAAFor<AAType>(IRP, QueryingAA, TrackDependence)) {
- if (ForceUpdate)
+ if (ForceUpdate && Phase == AttributorPhase::UPDATE)
updateAA(*AAPtr);
return *AAPtr;
}
@@ -893,7 +1095,7 @@ struct Attributor {
auto &AA = AAType::createForPosition(IRP, *this);
// If we are currently seeding attributes, enforce seeding rules.
- if (SeedingPeriod && !shouldSeedAttribute(AA)) {
+ if (Phase == AttributorPhase::SEEDING && !shouldSeedAttribute(AA)) {
AA.getState().indicatePessimisticFixpoint();
return AA;
}
@@ -907,6 +1109,9 @@ struct Attributor {
Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) ||
FnScope->hasFnAttribute(Attribute::OptimizeNone);
+ // Avoid too many nested initializations to prevent a stack overflow.
+ Invalidate |= InitializationChainLength > MaxInitializationChainLength;
+
// Bootstrap the new attribute with an initial update to propagate
// information, e.g., function -> call site. If it is not in a given
// Allowed set we will not perform updates at all.
@@ -915,24 +1120,39 @@ struct Attributor {
return AA;
}
- AA.initialize(*this);
+ {
+ TimeTraceScope TimeScope(AA.getName() + "::initialize");
+ ++InitializationChainLength;
+ AA.initialize(*this);
+ --InitializationChainLength;
+ }
- // We can initialize (=look at) code outside the current function set but
- // not call update because that would again spawn new abstract attributes in
- // potentially unconnected code regions (=SCCs).
+ // Initialize and update is allowed for code outside of the current function
+ // set, but only if it is part of the module slice we are allowed to look at.
+ // The only exception is AAIsDeadFunction, whose initialization is prevented
+ // directly since we don't want to compute it twice.
if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) {
+ if (!getInfoCache().isInModuleSlice(*FnScope)) {
+ AA.getState().indicatePessimisticFixpoint();
+ return AA;
+ }
+ }
+
+ // If this is queried in the manifest stage, we force the AA to indicate
+ // pessimistic fixpoint immediately.
+ if (Phase == AttributorPhase::MANIFEST) {
AA.getState().indicatePessimisticFixpoint();
return AA;
}
// Allow seeded attributes to declare dependencies.
// Remember the seeding state.
- bool OldSeedingPeriod = SeedingPeriod;
- SeedingPeriod = false;
+ AttributorPhase OldPhase = Phase;
+ Phase = AttributorPhase::UPDATE;
updateAA(AA);
- SeedingPeriod = OldSeedingPeriod;
+ Phase = OldPhase;
if (TrackDependence && AA.getState().isValidState())
recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA),
@@ -1001,7 +1221,11 @@ struct Attributor {
assert(!AAPtr && "Attribute already in map!");
AAPtr = &AA;
- AllAbstractAttributes.push_back(&AA);
+ // Register AA with the synthetic root only before the manifest stage.
+ if (Phase == AttributorPhase::SEEDING || Phase == AttributorPhase::UPDATE)
+ DG.SyntheticRoot.Deps.push_back(
+ AADepGraphNode::DepTy(&AA, unsigned(DepClassTy::REQUIRED)));
+
return AA;
}
@@ -1310,6 +1534,22 @@ struct Attributor {
bool checkForAllReadWriteInstructions(function_ref<bool(Instruction &)> Pred,
AbstractAttribute &QueryingAA);
+ /// Create a shallow wrapper for \p F such that \p F has internal linkage
+ /// afterwards. It also sets the original \p F's name to an anonymous one.
+ ///
+ /// A wrapper is a function with the same type (and attributes) as \p F
+ /// that will only call \p F and return the result, if any.
+ ///
+ /// Assuming the declaration of \p F looks like:
+ /// rty F(aty0 arg0, ..., atyN argN);
+ ///
+ /// The wrapper will then look as follows:
+ /// rty wrapper(aty0 arg0, ..., atyN argN) {
+ /// return F(arg0, ..., argN);
+ /// }
+ ///
+ static void createShallowWrapper(Function &F);
+
/// Return the data layout associated with the anchor scope.
const DataLayout &getDataLayout() const { return InfoCache.DL; }
@@ -1333,6 +1573,10 @@ private:
/// Rewrites function signatures and updates the call graph.
ChangeStatus cleanupIR();
+ /// Identify internal functions that are effectively dead, thus not reachable
+ /// from a live entry point. The functions are added to ToBeDeletedFunctions.
+ void identifyDeadInternalFunctions();
+
/// Run `::update` on \p AA and track the dependences queried while doing so.
/// Also adjust the state if we know further updates are not necessary.
ChangeStatus updateAA(AbstractAttribute &AA);
@@ -1363,12 +1607,6 @@ private:
/// See getOrCreateAAFor.
bool shouldSeedAttribute(AbstractAttribute &AA);
- /// The set of all abstract attributes.
- ///{
- using AAVector = SmallVector<AbstractAttribute *, 64>;
- AAVector AllAbstractAttributes;
- ///}
-
/// A nested map to lookup abstract attributes based on the argument position
/// on the outer level, and the addresses of the static member (AAType::ID) on
/// the inner level.
@@ -1390,6 +1628,9 @@ private:
/// Helper to update an underlying call graph.
CallGraphUpdater &CGUpdater;
+ /// Abstract Attribute dependency graph
+ AADepGraph DG;
+
/// Set of functions for which we modified the content such that it might
/// impact the call graph.
SmallPtrSet<Function *, 8> CGModifiedFunctions;
@@ -1428,9 +1669,17 @@ private:
/// Invoke instructions with at least a single dead successor block.
SmallVector<WeakVH, 16> InvokeWithDeadSuccessor;
- /// Wheather attributes are being `seeded`, always false after ::run function
- /// gets called \see getOrCreateAAFor.
- bool SeedingPeriod = true;
+ /// A flag that indicates which stage of the process we are in. Initially, the
+ /// phase is SEEDING. The phase is changed in `Attributor::run()`.
+ enum class AttributorPhase {
+ SEEDING,
+ UPDATE,
+ MANIFEST,
+ CLEANUP,
+ } Phase = AttributorPhase::SEEDING;
+
+ /// The current initialization chain length. Tracked to avoid stack overflows.
+ unsigned InitializationChainLength = 0;
/// Functions, blocks, and instructions we delete after manifest is done.
///
@@ -1439,6 +1688,8 @@ private:
SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks;
SmallDenseSet<WeakVH, 8> ToBeDeletedInsts;
///}
+
+ friend AADepGraph;
};
/// An interface to query the internal state of an abstract attribute.
@@ -1917,7 +2168,7 @@ struct StateWrapper : public BaseType, public StateTy {
StateType &getState() override { return *this; }
/// See AbstractAttribute::getState(...).
- const AbstractState &getState() const override { return *this; }
+ const StateType &getState() const override { return *this; }
};
/// Helper class that provides common functionality to manifest IR attributes.
@@ -2011,7 +2262,7 @@ struct IRAttribute : public BaseType {
/// both directions will be added in the future.
/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
/// described in the file comment.
-struct AbstractAttribute : public IRPosition {
+struct AbstractAttribute : public IRPosition, public AADepGraphNode {
using StateType = AbstractState;
AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {}
@@ -2019,6 +2270,14 @@ struct AbstractAttribute : public IRPosition {
/// Virtual destructor.
virtual ~AbstractAttribute() {}
+ /// This function is used to identify if a \p DGN is of type
+ /// AbstractAttribute so that dyn_cast and cast can use such information
+ /// to cast an AADepGraphNode to an AbstractAttribute.
+ ///
+ /// We eagerly return true here because all AADepGraphNodes except for the
+ /// synthetic root node are of type AbstractAttribute.
+ static bool classof(const AADepGraphNode *DGN) { return true; }
+
/// Initialize the state with the information in the Attributor \p A.
///
/// This function is called by the Attributor once all abstract attributes
@@ -2039,7 +2298,8 @@ struct AbstractAttribute : public IRPosition {
/// Helper functions, for debug purposes only.
///{
- virtual void print(raw_ostream &OS) const;
+ void print(raw_ostream &OS) const override;
+ virtual void printWithDeps(raw_ostream &OS) const;
void dump() const { print(dbgs()); }
/// This function should return the "summarized" assumed state as string.
@@ -2087,12 +2347,6 @@ protected:
///
/// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
virtual ChangeStatus updateImpl(Attributor &A) = 0;
-
-private:
- /// Set of abstract attributes which were queried by this one. The bit encodes
- /// if there is an optional of required dependence.
- using DepTy = PointerIntPair<AbstractAttribute *, 1>;
- TinyPtrVector<DepTy> Deps;
};
/// Forward declarations of output streams for debug purposes.
@@ -2374,16 +2628,17 @@ struct AAReachability : public StateWrapper<BooleanState, AbstractAttribute> {
/// Returns true if the 'From' instruction is assumed to reach the 'To' instruction.
/// Users should provide two positions they are interested in, and the class
/// determines (and caches) reachability.
- bool isAssumedReachable(const Instruction *From,
- const Instruction *To) const {
- return isPotentiallyReachable(From, To);
+ bool isAssumedReachable(Attributor &A, const Instruction &From,
+ const Instruction &To) const {
+ return A.getInfoCache().getPotentiallyReachable(From, To);
}
/// Returns true if the 'From' instruction is known to reach the 'To' instruction.
/// Users should provide two positions they are interested in, and the class
/// determines (and caches) reachability.
- bool isKnownReachable(const Instruction *From, const Instruction *To) const {
- return isPotentiallyReachable(From, To);
+ bool isKnownReachable(Attributor &A, const Instruction &From,
+ const Instruction &To) const {
+ return A.getInfoCache().getPotentiallyReachable(From, To);
}
/// Create an abstract attribute view for the position \p IRP.
@@ -2546,6 +2801,12 @@ public:
return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
}
+ /// Return true if the edge from \p From BB to \p To BB is assumed dead.
+ /// This is specifically useful in AAReachability.
+ virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const {
+ return false;
+ }
+
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAIsDead"; }
@@ -3202,7 +3463,7 @@ struct AAValueConstantRange
/// See AbstractAttribute::getState(...).
IntegerRangeState &getState() override { return *this; }
- const AbstractState &getState() const override { return *this; }
+ const IntegerRangeState &getState() const override { return *this; }
/// Create an abstract attribute view for the position \p IRP.
static AAValueConstantRange &createForPosition(const IRPosition &IRP,
@@ -3250,6 +3511,279 @@ struct AAValueConstantRange
static const char ID;
};
+/// A class for a set state.
+/// The assumed boolean state indicates whether the corresponding set is the
+/// full set or not. If the assumed state is false, this is the worst state.
+/// The worst state (invalid state) of a set of potential values is when the
+/// set contains every possible value (i.e., we cannot limit in any way the
+/// values that the target position can take). That never happens naturally;
+/// we only force it. For the conditions under which we force it, see
+/// AAPotentialValues.
+template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>>
+struct PotentialValuesState : AbstractState {
+ using SetTy = DenseSet<MemberTy, KeyInfo>;
+
+ PotentialValuesState() : IsValidState(true), UndefIsContained(false) {}
+
+ PotentialValuesState(bool IsValid)
+ : IsValidState(IsValid), UndefIsContained(false) {}
+
+ /// See AbstractState::isValidState(...)
+ bool isValidState() const override { return IsValidState.isValidState(); }
+
+ /// See AbstractState::isAtFixpoint(...)
+ bool isAtFixpoint() const override { return IsValidState.isAtFixpoint(); }
+
+ /// See AbstractState::indicatePessimisticFixpoint(...)
+ ChangeStatus indicatePessimisticFixpoint() override {
+ return IsValidState.indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractState::indicateOptimisticFixpoint(...)
+ ChangeStatus indicateOptimisticFixpoint() override {
+ return IsValidState.indicateOptimisticFixpoint();
+ }
+
+ /// Return the assumed state
+ PotentialValuesState &getAssumed() { return *this; }
+ const PotentialValuesState &getAssumed() const { return *this; }
+
+ /// Return this set. We should check whether this set is valid or not by
+ /// isValidState() before calling this function.
+ const SetTy &getAssumedSet() const {
+ assert(isValidState() && "This set should not be used when it is invalid!");
+ return Set;
+ }
+
+ /// Returns whether this state contains an undef value or not.
+ bool undefIsContained() const {
+ assert(isValidState() && "This flag should not be used when it is invalid!");
+ return UndefIsContained;
+ }
+
+ bool operator==(const PotentialValuesState &RHS) const {
+ if (isValidState() != RHS.isValidState())
+ return false;
+ if (!isValidState() && !RHS.isValidState())
+ return true;
+ if (undefIsContained() != RHS.undefIsContained())
+ return false;
+ return Set == RHS.getAssumedSet();
+ }
+
+ /// Maximum number of potential values to be tracked.
+ /// This is set by the -attributor-max-potential-values command line option.
+ static unsigned MaxPotentialValues;
+
+ /// Return empty set as the best state of potential values.
+ static PotentialValuesState getBestState() {
+ return PotentialValuesState(true);
+ }
+
+ static PotentialValuesState getBestState(PotentialValuesState &PVS) {
+ return getBestState();
+ }
+
+ /// Return full set as the worst state of potential values.
+ static PotentialValuesState getWorstState() {
+ return PotentialValuesState(false);
+ }
+
+ /// Union assumed set with the passed value.
+ void unionAssumed(const MemberTy &C) { insert(C); }
+
+ /// Union assumed set with assumed set of the passed state \p PVS.
+ void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); }
+
+ /// Union assumed set with an undef value.
+ void unionAssumedWithUndef() { unionWithUndef(); }
+
+ /// "Clamp" this state with \p PVS.
+ PotentialValuesState operator^=(const PotentialValuesState &PVS) {
+ IsValidState ^= PVS.IsValidState;
+ unionAssumed(PVS);
+ return *this;
+ }
+
+ PotentialValuesState operator&=(const PotentialValuesState &PVS) {
+ IsValidState &= PVS.IsValidState;
+ unionAssumed(PVS);
+ return *this;
+ }
+
+private:
+ /// Check the size of this set, and invalidate it when the size reaches the
+ /// \p MaxPotentialValues threshold.
+ void checkAndInvalidate() {
+ if (Set.size() >= MaxPotentialValues)
+ indicatePessimisticFixpoint();
+ }
+
+ /// If this state contains both undef and non-undef values, we can drop the
+ /// undef in favor of the concrete values.
+ void reduceUndefValue() { UndefIsContained = UndefIsContained & Set.empty(); }
+
+ /// Insert an element into this set.
+ void insert(const MemberTy &C) {
+ if (!isValidState())
+ return;
+ Set.insert(C);
+ checkAndInvalidate();
+ }
+
+ /// Take union with R.
+ void unionWith(const PotentialValuesState &R) {
+ /// If this is a full set, do nothing.
+ if (!isValidState())
+ return;
+ /// If R is a full set, change this to a full set as well.
+ if (!R.isValidState()) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+ for (const MemberTy &C : R.Set)
+ Set.insert(C);
+ UndefIsContained |= R.undefIsContained();
+ reduceUndefValue();
+ checkAndInvalidate();
+ }
+
+ /// Take union with an undef value.
+ void unionWithUndef() {
+ UndefIsContained = true;
+ reduceUndefValue();
+ }
+
+ /// Take intersection with R.
+ void intersectWith(const PotentialValuesState &R) {
+ /// If R is a full set, do nothing.
+ if (!R.isValidState())
+ return;
+ /// If this is a full set, change this to R.
+ if (!isValidState()) {
+ *this = R;
+ return;
+ }
+ SetTy IntersectSet;
+ for (const MemberTy &C : Set) {
+ if (R.Set.count(C))
+ IntersectSet.insert(C);
+ }
+ Set = IntersectSet;
+ UndefIsContained &= R.undefIsContained();
+ reduceUndefValue();
+ }
+
+ /// A helper state which indicates whether this state is valid or not.
+ BooleanState IsValidState;
+
+ /// Container for potential values
+ SetTy Set;
+
+ /// Flag for undef value
+ bool UndefIsContained;
+};
+
+using PotentialConstantIntValuesState = PotentialValuesState<APInt>;
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const PotentialConstantIntValuesState &R);
+
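+/// A minimal usage sketch of the set state above (values are illustrative):
+/// \code
+///   PotentialConstantIntValuesState S; // best state: empty set, no undef
+///   S.unionAssumed(APInt(32, 1));      // assumed set becomes {1}
+///   S.unionAssumed(APInt(32, 2));      // assumed set becomes {1, 2}
+///   S.unionAssumedWithUndef();         // undef is reduced away immediately
+///                                      // because the set is non-empty
+///   S.isValidState();                  // stays true until the set reaches
+///                                      // MaxPotentialValues elements
+/// \endcode
+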
+/// An abstract interface for potential values analysis.
+///
+/// This AA collects potential values for each IR position.
+/// An assumed set of potential values is initialized with the empty set (the
+/// best state) and it will grow monotonically as we find more potential values
+/// for this position.
+/// The set might be forced to the worst state, that is, to contain every
+/// possible value for this position, in two cases:
+/// 1. We surpassed the \p MaxPotentialValues threshold. This includes the
+/// case that this position is affected (e.g. because of an operation) by a
+/// Value that is in the worst state.
+/// 2. We tried to initialize on a Value that we cannot handle (e.g. an
+/// operator we do not currently handle).
+///
+/// TODO: Support values other than constant integers.
+struct AAPotentialValues
+ : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> {
+ using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>;
+ AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+ /// See AbstractAttribute::getState(...).
+ PotentialConstantIntValuesState &getState() override { return *this; }
+ const PotentialConstantIntValuesState &getState() const override {
+ return *this;
+ }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAPotentialValues &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// Return the assumed constant integer for the associated value.
+ Optional<ConstantInt *>
+ getAssumedConstantInt(Attributor &A,
+ const Instruction *CtxI = nullptr) const {
+ if (!isValidState())
+ return nullptr;
+ if (getAssumedSet().size() == 1)
+ return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(),
+ *(getAssumedSet().begin())));
+ if (getAssumedSet().size() == 0) {
+ if (undefIsContained())
+ return cast<ConstantInt>(
+ ConstantInt::get(getAssociatedValue().getType(), 0));
+ return llvm::None;
+ }
+
+ return nullptr;
+ }
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AAPotentialValues"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAPotentialValues
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
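+
+/// A minimal query sketch (illustrative; assumes an Attributor `A`, a querying
+/// abstract attribute `*this`, an IRPosition `VP` for the value of interest,
+/// and the usual Attributor::getAAFor helper):
+/// \code
+///   const auto &PotentialAA = A.getAAFor<AAPotentialValues>(*this, VP);
+///   Optional<ConstantInt *> C = PotentialAA.getAssumedConstantInt(A);
+///   if (C.hasValue() && *C)
+///     ; // the associated value is assumed to be the single constant *C
+/// \endcode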
+
+/// An abstract interface for all noundef attributes.
+struct AANoUndef
+ : public IRAttribute<Attribute::NoUndef,
+ StateWrapper<BooleanState, AbstractAttribute>> {
+ AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+
+ /// Return true if we assume that the underlying value is noundef.
+ bool isAssumedNoUndef() const { return getAssumed(); }
+
+ /// Return true if we know that underlying value is noundef.
+ bool isKnownNoUndef() const { return getKnown(); }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANoUndef &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AANoUndef"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is AANoUndef
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
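+
+/// A minimal query sketch for AANoUndef (illustrative; same assumptions as the
+/// sketch above: an Attributor `A`, a querying abstract attribute `*this`, and
+/// an IRPosition `VP` for the value of interest):
+/// \code
+///   const auto &NoUndefAA = A.getAAFor<AANoUndef>(*this, VP);
+///   if (NoUndefAA.isAssumedNoUndef())
+///     ; // the value is assumed to never be undef or poison
+/// \endcode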
+
/// Run options, used by the pass manager.
enum AttributorRunOption {
NONE = 0,
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/BlockExtractor.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/BlockExtractor.h
new file mode 100644
index 000000000000..deeb5ebe23d9
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/BlockExtractor.h
@@ -0,0 +1,25 @@
+//===- BlockExtractor.h - Extracts blocks into their own functions --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass extracts the specified basic blocks from the module into their
+// own functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_BLOCKEXTRACTOR_H
+#define LLVM_TRANSFORMS_IPO_BLOCKEXTRACTOR_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+struct BlockExtractorPass : PassInfoMixin<BlockExtractorPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_BLOCKEXTRACTOR_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h
index c2626d0867b4..782633799ede 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h
@@ -19,7 +19,6 @@
#ifndef LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H
#define LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h
index 8440df639729..d34a51081101 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h
@@ -14,7 +14,6 @@
#ifndef LLVM_TRANSFORMS_IPO_CROSSDSOCFI_H
#define LLVM_TRANSFORMS_IPO_CROSSDSOCFI_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
index 7379009b2592..fd99843d0449 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -13,7 +13,6 @@
#ifndef LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H
#define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/IROutliner.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/IROutliner.h
new file mode 100644
index 000000000000..eefcbe5235c1
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -0,0 +1,358 @@
+//===- IROutliner.h - Extract similar IR regions into functions ------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// The interface file for the IROutliner which is used by the IROutliner Pass.
+//
+// The outliner uses the IRSimilarityIdentifier to identify the similar regions
+// of code. It evaluates each set of IRSimilarityCandidates with an estimate of
+// whether it will provide code size reduction. Each region is extracted using
+// the code extractor. These extracted functions are consolidated into a single
+// function and called from the extracted call site.
+//
+// For example:
+// \code
+// %1 = add i32 %a, %b
+// %2 = add i32 %b, %a
+// %3 = add i32 %b, %a
+// %4 = add i32 %a, %b
+// \endcode
+// would become function
+// \code
+// define internal void @outlined_ir_function(i32 %0, i32 %1) {
+// %2 = add i32 %0, %1
+// %3 = add i32 %1, %0
+// ret void
+// }
+// \endcode
+// with calls:
+// \code
+// call void @outlined_ir_function(i32 %a, i32 %b)
+// call void @outlined_ir_function(i32 %b, i32 %a)
+// \endcode
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
+#define LLVM_TRANSFORMS_IPO_IROUTLINER_H
+
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Support/InstructionCost.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include <set>
+
+struct OutlinableGroup;
+
+namespace llvm {
+using namespace IRSimilarity;
+
+class Module;
+class TargetTransformInfo;
+class OptimizationRemarkEmitter;
+
+/// The OutlinableRegion holds all the information for a specific region, or
+/// sequence of instructions. This includes what values need to be hoisted to
+/// arguments from the extracted function, inputs and outputs to the region, and
+/// mapping from the extracted function arguments to overall function arguments.
+struct OutlinableRegion {
+ /// Describes the region of code.
+ IRSimilarityCandidate *Candidate;
+
+ /// If this region is outlined, the front and back IRInstructionData could
+ /// potentially become invalidated if the only new instruction is a call.
+ /// These ensure that we replace the instruction in the IRInstructionData.
+ IRInstructionData *NewFront = nullptr;
+ IRInstructionData *NewBack = nullptr;
+
+ /// The number of extracted inputs from the CodeExtractor.
+ unsigned NumExtractedInputs;
+
+ /// The corresponding BasicBlock with the appropriate stores for this
+ /// OutlinableRegion in the overall function.
+ unsigned OutputBlockNum;
+
+ /// Mapping the extracted argument number to the argument number in the
+ /// overall function. Since there will be inputs, such as elevated constants
+ /// that are not the same in each region in a SimilarityGroup, or values that
+ /// cannot be sunk into the extracted section in every region, we must keep
+ /// track of which extracted argument maps to which overall argument.
+ DenseMap<unsigned, unsigned> ExtractedArgToAgg;
+ DenseMap<unsigned, unsigned> AggArgToExtracted;
+
+ /// Mapping of the argument number in the deduplicated function
+ /// to a given constant, which is used when creating the arguments to the call
+ /// to the newly created deduplicated function. This is handled separately
+ /// since the CodeExtractor does not recognize constants.
+ DenseMap<unsigned, Constant *> AggArgToConstant;
+
+ /// The global value numbers that are used as outputs for this section. Once
+ /// extracted, each output will be stored to an output register. This
+ /// documents the global value numbers that are used in this pattern.
+ SmallVector<unsigned, 4> GVNStores;
+
+ /// Used to create an outlined function.
+ CodeExtractor *CE = nullptr;
+
+ /// The call site of the extracted region.
+ CallInst *Call = nullptr;
+
+ /// The function for the extracted region.
+ Function *ExtractedFunction = nullptr;
+
+ /// Flag for whether we have split out the IRSimilarityCandidate. That is,
+ /// made the region containing the IRSimilarityCandidate its own BasicBlock.
+ bool CandidateSplit = false;
+
+ /// Flag for whether we should not consider this region for extraction.
+ bool IgnoreRegion = false;
+
+ /// The BasicBlock that is before the start of the region BasicBlock,
+ /// only defined when the region has been split.
+ BasicBlock *PrevBB = nullptr;
+
+ /// The BasicBlock that contains the starting instruction of the region.
+ BasicBlock *StartBB = nullptr;
+
+ /// The BasicBlock that contains the ending instruction of the region.
+ BasicBlock *EndBB = nullptr;
+
+ /// The BasicBlock that comes after the end of the region BasicBlock,
+ /// only defined when the region has been split.
+ BasicBlock *FollowBB = nullptr;
+
+ /// The Outlinable Group that contains this region and structurally similar
+ /// regions to this region.
+ OutlinableGroup *Parent = nullptr;
+
+ OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
+ : Candidate(&C), Parent(&Group) {
+ StartBB = C.getStartBB();
+ EndBB = C.getEndBB();
+ }
+
+ /// For the contained region, split the parent BasicBlock at the starting and
+ /// ending instructions of the contained IRSimilarityCandidate.
+ void splitCandidate();
+
+ /// For the contained region, reattach the BasicBlock at the starting and
+ /// ending instructions of the contained IRSimilarityCandidate, or if the
+ /// function has been extracted, the start and end of the BasicBlock
+ /// containing the called function.
+ void reattachCandidate();
+
+ /// Get the size of the code removed from the region.
+ ///
+ /// \param [in] TTI - The TargetTransformInfo for the parent function.
+ /// \returns the code size of the region
+ InstructionCost getBenefit(TargetTransformInfo &TTI);
+};
+
+/// This class is a pass that identifies similarity in a Module, extracts
+/// instances of the similarity, and then consolidates the similar regions
+/// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass
+/// to identify the similar regions of code, and then extracts the similar
+/// sections into a single function. See the file comment above for an example
+/// of how code is extracted and consolidated into a single function.
+class IROutliner {
+public:
+ IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
+ function_ref<IRSimilarityIdentifier &(Module &)> GIRSI,
+ function_ref<OptimizationRemarkEmitter &(Function &)> GORE)
+ : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {}
+ bool run(Module &M);
+
+private:
+ /// Find repeated similar code sequences in \p M and outline them into new
+ /// Functions.
+ ///
+ /// \param [in] M - The module to outline from.
+ /// \returns The number of Functions created.
+ unsigned doOutline(Module &M);
+
+ /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
+ /// instructions contained in a previously outlined region and put the
+ /// remaining regions in \p CurrentGroup.
+ ///
+ /// \param [in] CandidateVec - List of similarity candidates for regions with
+ /// the same similarity structure.
+ /// \param [in,out] CurrentGroup - Contains the potential sections to
+ /// be outlined.
+ void
+ pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
+ OutlinableGroup &CurrentGroup);
+
+ /// Create the function based on the overall types found in the current
+ /// regions being outlined.
+ ///
+ /// \param M - The module to outline from.
+ /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
+ /// \param [in] FunctionNameSuffix - The number of functions we have previously
+ /// created, used to give the new function a unique name.
+ /// \returns the newly created function.
+ Function *createFunction(Module &M, OutlinableGroup &CG,
+ unsigned FunctionNameSuffix);
+
+ /// Identify the needed extracted inputs in a section, and add to the overall
+ /// function if needed.
+ ///
+ /// \param [in] M - The module to outline from.
+ /// \param [in,out] Region - The region to be extracted.
+ /// \param [in] NotSame - The global value numbers of the Values in the region
+ /// that do not have the same Constant in each structurally similar region.
+ void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
+ DenseSet<unsigned> &NotSame);
+
+ /// Find the number of instructions that will be removed by extracting the
+ /// OutlinableRegions in \p CurrentGroup.
+ ///
+ /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
+ /// analyzed.
+ /// \returns the number of outlined instructions across all regions.
+ InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
+
+ /// Find the number of instructions that will be added by reloading arguments.
+ ///
+ /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
+ /// analyzed.
+ /// \returns the number of added reload instructions across all regions.
+ InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
+
+ /// Find the cost and the benefit of \p CurrentGroup and save it back to
+ /// \p CurrentGroup.
+ ///
+ /// \param [in] M - The module being analyzed
+ /// \param [in,out] CurrentGroup - The overall outlined section
+ void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
+
+ /// Update the output mapping based on the load instruction, and the outputs
+ /// of the extracted function.
+ ///
+ /// \param Region - The region extracted
+ /// \param Outputs - The outputs from the extracted function.
+ /// \param LI - The load instruction used to update the mapping.
+ void updateOutputMapping(OutlinableRegion &Region,
+ ArrayRef<Value *> Outputs, LoadInst *LI);
+
+ /// Extract \p Region into its own function.
+ ///
+ /// \param [in] Region - The region to be extracted into its own function.
+ /// \returns True if it was successfully outlined.
+ bool extractSection(OutlinableRegion &Region);
+
+ /// For the similarities found, and the extracted sections, create a single
+ /// outlined function with appropriate output blocks as necessary.
+ ///
+ /// \param [in] M - The module to outline from
+ /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
+ /// \param [in,out] FuncsToRemove - List of functions to remove from the
+ /// module after outlining is completed.
+ /// \param [in,out] OutlinedFunctionNum - the number of new outlined
+ /// functions.
+ void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
+ std::vector<Function *> &FuncsToRemove,
+ unsigned &OutlinedFunctionNum);
+
+ /// If true, enables us to outline from functions that have LinkOnceODR
+ /// linkage.
+ bool OutlineFromLinkODRs = false;
+
+ /// If false, we do not worry if the cost is greater than the benefit. This
+ /// is for debugging and testing, so that we can test small cases to ensure
+ /// that the outlining is being done correctly.
+ bool CostModel = true;
+
+ /// The set of outlined Instructions, identified by their location in the
+ /// sequential ordering of instructions in a Module.
+ DenseSet<unsigned> Outlined;
+
+ /// TargetTransformInfo lambda for target specific information.
+ function_ref<TargetTransformInfo &(Function &)> getTTI;
+
+ /// A mapping from newly created reloaded output values to the original value.
+ /// If a value is replaced by an output from an outlined region, this maps
+ /// that Value back to its original Value.
+ DenseMap<Value *, Value *> OutputMappings;
+
+ /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
+ function_ref<IRSimilarityIdentifier &(Module &)> getIRSI;
+
+ /// The optimization remark emitter for the pass.
+ function_ref<OptimizationRemarkEmitter &(Function &)> getORE;
+
+ /// The memory allocator used to allocate the CodeExtractors.
+ SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
+
+ /// The memory allocator used to allocate the OutlinableRegions.
+ SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator;
+
+ /// The memory allocator used to allocate new IRInstructionData.
+ SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+
+ /// Custom InstVisitor to classify whether an instruction can be analyzed for
+ /// similarity. This is needed as there may be instructions we can identify as
+ /// having similarity, but that are more complicated to outline.
+ struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
+ InstructionAllowed() {}
+
+ // TODO: Determine a scheme to resolve when the label is similar enough.
+ bool visitBranchInst(BranchInst &BI) { return false; }
+ // TODO: Determine a scheme to resolve when the labels are similar enough.
+ bool visitPHINode(PHINode &PN) { return false; }
+ // TODO: Handle allocas.
+ bool visitAllocaInst(AllocaInst &AI) { return false; }
+ // VAArg instructions are not allowed since this could cause difficulty when
+ // differentiating between different sets of variable instructions in
+ // the deduplicated outlined regions.
+ bool visitVAArgInst(VAArgInst &VI) { return false; }
+ // We exclude all exception handling cases since they are so context
+ // dependent.
+ bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
+ bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
+ // DebugInfo should be included in the regions, but should not be
+ // analyzed for similarity as it has no bearing on the outcome of the
+ // program.
+ bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
+ // TODO: Handle specific intrinsics individually from those that can be
+ // handled.
+ bool visitIntrinsicInst(IntrinsicInst &II) { return false; }
+ // We only handle CallInsts that are not indirect, since we cannot guarantee
+ // that they have a name in these cases.
+ bool visitCallInst(CallInst &CI) {
+ Function *F = CI.getCalledFunction();
+ if (!F || CI.isIndirectCall() || !F->hasName())
+ return false;
+ return true;
+ }
+ // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside
+ // the outlined region, and then returned as an output, this will have to be
+ // handled differently.
+ bool visitFreezeInst(FreezeInst &CI) { return false; }
+ // TODO: We do not currently handle similarity that changes the control flow.
+ bool visitInvokeInst(InvokeInst &II) { return false; }
+ // TODO: We do not currently handle similarity that changes the control flow.
+ bool visitCallBrInst(CallBrInst &CBI) { return false; }
+ // TODO: Handle interblock similarity.
+ bool visitTerminator(Instruction &I) { return false; }
+ bool visitInstruction(Instruction &I) { return true; }
+ };
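+
+ /// A minimal sketch of how the visitor above is used (illustrative; `I` is
+ /// any Instruction):
+ /// \code
+ ///   InstructionAllowed IA;
+ ///   bool Allowed = IA.visit(I); // dispatches to the handlers above
+ /// \endcode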
+
+ /// An InstVisitor used to exclude certain instructions from being outlined.
+ InstructionAllowed InstructionClassifier;
+};
+
+/// Pass to outline similar regions.
+class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Inliner.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Inliner.h
index 3454b0af0d9f..21ff86994ce1 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Inliner.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Inliner.h
@@ -14,8 +14,9 @@
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
#include <utility>
namespace llvm {
@@ -96,10 +97,8 @@ protected:
/// passes be composed to achieve the same end result.
class InlinerPass : public PassInfoMixin<InlinerPass> {
public:
- InlinerPass() = default;
- ~InlinerPass();
- InlinerPass(InlinerPass &&Arg)
- : ImportedFunctionsStats(std::move(Arg.ImportedFunctionsStats)) {}
+ InlinerPass(bool OnlyMandatory = false) : OnlyMandatory(OnlyMandatory) {}
+ InlinerPass(InlinerPass &&Arg) = default;
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
@@ -107,8 +106,8 @@ public:
private:
InlineAdvisor &getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
FunctionAnalysisManager &FAM, Module &M);
- std::unique_ptr<ImportedFunctionsInliningStatistics> ImportedFunctionsStats;
- Optional<DefaultInlineAdvisor> OwnedDefaultAdvisor;
+ std::unique_ptr<InlineAdvisor> OwnedAdvisor;
+ const bool OnlyMandatory;
};
/// Module pass, wrapping the inliner pass. This works in conjunction with the
@@ -121,6 +120,7 @@ class ModuleInlinerWrapperPass
public:
ModuleInlinerWrapperPass(
InlineParams Params = getInlineParams(), bool Debugging = false,
+ bool MandatoryFirst = true,
InliningAdvisorMode Mode = InliningAdvisorMode::Default,
unsigned MaxDevirtIterations = 0);
ModuleInlinerWrapperPass(ModuleInlinerWrapperPass &&Arg) = default;
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/LoopExtractor.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/LoopExtractor.h
new file mode 100644
index 000000000000..def3c5943919
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/LoopExtractor.h
@@ -0,0 +1,32 @@
+//===- LoopExtractor.h - Extract each loop into a new function ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass wrapper around the ExtractLoop() scalar transformation to extract each
+// top-level loop into its own new function. If the loop is the ONLY loop in a
+// given function, it is not touched. This is a pass most useful for debugging
+// via bugpoint.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_LOOPEXTRACTOR_H
+#define LLVM_TRANSFORMS_IPO_LOOPEXTRACTOR_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct LoopExtractorPass : public PassInfoMixin<LoopExtractorPass> {
+ LoopExtractorPass(unsigned NumLoops = ~0) : NumLoops(NumLoops) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ unsigned NumLoops;
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_LOOPEXTRACTOR_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h
index 5e91ae599363..eb682c437b94 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h
@@ -198,10 +198,14 @@ bool isJumpTableCanonical(Function *F);
} // end namespace lowertypetests
class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> {
+ bool UseCommandLine = false;
+
+ ModuleSummaryIndex *ExportSummary = nullptr;
+ const ModuleSummaryIndex *ImportSummary = nullptr;
+ bool DropTypeTests = true;
+
public:
- ModuleSummaryIndex *ExportSummary;
- const ModuleSummaryIndex *ImportSummary;
- bool DropTypeTests;
+ LowerTypeTestsPass() : UseCommandLine(true) {}
LowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary,
bool DropTypeTests = false)
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
index d96187b73f9b..9b72ee0afd28 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
@@ -33,6 +33,11 @@ struct OpenMPInModule {
bool isKnown() { return Value != OpenMP::UNKNOWN; }
operator bool() { return Value != OpenMP::NOT_FOUND; }
+ /// Does this function \p F contain any OpenMP runtime calls?
+ bool containsOMPRuntimeCalls(Function *F) const {
+ return FuncsWithOMPRuntimeCalls.contains(F);
+ }
+
/// Return the known kernels (=GPU entry points) in the module.
SmallPtrSetImpl<Kernel> &getKernels() { return Kernels; }
@@ -42,6 +47,11 @@ struct OpenMPInModule {
private:
enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN;
+ friend bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule);
+
+ /// In which functions are OpenMP runtime calls present?
+ SmallPtrSet<Function *, 32> FuncsWithOMPRuntimeCalls;
+
/// Collection of known kernels (=GPU entry points) in the module.
SmallPtrSet<Kernel, 8> Kernels;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
new file mode 100644
index 000000000000..da0bdae0eaee
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -0,0 +1,152 @@
+//===- Transforms/IPO/SampleContextTracker.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file provides the interface for context-sensitive profile tracker used
+/// by CSSPGO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H
+#define LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include <list>
+#include <map>
+#include <vector>
+
+using namespace llvm;
+using namespace sampleprof;
+
+namespace llvm {
+
+// Internal trie representation used for tracking the context tree and sample
+// profiles. The path from the root node to a given node represents the context
+// of that node's profile.
+class ContextTrieNode {
+public:
+ ContextTrieNode(ContextTrieNode *Parent = nullptr,
+ StringRef FName = StringRef(),
+ FunctionSamples *FSamples = nullptr,
+ LineLocation CallLoc = {0, 0})
+ : ParentContext(Parent), FuncName(FName), FuncSamples(FSamples),
+ CallSiteLoc(CallLoc){};
+ ContextTrieNode *getChildContext(const LineLocation &CallSite,
+ StringRef CalleeName);
+ ContextTrieNode *getHottestChildContext(const LineLocation &CallSite);
+ ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite,
+ StringRef CalleeName,
+ bool AllowCreate = true);
+
+ ContextTrieNode &moveToChildContext(const LineLocation &CallSite,
+ ContextTrieNode &&NodeToMove,
+ StringRef ContextStrToRemove,
+ bool DeleteNode = true);
+ void removeChildContext(const LineLocation &CallSite, StringRef CalleeName);
+ std::map<uint32_t, ContextTrieNode> &getAllChildContext();
+ const StringRef getFuncName() const;
+ FunctionSamples *getFunctionSamples() const;
+ void setFunctionSamples(FunctionSamples *FSamples);
+ LineLocation getCallSiteLoc() const;
+ ContextTrieNode *getParentContext() const;
+ void setParentContext(ContextTrieNode *Parent);
+ void dump();
+
+private:
+ static uint32_t nodeHash(StringRef ChildName, const LineLocation &Callsite);
+
+ // Map line+discriminator location to child context
+ std::map<uint32_t, ContextTrieNode> AllChildContext;
+
+ // Link to parent context node
+ ContextTrieNode *ParentContext;
+
+ // Function name for current context
+ StringRef FuncName;
+
+ // Function Samples for current context
+ FunctionSamples *FuncSamples;
+
+ // Callsite location in parent context
+ LineLocation CallSiteLoc;
+};
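+
+// A minimal sketch (hypothetical call-site locations) of how a calling context
+// maps onto the trie: the path root -> "main" -> "foo" -> "bar" represents
+// bar's profile in the context "called from foo at line 5, which is called
+// from main at line 3".
+//
+//   ContextTrieNode Root;
+//   ContextTrieNode *M = Root.getOrCreateChildContext(LineLocation(0, 0), "main");
+//   ContextTrieNode *F = M->getOrCreateChildContext(LineLocation(3, 0), "foo");
+//   ContextTrieNode *B = F->getOrCreateChildContext(LineLocation(5, 0), "bar");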
+
+// Profile tracker that manages profiles and their associated contexts. It
+// provides interfaces used by the sample profile loader to query the context
+// profile or base profile for a given function or location; it also manages
+// the context tree manipulation needed to accommodate inline decisions so we
+// have accurate post-inline profiles for functions. Internally, context
+// profiles are organized in a trie, with each node representing the profile
+// for a specific calling context; the context is identified by the path from
+// the root to the node.
+class SampleContextTracker {
+public:
+ using ContextSamplesTy = SmallSet<FunctionSamples *, 16>;
+
+ SampleContextTracker(StringMap<FunctionSamples> &Profiles);
+ // Query the context profile for a specific callee with a given name at a
+ // given call site. The full context is identified by the location of the
+ // call instruction.
+ FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst,
+ StringRef CalleeName);
+ // Get samples for indirect call targets for call site at given location.
+ std::vector<const FunctionSamples *>
+ getIndirectCalleeContextSamplesFor(const DILocation *DIL);
+ // Query the context profile for a given location. The full context
+ // is identified by the input DILocation.
+ FunctionSamples *getContextSamplesFor(const DILocation *DIL);
+ // Query the context profile for a given sample context of a function.
+ FunctionSamples *getContextSamplesFor(const SampleContext &Context);
+ // Get all context profile for given function.
+ ContextSamplesTy &getAllContextSamplesFor(const Function &Func);
+ ContextSamplesTy &getAllContextSamplesFor(StringRef Name);
+ // Query base profile for a given function. A base profile is a merged view
+ // of all context profiles for contexts that are not inlined.
+ FunctionSamples *getBaseSamplesFor(const Function &Func,
+ bool MergeContext = true);
+ // Query base profile for a given function by name.
+ FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext);
+ // Mark a context profile as inlined when its function is inlined.
+ // This makes sure that the inlined context profile will be excluded from
+ // the function's base profile.
+ void markContextSamplesInlined(const FunctionSamples *InlinedSamples);
+ void promoteMergeContextSamplesTree(const Instruction &Inst,
+ StringRef CalleeName);
+ void addCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
+ // Dump the internal context profile trie.
+ void dump();
+
+private:
+ ContextTrieNode *getContextFor(const DILocation *DIL);
+ ContextTrieNode *getContextFor(const SampleContext &Context);
+ ContextTrieNode *getCalleeContextFor(const DILocation *DIL,
+ StringRef CalleeName);
+ ContextTrieNode *getOrCreateContextPath(const SampleContext &Context,
+ bool AllowCreate);
+ ContextTrieNode *getTopLevelContextNode(StringRef FName);
+ ContextTrieNode &addTopLevelContextNode(StringRef FName);
+ ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo);
+ void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode,
+ StringRef ContextStrToRemove);
+ ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
+ ContextTrieNode &ToNodeParent,
+ StringRef ContextStrToRemove);
+
+ // Map from function name to context profiles (excluding base profile)
+ StringMap<ContextSamplesTy> FuncToCtxtProfileSet;
+
+ // Root node for context trie tree
+ ContextTrieNode RootContext;
+};
+
+} // end namespace llvm
+#endif // LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleProfile.h
index a5ad44551bf6..3d929b974044 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleProfile.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleProfile.h
@@ -24,17 +24,18 @@ class Module;
/// The sample profiler data loader pass.
class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> {
public:
- SampleProfileLoaderPass(std::string File = "", std::string RemappingFile = "",
- bool IsThinLTOPreLink = false)
+ SampleProfileLoaderPass(
+ std::string File = "", std::string RemappingFile = "",
+ ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
: ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
- IsThinLTOPreLink(IsThinLTOPreLink) {}
+ LTOPhase(LTOPhase) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
std::string ProfileFileName;
std::string ProfileRemappingFileName;
- bool IsThinLTOPreLink;
+ ThinOrFullLTOPhase LTOPhase;
};
} // end namespace llvm
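A hypothetical pipeline snippet showing the new LTO-phase parameter in use; the helper name is invented, and the assumption is that ThinOrFullLTOPhase comes from llvm/Pass.h in this release:

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO/SampleProfile.h"

using namespace llvm;

// Schedule the sample loader for the ThinLTO pre-link phase of a pipeline.
static void addSampleLoader(ModulePassManager &MPM, StringRef ProfileFile) {
  MPM.addPass(SampleProfileLoaderPass(ProfileFile.str(),
                                      /*RemappingFile=*/"",
                                      ThinOrFullLTOPhase::ThinLTOPreLink));
}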
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
new file mode 100644
index 000000000000..0fd79d8ff7f3
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -0,0 +1,147 @@
+//===- Transforms/IPO/SampleProfileProbe.h ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file provides the interface for the pseudo probe implementation for
+/// AutoFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
+#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Target/TargetMachine.h"
+#include <unordered_map>
+
+namespace llvm {
+
+class Module;
+
+using namespace sampleprof;
+using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>;
+using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>;
+using ProbeFactorMap = std::unordered_map<uint64_t, float>;
+using FuncProbeFactorMap = StringMap<ProbeFactorMap>;
+
+enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid };
+
+class PseudoProbeDescriptor {
+ uint64_t FunctionGUID;
+ uint64_t FunctionHash;
+
+public:
+ PseudoProbeDescriptor(uint64_t GUID, uint64_t Hash)
+ : FunctionGUID(GUID), FunctionHash(Hash) {}
+ uint64_t getFunctionGUID() const { return FunctionGUID; }
+ uint64_t getFunctionHash() const { return FunctionHash; }
+};
+
+// A pseudo probe verifier that can be run after each IR pass to detect
+// violations in probe factor updates. In principle, the sum of the
+// distribution factors for a probe should be identical before and after a
+// pass. For a function pass, the factor sum for a probe would typically be 100%.
+class PseudoProbeVerifier {
+public:
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+ // Implementation of pass instrumentation callbacks for new pass manager.
+ void runAfterPass(StringRef PassID, Any IR);
+
+private:
+ // Allow a little bias due to the rounding to integral factors.
+ constexpr static float DistributionFactorVariance = 0.02f;
+ // Distribution factors from last pass.
+ FuncProbeFactorMap FunctionProbeFactors;
+
+ void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors);
+ void runAfterPass(const Module *M);
+ void runAfterPass(const LazyCallGraph::SCC *C);
+ void runAfterPass(const Function *F);
+ void runAfterPass(const Loop *L);
+ bool shouldVerifyFunction(const Function *F);
+ void verifyProbeFactors(const Function *F,
+ const ProbeFactorMap &ProbeFactors);
+};
+
+// This class provides sample count correlation for SampleProfileLoader by
+// analyzing pseudo probes and their function descriptors injected by
+// SampleProfileProber.
+class PseudoProbeManager {
+ DenseMap<uint64_t, PseudoProbeDescriptor> GUIDToProbeDescMap;
+
+ const PseudoProbeDescriptor *getDesc(const Function &F) const;
+
+public:
+ PseudoProbeManager(const Module &M);
+ bool moduleIsProbed(const Module &M) const;
+ bool profileIsValid(const Function &F, const FunctionSamples &Samples) const;
+};
+
+/// Sample profile pseudo prober.
+///
+/// Insert pseudo probes for block sampling and value sampling.
+class SampleProfileProber {
+public:
+ // Give an empty module id when the prober is not used for instrumentation.
+ SampleProfileProber(Function &F, const std::string &CurModuleUniqueId);
+ void instrumentOneFunc(Function &F, TargetMachine *TM);
+
+private:
+ Function *getFunction() const { return F; }
+ uint64_t getFunctionHash() const { return FunctionHash; }
+ uint32_t getBlockId(const BasicBlock *BB) const;
+ uint32_t getCallsiteId(const Instruction *Call) const;
+ void computeCFGHash();
+ void computeProbeIdForBlocks();
+ void computeProbeIdForCallsites();
+
+ Function *F;
+
+ /// The current module ID that is used to name a static object as a comdat
+ /// group.
+ std::string CurModuleUniqueId;
+
+ /// A CFG hash code used to identify changes to a function's code.
+ uint64_t FunctionHash;
+
+ /// Map basic blocks to their pseudo probe ids.
+ BlockIdMap BlockProbeIds;
+
+ /// Map indirect calls to their pseudo probe ids.
+ InstructionIdMap CallProbeIds;
+
+ /// The ID of the last probe; can be used to number a new probe.
+ uint32_t LastProbeId;
+};
+
+class SampleProfileProbePass : public PassInfoMixin<SampleProfileProbePass> {
+ TargetMachine *TM;
+
+public:
+ SampleProfileProbePass(TargetMachine *TM) : TM(TM) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> {
+ void runOnFunction(Function &F, FunctionAnalysisManager &FAM);
+
+public:
+ PseudoProbeUpdatePass() {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+#endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
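To make the verifier's invariant concrete, here is a small standalone sketch (not part of the header) of the check it performs: per probe id, the summed distribution factor should stay within a small tolerance across a pass. The 0.02 default mirrors DistributionFactorVariance above.

#include <cmath>
#include <cstdint>
#include <unordered_map>

using ProbeFactorMap = std::unordered_map<uint64_t, float>;

// Returns true when no probe lost or gained more than Tolerance of its
// distribution factor between the two snapshots.
static bool factorsRoughlyPreserved(const ProbeFactorMap &Before,
                                    const ProbeFactorMap &After,
                                    float Tolerance = 0.02f) {
  for (const auto &KV : Before) {
    auto It = After.find(KV.first);
    float NewFactor = (It == After.end()) ? 0.0f : It->second;
    if (std::fabs(NewFactor - KV.second) > Tolerance)
      return false;
  }
  return true;
}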
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/StripSymbols.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/StripSymbols.h
new file mode 100644
index 000000000000..dd76d481d668
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/StripSymbols.h
@@ -0,0 +1,47 @@
+//===- StripSymbols.h - Strip symbols and debug info from a module --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The StripSymbols transformation implements code stripping. Specifically, it
+// can delete:
+//
+// * names for virtual registers
+// * symbols for internal globals and functions
+// * debug information
+//
+// Note that this transformation makes code much less readable, so it should
+// only be used in situations where the 'strip' utility would be used, such as
+// reducing code size or making it harder to reverse engineer code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_STRIPSYMBOLS_H
+#define LLVM_TRANSFORMS_IPO_STRIPSYMBOLS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct StripSymbolsPass : PassInfoMixin<StripSymbolsPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+struct StripNonDebugSymbolsPass : PassInfoMixin<StripNonDebugSymbolsPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+struct StripDebugDeclarePass : PassInfoMixin<StripDebugDeclarePass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+struct StripDeadDebugInfoPass : PassInfoMixin<StripDeadDebugInfoPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_STRIPSYMBOLS_H
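A small sketch (not taken from this change) of running the new-PM stripping passes declared above; the analysis-manager setup follows the usual PassBuilder pattern and the helper name is invented:

#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/IPO/StripSymbols.h"

using namespace llvm;

// Run the debug-info and symbol stripping passes over a module.
static PreservedAnalyses stripAll(Module &M) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM;
  MPM.addPass(StripDebugDeclarePass());
  MPM.addPass(StripDeadDebugInfoPass());
  MPM.addPass(StripSymbolsPass());
  return MPM.run(M, MAM);
}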
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index 86e28cfead80..6e92f8fd3f0d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -223,6 +223,9 @@ void setAfterReturnValues(MutableArrayRef<VirtualCallTarget> Targets,
struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
+ bool UseCommandLine = false;
+ WholeProgramDevirtPass()
+ : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {}
WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary)
: ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
new file mode 100644
index 000000000000..aae0694e4cab
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -0,0 +1,528 @@
+//===- InstCombiner.h - InstCombine implementation --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the interface for the instcombine pass implementation.
+/// The interface is used for generic transformations in this folder and
+/// target specific combinations in the targets.
+/// The visitor implementation is in \c InstCombinerImpl in
+/// \c InstCombineInternal.h.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINER_H
+#define LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINER_H
+
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include <cassert>
+
+#define DEBUG_TYPE "instcombine"
+
+namespace llvm {
+
+class AAResults;
+class AssumptionCache;
+class ProfileSummaryInfo;
+class TargetLibraryInfo;
+class TargetTransformInfo;
+
+/// The core instruction combiner logic.
+///
+/// This class provides the logic to recursively visit instructions and
+/// combine them.
+class LLVM_LIBRARY_VISIBILITY InstCombiner {
+ /// Only used to call target specific inst combining.
+ TargetTransformInfo &TTI;
+
+public:
+ /// Maximum size of array considered when transforming.
+ uint64_t MaxArraySizeForCombine = 0;
+
+ /// An IRBuilder that automatically inserts new instructions into the
+ /// worklist.
+ using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>;
+ BuilderTy &Builder;
+
+protected:
+ /// A worklist of the instructions that need to be simplified.
+ InstCombineWorklist &Worklist;
+
+ // Mode in which we are running the combiner.
+ const bool MinimizeSize;
+
+ AAResults *AA;
+
+ // Required analyses.
+ AssumptionCache &AC;
+ TargetLibraryInfo &TLI;
+ DominatorTree &DT;
+ const DataLayout &DL;
+ const SimplifyQuery SQ;
+ OptimizationRemarkEmitter &ORE;
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
+
+ // Optional analyses. When non-null, these can both be used to do better
+ // combining and will be updated to reflect any changes.
+ LoopInfo *LI;
+
+ bool MadeIRChange = false;
+
+public:
+ InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
+ bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
+ TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
+ DominatorTree &DT, OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ const DataLayout &DL, LoopInfo *LI)
+ : TTI(TTI), Builder(Builder), Worklist(Worklist),
+ MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL),
+ SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
+
+ virtual ~InstCombiner() {}
+
+ /// Return the source operand of a potentially bitcasted value while
+ /// optionally checking if it has one use. If there is no bitcast or the one
+ /// use check is not met, return the input value itself.
+ static Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) {
+ if (auto *BitCast = dyn_cast<BitCastInst>(V))
+ if (!OneUseOnly || BitCast->hasOneUse())
+ return BitCast->getOperand(0);
+
+ // V is not a bitcast or V has more than one use and OneUseOnly is true.
+ return V;
+ }
+
+ /// Assign a complexity or rank value to LLVM Values. This is used to reduce
+ /// the amount of pattern matching needed for compares and commutative
+ /// instructions. For example, if we have:
+ /// icmp ugt X, Constant
+ /// or
+ /// xor (add X, Constant), cast Z
+ ///
+ /// We do not have to consider the commuted variants of these patterns because
+ /// canonicalization based on complexity guarantees the above ordering.
+ ///
+ /// This routine maps IR values to various complexity ranks:
+ /// 0 -> undef
+ /// 1 -> Constants
+ /// 2 -> Other non-instructions
+ /// 3 -> Arguments
+ /// 4 -> Cast and (f)neg/not instructions
+ /// 5 -> Other instructions
+ static unsigned getComplexity(Value *V) {
+ if (isa<Instruction>(V)) {
+ if (isa<CastInst>(V) || match(V, m_Neg(PatternMatch::m_Value())) ||
+ match(V, m_Not(PatternMatch::m_Value())) ||
+ match(V, m_FNeg(PatternMatch::m_Value())))
+ return 4;
+ return 5;
+ }
+ if (isa<Argument>(V))
+ return 3;
+ return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
+ }
+
+ /// Predicate canonicalization reduces the number of patterns that need to be
+ /// matched by other transforms. For example, we may swap the operands of a
+ /// conditional branch or select to create a compare with a canonical
+ /// (inverted) predicate which is then more likely to be matched with other
+ /// values.
+ static bool isCanonicalPredicate(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ // TODO: There are 16 FCMP predicates. Should others be (not) canonical?
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_OGE:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+ /// Given an exploded icmp instruction, return true if the comparison only
+ /// checks the sign bit. If so, set TrueIfSigned to true when the comparison
+ /// is true for negative (sign-bit-set) inputs, and to false when it is true
+ /// for non-negative inputs.
+ static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
+ bool &TrueIfSigned) {
+ switch (Pred) {
+ case ICmpInst::ICMP_SLT: // True if LHS s< 0
+ TrueIfSigned = true;
+ return RHS.isNullValue();
+ case ICmpInst::ICMP_SLE: // True if LHS s<= -1
+ TrueIfSigned = true;
+ return RHS.isAllOnesValue();
+ case ICmpInst::ICMP_SGT: // True if LHS s> -1
+ TrueIfSigned = false;
+ return RHS.isAllOnesValue();
+ case ICmpInst::ICMP_SGE: // True if LHS s>= 0
+ TrueIfSigned = false;
+ return RHS.isNullValue();
+ case ICmpInst::ICMP_UGT:
+ // True if LHS u> RHS and RHS == sign-bit-mask - 1
+ TrueIfSigned = true;
+ return RHS.isMaxSignedValue();
+ case ICmpInst::ICMP_UGE:
+ // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = true;
+ return RHS.isMinSignedValue();
+ case ICmpInst::ICMP_ULT:
+ // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = false;
+ return RHS.isMinSignedValue();
+ case ICmpInst::ICMP_ULE:
+ // True if LHS u<= RHS and RHS == sign-bit-mask - 1
+ TrueIfSigned = false;
+ return RHS.isMaxSignedValue();
+ default:
+ return false;
+ }
+ }
+
+ /// Add one to a Constant
+ static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+ }
+
+ /// Subtract one from a Constant
+ static Constant *SubOne(Constant *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+ }
+
+ static llvm::Optional<std::pair<CmpInst::Predicate, Constant *>>
+ getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
+                                          Constant *C);
+
+ static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI) {
+ // a ? b : false and a ? true : b are the canonical form of logical and/or.
+ // This includes !a ? b : false and !a ? true : b. Absorbing the not into
+ // the select by swapping operands would break recognition of this pattern
+ // in other analyses, so don't do that.
+ return match(&SI, PatternMatch::m_LogicalAnd(PatternMatch::m_Value(),
+ PatternMatch::m_Value())) ||
+ match(&SI, PatternMatch::m_LogicalOr(PatternMatch::m_Value(),
+ PatternMatch::m_Value()));
+ }
+
+ /// Return true if the specified value is free to invert (apply ~ to).
+ /// This happens in cases where the ~ can be eliminated. If WillInvertAllUses
+ /// is true, work under the assumption that the caller intends to remove all
+ /// uses of V and only keep uses of ~V.
+ ///
+ /// See also: canFreelyInvertAllUsersOf()
+ static bool isFreeToInvert(Value *V, bool WillInvertAllUses) {
+ // ~(~(X)) -> X.
+ if (match(V, m_Not(PatternMatch::m_Value())))
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (match(V, PatternMatch::m_AnyIntegralConstant()))
+ return true;
+
+ // Compares can be inverted if all of their uses are being modified to use
+ // the ~V.
+ if (isa<CmpInst>(V))
+ return WillInvertAllUses;
+
+ // If `V` is of the form `A + Constant` then `-1 - V` can be folded into
+ // `(-1 - Constant) - A` if we are willing to invert all of the uses.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Sub)
+ if (isa<Constant>(BO->getOperand(0)) ||
+ isa<Constant>(BO->getOperand(1)))
+ return WillInvertAllUses;
+
+ // Selects with invertible operands are freely invertible
+ if (match(V,
+ m_Select(PatternMatch::m_Value(), m_Not(PatternMatch::m_Value()),
+ m_Not(PatternMatch::m_Value()))))
+ return WillInvertAllUses;
+
+ return false;
+ }
+
+ /// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
+ /// InstCombine's freelyInvertAllUsersOf() must be kept in sync with this fn.
+ ///
+ /// See also: isFreeToInvert()
+ static bool canFreelyInvertAllUsersOf(Value *V, Value *IgnoredUser) {
+ // Look at every user of V.
+ for (Use &U : V->uses()) {
+ if (U.getUser() == IgnoredUser)
+ continue; // Don't consider this user.
+
+ auto *I = cast<Instruction>(U.getUser());
+ switch (I->getOpcode()) {
+ case Instruction::Select:
+ if (U.getOperandNo() != 0) // Only if the value is used as select cond.
+ return false;
+ if (shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(I)))
+ return false;
+ break;
+ case Instruction::Br:
+ assert(U.getOperandNo() == 0 && "Must be branching on that value.");
+ break; // Free to invert by swapping true/false values/destinations.
+ case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring
+ // it.
+ if (!match(I, m_Not(PatternMatch::m_Value())))
+ return false; // Not a 'not'.
+ break;
+ default:
+ return false; // Don't know, likely not freely invertible.
+ }
+ // So far all users were free to invert...
+ }
+ return true; // Can freely invert all users!
+ }
+
+ /// Some binary operators require special handling to avoid poison and
+ /// undefined behavior. If a constant vector has undef elements, replace those
+ /// undefs with identity constants if possible because those are always safe
+ /// to execute. If no identity constant exists, replace undef with some other
+ /// safe constant.
+ static Constant *
+ getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In,
+ bool IsRHSConstant) {
+ auto *InVTy = cast<FixedVectorType>(In->getType());
+
+ Type *EltTy = InVTy->getElementType();
+ auto *SafeC = ConstantExpr::getBinOpIdentity(Opcode, EltTy, IsRHSConstant);
+ if (!SafeC) {
+ // TODO: Should this be available as a constant utility function? It is
+ // similar to getBinOpAbsorber().
+ if (IsRHSConstant) {
+ switch (Opcode) {
+ case Instruction::SRem: // X % 1 = 0
+ case Instruction::URem: // X %u 1 = 0
+ SafeC = ConstantInt::get(EltTy, 1);
+ break;
+ case Instruction::FRem: // X % 1.0 (doesn't simplify, but it is safe)
+ SafeC = ConstantFP::get(EltTy, 1.0);
+ break;
+ default:
+ llvm_unreachable(
+ "Only rem opcodes have no identity constant for RHS");
+ }
+ } else {
+ switch (Opcode) {
+ case Instruction::Shl: // 0 << X = 0
+ case Instruction::LShr: // 0 >>u X = 0
+ case Instruction::AShr: // 0 >> X = 0
+ case Instruction::SDiv: // 0 / X = 0
+ case Instruction::UDiv: // 0 /u X = 0
+ case Instruction::SRem: // 0 % X = 0
+ case Instruction::URem: // 0 %u X = 0
+ case Instruction::Sub: // 0 - X (doesn't simplify, but it is safe)
+ case Instruction::FSub: // 0.0 - X (doesn't simplify, but it is safe)
+ case Instruction::FDiv: // 0.0 / X (doesn't simplify, but it is safe)
+ case Instruction::FRem: // 0.0 % X = 0
+ SafeC = Constant::getNullValue(EltTy);
+ break;
+ default:
+ llvm_unreachable("Expected to find identity constant for opcode");
+ }
+ }
+ }
+ assert(SafeC && "Must have safe constant for binop");
+ unsigned NumElts = InVTy->getNumElements();
+ SmallVector<Constant *, 16> Out(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C = In->getAggregateElement(i);
+ Out[i] = isa<UndefValue>(C) ? SafeC : C;
+ }
+ return ConstantVector::get(Out);
+ }
+
+ /// Create and insert the idiom we use to indicate a block is unreachable
+ /// without having to rewrite the CFG from within InstCombine.
+ static void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
+ auto &Ctx = InsertAt->getContext();
+ new StoreInst(ConstantInt::getTrue(Ctx),
+ UndefValue::get(Type::getInt1PtrTy(Ctx)), InsertAt);
+ }
+
+ void addToWorklist(Instruction *I) { Worklist.push(I); }
+
+ AssumptionCache &getAssumptionCache() const { return AC; }
+ TargetLibraryInfo &getTargetLibraryInfo() const { return TLI; }
+ DominatorTree &getDominatorTree() const { return DT; }
+ const DataLayout &getDataLayout() const { return DL; }
+ const SimplifyQuery &getSimplifyQuery() const { return SQ; }
+ OptimizationRemarkEmitter &getOptimizationRemarkEmitter() const {
+ return ORE;
+ }
+ BlockFrequencyInfo *getBlockFrequencyInfo() const { return BFI; }
+ ProfileSummaryInfo *getProfileSummaryInfo() const { return PSI; }
+ LoopInfo *getLoopInfo() const { return LI; }
+
+ // Call target specific combiners
+ Optional<Instruction *> targetInstCombineIntrinsic(IntrinsicInst &II);
+ Optional<Value *>
+ targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask,
+ KnownBits &Known,
+ bool &KnownBitsComputed);
+ Optional<Value *> targetSimplifyDemandedVectorEltsIntrinsic(
+ IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp);
+
+ /// Inserts an instruction \p New before instruction \p Old
+ ///
+ /// Also adds the new instruction to the worklist and returns \p New so that
+ /// it is suitable for use as the return from the visitation patterns.
+ Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
+ assert(New && !New->getParent() &&
+ "New instruction already inserted into a basic block!");
+ BasicBlock *BB = Old.getParent();
+ BB->getInstList().insert(Old.getIterator(), New); // Insert inst
+ Worklist.push(New);
+ return New;
+ }
+
+ /// Same as InsertNewInstBefore, but also sets the debug loc.
+ Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
+ New->setDebugLoc(Old.getDebugLoc());
+ return InsertNewInstBefore(New, Old);
+ }
+
+ /// A combiner-aware RAUW-like routine.
+ ///
+ /// This method is to be used when an instruction is found to be dead,
+ /// replaceable with another preexisting expression. Here we add all uses of
+ /// I to the worklist, replace all uses of I with the new value, then return
+ /// I, so that the inst combiner will know that I was modified.
+ Instruction *replaceInstUsesWith(Instruction &I, Value *V) {
+ // If there are no uses to replace, then we return nullptr to indicate that
+ // no changes were made to the program.
+ if (I.use_empty())
+ return nullptr;
+
+ Worklist.pushUsersToWorkList(I); // Add all modified instrs to worklist.
+
+ // If we are replacing the instruction with itself, this must be in a
+ // segment of unreachable code, so just clobber the instruction.
+ if (&I == V)
+ V = UndefValue::get(I.getType());
+
+ LLVM_DEBUG(dbgs() << "IC: Replacing " << I << "\n"
+ << " with " << *V << '\n');
+
+ I.replaceAllUsesWith(V);
+ return &I;
+ }
+
+ /// Replace operand of instruction and add old operand to the worklist.
+ Instruction *replaceOperand(Instruction &I, unsigned OpNum, Value *V) {
+ Worklist.addValue(I.getOperand(OpNum));
+ I.setOperand(OpNum, V);
+ return &I;
+ }
+
+ /// Replace use and add the previously used value to the worklist.
+ void replaceUse(Use &U, Value *NewValue) {
+ Worklist.addValue(U);
+ U = NewValue;
+ }
+
+ /// Combiner aware instruction erasure.
+ ///
+ /// When dealing with an instruction that has side effects or produces a void
+ /// value, we can't rely on DCE to delete the instruction. Instead, visit
+ /// methods should return the value returned by this function.
+ virtual Instruction *eraseInstFromFunction(Instruction &I) = 0;
+
+ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
+ const Instruction *CxtI) const {
+ llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT);
+ }
+
+ KnownBits computeKnownBits(const Value *V, unsigned Depth,
+ const Instruction *CxtI) const {
+ return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT);
+ }
+
+ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false,
+ unsigned Depth = 0,
+ const Instruction *CxtI = nullptr) {
+ return llvm::isKnownToBeAPowerOfTwo(V, DL, OrZero, Depth, &AC, CxtI, &DT);
+ }
+
+ bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0,
+ const Instruction *CxtI = nullptr) const {
+ return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT);
+ }
+
+ unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0,
+ const Instruction *CxtI = nullptr) const {
+ return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT);
+ }
+
+ OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
+ const Value *RHS,
+ const Instruction *CxtI) const {
+ return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
+ }
+
+ OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
+ const Instruction *CxtI) const {
+ return llvm::computeOverflowForSignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
+ }
+
+ OverflowResult computeOverflowForUnsignedAdd(const Value *LHS,
+ const Value *RHS,
+ const Instruction *CxtI) const {
+ return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT);
+ }
+
+ OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS,
+ const Instruction *CxtI) const {
+ return llvm::computeOverflowForSignedAdd(LHS, RHS, DL, &AC, CxtI, &DT);
+ }
+
+ OverflowResult computeOverflowForUnsignedSub(const Value *LHS,
+ const Value *RHS,
+ const Instruction *CxtI) const {
+ return llvm::computeOverflowForUnsignedSub(LHS, RHS, DL, &AC, CxtI, &DT);
+ }
+
+ OverflowResult computeOverflowForSignedSub(const Value *LHS, const Value *RHS,
+ const Instruction *CxtI) const {
+ return llvm::computeOverflowForSignedSub(LHS, RHS, DL, &AC, CxtI, &DT);
+ }
+
+ virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo,
+ const APInt &DemandedMask, KnownBits &Known,
+ unsigned Depth = 0) = 0;
+ virtual Value *
+ SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts,
+ unsigned Depth = 0,
+ bool AllowMultipleUsers = false) = 0;
+};
+
+} // namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif
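As a concrete reading of the sign-bit-check table above, a tiny illustrative snippet (assuming this header is on the include path; the function name is invented):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>

using namespace llvm;

static void signBitCheckExamples() {
  bool TrueIfSigned = false;
  // i8: (X u> 127) holds exactly when the sign bit of X is set.
  bool IsCheck = InstCombiner::isSignBitCheck(ICmpInst::ICMP_UGT,
                                              APInt(8, 127), TrueIfSigned);
  assert(IsCheck && TrueIfSigned);
  // i8: (X s>= 0) holds exactly when the sign bit of X is clear.
  IsCheck = InstCombiner::isSignBitCheck(ICmpInst::ICMP_SGE, APInt(8, 0),
                                         TrueIfSigned);
  assert(IsCheck && !TrueIfSigned);
  (void)IsCheck;
}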
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation.h
index d4373d7b39ea..c960d5b0ab50 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation.h
@@ -66,6 +66,9 @@ struct GCOVOptions {
// Add the 'noredzone' attribute to added runtime library calls.
bool NoRedZone;
+ // Use atomic profile counter increments.
+ bool Atomic = false;
+
// Regexes separated by a semi-colon to filter the files to instrument.
std::string Filter;
@@ -143,9 +146,8 @@ ModulePass *createInstrProfilingLegacyPass(
ModulePass *createInstrOrderFilePass();
// Insert DataFlowSanitizer (dynamic data flow analysis) instrumentation
-ModulePass *createDataFlowSanitizerPass(
- const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
- void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
+ModulePass *createDataFlowSanitizerLegacyPassPass(
+ const std::vector<std::string> &ABIListFiles = std::vector<std::string>());
// Options for sanitizer coverage instrumentation.
struct SanitizerCoverageOptions {
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
index fea6064042ae..53ad0cbf9968 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
@@ -102,6 +102,7 @@ public:
bool Recover = false,
bool UseAfterScope = false);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
private:
bool CompileKernel;
@@ -122,6 +123,7 @@ public:
bool UseGlobalGC = true,
bool UseOdrIndicator = false);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ static bool isRequired() { return true; }
private:
bool CompileKernel;
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
index 120c6a8fb09f..8d70f1429b99 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
@@ -17,6 +17,7 @@ namespace llvm {
/// stores, and other memory intrinsics.
struct BoundsCheckingPass : PassInfoMixin<BoundsCheckingPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h
new file mode 100644
index 000000000000..9b57b1f9a9ea
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h
@@ -0,0 +1,32 @@
+//===- DataFlowSanitizer.h - dynamic data flow analysis -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_DATAFLOWSANITIZER_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_DATAFLOWSANITIZER_H
+
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class DataFlowSanitizerPass : public PassInfoMixin<DataFlowSanitizerPass> {
+private:
+ std::vector<std::string> ABIListFiles;
+
+public:
+ DataFlowSanitizerPass(
+ const std::vector<std::string> &ABIListFiles = std::vector<std::string>())
+ : ABIListFiles(ABIListFiles) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif
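A minimal (assumed) new-pass-manager wiring for the pass declared above; the ABI list path is a placeholder, not a file shipped with LLVM:

#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"

using namespace llvm;

// Add DataFlowSanitizer to a module pipeline with an explicit ABI list.
static void addDFSan(ModulePassManager &MPM) {
  std::vector<std::string> ABIListFiles = {"/path/to/dfsan_abilist.txt"};
  MPM.addPass(DataFlowSanitizerPass(ABIListFiles));
}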
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
index b3971e49754e..2766cc5e6263 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
@@ -26,5 +26,5 @@ private:
GCOVOptions GCOVOpts;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
index e3104eeb1d36..68b47320f650 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
@@ -27,6 +27,7 @@ public:
explicit HWAddressSanitizerPass(bool CompileKernel = false,
bool Recover = false);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+ static bool isRequired() { return true; }
private:
bool CompileKernel;
@@ -36,6 +37,24 @@ private:
FunctionPass *createHWAddressSanitizerLegacyPassPass(bool CompileKernel = false,
bool Recover = false);
+namespace HWASanAccessInfo {
+
+// Bit field positions for the accessinfo parameter to
+// llvm.hwasan.check.memaccess. Shared between the pass and the backend. Bits
+// 0-15 are also used by the runtime.
+enum {
+ AccessSizeShift = 0, // 4 bits
+ IsWriteShift = 4,
+ RecoverShift = 5,
+ MatchAllShift = 16, // 8 bits
+ HasMatchAllShift = 24,
+ CompileKernelShift = 25,
+};
+
+enum { RuntimeMask = 0xffff };
+
+} // namespace HWASanAccessInfo
+
} // namespace llvm
#endif
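The bit layout above can be read as a packing recipe; a hypothetical helper (name invented, field widths taken from the comments only) might assemble the accessinfo operand like this:

#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include <cstdint>

// Pack the fields of the llvm.hwasan.check.memaccess accessinfo operand.
static uint32_t packAccessInfo(unsigned AccessSizeIndex, bool IsWrite,
                               bool Recover, bool CompileKernel) {
  using namespace llvm::HWASanAccessInfo;
  uint32_t Info = 0;
  Info |= (AccessSizeIndex & 0xfu) << AccessSizeShift; // 4-bit size index
  Info |= (IsWrite ? 1u : 0u) << IsWriteShift;
  Info |= (Recover ? 1u : 0u) << RecoverShift;
  Info |= (CompileKernel ? 1u : 0u) << CompileKernelShift;
  return Info;
}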
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index 263d3b629589..5ce72cd59ac2 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -68,11 +68,6 @@ private:
// vector of counter load/store pairs to be register promoted.
std::vector<LoadStorePair> PromotionCandidates;
- // The start value of precise value profile range for memory intrinsic sizes.
- int64_t MemOPSizeRangeStart;
- // The end value of precise value profile range for memory intrinsic sizes.
- int64_t MemOPSizeRangeLast;
-
int64_t TotalCountersPromoted = 0;
/// Lower instrumentation intrinsics in the function. Returns true if there
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
new file mode 100644
index 000000000000..ac6a07d299a6
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -0,0 +1,51 @@
+//===--------- Definition of the MemProfiler class --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MemProfiler class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Public interface to the memory profiler pass for instrumenting code to
+/// profile memory accesses.
+///
+/// The profiler itself is a function pass that works by inserting various
+/// calls to the MemProfiler runtime library functions. The runtime library
+/// essentially replaces malloc() and free() with custom implementations that
+/// record data about the allocations.
+class MemProfilerPass : public PassInfoMixin<MemProfilerPass> {
+public:
+ explicit MemProfilerPass();
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
+};
+
+/// Public interface to the memory profiler module pass for instrumenting code
+/// to profile memory allocations and accesses.
+class ModuleMemProfilerPass : public PassInfoMixin<ModuleMemProfilerPass> {
+public:
+ explicit ModuleMemProfilerPass();
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ static bool isRequired() { return true; }
+};
+
+// Insert MemProfiler instrumentation
+FunctionPass *createMemProfilerFunctionPass();
+ModulePass *createModuleMemProfilerLegacyPassPass();
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
index 01a86ee3f1fd..f5f9ec7829bd 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
@@ -41,6 +41,7 @@ struct MemorySanitizerPass : public PassInfoMixin<MemorySanitizerPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ static bool isRequired() { return true; }
private:
MemorySanitizerOptions Options;
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h
index 999086a29f87..e3d268cb0781 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h
@@ -46,6 +46,7 @@ public:
*vfs::getRealFileSystem());
}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ static bool isRequired() { return true; }
private:
SanitizerCoverageOptions Options;
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
index ce0e46745abb..f9c507624e6d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
@@ -28,6 +28,7 @@ FunctionPass *createThreadSanitizerLegacyPassPass();
struct ThreadSanitizerPass : public PassInfoMixin<ThreadSanitizerPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/ObjCARC.h b/contrib/llvm-project/llvm/include/llvm/Transforms/ObjCARC.h
index 2f114c75e2e2..a89df95385c8 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/ObjCARC.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/ObjCARC.h
@@ -14,6 +14,8 @@
#ifndef LLVM_TRANSFORMS_OBJCARC_H
#define LLVM_TRANSFORMS_OBJCARC_H
+#include "llvm/IR/PassManager.h"
+
namespace llvm {
class Pass;
@@ -42,6 +44,22 @@ Pass *createObjCARCContractPass();
//
Pass *createObjCARCOptPass();
+struct ObjCARCOptPass : public PassInfoMixin<ObjCARCOptPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+struct ObjCARCContractPass : public PassInfoMixin<ObjCARCContractPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+struct ObjCARCAPElimPass : public PassInfoMixin<ObjCARCAPElimPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+struct ObjCARCExpandPass : public PassInfoMixin<ObjCARCExpandPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h
index a1aacec76979..3db1613d7457 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h
@@ -14,6 +14,7 @@
#ifndef LLVM_TRANSFORMS_SCALAR_H
#define LLVM_TRANSFORMS_SCALAR_H
+#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <functional>
namespace llvm {
@@ -25,12 +26,6 @@ class Pass;
//===----------------------------------------------------------------------===//
//
-// ConstantPropagation - A worklist driven constant propagation pass
-//
-FunctionPass *createConstantPropagationPass();
-
-//===----------------------------------------------------------------------===//
-//
// AlignmentFromAssumptions - Use assume intrinsics to set load/store
// alignments.
//
@@ -38,16 +33,15 @@ FunctionPass *createAlignmentFromAssumptionsPass();
//===----------------------------------------------------------------------===//
//
-// SCCP - Sparse conditional constant propagation.
+// AnnotationRemarks - Emit remarks for !annotation metadata.
//
-FunctionPass *createSCCPPass();
+FunctionPass *createAnnotationRemarksLegacyPass();
//===----------------------------------------------------------------------===//
//
-// DeadInstElimination - This pass quickly removes trivially dead instructions
-// without modifying the CFG of the function. It is a FunctionPass.
+// SCCP - Sparse conditional constant propagation.
//
-Pass *createDeadInstEliminationPass();
+FunctionPass *createSCCPPass();
//===----------------------------------------------------------------------===//
//
@@ -163,6 +157,12 @@ Pass *createLoopInterchangePass();
//===----------------------------------------------------------------------===//
//
+// LoopFlatten - This pass flattens nested loops into a single loop.
+//
+FunctionPass *createLoopFlattenPass();
+
+//===----------------------------------------------------------------------===//
+//
// LoopStrengthReduce - This pass is strength reduces GEP instructions that use
// a loop's canonical induction variable as one of their indices.
//
@@ -190,7 +190,8 @@ Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
int Count = -1, int AllowPartial = -1,
int Runtime = -1, int UpperBound = -1,
int AllowPeeling = -1);
-// Create an unrolling pass for full unrolling that uses exact trip count only.
+// Create an unrolling pass for full unrolling that uses exact trip count only
+// and also does peeling.
Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
bool ForgetAllSCEV = false);
@@ -210,7 +211,7 @@ Pass *createLoopRerollPass();
//
// LoopRotate - This pass is a simple loop rotating pass.
//
-Pass *createLoopRotatePass(int MaxHeaderSize = -1);
+Pass *createLoopRotatePass(int MaxHeaderSize = -1, bool PrepareForLTO = false);
//===----------------------------------------------------------------------===//
//
@@ -245,10 +246,12 @@ FunctionPass *createReassociatePass();
//===----------------------------------------------------------------------===//
//
// JumpThreading - Thread control through multi-pred/multi-succ blocks where some
-// preds always go to some succ. Thresholds other than minus one override the
-// internal BB duplication default threshold.
+// preds always go to some succ. If FreezeSelectCond is true, freeze the
+// condition of a select when unfolding it into branches. Thresholds other than
+// minus one override the internal BB duplication default threshold.
//
-FunctionPass *createJumpThreadingPass(int Threshold = -1);
+FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false,
+ int Threshold = -1);
//===----------------------------------------------------------------------===//
//
@@ -256,8 +259,7 @@ FunctionPass *createJumpThreadingPass(int Threshold = -1);
// simplify terminator instructions, convert switches to lookup tables, etc.
//
FunctionPass *createCFGSimplificationPass(
- unsigned Threshold = 1, bool ForwardSwitchCond = false,
- bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false,
+ SimplifyCFGOptions Options = SimplifyCFGOptions(),
std::function<bool(const Function &)> Ftor = nullptr);
//===----------------------------------------------------------------------===//
@@ -346,6 +348,13 @@ FunctionPass *createConstantHoistingPass();
//===----------------------------------------------------------------------===//
//
+// ConstraintElimination - This pass eliminates conditions that are implied by
+// previously collected constraints.
+//
+FunctionPass *createConstraintEliminationPass();
+
+//===----------------------------------------------------------------------===//
+//
// Sink - Code Sinking
//
FunctionPass *createSinkingPass();
@@ -370,6 +379,13 @@ Pass *createLowerMatrixIntrinsicsPass();
//===----------------------------------------------------------------------===//
//
+// LowerMatrixIntrinsicsMinimal - Lower matrix intrinsics to vector operations
+// (lightweight, does not require extra analysis)
+//
+Pass *createLowerMatrixIntrinsicsMinimalPass();
+
+//===----------------------------------------------------------------------===//
+//
// LowerWidenableCondition - Lower widenable condition to i1 true.
//
Pass *createLowerWidenableConditionPass();
@@ -523,6 +539,21 @@ Pass *createLoopSimplifyCFGPass();
// transformations.
//
Pass *createWarnMissedTransformationsPass();
+
+//===----------------------------------------------------------------------===//
+//
+// This pass does instruction simplification on each
+// instruction in a function.
+//
+FunctionPass *createInstSimplifyLegacyPass();
+
+
+//===----------------------------------------------------------------------===//
+//
+// createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather
+// and scatter intrinsics with scalar code when the target doesn't support them.
+//
+FunctionPass *createScalarizeMaskedMemIntrinLegacyPass();
} // End llvm namespace
#endif
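A short sketch of how the updated factory signatures above might be used from a legacy pass-manager setup; the SimplifyCFGOptions setter names are assumed from llvm/Transforms/Utils/SimplifyCFGOptions.h and the helper name is invented:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"

using namespace llvm;

static void populateLatePasses(legacy::PassManager &PM) {
  // Freeze select conditions when jump threading unfolds them into branches.
  PM.add(createJumpThreadingPass(/*FreezeSelectCond=*/true));
  // The old list of boolean parameters is replaced by an options struct.
  PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
                                         .forwardSwitchCondToPhi(true)
                                         .convertSwitchToLookupTable(true)));
}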
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
index be119b8ab855..10b6e1c6a21b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
@@ -37,9 +37,9 @@ struct AlignmentFromAssumptionsPass
ScalarEvolution *SE = nullptr;
DominatorTree *DT = nullptr;
- bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV,
- const SCEV *&OffSCEV);
- bool processAssumption(CallInst *I);
+ bool extractAlignmentInfo(CallInst *I, unsigned Idx, Value *&AAPtr,
+ const SCEV *&AlignSCEV, const SCEV *&OffSCEV);
+ bool processAssumption(CallInst *I, unsigned Idx);
};
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h
new file mode 100644
index 000000000000..f1619766dcf4
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h
@@ -0,0 +1,26 @@
+//===- AnnotationRemarks.cpp - Emit remarks for !annotation MD --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file defines AnnotationRemarksPass for the new pass manager.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_ANNOTATION_REMARKS_H
+#define LLVM_TRANSFORMS_SCALAR_ANNOTATION_REMARKS_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+struct AnnotationRemarksPass : public PassInfoMixin<AnnotationRemarksPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_ANNOTATION_REMARKS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
index 26d4a2476a86..11379e59467f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -198,7 +198,6 @@ private:
// constant GEP base.
bool emitBaseConstants(GlobalVariable *BaseGV);
void deleteDeadCastInst() const;
- bool optimizeConstants(Function &Fn);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstraintElimination.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstraintElimination.h
new file mode 100644
index 000000000000..544a6c2eae55
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstraintElimination.h
@@ -0,0 +1,24 @@
+//===- ConstraintElimination.h - Constraint elimination pass ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_CONSTRAINTELIMINATION_H
+#define LLVM_TRANSFORMS_SCALAR_CONSTRAINTELIMINATION_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class ConstraintEliminationPass
+ : public PassInfoMixin<ConstraintEliminationPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_CONSTRAINTELIMINATION_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/DCE.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/DCE.h
index 974e4b20d152..4d83296b1d86 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/DCE.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/DCE.h
@@ -23,6 +23,12 @@ class DCEPass : public PassInfoMixin<DCEPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
+
+class RedundantDbgInstEliminationPass
+ : public PassInfoMixin<RedundantDbgInstEliminationPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
}
#endif // LLVM_TRANSFORMS_SCALAR_DCE_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/GVN.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/GVN.h
index f2818c6b792e..d6b3c8ca7219 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -46,11 +46,12 @@ class FunctionPass;
class IntrinsicInst;
class LoadInst;
class LoopInfo;
+class MemorySSA;
+class MemorySSAUpdater;
class OptimizationRemarkEmitter;
class PHINode;
class TargetLibraryInfo;
class Value;
-
/// A private "module" namespace for types and utilities used by GVN. These
/// are implementation details and should not be used by clients.
namespace gvn LLVM_LIBRARY_VISIBILITY {
@@ -72,6 +73,7 @@ struct GVNOptions {
Optional<bool> AllowPRE = None;
Optional<bool> AllowLoadPRE = None;
Optional<bool> AllowLoadInLoopPRE = None;
+ Optional<bool> AllowLoadPRESplitBackedge = None;
Optional<bool> AllowMemDep = None;
GVNOptions() = default;
@@ -93,6 +95,12 @@ struct GVNOptions {
return *this;
}
+ /// Enables or disables load PRE that requires splitting a backedge in GVN.
+ GVNOptions &setLoadPRESplitBackedge(bool LoadPRESplitBackedge) {
+ AllowLoadPRESplitBackedge = LoadPRESplitBackedge;
+ return *this;
+ }
+
/// Enables or disables use of MemDepAnalysis.
GVNOptions &setMemDep(bool MemDep) {
AllowMemDep = MemDep;
@@ -129,6 +137,7 @@ public:
bool isPREEnabled() const;
bool isLoadPREEnabled() const;
bool isLoadInLoopPREEnabled() const;
+ bool isLoadPRESplitBackedgeEnabled() const;
bool isMemDepEnabled() const;
/// This class holds the mapping between values and value numbers. It is used
@@ -211,6 +220,7 @@ private:
OptimizationRemarkEmitter *ORE = nullptr;
ImplicitControlFlowTracking *ICF = nullptr;
LoopInfo *LI = nullptr;
+ MemorySSAUpdater *MSSAU = nullptr;
ValueTable VN;
@@ -246,7 +256,7 @@ private:
bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
const TargetLibraryInfo &RunTLI, AAResults &RunAA,
MemoryDependenceResults *RunMD, LoopInfo *LI,
- OptimizationRemarkEmitter *ORE);
+ OptimizationRemarkEmitter *ORE, MemorySSA *MSSA = nullptr);
/// Push a new Value to the LeaderTable onto the list for its value number.
void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
@@ -328,7 +338,6 @@ private:
BasicBlock *Curr, unsigned int ValNo);
Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
- void fillImplicitControlFlowInfo(BasicBlock *BB);
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
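The new setter composes with the existing ones; a small illustrative configuration, assuming only the setters visible in this header plus the pre-existing setPRE/setLoadPRE (the helper name is invented):

#include "llvm/Transforms/Scalar/GVN.h"

using namespace llvm;

// Build a GVN configuration that keeps load PRE but forbids backedge splitting.
static GVNOptions conservativeGVNOptions() {
  return GVNOptions()
      .setPRE(true)
      .setLoadPRE(true)
      .setLoadPRESplitBackedge(false)
      .setMemDep(true);
}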
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h
index 3c20537ab76a..b5d544f1149c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h
@@ -23,7 +23,11 @@ class Loop;
class LPMUpdater;
class IndVarSimplifyPass : public PassInfoMixin<IndVarSimplifyPass> {
+ /// Perform IV widening during the pass.
+ bool WidenIndVars;
+
public:
+ IndVarSimplifyPass(bool WidenIndVars = true) : WidenIndVars(WidenIndVars) {}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h
new file mode 100644
index 000000000000..9a56b073f1c6
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h
@@ -0,0 +1,27 @@
+//===- InferAddressSpace.h - ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_INFERADDRESSSPACES_H
+#define LLVM_TRANSFORMS_SCALAR_INFERADDRESSSPACES_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct InferAddressSpacesPass : PassInfoMixin<InferAddressSpacesPass> {
+ InferAddressSpacesPass();
+ InferAddressSpacesPass(unsigned AddressSpace);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ unsigned FlatAddrSpace = 0;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_INFERADDRESSSPACES_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h
index 0c30b6260536..f36695a8c2b7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h
@@ -36,10 +36,6 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-/// Create a legacy pass that does instruction simplification on each
-/// instruction in a function.
-FunctionPass *createInstSimplifyLegacyPass();
-
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_INSTSIMPLIFYPASS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 327bf6d00c47..951f4e487753 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -19,7 +19,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/DomTreeUpdater.h"
@@ -29,6 +28,7 @@
namespace llvm {
+class AAResults;
class BasicBlock;
class BinaryOperator;
class BranchInst;
@@ -41,6 +41,8 @@ class IntrinsicInst;
class LazyValueInfo;
class LoadInst;
class PHINode;
+class SelectInst;
+class SwitchInst;
class TargetLibraryInfo;
class Value;
@@ -77,7 +79,7 @@ enum ConstantPreference { WantInteger, WantBlockAddress };
class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
- AliasAnalysis *AA;
+ AAResults *AA;
DomTreeUpdater *DTU;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
@@ -91,15 +93,16 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
unsigned BBDupThreshold;
unsigned DefaultBBDupThreshold;
+ bool InsertFreezeWhenUnfoldingSelect;
public:
- JumpThreadingPass(int T = -1);
+ JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1);
// Glue for old PM.
- bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_,
- AliasAnalysis *AA_, DomTreeUpdater *DTU_, bool HasProfileData_,
- std::unique_ptr<BlockFrequencyInfo> BFI_,
- std::unique_ptr<BranchProbabilityInfo> BPI_);
+ bool runImpl(Function &F, TargetLibraryInfo *TLI, LazyValueInfo *LVI,
+ AAResults *AA, DomTreeUpdater *DTU, bool HasProfileData,
+ std::unique_ptr<BlockFrequencyInfo> BFI,
+ std::unique_ptr<BranchProbabilityInfo> BPI);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -108,65 +111,65 @@ public:
BPI.reset();
}
- void FindLoopHeaders(Function &F);
- bool ProcessBlock(BasicBlock *BB);
- bool MaybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB);
- void UpdateSSA(BasicBlock *BB, BasicBlock *NewBB,
+ void findLoopHeaders(Function &F);
+ bool processBlock(BasicBlock *BB);
+ bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB);
+ void updateSSA(BasicBlock *BB, BasicBlock *NewBB,
DenseMap<Instruction *, Value *> &ValueMapping);
- DenseMap<Instruction *, Value *> CloneInstructions(BasicBlock::iterator BI,
+ DenseMap<Instruction *, Value *> cloneInstructions(BasicBlock::iterator BI,
BasicBlock::iterator BE,
BasicBlock *NewBB,
BasicBlock *PredBB);
- bool TryThreadEdge(BasicBlock *BB,
+ bool tryThreadEdge(BasicBlock *BB,
const SmallVectorImpl<BasicBlock *> &PredBBs,
BasicBlock *SuccBB);
- void ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
+ void threadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
BasicBlock *SuccBB);
- bool DuplicateCondBranchOnPHIIntoPred(
+ bool duplicateCondBranchOnPHIIntoPred(
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs);
- bool ComputeValueKnownInPredecessorsImpl(
+ bool computeValueKnownInPredecessorsImpl(
Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result,
jumpthreading::ConstantPreference Preference,
DenseSet<Value *> &RecursionSet, Instruction *CxtI = nullptr);
bool
- ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
+ computeValueKnownInPredecessors(Value *V, BasicBlock *BB,
jumpthreading::PredValueInfo &Result,
jumpthreading::ConstantPreference Preference,
Instruction *CxtI = nullptr) {
DenseSet<Value *> RecursionSet;
- return ComputeValueKnownInPredecessorsImpl(V, BB, Result, Preference,
+ return computeValueKnownInPredecessorsImpl(V, BB, Result, Preference,
RecursionSet, CxtI);
}
- Constant *EvaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB,
+ Constant *evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB,
Value *cond);
- bool MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond);
- void ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB,
+ bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond);
+ void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB,
BasicBlock *BB, BasicBlock *SuccBB);
- bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ bool processThreadableEdges(Value *Cond, BasicBlock *BB,
jumpthreading::ConstantPreference Preference,
Instruction *CxtI = nullptr);
- bool ProcessBranchOnPHI(PHINode *PN);
- bool ProcessBranchOnXOR(BinaryOperator *BO);
- bool ProcessImpliedCondition(BasicBlock *BB);
+ bool processBranchOnPHI(PHINode *PN);
+ bool processBranchOnXOR(BinaryOperator *BO);
+ bool processImpliedCondition(BasicBlock *BB);
- bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
- void UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI,
+ bool simplifyPartiallyRedundantLoad(LoadInst *LI);
+ void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI,
PHINode *SIUse, unsigned Idx);
- bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
- bool TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB);
- bool TryToUnfoldSelectInCurrBB(BasicBlock *BB);
+ bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+ bool tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB);
+ bool tryToUnfoldSelectInCurrBB(BasicBlock *BB);
- bool ProcessGuards(BasicBlock *BB);
- bool ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI);
+ bool processGuards(BasicBlock *BB);
+ bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI);
private:
- BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+ BasicBlock *splitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
const char *Suffix);
- void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
+ void updateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
BasicBlock *NewBB, BasicBlock *SuccBB);
/// Check if the block has profile metadata for its outgoing edges.
bool doesBlockHaveProfileData(BasicBlock *BB);
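
Beyond the lower-camel-case renames, the interface changes here are the switch from AliasAnalysis* to AAResults* and the new InsertFreezeWhenUnfoldingSelect constructor flag. A minimal sketch of constructing the pass with the flag enabled; the pipeline scaffolding is illustrative:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/JumpThreading.h"
    using namespace llvm;

    void addJumpThreading(FunctionPassManager &FPM) {
      // Insert freeze when unfolding selects; T = -1 keeps the pass's own
      // default block-duplication threshold.
      FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect=*/true));
    }
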
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h
new file mode 100644
index 000000000000..41f91f090013
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h
@@ -0,0 +1,32 @@
+//===- LoopFlatten.h - Loop Flatten ---------------- -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface for the Loop Flatten Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H
+
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+class LoopFlattenPass : public PassInfoMixin<LoopFlattenPass> {
+public:
+ LoopFlattenPass() = default;
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H
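
Although LoopFlatten transforms loops, the run() declared here takes a Function, so it is added directly to a FunctionPassManager rather than through the loop adaptor. A short sketch under that assumption:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/LoopFlatten.h"
    using namespace llvm;

    void addLoopFlatten(FunctionPassManager &FPM) {
      // Function-level pass: no createFunctionToLoopPassAdaptor wrapper needed.
      FPM.addPass(LoopFlattenPass());
    }
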
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
index d2fff8bb5743..0c6406d86185 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -23,6 +23,19 @@ namespace llvm {
class Loop;
class LPMUpdater;
+/// Options to disable Loop Idiom Recognize, which can be shared with other
+/// passes.
+struct DisableLIRP {
+ /// When true, the entire pass is disabled.
+ static bool All;
+
+ /// When true, Memset is disabled.
+ static bool Memset;
+
+ /// When true, Memcpy is disabled.
+ static bool Memcpy;
+};
+
/// Performs Loop Idiom Recognize Pass.
class LoopIdiomRecognizePass : public PassInfoMixin<LoopIdiomRecognizePass> {
public:
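
DisableLIRP exposes plain static booleans so other code can switch parts of Loop Idiom Recognize off without threading options through constructors. A hedged sketch of a driver consulting them; the helper and the freestanding scenario are illustrative, not taken from the patch:

    #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
    using namespace llvm;

    void configureLIRPForFreestanding() {
      // In a freestanding build we may not want calls into the C library,
      // so suppress idiom recognition into memset/memcpy.
      DisableLIRP::Memset = true;
      DisableLIRP::Memcpy = true;
      // DisableLIRP::All = true;  // or disable the whole pass
    }
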
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h
new file mode 100644
index 000000000000..9f50fc5a4127
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h
@@ -0,0 +1,24 @@
+//===- LoopInterchange.h - Loop interchange pass --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPINTERCHANGE_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPINTERCHANGE_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+struct LoopInterchangePass : public PassInfoMixin<LoopInterchangePass> {
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPINTERCHANGE_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
index 9b2f0fcab95b..2a342fcda3c2 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -36,41 +36,163 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPPASSMANAGER_H
#define LLVM_TRANSFORMS_SCALAR_LOOPPASSMANAGER_H
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/PriorityWorklist.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include <memory>
namespace llvm {
// Forward declarations of an update tracking API used in the pass manager.
class LPMUpdater;
+namespace {
+
+template <typename PassT>
+using HasRunOnLoopT = decltype(std::declval<PassT>().run(
+ std::declval<Loop &>(), std::declval<LoopAnalysisManager &>(),
+ std::declval<LoopStandardAnalysisResults &>(),
+ std::declval<LPMUpdater &>()));
+
+} // namespace
+
// Explicit specialization and instantiation declarations for the pass manager.
// See the comments on the definition of the specialization for details on how
// it differs from the primary template.
template <>
-PreservedAnalyses
-PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
- LPMUpdater &>::run(Loop &InitialL, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AnalysisResults,
- LPMUpdater &U);
-extern template class PassManager<Loop, LoopAnalysisManager,
- LoopStandardAnalysisResults &, LPMUpdater &>;
+class PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
+ LPMUpdater &>
+ : public PassInfoMixin<
+ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
+ LPMUpdater &>> {
+public:
+ /// Construct a pass manager.
+ ///
+ /// If \p DebugLogging is true, we'll log our progress to llvm::dbgs().
+ explicit PassManager(bool DebugLogging = false)
+ : DebugLogging(DebugLogging) {}
+
+ // FIXME: These are equivalent to the default move constructor/move
+ // assignment. However, using = default triggers linker errors due to the
+ // explicit instantiations below. Find a way to use the default and remove the
+ // duplicated code here.
+ PassManager(PassManager &&Arg)
+ : IsLoopNestPass(std::move(Arg.IsLoopNestPass)),
+ LoopPasses(std::move(Arg.LoopPasses)),
+ LoopNestPasses(std::move(Arg.LoopNestPasses)),
+ DebugLogging(std::move(Arg.DebugLogging)) {}
+
+ PassManager &operator=(PassManager &&RHS) {
+ IsLoopNestPass = std::move(RHS.IsLoopNestPass);
+ LoopPasses = std::move(RHS.LoopPasses);
+ LoopNestPasses = std::move(RHS.LoopNestPasses);
+ DebugLogging = std::move(RHS.DebugLogging);
+ return *this;
+ }
+
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+
+ /// Add either a loop pass or a loop-nest pass to the pass manager. Append \p
+ /// Pass to the list of loop passes if it has a dedicated \fn run() method for
+ /// loops and to the list of loop-nest passes if the \fn run() method is for
+ /// loop-nests instead. Also append whether \p Pass is a loop-nest pass or not
+ /// to the end of \var IsLoopNestPass so we can easily identify the types of
+ /// passes in the pass manager later.
+ template <typename PassT>
+ std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(PassT Pass) {
+ using LoopPassModelT =
+ detail::PassModel<Loop, PassT, PreservedAnalyses, LoopAnalysisManager,
+ LoopStandardAnalysisResults &, LPMUpdater &>;
+ IsLoopNestPass.push_back(false);
+ LoopPasses.emplace_back(new LoopPassModelT(std::move(Pass)));
+ }
+
+ template <typename PassT>
+ std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(PassT Pass) {
+ using LoopNestPassModelT =
+ detail::PassModel<LoopNest, PassT, PreservedAnalyses,
+ LoopAnalysisManager, LoopStandardAnalysisResults &,
+ LPMUpdater &>;
+ IsLoopNestPass.push_back(true);
+ LoopNestPasses.emplace_back(new LoopNestPassModelT(std::move(Pass)));
+ }
+
+ // Specializations of `addPass` for `RepeatedPass`. These are necessary since
+ // `RepeatedPass` has a templated `run` method that will result in incorrect
+ // detection of `HasRunOnLoopT`.
+ template <typename PassT>
+ std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(RepeatedPass<PassT> Pass) {
+ using RepeatedLoopPassModelT =
+ detail::PassModel<Loop, RepeatedPass<PassT>, PreservedAnalyses,
+ LoopAnalysisManager, LoopStandardAnalysisResults &,
+ LPMUpdater &>;
+ IsLoopNestPass.push_back(false);
+ LoopPasses.emplace_back(new RepeatedLoopPassModelT(std::move(Pass)));
+ }
+
+ template <typename PassT>
+ std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(RepeatedPass<PassT> Pass) {
+ using RepeatedLoopNestPassModelT =
+ detail::PassModel<LoopNest, RepeatedPass<PassT>, PreservedAnalyses,
+ LoopAnalysisManager, LoopStandardAnalysisResults &,
+ LPMUpdater &>;
+ IsLoopNestPass.push_back(true);
+ LoopNestPasses.emplace_back(
+ new RepeatedLoopNestPassModelT(std::move(Pass)));
+ }
+
+ bool isEmpty() const { return LoopPasses.empty() && LoopNestPasses.empty(); }
+
+ static bool isRequired() { return true; }
+
+ size_t getNumLoopPasses() const { return LoopPasses.size(); }
+ size_t getNumLoopNestPasses() const { return LoopNestPasses.size(); }
+
+protected:
+ using LoopPassConceptT =
+ detail::PassConcept<Loop, LoopAnalysisManager,
+ LoopStandardAnalysisResults &, LPMUpdater &>;
+ using LoopNestPassConceptT =
+ detail::PassConcept<LoopNest, LoopAnalysisManager,
+ LoopStandardAnalysisResults &, LPMUpdater &>;
+
+ // BitVector that identifies whether the passes are loop passes or loop-nest
+ // passes (true for loop-nest passes).
+ BitVector IsLoopNestPass;
+ std::vector<std::unique_ptr<LoopPassConceptT>> LoopPasses;
+ std::vector<std::unique_ptr<LoopNestPassConceptT>> LoopNestPasses;
+
+ /// Flag indicating whether we should do debug logging.
+ bool DebugLogging;
+
+ /// Run either a loop pass or a loop-nest pass. Returns `None` if
+ /// PassInstrumentation's BeforePass returns false. Otherwise, returns the
+ /// preserved analyses of the pass.
+ template <typename IRUnitT, typename PassT>
+ Optional<PreservedAnalyses>
+ runSinglePass(IRUnitT &IR, PassT &Pass, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U,
+ PassInstrumentation &PI);
+
+ PreservedAnalyses runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U);
+ PreservedAnalyses runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U);
+};
/// The Loop pass manager.
///
@@ -103,7 +225,7 @@ using RequireAnalysisLoopPass =
RequireAnalysisPass<AnalysisT, Loop, LoopAnalysisManager,
LoopStandardAnalysisResults &, LPMUpdater &>;
-template <typename LoopPassT> class FunctionToLoopPassAdaptor;
+class FunctionToLoopPassAdaptor;
/// This class provides an interface for updating the loop pass manager based
/// on mutations to the loop nest.
@@ -111,6 +233,13 @@ template <typename LoopPassT> class FunctionToLoopPassAdaptor;
/// A reference to an instance of this class is passed as an argument to each
/// Loop pass, and Loop passes should use it to update LPM infrastructure if
/// they modify the loop nest structure.
+///
+/// \c LPMUpdater comes with two modes: the loop mode and the loop-nest mode. In
+/// loop mode, all the loops in the function will be pushed into the worklist
+/// and when new loops are added to the pipeline, their subloops are also
+/// inserted recursively. On the other hand, in loop-nest mode, only top-level
+/// loops are contained in the worklist and the addition of new (top-level)
+/// loops will not trigger the addition of their subloops.
class LPMUpdater {
public:
/// This can be queried by loop passes which run other loop passes (like pass
@@ -132,6 +261,8 @@ public:
/// state, this routine will mark that the current loop should be skipped by
/// the rest of the pass management infrastructure.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name) {
+ assert((!LoopNestMode || L.isOutermost()) &&
+ "L should be a top-level loop in loop-nest mode.");
LAM.clear(L, Name);
assert((&L == CurrentL || CurrentL->contains(&L)) &&
"Cannot delete a loop outside of the "
@@ -147,6 +278,8 @@ public:
/// loops within them will be visited in postorder as usual for the loop pass
/// manager.
void addChildLoops(ArrayRef<Loop *> NewChildLoops) {
+ assert(!LoopNestMode &&
+ "Child loops should not be pushed in loop-nest mode.");
// Insert ourselves back into the worklist first, as this loop should be
// revisited after all the children have been processed.
Worklist.insert(CurrentL);
@@ -178,7 +311,10 @@ public:
"All of the new loops must be siblings of the current loop!");
#endif
- appendLoopsToWorklist(NewSibLoops, Worklist);
+ if (LoopNestMode)
+ Worklist.insert(NewSibLoops);
+ else
+ appendLoopsToWorklist(NewSibLoops, Worklist);
// No need to skip the current loop or revisit it, as sibling loops
// shouldn't impact anything.
@@ -198,7 +334,7 @@ public:
}
private:
- template <typename LoopPassT> friend class llvm::FunctionToLoopPassAdaptor;
+ friend class llvm::FunctionToLoopPassAdaptor;
/// The \c FunctionToLoopPassAdaptor's worklist of loops to process.
SmallPriorityWorklist<Loop *, 4> &Worklist;
@@ -208,6 +344,7 @@ private:
Loop *CurrentL;
bool SkipCurrentLoop;
+ const bool LoopNestMode;
#ifndef NDEBUG
// In debug builds we also track the parent loop to implement asserts even in
@@ -216,10 +353,33 @@ private:
#endif
LPMUpdater(SmallPriorityWorklist<Loop *, 4> &Worklist,
- LoopAnalysisManager &LAM)
- : Worklist(Worklist), LAM(LAM) {}
+ LoopAnalysisManager &LAM, bool LoopNestMode = false)
+ : Worklist(Worklist), LAM(LAM), LoopNestMode(LoopNestMode) {}
};
+template <typename IRUnitT, typename PassT>
+Optional<PreservedAnalyses> LoopPassManager::runSinglePass(
+ IRUnitT &IR, PassT &Pass, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U, PassInstrumentation &PI) {
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, skip its execution completely if asked to (callback returns false).
+ if (!PI.runBeforePass<IRUnitT>(*Pass, IR))
+ return None;
+
+ PreservedAnalyses PA;
+ {
+ TimeTraceScope TimeScope(Pass->name(), IR.getName());
+ PA = Pass->run(IR, AM, AR, U);
+ }
+
+ // do not pass deleted Loop into the instrumentation
+ if (U.skipCurrentLoop())
+ PI.runAfterPassInvalidated<IRUnitT>(*Pass, PA);
+ else
+ PI.runAfterPass<IRUnitT>(*Pass, IR, PA);
+ return PA;
+}
+
/// Adaptor that maps from a function to its loops.
///
/// Designed to allow composition of a LoopPass(Manager) and a
@@ -227,161 +387,107 @@ private:
/// FunctionAnalysisManager it will run the \c LoopAnalysisManagerFunctionProxy
/// analysis prior to running the loop passes over the function to enable a \c
/// LoopAnalysisManager to be used within this run safely.
-template <typename LoopPassT>
+///
+/// The adaptor comes with two modes: the loop mode and the loop-nest mode, and
+/// the worklist updater lived inside will be in the same mode as the adaptor
+/// (refer to the documentation of \c LPMUpdater for more detailed explanation).
+/// Specifically, in loop mode, all loops in the function will be pushed into
+/// the worklist and processed by \p Pass, while only top-level loops are
+/// processed in loop-nest mode. Please refer to the various specializations of
+/// \fn createFunctionToLoopPassAdaptor to see when loop mode and loop-nest
+/// mode are used.
class FunctionToLoopPassAdaptor
- : public PassInfoMixin<FunctionToLoopPassAdaptor<LoopPassT>> {
+ : public PassInfoMixin<FunctionToLoopPassAdaptor> {
public:
- explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false,
- bool DebugLogging = false)
+ using PassConceptT =
+ detail::PassConcept<Loop, LoopAnalysisManager,
+ LoopStandardAnalysisResults &, LPMUpdater &>;
+
+ explicit FunctionToLoopPassAdaptor(std::unique_ptr<PassConceptT> Pass,
+ bool UseMemorySSA = false,
+ bool UseBlockFrequencyInfo = false,
+ bool DebugLogging = false,
+ bool LoopNestMode = false)
: Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging),
- UseMemorySSA(UseMemorySSA) {
+ UseMemorySSA(UseMemorySSA),
+ UseBlockFrequencyInfo(UseBlockFrequencyInfo),
+ LoopNestMode(LoopNestMode) {
LoopCanonicalizationFPM.addPass(LoopSimplifyPass());
LoopCanonicalizationFPM.addPass(LCSSAPass());
}
/// Runs the loop passes across every loop in the function.
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
- // Before we even compute any loop analyses, first run a miniature function
- // pass pipeline to put loops into their canonical form. Note that we can
- // directly build up function analyses after this as the function pass
- // manager handles all the invalidation at that layer.
- PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(F);
-
- PreservedAnalyses PA = PreservedAnalyses::all();
- // Check the PassInstrumentation's BeforePass callbacks before running the
- // canonicalization pipeline.
- if (PI.runBeforePass<Function>(LoopCanonicalizationFPM, F)) {
- PA = LoopCanonicalizationFPM.run(F, AM);
- PI.runAfterPass<Function>(LoopCanonicalizationFPM, F);
- }
-
- // Get the loop structure for this function
- LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
-
- // If there are no loops, there is nothing to do here.
- if (LI.empty())
- return PA;
-
- // Get the analysis results needed by loop passes.
- MemorySSA *MSSA = UseMemorySSA
- ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA())
- : nullptr;
- LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
- AM.getResult<AssumptionAnalysis>(F),
- AM.getResult<DominatorTreeAnalysis>(F),
- AM.getResult<LoopAnalysis>(F),
- AM.getResult<ScalarEvolutionAnalysis>(F),
- AM.getResult<TargetLibraryAnalysis>(F),
- AM.getResult<TargetIRAnalysis>(F),
- MSSA};
-
- // Setup the loop analysis manager from its proxy. It is important that
- // this is only done when there are loops to process and we have built the
- // LoopStandardAnalysisResults object. The loop analyses cached in this
- // manager have access to those analysis results and so it must invalidate
- // itself when they go away.
- auto &LAMFP = AM.getResult<LoopAnalysisManagerFunctionProxy>(F);
- if (UseMemorySSA)
- LAMFP.markMSSAUsed();
- LoopAnalysisManager &LAM = LAMFP.getManager();
-
- // A postorder worklist of loops to process.
- SmallPriorityWorklist<Loop *, 4> Worklist;
-
- // Register the worklist and loop analysis manager so that loop passes can
- // update them when they mutate the loop nest structure.
- LPMUpdater Updater(Worklist, LAM);
-
- // Add the loop nests in the reverse order of LoopInfo. See method
- // declaration.
- appendLoopsToWorklist(LI, Worklist);
-
- do {
- Loop *L = Worklist.pop_back_val();
-
- // Reset the update structure for this loop.
- Updater.CurrentL = L;
- Updater.SkipCurrentLoop = false;
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-#ifndef NDEBUG
- // Save a parent loop pointer for asserts.
- Updater.ParentL = L->getParentLoop();
+ static bool isRequired() { return true; }
- // Verify the loop structure and LCSSA form before visiting the loop.
- L->verifyLoop();
- assert(L->isRecursivelyLCSSAForm(LAR.DT, LI) &&
- "Loops must remain in LCSSA form!");
-#endif
- // Check the PassInstrumentation's BeforePass callbacks before running the
- // pass, skip its execution completely if asked to (callback returns
- // false).
- if (!PI.runBeforePass<Loop>(Pass, *L))
- continue;
-
- PreservedAnalyses PassPA;
- {
- TimeTraceScope TimeScope(Pass.name());
- PassPA = Pass.run(*L, LAM, LAR, Updater);
- }
-
- // Do not pass deleted Loop into the instrumentation.
- if (Updater.skipCurrentLoop())
- PI.runAfterPassInvalidated<Loop>(Pass);
- else
- PI.runAfterPass<Loop>(Pass, *L);
-
- // FIXME: We should verify the set of analyses relevant to Loop passes
- // are preserved.
-
- // If the loop hasn't been deleted, we need to handle invalidation here.
- if (!Updater.skipCurrentLoop())
- // We know that the loop pass couldn't have invalidated any other
- // loop's analyses (that's the contract of a loop pass), so directly
- // handle the loop analysis manager's invalidation here.
- LAM.invalidate(*L, PassPA);
-
- // Then intersect the preserved set so that invalidation of module
- // analyses will eventually occur when the module pass completes.
- PA.intersect(std::move(PassPA));
- } while (!Worklist.empty());
-
- // By definition we preserve the proxy. We also preserve all analyses on
- // Loops. This precludes *any* invalidation of loop analyses by the proxy,
- // but that's OK because we've taken care to invalidate analyses in the
- // loop analysis manager incrementally above.
- PA.preserveSet<AllAnalysesOn<Loop>>();
- PA.preserve<LoopAnalysisManagerFunctionProxy>();
- // We also preserve the set of standard analyses.
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LoopAnalysis>();
- PA.preserve<ScalarEvolutionAnalysis>();
- if (UseMemorySSA)
- PA.preserve<MemorySSAAnalysis>();
- // FIXME: What we really want to do here is preserve an AA category, but
- // that concept doesn't exist yet.
- PA.preserve<AAManager>();
- PA.preserve<BasicAA>();
- PA.preserve<GlobalsAA>();
- PA.preserve<SCEVAA>();
- return PA;
- }
+ bool isLoopNestMode() const { return LoopNestMode; }
private:
- LoopPassT Pass;
+ std::unique_ptr<PassConceptT> Pass;
FunctionPassManager LoopCanonicalizationFPM;
bool UseMemorySSA = false;
+ bool UseBlockFrequencyInfo = false;
+ const bool LoopNestMode;
};
/// A function to deduce a loop pass type and wrap it in the templated
/// adaptor.
+///
+/// If \p Pass is a loop pass, the returned adaptor will be in loop mode.
template <typename LoopPassT>
-FunctionToLoopPassAdaptor<LoopPassT>
+inline std::enable_if_t<is_detected<HasRunOnLoopT, LoopPassT>::value,
+ FunctionToLoopPassAdaptor>
createFunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false,
+ bool UseBlockFrequencyInfo = false,
+ bool DebugLogging = false) {
+ using PassModelT =
+ detail::PassModel<Loop, LoopPassT, PreservedAnalyses, LoopAnalysisManager,
+ LoopStandardAnalysisResults &, LPMUpdater &>;
+ return FunctionToLoopPassAdaptor(
+ std::make_unique<PassModelT>(std::move(Pass)), UseMemorySSA,
+ UseBlockFrequencyInfo, DebugLogging, false);
+}
+
+/// If \p Pass is a loop-nest pass, \p Pass will first be wrapped into a
+/// \c LoopPassManager and the returned adaptor will be in loop-nest mode.
+template <typename LoopNestPassT>
+inline std::enable_if_t<!is_detected<HasRunOnLoopT, LoopNestPassT>::value,
+ FunctionToLoopPassAdaptor>
+createFunctionToLoopPassAdaptor(LoopNestPassT Pass, bool UseMemorySSA = false,
+ bool UseBlockFrequencyInfo = false,
bool DebugLogging = false) {
- return FunctionToLoopPassAdaptor<LoopPassT>(std::move(Pass), UseMemorySSA,
- DebugLogging);
+ LoopPassManager LPM(DebugLogging);
+ LPM.addPass(std::move(Pass));
+ using PassModelT =
+ detail::PassModel<Loop, LoopPassManager, PreservedAnalyses,
+ LoopAnalysisManager, LoopStandardAnalysisResults &,
+ LPMUpdater &>;
+ return FunctionToLoopPassAdaptor(std::make_unique<PassModelT>(std::move(LPM)),
+ UseMemorySSA, UseBlockFrequencyInfo,
+ DebugLogging, true);
+}
+
+/// If \p Pass is an instance of \c LoopPassManager, the returned adaptor will
+/// be in loop-nest mode if the pass manager contains only loop-nest passes.
+template <>
+inline FunctionToLoopPassAdaptor
+createFunctionToLoopPassAdaptor<LoopPassManager>(LoopPassManager LPM,
+ bool UseMemorySSA,
+ bool UseBlockFrequencyInfo,
+ bool DebugLogging) {
+ // Check if LPM contains any loop pass; if it does not, return an adaptor
+ // in loop-nest mode.
+ using PassModelT =
+ detail::PassModel<Loop, LoopPassManager, PreservedAnalyses,
+ LoopAnalysisManager, LoopStandardAnalysisResults &,
+ LPMUpdater &>;
+ bool LoopNestMode = (LPM.getNumLoopPasses() == 0);
+ return FunctionToLoopPassAdaptor(std::make_unique<PassModelT>(std::move(LPM)),
+ UseMemorySSA, UseBlockFrequencyInfo,
+ DebugLogging, LoopNestMode);
}
/// Pass for printing a loop's contents as textual IR.
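
The reworked LoopPassManager keeps loop passes and loop-nest passes side by side and tells them apart with the HasRunOnLoopT detection idiom: anything whose run() takes a Loop& goes on the loop-pass list, everything else is treated as a loop-nest pass. The now non-template FunctionToLoopPassAdaptor then runs in loop-nest mode only when the wrapped manager contains no loop passes. A hedged sketch with two made-up passes (MyLoopPass and MyLoopNestPass are illustrative names, not part of the patch):

    #include "llvm/Transforms/Scalar/LoopPassManager.h"
    using namespace llvm;

    struct MyLoopPass : PassInfoMixin<MyLoopPass> {
      PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                            LoopStandardAnalysisResults &AR, LPMUpdater &U) {
        return PreservedAnalyses::all();   // run(Loop&...) => loop pass
      }
    };

    struct MyLoopNestPass : PassInfoMixin<MyLoopNestPass> {
      PreservedAnalyses run(LoopNest &LN, LoopAnalysisManager &AM,
                            LoopStandardAnalysisResults &AR, LPMUpdater &U) {
        return PreservedAnalyses::all();   // no run(Loop&...) => loop-nest pass
      }
    };

    void buildLoopPipeline(FunctionPassManager &FPM) {
      LoopPassManager LPM;
      LPM.addPass(MyLoopPass());      // IsLoopNestPass records 'false'
      LPM.addPass(MyLoopNestPass());  // IsLoopNestPass records 'true'
      // Mixed contents: getNumLoopPasses() != 0, so the LoopPassManager
      // specialization of createFunctionToLoopPassAdaptor stays in loop mode.
      FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
    }
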
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopReroll.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopReroll.h
new file mode 100644
index 000000000000..6ae309e48a28
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopReroll.h
@@ -0,0 +1,27 @@
+//===- LoopReroll.h - Loop rerolling pass ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPREROLL_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPREROLL_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+class Function;
+
+class LoopRerollPass : public PassInfoMixin<LoopRerollPass> {
+public:
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPREROLL_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopRotation.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
index 254e6072906a..f68ac70da324 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
@@ -22,12 +22,14 @@ namespace llvm {
/// A simple loop rotation transformation.
class LoopRotatePass : public PassInfoMixin<LoopRotatePass> {
public:
- LoopRotatePass(bool EnableHeaderDuplication = true);
+ LoopRotatePass(bool EnableHeaderDuplication = true,
+ bool PrepareForLTO = false);
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
private:
const bool EnableHeaderDuplication;
+ const bool PrepareForLTO;
};
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index 7b049bdc8ad1..30cc08cb42ae 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -22,7 +22,7 @@ class Function;
class Loop;
class LPMUpdater;
-/// Loop unroll pass that only does full loop unrolling.
+/// Loop unroll pass that only does full loop unrolling and peeling.
class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> {
const int OptLevel;
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h
new file mode 100644
index 000000000000..87d6d6759db2
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h
@@ -0,0 +1,25 @@
+//===- LoopVersioningLICM.h - LICM Loop Versioning ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPVERSIONINGLICM_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPVERSIONINGLICM_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+class LoopVersioningLICMPass : public PassInfoMixin<LoopVersioningLICMPass> {
+public:
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &LAR, LPMUpdater &U);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPVERSIONINGLICM_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h
index 40f8ca571f19..1d5550829f93 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h
@@ -22,6 +22,7 @@ namespace llvm {
class LowerAtomicPass : public PassInfoMixin<LowerAtomicPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+ static bool isRequired() { return true; }
};
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
index 4e47ff70d557..22b2e649e4d4 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
@@ -17,6 +17,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
namespace llvm {
@@ -31,6 +32,8 @@ struct LowerExpectIntrinsicPass : PassInfoMixin<LowerExpectIntrinsicPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
};
+extern cl::opt<uint32_t> LikelyBranchWeight;
+extern cl::opt<uint32_t> UnlikelyBranchWeight;
}
#endif
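
Exporting LikelyBranchWeight and UnlikelyBranchWeight as cl::opt globals lets other passes reuse the weights that expect lowering assigns instead of hard-coding their own. A hedged sketch of tagging a conditional branch with those shared weights; the helper name and the assumption that BI is conditional are illustrative:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
    using namespace llvm;

    // Mark BI's true edge as likely, using the shared weights.
    void markLikelyTaken(BranchInst *BI) {
      MDBuilder MDB(BI->getContext());
      BI->setMetadata(LLVMContext::MD_prof,
                      MDB.createBranchWeights(LikelyBranchWeight,
                                              UnlikelyBranchWeight));
    }
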
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
index 2f75cd5017aa..a2a31d302ccb 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
@@ -16,8 +16,14 @@
#include "llvm/IR/PassManager.h"
namespace llvm {
-struct LowerMatrixIntrinsicsPass : PassInfoMixin<LowerMatrixIntrinsicsPass> {
+class LowerMatrixIntrinsicsPass
+ : public PassInfoMixin<LowerMatrixIntrinsicsPass> {
+ bool Minimal;
+
+public:
+ LowerMatrixIntrinsicsPass(bool Minimal = false) : Minimal(Minimal) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 8fc6c23e6944..635b706d0bef 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -14,7 +14,6 @@
#ifndef LLVM_TRANSFORMS_SCALAR_MEMCPYOPTIMIZER_H
#define LLVM_TRANSFORMS_SCALAR_MEMCPYOPTIMIZER_H
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/PassManager.h"
#include <cstdint>
@@ -22,14 +21,19 @@
namespace llvm {
+class AAResults;
class AssumptionCache;
+class CallBase;
class CallInst;
class DominatorTree;
class Function;
class Instruction;
+class LoadInst;
class MemCpyInst;
class MemMoveInst;
class MemoryDependenceResults;
+class MemorySSA;
+class MemorySSAUpdater;
class MemSetInst;
class StoreInst;
class TargetLibraryInfo;
@@ -38,9 +42,11 @@ class Value;
class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
MemoryDependenceResults *MD = nullptr;
TargetLibraryInfo *TLI = nullptr;
- std::function<AliasAnalysis &()> LookupAliasAnalysis;
- std::function<AssumptionCache &()> LookupAssumptionCache;
- std::function<DominatorTree &()> LookupDomTree;
+ AAResults *AA = nullptr;
+ AssumptionCache *AC = nullptr;
+ DominatorTree *DT = nullptr;
+ MemorySSA *MSSA = nullptr;
+ MemorySSAUpdater *MSSAU = nullptr;
public:
MemCpyOptPass() = default;
@@ -48,11 +54,9 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
// Glue for the old PM.
- bool runImpl(Function &F, MemoryDependenceResults *MD_,
- TargetLibraryInfo *TLI_,
- std::function<AliasAnalysis &()> LookupAliasAnalysis_,
- std::function<AssumptionCache &()> LookupAssumptionCache_,
- std::function<DominatorTree &()> LookupDomTree_);
+ bool runImpl(Function &F, MemoryDependenceResults *MD, TargetLibraryInfo *TLI,
+ AAResults *AA, AssumptionCache *AC, DominatorTree *DT,
+ MemorySSA *MSSA);
private:
// Helper functions
@@ -60,15 +64,18 @@ private:
bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI);
bool processMemMove(MemMoveInst *M);
- bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
- uint64_t cpyLen, Align cpyAlign, CallInst *C);
+ bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore,
+ Value *cpyDst, Value *cpySrc, uint64_t cpyLen,
+ Align cpyAlign, CallInst *C);
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet);
bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet);
bool processByValArgument(CallBase &CB, unsigned ArgNo);
Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
Value *ByteVal);
+ bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
+ void eraseInstruction(Instruction *I);
bool iterateOnFunction(Function &F);
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h
index 26f5fe185dd5..5fa7427b2603 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h
@@ -114,7 +114,7 @@ private:
bool doOneIteration(Function &F);
// Reassociates I for better CSE.
- Instruction *tryReassociate(Instruction *I);
+ Instruction *tryReassociate(Instruction *I, const SCEV *&OrigSCEV);
// Reassociate GEP for better CSE.
Instruction *tryReassociateGEP(GetElementPtrInst *GEP);
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/Reg2Mem.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/Reg2Mem.h
new file mode 100644
index 000000000000..25f6563d7dcf
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/Reg2Mem.h
@@ -0,0 +1,27 @@
+//===- Reg2Mem.h - Convert registers to allocas -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface for the RegToMem Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_REG2MEM_H
+#define LLVM_TRANSFORMS_SCALAR_REG2MEM_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class RegToMemPass : public PassInfoMixin<RegToMemPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_REG2MEM_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SROA.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SROA.h
index 864a0cbd9db1..6ef7c6b22c0b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SROA.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SROA.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ValueHandle.h"
#include <vector>
namespace llvm {
@@ -77,8 +78,8 @@ class SROA : public PassInfoMixin<SROA> {
/// A collection of instructions to delete.
/// We try to batch deletions to simplify code and make things a bit more
- /// efficient.
- SetVector<Instruction *, SmallVector<Instruction *, 8>> DeadInsts;
+ /// efficient. We also make sure there are no dangling pointers.
+ SmallVector<WeakVH, 8> DeadInsts;
/// Post-promotion worklist.
///
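
Switching DeadInsts from a SetVector of raw Instruction pointers to SmallVector<WeakVH, 8> means an entry reads as null if its instruction was already erased before the batch deletion runs, so stale pointers are never dereferenced. A small illustration of that pattern outside SROA; the helper is a sketch, not SROA's actual deletion routine:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/ValueHandle.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    void deleteBatch(SmallVectorImpl<WeakVH> &DeadInsts) {
      for (WeakVH &VH : DeadInsts) {
        // If the instruction was erased elsewhere, the handle is already null.
        if (auto *I = dyn_cast_or_null<Instruction>(VH))
          if (I->use_empty())
            I->eraseFromParent();
      }
      DeadInsts.clear();
    }
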
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h
new file mode 100644
index 000000000000..19339ca13242
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h
@@ -0,0 +1,29 @@
+//===- ScalarizeMaskedMemIntrin.h - Scalarize unsupported masked mem ----===//
+// intrinsics
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces masked memory intrinsics - when unsupported by the target
+// - with a chain of basic blocks, that deal with the elements one-by-one if the
+// appropriate mask bit is set.
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_TRANSFORMS_SCALAR_SCALARIZE_MASKED_MEMINTRIN_H
+#define LLVM_TRANSFORMS_SCALAR_SCALARIZE_MASKED_MEMINTRIN_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct ScalarizeMaskedMemIntrinPass
+ : public PassInfoMixin<ScalarizeMaskedMemIntrinPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif
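
The header comment above describes the transformation: when the target has no native support for a masked memory intrinsic, each vector element gets its own guarded block. A conceptual C++ analogue of a scalarized masked load, purely illustrative of the semantics (the pass itself emits IR basic blocks, not C++):

    #include <cstddef>

    // Semantics of a scalarized masked load: element I is read from Ptr only
    // if Mask[I] is set; otherwise the pass-through value is kept.
    void maskedLoadScalarized(const int *Ptr, const bool *Mask,
                              const int *PassThru, int *Out, std::size_t VF) {
      for (std::size_t I = 0; I != VF; ++I)
        Out[I] = Mask[I] ? Ptr[I] : PassThru[I];
    }
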
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
new file mode 100644
index 000000000000..5bd6ce164dc3
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
@@ -0,0 +1,27 @@
+//===- SeparateConstOffsetFromGEP.h ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_SEPARATECONSTOFFSETFROMGEP_H
+#define LLVM_TRANSFORMS_SCALAR_SEPARATECONSTOFFSETFROMGEP_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class SeparateConstOffsetFromGEPPass
+ : public PassInfoMixin<SeparateConstOffsetFromGEPPass> {
+ bool LowerGEP;
+
+public:
+ SeparateConstOffsetFromGEPPass(bool LowerGEP = false) : LowerGEP(LowerGEP) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_SEPARATECONSTOFFSETFROMGEP_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
index f9792d38bbe6..7c5393851ae6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -14,9 +14,9 @@
#ifndef LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H
#define LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
namespace llvm {
@@ -34,13 +34,7 @@ public:
/// rather than optimal IR. That is, by default we bypass transformations that
/// are likely to improve performance but make analysis for other passes more
/// difficult.
- SimplifyCFGPass()
- : SimplifyCFGPass(SimplifyCFGOptions()
- .forwardSwitchCondToPhi(false)
- .convertSwitchToLookupTable(false)
- .needCanonicalLoops(true)
- .sinkCommonInsts(false)) {}
-
+ SimplifyCFGPass();
/// Construct a pass with optional optimizations.
SimplifyCFGPass(const SimplifyCFGOptions &PassOptions);
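
The default constructor now lives out of line, but the options-based constructor is unchanged, so callers that want more aggressive late-pipeline behaviour still build a SimplifyCFGOptions explicitly. A short sketch using the same option setters that the old inline constructor used (the chosen values are illustrative):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/SimplifyCFG.h"
    using namespace llvm;

    void addLateSimplifyCFG(FunctionPassManager &FPM) {
      FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                      .forwardSwitchCondToPhi(true)
                                      .convertSwitchToLookupTable(true)
                                      .needCanonicalLoops(false)
                                      .sinkCommonInsts(true)));
    }
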
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/StraightLineStrengthReduce.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/StraightLineStrengthReduce.h
new file mode 100644
index 000000000000..11233cc65efa
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/StraightLineStrengthReduce.h
@@ -0,0 +1,24 @@
+//===- StraightLineStrengthReduce.h - -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_STRAIGHTLINESTRENGTHREDUCE_H
+#define LLVM_TRANSFORMS_SCALAR_STRAIGHTLINESTRENGTHREDUCE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class StraightLineStrengthReducePass
+ : public PassInfoMixin<StraightLineStrengthReducePass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_STRAIGHTLINESTRENGTHREDUCE_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
new file mode 100644
index 000000000000..50d41acd529e
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
@@ -0,0 +1,20 @@
+//===- StructurizeCFG.h ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_STRUCTURIZECFG_H
+#define LLVM_TRANSFORMS_SCALAR_STRUCTURIZECFG_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+struct StructurizeCFGPass : PassInfoMixin<StructurizeCFGPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_STRUCTURIZECFG_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h
index 75edefac1cbd..9162a86183db 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h
@@ -117,7 +117,7 @@ extern char &LoopSimplifyID;
/// This function returns a new pass that downgrades the debug info in the
/// module to line tables only.
-ModulePass *createStripNonLineTableDebugInfoPass();
+ModulePass *createStripNonLineTableDebugLegacyPass();
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 0a63654feb98..1dda73913826 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -74,7 +74,7 @@ bool EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr,
/// in it, fold them away. This handles the case when all entries to the PHI
/// nodes in a block are guaranteed equal, such as when the block has exactly
/// one predecessor.
-void FoldSingleEntryPHINodes(BasicBlock *BB,
+bool FoldSingleEntryPHINodes(BasicBlock *BB,
MemoryDependenceResults *MemDep = nullptr);
/// Examine each PHI in the given block and delete it if it is dead. Also
@@ -196,7 +196,8 @@ struct CriticalEdgeSplittingOptions {
/// to.
BasicBlock *SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
const CriticalEdgeSplittingOptions &Options =
- CriticalEdgeSplittingOptions());
+ CriticalEdgeSplittingOptions(),
+ const Twine &BBName = "");
inline BasicBlock *
SplitCriticalEdge(BasicBlock *BB, succ_iterator SI,
@@ -244,19 +245,71 @@ unsigned SplitAllCriticalEdges(Function &F,
const CriticalEdgeSplittingOptions &Options =
CriticalEdgeSplittingOptions());
-/// Split the edge connecting specified block.
+/// Split the edge connecting the specified blocks, and return the newly created
+/// basic block between \p From and \p To.
BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To,
DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
- MemorySSAUpdater *MSSAU = nullptr);
+ MemorySSAUpdater *MSSAU = nullptr,
+ const Twine &BBName = "");
-/// Split the specified block at the specified instruction - everything before
-/// SplitPt stays in Old and everything starting with SplitPt moves to a new
-/// block. The two blocks are joined by an unconditional branch and the loop
-/// info is updated.
+/// Split the specified block at the specified instruction.
+///
+/// If \p Before is true, splitBlockBefore handles the block
+/// splitting. Otherwise, execution proceeds as described below.
+///
+/// Everything before \p SplitPt stays in \p Old and everything starting with \p
+/// SplitPt moves to a new block. The two blocks are joined by an unconditional
+/// branch. The new block with name \p BBName is returned.
+///
+/// FIXME: deprecated, switch to the DomTreeUpdater-based one.
+BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT,
+ LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr,
+ const Twine &BBName = "", bool Before = false);
+
+/// Split the specified block at the specified instruction.
+///
+/// If \p Before is true, splitBlockBefore handles the block
+/// splitting. Otherwise, execution proceeds as described below.
+///
+/// Everything before \p SplitPt stays in \p Old and everything starting with \p
+/// SplitPt moves to a new block. The two blocks are joined by an unconditional
+/// branch. The new block with name \p BBName is returned.
BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt,
- DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
+ DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr,
MemorySSAUpdater *MSSAU = nullptr,
- const Twine &BBName = "");
+ const Twine &BBName = "", bool Before = false);
+
+/// Split the specified block at the specified instruction \p SplitPt.
+/// All instructions before \p SplitPt are moved to a new block and all
+/// instructions after \p SplitPt stay in the old block. The new block and the
+/// old block are joined by inserting an unconditional branch to the end of the
+/// new block. The new block with name \p BBName is returned.
+BasicBlock *splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, const Twine &BBName = "");
+
+/// This method introduces at least one new basic block into the function and
+/// moves some of the predecessors of BB to be predecessors of the new block.
+/// The new predecessors are indicated by the Preds array. The new block is
+/// given a suffix of 'Suffix'. Returns new basic block to which predecessors
+/// from Preds are now pointing.
+///
+/// If BB is a landingpad block then an additional basic block might be introduced.
+/// It will have Suffix+".split_lp". See SplitLandingPadPredecessors for more
+/// details on this case.
+///
+/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCSSA but
+/// no other analyses. In particular, it does not preserve LoopSimplify
+/// (because it's complicated to handle the case where one of the edges being
+/// split is an exit of a loop with other exits).
+///
+/// FIXME: deprecated, switch to the DomTreeUpdater-based one.
+BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, DominatorTree *DT,
+ LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr,
+ bool PreserveLCSSA = false);
/// This method introduces at least one new basic block into the function and
/// moves some of the predecessors of BB to be predecessors of the new block.
@@ -274,7 +327,7 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt,
/// split is an exit of a loop with other exits).
BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
const char *Suffix,
- DominatorTree *DT = nullptr,
+ DomTreeUpdater *DTU = nullptr,
LoopInfo *LI = nullptr,
MemorySSAUpdater *MSSAU = nullptr,
bool PreserveLCSSA = false);
@@ -290,10 +343,31 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
/// no other analyses. In particular, it does not preserve LoopSimplify
/// (because it's complicated to handle the case where one of the edges being
/// split is an exit of a loop with other exits).
+///
+/// FIXME: deprecated, switch to the DomTreeUpdater-based one.
+void SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, const char *Suffix2,
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ DominatorTree *DT, LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr,
+ bool PreserveLCSSA = false);
+
+/// This method transforms the landing pad, OrigBB, by introducing two new basic
+/// blocks into the function. One of those new basic blocks gets the
+/// predecessors listed in Preds. The other basic block gets the remaining
+/// predecessors of OrigBB. The landingpad instruction of OrigBB is cloned into both
+/// of the new basic blocks. The new blocks are given the suffixes 'Suffix1' and
+/// 'Suffix2', and are returned in the NewBBs vector.
+///
+/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCSSA but
+/// no other analyses. In particular, it does not preserve LoopSimplify
+/// (because it's complicated to handle the case where one of the edges being
+/// split is an exit of a loop with other exits).
void SplitLandingPadPredecessors(
BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix,
const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
- DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
+ DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr,
MemorySSAUpdater *MSSAU = nullptr, bool PreserveLCSSA = false);
/// This method duplicates the specified return instruction into a predecessor
@@ -325,10 +399,39 @@ ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
/// Returns the NewBasicBlock's terminator.
///
/// Updates DT and LI if given.
+///
+/// FIXME: deprecated, switch to the DomTreeUpdater-based one.
+Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
+ bool Unreachable, MDNode *BranchWeights,
+ DominatorTree *DT,
+ LoopInfo *LI = nullptr,
+ BasicBlock *ThenBlock = nullptr);
+
+/// Split the containing block at the specified instruction - everything before
+/// SplitBefore stays in the old basic block, and the rest of the instructions
+/// in the BB are moved to a new block. The two blocks are connected by a
+/// conditional branch (with value of Cmp being the condition).
+/// Before:
+/// Head
+/// SplitBefore
+/// Tail
+/// After:
+/// Head
+/// if (Cond)
+/// ThenBlock
+/// SplitBefore
+/// Tail
+///
+/// If \p ThenBlock is not specified, a new block will be created for it.
+/// If \p Unreachable is true, the newly created block will end with
+/// UnreachableInst, otherwise it branches to Tail.
+/// Returns the NewBasicBlock's terminator.
+///
+/// Updates DT and LI if given.
Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
bool Unreachable,
MDNode *BranchWeights = nullptr,
- DominatorTree *DT = nullptr,
+ DomTreeUpdater *DTU = nullptr,
LoopInfo *LI = nullptr,
BasicBlock *ThenBlock = nullptr);
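A minimal usage sketch (not part of this patch) for the DomTreeUpdater-based overload declared above; the helper name and the SlowPathFn callee are illustrative, and the surrounding pass is assumed to already have a DominatorTree and, optionally, LoopInfo:

    #include "llvm/Analysis/DomTreeUpdater.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    using namespace llvm;

    // Split the block at SplitPt and call SlowPathFn only when Cond is true,
    // funnelling the CFG changes through a lazily-flushed DomTreeUpdater.
    static void insertGuardedSlowPath(Value *Cond, Instruction *SplitPt,
                                      FunctionCallee SlowPathFn,
                                      DominatorTree &DT, LoopInfo *LI) {
      DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
      // Unreachable=false: the new 'then' block branches back to the tail block.
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(
          Cond, SplitPt, /*Unreachable=*/false, /*BranchWeights=*/nullptr, &DTU,
          LI);
      IRBuilder<> B(ThenTerm);
      B.CreateCall(SlowPathFn); // Executed only on the Cond == true path.
    }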
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 90517e806e02..e7d41933a6c9 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -96,6 +96,10 @@ namespace llvm {
IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI);
+ /// Emit a call to the mempcpy function.
+ Value *emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI);
+
/// Emit a call to the memchr function. This assumes that Ptr is a pointer,
/// Val is an i32 value, and Len is an 'intptr_t' value.
Value *emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
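A small sketch (not from the patch) of calling the new emitMemPCpy helper above; it assumes the builder's insertion point is already set and that, like the other emit* helpers, it returns null when mempcpy is not available in the target library:

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/BuildLibCalls.h"

    using namespace llvm;

    // Emit mempcpy(Dst, Src, Len) at the builder's insertion point; the call's
    // result is Dst + Len when the libcall can be emitted.
    static Value *tryEmitMemPCpy(Value *Dst, Value *Src, Value *Len,
                                 IRBuilderBase &B, const TargetLibraryInfo *TLI) {
      const DataLayout &DL = B.GetInsertBlock()->getModule()->getDataLayout();
      return emitMemPCpy(Dst, Src, Len, B, DL, TLI);
    }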
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
index 22954b469186..f8211d60938e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
@@ -87,7 +87,7 @@ public:
/// If a new function was created by outlining, this method can be called
/// to update the call graph for the new function. Note that the old one
/// still needs to be re-analyzed or manually updated.
- void registerOutlinedFunction(Function &NewFn);
+ void registerOutlinedFunction(Function &OriginalFn, Function &NewFn);
/// Replace \p OldFn in the call graph (and SCC) with \p NewFn. The uses
/// outside the call graph and the function \p OldFn are not modified.
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Cloning.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Cloning.h
index dffb7801bc8e..aa960c625630 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -268,6 +268,49 @@ void updateProfileCallee(
Function *Callee, int64_t entryDelta,
const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);
+/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
+/// basic blocks and extract their scope. These are candidates for duplication
+/// when cloning.
+void identifyNoAliasScopesToClone(
+ ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes);
+
+/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
+/// instruction range and extract their scope. These are candidates for
+/// duplication when cloning.
+void identifyNoAliasScopesToClone(
+ BasicBlock::iterator Start, BasicBlock::iterator End,
+ SmallVectorImpl<MDNode *> &NoAliasDeclScopes);
+
+/// Duplicate the specified list of noalias decl scopes.
+/// The 'Ext' string is added as an extension to the name.
+/// Afterwards, the ClonedScopes contains the mapping of the original scope
+/// MDNode onto the cloned scope.
+/// Be aware that the cloned scopes are still part of the original scope domain.
+void cloneNoAliasScopes(
+ ArrayRef<MDNode *> NoAliasDeclScopes,
+ DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ StringRef Ext, LLVMContext &Context);
+
+/// Adapt the metadata for the specified instruction according to the
+/// provided mapping. This is normally used after cloning an instruction, when
+/// some noalias scopes needed to be cloned.
+void adaptNoAliasScopes(
+ llvm::Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ LLVMContext &Context);
+
+/// Clone the specified noalias decl scopes. Then adapt all instructions in the
+/// NewBlocks basicblocks to the cloned versions.
+/// 'Ext' will be added to the duplicate scope names.
+void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
+ ArrayRef<BasicBlock *> NewBlocks,
+ LLVMContext &Context, StringRef Ext);
+
+/// Clone the specified noalias decl scopes. Then adapt all instructions in the
+/// [IStart, IEnd] (IEnd included !) range to the cloned versions. 'Ext' will be
+/// added to the duplicate scope names.
+void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
+ Instruction *IStart, Instruction *IEnd,
+ LLVMContext &Context, StringRef Ext);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CLONING_H
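A sketch (not part of the patch) of the call sequence these noalias-scope helpers appear intended for, e.g. after a transform has duplicated a set of blocks; the wrapper name is illustrative:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/Transforms/Utils/Cloning.h"

    using namespace llvm;

    // After duplicating OrigBlocks into NewBlocks (unrolling, peeling, etc.),
    // give the copies their own noalias scopes so that instructions in the
    // copies do not share scope declarations with the originals.
    static void fixupNoAliasMetadata(ArrayRef<BasicBlock *> OrigBlocks,
                                     ArrayRef<BasicBlock *> NewBlocks,
                                     LLVMContext &Ctx, StringRef NameExt) {
      SmallVector<MDNode *, 8> NoAliasDeclScopes;
      identifyNoAliasScopesToClone(OrigBlocks, NoAliasDeclScopes);
      if (NoAliasDeclScopes.empty())
        return;
      // Clones the scope declarations and rewrites the scope/noalias metadata of
      // every instruction in NewBlocks to refer to the cloned scopes.
      cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, NewBlocks, Ctx, NameExt);
    }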
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Debugify.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Debugify.h
index 6f11d0a7d062..30e7d8e87adf 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Debugify.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Debugify.h
@@ -13,8 +13,11 @@
#ifndef LLVM_TRANSFORM_UTILS_DEBUGIFY_H
#define LLVM_TRANSFORM_UTILS_DEBUGIFY_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Bitcode/BitcodeWriterPass.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -37,8 +40,6 @@ bool applyDebugifyMetadata(
/// Returns true if any change was made.
bool stripDebugifyMetadata(Module &M);
-} // namespace llvm
-
llvm::ModulePass *createDebugifyModulePass();
llvm::FunctionPass *createDebugifyFunctionPass();
@@ -74,6 +75,8 @@ struct DebugifyStatistics {
/// Map pass names to a per-pass DebugifyStatistics instance.
using DebugifyStatsMap = llvm::MapVector<llvm::StringRef, DebugifyStatistics>;
+void exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map);
+
llvm::ModulePass *
createCheckDebugifyModulePass(bool Strip = false,
llvm::StringRef NameOfWrappedPass = "",
@@ -89,4 +92,60 @@ struct NewPMCheckDebugifyPass
llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &AM);
};
+struct DebugifyEachInstrumentation {
+ DebugifyStatsMap StatsMap;
+
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+};
+
+/// DebugifyCustomPassManager wraps each pass with the debugify passes if
+/// needed.
+/// NOTE: We support legacy custom pass manager only.
+/// TODO: Add New PM support for custom pass manager.
+class DebugifyCustomPassManager : public legacy::PassManager {
+ DebugifyStatsMap DIStatsMap;
+ bool EnableDebugifyEach = false;
+
+public:
+ using super = legacy::PassManager;
+
+ void add(Pass *P) override {
+ // Wrap each pass with (-check)-debugify passes if requested, making
+ // exceptions for passes which shouldn't see -debugify instrumentation.
+ bool WrapWithDebugify = EnableDebugifyEach && !P->getAsImmutablePass() &&
+ !isIRPrintingPass(P) && !isBitcodeWriterPass(P);
+ if (!WrapWithDebugify) {
+ super::add(P);
+ return;
+ }
+
+ // Apply -debugify/-check-debugify before/after each pass and collect
+ // debug info loss statistics.
+ PassKind Kind = P->getPassKind();
+ StringRef Name = P->getPassName();
+
+ // TODO: Implement Debugify for LoopPass.
+ switch (Kind) {
+ case PT_Function:
+ super::add(createDebugifyFunctionPass());
+ super::add(P);
+ super::add(createCheckDebugifyFunctionPass(true, Name, &DIStatsMap));
+ break;
+ case PT_Module:
+ super::add(createDebugifyModulePass());
+ super::add(P);
+ super::add(createCheckDebugifyModulePass(true, Name, &DIStatsMap));
+ break;
+ default:
+ super::add(P);
+ break;
+ }
+ }
+
+ void enableDebugifyEach() { EnableDebugifyEach = true; }
+
+ const DebugifyStatsMap &getDebugifyStatsMap() const { return DIStatsMap; }
+};
+} // namespace llvm
+
#endif // LLVM_TRANSFORM_UTILS_DEBUGIFY_H
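A sketch (not from the patch) of how a legacy-PM driver might use the new wrapper; the pass pipeline and the output path are placeholders:

    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"
    #include "llvm/Transforms/Utils/Debugify.h"

    using namespace llvm;

    // Run a legacy pipeline with -debugify / -check-debugify wrapped around each
    // eligible pass, then export the collected debug-info loss statistics.
    static void runWithDebugifyEach(Module &M) {
      DebugifyCustomPassManager PM;
      PM.enableDebugifyEach();
      PM.add(createSROAPass());     // Function passes get bracketed with
      PM.add(createEarlyCSEPass()); // debugify / check-debugify automatically.
      PM.run(M);
      exportDebugifyStats("debugify-stats.csv", PM.getDebugifyStatsMap());
    }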
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/FixIrreducible.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/FixIrreducible.h
new file mode 100644
index 000000000000..0c00b7bdbaf9
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/FixIrreducible.h
@@ -0,0 +1,20 @@
+//===- FixIrreducible.h - Convert irreducible control-flow into loops -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_FIXIRREDUCIBLE_H
+#define LLVM_TRANSFORMS_UTILS_FIXIRREDUCIBLE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+struct FixIrreduciblePass : PassInfoMixin<FixIrreduciblePass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_FIXIRREDUCIBLE_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/InstructionNamer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/InstructionNamer.h
new file mode 100644
index 000000000000..4f4cc2666f10
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/InstructionNamer.h
@@ -0,0 +1,20 @@
+//===- InstructionNamer.h - Give anonymous instructions names -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_INSTRUCTIONNAMER_H
+#define LLVM_TRANSFORMS_UTILS_INSTRUCTIONNAMER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+struct InstructionNamerPass : PassInfoMixin<InstructionNamerPass> {
+ PreservedAnalyses run(Function &, FunctionAnalysisManager &);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_INSTRUCTIONNAMER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Local.h
index f55e336f1f6a..c712dda483e4 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Local.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/Utils/Local.h"
@@ -30,6 +29,8 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <cstdint>
#include <limits>
@@ -58,73 +59,6 @@ class StoreInst;
class TargetLibraryInfo;
class TargetTransformInfo;
-/// A set of parameters used to control the transforms in the SimplifyCFG pass.
-/// Options may change depending on the position in the optimization pipeline.
-/// For example, canonical form that includes switches and branches may later be
-/// replaced by lookup tables and selects.
-struct SimplifyCFGOptions {
- int BonusInstThreshold;
- bool ForwardSwitchCondToPhi;
- bool ConvertSwitchToLookupTable;
- bool NeedCanonicalLoop;
- bool SinkCommonInsts;
- bool SimplifyCondBranch;
- bool FoldTwoEntryPHINode;
-
- AssumptionCache *AC;
-
- SimplifyCFGOptions(unsigned BonusThreshold = 1,
- bool ForwardSwitchCond = false,
- bool SwitchToLookup = false, bool CanonicalLoops = true,
- bool SinkCommon = false,
- AssumptionCache *AssumpCache = nullptr,
- bool SimplifyCondBranch = true,
- bool FoldTwoEntryPHINode = true)
- : BonusInstThreshold(BonusThreshold),
- ForwardSwitchCondToPhi(ForwardSwitchCond),
- ConvertSwitchToLookupTable(SwitchToLookup),
- NeedCanonicalLoop(CanonicalLoops),
- SinkCommonInsts(SinkCommon),
- SimplifyCondBranch(SimplifyCondBranch),
- FoldTwoEntryPHINode(FoldTwoEntryPHINode),
- AC(AssumpCache) {}
-
- // Support 'builder' pattern to set members by name at construction time.
- SimplifyCFGOptions &bonusInstThreshold(int I) {
- BonusInstThreshold = I;
- return *this;
- }
- SimplifyCFGOptions &forwardSwitchCondToPhi(bool B) {
- ForwardSwitchCondToPhi = B;
- return *this;
- }
- SimplifyCFGOptions &convertSwitchToLookupTable(bool B) {
- ConvertSwitchToLookupTable = B;
- return *this;
- }
- SimplifyCFGOptions &needCanonicalLoops(bool B) {
- NeedCanonicalLoop = B;
- return *this;
- }
- SimplifyCFGOptions &sinkCommonInsts(bool B) {
- SinkCommonInsts = B;
- return *this;
- }
- SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) {
- AC = Cache;
- return *this;
- }
- SimplifyCFGOptions &setSimplifyCondBranch(bool B) {
- SimplifyCondBranch = B;
- return *this;
- }
-
- SimplifyCFGOptions &setFoldTwoEntryPHINode(bool B) {
- FoldTwoEntryPHINode = B;
- return *this;
- }
-};
-
//===----------------------------------------------------------------------===//
// Local constant propagation.
//
@@ -160,7 +94,9 @@ bool wouldInstructionBeTriviallyDead(Instruction *I,
/// recursively. Return true if any instructions were deleted.
bool RecursivelyDeleteTriviallyDeadInstructions(
Value *V, const TargetLibraryInfo *TLI = nullptr,
- MemorySSAUpdater *MSSAU = nullptr);
+ MemorySSAUpdater *MSSAU = nullptr,
+ std::function<void(Value *)> AboutToDeleteCallback =
+ std::function<void(Value *)>());
/// Delete all of the instructions in `DeadInsts`, and all other instructions
/// that deleting these in turn causes to be trivially dead.
@@ -172,7 +108,9 @@ bool RecursivelyDeleteTriviallyDeadInstructions(
/// empty afterward.
void RecursivelyDeleteTriviallyDeadInstructions(
SmallVectorImpl<WeakTrackingVH> &DeadInsts,
- const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr);
+ const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr,
+ std::function<void(Value *)> AboutToDeleteCallback =
+ std::function<void(Value *)>());
/// Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow
/// instructions that are not trivially dead. These will be ignored.
@@ -180,7 +118,9 @@ void RecursivelyDeleteTriviallyDeadInstructions(
/// were found and deleted.
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(
SmallVectorImpl<WeakTrackingVH> &DeadInsts,
- const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr);
+ const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr,
+ std::function<void(Value *)> AboutToDeleteCallback =
+ std::function<void(Value *)>());
/// If the specified value is an effectively dead PHI node, due to being a
/// def-use chain of single-use nodes that either forms a cycle or is terminated
@@ -209,20 +149,6 @@ bool replaceDbgUsesWithUndef(Instruction *I);
// Control Flow Graph Restructuring.
//
-/// Like BasicBlock::removePredecessor, this method is called when we're about
-/// to delete Pred as a predecessor of BB. If BB contains any PHI nodes, this
-/// drops the entries in the PHI nodes for Pred.
-///
-/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
-/// nodes that collapse into identity values. For example, if we have:
-/// x = phi(1, 0, 0, 0)
-/// y = and x, z
-///
-/// .. and delete the predecessor corresponding to the '1', this will attempt to
-/// recursively fold the 'and' to 0.
-void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DomTreeUpdater *DTU = nullptr);
-
/// BB is a block with one predecessor and its predecessor is known to have one
/// successor (BB!). Eliminate the edge between them, moving the instructions in
/// the predecessor into BB. This deletes the predecessor block.
@@ -246,9 +172,11 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB);
/// It returns true if a modification was made, possibly deleting the basic
/// block that was pointed to. LoopHeaders is an optional input parameter
/// providing the set of loop headers that SimplifyCFG should not eliminate.
+extern cl::opt<bool> RequireAndPreserveDomTree;
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
+ DomTreeUpdater *DTU = nullptr,
const SimplifyCFGOptions &Options = {},
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders = nullptr);
+ ArrayRef<WeakVH> LoopHeaders = {});
/// This function is used to flatten a CFG. For example, it uses parallel-and
/// and parallel-or mode to collapse if-conditions and merge if-regions with
@@ -258,7 +186,9 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr);
/// If this basic block is ONLY a setcc and a branch, and if a predecessor
/// branches to us and one of our successors, fold the setcc into the
/// predecessor and use logical operations to pick the right destination.
-bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr,
+bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr,
+ const TargetTransformInfo *TTI = nullptr,
unsigned BonusInstThreshold = 1);
/// This function takes a virtual register computed by an Instruction and
@@ -365,10 +295,6 @@ bool replaceDbgDeclare(Value *Address, Value *NewAddress, DIBuilder &Builder,
void replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
DIBuilder &Builder, int Offset = 0);
-/// Finds alloca where the value comes from.
-AllocaInst *findAllocaForValue(Value *V,
- DenseMap<Value *, AllocaInst *> &AllocaForValue);
-
/// Assuming the instruction \p I is going to be deleted, attempt to salvage
/// debug users of \p I by writing the effect of \p I in a DIExpression. If it
/// cannot be salvaged changes its debug uses to undef.
@@ -406,9 +332,13 @@ DIExpression *salvageDebugInfoImpl(Instruction &I, DIExpression *DIExpr,
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint,
DominatorTree &DT);
-/// Remove all instructions from a basic block other than it's terminator
-/// and any present EH pad instructions.
-unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB);
+/// Remove all instructions from a basic block other than its terminator
+/// and any present EH pad instructions. Returns a pair where the first element
+/// is the number of instructions (excluding debug info intrinsics) that have
+/// been removed, and the second element is the number of debug info intrinsics
+/// that have been removed.
+std::pair<unsigned, unsigned>
+removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB);
/// Insert an unreachable instruction before the specified
/// instruction, making it and the rest of the code in the block dead.
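A sketch (not part of the patch) of the new AboutToDeleteCallback parameter, e.g. to keep a pass-local cache in sync while a worklist of dead instructions is erased; the cache is illustrative:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/ValueHandle.h"
    #include "llvm/Transforms/Utils/Local.h"

    using namespace llvm;

    // Delete everything in DeadInsts (plus whatever becomes trivially dead as a
    // result) and drop each value from Cache just before it is erased.
    static void deleteDeadWithBookkeeping(
        SmallVectorImpl<WeakTrackingVH> &DeadInsts,
        SmallPtrSetImpl<Value *> &Cache) {
      RecursivelyDeleteTriviallyDeadInstructions(
          DeadInsts, /*TLI=*/nullptr, /*MSSAU=*/nullptr,
          [&Cache](Value *V) { Cache.erase(V); });
    }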
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopPeel.h
new file mode 100644
index 000000000000..8f857e1e5c21
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopPeel.h
@@ -0,0 +1,40 @@
+//===- llvm/Transforms/Utils/LoopPeel.h ----- Peeling utilities -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some loop peeling utilities. It does not define any
+// actual pass or policy.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_LOOPPEEL_H
+#define LLVM_TRANSFORMS_UTILS_LOOPPEEL_H
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+
+namespace llvm {
+
+bool canPeel(Loop *L);
+
+bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);
+
+TargetTransformInfo::PeelingPreferences
+gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI,
+ Optional<bool> UserAllowPeeling,
+ Optional<bool> UserAllowProfileBasedPeeling,
+ bool UnrollingSpecficValues = false);
+
+void computePeelCount(Loop *L, unsigned LoopSize,
+ TargetTransformInfo::PeelingPreferences &PP,
+ unsigned &TripCount, ScalarEvolution &SE,
+ unsigned Threshold = UINT_MAX);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_LOOPPEEL_H
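A sketch (not from the patch) of how an unroller might drive these peeling utilities; LoopSize and TripCount are assumed to come from the caller's cost model, and PeelingPreferences::PeelCount is assumed to be the field the helpers fill in:

    #include "llvm/ADT/Optional.h"
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/LoopPeel.h"

    using namespace llvm;

    // Decide on a peel count for L and, if one was chosen, peel that many
    // iterations off the front of the loop.
    static bool maybePeel(Loop *L, unsigned LoopSize, unsigned TripCount,
                          LoopInfo *LI, ScalarEvolution &SE, DominatorTree *DT,
                          AssumptionCache *AC, const TargetTransformInfo &TTI) {
      if (!canPeel(L))
        return false;
      TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
          L, SE, TTI, /*UserAllowPeeling=*/None,
          /*UserAllowProfileBasedPeeling=*/None);
      computePeelCount(L, LoopSize, PP, TripCount, SE);
      if (PP.PeelCount == 0)
        return false;
      return peelLoop(L, PP.PeelCount, LI, &SE, DT, AC, /*PreserveLCSSA=*/true);
    }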
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
index 1e80722ed8b8..61bf93b74a15 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
@@ -33,7 +33,8 @@ class TargetTransformInfo;
bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE,
MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ,
- bool RotationOnly, unsigned Threshold, bool IsUtilMode);
+ bool RotationOnly, unsigned Threshold, bool IsUtilMode,
+ bool PrepareForLTO = false);
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 60446bca5317..951660bbab28 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -26,6 +26,8 @@ class AAResults;
class AliasSet;
class AliasSetTracker;
class BasicBlock;
+class BlockFrequencyInfo;
+class ICFLoopSafetyInfo;
class IRBuilderBase;
class Loop;
class LoopInfo;
@@ -38,7 +40,6 @@ class ScalarEvolution;
class SCEV;
class SCEVExpander;
class TargetLibraryInfo;
-class TargetTransformInfo;
class LPPassManager;
class Instruction;
struct RuntimeCheckingPtrGroup;
@@ -74,9 +75,14 @@ bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
/// changes to CFG, preserved.
///
/// Returns true if any modifications are made.
-bool formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
- const DominatorTree &DT, const LoopInfo &LI,
- ScalarEvolution *SE);
+///
+/// This function may introduce unused PHI nodes. If \p PHIsToRemove is not
+/// nullptr, those are added to it (before removing, the caller has to check if
+/// they still do not have any uses). Otherwise the PHIs are directly removed.
+bool formLCSSAForInstructions(
+ SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT,
+ const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder,
+ SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr);
/// Put loop into LCSSA form.
///
@@ -105,9 +111,28 @@ bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
ScalarEvolution *SE);
-struct SinkAndHoistLICMFlags {
- bool NoOfMemAccTooLarge;
- unsigned LicmMssaOptCounter;
+/// Flags controlling how much is checked when sinking or hoisting
+/// instructions. The number of memory access in the loop (and whether there
+/// are too many) is determined in the constructors when using MemorySSA.
+class SinkAndHoistLICMFlags {
+public:
+ // Explicitly set limits.
+ SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,
+ unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
+ Loop *L = nullptr, MemorySSA *MSSA = nullptr);
+ // Use default limits.
+ SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr,
+ MemorySSA *MSSA = nullptr);
+
+ void setIsSink(bool B) { IsSink = B; }
+ bool getIsSink() { return IsSink; }
+ bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; }
+ bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; }
+ void incrementClobberingCalls() { ++LicmMssaOptCounter; }
+
+protected:
+ bool NoOfMemAccTooLarge = false;
+ unsigned LicmMssaOptCounter = 0;
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
bool IsSink;
@@ -118,12 +143,13 @@ struct SinkAndHoistLICMFlags {
/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
/// uses before definitions, allowing us to sink a loop body in one pass without
/// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
-/// TargetLibraryInfo, Loop, AliasSet information for all
+/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
- TargetLibraryInfo *, TargetTransformInfo *, Loop *,
- AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ BlockFrequencyInfo *, TargetLibraryInfo *,
+ TargetTransformInfo *, Loop *, AliasSetTracker *,
+ MemorySSAUpdater *, ICFLoopSafetyInfo *,
SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
/// Walk the specified region of the CFG (defined by all blocks
@@ -131,13 +157,14 @@ bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
/// first order w.r.t the DominatorTree. This allows us to visit definitions
/// before uses, allowing us to hoist a loop body in one pass without iteration.
/// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
-/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
-/// loop and loop safety information as arguments. Diagnostics is emitted via \p
-/// ORE. It returns changed status.
+/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
+/// instructions of the loop and loop safety information as arguments.
+/// Diagnostics are emitted via \p ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
- TargetLibraryInfo *, Loop *, AliasSetTracker *,
- MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
- SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
+ BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
+ AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *,
+ ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
+ OptimizationRemarkEmitter *);
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
@@ -153,6 +180,12 @@ bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
LoopInfo *LI, MemorySSA *MSSA = nullptr);
+/// Remove the backedge of the specified loop. Handles loop nests and general
+/// loop structures subject to the precondition that the loop has no parent
+/// loop and has a single latch block. Preserves all listed analyses.
+void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, MemorySSA *MSSA);
+
/// Try to promote memory values to scalars by sinking stores out of
/// the loop and moving loads to before the loop. We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are
@@ -187,6 +220,13 @@ Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
/// Find named metadata for a loop with an integer value.
llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
+/// Find a combination of metadata ("llvm.loop.vectorize.width" and
+/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a
+/// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found
+/// then None is returned.
+Optional<ElementCount>
+getOptionalElementCountLoopAttribute(Loop *TheLoop);
+
/// Create a new loop identifier for a loop created from a loop transformation.
///
/// @param OrigLoopID The loop ID of the loop before the transformation.
@@ -222,6 +262,9 @@ bool hasDisableAllTransformsHint(const Loop *L);
/// Look for the loop attribute that disables the LICM transformation heuristics.
bool hasDisableLICMTransformsHint(const Loop *L);
+/// Look for the loop attribute that requires progress within the loop.
+bool hasMustProgress(const Loop *L);
+
/// The mode sets how eager a transformation should be applied.
enum TransformationMode {
/// The pass can use heuristics to determine whether a transformation should
@@ -264,6 +307,9 @@ TransformationMode hasLICMVersioningTransformation(Loop *L);
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
unsigned V = 0);
+/// Returns true if Name is applied to TheLoop and enabled.
+bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name);
+
/// Returns a loop's estimated trip count based on branch weight metadata.
/// In addition if \p EstimatedLoopInvocationWeight is not null it is
/// initialized with weight of loop's latch leading to the exit.
@@ -309,35 +355,29 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
OptimizationRemarkEmitter *ORE = nullptr);
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
-Value *createMinMaxOp(IRBuilderBase &Builder,
- RecurrenceDescriptor::MinMaxRecurrenceKind RK,
- Value *Left, Value *Right);
+Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
+ Value *Right);
/// Generates an ordered vector reduction using extracts to reduce the value.
-Value *
-getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op,
- RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
- RecurrenceDescriptor::MRK_Invalid,
- ArrayRef<Value *> RedOps = None);
+Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
+ unsigned Op, RecurKind MinMaxKind = RecurKind::None,
+ ArrayRef<Value *> RedOps = None);
/// Generates a vector reduction using shufflevectors to reduce the value.
/// Fast-math-flags are propagated using the IRBuilder's setting.
Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
- RecurrenceDescriptor::MinMaxRecurrenceKind
- MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
+ RecurKind MinMaxKind = RecurKind::None,
ArrayRef<Value *> RedOps = None);
/// Create a target reduction of the given vector. The reduction operation
/// is described by the \p Opcode parameter. min/max reductions require
-/// additional information supplied in \p Flags.
+/// additional information supplied in \p RdxKind.
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
/// Fast-math-flags are propagated using the IRBuilder's setting.
Value *createSimpleTargetReduction(IRBuilderBase &B,
- const TargetTransformInfo *TTI,
- unsigned Opcode, Value *Src,
- TargetTransformInfo::ReductionFlags Flags =
- TargetTransformInfo::ReductionFlags(),
+ const TargetTransformInfo *TTI, Value *Src,
+ RecurKind RdxKind,
ArrayRef<Value *> RedOps = None);
/// Create a generic target reduction using a recurrence descriptor \p Desc
@@ -345,8 +385,7 @@ Value *createSimpleTargetReduction(IRBuilderBase &B,
/// required to implement the reduction.
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
- RecurrenceDescriptor &Desc, Value *Src,
- bool NoNaN = false);
+ RecurrenceDescriptor &Desc, Value *Src);
/// Get the intersection (logical and) of all of the potential IR flags
/// of each scalar operation (VL) that will be converted into a vector (I).
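A small sketch (not from the patch) of the RecurKind-based reduction interface above, emitting an integer add-reduction of a vector value; the builder is assumed to already be positioned where the reduction should be materialized:

    #include "llvm/Analysis/IVDescriptors.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/LoopUtils.h"

    using namespace llvm;

    // Reduce the vector value VecSrc to a single scalar sum; the target decides
    // whether that becomes an intrinsic or a shuffle sequence.
    static Value *emitVectorSum(IRBuilderBase &B, const TargetTransformInfo *TTI,
                                Value *VecSrc) {
      return createSimpleTargetReduction(B, TTI, VecSrc, RecurKind::Add);
    }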
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopVersioning.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
index 1efdcc65b39a..4a8831ed45b2 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
@@ -16,6 +16,7 @@
#define LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -24,7 +25,6 @@ namespace llvm {
class Loop;
class LoopAccessInfo;
class LoopInfo;
-class ScalarEvolution;
struct RuntimeCheckingPtrGroup;
typedef std::pair<const RuntimeCheckingPtrGroup *,
const RuntimeCheckingPtrGroup *>
@@ -43,9 +43,9 @@ public:
/// It uses runtime check provided by the user. If \p UseLAIChecks is true,
/// we will retain the default checks made by LAI. Otherwise, construct an
/// object having no checks and we expect the user to add them.
- LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
- DominatorTree *DT, ScalarEvolution *SE,
- bool UseLAIChecks = true);
+ LoopVersioning(const LoopAccessInfo &LAI,
+ ArrayRef<RuntimePointerCheck> Checks, Loop *L, LoopInfo *LI,
+ DominatorTree *DT, ScalarEvolution *SE);
/// Performs the CFG manipulation part of versioning the loop including
/// the DominatorTree and LoopInfo updates.
@@ -75,12 +75,6 @@ public:
/// loop may alias (i.e. one of the memchecks failed).
Loop *getNonVersionedLoop() { return NonVersionedLoop; }
- /// Sets the runtime alias checks for versioning the loop.
- void setAliasChecks(ArrayRef<RuntimePointerCheck> Checks);
-
- /// Sets the runtime SCEV checks for versioning the loop.
- void setSCEVChecks(SCEVUnionPredicate Check);
-
/// Annotate memory instructions in the versioned loop with no-alias
/// metadata based on the memchecks issued.
///
@@ -129,7 +123,7 @@ private:
SmallVector<RuntimePointerCheck, 4> AliasChecks;
/// The set of SCEV checks that we are versioning for.
- SCEVUnionPredicate Preds;
+ const SCEVUnionPredicate &Preds;
/// Maps a pointer to the pointer checking group that the pointer
/// belongs to.
@@ -148,6 +142,14 @@ private:
DominatorTree *DT;
ScalarEvolution *SE;
};
+
+/// Expose LoopVersioning as a pass. Currently this is only used for
+/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
+/// array accesses from the loop.
+class LoopVersioningPass : public PassInfoMixin<LoopVersioningPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
}
#endif
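A sketch (not part of the patch) of the updated LoopVersioning constructor, which now takes the runtime pointer checks up front instead of via the removed setAliasChecks(); here the full set of checks computed by LoopAccessInfo is assumed to be wanted:

    #include "llvm/Analysis/LoopAccessAnalysis.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/LoopVersioning.h"

    using namespace llvm;

    // Version L with all of LAI's memchecks and mark the versioned copy's memory
    // accesses with no-alias metadata.
    static void versionLoopForAliasing(const LoopAccessInfo &LAI, Loop *L,
                                       LoopInfo *LI, DominatorTree *DT,
                                       ScalarEvolution *SE) {
      LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L,
                          LI, DT, SE);
      LVer.versionLoop();
      LVer.annotateLoopWithNoAlias();
    }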
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LowerSwitch.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LowerSwitch.h
new file mode 100644
index 000000000000..97086987ffcb
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LowerSwitch.h
@@ -0,0 +1,26 @@
+//===- LowerSwitch.h - Eliminate Switch instructions ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerSwitch transformation rewrites switch instructions with a sequence
+// of branches, which allows targets to get away with not implementing the
+// switch instruction until it is convenient.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_LOWERSWITCH_H
+#define LLVM_TRANSFORMS_UTILS_LOWERSWITCH_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+struct LowerSwitchPass : public PassInfoMixin<LowerSwitchPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_LOWERSWITCH_H
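The new FixIrreducible.h, InstructionNamer.h and LowerSwitch.h headers all expose the same minimal new-PM shape: a struct with a run() method that a pipeline adds by value. A sketch, not from the patch:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Utils/LowerSwitch.h"

    using namespace llvm;

    // Build a tiny function pipeline that lowers switch instructions to branches;
    // FixIrreduciblePass and InstructionNamerPass are added the same way.
    static FunctionPassManager buildLowerSwitchPipeline() {
      FunctionPassManager FPM;
      FPM.addPass(LowerSwitchPass());
      return FPM;
    }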
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MatrixUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MatrixUtils.h
new file mode 100644
index 000000000000..39a0d4bf40cc
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MatrixUtils.h
@@ -0,0 +1,94 @@
+//===- MatrixUtils.h - Utilities to lower matrix intrinsics -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for generating tiled loops for matrix operations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_MATRIXUTILS_H
+#define LLVM_TRANSFORMS_UTILS_MATRIXUTILS_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class DomTreeUpdater;
+class BasicBlock;
+class Value;
+class Loop;
+class LoopInfo;
+class IRBuilderBase;
+
+/// A helper struct to create IR loop nests for tiling in IR of the following
+/// form:
+/// for CurrentColumn = 0..NumColumns
+/// for CurrentRow = 0..NumRows
+/// for CurrentInner = 0..NumInner
+struct TileInfo {
+ /// Number of rows of the matrix.
+ unsigned NumRows;
+
+ /// Number of columns of the matrix.
+ unsigned NumColumns;
+
+ /// Number of columns of the first matrix of a multiply /
+ /// number of rows of the second matrix of a multiply.
+ unsigned NumInner;
+
+ /// Number of rows/columns in a tile.
+ unsigned TileSize = -1;
+
+ /// Start row of the current tile to compute.
+ Value *CurrentRow;
+
+ /// Start column of the current tile to compute.
+ Value *CurrentCol;
+
+ /// Current tile offset during the tile computation.
+ Value *CurrentK;
+
+ /// Header of the outermost loop iterating from 0..NumColumns.
+ BasicBlock *ColumnLoopHeader = nullptr;
+
+ /// Header of the second loop iterating from 0..NumRows.
+ BasicBlock *RowLoopHeader = nullptr;
+ /// Latch of the second loop iterating from 0..NumRows.
+ BasicBlock *RowLoopLatch = nullptr;
+ /// Header of the innermost loop iterating from 0..NumInner.
+ BasicBlock *InnerLoopHeader = nullptr;
+ /// Latch of the innermost loop iterating from 0..NumInner.
+ BasicBlock *InnerLoopLatch = nullptr;
+
+ TileInfo(unsigned NumRows, unsigned NumColumns, unsigned NumInner,
+ unsigned TileSize)
+ : NumRows(NumRows), NumColumns(NumColumns), NumInner(NumInner),
+ TileSize(TileSize) {}
+
+ /// Creates an IR loop nest for tiling of the form below. Returns the block
+ /// for the inner loop body and sets {Column,Row,Inner}LoopHeader/Latch
+ /// fields.
+ ///
+ /// for CurrentColumn = 0..NumColumns
+ /// for CurrentRow = 0..NumRows
+ /// for CurrentInner = 0..NumInner
+ BasicBlock *CreateTiledLoops(BasicBlock *Start, BasicBlock *End,
+ IRBuilderBase &B, DomTreeUpdater &DTU,
+ LoopInfo &LI);
+
+private:
+ /// Creates a new loop with header, body and latch blocks that iterates from
+ /// [0, Bound). Updates \p Preheader to branch to the new header and uses \p
+ /// Exit as exit block. Adds the new loop blocks to \p L and applies dominator
+ /// tree updates to \p DTU.
+ static BasicBlock *CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
+ Value *Bound, Value *Step, StringRef Name,
+ IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L,
+ LoopInfo &LI);
+};
+} // namespace llvm
+
+#endif
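A sketch (not from the patch) of how a matrix lowering might build a tiled loop nest with TileInfo; the Start/End blocks, the dimensions, and the tile size are assumed to come from the caller:

    #include "llvm/Analysis/DomTreeUpdater.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/MatrixUtils.h"

    using namespace llvm;

    // Emit the three-deep loop nest between Start and End and return the block in
    // which the tile body (e.g. the fused multiply-adds) should be generated.
    static BasicBlock *emitTileNest(BasicBlock *Start, BasicBlock *End,
                                    IRBuilderBase &B, DomTreeUpdater &DTU,
                                    LoopInfo &LI, unsigned Rows, unsigned Cols,
                                    unsigned Inner, unsigned TileSize) {
      TileInfo TI(Rows, Cols, Inner, TileSize);
      BasicBlock *Body = TI.CreateTiledLoops(Start, End, B, DTU, LI);
      // TI.CurrentRow / TI.CurrentCol / TI.CurrentK now hold the indices the tile
      // body can use to address the matrix operands.
      return Body;
    }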
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MetaRenamer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MetaRenamer.h
new file mode 100644
index 000000000000..fff3dff75837
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MetaRenamer.h
@@ -0,0 +1,26 @@
+//===- MetaRenamer.h - Rename everything with metasyntactic names ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass renames everything with metasyntactic names. The intent is to use
+// this pass after bugpoint reduction to conceal the nature of the original
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_METARENAMER_H
+#define LLVM_TRANSFORMS_UTILS_METARENAMER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+struct MetaRenamerPass : PassInfoMixin<MetaRenamerPass> {
+ PreservedAnalyses run(Module &, ModuleAnalysisManager &);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_METARENAMER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MisExpect.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MisExpect.h
deleted file mode 100644
index 1dbe8cb95936..000000000000
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MisExpect.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===--- MisExpect.h - Check the use of llvm.expect with PGO data ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This contains code to emit warnings for potentially incorrect usage of the
-// llvm.expect intrinsic. This utility extracts the threshold values from
-// metadata associated with the instrumented Branch or Switch instruction. The
-// threshold values are then used to determine if a warning should be emmited.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-
-namespace llvm {
-namespace misexpect {
-
-/// verifyMisExpect - compares PGO counters to the thresholds used for
-/// llvm.expect and warns if the PGO counters are outside of the expected
-/// range.
-/// \param I The Instruction being checked
-/// \param Weights A vector of profile weights for each target block
-/// \param Ctx The current LLVM context
-void verifyMisExpect(llvm::Instruction *I,
- const llvm::SmallVector<uint32_t, 4> &Weights,
- llvm::LLVMContext &Ctx);
-
-/// checkClangInstrumentation - verify if llvm.expect matches PGO profile
-/// This function checks the frontend instrumentation in the backend when
-/// lowering llvm.expect intrinsics. It checks for existing metadata, and
-/// then validates the use of llvm.expect against the assigned branch weights.
-//
-/// \param I the Instruction being checked
-void checkFrontendInstrumentation(Instruction &I);
-
-} // namespace misexpect
-} // namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
index 657b97c67a8b..c922476ac79d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -70,6 +70,13 @@ class raw_ostream;
enum PredicateType { PT_Branch, PT_Assume, PT_Switch };
+/// Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op
+/// is the value the constraint applies to (the ssa.copy result).
+struct PredicateConstraint {
+ CmpInst::Predicate Predicate;
+ Value *OtherOp;
+};
+
// Base class for all predicate information we provide.
// All of our predicate information has at least a comparison.
class PredicateBase : public ilist_node<PredicateBase> {
@@ -83,37 +90,34 @@ public:
// predicates, this is different to OriginalOp which refers to the initial
// operand.
Value *RenamedOp;
+ // The condition associated with this predicate.
+ Value *Condition;
+
PredicateBase(const PredicateBase &) = delete;
PredicateBase &operator=(const PredicateBase &) = delete;
PredicateBase() = delete;
virtual ~PredicateBase() = default;
-
-protected:
- PredicateBase(PredicateType PT, Value *Op) : Type(PT), OriginalOp(Op) {}
-};
-
-class PredicateWithCondition : public PredicateBase {
-public:
- Value *Condition;
static bool classof(const PredicateBase *PB) {
return PB->Type == PT_Assume || PB->Type == PT_Branch ||
PB->Type == PT_Switch;
}
+ /// Fetch condition in the form of PredicateConstraint, if possible.
+ Optional<PredicateConstraint> getConstraint() const;
+
protected:
- PredicateWithCondition(PredicateType PT, Value *Op, Value *Condition)
- : PredicateBase(PT, Op), Condition(Condition) {}
+ PredicateBase(PredicateType PT, Value *Op, Value *Condition)
+ : Type(PT), OriginalOp(Op), Condition(Condition) {}
};
// Provides predicate information for assumes. Since assumes are always true,
// we simply provide the assume instruction, so you can tell your relative
// position to it.
-class PredicateAssume : public PredicateWithCondition {
+class PredicateAssume : public PredicateBase {
public:
IntrinsicInst *AssumeInst;
PredicateAssume(Value *Op, IntrinsicInst *AssumeInst, Value *Condition)
- : PredicateWithCondition(PT_Assume, Op, Condition),
- AssumeInst(AssumeInst) {}
+ : PredicateBase(PT_Assume, Op, Condition), AssumeInst(AssumeInst) {}
PredicateAssume() = delete;
static bool classof(const PredicateBase *PB) {
return PB->Type == PT_Assume;
@@ -123,7 +127,7 @@ public:
// Mixin class for edge predicates. The FROM block is the block where the
// predicate originates, and the TO block is the block where the predicate is
// valid.
-class PredicateWithEdge : public PredicateWithCondition {
+class PredicateWithEdge : public PredicateBase {
public:
BasicBlock *From;
BasicBlock *To;
@@ -135,7 +139,7 @@ public:
protected:
PredicateWithEdge(PredicateType PType, Value *Op, BasicBlock *From,
BasicBlock *To, Value *Cond)
- : PredicateWithCondition(PType, Op, Cond), From(From), To(To) {}
+ : PredicateBase(PType, Op, Cond), From(From), To(To) {}
};
// Provides predicate information for branches.
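A sketch (not part of the patch) of a consumer reading a predicate through the new getConstraint() accessor, e.g. to feed a lattice or range update; the helper name is illustrative:

    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/Transforms/Utils/PredicateInfo.h"

    using namespace llvm;

    // Translate a predicate on an ssa.copy result into "result Pred Bound" form,
    // returning false when no such constraint can be derived.
    static bool getPredicateBound(const PredicateBase &PB,
                                  CmpInst::Predicate &Pred, Value *&Bound) {
      Optional<PredicateConstraint> C = PB.getConstraint();
      if (!C)
        return false;
      Pred = C->Predicate;
      Bound = C->OtherOp;
      return true;
    }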
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 0c88f9f79e76..547245cfb963 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -26,410 +26,486 @@
#include "llvm/Support/CommandLine.h"
namespace llvm {
- extern cl::opt<unsigned> SCEVCheapExpansionBudget;
+extern cl::opt<unsigned> SCEVCheapExpansionBudget;
+
+/// Return true if the given expression is safe to expand in the sense that
+/// all materialized values are safe to speculate anywhere their operands are
+/// defined.
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE);
+
+/// Return true if the given expression is safe to expand in the sense that
+/// all materialized values are defined and safe to speculate at the specified
+/// location and their operands are defined at this location.
+bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
+ ScalarEvolution &SE);
+
+/// struct for holding enough information to help calculate the cost of the
+/// given SCEV when expanded into IR.
+struct SCEVOperand {
+ explicit SCEVOperand(unsigned Opc, int Idx, const SCEV *S) :
+ ParentOpcode(Opc), OperandIdx(Idx), S(S) { }
+ /// LLVM instruction opcode that uses the operand.
+ unsigned ParentOpcode;
+ /// The use index of an expanded instruction.
+ int OperandIdx;
+ /// The SCEV operand to be costed.
+ const SCEV* S;
+};
+
+/// This class uses information about analyzed scalars to rewrite expressions
+/// in canonical form.
+///
+/// Clients should create an instance of this class when rewriting is needed,
+/// and destroy it when finished to allow the release of the associated
+/// memory.
+class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
+ ScalarEvolution &SE;
+ const DataLayout &DL;
+
+ // New instructions receive a name to identify them with the current pass.
+ const char *IVName;
+
+ /// Indicates whether LCSSA phis should be created for inserted values.
+ bool PreserveLCSSA;
+
+ // InsertedExpressions caches Values for reuse, so must track RAUW.
+ DenseMap<std::pair<const SCEV *, Instruction *>, TrackingVH<Value>>
+ InsertedExpressions;
+
+ // InsertedValues only flags inserted instructions so needs no RAUW.
+ DenseSet<AssertingVH<Value>> InsertedValues;
+ DenseSet<AssertingVH<Value>> InsertedPostIncValues;
+
+ /// Keep track of the existing IR values re-used during expansion.
+ /// FIXME: Ideally re-used instructions would not be added to
+ /// InsertedValues/InsertedPostIncValues.
+ SmallPtrSet<Value *, 16> ReusedValues;
+
+ /// A memoization of the "relevant" loop for a given SCEV.
+ DenseMap<const SCEV *, const Loop *> RelevantLoops;
+
+ /// Addrecs referring to any of the given loops are expanded in post-inc
+ /// mode. For example, expanding {1,+,1}<L> in post-inc mode returns the add
+ /// instruction that adds one to the phi for {0,+,1}<L>, as opposed to a new
+ /// phi starting at 1. This is only supported in non-canonical mode.
+ PostIncLoopSet PostIncLoops;
+
+ /// When this is non-null, addrecs expanded in the loop it indicates should
+ /// be inserted with increments at IVIncInsertPos.
+ const Loop *IVIncInsertLoop;
+
+ /// When expanding addrecs in the IVIncInsertLoop loop, insert the IV
+ /// increment at this position.
+ Instruction *IVIncInsertPos;
+
+ /// Phis that complete an IV chain. Reuse
+ DenseSet<AssertingVH<PHINode>> ChainedPhis;
+
+ /// When true, SCEVExpander tries to expand expressions in "canonical" form.
+ /// When false, expressions are expanded in a more literal form.
+ ///
+ /// In "canonical" form addrecs are expanded as arithmetic based on a
+ /// canonical induction variable. Note that CanonicalMode doesn't guarantee
+ /// that all expressions are expanded in "canonical" form. For some
+ /// expressions literal mode can be preferred.
+ bool CanonicalMode;
+
+ /// When invoked from LSR, the expander is in "strength reduction" mode. The
+ /// only difference is that phi's are only reused if they are already in
+ /// "expanded" form.
+ bool LSRMode;
+
+ typedef IRBuilder<TargetFolder, IRBuilderCallbackInserter> BuilderType;
+ BuilderType Builder;
+
+ // RAII object that stores the current insertion point and restores it when
+ // the object is destroyed. This includes the debug location. Duplicated
+ // from InsertPointGuard to add SetInsertPoint() which is used to update the
+ // InsertPointGuards stack when insert points are moved during SCEV
+ // expansion.
+ class SCEVInsertPointGuard {
+ IRBuilderBase &Builder;
+ AssertingVH<BasicBlock> Block;
+ BasicBlock::iterator Point;
+ DebugLoc DbgLoc;
+ SCEVExpander *SE;
+
+ SCEVInsertPointGuard(const SCEVInsertPointGuard &) = delete;
+ SCEVInsertPointGuard &operator=(const SCEVInsertPointGuard &) = delete;
- /// Return true if the given expression is safe to expand in the sense that
- /// all materialized values are safe to speculate anywhere their operands are
- /// defined.
- bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE);
+ public:
+ SCEVInsertPointGuard(IRBuilderBase &B, SCEVExpander *SE)
+ : Builder(B), Block(B.GetInsertBlock()), Point(B.GetInsertPoint()),
+ DbgLoc(B.getCurrentDebugLocation()), SE(SE) {
+ SE->InsertPointGuards.push_back(this);
+ }
- /// Return true if the given expression is safe to expand in the sense that
- /// all materialized values are defined and safe to speculate at the specified
- /// location and their operands are defined at this location.
- bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
- ScalarEvolution &SE);
+ ~SCEVInsertPointGuard() {
+ // These guards should always be created/destroyed in FIFO order since they
+ // are used to guard lexically scoped blocks of code in
+ // ScalarEvolutionExpander.
+ assert(SE->InsertPointGuards.back() == this);
+ SE->InsertPointGuards.pop_back();
+ Builder.restoreIP(IRBuilderBase::InsertPoint(Block, Point));
+ Builder.SetCurrentDebugLocation(DbgLoc);
+ }
- /// This class uses information about analyze scalars to rewrite expressions
- /// in canonical form.
- ///
- /// Clients should create an instance of this class when rewriting is needed,
- /// and destroy it when finished to allow the release of the associated
- /// memory.
- class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
- ScalarEvolution &SE;
- const DataLayout &DL;
-
- // New instructions receive a name to identify them with the current pass.
- const char* IVName;
-
- // InsertedExpressions caches Values for reuse, so must track RAUW.
- DenseMap<std::pair<const SCEV *, Instruction *>, TrackingVH<Value>>
- InsertedExpressions;
-
- // InsertedValues only flags inserted instructions so needs no RAUW.
- DenseSet<AssertingVH<Value>> InsertedValues;
- DenseSet<AssertingVH<Value>> InsertedPostIncValues;
-
- /// A memoization of the "relevant" loop for a given SCEV.
- DenseMap<const SCEV *, const Loop *> RelevantLoops;
-
- /// Addrecs referring to any of the given loops are expanded in post-inc
- /// mode. For example, expanding {1,+,1}<L> in post-inc mode returns the add
- /// instruction that adds one to the phi for {0,+,1}<L>, as opposed to a new
- /// phi starting at 1. This is only supported in non-canonical mode.
- PostIncLoopSet PostIncLoops;
-
- /// When this is non-null, addrecs expanded in the loop it indicates should
- /// be inserted with increments at IVIncInsertPos.
- const Loop *IVIncInsertLoop;
-
- /// When expanding addrecs in the IVIncInsertLoop loop, insert the IV
- /// increment at this position.
- Instruction *IVIncInsertPos;
-
- /// Phis that complete an IV chain. Reuse
- DenseSet<AssertingVH<PHINode>> ChainedPhis;
-
- /// When true, SCEVExpander tries to expand expressions in "canonical" form.
- /// When false, expressions are expanded in a more literal form.
- ///
- /// In "canonical" form addrecs are expanded as arithmetic based on a
- /// canonical induction variable. Note that CanonicalMode doesn't guarantee
- /// that all expressions are expanded in "canonical" form. For some
- /// expressions literal mode can be preferred.
- bool CanonicalMode;
-
- /// When invoked from LSR, the expander is in "strength reduction" mode. The
- /// only difference is that phi's are only reused if they are already in
- /// "expanded" form.
- bool LSRMode;
-
- typedef IRBuilder<TargetFolder> BuilderType;
- BuilderType Builder;
-
- // RAII object that stores the current insertion point and restores it when
- // the object is destroyed. This includes the debug location. Duplicated
- // from InsertPointGuard to add SetInsertPoint() which is used to updated
- // InsertPointGuards stack when insert points are moved during SCEV
- // expansion.
- class SCEVInsertPointGuard {
- IRBuilderBase &Builder;
- AssertingVH<BasicBlock> Block;
- BasicBlock::iterator Point;
- DebugLoc DbgLoc;
- SCEVExpander *SE;
-
- SCEVInsertPointGuard(const SCEVInsertPointGuard &) = delete;
- SCEVInsertPointGuard &operator=(const SCEVInsertPointGuard &) = delete;
-
- public:
- SCEVInsertPointGuard(IRBuilderBase &B, SCEVExpander *SE)
- : Builder(B), Block(B.GetInsertBlock()), Point(B.GetInsertPoint()),
- DbgLoc(B.getCurrentDebugLocation()), SE(SE) {
- SE->InsertPointGuards.push_back(this);
- }
-
- ~SCEVInsertPointGuard() {
- // These guards should always created/destroyed in FIFO order since they
- // are used to guard lexically scoped blocks of code in
- // ScalarEvolutionExpander.
- assert(SE->InsertPointGuards.back() == this);
- SE->InsertPointGuards.pop_back();
- Builder.restoreIP(IRBuilderBase::InsertPoint(Block, Point));
- Builder.SetCurrentDebugLocation(DbgLoc);
- }
-
- BasicBlock::iterator GetInsertPoint() const { return Point; }
- void SetInsertPoint(BasicBlock::iterator I) { Point = I; }
- };
-
- /// Stack of pointers to saved insert points, used to keep insert points
- /// consistent when instructions are moved.
- SmallVector<SCEVInsertPointGuard *, 8> InsertPointGuards;
+ BasicBlock::iterator GetInsertPoint() const { return Point; }
+ void SetInsertPoint(BasicBlock::iterator I) { Point = I; }
+ };
+
+ /// Stack of pointers to saved insert points, used to keep insert points
+ /// consistent when instructions are moved.
+ SmallVector<SCEVInsertPointGuard *, 8> InsertPointGuards;
#ifndef NDEBUG
- const char *DebugType;
+ const char *DebugType;
#endif
- friend struct SCEVVisitor<SCEVExpander, Value*>;
-
- public:
- /// Construct a SCEVExpander in "canonical" mode.
- explicit SCEVExpander(ScalarEvolution &se, const DataLayout &DL,
- const char *name)
- : SE(se), DL(DL), IVName(name), IVIncInsertLoop(nullptr),
- IVIncInsertPos(nullptr), CanonicalMode(true), LSRMode(false),
- Builder(se.getContext(), TargetFolder(DL)) {
+ friend struct SCEVVisitor<SCEVExpander, Value *>;
+
+public:
+ /// Construct a SCEVExpander in "canonical" mode.
+ explicit SCEVExpander(ScalarEvolution &se, const DataLayout &DL,
+ const char *name, bool PreserveLCSSA = true)
+ : SE(se), DL(DL), IVName(name), PreserveLCSSA(PreserveLCSSA),
+ IVIncInsertLoop(nullptr), IVIncInsertPos(nullptr), CanonicalMode(true),
+ LSRMode(false),
+ Builder(se.getContext(), TargetFolder(DL),
+ IRBuilderCallbackInserter(
+ [this](Instruction *I) { rememberInstruction(I); })) {
#ifndef NDEBUG
- DebugType = "";
+ DebugType = "";
#endif
- }
+ }
- ~SCEVExpander() {
- // Make sure the insert point guard stack is consistent.
- assert(InsertPointGuards.empty());
- }
+ ~SCEVExpander() {
+ // Make sure the insert point guard stack is consistent.
+ assert(InsertPointGuards.empty());
+ }
#ifndef NDEBUG
- void setDebugType(const char* s) { DebugType = s; }
+ void setDebugType(const char *s) { DebugType = s; }
#endif
- /// Erase the contents of the InsertedExpressions map so that users trying
- /// to expand the same expression into multiple BasicBlocks or different
- /// places within the same BasicBlock can do so.
- void clear() {
- InsertedExpressions.clear();
- InsertedValues.clear();
- InsertedPostIncValues.clear();
- ChainedPhis.clear();
+ /// Erase the contents of the InsertedExpressions map so that users trying
+ /// to expand the same expression into multiple BasicBlocks or different
+ /// places within the same BasicBlock can do so.
+ void clear() {
+ InsertedExpressions.clear();
+ InsertedValues.clear();
+ InsertedPostIncValues.clear();
+ ReusedValues.clear();
+ ChainedPhis.clear();
+ }
+
+ /// Return a vector containing all instructions inserted during expansion.
+ SmallVector<Instruction *, 32> getAllInsertedInstructions() const {
+ SmallVector<Instruction *, 32> Result;
+ for (auto &VH : InsertedValues) {
+ Value *V = VH;
+ if (ReusedValues.contains(V))
+ continue;
+ if (auto *Inst = dyn_cast<Instruction>(V))
+ Result.push_back(Inst);
}
-
- /// Return true for expressions that can't be evaluated at runtime
- /// within given \b Budget.
- ///
- /// At is a parameter which specifies point in code where user is going to
- /// expand this expression. Sometimes this knowledge can lead to
- /// a less pessimistic cost estimation.
- bool isHighCostExpansion(const SCEV *Expr, Loop *L, unsigned Budget,
- const TargetTransformInfo *TTI,
- const Instruction *At) {
- assert(TTI && "This function requires TTI to be provided.");
- assert(At && "This function requires At instruction to be provided.");
- if (!TTI) // In assert-less builds, avoid crashing
- return true; // by always claiming to be high-cost.
- SmallVector<const SCEV *, 8> Worklist;
- SmallPtrSet<const SCEV *, 8> Processed;
- int BudgetRemaining = Budget * TargetTransformInfo::TCC_Basic;
- Worklist.emplace_back(Expr);
- while (!Worklist.empty()) {
- const SCEV *S = Worklist.pop_back_val();
- if (isHighCostExpansionHelper(S, L, *At, BudgetRemaining, *TTI,
- Processed, Worklist))
- return true;
- }
- assert(BudgetRemaining >= 0 && "Should have returned from inner loop.");
- return false;
+ for (auto &VH : InsertedPostIncValues) {
+ Value *V = VH;
+ if (ReusedValues.contains(V))
+ continue;
+ if (auto *Inst = dyn_cast<Instruction>(V))
+ Result.push_back(Inst);
}
- /// This method returns the canonical induction variable of the specified
- /// type for the specified loop (inserting one if there is none). A
- /// canonical induction variable starts at zero and steps by one on each
- /// iteration.
- PHINode *getOrInsertCanonicalInductionVariable(const Loop *L, Type *Ty);
-
- /// Return the induction variable increment's IV operand.
- Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos,
- bool allowScale);
-
- /// Utility for hoisting an IV increment.
- bool hoistIVInc(Instruction *IncV, Instruction *InsertPos);
-
- /// replace congruent phis with their most canonical representative. Return
- /// the number of phis eliminated.
- unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts,
- const TargetTransformInfo *TTI = nullptr);
-
- /// Insert code to directly compute the specified SCEV expression into the
- /// program. The inserted code is inserted into the specified block.
- Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I);
-
- /// Insert code to directly compute the specified SCEV expression into the
- /// program. The inserted code is inserted into the SCEVExpander's current
- /// insertion point. If a type is specified, the result will be expanded to
- /// have that type, with a cast if necessary.
- Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
-
-
- /// Generates a code sequence that evaluates this predicate. The inserted
- /// instructions will be at position \p Loc. The result will be of type i1
- /// and will have a value of 0 when the predicate is false and 1 otherwise.
- Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc);
-
- /// A specialized variant of expandCodeForPredicate, handling the case when
- /// we are expanding code for a SCEVEqualPredicate.
- Value *expandEqualPredicate(const SCEVEqualPredicate *Pred,
- Instruction *Loc);
-
- /// Generates code that evaluates if the \p AR expression will overflow.
- Value *generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc,
- bool Signed);
-
- /// A specialized variant of expandCodeForPredicate, handling the case when
- /// we are expanding code for a SCEVWrapPredicate.
- Value *expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc);
-
- /// A specialized variant of expandCodeForPredicate, handling the case when
- /// we are expanding code for a SCEVUnionPredicate.
- Value *expandUnionPredicate(const SCEVUnionPredicate *Pred,
- Instruction *Loc);
-
- /// Set the current IV increment loop and position.
- void setIVIncInsertPos(const Loop *L, Instruction *Pos) {
- assert(!CanonicalMode &&
- "IV increment positions are not supported in CanonicalMode");
- IVIncInsertLoop = L;
- IVIncInsertPos = Pos;
- }
+ return Result;
+ }
- /// Enable post-inc expansion for addrecs referring to the given
- /// loops. Post-inc expansion is only supported in non-canonical mode.
- void setPostInc(const PostIncLoopSet &L) {
- assert(!CanonicalMode &&
- "Post-inc expansion is not supported in CanonicalMode");
- PostIncLoops = L;
+ /// Return true for expressions that can't be evaluated at runtime
+ /// within given \b Budget.
+ ///
+  /// At is a parameter which specifies the point in code where the user is
+  /// going to expand this expression. Sometimes this knowledge can lead to a
+  /// less pessimistic cost estimation.
+ bool isHighCostExpansion(const SCEV *Expr, Loop *L, unsigned Budget,
+ const TargetTransformInfo *TTI,
+ const Instruction *At) {
+ assert(TTI && "This function requires TTI to be provided.");
+ assert(At && "This function requires At instruction to be provided.");
+ if (!TTI) // In assert-less builds, avoid crashing
+ return true; // by always claiming to be high-cost.
+ SmallVector<SCEVOperand, 8> Worklist;
+ SmallPtrSet<const SCEV *, 8> Processed;
+ int BudgetRemaining = Budget * TargetTransformInfo::TCC_Basic;
+ Worklist.emplace_back(-1, -1, Expr);
+ while (!Worklist.empty()) {
+ const SCEVOperand WorkItem = Worklist.pop_back_val();
+ if (isHighCostExpansionHelper(WorkItem, L, *At, BudgetRemaining,
+ *TTI, Processed, Worklist))
+ return true;
}
+ assert(BudgetRemaining >= 0 && "Should have returned from inner loop.");
+ return false;
+ }
+
+ /// Return the induction variable increment's IV operand.
+ Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos,
+ bool allowScale);
+
+ /// Utility for hoisting an IV increment.
+ bool hoistIVInc(Instruction *IncV, Instruction *InsertPos);
+
+ /// replace congruent phis with their most canonical representative. Return
+ /// the number of phis eliminated.
+ unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts,
+ const TargetTransformInfo *TTI = nullptr);
+
+ /// Insert code to directly compute the specified SCEV expression into the
+ /// program. The code is inserted into the specified block.
+ Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I) {
+ return expandCodeForImpl(SH, Ty, I, true);
+ }
+
+ /// Insert code to directly compute the specified SCEV expression into the
+ /// program. The code is inserted into the SCEVExpander's current
+ /// insertion point. If a type is specified, the result will be expanded to
+ /// have that type, with a cast if necessary.
+ Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr) {
+ return expandCodeForImpl(SH, Ty, true);
+ }
+
+ /// Generates a code sequence that evaluates this predicate. The inserted
+ /// instructions will be at position \p Loc. The result will be of type i1
+ /// and will have a value of 0 when the predicate is false and 1 otherwise.
+ Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc);
+
+ /// A specialized variant of expandCodeForPredicate, handling the case when
+ /// we are expanding code for a SCEVEqualPredicate.
+ Value *expandEqualPredicate(const SCEVEqualPredicate *Pred, Instruction *Loc);
+
+ /// Generates code that evaluates if the \p AR expression will overflow.
+ Value *generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc,
+ bool Signed);
+
+ /// A specialized variant of expandCodeForPredicate, handling the case when
+ /// we are expanding code for a SCEVWrapPredicate.
+ Value *expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc);
+
+ /// A specialized variant of expandCodeForPredicate, handling the case when
+ /// we are expanding code for a SCEVUnionPredicate.
+ Value *expandUnionPredicate(const SCEVUnionPredicate *Pred, Instruction *Loc);
+
+ /// Set the current IV increment loop and position.
+ void setIVIncInsertPos(const Loop *L, Instruction *Pos) {
+ assert(!CanonicalMode &&
+ "IV increment positions are not supported in CanonicalMode");
+ IVIncInsertLoop = L;
+ IVIncInsertPos = Pos;
+ }
+
+ /// Enable post-inc expansion for addrecs referring to the given
+ /// loops. Post-inc expansion is only supported in non-canonical mode.
+ void setPostInc(const PostIncLoopSet &L) {
+ assert(!CanonicalMode &&
+ "Post-inc expansion is not supported in CanonicalMode");
+ PostIncLoops = L;
+ }
+
+ /// Disable all post-inc expansion.
+ void clearPostInc() {
+ PostIncLoops.clear();
+
+ // When we change the post-inc loop set, cached expansions may no
+ // longer be valid.
+ InsertedPostIncValues.clear();
+ }
+
+ /// Disable the behavior of expanding expressions in canonical form rather
+ /// than in a more literal form. Non-canonical mode is useful for late
+ /// optimization passes.
+ void disableCanonicalMode() { CanonicalMode = false; }
+
+ void enableLSRMode() { LSRMode = true; }
+
+ /// Set the current insertion point. This is useful if multiple calls to
+ /// expandCodeFor() are going to be made with the same insert point and the
+ /// insert point may be moved during one of the expansions (e.g. if the
+ /// insert point is not a block terminator).
+ void setInsertPoint(Instruction *IP) {
+ assert(IP);
+ Builder.SetInsertPoint(IP);
+ }
+
+ /// Clear the current insertion point. This is useful if the instruction
+ /// that had been serving as the insertion point may have been deleted.
+ void clearInsertPoint() { Builder.ClearInsertionPoint(); }
+
+ /// Set location information used by debugging information.
+ void SetCurrentDebugLocation(DebugLoc L) {
+ Builder.SetCurrentDebugLocation(std::move(L));
+ }
+
+ /// Get location information used by debugging information.
+ DebugLoc getCurrentDebugLocation() const {
+ return Builder.getCurrentDebugLocation();
+ }
+
+ /// Return true if the specified instruction was inserted by the code
+ /// rewriter. If so, the client should not modify the instruction. Note that
+ /// this also includes instructions re-used during expansion.
+ bool isInsertedInstruction(Instruction *I) const {
+ return InsertedValues.count(I) || InsertedPostIncValues.count(I);
+ }
+
+ void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); }
+
+ /// Try to find the ValueOffsetPair for S. The function is mainly used to
+ /// check whether S can be expanded cheaply. If this returns a non-None
+ /// value, we know we can codegen the `ValueOffsetPair` into a suitable
+ /// expansion identical with S so that S can be expanded cheaply.
+ ///
+ /// L is a hint which tells in which loop to look for the suitable value.
+  /// On success, returns a value which is equivalent to the expanded S at the
+  /// point At. Returns None if no suitable value was found.
+  ///
+  /// Note that this function does not perform an exhaustive search, i.e. if it
+  /// did not find any value, that does not mean that no such value exists.
+ ///
+ Optional<ScalarEvolution::ValueOffsetPair>
+ getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L);
- /// Disable all post-inc expansion.
- void clearPostInc() {
- PostIncLoops.clear();
+ /// Returns a suitable insert point after \p I, that dominates \p
+ /// MustDominate. Skips instructions inserted by the expander.
+ BasicBlock::iterator findInsertPointAfter(Instruction *I,
+ Instruction *MustDominate);
- // When we change the post-inc loop set, cached expansions may no
- // longer be valid.
- InsertedPostIncValues.clear();
- }
+private:
+ LLVMContext &getContext() const { return SE.getContext(); }
- /// Disable the behavior of expanding expressions in canonical form rather
- /// than in a more literal form. Non-canonical mode is useful for late
- /// optimization passes.
- void disableCanonicalMode() { CanonicalMode = false; }
+ /// Insert code to directly compute the specified SCEV expression into the
+ /// program. The code is inserted into the SCEVExpander's current
+ /// insertion point. If a type is specified, the result will be expanded to
+ /// have that type, with a cast if necessary. If \p Root is true, this
+ /// indicates that \p SH is the top-level expression to expand passed from
+ /// an external client call.
+ Value *expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root);
- void enableLSRMode() { LSRMode = true; }
+ /// Insert code to directly compute the specified SCEV expression into the
+ /// program. The code is inserted into the specified block. If \p
+ /// Root is true, this indicates that \p SH is the top-level expression to
+ /// expand passed from an external client call.
+ Value *expandCodeForImpl(const SCEV *SH, Type *Ty, Instruction *I, bool Root);
- /// Set the current insertion point. This is useful if multiple calls to
- /// expandCodeFor() are going to be made with the same insert point and the
- /// insert point may be moved during one of the expansions (e.g. if the
- /// insert point is not a block terminator).
- void setInsertPoint(Instruction *IP) {
- assert(IP);
- Builder.SetInsertPoint(IP);
- }
+ /// Recursive helper function for isHighCostExpansion.
+ bool isHighCostExpansionHelper(
+ const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
+ int &BudgetRemaining, const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const SCEV *> &Processed,
+ SmallVectorImpl<SCEVOperand> &Worklist);
- /// Clear the current insertion point. This is useful if the instruction
- /// that had been serving as the insertion point may have been deleted.
- void clearInsertPoint() { Builder.ClearInsertionPoint(); }
+ /// Insert the specified binary operator, doing a small amount of work to
+ /// avoid inserting an obviously redundant operation, and hoisting to an
+ /// outer loop when the opportunity is there and it is safe.
+ Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
+ SCEV::NoWrapFlags Flags, bool IsSafeToHoist);
- /// Set location information used by debugging information.
- void SetCurrentDebugLocation(DebugLoc L) {
- Builder.SetCurrentDebugLocation(std::move(L));
- }
+ /// Arrange for there to be a cast of V to Ty at IP, reusing an existing
+ /// cast if a suitable one exists, moving an existing cast if a suitable one
+ /// exists but isn't in the right place, or creating a new one.
+ Value *ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op,
+ BasicBlock::iterator IP);
- /// Get location information used by debugging information.
- const DebugLoc &getCurrentDebugLocation() const {
- return Builder.getCurrentDebugLocation();
- }
+ /// Insert a cast of V to the specified type, which must be possible with a
+ /// noop cast, doing what we can to share the casts.
+ Value *InsertNoopCastOfTo(Value *V, Type *Ty);
- /// Return true if the specified instruction was inserted by the code
- /// rewriter. If so, the client should not modify the instruction.
- bool isInsertedInstruction(Instruction *I) const {
- return InsertedValues.count(I) || InsertedPostIncValues.count(I);
- }
+ /// Expand a SCEVAddExpr with a pointer type into a GEP instead of using
+ /// ptrtoint+arithmetic+inttoptr.
+ Value *expandAddToGEP(const SCEV *const *op_begin, const SCEV *const *op_end,
+ PointerType *PTy, Type *Ty, Value *V);
+ Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V);
- void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); }
-
- /// Try to find existing LLVM IR value for S available at the point At.
- Value *getExactExistingExpansion(const SCEV *S, const Instruction *At,
- Loop *L);
-
- /// Try to find the ValueOffsetPair for S. The function is mainly used to
- /// check whether S can be expanded cheaply. If this returns a non-None
- /// value, we know we can codegen the `ValueOffsetPair` into a suitable
- /// expansion identical with S so that S can be expanded cheaply.
- ///
- /// L is a hint which tells in which loop to look for the suitable value.
- /// On success return value which is equivalent to the expanded S at point
- /// At. Return nullptr if value was not found.
- ///
- /// Note that this function does not perform an exhaustive search. I.e if it
- /// didn't find any value it does not mean that there is no such value.
- ///
- Optional<ScalarEvolution::ValueOffsetPair>
- getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L);
-
- private:
- LLVMContext &getContext() const { return SE.getContext(); }
-
- /// Recursive helper function for isHighCostExpansion.
- bool isHighCostExpansionHelper(const SCEV *S, Loop *L,
- const Instruction &At, int &BudgetRemaining,
- const TargetTransformInfo &TTI,
- SmallPtrSetImpl<const SCEV *> &Processed,
- SmallVectorImpl<const SCEV *> &Worklist);
-
- /// Insert the specified binary operator, doing a small amount of work to
- /// avoid inserting an obviously redundant operation, and hoisting to an
- /// outer loop when the opportunity is there and it is safe.
- Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
- SCEV::NoWrapFlags Flags, bool IsSafeToHoist);
-
- /// Arrange for there to be a cast of V to Ty at IP, reusing an existing
- /// cast if a suitable one exists, moving an existing cast if a suitable one
- /// exists but isn't in the right place, or creating a new one.
- Value *ReuseOrCreateCast(Value *V, Type *Ty,
- Instruction::CastOps Op,
- BasicBlock::iterator IP);
-
- /// Insert a cast of V to the specified type, which must be possible with a
- /// noop cast, doing what we can to share the casts.
- Value *InsertNoopCastOfTo(Value *V, Type *Ty);
-
- /// Expand a SCEVAddExpr with a pointer type into a GEP instead of using
- /// ptrtoint+arithmetic+inttoptr.
- Value *expandAddToGEP(const SCEV *const *op_begin,
- const SCEV *const *op_end,
- PointerType *PTy, Type *Ty, Value *V);
- Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V);
-
- /// Find a previous Value in ExprValueMap for expand.
- ScalarEvolution::ValueOffsetPair
- FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
-
- Value *expand(const SCEV *S);
-
- /// Determine the most "relevant" loop for the given SCEV.
- const Loop *getRelevantLoop(const SCEV *);
-
- Value *visitConstant(const SCEVConstant *S) {
- return S->getValue();
- }
+ /// Find a previous Value in ExprValueMap for expand.
+ ScalarEvolution::ValueOffsetPair
+ FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
- Value *visitTruncateExpr(const SCEVTruncateExpr *S);
+ Value *expand(const SCEV *S);
- Value *visitZeroExtendExpr(const SCEVZeroExtendExpr *S);
+ /// Determine the most "relevant" loop for the given SCEV.
+ const Loop *getRelevantLoop(const SCEV *);
- Value *visitSignExtendExpr(const SCEVSignExtendExpr *S);
+ Value *visitConstant(const SCEVConstant *S) { return S->getValue(); }
- Value *visitAddExpr(const SCEVAddExpr *S);
+ Value *visitPtrToIntExpr(const SCEVPtrToIntExpr *S);
- Value *visitMulExpr(const SCEVMulExpr *S);
+ Value *visitTruncateExpr(const SCEVTruncateExpr *S);
- Value *visitUDivExpr(const SCEVUDivExpr *S);
+ Value *visitZeroExtendExpr(const SCEVZeroExtendExpr *S);
- Value *visitAddRecExpr(const SCEVAddRecExpr *S);
+ Value *visitSignExtendExpr(const SCEVSignExtendExpr *S);
- Value *visitSMaxExpr(const SCEVSMaxExpr *S);
+ Value *visitAddExpr(const SCEVAddExpr *S);
- Value *visitUMaxExpr(const SCEVUMaxExpr *S);
+ Value *visitMulExpr(const SCEVMulExpr *S);
- Value *visitSMinExpr(const SCEVSMinExpr *S);
+ Value *visitUDivExpr(const SCEVUDivExpr *S);
- Value *visitUMinExpr(const SCEVUMinExpr *S);
+ Value *visitAddRecExpr(const SCEVAddRecExpr *S);
- Value *visitUnknown(const SCEVUnknown *S) {
- return S->getValue();
- }
+ Value *visitSMaxExpr(const SCEVSMaxExpr *S);
- void rememberInstruction(Value *I);
+ Value *visitUMaxExpr(const SCEVUMaxExpr *S);
- bool isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L);
+ Value *visitSMinExpr(const SCEVSMinExpr *S);
- bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L);
+ Value *visitUMinExpr(const SCEVUMinExpr *S);
- Value *expandAddRecExprLiterally(const SCEVAddRecExpr *);
- PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
- const Loop *L,
- Type *ExpandTy,
- Type *IntTy,
- Type *&TruncTy,
- bool &InvertStep);
- Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
- Type *ExpandTy, Type *IntTy, bool useSubtract);
+ Value *visitUnknown(const SCEVUnknown *S) { return S->getValue(); }
- void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
- Instruction *Pos, PHINode *LoopPhi);
+ void rememberInstruction(Value *I);
- void fixupInsertPoints(Instruction *I);
- };
-}
+ bool isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L);
+
+ bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L);
+
+ Value *expandAddRecExprLiterally(const SCEVAddRecExpr *);
+ PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+ const Loop *L, Type *ExpandTy, Type *IntTy,
+ Type *&TruncTy, bool &InvertStep);
+ Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L, Type *ExpandTy,
+ Type *IntTy, bool useSubtract);
+
+ void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
+ Instruction *Pos, PHINode *LoopPhi);
+
+ void fixupInsertPoints(Instruction *I);
+
+  /// If required, create LCSSA PHIs for \p User's operand \p OpIdx. If new
+ /// LCSSA PHIs have been created, return the LCSSA PHI available at \p User.
+ /// If no PHIs have been created, return the unchanged operand \p OpIdx.
+ Value *fixupLCSSAFormFor(Instruction *User, unsigned OpIdx);
+};
+
+/// Helper to remove instructions inserted during SCEV expansion, unless they
+/// are marked as used.
+class SCEVExpanderCleaner {
+ SCEVExpander &Expander;
+
+ DominatorTree &DT;
+
+ /// Indicates whether the result of the expansion is used. If false, the
+ /// instructions added during expansion are removed.
+ bool ResultUsed;
+
+public:
+ SCEVExpanderCleaner(SCEVExpander &Expander, DominatorTree &DT)
+ : Expander(Expander), DT(DT), ResultUsed(false) {}
+
+ ~SCEVExpanderCleaner();
+
+ /// Indicate that the result of the expansion is used.
+ void markResultUsed() { ResultUsed = true; }
+};
+} // namespace llvm
#endif
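The reworked SCEVExpander interface above pairs naturally with the new SCEVExpanderCleaner. The following is a minimal usage sketch, not taken from this commit, assuming the calling pass already has `ScalarEvolution &SE`, `DominatorTree &DT`, `const DataLayout &DL`, and a `Loop *L` with a preheader:

// Expand the loop's backedge-taken count in the preheader; if the caller
// decides not to use the result, SCEVExpanderCleaner erases what was added.
SCEVExpander Expander(SE, DL, "scev.sketch"); // canonical mode, PreserveLCSSA defaults to true
SCEVExpanderCleaner Cleaner(Expander, DT);
const SCEV *BTC = SE.getBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(BTC)) {
  Instruction *IP = L->getLoopPreheader()->getTerminator();
  Value *Count = Expander.expandCodeFor(BTC, BTC->getType(), IP);
  // ... use Count, e.g. to build a runtime check ...
  Cleaner.markResultUsed(); // keep the expanded instructions
}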
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
new file mode 100644
index 000000000000..fb3a7490346f
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -0,0 +1,77 @@
+//===- SimplifyCFGOptions.h - Control structure for SimplifyCFG -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A set of parameters used to control the transforms in the SimplifyCFG pass.
+// Options may change depending on the position in the optimization pipeline.
+// For example, canonical form that includes switches and branches may later be
+// replaced by lookup tables and selects.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H
+#define LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H
+
+namespace llvm {
+
+class AssumptionCache;
+
+struct SimplifyCFGOptions {
+ int BonusInstThreshold = 1;
+ bool ForwardSwitchCondToPhi = false;
+ bool ConvertSwitchToLookupTable = false;
+ bool NeedCanonicalLoop = true;
+ bool HoistCommonInsts = false;
+ bool SinkCommonInsts = false;
+ bool SimplifyCondBranch = true;
+ bool FoldTwoEntryPHINode = true;
+
+ AssumptionCache *AC = nullptr;
+
+ // Support 'builder' pattern to set members by name at construction time.
+ SimplifyCFGOptions &bonusInstThreshold(int I) {
+ BonusInstThreshold = I;
+ return *this;
+ }
+ SimplifyCFGOptions &forwardSwitchCondToPhi(bool B) {
+ ForwardSwitchCondToPhi = B;
+ return *this;
+ }
+ SimplifyCFGOptions &convertSwitchToLookupTable(bool B) {
+ ConvertSwitchToLookupTable = B;
+ return *this;
+ }
+ SimplifyCFGOptions &needCanonicalLoops(bool B) {
+ NeedCanonicalLoop = B;
+ return *this;
+ }
+ SimplifyCFGOptions &hoistCommonInsts(bool B) {
+ HoistCommonInsts = B;
+ return *this;
+ }
+ SimplifyCFGOptions &sinkCommonInsts(bool B) {
+ SinkCommonInsts = B;
+ return *this;
+ }
+ SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) {
+ AC = Cache;
+ return *this;
+ }
+ SimplifyCFGOptions &setSimplifyCondBranch(bool B) {
+ SimplifyCondBranch = B;
+ return *this;
+ }
+
+ SimplifyCFGOptions &setFoldTwoEntryPHINode(bool B) {
+ FoldTwoEntryPHINode = B;
+ return *this;
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H
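Since SimplifyCFGOptions is a plain aggregate with chainable setters, callers configure it fluently. A hedged sketch follows; the option values are purely illustrative, and it assumes the new-pass-manager SimplifyCFGPass constructor that accepts a SimplifyCFGOptions:

// Late-pipeline flavour: allow lookup tables, drop the canonical-loop
// requirement, and enable hoisting/sinking of common instructions.
FunctionPassManager FPM;
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                .bonusInstThreshold(1)
                                .convertSwitchToLookupTable(true)
                                .needCanonicalLoops(false)
                                .hoistCommonInsts(true)
                                .sinkCommonInsts(true)));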
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
index 53b15e4aa66c..4ba56fb45afa 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -15,6 +15,8 @@
#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
#define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/ValueHandle.h"
namespace llvm {
@@ -57,6 +59,27 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
LoopInfo *LI, const TargetTransformInfo *TTI,
SmallVectorImpl<WeakTrackingVH> &Dead);
+/// Collect information about induction variables that are used by sign/zero
+/// extend operations. This information is recorded by CollectExtend and
+/// provides the input to WidenIV.
+struct WideIVInfo {
+ PHINode *NarrowIV = nullptr;
+
+  // Widest integer type created by [sz]ext operations.
+ Type *WidestNativeType = nullptr;
+
+ // Was a sext user seen before a zext?
+ bool IsSigned = false;
+};
+
+/// Widen Induction Variables - Extend the width of an IV to cover its
+/// widest uses.
+PHINode *createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE,
+                      SCEVExpander &Rewriter, DominatorTree *DT,
+                      SmallVectorImpl<WeakTrackingVH> &DeadInsts,
+                      unsigned &NumElimExt, unsigned &NumWidened,
+                      bool HasGuards, bool UsePostIncrementRanges);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
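The new WideIVInfo/createWideIV entry points let IndVarSimplify-style widening be reused from other passes. A sketch under the assumption that the caller already has `LoopInfo *LI`, `ScalarEvolution *SE`, `DominatorTree *DT`, the module's `DataLayout DL`, and a narrow induction PHI `NarrowPhi` whose widest [sz]ext user is i64:

WideIVInfo WI;
WI.NarrowIV = NarrowPhi;
WI.WidestNativeType = Type::getInt64Ty(NarrowPhi->getContext());
WI.IsSigned = true; // a sext user was seen before any zext

SmallVector<WeakTrackingVH, 16> DeadInsts;
unsigned NumElimExt = 0, NumWidened = 0;
SCEVExpander Rewriter(*SE, DL, "widen-iv");
if (PHINode *WidePhi = createWideIV(WI, LI, SE, Rewriter, DT, DeadInsts,
                                    NumElimExt, NumWidened,
                                    /*HasGuards=*/false,
                                    /*UsePostIncrementRanges=*/true)) {
  // WidePhi now covers the widest uses; DeadInsts collects the replaced
  // instructions for the caller to delete afterwards.
}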
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index d6ee19365c72..8703434e1696 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -24,7 +24,6 @@ class CallInst;
class DataLayout;
class Instruction;
class IRBuilderBase;
-class TargetLibraryInfo;
class Function;
class OptimizationRemarkEmitter;
class BlockFrequencyInfo;
@@ -60,6 +59,7 @@ private:
Value *optimizeStrpCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func);
Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func);
Value *optimizeStrLenChk(CallInst *CI, IRBuilderBase &B);
+ Value *optimizeMemPCpyChk(CallInst *CI, IRBuilderBase &B);
Value *optimizeMemCCpyChk(CallInst *CI, IRBuilderBase &B);
Value *optimizeSNPrintfChk(CallInst *CI, IRBuilderBase &B);
Value *optimizeSPrintfChk(CallInst *CI,IRBuilderBase &B);
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SizeOpts.h
index 08d963475f23..3c1173b747d3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SizeOpts.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SizeOpts.h
@@ -19,7 +19,6 @@
extern llvm::cl::opt<bool> EnablePGSO;
extern llvm::cl::opt<bool> PGSOLargeWorkingSetSizeOnly;
-extern llvm::cl::opt<bool> PGSOIRPassOrTestOnly;
extern llvm::cl::opt<bool> PGSOColdCodeOnly;
extern llvm::cl::opt<bool> PGSOColdCodeOnlyForInstrPGO;
extern llvm::cl::opt<bool> PGSOColdCodeOnlyForSamplePGO;
@@ -60,11 +59,6 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
return true;
if (!EnablePGSO)
return false;
- // Temporarily enable size optimizations only for the IR pass or test query
- // sites for gradual commit/rollout. This is to be removed later.
- if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass ||
- QueryType == PGSOQueryType::Test))
- return false;
if (isPGSOColdCodeOnly(PSI))
return AdapterT::isFunctionColdInCallGraph(F, PSI, *BFI);
if (PSI->hasSampleProfile())
@@ -85,11 +79,6 @@ bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryIn
return true;
if (!EnablePGSO)
return false;
- // Temporarily enable size optimizations only for the IR pass or test query
- // sites for gradual commit/rollout. This is to be removed later.
- if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass ||
- QueryType == PGSOQueryType::Test))
- return false;
if (isPGSOColdCodeOnly(PSI))
return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI);
if (PSI->hasSampleProfile())
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/StripGCRelocates.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/StripGCRelocates.h
new file mode 100644
index 000000000000..13e6d8ac26a7
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/StripGCRelocates.h
@@ -0,0 +1,25 @@
+//===- StripGCRelocates.h - -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_STRIPGCRELOCATES_H
+#define LLVM_TRANSFORMS_UTILS_STRIPGCRELOCATES_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+class StripGCRelocates : public PassInfoMixin<StripGCRelocates> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_STRIPGCRELOCATES_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h
new file mode 100644
index 000000000000..20d0aabd2938
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h
@@ -0,0 +1,26 @@
+//===- StripNonLineTableDebugInfo.h - -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_STRIPNONLINETABLEDEBUGINFO_H
+#define LLVM_TRANSFORMS_UTILS_STRIPNONLINETABLEDEBUGINFO_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+class StripNonLineTableDebugInfoPass
+ : public PassInfoMixin<StripNonLineTableDebugInfoPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_STRIPNONLINETABLEDEBUGINFO_H
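Both of the new Strip* headers above declare plain PassInfoMixin passes, so they plug directly into the new pass manager. A minimal sketch, assuming the usual PassBuilder-style setup of function and module pass managers:

FunctionPassManager FPM;
FPM.addPass(StripGCRelocates()); // function-level pass

ModulePassManager MPM;
MPM.addPass(StripNonLineTableDebugInfoPass()); // module-level pass
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));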
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index ff70446e163d..20b360212506 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -7,46 +7,39 @@
//===----------------------------------------------------------------------===//
//
// This pass is used to ensure that functions have at most one return and one
-// unwind instruction in them. Additionally, it keeps track of which node is
-// the new exit node of the CFG. If there are no return or unwind instructions
-// in the function, the getReturnBlock/getUnwindBlock methods will return a null
-// pointer.
+// unreachable instruction in them.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H
#define LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
namespace llvm {
class BasicBlock;
-struct UnifyFunctionExitNodes : public FunctionPass {
- BasicBlock *ReturnBlock = nullptr;
- BasicBlock *UnwindBlock = nullptr;
- BasicBlock *UnreachableBlock;
-
+class UnifyFunctionExitNodesLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- UnifyFunctionExitNodes();
+ UnifyFunctionExitNodesLegacyPass();
// We can preserve non-critical-edgeness when we unify function exit nodes
void getAnalysisUsage(AnalysisUsage &AU) const override;
- // getReturn|Unwind|UnreachableBlock - Return the new single (or nonexistent)
- // return, unwind, or unreachable basic blocks in the CFG.
- //
- BasicBlock *getReturnBlock() const { return ReturnBlock; }
- BasicBlock *getUnwindBlock() const { return UnwindBlock; }
- BasicBlock *getUnreachableBlock() const { return UnreachableBlock; }
-
bool runOnFunction(Function &F) override;
};
Pass *createUnifyFunctionExitNodesPass();
+class UnifyFunctionExitNodesPass
+ : public PassInfoMixin<UnifyFunctionExitNodesPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H
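With the rename above, the legacy pass keeps its factory function while new-pass-manager pipelines use the PassInfoMixin class. A brief sketch of both registration paths, assuming a Module `M` is in scope:

// Legacy pass manager: unchanged call site, now backed by
// UnifyFunctionExitNodesLegacyPass.
legacy::FunctionPassManager LegacyFPM(&M);
LegacyFPM.add(createUnifyFunctionExitNodesPass());

// New pass manager.
FunctionPassManager FPM;
FPM.addPass(UnifyFunctionExitNodesPass());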
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyLoopExits.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyLoopExits.h
new file mode 100644
index 000000000000..0b219cd12222
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyLoopExits.h
@@ -0,0 +1,22 @@
+//===- UnifyLoopExits.h - Redirect exiting edges to one block -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_UNIFYLOOPEXITS_H
+#define LLVM_TRANSFORMS_UTILS_UNIFYLOOPEXITS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class UnifyLoopExitsPass : public PassInfoMixin<UnifyLoopExitsPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_UNIFYLOOPEXITS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index bb3d02b95956..4254bd71a41c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -92,16 +92,6 @@ bool UnrollRuntimeLoopRemainder(
const TargetTransformInfo *TTI, bool PreserveLCSSA,
Loop **ResultLoop = nullptr);
-void computePeelCount(Loop *L, unsigned LoopSize,
- TargetTransformInfo::UnrollingPreferences &UP,
- TargetTransformInfo::PeelingPreferences &PP,
- unsigned &TripCount, ScalarEvolution &SE);
-
-bool canPeel(Loop *L);
-
-bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);
-
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
unsigned TripMultiple, bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE,
@@ -121,7 +111,6 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP,
-
bool &UseUpperBound);
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
@@ -138,12 +127,6 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount);
-TargetTransformInfo::PeelingPreferences
-gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
- const TargetTransformInfo &TTI,
- Optional<bool> UserAllowPeeling,
- Optional<bool> UserAllowProfileBasedPeeling);
-
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable, bool &Convergent,
const TargetTransformInfo &TTI,
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index c6c3450f7760..246db0fd2dd9 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -29,6 +29,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
namespace llvm {
@@ -43,8 +44,14 @@ namespace llvm {
/// for example 'force', means a decision has been made. So, we need to be
/// careful NOT to add them if the user hasn't specifically asked so.
class LoopVectorizeHints {
- enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
- HK_PREDICATE };
+ enum HintKind {
+ HK_WIDTH,
+ HK_UNROLL,
+ HK_FORCE,
+ HK_ISVECTORIZED,
+ HK_PREDICATE,
+ HK_SCALABLE
+ };
/// Hint - associates name and validation with the hint value.
struct Hint {
@@ -73,6 +80,9 @@ class LoopVectorizeHints {
/// Vector Predicate
Hint Predicate;
+ /// Says whether we should use fixed width or scalable vectorization.
+ Hint Scalable;
+
/// Return the loop metadata prefix.
static StringRef Prefix() { return "llvm.loop."; }
@@ -98,7 +108,9 @@ public:
/// Dumps all the hint information.
void emitRemarkWithHints() const;
- unsigned getWidth() const { return Width.Value; }
+ ElementCount getWidth() const {
+ return ElementCount::get(Width.Value, isScalable());
+ }
unsigned getInterleave() const { return Interleave.Value; }
unsigned getIsVectorized() const { return IsVectorized.Value; }
unsigned getPredicate() const { return Predicate.Value; }
@@ -109,6 +121,8 @@ public:
return (ForceKind)Force.Value;
}
+ bool isScalable() const { return Scalable.Value; }
+
/// If hints are provided that force vectorization, use the AlwaysPrint
/// pass name to force the frontend to print the diagnostic.
const char *vectorizeAnalysisPassName() const;
@@ -119,7 +133,9 @@ public:
// enabled by default because can be unsafe or inefficient. For example,
// reordering floating-point operations will change the way round-off
// error accumulates in the loop.
- return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
+ ElementCount EC = getWidth();
+ return getForce() == LoopVectorizeHints::FK_Enabled ||
+ EC.getKnownMinValue() > 1;
}
bool isPotentiallyUnsafe() const {
@@ -202,9 +218,10 @@ public:
Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
LoopInfo *LI, OptimizationRemarkEmitter *ORE,
LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
- AssumptionCache *AC)
+ AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
: TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
- GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
+ GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC),
+ BFI(BFI), PSI(PSI) {}
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
@@ -229,6 +246,7 @@ public:
/// Return true if we can vectorize this loop while folding its tail by
/// masking, and mark all respective loads/stores for masking.
+  /// This object's state is modified iff this function returns true.
bool prepareToFoldTailByMasking();
/// Returns the primary induction variable.
@@ -287,6 +305,19 @@ public:
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
+ /// A uniform memory op is a load or store which accesses the same memory
+ /// location on all lanes.
+ bool isUniformMemOp(Instruction &I) {
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ if (!Ptr)
+ return false;
+ // Note: There's nothing inherent which prevents predicated loads and
+ // stores from being uniform. The current lowering simply doesn't handle
+ // it; in particular, the cost model distinguishes scatter/gather from
+ // scalar w/predication, and we currently rely on the scalar path.
+ return isUniform(Ptr) && !blockNeedsPredication(I.getParent());
+ }
+
/// Returns the information that we collected about runtime memory check.
const RuntimePointerChecking *getRuntimePointerChecking() const {
return LAI->getRuntimePointerChecking();
@@ -294,17 +325,21 @@ public:
const LoopAccessInfo *getLAI() const { return LAI; }
+ bool isSafeForAnyVectorWidth() const {
+ return LAI->getDepChecker().isSafeForAnyVectorWidth();
+ }
+
unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
- uint64_t getMaxSafeRegisterWidth() const {
- return LAI->getDepChecker().getMaxSafeRegisterWidth();
+ uint64_t getMaxSafeVectorWidthInBits() const {
+ return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
}
bool hasStride(Value *V) { return LAI->hasStride(V); }
/// Returns true if vector representation of the instruction \p I
/// requires mask.
- bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
+ bool isMaskRequired(const Instruction *I) { return MaskedOp.contains(I); }
unsigned getNumStores() const { return LAI->getNumStores(); }
unsigned getNumLoads() const { return LAI->getNumLoads(); }
@@ -361,16 +396,17 @@ private:
bool canVectorizeOuterLoop();
/// Return true if all of the instructions in the block can be speculatively
- /// executed, and record the loads/stores that require masking. If's that
- /// guard loads can be ignored under "assume safety" unless \p PreserveGuards
- /// is true. This can happen when we introduces guards for which the original
- /// "unguarded-loads are safe" assumption does not hold. For example, the
- /// vectorizer's fold-tail transformation changes the loop to execute beyond
- /// its original trip-count, under a proper guard, which should be preserved.
+ /// executed, and record the loads/stores that require masking.
/// \p SafePtrs is a list of addresses that are known to be legal and we know
/// that we can read from them without segfault.
- bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
- bool PreserveGuards = false);
+ /// \p MaskedOp is a list of instructions that have to be transformed into
+ /// calls to the appropriate masked intrinsic when the loop is vectorized.
+ /// \p ConditionalAssumes is a list of assume instructions in predicated
+ /// blocks that must be dropped if the CFG gets flattened.
+ bool blockCanBePredicated(
+ BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
+ SmallPtrSetImpl<const Instruction *> &MaskedOp,
+ SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const;
/// Updates the vectorization state by adding \p Phi to the inductions list.
/// This can set \p Phi as the main induction of the loop if \p Phi is a
@@ -478,6 +514,10 @@ private:
/// Assume instructions in predicated blocks must be dropped if the CFG gets
/// flattened.
SmallPtrSet<Instruction *, 8> ConditionalAssumes;
+
+ /// BFI and PSI are used to check for profile guided size optimizations.
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
};
} // namespace llvm
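The getWidth()/isScalable() changes above move callers from a plain unsigned width to ElementCount. A sketch of the adjusted caller pattern, where `Hints` is assumed to be a LoopVectorizeHints instance:

ElementCount UserVF = Hints.getWidth();
if (UserVF.isScalable()) {
  // A scalable vectorization width was requested via the new hint.
}
if (UserVF.getKnownMinValue() > 1) {
  // An explicit (minimum) width was requested, mirroring the old
  // `getWidth() > 1` check in allowReordering().
}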
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 77236dec75dc..52a57939209c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -22,11 +22,11 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class AAResults;
class AssumptionCache;
class BasicBlock;
class CmpInst;
@@ -34,6 +34,7 @@ class DataLayout;
class DemandedBits;
class DominatorTree;
class Function;
+class GetElementPtrInst;
class InsertElementInst;
class InsertValueInst;
class Instruction;
@@ -63,7 +64,7 @@ struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
ScalarEvolution *SE = nullptr;
TargetTransformInfo *TTI = nullptr;
TargetLibraryInfo *TLI = nullptr;
- AliasAnalysis *AA = nullptr;
+ AAResults *AA = nullptr;
LoopInfo *LI = nullptr;
DominatorTree *DT = nullptr;
AssumptionCache *AC = nullptr;
@@ -75,7 +76,7 @@ public:
// Glue for old PM.
bool runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_,
- TargetLibraryInfo *TLI_, AliasAnalysis *AA_, LoopInfo *LI_,
+ TargetLibraryInfo *TLI_, AAResults *AA_, LoopInfo *LI_,
DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_,
OptimizationRemarkEmitter *ORE_);
diff --git a/contrib/llvm-project/llvm/include/llvm/module.modulemap b/contrib/llvm-project/llvm/include/llvm/module.modulemap
index 778a17c8aeee..a199f7f2d79a 100644
--- a/contrib/llvm-project/llvm/include/llvm/module.modulemap
+++ b/contrib/llvm-project/llvm/include/llvm/module.modulemap
@@ -30,6 +30,7 @@ module LLVM_Backend {
// These are intended for (repeated) textual inclusion.
textual header "CodeGen/DIEValue.def"
+ textual header "CodeGen/MachinePassRegistry.def"
}
}
@@ -65,6 +66,7 @@ module LLVM_BinaryFormat {
textual header "BinaryFormat/ELFRelocs/ARC.def"
textual header "BinaryFormat/ELFRelocs/AVR.def"
textual header "BinaryFormat/ELFRelocs/BPF.def"
+ textual header "BinaryFormat/ELFRelocs/CSKY.def"
textual header "BinaryFormat/ELFRelocs/Hexagon.def"
textual header "BinaryFormat/ELFRelocs/i386.def"
textual header "BinaryFormat/ELFRelocs/Lanai.def"
@@ -188,23 +190,30 @@ module LLVM_ExecutionEngine {
exclude header "ExecutionEngine/Orc/RemoteObjectLayer.h"
// Exclude headers from LLVM_OrcSupport.
- exclude header "ExecutionEngine/Orc/OrcError.h"
+ exclude header "ExecutionEngine/Orc/Shared/OrcError.h"
exclude header "ExecutionEngine/Orc/RPC/RPCUtils.h"
exclude header "ExecutionEngine/Orc/RPC/RPCSerialization.h"
exclude header "ExecutionEngine/Orc/RPC/RawByteChannel.h"
}
+module LLVM_FileCheck {
+ requires cplusplus
+
+ umbrella "FileCheck"
+ module * { export * }
+}
+
// Orc utilities that don't depend only on Support (not ExecutionEngine or
// IR). This is a workaround for ExecutionEngine's broken layering, and will
// be removed in the future.
module LLVM_OrcSupport {
requires cplusplus
- header "ExecutionEngine/Orc/OrcError.h"
- header "ExecutionEngine/Orc/RPC/RPCUtils.h"
- header "ExecutionEngine/Orc/RPC/RPCSerialization.h"
- header "ExecutionEngine/Orc/RPC/RawByteChannel.h"
+ header "ExecutionEngine/Orc/Shared/OrcError.h"
+ header "ExecutionEngine/Orc/Shared/RPCUtils.h"
+ header "ExecutionEngine/Orc/Shared/Serialization.h"
+ header "ExecutionEngine/Orc/Shared/RawByteChannel.h"
export *
}
@@ -253,6 +262,7 @@ module LLVM_intrinsic_gen {
module IR_CFG { header "IR/CFG.h" export * }
module IR_ConstantRange { header "IR/ConstantRange.h" export * }
module IR_Dominators { header "IR/Dominators.h" export * }
+ module IR_FixedPointBuilder { header "IR/FixedPointBuilder.h" export * }
module Analysis_PostDominators { header "Analysis/PostDominators.h" export * }
module Analysis_DomTreeUpdater { header "Analysis/DomTreeUpdater.h" export * }
module IR_IRBuilder { header "IR/IRBuilder.h" export * }
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
index fec2415a0e45..fae7a84332fd 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -24,6 +24,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
@@ -54,12 +55,17 @@
#include <functional>
#include <iterator>
+#define DEBUG_TYPE "aa"
+
using namespace llvm;
+STATISTIC(NumNoAlias, "Number of NoAlias results");
+STATISTIC(NumMayAlias, "Number of MayAlias results");
+STATISTIC(NumMustAlias, "Number of MustAlias results");
+
/// Allow disabling BasicAA from the AA results. This is particularly useful
/// when testing to isolate a single AA implementation.
-static cl::opt<bool> DisableBasicAA("disable-basic-aa", cl::Hidden,
- cl::init(false));
+cl::opt<bool> DisableBasicAA("disable-basic-aa", cl::Hidden, cl::init(false));
AAResults::AAResults(AAResults &&Arg)
: TLI(Arg.TLI), AAs(std::move(Arg.AAs)), AADeps(std::move(Arg.AADeps)) {
@@ -110,12 +116,25 @@ AliasResult AAResults::alias(const MemoryLocation &LocA,
AliasResult AAResults::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB, AAQueryInfo &AAQI) {
+ AliasResult Result = MayAlias;
+
+ Depth++;
for (const auto &AA : AAs) {
- auto Result = AA->alias(LocA, LocB, AAQI);
+ Result = AA->alias(LocA, LocB, AAQI);
if (Result != MayAlias)
- return Result;
+ break;
+ }
+ Depth--;
+
+ if (Depth == 0) {
+ if (Result == NoAlias)
+ ++NumNoAlias;
+ else if (Result == MustAlias)
+ ++NumMustAlias;
+ else
+ ++NumMayAlias;
}
- return MayAlias;
+ return Result;
}
bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
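The Depth counter added to alias() above makes the NoAlias/MayAlias/MustAlias statistics count each externally issued query exactly once, even when AA implementations recurse back through the same entry point. A stripped-down illustration of that pattern (not the LLVM code itself):

struct DepthCountedQuery {
  unsigned Depth = 0;
  unsigned TopLevelQueries = 0;
  int query(int X) {
    ++Depth;
    int R = X > 0 ? query(X - 1) : 0; // recursive sub-query
    --Depth;
    if (Depth == 0)
      ++TopLevelQueries; // bump statistics only at the outermost call
    return R;
  }
};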
@@ -215,7 +234,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
unsigned ArgIdx = std::distance(Call->arg_begin(), AI);
MemoryLocation ArgLoc =
MemoryLocation::getForArgument(Call, ArgIdx, TLI);
- AliasResult ArgAlias = alias(ArgLoc, Loc);
+ AliasResult ArgAlias = alias(ArgLoc, Loc, AAQI);
if (ArgAlias != NoAlias) {
ModRefInfo ArgMask = getArgModRefInfo(Call, ArgIdx);
AllArgsMask = unionModRef(AllArgsMask, ArgMask);
@@ -235,7 +254,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
// If Loc is a constant memory location, the call definitely could not
// modify the memory location.
- if (isModSet(Result) && pointsToConstantMemory(Loc, /*OrLocal*/ false))
+ if (isModSet(Result) && pointsToConstantMemory(Loc, AAQI, /*OrLocal*/ false))
Result = clearMod(Result);
return Result;
@@ -312,7 +331,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
// ModRefC1 indicates what Call1 might do to Call2ArgLoc, and we use
// above ArgMask to update dependence info.
- ModRefInfo ModRefC1 = getModRefInfo(Call1, Call2ArgLoc);
+ ModRefInfo ModRefC1 = getModRefInfo(Call1, Call2ArgLoc, AAQI);
ArgMask = intersectModRef(ArgMask, ModRefC1);
// Conservatively clear IsMustAlias unless only MustAlias is found.
@@ -353,7 +372,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
// might Mod Call1ArgLoc, then we care about either a Mod or a Ref by
// Call2. If Call1 might Ref, then we care only about a Mod by Call2.
ModRefInfo ArgModRefC1 = getArgModRefInfo(Call1, Call1ArgIdx);
- ModRefInfo ModRefC2 = getModRefInfo(Call2, Call1ArgLoc);
+ ModRefInfo ModRefC2 = getModRefInfo(Call2, Call1ArgLoc, AAQI);
if ((isModSet(ArgModRefC1) && isModOrRefSet(ModRefC2)) ||
(isRefSet(ArgModRefC1) && isModSet(ModRefC2)))
R = intersectModRef(unionModRef(R, ArgModRefC1), Result);
@@ -628,6 +647,43 @@ ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
return ModRefInfo::ModRef;
}
+ModRefInfo AAResults::getModRefInfo(const Instruction *I,
+ const Optional<MemoryLocation> &OptLoc,
+ AAQueryInfo &AAQIP) {
+ if (OptLoc == None) {
+ if (const auto *Call = dyn_cast<CallBase>(I)) {
+ return createModRefInfo(getModRefBehavior(Call));
+ }
+ }
+
+ const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
+
+ switch (I->getOpcode()) {
+ case Instruction::VAArg:
+ return getModRefInfo((const VAArgInst *)I, Loc, AAQIP);
+ case Instruction::Load:
+ return getModRefInfo((const LoadInst *)I, Loc, AAQIP);
+ case Instruction::Store:
+ return getModRefInfo((const StoreInst *)I, Loc, AAQIP);
+ case Instruction::Fence:
+ return getModRefInfo((const FenceInst *)I, Loc, AAQIP);
+ case Instruction::AtomicCmpXchg:
+ return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP);
+ case Instruction::AtomicRMW:
+ return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP);
+ case Instruction::Call:
+ return getModRefInfo((const CallInst *)I, Loc, AAQIP);
+ case Instruction::Invoke:
+ return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
+ case Instruction::CatchPad:
+ return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP);
+ case Instruction::CatchRet:
+ return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP);
+ default:
+ return ModRefInfo::NoModRef;
+ }
+}
+
/// Return information about whether a particular call site modifies
/// or reads the specified memory location \p MemLoc before instruction \p I
/// in a BasicBlock.
@@ -641,8 +697,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
if (!DT)
return ModRefInfo::ModRef;
- const Value *Object =
- GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout());
+ const Value *Object = getUnderlyingObject(MemLoc.Ptr);
if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
isa<Constant>(Object))
return ModRefInfo::ModRef;
@@ -670,7 +725,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
!Call->isByValArgument(ArgNo)))
continue;
- AliasResult AR = alias(MemoryLocation(*CI), MemoryLocation(Object));
+ AliasResult AR = alias(*CI, Object);
// If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
@@ -828,8 +883,8 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) {
void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<BasicAAWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequiredTransitive<BasicAAWrapperPass>();
+ AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
// We also need to mark all the alias analysis passes we will potentially
// probe in runOnFunction as used here to ensure the legacy pass manager
@@ -845,6 +900,13 @@ void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addUsedIfAvailable<ExternalAAWrapperPass>();
}
+AAManager::Result AAManager::run(Function &F, FunctionAnalysisManager &AM) {
+ Result R(AM.getResult<TargetLibraryAnalysis>(F));
+ for (auto &Getter : ResultGetters)
+ (*Getter)(F, AM, R);
+ return R;
+}
+
AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
BasicAAResult &BAR) {
AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
@@ -881,9 +943,9 @@ bool llvm::isNoAliasCall(const Value *V) {
return false;
}
-bool llvm::isNoAliasArgument(const Value *V) {
+static bool isNoAliasOrByValArgument(const Value *V) {
if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasNoAliasAttr();
+ return A->hasNoAliasAttr() || A->hasByValAttr();
return false;
}
@@ -894,13 +956,13 @@ bool llvm::isIdentifiedObject(const Value *V) {
return true;
if (isNoAliasCall(V))
return true;
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasNoAliasAttr() || A->hasByValAttr();
+ if (isNoAliasOrByValArgument(V))
+ return true;
return false;
}
bool llvm::isIdentifiedFunctionLocal(const Value *V) {
- return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V);
+ return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasOrByValArgument(V);
}
void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index b1433c579af8..bbfa82bcca6a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -140,13 +140,13 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
// iterate over the worklist, and run the full (n^2)/2 disambiguations
for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
I1 != E; ++I1) {
- auto I1Size = LocationSize::unknown();
+ auto I1Size = LocationSize::afterPointer();
Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
if (I1ElTy->isSized())
I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));
for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
- auto I2Size = LocationSize::unknown();
+ auto I2Size = LocationSize::afterPointer();
Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
if (I2ElTy->isSized())
I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));
@@ -231,7 +231,7 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
// Mod/ref alias analysis: compare all pairs of calls and values
for (CallBase *Call : Calls) {
for (auto Pointer : Pointers) {
- auto Size = LocationSize::unknown();
+ auto Size = LocationSize::afterPointer();
Type *ElTy = cast<PointerType>(Pointer->getType())->getElementType();
if (ElTy->isSized())
Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));
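Editorial note: the change from LocationSize::unknown() to the more precise states used in this hunk is worth spelling out. A hedged sketch of the three states this diff relies on (names as in llvm/Analysis/MemoryLocation.h):

#include "llvm/Analysis/MemoryLocation.h"
using llvm::LocationSize;

void locationSizeStates() {
  LocationSize Exact = LocationSize::precise(8);             // exactly 8 bytes
  LocationSize After = LocationSize::afterPointer();         // unknown size starting at the pointer
  LocationSize Any   = LocationSize::beforeOrAfterPointer(); // may also extend before the pointer
  // Only Exact.hasValue() is true; the other two report hasValue() == false.
  (void)Exact; (void)After; (void)Any;
}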
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
index 5cc68f05dc0e..486b4d99dfae 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -21,24 +20,20 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstdint>
-#include <vector>
using namespace llvm;
@@ -88,7 +83,7 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
addRef();
}
} else if (ASHadUnknownInsts) {
- UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end());
+ llvm::append_range(UnknownInsts, AS.UnknownInsts);
AS.UnknownInsts.clear();
}
@@ -443,7 +438,9 @@ void AliasSetTracker::addUnknown(Instruction *Inst) {
break;
// FIXME: Add lifetime/invariant intrinsics (See: PR30807).
case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::sideeffect:
+ case Intrinsic::pseudoprobe:
return;
}
}
@@ -675,8 +672,10 @@ void AliasSet::print(raw_ostream &OS) const {
for (iterator I = begin(), E = end(); I != E; ++I) {
if (I != begin()) OS << ", ";
I.getPointer()->printAsOperand(OS << "(");
- if (I.getSize() == LocationSize::unknown())
- OS << ", unknown)";
+ if (I.getSize() == LocationSize::afterPointer())
+ OS << ", unknown after)";
+ else if (I.getSize() == LocationSize::beforeOrAfterPointer())
+ OS << ", unknown before-or-after)";
else
OS << ", " << I.getSize() << ")";
}
@@ -740,8 +739,6 @@ AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
namespace {
class AliasSetPrinter : public FunctionPass {
- AliasSetTracker *Tracker;
-
public:
static char ID; // Pass identification, replacement for typeid
@@ -756,12 +753,11 @@ namespace {
bool runOnFunction(Function &F) override {
auto &AAWP = getAnalysis<AAResultsWrapperPass>();
- Tracker = new AliasSetTracker(AAWP.getAAResults());
+ AliasSetTracker Tracker(AAWP.getAAResults());
errs() << "Alias sets for function '" << F.getName() << "':\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- Tracker->add(&*I);
- Tracker->print(errs());
- delete Tracker;
+ Tracker.add(&*I);
+ Tracker.print(errs());
return false;
}
};
@@ -775,3 +771,16 @@ INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
"Alias Set Printer", false, true)
+
+AliasSetsPrinterPass::AliasSetsPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+PreservedAnalyses AliasSetsPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &AA = AM.getResult<AAManager>(F);
+ AliasSetTracker Tracker(AA);
+ OS << "Alias sets for function '" << F.getName() << "':\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ Tracker.add(&*I);
+ Tracker.print(OS);
+ return PreservedAnalyses::all();
+}
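Editorial note: a short, hedged sketch of driving the new printer from C++ (printAliasSets is a hypothetical helper; the opt-level registration name is expected to live in PassRegistry.def and is not shown in this hunk):

#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void printAliasSets(Function &F, FunctionAnalysisManager &FAM) {
  FunctionPassManager FPM;
  FPM.addPass(AliasSetsPrinterPass(errs()));
  FPM.run(F, FAM); // FAM must have an AAManager registered, as sketched earlier
}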
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp
index af718526684b..db5167061509 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp
@@ -50,19 +50,20 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeAAResultsWrapperPassPass(Registry);
initializeGlobalsAAWrapperPassPass(Registry);
initializeIVUsersWrapperPassPass(Registry);
- initializeInstCountPass(Registry);
+ initializeInstCountLegacyPassPass(Registry);
initializeIntervalPartitionPass(Registry);
+ initializeIRSimilarityIdentifierWrapperPassPass(Registry);
initializeLazyBranchProbabilityInfoPassPass(Registry);
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
initializeLazyValueInfoPrinterPass(Registry);
initializeLegacyDivergenceAnalysisPass(Registry);
- initializeLintPass(Registry);
+ initializeLintLegacyPassPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
initializeMemDerefPrinterPass(Registry);
initializeMemoryDependenceWrapperPassPass(Registry);
- initializeModuleDebugInfoPrinterPass(Registry);
+ initializeModuleDebugInfoLegacyPrinterPass(Registry);
initializeModuleSummaryIndexWrapperPassPass(Registry);
initializeMustExecutePrinterPass(Registry);
initializeMustBeExecutedContextPrinterPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp b/contrib/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp
index 972d0d3ea7f2..0084e2f13f5f 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -108,10 +108,17 @@ llvm::getKnowledgeFromBundle(CallInst &Assume,
Result.AttrKind = Attribute::getAttrKindFromName(BOI.Tag->getKey());
if (bundleHasArgument(BOI, ABA_WasOn))
Result.WasOn = getValueFromBundleOpInfo(Assume, BOI, ABA_WasOn);
+ auto GetArgOr1 = [&](unsigned Idx) -> unsigned {
+ if (auto *ConstInt = dyn_cast<ConstantInt>(
+ getValueFromBundleOpInfo(Assume, BOI, ABA_Argument + Idx)))
+ return ConstInt->getZExtValue();
+ return 1;
+ };
if (BOI.End - BOI.Begin > ABA_Argument)
- Result.ArgValue =
- cast<ConstantInt>(getValueFromBundleOpInfo(Assume, BOI, ABA_Argument))
- ->getZExtValue();
+ Result.ArgValue = GetArgOr1(0);
+ if (Result.AttrKind == Attribute::Alignment)
+ if (BOI.End - BOI.Begin > ABA_Argument + 1)
+ Result.ArgValue = MinAlign(Result.ArgValue, GetArgOr1(1));
return Result;
}
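Editorial note: a hedged illustration of the MinAlign clamp used above. MinAlign (llvm/Support/MathExtras.h) returns the largest power of two dividing both operands, so a second bundle argument can only weaken the retained alignment, never strengthen it:

#include "llvm/Support/MathExtras.h"

static_assert(llvm::MinAlign(16, 8) == 8, "common power-of-two factor of 16 and 8");
static_assert(llvm::MinAlign(32, 12) == 4, "common power-of-two factor of 32 and 12");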
@@ -172,12 +179,15 @@ llvm::getKnowledgeForValue(const Value *V,
if (!II || Elem.Index == AssumptionCache::ExprResultIdx)
continue;
if (RetainedKnowledge RK = getKnowledgeFromBundle(
- *II, II->bundle_op_info_begin()[Elem.Index]))
+ *II, II->bundle_op_info_begin()[Elem.Index])) {
+ if (V != RK.WasOn)
+ continue;
if (is_contained(AttrKinds, RK.AttrKind) &&
Filter(RK, II, &II->bundle_op_info_begin()[Elem.Index])) {
NumUsefullAssumeQueries++;
return RK;
}
+ }
}
return RetainedKnowledge::none();
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
index 16bfd5c75902..70053fdf8d30 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
@@ -102,13 +102,12 @@ findAffectedValues(CallInst *CI,
}
Value *B;
- ConstantInt *C;
// (A & B) or (A | B) or (A ^ B).
if (match(V, m_BitwiseLogic(m_Value(A), m_Value(B)))) {
AddAffected(A);
AddAffected(B);
// (A << C) or (A >>_s C) or (A >>_u C) where C is some constant.
- } else if (match(V, m_Shift(m_Value(A), m_ConstantInt(C)))) {
+ } else if (match(V, m_Shift(m_Value(A), m_ConstantInt()))) {
AddAffected(A);
}
};
@@ -116,6 +115,14 @@ findAffectedValues(CallInst *CI,
AddAffectedFromEq(A);
AddAffectedFromEq(B);
}
+
+ Value *X;
+ // Handle (A + C1) u< C2, which is the canonical form of A > C3 && A < C4,
+ // and recognized by LVI at least.
+ if (Pred == ICmpInst::ICMP_ULT &&
+ match(A, m_Add(m_Value(X), m_ConstantInt())) &&
+ match(B, m_ConstantInt()))
+ AddAffected(X);
}
}
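Editorial note: a hedged arithmetic illustration of the pattern matched above (the constants are made up). A two-sided range check such as 5 <= A && A < 21 is canonicalized into a single unsigned compare of a shifted value, which is exactly the (A + C1) u< C2 shape the code looks for:

// Same predicate as 5 <= A && A < 21, relying on unsigned wraparound.
bool inRange(unsigned A) { return (A - 5u) < 16u; }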
@@ -156,15 +163,11 @@ void AssumptionCache::unregisterAssumption(CallInst *CI) {
AffectedValues.erase(AVI);
}
- AssumeHandles.erase(
- remove_if(AssumeHandles, [CI](ResultElem &RE) { return CI == RE; }),
- AssumeHandles.end());
+ erase_value(AssumeHandles, CI);
}
void AssumptionCache::AffectedValueCallbackVH::deleted() {
- auto AVI = AC->AffectedValues.find(getValPtr());
- if (AVI != AC->AffectedValues.end())
- AC->AffectedValues.erase(AVI);
+ AC->AffectedValues.erase(getValPtr());
// 'this' now dangles!
}
@@ -175,7 +178,7 @@ void AssumptionCache::transferAffectedValuesInCache(Value *OV, Value *NV) {
return;
for (auto &A : AVI->second)
- if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end())
+ if (!llvm::is_contained(NAVV, A))
NAVV.push_back(A);
AffectedValues.erase(OV);
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 33f122728d2a..97d0cb63ef99 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -66,7 +67,7 @@ using namespace llvm;
/// Enable analysis of recursive PHI nodes.
static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
- cl::init(false));
+ cl::init(true));
/// By default, even on 32-bit architectures we use 64-bit integers for
/// calculations. This will allow us to more-aggressively decompose indexing
@@ -91,7 +92,7 @@ STATISTIC(SearchTimes, "Number of times a GEP is decomposed");
const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
// The max limit of the search depth in DecomposeGEPExpression() and
-// GetUnderlyingObject(), both functions need to use the same search
+// getUnderlyingObject(), both functions need to use the same search
// depth otherwise the algorithm in aliasGEP will assert.
static const unsigned MaxLookupSearchDepth = 6;
@@ -115,50 +116,6 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
// Useful predicates
//===----------------------------------------------------------------------===//
-/// Returns true if the pointer is to a function-local object that never
-/// escapes from the function.
-static bool isNonEscapingLocalObject(
- const Value *V,
- SmallDenseMap<const Value *, bool, 8> *IsCapturedCache = nullptr) {
- SmallDenseMap<const Value *, bool, 8>::iterator CacheIt;
- if (IsCapturedCache) {
- bool Inserted;
- std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false});
- if (!Inserted)
- // Found cached result, return it!
- return CacheIt->second;
- }
-
- // If this is a local allocation, check to see if it escapes.
- if (isa<AllocaInst>(V) || isNoAliasCall(V)) {
- // Set StoreCaptures to True so that we can assume in our callers that the
- // pointer is not the result of a load instruction. Currently
- // PointerMayBeCaptured doesn't have any special analysis for the
- // StoreCaptures=false case; if it did, our callers could be refined to be
- // more precise.
- auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
- if (IsCapturedCache)
- CacheIt->second = Ret;
- return Ret;
- }
-
- // If this is an argument that corresponds to a byval or noalias argument,
- // then it has not escaped before entering the function. Check if it escapes
- // inside the function.
- if (const Argument *A = dyn_cast<Argument>(V))
- if (A->hasByValAttr() || A->hasNoAliasAttr()) {
- // Note even if the argument is marked nocapture, we still need to check
- // for copies made inside the function. The nocapture attribute only
- // specifies that there are no copies made that outlive the function.
- auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
- if (IsCapturedCache)
- CacheIt->second = Ret;
- return Ret;
- }
-
- return false;
-}
-
/// Returns true if the pointer is one which would have been considered an
/// escape by isNonEscapingLocalObject.
static bool isEscapeSource(const Value *V) {
@@ -455,20 +412,22 @@ static unsigned getMaxPointerSize(const DataLayout &DL) {
/// specified amount, but which may have other unrepresented high bits. As
/// such, the gep cannot necessarily be reconstructed from its decomposed form.
///
-/// When DataLayout is around, this function is capable of analyzing everything
-/// that GetUnderlyingObject can look through. To be able to do that
-/// GetUnderlyingObject and DecomposeGEPExpression must use the same search
-/// depth (MaxLookupSearchDepth). When DataLayout not is around, it just looks
-/// through pointer casts.
-bool BasicAAResult::DecomposeGEPExpression(const Value *V,
- DecomposedGEP &Decomposed, const DataLayout &DL, AssumptionCache *AC,
- DominatorTree *DT) {
+/// This function is capable of analyzing everything that getUnderlyingObject
+/// can look through. To be able to do that getUnderlyingObject and
+/// DecomposeGEPExpression must use the same search depth
+/// (MaxLookupSearchDepth).
+BasicAAResult::DecomposedGEP
+BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
+ AssumptionCache *AC, DominatorTree *DT) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = MaxLookupSearchDepth;
SearchTimes++;
+ const Instruction *CxtI = dyn_cast<Instruction>(V);
unsigned MaxPointerSize = getMaxPointerSize(DL);
- Decomposed.VarIndices.clear();
+ DecomposedGEP Decomposed;
+ Decomposed.Offset = APInt(MaxPointerSize, 0);
+ Decomposed.HasCompileTimeConstantScale = true;
do {
// See if this is a bitcast or GEP.
const Operator *Op = dyn_cast<Operator>(V);
@@ -481,7 +440,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
}
}
Decomposed.Base = V;
- return false;
+ return Decomposed;
}
if (Op->getOpcode() == Instruction::BitCast ||
@@ -515,13 +474,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
}
Decomposed.Base = V;
- return false;
+ return Decomposed;
}
// Don't attempt to analyze GEPs over unsized objects.
if (!GEPOp->getSourceElementType()->isSized()) {
Decomposed.Base = V;
- return false;
+ return Decomposed;
}
// Don't attempt to analyze GEPs if index scale is not a compile-time
@@ -529,7 +488,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
if (isa<ScalableVectorType>(GEPOp->getSourceElementType())) {
Decomposed.Base = V;
Decomposed.HasCompileTimeConstantScale = false;
- return false;
+ return Decomposed;
}
unsigned AS = GEPOp->getPointerAddressSpace();
@@ -548,8 +507,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
if (FieldNo == 0)
continue;
- Decomposed.StructOffset +=
- DL.getStructLayout(STy)->getElementOffset(FieldNo);
+ Decomposed.Offset += DL.getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
@@ -557,10 +515,9 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
if (CIdx->isZero())
continue;
- Decomposed.OtherOffset +=
- (DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize() *
- CIdx->getValue().sextOrSelf(MaxPointerSize))
- .sextOrTrunc(MaxPointerSize);
+ Decomposed.Offset +=
+ DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize() *
+ CIdx->getValue().sextOrTrunc(MaxPointerSize);
continue;
}
@@ -593,9 +550,10 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// FIXME: C1*Scale and the other operations in the decomposed
// (C1*Scale)*V+C2*Scale can also overflow. We should check for this
// possibility.
- APInt WideScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize*2) *
- Scale.sext(MaxPointerSize*2);
- if (WideScaledOffset.getMinSignedBits() > MaxPointerSize) {
+ bool Overflow;
+ APInt ScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize)
+ .smul_ov(Scale, Overflow);
+ if (Overflow) {
Index = OrigIndex;
IndexScale = 1;
IndexOffset = 0;
@@ -604,7 +562,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
if (PointerSize > Width)
SExtBits += PointerSize - Width;
} else {
- Decomposed.OtherOffset += IndexOffset.sextOrTrunc(MaxPointerSize) * Scale;
+ Decomposed.Offset += ScaledOffset;
Scale *= IndexScale.sextOrTrunc(MaxPointerSize);
}
@@ -627,18 +585,14 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
Scale = adjustToPointerSize(Scale, PointerSize);
if (!!Scale) {
- VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, Scale};
+ VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, Scale, CxtI};
Decomposed.VarIndices.push_back(Entry);
}
}
// Take care of wrap-arounds
- if (GepHasConstantOffset) {
- Decomposed.StructOffset =
- adjustToPointerSize(Decomposed.StructOffset, PointerSize);
- Decomposed.OtherOffset =
- adjustToPointerSize(Decomposed.OtherOffset, PointerSize);
- }
+ if (GepHasConstantOffset)
+ Decomposed.Offset = adjustToPointerSize(Decomposed.Offset, PointerSize);
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
@@ -647,7 +601,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// If the chain of expressions is too deep, just return early.
Decomposed.Base = V;
SearchLimitReached++;
- return true;
+ return Decomposed;
}
/// Returns whether the given pointer value points to memory that is local to
@@ -661,7 +615,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
SmallVector<const Value *, 16> Worklist;
Worklist.push_back(Loc.Ptr);
do {
- const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
+ const Value *V = getUnderlyingObject(Worklist.pop_back_val());
if (!Visited.insert(V).second) {
Visited.clear();
return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
@@ -698,8 +652,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
Visited.clear();
return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
}
- for (Value *IncValue : PN->incoming_values())
- Worklist.push_back(IncValue);
+ append_range(Worklist, PN->incoming_values());
continue;
}
@@ -844,23 +797,8 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
AAQueryInfo &AAQI) {
assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
-
- // If we have a directly cached entry for these locations, we have recursed
- // through this once, so just return the cached results. Notably, when this
- // happens, we don't clear the cache.
- auto CacheIt = AAQI.AliasCache.find(AAQueryInfo::LocPair(LocA, LocB));
- if (CacheIt != AAQI.AliasCache.end())
- return CacheIt->second;
-
- CacheIt = AAQI.AliasCache.find(AAQueryInfo::LocPair(LocB, LocA));
- if (CacheIt != AAQI.AliasCache.end())
- return CacheIt->second;
-
- AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr,
- LocB.Size, LocB.AATags, AAQI);
-
- VisitedPhiBBs.clear();
- return Alias;
+ return aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr, LocB.Size,
+ LocB.AATags, AAQI);
}
/// Checks to see if the specified callsite can clobber the specified memory
@@ -875,7 +813,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
assert(notDifferentParent(Call, Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
- const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
+ const Value *Object = getUnderlyingObject(Loc.Ptr);
// Calls marked 'tail' cannot read or write allocas from the current frame
// because the current frame might be destroyed by the time they run. However,
@@ -924,8 +862,9 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking.
- AliasResult AR = getBestAAResults().alias(MemoryLocation(*CI),
- MemoryLocation(Object), AAQI);
+ AliasResult AR = getBestAAResults().alias(
+ MemoryLocation::getBeforeOrAfter(*CI),
+ MemoryLocation::getBeforeOrAfter(Object), AAQI);
if (AR != MustAlias)
IsMustAlias = false;
// Operand doesn't alias 'Object', continue looking for other aliases
@@ -971,26 +910,19 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
if (isMallocOrCallocLikeFn(Call, &TLI)) {
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
- if (getBestAAResults().alias(MemoryLocation(Call), Loc, AAQI) == NoAlias)
+ if (getBestAAResults().alias(MemoryLocation::getBeforeOrAfter(Call),
+ Loc, AAQI) == NoAlias)
return ModRefInfo::NoModRef;
}
- // The semantics of memcpy intrinsics forbid overlap between their respective
- // operands, i.e., source and destination of any given memcpy must no-alias.
- // If Loc must-aliases either one of these two locations, then it necessarily
- // no-aliases the other.
+  // The source and destination operands of a memcpy intrinsic either overlap
+  // exactly or do not overlap at all, i.e., for any given memcpy they are
+  // either no-alias or must-alias.
if (auto *Inst = dyn_cast<AnyMemCpyInst>(Call)) {
- AliasResult SrcAA, DestAA;
-
- if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
- Loc, AAQI)) == MustAlias)
- // Loc is exactly the memcpy source thus disjoint from memcpy dest.
- return ModRefInfo::Ref;
- if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst),
- Loc, AAQI)) == MustAlias)
- // The converse case.
- return ModRefInfo::Mod;
-
+ AliasResult SrcAA =
+ getBestAAResults().alias(MemoryLocation::getForSource(Inst), Loc, AAQI);
+ AliasResult DestAA =
+ getBestAAResults().alias(MemoryLocation::getForDest(Inst), Loc, AAQI);
// It's also possible for Loc to alias both src and dest, or neither.
ModRefInfo rv = ModRefInfo::NoModRef;
if (SrcAA != NoAlias)
@@ -1015,6 +947,9 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// the guard invokes the "deopt" continuation.
if (isIntrinsicCall(Call, Intrinsic::experimental_guard))
return ModRefInfo::Ref;
+ // The same applies to deoptimize which is essentially a guard(false).
+ if (isIntrinsicCall(Call, Intrinsic::experimental_deoptimize))
+ return ModRefInfo::Ref;
// Like assumes, invariant.start intrinsics were also marked as arbitrarily
// writing so that proper control dependencies are maintained but they never
@@ -1081,166 +1016,6 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
}
-/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
-/// both having the exact same pointer operand.
-static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
- LocationSize MaybeV1Size,
- const GEPOperator *GEP2,
- LocationSize MaybeV2Size,
- const DataLayout &DL) {
- assert(GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() ==
- GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() &&
- GEP1->getPointerOperandType() == GEP2->getPointerOperandType() &&
- "Expected GEPs with the same pointer operand");
-
- // Try to determine whether GEP1 and GEP2 index through arrays, into structs,
- // such that the struct field accesses provably cannot alias.
- // We also need at least two indices (the pointer, and the struct field).
- if (GEP1->getNumIndices() != GEP2->getNumIndices() ||
- GEP1->getNumIndices() < 2)
- return MayAlias;
-
- // If we don't know the size of the accesses through both GEPs, we can't
- // determine whether the struct fields accessed can't alias.
- if (MaybeV1Size == LocationSize::unknown() ||
- MaybeV2Size == LocationSize::unknown())
- return MayAlias;
-
- const uint64_t V1Size = MaybeV1Size.getValue();
- const uint64_t V2Size = MaybeV2Size.getValue();
-
- ConstantInt *C1 =
- dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
- ConstantInt *C2 =
- dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
-
- // If the last (struct) indices are constants and are equal, the other indices
- // might be also be dynamically equal, so the GEPs can alias.
- if (C1 && C2) {
- unsigned BitWidth = std::max(C1->getBitWidth(), C2->getBitWidth());
- if (C1->getValue().sextOrSelf(BitWidth) ==
- C2->getValue().sextOrSelf(BitWidth))
- return MayAlias;
- }
-
- // Find the last-indexed type of the GEP, i.e., the type you'd get if
- // you stripped the last index.
- // On the way, look at each indexed type. If there's something other
- // than an array, different indices can lead to different final types.
- SmallVector<Value *, 8> IntermediateIndices;
-
- // Insert the first index; we don't need to check the type indexed
- // through it as it only drops the pointer indirection.
- assert(GEP1->getNumIndices() > 1 && "Not enough GEP indices to examine");
- IntermediateIndices.push_back(GEP1->getOperand(1));
-
- // Insert all the remaining indices but the last one.
- // Also, check that they all index through arrays.
- for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
- if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
- GEP1->getSourceElementType(), IntermediateIndices)))
- return MayAlias;
- IntermediateIndices.push_back(GEP1->getOperand(i + 1));
- }
-
- auto *Ty = GetElementPtrInst::getIndexedType(
- GEP1->getSourceElementType(), IntermediateIndices);
- StructType *LastIndexedStruct = dyn_cast<StructType>(Ty);
-
- if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
- // We know that:
- // - both GEPs begin indexing from the exact same pointer;
- // - the last indices in both GEPs are constants, indexing into a sequential
- // type (array or vector);
- // - both GEPs only index through arrays prior to that.
- //
- // Because array indices greater than the number of elements are valid in
- // GEPs, unless we know the intermediate indices are identical between
- // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't
- // partially overlap. We also need to check that the loaded size matches
- // the element size, otherwise we could still have overlap.
- Type *LastElementTy = GetElementPtrInst::getTypeAtIndex(Ty, (uint64_t)0);
- const uint64_t ElementSize =
- DL.getTypeStoreSize(LastElementTy).getFixedSize();
- if (V1Size != ElementSize || V2Size != ElementSize)
- return MayAlias;
-
- for (unsigned i = 0, e = GEP1->getNumIndices() - 1; i != e; ++i)
- if (GEP1->getOperand(i + 1) != GEP2->getOperand(i + 1))
- return MayAlias;
-
- // Now we know that the array/pointer that GEP1 indexes into and that
- // that GEP2 indexes into must either precisely overlap or be disjoint.
- // Because they cannot partially overlap and because fields in an array
- // cannot overlap, if we can prove the final indices are different between
- // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias.
-
- // If the last indices are constants, we've already checked they don't
- // equal each other so we can exit early.
- if (C1 && C2)
- return NoAlias;
- {
- Value *GEP1LastIdx = GEP1->getOperand(GEP1->getNumOperands() - 1);
- Value *GEP2LastIdx = GEP2->getOperand(GEP2->getNumOperands() - 1);
- if (isa<PHINode>(GEP1LastIdx) || isa<PHINode>(GEP2LastIdx)) {
- // If one of the indices is a PHI node, be safe and only use
- // computeKnownBits so we don't make any assumptions about the
- // relationships between the two indices. This is important if we're
- // asking about values from different loop iterations. See PR32314.
- // TODO: We may be able to change the check so we only do this when
- // we definitely looked through a PHINode.
- if (GEP1LastIdx != GEP2LastIdx &&
- GEP1LastIdx->getType() == GEP2LastIdx->getType()) {
- KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL);
- KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL);
- if (Known1.Zero.intersects(Known2.One) ||
- Known1.One.intersects(Known2.Zero))
- return NoAlias;
- }
- } else if (isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL))
- return NoAlias;
- }
- return MayAlias;
- } else if (!LastIndexedStruct || !C1 || !C2) {
- return MayAlias;
- }
-
- if (C1->getValue().getActiveBits() > 64 ||
- C2->getValue().getActiveBits() > 64)
- return MayAlias;
-
- // We know that:
- // - both GEPs begin indexing from the exact same pointer;
- // - the last indices in both GEPs are constants, indexing into a struct;
- // - said indices are different, hence, the pointed-to fields are different;
- // - both GEPs only index through arrays prior to that.
- //
- // This lets us determine that the struct that GEP1 indexes into and the
- // struct that GEP2 indexes into must either precisely overlap or be
- // completely disjoint. Because they cannot partially overlap, indexing into
- // different non-overlapping fields of the struct will never alias.
-
- // Therefore, the only remaining thing needed to show that both GEPs can't
- // alias is that the fields are not overlapping.
- const StructLayout *SL = DL.getStructLayout(LastIndexedStruct);
- const uint64_t StructSize = SL->getSizeInBytes();
- const uint64_t V1Off = SL->getElementOffset(C1->getZExtValue());
- const uint64_t V2Off = SL->getElementOffset(C2->getZExtValue());
-
- auto EltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
- uint64_t V2Off, uint64_t V2Size) {
- return V1Off < V2Off && V1Off + V1Size <= V2Off &&
- ((V2Off + V2Size <= StructSize) ||
- (V2Off + V2Size - StructSize <= V1Off));
- };
-
- if (EltsDontOverlap(V1Off, V1Size, V2Off, V2Size) ||
- EltsDontOverlap(V2Off, V2Size, V1Off, V1Size))
- return NoAlias;
-
- return MayAlias;
-}
-
// If we have (a) a GEP and (b) a pointer based on an alloca, and the
// beginning of the object the GEP points to would have a negative offset with
// respect to the alloca, then the GEP cannot alias pointer (b).
@@ -1276,7 +1051,7 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
LocationSize MaybeObjectAccessSize) {
// If the object access size is unknown, or the GEP isn't inbounds, bail.
- if (MaybeObjectAccessSize == LocationSize::unknown() || !GEPOp->isInBounds())
+ if (!MaybeObjectAccessSize.hasValue() || !GEPOp->isInBounds())
return false;
const uint64_t ObjectAccessSize = MaybeObjectAccessSize.getValue();
@@ -1289,9 +1064,6 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
!DecompObject.VarIndices.empty())
return false;
- APInt ObjectBaseOffset = DecompObject.StructOffset +
- DecompObject.OtherOffset;
-
// If the GEP has no variable indices, we know the precise offset
// from the base, then use it. If the GEP has variable indices,
// we can't get exact GEP offset to identify pointer alias. So return
@@ -1299,33 +1071,21 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
if (!DecompGEP.VarIndices.empty())
return false;
- APInt GEPBaseOffset = DecompGEP.StructOffset;
- GEPBaseOffset += DecompGEP.OtherOffset;
-
- return GEPBaseOffset.sge(ObjectBaseOffset + (int64_t)ObjectAccessSize);
+ return DecompGEP.Offset.sge(DecompObject.Offset + (int64_t)ObjectAccessSize);
}
/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
/// another pointer.
///
/// We know that V1 is a GEP, but we don't know anything about V2.
-/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for
+/// UnderlyingV1 is getUnderlyingObject(GEP1), UnderlyingV2 is the same for
/// V2.
AliasResult BasicAAResult::aliasGEP(
const GEPOperator *GEP1, LocationSize V1Size, const AAMDNodes &V1AAInfo,
const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo,
const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI) {
- DecomposedGEP DecompGEP1, DecompGEP2;
- unsigned MaxPointerSize = getMaxPointerSize(DL);
- DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0);
- DecompGEP2.StructOffset = DecompGEP2.OtherOffset = APInt(MaxPointerSize, 0);
- DecompGEP1.HasCompileTimeConstantScale =
- DecompGEP2.HasCompileTimeConstantScale = true;
-
- bool GEP1MaxLookupReached =
- DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT);
- bool GEP2MaxLookupReached =
- DecomposeGEPExpression(V2, DecompGEP2, DL, &AC, DT);
+ DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
+ DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT);
// Don't attempt to analyze the decomposed GEP if index scale is not a
// compile-time constant.
@@ -1333,18 +1093,14 @@ AliasResult BasicAAResult::aliasGEP(
!DecompGEP2.HasCompileTimeConstantScale)
return MayAlias;
- APInt GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
- APInt GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
-
assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 &&
"DecomposeGEPExpression returned a result different from "
- "GetUnderlyingObject");
+ "getUnderlyingObject");
// If the GEP's offset relative to its base is such that the base would
// fall below the start of the object underlying V2, then the GEP and V2
// cannot alias.
- if (!GEP1MaxLookupReached && !GEP2MaxLookupReached &&
- isGEPBaseAtNegativeOffset(GEP1, DecompGEP1, DecompGEP2, V2Size))
+ if (isGEPBaseAtNegativeOffset(GEP1, DecompGEP1, DecompGEP2, V2Size))
return NoAlias;
// If we have two gep instructions with must-alias or not-alias'ing base
// pointers, figure out if the indexes to the GEP tell us anything about the
@@ -1352,32 +1108,22 @@ AliasResult BasicAAResult::aliasGEP(
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
// Check for the GEP base being at a negative offset, this time in the other
// direction.
- if (!GEP1MaxLookupReached && !GEP2MaxLookupReached &&
- isGEPBaseAtNegativeOffset(GEP2, DecompGEP2, DecompGEP1, V1Size))
+ if (isGEPBaseAtNegativeOffset(GEP2, DecompGEP2, DecompGEP1, V1Size))
return NoAlias;
// Do the base pointers alias?
- AliasResult BaseAlias =
- aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(),
- UnderlyingV2, LocationSize::unknown(), AAMDNodes(), AAQI);
-
- // Check for geps of non-aliasing underlying pointers where the offsets are
- // identical.
- if ((BaseAlias == MayAlias) && V1Size == V2Size) {
- // Do the base pointers alias assuming type and size.
- AliasResult PreciseBaseAlias = aliasCheck(
- UnderlyingV1, V1Size, V1AAInfo, UnderlyingV2, V2Size, V2AAInfo, AAQI);
- if (PreciseBaseAlias == NoAlias) {
- // See if the computed offset from the common pointer tells us about the
- // relation of the resulting pointer.
- // If the max search depth is reached the result is undefined
- if (GEP2MaxLookupReached || GEP1MaxLookupReached)
- return MayAlias;
-
- // Same offsets.
- if (GEP1BaseOffset == GEP2BaseOffset &&
- DecompGEP1.VarIndices == DecompGEP2.VarIndices)
- return NoAlias;
- }
+ AliasResult BaseAlias = getBestAAResults().alias(
+ MemoryLocation::getBeforeOrAfter(UnderlyingV1),
+ MemoryLocation::getBeforeOrAfter(UnderlyingV2), AAQI);
+
+ // For GEPs with identical offsets, we can preserve the size and AAInfo
+ // when performing the alias check on the underlying objects.
+ if (BaseAlias == MayAlias && DecompGEP1.Offset == DecompGEP2.Offset &&
+ DecompGEP1.VarIndices == DecompGEP2.VarIndices) {
+ AliasResult PreciseBaseAlias = getBestAAResults().alias(
+ MemoryLocation(UnderlyingV1, V1Size, V1AAInfo),
+ MemoryLocation(UnderlyingV2, V2Size, V2AAInfo), AAQI);
+ if (PreciseBaseAlias == NoAlias)
+ return NoAlias;
}
// If we get a No or May, then return it immediately, no amount of analysis
@@ -1387,28 +1133,9 @@ AliasResult BasicAAResult::aliasGEP(
return BaseAlias;
}
- // Otherwise, we have a MustAlias. Since the base pointers alias each other
- // exactly, see if the computed offset from the common pointer tells us
- // about the relation of the resulting pointer.
- // If we know the two GEPs are based off of the exact same pointer (and not
- // just the same underlying object), see if that tells us anything about
- // the resulting pointers.
- if (GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() ==
- GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() &&
- GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) {
- AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
- // If we couldn't find anything interesting, don't abandon just yet.
- if (R != MayAlias)
- return R;
- }
-
- // If the max search depth is reached, the result is undefined
- if (GEP2MaxLookupReached || GEP1MaxLookupReached)
- return MayAlias;
-
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
- GEP1BaseOffset -= GEP2BaseOffset;
+ DecompGEP1.Offset -= DecompGEP2.Offset;
GetIndexDifference(DecompGEP1.VarIndices, DecompGEP2.VarIndices);
} else {
@@ -1417,12 +1144,12 @@ AliasResult BasicAAResult::aliasGEP(
// pointer, we know they cannot alias.
// If both accesses are unknown size, we can't do anything useful here.
- if (V1Size == LocationSize::unknown() && V2Size == LocationSize::unknown())
+ if (!V1Size.hasValue() && !V2Size.hasValue())
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, LocationSize::unknown(),
- AAMDNodes(), V2, LocationSize::unknown(),
- V2AAInfo, AAQI, nullptr, UnderlyingV2);
+ AliasResult R = getBestAAResults().alias(
+ MemoryLocation::getBeforeOrAfter(UnderlyingV1),
+ MemoryLocation(V2, V2Size, V2AAInfo), AAQI);
if (R != MustAlias) {
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -1432,10 +1159,6 @@ AliasResult BasicAAResult::aliasGEP(
assert(R == NoAlias || R == MayAlias);
return R;
}
-
- // If the max search depth is reached the result is undefined
- if (GEP1MaxLookupReached)
- return MayAlias;
}
// In the two GEP Case, if there is no difference in the offsets of the
@@ -1444,17 +1167,17 @@ AliasResult BasicAAResult::aliasGEP(
//
// In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2
// must aliases the GEP, the end result is a must alias also.
- if (GEP1BaseOffset == 0 && DecompGEP1.VarIndices.empty())
+ if (DecompGEP1.Offset == 0 && DecompGEP1.VarIndices.empty())
return MustAlias;
// If there is a constant difference between the pointers, but the difference
// is less than the size of the associated memory object, then we know
// that the objects are partially overlapping. If the difference is
// greater, we know they do not overlap.
- if (GEP1BaseOffset != 0 && DecompGEP1.VarIndices.empty()) {
- if (GEP1BaseOffset.sge(0)) {
- if (V2Size != LocationSize::unknown()) {
- if (GEP1BaseOffset.ult(V2Size.getValue()))
+ if (DecompGEP1.Offset != 0 && DecompGEP1.VarIndices.empty()) {
+ if (DecompGEP1.Offset.sge(0)) {
+ if (V2Size.hasValue()) {
+ if (DecompGEP1.Offset.ult(V2Size.getValue()))
return PartialAlias;
return NoAlias;
}
@@ -1465,11 +1188,8 @@ AliasResult BasicAAResult::aliasGEP(
// ---------------->|
// |-->V1Size |-------> V2Size
// GEP1 V2
- // We need to know that V2Size is not unknown, otherwise we might have
- // stripped a gep with negative index ('gep <ptr>, -1, ...).
- if (V1Size != LocationSize::unknown() &&
- V2Size != LocationSize::unknown()) {
- if ((-GEP1BaseOffset).ult(V1Size.getValue()))
+ if (V1Size.hasValue()) {
+ if ((-DecompGEP1.Offset).ult(V1Size.getValue()))
return PartialAlias;
return NoAlias;
}
@@ -1477,24 +1197,24 @@ AliasResult BasicAAResult::aliasGEP(
}
if (!DecompGEP1.VarIndices.empty()) {
- APInt Modulo(MaxPointerSize, 0);
- bool AllPositive = true;
+ APInt GCD;
+ bool AllNonNegative = DecompGEP1.Offset.isNonNegative();
+ bool AllNonPositive = DecompGEP1.Offset.isNonPositive();
for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
+ const APInt &Scale = DecompGEP1.VarIndices[i].Scale;
+ if (i == 0)
+ GCD = Scale.abs();
+ else
+ GCD = APIntOps::GreatestCommonDivisor(GCD, Scale.abs());
- // Try to distinguish something like &A[i][1] against &A[42][0].
- // Grab the least significant bit set in any of the scales. We
- // don't need std::abs here (even if the scale's negative) as we'll
- // be ^'ing Modulo with itself later.
- Modulo |= DecompGEP1.VarIndices[i].Scale;
-
- if (AllPositive) {
+ if (AllNonNegative || AllNonPositive) {
// If the Value could change between cycles, then any reasoning about
// the Value this cycle may not hold in the next cycle. We'll just
// give up if we can't determine conditions that hold for every cycle:
const Value *V = DecompGEP1.VarIndices[i].V;
+ const Instruction *CxtI = DecompGEP1.VarIndices[i].CxtI;
- KnownBits Known =
- computeKnownBits(V, DL, 0, &AC, dyn_cast<Instruction>(GEP1), DT);
+ KnownBits Known = computeKnownBits(V, DL, 0, &AC, CxtI, DT);
bool SignKnownZero = Known.isNonNegative();
bool SignKnownOne = Known.isNegative();
@@ -1504,36 +1224,77 @@ AliasResult BasicAAResult::aliasGEP(
SignKnownZero |= IsZExt;
SignKnownOne &= !IsZExt;
- // If the variable begins with a zero then we know it's
- // positive, regardless of whether the value is signed or
- // unsigned.
- APInt Scale = DecompGEP1.VarIndices[i].Scale;
- AllPositive =
- (SignKnownZero && Scale.sge(0)) || (SignKnownOne && Scale.slt(0));
+ AllNonNegative &= (SignKnownZero && Scale.isNonNegative()) ||
+ (SignKnownOne && Scale.isNonPositive());
+ AllNonPositive &= (SignKnownZero && Scale.isNonPositive()) ||
+ (SignKnownOne && Scale.isNonNegative());
}
}
- Modulo = Modulo ^ (Modulo & (Modulo - 1));
-
- // We can compute the difference between the two addresses
- // mod Modulo. Check whether that difference guarantees that the
- // two locations do not alias.
- APInt ModOffset = GEP1BaseOffset & (Modulo - 1);
- if (V1Size != LocationSize::unknown() &&
- V2Size != LocationSize::unknown() && ModOffset.uge(V2Size.getValue()) &&
- (Modulo - ModOffset).uge(V1Size.getValue()))
+ // We now have accesses at two offsets from the same base:
+ // 1. (...)*GCD + DecompGEP1.Offset with size V1Size
+ // 2. 0 with size V2Size
+ // Using arithmetic modulo GCD, the accesses are at
+ // [ModOffset..ModOffset+V1Size) and [0..V2Size). If the first access fits
+ // into the range [V2Size..GCD), then we know they cannot overlap.
+ APInt ModOffset = DecompGEP1.Offset.srem(GCD);
+ if (ModOffset.isNegative())
+ ModOffset += GCD; // We want mod, not rem.
+ if (V1Size.hasValue() && V2Size.hasValue() &&
+ ModOffset.uge(V2Size.getValue()) &&
+ (GCD - ModOffset).uge(V1Size.getValue()))
return NoAlias;
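    // Hedged worked example (editorial, not part of the upstream patch): with
    // a single variable index of scale 8, GCD = 8. If DecompGEP1.Offset is 4
    // and both accesses are 4 bytes wide, then ModOffset = 4,
    // ModOffset >= V2Size and GCD - ModOffset = 4 >= V1Size, so an access at
    // 8*i + 4 can never overlap the access at offset 0: NoAlias.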
- // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
- // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
- // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
- if (AllPositive && GEP1BaseOffset.sgt(0) &&
- V2Size != LocationSize::unknown() &&
- GEP1BaseOffset.uge(V2Size.getValue()))
+ // If we know all the variables are non-negative, then the total offset is
+ // also non-negative and >= DecompGEP1.Offset. We have the following layout:
+  // [0, V2Size) ... [TotalOffset, TotalOffset+V1Size)
+ // If DecompGEP1.Offset >= V2Size, the accesses don't alias.
+ if (AllNonNegative && V2Size.hasValue() &&
+ DecompGEP1.Offset.uge(V2Size.getValue()))
return NoAlias;
+ // Similarly, if the variables are non-positive, then the total offset is
+ // also non-positive and <= DecompGEP1.Offset. We have the following layout:
+ // [TotalOffset, TotalOffset+V1Size) ... [0, V2Size)
+ // If -DecompGEP1.Offset >= V1Size, the accesses don't alias.
+ if (AllNonPositive && V1Size.hasValue() &&
+ (-DecompGEP1.Offset).uge(V1Size.getValue()))
+ return NoAlias;
+
+ if (V1Size.hasValue() && V2Size.hasValue()) {
+ // Try to determine whether abs(VarIndex) > 0.
+ Optional<APInt> MinAbsVarIndex;
+ if (DecompGEP1.VarIndices.size() == 1) {
+ // VarIndex = Scale*V. If V != 0 then abs(VarIndex) >= abs(Scale).
+ const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
+ if (isKnownNonZero(Var.V, DL, 0, &AC, Var.CxtI, DT))
+ MinAbsVarIndex = Var.Scale.abs();
+ } else if (DecompGEP1.VarIndices.size() == 2) {
+ // VarIndex = Scale*V0 + (-Scale)*V1.
+ // If V0 != V1 then abs(VarIndex) >= abs(Scale).
+ // Check that VisitedPhiBBs is empty, to avoid reasoning about
+ // inequality of values across loop iterations.
+ const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
+ const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
+ if (Var0.Scale == -Var1.Scale && Var0.ZExtBits == Var1.ZExtBits &&
+ Var0.SExtBits == Var1.SExtBits && VisitedPhiBBs.empty() &&
+ isKnownNonEqual(Var0.V, Var1.V, DL, &AC, /* CxtI */ nullptr, DT))
+ MinAbsVarIndex = Var0.Scale.abs();
+ }
+
+ if (MinAbsVarIndex) {
+ // The constant offset will have added at least +/-MinAbsVarIndex to it.
+ APInt OffsetLo = DecompGEP1.Offset - *MinAbsVarIndex;
+ APInt OffsetHi = DecompGEP1.Offset + *MinAbsVarIndex;
+ // Check that an access at OffsetLo or lower, and an access at OffsetHi
+ // or higher both do not alias.
+ if (OffsetLo.isNegative() && (-OffsetLo).uge(V1Size.getValue()) &&
+ OffsetHi.isNonNegative() && OffsetHi.uge(V2Size.getValue()))
+ return NoAlias;
+ }
+ }
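  // Hedged worked example (editorial, not part of the upstream patch): for
  // GEP1 = p + 4*i with i known to be non-zero, MinAbsVarIndex = 4. With
  // DecompGEP1.Offset = 0 and two 4-byte accesses, OffsetLo = -4 and
  // OffsetHi = 4, so both range checks above succeed and p[i] vs p[0] is
  // correctly reported NoAlias.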
if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
- GEP1BaseOffset, &AC, DT))
+ DecompGEP1.Offset, &AC, DT))
return NoAlias;
}
@@ -1561,31 +1322,33 @@ AliasResult
BasicAAResult::aliasSelect(const SelectInst *SI, LocationSize SISize,
const AAMDNodes &SIAAInfo, const Value *V2,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderV2, AAQueryInfo &AAQI) {
+ AAQueryInfo &AAQI) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
if (SI->getCondition() == SI2->getCondition()) {
- AliasResult Alias =
- aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, SI2->getTrueValue(),
- V2Size, V2AAInfo, AAQI);
+ AliasResult Alias = getBestAAResults().alias(
+ MemoryLocation(SI->getTrueValue(), SISize, SIAAInfo),
+ MemoryLocation(SI2->getTrueValue(), V2Size, V2AAInfo), AAQI);
if (Alias == MayAlias)
return MayAlias;
- AliasResult ThisAlias =
- aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
- SI2->getFalseValue(), V2Size, V2AAInfo, AAQI);
+ AliasResult ThisAlias = getBestAAResults().alias(
+ MemoryLocation(SI->getFalseValue(), SISize, SIAAInfo),
+ MemoryLocation(SI2->getFalseValue(), V2Size, V2AAInfo), AAQI);
return MergeAliasResults(ThisAlias, Alias);
}
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
- AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
- SISize, SIAAInfo, AAQI, UnderV2);
+ AliasResult Alias = getBestAAResults().alias(
+ MemoryLocation(V2, V2Size, V2AAInfo),
+ MemoryLocation(SI->getTrueValue(), SISize, SIAAInfo), AAQI);
if (Alias == MayAlias)
return MayAlias;
- AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(),
- SISize, SIAAInfo, AAQI, UnderV2);
+ AliasResult ThisAlias = getBestAAResults().alias(
+ MemoryLocation(V2, V2Size, V2AAInfo),
+ MemoryLocation(SI->getFalseValue(), SISize, SIAAInfo), AAQI);
return MergeAliasResults(ThisAlias, Alias);
}
@@ -1595,80 +1358,41 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
LocationSize V2Size,
const AAMDNodes &V2AAInfo,
- const Value *UnderV2, AAQueryInfo &AAQI) {
- // Track phi nodes we have visited. We use this information when we determine
- // value equivalence.
- VisitedPhiBBs.insert(PN->getParent());
-
+ AAQueryInfo &AAQI) {
// If the values are PHIs in the same block, we can do a more precise
// as well as efficient check: just check for aliases between the values
// on corresponding edges.
if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
if (PN2->getParent() == PN->getParent()) {
- AAQueryInfo::LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo),
- MemoryLocation(V2, V2Size, V2AAInfo));
- if (PN > V2)
- std::swap(Locs.first, Locs.second);
- // Analyse the PHIs' inputs under the assumption that the PHIs are
- // NoAlias.
- // If the PHIs are May/MustAlias there must be (recursively) an input
- // operand from outside the PHIs' cycle that is MayAlias/MustAlias or
- // there must be an operation on the PHIs within the PHIs' value cycle
- // that causes a MayAlias.
- // Pretend the phis do not alias.
- AliasResult Alias = NoAlias;
- AliasResult OrigAliasResult;
- {
- // Limited lifetime iterator invalidated by the aliasCheck call below.
- auto CacheIt = AAQI.AliasCache.find(Locs);
- assert((CacheIt != AAQI.AliasCache.end()) &&
- "There must exist an entry for the phi node");
- OrigAliasResult = CacheIt->second;
- CacheIt->second = NoAlias;
- }
-
+ Optional<AliasResult> Alias;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- AliasResult ThisAlias =
- aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
- PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
- V2Size, V2AAInfo, AAQI);
- Alias = MergeAliasResults(ThisAlias, Alias);
- if (Alias == MayAlias)
+ AliasResult ThisAlias = getBestAAResults().alias(
+ MemoryLocation(PN->getIncomingValue(i), PNSize, PNAAInfo),
+ MemoryLocation(
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), V2Size,
+ V2AAInfo),
+ AAQI);
+ if (Alias)
+ *Alias = MergeAliasResults(*Alias, ThisAlias);
+ else
+ Alias = ThisAlias;
+ if (*Alias == MayAlias)
break;
}
-
- // Reset if speculation failed.
- if (Alias != NoAlias) {
- auto Pair =
- AAQI.AliasCache.insert(std::make_pair(Locs, OrigAliasResult));
- assert(!Pair.second && "Entry must have existed");
- Pair.first->second = OrigAliasResult;
- }
- return Alias;
+ return *Alias;
}
SmallVector<Value *, 4> V1Srcs;
- // For a recursive phi, that recurses through a contant gep, we can perform
- // aliasing calculations using the other phi operands with an unknown size to
- // specify that an unknown number of elements after the initial value are
- // potentially accessed.
+ // If a phi operand recurses back to the phi, we can still determine NoAlias
+ // if we don't alias the underlying objects of the other phi operands, as we
+ // know that the recursive phi needs to be based on them in some way.
bool isRecursive = false;
auto CheckForRecPhi = [&](Value *PV) {
if (!EnableRecPhiAnalysis)
return false;
- if (GEPOperator *PVGEP = dyn_cast<GEPOperator>(PV)) {
- // Check whether the incoming value is a GEP that advances the pointer
- // result of this PHI node (e.g. in a loop). If this is the case, we
- // would recurse and always get a MayAlias. Handle this case specially
- // below. We need to ensure that the phi is inbounds and has a constant
- // positive operand so that we can check for alias with the initial value
- // and an unknown but positive size.
- if (PVGEP->getPointerOperand() == PN && PVGEP->isInBounds() &&
- PVGEP->getNumIndices() == 1 && isa<ConstantInt>(PVGEP->idx_begin()) &&
- !cast<ConstantInt>(PVGEP->idx_begin())->isNegative()) {
- isRecursive = true;
- return true;
- }
+ if (getUnderlyingObject(PV) == PN) {
+ isRecursive = true;
+ return true;
}
return false;
};
@@ -1714,14 +1438,30 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
if (V1Srcs.empty())
return MayAlias;
- // If this PHI node is recursive, set the size of the accessed memory to
- // unknown to represent all the possible values the GEP could advance the
- // pointer to.
+ // If this PHI node is recursive, indicate that the pointer may be moved
+ // across iterations. We can only prove NoAlias if different underlying
+ // objects are involved.
if (isRecursive)
- PNSize = LocationSize::unknown();
-
- AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize,
- PNAAInfo, AAQI, UnderV2);
+ PNSize = LocationSize::beforeOrAfterPointer();
+
+ // In the recursive alias queries below, we may compare values from two
+ // different loop iterations. Keep track of visited phi blocks, which will
+ // be used when determining value equivalence.
+ bool BlockInserted = VisitedPhiBBs.insert(PN->getParent()).second;
+ auto _ = make_scope_exit([&]() {
+ if (BlockInserted)
+ VisitedPhiBBs.erase(PN->getParent());
+ });
+
+ // If we inserted a block into VisitedPhiBBs, alias analysis results that
+ // have been cached earlier may no longer be valid. Perform recursive queries
+ // with a new AAQueryInfo.
+ AAQueryInfo NewAAQI;
+ AAQueryInfo *UseAAQI = BlockInserted ? &NewAAQI : &AAQI;
+
+ AliasResult Alias = getBestAAResults().alias(
+ MemoryLocation(V2, V2Size, V2AAInfo),
+ MemoryLocation(V1Srcs[0], PNSize, PNAAInfo), *UseAAQI);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
@@ -1737,8 +1477,9 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
Value *V = V1Srcs[i];
- AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, AAQI, UnderV2);
+ AliasResult ThisAlias = getBestAAResults().alias(
+ MemoryLocation(V2, V2Size, V2AAInfo),
+ MemoryLocation(V, PNSize, PNAAInfo), *UseAAQI);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1750,10 +1491,10 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
/// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as
/// array references.
AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
- AAMDNodes V1AAInfo, const Value *V2,
- LocationSize V2Size, AAMDNodes V2AAInfo,
- AAQueryInfo &AAQI, const Value *O1,
- const Value *O2) {
+ const AAMDNodes &V1AAInfo,
+ const Value *V2, LocationSize V2Size,
+ const AAMDNodes &V2AAInfo,
+ AAQueryInfo &AAQI) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size.isZero() || V2Size.isZero())
@@ -1781,11 +1522,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
- if (O1 == nullptr)
- O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
-
- if (O2 == nullptr)
- O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
+ const Value *O1 = getUnderlyingObject(V1, MaxLookupSearchDepth);
+ const Value *O2 = getUnderlyingObject(V2, MaxLookupSearchDepth);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
@@ -1840,86 +1578,120 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
TLI, NullIsValidLocation)))
return NoAlias;
+  // If one of the accesses may be before the accessed pointer, canonicalize
+  // this by using unknown after-pointer sizes for both accesses. This is
+  // equivalent, because regardless of which pointer is lower, one of them
+  // will always come after the other, as long as the underlying objects aren't
+ // disjoint. We do this so that the rest of BasicAA does not have to deal
+ // with accesses before the base pointer, and to improve cache utilization by
+ // merging equivalent states.
+ if (V1Size.mayBeBeforePointer() || V2Size.mayBeBeforePointer()) {
+ V1Size = LocationSize::afterPointer();
+ V2Size = LocationSize::afterPointer();
+ }
+
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
AAQueryInfo::LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo),
MemoryLocation(V2, V2Size, V2AAInfo));
if (V1 > V2)
std::swap(Locs.first, Locs.second);
- std::pair<AAQueryInfo::AliasCacheT::iterator, bool> Pair =
- AAQI.AliasCache.try_emplace(Locs, MayAlias);
- if (!Pair.second)
- return Pair.first->second;
-
- // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
- // GEP can't simplify, we don't even look at the PHI cases.
- if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
- std::swap(V1, V2);
- std::swap(V1Size, V2Size);
- std::swap(O1, O2);
- std::swap(V1AAInfo, V2AAInfo);
+ const auto &Pair = AAQI.AliasCache.try_emplace(
+ Locs, AAQueryInfo::CacheEntry{NoAlias, 0});
+ if (!Pair.second) {
+ auto &Entry = Pair.first->second;
+ if (!Entry.isDefinitive()) {
+ // Remember that we used an assumption.
+ ++Entry.NumAssumptionUses;
+ ++AAQI.NumAssumptionUses;
+ }
+ return Entry.Result;
}
+
+ int OrigNumAssumptionUses = AAQI.NumAssumptionUses;
+ unsigned OrigNumAssumptionBasedResults = AAQI.AssumptionBasedResults.size();
+ AliasResult Result = aliasCheckRecursive(V1, V1Size, V1AAInfo, V2, V2Size,
+ V2AAInfo, AAQI, O1, O2);
+
+ auto It = AAQI.AliasCache.find(Locs);
+ assert(It != AAQI.AliasCache.end() && "Must be in cache");
+ auto &Entry = It->second;
+
+ // Check whether a NoAlias assumption has been used, but disproven.
+ bool AssumptionDisproven = Entry.NumAssumptionUses > 0 && Result != NoAlias;
+ if (AssumptionDisproven)
+ Result = MayAlias;
+
+ // This is a definitive result now, when considered as a root query.
+ AAQI.NumAssumptionUses -= Entry.NumAssumptionUses;
+ Entry.Result = Result;
+ Entry.NumAssumptionUses = -1;
+
+ // If the assumption has been disproven, remove any results that may have
+ // been based on this assumption. Do this after the Entry updates above to
+ // avoid iterator invalidation.
+ if (AssumptionDisproven)
+ while (AAQI.AssumptionBasedResults.size() > OrigNumAssumptionBasedResults)
+ AAQI.AliasCache.erase(AAQI.AssumptionBasedResults.pop_back_val());
+
+ // The result may still be based on assumptions higher up in the chain.
+ // Remember it, so it can be purged from the cache later.
+ if (OrigNumAssumptionUses != AAQI.NumAssumptionUses && Result != MayAlias)
+ AAQI.AssumptionBasedResults.push_back(Locs);
+ return Result;
+}
+
+AliasResult BasicAAResult::aliasCheckRecursive(
+ const Value *V1, LocationSize V1Size, const AAMDNodes &V1AAInfo,
+ const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo,
+ AAQueryInfo &AAQI, const Value *O1, const Value *O2) {
if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
AliasResult Result =
aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2, AAQI);
- if (Result != MayAlias) {
- auto ItInsPair = AAQI.AliasCache.insert(std::make_pair(Locs, Result));
- assert(!ItInsPair.second && "Entry must have existed");
- ItInsPair.first->second = Result;
+ if (Result != MayAlias)
+ return Result;
+ } else if (const GEPOperator *GV2 = dyn_cast<GEPOperator>(V2)) {
+ AliasResult Result =
+ aliasGEP(GV2, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, O2, O1, AAQI);
+ if (Result != MayAlias)
return Result;
- }
}
- if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
- std::swap(V1, V2);
- std::swap(O1, O2);
- std::swap(V1Size, V2Size);
- std::swap(V1AAInfo, V2AAInfo);
- }
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
AliasResult Result =
- aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2, AAQI);
- if (Result != MayAlias) {
- Pair = AAQI.AliasCache.try_emplace(Locs, Result);
- assert(!Pair.second && "Entry must have existed");
- return Pair.first->second = Result;
- }
+ aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, AAQI);
+ if (Result != MayAlias)
+ return Result;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(V2)) {
+ AliasResult Result =
+ aliasPHI(PN, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, AAQI);
+ if (Result != MayAlias)
+ return Result;
}
- if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
- std::swap(V1, V2);
- std::swap(O1, O2);
- std::swap(V1Size, V2Size);
- std::swap(V1AAInfo, V2AAInfo);
- }
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
AliasResult Result =
- aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2, AAQI);
- if (Result != MayAlias) {
- Pair = AAQI.AliasCache.try_emplace(Locs, Result);
- assert(!Pair.second && "Entry must have existed");
- return Pair.first->second = Result;
- }
+ aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, AAQI);
+ if (Result != MayAlias)
+ return Result;
+ } else if (const SelectInst *S2 = dyn_cast<SelectInst>(V2)) {
+ AliasResult Result =
+ aliasSelect(S2, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, AAQI);
+ if (Result != MayAlias)
+ return Result;
}
// If both pointers are pointing into the same object and one of them
// accesses the entire object, then the accesses must overlap in some way.
- if (O1 == O2)
+ if (O1 == O2) {
+ bool NullIsValidLocation = NullPointerIsDefined(&F);
if (V1Size.isPrecise() && V2Size.isPrecise() &&
(isObjectSize(O1, V1Size.getValue(), DL, TLI, NullIsValidLocation) ||
- isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation))) {
- Pair = AAQI.AliasCache.try_emplace(Locs, PartialAlias);
- assert(!Pair.second && "Entry must have existed");
- return Pair.first->second = PartialAlias;
- }
+ isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation)))
+ return PartialAlias;
+ }
- // Recurse back into the best AA results we have, potentially with refined
- // memory locations. We have already ensured that BasicAA has a MayAlias
- // cache result for these, so any recursion back into BasicAA won't loop.
- AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second, AAQI);
- Pair = AAQI.AliasCache.try_emplace(Locs, Result);
- assert(!Pair.second && "Entry must have existed");
- return Pair.first->second = Result;
+ return MayAlias;
}
/// Check whether two Values can be considered equivalent.
@@ -1988,7 +1760,7 @@ void BasicAAResult::GetIndexDifference(
// If we didn't consume this entry, add it to the end of the Dest list.
if (!!Scale) {
- VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
+ VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale, Src[i].CxtI};
Dest.push_back(Entry);
}
}
@@ -1998,8 +1770,8 @@ bool BasicAAResult::constantOffsetHeuristic(
const SmallVectorImpl<VariableGEPIndex> &VarIndices,
LocationSize MaybeV1Size, LocationSize MaybeV2Size, const APInt &BaseOffset,
AssumptionCache *AC, DominatorTree *DT) {
- if (VarIndices.size() != 2 || MaybeV1Size == LocationSize::unknown() ||
- MaybeV2Size == LocationSize::unknown())
+ if (VarIndices.size() != 2 || !MaybeV1Size.hasValue() ||
+ !MaybeV2Size.hasValue())
return false;
const uint64_t V1Size = MaybeV1Size.getValue();
@@ -2059,13 +1831,12 @@ bool BasicAAResult::constantOffsetHeuristic(
AnalysisKey BasicAA::Key;
BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) {
- return BasicAAResult(F.getParent()->getDataLayout(),
- F,
- AM.getResult<TargetLibraryAnalysis>(F),
- AM.getResult<AssumptionAnalysis>(F),
- &AM.getResult<DominatorTreeAnalysis>(F),
- AM.getCachedResult<LoopAnalysis>(F),
- AM.getCachedResult<PhiValuesAnalysis>(F));
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *LI = AM.getCachedResult<LoopAnalysis>(F);
+ auto *PV = AM.getCachedResult<PhiValuesAnalysis>(F);
+ return BasicAAResult(F.getParent()->getDataLayout(), F, TLI, AC, DT, LI, PV);
}
BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
@@ -2107,9 +1878,9 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) {
void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequiredTransitive<AssumptionCacheTracker>();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
AU.addUsedIfAvailable<PhiValuesWrapperPass>();
}
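
The reworked aliasCheck() above caches a provisional NoAlias entry before recursing, counts every use of that assumption, and, if the final result disproves it, discards all cache entries derived from it. A minimal standalone sketch of that bookkeeping; QueryInfo, computeUncached and the Root table are toy stand-ins, not the LLVM classes:

#include <cassert>
#include <map>
#include <utility>
#include <vector>

enum Result { NoAlias, MayAlias };

struct CacheEntry {
  Result Res = NoAlias;
  int NumAssumptionUses = 0;           // -1 once the entry is definitive.
  bool isDefinitive() const { return NumAssumptionUses < 0; }
};

struct QueryInfo {
  std::map<std::pair<int, int>, CacheEntry> Cache;
  std::vector<std::pair<int, int>> AssumptionBasedResults;
  int NumAssumptionUses = 0;
};

Result computeUncached(int A, int B, QueryInfo &Q);

Result query(int A, int B, QueryInfo &Q) {
  std::pair<int, int> Key = A < B ? std::make_pair(A, B) : std::make_pair(B, A);
  auto Ins = Q.Cache.emplace(Key, CacheEntry{});
  if (!Ins.second) {                   // Cache hit, possibly only an assumption.
    CacheEntry &E = Ins.first->second;
    if (!E.isDefinitive()) {
      ++E.NumAssumptionUses;           // Remember that the assumption was used.
      ++Q.NumAssumptionUses;
    }
    return E.Res;
  }

  int OrigUses = Q.NumAssumptionUses;
  size_t OrigAssumed = Q.AssumptionBasedResults.size();
  Result R = computeUncached(A, B, Q); // May recurse back into query().

  CacheEntry &E = Q.Cache[Key];
  bool Disproven = E.NumAssumptionUses > 0 && R != NoAlias;
  if (Disproven)
    R = MayAlias;
  Q.NumAssumptionUses -= E.NumAssumptionUses;
  E.Res = R;
  E.NumAssumptionUses = -1;            // Definitive from now on.

  // Drop every cached result that was derived from the disproven assumption.
  if (Disproven)
    while (Q.AssumptionBasedResults.size() > OrigAssumed) {
      Q.Cache.erase(Q.AssumptionBasedResults.back());
      Q.AssumptionBasedResults.pop_back();
    }

  // The result may itself rest on assumptions made higher up in the chain;
  // remember it so it can be purged later if needed.
  if (OrigUses != Q.NumAssumptionUses && R != MayAlias)
    Q.AssumptionBasedResults.push_back(Key);
  return R;
}

// Toy stand-in for the real analysis: two values alias when they share an
// underlying root. A real implementation would recurse through query().
Result computeUncached(int A, int B, QueryInfo &) {
  static const int Root[] = {0, 0, 1, 1, 2};
  return Root[A] == Root[B] ? MayAlias : NoAlias;
}

int main() {
  QueryInfo Q;
  assert(query(0, 1, Q) == MayAlias);  // Same root.
  assert(query(1, 4, Q) == NoAlias);   // Different roots.
  assert(query(0, 1, Q) == MayAlias);  // Served from the now-definitive cache.
  return 0;
}
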
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index a396b5ad21c6..884ba484ae19 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -61,6 +61,7 @@ INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
@@ -95,8 +96,6 @@ char BranchProbabilityInfoWrapperPass::ID = 0;
// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
-// Unlikely edges within a loop are half as likely as other edges
-static const uint32_t LBH_UNLIKELY_WEIGHT = 62;
/// Unreachable-terminating branch taken probability.
///
@@ -105,20 +104,6 @@ static const uint32_t LBH_UNLIKELY_WEIGHT = 62;
/// All reachable probability will proportionally share the remaining part.
static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1);
-/// Weight for a branch taken going into a cold block.
-///
-/// This is the weight for a branch taken toward a block marked
-/// cold. A block is marked cold if it's postdominated by a
-/// block containing a call to a cold function. Cold functions
-/// are those marked with attribute 'cold'.
-static const uint32_t CC_TAKEN_WEIGHT = 4;
-
-/// Weight for a branch not-taken into a cold block.
-///
-/// This is the weight for a branch not taken toward a block marked
-/// cold.
-static const uint32_t CC_NONTAKEN_WEIGHT = 64;
-
static const uint32_t PH_TAKEN_WEIGHT = 20;
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
@@ -135,144 +120,183 @@ static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
/// exceptional case, so the result is unlikely.
static const uint32_t FPH_UNO_WEIGHT = 1;
-/// Invoke-terminating normal branch taken weight
-///
-/// This is the weight for branching to the normal destination of an invoke
-/// instruction. We expect this to happen most of the time. Set the weight to an
-/// absurdly high value so that nested loops subsume it.
-static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
+/// Set of dedicated "absolute" execution weights for a block. These weights are
+/// meaningful only relative to each other and their derivatives.
+enum class BlockExecWeight : std::uint32_t {
+ /// Special weight used for cases with exact zero probability.
+ ZERO = 0x0,
+ /// Minimal possible non zero weight.
+ LOWEST_NON_ZERO = 0x1,
+ /// Weight of an 'unreachable' block.
+ UNREACHABLE = ZERO,
+ /// Weight of a block containing a non-returning call.
+ NORETURN = LOWEST_NON_ZERO,
+ /// Weight of the 'unwind' block of an invoke instruction.
+ UNWIND = LOWEST_NON_ZERO,
+ /// Weight of a 'cold' block. Cold blocks are the ones containing calls marked
+ /// with attribute 'cold'.
+ COLD = 0xffff,
+ /// The default weight, used when no dedicated execution weight is set. It is
+ /// not propagated along the domination line either.
+ DEFAULT = 0xfffff
+};
+
+BranchProbabilityInfo::SccInfo::SccInfo(const Function &F) {
+ // Record SCC numbers of blocks in the CFG to identify irreducible loops.
+ // FIXME: We could only calculate this if the CFG is known to be irreducible
+ // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
+ int SccNum = 0;
+ for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd();
+ ++It, ++SccNum) {
+ // Ignore single-block SCCs since they either aren't loops or LoopInfo will
+ // catch them.
+ const std::vector<const BasicBlock *> &Scc = *It;
+ if (Scc.size() == 1)
+ continue;
-/// Invoke-terminating normal branch not-taken weight.
-///
-/// This is the weight for branching to the unwind destination of an invoke
-/// instruction. This is essentially never taken.
-static const uint32_t IH_NONTAKEN_WEIGHT = 1;
-
-static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT,
- SmallVectorImpl<const BasicBlock *> &WorkList,
- SmallPtrSetImpl<const BasicBlock *> &TargetSet) {
- SmallVector<BasicBlock *, 8> Descendants;
- SmallPtrSet<const BasicBlock *, 16> NewItems;
-
- PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants);
- for (auto *BB : Descendants)
- if (TargetSet.insert(BB).second)
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (!TargetSet.count(*PI))
- NewItems.insert(*PI);
- WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end());
+ LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
+ for (const auto *BB : Scc) {
+ LLVM_DEBUG(dbgs() << " " << BB->getName());
+ SccNums[BB] = SccNum;
+ calculateSccBlockType(BB, SccNum);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+ }
}
-/// Compute a set of basic blocks that are post-dominated by unreachables.
-void BranchProbabilityInfo::computePostDominatedByUnreachable(
- const Function &F, PostDominatorTree *PDT) {
- SmallVector<const BasicBlock *, 8> WorkList;
- for (auto &BB : F) {
- const Instruction *TI = BB.getTerminator();
- if (TI->getNumSuccessors() == 0) {
- if (isa<UnreachableInst>(TI) ||
- // If this block is terminated by a call to
- // @llvm.experimental.deoptimize then treat it like an unreachable
- // since the @llvm.experimental.deoptimize call is expected to
- // practically never execute.
- BB.getTerminatingDeoptimizeCall())
- UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable);
- }
+int BranchProbabilityInfo::SccInfo::getSCCNum(const BasicBlock *BB) const {
+ auto SccIt = SccNums.find(BB);
+ if (SccIt == SccNums.end())
+ return -1;
+ return SccIt->second;
+}
+
+void BranchProbabilityInfo::SccInfo::getSccEnterBlocks(
+ int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const {
+
+ for (auto MapIt : SccBlocks[SccNum]) {
+ const auto *BB = MapIt.first;
+ if (isSCCHeader(BB, SccNum))
+ for (const auto *Pred : predecessors(BB))
+ if (getSCCNum(Pred) != SccNum)
+ Enters.push_back(const_cast<BasicBlock *>(BB));
}
+}
- while (!WorkList.empty()) {
- const BasicBlock *BB = WorkList.pop_back_val();
- if (PostDominatedByUnreachable.count(BB))
- continue;
- // If the terminator is an InvokeInst, check only the normal destination
- // block as the unwind edge of InvokeInst is also very unlikely taken.
- if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- if (PostDominatedByUnreachable.count(II->getNormalDest()))
- UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
- }
- // If all the successors are unreachable, BB is unreachable as well.
- else if (!successors(BB).empty() &&
- llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
- return PostDominatedByUnreachable.count(Succ);
- }))
- UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
+void BranchProbabilityInfo::SccInfo::getSccExitBlocks(
+ int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const {
+ for (auto MapIt : SccBlocks[SccNum]) {
+ const auto *BB = MapIt.first;
+ if (isSCCExitingBlock(BB, SccNum))
+ for (const auto *Succ : successors(BB))
+ if (getSCCNum(Succ) != SccNum)
+ Exits.push_back(const_cast<BasicBlock *>(BB));
}
}
-/// compute a set of basic blocks that are post-dominated by ColdCalls.
-void BranchProbabilityInfo::computePostDominatedByColdCall(
- const Function &F, PostDominatorTree *PDT) {
- SmallVector<const BasicBlock *, 8> WorkList;
- for (auto &BB : F)
- for (auto &I : BB)
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (CI->hasFnAttr(Attribute::Cold))
- UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall);
+uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB,
+ int SccNum) const {
+ assert(getSCCNum(BB) == SccNum);
- while (!WorkList.empty()) {
- const BasicBlock *BB = WorkList.pop_back_val();
+ assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC");
+ const auto &SccBlockTypes = SccBlocks[SccNum];
- // If the terminator is an InvokeInst, check only the normal destination
- // block as the unwind edge of InvokeInst is also very unlikely taken.
- if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- if (PostDominatedByColdCall.count(II->getNormalDest()))
- UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
- }
- // If all of successor are post dominated then BB is also done.
- else if (!successors(BB).empty() &&
- llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
- return PostDominatedByColdCall.count(Succ);
- }))
- UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
+ auto It = SccBlockTypes.find(BB);
+ if (It != SccBlockTypes.end()) {
+ return It->second;
}
+ return Inner;
}
-/// Calculate edge weights for successors lead to unreachable.
-///
-/// Predict that a successor which leads necessarily to an
-/// unreachable-terminated block as extremely unlikely.
-bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
- const Instruction *TI = BB->getTerminator();
- (void) TI;
- assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
- assert(!isa<InvokeInst>(TI) &&
- "Invokes should have already been handled by calcInvokeHeuristics");
+void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB,
+ int SccNum) {
+ assert(getSCCNum(BB) == SccNum);
+ uint32_t BlockType = Inner;
- SmallVector<unsigned, 4> UnreachableEdges;
- SmallVector<unsigned, 4> ReachableEdges;
+ if (llvm::any_of(predecessors(BB), [&](const BasicBlock *Pred) {
+ // Consider any block that is an entry point to the SCC as
+ // a header.
+ return getSCCNum(Pred) != SccNum;
+ }))
+ BlockType |= Header;
- for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
- if (PostDominatedByUnreachable.count(*I))
- UnreachableEdges.push_back(I.getSuccessorIndex());
- else
- ReachableEdges.push_back(I.getSuccessorIndex());
+ if (llvm::any_of(successors(BB), [&](const BasicBlock *Succ) {
+ return getSCCNum(Succ) != SccNum;
+ }))
+ BlockType |= Exiting;
- // Skip probabilities if all were reachable.
- if (UnreachableEdges.empty())
- return false;
+ // Lazily grow the per-SCC block-type table and record this block's type
+ // (header and/or exiting) in it.
+ if (SccBlocks.size() <= static_cast<unsigned>(SccNum))
+ SccBlocks.resize(SccNum + 1);
+ auto &SccBlockTypes = SccBlocks[SccNum];
+
+ if (BlockType != Inner) {
+ bool IsInserted;
+ std::tie(std::ignore, IsInserted) =
+ SccBlockTypes.insert(std::make_pair(BB, BlockType));
+ assert(IsInserted && "Duplicated block in SCC");
+ }
+}
- SmallVector<BranchProbability, 4> EdgeProbabilities(
- BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown());
- if (ReachableEdges.empty()) {
- BranchProbability Prob(1, UnreachableEdges.size());
- for (unsigned SuccIdx : UnreachableEdges)
- EdgeProbabilities[SuccIdx] = Prob;
- setEdgeProbability(BB, EdgeProbabilities);
- return true;
+BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB,
+ const LoopInfo &LI,
+ const SccInfo &SccI)
+ : BB(BB) {
+ LD.first = LI.getLoopFor(BB);
+ if (!LD.first) {
+ LD.second = SccI.getSCCNum(BB);
}
+}
- auto UnreachableProb = UR_TAKEN_PROB;
- auto ReachableProb =
- (BranchProbability::getOne() - UR_TAKEN_PROB * UnreachableEdges.size()) /
- ReachableEdges.size();
+bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const {
+ const auto &SrcBlock = Edge.first;
+ const auto &DstBlock = Edge.second;
+ return (DstBlock.getLoop() &&
+ !DstBlock.getLoop()->contains(SrcBlock.getLoop())) ||
+ // Assume that SCCs can't be nested.
+ (DstBlock.getSccNum() != -1 &&
+ SrcBlock.getSccNum() != DstBlock.getSccNum());
+}
- for (unsigned SuccIdx : UnreachableEdges)
- EdgeProbabilities[SuccIdx] = UnreachableProb;
- for (unsigned SuccIdx : ReachableEdges)
- EdgeProbabilities[SuccIdx] = ReachableProb;
+bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const {
+ return isLoopEnteringEdge({Edge.second, Edge.first});
+}
- setEdgeProbability(BB, EdgeProbabilities);
- return true;
+bool BranchProbabilityInfo::isLoopEnteringExitingEdge(
+ const LoopEdge &Edge) const {
+ return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge);
+}
+
+bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const {
+ const auto &SrcBlock = Edge.first;
+ const auto &DstBlock = Edge.second;
+ return SrcBlock.belongsToSameLoop(DstBlock) &&
+ ((DstBlock.getLoop() &&
+ DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) ||
+ (DstBlock.getSccNum() != -1 &&
+ SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum())));
+}
+
+void BranchProbabilityInfo::getLoopEnterBlocks(
+ const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const {
+ if (LB.getLoop()) {
+ auto *Header = LB.getLoop()->getHeader();
+ Enters.append(pred_begin(Header), pred_end(Header));
+ } else {
+ assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
+ SccI->getSccEnterBlocks(LB.getSccNum(), Enters);
+ }
+}
+
+void BranchProbabilityInfo::getLoopExitBlocks(
+ const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const {
+ if (LB.getLoop()) {
+ LB.getLoop()->getExitBlocks(Exits);
+ } else {
+ assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
+ SccI->getSccExitBlocks(LB.getSccNum(), Exits);
+ }
}
// Propagate existing explicit probabilities from either profile data or
@@ -315,7 +339,12 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
"Too many bits for uint32_t");
Weights.push_back(Weight->getZExtValue());
WeightSum += Weights.back();
- if (PostDominatedByUnreachable.count(TI->getSuccessor(I - 1)))
+ const LoopBlock SrcLoopBB = getLoopBlock(BB);
+ const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I - 1));
+ auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
+ if (EstimatedWeight &&
+ EstimatedWeight.getValue() <=
+ static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
UnreachableIdxs.push_back(I - 1);
else
ReachableIdxs.push_back(I - 1);
@@ -420,60 +449,6 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
return true;
}
-/// Calculate edge weights for edges leading to cold blocks.
-///
-/// A cold block is one post-dominated by a block with a call to a
-/// cold function. Those edges are unlikely to be taken, so we give
-/// them relatively low weight.
-///
-/// Return true if we could compute the weights for cold edges.
-/// Return false, otherwise.
-bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
- const Instruction *TI = BB->getTerminator();
- (void) TI;
- assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
- assert(!isa<InvokeInst>(TI) &&
- "Invokes should have already been handled by calcInvokeHeuristics");
-
- // Determine which successors are post-dominated by a cold block.
- SmallVector<unsigned, 4> ColdEdges;
- SmallVector<unsigned, 4> NormalEdges;
- for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
- if (PostDominatedByColdCall.count(*I))
- ColdEdges.push_back(I.getSuccessorIndex());
- else
- NormalEdges.push_back(I.getSuccessorIndex());
-
- // Skip probabilities if no cold edges.
- if (ColdEdges.empty())
- return false;
-
- SmallVector<BranchProbability, 4> EdgeProbabilities(
- BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown());
- if (NormalEdges.empty()) {
- BranchProbability Prob(1, ColdEdges.size());
- for (unsigned SuccIdx : ColdEdges)
- EdgeProbabilities[SuccIdx] = Prob;
- setEdgeProbability(BB, EdgeProbabilities);
- return true;
- }
-
- auto ColdProb = BranchProbability::getBranchProbability(
- CC_TAKEN_WEIGHT,
- (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(ColdEdges.size()));
- auto NormalProb = BranchProbability::getBranchProbability(
- CC_NONTAKEN_WEIGHT,
- (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(NormalEdges.size()));
-
- for (unsigned SuccIdx : ColdEdges)
- EdgeProbabilities[SuccIdx] = ColdProb;
- for (unsigned SuccIdx : NormalEdges)
- EdgeProbabilities[SuccIdx] = NormalProb;
-
- setEdgeProbability(BB, EdgeProbabilities);
- return true;
-}
-
// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
// between two pointer or pointer and NULL will fail.
bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
@@ -511,38 +486,6 @@ bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
return true;
}
-static int getSCCNum(const BasicBlock *BB,
- const BranchProbabilityInfo::SccInfo &SccI) {
- auto SccIt = SccI.SccNums.find(BB);
- if (SccIt == SccI.SccNums.end())
- return -1;
- return SccIt->second;
-}
-
-// Consider any block that is an entry point to the SCC as a header.
-static bool isSCCHeader(const BasicBlock *BB, int SccNum,
- BranchProbabilityInfo::SccInfo &SccI) {
- assert(getSCCNum(BB, SccI) == SccNum);
-
- // Lazily compute the set of headers for a given SCC and cache the results
- // in the SccHeaderMap.
- if (SccI.SccHeaders.size() <= static_cast<unsigned>(SccNum))
- SccI.SccHeaders.resize(SccNum + 1);
- auto &HeaderMap = SccI.SccHeaders[SccNum];
- bool Inserted;
- BranchProbabilityInfo::SccHeaderMap::iterator HeaderMapIt;
- std::tie(HeaderMapIt, Inserted) = HeaderMap.insert(std::make_pair(BB, false));
- if (Inserted) {
- bool IsHeader = llvm::any_of(make_range(pred_begin(BB), pred_end(BB)),
- [&](const BasicBlock *Pred) {
- return getSCCNum(Pred, SccI) != SccNum;
- });
- HeaderMapIt->second = IsHeader;
- return IsHeader;
- } else
- return HeaderMapIt->second;
-}
-
// Compute the unlikely successors to the block BB in the loop L, specifically
// those that are unlikely because this is a loop, and add them to the
// UnlikelyBlocks set.
@@ -625,8 +568,7 @@ computeUnlikelySuccessors(const BasicBlock *BB, Loop *L,
// we can constant-evaluate the compare to see if it makes the branch be
// taken or not.
Constant *CmpLHSConst = dyn_cast<Constant>(V);
- if (!CmpLHSConst ||
- std::find(succ_begin(BB), succ_end(BB), B) == succ_end(BB))
+ if (!CmpLHSConst || !llvm::is_contained(successors(BB), B))
continue;
// First collapse InstChain
for (Instruction *I : llvm::reverse(InstChain)) {
@@ -650,92 +592,324 @@ computeUnlikelySuccessors(const BasicBlock *BB, Loop *L,
}
}
-// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
-// as taken, exiting edges as not-taken.
-bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
- const LoopInfo &LI,
- SccInfo &SccI) {
- int SccNum;
- Loop *L = LI.getLoopFor(BB);
- if (!L) {
- SccNum = getSCCNum(BB, SccI);
- if (SccNum < 0)
- return false;
+Optional<uint32_t>
+BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const {
+ auto WeightIt = EstimatedBlockWeight.find(BB);
+ if (WeightIt == EstimatedBlockWeight.end())
+ return None;
+ return WeightIt->second;
+}
+
+Optional<uint32_t>
+BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const {
+ auto WeightIt = EstimatedLoopWeight.find(L);
+ if (WeightIt == EstimatedLoopWeight.end())
+ return None;
+ return WeightIt->second;
+}
+
+Optional<uint32_t>
+BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const {
+ // For edges entering a loop, take the weight of the loop rather than that of
+ // an individual block in the loop.
+ return isLoopEnteringEdge(Edge)
+ ? getEstimatedLoopWeight(Edge.second.getLoopData())
+ : getEstimatedBlockWeight(Edge.second.getBlock());
+}
+
+template <class IterT>
+Optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
+ const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const {
+ SmallVector<uint32_t, 4> Weights;
+ Optional<uint32_t> MaxWeight;
+ for (const BasicBlock *DstBB : Successors) {
+ const LoopBlock DstLoopBB = getLoopBlock(DstBB);
+ auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
+
+ if (!Weight)
+ return None;
+
+ if (!MaxWeight || MaxWeight.getValue() < Weight.getValue())
+ MaxWeight = Weight;
}
- SmallPtrSet<const BasicBlock*, 8> UnlikelyBlocks;
- if (L)
- computeUnlikelySuccessors(BB, L, UnlikelyBlocks);
+ return MaxWeight;
+}
- SmallVector<unsigned, 8> BackEdges;
- SmallVector<unsigned, 8> ExitingEdges;
- SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.
- SmallVector<unsigned, 8> UnlikelyEdges;
+// Updates \p LoopBB's weight and returns true. If \p LoopBB already has an
+// associated weight, it is left unchanged and false is returned.
+//
+// Note that, by construction, the weight is not expected to change once set;
+// the 'false' status is therefore used to track visited blocks.
+bool BranchProbabilityInfo::updateEstimatedBlockWeight(
+ LoopBlock &LoopBB, uint32_t BBWeight,
+ SmallVectorImpl<BasicBlock *> &BlockWorkList,
+ SmallVectorImpl<LoopBlock> &LoopWorkList) {
+ BasicBlock *BB = LoopBB.getBlock();
+
+ // In general, a weight is assigned to a block when it has a final value that
+ // can't/shouldn't be changed. However, there are cases when a block
+ // inherently has several (possibly "contradicting") weights. For example, an
+ // "unwind" block may also contain a "cold" call. In that case the first
+ // weight set is favored and all subsequent weights are ignored.
+ if (!EstimatedBlockWeight.insert({BB, BBWeight}).second)
+ return false;
- for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- // Use LoopInfo if we have it, otherwise fall-back to SCC info to catch
- // irreducible loops.
- if (L) {
- if (UnlikelyBlocks.count(*I) != 0)
- UnlikelyEdges.push_back(I.getSuccessorIndex());
- else if (!L->contains(*I))
- ExitingEdges.push_back(I.getSuccessorIndex());
- else if (L->getHeader() == *I)
- BackEdges.push_back(I.getSuccessorIndex());
- else
- InEdges.push_back(I.getSuccessorIndex());
- } else {
- if (getSCCNum(*I, SccI) != SccNum)
- ExitingEdges.push_back(I.getSuccessorIndex());
- else if (isSCCHeader(*I, SccNum, SccI))
- BackEdges.push_back(I.getSuccessorIndex());
- else
- InEdges.push_back(I.getSuccessorIndex());
+ for (BasicBlock *PredBlock : predecessors(BB)) {
+ LoopBlock PredLoop = getLoopBlock(PredBlock);
+ // Add affected block/loop to a working list.
+ if (isLoopExitingEdge({PredLoop, LoopBB})) {
+ if (!EstimatedLoopWeight.count(PredLoop.getLoopData()))
+ LoopWorkList.push_back(PredLoop);
+ } else if (!EstimatedBlockWeight.count(PredBlock))
+ BlockWorkList.push_back(PredBlock);
+ }
+ return true;
+}
+
+// Starting from \p BB, traverse through dominator blocks and assign \p BBWeight
+// to all blocks that are post-dominated by \p BB, in other words to all blocks
+// that are executed if and only if \p BB is executed.
+// Importantly, we skip loops here for two reasons. First, weights of blocks in
+// a loop should be scaled by the trip count (which is possibly unknown). Second,
+// there is no value in doing it because it doesn't give any additional
+// information regarding the distribution of probabilities inside the loop.
+// The exception is loop 'enter' and 'exit' edges, which are handled in a
+// special way in calcEstimatedHeuristics.
+//
+// In addition, \p WorkList is populated with basic blocks if at least one
+// successor has an updated estimated weight.
+void BranchProbabilityInfo::propagateEstimatedBlockWeight(
+ const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
+ uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
+ SmallVectorImpl<LoopBlock> &LoopWorkList) {
+ const BasicBlock *BB = LoopBB.getBlock();
+ const auto *DTStartNode = DT->getNode(BB);
+ const auto *PDTStartNode = PDT->getNode(BB);
+
+ // TODO: Consider propagating weight down the domination line as well.
+ for (const auto *DTNode = DTStartNode; DTNode != nullptr;
+ DTNode = DTNode->getIDom()) {
+ auto *DomBB = DTNode->getBlock();
+ // Consider blocks which lie on one 'line'.
+ if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB)))
+ // If BB doesn't post-dominate DomBB, it will not post-dominate DomBB's
+ // dominators either.
+ break;
+
+ LoopBlock DomLoopBB = getLoopBlock(DomBB);
+ const LoopEdge Edge{DomLoopBB, LoopBB};
+ // Don't propagate weight to blocks belonging to different loops.
+ if (!isLoopEnteringExitingEdge(Edge)) {
+ if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList,
+ LoopWorkList))
+ // If DomBB already has a weight set then all its predecessors are already
+ // processed (since we propagate weight up to the top of the IR each time).
+ break;
+ } else if (isLoopExitingEdge(Edge)) {
+ LoopWorkList.push_back(DomLoopBB);
}
}
+}
+
+Optional<uint32_t> BranchProbabilityInfo::getInitialEstimatedBlockWeight(
+ const BasicBlock *BB) {
+ // Returns true if \p BB has a call marked with the "NoReturn" attribute.
+ auto hasNoReturn = [&](const BasicBlock *BB) {
+ for (const auto &I : reverse(*BB))
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->hasFnAttr(Attribute::NoReturn))
+ return true;
- if (BackEdges.empty() && ExitingEdges.empty() && UnlikelyEdges.empty())
return false;
+ };
- // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
- // normalize them so that they sum up to one.
- unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
- (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
- (UnlikelyEdges.empty() ? 0 : LBH_UNLIKELY_WEIGHT) +
- (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);
+ // Important note regarding the order of checks: they are ordered by weight
+ // from lowest to highest. Ordering them this way avoids "unstable" results
+ // when several heuristic conditions apply simultaneously.
+ if (isa<UnreachableInst>(BB->getTerminator()) ||
+ // If this block is terminated by a call to
+ // @llvm.experimental.deoptimize then treat it like an unreachable
+ // since it is expected to practically never execute.
+ // TODO: Should we actually treat as never returning call?
+ BB->getTerminatingDeoptimizeCall())
+ return hasNoReturn(BB)
+ ? static_cast<uint32_t>(BlockExecWeight::NORETURN)
+ : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE);
+
+ // Check if the block is 'unwind' handler of some invoke instruction.
+ for (const auto *Pred : predecessors(BB))
+ if (Pred)
+ if (const auto *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
+ if (II->getUnwindDest() == BB)
+ return static_cast<uint32_t>(BlockExecWeight::UNWIND);
+
+ // Check if the block contains 'cold' call.
+ for (const auto &I : *BB)
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->hasFnAttr(Attribute::Cold))
+ return static_cast<uint32_t>(BlockExecWeight::COLD);
+
+ return None;
+}
- SmallVector<BranchProbability, 4> EdgeProbabilities(
- BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown());
- if (uint32_t numBackEdges = BackEdges.size()) {
- BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
- auto Prob = TakenProb / numBackEdges;
- for (unsigned SuccIdx : BackEdges)
- EdgeProbabilities[SuccIdx] = Prob;
- }
+// Does an RPO traversal over all blocks in \p F and assigns weights to
+// 'unreachable', 'noreturn', 'cold' and 'unwind' blocks. In addition it does
+// its best to propagate the weight up/down the IR.
+void BranchProbabilityInfo::computeEestimateBlockWeight(
+ const Function &F, DominatorTree *DT, PostDominatorTree *PDT) {
+ SmallVector<BasicBlock *, 8> BlockWorkList;
+ SmallVector<LoopBlock, 8> LoopWorkList;
+
+ // By doing RPO we make sure that all predecessors already have weights
+ // calculated before visiting their successors.
+ ReversePostOrderTraversal<const Function *> RPOT(&F);
+ for (const auto *BB : RPOT)
+ if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
+ // If we were able to find an estimated weight for the block, set it for
+ // this block and propagate it up the IR.
+ propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT,
+ BBWeight.getValue(), BlockWorkList,
+ LoopWorkList);
+
+ // BlockWorklist/LoopWorkList contains blocks/loops with at least one
+ // successor/exit having estimated weight. Try to propagate weight to such
+ // blocks/loops from successors/exits.
+ // Process loops and blocks. Order is not important.
+ do {
+ while (!LoopWorkList.empty()) {
+ const LoopBlock LoopBB = LoopWorkList.pop_back_val();
+
+ if (EstimatedLoopWeight.count(LoopBB.getLoopData()))
+ continue;
- if (uint32_t numInEdges = InEdges.size()) {
- BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
- auto Prob = TakenProb / numInEdges;
- for (unsigned SuccIdx : InEdges)
- EdgeProbabilities[SuccIdx] = Prob;
- }
+ SmallVector<BasicBlock *, 4> Exits;
+ getLoopExitBlocks(LoopBB, Exits);
+ auto LoopWeight = getMaxEstimatedEdgeWeight(
+ LoopBB, make_range(Exits.begin(), Exits.end()));
+
+ if (LoopWeight) {
+ // If we never exit the loop then we can enter it at most once.
+ if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
+ LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
+
+ EstimatedLoopWeight.insert(
+ {LoopBB.getLoopData(), LoopWeight.getValue()});
+ // Add all blocks entering the loop into working list.
+ getLoopEnterBlocks(LoopBB, BlockWorkList);
+ }
+ }
- if (uint32_t numExitingEdges = ExitingEdges.size()) {
- BranchProbability NotTakenProb = BranchProbability(LBH_NONTAKEN_WEIGHT,
- Denom);
- auto Prob = NotTakenProb / numExitingEdges;
- for (unsigned SuccIdx : ExitingEdges)
- EdgeProbabilities[SuccIdx] = Prob;
+ while (!BlockWorkList.empty()) {
+ // We can reach here only if BlockWorkList is not empty.
+ const BasicBlock *BB = BlockWorkList.pop_back_val();
+ if (EstimatedBlockWeight.count(BB))
+ continue;
+
+ // We take the maximum over all successor weights, in other words the weight
+ // of the "hot" path. In theory a better combining function (with higher
+ // accuracy than "maximum") may exist, but none comes to mind right now, and
+ // it is unlikely to make any difference in practice.
+ const LoopBlock LoopBB = getLoopBlock(BB);
+ auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB));
+
+ if (MaxWeight)
+ propagateEstimatedBlockWeight(LoopBB, DT, PDT, MaxWeight.getValue(),
+ BlockWorkList, LoopWorkList);
+ }
+ } while (!BlockWorkList.empty() || !LoopWorkList.empty());
+}
+
+// Calculate edge probabilities based on the blocks' estimated weights.
+// Note that the gathered weights were not scaled for loops, so edges entering
+// and exiting loops require special processing.
+bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
+ assert(BB->getTerminator()->getNumSuccessors() > 1 &&
+ "expected more than one successor!");
+
+ const LoopBlock LoopBB = getLoopBlock(BB);
+
+ SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks;
+ uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT;
+ if (LoopBB.getLoop())
+ computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks);
+
+ // Changed to 'true' if at least one successor has estimated weight.
+ bool FoundEstimatedWeight = false;
+ SmallVector<uint32_t, 4> SuccWeights;
+ uint64_t TotalWeight = 0;
+ // Go over all successors of BB and put their weights into SuccWeights.
+ for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ const BasicBlock *SuccBB = *I;
+ Optional<uint32_t> Weight;
+ const LoopBlock SuccLoopBB = getLoopBlock(SuccBB);
+ const LoopEdge Edge{LoopBB, SuccLoopBB};
+
+ Weight = getEstimatedEdgeWeight(Edge);
+
+ if (isLoopExitingEdge(Edge) &&
+ // Avoid adjustment of ZERO weight since it should remain unchanged.
+ Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
+ // Scale down loop exiting weight by trip count.
+ Weight = std::max(
+ static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
+ Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
+ TC);
+ }
+ bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB);
+ if (IsUnlikelyEdge &&
+ // Avoid adjustment of ZERO weight since it should remain unchanged.
+ Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
+ // 'Unlikely' blocks get half the weight.
+ Weight = std::max(
+ static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
+ Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
+ 2);
+ }
+
+ if (Weight)
+ FoundEstimatedWeight = true;
+
+ auto WeightVal =
+ Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
+ TotalWeight += WeightVal;
+ SuccWeights.push_back(WeightVal);
}
- if (uint32_t numUnlikelyEdges = UnlikelyEdges.size()) {
- BranchProbability UnlikelyProb = BranchProbability(LBH_UNLIKELY_WEIGHT,
- Denom);
- auto Prob = UnlikelyProb / numUnlikelyEdges;
- for (unsigned SuccIdx : UnlikelyEdges)
- EdgeProbabilities[SuccIdx] = Prob;
+ // If none of the blocks have an estimated weight, bail out.
+ // If TotalWeight is 0, the weight of each successor is 0 as well and all
+ // successors are equally likely. Bail out early to avoid division by zero.
+ if (!FoundEstimatedWeight || TotalWeight == 0)
+ return false;
+
+ assert(SuccWeights.size() == succ_size(BB) && "Missed successor?");
+ const unsigned SuccCount = SuccWeights.size();
+
+ // If the sum of weights does not fit in 32 bits, scale every weight down
+ // accordingly.
+ if (TotalWeight > UINT32_MAX) {
+ uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1;
+ TotalWeight = 0;
+ for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
+ SuccWeights[Idx] /= ScalingFactor;
+ if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO))
+ SuccWeights[Idx] =
+ static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
+ TotalWeight += SuccWeights[Idx];
+ }
+ assert(TotalWeight <= UINT32_MAX && "Total weight overflows");
}
+ // Finally set probabilities to edges according to estimated block weights.
+ SmallVector<BranchProbability, 4> EdgeProbabilities(
+ SuccCount, BranchProbability::getUnknown());
+
+ for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
+ EdgeProbabilities[Idx] =
+ BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight);
+ }
setEdgeProbability(BB, EdgeProbabilities);
return true;
}
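
calcEstimatedHeuristics() above finishes by converting successor weights into branch probabilities, scaling the weights down when their 64-bit sum would overflow 32 bits. A standalone sketch of just that conversion, with a stand-in Probability type instead of llvm::BranchProbability:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Probability {           // Stand-in for llvm::BranchProbability.
  uint32_t Numerator;
  uint32_t Denominator;
};

std::vector<Probability> weightsToProbabilities(std::vector<uint32_t> W) {
  uint64_t Total = 0;
  for (uint32_t X : W)
    Total += X;
  assert(Total != 0 && "the caller bails out when every weight is zero");

  // If the sum does not fit in 32 bits, divide every weight by a common
  // factor, bumping exact zeros back up to the lowest non-zero weight.
  if (Total > UINT32_MAX) {
    uint64_t ScalingFactor = Total / UINT32_MAX + 1;
    Total = 0;
    for (uint32_t &X : W) {
      X /= ScalingFactor;
      if (X == 0)
        X = 1;                 // Plays the role of LOWEST_NON_ZERO above.
      Total += X;
    }
    assert(Total <= UINT32_MAX && "total weight still overflows");
  }

  std::vector<Probability> Probs;
  for (uint32_t X : W)
    Probs.push_back({X, static_cast<uint32_t>(Total)});
  return Probs;
}

int main() {
  // Two successors: a DEFAULT-weight path and a COLD path (0xfffff vs 0xffff,
  // matching the BlockExecWeight values above).
  for (const Probability &P : weightsToProbabilities({0xfffff, 0xffff}))
    std::printf("%u / %u\n", (unsigned)P.Numerator, (unsigned)P.Denominator);
  return 0;
}
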
@@ -766,7 +940,7 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
// we don't have information about probabilities.
if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
if (LHS->getOpcode() == Instruction::And)
- if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
+ if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(1)))
if (AndRHS->getValue().isPowerOf2())
return false;
@@ -782,7 +956,8 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
Func == LibFunc_strcmp ||
Func == LibFunc_strncasecmp ||
Func == LibFunc_strncmp ||
- Func == LibFunc_memcmp) {
+ Func == LibFunc_memcmp ||
+ Func == LibFunc_bcmp) {
// strcmp and similar functions return zero, negative, or positive, if the
// first string is equal, less, or greater than the second. We consider it
// likely that the strings are not equal, so a comparison with zero is
@@ -900,18 +1075,6 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
return true;
}
-bool BranchProbabilityInfo::calcInvokeHeuristics(const BasicBlock *BB) {
- const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator());
- if (!II)
- return false;
-
- BranchProbability TakenProb(IH_TAKEN_WEIGHT,
- IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT);
- setEdgeProbability(
- BB, SmallVector<BranchProbability, 2>({TakenProb, TakenProb.getCompl()}));
- return true;
-}
-
void BranchProbabilityInfo::releaseMemory() {
Probs.clear();
Handles.clear();
@@ -951,8 +1114,7 @@ BranchProbabilityInfo::getHotSucc(const BasicBlock *BB) const {
auto MaxProb = BranchProbability::getZero();
const BasicBlock *MaxSucc = nullptr;
- for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- const BasicBlock *Succ = *I;
+ for (const auto *Succ : successors(BB)) {
auto Prob = getEdgeProbability(BB, Succ);
if (Prob > MaxProb) {
MaxProb = Prob;
@@ -975,6 +1137,10 @@ BranchProbability
BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
unsigned IndexInSuccessors) const {
auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
+ assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) ==
+ (Probs.end() == I) &&
+ "Probability for I-th successor must always be defined along with the "
+ "probability for the first successor");
if (I != Probs.end())
return I->second;
@@ -993,44 +1159,32 @@ BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
BranchProbability
BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
const BasicBlock *Dst) const {
+ if (!Probs.count(std::make_pair(Src, 0)))
+ return BranchProbability(llvm::count(successors(Src), Dst), succ_size(Src));
+
auto Prob = BranchProbability::getZero();
- bool FoundProb = false;
- uint32_t EdgeCount = 0;
for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
- if (*I == Dst) {
- ++EdgeCount;
- auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex()));
- if (MapI != Probs.end()) {
- FoundProb = true;
- Prob += MapI->second;
- }
- }
- uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src));
- return FoundProb ? Prob : BranchProbability(EdgeCount, succ_num);
-}
+ if (*I == Dst)
+ Prob += Probs.find(std::make_pair(Src, I.getSuccessorIndex()))->second;
-/// Set the edge probability for a given edge specified by PredBlock and an
-/// index to the successors.
-void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src,
- unsigned IndexInSuccessors,
- BranchProbability Prob) {
- Probs[std::make_pair(Src, IndexInSuccessors)] = Prob;
- Handles.insert(BasicBlockCallbackVH(Src, this));
- LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> "
- << IndexInSuccessors << " successor probability to " << Prob
- << "\n");
+ return Prob;
}
/// Set the edge probability for all edges at once.
void BranchProbabilityInfo::setEdgeProbability(
const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) {
assert(Src->getTerminator()->getNumSuccessors() == Probs.size());
+ eraseBlock(Src); // Erase stale data if any.
if (Probs.size() == 0)
return; // Nothing to set.
+ Handles.insert(BasicBlockCallbackVH(Src, this));
uint64_t TotalNumerator = 0;
for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) {
- setEdgeProbability(Src, SuccIdx, Probs[SuccIdx]);
+ this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx];
+ LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx
+ << " successor probability to " << Probs[SuccIdx]
+ << "\n");
TotalNumerator += Probs[SuccIdx].getNumerator();
}
@@ -1043,6 +1197,25 @@ void BranchProbabilityInfo::setEdgeProbability(
assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size());
}
+void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src,
+ BasicBlock *Dst) {
+ eraseBlock(Dst); // Erase stale data if any.
+ unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors();
+ assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors());
+ if (NumSuccessors == 0)
+ return; // Nothing to set.
+ if (this->Probs.find(std::make_pair(Src, 0)) == this->Probs.end())
+ return; // No probability is set for edges from Src. Keep the same for Dst.
+
+ Handles.insert(BasicBlockCallbackVH(Dst, this));
+ for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) {
+ auto Prob = this->Probs[std::make_pair(Src, SuccIdx)];
+ this->Probs[std::make_pair(Dst, SuccIdx)] = Prob;
+ LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx
+ << " successor probability to " << Prob << "\n");
+ }
+}
+
raw_ostream &
BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
const BasicBlock *Src,
@@ -1056,52 +1229,55 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
}
void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) {
- for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- auto MapI = Probs.find(std::make_pair(BB, I.getSuccessorIndex()));
- if (MapI != Probs.end())
- Probs.erase(MapI);
+ LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n");
+
+ // Note that we cannot use successors of BB because the terminator of BB may
+ // have changed when eraseBlock is called as a BasicBlockCallbackVH callback.
+ // Instead, we remove prob data for the block by iterating successor indices
+ // from 0 up to the last index that exists. There cannot be prob data for a
+ // pair (BB, N) if there is no data for (BB, N-1), because the data is always
+ // set for all successors from 0 to M at once by setEdgeProbability().
+ Handles.erase(BasicBlockCallbackVH(BB, this));
+ for (unsigned I = 0;; ++I) {
+ auto MapI = Probs.find(std::make_pair(BB, I));
+ if (MapI == Probs.end()) {
+ assert(Probs.count(std::make_pair(BB, I + 1)) == 0 &&
+ "Must be no more successors");
+ return;
+ }
+ Probs.erase(MapI);
}
}
-void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
+void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI,
const TargetLibraryInfo *TLI,
+ DominatorTree *DT,
PostDominatorTree *PDT) {
LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
<< " ----\n\n");
LastF = &F; // Store the last function we ran on for printing.
- assert(PostDominatedByUnreachable.empty());
- assert(PostDominatedByColdCall.empty());
+ LI = &LoopI;
- // Record SCC numbers of blocks in the CFG to identify irreducible loops.
- // FIXME: We could only calculate this if the CFG is known to be irreducible
- // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
- int SccNum = 0;
- SccInfo SccI;
- for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd();
- ++It, ++SccNum) {
- // Ignore single-block SCCs since they either aren't loops or LoopInfo will
- // catch them.
- const std::vector<const BasicBlock *> &Scc = *It;
- if (Scc.size() == 1)
- continue;
+ SccI = std::make_unique<SccInfo>(F);
- LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
- for (auto *BB : Scc) {
- LLVM_DEBUG(dbgs() << " " << BB->getName());
- SccI.SccNums[BB] = SccNum;
- }
- LLVM_DEBUG(dbgs() << "\n");
- }
+ assert(EstimatedBlockWeight.empty());
+ assert(EstimatedLoopWeight.empty());
+ std::unique_ptr<DominatorTree> DTPtr;
std::unique_ptr<PostDominatorTree> PDTPtr;
+ if (!DT) {
+ DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F));
+ DT = DTPtr.get();
+ }
+
if (!PDT) {
PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
PDT = PDTPtr.get();
}
- computePostDominatedByUnreachable(F, PDT);
- computePostDominatedByColdCall(F, PDT);
+ computeEestimateBlockWeight(F, DT, PDT);
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
@@ -1113,13 +1289,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
continue;
if (calcMetadataWeights(BB))
continue;
- if (calcInvokeHeuristics(BB))
- continue;
- if (calcUnreachableHeuristics(BB))
- continue;
- if (calcColdCallHeuristics(BB))
- continue;
- if (calcLoopBranchHeuristics(BB, LI, SccI))
+ if (calcEstimatedHeuristics(BB))
continue;
if (calcPointerHeuristics(BB))
continue;
@@ -1129,8 +1299,9 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
continue;
}
- PostDominatedByUnreachable.clear();
- PostDominatedByColdCall.clear();
+ EstimatedLoopWeight.clear();
+ EstimatedBlockWeight.clear();
+ SccI.reset();
if (PrintBranchProb &&
(PrintBranchProbFuncName.empty() ||
@@ -1147,6 +1318,7 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.setPreservesAll();
}
@@ -1155,9 +1327,10 @@ bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
PostDominatorTree &PDT =
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- BPI.calculate(F, LI, &TLI, &PDT);
+ BPI.calculate(F, LI, &TLI, &DT, &PDT);
return false;
}
@@ -1174,6 +1347,7 @@ BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
BranchProbabilityInfo BPI;
BPI.calculate(F, AM.getResult<LoopAnalysis>(F),
&AM.getResult<TargetLibraryAnalysis>(F),
+ &AM.getResult<DominatorTreeAnalysis>(F),
&AM.getResult<PostDominatorTreeAnalysis>(F));
return BPI;
}
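
The new calculate() pipeline derives probabilities from estimated block weights instead of the removed post-domination sets: weights are seeded at special blocks ('unreachable', 'noreturn', 'cold', 'unwind') and other blocks take the maximum weight of their successors when all of them are known. A much-simplified sketch of that idea; it ignores the dominator/post-dominator walks and the loop handling of the real pass, assumes an acyclic CFG processed in post order, and uses illustrative names:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

enum : uint32_t { COLD = 0xffff, DEFAULT = 0xfffff };

int main() {
  // 0 -> {1, 2}; 1 -> {3}; 2 -> {4}; block 3 calls a 'cold' function.
  std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {4}, {}, {}};
  std::map<int, uint32_t> Weight = {{3, COLD}};  // Seed from block attributes.

  // Post order guarantees successors are processed before their predecessors.
  for (int BB : {4, 3, 1, 2, 0}) {
    if (Weight.count(BB) || Succs[BB].empty())
      continue;                                  // Already seeded, or a sink.
    uint32_t Max = 0;
    bool AllKnown = true;
    for (int S : Succs[BB]) {
      if (!Weight.count(S)) {
        AllKnown = false;                        // An unknown successor blocks
        break;                                   // propagation past the join.
      }
      Max = std::max(Max, Weight[S]);            // Weight of the "hot" path.
    }
    if (AllKnown)
      Weight[BB] = Max;
  }

  for (int BB = 0; BB < 5; ++BB)
    std::printf("block %d weight: %#x\n", BB,
                (unsigned)(Weight.count(BB) ? Weight[BB] : DEFAULT));
  return 0;
}
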
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp
index b46a6951dd25..33602ed71675 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp
@@ -14,9 +14,18 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+// The max number of basic blocks explored during reachability analysis between
+// two basic blocks. This is kept reasonably small to limit compile time when
+// repeatedly used by clients of this analysis (such as captureTracking).
+static cl::opt<unsigned> DefaultMaxBBsToExplore(
+ "dom-tree-reachability-max-bbs-to-explore", cl::Hidden,
+ cl::desc("Max number of BBs to explore for reachability analysis"),
+ cl::init(32));
+
/// FindFunctionBackedges - Analyze the specified function to find all of the
/// loop backedges in the function and return them. This is a relatively cheap
/// (compared to computing dominators and loop info) analysis.
@@ -94,7 +103,7 @@ bool llvm::isCriticalEdge(const Instruction *TI, const BasicBlock *Dest,
assert(TI->isTerminator() && "Must be a terminator to have successors!");
if (TI->getNumSuccessors() == 1) return false;
- assert(find(predecessors(Dest), TI->getParent()) != pred_end(Dest) &&
+ assert(is_contained(predecessors(Dest), TI->getParent()) &&
"No edge between TI's block and Dest.");
const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
@@ -152,9 +161,7 @@ bool llvm::isPotentiallyReachableFromMany(
const Loop *StopLoop = LI ? getOutermostLoop(LI, StopBB) : nullptr;
- // Limit the number of blocks we visit. The goal is to avoid run-away compile
- // times on large CFGs without hampering sensible code. Arbitrarily chosen.
- unsigned Limit = 32;
+ unsigned Limit = DefaultMaxBBsToExplore;
SmallPtrSet<const BasicBlock*, 32> Visited;
do {
BasicBlock *BB = Worklist.pop_back_val();
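
isPotentiallyReachableFromMany() above now reads its exploration budget from the dom-tree-reachability-max-bbs-to-explore option instead of a hard-coded 32. The shape of such a bounded search, sketched standalone over a plain adjacency list (a simplified stand-in for LLVM's CFG, with an illustrative cut-off rule):

#include <cstdio>
#include <set>
#include <vector>

// Conservative reachability: true means "reachable, or budget exhausted".
bool isPotentiallyReachable(const std::vector<std::vector<int>> &Succs,
                            int From, int To, unsigned Limit = 32) {
  std::vector<int> Worklist = {From};
  std::set<int> Visited;
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(BB).second)
      continue;                 // Already explored this block.
    if (BB == To)
      return true;
    if (Visited.size() >= Limit)
      return true;              // Budget exhausted: answer conservatively.
    for (int S : Succs[BB])
      Worklist.push_back(S);
  }
  return false;                 // Exhaustively proved unreachable.
}

int main() {
  // 0 -> 1 -> 2, and 3 is disconnected from everything.
  std::vector<std::vector<int>> Succs = {{1}, {2}, {}, {}};
  std::printf("%d %d\n", isPotentiallyReachable(Succs, 0, 2),
              isPotentiallyReachable(Succs, 0, 3));
  return 0;
}
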
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
index cf4afc8cfd9c..33b5a4603272 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
@@ -272,16 +272,17 @@ FunctionPass *llvm::createCFGOnlyPrinterLegacyPassPass() {
void DOTGraphTraits<DOTFuncInfo *>::computeHiddenNodes(const Function *F) {
auto evaluateBB = [&](const BasicBlock *Node) {
- if (succ_begin(Node) == succ_end(Node)) {
+ if (succ_empty(Node)) {
const Instruction *TI = Node->getTerminator();
isHiddenBasicBlock[Node] =
(HideUnreachablePaths && isa<UnreachableInst>(TI)) ||
(HideDeoptimizePaths && Node->getTerminatingDeoptimizeCall());
return;
}
- isHiddenBasicBlock[Node] = std::all_of(
- succ_begin(Node), succ_end(Node),
- [this](const BasicBlock *BB) { return isHiddenBasicBlock[BB]; });
+ isHiddenBasicBlock[Node] =
+ llvm::all_of(successors(Node), [this](const BasicBlock *BB) {
+ return isHiddenBasicBlock[BB];
+ });
};
/// The post order traversal iteration is done to know the status of
/// isHiddenBasicBlock for all the successors on the current BB.
@@ -289,7 +290,8 @@ void DOTGraphTraits<DOTFuncInfo *>::computeHiddenNodes(const Function *F) {
evaluateBB);
}
-bool DOTGraphTraits<DOTFuncInfo *>::isNodeHidden(const BasicBlock *Node) {
+bool DOTGraphTraits<DOTFuncInfo *>::isNodeHidden(const BasicBlock *Node,
+ const DOTFuncInfo *CFGInfo) {
// If both restricting flags are false, all nodes are displayed.
if (!HideUnreachablePaths && !HideDeoptimizePaths)
return false;
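
A block with successors is hidden by the CFG printer exactly when every successor is hidden; evaluating blocks in post order (as computeHiddenNodes() above does) ensures successors are decided before their predecessors. A toy version of that propagation, using a hand-written post order and hypothetical block numbering:

#include <algorithm>
#include <cstdio>
#include <map>
#include <vector>

int main() {
  // 0 -> {1, 2}; 1 -> {3}; 2 -> {4}; 3 ends in 'unreachable'; 4 is a return.
  std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {4}, {}, {}};
  std::map<int, bool> EndsInUnreachable = {{3, true}, {4, false}};
  std::vector<int> PostOrder = {3, 1, 4, 2, 0};  // Successors come first.

  std::map<int, bool> Hidden;
  for (int Node : PostOrder) {
    if (Succs[Node].empty()) {
      Hidden[Node] = EndsInUnreachable[Node];    // Sinks decide for themselves.
      continue;
    }
    Hidden[Node] = std::all_of(Succs[Node].begin(), Succs[Node].end(),
                               [&](int S) { return Hidden[S]; });
  }
  for (int Node = 0; Node < 5; ++Node)
    std::printf("block %d hidden: %d\n", Node, (int)Hidden[Node]);
  return 0;
}
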
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 179f0633df06..2be23a56cc7b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -559,8 +559,7 @@ bool CFLAndersAAResult::FunctionInfo::mayAlias(
if (RangePair.first != RangePair.second) {
// Be conservative about unknown sizes
- if (MaybeLHSSize == LocationSize::unknown() ||
- MaybeRHSSize == LocationSize::unknown())
+ if (!MaybeLHSSize.hasValue() || !MaybeRHSSize.hasValue())
return true;
const uint64_t LHSSize = MaybeLHSSize.getValue();
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp
index fd3166f8cd0c..3230e9036b8e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -20,10 +20,13 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PassManagerImpl.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -36,6 +39,11 @@ using namespace llvm;
// template typedefs.
namespace llvm {
+static cl::opt<bool> AbortOnMaxDevirtIterationsReached(
+ "abort-on-max-devirt-iterations-reached",
+ cl::desc("Abort when the max iterations for devirtualization CGSCC repeat "
+ "pass is reached"));
+
// Explicit instantiations for the core proxy templates.
template class AllAnalysesOn<LazyCallGraph::SCC>;
template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
@@ -78,9 +86,6 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
if (!PI.runBeforePass(*Pass, *C))
continue;
- if (DebugLogging)
- dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n";
-
PreservedAnalyses PassPA;
{
TimeTraceScope TimeScope(Pass->name());
@@ -88,9 +93,9 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
}
if (UR.InvalidatedSCCs.count(C))
- PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass);
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA);
else
- PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C);
+ PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA);
// Update the SCC if necessary.
C = UR.UpdatedC ? UR.UpdatedC : C;
@@ -143,6 +148,452 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
return PA;
}
+PreservedAnalyses
+ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
+ // Setup the CGSCC analysis manager from its proxy.
+ CGSCCAnalysisManager &CGAM =
+ AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager();
+
+ // Get the call graph for this module.
+ LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
+
+ // Get Function analysis manager from its proxy.
+ FunctionAnalysisManager &FAM =
+ AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager();
+
+ // We keep worklists to allow us to push more work onto the pass manager as
+ // the passes are run.
+ SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
+ SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
+
+ // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
+ // iterating off the worklists.
+ SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
+ SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
+
+ SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
+ InlinedInternalEdges;
+
+ CGSCCUpdateResult UR = {
+ RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet,
+ nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges,
+ {}};
+
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
+
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ CG.buildRefSCCs();
+ for (auto RCI = CG.postorder_ref_scc_begin(),
+ RCE = CG.postorder_ref_scc_end();
+ RCI != RCE;) {
+ assert(RCWorklist.empty() &&
+ "Should always start with an empty RefSCC worklist");
+ // The postorder_ref_sccs range we are walking is lazily constructed, so
+ // we only push the first one onto the worklist. The worklist allows us
+ // to capture *new* RefSCCs created during transformations.
+ //
+ // We really want to form RefSCCs lazily because that makes them cheaper
+ // to update as the program is simplified and allows us to have greater
+ // cache locality as forming a RefSCC touches all the parts of all the
+ // functions within that RefSCC.
+ //
+ // We also eagerly increment the iterator to the next position because
+ // the CGSCC passes below may delete the current RefSCC.
+ RCWorklist.insert(&*RCI++);
+
+ do {
+ LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
+ if (InvalidRefSCCSet.count(RC)) {
+ LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n");
+ continue;
+ }
+
+ assert(CWorklist.empty() &&
+ "Should always start with an empty SCC worklist");
+
+ LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC
+ << "\n");
+
+ // The top of the worklist may *also* be the same SCC we just ran over
+ // (and invalidated for). Keep track of that last SCC we processed due
+ // to SCC update to avoid redundant processing when an SCC is both just
+ // updated itself and at the top of the worklist.
+ LazyCallGraph::SCC *LastUpdatedC = nullptr;
+
+ // Push the initial SCCs in reverse post-order as we'll pop off the
+ // back and so see this in post-order.
+ for (LazyCallGraph::SCC &C : llvm::reverse(*RC))
+ CWorklist.insert(&C);
+
+ do {
+ LazyCallGraph::SCC *C = CWorklist.pop_back_val();
+ // Due to call graph mutations, we may have invalid SCCs or SCCs from
+ // other RefSCCs in the worklist. The invalid ones are dead and the
+ // other RefSCCs should be queued above, so we just need to skip both
+ // scenarios here.
+ if (InvalidSCCSet.count(C)) {
+ LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n");
+ continue;
+ }
+ if (LastUpdatedC == C) {
+ LLVM_DEBUG(dbgs() << "Skipping redundant run on SCC: " << *C << "\n");
+ continue;
+ }
+ if (&C->getOuterRefSCC() != RC) {
+ LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other "
+ "RefSCC...\n");
+ continue;
+ }
+
+ // Ensure we can proxy analysis updates from the CGSCC analysis manager
+      // into the Function analysis manager by getting a proxy here.
+ // This also needs to update the FunctionAnalysisManager, as this may be
+ // the first time we see this SCC.
+ CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM(
+ FAM);
+
+ // Each time we visit a new SCC pulled off the worklist,
+ // a transformation of a child SCC may have also modified this parent
+ // and invalidated analyses. So we invalidate using the update record's
+ // cross-SCC preserved set. This preserved set is intersected by any
+ // CGSCC pass that handles invalidation (primarily pass managers) prior
+ // to marking its SCC as preserved. That lets us track everything that
+ // might need invalidation across SCCs without excessive invalidations
+ // on a single SCC.
+ //
+ // This essentially allows SCC passes to freely invalidate analyses
+ // of any ancestor SCC. If this becomes detrimental to successfully
+ // caching analyses, we could force each SCC pass to manually
+ // invalidate the analyses for any SCCs other than themselves which
+ // are mutated. However, that seems to lose the robustness of the
+ // pass-manager driven invalidation scheme.
+ CGAM.invalidate(*C, UR.CrossSCCPA);
+
+ do {
+ // Check that we didn't miss any update scenario.
+ assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+ assert(&C->getOuterRefSCC() == RC &&
+ "Processing an SCC in a different RefSCC!");
+
+ LastUpdatedC = UR.UpdatedC;
+ UR.UpdatedRC = nullptr;
+ UR.UpdatedC = nullptr;
+
+ // Check the PassInstrumentation's BeforePass callbacks before
+ // running the pass, skip its execution completely if asked to
+ // (callback returns false).
+ if (!PI.runBeforePass<LazyCallGraph::SCC>(*Pass, *C))
+ continue;
+
+ PreservedAnalyses PassPA;
+ {
+ TimeTraceScope TimeScope(Pass->name());
+ PassPA = Pass->run(*C, CGAM, CG, UR);
+ }
+
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA);
+
+ // Update the SCC and RefSCC if necessary.
+ C = UR.UpdatedC ? UR.UpdatedC : C;
+ RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
+
+ if (UR.UpdatedC) {
+ // If we're updating the SCC, also update the FAM inside the proxy's
+ // result.
+ CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM(
+ FAM);
+ }
+
+ // If the CGSCC pass wasn't able to provide a valid updated SCC,
+ // the current SCC may simply need to be skipped if invalid.
+ if (UR.InvalidatedSCCs.count(C)) {
+ LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
+ break;
+ }
+ // Check that we didn't miss any update scenario.
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+
+ // We handle invalidating the CGSCC analysis manager's information
+ // for the (potentially updated) SCC here. Note that any other SCCs
+ // whose structure has changed should have been invalidated by
+ // whatever was updating the call graph. This SCC gets invalidated
+ // late as it contains the nodes that were actively being
+ // processed.
+ CGAM.invalidate(*C, PassPA);
+
+ // Then intersect the preserved set so that invalidation of module
+ // analyses will eventually occur when the module pass completes.
+ // Also intersect with the cross-SCC preserved set to capture any
+ // cross-SCC invalidation.
+ UR.CrossSCCPA.intersect(PassPA);
+ PA.intersect(std::move(PassPA));
+
+ // The pass may have restructured the call graph and refined the
+ // current SCC and/or RefSCC. We need to update our current SCC and
+ // RefSCC pointers to follow these. Also, when the current SCC is
+ // refined, re-run the SCC pass over the newly refined SCC in order
+ // to observe the most precise SCC model available. This inherently
+ // cannot cycle excessively as it only happens when we split SCCs
+ // apart, at most converging on a DAG of single nodes.
+ // FIXME: If we ever start having RefSCC passes, we'll want to
+ // iterate there too.
+ if (UR.UpdatedC)
+ LLVM_DEBUG(dbgs()
+ << "Re-running SCC passes after a refinement of the "
+ "current SCC: "
+ << *UR.UpdatedC << "\n");
+
+ // Note that both `C` and `RC` may at this point refer to deleted,
+ // invalid SCC and RefSCCs respectively. But we will short circuit
+ // the processing when we check them in the loop above.
+ } while (UR.UpdatedC);
+ } while (!CWorklist.empty());
+
+      // We only need to keep internal inlined edge information within
+      // a RefSCC; clear it to save space and give the next visit to any of
+      // these functions a fresh start.
+ InlinedInternalEdges.clear();
+ } while (!RCWorklist.empty());
+ }
+
+  // By definition we preserve the call graph, all SCC analyses, and the
+ // analysis proxies by handling them above and in any nested pass managers.
+ PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
+ PA.preserve<LazyCallGraphAnalysis>();
+ PA.preserve<CGSCCAnalysisManagerModuleProxy>();
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ return PA;
+}
+
+PreservedAnalyses DevirtSCCRepeatedPass::run(LazyCallGraph::SCC &InitialC,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ PassInstrumentation PI =
+ AM.getResult<PassInstrumentationAnalysis>(InitialC, CG);
+
+ // The SCC may be refined while we are running passes over it, so set up
+ // a pointer that we can update.
+ LazyCallGraph::SCC *C = &InitialC;
+
+ // Struct to track the counts of direct and indirect calls in each function
+ // of the SCC.
+ struct CallCount {
+ int Direct;
+ int Indirect;
+ };
+
+ // Put value handles on all of the indirect calls and return the number of
+ // direct calls for each function in the SCC.
+ auto ScanSCC = [](LazyCallGraph::SCC &C,
+ SmallMapVector<Value *, WeakTrackingVH, 16> &CallHandles) {
+ assert(CallHandles.empty() && "Must start with a clear set of handles.");
+
+ SmallDenseMap<Function *, CallCount> CallCounts;
+ CallCount CountLocal = {0, 0};
+ for (LazyCallGraph::Node &N : C) {
+ CallCount &Count =
+ CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal))
+ .first->second;
+ for (Instruction &I : instructions(N.getFunction()))
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->getCalledFunction()) {
+ ++Count.Direct;
+ } else {
+ ++Count.Indirect;
+ CallHandles.insert({CB, WeakTrackingVH(CB)});
+ }
+ }
+ }
+
+ return CallCounts;
+ };
+
+ UR.IndirectVHs.clear();
+ // Populate the initial call handles and get the initial call counts.
+ auto CallCounts = ScanSCC(*C, UR.IndirectVHs);
+
+ for (int Iteration = 0;; ++Iteration) {
+ if (!PI.runBeforePass<LazyCallGraph::SCC>(*Pass, *C))
+ continue;
+
+ PreservedAnalyses PassPA = Pass->run(*C, AM, CG, UR);
+
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA);
+
+ // If the SCC structure has changed, bail immediately and let the outer
+ // CGSCC layer handle any iteration to reflect the refined structure.
+ if (UR.UpdatedC && UR.UpdatedC != C) {
+ PA.intersect(std::move(PassPA));
+ break;
+ }
+
+ // Check that we didn't miss any update scenario.
+ assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+
+ // Check whether any of the handles were devirtualized.
+ bool Devirt = llvm::any_of(UR.IndirectVHs, [](auto &P) -> bool {
+ if (P.second) {
+ if (CallBase *CB = dyn_cast<CallBase>(P.second)) {
+ if (CB->getCalledFunction()) {
+ LLVM_DEBUG(dbgs() << "Found devirtualized call: " << *CB << "\n");
+ return true;
+ }
+ }
+ }
+ return false;
+ });
+
+ // Rescan to build up a new set of handles and count how many direct
+ // calls remain. If we decide to iterate, this also sets up the input to
+ // the next iteration.
+ UR.IndirectVHs.clear();
+ auto NewCallCounts = ScanSCC(*C, UR.IndirectVHs);
+
+ // If we haven't found an explicit devirtualization already see if we
+ // have decreased the number of indirect calls and increased the number
+ // of direct calls for any function in the SCC. This can be fooled by all
+ // manner of transformations such as DCE and other things, but seems to
+ // work well in practice.
+ if (!Devirt)
+      // Iterate over the keys in NewCallCounts; if the function also exists
+      // in CallCounts, make the check below.
+ for (auto &Pair : NewCallCounts) {
+ auto &CallCountNew = Pair.second;
+ auto CountIt = CallCounts.find(Pair.first);
+ if (CountIt != CallCounts.end()) {
+ const auto &CallCountOld = CountIt->second;
+ if (CallCountOld.Indirect > CallCountNew.Indirect &&
+ CallCountOld.Direct < CallCountNew.Direct) {
+ Devirt = true;
+ break;
+ }
+ }
+ }
+
+ if (!Devirt) {
+ PA.intersect(std::move(PassPA));
+ break;
+ }
+
+ // Otherwise, if we've already hit our max, we're done.
+ if (Iteration >= MaxIterations) {
+ if (AbortOnMaxDevirtIterationsReached)
+ report_fatal_error("Max devirtualization iterations reached");
+ LLVM_DEBUG(
+ dbgs() << "Found another devirtualization after hitting the max "
+ "number of repetitions ("
+ << MaxIterations << ") on SCC: " << *C << "\n");
+ PA.intersect(std::move(PassPA));
+ break;
+ }
+
+ LLVM_DEBUG(
+ dbgs() << "Repeating an SCC pass after finding a devirtualization in: "
+ << *C << "\n");
+
+ // Move over the new call counts in preparation for iterating.
+ CallCounts = std::move(NewCallCounts);
+
+ // Update the analysis manager with each run and intersect the total set
+ // of preserved analyses so we're ready to iterate.
+ AM.invalidate(*C, PassPA);
+
+ PA.intersect(std::move(PassPA));
+ }
+
+ // Note that we don't add any preserved entries here unlike a more normal
+ // "pass manager" because we only handle invalidation *between* iterations,
+ // not after the last iteration.
+ return PA;
+}
+
+PreservedAnalyses CGSCCToFunctionPassAdaptor::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
+ // Setup the function analysis manager from its proxy.
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+
+ SmallVector<LazyCallGraph::Node *, 4> Nodes;
+ for (LazyCallGraph::Node &N : C)
+ Nodes.push_back(&N);
+
+ // The SCC may get split while we are optimizing functions due to deleting
+ // edges. If this happens, the current SCC can shift, so keep track of
+ // a pointer we can overwrite.
+ LazyCallGraph::SCC *CurrentC = &C;
+
+ LLVM_DEBUG(dbgs() << "Running function passes across an SCC: " << C << "\n");
+
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ for (LazyCallGraph::Node *N : Nodes) {
+ // Skip nodes from other SCCs. These may have been split out during
+ // processing. We'll eventually visit those SCCs and pick up the nodes
+ // there.
+ if (CG.lookupSCC(*N) != CurrentC)
+ continue;
+
+ Function &F = N->getFunction();
+
+ PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F);
+ if (!PI.runBeforePass<Function>(*Pass, F))
+ continue;
+
+ PreservedAnalyses PassPA;
+ {
+ TimeTraceScope TimeScope(Pass->name());
+ PassPA = Pass->run(F, FAM);
+ }
+
+ PI.runAfterPass<Function>(*Pass, F, PassPA);
+
+ // We know that the function pass couldn't have invalidated any other
+ // function's analyses (that's the contract of a function pass), so
+ // directly handle the function analysis manager's invalidation here.
+ FAM.invalidate(F, PassPA);
+
+ // Then intersect the preserved set so that invalidation of module
+ // analyses will eventually occur when the module pass completes.
+ PA.intersect(std::move(PassPA));
+
+ // If the call graph hasn't been preserved, update it based on this
+ // function pass. This may also update the current SCC to point to
+ // a smaller, more refined SCC.
+ auto PAC = PA.getChecker<LazyCallGraphAnalysis>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) {
+ CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N,
+ AM, UR, FAM);
+ assert(CG.lookupSCC(*N) == CurrentC &&
+ "Current SCC not updated to the SCC containing the current node!");
+ }
+ }
+
+ // By definition we preserve the proxy. And we preserve all analyses on
+ // Functions. This precludes *any* invalidation of function analyses by the
+ // proxy, but that's OK because we've taken care to invalidate analyses in
+ // the function analysis manager incrementally above.
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+
+ // We've also ensured that we updated the call graph along the way.
+ PA.preserve<LazyCallGraphAnalysis>();
+
+ return PA;
+}
+
bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
Module &M, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &Inv) {
@@ -382,7 +833,7 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR) {
using SCC = LazyCallGraph::SCC;
- if (NewSCCRange.begin() == NewSCCRange.end())
+ if (NewSCCRange.empty())
return C;
// Add the current SCC to the worklist as its shape has changed.
@@ -466,46 +917,61 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass(
// First walk the function and handle all called functions. We do this first
// because if there is a single call edge, whether there are ref edges is
// irrelevant.
- for (Instruction &I : instructions(F))
- if (auto *CB = dyn_cast<CallBase>(&I))
- if (Function *Callee = CB->getCalledFunction())
+ for (Instruction &I : instructions(F)) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (Function *Callee = CB->getCalledFunction()) {
if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
- Node &CalleeN = *G.lookup(*Callee);
- Edge *E = N->lookup(CalleeN);
+ Node *CalleeN = G.lookup(*Callee);
+ assert(CalleeN &&
+ "Visited function should already have an associated node");
+ Edge *E = N->lookup(*CalleeN);
assert((E || !FunctionPass) &&
"No function transformations should introduce *new* "
"call edges! Any new calls should be modeled as "
"promoted existing ref edges!");
- bool Inserted = RetainedEdges.insert(&CalleeN).second;
+ bool Inserted = RetainedEdges.insert(CalleeN).second;
(void)Inserted;
assert(Inserted && "We should never visit a function twice.");
if (!E)
- NewCallEdges.insert(&CalleeN);
+ NewCallEdges.insert(CalleeN);
else if (!E->isCall())
- PromotedRefTargets.insert(&CalleeN);
+ PromotedRefTargets.insert(CalleeN);
}
+ } else {
+ // We can miss devirtualization if an indirect call is created then
+ // promoted before updateCGAndAnalysisManagerForPass runs.
+ auto *Entry = UR.IndirectVHs.find(CB);
+ if (Entry == UR.IndirectVHs.end())
+ UR.IndirectVHs.insert({CB, WeakTrackingVH(CB)});
+ else if (!Entry->second)
+ Entry->second = WeakTrackingVH(CB);
+ }
+ }
+ }
// Now walk all references.
for (Instruction &I : instructions(F))
for (Value *Op : I.operand_values())
- if (auto *C = dyn_cast<Constant>(Op))
- if (Visited.insert(C).second)
- Worklist.push_back(C);
+ if (auto *OpC = dyn_cast<Constant>(Op))
+ if (Visited.insert(OpC).second)
+ Worklist.push_back(OpC);
auto VisitRef = [&](Function &Referee) {
- Node &RefereeN = *G.lookup(Referee);
- Edge *E = N->lookup(RefereeN);
+ Node *RefereeN = G.lookup(Referee);
+ assert(RefereeN &&
+ "Visited function should already have an associated node");
+ Edge *E = N->lookup(*RefereeN);
assert((E || !FunctionPass) &&
"No function transformations should introduce *new* ref "
"edges! Any new ref edges would require IPO which "
"function passes aren't allowed to do!");
- bool Inserted = RetainedEdges.insert(&RefereeN).second;
+ bool Inserted = RetainedEdges.insert(RefereeN).second;
(void)Inserted;
assert(Inserted && "We should never visit a function twice.");
if (!E)
- NewRefEdges.insert(&RefereeN);
+ NewRefEdges.insert(RefereeN);
else if (E->isCall())
- DemotedCallTargets.insert(&RefereeN);
+ DemotedCallTargets.insert(RefereeN);
};
LazyCallGraph::visitReferences(Worklist, Visited, VisitRef);
@@ -528,15 +994,17 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass(
// TODO: This only allows trivial edges to be added for now.
assert((RC == &TargetRC ||
RC->isAncestorOf(TargetRC)) && "New call edge is not trivial!");
- RC->insertTrivialCallEdge(N, *CallTarget);
+ // Add a trivial ref edge to be promoted later on alongside
+ // PromotedRefTargets.
+ RC->insertTrivialRefEdge(N, *CallTarget);
}
// Include synthetic reference edges to known, defined lib functions.
- for (auto *F : G.getLibFunctions())
+ for (auto *LibFn : G.getLibFunctions())
// While the list of lib functions doesn't have repeats, don't re-visit
// anything handled above.
- if (!Visited.count(F))
- VisitRef(*F);
+ if (!Visited.count(LibFn))
+ VisitRef(*LibFn);
// First remove all of the edges that are no longer present in this function.
// The first step makes these edges uniformly ref edges and accumulates them
@@ -563,23 +1031,20 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass(
DeadTargets.push_back(&E.getNode());
}
// Remove the easy cases quickly and actually pull them out of our list.
- DeadTargets.erase(
- llvm::remove_if(DeadTargets,
- [&](Node *TargetN) {
- SCC &TargetC = *G.lookupSCC(*TargetN);
- RefSCC &TargetRC = TargetC.getOuterRefSCC();
-
- // We can't trivially remove internal targets, so skip
- // those.
- if (&TargetRC == RC)
- return false;
-
- RC->removeOutgoingEdge(N, *TargetN);
- LLVM_DEBUG(dbgs() << "Deleting outgoing edge from '"
- << N << "' to '" << TargetN << "'\n");
- return true;
- }),
- DeadTargets.end());
+ llvm::erase_if(DeadTargets, [&](Node *TargetN) {
+ SCC &TargetC = *G.lookupSCC(*TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ // We can't trivially remove internal targets, so skip
+ // those.
+ if (&TargetRC == RC)
+ return false;
+
+ RC->removeOutgoingEdge(N, *TargetN);
+ LLVM_DEBUG(dbgs() << "Deleting outgoing edge from '" << N << "' to '"
+ << TargetN << "'\n");
+ return true;
+ });
// Now do a batch removal of the internal ref edges left.
auto NewRefSCCs = RC->removeInternalRefEdge(N, DeadTargets);
@@ -643,6 +1108,11 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass(
C, AM, UR);
}
+ // We added a ref edge earlier for new call edges, promote those to call edges
+ // alongside PromotedRefTargets.
+ for (Node *E : NewCallEdges)
+ PromotedRefTargets.insert(E);
+
// Now promote ref edges into call edges.
for (Node *CallTarget : PromotedRefTargets) {
SCC &TargetC = *G.lookupSCC(*CallTarget);
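Sketching the idea outside LLVM: the new ModuleToPostOrderCGSCCPassAdaptor::run above drives nested RefSCC/SCC worklists and skips entries that call-graph mutations have recorded in InvalidRefSCCSet/InvalidSCCSet. A stripped-down version of that skip-if-invalidated worklist discipline, with a generic Item type and a hypothetical Process callback:

#include <functional>
#include <unordered_set>
#include <vector>

// Drain a LIFO worklist, ignoring items that earlier processing marked as
// invalid (e.g. deleted SCCs); Process may append more work or grow Invalid.
template <typename Item>
void drainWorklist(std::vector<Item *> &Worklist,
                   std::unordered_set<const Item *> &Invalid,
                   const std::function<void(Item &)> &Process) {
  while (!Worklist.empty()) {
    Item *I = Worklist.back();
    Worklist.pop_back();
    if (Invalid.count(I))
      continue; // Mirrors "Skipping an invalid SCC..." in the adaptor.
    Process(*I);
  }
}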
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp
index 55adb454b733..9b212e564a46 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp
@@ -167,20 +167,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
return F;
}
-/// spliceFunction - Replace the function represented by this node by another.
-/// This does not rescan the body of the function, so it is suitable when
-/// splicing the body of the old function to the new while also updating all
-/// callers from old to new.
-void CallGraph::spliceFunction(const Function *From, const Function *To) {
- assert(FunctionMap.count(From) && "No CallGraphNode for function!");
- assert(!FunctionMap.count(To) &&
- "Pointing CallGraphNode at a function that already exists");
- FunctionMapTy::iterator I = FunctionMap.find(From);
- I->second->F = const_cast<Function*>(To);
- FunctionMap[To] = std::move(I->second);
- FunctionMap.erase(I);
-}
-
// getOrInsertFunction - This method is identical to calling operator[], but
// it will insert a new CallGraphNode for the specified function if one does
// not already exist.
@@ -281,13 +267,37 @@ void CallGraphNode::replaceCallEdge(CallBase &Call, CallBase &NewCall,
I->second = NewNode;
NewNode->AddRef();
- // Refresh callback references.
- forEachCallbackFunction(Call, [=](Function *CB) {
- removeOneAbstractEdgeTo(CG->getOrInsertFunction(CB));
+ // Refresh callback references. Do not resize CalledFunctions if the
+ // number of callbacks is the same for new and old call sites.
+ SmallVector<CallGraphNode *, 4u> OldCBs;
+ SmallVector<CallGraphNode *, 4u> NewCBs;
+ forEachCallbackFunction(Call, [this, &OldCBs](Function *CB) {
+ OldCBs.push_back(CG->getOrInsertFunction(CB));
});
- forEachCallbackFunction(NewCall, [=](Function *CB) {
- addCalledFunction(nullptr, CG->getOrInsertFunction(CB));
+ forEachCallbackFunction(NewCall, [this, &NewCBs](Function *CB) {
+ NewCBs.push_back(CG->getOrInsertFunction(CB));
});
+ if (OldCBs.size() == NewCBs.size()) {
+ for (unsigned N = 0; N < OldCBs.size(); ++N) {
+ CallGraphNode *OldNode = OldCBs[N];
+ CallGraphNode *NewNode = NewCBs[N];
+ for (auto J = CalledFunctions.begin();; ++J) {
+ assert(J != CalledFunctions.end() &&
+ "Cannot find callsite to update!");
+ if (!J->first && J->second == OldNode) {
+ J->second = NewNode;
+ OldNode->DropRef();
+ NewNode->AddRef();
+ break;
+ }
+ }
+ }
+ } else {
+ for (auto *CGN : OldCBs)
+ removeOneAbstractEdgeTo(CGN);
+ for (auto *CGN : NewCBs)
+ addCalledFunction(nullptr, CGN);
+ }
return;
}
}
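To illustrate the pattern above: the CallGraph.cpp hunk keeps CalledFunctions at its current size when the old and new call sites carry the same number of callback functions, patching the matching entries in place and only falling back to remove-then-add otherwise. The same in-place-when-shapes-match idea, sketched over a plain vector of edges (Edge, replaceEdges and the int callee IDs are hypothetical):

#include <algorithm>
#include <cstddef>
#include <vector>

struct Edge {
  int Callee; // Stand-in for a CallGraphNode* target.
};

// Swap OldTargets for NewTargets inside Edges. Equal counts: rewrite matching
// entries in place (no resizing). Otherwise: erase the old ones, append new.
void replaceEdges(std::vector<Edge> &Edges, const std::vector<int> &OldTargets,
                  const std::vector<int> &NewTargets) {
  if (OldTargets.size() == NewTargets.size()) {
    for (std::size_t N = 0; N < OldTargets.size(); ++N)
      for (Edge &E : Edges)
        if (E.Callee == OldTargets[N]) {
          E.Callee = NewTargets[N];
          break;
        }
    return;
  }
  for (int Old : OldTargets)
    Edges.erase(std::remove_if(Edges.begin(), Edges.end(),
                               [Old](const Edge &E) { return E.Callee == Old; }),
                Edges.end());
  for (int New : NewTargets)
    Edges.push_back({New});
}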
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp
index 91f8029cc326..38057d44e2b8 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -21,13 +21,14 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/AbstractCallSite.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/IR/StructuralHash.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -42,8 +43,8 @@ using namespace llvm;
#define DEBUG_TYPE "cgscc-passmgr"
-static cl::opt<unsigned>
-MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
+cl::opt<unsigned> MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden,
+ cl::init(4));
STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
@@ -466,16 +467,30 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
initializeAnalysisImpl(P);
+#ifdef EXPENSIVE_CHECKS
+ uint64_t RefHash = StructuralHash(CG.getModule());
+#endif
+
// Actually run this pass on the current SCC.
- Changed |= RunPassOnSCC(P, CurSCC, CG,
- CallGraphUpToDate, DevirtualizedCall);
+ bool LocalChanged =
+ RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate, DevirtualizedCall);
- if (Changed)
+ Changed |= LocalChanged;
+
+#ifdef EXPENSIVE_CHECKS
+ if (!LocalChanged && (RefHash != StructuralHash(CG.getModule()))) {
+ llvm::errs() << "Pass modifies its input and doesn't report it: "
+ << P->getPassName() << "\n";
+ llvm_unreachable("Pass modifies its input and doesn't report it");
+ }
+#endif
+ if (LocalChanged)
dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
dumpPreservedSet(P);
verifyPreservedAnalysis(P);
- removeNotPreservedAnalysis(P);
+ if (LocalChanged)
+ removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
removeDeadPasses(P, "", ON_CG_MSG);
}
@@ -524,12 +539,12 @@ bool CGPassManager::runOnModule(Module &M) {
<< '\n');
DevirtualizedCall = false;
Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
- } while (Iteration++ < MaxIterations && DevirtualizedCall);
+ } while (Iteration++ < MaxDevirtIterations && DevirtualizedCall);
if (DevirtualizedCall)
LLVM_DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after "
<< Iteration
- << " times, due to -max-cg-scc-iterations\n");
+ << " times, due to -max-devirt-iterations\n");
MaxSCCIterations.updateMax(Iteration);
}
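A standalone sketch of the EXPENSIVE_CHECKS logic above: the CallGraphSCCPass.cpp hunk hashes the module before and after each pass and aborts when a pass reports "no change" while the hash moved. Here is a generic before/after-fingerprint check of that shape (the Module alias, fingerprint and runChecked are stand-ins; the real check uses StructuralHash):

#include <cassert>
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

using Module = std::vector<std::string>; // Stand-in for the real IR module.

// Cheap FNV-1a style fingerprint; LLVM computes StructuralHash(Module).
uint64_t fingerprint(const Module &M) {
  uint64_t H = 1469598103934665603ull;
  for (const std::string &S : M)
    for (unsigned char C : S)
      H = (H ^ C) * 1099511628211ull;
  return H;
}

// Run a pass and catch silent modifications: if the pass says it changed
// nothing but the fingerprint differs, that is a bug in the pass.
bool runChecked(Module &M, const std::function<bool(Module &)> &Pass) {
  const uint64_t RefHash = fingerprint(M);
  const bool LocalChanged = Pass(M);
  assert((LocalChanged || RefHash == fingerprint(M)) &&
         "Pass modifies its input and doesn't report it");
  return LocalChanged;
}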
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp
index bb447411ec47..872a91ad7cbf 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp
@@ -143,7 +143,8 @@ struct DOTGraphTraits<CallGraphDOTInfo *> : public DefaultDOTGraphTraits {
std::string(CGInfo->getModule()->getModuleIdentifier());
}
- static bool isNodeHidden(const CallGraphNode *Node) {
+ static bool isNodeHidden(const CallGraphNode *Node,
+ const CallGraphDOTInfo *CGInfo) {
if (CallMultiGraph || Node->getFunction())
return false;
return true;
@@ -195,7 +196,7 @@ struct DOTGraphTraits<CallGraphDOTInfo *> : public DefaultDOTGraphTraits {
Function *F = Node->getFunction();
if (F == nullptr)
return "";
- std::string attrs = "";
+ std::string attrs;
if (ShowHeatColors) {
uint64_t freq = CGInfo->getFreq(F);
std::string color = getHeatColor(freq, CGInfo->getMaxFreq());
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
index 8b101e3b2cc4..b2fc6e603f9e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -29,6 +30,13 @@
using namespace llvm;
+#define DEBUG_TYPE "capture-tracking"
+
+STATISTIC(NumCaptured, "Number of pointers maybe captured");
+STATISTIC(NumNotCaptured, "Number of pointers not captured");
+STATISTIC(NumCapturedBefore, "Number of pointers maybe captured before");
+STATISTIC(NumNotCapturedBefore, "Number of pointers not captured before");
+
/// The default value for MaxUsesToExplore argument. It's relatively small to
/// keep the cost of analysis reasonable for clients like BasicAliasAnalysis,
/// where the results can't be cached.
@@ -159,9 +167,6 @@ namespace {
if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
return false;
- if (!shouldExplore(U))
- return false;
-
Captured = true;
return true;
}
@@ -197,6 +202,10 @@ bool llvm::PointerMayBeCaptured(const Value *V,
SimpleCaptureTracker SCT(ReturnCaptures);
PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
+ if (SCT.Captured)
+ ++NumCaptured;
+ else
+ ++NumNotCaptured;
return SCT.Captured;
}
@@ -225,6 +234,10 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
CapturesBefore CB(ReturnCaptures, I, DT, IncludeI);
PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
+ if (CB.Captured)
+ ++NumCapturedBefore;
+ else
+ ++NumNotCapturedBefore;
return CB.Captured;
}
@@ -243,21 +256,24 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
for (const Use &U : V->uses()) {
// If there are lots of uses, conservatively say that the value
// is captured to avoid taking too much compile time.
- if (Count++ >= MaxUsesToExplore)
- return Tracker->tooManyUses();
+ if (Count++ >= MaxUsesToExplore) {
+ Tracker->tooManyUses();
+ return false;
+ }
if (!Visited.insert(&U).second)
continue;
if (!Tracker->shouldExplore(&U))
continue;
Worklist.push_back(&U);
}
+ return true;
};
- AddUses(V);
+ if (!AddUses(V))
+ return;
while (!Worklist.empty()) {
const Use *U = Worklist.pop_back_val();
Instruction *I = cast<Instruction>(U->getUser());
- V = U->get();
switch (I->getOpcode()) {
case Instruction::Call:
@@ -273,11 +289,12 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
// The pointer is not captured if returned pointer is not captured.
// NOTE: CaptureTracking users should not assume that only functions
// marked with nocapture do not capture. This means that places like
- // GetUnderlyingObject in ValueTracking or DecomposeGEPExpression
+ // getUnderlyingObject in ValueTracking or DecomposeGEPExpression
// in BasicAA also need to know about this property.
if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call,
true)) {
- AddUses(Call);
+ if (!AddUses(Call))
+ return;
break;
}
@@ -295,13 +312,11 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
// that loading a value from a pointer does not cause the pointer to be
// captured, even though the loaded value might be the pointer itself
// (think of self-referential objects).
- for (auto IdxOpPair : enumerate(Call->data_ops())) {
- int Idx = IdxOpPair.index();
- Value *A = IdxOpPair.value();
- if (A == V && !Call->doesNotCapture(Idx))
- // The parameter is not marked 'nocapture' - captured.
- if (Tracker->captured(U))
- return;
+ if (Call->isDataOperand(U) &&
+ !Call->doesNotCapture(Call->getDataOperandNo(U))) {
+ // The parameter is not marked 'nocapture' - captured.
+ if (Tracker->captured(U))
+ return;
}
break;
}
@@ -315,9 +330,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
// "va-arg" from a pointer does not cause it to be captured.
break;
case Instruction::Store:
- // Stored the pointer - conservatively assume it may be captured.
- // Volatile stores make the address observable.
- if (V == I->getOperand(0) || cast<StoreInst>(I)->isVolatile())
+ // Stored the pointer - conservatively assume it may be captured.
+ // Volatile stores make the address observable.
+ if (U->getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile())
if (Tracker->captured(U))
return;
break;
@@ -328,7 +343,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
// but the value being stored is.
// Volatile stores make the address observable.
auto *ARMWI = cast<AtomicRMWInst>(I);
- if (ARMWI->getValOperand() == V || ARMWI->isVolatile())
+ if (U->getOperandNo() == 1 || ARMWI->isVolatile())
if (Tracker->captured(U))
return;
break;
@@ -340,7 +355,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
// but the value being stored is.
// Volatile stores make the address observable.
auto *ACXI = cast<AtomicCmpXchgInst>(I);
- if (ACXI->getCompareOperand() == V || ACXI->getNewValOperand() == V ||
+ if (U->getOperandNo() == 1 || U->getOperandNo() == 2 ||
ACXI->isVolatile())
if (Tracker->captured(U))
return;
@@ -352,17 +367,18 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
case Instruction::Select:
case Instruction::AddrSpaceCast:
// The original value is not captured via this if the new value isn't.
- AddUses(I);
+ if (!AddUses(I))
+ return;
break;
case Instruction::ICmp: {
- unsigned Idx = (I->getOperand(0) == V) ? 0 : 1;
+ unsigned Idx = U->getOperandNo();
unsigned OtherIdx = 1 - Idx;
if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) {
// Don't count comparisons of a no-alias return value against null as
// captures. This allows us to ignore comparisons of malloc results
// with null, for example.
if (CPN->getType()->getAddressSpace() == 0)
- if (isNoAliasCall(V->stripPointerCasts()))
+ if (isNoAliasCall(U->get()->stripPointerCasts()))
break;
if (!I->getFunction()->nullPointerIsDefined()) {
auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
@@ -395,3 +411,44 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
// All uses examined.
}
+
+bool llvm::isNonEscapingLocalObject(
+ const Value *V, SmallDenseMap<const Value *, bool, 8> *IsCapturedCache) {
+ SmallDenseMap<const Value *, bool, 8>::iterator CacheIt;
+ if (IsCapturedCache) {
+ bool Inserted;
+ std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false});
+ if (!Inserted)
+ // Found cached result, return it!
+ return CacheIt->second;
+ }
+
+ // If this is a local allocation, check to see if it escapes.
+ if (isa<AllocaInst>(V) || isNoAliasCall(V)) {
+ // Set StoreCaptures to True so that we can assume in our callers that the
+ // pointer is not the result of a load instruction. Currently
+ // PointerMayBeCaptured doesn't have any special analysis for the
+ // StoreCaptures=false case; if it did, our callers could be refined to be
+ // more precise.
+ auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ if (IsCapturedCache)
+ CacheIt->second = Ret;
+ return Ret;
+ }
+
+ // If this is an argument that corresponds to a byval or noalias argument,
+ // then it has not escaped before entering the function. Check if it escapes
+ // inside the function.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValAttr() || A->hasNoAliasAttr()) {
+ // Note even if the argument is marked nocapture, we still need to check
+ // for copies made inside the function. The nocapture attribute only
+ // specifies that there are no copies made that outlive the function.
+ auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ if (IsCapturedCache)
+ CacheIt->second = Ret;
+ return Ret;
+ }
+
+ return false;
+}
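The caching scheme in the new isNonEscapingLocalObject above inserts a pessimistic placeholder into the cache first, keeps the returned iterator, and only overwrites it after the expensive capture query has run, so a repeated query costs a single map lookup. The same memoization shape with an unordered_map (Value, expensiveEscapeCheck and isNonEscaping are hypothetical stand-ins, not the LLVM API):

#include <tuple>
#include <unordered_map>

struct Value {}; // Opaque stand-in for const llvm::Value.

// Stand-in for the costly analysis (PointerMayBeCaptured in the real code).
bool expensiveEscapeCheck(const Value *) { return false; }

bool isNonEscaping(const Value *V,
                   std::unordered_map<const Value *, bool> *Cache) {
  std::unordered_map<const Value *, bool>::iterator CacheIt;
  if (Cache) {
    bool Inserted;
    // Insert a conservative 'false' up front; if the key already existed we
    // just return the cached answer.
    std::tie(CacheIt, Inserted) = Cache->insert({V, false});
    if (!Inserted)
      return CacheIt->second;
  }
  const bool Ret = !expensiveEscapeCheck(V);
  if (Cache)
    CacheIt->second = Ret; // Overwrite the placeholder via the saved iterator.
  return Ret;
}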
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm-project/llvm/lib/Analysis/CodeMetrics.cpp
index 0b2b6f9bfa46..157811c04eb5 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CodeMetrics.cpp
@@ -112,9 +112,9 @@ void CodeMetrics::collectEphemeralValues(
/// Fill in the current structure with information gleaned from the specified
/// block.
-void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
- const TargetTransformInfo &TTI,
- const SmallPtrSetImpl<const Value*> &EphValues) {
+void CodeMetrics::analyzeBasicBlock(
+ const BasicBlock *BB, const TargetTransformInfo &TTI,
+ const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (const Instruction &I : *BB) {
@@ -125,11 +125,16 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
// Special handling for calls.
if (const auto *Call = dyn_cast<CallBase>(&I)) {
if (const Function *F = Call->getCalledFunction()) {
+ bool IsLoweredToCall = TTI.isLoweredToCall(F);
// If a function is both internal and has a single use, then it is
// extremely likely to get inlined in the future (it was probably
// exposed by an interleaved devirtualization pass).
- if (!Call->isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+ // When preparing for LTO, liberally consider calls as inline
+ // candidates.
+ if (!Call->isNoInline() && IsLoweredToCall &&
+ ((F->hasInternalLinkage() && F->hasOneUse()) || PrepareForLTO)) {
++NumInlineCandidates;
+ }
// If this call is to function itself, then the function is recursive.
// Inlining it into other functions is a bad idea, because this is
@@ -138,7 +143,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
if (F == BB->getParent())
isRecursive = true;
- if (TTI.isLoweredToCall(F))
+ if (IsLoweredToCall)
++NumCalls;
} else {
// We don't want inline asm to count as a call - that would prevent loop
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
index 6feffcbb98e1..cc1ce4c65821 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -41,6 +42,8 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
@@ -102,16 +105,16 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
"Invalid constantexpr bitcast!");
// Catch the obvious splat cases.
- if (C->isNullValue() && !DestTy->isX86_MMXTy())
+ if (C->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy())
return Constant::getNullValue(DestTy);
- if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() &&
+ if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy() &&
!DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
return Constant::getAllOnesValue(DestTy);
if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
// Handle a vector->scalar integer/fp cast.
if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
- unsigned NumSrcElts = VTy->getNumElements();
+ unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
Type *SrcEltTy = VTy->getElementType();
// If the vector is a vector of floating point, convert it to vector of int
@@ -154,8 +157,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
return ConstantExpr::getBitCast(C, DestTy);
// If the element types match, IR can fold it.
- unsigned NumDstElt = DestVTy->getNumElements();
- unsigned NumSrcElt = cast<VectorType>(C->getType())->getNumElements();
+ unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
+ unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
if (NumDstElt == NumSrcElt)
return ConstantExpr::getBitCast(C, DestTy);
@@ -292,7 +295,11 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
- APInt &Offset, const DataLayout &DL) {
+ APInt &Offset, const DataLayout &DL,
+ DSOLocalEquivalent **DSOEquiv) {
+ if (DSOEquiv)
+ *DSOEquiv = nullptr;
+
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
@@ -300,6 +307,15 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
return true;
}
+ if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
+ if (DSOEquiv)
+ *DSOEquiv = FoundDSOEquiv;
+ GV = FoundDSOEquiv->getGlobalValue();
+ unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
+ Offset = APInt(BitWidth, 0);
+ return true;
+ }
+
// Otherwise, if this isn't a constant expr, bail out.
auto *CE = dyn_cast<ConstantExpr>(C);
if (!CE) return false;
@@ -307,7 +323,8 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
// Look through ptr->int and ptr->ptr casts.
if (CE->getOpcode() == Instruction::PtrToInt ||
CE->getOpcode() == Instruction::BitCast)
- return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);
+ return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
+ DSOEquiv);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
auto *GEP = dyn_cast<GEPOperator>(CE);
@@ -318,7 +335,8 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
APInt TmpOffset(BitWidth, 0);
// If the base isn't a global+constant, we aren't either.
- if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL))
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
+ DSOEquiv))
return false;
// Otherwise, add any offset that our operands provide.
@@ -340,12 +358,13 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
// Catch the obvious splat cases (since all-zeros can coerce non-integral
// pointers legally).
- if (C->isNullValue() && !DestTy->isX86_MMXTy())
+ if (C->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy())
return Constant::getNullValue(DestTy);
if (C->isAllOnesValue() &&
(DestTy->isIntegerTy() || DestTy->isFloatingPointTy() ||
DestTy->isVectorTy()) &&
- !DestTy->isX86_MMXTy() && !DestTy->isPtrOrPtrVectorTy())
+ !DestTy->isX86_AMXTy() && !DestTy->isX86_MMXTy() &&
+ !DestTy->isPtrOrPtrVectorTy())
// Get ones when the input is trivial, but
// only for supported types inside getAllOnesValue.
return Constant::getAllOnesValue(DestTy);
@@ -489,8 +508,8 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
NumElts = AT->getNumElements();
EltTy = AT->getElementType();
} else {
- NumElts = cast<VectorType>(C->getType())->getNumElements();
- EltTy = cast<VectorType>(C->getType())->getElementType();
+ NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
+ EltTy = cast<FixedVectorType>(C->getType())->getElementType();
}
uint64_t EltSize = DL.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
@@ -557,14 +576,16 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
C = FoldBitCast(C, MapTy->getPointerTo(AS), DL);
if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) {
- if (Res->isNullValue() && !LoadTy->isX86_MMXTy())
+ if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
+ !LoadTy->isX86_AMXTy())
// Materializing a zero can be done trivially without a bitcast
return Constant::getNullValue(LoadTy);
Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
Res = FoldBitCast(Res, CastTy, DL);
if (LoadTy->isPtrOrPtrVectorTy()) {
// For vector of pointer, we needed to first convert to a vector of integer, then do vector inttoptr
- if (Res->isNullValue() && !LoadTy->isX86_MMXTy())
+ if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
+ !LoadTy->isX86_AMXTy())
return Constant::getNullValue(LoadTy);
if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
// Be careful not to replace a load of an addrspace value with an inttoptr here
@@ -717,7 +738,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
// If this load comes from anywhere in a constant global, and if the global
// is all undef or zero, we know what it loads.
- if (auto *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, DL))) {
+ if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(CE))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
if (GV->getInitializer()->isNullValue())
return Constant::getNullValue(Ty);
@@ -1070,6 +1091,8 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
default: return nullptr;
case Instruction::ICmp:
case Instruction::FCmp: llvm_unreachable("Invalid for compares");
+ case Instruction::Freeze:
+ return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
case Instruction::Call:
if (auto *F = dyn_cast<Function>(Ops.back())) {
const auto *Call = cast<CallBase>(InstOrCE);
@@ -1433,6 +1456,12 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::masked_load:
+ case Intrinsic::get_active_lane_mask:
+ case Intrinsic::abs:
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin:
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -1447,15 +1476,25 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::smul_fix_sat:
case Intrinsic::bitreverse:
case Intrinsic::is_constant:
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_umin:
- case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_umax:
+ // Target intrinsics
+ case Intrinsic::arm_mve_vctp8:
+ case Intrinsic::arm_mve_vctp16:
+ case Intrinsic::arm_mve_vctp32:
+ case Intrinsic::arm_mve_vctp64:
+ // WebAssembly float semantics are always known
+ case Intrinsic::wasm_trunc_signed:
+ case Intrinsic::wasm_trunc_unsigned:
+ case Intrinsic::wasm_trunc_saturate_signed:
+ case Intrinsic::wasm_trunc_saturate_unsigned:
return true;
// Floating point operations cannot be folded in strictfp functions in
@@ -1476,6 +1515,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::powi:
case Intrinsic::fma:
case Intrinsic::fmuladd:
+ case Intrinsic::fptoui_sat:
+ case Intrinsic::fptosi_sat:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
case Intrinsic::amdgcn_cos:
@@ -1484,6 +1525,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::amdgcn_cubesc:
case Intrinsic::amdgcn_cubetc:
case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fma_legacy:
case Intrinsic::amdgcn_fract:
case Intrinsic::amdgcn_ldexp:
case Intrinsic::amdgcn_sin:
@@ -1691,31 +1733,31 @@ Constant *ConstantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
return nullptr;
const APInt &X = CI->getValue();
switch (IID) {
- case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::vector_reduce_add:
Acc = Acc + X;
break;
- case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::vector_reduce_mul:
Acc = Acc * X;
break;
- case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::vector_reduce_and:
Acc = Acc & X;
break;
- case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::vector_reduce_or:
Acc = Acc | X;
break;
- case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::vector_reduce_xor:
Acc = Acc ^ X;
break;
- case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::vector_reduce_smin:
Acc = APIntOps::smin(Acc, X);
break;
- case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::vector_reduce_smax:
Acc = APIntOps::smax(Acc, X);
break;
- case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::vector_reduce_umin:
Acc = APIntOps::umin(Acc, X);
break;
- case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::vector_reduce_umax:
Acc = APIntOps::umax(Acc, X);
break;
}
@@ -1766,19 +1808,6 @@ double getValueAsDouble(ConstantFP *Op) {
return APF.convertToDouble();
}
-static bool isManifestConstant(const Constant *c) {
- if (isa<ConstantData>(c)) {
- return true;
- } else if (isa<ConstantAggregate>(c) || isa<ConstantExpr>(c)) {
- for (const Value *subc : c->operand_values()) {
- if (!isManifestConstant(cast<Constant>(subc)))
- return false;
- }
- return true;
- }
- return false;
-}
-
static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
if (auto *CI = dyn_cast<ConstantInt>(Op)) {
C = &CI->getValue();
@@ -1803,15 +1832,18 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
// We know we have a "Constant" argument. But we want to only
// return true for manifest constants, not those that depend on
// constants with unknowable values, e.g. GlobalValue or BlockAddress.
- if (isManifestConstant(Operands[0]))
+ if (Operands[0]->isManifestConstant())
return ConstantInt::getTrue(Ty->getContext());
return nullptr;
}
if (isa<UndefValue>(Operands[0])) {
// cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
// ctpop() is between 0 and bitwidth, pick 0 for undef.
+ // fptoui.sat and fptosi.sat can always fold to zero (for a zero input).
if (IntrinsicID == Intrinsic::cos ||
- IntrinsicID == Intrinsic::ctpop)
+ IntrinsicID == Intrinsic::ctpop ||
+ IntrinsicID == Intrinsic::fptoui_sat ||
+ IntrinsicID == Intrinsic::fptosi_sat)
return Constant::getNullValue(Ty);
if (IntrinsicID == Intrinsic::bswap ||
IntrinsicID == Intrinsic::bitreverse ||
@@ -1848,11 +1880,55 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
}
+ APFloat U = Op->getValueAPF();
+
+ if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
+ IntrinsicID == Intrinsic::wasm_trunc_unsigned ||
+ IntrinsicID == Intrinsic::wasm_trunc_saturate_signed ||
+ IntrinsicID == Intrinsic::wasm_trunc_saturate_unsigned) {
+
+ bool Saturating = IntrinsicID == Intrinsic::wasm_trunc_saturate_signed ||
+ IntrinsicID == Intrinsic::wasm_trunc_saturate_unsigned;
+ bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed ||
+ IntrinsicID == Intrinsic::wasm_trunc_saturate_signed;
+
+ if (U.isNaN())
+ return Saturating ? ConstantInt::get(Ty, 0) : nullptr;
+
+ unsigned Width = Ty->getIntegerBitWidth();
+ APSInt Int(Width, !Signed);
+ bool IsExact = false;
+ APFloat::opStatus Status =
+ U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
+
+ if (Status == APFloat::opOK || Status == APFloat::opInexact)
+ return ConstantInt::get(Ty, Int);
+
+ if (!Saturating)
+ return nullptr;
+
+ if (U.isNegative())
+ return Signed ? ConstantInt::get(Ty, APInt::getSignedMinValue(Width))
+ : ConstantInt::get(Ty, APInt::getMinValue(Width));
+ else
+ return Signed ? ConstantInt::get(Ty, APInt::getSignedMaxValue(Width))
+ : ConstantInt::get(Ty, APInt::getMaxValue(Width));
+ }
+
+ if (IntrinsicID == Intrinsic::fptoui_sat ||
+ IntrinsicID == Intrinsic::fptosi_sat) {
+ // convertToInteger() already has the desired saturation semantics.
+ APSInt Int(Ty->getIntegerBitWidth(),
+ IntrinsicID == Intrinsic::fptoui_sat);
+ bool IsExact;
+ U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
+ return ConstantInt::get(Ty, Int);
+ }
+
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
return nullptr;
// Use internal versions of these intrinsics.
- APFloat U = Op->getValueAPF();
if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
U.roundToIntegral(APFloat::rmNearestTiesToEven);
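Worked example for the folding added above: the new handling of wasm.trunc.saturate.* and fptosi.sat/fptoui.sat gives the saturating float-to-int conversions their defined semantics, folding NaN to 0 and clamping out-of-range values to the destination type's minimum or maximum. A plain-C++ sketch of that rule for a signed 32-bit result (truncSatS32 is an illustration of the semantics, not the APFloat-based code path):

#include <cmath>
#include <cstdint>
#include <limits>

// Saturating double -> int32_t truncation: NaN -> 0, otherwise truncate toward
// zero and clamp to [INT32_MIN, INT32_MAX].
int32_t truncSatS32(double X) {
  if (std::isnan(X))
    return 0;
  if (X <= static_cast<double>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();
  if (X >= static_cast<double>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(X); // In range: plain truncation toward zero.
}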
@@ -2186,15 +2262,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
if (isa<ConstantAggregateZero>(Operands[0])) {
switch (IntrinsicID) {
default: break;
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_umin:
- case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_umax:
return ConstantInt::get(Ty, 0);
}
}
@@ -2205,15 +2281,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
auto *Op = cast<Constant>(Operands[0]);
switch (IntrinsicID) {
default: break;
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_umin:
- case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_umax:
if (Constant *C = ConstantFoldVectorReduce(IntrinsicID, Op))
return C;
break;
@@ -2251,6 +2327,25 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
const CallBase *Call) {
assert(Operands.size() == 2 && "Wrong number of operands.");
+ if (Ty->isFloatingPointTy()) {
+ // TODO: We should have undef handling for all of the FP intrinsics that
+ // this function attempts to fold.
+ bool IsOp0Undef = isa<UndefValue>(Operands[0]);
+ bool IsOp1Undef = isa<UndefValue>(Operands[1]);
+ switch (IntrinsicID) {
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ case Intrinsic::maximum:
+ case Intrinsic::minimum:
+ // If one argument is undef, return the other argument.
+ if (IsOp0Undef)
+ return Operands[1];
+ if (IsOp1Undef)
+ return Operands[0];
+ break;
+ }
+ }
+
if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
return nullptr;
@@ -2298,8 +2393,8 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
if (IntrinsicID == Intrinsic::amdgcn_fmul_legacy) {
const APFloat &C1 = Op1->getValueAPF();
const APFloat &C2 = Op2->getValueAPF();
- // The legacy behaviour is that multiplying zero by anything, even NaN
- // or infinity, gives +0.0.
+ // The legacy behaviour is that multiplying +/- 0.0 by anything, even
+ // NaN or infinity, gives +0.0.
if (C1.isZero() || C2.isZero())
return ConstantFP::getNullValue(Ty);
return ConstantFP::get(Ty->getContext(), C1 * C2);
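Concretely, under these legacy semantics fmul_legacy(+0.0, NaN) folds to +0.0 and fmul_legacy(-0.0, inf) folds to +0.0, whereas an ordinary fmul of those operands would produce NaN; only when neither operand is a zero does the fold fall through to the regular IEEE product.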
@@ -2378,8 +2473,37 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
!getConstIntOrUndef(Operands[1], C1))
return nullptr;
+ unsigned BitWidth = Ty->getScalarSizeInBits();
switch (IntrinsicID) {
default: break;
+ case Intrinsic::smax:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return ConstantInt::get(Ty, APInt::getSignedMaxValue(BitWidth));
+ return ConstantInt::get(Ty, C0->sgt(*C1) ? *C0 : *C1);
+
+ case Intrinsic::smin:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth));
+ return ConstantInt::get(Ty, C0->slt(*C1) ? *C0 : *C1);
+
+ case Intrinsic::umax:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return ConstantInt::get(Ty, APInt::getMaxValue(BitWidth));
+ return ConstantInt::get(Ty, C0->ugt(*C1) ? *C0 : *C1);
+
+ case Intrinsic::umin:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return ConstantInt::get(Ty, APInt::getMinValue(BitWidth));
+ return ConstantInt::get(Ty, C0->ult(*C1) ? *C0 : *C1);
+
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::uadd_with_overflow:
@@ -2464,6 +2588,18 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
return ConstantInt::get(Ty, C0->countTrailingZeros());
else
return ConstantInt::get(Ty, C0->countLeadingZeros());
+
+ case Intrinsic::abs:
+ // Undef or minimum val operand with poison min --> undef
+ assert(C1 && "Must be constant int");
+ if (C1->isOneValue() && (!C0 || C0->isMinSignedValue()))
+ return UndefValue::get(Ty);
+
+ // Undef operand with no poison min --> 0 (sign bit must be clear)
+ if (C1->isNullValue() && !C0)
+ return Constant::getNullValue(Ty);
+
+ return ConstantInt::get(Ty, C0->abs());
}
return nullptr;
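A few concrete values for the undef rules added in the last two hunks: smax(i8 undef, i8 7) folds to 127 and umin(i8 undef, i8 7) folds to 0, because the undef operand may be chosen as the saturating extreme, while smax(undef, undef) stays undef. For llvm.abs, abs(i8 undef, i1 true) and abs(i8 -128, i1 true) fold to undef (the int-min-is-poison flag is set), whereas abs(i8 undef, i1 false) folds to 0 so the sign bit is known clear.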
@@ -2592,6 +2728,19 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
switch (IntrinsicID) {
default: break;
+ case Intrinsic::amdgcn_fma_legacy: {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ // The legacy behaviour is that multiplying +/- 0.0 by anything, even
+ // NaN or infinity, gives +0.0.
+ if (C1.isZero() || C2.isZero()) {
+ const APFloat &C3 = Op3->getValueAPF();
+ // It's tempting to just return C3 here, but that would give the
+ // wrong result if C3 was -0.0.
+ return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
+ }
+ LLVM_FALLTHROUGH;
+ }
case Intrinsic::fma:
case Intrinsic::fmuladd: {
APFloat V = Op1->getValueAPF();
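The `APFloat(0.0f) + C3` trick above works because IEEE-754 addition in the default (round-to-nearest) mode returns +0.0 for (+0.0) + (-0.0), while adding +0.0 to any other value leaves it unchanged. A tiny standalone check, plain C++ with no LLVM dependencies (illustrative only):

#include <cstdio>

int main() {
  float C3 = -0.0f;
  float Folded = 0.0f + C3;           // (+0.0) + (-0.0) == +0.0
  std::printf("%g %g\n", C3, Folded); // typically prints "-0 0"
  return 0;
}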
@@ -2719,7 +2868,8 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
SmallVector<Constant *, 4> Lane(Operands.size());
Type *Ty = FVTy->getElementType();
- if (IntrinsicID == Intrinsic::masked_load) {
+ switch (IntrinsicID) {
+ case Intrinsic::masked_load: {
auto *SrcPtr = Operands[0];
auto *Mask = Operands[2];
auto *Passthru = Operands[3];
@@ -2757,6 +2907,51 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
return nullptr;
return ConstantVector::get(NewElements);
}
+ case Intrinsic::arm_mve_vctp8:
+ case Intrinsic::arm_mve_vctp16:
+ case Intrinsic::arm_mve_vctp32:
+ case Intrinsic::arm_mve_vctp64: {
+ if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ unsigned Lanes = FVTy->getNumElements();
+ uint64_t Limit = Op->getZExtValue();
+ // vctp64 is currently modelled as returning a v4i1, not a v2i1. Make
+ // sure we get the limit right in that case and set all relevant lanes.
+ if (IntrinsicID == Intrinsic::arm_mve_vctp64)
+ Limit *= 2;
+
+ SmallVector<Constant *, 16> NCs;
+ for (unsigned i = 0; i < Lanes; i++) {
+ if (i < Limit)
+ NCs.push_back(ConstantInt::getTrue(Ty));
+ else
+ NCs.push_back(ConstantInt::getFalse(Ty));
+ }
+ return ConstantVector::get(NCs);
+ }
+ break;
+ }
+ case Intrinsic::get_active_lane_mask: {
+ auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
+ auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
+ if (Op0 && Op1) {
+ unsigned Lanes = FVTy->getNumElements();
+ uint64_t Base = Op0->getZExtValue();
+ uint64_t Limit = Op1->getZExtValue();
+
+ SmallVector<Constant *, 16> NCs;
+ for (unsigned i = 0; i < Lanes; i++) {
+ if (Base + i < Limit)
+ NCs.push_back(ConstantInt::getTrue(Ty));
+ else
+ NCs.push_back(ConstantInt::getFalse(Ty));
+ }
+ return ConstantVector::get(NCs);
+ }
+ break;
+ }
+ default:
+ break;
+ }
for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
// Gather a column of constants.
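Both new vector folds compute the same predicate: lane i of the result is true exactly when base + i < limit (vctp is the base == 0 case, with the limit doubled for vctp64 because its result is modelled as v4i1). A standalone sketch of that computation using plain integers rather than ConstantInt (illustrative only):

#include <cstdint>
#include <vector>

// Lane mask as folded above: lane I is active while Base + I < Limit.
static std::vector<bool> activeLaneMask(uint64_t Base, uint64_t Limit,
                                        unsigned Lanes) {
  std::vector<bool> Mask(Lanes);
  for (unsigned I = 0; I < Lanes; ++I)
    Mask[I] = Base + I < Limit;
  return Mask;
}
// e.g. activeLaneMask(0, 3, 8) yields {1,1,1,0,0,0,0,0}, matching
// get_active_lane_mask(i64 0, i64 3) folded for an 8-lane result.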
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp
new file mode 100644
index 000000000000..9739c6af5769
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp
@@ -0,0 +1,158 @@
+//===- ConstraintSystem.cpp - A system of linear constraints. ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ConstraintSystem.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Debug.h"
+
+#include <algorithm>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "constraint-system"
+
+bool ConstraintSystem::eliminateUsingFM() {
+ // Implementation of Fourier–Motzkin elimination, with some tricks from the
+ // paper Pugh, William. "The Omega test: a fast and practical integer
+ // programming algorithm for dependence analysis." Supercomputing'91:
+ // Proceedings of the 1991 ACM/IEEE conference on Supercomputing.
+ // IEEE, 1991.
+ assert(!Constraints.empty() &&
+ "should only be called for non-empty constraint systems");
+ unsigned NumVariables = Constraints[0].size();
+ SmallVector<SmallVector<int64_t, 8>, 4> NewSystem;
+
+ unsigned NumConstraints = Constraints.size();
+ uint32_t NewGCD = 1;
+ // FIXME do not use copy
+ for (unsigned R1 = 0; R1 < NumConstraints; R1++) {
+ if (Constraints[R1][1] == 0) {
+ SmallVector<int64_t, 8> NR;
+ NR.push_back(Constraints[R1][0]);
+ for (unsigned i = 2; i < NumVariables; i++) {
+ NR.push_back(Constraints[R1][i]);
+ }
+ NewSystem.push_back(std::move(NR));
+ continue;
+ }
+
+ // FIXME do not use copy
+ for (unsigned R2 = R1 + 1; R2 < NumConstraints; R2++) {
+ if (R1 == R2)
+ continue;
+
+ // FIXME: can we do better than just dropping things here?
+ if (Constraints[R2][1] == 0)
+ continue;
+
+ if ((Constraints[R1][1] < 0 && Constraints[R2][1] < 0) ||
+ (Constraints[R1][1] > 0 && Constraints[R2][1] > 0))
+ continue;
+
+ unsigned LowerR = R1;
+ unsigned UpperR = R2;
+ if (Constraints[UpperR][1] < 0)
+ std::swap(LowerR, UpperR);
+
+ SmallVector<int64_t, 8> NR;
+ for (unsigned I = 0; I < NumVariables; I++) {
+ if (I == 1)
+ continue;
+
+ int64_t M1, M2, N;
+ if (MulOverflow(Constraints[UpperR][I],
+ ((-1) * Constraints[LowerR][1] / GCD), M1))
+ return false;
+ if (MulOverflow(Constraints[LowerR][I],
+ (Constraints[UpperR][1] / GCD), M2))
+ return false;
+ if (AddOverflow(M1, M2, N))
+ return false;
+ NR.push_back(N);
+
+ NewGCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)NR.back()},
+ {32, NewGCD})
+ .getZExtValue();
+ }
+ NewSystem.push_back(std::move(NR));
+ // Give up if the new system gets too big.
+ if (NewSystem.size() > 500)
+ return false;
+ }
+ }
+ Constraints = std::move(NewSystem);
+ GCD = NewGCD;
+
+ return true;
+}
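A worked example of one elimination step, with rows stored as {constant, coefficients...} and read as coeff1*x1 + coeff2*x2 + ... <= constant (the form dump() prints below): combining the upper bound x1 - x2 <= 2 (row {2, 1, -1}) with the lower bound -x1 <= -5 (row {-5, -1, 0}) scales each row by the absolute value of the other's x1 coefficient divided by the GCD (here 1) and adds them, producing -x2 <= -3 (row {-3, -1}). x1 has been eliminated, and the reduced system still carries the implied fact x2 >= 3.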
+
+bool ConstraintSystem::mayHaveSolutionImpl() {
+ while (!Constraints.empty() && Constraints[0].size() > 1) {
+ if (!eliminateUsingFM())
+ return true;
+ }
+
+ if (Constraints.empty() || Constraints[0].size() > 1)
+ return true;
+
+ return all_of(Constraints, [](auto &R) { return R[0] >= 0; });
+}
+
+void ConstraintSystem::dump(ArrayRef<std::string> Names) const {
+ if (Constraints.empty())
+ return;
+
+ for (auto &Row : Constraints) {
+ SmallVector<std::string, 16> Parts;
+ for (unsigned I = 1, S = Row.size(); I < S; ++I) {
+ if (Row[I] == 0)
+ continue;
+ std::string Coefficient;
+ if (Row[I] != 1)
+ Coefficient = std::to_string(Row[I]) + " * ";
+ Parts.push_back(Coefficient + Names[I - 1]);
+ }
+ assert(!Parts.empty() && "need to have at least some parts");
+ LLVM_DEBUG(dbgs() << join(Parts, std::string(" + "))
+ << " <= " << std::to_string(Row[0]) << "\n");
+ }
+}
+
+void ConstraintSystem::dump() const {
+ SmallVector<std::string, 16> Names;
+ for (unsigned i = 1; i < Constraints.back().size(); ++i)
+ Names.push_back("x" + std::to_string(i));
+ LLVM_DEBUG(dbgs() << "---\n");
+ dump(Names);
+}
+
+bool ConstraintSystem::mayHaveSolution() {
+ LLVM_DEBUG(dump());
+ bool HasSolution = mayHaveSolutionImpl();
+ LLVM_DEBUG(dbgs() << (HasSolution ? "sat" : "unsat") << "\n");
+ return HasSolution;
+}
+
+bool ConstraintSystem::isConditionImplied(SmallVector<int64_t, 8> R) {
+ // If all variable coefficients are 0, we have 'C >= 0'. If the constant is >=
+ // 0, R is always true, regardless of the system.
+ if (all_of(makeArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))
+ return R[0] >= 0;
+
+ // If there is no solution with the negation of R added to the system, the
+ // condition must hold based on the existing constraints.
+ R = ConstraintSystem::negate(R);
+
+ auto NewSystem = *this;
+ NewSystem.addVariableRow(R);
+ return !NewSystem.mayHaveSolution();
+}
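A minimal usage sketch of the entry points defined above (not part of the patch; it assumes only addVariableRow, mayHaveSolution and isConditionImplied as declared in ConstraintSystem.h, with rows encoded as {constant, coefficients...} meaning sum(coeff_i * x_i) <= constant):

#include "llvm/Analysis/ConstraintSystem.h"
#include <cassert>

using namespace llvm;

void constraintSystemExample() {
  ConstraintSystem CS;
  CS.addVariableRow({10, 1, 0});  // x1       <= 10
  CS.addVariableRow({-3, -1, 0}); // -x1      <= -3  (x1 >= 3)
  CS.addVariableRow({0, -1, 1});  // -x1 + x2 <= 0   (x2 <= x1)

  // x2 <= 10 follows: adding its negation (x2 >= 11) makes the system
  // infeasible, so the copy made inside isConditionImplied has no solution.
  assert(CS.isConditionImplied({10, 0, 1}));
  // x2 <= 4 does not follow: x1 = 10, x2 = 10 satisfies every row.
  assert(!CS.isConditionImplied({4, 0, 1}));

  // Note: mayHaveSolution() eliminates in place, so query it last.
  assert(CS.mayHaveSolution());
}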
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp
index 953da964c435..19c307b4ef8e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp
@@ -35,7 +35,10 @@ static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
clEnumValN(TargetTransformInfo::TCK_Latency,
"latency", "Instruction latency"),
clEnumValN(TargetTransformInfo::TCK_CodeSize,
- "code-size", "Code size")));
+ "code-size", "Code size"),
+ clEnumValN(TargetTransformInfo::TCK_SizeAndLatency,
+ "size-latency", "Code size and latency")));
+
#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME
@@ -54,7 +57,7 @@ namespace {
/// Returns -1 if the cost is unknown.
/// Note, this method does not cache the cost calculation and it
/// can be expensive in some cases.
- unsigned getInstructionCost(const Instruction *I) const {
+ InstructionCost getInstructionCost(const Instruction *I) const {
return TTI->getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);
}
@@ -100,9 +103,9 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
for (BasicBlock &B : *F) {
for (Instruction &Inst : B) {
- unsigned Cost = TTI->getInstructionCost(&Inst, CostKind);
- if (Cost != (unsigned)-1)
- OS << "Cost Model: Found an estimated cost of " << Cost;
+ InstructionCost Cost = TTI->getInstructionCost(&Inst, CostKind);
+ if (auto CostVal = Cost.getValue())
+ OS << "Cost Model: Found an estimated cost of " << *CostVal;
else
OS << "Cost Model: Unknown cost";
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp
index 280d9ef79efa..da5de75a038c 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp
@@ -49,7 +49,7 @@ bool DDGNode::collectInstructions(
assert(!isa<PiBlockDDGNode>(PN) && "Nested PiBlocks are not supported.");
SmallVector<Instruction *, 8> TmpIList;
PN->collectInstructions(Pred, TmpIList);
- IList.insert(IList.end(), TmpIList.begin(), TmpIList.end());
+ llvm::append_range(IList, TmpIList);
}
} else
llvm_unreachable("unimplemented type of node");
@@ -190,8 +190,7 @@ DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D)
// directions.
BasicBlockListType BBList;
for (auto &SCC : make_range(scc_begin(&F), scc_end(&F)))
- for (BasicBlock * BB : SCC)
- BBList.push_back(BB);
+ append_range(BBList, SCC);
std::reverse(BBList.begin(), BBList.end());
DDGBuilder(*this, D, BBList).populate();
}
@@ -207,8 +206,7 @@ DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI,
LoopBlocksDFS DFS(&L);
DFS.perform(&LI);
BasicBlockListType BBList;
- for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
- BBList.push_back(BB);
+ append_range(BBList, make_range(DFS.beginRPO(), DFS.endRPO()));
DDGBuilder(*this, D, BBList).populate();
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DDGPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/DDGPrinter.cpp
new file mode 100644
index 000000000000..51bd54809857
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Analysis/DDGPrinter.cpp
@@ -0,0 +1,150 @@
+//===- DDGPrinter.cpp - DOT printer for the data dependence graph ----------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// This file defines the `-dot-ddg` analysis pass, which emits DDG in DOT format
+// in a file named `ddg.<graph-name>.dot` for each loop in a function.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DDGPrinter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DotOnly("dot-ddg-only", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("simple ddg dot graph"));
+static cl::opt<std::string> DDGDotFilenamePrefix(
+ "dot-ddg-filename-prefix", cl::init("ddg"), cl::Hidden,
+ cl::desc("The prefix used for the DDG dot file names."));
+
+static void writeDDGToDotFile(DataDependenceGraph &G, bool DOnly = false);
+
+//===--------------------------------------------------------------------===//
+// Implementation of DDG DOT Printer for a loop
+//===--------------------------------------------------------------------===//
+PreservedAnalyses DDGDotPrinterPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ writeDDGToDotFile(*AM.getResult<DDGAnalysis>(L, AR), DotOnly);
+ return PreservedAnalyses::all();
+}
+
+static void writeDDGToDotFile(DataDependenceGraph &G, bool DOnly) {
+ std::string Filename =
+ Twine(DDGDotFilenamePrefix + "." + G.getName() + ".dot").str();
+ errs() << "Writing '" << Filename << "'...";
+
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+
+ if (!EC)
+ // We only provide the constant version of the DOTGraphTrait specialization,
+ // hence the conversion to const pointer
+ WriteGraph(File, (const DataDependenceGraph *)&G, DOnly);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+}
+
+//===--------------------------------------------------------------------===//
+// DDG DOT Printer Implementation
+//===--------------------------------------------------------------------===//
+std::string DDGDotGraphTraits::getNodeLabel(const DDGNode *Node,
+ const DataDependenceGraph *Graph) {
+ if (isSimple())
+ return getSimpleNodeLabel(Node, Graph);
+ else
+ return getVerboseNodeLabel(Node, Graph);
+}
+
+std::string DDGDotGraphTraits::getEdgeAttributes(
+ const DDGNode *Node, GraphTraits<const DDGNode *>::ChildIteratorType I,
+ const DataDependenceGraph *G) {
+ const DDGEdge *E = static_cast<const DDGEdge *>(*I.getCurrent());
+ if (isSimple())
+ return getSimpleEdgeAttributes(Node, E, G);
+ else
+ return getVerboseEdgeAttributes(Node, E, G);
+}
+
+bool DDGDotGraphTraits::isNodeHidden(const DDGNode *Node,
+ const DataDependenceGraph *Graph) {
+ if (isSimple() && isa<RootDDGNode>(Node))
+ return true;
+ assert(Graph && "expected a valid graph pointer");
+ return Graph->getPiBlock(*Node) != nullptr;
+}
+
+std::string
+DDGDotGraphTraits::getSimpleNodeLabel(const DDGNode *Node,
+ const DataDependenceGraph *G) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ if (isa<SimpleDDGNode>(Node))
+ for (auto *II : static_cast<const SimpleDDGNode *>(Node)->getInstructions())
+ OS << *II << "\n";
+ else if (isa<PiBlockDDGNode>(Node))
+ OS << "pi-block\nwith\n"
+ << cast<PiBlockDDGNode>(Node)->getNodes().size() << " nodes\n";
+ else if (isa<RootDDGNode>(Node))
+ OS << "root\n";
+ else
+ llvm_unreachable("Unimplemented type of node");
+ return OS.str();
+}
+
+std::string
+DDGDotGraphTraits::getVerboseNodeLabel(const DDGNode *Node,
+ const DataDependenceGraph *G) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "<kind:" << Node->getKind() << ">\n";
+ if (isa<SimpleDDGNode>(Node))
+ for (auto *II : static_cast<const SimpleDDGNode *>(Node)->getInstructions())
+ OS << *II << "\n";
+ else if (isa<PiBlockDDGNode>(Node)) {
+ OS << "--- start of nodes in pi-block ---\n";
+ unsigned Count = 0;
+ const auto &PNodes = cast<PiBlockDDGNode>(Node)->getNodes();
+ for (auto *PN : PNodes) {
+ OS << getVerboseNodeLabel(PN, G);
+ if (++Count != PNodes.size())
+ OS << "\n";
+ }
+ OS << "--- end of nodes in pi-block ---\n";
+ } else if (isa<RootDDGNode>(Node))
+ OS << "root\n";
+ else
+ llvm_unreachable("Unimplemented type of node");
+ return OS.str();
+}
+
+std::string DDGDotGraphTraits::getSimpleEdgeAttributes(
+ const DDGNode *Src, const DDGEdge *Edge, const DataDependenceGraph *G) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ DDGEdge::EdgeKind Kind = Edge->getKind();
+ OS << "label=\"[" << Kind << "]\"";
+ return OS.str();
+}
+
+std::string DDGDotGraphTraits::getVerboseEdgeAttributes(
+ const DDGNode *Src, const DDGEdge *Edge, const DataDependenceGraph *G) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ DDGEdge::EdgeKind Kind = Edge->getKind();
+ OS << "label=\"[";
+ if (Kind == DDGEdge::EdgeKind::MemoryDependence)
+ OS << G->getDependenceString(*Src, Edge->getTargetNode());
+ else
+ OS << Kind;
+ OS << "]\"";
+ return OS.str();
+}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp
index 60cd1b5317d6..87a41bbf16a5 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -23,6 +24,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -54,22 +56,8 @@ public:
void print(raw_ostream &O, const Module *M = nullptr) const override;
};
-} // end anonymous namespace
-
-void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
-}
-
-bool Delinearization::runOnFunction(Function &F) {
- this->F = &F;
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- return false;
-}
-
-void Delinearization::print(raw_ostream &O, const Module *) const {
+void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
+ ScalarEvolution *SE) {
O << "Delinearization on function " << F->getName() << ":\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
Instruction *Inst = &(*I);
@@ -120,6 +108,25 @@ void Delinearization::print(raw_ostream &O, const Module *) const {
}
}
+} // end anonymous namespace
+
+void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+}
+
+bool Delinearization::runOnFunction(Function &F) {
+ this->F = &F;
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ return false;
+}
+
+void Delinearization::print(raw_ostream &O, const Module *) const {
+ printDelinearization(O, F, LI, SE);
+}
+
char Delinearization::ID = 0;
static const char delinearization_name[] = "Delinearization";
INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
@@ -128,3 +135,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
+
+DelinearizationPrinterPass::DelinearizationPrinterPass(raw_ostream &OS)
+ : OS(OS) {}
+PreservedAnalyses DelinearizationPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ printDelinearization(OS, &F, &AM.getResult<LoopAnalysis>(F),
+ &AM.getResult<ScalarEvolutionAnalysis>(F));
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
index aaee8c21f289..dd11b0b02bf8 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
@@ -80,7 +80,7 @@ void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
static bool isAlwaysLive(Instruction *I) {
return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
- I->mayHaveSideEffects();
+ I->mayHaveSideEffects() || !I->willReturn();
}
void DemandedBits::determineLiveOperandBits(
@@ -115,7 +115,7 @@ void DemandedBits::determineLiveOperandBits(
default: break;
case Instruction::Call:
case Instruction::Invoke:
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::bswap:
@@ -170,10 +170,33 @@ void DemandedBits::determineLiveOperandBits(
}
break;
}
+ case Intrinsic::umax:
+ case Intrinsic::umin:
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ // If low bits of result are not demanded, they are also not demanded
+ // for the min/max operands.
+ AB = APInt::getBitsSetFrom(BitWidth, AOut.countTrailingZeros());
+ break;
}
+ }
break;
case Instruction::Add:
+ if (AOut.isMask()) {
+ AB = AOut;
+ } else {
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
+ AB = determineLiveOperandBitsAdd(OperandNo, AOut, Known, Known2);
+ }
+ break;
case Instruction::Sub:
+ if (AOut.isMask()) {
+ AB = AOut;
+ } else {
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
+ AB = determineLiveOperandBitsSub(OperandNo, AOut, Known, Known2);
+ }
+ break;
case Instruction::Mul:
// Find the highest live output bit. We don't need any more input
// bits than that (adds, and thus subtracts, ripple only to the
@@ -469,6 +492,86 @@ void DemandedBits::print(raw_ostream &OS) {
}
}
+static APInt determineLiveOperandBitsAddCarry(unsigned OperandNo,
+ const APInt &AOut,
+ const KnownBits &LHS,
+ const KnownBits &RHS,
+ bool CarryZero, bool CarryOne) {
+ assert(!(CarryZero && CarryOne) &&
+ "Carry can't be zero and one at the same time");
+
+ // The following check should be done by the caller, as it also indicates
+ // that LHS and RHS don't need to be computed.
+ //
+ // if (AOut.isMask())
+ // return AOut;
+
+ // Boundary bits' carry out is unaffected by their carry in.
+ APInt Bound = (LHS.Zero & RHS.Zero) | (LHS.One & RHS.One);
+
+ // First, the alive carry bits are determined from the alive output bits:
+ // Let demand ripple to the right but only up to any set bit in Bound.
+ // AOut = -1----
+ // Bound = ----1-
+ // ACarry&~AOut = --111-
+ APInt RBound = Bound.reverseBits();
+ APInt RAOut = AOut.reverseBits();
+ APInt RProp = RAOut + (RAOut | ~RBound);
+ APInt RACarry = RProp ^ ~RBound;
+ APInt ACarry = RACarry.reverseBits();
+
+ // Then, the alive input bits are determined from the alive carry bits:
+ APInt NeededToMaintainCarryZero;
+ APInt NeededToMaintainCarryOne;
+ if (OperandNo == 0) {
+ NeededToMaintainCarryZero = LHS.Zero | ~RHS.Zero;
+ NeededToMaintainCarryOne = LHS.One | ~RHS.One;
+ } else {
+ NeededToMaintainCarryZero = RHS.Zero | ~LHS.Zero;
+ NeededToMaintainCarryOne = RHS.One | ~LHS.One;
+ }
+
+ // As in computeForAddCarry
+ APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero;
+ APInt PossibleSumOne = LHS.One + RHS.One + CarryOne;
+
+ // The below is simplified from
+ //
+ // APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero);
+ // APInt CarryKnownOne = PossibleSumOne ^ LHS.One ^ RHS.One;
+ // APInt CarryUnknown = ~(CarryKnownZero | CarryKnownOne);
+ //
+ // APInt NeededToMaintainCarry =
+ // (CarryKnownZero & NeededToMaintainCarryZero) |
+ // (CarryKnownOne & NeededToMaintainCarryOne) |
+ // CarryUnknown;
+
+ APInt NeededToMaintainCarry = (~PossibleSumZero | NeededToMaintainCarryZero) &
+ (PossibleSumOne | NeededToMaintainCarryOne);
+
+ APInt AB = AOut | (ACarry & NeededToMaintainCarry);
+ return AB;
+}
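The reverse-bit ripple trick in the middle of this function can be checked in isolation. A small sketch reproducing the 6-bit example from the comment above (illustrative only; bits drawn as '-' in the comment are taken to be 0 here):

#include "llvm/ADT/APInt.h"
#include <cassert>

using namespace llvm;

void demandRippleDemo() {
  APInt AOut(6, 0b010000);  // AOut  = -1----
  APInt Bound(6, 0b000010); // Bound = ----1-
  APInt RBound = Bound.reverseBits();
  APInt RAOut = AOut.reverseBits();
  APInt RProp = RAOut + (RAOut | ~RBound);
  APInt RACarry = RProp ^ ~RBound;
  APInt ACarry = RACarry.reverseBits();
  // Demand ripples right from the live output bit and stops at the first
  // boundary bit, giving ACarry & ~AOut = --111-.
  assert((ACarry & ~AOut) == 0b001110);
}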
+
+APInt DemandedBits::determineLiveOperandBitsAdd(unsigned OperandNo,
+ const APInt &AOut,
+ const KnownBits &LHS,
+ const KnownBits &RHS) {
+ return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, RHS, true,
+ false);
+}
+
+APInt DemandedBits::determineLiveOperandBitsSub(unsigned OperandNo,
+ const APInt &AOut,
+ const KnownBits &LHS,
+ const KnownBits &RHS) {
+ KnownBits NRHS;
+ NRHS.Zero = RHS.One;
+ NRHS.One = RHS.Zero;
+ return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, NRHS, false,
+ true);
+}
+
FunctionPass *llvm::createDemandedBitsWrapperPass() {
return new DemandedBitsWrapperPass();
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp
index bcfeef7fb8ab..c2c61131e40e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -653,14 +653,16 @@ static AliasResult underlyingObjectsAlias(AAResults *AA,
const MemoryLocation &LocB) {
// Check the original locations (minus size) for noalias, which can happen for
// tbaa, incompatible underlying object locations, etc.
- MemoryLocation LocAS(LocA.Ptr, LocationSize::unknown(), LocA.AATags);
- MemoryLocation LocBS(LocB.Ptr, LocationSize::unknown(), LocB.AATags);
+ MemoryLocation LocAS =
+ MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags);
+ MemoryLocation LocBS =
+ MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags);
if (AA->alias(LocAS, LocBS) == NoAlias)
return NoAlias;
// Check the underlying objects are the same
- const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL);
- const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL);
+ const Value *AObj = getUnderlyingObject(LocA.Ptr);
+ const Value *BObj = getUnderlyingObject(LocB.Ptr);
// If the underlying objects are the same, they must alias
if (AObj == BObj)
@@ -871,8 +873,8 @@ void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
const SCEV *Dst = Pair->Dst;
if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
(isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
- const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
- const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
+ const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src);
+ const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst);
const SCEV *SrcCastOp = SrcCast->getOperand();
const SCEV *DstCastOp = DstCast->getOperand();
if (SrcCastOp->getType() == DstCastOp->getType()) {
@@ -969,8 +971,8 @@ bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
isa<SCEVSignExtendExpr>(Y)) ||
(isa<SCEVZeroExtendExpr>(X) &&
isa<SCEVZeroExtendExpr>(Y))) {
- const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
- const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
+ const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X);
+ const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y);
const SCEV *Xop = CX->getOperand();
const SCEV *Yop = CY->getOperand();
if (Xop->getType() == Yop->getType()) {
@@ -1459,19 +1461,6 @@ static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
return Q;
}
-
-static
-APInt maxAPInt(APInt A, APInt B) {
- return A.sgt(B) ? A : B;
-}
-
-
-static
-APInt minAPInt(APInt A, APInt B) {
- return A.slt(B) ? A : B;
-}
-
-
// exactSIVtest -
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
// where i is an induction variable, c1 and c2 are loop invariant, and a1
@@ -1542,18 +1531,18 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
// test(BM/G, LM-X) and test(-BM/G, X-UM)
APInt TMUL = BM.sdiv(G);
if (TMUL.sgt(0)) {
- TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL));
LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
if (UMvalid) {
- TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(UM - X, TMUL));
LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
}
}
else {
- TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL));
LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
if (UMvalid) {
- TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(UM - X, TMUL));
LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
}
}
@@ -1561,18 +1550,18 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
// test(AM/G, LM-Y) and test(-AM/G, Y-UM)
TMUL = AM.sdiv(G);
if (TMUL.sgt(0)) {
- TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL));
LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
if (UMvalid) {
- TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(UM - Y, TMUL));
LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
}
}
else {
- TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL));
LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
if (UMvalid) {
- TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(UM - Y, TMUL));
LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
}
}
@@ -1591,11 +1580,11 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
LLVM_DEBUG(dbgs() << "\t exploring LT direction\n");
TMUL = AM - BM;
if (TMUL.sgt(0)) {
- TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y + 1, TMUL));
LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
}
else {
- TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(X - Y + 1, TMUL));
LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
}
if (TL.sle(TU)) {
@@ -1608,20 +1597,20 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
TL = SaveTL;
LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n");
if (TMUL.sgt(0)) {
- TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y, TMUL));
LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
}
else {
- TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(X - Y, TMUL));
LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
}
TMUL = BM - AM;
if (TMUL.sgt(0)) {
- TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X, TMUL));
LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
}
else {
- TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(Y - X, TMUL));
LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
}
if (TL.sle(TU)) {
@@ -1634,11 +1623,11 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
TL = SaveTL;
LLVM_DEBUG(dbgs() << "\t exploring GT direction\n");
if (TMUL.sgt(0)) {
- TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X + 1, TMUL));
LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
}
else {
- TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(Y - X + 1, TMUL));
LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
}
if (TL.sle(TU)) {
@@ -1950,18 +1939,18 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
// test(BM/G, LM-X) and test(-BM/G, X-UM)
APInt TMUL = BM.sdiv(G);
if (TMUL.sgt(0)) {
- TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL));
LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
if (SrcUMvalid) {
- TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(SrcUM - X, TMUL));
LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
}
}
else {
- TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL));
LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
if (SrcUMvalid) {
- TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(SrcUM - X, TMUL));
LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
}
}
@@ -1969,18 +1958,18 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
// test(AM/G, LM-Y) and test(-AM/G, Y-UM)
TMUL = AM.sdiv(G);
if (TMUL.sgt(0)) {
- TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL));
LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
if (DstUMvalid) {
- TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(DstUM - Y, TMUL));
LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
}
}
else {
- TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+ TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL));
LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
if (DstUMvalid) {
- TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
+ TL = APIntOps::smax(TL, ceilingOfQuotient(DstUM - Y, TMUL));
LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
}
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp b/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp
index 7a98d844e4cb..6b90db4bafe1 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp
@@ -140,75 +140,74 @@ template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() {
if (*N == PiNode || NodesInSCC.count(N))
continue;
- for (NodeType *SCCNode : NL) {
-
- enum Direction {
- Incoming, // Incoming edges to the SCC
- Outgoing, // Edges going ot of the SCC
- DirectionCount // To make the enum usable as an array index.
- };
-
- // Use these flags to help us avoid creating redundant edges. If there
- // are more than one edges from an outside node to inside nodes, we only
- // keep one edge from that node to the pi-block node. Similarly, if
- // there are more than one edges from inside nodes to an outside node,
- // we only keep one edge from the pi-block node to the outside node.
- // There is a flag defined for each direction (incoming vs outgoing) and
- // for each type of edge supported, using a two-dimensional boolean
- // array.
- using EdgeKind = typename EdgeType::EdgeKind;
- EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{
- false, false};
-
- auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
- const EdgeKind K) {
- switch (K) {
- case EdgeKind::RegisterDefUse:
- createDefUseEdge(Src, Dst);
- break;
- case EdgeKind::MemoryDependence:
- createMemoryEdge(Src, Dst);
- break;
- case EdgeKind::Rooted:
- createRootedEdge(Src, Dst);
- break;
- default:
- llvm_unreachable("Unsupported type of edge.");
- }
- };
-
- auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
- const Direction Dir) {
- if (!Src->hasEdgeTo(*Dst))
- return;
- LLVM_DEBUG(dbgs()
- << "reconnecting("
- << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
- << ":\nSrc:" << *Src << "\nDst:" << *Dst
- << "\nNew:" << *New << "\n");
- assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
- "Invalid direction.");
-
- SmallVector<EdgeType *, 10> EL;
- Src->findEdgesTo(*Dst, EL);
- for (EdgeType *OldEdge : EL) {
- EdgeKind Kind = OldEdge->getKind();
- if (!EdgeAlreadyCreated[Dir][Kind]) {
- if (Dir == Direction::Incoming) {
- createEdgeOfKind(*Src, *New, Kind);
- LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
- } else if (Dir == Direction::Outgoing) {
- createEdgeOfKind(*New, *Dst, Kind);
- LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
- }
- EdgeAlreadyCreated[Dir][Kind] = true;
+ enum Direction {
+ Incoming, // Incoming edges to the SCC
+ Outgoing, // Edges going out of the SCC
+ DirectionCount // To make the enum usable as an array index.
+ };
+
+ // Use these flags to help us avoid creating redundant edges. If there is
+ // more than one edge from an outside node to inside nodes, we only keep
+ // one edge from that node to the pi-block node. Similarly, if there is
+ // more than one edge from inside nodes to an outside node, we only keep
+ // one edge from the pi-block node to the outside node.
+ // There is a flag defined for each direction (incoming vs outgoing) and
+ // for each type of edge supported, using a two-dimensional boolean
+ // array.
+ using EdgeKind = typename EdgeType::EdgeKind;
+ EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{false,
+ false};
+
+ auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
+ const EdgeKind K) {
+ switch (K) {
+ case EdgeKind::RegisterDefUse:
+ createDefUseEdge(Src, Dst);
+ break;
+ case EdgeKind::MemoryDependence:
+ createMemoryEdge(Src, Dst);
+ break;
+ case EdgeKind::Rooted:
+ createRootedEdge(Src, Dst);
+ break;
+ default:
+ llvm_unreachable("Unsupported type of edge.");
+ }
+ };
+
+ auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
+ const Direction Dir) {
+ if (!Src->hasEdgeTo(*Dst))
+ return;
+ LLVM_DEBUG(
+ dbgs() << "reconnecting("
+ << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
+ << ":\nSrc:" << *Src << "\nDst:" << *Dst << "\nNew:" << *New
+ << "\n");
+ assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
+ "Invalid direction.");
+
+ SmallVector<EdgeType *, 10> EL;
+ Src->findEdgesTo(*Dst, EL);
+ for (EdgeType *OldEdge : EL) {
+ EdgeKind Kind = OldEdge->getKind();
+ if (!EdgeAlreadyCreated[Dir][Kind]) {
+ if (Dir == Direction::Incoming) {
+ createEdgeOfKind(*Src, *New, Kind);
+ LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
+ } else if (Dir == Direction::Outgoing) {
+ createEdgeOfKind(*New, *Dst, Kind);
+ LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
}
- Src->removeEdge(*OldEdge);
- destroyEdge(*OldEdge);
- LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
+ EdgeAlreadyCreated[Dir][Kind] = true;
}
- };
+ Src->removeEdge(*OldEdge);
+ destroyEdge(*OldEdge);
+ LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
+ }
+ };
+ for (NodeType *SCCNode : NL) {
// Process incoming edges incident to the pi-block node.
reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
@@ -492,16 +491,14 @@ void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
// Put members of the pi-block right after the pi-block itself, for
// convenience.
const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
- NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(),
- PiBlockMembers.end());
+ llvm::append_range(NodesInPO, PiBlockMembers);
}
NodesInPO.push_back(N);
}
size_t OldSize = Graph.Nodes.size();
Graph.Nodes.clear();
- for (NodeType *N : reverse(NodesInPO))
- Graph.Nodes.push_back(N);
+ append_range(Graph.Nodes, reverse(NodesInPO));
if (Graph.Nodes.size() != OldSize)
assert(false &&
"Expected the number of nodes to stay the same after the sort");
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
new file mode 100644
index 000000000000..e138e82c8b05
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -0,0 +1,531 @@
+//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a model runner using Tensorflow C APIs, allowing the
+// loading of a model from a command line option.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+#if defined(LLVM_HAVE_TF_API)
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
+#include "llvm/Analysis/MLInlineAdvisor.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<std::string> TrainingLog(
+ "training-log", cl::Hidden,
+ cl::desc("Path where the development - mode inlining log is saved."));
+
+static cl::opt<std::string> TFModelUnderTrainingPath(
+ "ml-inliner-model-under-training", cl::Hidden,
+ cl::desc(R"(Path to SavedModel from the previous training iteration.
+The directory is also expected to contain a JSON specification of the
+outputs expected to be logged, where the first entry must be the
+inlining decision. The file containing the specification should be
+called output_spec.json. The expected JSON value is an array of
+dictionaries. Each dictionary should have 2 keys:
+
+- "tensor_spec, followed by the TensorSpec description of the
+output; and
+- "logging_name", a string indicating the name to use when
+logging the output values.
+
+Example:
+[
+ {
+ "logging_name" : "some_name",
+ "tensor_spec" : {
+ "name" : "model_name",
+ "port" : 0,
+ "shape" : [2, 3],
+ "type" : "float"
+ }
+ }
+]
+
+The first value must always correspond to the decision.)"));
+
+static cl::opt<std::string> TFOutputSpecOverride(
+ "ml-inliner-output-spec-override", cl::Hidden,
+ cl::desc("Override the path to the output spec json file. See "
+ "-ml-inliner-model-under-training documentation for the "
+ "specification of that file."));
+
+static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
+ cl::Hidden, cl::init("action_"),
+ cl::desc("Prefix for feature names."));
+
+namespace {
+/// An InlineEvent, used by TrainingLogger.
+struct InlineEvent {
+ /// What the default policy's decision would have been.
+ int64_t DefaultDecision = 0;
+
+ /// What we advised. When training off the default policy, this is the same as
+ /// DefaultDecision.
+ int64_t AdvisedDecision = 0;
+
+ /// What actually happened. This would be 'false' in the case of an inline
+ /// error, even if AdvisedDecision were true, otherwise it agrees with
+ /// AdvisedDecision.
+ bool Effect = false;
+
+ /// What the change in size was: size_after - size_before
+ int64_t Reward = 0;
+};
+
+/// Collect data we may use for training a model, and write it as a textual
+/// Tensorflow SequenceExample
+/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
+/// protobuf (https://developers.google.com/protocol-buffers).
+/// Because this is a protobuf, we cannot just stream the events as they come.
+/// Internally, TrainingLogger stores data in column-major format, because that
+/// lines up with how TF SequenceExample represents it.
+class ModelUnderTrainingRunner;
+class TrainingLogger final {
+public:
+ TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);
+
+ /// Log one inlining event.
+ void logInlineEvent(const InlineEvent &Event,
+ const MLModelRunner &ModelRunner);
+
+ /// Print the stored tensors.
+ void print();
+
+private:
+ StringRef LogFileName;
+ const ModelUnderTrainingRunner *const MUTR;
+ std::unique_ptr<Logger> L;
+ std::vector<bool> Effects;
+ /// There's at least one output. We'll set this to a different value if MUTR
+ /// is available.
+ size_t OutputCount = 1;
+ /// Set these 2 clearly OOB, to make sure we set them later.
+ size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
+ size_t DecisionPos = std::numeric_limits<size_t>::max();
+};
+
+/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
+/// the offline training scenario. Note that training happens outside of the
+/// compiler; this facility is concerned with producing training data ("logs").
+/// This InlineAdvisor can operate in the following modes:
+///
+/// 1) collect logs for the default policy. This is useful for bootstrapping
+/// training, which will be considerably faster by starting from a reasonable
+/// policy.
+///
+/// 2) collect logs for the ML policy, using a model from a previous
+/// training. Potentially, that model uses internally some small random
+/// perturbation of its weights, to induce exploration (setting this up is the
+/// responsibility of the training algorithm). The logs would then be used to
+/// retrain and improve on this model.
+///
+/// 3) use the provided model, with no logging. This is useful for end to end
+/// validation - the model, in this case, is a release candidate and shouldn't
+/// have random perturbations. It is a convenience feature: rather than needing
+/// to take the release candidate model and compile it in 'release' mode,
+/// validate it, then potentially discard it, it's easier to just pass the model
+/// to the compiler, albeit compilation would be slower, as a one-off. Once the
+/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
+/// release mode. The expectation is that a well-trained model provides a good
+/// policy over a sufficiently diverse codebase, over many changes (i.e.
+/// training happens seldom).
+class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
+public:
+ DevelopmentModeMLInlineAdvisor(
+ Module &M, ModuleAnalysisManager &MAM,
+ std::unique_ptr<MLModelRunner> ModelRunner,
+ std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
+ std::unique_ptr<TrainingLogger> Logger);
+
+ size_t getTotalSizeEstimate();
+
+ virtual ~DevelopmentModeMLInlineAdvisor();
+ void updateNativeSizeEstimate(int64_t Change) {
+ *CurrentNativeSize += Change;
+ }
+ void resetNativeSize(Function *F) {
+ FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
+ }
+
+ std::unique_ptr<MLInlineAdvice>
+ getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
+
+ Optional<size_t> getNativeSizeEstimate(const Function &F) const;
+
+private:
+ bool isLogging() const { return !!Logger; }
+ std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;
+
+ std::function<bool(CallBase &)> GetDefaultAdvice;
+ const bool IsDoingInference;
+ std::unique_ptr<TrainingLogger> Logger;
+
+ const Optional<int32_t> InitialNativeSize;
+ Optional<int32_t> CurrentNativeSize;
+};
+
+/// A variant of MLInlineAdvice that tracks all non-trivial inlining
+/// decisions, for training/logging.
+class LoggingMLInlineAdvice : public MLInlineAdvice {
+public:
+ LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
+ OptimizationRemarkEmitter &ORE, bool Recommendation,
+ TrainingLogger &Logger,
+ Optional<size_t> CallerSizeEstimateBefore,
+ Optional<size_t> CalleeSizeEstimateBefore,
+ bool DefaultDecision, bool Mandatory = false)
+ : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
+ CallerSizeEstimateBefore(CallerSizeEstimateBefore),
+ CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
+ DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}
+
+ virtual ~LoggingMLInlineAdvice() = default;
+
+private:
+ DevelopmentModeMLInlineAdvisor *getAdvisor() const {
+ return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
+ }
+ void recordInliningImpl() override {
+ MLInlineAdvice::recordInliningImpl();
+ getAdvisor()->resetNativeSize(Caller);
+ int Reward = std::numeric_limits<int>::max();
+ if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
+ !getAdvisor()->isForcedToStop()) {
+ int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
+ *CalleeSizeEstimateBefore;
+ Reward = NativeSizeAfter -
+ (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
+ getAdvisor()->updateNativeSizeEstimate(Reward);
+ }
+ log(Reward, /*Success=*/true);
+ }
+
+ void recordInliningWithCalleeDeletedImpl() override {
+ MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
+ getAdvisor()->resetNativeSize(Caller);
+ if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
+ !getAdvisor()->isForcedToStop()) {
+ int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
+ int Reward = NativeSizeAfter -
+ (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
+ getAdvisor()->updateNativeSizeEstimate(Reward);
+ log(Reward, /*Success=*/true);
+ }
+ }
+
+ void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
+ MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
+ log(NoReward, /*Success=*/false);
+ }
+
+ void recordUnattemptedInliningImpl() override {
+ MLInlineAdvice::recordUnattemptedInliningImpl();
+ log(NoReward, /*Success=*/false);
+ }
+
+ void log(int64_t Reward, bool Success) {
+ if (Mandatory)
+ return;
+ InlineEvent Event;
+ Event.AdvisedDecision = isInliningRecommended();
+ Event.DefaultDecision = DefaultDecision;
+ Event.Effect = Success;
+ Event.Reward = Reward;
+ Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
+ }
+
+ static const int64_t NoReward = 0;
+ TrainingLogger &Logger;
+ const Optional<size_t> CallerSizeEstimateBefore;
+ const Optional<size_t> CalleeSizeEstimateBefore;
+ const int64_t DefaultDecision;
+ const int64_t Mandatory;
+};
+
+/// A pseudo model runner. We use it to store feature values when collecting
+/// logs for the default policy, but never ask it to 'run'.
+class NoInferenceModelRunner : public MLModelRunner {
+public:
+ NoInferenceModelRunner(LLVMContext &Ctx)
+ : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
+ void setFeature(FeatureIndex Index, int64_t Value) override {
+ Features[static_cast<int>(Index)] = Value;
+ }
+
+ int64_t getFeature(int Index) const override { return Features[Index]; }
+ bool run() override {
+ llvm_unreachable("We shouldn't call run on this model runner.");
+ }
+
+private:
+ InlineFeatures Features;
+};
+
+/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
+/// to dynamically load and evaluate a TF SavedModel
+/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
+/// sacrificed for ease of use while training.
+class ModelUnderTrainingRunner final : public MLModelRunner {
+public:
+ ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);
+
+ bool run() override;
+
+ // Disallows copy and assign.
+ ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
+ ModelUnderTrainingRunner &
+ operator=(const ModelUnderTrainingRunner &) = delete;
+
+ void setFeature(FeatureIndex Index, int64_t Value) override;
+ int64_t getFeature(int Index) const override;
+ bool isValid() const { return !!Evaluator; }
+
+ const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const {
+ return OutputSpecs;
+ }
+
+ const Optional<TFModelEvaluator::EvaluationResult> &
+ lastEvaluationResult() const {
+ return LastEvaluationResult;
+ }
+
+private:
+ std::unique_ptr<TFModelEvaluator> Evaluator;
+ std::vector<LoggedFeatureSpec> OutputSpecs;
+ Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;
+
+ // The training framework needs some additional features.
+ const std::vector<TensorSpec> TrainingOnlyFeatures{
+ TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
+ TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
+ TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
+ TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
+};
+} // namespace
+
+TrainingLogger::TrainingLogger(StringRef LogFileName,
+ const ModelUnderTrainingRunner *MUTR)
+ : LogFileName(LogFileName), MUTR(MUTR) {
+ // The first output is the inlining decision.
+ if (MUTR)
+ OutputCount = MUTR->outputLoggedFeatureSpecs().size();
+ std::vector<LoggedFeatureSpec> FT;
+
+ for (size_t I = 0; I < NumberOfFeatures; ++I)
+ FT.push_back(
+ {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
+ if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
+ append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));
+
+ DefaultDecisionPos = FT.size();
+ FT.push_back(
+ {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});
+
+ DecisionPos = FT.size();
+ FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});
+
+ L = std::make_unique<Logger>(
+ FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
+ InlineSizeEstimatorAnalysis::isEvaluatorRequested());
+}
+
+/// Log one inlining event.
+void TrainingLogger::logInlineEvent(const InlineEvent &Event,
+ const MLModelRunner &ModelRunner) {
+ size_t CurrentFeature = 0;
+ for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
+ int64_t F = ModelRunner.getFeature(CurrentFeature);
+ L->logTensorValue(CurrentFeature, &F);
+ }
+
+ for (size_t I = 1; I < OutputCount; ++I) {
+ const auto &Result = *MUTR->lastEvaluationResult();
+ auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec;
+ const char *RawData =
+ reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
+ L->logTensorValue(CurrentFeature, RawData,
+ Spec.getElementCount() * Spec.getElementByteSize());
+ ++CurrentFeature;
+ }
+
+ assert(CurrentFeature == DefaultDecisionPos);
+ L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision);
+ L->logTensorValue(DecisionPos, &Event.AdvisedDecision);
+ if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
+ L->logReward(Event.Reward);
+
+ // For debugging / later use
+ Effects.push_back(Event.Effect);
+}
+
+void TrainingLogger::print() {
+ std::error_code EC;
+ raw_fd_ostream OutFile(LogFileName, EC);
+ L->print(OutFile);
+}
+
+DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
+ Module &M, ModuleAnalysisManager &MAM,
+ std::unique_ptr<MLModelRunner> ModelRunner,
+ std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
+ std::unique_ptr<TrainingLogger> Logger)
+ : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
+ GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
+ Logger(std::move(Logger)),
+ InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
+ CurrentNativeSize(InitialNativeSize) {
+ // We cannot have the case of neither inference nor logging.
+ assert(IsDoingInference || isLogging());
+}
+
+DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
+ if (isLogging())
+ Logger->print();
+}
+
+Optional<size_t>
+DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
+ if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
+ return None;
+ auto &R =
+ FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
+ if (!R) {
+ F.getParent()->getContext().emitError(
+ "Native size estimator is not present.");
+ return 0;
+ }
+ return *R;
+}
+
+std::unique_ptr<MLInlineAdvice>
+DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
+ return std::make_unique<LoggingMLInlineAdvice>(
+ /*Advisor=*/this,
+ /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
+ /*Logger=*/*Logger,
+ /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
+ /*CalleeSizeEstimateBefore=*/
+ getNativeSizeEstimate(*CB.getCalledFunction()),
+ /*DefaultDecision=*/true, /*Mandatory*/ true);
+}
+
+std::unique_ptr<MLInlineAdvice>
+DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
+ CallBase &CB, OptimizationRemarkEmitter &ORE) {
+ if (IsDoingInference && !isLogging())
+ return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
+
+ bool DefaultAdvice = GetDefaultAdvice(CB);
+ auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
+ return std::make_unique<LoggingMLInlineAdvice>(
+ /*Advisor=*/this,
+ /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
+ /*Logger=*/*Logger,
+ /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
+ /*CalleeSizeEstimateBefore=*/
+ getNativeSizeEstimate(*CB.getCalledFunction()),
+ /*DefaultDecision=*/DefaultAdvice);
+}
+
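+/// Sum the native size estimates of all defined, non-deleted functions in
+/// the module. Returns 0 when the size estimator was not requested.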
+size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
+ if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
+ return 0;
+ size_t Ret = 0;
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ if (isFunctionDeleted(&F))
+ continue;
+ Ret += *getNativeSizeEstimate(F);
+ }
+ return Ret;
+}
+
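+/// Build the input and output tensor specs and construct the underlying
+/// saved-model evaluator; on failure, an error is emitted and the evaluator
+/// is left unset.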
+ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
+ const std::string &ModelPath)
+ : MLModelRunner(Ctx) {
+ std::vector<TensorSpec> InputSpecs;
+ for (size_t I = 0; I < NumberOfFeatures; ++I)
+ InputSpecs.push_back(
+ TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
+ append_range(InputSpecs, TrainingOnlyFeatures);
+ if (auto MaybeOutSpecs =
+ loadOutputSpecs(Ctx, DecisionName, ModelPath, TFOutputSpecOverride))
+ OutputSpecs = std::move(*MaybeOutSpecs);
+ else
+ return;
+
+ Evaluator = std::make_unique<TFModelEvaluator>(
+ ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
+ OutputSpecs.size());
+ if (!Evaluator || !Evaluator->isValid()) {
+ Ctx.emitError("Failed to create inliner saved model evaluator");
+ Evaluator.reset();
+ return;
+ }
+}
+
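+/// Run one model evaluation; the first output tensor holds the inlining
+/// decision.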
+bool ModelUnderTrainingRunner::run() {
+ LastEvaluationResult = Evaluator->evaluate();
+ if (!LastEvaluationResult.hasValue()) {
+ Ctx.emitError("Error evaluating model.");
+ return false;
+ }
+ int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0);
+ return static_cast<bool>(Decision);
+}
+
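+/// Features are read from and written to the evaluator's input tensors.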
+int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
+ return *Evaluator->getInput<int64_t>(Index);
+}
+
+void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
+ size_t NumericIndex = static_cast<size_t>(Index);
+ *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
+}
+
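+/// Create the development-mode advisor: pick a model runner (trained model
+/// or no-op), optionally attach a training logger, and combine them.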
+std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
+ Module &M, ModuleAnalysisManager &MAM,
+ std::function<bool(CallBase &)> GetDefaultAdvice) {
+ auto &Ctx = M.getContext();
+ std::unique_ptr<MLModelRunner> Runner;
+ ModelUnderTrainingRunner *MUTRPtr = nullptr;
+ bool IsDoingInference = false;
+ if (TFModelUnderTrainingPath.empty())
+ Runner.reset(new NoInferenceModelRunner(Ctx));
+ else {
+ auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
+ Ctx, TFModelUnderTrainingPath);
+ if (!MUTR || !MUTR->isValid()) {
+ Ctx.emitError("Could not load the policy model from the provided path");
+ return nullptr;
+ }
+ IsDoingInference = true;
+ MUTRPtr = MUTR.get();
+ Runner = std::move(MUTR);
+ }
+ std::unique_ptr<TrainingLogger> Logger;
+ if (!TrainingLog.empty())
+ Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr);
+
+ return std::make_unique<DevelopmentModeMLInlineAdvisor>(
+ M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference,
+ std::move(Logger));
+}
+#endif // defined(LLVM_HAVE_TF_API)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp
index 343406c9bba1..287c13278014 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -1,4 +1,4 @@
-//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
+//===---- DivergenceAnalysis.cpp --- Divergence Analysis Implementation ----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -84,7 +84,6 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <vector>
using namespace llvm;
@@ -97,42 +96,18 @@ DivergenceAnalysis::DivergenceAnalysis(
: F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
IsLCSSAForm(IsLCSSAForm) {}
-void DivergenceAnalysis::markDivergent(const Value &DivVal) {
+bool DivergenceAnalysis::markDivergent(const Value &DivVal) {
+ if (isAlwaysUniform(DivVal))
+ return false;
assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
assert(!isAlwaysUniform(DivVal) && "cannot be a divergent");
- DivergentValues.insert(&DivVal);
+ return DivergentValues.insert(&DivVal).second;
}
void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
UniformOverrides.insert(&UniVal);
}
-bool DivergenceAnalysis::updateTerminator(const Instruction &Term) const {
- if (Term.getNumSuccessors() <= 1)
- return false;
- if (auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
- assert(BranchTerm->isConditional());
- return isDivergent(*BranchTerm->getCondition());
- }
- if (auto *SwitchTerm = dyn_cast<SwitchInst>(&Term)) {
- return isDivergent(*SwitchTerm->getCondition());
- }
- if (isa<InvokeInst>(Term)) {
- return false; // ignore abnormal executions through landingpad
- }
-
- llvm_unreachable("unexpected terminator");
-}
-
-bool DivergenceAnalysis::updateNormalInstruction(const Instruction &I) const {
- // TODO function calls with side effects, etc
- for (const auto &Op : I.operands()) {
- if (isDivergent(*Op))
- return true;
- }
- return false;
-}
-
bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
const Value &Val) const {
const auto *Inst = dyn_cast<const Instruction>(&Val);
@@ -143,39 +118,13 @@ bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
for (const auto *Loop = LI.getLoopFor(Inst->getParent());
Loop != RegionLoop && !Loop->contains(&ObservingBlock);
Loop = Loop->getParentLoop()) {
- if (DivergentLoops.find(Loop) != DivergentLoops.end())
+ if (DivergentLoops.contains(Loop))
return true;
}
return false;
}
-bool DivergenceAnalysis::updatePHINode(const PHINode &Phi) const {
- // joining divergent disjoint path in Phi parent block
- if (!Phi.hasConstantOrUndefValue() && isJoinDivergent(*Phi.getParent())) {
- return true;
- }
-
- // An incoming value could be divergent by itself.
- // Otherwise, an incoming value could be uniform within the loop
- // that carries its definition but it may appear divergent
- // from outside the loop. This happens when divergent loop exits
- // drop definitions of that uniform value in different iterations.
- //
- // for (int i = 0; i < n; ++i) { // 'i' is uniform inside the loop
- // if (i % thread_id == 0) break; // divergent loop exit
- // }
- // int divI = i; // divI is divergent
- for (size_t i = 0; i < Phi.getNumIncomingValues(); ++i) {
- const auto *InVal = Phi.getIncomingValue(i);
- if (isDivergent(*Phi.getIncomingValue(i)) ||
- isTemporalDivergent(*Phi.getParent(), *InVal)) {
- return true;
- }
- }
- return false;
-}
-
bool DivergenceAnalysis::inRegion(const Instruction &I) const {
return I.getParent() && inRegion(*I.getParent());
}
@@ -184,69 +133,103 @@ bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
}
-static bool usesLiveOut(const Instruction &I, const Loop *DivLoop) {
- for (auto &Op : I.operands()) {
- auto *OpInst = dyn_cast<Instruction>(&Op);
+void DivergenceAnalysis::pushUsers(const Value &V) {
+ const auto *I = dyn_cast<const Instruction>(&V);
+
+ if (I && I->isTerminator()) {
+ analyzeControlDivergence(*I);
+ return;
+ }
+
+ for (const auto *User : V.users()) {
+ const auto *UserInst = dyn_cast<const Instruction>(User);
+ if (!UserInst)
+ continue;
+
+ // only compute divergent inside loop
+ if (!inRegion(*UserInst))
+ continue;
+
+ // All users of divergent values are immediately divergent.
+ if (markDivergent(*UserInst))
+ Worklist.push_back(UserInst);
+ }
+}
+
+static const Instruction *getIfCarriedInstruction(const Use &U,
+ const Loop &DivLoop) {
+ const auto *I = dyn_cast<const Instruction>(&U);
+ if (!I)
+ return nullptr;
+ if (!DivLoop.contains(I))
+ return nullptr;
+ return I;
+}
+
+void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I,
+ const Loop &OuterDivLoop) {
+ if (isAlwaysUniform(I))
+ return;
+ if (isDivergent(I))
+ return;
+
+ LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << I.getName() << "\n");
+ assert((isa<PHINode>(I) || !IsLCSSAForm) &&
+ "In LCSSA form all users of loop-exiting defs are Phi nodes.");
+ for (const Use &Op : I.operands()) {
+ const auto *OpInst = getIfCarriedInstruction(Op, OuterDivLoop);
if (!OpInst)
continue;
- if (DivLoop->contains(OpInst->getParent()))
- return true;
+ if (markDivergent(I))
+ pushUsers(I);
+ return;
}
- return false;
}
// marks all users of loop-carried values of the loop headed by LoopHeader as
// divergent
-void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) {
- auto *DivLoop = LI.getLoopFor(&LoopHeader);
- assert(DivLoop && "loopHeader is not actually part of a loop");
+void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit,
+ const Loop &OuterDivLoop) {
+ // All users are in immediate exit blocks
+ if (IsLCSSAForm) {
+ for (const auto &Phi : DivExit.phis()) {
+ analyzeTemporalDivergence(Phi, OuterDivLoop);
+ }
+ return;
+ }
- SmallVector<BasicBlock *, 8> TaintStack;
- DivLoop->getExitBlocks(TaintStack);
+ // For non-LCSSA we have to follow all live out edges wherever they may lead.
+ const BasicBlock &LoopHeader = *OuterDivLoop.getHeader();
+ SmallVector<const BasicBlock *, 8> TaintStack;
+ TaintStack.push_back(&DivExit);
// Otherwise potential users of loop-carried values could be anywhere in the
// dominance region of DivLoop (including its fringes for phi nodes)
DenseSet<const BasicBlock *> Visited;
- for (auto *Block : TaintStack) {
- Visited.insert(Block);
- }
- Visited.insert(&LoopHeader);
+ Visited.insert(&DivExit);
- while (!TaintStack.empty()) {
- auto *UserBlock = TaintStack.back();
- TaintStack.pop_back();
+ do {
+ auto *UserBlock = TaintStack.pop_back_val();
// don't spread divergence beyond the region
if (!inRegion(*UserBlock))
continue;
- assert(!DivLoop->contains(UserBlock) &&
+ assert(!OuterDivLoop.contains(UserBlock) &&
"irreducible control flow detected");
// phi nodes at the fringes of the dominance region
if (!DT.dominates(&LoopHeader, UserBlock)) {
// all PHI nodes of UserBlock become divergent
for (auto &Phi : UserBlock->phis()) {
- Worklist.push_back(&Phi);
+ analyzeTemporalDivergence(Phi, OuterDivLoop);
}
continue;
}
- // taint outside users of values carried by DivLoop
+ // Taint outside users of values carried by OuterDivLoop.
for (auto &I : *UserBlock) {
- if (isAlwaysUniform(I))
- continue;
- if (isDivergent(I))
- continue;
- if (!usesLiveOut(I, DivLoop))
- continue;
-
- markDivergent(I);
- if (I.isTerminator()) {
- propagateBranchDivergence(I);
- } else {
- pushUsers(I);
- }
+ analyzeTemporalDivergence(I, OuterDivLoop);
}
// visit all blocks in the dominance region
@@ -256,56 +239,57 @@ void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) {
}
TaintStack.push_back(SuccBlock);
}
- }
+ } while (!TaintStack.empty());
}
-void DivergenceAnalysis::pushPHINodes(const BasicBlock &Block) {
- for (const auto &Phi : Block.phis()) {
- if (isDivergent(Phi))
- continue;
- Worklist.push_back(&Phi);
+void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit,
+ const Loop &InnerDivLoop) {
+ LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n");
+
+ // Find outer-most loop that does not contain \p DivExit
+ const Loop *DivLoop = &InnerDivLoop;
+ const Loop *OuterDivLoop = DivLoop;
+ const Loop *ExitLevelLoop = LI.getLoopFor(&DivExit);
+ const unsigned LoopExitDepth =
+ ExitLevelLoop ? ExitLevelLoop->getLoopDepth() : 0;
+ while (DivLoop && DivLoop->getLoopDepth() > LoopExitDepth) {
+ DivergentLoops.insert(DivLoop); // all crossed loops are divergent
+ OuterDivLoop = DivLoop;
+ DivLoop = DivLoop->getParentLoop();
}
-}
-
-void DivergenceAnalysis::pushUsers(const Value &V) {
- for (const auto *User : V.users()) {
- const auto *UserInst = dyn_cast<const Instruction>(User);
- if (!UserInst)
- continue;
-
- if (isDivergent(*UserInst))
- continue;
+ LLVM_DEBUG(dbgs() << "\tOuter-most left loop: " << OuterDivLoop->getName()
+ << "\n");
- // only compute divergent inside loop
- if (!inRegion(*UserInst))
- continue;
- Worklist.push_back(UserInst);
- }
+ analyzeLoopExitDivergence(DivExit, *OuterDivLoop);
}
-bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock,
- const Loop *BranchLoop) {
- LLVM_DEBUG(dbgs() << "\tpropJoinDiv " << JoinBlock.getName() << "\n");
+// this is a divergent join point - mark all phi nodes as divergent and push
+// them onto the stack.
+void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
+ LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName()
+ << "\n");
// ignore divergence outside the region
if (!inRegion(JoinBlock)) {
- return false;
+ return;
}
// push non-divergent phi nodes in JoinBlock to the worklist
- pushPHINodes(JoinBlock);
-
- // disjoint-paths divergent at JoinBlock
- markBlockJoinDivergent(JoinBlock);
-
- // JoinBlock is a divergent loop exit
- return BranchLoop && !BranchLoop->contains(&JoinBlock);
+ for (const auto &Phi : JoinBlock.phis()) {
+ if (isDivergent(Phi))
+ continue;
+ // FIXME: Theoretically, the 'undef' value could be replaced by any other
+ // value causing spurious divergence.
+ if (Phi.hasConstantOrUndefValue())
+ continue;
+ if (markDivergent(Phi))
+ Worklist.push_back(&Phi);
+ }
}
-void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) {
- LLVM_DEBUG(dbgs() << "propBranchDiv " << Term.getParent()->getName() << "\n");
-
- markDivergent(Term);
+void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) {
+ LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName()
+ << "\n");
// Don't propagate divergence from unreachable blocks.
if (!DT.isReachableFromEntry(Term.getParent()))
@@ -313,113 +297,45 @@ void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) {
const auto *BranchLoop = LI.getLoopFor(Term.getParent());
- // whether there is a divergent loop exit from BranchLoop (if any)
- bool IsBranchLoopDivergent = false;
+ const auto &DivDesc = SDA.getJoinBlocks(Term);
- // iterate over all blocks reachable by disjoint from Term within the loop
- // also iterates over loop exits that become divergent due to Term.
- for (const auto *JoinBlock : SDA.join_blocks(Term)) {
- IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
+ // Iterate over all blocks now reachable by a disjoint path join
+ for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
+ taintAndPushPhiNodes(*JoinBlock);
}
- // Branch loop is a divergent loop due to the divergent branch in Term
- if (IsBranchLoopDivergent) {
- assert(BranchLoop);
- if (!DivergentLoops.insert(BranchLoop).second) {
- return;
- }
- propagateLoopDivergence(*BranchLoop);
- }
-}
-
-void DivergenceAnalysis::propagateLoopDivergence(const Loop &ExitingLoop) {
- LLVM_DEBUG(dbgs() << "propLoopDiv " << ExitingLoop.getName() << "\n");
-
- // don't propagate beyond region
- if (!inRegion(*ExitingLoop.getHeader()))
- return;
-
- const auto *BranchLoop = ExitingLoop.getParentLoop();
-
- // Uses of loop-carried values could occur anywhere
- // within the dominance region of the definition. All loop-carried
- // definitions are dominated by the loop header (reducible control).
- // Thus all users have to be in the dominance region of the loop header,
- // except PHI nodes that can also live at the fringes of the dom region
- // (incoming defining value).
- if (!IsLCSSAForm)
- taintLoopLiveOuts(*ExitingLoop.getHeader());
-
- // whether there is a divergent loop exit from BranchLoop (if any)
- bool IsBranchLoopDivergent = false;
-
- // iterate over all blocks reachable by disjoint paths from exits of
- // ExitingLoop also iterates over loop exits (of BranchLoop) that in turn
- // become divergent.
- for (const auto *JoinBlock : SDA.join_blocks(ExitingLoop)) {
- IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
- }
-
- // Branch loop is a divergent due to divergent loop exit in ExitingLoop
- if (IsBranchLoopDivergent) {
- assert(BranchLoop);
- if (!DivergentLoops.insert(BranchLoop).second) {
- return;
- }
- propagateLoopDivergence(*BranchLoop);
+ assert(DivDesc.LoopDivBlocks.empty() || BranchLoop);
+ for (const auto *DivExitBlock : DivDesc.LoopDivBlocks) {
+ propagateLoopExitDivergence(*DivExitBlock, *BranchLoop);
}
}
void DivergenceAnalysis::compute() {
- for (auto *DivVal : DivergentValues) {
+ // Initialize worklist.
+ auto DivValuesCopy = DivergentValues;
+ for (const auto *DivVal : DivValuesCopy) {
+ assert(isDivergent(*DivVal) && "Worklist invariant violated!");
pushUsers(*DivVal);
}
- // propagate divergence
+ // All values on the Worklist are divergent.
+ // Their users may not have been updated yet.
while (!Worklist.empty()) {
const Instruction &I = *Worklist.back();
Worklist.pop_back();
- // maintain uniformity of overrides
- if (isAlwaysUniform(I))
- continue;
-
- bool WasDivergent = isDivergent(I);
- if (WasDivergent)
- continue;
-
- // propagate divergence caused by terminator
- if (I.isTerminator()) {
- if (updateTerminator(I)) {
- // propagate control divergence to affected instructions
- propagateBranchDivergence(I);
- continue;
- }
- }
-
- // update divergence of I due to divergent operands
- bool DivergentUpd = false;
- const auto *Phi = dyn_cast<const PHINode>(&I);
- if (Phi) {
- DivergentUpd = updatePHINode(*Phi);
- } else {
- DivergentUpd = updateNormalInstruction(I);
- }
-
// propagate value divergence to users
- if (DivergentUpd) {
- markDivergent(I);
- pushUsers(I);
- }
+ assert(isDivergent(I) && "Worklist invariant violated!");
+ pushUsers(I);
}
}
bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
- return UniformOverrides.find(&V) != UniformOverrides.end();
+ return UniformOverrides.contains(&V);
}
bool DivergenceAnalysis::isDivergent(const Value &V) const {
- return DivergentValues.find(&V) != DivergentValues.end();
+ return DivergentValues.contains(&V);
}
bool DivergenceAnalysis::isDivergentUse(const Use &U) const {
@@ -444,7 +360,7 @@ GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
const PostDominatorTree &PDT,
const LoopInfo &LI,
const TargetTransformInfo &TTI)
- : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, false) {
+ : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, /* LCSSA */ false) {
for (auto &I : instructions(F)) {
if (TTI.isSourceOfDivergence(&I)) {
DA.markDivergent(I);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp
index 9594da0a4f91..8ac7d9d4efd0 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp
@@ -32,8 +32,7 @@ bool DomTreeUpdater::isUpdateValid(
// Since isUpdateValid() must be called *after* the Terminator of From is
// altered we can determine if the update is unnecessary for batch updates
// or invalid for a single update.
- const bool HasEdge = llvm::any_of(
- successors(From), [To](const BasicBlock *B) { return B == To; });
+ const bool HasEdge = llvm::is_contained(successors(From), To);
// If the IR does not match the update,
// 1. In batch updates, this update is unnecessary.
@@ -167,7 +166,7 @@ bool DomTreeUpdater::hasPendingPostDomTreeUpdates() const {
bool DomTreeUpdater::isBBPendingDeletion(llvm::BasicBlock *DelBB) const {
if (Strategy == UpdateStrategy::Eager || DeletedBBs.empty())
return false;
- return DeletedBBs.count(DelBB) != 0;
+ return DeletedBBs.contains(DelBB);
}
// The DT and PDT require the nodes related to updates
diff --git a/contrib/llvm-project/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm-project/llvm/lib/Analysis/EHPersonalities.cpp
index 2242541696a4..a982f266b2d6 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/EHPersonalities.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/EHPersonalities.cpp
@@ -24,22 +24,23 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
if (!F)
return EHPersonality::Unknown;
return StringSwitch<EHPersonality>(F->getName())
- .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
- .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
- .Case("__gxx_personality_seh0", EHPersonality::GNU_CXX)
- .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj)
- .Case("__gcc_personality_v0", EHPersonality::GNU_C)
- .Case("__gcc_personality_seh0", EHPersonality::GNU_C)
- .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj)
- .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
- .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
- .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
- .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
- .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
- .Case("ProcessCLRException", EHPersonality::CoreCLR)
- .Case("rust_eh_personality", EHPersonality::Rust)
- .Case("__gxx_wasm_personality_v0", EHPersonality::Wasm_CXX)
- .Default(EHPersonality::Unknown);
+ .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
+ .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
+ .Case("__gxx_personality_seh0", EHPersonality::GNU_CXX)
+ .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj)
+ .Case("__gcc_personality_v0", EHPersonality::GNU_C)
+ .Case("__gcc_personality_seh0", EHPersonality::GNU_C)
+ .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj)
+ .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
+ .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
+ .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
+ .Case("__C_specific_handler", EHPersonality::MSVC_TableSEH)
+ .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
+ .Case("ProcessCLRException", EHPersonality::CoreCLR)
+ .Case("rust_eh_personality", EHPersonality::Rust)
+ .Case("__gxx_wasm_personality_v0", EHPersonality::Wasm_CXX)
+ .Case("__xlcxx_personality_v1", EHPersonality::XL_CXX)
+ .Default(EHPersonality::Unknown);
}
StringRef llvm::getEHPersonalityName(EHPersonality Pers) {
@@ -51,11 +52,14 @@ StringRef llvm::getEHPersonalityName(EHPersonality Pers) {
case EHPersonality::GNU_C_SjLj: return "__gcc_personality_sj0";
case EHPersonality::GNU_ObjC: return "__objc_personality_v0";
case EHPersonality::MSVC_X86SEH: return "_except_handler3";
- case EHPersonality::MSVC_Win64SEH: return "__C_specific_handler";
+ case EHPersonality::MSVC_TableSEH:
+ return "__C_specific_handler";
case EHPersonality::MSVC_CXX: return "__CxxFrameHandler3";
case EHPersonality::CoreCLR: return "ProcessCLRException";
case EHPersonality::Rust: return "rust_eh_personality";
case EHPersonality::Wasm_CXX: return "__gxx_wasm_personality_v0";
+ case EHPersonality::XL_CXX:
+ return "__xlcxx_personality_v1";
case EHPersonality::Unknown: llvm_unreachable("Unknown EHPersonality!");
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
new file mode 100644
index 000000000000..33519038e225
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -0,0 +1,88 @@
+//===- FunctionPropertiesAnalysis.cpp - Function Properties Analysis ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the FunctionPropertiesInfo and FunctionPropertiesAnalysis
+// classes used to extract function properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
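+// Compute the properties in a single walk over \p F: count basic blocks,
+// successors of conditional terminators, direct calls to defined functions,
+// loads and stores, and the maximum loop depth and top-level loop count.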
+FunctionPropertiesInfo
+FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F,
+ const LoopInfo &LI) {
+
+ FunctionPropertiesInfo FPI;
+
+ FPI.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
+
+ for (const auto &BB : F) {
+ ++FPI.BasicBlockCount;
+
+ if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
+ if (BI->isConditional())
+ FPI.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors();
+ } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
+ FPI.BlocksReachedFromConditionalInstruction +=
+ (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
+ }
+
+ for (const auto &I : BB) {
+ if (auto *CS = dyn_cast<CallBase>(&I)) {
+ const auto *Callee = CS->getCalledFunction();
+ if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
+ ++FPI.DirectCallsToDefinedFunctions;
+ }
+ if (I.getOpcode() == Instruction::Load) {
+ ++FPI.LoadInstCount;
+ } else if (I.getOpcode() == Instruction::Store) {
+ ++FPI.StoreInstCount;
+ }
+ }
+ // Loop Depth of the Basic Block
+ int64_t LoopDepth = LI.getLoopDepth(&BB);
+ if (FPI.MaxLoopDepth < LoopDepth)
+ FPI.MaxLoopDepth = LoopDepth;
+ }
+ FPI.TopLevelLoopCount += llvm::size(LI);
+ return FPI;
+}
+
+void FunctionPropertiesInfo::print(raw_ostream &OS) const {
+ OS << "BasicBlockCount: " << BasicBlockCount << "\n"
+ << "BlocksReachedFromConditionalInstruction: "
+ << BlocksReachedFromConditionalInstruction << "\n"
+ << "Uses: " << Uses << "\n"
+ << "DirectCallsToDefinedFunctions: " << DirectCallsToDefinedFunctions
+ << "\n"
+ << "LoadInstCount: " << LoadInstCount << "\n"
+ << "StoreInstCount: " << StoreInstCount << "\n"
+ << "MaxLoopDepth: " << MaxLoopDepth << "\n"
+ << "TopLevelLoopCount: " << TopLevelLoopCount << "\n\n";
+}
+
+AnalysisKey FunctionPropertiesAnalysis::Key;
+
+FunctionPropertiesInfo
+FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
+ return FunctionPropertiesInfo::getFunctionPropertiesInfo(
+ F, FAM.getResult<LoopAnalysis>(F));
+}
+
+PreservedAnalyses
+FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
+ OS << "Printing analysis results of CFA for function "
+ << "'" << F.getName() << "':"
+ << "\n";
+ AM.getResult<FunctionPropertiesAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
index 8c8ccf04ebba..145baf82b65b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -44,7 +44,7 @@ STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
// An option to enable unsafe alias results from the GlobalsModRef analysis.
// When enabled, GlobalsModRef will provide no-alias results which in extremely
// rare cases may not be conservatively correct. In particular, in the face of
-// transforms which cause assymetry between how effective GetUnderlyingObject
+// transforms which cause asymmetry between how effective getUnderlyingObject
// is for two pointers, it may produce incorrect results.
//
// These unsafe results have been returned by GMR for many years without
@@ -367,7 +367,8 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
} else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
if (AnalyzeUsesOfPointer(I, Readers, Writers))
return true;
- } else if (Operator::getOpcode(I) == Instruction::BitCast) {
+ } else if (Operator::getOpcode(I) == Instruction::BitCast ||
+ Operator::getOpcode(I) == Instruction::AddrSpaceCast) {
if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
return true;
} else if (auto *Call = dyn_cast<CallBase>(I)) {
@@ -435,8 +436,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
continue;
// Check the value being stored.
- Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
- GV->getParent()->getDataLayout());
+ Value *Ptr = getUnderlyingObject(SI->getOperand(0));
if (!isAllocLikeFn(Ptr, &GetTLI(*SI->getFunction())))
return false; // Too hard to analyze.
@@ -661,12 +661,12 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
return false;
if (auto *LI = dyn_cast<LoadInst>(Input)) {
- Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL));
+ Inputs.push_back(getUnderlyingObject(LI->getPointerOperand()));
continue;
}
if (auto *SI = dyn_cast<SelectInst>(Input)) {
- const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
- const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+ const Value *LHS = getUnderlyingObject(SI->getTrueValue());
+ const Value *RHS = getUnderlyingObject(SI->getFalseValue());
if (Visited.insert(LHS).second)
Inputs.push_back(LHS);
if (Visited.insert(RHS).second)
@@ -675,7 +675,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
}
if (auto *PN = dyn_cast<PHINode>(Input)) {
for (const Value *Op : PN->incoming_values()) {
- Op = GetUnderlyingObject(Op, DL);
+ Op = getUnderlyingObject(Op);
if (Visited.insert(Op).second)
Inputs.push_back(Op);
}
@@ -774,7 +774,7 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
if (auto *LI = dyn_cast<LoadInst>(Input)) {
// A pointer loaded from a global would have been captured, and we know
// that the global is non-escaping, so no alias.
- const Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL);
+ const Value *Ptr = getUnderlyingObject(LI->getPointerOperand());
if (isNonEscapingGlobalNoAliasWithLoad(GV, Ptr, Depth, DL))
// The load does not alias with GV.
continue;
@@ -782,8 +782,8 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
return false;
}
if (auto *SI = dyn_cast<SelectInst>(Input)) {
- const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
- const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+ const Value *LHS = getUnderlyingObject(SI->getTrueValue());
+ const Value *RHS = getUnderlyingObject(SI->getFalseValue());
if (Visited.insert(LHS).second)
Inputs.push_back(LHS);
if (Visited.insert(RHS).second)
@@ -792,7 +792,7 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
}
if (auto *PN = dyn_cast<PHINode>(Input)) {
for (const Value *Op : PN->incoming_values()) {
- Op = GetUnderlyingObject(Op, DL);
+ Op = getUnderlyingObject(Op);
if (Visited.insert(Op).second)
Inputs.push_back(Op);
}
@@ -827,8 +827,10 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB,
AAQueryInfo &AAQI) {
// Get the base object these pointers point to.
- const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL);
- const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL);
+ const Value *UV1 =
+ getUnderlyingObject(LocA.Ptr->stripPointerCastsAndInvariantGroups());
+ const Value *UV2 =
+ getUnderlyingObject(LocB.Ptr->stripPointerCastsAndInvariantGroups());
// If either of the underlying values is a global, they may be non-addr-taken
// globals, which we can answer queries about.
@@ -915,14 +917,15 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
// is based on GV, return the conservative result.
for (auto &A : Call->args()) {
SmallVector<const Value*, 4> Objects;
- GetUnderlyingObjects(A, Objects, DL);
+ getUnderlyingObjects(A, Objects);
// All objects must be identified.
if (!all_of(Objects, isIdentifiedObject) &&
// Try ::alias to see if all objects are known not to alias GV.
!all_of(Objects, [&](const Value *V) {
- return this->alias(MemoryLocation(V), MemoryLocation(GV), AAQI) ==
- NoAlias;
+ return this->alias(MemoryLocation::getBeforeOrAfter(V),
+ MemoryLocation::getBeforeOrAfter(GV),
+ AAQI) == NoAlias;
}))
return ConservativeResult;
@@ -942,7 +945,7 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
// If we are asking for mod/ref info of a direct call with a pointer to a
// global we are tracking, return information if we have it.
if (const GlobalValue *GV =
- dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
+ dyn_cast<GlobalValue>(getUnderlyingObject(Loc.Ptr)))
// If GV is internal to this IR and there is no function with local linkage
// that has had their address taken, keep looking for a tighter ModRefInfo.
if (GV->hasLocalLinkage() && !UnknownFunctionsWithLocalLinkage)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
new file mode 100644
index 000000000000..25443a667908
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -0,0 +1,937 @@
+//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Implementation file for the IRSimilarityIdentifier for identifying
+// similarities in IR including the IRInstructionMapper.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/SuffixTree.h"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
+ IRInstructionDataList &IDList)
+ : Inst(&I), Legal(Legality), IDL(&IDList) {
+ // We check whether we have a comparison instruction. If we do, we find the
+ // "less than" version of the predicate for consistency of comparison
+ // instructions throughout the program.
+ if (CmpInst *C = dyn_cast<CmpInst>(&I)) {
+ CmpInst::Predicate Predicate = predicateForConsistency(C);
+ if (Predicate != C->getPredicate())
+ RevisedPredicate = Predicate;
+ }
+
+ // Here we collect the operands and their types for determining whether
+ // the structure of the operand use matches between two different candidates.
+ for (Use &OI : I.operands()) {
+ if (isa<CmpInst>(I) && RevisedPredicate.hasValue()) {
+ // If we have a CmpInst where the predicate is reversed, it means the
+ // operands must be reversed as well.
+ OperVals.insert(OperVals.begin(), OI.get());
+ continue;
+ }
+
+ OperVals.push_back(OI.get());
+ }
+}
+
+CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
+ switch (CI->getPredicate()) {
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE:
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_SGE:
+ case CmpInst::ICMP_UGE:
+ return CI->getSwappedPredicate();
+ default:
+ return CI->getPredicate();
+ }
+}
+
+CmpInst::Predicate IRInstructionData::getPredicate() const {
+ assert(isa<CmpInst>(Inst) &&
+ "Can only get a predicate from a compare instruction");
+
+ if (RevisedPredicate.hasValue())
+ return RevisedPredicate.getValue();
+
+ return cast<CmpInst>(Inst)->getPredicate();
+}
+
+static StringRef getCalledFunctionName(CallInst &CI) {
+ assert(CI.getCalledFunction() != nullptr && "Called Function is nullptr?");
+
+ return CI.getCalledFunction()->getName();
+}
+
+bool IRSimilarity::isClose(const IRInstructionData &A,
+ const IRInstructionData &B) {
+
+ if (!A.Legal || !B.Legal)
+ return false;
+
+ // Check if we are performing the same sort of operation on the same types
+ // but not on the same values.
+ if (!A.Inst->isSameOperationAs(B.Inst)) {
+ // If there is a predicate, this means that either there is a swapped
+ // predicate, or that the types are different; we want to make sure that
+ // the predicates are equivalent via swapping.
+ if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
+
+ if (A.getPredicate() != B.getPredicate())
+ return false;
+
+ // If the predicates are the same via swap, make sure that the types are
+ // still the same.
+ auto ZippedTypes = zip(A.OperVals, B.OperVals);
+
+ return all_of(
+ ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
+ return std::get<0>(R)->getType() == std::get<1>(R)->getType();
+ });
+ }
+
+ return false;
+ }
+
+ // Since any GEP Instruction operands after the first operand cannot be
+ // defined by a register, we must make sure that the operands after the first
+ // are the same in the two instructions
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
+ auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
+
+ // If the instructions do not have the same inbounds restrictions, we do
+ // not consider them the same.
+ if (GEP->isInBounds() != OtherGEP->isInBounds())
+ return false;
+
+ auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
+
+ // We drop the first index pair since we do not care about it; we only care
+ // about the following indices, which must be exactly the same for the
+ // instructions to be considered similar.
+ return all_of(drop_begin(ZippedOperands),
+ [](std::tuple<llvm::Use &, llvm::Use &> R) {
+ return std::get<0>(R) == std::get<1>(R);
+ });
+ }
+
+ // If the instructions are calls, we make sure that the called function name
+ // is the same. We already know that the types match, since isSameOperationAs
+ // is true.
+ if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
+ CallInst *CIA = cast<CallInst>(A.Inst);
+ CallInst *CIB = cast<CallInst>(B.Inst);
+ if (getCalledFunctionName(*CIA).compare(getCalledFunctionName(*CIB)) != 0)
+ return false;
+ }
+
+ return true;
+}
+
+// TODO: This is the same as the MachineOutliner, and should be consolidated
+// into the same interface.
+void IRInstructionMapper::convertToUnsignedVec(
+ BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
+ std::vector<unsigned> &IntegerMapping) {
+ BasicBlock::iterator It = BB.begin();
+
+ std::vector<unsigned> IntegerMappingForBB;
+ std::vector<IRInstructionData *> InstrListForBB;
+
+ HaveLegalRange = false;
+ CanCombineWithPrevInstr = false;
+ AddedIllegalLastTime = true;
+
+ for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
+ switch (InstClassifier.visit(*It)) {
+ case InstrType::Legal:
+ mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
+ break;
+ case InstrType::Illegal:
+ mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
+ break;
+ case InstrType::Invisible:
+ AddedIllegalLastTime = false;
+ break;
+ }
+ }
+
+ if (HaveLegalRange) {
+ mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
+ for_each(InstrListForBB,
+ [this](IRInstructionData *ID) { this->IDL->push_back(*ID); });
+ llvm::append_range(InstrList, InstrListForBB);
+ llvm::append_range(IntegerMapping, IntegerMappingForBB);
+ }
+}
+
+// TODO: This is the same as the MachineOutliner, and should be consolidated
+// into the same interface.
+unsigned IRInstructionMapper::mapToLegalUnsigned(
+ BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
+ std::vector<IRInstructionData *> &InstrListForBB) {
+ // We added something legal, so we should unset the AddedIllegalLastTime
+ // flag.
+ AddedIllegalLastTime = false;
+
+ // If we have at least two adjacent legal instructions (which may have
+ // invisible instructions in between), remember that.
+ if (CanCombineWithPrevInstr)
+ HaveLegalRange = true;
+ CanCombineWithPrevInstr = true;
+
+ // Get the integer for this instruction or give it the current
+ // LegalInstrNumber.
+ IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
+ InstrListForBB.push_back(ID);
+
+ // Add to the instruction list
+ bool WasInserted;
+ DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
+ ResultIt;
+ std::tie(ResultIt, WasInserted) =
+ InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
+ unsigned INumber = ResultIt->second;
+
+ // There was an insertion.
+ if (WasInserted)
+ LegalInstrNumber++;
+
+ IntegerMappingForBB.push_back(INumber);
+
+ // Make sure we don't overflow or use any integers reserved by the DenseMap.
+ assert(LegalInstrNumber < IllegalInstrNumber &&
+ "Instruction mapping overflow!");
+
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
+ "Tried to assign DenseMap tombstone or empty key to instruction.");
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
+ "Tried to assign DenseMap tombstone or empty key to instruction.");
+
+ return INumber;
+}
+
+IRInstructionData *
+IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
+ IRInstructionDataList &IDL) {
+ return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
+}
+
+IRInstructionDataList *
+IRInstructionMapper::allocateIRInstructionDataList() {
+ return new (IDLAllocator->Allocate()) IRInstructionDataList();
+}
+
+// TODO: This is the same as the MachineOutliner, and should be consolidated
+// into the same interface.
+unsigned IRInstructionMapper::mapToIllegalUnsigned(
+ BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
+ std::vector<IRInstructionData *> &InstrListForBB, bool End) {
+ // Can't combine an illegal instruction. Set the flag.
+ CanCombineWithPrevInstr = false;
+
+ // Only add one illegal number per range of legal numbers.
+ if (AddedIllegalLastTime)
+ return IllegalInstrNumber;
+
+ IRInstructionData *ID = nullptr;
+ if (!End)
+ ID = allocateIRInstructionData(*It, false, *IDL);
+ InstrListForBB.push_back(ID);
+
+ // Remember that we added an illegal number last time.
+ AddedIllegalLastTime = true;
+ unsigned INumber = IllegalInstrNumber;
+ IntegerMappingForBB.push_back(IllegalInstrNumber--);
+
+ assert(LegalInstrNumber < IllegalInstrNumber &&
+ "Instruction mapping overflow!");
+
+ assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+
+ assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+
+ return INumber;
+}
+
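+// Build the candidate's local value numbering over the instructions in the
+// range [StartIdx, StartIdx + Len).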
+IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
+ IRInstructionData *FirstInstIt,
+ IRInstructionData *LastInstIt)
+ : StartIdx(StartIdx), Len(Len) {
+
+ assert(FirstInstIt != nullptr && "Instruction is nullptr!");
+ assert(LastInstIt != nullptr && "Instruction is nullptr!");
+ assert(StartIdx + Len > StartIdx &&
+ "Overflow for IRSimilarityCandidate range?");
+ assert(Len - 1 == static_cast<unsigned>(std::distance(
+ iterator(FirstInstIt), iterator(LastInstIt))) &&
+ "Length of the first and last IRInstructionData do not match the "
+ "given length");
+
+ // We iterate over the given instructions, and map each unique value
+ // to a unique number in the IRSimilarityCandidate ValueToNumber and
+ // NumberToValue maps. A constant gets its own value globally; the individual
+ // uses of the constants are not considered to be unique.
+ //
+ // IR: Mapping Added:
+ // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
+ // %add2 = add i32 %a, %1 %add2 -> 4
+ // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
+ //
+ // which, when replaced with global values starting from 1, would be
+ //
+ // 3 = add i32 1, 2
+ // 4 = add i32 1, 3
+ // 6 = add i32 5, 2
+ unsigned LocalValNumber = 1;
+ IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
+ for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
+ // Map the operand values to an unsigned integer if it does not already
+ // have an unsigned integer assigned to it.
+ for (Value *Arg : ID->OperVals)
+ if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
+ ValueToNumber.try_emplace(Arg, LocalValNumber);
+ NumberToValue.try_emplace(LocalValNumber, Arg);
+ LocalValNumber++;
+ }
+
+ // Map the instruction to an unsigned integer if it does not already
+ // exist in the mapping.
+ if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
+ ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
+ NumberToValue.try_emplace(LocalValNumber, ID->Inst);
+ LocalValNumber++;
+ }
+ }
+
+ // Setting the first and last instruction data pointers for the candidate. If
+ // we got through the entire for loop without hitting an assert, we know
+ // that both of these instructions are not nullptrs.
+ FirstInst = FirstInstIt;
+ LastInst = LastInstIt;
+}
+
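+// Two candidates are similar if they have the same length and every pair of
+// corresponding instructions is legal and close.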
+bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B) {
+ if (A.getLength() != B.getLength())
+ return false;
+
+ auto InstrDataForBoth =
+ zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
+
+ return all_of(InstrDataForBoth,
+ [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
+ IRInstructionData &A = std::get<0>(R);
+ IRInstructionData &B = std::get<1>(R);
+ if (!A.Legal || !B.Legal)
+ return false;
+ return isClose(A, B);
+ });
+}
+
+/// Determine if one or more of the assigned global value numbers for the
+/// operands in \p TargetValueNumbers is in the current mapping set for operand
+/// numbers in \p SourceOperands. The set of possible corresponding global
+/// value numbers are replaced with the most recent version of compatible
+/// values.
+///
+/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
+/// value number for the source IRSimilarityCandidate.
+/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
+/// IRSimilarityCandidate global value numbers to a set of possible numbers in
+/// the target.
+/// \param [in] SourceOperands - The operands in the original
+/// IRSimilarityCandidate in the current instruction.
+/// \param [in] TargetValueNumbers - The global value numbers of the operands in
+/// the corresponding Instruction in the other IRSimilarityCandidate.
+/// \returns true if there exists a possible mapping between the source
+/// Instruction operands and the target Instruction operands, and false if not.
+static bool checkNumberingAndReplaceCommutative(
+ const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
+ DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
+ ArrayRef<Value *> &SourceOperands,
+ DenseSet<unsigned> &TargetValueNumbers) {
+
+ DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
+
+ unsigned ArgVal;
+ bool WasInserted;
+
+ // Iterate over the operands in the source IRSimilarityCandidate to determine
+ // whether there exists an operand in the other IRSimilarityCandidate that
+ // creates a valid mapping of Value to Value between the
+ // IRSimilarityCandidates.
+ for (Value *V : SourceOperands) {
+ ArgVal = SourceValueToNumberMapping.find(V)->second;
+
+ std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
+ std::make_pair(ArgVal, TargetValueNumbers));
+
+ // Instead of finding a current mapping, we inserted a set. This means a
+ // mapping did not exist for the source Instruction operand, so it has no
+ // current constraints we need to check.
+ if (WasInserted)
+ continue;
+
+ // If a mapping already exists for the source operand to the values in the
+ // other IRSimilarityCandidate, we need to iterate over the items in the other
+ // IRSimilarityCandidate's Instruction to determine whether there is a valid
+ // mapping of Value to Value.
+ DenseSet<unsigned> NewSet;
+ for (unsigned &Curr : ValueMappingIt->second)
+ // If we can find the value in the mapping, we add it to the new set.
+ if (TargetValueNumbers.contains(Curr))
+ NewSet.insert(Curr);
+
+ // If we could not find a Value, return false.
+ if (NewSet.empty())
+ return false;
+
+ // Otherwise replace the old mapping with the newly constructed one.
+ if (NewSet.size() != ValueMappingIt->second.size())
+ ValueMappingIt->second.swap(NewSet);
+
+ // We have reached no conclusions about the mapping, and cannot remove
+ // any items from the other operands, so we move to check the next operand.
+ if (ValueMappingIt->second.size() != 1)
+ continue;
+
+ unsigned ValToRemove = *ValueMappingIt->second.begin();
+ // When there is only one item left in the mapping for an operand, remove
+ // the value from the other operands. If that leaves an operand with no
+ // mapping, return false; it means the overall mapping is wrong.
+ for (Value *InnerV : SourceOperands) {
+ if (V == InnerV)
+ continue;
+
+ unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
+ ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
+ if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
+ continue;
+
+ ValueMappingIt->second.erase(ValToRemove);
+ if (ValueMappingIt->second.empty())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// Determine if operand number \p TargetArgVal is in the current mapping set
+/// for operand number \p SourceArgVal.
+///
+/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
+/// value numbers from source IRSimilarityCandidate to target
+/// IRSimilarityCandidate.
+/// \param [in] SourceArgVal The global value number for an operand in the
+/// original candidate.
+/// \param [in] TargetArgVal The global value number for the corresponding
+/// operand in the other candidate.
+/// \returns True if there exists a mapping and false if not.
+bool checkNumberingAndReplace(
+ DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
+ unsigned SourceArgVal, unsigned TargetArgVal) {
+ // We are given two unsigned integers representing the global values of
+ // the operands in different IRSimilarityCandidates and a current mapping
+ // between the two.
+ //
+ // Source Operand GVN: 1
+ // Target Operand GVN: 2
+ // CurrentMapping: {1: {1, 2}}
+ //
+ // Since we have a mapping, and the target operand is contained in the set, we
+ // update it to:
+ // CurrentMapping: {1: {2}}
+ // and can return true. But, if the mapping was
+ // CurrentMapping: {1: {3}}
+ // we would return false.
+
+ bool WasInserted;
+ DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
+
+ std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
+ std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
+
+ // If we created a new mapping, then we are done.
+ if (WasInserted)
+ return true;
+
+ // If there is more than one option in the mapping set, and the target value
+ // is included in the mapping set, replace that set with one that only includes
+ // the target value, as it is the only valid mapping via the non-commutative
+ // instruction.
+
+ DenseSet<unsigned> &TargetSet = Val->second;
+ if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
+ TargetSet.clear();
+ TargetSet.insert(TargetArgVal);
+ return true;
+ }
+
+ // Return true if we can find the value in the set.
+ return TargetSet.contains(TargetArgVal);
+}
+
+bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
+ OperandMapping A, OperandMapping B) {
+ // Iterators to keep track of where we are in the operands for each
+ // Instruction.
+ ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
+ ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
+ unsigned OperandLength = A.OperVals.size();
+
+ // For each operand, get the value numbering and ensure it is consistent.
+ for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
+ unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
+ unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
+
+ // Attempt to add a set with only the target value. If there is no mapping
+ // we can create it here.
+ //
+ // For an instruction like a subtraction:
+ // IRSimilarityCandidateA: IRSimilarityCandidateB:
+ // %resultA = sub %a, %b %resultB = sub %d, %e
+ //
+ // We map %a -> %d and %b -> %e.
+ //
+ // And check to see whether their mapping is consistent in
+ // checkNumberingAndReplace.
+
+ if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
+ return false;
+
+ if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
+ return false;
+ }
+ return true;
+}
+
+bool IRSimilarityCandidate::compareCommutativeOperandMapping(
+ OperandMapping A, OperandMapping B) {
+ DenseSet<unsigned> ValueNumbersA;
+ DenseSet<unsigned> ValueNumbersB;
+
+ ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
+ ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
+ unsigned OperandLength = A.OperVals.size();
+
+ // Find the value number sets for the operands.
+ for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
+ ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
+ ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
+ }
+
+ // Iterate over the operands in the first IRSimilarityCandidate and make sure
+ // there exists a possible mapping with the operands in the second
+ // IRSimilarityCandidate.
+ if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
+ A.ValueNumberMapping, A.OperVals,
+ ValueNumbersB))
+ return false;
+
+ // Iterate over the operands in the second IRSimilarityCandidate and make sure
+ // there exists a possible mapping with the operands in the first
+ // IRSimilarityCandidate.
+ if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
+ B.ValueNumberMapping, B.OperVals,
+ ValueNumbersA))
+ return false;
+
+ return true;
+}
+
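+// Check structural similarity: the candidates must have the same length,
+// corresponding instructions must be close, and a consistent value-to-value
+// mapping must exist between the two candidates.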
+bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B) {
+ if (A.getLength() != B.getLength())
+ return false;
+
+ if (A.ValueToNumber.size() != B.ValueToNumber.size())
+ return false;
+
+ iterator ItA = A.begin();
+ iterator ItB = B.begin();
+
+ // These sets create a mapping between the values in one candidate and the
+ // values in the other candidate. If we create a set with one element,
+ // and that same element maps back to the original element in the candidate,
+ // we have a good mapping.
+ DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
+ DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
+ DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
+
+ bool WasInserted;
+
+ // Iterate over the instructions contained in each candidate
+ unsigned SectionLength = A.getStartIdx() + A.getLength();
+ for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
+ ItA++, ItB++, Loc++) {
+ // Make sure the instructions are similar to one another.
+ if (!isClose(*ItA, *ItB))
+ return false;
+
+ Instruction *IA = ItA->Inst;
+ Instruction *IB = ItB->Inst;
+
+ if (!ItA->Legal || !ItB->Legal)
+ return false;
+
+ // Get the operand sets for the instructions.
+ ArrayRef<Value *> OperValsA = ItA->OperVals;
+ ArrayRef<Value *> OperValsB = ItB->OperVals;
+
+ unsigned InstValA = A.ValueToNumber.find(IA)->second;
+ unsigned InstValB = B.ValueToNumber.find(IB)->second;
+
+ // Ensure that the mappings for the instructions exists.
+ std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
+ std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
+ if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
+ return false;
+
+ std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
+ std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
+ if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
+ return false;
+
+ // We have different paths for commutative instructions and non-commutative
+ // instructions since commutative instructions could allow multiple mappings
+ // to certain values.
+ if (IA->isCommutative() && !isa<FPMathOperator>(IA)) {
+ if (!compareCommutativeOperandMapping(
+ {A, OperValsA, ValueNumberMappingA},
+ {B, OperValsB, ValueNumberMappingB}))
+ return false;
+ continue;
+ }
+
+ // Handle the non-commutative cases.
+ if (!compareNonCommutativeOperandMapping(
+ {A, OperValsA, ValueNumberMappingA},
+ {B, OperValsB, ValueNumberMappingB}))
+ return false;
+ }
+ return true;
+}
+
+bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B) {
+ auto DoesOverlap = [](const IRSimilarityCandidate &X,
+ const IRSimilarityCandidate &Y) {
+ // Check:
+ // XXXXXX X starts before Y ends
+ // YYYYYYY Y starts after X starts
+ return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
+ };
+
+ return DoesOverlap(A, B) || DoesOverlap(B, A);
+}
+
+void IRSimilarityIdentifier::populateMapper(
+ Module &M, std::vector<IRInstructionData *> &InstrList,
+ std::vector<unsigned> &IntegerMapping) {
+
+ std::vector<IRInstructionData *> InstrListForModule;
+ std::vector<unsigned> IntegerMappingForModule;
+ // Iterate over the functions in the module to map each Instruction in each
+ // BasicBlock to an unsigned integer.
+ for (Function &F : M) {
+
+ if (F.empty())
+ continue;
+
+ for (BasicBlock &BB : F) {
+
+ if (BB.sizeWithoutDebug() < 2)
+ continue;
+
+ // BB has potential to have similarity since it has a size of at least 2,
+ // and can therefore match other regions of at least that size. Map it to a
+ // list of unsigned integers.
+ Mapper.convertToUnsignedVec(BB, InstrListForModule,
+ IntegerMappingForModule);
+ }
+ }
+
+ // Insert the InstrListForModule at the end of the overall InstrList so that
+ // we can have a long InstrList for the entire set of Modules being analyzed.
+ llvm::append_range(InstrList, InstrListForModule);
+ // Do the same as above, but for IntegerMapping.
+ llvm::append_range(IntegerMapping, IntegerMappingForModule);
+}
+
+void IRSimilarityIdentifier::populateMapper(
+ ArrayRef<std::unique_ptr<Module>> &Modules,
+ std::vector<IRInstructionData *> &InstrList,
+ std::vector<unsigned> &IntegerMapping) {
+
+ // Iterate over, and map the instructions in each module.
+ for (const std::unique_ptr<Module> &M : Modules)
+ populateMapper(*M, InstrList, IntegerMapping);
+}
+
+/// From a repeated subsequence, find all the different instances of the
+/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
+/// the IRInstructionData in the subsequence.
+///
+/// \param [in] Mapper - The instruction mapper for sanity checks.
+/// \param [in] InstrList - The vector that holds the instruction data.
+/// \param [in] IntegerMapping - The vector that holds the mapped integers.
+/// \param [out] CandsForRepSubstring - The vector to store the generated
+/// IRSimilarityCandidates.
+static void createCandidatesFromSuffixTree(
+ IRInstructionMapper Mapper, std::vector<IRInstructionData *> &InstrList,
+ std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
+ std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
+
+ unsigned StringLen = RS.Length;
+
+ // Create an IRSimilarityCandidate for each instance of this subsequence \p RS.
+ for (const unsigned &StartIdx : RS.StartIndices) {
+ unsigned EndIdx = StartIdx + StringLen - 1;
+
+ // Check that this subsequence does not contain an illegal instruction.
+ bool ContainsIllegal = false;
+ for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
+ unsigned Key = IntegerMapping[CurrIdx];
+ if (Key > Mapper.IllegalInstrNumber) {
+ ContainsIllegal = true;
+ break;
+ }
+ }
+
+ // If we have an illegal instruction, we should not create an
+ // IRSimilarityCandidate for this region.
+ if (ContainsIllegal)
+ continue;
+
+ // We are getting iterators to the instructions in this region of code
+ // by advancing the start and end indices from the start of the
+ // InstrList.
+ std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
+ std::advance(StartIt, StartIdx);
+ std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
+ std::advance(EndIt, EndIdx);
+
+ CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
+ }
+}
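
A standalone sketch (not part of the upstream patch) of the index arithmetic above, with a made-up ten-entry instruction list: a repeated substring of length 3 starting at index 5 covers the inclusive range 5..7, and the candidate's first and last entries are found by advancing from the beginning of the list.

#include <cassert>
#include <iterator>
#include <vector>

int main() {
  std::vector<int> InstrList(10);          // stand-in for IRInstructionData*
  for (int I = 0; I < 10; ++I)
    InstrList[I] = 100 + I;

  unsigned StartIdx = 5, StringLen = 3;
  unsigned EndIdx = StartIdx + StringLen - 1; // 7, inclusive end of the region

  auto StartIt = InstrList.begin();
  std::advance(StartIt, StartIdx);
  auto EndIt = InstrList.begin();
  std::advance(EndIt, EndIdx);

  assert(EndIdx == 7 && *StartIt == 105 && *EndIt == 107);
  return 0;
}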
+
+/// From the list of IRSimilarityCandidates, perform a comparison between each
+/// IRSimilarityCandidate to determine if there are overlapping
+/// IRInstructionData, or if they do not have the same structure.
+///
+/// \param [in] CandsForRepSubstring - The vector containing the
+/// IRSimilarityCandidates.
+/// \param [out] StructuralGroups - the mapping of unsigned integers to vectors
+/// of IRSimilarityCandidates where the IRSimilarityCandidates in each vector
+/// are structurally similar to one another.
+static void findCandidateStructures(
+ std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
+ DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
+ std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
+ InnerCandEndIt;
+
+ // IRSimilarityCandidates each have a structure for operand use. It is
+ // possible that two instances of the same subsequence have different
+ // structure. Each type of structure found is assigned a number. This
+ // DenseMap maps an IRSimilarityCandidate to the type of structure it
+ // was found to fit within.
+ DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
+
+ // Find the compatibility from each candidate to the others to determine
+ // which candidates overlap and which have the same structure by mapping
+ // each structure to a different group.
+ bool SameStructure;
+ bool Inserted;
+ unsigned CurrentGroupNum = 0;
+ unsigned OuterGroupNum;
+ DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
+ DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
+ DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
+
+ // Iterate over the candidates to determine their structural and overlapping
+ // compatibility with the other candidates.
+ for (CandIt = CandsForRepSubstring.begin(),
+ CandEndIt = CandsForRepSubstring.end();
+ CandIt != CandEndIt; CandIt++) {
+
+ // Determine if it has an assigned structural group already.
+ CandToGroupIt = CandToGroup.find(&*CandIt);
+ if (CandToGroupIt == CandToGroup.end()) {
+ // If not, we assign it one, and add it to our mapping.
+ std::tie(CandToGroupIt, Inserted) =
+ CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
+ }
+
+ // Get the structural group number from the iterator.
+ OuterGroupNum = CandToGroupIt->second;
+
+ // Check if we already have a list of IRSimilarityCandidates for the current
+ // structural group. Create one if one does not exist.
+ CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
+ if (CurrentGroupPair == StructuralGroups.end())
+ std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
+ std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
+
+ // Iterate over the IRSimilarityCandidates following the current
+ // IRSimilarityCandidate in the list to determine whether the two
+ // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
+ // of IRSimilarityCandidates.
+ for (InnerCandIt = std::next(CandIt),
+ InnerCandEndIt = CandsForRepSubstring.end();
+ InnerCandIt != InnerCandEndIt; InnerCandIt++) {
+
+ // We check if the inner item already has a group; if it does, we skip it.
+ CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
+ if (CandToGroupItInner != CandToGroup.end())
+ continue;
+
+ // Otherwise we determine if they have the same structure and add the inner
+ // candidate to the group's vector if they match.
+ SameStructure =
+ IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt);
+ if (!SameStructure)
+ continue;
+
+ CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
+ CurrentGroupPair->second.push_back(*InnerCandIt);
+ }
+ }
+}
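
A standalone sketch (not part of the upstream patch) of the grouping pattern above, with strings standing in for candidates and string length standing in for compareStructure: each new outer candidate opens a group, and only later, still-ungrouped candidates are compared against it so each pair is considered once.

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Toy stand-in for compareStructure: two "candidates" are structurally
// similar here if they have the same length.
static bool sameStructure(const std::string &A, const std::string &B) {
  return A.size() == B.size();
}

int main() {
  std::vector<std::string> Cands = {"ab", "xyz", "cd", "pqr"};
  std::map<const std::string *, unsigned> CandToGroup;
  std::map<unsigned, std::vector<std::string>> Groups;
  unsigned NextGroup = 0;

  for (unsigned I = 0; I < Cands.size(); ++I) {
    unsigned Group;
    auto It = CandToGroup.find(&Cands[I]);
    if (It == CandToGroup.end()) {
      Group = NextGroup++;
      CandToGroup[&Cands[I]] = Group;
      Groups[Group].push_back(Cands[I]); // first member of a new group
    } else {
      Group = It->second;                // already grouped as an inner match
    }
    // Only look at later candidates so each pair is compared once.
    for (unsigned J = I + 1; J < Cands.size(); ++J) {
      if (CandToGroup.count(&Cands[J]) || !sameStructure(Cands[I], Cands[J]))
        continue;
      CandToGroup[&Cands[J]] = Group;
      Groups[Group].push_back(Cands[J]);
    }
  }
  assert(Groups.size() == 2 && Groups[0].size() == 2 && Groups[1].size() == 2);
  return 0;
}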
+
+void IRSimilarityIdentifier::findCandidates(
+ std::vector<IRInstructionData *> &InstrList,
+ std::vector<unsigned> &IntegerMapping) {
+ SuffixTree ST(IntegerMapping);
+
+ std::vector<IRSimilarityCandidate> CandsForRepSubstring;
+ std::vector<SimilarityGroup> NewCandidateGroups;
+
+ DenseMap<unsigned, SimilarityGroup> StructuralGroups;
+
+ // Iterate over the subsequences found by the Suffix Tree to create
+ // IRSimilarityCandidates for each repeated subsequence and determine which
+ // instances are structurally similar to one another.
+ for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) {
+ createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, *It,
+ CandsForRepSubstring);
+
+ if (CandsForRepSubstring.size() < 2)
+ continue;
+
+ findCandidateStructures(CandsForRepSubstring, StructuralGroups);
+ for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
+ // We only add the group if it contains more than one
+ // IRSimilarityCandidate. If there is only one, that means there is no
+ // other repeated subsequence with the same structure.
+ if (Group.second.size() > 1)
+ SimilarityCandidates->push_back(Group.second);
+
+ CandsForRepSubstring.clear();
+ StructuralGroups.clear();
+ NewCandidateGroups.clear();
+ }
+}
+
+SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
+ ArrayRef<std::unique_ptr<Module>> Modules) {
+ resetSimilarityCandidates();
+
+ std::vector<IRInstructionData *> InstrList;
+ std::vector<unsigned> IntegerMapping;
+
+ populateMapper(Modules, InstrList, IntegerMapping);
+ findCandidates(InstrList, IntegerMapping);
+
+ return SimilarityCandidates.getValue();
+}
+
+SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
+ resetSimilarityCandidates();
+
+ std::vector<IRInstructionData *> InstrList;
+ std::vector<unsigned> IntegerMapping;
+
+ populateMapper(M, InstrList, IntegerMapping);
+ findCandidates(InstrList, IntegerMapping);
+
+ return SimilarityCandidates.getValue();
+}
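
A hedged usage sketch (not part of the upstream patch; it assumes the IRSimilarityIdentifier.h header is on the include path and that the calling tool already has an llvm::Module) showing how a client can drive the identifier directly and walk the resulting groups, following the same pattern the printer pass below uses:

// Sketch only; the constructor runs the analysis, as the wrapper pass relies on.
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::IRSimilarity;

static void reportSimilarity(Module &M) {
  IRSimilarityIdentifier Identifier(M);
  Optional<SimilarityGroupList> &Groups = Identifier.getSimilarity();
  if (!Groups)
    return;
  for (SimilarityGroup &Group : *Groups)
    errs() << Group.size() << " candidates of length "
           << Group.begin()->getLength() << "\n";
}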
+
+INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
+ "ir-similarity-identifier", false, true)
+
+IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
+ : ModulePass(ID) {
+ initializeIRSimilarityIdentifierWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
+ IRSI.reset(new IRSimilarityIdentifier(M));
+ return false;
+}
+
+bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
+ IRSI.reset();
+ return false;
+}
+
+bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
+ // All the real work is done in the constructor for the pass.
+ IRSI.reset(new IRSimilarityIdentifier(M));
+ return false;
+}
+
+AnalysisKey IRSimilarityAnalysis::Key;
+IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
+ ModuleAnalysisManager &) {
+
+ return IRSimilarityIdentifier(M);
+}
+
+PreservedAnalyses
+IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
+ IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
+ Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity();
+
+ for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
+ OS << CandVec.size() << " candidates of length "
+ << CandVec.begin()->getLength() << ". Found in: \n";
+ for (IRSimilarityCandidate &Cand : CandVec) {
+ OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
+ << ", Basic Block: ";
+ if (Cand.front()->Inst->getParent()->getName().str() == "")
+ OS << "(unnamed)\n";
+ else
+ OS << Cand.front()->Inst->getParent()->getName().str() << "\n";
+ }
+ }
+
+ return PreservedAnalyses::all();
+}
+
+char IRSimilarityIdentifierWrapperPass::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
index 6686848d75c9..94a24ccf2155 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
@@ -12,7 +12,6 @@
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/ADT/ScopeExit.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DomTreeUpdater.h"
@@ -48,33 +47,36 @@ bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
return true;
}
-bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
+bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
switch (Kind) {
default:
break;
- case RK_IntegerAdd:
- case RK_IntegerMult:
- case RK_IntegerOr:
- case RK_IntegerAnd:
- case RK_IntegerXor:
- case RK_IntegerMinMax:
+ case RecurKind::Add:
+ case RecurKind::Mul:
+ case RecurKind::Or:
+ case RecurKind::And:
+ case RecurKind::Xor:
+ case RecurKind::SMax:
+ case RecurKind::SMin:
+ case RecurKind::UMax:
+ case RecurKind::UMin:
return true;
}
return false;
}
-bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
- return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
+bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) {
+ return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);
}
-bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
+bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) {
switch (Kind) {
default:
break;
- case RK_IntegerAdd:
- case RK_IntegerMult:
- case RK_FloatAdd:
- case RK_FloatMult:
+ case RecurKind::Add:
+ case RecurKind::Mul:
+ case RecurKind::FAdd:
+ case RecurKind::FMul:
return true;
}
return false;
@@ -187,7 +189,7 @@ static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
}
}
-bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
+bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
Loop *TheLoop, bool HasFunNoNaNAttr,
RecurrenceDescriptor &RedDes,
DemandedBits *DB,
@@ -241,11 +243,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
if (RecurrenceType->isFloatingPointTy()) {
if (!isFloatingPointRecurrenceKind(Kind))
return false;
- } else {
+ } else if (RecurrenceType->isIntegerTy()) {
if (!isIntegerRecurrenceKind(Kind))
return false;
if (isArithmeticRecurrenceKind(Kind))
Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
+ } else {
+ // Pointer min/max may exist, but it is not supported as a reduction op.
+ return false;
}
Worklist.push_back(Start);
@@ -271,8 +276,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// * An instruction type other than PHI or the reduction operation.
// * A PHI in the header other than the initial PHI.
while (!Worklist.empty()) {
- Instruction *Cur = Worklist.back();
- Worklist.pop_back();
+ Instruction *Cur = Worklist.pop_back_val();
// No Users.
// If the instruction has no users then this is a broken chain and can't be
@@ -303,29 +307,35 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// FIXME: FMF is allowed on phi, but propagation is not handled correctly.
if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi)
FMF &= ReduxDesc.getPatternInst()->getFastMathFlags();
+ // Update this reduction kind if we matched a new instruction.
+ // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
+ // state accurate while processing the worklist?
+ if (ReduxDesc.getRecKind() != RecurKind::None)
+ Kind = ReduxDesc.getRecKind();
}
bool IsASelect = isa<SelectInst>(Cur);
// A conditional reduction operation must only have 2 or less uses in
// VisitedInsts.
- if (IsASelect && (Kind == RK_FloatAdd || Kind == RK_FloatMult) &&
+ if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
hasMultipleUsesOf(Cur, VisitedInsts, 2))
return false;
// A reduction operation must only have one use of the reduction value.
- if (!IsAPhi && !IsASelect && Kind != RK_IntegerMinMax &&
- Kind != RK_FloatMinMax && hasMultipleUsesOf(Cur, VisitedInsts, 1))
+ if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
+ hasMultipleUsesOf(Cur, VisitedInsts, 1))
return false;
// All inputs to a PHI node must be a reduction value.
if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
return false;
- if (Kind == RK_IntegerMinMax &&
+ if (isIntMinMaxRecurrenceKind(Kind) &&
(isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
- if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
+ if (isFPMinMaxRecurrenceKind(Kind) &&
+ (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
// Check whether we found a reduction operator.
@@ -390,8 +400,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// This means we have seen one but not the other instruction of the
// pattern or more than just a select and cmp.
- if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
- NumCmpSelectPatternInst != 2)
+ if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2)
return false;
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
@@ -409,7 +418,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// can be ignore in the cost model. If we compute a different type than we
// did when evaluating the 'and', the 'and' will not be eliminated, and we
// will end up with different kinds of operations in the recurrence
- // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
+ // expression (e.g., IntegerAND, IntegerADD). We give up if this is
// the case.
//
// The vectorizer relies on InstCombine to perform the actual
@@ -437,6 +446,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// instructions that are a part of the reduction. The vectorizer cost
// model could then apply the recurrence type to these instructions,
// without needing a white list of instructions to ignore.
+ // This may also be useful for the inloop reductions, if it can be
+ // kept simple enough.
collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
}
@@ -447,61 +458,50 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// is saved as part of the RecurrenceDescriptor.
// Save the description of this reduction variable.
- RecurrenceDescriptor RD(
- RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getMinMaxKind(),
- ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
+ RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF,
+ ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType,
+ IsSigned, CastInsts);
RedDes = RD;
return true;
}
-/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
-/// pattern corresponding to a min(X, Y) or max(X, Y).
RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
-
- assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
- "Expect a select instruction");
- Instruction *Cmp = nullptr;
- SelectInst *Select = nullptr;
+RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
+ const InstDesc &Prev) {
+ assert((isa<CmpInst>(I) || isa<SelectInst>(I)) &&
+ "Expected a cmp or select instruction");
// We must handle the select(cmp()) as a single instruction. Advance to the
// select.
- if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
- if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
- return InstDesc(false, I);
- return InstDesc(Select, Prev.getMinMaxKind());
+ CmpInst::Predicate Pred;
+ if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
+ if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
+ return InstDesc(Select, Prev.getRecKind());
}
- // Only handle single use cases for now.
- if (!(Select = dyn_cast<SelectInst>(I)))
- return InstDesc(false, I);
- if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
- !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
- return InstDesc(false, I);
- if (!Cmp->hasOneUse())
+ // Only match select with single use cmp condition.
+ if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
+ m_Value())))
return InstDesc(false, I);
- Value *CmpLeft;
- Value *CmpRight;
-
// Look for a min/max pattern.
- if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_UIntMin);
- else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_UIntMax);
- else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_SIntMax);
- else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_SIntMin);
- else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMin);
- else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMax);
- else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMin);
- else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMax);
+ if (match(I, m_UMin(m_Value(), m_Value())))
+ return InstDesc(I, RecurKind::UMin);
+ if (match(I, m_UMax(m_Value(), m_Value())))
+ return InstDesc(I, RecurKind::UMax);
+ if (match(I, m_SMax(m_Value(), m_Value())))
+ return InstDesc(I, RecurKind::SMax);
+ if (match(I, m_SMin(m_Value(), m_Value())))
+ return InstDesc(I, RecurKind::SMin);
+ if (match(I, m_OrdFMin(m_Value(), m_Value())))
+ return InstDesc(I, RecurKind::FMin);
+ if (match(I, m_OrdFMax(m_Value(), m_Value())))
+ return InstDesc(I, RecurKind::FMax);
+ if (match(I, m_UnordFMin(m_Value(), m_Value())))
+ return InstDesc(I, RecurKind::FMin);
+ if (match(I, m_UnordFMax(m_Value(), m_Value())))
+ return InstDesc(I, RecurKind::FMax);
return InstDesc(false, I);
}
@@ -516,8 +516,7 @@ RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
/// %add = fadd %0, %sum.1
/// %sum.2 = select %cmp, %add, %sum.1
RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isConditionalRdxPattern(
- RecurrenceKind Kind, Instruction *I) {
+RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {
SelectInst *SI = dyn_cast<SelectInst>(I);
if (!SI)
return InstDesc(false, I);
@@ -545,16 +544,16 @@ RecurrenceDescriptor::isConditionalRdxPattern(
if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
I1->isFast())
- return InstDesc(Kind == RK_FloatAdd, SI);
+ return InstDesc(Kind == RecurKind::FAdd, SI);
if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
- return InstDesc(Kind == RK_FloatMult, SI);
+ return InstDesc(Kind == RecurKind::FMul, SI);
return InstDesc(false, I);
}
RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind,
InstDesc &Prev, bool HasFunNoNaNAttr) {
Instruction *UAI = Prev.getUnsafeAlgebraInst();
if (!UAI && isa<FPMathOperator>(I) && !I->hasAllowReassoc())
@@ -564,31 +563,32 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
default:
return InstDesc(false, I);
case Instruction::PHI:
- return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
+ return InstDesc(I, Prev.getRecKind(), Prev.getUnsafeAlgebraInst());
case Instruction::Sub:
case Instruction::Add:
- return InstDesc(Kind == RK_IntegerAdd, I);
+ return InstDesc(Kind == RecurKind::Add, I);
case Instruction::Mul:
- return InstDesc(Kind == RK_IntegerMult, I);
+ return InstDesc(Kind == RecurKind::Mul, I);
case Instruction::And:
- return InstDesc(Kind == RK_IntegerAnd, I);
+ return InstDesc(Kind == RecurKind::And, I);
case Instruction::Or:
- return InstDesc(Kind == RK_IntegerOr, I);
+ return InstDesc(Kind == RecurKind::Or, I);
case Instruction::Xor:
- return InstDesc(Kind == RK_IntegerXor, I);
+ return InstDesc(Kind == RecurKind::Xor, I);
+ case Instruction::FDiv:
case Instruction::FMul:
- return InstDesc(Kind == RK_FloatMult, I, UAI);
+ return InstDesc(Kind == RecurKind::FMul, I, UAI);
case Instruction::FSub:
case Instruction::FAdd:
- return InstDesc(Kind == RK_FloatAdd, I, UAI);
+ return InstDesc(Kind == RecurKind::FAdd, I, UAI);
case Instruction::Select:
- if (Kind == RK_FloatAdd || Kind == RK_FloatMult)
+ if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul)
return isConditionalRdxPattern(Kind, I);
LLVM_FALLTHROUGH;
case Instruction::FCmp:
case Instruction::ICmp:
- if (Kind != RK_IntegerMinMax &&
- (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+ if (!isIntMinMaxRecurrenceKind(Kind) &&
+ (!HasFunNoNaNAttr || !isFPMinMaxRecurrenceKind(Kind)))
return InstDesc(false, I);
return isMinMaxSelectCmpPattern(I, Prev);
}
@@ -618,50 +618,69 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
bool HasFunNoNaNAttr =
F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
- if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ if (AddReductionVar(Phi, RecurKind::Add, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ if (AddReductionVar(Phi, RecurKind::Or, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ if (AddReductionVar(Phi, RecurKind::And, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
+ if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, HasFunNoNaNAttr, RedDes,
DB, AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, HasFunNoNaNAttr, RedDes,
+ DB, AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, HasFunNoNaNAttr, RedDes,
+ DB, AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, HasFunNoNaNAttr, RedDes,
+ DB, AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, HasFunNoNaNAttr, RedDes,
+ DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, HasFunNoNaNAttr, RedDes,
+ DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi
- << "\n");
+ if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, HasFunNoNaNAttr, RedDes,
+ DB, AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, HasFunNoNaNAttr, RedDes,
+ DB, AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
return true;
}
// Not a reduction of known type.
@@ -745,57 +764,143 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
/// This function returns the identity element (or neutral element) for
/// the operation K.
-Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
- Type *Tp) {
+Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp) {
switch (K) {
- case RK_IntegerXor:
- case RK_IntegerAdd:
- case RK_IntegerOr:
+ case RecurKind::Xor:
+ case RecurKind::Add:
+ case RecurKind::Or:
// Adding, Xoring, Oring zero to a number does not change it.
return ConstantInt::get(Tp, 0);
- case RK_IntegerMult:
+ case RecurKind::Mul:
// Multiplying a number by 1 does not change it.
return ConstantInt::get(Tp, 1);
- case RK_IntegerAnd:
+ case RecurKind::And:
// AND-ing a number with an all-1 value does not change it.
return ConstantInt::get(Tp, -1, true);
- case RK_FloatMult:
+ case RecurKind::FMul:
// Multiplying a number by 1 does not change it.
return ConstantFP::get(Tp, 1.0L);
- case RK_FloatAdd:
+ case RecurKind::FAdd:
// Adding zero to a number does not change it.
return ConstantFP::get(Tp, 0.0L);
+ case RecurKind::UMin:
+ return ConstantInt::get(Tp, -1);
+ case RecurKind::UMax:
+ return ConstantInt::get(Tp, 0);
+ case RecurKind::SMin:
+ return ConstantInt::get(Tp,
+ APInt::getSignedMaxValue(Tp->getIntegerBitWidth()));
+ case RecurKind::SMax:
+ return ConstantInt::get(Tp,
+ APInt::getSignedMinValue(Tp->getIntegerBitWidth()));
+ case RecurKind::FMin:
+ return ConstantFP::getInfinity(Tp, true);
+ case RecurKind::FMax:
+ return ConstantFP::getInfinity(Tp, false);
default:
llvm_unreachable("Unknown recurrence kind");
}
}
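
The integer identities above can be sanity-checked with ordinary arithmetic; a standalone sketch (not part of the upstream patch and independent of the LLVM constant types) showing that folding each identity into a running reduction leaves the value unchanged:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 12345;
  assert(X + 0u == X);                             // Add/Or/Xor identity: 0
  assert(X * 1u == X);                             // Mul identity: 1
  assert((X & UINT32_MAX) == X);                   // And identity: all ones
  assert(std::min<uint32_t>(X, UINT32_MAX) == X);  // UMin identity: largest unsigned
  assert(std::max<uint32_t>(X, 0) == X);           // UMax identity: 0
  int32_t S = -7;
  assert(std::max<int32_t>(S, INT32_MIN) == S);    // SMax identity: signed min
  assert(std::min<int32_t>(S, INT32_MAX) == S);    // SMin identity: signed max
  return 0;
}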
-/// This function translates the recurrence kind to an LLVM binary operator.
-unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
+unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
switch (Kind) {
- case RK_IntegerAdd:
+ case RecurKind::Add:
return Instruction::Add;
- case RK_IntegerMult:
+ case RecurKind::Mul:
return Instruction::Mul;
- case RK_IntegerOr:
+ case RecurKind::Or:
return Instruction::Or;
- case RK_IntegerAnd:
+ case RecurKind::And:
return Instruction::And;
- case RK_IntegerXor:
+ case RecurKind::Xor:
return Instruction::Xor;
- case RK_FloatMult:
+ case RecurKind::FMul:
return Instruction::FMul;
- case RK_FloatAdd:
+ case RecurKind::FAdd:
return Instruction::FAdd;
- case RK_IntegerMinMax:
+ case RecurKind::SMax:
+ case RecurKind::SMin:
+ case RecurKind::UMax:
+ case RecurKind::UMin:
return Instruction::ICmp;
- case RK_FloatMinMax:
+ case RecurKind::FMax:
+ case RecurKind::FMin:
return Instruction::FCmp;
default:
llvm_unreachable("Unknown recurrence operation");
}
}
+SmallVector<Instruction *, 4>
+RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
+ SmallVector<Instruction *, 4> ReductionOperations;
+ unsigned RedOp = getOpcode(Kind);
+
+ // Search down from the Phi to the LoopExitInstr, looking for instructions
+ // with a single user of the correct type for the reduction.
+
+ // Note that we check that the type of the operand is correct for each item in
+ // the chain, including the last (the loop exit value). This can come up from
+ // sub, which would otherwise be treated as an add reduction. MinMax reductions
+ // also need to check for a pair of icmp/select, for which we use the
+ // getNextInstruction and isCorrectOpcode helpers to step the right number of
+ // instructions and check the icmp/select pair.
+ // FIXME: We also do not attempt to look through Phi/Select's yet, which might
+ // be part of the reduction chain, or attempt to look through And's to find a
+ // smaller bitwidth. Subs are also currently not allowed (which are usually
+ // treated as part of an add reduction) as they are expected to generally be
+ // more expensive than out-of-loop reductions, and need to be costed more
+ // carefully.
+ unsigned ExpectedUses = 1;
+ if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
+ ExpectedUses = 2;
+
+ auto getNextInstruction = [&](Instruction *Cur) {
+ if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
+ // We are expecting an icmp/select pair, so we go to the next select
+ // instruction if we can. We already know that Cur has 2 uses.
+ if (isa<SelectInst>(*Cur->user_begin()))
+ return cast<Instruction>(*Cur->user_begin());
+ else
+ return cast<Instruction>(*std::next(Cur->user_begin()));
+ }
+ return cast<Instruction>(*Cur->user_begin());
+ };
+ auto isCorrectOpcode = [&](Instruction *Cur) {
+ if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
+ Value *LHS, *RHS;
+ return SelectPatternResult::isMinOrMax(
+ matchSelectPattern(Cur, LHS, RHS).Flavor);
+ }
+ return Cur->getOpcode() == RedOp;
+ };
+
+ // The loop exit instruction is checked first (as a quick test) but added
+ // last. We check that the opcode is correct (and don't allow it to be a Sub)
+ // and that it has the expected number of uses: one use from the phi and one
+ // from an LCSSA value, no matter the type.
+ if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2))
+ return {};
+
+ // Check that the Phi has one (or two for min/max) uses.
+ if (!Phi->hasNUses(ExpectedUses))
+ return {};
+ Instruction *Cur = getNextInstruction(Phi);
+
+ // Each remaining instruction in the chain should have the expected number of
+ // uses and the correct opcode.
+ while (Cur != LoopExitInstr) {
+ if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
+ return {};
+
+ ReductionOperations.push_back(Cur);
+ Cur = getNextInstruction(Cur);
+ }
+
+ ReductionOperations.push_back(Cur);
+ return ReductionOperations;
+}
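
A toy standalone sketch (not part of the upstream patch) of the chain walk above for a plain add reduction: each index has exactly one "user", the walk starts one step past the phi, and the loop exit value is pushed last.

#include <cassert>
#include <vector>

int main() {
  // phi(0) -> add(1) -> add(2) -> loop-exit add(3); Next[I] is the sole user.
  std::vector<int> Next = {1, 2, 3, -1};
  int LoopExit = 3;

  std::vector<int> Chain;
  int Cur = Next[0];            // step from the phi
  while (Cur != LoopExit) {
    Chain.push_back(Cur);
    Cur = Next[Cur];
  }
  Chain.push_back(Cur);         // the exit instruction is added last
  assert(Chain.size() == 3 && Chain.back() == LoopExit);
  return 0;
}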
+
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
const SCEV *Step, BinaryOperator *BOp,
SmallVectorImpl<Instruction *> *Casts)
@@ -834,13 +939,6 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
}
}
-int InductionDescriptor::getConsecutiveDirection() const {
- ConstantInt *ConstStep = getConstIntStepValue();
- if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
- return ConstStep->getSExtValue();
- return 0;
-}
-
ConstantInt *InductionDescriptor::getConstIntStepValue() const {
if (isa<SCEVConstant>(Step))
return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm-project/llvm/lib/Analysis/ImportedFunctionsInliningStatistics.cpp
index ea93f99d69e3..a7b5fda237d1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ImportedFunctionsInliningStatistics.cpp
@@ -9,10 +9,11 @@
// ThinLTO.
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -20,6 +21,15 @@
#include <sstream>
using namespace llvm;
+cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
+ "inliner-function-import-stats",
+ cl::init(InlinerFunctionImportStatsOpts::No),
+ cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
+ "basic statistics"),
+ clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
+ "printing of statistics for each inlined function")),
+ cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
+
ImportedFunctionsInliningStatistics::InlineGraphNode &
ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) {
@@ -186,7 +196,7 @@ ImportedFunctionsInliningStatistics::SortedNodesTy
ImportedFunctionsInliningStatistics::getSortedNodes() {
SortedNodesTy SortedNodes;
SortedNodes.reserve(NodesMap.size());
- for (const NodesMapTy::value_type& Node : NodesMap)
+ for (const NodesMapTy::value_type &Node : NodesMap)
SortedNodes.push_back(&Node);
llvm::sort(SortedNodes, [&](const SortedNodesTy::value_type &Lhs,
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index c32aa0340ceb..b112ed2e4439 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -22,9 +22,7 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include <string>
-#include <utility>
-#include <vector>
+#include <memory>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp
index 74a536d1ce2f..9a2276a16132 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -48,45 +49,33 @@ static cl::opt<int>
cl::desc("Scale to limit the cost of inline deferral"),
cl::init(2), cl::Hidden);
-namespace {
-class DefaultInlineAdvice : public InlineAdvice {
-public:
- DefaultInlineAdvice(DefaultInlineAdvisor *Advisor, CallBase &CB,
- Optional<InlineCost> OIC, OptimizationRemarkEmitter &ORE)
- : InlineAdvice(Advisor, CB, ORE, OIC.hasValue()), OriginalCB(&CB),
- OIC(OIC) {}
-
-private:
- void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
- using namespace ore;
- llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) +
- "; " + inlineCostStr(*OIC));
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller) << ": "
- << NV("Reason", Result.getFailureReason());
- });
- }
+extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
- void recordInliningWithCalleeDeletedImpl() override {
- emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
- }
+void DefaultInlineAdvice::recordUnsuccessfulInliningImpl(
+ const InlineResult &Result) {
+ using namespace ore;
+ llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) +
+ "; " + inlineCostStr(*OIC));
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+ << NV("Callee", Callee) << " will not be inlined into "
+ << NV("Caller", Caller) << ": "
+ << NV("Reason", Result.getFailureReason());
+ });
+}
- void recordInliningImpl() override {
+void DefaultInlineAdvice::recordInliningWithCalleeDeletedImpl() {
+ if (EmitRemarks)
emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
- }
-
-private:
- CallBase *const OriginalCB;
- Optional<InlineCost> OIC;
-};
+}
-} // namespace
+void DefaultInlineAdvice::recordInliningImpl() {
+ if (EmitRemarks)
+ emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+}
-llvm::Optional<llvm::InlineCost>
-getDefaultInlineAdvice(CallBase &CB, FunctionAnalysisManager &FAM,
- const InlineParams &Params) {
+llvm::Optional<llvm::InlineCost> static getDefaultInlineAdvice(
+ CallBase &CB, FunctionAnalysisManager &FAM, const InlineParams &Params) {
Function &Caller = *CB.getCaller();
ProfileSummaryInfo *PSI =
FAM.getResult<ModuleAnalysisManagerFunctionProxy>(Caller)
@@ -114,11 +103,11 @@ getDefaultInlineAdvice(CallBase &CB, FunctionAnalysisManager &FAM,
GetBFI, PSI, RemarksEnabled ? &ORE : nullptr);
};
return llvm::shouldInline(CB, GetInlineCost, ORE,
- Params.EnableDeferral.hasValue() &&
- Params.EnableDeferral.getValue());
+ Params.EnableDeferral.getValueOr(false));
}
-std::unique_ptr<InlineAdvice> DefaultInlineAdvisor::getAdvice(CallBase &CB) {
+std::unique_ptr<InlineAdvice>
+DefaultInlineAdvisor::getAdviceImpl(CallBase &CB) {
auto OIC = getDefaultInlineAdvice(CB, FAM, Params);
return std::make_unique<DefaultInlineAdvice>(
this, CB, OIC,
@@ -144,8 +133,20 @@ void InlineAdvisor::freeDeletedFunctions() {
DeletedFunctions.clear();
}
+void InlineAdvice::recordInlineStatsIfNeeded() {
+ if (Advisor->ImportedFunctionsStats)
+ Advisor->ImportedFunctionsStats->recordInline(*Caller, *Callee);
+}
+
+void InlineAdvice::recordInlining() {
+ markRecorded();
+ recordInlineStatsIfNeeded();
+ recordInliningImpl();
+}
+
void InlineAdvice::recordInliningWithCalleeDeleted() {
markRecorded();
+ recordInlineStatsIfNeeded();
Advisor->markFunctionAsDeleted(Callee);
recordInliningWithCalleeDeletedImpl();
}
@@ -153,14 +154,28 @@ void InlineAdvice::recordInliningWithCalleeDeleted() {
AnalysisKey InlineAdvisorAnalysis::Key;
bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params,
- InliningAdvisorMode Mode) {
+ InliningAdvisorMode Mode,
+ StringRef ReplayFile) {
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
switch (Mode) {
case InliningAdvisorMode::Default:
- Advisor.reset(new DefaultInlineAdvisor(FAM, Params));
+ Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params));
+ // Restrict replay to the default advisor; ML advisors are stateful, so
+ // replay will need augmentations to interleave with them correctly.
+ if (!ReplayFile.empty()) {
+ Advisor = std::make_unique<ReplayInlineAdvisor>(
+ M, FAM, M.getContext(), std::move(Advisor), ReplayFile,
+ /* EmitRemarks =*/true);
+ }
break;
case InliningAdvisorMode::Development:
- // To be added subsequently under conditional compilation.
+#ifdef LLVM_HAVE_TF_API
+ Advisor =
+ llvm::getDevelopmentModeAdvisor(M, MAM, [&FAM, Params](CallBase &CB) {
+ auto OIC = getDefaultInlineAdvice(CB, FAM, Params);
+ return OIC.hasValue();
+ });
+#endif
break;
case InliningAdvisorMode::Release:
#ifdef LLVM_HAVE_TF_AOT
@@ -168,6 +183,7 @@ bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params,
#endif
break;
}
+
return !!Advisor;
}
@@ -366,9 +382,35 @@ llvm::shouldInline(CallBase &CB,
return IC;
}
+std::string llvm::getCallSiteLocation(DebugLoc DLoc) {
+ std::ostringstream CallSiteLoc;
+ bool First = true;
+ for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) {
+ if (!First)
+ CallSiteLoc << " @ ";
+ // Note that a negative line offset is actually possible, but we use
+ // unsigned int to match the line offset representation in remarks so
+ // it's directly consumable by the replay advisor.
+ uint32_t Offset =
+ DIL->getLine() - DIL->getScope()->getSubprogram()->getLine();
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+ StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
+ if (Name.empty())
+ Name = DIL->getScope()->getSubprogram()->getName();
+ CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset) << ":"
+ << llvm::utostr(DIL->getColumn());
+ if (Discriminator)
+ CallSiteLoc << "." << llvm::utostr(Discriminator);
+ First = false;
+ }
+
+ return CallSiteLoc.str();
+}
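
A standalone sketch (not part of the upstream patch, with made-up frame values) of the string shape produced above: each inline frame is printed as name:offset:column, an optional .discriminator is appended, and frames are joined with " @ ".

#include <cassert>
#include <sstream>
#include <string>
#include <vector>

// Made-up inline stack, innermost location first, mirroring the walk over
// DILocation::getInlinedAt above.
struct Frame { std::string Name; unsigned Offset, Column, Discriminator; };

int main() {
  std::vector<Frame> InlineStack = {{"callee", 2, 7, 0}, {"caller", 14, 3, 1}};
  std::ostringstream CallSiteLoc;
  bool First = true;
  for (const Frame &F : InlineStack) {
    if (!First)
      CallSiteLoc << " @ ";
    CallSiteLoc << F.Name << ":" << F.Offset << ":" << F.Column;
    if (F.Discriminator)
      CallSiteLoc << "." << F.Discriminator;
    First = false;
  }
  assert(CallSiteLoc.str() == "callee:2:7 @ caller:14:3.1");
  return 0;
}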
+
void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) {
- if (!DLoc.get())
+ if (!DLoc.get()) {
return;
+ }
bool First = true;
Remark << " at callsite ";
@@ -381,11 +423,14 @@ void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) {
StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
if (Name.empty())
Name = DIL->getScope()->getSubprogram()->getName();
- Remark << Name << ":" << ore::NV("Line", Offset);
+ Remark << Name << ":" << ore::NV("Line", Offset) << ":"
+ << ore::NV("Column", DIL->getColumn());
if (Discriminator)
Remark << "." << ore::NV("Disc", Discriminator);
First = false;
}
+
+ Remark << ";";
}
void llvm::emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
@@ -406,3 +451,64 @@ void llvm::emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
return Remark;
});
}
+
+InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM)
+ : M(M), FAM(FAM) {
+ if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
+ ImportedFunctionsStats =
+ std::make_unique<ImportedFunctionsInliningStatistics>();
+ ImportedFunctionsStats->setModuleInfo(M);
+ }
+}
+
+InlineAdvisor::~InlineAdvisor() {
+ if (ImportedFunctionsStats) {
+ assert(InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No);
+ ImportedFunctionsStats->dump(InlinerFunctionImportStats ==
+ InlinerFunctionImportStatsOpts::Verbose);
+ }
+
+ freeDeletedFunctions();
+}
+
+std::unique_ptr<InlineAdvice> InlineAdvisor::getMandatoryAdvice(CallBase &CB,
+ bool Advice) {
+ return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
+}
+
+InlineAdvisor::MandatoryInliningKind
+InlineAdvisor::getMandatoryKind(CallBase &CB, FunctionAnalysisManager &FAM,
+ OptimizationRemarkEmitter &ORE) {
+ auto &Callee = *CB.getCalledFunction();
+
+ auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+
+ auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
+
+ auto TrivialDecision =
+ llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI);
+
+ if (TrivialDecision.hasValue()) {
+ if (TrivialDecision->isSuccess())
+ return MandatoryInliningKind::Always;
+ else
+ return MandatoryInliningKind::Never;
+ }
+ return MandatoryInliningKind::NotMandatory;
+}
+
+std::unique_ptr<InlineAdvice> InlineAdvisor::getAdvice(CallBase &CB,
+ bool MandatoryOnly) {
+ if (!MandatoryOnly)
+ return getAdviceImpl(CB);
+ bool Advice = CB.getCaller() != CB.getCalledFunction() &&
+ MandatoryInliningKind::Always ==
+ getMandatoryKind(CB, FAM, getCallerORE(CB));
+ return getMandatoryAdvice(CB, Advice);
+}
+
+OptimizationRemarkEmitter &InlineAdvisor::getCallerORE(CallBase &CB) {
+ return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller());
+}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp
index 33d714406d7f..a35f5e11f0e0 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp
@@ -71,6 +71,20 @@ static cl::opt<int>
cl::init(45), cl::ZeroOrMore,
cl::desc("Threshold for inlining cold callsites"));
+static cl::opt<bool> InlineEnableCostBenefitAnalysis(
+ "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
+ cl::desc("Enable the cost-benefit analysis for the inliner"));
+
+static cl::opt<int> InlineSavingsMultiplier(
+ "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore,
+ cl::desc("Multiplier to multiply cycle savings by during inlining"));
+
+static cl::opt<int>
+ InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
+ cl::ZeroOrMore,
+ cl::desc("The maximum size of a callee that get's "
+ "inlined without sufficient cycle savings"));
+
// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
@@ -183,6 +197,9 @@ protected:
CallBase &CandidateCall;
/// Extension points for handling callsite features.
+ // Called before a basic block was analyzed.
+ virtual void onBlockStart(const BasicBlock *BB) {}
+
/// Called after a basic block was analyzed.
virtual void onBlockAnalyzed(const BasicBlock *BB) {}
@@ -454,12 +471,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
/// Ignore the threshold when finalizing analysis.
const bool IgnoreThreshold;
+ // True if the cost-benefit-analysis-based inliner is enabled.
+ const bool CostBenefitAnalysisEnabled;
+
/// Inlining cost measured in abstract units, accounts for all the
/// instructions expected to be executed for a given function invocation.
/// Instructions that are statically proven to be dead based on call-site
/// arguments are not counted here.
int Cost = 0;
+ // The cumulative cost at the beginning of the basic block being analyzed. At
+ // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
+ // the size of that basic block.
+ int CostAtBBStart = 0;
+
+ // The static size of live but cold basic blocks. This is "static" in the
+ // sense that it's not weighted by profile counts at all.
+ int ColdSize = 0;
+
bool SingleBB = true;
unsigned SROACostSavings = 0;
@@ -597,7 +626,21 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
SROACostSavings += InlineConstants::InstrCost;
}
+ void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }
+
void onBlockAnalyzed(const BasicBlock *BB) override {
+ if (CostBenefitAnalysisEnabled) {
+ // Keep track of the static size of live but cold basic blocks. For now,
+ // we define a cold basic block to be one that's never executed.
+ assert(GetBFI && "GetBFI must be available");
+ BlockFrequencyInfo *BFI = &(GetBFI(F));
+ assert(BFI && "BFI must be available");
+ auto ProfileCount = BFI->getBlockProfileCount(BB);
+ assert(ProfileCount.hasValue());
+ if (ProfileCount.getValue() == 0)
+ ColdSize += Cost - CostAtBBStart;
+ }
+
auto *TI = BB->getTerminator();
// If we had any successors at this point, then post-inlining is likely to
// have them as well. Note that we assume any basic blocks which existed
@@ -628,6 +671,131 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
InstructionCostDetailMap[I].ThresholdAfter = Threshold;
}
+ bool isCostBenefitAnalysisEnabled() {
+ if (!InlineEnableCostBenefitAnalysis)
+ return false;
+
+ if (!PSI || !PSI->hasProfileSummary())
+ return false;
+
+ if (!GetBFI)
+ return false;
+
+ auto *Caller = CandidateCall.getParent()->getParent();
+ if (!Caller->getEntryCount())
+ return false;
+
+ BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
+ if (!CallerBFI)
+ return false;
+
+ // For now, limit to hot call site.
+ if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
+ return false;
+
+ if (!F.getEntryCount())
+ return false;
+
+ BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
+ if (!CalleeBFI)
+ return false;
+
+ return true;
+ }
+
+ // Determine whether we should inline the given call site, taking into account
+ // both the size cost and the cycle savings. Return None if we don't have
+ // sufficient profiling information to decide.
+ Optional<bool> costBenefitAnalysis() {
+ if (!CostBenefitAnalysisEnabled)
+ return None;
+
+ // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
+ // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
+ // falling back to the cost-based metric.
+ // TODO: Improve this hacky condition.
+ if (Threshold == 0)
+ return None;
+
+ assert(GetBFI);
+ BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
+ assert(CalleeBFI);
+
+ // The cycle savings expressed as the sum of InlineConstants::InstrCost
+ // multiplied by the estimated dynamic count of each instruction we can
+ // avoid. Savings come from the call site cost, such as argument setup and
+ // the call instruction, as well as the instructions that are folded.
+ //
+ // We use 128-bit APInt here to avoid potential overflow. This variable
+ // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case
+ // assumes that we can avoid or fold a billion instructions, each with a
+ // profile count of 10^^15 -- roughly the number of cycles for a 24-hour
+ // period on a 4GHz machine.
+ APInt CycleSavings(128, 0);
+
+ for (auto &BB : F) {
+ APInt CurrentSavings(128, 0);
+ for (auto &I : BB) {
+ if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
+ // Count a conditional branch as savings if it becomes unconditional.
+ if (BI->isConditional() &&
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(BI->getCondition()))) {
+ CurrentSavings += InlineConstants::InstrCost;
+ }
+ } else if (Value *V = dyn_cast<Value>(&I)) {
+ // Count an instruction as savings if we can fold it.
+ if (SimplifiedValues.count(V)) {
+ CurrentSavings += InlineConstants::InstrCost;
+ }
+ }
+ // TODO: Consider other forms of savings like switch statements,
+ // indirect calls becoming direct, SROACostSavings, LoadEliminationCost,
+ // etc.
+ }
+
+ auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
+ assert(ProfileCount.hasValue());
+ CurrentSavings *= ProfileCount.getValue();
+ CycleSavings += CurrentSavings;
+ }
+
+ // Compute the cycle savings per call.
+ auto EntryProfileCount = F.getEntryCount();
+ assert(EntryProfileCount.hasValue());
+ auto EntryCount = EntryProfileCount.getCount();
+ CycleSavings += EntryCount / 2;
+ CycleSavings = CycleSavings.udiv(EntryCount);
+
+ // Compute the total savings for the call site.
+ auto *CallerBB = CandidateCall.getParent();
+ BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
+ CycleSavings += getCallsiteCost(this->CandidateCall, DL);
+ CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue();
+
+ // Remove the cost of the cold basic blocks.
+ int Size = Cost - ColdSize;
+
+ // Allow tiny callees to be inlined regardless of whether they meet the
+ // savings threshold.
+ Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;
+
+ // Return true if the savings justify the cost of inlining. Specifically,
+ // we evaluate the following inequality:
+ //
+ // CycleSavings PSI->getOrCompHotCountThreshold()
+ // -------------- >= -----------------------------------
+ // Size InlineSavingsMultiplier
+ //
+ // Note that the left hand side is specific to a call site. The right hand
+ // side is a constant for the entire executable.
+ APInt LHS = CycleSavings;
+ LHS *= InlineSavingsMultiplier;
+ APInt RHS(128, PSI->getOrCompHotCountThreshold());
+ RHS *= Size;
+ return LHS.uge(RHS);
+ }
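
A worked numeric instance of the inequality above; all values except the InlineSavingsMultiplier default of 8 (see the inline-savings-multiplier option earlier in this file) are hypothetical.

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical per-call cycle savings and callee size after the
  // InlineSizeAllowance adjustment; the hot-count threshold is also made up.
  uint64_t CycleSavings = 20000;
  uint64_t Size = 150;
  uint64_t SavingsMultiplier = 8;
  uint64_t HotCountThreshold = 4000;

  // CycleSavings / Size >= HotCountThreshold / SavingsMultiplier, evaluated
  // without division as in costBenefitAnalysis().
  bool ShouldInline =
      CycleSavings * SavingsMultiplier >= HotCountThreshold * Size;
  assert(!ShouldInline); // 160000 < 600000: savings too small for this size
  return 0;
}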
+
InlineResult finalizeAnalysis() override {
// Loops generally act a lot like calls in that they act like barriers to
// movement, require a certain amount of setup, etc. So when optimising for
@@ -656,6 +824,13 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
else if (NumVectorInstructions <= NumInstructions / 2)
Threshold -= VectorBonus / 2;
+ if (auto Result = costBenefitAnalysis()) {
+ if (Result.getValue())
+ return InlineResult::success();
+ else
+ return InlineResult::failure("Cost over threshold.");
+ }
+
if (IgnoreThreshold || Cost < std::max(1, Threshold))
return InlineResult::success();
return InlineResult::failure("Cost over threshold.");
@@ -726,9 +901,11 @@ public:
bool IgnoreThreshold = false)
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
ComputeFullInlineCost(OptComputeFullInlineCost ||
- Params.ComputeFullInlineCost || ORE),
+ Params.ComputeFullInlineCost || ORE ||
+ isCostBenefitAnalysisEnabled()),
Params(Params), Threshold(Params.DefaultThreshold),
BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
+ CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
Writer(this) {}
/// Annotation Writer for instruction details
@@ -841,11 +1018,11 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
SmallVector<Value *, 4> Operands;
Operands.push_back(GEP.getOperand(0));
- for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
- if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
+ for (const Use &Op : GEP.indices())
+ if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
Operands.push_back(SimpleOp);
else
- Operands.push_back(*I);
+ Operands.push_back(Op);
return TargetTransformInfo::TCC_Free ==
TTI.getUserCost(&GEP, Operands,
TargetTransformInfo::TCK_SizeAndLatency);
@@ -867,7 +1044,7 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// is needed to track stack usage during inlining.
Type *Ty = I.getAllocatedType();
AllocatedSize = SaturatingMultiplyAdd(
- AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(),
+ AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getKnownMinSize(),
AllocatedSize);
if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline) {
HasDynamicAlloca = true;
@@ -881,7 +1058,7 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
if (I.isStaticAlloca()) {
Type *Ty = I.getAllocatedType();
AllocatedSize =
- SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(), AllocatedSize);
+ SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
}
// We will happily inline static alloca instructions.
@@ -1017,8 +1194,8 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
// Lambda to check whether a GEP's indices are all constant.
auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
- for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
- if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
+ for (const Use &Op : GEP.indices())
+ if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
return false;
return true;
};
@@ -1101,7 +1278,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// integer is large enough to represent the pointer.
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
- if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
+ if (IntegerSize == DL.getPointerSizeInBits(AS)) {
std::pair<Value *, APInt> BaseAndOffset =
ConstantOffsetPtrs.lookup(I.getOperand(0));
if (BaseAndOffset.first)
@@ -1403,6 +1580,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
// Finally, take the target-specific inlining threshold multiplier into
// account.
Threshold *= TTI.getInliningThresholdMultiplier();
+ Threshold += TTI.adjustInliningThreshold(&Call);
SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
VectorBonus = Threshold * VectorBonusPercent / 100;
@@ -1884,8 +2062,8 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
// We found something we don't understand or can't handle. Mark any SROA-able
// values in the operand list as no longer viable.
- for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
- disableSROA(*OI);
+ for (const Use &Op : I.operands())
+ disableSROA(Op);
return false;
}
@@ -1900,7 +2078,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
InlineResult
CallAnalyzer::analyzeBlock(BasicBlock *BB,
SmallPtrSetImpl<const Value *> &EphValues) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (Instruction &I : *BB) {
// FIXME: Currently, the number of instructions in a function regardless of
// our ability to simplify them during inline to constants or dead code,
// are actually used by the vector bonus heuristic. As long as that's true,
@@ -1911,12 +2089,16 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (isa<DbgInfoIntrinsic>(I))
continue;
+ // Skip pseudo-probes.
+ if (isa<PseudoProbeInst>(I))
+ continue;
+
// Skip ephemeral values.
- if (EphValues.count(&*I))
+ if (EphValues.count(&I))
continue;
++NumInstructions;
- if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
+ if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
++NumVectorInstructions;
// If the instruction simplified to a constant, there is no cost to this
@@ -1924,14 +2106,14 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
// all of the per-instruction logic. The visit tree returns true if we
// consumed the instruction in any way, and false if the instruction's base
// cost should count against inlining.
- onInstructionAnalysisStart(&*I);
+ onInstructionAnalysisStart(&I);
- if (Base::visit(&*I))
+ if (Base::visit(&I))
++NumInstructionsSimplified;
else
onMissedSimplification();
- onInstructionAnalysisFinish(&*I);
+ onInstructionAnalysisFinish(&I);
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
InlineResult IR = InlineResult::success();
@@ -2092,23 +2274,23 @@ InlineResult CallAnalyzer::analyze() {
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
auto CAI = CandidateCall.arg_begin();
- for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
- FAI != FAE; ++FAI, ++CAI) {
+ for (Argument &FAI : F.args()) {
assert(CAI != CandidateCall.arg_end());
if (Constant *C = dyn_cast<Constant>(CAI))
- SimplifiedValues[&*FAI] = C;
+ SimplifiedValues[&FAI] = C;
Value *PtrArg = *CAI;
if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
- ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
+ ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
// We can SROA any pointer arguments derived from alloca instructions.
if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
- SROAArgValues[&*FAI] = SROAArg;
+ SROAArgValues[&FAI] = SROAArg;
onInitializeSROAArg(SROAArg);
EnabledSROAAllocas.insert(SROAArg);
}
}
+ ++CAI;
}
NumConstantArgs = SimplifiedValues.size();
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
@@ -2142,6 +2324,8 @@ InlineResult CallAnalyzer::analyze() {
if (BB->empty())
continue;
+ onBlockStart(BB);
+
// Disallow inlining a blockaddress with uses other than strictly callbr.
// A blockaddress only has defined behavior for an indirect branch in the
// same function, and we do not currently support inlining indirect
@@ -2328,6 +2512,13 @@ Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
if (!Callee)
return InlineResult::failure("indirect call");
+ // When a callee coroutine function is inlined into a caller coroutine
+ // function before the coro-split pass, the coro-early pass cannot handle
+ // this quite well. So we won't inline the coroutine function if it has not
+ // been split yet.
+ if (Callee->isPresplitCoroutine())
+ return InlineResult::failure("unsplited coroutine call");
+
// Never inline calls with byval arguments that does not have the alloca
// address space. Since byval arguments can be replaced with a copy to an
// alloca, the inlined code would need to be adjusted to handle that the
@@ -2378,6 +2569,15 @@ Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
if (Call.isNoInline())
return InlineResult::failure("noinline call site attribute");
+ // Don't inline functions if one does not have any stack protector attribute
+ // but the other does.
+ if (Caller->hasStackProtectorFnAttr() && !Callee->hasStackProtectorFnAttr())
+ return InlineResult::failure(
+ "stack protected caller but callee requested no stack protector");
+ if (Callee->hasStackProtectorFnAttr() && !Caller->hasStackProtectorFnAttr())
+ return InlineResult::failure(
+ "stack protected callee but caller requested no stack protector");
+
return None;
}
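// A standalone sketch (not part of the merged patch) of the rule enforced by
// the two checks above: inlining is refused whenever exactly one side of the
// call carries a stack-protector function attribute, in either direction.
// stackProtectorsCompatible() is a hypothetical helper for illustration only.
#include <cassert>

bool stackProtectorsCompatible(bool CallerHasSP, bool CalleeHasSP) {
  return CallerHasSP == CalleeHasSP;   // any mismatch blocks inlining
}

void stackProtectorRuleExamples() {
  assert(stackProtectorsCompatible(true, true));
  assert(stackProtectorsCompatible(false, false));
  assert(!stackProtectorsCompatible(true, false));  // protected caller, plain callee
  assert(!stackProtectorsCompatible(false, true));  // plain caller, protected callee
}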
@@ -2419,25 +2619,26 @@ InlineCost llvm::getInlineCost(
InlineResult llvm::isInlineViable(Function &F) {
bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ for (BasicBlock &BB : F) {
// Disallow inlining of functions which contain indirect branches.
- if (isa<IndirectBrInst>(BI->getTerminator()))
+ if (isa<IndirectBrInst>(BB.getTerminator()))
return InlineResult::failure("contains indirect branches");
// Disallow inlining of blockaddresses which are used by non-callbr
// instructions.
- if (BI->hasAddressTaken())
- for (User *U : BlockAddress::get(&*BI)->users())
+ if (BB.hasAddressTaken())
+ for (User *U : BlockAddress::get(&BB)->users())
if (!isa<CallBrInst>(*U))
return InlineResult::failure("blockaddress used outside of callbr");
- for (auto &II : *BI) {
+ for (auto &II : BB) {
CallBase *Call = dyn_cast<CallBase>(&II);
if (!Call)
continue;
// Disallow recursive calls.
- if (&F == Call->getCalledFunction())
+ Function *Callee = Call->getCalledFunction();
+ if (&F == Callee)
return InlineResult::failure("recursive call");
// Disallow calls which expose returns-twice to a function not previously
@@ -2446,8 +2647,8 @@ InlineResult llvm::isInlineViable(Function &F) {
cast<CallInst>(Call)->canReturnTwice())
return InlineResult::failure("exposes returns-twice attribute");
- if (Call->getCalledFunction())
- switch (Call->getCalledFunction()->getIntrinsicID()) {
+ if (Callee)
+ switch (Callee->getIntrinsicID()) {
default:
break;
case llvm::Intrinsic::icall_branch_funnel:
@@ -2574,7 +2775,7 @@ InlineCostAnnotationPrinterPass::run(Function &F,
// We can add a flag which determines InlineParams for this run. Right now,
// the default InlineParams are used.
const InlineParams Params = llvm::getInlineParams();
- for (BasicBlock &BB : F) {
+ for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
Function *CalledFunction = CI->getCalledFunction();
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineFeaturesAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineFeaturesAnalysis.cpp
deleted file mode 100644
index 90f521bbaab4..000000000000
--- a/contrib/llvm-project/llvm/lib/Analysis/InlineFeaturesAnalysis.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-//===- InlineFeaturesAnalysis.cpp - Feature extraction for ML Policies ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements an analysis extracting function features, which may be
-// used by ML-driven policies, for example.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/InlineFeaturesAnalysis.h"
-#include "llvm/IR/Instructions.h"
-
-using namespace llvm;
-
-AnalysisKey InlineFeaturesAnalysis::Key;
-
-InlineFeaturesAnalysis::Result
-InlineFeaturesAnalysis::run(const Function &F, FunctionAnalysisManager &FAM) {
- Result Ret;
- Ret.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
- for (const auto &BB : F) {
- ++Ret.BasicBlockCount;
- if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
- if (BI->isConditional())
- Ret.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors();
- } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()))
- Ret.BlocksReachedFromConditionalInstruction +=
- (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
- for (const auto &I : BB)
- if (auto *CS = dyn_cast<CallBase>(&I)) {
- const auto *Callee = CS->getCalledFunction();
- if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
- ++Ret.DirectCallsToDefinedFunctions;
- }
- }
- return Ret;
-} \ No newline at end of file
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
index ebc59879d357..3c90e82fb952 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
@@ -67,8 +67,6 @@ public:
static const size_t NumNamedFeatures =
static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
struct FunctionFeatures {
- static std::vector<std::pair<size_t, size_t>>
- ImportantInstructionSuccessions;
static const size_t FeatureCount;
std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
@@ -84,53 +82,38 @@ public:
static FunctionFeatures getFunctionFeatures(Function &F,
FunctionAnalysisManager &FAM);
-
-private:
- /// Sort once the feature tuples.
- struct SortFeatureTuples {
- bool IsSorted = false;
- SortFeatureTuples() {
- std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(),
- FunctionFeatures::ImportantInstructionSuccessions.end());
- IsSorted = true;
- }
- };
-
- static llvm::ManagedStatic<SortFeatureTuples> TupleSorter;
-
- static bool ensureSortedTuples() { return TupleSorter->IsSorted; }
};
-llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples>
- IRToNativeSizeLearning::TupleSorter;
// This is a point in time - we determined including these pairs of
// consecutive instructions (in the IR layout available at inline time) as
// features improves the model performance. We want to move away from manual
// feature selection.
-// The vector is given in opcode pairs rather than labels because 1) labels
-// weren't readily available, and 2) the successions were hand - extracted
-std::vector<std::pair<size_t, size_t>>
- IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions =
- {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48},
- {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34},
- {1, 1}, {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34},
- {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56},
- {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31},
- {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1},
- {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34},
- {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39},
- {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29},
- {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30},
- {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56},
- {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28},
- {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53},
- {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2},
- {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56},
- {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1},
- {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32},
- {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24},
- {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22},
- {1, 27}, {49, 1}, {1, 8}, {56, 2}};
+// The array is given in opcode pairs rather than labels because 1) labels
+// weren't readily available, and 2) the successions were hand-extracted.
+//
+// This array must be sorted.
+static const std::array<std::pair<size_t, size_t>, 137>
+ ImportantInstructionSuccessions{
+ {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11},
+ {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24},
+ {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31},
+ {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45},
+ {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33},
+ {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56},
+ {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
+ {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31},
+ {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15},
+ {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
+ {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32},
+ {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2},
+ {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
+ {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56},
+ {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
+ {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53},
+ {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
+ {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34},
+ {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
+ {64, 1}, {64, 64}, {65, 1}, {65, 65}}};
// We have: 9 calculated features (the features here); 1 feature for each
// instruction opcode; and 1 feature for each manually-identified sequence.
@@ -140,16 +123,15 @@ std::vector<std::pair<size_t, size_t>>
// Note that instruction opcodes start from 1. For convenience, we also have an
// always 0 feature for the '0' opcode, hence the extra 1.
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
- IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions
- .size() +
- getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures;
+ ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
+ IRToNativeSizeLearning::NumNamedFeatures;
size_t getSize(Function &F, TargetTransformInfo &TTI) {
size_t Ret = 0;
- for (auto &BB : F)
- for (auto &I : BB)
- Ret += TTI.getInstructionCost(
- &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize);
+ for (const auto &BB : F)
+ for (const auto &I : BB)
+ Ret += *(TTI.getInstructionCost(
+ &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue());
return Ret;
}
@@ -161,8 +143,8 @@ size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
unsigned getMaxDominatorTreeDepth(const Function &F,
const DominatorTree &Tree) {
unsigned Ret = 0;
- for (auto &BB : F)
- if (auto *TN = Tree.getNode(&BB))
+ for (const auto &BB : F)
+ if (const auto *TN = Tree.getNode(&BB))
Ret = std::max(Ret, TN->getLevel());
return Ret;
}
@@ -171,42 +153,37 @@ unsigned getMaxDominatorTreeDepth(const Function &F,
IRToNativeSizeLearning::FunctionFeatures
IRToNativeSizeLearning::getFunctionFeatures(Function &F,
FunctionAnalysisManager &FAM) {
- assert(ensureSortedTuples() && "expected lazy initialization");
+ assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
+ "expected function features are sorted");
auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
FunctionFeatures FF;
size_t InstrCount = getMaxInstructionID() + 1;
FF.InstructionHistogram.resize(InstrCount);
- FF.InstructionPairHistogram.resize(
- FunctionFeatures::ImportantInstructionSuccessions.size());
+ FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());
- auto StartID = 0;
- auto LastID = StartID;
+ int StartID = 0;
+ int LastID = StartID;
auto getPairIndex = [](size_t a, size_t b) {
- auto I =
- std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(),
- FunctionFeatures::ImportantInstructionSuccessions.end(),
- std::make_pair(a, b));
- if (I == FunctionFeatures::ImportantInstructionSuccessions.end())
+ auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
+ if (I == ImportantInstructionSuccessions.end())
return -1;
- return static_cast<int>(std::distance(
- FunctionFeatures::ImportantInstructionSuccessions.begin(), I));
+ return static_cast<int>(
+ std::distance(ImportantInstructionSuccessions.begin(), I));
};
// We don't want debug calls, because they'd just add noise.
- for (auto &BB : F) {
- for (auto I = BB.instructionsWithoutDebug().begin(),
- E = BB.instructionsWithoutDebug().end();
- I != E; ++I) {
- auto ID = I->getOpcode();
+ for (const auto &BB : F) {
+ for (const auto &I : BB.instructionsWithoutDebug()) {
+ auto ID = I.getOpcode();
++FF.InstructionHistogram[ID];
int PairIndex = getPairIndex(LastID, ID);
if (PairIndex >= 0)
++FF.InstructionPairHistogram[PairIndex];
LastID = ID;
- if (isa<CallBase>(*I))
+ if (isa<CallBase>(I))
++FF[NamedFeatureIndex::Calls];
}
}
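// A standalone sketch (not part of the merged patch): because the succession
// table is kept sorted (see the llvm::is_sorted assert above), the pair lookup
// done linearly by the getPairIndex lambda could equally use binary search.
// The tiny table below is illustrative only, not the real 137-entry array.
#include <algorithm>
#include <array>
#include <cstddef>
#include <utility>

static const std::array<std::pair<size_t, size_t>, 4> Successions{
    {{1, 1}, {1, 34}, {2, 48}, {13, 26}}};

int pairIndex(size_t A, size_t B) {
  auto Key = std::make_pair(A, B);
  auto It = std::lower_bound(Successions.begin(), Successions.end(), Key);
  if (It == Successions.end() || *It != Key)
    return -1;                           // the pair is not a tracked feature
  return static_cast<int>(std::distance(Successions.begin(), It));
}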
@@ -244,19 +221,18 @@ InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
if (!isEvaluatorRequested()) {
return;
}
- std::vector<std::string> InputNames{"serving_default_input_1"};
- std::vector<std::string> OutputName{"StatefulPartitionedCall"};
+ std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
+ "serving_default_input_1",
+ {1, static_cast<int64_t>(
+ IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
+ std::vector<TensorSpec> OutputSpecs{
+ TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
Evaluator = std::make_unique<TFModelEvaluator>(
- TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName);
+ TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
if (!Evaluator || !Evaluator->isValid()) {
Evaluator.reset();
return;
}
- static const std::vector<int64_t> Dim{
- 1, static_cast<int64_t>(
- IRToNativeSizeLearning::FunctionFeatures::FeatureCount)};
-
- Evaluator->initInput<int32_t>(0, Dim);
}
InlineSizeEstimatorAnalysis::Result
@@ -296,4 +272,12 @@ InlineSizeEstimatorAnalysis::run(const Function &F,
return None;
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
-#endif \ No newline at end of file
+#endif
+
+PreservedAnalyses
+InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
+ << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp
index bb9c7b7eb11f..8366bee083f2 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/InstCount.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Function.h"
@@ -23,57 +24,71 @@ using namespace llvm;
#define DEBUG_TYPE "instcount"
-STATISTIC(TotalInsts , "Number of instructions (of all types)");
+STATISTIC(TotalInsts, "Number of instructions (of all types)");
STATISTIC(TotalBlocks, "Number of basic blocks");
-STATISTIC(TotalFuncs , "Number of non-external functions");
+STATISTIC(TotalFuncs, "Number of non-external functions");
-#define HANDLE_INST(N, OPCODE, CLASS) \
- STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ STATISTIC(Num##OPCODE##Inst, "Number of " #OPCODE " insts");
#include "llvm/IR/Instruction.def"
namespace {
- class InstCount : public FunctionPass, public InstVisitor<InstCount> {
- friend class InstVisitor<InstCount>;
+class InstCount : public InstVisitor<InstCount> {
+ friend class InstVisitor<InstCount>;
- void visitFunction (Function &F) { ++TotalFuncs; }
- void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; }
+ void visitFunction(Function &F) { ++TotalFuncs; }
+ void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; }
-#define HANDLE_INST(N, OPCODE, CLASS) \
- void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; }
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ void visit##OPCODE(CLASS &) { \
+ ++Num##OPCODE##Inst; \
+ ++TotalInsts; \
+ }
#include "llvm/IR/Instruction.def"
- void visitInstruction(Instruction &I) {
- errs() << "Instruction Count does not know about " << I;
- llvm_unreachable(nullptr);
- }
- public:
- static char ID; // Pass identification, replacement for typeid
- InstCount() : FunctionPass(ID) {
- initializeInstCountPass(*PassRegistry::getPassRegistry());
- }
+ void visitInstruction(Instruction &I) {
+ errs() << "Instruction Count does not know about " << I;
+ llvm_unreachable(nullptr);
+ }
+};
+} // namespace
- bool runOnFunction(Function &F) override;
+PreservedAnalyses InstCountPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ LLVM_DEBUG(dbgs() << "INSTCOUNT: running on function " << F.getName()
+ << "\n");
+ InstCount().visit(F);
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- void print(raw_ostream &O, const Module *M) const override {}
+ return PreservedAnalyses::all();
+}
+namespace {
+class InstCountLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ InstCountLegacyPass() : FunctionPass(ID) {
+ initializeInstCountLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ LLVM_DEBUG(dbgs() << "INSTCOUNT: running on function " << F.getName()
+ << "\n");
+ InstCount().visit(F);
+ return false;
};
-}
-char InstCount::ID = 0;
-INITIALIZE_PASS(InstCount, "instcount",
- "Counts the various types of Instructions", false, true)
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
-FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
+ void print(raw_ostream &O, const Module *M) const override {}
+};
+} // namespace
-// InstCount::run - This is the main Analysis entry point for a
-// function.
-//
-bool InstCount::runOnFunction(Function &F) {
- visit(F);
- return false;
-}
+char InstCountLegacyPass::ID = 0;
+INITIALIZE_PASS(InstCountLegacyPass, "instcount",
+ "Counts the various types of Instructions", false, true)
+
+FunctionPass *llvm::createInstCountPass() { return new InstCountLegacyPass(); }
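// A minimal sketch (not part of the merged patch) of driving the new
// pass-manager InstCountPass added above from C++. A full driver would
// normally build all four analysis managers and cross-register their proxies;
// for a pass with no analysis dependencies the function-level managers suffice.
#include "llvm/Analysis/InstCount.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

void countInstructions(Function &F) {
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);  // also registers pass instrumentation

  FunctionPassManager FPM;
  FPM.addPass(InstCountPass());
  FPM.run(F, FAM);                   // STATISTIC counters are emitted when statistics are enabled
}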
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
index e744a966a104..a12816885c40 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -228,64 +228,56 @@ static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
return false;
}
-/// Simplify "A op (B op' C)" by distributing op over op', turning it into
-/// "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
-/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
-/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
-/// Returns the simplified value, or null if no simplification was performed.
-static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
- Instruction::BinaryOps OpcodeToExpand,
+/// Try to simplify a binary operator of the form "V op OtherOp" where V is
+/// "(B0 opex B1)" by distributing 'op' across 'opex' as
+/// "(B0 op OtherOp) opex (B1 op OtherOp)".
+static Value *expandBinOp(Instruction::BinaryOps Opcode, Value *V,
+ Value *OtherOp, Instruction::BinaryOps OpcodeToExpand,
const SimplifyQuery &Q, unsigned MaxRecurse) {
- // Recursion is always used, so bail out at once if we already hit the limit.
- if (!MaxRecurse--)
+ auto *B = dyn_cast<BinaryOperator>(V);
+ if (!B || B->getOpcode() != OpcodeToExpand)
+ return nullptr;
+ Value *B0 = B->getOperand(0), *B1 = B->getOperand(1);
+ Value *L = SimplifyBinOp(Opcode, B0, OtherOp, Q.getWithoutUndef(),
+ MaxRecurse);
+ if (!L)
+ return nullptr;
+ Value *R = SimplifyBinOp(Opcode, B1, OtherOp, Q.getWithoutUndef(),
+ MaxRecurse);
+ if (!R)
return nullptr;
- // Check whether the expression has the form "(A op' B) op C".
- if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
- if (Op0->getOpcode() == OpcodeToExpand) {
- // It does! Try turning it into "(A op C) op' (B op C)".
- Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
- // Do "A op C" and "B op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
- // They do! Return "L op' R" if it simplifies or is already available.
- // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
- if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
- && L == B && R == A)) {
- ++NumExpand;
- return LHS;
- }
- // Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
- ++NumExpand;
- return V;
- }
- }
- }
+ // Does the expanded pair of binops simplify to the existing binop?
+ if ((L == B0 && R == B1) ||
+ (Instruction::isCommutative(OpcodeToExpand) && L == B1 && R == B0)) {
+ ++NumExpand;
+ return B;
+ }
- // Check whether the expression has the form "A op (B op' C)".
- if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
- if (Op1->getOpcode() == OpcodeToExpand) {
- // It does! Try turning it into "(A op B) op' (A op C)".
- Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
- // Do "A op B" and "A op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) {
- // They do! Return "L op' R" if it simplifies or is already available.
- // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
- if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
- && L == C && R == B)) {
- ++NumExpand;
- return RHS;
- }
- // Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
- ++NumExpand;
- return V;
- }
- }
- }
+ // Otherwise, return "L op' R" if it simplifies.
+ Value *S = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse);
+ if (!S)
+ return nullptr;
+ ++NumExpand;
+ return S;
+}
+
+/// Try to simplify binops of form "A op (B op' C)" or the commuted variant by
+/// distributing op over op'.
+static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode,
+ Value *L, Value *R,
+ Instruction::BinaryOps OpcodeToExpand,
+ const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return nullptr;
+
+ if (Value *V = expandBinOp(Opcode, L, R, OpcodeToExpand, Q, MaxRecurse))
+ return V;
+ if (Value *V = expandBinOp(Opcode, R, L, OpcodeToExpand, Q, MaxRecurse))
+ return V;
return nullptr;
}
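// A standalone sketch (not part of the merged patch): the expansions requested
// at the call sites below are only sound because the outer opcode distributes
// over the inner one, including under wraparound arithmetic. Exhaustive 8-bit
// check of the distributive laws relied upon (Mul/Add, And/Or, And/Xor, Or/And):
#include <cassert>
#include <cstdint>

void checkDistributivity() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned C = 0; C < 256; ++C) {
        uint8_t a = A, b = B, c = C;
        assert(uint8_t(a * (b + c)) == uint8_t(a * b + a * c)); // Mul over Add
        assert((a & (b | c)) == ((a & b) | (a & c)));           // And over Or
        assert((a & (b ^ c)) == ((a & b) ^ (a & c)));           // And over Xor
        assert((a | (b & c)) == ((a | b) & (a | c)));           // Or over And
      }
}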
@@ -423,9 +415,9 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
return TV;
// If one branch simplified to undef, return the other one.
- if (TV && isa<UndefValue>(TV))
+ if (TV && Q.isUndefValue(TV))
return FV;
- if (FV && isa<UndefValue>(FV))
+ if (FV && Q.isUndefValue(FV))
return TV;
// If applying the operation did not change the true and false select values,
@@ -620,7 +612,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
return C;
// X + undef -> undef
- if (match(Op1, m_Undef()))
+ if (Q.isUndefValue(Op1))
return Op1;
// X + 0 -> X
@@ -740,7 +732,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// X - undef -> undef
// undef - X -> undef
- if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1))
return UndefValue::get(Op0->getType());
// X - 0 -> X
@@ -875,7 +867,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// X * undef -> 0
// X * 0 -> 0
- if (match(Op1, m_CombineOr(m_Undef(), m_Zero())))
+ if (Q.isUndefValue(Op1) || match(Op1, m_Zero()))
return Constant::getNullValue(Op0->getType());
// X * 1 -> X
@@ -901,8 +893,8 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
- if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
- Q, MaxRecurse))
+ if (Value *V = expandCommutativeBinOp(Instruction::Mul, Op0, Op1,
+ Instruction::Add, Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
@@ -928,36 +920,37 @@ Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
/// Check for common or similar folds of integer division or integer remainder.
/// This applies to all 4 opcodes (sdiv/udiv/srem/urem).
-static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) {
+static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv,
+ const SimplifyQuery &Q) {
Type *Ty = Op0->getType();
- // X / undef -> undef
- // X % undef -> undef
- if (match(Op1, m_Undef()))
- return Op1;
+ // X / undef -> poison
+ // X % undef -> poison
+ if (Q.isUndefValue(Op1))
+ return PoisonValue::get(Ty);
- // X / 0 -> undef
- // X % 0 -> undef
+ // X / 0 -> poison
+ // X % 0 -> poison
// We don't need to preserve faults!
if (match(Op1, m_Zero()))
- return UndefValue::get(Ty);
+ return PoisonValue::get(Ty);
- // If any element of a constant divisor fixed width vector is zero or undef,
- // the whole op is undef.
+ // If any element of a constant divisor fixed-width vector is zero or undef,
+ // the behavior is undefined and we can fold the whole op to poison.
auto *Op1C = dyn_cast<Constant>(Op1);
auto *VTy = dyn_cast<FixedVectorType>(Ty);
if (Op1C && VTy) {
unsigned NumElts = VTy->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = Op1C->getAggregateElement(i);
- if (Elt && (Elt->isNullValue() || isa<UndefValue>(Elt)))
- return UndefValue::get(Ty);
+ if (Elt && (Elt->isNullValue() || Q.isUndefValue(Elt)))
+ return PoisonValue::get(Ty);
}
}
// undef / X -> 0
// undef % X -> 0
- if (match(Op0, m_Undef()))
+ if (Q.isUndefValue(Op0))
return Constant::getNullValue(Ty);
// 0 / X -> 0
@@ -1051,7 +1044,7 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
return C;
- if (Value *V = simplifyDivRem(Op0, Op1, true))
+ if (Value *V = simplifyDivRem(Op0, Op1, true, Q))
return V;
bool IsSigned = Opcode == Instruction::SDiv;
@@ -1109,7 +1102,7 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
return C;
- if (Value *V = simplifyDivRem(Op0, Op1, false))
+ if (Value *V = simplifyDivRem(Op0, Op1, false, Q))
return V;
// (X % Y) % Y -> X % Y
@@ -1204,14 +1197,14 @@ Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit);
}
-/// Returns true if a shift by \c Amount always yields undef.
-static bool isUndefShift(Value *Amount) {
+/// Returns true if a shift by \c Amount always yields poison.
+static bool isPoisonShift(Value *Amount, const SimplifyQuery &Q) {
Constant *C = dyn_cast<Constant>(Amount);
if (!C)
return false;
- // X shift by undef -> undef because it may shift by the bitwidth.
- if (isa<UndefValue>(C))
+ // X shift by undef -> poison because it may shift by the bitwidth.
+ if (Q.isUndefValue(C))
return true;
// Shifting by the bitwidth or more is undefined.
@@ -1222,9 +1215,10 @@ static bool isUndefShift(Value *Amount) {
// If all lanes of a vector shift are undefined the whole shift is.
if (isa<ConstantVector>(C) || isa<ConstantDataVector>(C)) {
- for (unsigned I = 0, E = cast<VectorType>(C->getType())->getNumElements();
+ for (unsigned I = 0,
+ E = cast<FixedVectorType>(C->getType())->getNumElements();
I != E; ++I)
- if (!isUndefShift(C->getAggregateElement(I)))
+ if (!isPoisonShift(C->getAggregateElement(I), Q))
return false;
return true;
}
@@ -1252,8 +1246,8 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
return Op0;
// Fold undefined shifts.
- if (isUndefShift(Op1))
- return UndefValue::get(Op0->getType());
+ if (isPoisonShift(Op1, Q))
+ return PoisonValue::get(Op0->getType());
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
@@ -1271,7 +1265,7 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
// the number of bits in the type, the shift is undefined.
KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (Known.One.getLimitedValue() >= Known.getBitWidth())
- return UndefValue::get(Op0->getType());
+ return PoisonValue::get(Op0->getType());
// If all valid bits in the shift amount are known zero, the first operand is
// unchanged.
@@ -1296,7 +1290,7 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
// undef >> X -> 0
// undef >> X -> undef (if it's exact)
- if (match(Op0, m_Undef()))
+ if (Q.isUndefValue(Op0))
return isExact ? Op0 : Constant::getNullValue(Op0->getType());
// The low bit cannot be shifted out of an exact shift if it is set.
@@ -1318,7 +1312,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// undef << X -> 0
// undef << X -> undef if (if it's NSW/NUW)
- if (match(Op0, m_Undef()))
+ if (Q.isUndefValue(Op0))
return isNSW || isNUW ? Op0 : Constant::getNullValue(Op0->getType());
// (X >> A) << A -> X
@@ -1704,25 +1698,27 @@ static Value *simplifyAndOrOfICmpsWithLimitConst(ICmpInst *Cmp0, ICmpInst *Cmp1,
if (!Cmp0->isEquality())
return nullptr;
- // The equality compare must be against a constant. Convert the 'null' pointer
- // constant to an integer zero value.
- APInt MinMaxC;
- const APInt *C;
- if (match(Cmp0->getOperand(1), m_APInt(C)))
- MinMaxC = *C;
- else if (isa<ConstantPointerNull>(Cmp0->getOperand(1)))
- MinMaxC = APInt::getNullValue(8);
- else
- return nullptr;
-
// The non-equality compare must include a common operand (X). Canonicalize
// the common operand as operand 0 (the predicate is swapped if the common
// operand was operand 1).
ICmpInst::Predicate Pred0 = Cmp0->getPredicate();
Value *X = Cmp0->getOperand(0);
ICmpInst::Predicate Pred1;
- if (!match(Cmp1, m_c_ICmp(Pred1, m_Specific(X), m_Value())) ||
- ICmpInst::isEquality(Pred1))
+ bool HasNotOp = match(Cmp1, m_c_ICmp(Pred1, m_Not(m_Specific(X)), m_Value()));
+ if (!HasNotOp && !match(Cmp1, m_c_ICmp(Pred1, m_Specific(X), m_Value())))
+ return nullptr;
+ if (ICmpInst::isEquality(Pred1))
+ return nullptr;
+
+ // The equality compare must be against a constant. Flip bits if we matched
+ // a bitwise not. Convert a null pointer constant to an integer zero value.
+ APInt MinMaxC;
+ const APInt *C;
+ if (match(Cmp0->getOperand(1), m_APInt(C)))
+ MinMaxC = HasNotOp ? ~*C : *C;
+ else if (isa<ConstantPointerNull>(Cmp0->getOperand(1)))
+ MinMaxC = APInt::getNullValue(8);
+ else
return nullptr;
// DeMorganize if this is 'or': P0 || P1 --> !P0 && !P1.
@@ -2003,6 +1999,30 @@ static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0,
return NotOp1;
}
+/// Given a bitwise logic op, check if the operands are add/sub with a common
+/// source value and inverted constant (identity: C - X -> ~(X + ~C)).
+static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1,
+ Instruction::BinaryOps Opcode) {
+ assert(Op0->getType() == Op1->getType() && "Mismatched binop types");
+ assert(BinaryOperator::isBitwiseLogicOp(Opcode) && "Expected logic op");
+ Value *X;
+ Constant *C1, *C2;
+ if ((match(Op0, m_Add(m_Value(X), m_Constant(C1))) &&
+ match(Op1, m_Sub(m_Constant(C2), m_Specific(X)))) ||
+ (match(Op1, m_Add(m_Value(X), m_Constant(C1))) &&
+ match(Op0, m_Sub(m_Constant(C2), m_Specific(X))))) {
+ if (ConstantExpr::getNot(C1) == C2) {
+ // (X + C) & (~C - X) --> (X + C) & ~(X + C) --> 0
+ // (X + C) | (~C - X) --> (X + C) | ~(X + C) --> -1
+ // (X + C) ^ (~C - X) --> (X + C) ^ ~(X + C) --> -1
+ Type *Ty = Op0->getType();
+ return Opcode == Instruction::And ? ConstantInt::getNullValue(Ty)
+ : ConstantInt::getAllOnesValue(Ty);
+ }
+ }
+ return nullptr;
+}
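// A standalone sketch (not part of the merged patch) verifying the
// two's-complement identity behind the fold above: ~C - X == ~(X + C), and
// therefore (X + C) & (~C - X) == 0 while the Or and Xor fold to all-ones.
// Exhaustive check over 8-bit values:
#include <cassert>
#include <cstdint>

void checkLogicOfAddSubIdentity() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C) {
      uint8_t x = X, c = C;
      uint8_t Add = uint8_t(x + c);            // X + C
      uint8_t Sub = uint8_t(uint8_t(~c) - x);  // ~C - X
      assert(Sub == uint8_t(~Add));            // ~C - X == ~(X + C)
      assert(uint8_t(Add & Sub) == 0x00);      // And folds to 0
      assert(uint8_t(Add | Sub) == 0xFF);      // Or folds to -1
      assert(uint8_t(Add ^ Sub) == 0xFF);      // Xor folds to -1
    }
}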
+
/// Given operands for an And, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -2011,7 +2031,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return C;
// X & undef -> 0
- if (match(Op1, m_Undef()))
+ if (Q.isUndefValue(Op1))
return Constant::getNullValue(Op0->getType());
// X & X = X
@@ -2039,6 +2059,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (match(Op1, m_c_Or(m_Specific(Op0), m_Value())))
return Op0;
+ if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::And))
+ return V;
+
// A mask that only clears known zeros of a shifted value is a no-op.
Value *X;
const APInt *Mask;
@@ -2095,21 +2118,30 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return V;
// And distributes over Or. Try some generic simplifications based on this.
- if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- Q, MaxRecurse))
+ if (Value *V = expandCommutativeBinOp(Instruction::And, Op0, Op1,
+ Instruction::Or, Q, MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
- if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
- Q, MaxRecurse))
+ if (Value *V = expandCommutativeBinOp(Instruction::And, Op0, Op1,
+ Instruction::Xor, Q, MaxRecurse))
return V;
- // If the operation is with the result of a select instruction, check whether
- // operating on either branch of the select always yields the same value.
- if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) {
+ if (Op0->getType()->isIntOrIntVectorTy(1)) {
+ // A & (A && B) -> A && B
+ if (match(Op1, m_Select(m_Specific(Op0), m_Value(), m_Zero())))
+ return Op1;
+ else if (match(Op0, m_Select(m_Specific(Op1), m_Value(), m_Zero())))
+ return Op0;
+ }
+ // If the operation is with the result of a select instruction, check
+ // whether operating on either branch of the select always yields the same
+ // value.
if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
+ }
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
@@ -2169,7 +2201,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// X | undef -> -1
// X | -1 = -1
// Do not return Op1 because it may contain undef elements if it's a vector.
- if (match(Op1, m_Undef()) || match(Op1, m_AllOnes()))
+ if (Q.isUndefValue(Op1) || match(Op1, m_AllOnes()))
return Constant::getAllOnesValue(Op0->getType());
// X | X = X
@@ -2198,7 +2230,10 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value()))))
return Constant::getAllOnesValue(Op0->getType());
- Value *A, *B;
+ if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or))
+ return V;
+
+ Value *A, *B, *NotA;
// (A & ~B) | (A ^ B) -> (A ^ B)
// (~B & A) | (A ^ B) -> (A ^ B)
// (A & ~B) | (B ^ A) -> (B ^ A)
@@ -2227,6 +2262,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
return Op1;
+ // Commute the 'or' operands.
// (~A ^ B) | (A & B) -> (~A ^ B)
// (~A ^ B) | (B & A) -> (~A ^ B)
// (B ^ ~A) | (A & B) -> (B ^ ~A)
@@ -2236,6 +2272,25 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
return Op0;
+ // (~A & B) | ~(A | B) --> ~A
+ // (~A & B) | ~(B | A) --> ~A
+ // (B & ~A) | ~(A | B) --> ~A
+ // (B & ~A) | ~(B | A) --> ~A
+ if (match(Op0, m_c_And(m_CombineAnd(m_Value(NotA), m_Not(m_Value(A))),
+ m_Value(B))) &&
+ match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ return NotA;
+
+ // Commute the 'or' operands.
+ // ~(A | B) | (~A & B) --> ~A
+ // ~(B | A) | (~A & B) --> ~A
+ // ~(A | B) | (B & ~A) --> ~A
+ // ~(B | A) | (B & ~A) --> ~A
+ if (match(Op1, m_c_And(m_CombineAnd(m_Value(NotA), m_Not(m_Value(A))),
+ m_Value(B))) &&
+ match(Op0, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ return NotA;
+
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
@@ -2253,16 +2308,25 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return V;
// Or distributes over And. Try some generic simplifications based on this.
- if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q,
- MaxRecurse))
+ if (Value *V = expandCommutativeBinOp(Instruction::Or, Op0, Op1,
+ Instruction::And, Q, MaxRecurse))
return V;
- // If the operation is with the result of a select instruction, check whether
- // operating on either branch of the select always yields the same value.
- if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) {
+ if (Op0->getType()->isIntOrIntVectorTy(1)) {
+ // A | (A || B) -> A || B
+ if (match(Op1, m_Select(m_Specific(Op0), m_One(), m_Value())))
+ return Op1;
+ else if (match(Op0, m_Select(m_Specific(Op1), m_One(), m_Value())))
+ return Op0;
+ }
+ // If the operation is with the result of a select instruction, check
+ // whether operating on either branch of the select always yields the same
+ // value.
if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
return V;
+ }
// (A & C1)|(B & C2)
const APInt *C1, *C2;
@@ -2311,7 +2375,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return C;
// A ^ undef -> undef
- if (match(Op1, m_Undef()))
+ if (Q.isUndefValue(Op1))
return Op1;
// A ^ 0 = A
@@ -2327,6 +2391,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op1, m_Not(m_Specific(Op0))))
return Constant::getAllOnesValue(Op0->getType());
+ if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Xor))
+ return V;
+
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
MaxRecurse))
@@ -2533,8 +2600,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
// memory within the lifetime of the current function (allocas, byval
// arguments, globals), then determine the comparison result here.
SmallVector<const Value *, 8> LHSUObjs, RHSUObjs;
- GetUnderlyingObjects(LHS, LHSUObjs, DL);
- GetUnderlyingObjects(RHS, RHSUObjs, DL);
+ getUnderlyingObjects(LHS, LHSUObjs);
+ getUnderlyingObjects(RHS, RHSUObjs);
// Is the set of underlying objects all noalias calls?
auto IsNAC = [](ArrayRef<const Value *> Objects) {
@@ -2741,7 +2808,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
}
const APInt *C;
- if (!match(RHS, m_APInt(C)))
+ if (!match(RHS, m_APIntAllowUndef(C)))
return nullptr;
// Rule out tautological comparisons (eg., ult 0 or uge 0).
@@ -2759,17 +2826,166 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
return ConstantInt::getFalse(ITy);
}
+ // (mul nuw/nsw X, MulC) != C --> true (if C is not a multiple of MulC)
+ // (mul nuw/nsw X, MulC) == C --> false (if C is not a multiple of MulC)
+ const APInt *MulC;
+ if (ICmpInst::isEquality(Pred) &&
+ ((match(LHS, m_NUWMul(m_Value(), m_APIntAllowUndef(MulC))) &&
+ *MulC != 0 && C->urem(*MulC) != 0) ||
+ (match(LHS, m_NSWMul(m_Value(), m_APIntAllowUndef(MulC))) &&
+ *MulC != 0 && C->srem(*MulC) != 0)))
+ return ConstantInt::get(ITy, Pred == ICmpInst::ICMP_NE);
+
return nullptr;
}
+static Value *simplifyICmpWithBinOpOnLHS(
+ CmpInst::Predicate Pred, BinaryOperator *LBO, Value *RHS,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ Type *ITy = GetCompareTy(RHS); // The return type.
+
+ Value *Y = nullptr;
+ // icmp pred (or X, Y), X
+ if (match(LBO, m_c_Or(m_Value(Y), m_Specific(RHS)))) {
+ if (Pred == ICmpInst::ICMP_ULT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_UGE)
+ return getTrue(ITy);
+
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) {
+ KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (RHSKnown.isNonNegative() && YKnown.isNegative())
+ return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy);
+ if (RHSKnown.isNegative() || YKnown.isNonNegative())
+ return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy);
+ }
+ }
+
+ // icmp pred (and X, Y), X
+ if (match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) {
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
+ // icmp pred (urem X, Y), Y
+ if (match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: {
+ KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (!Known.isNonNegative())
+ break;
+ LLVM_FALLTHROUGH;
+ }
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return getFalse(ITy);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: {
+ KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (!Known.isNonNegative())
+ break;
+ LLVM_FALLTHROUGH;
+ }
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return getTrue(ITy);
+ }
+ }
+
+ // icmp pred (urem X, Y), X
+ if (match(LBO, m_URem(m_Specific(RHS), m_Value()))) {
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ }
+
+ // x >> y <=u x
+ // x udiv y <=u x.
+ if (match(LBO, m_LShr(m_Specific(RHS), m_Value())) ||
+ match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) {
+ // icmp pred (X op Y), X
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
+ // (x*C1)/C2 <= x for C1 <= C2.
+ // This holds even if the multiplication overflows: Assume that x != 0 and
+ // arithmetic is modulo M. For overflow to occur we must have C1 >= M/x and
+ // thus C2 >= M/x. It follows that (x*C1)/C2 <= (M-1)/C2 <= ((M-1)*x)/M < x.
+ //
+ // Additionally, either the multiplication or the division might be
+ // represented as a shift:
+ // (x*C1)>>C2 <= x for C1 < 2**C2.
+ // (x<<C1)/C2 <= x for 2**C1 < C2.
+ const APInt *C1, *C2;
+ if ((match(LBO, m_UDiv(m_Mul(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) &&
+ C1->ule(*C2)) ||
+ (match(LBO, m_LShr(m_Mul(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) &&
+ C1->ule(APInt(C2->getBitWidth(), 1) << *C2)) ||
+ (match(LBO, m_UDiv(m_Shl(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) &&
+ (APInt(C1->getBitWidth(), 1) << *C1).ule(*C2))) {
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
+ return nullptr;
+}
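// A standalone sketch (not part of the merged patch): exhaustive 8-bit check
// of the claim argued in the comment above, (x * C1) / C2 <= x whenever
// C1 <= C2, where the multiplication is allowed to wrap.
#include <cassert>
#include <cstdint>

void checkMulDivBound() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C1 = 0; C1 < 256; ++C1)
      for (unsigned C2 = 1; C2 < 256; ++C2) {
        if (C1 > C2)
          continue;                        // the fold requires C1 <= C2
        uint8_t Wrapped = uint8_t(X * C1); // the multiply may wrap modulo 2^8
        assert(Wrapped / C2 <= X);         // yet (x * C1) / C2 <= x still holds
      }
}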
+
+
+// If only one of the icmp's operands has NSW flags, try to prove that:
+//
+// icmp slt (x + C1), (x +nsw C2)
+//
+// is equivalent to:
+//
+// icmp slt C1, C2
+//
+// which is true if x + C2 has the NSW flags set and:
+// *) C1 < C2 && C1 >= 0, or
+// *) C2 < C1 && C1 <= 0.
+//
+static bool trySimplifyICmpWithAdds(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
+ // TODO: only support icmp slt for now.
+ if (Pred != CmpInst::ICMP_SLT)
+ return false;
+
+ // Canonicalize nsw add as RHS.
+ if (!match(RHS, m_NSWAdd(m_Value(), m_Value())))
+ std::swap(LHS, RHS);
+ if (!match(RHS, m_NSWAdd(m_Value(), m_Value())))
+ return false;
+
+ Value *X;
+ const APInt *C1, *C2;
+ if (!match(LHS, m_c_Add(m_Value(X), m_APInt(C1))) ||
+ !match(RHS, m_c_Add(m_Specific(X), m_APInt(C2))))
+ return false;
+
+ return (C1->slt(*C2) && C1->isNonNegative()) ||
+ (C2->slt(*C1) && C1->isNonPositive());
+}
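// A standalone sketch (not part of the merged patch): exhaustive 8-bit check
// of the equivalence stated above. The left-hand add may wrap; the right-hand
// add is restricted to the nsw cases, mirroring the conditions of the transform.
#include <cassert>
#include <cstdint>

void checkICmpWithAdds() {
  for (int X = -128; X <= 127; ++X)
    for (int C1 = -128; C1 <= 127; ++C1)
      for (int C2 = -128; C2 <= 127; ++C2) {
        bool Applies = (C1 < C2 && C1 >= 0) || (C2 < C1 && C1 <= 0);
        bool RHSIsNSW = X + C2 >= -128 && X + C2 <= 127;
        if (!Applies || !RHSIsNSW)
          continue;
        int8_t LHS = int8_t(uint8_t(X) + uint8_t(C1)); // x + C1, may wrap
        int8_t RHS = int8_t(X + C2);                   // x +nsw C2
        assert((LHS < RHS) == (C1 < C2));              // icmp slt folds to C1 <s C2
      }
}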
+
+
/// TODO: A large part of this logic is duplicated in InstCombine's
/// foldICmpBinOp(). We should be able to share that and avoid the code
/// duplication.
static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
- Type *ITy = GetCompareTy(LHS); // The return type.
-
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
if (MaxRecurse && (LBO || RBO)) {
@@ -2813,8 +3029,9 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return V;
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
- if (A && C && (A == C || A == D || B == C || B == D) && NoLHSWrapProblem &&
- NoRHSWrapProblem) {
+ bool CanSimplify = (NoLHSWrapProblem && NoRHSWrapProblem) ||
+ trySimplifyICmpWithAdds(Pred, LHS, RHS);
+ if (A && C && (A == C || A == D || B == C || B == D) && CanSimplify) {
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y, *Z;
if (A == C) {
@@ -2840,195 +3057,66 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
}
}
- {
- Value *Y = nullptr;
- // icmp pred (or X, Y), X
- if (LBO && match(LBO, m_c_Or(m_Value(Y), m_Specific(RHS)))) {
- if (Pred == ICmpInst::ICMP_ULT)
- return getFalse(ITy);
- if (Pred == ICmpInst::ICMP_UGE)
- return getTrue(ITy);
-
- if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) {
- KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (RHSKnown.isNonNegative() && YKnown.isNegative())
- return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy);
- if (RHSKnown.isNegative() || YKnown.isNonNegative())
- return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy);
- }
- }
- // icmp pred X, (or X, Y)
- if (RBO && match(RBO, m_c_Or(m_Value(Y), m_Specific(LHS)))) {
- if (Pred == ICmpInst::ICMP_ULE)
- return getTrue(ITy);
- if (Pred == ICmpInst::ICMP_UGT)
- return getFalse(ITy);
-
- if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) {
- KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (LHSKnown.isNonNegative() && YKnown.isNegative())
- return Pred == ICmpInst::ICMP_SGT ? getTrue(ITy) : getFalse(ITy);
- if (LHSKnown.isNegative() || YKnown.isNonNegative())
- return Pred == ICmpInst::ICMP_SGT ? getFalse(ITy) : getTrue(ITy);
- }
- }
- }
+ if (LBO)
+ if (Value *V = simplifyICmpWithBinOpOnLHS(Pred, LBO, RHS, Q, MaxRecurse))
+ return V;
- // icmp pred (and X, Y), X
- if (LBO && match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) {
- if (Pred == ICmpInst::ICMP_UGT)
- return getFalse(ITy);
- if (Pred == ICmpInst::ICMP_ULE)
- return getTrue(ITy);
- }
- // icmp pred X, (and X, Y)
- if (RBO && match(RBO, m_c_And(m_Value(), m_Specific(LHS)))) {
- if (Pred == ICmpInst::ICMP_UGE)
- return getTrue(ITy);
- if (Pred == ICmpInst::ICMP_ULT)
- return getFalse(ITy);
- }
+ if (RBO)
+ if (Value *V = simplifyICmpWithBinOpOnLHS(
+ ICmpInst::getSwappedPredicate(Pred), RBO, LHS, Q, MaxRecurse))
+ return V;
// 0 - (zext X) pred C
if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) {
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
- if (RHSC->getValue().isStrictlyPositive()) {
- if (Pred == ICmpInst::ICMP_SLT)
- return ConstantInt::getTrue(RHSC->getContext());
- if (Pred == ICmpInst::ICMP_SGE)
- return ConstantInt::getFalse(RHSC->getContext());
- if (Pred == ICmpInst::ICMP_EQ)
- return ConstantInt::getFalse(RHSC->getContext());
- if (Pred == ICmpInst::ICMP_NE)
- return ConstantInt::getTrue(RHSC->getContext());
+ const APInt *C;
+ if (match(RHS, m_APInt(C))) {
+ if (C->isStrictlyPositive()) {
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(GetCompareTy(RHS));
+ if (Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(GetCompareTy(RHS));
}
- if (RHSC->getValue().isNonNegative()) {
+ if (C->isNonNegative()) {
if (Pred == ICmpInst::ICMP_SLE)
- return ConstantInt::getTrue(RHSC->getContext());
+ return ConstantInt::getTrue(GetCompareTy(RHS));
if (Pred == ICmpInst::ICMP_SGT)
- return ConstantInt::getFalse(RHSC->getContext());
+ return ConstantInt::getFalse(GetCompareTy(RHS));
}
}
}
- // icmp pred (urem X, Y), Y
- if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
- switch (Pred) {
- default:
- break;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: {
- KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (!Known.isNonNegative())
- break;
- LLVM_FALLTHROUGH;
- }
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- return getFalse(ITy);
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: {
- KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (!Known.isNonNegative())
- break;
- LLVM_FALLTHROUGH;
- }
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- return getTrue(ITy);
- }
- }
-
- // icmp pred X, (urem Y, X)
- if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
- switch (Pred) {
- default:
- break;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: {
- KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (!Known.isNonNegative())
- break;
- LLVM_FALLTHROUGH;
- }
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- return getTrue(ITy);
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: {
- KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (!Known.isNonNegative())
- break;
- LLVM_FALLTHROUGH;
- }
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- return getFalse(ITy);
+ // If C2 is a power-of-2 and C is not:
+ // (C2 << X) == C --> false
+ // (C2 << X) != C --> true
+ const APInt *C;
+ if (match(LHS, m_Shl(m_Power2(), m_Value())) &&
+ match(RHS, m_APIntAllowUndef(C)) && !C->isPowerOf2()) {
+ // C2 << X can equal zero in some circumstances.
+ // This simplification might be unsafe if C is zero.
+ //
+ // We know it is safe if:
+ // - The shift is nsw. We can't shift out the one bit.
+ // - The shift is nuw. We can't shift out the one bit.
+ // - C2 is one.
+ // - C isn't zero.
+ if (Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
+ Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
+ match(LHS, m_Shl(m_One(), m_Value())) || !C->isNullValue()) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(GetCompareTy(RHS));
+ if (Pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(GetCompareTy(RHS));
}
}
- // x >> y <=u x
- // x udiv y <=u x.
- if (LBO && (match(LBO, m_LShr(m_Specific(RHS), m_Value())) ||
- match(LBO, m_UDiv(m_Specific(RHS), m_Value())))) {
- // icmp pred (X op Y), X
+ // TODO: This is overly constrained. LHS can be any power-of-2.
+ // (1 << X) >u 0x8000 --> false
+ // (1 << X) <=u 0x8000 --> true
+ if (match(LHS, m_Shl(m_One(), m_Value())) && match(RHS, m_SignMask())) {
if (Pred == ICmpInst::ICMP_UGT)
- return getFalse(ITy);
+ return ConstantInt::getFalse(GetCompareTy(RHS));
if (Pred == ICmpInst::ICMP_ULE)
- return getTrue(ITy);
- }
-
- // x >=u x >> y
- // x >=u x udiv y.
- if (RBO && (match(RBO, m_LShr(m_Specific(LHS), m_Value())) ||
- match(RBO, m_UDiv(m_Specific(LHS), m_Value())))) {
- // icmp pred X, (X op Y)
- if (Pred == ICmpInst::ICMP_ULT)
- return getFalse(ITy);
- if (Pred == ICmpInst::ICMP_UGE)
- return getTrue(ITy);
- }
-
- // handle:
- // CI2 << X == CI
- // CI2 << X != CI
- //
- // where CI2 is a power of 2 and CI isn't
- if (auto *CI = dyn_cast<ConstantInt>(RHS)) {
- const APInt *CI2Val, *CIVal = &CI->getValue();
- if (LBO && match(LBO, m_Shl(m_APInt(CI2Val), m_Value())) &&
- CI2Val->isPowerOf2()) {
- if (!CIVal->isPowerOf2()) {
- // CI2 << X can equal zero in some circumstances,
- // this simplification is unsafe if CI is zero.
- //
- // We know it is safe if:
- // - The shift is nsw, we can't shift out the one bit.
- // - The shift is nuw, we can't shift out the one bit.
- // - CI2 is one
- // - CI isn't zero
- if (Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
- Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
- CI2Val->isOneValue() || !CI->isZero()) {
- if (Pred == ICmpInst::ICMP_EQ)
- return ConstantInt::getFalse(RHS->getContext());
- if (Pred == ICmpInst::ICMP_NE)
- return ConstantInt::getTrue(RHS->getContext());
- }
- }
- if (CIVal->isSignMask() && CI2Val->isOneValue()) {
- if (Pred == ICmpInst::ICMP_UGT)
- return ConstantInt::getFalse(RHS->getContext());
- if (Pred == ICmpInst::ICMP_ULE)
- return ConstantInt::getTrue(RHS->getContext());
- }
- }
+ return ConstantInt::getTrue(GetCompareTy(RHS));
}
if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
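The rewritten shift fold above rests on a simple arithmetic fact: left-shifting a power of two yields either another power of two or zero, so it can never equal a constant that is neither. A minimal standalone C++ sketch (illustrative only, not part of the patch) that checks the identity exhaustively for i8:

// Standalone sketch: shifting a power of two left yields either another
// power of two or zero, so (C2 << X) can never equal a non-zero constant
// that is not a power of two.
#include <cassert>
#include <cstdint>

static bool isPowerOf2(uint8_t v) { return v != 0 && (v & (v - 1)) == 0; }

int main() {
  for (unsigned c2 = 1; c2 < 256; c2 <<= 1)    // C2: the i8 powers of two
    for (unsigned x = 0; x < 8; ++x) {         // all shift amounts
      uint8_t r = uint8_t(c2 << x);            // C2 << X, truncated to i8
      assert(r == 0 || isPowerOf2(r));         // never a non-power-of-2 value
    }
  return 0;
}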
@@ -3226,55 +3314,38 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
break;
}
case CmpInst::ICMP_UGE:
- // Always true.
return getTrue(ITy);
case CmpInst::ICMP_ULT:
- // Always false.
return getFalse(ITy);
}
}
- // Variants on "max(x,y) >= min(x,z)".
+ // Comparing one min and one max that share a common operand?

+ // Canonicalize min operand to RHS.
+ if (match(LHS, m_UMin(m_Value(), m_Value())) ||
+ match(LHS, m_SMin(m_Value(), m_Value()))) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
Value *C, *D;
if (match(LHS, m_SMax(m_Value(A), m_Value(B))) &&
match(RHS, m_SMin(m_Value(C), m_Value(D))) &&
(A == C || A == D || B == C || B == D)) {
- // max(x, ?) pred min(x, ?).
+ // smax(A, B) >=s smin(A, D) --> true
if (Pred == CmpInst::ICMP_SGE)
- // Always true.
return getTrue(ITy);
+ // smax(A, B) <s smin(A, D) --> false
if (Pred == CmpInst::ICMP_SLT)
- // Always false.
- return getFalse(ITy);
- } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
- match(RHS, m_SMax(m_Value(C), m_Value(D))) &&
- (A == C || A == D || B == C || B == D)) {
- // min(x, ?) pred max(x, ?).
- if (Pred == CmpInst::ICMP_SLE)
- // Always true.
- return getTrue(ITy);
- if (Pred == CmpInst::ICMP_SGT)
- // Always false.
return getFalse(ITy);
} else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) &&
match(RHS, m_UMin(m_Value(C), m_Value(D))) &&
(A == C || A == D || B == C || B == D)) {
- // max(x, ?) pred min(x, ?).
+ // umax(A, B) >=u umin(A, D) --> true
if (Pred == CmpInst::ICMP_UGE)
- // Always true.
return getTrue(ITy);
+ // umax(A, B) <u umin(A, D) --> false
if (Pred == CmpInst::ICMP_ULT)
- // Always false.
- return getFalse(ITy);
- } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
- match(RHS, m_UMax(m_Value(C), m_Value(D))) &&
- (A == C || A == D || B == C || B == D)) {
- // min(x, ?) pred max(x, ?).
- if (Pred == CmpInst::ICMP_ULE)
- // Always true.
- return getTrue(ITy);
- if (Pred == CmpInst::ICMP_UGT)
- // Always false.
return getFalse(ITy);
}
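The min/max comparison folds above follow from the chain smax(A, B) >= A >= smin(A, D) whenever the two calls share the operand A (and likewise for the unsigned forms). A standalone sketch (illustrative only) checking this over a small signed range:

// Standalone sketch: smax(A, B) >=s smin(A, D) always holds when the two
// calls share an operand, because smax(A, B) >= A and A >= smin(A, D).
#include <algorithm>
#include <cassert>

int main() {
  for (int a = -4; a <= 4; ++a)
    for (int b = -4; b <= 4; ++b)
      for (int d = -4; d <= 4; ++d)
        assert(std::max(a, b) >= std::min(a, d));
  return 0;
}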
@@ -3327,12 +3398,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// For EQ and NE, we can always pick a value for the undef to make the
// predicate pass or fail, so we can return undef.
// Matches behavior in llvm::ConstantFoldCompareInstruction.
- if (isa<UndefValue>(RHS) && ICmpInst::isEquality(Pred))
+ if (Q.isUndefValue(RHS) && ICmpInst::isEquality(Pred))
return UndefValue::get(ITy);
// icmp X, X -> true/false
// icmp X, undef -> true/false because undef could be X.
- if (LHS == RHS || isa<UndefValue>(RHS))
+ if (LHS == RHS || Q.isUndefValue(RHS))
return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
if (Value *V = simplifyICmpOfBools(Pred, LHS, RHS, Q))
@@ -3588,7 +3659,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// expression GEP with the same indices and a null base pointer to see
// what constant folding can make out of it.
Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
- SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end());
+ SmallVector<Value *, 4> IndicesLHS(GLHS->indices());
Constant *NewLHS = ConstantExpr::getGetElementPtr(
GLHS->getSourceElementType(), Null, IndicesLHS);
@@ -3659,7 +3730,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// fcmp pred x, undef and fcmp pred undef, x
// fold to true if unordered, false if ordered
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
+ if (Q.isUndefValue(LHS) || Q.isUndefValue(RHS)) {
// Choosing NaN for the undef will always make unordered comparison succeed
// and ordered comparison fail.
return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred));
@@ -3703,6 +3774,21 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
}
}
+
+ // LHS == Inf
+ if (Pred == FCmpInst::FCMP_OEQ && isKnownNeverInfinity(LHS, Q.TLI))
+ return getFalse(RetTy);
+ // LHS != Inf
+ if (Pred == FCmpInst::FCMP_UNE && isKnownNeverInfinity(LHS, Q.TLI))
+ return getTrue(RetTy);
+ // LHS == Inf || LHS == NaN
+ if (Pred == FCmpInst::FCMP_UEQ && isKnownNeverInfinity(LHS, Q.TLI) &&
+ isKnownNeverNaN(LHS, Q.TLI))
+ return getFalse(RetTy);
+ // LHS != Inf && LHS != NaN
+ if (Pred == FCmpInst::FCMP_ONE && isKnownNeverInfinity(LHS, Q.TLI) &&
+ isKnownNeverNaN(LHS, Q.TLI))
+ return getTrue(RetTy);
}
if (C->isNegative() && !C->isNegZero()) {
assert(!C->isNaN() && "Unexpected NaN constant!");
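The new FCMP folds above use the fact that a value known never to be infinity cannot compare ordered-equal to +/-infinity. A standalone sketch (illustrative only) of that fact in plain C++:

// Standalone sketch: a provably finite value never compares ordered-equal
// to infinity, which is what the FCMP_OEQ / FCMP_UNE folds rely on.
#include <cassert>
#include <limits>

int main() {
  double inf = std::numeric_limits<double>::infinity();
  for (double x : {-1.5, 0.0, 42.0}) {  // stand-ins for "known never infinity"
    assert(!(x == inf));                // LHS == Inf --> false
    assert(x != inf);                   // LHS != Inf --> true
  }
  return 0;
}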
@@ -3834,18 +3920,33 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// We can't replace %sel with %add unless we strip away the flags (which will
// be done in InstCombine).
// TODO: This is unsound, because it only catches some forms of refinement.
- if (!AllowRefinement && canCreatePoison(I))
+ if (!AllowRefinement && canCreatePoison(cast<Operator>(I)))
return nullptr;
+ // The simplification queries below may return the original value. Consider:
+ // %div = udiv i32 %arg, %arg2
+ // %mul = mul nsw i32 %div, %arg2
+ // %cmp = icmp eq i32 %mul, %arg
+ // %sel = select i1 %cmp, i32 %div, i32 undef
+ // Replacing %arg by %mul, %div becomes "udiv i32 %mul, %arg2", which
+ // simplifies back to %arg. This can only happen because %mul does not
+ // dominate %div. To ensure a consistent return value contract, we make sure
+ // that this case returns nullptr as well.
+ auto PreventSelfSimplify = [V](Value *Simplified) {
+ return Simplified != V ? Simplified : nullptr;
+ };
+
// If this is a binary operator, try to simplify it with the replaced op.
if (auto *B = dyn_cast<BinaryOperator>(I)) {
if (MaxRecurse) {
if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q,
- MaxRecurse - 1);
+ return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), RepOp,
+ B->getOperand(1), Q,
+ MaxRecurse - 1));
if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, Q,
- MaxRecurse - 1);
+ return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(),
+ B->getOperand(0), RepOp, Q,
+ MaxRecurse - 1));
}
}
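PreventSelfSimplify above is a thin guard: any simplification result that turns out to be the very value being replaced is reported as "no simplification". A standalone sketch (illustrative only, with a hypothetical Value stand-in rather than llvm::Value) of the same guard pattern:

// Standalone sketch of the guard: a result identical to the original value
// is filtered out and reported as nullptr, keeping the return contract uniform.
#include <cassert>

struct Value {};  // hypothetical stand-in for llvm::Value

static Value *preventSelfSimplify(Value *Original, Value *Simplified) {
  return Simplified != Original ? Simplified : nullptr;
}

int main() {
  Value A, B;
  assert(preventSelfSimplify(&A, &B) == &B);       // real simplification kept
  assert(preventSelfSimplify(&A, &A) == nullptr);  // self-result filtered out
  return 0;
}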
@@ -3853,11 +3954,13 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (MaxRecurse) {
if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), Q,
- MaxRecurse - 1);
+ return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), RepOp,
+ C->getOperand(1), Q,
+ MaxRecurse - 1));
if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, Q,
- MaxRecurse - 1);
+ return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(),
+ C->getOperand(0), RepOp, Q,
+ MaxRecurse - 1));
}
}
@@ -3867,8 +3970,8 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
SmallVector<Value *, 8> NewOps(GEP->getNumOperands());
transform(GEP->operands(), NewOps.begin(),
[&](Value *V) { return V == Op ? RepOp : V; });
- return SimplifyGEPInst(GEP->getSourceElementType(), NewOps, Q,
- MaxRecurse - 1);
+ return PreventSelfSimplify(SimplifyGEPInst(GEP->getSourceElementType(),
+ NewOps, Q, MaxRecurse - 1));
}
}
@@ -3987,10 +4090,8 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
// Test for a bogus zero-shift-guard-op around funnel-shift or rotate.
Value *ShAmt;
- auto isFsh = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(),
- m_Value(ShAmt)),
- m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X),
- m_Value(ShAmt)));
+ auto isFsh = m_CombineOr(m_FShl(m_Value(X), m_Value(), m_Value(ShAmt)),
+ m_FShr(m_Value(), m_Value(X), m_Value(ShAmt)));
// (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X
// (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X
if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt)
@@ -4001,17 +4102,24 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
// intrinsics do not have that problem.
// We do not allow this transform for the general funnel shift case because
// that would not preserve the poison safety of the original code.
- auto isRotate = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X),
- m_Deferred(X),
- m_Value(ShAmt)),
- m_Intrinsic<Intrinsic::fshr>(m_Value(X),
- m_Deferred(X),
- m_Value(ShAmt)));
+ auto isRotate =
+ m_CombineOr(m_FShl(m_Value(X), m_Deferred(X), m_Value(ShAmt)),
+ m_FShr(m_Value(X), m_Deferred(X), m_Value(ShAmt)));
// (ShAmt == 0) ? X : fshl(X, X, ShAmt) --> fshl(X, X, ShAmt)
// (ShAmt == 0) ? X : fshr(X, X, ShAmt) --> fshr(X, X, ShAmt)
if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt &&
Pred == ICmpInst::ICMP_EQ)
return FalseVal;
+
+ // X == 0 ? abs(X) : -abs(X) --> -abs(X)
+ // X == 0 ? -abs(X) : abs(X) --> abs(X)
+ if (match(TrueVal, m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS))) &&
+ match(FalseVal, m_Neg(m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS)))))
+ return FalseVal;
+ if (match(TrueVal,
+ m_Neg(m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS)))) &&
+ match(FalseVal, m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS))))
+ return FalseVal;
}
// Check for other compares that behave like bit test.
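The abs/-abs select folds above hold because when X == 0, abs(X), -abs(X) and X are all zero, so the false arm is already the correct result in the only case where the condition is true. A standalone sketch (illustrative only):

// Standalone sketch: X == 0 ? abs(X) : -abs(X) always equals -abs(X),
// because both arms are zero exactly when the condition is true.
#include <cassert>
#include <cstdlib>

int main() {
  for (int x = -4; x <= 4; ++x) {
    int sel = (x == 0) ? std::abs(x) : -std::abs(x);  // original select
    assert(sel == -std::abs(x));                      // folded form
  }
  return 0;
}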
@@ -4019,10 +4127,12 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
TrueVal, FalseVal))
return V;
- // If we have an equality comparison, then we know the value in one of the
- // arms of the select. See if substituting this value into the arm and
+ // If we have a scalar equality comparison, then we know the value in one of
+ // the arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
- if (Pred == ICmpInst::ICMP_EQ) {
+ // Note that the equivalence/replacement opportunity does not hold for vectors
+ // because each element of a vector select is chosen independently.
+ if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) {
if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
/* AllowRefinement */ false, MaxRecurse) ==
TrueVal ||
@@ -4083,7 +4193,7 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
return ConstantFoldSelectInstruction(CondC, TrueC, FalseC);
// select undef, X, Y -> X or Y
- if (isa<UndefValue>(CondC))
+ if (Q.isUndefValue(CondC))
return isa<Constant>(FalseVal) ? FalseVal : TrueVal;
// TODO: Vector constants with undef elements don't simplify.
@@ -4109,16 +4219,24 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
if (TrueVal == FalseVal)
return TrueVal;
- if (isa<UndefValue>(TrueVal)) // select ?, undef, X -> X
+ // If the true or false value is undef, we can fold to the other value as
+ // long as the other value isn't poison.
+ // select ?, undef, X -> X
+ if (Q.isUndefValue(TrueVal) &&
+ isGuaranteedNotToBeUndefOrPoison(FalseVal, Q.AC, Q.CxtI, Q.DT))
return FalseVal;
- if (isa<UndefValue>(FalseVal)) // select ?, X, undef -> X
+ // select ?, X, undef -> X
+ if (Q.isUndefValue(FalseVal) &&
+ isGuaranteedNotToBeUndefOrPoison(TrueVal, Q.AC, Q.CxtI, Q.DT))
return TrueVal;
// Deal with partial undef vector constants: select ?, VecC, VecC' --> VecC''
Constant *TrueC, *FalseC;
- if (TrueVal->getType()->isVectorTy() && match(TrueVal, m_Constant(TrueC)) &&
+ if (isa<FixedVectorType>(TrueVal->getType()) &&
+ match(TrueVal, m_Constant(TrueC)) &&
match(FalseVal, m_Constant(FalseC))) {
- unsigned NumElts = cast<VectorType>(TrueC->getType())->getNumElements();
+ unsigned NumElts =
+ cast<FixedVectorType>(TrueC->getType())->getNumElements();
SmallVector<Constant *, 16> NewC;
for (unsigned i = 0; i != NumElts; ++i) {
// Bail out on incomplete vector constants.
@@ -4131,9 +4249,11 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
// one element is undef, choose the defined element as the safe result.
if (TEltC == FEltC)
NewC.push_back(TEltC);
- else if (isa<UndefValue>(TEltC))
+ else if (Q.isUndefValue(TEltC) &&
+ isGuaranteedNotToBeUndefOrPoison(FEltC))
NewC.push_back(FEltC);
- else if (isa<UndefValue>(FEltC))
+ else if (Q.isUndefValue(FEltC) &&
+ isGuaranteedNotToBeUndefOrPoison(TEltC))
NewC.push_back(TEltC);
else
break;
@@ -4184,7 +4304,12 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType()))
GEPTy = VectorType::get(GEPTy, VT->getElementCount());
- if (isa<UndefValue>(Ops[0]))
+ // getelementptr poison, idx -> poison
+ // getelementptr baseptr, poison -> poison
+ if (any_of(Ops, [](const auto *V) { return isa<PoisonValue>(V); }))
+ return PoisonValue::get(GEPTy);
+
+ if (Q.isUndefValue(Ops[0]))
return UndefValue::get(GEPTy);
bool IsScalableVec = isa<ScalableVectorType>(SrcTy);
@@ -4207,9 +4332,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
// doesn't truncate the pointers.
if (Ops[1]->getType()->getScalarSizeInBits() ==
Q.DL.getPointerSizeInBits(AS)) {
- auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * {
- if (match(P, m_Zero()))
- return Constant::getNullValue(GEPTy);
+ auto PtrToInt = [GEPTy](Value *P) -> Value * {
Value *Temp;
if (match(P, m_PtrToInt(m_Value(Temp))))
if (Temp->getType() == GEPTy)
@@ -4217,10 +4340,14 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
return nullptr;
};
+ // FIXME: The following transforms are only legal if P and V have the
+ // same provenance (PR44403). Check whether getUnderlyingObject() is
+ // the same?
+
// getelementptr V, (sub P, V) -> P if P points to a type of size 1.
if (TyAllocSize == 1 &&
match(Ops[1], m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0])))))
- if (Value *R = PtrToIntOrZero(P))
+ if (Value *R = PtrToInt(P))
return R;
// getelementptr V, (ashr (sub P, V), C) -> Q
@@ -4229,7 +4356,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
m_AShr(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))),
m_ConstantInt(C))) &&
TyAllocSize == 1ULL << C)
- if (Value *R = PtrToIntOrZero(P))
+ if (Value *R = PtrToInt(P))
return R;
// getelementptr V, (sdiv (sub P, V), C) -> Q
@@ -4237,7 +4364,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
if (match(Ops[1],
m_SDiv(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))),
m_SpecificInt(TyAllocSize))))
- if (Value *R = PtrToIntOrZero(P))
+ if (Value *R = PtrToInt(P))
return R;
}
}
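The PtrToInt-based gep folds above reduce to the pointer identity V + (P - V) == P when the element type is one byte wide (the ashr/sdiv variants just rescale the byte offset). A standalone sketch (illustrative only):

// Standalone sketch: for a byte-sized element type, adding the byte
// difference (P - V) back onto V lands exactly on P.
#include <cassert>

int main() {
  char Buf[16];
  char *V = Buf + 3;
  char *P = Buf + 11;
  assert(V + (P - V) == P);  // gep V, (sub P, V) --> P
  return 0;
}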
@@ -4254,15 +4381,21 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL,
BasePtrOffset);
+ // Avoid creating inttoptr of zero here: while LLVM's treatment of
+ // inttoptr is generally conservative, this particular case is folded to
+ // a null pointer, which will have incorrect provenance.
+
// gep (gep V, C), (sub 0, V) -> C
if (match(Ops.back(),
- m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr))))) {
+ m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr)))) &&
+ !BasePtrOffset.isNullValue()) {
auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset);
return ConstantExpr::getIntToPtr(CI, GEPTy);
}
// gep (gep V, C), (xor V, -1) -> C-1
if (match(Ops.back(),
- m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes()))) {
+ m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes())) &&
+ !BasePtrOffset.isOneValue()) {
auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1);
return ConstantExpr::getIntToPtr(CI, GEPTy);
}
@@ -4293,7 +4426,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
// insertvalue x, undef, n -> x
- if (match(Val, m_Undef()))
+ if (Q.isUndefValue(Val))
return Agg;
// insertvalue x, (extractvalue y, n), n
@@ -4301,7 +4434,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
if (EV->getAggregateOperand()->getType() == Agg->getType() &&
EV->getIndices() == Idxs) {
// insertvalue undef, (extractvalue y, n), n -> y
- if (match(Agg, m_Undef()))
+ if (Q.isUndefValue(Agg))
return EV->getAggregateOperand();
// insertvalue y, (extractvalue y, n), n -> y
@@ -4325,22 +4458,23 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
auto *ValC = dyn_cast<Constant>(Val);
auto *IdxC = dyn_cast<Constant>(Idx);
if (VecC && ValC && IdxC)
- return ConstantFoldInsertElementInstruction(VecC, ValC, IdxC);
+ return ConstantExpr::getInsertElement(VecC, ValC, IdxC);
- // For fixed-length vector, fold into undef if index is out of bounds.
+ // For fixed-length vector, fold into poison if index is out of bounds.
if (auto *CI = dyn_cast<ConstantInt>(Idx)) {
if (isa<FixedVectorType>(Vec->getType()) &&
CI->uge(cast<FixedVectorType>(Vec->getType())->getNumElements()))
- return UndefValue::get(Vec->getType());
+ return PoisonValue::get(Vec->getType());
}
// If index is undef, it might be out of bounds (see above case)
- if (isa<UndefValue>(Idx))
- return UndefValue::get(Vec->getType());
+ if (Q.isUndefValue(Idx))
+ return PoisonValue::get(Vec->getType());
- // If the scalar is undef, and there is no risk of propagating poison from the
- // vector value, simplify to the vector value.
- if (isa<UndefValue>(Val) && isGuaranteedNotToBeUndefOrPoison(Vec))
+ // If the scalar is poison, or it is undef and there is no risk of
+ // propagating poison from the vector value, simplify to the vector value.
+ if (isa<PoisonValue>(Val) ||
+ (Q.isUndefValue(Val) && isGuaranteedNotToBePoison(Vec)))
return Vec;
// If we are extracting a value from a vector, then inserting it into the same
@@ -4384,18 +4518,18 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
/// Given operands for an ExtractElementInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &,
- unsigned) {
+static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx,
+ const SimplifyQuery &Q, unsigned) {
auto *VecVTy = cast<VectorType>(Vec->getType());
if (auto *CVec = dyn_cast<Constant>(Vec)) {
if (auto *CIdx = dyn_cast<Constant>(Idx))
- return ConstantFoldExtractElementInstruction(CVec, CIdx);
+ return ConstantExpr::getExtractElement(CVec, CIdx);
// The index is not relevant if our vector is a splat.
if (auto *Splat = CVec->getSplatValue())
return Splat;
- if (isa<UndefValue>(Vec))
+ if (Q.isUndefValue(Vec))
return UndefValue::get(VecVTy->getElementType());
}
@@ -4404,16 +4538,16 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) {
// For fixed-length vector, fold into undef if index is out of bounds.
if (isa<FixedVectorType>(VecVTy) &&
- IdxC->getValue().uge(VecVTy->getNumElements()))
- return UndefValue::get(VecVTy->getElementType());
+ IdxC->getValue().uge(cast<FixedVectorType>(VecVTy)->getNumElements()))
+ return PoisonValue::get(VecVTy->getElementType());
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
return Elt;
}
// An undef extract index can be arbitrarily chosen to be an out-of-range
- // index value, which would result in the instruction being undef.
- if (isa<UndefValue>(Idx))
- return UndefValue::get(VecVTy->getElementType());
+ // index value, which would result in the instruction being poison.
+ if (Q.isUndefValue(Idx))
+ return PoisonValue::get(VecVTy->getElementType());
return nullptr;
}
@@ -4425,6 +4559,10 @@ Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx,
/// See if we can fold the given phi. If not, returns null.
static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) {
+ // WARNING: no matter how worthwhile it may seem, we cannot perform PHI CSE
+ // here, because the PHI we may succeed simplifying to was not
+ // def-reachable from the original PHI!
+
// If all of the PHI's incoming values are the same then replace the PHI node
// with the common value.
Value *CommonValue = nullptr;
@@ -4432,7 +4570,7 @@ static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) {
for (Value *Incoming : PN->incoming_values()) {
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PN) continue;
- if (isa<UndefValue>(Incoming)) {
+ if (Q.isUndefValue(Incoming)) {
// Remember that we saw an undef value, but otherwise ignore them.
HasUndefInput = true;
continue;
@@ -4510,7 +4648,7 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
return nullptr;
// The mask value chooses which source operand we need to look at next.
- int InVecNumElts = cast<VectorType>(Op0->getType())->getNumElements();
+ int InVecNumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
int RootElt = MaskVal;
Value *SourceOp = Op0;
if (MaskVal >= InVecNumElts) {
@@ -4557,16 +4695,16 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
unsigned MaskNumElts = Mask.size();
ElementCount InVecEltCount = InVecTy->getElementCount();
- bool Scalable = InVecEltCount.Scalable;
+ bool Scalable = InVecEltCount.isScalable();
SmallVector<int, 32> Indices;
Indices.assign(Mask.begin(), Mask.end());
// Canonicalization: If mask does not select elements from an input vector,
- // replace that input vector with undef.
+ // replace that input vector with poison.
if (!Scalable) {
bool MaskSelects0 = false, MaskSelects1 = false;
- unsigned InVecNumElts = InVecEltCount.Min;
+ unsigned InVecNumElts = InVecEltCount.getKnownMinValue();
for (unsigned i = 0; i != MaskNumElts; ++i) {
if (Indices[i] == -1)
continue;
@@ -4576,9 +4714,9 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
MaskSelects1 = true;
}
if (!MaskSelects0)
- Op0 = UndefValue::get(InVecTy);
+ Op0 = PoisonValue::get(InVecTy);
if (!MaskSelects1)
- Op1 = UndefValue::get(InVecTy);
+ Op1 = PoisonValue::get(InVecTy);
}
auto *Op0Const = dyn_cast<Constant>(Op0);
@@ -4587,15 +4725,16 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
// If all operands are constant, constant fold the shuffle. This
// transformation depends on the value of the mask which is not known at
// compile time for scalable vectors
- if (!Scalable && Op0Const && Op1Const)
- return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask);
+ if (Op0Const && Op1Const)
+ return ConstantExpr::getShuffleVector(Op0Const, Op1Const, Mask);
// Canonicalization: if only one input vector is constant, it shall be the
// second one. This transformation depends on the value of the mask which
// is not known at compile time for scalable vectors
if (!Scalable && Op0Const && !Op1Const) {
std::swap(Op0, Op1);
- ShuffleVectorInst::commuteShuffleMask(Indices, InVecEltCount.Min);
+ ShuffleVectorInst::commuteShuffleMask(Indices,
+ InVecEltCount.getKnownMinValue());
}
// A splat of an inserted scalar constant becomes a vector constant:
@@ -4627,7 +4766,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
// A shuffle of a splat is always the splat itself. Legal if the shuffle's
// value type is same as the input vectors' type.
if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0))
- if (isa<UndefValue>(Op1) && RetTy == InVecTy &&
+ if (Q.isUndefValue(Op1) && RetTy == InVecTy &&
is_splat(OpShuf->getShuffleMask()))
return Op0;
@@ -4639,7 +4778,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
// Don't fold a shuffle with undef mask elements. This may get folded in a
// better way using demanded bits or other analysis.
// TODO: Should we allow this?
- if (find(Indices, -1) != Indices.end())
+ if (is_contained(Indices, -1))
return nullptr;
// Check if every element of this shuffle can be mapped back to the
@@ -4708,19 +4847,20 @@ static Constant *propagateNaN(Constant *In) {
/// transforms based on undef/NaN because the operation itself makes no
/// difference to the result.
static Constant *simplifyFPOp(ArrayRef<Value *> Ops,
- FastMathFlags FMF = FastMathFlags()) {
+ FastMathFlags FMF,
+ const SimplifyQuery &Q) {
for (Value *V : Ops) {
bool IsNan = match(V, m_NaN());
bool IsInf = match(V, m_Inf());
- bool IsUndef = match(V, m_Undef());
+ bool IsUndef = Q.isUndefValue(V);
// If this operation has 'nnan' or 'ninf' and at least 1 disallowed operand
// (an undef operand can be chosen to be NaN/Inf), then the result of
- // this operation is poison. That result can be relaxed to undef.
+ // this operation is poison.
if (FMF.noNaNs() && (IsNan || IsUndef))
- return UndefValue::get(V->getType());
+ return PoisonValue::get(V->getType());
if (FMF.noInfs() && (IsInf || IsUndef))
- return UndefValue::get(V->getType());
+ return PoisonValue::get(V->getType());
if (IsUndef || IsNan)
return propagateNaN(cast<Constant>(V));
@@ -4735,7 +4875,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPOp({Op0, Op1}, FMF))
+ if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q))
return C;
// fadd X, -0 ==> X
@@ -4782,7 +4922,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPOp({Op0, Op1}, FMF))
+ if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q))
return C;
// fsub X, +0 ==> X
@@ -4824,7 +4964,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned MaxRecurse) {
- if (Constant *C = simplifyFPOp({Op0, Op1}, FMF))
+ if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q))
return C;
// fmul X, 1.0 ==> X
@@ -4891,7 +5031,7 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPOp({Op0, Op1}, FMF))
+ if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q))
return C;
// X / 1.0 -> X
@@ -4936,7 +5076,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPOp({Op0, Op1}, FMF))
+ if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q))
return C;
// Unlike fdiv, the result of frem always matches the sign of the dividend.
@@ -5181,6 +5321,15 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
// bitreverse(bitreverse(x)) -> x
if (match(Op0, m_BitReverse(m_Value(X)))) return X;
break;
+ case Intrinsic::ctpop: {
+ // If everything but the lowest bit is zero, that bit is the pop-count. Ex:
+ // ctpop(and X, 1) --> and X, 1
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ if (MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, BitWidth - 1),
+ Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return Op0;
+ break;
+ }
case Intrinsic::exp:
// exp(log(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
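The ctpop fold above uses the observation that when every bit except the lowest is known zero, the population count equals that low bit itself. A standalone sketch (illustrative only) checking this exhaustively for i8:

// Standalone sketch: ctpop(x & 1) == (x & 1), since the masked value has at
// most one set bit and that bit is its own popcount.
#include <bitset>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    uint8_t masked = x & 1;                                       // "and X, 1"
    assert(std::bitset<8>(masked).count() ==
           static_cast<std::size_t>(masked));                     // ctpop == value
  }
  return 0;
}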
@@ -5233,27 +5382,156 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
return nullptr;
}
+static Intrinsic::ID getMaxMinOpposite(Intrinsic::ID IID) {
+ switch (IID) {
+ case Intrinsic::smax: return Intrinsic::smin;
+ case Intrinsic::smin: return Intrinsic::smax;
+ case Intrinsic::umax: return Intrinsic::umin;
+ case Intrinsic::umin: return Intrinsic::umax;
+ default: llvm_unreachable("Unexpected intrinsic");
+ }
+}
+
+static APInt getMaxMinLimit(Intrinsic::ID IID, unsigned BitWidth) {
+ switch (IID) {
+ case Intrinsic::smax: return APInt::getSignedMaxValue(BitWidth);
+ case Intrinsic::smin: return APInt::getSignedMinValue(BitWidth);
+ case Intrinsic::umax: return APInt::getMaxValue(BitWidth);
+ case Intrinsic::umin: return APInt::getMinValue(BitWidth);
+ default: llvm_unreachable("Unexpected intrinsic");
+ }
+}
+
+static ICmpInst::Predicate getMaxMinPredicate(Intrinsic::ID IID) {
+ switch (IID) {
+ case Intrinsic::smax: return ICmpInst::ICMP_SGE;
+ case Intrinsic::smin: return ICmpInst::ICMP_SLE;
+ case Intrinsic::umax: return ICmpInst::ICMP_UGE;
+ case Intrinsic::umin: return ICmpInst::ICMP_ULE;
+ default: llvm_unreachable("Unexpected intrinsic");
+ }
+}
+
+/// Given a min/max intrinsic, see if it can be removed based on having an
+/// operand that is another min/max intrinsic with shared operand(s). The caller
+/// is expected to swap the operand arguments to handle commutation.
+static Value *foldMinMaxSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) {
+ Value *X, *Y;
+ if (!match(Op0, m_MaxOrMin(m_Value(X), m_Value(Y))))
+ return nullptr;
+
+ auto *MM0 = dyn_cast<IntrinsicInst>(Op0);
+ if (!MM0)
+ return nullptr;
+ Intrinsic::ID IID0 = MM0->getIntrinsicID();
+
+ if (Op1 == X || Op1 == Y ||
+ match(Op1, m_c_MaxOrMin(m_Specific(X), m_Specific(Y)))) {
+ // max (max X, Y), X --> max X, Y
+ if (IID0 == IID)
+ return MM0;
+ // max (min X, Y), X --> X
+ if (IID0 == getMaxMinOpposite(IID))
+ return Op1;
+ }
+ return nullptr;
+}
+
static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
const SimplifyQuery &Q) {
Intrinsic::ID IID = F->getIntrinsicID();
Type *ReturnType = F->getReturnType();
+ unsigned BitWidth = ReturnType->getScalarSizeInBits();
switch (IID) {
+ case Intrinsic::abs:
+ // abs(abs(x)) -> abs(x). We don't need to worry about the nsw arg here.
+ // It is always ok to pick the earlier abs. We'll just lose nsw if it's only
+ // on the outer abs.
+ if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(), m_Value())))
+ return Op0;
+ break;
+
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin: {
+ // If the arguments are the same, this is a no-op.
+ if (Op0 == Op1)
+ return Op0;
+
+ // Canonicalize constant operand as Op1.
+ if (isa<Constant>(Op0))
+ std::swap(Op0, Op1);
+
+ // Assume undef is the limit value.
+ if (Q.isUndefValue(Op1))
+ return ConstantInt::get(ReturnType, getMaxMinLimit(IID, BitWidth));
+
+ const APInt *C;
+ if (match(Op1, m_APIntAllowUndef(C))) {
+ // Clamp to limit value. For example:
+ // umax(i8 %x, i8 255) --> 255
+ if (*C == getMaxMinLimit(IID, BitWidth))
+ return ConstantInt::get(ReturnType, *C);
+
+ // If the constant op is the opposite of the limit value, the other must
+ // be larger/smaller or equal. For example:
+ // umin(i8 %x, i8 255) --> %x
+ if (*C == getMaxMinLimit(getMaxMinOpposite(IID), BitWidth))
+ return Op0;
+
+ // Remove nested call if constant operands allow it. Example:
+ // max (max X, 7), 5 -> max X, 7
+ auto *MinMax0 = dyn_cast<IntrinsicInst>(Op0);
+ if (MinMax0 && MinMax0->getIntrinsicID() == IID) {
+ // TODO: loosen undef/splat restrictions for vector constants.
+ Value *M00 = MinMax0->getOperand(0), *M01 = MinMax0->getOperand(1);
+ const APInt *InnerC;
+ if ((match(M00, m_APInt(InnerC)) || match(M01, m_APInt(InnerC))) &&
+ ((IID == Intrinsic::smax && InnerC->sge(*C)) ||
+ (IID == Intrinsic::smin && InnerC->sle(*C)) ||
+ (IID == Intrinsic::umax && InnerC->uge(*C)) ||
+ (IID == Intrinsic::umin && InnerC->ule(*C))))
+ return Op0;
+ }
+ }
+
+ if (Value *V = foldMinMaxSharedOp(IID, Op0, Op1))
+ return V;
+ if (Value *V = foldMinMaxSharedOp(IID, Op1, Op0))
+ return V;
+
+ ICmpInst::Predicate Pred = getMaxMinPredicate(IID);
+ if (isICmpTrue(Pred, Op0, Op1, Q.getWithoutUndef(), RecursionLimit))
+ return Op0;
+ if (isICmpTrue(Pred, Op1, Op0, Q.getWithoutUndef(), RecursionLimit))
+ return Op1;
+
+ if (Optional<bool> Imp =
+ isImpliedByDomCondition(Pred, Op0, Op1, Q.CxtI, Q.DL))
+ return *Imp ? Op0 : Op1;
+ if (Optional<bool> Imp =
+ isImpliedByDomCondition(Pred, Op1, Op0, Q.CxtI, Q.DL))
+ return *Imp ? Op1 : Op0;
+
+ break;
+ }
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
// X - X -> { 0, false }
- if (Op0 == Op1)
+ // X - undef -> { 0, false }
+ // undef - X -> { 0, false }
+ if (Op0 == Op1 || Q.isUndefValue(Op0) || Q.isUndefValue(Op1))
return Constant::getNullValue(ReturnType);
- LLVM_FALLTHROUGH;
+ break;
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- // X - undef -> { undef, false }
- // undef - X -> { undef, false }
- // X + undef -> { undef, false }
- // undef + x -> { undef, false }
- if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) {
+ // X + undef -> { -1, false }
+ // undef + x -> { -1, false }
+ if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) {
return ConstantStruct::get(
cast<StructType>(ReturnType),
- {UndefValue::get(ReturnType->getStructElementType(0)),
+ {Constant::getAllOnesValue(ReturnType->getStructElementType(0)),
Constant::getNullValue(ReturnType->getStructElementType(1))});
}
break;
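The integer min/max handling above leans on "limit value" identities such as umax(x, UINT8_MAX) == UINT8_MAX and umin(x, UINT8_MAX) == x. A standalone sketch (illustrative only) checking both for i8:

// Standalone sketch of the limit-value folds: the unsigned maximum value is
// absorbing for umax and an identity for umin.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t x = uint8_t(v);
    assert(std::max<uint8_t>(x, UINT8_MAX) == UINT8_MAX);  // clamp to limit
    assert(std::min<uint8_t>(x, UINT8_MAX) == x);          // opposite limit is a no-op
  }
  return 0;
}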
@@ -5265,7 +5543,7 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
return Constant::getNullValue(ReturnType);
// undef * X -> { 0, false }
// X * undef -> { 0, false }
- if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1))
return Constant::getNullValue(ReturnType);
break;
case Intrinsic::uadd_sat:
@@ -5279,7 +5557,7 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
// sat(undef + X) -> -1
// For unsigned: Assume undef is MAX, thus we saturate to MAX (-1).
// For signed: Assume undef is ~X, in which case X + ~X = -1.
- if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1))
return Constant::getAllOnesValue(ReturnType);
// X + 0 -> X
@@ -5296,7 +5574,7 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
LLVM_FALLTHROUGH;
case Intrinsic::ssub_sat:
// X - X -> 0, X - undef -> 0, undef - X -> 0
- if (Op0 == Op1 || match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ if (Op0 == Op1 || Q.isUndefValue(Op0) || Q.isUndefValue(Op1))
return Constant::getNullValue(ReturnType);
// X - 0 -> X
if (match(Op1, m_Zero()))
@@ -5334,18 +5612,43 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
// If the arguments are the same, this is a no-op.
if (Op0 == Op1) return Op0;
- // If one argument is undef, return the other argument.
- if (match(Op0, m_Undef()))
- return Op1;
- if (match(Op1, m_Undef()))
+ // Canonicalize constant operand as Op1.
+ if (isa<Constant>(Op0))
+ std::swap(Op0, Op1);
+
+ // If an argument is undef, return the other argument.
+ if (Q.isUndefValue(Op1))
return Op0;
- // If one argument is NaN, return other or NaN appropriately.
bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
- if (match(Op0, m_NaN()))
- return PropagateNaN ? Op0 : Op1;
+ bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum;
+
+ // minnum(X, nan) -> X
+ // maxnum(X, nan) -> X
+ // minimum(X, nan) -> nan
+ // maximum(X, nan) -> nan
if (match(Op1, m_NaN()))
- return PropagateNaN ? Op1 : Op0;
+ return PropagateNaN ? propagateNaN(cast<Constant>(Op1)) : Op0;
+
+ // In the following folds, inf can be replaced with the largest finite
+ // float, if the ninf flag is set.
+ const APFloat *C;
+ if (match(Op1, m_APFloat(C)) &&
+ (C->isInfinity() || (Q.CxtI->hasNoInfs() && C->isLargest()))) {
+ // minnum(X, -inf) -> -inf
+ // maxnum(X, +inf) -> +inf
+ // minimum(X, -inf) -> -inf if nnan
+ // maximum(X, +inf) -> +inf if nnan
+ if (C->isNegative() == IsMin && (!PropagateNaN || Q.CxtI->hasNoNaNs()))
+ return ConstantFP::get(ReturnType, *C);
+
+ // minnum(X, +inf) -> X if nnan
+ // maxnum(X, -inf) -> X if nnan
+ // minimum(X, +inf) -> X
+ // maximum(X, -inf) -> X
+ if (C->isNegative() != IsMin && (PropagateNaN || Q.CxtI->hasNoNaNs()))
+ return Op0;
+ }
// Min/max of the same operation with common operand:
// m(m(X, Y)), X --> m(X, Y) (4 commuted variants)
@@ -5358,20 +5661,6 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
(M1->getOperand(0) == Op0 || M1->getOperand(1) == Op0))
return Op1;
- // min(X, -Inf) --> -Inf (and commuted variant)
- // max(X, +Inf) --> +Inf (and commuted variant)
- bool UseNegInf = IID == Intrinsic::minnum || IID == Intrinsic::minimum;
- const APFloat *C;
- if ((match(Op0, m_APFloat(C)) && C->isInfinity() &&
- C->isNegative() == UseNegInf) ||
- (match(Op1, m_APFloat(C)) && C->isInfinity() &&
- C->isNegative() == UseNegInf))
- return ConstantFP::getInfinity(ReturnType, UseNegInf);
-
- // TODO: minnum(nnan x, inf) -> x
- // TODO: minnum(nnan ninf x, flt_max) -> x
- // TODO: maxnum(nnan x, -inf) -> x
- // TODO: maxnum(nnan ninf x, -flt_max) -> x
break;
}
default:
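The rewritten minnum/maxnum constant folds above match the semantics of C's fmin/fmax: a NaN operand is ignored, and -inf / +inf absorb the minimum / maximum respectively. A standalone sketch (illustrative only):

// Standalone sketch: std::fmin/std::fmax follow minnum/maxnum semantics --
// NaN operands are dropped and the matching infinity is absorbing.
#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double nan = std::numeric_limits<double>::quiet_NaN();
  double inf = std::numeric_limits<double>::infinity();
  for (double x : {-2.5, 0.0, 7.0}) {
    assert(std::fmin(x, nan) == x);      // minnum(X, nan) -> X
    assert(std::fmax(x, nan) == x);      // maxnum(X, nan) -> X
    assert(std::fmin(x, -inf) == -inf);  // minnum(X, -inf) -> -inf
    assert(std::fmax(x, inf) == inf);    // maxnum(X, +inf) -> +inf
  }
  return 0;
}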
@@ -5414,11 +5703,11 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
*ShAmtArg = Call->getArgOperand(2);
// If both operands are undef, the result is undef.
- if (match(Op0, m_Undef()) && match(Op1, m_Undef()))
+ if (Q.isUndefValue(Op0) && Q.isUndefValue(Op1))
return UndefValue::get(F->getReturnType());
// If shift amount is undef, assume it is zero.
- if (match(ShAmtArg, m_Undef()))
+ if (Q.isUndefValue(ShAmtArg))
return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
const APInt *ShAmtC;
@@ -5435,7 +5724,7 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
Value *Op0 = Call->getArgOperand(0);
Value *Op1 = Call->getArgOperand(1);
Value *Op2 = Call->getArgOperand(2);
- if (Value *V = simplifyFPOp({ Op0, Op1, Op2 }))
+ if (Value *V = simplifyFPOp({ Op0, Op1, Op2 }, {}, Q))
return V;
return nullptr;
}
@@ -5444,28 +5733,9 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
}
}
-Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
- Value *Callee = Call->getCalledOperand();
-
- // musttail calls can only be simplified if they are also DCEd.
- // As we can't guarantee this here, don't simplify them.
- if (Call->isMustTailCall())
- return nullptr;
-
- // call undef -> undef
- // call null -> undef
- if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee))
- return UndefValue::get(Call->getType());
-
- Function *F = dyn_cast<Function>(Callee);
- if (!F)
- return nullptr;
-
- if (F->isIntrinsic())
- if (Value *Ret = simplifyIntrinsic(Call, Q))
- return Ret;
-
- if (!canConstantFoldCallTo(Call, F))
+static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) {
+ auto *F = dyn_cast<Function>(Call->getCalledOperand());
+ if (!F || !canConstantFoldCallTo(Call, F))
return nullptr;
SmallVector<Constant *, 4> ConstantArgs;
@@ -5484,10 +5754,33 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
}
+Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
+ // musttail calls can only be simplified if they are also DCEd.
+ // As we can't guarantee this here, don't simplify them.
+ if (Call->isMustTailCall())
+ return nullptr;
+
+ // call undef -> poison
+ // call null -> poison
+ Value *Callee = Call->getCalledOperand();
+ if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee))
+ return PoisonValue::get(Call->getType());
+
+ if (Value *V = tryConstantFoldCall(Call, Q))
+ return V;
+
+ auto *F = dyn_cast<Function>(Callee);
+ if (F && F->isIntrinsic())
+ if (Value *Ret = simplifyIntrinsic(Call, Q))
+ return Ret;
+
+ return nullptr;
+}
+
/// Given operands for a Freeze, see if we can fold the result.
static Value *SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
// Use a utility function defined in ValueTracking.
- if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0, Q.CxtI, Q.DT))
+ if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT))
return Op0;
// We have room for improvement.
return nullptr;
@@ -5596,7 +5889,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
I->getOperand(2), Q);
break;
case Instruction::GetElementPtr: {
- SmallVector<Value *, 8> Ops(I->op_begin(), I->op_end());
+ SmallVector<Value *, 8> Ops(I->operands());
Result = SimplifyGEPInst(cast<GetElementPtrInst>(I)->getSourceElementType(),
Ops, Q);
break;
@@ -5733,13 +6026,6 @@ static bool replaceAndRecursivelySimplifyImpl(
return Simplified;
}
-bool llvm::recursivelySimplifyInstruction(Instruction *I,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionCache *AC) {
- return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC, nullptr);
-}
-
bool llvm::replaceAndRecursivelySimplify(
Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Interval.cpp b/contrib/llvm-project/llvm/lib/Analysis/Interval.cpp
index 07d6e27c13be..e228ec4f2126 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Interval.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Interval.cpp
@@ -22,17 +22,6 @@ using namespace llvm;
// Interval Implementation
//===----------------------------------------------------------------------===//
-// isLoop - Find out if there is a back edge in this interval...
-bool Interval::isLoop() const {
- // There is a loop in this interval iff one of the predecessors of the header
- // node lives in the interval.
- for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode);
- I != E; ++I)
- if (contains(*I))
- return true;
- return false;
-}
-
void Interval::print(raw_ostream &OS) const {
OS << "-------------------------------------------------------------\n"
<< "Interval Contents:\n";
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp
index efded17cef4e..f2c85a69f125 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -19,6 +19,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@@ -255,6 +256,24 @@ void LazyCallGraph::SCC::verify() {
"Must set low link to -1 when adding a node to an SCC!");
for (Edge &E : **N)
assert(E.getNode().isPopulated() && "Can't have an unpopulated node!");
+
+#ifdef EXPENSIVE_CHECKS
+ // Verify that all nodes in this SCC can reach all other nodes.
+ SmallVector<Node *, 4> Worklist;
+ SmallPtrSet<Node *, 4> Visited;
+ Worklist.push_back(N);
+ while (!Worklist.empty()) {
+ Node *VisitingNode = Worklist.pop_back_val();
+ if (!Visited.insert(VisitingNode).second)
+ continue;
+ for (Edge &E : (*VisitingNode)->calls())
+ Worklist.push_back(&E.getNode());
+ }
+ for (Node *NodeToVisit : Nodes) {
+ assert(Visited.contains(NodeToVisit) &&
+ "Cannot reach all nodes within SCC");
+ }
+#endif
}
}
#endif
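The EXPENSIVE_CHECKS verification added above is a plain worklist reachability walk over call edges. A standalone sketch (illustrative only, on a toy adjacency-list graph rather than LazyCallGraph's types) of the same pattern:

// Standalone sketch of the worklist reachability check: every node of a
// strongly connected component must reach every other node.
#include <cassert>
#include <set>
#include <vector>

static std::set<int> reachable(const std::vector<std::vector<int>> &Succ,
                               int Start) {
  std::set<int> Visited;
  std::vector<int> Worklist{Start};
  while (!Worklist.empty()) {
    int N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue;                 // already visited
    for (int S : Succ[N])
      Worklist.push_back(S);
  }
  return Visited;
}

int main() {
  // 0 -> 1 -> 2 -> 0 forms a strongly connected component.
  std::vector<std::vector<int>> Succ = {{1}, {2}, {0}};
  for (int N = 0; N < 3; ++N)
    assert(reachable(Succ, N).size() == 3);  // each node reaches all others
  return 0;
}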
@@ -357,6 +376,31 @@ void LazyCallGraph::RefSCC::verify() {
}
}
}
+
+#ifdef EXPENSIVE_CHECKS
+ // Verify that all nodes in this RefSCC can reach all other nodes.
+ SmallVector<Node *> Nodes;
+ for (SCC *C : SCCs) {
+ for (Node &N : *C)
+ Nodes.push_back(&N);
+ }
+ for (Node *N : Nodes) {
+ SmallVector<Node *, 4> Worklist;
+ SmallPtrSet<Node *, 4> Visited;
+ Worklist.push_back(N);
+ while (!Worklist.empty()) {
+ Node *VisitingNode = Worklist.pop_back_val();
+ if (!Visited.insert(VisitingNode).second)
+ continue;
+ for (Edge &E : **VisitingNode)
+ Worklist.push_back(&E.getNode());
+ }
+ for (Node *NodeToVisit : Nodes) {
+ assert(Visited.contains(NodeToVisit) &&
+ "Cannot reach all nodes within RefSCC");
+ }
+ }
+#endif
}
#endif
@@ -822,7 +866,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
PendingSCCStack.clear();
while (!DFSStack.empty())
OldSCC.Nodes.push_back(DFSStack.pop_back_val().first);
- for (Node &N : make_range(OldSCC.begin() + OldSize, OldSCC.end())) {
+ for (Node &N : drop_begin(OldSCC, OldSize)) {
N.DFSNumber = N.LowLink = -1;
G->SCCMap[&N] = &OldSCC;
}
@@ -1373,23 +1417,6 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN,
return Result;
}
-void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN,
- Node &TargetN) {
- // The only trivial case that requires any graph updates is when we add new
- // ref edge and may connect different RefSCCs along that path. This is only
- // because of the parents set. Every other part of the graph remains constant
- // after this edge insertion.
- assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
- RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
- if (&TargetRC == this)
- return;
-
-#ifdef EXPENSIVE_CHECKS
- assert(TargetRC.isDescendantOf(*this) &&
- "Target must be a descendant of the Source.");
-#endif
-}
-
void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN,
Node &TargetN) {
#ifndef NDEBUG
@@ -1420,9 +1447,6 @@ void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN,
// Create the new edge.
SourceN->Edges.emplace_back(TargetN, Edge::Call);
}
-
- // Now that we have the edge, handle the graph fallout.
- handleTrivialEdgeInsertion(SourceN, TargetN);
}
void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) {
@@ -1449,9 +1473,6 @@ void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) {
// Create the new edge.
SourceN->Edges.emplace_back(TargetN, Edge::Ref);
-
- // Now that we have the edge, handle the graph fallout.
- handleTrivialEdgeInsertion(SourceN, TargetN);
}
void LazyCallGraph::RefSCC::replaceNodeFunction(Node &N, Function &NewF) {
@@ -1565,19 +1586,213 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
// allocators.
}
-void LazyCallGraph::addNewFunctionIntoSCC(Function &NewF, SCC &C) {
- addNodeToSCC(C, createNode(NewF));
+// Gets the Edge::Kind from one function to another by looking at the function's
+// instructions. Asserts if there is no edge.
+// Useful for determining what type of edge should exist between functions when
+// the edge hasn't been created yet.
+static LazyCallGraph::Edge::Kind getEdgeKind(Function &OriginalFunction,
+ Function &NewFunction) {
+ // In release builds, assume that if there are no direct calls to the new
+ // function, then there is a ref edge. In debug builds, keep track of
+ // references to assert that there is actually a ref edge if there is no call
+ // edge.
+#ifndef NDEBUG
+ SmallVector<Constant *, 16> Worklist;
+ SmallPtrSet<Constant *, 16> Visited;
+#endif
+
+ for (Instruction &I : instructions(OriginalFunction)) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (Function *Callee = CB->getCalledFunction()) {
+ if (Callee == &NewFunction)
+ return LazyCallGraph::Edge::Kind::Call;
+ }
+ }
+#ifndef NDEBUG
+ for (Value *Op : I.operand_values()) {
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ if (Visited.insert(C).second)
+ Worklist.push_back(C);
+ }
+ }
+#endif
+ }
+
+#ifndef NDEBUG
+ bool FoundNewFunction = false;
+ LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &F) {
+ if (&F == &NewFunction)
+ FoundNewFunction = true;
+ });
+ assert(FoundNewFunction && "No edge from original function to new function");
+#endif
+
+ return LazyCallGraph::Edge::Kind::Ref;
}
-void LazyCallGraph::addNewFunctionIntoRefSCC(Function &NewF, RefSCC &RC) {
- Node &N = createNode(NewF);
+void LazyCallGraph::addSplitFunction(Function &OriginalFunction,
+ Function &NewFunction) {
+ assert(lookup(OriginalFunction) &&
+ "Original function's node should already exist");
+ Node &OriginalN = get(OriginalFunction);
+ SCC *OriginalC = lookupSCC(OriginalN);
+ RefSCC *OriginalRC = lookupRefSCC(OriginalN);
- auto *C = createSCC(RC, SmallVector<Node *, 1>());
- addNodeToSCC(*C, N);
+#ifndef NDEBUG
+ OriginalRC->verify();
+ auto VerifyOnExit = make_scope_exit([&]() { OriginalRC->verify(); });
+#endif
- auto Index = RC.SCCIndices.size();
- RC.SCCIndices[C] = Index;
- RC.SCCs.push_back(C);
+ assert(!lookup(NewFunction) &&
+ "New function's node should not already exist");
+ Node &NewN = initNode(NewFunction);
+
+ Edge::Kind EK = getEdgeKind(OriginalFunction, NewFunction);
+
+ SCC *NewC = nullptr;
+ for (Edge &E : *NewN) {
+ Node &EN = E.getNode();
+ if (EK == Edge::Kind::Call && E.isCall() && lookupSCC(EN) == OriginalC) {
+ // If the edge to the new function is a call edge and there is a call edge
+ // from the new function to any function in the original function's SCC,
+ // it is in the same SCC (and RefSCC) as the original function.
+ NewC = OriginalC;
+ NewC->Nodes.push_back(&NewN);
+ break;
+ }
+ }
+
+ if (!NewC) {
+ for (Edge &E : *NewN) {
+ Node &EN = E.getNode();
+ if (lookupRefSCC(EN) == OriginalRC) {
+ // If there is any edge from the new function to any function in the
+ // original function's RefSCC, it is in the same RefSCC as the original
+ // function but a new SCC.
+ RefSCC *NewRC = OriginalRC;
+ NewC = createSCC(*NewRC, SmallVector<Node *, 1>({&NewN}));
+
+ // The new function's SCC is not the same as the original function's
+ // SCC, since that case was handled earlier. If the edge from the
+ // original function to the new function was a call edge, then we need
+ // to insert the newly created function's SCC before the original
+ // function's SCC. Otherwise either the new SCC comes after the original
+ // function's SCC, or it doesn't matter, and in both cases we can add it
+ // to the very end.
+ int InsertIndex = EK == Edge::Kind::Call ? NewRC->SCCIndices[OriginalC]
+ : NewRC->SCCIndices.size();
+ NewRC->SCCs.insert(NewRC->SCCs.begin() + InsertIndex, NewC);
+ for (int I = InsertIndex, Size = NewRC->SCCs.size(); I < Size; ++I)
+ NewRC->SCCIndices[NewRC->SCCs[I]] = I;
+
+ break;
+ }
+ }
+ }
+
+ if (!NewC) {
+ // We didn't find any edges back to the original function's RefSCC, so the
+ // new function belongs in a new RefSCC. The new RefSCC goes before the
+ // original function's RefSCC.
+ RefSCC *NewRC = createRefSCC(*this);
+ NewC = createSCC(*NewRC, SmallVector<Node *, 1>({&NewN}));
+ NewRC->SCCIndices[NewC] = 0;
+ NewRC->SCCs.push_back(NewC);
+ auto OriginalRCIndex = RefSCCIndices.find(OriginalRC)->second;
+ PostOrderRefSCCs.insert(PostOrderRefSCCs.begin() + OriginalRCIndex, NewRC);
+ for (int I = OriginalRCIndex, Size = PostOrderRefSCCs.size(); I < Size; ++I)
+ RefSCCIndices[PostOrderRefSCCs[I]] = I;
+ }
+
+ SCCMap[&NewN] = NewC;
+
+ OriginalN->insertEdgeInternal(NewN, EK);
+}
+
+void LazyCallGraph::addSplitRefRecursiveFunctions(
+ Function &OriginalFunction, ArrayRef<Function *> NewFunctions) {
+ assert(!NewFunctions.empty() && "Can't add zero functions");
+ assert(lookup(OriginalFunction) &&
+ "Original function's node should already exist");
+ Node &OriginalN = get(OriginalFunction);
+ RefSCC *OriginalRC = lookupRefSCC(OriginalN);
+
+#ifndef NDEBUG
+ OriginalRC->verify();
+ auto VerifyOnExit = make_scope_exit([&]() {
+ OriginalRC->verify();
+#ifdef EXPENSIVE_CHECKS
+ for (Function *NewFunction : NewFunctions)
+ lookupRefSCC(get(*NewFunction))->verify();
+#endif
+ });
+#endif
+
+ bool ExistsRefToOriginalRefSCC = false;
+
+ for (Function *NewFunction : NewFunctions) {
+ Node &NewN = initNode(*NewFunction);
+
+ OriginalN->insertEdgeInternal(NewN, Edge::Kind::Ref);
+
+ // Check if there is any edge from any new function back to any function in
+ // the original function's RefSCC.
+ for (Edge &E : *NewN) {
+ if (lookupRefSCC(E.getNode()) == OriginalRC) {
+ ExistsRefToOriginalRefSCC = true;
+ break;
+ }
+ }
+ }
+
+ RefSCC *NewRC;
+ if (ExistsRefToOriginalRefSCC) {
+ // If there is any edge from any new function to any function in the
+ // original function's RefSCC, all new functions will be in the same RefSCC
+ // as the original function.
+ NewRC = OriginalRC;
+ } else {
+ // Otherwise the new functions are in their own RefSCC.
+ NewRC = createRefSCC(*this);
+ // The new RefSCC goes before the original function's RefSCC in postorder
+ // since there are only edges from the original function's RefSCC to the new
+ // RefSCC.
+ auto OriginalRCIndex = RefSCCIndices.find(OriginalRC)->second;
+ PostOrderRefSCCs.insert(PostOrderRefSCCs.begin() + OriginalRCIndex, NewRC);
+ for (int I = OriginalRCIndex, Size = PostOrderRefSCCs.size(); I < Size; ++I)
+ RefSCCIndices[PostOrderRefSCCs[I]] = I;
+ }
+
+ for (Function *NewFunction : NewFunctions) {
+ Node &NewN = get(*NewFunction);
+ // Each new function is in its own new SCC. The original function can only
+ // have a ref edge to new functions, and no other existing functions can
+ // have references to new functions. Each new function only has a ref edge
+ // to the other new functions.
+ SCC *NewC = createSCC(*NewRC, SmallVector<Node *, 1>({&NewN}));
+ // The new SCCs are either sibling SCCs or parent SCCs to all other existing
+ // SCCs in the RefSCC. Either way, they can go at the back of the postorder
+ // SCC list.
+ auto Index = NewRC->SCCIndices.size();
+ NewRC->SCCIndices[NewC] = Index;
+ NewRC->SCCs.push_back(NewC);
+ SCCMap[&NewN] = NewC;
+ }
+
+#ifndef NDEBUG
+ for (Function *F1 : NewFunctions) {
+ assert(getEdgeKind(OriginalFunction, *F1) == Edge::Kind::Ref &&
+ "Expected ref edges from original function to every new function");
+ Node &N1 = get(*F1);
+ for (Function *F2 : NewFunctions) {
+ if (F1 == F2)
+ continue;
+ Node &N2 = get(*F2);
+ assert(!N1->lookup(N2)->isCall() &&
+ "Edges between new functions must be ref edges");
+ }
+ }
+#endif
}
LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
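addSplitFunction and addSplitRefRecursiveFunctions above repeatedly use an "insert and renumber" step to keep a postorder list and its index map consistent after inserting a new SCC or RefSCC. A standalone sketch (illustrative only, with hypothetical names) of that step:

// Standalone sketch: insert an element at a given position and renumber the
// index map for every element from that position onward.
#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Order = {"A", "B", "C"};
  std::map<std::string, int> Index = {{"A", 0}, {"B", 1}, {"C", 2}};

  // Insert "New" before "B", then refresh the indices from that point on.
  int InsertIndex = Index["B"];
  Order.insert(Order.begin() + InsertIndex, "New");
  for (int I = InsertIndex, Size = static_cast<int>(Order.size()); I < Size; ++I)
    Index[Order[I]] = I;

  assert(Index["New"] == 1 && Index["B"] == 2 && Index["C"] == 3);
  return 0;
}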
@@ -1594,21 +1809,14 @@ void LazyCallGraph::updateGraphPtrs() {
RC->G = this;
}
-LazyCallGraph::Node &LazyCallGraph::createNode(Function &F) {
- assert(!lookup(F) && "node already exists");
-
+LazyCallGraph::Node &LazyCallGraph::initNode(Function &F) {
Node &N = get(F);
- NodeMap[&F] = &N;
N.DFSNumber = N.LowLink = -1;
N.populate();
+ NodeMap[&F] = &N;
return N;
}
-void LazyCallGraph::addNodeToSCC(LazyCallGraph::SCC &C, Node &N) {
- C.Nodes.push_back(&N);
- SCCMap[&N] = &C;
-}
-
template <typename RootsT, typename GetBeginT, typename GetEndT,
typename GetNodeT, typename FormSCCCallbackT>
void LazyCallGraph::buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin,
@@ -1750,10 +1958,7 @@ void LazyCallGraph::buildRefSCCs() {
for (Edge &E : *this)
Roots.push_back(&E.getNode());
- // The roots will be popped of a stack, so use reverse to get a less
- // surprising order. This doesn't change any of the semantics anywhere.
- std::reverse(Roots.begin(), Roots.end());
-
+ // The roots will be iterated in order.
buildGenericSCCs(
Roots,
[](Node &N) {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
index f5ffa7286b3b..ba2b6fe94c18 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -36,6 +36,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
using namespace llvm;
@@ -150,15 +151,21 @@ namespace {
} // end anonymous namespace
namespace {
+ using NonNullPointerSet = SmallDenseSet<AssertingVH<Value>, 2>;
+
/// This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
class LazyValueInfoCache {
/// This is all of the cached information for one basic block. It contains
/// the per-value lattice elements, as well as a separate set for
- /// overdefined values to reduce memory usage.
+ /// overdefined values to reduce memory usage. Additionally, pointers
+ /// dereferenced in the block are cached for nullability queries.
struct BlockCacheEntry {
SmallDenseMap<AssertingVH<Value>, ValueLatticeElement, 4> LatticeElements;
SmallDenseSet<AssertingVH<Value>, 4> OverDefined;
+ // None indicates that the nonnull pointers for this basic block
+ // have not been computed yet.
+ Optional<NonNullPointerSet> NonNullPointers;
};
/// Cached information per basic block.
@@ -220,6 +227,19 @@ namespace {
return LatticeIt->second;
}
+ bool isNonNullAtEndOfBlock(
+ Value *V, BasicBlock *BB,
+ function_ref<NonNullPointerSet(BasicBlock *)> InitFn) {
+ BlockCacheEntry *Entry = getOrCreateBlockEntry(BB);
+ if (!Entry->NonNullPointers) {
+ Entry->NonNullPointers = InitFn(BB);
+ for (Value *V : *Entry->NonNullPointers)
+ addValueHandle(V);
+ }
+
+ return Entry->NonNullPointers->count(V);
+ }
+
/// clear - Empty the cache.
void clear() {
BlockCache.clear();
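isNonNullAtEndOfBlock above caches a per-block set that is computed on first use via an init callback and reused afterwards. A standalone sketch (illustrative only, using std::optional in place of llvm::Optional) of the same lazily-initialized cache pattern:

// Standalone sketch: an optional member filled by an init callback on the
// first query, then served from the cache on later queries.
#include <cassert>
#include <functional>
#include <optional>
#include <set>

struct BlockCacheEntry {
  std::optional<std::set<int>> NonNullPointers;  // empty until first query

  bool isNonNull(int V, const std::function<std::set<int>()> &InitFn) {
    if (!NonNullPointers)
      NonNullPointers = InitFn();                // compute once, on demand
    return NonNullPointers->count(V) != 0;
  }
};

int main() {
  int Calls = 0;
  BlockCacheEntry Entry;
  auto Init = [&Calls] { ++Calls; return std::set<int>{1, 2, 3}; };
  assert(Entry.isNonNull(2, Init));              // triggers the one-time init
  assert(!Entry.isNonNull(5, Init));             // answered from the cached set
  assert(Calls == 1);
  return 0;
}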
@@ -244,6 +264,8 @@ void LazyValueInfoCache::eraseValue(Value *V) {
for (auto &Pair : BlockCache) {
Pair.second->LatticeElements.erase(V);
Pair.second->OverDefined.erase(V);
+ if (Pair.second->NonNullPointers)
+ Pair.second->NonNullPointers->erase(V);
}
auto HandleIt = ValueHandles.find_as(V);
@@ -311,7 +333,7 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc,
if (!changed) continue;
- worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ llvm::append_range(worklist, successors(ToUpdate));
}
}
@@ -388,8 +410,8 @@ class LazyValueInfoImpl {
BasicBlock *BB);
Optional<ValueLatticeElement> solveBlockValueSelect(SelectInst *S,
BasicBlock *BB);
- Optional<ConstantRange> getRangeForOperand(unsigned Op, Instruction *I,
- BasicBlock *BB);
+ Optional<ConstantRange> getRangeFor(Value *V, Instruction *CxtI,
+ BasicBlock *BB);
Optional<ValueLatticeElement> solveBlockValueBinaryOpImpl(
Instruction *I, BasicBlock *BB,
std::function<ConstantRange(const ConstantRange &,
@@ -400,12 +422,11 @@ class LazyValueInfoImpl {
BasicBlock *BB);
Optional<ValueLatticeElement> solveBlockValueOverflowIntrinsic(
WithOverflowInst *WO, BasicBlock *BB);
- Optional<ValueLatticeElement> solveBlockValueSaturatingIntrinsic(
- SaturatingInst *SI, BasicBlock *BB);
Optional<ValueLatticeElement> solveBlockValueIntrinsic(IntrinsicInst *II,
BasicBlock *BB);
Optional<ValueLatticeElement> solveBlockValueExtractValue(
ExtractValueInst *EVI, BasicBlock *BB);
+ bool isNonNullAtEndOfBlock(Value *Val, BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
ValueLatticeElement &BBLV,
Instruction *BBI);
@@ -413,14 +434,16 @@ class LazyValueInfoImpl {
void solve();
public:
- /// This is the query interface to determine the lattice
- /// value for the specified Value* at the end of the specified block.
+ /// This is the query interface to determine the lattice value for the
+ /// specified Value* at the context instruction (if specified) or at the
+ /// start of the block.
ValueLatticeElement getValueInBlock(Value *V, BasicBlock *BB,
Instruction *CxtI = nullptr);
- /// This is the query interface to determine the lattice
- /// value for the specified Value* at the specified instruction (generally
- /// from an assume intrinsic).
+ /// This is the query interface to determine the lattice value for the
+ /// specified Value* at the specified instruction using only information
+ /// from assumes/guards and range metadata. Unlike getValueInBlock(), no
+ /// recursive query is performed.
ValueLatticeElement getValueAt(Value *V, Instruction *CxtI);
/// This is the query interface to determine the lattice
@@ -605,53 +628,43 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueImpl(
return getFromRangeMetadata(BBI);
}
-static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
+static void AddNonNullPointer(Value *Ptr, NonNullPointerSet &PtrSet) {
+ // TODO: Use NullPointerIsDefined instead.
+ if (Ptr->getType()->getPointerAddressSpace() == 0)
+ PtrSet.insert(getUnderlyingObject(Ptr));
+}
+
+static void AddNonNullPointersByInstruction(
+ Instruction *I, NonNullPointerSet &PtrSet) {
if (LoadInst *L = dyn_cast<LoadInst>(I)) {
- return L->getPointerAddressSpace() == 0 &&
- GetUnderlyingObject(L->getPointerOperand(),
- L->getModule()->getDataLayout()) == Ptr;
- }
- if (StoreInst *S = dyn_cast<StoreInst>(I)) {
- return S->getPointerAddressSpace() == 0 &&
- GetUnderlyingObject(S->getPointerOperand(),
- S->getModule()->getDataLayout()) == Ptr;
- }
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
- if (MI->isVolatile()) return false;
+ AddNonNullPointer(L->getPointerOperand(), PtrSet);
+ } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+ AddNonNullPointer(S->getPointerOperand(), PtrSet);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (MI->isVolatile()) return;
// FIXME: check whether it has a valuerange that excludes zero?
ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
- if (!Len || Len->isZero()) return false;
+ if (!Len || Len->isZero()) return;
- if (MI->getDestAddressSpace() == 0)
- if (GetUnderlyingObject(MI->getRawDest(),
- MI->getModule()->getDataLayout()) == Ptr)
- return true;
+ AddNonNullPointer(MI->getRawDest(), PtrSet);
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
- if (MTI->getSourceAddressSpace() == 0)
- if (GetUnderlyingObject(MTI->getRawSource(),
- MTI->getModule()->getDataLayout()) == Ptr)
- return true;
+ AddNonNullPointer(MTI->getRawSource(), PtrSet);
}
- return false;
}
-/// Return true if the allocation associated with Val is ever dereferenced
-/// within the given basic block. This establishes the fact Val is not null,
-/// but does not imply that the memory at Val is dereferenceable. (Val may
-/// point off the end of the dereferenceable part of the object.)
-static bool isObjectDereferencedInBlock(Value *Val, BasicBlock *BB) {
- assert(Val->getType()->isPointerTy());
+bool LazyValueInfoImpl::isNonNullAtEndOfBlock(Value *Val, BasicBlock *BB) {
+ if (NullPointerIsDefined(BB->getParent(),
+ Val->getType()->getPointerAddressSpace()))
+ return false;
- const DataLayout &DL = BB->getModule()->getDataLayout();
- Value *UnderlyingVal = GetUnderlyingObject(Val, DL);
- // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge
- // inside InstructionDereferencesPointer either.
- if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, DL, 1))
+ Val = getUnderlyingObject(Val);
+ return TheCache.isNonNullAtEndOfBlock(Val, BB, [](BasicBlock *BB) {
+ NonNullPointerSet NonNullPointers;
for (Instruction &I : *BB)
- if (InstructionDereferencesPointer(&I, UnderlyingVal))
- return true;
- return false;
+ AddNonNullPointersByInstruction(&I, NonNullPointers);
+ return NonNullPointers;
+ });
}
Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueNonLocal(
@@ -662,16 +675,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueNonLocal(
// value is overdefined.
if (BB == &BB->getParent()->getEntryBlock()) {
assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
- // Before giving up, see if we can prove the pointer non-null local to
- // this particular block.
- PointerType *PTy = dyn_cast<PointerType>(Val->getType());
- if (PTy &&
- (isKnownNonZero(Val, DL) ||
- (isObjectDereferencedInBlock(Val, BB) &&
- !NullPointerIsDefined(BB->getParent(), PTy->getAddressSpace()))))
- return ValueLatticeElement::getNot(ConstantPointerNull::get(PTy));
- else
- return ValueLatticeElement::getOverdefined();
+ return ValueLatticeElement::getOverdefined();
}
// Loop over all of our predecessors, merging what we know from them into
@@ -696,14 +700,6 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueNonLocal(
if (Result.isOverdefined()) {
LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because of pred (non local).\n");
- // Before giving up, see if we can prove the pointer non-null local to
- // this particular block.
- PointerType *PTy = dyn_cast<PointerType>(Val->getType());
- if (PTy && isObjectDereferencedInBlock(Val, BB) &&
- !NullPointerIsDefined(BB->getParent(), PTy->getAddressSpace())) {
- Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy));
- }
-
return Result;
}
}
@@ -776,16 +772,23 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
}
// If guards are not used in the module, don't spend time looking for them
- if (!GuardDecl || GuardDecl->use_empty())
- return;
+ if (GuardDecl && !GuardDecl->use_empty() &&
+ BBI->getIterator() != BB->begin()) {
+ for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()),
+ BB->rend())) {
+ Value *Cond = nullptr;
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond))))
+ BBLV = intersect(BBLV, getValueFromCondition(Val, Cond));
+ }
+ }
- if (BBI->getIterator() == BB->begin())
- return;
- for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()),
- BB->rend())) {
- Value *Cond = nullptr;
- if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond))))
- BBLV = intersect(BBLV, getValueFromCondition(Val, Cond));
+ if (BBLV.isOverdefined()) {
+ // Check whether we're checking at the terminator, and the pointer has
+ // been dereferenced in this block.
+ PointerType *PTy = dyn_cast<PointerType>(Val->getType());
+ if (PTy && BB->getTerminator() == BBI &&
+ isNonNullAtEndOfBlock(Val, BB))
+ BBLV = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy));
}
}
@@ -919,20 +922,19 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueSelect(
return Result;
}
-Optional<ConstantRange> LazyValueInfoImpl::getRangeForOperand(unsigned Op,
- Instruction *I,
- BasicBlock *BB) {
- Optional<ValueLatticeElement> OptVal = getBlockValue(I->getOperand(Op), BB);
+Optional<ConstantRange> LazyValueInfoImpl::getRangeFor(Value *V,
+ Instruction *CxtI,
+ BasicBlock *BB) {
+ Optional<ValueLatticeElement> OptVal = getBlockValue(V, BB);
if (!OptVal)
return None;
ValueLatticeElement &Val = *OptVal;
- intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I);
+ intersectAssumeOrGuardBlockValueConstantRange(V, Val, CxtI);
if (Val.isConstantRange())
return Val.getConstantRange();
- const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(I->getOperand(Op)->getType());
+ const unsigned OperandBitWidth = DL.getTypeSizeInBits(V->getType());
return ConstantRange::getFull(OperandBitWidth);
}
@@ -962,7 +964,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueCast(
// Figure out the range of the LHS. If that fails, we still apply the
// transfer rule on the full set since we may be able to locally infer
// interesting facts.
- Optional<ConstantRange> LHSRes = getRangeForOperand(0, CI, BB);
+ Optional<ConstantRange> LHSRes = getRangeFor(CI->getOperand(0), CI, BB);
if (!LHSRes.hasValue())
// More work to do before applying this transfer rule.
return None;
@@ -985,8 +987,8 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
// conservative range, but apply the transfer rule anyways. This
// lets us pick up facts from expressions like "and i32 (call i32
// @foo()), 32"
- Optional<ConstantRange> LHSRes = getRangeForOperand(0, I, BB);
- Optional<ConstantRange> RHSRes = getRangeForOperand(1, I, BB);
+ Optional<ConstantRange> LHSRes = getRangeFor(I->getOperand(0), I, BB);
+ Optional<ConstantRange> RHSRes = getRangeFor(I->getOperand(1), I, BB);
if (!LHSRes.hasValue() || !RHSRes.hasValue())
// More work to do before applying this transfer rule.
return None;
@@ -1036,43 +1038,24 @@ LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(WithOverflowInst *WO,
});
}
-Optional<ValueLatticeElement>
-LazyValueInfoImpl::solveBlockValueSaturatingIntrinsic(SaturatingInst *SI,
- BasicBlock *BB) {
- switch (SI->getIntrinsicID()) {
- case Intrinsic::uadd_sat:
- return solveBlockValueBinaryOpImpl(
- SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) {
- return CR1.uadd_sat(CR2);
- });
- case Intrinsic::usub_sat:
- return solveBlockValueBinaryOpImpl(
- SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) {
- return CR1.usub_sat(CR2);
- });
- case Intrinsic::sadd_sat:
- return solveBlockValueBinaryOpImpl(
- SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) {
- return CR1.sadd_sat(CR2);
- });
- case Intrinsic::ssub_sat:
- return solveBlockValueBinaryOpImpl(
- SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) {
- return CR1.ssub_sat(CR2);
- });
- default:
- llvm_unreachable("All llvm.sat intrinsic are handled.");
- }
-}
-
Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueIntrinsic(
IntrinsicInst *II, BasicBlock *BB) {
- if (auto *SI = dyn_cast<SaturatingInst>(II))
- return solveBlockValueSaturatingIntrinsic(SI, BB);
+ if (!ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined (unknown intrinsic).\n");
+ return ValueLatticeElement::getOverdefined();
+ }
- LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined (unknown intrinsic).\n");
- return ValueLatticeElement::getOverdefined();
+ SmallVector<ConstantRange, 2> OpRanges;
+ for (Value *Op : II->args()) {
+ Optional<ConstantRange> Range = getRangeFor(Op, II, BB);
+ if (!Range)
+ return None;
+ OpRanges.push_back(*Range);
+ }
+
+ return ValueLatticeElement::getRange(
+ ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges));
}
Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueExtractValue(
@@ -1116,6 +1099,26 @@ static bool matchICmpOperand(const APInt *&Offset, Value *LHS, Value *Val,
return false;
}
+/// Get value range for a "(Val + Offset) Pred RHS" condition.
+static ValueLatticeElement getValueFromSimpleICmpCondition(
+ CmpInst::Predicate Pred, Value *RHS, const APInt *Offset) {
+ ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(),
+ /*isFullSet=*/true);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS))
+ RHSRange = ConstantRange(CI->getValue());
+ else if (Instruction *I = dyn_cast<Instruction>(RHS))
+ if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ RHSRange = getConstantRangeFromMetadata(*Ranges);
+
+ ConstantRange TrueValues =
+ ConstantRange::makeAllowedICmpRegion(Pred, RHSRange);
+
+ if (Offset)
+ TrueValues = TrueValues.subtract(*Offset);
+
+ return ValueLatticeElement::getRange(std::move(TrueValues));
+}
+
static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
bool isTrueDest) {
Value *LHS = ICI->getOperand(0);
@@ -1138,30 +1141,27 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
return ValueLatticeElement::getOverdefined();
const APInt *Offset = nullptr;
- if (!matchICmpOperand(Offset, LHS, Val, EdgePred)) {
- std::swap(LHS, RHS);
- EdgePred = CmpInst::getSwappedPredicate(EdgePred);
- if (!matchICmpOperand(Offset, LHS, Val, EdgePred))
- return ValueLatticeElement::getOverdefined();
+ if (matchICmpOperand(Offset, LHS, Val, EdgePred))
+ return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset);
+
+ CmpInst::Predicate SwappedPred = CmpInst::getSwappedPredicate(EdgePred);
+ if (matchICmpOperand(Offset, RHS, Val, SwappedPred))
+ return getValueFromSimpleICmpCondition(SwappedPred, LHS, Offset);
+
+ // If (Val & Mask) == C then all the masked bits are known and we can compute
+ // a value range based on that.
+ const APInt *Mask, *C;
+ if (EdgePred == ICmpInst::ICMP_EQ &&
+ match(LHS, m_And(m_Specific(Val), m_APInt(Mask))) &&
+ match(RHS, m_APInt(C))) {
+ KnownBits Known;
+ Known.Zero = ~*C & *Mask;
+ Known.One = *C & *Mask;
+ return ValueLatticeElement::getRange(
+ ConstantRange::fromKnownBits(Known, /*IsSigned*/ false));
}
- // Calculate the range of values that are allowed by the comparison.
- ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(),
- /*isFullSet=*/true);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS))
- RHSRange = ConstantRange(CI->getValue());
- else if (Instruction *I = dyn_cast<Instruction>(RHS))
- if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
- RHSRange = getConstantRangeFromMetadata(*Ranges);
-
- // If we're interested in the false dest, invert the condition
- ConstantRange TrueValues =
- ConstantRange::makeAllowedICmpRegion(EdgePred, RHSRange);
-
- if (Offset) // Apply the offset from above.
- TrueValues = TrueValues.subtract(*Offset);
-
- return ValueLatticeElement::getRange(std::move(TrueValues));
+ return ValueLatticeElement::getOverdefined();
}
// Handle conditions of the form
@@ -1201,26 +1201,36 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 1)
return getValueFromOverflowCondition(Val, WO, isTrueDest);
- // Handle conditions in the form of (cond1 && cond2), we know that on the
- // true dest path both of the conditions hold. Similarly for conditions of
- // the form (cond1 || cond2), we know that on the false dest path neither
- // condition holds.
- BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond);
- if (!BO || (isTrueDest && BO->getOpcode() != BinaryOperator::And) ||
- (!isTrueDest && BO->getOpcode() != BinaryOperator::Or))
+ Value *L, *R;
+ bool IsAnd;
+ if (match(Cond, m_LogicalAnd(m_Value(L), m_Value(R))))
+ IsAnd = true;
+ else if (match(Cond, m_LogicalOr(m_Value(L), m_Value(R))))
+ IsAnd = false;
+ else
return ValueLatticeElement::getOverdefined();
// Prevent infinite recursion if Cond references itself as in this example:
// Cond: "%tmp4 = and i1 %tmp4, undef"
// BL: "%tmp4 = and i1 %tmp4, undef"
// BR: "i1 undef"
- Value *BL = BO->getOperand(0);
- Value *BR = BO->getOperand(1);
- if (BL == Cond || BR == Cond)
+ if (L == Cond || R == Cond)
return ValueLatticeElement::getOverdefined();
- return intersect(getValueFromCondition(Val, BL, isTrueDest, Visited),
- getValueFromCondition(Val, BR, isTrueDest, Visited));
+ // if (L && R) -> intersect L and R
+ // if (!(L || R)) -> intersect L and R
+ // if (L || R) -> union L and R
+ // if (!(L && R)) -> union L and R
+ if (isTrueDest ^ IsAnd) {
+ ValueLatticeElement V = getValueFromCondition(Val, L, isTrueDest, Visited);
+ if (V.isOverdefined())
+ return V;
+ V.mergeIn(getValueFromCondition(Val, R, isTrueDest, Visited));
+ return V;
+ }
+
+ return intersect(getValueFromCondition(Val, L, isTrueDest, Visited),
+ getValueFromCondition(Val, R, isTrueDest, Visited));
}
static ValueLatticeElement
@@ -1244,15 +1254,15 @@ ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond,
// Return true if Usr has Op as an operand, otherwise false.
static bool usesOperand(User *Usr, Value *Op) {
- return find(Usr->operands(), Op) != Usr->op_end();
+ return is_contained(Usr->operands(), Op);
}
// Return true if the instruction type of Val is supported by
-// constantFoldUser(). Currently CastInst and BinaryOperator only. Call this
-// before calling constantFoldUser() to find out if it's even worth attempting
-// to call it.
+// constantFoldUser(). Currently CastInst, BinaryOperator and FreezeInst only.
+// Call this before calling constantFoldUser() to find out if it's even worth
+// attempting to call it.
static bool isOperationFoldable(User *Usr) {
- return isa<CastInst>(Usr) || isa<BinaryOperator>(Usr);
+ return isa<CastInst>(Usr) || isa<BinaryOperator>(Usr) || isa<FreezeInst>(Usr);
}
// Check if Usr can be simplified to an integer constant when the value of one
@@ -1283,6 +1293,9 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op,
SimplifyBinOp(BO->getOpcode(), LHS, RHS, DL))) {
return ValueLatticeElement::getRange(ConstantRange(C->getValue()));
}
+ } else if (isa<FreezeInst>(Usr)) {
+ assert(cast<FreezeInst>(Usr)->getOperand(0) == Op && "Operand 0 isn't Op");
+ return ValueLatticeElement::getRange(ConstantRange(OpConstVal));
}
return ValueLatticeElement::getOverdefined();
}
@@ -1585,12 +1598,12 @@ static bool isKnownNonConstant(Value *V) {
return false;
}
-Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
- Instruction *CxtI) {
+Constant *LazyValueInfo::getConstant(Value *V, Instruction *CxtI) {
// Bail out early if V is known not to be a Constant.
if (isKnownNonConstant(V))
return nullptr;
+ BasicBlock *BB = CxtI->getParent();
ValueLatticeElement Result =
getImpl(PImpl, AC, BB->getModule()).getValueInBlock(V, BB, CxtI);
@@ -1604,11 +1617,11 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
return nullptr;
}
-ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB,
- Instruction *CxtI,
+ConstantRange LazyValueInfo::getConstantRange(Value *V, Instruction *CxtI,
bool UndefAllowed) {
assert(V->getType()->isIntegerTy());
unsigned Width = V->getType()->getIntegerBitWidth();
+ BasicBlock *BB = CxtI->getParent();
ValueLatticeElement Result =
getImpl(PImpl, AC, BB->getModule()).getValueInBlock(V, BB, CxtI);
if (Result.isUnknown())
@@ -1741,7 +1754,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
LazyValueInfo::Tristate
LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
- Instruction *CxtI) {
+ Instruction *CxtI, bool UseBlockValue) {
// Is or is not NonNull are common predicates being queried. If
// isKnownNonZero can tell us the result of the predicate, we can
// return it quickly. But this is only a fastpath, and falling
@@ -1755,7 +1768,10 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
else if (Pred == ICmpInst::ICMP_NE)
return LazyValueInfo::True;
}
- ValueLatticeElement Result = getImpl(PImpl, AC, M).getValueAt(V, CxtI);
+
+ ValueLatticeElement Result = UseBlockValue
+ ? getImpl(PImpl, AC, M).getValueInBlock(V, CxtI->getParent(), CxtI)
+ : getImpl(PImpl, AC, M).getValueAt(V, CxtI);
Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI);
if (Ret != Unknown)
return Ret;
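The masked-equality rule added above in getValueFromICmpCondition() turns an edge condition of the form (Val & Mask) == C into known bits for Val and then into an unsigned range via ConstantRange::fromKnownBits. Below is a standalone sketch of that arithmetic, with no LLVM dependency and purely illustrative constants; it shows why the resulting unsigned range is [Known.One, ~Known.Zero]:

#include <cstdint>
#include <cstdio>

int main() {
  // Suppose the taken branch tested (x & 0xF0) == 0x30 on an 8-bit value.
  const uint8_t Mask = 0xF0, C = 0x30;
  const uint8_t KnownZero = static_cast<uint8_t>(~C & Mask); // bits forced to 0
  const uint8_t KnownOne = static_cast<uint8_t>(C & Mask);   // bits forced to 1
  const uint8_t Min = KnownOne;                              // unknown bits all 0
  const uint8_t Max = static_cast<uint8_t>(~KnownZero);      // unknown bits all 1
  std::printf("x lies in [0x%02X, 0x%02X]\n", Min, Max);     // prints [0x30, 0x3F]
  return 0;
}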
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
index 10ead1019206..30eec5a611f7 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -299,9 +299,9 @@ FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
}
void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<PostDominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
index 564c00dbad98..e188c23cf32b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
@@ -63,6 +63,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
@@ -80,134 +81,102 @@
using namespace llvm;
namespace {
- namespace MemRef {
- static const unsigned Read = 1;
- static const unsigned Write = 2;
- static const unsigned Callee = 4;
- static const unsigned Branchee = 8;
- } // end namespace MemRef
-
- class Lint : public FunctionPass, public InstVisitor<Lint> {
- friend class InstVisitor<Lint>;
-
- void visitFunction(Function &F);
-
- void visitCallBase(CallBase &CB);
- void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size,
- MaybeAlign Alignment, Type *Ty, unsigned Flags);
- void visitEHBeginCatch(IntrinsicInst *II);
- void visitEHEndCatch(IntrinsicInst *II);
-
- void visitReturnInst(ReturnInst &I);
- void visitLoadInst(LoadInst &I);
- void visitStoreInst(StoreInst &I);
- void visitXor(BinaryOperator &I);
- void visitSub(BinaryOperator &I);
- void visitLShr(BinaryOperator &I);
- void visitAShr(BinaryOperator &I);
- void visitShl(BinaryOperator &I);
- void visitSDiv(BinaryOperator &I);
- void visitUDiv(BinaryOperator &I);
- void visitSRem(BinaryOperator &I);
- void visitURem(BinaryOperator &I);
- void visitAllocaInst(AllocaInst &I);
- void visitVAArgInst(VAArgInst &I);
- void visitIndirectBrInst(IndirectBrInst &I);
- void visitExtractElementInst(ExtractElementInst &I);
- void visitInsertElementInst(InsertElementInst &I);
- void visitUnreachableInst(UnreachableInst &I);
-
- Value *findValue(Value *V, bool OffsetOk) const;
- Value *findValueImpl(Value *V, bool OffsetOk,
- SmallPtrSetImpl<Value *> &Visited) const;
-
- public:
- Module *Mod;
- const DataLayout *DL;
- AliasAnalysis *AA;
- AssumptionCache *AC;
- DominatorTree *DT;
- TargetLibraryInfo *TLI;
-
- std::string Messages;
- raw_string_ostream MessagesStr;
-
- static char ID; // Pass identification, replacement for typeid
- Lint() : FunctionPass(ID), MessagesStr(Messages) {
- initializeLintPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- }
- void print(raw_ostream &O, const Module *M) const override {}
-
- void WriteValues(ArrayRef<const Value *> Vs) {
- for (const Value *V : Vs) {
- if (!V)
- continue;
- if (isa<Instruction>(V)) {
- MessagesStr << *V << '\n';
- } else {
- V->printAsOperand(MessagesStr, true, Mod);
- MessagesStr << '\n';
- }
+namespace MemRef {
+static const unsigned Read = 1;
+static const unsigned Write = 2;
+static const unsigned Callee = 4;
+static const unsigned Branchee = 8;
+} // end namespace MemRef
+
+class Lint : public InstVisitor<Lint> {
+ friend class InstVisitor<Lint>;
+
+ void visitFunction(Function &F);
+
+ void visitCallBase(CallBase &CB);
+ void visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
+ MaybeAlign Alignment, Type *Ty, unsigned Flags);
+ void visitEHBeginCatch(IntrinsicInst *II);
+ void visitEHEndCatch(IntrinsicInst *II);
+
+ void visitReturnInst(ReturnInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitXor(BinaryOperator &I);
+ void visitSub(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+ void visitShl(BinaryOperator &I);
+ void visitSDiv(BinaryOperator &I);
+ void visitUDiv(BinaryOperator &I);
+ void visitSRem(BinaryOperator &I);
+ void visitURem(BinaryOperator &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitVAArgInst(VAArgInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitUnreachableInst(UnreachableInst &I);
+
+ Value *findValue(Value *V, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSetImpl<Value *> &Visited) const;
+
+public:
+ Module *Mod;
+ const DataLayout *DL;
+ AliasAnalysis *AA;
+ AssumptionCache *AC;
+ DominatorTree *DT;
+ TargetLibraryInfo *TLI;
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA,
+ AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI)
+ : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI),
+ MessagesStr(Messages) {}
+
+ void WriteValues(ArrayRef<const Value *> Vs) {
+ for (const Value *V : Vs) {
+ if (!V)
+ continue;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ V->printAsOperand(MessagesStr, true, Mod);
+ MessagesStr << '\n';
}
}
+ }
- /// A check failed, so printout out the condition and the message.
- ///
- /// This provides a nice place to put a breakpoint if you want to see why
- /// something is not correct.
- void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
-
- /// A check failed (with values to print).
- ///
- /// This calls the Message-only version so that the above is easier to set
- /// a breakpoint on.
- template <typename T1, typename... Ts>
- void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
- CheckFailed(Message);
- WriteValues({V1, Vs...});
- }
- };
+ /// A check failed, so print out the condition and the message.
+ ///
+ /// This provides a nice place to put a breakpoint if you want to see why
+ /// something is not correct.
+ void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
+
+ /// A check failed (with values to print).
+ ///
+ /// This calls the Message-only version so that the above is easier to set
+ /// a breakpoint on.
+ template <typename T1, typename... Ts>
+ void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) {
+ CheckFailed(Message);
+ WriteValues({V1, Vs...});
+ }
+};
} // end anonymous namespace
-char Lint::ID = 0;
-INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
- false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
- false, true)
-
// Assert - We know that cond should be true, if not print an error message.
-#define Assert(C, ...) \
- do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false)
-
-// Lint::run - This is the main Analysis entry point for a
-// function.
-//
-bool Lint::runOnFunction(Function &F) {
- Mod = F.getParent();
- DL = &F.getParent()->getDataLayout();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- visit(F);
- dbgs() << MessagesStr.str();
- Messages.clear();
- return false;
-}
+#define Assert(C, ...) \
+ do { \
+ if (!(C)) { \
+ CheckFailed(__VA_ARGS__); \
+ return; \
+ } \
+ } while (false)
void Lint::visitFunction(Function &F) {
// This isn't undefined behavior, it's just a little unusual, and it's a
@@ -221,7 +190,7 @@ void Lint::visitFunction(Function &F) {
void Lint::visitCallBase(CallBase &I) {
Value *Callee = I.getCalledOperand();
- visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, None, nullptr,
+ visitMemoryReference(I, MemoryLocation::getAfter(Callee), None, nullptr,
MemRef::Callee);
if (Function *F = dyn_cast<Function>(findValue(Callee,
@@ -281,10 +250,10 @@ void Lint::visitCallBase(CallBase &I) {
// Check that an sret argument points to valid memory.
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
- Type *Ty =
- cast<PointerType>(Formal->getType())->getElementType();
- visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
- DL->getABITypeAlign(Ty), Ty,
+ Type *Ty = Formal->getParamStructRetType();
+ MemoryLocation Loc(
+ Actual, LocationSize::precise(DL->getTypeStoreSize(Ty)));
+ visitMemoryReference(I, Loc, DL->getABITypeAlign(Ty), Ty,
MemRef::Read | MemRef::Write);
}
}
@@ -309,25 +278,24 @@ void Lint::visitCallBase(CallBase &I) {
}
}
-
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
switch (II->getIntrinsicID()) {
- default: break;
+ default:
+ break;
- // TODO: Check more intrinsics
+ // TODO: Check more intrinsics
case Intrinsic::memcpy: {
MemCpyInst *MCI = cast<MemCpyInst>(&I);
- // TODO: If the size is known, use it.
- visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
+ visitMemoryReference(I, MemoryLocation::getForDest(MCI),
MCI->getDestAlign(), nullptr, MemRef::Write);
- visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
+ visitMemoryReference(I, MemoryLocation::getForSource(MCI),
MCI->getSourceAlign(), nullptr, MemRef::Read);
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
// isn't expressive enough for what we really want to do. Known partial
// overlap is not distinguished from the case where nothing is known.
- auto Size = LocationSize::unknown();
+ auto Size = LocationSize::afterPointer();
if (const ConstantInt *Len =
dyn_cast<ConstantInt>(findValue(MCI->getLength(),
/*OffsetOk=*/false)))
@@ -341,10 +309,10 @@ void Lint::visitCallBase(CallBase &I) {
case Intrinsic::memcpy_inline: {
MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I);
const uint64_t Size = MCII->getLength()->getValue().getLimitedValue();
- visitMemoryReference(I, MCII->getDest(), Size, MCII->getDestAlign(),
- nullptr, MemRef::Write);
- visitMemoryReference(I, MCII->getSource(), Size, MCII->getSourceAlign(),
- nullptr, MemRef::Read);
+ visitMemoryReference(I, MemoryLocation::getForDest(MCII),
+ MCII->getDestAlign(), nullptr, MemRef::Write);
+ visitMemoryReference(I, MemoryLocation::getForSource(MCII),
+ MCII->getSourceAlign(), nullptr, MemRef::Read);
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
// isn't expressive enough for what we really want to do. Known partial
@@ -356,17 +324,15 @@ void Lint::visitCallBase(CallBase &I) {
}
case Intrinsic::memmove: {
MemMoveInst *MMI = cast<MemMoveInst>(&I);
- // TODO: If the size is known, use it.
- visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
+ visitMemoryReference(I, MemoryLocation::getForDest(MMI),
MMI->getDestAlign(), nullptr, MemRef::Write);
- visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
+ visitMemoryReference(I, MemoryLocation::getForSource(MMI),
MMI->getSourceAlign(), nullptr, MemRef::Read);
break;
}
case Intrinsic::memset: {
MemSetInst *MSI = cast<MemSetInst>(&I);
- // TODO: If the size is known, use it.
- visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
+ visitMemoryReference(I, MemoryLocation::getForDest(MSI),
MSI->getDestAlign(), nullptr, MemRef::Write);
break;
}
@@ -376,26 +342,31 @@ void Lint::visitCallBase(CallBase &I) {
"Undefined behavior: va_start called in a non-varargs function",
&I);
- visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize,
- None, nullptr, MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None,
+ nullptr, MemRef::Read | MemRef::Write);
break;
case Intrinsic::vacopy:
- visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize,
- None, nullptr, MemRef::Write);
- visitMemoryReference(I, I.getArgOperand(1), MemoryLocation::UnknownSize,
- None, nullptr, MemRef::Read);
+ visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None,
+ nullptr, MemRef::Write);
+ visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI), None,
+ nullptr, MemRef::Read);
break;
case Intrinsic::vaend:
- visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize,
- None, nullptr, MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None,
+ nullptr, MemRef::Read | MemRef::Write);
break;
case Intrinsic::stackrestore:
// Stackrestore doesn't read or write memory, but it sets the
// stack pointer, which the compiler may read from or write to
// at any time, so check it for both readability and writeability.
- visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize,
- None, nullptr, MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None,
+ nullptr, MemRef::Read | MemRef::Write);
+ break;
+ case Intrinsic::get_active_lane_mask:
+ if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1)))
+ Assert(!TripCount->isZero(), "get_active_lane_mask: operand #2 "
+ "must be greater than 0", &I);
break;
}
}
@@ -413,13 +384,14 @@ void Lint::visitReturnInst(ReturnInst &I) {
// TODO: Check that the reference is in bounds.
// TODO: Check readnone/readonly function attributes.
-void Lint::visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size,
+void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
MaybeAlign Align, Type *Ty, unsigned Flags) {
// If no memory is being referenced, it doesn't matter if the pointer
// is valid.
- if (Size == 0)
+ if (Loc.Size.isZero())
return;
+ Value *Ptr = const_cast<Value *>(Loc.Ptr);
Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
Assert(!isa<ConstantPointerNull>(UnderlyingObject),
"Undefined behavior: Null pointer dereference", &I);
@@ -487,9 +459,8 @@ void Lint::visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size,
// Accesses from before the start or after the end of the object are not
// defined.
- Assert(Size == MemoryLocation::UnknownSize ||
- BaseSize == MemoryLocation::UnknownSize ||
- (Offset >= 0 && Offset + Size <= BaseSize),
+ Assert(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize ||
+ (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize),
"Undefined behavior: Buffer overflow", &I);
// Accesses that say that the memory is more aligned than it is are not
@@ -503,15 +474,13 @@ void Lint::visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size,
}
void Lint::visitLoadInst(LoadInst &I) {
- visitMemoryReference(I, I.getPointerOperand(),
- DL->getTypeStoreSize(I.getType()), I.getAlign(),
- I.getType(), MemRef::Read);
+ visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), I.getType(),
+ MemRef::Read);
}
void Lint::visitStoreInst(StoreInst &I) {
- visitMemoryReference(I, I.getPointerOperand(),
- DL->getTypeStoreSize(I.getOperand(0)->getType()),
- I.getAlign(), I.getOperand(0)->getType(), MemRef::Write);
+ visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(),
+ I.getOperand(0)->getType(), MemRef::Write);
}
void Lint::visitXor(BinaryOperator &I) {
@@ -553,7 +522,8 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
VectorType *VecTy = dyn_cast<VectorType>(V->getType());
if (!VecTy) {
- KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
+ KnownBits Known =
+ computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
return Known.isZero();
}
@@ -567,7 +537,8 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
// For a vector, KnownZero will only be true if all values are zero, so check
// this per component
- for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
+ for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements();
+ I != N; ++I) {
Constant *Elem = C->getAggregateElement(I);
if (isa<UndefValue>(Elem))
return true;
@@ -610,12 +581,12 @@ void Lint::visitAllocaInst(AllocaInst &I) {
}
void Lint::visitVAArgInst(VAArgInst &I) {
- visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, None,
- nullptr, MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, MemoryLocation::get(&I), None, nullptr,
+ MemRef::Read | MemRef::Write);
}
void Lint::visitIndirectBrInst(IndirectBrInst &I) {
- visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, None,
+ visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), None,
nullptr, MemRef::Branchee);
Assert(I.getNumDestinations() != 0,
@@ -625,14 +596,17 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) {
void Lint::visitExtractElementInst(ExtractElementInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
/*OffsetOk=*/false)))
- Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
- "Undefined result: extractelement index out of range", &I);
+ Assert(
+ CI->getValue().ult(
+ cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()),
+ "Undefined result: extractelement index out of range", &I);
}
void Lint::visitInsertElementInst(InsertElementInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
/*OffsetOk=*/false)))
- Assert(CI->getValue().ult(I.getType()->getNumElements()),
+ Assert(CI->getValue().ult(
+ cast<FixedVectorType>(I.getType())->getNumElements()),
"Undefined result: insertelement index out of range", &I);
}
@@ -669,7 +643,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// TODO: Look through eliminable cast pairs.
// TODO: Look through calls with unique return values.
// TODO: Look through vector insert/extract/shuffle.
- V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
+ V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts();
if (LoadInst *L = dyn_cast<LoadInst>(V)) {
BasicBlock::iterator BBI = L->getIterator();
BasicBlock *BB = L->getParent();
@@ -678,11 +652,13 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
if (!VisitedBlocks.insert(BB).second)
break;
if (Value *U =
- FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA))
+ FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA))
return findValueImpl(U, OffsetOk, Visited);
- if (BBI != BB->begin()) break;
+ if (BBI != BB->begin())
+ break;
BB = BB->getUniquePredecessor();
- if (!BB) break;
+ if (!BB)
+ break;
BBI = BB->end();
}
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
@@ -692,8 +668,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
if (CI->isNoopCast(*DL))
return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
} else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
- if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
- Ex->getIndices()))
+ if (Value *W =
+ FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices()))
if (W != V)
return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
@@ -724,22 +700,75 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
return V;
}
+PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto *Mod = F.getParent();
+ auto *DL = &F.getParent()->getDataLayout();
+ auto *AA = &AM.getResult<AAManager>(F);
+ auto *AC = &AM.getResult<AssumptionAnalysis>(F);
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ Lint L(Mod, DL, AA, AC, DT, TLI);
+ L.visit(F);
+ dbgs() << L.MessagesStr.str();
+ return PreservedAnalyses::all();
+}
+
+class LintLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ LintLegacyPass() : FunctionPass(ID) {
+ initializeLintLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ }
+ void print(raw_ostream &O, const Module *M) const override {}
+};
+
+char LintLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+
+bool LintLegacyPass::runOnFunction(Function &F) {
+ auto *Mod = F.getParent();
+ auto *DL = &F.getParent()->getDataLayout();
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ Lint L(Mod, DL, AA, AC, DT, TLI);
+ L.visit(F);
+ dbgs() << L.MessagesStr.str();
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Implement the public interfaces to this file...
//===----------------------------------------------------------------------===//
-FunctionPass *llvm::createLintPass() {
- return new Lint();
-}
+FunctionPass *llvm::createLintLegacyPassPass() { return new LintLegacyPass(); }
/// lintFunction - Check a function for errors, printing messages on stderr.
///
void llvm::lintFunction(const Function &f) {
- Function &F = const_cast<Function&>(f);
+ Function &F = const_cast<Function &>(f);
assert(!F.isDeclaration() && "Cannot lint external functions");
legacy::FunctionPassManager FPM(F.getParent());
- Lint *V = new Lint();
+ auto *V = new LintLegacyPass();
FPM.add(V);
FPM.run(F);
}
@@ -748,7 +777,7 @@ void llvm::lintFunction(const Function &f) {
///
void llvm::lintModule(const Module &M) {
legacy::PassManager PM;
- Lint *V = new Lint();
+ auto *V = new LintLegacyPass();
PM.add(V);
- PM.run(const_cast<Module&>(M));
+ PM.run(const_cast<Module &>(M));
}
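Lint is now available both as the LintLegacyPass defined above and as a new-pass-manager LintPass. A minimal sketch of driving the new-PM variant, assuming the LLVM 12 headers in this tree (the helper name runLintOn is illustrative and not part of LLVM):

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

void runLintOn(llvm::Function &F) {
  llvm::PassBuilder PB;
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;
  // Register the analyses LintPass::run pulls from the manager
  // (AAManager, AssumptionAnalysis, DominatorTreeAnalysis, TargetLibraryAnalysis).
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::LintPass());
  FPM.run(F, FAM); // diagnostics are written to dbgs() by the pass itself
}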
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp
index e5245225d905..8f373f70f216 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp
@@ -12,7 +12,9 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -107,11 +109,50 @@ static bool isDereferenceableAndAlignedPointer(
return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment,
Size, DL, CtxI, DT, Visited, MaxDepth);
- if (const auto *Call = dyn_cast<CallBase>(V))
+ if (const auto *Call = dyn_cast<CallBase>(V)) {
if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI,
DT, Visited, MaxDepth);
+ // If we have a call we can't recurse through, check to see if this is an
+ // allocation function for which we can establish an minimum object size.
+ // Such a minimum object size is analogous to a deref_or_null attribute in
+ // that we still need to prove the result non-null at point of use.
+ // NOTE: We can only use the object size as a base fact as we a) need to
+ // prove alignment too, and b) don't want the compile time impact of a
+ // separate recursive walk.
+ ObjectSizeOpts Opts;
+ // TODO: It may be okay to round to align, but that would imply that
+ // accessing slightly out of bounds was legal, and we're currently
+ // inconsistent about that. For the moment, be conservative.
+ Opts.RoundToAlign = false;
+ Opts.NullIsUnknownSize = true;
+ uint64_t ObjSize;
+ // TODO: Plumb through TLI so that malloc routines and such work.
+ if (getObjectSize(V, ObjSize, DL, nullptr, Opts)) {
+ APInt KnownDerefBytes(Size.getBitWidth(), ObjSize);
+ if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) &&
+ isKnownNonZero(V, DL, 0, nullptr, CtxI, DT) &&
+ // TODO: We're currently inconsistent about whether deref(N) is a
+ // global fact or a point in time fact. Once D61652 eventually
+ // lands, this check will be restricted to the point in time
+ // variant. For that variant, we need to prove that the object hasn't
+ // been conditionally freed before the context instruction - if it has, we
+ // might be hoisting over the inverse conditional and creating a
+ // dynamic use after free.
+ !PointerMayBeCapturedBefore(V, true, true, CtxI, DT, true)) {
+ // As we recursed through GEPs to get here, we've incrementally
+ // checked that each step advanced by a multiple of the alignment. If
+ // our base is properly aligned, then the original offset accessed
+ // must also be.
+ Type *Ty = V->getType();
+ assert(Ty->isSized() && "must be sized");
+ APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
+ return isAligned(V, Offset, Alignment, DL);
+ }
+ }
+ }
+
// If we don't know, assume the worst.
return false;
}
@@ -199,7 +240,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
Value *Ptr = LI->getPointerOperand();
APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
- DL.getTypeStoreSize(LI->getType()));
+ DL.getTypeStoreSize(LI->getType()).getFixedSize());
const Align Alignment = LI->getAlign();
Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
@@ -222,9 +263,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
if (Step->getAPInt() != EltSize)
return false;
- // TODO: If the symbolic trip count has a small bound (max count), we might
- // be able to prove safety.
- auto TC = SE.getSmallConstantTripCount(L);
+ auto TC = SE.getSmallConstantMaxTripCount(L);
if (!TC)
return false;
@@ -503,3 +542,23 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
// block.
return nullptr;
}
+
+bool llvm::canReplacePointersIfEqual(Value *A, Value *B, const DataLayout &DL,
+ Instruction *CtxI) {
+ Type *Ty = A->getType();
+ assert(Ty == B->getType() && Ty->isPointerTy() &&
+ "values must have matching pointer types");
+
+ // NOTE: The checks in the function are incomplete and currently miss illegal
+ // cases! The current implementation is a starting point and the
+ // implementation should be made stricter over time.
+ if (auto *C = dyn_cast<Constant>(B)) {
+ // Do not allow replacing a pointer with a constant pointer, unless it is
+ // either null or at least one byte is dereferenceable.
+ APInt OneByte(DL.getPointerTypeSizeInBits(Ty), 1);
+ return C->isNullValue() ||
+ isDereferenceableAndAlignedPointer(B, Align(1), OneByte, DL, CtxI);
+ }
+
+ return true;
+}
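canReplacePointersIfEqual() above is a new, deliberately conservative gate for propagating pointer equalities. A hedged usage sketch against its llvm/Analysis/Loads.h declaration (mayRewriteUse is an illustrative helper, not LLVM API):

#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"

// Given a condition `icmp eq %a, %b` known to hold at CtxI, decide whether a
// use of %a at CtxI may be rewritten to %b. Only constant replacements are
// restricted so far: B must be null or have at least one dereferenceable
// byte; any non-constant B is accepted.
static bool mayRewriteUse(llvm::Value *A, llvm::Value *B,
                          llvm::Instruction *CtxI) {
  const llvm::DataLayout &DL = CtxI->getModule()->getDataLayout();
  return llvm::canReplacePointersIfEqual(A, B, DL, CtxI);
}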
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index f409cd322146..e632fe25c24c 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -149,27 +149,23 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
// symbolic stride replaced by one.
ValueToValueMap::const_iterator SI =
PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
- if (SI != PtrToStride.end()) {
- Value *StrideVal = SI->second;
+ if (SI == PtrToStride.end())
+ // For a non-symbolic stride, just return the original expression.
+ return OrigSCEV;
- // Strip casts.
- StrideVal = stripIntegerCast(StrideVal);
+ Value *StrideVal = stripIntegerCast(SI->second);
- ScalarEvolution *SE = PSE.getSE();
- const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
- const auto *CT =
- static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
-
- PSE.addPredicate(*SE->getEqualPredicate(U, CT));
- auto *Expr = PSE.getSCEV(Ptr);
+ ScalarEvolution *SE = PSE.getSE();
+ const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
+ const auto *CT =
+ static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
- LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
- << " by: " << *Expr << "\n");
- return Expr;
- }
+ PSE.addPredicate(*SE->getEqualPredicate(U, CT));
+ auto *Expr = PSE.getSCEV(Ptr);
- // Otherwise, just return the SCEV of the original pointer.
- return OrigSCEV;
+ LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
+ << " by: " << *Expr << "\n");
+ return Expr;
}
RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
@@ -227,9 +223,10 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
}
// Add the size of the pointed element to ScEnd.
- unsigned EltSize =
- Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
- const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
+ auto &DL = Lp->getHeader()->getModule()->getDataLayout();
+ Type *IdxTy = DL.getIndexType(Ptr->getType());
+ const SCEV *EltSizeSCEV =
+ SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
}
@@ -508,16 +505,16 @@ public:
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
- AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AAResults *AA,
- LoopInfo *LI, MemoryDepChecker::DepCandidates &DA,
+ AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
+ MemoryDepChecker::DepCandidates &DA,
PredicatedScalarEvolution &PSE)
- : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
+ : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
IsRTCheckAnalysisNeeded(false), PSE(PSE) {}
/// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
+ AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, false));
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
@@ -526,7 +523,7 @@ public:
/// Register a store.
void addStore(MemoryLocation &Loc) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
+ AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, true));
}
@@ -585,8 +582,6 @@ private:
/// Set of all accesses.
PtrAccessSet Accesses;
- const DataLayout &DL;
-
/// The loop being checked.
const Loop *TheLoop;
@@ -732,7 +727,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
// First, count how many write and read accesses are in the alias set. Also
// collect MemAccessInfos for later.
SmallVector<MemAccessInfo, 4> AccessInfos;
- for (auto A : AS) {
+ for (const auto &A : AS) {
Value *Ptr = A.getValue();
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
@@ -866,7 +861,7 @@ void AccessAnalysis::processMemAccesses() {
// compatibility and potential for underlying-object overlap. As a result, we
// only need to check for potential pointer dependencies within each alias
// set.
- for (auto &AS : AST) {
+ for (const auto &AS : AST) {
// Note that both the alias-set tracker and the alias sets themselves used
// linked lists internally and so the iteration order here is deterministic
// (matching the original instruction order within each set).
@@ -886,12 +881,12 @@ void AccessAnalysis::processMemAccesses() {
bool UseDeferred = SetIteration > 0;
PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
- for (auto AV : AS) {
+ for (const auto &AV : AS) {
Value *Ptr = AV.getValue();
// For a single memory access in AliasSetTracker, Accesses may contain
// both read and write, and they both need to be handled for CheckDeps.
- for (auto AC : S) {
+ for (const auto &AC : S) {
if (AC.getPointer() != Ptr)
continue;
@@ -938,7 +933,7 @@ void AccessAnalysis::processMemAccesses() {
typedef SmallVector<const Value *, 16> ValueVector;
ValueVector TempObjects;
- GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
+ getUnderlyingObjects(Ptr, TempObjects, LI);
LLVM_DEBUG(dbgs()
<< "Underlying objects for pointer " << *Ptr << "\n");
for (const Value *UnderlyingObj : TempObjects) {
@@ -992,7 +987,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
// Make sure there is only one non-const index and analyze that.
Value *NonConstIndex = nullptr;
- for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
+ for (Value *Index : GEP->indices())
if (!isa<ConstantInt>(Index)) {
if (NonConstIndex)
return false;
@@ -1142,7 +1137,7 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
// first pointer in the array.
Value *Ptr0 = VL[0];
const SCEV *Scev0 = SE.getSCEV(Ptr0);
- Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
+ Value *Obj0 = getUnderlyingObject(Ptr0);
llvm::SmallSet<int64_t, 4> Offsets;
for (auto *Ptr : VL) {
@@ -1153,7 +1148,7 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
return false;
// If a pointer refers to a different underlying object, bail - the
// pointers are by definition incomparable.
- Value *CurrObj = GetUnderlyingObject(Ptr, DL);
+ Value *CurrObj = getUnderlyingObject(Ptr);
if (CurrObj != Obj0)
return false;
@@ -1343,7 +1338,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
// If the number of vector iteration between the store and the load are
// small we could incur conflicts.
if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
- MaxVFWithoutSLForwardIssues = (VF >>= 1);
+ MaxVFWithoutSLForwardIssues = (VF >> 1);
break;
}
}
@@ -1659,7 +1654,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
<< " with max VF = " << MaxVF << '\n');
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
- MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
+ MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
return Dependence::BackwardVectorizable;
}
@@ -1771,7 +1766,7 @@ bool LoopAccessInfo::canAnalyzeLoop() {
<< TheLoop->getHeader()->getName() << '\n');
// We can only analyze innermost loops.
- if (!TheLoop->empty()) {
+ if (!TheLoop->isInnermost()) {
LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
return false;
@@ -1786,29 +1781,9 @@ bool LoopAccessInfo::canAnalyzeLoop() {
return false;
}
- // We must have a single exiting block.
- if (!TheLoop->getExitingBlock()) {
- LLVM_DEBUG(
- dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- recordAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by analyzer";
- return false;
- }
-
- // We only handle bottom-tested loops, i.e. loop in which the condition is
- // checked at the end of each iteration. With that we can assume that all
- // instructions in the loop are executed the same number of times.
- if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
- LLVM_DEBUG(
- dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- recordAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by analyzer";
- return false;
- }
-
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = PSE->getBackedgeTakenCount();
- if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
+ if (isa<SCEVCouldNotCompute>(ExitCount)) {
recordAnalysis("CantComputeNumberOfIterations")
<< "could not determine number of loop iterations";
LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
@@ -1947,10 +1922,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
}
MemoryDepChecker::DepCandidates DependentAccesses;
- AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
- TheLoop, AA, LI, DependentAccesses, *PSE);
+ AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
- // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
+ // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
// for read and once for write, it will only appear once (on the write
// list). This is okay, since we are going to check for conflicts between
@@ -2152,12 +2126,8 @@ bool LoopAccessInfo::isUniform(Value *V) const {
}
void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
- Value *Ptr = nullptr;
- if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
- Ptr = LI->getPointerOperand();
- else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
- Ptr = SI->getPointerOperand();
- else
+ Value *Ptr = getLoadStorePointerOperand(MemAccess);
+ if (!Ptr)
return;
Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
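
In the couldPreventStoreLoadForward hunk above, `(VF >>= 1)` becomes `(VF >> 1)`: because the loop breaks immediately afterwards, both forms compute the same value, but the new one no longer mutates VF as a side effect. A standalone sketch of the difference in plain C++ (toy values, not LLVM code):

#include <cstdint>
#include <iostream>

int main() {
  // With the compound assignment, the candidate VF itself is halved as a
  // side effect of computing the maximum safe VF.
  uint64_t VF = 16;
  uint64_t MaxWithSideEffect = (VF >>= 1);
  std::cout << MaxWithSideEffect << " " << VF << "\n"; // 8 8

  // The plain shift yields the same maximum but leaves VF untouched,
  // which reads more clearly even though the loop breaks right after.
  VF = 16;
  uint64_t MaxWithoutSideEffect = (VF >> 1);
  std::cout << MaxWithoutSideEffect << " " << VF << "\n"; // 8 16
  return 0;
}
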
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp
index 21017c04da99..4ad5641da147 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 6ba247a87c22..cf68596bfbc3 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -29,7 +29,11 @@
#include "llvm/ADT/BreadthFirstIterator.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -145,7 +149,7 @@ IndexedReference::IndexedReference(Instruction &StoreOrLoadInst,
Optional<bool> IndexedReference::hasSpacialReuse(const IndexedReference &Other,
unsigned CLS,
- AliasAnalysis &AA) const {
+ AAResults &AA) const {
assert(IsValid && "Expecting a valid reference");
if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
@@ -202,7 +206,7 @@ Optional<bool> IndexedReference::hasTemporalReuse(const IndexedReference &Other,
unsigned MaxDistance,
const Loop &L,
DependenceInfo &DI,
- AliasAnalysis &AA) const {
+ AAResults &AA) const {
assert(IsValid && "Expecting a valid reference");
if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
@@ -457,7 +461,7 @@ bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
}
bool IndexedReference::isAliased(const IndexedReference &Other,
- AliasAnalysis &AA) const {
+ AAResults &AA) const {
const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
return AA.isMustAlias(Loc1, Loc2);
@@ -476,7 +480,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) {
CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
ScalarEvolution &SE, TargetTransformInfo &TTI,
- AliasAnalysis &AA, DependenceInfo &DI,
+ AAResults &AA, DependenceInfo &DI,
Optional<unsigned> TRT)
: Loops(Loops), TripCounts(), LoopCosts(),
TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
@@ -495,14 +499,13 @@ CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
std::unique_ptr<CacheCost>
CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
DependenceInfo &DI, Optional<unsigned> TRT) {
- if (Root.getParentLoop()) {
+ if (!Root.isOutermost()) {
LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
return nullptr;
}
LoopVectorTy Loops;
- for (Loop *L : breadth_first(&Root))
- Loops.push_back(L);
+ append_range(Loops, breadth_first(&Root));
if (!getInnerMostLoop(Loops)) {
LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
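
The CacheCost::getCacheCost hunk replaces an explicit push_back loop over breadth_first(&Root) with append_range. Roughly, append_range bulk-appends a whole range to a container; a minimal stand-in in plain C++ (the real helper lives in llvm/ADT/STLExtras.h and works on arbitrary ranges):

#include <iostream>
#include <iterator>
#include <vector>

// Minimal stand-in for llvm::append_range: append every element of a
// range to a container, replacing the manual push_back loop.
template <typename Container, typename Range>
void append_range(Container &C, Range &&R) {
  C.insert(C.end(), std::begin(R), std::end(R));
}

int main() {
  std::vector<int> Loops;
  std::vector<int> BreadthFirstOrder{1, 2, 3}; // stands in for breadth_first(&Root)
  append_range(Loops, BreadthFirstOrder);
  for (int L : Loops)
    std::cout << L << " "; // 1 2 3
  std::cout << "\n";
  return 0;
}
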
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
index b5af210f1b92..a85869b16333 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
@@ -34,6 +34,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -431,6 +432,10 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
for (const Use &U : I.uses()) {
const Instruction *UI = cast<Instruction>(U.getUser());
const BasicBlock *UserBB = UI->getParent();
+
+ // For practical purposes, we consider that the use in a PHI
+ // occurs in the respective predecessor block. For more info,
+ // see the `phi` doc in LangRef and the LCSSA doc.
if (const PHINode *P = dyn_cast<PHINode>(UI))
UserBB = P->getIncomingBlock(U);
@@ -535,6 +540,22 @@ void Loop::setLoopAlreadyUnrolled() {
setLoopID(NewLoopID);
}
+void Loop::setLoopMustProgress() {
+ LLVMContext &Context = getHeader()->getContext();
+
+ MDNode *MustProgress = findOptionMDForLoop(this, "llvm.loop.mustprogress");
+
+ if (MustProgress)
+ return;
+
+ MDNode *MustProgressMD =
+ MDNode::get(Context, MDString::get(Context, "llvm.loop.mustprogress"));
+ MDNode *LoopID = getLoopID();
+ MDNode *NewLoopID =
+ makePostTransformationMetadata(Context, LoopID, {}, {MustProgressMD});
+ setLoopID(NewLoopID);
+}
+
bool Loop::isAnnotatedParallel() const {
MDNode *DesiredLoopIdMetadata = getLoopID();
@@ -546,7 +567,7 @@ bool Loop::isAnnotatedParallel() const {
SmallPtrSet<MDNode *, 4>
ParallelAccessGroups; // For scalable 'contains' check.
if (ParallelAccesses) {
- for (const MDOperand &MD : drop_begin(ParallelAccesses->operands(), 1)) {
+ for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) {
MDNode *AccGroup = cast<MDNode>(MD.get());
assert(isValidAsAccessGroup(AccGroup) &&
"List item must be an access group");
@@ -764,7 +785,7 @@ void UnloopUpdater::removeBlocksFromAncestors() {
/// Update the parent loop for all subloops directly nested within unloop.
void UnloopUpdater::updateSubloopParents() {
- while (!Unloop.empty()) {
+ while (!Unloop.isInnermost()) {
Loop *Subloop = *std::prev(Unloop.end());
Unloop.removeChildLoop(std::prev(Unloop.end()));
@@ -862,7 +883,7 @@ void LoopInfo::erase(Loop *Unloop) {
auto InvalidateOnExit = make_scope_exit([&]() { destroy(Unloop); });
// First handle the special case of no parent loop to simplify the algorithm.
- if (!Unloop->getParentLoop()) {
+ if (Unloop->isOutermost()) {
// Since BBLoop had no parent, Unloop blocks are no longer in a loop.
for (Loop::block_iterator I = Unloop->block_begin(),
E = Unloop->block_end();
@@ -887,7 +908,7 @@ void LoopInfo::erase(Loop *Unloop) {
}
// Move all of the subloops to the top-level.
- while (!Unloop->empty())
+ while (!Unloop->isInnermost())
addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end())));
return;
@@ -1017,8 +1038,7 @@ MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context,
SmallVector<Metadata *, 4> MDs;
// Reserve first location for self reference to the LoopID metadata node.
- TempMDTuple TempNode = MDNode::getTemporary(Context, None);
- MDs.push_back(TempNode.get());
+ MDs.push_back(nullptr);
// Remove metadata for the transformation that has been applied or that became
// outdated.
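
The isAnnotatedParallel hunk above drops the explicit `, 1` from drop_begin, relying on its default of skipping a single leading element. A rough copy-based equivalent in plain C++ (the real llvm::drop_begin returns a lazy view rather than a new container):

#include <cstddef>
#include <iostream>
#include <iterator>
#include <vector>

// Copy-based approximation of llvm::drop_begin with N defaulting to 1.
template <typename Range>
auto drop_begin(const Range &R, std::size_t N = 1) {
  return std::vector<typename Range::value_type>(std::next(R.begin(), N),
                                                 R.end());
}

int main() {
  // The leading element models the metadata operand that is not an
  // access group and therefore gets skipped.
  std::vector<int> Operands{0, 10, 20};
  for (int Op : drop_begin(Operands)) // skips the leading element
    std::cout << Op << " ";           // 10 20
  std::cout << "\n";
  return 0;
}
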
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopNestAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopNestAnalysis.cpp
index 61e53de93151..7133abcc3504 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopNestAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopNestAnalysis.cpp
@@ -42,8 +42,7 @@ static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop,
LoopNest::LoopNest(Loop &Root, ScalarEvolution &SE)
: MaxPerfectDepth(getMaxPerfectDepth(Root, SE)) {
- for (Loop *L : breadth_first(&Root))
- Loops.push_back(L);
+ append_range(Loops, breadth_first(&Root));
}
std::unique_ptr<LoopNest> LoopNest::getLoopNest(Loop &Root,
@@ -53,8 +52,8 @@ std::unique_ptr<LoopNest> LoopNest::getLoopNest(Loop &Root,
bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
ScalarEvolution &SE) {
- assert(!OuterLoop.getSubLoops().empty() && "Outer loop should have subloops");
- assert(InnerLoop.getParentLoop() && "Inner loop should have a parent");
+ assert(!OuterLoop.isInnermost() && "Outer loop should have subloops");
+ assert(!InnerLoop.isOutermost() && "Inner loop should have a parent");
LLVM_DEBUG(dbgs() << "Checking whether loop '" << OuterLoop.getName()
<< "' and '" << InnerLoop.getName()
<< "' are perfectly nested.\n");
@@ -206,6 +205,31 @@ unsigned LoopNest::getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE) {
return CurrentDepth;
}
+const BasicBlock &LoopNest::skipEmptyBlockUntil(const BasicBlock *From,
+ const BasicBlock *End) {
+ assert(From && "Expecting valid From");
+ assert(End && "Expecting valid End");
+
+ if (From == End || !From->getSingleSuccessor())
+ return *From;
+
+ auto IsEmpty = [](const BasicBlock *BB) {
+ return (BB->getInstList().size() == 1);
+ };
+
+ // Visited is used to avoid running into an infinite loop.
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ const BasicBlock *BB = From->getSingleSuccessor();
+ const BasicBlock *PredBB = BB;
+ while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB)) {
+ Visited.insert(BB);
+ PredBB = BB;
+ BB = BB->getSingleSuccessor();
+ }
+
+ return (BB == End) ? *End : *PredBB;
+}
+
static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop,
ScalarEvolution &SE) {
// The inner loop must be the only outer loop's child.
@@ -228,34 +252,92 @@ static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop,
InnerLoop.getExitingBlock() != InnerLoopLatch || !InnerLoopExit)
return false;
+ // Returns whether the block `ExitBlock` contains at least one LCSSA Phi node.
+ auto ContainsLCSSAPhi = [](const BasicBlock &ExitBlock) {
+ return any_of(ExitBlock.phis(), [](const PHINode &PN) {
+ return PN.getNumIncomingValues() == 1;
+ });
+ };
+
+ // Returns whether the block `BB` qualifies for being an extra Phi block. The
+ // extra Phi block is the additional block inserted after the exit block of a
+ // "guarded" inner loop, which contains only Phi nodes corresponding to the
+ // LCSSA Phi nodes in the exit block.
+ auto IsExtraPhiBlock = [&](const BasicBlock &BB) {
+ return BB.getFirstNonPHI() == BB.getTerminator() &&
+ all_of(BB.phis(), [&](const PHINode &PN) {
+ return all_of(PN.blocks(), [&](const BasicBlock *IncomingBlock) {
+ return IncomingBlock == InnerLoopExit ||
+ IncomingBlock == OuterLoopHeader;
+ });
+ });
+ };
+
+ const BasicBlock *ExtraPhiBlock = nullptr;
// Ensure the only branch that may exist between the loops is the inner loop
// guard.
if (OuterLoopHeader != InnerLoopPreHeader) {
- const BranchInst *BI =
- dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
-
- if (!BI || BI != InnerLoop.getLoopGuardBranch())
- return false;
-
- // The successors of the inner loop guard should be the inner loop
- // preheader and the outer loop latch.
- for (const BasicBlock *Succ : BI->successors()) {
- if (Succ == InnerLoopPreHeader)
- continue;
- if (Succ == OuterLoopLatch)
- continue;
-
- DEBUG_WITH_TYPE(VerboseDebug, {
- dbgs() << "Inner loop guard successor " << Succ->getName()
- << " doesn't lead to inner loop preheader or "
- "outer loop latch.\n";
- });
- return false;
+ const BasicBlock &SingleSucc =
+ LoopNest::skipEmptyBlockUntil(OuterLoopHeader, InnerLoopPreHeader);
+
+ // No conditional branch present.
+ if (&SingleSucc != InnerLoopPreHeader) {
+ const BranchInst *BI = dyn_cast<BranchInst>(SingleSucc.getTerminator());
+
+ if (!BI || BI != InnerLoop.getLoopGuardBranch())
+ return false;
+
+ bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit);
+
+ // The successors of the inner loop guard should be the inner loop
+ // preheader or the outer loop latch possibly through empty blocks.
+ for (const BasicBlock *Succ : BI->successors()) {
+ const BasicBlock *PotentialInnerPreHeader = Succ;
+ const BasicBlock *PotentialOuterLatch = Succ;
+
+ // Ensure the inner loop guard successor is empty before skipping
+ // blocks.
+ if (Succ->getInstList().size() == 1) {
+ PotentialInnerPreHeader =
+ &LoopNest::skipEmptyBlockUntil(Succ, InnerLoopPreHeader);
+ PotentialOuterLatch =
+ &LoopNest::skipEmptyBlockUntil(Succ, OuterLoopLatch);
+ }
+
+ if (PotentialInnerPreHeader == InnerLoopPreHeader)
+ continue;
+ if (PotentialOuterLatch == OuterLoopLatch)
+ continue;
+
+ // If `InnerLoopExit` contains LCSSA Phi instructions, an additional block
+ // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The
+ // loops are still considered perfectly nested if the extra block only
+ // contains Phi instructions from InnerLoopExit and OuterLoopHeader.
+ if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) &&
+ Succ->getSingleSuccessor() == OuterLoopLatch) {
+ // Record the extra block so that we can reference it later in the
+ // final check. Seeing a non-null `ExtraPhiBlock` later also lets us
+ // conclude that the inner loop is guarded and that LCSSA Phi nodes
+ // exist in its exit block.
+ ExtraPhiBlock = Succ;
+ continue;
+ }
+
+ DEBUG_WITH_TYPE(VerboseDebug, {
+ dbgs() << "Inner loop guard successor " << Succ->getName()
+ << " doesn't lead to inner loop preheader or "
+ "outer loop latch.\n";
+ });
+ return false;
+ }
}
}
- // Ensure the inner loop exit block leads to the outer loop latch.
- if (InnerLoopExit->getSingleSuccessor() != OuterLoopLatch) {
+ // Ensure the inner loop exit block leads to the outer loop latch, possibly
+ // through empty blocks.
+ const BasicBlock &SuccInner =
+ LoopNest::skipEmptyBlockUntil(InnerLoop.getExitBlock(), OuterLoopLatch);
+ if (&SuccInner != OuterLoopLatch && &SuccInner != ExtraPhiBlock) {
DEBUG_WITH_TYPE(
VerboseDebug,
dbgs() << "Inner loop exit block " << *InnerLoopExit
@@ -266,6 +348,8 @@ static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop,
return true;
}
+AnalysisKey LoopNestAnalysis::Key;
+
raw_ostream &llvm::operator<<(raw_ostream &OS, const LoopNest &LN) {
OS << "IsPerfect=";
if (LN.getMaxPerfectDepth() == LN.getNestDepth())
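
The new LoopNest::skipEmptyBlockUntil walks single successors through blocks that contain nothing but a terminator, stopping at the requested end block, a non-empty block, or a previously visited block. The same control flow on a toy block type (illustrative names, not LLVM's BasicBlock):

#include <iostream>
#include <set>
#include <string>

// Toy model of LoopNest::skipEmptyBlockUntil: follow unique successors
// through "empty" blocks until `End` or a non-empty block is reached.
struct Block {
  std::string Name;
  bool Empty = true;
  Block *SingleSucc = nullptr;
};

const Block &skipEmptyBlockUntil(const Block *From, const Block *End) {
  if (From == End || !From->SingleSucc)
    return *From;
  std::set<const Block *> Visited; // guards against cycles
  const Block *BB = From->SingleSucc;
  const Block *Pred = BB;
  while (BB && BB != End && BB->Empty && !Visited.count(BB)) {
    Visited.insert(BB);
    Pred = BB;
    BB = BB->SingleSucc;
  }
  return (BB == End) ? *End : *Pred;
}

int main() {
  Block Latch{"latch", false};
  Block Empty2{"empty2", true, &Latch};
  Block Empty1{"empty1", true, &Empty2};
  Block Header{"header", false, &Empty1};
  std::cout << skipEmptyBlockUntil(&Header, &Latch).Name << "\n"; // latch
  return 0;
}
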
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp
index 520f06003dd2..9e470e998e67 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp
@@ -15,11 +15,12 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/IR/StructuralHash.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TimeProfiler.h"
@@ -76,7 +77,7 @@ LPPassManager::LPPassManager()
// Insert loop into loop nest (LoopInfo) and loop queue (LQ).
void LPPassManager::addLoop(Loop &L) {
- if (!L.getParentLoop()) {
+ if (L.isOutermost()) {
// This is the top level loop.
LQ.push_front(&L);
return;
@@ -116,7 +117,7 @@ void LPPassManager::markLoopAsDeleted(Loop &L) {
// there. However, we have to be careful to not remove the back of the queue
// as that is assumed to match the current loop.
assert(LQ.back() == CurrentLoop && "Loop queue back isn't the current loop!");
- LQ.erase(std::remove(LQ.begin(), LQ.end(), &L), LQ.end());
+ llvm::erase_value(LQ, &L);
if (&L == CurrentLoop) {
CurrentLoopDeleted = true;
@@ -191,7 +192,19 @@ bool LPPassManager::runOnFunction(Function &F) {
{
PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
TimeRegion PassTimer(getPassTimer(P));
+#ifdef EXPENSIVE_CHECKS
+ uint64_t RefHash = StructuralHash(F);
+#endif
LocalChanged = P->runOnLoop(CurrentLoop, *this);
+
+#ifdef EXPENSIVE_CHECKS
+ if (!LocalChanged && (RefHash != StructuralHash(F))) {
+ llvm::errs() << "Pass modifies its input and doesn't report it: "
+ << P->getPassName() << "\n";
+ llvm_unreachable("Pass modifies its input and doesn't report it");
+ }
+#endif
+
Changed |= LocalChanged;
if (EmitICRemark) {
unsigned NewSize = F.getInstructionCount();
@@ -241,7 +254,8 @@ bool LPPassManager::runOnFunction(Function &F) {
F.getContext().yield();
}
- removeNotPreservedAnalysis(P);
+ if (LocalChanged)
+ removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
removeDeadPasses(P,
CurrentLoopDeleted ? "<deleted>"
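
Under EXPENSIVE_CHECKS, LPPassManager now hashes the function before and after each loop pass and aborts if the pass changed the IR while reporting LocalChanged == false. The idea in a self-contained sketch, with a string standing in for the function and std::hash in place of StructuralHash:

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>

// The "function" is a string and std::hash stands in for StructuralHash;
// the point is the shape of the check, not the hash itself.
using Function = std::string;

bool runPass(Function &F, bool ActuallyModify, bool ReportChange) {
  if (ActuallyModify)
    F += " (modified)";
  return ReportChange;
}

int main() {
  Function F = "define void @f()";
  std::size_t RefHash = std::hash<Function>{}(F);

  bool LocalChanged = runPass(F, /*ActuallyModify=*/true, /*ReportChange=*/false);

  if (!LocalChanged && RefHash != std::hash<Function>{}(F)) {
    std::cerr << "Pass modifies its input and doesn't report it\n";
    return 1;
  }
  return 0;
}
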
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp b/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 45873f260f23..89f4ff427dff 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -11,14 +11,17 @@
// 'release' mode) or a runtime-loaded model (the 'development' case).
//
//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
+
#include <limits>
#include <unordered_map>
#include <unordered_set>
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/InlineFeaturesAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -63,8 +66,8 @@ CallBase *getInlinableCS(Instruction &I) {
MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
std::unique_ptr<MLModelRunner> Runner)
: InlineAdvisor(
- MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
- M(M), ModelRunner(std::move(Runner)), CG(new CallGraph(M)),
+ M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
+ ModelRunner(std::move(Runner)), CG(new CallGraph(M)),
InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
assert(ModelRunner);
@@ -115,7 +118,8 @@ void MLInlineAdvisor::onPassEntry() {
}
int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
- return FAM.getResult<InlineFeaturesAnalysis>(F).DirectCallsToDefinedFunctions;
+ return FAM.getResult<FunctionPropertiesAnalysis>(F)
+ .DirectCallsToDefinedFunctions;
}
// Update the internal state of the advisor, and force invalidate feature
@@ -130,7 +134,7 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
Function *Callee = Advice.getCallee();
// The caller features aren't valid anymore.
- FAM.invalidate<InlineFeaturesAnalysis>(*Caller);
+ FAM.invalidate<FunctionPropertiesAnalysis>(*Caller);
int64_t IRSizeAfter =
getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
@@ -143,14 +147,15 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
// For edges, we 'forget' the edges that the caller and callee used to have
// before inlining, and add back what they currently have together.
int64_t NewCallerAndCalleeEdges =
- FAM.getResult<InlineFeaturesAnalysis>(*Caller)
+ FAM.getResult<FunctionPropertiesAnalysis>(*Caller)
.DirectCallsToDefinedFunctions;
if (CalleeWasDeleted)
--NodeCount;
else
- NewCallerAndCalleeEdges += FAM.getResult<InlineFeaturesAnalysis>(*Callee)
- .DirectCallsToDefinedFunctions;
+ NewCallerAndCalleeEdges +=
+ FAM.getResult<FunctionPropertiesAnalysis>(*Callee)
+ .DirectCallsToDefinedFunctions;
EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}
@@ -163,32 +168,27 @@ int64_t MLInlineAdvisor::getModuleIRSize() const {
return Ret;
}
-std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdvice(CallBase &CB) {
+std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
auto &Caller = *CB.getCaller();
auto &Callee = *CB.getCalledFunction();
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
- auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
- return FAM.getResult<TargetLibraryAnalysis>(F);
- };
-
auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
- auto TrivialDecision =
- llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI);
-
+ auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
// If this is a "never inline" case, there won't be any changes to internal
// state we need to track, so we can just return the base InlineAdvice, which
// will do nothing interesting.
// Same thing if this is a recursive case.
- if ((TrivialDecision.hasValue() && !TrivialDecision->isSuccess()) ||
+ if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
&Caller == &Callee)
- return std::make_unique<InlineAdvice>(this, CB, ORE, false);
+ return getMandatoryAdvice(CB, false);
- bool Mandatory = TrivialDecision.hasValue() && TrivialDecision->isSuccess();
+ bool Mandatory =
+ MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;
// If we need to stop, we won't want to track anymore any state changes, so
// we just return the base InlineAdvice, which acts as a noop.
@@ -214,15 +214,15 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdvice(CallBase &CB) {
}
if (Mandatory)
- return getMandatoryAdvice(CB, ORE);
+ return getMandatoryAdvice(CB, true);
auto NrCtantParams = 0;
for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
NrCtantParams += (isa<Constant>(*I));
}
- auto &CallerBefore = FAM.getResult<InlineFeaturesAnalysis>(Caller);
- auto &CalleeBefore = FAM.getResult<InlineFeaturesAnalysis>(Callee);
+ auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
+ auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);
ModelRunner->setFeature(FeatureIndex::CalleeBasicBlockCount,
CalleeBefore.BasicBlockCount);
@@ -249,10 +249,22 @@ MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
return std::make_unique<MLInlineAdvice>(this, CB, ORE, ModelRunner->run());
}
+std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
+ bool Advice) {
+ // Make sure we track inlinings in all cases - mandatory or not.
+ if (Advice && !ForceStop)
+ return getMandatoryAdviceImpl(CB);
+
+ // If this is a "never inline" case, there won't be any changes to internal
+ // state we need to track, so we can just return the base InlineAdvice, which
+ // will do nothing interesting.
+ // Same if we are forced to stop - we don't track anymore.
+ return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
+}
+
std::unique_ptr<MLInlineAdvice>
-MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
- OptimizationRemarkEmitter &ORE) {
- return std::make_unique<MLInlineAdvice>(this, CB, ORE, true);
+MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
+ return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}
void MLInlineAdvice::reportContextForRemark(
@@ -298,4 +310,5 @@ void MLInlineAdvice::recordUnattemptedInliningImpl() {
reportContextForRemark(R);
return R;
});
-} \ No newline at end of file
+}
+#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
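
MLInlineAdvisor.cpp is now wrapped in `#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)`, so the translation unit compiles to nothing when no TensorFlow-backed model is available. A standalone sketch of the pattern, with hypothetical HAVE_BACKEND_A/HAVE_BACKEND_B macros in place of the LLVM ones:

#include <iostream>

// HAVE_BACKEND_A / HAVE_BACKEND_B are stand-ins for LLVM_HAVE_TF_AOT and
// LLVM_HAVE_TF_API; when neither is defined, the implementation is
// compiled out entirely.
#if defined(HAVE_BACKEND_A) || defined(HAVE_BACKEND_B)
void runModel() { std::cout << "model-backed inlining advice\n"; }
#endif

int main() {
#if defined(HAVE_BACKEND_A) || defined(HAVE_BACKEND_B)
  runModel();
#else
  std::cout << "built without ML backends\n";
#endif
  return 0;
}
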
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp
index 9524ec96bb61..00642347102a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"
@@ -71,9 +72,6 @@ namespace {
assert(dep.isUnknown() && "unexpected dependence type");
return InstTypePair(dep.getInst(), Unknown);
}
- static InstTypePair getInstTypePair(const Instruction* inst, DepType type) {
- return InstTypePair(inst, type);
- }
};
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp
index 564410b8af08..0078ceacbad2 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/MemDerefPrinter.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/DataLayout.h"
@@ -17,6 +18,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
namespace {
@@ -76,3 +78,35 @@ void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
OS << "\n\n";
}
}
+
+PreservedAnalyses MemDerefPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ OS << "Memory Dereferenceability of pointers in function '" << F.getName()
+ << "'\n";
+
+ SmallVector<Value *, 4> Deref;
+ SmallPtrSet<Value *, 4> DerefAndAligned;
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (auto &I : instructions(F)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ Value *PO = LI->getPointerOperand();
+ if (isDereferenceablePointer(PO, LI->getType(), DL))
+ Deref.push_back(PO);
+ if (isDereferenceableAndAlignedPointer(
+ PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL))
+ DerefAndAligned.insert(PO);
+ }
+ }
+
+ OS << "The following are dereferenceable:\n";
+ for (Value *V : Deref) {
+ V->print(OS);
+ if (DerefAndAligned.count(V))
+ OS << "\t(aligned)";
+ else
+ OS << "\t(unaligned)";
+ OS << "\n\n";
+ }
+ return PreservedAnalyses::all();
+}
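
The new-pass-manager MemDerefPrinterPass::run above keeps one ordered container for every dereferenceable pointer and a set for the subset that is also aligned, then annotates each entry while printing. The same shape with standard containers standing in for SmallVector/SmallPtrSet:

#include <iostream>
#include <set>
#include <string>
#include <vector>

int main() {
  // One ordered list for everything dereferenceable, one set marking
  // which of those are also aligned (mirrors Deref / DerefAndAligned).
  std::vector<std::string> Deref;
  std::set<std::string> DerefAndAligned;

  struct Candidate { std::string Name; bool IsDeref, IsAligned; };
  const Candidate Pointers[] = {
      {"%p", true, true}, {"%q", true, false}, {"%r", false, false}};

  for (const Candidate &C : Pointers) {
    if (C.IsDeref)
      Deref.push_back(C.Name);
    if (C.IsDeref && C.IsAligned)
      DerefAndAligned.insert(C.Name);
  }

  std::cout << "The following are dereferenceable:\n";
  for (const std::string &V : Deref)
    std::cout << V << (DerefAndAligned.count(V) ? "\t(aligned)" : "\t(unaligned)")
              << "\n";
  return 0;
}
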
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
index 0b61b1c0eabd..5dda96a2ca94 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -72,6 +72,7 @@ struct AllocFnsTy {
// know which functions are nounwind, noalias, nocapture parameters, etc.
static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
{LibFunc_malloc, {MallocLike, 1, 0, -1}},
+ {LibFunc_vec_malloc, {MallocLike, 1, 0, -1}},
{LibFunc_valloc, {MallocLike, 1, 0, -1}},
{LibFunc_Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
{LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
@@ -103,7 +104,9 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
{LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow)
{LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1}},
{LibFunc_calloc, {CallocLike, 2, 0, 1}},
+ {LibFunc_vec_calloc, {CallocLike, 2, 0, 1}},
{LibFunc_realloc, {ReallocLike, 2, 1, -1}},
+ {LibFunc_vec_realloc, {ReallocLike, 2, 1, -1}},
{LibFunc_reallocf, {ReallocLike, 2, 1, -1}},
{LibFunc_strdup, {StrDupLike, 1, -1, -1}},
{LibFunc_strndup, {StrDupLike, 2, 1, -1}}
@@ -378,9 +381,8 @@ PointerType *llvm::getMallocType(const CallInst *CI,
unsigned NumOfBitCastUses = 0;
// Determine if CallInst has a bitcast use.
- for (Value::const_user_iterator UI = CI->user_begin(), E = CI->user_end();
- UI != E;)
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
+ for (const User *U : CI->users())
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
MallocType = cast<PointerType>(BCI->getDestTy());
NumOfBitCastUses++;
}
@@ -566,8 +568,16 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
Value *UseZero =
Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second);
ResultSize = Builder.CreateZExtOrTrunc(ResultSize, ResultType);
- return Builder.CreateSelect(UseZero, ConstantInt::get(ResultType, 0),
- ResultSize);
+ Value *Ret = Builder.CreateSelect(
+ UseZero, ConstantInt::get(ResultType, 0), ResultSize);
+
+ // The non-constant size expression cannot evaluate to -1.
+ if (!isa<Constant>(SizeOffsetPair.first) ||
+ !isa<Constant>(SizeOffsetPair.second))
+ Builder.CreateAssumption(
+ Builder.CreateICmpNE(Ret, ConstantInt::get(ResultType, -1)));
+
+ return Ret;
}
}
@@ -676,13 +686,14 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
}
SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
+ Type *MemoryTy = A.getPointeeInMemoryValueType();
// No interprocedural analysis is done at the moment.
- if (!A.hasPassPointeeByValueAttr()) {
+ if (!MemoryTy || !MemoryTy->isSized()) {
++ObjectVisitorArgument;
return unknown();
}
- PointerType *PT = cast<PointerType>(A.getType());
- APInt Size(IntTyBits, DL.getTypeAllocSize(PT->getElementType()));
+
+ APInt Size(IntTyBits, DL.getTypeAllocSize(MemoryTy));
return std::make_pair(align(Size, A.getParamAlignment()), Zero);
}
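
The MemoryBuiltins change above teaches the allocation-function table about vec_malloc, vec_calloc and vec_realloc. Each table entry records the function's kind, its parameter count, and which argument(s) carry the allocation size; here is a small illustrative subset of that table in plain C++ (field meanings follow the AllocFnsTy struct in this file, the entries are not exhaustive):

#include <iostream>
#include <string>
#include <utility>

// Shape of the AllocationFnData table: kind, number of parameters, and
// the indices of the size-carrying arguments (-1 when unused).
enum AllocType { MallocLike, CallocLike, ReallocLike };
struct AllocFnsTy { AllocType Kind; unsigned NumParams; int FstParam, SndParam; };

static const std::pair<std::string, AllocFnsTy> AllocationFnData[] = {
    {"malloc",      {MallocLike, 1, 0, -1}},
    {"vec_malloc",  {MallocLike, 1, 0, -1}},  // newly recognized in the patch
    {"calloc",      {CallocLike, 2, 0, 1}},
    {"vec_calloc",  {CallocLike, 2, 0, 1}},
    {"realloc",     {ReallocLike, 2, 1, -1}},
    {"vec_realloc", {ReallocLike, 2, 1, -1}},
};

int main(int argc, char **argv) {
  std::string Name = argc > 1 ? argv[1] : "vec_malloc";
  for (const auto &Entry : AllocationFnData)
    if (Entry.first == Name)
      std::cout << Name << ": size argument at index "
                << Entry.second.FstParam << "\n";
  return 0;
}
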
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 566eba5c54af..886b5bf4acd3 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -148,7 +148,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
// calls to free() deallocate the entire structure
- Loc = MemoryLocation(CI->getArgOperand(0));
+ Loc = MemoryLocation::getAfter(CI->getArgOperand(0));
return ModRefInfo::Mod;
}
@@ -166,6 +166,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return ModRefInfo::Mod;
+ case Intrinsic::masked_load:
+ Loc = MemoryLocation::getForArgument(II, 0, TLI);
+ return ModRefInfo::Ref;
+ case Intrinsic::masked_store:
+ Loc = MemoryLocation::getForArgument(II, 1, TLI);
+ return ModRefInfo::Mod;
default:
break;
}
@@ -338,7 +344,9 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// If we hit load/store with the same invariant.group metadata (and the
// same pointer operand) we can assume that value pointed by pointer
// operand didn't change.
- if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
+ if ((isa<LoadInst>(U) ||
+ (isa<StoreInst>(U) &&
+ cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
U->hasMetadata(LLVMContext::MD_invariant_group))
ClosestDependency = GetClosestDependency(ClosestDependency, U);
}
@@ -362,6 +370,8 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+ // We can batch AA queries, because IR does not change during a MemDep query.
+ BatchAAResults BatchAA(AA);
bool isInvariantLoad = false;
unsigned DefaultLimit = getDefaultBlockScanLimit();
@@ -406,8 +416,6 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
isInvariantLoad = true;
}
- const DataLayout &DL = BB->getModule()->getDataLayout();
-
// Return "true" if and only if the instruction I is either a non-simple
// load or a non-simple store.
auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
@@ -442,15 +450,32 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
- if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ Intrinsic::ID ID = II->getIntrinsicID();
+ switch (ID) {
+ case Intrinsic::lifetime_start: {
// FIXME: This only considers queries directly on the invariant-tagged
// pointer, not on query pointers that are indexed off of them. It'd
// be nice to handle that at some point (the right approach is to use
// GetPointerBaseWithConstantOffset).
- if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
+ MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
+ if (BatchAA.isMustAlias(ArgLoc, MemLoc))
return MemDepResult::getDef(II);
continue;
}
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_store: {
+ MemoryLocation Loc;
+ /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
+ AliasResult R = BatchAA.alias(Loc, MemLoc);
+ if (R == NoAlias)
+ continue;
+ if (R == MustAlias)
+ return MemDepResult::getDef(II);
+ if (ID == Intrinsic::masked_load)
+ continue;
+ return MemDepResult::getClobber(II);
+ }
+ }
}
// Values depend on loads if the pointers are must aliased. This means
@@ -487,7 +512,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
MemoryLocation LoadLoc = MemoryLocation::get(LI);
// If we found a pointer, check if it could be the same as our pointer.
- AliasResult R = AA.alias(LoadLoc, MemLoc);
+ AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
if (isLoad) {
if (R == NoAlias)
@@ -518,7 +543,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
continue;
// Stores don't alias loads from read-only memory.
- if (AA.pointsToConstantMemory(LoadLoc))
+ if (BatchAA.pointsToConstantMemory(LoadLoc))
continue;
// Stores depend on may/must aliased loads.
@@ -549,7 +574,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// If alias analysis can tell that this store is guaranteed to not modify
// the query pointer, ignore it. Use getModRefInfo to handle cases where
// the query pointer points to constant memory etc.
- if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc)))
+ if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
continue;
// Ok, this store might clobber the query pointer. Check to see if it is
@@ -558,7 +583,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
MemoryLocation StoreLoc = MemoryLocation::get(SI);
// If we found a pointer, check if it could be the same as our pointer.
- AliasResult R = AA.alias(StoreLoc, MemLoc);
+ AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
if (R == NoAlias)
continue;
@@ -576,8 +601,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// looking for a clobber in many cases; that's an alias property and is
// handled by BasicAA.
if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
- const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
- if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
+ const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
+ if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
return MemDepResult::getDef(Inst);
}
@@ -594,9 +619,10 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
continue;
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
- ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
+ ModRefInfo MR = BatchAA.getModRefInfo(Inst, MemLoc);
// If necessary, perform additional analysis.
if (isModAndRefSet(MR))
+ // TODO: Support callCapturesBefore() on BatchAAResults.
MR = AA.callCapturesBefore(Inst, MemLoc, &DT);
switch (clearMust(MR)) {
case ModRefInfo::NoModRef:
@@ -728,8 +754,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
} else {
// Seed DirtyBlocks with each of the preds of QueryInst's block.
BasicBlock *QueryBB = QueryCall->getParent();
- for (BasicBlock *Pred : PredCache.get(QueryBB))
- DirtyBlocks.push_back(Pred);
+ append_range(DirtyBlocks, PredCache.get(QueryBB));
++NumUncacheNonLocal;
}
@@ -743,8 +768,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
// Iterate while we still have blocks to update.
while (!DirtyBlocks.empty()) {
- BasicBlock *DirtyBB = DirtyBlocks.back();
- DirtyBlocks.pop_back();
+ BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
// Already processed this block?
if (!Visited.insert(DirtyBB).second)
@@ -814,8 +838,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
// If the block *is* completely transparent to the load, we need to check
// the predecessors of this block. Add them to our worklist.
- for (BasicBlock *Pred : PredCache.get(DirtyBB))
- DirtyBlocks.push_back(Pred);
+ append_range(DirtyBlocks, PredCache.get(DirtyBB));
}
}
@@ -992,7 +1015,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
NonLocalDepEntry Val = Cache.back();
Cache.pop_back();
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache.begin(), Cache.end(), Val);
+ llvm::upper_bound(Cache, Val);
Cache.insert(Entry, Val);
}
break;
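
getSimplePointerDependencyFrom now routes its alias queries through a BatchAAResults wrapper: since the IR cannot change while a single MemDep query runs, repeated alias questions can be served from a cache. A toy illustration of that memoization idea (not the actual LLVM class):

#include <iostream>
#include <map>
#include <string>
#include <utility>

enum AliasResult { NoAlias, MayAlias, MustAlias };

struct ToyAA {
  int Calls = 0;
  AliasResult alias(const std::string &A, const std::string &B) {
    ++Calls; // pretend this is expensive
    return A == B ? MustAlias : MayAlias;
  }
};

// Caches results for the lifetime of one query batch, mirroring the
// intent of wrapping AA in BatchAAResults.
struct BatchToyAA {
  ToyAA &AA;
  std::map<std::pair<std::string, std::string>, AliasResult> Cache;
  AliasResult alias(const std::string &A, const std::string &B) {
    auto Key = std::make_pair(A, B);
    auto It = Cache.find(Key);
    if (It != Cache.end())
      return It->second;
    return Cache[Key] = AA.alias(A, B);
  }
};

int main() {
  ToyAA AA;
  BatchToyAA Batch{AA};
  Batch.alias("%p", "%q");
  Batch.alias("%p", "%q"); // served from the cache
  std::cout << "underlying AA queries: " << AA.Calls << "\n"; // 1
  return 0;
}
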
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
index 4c31d6786ed8..ef9cda37ce35 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
@@ -20,8 +20,10 @@ using namespace llvm;
void LocationSize::print(raw_ostream &OS) const {
OS << "LocationSize::";
- if (*this == unknown())
- OS << "unknown";
+ if (*this == beforeOrAfterPointer())
+ OS << "beforeOrAfterPointer";
+ else if (*this == afterPointer())
+ OS << "afterPointer";
else if (*this == mapEmpty())
OS << "mapEmpty";
else if (*this == mapTombstone())
@@ -57,8 +59,8 @@ MemoryLocation MemoryLocation::get(const VAArgInst *VI) {
AAMDNodes AATags;
VI->getAAMetadata(AATags);
- return MemoryLocation(VI->getPointerOperand(), LocationSize::unknown(),
- AATags);
+ return MemoryLocation(VI->getPointerOperand(),
+ LocationSize::afterPointer(), AATags);
}
MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
@@ -109,7 +111,7 @@ MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) {
}
MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
- auto Size = LocationSize::unknown();
+ auto Size = LocationSize::afterPointer();
if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
Size = LocationSize::precise(C->getValue().getZExtValue());
@@ -130,7 +132,7 @@ MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) {
}
MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
- auto Size = LocationSize::unknown();
+ auto Size = LocationSize::afterPointer();
if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength()))
Size = LocationSize::precise(C->getValue().getZExtValue());
@@ -158,13 +160,14 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
break;
case Intrinsic::memset:
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memory intrinsic");
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
AATags);
- break;
+ return MemoryLocation::getAfter(Arg, AATags);
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
@@ -176,6 +179,21 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()),
AATags);
+ case Intrinsic::masked_load:
+ assert(ArgIdx == 0 && "Invalid argument index");
+ return MemoryLocation(
+ Arg,
+ LocationSize::upperBound(DL.getTypeStoreSize(II->getType())),
+ AATags);
+
+ case Intrinsic::masked_store:
+ assert(ArgIdx == 1 && "Invalid argument index");
+ return MemoryLocation(
+ Arg,
+ LocationSize::upperBound(
+ DL.getTypeStoreSize(II->getArgOperand(0)->getType())),
+ AATags);
+
case Intrinsic::invariant_end:
// The first argument to an invariant.end is a "descriptor" type (e.g. a
// pointer to a empty struct) which is never actually dereferenced.
@@ -210,20 +228,48 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
LibFunc F;
- if (TLI && Call->getCalledFunction() &&
- TLI->getLibFunc(*Call->getCalledFunction(), F) &&
- F == LibFunc_memset_pattern16 && TLI->has(F)) {
- assert((ArgIdx == 0 || ArgIdx == 1) &&
- "Invalid argument index for memset_pattern16");
- if (ArgIdx == 1)
- return MemoryLocation(Arg, LocationSize::precise(16), AATags);
- if (const ConstantInt *LenCI =
- dyn_cast<ConstantInt>(Call->getArgOperand(2)))
- return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
- AATags);
+ if (TLI && TLI->getLibFunc(*Call, F) && TLI->has(F)) {
+ switch (F) {
+ case LibFunc_memset_pattern16:
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for memset_pattern16");
+ if (ArgIdx == 1)
+ return MemoryLocation(Arg, LocationSize::precise(16), AATags);
+ if (const ConstantInt *LenCI =
+ dyn_cast<ConstantInt>(Call->getArgOperand(2)))
+ return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
+ AATags);
+ return MemoryLocation::getAfter(Arg, AATags);
+ case LibFunc_bcmp:
+ case LibFunc_memcmp:
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for memcmp/bcmp");
+ if (const ConstantInt *LenCI =
+ dyn_cast<ConstantInt>(Call->getArgOperand(2)))
+ return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
+ AATags);
+ return MemoryLocation::getAfter(Arg, AATags);
+ case LibFunc_memchr:
+ assert((ArgIdx == 0) && "Invalid argument index for memchr");
+ if (const ConstantInt *LenCI =
+ dyn_cast<ConstantInt>(Call->getArgOperand(2)))
+ return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
+ AATags);
+ return MemoryLocation::getAfter(Arg, AATags);
+ case LibFunc_memccpy:
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for memccpy");
+ // We only know an upper bound on the number of bytes read/written.
+ if (const ConstantInt *LenCI =
+ dyn_cast<ConstantInt>(Call->getArgOperand(3)))
+ return MemoryLocation(
+ Arg, LocationSize::upperBound(LenCI->getZExtValue()), AATags);
+ return MemoryLocation::getAfter(Arg, AATags);
+ default:
+ break;
+ };
}
// FIXME: Handle memset_pattern4 and memset_pattern8 also.
- return MemoryLocation(Call->getArgOperand(ArgIdx), LocationSize::unknown(),
- AATags);
+ return MemoryLocation::getBeforeOrAfter(Call->getArgOperand(ArgIdx), AATags);
}
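
The MemoryLocation changes replace LocationSize::unknown() here with more descriptive states: a precise byte count, an upper bound (e.g. for memccpy's length argument), everything after the pointer, or possibly before and after it. A toy model of those distinctions (the real class encodes them in a single integer; this version just tags them):

#include <cstdint>
#include <iostream>
#include <string>

struct LocSize {
  enum Kind { Precise, UpperBound, AfterPointer, BeforeOrAfterPointer } K;
  uint64_t Bytes = 0;

  static LocSize precise(uint64_t N) { return {Precise, N}; }
  static LocSize upperBound(uint64_t N) { return {UpperBound, N}; }
  static LocSize afterPointer() { return {AfterPointer}; }
  static LocSize beforeOrAfterPointer() { return {BeforeOrAfterPointer}; }

  std::string str() const {
    switch (K) {
    case Precise:              return "precise(" + std::to_string(Bytes) + ")";
    case UpperBound:           return "upperBound(" + std::to_string(Bytes) + ")";
    case AfterPointer:         return "afterPointer";
    case BeforeOrAfterPointer: return "beforeOrAfterPointer";
    }
    return "";
  }
};

// memccpy(dst, src, c, n) touches at most n bytes, so a known constant
// length yields an upper bound rather than a precise size.
LocSize sizeForMemccpy(bool LengthIsConstant, uint64_t N) {
  return LengthIsConstant ? LocSize::upperBound(N) : LocSize::afterPointer();
}

int main() {
  std::cout << sizeForMemccpy(true, 16).str() << "\n"; // upperBound(16)
  std::cout << sizeForMemccpy(false, 0).str() << "\n"; // afterPointer
  return 0;
}
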
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
index f2f5fd70f471..4722b68e20e9 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Config/llvm-config.h"
@@ -59,6 +60,11 @@ using namespace llvm;
#define DEBUG_TYPE "memoryssa"
+static cl::opt<std::string>
+ DotCFGMSSA("dot-cfg-mssa",
+ cl::value_desc("file name for generated dot file"),
+ cl::desc("file name for generated dot file"), cl::init(""));
+
INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@@ -264,7 +270,6 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
const Instruction *UseInst, AliasAnalysisType &AA) {
Instruction *DefInst = MD->getMemoryInst();
assert(DefInst && "Defining instruction not actually an instruction");
- const auto *UseCall = dyn_cast<CallBase>(UseInst);
Optional<AliasResult> AR;
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
@@ -276,15 +281,10 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
// clobbers where they don't really exist at all. Please see D43269 for
// context.
switch (II->getIntrinsicID()) {
- case Intrinsic::lifetime_start:
- if (UseCall)
- return {false, NoAlias};
- AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc);
- return {AR != NoAlias, AR};
- case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
return {false, NoAlias};
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare:
@@ -296,14 +296,14 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
}
}
- if (UseCall) {
- ModRefInfo I = AA.getModRefInfo(DefInst, UseCall);
+ if (auto *CB = dyn_cast_or_null<CallBase>(UseInst)) {
+ ModRefInfo I = AA.getModRefInfo(DefInst, CB);
AR = isMustSet(I) ? MustAlias : MayAlias;
return {isModOrRefSet(I), AR};
}
if (auto *DefLoad = dyn_cast<LoadInst>(DefInst))
- if (auto *UseLoad = dyn_cast<LoadInst>(UseInst))
+ if (auto *UseLoad = dyn_cast_or_null<LoadInst>(UseInst))
return {!areLoadsReorderable(UseLoad, DefLoad), MayAlias};
ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc);
@@ -357,28 +357,15 @@ struct UpwardsMemoryQuery {
} // end anonymous namespace
-static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
- BatchAAResults &AA) {
- Instruction *Inst = MD->getMemoryInst();
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::lifetime_end:
- return AA.alias(MemoryLocation(II->getArgOperand(1)), Loc) == MustAlias;
- default:
- return false;
- }
- }
- return false;
-}
-
template <typename AliasAnalysisType>
static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
const Instruction *I) {
// If the memory can't be changed, then loads of the memory can't be
// clobbered.
- return isa<LoadInst>(I) && (I->hasMetadata(LLVMContext::MD_invariant_load) ||
- AA.pointsToConstantMemory(MemoryLocation(
- cast<LoadInst>(I)->getPointerOperand())));
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return I->hasMetadata(LLVMContext::MD_invariant_load) ||
+ AA.pointsToConstantMemory(MemoryLocation::get(LI));
+ return false;
}
/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
@@ -465,10 +452,15 @@ checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
}
assert(isa<MemoryPhi>(MA));
- Worklist.append(
- upward_defs_begin({const_cast<MemoryAccess *>(MA), MAP.second},
- MSSA.getDomTree()),
- upward_defs_end());
+
+ // Add reachable phi predecessors
+ for (auto ItB = upward_defs_begin(
+ {const_cast<MemoryAccess *>(MA), MAP.second},
+ MSSA.getDomTree()),
+ ItE = upward_defs_end();
+ ItB != ItE; ++ItB)
+ if (MSSA.getDomTree().isReachableFromEntry(ItB.getPhiArgBlock()))
+ Worklist.emplace_back(*ItB);
}
}
@@ -519,9 +511,16 @@ template <class AliasAnalysisType> class ClobberWalker {
UpwardsMemoryQuery *Query;
unsigned *UpwardWalkLimit;
- // Phi optimization bookkeeping
+ // Phi optimization bookkeeping:
+ // List of DefPath to process during the current phi optimization walk.
SmallVector<DefPath, 32> Paths;
+ // List of visited <Access, Location> pairs; we can skip paths already
+ // visited with the same memory location.
DenseSet<ConstMemoryAccessPair> VisitedPhis;
+ // Record if phi translation has been performed during the current phi
+ // optimization walk, as merging alias results after phi translation can
+ // yield incorrect results. Context in PR46156.
+ bool PerformedPhiTranslation = false;
/// Find the nearest def or phi that `From` can legally be optimized to.
const MemoryAccess *getWalkTarget(const MemoryPhi *From) const {
@@ -596,8 +595,9 @@ template <class AliasAnalysisType> class ClobberWalker {
void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
ListIndex PriorNode) {
- auto UpwardDefs = make_range(
- upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT), upward_defs_end());
+ auto UpwardDefsBegin = upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT,
+ &PerformedPhiTranslation);
+ auto UpwardDefs = make_range(UpwardDefsBegin, upward_defs_end());
for (const MemoryAccessPair &P : UpwardDefs) {
PausedSearches.push_back(Paths.size());
Paths.emplace_back(P.second, P.first, PriorNode);
@@ -651,8 +651,16 @@ template <class AliasAnalysisType> class ClobberWalker {
// - We still cache things for A, so C only needs to walk up a bit.
// If this behavior becomes problematic, we can fix without a ton of extra
// work.
- if (!VisitedPhis.insert({Node.Last, Node.Loc}).second)
+ if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) {
+ if (PerformedPhiTranslation) {
+ // If visiting this path performed Phi translation, don't continue,
+ // since it may not be correct to merge results from two paths if one
+ // relies on the phi translation.
+ TerminatedPath Term{Node.Last, PathIndex};
+ return Term;
+ }
continue;
+ }
const MemoryAccess *SkipStopWhere = nullptr;
if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) {
@@ -765,7 +773,7 @@ template <class AliasAnalysisType> class ClobberWalker {
/// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
const MemoryLocation &Loc) {
- assert(Paths.empty() && VisitedPhis.empty() &&
+ assert(Paths.empty() && VisitedPhis.empty() && !PerformedPhiTranslation &&
"Reset the optimization state.");
Paths.emplace_back(Loc, Start, Phi, None);
@@ -921,6 +929,7 @@ template <class AliasAnalysisType> class ClobberWalker {
void resetPhiOptznState() {
Paths.clear();
VisitedPhis.clear();
+ PerformedPhiTranslation = false;
}
public:
@@ -1439,15 +1448,6 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
}
MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]);
- // If the lifetime of the pointer ends at this instruction, it's live on
- // entry.
- if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) {
- // Reset UpperBound to liveOnEntryDef's place in the stack
- UpperBound = 0;
- FoundClobberResult = true;
- LocInfo.AR = MustAlias;
- break;
- }
ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA);
if (CA.IsClobber) {
FoundClobberResult = true;
@@ -1709,8 +1709,11 @@ MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
if (CreationMustSucceed)
assert(NewAccess != nullptr && "Tried to create a memory access for a "
"non-memory touching instruction");
- if (NewAccess)
+ if (NewAccess) {
+ assert((!Definition || !isa<MemoryUse>(Definition)) &&
+ "A use cannot be a defining access");
NewAccess->setDefiningAccess(Definition);
+ }
return NewAccess;
}
@@ -1739,9 +1742,15 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
// dependencies here.
// FIXME: Replace this special casing with a more accurate modelling of
// assume's control dependency.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- if (II->getIntrinsicID() == Intrinsic::assume)
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
return nullptr;
+ }
+ }
// Using a nonstandard AA pipelines might leave us with unexpected modref
// results for I, so add a check to not model instructions that may not read
@@ -1751,8 +1760,8 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
bool Def, Use;
if (Template) {
- Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
- Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr;
+ Def = isa<MemoryDef>(Template);
+ Use = isa<MemoryUse>(Template);
#if !defined(NDEBUG)
ModRefInfo ModRef = AAP->getModRefInfo(I, None);
bool DefCheck, UseCheck;
@@ -1789,23 +1798,6 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
return MUD;
}
-/// Returns true if \p Replacer dominates \p Replacee .
-bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
- const MemoryAccess *Replacee) const {
- if (isa<MemoryUseOrDef>(Replacee))
- return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
- const auto *MP = cast<MemoryPhi>(Replacee);
- // For a phi node, the use occurs in the predecessor block of the phi node.
- // Since we may occur multiple times in the phi node, we have to check each
- // operand to ensure Replacer dominates each operand where Replacee occurs.
- for (const Use &Arg : MP->operands()) {
- if (Arg.get() != Replacee &&
- !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg)))
- return false;
- }
- return true;
-}
-
/// Properly remove \p MA from all of MemorySSA's lookup tables.
void MemorySSA::removeFromLookups(MemoryAccess *MA) {
assert(MA->use_empty() &&
@@ -1989,8 +1981,7 @@ void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
"Incomplete MemoryPhi Node");
for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
verifyUseInDefs(Phi->getIncomingValue(I), Phi);
- assert(find(predecessors(&B), Phi->getIncomingBlock(I)) !=
- pred_end(&B) &&
+ assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) &&
"Incoming phi block not a block predecessor");
}
#endif
@@ -2237,9 +2228,98 @@ void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MemorySSAWrapperPass>();
}
+class DOTFuncMSSAInfo {
+private:
+ const Function &F;
+ MemorySSAAnnotatedWriter MSSAWriter;
+
+public:
+ DOTFuncMSSAInfo(const Function &F, MemorySSA &MSSA)
+ : F(F), MSSAWriter(&MSSA) {}
+
+ const Function *getFunction() { return &F; }
+ MemorySSAAnnotatedWriter &getWriter() { return MSSAWriter; }
+};
+
+namespace llvm {
+
+template <>
+struct GraphTraits<DOTFuncMSSAInfo *> : public GraphTraits<const BasicBlock *> {
+ static NodeRef getEntryNode(DOTFuncMSSAInfo *CFGInfo) {
+ return &(CFGInfo->getFunction()->getEntryBlock());
+ }
+
+ // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+ using nodes_iterator = pointer_iterator<Function::const_iterator>;
+
+ static nodes_iterator nodes_begin(DOTFuncMSSAInfo *CFGInfo) {
+ return nodes_iterator(CFGInfo->getFunction()->begin());
+ }
+
+ static nodes_iterator nodes_end(DOTFuncMSSAInfo *CFGInfo) {
+ return nodes_iterator(CFGInfo->getFunction()->end());
+ }
+
+ static size_t size(DOTFuncMSSAInfo *CFGInfo) {
+ return CFGInfo->getFunction()->size();
+ }
+};
+
+template <>
+struct DOTGraphTraits<DOTFuncMSSAInfo *> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
+
+ static std::string getGraphName(DOTFuncMSSAInfo *CFGInfo) {
+ return "MSSA CFG for '" + CFGInfo->getFunction()->getName().str() +
+ "' function";
+ }
+
+ std::string getNodeLabel(const BasicBlock *Node, DOTFuncMSSAInfo *CFGInfo) {
+ return DOTGraphTraits<DOTFuncInfo *>::getCompleteNodeLabel(
+ Node, nullptr,
+ [CFGInfo](raw_string_ostream &OS, const BasicBlock &BB) -> void {
+ BB.print(OS, &CFGInfo->getWriter(), true, true);
+ },
+ [](std::string &S, unsigned &I, unsigned Idx) -> void {
+ std::string Str = S.substr(I, Idx - I);
+ StringRef SR = Str;
+ if (SR.count(" = MemoryDef(") || SR.count(" = MemoryPhi(") ||
+ SR.count("MemoryUse("))
+ return;
+ DOTGraphTraits<DOTFuncInfo *>::eraseComment(S, I, Idx);
+ });
+ }
+
+ static std::string getEdgeSourceLabel(const BasicBlock *Node,
+ const_succ_iterator I) {
+ return DOTGraphTraits<DOTFuncInfo *>::getEdgeSourceLabel(Node, I);
+ }
+
+ /// Edge attributes (such as raw PGO branch weights) are not displayed.
+ std::string getEdgeAttributes(const BasicBlock *Node, const_succ_iterator I,
+ DOTFuncMSSAInfo *CFGInfo) {
+ return "";
+ }
+
+ std::string getNodeAttributes(const BasicBlock *Node,
+ DOTFuncMSSAInfo *CFGInfo) {
+ return getNodeLabel(Node, CFGInfo).find(';') != std::string::npos
+ ? "style=filled, fillcolor=lightpink"
+ : "";
+ }
+};
+
+} // namespace llvm
+
bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSA.print(dbgs());
+ if (DotCFGMSSA != "") {
+ DOTFuncMSSAInfo CFGInfo(F, MSSA);
+ WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA);
+ } else
+ MSSA.print(dbgs());
+
if (VerifyMemorySSA)
MSSA.verifyMemorySSA();
return false;
@@ -2265,8 +2345,14 @@ bool MemorySSAAnalysis::Result::invalidate(
PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
- OS << "MemorySSA for function: " << F.getName() << "\n";
- AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS);
+ auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ if (DotCFGMSSA != "") {
+ DOTFuncMSSAInfo CFGInfo(F, MSSA);
+ WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA);
+ } else {
+ OS << "MemorySSA for function: " << F.getName() << "\n";
+ MSSA.print(OS);
+ }
return PreservedAnalyses::all();
}
@@ -2336,7 +2422,7 @@ MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
UpwardsMemoryQuery Q;
Q.OriginalAccess = StartingUseOrDef;
Q.StartingLoc = Loc;
- Q.Inst = I;
+ Q.Inst = nullptr;
Q.IsCall = false;
// Unlike the other function, do not walk to the def of a def, because we are
@@ -2458,3 +2544,19 @@ void MemoryDef::deleteMe(DerivedUser *Self) {
void MemoryUse::deleteMe(DerivedUser *Self) {
delete static_cast<MemoryUse *>(Self);
}
+
+bool upward_defs_iterator::IsGuaranteedLoopInvariant(Value *Ptr) const {
+ auto IsGuaranteedLoopInvariantBase = [](Value *Ptr) {
+ Ptr = Ptr->stripPointerCasts();
+ if (!isa<Instruction>(Ptr))
+ return true;
+ return isa<AllocaInst>(Ptr);
+ };
+
+ Ptr = Ptr->stripPointerCasts();
+ if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
+ GEP->hasAllConstantIndices();
+ }
+ return IsGuaranteedLoopInvariantBase(Ptr);
+}
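
For readers following the walker changes, the IsGuaranteedLoopInvariant helper added above can also be read in isolation. The following free-standing sketch (assuming LLVM 12 headers) simply mirrors the member function; the name isGuaranteedLoopInvariantPtr is illustrative and not part of the patch:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Operator.h"
    #include "llvm/IR/Value.h"

    // After stripping pointer casts, a pointer counts as guaranteed
    // loop-invariant if it is not an instruction at all (argument, global,
    // constant), if it is an alloca, or if it is a GEP with all-constant
    // indices over such a base.
    static bool isGuaranteedLoopInvariantPtr(llvm::Value *Ptr) {
      auto IsInvariantBase = [](llvm::Value *P) {
        P = P->stripPointerCasts();
        return !llvm::isa<llvm::Instruction>(P) || llvm::isa<llvm::AllocaInst>(P);
      };
      Ptr = Ptr->stripPointerCasts();
      if (auto *GEP = llvm::dyn_cast<llvm::GEPOperator>(Ptr))
        return IsInvariantBase(GEP->getPointerOperand()) &&
               GEP->hasAllConstantIndices();
      return IsInvariantBase(Ptr);
    }
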
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
index 85af091772e7..99fa58b8872a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -319,8 +319,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
bool DefBeforeSameBlock = false;
if (DefBefore->getBlock() == MD->getBlock() &&
!(isa<MemoryPhi>(DefBefore) &&
- std::find(InsertedPHIs.begin(), InsertedPHIs.end(), DefBefore) !=
- InsertedPHIs.end()))
+ llvm::is_contained(InsertedPHIs, DefBefore)))
DefBeforeSameBlock = true;
// There is a def before us, which means we can replace any store/phi uses
@@ -343,6 +342,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
+ SmallSet<WeakVH, 8> ExistingPhis;
+
// Remember the index where we may insert new phis.
unsigned NewPhiIndex = InsertedPHIs.size();
if (!DefBeforeSameBlock) {
@@ -383,6 +384,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
if (!MPhi) {
MPhi = MSSA->createMemoryPhi(BBIDF);
NewInsertedPHIs.push_back(MPhi);
+ } else {
+ ExistingPhis.insert(MPhi);
}
// Add the phis created into the IDF blocks to NonOptPhis, so they are not
// optimized out as trivial by the call to getPreviousDefFromEnd below.
@@ -428,10 +431,11 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
if (NewPhiSize)
tryRemoveTrivialPhis(ArrayRef<WeakVH>(&InsertedPHIs[NewPhiIndex], NewPhiSize));
- // Now that all fixups are done, rename all uses if we are asked.
- if (RenameUses) {
+ // Now that all fixups are done, rename all uses if we are asked. Skip
+ // renaming for defs in unreachable blocks.
+ BasicBlock *StartBlock = MD->getBlock();
+ if (RenameUses && MSSA->getDomTree().getNode(StartBlock)) {
SmallPtrSet<BasicBlock *, 16> Visited;
- BasicBlock *StartBlock = MD->getBlock();
// We are guaranteed there is a def in the block, because we just got it
// handed to us in this function.
MemoryAccess *FirstDef = &*MSSA->getWritableBlockDefs(StartBlock)->begin();
@@ -448,6 +452,13 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
if (Phi)
MSSA->renamePass(Phi->getBlock(), nullptr, Visited);
}
+ // Existing Phi blocks may need renaming too, if an access was previously
+ // optimized and the inserted Defs "cover" the Optimized value.
+ for (auto &MP : ExistingPhis) {
+ MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MP);
+ if (Phi)
+ MSSA->renamePass(Phi->getBlock(), nullptr, Visited);
+ }
}
}
@@ -544,6 +555,20 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From,
}
}
+/// If all arguments of a MemoryPHI are defined by the same incoming
+/// argument, return that argument.
+static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
+ MemoryAccess *MA = nullptr;
+
+ for (auto &Arg : MP->operands()) {
+ if (!MA)
+ MA = cast<MemoryAccess>(Arg);
+ else if (MA != Arg)
+ return nullptr;
+ }
+ return MA;
+}
+
static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA,
const ValueToValueMapTy &VMap,
PhiToDefMap &MPhiMap,
@@ -700,6 +725,10 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
NewPhi->addIncoming(IncPhi, IncBB);
}
}
+ if (auto *SingleAccess = onlySingleValue(NewPhi)) {
+ MPhiMap[Phi] = SingleAccess;
+ removeMemoryAccess(NewPhi);
+ }
};
auto ProcessBlock = [&](BasicBlock *BB) {
@@ -782,27 +811,42 @@ void MemorySSAUpdater::updateExitBlocksForClonedLoop(
}
void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
- DominatorTree &DT) {
+ DominatorTree &DT, bool UpdateDT) {
SmallVector<CFGUpdate, 4> DeleteUpdates;
+ SmallVector<CFGUpdate, 4> RevDeleteUpdates;
SmallVector<CFGUpdate, 4> InsertUpdates;
for (auto &Update : Updates) {
if (Update.getKind() == DT.Insert)
InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
- else
+ else {
DeleteUpdates.push_back({DT.Delete, Update.getFrom(), Update.getTo()});
+ RevDeleteUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+ }
}
if (!DeleteUpdates.empty()) {
- // Update for inserted edges: use newDT and snapshot CFG as if deletes had
- // not occurred.
- // FIXME: This creates a new DT, so it's more expensive to do mix
- // delete/inserts vs just inserts. We can do an incremental update on the DT
- // to revert deletes, than re-delete the edges. Teaching DT to do this, is
- // part of a pending cleanup.
- DominatorTree NewDT(DT, DeleteUpdates);
- GraphDiff<BasicBlock *> GD(DeleteUpdates, /*ReverseApplyUpdates=*/true);
- applyInsertUpdates(InsertUpdates, NewDT, &GD);
+ if (!UpdateDT) {
+ SmallVector<CFGUpdate, 0> Empty;
+ // Deletes are applied in reverse (i.e. as insertions), because this CFGView
+ // is pretending the deletes have not happened yet, hence the edges still exist.
+ DT.applyUpdates(Empty, RevDeleteUpdates);
+ } else {
+ // Apply all updates, with the RevDeleteUpdates as PostCFGView.
+ DT.applyUpdates(Updates, RevDeleteUpdates);
+ }
+
+ // Note: the MSSA update below doesn't distinguish between a GD built with
+ // (RevDelete, false) and one built with (Delete, true); for "children"
+ // purposes they are equivalent. The distinction only matters for the DT
+ // updates above, where the updates themselves convey the desired change
+ // and are used inside DT only.
+ GraphDiff<BasicBlock *> GD(RevDeleteUpdates);
+ applyInsertUpdates(InsertUpdates, DT, &GD);
+ // Update DT to redelete edges; this matches the real CFG so we can perform
+ // the standard update without a postview of the CFG.
+ DT.applyUpdates(DeleteUpdates);
} else {
+ if (UpdateDT)
+ DT.applyUpdates(Updates);
GraphDiff<BasicBlock *> GD;
applyInsertUpdates(InsertUpdates, DT, &GD);
}
@@ -832,8 +876,8 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
// Check number of predecessors, we only care if there's more than one.
unsigned Count = 0;
BasicBlock *Pred = nullptr;
- for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
- Pred = Pair.second;
+ for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BB)) {
+ Pred = Pi;
Count++;
if (Count == 2)
break;
@@ -926,8 +970,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
auto *BB = BBPredPair.first;
const auto &AddedBlockSet = BBPredPair.second.Added;
auto &PrevBlockSet = BBPredPair.second.Prev;
- for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
- BasicBlock *Pi = Pair.second;
+ for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BB)) {
if (!AddedBlockSet.count(Pi))
PrevBlockSet.insert(Pi);
EdgeCountMap[{Pi, BB}]++;
@@ -1078,10 +1121,8 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I)
IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I)));
} else {
- for (auto &Pair : children<GraphDiffInvBBPair>({GD, BBIDF})) {
- BasicBlock *Pi = Pair.second;
+ for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BBIDF))
IDFPhi->addIncoming(GetLastDef(Pi), Pi);
- }
}
}
}
@@ -1227,20 +1268,6 @@ void MemorySSAUpdater::moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To,
MPhi->setIncomingBlock(MPhi->getBasicBlockIndex(From), To);
}
-/// If all arguments of a MemoryPHI are defined by the same incoming
-/// argument, return that argument.
-static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
- MemoryAccess *MA = nullptr;
-
- for (auto &Arg : MP->operands()) {
- if (!MA)
- MA = cast<MemoryAccess>(Arg);
- else if (MA != Arg)
- return nullptr;
- }
- return MA;
-}
-
void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds,
bool IdenticalEdgesWereMerged) {
@@ -1314,6 +1341,7 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) {
// Note: We assume MemorySSA is not used in metadata since it's not really
// part of the IR.
+ assert(NewDefTarget != MA && "Going into an infinite loop");
while (!MA->use_empty()) {
Use &U = *MA->use_begin();
if (auto *MUD = dyn_cast<MemoryUseOrDef>(U.getUser()))
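
The is_contained change near the top of this hunk is a readability cleanup: llvm::is_contained from llvm/ADT/STLExtras.h wraps the std::find(...) != end() idiom. A minimal sketch; the helper name wasInserted and the int element type are illustrative (the real code stores WeakVH values):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    // Returns true if DefBefore was recorded in InsertedPHIs. Equivalent to:
    //   std::find(InsertedPHIs.begin(), InsertedPHIs.end(), DefBefore) !=
    //       InsertedPHIs.end()
    static bool wasInserted(const llvm::SmallVectorImpl<int> &InsertedPHIs,
                            int DefBefore) {
      return llvm::is_contained(InsertedPHIs, DefBefore);
    }
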
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 52b884fb88e0..64fd5eb1acd4 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -14,9 +14,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ModuleDebugInfoPrinter.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
@@ -24,32 +26,34 @@
using namespace llvm;
namespace {
- class ModuleDebugInfoPrinter : public ModulePass {
- DebugInfoFinder Finder;
- public:
- static char ID; // Pass identification, replacement for typeid
- ModuleDebugInfoPrinter() : ModulePass(ID) {
- initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry());
- }
+class ModuleDebugInfoLegacyPrinter : public ModulePass {
+ DebugInfoFinder Finder;
- bool runOnModule(Module &M) override;
+public:
+ static char ID; // Pass identification, replacement for typeid
+ ModuleDebugInfoLegacyPrinter() : ModulePass(ID) {
+ initializeModuleDebugInfoLegacyPrinterPass(
+ *PassRegistry::getPassRegistry());
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- void print(raw_ostream &O, const Module *M) const override;
- };
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ void print(raw_ostream &O, const Module *M) const override;
+};
}
-char ModuleDebugInfoPrinter::ID = 0;
-INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
+char ModuleDebugInfoLegacyPrinter::ID = 0;
+INITIALIZE_PASS(ModuleDebugInfoLegacyPrinter, "module-debuginfo",
"Decodes module-level debug info", false, true)
ModulePass *llvm::createModuleDebugInfoPrinterPass() {
- return new ModuleDebugInfoPrinter();
+ return new ModuleDebugInfoLegacyPrinter();
}
-bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
+bool ModuleDebugInfoLegacyPrinter::runOnModule(Module &M) {
Finder.processModule(M);
return false;
}
@@ -67,7 +71,8 @@ static void printFile(raw_ostream &O, StringRef Filename, StringRef Directory,
O << ":" << Line;
}
-void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
+static void printModuleDebugInfo(raw_ostream &O, const Module *M,
+ const DebugInfoFinder &Finder) {
// Printing the nodes directly isn't particularly helpful (since they
// reference other nodes that won't be printed, particularly for the
// filenames), so just print a few useful things.
@@ -126,3 +131,18 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
O << '\n';
}
}
+
+void ModuleDebugInfoLegacyPrinter::print(raw_ostream &O,
+ const Module *M) const {
+ printModuleDebugInfo(O, M, Finder);
+}
+
+ModuleDebugInfoPrinterPass::ModuleDebugInfoPrinterPass(raw_ostream &OS)
+ : OS(OS) {}
+
+PreservedAnalyses ModuleDebugInfoPrinterPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ Finder.processModule(M);
+ printModuleDebugInfo(OS, &M, Finder);
+ return PreservedAnalyses::all();
+}
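
The hunk above splits the pass into a legacy wrapper and a new-pass-manager ModuleDebugInfoPrinterPass. A minimal usage sketch for the new-PM pass, assuming LLVM 12 headers and the usual analysis-manager boilerplate; printModuleDebugInfoNewPM is an illustrative name, not part of the patch:

    #include "llvm/Analysis/CGSCCPassManager.h"
    #include "llvm/Analysis/LoopAnalysisManager.h"
    #include "llvm/Analysis/ModuleDebugInfoPrinter.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Support/raw_ostream.h"

    // Run the new-PM debug info printer over a module and write to stderr.
    static void printModuleDebugInfoNewPM(llvm::Module &M) {
      llvm::PassBuilder PB;
      llvm::LoopAnalysisManager LAM;
      llvm::FunctionAnalysisManager FAM;
      llvm::CGSCCAnalysisManager CGAM;
      llvm::ModuleAnalysisManager MAM;
      PB.registerModuleAnalyses(MAM);
      PB.registerCGSCCAnalyses(CGAM);
      PB.registerFunctionAnalyses(FAM);
      PB.registerLoopAnalyses(LAM);
      PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

      llvm::ModulePassManager MPM;
      MPM.addPass(llvm::ModuleDebugInfoPrinterPass(llvm::errs()));
      MPM.run(M, MAM);
    }
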
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index e7d529d0b51e..5f7746eeed15 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -145,7 +145,7 @@ static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid,
SetVector<FunctionSummary::ConstVCall> &ConstVCalls) {
std::vector<uint64_t> Args;
// Start from the second argument to skip the "this" pointer.
- for (auto &Arg : make_range(Call.CB.arg_begin() + 1, Call.CB.arg_end())) {
+ for (auto &Arg : drop_begin(Call.CB.args())) {
auto *CI = dyn_cast<ConstantInt>(Arg);
if (!CI || CI->getBitWidth() > 64) {
VCalls.insert({Guid, Call.Offset});
@@ -472,7 +472,7 @@ static void computeFunctionSummary(
F.hasFnAttribute(Attribute::AlwaysInline)};
std::vector<FunctionSummary::ParamAccess> ParamAccesses;
if (auto *SSI = GetSSICallback(F))
- ParamAccesses = SSI->getParamAccesses();
+ ParamAccesses = SSI->getParamAccesses(Index);
auto FuncSummary = std::make_unique<FunctionSummary>(
Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
CallGraphEdges.takeVector(), TypeTests.takeVector(),
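
The drop_begin change above is another STLExtras cleanup: it iterates a range while skipping its first element, here the "this" pointer of a virtual call. A minimal sketch with a plain std::vector standing in for the call's argument range; sumSkippingFirst is an illustrative name:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    // Sum every argument except the first ("this"-style) operand. Assumes the
    // range is non-empty, as it is for a devirtualized method call.
    static long sumSkippingFirst(const std::vector<long> &Args) {
      long Sum = 0;
      for (long A : llvm::drop_begin(Args)) // skips Args[0]
        Sum += A;
      return Sum;
    }
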
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp b/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp
index 6e3ff67bdddb..1e7626013eed 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp
@@ -16,9 +16,11 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -300,30 +302,31 @@ bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const Instruction &I,
}
namespace {
- struct MustExecutePrinter : public FunctionPass {
+struct MustExecutePrinter : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- MustExecutePrinter() : FunctionPass(ID) {
- initializeMustExecutePrinterPass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- }
- bool runOnFunction(Function &F) override;
- };
- struct MustBeExecutedContextPrinter : public ModulePass {
- static char ID;
+ static char ID; // Pass identification, replacement for typeid
+ MustExecutePrinter() : FunctionPass(ID) {
+ initializeMustExecutePrinterPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ }
+ bool runOnFunction(Function &F) override;
+};
+struct MustBeExecutedContextPrinter : public ModulePass {
+ static char ID;
- MustBeExecutedContextPrinter() : ModulePass(ID) {
- initializeMustBeExecutedContextPrinterPass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- bool runOnModule(Module &M) override;
- };
+ MustBeExecutedContextPrinter() : ModulePass(ID) {
+ initializeMustBeExecutedContextPrinterPass(
+ *PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ bool runOnModule(Module &M) override;
+};
}
char MustExecutePrinter::ID = 0;
@@ -339,15 +342,16 @@ FunctionPass *llvm::createMustExecutePrinter() {
}
char MustBeExecutedContextPrinter::ID = 0;
-INITIALIZE_PASS_BEGIN(
- MustBeExecutedContextPrinter, "print-must-be-executed-contexts",
- "print the must-be-executed-contexed for all instructions", false, true)
+INITIALIZE_PASS_BEGIN(MustBeExecutedContextPrinter,
+ "print-must-be-executed-contexts",
+ "print the must-be-executed-context for all instructions",
+ false, true)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(MustBeExecutedContextPrinter,
"print-must-be-executed-contexts",
- "print the must-be-executed-contexed for all instructions",
+ "print the must-be-executed-context for all instructions",
false, true)
ModulePass *llvm::createMustBeExecutedContextPrinter() {
@@ -627,8 +631,7 @@ MustBeExecutedContextExplorer::findForwardJoinPoint(const BasicBlock *InitBB) {
if (!TransfersExecution)
return nullptr;
- for (const BasicBlock *AdjacentBB : successors(ToBB))
- Worklist.push_back(AdjacentBB);
+ append_range(Worklist, successors(ToBB));
}
}
@@ -835,3 +838,42 @@ const Instruction *MustBeExecutedIterator::advance() {
Tail = nullptr;
return nullptr;
}
+
+PreservedAnalyses MustExecutePrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+ MustExecuteAnnotatedWriter Writer(F, DT, LI);
+ F.print(OS, &Writer);
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses
+MustBeExecutedContextPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ GetterTy<const LoopInfo> LIGetter = [&](const Function &F) {
+ return &FAM.getResult<LoopAnalysis>(const_cast<Function &>(F));
+ };
+ GetterTy<const DominatorTree> DTGetter = [&](const Function &F) {
+ return &FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F));
+ };
+ GetterTy<const PostDominatorTree> PDTGetter = [&](const Function &F) {
+ return &FAM.getResult<PostDominatorTreeAnalysis>(const_cast<Function &>(F));
+ };
+
+ MustBeExecutedContextExplorer Explorer(
+ /* ExploreInterBlock */ true,
+ /* ExploreCFGForward */ true,
+ /* ExploreCFGBackward */ true, LIGetter, DTGetter, PDTGetter);
+
+ for (Function &F : M) {
+ for (Instruction &I : instructions(F)) {
+ OS << "-- Explore context of: " << I << "\n";
+ for (const Instruction *CI : Explorer.range(&I))
+ OS << " [F: " << CI->getFunction()->getName() << "] " << *CI << "\n";
+ }
+ }
+ return PreservedAnalyses::all();
+}
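
Similarly, append_range in the findForwardJoinPoint hunk above replaces an explicit push_back loop with a single call that appends a whole range. A minimal sketch with int elements standing in for basic blocks; enqueueAll is an illustrative name:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    // Append all of Succs onto Worklist in one call; equivalent to an explicit
    // "for (int S : Succs) Worklist.push_back(S);" loop.
    static void enqueueAll(llvm::SmallVectorImpl<int> &Worklist,
                           const llvm::SmallVectorImpl<int> &Succs) {
      llvm::append_range(Worklist, Succs);
    }
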
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 80e019f5fc92..786d03f694b2 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -54,10 +54,11 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *UA = GetUnderlyingObjCPtr(SA, DL);
- const Value *UB = GetUnderlyingObjCPtr(SB, DL);
+ const Value *UA = GetUnderlyingObjCPtr(SA);
+ const Value *UB = GetUnderlyingObjCPtr(SB);
if (UA != SA || UB != SB) {
- Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB), AAQI);
+ Result = AAResultBase::alias(MemoryLocation::getBeforeOrAfter(UA),
+ MemoryLocation::getBeforeOrAfter(UB), AAQI);
// We can't use MustAlias or PartialAlias results here because
// GetUnderlyingObjCPtr may return an offsetted pointer value.
if (Result == NoAlias)
@@ -83,10 +84,10 @@ bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *U = GetUnderlyingObjCPtr(S, DL);
+ const Value *U = GetUnderlyingObjCPtr(S);
if (U != S)
- return AAResultBase::pointsToConstantMemory(MemoryLocation(U), AAQI,
- OrLocal);
+ return AAResultBase::pointsToConstantMemory(
+ MemoryLocation::getBeforeOrAfter(U), AAQI, OrLocal);
// If that failed, fail. We don't need to chain here, since that's covered
// by the earlier precise query.
@@ -133,6 +134,8 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call,
return AAResultBase::getModRefInfo(Call, Loc, AAQI);
}
+AnalysisKey ObjCARCAA::Key;
+
ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) {
return ObjCARCAAResult(F.getParent()->getDataLayout());
}
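
The switch above from MemoryLocation(UA) to MemoryLocation::getBeforeOrAfter(UA) makes the fallback query use a location of unknown extent on both sides of the pointer, which is the conservative shape once GetUnderlyingObjCPtr may have looked through offsets. A minimal construction sketch, assuming LLVM 12 headers; impreciseLocation is an illustrative name:

    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/Value.h"

    // Build an alias-query location that conservatively covers any offset
    // before or after the pointer V.
    static llvm::MemoryLocation impreciseLocation(const llvm::Value *V) {
      return llvm::MemoryLocation::getBeforeOrAfter(V);
    }
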
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
index 56d1cb421225..d34a3c636362 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -23,3 +24,22 @@ bool llvm::objcarc::EnableARCOpts;
static cl::opt<bool, true> EnableARCOptimizations(
"enable-objc-arc-opts", cl::desc("enable/disable all ARC Optimizations"),
cl::location(EnableARCOpts), cl::init(true), cl::Hidden);
+
+bool llvm::objcarc::IsPotentialRetainableObjPtr(const Value *Op,
+ AAResults &AA) {
+ // First make the rudimentary check.
+ if (!IsPotentialRetainableObjPtr(Op))
+ return false;
+
+ // Objects in constant memory are not reference-counted.
+ if (AA.pointsToConstantMemory(Op))
+ return false;
+
+ // Pointers in constant memory are not pointing to reference-counted objects.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+ if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+ return false;
+
+ // Otherwise assume the worst.
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm-project/llvm/lib/Analysis/ObjCARCInstKind.cpp
index fb416a79ac26..951907804227 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -19,7 +19,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ObjCARCInstKind.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/IR/Intrinsics.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
index 2cdf7a177216..6f3d4d536c40 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h"
@@ -36,7 +37,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F)
LI.analyze(DT);
// Then compute BranchProbabilityInfo.
- BranchProbabilityInfo BPI(*F, LI);
+ BranchProbabilityInfo BPI(*F, LI, nullptr, &DT, nullptr);
// Finally compute BFI.
OwnedBFI = std::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
@@ -96,9 +97,17 @@ OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass()
bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) {
BlockFrequencyInfo *BFI;
- if (Fn.getContext().getDiagnosticsHotnessRequested())
+ auto &Context = Fn.getContext();
+ if (Context.getDiagnosticsHotnessRequested()) {
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
- else
+ // Get hotness threshold from PSI. This should only happen once.
+ if (Context.isDiagnosticsHotnessThresholdSetFromPSI()) {
+ if (ProfileSummaryInfo *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI())
+ Context.setDiagnosticsHotnessThreshold(
+ PSI->getOrCompHotCountThreshold());
+ }
+ } else
BFI = nullptr;
ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn, BFI);
@@ -108,6 +117,7 @@ bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) {
void OptimizationRemarkEmitterWrapperPass::getAnalysisUsage(
AnalysisUsage &AU) const {
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -117,10 +127,19 @@ OptimizationRemarkEmitter
OptimizationRemarkEmitterAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
BlockFrequencyInfo *BFI;
+ auto &Context = F.getContext();
- if (F.getContext().getDiagnosticsHotnessRequested())
+ if (Context.getDiagnosticsHotnessRequested()) {
BFI = &AM.getResult<BlockFrequencyAnalysis>(F);
- else
+ // Get hotness threshold from PSI. This should only happen once.
+ if (Context.isDiagnosticsHotnessThresholdSetFromPSI()) {
+ auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ if (ProfileSummaryInfo *PSI =
+ MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()))
+ Context.setDiagnosticsHotnessThreshold(
+ PSI->getOrCompHotCountThreshold());
+ }
+ } else
BFI = nullptr;
return OptimizationRemarkEmitter(&F, BFI);
@@ -133,5 +152,6 @@ static const char ore_name[] = "Optimization Remark Emitter";
INITIALIZE_PASS_BEGIN(OptimizationRemarkEmitterWrapperPass, ORE_NAME, ore_name,
false, true)
INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(OptimizationRemarkEmitterWrapperPass, ORE_NAME, ore_name,
false, true)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp b/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp
index 198647dafbef..656a17e9dc30 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/PhiValues.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp
index 88629517d484..3ba0bb9eaf2c 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp
@@ -17,11 +17,8 @@
#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp
index 6c0d17b45c62..a73607dbef61 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/StructuralHash.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -30,8 +31,6 @@ char RGPassManager::ID = 0;
RGPassManager::RGPassManager()
: FunctionPass(ID), PMDataManager() {
- skipThisRegion = false;
- redoThisRegion = false;
RI = nullptr;
CurrentRegion = nullptr;
}
@@ -75,8 +74,6 @@ bool RGPassManager::runOnFunction(Function &F) {
while (!RQ.empty()) {
CurrentRegion = RQ.back();
- skipThisRegion = false;
- redoThisRegion = false;
// Run all passes on the current Region.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
@@ -90,63 +87,60 @@ bool RGPassManager::runOnFunction(Function &F) {
initializeAnalysisImpl(P);
+ bool LocalChanged = false;
{
PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry());
TimeRegion PassTimer(getPassTimer(P));
- Changed |= P->runOnRegion(CurrentRegion, *this);
+#ifdef EXPENSIVE_CHECKS
+ uint64_t RefHash = StructuralHash(F);
+#endif
+ LocalChanged = P->runOnRegion(CurrentRegion, *this);
+
+#ifdef EXPENSIVE_CHECKS
+ if (!LocalChanged && (RefHash != StructuralHash(F))) {
+ llvm::errs() << "Pass modifies its input and doesn't report it: "
+ << P->getPassName() << "\n";
+ llvm_unreachable("Pass modifies its input and doesn't report it");
+ }
+#endif
+
+ Changed |= LocalChanged;
}
if (isPassDebuggingExecutionsOrMore()) {
- if (Changed)
+ if (LocalChanged)
dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
- skipThisRegion ? "<deleted>" :
CurrentRegion->getNameStr());
dumpPreservedSet(P);
}
- if (!skipThisRegion) {
- // Manually check that this region is still healthy. This is done
- // instead of relying on RegionInfo::verifyRegion since RegionInfo
- // is a function pass and it's really expensive to verify every
- // Region in the function every time. That level of checking can be
- // enabled with the -verify-region-info option.
- {
- TimeRegion PassTimer(getPassTimer(P));
- CurrentRegion->verifyRegion();
- }
-
- // Then call the regular verifyAnalysis functions.
- verifyPreservedAnalysis(P);
+ // Manually check that this region is still healthy. This is done
+ // instead of relying on RegionInfo::verifyRegion since RegionInfo
+ // is a function pass and it's really expensive to verify every
+ // Region in the function every time. That level of checking can be
+ // enabled with the -verify-region-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(P));
+ CurrentRegion->verifyRegion();
}
- removeNotPreservedAnalysis(P);
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+
+ if (LocalChanged)
+ removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
removeDeadPasses(P,
- (!isPassDebuggingExecutionsOrMore() || skipThisRegion) ?
- "<deleted>" : CurrentRegion->getNameStr(),
+ (!isPassDebuggingExecutionsOrMore())
+ ? "<deleted>"
+ : CurrentRegion->getNameStr(),
ON_REGION_MSG);
-
- if (skipThisRegion)
- // Do not run other passes on this region.
- break;
}
- // If the region was deleted, release all the region passes. This frees up
- // some memory, and avoids trouble with the pass manager trying to call
- // verifyAnalysis on them.
- if (skipThisRegion)
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- Pass *P = getContainedPass(Index);
- freePass(P, "<deleted>", ON_REGION_MSG);
- }
-
// Pop the region from queue after running all passes.
RQ.pop_back();
- if (redoThisRegion)
- RQ.push_back(CurrentRegion);
-
// Free all region nodes created in region passes.
RI->clearNodeCache();
}
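
The EXPENSIVE_CHECKS block added above flags passes that modify the function while returning false. Below is a self-contained toy restatement of that guard, with a string-keyed hash standing in for llvm::StructuralHash and a struct standing in for llvm::Function; all names here are illustrative:

    #include <cstdlib>
    #include <functional>
    #include <iostream>
    #include <string>

    struct ToyFunction { std::string Body; };  // stand-in for llvm::Function

    static std::size_t toyHash(const ToyFunction &F) {
      return std::hash<std::string>{}(F.Body);  // stand-in for StructuralHash(F)
    }

    // A buggy "pass": it edits the function but reports no change.
    static bool buggyPass(ToyFunction &F) {
      F.Body += " ; extra";
      return false;
    }

    int main() {
      ToyFunction F{"entry: ret"};
      const std::size_t RefHash = toyHash(F);
      const bool Changed = buggyPass(F);
      if (!Changed && RefHash != toyHash(F)) {
        std::cerr << "Pass modifies its input and doesn't report it\n";
        return EXIT_FAILURE;
      }
      return EXIT_SUCCESS;
    }
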
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ReleaseModeModelRunner.cpp b/contrib/llvm-project/llvm/lib/Analysis/ReleaseModeModelRunner.cpp
index 4c0ffbc17ff7..0b038b3e1c30 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ReleaseModeModelRunner.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ReleaseModeModelRunner.cpp
@@ -10,6 +10,8 @@
// Only inference is supported.
//
//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+#if defined(LLVM_HAVE_TF_AOT)
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
@@ -23,8 +25,8 @@
using namespace llvm;
namespace {
-static const char *const FeedPrefix = "feed_";
-static const char *const FetchPrefix = "fetch_";
+const char FeedPrefix[] = "feed_";
+const char FetchPrefix[] = "fetch_";
/// MLModelRunner - production mode implementation. It uses an AOT-compiled
/// SavedModel for efficient execution.
@@ -85,3 +87,4 @@ llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
auto AOTRunner = std::make_unique<ReleaseModeModelRunner>(M.getContext());
return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
}
+#endif // defined(LLVM_HAVE_TF_AOT)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/contrib/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
new file mode 100644
index 000000000000..b9dac2f3ff11
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
@@ -0,0 +1,82 @@
+//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ReplayInlineAdvisor that replays inline decisions based
+// on previous inline remarks from optimization remark log. This is a best
+// effort approach useful for testing compiler/source changes while holding
+// inlining steady.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/LineIterator.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "inline-replay"
+
+ReplayInlineAdvisor::ReplayInlineAdvisor(
+ Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor, StringRef RemarksFile,
+ bool EmitRemarks)
+ : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)),
+ HasReplayRemarks(false), EmitRemarks(EmitRemarks) {
+ auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile);
+ std::error_code EC = BufferOrErr.getError();
+ if (EC) {
+ Context.emitError("Could not open remarks file: " + EC.message());
+ return;
+ }
+
+ // Example for inline remarks to parse:
+ // main:3:1.1: _Z3subii inlined into main at callsite sum:1 @ main:3:1.1
+ // We use the callsite string after `at callsite` to replay inlining.
+ line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true);
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ StringRef Line = *LineIt;
+ auto Pair = Line.split(" at callsite ");
+
+ auto Callee = Pair.first.split(" inlined into").first.rsplit(": ").second;
+
+ auto CallSite = Pair.second.split(";").first;
+
+ if (Callee.empty() || CallSite.empty())
+ continue;
+
+ std::string Combined = (Callee + CallSite).str();
+ InlineSitesFromRemarks.insert(Combined);
+ }
+
+ HasReplayRemarks = true;
+}
+
+std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdviceImpl(CallBase &CB) {
+ assert(HasReplayRemarks);
+
+ Function &Caller = *CB.getCaller();
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
+
+ if (InlineSitesFromRemarks.empty())
+ return std::make_unique<DefaultInlineAdvice>(this, CB, None, ORE,
+ EmitRemarks);
+
+ std::string CallSiteLoc = getCallSiteLocation(CB.getDebugLoc());
+ StringRef Callee = CB.getCalledFunction()->getName();
+ std::string Combined = (Callee + CallSiteLoc).str();
+ auto Iter = InlineSitesFromRemarks.find(Combined);
+
+ Optional<InlineCost> InlineRecommended = None;
+ if (Iter != InlineSitesFromRemarks.end()) {
+ InlineRecommended = llvm::InlineCost::getAlways("found in replay");
+ }
+
+ return std::make_unique<DefaultInlineAdvice>(this, CB, InlineRecommended, ORE,
+ EmitRemarks);
+}
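
The constructor above keys each replayed call site on a "<callee><callsite-location>" string pulled out of a remark line. The extraction step, shown in isolation as a sketch; replayKey is an illustrative name:

    #include "llvm/ADT/StringRef.h"
    #include <string>

    // Given a remark line of the form
    //   <caller-loc>: <callee> inlined into <caller> at callsite <loc>
    // return "<callee><loc>", or an empty string if the line doesn't match.
    static std::string replayKey(llvm::StringRef Line) {
      auto Pair = Line.split(" at callsite ");
      llvm::StringRef Callee =
          Pair.first.split(" inlined into").first.rsplit(": ").second;
      llvm::StringRef CallSite = Pair.second.split(";").first;
      if (Callee.empty() || CallSite.empty())
        return std::string();
      return Callee.str() + CallSite.str();
    }
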
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
index 3c96b3f20461..1a9ae68573e9 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -135,6 +135,7 @@
#include <vector>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "scalar-evolution"
@@ -226,6 +227,11 @@ ClassifyExpressions("scalar-evolution-classify-expressions",
cl::Hidden, cl::init(true),
cl::desc("When printing analysis, include information on every instruction"));
+static cl::opt<bool> UseExpensiveRangeSharpening(
+ "scalar-evolution-use-expensive-range-sharpening", cl::Hidden,
+ cl::init(false),
+ cl::desc("Use more powerful methods of sharpening expression ranges. May "
+ "be costly in terms of compile time"));
//===----------------------------------------------------------------------===//
// SCEV class definitions
@@ -243,10 +249,17 @@ LLVM_DUMP_METHOD void SCEV::dump() const {
#endif
void SCEV::print(raw_ostream &OS) const {
- switch (static_cast<SCEVTypes>(getSCEVType())) {
+ switch (getSCEVType()) {
case scConstant:
cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
return;
+ case scPtrToInt: {
+ const SCEVPtrToIntExpr *PtrToInt = cast<SCEVPtrToIntExpr>(this);
+ const SCEV *Op = PtrToInt->getOperand();
+ OS << "(ptrtoint " << *Op->getType() << " " << *Op << " to "
+ << *PtrToInt->getType() << ")";
+ return;
+ }
case scTruncate: {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
const SCEV *Op = Trunc->getOperand();
@@ -304,6 +317,8 @@ void SCEV::print(raw_ostream &OS) const {
case scSMinExpr:
OpStr = " smin ";
break;
+ default:
+ llvm_unreachable("There are no other nary expression types.");
}
OS << "(";
for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
@@ -320,6 +335,10 @@ void SCEV::print(raw_ostream &OS) const {
OS << "<nuw>";
if (NAry->hasNoSignedWrap())
OS << "<nsw>";
+ break;
+ default:
+ // Nothing to print for other nary expressions.
+ break;
}
return;
}
@@ -361,9 +380,10 @@ void SCEV::print(raw_ostream &OS) const {
}
Type *SCEV::getType() const {
- switch (static_cast<SCEVTypes>(getSCEVType())) {
+ switch (getSCEVType()) {
case scConstant:
return cast<SCEVConstant>(this)->getType();
+ case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
@@ -445,28 +465,42 @@ ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
return getConstant(ConstantInt::get(ITy, V, isSigned));
}
-SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
- unsigned SCEVTy, const SCEV *op, Type *ty)
- : SCEV(ID, SCEVTy, computeExpressionSize(op)), Op(op), Ty(ty) {}
+SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
+ const SCEV *op, Type *ty)
+ : SCEV(ID, SCEVTy, computeExpressionSize(op)), Ty(ty) {
+ Operands[0] = op;
+}
-SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
- const SCEV *op, Type *ty)
- : SCEVCastExpr(ID, scTruncate, op, ty) {
- assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
+SCEVPtrToIntExpr::SCEVPtrToIntExpr(const FoldingSetNodeIDRef ID, const SCEV *Op,
+ Type *ITy)
+ : SCEVCastExpr(ID, scPtrToInt, Op, ITy) {
+ assert(getOperand()->getType()->isPointerTy() && Ty->isIntegerTy() &&
+ "Must be a non-bit-width-changing pointer-to-integer cast!");
+}
+
+SCEVIntegralCastExpr::SCEVIntegralCastExpr(const FoldingSetNodeIDRef ID,
+ SCEVTypes SCEVTy, const SCEV *op,
+ Type *ty)
+ : SCEVCastExpr(ID, SCEVTy, op, ty) {}
+
+SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op,
+ Type *ty)
+ : SCEVIntegralCastExpr(ID, scTruncate, op, ty) {
+ assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate non-integer value!");
}
SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
- : SCEVCastExpr(ID, scZeroExtend, op, ty) {
- assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
+ : SCEVIntegralCastExpr(ID, scZeroExtend, op, ty) {
+ assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot zero extend non-integer value!");
}
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
- : SCEVCastExpr(ID, scSignExtend, op, ty) {
- assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
+ : SCEVIntegralCastExpr(ID, scSignExtend, op, ty) {
+ assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot sign extend non-integer value!");
}
@@ -665,7 +699,7 @@ static int CompareSCEVComplexity(
return 0;
// Primarily, sort the SCEVs by their getSCEVType().
- unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ SCEVTypes LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
if (LType != RType)
return (int)LType - (int)RType;
@@ -674,7 +708,7 @@ static int CompareSCEVComplexity(
// Aside from the getSCEVType() ordering, the particular ordering
// isn't very important except that it's beneficial to be consistent,
// so that (a + b) and (b + a) don't end up as different expressions.
- switch (static_cast<SCEVTypes>(LType)) {
+ switch (LType) {
case scUnknown: {
const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
@@ -776,6 +810,7 @@ static int CompareSCEVComplexity(
return X;
}
+ case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend: {
@@ -999,6 +1034,115 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
// SCEV Expression folder implementations
//===----------------------------------------------------------------------===//
+const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty,
+ unsigned Depth) {
+ assert(Ty->isIntegerTy() && "Target type must be an integer type!");
+ assert(Depth <= 1 && "getPtrToIntExpr() should self-recurse at most once.");
+
+ // We could be called with an integer-typed operand during SCEV rewrites.
+ // Since the operand is an integer already, just perform zext/trunc/self cast.
+ if (!Op->getType()->isPointerTy())
+ return getTruncateOrZeroExtend(Op, Ty);
+
+ // What would be an ID for such a SCEV cast expression?
+ FoldingSetNodeID ID;
+ ID.AddInteger(scPtrToInt);
+ ID.AddPointer(Op);
+
+ void *IP = nullptr;
+
+ // Is there already an expression for such a cast?
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+ return getTruncateOrZeroExtend(S, Ty);
+
+ // If not, is this expression something we can't reduce any further?
+ if (isa<SCEVUnknown>(Op)) {
+ // Create an explicit cast node.
+ // We can reuse the existing insert position since if we get here,
+ // we won't have made any changes which would invalidate it.
+ Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType());
+ assert(getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(
+ Op->getType())) == getDataLayout().getTypeSizeInBits(IntPtrTy) &&
+ "We can only model ptrtoint if SCEV's effective (integer) type is "
+ "sufficiently wide to represent all possible pointer values.");
+ SCEV *S = new (SCEVAllocator)
+ SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy);
+ UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
+ return getTruncateOrZeroExtend(S, Ty);
+ }
+
+ assert(Depth == 0 &&
+ "getPtrToIntExpr() should not self-recurse for non-SCEVUnknown's.");
+
+ // Otherwise, we've got some expression that is more complex than just a
+ // single SCEVUnknown. But we don't want to have a SCEVPtrToIntExpr of an
+ // arbitrary expression; we want to have a SCEVPtrToIntExpr of an SCEVUnknown
+ // only, and the expressions must otherwise be integer-typed.
+ // So sink the cast down to the SCEVUnknown's.
+
+ /// The SCEVPtrToIntSinkingRewriter takes a scalar evolution expression,
+ /// which computes a pointer-typed value, and rewrites the whole expression
+ /// tree so that *all* the computations are done on integers, and the only
+ /// pointer-typed operands in the expression are SCEVUnknown.
+ class SCEVPtrToIntSinkingRewriter
+ : public SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter> {
+ using Base = SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter>;
+
+ public:
+ SCEVPtrToIntSinkingRewriter(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {}
+
+ static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE) {
+ SCEVPtrToIntSinkingRewriter Rewriter(SE);
+ return Rewriter.visit(Scev);
+ }
+
+ const SCEV *visit(const SCEV *S) {
+ Type *STy = S->getType();
+ // If the expression is not pointer-typed, just keep it as-is.
+ if (!STy->isPointerTy())
+ return S;
+ // Else, recursively sink the cast down into it.
+ return Base::visit(S);
+ }
+
+ const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getAddExpr(Operands, Expr->getNoWrapFlags());
+ }
+
+ const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getMulExpr(Operands, Expr->getNoWrapFlags());
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ Type *ExprPtrTy = Expr->getType();
+ assert(ExprPtrTy->isPointerTy() &&
+ "Should only reach pointer-typed SCEVUnknown's.");
+ Type *ExprIntPtrTy = SE.getDataLayout().getIntPtrType(ExprPtrTy);
+ return SE.getPtrToIntExpr(Expr, ExprIntPtrTy, /*Depth=*/1);
+ }
+ };
+
+ // And actually perform the cast sinking.
+ const SCEV *IntOp = SCEVPtrToIntSinkingRewriter::rewrite(Op, *this);
+ assert(IntOp->getType()->isIntegerTy() &&
+ "We must have succeeded in sinking the cast, "
+ "and ending up with an integer-typed expression!");
+ return getTruncateOrZeroExtend(IntOp, Ty);
+}
+
const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
@@ -1050,7 +1194,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
++i) {
const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1);
- if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S))
+ if (!isa<SCEVIntegralCastExpr>(CommOp->getOperand(i)) &&
+ isa<SCEVTruncateExpr>(S))
numTruncs++;
Operands.push_back(S);
}
@@ -1077,6 +1222,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
+ // Return zero if truncating to known zeros.
+ uint32_t MinTrailingZeros = GetMinTrailingZeros(Op);
+ if (MinTrailingZeros >= getTypeSizeInBits(Ty))
+ return getZero(Ty);
+
// The cast wasn't folded; create an explicit cast node. We can reuse
// the existing insert position since if we get here, we won't have
// made any changes which would invalidate it.
@@ -1237,7 +1387,7 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
// If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
// or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
// `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
- const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
+ SE->setNoWrapFlags(const_cast<SCEVAddRecExpr *>(PreAR), WrapType);
}
return PreStart;
}
@@ -1441,7 +1591,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
if (!AR->hasNoUnsignedWrap()) {
auto NewFlags = proveNoWrapViaConstantRanges(AR);
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
}
// If we have special knowledge that this addrec won't overflow,
@@ -1461,8 +1611,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// that value once it has finished.
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
- // Manually compute the final value for AR, checking for
- // overflow.
+ // Manually compute the final value for AR, checking for overflow.
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
@@ -1490,7 +1639,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
@@ -1509,7 +1658,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NW, which is propagated to this AddRec.
// Negative step causes unsigned wrap, but it still can't self-wrap.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
@@ -1529,27 +1678,24 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// doing extra work that may not pay off.
if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
!AC.assumptions().empty()) {
- // If the backedge is guarded by a comparison with the pre-inc
- // value the addrec is safe. Also, if the entry is guarded by
- // a comparison with the start value and the backedge is
- // guarded by a comparison with the post-inc value, the addrec
- // is safe.
- if (isKnownPositive(Step)) {
- const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
- getUnsignedRangeMax(Step));
- if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
- isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) {
- // Cache knowledge of AR NUW, which is propagated to this
- // AddRec.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
- // Return the expression with the addrec on the outside.
- return getAddRecExpr(
+
+ auto NewFlags = proveNoUnsignedWrapViaInduction(AR);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
+ if (AR->hasNoUnsignedWrap()) {
+ // Same as the nuw case above - duplicated here to avoid a compile-time
+ // issue. It's not clear that the order of checks matters, but it's one of
+ // two possible causes for a change which was reverted. Be conservative
+ // for the moment.
+ return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
- }
- } else if (isKnownNegative(Step)) {
+ }
+
+ // For a negative step, we can extend the operands iff doing so only
+ // traverses values in the range zext([0,UINT_MAX]).
+ if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
getSignedRangeMin(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
@@ -1557,7 +1703,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// Cache knowledge of AR NW, which is propagated to this
// AddRec. Negative step causes unsigned wrap, but it
// still can't self-wrap.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
@@ -1586,7 +1732,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
}
if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
@@ -1785,7 +1931,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
if (!AR->hasNoSignedWrap()) {
auto NewFlags = proveNoWrapViaConstantRanges(AR);
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
}
// If we have special knowledge that this addrec won't overflow,
@@ -1834,7 +1980,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
@@ -1859,7 +2005,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
// (SAdd == OperandExtendedAdd => AR is NW)
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
@@ -1871,33 +2017,16 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
}
}
- // Normally, in the cases we can prove no-overflow via a
- // backedge guarding condition, we can also compute a backedge
- // taken count for the loop. The exceptions are assumptions and
- // guards present in the loop -- SCEV is not great at exploiting
- // these to compute max backedge taken counts, but can still use
- // these to prove lack of overflow. Use this fact to avoid
- // doing extra work that may not pay off.
-
- if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
- !AC.assumptions().empty()) {
- // If the backedge is guarded by a comparison with the pre-inc
- // value the addrec is safe. Also, if the entry is guarded by
- // a comparison with the start value and the backedge is
- // guarded by a comparison with the post-inc value, the addrec
- // is safe.
- ICmpInst::Predicate Pred;
- const SCEV *OverflowLimit =
- getSignedOverflowLimitForStep(Step, &Pred, this);
- if (OverflowLimit &&
- (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
- isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
- // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
- return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
- getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
- }
+ auto NewFlags = proveNoSignedWrapViaInduction(AR);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
+ if (AR->hasNoSignedWrap()) {
+ // Same as the nsw case above - duplicated here to avoid a compile-time
+ // issue. It's not clear that the order of checks matters, but it's one of
+ // two possible causes for a change which was reverted. Be conservative
+ // for the moment.
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
// sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw>
@@ -1918,7 +2047,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
}
if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
@@ -2048,7 +2177,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
} else {
// A multiplication of a constant with some other value. Update
// the map.
- SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
+ SmallVector<const SCEV *, 4> MulOps(drop_begin(Mul->operands()));
const SCEV *Key = SE.getMulExpr(MulOps);
auto Pair = M.insert({Key, NewScale});
if (Pair.second) {
@@ -2152,9 +2281,9 @@ bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) {
/// Get a canonical add expression, or something simpler if possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags Flags,
+ SCEV::NoWrapFlags OrigFlags,
unsigned Depth) {
- assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
+ assert(!(OrigFlags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
@@ -2168,8 +2297,6 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);
- Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
-
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
@@ -2192,12 +2319,20 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Ops.size() == 1) return Ops[0];
}
+ // Delay expensive flag strengthening until necessary.
+ auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) {
+ return StrengthenNoWrapFlags(this, scAddExpr, Ops, OrigFlags);
+ };
+
// Limit recursion calls depth.
if (Depth > MaxArithDepth || hasHugeExpression(Ops))
- return getOrCreateAddExpr(Ops, Flags);
+ return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
if (SCEV *S = std::get<0>(findExistingSCEVInCache(scAddExpr, Ops))) {
- static_cast<SCEVAddExpr *>(S)->setNoWrapFlags(Flags);
+ // Don't strengthen flags if we have no new information.
+ SCEVAddExpr *Add = static_cast<SCEVAddExpr *>(S);
+ if (Add->getNoWrapFlags(OrigFlags) != OrigFlags)
+ Add->setNoWrapFlags(ComputeFlags(Ops));
return S;
}
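// Editor's note -- an illustrative sketch, not part of the upstream patch.
// The "don't strengthen flags if we have no new information" check above only
// pays for StrengthenNoWrapFlags when the caller supplies a flag the cached
// node does not already carry. For a cached add that already has FlagNUW:
//   OrigFlags = FlagNUW -> getNoWrapFlags(FlagNUW) == FlagNUW, so the
//                          expensive ComputeFlags lambda is never invoked;
//   OrigFlags = FlagNSW -> getNoWrapFlags(FlagNSW) == FlagAnyWrap != FlagNSW,
//                          so the flags are recomputed and the node updated.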
@@ -2223,7 +2358,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
FoundMatch = true;
}
if (FoundMatch)
- return getAddExpr(Ops, Flags, Depth + 1);
+ return getAddExpr(Ops, OrigFlags, Depth + 1);
// Check for truncates. If all the operands are truncated from the same
// type, see if factoring out the truncate would permit the result to be
@@ -2458,11 +2593,16 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
+ // Compute nowrap flags for the addition of the loop-invariant ops and
+ // the addrec. Temporarily push it as an operand for that purpose.
+ LIOps.push_back(AddRec);
+ SCEV::NoWrapFlags Flags = ComputeFlags(LIOps);
+ LIOps.pop_back();
+
// NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
LIOps.push_back(AddRec->getStart());
- SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
- AddRec->op_end());
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
// This follows from the fact that the no-wrap flags on the outer add
// expression are applicable on the 0th iteration, when the add recurrence
// will be equal to its start value.
@@ -2500,8 +2640,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
"AddRecExprs are not sorted in reverse dominance order?");
if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
// Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
- SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
- AddRec->op_end());
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
@@ -2532,7 +2671,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// Okay, it looks like we really DO need an add expr. Check to see if we
// already have one, otherwise create a new one.
- return getOrCreateAddExpr(Ops, Flags);
+ return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
}
const SCEV *
@@ -2576,7 +2715,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
}
- S->setNoWrapFlags(Flags);
+ setNoWrapFlags(S, Flags);
return S;
}
@@ -2658,9 +2797,9 @@ static bool containsConstantInAddMulChain(const SCEV *StartExpr) {
/// Get a canonical multiply expression, or something simpler if possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags Flags,
+ SCEV::NoWrapFlags OrigFlags,
unsigned Depth) {
- assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
+ assert(OrigFlags == maskFlags(OrigFlags, SCEV::FlagNUW | SCEV::FlagNSW) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty mul!");
if (Ops.size() == 1) return Ops[0];
@@ -2674,24 +2813,52 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);
- Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ Ops[0] = getConstant(LHSC->getAPInt() * RHSC->getAPInt());
+ if (Ops.size() == 2) return Ops[0];
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we have a multiply of zero, it will always be zero.
+ if (LHSC->getValue()->isZero())
+ return LHSC;
+
+ // If we are left with a constant one being multiplied, strip it off.
+ if (LHSC->getValue()->isOne()) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ }
- // Limit recursion calls depth, but fold all-constant expressions.
- // `Ops` is sorted, so it's enough to check just last one.
- if ((Depth > MaxArithDepth || hasHugeExpression(Ops)) &&
- !isa<SCEVConstant>(Ops.back()))
- return getOrCreateMulExpr(Ops, Flags);
+ if (Ops.size() == 1)
+ return Ops[0];
+ }
+
+ // Delay expensive flag strengthening until necessary.
+ auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) {
+ return StrengthenNoWrapFlags(this, scMulExpr, Ops, OrigFlags);
+ };
+
+ // Limit recursion calls depth.
+ if (Depth > MaxArithDepth || hasHugeExpression(Ops))
+ return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) {
- static_cast<SCEVMulExpr *>(S)->setNoWrapFlags(Flags);
+ // Don't strengthen flags if we have no new information.
+ SCEVMulExpr *Mul = static_cast<SCEVMulExpr *>(S);
+ if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags)
+ Mul->setNoWrapFlags(ComputeFlags(Ops));
return S;
}
- // If there are any constants, fold them together.
- unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
-
- if (Ops.size() == 2)
+ if (Ops.size() == 2) {
// C1*(C2+V) -> C1*C2 + C1*V
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
// If any of Add's ops are Adds or Muls with a constant, apply this
@@ -2707,28 +2874,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
- ++Idx;
- while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
- // We found two constants, fold them together!
- ConstantInt *Fold =
- ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());
- Ops[0] = getConstant(Fold);
- Ops.erase(Ops.begin()+1); // Erase the folded element
- if (Ops.size() == 1) return Ops[0];
- LHSC = cast<SCEVConstant>(Ops[0]);
- }
-
- // If we are left with a constant one being multiplied, strip it off.
- if (cast<SCEVConstant>(Ops[0])->getValue()->isOne()) {
- Ops.erase(Ops.begin());
- --Idx;
- } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
- // If we have a multiply of zero, it will always be zero.
- return Ops[0];
- } else if (Ops[0]->isAllOnesValue()) {
- // If we have a mul by -1 of an add, try distributing the -1 among the
- // add operands.
- if (Ops.size() == 2) {
+ if (Ops[0]->isAllOnesValue()) {
+ // If we have a mul by -1 of an add, try distributing the -1 among the
+ // add operands.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
SmallVector<const SCEV *, 4> NewOps;
bool AnyFolded = false;
@@ -2752,9 +2900,6 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
}
}
}
-
- if (Ops.size() == 1)
- return Ops[0];
}
// Skip over the add expression until we get to a multiply.
@@ -2816,8 +2961,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
//
// No self-wrap cannot be guaranteed after changing the step size, but
// will be inferred if either NUW or NSW is true.
- Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
- const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
+ SCEV::NoWrapFlags Flags = ComputeFlags({Scale, AddRec});
+ const SCEV *NewRec = getAddRecExpr(
+ NewOps, AddRecLoop, AddRec->getNoWrapFlags(Flags));
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
@@ -2910,7 +3056,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Okay, it looks like we really DO need an mul expr. Check to see if we
// already have one, otherwise create a new one.
- return getOrCreateMulExpr(Ops, Flags);
+ return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
}
/// Represents an unsigned remainder expression based on unsigned division.
@@ -3034,8 +3180,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
const SCEV *Op = M->getOperand(i);
const SCEV *Div = getUDivExpr(Op, RHSC);
if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
- Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
- M->op_end());
+ Operands = SmallVector<const SCEV *, 4>(M->operands());
Operands[i] = Div;
return getMulExpr(Operands);
}
@@ -3129,8 +3274,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
// first element of the mulexpr.
if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
if (LHSCst == RHSCst) {
- SmallVector<const SCEV *, 2> Operands;
- Operands.append(Mul->op_begin() + 1, Mul->op_end());
+ SmallVector<const SCEV *, 2> Operands(drop_begin(Mul->operands()));
return getMulExpr(Operands);
}
@@ -3220,8 +3364,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
? (L->getLoopDepth() < NestedLoop->getLoopDepth())
: (!NestedLoop->contains(L) &&
DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
- SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
- NestedAR->op_end());
+ SmallVector<const SCEV *, 4> NestedOperands(NestedAR->operands());
Operands[0] = NestedAR->getStart();
// AddRecs require their operands be loop-invariant with respect to their
// loops. Don't perform this transformation if it would break this
@@ -3274,12 +3417,12 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
// flow and the no-overflow bits may not be valid for the expression in any
// context. This can be fixed similarly to how these flags are handled for
// adds.
- SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW
- : SCEV::FlagAnyWrap;
+ SCEV::NoWrapFlags OffsetWrap =
+ GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- const SCEV *TotalOffset = getZero(IntIdxTy);
Type *CurTy = GEP->getType();
bool FirstIter = true;
+ SmallVector<const SCEV *, 4> Offsets;
for (const SCEV *IndexExpr : IndexExprs) {
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(CurTy)) {
@@ -3287,9 +3430,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
unsigned FieldNo = Index->getZExtValue();
const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo);
-
- // Add the field offset to the running total offset.
- TotalOffset = getAddExpr(TotalOffset, FieldOffset);
+ Offsets.push_back(FieldOffset);
// Update CurTy to the type of the field at Index.
CurTy = STy->getTypeAtIndex(Index);
@@ -3309,19 +3450,27 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy);
// Multiply the index by the element size to compute the element offset.
- const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
-
- // Add the element offset to the running total offset.
- TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+ const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap);
+ Offsets.push_back(LocalOffset);
}
}
- // Add the total offset from all the GEP indices to the base.
- return getAddExpr(BaseExpr, TotalOffset, Wrap);
+ // Handle degenerate case of GEP without offsets.
+ if (Offsets.empty())
+ return BaseExpr;
+
+ // Add the offsets together, assuming nsw if inbounds.
+ const SCEV *Offset = getAddExpr(Offsets, OffsetWrap);
+ // Add the base address and the offset. We cannot use the nsw flag, as the
+ // base address is unsigned. However, if we know that the offset is
+ // non-negative, we can use nuw.
+ SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset)
+ ? SCEV::FlagNUW : SCEV::FlagAnyWrap;
+ return getAddExpr(BaseExpr, Offset, BaseWrap);
}
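// Editor's note -- an illustrative example, not part of the upstream patch.
// For an inbounds GEP such as
//   %a = getelementptr inbounds i32, i32* %p, i64 %i
// the index offset is built as (4 * %i)<nsw> and the offsets are summed with
// nsw (OffsetWrap), but the final base-plus-offset add only receives nuw
// (BaseWrap) when the total offset is provably non-negative, since the base
// address is treated as an unsigned quantity.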
std::tuple<SCEV *, FoldingSetNodeID, void *>
-ScalarEvolution::findExistingSCEVInCache(int SCEVType,
+ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
ArrayRef<const SCEV *> Ops) {
FoldingSetNodeID ID;
void *IP = nullptr;
@@ -3332,7 +3481,17 @@ ScalarEvolution::findExistingSCEVInCache(int SCEVType,
UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP);
}
-const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
+const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) {
+ SCEV::NoWrapFlags Flags = IsNSW ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+ return getSMaxExpr(Op, getNegativeSCEV(Op, Flags));
+}
+
+const SCEV *ScalarEvolution::getSignumExpr(const SCEV *Op) {
+ Type *Ty = Op->getType();
+ return getSMinExpr(getSMaxExpr(Op, getMinusOne(Ty)), getOne(Ty));
+}
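// Editor's note -- worked examples for the two helpers above; illustrative
// only, not part of the upstream patch.
//   getAbsExpr(x)    == smax(x, -x):           x = -7 -> smax(-7, 7) ==  7
//   getSignumExpr(x) == smin(smax(x, -1), 1):  x = -7 -> smin(-1, 1) == -1
//                                              x =  0 -> smin( 0, 1) ==  0
//                                              x =  5 -> smin( 5, 1) ==  1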
+
+const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
if (Ops.size() == 1) return Ops[0];
@@ -3456,8 +3615,8 @@ const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
return ExistingSCEV;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
- SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
- ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
+ SCEV *S = new (SCEVAllocator)
+ SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
@@ -3502,25 +3661,42 @@ const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
return getMinMaxExpr(scUMinExpr, Ops);
}
+const SCEV *
+ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy,
+ ScalableVectorType *ScalableTy) {
+ Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo());
+ Constant *One = ConstantInt::get(IntTy, 1);
+ Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One);
+ // Note that the expression we created is the final expression, we don't
+  // want to simplify it any further. Also, if we call a normal getSCEV(),
+ // we'll end up in an endless recursion. So just create an SCEVUnknown.
+ return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy));
+}
+
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
- // We can bypass creating a target-independent
- // constant expression and then folding it back into a ConstantInt.
- // This is just a compile-time optimization.
- if (isa<ScalableVectorType>(AllocTy)) {
- Constant *NullPtr = Constant::getNullValue(AllocTy->getPointerTo());
- Constant *One = ConstantInt::get(IntTy, 1);
- Constant *GEP = ConstantExpr::getGetElementPtr(AllocTy, NullPtr, One);
- return getSCEV(ConstantExpr::getPtrToInt(GEP, IntTy));
- }
+ if (auto *ScalableAllocTy = dyn_cast<ScalableVectorType>(AllocTy))
+ return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy);
+ // We can bypass creating a target-independent constant expression and then
+ // folding it back into a ConstantInt. This is just a compile-time
+ // optimization.
return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
}
+const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) {
+ if (auto *ScalableStoreTy = dyn_cast<ScalableVectorType>(StoreTy))
+ return getSizeOfScalableVectorExpr(IntTy, ScalableStoreTy);
+ // We can bypass creating a target-independent constant expression and then
+ // folding it back into a ConstantInt. This is just a compile-time
+ // optimization.
+ return getConstant(IntTy, getDataLayout().getTypeStoreSize(StoreTy));
+}
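// Editor's note -- illustrative only, not part of the upstream patch. The two
// helpers above differ only in the DataLayout query they fold for fixed-size
// types: with a typical data layout a <3 x i32> has a store size of 12 bytes
// but an alloc size (including tail padding) of 16 bytes, so
// getStoreSizeOfExpr and getSizeOfExpr return the constants 12 and 16,
// respectively; scalable vectors take the ptrtoint-of-GEP path instead.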
+
const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
StructType *STy,
unsigned FieldNo) {
- // We can bypass creating a target-independent
- // constant expression and then folding it back into a ConstantInt.
- // This is just a compile-time optimization.
+ // We can bypass creating a target-independent constant expression and then
+ // folding it back into a ConstantInt. This is just a compile-time
+ // optimization.
return getConstant(
IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
}
@@ -3744,8 +3920,7 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
- return getMulExpr(
- V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
+ return getMulExpr(V, getMinusOne(Ty), Flags);
}
/// If Expr computes ~A, return A else return nullptr
@@ -3779,9 +3954,8 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
return (const SCEV *)nullptr;
MatchedOperands.push_back(Matched);
}
- return getMinMaxExpr(
- SCEVMinMaxExpr::negate(static_cast<SCEVTypes>(MME->getSCEVType())),
- MatchedOperands);
+ return getMinMaxExpr(SCEVMinMaxExpr::negate(MME->getSCEVType()),
+ MatchedOperands);
};
if (const SCEV *Replaced = MatchMinMaxNegation(MME))
return Replaced;
@@ -3789,9 +3963,7 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
- const SCEV *AllOnes =
- getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
- return getMinusSCEV(AllOnes, V);
+ return getMinusSCEV(getMinusOne(Ty), V);
}
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
@@ -3938,6 +4110,7 @@ const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(
MaxType = getWiderType(MaxType, S->getType());
else
MaxType = S->getType();
+ assert(MaxType && "Failed to find maximum type!");
// Extend all ops to max type.
SmallVector<const SCEV *, 2> PromotedOps;
@@ -3954,7 +4127,7 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
return V;
while (true) {
- if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
+ if (const SCEVIntegralCastExpr *Cast = dyn_cast<SCEVIntegralCastExpr>(V)) {
V = Cast->getOperand();
} else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
const SCEV *PtrOp = nullptr;
@@ -4257,6 +4430,107 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
return Result;
}
+SCEV::NoWrapFlags
+ScalarEvolution::proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR) {
+ SCEV::NoWrapFlags Result = AR->getNoWrapFlags();
+
+ if (AR->hasNoSignedWrap())
+ return Result;
+
+ if (!AR->isAffine())
+ return Result;
+
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ const Loop *L = AR->getLoop();
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+  // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
+
+ // Normally, in the cases we can prove no-overflow via a
+ // backedge guarding condition, we can also compute a backedge
+ // taken count for the loop. The exceptions are assumptions and
+ // guards present in the loop -- SCEV is not great at exploiting
+ // these to compute max backedge taken counts, but can still use
+ // these to prove lack of overflow. Use this fact to avoid
+ // doing extra work that may not pay off.
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
+ AC.assumptions().empty())
+ return Result;
+
+ // If the backedge is guarded by a comparison with the pre-inc value the
+ // addrec is safe. Also, if the entry is guarded by a comparison with the
+ // start value and the backedge is guarded by a comparison with the post-inc
+ // value, the addrec is safe.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit =
+ getSignedOverflowLimitForStep(Step, &Pred, this);
+ if (OverflowLimit &&
+ (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+ isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
+ Result = setFlags(Result, SCEV::FlagNSW);
+ }
+ return Result;
+}
+SCEV::NoWrapFlags
+ScalarEvolution::proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR) {
+ SCEV::NoWrapFlags Result = AR->getNoWrapFlags();
+
+ if (AR->hasNoUnsignedWrap())
+ return Result;
+
+ if (!AR->isAffine())
+ return Result;
+
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+  // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
+
+ // Normally, in the cases we can prove no-overflow via a
+ // backedge guarding condition, we can also compute a backedge
+ // taken count for the loop. The exceptions are assumptions and
+ // guards present in the loop -- SCEV is not great at exploiting
+ // these to compute max backedge taken counts, but can still use
+ // these to prove lack of overflow. Use this fact to avoid
+ // doing extra work that may not pay off.
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
+ AC.assumptions().empty())
+ return Result;
+
+ // If the backedge is guarded by a comparison with the pre-inc value the
+ // addrec is safe. Also, if the entry is guarded by a comparison with the
+ // start value and the backedge is guarded by a comparison with the post-inc
+ // value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
+ getUnsignedRangeMax(Step));
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
+ isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) {
+ Result = setFlags(Result, SCEV::FlagNUW);
+ }
+ }
+
+ return Result;
+}
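// Editor's note -- worked example for the unsigned limit above; illustrative
// only, not part of the upstream patch. For an i8 addrec {0,+,1}, N =
// APInt::getMinValue(8) - umax(Step) = 0 - 1 = 255 (mod 2^8). If the backedge
// is guarded by (IV u< 255), then every iteration that takes the backedge has
// IV + 1 <= 255, so the increment cannot wrap and FlagNUW may be set.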
+
namespace {
/// Represents an abstract binary operation. This may exist as a
@@ -4268,6 +4542,7 @@ struct BinaryOp {
Value *RHS;
bool IsNSW = false;
bool IsNUW = false;
+ bool IsExact = false;
/// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
/// constant expression.
@@ -4280,11 +4555,14 @@ struct BinaryOp {
IsNSW = OBO->hasNoSignedWrap();
IsNUW = OBO->hasNoUnsignedWrap();
}
+ if (auto *PEO = dyn_cast<PossiblyExactOperator>(Op))
+ IsExact = PEO->isExact();
}
explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
- bool IsNUW = false)
- : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
+ bool IsNUW = false, bool IsExact = false)
+ : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW),
+ IsExact(IsExact) {}
};
} // end anonymous namespace
@@ -4981,8 +5259,15 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
bool follow(const SCEV *S) {
switch (S->getSCEVType()) {
- case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
- case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
+ case scConstant:
+ case scPtrToInt:
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
case scUMinExpr:
case scSMinExpr:
// These expressions are available if their operand(s) is/are.
@@ -5020,7 +5305,7 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
      // We do not try to be smart about these at all.
return setUnavailable();
}
- llvm_unreachable("switch should be fully covered!");
+ llvm_unreachable("Unknown SCEV kind!");
}
bool isDone() { return TraversalDone; }
@@ -5240,6 +5525,9 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return C->getAPInt().countTrailingZeros();
+ if (const SCEVPtrToIntExpr *I = dyn_cast<SCEVPtrToIntExpr>(S))
+ return GetMinTrailingZeros(I->getOperand());
+
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
return std::min(GetMinTrailingZeros(T->getOperand()),
(uint32_t)getTypeSizeInBits(T->getType()));
@@ -5331,6 +5619,15 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
return None;
}
+void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec,
+ SCEV::NoWrapFlags Flags) {
+ if (AddRec->getNoWrapFlags(Flags) != Flags) {
+ AddRec->setNoWrapFlags(Flags);
+ UnsignedRanges.erase(AddRec);
+ SignedRanges.erase(AddRec);
+ }
+}
+
/// Determine the range for a particular SCEV. If SignHint is
/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
/// with a "cleaner" unsigned (resp. signed) representation.
@@ -5445,6 +5742,11 @@ ScalarEvolution::getRangeRef(const SCEV *S,
RangeType));
}
+ if (const SCEVPtrToIntExpr *PtrToInt = dyn_cast<SCEVPtrToIntExpr>(S)) {
+ ConstantRange X = getRangeRef(PtrToInt->getOperand(), SignHint);
+ return setRange(PtrToInt, SignHint, X);
+ }
+
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
return setRange(Trunc, SignHint,
@@ -5497,16 +5799,28 @@ ScalarEvolution::getRangeRef(const SCEV *S,
auto RangeFromAffine = getRangeForAffineAR(
AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
BitWidth);
- if (!RangeFromAffine.isFullSet())
- ConservativeResult =
- ConservativeResult.intersectWith(RangeFromAffine, RangeType);
+ ConservativeResult =
+ ConservativeResult.intersectWith(RangeFromAffine, RangeType);
auto RangeFromFactoring = getRangeViaFactoring(
AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
BitWidth);
- if (!RangeFromFactoring.isFullSet())
+ ConservativeResult =
+ ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
+ }
+
+ // Now try symbolic BE count and more powerful methods.
+ if (UseExpensiveRangeSharpening) {
+ const SCEV *SymbolicMaxBECount =
+ getSymbolicMaxBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(SymbolicMaxBECount) &&
+ getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
+ AddRec->hasNoSelfWrap()) {
+ auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
+ AddRec, SymbolicMaxBECount, BitWidth, SignHint);
ConservativeResult =
- ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
+ ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
+ }
}
}
@@ -5677,6 +5991,74 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
return SR.intersectWith(UR, ConstantRange::Smallest);
}
+ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
+ const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth,
+ ScalarEvolution::RangeSignHint SignHint) {
+  assert(AddRec->isAffine() && "Non-affine AddRecs are not supported!\n");
+ assert(AddRec->hasNoSelfWrap() &&
+ "This only works for non-self-wrapping AddRecs!");
+ const bool IsSigned = SignHint == HINT_RANGE_SIGNED;
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+ // Only deal with constant step to save compile time.
+ if (!isa<SCEVConstant>(Step))
+ return ConstantRange::getFull(BitWidth);
+ // Let's make sure that we can prove that we do not self-wrap during
+ // MaxBECount iterations. We need this because MaxBECount is a maximum
+ // iteration count estimate, and we might infer nw from some exit for which we
+ // do not know max exit count (or any other side reasoning).
+ // TODO: Turn into assert at some point.
+ if (getTypeSizeInBits(MaxBECount->getType()) >
+ getTypeSizeInBits(AddRec->getType()))
+ return ConstantRange::getFull(BitWidth);
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
+ const SCEV *RangeWidth = getMinusOne(AddRec->getType());
+ const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));
+ const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs);
+ if (!isKnownPredicateViaConstantRanges(ICmpInst::ICMP_ULE, MaxBECount,
+ MaxItersWithoutWrap))
+ return ConstantRange::getFull(BitWidth);
+
+ ICmpInst::Predicate LEPred =
+ IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ ICmpInst::Predicate GEPred =
+ IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+
+ // We know that there is no self-wrap. Let's take Start and End values and
+ // look at all intermediate values V1, V2, ..., Vn that IndVar takes during
+ // the iteration. They either lie inside the range [Min(Start, End),
+ // Max(Start, End)] or outside it:
+ //
+ // Case 1: RangeMin ... Start V1 ... VN End ... RangeMax;
+ // Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax;
+ //
+  // The no-self-wrap flag guarantees that the intermediate values cannot be
+  // BOTH outside and inside the range [Min(Start, End), Max(Start, End)].
+  // Using that knowledge, let's try to prove that we are dealing with Case 1.
+  // It is so if
+ // Start <= End and step is positive, or Start >= End and step is negative.
+ const SCEV *Start = AddRec->getStart();
+ ConstantRange StartRange = getRangeRef(Start, SignHint);
+ ConstantRange EndRange = getRangeRef(End, SignHint);
+ ConstantRange RangeBetween = StartRange.unionWith(EndRange);
+  // If they already cover the full iteration space, we will know nothing useful
+ // even if we prove what we want to prove.
+ if (RangeBetween.isFullSet())
+ return RangeBetween;
+ // Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax).
+ bool IsWrappedSet = IsSigned ? RangeBetween.isSignWrappedSet()
+ : RangeBetween.isWrappedSet();
+ if (IsWrappedSet)
+ return ConstantRange::getFull(BitWidth);
+
+ if (isKnownPositive(Step) &&
+ isKnownPredicateViaConstantRanges(LEPred, Start, End))
+ return RangeBetween;
+ else if (isKnownNegative(Step) &&
+ isKnownPredicateViaConstantRanges(GEPred, Start, End))
+ return RangeBetween;
+ return ConstantRange::getFull(BitWidth);
+}
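// Editor's note -- worked example for the routine above; illustrative only,
// not part of the upstream patch. Take an i8 addrec {0,+,1}<nw> whose
// symbolic max backedge-taken count has unsigned range [0, 100]. Then
// StepAbs = umin(1, -1) = 1, MaxItersWithoutWrap = 255 /u 1 = 255, and
// MaxBECount u<= 255 is provable, so the no-self-wrap reasoning applies.
// End = Start + Step * MaxBECount has range [0, 101), Start = 0 u<= End and
// the step is positive (Case 1), so the returned range is the union of the
// start and end ranges, here [0, 101).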
+
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
const SCEV *Step,
const SCEV *MaxBECount,
@@ -5709,7 +6091,7 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
}
// Peel off a cast operation
- if (auto *SCast = dyn_cast<SCEVCastExpr>(S)) {
+ if (auto *SCast = dyn_cast<SCEVIntegralCastExpr>(S)) {
CastOp = SCast->getSCEVType();
S = SCast->getOperand();
}
@@ -5910,7 +6292,7 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
const Instruction *Poison = PoisonStack.pop_back_val();
for (auto *PoisonUser : Poison->users()) {
- if (propagatesPoison(cast<Instruction>(PoisonUser))) {
+ if (propagatesPoison(cast<Operator>(PoisonUser))) {
if (Pushed.insert(cast<Instruction>(PoisonUser)).second)
PoisonStack.push_back(cast<Instruction>(PoisonUser));
} else if (auto *BI = dyn_cast<BranchInst>(PoisonUser)) {
@@ -5974,6 +6356,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
else if (isa<ConstantPointerNull>(V))
+ // FIXME: we shouldn't special-case null pointer constant.
return getZero(V->getType());
else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee());
@@ -6264,6 +6647,15 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
}
}
}
+ if (BO->IsExact) {
+ // Given exact arithmetic in-bounds right-shift by a constant,
+ // we can lower it into: (abs(x) EXACT/u (1<<C)) * signum(x)
+ const SCEV *X = getSCEV(BO->LHS);
+ const SCEV *AbsX = getAbsExpr(X, /*IsNSW=*/false);
+ APInt Mult = APInt::getOneBitSet(BitWidth, AShrAmt);
+ const SCEV *Div = getUDivExactExpr(AbsX, getConstant(Mult));
+ return getMulExpr(Div, getSignumExpr(X), SCEV::FlagNSW);
+ }
break;
}
}
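// Editor's note -- worked example for the exact-ashr lowering above;
// illustrative only, not part of the upstream patch. For i8 x = -64 and an
// exact arithmetic shift by AShrAmt = 3 (so the low 3 bits of x are zero):
//   abs(x) = 64, Mult = 1 << 3 = 8, 64 exact/u 8 = 8, signum(-64) = -1,
//   and 8 * -1 = -8, which matches -64 ashr exact 3.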
@@ -6300,6 +6692,29 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
return getSCEV(U->getOperand(0));
break;
+ case Instruction::PtrToInt: {
+    // A pointer-to-integer cast is straightforward, so we do model it.
+ Value *Ptr = U->getOperand(0);
+ const SCEV *Op = getSCEV(Ptr);
+ Type *DstIntTy = U->getType();
+    // SCEV doesn't have a constant pointer expression type, but it supports
+    // the nullptr constant (and only that one), which is modelled in SCEV as a
+ // zero integer constant. So just skip the ptrtoint cast for constants.
+ if (isa<SCEVConstant>(Op))
+ return getTruncateOrZeroExtend(Op, DstIntTy);
+ Type *PtrTy = Ptr->getType();
+ Type *IntPtrTy = getDataLayout().getIntPtrType(PtrTy);
+ // But only if effective SCEV (integer) type is wide enough to represent
+ // all possible pointer values.
+ if (getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(PtrTy)) !=
+ getDataLayout().getTypeSizeInBits(IntPtrTy))
+ return getUnknown(V);
+ return getPtrToIntExpr(Op, DstIntTy);
+ }
+ case Instruction::IntToPtr:
+ // Just don't deal with inttoptr casts.
+ return getUnknown(V);
+
case Instruction::SDiv:
// If both operands are non-negative, this is just an udiv.
if (isKnownNonNegative(getSCEV(U->getOperand(0))) &&
@@ -6314,11 +6729,6 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
return getURemExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)));
break;
- // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
- // lead to pointer expressions which cannot safely be expanded to GEPs,
- // because ScalarEvolution doesn't respect the GEP aliasing rules when
- // simplifying integer expressions.
-
case Instruction::GetElementPtr:
return createNodeForGEP(cast<GEPOperator>(U));
@@ -6339,6 +6749,45 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
case Instruction::Invoke:
if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand())
return getSCEV(RV);
+
+ if (auto *II = dyn_cast<IntrinsicInst>(U)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::abs:
+ return getAbsExpr(
+ getSCEV(II->getArgOperand(0)),
+ /*IsNSW=*/cast<ConstantInt>(II->getArgOperand(1))->isOne());
+ case Intrinsic::umax:
+ return getUMaxExpr(getSCEV(II->getArgOperand(0)),
+ getSCEV(II->getArgOperand(1)));
+ case Intrinsic::umin:
+ return getUMinExpr(getSCEV(II->getArgOperand(0)),
+ getSCEV(II->getArgOperand(1)));
+ case Intrinsic::smax:
+ return getSMaxExpr(getSCEV(II->getArgOperand(0)),
+ getSCEV(II->getArgOperand(1)));
+ case Intrinsic::smin:
+ return getSMinExpr(getSCEV(II->getArgOperand(0)),
+ getSCEV(II->getArgOperand(1)));
+ case Intrinsic::usub_sat: {
+ const SCEV *X = getSCEV(II->getArgOperand(0));
+ const SCEV *Y = getSCEV(II->getArgOperand(1));
+ const SCEV *ClampedY = getUMinExpr(X, Y);
+ return getMinusSCEV(X, ClampedY, SCEV::FlagNUW);
+ }
+ case Intrinsic::uadd_sat: {
+ const SCEV *X = getSCEV(II->getArgOperand(0));
+ const SCEV *Y = getSCEV(II->getArgOperand(1));
+ const SCEV *ClampedX = getUMinExpr(X, getNotSCEV(Y));
+ return getAddExpr(ClampedX, Y, SCEV::FlagNUW);
+ }
+ case Intrinsic::start_loop_iterations:
+ // A start_loop_iterations is just equivalent to the first operand for
+ // SCEV purposes.
+ return getSCEV(II->getArgOperand(0));
+ default:
+ break;
+ }
+ }
break;
}
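// Editor's note -- worked examples for the saturating lowerings above;
// illustrative only, not part of the upstream patch (i8 values):
//   usub_sat(5, 9)    -> ClampedY = umin(5, 9)     = 5,   5 - 5    = 0
//   usub_sat(9, 5)    -> ClampedY = umin(9, 5)     = 5,   9 - 5    = 4
//   uadd_sat(250, 10) -> ClampedX = umin(250, 245) = 245, 245 + 10 = 255
//   uadd_sat(5, 10)   -> ClampedX = umin(5, 245)   = 5,   5 + 10   = 15
// The clamp is what justifies FlagNUW on the resulting subtraction/addition.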
@@ -6371,8 +6820,9 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
return 0;
}
-unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L,
- BasicBlock *ExitingBlock) {
+unsigned
+ScalarEvolution::getSmallConstantTripCount(const Loop *L,
+ const BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
assert(L->isLoopExiting(ExitingBlock) &&
"Exiting block must actually branch out of the loop!");
@@ -6409,7 +6859,7 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
/// that control exits the loop via ExitingBlock.
unsigned
ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
- BasicBlock *ExitingBlock) {
+ const BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
assert(L->isLoopExiting(ExitingBlock) &&
"Exiting block must actually branch out of the loop!");
@@ -6440,13 +6890,14 @@ ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
}
const SCEV *ScalarEvolution::getExitCount(const Loop *L,
- BasicBlock *ExitingBlock,
+ const BasicBlock *ExitingBlock,
ExitCountKind Kind) {
switch (Kind) {
case Exact:
+ case SymbolicMaximum:
return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
case ConstantMaximum:
- return getBackedgeTakenInfo(L).getMax(ExitingBlock, this);
+ return getBackedgeTakenInfo(L).getConstantMax(ExitingBlock, this);
};
llvm_unreachable("Invalid ExitCountKind!");
}
@@ -6463,13 +6914,15 @@ const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L,
case Exact:
return getBackedgeTakenInfo(L).getExact(L, this);
case ConstantMaximum:
- return getBackedgeTakenInfo(L).getMax(this);
+ return getBackedgeTakenInfo(L).getConstantMax(this);
+ case SymbolicMaximum:
+ return getBackedgeTakenInfo(L).getSymbolicMax(L, this);
};
llvm_unreachable("Invalid ExitCountKind!");
}
bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
- return getBackedgeTakenInfo(L).isMaxOrZero(this);
+ return getBackedgeTakenInfo(L).isConstantMaxOrZero(this);
}
/// Push PHI nodes in the header of the given loop onto the given Worklist.
@@ -6499,7 +6952,7 @@ ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result);
}
-const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Initially insert an invalid entry for this loop. If the insertion
// succeeds, proceed to actually compute a backedge-taken count and
@@ -6523,12 +6976,11 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
const SCEV *BEExact = Result.getExact(L, this);
if (BEExact != getCouldNotCompute()) {
assert(isLoopInvariant(BEExact, L) &&
- isLoopInvariant(Result.getMax(this), L) &&
+ isLoopInvariant(Result.getConstantMax(this), L) &&
"Computed backedge-taken count isn't loop invariant for loop!");
++NumTripCountsComputed;
- }
- else if (Result.getMax(this) == getCouldNotCompute() &&
- isa<PHINode>(L->getHeader()->begin())) {
+ } else if (Result.getConstantMax(this) == getCouldNotCompute() &&
+ isa<PHINode>(L->getHeader()->begin())) {
// Only count loops that have phi nodes as not being computable.
++NumTripCountsNotComputed;
}
@@ -6769,7 +7221,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
/// Get the exact not taken count for this loop exit.
const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
+ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock,
ScalarEvolution *SE) const {
for (auto &ENT : ExitNotTaken)
if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
@@ -6778,9 +7230,8 @@ ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
return SE->getCouldNotCompute();
}
-const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getMax(BasicBlock *ExitingBlock,
- ScalarEvolution *SE) const {
+const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
+ const BasicBlock *ExitingBlock, ScalarEvolution *SE) const {
for (auto &ENT : ExitNotTaken)
if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
return ENT.MaxNotTaken;
@@ -6788,22 +7239,32 @@ ScalarEvolution::BackedgeTakenInfo::getMax(BasicBlock *ExitingBlock,
return SE->getCouldNotCompute();
}
-/// getMax - Get the max backedge taken count for the loop.
+/// getConstantMax - Get the constant max backedge taken count for the loop.
const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
+ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const {
auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
return !ENT.hasAlwaysTruePredicate();
};
- if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax())
+ if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getConstantMax())
return SE->getCouldNotCompute();
- assert((isa<SCEVCouldNotCompute>(getMax()) || isa<SCEVConstant>(getMax())) &&
+ assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
+ isa<SCEVConstant>(getConstantMax())) &&
"No point in having a non-constant max backedge taken count!");
- return getMax();
+ return getConstantMax();
+}
+
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(const Loop *L,
+ ScalarEvolution *SE) {
+ if (!SymbolicMax)
+ SymbolicMax = SE->computeSymbolicMaxBackedgeTakenCount(L);
+ return SymbolicMax;
}
-bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const {
+bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero(
+ ScalarEvolution *SE) const {
auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
return !ENT.hasAlwaysTruePredicate();
};
@@ -6812,8 +7273,8 @@ bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const
bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
ScalarEvolution *SE) const {
- if (getMax() && getMax() != SE->getCouldNotCompute() &&
- SE->hasOperand(getMax(), S))
+ if (getConstantMax() && getConstantMax() != SE->getCouldNotCompute() &&
+ SE->hasOperand(getConstantMax(), S))
return true;
for (auto &ENT : ExitNotTaken)
@@ -6866,10 +7327,9 @@ ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
- ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
- ExitCounts,
- bool Complete, const SCEV *MaxCount, bool MaxOrZero)
- : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
+ ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> ExitCounts,
+ bool IsComplete, const SCEV *ConstantMax, bool MaxOrZero)
+ : ConstantMax(ConstantMax), IsComplete(IsComplete), MaxOrZero(MaxOrZero) {
using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
ExitNotTaken.reserve(ExitCounts.size());
@@ -6889,7 +7349,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
std::move(Predicate));
});
- assert((isa<SCEVCouldNotCompute>(MaxCount) || isa<SCEVConstant>(MaxCount)) &&
+ assert((isa<SCEVCouldNotCompute>(ConstantMax) ||
+ isa<SCEVConstant>(ConstantMax)) &&
"No point in having a non-constant max backedge taken count!");
}
@@ -7078,114 +7539,10 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached(
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsExit, bool AllowPredicates) {
- // Check if the controlling expression for this loop is an And or Or.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
- if (BO->getOpcode() == Instruction::And) {
- // Recurse on the operands of the and.
- bool EitherMayExit = !ExitIfTrue;
- ExitLimit EL0 = computeExitLimitFromCondCached(
- Cache, L, BO->getOperand(0), ExitIfTrue,
- ControlsExit && !EitherMayExit, AllowPredicates);
- ExitLimit EL1 = computeExitLimitFromCondCached(
- Cache, L, BO->getOperand(1), ExitIfTrue,
- ControlsExit && !EitherMayExit, AllowPredicates);
- // Be robust against unsimplified IR for the form "and i1 X, true"
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
- return CI->isOne() ? EL0 : EL1;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(0)))
- return CI->isOne() ? EL1 : EL0;
- const SCEV *BECount = getCouldNotCompute();
- const SCEV *MaxBECount = getCouldNotCompute();
- if (EitherMayExit) {
- // Both conditions must be true for the loop to continue executing.
- // Choose the less conservative count.
- if (EL0.ExactNotTaken == getCouldNotCompute() ||
- EL1.ExactNotTaken == getCouldNotCompute())
- BECount = getCouldNotCompute();
- else
- BECount =
- getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
- if (EL0.MaxNotTaken == getCouldNotCompute())
- MaxBECount = EL1.MaxNotTaken;
- else if (EL1.MaxNotTaken == getCouldNotCompute())
- MaxBECount = EL0.MaxNotTaken;
- else
- MaxBECount =
- getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
- } else {
- // Both conditions must be true at the same time for the loop to exit.
- // For now, be conservative.
- if (EL0.MaxNotTaken == EL1.MaxNotTaken)
- MaxBECount = EL0.MaxNotTaken;
- if (EL0.ExactNotTaken == EL1.ExactNotTaken)
- BECount = EL0.ExactNotTaken;
- }
-
- // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
- // to be more aggressive when computing BECount than when computing
- // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
- // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
- // to not.
- if (isa<SCEVCouldNotCompute>(MaxBECount) &&
- !isa<SCEVCouldNotCompute>(BECount))
- MaxBECount = getConstant(getUnsignedRangeMax(BECount));
-
- return ExitLimit(BECount, MaxBECount, false,
- {&EL0.Predicates, &EL1.Predicates});
- }
- if (BO->getOpcode() == Instruction::Or) {
- // Recurse on the operands of the or.
- bool EitherMayExit = ExitIfTrue;
- ExitLimit EL0 = computeExitLimitFromCondCached(
- Cache, L, BO->getOperand(0), ExitIfTrue,
- ControlsExit && !EitherMayExit, AllowPredicates);
- ExitLimit EL1 = computeExitLimitFromCondCached(
- Cache, L, BO->getOperand(1), ExitIfTrue,
- ControlsExit && !EitherMayExit, AllowPredicates);
- // Be robust against unsimplified IR for the form "or i1 X, true"
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
- return CI->isZero() ? EL0 : EL1;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(0)))
- return CI->isZero() ? EL1 : EL0;
- const SCEV *BECount = getCouldNotCompute();
- const SCEV *MaxBECount = getCouldNotCompute();
- if (EitherMayExit) {
- // Both conditions must be false for the loop to continue executing.
- // Choose the less conservative count.
- if (EL0.ExactNotTaken == getCouldNotCompute() ||
- EL1.ExactNotTaken == getCouldNotCompute())
- BECount = getCouldNotCompute();
- else
- BECount =
- getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
- if (EL0.MaxNotTaken == getCouldNotCompute())
- MaxBECount = EL1.MaxNotTaken;
- else if (EL1.MaxNotTaken == getCouldNotCompute())
- MaxBECount = EL0.MaxNotTaken;
- else
- MaxBECount =
- getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
- } else {
- // Both conditions must be false at the same time for the loop to exit.
- // For now, be conservative.
- if (EL0.MaxNotTaken == EL1.MaxNotTaken)
- MaxBECount = EL0.MaxNotTaken;
- if (EL0.ExactNotTaken == EL1.ExactNotTaken)
- BECount = EL0.ExactNotTaken;
- }
- // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
- // to be more aggressive when computing BECount than when computing
- // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
- // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
- // to not.
- if (isa<SCEVCouldNotCompute>(MaxBECount) &&
- !isa<SCEVCouldNotCompute>(BECount))
- MaxBECount = getConstant(getUnsignedRangeMax(BECount));
-
- return ExitLimit(BECount, MaxBECount, false,
- {&EL0.Predicates, &EL1.Predicates});
- }
- }
+ // Handle BinOp conditions (And, Or).
+ if (auto LimitFromBinOp = computeExitLimitFromCondFromBinOp(
+ Cache, L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
+ return *LimitFromBinOp;
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
@@ -7217,6 +7574,95 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
return computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
}
+Optional<ScalarEvolution::ExitLimit>
+ScalarEvolution::computeExitLimitFromCondFromBinOp(
+ ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
+ bool ControlsExit, bool AllowPredicates) {
+ // Check if the controlling expression for this loop is an And or Or.
+ Value *Op0, *Op1;
+ bool IsAnd = false;
+ if (match(ExitCond, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
+ IsAnd = true;
+ else if (match(ExitCond, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
+ IsAnd = false;
+ else
+ return None;
+
+ // EitherMayExit is true in these two cases:
+ // br (and Op0 Op1), loop, exit
+ // br (or Op0 Op1), exit, loop
+ bool EitherMayExit = IsAnd ^ ExitIfTrue;
+ ExitLimit EL0 = computeExitLimitFromCondCached(Cache, L, Op0, ExitIfTrue,
+ ControlsExit && !EitherMayExit,
+ AllowPredicates);
+ ExitLimit EL1 = computeExitLimitFromCondCached(Cache, L, Op1, ExitIfTrue,
+ ControlsExit && !EitherMayExit,
+ AllowPredicates);
+
+ // Be robust against unsimplified IR for the form "op i1 X, NeutralElement"
+ const Constant *NeutralElement = ConstantInt::get(ExitCond->getType(), IsAnd);
+ if (isa<ConstantInt>(Op1))
+ return Op1 == NeutralElement ? EL0 : EL1;
+ if (isa<ConstantInt>(Op0))
+ return Op0 == NeutralElement ? EL1 : EL0;
+
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (EitherMayExit) {
+    // Both conditions must be the same for the loop to continue executing.
+ // Choose the less conservative count.
+ // If ExitCond is a short-circuit form (select), using
+ // umin(EL0.ExactNotTaken, EL1.ExactNotTaken) is unsafe in general.
+    // For detailed examples, see
+ // test/Analysis/ScalarEvolution/exit-count-select.ll
+ bool PoisonSafe = isa<BinaryOperator>(ExitCond);
+ if (!PoisonSafe)
+      // Even if ExitCond is a select, we can safely derive BECount using both
+ // EL0 and EL1 in these cases:
+ // (1) EL0.ExactNotTaken is non-zero
+ // (2) EL1.ExactNotTaken is non-poison
+ // (3) EL0.ExactNotTaken is zero (BECount should be simply zero and
+ // it cannot be umin(0, ..))
+ // The PoisonSafe assignment below is simplified and the assertion after
+ // BECount calculation fully guarantees the condition (3).
+ PoisonSafe = isa<SCEVConstant>(EL0.ExactNotTaken) ||
+ isa<SCEVConstant>(EL1.ExactNotTaken);
+ if (EL0.ExactNotTaken != getCouldNotCompute() &&
+ EL1.ExactNotTaken != getCouldNotCompute() && PoisonSafe) {
+ BECount =
+ getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
+
+ // If EL0.ExactNotTaken was zero and ExitCond was a short-circuit form,
+ // it should have been simplified to zero (see the condition (3) above)
+ assert(!isa<BinaryOperator>(ExitCond) || !EL0.ExactNotTaken->isZero() ||
+ BECount->isZero());
+ }
+ if (EL0.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL1.MaxNotTaken;
+ else if (EL1.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL0.MaxNotTaken;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
+ } else {
+    // Both conditions must be the same at the same time for the loop to exit.
+ // For now, be conservative.
+ if (EL0.ExactNotTaken == EL1.ExactNotTaken)
+ BECount = EL0.ExactNotTaken;
+ }
+
+ // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
+ // to be more aggressive when computing BECount than when computing
+ // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
+ // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
+ // to not.
+ if (isa<SCEVCouldNotCompute>(MaxBECount) &&
+ !isa<SCEVCouldNotCompute>(BECount))
+ MaxBECount = getConstant(getUnsignedRangeMax(BECount));
+
+ return ExitLimit(BECount, MaxBECount, false,
+ { &EL0.Predicates, &EL1.Predicates });
+}
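// Editor's note -- illustrative only, not part of the upstream patch. For
//   br i1 (and i1 %c0, %c1), label %loop, label %exit    (ExitIfTrue = false)
// EitherMayExit = IsAnd ^ ExitIfTrue = true, so if %c0 alone would allow 10
// backedges and %c1 alone 7, the combined exact not-taken count is
// umin(10, 7) = 7, provided the poison-safety conditions above hold. The
// analogous case is "br i1 (or i1 %c0, %c1), label %exit, label %loop".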
+
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
@@ -7911,100 +8357,110 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
/// Returns NULL if the SCEV isn't representable as a Constant.
static Constant *BuildConstantFromSCEV(const SCEV *V) {
- switch (static_cast<SCEVTypes>(V->getSCEVType())) {
- case scCouldNotCompute:
- case scAddRecExpr:
- break;
- case scConstant:
- return cast<SCEVConstant>(V)->getValue();
- case scUnknown:
- return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
- case scSignExtend: {
- const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
- if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
- return ConstantExpr::getSExt(CastOp, SS->getType());
- break;
- }
- case scZeroExtend: {
- const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
- if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
- return ConstantExpr::getZExt(CastOp, SZ->getType());
- break;
- }
- case scTruncate: {
- const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
- if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
- return ConstantExpr::getTrunc(CastOp, ST->getType());
- break;
- }
- case scAddExpr: {
- const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
- if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
- if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
- unsigned AS = PTy->getAddressSpace();
+ switch (V->getSCEVType()) {
+ case scCouldNotCompute:
+ case scAddRecExpr:
+ return nullptr;
+ case scConstant:
+ return cast<SCEVConstant>(V)->getValue();
+ case scUnknown:
+ return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
+ return ConstantExpr::getSExt(CastOp, SS->getType());
+ return nullptr;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
+ return ConstantExpr::getZExt(CastOp, SZ->getType());
+ return nullptr;
+ }
+ case scPtrToInt: {
+ const SCEVPtrToIntExpr *P2I = cast<SCEVPtrToIntExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(P2I->getOperand()))
+ return ConstantExpr::getPtrToInt(CastOp, P2I->getType());
+
+ return nullptr;
+ }
+ case scTruncate: {
+ const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
+ return ConstantExpr::getTrunc(CastOp, ST->getType());
+ return nullptr;
+ }
+ case scAddExpr: {
+ const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ unsigned AS = PTy->getAddressSpace();
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
+ }
+ for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
+ if (!C2)
+ return nullptr;
+
+ // First pointer!
+ if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+ unsigned AS = C2->getType()->getPointerAddressSpace();
+ std::swap(C, C2);
Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+ // The offsets have been converted to bytes. We can add bytes to an
+ // i8* by GEP with the byte count in the first index.
C = ConstantExpr::getBitCast(C, DestPtrTy);
}
- for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
- Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
- if (!C2) return nullptr;
-
- // First pointer!
- if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
- unsigned AS = C2->getType()->getPointerAddressSpace();
- std::swap(C, C2);
- Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
- // The offsets have been converted to bytes. We can add bytes to an
- // i8* by GEP with the byte count in the first index.
- C = ConstantExpr::getBitCast(C, DestPtrTy);
- }
- // Don't bother trying to sum two pointers. We probably can't
- // statically compute a load that results from it anyway.
- if (C2->getType()->isPointerTy())
- return nullptr;
+ // Don't bother trying to sum two pointers. We probably can't
+ // statically compute a load that results from it anyway.
+ if (C2->getType()->isPointerTy())
+ return nullptr;
- if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
- if (PTy->getElementType()->isStructTy())
- C2 = ConstantExpr::getIntegerCast(
- C2, Type::getInt32Ty(C->getContext()), true);
- C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
- } else
- C = ConstantExpr::getAdd(C, C2);
- }
- return C;
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ if (PTy->getElementType()->isStructTy())
+ C2 = ConstantExpr::getIntegerCast(
+ C2, Type::getInt32Ty(C->getContext()), true);
+ C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
+ } else
+ C = ConstantExpr::getAdd(C, C2);
}
- break;
+ return C;
}
- case scMulExpr: {
- const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
- if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
- // Don't bother with pointers at all.
- if (C->getType()->isPointerTy()) return nullptr;
- for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
- Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
- if (!C2 || C2->getType()->isPointerTy()) return nullptr;
- C = ConstantExpr::getMul(C, C2);
- }
- return C;
+ return nullptr;
+ }
+ case scMulExpr: {
+ const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
+ // Don't bother with pointers at all.
+ if (C->getType()->isPointerTy())
+ return nullptr;
+ for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
+ if (!C2 || C2->getType()->isPointerTy())
+ return nullptr;
+ C = ConstantExpr::getMul(C, C2);
}
- break;
- }
- case scUDivExpr: {
- const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
- if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
- if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
- if (LHS->getType() == RHS->getType())
- return ConstantExpr::getUDiv(LHS, RHS);
- break;
+ return C;
}
- case scSMaxExpr:
- case scUMaxExpr:
- case scSMinExpr:
- case scUMinExpr:
- break; // TODO: smax, umax, smin, umax.
+ return nullptr;
}
- return nullptr;
+ case scUDivExpr: {
+ const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
+ if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
+ if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
+ if (LHS->getType() == RHS->getType())
+ return ConstantExpr::getUDiv(LHS, RHS);
+ return nullptr;
+ }
+ case scSMaxExpr:
+ case scUMaxExpr:
+ case scSMinExpr:
+ case scUMinExpr:
+ return nullptr; // TODO: smax, umax, smin, umax.
+ }
+ llvm_unreachable("Unknown SCEV kind!");
}
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
@@ -8015,22 +8471,22 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
- const Loop *LI = this->LI[I->getParent()];
+ const Loop *CurrLoop = this->LI[I->getParent()];
// Looking for loop exit value.
- if (LI && LI->getParentLoop() == L &&
- PN->getParent() == LI->getHeader()) {
+ if (CurrLoop && CurrLoop->getParentLoop() == L &&
+ PN->getParent() == CurrLoop->getHeader()) {
// Okay, there is no closed form solution for the PHI node. Check
// to see if the loop that contains it has a known backedge-taken
// count. If so, we may be able to force computation of the exit
// value.
- const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(CurrLoop);
// This trivial case can show up in some degenerate cases where
// the incoming IR has not yet been fully simplified.
if (BackedgeTakenCount->isZero()) {
Value *InitValue = nullptr;
bool MultipleInitValues = false;
for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
- if (!LI->contains(PN->getIncomingBlock(i))) {
+ if (!CurrLoop->contains(PN->getIncomingBlock(i))) {
if (!InitValue)
InitValue = PN->getIncomingValue(i);
else if (InitValue != PN->getIncomingValue(i)) {
@@ -8048,17 +8504,18 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
isKnownPositive(BackedgeTakenCount) &&
PN->getNumIncomingValues() == 2) {
- unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1;
+ unsigned InLoopPred =
+ CurrLoop->contains(PN->getIncomingBlock(0)) ? 0 : 1;
Value *BackedgeVal = PN->getIncomingValue(InLoopPred);
- if (LI->isLoopInvariant(BackedgeVal))
+ if (CurrLoop->isLoopInvariant(BackedgeVal))
return getSCEV(BackedgeVal);
}
if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
// Okay, we know how many times the containing loop executes. If
// this is a constant evolving PHI node, get the final value at
// the specified iteration number.
- Constant *RV =
- getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
+ Constant *RV = getConstantEvolutionLoopExitValue(
+ PN, BTCC->getAPInt(), CurrLoop);
if (RV) return getSCEV(RV);
}
}
@@ -8114,9 +8571,10 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
Operands[1], DL, &TLI);
- else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!LI->isVolatile())
- C = ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
+ else if (const LoadInst *Load = dyn_cast<LoadInst>(I)) {
+ if (!Load->isVolatile())
+ C = ConstantFoldLoadFromConstPtr(Operands[0], Load->getType(),
+ DL);
} else
C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
if (!C) return V;
@@ -8233,6 +8691,13 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
return getTruncateExpr(Op, Cast->getType());
}
+ if (const SCEVPtrToIntExpr *Cast = dyn_cast<SCEVPtrToIntExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getPtrToIntExpr(Op, Cast->getType());
+ }
+
llvm_unreachable("Unknown SCEV type!");
}
@@ -8647,7 +9112,10 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// 1*N = -Start; -1*N = Start (mod 2^BW), so:
// N = Distance (as unsigned)
if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
- APInt MaxBECount = getUnsignedRangeMax(Distance);
+ APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L));
+ APInt MaxBECountBase = getUnsignedRangeMax(Distance);
+ if (MaxBECountBase.ult(MaxBECount))
+ MaxBECount = MaxBECountBase;
// When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated,
// we end up with a loop whose backedge-taken count is n - 1. Detect this
@@ -8712,18 +9180,19 @@ ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) {
return getCouldNotCompute();
}
-std::pair<BasicBlock *, BasicBlock *>
-ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
+std::pair<const BasicBlock *, const BasicBlock *>
+ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB)
+ const {
// If the block has a unique predecessor, then there is no path from the
// predecessor to the block that does not go through the direct edge
// from the predecessor to the block.
- if (BasicBlock *Pred = BB->getSinglePredecessor())
+ if (const BasicBlock *Pred = BB->getSinglePredecessor())
return {Pred, BB};
// A loop's header is defined to be a block that dominates the loop.
// If the header has a unique predecessor outside the loop, it must be
// a block that has exactly one successor that can reach the loop.
- if (Loop *L = LI.getLoopFor(BB))
+ if (const Loop *L = LI.getLoopFor(BB))
return {L->getLoopPredecessor(), L->getHeader()};
return {nullptr, nullptr};
@@ -9052,6 +9521,14 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS);
}
+bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const Instruction *Context) {
+ // TODO: Analyze guards and assumes from Context's block.
+ return isKnownPredicate(Pred, LHS, RHS) ||
+ isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS);
+}
+
bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred,
const SCEVAddRecExpr *LHS,
const SCEV *RHS) {
@@ -9060,31 +9537,30 @@ bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred,
isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS);
}
-bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
- ICmpInst::Predicate Pred,
- bool &Increasing) {
- bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing);
+Optional<ScalarEvolution::MonotonicPredicateType>
+ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred) {
+ auto Result = getMonotonicPredicateTypeImpl(LHS, Pred);
#ifndef NDEBUG
// Verify an invariant: inverting the predicate should turn a monotonically
// increasing change to a monotonically decreasing one, and vice versa.
- bool IncreasingSwapped;
- bool ResultSwapped = isMonotonicPredicateImpl(
- LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped);
+ if (Result) {
+ auto ResultSwapped =
+ getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred));
- assert(Result == ResultSwapped && "should be able to analyze both!");
- if (ResultSwapped)
- assert(Increasing == !IncreasingSwapped &&
+ assert(ResultSwapped.hasValue() && "should be able to analyze both!");
+ assert(ResultSwapped.getValue() != Result.getValue() &&
"monotonicity should flip as we flip the predicate");
+ }
#endif
return Result;
}
-bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
- ICmpInst::Predicate Pred,
- bool &Increasing) {
-
+Optional<ScalarEvolution::MonotonicPredicateType>
+ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred) {
// A zero step value for LHS means the induction variable is essentially a
// loop invariant value. We don't really depend on the predicate actually
// flipping from false to true (for increasing predicates, and the other way
@@ -9095,56 +9571,46 @@ bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
// where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
// as general as possible.
- switch (Pred) {
- default:
- return false; // Conservative answer
-
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- if (!LHS->hasNoUnsignedWrap())
- return false;
+ // Only handle LE/LT/GE/GT predicates.
+ if (!ICmpInst::isRelational(Pred))
+ return None;
- Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
- return true;
+ bool IsGreater = ICmpInst::isGE(Pred) || ICmpInst::isGT(Pred);
+ assert((IsGreater || ICmpInst::isLE(Pred) || ICmpInst::isLT(Pred)) &&
+ "Should be greater or less!");
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: {
+ // Check that AR does not wrap.
+ if (ICmpInst::isUnsigned(Pred)) {
+ if (!LHS->hasNoUnsignedWrap())
+ return None;
+ return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
+ } else {
+ assert(ICmpInst::isSigned(Pred) &&
+ "Relational predicate is either signed or unsigned!");
if (!LHS->hasNoSignedWrap())
- return false;
+ return None;
const SCEV *Step = LHS->getStepRecurrence(*this);
- if (isKnownNonNegative(Step)) {
- Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
- return true;
- }
+ if (isKnownNonNegative(Step))
+ return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
- if (isKnownNonPositive(Step)) {
- Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
- return true;
- }
-
- return false;
- }
+ if (isKnownNonPositive(Step))
+ return !IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
+ return None;
}
-
- llvm_unreachable("switch has default clause!");
}
-bool ScalarEvolution::isLoopInvariantPredicate(
- ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
- ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
- const SCEV *&InvariantRHS) {
+Optional<ScalarEvolution::LoopInvariantPredicate>
+ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const Loop *L) {
// If there is a loop-invariant, force it into the RHS, otherwise bail out.
if (!isLoopInvariant(RHS, L)) {
if (!isLoopInvariant(LHS, L))
- return false;
+ return None;
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
@@ -9152,12 +9618,11 @@ bool ScalarEvolution::isLoopInvariantPredicate(
const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
if (!ArLHS || ArLHS->getLoop() != L)
- return false;
-
- bool Increasing;
- if (!isMonotonicPredicate(ArLHS, Pred, Increasing))
- return false;
+ return None;
+ auto MonotonicType = getMonotonicPredicateType(ArLHS, Pred);
+ if (!MonotonicType)
+ return None;
// If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
// true as the loop iterates, and the backedge is control dependent on
// "ArLHS `Pred` RHS" == true then we can reason as follows:
@@ -9175,16 +9640,77 @@ bool ScalarEvolution::isLoopInvariantPredicate(
//
// A similar reasoning applies for a monotonically decreasing predicate, by
// replacing true with false and false with true in the above two bullets.
-
+ bool Increasing = *MonotonicType == ScalarEvolution::MonotonicallyIncreasing;
auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);
if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
- return false;
+ return None;
- InvariantPred = Pred;
- InvariantLHS = ArLHS->getStart();
- InvariantRHS = RHS;
- return true;
+ return ScalarEvolution::LoopInvariantPredicate(Pred, ArLHS->getStart(), RHS);
+}
+
+Optional<ScalarEvolution::LoopInvariantPredicate>
+ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
+ const Instruction *Context, const SCEV *MaxIter) {
+ // Try to prove the following set of facts:
+ // - The predicate is monotonic in the iteration space.
+ // - If the check does not fail on the 1st iteration:
+ // - No overflow will happen during first MaxIter iterations;
+ // - It will not fail on the MaxIter'th iteration.
+ // If the check does fail on the 1st iteration, we leave the loop and no
+ // other checks matter.
+
+ // If there is a loop-invariant, force it into the RHS, otherwise bail out.
+ if (!isLoopInvariant(RHS, L)) {
+ if (!isLoopInvariant(LHS, L))
+ return None;
+
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ auto *AR = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!AR || AR->getLoop() != L)
+ return None;
+
+ // The predicate must be relational (i.e. <, <=, >=, >).
+ if (!ICmpInst::isRelational(Pred))
+ return None;
+
+ // TODO: Support steps other than +/- 1.
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ auto *One = getOne(Step->getType());
+ auto *MinusOne = getNegativeSCEV(One);
+ if (Step != One && Step != MinusOne)
+ return None;
+
+ // Type mismatch here means that MaxIter is potentially larger than max
+ // unsigned value in start type, which means we cannot prove no wrap for the
+ // indvar.
+ if (AR->getType() != MaxIter->getType())
+ return None;
+
+ // Value of IV on suggested last iteration.
+ const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this);
+ // Does it still meet the requirement?
+ if (!isLoopBackedgeGuardedByCond(L, Pred, Last, RHS))
+ return None;
+ // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does
+ // not exceed max unsigned value of this type), this effectively proves
+ // that there is no wrap during the iteration. To prove that there is no
+ // signed/unsigned wrap, we need to check that
+ // Start <= Last for step = 1 or Start >= Last for step = -1.
+ ICmpInst::Predicate NoOverflowPred =
+ CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ if (Step == MinusOne)
+ NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred);
+ const SCEV *Start = AR->getStart();
+ if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context))
+ return None;
+
+ // Everything is fine.
+ return ScalarEvolution::LoopInvariantPredicate(Pred, Start, RHS);
}
bool ScalarEvolution::isKnownPredicateViaConstantRanges(
@@ -9269,6 +9795,24 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
return true;
break;
+
+ case ICmpInst::ICMP_UGE:
+ std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_ULE:
+ // X u<= (X + C)<nuw> for any C
+ if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNUW))
+ return true;
+ break;
+
+ case ICmpInst::ICMP_UGT:
+ std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_ULT:
+ // X u< (X + C)<nuw> if C != 0
+ if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNUW) && !C.isNullValue())
+ return true;
+ break;
}
return false;
@@ -9296,14 +9840,14 @@ bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
}
-bool ScalarEvolution::isImpliedViaGuard(BasicBlock *BB,
+bool ScalarEvolution::isImpliedViaGuard(const BasicBlock *BB,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// No need to even try if we know the module has no guards.
if (!HasGuards)
return false;
- return any_of(*BB, [&](Instruction &I) {
+ return any_of(*BB, [&](const Instruction &I) {
using namespace llvm::PatternMatch;
Value *Condition;
@@ -9426,24 +9970,14 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
return false;
}
-bool
-ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
- ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS) {
- // Interpret a null as meaning no loop, where there is obviously no guard
- // (interprocedural conditions notwithstanding).
- if (!L) return false;
-
+bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS) {
if (VerifyIR)
- assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
+ assert(!verifyFunction(*BB->getParent(), &dbgs()) &&
"This cannot be done on broken IR!");
- // Both LHS and RHS must be available at loop entry.
- assert(isAvailableAtLoopEntry(LHS, L) &&
- "LHS is not available at Loop Entry");
- assert(isAvailableAtLoopEntry(RHS, L) &&
- "RHS is not available at Loop Entry");
-
if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
return true;
@@ -9467,7 +10001,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
}
// Try to prove (Pred, LHS, RHS) using isImpliedViaGuard.
- auto ProveViaGuard = [&](BasicBlock *Block) {
+ auto ProveViaGuard = [&](const BasicBlock *Block) {
if (isImpliedViaGuard(Block, Pred, LHS, RHS))
return true;
if (ProvingStrictComparison) {
@@ -9484,35 +10018,39 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
};
// Try to prove (Pred, LHS, RHS) using isImpliedCond.
- auto ProveViaCond = [&](Value *Condition, bool Inverse) {
- if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse))
+ auto ProveViaCond = [&](const Value *Condition, bool Inverse) {
+ const Instruction *Context = &BB->front();
+ if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context))
return true;
if (ProvingStrictComparison) {
if (!ProvedNonStrictComparison)
- ProvedNonStrictComparison =
- isImpliedCond(NonStrictPredicate, LHS, RHS, Condition, Inverse);
+ ProvedNonStrictComparison = isImpliedCond(NonStrictPredicate, LHS, RHS,
+ Condition, Inverse, Context);
if (!ProvedNonEquality)
- ProvedNonEquality =
- isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, Condition, Inverse);
+ ProvedNonEquality = isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS,
+ Condition, Inverse, Context);
if (ProvedNonStrictComparison && ProvedNonEquality)
return true;
}
return false;
};
- // Starting at the loop predecessor, climb up the predecessor chain, as long
+ // Starting at the block's predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
- // leading to the original header.
- for (std::pair<BasicBlock *, BasicBlock *>
- Pair(L->getLoopPredecessor(), L->getHeader());
- Pair.first;
- Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
-
+ // leading to the original block.
+ const Loop *ContainingLoop = LI.getLoopFor(BB);
+ const BasicBlock *PredBB;
+ if (ContainingLoop && ContainingLoop->getHeader() == BB)
+ PredBB = ContainingLoop->getLoopPredecessor();
+ else
+ PredBB = BB->getSinglePredecessor();
+ for (std::pair<const BasicBlock *, const BasicBlock *> Pair(PredBB, BB);
+ Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
if (ProveViaGuard(Pair.first))
return true;
- BranchInst *LoopEntryPredicate =
- dyn_cast<BranchInst>(Pair.first->getTerminator());
+ const BranchInst *LoopEntryPredicate =
+ dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!LoopEntryPredicate ||
LoopEntryPredicate->isUnconditional())
continue;
@@ -9527,7 +10065,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
if (!AssumeVH)
continue;
auto *CI = cast<CallInst>(AssumeVH);
- if (!DT.dominates(CI, L->getHeader()))
+ if (!DT.dominates(CI, BB))
continue;
if (ProveViaCond(CI->getArgOperand(0), false))
@@ -9537,10 +10075,27 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
return false;
}
-bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS,
- Value *FoundCondValue,
- bool Inverse) {
+bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L)
+ return false;
+
+ // Both LHS and RHS must be available at loop entry.
+ assert(isAvailableAtLoopEntry(LHS, L) &&
+ "LHS is not available at Loop Entry");
+ assert(isAvailableAtLoopEntry(RHS, L) &&
+ "RHS is not available at Loop Entry");
+ return isBasicBlockEntryGuardedByCond(L->getHeader(), Pred, LHS, RHS);
+}
+
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS,
+ const Value *FoundCondValue, bool Inverse,
+ const Instruction *Context) {
if (!PendingLoopPredicates.insert(FoundCondValue).second)
return false;
@@ -9548,19 +10103,23 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });
// Recursively handle And and Or conditions.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
+ if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
if (BO->getOpcode() == Instruction::And) {
if (!Inverse)
- return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
- isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse,
+ Context) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse,
+ Context);
} else if (BO->getOpcode() == Instruction::Or) {
if (Inverse)
- return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
- isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse,
+ Context) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse,
+ Context);
}
}
- ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
+ const ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
if (!ICI) return false;
// Now that we found a conditional branch that dominates the loop or controls
@@ -9574,17 +10133,36 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
- return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS);
+ return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context);
}
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
ICmpInst::Predicate FoundPred,
- const SCEV *FoundLHS,
- const SCEV *FoundRHS) {
+ const SCEV *FoundLHS, const SCEV *FoundRHS,
+ const Instruction *Context) {
// Balance the types.
if (getTypeSizeInBits(LHS->getType()) <
getTypeSizeInBits(FoundLHS->getType())) {
+ // For unsigned and equality predicates, try to prove that both found
+ // operands fit into narrow unsigned range. If so, try to prove facts in
+ // narrow types.
+ if (!CmpInst::isSigned(FoundPred)) {
+ auto *NarrowType = LHS->getType();
+ auto *WideType = FoundLHS->getType();
+ auto BitWidth = getTypeSizeInBits(NarrowType);
+ const SCEV *MaxValue = getZeroExtendExpr(
+ getConstant(APInt::getMaxValue(BitWidth)), WideType);
+ if (isKnownPredicate(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) &&
+ isKnownPredicate(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) {
+ const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType);
+ const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType);
+ if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS,
+ TruncFoundRHS, Context))
+ return true;
+ }
+ }
+
if (CmpInst::isSigned(Pred)) {
LHS = getSignExtendExpr(LHS, FoundLHS->getType());
RHS = getSignExtendExpr(RHS, FoundLHS->getType());
@@ -9602,7 +10180,17 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
}
}
+ return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS,
+ FoundRHS, Context);
+}
+bool ScalarEvolution::isImpliedCondBalancedTypes(
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+ ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS,
+ const Instruction *Context) {
+ assert(getTypeSizeInBits(LHS->getType()) ==
+ getTypeSizeInBits(FoundLHS->getType()) &&
+ "Types should be balanced!");
// Canonicalize the query to match the way instcombine will have
// canonicalized the comparison.
if (SimplifyICmpOperands(Pred, LHS, RHS))
@@ -9625,16 +10213,16 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
// Check whether the found predicate is the same as the desired predicate.
if (FoundPred == Pred)
- return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context);
// Check whether swapping the found predicate makes it the same as the
// desired predicate.
if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
if (isa<SCEVConstant>(RHS))
- return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context);
else
- return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
- RHS, LHS, FoundLHS, FoundRHS);
+ return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), RHS,
+ LHS, FoundLHS, FoundRHS, Context);
}
// Unsigned comparison is the same as signed comparison when both the operands
@@ -9642,7 +10230,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
if (CmpInst::isUnsigned(FoundPred) &&
CmpInst::getSignedPredicate(FoundPred) == Pred &&
isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
- return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context);
// Check if we can make progress by sharpening ranges.
if (FoundPred == ICmpInst::ICMP_NE &&
@@ -9679,8 +10267,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
case ICmpInst::ICMP_UGE:
// We know V `Pred` SharperMin. If this implies LHS `Pred`
// RHS, we're done.
- if (isImpliedCondOperands(Pred, LHS, RHS, V,
- getConstant(SharperMin)))
+ if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin),
+ Context))
return true;
LLVM_FALLTHROUGH;
@@ -9695,10 +10283,26 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
//
// If V `Pred` Min implies LHS `Pred` RHS, we're done.
- if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
+ if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min),
+ Context))
+ return true;
+ break;
+
+ // `LHS < RHS` and `LHS <= RHS` are handled in the same way as `RHS > LHS` and `RHS >= LHS` respectively.
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE:
+ if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
+ LHS, V, getConstant(SharperMin), Context))
return true;
LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT:
+ if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
+ LHS, V, getConstant(Min), Context))
+ return true;
+ break;
+
default:
// No change
break;
@@ -9709,11 +10313,12 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
// Check whether the actual condition is beyond sufficient.
if (FoundPred == ICmpInst::ICMP_EQ)
if (ICmpInst::isTrueWhenEqual(Pred))
- if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context))
return true;
if (Pred == ICmpInst::ICMP_NE)
if (!ICmpInst::isTrueWhenEqual(FoundPred))
- if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
+ if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS,
+ Context))
return true;
// Otherwise assume the worst.
@@ -9792,6 +10397,51 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
return None;
}
+bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart(
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) {
+ // Try to recognize the following pattern:
+ //
+ // FoundRHS = ...
+ // ...
+ // loop:
+ // FoundLHS = {Start,+,W}
+ // context_bb: // Basic block from the same loop
+ // known(Pred, FoundLHS, FoundRHS)
+ //
+ // If some predicate is known in the context of a loop, it is also known on
+ // each iteration of this loop, including the first iteration. Therefore, in
+ // this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to
+ // prove the original pred using this fact.
+ if (!Context)
+ return false;
+ const BasicBlock *ContextBB = Context->getParent();
+ // Make sure AR varies in the context block.
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundLHS)) {
+ const Loop *L = AR->getLoop();
+ // Make sure that context belongs to the loop and executes on 1st iteration
+ // (if it ever executes at all).
+ if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch()))
+ return false;
+ if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop()))
+ return false;
+ return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS);
+ }
+
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundRHS)) {
+ const Loop *L = AR->getLoop();
+ // Make sure that context belongs to the loop and executes on 1st iteration
+ // (if it ever executes at all).
+ if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch()))
+ return false;
+ if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop()))
+ return false;
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart());
+ }
+
+ return false;
+}
+
bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS, const SCEV *FoundRHS) {
@@ -9972,6 +10622,10 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
if (!dominates(RHS, IncBB))
return false;
const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
+ // Make sure L does not refer to a value from a potentially previous
+ // iteration of a loop.
+ if (!properlyDominates(L, IncBB))
+ return false;
if (!ProvedEasily(L, RHS))
return false;
}
@@ -9982,13 +10636,18 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
- const SCEV *FoundRHS) {
+ const SCEV *FoundRHS,
+ const Instruction *Context) {
if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
+ if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS,
+ Context))
+ return true;
+
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
FoundLHS, FoundRHS) ||
// ~x < ~y --> x > y
@@ -10005,7 +10664,7 @@ static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr,
if (!MinMaxExpr)
return false;
- return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end();
+ return is_contained(MinMaxExpr->operands(), Candidate);
}
static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
@@ -10087,13 +10746,31 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
// We want to avoid hurting the compile time with analysis of too big trees.
if (Depth > MaxSCEVOperationsImplicationDepth)
return false;
- // We only want to work with ICMP_SGT comparison so far.
- // TODO: Extend to ICMP_UGT?
- if (Pred == ICmpInst::ICMP_SLT) {
- Pred = ICmpInst::ICMP_SGT;
+
+ // We only want to work with GT comparison so far.
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) {
+ Pred = CmpInst::getSwappedPredicate(Pred);
std::swap(LHS, RHS);
std::swap(FoundLHS, FoundRHS);
}
+
+ // For unsigned, try to reduce it to corresponding signed comparison.
+ if (Pred == ICmpInst::ICMP_UGT)
+ // We can replace unsigned predicate with its signed counterpart if all
+ // involved values are non-negative.
+ // TODO: We could have better support for unsigned.
+ if (isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) {
+ // Knowing that both FoundLHS and FoundRHS are non-negative, and knowing
+ // FoundLHS >u FoundRHS, we also know that FoundLHS >s FoundRHS. Let us
+ // use this fact to prove that LHS and RHS are non-negative.
+ const SCEV *MinusOne = getMinusOne(LHS->getType());
+ if (isImpliedCondOperands(ICmpInst::ICMP_SGT, LHS, MinusOne, FoundLHS,
+ FoundRHS) &&
+ isImpliedCondOperands(ICmpInst::ICMP_SGT, RHS, MinusOne, FoundLHS,
+ FoundRHS))
+ Pred = ICmpInst::ICMP_SGT;
+ }
+
if (Pred != ICmpInst::ICMP_SGT)
return false;
@@ -10133,7 +10810,7 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
auto *LL = LHSAddExpr->getOperand(0);
auto *LR = LHSAddExpr->getOperand(1);
- auto *MinusOne = getNegativeSCEV(getOne(RHS->getType()));
+ auto *MinusOne = getMinusOne(RHS->getType());
// Checks that S1 >= 0 && S2 > RHS, trivially or using the found context.
auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) {
@@ -10206,7 +10883,7 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
// 1. If FoundLHS is negative, then the result is 0.
// 2. If FoundLHS is non-negative, then the result is non-negative.
// Anyways, the result is non-negative.
- auto *MinusOne = getNegativeSCEV(getOne(WTy));
+ auto *MinusOne = getMinusOne(WTy);
auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt);
if (isKnownNegative(RHS) &&
IsSGTViaContext(FoundRHSExt, NegDenomMinusOne))
@@ -10561,7 +11238,13 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
BECount = BECountIfBackedgeTaken;
else {
- End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
+ // If we know that RHS >= Start in the context of loop, then we know that
+ // max(RHS, Start) = RHS at this point.
+ if (isLoopEntryGuardedByCond(
+ L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, RHS, Start))
+ End = RHS;
+ else
+ End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
}
@@ -10628,8 +11311,15 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
- End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start);
+ if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) {
+ // If we know that Start >= RHS in the context of loop, then we know that
+ // min(RHS, Start) = RHS at this point.
+ if (isLoopEntryGuardedByCond(
+ L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, Start, RHS))
+ End = RHS;
+ else
+ End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start);
+ }
const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
@@ -10669,7 +11359,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
// If the start is a non-zero constant, shift the range to simplify things.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
- SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
+ SmallVector<const SCEV *, 4> Operands(operands());
Operands[0] = SE.getZero(SC->getType());
const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
getNoWrapFlags(FlagNW));
@@ -10952,9 +11642,7 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
}
// Remove all SCEVConstants.
- Terms.erase(
- remove_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }),
- Terms.end());
+ erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); });
if (Terms.size() > 0)
if (!findArrayDimensionsRec(SE, Terms, Sizes))
@@ -11282,7 +11970,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
// so that future queries will recompute the expressions using the new
// value.
Value *Old = getValPtr();
- SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
+ SmallVector<User *, 16> Worklist(Old->users());
SmallPtrSet<User *, 8> Visited;
while (!Worklist.empty()) {
User *U = Worklist.pop_back_val();
@@ -11295,7 +11983,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->eraseValueFromMap(U);
- Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
+ llvm::append_range(Worklist, U->users());
}
// Delete the Old value.
if (PHINode *PN = dyn_cast<PHINode>(Old))
@@ -11577,9 +12265,10 @@ ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
ScalarEvolution::LoopDisposition
ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
- switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+ switch (S->getSCEVType()) {
case scConstant:
return LoopInvariant;
+ case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
@@ -11684,9 +12373,10 @@ ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
ScalarEvolution::BlockDisposition
ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
- switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+ switch (S->getSCEVType()) {
case scConstant:
return ProperlyDominatesBlock;
+ case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
@@ -11858,7 +12548,7 @@ void ScalarEvolution::verify() const {
while (!LoopStack.empty()) {
auto *L = LoopStack.pop_back_val();
- LoopStack.insert(LoopStack.end(), L->begin(), L->end());
+ llvm::append_range(LoopStack, *L);
auto *CurBECount = SCM.visit(
const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
@@ -11902,6 +12592,25 @@ void ScalarEvolution::verify() const {
std::abort();
}
}
+
+ // Collect all valid loops currently in LoopInfo.
+ SmallPtrSet<Loop *, 32> ValidLoops;
+ SmallVector<Loop *, 32> Worklist(LI.begin(), LI.end());
+ while (!Worklist.empty()) {
+ Loop *L = Worklist.pop_back_val();
+ if (ValidLoops.contains(L))
+ continue;
+ ValidLoops.insert(L);
+ Worklist.append(L->begin(), L->end());
+ }
+ // Check for SCEV expressions referencing invalid/deleted loops.
+ for (auto &KV : ValueExprMap) {
+ auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second);
+ if (!AR)
+ continue;
+ assert(ValidLoops.contains(AR->getLoop()) &&
+ "AddRec references invalid loop");
+ }
}
bool ScalarEvolution::invalidate(
@@ -11934,6 +12643,11 @@ ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses
ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
+ // For compatibility with opt's -analyze feature under legacy pass manager
+ // which was not ported to NPM. This keeps tests using
+ // update_analyze_test_checks.py working.
+ OS << "Printing analysis 'Scalar Evolution Analysis' for function '"
+ << F.getName() << "':\n";
AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
@@ -12429,11 +13143,24 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
}
// Match the mathematical pattern A - (A / B) * B, where A and B can be
-// arbitrary expressions.
+// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used
+// for URem with constant power-of-2 second operands.
// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is
// 4, A / B becomes X / 8).
bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
const SCEV *&RHS) {
+ // Try to match 'zext (trunc A to iB) to iY', which is used
+ // for URem with constant power-of-2 second operands. Make sure the size of
+ // the operand A matches the size of the whole expressions.
+ if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr))
+ if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) {
+ LHS = Trunc->getOperand();
+ if (LHS->getType() != Expr->getType())
+ LHS = getZeroExtendExpr(LHS, Expr->getType());
+ RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1)
+ << getTypeSizeInBits(Trunc->getType()));
+ return true;
+ }
const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
if (Add == nullptr || Add->getNumOperands() != 2)
return false;
@@ -12467,3 +13194,146 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0)));
return false;
}
+
+const SCEV *
+ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) {
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Form an expression for the maximum exit count possible for this loop. We
+ // merge the max and exact information to approximate a version of
+ // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
+ SmallVector<const SCEV*, 4> ExitCounts;
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ const SCEV *ExitCount = getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ ExitCount = getExitCount(L, ExitingBB,
+ ScalarEvolution::ConstantMaximum);
+ if (!isa<SCEVCouldNotCompute>(ExitCount)) {
+ assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
+ "We should only have known counts for exiting blocks that "
+ "dominate latch!");
+ ExitCounts.push_back(ExitCount);
+ }
+ }
+ if (ExitCounts.empty())
+ return getCouldNotCompute();
+ return getUMinFromMismatchedTypes(ExitCounts);
+}
+
+/// This rewriter is similar to SCEVParameterRewriter (it replaces SCEVUnknown
+/// components following the Map (Value -> SCEV)), but skips AddRecExpr because
+/// we cannot guarantee that the replacement is loop invariant in the loop of
+/// the AddRec.
+class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
+ ValueToSCEVMapTy &Map;
+
+public:
+ SCEVLoopGuardRewriter(ScalarEvolution &SE, ValueToSCEVMapTy &M)
+ : SCEVRewriteVisitor(SE), Map(M) {}
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ auto I = Map.find(Expr->getValue());
+ if (I == Map.end())
+ return Expr;
+ return I->second;
+ }
+};
+
+const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
+ auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
+ const SCEV *RHS, ValueToSCEVMapTy &RewriteMap) {
+ if (!isa<SCEVUnknown>(LHS)) {
+ std::swap(LHS, RHS);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+
+ // For now, limit to conditions that provide information about unknown
+ // expressions.
+ auto *LHSUnknown = dyn_cast<SCEVUnknown>(LHS);
+ if (!LHSUnknown)
+ return;
+
+ // TODO: use information from more predicates.
+ switch (Predicate) {
+ case CmpInst::ICMP_ULT: {
+ if (!containsAddRecurrence(RHS)) {
+ const SCEV *Base = LHS;
+ auto I = RewriteMap.find(LHSUnknown->getValue());
+ if (I != RewriteMap.end())
+ Base = I->second;
+
+ RewriteMap[LHSUnknown->getValue()] =
+ getUMinExpr(Base, getMinusSCEV(RHS, getOne(RHS->getType())));
+ }
+ break;
+ }
+ case CmpInst::ICMP_ULE: {
+ if (!containsAddRecurrence(RHS)) {
+ const SCEV *Base = LHS;
+ auto I = RewriteMap.find(LHSUnknown->getValue());
+ if (I != RewriteMap.end())
+ Base = I->second;
+ RewriteMap[LHSUnknown->getValue()] = getUMinExpr(Base, RHS);
+ }
+ break;
+ }
+ case CmpInst::ICMP_EQ:
+ if (isa<SCEVConstant>(RHS))
+ RewriteMap[LHSUnknown->getValue()] = RHS;
+ break;
+ case CmpInst::ICMP_NE:
+ if (isa<SCEVConstant>(RHS) &&
+ cast<SCEVConstant>(RHS)->getValue()->isNullValue())
+ RewriteMap[LHSUnknown->getValue()] =
+ getUMaxExpr(LHS, getOne(RHS->getType()));
+ break;
+ default:
+ break;
+ }
+ };
+ // Starting at the loop predecessor, climb up the predecessor chain, as long
+ // as there are predecessors that can be found that have unique successors
+ // leading to the original header.
+ // TODO: share this logic with isLoopEntryGuardedByCond.
+ ValueToSCEVMapTy RewriteMap;
+ for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
+ L->getLoopPredecessor(), L->getHeader());
+ Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
+
+ const BranchInst *LoopEntryPredicate =
+ dyn_cast<BranchInst>(Pair.first->getTerminator());
+ if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional())
+ continue;
+
+ // TODO: use information from more complex conditions, e.g. AND expressions.
+ auto *Cmp = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition());
+ if (!Cmp)
+ continue;
+
+ auto Predicate = Cmp->getPredicate();
+ if (LoopEntryPredicate->getSuccessor(1) == Pair.second)
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ CollectCondition(Predicate, getSCEV(Cmp->getOperand(0)),
+ getSCEV(Cmp->getOperand(1)), RewriteMap);
+ }
+
+ // Also collect information from assumptions dominating the loop.
+ for (auto &AssumeVH : AC.assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *AssumeI = cast<CallInst>(AssumeVH);
+ auto *Cmp = dyn_cast<ICmpInst>(AssumeI->getOperand(0));
+ if (!Cmp || !DT.dominates(AssumeI, L->getHeader()))
+ continue;
+ CollectCondition(Cmp->getPredicate(), getSCEV(Cmp->getOperand(0)),
+ getSCEV(Cmp->getOperand(1)), RewriteMap);
+ }
+
+ if (RewriteMap.empty())
+ return Expr;
+ SCEVLoopGuardRewriter Rewriter(*this, RewriteMap);
+ return Rewriter.visit(Expr);
+}
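
Illustrative sketch only (not a definitive implementation): the new applyLoopGuards/SCEVLoopGuardRewriter code above collects facts from dominating branches and assumes (for example, a guard "icmp ult %n, 16" maps %n to umin(%n, 15)) and rewrites an expression with them. The calling pattern mirrors the howFarToZero hunk earlier in this file; ScalarEvolution member context is assumed, and Distance and L are placeholder names.

  // Sketch only: tighten the max backedge-taken count using loop guards,
  // but never report a looser bound than the unguarded range gives.
  APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L));
  APInt MaxBECountBase = getUnsignedRangeMax(Distance);
  if (MaxBECountBase.ult(MaxBECount))
    MaxBECount = MaxBECountBase;
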
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 79640256f695..8f289feb3dcf 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -82,10 +82,12 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
Value *BO = GetBaseValue(BS);
if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
if (alias(MemoryLocation(AO ? AO : LocA.Ptr,
- AO ? LocationSize::unknown() : LocA.Size,
+ AO ? LocationSize::beforeOrAfterPointer()
+ : LocA.Size,
AO ? AAMDNodes() : LocA.AATags),
MemoryLocation(BO ? BO : LocB.Ptr,
- BO ? LocationSize::unknown() : LocB.Size,
+ BO ? LocationSize::beforeOrAfterPointer()
+ : LocB.Size,
BO ? AAMDNodes() : LocB.AATags),
AAQI) == NoAlias)
return NoAlias;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
index 19bf5766f448..64e908bdf342 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
@@ -215,16 +215,14 @@ void SCEVDivision::visitMulExpr(const SCEVMulExpr *Numerator) {
return cannotDivide(Numerator);
// The Remainder is obtained by replacing Denominator by 0 in Numerator.
- ValueToValueMap RewriteMap;
- RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
- cast<SCEVConstant>(Zero)->getValue();
- Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+ ValueToSCEVMapTy RewriteMap;
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = Zero;
+ Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap);
if (Remainder->isZero()) {
// The Quotient is obtained by replacing Denominator by 1 in Numerator.
- RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
- cast<SCEVConstant>(One)->getValue();
- Quotient = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = One;
+ Quotient = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap);
return;
}
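
To illustrate the interface change above (SCEVParameterRewriter::rewrite now takes a ValueToSCEVMapTy that maps values directly to SCEV replacements), here is a minimal hedged sketch; Expr, Param and SE are assumed placeholder names in scope of some analysis code, not part of the patch.

  // Sketch only: replace the SCEVUnknown wrapping Param with the constant 0
  // and let the rewriter fold the resulting expression.
  ValueToSCEVMapTy RewriteMap;
  RewriteMap[cast<SCEVUnknown>(Param)->getValue()] = SE.getZero(Param->getType());
  const SCEV *Rewritten = SCEVParameterRewriter::rewrite(Expr, SE, RewriteMap);
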
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index 8928678d6ab2..6b38d6716b92 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -34,6 +34,7 @@
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -50,31 +51,6 @@ using namespace llvm;
static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias",
cl::init(true), cl::Hidden);
-namespace {
-
-/// This is a simple wrapper around an MDNode which provides a higher-level
-/// interface by hiding the details of how alias analysis information is encoded
-/// in its operands.
-class AliasScopeNode {
- const MDNode *Node = nullptr;
-
-public:
- AliasScopeNode() = default;
- explicit AliasScopeNode(const MDNode *N) : Node(N) {}
-
- /// Get the MDNode for this AliasScopeNode.
- const MDNode *getNode() const { return Node; }
-
- /// Get the MDNode for this AliasScopeNode's domain.
- const MDNode *getDomain() const {
- if (Node->getNumOperands() < 2)
- return nullptr;
- return dyn_cast_or_null<MDNode>(Node->getOperand(1));
- }
-};
-
-} // end anonymous namespace
-
AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB,
AAQueryInfo &AAQI) {
@@ -185,7 +161,7 @@ ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F,
char ScopedNoAliasAAWrapperPass::ID = 0;
-INITIALIZE_PASS(ScopedNoAliasAAWrapperPass, "scoped-noalias",
+INITIALIZE_PASS(ScopedNoAliasAAWrapperPass, "scoped-noalias-aa",
"Scoped NoAlias Alias Analysis", false, true)
ImmutablePass *llvm::createScopedNoAliasAAWrapperPass() {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/StackLifetime.cpp b/contrib/llvm-project/llvm/lib/Analysis/StackLifetime.cpp
index 9727b7a33d1f..ab5f2db7d1cd 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/StackLifetime.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/StackLifetime.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/BasicBlock.h"
@@ -63,13 +64,28 @@ bool StackLifetime::isAliveAfter(const AllocaInst *AI,
return getLiveRange(AI).test(InstNum);
}
-static bool readMarker(const Instruction *I, bool *IsStart) {
- if (!I->isLifetimeStartOrEnd())
- return false;
+// Returns the unique alloca annotated by a lifetime marker, but only if
+// the marker has the same size and points to the alloca start.
+static const AllocaInst *findMatchingAlloca(const IntrinsicInst &II,
+ const DataLayout &DL) {
+ const AllocaInst *AI = findAllocaForValue(II.getArgOperand(1), true);
+ if (!AI)
+ return nullptr;
- auto *II = cast<IntrinsicInst>(I);
- *IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start;
- return true;
+ auto AllocaSizeInBits = AI->getAllocationSizeInBits(DL);
+ if (!AllocaSizeInBits)
+ return nullptr;
+ int64_t AllocaSize = AllocaSizeInBits.getValue() / 8;
+
+ auto *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
+ if (!Size)
+ return nullptr;
+ int64_t LifetimeSize = Size->getSExtValue();
+
+ if (LifetimeSize != -1 && LifetimeSize != AllocaSize)
+ return nullptr;
+
+ return AI;
}
void StackLifetime::collectMarkers() {
@@ -77,28 +93,27 @@ void StackLifetime::collectMarkers() {
DenseMap<const BasicBlock *, SmallDenseMap<const IntrinsicInst *, Marker>>
BBMarkerSet;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
// Compute the set of start/end markers per basic block.
- for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) {
- const AllocaInst *AI = Allocas[AllocaNo];
- SmallVector<const Instruction *, 8> WorkList;
- WorkList.push_back(AI);
- while (!WorkList.empty()) {
- const Instruction *I = WorkList.pop_back_val();
- for (const User *U : I->users()) {
- if (auto *BI = dyn_cast<BitCastInst>(U)) {
- WorkList.push_back(BI);
- continue;
- }
- auto *UI = dyn_cast<IntrinsicInst>(U);
- if (!UI)
- continue;
- bool IsStart;
- if (!readMarker(UI, &IsStart))
- continue;
- if (IsStart)
- InterestingAllocas.set(AllocaNo);
- BBMarkerSet[UI->getParent()][UI] = {AllocaNo, IsStart};
+ for (const BasicBlock *BB : depth_first(&F)) {
+ for (const Instruction &I : *BB) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II || !II->isLifetimeStartOrEnd())
+ continue;
+ const AllocaInst *AI = findMatchingAlloca(*II, DL);
+ if (!AI) {
+ HasUnknownLifetimeStartOrEnd = true;
+ continue;
}
+ auto It = AllocaNumbering.find(AI);
+ if (It == AllocaNumbering.end())
+ continue;
+ auto AllocaNo = It->second;
+ bool IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start;
+ if (IsStart)
+ InterestingAllocas.set(AllocaNo);
+ BBMarkerSet[BB][II] = {AllocaNo, IsStart};
}
}
@@ -277,7 +292,7 @@ LLVM_DUMP_METHOD void StackLifetime::dumpBlockLiveness() const {
const BasicBlock *BB = IT.getFirst();
const BlockLifetimeInfo &BlockInfo = BlockLiveness.find(BB)->getSecond();
auto BlockRange = BlockInstRange.find(BB)->getSecond();
- dbgs() << " BB [" << BlockRange.first << ", " << BlockRange.second
+ dbgs() << " BB (" << BB->getName() << ") [" << BlockRange.first << ", " << BlockRange.second
<< "): begin " << BlockInfo.Begin << ", end " << BlockInfo.End
<< ", livein " << BlockInfo.LiveIn << ", liveout "
<< BlockInfo.LiveOut << "\n";
@@ -304,6 +319,20 @@ StackLifetime::StackLifetime(const Function &F,
}
void StackLifetime::run() {
+ if (HasUnknownLifetimeStartOrEnd) {
+ // There is a marker which we can't assign to a specific alloca, so we
+ // fall back to the most conservative result for the liveness type.
+ switch (Type) {
+ case LivenessType::May:
+ LiveRanges.resize(NumAllocas, getFullLiveRange());
+ break;
+ case LivenessType::Must:
+ LiveRanges.resize(NumAllocas, LiveRange(Instructions.size()));
+ break;
+ }
+ return;
+ }
+
LiveRanges.resize(NumAllocas, LiveRange(Instructions.size()));
for (unsigned I = 0; I < NumAllocas; ++I)
if (!InterestingAllocas.test(I))
diff --git a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index bbfc303aefac..73096eb4baef 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -37,6 +38,25 @@ using namespace llvm;
STATISTIC(NumAllocaStackSafe, "Number of safe allocas");
STATISTIC(NumAllocaTotal, "Number of total allocas");
+STATISTIC(NumCombinedCalleeLookupTotal,
+ "Number of total callee lookups on combined index.");
+STATISTIC(NumCombinedCalleeLookupFailed,
+ "Number of failed callee lookups on combined index.");
+STATISTIC(NumModuleCalleeLookupTotal,
+ "Number of total callee lookups on module index.");
+STATISTIC(NumModuleCalleeLookupFailed,
+ "Number of failed callee lookups on module index.");
+STATISTIC(NumCombinedParamAccessesBefore,
+ "Number of total param accesses before generateParamAccessSummary.");
+STATISTIC(NumCombinedParamAccessesAfter,
+ "Number of total param accesses after generateParamAccessSummary.");
+STATISTIC(NumCombinedDataFlowNodes,
+ "Number of total nodes in combined index for dataflow processing.");
+STATISTIC(NumIndexCalleeUnhandled, "Number of index callee which are unhandled.");
+STATISTIC(NumIndexCalleeMultipleWeak, "Number of index callee non-unique weak.");
+STATISTIC(NumIndexCalleeMultipleExternal, "Number of index callee non-unique external.");
+
+
static cl::opt<int> StackSafetyMaxIterations("stack-safety-max-iterations",
cl::init(20), cl::Hidden);
@@ -48,26 +68,48 @@ static cl::opt<bool> StackSafetyRun("stack-safety-run", cl::init(false),
namespace {
+// Check if we should bailout for such ranges.
+bool isUnsafe(const ConstantRange &R) {
+ return R.isEmptySet() || R.isFullSet() || R.isUpperSignWrapped();
+}
+
+ConstantRange addOverflowNever(const ConstantRange &L, const ConstantRange &R) {
+ assert(!L.isSignWrappedSet());
+ assert(!R.isSignWrappedSet());
+ if (L.signedAddMayOverflow(R) !=
+ ConstantRange::OverflowResult::NeverOverflows)
+ return ConstantRange::getFull(L.getBitWidth());
+ ConstantRange Result = L.add(R);
+ assert(!Result.isSignWrappedSet());
+ return Result;
+}
+
+ConstantRange unionNoWrap(const ConstantRange &L, const ConstantRange &R) {
+ assert(!L.isSignWrappedSet());
+ assert(!R.isSignWrappedSet());
+ auto Result = L.unionWith(R);
+ // The union of two non-wrapped sets can still be sign-wrapped.
+ if (Result.isSignWrappedSet())
+ Result = ConstantRange::getFull(Result.getBitWidth());
+ return Result;
+}
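Both helpers guard against ConstantRange results that wrap around the signed boundary, which this analysis treats as unsafe. A minimal standalone sketch, not part of the patch (function name and constants are made up for illustration):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// Sketch: the union of two individually non-wrapped 8-bit ranges can itself be
// sign-wrapped, in which case the helpers above widen to the full set.
ConstantRange unionNoWrapExample() {
  ConstantRange A(APInt(8, 100), APInt(8, 120));        // [100, 120)
  ConstantRange B(APInt(8, -120, /*isSigned=*/true),
                  APInt(8, -100, /*isSigned=*/true));   // [-120, -100)
  ConstantRange U = A.unionWith(B); // smallest arc is [100, -100): sign-wrapped
  return U.isSignWrappedSet() ? ConstantRange::getFull(U.getBitWidth()) : U;
}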
+
/// Describes use of an address as a function call argument.
template <typename CalleeTy> struct CallInfo {
/// Function being called.
const CalleeTy *Callee = nullptr;
/// Index of the argument which passes the address.
size_t ParamNo = 0;
- // Offset range of address from base address (alloca or calling function
- // argument).
- // Range should never set to empty-set, that is an invalid access range
- // that can cause empty-set to be propagated with ConstantRange::add
- ConstantRange Offset;
- CallInfo(const CalleeTy *Callee, size_t ParamNo, const ConstantRange &Offset)
- : Callee(Callee), ParamNo(ParamNo), Offset(Offset) {}
-};
-template <typename CalleeTy>
-raw_ostream &operator<<(raw_ostream &OS, const CallInfo<CalleeTy> &P) {
- return OS << "@" << P.Callee->getName() << "(arg" << P.ParamNo << ", "
- << P.Offset << ")";
-}
+ CallInfo(const CalleeTy *Callee, size_t ParamNo)
+ : Callee(Callee), ParamNo(ParamNo) {}
+
+ struct Less {
+ bool operator()(const CallInfo &L, const CallInfo &R) const {
+ return std::tie(L.ParamNo, L.Callee) < std::tie(R.ParamNo, R.Callee);
+ }
+ };
+};
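With the offset moved out of CallInfo, call sites are keyed by (ParamNo, Callee) through the Less comparator, so duplicate call sites collapse into one map entry and iteration order becomes deterministic. A hedged sketch of the intended insertion pattern (variable names are illustrative; the patch applies the same idiom in analyzeAllUses below):

std::map<CallInfo<GlobalValue>, ConstantRange, CallInfo<GlobalValue>::Less> Calls;
auto Ins = Calls.emplace(CallInfo<GlobalValue>(Callee, /*ParamNo=*/0), Offsets);
if (!Ins.second) // same (ParamNo, Callee) seen before: widen the stored range
  Ins.first->second = Ins.first->second.unionWith(Offsets);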
/// Describe uses of address (alloca or parameter) inside of the function.
template <typename CalleeTy> struct UseInfo {
@@ -76,37 +118,29 @@ template <typename CalleeTy> struct UseInfo {
ConstantRange Range;
// List of calls which pass address as an argument.
- SmallVector<CallInfo<CalleeTy>, 4> Calls;
+ // The value is the offset range of the address from the base address (alloca
+ // or calling function argument). The range should never be set to the empty
+ // set; that is an invalid access range which can cause the empty set to be
+ // propagated by ConstantRange::add.
+ using CallsTy = std::map<CallInfo<CalleeTy>, ConstantRange,
+ typename CallInfo<CalleeTy>::Less>;
+ CallsTy Calls;
UseInfo(unsigned PointerSize) : Range{PointerSize, false} {}
- void updateRange(const ConstantRange &R) {
- assert(!R.isUpperSignWrapped());
- Range = Range.unionWith(R);
- assert(!Range.isUpperSignWrapped());
- }
+ void updateRange(const ConstantRange &R) { Range = unionNoWrap(Range, R); }
};
template <typename CalleeTy>
raw_ostream &operator<<(raw_ostream &OS, const UseInfo<CalleeTy> &U) {
OS << U.Range;
for (auto &Call : U.Calls)
- OS << ", " << Call;
+ OS << ", "
+ << "@" << Call.first.Callee->getName() << "(arg" << Call.first.ParamNo
+ << ", " << Call.second << ")";
return OS;
}
-// Check if we should bailout for such ranges.
-bool isUnsafe(const ConstantRange &R) {
- return R.isEmptySet() || R.isFullSet() || R.isUpperSignWrapped();
-}
-
-ConstantRange addOverflowNever(const ConstantRange &L, const ConstantRange &R) {
- if (L.signedAddMayOverflow(R) !=
- ConstantRange::OverflowResult::NeverOverflows)
- return ConstantRange(L.getBitWidth(), true);
- return L.add(R);
-}
-
/// Calculate the allocation size of a given alloca. Returns an empty range
/// in case of confusion.
ConstantRange getStaticAllocaSizeRange(const AllocaInst &AI) {
@@ -174,6 +208,7 @@ template <typename CalleeTy> struct FunctionInfo {
} else {
assert(Allocas.empty());
}
+ O << "\n";
}
};
@@ -384,7 +419,11 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
}
assert(isa<Function>(Callee) || isa<GlobalAlias>(Callee));
- US.Calls.emplace_back(Callee, ArgNo, offsetFrom(UI, Ptr));
+ ConstantRange Offsets = offsetFrom(UI, Ptr);
+ auto Insert =
+ US.Calls.emplace(CallInfo<GlobalValue>(Callee, ArgNo), Offsets);
+ if (!Insert.second)
+ Insert.first->second = Insert.first->second.unionWith(Offsets);
break;
}
@@ -417,7 +456,7 @@ FunctionInfo<GlobalValue> StackSafetyLocalAnalysis::run() {
analyzeAllUses(AI, UI, SL);
}
- for (Argument &A : make_range(F.arg_begin(), F.arg_end())) {
+ for (Argument &A : F.args()) {
// Non pointers and bypass arguments are not going to be used in any global
// processing.
if (A.getType()->isPointerTy() && !A.hasByValAttr()) {
@@ -490,18 +529,18 @@ template <typename CalleeTy>
bool StackSafetyDataFlowAnalysis<CalleeTy>::updateOneUse(UseInfo<CalleeTy> &US,
bool UpdateToFullSet) {
bool Changed = false;
- for (auto &CS : US.Calls) {
- assert(!CS.Offset.isEmptySet() &&
+ for (auto &KV : US.Calls) {
+ assert(!KV.second.isEmptySet() &&
"Param range can't be empty-set, invalid offset range");
ConstantRange CalleeRange =
- getArgumentAccessRange(CS.Callee, CS.ParamNo, CS.Offset);
+ getArgumentAccessRange(KV.first.Callee, KV.first.ParamNo, KV.second);
if (!US.Range.contains(CalleeRange)) {
Changed = true;
if (UpdateToFullSet)
US.Range = UnknownRange;
else
- US.Range = US.Range.unionWith(CalleeRange);
+ US.updateRange(CalleeRange);
}
}
return Changed;
@@ -535,7 +574,7 @@ void StackSafetyDataFlowAnalysis<CalleeTy>::runDataFlow() {
auto &FS = F.second;
for (auto &KV : FS.Params)
for (auto &CS : KV.second.Calls)
- Callees.push_back(CS.Callee);
+ Callees.push_back(CS.first.Callee);
llvm::sort(Callees);
Callees.erase(std::unique(Callees.begin(), Callees.end()), Callees.end());
@@ -570,14 +609,52 @@ StackSafetyDataFlowAnalysis<CalleeTy>::run() {
return Functions;
}
-FunctionSummary *resolveCallee(GlobalValueSummary *S) {
+FunctionSummary *findCalleeFunctionSummary(ValueInfo VI, StringRef ModuleId) {
+ if (!VI)
+ return nullptr;
+ auto SummaryList = VI.getSummaryList();
+ GlobalValueSummary *S = nullptr;
+ for (const auto &GVS : SummaryList) {
+ if (!GVS->isLive())
+ continue;
+ if (const AliasSummary *AS = dyn_cast<AliasSummary>(GVS.get()))
+ if (!AS->hasAliasee())
+ continue;
+ if (!isa<FunctionSummary>(GVS->getBaseObject()))
+ continue;
+ if (GlobalValue::isLocalLinkage(GVS->linkage())) {
+ if (GVS->modulePath() == ModuleId) {
+ S = GVS.get();
+ break;
+ }
+ } else if (GlobalValue::isExternalLinkage(GVS->linkage())) {
+ if (S) {
+ ++NumIndexCalleeMultipleExternal;
+ return nullptr;
+ }
+ S = GVS.get();
+ } else if (GlobalValue::isWeakLinkage(GVS->linkage())) {
+ if (S) {
+ ++NumIndexCalleeMultipleWeak;
+ return nullptr;
+ }
+ S = GVS.get();
+ } else if (GlobalValue::isAvailableExternallyLinkage(GVS->linkage()) ||
+ GlobalValue::isLinkOnceLinkage(GVS->linkage())) {
+ if (SummaryList.size() == 1)
+ S = GVS.get();
+ // According to thinLTOResolvePrevailingGUID these are unlikely to be prevailing.
+ } else {
+ ++NumIndexCalleeUnhandled;
+ }
+ }
while (S) {
if (!S->isLive() || !S->isDSOLocal())
return nullptr;
if (FunctionSummary *FS = dyn_cast<FunctionSummary>(S))
return FS;
AliasSummary *AS = dyn_cast<AliasSummary>(S);
- if (!AS)
+ if (!AS || !AS->hasAliasee())
return nullptr;
S = AS->getBaseObject();
if (S == AS)
@@ -602,16 +679,6 @@ const Function *findCalleeInModule(const GlobalValue *GV) {
return nullptr;
}
-GlobalValueSummary *getGlobalValueSummary(const ModuleSummaryIndex *Index,
- uint64_t ValueGUID) {
- auto VI = Index->getValueInfo(ValueGUID);
- if (!VI || VI.getSummaryList().empty())
- return nullptr;
- assert(VI.getSummaryList().size() == 1);
- auto &Summary = VI.getSummaryList()[0];
- return Summary.get();
-}
-
const ConstantRange *findParamAccess(const FunctionSummary &FS,
uint32_t ParamNo) {
assert(FS.isLive());
@@ -625,31 +692,34 @@ const ConstantRange *findParamAccess(const FunctionSummary &FS,
void resolveAllCalls(UseInfo<GlobalValue> &Use,
const ModuleSummaryIndex *Index) {
ConstantRange FullSet(Use.Range.getBitWidth(), true);
- for (auto &C : Use.Calls) {
- const Function *F = findCalleeInModule(C.Callee);
+ // Move Use.Calls to temporary storage and repopulate it - don't use std::move,
+ // as that would leave Use.Calls in a valid but unspecified state.
+ UseInfo<GlobalValue>::CallsTy TmpCalls;
+ std::swap(TmpCalls, Use.Calls);
+ for (const auto &C : TmpCalls) {
+ const Function *F = findCalleeInModule(C.first.Callee);
if (F) {
- C.Callee = F;
+ Use.Calls.emplace(CallInfo<GlobalValue>(F, C.first.ParamNo), C.second);
continue;
}
if (!Index)
return Use.updateRange(FullSet);
- GlobalValueSummary *GVS = getGlobalValueSummary(Index, C.Callee->getGUID());
-
- FunctionSummary *FS = resolveCallee(GVS);
- if (!FS)
+ FunctionSummary *FS =
+ findCalleeFunctionSummary(Index->getValueInfo(C.first.Callee->getGUID()),
+ C.first.Callee->getParent()->getModuleIdentifier());
+ ++NumModuleCalleeLookupTotal;
+ if (!FS) {
+ ++NumModuleCalleeLookupFailed;
return Use.updateRange(FullSet);
- const ConstantRange *Found = findParamAccess(*FS, C.ParamNo);
- if (!Found)
+ }
+ const ConstantRange *Found = findParamAccess(*FS, C.first.ParamNo);
+ if (!Found || Found->isFullSet())
return Use.updateRange(FullSet);
ConstantRange Access = Found->sextOrTrunc(Use.Range.getBitWidth());
- Use.updateRange(addOverflowNever(Access, C.Offset));
- C.Callee = nullptr;
+ if (!Access.isEmptySet())
+ Use.updateRange(addOverflowNever(Access, C.second));
}
-
- Use.Calls.erase(std::remove_if(Use.Calls.begin(), Use.Calls.end(),
- [](auto &T) { return !T.Callee; }),
- Use.Calls.end());
}
GVToSSI createGlobalStackSafetyInfo(
@@ -663,8 +733,11 @@ GVToSSI createGlobalStackSafetyInfo(
auto Copy = Functions;
for (auto &FnKV : Copy)
- for (auto &KV : FnKV.second.Params)
+ for (auto &KV : FnKV.second.Params) {
resolveAllCalls(KV.second, Index);
+ if (KV.second.Range.isFullSet())
+ KV.second.Calls.clear();
+ }
uint32_t PointerSize = Copy.begin()
->first->getParent()
@@ -679,8 +752,8 @@ GVToSSI createGlobalStackSafetyInfo(
auto &A = KV.second;
resolveAllCalls(A, Index);
for (auto &C : A.Calls) {
- A.updateRange(
- SSDFA.getArgumentAccessRange(C.Callee, C.ParamNo, C.Offset));
+ A.updateRange(SSDFA.getArgumentAccessRange(C.first.Callee,
+ C.first.ParamNo, C.second));
}
// FIXME: This is needed only to preserve calls in print() results.
A.Calls = SrcF.Allocas.find(KV.first)->second.Calls;
@@ -749,7 +822,7 @@ const StackSafetyGlobalInfo::InfoTy &StackSafetyGlobalInfo::getInfo() const {
}
std::vector<FunctionSummary::ParamAccess>
-StackSafetyInfo::getParamAccesses() const {
+StackSafetyInfo::getParamAccesses(ModuleSummaryIndex &Index) const {
// Implementation transforms internal representation of parameter information
// into FunctionSummary format.
std::vector<FunctionSummary::ParamAccess> ParamAccesses;
@@ -770,13 +843,21 @@ StackSafetyInfo::getParamAccesses() const {
// will make ParamAccess::Range FullSet anyway, so we can drop the
// entire parameter like we did above.
// TODO(vitalybuka): Return already filtered parameters from getInfo().
- if (C.Offset.isFullSet()) {
+ if (C.second.isFullSet()) {
ParamAccesses.pop_back();
break;
}
- Param.Calls.emplace_back(C.ParamNo, C.Callee->getGUID(), C.Offset);
+ Param.Calls.emplace_back(C.first.ParamNo,
+ Index.getOrInsertValueInfo(C.first.Callee),
+ C.second);
}
}
+ for (FunctionSummary::ParamAccess &Param : ParamAccesses) {
+ sort(Param.Calls, [](const FunctionSummary::ParamAccess::Call &L,
+ const FunctionSummary::ParamAccess::Call &R) {
+ return std::tie(L.ParamNo, L.Callee) < std::tie(R.ParamNo, R.Callee);
+ });
+ }
return ParamAccesses;
}
@@ -921,14 +1002,28 @@ bool llvm::needsParamAccessSummary(const Module &M) {
}
void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) {
+ if (!Index.hasParamAccess())
+ return;
const ConstantRange FullSet(FunctionSummary::ParamAccess::RangeWidth, true);
+
+ auto CountParamAccesses = [&](auto &Stat) {
+ if (!AreStatisticsEnabled())
+ return;
+ for (auto &GVS : Index)
+ for (auto &GV : GVS.second.SummaryList)
+ if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GV.get()))
+ Stat += FS->paramAccesses().size();
+ };
+
+ CountParamAccesses(NumCombinedParamAccessesBefore);
+
std::map<const FunctionSummary *, FunctionInfo<FunctionSummary>> Functions;
// Convert the ModuleSummaryIndex to a FunctionMap
for (auto &GVS : Index) {
for (auto &GV : GVS.second.SummaryList) {
FunctionSummary *FS = dyn_cast<FunctionSummary>(GV.get());
- if (!FS)
+ if (!FS || FS->paramAccesses().empty())
continue;
if (FS->isLive() && FS->isDSOLocal()) {
FunctionInfo<FunctionSummary> FI;
@@ -940,14 +1035,17 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) {
US.Range = PS.Use;
for (auto &Call : PS.Calls) {
assert(!Call.Offsets.isFullSet());
- FunctionSummary *S = resolveCallee(
- Index.findSummaryInModule(Call.Callee, FS->modulePath()));
+ FunctionSummary *S =
+ findCalleeFunctionSummary(Call.Callee, FS->modulePath());
+ ++NumCombinedCalleeLookupTotal;
if (!S) {
+ ++NumCombinedCalleeLookupFailed;
US.Range = FullSet;
US.Calls.clear();
break;
}
- US.Calls.emplace_back(S, Call.ParamNo, Call.Offsets);
+ US.Calls.emplace(CallInfo<FunctionSummary>(S, Call.ParamNo),
+ Call.Offsets);
}
}
Functions.emplace(FS, std::move(FI));
@@ -958,12 +1056,16 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) {
FS->setParamAccesses({});
}
}
+ NumCombinedDataFlowNodes += Functions.size();
StackSafetyDataFlowAnalysis<FunctionSummary> SSDFA(
FunctionSummary::ParamAccess::RangeWidth, std::move(Functions));
for (auto &KV : SSDFA.run()) {
std::vector<FunctionSummary::ParamAccess> NewParams;
NewParams.reserve(KV.second.Params.size());
for (auto &Param : KV.second.Params) {
+ // A FullSet entry is not needed, as it is processed the same as a missing value.
+ if (Param.second.Range.isFullSet())
+ continue;
NewParams.emplace_back();
FunctionSummary::ParamAccess &New = NewParams.back();
New.ParamNo = Param.first;
@@ -972,6 +1074,8 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) {
const_cast<FunctionSummary *>(KV.first)->setParamAccesses(
std::move(NewParams));
}
+
+ CountParamAccesses(NumCombinedParamAccessesAfter);
}
static const char LocalPassArg[] = "stack-safety-local";
diff --git a/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
index ccf520dcea66..67a1365b698d 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -1,5 +1,4 @@
-//===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
-//--===//
+//===--- SyncDependenceAnalysis.cpp - Compute Control Divergence Effects --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -99,280 +98,364 @@
// loop exit and the loop header (_after_ SSA construction).
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/SyncDependenceAnalysis.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/SyncDependenceAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include <functional>
#include <stack>
#include <unordered_set>
#define DEBUG_TYPE "sync-dependence"
+// The SDA algorithm operates on a modified CFG - we modify the edges leaving
+// loop headers as follows:
+//
+// * We remove all edges leaving all loop headers.
+// * We add additional edges from the loop headers to their exit blocks.
+//
+// The modification is virtual, that is, whenever we visit a loop header we
+// pretend it has different successors.
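A minimal sketch of what "virtual" means here, not part of the patch (the helper name is made up; it assumes the headers already included in this file):

// Sketch: successors of BB in the virtually modified CFG described above.
static SmallVector<const BasicBlock *, 4>
virtualSuccessors(const BasicBlock &BB, const LoopInfo &LI) {
  SmallVector<const BasicBlock *, 4> Succs;
  const Loop *L = LI.getLoopFor(&BB);
  if (L && L->getHeader() == &BB) {
    // Loop header: drop the real out-edges and connect to the loop exits.
    SmallVector<BasicBlock *, 4> Exits;
    L->getExitBlocks(Exits);
    Succs.append(Exits.begin(), Exits.end());
    return Succs;
  }
  for (const BasicBlock *Succ : successors(&BB))
    Succs.push_back(Succ);
  return Succs;
}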
+namespace {
+using namespace llvm;
+
+// Custom Post-Order Traversal
+//
+// We cannot use the vanilla (R)PO computation of LLVM because:
+// * We (virtually) modify the CFG.
+// * We want a loop-compact block enumeration, that is, the numbers assigned by
+// the traversal to the blocks of a loop form an interval.
+using POCB = std::function<void(const BasicBlock &)>;
+using VisitedSet = std::set<const BasicBlock *>;
+using BlockStack = std::vector<const BasicBlock *>;
+
+// forward
+static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
+ VisitedSet &Finalized);
+
+// for a nested region (top-level loop or nested loop)
+static void computeStackPO(BlockStack &Stack, const LoopInfo &LI, Loop *Loop,
+ POCB CallBack, VisitedSet &Finalized) {
+ const auto *LoopHeader = Loop ? Loop->getHeader() : nullptr;
+ while (!Stack.empty()) {
+ const auto *NextBB = Stack.back();
+
+ auto *NestedLoop = LI.getLoopFor(NextBB);
+ bool IsNestedLoop = NestedLoop != Loop;
+
+ // Treat the loop as a node
+ if (IsNestedLoop) {
+ SmallVector<BasicBlock *, 3> NestedExits;
+ NestedLoop->getUniqueExitBlocks(NestedExits);
+ bool PushedNodes = false;
+ for (const auto *NestedExitBB : NestedExits) {
+ if (NestedExitBB == LoopHeader)
+ continue;
+ if (Loop && !Loop->contains(NestedExitBB))
+ continue;
+ if (Finalized.count(NestedExitBB))
+ continue;
+ PushedNodes = true;
+ Stack.push_back(NestedExitBB);
+ }
+ if (!PushedNodes) {
+ // All loop exits finalized -> finish this node
+ Stack.pop_back();
+ computeLoopPO(LI, *NestedLoop, CallBack, Finalized);
+ }
+ continue;
+ }
+
+ // DAG-style
+ bool PushedNodes = false;
+ for (const auto *SuccBB : successors(NextBB)) {
+ if (SuccBB == LoopHeader)
+ continue;
+ if (Loop && !Loop->contains(SuccBB))
+ continue;
+ if (Finalized.count(SuccBB))
+ continue;
+ PushedNodes = true;
+ Stack.push_back(SuccBB);
+ }
+ if (!PushedNodes) {
+ // Never push nodes twice
+ Stack.pop_back();
+ if (!Finalized.insert(NextBB).second)
+ continue;
+ CallBack(*NextBB);
+ }
+ }
+}
+
+static void computeTopLevelPO(Function &F, const LoopInfo &LI, POCB CallBack) {
+ VisitedSet Finalized;
+ BlockStack Stack;
+ Stack.reserve(24); // FIXME made-up number
+ Stack.push_back(&F.getEntryBlock());
+ computeStackPO(Stack, LI, nullptr, CallBack, Finalized);
+}
+
+static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
+ VisitedSet &Finalized) {
+ /// Call CallBack on all loop blocks.
+ std::vector<const BasicBlock *> Stack;
+ const auto *LoopHeader = Loop.getHeader();
+
+ // Visit the header last
+ Finalized.insert(LoopHeader);
+ CallBack(*LoopHeader);
+
+ // Initialize with immediate successors
+ for (const auto *BB : successors(LoopHeader)) {
+ if (!Loop.contains(BB))
+ continue;
+ if (BB == LoopHeader)
+ continue;
+ Stack.push_back(BB);
+ }
+
+ // Compute PO inside region
+ computeStackPO(Stack, LI, &Loop, CallBack, Finalized);
+}
+
+} // namespace
+
namespace llvm {
-ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet;
+ControlDivergenceDesc SyncDependenceAnalysis::EmptyDivergenceDesc;
SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
const PostDominatorTree &PDT,
const LoopInfo &LI)
- : FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {}
+ : DT(DT), PDT(PDT), LI(LI) {
+ computeTopLevelPO(*DT.getRoot()->getParent(), LI,
+ [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); });
+}
SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
-using FunctionRPOT = ReversePostOrderTraversal<const Function *>;
-
// divergence propagator for reducible CFGs
struct DivergencePropagator {
- const FunctionRPOT &FuncRPOT;
+ const ModifiedPO &LoopPOT;
const DominatorTree &DT;
const PostDominatorTree &PDT;
const LoopInfo &LI;
-
- // identified join points
- std::unique_ptr<ConstBlockSet> JoinBlocks;
-
- // reached loop exits (by a path disjoint to a path to the loop header)
- SmallPtrSet<const BasicBlock *, 4> ReachedLoopExits;
-
- // if DefMap[B] == C then C is the dominating definition at block B
- // if DefMap[B] ~ undef then we haven't seen B yet
- // if DefMap[B] == B then B is a join point of disjoint paths from X or B is
- // an immediate successor of X (initial value).
- using DefiningBlockMap = std::map<const BasicBlock *, const BasicBlock *>;
- DefiningBlockMap DefMap;
-
- // all blocks with pending visits
- std::unordered_set<const BasicBlock *> PendingUpdates;
-
- DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT,
- const PostDominatorTree &PDT, const LoopInfo &LI)
- : FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI),
- JoinBlocks(new ConstBlockSet) {}
-
- // set the definition at @block and mark @block as pending for a visit
- void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) {
- bool WasAdded = DefMap.emplace(&Block, &DefBlock).second;
- if (WasAdded)
- PendingUpdates.insert(&Block);
- }
+ const BasicBlock &DivTermBlock;
+
+ // * if BlockLabels[IndexOf(B)] == C then C is the dominating definition at
+ // block B
+ // * if BlockLabels[IndexOf(B)] ~ undef then we haven't seen B yet
+ // * if BlockLabels[IndexOf(B)] == B then B is a join point of disjoint paths
+ // from X or B is an immediate successor of X (initial value).
+ using BlockLabelVec = std::vector<const BasicBlock *>;
+ BlockLabelVec BlockLabels;
+ // divergent join and loop exit descriptor.
+ std::unique_ptr<ControlDivergenceDesc> DivDesc;
+
+ DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT,
+ const PostDominatorTree &PDT, const LoopInfo &LI,
+ const BasicBlock &DivTermBlock)
+ : LoopPOT(LoopPOT), DT(DT), PDT(PDT), LI(LI), DivTermBlock(DivTermBlock),
+ BlockLabels(LoopPOT.size(), nullptr),
+ DivDesc(new ControlDivergenceDesc) {}
void printDefs(raw_ostream &Out) {
- Out << "Propagator::DefMap {\n";
- for (const auto *Block : FuncRPOT) {
- auto It = DefMap.find(Block);
- Out << Block->getName() << " : ";
- if (It == DefMap.end()) {
- Out << "\n";
+ Out << "Propagator::BlockLabels {\n";
+ for (int BlockIdx = (int)BlockLabels.size() - 1; BlockIdx > 0; --BlockIdx) {
+ const auto *Label = BlockLabels[BlockIdx];
+ Out << LoopPOT.getBlockAt(BlockIdx)->getName().str() << "(" << BlockIdx
+ << ") : ";
+ if (!Label) {
+ Out << "<null>\n";
} else {
- const auto *DefBlock = It->second;
- Out << (DefBlock ? DefBlock->getName() : "<null>") << "\n";
+ Out << Label->getName() << "\n";
}
}
Out << "}\n";
}
- // process @succBlock with reaching definition @defBlock
- // the original divergent branch was in @parentLoop (if any)
- void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop,
- const BasicBlock &DefBlock) {
+ // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
+ // causes a divergent join.
+ bool computeJoin(const BasicBlock &SuccBlock, const BasicBlock &PushedLabel) {
+ auto SuccIdx = LoopPOT.getIndexOf(SuccBlock);
- // @succBlock is a loop exit
- if (ParentLoop && !ParentLoop->contains(&SuccBlock)) {
- DefMap.emplace(&SuccBlock, &DefBlock);
- ReachedLoopExits.insert(&SuccBlock);
- return;
+ // unset or same reaching label
+ const auto *OldLabel = BlockLabels[SuccIdx];
+ if (!OldLabel || (OldLabel == &PushedLabel)) {
+ BlockLabels[SuccIdx] = &PushedLabel;
+ return false;
}
- // first reaching def?
- auto ItLastDef = DefMap.find(&SuccBlock);
- if (ItLastDef == DefMap.end()) {
- addPending(SuccBlock, DefBlock);
- return;
- }
+ // Update the definition
+ BlockLabels[SuccIdx] = &SuccBlock;
+ return true;
+ }
- // a join of at least two definitions
- if (ItLastDef->second != &DefBlock) {
- // do we know this join already?
- if (!JoinBlocks->insert(&SuccBlock).second)
- return;
+ // visiting a virtual loop exit edge from the loop header --> temporal
+ // divergence on join
+ bool visitLoopExitEdge(const BasicBlock &ExitBlock,
+ const BasicBlock &DefBlock, bool FromParentLoop) {
+ // Pushing from a non-parent loop cannot cause temporal divergence.
+ if (!FromParentLoop)
+ return visitEdge(ExitBlock, DefBlock);
+
+ if (!computeJoin(ExitBlock, DefBlock))
+ return false;
+
+ // Identified a divergent loop exit
+ DivDesc->LoopDivBlocks.insert(&ExitBlock);
+ LLVM_DEBUG(dbgs() << "\tDivergent loop exit: " << ExitBlock.getName()
+ << "\n");
+ return true;
+ }
- // update the definition
- addPending(SuccBlock, SuccBlock);
- }
+ // process \p SuccBlock with reaching definition \p DefBlock
+ bool visitEdge(const BasicBlock &SuccBlock, const BasicBlock &DefBlock) {
+ if (!computeJoin(SuccBlock, DefBlock))
+ return false;
+
+ // Divergent, disjoint paths join.
+ DivDesc->JoinDivBlocks.insert(&SuccBlock);
+ LLVM_DEBUG(dbgs() << "\tDivergent join: " << SuccBlock.getName());
+ return true;
}
- // find all blocks reachable by two disjoint paths from @rootTerm.
- // This method works for both divergent terminators and loops with
- // divergent exits.
- // @rootBlock is either the block containing the branch or the header of the
- // divergent loop.
- // @nodeSuccessors is the set of successors of the node (Loop or Terminator)
- // headed by @rootBlock.
- // @parentLoop is the parent loop of the Loop or the loop that contains the
- // Terminator.
- template <typename SuccessorIterable>
- std::unique_ptr<ConstBlockSet>
- computeJoinPoints(const BasicBlock &RootBlock,
- SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
- assert(JoinBlocks);
-
- LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints. Parent loop: " << (ParentLoop ? ParentLoop->getName() : "<null>") << "\n" );
+ std::unique_ptr<ControlDivergenceDesc> computeJoinPoints() {
+ assert(DivDesc);
+
+ LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " << DivTermBlock.getName()
+ << "\n");
+
+ const auto *DivBlockLoop = LI.getLoopFor(&DivTermBlock);
+
+ // Early stopping criterion
+ int FloorIdx = LoopPOT.size() - 1;
+ const BasicBlock *FloorLabel = nullptr;
// bootstrap with branch targets
- for (const auto *SuccBlock : NodeSuccessors) {
- DefMap.emplace(SuccBlock, SuccBlock);
+ int BlockIdx = 0;
- if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
- // immediate loop exit from node.
- ReachedLoopExits.insert(SuccBlock);
- } else {
- // regular successor
- PendingUpdates.insert(SuccBlock);
- }
- }
+ for (const auto *SuccBlock : successors(&DivTermBlock)) {
+ auto SuccIdx = LoopPOT.getIndexOf(*SuccBlock);
+ BlockLabels[SuccIdx] = SuccBlock;
- LLVM_DEBUG(
- dbgs() << "SDA: rpo order:\n";
- for (const auto * RpoBlock : FuncRPOT) {
- dbgs() << "- " << RpoBlock->getName() << "\n";
- }
- );
+ // Find the successor with the highest index to start with
+ BlockIdx = std::max<int>(BlockIdx, SuccIdx);
+ FloorIdx = std::min<int>(FloorIdx, SuccIdx);
- auto ItBeginRPO = FuncRPOT.begin();
- auto ItEndRPO = FuncRPOT.end();
+ // Identify immediate divergent loop exits
+ if (!DivBlockLoop)
+ continue;
- // skip until term (TODO RPOT won't let us start at @term directly)
- for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {
- assert(ItBeginRPO != ItEndRPO && "Unable to find RootBlock");
+ const auto *BlockLoop = LI.getLoopFor(SuccBlock);
+ if (BlockLoop && DivBlockLoop->contains(BlockLoop))
+ continue;
+ DivDesc->LoopDivBlocks.insert(SuccBlock);
+ LLVM_DEBUG(dbgs() << "\tImmediate divergent loop exit: "
+ << SuccBlock->getName() << "\n");
}
// propagate definitions at the immediate successors of the node in RPO
- auto ItBlockRPO = ItBeginRPO;
- while ((++ItBlockRPO != ItEndRPO) &&
- !PendingUpdates.empty()) {
- const auto *Block = *ItBlockRPO;
- LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
+ for (; BlockIdx >= FloorIdx; --BlockIdx) {
+ LLVM_DEBUG(dbgs() << "Before next visit:\n"; printDefs(dbgs()));
- // skip Block if not pending update
- auto ItPending = PendingUpdates.find(Block);
- if (ItPending == PendingUpdates.end())
+ // Any label available here
+ const auto *Label = BlockLabels[BlockIdx];
+ if (!Label)
continue;
- PendingUpdates.erase(ItPending);
- // propagate definition at Block to its successors
- auto ItDef = DefMap.find(Block);
- const auto *DefBlock = ItDef->second;
- assert(DefBlock);
+ // Ok. Get the block
+ const auto *Block = LoopPOT.getBlockAt(BlockIdx);
+ LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
auto *BlockLoop = LI.getLoopFor(Block);
- if (ParentLoop &&
- (ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) {
- // if the successor is the header of a nested loop pretend its a
- // single node with the loop's exits as successors
+ bool IsLoopHeader = BlockLoop && BlockLoop->getHeader() == Block;
+ bool CausedJoin = false;
+ int LoweredFloorIdx = FloorIdx;
+ if (IsLoopHeader) {
+ // Disconnect from immediate successors and propagate directly to loop
+ // exits.
SmallVector<BasicBlock *, 4> BlockLoopExits;
BlockLoop->getExitBlocks(BlockLoopExits);
+
+ bool IsParentLoop = BlockLoop->contains(&DivTermBlock);
for (const auto *BlockLoopExit : BlockLoopExits) {
- visitSuccessor(*BlockLoopExit, ParentLoop, *DefBlock);
+ CausedJoin |= visitLoopExitEdge(*BlockLoopExit, *Label, IsParentLoop);
+ LoweredFloorIdx = std::min<int>(LoweredFloorIdx,
+ LoopPOT.getIndexOf(*BlockLoopExit));
}
-
} else {
- // the successors are either on the same loop level or loop exits
+ // Acyclic successor case
for (const auto *SuccBlock : successors(Block)) {
- visitSuccessor(*SuccBlock, ParentLoop, *DefBlock);
+ CausedJoin |= visitEdge(*SuccBlock, *Label);
+ LoweredFloorIdx =
+ std::min<int>(LoweredFloorIdx, LoopPOT.getIndexOf(*SuccBlock));
}
}
- }
- LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
-
- // We need to know the definition at the parent loop header to decide
- // whether the definition at the header is different from the definition at
- // the loop exits, which would indicate a divergent loop exits.
- //
- // A // loop header
- // |
- // B // nested loop header
- // |
- // C -> X (exit from B loop) -..-> (A latch)
- // |
- // D -> back to B (B latch)
- // |
- // proper exit from both loops
- //
- // analyze reached loop exits
- if (!ReachedLoopExits.empty()) {
- const BasicBlock *ParentLoopHeader =
- ParentLoop ? ParentLoop->getHeader() : nullptr;
-
- assert(ParentLoop);
- auto ItHeaderDef = DefMap.find(ParentLoopHeader);
- const auto *HeaderDefBlock = (ItHeaderDef == DefMap.end()) ? nullptr : ItHeaderDef->second;
-
- LLVM_DEBUG(printDefs(dbgs()));
- assert(HeaderDefBlock && "no definition at header of carrying loop");
-
- for (const auto *ExitBlock : ReachedLoopExits) {
- auto ItExitDef = DefMap.find(ExitBlock);
- assert((ItExitDef != DefMap.end()) &&
- "no reaching def at reachable loop exit");
- if (ItExitDef->second != HeaderDefBlock) {
- JoinBlocks->insert(ExitBlock);
- }
+ // Floor update
+ if (CausedJoin) {
+ // 1. Different labels pushed to successors
+ FloorIdx = LoweredFloorIdx;
+ } else if (FloorLabel != Label) {
+ // 2. No join caused BUT we pushed a label that is different than the
+ // last pushed label
+ FloorIdx = LoweredFloorIdx;
+ FloorLabel = Label;
}
}
- return std::move(JoinBlocks);
+ LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
+
+ return std::move(DivDesc);
}
};
-const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
- using LoopExitVec = SmallVector<BasicBlock *, 4>;
- LoopExitVec LoopExits;
- Loop.getExitBlocks(LoopExits);
- if (LoopExits.size() < 1) {
- return EmptyBlockSet;
+#ifndef NDEBUG
+static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) {
+ Out << "[";
+ bool First = true;
+ for (const auto *BB : Blocks) {
+ if (!First)
+ Out << ", ";
+ First = false;
+ Out << BB->getName();
}
-
- // already available in cache?
- auto ItCached = CachedLoopExitJoins.find(&Loop);
- if (ItCached != CachedLoopExitJoins.end()) {
- return *ItCached->second;
- }
-
- // compute all join points
- DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
- auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
- *Loop.getHeader(), LoopExits, Loop.getParentLoop());
-
- auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
- assert(ItInserted.second);
- return *ItInserted.first->second;
+ Out << "]";
}
+#endif
-const ConstBlockSet &
-SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
+const ControlDivergenceDesc &
+SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) {
// trivial case
- if (Term.getNumSuccessors() < 1) {
- return EmptyBlockSet;
+ if (Term.getNumSuccessors() <= 1) {
+ return EmptyDivergenceDesc;
}
// already available in cache?
- auto ItCached = CachedBranchJoins.find(&Term);
- if (ItCached != CachedBranchJoins.end())
+ auto ItCached = CachedControlDivDescs.find(&Term);
+ if (ItCached != CachedControlDivDescs.end())
return *ItCached->second;
// compute all join points
- DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
+ // Special handling of divergent loop exits is not needed for LCSSA
const auto &TermBlock = *Term.getParent();
- auto JoinBlocks = Propagator.computeJoinPoints<const_succ_range>(
- TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
+ DivergencePropagator Propagator(LoopPO, DT, PDT, LI, TermBlock);
+ auto DivDesc = Propagator.computeJoinPoints();
+
+ LLVM_DEBUG(dbgs() << "Result (" << Term.getParent()->getName() << "):\n";
+ dbgs() << "JoinDivBlocks: ";
+ printBlockSet(DivDesc->JoinDivBlocks, dbgs());
+ dbgs() << "\nLoopDivBlocks: ";
+ printBlockSet(DivDesc->LoopDivBlocks, dbgs()); dbgs() << "\n";);
- auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
+ auto ItInserted = CachedControlDivDescs.emplace(&Term, std::move(DivDesc));
assert(ItInserted.second);
return *ItInserted.first->second;
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TFUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/TFUtils.cpp
index 19e6d626e238..1377cac217ab 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TFUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TFUtils.cpp
@@ -10,17 +10,23 @@
// This file implements utilities for interfacing with tensorflow C APIs.
//
//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+#if defined(LLVM_HAVE_TF_API)
-#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/JSON.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "tensorflow/c/c_api.h"
#include "tensorflow/c/c_api_experimental.h"
#include <cassert>
+#include <numeric>
using namespace llvm;
@@ -58,6 +64,89 @@ TFStatusPtr createTFStatus() {
TFSessionOptionsPtr createTFSessionOptions() {
return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
}
+
+/// Write the values of one tensor as a list.
+template <typename T>
+void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
+ size_t ElemCount) {
+ OutFile << "[";
+ const T *TypedData = reinterpret_cast<const T *>(TensorData);
+ for (size_t I = 0; I < ElemCount; ++I) {
+ if (I > 0)
+ OutFile << ", ";
+ OutFile << TypedData[I];
+ }
+ OutFile << "]";
+}
+
+/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
+/// The tensors are assumed to be stored contiguously, in row-major format,
+/// in the TensorData buffer. Each tensor has the shape given by Spec. The
+/// feature name in the output is either the provided LoggingName, if
+/// specified, otherwise it's the name of the tensor (as given by Spec).
+void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
+ const LoggedFeatureSpec &LoggedSpec,
+ const char *TensorData, size_t TensorCount,
+ bool FinalReward = false) {
+ const char *FieldName = "<invalid>";
+ std::function<void(const char *)> ValueWriter;
+ const auto &Spec = LoggedSpec.Spec;
+ // The 'Feature' protobuf only has 3 possible fields: float_list,
+ // int64_list, or bytes_list, so we capture int32 values as int64. We don't
+ // support any other types.
+ if (Spec.isElementType<int64_t>()) {
+ FieldName = "int64_list";
+ ValueWriter = [&](const char *Data) {
+ writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
+ };
+ } else if (Spec.isElementType<int32_t>()) {
+ FieldName = "int64_list";
+ ValueWriter = [&](const char *Data) {
+ writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
+ };
+
+ } else if (Spec.isElementType<float>()) {
+ FieldName = "float_list";
+ ValueWriter = [&](const char *Data) {
+ writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
+ };
+
+ } else {
+ llvm_unreachable("Unsupported tensor type.");
+ }
+
+ OutFile << " feature_list: {\n";
+ OutFile << " key: "
+ << "\""
+ << (LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name())
+ << "\" ";
+ OutFile << "value: {\n";
+ size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
+
+ auto WriteFeatureProto = [&](const char *P) {
+ OutFile << " feature: { " << FieldName << ": { value: ";
+ ValueWriter(P);
+ OutFile << " } }\n";
+ };
+
+ const char *CurrentTensor = TensorData;
+ static int64_t Zero = 0;
+ // Write all but the last value. If this is the final reward, don't increment
+ // the CurrentTensor, and just write 0.
+ for (size_t I = 0; I < TensorCount - 1; ++I) {
+ if (FinalReward)
+ WriteFeatureProto(reinterpret_cast<const char *>(&Zero));
+ else {
+ WriteFeatureProto(CurrentTensor);
+ CurrentTensor += TensorByteSize;
+ }
+ }
+
+ WriteFeatureProto(CurrentTensor);
+
+ OutFile << " }\n";
+ OutFile << " }\n";
+}
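For orientation, the text emitted by this helper is a fragment of a tensorflow.SequenceExample in text-proto form. For a hypothetical int64 feature logged as "x", with two elements per record and two records, the output would look roughly like this (indentation approximate):

feature_list: {
  key: "x" value: {
    feature: { int64_list: { value: [1, 2] } }
    feature: { int64_list: { value: [3, 4] } }
  }
}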
} // namespace
namespace llvm {
@@ -81,12 +170,122 @@ private:
std::vector<TF_Tensor *> Output;
};
+size_t TensorSpec::getElementByteSize() const {
+ return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex));
+}
+
+TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex,
+ const std::vector<int64_t> &Shape)
+ : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape),
+ ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
+ std::multiplies<int64_t>())) {}
+
+Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
+ const json::Value &Value) {
+ auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
+ std::string S;
+ llvm::raw_string_ostream OS(S);
+ OS << Value;
+ Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
+ return None;
+ };
+ // FIXME: accept a Path as a parameter, and use it for error reporting.
+ json::Path::Root Root("tensor_spec");
+ json::ObjectMapper Mapper(Value, Root);
+ if (!Mapper)
+ return EmitError("Value is not a dict");
+
+ std::string TensorName;
+ int TensorPort = -1;
+ std::string TensorType;
+ std::vector<int64_t> TensorShape;
+
+ if (!Mapper.map<std::string>("name", TensorName))
+ return EmitError("'name' property not present or not a string");
+ if (!Mapper.map<std::string>("type", TensorType))
+ return EmitError("'type' property not present or not a string");
+ if (!Mapper.map<int>("port", TensorPort))
+ return EmitError("'port' property not present or not an int");
+ if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
+ return EmitError("'shape' property not present or not an int array");
+
+#define PARSE_TYPE(T, E) \
+ if (TensorType == #T) \
+ return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
+ TFUTILS_SUPPORTED_TYPES(PARSE_TYPE)
+#undef PARSE_TYPE
+ return None;
+}
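Given the mapper keys above, a hypothetical JSON value accepted by getTensorSpecFromJSON looks like the following; note that "type" must spell the C type exactly as listed in TFUTILS_SUPPORTED_TYPES (for example int64_t or float):

{ "name": "input_feature", "port": 0, "type": "int64_t", "shape": [1, 10] }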
+
+Optional<std::vector<LoggedFeatureSpec>>
+loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
+ StringRef ModelPath, StringRef SpecFileOverride) {
+ SmallVector<char, 128> OutputSpecsPath;
+ StringRef FileName = SpecFileOverride;
+ if (FileName.empty()) {
+ llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
+ FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()};
+ }
+
+ auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
+ if (!BufferOrError) {
+ Ctx.emitError("Error opening output specs file: " + FileName + " : " +
+ BufferOrError.getError().message());
+ return None;
+ }
+ auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
+ if (!ParsedJSONValues) {
+ Ctx.emitError("Could not parse specs file: " + FileName);
+ return None;
+ }
+ auto ValuesArray = ParsedJSONValues->getAsArray();
+ if (!ValuesArray) {
+ Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
+ "logging_name:<name>} dictionaries");
+ return None;
+ }
+ std::vector<LoggedFeatureSpec> Ret;
+ for (const auto &Value : *ValuesArray)
+ if (const auto *Obj = Value.getAsObject())
+ if (const auto *SpecPart = Obj->get("tensor_spec"))
+ if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
+ if (auto LoggingName = Obj->getString("logging_name")) {
+ if (!TensorSpec->isElementType<int64_t>() &&
+ !TensorSpec->isElementType<int32_t>() &&
+ !TensorSpec->isElementType<float>()) {
+ Ctx.emitError(
+ "Only int64, int32, and float tensors are supported. "
+ "Found unsupported type for tensor named " +
+ TensorSpec->name());
+ return None;
+ }
+ Ret.push_back({*TensorSpec, LoggingName->str()});
+ }
+
+ if (ValuesArray->size() != Ret.size()) {
+ Ctx.emitError(
+ "Unable to parse output spec. It should be a json file containing an "
+ "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
+ "with a json object describing a TensorSpec; and a 'logging_name' key, "
+ "which is a string to use as name when logging this tensor in the "
+ "training log.");
+ return None;
+ }
+ if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
+ Ctx.emitError("The first output spec must describe the decision tensor, "
+ "and must have the logging_name " +
+ StringRef(ExpectedDecisionName));
+ return None;
+ }
+ return Ret;
+}
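Putting it together, a hypothetical output_spec.json that would satisfy loadOutputSpecs (assuming the caller's expected decision name is "decision"; the tensor names are made up):

[
  {
    "logging_name": "decision",
    "tensor_spec": { "name": "output_0", "port": 0, "type": "int64_t", "shape": [1] }
  },
  {
    "logging_name": "reward",
    "tensor_spec": { "name": "output_1", "port": 0, "type": "float", "shape": [1] }
  }
]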
+
class TFModelEvaluatorImpl {
public:
TFModelEvaluatorImpl(StringRef SavedModelPath,
- const std::vector<std::string> &InputNames,
- const std::vector<std::string> &OutputNames,
- const char *Tags);
+ const std::vector<TensorSpec> &InputSpecs,
+ function_ref<TensorSpec(size_t)> GetOutputSpecs,
+ size_t OutputSpecsSize, const char *Tags);
bool isValid() const { return IsValid; }
size_t OutputSize() const { return OutputFeed.size(); }
@@ -130,16 +329,18 @@ private:
/// Reusable utility for ensuring we can bind the requested Name to a node in
/// the SavedModel Graph.
- bool checkReportAndInvalidate(const TF_Output &Output, StringRef Name);
+ bool checkReportAndInvalidate(const TF_Output &Output,
+ const TensorSpec &OutputSpec);
};
} // namespace llvm
TFModelEvaluatorImpl::TFModelEvaluatorImpl(
- StringRef SavedModelPath, const std::vector<std::string> &InputNames,
- const std::vector<std::string> &OutputNames, const char *Tags)
+ StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
+ function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
+ const char *Tags = "serve")
: Graph(createTFGraph()), Options(createTFSessionOptions()),
- InputFeed(InputNames.size()), Input(InputNames.size()),
- OutputFeed(OutputNames.size()) {
+ InputFeed(InputSpecs.size()), Input(InputSpecs.size()),
+ OutputFeed(OutputSpecsSize) {
if (!ensureInitTF()) {
errs() << "Tensorflow should have been initialized";
return;
@@ -153,30 +354,44 @@ TFModelEvaluatorImpl::TFModelEvaluatorImpl(
errs() << TF_Message(Status.get());
invalidate();
}
- for (size_t I = 0; I < InputNames.size(); ++I) {
+ for (size_t I = 0; I < InputSpecs.size(); ++I) {
+ auto &InputSpec = InputSpecs[I];
InputFeed[I] = {
- TF_GraphOperationByName(Graph.get(), (InputNames[I]).c_str()), 0};
- if (!checkReportAndInvalidate(InputFeed[I], InputNames[I]))
+ TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()),
+ InputSpec.port()};
+ if (!checkReportAndInvalidate(InputFeed[I], InputSpec))
return;
+ initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()),
+ InputSpec.shape());
}
- for (size_t I = 0; I < OutputNames.size(); ++I) {
+ for (size_t I = 0; I < OutputSpecsSize; ++I) {
+ auto OutputSpec = GetOutputSpecs(I);
OutputFeed[I] = {
- TF_GraphOperationByName(Graph.get(), (OutputNames[I]).c_str()), 0};
- if (!checkReportAndInvalidate(OutputFeed[I], OutputNames[I]))
+ TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()),
+ OutputSpec.port()};
+ if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec))
return;
}
}
-TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
- const std::vector<std::string> &InputNames,
- const std::vector<std::string> &OutputNames,
- const char *Tags)
- : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputNames, OutputNames,
- Tags)) {
+TFModelEvaluator::TFModelEvaluator(
+ StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
+ function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
+ const char *Tags)
+ : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs,
+ OutputSpecsSize, Tags)) {
if (!Impl->isValid())
Impl.reset();
}
+TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
+ const std::vector<TensorSpec> &InputSpecs,
+ const std::vector<TensorSpec> &OutputSpecs,
+ const char *Tags)
+ : TFModelEvaluator(
+ SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; },
+ OutputSpecs.size(), Tags) {}
+
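A hedged usage sketch of the spec-based constructor (the model path and tensor names are made up; it assumes the TensorSpec::createSpec<T> factory and the default "serve" tag declared in TFUtils.h):

// Sketch: build an evaluator from TensorSpecs rather than raw op names.
bool loadModelExample() {
  std::vector<TensorSpec> Inputs{
      TensorSpec::createSpec<int64_t>("serving_default_input", {1, 10})};
  std::vector<TensorSpec> Outputs{
      TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
  TFModelEvaluator Evaluator("/path/to/saved_model", Inputs, Outputs);
  return Evaluator.isValid();
}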
TFModelEvaluatorImpl::~TFModelEvaluatorImpl() {
for (auto *T : Input) {
TF_DeleteTensor(T);
@@ -190,11 +405,11 @@ TFModelEvaluatorImpl::~TFModelEvaluatorImpl() {
errs() << "Could not delete TF session";
}
-bool TFModelEvaluatorImpl::checkReportAndInvalidate(const TF_Output &Output,
- StringRef Name) {
+bool TFModelEvaluatorImpl::checkReportAndInvalidate(
+ const TF_Output &Output, const TensorSpec &OutputSpec) {
if (Output.oper)
return true;
- errs() << "Could not find TF_Output named: " + Name;
+ errs() << "Could not find TF_Output named: " + OutputSpec.name();
IsValid = false;
return IsValid;
}
@@ -236,54 +451,55 @@ TFModelEvaluator::EvaluationResult::EvaluationResult(
TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other)
: Impl(std::move(Other.Impl)) {}
-void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) {
- return TF_TensorData(Impl->getOutput()[Index]);
+TFModelEvaluator::EvaluationResult &
+TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) {
+ Impl = std::move(Other.Impl);
+ return *this;
}
-void TFModelEvaluator::initInput(size_t Index, int TypeIndex,
- const std::vector<int64_t> &Dimensions) {
- Impl->initInput(Index, static_cast<TF_DataType>(TypeIndex), Dimensions);
+void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) {
+ return TF_TensorData(Impl->getOutput()[Index]);
}
-template <> int TFModelEvaluator::getModelTypeIndex<float>() {
- return TF_FLOAT;
+const void *
+TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const {
+ return TF_TensorData(Impl->getOutput()[Index]);
}
-template <> int TFModelEvaluator::getModelTypeIndex<double>() {
- return TF_DOUBLE;
-}
+#define TFUTILS_GETDATATYPE_IMPL(T, E) \
+ template <> int TensorSpec::getDataType<T>() { return E; }
-template <> int TFModelEvaluator::getModelTypeIndex<int8_t>() {
- return TF_INT8;
-}
+TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)
-template <> int TFModelEvaluator::getModelTypeIndex<uint8_t>() {
- return TF_UINT8;
-}
+#undef TFUTILS_GETDATATYPE_IMPL
-template <> int TFModelEvaluator::getModelTypeIndex<int16_t>() {
- return TF_INT16;
-}
-
-template <> int TFModelEvaluator::getModelTypeIndex<uint16_t>() {
- return TF_UINT16;
-}
+TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
+TFModelEvaluator::~TFModelEvaluator() {}
-template <> int TFModelEvaluator::getModelTypeIndex<int32_t>() {
- return TF_INT32;
-}
+void Logger::print(raw_ostream &OS) {
+ if (RawLogData.empty())
+ return;
+ if (RawLogData[0].empty())
+ return;
+ size_t Tensor0Size = FeatureSpecs[0].Spec.getElementCount() *
+ FeatureSpecs[0].Spec.getElementByteSize();
+ size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
+ if (NumberOfRecords == 0)
+ return;
+ size_t RewardSize =
+ RewardSpec.getElementCount() * RewardSpec.getElementByteSize();
+ size_t NumberOfRewards = RawLogData.back().size() / RewardSize;
-template <> int TFModelEvaluator::getModelTypeIndex<uint32_t>() {
- return TF_UINT32;
-}
+ OS << "feature_lists: {\n";
+ for (size_t I = 0; I < FeatureSpecs.size(); ++I)
+ writeRawTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
+ NumberOfRecords);
-template <> int TFModelEvaluator::getModelTypeIndex<int64_t>() {
- return TF_INT64;
-}
+ if (IncludeReward)
+ writeRawTensorsAsFeatureLists(OS, {RewardSpec, None},
+ RawLogData.back().data(), NumberOfRecords,
+ NumberOfRewards == 1);
-template <> int TFModelEvaluator::getModelTypeIndex<uint64_t>() {
- return TF_UINT64;
+ OS << "}\n";
}
-
-TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
-TFModelEvaluator::~TFModelEvaluator() {}
+#endif // defined(LLVM_HAVE_TF_API)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 60cfb04634c4..a4de21a2541e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -24,6 +24,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
"No vector functions library"),
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
"Accelerate framework"),
+ clEnumValN(TargetLibraryInfoImpl::LIBMVEC_X86, "LIBMVEC-X86",
+ "GLIBC Vector Math library"),
clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
"IBM MASS vector library"),
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
@@ -549,6 +551,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_nvvm_reflect);
}
+ // These vec_malloc/free routines are only available on AIX.
+ if (!T.isOSAIX()) {
+ TLI.setUnavailable(LibFunc_vec_calloc);
+ TLI.setUnavailable(LibFunc_vec_malloc);
+ TLI.setUnavailable(LibFunc_vec_realloc);
+ TLI.setUnavailable(LibFunc_vec_free);
+ }
+
TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
}
@@ -636,7 +646,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
const DataLayout *DL) const {
LLVMContext &Ctx = FTy.getContext();
Type *PCharTy = Type::getInt8PtrTy(Ctx);
- Type *SizeTTy = DL ? DL->getIntPtrType(Ctx, /*AS=*/0) : nullptr;
+ Type *SizeTTy = DL ? DL->getIntPtrType(Ctx, /*AddressSpace=*/0) : nullptr;
auto IsSizeTTy = [SizeTTy](Type *Ty) {
return SizeTTy ? Ty == SizeTTy : Ty->isIntegerTy();
};
@@ -829,6 +839,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_system:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
case LibFunc_malloc:
+ case LibFunc_vec_malloc:
return (NumParams == 1 && FTy.getReturnType()->isPointerTy());
case LibFunc_memcmp:
return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
@@ -847,6 +858,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
case LibFunc_memcpy_chk:
+ case LibFunc_mempcpy_chk:
case LibFunc_memmove_chk:
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
@@ -882,6 +894,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (FTy.getReturnType()->isPointerTy());
case LibFunc_realloc:
case LibFunc_reallocf:
+ case LibFunc_vec_realloc:
return (NumParams == 2 && FTy.getReturnType() == PCharTy &&
FTy.getParamType(0) == FTy.getReturnType() &&
IsSizeTTy(FTy.getParamType(1)));
@@ -909,6 +922,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_bzero:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy());
case LibFunc_calloc:
+ case LibFunc_vec_calloc:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy());
case LibFunc_atof:
@@ -959,6 +973,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_mkdir:
case LibFunc_mktime:
case LibFunc_times:
+ case LibFunc_vec_free:
return (NumParams != 0 && FTy.getParamType(0)->isPointerTy());
case LibFunc_fopen:
@@ -1228,6 +1243,15 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_ZdaPvmSt11align_val_t:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy());
+ // void __atomic_load(size_t, void *, void *, int)
+ case LibFunc_atomic_load:
+ // void __atomic_store(size_t, void *, void *, int)
+ case LibFunc_atomic_store:
+ return (NumParams == 4 && FTy.getParamType(0)->isIntegerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getParamType(2)->isPointerTy() &&
+ FTy.getParamType(3)->isIntegerTy());
+
case LibFunc_memset_pattern16:
return (!FTy.isVarArg() && NumParams == 3 &&
FTy.getParamType(0)->isPointerTy() &&
@@ -1531,10 +1555,10 @@ static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
}
void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
- VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end());
+ llvm::append_range(VectorDescs, Fns);
llvm::sort(VectorDescs, compareByScalarFnName);
- ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end());
+ llvm::append_range(ScalarDescs, Fns);
llvm::sort(ScalarDescs, compareByVectorFnName);
}
@@ -1549,6 +1573,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
addVectorizableFunctions(VecFuncs);
break;
}
+ case LIBMVEC_X86: {
+ const VecDesc VecFuncs[] = {
+ #define TLI_DEFINE_LIBMVEC_X86_VECFUNCS
+ #include "llvm/Analysis/VecFuncs.def"
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
case MASSV: {
const VecDesc VecFuncs[] = {
#define TLI_DEFINE_MASSV_VECFUNCS
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
index 2f051e53790b..e498401eb8b5 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -71,6 +71,7 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
FMF = FPMO->getFastMathFlags();
+ Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
FunctionType *FTy =
CI.getCalledFunction()->getFunctionType();
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
@@ -78,9 +79,10 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
const CallBase &CI,
- unsigned Factor) :
- RetTy(CI.getType()), IID(Id), VF(Factor) {
+ ElementCount Factor)
+ : RetTy(CI.getType()), IID(Id), VF(Factor) {
+ assert(!Factor.isScalable() && "Scalable vectors are not yet supported");
if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
FMF = FPMO->getFastMathFlags();
@@ -92,9 +94,9 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
const CallBase &CI,
- unsigned Factor,
- unsigned ScalarCost) :
- RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
+ ElementCount Factor,
+ unsigned ScalarCost)
+ : RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
FMF = FPMO->getFastMathFlags();
@@ -215,7 +217,11 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
// Note that this block may not be the loop latch block, even if the loop
// has a latch block.
ExitBlock = BB;
- ExitCount = EC;
+ TripCount = SE.getAddExpr(EC, SE.getOne(EC->getType()));
+
+ if (!EC->getType()->isPointerTy() && EC->getType() != CountType)
+ TripCount = SE.getZeroExtendExpr(TripCount, CountType);
+
break;
}
@@ -241,6 +247,11 @@ unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
return TTIImpl->getInliningThresholdMultiplier();
}
+unsigned
+TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
+ return TTIImpl->adjustInliningThreshold(CB);
+}
+
int TargetTransformInfo::getInlinerVectorBonusPercent() const {
return TTIImpl->getInlinerVectorBonusPercent();
}
@@ -296,6 +307,10 @@ bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
}
+unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
+ return TTIImpl->getAssumedAddrSpace(V);
+}
+
Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
IntrinsicInst *II, Value *OldV, Value *NewV) const {
return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
@@ -322,6 +337,29 @@ bool TargetTransformInfo::emitGetActiveLaneMask() const {
return TTIImpl->emitGetActiveLaneMask();
}
+Optional<Instruction *>
+TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const {
+ return TTIImpl->instCombineIntrinsic(IC, II);
+}
+
+Optional<Value *> TargetTransformInfo::simplifyDemandedUseBitsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) const {
+ return TTIImpl->simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
+ KnownBitsComputed);
+}
+
+Optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const {
+ return TTIImpl->simplifyDemandedVectorEltsIntrinsic(
+ IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+ SimplifyAndSetOp);
+}
+
void TargetTransformInfo::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
return TTIImpl->getUnrollingPreferences(L, SE, UP);
@@ -353,6 +391,10 @@ bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
return TTIImpl->isLSRCostLess(C1, C2);
}
+bool TargetTransformInfo::isNumRegsMajorCostOfLSR() const {
+ return TTIImpl->isNumRegsMajorCostOfLSR();
+}
+
bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const {
return TTIImpl->isProfitableLSRChainElement(I);
}
@@ -454,6 +496,10 @@ bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
return TTIImpl->isTypeLegal(Ty);
}
+unsigned TargetTransformInfo::getRegUsageForType(Type *Ty) const {
+ return TTIImpl->getRegUsageForType(Ty);
+}
+
bool TargetTransformInfo::shouldBuildLookupTables() const {
return TTIImpl->shouldBuildLookupTables();
}
@@ -547,11 +593,11 @@ int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty,
return Cost;
}
-int
-TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) const {
- int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
+int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) const {
+ int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -586,6 +632,10 @@ unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
return TTIImpl->getMinVectorRegisterBitWidth();
}
+Optional<unsigned> TargetTransformInfo::getMaxVScale() const {
+ return TTIImpl->getMaxVScale();
+}
+
bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
}
@@ -594,6 +644,11 @@ unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
return TTIImpl->getMinimumVF(ElemWidth);
}
+unsigned TargetTransformInfo::getMaximumVF(unsigned ElemWidth,
+ unsigned Opcode) const {
+ return TTIImpl->getMaximumVF(ElemWidth, Opcode);
+}
+
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
return TTIImpl->shouldConsiderAddressTypePromotion(
@@ -707,12 +762,57 @@ int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty,
return Cost;
}
+TTI::CastContextHint
+TargetTransformInfo::getCastContextHint(const Instruction *I) {
+ if (!I)
+ return CastContextHint::None;
+
+ auto getLoadStoreKind = [](const Value *V, unsigned LdStOp, unsigned MaskedOp,
+ unsigned GatScatOp) {
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return CastContextHint::None;
+
+ if (I->getOpcode() == LdStOp)
+ return CastContextHint::Normal;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == MaskedOp)
+ return TTI::CastContextHint::Masked;
+ if (II->getIntrinsicID() == GatScatOp)
+ return TTI::CastContextHint::GatherScatter;
+ }
+
+ return TTI::CastContextHint::None;
+ };
+
+ switch (I->getOpcode()) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPExt:
+ return getLoadStoreKind(I->getOperand(0), Instruction::Load,
+ Intrinsic::masked_load, Intrinsic::masked_gather);
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ if (I->hasOneUse())
+ return getLoadStoreKind(*I->user_begin(), Instruction::Store,
+ Intrinsic::masked_store,
+ Intrinsic::masked_scatter);
+ break;
+ default:
+ return CastContextHint::None;
+ }
+
+ return TTI::CastContextHint::None;
+}
+
int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
- int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+ int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -734,11 +834,13 @@ int TargetTransformInfo::getCFInstrCost(unsigned Opcode,
int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
- int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ int Cost =
+ TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -846,6 +948,13 @@ int TargetTransformInfo::getMinMaxReductionCost(
return Cost;
}
+InstructionCost TargetTransformInfo::getExtendedAddReductionCost(
+ bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
+ TTI::TargetCostKind CostKind) const {
+ return TTIImpl->getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
+ CostKind);
+}
+
unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
@@ -945,6 +1054,16 @@ bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, Type *Ty,
return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}
+bool TargetTransformInfo::preferInLoopReduction(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const {
+ return TTIImpl->preferInLoopReduction(Opcode, Ty, Flags);
+}
+
+bool TargetTransformInfo::preferPredicatedReductionSelect(
+ unsigned Opcode, Type *Ty, ReductionFlags Flags) const {
+ return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags);
+}
+
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II);
}
@@ -953,6 +1072,10 @@ unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
return TTIImpl->getGISelRematGlobalCost();
}
+bool TargetTransformInfo::supportsScalableVectors() const {
+ return TTIImpl->supportsScalableVectors();
+}
+
int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
@@ -966,7 +1089,8 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
else if (!SI)
return false;
- SmallVector<int, 32> Mask(SI->getType()->getNumElements(), -1);
+ SmallVector<int, 32> Mask(
+ cast<FixedVectorType>(SI->getType())->getNumElements(), -1);
// Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
// we look at the left or right side.
@@ -1105,7 +1229,7 @@ TTI::ReductionKind TTI::matchPairwiseReduction(
if (!RD)
return TTI::RK_None;
- auto *VecTy = cast<VectorType>(RdxStart->getType());
+ auto *VecTy = cast<FixedVectorType>(RdxStart->getType());
unsigned NumVecElems = VecTy->getNumElements();
if (!isPowerOf2_32(NumVecElems))
return TTI::RK_None;
@@ -1170,7 +1294,7 @@ TTI::ReductionKind TTI::matchVectorSplittingReduction(
if (!RD)
return TTI::RK_None;
- auto *VecTy = cast<VectorType>(ReduxRoot->getOperand(0)->getType());
+ auto *VecTy = cast<FixedVectorType>(ReduxRoot->getOperand(0)->getType());
unsigned NumVecElems = VecTy->getNumElements();
if (!isPowerOf2_32(NumVecElems))
return TTI::RK_None;
@@ -1229,6 +1353,18 @@ TTI::ReductionKind TTI::matchVectorSplittingReduction(
return RD->Kind;
}
+TTI::ReductionKind
+TTI::matchVectorReduction(const ExtractElementInst *Root, unsigned &Opcode,
+ VectorType *&Ty, bool &IsPairwise) {
+ TTI::ReductionKind RdxKind = matchVectorSplittingReduction(Root, Opcode, Ty);
+ if (RdxKind != TTI::ReductionKind::RK_None) {
+ IsPairwise = false;
+ return RdxKind;
+ }
+ IsPairwise = true;
+ return matchPairwiseReduction(Root, Opcode, Ty);
+}
+
int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
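The isHardwareLoopCandidate hunk above stops caching the raw exit count and instead stores a trip count, i.e. the exit count plus one, zero-extended to the loop counter type. A rough standalone toy of that relationship in plain integers (the widening is done up front here so the +1 cannot wrap; the real code does the widening through SCEV):

#include <cstdint>
#include <cstdio>

// Trip count = exit (backedge-taken) count + 1. Compute in a wider type:
// an exit count of UINT32_MAX corresponds to 2^32 iterations, which does
// not fit back into 32 bits.
uint64_t tripCountFromExitCount(uint32_t ExitCount) {
  return (uint64_t)ExitCount + 1;
}

int main() {
  std::printf("exit count 9        -> trip count %llu\n",
              (unsigned long long)tripCountFromExitCount(9));
  std::printf("exit count 2^32 - 1 -> trip count %llu\n",
              (unsigned long long)tripCountFromExitCount(UINT32_MAX));
  return 0;
}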
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index da4520066b46..268acb682cf1 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -111,6 +111,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -736,3 +737,84 @@ bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
+
+MDNode *AAMDNodes::ShiftTBAA(MDNode *MD, size_t Offset) {
+ // Fast path if there's no offset
+ if (Offset == 0)
+ return MD;
+ // Fast path if there's no path tbaa node (and thus scalar)
+ if (!isStructPathTBAA(MD))
+ return MD;
+
+ TBAAStructTagNode Tag(MD);
+ SmallVector<Metadata *, 5> Sub;
+ Sub.push_back(MD->getOperand(0));
+ Sub.push_back(MD->getOperand(1));
+ ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(2));
+
+ if (Tag.isNewFormat()) {
+ ConstantInt *InnerSize = mdconst::extract<ConstantInt>(MD->getOperand(3));
+
+ if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) {
+ return nullptr;
+ }
+
+ uint64_t NewSize = InnerSize->getZExtValue();
+ uint64_t NewOffset = InnerOffset->getZExtValue() - Offset;
+ if (InnerOffset->getZExtValue() < Offset) {
+ NewOffset = 0;
+ NewSize -= Offset - InnerOffset->getZExtValue();
+ }
+
+ Sub.push_back(ConstantAsMetadata::get(
+ ConstantInt::get(InnerOffset->getType(), NewOffset)));
+
+ Sub.push_back(ConstantAsMetadata::get(
+ ConstantInt::get(InnerSize->getType(), NewSize)));
+
+ // immutable type
+ if (MD->getNumOperands() >= 5)
+ Sub.push_back(MD->getOperand(4));
+ } else {
+ if (InnerOffset->getZExtValue() < Offset)
+ return nullptr;
+
+ Sub.push_back(ConstantAsMetadata::get(ConstantInt::get(
+ InnerOffset->getType(), InnerOffset->getZExtValue() - Offset)));
+
+ // immutable type
+ if (MD->getNumOperands() >= 4)
+ Sub.push_back(MD->getOperand(3));
+ }
+ return MDNode::get(MD->getContext(), Sub);
+}
+
+MDNode *AAMDNodes::ShiftTBAAStruct(MDNode *MD, size_t Offset) {
+ // Fast path if there's no offset
+ if (Offset == 0)
+ return MD;
+ SmallVector<Metadata *, 3> Sub;
+ for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) {
+ ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(i));
+ ConstantInt *InnerSize =
+ mdconst::extract<ConstantInt>(MD->getOperand(i + 1));
+ // Don't include any triples that aren't in bounds
+ if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset)
+ continue;
+
+ uint64_t NewSize = InnerSize->getZExtValue();
+ uint64_t NewOffset = InnerOffset->getZExtValue() - Offset;
+ if (InnerOffset->getZExtValue() < Offset) {
+ NewOffset = 0;
+ NewSize -= Offset - InnerOffset->getZExtValue();
+ }
+
+ // Shift the offset of the triple
+ Sub.push_back(ConstantAsMetadata::get(
+ ConstantInt::get(InnerOffset->getType(), NewOffset)));
+ Sub.push_back(ConstantAsMetadata::get(
+ ConstantInt::get(InnerSize->getType(), NewSize)));
+ Sub.push_back(MD->getOperand(i + 2));
+ }
+ return MDNode::get(MD->getContext(), Sub);
+}
\ No newline at end of file
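The new ShiftTBAAStruct helper above rewrites each (offset, size) access triple when the enclosing access is advanced by a byte offset. A rough standalone sketch of just that arithmetic on plain integers (toy Field struct, not the metadata nodes; only the offset/size handling shown in the hunk is modelled):

#include <cstdint>
#include <cstdio>
#include <vector>

struct Field { uint64_t Offset, Size; }; // stand-in for one tbaa.struct triple

// Drop fields that end at or before the new base, clamp fields that straddle
// it to start at zero, and shift everything else down by ByteOffset.
std::vector<Field> shiftFields(const std::vector<Field> &In, uint64_t ByteOffset) {
  std::vector<Field> Out;
  for (const Field &F : In) {
    if (F.Offset + F.Size <= ByteOffset)
      continue;                              // entirely before the shifted access
    uint64_t NewOffset = F.Offset - ByteOffset;
    uint64_t NewSize = F.Size;
    if (F.Offset < ByteOffset) {             // straddles the new base
      NewOffset = 0;
      NewSize -= ByteOffset - F.Offset;
    }
    Out.push_back({NewOffset, NewSize});
  }
  return Out;
}

int main() {
  // Fields at bytes [0,4), [4,8) and [8,16), with the access shifted by 6 bytes.
  for (const Field &F : shiftFields({{0, 4}, {4, 4}, {8, 8}}, 6))
    std::printf("offset %llu size %llu\n", (unsigned long long)F.Offset,
                (unsigned long long)F.Size);
  return 0;
}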
diff --git a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
index 0192a216b2f7..faa46537ad17 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -290,9 +290,9 @@ bool verifyAllVectorsHaveSameWidth(FunctionType *Signature) {
assert(VecTys.size() > 1 && "Invalid number of elements.");
const ElementCount EC = VecTys[0]->getElementCount();
- return llvm::all_of(
- llvm::make_range(VecTys.begin() + 1, VecTys.end()),
- [&EC](VectorType *VTy) { return (EC == VTy->getElementCount()); });
+ return llvm::all_of(llvm::drop_begin(VecTys), [&EC](VectorType *VTy) {
+ return (EC == VTy->getElementCount());
+ });
}
#endif // NDEBUG
@@ -310,7 +310,7 @@ ElementCount getECFromSignature(FunctionType *Signature) {
if (auto *VTy = dyn_cast<VectorType>(Ty))
return VTy->getElementCount();
- return ElementCount(/*Min=*/1, /*Scalable=*/false);
+ return ElementCount::getFixed(/*Min=*/1);
}
} // namespace
@@ -442,7 +442,7 @@ Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName,
if (!F)
return None;
const ElementCount EC = getECFromSignature(F->getFunctionType());
- VF = EC.Min;
+ VF = EC.getKnownMinValue();
}
// Sanity checks.
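Both this file and the TargetTransformInfo changes earlier in the patch move from a plain unsigned vectorization factor to ElementCount, which pairs a known-minimum lane count with a scalable flag. A toy stand-in (not the LLVM class) showing only the three accessors these hunks rely on:

#include <cassert>
#include <cstdio>

// Toy stand-in for llvm::ElementCount: a known-minimum lane count plus a flag
// saying whether the real count is that minimum times a runtime vscale.
struct ToyElementCount {
  unsigned MinVal;
  bool Scalable;
  static ToyElementCount getFixed(unsigned N) { return {N, false}; }
  static ToyElementCount getScalable(unsigned N) { return {N, true}; }
  unsigned getKnownMinValue() const { return MinVal; }
  bool isScalable() const { return Scalable; }
};

int main() {
  ToyElementCount Fixed = ToyElementCount::getFixed(4);   // exactly 4 lanes
  ToyElementCount Scal = ToyElementCount::getScalable(4); // 4 * vscale lanes
  assert(!Fixed.isScalable() && "Scalable vectors are not yet supported");
  std::printf("fixed: min %u, scalable %d\n", Fixed.getKnownMinValue(),
              Fixed.isScalable());
  std::printf("scalable: min %u, scalable %d\n", Scal.getKnownMinValue(),
              Scal.isScalable());
  return 0;
}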
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
index 43caaa62c2ec..75486d3c80e7 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
@@ -77,8 +77,6 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-const unsigned MaxDepth = 6;
-
// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
@@ -117,7 +115,7 @@ struct Query {
/// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
/// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
/// (all of which can call computeKnownBits), and so on.
- std::array<const Value *, MaxDepth> Excluded;
+ std::array<const Value *, MaxAnalysisRecursionDepth> Excluded;
/// If true, it is safe to use metadata during simplification.
InstrInfoQuery IIQ;
@@ -172,8 +170,8 @@ static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
return false;
int NumElts =
- cast<VectorType>(Shuf->getOperand(0)->getType())->getNumElements();
- int NumMaskElts = Shuf->getType()->getNumElements();
+ cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
+ int NumMaskElts = cast<FixedVectorType>(Shuf->getType())->getNumElements();
DemandedLHS = DemandedRHS = APInt::getNullValue(NumElts);
if (DemandedElts.isNullValue())
return true;
@@ -352,13 +350,14 @@ bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
return Known.isNegative();
}
-static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
+static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
+ const Query &Q);
bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
const DataLayout &DL, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT,
bool UseInstrInfo) {
- return ::isKnownNonEqual(V1, V2,
+ return ::isKnownNonEqual(V1, V2, 0,
Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT,
UseInstrInfo, /*ORE=*/nullptr));
}
@@ -417,7 +416,6 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
const APInt &DemandedElts, KnownBits &Known,
KnownBits &Known2, unsigned Depth,
const Query &Q) {
- unsigned BitWidth = Known.getBitWidth();
computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
@@ -435,89 +433,18 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
bool isKnownNegativeOp0 = Known2.isNegative();
// The product of two numbers with the same sign is non-negative.
isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
- (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
+ (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
// The product of a negative number and a non-negative number is either
// negative or zero.
if (!isKnownNonNegative)
- isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
- isKnownNonZero(Op0, Depth, Q)) ||
- (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
- isKnownNonZero(Op1, Depth, Q));
+ isKnownNegative =
+ (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
+ Known2.isNonZero()) ||
+ (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
}
}
- assert(!Known.hasConflict() && !Known2.hasConflict());
- // Compute a conservative estimate for high known-0 bits.
- unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
- Known2.countMinLeadingZeros(),
- BitWidth) - BitWidth;
- LeadZ = std::min(LeadZ, BitWidth);
-
- // The result of the bottom bits of an integer multiply can be
- // inferred by looking at the bottom bits of both operands and
- // multiplying them together.
- // We can infer at least the minimum number of known trailing bits
- // of both operands. Depending on number of trailing zeros, we can
- // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming
- // a and b are divisible by m and n respectively.
- // We then calculate how many of those bits are inferrable and set
- // the output. For example, the i8 mul:
- // a = XXXX1100 (12)
- // b = XXXX1110 (14)
- // We know the bottom 3 bits are zero since the first can be divided by
- // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4).
- // Applying the multiplication to the trimmed arguments gets:
- // XX11 (3)
- // X111 (7)
- // -------
- // XX11
- // XX11
- // XX11
- // XX11
- // -------
- // XXXXX01
- // Which allows us to infer the 2 LSBs. Since we're multiplying the result
- // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits.
- // The proof for this can be described as:
- // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) &&
- // (C7 == (1 << (umin(countTrailingZeros(C1), C5) +
- // umin(countTrailingZeros(C2), C6) +
- // umin(C5 - umin(countTrailingZeros(C1), C5),
- // C6 - umin(countTrailingZeros(C2), C6)))) - 1)
- // %aa = shl i8 %a, C5
- // %bb = shl i8 %b, C6
- // %aaa = or i8 %aa, C1
- // %bbb = or i8 %bb, C2
- // %mul = mul i8 %aaa, %bbb
- // %mask = and i8 %mul, C7
- // =>
- // %mask = i8 ((C1*C2)&C7)
- // Where C5, C6 describe the known bits of %a, %b
- // C1, C2 describe the known bottom bits of %a, %b.
- // C7 describes the mask of the known bits of the result.
- APInt Bottom0 = Known.One;
- APInt Bottom1 = Known2.One;
-
- // How many times we'd be able to divide each argument by 2 (shr by 1).
- // This gives us the number of trailing zeros on the multiplication result.
- unsigned TrailBitsKnown0 = (Known.Zero | Known.One).countTrailingOnes();
- unsigned TrailBitsKnown1 = (Known2.Zero | Known2.One).countTrailingOnes();
- unsigned TrailZero0 = Known.countMinTrailingZeros();
- unsigned TrailZero1 = Known2.countMinTrailingZeros();
- unsigned TrailZ = TrailZero0 + TrailZero1;
-
- // Figure out the fewest known-bits operand.
- unsigned SmallestOperand = std::min(TrailBitsKnown0 - TrailZero0,
- TrailBitsKnown1 - TrailZero1);
- unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth);
-
- APInt BottomKnown = Bottom0.getLoBits(TrailBitsKnown0) *
- Bottom1.getLoBits(TrailBitsKnown1);
-
- Known.resetAll();
- Known.Zero.setHighBits(LeadZ);
- Known.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown);
- Known.One |= BottomKnown.getLoBits(ResultBitsKnown);
+ Known = KnownBits::computeForMul(Known, Known2);
// Only make use of no-wrap flags if we failed to compute the sign bit
// directly. This matters if the multiplication always overflows, in
@@ -549,10 +476,10 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
// The first CommonPrefixBits of all values in Range are equal.
unsigned CommonPrefixBits =
(Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
-
APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
- Known.One &= Range.getUnsignedMax() & Mask;
- Known.Zero &= ~Range.getUnsignedMax() & Mask;
+ APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
+ Known.One &= UnsignedMax & Mask;
+ Known.Zero &= ~UnsignedMax & Mask;
}
}
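computeKnownBitsFromRangeMetadata above derives known bits from the prefix in which the range's minimum and maximum agree (the hunk additionally truncates or extends the maximum to the bit width first). A rough 8-bit toy of that prefix trick, assuming nothing was known beforehand:

#include <cstdint>
#include <cstdio>

struct ToyKnown { uint8_t Zero, One; }; // stand-in for llvm::KnownBits

// Count leading zero bits of an 8-bit value (8 for zero).
unsigned clz8(uint8_t V) {
  unsigned N = 0;
  for (int B = 7; B >= 0 && !(V & (1u << B)); --B)
    ++N;
  return N;
}

// For a value in [Min, Max], every bit above the highest differing bit of Min
// and Max is fully determined, and under that mask Max and Min agree.
ToyKnown knownFromRange(uint8_t Min, uint8_t Max) {
  unsigned CommonPrefixBits = clz8(Min ^ Max);
  uint8_t Mask = (uint8_t)(0xFF00u >> CommonPrefixBits); // top CommonPrefixBits bits
  return {(uint8_t)(~Max & Mask), (uint8_t)(Max & Mask)};
}

int main() {
  // Range [0x40, 0x47]: the top five bits are fixed at 01000.
  ToyKnown K = knownFromRange(0x40, 0x47);
  std::printf("known zero 0x%02x, known one 0x%02x\n", K.Zero, K.One);
  return 0;
}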
@@ -582,9 +509,7 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
if (V == I || isSafeToSpeculativelyExecute(V)) {
EphValues.insert(V);
if (const User *U = dyn_cast<User>(V))
- for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
- J != JE; ++J)
- WorkSet.push_back(*J);
+ append_range(WorkSet, U->operands());
}
}
}
@@ -601,6 +526,7 @@ bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
// FIXME: This list is repeated from NoTTI::getIntrinsicCost.
case Intrinsic::assume:
case Intrinsic::sideeffect:
+ case Intrinsic::pseudoprobe:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::dbg_label:
@@ -608,6 +534,7 @@ bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
case Intrinsic::invariant_end:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
+ case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::objectsize:
case Intrinsic::ptr_annotation:
case Intrinsic::var_annotation:
@@ -662,41 +589,30 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
return false;
}
+static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
+ // v u> y implies v != 0.
+ if (Pred == ICmpInst::ICMP_UGT)
+ return true;
+
+ // Special-case v != 0 to also handle v != null.
+ if (Pred == ICmpInst::ICMP_NE)
+ return match(RHS, m_Zero());
+
+ // All other predicates - rely on generic ConstantRange handling.
+ const APInt *C;
+ if (!match(RHS, m_APInt(C)))
+ return false;
+
+ ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
+ return !TrueValues.contains(APInt::getNullValue(C->getBitWidth()));
+}
+
static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
// cannot use them!
if (!Q.AC || !Q.CxtI)
return false;
- // Note that the patterns below need to be kept in sync with the code
- // in AssumptionCache::updateAffectedValues.
-
- auto CmpExcludesZero = [V](ICmpInst *Cmp) {
- auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
-
- Value *RHS;
- CmpInst::Predicate Pred;
- if (!match(Cmp, m_c_ICmp(Pred, m_V, m_Value(RHS))))
- return false;
- // assume(v u> y) -> assume(v != 0)
- if (Pred == ICmpInst::ICMP_UGT)
- return true;
-
- // assume(v != 0)
- // We special-case this one to ensure that we handle `assume(v != null)`.
- if (Pred == ICmpInst::ICMP_NE)
- return match(RHS, m_Zero());
-
- // All other predicates - rely on generic ConstantRange handling.
- ConstantInt *CI;
- if (!match(RHS, m_ConstantInt(CI)))
- return false;
- ConstantRange RHSRange(CI->getValue());
- ConstantRange TrueValues =
- ConstantRange::makeAllowedICmpRegion(Pred, RHSRange);
- return !TrueValues.contains(APInt::getNullValue(CI->getBitWidth()));
- };
-
if (Q.CxtI && V->getType()->isPointerTy()) {
SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NonNull};
if (!NullPointerIsDefined(Q.CxtI->getFunction(),
@@ -723,12 +639,13 @@ static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
"must be an assume intrinsic");
- Value *Arg = I->getArgOperand(0);
- ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
- if (!Cmp)
- continue;
+ Value *RHS;
+ CmpInst::Predicate Pred;
+ auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
+ if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
+ return false;
- if (CmpExcludesZero(Cmp) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
+ if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
return true;
}
@@ -744,6 +661,14 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
unsigned BitWidth = Known.getBitWidth();
+ // Refine Known set if the pointer alignment is set by assume bundles.
+ if (V->getType()->isPointerTy()) {
+ if (RetainedKnowledge RK = getKnowledgeValidInContext(
+ V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) {
+ Known.Zero.setLowBits(Log2_32(RK.ArgValue));
+ }
+ }
+
// Note that the patterns below need to be kept in sync with the code
// in AssumptionCache::updateAffectedValues.
@@ -778,7 +703,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
}
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth == MaxDepth)
+ if (Depth == MaxAnalysisRecursionDepth)
continue;
ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
@@ -1044,26 +969,31 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
-/// pre-allocated temporary with the same bit width as Known. KZF and KOF are
-/// operator-specific functions that, given the known-zero or known-one bits
-/// respectively, and a shift amount, compute the implied known-zero or
-/// known-one bits of the shift operator's result respectively for that shift
-/// amount. The results from calling KZF and KOF are conservatively combined for
-/// all permitted shift amounts.
+/// pre-allocated temporary with the same bit width as Known and on return
+/// contains the known bits of the shift value source. KF is an
+/// operator-specific function that, given the known bits and a shift amount,
+/// computes the implied known bits of the shift operator's result for that
+/// shift amount. The results from calling KF are conservatively
+/// combined for all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
const Operator *I, const APInt &DemandedElts, KnownBits &Known,
KnownBits &Known2, unsigned Depth, const Query &Q,
- function_ref<APInt(const APInt &, unsigned)> KZF,
- function_ref<APInt(const APInt &, unsigned)> KOF) {
+ function_ref<KnownBits(const KnownBits &, const KnownBits &)> KF) {
unsigned BitWidth = Known.getBitWidth();
-
+ computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
- if (Known.isConstant()) {
- unsigned ShiftAmt = Known.getConstant().getLimitedValue(BitWidth - 1);
- computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
- Known.Zero = KZF(Known.Zero, ShiftAmt);
- Known.One = KOF(Known.One, ShiftAmt);
+ // Note: We cannot use Known.Zero.getLimitedValue() here, because if
+ // BitWidth > 64 and any upper bits are known, we'll end up returning the
+ // limit value (which implies all bits are known).
+ uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
+ uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
+ bool ShiftAmtIsConstant = Known.isConstant();
+ bool MaxShiftAmtIsOutOfRange = Known.getMaxValue().uge(BitWidth);
+
+ if (ShiftAmtIsConstant) {
+ Known = KF(Known2, Known);
+
// If the known bits conflict, this must be an overflowing left shift, so
// the shift result is poison. We can return anything we want. Choose 0 for
// the best folding opportunity.
@@ -1077,17 +1007,11 @@ static void computeKnownBitsFromShiftOperator(
// LHS, the value could be poison, but bail out because the check below is
// expensive.
// TODO: Should we just carry on?
- if (Known.getMaxValue().uge(BitWidth)) {
+ if (MaxShiftAmtIsOutOfRange) {
Known.resetAll();
return;
}
- // Note: We cannot use Known.Zero.getLimitedValue() here, because if
- // BitWidth > 64 and any upper bits are known, we'll end up returning the
- // limit value (which implies all bits are known).
- uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
- uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
-
// It would be more-clearly correct to use the two temporaries for this
// calculation. Reusing the APInts here to prevent unnecessary allocations.
Known.resetAll();
@@ -1106,8 +1030,6 @@ static void computeKnownBitsFromShiftOperator(
return;
}
- computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
-
Known.Zero.setAllBits();
Known.One.setAllBits();
for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
@@ -1128,8 +1050,8 @@ static void computeKnownBitsFromShiftOperator(
continue;
}
- Known.Zero &= KZF(Known2.Zero, ShiftAmt);
- Known.One &= KOF(Known2.One, ShiftAmt);
+ Known = KnownBits::commonBits(
+ Known, KF(Known2, KnownBits::makeConstant(APInt(32, ShiftAmt))));
}
// If the known bits conflict, the result is poison. Return a 0 and hope the
@@ -1193,19 +1115,9 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
case Instruction::UDiv: {
- // For the purposes of computing leading zeros we can conservatively
- // treat a udiv as a logical right shift by the power of 2 known to
- // be less than the denominator.
- computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
- unsigned LeadZ = Known2.countMinLeadingZeros();
-
- Known2.resetAll();
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
- unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
- if (RHSMaxLeadingZeros != BitWidth)
- LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
-
- Known.Zero.setHighBits(LeadZ);
+ Known = KnownBits::udiv(Known, Known2);
break;
}
case Instruction::Select: {
@@ -1214,59 +1126,40 @@ static void computeKnownBitsFromOperator(const Operator *I,
if (SelectPatternResult::isMinOrMax(SPF)) {
computeKnownBits(RHS, Known, Depth + 1, Q);
computeKnownBits(LHS, Known2, Depth + 1, Q);
- } else {
- computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
- computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ switch (SPF) {
+ default:
+ llvm_unreachable("Unhandled select pattern flavor!");
+ case SPF_SMAX:
+ Known = KnownBits::smax(Known, Known2);
+ break;
+ case SPF_SMIN:
+ Known = KnownBits::smin(Known, Known2);
+ break;
+ case SPF_UMAX:
+ Known = KnownBits::umax(Known, Known2);
+ break;
+ case SPF_UMIN:
+ Known = KnownBits::umin(Known, Known2);
+ break;
+ }
+ break;
}
- unsigned MaxHighOnes = 0;
- unsigned MaxHighZeros = 0;
- if (SPF == SPF_SMAX) {
- // If both sides are negative, the result is negative.
- if (Known.isNegative() && Known2.isNegative())
- // We can derive a lower bound on the result by taking the max of the
- // leading one bits.
- MaxHighOnes =
- std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
- // If either side is non-negative, the result is non-negative.
- else if (Known.isNonNegative() || Known2.isNonNegative())
- MaxHighZeros = 1;
- } else if (SPF == SPF_SMIN) {
- // If both sides are non-negative, the result is non-negative.
- if (Known.isNonNegative() && Known2.isNonNegative())
- // We can derive an upper bound on the result by taking the max of the
- // leading zero bits.
- MaxHighZeros = std::max(Known.countMinLeadingZeros(),
- Known2.countMinLeadingZeros());
- // If either side is negative, the result is negative.
- else if (Known.isNegative() || Known2.isNegative())
- MaxHighOnes = 1;
- } else if (SPF == SPF_UMAX) {
- // We can derive a lower bound on the result by taking the max of the
- // leading one bits.
- MaxHighOnes =
- std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
- } else if (SPF == SPF_UMIN) {
- // We can derive an upper bound on the result by taking the max of the
- // leading zero bits.
- MaxHighZeros =
- std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
- } else if (SPF == SPF_ABS) {
+ computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+
+ // Only known if known in both the LHS and RHS.
+ Known = KnownBits::commonBits(Known, Known2);
+
+ if (SPF == SPF_ABS) {
// RHS from matchSelectPattern returns the negation part of abs pattern.
// If the negate has an NSW flag we can assume the sign bit of the result
// will be 0 because that makes abs(INT_MIN) undefined.
if (match(RHS, m_Neg(m_Specific(LHS))) &&
Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
- MaxHighZeros = 1;
+ Known.Zero.setSignBit();
}
- // Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
- if (MaxHighOnes > 0)
- Known.One.setHighBits(MaxHighOnes);
- if (MaxHighZeros > 0)
- Known.Zero.setHighBits(MaxHighZeros);
break;
}
case Instruction::FPTrunc:
@@ -1321,58 +1214,37 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
case Instruction::Shl: {
- // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
- auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
- APInt KZResult = KnownZero << ShiftAmt;
- KZResult.setLowBits(ShiftAmt); // Low bits known 0.
+ auto KF = [NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
+ KnownBits Result = KnownBits::shl(KnownVal, KnownAmt);
// If this shift has "nsw" keyword, then the result is either a poison
// value or has the same sign bit as the first operand.
- if (NSW && KnownZero.isSignBitSet())
- KZResult.setSignBit();
- return KZResult;
- };
-
- auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
- APInt KOResult = KnownOne << ShiftAmt;
- if (NSW && KnownOne.isSignBitSet())
- KOResult.setSignBit();
- return KOResult;
+ if (NSW) {
+ if (KnownVal.Zero.isSignBitSet())
+ Result.Zero.setSignBit();
+ if (KnownVal.One.isSignBitSet())
+ Result.One.setSignBit();
+ }
+ return Result;
};
-
computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
- KZF, KOF);
+ KF);
break;
}
case Instruction::LShr: {
- // (lshr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
- APInt KZResult = KnownZero.lshr(ShiftAmt);
- // High bits known zero.
- KZResult.setHighBits(ShiftAmt);
- return KZResult;
+ auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
+ return KnownBits::lshr(KnownVal, KnownAmt);
};
-
- auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
- return KnownOne.lshr(ShiftAmt);
- };
-
computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
- KZF, KOF);
+ KF);
break;
}
case Instruction::AShr: {
- // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
- return KnownZero.ashr(ShiftAmt);
+ auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
+ return KnownBits::ashr(KnownVal, KnownAmt);
};
-
- auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
- return KnownOne.ashr(ShiftAmt);
- };
-
computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
- KZF, KOF);
+ KF);
break;
}
case Instruction::Sub: {
@@ -1388,86 +1260,45 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
case Instruction::SRem:
- if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
- APInt RA = Rem->getValue().abs();
- if (RA.isPowerOf2()) {
- APInt LowBits = RA - 1;
- computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
-
- // The low bits of the first operand are unchanged by the srem.
- Known.Zero = Known2.Zero & LowBits;
- Known.One = Known2.One & LowBits;
-
- // If the first operand is non-negative or has all low bits zero, then
- // the upper bits are all zero.
- if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
- Known.Zero |= ~LowBits;
-
- // If the first operand is negative and not all low bits are zero, then
- // the upper bits are all one.
- if (Known2.isNegative() && LowBits.intersects(Known2.One))
- Known.One |= ~LowBits;
-
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
- break;
- }
- }
-
- // The sign bit is the LHS's sign bit, except when the result of the
- // remainder is zero.
- computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
- // If it's known zero, our sign bit is also zero.
- if (Known2.isNonNegative())
- Known.makeNonNegative();
-
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ Known = KnownBits::srem(Known, Known2);
break;
- case Instruction::URem: {
- if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
- const APInt &RA = Rem->getValue();
- if (RA.isPowerOf2()) {
- APInt LowBits = (RA - 1);
- computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
- Known.Zero |= ~LowBits;
- Known.One &= LowBits;
- break;
- }
- }
- // Since the result is less than or equal to either operand, any leading
- // zero bits in either operand must also exist in the result.
+ case Instruction::URem:
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
-
- unsigned Leaders =
- std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
- Known.resetAll();
- Known.Zero.setHighBits(Leaders);
+ Known = KnownBits::urem(Known, Known2);
break;
- }
case Instruction::Alloca:
Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
break;
case Instruction::GetElementPtr: {
// Analyze all of the subscripts of this getelementptr instruction
// to determine if we can prove known low zero bits.
- KnownBits LocalKnown(BitWidth);
- computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
- unsigned TrailZ = LocalKnown.countMinTrailingZeros();
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ // Accumulate the constant indices in a separate variable
+ // to minimize the number of calls to computeForAddSub.
+ APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
gep_type_iterator GTI = gep_type_begin(I);
for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
// TrailZ can only become smaller, short-circuit if we hit zero.
- if (TrailZ == 0)
+ if (Known.isUnknown())
break;
Value *Index = I->getOperand(i);
+
+ // Handle case when index is zero.
+ Constant *CIndex = dyn_cast<Constant>(Index);
+ if (CIndex && CIndex->isZeroValue())
+ continue;
+
if (StructType *STy = GTI.getStructTypeOrNull()) {
// Handle struct member offset arithmetic.
- // Handle case when index is vector zeroinitializer
- Constant *CIndex = cast<Constant>(Index);
- if (CIndex->isZeroValue())
- continue;
+ assert(CIndex &&
+ "Access to structure field must be known at compile time");
if (CIndex->getType()->isVectorTy())
Index = CIndex->getSplatValue();
@@ -1475,26 +1306,56 @@ static void computeKnownBitsFromOperator(const Operator *I,
unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
const StructLayout *SL = Q.DL.getStructLayout(STy);
uint64_t Offset = SL->getElementOffset(Idx);
- TrailZ = std::min<unsigned>(TrailZ,
- countTrailingZeros(Offset));
+ AccConstIndices += Offset;
+ continue;
+ }
+
+ // Handle array index arithmetic.
+ Type *IndexedTy = GTI.getIndexedType();
+ if (!IndexedTy->isSized()) {
+ Known.resetAll();
+ break;
+ }
+
+ unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
+ KnownBits IndexBits(IndexBitWidth);
+ computeKnownBits(Index, IndexBits, Depth + 1, Q);
+ TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
+ uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize();
+ KnownBits ScalingFactor(IndexBitWidth);
+ // Multiply by current sizeof type.
+ // &A[i] == A + i * sizeof(*A[i]).
+ if (IndexTypeSize.isScalable()) {
+ // For scalable types the only thing we know about sizeof is
+ // that this is a multiple of the minimum size.
+ ScalingFactor.Zero.setLowBits(countTrailingZeros(TypeSizeInBytes));
+ } else if (IndexBits.isConstant()) {
+ APInt IndexConst = IndexBits.getConstant();
+ APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
+ IndexConst *= ScalingFactor;
+ AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
+ continue;
} else {
- // Handle array index arithmetic.
- Type *IndexedTy = GTI.getIndexedType();
- if (!IndexedTy->isSized()) {
- TrailZ = 0;
- break;
- }
- unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
- uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy).getKnownMinSize();
- LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
- computeKnownBits(Index, LocalKnown, Depth + 1, Q);
- TrailZ = std::min(TrailZ,
- unsigned(countTrailingZeros(TypeSize) +
- LocalKnown.countMinTrailingZeros()));
+ ScalingFactor =
+ KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
}
- }
+ IndexBits = KnownBits::computeForMul(IndexBits, ScalingFactor);
- Known.Zero.setLowBits(TrailZ);
+ // If the offsets have a different width from the pointer, according
+ // to the language reference we need to sign-extend or truncate them
+ // to the width of the pointer.
+ IndexBits = IndexBits.sextOrTrunc(BitWidth);
+
+ // Note that inbounds does *not* guarantee nsw for the addition, as only
+ // the offset is signed, while the base address is unsigned.
+ Known = KnownBits::computeForAddSub(
+ /*Add=*/true, /*NSW=*/false, Known, IndexBits);
+ }
+ if (!Known.isUnknown() && !AccConstIndices.isNullValue()) {
+ KnownBits Index = KnownBits::makeConstant(AccConstIndices);
+ Known = KnownBits::computeForAddSub(
+ /*Add=*/true, /*NSW=*/false, Known, Index);
+ }
break;
}
case Instruction::PHI: {
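The rewritten GetElementPtr case above folds all constant indices into one accumulator and scales each remaining index by the size of the type it indexes, per &A[i] == A + i * sizeof(*A[i]). A minimal sketch of just that offset arithmetic on plain integers (hypothetical Step struct; the known-bits tracking is left out):

#include <cstdint>
#include <cstdio>
#include <vector>

// One GEP-style step: either a constant byte offset (e.g. a struct field) or a
// variable index scaled by the element size of the indexed type.
struct Step {
  bool IsConstant;
  int64_t ConstBytes;    // used when IsConstant
  int64_t Index;         // used otherwise
  uint64_t ElemSize;     // sizeof the indexed element type
};

int64_t gepByteOffset(const std::vector<Step> &Steps) {
  int64_t AccConst = 0;  // mirrors the single constant accumulator in the hunk
  int64_t Variable = 0;
  for (const Step &S : Steps) {
    if (S.IsConstant)
      AccConst += S.ConstBytes;                   // fold constants separately
    else
      Variable += S.Index * (int64_t)S.ElemSize;  // &A[i] == A + i * sizeof(*A[i])
  }
  return AccConst + Variable;                     // one final addition
}

int main() {
  // A struct field at byte 8, then index 3 into an array of 4-byte elements.
  std::printf("offset = %lld bytes\n",
              (long long)gepByteOffset({{true, 8, 0, 0}, {false, 0, 3, 4}}));
  return 0;
}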
@@ -1593,7 +1454,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) {
+ if (Depth < MaxAnalysisRecursionDepth - 1 && !Known.Zero && !Known.One) {
// Skip if every incoming value references to ourself.
if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
break;
@@ -1615,12 +1476,11 @@ static void computeKnownBitsFromOperator(const Operator *I,
Known2 = KnownBits(BitWidth);
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
- computeKnownBits(IncValue, Known2, MaxDepth - 1, RecQ);
- Known.Zero &= Known2.Zero;
- Known.One &= Known2.One;
+ computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ);
+ Known = KnownBits::commonBits(Known, Known2);
// If all bits have been ruled out, there's no need to check
// more operands.
- if (!Known.Zero && !Known.One)
+ if (Known.isUnknown())
break;
}
}
@@ -1642,6 +1502,12 @@ static void computeKnownBitsFromOperator(const Operator *I,
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::abs: {
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
+ Known = Known2.abs(IntMinIsPoison);
+ break;
+ }
case Intrinsic::bitreverse:
computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
Known.Zero |= Known2.Zero.reverseBits();
@@ -1655,7 +1521,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
case Intrinsic::ctlz: {
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// If we have a known 1, its position is our upper bound.
- unsigned PossibleLZ = Known2.One.countLeadingZeros();
+ unsigned PossibleLZ = Known2.countMaxLeadingZeros();
// If this call is undefined for 0, the result will be less than 2^n.
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
@@ -1666,7 +1532,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
case Intrinsic::cttz: {
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// If we have a known 1, its position is our upper bound.
- unsigned PossibleTZ = Known2.One.countTrailingZeros();
+ unsigned PossibleTZ = Known2.countMaxTrailingZeros();
// If this call is undefined for 0, the result will be less than 2^n.
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
@@ -1737,6 +1603,26 @@ static void computeKnownBitsFromOperator(const Operator *I,
}
break;
}
+ case Intrinsic::umin:
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ Known = KnownBits::umin(Known, Known2);
+ break;
+ case Intrinsic::umax:
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ Known = KnownBits::umax(Known, Known2);
+ break;
+ case Intrinsic::smin:
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ Known = KnownBits::smin(Known, Known2);
+ break;
+ case Intrinsic::smax:
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ Known = KnownBits::smax(Known, Known2);
+ break;
case Intrinsic::x86_sse42_crc32_64_64:
Known.Zero.setBitsFrom(32);
break;
@@ -1769,8 +1655,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
if (!!DemandedRHS) {
const Value *RHS = Shuf->getOperand(1);
computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
break;
}
@@ -1799,8 +1684,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
DemandedVecElts.clearBit(EltIdx);
if (!!DemandedVecElts) {
computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
break;
}
@@ -1850,6 +1734,11 @@ static void computeKnownBitsFromOperator(const Operator *I,
}
}
break;
+ case Instruction::Freeze:
+ if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
+ Depth + 1))
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ break;
}
}
@@ -1895,7 +1784,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
}
assert(V && "No Value?");
- assert(Depth <= MaxDepth && "Limit Search Depth");
+ assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
#ifndef NDEBUG
Type *Ty = V->getType();
@@ -1926,8 +1815,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
const APInt *C;
if (match(V, m_APInt(C))) {
// We know all of the bits for a scalar constant or a splat vector constant!
- Known.One = *C;
- Known.Zero = ~Known.One;
+ Known = KnownBits::makeConstant(*C);
return;
}
// Null and aggregate-zero are all-zeros.
@@ -1982,9 +1870,8 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
// assumptions. Confirm that we've handled them all.
assert(!isa<ConstantData>(V) && "Unhandled constant data!");
- // Limit search depth.
// All recursive calls that increase depth must come after this.
- if (Depth == MaxDepth)
+ if (Depth == MaxAnalysisRecursionDepth)
return;
// A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
@@ -2001,7 +1888,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
// Aligned pointers have trailing zeros - refine Known.Zero set
if (isa<PointerType>(V->getType())) {
Align Alignment = V->getPointerAlignment(Q.DL);
- Known.Zero.setLowBits(countTrailingZeros(Alignment.value()));
+ Known.Zero.setLowBits(Log2(Alignment));
}
// computeKnownBitsFromAssume strictly refines Known.
@@ -2019,7 +1906,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
/// types and vectors of integers.
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q) {
- assert(Depth <= MaxDepth && "Limit Search Depth");
+ assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
// Attempt to match against constants.
if (OrZero && match(V, m_Power2OrZero()))
@@ -2038,7 +1925,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
return true;
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == MaxDepth)
+ if (Depth++ == MaxAnalysisRecursionDepth)
return false;
Value *X = nullptr, *Y = nullptr;
@@ -2167,7 +2054,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
// to recurse 10k times just because we have 10k GEP operands. We don't
// bail completely out because we want to handle constant GEPs regardless
// of depth.
- if (Depth++ >= MaxDepth)
+ if (Depth++ >= MaxAnalysisRecursionDepth)
continue;
if (isKnownNonZero(GTI.getOperand(), Depth, Q))
@@ -2199,7 +2086,8 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
if (auto *CalledFunc = CB->getCalledFunction())
for (const Argument &Arg : CalledFunc->args())
if (CB->getArgOperand(Arg.getArgNo()) == V &&
- Arg.hasNonNullAttr() && DT->dominates(CB, CtxI))
+ Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) &&
+ DT->dominates(CB, CtxI))
return true;
// If the value is used as a load/store, then the pointer must be non null.
@@ -2212,10 +2100,17 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
}
// Consider only compare instructions uniquely controlling a branch
+ Value *RHS;
CmpInst::Predicate Pred;
- if (!match(const_cast<User *>(U),
- m_c_ICmp(Pred, m_Specific(V), m_Zero())) ||
- (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE))
+ if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
+ continue;
+
+ bool NonNullIfTrue;
+ if (cmpExcludesZero(Pred, RHS))
+ NonNullIfTrue = true;
+ else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
+ NonNullIfTrue = false;
+ else
continue;
SmallVector<const User *, 4> WorkList;
@@ -2232,24 +2127,23 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
// propagate "pred != null" condition through AND because it is only
// correct to assume that all conditions of AND are met in true branch.
// TODO: Support similar logic of OR and EQ predicate?
- if (Pred == ICmpInst::ICMP_NE)
- if (auto *BO = dyn_cast<BinaryOperator>(Curr))
- if (BO->getOpcode() == Instruction::And) {
- for (auto *BOU : BO->users())
- if (Visited.insert(BOU).second)
- WorkList.push_back(BOU);
- continue;
- }
+ if (NonNullIfTrue)
+ if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
+ for (auto *CurrU : Curr->users())
+ if (Visited.insert(CurrU).second)
+ WorkList.push_back(CurrU);
+ continue;
+ }
if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
assert(BI->isConditional() && "uses a comparison!");
BasicBlock *NonNullSuccessor =
- BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
+ BI->getSuccessor(NonNullIfTrue ? 0 : 1);
BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
return true;
- } else if (Pred == ICmpInst::ICMP_NE && isGuard(Curr) &&
+ } else if (NonNullIfTrue && isGuard(Curr) &&
DT->dominates(cast<Instruction>(Curr), CtxI)) {
return true;
}
@@ -2302,8 +2196,9 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
// See the comment for IntToPtr/PtrToInt instructions below.
if (CE->getOpcode() == Instruction::IntToPtr ||
CE->getOpcode() == Instruction::PtrToInt)
- if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) <=
- Q.DL.getTypeSizeInBits(CE->getType()))
+ if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType())
+ .getFixedSize() <=
+ Q.DL.getTypeSizeInBits(CE->getType()).getFixedSize())
return isKnownNonZero(CE->getOperand(0), Depth, Q);
}
@@ -2349,19 +2244,24 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
return true;
// Some of the tests below are recursive, so bail out if we hit the limit.
- if (Depth++ >= MaxDepth)
+ if (Depth++ >= MaxAnalysisRecursionDepth)
return false;
// Check for pointer simplifications.
- if (V->getType()->isPointerTy()) {
+
+ if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) {
// Alloca never returns null, malloc might.
if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0)
return true;
- // A byval, inalloca, or nonnull argument is never null.
- if (const Argument *A = dyn_cast<Argument>(V))
- if (A->hasPassPointeeByValueAttr() || A->hasNonNullAttr())
+ // A byval, inalloca may not be null in a non-default address space. A
+ // nonnull argument is assumed never 0.
+ if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (((A->hasPassPointeeByValueCopyAttr() &&
+ !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
+ A->hasNonNullAttr()))
return true;
+ }
// A Load tagged with nonnull metadata is never null.
if (const LoadInst *LI = dyn_cast<LoadInst>(V))
@@ -2389,23 +2289,22 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
// truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
// as casts that can alter the value, e.g., AddrSpaceCasts.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- if (isGEPKnownNonNull(GEP, Depth, Q))
- return true;
+ return isGEPKnownNonNull(GEP, Depth, Q);
if (auto *BCO = dyn_cast<BitCastOperator>(V))
return isKnownNonZero(BCO->getOperand(0), Depth, Q);
if (auto *I2P = dyn_cast<IntToPtrInst>(V))
- if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()) <=
- Q.DL.getTypeSizeInBits(I2P->getDestTy()))
+ if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()).getFixedSize() <=
+ Q.DL.getTypeSizeInBits(I2P->getDestTy()).getFixedSize())
return isKnownNonZero(I2P->getOperand(0), Depth, Q);
}
// Similar to int2ptr above, we can look through ptr2int here if the cast
// is a no-op or an extend and not a truncate.
if (auto *P2I = dyn_cast<PtrToIntInst>(V))
- if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()) <=
- Q.DL.getTypeSizeInBits(P2I->getDestTy()))
+ if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()).getFixedSize() <=
+ Q.DL.getTypeSizeInBits(P2I->getDestTy()).getFixedSize())
return isKnownNonZero(P2I->getOperand(0), Depth, Q);
unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);
@@ -2532,23 +2431,35 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
}
}
}
- // Check if all incoming values are non-zero constant.
- bool AllNonZeroConstants = llvm::all_of(PN->operands(), [](Value *V) {
- return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero();
+ // Check if all incoming values are non-zero using recursion.
+ Query RecQ = Q;
+ unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
+ return llvm::all_of(PN->operands(), [&](const Use &U) {
+ if (U.get() == PN)
+ return true;
+ RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
+ return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ);
});
- if (AllNonZeroConstants)
- return true;
}
// ExtractElement
else if (const auto *EEI = dyn_cast<ExtractElementInst>(V)) {
const Value *Vec = EEI->getVectorOperand();
const Value *Idx = EEI->getIndexOperand();
auto *CIdx = dyn_cast<ConstantInt>(Idx);
- unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
- APInt DemandedVecElts = APInt::getAllOnesValue(NumElts);
- if (CIdx && CIdx->getValue().ult(NumElts))
- DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
- return isKnownNonZero(Vec, DemandedVecElts, Depth, Q);
+ if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
+ unsigned NumElts = VecTy->getNumElements();
+ APInt DemandedVecElts = APInt::getAllOnesValue(NumElts);
+ if (CIdx && CIdx->getValue().ult(NumElts))
+ DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
+ return isKnownNonZero(Vec, DemandedVecElts, Depth, Q);
+ }
+ }
+ // Freeze
+ else if (const FreezeInst *FI = dyn_cast<FreezeInst>(V)) {
+ auto *Op = FI->getOperand(0);
+ if (isKnownNonZero(Op, Depth, Q) &&
+ isGuaranteedNotToBePoison(Op, Q.AC, Q.CxtI, Q.DT, Depth))
+ return true;
}
KnownBits Known(BitWidth);
@@ -2569,7 +2480,8 @@ bool isKnownNonZero(const Value* V, unsigned Depth, const Query& Q) {
}
/// Return true if V2 == V1 + X, where X is known non-zero.
-static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
+static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth,
+ const Query &Q) {
const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
if (!BO || BO->getOpcode() != Instruction::Add)
return false;
@@ -2580,24 +2492,75 @@ static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
Op = BO->getOperand(0);
else
return false;
- return isKnownNonZero(Op, 0, Q);
+ return isKnownNonZero(Op, Depth + 1, Q);
}
+
/// Return true if it is known that V1 != V2.
-static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
+static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
+ const Query &Q) {
if (V1 == V2)
return false;
if (V1->getType() != V2->getType())
// We can't look through casts yet.
return false;
- if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q))
+
+ if (Depth >= MaxAnalysisRecursionDepth)
+ return false;
+
+ // See if we can recurse through (exactly one of) our operands. This
+ // requires our operation be 1-to-1 and map every input value to exactly
+ // one output value. Such an operation is invertible.
+ auto *O1 = dyn_cast<Operator>(V1);
+ auto *O2 = dyn_cast<Operator>(V2);
+ if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
+ switch (O1->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ // Assume operand order has been canonicalized
+ if (O1->getOperand(0) == O2->getOperand(0))
+ return isKnownNonEqual(O1->getOperand(1), O2->getOperand(1),
+ Depth + 1, Q);
+ if (O1->getOperand(1) == O2->getOperand(1))
+ return isKnownNonEqual(O1->getOperand(0), O2->getOperand(0),
+ Depth + 1, Q);
+ break;
+ case Instruction::Mul: {
+ // invertible if A * B == (A * B) mod 2^N where A and B are integers
+ // and N is the bitwidth. The nsw case is non-obvious, but proven by
+ // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
+ auto *OBO1 = cast<OverflowingBinaryOperator>(O1);
+ auto *OBO2 = cast<OverflowingBinaryOperator>(O2);
+ if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
+ (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
+ break;
+
+ // Assume operand order has been canonicalized
+ if (O1->getOperand(1) == O2->getOperand(1) &&
+ isa<ConstantInt>(O1->getOperand(1)) &&
+ !cast<ConstantInt>(O1->getOperand(1))->isZero())
+ return isKnownNonEqual(O1->getOperand(0), O2->getOperand(0),
+ Depth + 1, Q);
+ break;
+ }
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ if (O1->getOperand(0)->getType() == O2->getOperand(0)->getType())
+ return isKnownNonEqual(O1->getOperand(0), O2->getOperand(0),
+ Depth + 1, Q);
+ break;
+ };
+ }
+
+ if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q))
return true;
if (V1->getType()->isIntOrIntVectorTy()) {
// Are any known bits in V1 contradictory to known bits in V2? If V1
// has a known zero where V2 has a known one, they must not be equal.
- KnownBits Known1 = computeKnownBits(V1, 0, Q);
- KnownBits Known2 = computeKnownBits(V2, 0, Q);
+ KnownBits Known1 = computeKnownBits(V1, Depth, Q);
+ KnownBits Known2 = computeKnownBits(V2, Depth, Q);
if (Known1.Zero.intersects(Known2.One) ||
Known2.Zero.intersects(Known1.One))
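The mul case in the hunk above relies on nuw/nsw multiplication by the same nonzero constant being invertible. A standalone sketch, not part of the patch, that checks the nuw half exhaustively at 8 bits (the width and the constant 3 are arbitrary illustration choices; the nsw half is what the alive2 link covers):

// Exhaustive 8-bit check of the nuw case: if x*C and y*C both avoid unsigned
// wrap and C is a nonzero constant, distinct x and y give distinct products,
// which is the invertibility property isKnownNonEqual uses for mul.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned C = 3; // arbitrary nonzero constant multiplier
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = X + 1; Y < 256; ++Y) {
      if (X * C > 255 || Y * C > 255)
        continue;                         // would wrap: nuw precondition fails
      assert((uint8_t)(X * C) != (uint8_t)(Y * C)); // distinct inputs stay distinct
    }
  return 0;
}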
@@ -2709,7 +2672,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
return 1;
#ifndef NDEBUG
- assert(Depth <= MaxDepth && "Limit Search Depth");
+ assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
assert(
@@ -2736,8 +2699,8 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// Note that ConstantInt is handled by the general computeKnownBits case
// below.
- if (Depth == MaxDepth)
- return 1; // Limit search depth.
+ if (Depth == MaxAnalysisRecursionDepth)
+ return 1;
if (auto *U = dyn_cast<Operator>(V)) {
switch (Operator::getOpcode(V)) {
@@ -2929,11 +2892,13 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
- Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q);
- for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
+ Query RecQ = Q;
+ Tmp = TyBits;
+ for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
if (Tmp == 1) return Tmp;
+ RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
Tmp = std::min(
- Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q));
+ Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, RecQ));
}
return Tmp;
}
@@ -2981,10 +2946,22 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// fall-back.
if (Tmp == 1)
break;
- assert(Tmp <= Ty->getScalarSizeInBits() &&
- "Failed to determine minimum sign bits");
+ assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
return Tmp;
}
+ case Instruction::Call: {
+ if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::abs:
+ Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (Tmp == 1) break;
+
+ // Absolute value reduces number of sign bits by at most 1.
+ return Tmp - 1;
+ }
+ }
+ }
}
}
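For the new abs handling above, a standalone sketch, not taken from the patch, illustrating on hypothetical i8 values why absolute value costs at most one sign bit (std::countl_one/std::countl_zero stand in for the sign-bit count; requires C++20):

#include <bit>
#include <cstdint>
#include <cstdio>

// Number of copies of the sign bit (including the sign bit itself) in an i8.
static int numSignBits(int8_t V) {
  uint8_t U = (uint8_t)V;
  return V < 0 ? std::countl_one(U) : std::countl_zero(U);
}

int main() {
  int8_t X = -2;                        // 0b11111110 -> 7 sign bits
  int8_t A = (int8_t)(X < 0 ? -X : X);  // abs(-2) == 2: 0b00000010 -> 6 sign bits
  std::printf("sign bits: %d before abs, %d after\n",
              numSignBits(X), numSignBits(A));
  return 0;
}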
@@ -3012,7 +2989,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
bool LookThroughSExt, unsigned Depth) {
assert(V && "No Value?");
- assert(Depth <= MaxDepth && "Limit Search Depth");
+ assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
Type *T = V->getType();
@@ -3040,7 +3017,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
return true;
}
- if (Depth == MaxDepth) return false; // Limit search depth.
+ if (Depth == MaxAnalysisRecursionDepth) return false;
Operator *I = dyn_cast<Operator>(V);
if (!I) return false;
@@ -3074,11 +3051,11 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
if (Constant *Op1C = dyn_cast<Constant>(Op1))
if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
- if (Op1C->getType()->getPrimitiveSizeInBits() <
- MulC->getType()->getPrimitiveSizeInBits())
+ if (Op1C->getType()->getPrimitiveSizeInBits().getFixedSize() <
+ MulC->getType()->getPrimitiveSizeInBits().getFixedSize())
Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
- if (Op1C->getType()->getPrimitiveSizeInBits() >
- MulC->getType()->getPrimitiveSizeInBits())
+ if (Op1C->getType()->getPrimitiveSizeInBits().getFixedSize() >
+ MulC->getType()->getPrimitiveSizeInBits().getFixedSize())
MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
// V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
@@ -3098,11 +3075,11 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
if (Constant *Op0C = dyn_cast<Constant>(Op0))
if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
- if (Op0C->getType()->getPrimitiveSizeInBits() <
- MulC->getType()->getPrimitiveSizeInBits())
+ if (Op0C->getType()->getPrimitiveSizeInBits().getFixedSize() <
+ MulC->getType()->getPrimitiveSizeInBits().getFixedSize())
Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
- if (Op0C->getType()->getPrimitiveSizeInBits() >
- MulC->getType()->getPrimitiveSizeInBits())
+ if (Op0C->getType()->getPrimitiveSizeInBits().getFixedSize() >
+ MulC->getType()->getPrimitiveSizeInBits().getFixedSize())
MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
// V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
@@ -3242,8 +3219,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
if (auto *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegZero();
- // Limit search depth.
- if (Depth == MaxDepth)
+ if (Depth == MaxAnalysisRecursionDepth)
return false;
auto *Op = dyn_cast<Operator>(V);
@@ -3311,8 +3287,8 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
}
}
- if (Depth == MaxDepth)
- return false; // Limit search depth.
+ if (Depth == MaxAnalysisRecursionDepth)
+ return false;
const Operator *I = dyn_cast<Operator>(V);
if (!I)
@@ -3464,7 +3440,7 @@ bool llvm::isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI,
if (auto *CFP = dyn_cast<ConstantFP>(V))
return !CFP->isInfinity();
- if (Depth == MaxDepth)
+ if (Depth == MaxAnalysisRecursionDepth)
return false;
if (auto *Inst = dyn_cast<Instruction>(V)) {
@@ -3473,20 +3449,30 @@ bool llvm::isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI,
return isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1) &&
isKnownNeverInfinity(Inst->getOperand(2), TLI, Depth + 1);
}
- case Instruction::UIToFP:
- // If the input type fits into the floating type the result is finite.
- return ilogb(APFloat::getLargest(
- Inst->getType()->getScalarType()->getFltSemantics())) >=
- (int)Inst->getOperand(0)->getType()->getScalarSizeInBits();
+ case Instruction::SIToFP:
+ case Instruction::UIToFP: {
+ // Get width of largest magnitude integer (remove a bit if signed).
+ // This still works for a signed minimum value because the largest FP
+ // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
+ int IntSize = Inst->getOperand(0)->getType()->getScalarSizeInBits();
+ if (Inst->getOpcode() == Instruction::SIToFP)
+ --IntSize;
+
+ // If the exponent of the largest finite FP value can hold the largest
+ // integer, the result of the cast must be finite.
+ Type *FPTy = Inst->getType()->getScalarType();
+ return ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize;
+ }
default:
break;
}
}
// try to handle fixed width vector constants
- if (isa<FixedVectorType>(V->getType()) && isa<Constant>(V)) {
+ auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
+ if (VFVTy && isa<Constant>(V)) {
// For vectors, verify that each element is not infinity.
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+ unsigned NumElts = VFVTy->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
if (!Elt)
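A standalone sketch of the SIToFP/UIToFP exponent test above, using a hypothetical i32 to float cast with std::ilogb in place of ilogb(APFloat::getLargest(...)); not part of the patch:

#include <cfloat>
#include <cmath>
#include <cstdio>

int main() {
  int MaxFloatExp = std::ilogb(FLT_MAX);  // 127 for IEEE single precision
  int UIToFPBits = 32;                    // uitofp i32 %x to float
  int SIToFPBits = 32 - 1;                // sitofp drops one magnitude bit
  std::printf("uitofp i32->float always finite: %s\n",
              MaxFloatExp >= UIToFPBits ? "yes" : "no");
  std::printf("sitofp i32->float always finite: %s\n",
              MaxFloatExp >= SIToFPBits ? "yes" : "no");
  return 0;
}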
@@ -3518,7 +3504,7 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
if (auto *CFP = dyn_cast<ConstantFP>(V))
return !CFP->isNaN();
- if (Depth == MaxDepth)
+ if (Depth == MaxAnalysisRecursionDepth)
return false;
if (auto *Inst = dyn_cast<Instruction>(V)) {
@@ -3588,9 +3574,10 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
}
// Try to handle fixed width vector constants
- if (isa<FixedVectorType>(V->getType()) && isa<Constant>(V)) {
+ auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
+ if (VFVTy && isa<Constant>(V)) {
// For vectors, verify that each element is not NaN.
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+ unsigned NumElts = VFVTy->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
if (!Elt)
@@ -3668,12 +3655,13 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr) {
- auto PS = DL.getPointerSizeInBits(
- cast<PointerType>(CE->getType())->getAddressSpace());
- return isBytewiseValue(
- ConstantExpr::getIntegerCast(CE->getOperand(0),
- Type::getIntNTy(Ctx, PS), false),
- DL);
+ if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) {
+ unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace());
+ return isBytewiseValue(
+ ConstantExpr::getIntegerCast(CE->getOperand(0),
+ Type::getIntNTy(Ctx, BitWidth), false),
+ DL);
+ }
}
}
@@ -4142,8 +4130,7 @@ static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
return true;
}
-Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
- unsigned MaxLookup) {
+Value *llvm::getUnderlyingObject(Value *V, unsigned MaxLookup) {
if (!V->getType()->isPointerTy())
return V;
for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
@@ -4188,16 +4175,15 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
return V;
}
-void llvm::GetUnderlyingObjects(const Value *V,
+void llvm::getUnderlyingObjects(const Value *V,
SmallVectorImpl<const Value *> &Objects,
- const DataLayout &DL, LoopInfo *LI,
- unsigned MaxLookup) {
+ LoopInfo *LI, unsigned MaxLookup) {
SmallPtrSet<const Value *, 4> Visited;
SmallVector<const Value *, 4> Worklist;
Worklist.push_back(V);
do {
const Value *P = Worklist.pop_back_val();
- P = GetUnderlyingObject(P, DL, MaxLookup);
+ P = getUnderlyingObject(P, MaxLookup);
if (!Visited.insert(P).second)
continue;
@@ -4221,8 +4207,7 @@ void llvm::GetUnderlyingObjects(const Value *V,
// underlying objects.
if (!LI || !LI->isLoopHeader(PN->getParent()) ||
isSameUnderlyingObjectInLoop(PN, LI))
- for (Value *IncValue : PN->incoming_values())
- Worklist.push_back(IncValue);
+ append_range(Worklist, PN->incoming_values());
continue;
}
@@ -4258,19 +4243,18 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
} while (true);
}
-/// This is a wrapper around GetUnderlyingObjects and adds support for basic
+/// This is a wrapper around getUnderlyingObjects and adds support for basic
/// ptrtoint+arithmetic+inttoptr sequences.
-/// It returns false if unidentified object is found in GetUnderlyingObjects.
+/// It returns false if unidentified object is found in getUnderlyingObjects.
bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
- SmallVectorImpl<Value *> &Objects,
- const DataLayout &DL) {
+ SmallVectorImpl<Value *> &Objects) {
SmallPtrSet<const Value *, 16> Visited;
SmallVector<const Value *, 4> Working(1, V);
do {
V = Working.pop_back_val();
SmallVector<const Value *, 4> Objs;
- GetUnderlyingObjects(V, Objs, DL);
+ getUnderlyingObjects(V, Objs);
for (const Value *V : Objs) {
if (!Visited.insert(V).second)
@@ -4283,7 +4267,7 @@ bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
continue;
}
}
- // If GetUnderlyingObjects fails to find an identifiable object,
+ // If getUnderlyingObjects fails to find an identifiable object,
// getUnderlyingObjectsForCodeGen also fails for safety.
if (!isIdentifiedObject(V)) {
Objects.clear();
@@ -4295,18 +4279,72 @@ bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
return true;
}
-/// Return true if the only users of this pointer are lifetime markers.
-bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
+ AllocaInst *Result = nullptr;
+ SmallPtrSet<Value *, 4> Visited;
+ SmallVector<Value *, 4> Worklist;
+
+ auto AddWork = [&](Value *V) {
+ if (Visited.insert(V).second)
+ Worklist.push_back(V);
+ };
+
+ AddWork(V);
+ do {
+ V = Worklist.pop_back_val();
+ assert(Visited.count(V));
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ if (Result && Result != AI)
+ return nullptr;
+ Result = AI;
+ } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ AddWork(CI->getOperand(0));
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ for (Value *IncValue : PN->incoming_values())
+ AddWork(IncValue);
+ } else if (auto *SI = dyn_cast<SelectInst>(V)) {
+ AddWork(SI->getTrueValue());
+ AddWork(SI->getFalseValue());
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ if (OffsetZero && !GEP->hasAllZeroIndices())
+ return nullptr;
+ AddWork(GEP->getPointerOperand());
+ } else {
+ return nullptr;
+ }
+ } while (!Worklist.empty());
+
+ return Result;
+}
+
+static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
+ const Value *V, bool AllowLifetime, bool AllowDroppable) {
for (const User *U : V->users()) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
- if (!II) return false;
-
- if (!II->isLifetimeStartOrEnd())
+ if (!II)
return false;
+
+ if (AllowLifetime && II->isLifetimeStartOrEnd())
+ continue;
+
+ if (AllowDroppable && II->isDroppable())
+ continue;
+
+ return false;
}
return true;
}
+bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+ return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
+ V, /* AllowLifetime */ true, /* AllowDroppable */ false);
+}
+bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
+ return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
+ V, /* AllowLifetime */ true, /* AllowDroppable */ true);
+}
+
bool llvm::mustSuppressSpeculation(const LoadInst &LI) {
if (!LI.isUnordered())
return true;
@@ -4352,7 +4390,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
if (*Denominator == 0)
return false;
// It's safe to hoist if the denominator is not 0 or -1.
- if (*Denominator != -1)
+ if (!Denominator->isAllOnesValue())
return true;
// At this point we know that the denominator is -1. It is safe to hoist as
// long we know that the numerator is not INT_MIN.
@@ -4660,31 +4698,30 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
}
-bool llvm::canCreatePoison(const Instruction *I) {
+static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) {
// See whether I has flags that may create poison
- if (isa<OverflowingBinaryOperator>(I) &&
- (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
- return true;
- if (isa<PossiblyExactOperator>(I) && I->isExact())
- return true;
- if (auto *FP = dyn_cast<FPMathOperator>(I)) {
+ if (const auto *OvOp = dyn_cast<OverflowingBinaryOperator>(Op)) {
+ if (OvOp->hasNoSignedWrap() || OvOp->hasNoUnsignedWrap())
+ return true;
+ }
+ if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(Op))
+ if (ExactOp->isExact())
+ return true;
+ if (const auto *FP = dyn_cast<FPMathOperator>(Op)) {
auto FMF = FP->getFastMathFlags();
if (FMF.noNaNs() || FMF.noInfs())
return true;
}
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
- if (GEP->isInBounds())
- return true;
- unsigned Opcode = I->getOpcode();
+ unsigned Opcode = Op->getOpcode();
- // Check whether opcode is a poison-generating operation
+ // Check whether opcode is a poison/undef-generating operation
switch (Opcode) {
case Instruction::Shl:
case Instruction::AShr:
case Instruction::LShr: {
// Shifts return poison if shiftwidth is larger than the bitwidth.
- if (auto *C = dyn_cast<Constant>(I->getOperand(1))) {
+ if (auto *C = dyn_cast<Constant>(Op->getOperand(1))) {
SmallVector<Constant *, 4> ShiftAmounts;
if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) {
unsigned NumElts = FVTy->getNumElements();
@@ -4696,8 +4733,8 @@ bool llvm::canCreatePoison(const Instruction *I) {
ShiftAmounts.push_back(C);
bool Safe = llvm::all_of(ShiftAmounts, [](Constant *C) {
- auto *CI = dyn_cast<ConstantInt>(C);
- return CI && CI->getZExtValue() < C->getType()->getIntegerBitWidth();
+ auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth());
});
return !Safe;
}
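The shift-amount check above reduces to an unsigned comparison against the bit width; a trivial standalone stand-in, with plain integers assumed in place of ConstantInt/APInt:

#include <cstdio>

int main() {
  const unsigned BitWidth = 32;
  const unsigned Amounts[] = {0, 31, 32, 40};
  for (unsigned Amt : Amounts)
    // A shift is guaranteed not to produce poison only when Amt < BitWidth,
    // mirroring CI->getValue().ult(C->getType()->getIntegerBitWidth()).
    std::printf("shift by %u: %s\n", Amt,
                Amt < BitWidth ? "never poison" : "may be poison");
  return 0;
}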
@@ -4710,72 +4747,138 @@ bool llvm::canCreatePoison(const Instruction *I) {
return true;
case Instruction::Call:
case Instruction::CallBr:
- case Instruction::Invoke:
- // Function calls can return a poison value even if args are non-poison
- // values.
- return true;
+ case Instruction::Invoke: {
+ const auto *CB = cast<CallBase>(Op);
+ return !CB->hasRetAttr(Attribute::NoUndef);
+ }
case Instruction::InsertElement:
case Instruction::ExtractElement: {
// If index exceeds the length of the vector, it returns poison
- auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
- unsigned IdxOp = I->getOpcode() == Instruction::InsertElement ? 2 : 1;
- auto *Idx = dyn_cast<ConstantInt>(I->getOperand(IdxOp));
- if (!Idx || Idx->getZExtValue() >= VTy->getElementCount().Min)
+ auto *VTy = cast<VectorType>(Op->getOperand(0)->getType());
+ unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1;
+ auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp));
+ if (!Idx || Idx->getValue().uge(VTy->getElementCount().getKnownMinValue()))
return true;
return false;
}
+ case Instruction::ShuffleVector: {
+ // shufflevector may return undef.
+ if (PoisonOnly)
+ return false;
+ ArrayRef<int> Mask = isa<ConstantExpr>(Op)
+ ? cast<ConstantExpr>(Op)->getShuffleMask()
+ : cast<ShuffleVectorInst>(Op)->getShuffleMask();
+ return is_contained(Mask, UndefMaskElem);
+ }
case Instruction::FNeg:
case Instruction::PHI:
case Instruction::Select:
case Instruction::URem:
case Instruction::SRem:
- case Instruction::ShuffleVector:
case Instruction::ExtractValue:
case Instruction::InsertValue:
case Instruction::Freeze:
case Instruction::ICmp:
case Instruction::FCmp:
- case Instruction::GetElementPtr:
return false;
- default:
- if (isa<CastInst>(I))
+ case Instruction::GetElementPtr: {
+ const auto *GEP = cast<GEPOperator>(Op);
+ return GEP->isInBounds();
+ }
+ default: {
+ const auto *CE = dyn_cast<ConstantExpr>(Op);
+ if (isa<CastInst>(Op) || (CE && CE->isCast()))
return false;
- else if (isa<BinaryOperator>(I))
+ else if (Instruction::isBinaryOp(Opcode))
return false;
// Be conservative and return true.
return true;
}
+ }
}
-bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V,
- const Instruction *CtxI,
- const DominatorTree *DT,
- unsigned Depth) {
+bool llvm::canCreateUndefOrPoison(const Operator *Op) {
+ return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false);
+}
+
+bool llvm::canCreatePoison(const Operator *Op) {
+ return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true);
+}
+
+static bool directlyImpliesPoison(const Value *ValAssumedPoison,
+ const Value *V, unsigned Depth) {
+ if (ValAssumedPoison == V)
+ return true;
+
+ const unsigned MaxDepth = 2;
if (Depth >= MaxDepth)
return false;
- // If the value is a freeze instruction, then it can never
- // be undef or poison.
- if (isa<FreezeInst>(V))
+ const auto *I = dyn_cast<Instruction>(V);
+ if (I && propagatesPoison(cast<Operator>(I))) {
+ return any_of(I->operands(), [=](const Value *Op) {
+ return directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1);
+ });
+ }
+ return false;
+}
+
+static bool impliesPoison(const Value *ValAssumedPoison, const Value *V,
+ unsigned Depth) {
+ if (isGuaranteedNotToBeUndefOrPoison(ValAssumedPoison))
return true;
- // TODO: Some instructions are guaranteed to return neither undef
- // nor poison if their arguments are not poison/undef.
+
+ if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0))
+ return true;
+
+ const unsigned MaxDepth = 2;
+ if (Depth >= MaxDepth)
+ return false;
+
+ const auto *I = dyn_cast<Instruction>(ValAssumedPoison);
+ if (I && !canCreatePoison(cast<Operator>(I))) {
+ return all_of(I->operands(), [=](const Value *Op) {
+ return impliesPoison(Op, V, Depth + 1);
+ });
+ }
+ return false;
+}
+
+bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
+ return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0);
+}
+
+static bool programUndefinedIfUndefOrPoison(const Value *V,
+ bool PoisonOnly);
+
+static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
+ AssumptionCache *AC,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ unsigned Depth, bool PoisonOnly) {
+ if (Depth >= MaxAnalysisRecursionDepth)
+ return false;
+
+ if (isa<MetadataAsValue>(V))
+ return false;
+
+ if (const auto *A = dyn_cast<Argument>(V)) {
+ if (A->hasAttribute(Attribute::NoUndef))
+ return true;
+ }
if (auto *C = dyn_cast<Constant>(V)) {
- // TODO: We can analyze ConstExpr by opcode to determine if there is any
- // possibility of poison.
- if (isa<UndefValue>(C) || isa<ConstantExpr>(C))
- return false;
+ if (isa<UndefValue>(C))
+ return PoisonOnly && !isa<PoisonValue>(C);
if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) ||
isa<ConstantPointerNull>(C) || isa<Function>(C))
return true;
- if (C->getType()->isVectorTy())
- return !C->containsUndefElement() && !C->containsConstantExpression();
-
- // TODO: Recursively analyze aggregates or other constants.
- return false;
+ if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C))
+ return (PoisonOnly ? !C->containsPoisonElement()
+ : !C->containsUndefOrPoisonElement()) &&
+ !C->containsConstantExpression();
}
// Strip cast operations from a pointer value.
@@ -4792,40 +4895,45 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V,
return true;
auto OpCheck = [&](const Value *V) {
- return isGuaranteedNotToBeUndefOrPoison(V, CtxI, DT, Depth + 1);
+ return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1,
+ PoisonOnly);
};
- if (auto *I = dyn_cast<Instruction>(V)) {
- switch (I->getOpcode()) {
- case Instruction::GetElementPtr: {
- auto *GEPI = dyn_cast<GetElementPtrInst>(I);
- if (!GEPI->isInBounds() && llvm::all_of(GEPI->operands(), OpCheck))
- return true;
- break;
- }
- case Instruction::FCmp: {
- auto *FI = dyn_cast<FCmpInst>(I);
- if (FI->getFastMathFlags().none() &&
- llvm::all_of(FI->operands(), OpCheck))
- return true;
- break;
- }
- case Instruction::BitCast:
- case Instruction::PHI:
- case Instruction::ICmp:
- if (llvm::all_of(I->operands(), OpCheck))
+ if (auto *Opr = dyn_cast<Operator>(V)) {
+ // If the value is a freeze instruction, then it can never
+ // be undef or poison.
+ if (isa<FreezeInst>(V))
+ return true;
+
+ if (const auto *CB = dyn_cast<CallBase>(V)) {
+ if (CB->hasRetAttr(Attribute::NoUndef))
return true;
- break;
- default:
- break;
}
- if (programUndefinedIfPoison(I) && I->getType()->isIntegerTy(1))
- // Note: once we have an agreement that poison is a value-wise concept,
- // we can remove the isIntegerTy(1) constraint.
+ if (const auto *PN = dyn_cast<PHINode>(V)) {
+ unsigned Num = PN->getNumIncomingValues();
+ bool IsWellDefined = true;
+ for (unsigned i = 0; i < Num; ++i) {
+ auto *TI = PN->getIncomingBlock(i)->getTerminator();
+ if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI,
+ DT, Depth + 1, PoisonOnly)) {
+ IsWellDefined = false;
+ break;
+ }
+ }
+ if (IsWellDefined)
+ return true;
+ } else if (!canCreateUndefOrPoison(Opr) && all_of(Opr->operands(), OpCheck))
return true;
}
+ if (auto *I = dyn_cast<LoadInst>(V))
+ if (I->getMetadata(LLVMContext::MD_noundef))
+ return true;
+
+ if (programUndefinedIfUndefOrPoison(V, PoisonOnly))
+ return true;
+
// CxtI may be null or a cloned instruction.
if (!CtxI || !CtxI->getParent() || !DT)
return false;
@@ -4844,20 +4952,48 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V,
while (Dominator) {
auto *TI = Dominator->getBlock()->getTerminator();
+ Value *Cond = nullptr;
if (auto BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isConditional() && BI->getCondition() == V)
- return true;
+ if (BI->isConditional())
+ Cond = BI->getCondition();
} else if (auto SI = dyn_cast<SwitchInst>(TI)) {
- if (SI->getCondition() == V)
+ Cond = SI->getCondition();
+ }
+
+ if (Cond) {
+ if (Cond == V)
return true;
+ else if (PoisonOnly && isa<Operator>(Cond)) {
+ // For poison, we can analyze further
+ auto *Opr = cast<Operator>(Cond);
+ if (propagatesPoison(Opr) && is_contained(Opr->operand_values(), V))
+ return true;
+ }
}
Dominator = Dominator->getIDom();
}
+ SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NoUndef};
+ if (getKnowledgeValidInContext(V, AttrKinds, CtxI, DT, AC))
+ return true;
+
return false;
}
+bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ unsigned Depth) {
+ return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, false);
+}
+
+bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
+ const Instruction *CtxI,
+ const DominatorTree *DT, unsigned Depth) {
+ return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, true);
+}
+
OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
const DataLayout &DL,
AssumptionCache *AC,
@@ -4882,49 +5018,14 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
// arbitrary length of time, but programs aren't allowed to rely on that.
// If there is no successor, then execution can't transfer to it.
- if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
- return !CRI->unwindsToCaller();
- if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
- return !CatchSwitch->unwindsToCaller();
- if (isa<ResumeInst>(I))
- return false;
if (isa<ReturnInst>(I))
return false;
if (isa<UnreachableInst>(I))
return false;
- // Calls can throw, or contain an infinite loop, or kill the process.
- if (const auto *CB = dyn_cast<CallBase>(I)) {
- // Call sites that throw have implicit non-local control flow.
- if (!CB->doesNotThrow())
- return false;
-
- // A function which doens't throw and has "willreturn" attribute will
- // always return.
- if (CB->hasFnAttr(Attribute::WillReturn))
- return true;
-
- // Non-throwing call sites can loop infinitely, call exit/pthread_exit
- // etc. and thus not return. However, LLVM already assumes that
- //
- // - Thread exiting actions are modeled as writes to memory invisible to
- // the program.
- //
- // - Loops that don't have side effects (side effects are volatile/atomic
- // stores and IO) always terminate (see http://llvm.org/PR965).
- // Furthermore IO itself is also modeled as writes to memory invisible to
- // the program.
- //
- // We rely on those assumptions here, and use the memory effects of the call
- // target as a proxy for checking that it always returns.
-
- // FIXME: This isn't aggressive enough; a call which only writes to a global
- // is guaranteed to return.
- return CB->onlyReadsMemory() || CB->onlyAccessesArgMemory();
- }
-
- // Other instructions return normally.
- return true;
+ // An instruction that returns without throwing must transfer control flow
+ // to a successor.
+ return !I->mayThrow() && I->willReturn();
}
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
@@ -4951,7 +5052,7 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
llvm_unreachable("Instruction not contained in its own parent basic block.");
}
-bool llvm::propagatesPoison(const Instruction *I) {
+bool llvm::propagatesPoison(const Operator *I) {
switch (I->getOpcode()) {
case Instruction::Freeze:
case Instruction::Select:
@@ -4972,86 +5073,141 @@ bool llvm::propagatesPoison(const Instruction *I) {
}
}
-const Value *llvm::getGuaranteedNonPoisonOp(const Instruction *I) {
+void llvm::getGuaranteedNonPoisonOps(const Instruction *I,
+ SmallPtrSetImpl<const Value *> &Operands) {
switch (I->getOpcode()) {
case Instruction::Store:
- return cast<StoreInst>(I)->getPointerOperand();
+ Operands.insert(cast<StoreInst>(I)->getPointerOperand());
+ break;
case Instruction::Load:
- return cast<LoadInst>(I)->getPointerOperand();
+ Operands.insert(cast<LoadInst>(I)->getPointerOperand());
+ break;
case Instruction::AtomicCmpXchg:
- return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+ Operands.insert(cast<AtomicCmpXchgInst>(I)->getPointerOperand());
+ break;
case Instruction::AtomicRMW:
- return cast<AtomicRMWInst>(I)->getPointerOperand();
+ Operands.insert(cast<AtomicRMWInst>(I)->getPointerOperand());
+ break;
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
- return I->getOperand(1);
+ Operands.insert(I->getOperand(1));
+ break;
case Instruction::Call:
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::assume:
- return II->getArgOperand(0);
- default:
- return nullptr;
- }
+ case Instruction::Invoke: {
+ const CallBase *CB = cast<CallBase>(I);
+ if (CB->isIndirectCall())
+ Operands.insert(CB->getCalledOperand());
+ for (unsigned i = 0; i < CB->arg_size(); ++i) {
+ if (CB->paramHasAttr(i, Attribute::NoUndef))
+ Operands.insert(CB->getArgOperand(i));
}
- return nullptr;
+ break;
+ }
default:
- return nullptr;
+ break;
}
}
bool llvm::mustTriggerUB(const Instruction *I,
const SmallSet<const Value *, 16>& KnownPoison) {
- auto *NotPoison = getGuaranteedNonPoisonOp(I);
- return (NotPoison && KnownPoison.count(NotPoison));
-}
+ SmallPtrSet<const Value *, 4> NonPoisonOps;
+ getGuaranteedNonPoisonOps(I, NonPoisonOps);
+
+ for (const auto *V : NonPoisonOps)
+ if (KnownPoison.count(V))
+ return true;
+ return false;
+}
-bool llvm::programUndefinedIfPoison(const Instruction *PoisonI) {
- // We currently only look for uses of poison values within the same basic
+static bool programUndefinedIfUndefOrPoison(const Value *V,
+ bool PoisonOnly) {
+ // We currently only look for uses of values within the same basic
// block, as that makes it easier to guarantee that the uses will be
- // executed given that PoisonI is executed.
+ // executed given that Inst is executed.
//
// FIXME: Expand this to consider uses beyond the same basic block. To do
// this, look out for the distinction between post-dominance and strong
// post-dominance.
- const BasicBlock *BB = PoisonI->getParent();
+ const BasicBlock *BB = nullptr;
+ BasicBlock::const_iterator Begin;
+ if (const auto *Inst = dyn_cast<Instruction>(V)) {
+ BB = Inst->getParent();
+ Begin = Inst->getIterator();
+ Begin++;
+ } else if (const auto *Arg = dyn_cast<Argument>(V)) {
+ BB = &Arg->getParent()->getEntryBlock();
+ Begin = BB->begin();
+ } else {
+ return false;
+ }
+
+ // Limit number of instructions we look at, to avoid scanning through large
+ // blocks. The current limit is chosen arbitrarily.
+ unsigned ScanLimit = 32;
+ BasicBlock::const_iterator End = BB->end();
+
+ if (!PoisonOnly) {
+ // Be conservative & just check whether a value is passed to a noundef
+ // argument.
+ // Instructions that raise UB with a poison operand are well-defined
+ // or have unclear semantics when the input is partially undef.
+ // For example, 'udiv x, (undef | 1)' isn't UB.
+
+ for (auto &I : make_range(Begin, End)) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (--ScanLimit == 0)
+ break;
- // Set of instructions that we have proved will yield poison if PoisonI
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ for (unsigned i = 0; i < CB->arg_size(); ++i) {
+ if (CB->paramHasAttr(i, Attribute::NoUndef) &&
+ CB->getArgOperand(i) == V)
+ return true;
+ }
+ }
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ break;
+ }
+ return false;
+ }
+
+ // Set of instructions that we have proved will yield poison if Inst
// does.
SmallSet<const Value *, 16> YieldsPoison;
SmallSet<const BasicBlock *, 4> Visited;
- YieldsPoison.insert(PoisonI);
- Visited.insert(PoisonI->getParent());
- BasicBlock::const_iterator Begin = PoisonI->getIterator(), End = BB->end();
+ YieldsPoison.insert(V);
+ auto Propagate = [&](const User *User) {
+ if (propagatesPoison(cast<Operator>(User)))
+ YieldsPoison.insert(User);
+ };
+ for_each(V->users(), Propagate);
+ Visited.insert(BB);
- unsigned Iter = 0;
- while (Iter++ < MaxDepth) {
+ while (true) {
for (auto &I : make_range(Begin, End)) {
- if (&I != PoisonI) {
- if (mustTriggerUB(&I, YieldsPoison))
- return true;
- if (!isGuaranteedToTransferExecutionToSuccessor(&I))
- return false;
- }
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (--ScanLimit == 0)
+ return false;
+ if (mustTriggerUB(&I, YieldsPoison))
+ return true;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ return false;
// Mark poison that propagates from I through uses of I.
- if (YieldsPoison.count(&I)) {
- for (const User *User : I.users()) {
- const Instruction *UserI = cast<Instruction>(User);
- if (propagatesPoison(UserI))
- YieldsPoison.insert(User);
- }
- }
+ if (YieldsPoison.count(&I))
+ for_each(I.users(), Propagate);
}
if (auto *NextBB = BB->getSingleSuccessor()) {
@@ -5068,6 +5224,14 @@ bool llvm::programUndefinedIfPoison(const Instruction *PoisonI) {
return false;
}
+bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) {
+ return ::programUndefinedIfUndefOrPoison(Inst, false);
+}
+
+bool llvm::programUndefinedIfPoison(const Instruction *Inst) {
+ return ::programUndefinedIfUndefOrPoison(Inst, true);
+}
+
static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
if (FMF.noNaNs())
return true;
@@ -5430,10 +5594,10 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
// elements because those can not be back-propagated for analysis.
Value *OutputZeroVal = nullptr;
if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) &&
- !cast<Constant>(TrueVal)->containsUndefElement())
+ !cast<Constant>(TrueVal)->containsUndefOrPoisonElement())
OutputZeroVal = TrueVal;
else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) &&
- !cast<Constant>(FalseVal)->containsUndefElement())
+ !cast<Constant>(FalseVal)->containsUndefOrPoisonElement())
OutputZeroVal = FalseVal;
if (OutputZeroVal) {
@@ -5710,7 +5874,7 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp,
unsigned Depth) {
- if (Depth >= MaxDepth)
+ if (Depth >= MaxAnalysisRecursionDepth)
return {SPF_UNKNOWN, SPNB_NA, false};
SelectInst *SI = dyn_cast<SelectInst>(V);
@@ -5789,6 +5953,46 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) {
return getMinMaxPred(getInverseMinMaxFlavor(SPF));
}
+std::pair<Intrinsic::ID, bool>
+llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
+ // Check if VL contains select instructions that can be folded into a min/max
+ // vector intrinsic and return the intrinsic if it is possible.
+ // TODO: Support floating point min/max.
+ bool AllCmpSingleUse = true;
+ SelectPatternResult SelectPattern;
+ SelectPattern.Flavor = SPF_UNKNOWN;
+ if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
+ Value *LHS, *RHS;
+ auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
+ if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) ||
+ CurrentPattern.Flavor == SPF_FMINNUM ||
+ CurrentPattern.Flavor == SPF_FMAXNUM ||
+ !I->getType()->isIntOrIntVectorTy())
+ return false;
+ if (SelectPattern.Flavor != SPF_UNKNOWN &&
+ SelectPattern.Flavor != CurrentPattern.Flavor)
+ return false;
+ SelectPattern = CurrentPattern;
+ AllCmpSingleUse &=
+ match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
+ return true;
+ })) {
+ switch (SelectPattern.Flavor) {
+ case SPF_SMIN:
+ return {Intrinsic::smin, AllCmpSingleUse};
+ case SPF_UMIN:
+ return {Intrinsic::umin, AllCmpSingleUse};
+ case SPF_SMAX:
+ return {Intrinsic::smax, AllCmpSingleUse};
+ case SPF_UMAX:
+ return {Intrinsic::umax, AllCmpSingleUse};
+ default:
+ llvm_unreachable("unexpected select pattern flavor");
+ }
+ }
+ return {Intrinsic::not_intrinsic, false};
+}
+
/// Return true if "icmp Pred LHS RHS" is always true.
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
const Value *RHS, const DataLayout &DL,
@@ -5968,25 +6172,25 @@ static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
/// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
/// false. Otherwise, return None if we can't infer anything. We expect the
-/// RHS to be an icmp and the LHS to be an 'and' or an 'or' instruction.
+/// RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select' instruction.
static Optional<bool>
-isImpliedCondAndOr(const BinaryOperator *LHS, CmpInst::Predicate RHSPred,
+isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred,
const Value *RHSOp0, const Value *RHSOp1,
-
const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
- // The LHS must be an 'or' or an 'and' instruction.
+ // The LHS must be an 'or', 'and', or a 'select' instruction.
assert((LHS->getOpcode() == Instruction::And ||
- LHS->getOpcode() == Instruction::Or) &&
- "Expected LHS to be 'and' or 'or'.");
+ LHS->getOpcode() == Instruction::Or ||
+ LHS->getOpcode() == Instruction::Select) &&
+ "Expected LHS to be 'and', 'or', or 'select'.");
- assert(Depth <= MaxDepth && "Hit recursion limit");
+ assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");
// If the result of an 'or' is false, then we know both legs of the 'or' are
// false. Similarly, if the result of an 'and' is true, then we know both
// legs of the 'and' are true.
- Value *ALHS, *ARHS;
- if ((!LHSIsTrue && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) ||
- (LHSIsTrue && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) {
+ const Value *ALHS, *ARHS;
+ if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) ||
+ (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) {
// FIXME: Make this non-recursion.
if (Optional<bool> Implication = isImpliedCondition(
ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
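A standalone truth-table check, not from the patch, of the implication rule above, with plain bools standing in for i1 and the select forms that m_LogicalAnd/m_LogicalOr now match written out explicitly:

#include <cassert>

int main() {
  for (bool A : {false, true})
    for (bool B : {false, true}) {
      bool Or  = A ? true : B;   // select A, true, B  -- logical or
      bool And = A ? B : false;  // select A, B, false -- logical and
      if (!Or)
        assert(!A && !B);        // a false 'or' makes both legs false
      if (And)
        assert(A && B);          // a true 'and' makes both legs true
    }
  return 0;
}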
@@ -6004,7 +6208,7 @@ llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred,
const Value *RHSOp0, const Value *RHSOp1,
const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
// Bail out when we hit the limit.
- if (Depth == MaxDepth)
+ if (Depth == MaxAnalysisRecursionDepth)
return None;
// A mismatch occurs when we compare a scalar cmp to a vector cmp, for
@@ -6027,13 +6231,14 @@ llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred,
return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
Depth);
- /// The LHS should be an 'or' or an 'and' instruction. We expect the RHS to
- /// be / an icmp. FIXME: Add support for and/or on the RHS.
- const BinaryOperator *LHSBO = dyn_cast<BinaryOperator>(LHS);
- if (LHSBO) {
- if ((LHSBO->getOpcode() == Instruction::And ||
- LHSBO->getOpcode() == Instruction::Or))
- return isImpliedCondAndOr(LHSBO, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
+ /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect
+ /// the RHS to be an icmp.
+ /// FIXME: Add support for and/or/select on the RHS.
+ if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
+ if ((LHSI->getOpcode() == Instruction::And ||
+ LHSI->getOpcode() == Instruction::Or ||
+ LHSI->getOpcode() == Instruction::Select))
+ return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
Depth);
}
return None;
@@ -6266,6 +6471,13 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
unsigned Width = Lower.getBitWidth();
const APInt *C;
switch (II.getIntrinsicID()) {
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ // Maximum of set/clear bits is the bit width.
+ assert(Lower == 0 && "Expected lower bound to be zero");
+ Upper = Width + 1;
+ break;
case Intrinsic::uadd_sat:
// uadd.sat(x, C) produces [C, UINT_MAX].
if (match(II.getOperand(0), m_APInt(C)) ||
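For the ctpop/ctlz/cttz bounds added above, the count can reach but never exceed the bit width, hence the half-open upper bound of Width + 1; a small standalone check on assumed u32 sample values (C++20 <bit>):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  const int Width = 32;
  for (uint32_t X : {0u, 1u, 0x80000000u, 0xffffffffu}) {
    assert(std::popcount(X) <= Width);     // ctpop in [0, Width]
    assert(std::countl_zero(X) <= Width);  // ctlz  in [0, Width]
    assert(std::countr_zero(X) <= Width);  // cttz  in [0, Width]
  }
  return 0;
}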
@@ -6317,6 +6529,41 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
}
}
break;
+ case Intrinsic::umin:
+ case Intrinsic::umax:
+ case Intrinsic::smin:
+ case Intrinsic::smax:
+ if (!match(II.getOperand(0), m_APInt(C)) &&
+ !match(II.getOperand(1), m_APInt(C)))
+ break;
+
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::umin:
+ Upper = *C + 1;
+ break;
+ case Intrinsic::umax:
+ Lower = *C;
+ break;
+ case Intrinsic::smin:
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = *C + 1;
+ break;
+ case Intrinsic::smax:
+ Lower = *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ break;
+ default:
+ llvm_unreachable("Must be min/max intrinsic");
+ }
+ break;
+ case Intrinsic::abs:
+ // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
+ // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
+ if (match(II.getOperand(1), m_One()))
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ else
+ Upper = APInt::getSignedMinValue(Width) + 1;
+ break;
default:
break;
}
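The umin/umax and abs bounds added above can be sanity-checked at 8 bits; a standalone sketch, not from the patch, with an arbitrary constant C and unsigned negation used so the INT8_MIN case stays well defined:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 42; // arbitrary constant operand
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = (uint8_t)V;
    assert(std::min(X, C) <= C);  // umin(x, C) lies in [0, C]
    assert(std::max(X, C) >= C);  // umax(x, C) lies in [C, UINT8_MAX]

    // abs: negating INT8_MIN (0x80) wraps back to 0x80, so unless that input
    // is declared poison the result range must also include SIGNED_MIN.
    int8_t S = (int8_t)X;
    uint8_t Abs = S < 0 ? (uint8_t)(0u - X) : X;
    assert(Abs <= 0x7f || Abs == 0x80);
  }
  return 0;
}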
@@ -6381,7 +6628,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
unsigned Depth) {
assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
- if (Depth == MaxDepth)
+ if (Depth == MaxAnalysisRecursionDepth)
return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
const APInt *C;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
index 23531b65ea32..9a4c96b6f7c2 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
@@ -43,13 +43,18 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor(
/// hasVectorInstrinsicScalarOpd).
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::bswap: // Begin integer bit-manipulation.
+ case Intrinsic::abs: // Begin integer bit-manipulation.
+ case Intrinsic::bswap:
case Intrinsic::bitreverse:
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::fshl:
case Intrinsic::fshr:
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin:
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::uadd_sat:
@@ -94,6 +99,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
unsigned ScalarOpdIdx) {
switch (ID) {
+ case Intrinsic::abs:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::powi:
@@ -119,7 +125,8 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
- ID == Intrinsic::sideeffect)
+ ID == Intrinsic::experimental_noalias_scope_decl ||
+ ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
return ID;
return Intrinsic::not_intrinsic;
}
@@ -130,7 +137,7 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
const DataLayout &DL = Gep->getModule()->getDataLayout();
unsigned LastOperand = Gep->getNumOperands() - 1;
- unsigned GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
+ TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
// Walk backwards and try to peel off zeros.
while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
@@ -202,7 +209,7 @@ Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
if (Ptr != OrigPtr)
// Strip off casts.
- while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
+ while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
V = C->getOperand();
const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
@@ -235,7 +242,7 @@ Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
// Strip off casts.
Type *StripedOffRecurrenceCast = nullptr;
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
+ if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
StripedOffRecurrenceCast = C->getType();
V = C->getOperand();
}
@@ -284,6 +291,10 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
if (EltNo == IIElt)
return III->getOperand(1);
+ // Guard against infinite loop on malformed, unreachable IR.
+ if (III == III->getOperand(0))
+ return nullptr;
+
// Otherwise, the insertelement doesn't modify the value, recurse on its
// vector input.
return findScalarElement(III->getOperand(0), EltNo);
@@ -336,7 +347,7 @@ int llvm::getSplatIndex(ArrayRef<int> Mask) {
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcasts a scalar at element 0.
-const llvm::Value *llvm::getSplatValue(const Value *V) {
+Value *llvm::getSplatValue(const Value *V) {
if (isa<VectorType>(V->getType()))
if (auto *C = dyn_cast<Constant>(V))
return C->getSplatValue();
@@ -351,12 +362,8 @@ const llvm::Value *llvm::getSplatValue(const Value *V) {
return nullptr;
}
-// This setting is based on its counterpart in value tracking, but it could be
-// adjusted if needed.
-const unsigned MaxDepth = 6;
-
bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
- assert(Depth <= MaxDepth && "Limit Search Depth");
+ assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
if (isa<VectorType>(V->getType())) {
if (isa<UndefValue>(V))
@@ -383,7 +390,7 @@ bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
}
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == MaxDepth)
+ if (Depth++ == MaxAnalysisRecursionDepth)
return false;
// If both operands of a binop are splats, the result is a splat.
@@ -414,8 +421,7 @@ void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
ScaledMask.clear();
for (int MaskElt : Mask) {
if (MaskElt >= 0) {
- assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <=
- std::numeric_limits<int32_t>::max() &&
+ assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
"Overflowed 32-bits");
}
for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
@@ -817,15 +823,14 @@ static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
VecTy1->getScalarType() == VecTy2->getScalarType() &&
"Expect two vectors with the same element type");
- unsigned NumElts1 = VecTy1->getNumElements();
- unsigned NumElts2 = VecTy2->getNumElements();
+ unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
+ unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
if (NumElts1 > NumElts2) {
// Extend with UNDEFs.
V2 = Builder.CreateShuffleVector(
- V2, UndefValue::get(VecTy2),
- createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
+ V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
}
return Builder.CreateShuffleVector(
@@ -861,13 +866,22 @@ Value *llvm::concatenateVectors(IRBuilderBase &Builder,
}
bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
+ assert(isa<VectorType>(Mask->getType()) &&
+ isa<IntegerType>(Mask->getType()->getScalarType()) &&
+ cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
+ 1 &&
+ "Mask must be a vector of i1");
+
auto *ConstMask = dyn_cast<Constant>(Mask);
if (!ConstMask)
return false;
if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
return true;
- for (unsigned I = 0,
- E = cast<VectorType>(ConstMask->getType())->getNumElements();
+ if (isa<ScalableVectorType>(ConstMask->getType()))
+ return false;
+ for (unsigned
+ I = 0,
+ E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
I != E; ++I) {
if (auto *MaskElt = ConstMask->getAggregateElement(I))
if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
@@ -879,13 +893,22 @@ bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
bool llvm::maskIsAllOneOrUndef(Value *Mask) {
+ assert(isa<VectorType>(Mask->getType()) &&
+ isa<IntegerType>(Mask->getType()->getScalarType()) &&
+ cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
+ 1 &&
+ "Mask must be a vector of i1");
+
auto *ConstMask = dyn_cast<Constant>(Mask);
if (!ConstMask)
return false;
if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
return true;
- for (unsigned I = 0,
- E = cast<VectorType>(ConstMask->getType())->getNumElements();
+ if (isa<ScalableVectorType>(ConstMask->getType()))
+ return false;
+ for (unsigned
+ I = 0,
+ E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
I != E; ++I) {
if (auto *MaskElt = ConstMask->getAggregateElement(I))
if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
@@ -898,8 +921,14 @@ bool llvm::maskIsAllOneOrUndef(Value *Mask) {
/// TODO: This is a lot like known bits, but for
/// vectors. Is there something we can common this with?
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
-
- const unsigned VWidth = cast<VectorType>(Mask->getType())->getNumElements();
+ assert(isa<FixedVectorType>(Mask->getType()) &&
+ isa<IntegerType>(Mask->getType()->getScalarType()) &&
+ cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
+ 1 &&
+ "Mask must be a fixed width vector of i1");
+
+ const unsigned VWidth =
+ cast<FixedVectorType>(Mask->getType())->getNumElements();
APInt DemandedElts = APInt::getAllOnesValue(VWidth);
if (auto *CV = dyn_cast<ConstantVector>(Mask))
for (unsigned i = 0; i < VWidth; i++)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/models/inliner/README.txt b/contrib/llvm-project/llvm/lib/Analysis/models/inliner/README.txt
new file mode 100644
index 000000000000..a76bb8930d6e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Analysis/models/inliner/README.txt
@@ -0,0 +1,3 @@
+Reference model for inliner -Oz decision policy.
+Note that, currently, this model is also referenced by test/Transforms/Inline/ML
+tests - if replacing it, check those tests, too.
diff --git a/contrib/llvm-project/llvm/lib/Analysis/models/inliner/output_spec.json b/contrib/llvm-project/llvm/lib/Analysis/models/inliner/output_spec.json
new file mode 100644
index 000000000000..5f9d13d8f8b8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Analysis/models/inliner/output_spec.json
@@ -0,0 +1,14 @@
+[
+ {
+ "logging_name": "inlining_decision",
+ "tensor_spec": {
+ "name": "StatefulPartitionedCall",
+ "port": 0,
+ "type": "int64_t",
+ "shape": [
+ 1
+ ]
+ }
+ }
+]
+ 
\ No newline at end of file
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
index 777ce3abdddd..427de74f91ac 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
@@ -531,6 +531,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(undef);
KEYWORD(null);
KEYWORD(none);
+ KEYWORD(poison);
KEYWORD(to);
KEYWORD(caller);
KEYWORD(within);
@@ -624,6 +625,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(amdgpu_ps);
KEYWORD(amdgpu_cs);
KEYWORD(amdgpu_kernel);
+ KEYWORD(amdgpu_gfx);
KEYWORD(tailcc);
KEYWORD(cc);
@@ -651,6 +653,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(nest);
KEYWORD(noalias);
KEYWORD(nobuiltin);
+ KEYWORD(nocallback);
KEYWORD(nocapture);
KEYWORD(noduplicate);
KEYWORD(nofree);
@@ -660,6 +663,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(nonlazybind);
KEYWORD(nomerge);
KEYWORD(nonnull);
+ KEYWORD(noprofile);
KEYWORD(noredzone);
KEYWORD(noreturn);
KEYWORD(nosync);
@@ -697,6 +701,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(writeonly);
KEYWORD(zeroext);
KEYWORD(immarg);
+ KEYWORD(byref);
+ KEYWORD(mustprogress);
KEYWORD(type);
KEYWORD(opaque);
@@ -721,6 +727,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(vscale);
KEYWORD(x);
KEYWORD(blockaddress);
+ KEYWORD(dso_local_equivalent);
// Metadata types.
KEYWORD(distinct);
@@ -834,6 +841,7 @@ lltok::Kind LLLexer::LexIdentifier() {
TYPEKEYWORD("label", Type::getLabelTy(Context));
TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
+ TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context));
TYPEKEYWORD("token", Type::getTokenTy(Context));
#undef TYPEKEYWORD
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
index c9f21ee83826..2a3fb8fb6658 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
@@ -65,20 +65,20 @@ bool LLParser::Run(bool UpgradeDebugInfo,
Lex.Lex();
if (Context.shouldDiscardValueNames())
- return Error(
+ return error(
Lex.getLoc(),
"Can't read textual IR with a Context that discards named Values");
if (M) {
- if (ParseTargetDefinitions())
+ if (parseTargetDefinitions())
return true;
if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple()))
M->setDataLayout(*LayoutOverride);
}
- return ParseTopLevelEntities() || ValidateEndOfModule(UpgradeDebugInfo) ||
- ValidateEndOfIndex();
+ return parseTopLevelEntities() || validateEndOfModule(UpgradeDebugInfo) ||
+ validateEndOfIndex();
}
bool LLParser::parseStandaloneConstantValue(Constant *&C,
@@ -87,10 +87,10 @@ bool LLParser::parseStandaloneConstantValue(Constant *&C,
Lex.Lex();
Type *Ty = nullptr;
- if (ParseType(Ty) || parseConstantValue(Ty, C))
+ if (parseType(Ty) || parseConstantValue(Ty, C))
return true;
if (Lex.getKind() != lltok::Eof)
- return Error(Lex.getLoc(), "expected end of string");
+ return error(Lex.getLoc(), "expected end of string");
return false;
}
@@ -102,7 +102,7 @@ bool LLParser::parseTypeAtBeginning(Type *&Ty, unsigned &Read,
Read = 0;
SMLoc Start = Lex.getLoc();
Ty = nullptr;
- if (ParseType(Ty))
+ if (parseType(Ty))
return true;
SMLoc End = Lex.getLoc();
Read = End.getPointer() - Start.getPointer();
@@ -123,9 +123,9 @@ void LLParser::restoreParsingState(const SlotMapping *Slots) {
std::make_pair(I.first, std::make_pair(I.second, LocTy())));
}
-/// ValidateEndOfModule - Do final validity and sanity checks at the end of the
+/// validateEndOfModule - Do final validity and sanity checks at the end of the
/// module.
-bool LLParser::ValidateEndOfModule(bool UpgradeDebugInfo) {
+bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
if (!M)
return false;
// Handle any function attribute group forward references.
@@ -190,39 +190,39 @@ bool LLParser::ValidateEndOfModule(bool UpgradeDebugInfo) {
// If there are entries in ForwardRefBlockAddresses at this point, the
// function was never defined.
if (!ForwardRefBlockAddresses.empty())
- return Error(ForwardRefBlockAddresses.begin()->first.Loc,
+ return error(ForwardRefBlockAddresses.begin()->first.Loc,
"expected function name in blockaddress");
for (const auto &NT : NumberedTypes)
if (NT.second.second.isValid())
- return Error(NT.second.second,
+ return error(NT.second.second,
"use of undefined type '%" + Twine(NT.first) + "'");
for (StringMap<std::pair<Type*, LocTy> >::iterator I =
NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I)
if (I->second.second.isValid())
- return Error(I->second.second,
+ return error(I->second.second,
"use of undefined type named '" + I->getKey() + "'");
if (!ForwardRefComdats.empty())
- return Error(ForwardRefComdats.begin()->second,
+ return error(ForwardRefComdats.begin()->second,
"use of undefined comdat '$" +
ForwardRefComdats.begin()->first + "'");
if (!ForwardRefVals.empty())
- return Error(ForwardRefVals.begin()->second.second,
+ return error(ForwardRefVals.begin()->second.second,
"use of undefined value '@" + ForwardRefVals.begin()->first +
- "'");
+ "'");
if (!ForwardRefValIDs.empty())
- return Error(ForwardRefValIDs.begin()->second.second,
+ return error(ForwardRefValIDs.begin()->second.second,
"use of undefined value '@" +
- Twine(ForwardRefValIDs.begin()->first) + "'");
+ Twine(ForwardRefValIDs.begin()->first) + "'");
if (!ForwardRefMDNodes.empty())
- return Error(ForwardRefMDNodes.begin()->second.second,
+ return error(ForwardRefMDNodes.begin()->second.second,
"use of undefined metadata '!" +
- Twine(ForwardRefMDNodes.begin()->first) + "'");
+ Twine(ForwardRefMDNodes.begin()->first) + "'");
// Resolve metadata cycles.
for (auto &N : NumberedMetadata) {
@@ -275,22 +275,22 @@ bool LLParser::ValidateEndOfModule(bool UpgradeDebugInfo) {
}
/// Do final validity and sanity checks at the end of the index.
-bool LLParser::ValidateEndOfIndex() {
+bool LLParser::validateEndOfIndex() {
if (!Index)
return false;
if (!ForwardRefValueInfos.empty())
- return Error(ForwardRefValueInfos.begin()->second.front().second,
+ return error(ForwardRefValueInfos.begin()->second.front().second,
"use of undefined summary '^" +
Twine(ForwardRefValueInfos.begin()->first) + "'");
if (!ForwardRefAliasees.empty())
- return Error(ForwardRefAliasees.begin()->second.front().second,
+ return error(ForwardRefAliasees.begin()->second.front().second,
"use of undefined summary '^" +
Twine(ForwardRefAliasees.begin()->first) + "'");
if (!ForwardRefTypeIds.empty())
- return Error(ForwardRefTypeIds.begin()->second.front().second,
+ return error(ForwardRefTypeIds.begin()->second.front().second,
"use of undefined type id summary '^" +
Twine(ForwardRefTypeIds.begin()->first) + "'");
@@ -301,15 +301,15 @@ bool LLParser::ValidateEndOfIndex() {
// Top-Level Entities
//===----------------------------------------------------------------------===//
-bool LLParser::ParseTargetDefinitions() {
+bool LLParser::parseTargetDefinitions() {
while (true) {
switch (Lex.getKind()) {
case lltok::kw_target:
- if (ParseTargetDefinition())
+ if (parseTargetDefinition())
return true;
break;
case lltok::kw_source_filename:
- if (ParseSourceFileName())
+ if (parseSourceFileName())
return true;
break;
default:
@@ -318,7 +318,7 @@ bool LLParser::ParseTargetDefinitions() {
}
}
-bool LLParser::ParseTopLevelEntities() {
+bool LLParser::parseTopLevelEntities() {
// If there is no Module, then parse just the summary index entries.
if (!M) {
while (true) {
@@ -326,11 +326,11 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::Eof:
return false;
case lltok::SummaryID:
- if (ParseSummaryEntry())
+ if (parseSummaryEntry())
return true;
break;
case lltok::kw_source_filename:
- if (ParseSourceFileName())
+ if (parseSourceFileName())
return true;
break;
default:
@@ -341,27 +341,64 @@ bool LLParser::ParseTopLevelEntities() {
}
while (true) {
switch (Lex.getKind()) {
- default: return TokError("expected top-level entity");
+ default:
+ return tokError("expected top-level entity");
case lltok::Eof: return false;
- case lltok::kw_declare: if (ParseDeclare()) return true; break;
- case lltok::kw_define: if (ParseDefine()) return true; break;
- case lltok::kw_module: if (ParseModuleAsm()) return true; break;
- case lltok::kw_deplibs: if (ParseDepLibs()) return true; break;
- case lltok::LocalVarID: if (ParseUnnamedType()) return true; break;
- case lltok::LocalVar: if (ParseNamedType()) return true; break;
- case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break;
- case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break;
+ case lltok::kw_declare:
+ if (parseDeclare())
+ return true;
+ break;
+ case lltok::kw_define:
+ if (parseDefine())
+ return true;
+ break;
+ case lltok::kw_module:
+ if (parseModuleAsm())
+ return true;
+ break;
+ case lltok::kw_deplibs:
+ if (parseDepLibs())
+ return true;
+ break;
+ case lltok::LocalVarID:
+ if (parseUnnamedType())
+ return true;
+ break;
+ case lltok::LocalVar:
+ if (parseNamedType())
+ return true;
+ break;
+ case lltok::GlobalID:
+ if (parseUnnamedGlobal())
+ return true;
+ break;
+ case lltok::GlobalVar:
+ if (parseNamedGlobal())
+ return true;
+ break;
case lltok::ComdatVar: if (parseComdat()) return true; break;
- case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break;
+ case lltok::exclaim:
+ if (parseStandaloneMetadata())
+ return true;
+ break;
case lltok::SummaryID:
- if (ParseSummaryEntry())
+ if (parseSummaryEntry())
+ return true;
+ break;
+ case lltok::MetadataVar:
+ if (parseNamedMetadata())
+ return true;
+ break;
+ case lltok::kw_attributes:
+ if (parseUnnamedAttrGrp())
+ return true;
+ break;
+ case lltok::kw_uselistorder:
+ if (parseUseListOrder())
return true;
break;
- case lltok::MetadataVar:if (ParseNamedMetadata()) return true; break;
- case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break;
- case lltok::kw_uselistorder: if (ParseUseListOrder()) return true; break;
case lltok::kw_uselistorder_bb:
- if (ParseUseListOrderBB())
+ if (parseUseListOrderBB())
return true;
break;
}
@@ -370,13 +407,14 @@ bool LLParser::ParseTopLevelEntities() {
/// toplevelentity
/// ::= 'module' 'asm' STRINGCONSTANT
-bool LLParser::ParseModuleAsm() {
+bool LLParser::parseModuleAsm() {
assert(Lex.getKind() == lltok::kw_module);
Lex.Lex();
std::string AsmStr;
- if (ParseToken(lltok::kw_asm, "expected 'module asm'") ||
- ParseStringConstant(AsmStr)) return true;
+ if (parseToken(lltok::kw_asm, "expected 'module asm'") ||
+ parseStringConstant(AsmStr))
+ return true;
M->appendModuleInlineAsm(AsmStr);
return false;
@@ -385,22 +423,23 @@ bool LLParser::ParseModuleAsm() {
/// toplevelentity
/// ::= 'target' 'triple' '=' STRINGCONSTANT
/// ::= 'target' 'datalayout' '=' STRINGCONSTANT
-bool LLParser::ParseTargetDefinition() {
+bool LLParser::parseTargetDefinition() {
assert(Lex.getKind() == lltok::kw_target);
std::string Str;
switch (Lex.Lex()) {
- default: return TokError("unknown target property");
+ default:
+ return tokError("unknown target property");
case lltok::kw_triple:
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' after target triple") ||
- ParseStringConstant(Str))
+ if (parseToken(lltok::equal, "expected '=' after target triple") ||
+ parseStringConstant(Str))
return true;
M->setTargetTriple(Str);
return false;
case lltok::kw_datalayout:
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' after target datalayout") ||
- ParseStringConstant(Str))
+ if (parseToken(lltok::equal, "expected '=' after target datalayout") ||
+ parseStringConstant(Str))
return true;
M->setDataLayout(Str);
return false;
@@ -409,11 +448,11 @@ bool LLParser::ParseTargetDefinition() {
/// toplevelentity
/// ::= 'source_filename' '=' STRINGCONSTANT
-bool LLParser::ParseSourceFileName() {
+bool LLParser::parseSourceFileName() {
assert(Lex.getKind() == lltok::kw_source_filename);
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' after source_filename") ||
- ParseStringConstant(SourceFileName))
+ if (parseToken(lltok::equal, "expected '=' after source_filename") ||
+ parseStringConstant(SourceFileName))
return true;
if (M)
M->setSourceFileName(SourceFileName);
@@ -424,11 +463,11 @@ bool LLParser::ParseSourceFileName() {
/// ::= 'deplibs' '=' '[' ']'
/// ::= 'deplibs' '=' '[' STRINGCONSTANT (',' STRINGCONSTANT)* ']'
/// FIXME: Remove in 4.0. Currently parse, but ignore.
-bool LLParser::ParseDepLibs() {
+bool LLParser::parseDepLibs() {
assert(Lex.getKind() == lltok::kw_deplibs);
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' after deplibs") ||
- ParseToken(lltok::lsquare, "expected '=' after deplibs"))
+ if (parseToken(lltok::equal, "expected '=' after deplibs") ||
+ parseToken(lltok::lsquare, "expected '=' after deplibs"))
return true;
if (EatIfPresent(lltok::rsquare))
@@ -436,31 +475,32 @@ bool LLParser::ParseDepLibs() {
do {
std::string Str;
- if (ParseStringConstant(Str)) return true;
+ if (parseStringConstant(Str))
+ return true;
} while (EatIfPresent(lltok::comma));
- return ParseToken(lltok::rsquare, "expected ']' at end of list");
+ return parseToken(lltok::rsquare, "expected ']' at end of list");
}
-/// ParseUnnamedType:
+/// parseUnnamedType:
/// ::= LocalVarID '=' 'type' type
-bool LLParser::ParseUnnamedType() {
+bool LLParser::parseUnnamedType() {
LocTy TypeLoc = Lex.getLoc();
unsigned TypeID = Lex.getUIntVal();
Lex.Lex(); // eat LocalVarID;
- if (ParseToken(lltok::equal, "expected '=' after name") ||
- ParseToken(lltok::kw_type, "expected 'type' after '='"))
+ if (parseToken(lltok::equal, "expected '=' after name") ||
+ parseToken(lltok::kw_type, "expected 'type' after '='"))
return true;
Type *Result = nullptr;
- if (ParseStructDefinition(TypeLoc, "",
- NumberedTypes[TypeID], Result)) return true;
+ if (parseStructDefinition(TypeLoc, "", NumberedTypes[TypeID], Result))
+ return true;
if (!isa<StructType>(Result)) {
std::pair<Type*, LocTy> &Entry = NumberedTypes[TypeID];
if (Entry.first)
- return Error(TypeLoc, "non-struct types may not be recursive");
+ return error(TypeLoc, "non-struct types may not be recursive");
Entry.first = Result;
Entry.second = SMLoc();
}
@@ -470,23 +510,23 @@ bool LLParser::ParseUnnamedType() {
/// toplevelentity
/// ::= LocalVar '=' 'type' type
-bool LLParser::ParseNamedType() {
+bool LLParser::parseNamedType() {
std::string Name = Lex.getStrVal();
LocTy NameLoc = Lex.getLoc();
Lex.Lex(); // eat LocalVar.
- if (ParseToken(lltok::equal, "expected '=' after name") ||
- ParseToken(lltok::kw_type, "expected 'type' after name"))
+ if (parseToken(lltok::equal, "expected '=' after name") ||
+ parseToken(lltok::kw_type, "expected 'type' after name"))
return true;
Type *Result = nullptr;
- if (ParseStructDefinition(NameLoc, Name,
- NamedTypes[Name], Result)) return true;
+ if (parseStructDefinition(NameLoc, Name, NamedTypes[Name], Result))
+ return true;
if (!isa<StructType>(Result)) {
std::pair<Type*, LocTy> &Entry = NamedTypes[Name];
if (Entry.first)
- return Error(NameLoc, "non-struct types may not be recursive");
+ return error(NameLoc, "non-struct types may not be recursive");
Entry.first = Result;
Entry.second = SMLoc();
}
@@ -496,7 +536,7 @@ bool LLParser::ParseNamedType() {
/// toplevelentity
/// ::= 'declare' FunctionHeader
-bool LLParser::ParseDeclare() {
+bool LLParser::parseDeclare() {
assert(Lex.getKind() == lltok::kw_declare);
Lex.Lex();
@@ -504,13 +544,13 @@ bool LLParser::ParseDeclare() {
while (Lex.getKind() == lltok::MetadataVar) {
unsigned MDK;
MDNode *N;
- if (ParseMetadataAttachment(MDK, N))
+ if (parseMetadataAttachment(MDK, N))
return true;
MDs.push_back({MDK, N});
}
Function *F;
- if (ParseFunctionHeader(F, false))
+ if (parseFunctionHeader(F, false))
return true;
for (auto &MD : MDs)
F->addMetadata(MD.first, *MD.second);
@@ -519,33 +559,32 @@ bool LLParser::ParseDeclare() {
/// toplevelentity
/// ::= 'define' FunctionHeader (!dbg !56)* '{' ...
-bool LLParser::ParseDefine() {
+bool LLParser::parseDefine() {
assert(Lex.getKind() == lltok::kw_define);
Lex.Lex();
Function *F;
- return ParseFunctionHeader(F, true) ||
- ParseOptionalFunctionMetadata(*F) ||
- ParseFunctionBody(*F);
+ return parseFunctionHeader(F, true) || parseOptionalFunctionMetadata(*F) ||
+ parseFunctionBody(*F);
}
-/// ParseGlobalType
+/// parseGlobalType
/// ::= 'constant'
/// ::= 'global'
-bool LLParser::ParseGlobalType(bool &IsConstant) {
+bool LLParser::parseGlobalType(bool &IsConstant) {
if (Lex.getKind() == lltok::kw_constant)
IsConstant = true;
else if (Lex.getKind() == lltok::kw_global)
IsConstant = false;
else {
IsConstant = false;
- return TokError("expected 'global' or 'constant'");
+ return tokError("expected 'global' or 'constant'");
}
Lex.Lex();
return false;
}
-bool LLParser::ParseOptionalUnnamedAddr(
+bool LLParser::parseOptionalUnnamedAddr(
GlobalVariable::UnnamedAddr &UnnamedAddr) {
if (EatIfPresent(lltok::kw_unnamed_addr))
UnnamedAddr = GlobalValue::UnnamedAddr::Global;
@@ -556,16 +595,17 @@ bool LLParser::ParseOptionalUnnamedAddr(
return false;
}
-/// ParseUnnamedGlobal:
+/// parseUnnamedGlobal:
/// OptionalVisibility (ALIAS | IFUNC) ...
/// OptionalLinkage OptionalPreemptionSpecifier OptionalVisibility
/// OptionalDLLStorageClass
/// ... -> global variable
/// GlobalID '=' OptionalVisibility (ALIAS | IFUNC) ...
-/// GlobalID '=' OptionalLinkage OptionalPreemptionSpecifier OptionalVisibility
+/// GlobalID '=' OptionalLinkage OptionalPreemptionSpecifier
+/// OptionalVisibility
/// OptionalDLLStorageClass
/// ... -> global variable
-bool LLParser::ParseUnnamedGlobal() {
+bool LLParser::parseUnnamedGlobal() {
unsigned VarID = NumberedVals.size();
std::string Name;
LocTy NameLoc = Lex.getLoc();
@@ -573,11 +613,11 @@ bool LLParser::ParseUnnamedGlobal() {
// Handle the GlobalID form.
if (Lex.getKind() == lltok::GlobalID) {
if (Lex.getUIntVal() != VarID)
- return Error(Lex.getLoc(), "variable expected to be numbered '%" +
- Twine(VarID) + "'");
+ return error(Lex.getLoc(),
+ "variable expected to be numbered '%" + Twine(VarID) + "'");
Lex.Lex(); // eat GlobalID;
- if (ParseToken(lltok::equal, "expected '=' after name"))
+ if (parseToken(lltok::equal, "expected '=' after name"))
return true;
}
@@ -586,25 +626,25 @@ bool LLParser::ParseUnnamedGlobal() {
bool DSOLocal;
GlobalVariable::ThreadLocalMode TLM;
GlobalVariable::UnnamedAddr UnnamedAddr;
- if (ParseOptionalLinkage(Linkage, HasLinkage, Visibility, DLLStorageClass,
+ if (parseOptionalLinkage(Linkage, HasLinkage, Visibility, DLLStorageClass,
DSOLocal) ||
- ParseOptionalThreadLocal(TLM) || ParseOptionalUnnamedAddr(UnnamedAddr))
+ parseOptionalThreadLocal(TLM) || parseOptionalUnnamedAddr(UnnamedAddr))
return true;
if (Lex.getKind() != lltok::kw_alias && Lex.getKind() != lltok::kw_ifunc)
- return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
+ return parseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
return parseIndirectSymbol(Name, NameLoc, Linkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
}
-/// ParseNamedGlobal:
+/// parseNamedGlobal:
/// GlobalVar '=' OptionalVisibility (ALIAS | IFUNC) ...
/// GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
/// OptionalVisibility OptionalDLLStorageClass
/// ... -> global variable
-bool LLParser::ParseNamedGlobal() {
+bool LLParser::parseNamedGlobal() {
assert(Lex.getKind() == lltok::GlobalVar);
LocTy NameLoc = Lex.getLoc();
std::string Name = Lex.getStrVal();
@@ -615,14 +655,14 @@ bool LLParser::ParseNamedGlobal() {
bool DSOLocal;
GlobalVariable::ThreadLocalMode TLM;
GlobalVariable::UnnamedAddr UnnamedAddr;
- if (ParseToken(lltok::equal, "expected '=' in global variable") ||
- ParseOptionalLinkage(Linkage, HasLinkage, Visibility, DLLStorageClass,
+ if (parseToken(lltok::equal, "expected '=' in global variable") ||
+ parseOptionalLinkage(Linkage, HasLinkage, Visibility, DLLStorageClass,
DSOLocal) ||
- ParseOptionalThreadLocal(TLM) || ParseOptionalUnnamedAddr(UnnamedAddr))
+ parseOptionalThreadLocal(TLM) || parseOptionalUnnamedAddr(UnnamedAddr))
return true;
if (Lex.getKind() != lltok::kw_alias && Lex.getKind() != lltok::kw_ifunc)
- return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
+ return parseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
return parseIndirectSymbol(Name, NameLoc, Linkage, Visibility,
@@ -635,16 +675,16 @@ bool LLParser::parseComdat() {
LocTy NameLoc = Lex.getLoc();
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' here"))
+ if (parseToken(lltok::equal, "expected '=' here"))
return true;
- if (ParseToken(lltok::kw_comdat, "expected comdat keyword"))
- return TokError("expected comdat type");
+ if (parseToken(lltok::kw_comdat, "expected comdat keyword"))
+ return tokError("expected comdat type");
Comdat::SelectionKind SK;
switch (Lex.getKind()) {
default:
- return TokError("unknown selection kind");
+ return tokError("unknown selection kind");
case lltok::kw_any:
SK = Comdat::Any;
break;
@@ -667,7 +707,7 @@ bool LLParser::parseComdat() {
Module::ComdatSymTabType &ComdatSymTab = M->getComdatSymbolTable();
Module::ComdatSymTabType::iterator I = ComdatSymTab.find(Name);
if (I != ComdatSymTab.end() && !ForwardRefComdats.erase(Name))
- return Error(NameLoc, "redefinition of comdat '$" + Name + "'");
+ return error(NameLoc, "redefinition of comdat '$" + Name + "'");
Comdat *C;
if (I != ComdatSymTab.end())
@@ -681,20 +721,21 @@ bool LLParser::parseComdat() {
// MDString:
// ::= '!' STRINGCONSTANT
-bool LLParser::ParseMDString(MDString *&Result) {
+bool LLParser::parseMDString(MDString *&Result) {
std::string Str;
- if (ParseStringConstant(Str)) return true;
+ if (parseStringConstant(Str))
+ return true;
Result = MDString::get(Context, Str);
return false;
}
// MDNode:
// ::= '!' MDNodeNumber
-bool LLParser::ParseMDNodeID(MDNode *&Result) {
+bool LLParser::parseMDNodeID(MDNode *&Result) {
// !{ ..., !42, ... }
LocTy IDLoc = Lex.getLoc();
unsigned MID = 0;
- if (ParseUInt32(MID))
+ if (parseUInt32(MID))
return true;
// If not a forward reference, just return it now.
@@ -712,61 +753,60 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) {
return false;
}
-/// ParseNamedMetadata:
+/// parseNamedMetadata:
/// !foo = !{ !1, !2 }
-bool LLParser::ParseNamedMetadata() {
+bool LLParser::parseNamedMetadata() {
assert(Lex.getKind() == lltok::MetadataVar);
std::string Name = Lex.getStrVal();
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' here") ||
- ParseToken(lltok::exclaim, "Expected '!' here") ||
- ParseToken(lltok::lbrace, "Expected '{' here"))
+ if (parseToken(lltok::equal, "expected '=' here") ||
+ parseToken(lltok::exclaim, "Expected '!' here") ||
+ parseToken(lltok::lbrace, "Expected '{' here"))
return true;
NamedMDNode *NMD = M->getOrInsertNamedMetadata(Name);
if (Lex.getKind() != lltok::rbrace)
do {
MDNode *N = nullptr;
- // Parse DIExpressions inline as a special case. They are still MDNodes,
+ // parse DIExpressions inline as a special case. They are still MDNodes,
// so they can still appear in named metadata. Remove this logic if they
// become plain Metadata.
if (Lex.getKind() == lltok::MetadataVar &&
Lex.getStrVal() == "DIExpression") {
- if (ParseDIExpression(N, /*IsDistinct=*/false))
+ if (parseDIExpression(N, /*IsDistinct=*/false))
return true;
- } else if (ParseToken(lltok::exclaim, "Expected '!' here") ||
- ParseMDNodeID(N)) {
+ } else if (parseToken(lltok::exclaim, "Expected '!' here") ||
+ parseMDNodeID(N)) {
return true;
}
NMD->addOperand(N);
} while (EatIfPresent(lltok::comma));
- return ParseToken(lltok::rbrace, "expected end of metadata node");
+ return parseToken(lltok::rbrace, "expected end of metadata node");
}
-/// ParseStandaloneMetadata:
+/// parseStandaloneMetadata:
/// !42 = !{...}
-bool LLParser::ParseStandaloneMetadata() {
+bool LLParser::parseStandaloneMetadata() {
assert(Lex.getKind() == lltok::exclaim);
Lex.Lex();
unsigned MetadataID = 0;
MDNode *Init;
- if (ParseUInt32(MetadataID) ||
- ParseToken(lltok::equal, "expected '=' here"))
+ if (parseUInt32(MetadataID) || parseToken(lltok::equal, "expected '=' here"))
return true;
// Detect common error, from old metadata syntax.
if (Lex.getKind() == lltok::Type)
- return TokError("unexpected type in metadata definition");
+ return tokError("unexpected type in metadata definition");
bool IsDistinct = EatIfPresent(lltok::kw_distinct);
if (Lex.getKind() == lltok::MetadataVar) {
- if (ParseSpecializedMDNode(Init, IsDistinct))
+ if (parseSpecializedMDNode(Init, IsDistinct))
return true;
- } else if (ParseToken(lltok::exclaim, "Expected '!' here") ||
- ParseMDTuple(Init, IsDistinct))
+ } else if (parseToken(lltok::exclaim, "Expected '!' here") ||
+ parseMDTuple(Init, IsDistinct))
return true;
// See if this was forward referenced, if so, handle it.
@@ -778,7 +818,7 @@ bool LLParser::ParseStandaloneMetadata() {
assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work");
} else {
if (NumberedMetadata.count(MetadataID))
- return TokError("Metadata id is already used");
+ return tokError("Metadata id is already used");
NumberedMetadata[MetadataID].reset(Init);
}
@@ -786,18 +826,25 @@ bool LLParser::ParseStandaloneMetadata() {
}
// Skips a single module summary entry.
-bool LLParser::SkipModuleSummaryEntry() {
+bool LLParser::skipModuleSummaryEntry() {
// Each module summary entry consists of a tag for the entry
- // type, followed by a colon, then the fields surrounded by nested sets of
- // parentheses. The "tag:" looks like a Label. Once parsing support is
- // in place we will look for the tokens corresponding to the expected tags.
+ // type, followed by a colon, then the fields which may be surrounded by
+ // nested sets of parentheses. The "tag:" looks like a Label. Once parsing
+ // support is in place we will look for the tokens corresponding to the
+ // expected tags.
if (Lex.getKind() != lltok::kw_gv && Lex.getKind() != lltok::kw_module &&
- Lex.getKind() != lltok::kw_typeid)
- return TokError(
- "Expected 'gv', 'module', or 'typeid' at the start of summary entry");
+ Lex.getKind() != lltok::kw_typeid && Lex.getKind() != lltok::kw_flags &&
+ Lex.getKind() != lltok::kw_blockcount)
+ return tokError(
+ "Expected 'gv', 'module', 'typeid', 'flags' or 'blockcount' at the "
+ "start of summary entry");
+ if (Lex.getKind() == lltok::kw_flags)
+ return parseSummaryIndexFlags();
+ if (Lex.getKind() == lltok::kw_blockcount)
+ return parseBlockCount();
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' at start of summary entry") ||
- ParseToken(lltok::lparen, "expected '(' at start of summary entry"))
+ if (parseToken(lltok::colon, "expected ':' at start of summary entry") ||
+ parseToken(lltok::lparen, "expected '(' at start of summary entry"))
return true;
// Now walk through the parenthesized entry, until the number of open
// parentheses goes back down to 0 (the first '(' was parsed above).
@@ -811,7 +858,7 @@ bool LLParser::SkipModuleSummaryEntry() {
NumOpenParen--;
break;
case lltok::Eof:
- return TokError("found end of file while parsing summary entry");
+ return tokError("found end of file while parsing summary entry");
default:
// Skip everything in between parentheses.
break;
@@ -823,7 +870,7 @@ bool LLParser::SkipModuleSummaryEntry() {
/// SummaryEntry
/// ::= SummaryID '=' GVEntry | ModuleEntry | TypeIdEntry
-bool LLParser::ParseSummaryEntry() {
+bool LLParser::parseSummaryEntry() {
assert(Lex.getKind() == lltok::SummaryID);
unsigned SummaryID = Lex.getUIntVal();
@@ -832,35 +879,35 @@ bool LLParser::ParseSummaryEntry() {
Lex.setIgnoreColonInIdentifiers(true);
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' here"))
+ if (parseToken(lltok::equal, "expected '=' here"))
return true;
// If we don't have an index object, skip the summary entry.
if (!Index)
- return SkipModuleSummaryEntry();
+ return skipModuleSummaryEntry();
bool result = false;
switch (Lex.getKind()) {
case lltok::kw_gv:
- result = ParseGVEntry(SummaryID);
+ result = parseGVEntry(SummaryID);
break;
case lltok::kw_module:
- result = ParseModuleEntry(SummaryID);
+ result = parseModuleEntry(SummaryID);
break;
case lltok::kw_typeid:
- result = ParseTypeIdEntry(SummaryID);
+ result = parseTypeIdEntry(SummaryID);
break;
case lltok::kw_typeidCompatibleVTable:
- result = ParseTypeIdCompatibleVtableEntry(SummaryID);
+ result = parseTypeIdCompatibleVtableEntry(SummaryID);
break;
case lltok::kw_flags:
- result = ParseSummaryIndexFlags();
+ result = parseSummaryIndexFlags();
break;
case lltok::kw_blockcount:
- result = ParseBlockCount();
+ result = parseBlockCount();
break;
default:
- result = Error(Lex.getLoc(), "unexpected summary kind");
+ result = error(Lex.getLoc(), "unexpected summary kind");
break;
}
Lex.setIgnoreColonInIdentifiers(false);
@@ -910,16 +957,16 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
GlobalValue::LinkageTypes Linkage = (GlobalValue::LinkageTypes) L;
if(IsAlias && !GlobalAlias::isValidLinkage(Linkage))
- return Error(NameLoc, "invalid linkage type for alias");
+ return error(NameLoc, "invalid linkage type for alias");
if (!isValidVisibilityForLinkage(Visibility, L))
- return Error(NameLoc,
+ return error(NameLoc,
"symbol with local linkage must have default visibility");
Type *Ty;
LocTy ExplicitTypeLoc = Lex.getLoc();
- if (ParseType(Ty) ||
- ParseToken(lltok::comma, "expected comma after alias or ifunc's type"))
+ if (parseType(Ty) ||
+ parseToken(lltok::comma, "expected comma after alias or ifunc's type"))
return true;
Constant *Aliasee;
@@ -928,33 +975,31 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
Lex.getKind() != lltok::kw_getelementptr &&
Lex.getKind() != lltok::kw_addrspacecast &&
Lex.getKind() != lltok::kw_inttoptr) {
- if (ParseGlobalTypeAndValue(Aliasee))
+ if (parseGlobalTypeAndValue(Aliasee))
return true;
} else {
// The bitcast dest type is not present, it is implied by the dest type.
ValID ID;
- if (ParseValID(ID))
+ if (parseValID(ID))
return true;
if (ID.Kind != ValID::t_Constant)
- return Error(AliaseeLoc, "invalid aliasee");
+ return error(AliaseeLoc, "invalid aliasee");
Aliasee = ID.ConstantVal;
}
Type *AliaseeType = Aliasee->getType();
auto *PTy = dyn_cast<PointerType>(AliaseeType);
if (!PTy)
- return Error(AliaseeLoc, "An alias or ifunc must have pointer type");
+ return error(AliaseeLoc, "An alias or ifunc must have pointer type");
unsigned AddrSpace = PTy->getAddressSpace();
if (IsAlias && Ty != PTy->getElementType())
- return Error(
- ExplicitTypeLoc,
- "explicit pointee type doesn't match operand's pointee type");
+ return error(ExplicitTypeLoc,
+ "explicit pointee type doesn't match operand's pointee type");
if (!IsAlias && !PTy->getElementType()->isFunctionTy())
- return Error(
- ExplicitTypeLoc,
- "explicit pointee type should be a function type");
+ return error(ExplicitTypeLoc,
+ "explicit pointee type should be a function type");
GlobalValue *GVal = nullptr;
@@ -964,7 +1009,7 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
GVal = M->getNamedValue(Name);
if (GVal) {
if (!ForwardRefVals.erase(Name))
- return Error(NameLoc, "redefinition of global '@" + Name + "'");
+ return error(NameLoc, "redefinition of global '@" + Name + "'");
}
} else {
auto I = ForwardRefValIDs.find(NumberedVals.size());
@@ -998,10 +1043,10 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
if (Lex.getKind() == lltok::kw_partition) {
Lex.Lex();
GA->setPartition(Lex.getStrVal());
- if (ParseToken(lltok::StringConstant, "expected partition string"))
+ if (parseToken(lltok::StringConstant, "expected partition string"))
return true;
} else {
- return TokError("unknown alias or ifunc property!");
+ return tokError("unknown alias or ifunc property!");
}
}
@@ -1011,7 +1056,7 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
if (GVal) {
// Verify that types agree.
if (GVal->getType() != GA->getType())
- return Error(
+ return error(
ExplicitTypeLoc,
"forward reference and definition of alias have different types");
@@ -1034,7 +1079,7 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
return false;
}
-/// ParseGlobal
+/// parseGlobal
/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
/// OptionalVisibility OptionalDLLStorageClass
/// OptionalThreadLocal OptionalUnnamedAddr OptionalAddrSpace
@@ -1047,13 +1092,13 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
/// Everything up to and including OptionalUnnamedAddr has been parsed
/// already.
///
-bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
+bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc,
unsigned Linkage, bool HasLinkage,
unsigned Visibility, unsigned DLLStorageClass,
bool DSOLocal, GlobalVariable::ThreadLocalMode TLM,
GlobalVariable::UnnamedAddr UnnamedAddr) {
if (!isValidVisibilityForLinkage(Visibility, Linkage))
- return Error(NameLoc,
+ return error(NameLoc,
"symbol with local linkage must have default visibility");
unsigned AddrSpace;
@@ -1062,12 +1107,11 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
LocTy TyLoc;
Type *Ty = nullptr;
- if (ParseOptionalAddrSpace(AddrSpace) ||
- ParseOptionalToken(lltok::kw_externally_initialized,
+ if (parseOptionalAddrSpace(AddrSpace) ||
+ parseOptionalToken(lltok::kw_externally_initialized,
IsExternallyInitialized,
&IsExternallyInitializedLoc) ||
- ParseGlobalType(IsConstant) ||
- ParseType(Ty, TyLoc))
+ parseGlobalType(IsConstant) || parseType(Ty, TyLoc))
return true;
// If the linkage is specified and is external, then no initializer is
@@ -1076,12 +1120,12 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
if (!HasLinkage ||
!GlobalValue::isValidDeclarationLinkage(
(GlobalValue::LinkageTypes)Linkage)) {
- if (ParseGlobalValue(Ty, Init))
+ if (parseGlobalValue(Ty, Init))
return true;
}
if (Ty->isFunctionTy() || !PointerType::isValidElementType(Ty))
- return Error(TyLoc, "invalid type for global variable");
+ return error(TyLoc, "invalid type for global variable");
GlobalValue *GVal = nullptr;
@@ -1090,7 +1134,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
GVal = M->getNamedValue(Name);
if (GVal) {
if (!ForwardRefVals.erase(Name))
- return Error(NameLoc, "redefinition of global '@" + Name + "'");
+ return error(NameLoc, "redefinition of global '@" + Name + "'");
}
} else {
auto I = ForwardRefValIDs.find(NumberedVals.size());
@@ -1107,8 +1151,9 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
AddrSpace);
} else {
if (GVal->getValueType() != Ty)
- return Error(TyLoc,
- "forward reference and definition of global have different types");
+ return error(
+ TyLoc,
+ "forward reference and definition of global have different types");
GV = cast<GlobalVariable>(GVal);
@@ -1131,26 +1176,27 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
GV->setThreadLocalMode(TLM);
GV->setUnnamedAddr(UnnamedAddr);
- // Parse attributes on the global.
+ // parse attributes on the global.
while (Lex.getKind() == lltok::comma) {
Lex.Lex();
if (Lex.getKind() == lltok::kw_section) {
Lex.Lex();
GV->setSection(Lex.getStrVal());
- if (ParseToken(lltok::StringConstant, "expected global section string"))
+ if (parseToken(lltok::StringConstant, "expected global section string"))
return true;
} else if (Lex.getKind() == lltok::kw_partition) {
Lex.Lex();
GV->setPartition(Lex.getStrVal());
- if (ParseToken(lltok::StringConstant, "expected partition string"))
+ if (parseToken(lltok::StringConstant, "expected partition string"))
return true;
} else if (Lex.getKind() == lltok::kw_align) {
MaybeAlign Alignment;
- if (ParseOptionalAlignment(Alignment)) return true;
+ if (parseOptionalAlignment(Alignment))
+ return true;
GV->setAlignment(Alignment);
} else if (Lex.getKind() == lltok::MetadataVar) {
- if (ParseGlobalObjectMetadataAttachment(*GV))
+ if (parseGlobalObjectMetadataAttachment(*GV))
return true;
} else {
Comdat *C;
@@ -1159,14 +1205,14 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
if (C)
GV->setComdat(C);
else
- return TokError("unknown global variable property!");
+ return tokError("unknown global variable property!");
}
}
AttrBuilder Attrs;
LocTy BuiltinLoc;
std::vector<unsigned> FwdRefAttrGrps;
- if (ParseFnAttributeValuePairs(Attrs, FwdRefAttrGrps, false, BuiltinLoc))
+ if (parseFnAttributeValuePairs(Attrs, FwdRefAttrGrps, false, BuiltinLoc))
return true;
if (Attrs.hasAttributes() || !FwdRefAttrGrps.empty()) {
GV->setAttributes(AttributeSet::get(Context, Attrs));
@@ -1176,37 +1222,37 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
return false;
}
-/// ParseUnnamedAttrGrp
+/// parseUnnamedAttrGrp
/// ::= 'attributes' AttrGrpID '=' '{' AttrValPair+ '}'
-bool LLParser::ParseUnnamedAttrGrp() {
+bool LLParser::parseUnnamedAttrGrp() {
assert(Lex.getKind() == lltok::kw_attributes);
LocTy AttrGrpLoc = Lex.getLoc();
Lex.Lex();
if (Lex.getKind() != lltok::AttrGrpID)
- return TokError("expected attribute group id");
+ return tokError("expected attribute group id");
unsigned VarID = Lex.getUIntVal();
std::vector<unsigned> unused;
LocTy BuiltinLoc;
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' here") ||
- ParseToken(lltok::lbrace, "expected '{' here") ||
- ParseFnAttributeValuePairs(NumberedAttrBuilders[VarID], unused, true,
+ if (parseToken(lltok::equal, "expected '=' here") ||
+ parseToken(lltok::lbrace, "expected '{' here") ||
+ parseFnAttributeValuePairs(NumberedAttrBuilders[VarID], unused, true,
BuiltinLoc) ||
- ParseToken(lltok::rbrace, "expected end of attribute group"))
+ parseToken(lltok::rbrace, "expected end of attribute group"))
return true;
if (!NumberedAttrBuilders[VarID].hasAttributes())
- return Error(AttrGrpLoc, "attribute group has no attributes");
+ return error(AttrGrpLoc, "attribute group has no attributes");
return false;
}
-/// ParseFnAttributeValuePairs
+/// parseFnAttributeValuePairs
/// ::= <attr> | <attr> '=' <value>
-bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
+bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B,
std::vector<unsigned> &FwdRefAttrGrps,
bool inAttrGrp, LocTy &BuiltinLoc) {
bool HaveError = false;
@@ -1220,7 +1266,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
switch (Token) {
default:
if (!inAttrGrp) return HaveError;
- return Error(Lex.getLoc(), "unterminated attribute group");
+ return error(Lex.getLoc(), "unterminated attribute group");
case lltok::rbrace:
// Finished.
return false;
@@ -1230,9 +1276,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
//
// define void @foo() #1 { ... }
if (inAttrGrp)
- HaveError |=
- Error(Lex.getLoc(),
- "cannot have an attribute group reference in an attribute group");
+ HaveError |= error(
+ Lex.getLoc(),
+ "cannot have an attribute group reference in an attribute group");
unsigned AttrGrpNum = Lex.getUIntVal();
if (inAttrGrp) break;
@@ -1243,7 +1289,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
}
// Target-dependent attributes:
case lltok::StringConstant: {
- if (ParseStringAttribute(B))
+ if (parseStringAttribute(B))
return true;
continue;
}
@@ -1257,11 +1303,11 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
if (inAttrGrp) {
Lex.Lex();
uint32_t Value = 0;
- if (ParseToken(lltok::equal, "expected '=' here") || ParseUInt32(Value))
+ if (parseToken(lltok::equal, "expected '=' here") || parseUInt32(Value))
return true;
Alignment = Align(Value);
} else {
- if (ParseOptionalAlignment(Alignment))
+ if (parseOptionalAlignment(Alignment))
return true;
}
B.addAlignmentAttr(Alignment);
@@ -1271,11 +1317,11 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
unsigned Alignment;
if (inAttrGrp) {
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' here") ||
- ParseUInt32(Alignment))
+ if (parseToken(lltok::equal, "expected '=' here") ||
+ parseUInt32(Alignment))
return true;
} else {
- if (ParseOptionalStackAlignment(Alignment))
+ if (parseOptionalStackAlignment(Alignment))
return true;
}
B.addStackAlignmentAttr(Alignment);
@@ -1294,6 +1340,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_argmemonly: B.addAttribute(Attribute::ArgMemOnly); break;
case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break;
case lltok::kw_cold: B.addAttribute(Attribute::Cold); break;
+ case lltok::kw_hot: B.addAttribute(Attribute::Hot); break;
case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break;
case lltok::kw_inaccessiblememonly:
B.addAttribute(Attribute::InaccessibleMemOnly); break;
@@ -1302,8 +1349,14 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break;
case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
+ case lltok::kw_mustprogress:
+ B.addAttribute(Attribute::MustProgress);
+ break;
case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break;
+ case lltok::kw_nocallback:
+ B.addAttribute(Attribute::NoCallback);
+ break;
case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break;
case lltok::kw_nofree: B.addAttribute(Attribute::NoFree); break;
case lltok::kw_noimplicitfloat:
@@ -1315,6 +1368,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break;
case lltok::kw_nosync: B.addAttribute(Attribute::NoSync); break;
case lltok::kw_nocf_check: B.addAttribute(Attribute::NoCfCheck); break;
+ case lltok::kw_noprofile: B.addAttribute(Attribute::NoProfile); break;
case lltok::kw_norecurse: B.addAttribute(Attribute::NoRecurse); break;
case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break;
case lltok::kw_null_pointer_is_valid:
@@ -1354,19 +1408,18 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break;
case lltok::kw_preallocated: {
Type *Ty;
- if (ParsePreallocated(Ty))
+ if (parsePreallocated(Ty))
return true;
B.addPreallocatedAttr(Ty);
break;
}
- // Error handling.
+ // error handling.
case lltok::kw_inreg:
case lltok::kw_signext:
case lltok::kw_zeroext:
HaveError |=
- Error(Lex.getLoc(),
- "invalid use of attribute on a function");
+ error(Lex.getLoc(), "invalid use of attribute on a function");
break;
case lltok::kw_byval:
case lltok::kw_dereferenceable:
@@ -1382,13 +1435,14 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_swifterror:
case lltok::kw_swiftself:
case lltok::kw_immarg:
+ case lltok::kw_byref:
HaveError |=
- Error(Lex.getLoc(),
- "invalid use of parameter-only attribute on a function");
+ error(Lex.getLoc(),
+ "invalid use of parameter-only attribute on a function");
break;
}
- // ParsePreallocated() consumes token
+ // parsePreallocated() consumes token
if (Token != lltok::kw_preallocated)
Lex.Lex();
}
@@ -1424,22 +1478,22 @@ Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
return Val;
}
if (Ty->isLabelTy())
- Error(Loc, "'" + Name + "' is not a basic block");
+ error(Loc, "'" + Name + "' is not a basic block");
else
- Error(Loc, "'" + Name + "' defined with type '" +
+ error(Loc, "'" + Name + "' defined with type '" +
getTypeString(Val->getType()) + "' but expected '" +
getTypeString(SuggestedTy) + "'");
return nullptr;
}
-/// GetGlobalVal - Get a value with the specified name or ID, creating a
+/// getGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
-GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
+GlobalValue *LLParser::getGlobalVal(const std::string &Name, Type *Ty,
LocTy Loc, bool IsCall) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy) {
- Error(Loc, "global variable reference must have pointer type");
+ error(Loc, "global variable reference must have pointer type");
return nullptr;
}
@@ -1466,11 +1520,11 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
return FwdVal;
}
-GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc,
+GlobalValue *LLParser::getGlobalVal(unsigned ID, Type *Ty, LocTy Loc,
bool IsCall) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy) {
- Error(Loc, "global variable reference must have pointer type");
+ error(Loc, "global variable reference must have pointer type");
return nullptr;
}
@@ -1516,56 +1570,56 @@ Comdat *LLParser::getComdat(const std::string &Name, LocTy Loc) {
// Helper Routines.
//===----------------------------------------------------------------------===//
-/// ParseToken - If the current token has the specified kind, eat it and return
+/// parseToken - If the current token has the specified kind, eat it and return
/// success. Otherwise, emit the specified error and return failure.
-bool LLParser::ParseToken(lltok::Kind T, const char *ErrMsg) {
+bool LLParser::parseToken(lltok::Kind T, const char *ErrMsg) {
if (Lex.getKind() != T)
- return TokError(ErrMsg);
+ return tokError(ErrMsg);
Lex.Lex();
return false;
}
-/// ParseStringConstant
+/// parseStringConstant
/// ::= StringConstant
-bool LLParser::ParseStringConstant(std::string &Result) {
+bool LLParser::parseStringConstant(std::string &Result) {
if (Lex.getKind() != lltok::StringConstant)
- return TokError("expected string constant");
+ return tokError("expected string constant");
Result = Lex.getStrVal();
Lex.Lex();
return false;
}
-/// ParseUInt32
+/// parseUInt32
/// ::= uint32
-bool LLParser::ParseUInt32(uint32_t &Val) {
+bool LLParser::parseUInt32(uint32_t &Val) {
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
- return TokError("expected integer");
+ return tokError("expected integer");
uint64_t Val64 = Lex.getAPSIntVal().getLimitedValue(0xFFFFFFFFULL+1);
if (Val64 != unsigned(Val64))
- return TokError("expected 32-bit integer (too large)");
+ return tokError("expected 32-bit integer (too large)");
Val = Val64;
Lex.Lex();
return false;
}
-/// ParseUInt64
+/// parseUInt64
/// ::= uint64
-bool LLParser::ParseUInt64(uint64_t &Val) {
+bool LLParser::parseUInt64(uint64_t &Val) {
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
- return TokError("expected integer");
+ return tokError("expected integer");
Val = Lex.getAPSIntVal().getLimitedValue();
Lex.Lex();
return false;
}
-/// ParseTLSModel
+/// parseTLSModel
/// := 'localdynamic'
/// := 'initialexec'
/// := 'localexec'
-bool LLParser::ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM) {
+bool LLParser::parseTLSModel(GlobalVariable::ThreadLocalMode &TLM) {
switch (Lex.getKind()) {
default:
- return TokError("expected localdynamic, initialexec or localexec");
+ return tokError("expected localdynamic, initialexec or localexec");
case lltok::kw_localdynamic:
TLM = GlobalVariable::LocalDynamicTLSModel;
break;
@@ -1581,11 +1635,11 @@ bool LLParser::ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM) {
return false;
}
-/// ParseOptionalThreadLocal
+/// parseOptionalThreadLocal
/// := /*empty*/
/// := 'thread_local'
/// := 'thread_local' '(' tlsmodel ')'
-bool LLParser::ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM) {
+bool LLParser::parseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM) {
TLM = GlobalVariable::NotThreadLocal;
if (!EatIfPresent(lltok::kw_thread_local))
return false;
@@ -1593,39 +1647,40 @@ bool LLParser::ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM) {
TLM = GlobalVariable::GeneralDynamicTLSModel;
if (Lex.getKind() == lltok::lparen) {
Lex.Lex();
- return ParseTLSModel(TLM) ||
- ParseToken(lltok::rparen, "expected ')' after thread local model");
+ return parseTLSModel(TLM) ||
+ parseToken(lltok::rparen, "expected ')' after thread local model");
}
return false;
}
-/// ParseOptionalAddrSpace
+/// parseOptionalAddrSpace
/// := /*empty*/
/// := 'addrspace' '(' uint32 ')'
-bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS) {
+bool LLParser::parseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS) {
AddrSpace = DefaultAS;
if (!EatIfPresent(lltok::kw_addrspace))
return false;
- return ParseToken(lltok::lparen, "expected '(' in address space") ||
- ParseUInt32(AddrSpace) ||
- ParseToken(lltok::rparen, "expected ')' in address space");
+ return parseToken(lltok::lparen, "expected '(' in address space") ||
+ parseUInt32(AddrSpace) ||
+ parseToken(lltok::rparen, "expected ')' in address space");
}
-/// ParseStringAttribute
+/// parseStringAttribute
/// := StringConstant
/// := StringConstant '=' StringConstant
-bool LLParser::ParseStringAttribute(AttrBuilder &B) {
+bool LLParser::parseStringAttribute(AttrBuilder &B) {
std::string Attr = Lex.getStrVal();
Lex.Lex();
std::string Val;
- if (EatIfPresent(lltok::equal) && ParseStringConstant(Val))
+ if (EatIfPresent(lltok::equal) && parseStringConstant(Val))
return true;
B.addAttribute(Attr, Val);
return false;
}
-/// ParseOptionalParamAttrs - Parse a potentially empty list of parameter attributes.
-bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
+/// parseOptionalParamAttrs - parse a potentially empty list of parameter
+/// attributes.
+bool LLParser::parseOptionalParamAttrs(AttrBuilder &B) {
bool HaveError = false;
B.clear();
@@ -1636,45 +1691,59 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
default: // End of attributes.
return HaveError;
case lltok::StringConstant: {
- if (ParseStringAttribute(B))
+ if (parseStringAttribute(B))
return true;
continue;
}
case lltok::kw_align: {
MaybeAlign Alignment;
- if (ParseOptionalAlignment(Alignment, true))
+ if (parseOptionalAlignment(Alignment, true))
return true;
B.addAlignmentAttr(Alignment);
continue;
}
case lltok::kw_byval: {
Type *Ty;
- if (ParseByValWithOptionalType(Ty))
+ if (parseRequiredTypeAttr(Ty, lltok::kw_byval))
return true;
B.addByValAttr(Ty);
continue;
}
+ case lltok::kw_sret: {
+ Type *Ty;
+ if (parseRequiredTypeAttr(Ty, lltok::kw_sret))
+ return true;
+ B.addStructRetAttr(Ty);
+ continue;
+ }
case lltok::kw_preallocated: {
Type *Ty;
- if (ParsePreallocated(Ty))
+ if (parsePreallocated(Ty))
return true;
B.addPreallocatedAttr(Ty);
continue;
}
case lltok::kw_dereferenceable: {
uint64_t Bytes;
- if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
+ if (parseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
return true;
B.addDereferenceableAttr(Bytes);
continue;
}
case lltok::kw_dereferenceable_or_null: {
uint64_t Bytes;
- if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes))
+ if (parseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes))
return true;
B.addDereferenceableOrNullAttr(Bytes);
continue;
}
+ case lltok::kw_byref: {
+ Type *Ty;
+ if (parseByRef(Ty))
+ return true;
+ B.addByRefAttr(Ty);
+ continue;
+ }
case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break;
case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
case lltok::kw_nest: B.addAttribute(Attribute::Nest); break;
@@ -1689,7 +1758,6 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break;
case lltok::kw_returned: B.addAttribute(Attribute::Returned); break;
case lltok::kw_signext: B.addAttribute(Attribute::SExt); break;
- case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break;
case lltok::kw_swifterror: B.addAttribute(Attribute::SwiftError); break;
case lltok::kw_swiftself: B.addAttribute(Attribute::SwiftSelf); break;
case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break;
@@ -1703,6 +1771,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_inlinehint:
case lltok::kw_jumptable:
case lltok::kw_minsize:
+ case lltok::kw_mustprogress:
case lltok::kw_naked:
case lltok::kw_nobuiltin:
case lltok::kw_noduplicate:
@@ -1710,6 +1779,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_noinline:
case lltok::kw_nonlazybind:
case lltok::kw_nomerge:
+ case lltok::kw_noprofile:
case lltok::kw_noredzone:
case lltok::kw_noreturn:
case lltok::kw_nocf_check:
@@ -1731,7 +1801,8 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_shadowcallstack:
case lltok::kw_strictfp:
case lltok::kw_uwtable:
- HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
+ HaveError |=
+ error(Lex.getLoc(), "invalid use of function-only attribute");
break;
}
@@ -1739,8 +1810,9 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
}
}
-/// ParseOptionalReturnAttrs - Parse a potentially empty list of return attributes.
-bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
+/// parseOptionalReturnAttrs - parse a potentially empty list of return
+/// attributes.
+bool LLParser::parseOptionalReturnAttrs(AttrBuilder &B) {
bool HaveError = false;
B.clear();
@@ -1751,27 +1823,27 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
default: // End of attributes.
return HaveError;
case lltok::StringConstant: {
- if (ParseStringAttribute(B))
+ if (parseStringAttribute(B))
return true;
continue;
}
case lltok::kw_dereferenceable: {
uint64_t Bytes;
- if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
+ if (parseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
return true;
B.addDereferenceableAttr(Bytes);
continue;
}
case lltok::kw_dereferenceable_or_null: {
uint64_t Bytes;
- if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes))
+ if (parseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes))
return true;
B.addDereferenceableOrNullAttr(Bytes);
continue;
}
case lltok::kw_align: {
MaybeAlign Alignment;
- if (ParseOptionalAlignment(Alignment))
+ if (parseOptionalAlignment(Alignment))
return true;
B.addAlignmentAttr(Alignment);
continue;
@@ -1785,7 +1857,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_signext: B.addAttribute(Attribute::SExt); break;
case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
- // Error handling.
+ // error handling.
case lltok::kw_byval:
case lltok::kw_inalloca:
case lltok::kw_nest:
@@ -1795,7 +1867,9 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_swifterror:
case lltok::kw_swiftself:
case lltok::kw_immarg:
- HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
+ case lltok::kw_byref:
+ HaveError |=
+ error(Lex.getLoc(), "invalid use of parameter-only attribute");
break;
case lltok::kw_alignstack:
@@ -1806,6 +1880,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_inlinehint:
case lltok::kw_jumptable:
case lltok::kw_minsize:
+ case lltok::kw_mustprogress:
case lltok::kw_naked:
case lltok::kw_nobuiltin:
case lltok::kw_noduplicate:
@@ -1813,6 +1888,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_noinline:
case lltok::kw_nonlazybind:
case lltok::kw_nomerge:
+ case lltok::kw_noprofile:
case lltok::kw_noredzone:
case lltok::kw_noreturn:
case lltok::kw_nocf_check:
@@ -1834,15 +1910,17 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_shadowcallstack:
case lltok::kw_strictfp:
case lltok::kw_uwtable:
- HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
+ HaveError |=
+ error(Lex.getLoc(), "invalid use of function-only attribute");
break;
case lltok::kw_readnone:
case lltok::kw_readonly:
- HaveError |= Error(Lex.getLoc(), "invalid use of attribute on return type");
+ HaveError |=
+ error(Lex.getLoc(), "invalid use of attribute on return type");
break;
case lltok::kw_preallocated:
HaveError |=
- Error(Lex.getLoc(),
+ error(Lex.getLoc(),
"invalid use of parameter-only/call site-only attribute");
break;
}
@@ -1882,7 +1960,7 @@ static unsigned parseOptionalLinkageAux(lltok::Kind Kind, bool &HasLinkage) {
}
}
-/// ParseOptionalLinkage
+/// parseOptionalLinkage
/// ::= /*empty*/
/// ::= 'private'
/// ::= 'internal'
@@ -1895,25 +1973,24 @@ static unsigned parseOptionalLinkageAux(lltok::Kind Kind, bool &HasLinkage) {
/// ::= 'common'
/// ::= 'extern_weak'
/// ::= 'external'
-bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage,
+bool LLParser::parseOptionalLinkage(unsigned &Res, bool &HasLinkage,
unsigned &Visibility,
- unsigned &DLLStorageClass,
- bool &DSOLocal) {
+ unsigned &DLLStorageClass, bool &DSOLocal) {
Res = parseOptionalLinkageAux(Lex.getKind(), HasLinkage);
if (HasLinkage)
Lex.Lex();
- ParseOptionalDSOLocal(DSOLocal);
- ParseOptionalVisibility(Visibility);
- ParseOptionalDLLStorageClass(DLLStorageClass);
+ parseOptionalDSOLocal(DSOLocal);
+ parseOptionalVisibility(Visibility);
+ parseOptionalDLLStorageClass(DLLStorageClass);
if (DSOLocal && DLLStorageClass == GlobalValue::DLLImportStorageClass) {
- return Error(Lex.getLoc(), "dso_location and DLL-StorageClass mismatch");
+ return error(Lex.getLoc(), "dso_location and DLL-StorageClass mismatch");
}
return false;
}
-void LLParser::ParseOptionalDSOLocal(bool &DSOLocal) {
+void LLParser::parseOptionalDSOLocal(bool &DSOLocal) {
switch (Lex.getKind()) {
default:
DSOLocal = false;
@@ -1929,13 +2006,13 @@ void LLParser::ParseOptionalDSOLocal(bool &DSOLocal) {
}
}
-/// ParseOptionalVisibility
+/// parseOptionalVisibility
/// ::= /*empty*/
/// ::= 'default'
/// ::= 'hidden'
/// ::= 'protected'
///
-void LLParser::ParseOptionalVisibility(unsigned &Res) {
+void LLParser::parseOptionalVisibility(unsigned &Res) {
switch (Lex.getKind()) {
default:
Res = GlobalValue::DefaultVisibility;
@@ -1953,12 +2030,12 @@ void LLParser::ParseOptionalVisibility(unsigned &Res) {
Lex.Lex();
}
-/// ParseOptionalDLLStorageClass
+/// parseOptionalDLLStorageClass
/// ::= /*empty*/
/// ::= 'dllimport'
/// ::= 'dllexport'
///
-void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
+void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
switch (Lex.getKind()) {
default:
Res = GlobalValue::DefaultStorageClass;
@@ -1973,7 +2050,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
Lex.Lex();
}
-/// ParseOptionalCallingConv
+/// parseOptionalCallingConv
/// ::= /*empty*/
/// ::= 'ccc'
/// ::= 'fastcc'
@@ -2019,7 +2096,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'tailcc'
/// ::= 'cc' UINT
///
-bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
+bool LLParser::parseOptionalCallingConv(unsigned &CC) {
switch (Lex.getKind()) {
default: CC = CallingConv::C; return false;
case lltok::kw_ccc: CC = CallingConv::C; break;
@@ -2059,6 +2136,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break;
case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break;
case lltok::kw_amdgpu_vs: CC = CallingConv::AMDGPU_VS; break;
+ case lltok::kw_amdgpu_gfx: CC = CallingConv::AMDGPU_Gfx; break;
case lltok::kw_amdgpu_ls: CC = CallingConv::AMDGPU_LS; break;
case lltok::kw_amdgpu_hs: CC = CallingConv::AMDGPU_HS; break;
case lltok::kw_amdgpu_es: CC = CallingConv::AMDGPU_ES; break;
@@ -2069,7 +2147,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_tailcc: CC = CallingConv::Tail; break;
case lltok::kw_cc: {
Lex.Lex();
- return ParseUInt32(CC);
+ return parseUInt32(CC);
}
}
@@ -2077,28 +2155,28 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
return false;
}
-/// ParseMetadataAttachment
+/// parseMetadataAttachment
/// ::= !dbg !42
-bool LLParser::ParseMetadataAttachment(unsigned &Kind, MDNode *&MD) {
+bool LLParser::parseMetadataAttachment(unsigned &Kind, MDNode *&MD) {
assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata attachment");
std::string Name = Lex.getStrVal();
Kind = M->getMDKindID(Name);
Lex.Lex();
- return ParseMDNode(MD);
+ return parseMDNode(MD);
}
-/// ParseInstructionMetadata
+/// parseInstructionMetadata
/// ::= !dbg !42 (',' !dbg !57)*
-bool LLParser::ParseInstructionMetadata(Instruction &Inst) {
+bool LLParser::parseInstructionMetadata(Instruction &Inst) {
do {
if (Lex.getKind() != lltok::MetadataVar)
- return TokError("expected metadata after comma");
+ return tokError("expected metadata after comma");
unsigned MDK;
MDNode *N;
- if (ParseMetadataAttachment(MDK, N))
+ if (parseMetadataAttachment(MDK, N))
return true;
Inst.setMetadata(MDK, N);
@@ -2110,31 +2188,31 @@ bool LLParser::ParseInstructionMetadata(Instruction &Inst) {
return false;
}
-/// ParseGlobalObjectMetadataAttachment
+/// parseGlobalObjectMetadataAttachment
/// ::= !dbg !57
-bool LLParser::ParseGlobalObjectMetadataAttachment(GlobalObject &GO) {
+bool LLParser::parseGlobalObjectMetadataAttachment(GlobalObject &GO) {
unsigned MDK;
MDNode *N;
- if (ParseMetadataAttachment(MDK, N))
+ if (parseMetadataAttachment(MDK, N))
return true;
GO.addMetadata(MDK, *N);
return false;
}
-/// ParseOptionalFunctionMetadata
+/// parseOptionalFunctionMetadata
/// ::= (!dbg !57)*
-bool LLParser::ParseOptionalFunctionMetadata(Function &F) {
+bool LLParser::parseOptionalFunctionMetadata(Function &F) {
while (Lex.getKind() == lltok::MetadataVar)
- if (ParseGlobalObjectMetadataAttachment(F))
+ if (parseGlobalObjectMetadataAttachment(F))
return true;
return false;
}
-/// ParseOptionalAlignment
+/// parseOptionalAlignment
/// ::= /* empty */
/// ::= 'align' 4
-bool LLParser::ParseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) {
+bool LLParser::parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) {
Alignment = None;
if (!EatIfPresent(lltok::kw_align))
return false;
@@ -2148,26 +2226,26 @@ bool LLParser::ParseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) {
HaveParens = true;
}
- if (ParseUInt32(Value))
+ if (parseUInt32(Value))
return true;
if (HaveParens && !EatIfPresent(lltok::rparen))
- return Error(ParenLoc, "expected ')'");
+ return error(ParenLoc, "expected ')'");
if (!isPowerOf2_32(Value))
- return Error(AlignLoc, "alignment is not a power of two");
+ return error(AlignLoc, "alignment is not a power of two");
if (Value > Value::MaximumAlignment)
- return Error(AlignLoc, "huge alignments are not supported yet");
+ return error(AlignLoc, "huge alignments are not supported yet");
Alignment = Align(Value);
return false;
}
-/// ParseOptionalDerefAttrBytes
+/// parseOptionalDerefAttrBytes
/// ::= /* empty */
/// ::= AttrKind '(' 4 ')'
///
/// where AttrKind is either 'dereferenceable' or 'dereferenceable_or_null'.
-bool LLParser::ParseOptionalDerefAttrBytes(lltok::Kind AttrKind,
+bool LLParser::parseOptionalDerefAttrBytes(lltok::Kind AttrKind,
uint64_t &Bytes) {
assert((AttrKind == lltok::kw_dereferenceable ||
AttrKind == lltok::kw_dereferenceable_or_null) &&
@@ -2178,24 +2256,25 @@ bool LLParser::ParseOptionalDerefAttrBytes(lltok::Kind AttrKind,
return false;
LocTy ParenLoc = Lex.getLoc();
if (!EatIfPresent(lltok::lparen))
- return Error(ParenLoc, "expected '('");
+ return error(ParenLoc, "expected '('");
LocTy DerefLoc = Lex.getLoc();
- if (ParseUInt64(Bytes)) return true;
+ if (parseUInt64(Bytes))
+ return true;
ParenLoc = Lex.getLoc();
if (!EatIfPresent(lltok::rparen))
- return Error(ParenLoc, "expected ')'");
+ return error(ParenLoc, "expected ')'");
if (!Bytes)
- return Error(DerefLoc, "dereferenceable bytes must be non-zero");
+ return error(DerefLoc, "dereferenceable bytes must be non-zero");
return false;
}
-/// ParseOptionalCommaAlign
+/// parseOptionalCommaAlign
/// ::=
/// ::= ',' align 4
///
/// This returns with AteExtraComma set to true if it ate an excess comma at the
/// end.
-bool LLParser::ParseOptionalCommaAlign(MaybeAlign &Alignment,
+bool LLParser::parseOptionalCommaAlign(MaybeAlign &Alignment,
bool &AteExtraComma) {
AteExtraComma = false;
while (EatIfPresent(lltok::comma)) {
@@ -2206,22 +2285,22 @@ bool LLParser::ParseOptionalCommaAlign(MaybeAlign &Alignment,
}
if (Lex.getKind() != lltok::kw_align)
- return Error(Lex.getLoc(), "expected metadata or 'align'");
+ return error(Lex.getLoc(), "expected metadata or 'align'");
- if (ParseOptionalAlignment(Alignment)) return true;
+ if (parseOptionalAlignment(Alignment))
+ return true;
}
return false;
}
-/// ParseOptionalCommaAddrSpace
+/// parseOptionalCommaAddrSpace
/// ::=
/// ::= ',' addrspace(1)
///
/// This returns with AteExtraComma set to true if it ate an excess comma at the
/// end.
-bool LLParser::ParseOptionalCommaAddrSpace(unsigned &AddrSpace,
- LocTy &Loc,
+bool LLParser::parseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc,
bool &AteExtraComma) {
AteExtraComma = false;
while (EatIfPresent(lltok::comma)) {
@@ -2233,9 +2312,9 @@ bool LLParser::ParseOptionalCommaAddrSpace(unsigned &AddrSpace,
Loc = Lex.getLoc();
if (Lex.getKind() != lltok::kw_addrspace)
- return Error(Lex.getLoc(), "expected metadata or 'addrspace'");
+ return error(Lex.getLoc(), "expected metadata or 'addrspace'");
- if (ParseOptionalAddrSpace(AddrSpace))
+ if (parseOptionalAddrSpace(AddrSpace))
return true;
}
@@ -2248,18 +2327,18 @@ bool LLParser::parseAllocSizeArguments(unsigned &BaseSizeArg,
auto StartParen = Lex.getLoc();
if (!EatIfPresent(lltok::lparen))
- return Error(StartParen, "expected '('");
+ return error(StartParen, "expected '('");
- if (ParseUInt32(BaseSizeArg))
+ if (parseUInt32(BaseSizeArg))
return true;
if (EatIfPresent(lltok::comma)) {
auto HowManyAt = Lex.getLoc();
unsigned HowMany;
- if (ParseUInt32(HowMany))
+ if (parseUInt32(HowMany))
return true;
if (HowMany == BaseSizeArg)
- return Error(HowManyAt,
+ return error(HowManyAt,
"'allocsize' indices can't refer to the same parameter");
HowManyArg = HowMany;
} else
@@ -2267,42 +2346,42 @@ bool LLParser::parseAllocSizeArguments(unsigned &BaseSizeArg,
auto EndParen = Lex.getLoc();
if (!EatIfPresent(lltok::rparen))
- return Error(EndParen, "expected ')'");
+ return error(EndParen, "expected ')'");
return false;
}
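
This routine parses the parameter indices of "allocsize(<EltSizeParam>[, <NumEltsParam>])" and rejects two identical indices. A hedged sketch, assuming the LLVM 12 Attribute interface, of producing the equivalent attribute directly through the C++ API (the target function is arbitrary):

  #include "llvm/ADT/Optional.h"
  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/Function.h"

  // Attach the equivalent of "allocsize(0, 1)": the allocation size is
  // parameter 0 multiplied by parameter 1, as in a calloc-like signature.
  void addAllocSizeAttr(llvm::Function &F) {
    llvm::Attribute A = llvm::Attribute::getWithAllocSizeArgs(
        F.getContext(), /*ElemSizeArg=*/0,
        /*NumElemsArg=*/llvm::Optional<unsigned>(1));
    F.addFnAttr(A);
  }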
-/// ParseScopeAndOrdering
+/// parseScopeAndOrdering
/// if isAtomic: ::= SyncScope? AtomicOrdering
/// else: ::=
///
/// This sets Scope and Ordering to the parsed values.
-bool LLParser::ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID,
+bool LLParser::parseScopeAndOrdering(bool IsAtomic, SyncScope::ID &SSID,
AtomicOrdering &Ordering) {
- if (!isAtomic)
+ if (!IsAtomic)
return false;
- return ParseScope(SSID) || ParseOrdering(Ordering);
+ return parseScope(SSID) || parseOrdering(Ordering);
}
-/// ParseScope
+/// parseScope
/// ::= syncscope("singlethread" | "<target scope>")?
///
/// This sets synchronization scope ID to the ID of the parsed value.
-bool LLParser::ParseScope(SyncScope::ID &SSID) {
+bool LLParser::parseScope(SyncScope::ID &SSID) {
SSID = SyncScope::System;
if (EatIfPresent(lltok::kw_syncscope)) {
auto StartParenAt = Lex.getLoc();
if (!EatIfPresent(lltok::lparen))
- return Error(StartParenAt, "Expected '(' in syncscope");
+ return error(StartParenAt, "Expected '(' in syncscope");
std::string SSN;
auto SSNAt = Lex.getLoc();
- if (ParseStringConstant(SSN))
- return Error(SSNAt, "Expected synchronization scope name");
+ if (parseStringConstant(SSN))
+ return error(SSNAt, "Expected synchronization scope name");
auto EndParenAt = Lex.getLoc();
if (!EatIfPresent(lltok::rparen))
- return Error(EndParenAt, "Expected ')' in syncscope");
+ return error(EndParenAt, "Expected ')' in syncscope");
SSID = Context.getOrInsertSyncScopeID(SSN);
}
@@ -2310,13 +2389,14 @@ bool LLParser::ParseScope(SyncScope::ID &SSID) {
return false;
}
-/// ParseOrdering
+/// parseOrdering
/// ::= AtomicOrdering
///
/// This sets Ordering to the parsed value.
-bool LLParser::ParseOrdering(AtomicOrdering &Ordering) {
+bool LLParser::parseOrdering(AtomicOrdering &Ordering) {
switch (Lex.getKind()) {
- default: return TokError("Expected ordering on atomic instruction");
+ default:
+ return tokError("Expected ordering on atomic instruction");
case lltok::kw_unordered: Ordering = AtomicOrdering::Unordered; break;
case lltok::kw_monotonic: Ordering = AtomicOrdering::Monotonic; break;
// Not specified yet:
@@ -2332,50 +2412,53 @@ bool LLParser::ParseOrdering(AtomicOrdering &Ordering) {
return false;
}
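
These productions accept textual IR such as 'fence syncscope("agent") seq_cst', where the quoted scope name is mapped to a SyncScope::ID by the same getOrInsertSyncScopeID call shown above. A small illustrative sketch (the function and scope names are invented) that builds the instruction those tokens parse to:

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"

  // Emit what 'fence syncscope("agent") seq_cst' parses to.
  void emitScopedFence(llvm::Module &M) {
    llvm::LLVMContext &Ctx = M.getContext();
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), /*isVarArg=*/false);
    llvm::Function *F = llvm::Function::Create(
        FTy, llvm::Function::ExternalLinkage, "fence_demo", &M);
    llvm::IRBuilder<> B(llvm::BasicBlock::Create(Ctx, "entry", F));
    // parseScope() maps the quoted name to an ID with the same call.
    llvm::SyncScope::ID Agent = Ctx.getOrInsertSyncScopeID("agent");
    B.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Agent);
    B.CreateRetVoid();
  }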
-/// ParseOptionalStackAlignment
+/// parseOptionalStackAlignment
/// ::= /* empty */
/// ::= 'alignstack' '(' 4 ')'
-bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) {
+bool LLParser::parseOptionalStackAlignment(unsigned &Alignment) {
Alignment = 0;
if (!EatIfPresent(lltok::kw_alignstack))
return false;
LocTy ParenLoc = Lex.getLoc();
if (!EatIfPresent(lltok::lparen))
- return Error(ParenLoc, "expected '('");
+ return error(ParenLoc, "expected '('");
LocTy AlignLoc = Lex.getLoc();
- if (ParseUInt32(Alignment)) return true;
+ if (parseUInt32(Alignment))
+ return true;
ParenLoc = Lex.getLoc();
if (!EatIfPresent(lltok::rparen))
- return Error(ParenLoc, "expected ')'");
+ return error(ParenLoc, "expected ')'");
if (!isPowerOf2_32(Alignment))
- return Error(AlignLoc, "stack alignment is not a power of two");
+ return error(AlignLoc, "stack alignment is not a power of two");
return false;
}
-/// ParseIndexList - This parses the index list for an insert/extractvalue
+/// parseIndexList - This parses the index list for an insert/extractvalue
/// instruction. This sets AteExtraComma in the case where we eat an extra
/// comma at the end of the line and find that it is followed by metadata.
/// Clients that don't allow metadata can call the version of this function that
/// only takes one argument.
///
-/// ParseIndexList
+/// parseIndexList
/// ::= (',' uint32)+
///
-bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
+bool LLParser::parseIndexList(SmallVectorImpl<unsigned> &Indices,
bool &AteExtraComma) {
AteExtraComma = false;
if (Lex.getKind() != lltok::comma)
- return TokError("expected ',' as start of index list");
+ return tokError("expected ',' as start of index list");
while (EatIfPresent(lltok::comma)) {
if (Lex.getKind() == lltok::MetadataVar) {
- if (Indices.empty()) return TokError("expected index");
+ if (Indices.empty())
+ return tokError("expected index");
AteExtraComma = true;
return false;
}
unsigned Idx = 0;
- if (ParseUInt32(Idx)) return true;
+ if (parseUInt32(Idx))
+ return true;
Indices.push_back(Idx);
}
@@ -2386,12 +2469,12 @@ bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
// Type Parsing.
//===----------------------------------------------------------------------===//
-/// ParseType - Parse a type.
-bool LLParser::ParseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
+/// parseType - parse a type.
+bool LLParser::parseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
SMLoc TypeLoc = Lex.getLoc();
switch (Lex.getKind()) {
default:
- return TokError(Msg);
+ return tokError(Msg);
case lltok::Type:
// Type ::= 'float' | 'void' (etc)
Result = Lex.getTyVal();
@@ -2399,23 +2482,23 @@ bool LLParser::ParseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
break;
case lltok::lbrace:
// Type ::= StructType
- if (ParseAnonStructType(Result, false))
+ if (parseAnonStructType(Result, false))
return true;
break;
case lltok::lsquare:
// Type ::= '[' ... ']'
Lex.Lex(); // eat the lsquare.
- if (ParseArrayVectorType(Result, false))
+ if (parseArrayVectorType(Result, false))
return true;
break;
case lltok::less: // Either vector or packed struct.
// Type ::= '<' ... '>'
Lex.Lex();
if (Lex.getKind() == lltok::lbrace) {
- if (ParseAnonStructType(Result, true) ||
- ParseToken(lltok::greater, "expected '>' at end of packed struct"))
+ if (parseAnonStructType(Result, true) ||
+ parseToken(lltok::greater, "expected '>' at end of packed struct"))
return true;
- } else if (ParseArrayVectorType(Result, true))
+ } else if (parseArrayVectorType(Result, true))
return true;
break;
case lltok::LocalVar: {
@@ -2449,23 +2532,23 @@ bool LLParser::ParseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
}
}
- // Parse the type suffixes.
+ // parse the type suffixes.
while (true) {
switch (Lex.getKind()) {
// End of type.
default:
if (!AllowVoid && Result->isVoidTy())
- return Error(TypeLoc, "void type only allowed for function results");
+ return error(TypeLoc, "void type only allowed for function results");
return false;
// Type ::= Type '*'
case lltok::star:
if (Result->isLabelTy())
- return TokError("basic block pointers are invalid");
+ return tokError("basic block pointers are invalid");
if (Result->isVoidTy())
- return TokError("pointers to void are invalid - use i8* instead");
+ return tokError("pointers to void are invalid - use i8* instead");
if (!PointerType::isValidElementType(Result))
- return TokError("pointer to this type is invalid");
+ return tokError("pointer to this type is invalid");
Result = PointerType::getUnqual(Result);
Lex.Lex();
break;
@@ -2473,14 +2556,14 @@ bool LLParser::ParseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
// Type ::= Type 'addrspace' '(' uint32 ')' '*'
case lltok::kw_addrspace: {
if (Result->isLabelTy())
- return TokError("basic block pointers are invalid");
+ return tokError("basic block pointers are invalid");
if (Result->isVoidTy())
- return TokError("pointers to void are invalid; use i8* instead");
+ return tokError("pointers to void are invalid; use i8* instead");
if (!PointerType::isValidElementType(Result))
- return TokError("pointer to this type is invalid");
+ return tokError("pointer to this type is invalid");
unsigned AddrSpace;
- if (ParseOptionalAddrSpace(AddrSpace) ||
- ParseToken(lltok::star, "expected '*' in address space"))
+ if (parseOptionalAddrSpace(AddrSpace) ||
+ parseToken(lltok::star, "expected '*' in address space"))
return true;
Result = PointerType::get(Result, AddrSpace);
@@ -2489,55 +2572,55 @@ bool LLParser::ParseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
/// Types '(' ArgTypeListI ')' OptFuncAttrs
case lltok::lparen:
- if (ParseFunctionType(Result))
+ if (parseFunctionType(Result))
return true;
break;
}
}
}
-/// ParseParameterList
+/// parseParameterList
/// ::= '(' ')'
/// ::= '(' Arg (',' Arg)* ')'
/// Arg
/// ::= Type OptionalAttributes Value OptionalAttributes
-bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+bool LLParser::parseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
PerFunctionState &PFS, bool IsMustTailCall,
bool InVarArgsFunc) {
- if (ParseToken(lltok::lparen, "expected '(' in call"))
+ if (parseToken(lltok::lparen, "expected '(' in call"))
return true;
while (Lex.getKind() != lltok::rparen) {
// If this isn't the first argument, we need a comma.
if (!ArgList.empty() &&
- ParseToken(lltok::comma, "expected ',' in argument list"))
+ parseToken(lltok::comma, "expected ',' in argument list"))
return true;
- // Parse an ellipsis if this is a musttail call in a variadic function.
+ // parse an ellipsis if this is a musttail call in a variadic function.
if (Lex.getKind() == lltok::dotdotdot) {
const char *Msg = "unexpected ellipsis in argument list for ";
if (!IsMustTailCall)
- return TokError(Twine(Msg) + "non-musttail call");
+ return tokError(Twine(Msg) + "non-musttail call");
if (!InVarArgsFunc)
- return TokError(Twine(Msg) + "musttail call in non-varargs function");
+ return tokError(Twine(Msg) + "musttail call in non-varargs function");
Lex.Lex(); // Lex the '...', it is purely for readability.
- return ParseToken(lltok::rparen, "expected ')' at end of argument list");
+ return parseToken(lltok::rparen, "expected ')' at end of argument list");
}
- // Parse the argument.
+ // parse the argument.
LocTy ArgLoc;
Type *ArgTy = nullptr;
AttrBuilder ArgAttrs;
Value *V;
- if (ParseType(ArgTy, ArgLoc))
+ if (parseType(ArgTy, ArgLoc))
return true;
if (ArgTy->isMetadataTy()) {
- if (ParseMetadataAsValue(V, PFS))
+ if (parseMetadataAsValue(V, PFS))
return true;
} else {
// Otherwise, handle normal operands.
- if (ParseOptionalParamAttrs(ArgAttrs) || ParseValue(ArgTy, V, PFS))
+ if (parseOptionalParamAttrs(ArgAttrs) || parseValue(ArgTy, V, PFS))
return true;
}
ArgList.push_back(ParamInfo(
@@ -2545,45 +2628,41 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
}
if (IsMustTailCall && InVarArgsFunc)
- return TokError("expected '...' at end of argument list for musttail call "
+ return tokError("expected '...' at end of argument list for musttail call "
"in varargs function");
Lex.Lex(); // Lex the ')'.
return false;
}
-/// ParseByValWithOptionalType
-/// ::= byval
-/// ::= byval(<ty>)
-bool LLParser::ParseByValWithOptionalType(Type *&Result) {
+/// parseRequiredTypeAttr
+/// ::= attrname(<ty>)
+bool LLParser::parseRequiredTypeAttr(Type *&Result, lltok::Kind AttrName) {
Result = nullptr;
- if (!EatIfPresent(lltok::kw_byval))
+ if (!EatIfPresent(AttrName))
return true;
if (!EatIfPresent(lltok::lparen))
- return false;
- if (ParseType(Result))
+ return error(Lex.getLoc(), "expected '('");
+ if (parseType(Result))
return true;
if (!EatIfPresent(lltok::rparen))
- return Error(Lex.getLoc(), "expected ')'");
+ return error(Lex.getLoc(), "expected ')'");
return false;
}
-/// ParsePreallocated
+/// parsePreallocated
/// ::= preallocated(<ty>)
-bool LLParser::ParsePreallocated(Type *&Result) {
- Result = nullptr;
- if (!EatIfPresent(lltok::kw_preallocated))
- return true;
- if (!EatIfPresent(lltok::lparen))
- return Error(Lex.getLoc(), "expected '('");
- if (ParseType(Result))
- return true;
- if (!EatIfPresent(lltok::rparen))
- return Error(Lex.getLoc(), "expected ')'");
- return false;
+bool LLParser::parsePreallocated(Type *&Result) {
+ return parseRequiredTypeAttr(Result, lltok::kw_preallocated);
+}
+
+/// parseByRef
+/// ::= byref(<type>)
+bool LLParser::parseByRef(Type *&Result) {
+ return parseRequiredTypeAttr(Result, lltok::kw_byref);
}
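
The byval-specific helper is replaced here by a generic parseRequiredTypeAttr for attributes that must carry a parenthesised type, and preallocated plus the new byref delegate to it. As a hedged round-trip sketch, a declaration using the byref(<ty>) form can be pushed through the public AsmParser entry point (the symbol name in the IR string is made up):

  #include <memory>

  #include "llvm/AsmParser/Parser.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/SourceMgr.h"
  #include "llvm/Support/raw_ostream.h"

  // Parse a declaration that uses the byref(<ty>) attribute handled above;
  // preallocated(<ty>) takes the same parseRequiredTypeAttr path.
  bool parsesByRef(llvm::LLVMContext &Ctx) {
    const char *IR = "declare void @takes_byref(i32* byref(i32))\n";
    llvm::SMDiagnostic Err;
    std::unique_ptr<llvm::Module> M = llvm::parseAssemblyString(IR, Err, Ctx);
    if (!M)
      Err.print("byref-demo", llvm::errs());
    return M != nullptr;
  }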
-/// ParseOptionalOperandBundles
+/// parseOptionalOperandBundles
/// ::= /*empty*/
/// ::= '[' OperandBundle [, OperandBundle ]* ']'
///
@@ -2592,7 +2671,7 @@ bool LLParser::ParsePreallocated(Type *&Result) {
/// ::= bundle-tag '(' Type Value [, Type Value ]* ')'
///
/// bundle-tag ::= String Constant
-bool LLParser::ParseOptionalOperandBundles(
+bool LLParser::parseOptionalOperandBundles(
SmallVectorImpl<OperandBundleDef> &BundleList, PerFunctionState &PFS) {
LocTy BeginLoc = Lex.getLoc();
if (!EatIfPresent(lltok::lsquare))
@@ -2601,26 +2680,26 @@ bool LLParser::ParseOptionalOperandBundles(
while (Lex.getKind() != lltok::rsquare) {
// If this isn't the first operand bundle, we need a comma.
if (!BundleList.empty() &&
- ParseToken(lltok::comma, "expected ',' in input list"))
+ parseToken(lltok::comma, "expected ',' in input list"))
return true;
std::string Tag;
- if (ParseStringConstant(Tag))
+ if (parseStringConstant(Tag))
return true;
- if (ParseToken(lltok::lparen, "expected '(' in operand bundle"))
+ if (parseToken(lltok::lparen, "expected '(' in operand bundle"))
return true;
std::vector<Value *> Inputs;
while (Lex.getKind() != lltok::rparen) {
// If this isn't the first input, we need a comma.
if (!Inputs.empty() &&
- ParseToken(lltok::comma, "expected ',' in input list"))
+ parseToken(lltok::comma, "expected ',' in input list"))
return true;
Type *Ty = nullptr;
Value *Input = nullptr;
- if (ParseType(Ty) || ParseValue(Ty, Input, PFS))
+ if (parseType(Ty) || parseValue(Ty, Input, PFS))
return true;
Inputs.push_back(Input);
}
@@ -2631,13 +2710,13 @@ bool LLParser::ParseOptionalOperandBundles(
}
if (BundleList.empty())
- return Error(BeginLoc, "operand bundle set must not be empty");
+ return error(BeginLoc, "operand bundle set must not be empty");
Lex.Lex(); // Lex the ']'.
return false;
}
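
The grammar above corresponds to call-site syntax such as 'call void @g() [ "deopt"(i32 13) ]'. An illustrative sketch (function names invented) of constructing the same call through the C++ API, using the OperandBundleDef type this routine fills in:

  #include <vector>

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/InstrTypes.h"  // llvm::OperandBundleDef
  #include "llvm/IR/Module.h"

  // Build what 'call void @g() [ "deopt"(i32 13) ]' parses to.
  void emitCallWithBundle(llvm::Module &M) {
    llvm::LLVMContext &Ctx = M.getContext();
    llvm::FunctionType *VoidFnTy =
        llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
    llvm::FunctionCallee G = M.getOrInsertFunction("g", VoidFnTy);
    llvm::Function *F = llvm::Function::Create(
        VoidFnTy, llvm::Function::ExternalLinkage, "f", &M);
    llvm::IRBuilder<> B(llvm::BasicBlock::Create(Ctx, "entry", F));

    std::vector<llvm::Value *> Inputs = {B.getInt32(13)};
    llvm::OperandBundleDef Deopt("deopt", std::move(Inputs));
    B.CreateCall(G, /*Args=*/{}, /*OpBundles=*/{Deopt});
    B.CreateRetVoid();
  }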
-/// ParseArgumentList - Parse the argument list for a function type or function
+/// parseArgumentList - parse the argument list for a function type or function
/// prototype.
/// ::= '(' ArgTypeListI ')'
/// ArgTypeListI
@@ -2646,17 +2725,17 @@ bool LLParser::ParseOptionalOperandBundles(
/// ::= ArgTypeList ',' '...'
/// ::= ArgType (',' ArgType)*
///
-bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
- bool &isVarArg){
+bool LLParser::parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
+ bool &IsVarArg) {
unsigned CurValID = 0;
- isVarArg = false;
+ IsVarArg = false;
assert(Lex.getKind() == lltok::lparen);
Lex.Lex(); // eat the (.
if (Lex.getKind() == lltok::rparen) {
// empty
} else if (Lex.getKind() == lltok::dotdotdot) {
- isVarArg = true;
+ IsVarArg = true;
Lex.Lex();
} else {
LocTy TypeLoc = Lex.getLoc();
@@ -2664,25 +2743,25 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
AttrBuilder Attrs;
std::string Name;
- if (ParseType(ArgTy) ||
- ParseOptionalParamAttrs(Attrs)) return true;
+ if (parseType(ArgTy) || parseOptionalParamAttrs(Attrs))
+ return true;
if (ArgTy->isVoidTy())
- return Error(TypeLoc, "argument can not have void type");
+ return error(TypeLoc, "argument can not have void type");
if (Lex.getKind() == lltok::LocalVar) {
Name = Lex.getStrVal();
Lex.Lex();
} else if (Lex.getKind() == lltok::LocalVarID) {
if (Lex.getUIntVal() != CurValID)
- return Error(TypeLoc, "argument expected to be numbered '%" +
+ return error(TypeLoc, "argument expected to be numbered '%" +
Twine(CurValID) + "'");
++CurValID;
Lex.Lex();
}
if (!FunctionType::isValidArgumentType(ArgTy))
- return Error(TypeLoc, "invalid type for function argument");
+ return error(TypeLoc, "invalid type for function argument");
ArgList.emplace_back(TypeLoc, ArgTy,
AttributeSet::get(ArgTy->getContext(), Attrs),
@@ -2691,16 +2770,17 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
while (EatIfPresent(lltok::comma)) {
// Handle ... at end of arg list.
if (EatIfPresent(lltok::dotdotdot)) {
- isVarArg = true;
+ IsVarArg = true;
break;
}
// Otherwise must be an argument type.
TypeLoc = Lex.getLoc();
- if (ParseType(ArgTy) || ParseOptionalParamAttrs(Attrs)) return true;
+ if (parseType(ArgTy) || parseOptionalParamAttrs(Attrs))
+ return true;
if (ArgTy->isVoidTy())
- return Error(TypeLoc, "argument can not have void type");
+ return error(TypeLoc, "argument can not have void type");
if (Lex.getKind() == lltok::LocalVar) {
Name = Lex.getStrVal();
@@ -2708,7 +2788,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
} else {
if (Lex.getKind() == lltok::LocalVarID) {
if (Lex.getUIntVal() != CurValID)
- return Error(TypeLoc, "argument expected to be numbered '%" +
+ return error(TypeLoc, "argument expected to be numbered '%" +
Twine(CurValID) + "'");
Lex.Lex();
}
@@ -2717,7 +2797,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
}
if (!ArgTy->isFirstClassType())
- return Error(TypeLoc, "invalid type for function argument");
+ return error(TypeLoc, "invalid type for function argument");
ArgList.emplace_back(TypeLoc, ArgTy,
AttributeSet::get(ArgTy->getContext(), Attrs),
@@ -2725,28 +2805,28 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
}
}
- return ParseToken(lltok::rparen, "expected ')' at end of argument list");
+ return parseToken(lltok::rparen, "expected ')' at end of argument list");
}
-/// ParseFunctionType
+/// parseFunctionType
/// ::= Type ArgumentList OptionalAttrs
-bool LLParser::ParseFunctionType(Type *&Result) {
+bool LLParser::parseFunctionType(Type *&Result) {
assert(Lex.getKind() == lltok::lparen);
if (!FunctionType::isValidReturnType(Result))
- return TokError("invalid function return type");
+ return tokError("invalid function return type");
SmallVector<ArgInfo, 8> ArgList;
- bool isVarArg;
- if (ParseArgumentList(ArgList, isVarArg))
+ bool IsVarArg;
+ if (parseArgumentList(ArgList, IsVarArg))
return true;
// Reject names on the arguments lists.
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
if (!ArgList[i].Name.empty())
- return Error(ArgList[i].Loc, "argument name invalid in function type");
+ return error(ArgList[i].Loc, "argument name invalid in function type");
if (ArgList[i].Attrs.hasAttributes())
- return Error(ArgList[i].Loc,
+ return error(ArgList[i].Loc,
"argument attributes invalid in function type");
}
@@ -2754,27 +2834,28 @@ bool LLParser::ParseFunctionType(Type *&Result) {
for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
ArgListTy.push_back(ArgList[i].Ty);
- Result = FunctionType::get(Result, ArgListTy, isVarArg);
+ Result = FunctionType::get(Result, ArgListTy, IsVarArg);
return false;
}
-/// ParseAnonStructType - Parse an anonymous struct type, which is inlined into
+/// parseAnonStructType - parse an anonymous struct type, which is inlined into
/// other structs.
-bool LLParser::ParseAnonStructType(Type *&Result, bool Packed) {
+bool LLParser::parseAnonStructType(Type *&Result, bool Packed) {
SmallVector<Type*, 8> Elts;
- if (ParseStructBody(Elts)) return true;
+ if (parseStructBody(Elts))
+ return true;
Result = StructType::get(Context, Elts, Packed);
return false;
}
-/// ParseStructDefinition - Parse a struct in a 'type' definition.
-bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
- std::pair<Type*, LocTy> &Entry,
+/// parseStructDefinition - parse a struct in a 'type' definition.
+bool LLParser::parseStructDefinition(SMLoc TypeLoc, StringRef Name,
+ std::pair<Type *, LocTy> &Entry,
Type *&ResultTy) {
// If the type was already defined, diagnose the redefinition.
if (Entry.first && !Entry.second.isValid())
- return Error(TypeLoc, "redefinition of type");
+ return error(TypeLoc, "redefinition of type");
// If we have opaque, just return without filling in the definition for the
// struct. This counts as a definition as far as the .ll file goes.
@@ -2797,12 +2878,12 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
// forward referenced and not allowed to be recursive.
if (Lex.getKind() != lltok::lbrace) {
if (Entry.first)
- return Error(TypeLoc, "forward references to non-struct type");
+ return error(TypeLoc, "forward references to non-struct type");
ResultTy = nullptr;
if (isPacked)
- return ParseArrayVectorType(ResultTy, true);
- return ParseType(ResultTy);
+ return parseArrayVectorType(ResultTy, true);
+ return parseType(ResultTy);
}
// This type is being defined, so clear the location to indicate this.
@@ -2815,8 +2896,8 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
StructType *STy = cast<StructType>(Entry.first);
SmallVector<Type*, 8> Body;
- if (ParseStructBody(Body) ||
- (isPacked && ParseToken(lltok::greater, "expected '>' in packed struct")))
+ if (parseStructBody(Body) ||
+ (isPacked && parseToken(lltok::greater, "expected '>' in packed struct")))
return true;
STy->setBody(Body, isPacked);
@@ -2824,13 +2905,13 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
return false;
}
-/// ParseStructType: Handles packed and unpacked types. </> parsed elsewhere.
+/// parseStructType: Handles packed and unpacked types. </> parsed elsewhere.
/// StructType
/// ::= '{' '}'
/// ::= '{' Type (',' Type)* '}'
/// ::= '<' '{' '}' '>'
/// ::= '<' '{' Type (',' Type)* '}' '>'
-bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
+bool LLParser::parseStructBody(SmallVectorImpl<Type *> &Body) {
assert(Lex.getKind() == lltok::lbrace);
Lex.Lex(); // Consume the '{'
@@ -2840,37 +2921,39 @@ bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
LocTy EltTyLoc = Lex.getLoc();
Type *Ty = nullptr;
- if (ParseType(Ty)) return true;
+ if (parseType(Ty))
+ return true;
Body.push_back(Ty);
if (!StructType::isValidElementType(Ty))
- return Error(EltTyLoc, "invalid element type for struct");
+ return error(EltTyLoc, "invalid element type for struct");
while (EatIfPresent(lltok::comma)) {
EltTyLoc = Lex.getLoc();
- if (ParseType(Ty)) return true;
+ if (parseType(Ty))
+ return true;
if (!StructType::isValidElementType(Ty))
- return Error(EltTyLoc, "invalid element type for struct");
+ return error(EltTyLoc, "invalid element type for struct");
Body.push_back(Ty);
}
- return ParseToken(lltok::rbrace, "expected '}' at end of struct");
+ return parseToken(lltok::rbrace, "expected '}' at end of struct");
}
-/// ParseArrayVectorType - Parse an array or vector type, assuming the first
+/// parseArrayVectorType - parse an array or vector type, assuming the first
/// token has already been consumed.
/// Type
/// ::= '[' APSINTVAL 'x' Types ']'
/// ::= '<' APSINTVAL 'x' Types '>'
/// ::= '<' 'vscale' 'x' APSINTVAL 'x' Types '>'
-bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
+bool LLParser::parseArrayVectorType(Type *&Result, bool IsVector) {
bool Scalable = false;
- if (isVector && Lex.getKind() == lltok::kw_vscale) {
+ if (IsVector && Lex.getKind() == lltok::kw_vscale) {
Lex.Lex(); // consume the 'vscale'
- if (ParseToken(lltok::kw_x, "expected 'x' after vscale"))
+ if (parseToken(lltok::kw_x, "expected 'x' after vscale"))
return true;
Scalable = true;
@@ -2878,34 +2961,35 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
Lex.getAPSIntVal().getBitWidth() > 64)
- return TokError("expected number in address space");
+ return tokError("expected number in address space");
LocTy SizeLoc = Lex.getLoc();
uint64_t Size = Lex.getAPSIntVal().getZExtValue();
Lex.Lex();
- if (ParseToken(lltok::kw_x, "expected 'x' after element count"))
- return true;
+ if (parseToken(lltok::kw_x, "expected 'x' after element count"))
+ return true;
LocTy TypeLoc = Lex.getLoc();
Type *EltTy = nullptr;
- if (ParseType(EltTy)) return true;
+ if (parseType(EltTy))
+ return true;
- if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
+ if (parseToken(IsVector ? lltok::greater : lltok::rsquare,
"expected end of sequential type"))
return true;
- if (isVector) {
+ if (IsVector) {
if (Size == 0)
- return Error(SizeLoc, "zero element vector is illegal");
+ return error(SizeLoc, "zero element vector is illegal");
if ((unsigned)Size != Size)
- return Error(SizeLoc, "size too large for vector");
+ return error(SizeLoc, "size too large for vector");
if (!VectorType::isValidElementType(EltTy))
- return Error(TypeLoc, "invalid vector element type");
+ return error(TypeLoc, "invalid vector element type");
Result = VectorType::get(EltTy, unsigned(Size), Scalable);
} else {
if (!ArrayType::isValidElementType(EltTy))
- return Error(TypeLoc, "invalid array element type");
+ return error(TypeLoc, "invalid array element type");
Result = ArrayType::get(EltTy, Size);
}
return false;
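
For reference, the three productions above build ArrayType or VectorType values, and the 'vscale x' prefix only flips the Scalable flag passed to VectorType::get. A small sketch of the corresponding in-memory types:

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Type.h"

  // The types that '[8 x i32]', '<4 x float>' and '<vscale x 2 x i64>' parse to.
  void buildSequentialTypes(llvm::LLVMContext &Ctx) {
    llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
    llvm::Type *F32 = llvm::Type::getFloatTy(Ctx);
    llvm::Type *I64 = llvm::Type::getInt64Ty(Ctx);

    llvm::ArrayType *Arr = llvm::ArrayType::get(I32, 8);          // [8 x i32]
    llvm::VectorType *Vec =
        llvm::VectorType::get(F32, 4, /*Scalable=*/false);        // <4 x float>
    llvm::VectorType *SVec =
        llvm::VectorType::get(I64, 2, /*Scalable=*/true);         // <vscale x 2 x i64>
    (void)Arr; (void)Vec; (void)SVec;
  }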
@@ -2945,22 +3029,22 @@ LLParser::PerFunctionState::~PerFunctionState() {
}
}
-bool LLParser::PerFunctionState::FinishFunction() {
+bool LLParser::PerFunctionState::finishFunction() {
if (!ForwardRefVals.empty())
- return P.Error(ForwardRefVals.begin()->second.second,
+ return P.error(ForwardRefVals.begin()->second.second,
"use of undefined value '%" + ForwardRefVals.begin()->first +
- "'");
+ "'");
if (!ForwardRefValIDs.empty())
- return P.Error(ForwardRefValIDs.begin()->second.second,
+ return P.error(ForwardRefValIDs.begin()->second.second,
"use of undefined value '%" +
- Twine(ForwardRefValIDs.begin()->first) + "'");
+ Twine(ForwardRefValIDs.begin()->first) + "'");
return false;
}
-/// GetVal - Get a value with the specified name or ID, creating a
+/// getVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
-Value *LLParser::PerFunctionState::GetVal(const std::string &Name, Type *Ty,
+Value *LLParser::PerFunctionState::getVal(const std::string &Name, Type *Ty,
LocTy Loc, bool IsCall) {
// Look this name up in the normal function symbol table.
Value *Val = F.getValueSymbolTable()->lookup(Name);
@@ -2979,7 +3063,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, Type *Ty,
// Don't make placeholders with invalid type.
if (!Ty->isFirstClassType()) {
- P.Error(Loc, "invalid use of a non-first-class type");
+ P.error(Loc, "invalid use of a non-first-class type");
return nullptr;
}
@@ -2995,7 +3079,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, Type *Ty,
return FwdVal;
}
-Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, LocTy Loc,
+Value *LLParser::PerFunctionState::getVal(unsigned ID, Type *Ty, LocTy Loc,
bool IsCall) {
// Look this name up in the normal function symbol table.
Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr;
@@ -3013,7 +3097,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, LocTy Loc,
return P.checkValidVariableType(Loc, "%" + Twine(ID), Ty, Val, IsCall);
if (!Ty->isFirstClassType()) {
- P.Error(Loc, "invalid use of a non-first-class type");
+ P.error(Loc, "invalid use of a non-first-class type");
return nullptr;
}
@@ -3029,15 +3113,15 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, LocTy Loc,
return FwdVal;
}
-/// SetInstName - After an instruction is parsed and inserted into its
+/// setInstName - After an instruction is parsed and inserted into its
/// basic block, this installs its name.
-bool LLParser::PerFunctionState::SetInstName(int NameID,
+bool LLParser::PerFunctionState::setInstName(int NameID,
const std::string &NameStr,
LocTy NameLoc, Instruction *Inst) {
// If this instruction has void type, it cannot have a name or ID specified.
if (Inst->getType()->isVoidTy()) {
if (NameID != -1 || !NameStr.empty())
- return P.Error(NameLoc, "instructions returning void cannot have a name");
+ return P.error(NameLoc, "instructions returning void cannot have a name");
return false;
}
@@ -3049,15 +3133,16 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
NameID = NumberedVals.size();
if (unsigned(NameID) != NumberedVals.size())
- return P.Error(NameLoc, "instruction expected to be numbered '%" +
- Twine(NumberedVals.size()) + "'");
+ return P.error(NameLoc, "instruction expected to be numbered '%" +
+ Twine(NumberedVals.size()) + "'");
auto FI = ForwardRefValIDs.find(NameID);
if (FI != ForwardRefValIDs.end()) {
Value *Sentinel = FI->second.first;
if (Sentinel->getType() != Inst->getType())
- return P.Error(NameLoc, "instruction forward referenced with type '" +
- getTypeString(FI->second.first->getType()) + "'");
+ return P.error(NameLoc, "instruction forward referenced with type '" +
+ getTypeString(FI->second.first->getType()) +
+ "'");
Sentinel->replaceAllUsesWith(Inst);
Sentinel->deleteValue();
@@ -3073,8 +3158,9 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
if (FI != ForwardRefVals.end()) {
Value *Sentinel = FI->second.first;
if (Sentinel->getType() != Inst->getType())
- return P.Error(NameLoc, "instruction forward referenced with type '" +
- getTypeString(FI->second.first->getType()) + "'");
+ return P.error(NameLoc, "instruction forward referenced with type '" +
+ getTypeString(FI->second.first->getType()) +
+ "'");
Sentinel->replaceAllUsesWith(Inst);
Sentinel->deleteValue();
@@ -3085,46 +3171,46 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
Inst->setName(NameStr);
if (Inst->getName() != NameStr)
- return P.Error(NameLoc, "multiple definition of local value named '" +
- NameStr + "'");
+ return P.error(NameLoc, "multiple definition of local value named '" +
+ NameStr + "'");
return false;
}
-/// GetBB - Get a basic block with the specified name or ID, creating a
+/// getBB - Get a basic block with the specified name or ID, creating a
/// forward reference record if needed.
-BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name,
+BasicBlock *LLParser::PerFunctionState::getBB(const std::string &Name,
LocTy Loc) {
return dyn_cast_or_null<BasicBlock>(
- GetVal(Name, Type::getLabelTy(F.getContext()), Loc, /*IsCall=*/false));
+ getVal(Name, Type::getLabelTy(F.getContext()), Loc, /*IsCall=*/false));
}
-BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
+BasicBlock *LLParser::PerFunctionState::getBB(unsigned ID, LocTy Loc) {
return dyn_cast_or_null<BasicBlock>(
- GetVal(ID, Type::getLabelTy(F.getContext()), Loc, /*IsCall=*/false));
+ getVal(ID, Type::getLabelTy(F.getContext()), Loc, /*IsCall=*/false));
}
-/// DefineBB - Define the specified basic block, which is either named or
+/// defineBB - Define the specified basic block, which is either named or
/// unnamed. If there is an error, this returns null otherwise it returns
/// the block being defined.
-BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
+BasicBlock *LLParser::PerFunctionState::defineBB(const std::string &Name,
int NameID, LocTy Loc) {
BasicBlock *BB;
if (Name.empty()) {
if (NameID != -1 && unsigned(NameID) != NumberedVals.size()) {
- P.Error(Loc, "label expected to be numbered '" +
+ P.error(Loc, "label expected to be numbered '" +
Twine(NumberedVals.size()) + "'");
return nullptr;
}
- BB = GetBB(NumberedVals.size(), Loc);
+ BB = getBB(NumberedVals.size(), Loc);
if (!BB) {
- P.Error(Loc, "unable to create block numbered '" +
+ P.error(Loc, "unable to create block numbered '" +
Twine(NumberedVals.size()) + "'");
return nullptr;
}
} else {
- BB = GetBB(Name, Loc);
+ BB = getBB(Name, Loc);
if (!BB) {
- P.Error(Loc, "unable to create block named '" + Name + "'");
+ P.error(Loc, "unable to create block named '" + Name + "'");
return nullptr;
}
}
@@ -3149,16 +3235,17 @@ BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
// Constants.
//===----------------------------------------------------------------------===//
-/// ParseValID - Parse an abstract value that doesn't necessarily have a
+/// parseValID - parse an abstract value that doesn't necessarily have a
/// type implied. For example, if we parse "4" we don't know what integer type
/// it has. The value will later be combined with its type and checked for
/// sanity. PFS is used to convert function-local operands of metadata (since
/// metadata operands are not just parsed here but also converted to values).
/// PFS can be null when we are not parsing metadata values inside a function.
-bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
+bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS) {
ID.Loc = Lex.getLoc();
switch (Lex.getKind()) {
- default: return TokError("expected value token");
+ default:
+ return tokError("expected value token");
case lltok::GlobalID: // @42
ID.UIntVal = Lex.getUIntVal();
ID.Kind = ValID::t_GlobalID;
@@ -3193,6 +3280,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
break;
case lltok::kw_null: ID.Kind = ValID::t_Null; break;
case lltok::kw_undef: ID.Kind = ValID::t_Undef; break;
+ case lltok::kw_poison: ID.Kind = ValID::t_Poison; break;
case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break;
case lltok::kw_none: ID.Kind = ValID::t_None; break;
@@ -3200,8 +3288,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
// ValID ::= '{' ConstVector '}'
Lex.Lex();
SmallVector<Constant*, 16> Elts;
- if (ParseGlobalValueVector(Elts) ||
- ParseToken(lltok::rbrace, "expected end of struct constant"))
+ if (parseGlobalValueVector(Elts) ||
+ parseToken(lltok::rbrace, "expected end of struct constant"))
return true;
ID.ConstantStructElts = std::make_unique<Constant *[]>(Elts.size());
@@ -3219,10 +3307,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
SmallVector<Constant*, 16> Elts;
LocTy FirstEltLoc = Lex.getLoc();
- if (ParseGlobalValueVector(Elts) ||
+ if (parseGlobalValueVector(Elts) ||
(isPackedStruct &&
- ParseToken(lltok::rbrace, "expected end of packed struct")) ||
- ParseToken(lltok::greater, "expected end of constant"))
+ parseToken(lltok::rbrace, "expected end of packed struct")) ||
+ parseToken(lltok::greater, "expected end of constant"))
return true;
if (isPackedStruct) {
@@ -3235,20 +3323,21 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
}
if (Elts.empty())
- return Error(ID.Loc, "constant vector must not be empty");
+ return error(ID.Loc, "constant vector must not be empty");
if (!Elts[0]->getType()->isIntegerTy() &&
!Elts[0]->getType()->isFloatingPointTy() &&
!Elts[0]->getType()->isPointerTy())
- return Error(FirstEltLoc,
- "vector elements must have integer, pointer or floating point type");
+ return error(
+ FirstEltLoc,
+ "vector elements must have integer, pointer or floating point type");
// Verify that all the vector elements have the same type.
for (unsigned i = 1, e = Elts.size(); i != e; ++i)
if (Elts[i]->getType() != Elts[0]->getType())
- return Error(FirstEltLoc,
- "vector element #" + Twine(i) +
- " is not of type '" + getTypeString(Elts[0]->getType()));
+ return error(FirstEltLoc, "vector element #" + Twine(i) +
+ " is not of type '" +
+ getTypeString(Elts[0]->getType()));
ID.ConstantVal = ConstantVector::get(Elts);
ID.Kind = ValID::t_Constant;
@@ -3258,8 +3347,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
Lex.Lex();
SmallVector<Constant*, 16> Elts;
LocTy FirstEltLoc = Lex.getLoc();
- if (ParseGlobalValueVector(Elts) ||
- ParseToken(lltok::rsquare, "expected end of array constant"))
+ if (parseGlobalValueVector(Elts) ||
+ parseToken(lltok::rsquare, "expected end of array constant"))
return true;
// Handle empty element.
@@ -3271,17 +3360,17 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
}
if (!Elts[0]->getType()->isFirstClassType())
- return Error(FirstEltLoc, "invalid array element type: " +
- getTypeString(Elts[0]->getType()));
+ return error(FirstEltLoc, "invalid array element type: " +
+ getTypeString(Elts[0]->getType()));
ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
// Verify all elements are correct type!
for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
if (Elts[i]->getType() != Elts[0]->getType())
- return Error(FirstEltLoc,
- "array element #" + Twine(i) +
- " is not of type '" + getTypeString(Elts[0]->getType()));
+ return error(FirstEltLoc, "array element #" + Twine(i) +
+ " is not of type '" +
+ getTypeString(Elts[0]->getType()));
}
ID.ConstantVal = ConstantArray::get(ATy, Elts);
@@ -3292,7 +3381,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
Lex.Lex();
ID.ConstantVal = ConstantDataArray::getString(Context, Lex.getStrVal(),
false);
- if (ParseToken(lltok::StringConstant, "expected string")) return true;
+ if (parseToken(lltok::StringConstant, "expected string"))
+ return true;
ID.Kind = ValID::t_Constant;
return false;
@@ -3301,12 +3391,12 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
// STRINGCONSTANT
bool HasSideEffect, AlignStack, AsmDialect;
Lex.Lex();
- if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) ||
- ParseOptionalToken(lltok::kw_alignstack, AlignStack) ||
- ParseOptionalToken(lltok::kw_inteldialect, AsmDialect) ||
- ParseStringConstant(ID.StrVal) ||
- ParseToken(lltok::comma, "expected comma in inline asm expression") ||
- ParseToken(lltok::StringConstant, "expected constraint string"))
+ if (parseOptionalToken(lltok::kw_sideeffect, HasSideEffect) ||
+ parseOptionalToken(lltok::kw_alignstack, AlignStack) ||
+ parseOptionalToken(lltok::kw_inteldialect, AsmDialect) ||
+ parseStringConstant(ID.StrVal) ||
+ parseToken(lltok::comma, "expected comma in inline asm expression") ||
+ parseToken(lltok::StringConstant, "expected constraint string"))
return true;
ID.StrVal2 = Lex.getStrVal();
ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1) |
@@ -3321,17 +3411,18 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ValID Fn, Label;
- if (ParseToken(lltok::lparen, "expected '(' in block address expression") ||
- ParseValID(Fn) ||
- ParseToken(lltok::comma, "expected comma in block address expression")||
- ParseValID(Label) ||
- ParseToken(lltok::rparen, "expected ')' in block address expression"))
+ if (parseToken(lltok::lparen, "expected '(' in block address expression") ||
+ parseValID(Fn) ||
+ parseToken(lltok::comma,
+ "expected comma in block address expression") ||
+ parseValID(Label) ||
+ parseToken(lltok::rparen, "expected ')' in block address expression"))
return true;
if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName)
- return Error(Fn.Loc, "expected function name in blockaddress");
+ return error(Fn.Loc, "expected function name in blockaddress");
if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName)
- return Error(Label.Loc, "expected basic block name in blockaddress");
+ return error(Label.Loc, "expected basic block name in blockaddress");
// Try to find the function (but skip it if it's forward-referenced).
GlobalValue *GV = nullptr;
@@ -3345,10 +3436,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (GV) {
// Confirm that it's actually a function with a definition.
if (!isa<Function>(GV))
- return Error(Fn.Loc, "expected function name in blockaddress");
+ return error(Fn.Loc, "expected function name in blockaddress");
F = cast<Function>(GV);
if (F->isDeclaration())
- return Error(Fn.Loc, "cannot take blockaddress inside a declaration");
+ return error(Fn.Loc, "cannot take blockaddress inside a declaration");
}
if (!F) {
@@ -3372,19 +3463,19 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
BasicBlock *BB;
if (BlockAddressPFS && F == &BlockAddressPFS->getFunction()) {
if (Label.Kind == ValID::t_LocalID)
- BB = BlockAddressPFS->GetBB(Label.UIntVal, Label.Loc);
+ BB = BlockAddressPFS->getBB(Label.UIntVal, Label.Loc);
else
- BB = BlockAddressPFS->GetBB(Label.StrVal, Label.Loc);
+ BB = BlockAddressPFS->getBB(Label.StrVal, Label.Loc);
if (!BB)
- return Error(Label.Loc, "referenced value is not a basic block");
+ return error(Label.Loc, "referenced value is not a basic block");
} else {
if (Label.Kind == ValID::t_LocalID)
- return Error(Label.Loc, "cannot take address of numeric label after "
+ return error(Label.Loc, "cannot take address of numeric label after "
"the function is defined");
BB = dyn_cast_or_null<BasicBlock>(
F->getValueSymbolTable()->lookup(Label.StrVal));
if (!BB)
- return Error(Label.Loc, "referenced value is not a basic block");
+ return error(Label.Loc, "referenced value is not a basic block");
}
ID.ConstantVal = BlockAddress::get(F, BB);
@@ -3392,6 +3483,39 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return false;
}
+ case lltok::kw_dso_local_equivalent: {
+ // ValID ::= 'dso_local_equivalent' @foo
+ Lex.Lex();
+
+ ValID Fn;
+
+ if (parseValID(Fn))
+ return true;
+
+ if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName)
+ return error(Fn.Loc,
+ "expected global value name in dso_local_equivalent");
+
+ // Try to find the function (but skip it if it's forward-referenced).
+ GlobalValue *GV = nullptr;
+ if (Fn.Kind == ValID::t_GlobalID) {
+ if (Fn.UIntVal < NumberedVals.size())
+ GV = NumberedVals[Fn.UIntVal];
+ } else if (!ForwardRefVals.count(Fn.StrVal)) {
+ GV = M->getNamedValue(Fn.StrVal);
+ }
+
+ assert(GV && "Could not find a corresponding global variable");
+
+ if (!GV->getValueType()->isFunctionTy())
+ return error(Fn.Loc, "expected a function, alias to function, or ifunc "
+ "in dso_local_equivalent");
+
+ ID.ConstantVal = DSOLocalEquivalent::get(GV);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
case lltok::kw_trunc:
case lltok::kw_zext:
case lltok::kw_sext:
@@ -3409,16 +3533,16 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
Type *DestTy = nullptr;
Constant *SrcVal;
Lex.Lex();
- if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
- ParseGlobalTypeAndValue(SrcVal) ||
- ParseToken(lltok::kw_to, "expected 'to' in constantexpr cast") ||
- ParseType(DestTy) ||
- ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast"))
+ if (parseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
+ parseGlobalTypeAndValue(SrcVal) ||
+ parseToken(lltok::kw_to, "expected 'to' in constantexpr cast") ||
+ parseType(DestTy) ||
+ parseToken(lltok::rparen, "expected ')' at end of constantexpr cast"))
return true;
if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy))
- return Error(ID.Loc, "invalid cast opcode for cast from '" +
- getTypeString(SrcVal->getType()) + "' to '" +
- getTypeString(DestTy) + "'");
+ return error(ID.Loc, "invalid cast opcode for cast from '" +
+ getTypeString(SrcVal->getType()) + "' to '" +
+ getTypeString(DestTy) + "'");
ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc,
SrcVal, DestTy);
ID.Kind = ValID::t_Constant;
@@ -3428,16 +3552,16 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
Lex.Lex();
Constant *Val;
SmallVector<unsigned, 4> Indices;
- if (ParseToken(lltok::lparen, "expected '(' in extractvalue constantexpr")||
- ParseGlobalTypeAndValue(Val) ||
- ParseIndexList(Indices) ||
- ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr"))
+ if (parseToken(lltok::lparen,
+ "expected '(' in extractvalue constantexpr") ||
+ parseGlobalTypeAndValue(Val) || parseIndexList(Indices) ||
+ parseToken(lltok::rparen, "expected ')' in extractvalue constantexpr"))
return true;
if (!Val->getType()->isAggregateType())
- return Error(ID.Loc, "extractvalue operand must be aggregate type");
+ return error(ID.Loc, "extractvalue operand must be aggregate type");
if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
- return Error(ID.Loc, "invalid indices for extractvalue");
+ return error(ID.Loc, "invalid indices for extractvalue");
ID.ConstantVal = ConstantExpr::getExtractValue(Val, Indices);
ID.Kind = ValID::t_Constant;
return false;
@@ -3446,21 +3570,21 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
Lex.Lex();
Constant *Val0, *Val1;
SmallVector<unsigned, 4> Indices;
- if (ParseToken(lltok::lparen, "expected '(' in insertvalue constantexpr")||
- ParseGlobalTypeAndValue(Val0) ||
- ParseToken(lltok::comma, "expected comma in insertvalue constantexpr")||
- ParseGlobalTypeAndValue(Val1) ||
- ParseIndexList(Indices) ||
- ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
+ if (parseToken(lltok::lparen, "expected '(' in insertvalue constantexpr") ||
+ parseGlobalTypeAndValue(Val0) ||
+ parseToken(lltok::comma,
+ "expected comma in insertvalue constantexpr") ||
+ parseGlobalTypeAndValue(Val1) || parseIndexList(Indices) ||
+ parseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
return true;
if (!Val0->getType()->isAggregateType())
- return Error(ID.Loc, "insertvalue operand must be aggregate type");
+ return error(ID.Loc, "insertvalue operand must be aggregate type");
Type *IndexedType =
ExtractValueInst::getIndexedType(Val0->getType(), Indices);
if (!IndexedType)
- return Error(ID.Loc, "invalid indices for insertvalue");
+ return error(ID.Loc, "invalid indices for insertvalue");
if (IndexedType != Val1->getType())
- return Error(ID.Loc, "insertvalue operand and field disagree in type: '" +
+ return error(ID.Loc, "insertvalue operand and field disagree in type: '" +
getTypeString(Val1->getType()) +
"' instead of '" + getTypeString(IndexedType) +
"'");
@@ -3473,28 +3597,28 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
unsigned PredVal, Opc = Lex.getUIntVal();
Constant *Val0, *Val1;
Lex.Lex();
- if (ParseCmpPredicate(PredVal, Opc) ||
- ParseToken(lltok::lparen, "expected '(' in compare constantexpr") ||
- ParseGlobalTypeAndValue(Val0) ||
- ParseToken(lltok::comma, "expected comma in compare constantexpr") ||
- ParseGlobalTypeAndValue(Val1) ||
- ParseToken(lltok::rparen, "expected ')' in compare constantexpr"))
+ if (parseCmpPredicate(PredVal, Opc) ||
+ parseToken(lltok::lparen, "expected '(' in compare constantexpr") ||
+ parseGlobalTypeAndValue(Val0) ||
+ parseToken(lltok::comma, "expected comma in compare constantexpr") ||
+ parseGlobalTypeAndValue(Val1) ||
+ parseToken(lltok::rparen, "expected ')' in compare constantexpr"))
return true;
if (Val0->getType() != Val1->getType())
- return Error(ID.Loc, "compare operands must have the same type");
+ return error(ID.Loc, "compare operands must have the same type");
CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal;
if (Opc == Instruction::FCmp) {
if (!Val0->getType()->isFPOrFPVectorTy())
- return Error(ID.Loc, "fcmp requires floating point operands");
+ return error(ID.Loc, "fcmp requires floating point operands");
ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1);
} else {
assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
if (!Val0->getType()->isIntOrIntVectorTy() &&
!Val0->getType()->isPtrOrPtrVectorTy())
- return Error(ID.Loc, "icmp requires pointer or integer operands");
+ return error(ID.Loc, "icmp requires pointer or integer operands");
ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
}
ID.Kind = ValID::t_Constant;
@@ -3506,16 +3630,16 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
unsigned Opc = Lex.getUIntVal();
Constant *Val;
Lex.Lex();
- if (ParseToken(lltok::lparen, "expected '(' in unary constantexpr") ||
- ParseGlobalTypeAndValue(Val) ||
- ParseToken(lltok::rparen, "expected ')' in unary constantexpr"))
+ if (parseToken(lltok::lparen, "expected '(' in unary constantexpr") ||
+ parseGlobalTypeAndValue(Val) ||
+ parseToken(lltok::rparen, "expected ')' in unary constantexpr"))
return true;
// Check that the type is valid for the operator.
switch (Opc) {
case Instruction::FNeg:
if (!Val->getType()->isFPOrFPVectorTy())
- return Error(ID.Loc, "constexpr requires fp operands");
+ return error(ID.Loc, "constexpr requires fp operands");
break;
default: llvm_unreachable("Unknown unary operator!");
}
@@ -3561,14 +3685,14 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (EatIfPresent(lltok::kw_exact))
Exact = true;
}
- if (ParseToken(lltok::lparen, "expected '(' in binary constantexpr") ||
- ParseGlobalTypeAndValue(Val0) ||
- ParseToken(lltok::comma, "expected comma in binary constantexpr") ||
- ParseGlobalTypeAndValue(Val1) ||
- ParseToken(lltok::rparen, "expected ')' in binary constantexpr"))
+ if (parseToken(lltok::lparen, "expected '(' in binary constantexpr") ||
+ parseGlobalTypeAndValue(Val0) ||
+ parseToken(lltok::comma, "expected comma in binary constantexpr") ||
+ parseGlobalTypeAndValue(Val1) ||
+ parseToken(lltok::rparen, "expected ')' in binary constantexpr"))
return true;
if (Val0->getType() != Val1->getType())
- return Error(ID.Loc, "operands of constexpr must have same type");
+ return error(ID.Loc, "operands of constexpr must have same type");
// Check that the type is valid for the operator.
switch (Opc) {
case Instruction::Add:
@@ -3582,7 +3706,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
case Instruction::AShr:
case Instruction::LShr:
if (!Val0->getType()->isIntOrIntVectorTy())
- return Error(ID.Loc, "constexpr requires integer operands");
+ return error(ID.Loc, "constexpr requires integer operands");
break;
case Instruction::FAdd:
case Instruction::FSub:
@@ -3590,7 +3714,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
case Instruction::FDiv:
case Instruction::FRem:
if (!Val0->getType()->isFPOrFPVectorTy())
- return Error(ID.Loc, "constexpr requires fp operands");
+ return error(ID.Loc, "constexpr requires fp operands");
break;
default: llvm_unreachable("Unknown binary operator!");
}
@@ -3611,16 +3735,16 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
unsigned Opc = Lex.getUIntVal();
Constant *Val0, *Val1;
Lex.Lex();
- if (ParseToken(lltok::lparen, "expected '(' in logical constantexpr") ||
- ParseGlobalTypeAndValue(Val0) ||
- ParseToken(lltok::comma, "expected comma in logical constantexpr") ||
- ParseGlobalTypeAndValue(Val1) ||
- ParseToken(lltok::rparen, "expected ')' in logical constantexpr"))
+ if (parseToken(lltok::lparen, "expected '(' in logical constantexpr") ||
+ parseGlobalTypeAndValue(Val0) ||
+ parseToken(lltok::comma, "expected comma in logical constantexpr") ||
+ parseGlobalTypeAndValue(Val1) ||
+ parseToken(lltok::rparen, "expected ')' in logical constantexpr"))
return true;
if (Val0->getType() != Val1->getType())
- return Error(ID.Loc, "operands of constexpr must have same type");
+ return error(ID.Loc, "operands of constexpr must have same type");
if (!Val0->getType()->isIntOrIntVectorTy())
- return Error(ID.Loc,
+ return error(ID.Loc,
"constexpr requires integer or integer vector operands");
ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
ID.Kind = ValID::t_Constant;
@@ -3641,31 +3765,31 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (Opc == Instruction::GetElementPtr)
InBounds = EatIfPresent(lltok::kw_inbounds);
- if (ParseToken(lltok::lparen, "expected '(' in constantexpr"))
+ if (parseToken(lltok::lparen, "expected '(' in constantexpr"))
return true;
LocTy ExplicitTypeLoc = Lex.getLoc();
if (Opc == Instruction::GetElementPtr) {
- if (ParseType(Ty) ||
- ParseToken(lltok::comma, "expected comma after getelementptr's type"))
+ if (parseType(Ty) ||
+ parseToken(lltok::comma, "expected comma after getelementptr's type"))
return true;
}
Optional<unsigned> InRangeOp;
- if (ParseGlobalValueVector(
+ if (parseGlobalValueVector(
Elts, Opc == Instruction::GetElementPtr ? &InRangeOp : nullptr) ||
- ParseToken(lltok::rparen, "expected ')' in constantexpr"))
+ parseToken(lltok::rparen, "expected ')' in constantexpr"))
return true;
if (Opc == Instruction::GetElementPtr) {
if (Elts.size() == 0 ||
!Elts[0]->getType()->isPtrOrPtrVectorTy())
- return Error(ID.Loc, "base of getelementptr must be a pointer");
+ return error(ID.Loc, "base of getelementptr must be a pointer");
Type *BaseType = Elts[0]->getType();
auto *BasePointerType = cast<PointerType>(BaseType->getScalarType());
if (Ty != BasePointerType->getElementType())
- return Error(
+ return error(
ExplicitTypeLoc,
"explicit pointee type doesn't match operand's pointee type");
@@ -3678,11 +3802,11 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
for (Constant *Val : Indices) {
Type *ValTy = Val->getType();
if (!ValTy->isIntOrIntVectorTy())
- return Error(ID.Loc, "getelementptr index must be an integer");
+ return error(ID.Loc, "getelementptr index must be an integer");
if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
unsigned ValNumEl = cast<FixedVectorType>(ValVTy)->getNumElements();
if (GEPWidth && (ValNumEl != GEPWidth))
- return Error(
+ return error(
ID.Loc,
"getelementptr vector index has a wrong number of elements");
// GEPWidth may have been unknown because the base is a scalar,
@@ -3693,14 +3817,14 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
SmallPtrSet<Type*, 4> Visited;
if (!Indices.empty() && !Ty->isSized(&Visited))
- return Error(ID.Loc, "base element of getelementptr must be sized");
+ return error(ID.Loc, "base element of getelementptr must be sized");
if (!GetElementPtrInst::getIndexedType(Ty, Indices))
- return Error(ID.Loc, "invalid getelementptr indices");
+ return error(ID.Loc, "invalid getelementptr indices");
if (InRangeOp) {
if (*InRangeOp == 0)
- return Error(ID.Loc,
+ return error(ID.Loc,
"inrange keyword may not appear on pointer operand");
--*InRangeOp;
}
@@ -3709,31 +3833,31 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
InBounds, InRangeOp);
} else if (Opc == Instruction::Select) {
if (Elts.size() != 3)
- return Error(ID.Loc, "expected three operands to select");
+ return error(ID.Loc, "expected three operands to select");
if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1],
Elts[2]))
- return Error(ID.Loc, Reason);
+ return error(ID.Loc, Reason);
ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]);
} else if (Opc == Instruction::ShuffleVector) {
if (Elts.size() != 3)
- return Error(ID.Loc, "expected three operands to shufflevector");
+ return error(ID.Loc, "expected three operands to shufflevector");
if (!ShuffleVectorInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
- return Error(ID.Loc, "invalid operands to shufflevector");
+ return error(ID.Loc, "invalid operands to shufflevector");
SmallVector<int, 16> Mask;
ShuffleVectorInst::getShuffleMask(cast<Constant>(Elts[2]), Mask);
ID.ConstantVal = ConstantExpr::getShuffleVector(Elts[0], Elts[1], Mask);
} else if (Opc == Instruction::ExtractElement) {
if (Elts.size() != 2)
- return Error(ID.Loc, "expected two operands to extractelement");
+ return error(ID.Loc, "expected two operands to extractelement");
if (!ExtractElementInst::isValidOperands(Elts[0], Elts[1]))
- return Error(ID.Loc, "invalid extractelement operands");
+ return error(ID.Loc, "invalid extractelement operands");
ID.ConstantVal = ConstantExpr::getExtractElement(Elts[0], Elts[1]);
} else {
assert(Opc == Instruction::InsertElement && "Unknown opcode");
if (Elts.size() != 3)
- return Error(ID.Loc, "expected three operands to insertelement");
+ return error(ID.Loc, "expected three operands to insertelement");
if (!InsertElementInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
- return Error(ID.Loc, "invalid insertelement operands");
+ return error(ID.Loc, "invalid insertelement operands");
ID.ConstantVal =
ConstantExpr::getInsertElement(Elts[0], Elts[1],Elts[2]);
}
@@ -3747,22 +3871,21 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return false;
}
-/// ParseGlobalValue - Parse a global value with the specified type.
-bool LLParser::ParseGlobalValue(Type *Ty, Constant *&C) {
+/// parseGlobalValue - parse a global value with the specified type.
+bool LLParser::parseGlobalValue(Type *Ty, Constant *&C) {
C = nullptr;
ValID ID;
Value *V = nullptr;
- bool Parsed = ParseValID(ID) ||
- ConvertValIDToValue(Ty, ID, V, nullptr, /*IsCall=*/false);
+ bool Parsed = parseValID(ID) ||
+ convertValIDToValue(Ty, ID, V, nullptr, /*IsCall=*/false);
if (V && !(C = dyn_cast<Constant>(V)))
- return Error(ID.Loc, "global values must be constants");
+ return error(ID.Loc, "global values must be constants");
return Parsed;
}
-bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
+bool LLParser::parseGlobalTypeAndValue(Constant *&V) {
Type *Ty = nullptr;
- return ParseType(Ty) ||
- ParseGlobalValue(Ty, V);
+ return parseType(Ty) || parseGlobalValue(Ty, V);
}
bool LLParser::parseOptionalComdat(StringRef GlobalName, Comdat *&C) {
@@ -3774,24 +3897,24 @@ bool LLParser::parseOptionalComdat(StringRef GlobalName, Comdat *&C) {
if (EatIfPresent(lltok::lparen)) {
if (Lex.getKind() != lltok::ComdatVar)
- return TokError("expected comdat variable");
+ return tokError("expected comdat variable");
C = getComdat(Lex.getStrVal(), Lex.getLoc());
Lex.Lex();
- if (ParseToken(lltok::rparen, "expected ')' after comdat var"))
+ if (parseToken(lltok::rparen, "expected ')' after comdat var"))
return true;
} else {
if (GlobalName.empty())
- return TokError("comdat cannot be unnamed");
+ return tokError("comdat cannot be unnamed");
C = getComdat(std::string(GlobalName), KwLoc);
}
return false;
}
-/// ParseGlobalValueVector
+/// parseGlobalValueVector
/// ::= /*empty*/
/// ::= [inrange] TypeAndValue (',' [inrange] TypeAndValue)*
-bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts,
+bool LLParser::parseGlobalValueVector(SmallVectorImpl<Constant *> &Elts,
Optional<unsigned> *InRangeOp) {
// Empty list.
if (Lex.getKind() == lltok::rbrace ||
@@ -3805,16 +3928,17 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts,
*InRangeOp = Elts.size();
Constant *C;
- if (ParseGlobalTypeAndValue(C)) return true;
+ if (parseGlobalTypeAndValue(C))
+ return true;
Elts.push_back(C);
} while (EatIfPresent(lltok::comma));
return false;
}
-bool LLParser::ParseMDTuple(MDNode *&MD, bool IsDistinct) {
+bool LLParser::parseMDTuple(MDNode *&MD, bool IsDistinct) {
SmallVector<Metadata *, 16> Elts;
- if (ParseMDNodeVector(Elts))
+ if (parseMDNodeVector(Elts))
return true;
MD = (IsDistinct ? MDTuple::getDistinct : MDTuple::get)(Context, Elts);
@@ -3825,21 +3949,20 @@ bool LLParser::ParseMDTuple(MDNode *&MD, bool IsDistinct) {
/// ::= !{ ... }
/// ::= !7
/// ::= !DILocation(...)
-bool LLParser::ParseMDNode(MDNode *&N) {
+bool LLParser::parseMDNode(MDNode *&N) {
if (Lex.getKind() == lltok::MetadataVar)
- return ParseSpecializedMDNode(N);
+ return parseSpecializedMDNode(N);
- return ParseToken(lltok::exclaim, "expected '!' here") ||
- ParseMDNodeTail(N);
+ return parseToken(lltok::exclaim, "expected '!' here") || parseMDNodeTail(N);
}
-bool LLParser::ParseMDNodeTail(MDNode *&N) {
+bool LLParser::parseMDNodeTail(MDNode *&N) {
// !{ ... }
if (Lex.getKind() == lltok::lbrace)
- return ParseMDTuple(N);
+ return parseMDTuple(N);
// !42
- return ParseMDNodeID(N);
+ return parseMDNodeID(N);
}
namespace {
@@ -4037,9 +4160,9 @@ struct MDSignedOrUnsignedField
namespace llvm {
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDAPSIntField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, MDAPSIntField &Result) {
if (Lex.getKind() != lltok::APSInt)
- return TokError("expected integer");
+ return tokError("expected integer");
Result.assign(Lex.getAPSIntVal());
Lex.Lex();
@@ -4047,14 +4170,14 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDAPSIntField &Result) {
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
MDUnsignedField &Result) {
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
- return TokError("expected unsigned integer");
+ return tokError("expected unsigned integer");
auto &U = Lex.getAPSIntVal();
if (U.ugt(Result.Max))
- return TokError("value for '" + Name + "' too large, limit is " +
+ return tokError("value for '" + Name + "' too large, limit is " +
Twine(Result.Max));
Result.assign(U.getZExtValue());
assert(Result.Val <= Result.Max && "Expected value in range");
@@ -4063,25 +4186,25 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, LineField &Result) {
- return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, LineField &Result) {
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, ColumnField &Result) {
- return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, ColumnField &Result) {
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfTagField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfTagField &Result) {
if (Lex.getKind() == lltok::APSInt)
- return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
if (Lex.getKind() != lltok::DwarfTag)
- return TokError("expected DWARF tag");
+ return tokError("expected DWARF tag");
unsigned Tag = dwarf::getTag(Lex.getStrVal());
if (Tag == dwarf::DW_TAG_invalid)
- return TokError("invalid DWARF tag" + Twine(" '") + Lex.getStrVal() + "'");
+ return tokError("invalid DWARF tag" + Twine(" '") + Lex.getStrVal() + "'");
assert(Tag <= Result.Max && "Expected valid DWARF tag");
Result.assign(Tag);
@@ -4090,18 +4213,18 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfTagField &Result) {
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
DwarfMacinfoTypeField &Result) {
if (Lex.getKind() == lltok::APSInt)
- return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
if (Lex.getKind() != lltok::DwarfMacinfo)
- return TokError("expected DWARF macinfo type");
+ return tokError("expected DWARF macinfo type");
unsigned Macinfo = dwarf::getMacinfo(Lex.getStrVal());
if (Macinfo == dwarf::DW_MACINFO_invalid)
- return TokError(
- "invalid DWARF macinfo type" + Twine(" '") + Lex.getStrVal() + "'");
+ return tokError("invalid DWARF macinfo type" + Twine(" '") +
+ Lex.getStrVal() + "'");
assert(Macinfo <= Result.Max && "Expected valid DWARF macinfo type");
Result.assign(Macinfo);
@@ -4110,17 +4233,17 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
DwarfVirtualityField &Result) {
if (Lex.getKind() == lltok::APSInt)
- return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
if (Lex.getKind() != lltok::DwarfVirtuality)
- return TokError("expected DWARF virtuality code");
+ return tokError("expected DWARF virtuality code");
unsigned Virtuality = dwarf::getVirtuality(Lex.getStrVal());
if (Virtuality == dwarf::DW_VIRTUALITY_invalid)
- return TokError("invalid DWARF virtuality code" + Twine(" '") +
+ return tokError("invalid DWARF virtuality code" + Twine(" '") +
Lex.getStrVal() + "'");
assert(Virtuality <= Result.Max && "Expected valid DWARF virtuality code");
Result.assign(Virtuality);
@@ -4129,16 +4252,16 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfLangField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfLangField &Result) {
if (Lex.getKind() == lltok::APSInt)
- return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
if (Lex.getKind() != lltok::DwarfLang)
- return TokError("expected DWARF language");
+ return tokError("expected DWARF language");
unsigned Lang = dwarf::getLanguage(Lex.getStrVal());
if (!Lang)
- return TokError("invalid DWARF language" + Twine(" '") + Lex.getStrVal() +
+ return tokError("invalid DWARF language" + Twine(" '") + Lex.getStrVal() +
"'");
assert(Lang <= Result.Max && "Expected valid DWARF language");
Result.assign(Lang);
@@ -4147,17 +4270,17 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfLangField &Result) {
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfCCField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfCCField &Result) {
if (Lex.getKind() == lltok::APSInt)
- return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
if (Lex.getKind() != lltok::DwarfCC)
- return TokError("expected DWARF calling convention");
+ return tokError("expected DWARF calling convention");
unsigned CC = dwarf::getCallingConvention(Lex.getStrVal());
if (!CC)
- return TokError("invalid DWARF calling convention" + Twine(" '") + Lex.getStrVal() +
- "'");
+ return tokError("invalid DWARF calling convention" + Twine(" '") +
+ Lex.getStrVal() + "'");
assert(CC <= Result.Max && "Expected valid DWARF calling convention");
Result.assign(CC);
Lex.Lex();
@@ -4165,16 +4288,17 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfCCField &Result) {
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, EmissionKindField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
+ EmissionKindField &Result) {
if (Lex.getKind() == lltok::APSInt)
- return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
if (Lex.getKind() != lltok::EmissionKind)
- return TokError("expected emission kind");
+ return tokError("expected emission kind");
auto Kind = DICompileUnit::getEmissionKind(Lex.getStrVal());
if (!Kind)
- return TokError("invalid emission kind" + Twine(" '") + Lex.getStrVal() +
+ return tokError("invalid emission kind" + Twine(" '") + Lex.getStrVal() +
"'");
assert(*Kind <= Result.Max && "Expected valid emission kind");
Result.assign(*Kind);
@@ -4183,17 +4307,17 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, EmissionKindField &Result
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
NameTableKindField &Result) {
if (Lex.getKind() == lltok::APSInt)
- return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
if (Lex.getKind() != lltok::NameTableKind)
- return TokError("expected nameTable kind");
+ return tokError("expected nameTable kind");
auto Kind = DICompileUnit::getNameTableKind(Lex.getStrVal());
if (!Kind)
- return TokError("invalid nameTable kind" + Twine(" '") + Lex.getStrVal() +
+ return tokError("invalid nameTable kind" + Twine(" '") + Lex.getStrVal() +
"'");
assert(((unsigned)*Kind) <= Result.Max && "Expected valid nameTable kind");
Result.assign((unsigned)*Kind);
@@ -4202,17 +4326,17 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
DwarfAttEncodingField &Result) {
if (Lex.getKind() == lltok::APSInt)
- return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
if (Lex.getKind() != lltok::DwarfAttEncoding)
- return TokError("expected DWARF type attribute encoding");
+ return tokError("expected DWARF type attribute encoding");
unsigned Encoding = dwarf::getAttributeEncoding(Lex.getStrVal());
if (!Encoding)
- return TokError("invalid DWARF type attribute encoding" + Twine(" '") +
+ return tokError("invalid DWARF type attribute encoding" + Twine(" '") +
Lex.getStrVal() + "'");
assert(Encoding <= Result.Max && "Expected valid DWARF language");
Result.assign(Encoding);
@@ -4225,29 +4349,29 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
/// ::= DIFlagVector
/// ::= DIFlagVector '|' DIFlagFwdDecl '|' uint32 '|' DIFlagPublic
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) {
- // Parser for a single flag.
+ // parser for a single flag.
auto parseFlag = [&](DINode::DIFlags &Val) {
if (Lex.getKind() == lltok::APSInt && !Lex.getAPSIntVal().isSigned()) {
uint32_t TempVal = static_cast<uint32_t>(Val);
- bool Res = ParseUInt32(TempVal);
+ bool Res = parseUInt32(TempVal);
Val = static_cast<DINode::DIFlags>(TempVal);
return Res;
}
if (Lex.getKind() != lltok::DIFlag)
- return TokError("expected debug info flag");
+ return tokError("expected debug info flag");
Val = DINode::getFlag(Lex.getStrVal());
if (!Val)
- return TokError(Twine("invalid debug info flag flag '") +
+ return tokError(Twine("invalid debug info flag flag '") +
Lex.getStrVal() + "'");
Lex.Lex();
return false;
};
- // Parse the flags and combine them together.
+ // parse the flags and combine them together.
DINode::DIFlags Combined = DINode::FlagZero;
do {
DINode::DIFlags Val;
@@ -4265,29 +4389,29 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) {
/// ::= DISPFlagVector
/// ::= DISPFlagVector '|' DISPFlag* '|' uint32
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DISPFlagField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, DISPFlagField &Result) {
- // Parser for a single flag.
+ // parser for a single flag.
auto parseFlag = [&](DISubprogram::DISPFlags &Val) {
if (Lex.getKind() == lltok::APSInt && !Lex.getAPSIntVal().isSigned()) {
uint32_t TempVal = static_cast<uint32_t>(Val);
- bool Res = ParseUInt32(TempVal);
+ bool Res = parseUInt32(TempVal);
Val = static_cast<DISubprogram::DISPFlags>(TempVal);
return Res;
}
if (Lex.getKind() != lltok::DISPFlag)
- return TokError("expected debug info flag");
+ return tokError("expected debug info flag");
Val = DISubprogram::getFlag(Lex.getStrVal());
if (!Val)
- return TokError(Twine("invalid subprogram debug info flag '") +
+ return tokError(Twine("invalid subprogram debug info flag '") +
Lex.getStrVal() + "'");
Lex.Lex();
return false;
};
- // Parse the flags and combine them together.
+ // parse the flags and combine them together.
DISubprogram::DISPFlags Combined = DISubprogram::SPFlagZero;
do {
DISubprogram::DISPFlags Val;
@@ -4301,17 +4425,16 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DISPFlagField &Result) {
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
- MDSignedField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, MDSignedField &Result) {
if (Lex.getKind() != lltok::APSInt)
- return TokError("expected signed integer");
+ return tokError("expected signed integer");
auto &S = Lex.getAPSIntVal();
if (S < Result.Min)
- return TokError("value for '" + Name + "' too small, limit is " +
+ return tokError("value for '" + Name + "' too small, limit is " +
Twine(Result.Min));
if (S > Result.Max)
- return TokError("value for '" + Name + "' too large, limit is " +
+ return tokError("value for '" + Name + "' too large, limit is " +
Twine(Result.Max));
Result.assign(S.getExtValue());
assert(Result.Val >= Result.Min && "Expected value in range");
@@ -4321,10 +4444,10 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDBoolField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, MDBoolField &Result) {
switch (Lex.getKind()) {
default:
- return TokError("expected 'true' or 'false'");
+ return tokError("expected 'true' or 'false'");
case lltok::kw_true:
Result.assign(true);
break;
@@ -4337,17 +4460,17 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDBoolField &Result) {
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, MDField &Result) {
if (Lex.getKind() == lltok::kw_null) {
if (!Result.AllowNull)
- return TokError("'" + Name + "' cannot be null");
+ return tokError("'" + Name + "' cannot be null");
Lex.Lex();
Result.assign(nullptr);
return false;
}
Metadata *MD;
- if (ParseMetadata(MD, nullptr))
+ if (parseMetadata(MD, nullptr))
return true;
Result.assign(MD);
@@ -4355,12 +4478,12 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDField &Result) {
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
MDSignedOrMDField &Result) {
// Try to parse a signed int.
if (Lex.getKind() == lltok::APSInt) {
MDSignedField Res = Result.A;
- if (!ParseMDField(Loc, Name, Res)) {
+ if (!parseMDField(Loc, Name, Res)) {
Result.assign(Res);
return false;
}
@@ -4369,7 +4492,7 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
// Otherwise, try to parse as an MDField.
MDField Res = Result.B;
- if (!ParseMDField(Loc, Name, Res)) {
+ if (!parseMDField(Loc, Name, Res)) {
Result.assign(Res);
return false;
}
@@ -4378,23 +4501,23 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDStringField &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, MDStringField &Result) {
LocTy ValueLoc = Lex.getLoc();
std::string S;
- if (ParseStringConstant(S))
+ if (parseStringConstant(S))
return true;
if (!Result.AllowEmpty && S.empty())
- return Error(ValueLoc, "'" + Name + "' cannot be empty");
+ return error(ValueLoc, "'" + Name + "' cannot be empty");
Result.assign(S.empty() ? nullptr : MDString::get(Context, S));
return false;
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDFieldList &Result) {
+bool LLParser::parseMDField(LocTy Loc, StringRef Name, MDFieldList &Result) {
SmallVector<Metadata *, 4> MDs;
- if (ParseMDNodeVector(MDs))
+ if (parseMDNodeVector(MDs))
return true;
Result.assign(std::move(MDs));
@@ -4402,14 +4525,14 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDFieldList &Result) {
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
ChecksumKindField &Result) {
Optional<DIFile::ChecksumKind> CSKind =
DIFile::getChecksumKind(Lex.getStrVal());
if (Lex.getKind() != lltok::ChecksumKind || !CSKind)
- return TokError(
- "invalid checksum kind" + Twine(" '") + Lex.getStrVal() + "'");
+ return tokError("invalid checksum kind" + Twine(" '") + Lex.getStrVal() +
+ "'");
Result.assign(*CSKind);
Lex.Lex();
@@ -4419,12 +4542,12 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
} // end namespace llvm
template <class ParserTy>
-bool LLParser::ParseMDFieldsImplBody(ParserTy parseField) {
+bool LLParser::parseMDFieldsImplBody(ParserTy ParseField) {
do {
if (Lex.getKind() != lltok::LabelStr)
- return TokError("expected field label here");
+ return tokError("expected field label here");
- if (parseField())
+ if (ParseField())
return true;
} while (EatIfPresent(lltok::comma));
@@ -4432,67 +4555,70 @@ bool LLParser::ParseMDFieldsImplBody(ParserTy parseField) {
}
template <class ParserTy>
-bool LLParser::ParseMDFieldsImpl(ParserTy parseField, LocTy &ClosingLoc) {
+bool LLParser::parseMDFieldsImpl(ParserTy ParseField, LocTy &ClosingLoc) {
assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata type name");
Lex.Lex();
- if (ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::lparen, "expected '(' here"))
return true;
if (Lex.getKind() != lltok::rparen)
- if (ParseMDFieldsImplBody(parseField))
+ if (parseMDFieldsImplBody(ParseField))
return true;
ClosingLoc = Lex.getLoc();
- return ParseToken(lltok::rparen, "expected ')' here");
+ return parseToken(lltok::rparen, "expected ')' here");
}
template <class FieldTy>
-bool LLParser::ParseMDField(StringRef Name, FieldTy &Result) {
+bool LLParser::parseMDField(StringRef Name, FieldTy &Result) {
if (Result.Seen)
- return TokError("field '" + Name + "' cannot be specified more than once");
+ return tokError("field '" + Name + "' cannot be specified more than once");
LocTy Loc = Lex.getLoc();
Lex.Lex();
- return ParseMDField(Loc, Name, Result);
+ return parseMDField(Loc, Name, Result);
}
-bool LLParser::ParseSpecializedMDNode(MDNode *&N, bool IsDistinct) {
+bool LLParser::parseSpecializedMDNode(MDNode *&N, bool IsDistinct) {
assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata type name");
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) \
if (Lex.getStrVal() == #CLASS) \
- return Parse##CLASS(N, IsDistinct);
+ return parse##CLASS(N, IsDistinct);
#include "llvm/IR/Metadata.def"
- return TokError("expected metadata type");
+ return tokError("expected metadata type");
}
#define DECLARE_FIELD(NAME, TYPE, INIT) TYPE NAME INIT
#define NOP_FIELD(NAME, TYPE, INIT)
#define REQUIRE_FIELD(NAME, TYPE, INIT) \
if (!NAME.Seen) \
- return Error(ClosingLoc, "missing required field '" #NAME "'");
+ return error(ClosingLoc, "missing required field '" #NAME "'");
#define PARSE_MD_FIELD(NAME, TYPE, DEFAULT) \
if (Lex.getStrVal() == #NAME) \
- return ParseMDField(#NAME, NAME);
+ return parseMDField(#NAME, NAME);
#define PARSE_MD_FIELDS() \
VISIT_MD_FIELDS(DECLARE_FIELD, DECLARE_FIELD) \
do { \
LocTy ClosingLoc; \
- if (ParseMDFieldsImpl([&]() -> bool { \
- VISIT_MD_FIELDS(PARSE_MD_FIELD, PARSE_MD_FIELD) \
- return TokError(Twine("invalid field '") + Lex.getStrVal() + "'"); \
- }, ClosingLoc)) \
+ if (parseMDFieldsImpl( \
+ [&]() -> bool { \
+ VISIT_MD_FIELDS(PARSE_MD_FIELD, PARSE_MD_FIELD) \
+ return tokError(Twine("invalid field '") + Lex.getStrVal() + \
+ "'"); \
+ }, \
+ ClosingLoc)) \
return true; \
VISIT_MD_FIELDS(NOP_FIELD, REQUIRE_FIELD) \
} while (false)
#define GET_OR_DISTINCT(CLASS, ARGS) \
(IsDistinct ? CLASS::getDistinct ARGS : CLASS::get ARGS)
-/// ParseDILocationFields:
+/// parseDILocationFields:
/// ::= !DILocation(line: 43, column: 8, scope: !5, inlinedAt: !6,
/// isImplicitCode: true)
-bool LLParser::ParseDILocation(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDILocation(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(line, LineField, ); \
OPTIONAL(column, ColumnField, ); \
@@ -4508,9 +4634,9 @@ bool LLParser::ParseDILocation(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseGenericDINode:
+/// parseGenericDINode:
/// ::= !GenericDINode(tag: 15, header: "...", operands: {...})
-bool LLParser::ParseGenericDINode(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseGenericDINode(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(tag, DwarfTagField, ); \
OPTIONAL(header, MDStringField, ); \
@@ -4523,11 +4649,11 @@ bool LLParser::ParseGenericDINode(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDISubrange:
+/// parseDISubrange:
/// ::= !DISubrange(count: 30, lowerBound: 2)
/// ::= !DISubrange(count: !node, lowerBound: 2)
/// ::= !DISubrange(lowerBound: !node1, upperBound: !node2, stride: !node3)
-bool LLParser::ParseDISubrange(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDISubrange(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(count, MDSignedOrMDField, (-1, -1, INT64_MAX, false)); \
OPTIONAL(lowerBound, MDSignedOrMDField, ); \
@@ -4565,9 +4691,42 @@ bool LLParser::ParseDISubrange(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDIEnumerator:
+/// parseDIGenericSubrange:
+/// ::= !DIGenericSubrange(lowerBound: !node1, upperBound: !node2, stride:
+/// !node3)
+bool LLParser::parseDIGenericSubrange(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ OPTIONAL(count, MDSignedOrMDField, ); \
+ OPTIONAL(lowerBound, MDSignedOrMDField, ); \
+ OPTIONAL(upperBound, MDSignedOrMDField, ); \
+ OPTIONAL(stride, MDSignedOrMDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ auto ConvToMetadata = [&](MDSignedOrMDField Bound) -> Metadata * {
+ if (Bound.isMDSignedField())
+ return DIExpression::get(
+ Context, {dwarf::DW_OP_consts,
+ static_cast<uint64_t>(Bound.getMDSignedValue())});
+ if (Bound.isMDField())
+ return Bound.getMDFieldValue();
+ return nullptr;
+ };
+
+ Metadata *Count = ConvToMetadata(count);
+ Metadata *LowerBound = ConvToMetadata(lowerBound);
+ Metadata *UpperBound = ConvToMetadata(upperBound);
+ Metadata *Stride = ConvToMetadata(stride);
+
+ Result = GET_OR_DISTINCT(DIGenericSubrange,
+ (Context, Count, LowerBound, UpperBound, Stride));
+
+ return false;
+}
+
+/// parseDIEnumerator:
/// ::= !DIEnumerator(value: 30, isUnsigned: true, name: "SomeKind")
-bool LLParser::ParseDIEnumerator(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDIEnumerator(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(name, MDStringField, ); \
REQUIRED(value, MDAPSIntField, ); \
@@ -4576,7 +4735,7 @@ bool LLParser::ParseDIEnumerator(MDNode *&Result, bool IsDistinct) {
#undef VISIT_MD_FIELDS
if (isUnsigned.Val && value.Val.isNegative())
- return TokError("unsigned enumerator with negative value");
+ return tokError("unsigned enumerator with negative value");
APSInt Value(value.Val);
// Add a leading zero so that unsigned values with the msb set are not
@@ -4590,10 +4749,10 @@ bool LLParser::ParseDIEnumerator(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDIBasicType:
+/// parseDIBasicType:
/// ::= !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32,
/// encoding: DW_ATE_encoding, flags: 0)
-bool LLParser::ParseDIBasicType(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDIBasicType(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_base_type)); \
OPTIONAL(name, MDStringField, ); \
@@ -4609,12 +4768,33 @@ bool LLParser::ParseDIBasicType(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDIDerivedType:
+/// parseDIStringType:
+/// ::= !DIStringType(name: "character(4)", size: 32, align: 32)
+bool LLParser::parseDIStringType(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_string_type)); \
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(stringLength, MDField, ); \
+ OPTIONAL(stringLengthExpression, MDField, ); \
+ OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
+ OPTIONAL(encoding, DwarfAttEncodingField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DIStringType,
+ (Context, tag.Val, name.Val, stringLength.Val,
+ stringLengthExpression.Val, size.Val, align.Val,
+ encoding.Val));
+ return false;
+}
+
+/// parseDIDerivedType:
/// ::= !DIDerivedType(tag: DW_TAG_pointer_type, name: "int", file: !0,
/// line: 7, scope: !1, baseType: !2, size: 32,
/// align: 32, offset: 0, flags: 0, extraData: !3,
/// dwarfAddressSpace: 3)
-bool LLParser::ParseDIDerivedType(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDIDerivedType(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(tag, DwarfTagField, ); \
OPTIONAL(name, MDStringField, ); \
@@ -4643,7 +4823,7 @@ bool LLParser::ParseDIDerivedType(MDNode *&Result, bool IsDistinct) {
return false;
}
-bool LLParser::ParseDICompositeType(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDICompositeType(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(tag, DwarfTagField, ); \
OPTIONAL(name, MDStringField, ); \
@@ -4661,17 +4841,28 @@ bool LLParser::ParseDICompositeType(MDNode *&Result, bool IsDistinct) {
OPTIONAL(templateParams, MDField, ); \
OPTIONAL(identifier, MDStringField, ); \
OPTIONAL(discriminator, MDField, ); \
- OPTIONAL(dataLocation, MDField, );
+ OPTIONAL(dataLocation, MDField, ); \
+ OPTIONAL(associated, MDField, ); \
+ OPTIONAL(allocated, MDField, ); \
+ OPTIONAL(rank, MDSignedOrMDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
+ Metadata *Rank = nullptr;
+ if (rank.isMDSignedField())
+ Rank = ConstantAsMetadata::get(ConstantInt::getSigned(
+ Type::getInt64Ty(Context), rank.getMDSignedValue()));
+ else if (rank.isMDField())
+ Rank = rank.getMDFieldValue();
+
// If this has an identifier try to build an ODR type.
if (identifier.Val)
if (auto *CT = DICompositeType::buildODRType(
Context, *identifier.Val, tag.Val, name.Val, file.Val, line.Val,
scope.Val, baseType.Val, size.Val, align.Val, offset.Val, flags.Val,
elements.Val, runtimeLang.Val, vtableHolder.Val, templateParams.Val,
- discriminator.Val, dataLocation.Val)) {
+ discriminator.Val, dataLocation.Val, associated.Val, allocated.Val,
+ Rank)) {
Result = CT;
return false;
}
@@ -4683,11 +4874,12 @@ bool LLParser::ParseDICompositeType(MDNode *&Result, bool IsDistinct) {
(Context, tag.Val, name.Val, file.Val, line.Val, scope.Val, baseType.Val,
size.Val, align.Val, offset.Val, flags.Val, elements.Val,
runtimeLang.Val, vtableHolder.Val, templateParams.Val, identifier.Val,
- discriminator.Val, dataLocation.Val));
+ discriminator.Val, dataLocation.Val, associated.Val, allocated.Val,
+ Rank));
return false;
}
-bool LLParser::ParseDISubroutineType(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDISubroutineType(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(flags, DIFlagField, ); \
OPTIONAL(cc, DwarfCCField, ); \
@@ -4700,12 +4892,12 @@ bool LLParser::ParseDISubroutineType(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDIFileType:
+/// parseDIFileType:
/// ::= !DIFileType(filename: "path/to/file", directory: "/path/to/dir",
/// checksumkind: CSK_MD5,
/// checksum: "000102030405060708090a0b0c0d0e0f",
/// source: "source file contents")
-bool LLParser::ParseDIFile(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) {
// The default constructed value for checksumkind is required, but will never
// be used, as the parser checks if the field was actually Seen before using
// the Val.
@@ -4732,14 +4924,14 @@ bool LLParser::ParseDIFile(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDICompileUnit:
+/// parseDICompileUnit:
/// ::= !DICompileUnit(language: DW_LANG_C99, file: !0, producer: "clang",
/// isOptimized: true, flags: "-O2", runtimeVersion: 1,
/// splitDebugFilename: "abc.debug",
/// emissionKind: FullDebug, enums: !1, retainedTypes: !2,
/// globals: !4, imports: !5, macros: !6, dwoId: 0x0abcd,
/// sysroot: "/", sdk: "MacOSX.sdk")
-bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
if (!IsDistinct)
return Lex.Error("missing 'distinct', required for !DICompileUnit");
@@ -4776,7 +4968,7 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDISubprogram:
+/// parseDISubprogram:
/// ::= !DISubprogram(scope: !0, name: "foo", linkageName: "_Zfoo",
/// file: !1, line: 7, type: !2, isLocal: false,
/// isDefinition: true, scopeLine: 8, containingType: !3,
@@ -4784,7 +4976,7 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
/// virtualIndex: 10, thisAdjustment: 4, flags: 11,
/// spFlags: 10, isOptimized: false, templateParams: !4,
/// declaration: !5, retainedNodes: !6, thrownTypes: !7)
-bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
auto Loc = Lex.getLoc();
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(scope, MDField, ); \
@@ -4830,9 +5022,9 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDILexicalBlock:
+/// parseDILexicalBlock:
/// ::= !DILexicalBlock(scope: !0, file: !2, line: 7, column: 9)
-bool LLParser::ParseDILexicalBlock(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDILexicalBlock(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(scope, MDField, (/* AllowNull */ false)); \
OPTIONAL(file, MDField, ); \
@@ -4846,9 +5038,9 @@ bool LLParser::ParseDILexicalBlock(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDILexicalBlockFile:
+/// parseDILexicalBlockFile:
/// ::= !DILexicalBlockFile(scope: !0, file: !2, discriminator: 9)
-bool LLParser::ParseDILexicalBlockFile(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDILexicalBlockFile(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(scope, MDField, (/* AllowNull */ false)); \
OPTIONAL(file, MDField, ); \
@@ -4861,9 +5053,9 @@ bool LLParser::ParseDILexicalBlockFile(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDICommonBlock:
+/// parseDICommonBlock:
/// ::= !DICommonBlock(scope: !0, file: !2, name: "COMMON name", line: 9)
-bool LLParser::ParseDICommonBlock(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDICommonBlock(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(scope, MDField, ); \
OPTIONAL(declaration, MDField, ); \
@@ -4879,9 +5071,9 @@ bool LLParser::ParseDICommonBlock(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDINamespace:
+/// parseDINamespace:
/// ::= !DINamespace(scope: !0, file: !2, name: "SomeNamespace", line: 9)
-bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDINamespace(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(scope, MDField, ); \
OPTIONAL(name, MDStringField, ); \
@@ -4894,9 +5086,10 @@ bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDIMacro:
-/// ::= !DIMacro(macinfo: type, line: 9, name: "SomeMacro", value: "SomeValue")
-bool LLParser::ParseDIMacro(MDNode *&Result, bool IsDistinct) {
+/// parseDIMacro:
+/// ::= !DIMacro(macinfo: type, line: 9, name: "SomeMacro", value:
+/// "SomeValue")
+bool LLParser::parseDIMacro(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(type, DwarfMacinfoTypeField, ); \
OPTIONAL(line, LineField, ); \
@@ -4910,9 +5103,9 @@ bool LLParser::ParseDIMacro(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDIMacroFile:
+/// parseDIMacroFile:
/// ::= !DIMacroFile(line: 9, file: !2, nodes: !3)
-bool LLParser::ParseDIMacroFile(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDIMacroFile(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(type, DwarfMacinfoTypeField, (dwarf::DW_MACINFO_start_file)); \
OPTIONAL(line, LineField, ); \
@@ -4926,11 +5119,11 @@ bool LLParser::ParseDIMacroFile(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDIModule:
+/// parseDIModule:
/// ::= !DIModule(scope: !0, name: "SomeModule", configMacros:
/// "-DNDEBUG", includePath: "/usr/include", apinotes: "module.apinotes",
-/// file: !1, line: 4)
-bool LLParser::ParseDIModule(MDNode *&Result, bool IsDistinct) {
+/// file: !1, line: 4, isDecl: false)
+bool LLParser::parseDIModule(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(scope, MDField, ); \
REQUIRED(name, MDStringField, ); \
@@ -4938,19 +5131,20 @@ bool LLParser::ParseDIModule(MDNode *&Result, bool IsDistinct) {
OPTIONAL(includePath, MDStringField, ); \
OPTIONAL(apinotes, MDStringField, ); \
OPTIONAL(file, MDField, ); \
- OPTIONAL(line, LineField, );
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(isDecl, MDBoolField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = GET_OR_DISTINCT(DIModule, (Context, file.Val, scope.Val, name.Val,
configMacros.Val, includePath.Val,
- apinotes.Val, line.Val));
+ apinotes.Val, line.Val, isDecl.Val));
return false;
}
-/// ParseDITemplateTypeParameter:
+/// parseDITemplateTypeParameter:
/// ::= !DITemplateTypeParameter(name: "Ty", type: !1, defaulted: false)
-bool LLParser::ParseDITemplateTypeParameter(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDITemplateTypeParameter(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(name, MDStringField, ); \
REQUIRED(type, MDField, ); \
@@ -4963,11 +5157,11 @@ bool LLParser::ParseDITemplateTypeParameter(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDITemplateValueParameter:
+/// parseDITemplateValueParameter:
/// ::= !DITemplateValueParameter(tag: DW_TAG_template_value_parameter,
/// name: "V", type: !1, defaulted: false,
/// value: i32 7)
-bool LLParser::ParseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_template_value_parameter)); \
OPTIONAL(name, MDStringField, ); \
@@ -4984,12 +5178,12 @@ bool LLParser::ParseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDIGlobalVariable:
+/// parseDIGlobalVariable:
/// ::= !DIGlobalVariable(scope: !0, name: "foo", linkageName: "foo",
/// file: !1, line: 7, type: !2, isLocal: false,
/// isDefinition: true, templateParams: !3,
/// declaration: !4, align: 8)
-bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(name, MDStringField, (/* AllowEmpty */ false)); \
OPTIONAL(scope, MDField, ); \
@@ -5013,14 +5207,14 @@ bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDILocalVariable:
+/// parseDILocalVariable:
/// ::= !DILocalVariable(arg: 7, scope: !0, name: "foo",
/// file: !1, line: 7, type: !2, arg: 2, flags: 7,
/// align: 8)
/// ::= !DILocalVariable(scope: !0, name: "foo",
/// file: !1, line: 7, type: !2, arg: 2, flags: 7,
/// align: 8)
-bool LLParser::ParseDILocalVariable(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDILocalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(scope, MDField, (/* AllowNull */ false)); \
OPTIONAL(name, MDStringField, ); \
@@ -5039,9 +5233,9 @@ bool LLParser::ParseDILocalVariable(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDILabel:
+/// parseDILabel:
/// ::= !DILabel(scope: !0, name: "foo", file: !1, line: 7)
-bool LLParser::ParseDILabel(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDILabel(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(scope, MDField, (/* AllowNull */ false)); \
REQUIRED(name, MDStringField, ); \
@@ -5055,13 +5249,13 @@ bool LLParser::ParseDILabel(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDIExpression:
+/// parseDIExpression:
/// ::= !DIExpression(0, 7, -1)
-bool LLParser::ParseDIExpression(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDIExpression(MDNode *&Result, bool IsDistinct) {
assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata type name");
Lex.Lex();
- if (ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::lparen, "expected '(' here"))
return true;
SmallVector<uint64_t, 8> Elements;
@@ -5073,7 +5267,7 @@ bool LLParser::ParseDIExpression(MDNode *&Result, bool IsDistinct) {
Elements.push_back(Op);
continue;
}
- return TokError(Twine("invalid DWARF op '") + Lex.getStrVal() + "'");
+ return tokError(Twine("invalid DWARF op '") + Lex.getStrVal() + "'");
}
if (Lex.getKind() == lltok::DwarfAttEncoding) {
@@ -5082,29 +5276,30 @@ bool LLParser::ParseDIExpression(MDNode *&Result, bool IsDistinct) {
Elements.push_back(Op);
continue;
}
- return TokError(Twine("invalid DWARF attribute encoding '") + Lex.getStrVal() + "'");
+ return tokError(Twine("invalid DWARF attribute encoding '") +
+ Lex.getStrVal() + "'");
}
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
- return TokError("expected unsigned integer");
+ return tokError("expected unsigned integer");
auto &U = Lex.getAPSIntVal();
if (U.ugt(UINT64_MAX))
- return TokError("element too large, limit is " + Twine(UINT64_MAX));
+ return tokError("element too large, limit is " + Twine(UINT64_MAX));
Elements.push_back(U.getZExtValue());
Lex.Lex();
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
Result = GET_OR_DISTINCT(DIExpression, (Context, Elements));
return false;
}
-/// ParseDIGlobalVariableExpression:
+/// parseDIGlobalVariableExpression:
/// ::= !DIGlobalVariableExpression(var: !0, expr: !1)
-bool LLParser::ParseDIGlobalVariableExpression(MDNode *&Result,
+bool LLParser::parseDIGlobalVariableExpression(MDNode *&Result,
bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(var, MDField, ); \
@@ -5117,10 +5312,10 @@ bool LLParser::ParseDIGlobalVariableExpression(MDNode *&Result,
return false;
}
-/// ParseDIObjCProperty:
+/// parseDIObjCProperty:
/// ::= !DIObjCProperty(name: "foo", file: !1, line: 7, setter: "setFoo",
/// getter: "getFoo", attributes: 7, type: !2)
-bool LLParser::ParseDIObjCProperty(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDIObjCProperty(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(name, MDStringField, ); \
OPTIONAL(file, MDField, ); \
@@ -5138,10 +5333,10 @@ bool LLParser::ParseDIObjCProperty(MDNode *&Result, bool IsDistinct) {
return false;
}
-/// ParseDIImportedEntity:
+/// parseDIImportedEntity:
/// ::= !DIImportedEntity(tag: DW_TAG_imported_module, scope: !0, entity: !1,
/// line: 7, name: "foo")
-bool LLParser::ParseDIImportedEntity(MDNode *&Result, bool IsDistinct) {
+bool LLParser::parseDIImportedEntity(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(tag, DwarfTagField, ); \
REQUIRED(scope, MDField, ); \
@@ -5163,45 +5358,45 @@ bool LLParser::ParseDIImportedEntity(MDNode *&Result, bool IsDistinct) {
#undef REQUIRE_FIELD
#undef DECLARE_FIELD
-/// ParseMetadataAsValue
+/// parseMetadataAsValue
/// ::= metadata i32 %local
/// ::= metadata i32 @global
/// ::= metadata i32 7
/// ::= metadata !0
/// ::= metadata !{...}
/// ::= metadata !"string"
-bool LLParser::ParseMetadataAsValue(Value *&V, PerFunctionState &PFS) {
+bool LLParser::parseMetadataAsValue(Value *&V, PerFunctionState &PFS) {
// Note: the type 'metadata' has already been parsed.
Metadata *MD;
- if (ParseMetadata(MD, &PFS))
+ if (parseMetadata(MD, &PFS))
return true;
V = MetadataAsValue::get(Context, MD);
return false;
}
-/// ParseValueAsMetadata
+/// parseValueAsMetadata
/// ::= i32 %local
/// ::= i32 @global
/// ::= i32 7
-bool LLParser::ParseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
+bool LLParser::parseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
PerFunctionState *PFS) {
Type *Ty;
LocTy Loc;
- if (ParseType(Ty, TypeMsg, Loc))
+ if (parseType(Ty, TypeMsg, Loc))
return true;
if (Ty->isMetadataTy())
- return Error(Loc, "invalid metadata-value-metadata roundtrip");
+ return error(Loc, "invalid metadata-value-metadata roundtrip");
Value *V;
- if (ParseValue(Ty, V, PFS))
+ if (parseValue(Ty, V, PFS))
return true;
MD = ValueAsMetadata::get(V);
return false;
}
-/// ParseMetadata
+/// parseMetadata
/// ::= i32 %local
/// ::= i32 @global
/// ::= i32 7
@@ -5209,10 +5404,10 @@ bool LLParser::ParseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
/// ::= !{...}
/// ::= !"string"
/// ::= !DILocation(...)
-bool LLParser::ParseMetadata(Metadata *&MD, PerFunctionState *PFS) {
+bool LLParser::parseMetadata(Metadata *&MD, PerFunctionState *PFS) {
if (Lex.getKind() == lltok::MetadataVar) {
MDNode *N;
- if (ParseSpecializedMDNode(N))
+ if (parseSpecializedMDNode(N))
return true;
MD = N;
return false;
@@ -5221,7 +5416,7 @@ bool LLParser::ParseMetadata(Metadata *&MD, PerFunctionState *PFS) {
// ValueAsMetadata:
// <type> <value>
if (Lex.getKind() != lltok::exclaim)
- return ParseValueAsMetadata(MD, "expected metadata operand", PFS);
+ return parseValueAsMetadata(MD, "expected metadata operand", PFS);
// '!'.
assert(Lex.getKind() == lltok::exclaim && "Expected '!' here");
@@ -5231,7 +5426,7 @@ bool LLParser::ParseMetadata(Metadata *&MD, PerFunctionState *PFS) {
// ::= '!' STRINGCONSTANT
if (Lex.getKind() == lltok::StringConstant) {
MDString *S;
- if (ParseMDString(S))
+ if (parseMDString(S))
return true;
MD = S;
return false;
@@ -5241,7 +5436,7 @@ bool LLParser::ParseMetadata(Metadata *&MD, PerFunctionState *PFS) {
// !{ ... }
// !7
MDNode *N;
- if (ParseMDNodeTail(N))
+ if (parseMDNodeTail(N))
return true;
MD = N;
return false;
@@ -5251,48 +5446,52 @@ bool LLParser::ParseMetadata(Metadata *&MD, PerFunctionState *PFS) {
// Function Parsing.
//===----------------------------------------------------------------------===//
-bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
+bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
PerFunctionState *PFS, bool IsCall) {
if (Ty->isFunctionTy())
- return Error(ID.Loc, "functions are not values, refer to them as pointers");
+ return error(ID.Loc, "functions are not values, refer to them as pointers");
switch (ID.Kind) {
case ValID::t_LocalID:
- if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
- V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc, IsCall);
+ if (!PFS)
+ return error(ID.Loc, "invalid use of function-local name");
+ V = PFS->getVal(ID.UIntVal, Ty, ID.Loc, IsCall);
return V == nullptr;
case ValID::t_LocalName:
- if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
- V = PFS->GetVal(ID.StrVal, Ty, ID.Loc, IsCall);
+ if (!PFS)
+ return error(ID.Loc, "invalid use of function-local name");
+ V = PFS->getVal(ID.StrVal, Ty, ID.Loc, IsCall);
return V == nullptr;
case ValID::t_InlineAsm: {
if (!ID.FTy || !InlineAsm::Verify(ID.FTy, ID.StrVal2))
- return Error(ID.Loc, "invalid type for inline asm constraint string");
+ return error(ID.Loc, "invalid type for inline asm constraint string");
V = InlineAsm::get(ID.FTy, ID.StrVal, ID.StrVal2, ID.UIntVal & 1,
(ID.UIntVal >> 1) & 1,
(InlineAsm::AsmDialect(ID.UIntVal >> 2)));
return false;
}
case ValID::t_GlobalName:
- V = GetGlobalVal(ID.StrVal, Ty, ID.Loc, IsCall);
+ V = getGlobalVal(ID.StrVal, Ty, ID.Loc, IsCall);
return V == nullptr;
case ValID::t_GlobalID:
- V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc, IsCall);
+ V = getGlobalVal(ID.UIntVal, Ty, ID.Loc, IsCall);
return V == nullptr;
case ValID::t_APSInt:
if (!Ty->isIntegerTy())
- return Error(ID.Loc, "integer constant must have integer type");
+ return error(ID.Loc, "integer constant must have integer type");
ID.APSIntVal = ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
V = ConstantInt::get(Context, ID.APSIntVal);
return false;
case ValID::t_APFloat:
if (!Ty->isFloatingPointTy() ||
!ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
- return Error(ID.Loc, "floating point constant invalid for type");
+ return error(ID.Loc, "floating point constant invalid for type");
// The lexer has no type info, so builds all half, bfloat, float, and double
// FP constants as double. Fix this here. Long double does not need this.
if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble()) {
+ // Check for signaling before potentially converting and losing that info.
+ bool IsSNAN = ID.APFloatVal.isSignaling();
bool Ignored;
if (Ty->isHalfTy())
ID.APFloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
@@ -5303,66 +5502,81 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
else if (Ty->isFloatTy())
ID.APFloatVal.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
&Ignored);
+ if (IsSNAN) {
+ // The convert call above may quiet an SNaN, so manufacture another
+ // SNaN. The bitcast works because the payload (significand) parameter
+ // is truncated to fit.
+ APInt Payload = ID.APFloatVal.bitcastToAPInt();
+ ID.APFloatVal = APFloat::getSNaN(ID.APFloatVal.getSemantics(),
+ ID.APFloatVal.isNegative(), &Payload);
+ }
}
V = ConstantFP::get(Context, ID.APFloatVal);
if (V->getType() != Ty)
- return Error(ID.Loc, "floating point constant does not have type '" +
- getTypeString(Ty) + "'");
+ return error(ID.Loc, "floating point constant does not have type '" +
+ getTypeString(Ty) + "'");
return false;
case ValID::t_Null:
if (!Ty->isPointerTy())
- return Error(ID.Loc, "null must be a pointer type");
+ return error(ID.Loc, "null must be a pointer type");
V = ConstantPointerNull::get(cast<PointerType>(Ty));
return false;
case ValID::t_Undef:
// FIXME: LabelTy should not be a first-class type.
if (!Ty->isFirstClassType() || Ty->isLabelTy())
- return Error(ID.Loc, "invalid type for undef constant");
+ return error(ID.Loc, "invalid type for undef constant");
V = UndefValue::get(Ty);
return false;
case ValID::t_EmptyArray:
if (!Ty->isArrayTy() || cast<ArrayType>(Ty)->getNumElements() != 0)
- return Error(ID.Loc, "invalid empty array initializer");
+ return error(ID.Loc, "invalid empty array initializer");
V = UndefValue::get(Ty);
return false;
case ValID::t_Zero:
// FIXME: LabelTy should not be a first-class type.
if (!Ty->isFirstClassType() || Ty->isLabelTy())
- return Error(ID.Loc, "invalid type for null constant");
+ return error(ID.Loc, "invalid type for null constant");
V = Constant::getNullValue(Ty);
return false;
case ValID::t_None:
if (!Ty->isTokenTy())
- return Error(ID.Loc, "invalid type for none constant");
+ return error(ID.Loc, "invalid type for none constant");
V = Constant::getNullValue(Ty);
return false;
+ case ValID::t_Poison:
+ // FIXME: LabelTy should not be a first-class type.
+ if (!Ty->isFirstClassType() || Ty->isLabelTy())
+ return error(ID.Loc, "invalid type for poison constant");
+ V = PoisonValue::get(Ty);
+ return false;
case ValID::t_Constant:
if (ID.ConstantVal->getType() != Ty)
- return Error(ID.Loc, "constant expression type mismatch");
-
+ return error(ID.Loc, "constant expression type mismatch");
V = ID.ConstantVal;
return false;
case ValID::t_ConstantStruct:
case ValID::t_PackedConstantStruct:
if (StructType *ST = dyn_cast<StructType>(Ty)) {
if (ST->getNumElements() != ID.UIntVal)
- return Error(ID.Loc,
+ return error(ID.Loc,
"initializer with struct type has wrong # elements");
if (ST->isPacked() != (ID.Kind == ValID::t_PackedConstantStruct))
- return Error(ID.Loc, "packed'ness of initializer and type don't match");
+ return error(ID.Loc, "packed'ness of initializer and type don't match");
// Verify that the elements are compatible with the structtype.
for (unsigned i = 0, e = ID.UIntVal; i != e; ++i)
if (ID.ConstantStructElts[i]->getType() != ST->getElementType(i))
- return Error(ID.Loc, "element " + Twine(i) +
- " of struct initializer doesn't match struct element type");
+ return error(
+ ID.Loc,
+ "element " + Twine(i) +
+ " of struct initializer doesn't match struct element type");
V = ConstantStruct::get(
ST, makeArrayRef(ID.ConstantStructElts.get(), ID.UIntVal));
} else
- return Error(ID.Loc, "constant expression type mismatch");
+ return error(ID.Loc, "constant expression type mismatch");
return false;
}
llvm_unreachable("Invalid ValID");
@@ -5372,7 +5586,7 @@ bool LLParser::parseConstantValue(Type *Ty, Constant *&C) {
C = nullptr;
ValID ID;
auto Loc = Lex.getLoc();
- if (ParseValID(ID, /*PFS=*/nullptr))
+ if (parseValID(ID, /*PFS=*/nullptr))
return true;
switch (ID.Kind) {
case ValID::t_APSInt:
@@ -5382,7 +5596,7 @@ bool LLParser::parseConstantValue(Type *Ty, Constant *&C) {
case ValID::t_ConstantStruct:
case ValID::t_PackedConstantStruct: {
Value *V;
- if (ConvertValIDToValue(Ty, ID, V, /*PFS=*/nullptr, /*IsCall=*/false))
+ if (convertValIDToValue(Ty, ID, V, /*PFS=*/nullptr, /*IsCall=*/false))
return true;
assert(isa<Constant>(V) && "Expected a constant value");
C = cast<Constant>(V);
@@ -5392,30 +5606,30 @@ bool LLParser::parseConstantValue(Type *Ty, Constant *&C) {
C = Constant::getNullValue(Ty);
return false;
default:
- return Error(Loc, "expected a constant value");
+ return error(Loc, "expected a constant value");
}
}
-bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) {
+bool LLParser::parseValue(Type *Ty, Value *&V, PerFunctionState *PFS) {
V = nullptr;
ValID ID;
- return ParseValID(ID, PFS) ||
- ConvertValIDToValue(Ty, ID, V, PFS, /*IsCall=*/false);
+ return parseValID(ID, PFS) ||
+ convertValIDToValue(Ty, ID, V, PFS, /*IsCall=*/false);
}
-bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) {
+bool LLParser::parseTypeAndValue(Value *&V, PerFunctionState *PFS) {
Type *Ty = nullptr;
- return ParseType(Ty) ||
- ParseValue(Ty, V, PFS);
+ return parseType(Ty) || parseValue(Ty, V, PFS);
}
-bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+bool LLParser::parseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
PerFunctionState &PFS) {
Value *V;
Loc = Lex.getLoc();
- if (ParseTypeAndValue(V, PFS)) return true;
+ if (parseTypeAndValue(V, PFS))
+ return true;
if (!isa<BasicBlock>(V))
- return Error(Loc, "expected a basic block");
+ return error(Loc, "expected a basic block");
BB = cast<BasicBlock>(V);
return false;
}
@@ -5425,8 +5639,8 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
/// OptionalCallingConv OptRetAttrs OptUnnamedAddr Type GlobalName
/// '(' ArgList ')' OptAddrSpace OptFuncAttrs OptSection OptionalAlign
/// OptGC OptionalPrefix OptionalPrologue OptPersonalityFn
-bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
- // Parse the linkage.
+bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
+ // parse the linkage.
LocTy LinkageLoc = Lex.getLoc();
unsigned Linkage;
unsigned Visibility;
@@ -5437,10 +5651,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
bool HasLinkage;
Type *RetType = nullptr;
LocTy RetTypeLoc = Lex.getLoc();
- if (ParseOptionalLinkage(Linkage, HasLinkage, Visibility, DLLStorageClass,
+ if (parseOptionalLinkage(Linkage, HasLinkage, Visibility, DLLStorageClass,
DSOLocal) ||
- ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
- ParseType(RetType, RetTypeLoc, true /*void allowed*/))
+ parseOptionalCallingConv(CC) || parseOptionalReturnAttrs(RetAttrs) ||
+ parseType(RetType, RetTypeLoc, true /*void allowed*/))
return true;
// Verify that the linkage is ok.
@@ -5448,8 +5662,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
case GlobalValue::ExternalLinkage:
break; // always ok.
case GlobalValue::ExternalWeakLinkage:
- if (isDefine)
- return Error(LinkageLoc, "invalid linkage for function definition");
+ if (IsDefine)
+ return error(LinkageLoc, "invalid linkage for function definition");
break;
case GlobalValue::PrivateLinkage:
case GlobalValue::InternalLinkage:
@@ -5458,20 +5672,20 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
- if (!isDefine)
- return Error(LinkageLoc, "invalid linkage for function declaration");
+ if (!IsDefine)
+ return error(LinkageLoc, "invalid linkage for function declaration");
break;
case GlobalValue::AppendingLinkage:
case GlobalValue::CommonLinkage:
- return Error(LinkageLoc, "invalid function linkage type");
+ return error(LinkageLoc, "invalid function linkage type");
}
if (!isValidVisibilityForLinkage(Visibility, Linkage))
- return Error(LinkageLoc,
+ return error(LinkageLoc,
"symbol with local linkage must have default visibility");
if (!FunctionType::isValidReturnType(RetType))
- return Error(RetTypeLoc, "invalid function return type");
+ return error(RetTypeLoc, "invalid function return type");
LocTy NameLoc = Lex.getLoc();
@@ -5482,19 +5696,19 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
unsigned NameID = Lex.getUIntVal();
if (NameID != NumberedVals.size())
- return TokError("function expected to be numbered '%" +
+ return tokError("function expected to be numbered '%" +
Twine(NumberedVals.size()) + "'");
} else {
- return TokError("expected function name");
+ return tokError("expected function name");
}
Lex.Lex();
if (Lex.getKind() != lltok::lparen)
- return TokError("expected '(' in function argument list");
+ return tokError("expected '(' in function argument list");
SmallVector<ArgInfo, 8> ArgList;
- bool isVarArg;
+ bool IsVarArg;
AttrBuilder FuncAttrs;
std::vector<unsigned> FwdRefAttrGrps;
LocTy BuiltinLoc;
@@ -5509,29 +5723,24 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Constant *PersonalityFn = nullptr;
Comdat *C;
- if (ParseArgumentList(ArgList, isVarArg) ||
- ParseOptionalUnnamedAddr(UnnamedAddr) ||
- ParseOptionalProgramAddrSpace(AddrSpace) ||
- ParseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false,
+ if (parseArgumentList(ArgList, IsVarArg) ||
+ parseOptionalUnnamedAddr(UnnamedAddr) ||
+ parseOptionalProgramAddrSpace(AddrSpace) ||
+ parseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false,
BuiltinLoc) ||
- (EatIfPresent(lltok::kw_section) &&
- ParseStringConstant(Section)) ||
- (EatIfPresent(lltok::kw_partition) &&
- ParseStringConstant(Partition)) ||
+ (EatIfPresent(lltok::kw_section) && parseStringConstant(Section)) ||
+ (EatIfPresent(lltok::kw_partition) && parseStringConstant(Partition)) ||
parseOptionalComdat(FunctionName, C) ||
- ParseOptionalAlignment(Alignment) ||
- (EatIfPresent(lltok::kw_gc) &&
- ParseStringConstant(GC)) ||
- (EatIfPresent(lltok::kw_prefix) &&
- ParseGlobalTypeAndValue(Prefix)) ||
- (EatIfPresent(lltok::kw_prologue) &&
- ParseGlobalTypeAndValue(Prologue)) ||
+ parseOptionalAlignment(Alignment) ||
+ (EatIfPresent(lltok::kw_gc) && parseStringConstant(GC)) ||
+ (EatIfPresent(lltok::kw_prefix) && parseGlobalTypeAndValue(Prefix)) ||
+ (EatIfPresent(lltok::kw_prologue) && parseGlobalTypeAndValue(Prologue)) ||
(EatIfPresent(lltok::kw_personality) &&
- ParseGlobalTypeAndValue(PersonalityFn)))
+ parseGlobalTypeAndValue(PersonalityFn)))
return true;
if (FuncAttrs.contains(Attribute::Builtin))
- return Error(BuiltinLoc, "'builtin' attribute not valid on function");
+ return error(BuiltinLoc, "'builtin' attribute not valid on function");
// If the alignment was parsed as an attribute, move to the alignment field.
if (FuncAttrs.hasAlignmentAttr()) {
@@ -5554,10 +5763,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
AttributeSet::get(Context, RetAttrs), Attrs);
if (PAL.hasAttribute(1, Attribute::StructRet) && !RetType->isVoidTy())
- return Error(RetTypeLoc, "functions with 'sret' argument must return void");
+ return error(RetTypeLoc, "functions with 'sret' argument must return void");
- FunctionType *FT =
- FunctionType::get(RetType, ParamTypeList, isVarArg);
+ FunctionType *FT = FunctionType::get(RetType, ParamTypeList, IsVarArg);
PointerType *PFT = PointerType::get(FT, AddrSpace);
Fn = nullptr;
@@ -5568,20 +5776,24 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (FRVI != ForwardRefVals.end()) {
Fn = M->getFunction(FunctionName);
if (!Fn)
- return Error(FRVI->second.second, "invalid forward reference to "
- "function as global value!");
+ return error(FRVI->second.second, "invalid forward reference to "
+ "function as global value!");
if (Fn->getType() != PFT)
- return Error(FRVI->second.second, "invalid forward reference to "
- "function '" + FunctionName + "' with wrong type: "
- "expected '" + getTypeString(PFT) + "' but was '" +
- getTypeString(Fn->getType()) + "'");
+ return error(FRVI->second.second,
+ "invalid forward reference to "
+ "function '" +
+ FunctionName +
+ "' with wrong type: "
+ "expected '" +
+ getTypeString(PFT) + "' but was '" +
+ getTypeString(Fn->getType()) + "'");
ForwardRefVals.erase(FRVI);
} else if ((Fn = M->getFunction(FunctionName))) {
// Reject redefinitions.
- return Error(NameLoc, "invalid redefinition of function '" +
- FunctionName + "'");
+ return error(NameLoc,
+ "invalid redefinition of function '" + FunctionName + "'");
} else if (M->getNamedValue(FunctionName)) {
- return Error(NameLoc, "redefinition of function '@" + FunctionName + "'");
+ return error(NameLoc, "redefinition of function '@" + FunctionName + "'");
}
} else {
@@ -5591,10 +5803,12 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (I != ForwardRefValIDs.end()) {
Fn = cast<Function>(I->second.first);
if (Fn->getType() != PFT)
- return Error(NameLoc, "type of definition and forward reference of '@" +
- Twine(NumberedVals.size()) + "' disagree: "
- "expected '" + getTypeString(PFT) + "' but was '" +
- getTypeString(Fn->getType()) + "'");
+ return error(NameLoc, "type of definition and forward reference of '@" +
+ Twine(NumberedVals.size()) +
+ "' disagree: "
+ "expected '" +
+ getTypeString(PFT) + "' but was '" +
+ getTypeString(Fn->getType()) + "'");
ForwardRefValIDs.erase(I);
}
}
@@ -5637,11 +5851,11 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
ArgIt->setName(ArgList[i].Name);
if (ArgIt->getName() != ArgList[i].Name)
- return Error(ArgList[i].Loc, "redefinition of argument '%" +
- ArgList[i].Name + "'");
+ return error(ArgList[i].Loc,
+ "redefinition of argument '%" + ArgList[i].Name + "'");
}
- if (isDefine)
+ if (IsDefine)
return false;
// Check the declaration has no block address forward references.
@@ -5655,7 +5869,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
}
auto Blocks = ForwardRefBlockAddresses.find(ID);
if (Blocks != ForwardRefBlockAddresses.end())
- return Error(Blocks->first.Loc,
+ return error(Blocks->first.Loc,
"cannot take blockaddress inside a declaration");
return false;
}
@@ -5682,11 +5896,11 @@ bool LLParser::PerFunctionState::resolveForwardRefBlockAddresses() {
"Expected local id or name");
BasicBlock *BB;
if (BBID.Kind == ValID::t_LocalName)
- BB = GetBB(BBID.StrVal, BBID.Loc);
+ BB = getBB(BBID.StrVal, BBID.Loc);
else
- BB = GetBB(BBID.UIntVal, BBID.Loc);
+ BB = getBB(BBID.UIntVal, BBID.Loc);
if (!BB)
- return P.Error(BBID.Loc, "referenced value is not a basic block");
+ return P.error(BBID.Loc, "referenced value is not a basic block");
GV->replaceAllUsesWith(BlockAddress::get(&F, BB));
GV->eraseFromParent();
@@ -5696,11 +5910,11 @@ bool LLParser::PerFunctionState::resolveForwardRefBlockAddresses() {
return false;
}
-/// ParseFunctionBody
+/// parseFunctionBody
/// ::= '{' BasicBlock+ UseListOrderDirective* '}'
-bool LLParser::ParseFunctionBody(Function &Fn) {
+bool LLParser::parseFunctionBody(Function &Fn) {
if (Lex.getKind() != lltok::lbrace)
- return TokError("expected '{' in function body");
+ return tokError("expected '{' in function body");
Lex.Lex(); // eat the {.
int FunctionNumber = -1;
@@ -5716,26 +5930,27 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
// We need at least one basic block.
if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_uselistorder)
- return TokError("function body requires at least one basic block");
+ return tokError("function body requires at least one basic block");
while (Lex.getKind() != lltok::rbrace &&
Lex.getKind() != lltok::kw_uselistorder)
- if (ParseBasicBlock(PFS)) return true;
+ if (parseBasicBlock(PFS))
+ return true;
while (Lex.getKind() != lltok::rbrace)
- if (ParseUseListOrder(&PFS))
+ if (parseUseListOrder(&PFS))
return true;
// Eat the }.
Lex.Lex();
// Verify function is ok.
- return PFS.FinishFunction();
+ return PFS.finishFunction();
}
-/// ParseBasicBlock
+/// parseBasicBlock
/// ::= (LabelStr|LabelID)? Instruction*
-bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
+bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
// If this basic block starts out with a name, remember it.
std::string Name;
int NameID = -1;
@@ -5748,13 +5963,13 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
Lex.Lex();
}
- BasicBlock *BB = PFS.DefineBB(Name, NameID, NameLoc);
+ BasicBlock *BB = PFS.defineBB(Name, NameID, NameLoc);
if (!BB)
return true;
std::string NameStr;
- // Parse the instructions in this block until we get a terminator.
+ // parse the instructions in this block until we get a terminator.
Instruction *Inst;
do {
// This instruction may have three possibilities for a name: a) none
@@ -5766,17 +5981,18 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
if (Lex.getKind() == lltok::LocalVarID) {
NameID = Lex.getUIntVal();
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' after instruction id"))
+ if (parseToken(lltok::equal, "expected '=' after instruction id"))
return true;
} else if (Lex.getKind() == lltok::LocalVar) {
NameStr = Lex.getStrVal();
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' after instruction name"))
+ if (parseToken(lltok::equal, "expected '=' after instruction name"))
return true;
}
- switch (ParseInstruction(Inst, BB, PFS)) {
- default: llvm_unreachable("Unknown ParseInstruction result!");
+ switch (parseInstruction(Inst, BB, PFS)) {
+ default:
+ llvm_unreachable("Unknown parseInstruction result!");
case InstError: return true;
case InstNormal:
BB->getInstList().push_back(Inst);
@@ -5784,7 +6000,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// With a normal result, we check to see if the instruction is followed by
// a comma and metadata.
if (EatIfPresent(lltok::comma))
- if (ParseInstructionMetadata(*Inst))
+ if (parseInstructionMetadata(*Inst))
return true;
break;
case InstExtraComma:
@@ -5792,13 +6008,14 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// If the instruction parser ate an extra comma at the end of it, it
// *must* be followed by metadata.
- if (ParseInstructionMetadata(*Inst))
+ if (parseInstructionMetadata(*Inst))
return true;
break;
}
// Set the name on the instruction.
- if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
+ if (PFS.setInstName(NameID, NameStr, NameLoc, Inst))
+ return true;
} while (!Inst->isTerminator());
return false;
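
As a rough orientation for the hunks above and below (an illustrative sketch, not part of the patch itself): textual IR normally reaches these routines through the public entry points in llvm/AsmParser/Parser.h, and parsing a module as small as the string below walks through parseFunctionHeader, parseFunctionBody, parseBasicBlock and the per-instruction parsers that follow. The program and the IR symbol names are made up for the example.

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

int main() {
  llvm::LLVMContext Ctx;
  llvm::SMDiagnostic Err;
  // One function, one basic block, one terminator: enough to exercise the
  // header, body, basic-block and 'ret' parsing paths touched in this diff.
  std::unique_ptr<llvm::Module> M = llvm::parseAssemblyString(
      "define i32 @add(i32 %a, i32 %b) {\n"
      "entry:\n"
      "  %s = add nsw i32 %a, %b\n"
      "  ret i32 %s\n"
      "}\n",
      Err, Ctx);
  if (!M) {
    // On failure, the error()/tokError() text produced by LLParser is
    // surfaced through the SMDiagnostic.
    Err.print("example", llvm::errs());
    return 1;
  }
  return 0;
}
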
@@ -5808,37 +6025,50 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// Instruction Parsing.
//===----------------------------------------------------------------------===//
-/// ParseInstruction - Parse one of the many different instructions.
+/// parseInstruction - parse one of the many different instructions.
///
-int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB,
PerFunctionState &PFS) {
lltok::Kind Token = Lex.getKind();
if (Token == lltok::Eof)
- return TokError("found end of file when expecting more instructions");
+ return tokError("found end of file when expecting more instructions");
LocTy Loc = Lex.getLoc();
unsigned KeywordVal = Lex.getUIntVal();
Lex.Lex(); // Eat the keyword.
switch (Token) {
- default: return Error(Loc, "expected instruction opcode");
+ default:
+ return error(Loc, "expected instruction opcode");
// Terminator Instructions.
case lltok::kw_unreachable: Inst = new UnreachableInst(Context); return false;
- case lltok::kw_ret: return ParseRet(Inst, BB, PFS);
- case lltok::kw_br: return ParseBr(Inst, PFS);
- case lltok::kw_switch: return ParseSwitch(Inst, PFS);
- case lltok::kw_indirectbr: return ParseIndirectBr(Inst, PFS);
- case lltok::kw_invoke: return ParseInvoke(Inst, PFS);
- case lltok::kw_resume: return ParseResume(Inst, PFS);
- case lltok::kw_cleanupret: return ParseCleanupRet(Inst, PFS);
- case lltok::kw_catchret: return ParseCatchRet(Inst, PFS);
- case lltok::kw_catchswitch: return ParseCatchSwitch(Inst, PFS);
- case lltok::kw_catchpad: return ParseCatchPad(Inst, PFS);
- case lltok::kw_cleanuppad: return ParseCleanupPad(Inst, PFS);
- case lltok::kw_callbr: return ParseCallBr(Inst, PFS);
+ case lltok::kw_ret:
+ return parseRet(Inst, BB, PFS);
+ case lltok::kw_br:
+ return parseBr(Inst, PFS);
+ case lltok::kw_switch:
+ return parseSwitch(Inst, PFS);
+ case lltok::kw_indirectbr:
+ return parseIndirectBr(Inst, PFS);
+ case lltok::kw_invoke:
+ return parseInvoke(Inst, PFS);
+ case lltok::kw_resume:
+ return parseResume(Inst, PFS);
+ case lltok::kw_cleanupret:
+ return parseCleanupRet(Inst, PFS);
+ case lltok::kw_catchret:
+ return parseCatchRet(Inst, PFS);
+ case lltok::kw_catchswitch:
+ return parseCatchSwitch(Inst, PFS);
+ case lltok::kw_catchpad:
+ return parseCatchPad(Inst, PFS);
+ case lltok::kw_cleanuppad:
+ return parseCleanupPad(Inst, PFS);
+ case lltok::kw_callbr:
+ return parseCallBr(Inst, PFS);
// Unary Operators.
case lltok::kw_fneg: {
FastMathFlags FMF = EatFastMathFlagsIfPresent();
- int Res = ParseUnaryOp(Inst, PFS, KeywordVal, /*IsFP*/true);
+ int Res = parseUnaryOp(Inst, PFS, KeywordVal, /*IsFP*/ true);
if (Res != 0)
return Res;
if (FMF.any())
@@ -5854,7 +6084,8 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
bool NSW = EatIfPresent(lltok::kw_nsw);
if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
- if (ParseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/false)) return true;
+ if (parseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/ false))
+ return true;
if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
@@ -5866,7 +6097,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_fdiv:
case lltok::kw_frem: {
FastMathFlags FMF = EatFastMathFlagsIfPresent();
- int Res = ParseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/true);
+ int Res = parseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/ true);
if (Res != 0)
return Res;
if (FMF.any())
@@ -5880,21 +6111,25 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_ashr: {
bool Exact = EatIfPresent(lltok::kw_exact);
- if (ParseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/false)) return true;
+ if (parseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/ false))
+ return true;
if (Exact) cast<BinaryOperator>(Inst)->setIsExact(true);
return false;
}
case lltok::kw_urem:
- case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal,
- /*IsFP*/false);
+ case lltok::kw_srem:
+ return parseArithmetic(Inst, PFS, KeywordVal,
+ /*IsFP*/ false);
case lltok::kw_and:
case lltok::kw_or:
- case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
- case lltok::kw_icmp: return ParseCompare(Inst, PFS, KeywordVal);
+ case lltok::kw_xor:
+ return parseLogical(Inst, PFS, KeywordVal);
+ case lltok::kw_icmp:
+ return parseCompare(Inst, PFS, KeywordVal);
case lltok::kw_fcmp: {
FastMathFlags FMF = EatFastMathFlagsIfPresent();
- int Res = ParseCompare(Inst, PFS, KeywordVal);
+ int Res = parseCompare(Inst, PFS, KeywordVal);
if (Res != 0)
return Res;
if (FMF.any())
@@ -5915,63 +6150,84 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_fptoui:
case lltok::kw_fptosi:
case lltok::kw_inttoptr:
- case lltok::kw_ptrtoint: return ParseCast(Inst, PFS, KeywordVal);
+ case lltok::kw_ptrtoint:
+ return parseCast(Inst, PFS, KeywordVal);
// Other.
case lltok::kw_select: {
FastMathFlags FMF = EatFastMathFlagsIfPresent();
- int Res = ParseSelect(Inst, PFS);
+ int Res = parseSelect(Inst, PFS);
if (Res != 0)
return Res;
if (FMF.any()) {
if (!isa<FPMathOperator>(Inst))
- return Error(Loc, "fast-math-flags specified for select without "
+ return error(Loc, "fast-math-flags specified for select without "
"floating-point scalar or vector return type");
Inst->setFastMathFlags(FMF);
}
return 0;
}
- case lltok::kw_va_arg: return ParseVA_Arg(Inst, PFS);
- case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
- case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS);
- case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS);
+ case lltok::kw_va_arg:
+ return parseVAArg(Inst, PFS);
+ case lltok::kw_extractelement:
+ return parseExtractElement(Inst, PFS);
+ case lltok::kw_insertelement:
+ return parseInsertElement(Inst, PFS);
+ case lltok::kw_shufflevector:
+ return parseShuffleVector(Inst, PFS);
case lltok::kw_phi: {
FastMathFlags FMF = EatFastMathFlagsIfPresent();
- int Res = ParsePHI(Inst, PFS);
+ int Res = parsePHI(Inst, PFS);
if (Res != 0)
return Res;
if (FMF.any()) {
if (!isa<FPMathOperator>(Inst))
- return Error(Loc, "fast-math-flags specified for phi without "
+ return error(Loc, "fast-math-flags specified for phi without "
"floating-point scalar or vector return type");
Inst->setFastMathFlags(FMF);
}
return 0;
}
- case lltok::kw_landingpad: return ParseLandingPad(Inst, PFS);
- case lltok::kw_freeze: return ParseFreeze(Inst, PFS);
+ case lltok::kw_landingpad:
+ return parseLandingPad(Inst, PFS);
+ case lltok::kw_freeze:
+ return parseFreeze(Inst, PFS);
// Call.
- case lltok::kw_call: return ParseCall(Inst, PFS, CallInst::TCK_None);
- case lltok::kw_tail: return ParseCall(Inst, PFS, CallInst::TCK_Tail);
- case lltok::kw_musttail: return ParseCall(Inst, PFS, CallInst::TCK_MustTail);
- case lltok::kw_notail: return ParseCall(Inst, PFS, CallInst::TCK_NoTail);
+ case lltok::kw_call:
+ return parseCall(Inst, PFS, CallInst::TCK_None);
+ case lltok::kw_tail:
+ return parseCall(Inst, PFS, CallInst::TCK_Tail);
+ case lltok::kw_musttail:
+ return parseCall(Inst, PFS, CallInst::TCK_MustTail);
+ case lltok::kw_notail:
+ return parseCall(Inst, PFS, CallInst::TCK_NoTail);
// Memory.
- case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
- case lltok::kw_load: return ParseLoad(Inst, PFS);
- case lltok::kw_store: return ParseStore(Inst, PFS);
- case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS);
- case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS);
- case lltok::kw_fence: return ParseFence(Inst, PFS);
- case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
- case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS);
- case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS);
+ case lltok::kw_alloca:
+ return parseAlloc(Inst, PFS);
+ case lltok::kw_load:
+ return parseLoad(Inst, PFS);
+ case lltok::kw_store:
+ return parseStore(Inst, PFS);
+ case lltok::kw_cmpxchg:
+ return parseCmpXchg(Inst, PFS);
+ case lltok::kw_atomicrmw:
+ return parseAtomicRMW(Inst, PFS);
+ case lltok::kw_fence:
+ return parseFence(Inst, PFS);
+ case lltok::kw_getelementptr:
+ return parseGetElementPtr(Inst, PFS);
+ case lltok::kw_extractvalue:
+ return parseExtractValue(Inst, PFS);
+ case lltok::kw_insertvalue:
+ return parseInsertValue(Inst, PFS);
}
}
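
A note on the convention visible throughout this function (a standalone sketch in plain C++, not LLVM code): each parse helper returns true after reporting an error and false on success, which is what lets callers chain steps with '||' and stop at the first failure. The helper names below are invented for the example.

#include <iostream>
#include <string>

// Mimics error()/tokError(): report the message, then return true ("failed").
static bool reportError(const std::string &Msg) {
  std::cerr << "error: " << Msg << '\n';
  return true;
}

// Mimics a parseToken()-style helper: false on success, true on failure.
static bool expectToken(const std::string &Tok, const std::string &Want,
                        const std::string &Msg) {
  return Tok == Want ? false : reportError(Msg);
}

int main() {
  // Chaining with '||' mirrors parseTypeAndValue(...) || parseToken(...)
  // above: evaluation short-circuits at the first helper that returns true.
  if (expectToken("ret", "ret", "expected 'ret'") ||
      expectToken("i32", "i64", "expected 'i64'"))
    return 1; // the second step failed, so the chain stopped here
  return 0;
}
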
-/// ParseCmpPredicate - Parse an integer or fp predicate, based on Kind.
-bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
+/// parseCmpPredicate - parse an integer or fp predicate, based on Kind.
+bool LLParser::parseCmpPredicate(unsigned &P, unsigned Opc) {
if (Opc == Instruction::FCmp) {
switch (Lex.getKind()) {
- default: return TokError("expected fcmp predicate (e.g. 'oeq')");
+ default:
+ return tokError("expected fcmp predicate (e.g. 'oeq')");
case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break;
case lltok::kw_one: P = CmpInst::FCMP_ONE; break;
case lltok::kw_olt: P = CmpInst::FCMP_OLT; break;
@@ -5991,7 +6247,8 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
}
} else {
switch (Lex.getKind()) {
- default: return TokError("expected icmp predicate (e.g. 'eq')");
+ default:
+ return tokError("expected icmp predicate (e.g. 'eq')");
case lltok::kw_eq: P = CmpInst::ICMP_EQ; break;
case lltok::kw_ne: P = CmpInst::ICMP_NE; break;
case lltok::kw_slt: P = CmpInst::ICMP_SLT; break;
@@ -6012,45 +6269,48 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
// Terminator Instructions.
//===----------------------------------------------------------------------===//
-/// ParseRet - Parse a return instruction.
+/// parseRet - parse a return instruction.
/// ::= 'ret' void (',' !dbg, !1)*
/// ::= 'ret' TypeAndValue (',' !dbg, !1)*
-bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
+bool LLParser::parseRet(Instruction *&Inst, BasicBlock *BB,
PerFunctionState &PFS) {
SMLoc TypeLoc = Lex.getLoc();
Type *Ty = nullptr;
- if (ParseType(Ty, true /*void allowed*/)) return true;
+ if (parseType(Ty, true /*void allowed*/))
+ return true;
Type *ResType = PFS.getFunction().getReturnType();
if (Ty->isVoidTy()) {
if (!ResType->isVoidTy())
- return Error(TypeLoc, "value doesn't match function result type '" +
- getTypeString(ResType) + "'");
+ return error(TypeLoc, "value doesn't match function result type '" +
+ getTypeString(ResType) + "'");
Inst = ReturnInst::Create(Context);
return false;
}
Value *RV;
- if (ParseValue(Ty, RV, PFS)) return true;
+ if (parseValue(Ty, RV, PFS))
+ return true;
if (ResType != RV->getType())
- return Error(TypeLoc, "value doesn't match function result type '" +
- getTypeString(ResType) + "'");
+ return error(TypeLoc, "value doesn't match function result type '" +
+ getTypeString(ResType) + "'");
Inst = ReturnInst::Create(Context, RV);
return false;
}
-/// ParseBr
+/// parseBr
/// ::= 'br' TypeAndValue
/// ::= 'br' TypeAndValue ',' TypeAndValue ',' TypeAndValue
-bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseBr(Instruction *&Inst, PerFunctionState &PFS) {
LocTy Loc, Loc2;
Value *Op0;
BasicBlock *Op1, *Op2;
- if (ParseTypeAndValue(Op0, Loc, PFS)) return true;
+ if (parseTypeAndValue(Op0, Loc, PFS))
+ return true;
if (BasicBlock *BB = dyn_cast<BasicBlock>(Op0)) {
Inst = BranchInst::Create(BB);
@@ -6058,52 +6318,52 @@ bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
}
if (Op0->getType() != Type::getInt1Ty(Context))
- return Error(Loc, "branch condition must have 'i1' type");
+ return error(Loc, "branch condition must have 'i1' type");
- if (ParseToken(lltok::comma, "expected ',' after branch condition") ||
- ParseTypeAndBasicBlock(Op1, Loc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after true destination") ||
- ParseTypeAndBasicBlock(Op2, Loc2, PFS))
+ if (parseToken(lltok::comma, "expected ',' after branch condition") ||
+ parseTypeAndBasicBlock(Op1, Loc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after true destination") ||
+ parseTypeAndBasicBlock(Op2, Loc2, PFS))
return true;
Inst = BranchInst::Create(Op1, Op2, Op0);
return false;
}
-/// ParseSwitch
+/// parseSwitch
/// Instruction
/// ::= 'switch' TypeAndValue ',' TypeAndValue '[' JumpTable ']'
/// JumpTable
/// ::= (TypeAndValue ',' TypeAndValue)*
-bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CondLoc, BBLoc;
Value *Cond;
BasicBlock *DefaultBB;
- if (ParseTypeAndValue(Cond, CondLoc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after switch condition") ||
- ParseTypeAndBasicBlock(DefaultBB, BBLoc, PFS) ||
- ParseToken(lltok::lsquare, "expected '[' with switch table"))
+ if (parseTypeAndValue(Cond, CondLoc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after switch condition") ||
+ parseTypeAndBasicBlock(DefaultBB, BBLoc, PFS) ||
+ parseToken(lltok::lsquare, "expected '[' with switch table"))
return true;
if (!Cond->getType()->isIntegerTy())
- return Error(CondLoc, "switch condition must have integer type");
+ return error(CondLoc, "switch condition must have integer type");
- // Parse the jump table pairs.
+ // parse the jump table pairs.
SmallPtrSet<Value*, 32> SeenCases;
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 32> Table;
while (Lex.getKind() != lltok::rsquare) {
Value *Constant;
BasicBlock *DestBB;
- if (ParseTypeAndValue(Constant, CondLoc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after case value") ||
- ParseTypeAndBasicBlock(DestBB, PFS))
+ if (parseTypeAndValue(Constant, CondLoc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after case value") ||
+ parseTypeAndBasicBlock(DestBB, PFS))
return true;
if (!SeenCases.insert(Constant).second)
- return Error(CondLoc, "duplicate case value in switch");
+ return error(CondLoc, "duplicate case value in switch");
if (!isa<ConstantInt>(Constant))
- return Error(CondLoc, "case value is not a constant integer");
+ return error(CondLoc, "case value is not a constant integer");
Table.push_back(std::make_pair(cast<ConstantInt>(Constant), DestBB));
}
@@ -6117,37 +6377,37 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
-/// ParseIndirectBr
+/// parseIndirectBr
/// Instruction
/// ::= 'indirectbr' TypeAndValue ',' '[' LabelList ']'
-bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
LocTy AddrLoc;
Value *Address;
- if (ParseTypeAndValue(Address, AddrLoc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after indirectbr address") ||
- ParseToken(lltok::lsquare, "expected '[' with indirectbr"))
+ if (parseTypeAndValue(Address, AddrLoc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after indirectbr address") ||
+ parseToken(lltok::lsquare, "expected '[' with indirectbr"))
return true;
if (!Address->getType()->isPointerTy())
- return Error(AddrLoc, "indirectbr address must have pointer type");
+ return error(AddrLoc, "indirectbr address must have pointer type");
- // Parse the destination list.
+ // parse the destination list.
SmallVector<BasicBlock*, 16> DestList;
if (Lex.getKind() != lltok::rsquare) {
BasicBlock *DestBB;
- if (ParseTypeAndBasicBlock(DestBB, PFS))
+ if (parseTypeAndBasicBlock(DestBB, PFS))
return true;
DestList.push_back(DestBB);
while (EatIfPresent(lltok::comma)) {
- if (ParseTypeAndBasicBlock(DestBB, PFS))
+ if (parseTypeAndBasicBlock(DestBB, PFS))
return true;
DestList.push_back(DestBB);
}
}
- if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
+ if (parseToken(lltok::rsquare, "expected ']' at end of block list"))
return true;
IndirectBrInst *IBI = IndirectBrInst::Create(Address, DestList.size());
@@ -6157,10 +6417,10 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
-/// ParseInvoke
+/// parseInvoke
/// ::= 'invoke' OptionalCallingConv OptionalAttrs Type Value ParamList
/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
-bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CallLoc = Lex.getLoc();
AttrBuilder RetAttrs, FnAttrs;
std::vector<unsigned> FwdRefAttrGrps;
@@ -6174,17 +6434,17 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
SmallVector<OperandBundleDef, 2> BundleList;
BasicBlock *NormalBB, *UnwindBB;
- if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
- ParseOptionalProgramAddrSpace(InvokeAddrSpace) ||
- ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
- ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) ||
- ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
+ if (parseOptionalCallingConv(CC) || parseOptionalReturnAttrs(RetAttrs) ||
+ parseOptionalProgramAddrSpace(InvokeAddrSpace) ||
+ parseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ parseValID(CalleeID) || parseParameterList(ArgList, PFS) ||
+ parseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
NoBuiltinLoc) ||
- ParseOptionalOperandBundles(BundleList, PFS) ||
- ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
- ParseTypeAndBasicBlock(NormalBB, PFS) ||
- ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
- ParseTypeAndBasicBlock(UnwindBB, PFS))
+ parseOptionalOperandBundles(BundleList, PFS) ||
+ parseToken(lltok::kw_to, "expected 'to' in invoke") ||
+ parseTypeAndBasicBlock(NormalBB, PFS) ||
+ parseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
+ parseTypeAndBasicBlock(UnwindBB, PFS))
return true;
// If RetType is a non-function pointer type, then this is the short syntax
@@ -6198,7 +6458,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
ParamTypes.push_back(ArgList[i].V->getType());
if (!FunctionType::isValidReturnType(RetType))
- return Error(RetTypeLoc, "Invalid result type for LLVM function");
+ return error(RetTypeLoc, "Invalid result type for LLVM function");
Ty = FunctionType::get(RetType, ParamTypes, false);
}
@@ -6207,7 +6467,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PointerType::get(Ty, InvokeAddrSpace), CalleeID,
+ if (convertValIDToValue(PointerType::get(Ty, InvokeAddrSpace), CalleeID,
Callee, &PFS, /*IsCall=*/true))
return true;
@@ -6224,21 +6484,21 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
if (I != E) {
ExpectedTy = *I++;
} else if (!Ty->isVarArg()) {
- return Error(ArgList[i].Loc, "too many arguments specified");
+ return error(ArgList[i].Loc, "too many arguments specified");
}
if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
- return Error(ArgList[i].Loc, "argument is not of expected type '" +
- getTypeString(ExpectedTy) + "'");
+ return error(ArgList[i].Loc, "argument is not of expected type '" +
+ getTypeString(ExpectedTy) + "'");
Args.push_back(ArgList[i].V);
ArgAttrs.push_back(ArgList[i].Attrs);
}
if (I != E)
- return Error(CallLoc, "not enough parameters specified for call");
+ return error(CallLoc, "not enough parameters specified for call");
if (FnAttrs.hasAlignmentAttr())
- return Error(CallLoc, "invoke instructions may not have an alignment");
+ return error(CallLoc, "invoke instructions may not have an alignment");
// Finish off the Attribute and check them
AttributeList PAL =
@@ -6254,11 +6514,11 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
-/// ParseResume
+/// parseResume
/// ::= 'resume' TypeAndValue
-bool LLParser::ParseResume(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseResume(Instruction *&Inst, PerFunctionState &PFS) {
Value *Exn; LocTy ExnLoc;
- if (ParseTypeAndValue(Exn, ExnLoc, PFS))
+ if (parseTypeAndValue(Exn, ExnLoc, PFS))
return true;
ResumeInst *RI = ResumeInst::Create(Exn);
@@ -6266,29 +6526,29 @@ bool LLParser::ParseResume(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
-bool LLParser::ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
+bool LLParser::parseExceptionArgs(SmallVectorImpl<Value *> &Args,
PerFunctionState &PFS) {
- if (ParseToken(lltok::lsquare, "expected '[' in catchpad/cleanuppad"))
+ if (parseToken(lltok::lsquare, "expected '[' in catchpad/cleanuppad"))
return true;
while (Lex.getKind() != lltok::rsquare) {
// If this isn't the first argument, we need a comma.
if (!Args.empty() &&
- ParseToken(lltok::comma, "expected ',' in argument list"))
+ parseToken(lltok::comma, "expected ',' in argument list"))
return true;
- // Parse the argument.
+ // parse the argument.
LocTy ArgLoc;
Type *ArgTy = nullptr;
- if (ParseType(ArgTy, ArgLoc))
+ if (parseType(ArgTy, ArgLoc))
return true;
Value *V;
if (ArgTy->isMetadataTy()) {
- if (ParseMetadataAsValue(V, PFS))
+ if (parseMetadataAsValue(V, PFS))
return true;
} else {
- if (ParseValue(ArgTy, V, PFS))
+ if (parseValue(ArgTy, V, PFS))
return true;
}
Args.push_back(V);
@@ -6298,27 +6558,27 @@ bool LLParser::ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
return false;
}
-/// ParseCleanupRet
+/// parseCleanupRet
/// ::= 'cleanupret' from Value unwind ('to' 'caller' | TypeAndValue)
-bool LLParser::ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseCleanupRet(Instruction *&Inst, PerFunctionState &PFS) {
Value *CleanupPad = nullptr;
- if (ParseToken(lltok::kw_from, "expected 'from' after cleanupret"))
+ if (parseToken(lltok::kw_from, "expected 'from' after cleanupret"))
return true;
- if (ParseValue(Type::getTokenTy(Context), CleanupPad, PFS))
+ if (parseValue(Type::getTokenTy(Context), CleanupPad, PFS))
return true;
- if (ParseToken(lltok::kw_unwind, "expected 'unwind' in cleanupret"))
+ if (parseToken(lltok::kw_unwind, "expected 'unwind' in cleanupret"))
return true;
BasicBlock *UnwindBB = nullptr;
if (Lex.getKind() == lltok::kw_to) {
Lex.Lex();
- if (ParseToken(lltok::kw_caller, "expected 'caller' in cleanupret"))
+ if (parseToken(lltok::kw_caller, "expected 'caller' in cleanupret"))
return true;
} else {
- if (ParseTypeAndBasicBlock(UnwindBB, PFS)) {
+ if (parseTypeAndBasicBlock(UnwindBB, PFS)) {
return true;
}
}
@@ -6327,65 +6587,64 @@ bool LLParser::ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
-/// ParseCatchRet
+/// parseCatchRet
/// ::= 'catchret' from Parent Value 'to' TypeAndValue
-bool LLParser::ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseCatchRet(Instruction *&Inst, PerFunctionState &PFS) {
Value *CatchPad = nullptr;
- if (ParseToken(lltok::kw_from, "expected 'from' after catchret"))
+ if (parseToken(lltok::kw_from, "expected 'from' after catchret"))
return true;
- if (ParseValue(Type::getTokenTy(Context), CatchPad, PFS))
+ if (parseValue(Type::getTokenTy(Context), CatchPad, PFS))
return true;
BasicBlock *BB;
- if (ParseToken(lltok::kw_to, "expected 'to' in catchret") ||
- ParseTypeAndBasicBlock(BB, PFS))
- return true;
+ if (parseToken(lltok::kw_to, "expected 'to' in catchret") ||
+ parseTypeAndBasicBlock(BB, PFS))
+ return true;
Inst = CatchReturnInst::Create(CatchPad, BB);
return false;
}
-/// ParseCatchSwitch
+/// parseCatchSwitch
/// ::= 'catchswitch' within Parent
-bool LLParser::ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS) {
Value *ParentPad;
- if (ParseToken(lltok::kw_within, "expected 'within' after catchswitch"))
+ if (parseToken(lltok::kw_within, "expected 'within' after catchswitch"))
return true;
if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar &&
Lex.getKind() != lltok::LocalVarID)
- return TokError("expected scope value for catchswitch");
+ return tokError("expected scope value for catchswitch");
- if (ParseValue(Type::getTokenTy(Context), ParentPad, PFS))
+ if (parseValue(Type::getTokenTy(Context), ParentPad, PFS))
return true;
- if (ParseToken(lltok::lsquare, "expected '[' with catchswitch labels"))
+ if (parseToken(lltok::lsquare, "expected '[' with catchswitch labels"))
return true;
SmallVector<BasicBlock *, 32> Table;
do {
BasicBlock *DestBB;
- if (ParseTypeAndBasicBlock(DestBB, PFS))
+ if (parseTypeAndBasicBlock(DestBB, PFS))
return true;
Table.push_back(DestBB);
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rsquare, "expected ']' after catchswitch labels"))
+ if (parseToken(lltok::rsquare, "expected ']' after catchswitch labels"))
return true;
- if (ParseToken(lltok::kw_unwind,
- "expected 'unwind' after catchswitch scope"))
+ if (parseToken(lltok::kw_unwind, "expected 'unwind' after catchswitch scope"))
return true;
BasicBlock *UnwindBB = nullptr;
if (EatIfPresent(lltok::kw_to)) {
- if (ParseToken(lltok::kw_caller, "expected 'caller' in catchswitch"))
+ if (parseToken(lltok::kw_caller, "expected 'caller' in catchswitch"))
return true;
} else {
- if (ParseTypeAndBasicBlock(UnwindBB, PFS))
+ if (parseTypeAndBasicBlock(UnwindBB, PFS))
return true;
}
@@ -6397,45 +6656,45 @@ bool LLParser::ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
-/// ParseCatchPad
+/// parseCatchPad
/// ::= 'catchpad' ParamList 'to' TypeAndValue 'unwind' TypeAndValue
-bool LLParser::ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseCatchPad(Instruction *&Inst, PerFunctionState &PFS) {
Value *CatchSwitch = nullptr;
- if (ParseToken(lltok::kw_within, "expected 'within' after catchpad"))
+ if (parseToken(lltok::kw_within, "expected 'within' after catchpad"))
return true;
if (Lex.getKind() != lltok::LocalVar && Lex.getKind() != lltok::LocalVarID)
- return TokError("expected scope value for catchpad");
+ return tokError("expected scope value for catchpad");
- if (ParseValue(Type::getTokenTy(Context), CatchSwitch, PFS))
+ if (parseValue(Type::getTokenTy(Context), CatchSwitch, PFS))
return true;
SmallVector<Value *, 8> Args;
- if (ParseExceptionArgs(Args, PFS))
+ if (parseExceptionArgs(Args, PFS))
return true;
Inst = CatchPadInst::Create(CatchSwitch, Args);
return false;
}
-/// ParseCleanupPad
+/// parseCleanupPad
/// ::= 'cleanuppad' within Parent ParamList
-bool LLParser::ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseCleanupPad(Instruction *&Inst, PerFunctionState &PFS) {
Value *ParentPad = nullptr;
- if (ParseToken(lltok::kw_within, "expected 'within' after cleanuppad"))
+ if (parseToken(lltok::kw_within, "expected 'within' after cleanuppad"))
return true;
if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar &&
Lex.getKind() != lltok::LocalVarID)
- return TokError("expected scope value for cleanuppad");
+ return tokError("expected scope value for cleanuppad");
- if (ParseValue(Type::getTokenTy(Context), ParentPad, PFS))
+ if (parseValue(Type::getTokenTy(Context), ParentPad, PFS))
return true;
SmallVector<Value *, 8> Args;
- if (ParseExceptionArgs(Args, PFS))
+ if (parseExceptionArgs(Args, PFS))
return true;
Inst = CleanupPadInst::Create(ParentPad, Args);
@@ -6446,32 +6705,32 @@ bool LLParser::ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS) {
// Unary Operators.
//===----------------------------------------------------------------------===//
-/// ParseUnaryOp
+/// parseUnaryOp
/// ::= UnaryOp TypeAndValue ',' Value
///
/// If IsFP is false, then any integer operand is allowed, if it is true, any fp
/// operand is allowed.
-bool LLParser::ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS,
+bool LLParser::parseUnaryOp(Instruction *&Inst, PerFunctionState &PFS,
unsigned Opc, bool IsFP) {
LocTy Loc; Value *LHS;
- if (ParseTypeAndValue(LHS, Loc, PFS))
+ if (parseTypeAndValue(LHS, Loc, PFS))
return true;
bool Valid = IsFP ? LHS->getType()->isFPOrFPVectorTy()
: LHS->getType()->isIntOrIntVectorTy();
if (!Valid)
- return Error(Loc, "invalid operand type for instruction");
+ return error(Loc, "invalid operand type for instruction");
Inst = UnaryOperator::Create((Instruction::UnaryOps)Opc, LHS);
return false;
}
-/// ParseCallBr
+/// parseCallBr
/// ::= 'callbr' OptionalCallingConv OptionalAttrs Type Value ParamList
/// OptionalAttrs OptionalOperandBundles 'to' TypeAndValue
/// '[' LabelList ']'
-bool LLParser::ParseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CallLoc = Lex.getLoc();
AttrBuilder RetAttrs, FnAttrs;
std::vector<unsigned> FwdRefAttrGrps;
@@ -6484,34 +6743,34 @@ bool LLParser::ParseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
SmallVector<OperandBundleDef, 2> BundleList;
BasicBlock *DefaultDest;
- if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
- ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
- ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) ||
- ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
+ if (parseOptionalCallingConv(CC) || parseOptionalReturnAttrs(RetAttrs) ||
+ parseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ parseValID(CalleeID) || parseParameterList(ArgList, PFS) ||
+ parseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
NoBuiltinLoc) ||
- ParseOptionalOperandBundles(BundleList, PFS) ||
- ParseToken(lltok::kw_to, "expected 'to' in callbr") ||
- ParseTypeAndBasicBlock(DefaultDest, PFS) ||
- ParseToken(lltok::lsquare, "expected '[' in callbr"))
+ parseOptionalOperandBundles(BundleList, PFS) ||
+ parseToken(lltok::kw_to, "expected 'to' in callbr") ||
+ parseTypeAndBasicBlock(DefaultDest, PFS) ||
+ parseToken(lltok::lsquare, "expected '[' in callbr"))
return true;
- // Parse the destination list.
+ // parse the destination list.
SmallVector<BasicBlock *, 16> IndirectDests;
if (Lex.getKind() != lltok::rsquare) {
BasicBlock *DestBB;
- if (ParseTypeAndBasicBlock(DestBB, PFS))
+ if (parseTypeAndBasicBlock(DestBB, PFS))
return true;
IndirectDests.push_back(DestBB);
while (EatIfPresent(lltok::comma)) {
- if (ParseTypeAndBasicBlock(DestBB, PFS))
+ if (parseTypeAndBasicBlock(DestBB, PFS))
return true;
IndirectDests.push_back(DestBB);
}
}
- if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
+ if (parseToken(lltok::rsquare, "expected ']' at end of block list"))
return true;
// If RetType is a non-function pointer type, then this is the short syntax
@@ -6525,7 +6784,7 @@ bool LLParser::ParseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
ParamTypes.push_back(ArgList[i].V->getType());
if (!FunctionType::isValidReturnType(RetType))
- return Error(RetTypeLoc, "Invalid result type for LLVM function");
+ return error(RetTypeLoc, "Invalid result type for LLVM function");
Ty = FunctionType::get(RetType, ParamTypes, false);
}
@@ -6534,7 +6793,7 @@ bool LLParser::ParseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS,
+ if (convertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS,
/*IsCall=*/true))
return true;
@@ -6551,21 +6810,21 @@ bool LLParser::ParseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
if (I != E) {
ExpectedTy = *I++;
} else if (!Ty->isVarArg()) {
- return Error(ArgList[i].Loc, "too many arguments specified");
+ return error(ArgList[i].Loc, "too many arguments specified");
}
if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
- return Error(ArgList[i].Loc, "argument is not of expected type '" +
+ return error(ArgList[i].Loc, "argument is not of expected type '" +
getTypeString(ExpectedTy) + "'");
Args.push_back(ArgList[i].V);
ArgAttrs.push_back(ArgList[i].Attrs);
}
if (I != E)
- return Error(CallLoc, "not enough parameters specified for call");
+ return error(CallLoc, "not enough parameters specified for call");
if (FnAttrs.hasAlignmentAttr())
- return Error(CallLoc, "callbr instructions may not have an alignment");
+ return error(CallLoc, "callbr instructions may not have an alignment");
// Finish off the Attribute and check them
AttributeList PAL =
@@ -6586,70 +6845,70 @@ bool LLParser::ParseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
// Binary Operators.
//===----------------------------------------------------------------------===//
-/// ParseArithmetic
+/// parseArithmetic
/// ::= ArithmeticOps TypeAndValue ',' Value
///
/// If IsFP is false, then any integer operand is allowed, if it is true, any fp
/// operand is allowed.
-bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
+bool LLParser::parseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
unsigned Opc, bool IsFP) {
LocTy Loc; Value *LHS, *RHS;
- if (ParseTypeAndValue(LHS, Loc, PFS) ||
- ParseToken(lltok::comma, "expected ',' in arithmetic operation") ||
- ParseValue(LHS->getType(), RHS, PFS))
+ if (parseTypeAndValue(LHS, Loc, PFS) ||
+ parseToken(lltok::comma, "expected ',' in arithmetic operation") ||
+ parseValue(LHS->getType(), RHS, PFS))
return true;
bool Valid = IsFP ? LHS->getType()->isFPOrFPVectorTy()
: LHS->getType()->isIntOrIntVectorTy();
if (!Valid)
- return Error(Loc, "invalid operand type for instruction");
+ return error(Loc, "invalid operand type for instruction");
Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
return false;
}
-/// ParseLogical
+/// parseLogical
/// ::= ArithmeticOps TypeAndValue ',' Value {
-bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS,
+bool LLParser::parseLogical(Instruction *&Inst, PerFunctionState &PFS,
unsigned Opc) {
LocTy Loc; Value *LHS, *RHS;
- if (ParseTypeAndValue(LHS, Loc, PFS) ||
- ParseToken(lltok::comma, "expected ',' in logical operation") ||
- ParseValue(LHS->getType(), RHS, PFS))
+ if (parseTypeAndValue(LHS, Loc, PFS) ||
+ parseToken(lltok::comma, "expected ',' in logical operation") ||
+ parseValue(LHS->getType(), RHS, PFS))
return true;
if (!LHS->getType()->isIntOrIntVectorTy())
- return Error(Loc,"instruction requires integer or integer vector operands");
+ return error(Loc,
+ "instruction requires integer or integer vector operands");
Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
return false;
}
-/// ParseCompare
+/// parseCompare
/// ::= 'icmp' IPredicates TypeAndValue ',' Value
/// ::= 'fcmp' FPredicates TypeAndValue ',' Value
-bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
+bool LLParser::parseCompare(Instruction *&Inst, PerFunctionState &PFS,
unsigned Opc) {
- // Parse the integer/fp comparison predicate.
+ // parse the integer/fp comparison predicate.
LocTy Loc;
unsigned Pred;
Value *LHS, *RHS;
- if (ParseCmpPredicate(Pred, Opc) ||
- ParseTypeAndValue(LHS, Loc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after compare value") ||
- ParseValue(LHS->getType(), RHS, PFS))
+ if (parseCmpPredicate(Pred, Opc) || parseTypeAndValue(LHS, Loc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after compare value") ||
+ parseValue(LHS->getType(), RHS, PFS))
return true;
if (Opc == Instruction::FCmp) {
if (!LHS->getType()->isFPOrFPVectorTy())
- return Error(Loc, "fcmp requires floating point operands");
+ return error(Loc, "fcmp requires floating point operands");
Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
} else {
assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
if (!LHS->getType()->isIntOrIntVectorTy() &&
!LHS->getType()->isPtrOrPtrVectorTy())
- return Error(Loc, "icmp requires integer operands");
+ return error(Loc, "icmp requires integer operands");
Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
}
return false;
@@ -6659,133 +6918,132 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
// Other Instructions.
//===----------------------------------------------------------------------===//
-
-/// ParseCast
+/// parseCast
/// ::= CastOpc TypeAndValue 'to' Type
-bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
+bool LLParser::parseCast(Instruction *&Inst, PerFunctionState &PFS,
unsigned Opc) {
LocTy Loc;
Value *Op;
Type *DestTy = nullptr;
- if (ParseTypeAndValue(Op, Loc, PFS) ||
- ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
- ParseType(DestTy))
+ if (parseTypeAndValue(Op, Loc, PFS) ||
+ parseToken(lltok::kw_to, "expected 'to' after cast value") ||
+ parseType(DestTy))
return true;
if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy)) {
CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy);
- return Error(Loc, "invalid cast opcode for cast from '" +
- getTypeString(Op->getType()) + "' to '" +
- getTypeString(DestTy) + "'");
+ return error(Loc, "invalid cast opcode for cast from '" +
+ getTypeString(Op->getType()) + "' to '" +
+ getTypeString(DestTy) + "'");
}
Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy);
return false;
}
-/// ParseSelect
+/// parseSelect
/// ::= 'select' TypeAndValue ',' TypeAndValue ',' TypeAndValue
-bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseSelect(Instruction *&Inst, PerFunctionState &PFS) {
LocTy Loc;
Value *Op0, *Op1, *Op2;
- if (ParseTypeAndValue(Op0, Loc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after select condition") ||
- ParseTypeAndValue(Op1, PFS) ||
- ParseToken(lltok::comma, "expected ',' after select value") ||
- ParseTypeAndValue(Op2, PFS))
+ if (parseTypeAndValue(Op0, Loc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after select condition") ||
+ parseTypeAndValue(Op1, PFS) ||
+ parseToken(lltok::comma, "expected ',' after select value") ||
+ parseTypeAndValue(Op2, PFS))
return true;
if (const char *Reason = SelectInst::areInvalidOperands(Op0, Op1, Op2))
- return Error(Loc, Reason);
+ return error(Loc, Reason);
Inst = SelectInst::Create(Op0, Op1, Op2);
return false;
}
-/// ParseVA_Arg
+/// parseVAArg
/// ::= 'va_arg' TypeAndValue ',' Type
-bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseVAArg(Instruction *&Inst, PerFunctionState &PFS) {
Value *Op;
Type *EltTy = nullptr;
LocTy TypeLoc;
- if (ParseTypeAndValue(Op, PFS) ||
- ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
- ParseType(EltTy, TypeLoc))
+ if (parseTypeAndValue(Op, PFS) ||
+ parseToken(lltok::comma, "expected ',' after vaarg operand") ||
+ parseType(EltTy, TypeLoc))
return true;
if (!EltTy->isFirstClassType())
- return Error(TypeLoc, "va_arg requires operand with first class type");
+ return error(TypeLoc, "va_arg requires operand with first class type");
Inst = new VAArgInst(Op, EltTy);
return false;
}
-/// ParseExtractElement
+/// parseExtractElement
/// ::= 'extractelement' TypeAndValue ',' TypeAndValue
-bool LLParser::ParseExtractElement(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseExtractElement(Instruction *&Inst, PerFunctionState &PFS) {
LocTy Loc;
Value *Op0, *Op1;
- if (ParseTypeAndValue(Op0, Loc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after extract value") ||
- ParseTypeAndValue(Op1, PFS))
+ if (parseTypeAndValue(Op0, Loc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after extract value") ||
+ parseTypeAndValue(Op1, PFS))
return true;
if (!ExtractElementInst::isValidOperands(Op0, Op1))
- return Error(Loc, "invalid extractelement operands");
+ return error(Loc, "invalid extractelement operands");
Inst = ExtractElementInst::Create(Op0, Op1);
return false;
}
-/// ParseInsertElement
+/// parseInsertElement
/// ::= 'insertelement' TypeAndValue ',' TypeAndValue ',' TypeAndValue
-bool LLParser::ParseInsertElement(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseInsertElement(Instruction *&Inst, PerFunctionState &PFS) {
LocTy Loc;
Value *Op0, *Op1, *Op2;
- if (ParseTypeAndValue(Op0, Loc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after insertelement value") ||
- ParseTypeAndValue(Op1, PFS) ||
- ParseToken(lltok::comma, "expected ',' after insertelement value") ||
- ParseTypeAndValue(Op2, PFS))
+ if (parseTypeAndValue(Op0, Loc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after insertelement value") ||
+ parseTypeAndValue(Op1, PFS) ||
+ parseToken(lltok::comma, "expected ',' after insertelement value") ||
+ parseTypeAndValue(Op2, PFS))
return true;
if (!InsertElementInst::isValidOperands(Op0, Op1, Op2))
- return Error(Loc, "invalid insertelement operands");
+ return error(Loc, "invalid insertelement operands");
Inst = InsertElementInst::Create(Op0, Op1, Op2);
return false;
}
-/// ParseShuffleVector
+/// parseShuffleVector
/// ::= 'shufflevector' TypeAndValue ',' TypeAndValue ',' TypeAndValue
-bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
LocTy Loc;
Value *Op0, *Op1, *Op2;
- if (ParseTypeAndValue(Op0, Loc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after shuffle mask") ||
- ParseTypeAndValue(Op1, PFS) ||
- ParseToken(lltok::comma, "expected ',' after shuffle value") ||
- ParseTypeAndValue(Op2, PFS))
+ if (parseTypeAndValue(Op0, Loc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after shuffle mask") ||
+ parseTypeAndValue(Op1, PFS) ||
+ parseToken(lltok::comma, "expected ',' after shuffle value") ||
+ parseTypeAndValue(Op2, PFS))
return true;
if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
- return Error(Loc, "invalid shufflevector operands");
+ return error(Loc, "invalid shufflevector operands");
Inst = new ShuffleVectorInst(Op0, Op1, Op2);
return false;
}
-/// ParsePHI
+/// parsePHI
/// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
-int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
+int LLParser::parsePHI(Instruction *&Inst, PerFunctionState &PFS) {
Type *Ty = nullptr; LocTy TypeLoc;
Value *Op0, *Op1;
- if (ParseType(Ty, TypeLoc) ||
- ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
- ParseValue(Ty, Op0, PFS) ||
- ParseToken(lltok::comma, "expected ',' after insertelement value") ||
- ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
- ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+ if (parseType(Ty, TypeLoc) ||
+ parseToken(lltok::lsquare, "expected '[' in phi value list") ||
+ parseValue(Ty, Op0, PFS) ||
+ parseToken(lltok::comma, "expected ',' after insertelement value") ||
+ parseValue(Type::getLabelTy(Context), Op1, PFS) ||
+ parseToken(lltok::rsquare, "expected ']' in phi value list"))
return true;
bool AteExtraComma = false;
@@ -6802,16 +7060,16 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
break;
}
- if (ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
- ParseValue(Ty, Op0, PFS) ||
- ParseToken(lltok::comma, "expected ',' after insertelement value") ||
- ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
- ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+ if (parseToken(lltok::lsquare, "expected '[' in phi value list") ||
+ parseValue(Ty, Op0, PFS) ||
+ parseToken(lltok::comma, "expected ',' after insertelement value") ||
+ parseValue(Type::getLabelTy(Context), Op1, PFS) ||
+ parseToken(lltok::rsquare, "expected ']' in phi value list"))
return true;
}
if (!Ty->isFirstClassType())
- return Error(TypeLoc, "phi node must have first class type");
+ return error(TypeLoc, "phi node must have first class type");
PHINode *PN = PHINode::Create(Ty, PHIVals.size());
for (unsigned i = 0, e = PHIVals.size(); i != e; ++i)
@@ -6820,16 +7078,16 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
return AteExtraComma ? InstExtraComma : InstNormal;
}
-/// ParseLandingPad
+/// parseLandingPad
/// ::= 'landingpad' Type 'personality' TypeAndValue 'cleanup'? Clause+
/// Clause
/// ::= 'catch' TypeAndValue
/// ::= 'filter'
/// ::= 'filter' TypeAndValue ( ',' TypeAndValue )*
-bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
Type *Ty = nullptr; LocTy TyLoc;
- if (ParseType(Ty, TyLoc))
+ if (parseType(Ty, TyLoc))
return true;
std::unique_ptr<LandingPadInst> LP(LandingPadInst::Create(Ty, 0));
@@ -6842,26 +7100,26 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
else if (EatIfPresent(lltok::kw_filter))
CT = LandingPadInst::Filter;
else
- return TokError("expected 'catch' or 'filter' clause type");
+ return tokError("expected 'catch' or 'filter' clause type");
Value *V;
LocTy VLoc;
- if (ParseTypeAndValue(V, VLoc, PFS))
+ if (parseTypeAndValue(V, VLoc, PFS))
return true;
// A 'catch' type expects a non-array constant. A filter clause expects an
// array constant.
if (CT == LandingPadInst::Catch) {
if (isa<ArrayType>(V->getType()))
- Error(VLoc, "'catch' clause has an invalid type");
+ error(VLoc, "'catch' clause has an invalid type");
} else {
if (!isa<ArrayType>(V->getType()))
- Error(VLoc, "'filter' clause has an invalid type");
+ error(VLoc, "'filter' clause has an invalid type");
}
Constant *CV = dyn_cast<Constant>(V);
if (!CV)
- return Error(VLoc, "clause argument must be a constant");
+ return error(VLoc, "clause argument must be a constant");
LP->addClause(CV);
}
@@ -6869,19 +7127,19 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
-/// ParseFreeze
+/// parseFreeze
/// ::= 'freeze' Type Value
-bool LLParser::ParseFreeze(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::parseFreeze(Instruction *&Inst, PerFunctionState &PFS) {
LocTy Loc;
Value *Op;
- if (ParseTypeAndValue(Op, Loc, PFS))
+ if (parseTypeAndValue(Op, Loc, PFS))
return true;
Inst = new FreezeInst(Op);
return false;
}
-/// ParseCall
+/// parseCall
/// ::= 'call' OptionalFastMathFlags OptionalCallingConv
/// OptionalAttrs Type Value ParameterList OptionalAttrs
/// ::= 'tail' 'call' OptionalFastMathFlags OptionalCallingConv
@@ -6890,7 +7148,7 @@ bool LLParser::ParseFreeze(Instruction *&Inst, PerFunctionState &PFS) {
/// OptionalAttrs Type Value ParameterList OptionalAttrs
/// ::= 'notail' 'call' OptionalFastMathFlags OptionalCallingConv
/// OptionalAttrs Type Value ParameterList OptionalAttrs
-bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
+bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS,
CallInst::TailCallKind TCK) {
AttrBuilder RetAttrs, FnAttrs;
std::vector<unsigned> FwdRefAttrGrps;
@@ -6905,20 +7163,20 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
LocTy CallLoc = Lex.getLoc();
if (TCK != CallInst::TCK_None &&
- ParseToken(lltok::kw_call,
+ parseToken(lltok::kw_call,
"expected 'tail call', 'musttail call', or 'notail call'"))
return true;
FastMathFlags FMF = EatFastMathFlagsIfPresent();
- if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
- ParseOptionalProgramAddrSpace(CallAddrSpace) ||
- ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
- ParseValID(CalleeID) ||
- ParseParameterList(ArgList, PFS, TCK == CallInst::TCK_MustTail,
+ if (parseOptionalCallingConv(CC) || parseOptionalReturnAttrs(RetAttrs) ||
+ parseOptionalProgramAddrSpace(CallAddrSpace) ||
+ parseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ parseValID(CalleeID) ||
+ parseParameterList(ArgList, PFS, TCK == CallInst::TCK_MustTail,
PFS.getFunction().isVarArg()) ||
- ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, BuiltinLoc) ||
- ParseOptionalOperandBundles(BundleList, PFS))
+ parseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, BuiltinLoc) ||
+ parseOptionalOperandBundles(BundleList, PFS))
return true;
// If RetType is a non-function pointer type, then this is the short syntax
@@ -6932,7 +7190,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
ParamTypes.push_back(ArgList[i].V->getType());
if (!FunctionType::isValidReturnType(RetType))
- return Error(RetTypeLoc, "Invalid result type for LLVM function");
+ return error(RetTypeLoc, "Invalid result type for LLVM function");
Ty = FunctionType::get(RetType, ParamTypes, false);
}
@@ -6941,7 +7199,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PointerType::get(Ty, CallAddrSpace), CalleeID, Callee,
+ if (convertValIDToValue(PointerType::get(Ty, CallAddrSpace), CalleeID, Callee,
&PFS, /*IsCall=*/true))
return true;
@@ -6959,21 +7217,21 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
if (I != E) {
ExpectedTy = *I++;
} else if (!Ty->isVarArg()) {
- return Error(ArgList[i].Loc, "too many arguments specified");
+ return error(ArgList[i].Loc, "too many arguments specified");
}
if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
- return Error(ArgList[i].Loc, "argument is not of expected type '" +
- getTypeString(ExpectedTy) + "'");
+ return error(ArgList[i].Loc, "argument is not of expected type '" +
+ getTypeString(ExpectedTy) + "'");
Args.push_back(ArgList[i].V);
Attrs.push_back(ArgList[i].Attrs);
}
if (I != E)
- return Error(CallLoc, "not enough parameters specified for call");
+ return error(CallLoc, "not enough parameters specified for call");
if (FnAttrs.hasAlignmentAttr())
- return Error(CallLoc, "call instructions may not have an alignment");
+ return error(CallLoc, "call instructions may not have an alignment");
// Finish off the Attribute and check them
AttributeList PAL =
@@ -6986,8 +7244,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
if (FMF.any()) {
if (!isa<FPMathOperator>(CI)) {
CI->deleteValue();
- return Error(CallLoc, "fast-math-flags specified for call without "
- "floating-point scalar or vector return type");
+ return error(CallLoc, "fast-math-flags specified for call without "
+ "floating-point scalar or vector return type");
}
CI->setFastMathFlags(FMF);
}
@@ -7001,10 +7259,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Memory Instructions.
//===----------------------------------------------------------------------===//
-/// ParseAlloc
+/// parseAlloc
/// ::= 'alloca' 'inalloca'? 'swifterror'? Type (',' TypeAndValue)?
/// (',' 'align' i32)? (',', 'addrspace(n))?
-int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
+int LLParser::parseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
Value *Size = nullptr;
LocTy SizeLoc, TyLoc, ASLoc;
MaybeAlign Alignment;
@@ -7014,36 +7272,37 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
bool IsInAlloca = EatIfPresent(lltok::kw_inalloca);
bool IsSwiftError = EatIfPresent(lltok::kw_swifterror);
- if (ParseType(Ty, TyLoc)) return true;
+ if (parseType(Ty, TyLoc))
+ return true;
if (Ty->isFunctionTy() || !PointerType::isValidElementType(Ty))
- return Error(TyLoc, "invalid type for alloca");
+ return error(TyLoc, "invalid type for alloca");
bool AteExtraComma = false;
if (EatIfPresent(lltok::comma)) {
if (Lex.getKind() == lltok::kw_align) {
- if (ParseOptionalAlignment(Alignment))
+ if (parseOptionalAlignment(Alignment))
return true;
- if (ParseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma))
+ if (parseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma))
return true;
} else if (Lex.getKind() == lltok::kw_addrspace) {
ASLoc = Lex.getLoc();
- if (ParseOptionalAddrSpace(AddrSpace))
+ if (parseOptionalAddrSpace(AddrSpace))
return true;
} else if (Lex.getKind() == lltok::MetadataVar) {
AteExtraComma = true;
} else {
- if (ParseTypeAndValue(Size, SizeLoc, PFS))
+ if (parseTypeAndValue(Size, SizeLoc, PFS))
return true;
if (EatIfPresent(lltok::comma)) {
if (Lex.getKind() == lltok::kw_align) {
- if (ParseOptionalAlignment(Alignment))
+ if (parseOptionalAlignment(Alignment))
return true;
- if (ParseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma))
+ if (parseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma))
return true;
} else if (Lex.getKind() == lltok::kw_addrspace) {
ASLoc = Lex.getLoc();
- if (ParseOptionalAddrSpace(AddrSpace))
+ if (parseOptionalAddrSpace(AddrSpace))
return true;
} else if (Lex.getKind() == lltok::MetadataVar) {
AteExtraComma = true;
@@ -7053,11 +7312,11 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
}
if (Size && !Size->getType()->isIntegerTy())
- return Error(SizeLoc, "element count must have integer type");
+ return error(SizeLoc, "element count must have integer type");
SmallPtrSet<Type *, 4> Visited;
if (!Alignment && !Ty->isSized(&Visited))
- return Error(TyLoc, "Cannot allocate unsized type");
+ return error(TyLoc, "Cannot allocate unsized type");
if (!Alignment)
Alignment = M->getDataLayout().getPrefTypeAlign(Ty);
AllocaInst *AI = new AllocaInst(Ty, AddrSpace, Size, *Alignment);
@@ -7067,11 +7326,11 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
return AteExtraComma ? InstExtraComma : InstNormal;
}
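
A minimal sketch of the same defaulting done above when no 'align' group was written, built through IRBuilder; the helper name is made up and it assumes Ty is a sized, alloca-friendly type.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

// Creates the equivalent of "%buf = alloca <Ty>, i32 %count" and, like
// parseAlloc, falls back to the DataLayout's preferred alignment.
static llvm::AllocaInst *allocaWithDefaultAlign(llvm::IRBuilder<> &B,
                                                llvm::Module &M, llvm::Type *Ty,
                                                llvm::Value *Count) {
  llvm::AllocaInst *AI = B.CreateAlloca(Ty, Count, "buf");
  AI->setAlignment(M.getDataLayout().getPrefTypeAlign(Ty));
  return AI;
}
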
-/// ParseLoad
+/// parseLoad
/// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)?
/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
-int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
+int LLParser::parseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy Loc;
MaybeAlign Alignment;
bool AteExtraComma = false;
@@ -7092,39 +7351,39 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Type *Ty;
LocTy ExplicitTypeLoc = Lex.getLoc();
- if (ParseType(Ty) ||
- ParseToken(lltok::comma, "expected comma after load's type") ||
- ParseTypeAndValue(Val, Loc, PFS) ||
- ParseScopeAndOrdering(isAtomic, SSID, Ordering) ||
- ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ if (parseType(Ty) ||
+ parseToken(lltok::comma, "expected comma after load's type") ||
+ parseTypeAndValue(Val, Loc, PFS) ||
+ parseScopeAndOrdering(isAtomic, SSID, Ordering) ||
+ parseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
if (!Val->getType()->isPointerTy() || !Ty->isFirstClassType())
- return Error(Loc, "load operand must be a pointer to a first class type");
+ return error(Loc, "load operand must be a pointer to a first class type");
if (isAtomic && !Alignment)
- return Error(Loc, "atomic load must have explicit non-zero alignment");
+ return error(Loc, "atomic load must have explicit non-zero alignment");
if (Ordering == AtomicOrdering::Release ||
Ordering == AtomicOrdering::AcquireRelease)
- return Error(Loc, "atomic load cannot use Release ordering");
+ return error(Loc, "atomic load cannot use Release ordering");
if (Ty != cast<PointerType>(Val->getType())->getElementType())
- return Error(ExplicitTypeLoc,
+ return error(ExplicitTypeLoc,
"explicit pointee type doesn't match operand's pointee type");
SmallPtrSet<Type *, 4> Visited;
if (!Alignment && !Ty->isSized(&Visited))
- return Error(ExplicitTypeLoc, "loading unsized types is not allowed");
+ return error(ExplicitTypeLoc, "loading unsized types is not allowed");
if (!Alignment)
Alignment = M->getDataLayout().getABITypeAlign(Ty);
Inst = new LoadInst(Ty, Val, "", isVolatile, *Alignment, Ordering, SSID);
return AteExtraComma ? InstExtraComma : InstNormal;
}
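
A minimal sketch of the non-atomic form "%v = load i32, i32* %p, align 4" built through IRBuilder; it assumes Ptr really is an i32* in this typed-pointer IR, which is the explicit-pointee-type match enforced above.

#include "llvm/IR/IRBuilder.h"

// The explicit 4-byte alignment stands in for the ABI-alignment fallback
// that parseLoad applies when the 'align' group is omitted.
static llvm::Value *loadI32(llvm::IRBuilder<> &B, llvm::Value *Ptr) {
  return B.CreateAlignedLoad(B.getInt32Ty(), Ptr, llvm::MaybeAlign(4), "v");
}
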
-/// ParseStore
+/// parseStore
/// ::= 'store' 'volatile'? TypeAndValue ',' TypeAndValue (',' 'align' i32)?
/// ::= 'store' 'atomic' 'volatile'? TypeAndValue ',' TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
-int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
+int LLParser::parseStore(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val, *Ptr; LocTy Loc, PtrLoc;
MaybeAlign Alignment;
bool AteExtraComma = false;
@@ -7143,27 +7402,27 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
Lex.Lex();
}
- if (ParseTypeAndValue(Val, Loc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after store operand") ||
- ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
- ParseScopeAndOrdering(isAtomic, SSID, Ordering) ||
- ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ if (parseTypeAndValue(Val, Loc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after store operand") ||
+ parseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ parseScopeAndOrdering(isAtomic, SSID, Ordering) ||
+ parseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
if (!Ptr->getType()->isPointerTy())
- return Error(PtrLoc, "store operand must be a pointer");
+ return error(PtrLoc, "store operand must be a pointer");
if (!Val->getType()->isFirstClassType())
- return Error(Loc, "store operand must be a first class value");
+ return error(Loc, "store operand must be a first class value");
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
- return Error(Loc, "stored value and pointer type do not match");
+ return error(Loc, "stored value and pointer type do not match");
if (isAtomic && !Alignment)
- return Error(Loc, "atomic store must have explicit non-zero alignment");
+ return error(Loc, "atomic store must have explicit non-zero alignment");
if (Ordering == AtomicOrdering::Acquire ||
Ordering == AtomicOrdering::AcquireRelease)
- return Error(Loc, "atomic store cannot use Acquire ordering");
+ return error(Loc, "atomic store cannot use Acquire ordering");
SmallPtrSet<Type *, 4> Visited;
if (!Alignment && !Val->getType()->isSized(&Visited))
- return Error(Loc, "storing unsized types is not allowed");
+ return error(Loc, "storing unsized types is not allowed");
if (!Alignment)
Alignment = M->getDataLayout().getABITypeAlign(Val->getType());
@@ -7171,10 +7430,10 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
return AteExtraComma ? InstExtraComma : InstNormal;
}
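
A minimal sketch of the atomic form "store atomic i32 %v, i32* %p release, align 4"; release is chosen because the checks above reject acquire and acq_rel for stores, and the names are illustrative.

#include "llvm/IR/IRBuilder.h"

// An atomic store must carry an explicit alignment and a non-acquire
// ordering, mirroring the two error paths above.
static void storeRelease(llvm::IRBuilder<> &B, llvm::Value *V, llvm::Value *P) {
  llvm::StoreInst *SI = B.CreateAlignedStore(V, P, llvm::MaybeAlign(4));
  SI->setAtomic(llvm::AtomicOrdering::Release);
}
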
-/// ParseCmpXchg
+/// parseCmpXchg
/// ::= 'cmpxchg' 'weak'? 'volatile'? TypeAndValue ',' TypeAndValue ','
/// TypeAndValue 'singlethread'? AtomicOrdering AtomicOrdering
-int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
+int LLParser::parseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
Value *Ptr, *Cmp, *New; LocTy PtrLoc, CmpLoc, NewLoc;
bool AteExtraComma = false;
AtomicOrdering SuccessOrdering = AtomicOrdering::NotAtomic;
@@ -7189,33 +7448,33 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
if (EatIfPresent(lltok::kw_volatile))
isVolatile = true;
- if (ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after cmpxchg address") ||
- ParseTypeAndValue(Cmp, CmpLoc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") ||
- ParseTypeAndValue(New, NewLoc, PFS) ||
- ParseScopeAndOrdering(true /*Always atomic*/, SSID, SuccessOrdering) ||
- ParseOrdering(FailureOrdering))
+ if (parseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after cmpxchg address") ||
+ parseTypeAndValue(Cmp, CmpLoc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") ||
+ parseTypeAndValue(New, NewLoc, PFS) ||
+ parseScopeAndOrdering(true /*Always atomic*/, SSID, SuccessOrdering) ||
+ parseOrdering(FailureOrdering))
return true;
if (SuccessOrdering == AtomicOrdering::Unordered ||
FailureOrdering == AtomicOrdering::Unordered)
- return TokError("cmpxchg cannot be unordered");
+ return tokError("cmpxchg cannot be unordered");
if (isStrongerThan(FailureOrdering, SuccessOrdering))
- return TokError("cmpxchg failure argument shall be no stronger than the "
+ return tokError("cmpxchg failure argument shall be no stronger than the "
"success argument");
if (FailureOrdering == AtomicOrdering::Release ||
FailureOrdering == AtomicOrdering::AcquireRelease)
- return TokError(
+ return tokError(
"cmpxchg failure ordering cannot include release semantics");
if (!Ptr->getType()->isPointerTy())
- return Error(PtrLoc, "cmpxchg operand must be a pointer");
+ return error(PtrLoc, "cmpxchg operand must be a pointer");
if (cast<PointerType>(Ptr->getType())->getElementType() != Cmp->getType())
- return Error(CmpLoc, "compare value and pointer type do not match");
+ return error(CmpLoc, "compare value and pointer type do not match");
if (cast<PointerType>(Ptr->getType())->getElementType() != New->getType())
- return Error(NewLoc, "new value and pointer type do not match");
+ return error(NewLoc, "new value and pointer type do not match");
if (!New->getType()->isFirstClassType())
- return Error(NewLoc, "cmpxchg operand must be a first class value");
+ return error(NewLoc, "cmpxchg operand must be a first class value");
Align Alignment(
PFS.getFunction().getParent()->getDataLayout().getTypeStoreSize(
@@ -7229,10 +7488,10 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
return AteExtraComma ? InstExtraComma : InstNormal;
}
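
For reference, a sketch of IR text this production accepts, parsed through the public entry point; the names are invented, and the orderings are picked to satisfy the checks above (the failure ordering is no stronger than the success ordering and carries no release semantics).

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"

// cmpxchg yields a { i32, i1 } pair; the first element is the value that
// was in memory, the second says whether the exchange happened.
static std::unique_ptr<llvm::Module> parseCmpXchgExample(llvm::LLVMContext &C) {
  llvm::SMDiagnostic Err;
  return llvm::parseAssemblyString(
      "define i32 @cas(i32* %p, i32 %old, i32 %new) {\n"
      "  %pair = cmpxchg i32* %p, i32 %old, i32 %new acq_rel monotonic\n"
      "  %val = extractvalue { i32, i1 } %pair, 0\n"
      "  ret i32 %val\n"
      "}\n",
      Err, C);
}
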
-/// ParseAtomicRMW
+/// parseAtomicRMW
/// ::= 'atomicrmw' 'volatile'? BinOp TypeAndValue ',' TypeAndValue
/// 'singlethread'? AtomicOrdering
-int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
+int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
Value *Ptr, *Val; LocTy PtrLoc, ValLoc;
bool AteExtraComma = false;
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
@@ -7245,7 +7504,8 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
isVolatile = true;
switch (Lex.getKind()) {
- default: return TokError("expected binary operation in atomicrmw");
+ default:
+ return tokError("expected binary operation in atomicrmw");
case lltok::kw_xchg: Operation = AtomicRMWInst::Xchg; break;
case lltok::kw_add: Operation = AtomicRMWInst::Add; break;
case lltok::kw_sub: Operation = AtomicRMWInst::Sub; break;
@@ -7268,43 +7528,43 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
}
Lex.Lex(); // Eat the operation.
- if (ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after atomicrmw address") ||
- ParseTypeAndValue(Val, ValLoc, PFS) ||
- ParseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering))
+ if (parseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ parseToken(lltok::comma, "expected ',' after atomicrmw address") ||
+ parseTypeAndValue(Val, ValLoc, PFS) ||
+ parseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering))
return true;
if (Ordering == AtomicOrdering::Unordered)
- return TokError("atomicrmw cannot be unordered");
+ return tokError("atomicrmw cannot be unordered");
if (!Ptr->getType()->isPointerTy())
- return Error(PtrLoc, "atomicrmw operand must be a pointer");
+ return error(PtrLoc, "atomicrmw operand must be a pointer");
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
- return Error(ValLoc, "atomicrmw value and pointer type do not match");
+ return error(ValLoc, "atomicrmw value and pointer type do not match");
if (Operation == AtomicRMWInst::Xchg) {
if (!Val->getType()->isIntegerTy() &&
!Val->getType()->isFloatingPointTy()) {
- return Error(ValLoc, "atomicrmw " +
- AtomicRMWInst::getOperationName(Operation) +
- " operand must be an integer or floating point type");
+ return error(ValLoc,
+ "atomicrmw " + AtomicRMWInst::getOperationName(Operation) +
+ " operand must be an integer or floating point type");
}
} else if (IsFP) {
if (!Val->getType()->isFloatingPointTy()) {
- return Error(ValLoc, "atomicrmw " +
- AtomicRMWInst::getOperationName(Operation) +
- " operand must be a floating point type");
+ return error(ValLoc, "atomicrmw " +
+ AtomicRMWInst::getOperationName(Operation) +
+ " operand must be a floating point type");
}
} else {
if (!Val->getType()->isIntegerTy()) {
- return Error(ValLoc, "atomicrmw " +
- AtomicRMWInst::getOperationName(Operation) +
- " operand must be an integer");
+ return error(ValLoc, "atomicrmw " +
+ AtomicRMWInst::getOperationName(Operation) +
+ " operand must be an integer");
}
}
unsigned Size = Val->getType()->getPrimitiveSizeInBits();
if (Size < 8 || (Size & (Size - 1)))
- return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
+ return error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
" integer");
Align Alignment(
PFS.getFunction().getParent()->getDataLayout().getTypeStoreSize(
@@ -7316,26 +7576,26 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
return AteExtraComma ? InstExtraComma : InstNormal;
}
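
Another sketch in the same style, with invented names: the i32 operand is a power-of-two byte size and the ordering is not unordered, so it passes the checks above.

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"

// "atomicrmw add" returns the value the memory held before the update.
static std::unique_ptr<llvm::Module> parseAtomicRMWExample(llvm::LLVMContext &C) {
  llvm::SMDiagnostic Err;
  return llvm::parseAssemblyString(
      "define i32 @bump(i32* %p) {\n"
      "  %prev = atomicrmw add i32* %p, i32 1 seq_cst\n"
      "  ret i32 %prev\n"
      "}\n",
      Err, C);
}
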
-/// ParseFence
+/// parseFence
/// ::= 'fence' 'singlethread'? AtomicOrdering
-int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) {
+int LLParser::parseFence(Instruction *&Inst, PerFunctionState &PFS) {
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
SyncScope::ID SSID = SyncScope::System;
- if (ParseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering))
+ if (parseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering))
return true;
if (Ordering == AtomicOrdering::Unordered)
- return TokError("fence cannot be unordered");
+ return tokError("fence cannot be unordered");
if (Ordering == AtomicOrdering::Monotonic)
- return TokError("fence cannot be monotonic");
+ return tokError("fence cannot be monotonic");
Inst = new FenceInst(Context, Ordering, SSID);
return InstNormal;
}
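
A minimal IRBuilder sketch; seq_cst is used since the checks above leave acquire, release, acq_rel and seq_cst as the only orderings a fence may carry.

#include "llvm/IR/IRBuilder.h"

// Equivalent of the textual form "fence seq_cst".
static void emitSeqCstFence(llvm::IRBuilder<> &B) {
  B.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
}
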
-/// ParseGetElementPtr
+/// parseGetElementPtr
/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
-int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
+int LLParser::parseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
Value *Ptr = nullptr;
Value *Val = nullptr;
LocTy Loc, EltLoc;
@@ -7344,18 +7604,18 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
Type *Ty = nullptr;
LocTy ExplicitTypeLoc = Lex.getLoc();
- if (ParseType(Ty) ||
- ParseToken(lltok::comma, "expected comma after getelementptr's type") ||
- ParseTypeAndValue(Ptr, Loc, PFS))
+ if (parseType(Ty) ||
+ parseToken(lltok::comma, "expected comma after getelementptr's type") ||
+ parseTypeAndValue(Ptr, Loc, PFS))
return true;
Type *BaseType = Ptr->getType();
PointerType *BasePointerType = dyn_cast<PointerType>(BaseType->getScalarType());
if (!BasePointerType)
- return Error(Loc, "base of getelementptr must be a pointer");
+ return error(Loc, "base of getelementptr must be a pointer");
if (Ty != BasePointerType->getElementType())
- return Error(ExplicitTypeLoc,
+ return error(ExplicitTypeLoc,
"explicit pointee type doesn't match operand's pointee type");
SmallVector<Value*, 16> Indices;
@@ -7364,22 +7624,24 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
// All vector parameters should have the same vector width.
ElementCount GEPWidth = BaseType->isVectorTy()
? cast<VectorType>(BaseType)->getElementCount()
- : ElementCount(0, false);
+ : ElementCount::getFixed(0);
while (EatIfPresent(lltok::comma)) {
if (Lex.getKind() == lltok::MetadataVar) {
AteExtraComma = true;
break;
}
- if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
+ if (parseTypeAndValue(Val, EltLoc, PFS))
+ return true;
if (!Val->getType()->isIntOrIntVectorTy())
- return Error(EltLoc, "getelementptr index must be an integer");
+ return error(EltLoc, "getelementptr index must be an integer");
if (auto *ValVTy = dyn_cast<VectorType>(Val->getType())) {
ElementCount ValNumEl = ValVTy->getElementCount();
- if (GEPWidth != ElementCount(0, false) && GEPWidth != ValNumEl)
- return Error(EltLoc,
- "getelementptr vector index has a wrong number of elements");
+ if (GEPWidth != ElementCount::getFixed(0) && GEPWidth != ValNumEl)
+ return error(
+ EltLoc,
+ "getelementptr vector index has a wrong number of elements");
GEPWidth = ValNumEl;
}
Indices.push_back(Val);
@@ -7387,55 +7649,55 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
SmallPtrSet<Type*, 4> Visited;
if (!Indices.empty() && !Ty->isSized(&Visited))
- return Error(Loc, "base element of getelementptr must be sized");
+ return error(Loc, "base element of getelementptr must be sized");
if (!GetElementPtrInst::getIndexedType(Ty, Indices))
- return Error(Loc, "invalid getelementptr indices");
+ return error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ty, Ptr, Indices);
if (InBounds)
cast<GetElementPtrInst>(Inst)->setIsInBounds(true);
return AteExtraComma ? InstExtraComma : InstNormal;
}
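
A minimal sketch of "%elt = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 2" via IRBuilder; it assumes Arr has that pointer type, which is the explicit-pointee-type match enforced above.

#include "llvm/IR/IRBuilder.h"

// Indexes first through the pointer (0), then to element 2 of the array.
static llvm::Value *gepThirdElement(llvm::IRBuilder<> &B, llvm::Value *Arr) {
  llvm::Type *ArrTy = llvm::ArrayType::get(B.getInt32Ty(), 4);
  return B.CreateConstInBoundsGEP2_64(ArrTy, Arr, 0, 2, "elt");
}
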
-/// ParseExtractValue
+/// parseExtractValue
/// ::= 'extractvalue' TypeAndValue (',' uint32)+
-int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
+int LLParser::parseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy Loc;
SmallVector<unsigned, 4> Indices;
bool AteExtraComma;
- if (ParseTypeAndValue(Val, Loc, PFS) ||
- ParseIndexList(Indices, AteExtraComma))
+ if (parseTypeAndValue(Val, Loc, PFS) ||
+ parseIndexList(Indices, AteExtraComma))
return true;
if (!Val->getType()->isAggregateType())
- return Error(Loc, "extractvalue operand must be aggregate type");
+ return error(Loc, "extractvalue operand must be aggregate type");
if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
- return Error(Loc, "invalid indices for extractvalue");
+ return error(Loc, "invalid indices for extractvalue");
Inst = ExtractValueInst::Create(Val, Indices);
return AteExtraComma ? InstExtraComma : InstNormal;
}
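
A minimal sketch of "%x = extractvalue { i32, float } %agg, 0"; it assumes Agg is an aggregate with a valid field 0, the two conditions checked above.

#include "llvm/IR/IRBuilder.h"

// Pulls the first field out of a struct or array value (not out of memory).
static llvm::Value *firstField(llvm::IRBuilder<> &B, llvm::Value *Agg) {
  return B.CreateExtractValue(Agg, 0, "x");
}
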
-/// ParseInsertValue
+/// parseInsertValue
/// ::= 'insertvalue' TypeAndValue ',' TypeAndValue (',' uint32)+
-int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
+int LLParser::parseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val0, *Val1; LocTy Loc0, Loc1;
SmallVector<unsigned, 4> Indices;
bool AteExtraComma;
- if (ParseTypeAndValue(Val0, Loc0, PFS) ||
- ParseToken(lltok::comma, "expected comma after insertvalue operand") ||
- ParseTypeAndValue(Val1, Loc1, PFS) ||
- ParseIndexList(Indices, AteExtraComma))
+ if (parseTypeAndValue(Val0, Loc0, PFS) ||
+ parseToken(lltok::comma, "expected comma after insertvalue operand") ||
+ parseTypeAndValue(Val1, Loc1, PFS) ||
+ parseIndexList(Indices, AteExtraComma))
return true;
if (!Val0->getType()->isAggregateType())
- return Error(Loc0, "insertvalue operand must be aggregate type");
+ return error(Loc0, "insertvalue operand must be aggregate type");
Type *IndexedType = ExtractValueInst::getIndexedType(Val0->getType(), Indices);
if (!IndexedType)
- return Error(Loc0, "invalid indices for insertvalue");
+ return error(Loc0, "invalid indices for insertvalue");
if (IndexedType != Val1->getType())
- return Error(Loc1, "insertvalue operand and field disagree in type: '" +
+ return error(Loc1, "insertvalue operand and field disagree in type: '" +
getTypeString(Val1->getType()) + "' instead of '" +
getTypeString(IndexedType) + "'");
Inst = InsertValueInst::Create(Val0, Val1, Indices);
@@ -7446,12 +7708,12 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
// Embedded metadata.
//===----------------------------------------------------------------------===//
-/// ParseMDNodeVector
+/// parseMDNodeVector
/// ::= { Element (',' Element)* }
/// Element
/// ::= 'null' | TypeAndValue
-bool LLParser::ParseMDNodeVector(SmallVectorImpl<Metadata *> &Elts) {
- if (ParseToken(lltok::lbrace, "expected '{' here"))
+bool LLParser::parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts) {
+ if (parseToken(lltok::lbrace, "expected '{' here"))
return true;
// Check for an empty list.
@@ -7466,12 +7728,12 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Metadata *> &Elts) {
}
Metadata *MD;
- if (ParseMetadata(MD, nullptr))
+ if (parseMetadata(MD, nullptr))
return true;
Elts.push_back(MD);
} while (EatIfPresent(lltok::comma));
- return ParseToken(lltok::rbrace, "expected end of metadata node");
+ return parseToken(lltok::rbrace, "expected end of metadata node");
}
//===----------------------------------------------------------------------===//
@@ -7480,7 +7742,7 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Metadata *> &Elts) {
bool LLParser::sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes,
SMLoc Loc) {
if (V->use_empty())
- return Error(Loc, "value has no uses");
+ return error(Loc, "value has no uses");
unsigned NumUses = 0;
SmallDenseMap<const Use *, unsigned, 16> Order;
@@ -7490,9 +7752,9 @@ bool LLParser::sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes,
Order[&U] = Indexes[NumUses - 1];
}
if (NumUses < 2)
- return Error(Loc, "value only has one use");
+ return error(Loc, "value only has one use");
if (Order.size() != Indexes.size() || NumUses > Indexes.size())
- return Error(Loc,
+ return error(Loc,
"wrong number of indexes, expected " + Twine(V->getNumUses()));
V->sortUseList([&](const Use &L, const Use &R) {
@@ -7501,11 +7763,11 @@ bool LLParser::sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes,
return false;
}
-/// ParseUseListOrderIndexes
+/// parseUseListOrderIndexes
/// ::= '{' uint32 (',' uint32)+ '}'
-bool LLParser::ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes) {
+bool LLParser::parseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes) {
SMLoc Loc = Lex.getLoc();
- if (ParseToken(lltok::lbrace, "expected '{' here"))
+ if (parseToken(lltok::lbrace, "expected '{' here"))
return true;
if (Lex.getKind() == lltok::rbrace)
return Lex.Error("expected non-empty list of uselistorder indexes");
@@ -7519,7 +7781,7 @@ bool LLParser::ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes) {
assert(Indexes.empty() && "Expected empty order vector");
do {
unsigned Index;
- if (ParseUInt32(Index))
+ if (parseUInt32(Index))
return true;
// Update consistency checks.
@@ -7530,50 +7792,51 @@ bool LLParser::ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes) {
Indexes.push_back(Index);
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rbrace, "expected '}' here"))
+ if (parseToken(lltok::rbrace, "expected '}' here"))
return true;
if (Indexes.size() < 2)
- return Error(Loc, "expected >= 2 uselistorder indexes");
+ return error(Loc, "expected >= 2 uselistorder indexes");
if (Offset != 0 || Max >= Indexes.size())
- return Error(Loc, "expected distinct uselistorder indexes in range [0, size)");
+ return error(Loc,
+ "expected distinct uselistorder indexes in range [0, size)");
if (IsOrdered)
- return Error(Loc, "expected uselistorder indexes to change the order");
+ return error(Loc, "expected uselistorder indexes to change the order");
return false;
}
-/// ParseUseListOrder
+/// parseUseListOrder
/// ::= 'uselistorder' Type Value ',' UseListOrderIndexes
-bool LLParser::ParseUseListOrder(PerFunctionState *PFS) {
+bool LLParser::parseUseListOrder(PerFunctionState *PFS) {
SMLoc Loc = Lex.getLoc();
- if (ParseToken(lltok::kw_uselistorder, "expected uselistorder directive"))
+ if (parseToken(lltok::kw_uselistorder, "expected uselistorder directive"))
return true;
Value *V;
SmallVector<unsigned, 16> Indexes;
- if (ParseTypeAndValue(V, PFS) ||
- ParseToken(lltok::comma, "expected comma in uselistorder directive") ||
- ParseUseListOrderIndexes(Indexes))
+ if (parseTypeAndValue(V, PFS) ||
+ parseToken(lltok::comma, "expected comma in uselistorder directive") ||
+ parseUseListOrderIndexes(Indexes))
return true;
return sortUseListOrder(V, Indexes, Loc);
}
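
For reference, a sketch of a directive this production accepts, again through the public entry point and with invented names; %x has exactly two uses, so the index list { 1, 0 } is complete, distinct, and actually reorders them, satisfying the sortUseListOrder checks above.

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"

// uselistorder directives follow the body's instructions and pin the order
// of a value's use list for assembly/bitcode round-tripping.
static std::unique_ptr<llvm::Module>
parseUseListOrderExample(llvm::LLVMContext &C) {
  llvm::SMDiagnostic Err;
  return llvm::parseAssemblyString(
      "define i32 @use(i32 %x) {\n"
      "entry:\n"
      "  %a = add i32 %x, 1\n"
      "  %b = mul i32 %x, 2\n"
      "  %c = add i32 %a, %b\n"
      "  ret i32 %c\n"
      "  uselistorder i32 %x, { 1, 0 }\n"
      "}\n",
      Err, C);
}
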
-/// ParseUseListOrderBB
+/// parseUseListOrderBB
/// ::= 'uselistorder_bb' @foo ',' %bar ',' UseListOrderIndexes
-bool LLParser::ParseUseListOrderBB() {
+bool LLParser::parseUseListOrderBB() {
assert(Lex.getKind() == lltok::kw_uselistorder_bb);
SMLoc Loc = Lex.getLoc();
Lex.Lex();
ValID Fn, Label;
SmallVector<unsigned, 16> Indexes;
- if (ParseValID(Fn) ||
- ParseToken(lltok::comma, "expected comma in uselistorder_bb directive") ||
- ParseValID(Label) ||
- ParseToken(lltok::comma, "expected comma in uselistorder_bb directive") ||
- ParseUseListOrderIndexes(Indexes))
+ if (parseValID(Fn) ||
+ parseToken(lltok::comma, "expected comma in uselistorder_bb directive") ||
+ parseValID(Label) ||
+ parseToken(lltok::comma, "expected comma in uselistorder_bb directive") ||
+ parseUseListOrderIndexes(Indexes))
return true;
// Check the function.
@@ -7583,25 +7846,26 @@ bool LLParser::ParseUseListOrderBB() {
else if (Fn.Kind == ValID::t_GlobalID)
GV = Fn.UIntVal < NumberedVals.size() ? NumberedVals[Fn.UIntVal] : nullptr;
else
- return Error(Fn.Loc, "expected function name in uselistorder_bb");
+ return error(Fn.Loc, "expected function name in uselistorder_bb");
if (!GV)
- return Error(Fn.Loc, "invalid function forward reference in uselistorder_bb");
+ return error(Fn.Loc,
+ "invalid function forward reference in uselistorder_bb");
auto *F = dyn_cast<Function>(GV);
if (!F)
- return Error(Fn.Loc, "expected function name in uselistorder_bb");
+ return error(Fn.Loc, "expected function name in uselistorder_bb");
if (F->isDeclaration())
- return Error(Fn.Loc, "invalid declaration in uselistorder_bb");
+ return error(Fn.Loc, "invalid declaration in uselistorder_bb");
// Check the basic block.
if (Label.Kind == ValID::t_LocalID)
- return Error(Label.Loc, "invalid numeric label in uselistorder_bb");
+ return error(Label.Loc, "invalid numeric label in uselistorder_bb");
if (Label.Kind != ValID::t_LocalName)
- return Error(Label.Loc, "expected basic block name in uselistorder_bb");
+ return error(Label.Loc, "expected basic block name in uselistorder_bb");
Value *V = F->getValueSymbolTable()->lookup(Label.StrVal);
if (!V)
- return Error(Label.Loc, "invalid basic block in uselistorder_bb");
+ return error(Label.Loc, "invalid basic block in uselistorder_bb");
if (!isa<BasicBlock>(V))
- return Error(Label.Loc, "expected basic block in uselistorder_bb");
+ return error(Label.Loc, "expected basic block in uselistorder_bb");
return sortUseListOrder(V, Indexes, Loc);
}
@@ -7609,32 +7873,32 @@ bool LLParser::ParseUseListOrderBB() {
/// ModuleEntry
/// ::= 'module' ':' '(' 'path' ':' STRINGCONSTANT ',' 'hash' ':' Hash ')'
/// Hash ::= '(' UInt32 ',' UInt32 ',' UInt32 ',' UInt32 ',' UInt32 ')'
-bool LLParser::ParseModuleEntry(unsigned ID) {
+bool LLParser::parseModuleEntry(unsigned ID) {
assert(Lex.getKind() == lltok::kw_module);
Lex.Lex();
std::string Path;
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_path, "expected 'path' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseStringConstant(Path) ||
- ParseToken(lltok::comma, "expected ',' here") ||
- ParseToken(lltok::kw_hash, "expected 'hash' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here") ||
+ parseToken(lltok::kw_path, "expected 'path' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseStringConstant(Path) ||
+ parseToken(lltok::comma, "expected ',' here") ||
+ parseToken(lltok::kw_hash, "expected 'hash' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
ModuleHash Hash;
- if (ParseUInt32(Hash[0]) || ParseToken(lltok::comma, "expected ',' here") ||
- ParseUInt32(Hash[1]) || ParseToken(lltok::comma, "expected ',' here") ||
- ParseUInt32(Hash[2]) || ParseToken(lltok::comma, "expected ',' here") ||
- ParseUInt32(Hash[3]) || ParseToken(lltok::comma, "expected ',' here") ||
- ParseUInt32(Hash[4]))
+ if (parseUInt32(Hash[0]) || parseToken(lltok::comma, "expected ',' here") ||
+ parseUInt32(Hash[1]) || parseToken(lltok::comma, "expected ',' here") ||
+ parseUInt32(Hash[2]) || parseToken(lltok::comma, "expected ',' here") ||
+ parseUInt32(Hash[3]) || parseToken(lltok::comma, "expected ',' here") ||
+ parseUInt32(Hash[4]))
return true;
- if (ParseToken(lltok::rparen, "expected ')' here") ||
- ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here") ||
+ parseToken(lltok::rparen, "expected ')' here"))
return true;
auto ModuleEntry = Index->addModule(Path, ID, Hash);
@@ -7645,21 +7909,21 @@ bool LLParser::ParseModuleEntry(unsigned ID) {
/// TypeIdEntry
/// ::= 'typeid' ':' '(' 'name' ':' STRINGCONSTANT ',' TypeIdSummary ')'
-bool LLParser::ParseTypeIdEntry(unsigned ID) {
+bool LLParser::parseTypeIdEntry(unsigned ID) {
assert(Lex.getKind() == lltok::kw_typeid);
Lex.Lex();
std::string Name;
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_name, "expected 'name' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseStringConstant(Name))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here") ||
+ parseToken(lltok::kw_name, "expected 'name' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseStringConstant(Name))
return true;
TypeIdSummary &TIS = Index->getOrInsertTypeIdSummary(Name);
- if (ParseToken(lltok::comma, "expected ',' here") ||
- ParseTypeIdSummary(TIS) || ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::comma, "expected ',' here") ||
+ parseTypeIdSummary(TIS) || parseToken(lltok::rparen, "expected ')' here"))
return true;
// Check if this ID was forward referenced, and if so, update the
@@ -7679,20 +7943,20 @@ bool LLParser::ParseTypeIdEntry(unsigned ID) {
/// TypeIdSummary
/// ::= 'summary' ':' '(' TypeTestResolution [',' OptionalWpdResolutions]? ')'
-bool LLParser::ParseTypeIdSummary(TypeIdSummary &TIS) {
- if (ParseToken(lltok::kw_summary, "expected 'summary' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseTypeTestResolution(TIS.TTRes))
+bool LLParser::parseTypeIdSummary(TypeIdSummary &TIS) {
+ if (parseToken(lltok::kw_summary, "expected 'summary' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here") ||
+ parseTypeTestResolution(TIS.TTRes))
return true;
if (EatIfPresent(lltok::comma)) {
// Expect optional wpdResolutions field
- if (ParseOptionalWpdResolutions(TIS.WPDRes))
+ if (parseOptionalWpdResolutions(TIS.WPDRes))
return true;
}
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -7705,40 +7969,40 @@ static ValueInfo EmptyVI =
/// ::= 'typeidCompatibleVTable' ':' '(' 'name' ':' STRINGCONSTANT ','
/// TypeIdCompatibleVtableInfo
/// ')'
-bool LLParser::ParseTypeIdCompatibleVtableEntry(unsigned ID) {
+bool LLParser::parseTypeIdCompatibleVtableEntry(unsigned ID) {
assert(Lex.getKind() == lltok::kw_typeidCompatibleVTable);
Lex.Lex();
std::string Name;
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_name, "expected 'name' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseStringConstant(Name))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here") ||
+ parseToken(lltok::kw_name, "expected 'name' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseStringConstant(Name))
return true;
TypeIdCompatibleVtableInfo &TI =
Index->getOrInsertTypeIdCompatibleVtableSummary(Name);
- if (ParseToken(lltok::comma, "expected ',' here") ||
- ParseToken(lltok::kw_summary, "expected 'summary' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::comma, "expected ',' here") ||
+ parseToken(lltok::kw_summary, "expected 'summary' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
IdToIndexMapType IdToIndexMap;
- // Parse each call edge
+ // parse each call edge
do {
uint64_t Offset;
- if (ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_offset, "expected 'offset' here") ||
- ParseToken(lltok::colon, "expected ':' here") || ParseUInt64(Offset) ||
- ParseToken(lltok::comma, "expected ',' here"))
+ if (parseToken(lltok::lparen, "expected '(' here") ||
+ parseToken(lltok::kw_offset, "expected 'offset' here") ||
+ parseToken(lltok::colon, "expected ':' here") || parseUInt64(Offset) ||
+ parseToken(lltok::comma, "expected ',' here"))
return true;
LocTy Loc = Lex.getLoc();
unsigned GVId;
ValueInfo VI;
- if (ParseGVReference(VI, GVId))
+ if (parseGVReference(VI, GVId))
return true;
// Keep track of the TypeIdCompatibleVtableInfo array index needing a
@@ -7748,25 +8012,23 @@ bool LLParser::ParseTypeIdCompatibleVtableEntry(unsigned ID) {
IdToIndexMap[GVId].push_back(std::make_pair(TI.size(), Loc));
TI.push_back({Offset, VI});
- if (ParseToken(lltok::rparen, "expected ')' in call"))
+ if (parseToken(lltok::rparen, "expected ')' in call"))
return true;
} while (EatIfPresent(lltok::comma));
// Now that the TI vector is finalized, it is safe to save the locations
// of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
+ auto &Infos = ForwardRefValueInfos[I.first];
for (auto P : I.second) {
assert(TI[P.first].VTableVI == EmptyVI &&
"Forward referenced ValueInfo expected to be empty");
- auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
- I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
- FwdRef.first->second.push_back(
- std::make_pair(&TI[P.first].VTableVI, P.second));
+ Infos.emplace_back(&TI[P.first].VTableVI, P.second);
}
}
- if (ParseToken(lltok::rparen, "expected ')' here") ||
- ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here") ||
+ parseToken(lltok::rparen, "expected ')' here"))
return true;
// Check if this ID was forward referenced, and if so, update the
@@ -7790,12 +8052,12 @@ bool LLParser::ParseTypeIdCompatibleVtableEntry(unsigned ID) {
/// 'sizeM1BitWidth' ':' SizeM1BitWidth [',' 'alignLog2' ':' UInt64]?
/// [',' 'sizeM1' ':' UInt64]? [',' 'bitMask' ':' UInt8]?
/// [',' 'inlinesBits' ':' UInt64]? ')'
-bool LLParser::ParseTypeTestResolution(TypeTestResolution &TTRes) {
- if (ParseToken(lltok::kw_typeTestRes, "expected 'typeTestRes' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_kind, "expected 'kind' here") ||
- ParseToken(lltok::colon, "expected ':' here"))
+bool LLParser::parseTypeTestResolution(TypeTestResolution &TTRes) {
+ if (parseToken(lltok::kw_typeTestRes, "expected 'typeTestRes' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here") ||
+ parseToken(lltok::kw_kind, "expected 'kind' here") ||
+ parseToken(lltok::colon, "expected ':' here"))
return true;
switch (Lex.getKind()) {
@@ -7818,34 +8080,34 @@ bool LLParser::ParseTypeTestResolution(TypeTestResolution &TTRes) {
TTRes.TheKind = TypeTestResolution::AllOnes;
break;
default:
- return Error(Lex.getLoc(), "unexpected TypeTestResolution kind");
+ return error(Lex.getLoc(), "unexpected TypeTestResolution kind");
}
Lex.Lex();
- if (ParseToken(lltok::comma, "expected ',' here") ||
- ParseToken(lltok::kw_sizeM1BitWidth, "expected 'sizeM1BitWidth' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseUInt32(TTRes.SizeM1BitWidth))
+ if (parseToken(lltok::comma, "expected ',' here") ||
+ parseToken(lltok::kw_sizeM1BitWidth, "expected 'sizeM1BitWidth' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseUInt32(TTRes.SizeM1BitWidth))
return true;
- // Parse optional fields
+ // parse optional fields
while (EatIfPresent(lltok::comma)) {
switch (Lex.getKind()) {
case lltok::kw_alignLog2:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") ||
- ParseUInt64(TTRes.AlignLog2))
+ if (parseToken(lltok::colon, "expected ':'") ||
+ parseUInt64(TTRes.AlignLog2))
return true;
break;
case lltok::kw_sizeM1:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseUInt64(TTRes.SizeM1))
+ if (parseToken(lltok::colon, "expected ':'") || parseUInt64(TTRes.SizeM1))
return true;
break;
case lltok::kw_bitMask: {
unsigned Val;
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseUInt32(Val))
+ if (parseToken(lltok::colon, "expected ':'") || parseUInt32(Val))
return true;
assert(Val <= 0xff);
TTRes.BitMask = (uint8_t)Val;
@@ -7853,16 +8115,16 @@ bool LLParser::ParseTypeTestResolution(TypeTestResolution &TTRes) {
}
case lltok::kw_inlineBits:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") ||
- ParseUInt64(TTRes.InlineBits))
+ if (parseToken(lltok::colon, "expected ':'") ||
+ parseUInt64(TTRes.InlineBits))
return true;
break;
default:
- return Error(Lex.getLoc(), "expected optional TypeTestResolution field");
+ return error(Lex.getLoc(), "expected optional TypeTestResolution field");
}
}
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -7871,26 +8133,26 @@ bool LLParser::ParseTypeTestResolution(TypeTestResolution &TTRes) {
/// OptionalWpdResolutions
/// ::= 'wpsResolutions' ':' '(' WpdResolution [',' WpdResolution]* ')'
/// WpdResolution ::= '(' 'offset' ':' UInt64 ',' WpdRes ')'
-bool LLParser::ParseOptionalWpdResolutions(
+bool LLParser::parseOptionalWpdResolutions(
std::map<uint64_t, WholeProgramDevirtResolution> &WPDResMap) {
- if (ParseToken(lltok::kw_wpdResolutions, "expected 'wpdResolutions' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::kw_wpdResolutions, "expected 'wpdResolutions' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
do {
uint64_t Offset;
WholeProgramDevirtResolution WPDRes;
- if (ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_offset, "expected 'offset' here") ||
- ParseToken(lltok::colon, "expected ':' here") || ParseUInt64(Offset) ||
- ParseToken(lltok::comma, "expected ',' here") || ParseWpdRes(WPDRes) ||
- ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::lparen, "expected '(' here") ||
+ parseToken(lltok::kw_offset, "expected 'offset' here") ||
+ parseToken(lltok::colon, "expected ':' here") || parseUInt64(Offset) ||
+ parseToken(lltok::comma, "expected ',' here") || parseWpdRes(WPDRes) ||
+ parseToken(lltok::rparen, "expected ')' here"))
return true;
WPDResMap[Offset] = WPDRes;
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -7904,12 +8166,12 @@ bool LLParser::ParseOptionalWpdResolutions(
/// [',' OptionalResByArg]? ')'
/// ::= 'wpdRes' ':' '(' 'kind' ':' 'branchFunnel'
/// [',' OptionalResByArg]? ')'
-bool LLParser::ParseWpdRes(WholeProgramDevirtResolution &WPDRes) {
- if (ParseToken(lltok::kw_wpdRes, "expected 'wpdRes' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_kind, "expected 'kind' here") ||
- ParseToken(lltok::colon, "expected ':' here"))
+bool LLParser::parseWpdRes(WholeProgramDevirtResolution &WPDRes) {
+ if (parseToken(lltok::kw_wpdRes, "expected 'wpdRes' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here") ||
+ parseToken(lltok::kw_kind, "expected 'kind' here") ||
+ parseToken(lltok::colon, "expected ':' here"))
return true;
switch (Lex.getKind()) {
@@ -7923,30 +8185,30 @@ bool LLParser::ParseWpdRes(WholeProgramDevirtResolution &WPDRes) {
WPDRes.TheKind = WholeProgramDevirtResolution::BranchFunnel;
break;
default:
- return Error(Lex.getLoc(), "unexpected WholeProgramDevirtResolution kind");
+ return error(Lex.getLoc(), "unexpected WholeProgramDevirtResolution kind");
}
Lex.Lex();
- // Parse optional fields
+ // parse optional fields
while (EatIfPresent(lltok::comma)) {
switch (Lex.getKind()) {
case lltok::kw_singleImplName:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseStringConstant(WPDRes.SingleImplName))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseStringConstant(WPDRes.SingleImplName))
return true;
break;
case lltok::kw_resByArg:
- if (ParseOptionalResByArg(WPDRes.ResByArg))
+ if (parseOptionalResByArg(WPDRes.ResByArg))
return true;
break;
default:
- return Error(Lex.getLoc(),
+ return error(Lex.getLoc(),
"expected optional WholeProgramDevirtResolution field");
}
}
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -7959,22 +8221,22 @@ bool LLParser::ParseWpdRes(WholeProgramDevirtResolution &WPDRes) {
/// 'virtualConstProp' )
/// [',' 'info' ':' UInt64]? [',' 'byte' ':' UInt32]?
/// [',' 'bit' ':' UInt32]? ')'
-bool LLParser::ParseOptionalResByArg(
+bool LLParser::parseOptionalResByArg(
std::map<std::vector<uint64_t>, WholeProgramDevirtResolution::ByArg>
&ResByArg) {
- if (ParseToken(lltok::kw_resByArg, "expected 'resByArg' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::kw_resByArg, "expected 'resByArg' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
do {
std::vector<uint64_t> Args;
- if (ParseArgs(Args) || ParseToken(lltok::comma, "expected ',' here") ||
- ParseToken(lltok::kw_byArg, "expected 'byArg here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_kind, "expected 'kind' here") ||
- ParseToken(lltok::colon, "expected ':' here"))
+ if (parseArgs(Args) || parseToken(lltok::comma, "expected ',' here") ||
+ parseToken(lltok::kw_byArg, "expected 'byArg here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here") ||
+ parseToken(lltok::kw_kind, "expected 'kind' here") ||
+ parseToken(lltok::colon, "expected ':' here"))
return true;
WholeProgramDevirtResolution::ByArg ByArg;
@@ -7992,45 +8254,45 @@ bool LLParser::ParseOptionalResByArg(
ByArg.TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp;
break;
default:
- return Error(Lex.getLoc(),
+ return error(Lex.getLoc(),
"unexpected WholeProgramDevirtResolution::ByArg kind");
}
Lex.Lex();
- // Parse optional fields
+ // parse optional fields
while (EatIfPresent(lltok::comma)) {
switch (Lex.getKind()) {
case lltok::kw_info:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseUInt64(ByArg.Info))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseUInt64(ByArg.Info))
return true;
break;
case lltok::kw_byte:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseUInt32(ByArg.Byte))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseUInt32(ByArg.Byte))
return true;
break;
case lltok::kw_bit:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseUInt32(ByArg.Bit))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseUInt32(ByArg.Bit))
return true;
break;
default:
- return Error(Lex.getLoc(),
+ return error(Lex.getLoc(),
"expected optional whole program devirt field");
}
}
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
ResByArg[Args] = ByArg;
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -8038,20 +8300,20 @@ bool LLParser::ParseOptionalResByArg(
/// OptionalResByArg
/// ::= 'args' ':' '(' UInt64[, UInt64]* ')'
-bool LLParser::ParseArgs(std::vector<uint64_t> &Args) {
- if (ParseToken(lltok::kw_args, "expected 'args' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+bool LLParser::parseArgs(std::vector<uint64_t> &Args) {
+ if (parseToken(lltok::kw_args, "expected 'args' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
do {
uint64_t Val;
- if (ParseUInt64(Val))
+ if (parseUInt64(Val))
return true;
Args.push_back(Val);
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -8072,7 +8334,7 @@ static void resolveFwdRef(ValueInfo *Fwd, ValueInfo &Resolved) {
/// Stores the given Name/GUID and associated summary into the Index.
/// Also updates any forward references to the associated entry ID.
-void LLParser::AddGlobalValueToIndex(
+void LLParser::addGlobalValueToIndex(
std::string Name, GlobalValue::GUID GUID, GlobalValue::LinkageTypes Linkage,
unsigned ID, std::unique_ptr<GlobalValueSummary> Summary) {
// First create the ValueInfo utilizing the Name or GUID.
@@ -8134,46 +8396,48 @@ void LLParser::AddGlobalValueToIndex(
}
}
-/// ParseSummaryIndexFlags
+/// parseSummaryIndexFlags
/// ::= 'flags' ':' UInt64
-bool LLParser::ParseSummaryIndexFlags() {
+bool LLParser::parseSummaryIndexFlags() {
assert(Lex.getKind() == lltok::kw_flags);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here"))
+ if (parseToken(lltok::colon, "expected ':' here"))
return true;
uint64_t Flags;
- if (ParseUInt64(Flags))
+ if (parseUInt64(Flags))
return true;
- Index->setFlags(Flags);
+ if (Index)
+ Index->setFlags(Flags);
return false;
}
-/// ParseBlockCount
+/// parseBlockCount
/// ::= 'blockcount' ':' UInt64
-bool LLParser::ParseBlockCount() {
+bool LLParser::parseBlockCount() {
assert(Lex.getKind() == lltok::kw_blockcount);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here"))
+ if (parseToken(lltok::colon, "expected ':' here"))
return true;
uint64_t BlockCount;
- if (ParseUInt64(BlockCount))
+ if (parseUInt64(BlockCount))
return true;
- Index->setBlockCount(BlockCount);
+ if (Index)
+ Index->setBlockCount(BlockCount);
return false;
}
-/// ParseGVEntry
+/// parseGVEntry
/// ::= 'gv' ':' '(' ('name' ':' STRINGCONSTANT | 'guid' ':' UInt64)
/// [',' 'summaries' ':' Summary[',' Summary]* ]? ')'
/// Summary ::= '(' (FunctionSummary | VariableSummary | AliasSummary) ')'
-bool LLParser::ParseGVEntry(unsigned ID) {
+bool LLParser::parseGVEntry(unsigned ID) {
assert(Lex.getKind() == lltok::kw_gv);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
std::string Name;
@@ -8181,23 +8445,23 @@ bool LLParser::ParseGVEntry(unsigned ID) {
switch (Lex.getKind()) {
case lltok::kw_name:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseStringConstant(Name))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseStringConstant(Name))
return true;
// Can't create GUID/ValueInfo until we have the linkage.
break;
case lltok::kw_guid:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") || ParseUInt64(GUID))
+ if (parseToken(lltok::colon, "expected ':' here") || parseUInt64(GUID))
return true;
break;
default:
- return Error(Lex.getLoc(), "expected name or guid tag");
+ return error(Lex.getLoc(), "expected name or guid tag");
}
if (!EatIfPresent(lltok::comma)) {
// No summaries. Wrap up.
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
// This was created for a call to an external or indirect target.
// A GUID with no summary came from a VALUE_GUID record, dummy GUID
@@ -8205,37 +8469,37 @@ bool LLParser::ParseGVEntry(unsigned ID) {
// an external definition. We pass ExternalLinkage since that is only
// used when the GUID must be computed from Name, and in that case
// the symbol must have external linkage.
- AddGlobalValueToIndex(Name, GUID, GlobalValue::ExternalLinkage, ID,
+ addGlobalValueToIndex(Name, GUID, GlobalValue::ExternalLinkage, ID,
nullptr);
return false;
}
// Have a list of summaries
- if (ParseToken(lltok::kw_summaries, "expected 'summaries' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::kw_summaries, "expected 'summaries' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
do {
switch (Lex.getKind()) {
case lltok::kw_function:
- if (ParseFunctionSummary(Name, GUID, ID))
+ if (parseFunctionSummary(Name, GUID, ID))
return true;
break;
case lltok::kw_variable:
- if (ParseVariableSummary(Name, GUID, ID))
+ if (parseVariableSummary(Name, GUID, ID))
return true;
break;
case lltok::kw_alias:
- if (ParseAliasSummary(Name, GUID, ID))
+ if (parseAliasSummary(Name, GUID, ID))
return true;
break;
default:
- return Error(Lex.getLoc(), "expected summary type");
+ return error(Lex.getLoc(), "expected summary type");
}
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' here") ||
- ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here") ||
+ parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -8246,7 +8510,7 @@ bool LLParser::ParseGVEntry(unsigned ID) {
/// ',' 'insts' ':' UInt32 [',' OptionalFFlags]? [',' OptionalCalls]?
/// [',' OptionalTypeIdInfo]? [',' OptionalParamAccesses]?
/// [',' OptionalRefs]? ')'
-bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
+bool LLParser::parseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
unsigned ID) {
assert(Lex.getKind() == lltok::kw_function);
Lex.Lex();
@@ -8262,44 +8526,44 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
std::vector<ValueInfo> Refs;
// Default is all-zeros (conservative values).
FunctionSummary::FFlags FFlags = {};
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseModuleReference(ModulePath) ||
- ParseToken(lltok::comma, "expected ',' here") || ParseGVFlags(GVFlags) ||
- ParseToken(lltok::comma, "expected ',' here") ||
- ParseToken(lltok::kw_insts, "expected 'insts' here") ||
- ParseToken(lltok::colon, "expected ':' here") || ParseUInt32(InstCount))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here") ||
+ parseModuleReference(ModulePath) ||
+ parseToken(lltok::comma, "expected ',' here") || parseGVFlags(GVFlags) ||
+ parseToken(lltok::comma, "expected ',' here") ||
+ parseToken(lltok::kw_insts, "expected 'insts' here") ||
+ parseToken(lltok::colon, "expected ':' here") || parseUInt32(InstCount))
return true;
- // Parse optional fields
+ // parse optional fields
while (EatIfPresent(lltok::comma)) {
switch (Lex.getKind()) {
case lltok::kw_funcFlags:
- if (ParseOptionalFFlags(FFlags))
+ if (parseOptionalFFlags(FFlags))
return true;
break;
case lltok::kw_calls:
- if (ParseOptionalCalls(Calls))
+ if (parseOptionalCalls(Calls))
return true;
break;
case lltok::kw_typeIdInfo:
- if (ParseOptionalTypeIdInfo(TypeIdInfo))
+ if (parseOptionalTypeIdInfo(TypeIdInfo))
return true;
break;
case lltok::kw_refs:
- if (ParseOptionalRefs(Refs))
+ if (parseOptionalRefs(Refs))
return true;
break;
case lltok::kw_params:
- if (ParseOptionalParamAccesses(ParamAccesses))
+ if (parseOptionalParamAccesses(ParamAccesses))
return true;
break;
default:
- return Error(Lex.getLoc(), "expected optional function summary field");
+ return error(Lex.getLoc(), "expected optional function summary field");
}
}
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
auto FS = std::make_unique<FunctionSummary>(
@@ -8313,7 +8577,7 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
FS->setModulePath(ModulePath);
- AddGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
+ addGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
ID, std::move(FS));
return false;
@@ -8322,7 +8586,7 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
/// VariableSummary
/// ::= 'variable' ':' '(' 'module' ':' ModuleReference ',' GVFlags
/// [',' OptionalRefs]? ')'
-bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
+bool LLParser::parseVariableSummary(std::string Name, GlobalValue::GUID GUID,
unsigned ID) {
assert(Lex.getKind() == lltok::kw_variable);
Lex.Lex();
@@ -8337,31 +8601,31 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
GlobalObject::VCallVisibilityPublic);
std::vector<ValueInfo> Refs;
VTableFuncList VTableFuncs;
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseModuleReference(ModulePath) ||
- ParseToken(lltok::comma, "expected ',' here") || ParseGVFlags(GVFlags) ||
- ParseToken(lltok::comma, "expected ',' here") ||
- ParseGVarFlags(GVarFlags))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here") ||
+ parseModuleReference(ModulePath) ||
+ parseToken(lltok::comma, "expected ',' here") || parseGVFlags(GVFlags) ||
+ parseToken(lltok::comma, "expected ',' here") ||
+ parseGVarFlags(GVarFlags))
return true;
- // Parse optional fields
+ // parse optional fields
while (EatIfPresent(lltok::comma)) {
switch (Lex.getKind()) {
case lltok::kw_vTableFuncs:
- if (ParseOptionalVTableFuncs(VTableFuncs))
+ if (parseOptionalVTableFuncs(VTableFuncs))
return true;
break;
case lltok::kw_refs:
- if (ParseOptionalRefs(Refs))
+ if (parseOptionalRefs(Refs))
return true;
break;
default:
- return Error(Lex.getLoc(), "expected optional variable summary field");
+ return error(Lex.getLoc(), "expected optional variable summary field");
}
}
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
auto GS =
@@ -8370,7 +8634,7 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
GS->setModulePath(ModulePath);
GS->setVTableFuncs(std::move(VTableFuncs));
- AddGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
+ addGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
ID, std::move(GS));
return false;
@@ -8379,7 +8643,7 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
/// AliasSummary
/// ::= 'alias' ':' '(' 'module' ':' ModuleReference ',' GVFlags ','
/// 'aliasee' ':' GVReference ')'
-bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
+bool LLParser::parseAliasSummary(std::string Name, GlobalValue::GUID GUID,
unsigned ID) {
assert(Lex.getKind() == lltok::kw_alias);
LocTy Loc = Lex.getLoc();
@@ -8389,21 +8653,21 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags(
/*Linkage=*/GlobalValue::ExternalLinkage, /*NotEligibleToImport=*/false,
/*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false);
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseModuleReference(ModulePath) ||
- ParseToken(lltok::comma, "expected ',' here") || ParseGVFlags(GVFlags) ||
- ParseToken(lltok::comma, "expected ',' here") ||
- ParseToken(lltok::kw_aliasee, "expected 'aliasee' here") ||
- ParseToken(lltok::colon, "expected ':' here"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here") ||
+ parseModuleReference(ModulePath) ||
+ parseToken(lltok::comma, "expected ',' here") || parseGVFlags(GVFlags) ||
+ parseToken(lltok::comma, "expected ',' here") ||
+ parseToken(lltok::kw_aliasee, "expected 'aliasee' here") ||
+ parseToken(lltok::colon, "expected ':' here"))
return true;
ValueInfo AliaseeVI;
unsigned GVId;
- if (ParseGVReference(AliaseeVI, GVId))
+ if (parseGVReference(AliaseeVI, GVId))
return true;
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
auto AS = std::make_unique<AliasSummary>(GVFlags);
@@ -8412,16 +8676,14 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
// Record forward reference if the aliasee is not parsed yet.
if (AliaseeVI.getRef() == FwdVIRef) {
- auto FwdRef = ForwardRefAliasees.insert(
- std::make_pair(GVId, std::vector<std::pair<AliasSummary *, LocTy>>()));
- FwdRef.first->second.push_back(std::make_pair(AS.get(), Loc));
+ ForwardRefAliasees[GVId].emplace_back(AS.get(), Loc);
} else {
auto Summary = Index->findSummaryInModule(AliaseeVI, ModulePath);
assert(Summary && "Aliasee must be a definition");
AS->setAliasee(AliaseeVI, Summary);
}
- AddGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
+ addGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
ID, std::move(AS));
return false;
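
Besides the rename, parseAliasSummary above also picks up a small cleanup: the insert-an-empty-vector-then-push_back dance on ForwardRefAliasees collapses into ForwardRefAliasees[GVId].emplace_back(AS.get(), Loc). The following plain-C++ sketch (standard-library types only, not the LLVM ones) shows the two spellings are equivalent, since map::operator[] default-constructs the mapped vector on first use.

    #include <cassert>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      using LocTy = unsigned;
      std::map<unsigned, std::vector<std::pair<std::string *, LocTy>>> OldStyle, NewStyle;
      std::string Summary = "alias summary";      // stand-in for AliasSummary*
      unsigned GVId = 7;
      LocTy Loc = 42;

      // Old spelling: insert an empty vector (a no-op if the key already
      // exists), then push_back a make_pair onto whatever insert() returned.
      auto FwdRef = OldStyle.insert(
          std::make_pair(GVId, std::vector<std::pair<std::string *, LocTy>>()));
      FwdRef.first->second.push_back(std::make_pair(&Summary, Loc));

      // New spelling: operator[] value-initialises the vector on demand.
      NewStyle[GVId].emplace_back(&Summary, Loc);

      assert(OldStyle == NewStyle);
      return 0;
    }

The same simplification is applied to ForwardRefValueInfos and ForwardRefTypeIds in the later hunks.
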
@@ -8429,9 +8691,9 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
/// Flag
/// ::= [0|1]
-bool LLParser::ParseFlag(unsigned &Val) {
+bool LLParser::parseFlag(unsigned &Val) {
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
- return TokError("expected integer");
+ return tokError("expected integer");
Val = (unsigned)Lex.getAPSIntVal().getBoolValue();
Lex.Lex();
return false;
@@ -8444,12 +8706,12 @@ bool LLParser::ParseFlag(unsigned &Val) {
/// [',' 'noInline' ':' Flag]? ')'
/// [',' 'alwaysInline' ':' Flag]? ')'
-bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
+bool LLParser::parseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
assert(Lex.getKind() == lltok::kw_funcFlags);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' in funcFlags") |
- ParseToken(lltok::lparen, "expected '(' in funcFlags"))
+ if (parseToken(lltok::colon, "expected ':' in funcFlags") |
+ parseToken(lltok::lparen, "expected '(' in funcFlags"))
return true;
do {
@@ -8457,46 +8719,46 @@ bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
switch (Lex.getKind()) {
case lltok::kw_readNone:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Val))
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
return true;
FFlags.ReadNone = Val;
break;
case lltok::kw_readOnly:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Val))
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
return true;
FFlags.ReadOnly = Val;
break;
case lltok::kw_noRecurse:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Val))
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
return true;
FFlags.NoRecurse = Val;
break;
case lltok::kw_returnDoesNotAlias:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Val))
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
return true;
FFlags.ReturnDoesNotAlias = Val;
break;
case lltok::kw_noInline:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Val))
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
return true;
FFlags.NoInline = Val;
break;
case lltok::kw_alwaysInline:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Val))
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
return true;
FFlags.AlwaysInline = Val;
break;
default:
- return Error(Lex.getLoc(), "expected function flag type");
+ return error(Lex.getLoc(), "expected function flag type");
}
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' in funcFlags"))
+ if (parseToken(lltok::rparen, "expected ')' in funcFlags"))
return true;
return false;
@@ -8506,26 +8768,26 @@ bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
/// := 'calls' ':' '(' Call [',' Call]* ')'
/// Call ::= '(' 'callee' ':' GVReference
/// [( ',' 'hotness' ':' Hotness | ',' 'relbf' ':' UInt32 )]? ')'
-bool LLParser::ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
+bool LLParser::parseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
assert(Lex.getKind() == lltok::kw_calls);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' in calls") |
- ParseToken(lltok::lparen, "expected '(' in calls"))
+ if (parseToken(lltok::colon, "expected ':' in calls") |
+ parseToken(lltok::lparen, "expected '(' in calls"))
return true;
IdToIndexMapType IdToIndexMap;
- // Parse each call edge
+ // parse each call edge
do {
ValueInfo VI;
- if (ParseToken(lltok::lparen, "expected '(' in call") ||
- ParseToken(lltok::kw_callee, "expected 'callee' in call") ||
- ParseToken(lltok::colon, "expected ':'"))
+ if (parseToken(lltok::lparen, "expected '(' in call") ||
+ parseToken(lltok::kw_callee, "expected 'callee' in call") ||
+ parseToken(lltok::colon, "expected ':'"))
return true;
LocTy Loc = Lex.getLoc();
unsigned GVId;
- if (ParseGVReference(VI, GVId))
+ if (parseGVReference(VI, GVId))
return true;
CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown;
@@ -8533,11 +8795,11 @@ bool LLParser::ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
if (EatIfPresent(lltok::comma)) {
// Expect either hotness or relbf
if (EatIfPresent(lltok::kw_hotness)) {
- if (ParseToken(lltok::colon, "expected ':'") || ParseHotness(Hotness))
+ if (parseToken(lltok::colon, "expected ':'") || parseHotness(Hotness))
return true;
} else {
- if (ParseToken(lltok::kw_relbf, "expected relbf") ||
- ParseToken(lltok::colon, "expected ':'") || ParseUInt32(RelBF))
+ if (parseToken(lltok::kw_relbf, "expected relbf") ||
+ parseToken(lltok::colon, "expected ':'") || parseUInt32(RelBF))
return true;
}
}
@@ -8548,24 +8810,22 @@ bool LLParser::ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
IdToIndexMap[GVId].push_back(std::make_pair(Calls.size(), Loc));
Calls.push_back(FunctionSummary::EdgeTy{VI, CalleeInfo(Hotness, RelBF)});
- if (ParseToken(lltok::rparen, "expected ')' in call"))
+ if (parseToken(lltok::rparen, "expected ')' in call"))
return true;
} while (EatIfPresent(lltok::comma));
// Now that the Calls vector is finalized, it is safe to save the locations
// of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
+ auto &Infos = ForwardRefValueInfos[I.first];
for (auto P : I.second) {
assert(Calls[P.first].first.getRef() == FwdVIRef &&
"Forward referenced ValueInfo expected to be empty");
- auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
- I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
- FwdRef.first->second.push_back(
- std::make_pair(&Calls[P.first].first, P.second));
+ Infos.emplace_back(&Calls[P.first].first, P.second);
}
}
- if (ParseToken(lltok::rparen, "expected ')' in calls"))
+ if (parseToken(lltok::rparen, "expected ')' in calls"))
return true;
return false;
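
parseOptionalCalls records forward references in two phases: while parsing, only the index into Calls plus the source location is remembered in IdToIndexMap; once the vector has stopped growing (so reallocation can no longer invalidate element addresses), pointers to the unresolved slots are appended to ForwardRefValueInfos. A simplified stand-alone sketch of that pattern, with stand-in types (ValueInfo as int, 0 playing the role of FwdVIRef):

    #include <iostream>
    #include <map>
    #include <utility>
    #include <vector>

    using LocTy = unsigned;
    using ValueInfo = int;   // stand-in; 0 marks an unresolved reference

    int main() {
      std::vector<ValueInfo> Calls;
      std::map<unsigned, std::vector<std::pair<unsigned, LocTy>>> IdToIndexMap;
      std::map<unsigned, std::vector<std::pair<ValueInfo *, LocTy>>> ForwardRefValueInfos;

      // Phase 1: while parsing, remember only *indices* of unresolved
      // references; the vector may still reallocate, so taking addresses
      // here would be unsafe.
      for (unsigned GVId : {3u, 5u, 3u}) {
        IdToIndexMap[GVId].emplace_back(Calls.size(), /*Loc=*/100 + GVId);
        Calls.push_back(0);
      }

      // Phase 2: Calls is final, so element addresses are stable; record
      // pointers to the slots that must be patched once the IDs resolve.
      for (const auto &I : IdToIndexMap) {
        auto &Infos = ForwardRefValueInfos[I.first];
        for (const auto &P : I.second)
          Infos.emplace_back(&Calls[P.first], P.second);
      }

      std::cout << "pending fixups for " << ForwardRefValueInfos.size() << " ids\n";
      return 0;
    }

parseOptionalVTableFuncs, parseOptionalRefs and the type-test parsers below use the same index-first, pointer-later scheme.
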
@@ -8573,7 +8833,7 @@ bool LLParser::ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
/// Hotness
/// := ('unknown'|'cold'|'none'|'hot'|'critical')
-bool LLParser::ParseHotness(CalleeInfo::HotnessType &Hotness) {
+bool LLParser::parseHotness(CalleeInfo::HotnessType &Hotness) {
switch (Lex.getKind()) {
case lltok::kw_unknown:
Hotness = CalleeInfo::HotnessType::Unknown;
@@ -8591,7 +8851,7 @@ bool LLParser::ParseHotness(CalleeInfo::HotnessType &Hotness) {
Hotness = CalleeInfo::HotnessType::Critical;
break;
default:
- return Error(Lex.getLoc(), "invalid call edge hotness");
+ return error(Lex.getLoc(), "invalid call edge hotness");
}
Lex.Lex();
return false;
@@ -8600,32 +8860,32 @@ bool LLParser::ParseHotness(CalleeInfo::HotnessType &Hotness) {
/// OptionalVTableFuncs
/// := 'vTableFuncs' ':' '(' VTableFunc [',' VTableFunc]* ')'
/// VTableFunc ::= '(' 'virtFunc' ':' GVReference ',' 'offset' ':' UInt64 ')'
-bool LLParser::ParseOptionalVTableFuncs(VTableFuncList &VTableFuncs) {
+bool LLParser::parseOptionalVTableFuncs(VTableFuncList &VTableFuncs) {
assert(Lex.getKind() == lltok::kw_vTableFuncs);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' in vTableFuncs") |
- ParseToken(lltok::lparen, "expected '(' in vTableFuncs"))
+ if (parseToken(lltok::colon, "expected ':' in vTableFuncs") |
+ parseToken(lltok::lparen, "expected '(' in vTableFuncs"))
return true;
IdToIndexMapType IdToIndexMap;
- // Parse each virtual function pair
+ // parse each virtual function pair
do {
ValueInfo VI;
- if (ParseToken(lltok::lparen, "expected '(' in vTableFunc") ||
- ParseToken(lltok::kw_virtFunc, "expected 'callee' in vTableFunc") ||
- ParseToken(lltok::colon, "expected ':'"))
+ if (parseToken(lltok::lparen, "expected '(' in vTableFunc") ||
+ parseToken(lltok::kw_virtFunc, "expected 'callee' in vTableFunc") ||
+ parseToken(lltok::colon, "expected ':'"))
return true;
LocTy Loc = Lex.getLoc();
unsigned GVId;
- if (ParseGVReference(VI, GVId))
+ if (parseGVReference(VI, GVId))
return true;
uint64_t Offset;
- if (ParseToken(lltok::comma, "expected comma") ||
- ParseToken(lltok::kw_offset, "expected offset") ||
- ParseToken(lltok::colon, "expected ':'") || ParseUInt64(Offset))
+ if (parseToken(lltok::comma, "expected comma") ||
+ parseToken(lltok::kw_offset, "expected offset") ||
+ parseToken(lltok::colon, "expected ':'") || parseUInt64(Offset))
return true;
// Keep track of the VTableFuncs array index needing a forward reference.
@@ -8635,55 +8895,53 @@ bool LLParser::ParseOptionalVTableFuncs(VTableFuncList &VTableFuncs) {
IdToIndexMap[GVId].push_back(std::make_pair(VTableFuncs.size(), Loc));
VTableFuncs.push_back({VI, Offset});
- if (ParseToken(lltok::rparen, "expected ')' in vTableFunc"))
+ if (parseToken(lltok::rparen, "expected ')' in vTableFunc"))
return true;
} while (EatIfPresent(lltok::comma));
// Now that the VTableFuncs vector is finalized, it is safe to save the
// locations of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
+ auto &Infos = ForwardRefValueInfos[I.first];
for (auto P : I.second) {
assert(VTableFuncs[P.first].FuncVI == EmptyVI &&
"Forward referenced ValueInfo expected to be empty");
- auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
- I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
- FwdRef.first->second.push_back(
- std::make_pair(&VTableFuncs[P.first].FuncVI, P.second));
+ Infos.emplace_back(&VTableFuncs[P.first].FuncVI, P.second);
}
}
- if (ParseToken(lltok::rparen, "expected ')' in vTableFuncs"))
+ if (parseToken(lltok::rparen, "expected ')' in vTableFuncs"))
return true;
return false;
}
/// ParamNo := 'param' ':' UInt64
-bool LLParser::ParseParamNo(uint64_t &ParamNo) {
- if (ParseToken(lltok::kw_param, "expected 'param' here") ||
- ParseToken(lltok::colon, "expected ':' here") || ParseUInt64(ParamNo))
+bool LLParser::parseParamNo(uint64_t &ParamNo) {
+ if (parseToken(lltok::kw_param, "expected 'param' here") ||
+ parseToken(lltok::colon, "expected ':' here") || parseUInt64(ParamNo))
return true;
return false;
}
/// ParamAccessOffset := 'offset' ':' '[' APSINTVAL ',' APSINTVAL ']'
-bool LLParser::ParseParamAccessOffset(ConstantRange &Range) {
+bool LLParser::parseParamAccessOffset(ConstantRange &Range) {
APSInt Lower;
APSInt Upper;
auto ParseAPSInt = [&](APSInt &Val) {
if (Lex.getKind() != lltok::APSInt)
- return TokError("expected integer");
+ return tokError("expected integer");
Val = Lex.getAPSIntVal();
Val = Val.extOrTrunc(FunctionSummary::ParamAccess::RangeWidth);
Val.setIsSigned(true);
Lex.Lex();
return false;
};
- if (ParseToken(lltok::kw_offset, "expected 'offset' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lsquare, "expected '[' here") || ParseAPSInt(Lower) ||
- ParseToken(lltok::comma, "expected ',' here") || ParseAPSInt(Upper) ||
- ParseToken(lltok::rsquare, "expected ']' here"))
+ if (parseToken(lltok::kw_offset, "expected 'offset' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lsquare, "expected '[' here") || ParseAPSInt(Lower) ||
+ parseToken(lltok::comma, "expected ',' here") || ParseAPSInt(Upper) ||
+ parseToken(lltok::rsquare, "expected ']' here"))
return true;
++Upper;
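
The ++Upper at the end of parseParamAccessOffset is the one non-obvious line in this hunk: the textual form offset: [Lower, Upper] is inclusive at both ends, while the ConstantRange built from it treats the upper bound as exclusive, so the parsed value is bumped by one. A plain-C++ sketch of that inclusive-to-half-open conversion (HalfOpenRange is an illustrative stand-in, not ConstantRange):

    #include <cassert>

    struct HalfOpenRange {                 // stand-in for a [Lower, Upper) range
      long Lower, Upper;
      bool contains(long V) const { return Lower <= V && V < Upper; }
    };

    int main() {
      long Lower = -4, Upper = 8;          // as written: offset: [-4, 8]
      ++Upper;                             // make the inclusive bound exclusive
      HalfOpenRange R{Lower, Upper};
      assert(R.contains(-4) && R.contains(8) && !R.contains(9));
      return 0;
    }
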
@@ -8697,26 +8955,29 @@ bool LLParser::ParseParamAccessOffset(ConstantRange &Range) {
/// ParamAccessCall
/// := '(' 'callee' ':' GVReference ',' ParamNo ',' ParamAccessOffset ')'
-bool LLParser::ParseParamAccessCall(FunctionSummary::ParamAccess::Call &Call) {
- if (ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_callee, "expected 'callee' here") ||
- ParseToken(lltok::colon, "expected ':' here"))
+bool LLParser::parseParamAccessCall(FunctionSummary::ParamAccess::Call &Call,
+ IdLocListType &IdLocList) {
+ if (parseToken(lltok::lparen, "expected '(' here") ||
+ parseToken(lltok::kw_callee, "expected 'callee' here") ||
+ parseToken(lltok::colon, "expected ':' here"))
return true;
unsigned GVId;
ValueInfo VI;
- if (ParseGVReference(VI, GVId))
+ LocTy Loc = Lex.getLoc();
+ if (parseGVReference(VI, GVId))
return true;
- Call.Callee = VI.getGUID();
+ Call.Callee = VI;
+ IdLocList.emplace_back(GVId, Loc);
- if (ParseToken(lltok::comma, "expected ',' here") ||
- ParseParamNo(Call.ParamNo) ||
- ParseToken(lltok::comma, "expected ',' here") ||
- ParseParamAccessOffset(Call.Offsets))
+ if (parseToken(lltok::comma, "expected ',' here") ||
+ parseParamNo(Call.ParamNo) ||
+ parseToken(lltok::comma, "expected ',' here") ||
+ parseParamAccessOffset(Call.Offsets))
return true;
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -8725,30 +8986,31 @@ bool LLParser::ParseParamAccessCall(FunctionSummary::ParamAccess::Call &Call) {
/// ParamAccess
/// := '(' ParamNo ',' ParamAccessOffset [',' OptionalParamAccessCalls]? ')'
/// OptionalParamAccessCalls := '(' Call [',' Call]* ')'
-bool LLParser::ParseParamAccess(FunctionSummary::ParamAccess &Param) {
- if (ParseToken(lltok::lparen, "expected '(' here") ||
- ParseParamNo(Param.ParamNo) ||
- ParseToken(lltok::comma, "expected ',' here") ||
- ParseParamAccessOffset(Param.Use))
+bool LLParser::parseParamAccess(FunctionSummary::ParamAccess &Param,
+ IdLocListType &IdLocList) {
+ if (parseToken(lltok::lparen, "expected '(' here") ||
+ parseParamNo(Param.ParamNo) ||
+ parseToken(lltok::comma, "expected ',' here") ||
+ parseParamAccessOffset(Param.Use))
return true;
if (EatIfPresent(lltok::comma)) {
- if (ParseToken(lltok::kw_calls, "expected 'calls' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::kw_calls, "expected 'calls' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
do {
FunctionSummary::ParamAccess::Call Call;
- if (ParseParamAccessCall(Call))
+ if (parseParamAccessCall(Call, IdLocList))
return true;
Param.Calls.push_back(Call);
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
}
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -8756,36 +9018,53 @@ bool LLParser::ParseParamAccess(FunctionSummary::ParamAccess &Param) {
/// OptionalParamAccesses
/// := 'params' ':' '(' ParamAccess [',' ParamAccess]* ')'
-bool LLParser::ParseOptionalParamAccesses(
+bool LLParser::parseOptionalParamAccesses(
std::vector<FunctionSummary::ParamAccess> &Params) {
assert(Lex.getKind() == lltok::kw_params);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
+ IdLocListType VContexts;
+ size_t CallsNum = 0;
do {
FunctionSummary::ParamAccess ParamAccess;
- if (ParseParamAccess(ParamAccess))
+ if (parseParamAccess(ParamAccess, VContexts))
return true;
- Params.push_back(ParamAccess);
+ CallsNum += ParamAccess.Calls.size();
+ assert(VContexts.size() == CallsNum);
+ Params.emplace_back(std::move(ParamAccess));
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
+ // Now that the Params is finalized, it is safe to save the locations
+ // of any forward GV references that need updating later.
+ IdLocListType::const_iterator ItContext = VContexts.begin();
+ for (auto &PA : Params) {
+ for (auto &C : PA.Calls) {
+ if (C.Callee.getRef() == FwdVIRef)
+ ForwardRefValueInfos[ItContext->first].emplace_back(&C.Callee,
+ ItContext->second);
+ ++ItContext;
+ }
+ }
+ assert(ItContext == VContexts.end());
+
return false;
}
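
With Call.Callee now holding a ValueInfo instead of a bare GUID (see the parseParamAccessCall hunk above), parseOptionalParamAccesses must register forward references for callees that are still unresolved. Each parsed call appends one (GVId, Loc) pair to the flat VContexts list in parse order, so once Params is final a single iterator can be advanced in lockstep with the nested Params/Calls walk; the closing assert checks the two sequences had the same length. A simplified stand-alone sketch with illustrative stand-in types (Call, ParamAccess, 0 as the unresolved marker):

    #include <cassert>
    #include <utility>
    #include <vector>

    struct Call { int Callee = 0; };              // 0 plays the role of FwdVIRef
    struct ParamAccess { std::vector<Call> Calls; };

    int main() {
      std::vector<ParamAccess> Params(2);
      Params[0].Calls = {Call{0}, Call{7}};       // one unresolved, one resolved
      Params[1].Calls = {Call{0}};                // unresolved

      // One (Id, Loc) pair per parsed call, in the order the calls appeared.
      std::vector<std::pair<unsigned, unsigned>> IdLocList = {
          {3, 101}, {4, 102}, {5, 103}};

      std::vector<std::pair<Call *, unsigned>> Fixups;
      auto It = IdLocList.cbegin();
      for (auto &PA : Params)
        for (auto &C : PA.Calls) {
          if (C.Callee == 0)                      // still unresolved: patch later
            Fixups.emplace_back(&C, It->second);
          ++It;
        }
      assert(It == IdLocList.cend());
      assert(Fixups.size() == 2);
      return 0;
    }
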
/// OptionalRefs
/// := 'refs' ':' '(' GVReference [',' GVReference]* ')'
-bool LLParser::ParseOptionalRefs(std::vector<ValueInfo> &Refs) {
+bool LLParser::parseOptionalRefs(std::vector<ValueInfo> &Refs) {
assert(Lex.getKind() == lltok::kw_refs);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' in refs") ||
- ParseToken(lltok::lparen, "expected '(' in refs"))
+ if (parseToken(lltok::colon, "expected ':' in refs") ||
+ parseToken(lltok::lparen, "expected '(' in refs"))
return true;
struct ValueContext {
@@ -8794,11 +9073,11 @@ bool LLParser::ParseOptionalRefs(std::vector<ValueInfo> &Refs) {
LocTy Loc;
};
std::vector<ValueContext> VContexts;
- // Parse each ref edge
+ // parse each ref edge
do {
ValueContext VC;
VC.Loc = Lex.getLoc();
- if (ParseGVReference(VC.VI, VC.GVId))
+ if (parseGVReference(VC.VI, VC.GVId))
return true;
VContexts.push_back(VC);
} while (EatIfPresent(lltok::comma));
@@ -8823,16 +9102,15 @@ bool LLParser::ParseOptionalRefs(std::vector<ValueInfo> &Refs) {
// Now that the Refs vector is finalized, it is safe to save the locations
// of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
+ auto &Infos = ForwardRefValueInfos[I.first];
for (auto P : I.second) {
assert(Refs[P.first].getRef() == FwdVIRef &&
"Forward referenced ValueInfo expected to be empty");
- auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
- I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
- FwdRef.first->second.push_back(std::make_pair(&Refs[P.first], P.second));
+ Infos.emplace_back(&Refs[P.first], P.second);
}
}
- if (ParseToken(lltok::rparen, "expected ')' in refs"))
+ if (parseToken(lltok::rparen, "expected ')' in refs"))
return true;
return false;
@@ -8842,47 +9120,47 @@ bool LLParser::ParseOptionalRefs(std::vector<ValueInfo> &Refs) {
/// := 'typeidinfo' ':' '(' [',' TypeTests]? [',' TypeTestAssumeVCalls]?
/// [',' TypeCheckedLoadVCalls]? [',' TypeTestAssumeConstVCalls]?
/// [',' TypeCheckedLoadConstVCalls]? ')'
-bool LLParser::ParseOptionalTypeIdInfo(
+bool LLParser::parseOptionalTypeIdInfo(
FunctionSummary::TypeIdInfo &TypeIdInfo) {
assert(Lex.getKind() == lltok::kw_typeIdInfo);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' in typeIdInfo"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' in typeIdInfo"))
return true;
do {
switch (Lex.getKind()) {
case lltok::kw_typeTests:
- if (ParseTypeTests(TypeIdInfo.TypeTests))
+ if (parseTypeTests(TypeIdInfo.TypeTests))
return true;
break;
case lltok::kw_typeTestAssumeVCalls:
- if (ParseVFuncIdList(lltok::kw_typeTestAssumeVCalls,
+ if (parseVFuncIdList(lltok::kw_typeTestAssumeVCalls,
TypeIdInfo.TypeTestAssumeVCalls))
return true;
break;
case lltok::kw_typeCheckedLoadVCalls:
- if (ParseVFuncIdList(lltok::kw_typeCheckedLoadVCalls,
+ if (parseVFuncIdList(lltok::kw_typeCheckedLoadVCalls,
TypeIdInfo.TypeCheckedLoadVCalls))
return true;
break;
case lltok::kw_typeTestAssumeConstVCalls:
- if (ParseConstVCallList(lltok::kw_typeTestAssumeConstVCalls,
+ if (parseConstVCallList(lltok::kw_typeTestAssumeConstVCalls,
TypeIdInfo.TypeTestAssumeConstVCalls))
return true;
break;
case lltok::kw_typeCheckedLoadConstVCalls:
- if (ParseConstVCallList(lltok::kw_typeCheckedLoadConstVCalls,
+ if (parseConstVCallList(lltok::kw_typeCheckedLoadConstVCalls,
TypeIdInfo.TypeCheckedLoadConstVCalls))
return true;
break;
default:
- return Error(Lex.getLoc(), "invalid typeIdInfo list type");
+ return error(Lex.getLoc(), "invalid typeIdInfo list type");
}
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' in typeIdInfo"))
+ if (parseToken(lltok::rparen, "expected ')' in typeIdInfo"))
return true;
return false;
@@ -8891,12 +9169,12 @@ bool LLParser::ParseOptionalTypeIdInfo(
/// TypeTests
/// ::= 'typeTests' ':' '(' (SummaryID | UInt64)
/// [',' (SummaryID | UInt64)]* ')'
-bool LLParser::ParseTypeTests(std::vector<GlobalValue::GUID> &TypeTests) {
+bool LLParser::parseTypeTests(std::vector<GlobalValue::GUID> &TypeTests) {
assert(Lex.getKind() == lltok::kw_typeTests);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' in typeIdInfo"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' in typeIdInfo"))
return true;
IdToIndexMapType IdToIndexMap;
@@ -8910,7 +9188,7 @@ bool LLParser::ParseTypeTests(std::vector<GlobalValue::GUID> &TypeTests) {
// can only do so once the std::vector is finalized.
IdToIndexMap[ID].push_back(std::make_pair(TypeTests.size(), Loc));
Lex.Lex();
- } else if (ParseUInt64(GUID))
+ } else if (parseUInt64(GUID))
return true;
TypeTests.push_back(GUID);
} while (EatIfPresent(lltok::comma));
@@ -8918,17 +9196,15 @@ bool LLParser::ParseTypeTests(std::vector<GlobalValue::GUID> &TypeTests) {
// Now that the TypeTests vector is finalized, it is safe to save the
// locations of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
+ auto &Ids = ForwardRefTypeIds[I.first];
for (auto P : I.second) {
assert(TypeTests[P.first] == 0 &&
"Forward referenced type id GUID expected to be 0");
- auto FwdRef = ForwardRefTypeIds.insert(std::make_pair(
- I.first, std::vector<std::pair<GlobalValue::GUID *, LocTy>>()));
- FwdRef.first->second.push_back(
- std::make_pair(&TypeTests[P.first], P.second));
+ Ids.emplace_back(&TypeTests[P.first], P.second);
}
}
- if (ParseToken(lltok::rparen, "expected ')' in typeIdInfo"))
+ if (parseToken(lltok::rparen, "expected ')' in typeIdInfo"))
return true;
return false;
@@ -8936,36 +9212,34 @@ bool LLParser::ParseTypeTests(std::vector<GlobalValue::GUID> &TypeTests) {
/// VFuncIdList
/// ::= Kind ':' '(' VFuncId [',' VFuncId]* ')'
-bool LLParser::ParseVFuncIdList(
+bool LLParser::parseVFuncIdList(
lltok::Kind Kind, std::vector<FunctionSummary::VFuncId> &VFuncIdList) {
assert(Lex.getKind() == Kind);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
IdToIndexMapType IdToIndexMap;
do {
FunctionSummary::VFuncId VFuncId;
- if (ParseVFuncId(VFuncId, IdToIndexMap, VFuncIdList.size()))
+ if (parseVFuncId(VFuncId, IdToIndexMap, VFuncIdList.size()))
return true;
VFuncIdList.push_back(VFuncId);
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
// Now that the VFuncIdList vector is finalized, it is safe to save the
// locations of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
+ auto &Ids = ForwardRefTypeIds[I.first];
for (auto P : I.second) {
assert(VFuncIdList[P.first].GUID == 0 &&
"Forward referenced type id GUID expected to be 0");
- auto FwdRef = ForwardRefTypeIds.insert(std::make_pair(
- I.first, std::vector<std::pair<GlobalValue::GUID *, LocTy>>()));
- FwdRef.first->second.push_back(
- std::make_pair(&VFuncIdList[P.first].GUID, P.second));
+ Ids.emplace_back(&VFuncIdList[P.first].GUID, P.second);
}
}
@@ -8974,37 +9248,35 @@ bool LLParser::ParseVFuncIdList(
/// ConstVCallList
/// ::= Kind ':' '(' ConstVCall [',' ConstVCall]* ')'
-bool LLParser::ParseConstVCallList(
+bool LLParser::parseConstVCallList(
lltok::Kind Kind,
std::vector<FunctionSummary::ConstVCall> &ConstVCallList) {
assert(Lex.getKind() == Kind);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
IdToIndexMapType IdToIndexMap;
do {
FunctionSummary::ConstVCall ConstVCall;
- if (ParseConstVCall(ConstVCall, IdToIndexMap, ConstVCallList.size()))
+ if (parseConstVCall(ConstVCall, IdToIndexMap, ConstVCallList.size()))
return true;
ConstVCallList.push_back(ConstVCall);
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
// Now that the ConstVCallList vector is finalized, it is safe to save the
// locations of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
+ auto &Ids = ForwardRefTypeIds[I.first];
for (auto P : I.second) {
assert(ConstVCallList[P.first].VFunc.GUID == 0 &&
"Forward referenced type id GUID expected to be 0");
- auto FwdRef = ForwardRefTypeIds.insert(std::make_pair(
- I.first, std::vector<std::pair<GlobalValue::GUID *, LocTy>>()));
- FwdRef.first->second.push_back(
- std::make_pair(&ConstVCallList[P.first].VFunc.GUID, P.second));
+ Ids.emplace_back(&ConstVCallList[P.first].VFunc.GUID, P.second);
}
}
@@ -9013,17 +9285,17 @@ bool LLParser::ParseConstVCallList(
/// ConstVCall
/// ::= '(' VFuncId ',' Args ')'
-bool LLParser::ParseConstVCall(FunctionSummary::ConstVCall &ConstVCall,
+bool LLParser::parseConstVCall(FunctionSummary::ConstVCall &ConstVCall,
IdToIndexMapType &IdToIndexMap, unsigned Index) {
- if (ParseToken(lltok::lparen, "expected '(' here") ||
- ParseVFuncId(ConstVCall.VFunc, IdToIndexMap, Index))
+ if (parseToken(lltok::lparen, "expected '(' here") ||
+ parseVFuncId(ConstVCall.VFunc, IdToIndexMap, Index))
return true;
if (EatIfPresent(lltok::comma))
- if (ParseArgs(ConstVCall.Args))
+ if (parseArgs(ConstVCall.Args))
return true;
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -9032,13 +9304,13 @@ bool LLParser::ParseConstVCall(FunctionSummary::ConstVCall &ConstVCall,
/// VFuncId
/// ::= 'vFuncId' ':' '(' (SummaryID | 'guid' ':' UInt64) ','
/// 'offset' ':' UInt64 ')'
-bool LLParser::ParseVFuncId(FunctionSummary::VFuncId &VFuncId,
+bool LLParser::parseVFuncId(FunctionSummary::VFuncId &VFuncId,
IdToIndexMapType &IdToIndexMap, unsigned Index) {
assert(Lex.getKind() == lltok::kw_vFuncId);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
if (Lex.getKind() == lltok::SummaryID) {
@@ -9050,16 +9322,16 @@ bool LLParser::ParseVFuncId(FunctionSummary::VFuncId &VFuncId,
// can only do so once the caller's std::vector is finalized.
IdToIndexMap[ID].push_back(std::make_pair(Index, Loc));
Lex.Lex();
- } else if (ParseToken(lltok::kw_guid, "expected 'guid' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseUInt64(VFuncId.GUID))
+ } else if (parseToken(lltok::kw_guid, "expected 'guid' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseUInt64(VFuncId.GUID))
return true;
- if (ParseToken(lltok::comma, "expected ',' here") ||
- ParseToken(lltok::kw_offset, "expected 'offset' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseUInt64(VFuncId.Offset) ||
- ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::comma, "expected ',' here") ||
+ parseToken(lltok::kw_offset, "expected 'offset' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseUInt64(VFuncId.Offset) ||
+ parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -9069,12 +9341,12 @@ bool LLParser::ParseVFuncId(FunctionSummary::VFuncId &VFuncId,
/// ::= 'flags' ':' '(' 'linkage' ':' OptionalLinkageAux ','
/// 'notEligibleToImport' ':' Flag ',' 'live' ':' Flag ','
/// 'dsoLocal' ':' Flag ',' 'canAutoHide' ':' Flag ')'
-bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
+bool LLParser::parseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
assert(Lex.getKind() == lltok::kw_flags);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
do {
@@ -9082,7 +9354,7 @@ bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
switch (Lex.getKind()) {
case lltok::kw_linkage:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'"))
+ if (parseToken(lltok::colon, "expected ':'"))
return true;
bool HasLinkage;
GVFlags.Linkage = parseOptionalLinkageAux(Lex.getKind(), HasLinkage);
@@ -9091,34 +9363,34 @@ bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
break;
case lltok::kw_notEligibleToImport:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Flag))
return true;
GVFlags.NotEligibleToImport = Flag;
break;
case lltok::kw_live:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Flag))
return true;
GVFlags.Live = Flag;
break;
case lltok::kw_dsoLocal:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Flag))
return true;
GVFlags.DSOLocal = Flag;
break;
case lltok::kw_canAutoHide:
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Flag))
return true;
GVFlags.CanAutoHide = Flag;
break;
default:
- return Error(Lex.getLoc(), "expected gv flag type");
+ return error(Lex.getLoc(), "expected gv flag type");
}
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rparen, "expected ')' here"))
+ if (parseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -9128,19 +9400,19 @@ bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
/// ::= 'varFlags' ':' '(' 'readonly' ':' Flag
/// ',' 'writeonly' ':' Flag
/// ',' 'constant' ':' Flag ')'
-bool LLParser::ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags) {
+bool LLParser::parseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags) {
assert(Lex.getKind() == lltok::kw_varFlags);
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here"))
+ if (parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::lparen, "expected '(' here"))
return true;
auto ParseRest = [this](unsigned int &Val) {
Lex.Lex();
- if (ParseToken(lltok::colon, "expected ':'"))
+ if (parseToken(lltok::colon, "expected ':'"))
return true;
- return ParseFlag(Val);
+ return parseFlag(Val);
};
do {
@@ -9167,19 +9439,19 @@ bool LLParser::ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags) {
GVarFlags.VCallVisibility = Flag;
break;
default:
- return Error(Lex.getLoc(), "expected gvar flag type");
+ return error(Lex.getLoc(), "expected gvar flag type");
}
} while (EatIfPresent(lltok::comma));
- return ParseToken(lltok::rparen, "expected ')' here");
+ return parseToken(lltok::rparen, "expected ')' here");
}
/// ModuleReference
/// ::= 'module' ':' UInt
-bool LLParser::ParseModuleReference(StringRef &ModulePath) {
- // Parse module id.
- if (ParseToken(lltok::kw_module, "expected 'module' here") ||
- ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::SummaryID, "expected module ID"))
+bool LLParser::parseModuleReference(StringRef &ModulePath) {
+ // parse module id.
+ if (parseToken(lltok::kw_module, "expected 'module' here") ||
+ parseToken(lltok::colon, "expected ':' here") ||
+ parseToken(lltok::SummaryID, "expected module ID"))
return true;
unsigned ModuleID = Lex.getUIntVal();
@@ -9192,11 +9464,11 @@ bool LLParser::ParseModuleReference(StringRef &ModulePath) {
/// GVReference
/// ::= SummaryID
-bool LLParser::ParseGVReference(ValueInfo &VI, unsigned &GVId) {
+bool LLParser::parseGVReference(ValueInfo &VI, unsigned &GVId) {
bool WriteOnly = false, ReadOnly = EatIfPresent(lltok::kw_readonly);
if (!ReadOnly)
WriteOnly = EatIfPresent(lltok::kw_writeonly);
- if (ParseToken(lltok::SummaryID, "expected GV ID"))
+ if (parseToken(lltok::SummaryID, "expected GV ID"))
return true;
GVId = Lex.getUIntVal();
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.h b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.h
index ebd8655dc35e..aa79823ce986 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.h
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.h
@@ -46,7 +46,7 @@ namespace llvm {
t_LocalID, t_GlobalID, // ID in UIntVal.
t_LocalName, t_GlobalName, // Name in StrVal.
t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal.
- t_Null, t_Undef, t_Zero, t_None, // No value.
+ t_Null, t_Undef, t_Zero, t_None, t_Poison, // No value.
t_EmptyArray, // No value: []
t_Constant, // Value in ConstantVal.
t_InlineAsm, // Value in FTy/StrVal/StrVal2/UIntVal.
@@ -166,8 +166,8 @@ namespace llvm {
: Context(Context), Lex(F, SM, Err, Context), M(M), Index(Index),
Slots(Slots), BlockAddressPFS(nullptr) {}
bool Run(
- bool UpgradeDebugInfo,
- DataLayoutCallbackTy DataLayoutCallback = [](Module *) {});
+ bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback =
+ [](StringRef) { return None; });
bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots);
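
The Run() hunk above also changes the default DataLayoutCallback from a no-op lambda over Module* to [](StringRef) { return None; }, so the callback now reads as a pure mapping from a string (presumably the target or the parsed layout) to an optional replacement data layout, with None meaning "keep what was parsed", rather than a hook that mutates the module in place. Below is a rough stand-alone analogue using std::optional; the DataLayoutCallback alias, the triple check and the layout string are assumptions for illustration only, not the real DataLayoutCallbackTy.

    #include <functional>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <string_view>

    using DataLayoutCallback =
        std::function<std::optional<std::string>(std::string_view TargetTriple)>;

    int main() {
      // Default behaviour after this change: never override the parsed layout.
      DataLayoutCallback KeepDefault = [](std::string_view) { return std::nullopt; };

      // A caller that overrides the layout for one target (hypothetical values).
      DataLayoutCallback Override =
          [](std::string_view Triple) -> std::optional<std::string> {
        if (Triple == "x86_64-unknown-linux-gnu")
          return "e-m:e-i64:64-n8:16:32:64";
        return std::nullopt;
      };

      std::cout << (KeepDefault("aarch64").has_value() ? "override" : "keep") << '\n';
      std::cout << (Override("x86_64-unknown-linux-gnu").has_value() ? "override" : "keep") << '\n';
      return 0;
    }
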
@@ -177,31 +177,26 @@ namespace llvm {
LLVMContext &getContext() { return Context; }
private:
-
- bool Error(LocTy L, const Twine &Msg) const {
- return Lex.Error(L, Msg);
- }
- bool TokError(const Twine &Msg) const {
- return Error(Lex.getLoc(), Msg);
- }
+ bool error(LocTy L, const Twine &Msg) const { return Lex.Error(L, Msg); }
+ bool tokError(const Twine &Msg) const { return error(Lex.getLoc(), Msg); }
/// Restore the internal name and slot mappings using the mappings that
/// were created at an earlier parsing stage.
void restoreParsingState(const SlotMapping *Slots);
- /// GetGlobalVal - Get a value with the specified name or ID, creating a
+ /// getGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
- GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc,
+ GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc,
bool IsCall);
- GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall);
+ GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall);
/// Get a Comdat with the specified name, creating a forward reference
/// record if needed.
Comdat *getComdat(const std::string &Name, LocTy Loc);
// Helper Routines.
- bool ParseToken(lltok::Kind T, const char *ErrMsg);
+ bool parseToken(lltok::Kind T, const char *ErrMsg);
bool EatIfPresent(lltok::Kind T) {
if (Lex.getKind() != T) return false;
Lex.Lex();
@@ -228,7 +223,7 @@ namespace llvm {
return FMF;
}
- bool ParseOptionalToken(lltok::Kind T, bool &Present,
+ bool parseOptionalToken(lltok::Kind T, bool &Present,
LocTy *Loc = nullptr) {
if (Lex.getKind() != T) {
Present = false;
@@ -240,80 +235,81 @@ namespace llvm {
}
return false;
}
- bool ParseStringConstant(std::string &Result);
- bool ParseUInt32(unsigned &Val);
- bool ParseUInt32(unsigned &Val, LocTy &Loc) {
+ bool parseStringConstant(std::string &Result);
+ bool parseUInt32(unsigned &Val);
+ bool parseUInt32(unsigned &Val, LocTy &Loc) {
Loc = Lex.getLoc();
- return ParseUInt32(Val);
+ return parseUInt32(Val);
}
- bool ParseUInt64(uint64_t &Val);
- bool ParseUInt64(uint64_t &Val, LocTy &Loc) {
+ bool parseUInt64(uint64_t &Val);
+ bool parseUInt64(uint64_t &Val, LocTy &Loc) {
Loc = Lex.getLoc();
- return ParseUInt64(Val);
+ return parseUInt64(Val);
}
- bool ParseFlag(unsigned &Val);
+ bool parseFlag(unsigned &Val);
- bool ParseStringAttribute(AttrBuilder &B);
+ bool parseStringAttribute(AttrBuilder &B);
- bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
- bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
- bool ParseOptionalUnnamedAddr(GlobalVariable::UnnamedAddr &UnnamedAddr);
- bool ParseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS = 0);
- bool ParseOptionalProgramAddrSpace(unsigned &AddrSpace) {
- return ParseOptionalAddrSpace(
+ bool parseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
+ bool parseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
+ bool parseOptionalUnnamedAddr(GlobalVariable::UnnamedAddr &UnnamedAddr);
+ bool parseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS = 0);
+ bool parseOptionalProgramAddrSpace(unsigned &AddrSpace) {
+ return parseOptionalAddrSpace(
AddrSpace, M->getDataLayout().getProgramAddressSpace());
};
- bool ParseOptionalParamAttrs(AttrBuilder &B);
- bool ParseOptionalReturnAttrs(AttrBuilder &B);
- bool ParseOptionalLinkage(unsigned &Res, bool &HasLinkage,
+ bool parseOptionalParamAttrs(AttrBuilder &B);
+ bool parseOptionalReturnAttrs(AttrBuilder &B);
+ bool parseOptionalLinkage(unsigned &Res, bool &HasLinkage,
unsigned &Visibility, unsigned &DLLStorageClass,
bool &DSOLocal);
- void ParseOptionalDSOLocal(bool &DSOLocal);
- void ParseOptionalVisibility(unsigned &Res);
- void ParseOptionalDLLStorageClass(unsigned &Res);
- bool ParseOptionalCallingConv(unsigned &CC);
- bool ParseOptionalAlignment(MaybeAlign &Alignment,
+ void parseOptionalDSOLocal(bool &DSOLocal);
+ void parseOptionalVisibility(unsigned &Res);
+ void parseOptionalDLLStorageClass(unsigned &Res);
+ bool parseOptionalCallingConv(unsigned &CC);
+ bool parseOptionalAlignment(MaybeAlign &Alignment,
bool AllowParens = false);
- bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
- bool ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID,
+ bool parseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
+ bool parseScopeAndOrdering(bool IsAtomic, SyncScope::ID &SSID,
AtomicOrdering &Ordering);
- bool ParseScope(SyncScope::ID &SSID);
- bool ParseOrdering(AtomicOrdering &Ordering);
- bool ParseOptionalStackAlignment(unsigned &Alignment);
- bool ParseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma);
- bool ParseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc,
+ bool parseScope(SyncScope::ID &SSID);
+ bool parseOrdering(AtomicOrdering &Ordering);
+ bool parseOptionalStackAlignment(unsigned &Alignment);
+ bool parseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma);
+ bool parseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc,
bool &AteExtraComma);
- bool ParseOptionalCommaInAlloca(bool &IsInAlloca);
+ bool parseOptionalCommaInAlloca(bool &IsInAlloca);
bool parseAllocSizeArguments(unsigned &BaseSizeArg,
Optional<unsigned> &HowManyArg);
- bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,
+ bool parseIndexList(SmallVectorImpl<unsigned> &Indices,
bool &AteExtraComma);
- bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
+ bool parseIndexList(SmallVectorImpl<unsigned> &Indices) {
bool AteExtraComma;
- if (ParseIndexList(Indices, AteExtraComma)) return true;
+ if (parseIndexList(Indices, AteExtraComma))
+ return true;
if (AteExtraComma)
- return TokError("expected index");
+ return tokError("expected index");
return false;
}
// Top-Level Entities
- bool ParseTopLevelEntities();
- bool ValidateEndOfModule(bool UpgradeDebugInfo);
- bool ValidateEndOfIndex();
- bool ParseTargetDefinitions();
- bool ParseTargetDefinition();
- bool ParseModuleAsm();
- bool ParseSourceFileName();
- bool ParseDepLibs(); // FIXME: Remove in 4.0.
- bool ParseUnnamedType();
- bool ParseNamedType();
- bool ParseDeclare();
- bool ParseDefine();
-
- bool ParseGlobalType(bool &IsConstant);
- bool ParseUnnamedGlobal();
- bool ParseNamedGlobal();
- bool ParseGlobal(const std::string &Name, LocTy NameLoc, unsigned Linkage,
+ bool parseTopLevelEntities();
+ bool validateEndOfModule(bool UpgradeDebugInfo);
+ bool validateEndOfIndex();
+ bool parseTargetDefinitions();
+ bool parseTargetDefinition();
+ bool parseModuleAsm();
+ bool parseSourceFileName();
+ bool parseDepLibs(); // FIXME: Remove in 4.0.
+ bool parseUnnamedType();
+ bool parseNamedType();
+ bool parseDeclare();
+ bool parseDefine();
+
+ bool parseGlobalType(bool &IsConstant);
+ bool parseUnnamedGlobal();
+ bool parseNamedGlobal();
+ bool parseGlobal(const std::string &Name, LocTy NameLoc, unsigned Linkage,
bool HasLinkage, unsigned Visibility,
unsigned DLLStorageClass, bool DSOLocal,
GlobalVariable::ThreadLocalMode TLM,
@@ -324,92 +320,96 @@ namespace llvm {
GlobalVariable::ThreadLocalMode TLM,
GlobalVariable::UnnamedAddr UnnamedAddr);
bool parseComdat();
- bool ParseStandaloneMetadata();
- bool ParseNamedMetadata();
- bool ParseMDString(MDString *&Result);
- bool ParseMDNodeID(MDNode *&Result);
- bool ParseUnnamedAttrGrp();
- bool ParseFnAttributeValuePairs(AttrBuilder &B,
+ bool parseStandaloneMetadata();
+ bool parseNamedMetadata();
+ bool parseMDString(MDString *&Result);
+ bool parseMDNodeID(MDNode *&Result);
+ bool parseUnnamedAttrGrp();
+ bool parseFnAttributeValuePairs(AttrBuilder &B,
std::vector<unsigned> &FwdRefAttrGrps,
bool inAttrGrp, LocTy &BuiltinLoc);
- bool ParseByValWithOptionalType(Type *&Result);
- bool ParsePreallocated(Type *&Result);
+ bool parseRequiredTypeAttr(Type *&Result, lltok::Kind AttrName);
+ bool parsePreallocated(Type *&Result);
+ bool parseByRef(Type *&Result);
// Module Summary Index Parsing.
- bool SkipModuleSummaryEntry();
- bool ParseSummaryEntry();
- bool ParseModuleEntry(unsigned ID);
- bool ParseModuleReference(StringRef &ModulePath);
- bool ParseGVReference(ValueInfo &VI, unsigned &GVId);
- bool ParseSummaryIndexFlags();
- bool ParseBlockCount();
- bool ParseGVEntry(unsigned ID);
- bool ParseFunctionSummary(std::string Name, GlobalValue::GUID, unsigned ID);
- bool ParseVariableSummary(std::string Name, GlobalValue::GUID, unsigned ID);
- bool ParseAliasSummary(std::string Name, GlobalValue::GUID, unsigned ID);
- bool ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags);
- bool ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags);
- bool ParseOptionalFFlags(FunctionSummary::FFlags &FFlags);
- bool ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls);
- bool ParseHotness(CalleeInfo::HotnessType &Hotness);
- bool ParseOptionalTypeIdInfo(FunctionSummary::TypeIdInfo &TypeIdInfo);
- bool ParseTypeTests(std::vector<GlobalValue::GUID> &TypeTests);
- bool ParseVFuncIdList(lltok::Kind Kind,
+ bool skipModuleSummaryEntry();
+ bool parseSummaryEntry();
+ bool parseModuleEntry(unsigned ID);
+ bool parseModuleReference(StringRef &ModulePath);
+ bool parseGVReference(ValueInfo &VI, unsigned &GVId);
+ bool parseSummaryIndexFlags();
+ bool parseBlockCount();
+ bool parseGVEntry(unsigned ID);
+ bool parseFunctionSummary(std::string Name, GlobalValue::GUID, unsigned ID);
+ bool parseVariableSummary(std::string Name, GlobalValue::GUID, unsigned ID);
+ bool parseAliasSummary(std::string Name, GlobalValue::GUID, unsigned ID);
+ bool parseGVFlags(GlobalValueSummary::GVFlags &GVFlags);
+ bool parseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags);
+ bool parseOptionalFFlags(FunctionSummary::FFlags &FFlags);
+ bool parseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls);
+ bool parseHotness(CalleeInfo::HotnessType &Hotness);
+ bool parseOptionalTypeIdInfo(FunctionSummary::TypeIdInfo &TypeIdInfo);
+ bool parseTypeTests(std::vector<GlobalValue::GUID> &TypeTests);
+ bool parseVFuncIdList(lltok::Kind Kind,
std::vector<FunctionSummary::VFuncId> &VFuncIdList);
- bool ParseConstVCallList(
+ bool parseConstVCallList(
lltok::Kind Kind,
std::vector<FunctionSummary::ConstVCall> &ConstVCallList);
using IdToIndexMapType =
std::map<unsigned, std::vector<std::pair<unsigned, LocTy>>>;
- bool ParseConstVCall(FunctionSummary::ConstVCall &ConstVCall,
+ bool parseConstVCall(FunctionSummary::ConstVCall &ConstVCall,
IdToIndexMapType &IdToIndexMap, unsigned Index);
- bool ParseVFuncId(FunctionSummary::VFuncId &VFuncId,
+ bool parseVFuncId(FunctionSummary::VFuncId &VFuncId,
IdToIndexMapType &IdToIndexMap, unsigned Index);
- bool ParseOptionalVTableFuncs(VTableFuncList &VTableFuncs);
- bool ParseOptionalParamAccesses(
+ bool parseOptionalVTableFuncs(VTableFuncList &VTableFuncs);
+ bool parseOptionalParamAccesses(
std::vector<FunctionSummary::ParamAccess> &Params);
- bool ParseParamNo(uint64_t &ParamNo);
- bool ParseParamAccess(FunctionSummary::ParamAccess &Param);
- bool ParseParamAccessCall(FunctionSummary::ParamAccess::Call &Call);
- bool ParseParamAccessOffset(ConstantRange &range);
- bool ParseOptionalRefs(std::vector<ValueInfo> &Refs);
- bool ParseTypeIdEntry(unsigned ID);
- bool ParseTypeIdSummary(TypeIdSummary &TIS);
- bool ParseTypeIdCompatibleVtableEntry(unsigned ID);
- bool ParseTypeTestResolution(TypeTestResolution &TTRes);
- bool ParseOptionalWpdResolutions(
+ bool parseParamNo(uint64_t &ParamNo);
+ using IdLocListType = std::vector<std::pair<unsigned, LocTy>>;
+ bool parseParamAccess(FunctionSummary::ParamAccess &Param,
+ IdLocListType &IdLocList);
+ bool parseParamAccessCall(FunctionSummary::ParamAccess::Call &Call,
+ IdLocListType &IdLocList);
+ bool parseParamAccessOffset(ConstantRange &Range);
+ bool parseOptionalRefs(std::vector<ValueInfo> &Refs);
+ bool parseTypeIdEntry(unsigned ID);
+ bool parseTypeIdSummary(TypeIdSummary &TIS);
+ bool parseTypeIdCompatibleVtableEntry(unsigned ID);
+ bool parseTypeTestResolution(TypeTestResolution &TTRes);
+ bool parseOptionalWpdResolutions(
std::map<uint64_t, WholeProgramDevirtResolution> &WPDResMap);
- bool ParseWpdRes(WholeProgramDevirtResolution &WPDRes);
- bool ParseOptionalResByArg(
+ bool parseWpdRes(WholeProgramDevirtResolution &WPDRes);
+ bool parseOptionalResByArg(
std::map<std::vector<uint64_t>, WholeProgramDevirtResolution::ByArg>
&ResByArg);
- bool ParseArgs(std::vector<uint64_t> &Args);
- void AddGlobalValueToIndex(std::string Name, GlobalValue::GUID,
+ bool parseArgs(std::vector<uint64_t> &Args);
+ void addGlobalValueToIndex(std::string Name, GlobalValue::GUID,
GlobalValue::LinkageTypes Linkage, unsigned ID,
std::unique_ptr<GlobalValueSummary> Summary);
// Type Parsing.
- bool ParseType(Type *&Result, const Twine &Msg, bool AllowVoid = false);
- bool ParseType(Type *&Result, bool AllowVoid = false) {
- return ParseType(Result, "expected type", AllowVoid);
+ bool parseType(Type *&Result, const Twine &Msg, bool AllowVoid = false);
+ bool parseType(Type *&Result, bool AllowVoid = false) {
+ return parseType(Result, "expected type", AllowVoid);
}
- bool ParseType(Type *&Result, const Twine &Msg, LocTy &Loc,
+ bool parseType(Type *&Result, const Twine &Msg, LocTy &Loc,
bool AllowVoid = false) {
Loc = Lex.getLoc();
- return ParseType(Result, Msg, AllowVoid);
+ return parseType(Result, Msg, AllowVoid);
}
- bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
+ bool parseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
Loc = Lex.getLoc();
- return ParseType(Result, AllowVoid);
+ return parseType(Result, AllowVoid);
}
- bool ParseAnonStructType(Type *&Result, bool Packed);
- bool ParseStructBody(SmallVectorImpl<Type*> &Body);
- bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
- std::pair<Type*, LocTy> &Entry,
+ bool parseAnonStructType(Type *&Result, bool Packed);
+ bool parseStructBody(SmallVectorImpl<Type *> &Body);
+ bool parseStructDefinition(SMLoc TypeLoc, StringRef Name,
+ std::pair<Type *, LocTy> &Entry,
Type *&ResultTy);
- bool ParseArrayVectorType(Type *&Result, bool isVector);
- bool ParseFunctionType(Type *&Result);
+ bool parseArrayVectorType(Type *&Result, bool IsVector);
+ bool parseFunctionType(Type *&Result);
// Function Semantic Analysis.
class PerFunctionState {
@@ -428,67 +428,65 @@ namespace llvm {
Function &getFunction() const { return F; }
- bool FinishFunction();
+ bool finishFunction();
/// GetVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
- Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc, bool IsCall);
- Value *GetVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall);
+ Value *getVal(const std::string &Name, Type *Ty, LocTy Loc, bool IsCall);
+ Value *getVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall);
- /// SetInstName - After an instruction is parsed and inserted into its
+ /// setInstName - After an instruction is parsed and inserted into its
/// basic block, this installs its name.
- bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
+ bool setInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
Instruction *Inst);
/// GetBB - Get a basic block with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// is not a BasicBlock.
- BasicBlock *GetBB(const std::string &Name, LocTy Loc);
- BasicBlock *GetBB(unsigned ID, LocTy Loc);
+ BasicBlock *getBB(const std::string &Name, LocTy Loc);
+ BasicBlock *getBB(unsigned ID, LocTy Loc);
/// DefineBB - Define the specified basic block, which is either named or
/// unnamed. If there is an error, this returns null otherwise it returns
/// the block being defined.
- BasicBlock *DefineBB(const std::string &Name, int NameID, LocTy Loc);
+ BasicBlock *defineBB(const std::string &Name, int NameID, LocTy Loc);
bool resolveForwardRefBlockAddresses();
};
- bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
+ bool convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
PerFunctionState *PFS, bool IsCall);
Value *checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
Value *Val, bool IsCall);
bool parseConstantValue(Type *Ty, Constant *&C);
- bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
- bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
- return ParseValue(Ty, V, &PFS);
+ bool parseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
+ bool parseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
+ return parseValue(Ty, V, &PFS);
}
- bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
- PerFunctionState &PFS) {
+ bool parseValue(Type *Ty, Value *&V, LocTy &Loc, PerFunctionState &PFS) {
Loc = Lex.getLoc();
- return ParseValue(Ty, V, &PFS);
+ return parseValue(Ty, V, &PFS);
}
- bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
- bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
- return ParseTypeAndValue(V, &PFS);
+ bool parseTypeAndValue(Value *&V, PerFunctionState *PFS);
+ bool parseTypeAndValue(Value *&V, PerFunctionState &PFS) {
+ return parseTypeAndValue(V, &PFS);
}
- bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
+ bool parseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
Loc = Lex.getLoc();
- return ParseTypeAndValue(V, PFS);
+ return parseTypeAndValue(V, PFS);
}
- bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+ bool parseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
PerFunctionState &PFS);
- bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
+ bool parseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
LocTy Loc;
- return ParseTypeAndBasicBlock(BB, Loc, PFS);
+ return parseTypeAndBasicBlock(BB, Loc, PFS);
}
-
struct ParamInfo {
LocTy Loc;
Value *V;
@@ -496,49 +494,47 @@ namespace llvm {
ParamInfo(LocTy loc, Value *v, AttributeSet attrs)
: Loc(loc), V(v), Attrs(attrs) {}
};
- bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
- PerFunctionState &PFS,
- bool IsMustTailCall = false,
+ bool parseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+ PerFunctionState &PFS, bool IsMustTailCall = false,
bool InVarArgsFunc = false);
bool
- ParseOptionalOperandBundles(SmallVectorImpl<OperandBundleDef> &BundleList,
+ parseOptionalOperandBundles(SmallVectorImpl<OperandBundleDef> &BundleList,
PerFunctionState &PFS);
- bool ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
+ bool parseExceptionArgs(SmallVectorImpl<Value *> &Args,
PerFunctionState &PFS);
// Constant Parsing.
- bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
- bool ParseGlobalValue(Type *Ty, Constant *&C);
- bool ParseGlobalTypeAndValue(Constant *&V);
- bool ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts,
+ bool parseValID(ValID &ID, PerFunctionState *PFS = nullptr);
+ bool parseGlobalValue(Type *Ty, Constant *&C);
+ bool parseGlobalTypeAndValue(Constant *&V);
+ bool parseGlobalValueVector(SmallVectorImpl<Constant *> &Elts,
Optional<unsigned> *InRangeOp = nullptr);
bool parseOptionalComdat(StringRef GlobalName, Comdat *&C);
- bool ParseMetadataAsValue(Value *&V, PerFunctionState &PFS);
- bool ParseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
+ bool parseMetadataAsValue(Value *&V, PerFunctionState &PFS);
+ bool parseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
PerFunctionState *PFS);
- bool ParseMetadata(Metadata *&MD, PerFunctionState *PFS);
- bool ParseMDTuple(MDNode *&MD, bool IsDistinct = false);
- bool ParseMDNode(MDNode *&N);
- bool ParseMDNodeTail(MDNode *&N);
- bool ParseMDNodeVector(SmallVectorImpl<Metadata *> &Elts);
- bool ParseMetadataAttachment(unsigned &Kind, MDNode *&MD);
- bool ParseInstructionMetadata(Instruction &Inst);
- bool ParseGlobalObjectMetadataAttachment(GlobalObject &GO);
- bool ParseOptionalFunctionMetadata(Function &F);
+ bool parseMetadata(Metadata *&MD, PerFunctionState *PFS);
+ bool parseMDTuple(MDNode *&MD, bool IsDistinct = false);
+ bool parseMDNode(MDNode *&N);
+ bool parseMDNodeTail(MDNode *&N);
+ bool parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts);
+ bool parseMetadataAttachment(unsigned &Kind, MDNode *&MD);
+ bool parseInstructionMetadata(Instruction &Inst);
+ bool parseGlobalObjectMetadataAttachment(GlobalObject &GO);
+ bool parseOptionalFunctionMetadata(Function &F);
template <class FieldTy>
- bool ParseMDField(LocTy Loc, StringRef Name, FieldTy &Result);
- template <class FieldTy> bool ParseMDField(StringRef Name, FieldTy &Result);
- template <class ParserTy>
- bool ParseMDFieldsImplBody(ParserTy parseField);
+ bool parseMDField(LocTy Loc, StringRef Name, FieldTy &Result);
+ template <class FieldTy> bool parseMDField(StringRef Name, FieldTy &Result);
+ template <class ParserTy> bool parseMDFieldsImplBody(ParserTy ParseField);
template <class ParserTy>
- bool ParseMDFieldsImpl(ParserTy parseField, LocTy &ClosingLoc);
- bool ParseSpecializedMDNode(MDNode *&N, bool IsDistinct = false);
+ bool parseMDFieldsImpl(ParserTy ParseField, LocTy &ClosingLoc);
+ bool parseSpecializedMDNode(MDNode *&N, bool IsDistinct = false);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) \
- bool Parse##CLASS(MDNode *&Result, bool IsDistinct);
+ bool parse##CLASS(MDNode *&Result, bool IsDistinct);
#include "llvm/IR/Metadata.def"
// Function Parsing.
@@ -550,64 +546,64 @@ namespace llvm {
ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
: Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
};
- bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
- bool ParseFunctionHeader(Function *&Fn, bool isDefine);
- bool ParseFunctionBody(Function &Fn);
- bool ParseBasicBlock(PerFunctionState &PFS);
+ bool parseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &IsVarArg);
+ bool parseFunctionHeader(Function *&Fn, bool IsDefine);
+ bool parseFunctionBody(Function &Fn);
+ bool parseBasicBlock(PerFunctionState &PFS);
enum TailCallType { TCT_None, TCT_Tail, TCT_MustTail };
// Instruction Parsing. Each instruction parsing routine can return with a
// normal result, an error result, or return having eaten an extra comma.
enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
- int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+ int parseInstruction(Instruction *&Inst, BasicBlock *BB,
PerFunctionState &PFS);
- bool ParseCmpPredicate(unsigned &P, unsigned Opc);
-
- bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
- bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseCallBr(Instruction *&Inst, PerFunctionState &PFS);
-
- bool ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
+ bool parseCmpPredicate(unsigned &P, unsigned Opc);
+
+ bool parseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
+ bool parseBr(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseSwitch(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseInvoke(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseResume(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseCleanupRet(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseCatchRet(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseCallBr(Instruction *&Inst, PerFunctionState &PFS);
+
+ bool parseUnaryOp(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
bool IsFP);
- bool ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
- bool IsFP);
- bool ParseLogical(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
- bool ParseCompare(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
- bool ParseCast(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
- bool ParseSelect(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseExtractElement(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseInsertElement(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS);
- int ParsePHI(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseCall(Instruction *&Inst, PerFunctionState &PFS,
+ bool parseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc, bool IsFP);
+ bool parseLogical(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
+ bool parseCompare(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
+ bool parseCast(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
+ bool parseSelect(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseVAArg(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseExtractElement(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseInsertElement(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseShuffleVector(Instruction *&Inst, PerFunctionState &PFS);
+ int parsePHI(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseLandingPad(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseCall(Instruction *&Inst, PerFunctionState &PFS,
CallInst::TailCallKind TCK);
- int ParseAlloc(Instruction *&Inst, PerFunctionState &PFS);
- int ParseLoad(Instruction *&Inst, PerFunctionState &PFS);
- int ParseStore(Instruction *&Inst, PerFunctionState &PFS);
- int ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS);
- int ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS);
- int ParseFence(Instruction *&Inst, PerFunctionState &PFS);
- int ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS);
- int ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS);
- int ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS);
- bool ParseFreeze(Instruction *&I, PerFunctionState &PFS);
+ int parseAlloc(Instruction *&Inst, PerFunctionState &PFS);
+ int parseLoad(Instruction *&Inst, PerFunctionState &PFS);
+ int parseStore(Instruction *&Inst, PerFunctionState &PFS);
+ int parseCmpXchg(Instruction *&Inst, PerFunctionState &PFS);
+ int parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS);
+ int parseFence(Instruction *&Inst, PerFunctionState &PFS);
+ int parseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS);
+ int parseExtractValue(Instruction *&Inst, PerFunctionState &PFS);
+ int parseInsertValue(Instruction *&Inst, PerFunctionState &PFS);
+ bool parseFreeze(Instruction *&I, PerFunctionState &PFS);
// Use-list order directives.
- bool ParseUseListOrder(PerFunctionState *PFS = nullptr);
- bool ParseUseListOrderBB();
- bool ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes);
+ bool parseUseListOrder(PerFunctionState *PFS = nullptr);
+ bool parseUseListOrderBB();
+ bool parseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes);
bool sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes, SMLoc Loc);
};
} // End llvm namespace
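Editorial note: the renamed parseValue/parseTypeAndValue overloads above keep the existing thin-wrapper pattern — the reference-taking overloads simply forward to the single pointer-taking worker, optionally recording the lexer location first. A minimal standalone sketch of that delegation idiom (ToyParser and its members are illustrative only, not LLParser's API):

struct Loc { unsigned Line = 0, Col = 0; };

class ToyParser {
  Loc CurLoc;                        // stands in for Lex.getLoc()

public:
  // The overload that does the real work; State may be null at global scope.
  bool parseValue(int *State) { return State == nullptr; } // true on error

  // Convenience overload taking a reference.
  bool parseValue(int &State) { return parseValue(&State); }

  // Convenience overload that also reports where parsing started.
  bool parseValue(Loc &L, int &State) {
    L = CurLoc;                      // capture the location before parsing
    return parseValue(&State);
  }
};

int main() {
  ToyParser P;
  int State = 0;
  Loc Where;
  return P.parseValue(Where, State) ? 1 : 0;  // success path returns false
}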
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLToken.h b/contrib/llvm-project/llvm/lib/AsmParser/LLToken.h
index 0fb3bae77dd3..5149f861837a 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLToken.h
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLToken.h
@@ -74,6 +74,7 @@ enum Kind {
kw_localexec,
kw_zeroinitializer,
kw_undef,
+ kw_poison,
kw_null,
kw_none,
kw_to,
@@ -170,6 +171,7 @@ enum Kind {
kw_amdgpu_ps,
kw_amdgpu_cs,
kw_amdgpu_kernel,
+ kw_amdgpu_gfx,
kw_tailcc,
// Attributes:
@@ -198,6 +200,7 @@ enum Kind {
kw_noalias,
kw_noundef,
kw_nobuiltin,
+ kw_nocallback,
kw_nocapture,
kw_noduplicate,
kw_nofree,
@@ -207,6 +210,7 @@ enum Kind {
kw_nonlazybind,
kw_nomerge,
kw_nonnull,
+ kw_noprofile,
kw_noredzone,
kw_noreturn,
kw_nosync,
@@ -240,6 +244,8 @@ enum Kind {
kw_writeonly,
kw_zeroext,
kw_immarg,
+ kw_byref,
+ kw_mustprogress,
kw_type,
kw_opaque,
@@ -357,6 +363,7 @@ enum Kind {
kw_extractvalue,
kw_insertvalue,
kw_blockaddress,
+ kw_dso_local_equivalent,
kw_freeze,
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/Dwarf.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/Dwarf.cpp
index a497c16685c1..e4747369f3e3 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -151,6 +151,8 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
return "DW_OP_LLVM_tag_offset";
case DW_OP_LLVM_entry_value:
return "DW_OP_LLVM_entry_value";
+ case DW_OP_LLVM_implicit_pointer:
+ return "DW_OP_LLVM_implicit_pointer";
}
}
@@ -163,6 +165,7 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
.Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment)
.Case("DW_OP_LLVM_tag_offset", DW_OP_LLVM_tag_offset)
.Case("DW_OP_LLVM_entry_value", DW_OP_LLVM_entry_value)
+ .Case("DW_OP_LLVM_implicit_pointer", DW_OP_LLVM_implicit_pointer)
.Default(0);
}
@@ -488,6 +491,17 @@ StringRef llvm::dwarf::MacroString(unsigned Encoding) {
}
}
+StringRef llvm::dwarf::GnuMacroString(unsigned Encoding) {
+ switch (Encoding) {
+ default:
+ return StringRef();
+#define HANDLE_DW_MACRO_GNU(ID, NAME) \
+ case DW_MACRO_GNU_##NAME: \
+ return "DW_MACRO_GNU_" #NAME;
+#include "llvm/BinaryFormat/Dwarf.def"
+ }
+}
+
unsigned llvm::dwarf::getMacro(StringRef MacroString) {
return StringSwitch<unsigned>(MacroString)
#define HANDLE_DW_MACRO(ID, NAME) .Case("DW_MACRO_" #NAME, ID)
@@ -784,6 +798,17 @@ StringRef llvm::dwarf::FormatString(bool IsDWARF64) {
return FormatString(IsDWARF64 ? DWARF64 : DWARF32);
}
+StringRef llvm::dwarf::RLEString(unsigned RLE) {
+ switch (RLE) {
+ default:
+ return StringRef();
+#define HANDLE_DW_RLE(ID, NAME) \
+ case DW_RLE_##NAME: \
+ return "DW_RLE_" #NAME;
+#include "llvm/BinaryFormat/Dwarf.def"
+ }
+}
+
constexpr char llvm::dwarf::EnumTraits<Attribute>::Type[];
constexpr char llvm::dwarf::EnumTraits<Form>::Type[];
constexpr char llvm::dwarf::EnumTraits<Index>::Type[];
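Editorial note: the GnuMacroString and RLEString helpers added above rely on the usual LLVM ".def" X-macro trick — the enumeration data is defined in one place (Dwarf.def) and expanded once for the enum and once per stringifier. A self-contained sketch of the pattern; the TOY_RLE list below is made up, not Dwarf.def:

#include <cstdio>

// One central list of (value, name) pairs, normally kept in a .def file.
#define TOY_RLE_LIST(X)                                                        \
  X(0, end_of_list)                                                            \
  X(1, base_address)

// First expansion: declare the enumerators.
enum ToyRLE {
#define HANDLE_TOY_RLE(ID, NAME) TOY_RLE_##NAME = ID,
  TOY_RLE_LIST(HANDLE_TOY_RLE)
#undef HANDLE_TOY_RLE
};

// Second expansion: map each enumerator back to its name.
static const char *toyRLEString(unsigned V) {
  switch (V) {
#define HANDLE_TOY_RLE(ID, NAME)                                               \
  case TOY_RLE_##NAME:                                                         \
    return "TOY_RLE_" #NAME;
    TOY_RLE_LIST(HANDLE_TOY_RLE)
#undef HANDLE_TOY_RLE
  }
  return "";
}

int main() { std::puts(toyRLEString(1)); } // prints TOY_RLE_base_address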
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/MachO.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/MachO.cpp
index 2b9eb8025521..02a515c94399 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/MachO.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/MachO.cpp
@@ -55,10 +55,10 @@ static MachO::CPUSubTypeARM getARMSubType(const Triple &T) {
}
static MachO::CPUSubTypeARM64 getARM64SubType(const Triple &T) {
- assert(T.isAArch64() || T.getArch() == Triple::aarch64_32);
+ assert(T.isAArch64());
if (T.isArch32Bit())
return (MachO::CPUSubTypeARM64)MachO::CPU_SUBTYPE_ARM64_32_V8;
- if (T.getArchName() == "arm64e")
+ if (T.isArm64e())
return MachO::CPU_SUBTYPE_ARM64E;
return MachO::CPU_SUBTYPE_ARM64_ALL;
@@ -84,9 +84,7 @@ Expected<uint32_t> MachO::getCPUType(const Triple &T) {
if (T.isARM() || T.isThumb())
return MachO::CPU_TYPE_ARM;
if (T.isAArch64())
- return MachO::CPU_TYPE_ARM64;
- if (T.getArch() == Triple::aarch64_32)
- return MachO::CPU_TYPE_ARM64_32;
+ return T.isArch32Bit() ? MachO::CPU_TYPE_ARM64_32 : MachO::CPU_TYPE_ARM64;
if (T.getArch() == Triple::ppc)
return MachO::CPU_TYPE_POWERPC;
if (T.getArch() == Triple::ppc64)
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/MsgPackDocument.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/MsgPackDocument.cpp
index 53720c542e14..81ea4cee1a9d 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/MsgPackDocument.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/MsgPackDocument.cpp
@@ -277,6 +277,8 @@ void Document::writeToBlob(std::string &Blob) {
case Type::String:
MPWriter.write(Node.getString());
break;
+ case Type::Empty:
+ llvm_unreachable("unhandled empty msgpack node");
default:
llvm_unreachable("unhandled msgpack object kind");
}
@@ -310,4 +312,3 @@ void Document::writeToBlob(std::string &Blob) {
}
}
}
-
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/Wasm.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/Wasm.cpp
index 88608168783b..126680ac41c2 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/Wasm.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/Wasm.cpp
@@ -14,6 +14,8 @@ std::string llvm::wasm::toString(wasm::WasmSymbolType Type) {
return "WASM_SYMBOL_TYPE_FUNCTION";
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
return "WASM_SYMBOL_TYPE_GLOBAL";
+ case wasm::WASM_SYMBOL_TYPE_TABLE:
+ return "WASM_SYMBOL_TYPE_TABLE";
case wasm::WASM_SYMBOL_TYPE_DATA:
return "WASM_SYMBOL_TYPE_DATA";
case wasm::WASM_SYMBOL_TYPE_SECTION:
@@ -46,7 +48,9 @@ bool llvm::wasm::relocTypeHasAddend(uint32_t Type) {
case R_WASM_MEMORY_ADDR_REL_SLEB64:
case R_WASM_MEMORY_ADDR_I32:
case R_WASM_MEMORY_ADDR_I64:
+ case R_WASM_MEMORY_ADDR_TLS_SLEB:
case R_WASM_FUNCTION_OFFSET_I32:
+ case R_WASM_FUNCTION_OFFSET_I64:
case R_WASM_SECTION_OFFSET_I32:
return true;
default:
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/XCOFF.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/XCOFF.cpp
index 3c8a2cdbc3aa..0f270a5cea1b 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/XCOFF.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/XCOFF.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
using namespace llvm;
@@ -75,4 +76,81 @@ StringRef XCOFF::getRelocationTypeString(XCOFF::RelocationType Type) {
}
return "Unknown";
}
+
+#define LANG_CASE(A) \
+ case XCOFF::TracebackTable::A: \
+ return #A;
+
+StringRef XCOFF::getNameForTracebackTableLanguageId(
+ XCOFF::TracebackTable::LanguageID LangId) {
+ switch (LangId) {
+ LANG_CASE(C)
+ LANG_CASE(Fortran)
+ LANG_CASE(Pascal)
+ LANG_CASE(Ada)
+ LANG_CASE(PL1)
+ LANG_CASE(Basic)
+ LANG_CASE(Lisp)
+ LANG_CASE(Cobol)
+ LANG_CASE(Modula2)
+ LANG_CASE(Rpg)
+ LANG_CASE(PL8)
+ LANG_CASE(Assembly)
+ LANG_CASE(Java)
+ LANG_CASE(ObjectiveC)
+ LANG_CASE(CPlusPlus)
+ }
+ return "Unknown";
+}
+#undef LANG_CASE
+
+SmallString<32> XCOFF::parseParmsType(uint32_t Value, unsigned ParmsNum) {
+ SmallString<32> ParmsType;
+ for (unsigned I = 0; I < ParmsNum; ++I) {
+ if (I != 0)
+ ParmsType += ", ";
+ if ((Value & TracebackTable::ParmTypeIsFloatingBit) == 0) {
+ // Fixed parameter type.
+ ParmsType += "i";
+ Value <<= 1;
+ } else {
+ if ((Value & TracebackTable::ParmTypeFloatingIsDoubleBit) == 0)
+ // Float parameter type.
+ ParmsType += "f";
+ else
+ // Double parameter type.
+ ParmsType += "d";
+
+ Value <<= 2;
+ }
+ }
+ assert(Value == 0u && "ParmsType encodes more than ParmsNum parameters.");
+ return ParmsType;
+}
+// Note: Res must not be empty before pop_back; every recognized flag above
+// appends a trailing space, so this only holds when at least one bit is set.
+
+SmallString<32> XCOFF::getExtendedTBTableFlagString(uint8_t Flag) {
+ SmallString<32> Res;
+
+ if (Flag & ExtendedTBTableFlag::TB_OS1)
+ Res += "TB_OS1 ";
+ if (Flag & ExtendedTBTableFlag::TB_RESERVED)
+ Res += "TB_RESERVED ";
+ if (Flag & ExtendedTBTableFlag::TB_SSP_CANARY)
+ Res += "TB_SSP_CANARY ";
+ if (Flag & ExtendedTBTableFlag::TB_OS2)
+ Res += "TB_OS2 ";
+ if (Flag & ExtendedTBTableFlag::TB_EH_INFO)
+ Res += "TB_EH_INFO ";
+ if (Flag & ExtendedTBTableFlag::TB_LONGTBTABLE2)
+ Res += "TB_LONGTBTABLE2 ";
+
+ // Two of the bits in the mask that haven't been used yet.
+ if (Flag & 0x06)
+ Res += "Unknown ";
+
+ // Pop the last space.
+ Res.pop_back();
+ return Res;
+}
+
#undef RELOC_CASE
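Editorial note: the parseParmsType helper added above walks a bit string packed from the most significant bit down — a fixed-point parameter consumes one 0 bit, while a floating parameter consumes a 1 bit plus a second bit distinguishing double from float. A standalone illustration of the same decoding; the exact bit positions are an assumption mirroring TracebackTable's ParmType* masks, not taken from XCOFF.h:

#include <cstdint>
#include <iostream>
#include <string>

static std::string decodeParms(uint32_t Value, unsigned ParmsNum) {
  const uint32_t IsFloatingBit = 0x80000000u;  // assumed MSB flag
  const uint32_t IsDoubleBit   = 0x40000000u;  // assumed next bit
  std::string Out;
  for (unsigned I = 0; I < ParmsNum; ++I) {
    if (I != 0)
      Out += ", ";
    if ((Value & IsFloatingBit) == 0) {        // fixed-point parameter: 1 bit
      Out += "i";
      Value <<= 1;
    } else {                                   // floating parameter: 2 bits
      Out += (Value & IsDoubleBit) ? "d" : "f";
      Value <<= 2;
    }
  }
  return Out;
}

int main() {
  // Bits 0 | 10 | 11, packed from the MSB, encode (int, float, double).
  std::cout << decodeParms(0x58000000u, 3) << "\n";  // prints "i, f, d"
}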
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 2ce064c7685a..e91af121ea08 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -135,6 +135,7 @@ static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(MODULE_CODE, FUNCTION)
STRINGIFY_CODE(MODULE_CODE, ALIAS)
STRINGIFY_CODE(MODULE_CODE, GCNAME)
+ STRINGIFY_CODE(MODULE_CODE, COMDAT)
STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
@@ -176,16 +177,20 @@ static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(TYPE_CODE, OPAQUE)
STRINGIFY_CODE(TYPE_CODE, INTEGER)
STRINGIFY_CODE(TYPE_CODE, POINTER)
+ STRINGIFY_CODE(TYPE_CODE, HALF)
STRINGIFY_CODE(TYPE_CODE, ARRAY)
STRINGIFY_CODE(TYPE_CODE, VECTOR)
STRINGIFY_CODE(TYPE_CODE, X86_FP80)
STRINGIFY_CODE(TYPE_CODE, FP128)
STRINGIFY_CODE(TYPE_CODE, PPC_FP128)
STRINGIFY_CODE(TYPE_CODE, METADATA)
+ STRINGIFY_CODE(TYPE_CODE, X86_MMX)
STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON)
STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME)
STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED)
STRINGIFY_CODE(TYPE_CODE, FUNCTION)
+ STRINGIFY_CODE(TYPE_CODE, TOKEN)
+ STRINGIFY_CODE(TYPE_CODE, BFLOAT)
}
case bitc::CONSTANTS_BLOCK_ID:
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 659e26c2bd25..f2800201e871 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -20,8 +20,9 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Bitstream/BitstreamReader.h"
+#include "llvm/Bitcode/BitcodeCommon.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -578,9 +579,7 @@ public:
/// \returns true if an error occurred.
Error parseBitcodeInto(
Module *M, bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
- DataLayoutCallbackTy DataLayoutCallback = [](std::string) {
- return None;
- });
+ DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; });
static uint64_t decodeSignRotatedValue(uint64_t V);
@@ -650,7 +649,7 @@ private:
/// Read a value/type pair out of the specified record from slot 'Slot'.
/// Increment Slot past the number of slots used in the record. Return true on
/// failure.
- bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
+ bool getValueTypePair(const SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
unsigned InstNum, Value *&ResVal,
Type **FullTy = nullptr) {
if (Slot == Record.size()) return true;
@@ -677,7 +676,7 @@ private:
/// Read a value out of the specified record from slot 'Slot'. Increment Slot
/// past the number of slots used by the value in the record. Return true if
/// there is an error.
- bool popValue(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
+ bool popValue(const SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
unsigned InstNum, Type *Ty, Value *&ResVal) {
if (getValue(Record, Slot, InstNum, Ty, ResVal))
return true;
@@ -687,7 +686,7 @@ private:
}
/// Like popValue, but does not increment the Slot number.
- bool getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
+ bool getValue(const SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty, Value *&ResVal) {
ResVal = getValue(Record, Slot, InstNum, Ty);
return ResVal == nullptr;
@@ -695,7 +694,7 @@ private:
/// Version of getValue that returns ResVal directly, or 0 if there is an
/// error.
- Value *getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
+ Value *getValue(const SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty) {
if (Slot == Record.size()) return nullptr;
unsigned ValNo = (unsigned)Record[Slot];
@@ -706,7 +705,7 @@ private:
}
/// Like getValue, but decodes signed VBRs.
- Value *getValueSigned(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
+ Value *getValueSigned(const SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty) {
if (Slot == Record.size()) return nullptr;
unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
@@ -716,9 +715,9 @@ private:
return getFnValueByID(ValNo, Ty);
}
- /// Upgrades old-style typeless byval attributes by adding the corresponding
- /// argument's pointee type.
- void propagateByValTypes(CallBase *CB, ArrayRef<Type *> ArgsFullTys);
+ /// Upgrades old-style typeless byval or sret attributes by adding the
+ /// corresponding argument's pointee type.
+ void propagateByValSRetTypes(CallBase *CB, ArrayRef<Type *> ArgsFullTys);
/// Converts alignment exponent (i.e. power of two (or zero)) to the
/// corresponding alignment to use. If alignment is too large, returns
@@ -834,6 +833,8 @@ private:
void parseTypeIdCompatibleVtableSummaryRecord(ArrayRef<uint64_t> Record);
void parseTypeIdCompatibleVtableInfo(ArrayRef<uint64_t> Record, size_t &Slot,
TypeIdCompatibleVtableInfo &TypeId);
+ std::vector<FunctionSummary::ParamAccess>
+ parseParamAccesses(ArrayRef<uint64_t> Record);
std::pair<ValueInfo, GlobalValue::GUID>
getValueInfoFromValueId(unsigned ValueId);
@@ -1432,6 +1433,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::NoAlias;
case bitc::ATTR_KIND_NO_BUILTIN:
return Attribute::NoBuiltin;
+ case bitc::ATTR_KIND_NO_CALLBACK:
+ return Attribute::NoCallback;
case bitc::ATTR_KIND_NO_CAPTURE:
return Attribute::NoCapture;
case bitc::ATTR_KIND_NO_DUPLICATE:
@@ -1532,6 +1535,12 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::Preallocated;
case bitc::ATTR_KIND_NOUNDEF:
return Attribute::NoUndef;
+ case bitc::ATTR_KIND_BYREF:
+ return Attribute::ByRef;
+ case bitc::ATTR_KIND_MUSTPROGRESS:
+ return Attribute::MustProgress;
+ case bitc::ATTR_KIND_HOT:
+ return Attribute::Hot;
}
}
@@ -1606,6 +1615,8 @@ Error BitcodeReader::parseAttributeGroupBlock() {
// this AttributeList with a function.
if (Kind == Attribute::ByVal)
B.addByValAttr(nullptr);
+ else if (Kind == Attribute::StructRet)
+ B.addStructRetAttr(nullptr);
B.addAttribute(Kind);
} else if (Record[i] == 1) { // Integer attribute
@@ -1649,6 +1660,10 @@ Error BitcodeReader::parseAttributeGroupBlock() {
return Err;
if (Kind == Attribute::ByVal) {
B.addByValAttr(HasType ? getTypeByID(Record[++i]) : nullptr);
+ } else if (Kind == Attribute::StructRet) {
+ B.addStructRetAttr(HasType ? getTypeByID(Record[++i]) : nullptr);
+ } else if (Kind == Attribute::ByRef) {
+ B.addByRefAttr(getTypeByID(Record[++i]));
} else if (Kind == Attribute::Preallocated) {
B.addPreallocatedAttr(getTypeByID(Record[++i]));
}
@@ -1711,7 +1726,7 @@ Error BitcodeReader::parseTypeTableBody() {
case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
// TYPE_CODE_NUMENTRY contains a count of the number of types in the
// type list. This allows us to reserve space.
- if (Record.size() < 1)
+ if (Record.empty())
return error("Invalid record");
TypeList.resize(Record[0]);
continue;
@@ -1748,11 +1763,14 @@ Error BitcodeReader::parseTypeTableBody() {
case bitc::TYPE_CODE_X86_MMX: // X86_MMX
ResultTy = Type::getX86_MMXTy(Context);
break;
+ case bitc::TYPE_CODE_X86_AMX: // X86_AMX
+ ResultTy = Type::getX86_AMXTy(Context);
+ break;
case bitc::TYPE_CODE_TOKEN: // TOKEN
ResultTy = Type::getTokenTy(Context);
break;
case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width]
- if (Record.size() < 1)
+ if (Record.empty())
return error("Invalid record");
uint64_t NumBits = Record[0];
@@ -1764,7 +1782,7 @@ Error BitcodeReader::parseTypeTableBody() {
}
case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
// [pointee type, address space]
- if (Record.size() < 1)
+ if (Record.empty())
return error("Invalid record");
unsigned AddressSpace = 0;
if (Record.size() == 2)
@@ -1819,7 +1837,7 @@ Error BitcodeReader::parseTypeTableBody() {
break;
}
case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
- if (Record.size() < 1)
+ if (Record.empty())
return error("Invalid record");
SmallVector<Type*, 8> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i) {
@@ -1839,7 +1857,7 @@ Error BitcodeReader::parseTypeTableBody() {
continue;
case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
- if (Record.size() < 1)
+ if (Record.empty())
return error("Invalid record");
if (NumRecords >= TypeList.size())
@@ -2399,6 +2417,9 @@ Error BitcodeReader::parseConstants() {
case bitc::CST_CODE_UNDEF: // UNDEF
V = UndefValue::get(CurTy);
break;
+ case bitc::CST_CODE_POISON: // POISON
+ V = PoisonValue::get(CurTy);
+ break;
case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid]
if (Record.empty())
return error("Invalid record");
@@ -2912,8 +2933,7 @@ Error BitcodeReader::parseUseLists() {
if (RecordLength < 3)
// Records should have at least an ID and two indexes.
return error("Invalid record");
- unsigned ID = Record.back();
- Record.pop_back();
+ unsigned ID = Record.pop_back_val();
Value *V;
if (IsBB) {
@@ -2965,12 +2985,15 @@ Error BitcodeReader::materializeMetadata() {
}
// Upgrade "Linker Options" module flag to "llvm.linker.options" module-level
- // metadata.
- if (Metadata *Val = TheModule->getModuleFlag("Linker Options")) {
- NamedMDNode *LinkerOpts =
- TheModule->getOrInsertNamedMetadata("llvm.linker.options");
- for (const MDOperand &MDOptions : cast<MDNode>(Val)->operands())
- LinkerOpts->addOperand(cast<MDNode>(MDOptions));
+ // metadata. Only upgrade if the new option doesn't already exist, to avoid
+ // upgrading multiple times.
+ if (!TheModule->getNamedMetadata("llvm.linker.options")) {
+ if (Metadata *Val = TheModule->getModuleFlag("Linker Options")) {
+ NamedMDNode *LinkerOpts =
+ TheModule->getOrInsertNamedMetadata("llvm.linker.options");
+ for (const MDOperand &MDOptions : cast<MDNode>(Val)->operands())
+ LinkerOpts->addOperand(cast<MDNode>(MDOptions));
+ }
}
DeferredMetadataInfo.clear();
@@ -3278,17 +3301,24 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
Func->setLinkage(getDecodedLinkage(RawLinkage));
Func->setAttributes(getAttributes(Record[4]));
- // Upgrade any old-style byval without a type by propagating the argument's
- // pointee type. There should be no opaque pointers where the byval type is
- // implicit.
+ // Upgrade any old-style byval or sret without a type by propagating the
+ // argument's pointee type. There should be no opaque pointers where the byval
+ // type is implicit.
for (unsigned i = 0; i != Func->arg_size(); ++i) {
- if (!Func->hasParamAttribute(i, Attribute::ByVal))
- continue;
+ for (Attribute::AttrKind Kind : {Attribute::ByVal, Attribute::StructRet}) {
+ if (!Func->hasParamAttribute(i, Kind))
+ continue;
- Type *PTy = cast<FunctionType>(FullFTy)->getParamType(i);
- Func->removeParamAttr(i, Attribute::ByVal);
- Func->addParamAttr(i, Attribute::getWithByValType(
- Context, getPointerElementFlatType(PTy)));
+ Func->removeParamAttr(i, Kind);
+
+ Type *PTy = cast<FunctionType>(FullFTy)->getParamType(i);
+ Type *PtrEltTy = getPointerElementFlatType(PTy);
+ Attribute NewAttr =
+ Kind == Attribute::ByVal
+ ? Attribute::getWithByValType(Context, PtrEltTy)
+ : Attribute::getWithStructRetType(Context, PtrEltTy);
+ Func->addParamAttr(i, NewAttr);
+ }
}
MaybeAlign Alignment;
@@ -3708,7 +3738,7 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
break;
/// MODULE_CODE_VSTOFFSET: [offset]
case bitc::MODULE_CODE_VSTOFFSET:
- if (Record.size() < 1)
+ if (Record.empty())
return error("Invalid record");
// Note that we subtract 1 here because the offset is relative to one word
// before the start of the identification or module block, which was
@@ -3749,16 +3779,22 @@ Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
return Error::success();
}
-void BitcodeReader::propagateByValTypes(CallBase *CB,
- ArrayRef<Type *> ArgsFullTys) {
+void BitcodeReader::propagateByValSRetTypes(CallBase *CB,
+ ArrayRef<Type *> ArgsFullTys) {
for (unsigned i = 0; i != CB->arg_size(); ++i) {
- if (!CB->paramHasAttr(i, Attribute::ByVal))
- continue;
+ for (Attribute::AttrKind Kind : {Attribute::ByVal, Attribute::StructRet}) {
+ if (!CB->paramHasAttr(i, Kind))
+ continue;
+
+ CB->removeParamAttr(i, Kind);
- CB->removeParamAttr(i, Attribute::ByVal);
- CB->addParamAttr(
- i, Attribute::getWithByValType(
- Context, getPointerElementFlatType(ArgsFullTys[i])));
+ Type *PtrEltTy = getPointerElementFlatType(ArgsFullTys[i]);
+ Attribute NewAttr =
+ Kind == Attribute::ByVal
+ ? Attribute::getWithByValType(Context, PtrEltTy)
+ : Attribute::getWithStructRetType(Context, PtrEltTy);
+ CB->addParamAttr(i, NewAttr);
+ }
}
}
@@ -3862,7 +3898,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
default: // Default behavior: reject
return error("Invalid value");
case bitc::FUNC_CODE_DECLAREBLOCKS: { // DECLAREBLOCKS: [nblocks]
- if (Record.size() < 1 || Record[0] == 0)
+ if (Record.empty() || Record[0] == 0)
return error("Invalid record");
// Create all the basic blocks for the function.
FunctionBBs.resize(Record[0]);
@@ -3929,7 +3965,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (!IA)
return error("Invalid record");
}
- LastLoc = DebugLoc::get(Line, Col, Scope, IA, isImplicitCode);
+ LastLoc = DILocation::get(Scope->getContext(), Line, Col, Scope, IA,
+ isImplicitCode);
I->setDebugLoc(LastLoc);
I = nullptr;
continue;
@@ -4610,7 +4647,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
cast<InvokeInst>(I)->setCallingConv(
static_cast<CallingConv::ID>(CallingConv::MaxID & CCInfo));
cast<InvokeInst>(I)->setAttributes(PAL);
- propagateByValTypes(cast<CallBase>(I), ArgsFullTys);
+ propagateByValSRetTypes(cast<CallBase>(I), ArgsFullTys);
break;
}
@@ -4704,7 +4741,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
- if (Record.size() < 1)
+ if (Record.empty())
return error("Invalid record");
// The first record specifies the type.
FullTy = getFullyStructuredTypeByID(Record[0]);
@@ -4806,17 +4843,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
if (Record.size() != 4)
return error("Invalid record");
- uint64_t AlignRecord = Record[3];
- const uint64_t InAllocaMask = uint64_t(1) << 5;
- const uint64_t ExplicitTypeMask = uint64_t(1) << 6;
- const uint64_t SwiftErrorMask = uint64_t(1) << 7;
- const uint64_t FlagMask = InAllocaMask | ExplicitTypeMask |
- SwiftErrorMask;
- bool InAlloca = AlignRecord & InAllocaMask;
- bool SwiftError = AlignRecord & SwiftErrorMask;
+ using APV = AllocaPackedValues;
+ const uint64_t Rec = Record[3];
+ const bool InAlloca = Bitfield::get<APV::UsedWithInAlloca>(Rec);
+ const bool SwiftError = Bitfield::get<APV::SwiftError>(Rec);
FullTy = getFullyStructuredTypeByID(Record[0]);
Type *Ty = flattenPointerTypes(FullTy);
- if ((AlignRecord & ExplicitTypeMask) == 0) {
+ if (!Bitfield::get<APV::ExplicitType>(Rec)) {
auto *PTy = dyn_cast_or_null<PointerType>(Ty);
if (!PTy)
return error("Old-style alloca with a non-pointer type");
@@ -4825,7 +4858,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Type *OpTy = getTypeByID(Record[1]);
Value *Size = getFnValueByID(Record[2], OpTy);
MaybeAlign Align;
- if (Error Err = parseAlignmentValue(AlignRecord & ~FlagMask, Align)) {
+ if (Error Err =
+ parseAlignmentValue(Bitfield::get<APV::Align>(Rec), Align)) {
return Err;
}
if (!Ty || !Size)
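Editorial note: the alloca record above now decodes its packed flags through llvm::Bitfield accessors instead of hand-rolled masks and shifts. A hedged sketch of the Bitfield::Element/get/set idiom — the field offsets and widths below are illustrative, not the actual AllocaPackedValues layout from llvm/Bitcode/BitcodeCommon.h:

#include "llvm/ADT/Bitfields.h"
#include <cassert>
#include <cstdint>

// Illustrative packed layout; not the real AllocaPackedValues.
struct ToyPacked {
  using Align            = llvm::Bitfield::Element<unsigned, 0, 5>; // low 5 bits
  using UsedWithInAlloca = llvm::Bitfield::Element<bool, 5, 1>;
  using ExplicitType     = llvm::Bitfield::Element<bool, 6, 1>;
  using SwiftError       = llvm::Bitfield::Element<bool, 7, 1>;
};

int main() {
  uint64_t Rec = 0;
  llvm::Bitfield::set<ToyPacked::Align>(Rec, 4);          // encoded log2 alignment
  llvm::Bitfield::set<ToyPacked::ExplicitType>(Rec, true);
  assert(llvm::Bitfield::get<ToyPacked::Align>(Rec) == 4);
  assert(llvm::Bitfield::get<ToyPacked::ExplicitType>(Rec));
  assert(!llvm::Bitfield::get<ToyPacked::SwiftError>(Rec));
  return 0;
}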
@@ -4982,54 +5016,55 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_CMPXCHG_OLD:
- case bitc::FUNC_CODE_INST_CMPXCHG: {
- // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, ssid,
- // failureordering?, isweak?]
+ case bitc::FUNC_CODE_INST_CMPXCHG_OLD: {
+ // CMPXCHG_OLD: [ptrty, ptr, cmp, val, vol, ordering, synchscope,
+ // failure_ordering?, weak?]
+ const size_t NumRecords = Record.size();
unsigned OpNum = 0;
- Value *Ptr, *Cmp, *New;
+ Value *Ptr = nullptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy))
return error("Invalid record");
if (!isa<PointerType>(Ptr->getType()))
return error("Cmpxchg operand is not a pointer type");
- if (BitCode == bitc::FUNC_CODE_INST_CMPXCHG) {
- if (getValueTypePair(Record, OpNum, NextValueNo, Cmp, &FullTy))
- return error("Invalid record");
- } else if (popValue(Record, OpNum, NextValueNo,
- getPointerElementFlatType(FullTy), Cmp))
+ Value *Cmp = nullptr;
+ if (popValue(Record, OpNum, NextValueNo,
+ getPointerElementFlatType(FullTy), Cmp))
return error("Invalid record");
- else
- FullTy = cast<PointerType>(FullTy)->getElementType();
+ FullTy = cast<PointerType>(FullTy)->getElementType();
+
+ Value *New = nullptr;
if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) ||
- Record.size() < OpNum + 3 || Record.size() > OpNum + 5)
+ NumRecords < OpNum + 3 || NumRecords > OpNum + 5)
return error("Invalid record");
- AtomicOrdering SuccessOrdering = getDecodedOrdering(Record[OpNum + 1]);
+ const AtomicOrdering SuccessOrdering =
+ getDecodedOrdering(Record[OpNum + 1]);
if (SuccessOrdering == AtomicOrdering::NotAtomic ||
SuccessOrdering == AtomicOrdering::Unordered)
return error("Invalid record");
- SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]);
+
+ const SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]);
if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType()))
return Err;
- AtomicOrdering FailureOrdering;
- if (Record.size() < 7)
- FailureOrdering =
- AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering);
- else
- FailureOrdering = getDecodedOrdering(Record[OpNum + 3]);
- Align Alignment(
+ const AtomicOrdering FailureOrdering =
+ NumRecords < 7
+ ? AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering)
+ : getDecodedOrdering(Record[OpNum + 3]);
+
+ const Align Alignment(
TheModule->getDataLayout().getTypeStoreSize(Cmp->getType()));
+
I = new AtomicCmpXchgInst(Ptr, Cmp, New, Alignment, SuccessOrdering,
FailureOrdering, SSID);
- FullTy = StructType::get(Context, {FullTy, Type::getInt1Ty(Context)});
cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
+ FullTy = StructType::get(Context, {FullTy, Type::getInt1Ty(Context)});
- if (Record.size() < 8) {
+ if (NumRecords < 8) {
// Before weak cmpxchgs existed, the instruction simply returned the
// value loaded from memory, so bitcode files from that era will be
// expecting the first component of a modern cmpxchg.
@@ -5037,12 +5072,59 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
I = ExtractValueInst::Create(I, 0);
FullTy = cast<StructType>(FullTy)->getElementType(0);
} else {
- cast<AtomicCmpXchgInst>(I)->setWeak(Record[OpNum+4]);
+ cast<AtomicCmpXchgInst>(I)->setWeak(Record[OpNum + 4]);
}
InstructionList.push_back(I);
break;
}
+ case bitc::FUNC_CODE_INST_CMPXCHG: {
+ // CMPXCHG: [ptrty, ptr, cmp, val, vol, success_ordering, synchscope,
+ // failure_ordering, weak]
+ const size_t NumRecords = Record.size();
+ unsigned OpNum = 0;
+ Value *Ptr = nullptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy))
+ return error("Invalid record");
+
+ if (!isa<PointerType>(Ptr->getType()))
+ return error("Cmpxchg operand is not a pointer type");
+
+ Value *Cmp = nullptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Cmp, &FullTy))
+ return error("Invalid record");
+
+ Value *Val = nullptr;
+ if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), Val) ||
+ NumRecords < OpNum + 3 || NumRecords > OpNum + 5)
+ return error("Invalid record");
+
+ const AtomicOrdering SuccessOrdering =
+ getDecodedOrdering(Record[OpNum + 1]);
+ if (SuccessOrdering == AtomicOrdering::NotAtomic ||
+ SuccessOrdering == AtomicOrdering::Unordered)
+ return error("Invalid record");
+
+ const SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]);
+
+ if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType()))
+ return Err;
+
+ const AtomicOrdering FailureOrdering =
+ getDecodedOrdering(Record[OpNum + 3]);
+
+ const Align Alignment(
+ TheModule->getDataLayout().getTypeStoreSize(Cmp->getType()));
+
+ I = new AtomicCmpXchgInst(Ptr, Cmp, Val, Alignment, SuccessOrdering,
+ FailureOrdering, SSID);
+ FullTy = StructType::get(Context, {FullTy, Type::getInt1Ty(Context)});
+ cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
+ cast<AtomicCmpXchgInst>(I)->setWeak(Record[OpNum + 4]);
+
+ InstructionList.push_back(I);
+ break;
+ }
case bitc::FUNC_CODE_INST_ATOMICRMW: {
// ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid]
unsigned OpNum = 0;
@@ -5172,7 +5254,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
TCK = CallInst::TCK_NoTail;
cast<CallInst>(I)->setTailCallKind(TCK);
cast<CallInst>(I)->setAttributes(PAL);
- propagateByValTypes(cast<CallBase>(I), ArgsFullTys);
+ propagateByValSRetTypes(cast<CallBase>(I), ArgsFullTys);
if (FMF.any()) {
if (!isa<FPMathOperator>(I))
return error("Fast-math-flags specified for call without "
@@ -5200,7 +5282,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// number of operand bundle blocks. These blocks are read into
// OperandBundles and consumed at the next call or invoke instruction.
- if (Record.size() < 1 || Record[0] >= BundleTags.size())
+ if (Record.empty() || Record[0] >= BundleTags.size())
return error("Invalid record");
std::vector<Value *> Inputs;
@@ -5390,6 +5472,36 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
}
}
+ // "Upgrade" older incorrect branch weights by dropping them.
+ for (auto &I : instructions(F)) {
+ if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) {
+ if (MD->getOperand(0) != nullptr && isa<MDString>(MD->getOperand(0))) {
+ MDString *MDS = cast<MDString>(MD->getOperand(0));
+ StringRef ProfName = MDS->getString();
+ // Check consistency of !prof branch_weights metadata.
+ if (!ProfName.equals("branch_weights"))
+ continue;
+ unsigned ExpectedNumOperands = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(&I))
+ ExpectedNumOperands = BI->getNumSuccessors();
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I))
+ ExpectedNumOperands = SI->getNumSuccessors();
+ else if (isa<CallInst>(&I))
+ ExpectedNumOperands = 1;
+ else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(&I))
+ ExpectedNumOperands = IBI->getNumDestinations();
+ else if (isa<SelectInst>(&I))
+ ExpectedNumOperands = 2;
+ else
+ continue; // ignore and continue.
+
+ // If the branch weight count doesn't match, just strip the branch weights.
+ if (MD->getNumOperands() != 1 + ExpectedNumOperands)
+ I.setMetadata(LLVMContext::MD_prof, nullptr);
+ }
+ }
+ }
+
// Look for functions that rely on old function attribute behavior.
UpgradeFunctionAttributes(*F);
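Editorial note: the new upgrade step above drops a !prof branch_weights attachment whose operand count no longer matches the instruction — a well-formed attachment carries the "branch_weights" string plus exactly one weight per successor (two for a conditional branch, one per case for a switch, and so on). A small hedged sketch of producing a well-formed attachment with MDBuilder; this is illustrative usage, not code from this reader:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include <cassert>

// Attach 90/10 weights to a conditional branch: two successors, two weights,
// so the consistency check in materialize() above keeps this metadata.
void annotateBranch(llvm::BranchInst &BI) {
  assert(BI.isConditional() && "weights below assume two successors");
  llvm::MDBuilder MDB(BI.getContext());
  BI.setMetadata(llvm::LLVMContext::MD_prof,
                 MDB.createBranchWeights(/*TrueWeight=*/90, /*FalseWeight=*/10));
}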
@@ -5703,7 +5815,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
}
/// MODULE_CODE_VSTOFFSET: [offset]
case bitc::MODULE_CODE_VSTOFFSET:
- if (Record.size() < 1)
+ if (Record.empty())
return error("Invalid record");
// Note that we subtract 1 here because the offset is relative to one
// word before the start of the identification or module block, which
@@ -5821,8 +5933,8 @@ static void parseTypeIdSummaryRecord(ArrayRef<uint64_t> Record,
parseWholeProgramDevirtResolution(Record, Strtab, Slot, TypeId);
}
-static std::vector<FunctionSummary::ParamAccess>
-parseParamAccesses(ArrayRef<uint64_t> Record) {
+std::vector<FunctionSummary::ParamAccess>
+ModuleSummaryIndexBitcodeReader::parseParamAccesses(ArrayRef<uint64_t> Record) {
auto ReadRange = [&]() {
APInt Lower(FunctionSummary::ParamAccess::RangeWidth,
BitcodeReader::decodeSignRotatedValue(Record.front()));
@@ -5848,7 +5960,7 @@ parseParamAccesses(ArrayRef<uint64_t> Record) {
for (auto &Call : ParamAccess.Calls) {
Call.ParamNo = Record.front();
Record = Record.drop_front();
- Call.Callee = Record.front();
+ Call.Callee = getValueInfoFromValueId(Record.front()).first;
Record = Record.drop_front();
Call.Offsets = ReadRange();
}
@@ -6245,8 +6357,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
}
case bitc::FS_TYPE_TESTS:
assert(PendingTypeTests.empty());
- PendingTypeTests.insert(PendingTypeTests.end(), Record.begin(),
- Record.end());
+ llvm::append_range(PendingTypeTests, Record);
break;
case bitc::FS_TYPE_TEST_ASSUME_VCALLS:
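Editorial note: the switch to llvm::append_range above is purely a readability change; it wraps the usual C.insert(C.end(), R.begin(), R.end()) call. A minimal usage sketch (container names here are illustrative):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint64_t> Record = {1, 2, 3};
  llvm::SmallVector<uint64_t, 8> Pending;
  llvm::append_range(Pending, Record);   // same as insert(end, begin, end)
  return Pending.size() == 3 ? 0 : 1;
}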
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index a8bf579bd180..8cdda62870ff 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -63,7 +62,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -75,7 +73,6 @@
#include <deque>
#include <limits>
#include <map>
-#include <memory>
#include <string>
#include <system_error>
#include <tuple>
@@ -368,7 +365,7 @@ public:
~PlaceholderQueue() {
assert(empty() && "PlaceholderQueue hasn't been flushed before being destroyed");
}
- bool empty() { return PHs.empty(); }
+ bool empty() const { return PHs.empty(); }
DistinctMDOperandPlaceholder &getPlaceholderOp(unsigned ID);
void flush(BitcodeReaderMetadataList &MetadataList);
@@ -441,6 +438,20 @@ class MetadataLoader::MetadataLoaderImpl {
/// Index that keeps track of where to find a metadata record in the stream.
std::vector<uint64_t> GlobalMetadataBitPosIndex;
+ /// Cursor position of the start of the global decl attachments, to enable
+ /// loading using the index built for lazy loading, instead of forward
+ /// references.
+ uint64_t GlobalDeclAttachmentPos = 0;
+
+#ifndef NDEBUG
+ /// Sanity check that we end up parsing all of the global decl attachments.
+ unsigned NumGlobalDeclAttachSkipped = 0;
+ unsigned NumGlobalDeclAttachParsed = 0;
+#endif
+
+ /// Load the global decl attachments, using the index built for lazy loading.
+ Expected<bool> loadGlobalDeclAttachments();
+
/// Populate the index above to enable lazily loading of metadata, and load
/// the named metadata as well as the transitively referenced global
/// Metadata.
@@ -665,7 +676,7 @@ public:
return FunctionsWithSPs.lookup(F);
}
- bool hasSeenOldLoopTags() { return HasSeenOldLoopTags; }
+ bool hasSeenOldLoopTags() const { return HasSeenOldLoopTags; }
Error parseMetadataAttachment(
Function &F, const SmallVectorImpl<Instruction *> &InstructionList);
@@ -673,7 +684,7 @@ public:
Error parseMetadataKinds();
void setStripTBAA(bool Value) { StripTBAA = Value; }
- bool isStrippingTBAA() { return StripTBAA; }
+ bool isStrippingTBAA() const { return StripTBAA; }
unsigned size() const { return MetadataList.size(); }
void shrinkTo(unsigned N) { MetadataList.shrinkTo(N); }
@@ -684,8 +695,10 @@ Expected<bool>
MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
IndexCursor = Stream;
SmallVector<uint64_t, 64> Record;
+ GlobalDeclAttachmentPos = 0;
// Get the abbrevs, and preload record positions to make them lazy-loadable.
while (true) {
+ uint64_t SavedPos = IndexCursor.GetCurrentBitNo();
Expected<BitstreamEntry> MaybeEntry = IndexCursor.advanceSkippingSubblocks(
BitstreamCursor::AF_DontPopBlockAtEnd);
if (!MaybeEntry)
@@ -820,25 +833,11 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
break;
}
case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
- // FIXME: we need to do this early because we don't materialize global
- // value explicitly.
- if (Error Err = IndexCursor.JumpToBit(CurrentPos))
- return std::move(Err);
- Record.clear();
- if (Expected<unsigned> MaybeRecord =
- IndexCursor.readRecord(Entry.ID, Record))
- ;
- else
- return MaybeRecord.takeError();
- if (Record.size() % 2 == 0)
- return error("Invalid record");
- unsigned ValueID = Record[0];
- if (ValueID >= ValueList.size())
- return error("Invalid record");
- if (auto *GO = dyn_cast<GlobalObject>(ValueList[ValueID]))
- if (Error Err = parseGlobalObjectAttachment(
- *GO, ArrayRef<uint64_t>(Record).slice(1)))
- return std::move(Err);
+ if (!GlobalDeclAttachmentPos)
+ GlobalDeclAttachmentPos = SavedPos;
+#ifndef NDEBUG
+ NumGlobalDeclAttachSkipped++;
+#endif
break;
}
case bitc::METADATA_KIND:
@@ -853,6 +852,7 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
case bitc::METADATA_SUBRANGE:
case bitc::METADATA_ENUMERATOR:
case bitc::METADATA_BASIC_TYPE:
+ case bitc::METADATA_STRING_TYPE:
case bitc::METADATA_DERIVED_TYPE:
case bitc::METADATA_COMPOSITE_TYPE:
case bitc::METADATA_SUBROUTINE_TYPE:
@@ -875,6 +875,7 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
case bitc::METADATA_OBJC_PROPERTY:
case bitc::METADATA_IMPORTED_ENTITY:
case bitc::METADATA_GLOBAL_VAR_EXPR:
+ case bitc::METADATA_GENERIC_SUBRANGE:
// We don't expect to see any of these, if we see one, give up on
// lazy-loading and fallback.
MDStringRef.clear();
@@ -887,6 +888,83 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
}
}
+// Load the global decl attachments after building the lazy loading index.
+// We don't load them "lazily" - all global decl attachments must be
+// parsed since they aren't materialized on demand. However, by delaying
+// their parsing until after the index is created, we can use the index
+// instead of creating temporaries.
+Expected<bool> MetadataLoader::MetadataLoaderImpl::loadGlobalDeclAttachments() {
+ // Nothing to do if we didn't find any of these metadata records.
+ if (!GlobalDeclAttachmentPos)
+ return true;
+ // Use a temporary cursor so that we don't mess up the main Stream cursor or
+ // the lazy loading IndexCursor (which holds the necessary abbrev ids).
+ BitstreamCursor TempCursor = Stream;
+ SmallVector<uint64_t, 64> Record;
+ // Jump to the position before the first global decl attachment, so we can
+ // scan for the first BitstreamEntry record.
+ if (Error Err = TempCursor.JumpToBit(GlobalDeclAttachmentPos))
+ return std::move(Err);
+ while (true) {
+ Expected<BitstreamEntry> MaybeEntry = TempCursor.advanceSkippingSubblocks(
+ BitstreamCursor::AF_DontPopBlockAtEnd);
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ // Sanity check that we parsed them all.
+ assert(NumGlobalDeclAttachSkipped == NumGlobalDeclAttachParsed);
+ return true;
+ case BitstreamEntry::Record:
+ break;
+ }
+ uint64_t CurrentPos = TempCursor.GetCurrentBitNo();
+ Expected<unsigned> MaybeCode = TempCursor.skipRecord(Entry.ID);
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ if (MaybeCode.get() != bitc::METADATA_GLOBAL_DECL_ATTACHMENT) {
+ // Anything other than a global decl attachment signals the end of
+ // these records. Sanity check that we parsed them all.
+ assert(NumGlobalDeclAttachSkipped == NumGlobalDeclAttachParsed);
+ return true;
+ }
+#ifndef NDEBUG
+ NumGlobalDeclAttachParsed++;
+#endif
+ // FIXME: we need to do this early because we don't materialize global
+ // values explicitly.
+ if (Error Err = TempCursor.JumpToBit(CurrentPos))
+ return std::move(Err);
+ Record.clear();
+ if (Expected<unsigned> MaybeRecord =
+ TempCursor.readRecord(Entry.ID, Record))
+ ;
+ else
+ return MaybeRecord.takeError();
+ if (Record.size() % 2 == 0)
+ return error("Invalid record");
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size())
+ return error("Invalid record");
+ if (auto *GO = dyn_cast<GlobalObject>(ValueList[ValueID])) {
+ // Need to save and restore the current position since
+ // parseGlobalObjectAttachment will resolve all forward references which
+ // would require parsing from locations stored in the index.
+ CurrentPos = TempCursor.GetCurrentBitNo();
+ if (Error Err = parseGlobalObjectAttachment(
+ *GO, ArrayRef<uint64_t>(Record).slice(1)))
+ return std::move(Err);
+ if (Error Err = TempCursor.JumpToBit(CurrentPos))
+ return std::move(Err);
+ }
+ }
+}
+
/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
/// module level metadata.
Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
@@ -916,6 +994,14 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
MetadataList.resize(MDStringRef.size() +
GlobalMetadataBitPosIndex.size());
+ // Now that we have built the index, load the global decl attachments
+ // that were deferred during that process. This avoids creating
+ // temporaries.
+ SuccessOrErr = loadGlobalDeclAttachments();
+ if (!SuccessOrErr)
+ return SuccessOrErr.takeError();
+ assert(SuccessOrErr.get());
+
// Reading the named metadata created forward references and/or
// placeholders, that we flush here.
resolveForwardRefsAndPlaceholders(Placeholders);
@@ -1286,6 +1372,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
NextMetadataNo++;
break;
}
+ case bitc::METADATA_GENERIC_SUBRANGE: {
+ Metadata *Val = nullptr;
+ Val = GET_OR_DISTINCT(DIGenericSubrange,
+ (Context, getMDOrNull(Record[1]),
+ getMDOrNull(Record[2]), getMDOrNull(Record[3]),
+ getMDOrNull(Record[4])));
+
+ MetadataList.assignValue(Val, NextMetadataNo);
+ IsDistinct = Record[0] & 1;
+ NextMetadataNo++;
+ break;
+ }
case bitc::METADATA_ENUMERATOR: {
if (Record.size() < 3)
return error("Invalid record");
@@ -1325,6 +1423,20 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
NextMetadataNo++;
break;
}
+ case bitc::METADATA_STRING_TYPE: {
+ if (Record.size() != 8)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIStringType,
+ (Context, Record[1], getMDString(Record[2]),
+ getMDOrNull(Record[3]), getMDOrNull(Record[4]),
+ Record[5], Record[6], Record[7])),
+ NextMetadataNo);
+ NextMetadataNo++;
+ break;
+ }
case bitc::METADATA_DERIVED_TYPE: {
if (Record.size() < 12 || Record.size() > 13)
return error("Invalid record");
@@ -1350,7 +1462,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_COMPOSITE_TYPE: {
- if (Record.size() < 16 || Record.size() > 18)
+ if (Record.size() < 16 || Record.size() > 21)
return error("Invalid record");
// If we have a UUID and this is not a forward declaration, lookup the
@@ -1375,6 +1487,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Metadata *TemplateParams = nullptr;
Metadata *Discriminator = nullptr;
Metadata *DataLocation = nullptr;
+ Metadata *Associated = nullptr;
+ Metadata *Allocated = nullptr;
+ Metadata *Rank = nullptr;
auto *Identifier = getMDString(Record[15]);
// If this module is being parsed so that it can be ThinLTO imported
// into another module, composite types only need to be imported
@@ -1399,13 +1514,21 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Discriminator = getMDOrNull(Record[16]);
if (Record.size() > 17)
DataLocation = getMDOrNull(Record[17]);
+ if (Record.size() > 19) {
+ Associated = getMDOrNull(Record[18]);
+ Allocated = getMDOrNull(Record[19]);
+ }
+ if (Record.size() > 20) {
+ Rank = getMDOrNull(Record[20]);
+ }
}
DICompositeType *CT = nullptr;
if (Identifier)
CT = DICompositeType::buildODRType(
Context, *Identifier, Tag, Name, File, Line, Scope, BaseType,
SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
- VTableHolder, TemplateParams, Discriminator, DataLocation);
+ VTableHolder, TemplateParams, Discriminator, DataLocation, Associated,
+ Allocated, Rank);
// Create a node if we didn't get a lazy ODR type.
if (!CT)
@@ -1413,7 +1536,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, Tag, Name, File, Line, Scope, BaseType,
SizeInBits, AlignInBits, OffsetInBits, Flags,
Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier, Discriminator, DataLocation));
+ Identifier, Discriminator, DataLocation, Associated,
+ Allocated, Rank));
if (!IsNotUsedInTypeRef && Identifier)
MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
@@ -1441,19 +1565,20 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
}
case bitc::METADATA_MODULE: {
- if (Record.size() < 5 || Record.size() > 8)
+ if (Record.size() < 5 || Record.size() > 9)
return error("Invalid record");
- unsigned Offset = Record.size() >= 7 ? 2 : 1;
+ unsigned Offset = Record.size() >= 8 ? 2 : 1;
IsDistinct = Record[0];
MetadataList.assignValue(
GET_OR_DISTINCT(
DIModule,
- (Context, Record.size() >= 7 ? getMDOrNull(Record[1]) : nullptr,
+ (Context, Record.size() >= 8 ? getMDOrNull(Record[1]) : nullptr,
getMDOrNull(Record[0 + Offset]), getMDString(Record[1 + Offset]),
getMDString(Record[2 + Offset]), getMDString(Record[3 + Offset]),
getMDString(Record[4 + Offset]),
- Record.size() <= 7 ? 0 : Record[7])),
+ Record.size() <= 7 ? 0 : Record[7],
+ Record.size() <= 8 ? false : Record[8])),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -2001,7 +2126,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseGlobalObjectAttachment(
auto K = MDKindMap.find(Record[I]);
if (K == MDKindMap.end())
return error("Invalid ID");
- MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[I + 1]);
+ MDNode *MD =
+ dyn_cast_or_null<MDNode>(getMetadataFwdRefOrLoad(Record[I + 1]));
if (!MD)
return error("Invalid metadata attachment: expect fwd ref to MDNode");
GO.addMetadata(K->second, *MD);
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.cpp
index 63a206eeb022..ddfa28c6b1e4 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.cpp
@@ -16,14 +16,11 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
-#include <cassert>
#include <cstddef>
#include <limits>
-#include <utility>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 9c15a5f9f193..37ecb9992e44 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitcodeCommon.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitstream/BitCodes.h"
@@ -85,6 +86,9 @@ static cl::opt<unsigned>
IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25),
cl::desc("Number of metadatas above which we emit an index "
"to enable lazy-loading"));
+static cl::opt<uint32_t> FlushThreshold(
+ "bitcode-flush-threshold", cl::Hidden, cl::init(512),
+ cl::desc("The threshold (unit M) for flushing LLVM bitcode."));
static cl::opt<bool> WriteRelBFToSummary(
"write-relbf-to-summary", cl::Hidden, cl::init(false),
@@ -296,10 +300,15 @@ private:
SmallVectorImpl<uint64_t> &Record, unsigned &Abbrev);
void writeDISubrange(const DISubrange *N, SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev);
+ void writeDIGenericSubrange(const DIGenericSubrange *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
void writeDIEnumerator(const DIEnumerator *N,
SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
void writeDIBasicType(const DIBasicType *N, SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev);
+ void writeDIStringType(const DIStringType *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
void writeDIDerivedType(const DIDerivedType *N,
SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
void writeDICompositeType(const DICompositeType *N,
@@ -394,6 +403,8 @@ private:
unsigned getEncodedSyncScopeID(SyncScope::ID SSID) {
return unsigned(SSID);
}
+
+ unsigned getEncodedAlign(MaybeAlign Alignment) { return encode(Alignment); }
};
/// Class to manage the bitcode writing for a combined index.
@@ -615,6 +626,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_IN_ALLOCA;
case Attribute::Cold:
return bitc::ATTR_KIND_COLD;
+ case Attribute::Hot:
+ return bitc::ATTR_KIND_HOT;
case Attribute::InaccessibleMemOnly:
return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY;
case Attribute::InaccessibleMemOrArgMemOnly:
@@ -635,6 +648,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NO_ALIAS;
case Attribute::NoBuiltin:
return bitc::ATTR_KIND_NO_BUILTIN;
+ case Attribute::NoCallback:
+ return bitc::ATTR_KIND_NO_CALLBACK;
case Attribute::NoCapture:
return bitc::ATTR_KIND_NO_CAPTURE;
case Attribute::NoDuplicate:
@@ -665,6 +680,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NOSYNC;
case Attribute::NoCfCheck:
return bitc::ATTR_KIND_NOCF_CHECK;
+ case Attribute::NoProfile:
+ return bitc::ATTR_KIND_NO_PROFILE;
case Attribute::NoUnwind:
return bitc::ATTR_KIND_NO_UNWIND;
case Attribute::NullPointerIsValid:
@@ -733,6 +750,10 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_PREALLOCATED;
case Attribute::NoUndef:
return bitc::ATTR_KIND_NOUNDEF;
+ case Attribute::ByRef:
+ return bitc::ATTR_KIND_BYREF;
+ case Attribute::MustProgress:
+ return bitc::ATTR_KIND_MUSTPROGRESS;
case Attribute::EndAttrKinds:
llvm_unreachable("Can not encode end-attribute kinds marker.");
case Attribute::None:
@@ -894,6 +915,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
+ case Type::X86_AMXTyID: Code = bitc::TYPE_CODE_X86_AMX; break;
case Type::TokenTyID: Code = bitc::TYPE_CODE_TOKEN; break;
case Type::IntegerTyID:
// INTEGER: [width]
@@ -963,7 +985,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
// VECTOR [numelts, eltty] or
// [numelts, eltty, scalable]
Code = bitc::TYPE_CODE_VECTOR;
- TypeVals.push_back(VT->getElementCount().Min);
+ TypeVals.push_back(VT->getElementCount().getKnownMinValue());
TypeVals.push_back(VE.getTypeID(VT->getElementType()));
if (isa<ScalableVectorType>(VT))
TypeVals.push_back(true);
@@ -1179,10 +1201,14 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// compute the maximum alignment value.
std::map<std::string, unsigned> SectionMap;
std::map<std::string, unsigned> GCMap;
- unsigned MaxAlignment = 0;
+ MaybeAlign MaxAlignment;
unsigned MaxGlobalType = 0;
+ const auto UpdateMaxAlignment = [&MaxAlignment](const MaybeAlign A) {
+ if (A)
+ MaxAlignment = !MaxAlignment ? *A : std::max(*MaxAlignment, *A);
+ };
for (const GlobalVariable &GV : M.globals()) {
- MaxAlignment = std::max(MaxAlignment, GV.getAlignment());
+ UpdateMaxAlignment(GV.getAlign());
MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV.getValueType()));
if (GV.hasSection()) {
// Give section names unique ID's.
@@ -1195,7 +1221,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
}
}
for (const Function &F : M) {
- MaxAlignment = std::max(MaxAlignment, F.getAlignment());
+ UpdateMaxAlignment(F.getAlign());
if (F.hasSection()) {
// Give section names unique ID's.
unsigned &Entry = SectionMap[std::string(F.getSection())];
@@ -1231,10 +1257,10 @@ void ModuleBitcodeWriter::writeModuleInfo() {
//| constant
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Initializer.
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // Linkage.
- if (MaxAlignment == 0) // Alignment.
+ if (!MaxAlignment) // Alignment.
Abbv->Add(BitCodeAbbrevOp(0));
else {
- unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1;
+ unsigned MaxEncAlignment = getEncodedAlign(MaxAlignment);
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(MaxEncAlignment+1)));
}
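The writer now derives the abbreviation width from getEncodedAlign, which keeps the historical log2-plus-one encoding (0 meaning "no alignment") but takes a MaybeAlign instead of a raw unsigned. A minimal standalone sketch of that encoding, assuming power-of-two alignments (not LLVM's actual encode() helper):

#include <cassert>
#include <cstdint>
#include <optional>

// Encode an optional power-of-two alignment as log2(align) + 1; 0 means
// "no alignment specified". This mirrors the log2+1 scheme used by the
// bitcode records above (a sketch, not LLVM's encode()).
static unsigned encodeAlign(std::optional<uint64_t> Align) {
  if (!Align)
    return 0;
  assert(*Align != 0 && (*Align & (*Align - 1)) == 0 && "not a power of two");
  unsigned Log2 = 0;
  for (uint64_t V = *Align; V > 1; V >>= 1)
    ++Log2;
  return Log2 + 1;
}

static std::optional<uint64_t> decodeAlign(unsigned Encoded) {
  if (Encoded == 0)
    return std::nullopt;                 // no alignment recorded
  return uint64_t(1) << (Encoded - 1);   // invert log2+1
}

// Example: encodeAlign(16) == 5, decodeAlign(5) == 16, encodeAlign({}) == 0.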
@@ -1287,7 +1313,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.push_back(GV.isDeclaration() ? 0 :
(VE.getValueID(GV.getInitializer()) + 1));
Vals.push_back(getEncodedLinkage(GV));
- Vals.push_back(Log2_32(GV.getAlignment())+1);
+ Vals.push_back(getEncodedAlign(GV.getAlign()));
Vals.push_back(GV.hasSection() ? SectionMap[std::string(GV.getSection())]
: 0);
if (GV.isThreadLocal() ||
@@ -1333,7 +1359,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.push_back(F.isDeclaration());
Vals.push_back(getEncodedLinkage(F));
Vals.push_back(VE.getAttributeListID(F.getAttributes()));
- Vals.push_back(Log2_32(F.getAlignment())+1);
+ Vals.push_back(getEncodedAlign(F.getAlign()));
Vals.push_back(F.hasSection() ? SectionMap[std::string(F.getSection())]
: 0);
Vals.push_back(getEncodedVisibility(F));
@@ -1535,6 +1561,19 @@ void ModuleBitcodeWriter::writeDISubrange(const DISubrange *N,
Record.clear();
}
+void ModuleBitcodeWriter::writeDIGenericSubrange(
+ const DIGenericSubrange *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back((uint64_t)N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawCountNode()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawLowerBound()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawUpperBound()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawStride()));
+
+ Stream.EmitRecord(bitc::METADATA_GENERIC_SUBRANGE, Record, Abbrev);
+ Record.clear();
+}
+
static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) {
if ((int64_t)V >= 0)
Vals.push_back(V << 1);
@@ -1581,6 +1620,22 @@ void ModuleBitcodeWriter::writeDIBasicType(const DIBasicType *N,
Record.clear();
}
+void ModuleBitcodeWriter::writeDIStringType(const DIStringType *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getStringLength()));
+ Record.push_back(VE.getMetadataOrNullID(N->getStringLengthExp()));
+ Record.push_back(N->getSizeInBits());
+ Record.push_back(N->getAlignInBits());
+ Record.push_back(N->getEncoding());
+
+ Stream.EmitRecord(bitc::METADATA_STRING_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
void ModuleBitcodeWriter::writeDIDerivedType(const DIDerivedType *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
@@ -1630,6 +1685,9 @@ void ModuleBitcodeWriter::writeDICompositeType(
Record.push_back(VE.getMetadataOrNullID(N->getRawIdentifier()));
Record.push_back(VE.getMetadataOrNullID(N->getDiscriminator()));
Record.push_back(VE.getMetadataOrNullID(N->getRawDataLocation()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawAssociated()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawAllocated()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawRank()));
Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev);
Record.clear();
@@ -1813,6 +1871,7 @@ void ModuleBitcodeWriter::writeDIModule(const DIModule *N,
for (auto &I : N->operands())
Record.push_back(VE.getMetadataOrNullID(I));
Record.push_back(N->getLineNo());
+ Record.push_back(N->getIsDecl());
Stream.EmitRecord(bitc::METADATA_MODULE, Record, Abbrev);
Record.clear();
@@ -2374,6 +2433,8 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
unsigned AbbrevToUse = 0;
if (C->isNullValue()) {
Code = bitc::CST_CODE_NULL;
+ } else if (isa<PoisonValue>(C)) {
+ Code = bitc::CST_CODE_POISON;
} else if (isa<UndefValue>(C)) {
Code = bitc::CST_CODE_UNDEF;
} else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
@@ -2941,14 +3002,13 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.push_back(VE.getTypeID(AI.getAllocatedType()));
Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
- unsigned AlignRecord = Log2_32(AI.getAlignment()) + 1;
- assert(Log2_32(Value::MaximumAlignment) + 1 < 1 << 5 &&
- "not enough bits for maximum alignment");
- assert(AlignRecord < 1 << 5 && "alignment greater than 1 << 64");
- AlignRecord |= AI.isUsedWithInAlloca() << 5;
- AlignRecord |= 1 << 6;
- AlignRecord |= AI.isSwiftError() << 7;
- Vals.push_back(AlignRecord);
+ using APV = AllocaPackedValues;
+ unsigned Record = 0;
+ Bitfield::set<APV::Align>(Record, getEncodedAlign(AI.getAlign()));
+ Bitfield::set<APV::UsedWithInAlloca>(Record, AI.isUsedWithInAlloca());
+ Bitfield::set<APV::ExplicitType>(Record, true);
+ Bitfield::set<APV::SwiftError>(Record, AI.isSwiftError());
+ Vals.push_back(Record);
break;
}
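The rewritten alloca path packs the same four fields the old shift sequence produced: a 5-bit alignment code, the inalloca flag, an explicit-type bit, and the swifterror flag, now set through Bitfield::set. A standalone sketch of that packing layout, with field positions taken from the replaced shifts rather than LLVM's Bitfield helper:

#include <cassert>
#include <cstdint>

// Pack the alloca record fields into one word: bits 0-4 hold the encoded
// alignment (log2+1), bit 5 the inalloca flag, bit 6 the explicit-type
// marker, bit 7 the swifterror flag. Layout follows the shifts in the
// code this patch replaces.
static uint64_t packAllocaRecord(unsigned EncodedAlign, bool UsedWithInAlloca,
                                 bool ExplicitType, bool SwiftError) {
  assert(EncodedAlign < (1u << 5) && "alignment code needs 5 bits");
  uint64_t Record = EncodedAlign;
  Record |= uint64_t(UsedWithInAlloca) << 5;
  Record |= uint64_t(ExplicitType) << 6;
  Record |= uint64_t(SwiftError) << 7;
  return Record;
}

// Example: packAllocaRecord(/*align 16 -> */5, false, true, false) == 0x45.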
@@ -2962,7 +3022,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
}
Vals.push_back(VE.getTypeID(I.getType()));
- Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
+ Vals.push_back(getEncodedAlign(cast<LoadInst>(I).getAlign()));
Vals.push_back(cast<LoadInst>(I).isVolatile());
if (cast<LoadInst>(I).isAtomic()) {
Vals.push_back(getEncodedOrdering(cast<LoadInst>(I).getOrdering()));
@@ -2976,7 +3036,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Code = bitc::FUNC_CODE_INST_STORE;
pushValueAndType(I.getOperand(1), InstID, Vals); // ptrty + ptr
pushValueAndType(I.getOperand(0), InstID, Vals); // valty + val
- Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
+ Vals.push_back(getEncodedAlign(cast<StoreInst>(I).getAlign()));
Vals.push_back(cast<StoreInst>(I).isVolatile());
if (cast<StoreInst>(I).isAtomic()) {
Vals.push_back(getEncodedOrdering(cast<StoreInst>(I).getOrdering()));
@@ -3539,8 +3599,10 @@ void IndexBitcodeWriter::writeModStrings() {
/// Write the function type metadata related records that need to appear before
/// a function summary entry (whether per-module or combined).
+template <typename Fn>
static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream,
- FunctionSummary *FS) {
+ FunctionSummary *FS,
+ Fn GetValueID) {
if (!FS->type_tests().empty())
Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests());
@@ -3569,7 +3631,7 @@ static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream,
Record.clear();
Record.push_back(VC.VFunc.GUID);
Record.push_back(VC.VFunc.Offset);
- Record.insert(Record.end(), VC.Args.begin(), VC.Args.end());
+ llvm::append_range(Record, VC.Args);
Stream.EmitRecord(Ty, Record);
}
};
@@ -3590,16 +3652,25 @@ static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream,
if (!FS->paramAccesses().empty()) {
Record.clear();
for (auto &Arg : FS->paramAccesses()) {
+ size_t UndoSize = Record.size();
Record.push_back(Arg.ParamNo);
WriteRange(Arg.Use);
Record.push_back(Arg.Calls.size());
for (auto &Call : Arg.Calls) {
Record.push_back(Call.ParamNo);
- Record.push_back(Call.Callee);
+ Optional<unsigned> ValueID = GetValueID(Call.Callee);
+ if (!ValueID) {
+ // If ValueID is unknown we can't drop just this call, we must drop
+ // the entire parameter.
+ Record.resize(UndoSize);
+ break;
+ }
+ Record.push_back(*ValueID);
WriteRange(Call.Offsets);
}
}
- Stream.EmitRecord(bitc::FS_PARAM_ACCESS, Record);
+ if (!Record.empty())
+ Stream.EmitRecord(bitc::FS_PARAM_ACCESS, Record);
}
}
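The new GetValueID callback can fail to resolve a callee; when it does, the writer truncates the record back to the size it had before the parameter's entries were appended and drops the whole parameter. The same append-then-rollback shape, sketched with a plain vector and a hypothetical resolver:

#include <cstdint>
#include <optional>
#include <vector>

// Append a group of entries to Record, but drop the whole group if any
// element cannot be resolved (mirrors the UndoSize handling above).
template <typename ResolveFn>
static void appendGroupOrUndo(std::vector<uint64_t> &Record,
                              const std::vector<uint64_t> &Elems,
                              ResolveFn Resolve) {
  const size_t UndoSize = Record.size();
  for (uint64_t E : Elems) {
    std::optional<uint64_t> Id = Resolve(E);
    if (!Id) {
      Record.resize(UndoSize); // roll back everything added for this group
      return;
    }
    Record.push_back(*Id);
  }
}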
@@ -3634,7 +3705,7 @@ static void writeWholeProgramDevirtResolutionByArg(
SmallVector<uint64_t, 64> &NameVals, const std::vector<uint64_t> &args,
const WholeProgramDevirtResolution::ByArg &ByArg) {
NameVals.push_back(args.size());
- NameVals.insert(NameVals.end(), args.begin(), args.end());
+ llvm::append_range(NameVals, args);
NameVals.push_back(ByArg.TheKind);
NameVals.push_back(ByArg.Info);
@@ -3696,7 +3767,11 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
- writeFunctionTypeMetadataRecords(Stream, FS);
+
+ writeFunctionTypeMetadataRecords(
+ Stream, FS, [&](const ValueInfo &VI) -> Optional<unsigned> {
+ return {VE.getValueID(VI.getValue())};
+ });
auto SpecialRefCnts = FS->specialRefCounts();
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
@@ -3757,7 +3832,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
NameVals.push_back(VE.getValueID(RI.getValue()));
// Sort the refs for determinism output, the vector returned by FS->refs() has
// been initialized from a DenseSet.
- llvm::sort(NameVals.begin() + SizeBeforeRefs, NameVals.end());
+ llvm::sort(drop_begin(NameVals, SizeBeforeRefs));
if (VTableFuncs.empty())
Stream.EmitRecord(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS, NameVals,
@@ -4073,8 +4148,38 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
return;
}
+ auto GetValueId = [&](const ValueInfo &VI) -> Optional<unsigned> {
+ GlobalValue::GUID GUID = VI.getGUID();
+ Optional<unsigned> CallValueId = getValueId(GUID);
+ if (CallValueId)
+ return CallValueId;
+ // For SamplePGO, the indirect call targets for local functions will
+ // have its original name annotated in profile. We try to find the
+ // corresponding PGOFuncName as the GUID.
+ GUID = Index.getGUIDFromOriginalID(GUID);
+ if (!GUID)
+ return None;
+ CallValueId = getValueId(GUID);
+ if (!CallValueId)
+ return None;
+ // The mapping from OriginalId to GUID may return a GUID
+ // that corresponds to a static variable. Filter it out here.
+ // This can happen when
+ // 1) There is a call to a library function which does not have
+ // a CallValueId;
+ // 2) There is a static variable with the OriginalGUID identical
+ // to the GUID of the library function in 1);
+ // When this happens, the logic for SamplePGO kicks in and
+ // the static variable in 2) will be found, which needs to be
+ // filtered out.
+ auto *GVSum = Index.getGlobalValueSummary(GUID, false);
+ if (GVSum && GVSum->getSummaryKind() == GlobalValueSummary::GlobalVarKind)
+ return None;
+ return CallValueId;
+ };
+
auto *FS = cast<FunctionSummary>(S);
- writeFunctionTypeMetadataRecords(Stream, FS);
+ writeFunctionTypeMetadataRecords(Stream, FS, GetValueId);
getReferencedTypeIds(FS, ReferencedTypeIds);
NameVals.push_back(*ValueId);
@@ -4116,33 +4221,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
for (auto &EI : FS->calls()) {
// If this GUID doesn't have a value id, it doesn't have a function
// summary and we don't need to record any calls to it.
- GlobalValue::GUID GUID = EI.first.getGUID();
- auto CallValueId = getValueId(GUID);
- if (!CallValueId) {
- // For SamplePGO, the indirect call targets for local functions will
- // have its original name annotated in profile. We try to find the
- // corresponding PGOFuncName as the GUID.
- GUID = Index.getGUIDFromOriginalID(GUID);
- if (GUID == 0)
- continue;
- CallValueId = getValueId(GUID);
- if (!CallValueId)
- continue;
- // The mapping from OriginalId to GUID may return a GUID
- // that corresponds to a static variable. Filter it out here.
- // This can happen when
- // 1) There is a call to a library function which does not have
- // a CallValidId;
- // 2) There is a static variable with the OriginalGUID identical
- // to the GUID of the library function in 1);
- // When this happens, the logic for SamplePGO kicks in and
- // the static variable in 2) will be found, which needs to be
- // filtered out.
- auto *GVSum = Index.getGlobalValueSummary(GUID, false);
- if (GVSum &&
- GVSum->getSummaryKind() == GlobalValueSummary::GlobalVarKind)
- continue;
- }
+ Optional<unsigned> CallValueId = GetValueId(EI.first);
+ if (!CallValueId)
+ continue;
NameVals.push_back(*CallValueId);
if (HasProfileData)
NameVals.push_back(static_cast<uint8_t>(EI.second.Hotness));
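The refactor hoists the SamplePGO fallback into a single GetValueId lambda: look the GUID up directly, otherwise remap it through the original-ID table, and reject matches that turn out to be global-variable summaries. A simplified standalone model of that two-step lookup, using hypothetical map members rather than the real ModuleSummaryIndex:

#include <cstdint>
#include <optional>
#include <unordered_map>

enum class SummaryKind { Function, GlobalVar };

struct MiniIndex {
  std::unordered_map<uint64_t, unsigned> ValueIds;        // GUID -> value id
  std::unordered_map<uint64_t, uint64_t> OriginalToGuid;  // original id -> GUID
  std::unordered_map<uint64_t, SummaryKind> Summaries;    // GUID -> summary kind
};

// Resolve a GUID to a value id, falling back to the original-name mapping
// (as SamplePGO requires) and filtering out global-variable summaries.
static std::optional<unsigned> getValueIdFor(const MiniIndex &Index,
                                             uint64_t Guid) {
  if (auto It = Index.ValueIds.find(Guid); It != Index.ValueIds.end())
    return It->second;
  auto Orig = Index.OriginalToGuid.find(Guid);
  if (Orig == Index.OriginalToGuid.end())
    return std::nullopt;
  auto It = Index.ValueIds.find(Orig->second);
  if (It == Index.ValueIds.end())
    return std::nullopt;
  auto Sum = Index.Summaries.find(Orig->second);
  if (Sum != Index.Summaries.end() && Sum->second == SummaryKind::GlobalVar)
    return std::nullopt; // remapped to a static variable: not a real callee
  return It->second;
}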
@@ -4404,8 +4485,8 @@ static void writeBitcodeHeader(BitstreamWriter &Stream) {
Stream.Emit(0xD, 4);
}
-BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer)
- : Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) {
+BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS)
+ : Buffer(Buffer), Stream(new BitstreamWriter(Buffer, FS, FlushThreshold)) {
writeBitcodeHeader(*Stream);
}
@@ -4516,7 +4597,7 @@ void llvm::WriteBitcodeToFile(const Module &M, raw_ostream &Out,
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
- BitcodeWriter Writer(Buffer);
+ BitcodeWriter Writer(Buffer, dyn_cast<raw_fd_stream>(&Out));
Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash,
ModHash);
Writer.writeSymtab();
@@ -4526,7 +4607,8 @@ void llvm::WriteBitcodeToFile(const Module &M, raw_ostream &Out,
emitDarwinBCHeaderAndTrailer(Buffer, TT);
// Write the generated bitstream to "Out".
- Out.write((char*)&Buffer.front(), Buffer.size());
+ if (!Buffer.empty())
+ Out.write((char *)&Buffer.front(), Buffer.size());
}
void IndexBitcodeWriter::write() {
@@ -4730,6 +4812,9 @@ static const char *getSectionNameForBitcode(const Triple &T) {
case Triple::Wasm:
case Triple::UnknownObjectFormat:
return ".llvmbc";
+ case Triple::GOFF:
+ llvm_unreachable("GOFF is not yet implemented");
+ break;
case Triple::XCOFF:
llvm_unreachable("XCOFF is not yet implemented");
break;
@@ -4746,6 +4831,9 @@ static const char *getSectionNameForCommandline(const Triple &T) {
case Triple::Wasm:
case Triple::UnknownObjectFormat:
return ".llvmcmd";
+ case Triple::GOFF:
+ llvm_unreachable("GOFF is not yet implemented");
+ break;
case Triple::XCOFF:
llvm_unreachable("XCOFF is not yet implemented");
break;
@@ -4754,8 +4842,8 @@ static const char *getSectionNameForCommandline(const Triple &T) {
}
void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
- bool EmbedBitcode, bool EmbedMarker,
- const std::vector<uint8_t> *CmdArgs) {
+ bool EmbedBitcode, bool EmbedCmdline,
+ const std::vector<uint8_t> &CmdArgs) {
// Save llvm.compiler.used and remove it.
SmallVector<Constant *, 2> UsedArray;
SmallPtrSet<GlobalValue *, 4> UsedGlobals;
@@ -4774,11 +4862,10 @@ void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
std::string Data;
ArrayRef<uint8_t> ModuleData;
Triple T(M.getTargetTriple());
- // Create a constant that contains the bitcode.
- // In case of embedding a marker, ignore the input Buf and use the empty
- // ArrayRef. It is also legal to create a bitcode marker even Buf is empty.
+
if (EmbedBitcode) {
- if (!isBitcode((const unsigned char *)Buf.getBufferStart(),
+ if (Buf.getBufferSize() == 0 ||
+ !isBitcode((const unsigned char *)Buf.getBufferStart(),
(const unsigned char *)Buf.getBufferEnd())) {
// If the input is LLVM Assembly, bitcode is produced by serializing
// the module. Use-lists order need to be preserved in this case.
@@ -4797,6 +4884,9 @@ void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage,
ModuleConstant);
GV->setSection(getSectionNameForBitcode(T));
+ // Set alignment to 1 to prevent padding between two contributions from input
+ // sections after linking.
+ GV->setAlignment(Align(1));
UsedArray.push_back(
ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
if (llvm::GlobalVariable *Old =
@@ -4810,16 +4900,17 @@ void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
}
// Skip if only bitcode needs to be embedded.
- if (EmbedMarker) {
+ if (EmbedCmdline) {
// Embed command-line options.
- ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CmdArgs->data()),
- CmdArgs->size());
+ ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CmdArgs.data()),
+ CmdArgs.size());
llvm::Constant *CmdConstant =
llvm::ConstantDataArray::get(M.getContext(), CmdData);
GV = new llvm::GlobalVariable(M, CmdConstant->getType(), true,
llvm::GlobalValue::PrivateLinkage,
CmdConstant);
GV->setSection(getSectionNameForCommandline(T));
+ GV->setAlignment(Align(1));
UsedArray.push_back(
ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) {
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 8bdddc27e95a..bbee8b324954 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -11,11 +11,9 @@
//===----------------------------------------------------------------------===//
#include "ValueEnumerator.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -32,7 +30,6 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
-#include "llvm/IR/UseListOrder.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
@@ -42,12 +39,9 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <cassert>
#include <cstddef>
#include <iterator>
#include <tuple>
-#include <utility>
-#include <vector>
using namespace llvm;
@@ -84,6 +78,16 @@ struct OrderMap {
} // end anonymous namespace
+/// Look for a value that might be wrapped as metadata, e.g. a value in a
+/// metadata operand. Returns nullptr for a non-wrapped input value if
+/// OnlyWrapped is true, or it returns the input value as-is if false.
+static const Value *skipMetadataWrapper(const Value *V, bool OnlyWrapped) {
+ if (const auto *MAV = dyn_cast<MetadataAsValue>(V))
+ if (const auto *VAM = dyn_cast<ValueAsMetadata>(MAV->getMetadata()))
+ return VAM->getValue();
+ return OnlyWrapped ? nullptr : V;
+}
+
static void orderValue(const Value *V, OrderMap &OM) {
if (OM.lookup(V).first)
return;
@@ -129,6 +133,25 @@ static OrderMap orderModule(const Module &M) {
if (!isa<GlobalValue>(U.get()))
orderValue(U.get(), OM);
}
+
+ // As constants used in metadata operands are emitted as module-level
+ // constants, we must order them before other operands. Also, we must order
+ // these before global values, as these will be read before setting the
+ // global values' initializers. The latter matters for constants which have
+ // uses towards other constants that are used as initializers.
+ for (const Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ for (const Value *V : I.operands()) {
+ if (const Value *Op = skipMetadataWrapper(V, true)) {
+ if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
+ isa<InlineAsm>(*Op))
+ orderValue(Op, OM);
+ }
+ }
+ }
OM.LastGlobalConstantID = OM.size();
// Initializers of GlobalValues are processed in
@@ -979,6 +1002,8 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
EnumerateValue(&I);
if (I.hasAttribute(Attribute::ByVal))
EnumerateType(I.getParamByValType());
+ else if (I.hasAttribute(Attribute::StructRet))
+ EnumerateType(I.getParamStructRetType());
}
FirstFuncConstantID = Values.size();
diff --git a/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp b/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp
index 2739137c1e44..28adfe6268f9 100644
--- a/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp
@@ -27,8 +27,7 @@ Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
if (BlockInfo) {
if (const BitstreamBlockInfo::BlockInfo *Info =
BlockInfo->getBlockInfo(BlockID)) {
- CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
- Info->Abbrevs.end());
+ llvm::append_range(CurAbbrevs, Info->Abbrevs);
}
}
@@ -156,8 +155,9 @@ Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
report_fatal_error("Array element type can't be an Array or a Blob");
case BitCodeAbbrevOp::Fixed:
assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
- if (Error Err = JumpToBit(GetCurrentBitNo() +
- NumElts * EltEnc.getEncodingData()))
+ if (Error Err =
+ JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
+ EltEnc.getEncodingData()))
return std::move(Err);
break;
case BitCodeAbbrevOp::VBR:
@@ -186,7 +186,7 @@ Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
SkipToFourByteBoundary(); // 32-bit alignment
// Figure out where the end of this blob will be including tail padding.
- size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8;
+ const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
// If this would read off the end of the bitcode file, just set the
// record to empty and return.
@@ -314,7 +314,7 @@ Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
// Figure out where the end of this blob will be including tail padding.
size_t CurBitPos = GetCurrentBitNo();
- size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8;
+ const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
// If this would read off the end of the bitcode file, just set the
// record to empty and return.
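Both hunks replace the hand-rolled ((NumElts+3)&~3)*8 round-up with alignTo(NumElts, 4) * 8, and the Fixed-array case additionally widens the multiplication to 64 bits so large arrays cannot overflow the bit offset. A quick sketch showing the two round-up expressions agree:

#include <cassert>
#include <cstdint>

// Round V up to the next multiple of 4, the same result the old
// bit-twiddling ((V + 3) & ~3) produced for the blob tail padding.
static uint64_t alignTo4(uint64_t V) { return (V + 3) / 4 * 4; }

int main() {
  for (uint64_t V = 0; V < 64; ++V)
    assert(alignTo4(V) == ((V + 3) & ~uint64_t(3)));
  // Widening NumElts to uint64_t before multiplying (as the Fixed case now
  // does) keeps NumElts * EltWidth from overflowing 32 bits.
  return 0;
}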
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
index c99800659bfd..2aef1234ac0e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -26,17 +26,15 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
// Compare VirtRegMap::getRegAllocPref().
-AllocationOrder::AllocationOrder(unsigned VirtReg,
- const VirtRegMap &VRM,
- const RegisterClassInfo &RegClassInfo,
- const LiveRegMatrix *Matrix)
- : Pos(0), HardHints(false) {
+AllocationOrder AllocationOrder::create(unsigned VirtReg, const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix) {
const MachineFunction &MF = VRM.getMachineFunction();
const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
- Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
- if (TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix))
- HardHints = true;
- rewind();
+ auto Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
+ SmallVector<MCPhysReg, 16> Hints;
+ bool HardHints =
+ TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix);
LLVM_DEBUG({
if (!Hints.empty()) {
@@ -51,4 +49,5 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
assert(is_contained(Order, Hints[I]) &&
"Target hint is outside allocation order.");
#endif
+ return AllocationOrder(std::move(Hints), Order, HardHints);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
index fa0690ab4ea5..0701e6810100 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
@@ -17,9 +17,9 @@
#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCRegister.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
namespace llvm {
@@ -28,67 +28,95 @@ class VirtRegMap;
class LiveRegMatrix;
class LLVM_LIBRARY_VISIBILITY AllocationOrder {
- SmallVector<MCPhysReg, 16> Hints;
+ const SmallVector<MCPhysReg, 16> Hints;
ArrayRef<MCPhysReg> Order;
- int Pos;
-
- // If HardHints is true, *only* Hints will be returned.
- bool HardHints;
+ // How far into the Order we can iterate. This is 0 if the AllocationOrder is
+ // constructed with HardHints = true, Order.size() otherwise. While
+ // technically a size_t, it will participate in comparisons with the
+ // Iterator's Pos, which must be signed, so it's typed here as signed, too, to
+ // avoid warnings and under the assumption that the size of Order is
+ // relatively small.
+ // IterationLimit defines an invalid iterator position.
+ const int IterationLimit;
public:
+ /// Forward iterator for an AllocationOrder.
+ class Iterator final {
+ const AllocationOrder &AO;
+ int Pos = 0;
+
+ public:
+ Iterator(const AllocationOrder &AO, int Pos) : AO(AO), Pos(Pos) {}
+
+ /// Return true if the current position is that of a preferred register.
+ bool isHint() const { return Pos < 0; }
+
+ /// Return the next physical register in the allocation order.
+ MCRegister operator*() const {
+ if (Pos < 0)
+ return AO.Hints.end()[Pos];
+ assert(Pos < AO.IterationLimit);
+ return AO.Order[Pos];
+ }
+
+ /// Advance the iterator to the next position. If that's past the Hints
+ /// list, advance to the first value that's not also in the Hints list.
+ Iterator &operator++() {
+ if (Pos < AO.IterationLimit)
+ ++Pos;
+ while (Pos >= 0 && Pos < AO.IterationLimit && AO.isHint(AO.Order[Pos]))
+ ++Pos;
+ return *this;
+ }
+
+ bool operator==(const Iterator &Other) const {
+ assert(&AO == &Other.AO);
+ return Pos == Other.Pos;
+ }
+
+ bool operator!=(const Iterator &Other) const { return !(*this == Other); }
+ };
/// Create a new AllocationOrder for VirtReg.
/// @param VirtReg Virtual register to allocate for.
/// @param VRM Virtual register map for function.
/// @param RegClassInfo Information about reserved and allocatable registers.
- AllocationOrder(unsigned VirtReg,
- const VirtRegMap &VRM,
- const RegisterClassInfo &RegClassInfo,
- const LiveRegMatrix *Matrix);
-
- /// Get the allocation order without reordered hints.
- ArrayRef<MCPhysReg> getOrder() const { return Order; }
-
- /// Return the next physical register in the allocation order, or 0.
- /// It is safe to call next() again after it returned 0, it will keep
- /// returning 0 until rewind() is called.
- unsigned next(unsigned Limit = 0) {
- if (Pos < 0)
- return Hints.end()[Pos++];
- if (HardHints)
- return 0;
- if (!Limit)
- Limit = Order.size();
- while (Pos < int(Limit)) {
- unsigned Reg = Order[Pos++];
- if (!isHint(Reg))
- return Reg;
- }
- return 0;
+ static AllocationOrder create(unsigned VirtReg, const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix);
+
+ /// Create an AllocationOrder given the Hints, Order, and HardHints values.
+ /// Use the create method above - the ctor is for unittests.
+ AllocationOrder(SmallVector<MCPhysReg, 16> &&Hints, ArrayRef<MCPhysReg> Order,
+ bool HardHints)
+ : Hints(std::move(Hints)), Order(Order),
+ IterationLimit(HardHints ? 0 : static_cast<int>(Order.size())) {}
+
+ Iterator begin() const {
+ return Iterator(*this, -(static_cast<int>(Hints.size())));
}
- /// As next(), but allow duplicates to be returned, and stop before the
- /// Limit'th register in the RegisterClassInfo allocation order.
- ///
- /// This can produce more than Limit registers if there are hints.
- unsigned nextWithDups(unsigned Limit) {
- if (Pos < 0)
- return Hints.end()[Pos++];
- if (HardHints)
- return 0;
- if (Pos < int(Limit))
- return Order[Pos++];
- return 0;
- }
+ Iterator end() const { return Iterator(*this, IterationLimit); }
- /// Start over from the beginning.
- void rewind() { Pos = -int(Hints.size()); }
+ Iterator getOrderLimitEnd(unsigned OrderLimit) const {
+ assert(OrderLimit <= Order.size());
+ if (OrderLimit == 0)
+ return end();
+ Iterator Ret(*this,
+ std::min(static_cast<int>(OrderLimit) - 1, IterationLimit));
+ return ++Ret;
+ }
- /// Return true if the last register returned from next() was a preferred register.
- bool isHint() const { return Pos <= 0; }
+ /// Get the allocation order without reordered hints.
+ ArrayRef<MCPhysReg> getOrder() const { return Order; }
- /// Return true if PhysReg is a preferred register.
- bool isHint(unsigned PhysReg) const { return is_contained(Hints, PhysReg); }
+ /// Return true if Reg is a preferred physical register.
+ bool isHint(Register Reg) const {
+ assert(!Reg.isPhysical() ||
+ Reg.id() <
+ static_cast<uint32_t>(std::numeric_limits<MCPhysReg>::max()));
+ return Reg.isPhysical() && is_contained(Hints, Reg.id());
+ }
};
} // end namespace llvm
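The Iterator replaces the old next()/rewind() protocol: negative positions index the hint list from its end (Hints.end()[Pos]), non-negative positions walk the class allocation order, and IterationLimit collapses to 0 when only hints may be returned. A small standalone model of that traversal, using plain vectors in place of register lists:

#include <algorithm>
#include <cstdio>
#include <vector>

// Walk Hints first (indexed from the end with a negative position, like
// Hints.end()[Pos] in the Iterator above), then Order, skipping Order
// entries that already appeared as hints. HardHints limits the walk to
// the hints alone.
static void walkAllocationOrder(const std::vector<unsigned> &Hints,
                                const std::vector<unsigned> &Order,
                                bool HardHints) {
  const int Limit = HardHints ? 0 : static_cast<int>(Order.size());
  auto IsHint = [&](unsigned Reg) {
    return std::find(Hints.begin(), Hints.end(), Reg) != Hints.end();
  };
  int Pos = -static_cast<int>(Hints.size());
  while (Pos != Limit) {
    unsigned Reg = Pos < 0 ? *(Hints.end() + Pos) : Order[Pos];
    std::printf("%u%s\n", Reg, Pos < 0 ? " (hint)" : "");
    ++Pos;
    while (Pos >= 0 && Pos < Limit && IsHint(Order[Pos]))
      ++Pos;
  }
}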
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index 7da28ffec85c..ebeff1fec30b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -88,19 +88,25 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = DL.getStructLayout(STy);
+ // If the Offsets aren't needed, don't query the struct layout. This allows
+ // us to support structs with scalable vectors for operations that don't
+ // need offsets.
+ const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
for (StructType::element_iterator EB = STy->element_begin(),
EI = EB,
EE = STy->element_end();
- EI != EE; ++EI)
+ EI != EE; ++EI) {
+ // Don't compute the element offset if we didn't get a StructLayout above.
+ uint64_t EltOffset = SL ? SL->getElementOffset(EI - EB) : 0;
ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
- StartingOffset + SL->getElementOffset(EI - EB));
+ StartingOffset + EltOffset);
+ }
return;
}
// Given an array type, recursively traverse the elements.
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Type *EltTy = ATy->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
StartingOffset + i * EltSize);
@@ -131,16 +137,21 @@ void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(&Ty)) {
- const StructLayout *SL = DL.getStructLayout(STy);
- for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
+ // If the Offsets aren't needed, don't query the struct layout. This allows
+ // us to support structs with scalable vectors for operations that don't
+ // need offsets.
+ const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
+ for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) {
+ uint64_t EltOffset = SL ? SL->getElementOffset(I) : 0;
computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
- StartingOffset + SL->getElementOffset(I));
+ StartingOffset + EltOffset);
+ }
return;
}
// Given an array type, recursively traverse the elements.
if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
Type *EltTy = ATy->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
StartingOffset + i * EltSize);
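In both ComputeValueVTs and computeValueLLTs the struct layout is now queried only when the caller actually asked for offsets, which lets structs containing scalable vectors flow through offset-free paths. A generic sketch of that pattern, where the offset bookkeeping is conditional on the optional output vector (simplified types, not the real API):

#include <cstdint>
#include <vector>

struct Field { uint64_t Size; };

// Flatten the fields of a record, appending offsets only when the caller
// provides a vector for them -- the expensive layout walk is skipped
// otherwise, mirroring the "SL = Offsets ? ... : nullptr" change above.
static void collectFields(const std::vector<Field> &Fields,
                          std::vector<uint64_t> &Sizes,
                          std::vector<uint64_t> *Offsets,
                          uint64_t StartingOffset = 0) {
  uint64_t Offset = StartingOffset;
  for (const Field &F : Fields) {
    Sizes.push_back(F.Size);
    if (Offsets) {
      Offsets->push_back(Offset);
      Offset += F.Size; // only tracked when offsets were requested
    }
  }
}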
@@ -174,27 +185,6 @@ GlobalValue *llvm::ExtractTypeInfo(Value *V) {
return GV;
}
-/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
-/// processed uses a memory 'm' constraint.
-bool
-llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
- const TargetLowering &TLI) {
- for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
- InlineAsm::ConstraintInfo &CI = CInfos[i];
- for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
- TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
- if (CType == TargetLowering::C_Memory)
- return true;
- }
-
- // Indirect operand accesses access memory.
- if (CI.isIndirect)
- return true;
- }
-
- return false;
-}
-
/// getFCmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR floating-point condition code. This includes
/// consideration of global floating-point math flags.
@@ -537,11 +527,15 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
// Debug info intrinsics do not get in the way of tail call optimization.
if (isa<DbgInfoIntrinsic>(BBI))
continue;
- // A lifetime end or assume intrinsic should not stop tail call
- // optimization.
+ // Pseudo probe intrinsics do not block tail call optimization either.
+ if (isa<PseudoProbeInst>(BBI))
+ continue;
+ // A lifetime end, assume or noalias.decl intrinsic should not stop tail
+ // call optimization.
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
- II->getIntrinsicID() == Intrinsic::assume)
+ II->getIntrinsicID() == Intrinsic::assume ||
+ II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl)
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
!isSafeToSpeculativelyExecute(&*BBI))
@@ -739,8 +733,7 @@ static void collectEHScopeMembers(
if (Visiting->isEHScopeReturnBlock())
continue;
- for (const MachineBasicBlock *Succ : Visiting->successors())
- Worklist.push_back(Succ);
+ append_range(Worklist, Visiting->successors());
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
new file mode 100644
index 000000000000..95d878e65be4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -0,0 +1,79 @@
+//===-- CodeGen/AsmPrinter/AIXException.cpp - AIX Exception Impl ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing AIX exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionXCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+AIXException::AIXException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
+
+void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
+ const MCSymbol *PerSym) {
+ // Generate EH Info Table.
+ // The EH Info Table, aka, 'compat unwind section' on AIX, has the following
+ // format: struct eh_info_t {
+ // unsigned version; /* EH info version 0 */
+ // #if defined(__64BIT__)
+ // char _pad[4]; /* padding */
+ // #endif
+ // unsigned long lsda; /* Pointer to LSDA */
+ // unsigned long personality; /* Pointer to the personality routine */
+ // }
+
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getCompactUnwindSection());
+ MCSymbol *EHInfoLabel =
+ TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(Asm->MF);
+ Asm->OutStreamer->emitLabel(EHInfoLabel);
+
+ // Version number.
+ Asm->emitInt32(0);
+
+ const DataLayout &DL = MMI->getModule()->getDataLayout();
+ const unsigned PointerSize = DL.getPointerSize();
+
+ // Add necessary padding in 64-bit mode.
+ Asm->OutStreamer->emitValueToAlignment(PointerSize);
+
+ // LSDA location.
+ Asm->OutStreamer->emitValue(MCSymbolRefExpr::create(LSDA, Asm->OutContext),
+ PointerSize);
+
+ // Personality routine.
+ Asm->OutStreamer->emitValue(MCSymbolRefExpr::create(PerSym, Asm->OutContext),
+ PointerSize);
+}
+
+void AIXException::endFunction(const MachineFunction *MF) {
+ if (!TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF))
+ return;
+
+ const MCSymbol *LSDALabel = emitExceptionTable();
+
+ const Function &F = MF->getFunction();
+ assert(F.hasPersonalityFn() &&
+ "Landingpads are presented, but no personality routine is found.");
+ const Function *Per =
+ dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ const MCSymbol *PerSym = Asm->TM.getSymbol(Per);
+
+ emitExceptionInfoTable(LSDALabel, PerSym);
+}
+
+} // End of namespace llvm
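The comment above spells out the record: a version word, padding to pointer alignment in 64-bit mode, then pointer-sized LSDA and personality entries, which emitExceptionInfoTable reproduces with emitInt32, emitValueToAlignment, and two pointer-sized values. A sketch of the 64-bit shape with fixed-width fields, purely illustrative rather than a header the backend uses:

#include <cstdint>

// 64-bit shape of the AIX eh_info_t record emitted above: a 4-byte version,
// 4 bytes of padding to pointer alignment, then the LSDA and personality
// routine addresses.
struct EHInfo64 {
  uint32_t Version;      // EH info version, emitted as 0
  uint32_t Pad;          // alignment padding in 64-bit mode
  uint64_t Lsda;         // address of the language-specific data area
  uint64_t Personality;  // address of the personality routine
};

static_assert(sizeof(EHInfo64) == 24, "version + pad + two 8-byte pointers");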
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index dea0227f7578..4e45a0ffc60f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -190,7 +190,6 @@ public:
template <typename DataT>
class Dwarf5AccelTableWriter : public AccelTableWriter {
struct Header {
- uint32_t UnitLength = 0;
uint16_t Version = 5;
uint16_t Padding = 0;
uint32_t CompUnitCount;
@@ -271,7 +270,7 @@ void AccelTableWriter::emitOffsets(const MCSymbol *Base) const {
continue;
PrevHash = HashValue;
Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i));
- Asm->emitLabelDifference(Hash->Sym, Base, sizeof(uint32_t));
+ Asm->emitLabelDifference(Hash->Sym, Base, Asm->getDwarfOffsetByteSize());
}
}
}
@@ -367,9 +366,8 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit(
assert(CompUnitCount > 0 && "Index must have at least one CU.");
AsmPrinter *Asm = Ctx.Asm;
- Asm->OutStreamer->AddComment("Header: unit length");
- Asm->emitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart,
- sizeof(uint32_t));
+ Asm->emitDwarfUnitLength(Ctx.ContributionEnd, Ctx.ContributionStart,
+ "Header: unit length");
Asm->OutStreamer->emitLabel(Ctx.ContributionStart);
Asm->OutStreamer->AddComment("Header: version");
Asm->emitInt16(Version);
@@ -506,7 +504,7 @@ template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const {
for (const auto *Value : Hash->Values)
emitEntry(*static_cast<const DataT *>(Value));
Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString());
- Asm->emitInt32(0);
+ Asm->emitInt8(0);
}
}
}
@@ -593,10 +591,14 @@ void llvm::emitDWARF5AccelTable(
}
void AppleAccelTableOffsetData::emit(AsmPrinter *Asm) const {
+ assert(Die.getDebugSectionOffset() <= UINT32_MAX &&
+ "The section offset exceeds the limit.");
Asm->emitInt32(Die.getDebugSectionOffset());
}
void AppleAccelTableTypeData::emit(AsmPrinter *Asm) const {
+ assert(Die.getDebugSectionOffset() <= UINT32_MAX &&
+ "The section offset exceeds the limit.");
Asm->emitInt32(Die.getDebugSectionOffset());
Asm->emitInt16(Die.getTag());
Asm->emitInt8(0);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 883aaf5aefc4..3df8e35accc4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -29,9 +29,7 @@ MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start");
MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end");
- Asm.OutStreamer->AddComment("Length of contribution");
- Asm.emitLabelDifference(EndLabel, BeginLabel,
- 4); // TODO: Support DWARF64 format.
+ Asm.emitDwarfUnitLength(EndLabel, BeginLabel, "Length of contribution");
Asm.OutStreamer->emitLabel(BeginLabel);
Asm.OutStreamer->AddComment("DWARF version number");
Asm.emitInt16(Asm.getDwarfVersion());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
index f92cf72093ca..f1edc6c330d5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -48,7 +48,7 @@ public:
bool hasBeenUsed() const { return HasBeenUsed; }
- void resetUsedFlag() { HasBeenUsed = false; }
+ void resetUsedFlag(bool HasBeenUsed = false) { this->HasBeenUsed = HasBeenUsed; }
MCSymbol *getLabel() { return AddressTableBaseSym; }
void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c7eb0257d71b..85754bf29d0c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -14,6 +14,7 @@
#include "CodeViewDebug.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
+#include "PseudoProbePrinter.h"
#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
@@ -30,6 +31,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -77,6 +79,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -131,17 +134,25 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-static const char *const DWARFGroupName = "dwarf";
-static const char *const DWARFGroupDescription = "DWARF Emission";
-static const char *const DbgTimerName = "emit";
-static const char *const DbgTimerDescription = "Debug Info Emission";
-static const char *const EHTimerName = "write_exception";
-static const char *const EHTimerDescription = "DWARF Exception Writer";
-static const char *const CFGuardName = "Control Flow Guard";
-static const char *const CFGuardDescription = "Control Flow Guard";
-static const char *const CodeViewLineTablesGroupName = "linetables";
-static const char *const CodeViewLineTablesGroupDescription =
- "CodeView Line Tables";
+// FIXME: this option currently only applies to DWARF, and not CodeView, tables
+static cl::opt<bool>
+ DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
+const char DWARFGroupName[] = "dwarf";
+const char DWARFGroupDescription[] = "DWARF Emission";
+const char DbgTimerName[] = "emit";
+const char DbgTimerDescription[] = "Debug Info Emission";
+const char EHTimerName[] = "write_exception";
+const char EHTimerDescription[] = "DWARF Exception Writer";
+const char CFGuardName[] = "Control Flow Guard";
+const char CFGuardDescription[] = "Control Flow Guard";
+const char CodeViewLineTablesGroupName[] = "linetables";
+const char CodeViewLineTablesGroupDescription[] = "CodeView Line Tables";
+const char PPTimerName[] = "emit";
+const char PPTimerDescription[] = "Pseudo Probe Emission";
+const char PPGroupName[] = "pseudo probe";
+const char PPGroupDescription[] = "Pseudo Probe Emission";
STATISTIC(EmittedInsts, "Number of machine instrs printed");
@@ -188,7 +199,8 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
}
AsmPrinter::~AsmPrinter() {
- assert(!DD && Handlers.empty() && "Debug/EH info didn't get finalized");
+ assert(!DD && Handlers.size() == NumUserHandlers &&
+ "Debug/EH info didn't get finalized");
if (GCMetadataPrinters) {
gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
@@ -231,9 +243,11 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
}
void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) {
- assert(DD && "Dwarf debug file is not defined.");
- assert(OutStreamer->hasRawTextSupport() && "Expected assembly output mode.");
- (void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
+ if (DD) {
+ assert(OutStreamer->hasRawTextSupport() &&
+ "Expected assembly output mode.");
+ (void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
+ }
}
/// getCurrentSection() - Return the current section we are emitting to.
@@ -261,6 +275,9 @@ bool AsmPrinter::doInitialization(Module &M) {
OutStreamer->InitSections(false);
+ if (DisableDebugInfoPrinting)
+ MMI->setDebugInfoAvailability(false);
+
// Emit the version-min deployment target directive if needed.
//
// FIXME: If we end up with a collection of these sorts of Darwin-specific
@@ -296,6 +313,7 @@ bool AsmPrinter::doInitialization(Module &M) {
std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
TM.getTargetTriple().str(), TM.getTargetCPU(),
TM.getTargetFeatureString()));
+ assert(STI && "Unable to create subtarget info");
OutStreamer->AddComment("Start of file scope inline assembly");
OutStreamer->AddBlankLine();
emitInlineAsm(M.getModuleInlineAsm() + "\n",
@@ -313,14 +331,21 @@ bool AsmPrinter::doInitialization(Module &M) {
CodeViewLineTablesGroupDescription);
}
if (!EmitCodeView || M.getDwarfVersion()) {
- DD = new DwarfDebug(this, &M);
- DD->beginModule();
- Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
- DbgTimerDescription, DWARFGroupName,
- DWARFGroupDescription);
+ if (!DisableDebugInfoPrinting) {
+ DD = new DwarfDebug(this);
+ Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
+ DbgTimerDescription, DWARFGroupName,
+ DWARFGroupDescription);
+ }
}
}
+ if (M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+ PP = new PseudoProbeHandler(this, &M);
+ Handlers.emplace_back(std::unique_ptr<PseudoProbeHandler>(PP), PPTimerName,
+ PPTimerDescription, PPGroupName, PPGroupDescription);
+ }
+
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
@@ -368,6 +393,9 @@ bool AsmPrinter::doInitialization(Module &M) {
case ExceptionHandling::Wasm:
ES = new WasmException(this);
break;
+ case ExceptionHandling::AIX:
+ ES = new AIXException(this);
+ break;
}
if (ES)
Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName,
@@ -379,6 +407,13 @@ bool AsmPrinter::doInitialization(Module &M) {
Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
CFGuardDescription, DWARFGroupName,
DWARFGroupDescription);
+
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->beginModule(&M);
+ }
+
return false;
}
@@ -449,10 +484,8 @@ MCSymbol *AsmPrinter::getSymbolPreferLocal(const GlobalValue &GV) const {
if (TM.getTargetTriple().isOSBinFormatELF() && GV.canBenefitFromLocalAlias()) {
const Module &M = *GV.getParent();
if (TM.getRelocationModel() != Reloc::Static &&
- M.getPIELevel() == PIELevel::Default)
- if (GV.isDSOLocal() || (TM.getTargetTriple().isX86() &&
- GV.getParent()->noSemanticInterposition()))
- return getSymbolWithGlobalValueBase(&GV, "$local");
+ M.getPIELevel() == PIELevel::Default && GV.isDSOLocal())
+ return getSymbolWithGlobalValueBase(&GV, "$local");
}
return TM.getSymbol(&GV);
}
@@ -500,8 +533,8 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
GVSym->redefineIfPossible();
if (GVSym->isDefined() || GVSym->isVariable())
- report_fatal_error("symbol '" + Twine(GVSym->getName()) +
- "' is already defined");
+ OutContext.reportError(SMLoc(), "symbol '" + Twine(GVSym->getName()) +
+ "' is already defined");
if (MAI->hasDotTypeDotSizeDirective())
OutStreamer->emitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject);
@@ -812,13 +845,21 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
if ((Size = MI.getRestoreSize(TII))) {
CommentOS << *Size << "-byte Reload\n";
} else if ((Size = MI.getFoldedRestoreSize(TII))) {
- if (*Size)
- CommentOS << *Size << "-byte Folded Reload\n";
+ if (*Size) {
+ if (*Size == unsigned(MemoryLocation::UnknownSize))
+ CommentOS << "Unknown-size Folded Reload\n";
+ else
+ CommentOS << *Size << "-byte Folded Reload\n";
+ }
} else if ((Size = MI.getSpillSize(TII))) {
CommentOS << *Size << "-byte Spill\n";
} else if ((Size = MI.getFoldedSpillSize(TII))) {
- if (*Size)
- CommentOS << *Size << "-byte Folded Spill\n";
+ if (*Size) {
+ if (*Size == unsigned(MemoryLocation::UnknownSize))
+ CommentOS << "Unknown-size Folded Spill\n";
+ else
+ CommentOS << *Size << "-byte Folded Spill\n";
+ }
}
// Check for spill-induced copies
@@ -877,7 +918,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
// The second operand is only an offset if it's an immediate.
bool MemLoc = MI->isIndirectDebugValue();
- int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0;
+ auto Offset = StackOffset::getFixed(MemLoc ? MI->getOperand(1).getImm() : 0);
const DIExpression *Expr = MI->getDebugExpression();
if (Expr->getNumElements()) {
OS << '[';
@@ -916,6 +957,8 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
} else if (MI->getDebugOperand(0).isTargetIndex()) {
auto Op = MI->getDebugOperand(0);
OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")";
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer->emitRawComment(OS.str());
return true;
} else {
Register Reg;
@@ -941,7 +984,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
if (MemLoc)
- OS << '+' << Offset << ']';
+ OS << '+' << Offset.getFixed() << ']';
// NOTE: Want this comment at start of line, don't emit with AddComment.
AP.OutStreamer->emitRawComment(OS.str());
@@ -1023,6 +1066,56 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
MCConstantExpr::create(FrameOffset, OutContext));
}
+/// Returns the BB metadata to be emitted in the .llvm_bb_addr_map section for a
+/// given basic block. This can be used to capture more precise profile
+/// information. We use the last 3 bits (LSBs) to encode the following
+/// information:
+/// * (1): set if return block (ret or tail call).
+/// * (2): set if ends with a tail call.
+/// * (3): set if exception handling (EH) landing pad.
+/// The remaining bits are zero.
+static unsigned getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+ return ((unsigned)MBB.isReturnBlock()) |
+ ((!MBB.empty() && TII->isTailCall(MBB.back())) << 1) |
+ (MBB.isEHPad() << 2);
+}
+
+void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
+ MCSection *BBAddrMapSection =
+ getObjFileLowering().getBBAddrMapSection(*MF.getSection());
+ assert(BBAddrMapSection && ".llvm_bb_addr_map section is not initialized.");
+
+ const MCSymbol *FunctionSymbol = getFunctionBegin();
+
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(BBAddrMapSection);
+ OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
+ // Emit the total number of basic blocks in this function.
+ OutStreamer->emitULEB128IntValue(MF.size());
+ // Emit BB Information for each basic block in the function.
+ for (const MachineBasicBlock &MBB : MF) {
+ const MCSymbol *MBBSymbol =
+ MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol();
+ // Emit the basic block offset.
+ emitLabelDifferenceAsULEB128(MBBSymbol, FunctionSymbol);
+ // Emit the basic block size. When BBs have alignments, their size cannot
+ // always be computed from their offsets.
+ emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
+ OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
+ }
+ OutStreamer->PopSection();
+}
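Each .llvm_bb_addr_map entry carries the function symbol, a block count, and per-block (offset, size, metadata) triples, with getBBAddrMapMetadata packing the return/tail-call/EH-pad flags into the three low bits. A small decoder for that bit layout, sketched to match the packing above:

#include <cstdint>

struct BBFlags {
  bool IsReturnBlock;    // bit 0: block ends in a return (or tail call)
  bool EndsWithTailCall; // bit 1: block ends with a tail call
  bool IsEHPad;          // bit 2: exception-handling landing pad
};

// Unpack the three low bits written by getBBAddrMapMetadata above.
static BBFlags decodeBBAddrMapMetadata(uint64_t Metadata) {
  return BBFlags{/*IsReturnBlock=*/(Metadata & 1u) != 0,
                 /*EndsWithTailCall=*/(Metadata & 2u) != 0,
                 /*IsEHPad=*/(Metadata & 4u) != 0};
}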
+
+void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) {
+ auto GUID = MI.getOperand(0).getImm();
+ auto Index = MI.getOperand(1).getImm();
+ auto Type = MI.getOperand(2).getImm();
+ auto Attr = MI.getOperand(3).getImm();
+ DILocation *DebugLoc = MI.getDebugLoc();
+ PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc);
+}
+
void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
if (!MF.getTarget().Options.EmitStackSizeSection)
return;
@@ -1069,8 +1162,6 @@ void AsmPrinter::emitFunctionBody() {
// Emit target-specific gunk before the function body.
emitFunctionBodyStart();
- bool ShouldPrintDebugScopes = MMI->hasDebugInfo();
-
if (isVerbose()) {
// Get MachineDominatorTree or compute it on the fly if it's unavailable
MDT = getAnalysisIfAvailable<MachineDominatorTree>();
@@ -1093,9 +1184,11 @@ void AsmPrinter::emitFunctionBody() {
bool HasAnyRealCode = false;
int NumInstsInFunction = 0;
+ bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
for (auto &MBB : *MF) {
// Print a label for the basic block.
emitBasicBlockStart(MBB);
+ DenseMap<StringRef, unsigned> MnemonicCounts;
for (auto &MI : MBB) {
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
@@ -1108,13 +1201,10 @@ void AsmPrinter::emitFunctionBody() {
if (MCSymbol *S = MI.getPreInstrSymbol())
OutStreamer->emitLabel(S);
- if (ShouldPrintDebugScopes) {
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
- HI.TimerGroupName, HI.TimerGroupDescription,
- TimePassesIsEnabled);
- HI.Handler->beginInstruction(&MI);
- }
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->beginInstruction(&MI);
}
if (isVerbose())
@@ -1142,6 +1232,11 @@ void AsmPrinter::emitFunctionBody() {
emitInstruction(&MI);
}
break;
+ case TargetOpcode::DBG_INSTR_REF:
+ // This instruction reference will have been resolved to a machine
+ // location, and a nearby DBG_VALUE created. We can safely ignore
+ // the instruction reference.
+ break;
case TargetOpcode::DBG_LABEL:
if (isVerbose()) {
if (!emitDebugLabelComment(&MI, *this))
@@ -1154,8 +1249,18 @@ void AsmPrinter::emitFunctionBody() {
case TargetOpcode::KILL:
if (isVerbose()) emitKill(&MI, *this);
break;
+ case TargetOpcode::PSEUDO_PROBE:
+ emitPseudoProbe(MI);
+ break;
default:
emitInstruction(&MI);
+ if (CanDoExtraAnalysis) {
+ MCInst MCI;
+ MCI.setOpcode(MI.getOpcode());
+ auto Name = OutStreamer->getMnemonic(MCI);
+ auto I = MnemonicCounts.insert({Name, 0u});
+ I.first->second++;
+ }
break;
}
@@ -1163,54 +1268,69 @@ void AsmPrinter::emitFunctionBody() {
if (MCSymbol *S = MI.getPostInstrSymbol())
OutStreamer->emitLabel(S);
- if (ShouldPrintDebugScopes) {
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
- HI.TimerGroupName, HI.TimerGroupDescription,
- TimePassesIsEnabled);
- HI.Handler->endInstruction();
- }
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->endInstruction();
}
}
- // We need a temporary symbol for the end of this basic block, if either we
- // have BBLabels enabled and we want to emit size directive for the BBs, or
- // if this basic blocks marks the end of a section (except the section
- // containing the entry basic block as the end symbol for that section is
- // CurrentFnEnd).
- MCSymbol *CurrentBBEnd = nullptr;
- if ((MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels()) ||
- (MBB.isEndSection() && !MBB.sameSection(&MF->front()))) {
- CurrentBBEnd = OutContext.createTempSymbol();
- OutStreamer->emitLabel(CurrentBBEnd);
- }
+ // We must emit a temporary symbol for the end of this basic block if either
+ // we have BBLabels enabled or this basic block marks the end of a section
+ // (except the section containing the entry basic block, as the end symbol
+ // for that section is CurrentFnEnd).
+ if (MF->hasBBLabels() ||
+ (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection() &&
+ !MBB.sameSection(&MF->front())))
+ OutStreamer->emitLabel(MBB.getEndSymbol());
- // Helper for emitting the size directive associated with a basic block
- // symbol.
- auto emitELFSizeDirective = [&](MCSymbol *SymForSize) {
- assert(CurrentBBEnd && "Basicblock end symbol not set!");
- const MCExpr *SizeExp = MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(CurrentBBEnd, OutContext),
- MCSymbolRefExpr::create(SymForSize, OutContext), OutContext);
- OutStreamer->emitELFSize(SymForSize, SizeExp);
- };
-
- // Emit size directive for the size of each basic block, if BBLabels is
- // enabled.
- if (MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels())
- emitELFSizeDirective(MBB.getSymbol());
-
- // Emit size directive for the size of each basic block section once we
- // get to the end of that section.
if (MBB.isEndSection()) {
+ // The size directive for the section containing the entry block is
+ // handled separately by the function section.
if (!MBB.sameSection(&MF->front())) {
- if (MAI->hasDotTypeDotSizeDirective())
- emitELFSizeDirective(CurrentSectionBeginSym);
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ // Emit the size directive for the basic block section.
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(MBB.getEndSymbol(), OutContext),
+ MCSymbolRefExpr::create(CurrentSectionBeginSym, OutContext),
+ OutContext);
+ OutStreamer->emitELFSize(CurrentSectionBeginSym, SizeExp);
+ }
MBBSectionRanges[MBB.getSectionIDNum()] =
- MBBSectionRange{CurrentSectionBeginSym, CurrentBBEnd};
+ MBBSectionRange{CurrentSectionBeginSym, MBB.getEndSymbol()};
}
}
emitBasicBlockEnd(MBB);
+
+ if (CanDoExtraAnalysis) {
+ // Skip empty blocks.
+ if (MBB.empty())
+ continue;
+
+ MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionMix",
+ MBB.begin()->getDebugLoc(), &MBB);
+
+ // Generate the instruction mix remark. First, sort the counts in descending
+ // order by count, breaking ties by mnemonic name.
+ SmallVector<std::pair<StringRef, unsigned>, 128> MnemonicVec;
+ for (auto &KV : MnemonicCounts)
+ MnemonicVec.emplace_back(KV.first, KV.second);
+
+ sort(MnemonicVec, [](const std::pair<StringRef, unsigned> &A,
+ const std::pair<StringRef, unsigned> &B) {
+ if (A.second > B.second)
+ return true;
+ if (A.second == B.second)
+ return StringRef(A.first) < StringRef(B.first);
+ return false;
+ });
+ R << "BasicBlock: " << ore::NV("BasicBlock", MBB.getName()) << "\n";
+ for (auto &KV : MnemonicVec) {
+ auto Name = (Twine("INST_") + KV.first.trim()).str();
+ R << KV.first << ": " << ore::NV(Name, KV.second) << "\n";
+ }
+ ORE->emit(R);
+ }
}
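The CanDoExtraAnalysis block above just counts mnemonics per block and sorts them, highest count first with ties broken by name, before emitting the InstructionMix remark. A minimal standalone sketch of the same pattern, using the standard library instead of DenseMap/SmallVector (illustrative only):

#include <algorithm>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, unsigned>>
instructionMix(const std::vector<std::string> &Mnemonics) {
  std::unordered_map<std::string, unsigned> Counts;
  for (const std::string &M : Mnemonics)
    ++Counts[M]; // one bump per emitted instruction, as in the MBB loop
  std::vector<std::pair<std::string, unsigned>> Sorted(Counts.begin(),
                                                       Counts.end());
  // Highest count first; equal counts fall back to name order, matching the
  // comparator passed to sort() in the patch.
  std::sort(Sorted.begin(), Sorted.end(),
            [](const auto &A, const auto &B) {
              if (A.second != B.second)
                return A.second > B.second;
              return A.first < B.first;
            });
  return Sorted;
}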
EmittedInsts += NumInstsInFunction;
@@ -1297,6 +1417,11 @@ void AsmPrinter::emitFunctionBody() {
HI.Handler->endFunction(MF);
}
+ // Emit section containing BB address offsets and their metadata, when
+ // BB labels are requested for this function.
+ if (MF->hasBBLabels())
+ emitBBAddrMapSection(*MF);
+
// Emit section containing stack size metadata.
emitStackSizeSection(*MF);
@@ -1390,16 +1515,7 @@ void AsmPrinter::emitGlobalGOTEquivs() {
void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
const GlobalIndirectSymbol& GIS) {
MCSymbol *Name = getSymbol(&GIS);
-
- if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
- OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
- else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
- OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
- else
- assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
-
bool IsFunction = GIS.getValueType()->isFunctionTy();
-
// Treat bitcasts of functions as functions also. This is important at least
// on WebAssembly where object and function addresses can't alias each other.
if (!IsFunction)
@@ -1408,6 +1524,30 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
IsFunction =
CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
+ // AIX's assembly directive `.set` is not usable for aliasing purposes,
+ // so AIX has to use the extra-label-at-definition strategy. At this
+ // point, all the extra labels have been emitted; we just have to emit
+ // linkage for those labels.
+ if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
+ assert(!isa<GlobalIFunc>(GIS) && "IFunc is not supported on AIX.");
+ assert(MAI->hasVisibilityOnlyWithLinkage() &&
+ "Visibility should be handled with emitLinkage() on AIX.");
+ emitLinkage(&GIS, Name);
+ // If it's a function, also emit linkage for aliases of function entry
+ // point.
+ if (IsFunction)
+ emitLinkage(&GIS,
+ getObjFileLowering().getFunctionEntryPointSymbol(&GIS, TM));
+ return;
+ }
+
+ if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
+ else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
+
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
if (IsFunction)
@@ -1517,9 +1657,8 @@ bool AsmPrinter::doFinalization(Module &M) {
// Variable `Name` is the function descriptor symbol (see above). Get the
// function entry point symbol.
MCSymbol *FnEntryPointSym = TLOF.getFunctionEntryPointSymbol(&F, TM);
- if (cast<MCSymbolXCOFF>(FnEntryPointSym)->hasRepresentedCsectSet())
- // Emit linkage for the function entry point.
- emitLinkage(&F, FnEntryPointSym);
+ // Emit linkage for the function entry point.
+ emitLinkage(&F, FnEntryPointSym);
// Emit linkage for the function descriptor.
emitLinkage(&F, Name);
@@ -1584,7 +1723,11 @@ bool AsmPrinter::doFinalization(Module &M) {
HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->endModule();
}
- Handlers.clear();
+
+ // This deletes all the ephemeral handlers that AsmPrinter added, while
+ // keeping all the user-added handlers alive until the AsmPrinter is
+ // destroyed.
+ Handlers.erase(Handlers.begin() + NumUserHandlers, Handlers.end());
DD = nullptr;
// If the target wants to know about weak references, print them all.
@@ -1668,51 +1811,6 @@ bool AsmPrinter::doFinalization(Module &M) {
if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
OutStreamer->SwitchSection(S);
- if (TM.getTargetTriple().isOSBinFormatCOFF()) {
- // Emit /EXPORT: flags for each exported global as necessary.
- const auto &TLOF = getObjFileLowering();
- std::string Flags;
-
- for (const GlobalValue &GV : M.global_values()) {
- raw_string_ostream OS(Flags);
- TLOF.emitLinkerFlagsForGlobal(OS, &GV);
- OS.flush();
- if (!Flags.empty()) {
- OutStreamer->SwitchSection(TLOF.getDrectveSection());
- OutStreamer->emitBytes(Flags);
- }
- Flags.clear();
- }
-
- // Emit /INCLUDE: flags for each used global as necessary.
- if (const auto *LU = M.getNamedGlobal("llvm.used")) {
- assert(LU->hasInitializer() &&
- "expected llvm.used to have an initializer");
- assert(isa<ArrayType>(LU->getValueType()) &&
- "expected llvm.used to be an array type");
- if (const auto *A = cast<ConstantArray>(LU->getInitializer())) {
- for (const Value *Op : A->operands()) {
- const auto *GV = cast<GlobalValue>(Op->stripPointerCasts());
- // Global symbols with internal or private linkage are not visible to
- // the linker, and thus would cause an error when the linker tried to
- // preserve the symbol due to the `/include:` directive.
- if (GV->hasLocalLinkage())
- continue;
-
- raw_string_ostream OS(Flags);
- TLOF.emitLinkerFlagsForUsed(OS, GV);
- OS.flush();
-
- if (!Flags.empty()) {
- OutStreamer->SwitchSection(TLOF.getDrectveSection());
- OutStreamer->emitBytes(Flags);
- }
- Flags.clear();
- }
- }
- }
- }
-
if (TM.Options.EmitAddrsig) {
// Emit address-significance attributes for all globals.
OutStreamer->emitAddrsig();
@@ -1756,10 +1854,11 @@ bool AsmPrinter::doFinalization(Module &M) {
return false;
}
-MCSymbol *AsmPrinter::getCurExceptionSym() {
- if (!CurExceptionSym)
- CurExceptionSym = createTempSymbol("exception");
- return CurExceptionSym;
+MCSymbol *AsmPrinter::getMBBExceptionSym(const MachineBasicBlock &MBB) {
+ auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionIDNum());
+ if (Res.second)
+ Res.first->second = createTempSymbol("exception");
+ return Res.first->second;
}
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
@@ -1786,13 +1885,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnBegin = nullptr;
CurrentSectionBeginSym = nullptr;
MBBSectionRanges.clear();
- CurExceptionSym = nullptr;
+ MBBSectionExceptionSyms.clear();
bool NeedsLocalForSize = MAI->needsLocalForSize();
if (F.hasFnAttribute("patchable-function-entry") ||
F.hasFnAttribute("function-instrument") ||
F.hasFnAttribute("xray-instruction-threshold") ||
needFuncLabelsForEHOrDebugInfo(MF) || NeedsLocalForSize ||
- MF.getTarget().Options.EmitStackSizeSection) {
+ MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
@@ -1882,8 +1981,7 @@ void AsmPrinter::emitConstantPool() {
unsigned NewOffset = alignTo(Offset, CPE.getAlign());
OutStreamer->emitZeros(NewOffset - Offset);
- Type *Ty = CPE.getType();
- Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty);
+ Offset = NewOffset + CPE.getSizeInBytes(getDataLayout());
OutStreamer->emitLabel(Sym);
if (CPE.isMachineConstantPoolEntry())
@@ -2083,47 +2181,50 @@ void AsmPrinter::emitLLVMUsedList(const ConstantArray *InitList) {
}
}
-namespace {
-
-struct Structor {
- int Priority = 0;
- Constant *Func = nullptr;
- GlobalValue *ComdatKey = nullptr;
-
- Structor() = default;
-};
-
-} // end anonymous namespace
-
-/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
-/// priority.
-void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
- bool isCtor) {
- // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is the
- // init priority.
- if (!isa<ConstantArray>(List)) return;
+void AsmPrinter::preprocessXXStructorList(const DataLayout &DL,
+ const Constant *List,
+ SmallVector<Structor, 8> &Structors) {
+ // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is
+ // the init priority.
+ if (!isa<ConstantArray>(List))
+ return;
// Gather the structors in a form that's convenient for sorting by priority.
- SmallVector<Structor, 8> Structors;
for (Value *O : cast<ConstantArray>(List)->operands()) {
auto *CS = cast<ConstantStruct>(O);
if (CS->getOperand(1)->isNullValue())
- break; // Found a null terminator, skip the rest.
+ break; // Found a null terminator, skip the rest.
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
- if (!Priority) continue; // Malformed.
+ if (!Priority)
+ continue; // Malformed.
Structors.push_back(Structor());
Structor &S = Structors.back();
S.Priority = Priority->getLimitedValue(65535);
S.Func = CS->getOperand(1);
- if (!CS->getOperand(2)->isNullValue())
+ if (!CS->getOperand(2)->isNullValue()) {
+ if (TM.getTargetTriple().isOSAIX())
+ llvm::report_fatal_error(
+ "associated data of XXStructor list is not yet supported on AIX");
S.ComdatKey =
dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts());
+ }
}
// Emit the function pointers in the target-specific order
llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) {
return L.Priority < R.Priority;
});
+}
+
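preprocessXXStructorList gathers '{ priority, function, comdat key }' entries and stable-sorts them by priority, so constructors that share a priority keep their original order. A small standalone sketch of that sort, with the Structor fields simplified (illustrative only):

#include <algorithm>
#include <vector>

struct InitEntry {
  int Priority = 0;
  void (*Func)() = nullptr; // the ctor/dtor to emit a pointer to
};

// Emit order is ascending by priority; entries with equal priorities keep
// their source order, which is why a stable sort is used rather than
// std::sort.
void sortByPriority(std::vector<InitEntry> &Entries) {
  std::stable_sort(Entries.begin(), Entries.end(),
                   [](const InitEntry &L, const InitEntry &R) {
                     return L.Priority < R.Priority;
                   });
}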
+/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
+/// priority.
+void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool IsCtor) {
+ SmallVector<Structor, 8> Structors;
+ preprocessXXStructorList(DL, List, Structors);
+ if (Structors.empty())
+ return;
+
const Align Align = DL.getPointerPrefAlignment();
for (Structor &S : Structors) {
const TargetLoweringObjectFile &Obj = getObjFileLowering();
@@ -2139,8 +2240,9 @@ void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
KeySym = getSymbol(GV);
}
+
MCSection *OutputSection =
- (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
+ (IsCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
: Obj.getStaticDtorSection(S.Priority, KeySym));
OutStreamer->SwitchSection(OutputSection);
if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection())
@@ -2274,12 +2376,25 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx);
+ if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV))
+ return getObjFileLowering().lowerDSOLocalEquivalent(Equiv, TM);
+
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
if (!CE) {
llvm_unreachable("Unknown constant value to lower!");
}
switch (CE->getOpcode()) {
+ case Instruction::AddrSpaceCast: {
+ const Constant *Op = CE->getOperand(0);
+ unsigned DstAS = CE->getType()->getPointerAddressSpace();
+ unsigned SrcAS = Op->getType()->getPointerAddressSpace();
+ if (TM.isNoopAddrSpaceCast(SrcAS, DstAS))
+ return lowerConstant(Op);
+
+ // Fallthrough to error.
+ LLVM_FALLTHROUGH;
+ }
default: {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
@@ -2345,7 +2460,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
//
// If the pointer is larger than the resultant integer, then
// as with Trunc just depend on the assembler to truncate it.
- if (DL.getTypeAllocSize(Ty) <= DL.getTypeAllocSize(Op->getType()))
+ if (DL.getTypeAllocSize(Ty).getFixedSize() <=
+ DL.getTypeAllocSize(Op->getType()).getFixedSize())
return OpExpr;
// Otherwise the pointer is smaller than the resultant integer, mask off
@@ -2359,18 +2475,25 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
case Instruction::Sub: {
GlobalValue *LHSGV;
APInt LHSOffset;
+ DSOLocalEquivalent *DSOEquiv;
if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHSGV, LHSOffset,
- getDataLayout())) {
+ getDataLayout(), &DSOEquiv)) {
GlobalValue *RHSGV;
APInt RHSOffset;
if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset,
getDataLayout())) {
const MCExpr *RelocExpr =
getObjFileLowering().lowerRelativeReference(LHSGV, RHSGV, TM);
- if (!RelocExpr)
+ if (!RelocExpr) {
+ const MCExpr *LHSExpr =
+ MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx);
+ if (DSOEquiv &&
+ getObjFileLowering().supportDSOLocalEquivalentLowering())
+ LHSExpr =
+ getObjFileLowering().lowerDSOLocalEquivalent(DSOEquiv, TM);
RelocExpr = MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx),
- MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx);
+ LHSExpr, MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx);
+ }
int64_t Addend = (LHSOffset - RHSOffset).getSExtValue();
if (Addend != 0)
RelocExpr = MCBinaryExpr::createAdd(
@@ -3001,7 +3124,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
OS.indent(Loop->getLoopDepth()*2-2);
OS << "This ";
- if (Loop->empty())
+ if (Loop->isInnermost())
OS << "Inner ";
OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
@@ -3025,6 +3148,16 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
if (Alignment != Align(1))
emitAlignment(Alignment);
+ // Switch to a new section if this basic block must begin a section. The
+ // entry block is always placed in the function section and is handled
+ // separately.
+ if (MBB.isBeginSection() && !MBB.isEntryBlock()) {
+ OutStreamer->SwitchSection(
+ getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(),
+ MBB, TM));
+ CurrentSectionBeginSym = MBB.getSymbol();
+ }
+
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
// here, because multiple LLVM BB's may have been RAUW'd to this block after
@@ -3055,33 +3188,25 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
emitBasicBlockLoopComments(MBB, MLI, *this);
}
- if (MBB.pred_empty() ||
- (!MF->hasBBLabels() && isBlockOnlyReachableByFallthrough(&MBB) &&
- !MBB.isEHFuncletEntry() && !MBB.hasLabelMustBeEmitted())) {
+ // Print the main label for the block.
+ if (shouldEmitLabelForBasicBlock(MBB)) {
+ if (isVerbose() && MBB.hasLabelMustBeEmitted())
+ OutStreamer->AddComment("Label of block must be emitted");
+ OutStreamer->emitLabel(MBB.getSymbol());
+ } else {
if (isVerbose()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":",
false);
}
- } else {
- if (isVerbose() && MBB.hasLabelMustBeEmitted()) {
- OutStreamer->AddComment("Label of block must be emitted");
- }
- auto *BBSymbol = MBB.getSymbol();
- // Switch to a new section if this basic block must begin a section.
- if (MBB.isBeginSection()) {
- OutStreamer->SwitchSection(
- getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(),
- MBB, TM));
- CurrentSectionBeginSym = BBSymbol;
- }
- OutStreamer->emitLabel(BBSymbol);
- // With BB sections, each basic block must handle CFI information on its own
- // if it begins a section.
- if (MBB.isBeginSection())
- for (const HandlerInfo &HI : Handlers)
- HI.Handler->beginBasicBlock(MBB);
}
+
+ // With BB sections, each basic block must handle CFI information on its own
+ // if it begins a section (Entry block is handled separately by
+ // AsmPrinterHandler::beginFunction).
+ if (MBB.isBeginSection() && !MBB.isEntryBlock())
+ for (const HandlerInfo &HI : Handlers)
+ HI.Handler->beginBasicBlock(MBB);
}
void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
@@ -3113,15 +3238,26 @@ void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility,
OutStreamer->emitSymbolAttribute(Sym, Attr);
}
+bool AsmPrinter::shouldEmitLabelForBasicBlock(
+ const MachineBasicBlock &MBB) const {
+ // With `-fbasic-block-sections=`, a label is needed for every non-entry block
+ // in the labels mode (option `=labels`) and every section beginning in the
+ // sections mode (`=all` and `=list=`).
+ if ((MF->hasBBLabels() || MBB.isBeginSection()) && !MBB.isEntryBlock())
+ return true;
+ // A label is needed for any block with at least one predecessor (when that
+ // predecessor is not the fallthrough predecessor, or if it is an EH funclet
+ // entry, or if a label is forced).
+ return !MBB.pred_empty() &&
+ (!isBlockOnlyReachableByFallthrough(&MBB) || MBB.isEHFuncletEntry() ||
+ MBB.hasLabelMustBeEmitted());
+}
+
/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
/// exactly one predecessor and the control transfer mechanism between
/// the predecessor and this block is a fall-through.
bool AsmPrinter::
isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
- // With BasicBlock Sections, beginning of the section is not a fallthrough.
- if (MBB->isBeginSection())
- return false;
-
// If this is a landing pad, it isn't a fall through. If it has no preds,
// then nothing falls through to it.
if (MBB->isEHPad() || MBB->pred_empty())
@@ -3232,14 +3368,10 @@ void AsmPrinter::emitXRayTable() {
MCSection *InstMap = nullptr;
MCSection *FnSledIndex = nullptr;
const Triple &TT = TM.getTargetTriple();
- // Use PC-relative addresses on all targets except MIPS (MIPS64 cannot use
- // PC-relative addresses because R_MIPS_PC64 does not exist).
- bool PCRel = !TT.isMIPS();
+ // Use PC-relative addresses on all targets.
if (TT.isOSBinFormatELF()) {
auto LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
- if (!PCRel)
- Flags |= ELF::SHF_WRITE;
StringRef GroupName;
if (F.hasComdat()) {
Flags |= ELF::SHF_GROUP;
@@ -3273,25 +3405,20 @@ void AsmPrinter::emitXRayTable() {
OutStreamer->SwitchSection(InstMap);
OutStreamer->emitLabel(SledsStart);
for (const auto &Sled : Sleds) {
- if (PCRel) {
- MCSymbol *Dot = Ctx.createTempSymbol();
- OutStreamer->emitLabel(Dot);
- OutStreamer->emitValueImpl(
- MCBinaryExpr::createSub(MCSymbolRefExpr::create(Sled.Sled, Ctx),
- MCSymbolRefExpr::create(Dot, Ctx), Ctx),
- WordSizeBytes);
- OutStreamer->emitValueImpl(
- MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(CurrentFnBegin, Ctx),
- MCBinaryExpr::createAdd(
- MCSymbolRefExpr::create(Dot, Ctx),
- MCConstantExpr::create(WordSizeBytes, Ctx), Ctx),
- Ctx),
- WordSizeBytes);
- } else {
- OutStreamer->emitSymbolValue(Sled.Sled, WordSizeBytes);
- OutStreamer->emitSymbolValue(CurrentFnSym, WordSizeBytes);
- }
+ MCSymbol *Dot = Ctx.createTempSymbol();
+ OutStreamer->emitLabel(Dot);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(Sled.Sled, Ctx),
+ MCSymbolRefExpr::create(Dot, Ctx), Ctx),
+ WordSizeBytes);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(CurrentFnBegin, Ctx),
+ MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Dot, Ctx),
+ MCConstantExpr::create(WordSizeBytes, Ctx),
+ Ctx),
+ Ctx),
+ WordSizeBytes);
Sled.emit(WordSizeBytes, OutStreamer.get());
}
MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true);
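With the MIPS special case gone, every sled entry starts with two self-relative, word-sized values: (sled - here) and (function_begin - here), where "here" is the address of the field itself. Purely as an illustration, a consumer could recover absolute addresses as below; the struct is an assumption of the example that models only those two words on a 64-bit target, while real entries carry additional fields written by Sled.emit.

#include <cstdint>

// Assumes a 64-bit target, so each "word" above is 8 bytes.
struct PackedSledRef {
  int64_t SledRel; // sled address minus the address of this field
  int64_t FnRel;   // function-begin address minus the address of this field
};

inline std::uintptr_t sledAddress(const PackedSledRef &E) {
  return reinterpret_cast<std::uintptr_t>(&E.SledRel) +
         static_cast<std::uintptr_t>(E.SledRel);
}

inline std::uintptr_t functionAddress(const PackedSledRef &E) {
  return reinterpret_cast<std::uintptr_t>(&E.FnRel) +
         static_cast<std::uintptr_t>(E.FnRel);
}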
@@ -3366,3 +3493,17 @@ uint16_t AsmPrinter::getDwarfVersion() const {
void AsmPrinter::setDwarfVersion(uint16_t Version) {
OutStreamer->getContext().setDwarfVersion(Version);
}
+
+bool AsmPrinter::isDwarf64() const {
+ return OutStreamer->getContext().getDwarfFormat() == dwarf::DWARF64;
+}
+
+unsigned int AsmPrinter::getDwarfOffsetByteSize() const {
+ return dwarf::getDwarfOffsetByteSize(
+ OutStreamer->getContext().getDwarfFormat());
+}
+
+unsigned int AsmPrinter::getUnitLengthFieldByteSize() const {
+ return dwarf::getUnitLengthFieldByteSize(
+ OutStreamer->getContext().getDwarfFormat());
+}
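The three accessors added above only ever depend on the DWARF format recorded in the MCContext. For reference, a self-contained sketch of what they evaluate to; the enum is a stand-in for llvm::dwarf::DwarfFormat, and the sizes are fixed by the DWARF spec (a DWARF64 unit length is a 0xffffffff escape followed by an 8-byte length, hence 12 bytes total):

#include <cstdint>

enum class DwarfFormat { DWARF32, DWARF64 };

constexpr unsigned dwarfOffsetByteSize(DwarfFormat F) {
  return F == DwarfFormat::DWARF64 ? 8 : 4;
}

constexpr unsigned unitLengthFieldByteSize(DwarfFormat F) {
  return F == DwarfFormat::DWARF64 ? 4 + 8 : 4;
}

static_assert(dwarfOffsetByteSize(DwarfFormat::DWARF32) == 4, "");
static_assert(unitLengthFieldByteSize(DwarfFormat::DWARF64) == 12, "");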
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index b6a9a9568360..c6e43445e7d0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -97,6 +98,12 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) {
 case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
return "indirect pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel |
+ dwarf::DW_EH_PE_sdata4:
+ return "indirect datarel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel |
+ dwarf::DW_EH_PE_sdata8:
+ return "indirect datarel sdata8";
}
return "<unknown encoding>";
@@ -137,8 +144,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
}
}
-void AsmPrinter::emitTTypeReference(const GlobalValue *GV,
- unsigned Encoding) const {
+void AsmPrinter::emitTTypeReference(const GlobalValue *GV, unsigned Encoding) {
if (GV) {
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
@@ -154,19 +160,22 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
if (!ForceOffset) {
// On COFF targets, we have to emit the special .secrel32 directive.
if (MAI->needsDwarfSectionOffsetDirective()) {
+ assert(!isDwarf64() &&
+ "emitting DWARF64 is not implemented for COFF targets");
OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0);
return;
}
// If the format uses relocations with dwarf, refer to the symbol directly.
if (MAI->doesDwarfUseRelocationsAcrossSections()) {
- OutStreamer->emitSymbolValue(Label, 4);
+ OutStreamer->emitSymbolValue(Label, getDwarfOffsetByteSize());
return;
}
}
// Otherwise, emit it as a label difference from the start of the section.
- emitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
+ emitLabelDifference(Label, Label->getSection().getBeginSymbol(),
+ getDwarfOffsetByteSize());
}
void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const {
@@ -177,12 +186,38 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const {
}
// Just emit the offset directly; no need for symbol math.
- emitInt32(S.Offset);
+ OutStreamer->emitIntValue(S.Offset, getDwarfOffsetByteSize());
}
void AsmPrinter::emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const {
- // TODO: Support DWARF64
- emitLabelPlusOffset(Label, Offset, 4);
+ emitLabelPlusOffset(Label, Offset, getDwarfOffsetByteSize());
+}
+
+void AsmPrinter::emitDwarfLengthOrOffset(uint64_t Value) const {
+ assert(isDwarf64() || Value <= UINT32_MAX);
+ OutStreamer->emitIntValue(Value, getDwarfOffsetByteSize());
+}
+
+void AsmPrinter::maybeEmitDwarf64Mark() const {
+ if (!isDwarf64())
+ return;
+ OutStreamer->AddComment("DWARF64 Mark");
+ OutStreamer->emitInt32(dwarf::DW_LENGTH_DWARF64);
+}
+
+void AsmPrinter::emitDwarfUnitLength(uint64_t Length,
+ const Twine &Comment) const {
+ assert(isDwarf64() || Length <= dwarf::DW_LENGTH_lo_reserved);
+ maybeEmitDwarf64Mark();
+ OutStreamer->AddComment(Comment);
+ OutStreamer->emitIntValue(Length, getDwarfOffsetByteSize());
+}
+
+void AsmPrinter::emitDwarfUnitLength(const MCSymbol *Hi, const MCSymbol *Lo,
+ const Twine &Comment) const {
+ maybeEmitDwarf64Mark();
+ OutStreamer->AddComment(Comment);
+ OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, getDwarfOffsetByteSize());
}
void AsmPrinter::emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 538107cecd8b..4a67b0bc2c4d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -146,6 +147,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
// we only need MCInstrInfo for asm parsing. We create one unconditionally
// because it's not subtarget dependent.
std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
+ assert(MII && "Failed to create instruction info");
std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
STI, *Parser, *MII, MCOptions));
if (!TAP)
@@ -232,7 +234,8 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
const char *IDStart = LastEmitted;
const char *IDEnd = IDStart;
- while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+ while (isDigit(*IDEnd))
+ ++IDEnd;
unsigned Val;
if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
@@ -397,7 +400,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
const char *IDStart = LastEmitted;
const char *IDEnd = IDStart;
- while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+ while (isDigit(*IDEnd))
+ ++IDEnd;
unsigned Val;
if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
@@ -547,22 +551,23 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);
// Emit warnings if we use reserved registers on the clobber list, as
- // that might give surprising results.
- std::vector<std::string> RestrRegs;
+ // that might lead to undefined behaviour.
+ SmallVector<Register, 8> RestrRegs;
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
// Start with the first operand descriptor, and iterate over them.
for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands();
I < NumOps; ++I) {
const MachineOperand &MO = MI->getOperand(I);
- if (MO.isImm()) {
- unsigned Flags = MO.getImm();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber &&
- !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) {
- RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg()));
- }
- // Skip to one before the next operand descriptor, if it exists.
- I += InlineAsm::getNumOperandRegisters(Flags);
+ if (!MO.isImm())
+ continue;
+ unsigned Flags = MO.getImm();
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber) {
+ Register Reg = MI->getOperand(I + 1).getReg();
+ if (!TRI->isAsmClobberable(*MF, Reg))
+ RestrRegs.push_back(Reg);
}
+ // Skip to one before the next operand descriptor, if it exists.
+ I += InlineAsm::getNumOperandRegisters(Flags);
}
if (!RestrRegs.empty()) {
@@ -572,14 +577,15 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin());
std::string Msg = "inline asm clobber list contains reserved registers: ";
- for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) {
+ for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; ++I) {
if(I != RestrRegs.begin())
Msg += ", ";
- Msg += *I;
+ Msg += TRI->getName(*I);
}
- std::string Note = "Reserved registers on the clobber list may not be "
- "preserved across the asm statement, and clobbering them may "
- "lead to undefined behaviour.";
+ const char *Note =
+ "Reserved registers on the clobber list may not be "
+ "preserved across the asm statement, and clobbering them may "
+ "lead to undefined behaviour.";
SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg);
SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 90929a217368..5e7db1f2f76c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -29,7 +29,7 @@ class ByteStreamer {
public:
// For now we're just handling the calls we need for dwarf emission/hashing.
- virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
+ virtual void emitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "",
unsigned PadTo = 0) = 0;
@@ -41,7 +41,7 @@ private:
public:
APByteStreamer(AsmPrinter &Asm) : AP(Asm) {}
- void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ void emitInt8(uint8_t Byte, const Twine &Comment) override {
AP.OutStreamer->AddComment(Comment);
AP.emitInt8(Byte);
}
@@ -61,7 +61,7 @@ class HashingByteStreamer final : public ByteStreamer {
DIEHash &Hash;
public:
HashingByteStreamer(DIEHash &H) : Hash(H) {}
- void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ void emitInt8(uint8_t Byte, const Twine &Comment) override {
Hash.update(Byte);
}
void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
@@ -88,7 +88,7 @@ public:
std::vector<std::string> &Comments, bool GenerateComments)
: Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {
}
- void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ void emitInt8(uint8_t Byte, const Twine &Comment) override {
Buffer.push_back(Byte);
if (GenerateComments)
Comments.push_back(Comment.str());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 39069e24e061..b15e750aaf85 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -13,15 +13,10 @@
#include "CodeViewDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Triple.h"
@@ -40,7 +35,6 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
@@ -48,14 +42,12 @@
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -71,7 +63,6 @@
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
@@ -85,12 +76,8 @@
#include <cassert>
#include <cctype>
#include <cstddef>
-#include <cstdint>
#include <iterator>
#include <limits>
-#include <string>
-#include <utility>
-#include <vector>
using namespace llvm;
using namespace llvm::codeview;
@@ -139,7 +126,9 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
case Triple::ArchType::x86_64:
return CPUType::X64;
case Triple::ArchType::thumb:
- return CPUType::Thumb;
+ // LLVM currently doesn't support Windows CE, so thumb is
+ // mapped directly to ARMNT here.
+ return CPUType::ARMNT;
case Triple::ArchType::aarch64:
return CPUType::ARM64;
default:
@@ -148,28 +137,7 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
}
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
- : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {
- // If module doesn't have named metadata anchors or COFF debug section
- // is not available, skip any debug info related stuff.
- if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
- !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) {
- Asm = nullptr;
- MMI->setDebugInfoAvailability(false);
- return;
- }
- // Tell MMI that we have debug info.
- MMI->setDebugInfoAvailability(true);
-
- TheCPU =
- mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch());
-
- collectGlobalVariableInfo();
-
- // Check if we should emit type record hashes.
- ConstantInt *GH = mdconst::extract_or_null<ConstantInt>(
- MMI->getModule()->getModuleFlag("CodeViewGHash"));
- EmitDebugGlobalHashes = GH && !GH->isZero();
-}
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {}
StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
std::string &Filepath = FileToFilepathMap[File];
@@ -507,8 +475,7 @@ void CodeViewDebug::recordLocalVariable(LocalVariable &&Var,
static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs,
const DILocation *Loc) {
- auto B = Locs.begin(), E = Locs.end();
- if (std::find(B, E, Loc) == E)
+ if (!llvm::is_contained(Locs, Loc))
Locs.push_back(Loc);
}
@@ -574,12 +541,31 @@ void CodeViewDebug::emitCodeViewMagicVersion() {
OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
}
+void CodeViewDebug::beginModule(Module *M) {
+ // If module doesn't have named metadata anchors or COFF debug section
+ // is not available, skip any debug info related stuff.
+ if (!M->getNamedMetadata("llvm.dbg.cu") ||
+ !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
+ Asm = nullptr;
+ return;
+ }
+ // Tell MMI that we have and need debug info.
+ MMI->setDebugInfoAvailability(true);
+
+ TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
+
+ collectGlobalVariableInfo();
+
+ // Check if we should emit type record hashes.
+ ConstantInt *GH =
+ mdconst::extract_or_null<ConstantInt>(M->getModuleFlag("CodeViewGHash"));
+ EmitDebugGlobalHashes = GH && !GH->isZero();
+}
+
void CodeViewDebug::endModule() {
if (!Asm || !MMI->hasDebugInfo())
return;
- assert(Asm != nullptr);
-
// The COFF .debug$S section consists of several subsections, each starting
// with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
// of the payload followed by the payload itself. The subsections are 4-byte
@@ -600,13 +586,18 @@ void CodeViewDebug::endModule() {
if (!P.first->isDeclarationForLinker())
emitDebugInfoForFunction(P.first, *P.second);
- // Emit global variable debug information.
- setCurrentSubprogram(nullptr);
- emitDebugInfoForGlobals();
+ // Get types used by globals without emitting anything.
+ // This is meant to collect all static const data members so they can be
+ // emitted as globals.
+ collectDebugInfoForGlobals();
// Emit retained types.
emitDebugInfoForRetainedTypes();
+ // Emit global variable debug information.
+ setCurrentSubprogram(nullptr);
+ emitDebugInfoForGlobals();
+
// Switch back to the generic .debug$S section after potentially processing
// comdat symbol sections.
switchToDebugSectionForSymbol(nullptr);
@@ -1195,12 +1186,15 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
// Get the frame register used and the offset.
Register FrameReg;
- int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
+ StackOffset FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg);
+ assert(!FrameOffset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+
// Calculate the label ranges.
LocalVarDefRange DefRange =
- createDefRangeMem(CVReg, FrameOffset + ExprOffset);
+ createDefRangeMem(CVReg, FrameOffset.getFixed() + ExprOffset);
for (const InsnRange &Range : Scope->getRanges()) {
const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
@@ -2155,6 +2149,15 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
const DIDerivedType *DDTy) {
if (!DDTy->getName().empty()) {
Info.Members.push_back({DDTy, 0});
+
+ // Collect static const data members with values.
+ if ((DDTy->getFlags() & DINode::FlagStaticMember) ==
+ DINode::FlagStaticMember) {
+ if (DDTy->getConstant() && (isa<ConstantInt>(DDTy->getConstant()) ||
+ isa<ConstantFP>(DDTy->getConstant())))
+ StaticConstMembers.push_back(DDTy);
+ }
+
return;
}
@@ -3057,15 +3060,32 @@ void CodeViewDebug::collectGlobalVariableInfo() {
}
}
+void CodeViewDebug::collectDebugInfoForGlobals() {
+ for (const CVGlobalVariable &CVGV : GlobalVariables) {
+ const DIGlobalVariable *DIGV = CVGV.DIGV;
+ const DIScope *Scope = DIGV->getScope();
+ getCompleteTypeIndex(DIGV->getType());
+ getFullyQualifiedName(Scope, DIGV->getName());
+ }
+
+ for (const CVGlobalVariable &CVGV : ComdatVariables) {
+ const DIGlobalVariable *DIGV = CVGV.DIGV;
+ const DIScope *Scope = DIGV->getScope();
+ getCompleteTypeIndex(DIGV->getType());
+ getFullyQualifiedName(Scope, DIGV->getName());
+ }
+}
+
void CodeViewDebug::emitDebugInfoForGlobals() {
// First, emit all globals that are not in a comdat in a single symbol
// substream. MSVC doesn't like it if the substream is empty, so only open
// it if we have at least one global to emit.
switchToDebugSectionForSymbol(nullptr);
- if (!GlobalVariables.empty()) {
+ if (!GlobalVariables.empty() || !StaticConstMembers.empty()) {
OS.AddComment("Symbol subsection for globals");
MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
emitGlobalVariableList(GlobalVariables);
+ emitStaticConstMemberList();
endCVSubsection(EndLabel);
}
@@ -3104,6 +3124,61 @@ void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
}
}
+void CodeViewDebug::emitStaticConstMemberList() {
+ for (const DIDerivedType *DTy : StaticConstMembers) {
+ const DIScope *Scope = DTy->getScope();
+
+ APSInt Value;
+ if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(DTy->getConstant()))
+ Value = APSInt(CI->getValue(),
+ DebugHandlerBase::isUnsignedDIType(DTy->getBaseType()));
+ else if (const ConstantFP *CFP =
+ dyn_cast_or_null<ConstantFP>(DTy->getConstant()))
+ Value = APSInt(CFP->getValueAPF().bitcastToAPInt(), true);
+ else
+ llvm_unreachable("cannot emit a constant without a value");
+
+ std::string QualifiedName = getFullyQualifiedName(Scope, DTy->getName());
+
+ MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
+ OS.AddComment("Type");
+ OS.emitInt32(getTypeIndex(DTy->getBaseType()).getIndex());
+ OS.AddComment("Value");
+
+ // Encoded integers shouldn't need more than 10 bytes.
+ uint8_t Data[10];
+ BinaryStreamWriter Writer(Data, llvm::support::endianness::little);
+ CodeViewRecordIO IO(Writer);
+ cantFail(IO.mapEncodedInteger(Value));
+ StringRef SRef((char *)Data, Writer.getOffset());
+ OS.emitBinaryData(SRef);
+
+ OS.AddComment("Name");
+ emitNullTerminatedSymbolName(OS, QualifiedName);
+ endSymbolRecord(SConstantEnd);
+ }
+}
+
+static bool isFloatDIType(const DIType *Ty) {
+ if (isa<DICompositeType>(Ty))
+ return false;
+
+ if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ dwarf::Tag T = (dwarf::Tag)Ty->getTag();
+ if (T == dwarf::DW_TAG_pointer_type ||
+ T == dwarf::DW_TAG_ptr_to_member_type ||
+ T == dwarf::DW_TAG_reference_type ||
+ T == dwarf::DW_TAG_rvalue_reference_type)
+ return false;
+ assert(DTy->getBaseType() && "Expected valid base type");
+ return isFloatDIType(DTy->getBaseType());
+ }
+
+ auto *BTy = cast<DIBasicType>(Ty);
+ return (BTy->getEncoding() == dwarf::DW_ATE_float);
+}
+
void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
const DIGlobalVariable *DIGV = CVGV.DIGV;
@@ -3139,7 +3214,12 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>();
assert(DIE->isConstant() &&
"Global constant variables must contain a constant expression.");
- uint64_t Val = DIE->getElement(1);
+
+ // Use unsigned for floats.
+ bool isUnsigned = isFloatDIType(DIGV->getType())
+ ? true
+ : DebugHandlerBase::isUnsignedDIType(DIGV->getType());
+ APSInt Value(APInt(/*BitWidth=*/64, DIE->getElement(1)), isUnsigned);
MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
OS.AddComment("Type");
@@ -3150,7 +3230,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
uint8_t data[10];
BinaryStreamWriter Writer(data, llvm::support::endianness::little);
CodeViewRecordIO IO(Writer);
- cantFail(IO.mapEncodedInteger(Val));
+ cantFail(IO.mapEncodedInteger(Value));
StringRef SRef((char *)data, Writer.getOffset());
OS.emitBinaryData(SRef);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 82f0293874d0..9eee5492bc81 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -203,6 +203,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
// Array of non-COMDAT global variables.
SmallVector<CVGlobalVariable, 1> GlobalVariables;
+ /// List of static const data members to be emitted as S_CONSTANTs.
+ SmallVector<const DIDerivedType *, 4> StaticConstMembers;
+
/// The set of comdat .debug$S sections that we've seen so far. Each section
/// must start with a magic version number that must only be emitted once.
/// This set tracks which sections we've already opened.
@@ -227,10 +230,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void calculateRanges(LocalVariable &Var,
const DbgValueHistoryMap::Entries &Entries);
- static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children,
- const FunctionInfo &FI,
- const InlineSite &Site);
-
/// Remember some debug info about each function. Keep it in a stable order to
/// emit at the end of the TU.
MapVector<const Function *, std::unique_ptr<FunctionInfo>> FnDebugInfo;
@@ -313,9 +312,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForUDTs(
const std::vector<std::pair<std::string, const DIType *>> &UDTs);
+ void collectDebugInfoForGlobals();
void emitDebugInfoForGlobals();
void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV);
+ void emitStaticConstMemberList();
/// Opens a subsection of the given kind in a .debug$S codeview section.
/// Returns an end label for use with endCVSubsection when the subsection is
@@ -464,6 +465,8 @@ protected:
public:
CodeViewDebug(AsmPrinter *AP);
+ void beginModule(Module *M) override;
+
void setSymbolSize(const MCSymbol *, uint64_t) override {}
/// Emit the COFF section that holds the line table information.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index edf82fbed650..39b0b027c765 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -194,7 +194,7 @@ DIEAbbrev DIE::generateAbbrev() const {
return Abbrev;
}
-unsigned DIE::getDebugSectionOffset() const {
+uint64_t DIE::getDebugSectionOffset() const {
const DIEUnit *Unit = getUnit();
assert(Unit && "DIE must be owned by a DIEUnit to get its absolute offset");
return Unit->getDebugSectionOffset() + getOffset();
@@ -313,10 +313,8 @@ unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP,
//===----------------------------------------------------------------------===//
// DIEUnit Implementation
//===----------------------------------------------------------------------===//
-DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag)
- : Die(UnitTag), Section(nullptr), Offset(0), Length(0), Version(V),
- AddrSize(A)
-{
+DIEUnit::DIEUnit(dwarf::Tag UnitTag)
+ : Die(UnitTag), Section(nullptr), Offset(0) {
Die.Owner = this;
assert((UnitTag == dwarf::DW_TAG_compile_unit ||
UnitTag == dwarf::DW_TAG_skeleton_unit ||
@@ -430,10 +428,10 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of integer value in bytes.
///
unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- dwarf::FormParams Params = {0, 0, dwarf::DWARF32};
- if (AP)
- Params = {AP->getDwarfVersion(), uint8_t(AP->getPointerSize()),
- AP->OutStreamer->getContext().getDwarfFormat()};
+ assert(AP && "AsmPrinter is required to set FormParams");
+ dwarf::FormParams Params = {AP->getDwarfVersion(),
+ uint8_t(AP->getPointerSize()),
+ AP->OutStreamer->getContext().getDwarfFormat()};
if (Optional<uint8_t> FixedSize = dwarf::getFixedFormByteSize(Form, Params))
return *FixedSize;
@@ -472,10 +470,16 @@ void DIEExpr::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
/// SizeOf - Determine size of expression value in bytes.
///
unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- if (Form == dwarf::DW_FORM_data4) return 4;
- if (Form == dwarf::DW_FORM_sec_offset) return 4;
- if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getPointerSize();
+ switch (Form) {
+ case dwarf::DW_FORM_data4:
+ return 4;
+ case dwarf::DW_FORM_data8:
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ return AP->getDwarfOffsetByteSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
}
LLVM_DUMP_METHOD
@@ -488,19 +492,26 @@ void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
/// EmitValue - Emit label value.
///
void DIELabel::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->emitLabelReference(
- Label, SizeOf(AP, Form),
- Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset ||
- Form == dwarf::DW_FORM_ref_addr || Form == dwarf::DW_FORM_data4);
+ bool IsSectionRelative = Form != dwarf::DW_FORM_addr;
+ AP->emitLabelReference(Label, SizeOf(AP, Form), IsSectionRelative);
}
/// SizeOf - Determine size of label value in bytes.
///
unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- if (Form == dwarf::DW_FORM_data4) return 4;
- if (Form == dwarf::DW_FORM_sec_offset) return 4;
- if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->MAI->getCodePointerSize();
+ switch (Form) {
+ case dwarf::DW_FORM_data4:
+ return 4;
+ case dwarf::DW_FORM_data8:
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ case dwarf::DW_FORM_strp:
+ return AP->getDwarfOffsetByteSize();
+ case dwarf::DW_FORM_addr:
+ return AP->MAI->getCodePointerSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
}
LLVM_DUMP_METHOD
@@ -536,10 +547,16 @@ void DIEDelta::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
/// SizeOf - Determine size of delta value in bytes.
///
unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- if (Form == dwarf::DW_FORM_data4) return 4;
- if (Form == dwarf::DW_FORM_sec_offset) return 4;
- if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->MAI->getCodePointerSize();
+ switch (Form) {
+ case dwarf::DW_FORM_data4:
+ return 4;
+ case dwarf::DW_FORM_data8:
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ return AP->getDwarfOffsetByteSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
}
LLVM_DUMP_METHOD
@@ -645,7 +662,7 @@ void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_ref_addr: {
// Get the absolute offset for this DIE within the debug info/types section.
- unsigned Addr = Entry->getDebugSectionOffset();
+ uint64_t Addr = Entry->getDebugSectionOffset();
if (const MCSymbol *SectionSym =
Entry->getUnit()->getCrossSectionRelativeBaseAddress()) {
AP->emitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
@@ -802,13 +819,24 @@ void DIEBlock::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- if (Form == dwarf::DW_FORM_loclistx)
+ switch (Form) {
+ case dwarf::DW_FORM_loclistx:
return getULEB128Size(Index);
- if (Form == dwarf::DW_FORM_data4)
- return 4;
- if (Form == dwarf::DW_FORM_sec_offset)
+ case dwarf::DW_FORM_data4:
+ assert(!AP->isDwarf64() &&
+ "DW_FORM_data4 is not suitable to emit a pointer to a location list "
+ "in the 64-bit DWARF format");
return 4;
- return AP->MAI->getCodePointerSize();
+ case dwarf::DW_FORM_data8:
+ assert(AP->isDwarf64() &&
+ "DW_FORM_data8 is not suitable to emit a pointer to a location list "
+ "in the 32-bit DWARF format");
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ return AP->getDwarfOffsetByteSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
}
/// EmitValue - Emit label value.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index f26ef63eedec..da9997efc01f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -12,6 +12,7 @@
#include "DIEHash.h"
#include "ByteStreamer.h"
+#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
@@ -214,7 +215,15 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
// all of the data is going to be added as integers.
void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
for (const auto &V : Values)
- Hash.update((uint64_t)V.getDIEInteger().getValue());
+ if (V.getType() == DIEValue::isBaseTypeRef) {
+ const DIE &C =
+ *CU->ExprRefedBaseTypes[V.getDIEBaseTypeRef().getIndex()].Die;
+ StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name);
+ assert(!Name.empty() &&
+ "Base types referenced from DW_OP_convert should have a name");
+ hashNestedType(C, Name);
+ } else
+ Hash.update((uint64_t)V.getDIEInteger().getValue());
}
// Hash the contents of a loclistptr class.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 1a69f6772873..29e1da4c5d60 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -31,7 +31,8 @@ class DIEHash {
};
public:
- DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
+ DIEHash(AsmPrinter *A = nullptr, DwarfCompileUnit *CU = nullptr)
+ : AP(A), CU(CU) {}
/// Computes the CU signature.
uint64_t computeCUSignature(StringRef DWOName, const DIE &Die);
@@ -101,6 +102,7 @@ private:
private:
MD5 Hash;
AsmPrinter *AP;
+ DwarfCompileUnit *CU;
DenseMap<const DIE *, unsigned> Numbering;
};
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 584b7614915d..1c9131edab83 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -8,9 +8,11 @@
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -51,6 +53,37 @@ static Register isDescribedByReg(const MachineInstr &MI) {
: Register();
}
+void InstructionOrdering::initialize(const MachineFunction &MF) {
+ // We give meta instructions the same ordinal as the preceding instruction
+ // because this class is written for the task of comparing positions of
+ // variable location ranges against scope ranges. To reflect what we'll see
+ // in the binary, when we look at location ranges we must consider all
+ // DBG_VALUEs between two real instructions at the same position. And a
+ // scope range which ends on a meta instruction should be considered to end
+ // at the last seen real instruction. E.g.
+ //
+ // 1 instruction p Both the variable location for x and for y start
+ // 1 DBG_VALUE for "x" after instruction p so we give them all the same
+ // 1 DBG_VALUE for "y" number. If a scope range ends at DBG_VALUE for "y",
+ // 2 instruction q we should treat it as ending after instruction p
+ // because it will be the last real instruction in the
+ // range. DBG_VALUEs at or after this position for
+ // variables declared in the scope will have no effect.
+ clear();
+ unsigned Position = 0;
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB)
+ InstNumberMap[&MI] = MI.isMetaInstruction() ? Position : ++Position;
+}
+
+bool InstructionOrdering::isBefore(const MachineInstr *A,
+ const MachineInstr *B) const {
+ assert(A->getParent() && B->getParent() && "Operands must have a parent");
+ assert(A->getMF() == B->getMF() &&
+ "Operands must be in the same MachineFunction");
+ return InstNumberMap.lookup(A) < InstNumberMap.lookup(B);
+}
+
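InstructionOrdering::initialize above gives every meta instruction the ordinal of the preceding real instruction, so comparing positions reduces to comparing ordinals. A compilable stand-in for the same numbering, with Inst replacing MachineInstr (illustrative only):

#include <unordered_map>
#include <vector>

struct Inst {
  bool IsMeta; // true for DBG_VALUE-style instructions
};

std::unordered_map<const Inst *, unsigned>
numberInstructions(const std::vector<Inst> &Body) {
  std::unordered_map<const Inst *, unsigned> Ordinal;
  unsigned Position = 0;
  for (const Inst &I : Body)
    // Meta instructions reuse the ordinal of the last real instruction.
    Ordinal[&I] = I.IsMeta ? Position : ++Position;
  return Ordinal;
}

bool isBefore(const std::unordered_map<const Inst *, unsigned> &Ordinal,
              const Inst *A, const Inst *B) {
  return Ordinal.at(A) < Ordinal.at(B);
}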
bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var,
const MachineInstr &MI,
EntryIndex &NewIndex) {
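
The ordinal scheme described in the comment above (meta instructions reuse the ordinal of the preceding real instruction) can be tried in isolation. The sketch below is a minimal, standalone illustration of that numbering; MiniInst and MiniOrdering are hypothetical stand-ins for MachineInstr and InstructionOrdering, not LLVM API.

    #include <cassert>
    #include <unordered_map>
    #include <vector>

    // Hypothetical stand-in for MachineInstr: only the property we need.
    struct MiniInst {
      bool IsMeta; // e.g. a DBG_VALUE or other meta instruction
    };

    // Sketch of the ordinal assignment in InstructionOrdering::initialize.
    struct MiniOrdering {
      std::unordered_map<const MiniInst *, unsigned> Number;

      void initialize(const std::vector<MiniInst> &Func) {
        unsigned Position = 0;
        for (const MiniInst &MI : Func)
          // Meta instructions share the ordinal of the preceding real
          // instruction; real instructions advance to the next ordinal.
          Number[&MI] = MI.IsMeta ? Position : ++Position;
      }

      bool isBefore(const MiniInst *A, const MiniInst *B) const {
        return Number.at(A) < Number.at(B);
      }
    };

    int main() {
      // instruction p, DBG_VALUE "x", DBG_VALUE "y", instruction q
      std::vector<MiniInst> Func = {{false}, {true}, {true}, {false}};
      MiniOrdering Ord;
      Ord.initialize(Func);
      // p and both DBG_VALUEs share ordinal 1; q gets ordinal 2.
      assert(!Ord.isBefore(&Func[0], &Func[1])); // p is not before DBG_VALUE x
      assert(Ord.isBefore(&Func[1], &Func[3]));  // DBG_VALUE x is before q
      return 0;
    }
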
@@ -90,6 +123,156 @@ void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) {
EndIndex = Index;
}
+/// Check if the instruction range [StartMI, EndMI] intersects any instruction
+/// range in Ranges. EndMI can be nullptr to indicate that the range is
+/// unbounded. Assumes Ranges is ordered and disjoint. Returns an iterator
+/// pointing to the first intersecting scope range if one exists, otherwise None.
+static Optional<ArrayRef<InsnRange>::iterator>
+intersects(const MachineInstr *StartMI, const MachineInstr *EndMI,
+ const ArrayRef<InsnRange> &Ranges,
+ const InstructionOrdering &Ordering) {
+ for (auto RangesI = Ranges.begin(), RangesE = Ranges.end();
+ RangesI != RangesE; ++RangesI) {
+ if (EndMI && Ordering.isBefore(EndMI, RangesI->first))
+ return None;
+ if (EndMI && !Ordering.isBefore(RangesI->second, EndMI))
+ return RangesI;
+ if (Ordering.isBefore(StartMI, RangesI->second))
+ return RangesI;
+ }
+ return None;
+}
+
+void DbgValueHistoryMap::trimLocationRanges(
+ const MachineFunction &MF, LexicalScopes &LScopes,
+ const InstructionOrdering &Ordering) {
+ // The indices of the entries we're going to remove for each variable.
+ SmallVector<EntryIndex, 4> ToRemove;
+ // Entry reference count for each variable. Clobbers left with no references
+ // will be removed.
+ SmallVector<int, 4> ReferenceCount;
+ // Entries reference other entries by index. Offsets is used to remap these
+ // references if any entries are removed.
+ SmallVector<size_t, 4> Offsets;
+
+ for (auto &Record : VarEntries) {
+ auto &HistoryMapEntries = Record.second;
+ if (HistoryMapEntries.empty())
+ continue;
+
+ InlinedEntity Entity = Record.first;
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(Entity.first);
+
+ LexicalScope *Scope = nullptr;
+ if (const DILocation *InlinedAt = Entity.second) {
+ Scope = LScopes.findInlinedScope(LocalVar->getScope(), InlinedAt);
+ } else {
+ Scope = LScopes.findLexicalScope(LocalVar->getScope());
+ // Ignore variables for non-inlined function level scopes. The scope
+ // ranges (from scope->getRanges()) will not include any instructions
+ // before the first one with a debug-location, which could cause us to
+ // incorrectly drop a location. We could introduce special casing for
+ // these variables, but it doesn't seem worth it because no out-of-scope
+ // locations have been observed for variables declared in function level
+ // scopes.
+ if (Scope &&
+ (Scope->getScopeNode() == Scope->getScopeNode()->getSubprogram()) &&
+ (Scope->getScopeNode() == LocalVar->getScope()))
+ continue;
+ }
+
+ // If there is no scope for the variable then something has probably gone
+ // wrong.
+ if (!Scope)
+ continue;
+
+ ToRemove.clear();
+ // Zero the reference counts.
+ ReferenceCount.assign(HistoryMapEntries.size(), 0);
+ // Index of the DBG_VALUE which marks the start of the current location
+ // range.
+ EntryIndex StartIndex = 0;
+ ArrayRef<InsnRange> ScopeRanges(Scope->getRanges());
+ for (auto EI = HistoryMapEntries.begin(), EE = HistoryMapEntries.end();
+ EI != EE; ++EI, ++StartIndex) {
+ // Only DBG_VALUEs can open location ranges so skip anything else.
+ if (!EI->isDbgValue())
+ continue;
+
+ // Index of the entry which closes this range.
+ EntryIndex EndIndex = EI->getEndIndex();
+ // If this range is closed bump the reference count of the closing entry.
+ if (EndIndex != NoEntry)
+ ReferenceCount[EndIndex] += 1;
+ // Skip this location range if the opening entry is still referenced. It
+ // may close a location range which intersects a scope range.
+ // TODO: We could be 'smarter' and trim these kinds of ranges such that
+ // they do not leak out of the scope ranges if they partially overlap.
+ if (ReferenceCount[StartIndex] > 0)
+ continue;
+
+ const MachineInstr *StartMI = EI->getInstr();
+ const MachineInstr *EndMI = EndIndex != NoEntry
+ ? HistoryMapEntries[EndIndex].getInstr()
+ : nullptr;
+ // Check if the location range [StartMI, EndMI] intersects with any scope
+ // range for the variable.
+ if (auto R = intersects(StartMI, EndMI, ScopeRanges, Ordering)) {
+ // Adjust ScopeRanges to exclude ranges which subsequent location ranges
+ // cannot possibly intersect.
+ ScopeRanges = ArrayRef<InsnRange>(R.getValue(), ScopeRanges.end());
+ } else {
+ // If the location range does not intersect any scope range then the
+ // DBG_VALUE which opened this location range is useless, mark it for
+ // removal.
+ ToRemove.push_back(StartIndex);
+ // Because we'll be removing this entry we need to update the reference
+ // count of the closing entry, if one exists.
+ if (EndIndex != NoEntry)
+ ReferenceCount[EndIndex] -= 1;
+ }
+ }
+
+ // If there is nothing to remove then jump to next variable.
+ if (ToRemove.empty())
+ continue;
+
+ // Mark clobbers that will no longer close any location ranges for removal.
+ for (size_t i = 0; i < HistoryMapEntries.size(); ++i)
+ if (ReferenceCount[i] <= 0 && HistoryMapEntries[i].isClobber())
+ ToRemove.push_back(i);
+
+ llvm::sort(ToRemove);
+
+ // Build an offset map so we can update the EndIndex of the remaining
+ // entries.
+ // Zero the offsets.
+ Offsets.assign(HistoryMapEntries.size(), 0);
+ size_t CurOffset = 0;
+ auto ToRemoveItr = ToRemove.begin();
+ for (size_t EntryIdx = *ToRemoveItr; EntryIdx < HistoryMapEntries.size();
+ ++EntryIdx) {
+ // Check if this is an entry which will be removed.
+ if (ToRemoveItr != ToRemove.end() && *ToRemoveItr == EntryIdx) {
+ ++ToRemoveItr;
+ ++CurOffset;
+ }
+ Offsets[EntryIdx] = CurOffset;
+ }
+
+ // Update the EndIndex of the entries to account for those which will be
+ // removed.
+ for (auto &Entry : HistoryMapEntries)
+ if (Entry.isClosed())
+ Entry.EndIndex -= Offsets[Entry.EndIndex];
+
+ // Now actually remove the entries. Iterate backwards so that our remaining
+ // ToRemove indices are valid after each erase.
+ for (auto Itr = ToRemove.rbegin(), End = ToRemove.rend(); Itr != End; ++Itr)
+ HistoryMapEntries.erase(HistoryMapEntries.begin() + *Itr);
+ }
+}
+
void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
assert(MI.isDebugLabel() && "not a DBG_LABEL");
LabelInstr[Label] = &MI;
@@ -234,7 +417,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
DbgValueHistoryMap &DbgValues,
DbgLabelInstrMap &DbgLabels) {
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
Register FrameReg = TRI->getFrameRegister(*MF);
RegDescribedVarsMap RegVars;
DbgValueEntriesMap LiveEntries;
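
The Offsets bookkeeping in trimLocationRanges above can also be exercised on its own: entries refer to their closing entry by index, so after deciding which indices to drop, every surviving EndIndex must shrink by the number of removed entries that precede it. The sketch below uses plain integers instead of history-map entries and only illustrates that remapping under those simplifying assumptions; it is not LLVM code.

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
      // Each element is an EndIndex (-1 when the entry is not closed).
      std::vector<int> EndIndex = {3, -1, 3, -1, -1};
      // Indices selected for removal, kept sorted and known non-empty here.
      std::vector<std::size_t> ToRemove = {1, 2};

      // Build the per-index offset table: Offsets[i] counts how many removed
      // entries occur at or before index i.
      std::vector<std::size_t> Offsets(EndIndex.size(), 0);
      std::size_t CurOffset = 0;
      auto It = ToRemove.begin();
      for (std::size_t Idx = *It; Idx < EndIndex.size(); ++Idx) {
        if (It != ToRemove.end() && *It == Idx) {
          ++It;
          ++CurOffset;
        }
        Offsets[Idx] = CurOffset;
      }

      // Remap surviving references before erasing anything.
      for (int &E : EndIndex)
        if (E >= 0)
          E -= static_cast<int>(Offsets[E]);

      // Erase back to front so the earlier indices stay valid.
      for (auto RIt = ToRemove.rbegin(); RIt != ToRemove.rend(); ++RIt)
        EndIndex.erase(EndIndex.begin() + *RIt);

      // Entry 0 used to point at index 3; two removals precede index 3,
      // so it now points at index 1.
      assert(EndIndex.size() == 3 && EndIndex[0] == 1);
      return 0;
    }
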
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 880791a06d93..68a4bfba42a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -21,11 +21,16 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
+/// If true, we drop variable location ranges which exist entirely outside the
+/// variable's lexical scope instruction ranges.
+static cl::opt<bool> TrimVarLocs("trim-var-locs", cl::Hidden, cl::init(true));
+
Optional<DbgVariableLocation>
DbgVariableLocation::extractFromMachineInstruction(
const MachineInstr &Instruction) {
@@ -86,6 +91,11 @@ DbgVariableLocation::extractFromMachineInstruction(
DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
+void DebugHandlerBase::beginModule(Module *M) {
+ if (M->debug_compile_units().empty())
+ Asm = nullptr;
+}
+
// Each LexicalScope has first instruction and last instruction to mark
// beginning and end of a scope respectively. Create an inverse map that list
// scopes starts (and ends) with an instruction. One instruction may start (or
@@ -153,6 +163,54 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
return getBaseTypeSize(BaseType);
}
+bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ // FIXME: Enums without a fixed underlying type have unknown signedness
+ // here, leading to incorrectly emitted constants.
+ if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
+ return false;
+
+ // (Pieces of) aggregate types that get hacked apart by SROA may be
+ // represented by a constant. Encode them as unsigned bytes.
+ return true;
+ }
+
+ if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ dwarf::Tag T = (dwarf::Tag)Ty->getTag();
+ // Encode pointer constants as unsigned bytes. This is used at least for
+ // null pointer constant emission.
+ // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
+ // here, but accept them for now due to a bug in SROA producing bogus
+ // dbg.values.
+ if (T == dwarf::DW_TAG_pointer_type ||
+ T == dwarf::DW_TAG_ptr_to_member_type ||
+ T == dwarf::DW_TAG_reference_type ||
+ T == dwarf::DW_TAG_rvalue_reference_type)
+ return true;
+ assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
+ T == dwarf::DW_TAG_volatile_type ||
+ T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type);
+ assert(DTy->getBaseType() && "Expected valid base type");
+ return isUnsignedDIType(DTy->getBaseType());
+ }
+
+ auto *BTy = cast<DIBasicType>(Ty);
+ unsigned Encoding = BTy->getEncoding();
+ assert((Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_signed ||
+ Encoding == dwarf::DW_ATE_signed_char ||
+ Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
+ Encoding == dwarf::DW_ATE_boolean ||
+ (Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
+ Ty->getName() == "decltype(nullptr)")) &&
+ "Unsupported encoding");
+ return Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
+ Ty->getTag() == dwarf::DW_TAG_unspecified_type;
+}
+
static bool hasDebugInfo(const MachineModuleInfo *MMI,
const MachineFunction *MF) {
if (!MMI->hasDebugInfo())
@@ -191,6 +249,9 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!");
calculateDbgEntityHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
DbgValues, DbgLabels);
+ InstOrdering.initialize(*MF);
+ if (TrimVarLocs)
+ DbgValues.trimLocationRanges(*MF, LScopes, InstOrdering);
LLVM_DEBUG(DbgValues.dump());
// Request labels for the full history.
@@ -212,10 +273,16 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
// doing that violates the ranges that are calculated in the history map.
// However, we currently do not emit debug values for constant arguments
// directly at the start of the function, so this code is still useful.
+ // FIXME: If the first mention of an argument is in a unique section basic
+ // block, we cannot always assign the CurrentFnBeginLabel as it lies in a
+ // different section. Temporarily, we disable generating loc list
+ // information or DW_AT_const_value when the block is in a different
+ // section.
const DILocalVariable *DIVar =
Entries.front().getInstr()->getDebugVariable();
if (DIVar->isParameter() &&
- getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) {
+ getDISubprogram(DIVar->getScope())->describes(&MF->getFunction()) &&
+ Entries.front().getInstr()->getParent()->sameSection(&MF->front())) {
if (!IsDescribedByReg(Entries.front().getInstr()))
LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin();
if (Entries.front().getInstr()->getDebugExpression()->isFragment()) {
@@ -262,7 +329,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
}
void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
- if (!MMI->hasDebugInfo())
+ if (!Asm || !MMI->hasDebugInfo())
return;
assert(CurMI == nullptr);
@@ -288,7 +355,7 @@ void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
}
void DebugHandlerBase::endInstruction() {
- if (!MMI->hasDebugInfo())
+ if (!Asm || !MMI->hasDebugInfo())
return;
assert(CurMI != nullptr);
@@ -320,12 +387,13 @@ void DebugHandlerBase::endInstruction() {
}
void DebugHandlerBase::endFunction(const MachineFunction *MF) {
- if (hasDebugInfo(MMI, MF))
+ if (Asm && hasDebugInfo(MMI, MF))
endFunctionImpl(MF);
DbgValues.clear();
DbgLabels.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
+ InstOrdering.clear();
}
void DebugHandlerBase::beginBasicBlock(const MachineBasicBlock &MBB) {
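
Once composite and derived types have been peeled away, isUnsignedDIType above reduces to a small table over DWARF base-type encodings. The sketch below reproduces only that final classification; the local enum mirrors the DW_ATE_* values from the DWARF specification, and the surrounding code is an illustration, not the LLVM implementation.

    #include <cassert>

    // Encoding values as defined by the DWARF specification (DW_ATE_*).
    enum DwarfEncoding : unsigned {
      ATE_boolean = 0x02,
      ATE_float = 0x04,
      ATE_signed = 0x05,
      ATE_signed_char = 0x06,
      ATE_unsigned = 0x07,
      ATE_unsigned_char = 0x08,
      ATE_UTF = 0x10,
    };

    // Mirrors the final check in DebugHandlerBase::isUnsignedDIType: treat
    // unsigned, unsigned_char, UTF and boolean encodings as unsigned, and
    // everything else that reaches this point as signed.
    static bool isUnsignedEncoding(DwarfEncoding E) {
      return E == ATE_unsigned || E == ATE_unsigned_char || E == ATE_UTF ||
             E == ATE_boolean;
    }

    int main() {
      assert(isUnsignedEncoding(ATE_unsigned_char));
      assert(!isUnsignedEncoding(ATE_signed));
      assert(!isUnsignedEncoding(ATE_float)); // float is not treated as unsigned
      return 0;
    }
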
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 11ed1062f77e..c20ac6040aef 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -81,8 +81,9 @@ void DwarfCFIException::endModule() {
}
}
-static MCSymbol *getExceptionSym(AsmPrinter *Asm) {
- return Asm->getCurExceptionSym();
+static MCSymbol *getExceptionSym(AsmPrinter *Asm,
+ const MachineBasicBlock *MBB) {
+ return Asm->getMBBExceptionSym(*MBB);
}
void DwarfCFIException::beginFunction(const MachineFunction *MF) {
@@ -161,7 +162,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
// Provide LSDA information.
if (shouldEmitLSDA)
- Asm->OutStreamer->emitCFILsda(ESP(Asm), TLOF.getLSDAEncoding());
+ Asm->OutStreamer->emitCFILsda(ESP(Asm, MBB), TLOF.getLSDAEncoding());
}
/// endFunction - Gather and emit post-function exception information.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 296c380ae550..befc4bba19a2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -12,18 +12,12 @@
#include "DwarfCompileUnit.h"
#include "AddressPool.h"
-#include "DwarfDebug.h"
#include "DwarfExpression.h"
-#include "DwarfUnit.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
-#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -32,22 +26,16 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
#include <iterator>
-#include <memory>
#include <string>
#include <utility>
@@ -117,7 +105,7 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None,
CUID);
return Asm->OutStreamer->emitDwarfFileDirective(
- 0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File),
+ 0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
File->getSource(), CUID);
}
@@ -260,7 +248,9 @@ void DwarfCompileUnit::addLocationAttribute(
: dwarf::DW_OP_const8u);
// 2) containing the (relocated) offset of the TLS variable
// within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
+ addExpr(*Loc,
+ PointerSize == 4 ? dwarf::DW_FORM_data4
+ : dwarf::DW_FORM_data8,
Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
} else {
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
@@ -432,7 +422,10 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
// FIXME: duplicated from Target/WebAssembly/WebAssembly.h
// don't want to depend on target specific headers in this code?
const unsigned TI_GLOBAL_RELOC = 3;
- if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) {
+ // FIXME: when writing dwo, we need to avoid relocations. Probably
+ // the "right" solution is to treat globals the way func and data symbols
+ // are (with entries in .debug_addr).
+ if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC && !isDwoUnit()) {
// These need to be relocatable.
assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
auto SPSym = cast<MCSymbolWasm>(
@@ -449,8 +442,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
true});
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
- addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.WasmLoc.Kind);
- addLabel(*Loc, dwarf::DW_FORM_udata, SPSym);
+ addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
+ addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
DD->addArangeLabel(SymbolCU(this, SPSym));
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
@@ -565,7 +558,12 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
void DwarfCompileUnit::attachRangesOrLowHighPC(
DIE &Die, SmallVector<RangeSpan, 2> Ranges) {
- if (Ranges.size() == 1 || !DD->useRangesSection()) {
+ assert(!Ranges.empty());
+ if (!DD->useRangesSection() ||
+ (Ranges.size() == 1 &&
+ (!DD->alwaysUseRanges() ||
+ DD->getSectionLabel(&Ranges.front().Begin->getSection()) ==
+ Ranges.front().Begin))) {
const RangeSpan &Front = Ranges.front();
const RangeSpan &Back = Ranges.back();
attachLowHighPC(Die, Front.Begin, Back.End);
@@ -688,9 +686,9 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
// Add variable address.
- unsigned Offset = DV.getDebugLocListIndex();
- if (Offset != ~0U) {
- addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
+ unsigned Index = DV.getDebugLocListIndex();
+ if (Index != ~0U) {
+ addLocationList(*VariableDie, dwarf::DW_AT_location, Index);
auto TagOffset = DV.getDebugLocListTagOffset();
if (TagOffset)
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
@@ -722,6 +720,13 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
addConstantFPValue(*VariableDie, DVal->getConstantFP());
} else if (DVal->isConstantInt()) {
addConstantValue(*VariableDie, DVal->getConstantInt(), DV.getType());
+ } else if (DVal->isTargetIndexLocation()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ const DIBasicType *BT = dyn_cast<DIBasicType>(
+ static_cast<const Metadata *>(DV.getVariable()->getType()));
+ DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr);
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
}
return VariableDie;
}
@@ -737,10 +742,14 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
Register FrameReg;
const DIExpression *Expr = Fragment.Expr;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
- int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
+ StackOffset Offset =
+ TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
DwarfExpr.addFragmentOffset(Expr);
+
+ auto *TRI = Asm->MF->getSubtarget().getRegisterInfo();
SmallVector<uint64_t, 8> Ops;
- DIExpression::appendOffset(Ops, Offset);
+ TRI->getOffsetOpcodes(Offset, Ops);
+
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
@@ -801,6 +810,10 @@ static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
return Result;
if (auto *DLVar = Array->getDataLocation())
Result.push_back(DLVar);
+ if (auto *AsVar = Array->getAssociated())
+ Result.push_back(AsVar);
+ if (auto *AlVar = Array->getAllocated())
+ Result.push_back(AlVar);
for (auto *El : Array->getElements()) {
if (auto *Subrange = dyn_cast<DISubrange>(El)) {
if (auto Count = Subrange->getCount())
@@ -815,6 +828,19 @@ static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
if (auto ST = Subrange->getStride())
if (auto *Dependency = ST.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
+ } else if (auto *GenericSubrange = dyn_cast<DIGenericSubrange>(El)) {
+ if (auto Count = GenericSubrange->getCount())
+ if (auto *Dependency = Count.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
+ if (auto LB = GenericSubrange->getLowerBound())
+ if (auto *Dependency = LB.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
+ if (auto UB = GenericSubrange->getUpperBound())
+ if (auto *Dependency = UB.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
+ if (auto ST = GenericSubrange->getStride())
+ if (auto *Dependency = ST.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
}
}
return Result;
@@ -996,7 +1022,7 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
}
bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const {
- return DD->getDwarfVersion() == 4 && DD->tuneForGDB();
+ return DD->getDwarfVersion() == 4 && !DD->tuneForLLDB();
}
dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const {
@@ -1352,11 +1378,9 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
/// Add a Dwarf loclistptr attribute data and value.
void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
unsigned Index) {
- dwarf::Form Form = dwarf::DW_FORM_data4;
- if (DD->getDwarfVersion() == 4)
- Form =dwarf::DW_FORM_sec_offset;
- if (DD->getDwarfVersion() >= 5)
- Form =dwarf::DW_FORM_loclistx;
+ dwarf::Form Form = (DD->getDwarfVersion() >= 5)
+ ? dwarf::DW_FORM_loclistx
+ : DD->getDwarfSectionOffsetForm();
Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index));
}
@@ -1417,8 +1441,8 @@ void DwarfCompileUnit::addAddrTableBase() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
MCSymbol *Label = DD->getAddressPool().getLabel();
addSectionLabel(getUnitDie(),
- getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base
- : dwarf::DW_AT_GNU_addr_base,
+ DD->getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base
+ : dwarf::DW_AT_GNU_addr_base,
Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 4ccd8c96dd0d..6d8186a5ee2b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -22,7 +22,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
-#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Casting.h"
@@ -34,6 +33,9 @@
namespace llvm {
class AsmPrinter;
+class DIE;
+class DIELoc;
+class DIEValueList;
class DwarfFile;
class GlobalVariable;
class MCExpr;
@@ -55,7 +57,7 @@ class DwarfCompileUnit final : public DwarfUnit {
DwarfCompileUnit *Skeleton = nullptr;
/// The start of the unit within its section.
- MCSymbol *LabelBegin;
+ MCSymbol *LabelBegin = nullptr;
/// The start of the unit macro info within macro section.
MCSymbol *MacroLabelBegin;
@@ -287,8 +289,8 @@ public:
return DwarfUnit::getHeaderSize() + DWOIdSize;
}
unsigned getLength() {
- return sizeof(uint32_t) + // Length field
- getHeaderSize() + getUnitDie().getSize();
+ return Asm->getUnitLengthFieldByteSize() + // Length field
+ getHeaderSize() + getUnitDie().getSize();
}
void emitHeader(bool UseOffsets) override;
@@ -297,7 +299,7 @@ public:
void addAddrTableBase();
MCSymbol *getLabelBegin() const {
- assert(getSection());
+ assert(LabelBegin && "LabelBegin is not initialized");
return LabelBegin;
}
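
getLength() now asks the AsmPrinter for the unit length field size instead of hardcoding sizeof(uint32_t) because DWARF64, newly selectable in this merge, uses a 12-byte initial length (a 0xffffffff escape followed by a 64-bit length) and 8-byte section offsets, versus 4 bytes for both in DWARF32. The sketch below restates those per-format sizes, mirroring what Asm->getUnitLengthFieldByteSize() and Asm->getDwarfOffsetByteSize() return; the local enum is a stand-in, not the LLVM dwarf::DwarfFormat type, and the header size in the example is an arbitrary illustrative value.

    #include <cassert>

    // Local stand-in for the DWARF format selector.
    enum class DwarfFormat { DWARF32, DWARF64 };

    // DWARF32: 4-byte unit length, 4-byte section offsets.
    // DWARF64: 0xffffffff escape + 8-byte length = 12 bytes, 8-byte offsets.
    static unsigned unitLengthFieldByteSize(DwarfFormat F) {
      return F == DwarfFormat::DWARF64 ? 12 : 4;
    }
    static unsigned dwarfOffsetByteSize(DwarfFormat F) {
      return F == DwarfFormat::DWARF64 ? 8 : 4;
    }

    // Unit length as computed by DwarfCompileUnit::getLength(): length field
    // plus header size plus the size of the unit DIE tree.
    static unsigned unitLength(DwarfFormat F, unsigned HeaderSize,
                               unsigned DIESize) {
      return unitLengthFieldByteSize(F) + HeaderSize + DIESize;
    }

    int main() {
      // Example with a hypothetical 8-byte header and 100 bytes of DIEs.
      assert(unitLength(DwarfFormat::DWARF32, 8, 100) == 112);
      assert(unitLength(DwarfFormat::DWARF64, 8, 100) == 120);
      assert(dwarfOffsetByteSize(DwarfFormat::DWARF64) == 8);
      return 0;
    }
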
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 45ed5256deb9..462682743c6a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -13,30 +13,18 @@
#include "DwarfDebug.h"
#include "ByteStreamer.h"
#include "DIEHash.h"
-#include "DebugLocEntry.h"
-#include "DebugLocStream.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
-#include "DwarfFile.h"
#include "DwarfUnit.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -46,14 +34,11 @@
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -71,15 +56,10 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include <algorithm>
-#include <cassert>
#include <cstddef>
-#include <cstdint>
#include <iterator>
#include <string>
-#include <utility>
-#include <vector>
using namespace llvm;
@@ -87,18 +67,10 @@ using namespace llvm;
STATISTIC(NumCSParams, "Number of dbg call site params created");
-static cl::opt<bool>
-DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
- cl::desc("Disable debug info printing"));
-
static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier(
"use-dwarf-ranges-base-address-specifier", cl::Hidden,
cl::desc("Use base address specifiers in debug_ranges"), cl::init(false));
-static cl::opt<bool> EmitDwarfDebugEntryValues(
- "emit-debug-entry-values", cl::Hidden,
- cl::desc("Emit the debug entry values"), cl::init(false));
-
static cl::opt<bool> GenerateARangeSection("generate-arange-section",
cl::Hidden,
cl::desc("Generate dwarf aranges"),
@@ -151,6 +123,18 @@ static cl::opt<DefaultOnOff> DwarfSectionsAsReferences(
clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
cl::init(Default));
+static cl::opt<bool>
+ UseGNUDebugMacro("use-gnu-debug-macro", cl::Hidden,
+ cl::desc("Emit the GNU .debug_macro format with DWARF <5"),
+ cl::init(false));
+
+static cl::opt<DefaultOnOff> DwarfOpConvert(
+ "dwarf-op-convert", cl::Hidden,
+ cl::desc("Enable use of the DWARFv5 DW_OP_convert operator"),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
+ cl::init(Default));
+
enum LinkageNameOption {
DefaultLinkageNames,
AllLinkageNames,
@@ -167,19 +151,23 @@ static cl::opt<LinkageNameOption>
"Abstract subprograms")),
cl::init(DefaultLinkageNames));
-static cl::opt<unsigned> LocationAnalysisSizeLimit(
- "singlevarlocation-input-bb-limit",
- cl::desc("Maximum block size to analyze for single-location variables"),
- cl::init(30000), cl::Hidden);
+static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option(
+ "minimize-addr-in-v5", cl::Hidden,
+ cl::desc("Always use DW_AT_ranges in DWARFv5 whenever it could allow more "
+ "address pool entry sharing to reduce relocations/object size"),
+ cl::values(clEnumValN(DwarfDebug::MinimizeAddrInV5::Default, "Default",
+ "Default address minimization strategy"),
+ clEnumValN(DwarfDebug::MinimizeAddrInV5::Ranges, "Ranges",
+ "Use rnglists for contiguous ranges if that allows "
+ "using a pre-existing base address"),
+ clEnumValN(DwarfDebug::MinimizeAddrInV5::Disabled, "Disabled",
+ "Stuff")),
+ cl::init(DwarfDebug::MinimizeAddrInV5::Default));
-static const char *const DWARFGroupName = "dwarf";
-static const char *const DWARFGroupDescription = "DWARF Emission";
-static const char *const DbgTimerName = "writer";
-static const char *const DbgTimerDescription = "DWARF Debug Writer";
static constexpr unsigned ULEB128PadSize = 4;
void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
- getActiveStreamer().EmitInt8(
+ getActiveStreamer().emitInt8(
Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
: dwarf::OperationEncodingString(Op));
}
@@ -193,7 +181,7 @@ void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
}
void DebugLocDwarfExpression::emitData1(uint8_t Value) {
- getActiveStreamer().EmitInt8(Value, Twine(Value));
+ getActiveStreamer().emitInt8(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
@@ -202,7 +190,7 @@ void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
}
bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) {
+ llvm::Register MachineReg) {
// This information is not available while emitting .debug_loc entries.
return false;
}
@@ -227,7 +215,7 @@ void DebugLocDwarfExpression::commitTemporaryBuffer() {
const char *Comment = (Byte.index() < TmpBuf->Comments.size())
? TmpBuf->Comments[Byte.index()].c_str()
: "";
- OutBS.EmitInt8(Byte.value(), Comment);
+ OutBS.emitInt8(Byte.value(), Comment);
}
TmpBuf->Bytes.clear();
TmpBuf->Comments.clear();
@@ -242,8 +230,8 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
const DIExpression *Expr = MI->getDebugExpression();
assert(MI->getNumOperands() == 4);
if (MI->getDebugOperand(0).isReg()) {
- auto RegOp = MI->getDebugOperand(0);
- auto Op1 = MI->getDebugOffset();
+ const auto &RegOp = MI->getDebugOperand(0);
+ const auto &Op1 = MI->getDebugOffset();
// If the second operand is an immediate, this is a
// register-indirect address.
assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
@@ -251,7 +239,7 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
return DbgValueLoc(Expr, MLoc);
}
if (MI->getDebugOperand(0).isTargetIndex()) {
- auto Op = MI->getDebugOperand(0);
+ const auto &Op = MI->getDebugOperand(0);
return DbgValueLoc(Expr,
TargetIndexLocation(Op.getIndex(), Op.getOffset()));
}
@@ -354,7 +342,7 @@ static AccelTableKind computeAccelTableKind(unsigned DwarfVersion,
return AccelTableKind::None;
}
-DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+DwarfDebug::DwarfDebug(AsmPrinter *A)
: DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
InfoHolder(A, "info_string", DIEValueAllocator),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
@@ -397,6 +385,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfVersion =
TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
+ bool Dwarf64 = Asm->TM.Options.MCOptions.Dwarf64 &&
+ DwarfVersion >= 3 && // DWARF64 was introduced in DWARFv3.
+ TT.isArch64Bit() && // DWARF64 requires 64-bit relocations.
+ TT.isOSBinFormatELF(); // Support only ELF for now.
+
UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
// Use sections as references. Force for NVPTX.
@@ -406,8 +399,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
UseSectionsAsReferences = DwarfSectionsAsReferences == Enable;
// Don't generate type units for unsupported object file formats.
- GenerateTypeUnits =
- A->TM.getTargetTriple().isOSBinFormatELF() && GenerateDwarfTypeUnits;
+ GenerateTypeUnits = (A->TM.getTargetTriple().isOSBinFormatELF() ||
+ A->TM.getTargetTriple().isOSBinFormatWasm()) &&
+ GenerateDwarfTypeUnits;
TheAccelTableKind = computeAccelTableKind(
DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple());
@@ -430,11 +424,31 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// Emit call-site-param debug info for GDB and LLDB, if the target supports
// the debug entry values feature. It can also be enabled explicitly.
- EmitDebugEntryValues = (Asm->TM.Options.ShouldEmitDebugEntryValues() &&
- (tuneForGDB() || tuneForLLDB())) ||
- EmitDwarfDebugEntryValues;
+ EmitDebugEntryValues = Asm->TM.Options.ShouldEmitDebugEntryValues();
+
+ // It is unclear if the GCC .debug_macro extension is well-specified
+ // for split DWARF. For now, do not allow LLVM to emit it.
+ UseDebugMacroSection =
+ DwarfVersion >= 5 || (UseGNUDebugMacro && !useSplitDwarf());
+ if (DwarfOpConvert == Default)
+ EnableOpConvert = !((tuneForGDB() && useSplitDwarf()) || (tuneForLLDB() && !TT.isOSBinFormatMachO()));
+ else
+ EnableOpConvert = (DwarfOpConvert == Enable);
+
+ // Split DWARF would benefit object size significantly by trading reductions
+ // in address pool usage for slightly increased range list encodings.
+ if (DwarfVersion >= 5) {
+ MinimizeAddr = MinimizeAddrInV5Option;
+ // FIXME: In the future, enable this by default for Split DWARF where the
+ // tradeoff is more pronounced due to being able to offload the range
+ // lists to the dwo file and shrink object files/reduce relocations there.
+ if (MinimizeAddr == MinimizeAddrInV5::Default)
+ MinimizeAddr = MinimizeAddrInV5::Disabled;
+ }
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
+ Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64
+ : dwarf::DWARF32);
}
// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
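
The Dwarf64 gating in the constructor above is a simple conjunction of requirements. The sketch below restates it with plain bool parameters standing in for the TargetOptions and Triple queries; it is an illustration of the condition, not the LLVM code itself.

    #include <cassert>

    // DWARF64 is used only when requested via MC options, the DWARF version is
    // at least 3 (where DWARF64 was introduced), the target is 64-bit (for
    // 64-bit relocations), and the object format is ELF (only format supported
    // so far).
    static bool useDwarf64(bool Requested, unsigned DwarfVersion,
                           bool Arch64Bit, bool IsELF) {
      return Requested && DwarfVersion >= 3 && Arch64Bit && IsELF;
    }

    int main() {
      assert(useDwarf64(true, 5, true, true));
      assert(!useDwarf64(true, 2, true, true));  // DWARFv2 has no 64-bit format
      assert(!useDwarf64(true, 5, true, false)); // non-ELF not supported yet
      assert(!useDwarf64(false, 5, true, true)); // off unless requested
      return 0;
    }
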
@@ -583,7 +597,7 @@ static const DIExpression *combineDIExpressions(const DIExpression *Original,
std::vector<uint64_t> Elts = Addition->getElements().vec();
// Avoid multiple DW_OP_stack_values.
if (Original->isImplicit() && Addition->isImplicit())
- erase_if(Elts, [](uint64_t Op) { return Op == dwarf::DW_OP_stack_value; });
+ erase_value(Elts, dwarf::DW_OP_stack_value);
const DIExpression *CombinedExpr =
(Elts.size() > 0) ? DIExpression::append(Original, Elts) : Original;
return CombinedExpr;
@@ -709,11 +723,11 @@ static void interpretValues(const MachineInstr *CurMI,
ForwardedRegWorklist[ParamFwdReg], Params);
} else if (ParamValue->first.isReg()) {
Register RegLoc = ParamValue->first.getReg();
- unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
Register FP = TRI.getFrameRegister(*MF);
bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
if (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
- MachineLocation MLoc(RegLoc, /*IsIndirect=*/IsSPorFP);
+ MachineLocation MLoc(RegLoc, /*Indirect=*/IsSPorFP);
finishCallSiteParams(MLoc, ParamValue->second,
ForwardedRegWorklist[ParamFwdReg], Params);
} else {
@@ -797,6 +811,11 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
(void)InsertedReg;
}
+ // Do not emit CSInfo for undef forwarding registers.
+ for (auto &MO : CallMI->uses())
+ if (MO.isReg() && MO.isUndef())
+ ForwardedRegWorklist.erase(MO.getReg());
+
// We erase, from the ForwardedRegWorklist, those forwarding registers for
// which we successfully describe a loaded value (by using
// the describeLoadedValue()). For those remaining arguments in the working
@@ -1071,9 +1090,8 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
// compilation directory.
if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
Asm->OutStreamer->emitDwarfFile0Directive(
- CompilationDir, DIUnit->getFilename(),
- NewCU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource(),
- NewCU.getUniqueID());
+ CompilationDir, DIUnit->getFilename(), getMD5AsBytes(DIUnit->getFile()),
+ DIUnit->getSource(), NewCU.getUniqueID());
if (useSplitDwarf()) {
NewCU.setSkeleton(constructSkeletonCU(NewCU));
@@ -1126,21 +1144,17 @@ sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
// Emit all Dwarf sections that should come prior to the content. Create
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
-void DwarfDebug::beginModule() {
- NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName,
- DWARFGroupDescription, TimePassesIsEnabled);
- if (DisableDebugInfoPrinting) {
- MMI->setDebugInfoAvailability(false);
- return;
- }
+void DwarfDebug::beginModule(Module *M) {
+ DebugHandlerBase::beginModule(M);
- const Module *M = MMI->getModule();
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
- // Tell MMI whether we have debug info.
- assert(MMI->hasDebugInfo() == (NumDebugCUs > 0) &&
- "DebugInfoAvailabilty initialized unexpectedly");
+ assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
+ assert(MMI->hasDebugInfo() &&
+ "DebugInfoAvailabilty unexpectedly not initialized");
SingleCU = NumDebugCUs == 1;
DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
GVMap;
@@ -1292,7 +1306,7 @@ void DwarfDebug::finalizeModuleInfo() {
Asm->TM.Options.MCOptions.SplitDwarfFile);
// Emit a unique identifier for this CU.
uint64_t ID =
- DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie());
+ DIEHash(Asm, &TheCU).computeCUSignature(DWOName, TheCU.getUnitDie());
if (getDwarfVersion() >= 5) {
TheCU.setDWOId(ID);
SkCU->setDWOId(ID);
@@ -1353,15 +1367,18 @@ void DwarfDebug::finalizeModuleInfo() {
// If compile Unit has macros, emit "DW_AT_macro_info/DW_AT_macros"
// attribute.
if (CUNode->getMacros()) {
- if (getDwarfVersion() >= 5) {
+ if (UseDebugMacroSection) {
if (useSplitDwarf())
TheCU.addSectionDelta(
TheCU.getUnitDie(), dwarf::DW_AT_macros, U.getMacroLabelBegin(),
TLOF.getDwarfMacroDWOSection()->getBeginSymbol());
- else
- U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macros,
- U.getMacroLabelBegin(),
+ else {
+ dwarf::Attribute MacrosAttr = getDwarfVersion() >= 5
+ ? dwarf::DW_AT_macros
+ : dwarf::DW_AT_GNU_macros;
+ U.addSectionLabel(U.getUnitDie(), MacrosAttr, U.getMacroLabelBegin(),
TLOF.getDwarfMacroSection()->getBeginSymbol());
+ }
} else {
if (useSplitDwarf())
TheCU.addSectionDelta(
@@ -1398,9 +1415,8 @@ void DwarfDebug::endModule() {
}
// If we aren't actually generating debug info (check beginModule -
- // conditionalized on !DisableDebugInfoPrinting and the presence of the
- // llvm.dbg.cu metadata node)
- if (!MMI->hasDebugInfo())
+ // conditionalized on the presence of the llvm.dbg.cu metadata node)
+ if (!Asm || !MMI->hasDebugInfo())
return;
// Finalize the debug info for the module.
@@ -1532,7 +1548,8 @@ void DwarfDebug::collectVariableInfoFromMFTable(
/// either open or otherwise rolls off the end of the scope.
static bool validThroughout(LexicalScopes &LScopes,
const MachineInstr *DbgValue,
- const MachineInstr *RangeEnd) {
+ const MachineInstr *RangeEnd,
+ const InstructionOrdering &Ordering) {
assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
auto MBB = DbgValue->getParent();
auto DL = DbgValue->getDebugLoc();
@@ -1544,34 +1561,30 @@ static bool validThroughout(LexicalScopes &LScopes,
if (LSRange.size() == 0)
return false;
-
- // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
const MachineInstr *LScopeBegin = LSRange.front().first;
- // Early exit if the lexical scope begins outside of the current block.
- if (LScopeBegin->getParent() != MBB)
- return false;
-
- // If there are instructions belonging to our scope in another block, and
- // we're not a constant (see DWARF2 comment below), then we can't be
- // validThroughout.
- const MachineInstr *LScopeEnd = LSRange.back().second;
- if (RangeEnd && LScopeEnd->getParent() != MBB)
- return false;
-
- MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
- for (++Pred; Pred != MBB->rend(); ++Pred) {
- if (Pred->getFlag(MachineInstr::FrameSetup))
- break;
- auto PredDL = Pred->getDebugLoc();
- if (!PredDL || Pred->isMetaInstruction())
- continue;
- // Check whether the instruction preceding the DBG_VALUE is in the same
- // (sub)scope as the DBG_VALUE.
- if (DL->getScope() == PredDL->getScope())
- return false;
- auto *PredScope = LScopes.findLexicalScope(PredDL);
- if (!PredScope || LScope->dominates(PredScope))
+ // If the scope starts before the DBG_VALUE then we may have a negative
+ // result. Otherwise the location is live coming into the scope and we
+ // can skip the following checks.
+ if (!Ordering.isBefore(DbgValue, LScopeBegin)) {
+ // Exit if the lexical scope begins outside of the current block.
+ if (LScopeBegin->getParent() != MBB)
return false;
+
+ MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
+ for (++Pred; Pred != MBB->rend(); ++Pred) {
+ if (Pred->getFlag(MachineInstr::FrameSetup))
+ break;
+ auto PredDL = Pred->getDebugLoc();
+ if (!PredDL || Pred->isMetaInstruction())
+ continue;
+ // Check whether the instruction preceding the DBG_VALUE is in the same
+ // (sub)scope as the DBG_VALUE.
+ if (DL->getScope() == PredDL->getScope())
+ return false;
+ auto *PredScope = LScopes.findLexicalScope(PredDL);
+ if (!PredScope || LScope->dominates(PredScope))
+ return false;
+ }
}
// If the range of the DBG_VALUE is open-ended, report success.
@@ -1585,24 +1598,10 @@ static bool validThroughout(LexicalScopes &LScopes,
if (DbgValue->getDebugOperand(0).isImm() && MBB->pred_empty())
return true;
- // Now check for situations where an "open-ended" DBG_VALUE isn't enough to
- // determine eligibility for a single location, e.g. nested scopes, inlined
- // functions.
- // FIXME: For now we just handle a simple (but common) case where the scope
- // is contained in MBB. We could be smarter here.
- //
- // At this point we know that our scope ends in MBB. So, if RangeEnd exists
- // outside of the block we can ignore it; the location is just leaking outside
- // its scope.
- assert(LScopeEnd->getParent() == MBB && "Scope ends outside MBB");
- if (RangeEnd->getParent() != DbgValue->getParent())
- return true;
-
- // The location range and variable's enclosing scope are both contained within
- // MBB, test if location terminates before end of scope.
- for (auto I = RangeEnd->getIterator(); I != MBB->end(); ++I)
- if (&*I == LScopeEnd)
- return false;
+ // Test if the location terminates before the end of the scope.
+ const MachineInstr *LScopeEnd = LSRange.back().second;
+ if (Ordering.isBefore(RangeEnd, LScopeEnd))
+ return false;
// There's a single location which starts at the scope start, and ends at or
// after the scope end.
@@ -1642,10 +1641,8 @@ static bool validThroughout(LexicalScopes &LScopes,
// [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)]
// [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)]
// [4-) [(@g, fragment 0, 96)]
-bool DwarfDebug::buildLocationList(
- SmallVectorImpl<DebugLocEntry> &DebugLoc,
- const DbgValueHistoryMap::Entries &Entries,
- DenseSet<const MachineBasicBlock *> &VeryLargeBlocks) {
+bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries) {
using OpenRange =
std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>;
SmallVector<OpenRange, 4> OpenRanges;
@@ -1658,9 +1655,7 @@ bool DwarfDebug::buildLocationList(
// Remove all values that are no longer live.
size_t Index = std::distance(EB, EI);
- auto Last =
- remove_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; });
- OpenRanges.erase(Last, OpenRanges.end());
+ erase_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; });
// If we are dealing with a clobbering entry, this iteration will result in
// a location list entry starting after the clobbering instruction.
@@ -1741,14 +1736,8 @@ bool DwarfDebug::buildLocationList(
DebugLoc.pop_back();
}
- // If there's a single entry, safe for a single location, and not part of
- // an over-sized basic block, then ask validThroughout whether this
- // location can be represented as a single variable location.
- if (DebugLoc.size() != 1 || !isSafeForSingleLocation)
- return false;
- if (VeryLargeBlocks.count(StartDebugMI->getParent()))
- return false;
- return validThroughout(LScopes, StartDebugMI, EndMI);
+ return DebugLoc.size() == 1 && isSafeForSingleLocation &&
+ validThroughout(LScopes, StartDebugMI, EndMI, getInstOrdering());
}
DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
@@ -1780,13 +1769,6 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(TheCU, Processed);
- // Identify blocks that are unreasonably sized, so that we can later
- // skip lexical scope analysis over them.
- DenseSet<const MachineBasicBlock *> VeryLargeBlocks;
- for (const auto &MBB : *CurFn)
- if (MBB.size() > LocationAnalysisSizeLimit)
- VeryLargeBlocks.insert(&MBB);
-
for (const auto &I : DbgValues) {
InlinedEntity IV = I.first;
if (Processed.count(IV))
@@ -1823,8 +1805,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
if (HistSize == 1 || SingleValueWithClobber) {
const auto *End =
SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr;
- if (VeryLargeBlocks.count(MInsn->getParent()) == 0 &&
- validThroughout(LScopes, MInsn, End)) {
+ if (validThroughout(LScopes, MInsn, End, getInstOrdering())) {
RegVar->initializeDbgValue(MInsn);
continue;
}
@@ -1839,8 +1820,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Build the location list for this variable.
SmallVector<DebugLocEntry, 8> Entries;
- bool isValidSingleLocation =
- buildLocationList(Entries, HistoryMapEntries, VeryLargeBlocks);
+ bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries);
// Check whether buildLocationList managed to merge all locations to one
// that is valid throughout the variable's scope. If so, produce single
@@ -1945,7 +1925,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
}
DebugHandlerBase::beginInstruction(MI);
- assert(CurMI);
+ if (!CurMI)
+ return;
if (NoDebug)
return;
@@ -2382,10 +2363,10 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
TheU = Skeleton;
// Emit the header.
- Asm->OutStreamer->AddComment("Length of Public " + Name + " Info");
MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
- Asm->emitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->emitDwarfUnitLength(EndLabel, BeginLabel,
+ "Length of Public " + Name + " Info");
Asm->OutStreamer->emitLabel(BeginLabel);
@@ -2396,7 +2377,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
emitSectionReference(*TheU);
Asm->OutStreamer->AddComment("Compilation Unit Length");
- Asm->emitInt32(TheU->getLength());
+ Asm->emitDwarfLengthOrOffset(TheU->getLength());
// Emit the pubnames for this compilation unit.
for (const auto &GI : Globals) {
@@ -2404,7 +2385,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
const DIE *Entity = GI.second;
Asm->OutStreamer->AddComment("DIE offset");
- Asm->emitInt32(Entity->getOffset());
+ Asm->emitDwarfLengthOrOffset(Entity->getOffset());
if (GnuStyle) {
dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
@@ -2419,7 +2400,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
}
Asm->OutStreamer->AddComment("End Mark");
- Asm->emitInt32(0);
+ Asm->emitDwarfLengthOrOffset(0);
Asm->OutStreamer->emitLabel(EndLabel);
}
@@ -2458,7 +2439,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
for (auto &Op : Expr) {
assert(Op.getCode() != dwarf::DW_OP_const_type &&
"3 operand ops not yet supported");
- Streamer.EmitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
+ Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
Offset++;
for (unsigned I = 0; I < 2; ++I) {
if (Op.getDescription().Op[I] == Encoding::SizeNA)
@@ -2474,7 +2455,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
Comment++;
} else {
for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
- Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
+ Streamer.emitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
}
Offset = Op.getOperandEndOffset(I);
}
@@ -2511,10 +2492,26 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
TargetIndexLocation Loc = Value.getTargetIndexLocation();
// TODO TargetIndexLocation is a target-independent. Currently only the WebAssembly-specific
// encoding is supported.
+ assert(AP.TM.getTargetTriple().isWasm());
DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
+ DwarfExpr.addExpression(std::move(ExprCursor));
+ return;
} else if (Value.isConstantFP()) {
- APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
- DwarfExpr.addUnsignedConstant(RawBytes);
+ if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() &&
+ !ExprCursor) {
+ DwarfExpr.addConstantFP(Value.getConstantFP()->getValueAPF(), AP);
+ return;
+ }
+ if (Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth() <=
+ 64 /*bits*/)
+ DwarfExpr.addUnsignedConstant(
+ Value.getConstantFP()->getValueAPF().bitcastToAPInt());
+ else
+ LLVM_DEBUG(
+ dbgs()
+ << "Skipped DwarfExpression creation for ConstantFP of size"
+ << Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth()
+ << " bits\n");
}
DwarfExpr.addExpression(std::move(ExprCursor));
}
@@ -2537,7 +2534,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
}) && "all values are expected to be fragments");
assert(llvm::is_sorted(Values) && "fragments are expected to be sorted");
- for (auto Fragment : Values)
+ for (const auto &Fragment : Values)
DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
} else {
@@ -2580,7 +2577,8 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
Asm->OutStreamer->emitLabel(Holder.getRnglistsTableBaseSym());
for (const RangeSpanList &List : Holder.getRangeLists())
- Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(), 4);
+ Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(),
+ Asm->getDwarfOffsetByteSize());
return TableEnd;
}
@@ -2599,7 +2597,8 @@ static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
Asm->OutStreamer->emitLabel(DebugLocs.getSym());
for (const auto &List : DebugLocs.getLists())
- Asm->emitLabelDifference(List.Label, DebugLocs.getSym(), 4);
+ Asm->emitLabelDifference(List.Label, DebugLocs.getSym(),
+ Asm->getDwarfOffsetByteSize());
return TableEnd;
}
@@ -2881,23 +2880,23 @@ void DwarfDebug::emitDebugARanges() {
// Emit size of content not including length itself.
unsigned ContentSize =
- sizeof(int16_t) + // DWARF ARange version number
- sizeof(int32_t) + // Offset of CU in the .debug_info section
- sizeof(int8_t) + // Pointer Size (in bytes)
- sizeof(int8_t); // Segment Size (in bytes)
+ sizeof(int16_t) + // DWARF ARange version number
+ Asm->getDwarfOffsetByteSize() + // Offset of CU in the .debug_info
+ // section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ sizeof(int8_t); // Segment Size (in bytes)
unsigned TupleSize = PtrSize * 2;
// 7.20 in the Dwarf specs requires the table to be aligned to a tuple.
- unsigned Padding =
- offsetToAlignment(sizeof(int32_t) + ContentSize, Align(TupleSize));
+ unsigned Padding = offsetToAlignment(
+ Asm->getUnitLengthFieldByteSize() + ContentSize, Align(TupleSize));
ContentSize += Padding;
ContentSize += (List.size() + 1) * TupleSize;
// For each compile unit, write the list of spans it covers.
- Asm->OutStreamer->AddComment("Length of ARange Set");
- Asm->emitInt32(ContentSize);
+ Asm->emitDwarfUnitLength(ContentSize, "Length of ARange Set");
Asm->OutStreamer->AddComment("DWARF Arange version number");
Asm->emitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer->AddComment("Offset Into Debug Info Section");
@@ -2983,25 +2982,30 @@ void DwarfDebug::emitDebugRangesDWO() {
Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
}
-/// Emit the header of a DWARF 5 macro section.
+/// Emit the header of a DWARF 5 macro section, or the GNU extension for
+/// DWARF 4.
static void emitMacroHeader(AsmPrinter *Asm, const DwarfDebug &DD,
- const DwarfCompileUnit &CU) {
+ const DwarfCompileUnit &CU, uint16_t DwarfVersion) {
enum HeaderFlagMask {
#define HANDLE_MACRO_FLAG(ID, NAME) MACRO_FLAG_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
};
- uint8_t Flags = 0;
Asm->OutStreamer->AddComment("Macro information version");
- Asm->emitInt16(5);
- // We are setting Offset and line offset flags unconditionally here,
- // since we're only supporting DWARF32 and line offset should be mostly
- // present.
- // FIXME: Add support for DWARF64.
- Flags |= MACRO_FLAG_DEBUG_LINE_OFFSET;
- Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present");
- Asm->emitInt8(Flags);
+ Asm->emitInt16(DwarfVersion >= 5 ? DwarfVersion : 4);
+ // We emit the line offset flag unconditionally here, since line offset should
+ // be mostly present.
+ if (Asm->isDwarf64()) {
+ Asm->OutStreamer->AddComment("Flags: 64 bit, debug_line_offset present");
+ Asm->emitInt8(MACRO_FLAG_OFFSET_SIZE | MACRO_FLAG_DEBUG_LINE_OFFSET);
+ } else {
+ Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present");
+ Asm->emitInt8(MACRO_FLAG_DEBUG_LINE_OFFSET);
+ }
Asm->OutStreamer->AddComment("debug_line_offset");
- Asm->OutStreamer->emitSymbolValue(CU.getLineTableStartSym(), /*Size=*/4);
+ if (DD.useSplitDwarf())
+ Asm->emitDwarfLengthOrOffset(0);
+ else
+ Asm->emitDwarfSymbolReference(CU.getLineTableStartSym());
}
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
@@ -3018,55 +3022,63 @@ void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
void DwarfDebug::emitMacro(DIMacro &M) {
StringRef Name = M.getName();
StringRef Value = M.getValue();
- bool UseMacro = getDwarfVersion() >= 5;
-
- if (UseMacro) {
- unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
- ? dwarf::DW_MACRO_define_strx
- : dwarf::DW_MACRO_undef_strx;
- Asm->OutStreamer->AddComment(dwarf::MacroString(Type));
- Asm->emitULEB128(Type);
- Asm->OutStreamer->AddComment("Line Number");
- Asm->emitULEB128(M.getLine());
- Asm->OutStreamer->AddComment("Macro String");
- if (!Value.empty())
- Asm->emitULEB128(this->InfoHolder.getStringPool()
- .getIndexedEntry(*Asm, (Name + " " + Value).str())
- .getIndex());
- else
- // DW_MACRO_undef_strx doesn't have a value, so just emit the macro
- // string.
- Asm->emitULEB128(this->InfoHolder.getStringPool()
- .getIndexedEntry(*Asm, (Name).str())
- .getIndex());
+
+ // There should be one space between the macro name and the macro value in
+ // define entries. In undef entries, only the macro name is emitted.
+ std::string Str = Value.empty() ? Name.str() : (Name + " " + Value).str();
+
+ if (UseDebugMacroSection) {
+ if (getDwarfVersion() >= 5) {
+ unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
+ ? dwarf::DW_MACRO_define_strx
+ : dwarf::DW_MACRO_undef_strx;
+ Asm->OutStreamer->AddComment(dwarf::MacroString(Type));
+ Asm->emitULEB128(Type);
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(M.getLine());
+ Asm->OutStreamer->AddComment("Macro String");
+ Asm->emitULEB128(
+ InfoHolder.getStringPool().getIndexedEntry(*Asm, Str).getIndex());
+ } else {
+ unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
+ ? dwarf::DW_MACRO_GNU_define_indirect
+ : dwarf::DW_MACRO_GNU_undef_indirect;
+ Asm->OutStreamer->AddComment(dwarf::GnuMacroString(Type));
+ Asm->emitULEB128(Type);
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(M.getLine());
+ Asm->OutStreamer->AddComment("Macro String");
+ Asm->emitDwarfSymbolReference(
+ InfoHolder.getStringPool().getEntry(*Asm, Str).getSymbol());
+ }
} else {
Asm->OutStreamer->AddComment(dwarf::MacinfoString(M.getMacinfoType()));
Asm->emitULEB128(M.getMacinfoType());
Asm->OutStreamer->AddComment("Line Number");
Asm->emitULEB128(M.getLine());
Asm->OutStreamer->AddComment("Macro String");
- Asm->OutStreamer->emitBytes(Name);
- if (!Value.empty()) {
- // There should be one space between macro name and macro value.
- Asm->emitInt8(' ');
- Asm->OutStreamer->AddComment("Macro Value=");
- Asm->OutStreamer->emitBytes(Value);
- }
+ Asm->OutStreamer->emitBytes(Str);
Asm->emitInt8('\0');
}
}
void DwarfDebug::emitMacroFileImpl(
- DIMacroFile &F, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile,
+ DIMacroFile &MF, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile,
StringRef (*MacroFormToString)(unsigned Form)) {
Asm->OutStreamer->AddComment(MacroFormToString(StartFile));
Asm->emitULEB128(StartFile);
Asm->OutStreamer->AddComment("Line Number");
- Asm->emitULEB128(F.getLine());
+ Asm->emitULEB128(MF.getLine());
Asm->OutStreamer->AddComment("File Number");
- Asm->emitULEB128(U.getOrCreateSourceID(F.getFile()));
- handleMacroNodes(F.getElements(), U);
+ DIFile &F = *MF.getFile();
+ if (useSplitDwarf())
+ Asm->emitULEB128(getDwoLineTable(U)->getFile(
+ F.getDirectory(), F.getFilename(), getMD5AsBytes(&F),
+ Asm->OutContext.getDwarfVersion(), F.getSource()));
+ else
+ Asm->emitULEB128(U.getOrCreateSourceID(&F));
+ handleMacroNodes(MF.getElements(), U);
Asm->OutStreamer->AddComment(MacroFormToString(EndFile));
Asm->emitULEB128(EndFile);
}
@@ -3075,10 +3087,10 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
// DWARFv5 macro and DWARFv4 macinfo share some common encodings,
// so for readability/uniformity, we are explicitly emitting those.
assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
- bool UseMacro = getDwarfVersion() >= 5;
- if (UseMacro)
- emitMacroFileImpl(F, U, dwarf::DW_MACRO_start_file,
- dwarf::DW_MACRO_end_file, dwarf::MacroString);
+ if (UseDebugMacroSection)
+ emitMacroFileImpl(
+ F, U, dwarf::DW_MACRO_start_file, dwarf::DW_MACRO_end_file,
+ (getDwarfVersion() >= 5) ? dwarf::MacroString : dwarf::GnuMacroString);
else
emitMacroFileImpl(F, U, dwarf::DW_MACINFO_start_file,
dwarf::DW_MACINFO_end_file, dwarf::MacinfoString);
@@ -3095,8 +3107,8 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
continue;
Asm->OutStreamer->SwitchSection(Section);
Asm->OutStreamer->emitLabel(U.getMacroLabelBegin());
- if (getDwarfVersion() >= 5)
- emitMacroHeader(Asm, *this, U);
+ if (UseDebugMacroSection)
+ emitMacroHeader(Asm, *this, U, getDwarfVersion());
handleMacroNodes(Macros, U);
Asm->OutStreamer->AddComment("End Of Macro List Mark");
Asm->emitInt8(0);
@@ -3106,14 +3118,14 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
/// Emit macros into a debug macinfo/macro section.
void DwarfDebug::emitDebugMacinfo() {
auto &ObjLower = Asm->getObjFileLowering();
- emitDebugMacinfoImpl(getDwarfVersion() >= 5
+ emitDebugMacinfoImpl(UseDebugMacroSection
? ObjLower.getDwarfMacroSection()
: ObjLower.getDwarfMacinfoSection());
}
void DwarfDebug::emitDebugMacinfoDWO() {
auto &ObjLower = Asm->getObjFileLowering();
- emitDebugMacinfoImpl(getDwarfVersion() >= 5
+ emitDebugMacinfoImpl(UseDebugMacroSection
? ObjLower.getDwarfMacroDWOSection()
: ObjLower.getDwarfMacinfoDWOSection());
}
@@ -3200,7 +3212,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
const DICompileUnit *DIUnit = CU.getCUNode();
SplitTypeUnitFileTable.maybeSetRootFile(
DIUnit->getDirectory(), DIUnit->getFilename(),
- CU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());
+ getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());
return &SplitTypeUnitFileTable;
}
@@ -3303,14 +3315,14 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
: DD(DD),
- TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)) {
+ TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) {
DD->TypeUnitsUnderConstruction.clear();
- assert(TypeUnitsUnderConstruction.empty() || !DD->AddrPool.hasBeenUsed());
+ DD->AddrPool.resetUsedFlag();
}
DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
- DD->AddrPool.resetUsedFlag();
+ DD->AddrPool.resetUsedFlag(AddrPoolUsed);
}
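With this change, NonTypeUnitContext saves the address pool's "used" flag on entry and restores it on exit instead of clearing it unconditionally. A minimal sketch of that save/restore pattern, using a hypothetical FlagSaver and a simplified pool type:

#include <cassert>

struct AddrPoolLike {
  bool Used = false;
  bool hasBeenUsed() const { return Used; }
  void resetUsedFlag(bool Value = false) { Used = Value; }
};

// Hypothetical RAII helper mirroring the save/restore done by
// NonTypeUnitContext: remember the flag on entry, restore it on exit.
class FlagSaver {
  AddrPoolLike &Pool;
  bool Saved;
public:
  explicit FlagSaver(AddrPoolLike &P) : Pool(P), Saved(P.hasBeenUsed()) {
    Pool.resetUsedFlag();                      // nested work starts with a clean flag
  }
  ~FlagSaver() { Pool.resetUsedFlag(Saved); }  // restore the caller's view
};

int main() {
  AddrPoolLike Pool;
  Pool.resetUsedFlag(true);
  {
    FlagSaver Guard(Pool);
    assert(!Pool.hasBeenUsed()); // nested construction sees a fresh flag
  }
  assert(Pool.hasBeenUsed());    // the outer state is preserved
}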
DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
@@ -3375,6 +3387,15 @@ uint16_t DwarfDebug::getDwarfVersion() const {
return Asm->OutStreamer->getContext().getDwarfVersion();
}
+dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const {
+ if (Asm->getDwarfVersion() >= 4)
+ return dwarf::Form::DW_FORM_sec_offset;
+ assert((!Asm->isDwarf64() || (Asm->getDwarfVersion() == 3)) &&
+ "DWARF64 is not defined prior DWARFv3");
+ return Asm->isDwarf64() ? dwarf::Form::DW_FORM_data8
+ : dwarf::Form::DW_FORM_data4;
+}
+
const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
return SectionLabels.find(S)->second;
}
@@ -3383,3 +3404,20 @@ void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
if (useSplitDwarf() || getDwarfVersion() >= 5)
AddrPool.getIndex(S);
}
+
+Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const {
+ assert(File);
+ if (getDwarfVersion() < 5)
+ return None;
+ Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
+ if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
+ return None;
+
+ // Convert the string checksum to an MD5Result for the streamer.
+ // The verifier validates the checksum so we assume it's okay.
+ // An MD5 checksum is 16 bytes.
+ std::string ChecksumString = fromHex(Checksum->Value);
+ MD5::MD5Result CKMem;
+ std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
+ return CKMem;
+}
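The conversion above relies on fromHex to turn the 32-character textual checksum into 16 raw bytes before copying them into the MD5Result. A self-contained sketch of an equivalent hex decode (an illustration, not LLVM's fromHex):

#include <array>
#include <cassert>
#include <cstdint>
#include <string>

// Sketch: decode a 32-character hex string into the 16 raw MD5 bytes,
// analogous to what fromHex + std::copy do in getMD5AsBytes above.
static uint8_t hexDigit(char C) {
  if (C >= '0' && C <= '9') return C - '0';
  if (C >= 'a' && C <= 'f') return C - 'a' + 10;
  return C - 'A' + 10;
}

std::array<uint8_t, 16> md5FromHex(const std::string &Hex) {
  assert(Hex.size() == 32 && "an MD5 checksum is 16 bytes / 32 hex digits");
  std::array<uint8_t, 16> Bytes{};
  for (size_t I = 0; I < 16; ++I)
    Bytes[I] = (hexDigit(Hex[2 * I]) << 4) | hexDigit(Hex[2 * I + 1]);
  return Bytes;
}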
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index ad2f2f3edd8e..df19ef458888 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -114,7 +114,7 @@ public:
///
/// Variables that have been optimized out use none of these fields.
class DbgVariable : public DbgEntity {
- /// Offset in DebugLocs.
+ /// Index of the entry list in DebugLocs.
unsigned DebugLocListIndex = ~0u;
/// DW_OP_LLVM_tag_offset value from DebugLocs.
Optional<uint8_t> DebugLocListTagOffset;
@@ -372,6 +372,23 @@ class DwarfDebug : public DebugHandlerBase {
/// Generate DWARF v4 type units.
bool GenerateTypeUnits;
+ /// Emit a .debug_macro section instead of .debug_macinfo.
+ bool UseDebugMacroSection;
+
+ /// Avoid using DW_OP_convert due to consumer incompatibilities.
+ bool EnableOpConvert;
+
+public:
+ enum class MinimizeAddrInV5 {
+ Default,
+ Disabled,
+ Ranges,
+ };
+
+private:
+ /// Force the use of DW_AT_ranges even for single-entry range lists.
+ MinimizeAddrInV5 MinimizeAddr = MinimizeAddrInV5::Disabled;
+
/// DWARF5 Experimental Options
/// @{
AccelTableKind TheAccelTableKind;
@@ -409,6 +426,9 @@ class DwarfDebug : public DebugHandlerBase {
bool SingleCU;
bool IsDarwin;
+ /// Map for tracking Fortran deferred CHARACTER lengths.
+ DenseMap<const DIStringType *, unsigned> StringTypeLocMap;
+
AddressPool AddrPool;
/// Accelerator tables.
@@ -592,10 +612,8 @@ class DwarfDebug : public DebugHandlerBase {
/// function that describe the same variable. If the resulting
/// list has only one entry that is valid for entire variable's
/// scope return true.
- bool buildLocationList(
- SmallVectorImpl<DebugLocEntry> &DebugLoc,
- const DbgValueHistoryMap::Entries &Entries,
- DenseSet<const MachineBasicBlock *> &VeryLargeBlocks);
+ bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries);
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
@@ -617,13 +635,13 @@ public:
//===--------------------------------------------------------------------===//
// Main entry points.
//
- DwarfDebug(AsmPrinter *A, Module *M);
+ DwarfDebug(AsmPrinter *A);
~DwarfDebug() override;
/// Emit all Dwarf sections that should come prior to the
/// content.
- void beginModule();
+ void beginModule(Module *M) override;
/// Emit all Dwarf sections that should come after the content.
void endModule() override;
@@ -645,6 +663,7 @@ public:
class NonTypeUnitContext {
DwarfDebug *DD;
decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
+ bool AddrPoolUsed;
friend class DwarfDebug;
NonTypeUnitContext(DwarfDebug *DD);
public:
@@ -681,6 +700,12 @@ public:
/// Returns whether ranges section should be emitted.
bool useRangesSection() const { return UseRangesSection; }
+ /// Returns whether range encodings should be used for single entry range
+ /// lists.
+ bool alwaysUseRanges() const {
+ return MinimizeAddr == MinimizeAddrInV5::Ranges;
+ }
+
/// Returns whether to use sections as labels rather than temp symbols.
bool useSectionsAsReferences() const {
return UseSectionsAsReferences;
@@ -719,11 +744,21 @@ public:
return EmitDebugEntryValues;
}
+ bool useOpConvert() const {
+ return EnableOpConvert;
+ }
+
bool shareAcrossDWOCUs() const;
/// Returns the Dwarf Version.
uint16_t getDwarfVersion() const;
+ /// Returns a suitable DWARF form to represent a section offset, i.e.
+ /// * DW_FORM_sec_offset for DWARF version >= 4;
+ /// * DW_FORM_data8 for 64-bit DWARFv3;
+ /// * DW_FORM_data4 for 32-bit DWARFv3 and DWARFv2.
+ dwarf::Form getDwarfSectionOffsetForm() const;
+
/// Returns the previous CU that was being updated
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
@@ -768,6 +803,16 @@ public:
return CUDieMap.lookup(Die);
}
+ unsigned getStringTypeLoc(const DIStringType *ST) const {
+ return StringTypeLocMap.lookup(ST);
+ }
+
+ void addStringTypeLoc(const DIStringType *ST, unsigned Loc) {
+ assert(ST);
+ if (Loc)
+ StringTypeLocMap[ST] = Loc;
+ }
+
/// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
///
/// Returns whether we are "tuning" for a given debugger.
@@ -777,13 +822,16 @@ public:
bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
/// @}
- void addSectionLabel(const MCSymbol *Sym);
const MCSymbol *getSectionLabel(const MCSection *S);
void insertSectionLabel(const MCSymbol *S);
static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
const DbgValueLoc &Value,
DwarfExpression &DwarfExpr);
+
+ /// If the \p File has an MD5 checksum, return it as an MD5Result
+ /// allocated in the MCContext.
+ Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index c2956380438f..b19b4365383f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -92,6 +92,20 @@ public:
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
+
+class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase {
+ /// This is AIX's compat unwind section, which the unwinder uses to find the
+ /// location of the LSDA area and the personality routine.
+ void emitExceptionInfoTable(const MCSymbol *LSDA, const MCSymbol *PerSym);
+
+public:
+ AIXException(AsmPrinter *A);
+
+ void endModule() override {}
+ void beginFunction(const MachineFunction *MF) override {}
+
+ void endFunction(const MachineFunction *MF) override;
+};
} // End of namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index d4762121d105..59ad7646ce1c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -17,14 +17,14 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
-#include <cassert>
-#include <cstdint>
using namespace llvm;
+#define DEBUG_TYPE "dwarfdebug"
+
void DwarfExpression::emitConstu(uint64_t Value) {
if (Value < 32)
emitOp(dwarf::DW_OP_lit0 + Value);
@@ -97,7 +97,8 @@ void DwarfExpression::addAnd(unsigned Mask) {
}
bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
- unsigned MachineReg, unsigned MaxSize) {
+ llvm::Register MachineReg,
+ unsigned MaxSize) {
if (!llvm::Register::isPhysicalRegister(MachineReg)) {
if (isFrameRegister(TRI, MachineReg)) {
DwarfRegs.push_back(Register::createRegister(-1, nullptr));
@@ -219,9 +220,36 @@ void DwarfExpression::addUnsignedConstant(const APInt &Value) {
}
}
+void DwarfExpression::addConstantFP(const APFloat &APF, const AsmPrinter &AP) {
+ assert(isImplicitLocation() || isUnknownLocation());
+ APInt API = APF.bitcastToAPInt();
+ int NumBytes = API.getBitWidth() / 8;
+ if (NumBytes == 4 /*float*/ || NumBytes == 8 /*double*/) {
+ // FIXME: Add support for `long double`.
+ emitOp(dwarf::DW_OP_implicit_value);
+ emitUnsigned(NumBytes /*Size of the block in bytes*/);
+
+ // The loop below is emitting the value starting at least significant byte,
+ // so we need to perform a byte-swap to get the byte order correct in case
+ // of a big-endian target.
+ if (AP.getDataLayout().isBigEndian())
+ API = API.byteSwap();
+
+ for (int i = 0; i < NumBytes; ++i) {
+ emitData1(API.getZExtValue() & 0xFF);
+ API = API.lshr(8);
+ }
+
+ return;
+ }
+ LLVM_DEBUG(
+ dbgs() << "Skipped DW_OP_implicit_value creation for ConstantFP of size: "
+ << API.getBitWidth() << " bits\n");
+}
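The emission loop above streams the constant least-significant byte first and byte-swaps beforehand for big-endian targets, so the DW_OP_implicit_value block always matches the target's in-memory representation. A small worked example for 1.0f (bit pattern 0x3F800000); the helper below is only an illustration of that byte order, using the GCC/Clang bswap builtin:

#include <cstdint>
#include <cstdio>

// Illustration of the byte order used by addConstantFP: emit the value
// LSB-first, optionally byte-swapping first for a big-endian target.
void emitBytes(uint32_t Bits, bool TargetIsBigEndian) {
  if (TargetIsBigEndian)
    Bits = __builtin_bswap32(Bits);
  for (int I = 0; I < 4; ++I) {
    std::printf("%02x ", Bits & 0xFF); // one DW_OP_implicit_value payload byte
    Bits >>= 8;
  }
  std::printf("\n");
}

int main() {
  // 1.0f has the IEEE-754 bit pattern 0x3F800000.
  emitBytes(0x3F800000u, /*TargetIsBigEndian=*/false); // prints: 00 00 80 3f
  emitBytes(0x3F800000u, /*TargetIsBigEndian=*/true);  // prints: 3f 80 00 00
}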
+
bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
DIExpressionCursor &ExprCursor,
- unsigned MachineReg,
+ llvm::Register MachineReg,
unsigned FragmentOffsetInBits) {
auto Fragment = ExprCursor.getFragmentInfo();
if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) {
@@ -498,6 +526,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
case dwarf::DW_OP_not:
case dwarf::DW_OP_dup:
case dwarf::DW_OP_push_object_address:
+ case dwarf::DW_OP_over:
emitOp(OpNum);
break;
case dwarf::DW_OP_deref:
@@ -513,10 +542,15 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
assert(!isRegisterLocation());
emitConstu(Op->getArg(0));
break;
+ case dwarf::DW_OP_consts:
+ assert(!isRegisterLocation());
+ emitOp(dwarf::DW_OP_consts);
+ emitSigned(Op->getArg(0));
+ break;
case dwarf::DW_OP_LLVM_convert: {
unsigned BitSize = Op->getArg(0);
dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1));
- if (DwarfVersion >= 5) {
+ if (DwarfVersion >= 5 && CU.getDwarfDebug().useOpConvert()) {
emitOp(dwarf::DW_OP_convert);
// If targeting a location-list; simply emit the index into the raw
// byte stream as ULEB128, DwarfDebug::emitDebugLocEntry has been
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 757b17511453..8fca9f5a630b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -218,7 +218,7 @@ protected:
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) = 0;
+ llvm::Register MachineReg) = 0;
/// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF
/// register location description.
@@ -245,7 +245,7 @@ protected:
/// multiple subregisters that alias the register.
///
/// \return false if no DWARF register exists for MachineReg.
- bool addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ bool addMachineReg(const TargetRegisterInfo &TRI, llvm::Register MachineReg,
unsigned MaxSize = ~1U);
/// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment.
@@ -299,6 +299,9 @@ public:
/// Emit an unsigned constant.
void addUnsignedConstant(const APInt &Value);
+ /// Emit a floating-point constant.
+ void addConstantFP(const APFloat &Value, const AsmPrinter &AP);
+
/// Lock this down to become a memory location description.
void setMemoryLocationKind() {
assert(isUnknownLocation());
@@ -322,7 +325,8 @@ public:
/// \return false if no DWARF register exists
/// for MachineReg.
bool addMachineRegExpression(const TargetRegisterInfo &TRI,
- DIExpressionCursor &Expr, unsigned MachineReg,
+ DIExpressionCursor &Expr,
+ llvm::Register MachineReg,
unsigned FragmentOffsetInBits = 0);
/// Begin emission of an entry value dwarf operation. The entry value's
@@ -385,7 +389,7 @@ class DebugLocDwarfExpression final : public DwarfExpression {
void commitTemporaryBuffer() override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) override;
+ llvm::Register MachineReg) override;
public:
DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS,
@@ -415,7 +419,7 @@ class DIEDwarfExpression final : public DwarfExpression {
void commitTemporaryBuffer() override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) override;
+ llvm::Register MachineReg) override;
public:
DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 812e6383288f..838e1c9a10be 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -10,10 +10,9 @@
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "DwarfUnit.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/DIE.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCStreamer.h"
#include <algorithm>
#include <cstdint>
@@ -59,7 +58,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
// Compute the size and offset for each DIE.
void DwarfFile::computeSizeAndOffsets() {
// Offset from the first CU in the debug info section is 0 initially.
- unsigned SecOffset = 0;
+ uint64_t SecOffset = 0;
// Iterate over each compile unit and set the size and offsets for each
// DIE within each compile unit. All offsets are CU relative.
@@ -75,12 +74,15 @@ void DwarfFile::computeSizeAndOffsets() {
TheU->setDebugSectionOffset(SecOffset);
SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
}
+ if (SecOffset > UINT32_MAX && !Asm->isDwarf64())
+ report_fatal_error("The generated debug information is too large "
+ "for the 32-bit DWARF format.");
}
unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) {
// CU-relative offset is reset to 0 here.
- unsigned Offset = sizeof(int32_t) + // Length of Unit Info
- TheU->getHeaderSize(); // Unit-specific headers
+ unsigned Offset = Asm->getUnitLengthFieldByteSize() + // Length of Unit Info
+ TheU->getHeaderSize(); // Unit-specific headers
// The return value here is CU-relative, after laying out
// all of the CU DIE.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index cf293d7534d0..79a6ce7801b7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -14,7 +14,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DIE.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/Support/Allocator.h"
#include <map>
#include <memory>
@@ -26,10 +25,12 @@ class AsmPrinter;
class DbgEntity;
class DbgVariable;
class DbgLabel;
+class DINode;
class DwarfCompileUnit;
class DwarfUnit;
class LexicalScope;
class MCSection;
+class MDNode;
// Data structure to hold a range for range lists.
struct RangeSpan {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index a43929d8e8f7..a876f8ccace9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -8,7 +8,6 @@
#include "DwarfStringPool.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -33,7 +32,6 @@ DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) {
Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr;
NumBytes += Str.size() + 1;
- assert(NumBytes > Entry.Offset && "Unexpected overflow");
}
return *I.first;
}
@@ -58,13 +56,13 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
if (getNumIndexedStrings() == 0)
return;
Asm.OutStreamer->SwitchSection(Section);
- unsigned EntrySize = 4;
- // FIXME: DWARF64
+ unsigned EntrySize = Asm.getDwarfOffsetByteSize();
// We are emitting the header for a contribution to the string offsets
// table. The header consists of an entry with the contribution's
// size (not including the size of the length field), the DWARF version and
// 2 bytes of padding.
- Asm.emitInt32(getNumIndexedStrings() * EntrySize + 4);
+ Asm.emitDwarfUnitLength(getNumIndexedStrings() * EntrySize + 4,
+ "Length of String Offsets Set");
Asm.emitInt16(Asm.getDwarfVersion());
Asm.emitInt16(0);
// Define the symbol that marks the start of the contribution. It is
@@ -120,7 +118,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
}
Asm.OutStreamer->SwitchSection(OffsetSection);
- unsigned size = 4; // FIXME: DWARF64 is 8.
+ unsigned size = Asm.getDwarfOffsetByteSize();
for (const auto &Entry : Entries)
if (UseRelativeOffsets)
Asm.emitDwarfStringOffset(Entry->getValue());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index c5f5637fdae3..79b5df89e338 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -28,7 +28,7 @@ class DwarfStringPool {
StringMap<EntryTy, BumpPtrAllocator &> Pool;
StringRef Prefix;
- unsigned NumBytes = 0;
+ uint64_t NumBytes = 0;
unsigned NumIndexedStrings = 0;
bool ShouldCreateSymbols;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index ceeae14c1073..118b5fcc3bf6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -13,7 +13,6 @@
#include "DwarfUnit.h"
#include "AddressPool.h"
#include "DwarfCompileUnit.h"
-#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -84,15 +83,14 @@ unsigned DIEDwarfExpression::getTemporaryBufferSize() {
void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); }
bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) {
+ llvm::Register MachineReg) {
return MachineReg == TRI.getFrameRegister(*AP.MF);
}
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
- : DIEUnit(A->getDwarfVersion(), A->MAI->getCodePointerSize(), UnitTag),
- CUNode(Node), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) {
-}
+ : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU),
+ IndexTyDie(nullptr) {}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
@@ -301,27 +299,7 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
uint64_t Integer) {
- if (DD->getDwarfVersion() >= 4)
- addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer);
- else
- addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
-}
-
-Optional<MD5::MD5Result> DwarfUnit::getMD5AsBytes(const DIFile *File) const {
- assert(File);
- if (DD->getDwarfVersion() < 5)
- return None;
- Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
- if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
- return None;
-
- // Convert the string checksum to an MD5Result for the streamer.
- // The verifier validates the checksum so we assume it's okay.
- // An MD5 checksum is 16 bytes.
- std::string ChecksumString = fromHex(Checksum->Value);
- MD5::MD5Result CKMem;
- std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
- return CKMem;
+ addUInt(Die, Attribute, DD->getDwarfSectionOffsetForm(), Integer);
}
unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
@@ -332,10 +310,9 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
// This is a split type unit that needs a line table.
addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
}
- return SplitLineTable->getFile(File->getDirectory(), File->getFilename(),
- getMD5AsBytes(File),
- Asm->OutContext.getDwarfVersion(),
- File->getSource());
+ return SplitLineTable->getFile(
+ File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
+ Asm->OutContext.getDwarfVersion(), File->getSource());
}
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
@@ -353,7 +330,7 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
}
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Die, dwarf::DW_FORM_udata, Sym);
+ addLabel(Die, dwarf::DW_FORM_addr, Sym);
}
void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
@@ -457,77 +434,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
-/// Return true if type encoding is unsigned.
-static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
- if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
- // FIXME: Enums without a fixed underlying type have unknown signedness
- // here, leading to incorrectly emitted constants.
- if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
- return false;
-
- // (Pieces of) aggregate types that get hacked apart by SROA may be
- // represented by a constant. Encode them as unsigned bytes.
- return true;
- }
-
- if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
- dwarf::Tag T = (dwarf::Tag)Ty->getTag();
- // Encode pointer constants as unsigned bytes. This is used at least for
- // null pointer constant emission.
- // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
- // here, but accept them for now due to a bug in SROA producing bogus
- // dbg.values.
- if (T == dwarf::DW_TAG_pointer_type ||
- T == dwarf::DW_TAG_ptr_to_member_type ||
- T == dwarf::DW_TAG_reference_type ||
- T == dwarf::DW_TAG_rvalue_reference_type)
- return true;
- assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
- T == dwarf::DW_TAG_volatile_type ||
- T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type);
- assert(DTy->getBaseType() && "Expected valid base type");
- return isUnsignedDIType(DD, DTy->getBaseType());
- }
-
- auto *BTy = cast<DIBasicType>(Ty);
- unsigned Encoding = BTy->getEncoding();
- assert((Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char ||
- Encoding == dwarf::DW_ATE_signed ||
- Encoding == dwarf::DW_ATE_signed_char ||
- Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
- Encoding == dwarf::DW_ATE_boolean ||
- (Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
- Ty->getName() == "decltype(nullptr)")) &&
- "Unsupported encoding");
- return Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char ||
- Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
- Ty->getTag() == dwarf::DW_TAG_unspecified_type;
-}
-
-void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) {
- assert(MO.isFPImm() && "Invalid machine operand!");
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
- APFloat FPImm = MO.getFPImm()->getValueAPF();
-
- // Get the raw data form of the floating point.
- const APInt FltVal = FPImm.bitcastToAPInt();
- const char *FltPtr = (const char *)FltVal.getRawData();
-
- int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
- bool LittleEndian = Asm->getDataLayout().isLittleEndian();
- int Incr = (LittleEndian ? 1 : -1);
- int Start = (LittleEndian ? 0 : NumBytes - 1);
- int Stop = (LittleEndian ? NumBytes : -1);
-
- // Output the constant to DWARF one byte at a time.
- for (; Start != Stop; Start += Incr)
- addUInt(*Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]);
-
- addBlock(Die, dwarf::DW_AT_const_value, Block);
-}
-
void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) {
// Pass this down to addConstantValue as an unsigned bag of bits.
addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true);
@@ -538,15 +444,8 @@ void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI,
addConstantValue(Die, CI->getValue(), Ty);
}
-void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO,
- const DIType *Ty) {
- assert(MO.isImm() && "Invalid machine operand!");
-
- addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm());
-}
-
void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) {
- addConstantValue(Die, isUnsignedDIType(DD, Ty), Val);
+ addConstantValue(Die, DD->isUnsignedDIType(Ty), Val);
}
void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
@@ -557,7 +456,7 @@ void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
}
void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) {
- addConstantValue(Die, Val, isUnsignedDIType(DD, Ty));
+ addConstantValue(Die, Val, DD->isUnsignedDIType(Ty));
}
void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
@@ -654,6 +553,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
if (auto *BT = dyn_cast<DIBasicType>(Ty))
constructTypeDIE(TyDIE, BT);
+ else if (auto *ST = dyn_cast<DIStringType>(Ty))
+ constructTypeDIE(TyDIE, ST);
else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
constructTypeDIE(TyDIE, STy);
else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
@@ -772,8 +673,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
if (BTy->getTag() == dwarf::DW_TAG_unspecified_type)
return;
- addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- BTy->getEncoding());
+ if (BTy->getTag() != dwarf::DW_TAG_string_type)
+ addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy->getEncoding());
uint64_t Size = BTy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
@@ -784,6 +686,37 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little);
}
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) {
+ // Get core information.
+ StringRef Name = STy->getName();
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(Buffer, dwarf::DW_AT_name, Name);
+
+ if (DIVariable *Var = STy->getStringLength()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_string_length, *VarDIE);
+ } else if (DIExpression *Expr = STy->getStringLengthExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ // This is to describe the memory location of the
+ // length of a Fortran deferred length string, so
+ // lock it down as such.
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_string_length, DwarfExpr.finalize());
+ } else {
+ uint64_t Size = STy->getSizeInBits() >> 3;
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+ }
+
+ if (STy->getEncoding()) {
+ // For eventual Unicode support.
+ addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ STy->getEncoding());
+ }
+}
+
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// Get core information.
StringRef Name = DTy->getName();
@@ -910,6 +843,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
}
}
+ // Add template parameters to a class, structure or union types.
+ if (Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+ addTemplateParams(Buffer, CTy->getTemplateParams());
+
// Add elements to structure type.
DINodeArray Elements = CTy->getElements();
for (const auto *Element : Elements) {
@@ -929,7 +867,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer);
if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
- if (isUnsignedDIType(DD, Discriminator->getBaseType()))
+ if (DD->isUnsignedDIType(Discriminator->getBaseType()))
addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue());
else
addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue());
@@ -979,12 +917,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (CTy->isObjcClassComplete())
addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
- // Add template parameters to a class, structure or union types.
- // FIXME: The support isn't in the metadata for this yet.
- if (Tag == dwarf::DW_TAG_class_type ||
- Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
- addTemplateParams(Buffer, CTy->getTemplateParams());
-
// Add the type's non-standard calling convention.
uint8_t CC = 0;
if (CTy->isTypePassByValue())
@@ -1008,8 +940,10 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
Tag == dwarf::DW_TAG_union_type) {
// Add size if non-zero (derived types might be zero-sized.)
+ // Ignore the size if it's a non-enum forward decl.
// TODO: Do we care about size for enum forward declarations?
- if (Size)
+ if (Size &&
+ (!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type))
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
else if (!CTy->isForwardDecl())
// Add zero size if it is not a forward declaration.
@@ -1133,6 +1067,8 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
getOrCreateSourceID(M->getFile()));
if (M->getLineNo())
addUInt(MDie, dwarf::DW_AT_decl_line, None, M->getLineNo());
+ if (M->getIsDecl())
+ addFlag(MDie, dwarf::DW_AT_declaration);
return &MDie;
}
@@ -1354,7 +1290,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>())
Count = CI->getSExtValue();
- auto addBoundTypeEntry = [&](dwarf::Attribute Attr,
+ auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DISubrange::BoundType Bound) -> void {
if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
if (auto *VarDIE = getDIE(BV))
@@ -1372,7 +1308,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
}
};
- addBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound());
+ AddBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound());
if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) {
if (auto *CountVarDIE = getDIE(CV))
@@ -1380,9 +1316,45 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
} else if (Count != -1)
addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count);
- addBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound());
+ AddBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound());
- addBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride());
+ AddBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride());
+}
+
+void DwarfUnit::constructGenericSubrangeDIE(DIE &Buffer,
+ const DIGenericSubrange *GSR,
+ DIE *IndexTy) {
+ DIE &DwGenericSubrange =
+ createAndAddDIE(dwarf::DW_TAG_generic_subrange, Buffer);
+ addDIEEntry(DwGenericSubrange, dwarf::DW_AT_type, *IndexTy);
+
+ int64_t DefaultLowerBound = getDefaultLowerBound();
+
+ auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
+ DIGenericSubrange::BoundType Bound) -> void {
+ if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
+ if (auto *VarDIE = getDIE(BV))
+ addDIEEntry(DwGenericSubrange, Attr, *VarDIE);
+ } else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
+ if (BE->isSignedConstant()) {
+ if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
+ static_cast<int64_t>(BE->getElement(1)) != DefaultLowerBound)
+ addSInt(DwGenericSubrange, Attr, dwarf::DW_FORM_sdata,
+ BE->getElement(1));
+ } else {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(BE);
+ addBlock(DwGenericSubrange, Attr, DwarfExpr.finalize());
+ }
+ }
+ };
+
+ AddBoundTypeEntry(dwarf::DW_AT_lower_bound, GSR->getLowerBound());
+ AddBoundTypeEntry(dwarf::DW_AT_count, GSR->getCount());
+ AddBoundTypeEntry(dwarf::DW_AT_upper_bound, GSR->getUpperBound());
+ AddBoundTypeEntry(dwarf::DW_AT_byte_stride, GSR->getStride());
}
DIE *DwarfUnit::getIndexTyDie() {
@@ -1447,6 +1419,39 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize());
}
+ if (DIVariable *Var = CTy->getAssociated()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_associated, *VarDIE);
+ } else if (DIExpression *Expr = CTy->getAssociatedExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_associated, DwarfExpr.finalize());
+ }
+
+ if (DIVariable *Var = CTy->getAllocated()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_allocated, *VarDIE);
+ } else if (DIExpression *Expr = CTy->getAllocatedExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_allocated, DwarfExpr.finalize());
+ }
+
+ if (auto *RankConst = CTy->getRankConst()) {
+ addSInt(Buffer, dwarf::DW_AT_rank, dwarf::DW_FORM_sdata,
+ RankConst->getSExtValue());
+ } else if (auto *RankExpr = CTy->getRankExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(RankExpr);
+ addBlock(Buffer, dwarf::DW_AT_rank, DwarfExpr.finalize());
+ }
+
// Emit the element type.
addType(Buffer, CTy->getBaseType());
@@ -1459,15 +1464,19 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DINodeArray Elements = CTy->getElements();
for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
// FIXME: Should this really be such a loose cast?
- if (auto *Element = dyn_cast_or_null<DINode>(Elements[i]))
+ if (auto *Element = dyn_cast_or_null<DINode>(Elements[i])) {
if (Element->getTag() == dwarf::DW_TAG_subrange_type)
constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy);
+ else if (Element->getTag() == dwarf::DW_TAG_generic_subrange)
+ constructGenericSubrangeDIE(Buffer, cast<DIGenericSubrange>(Element),
+ IdxTy);
+ }
}
}
void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
const DIType *DTy = CTy->getBaseType();
- bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy);
+ bool IsUnsigned = DTy && DD->isUnsignedDIType(DTy);
if (DTy) {
if (DD->getDwarfVersion() >= 3)
addType(Buffer, DTy);
@@ -1666,15 +1675,15 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
- Asm->OutStreamer->AddComment("Length of Unit");
if (!DD->useSectionsAsReferences()) {
StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_";
MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start");
EndLabel = Asm->createTempSymbol(Prefix + "end");
- Asm->emitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->emitDwarfUnitLength(EndLabel, BeginLabel, "Length of Unit");
Asm->OutStreamer->emitLabel(BeginLabel);
} else
- Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());
+ Asm->emitDwarfUnitLength(getHeaderSize() + getUnitDie().getSize(),
+ "Length of Unit");
Asm->OutStreamer->AddComment("DWARF version number");
unsigned Version = DD->getDwarfVersion();
@@ -1694,7 +1703,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (UseOffsets)
- Asm->emitInt32(0);
+ Asm->emitDwarfLengthOrOffset(0);
else
Asm->emitDwarfSymbolReference(
TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
@@ -1713,16 +1722,14 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) {
Asm->OutStreamer->emitIntValue(TypeSignature, sizeof(TypeSignature));
Asm->OutStreamer->AddComment("Type DIE Offset");
// In a skeleton type unit there is no type DIE so emit a zero offset.
- Asm->OutStreamer->emitIntValue(Ty ? Ty->getOffset() : 0,
- sizeof(Ty->getOffset()));
+ Asm->emitDwarfLengthOrOffset(Ty ? Ty->getOffset() : 0);
}
DIE::value_iterator
DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
return Die.addValue(DIEValueAllocator, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
+ DD->getDwarfSectionOffsetForm(),
new (DIEValueAllocator) DIEDelta(Hi, Lo));
}
@@ -1730,10 +1737,7 @@ DIE::value_iterator
DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label, const MCSymbol *Sec) {
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- return addLabel(Die, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- Label);
+ return addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label);
return addSectionDelta(Die, Attribute, Label, Sec);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 34f3a34ed336..5c643760fd56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -16,22 +16,19 @@
#include "DwarfDebug.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
+#include <string>
namespace llvm {
-class MachineOperand;
-class ConstantInt;
class ConstantFP;
+class ConstantInt;
class DbgVariable;
class DwarfCompileUnit;
+class MachineOperand;
+class MCDwarfDwoLineTable;
+class MCSymbol;
//===----------------------------------------------------------------------===//
/// This dwarf writer support class manages information associated with a
@@ -77,7 +74,6 @@ protected:
bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
- bool shareAcrossDWOCUs() const;
bool isShareableAcrossCUs(const DINode *D) const;
public:
@@ -86,8 +82,7 @@ public:
MCSymbol *getEndLabel() const { return EndLabel; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
const DICompileUnit *getCUNode() const { return CUNode; }
-
- uint16_t getDwarfVersion() const { return DD->getDwarfVersion(); }
+ DwarfDebug &getDwarfDebug() const { return *DD; }
/// Return true if this compile unit has something to write out.
bool hasContent() const { return getUnitDie().hasChildren(); }
@@ -195,7 +190,6 @@ public:
void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
/// Add constant value entry in variable DIE.
- void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty);
void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned);
@@ -203,7 +197,6 @@ public:
void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val);
/// Add constant value entry in variable DIE.
- void addConstantFPValue(DIE &Die, const MachineOperand &MO);
void addConstantFPValue(DIE &Die, const ConstantFP *CFP);
/// Add a linkage name, if it isn't empty.
@@ -255,9 +248,9 @@ public:
/// Compute the size of a header for this unit, not including the initial
/// length field.
virtual unsigned getHeaderSize() const {
- return sizeof(int16_t) + // DWARF version number
- sizeof(int32_t) + // Offset Into Abbrev. Section
- sizeof(int8_t) + // Pointer Size (in bytes)
+ return sizeof(int16_t) + // DWARF version number
+ Asm->getDwarfOffsetByteSize() + // Offset Into Abbrev. Section
+ sizeof(int8_t) + // Pointer Size (in bytes)
(DD->getDwarfVersion() >= 5 ? sizeof(int8_t)
: 0); // DWARF v5 unit type
}
@@ -284,10 +277,6 @@ public:
const MCSymbol *Label,
const MCSymbol *Sec);
- /// If the \p File has an MD5 checksum, return it as an MD5Result
- /// allocated in the MCContext.
- Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
-
/// Get context owner's DIE.
DIE *createTypeDIE(const DICompositeType *Ty);
@@ -306,9 +295,12 @@ protected:
private:
void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);
+ void constructTypeDIE(DIE &Buffer, const DIStringType *BTy);
void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy);
void constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy);
void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy);
+ void constructGenericSubrangeDIE(DIE &Buffer, const DIGenericSubrange *SR,
+ DIE *IndexTy);
void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy);
void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy);
DIE &constructMemberDIE(DIE &Buffer, const DIDerivedType *DT);
@@ -361,7 +353,7 @@ public:
void emitHeader(bool UseOffsets) override;
unsigned getHeaderSize() const override {
return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
- sizeof(uint32_t); // Type DIE Offset
+ Asm->getDwarfOffsetByteSize(); // Type DIE Offset
}
void addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) override;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 99ee4567fa58..2ffe8a7b0469 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -44,15 +44,9 @@ EHStreamer::~EHStreamer() = default;
unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
const LandingPadInfo *R) {
const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
- unsigned LSize = LIds.size(), RSize = RIds.size();
- unsigned MinSize = LSize < RSize ? LSize : RSize;
- unsigned Count = 0;
-
- for (; Count != MinSize; ++Count)
- if (LIds[Count] != RIds[Count])
- return Count;
-
- return Count;
+ return std::mismatch(LIds.begin(), LIds.end(), RIds.begin(), RIds.end())
+ .first -
+ LIds.begin();
}
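The rewritten sharedTypeIDs uses the four-iterator std::mismatch overload, which stops at the first position where the two sequences differ (or at the end of the shorter one); the distance from the start is the length of the shared prefix. A standalone sketch of the same idiom:

#include <algorithm>
#include <cassert>
#include <vector>

// Length of the common prefix of two sequences, as computed by the
// four-iterator std::mismatch overload used in sharedTypeIDs above.
unsigned commonPrefixLength(const std::vector<int> &L, const std::vector<int> &R) {
  return std::mismatch(L.begin(), L.end(), R.begin(), R.end()).first - L.begin();
}

int main() {
  assert(commonPrefixLength({1, 2, 3, 4}, {1, 2, 7}) == 2);
  assert(commonPrefixLength({}, {5}) == 0); // unequal lengths are handled safely
  assert(commonPrefixLength({9, 9}, {9, 9}) == 2);
}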
/// Compute the actions table and gather the first action index for each landing
@@ -220,15 +214,30 @@ void EHStreamer::computePadMap(
/// the landing pad and the action. Calls marked 'nounwind' have no entry and
/// must not be contained in the try-range of any entry - they form gaps in the
/// table. Entries must be ordered by try-range address.
-void EHStreamer::
-computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
- const SmallVectorImpl<unsigned> &FirstActions) {
+///
+/// Call-sites are split into one or more call-site ranges associated with
+/// different sections of the function.
+///
+/// - Without -basic-block-sections, all call-sites are grouped into one
+/// call-site-range corresponding to the function section.
+///
+/// - With -basic-block-sections, one call-site range is created for each
+/// section, with its FragmentBeginLabel and FragmentEndLabel respectively
+/// set to the beginning and ending of the corresponding section and its
+/// ExceptionLabel set to the exception symbol dedicated for this section.
+/// Later, one LSDA header will be emitted for each call-site range with its
+/// call-sites following. The action table and type info table will be
+/// shared across all ranges.
+void EHStreamer::computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
RangeMapType PadMap;
computePadMap(LandingPads, PadMap);
// The end label of the previous invoke or nounwind try-range.
- MCSymbol *LastLabel = nullptr;
+ MCSymbol *LastLabel = Asm->getFunctionBegin();
// Whether there is a potentially throwing instruction (currently this means
// an ordinary call) between the end of the previous try-range and now.
@@ -241,6 +250,21 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// Visit all instructions in order of address.
for (const auto &MBB : *Asm->MF) {
+ if (&MBB == &Asm->MF->front() || MBB.isBeginSection()) {
+ // We start a call-site range upon function entry and at the beginning of
+ // every basic block section.
+ CallSiteRanges.push_back(
+ {Asm->MBBSectionRanges[MBB.getSectionIDNum()].BeginLabel,
+ Asm->MBBSectionRanges[MBB.getSectionIDNum()].EndLabel,
+ Asm->getMBBExceptionSym(MBB), CallSites.size()});
+ PreviousIsInvoke = false;
+ SawPotentiallyThrowing = false;
+ LastLabel = nullptr;
+ }
+
+ if (MBB.isEHPad())
+ CallSiteRanges.back().IsLPRange = true;
+
for (const auto &MI : MBB) {
if (!MI.isEHLabel()) {
if (MI.isCall())
@@ -264,13 +288,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
"Inconsistent landing pad map!");
- // For Dwarf exception handling (SjLj handling doesn't use this). If some
- // instruction between the previous try-range and this one may throw,
- // create a call-site entry with no landing pad for the region between the
- // try-ranges.
- if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) {
- CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 };
- CallSites.push_back(Site);
+ // For Dwarf and AIX exception handling (SjLj handling doesn't use this).
+ // If some instruction between the previous try-range and this one may
+ // throw, create a call-site entry with no landing pad for the region
+ // between the try-ranges.
+ if (SawPotentiallyThrowing &&
+ (Asm->MAI->usesCFIForEH() ||
+ Asm->MAI->getExceptionHandlingType() == ExceptionHandling::AIX)) {
+ CallSites.push_back({LastLabel, BeginLabel, nullptr, 0});
PreviousIsInvoke = false;
}
@@ -313,14 +338,21 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
PreviousIsInvoke = true;
}
}
- }
- // If some instruction between the previous try-range and the end of the
- // function may throw, create a call-site entry with no landing pad for the
- // region following the try-range.
- if (SawPotentiallyThrowing && !IsSJLJ) {
- CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
- CallSites.push_back(Site);
+ // We end the call-site range upon function exit and at the end of every
+ // basic block section.
+ if (&MBB == &Asm->MF->back() || MBB.isEndSection()) {
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for
+ // the region following the try-range.
+ if (SawPotentiallyThrowing && !IsSJLJ) {
+ CallSiteEntry Site = {LastLabel, CallSiteRanges.back().FragmentEndLabel,
+ nullptr, 0};
+ CallSites.push_back(Site);
+ SawPotentiallyThrowing = false;
+ }
+ CallSiteRanges.back().CallSiteEndIdx = CallSites.size();
+ }
}
}
@@ -371,19 +403,25 @@ MCSymbol *EHStreamer::emitExceptionTable() {
SmallVector<unsigned, 64> FirstActions;
computeActionsTable(LandingPads, Actions, FirstActions);
- // Compute the call-site table.
+ // Compute the call-site table and call-site ranges. Normally, there is only
+ // one call-site-range which covers the whole function. With
+ // -basic-block-sections, there is one call-site-range per basic block
+ // section.
SmallVector<CallSiteEntry, 64> CallSites;
- computeCallSiteTable(CallSites, LandingPads, FirstActions);
+ SmallVector<CallSiteRange, 4> CallSiteRanges;
+ computeCallSiteTable(CallSites, CallSiteRanges, LandingPads, FirstActions);
bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm;
+ bool HasLEB128Directives = Asm->MAI->hasLEB128Directives();
unsigned CallSiteEncoding =
IsSJLJ ? static_cast<unsigned>(dwarf::DW_EH_PE_udata4) :
Asm->getObjFileLowering().getCallSiteEncoding();
bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
// Type infos.
- MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ MCSection *LSDASection =
+ Asm->getObjFileLowering().getSectionForLSDA(MF->getFunction(), Asm->TM);
unsigned TTypeEncoding;
if (!HaveTTData) {
@@ -433,35 +471,122 @@ MCSymbol *EHStreamer::emitExceptionTable() {
Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+
Twine(Asm->getFunctionNumber()));
Asm->OutStreamer->emitLabel(GCCETSym);
- Asm->OutStreamer->emitLabel(Asm->getCurExceptionSym());
-
- // Emit the LSDA header.
- Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
- Asm->emitEncodingByte(TTypeEncoding, "@TType");
+ MCSymbol *CstEndLabel = Asm->createTempSymbol(
+ CallSiteRanges.size() > 1 ? "action_table_base" : "cst_end");
MCSymbol *TTBaseLabel = nullptr;
- if (HaveTTData) {
- // N.B.: There is a dependency loop between the size of the TTBase uleb128
- // here and the amount of padding before the aligned type table. The
- // assembler must sometimes pad this uleb128 or insert extra padding before
- // the type table. See PR35809 or GNU as bug 4029.
- MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref");
+ if (HaveTTData)
TTBaseLabel = Asm->createTempSymbol("ttbase");
- Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel);
- Asm->OutStreamer->emitLabel(TTBaseRefLabel);
- }
- bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+ const bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+
+ // Helper for emitting references (offsets) for type table and the end of the
+ // call-site table (which marks the beginning of the action table).
+ // * For Itanium, these references will be emitted for every callsite range.
+ // * For SJLJ and Wasm, they will be emitted only once in the LSDA header.
+ auto EmitTypeTableRefAndCallSiteTableEndRef = [&]() {
+ Asm->emitEncodingByte(TTypeEncoding, "@TType");
+ if (HaveTTData) {
+ // N.B.: There is a dependency loop between the size of the TTBase uleb128
+ // here and the amount of padding before the aligned type table. The
+ // assembler must sometimes pad this uleb128 or insert extra padding
+ // before the type table. See PR35809 or GNU as bug 4029.
+ MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref");
+ Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel);
+ Asm->OutStreamer->emitLabel(TTBaseRefLabel);
+ }
+
+ // The Action table follows the call-site table. So we emit the
+ // label difference from here (start of the call-site table for SJLJ and
+ // Wasm, and start of a call-site range for Itanium) to the end of the
+ // whole call-site table (end of the last call-site range for Itanium).
+ MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin");
+ Asm->emitEncodingByte(CallSiteEncoding, "Call site");
+ Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
+ Asm->OutStreamer->emitLabel(CstBeginLabel);
+ };
+
+ // An alternative path to EmitTypeTableRefAndCallSiteTableEndRef.
+ // For some platforms, the system assembler does not accept the form of
+ // `.uleb128 label2 - label1`. In those situations, we would need to calculate
+ // the size between label1 and label2 manually.
+ // In this case, we would need to calculate the LSDA size and the call
+ // site table size.
+ auto EmitTypeTableOffsetAndCallSiteTableOffset = [&]() {
+ assert(CallSiteEncoding == dwarf::DW_EH_PE_udata4 && !HasLEB128Directives &&
+ "Targets supporting .uleb128 do not need to take this path.");
+ if (CallSiteRanges.size() > 1)
+ report_fatal_error(
+ "-fbasic-block-sections is not yet supported on "
+ "platforms that do not have general LEB128 directive support.");
+
+ uint64_t CallSiteTableSize = 0;
+ const CallSiteRange &CSRange = CallSiteRanges.back();
+ for (size_t CallSiteIdx = CSRange.CallSiteBeginIdx;
+ CallSiteIdx < CSRange.CallSiteEndIdx; ++CallSiteIdx) {
+ const CallSiteEntry &S = CallSites[CallSiteIdx];
+ // Each call site entry consists of 3 udata4 fields (12 bytes) and
+ // 1 ULEB128 field.
+ CallSiteTableSize += 12 + getULEB128Size(S.Action);
+ assert(isUInt<32>(CallSiteTableSize) && "CallSiteTableSize overflows.");
+ }
+
+ Asm->emitEncodingByte(TTypeEncoding, "@TType");
+ if (HaveTTData) {
+ const unsigned ByteSizeOfCallSiteOffset =
+ getULEB128Size(CallSiteTableSize);
+ uint64_t ActionTableSize = 0;
+ for (const ActionEntry &Action : Actions) {
+ // Each action entry consists of two SLEB128 fields.
+ ActionTableSize += getSLEB128Size(Action.ValueForTypeID) +
+ getSLEB128Size(Action.NextAction);
+ assert(isUInt<32>(ActionTableSize) && "ActionTableSize overflows.");
+ }
+
+ const unsigned TypeInfoSize =
+ Asm->GetSizeOfEncodedValue(TTypeEncoding) * MF->getTypeInfos().size();
+
+ const uint64_t LSDASizeBeforeAlign =
+ 1 // Call site encoding byte.
+ + ByteSizeOfCallSiteOffset // ULEB128 encoding of CallSiteTableSize.
+ + CallSiteTableSize // Call site table content.
+ + ActionTableSize; // Action table content.
+
+ const uint64_t LSDASizeWithoutAlign = LSDASizeBeforeAlign + TypeInfoSize;
+ const unsigned ByteSizeOfLSDAWithoutAlign =
+ getULEB128Size(LSDASizeWithoutAlign);
+ const uint64_t DisplacementBeforeAlign =
+ 2 // LPStartEncoding and TypeTableEncoding.
+ + ByteSizeOfLSDAWithoutAlign + LSDASizeBeforeAlign;
+
+ // The type info area starts with 4 byte alignment.
+ const unsigned NeedAlignVal = (4 - DisplacementBeforeAlign % 4) % 4;
+ uint64_t LSDASizeWithAlign = LSDASizeWithoutAlign + NeedAlignVal;
+ const unsigned ByteSizeOfLSDAWithAlign =
+ getULEB128Size(LSDASizeWithAlign);
+
+ // The LSDASizeWithAlign could use 1 byte less padding for alignment
+ // when the data we use to represent the LSDA Size "needs" to be 1 byte
+ // larger than the one previously calculated without alignment.
+ if (ByteSizeOfLSDAWithAlign > ByteSizeOfLSDAWithoutAlign)
+ LSDASizeWithAlign -= 1;
+
+ Asm->OutStreamer->emitULEB128IntValue(LSDASizeWithAlign,
+ ByteSizeOfLSDAWithAlign);
+ }
- // Emit the landing pad call site table.
- MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin");
- MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end");
- Asm->emitEncodingByte(CallSiteEncoding, "Call site");
- Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
- Asm->OutStreamer->emitLabel(CstBeginLabel);
+ Asm->emitEncodingByte(CallSiteEncoding, "Call site");
+ Asm->OutStreamer->emitULEB128IntValue(CallSiteTableSize);
+ };
// SjLj / Wasm Exception handling
if (IsSJLJ || IsWasm) {
+ Asm->OutStreamer->emitLabel(Asm->getMBBExceptionSym(Asm->MF->front()));
+
+ // Emit the LSDA header.
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ EmitTypeTableRefAndCallSiteTableEndRef();
+
unsigned idx = 0;
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
@@ -486,6 +611,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
}
Asm->emitULEB128(S.Action);
}
+ Asm->OutStreamer->emitLabel(CstEndLabel);
} else {
// Itanium LSDA exception handling
@@ -507,57 +633,127 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// A missing entry in the call-site table indicates that a call is not
// supposed to throw.
+ assert(CallSiteRanges.size() != 0 && "No call-site ranges!");
+
+ // There should be only one call-site range which includes all the landing
+ // pads. Find that call-site range here.
+ const CallSiteRange *LandingPadRange = nullptr;
+ for (const CallSiteRange &CSRange : CallSiteRanges) {
+ if (CSRange.IsLPRange) {
+ assert(LandingPadRange == nullptr &&
+ "All landing pads must be in a single callsite range.");
+ LandingPadRange = &CSRange;
+ }
+ }
+
+ // The call-site table is split into its call-site ranges, each being
+ // emitted as:
+ // [ LPStartEncoding | LPStart ]
+ // [ TypeTableEncoding | TypeTableOffset ]
+ // [ CallSiteEncoding | CallSiteTableEndOffset ]
+ // cst_begin -> { call-site entries contained in this range }
+ //
+ // and is followed by the next call-site range.
+ //
+ // For each call-site range, CallSiteTableEndOffset is computed as the
+ // difference between cst_begin of that range and the end label of the
+ // whole call-site table (emitted after the last range). This offset is
+ // used to find the action table.
+
unsigned Entry = 0;
- for (SmallVectorImpl<CallSiteEntry>::const_iterator
- I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
- const CallSiteEntry &S = *I;
+ for (const CallSiteRange &CSRange : CallSiteRanges) {
+ if (CSRange.CallSiteBeginIdx != 0) {
+ // Align the call-site range for all ranges except the first. The
+ // first range is already aligned due to the exception table alignment.
+ Asm->emitAlignment(Align(4));
+ }
+ Asm->OutStreamer->emitLabel(CSRange.ExceptionLabel);
+
+ // Emit the LSDA header.
+ // If only one call-site range exists, LPStart is omitted as it is the
+ // same as the function entry.
+ if (CallSiteRanges.size() == 1) {
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ } else if (!Asm->isPositionIndependent()) {
+ // When there is more than one call-site range, LPStart must be explicitly
+ // specified.
+ // For non-PIC we can simply use the absolute value.
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_absptr, "@LPStart");
+ Asm->OutStreamer->emitSymbolValue(LandingPadRange->FragmentBeginLabel,
+ Asm->MAI->getCodePointerSize());
+ } else {
+ // For PIC mode, we emit a PC-relative address for LPStart.
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_pcrel, "@LPStart");
+ MCContext &Context = Asm->OutStreamer->getContext();
+ MCSymbol *Dot = Context.createTempSymbol();
+ Asm->OutStreamer->emitLabel(Dot);
+ Asm->OutStreamer->emitValue(
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(LandingPadRange->FragmentBeginLabel,
+ Context),
+ MCSymbolRefExpr::create(Dot, Context), Context),
+ Asm->MAI->getCodePointerSize());
+ }
+
+ if (HasLEB128Directives)
+ EmitTypeTableRefAndCallSiteTableEndRef();
+ else
+ EmitTypeTableOffsetAndCallSiteTableOffset();
+
+ for (size_t CallSiteIdx = CSRange.CallSiteBeginIdx;
+ CallSiteIdx != CSRange.CallSiteEndIdx; ++CallSiteIdx) {
+ const CallSiteEntry &S = CallSites[CallSiteIdx];
+
+ MCSymbol *EHFuncBeginSym = CSRange.FragmentBeginLabel;
+ MCSymbol *EHFuncEndSym = CSRange.FragmentEndLabel;
- MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
-
- MCSymbol *BeginLabel = S.BeginLabel;
- if (!BeginLabel)
- BeginLabel = EHFuncBeginSym;
- MCSymbol *EndLabel = S.EndLabel;
- if (!EndLabel)
- EndLabel = Asm->getFunctionEnd();
-
- // Offset of the call site relative to the start of the procedure.
- if (VerboseAsm)
- Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
- Asm->emitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
- if (VerboseAsm)
- Asm->OutStreamer->AddComment(Twine(" Call between ") +
- BeginLabel->getName() + " and " +
- EndLabel->getName());
- Asm->emitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
-
- // Offset of the landing pad relative to the start of the procedure.
- if (!S.LPad) {
+ MCSymbol *BeginLabel = S.BeginLabel;
+ if (!BeginLabel)
+ BeginLabel = EHFuncBeginSym;
+ MCSymbol *EndLabel = S.EndLabel;
+ if (!EndLabel)
+ EndLabel = EHFuncEndSym;
+
+ // Offset of the call site relative to the start of the procedure.
if (VerboseAsm)
- Asm->OutStreamer->AddComment(" has no landing pad");
- Asm->emitCallSiteValue(0, CallSiteEncoding);
- } else {
+ Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) +
+ " <<");
+ Asm->emitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
if (VerboseAsm)
- Asm->OutStreamer->AddComment(Twine(" jumps to ") +
- S.LPad->LandingPadLabel->getName());
- Asm->emitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym,
- CallSiteEncoding);
- }
+ Asm->OutStreamer->AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+ Asm->emitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
+
+ // Offset of the landing pad relative to the start of the landing pad
+ // fragment.
+ if (!S.LPad) {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(" has no landing pad");
+ Asm->emitCallSiteValue(0, CallSiteEncoding);
+ } else {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(Twine(" jumps to ") +
+ S.LPad->LandingPadLabel->getName());
+ Asm->emitCallSiteOffset(S.LPad->LandingPadLabel,
+ LandingPadRange->FragmentBeginLabel,
+ CallSiteEncoding);
+ }
- // Offset of the first associated action record, relative to the start of
- // the action table. This value is biased by 1 (1 indicates the start of
- // the action table), and 0 indicates that there are no actions.
- if (VerboseAsm) {
- if (S.Action == 0)
- Asm->OutStreamer->AddComment(" On action: cleanup");
- else
- Asm->OutStreamer->AddComment(" On action: " +
- Twine((S.Action - 1) / 2 + 1));
+ // Offset of the first associated action record, relative to the start
+ // of the action table. This value is biased by 1 (1 indicates the start
+ // of the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer->AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer->AddComment(" On action: " +
+ Twine((S.Action - 1) / 2 + 1));
+ }
+ Asm->emitULEB128(S.Action);
}
- Asm->emitULEB128(S.Action);
}
+ Asm->OutStreamer->emitLabel(CstEndLabel);
}
- Asm->OutStreamer->emitLabel(CstEndLabel);
// Emit the Action Table.
int Entry = 0;
@@ -587,15 +783,12 @@ MCSymbol *EHStreamer::emitExceptionTable() {
Asm->emitSLEB128(Action.ValueForTypeID);
// Action Record
- //
- // Self-relative signed displacement in bytes of the next action record,
- // or 0 if there is no next action record.
if (VerboseAsm) {
- if (Action.NextAction == 0) {
+ if (Action.Previous == unsigned(-1)) {
Asm->OutStreamer->AddComment(" No further actions");
} else {
- unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
- Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction));
+ Asm->OutStreamer->AddComment(" Continue to action " +
+ Twine(Action.Previous + 1));
}
}
Asm->emitSLEB128(Action.NextAction);
@@ -615,7 +808,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
const std::vector<unsigned> &FilterIds = MF->getFilterIds();
- bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+ const bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
int Entry = 0;
// Emit the Catch TypeInfos.
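The size arithmetic in EmitTypeTableOffsetAndCallSiteTableOffset above is easier to follow with concrete numbers. The standalone sketch below is only an approximation of that logic, not LLVM code: ulebSize re-implements what getULEB128Size does, and all of the table sizes are made-up example values.

#include <cstdint>
#include <cstdio>

// Number of bytes needed to encode Value as ULEB128 (7 payload bits per byte).
static unsigned ulebSize(uint64_t Value) {
  unsigned Size = 0;
  do {
    Value >>= 7;
    ++Size;
  } while (Value != 0);
  return Size;
}

int main() {
  // Hypothetical table sizes for one call-site range.
  const uint64_t CallSiteTableSize = 180; // e.g. 15 entries * 12 bytes, no actions
  const uint64_t ActionTableSize = 6;
  const uint64_t TypeInfoSize = 16;       // e.g. 4 type infos * 4 bytes (udata4)

  const uint64_t LSDASizeBeforeAlign =
      1                             // call-site encoding byte
      + ulebSize(CallSiteTableSize) // ULEB128 of the call-site table size
      + CallSiteTableSize + ActionTableSize;

  const uint64_t LSDASizeWithoutAlign = LSDASizeBeforeAlign + TypeInfoSize;
  const unsigned SizeFieldBytes = ulebSize(LSDASizeWithoutAlign);

  // Bytes emitted before the type-info area: LPStart and TType encoding bytes,
  // the LSDA size field itself, and everything up to the type infos.
  const uint64_t Displacement = 2 + SizeFieldBytes + LSDASizeBeforeAlign;
  const unsigned Pad = (4 - Displacement % 4) % 4;

  uint64_t LSDASizeWithAlign = LSDASizeWithoutAlign + Pad;
  // If the padded size needs one more ULEB128 byte than the unpadded size,
  // that extra byte already shifts the type-info area, so drop one pad byte.
  if (ulebSize(LSDASizeWithAlign) > SizeFieldBytes)
    LSDASizeWithAlign -= 1;

  std::printf("LSDA size with alignment: %llu\n",
              (unsigned long long)LSDASizeWithAlign);
  return 0;
}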
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index e62cf17a05d4..234e62506a56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -69,23 +69,48 @@ protected:
unsigned Action;
};
+ /// Structure describing a contiguous range of call-sites which reside
+ /// in the same procedure fragment. With -fbasic-block-sections, there will
+ /// be one call site range per basic block section. Otherwise, we will have
+ /// one call site range containing all the call sites in the function.
+ struct CallSiteRange {
+ // Symbol marking the beginning of the procedure fragment.
+ MCSymbol *FragmentBeginLabel = nullptr;
+ // Symbol marking the end of the procedure fragment.
+ MCSymbol *FragmentEndLabel = nullptr;
+ // LSDA symbol for this call-site range.
+ MCSymbol *ExceptionLabel = nullptr;
+ // Index of the first call-site entry in the call-site table which
+ // belongs to this range.
+ size_t CallSiteBeginIdx = 0;
+ // Index just after the last call-site entry in the call-site table which
+ // belongs to this range.
+ size_t CallSiteEndIdx = 0;
+ // Whether this is the call-site range containing all the landing pads.
+ bool IsLPRange = false;
+ };
+
/// Compute the actions table and gather the first action index for each
/// landing pad site.
- void computeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
- SmallVectorImpl<ActionEntry> &Actions,
- SmallVectorImpl<unsigned> &FirstActions);
+ void computeActionsTable(
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
RangeMapType &PadMap);
- /// Compute the call-site table. The entry for an invoke has a try-range
- /// containing the call, a non-zero landing pad and an appropriate action.
- /// The entry for an ordinary call has a try-range containing the call and
- /// zero for the landing pad and the action. Calls marked 'nounwind' have
- /// no entry and must not be contained in the try-range of any entry - they
- /// form gaps in the table. Entries must be ordered by try-range address.
+ /// Compute the call-site table and the call-site ranges. The entry for an
+ /// invoke has a try-range containing the call, a non-zero landing pad and an
+ /// appropriate action. The entry for an ordinary call has a try-range
+ /// containing the call and zero for the landing pad and the action. Calls
+ /// marked 'nounwind' have no entry and must not be contained in the try-range
+ /// of any entry - they form gaps in the table. Entries must be ordered by
+ /// try-range address. CallSiteRanges vector is only populated for Itanium
+ /// exception handling.
virtual void computeCallSiteTable(
SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions);
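The CallSiteRange bookkeeping introduced above can be pictured as a simple grouping step: consecutive call sites that live in the same procedure fragment share one range, and exactly one range is expected to carry the landing pads. The sketch below is illustrative only; CallSite, Range, FragmentId and buildRanges are invented names, not LLVM API.

#include <cstddef>
#include <vector>

// Minimal stand-ins for the real structures.
struct CallSite { int FragmentId; bool HasLandingPad; };
struct Range { int FragmentId; size_t BeginIdx, EndIdx; bool IsLPRange; };

// Group consecutive call sites by fragment into half-open index ranges.
static std::vector<Range> buildRanges(const std::vector<CallSite> &Sites) {
  std::vector<Range> Ranges;
  for (size_t I = 0; I < Sites.size(); ++I) {
    if (Ranges.empty() || Ranges.back().FragmentId != Sites[I].FragmentId)
      Ranges.push_back({Sites[I].FragmentId, I, I, false});
    Ranges.back().EndIdx = I + 1;
    Ranges.back().IsLPRange |= Sites[I].HasLandingPad;
  }
  return Ranges;
}

int main() {
  // Two fragments: the hot section (0) and the cold/landing-pad section (1).
  std::vector<CallSite> Sites = {{0, false}, {0, false}, {1, true}, {1, false}};
  std::vector<Range> Ranges = buildRanges(Sites); // -> [0,2) and [2,4), 2nd is LP
  return Ranges.size() == 2 && Ranges[1].IsLPRange ? 0 : 1;
}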
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 8fa83f515910..354b638b47a2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -145,9 +145,10 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
report_fatal_error("Function '" + FI.getFunction().getName() +
"' is too large for the ocaml GC! "
"Frame size " +
- Twine(FrameSize) + ">= 65536.\n"
- "(" +
- Twine(uintptr_t(&FI)) + ")");
+ Twine(FrameSize) +
+ ">= 65536.\n"
+ "(" +
+ Twine(reinterpret_cast<uintptr_t>(&FI)) + ")");
}
AP.OutStreamer->AddComment("live roots for " +
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
new file mode 100644
index 000000000000..e8636052c54c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -0,0 +1,84 @@
+//===- llvm/CodeGen/PseudoProbePrinter.cpp - Pseudo Probe Emission -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing pseudo probe info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PseudoProbePrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/MC/MCPseudoProbe.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pseudoprobe"
+
+PseudoProbeHandler::~PseudoProbeHandler() = default;
+
+PseudoProbeHandler::PseudoProbeHandler(AsmPrinter *A, Module *M) : Asm(A) {
+ NamedMDNode *FuncInfo = M->getNamedMetadata(PseudoProbeDescMetadataName);
+ assert(FuncInfo && "Pseudo probe descriptors are missing");
+ for (const auto *Operand : FuncInfo->operands()) {
+ const auto *MD = cast<MDNode>(Operand);
+ auto GUID =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
+ auto Name = cast<MDString>(MD->getOperand(2))->getString();
+ // We may see pairs with the same name but different GUIDs here in LTO mode,
+ // due to static same-named functions inlined from other modules into this
+ // module. Function profiles with the same name will be merged no matter
+ // whether they are collected on the same function. Therefore we just pick
+ // up the last <Name, GUID> pair here to represent the same-named function
+ // collection and all probes from the collection will be merged into a
+ // single profile eventually.
+ Names[Name] = GUID;
+ }
+
+ LLVM_DEBUG(dump());
+}
+
+void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
+ uint64_t Type, uint64_t Attr,
+ const DILocation *DebugLoc) {
+ // Gather all the inlined-at nodes.
+ // When it's done, ReversedInlineStack looks like ([66, B], [88, A]),
+ // which means function A inlines function B at a call site with probe id 88,
+ // and B inlines C at probe 66 where C is represented by Guid.
+ SmallVector<InlineSite, 8> ReversedInlineStack;
+ auto *InlinedAt = DebugLoc ? DebugLoc->getInlinedAt() : nullptr;
+ while (InlinedAt) {
+ const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
+ // Use linkage name for C++ if possible.
+ auto Name = SP->getLinkageName();
+ if (Name.empty())
+ Name = SP->getName();
+ assert(Names.count(Name) && "Pseudo probe descriptor missing for function");
+ uint64_t CallerGuid = Names[Name];
+ uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex(
+ InlinedAt->getDiscriminator());
+ ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId);
+ InlinedAt = InlinedAt->getInlinedAt();
+ }
+
+ SmallVector<InlineSite, 8> InlineStack(ReversedInlineStack.rbegin(),
+ ReversedInlineStack.rend());
+ Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack);
+}
+
+#ifndef NDEBUG
+void PseudoProbeHandler::dump() const {
+ dbgs() << "\n=============================\n";
+ dbgs() << "\nFunction Name to GUID map:\n";
+ dbgs() << "\n=============================\n";
+ for (const auto &Item : Names)
+ dbgs() << "Func: " << Item.first << " GUID: " << Item.second << "\n";
+}
+#endif
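The inline-stack handling in emitPseudoProbe above walks the inlined-at chain from the innermost frame outward and then reverses it before handing it to the streamer. A self-contained sketch of that ordering follows; Frame is a made-up stand-in for the debug-location chain, and the local InlineSite alias is not the LLVM type.

#include <cstdint>
#include <utility>
#include <vector>

// One inlined-at link: the caller's GUID and the probe id of the call site.
using InlineSite = std::pair<uint64_t, uint64_t>;

struct Frame { uint64_t CallerGuid; uint64_t CallerProbeId; const Frame *InlinedAt; };

int main() {
  // C was inlined into B at probe 66, and B was inlined into A at probe 88.
  Frame A{/*CallerGuid=*/0xA, /*CallerProbeId=*/88, nullptr};
  Frame B{/*CallerGuid=*/0xB, /*CallerProbeId=*/66, &A};

  // Walk innermost-to-outermost: yields (0xB, 66), then (0xA, 88).
  std::vector<InlineSite> Reversed;
  for (const Frame *F = &B; F; F = F->InlinedAt)
    Reversed.emplace_back(F->CallerGuid, F->CallerProbeId);

  // The streamer wants outermost-first, so reverse: (0xA, 88), (0xB, 66).
  std::vector<InlineSite> InlineStack(Reversed.rbegin(), Reversed.rend());
  return InlineStack.front().first == 0xA ? 0 : 1;
}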
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
new file mode 100644
index 000000000000..bea07ceae9d4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
@@ -0,0 +1,53 @@
+//===- PseudoProbePrinter.h - Pseudo probe encoding support -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing pseudo probe info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MCStreamer;
+class Module;
+class DILocation;
+
+class PseudoProbeHandler : public AsmPrinterHandler {
+ // Target of pseudo probe emission.
+ AsmPrinter *Asm;
+ // Name to GUID map
+ DenseMap<StringRef, uint64_t> Names;
+
+public:
+ PseudoProbeHandler(AsmPrinter *A, Module *M);
+ ~PseudoProbeHandler() override;
+
+ void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
+ uint64_t Attr, const DILocation *DebugLoc);
+
+ // Unused.
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+ void endModule() override {}
+ void beginFunction(const MachineFunction *MF) override {}
+ void endFunction(const MachineFunction *MF) override {}
+ void beginInstruction(const MachineInstr *MI) override {}
+ void endInstruction() override {}
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+} // namespace llvm
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
index baef4d2cc849..352a33e8639d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -18,11 +18,11 @@
using namespace llvm;
void WasmException::endModule() {
- // This is the symbol used in 'throw' and 'br_on_exn' instruction to denote
- // this is a C++ exception. This symbol has to be emitted somewhere once in
- // the module. Check if the symbol has already been created, i.e., we have at
- // least one 'throw' or 'br_on_exn' instruction in the module, and emit the
- // symbol only if so.
+ // This is the symbol used in 'throw' and 'catch' instructions to denote this
+ // is a C++ exception. This symbol has to be emitted somewhere once in the
+ // module. Check if the symbol has already been created, i.e., we have at
+ // least one 'throw' or 'catch' instruction in the module, and emit the symbol
+ // only if so.
SmallString<60> NameStr;
Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
if (Asm->OutContext.lookupSymbol(NameStr)) {
@@ -76,6 +76,7 @@ void WasmException::endFunction(const MachineFunction *MF) {
// information.
void WasmException::computeCallSiteTable(
SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions) {
MachineFunction &MF = *Asm->MF;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
index 1893b6b2df43..f06de786bd76 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
@@ -32,6 +32,7 @@ protected:
// Compute the call site table for wasm EH.
void computeCallSiteTable(
SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions) override;
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index 914308d9147e..1e3f33e70715 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file contains support for writing the metadata for Windows Control Flow
-// Guard, including address-taken functions, and valid longjmp targets.
+// Guard, including address-taken functions and valid longjmp targets.
//
//===----------------------------------------------------------------------===//
@@ -17,8 +17,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -38,8 +38,7 @@ void WinCFGuard::endFunction(const MachineFunction *MF) {
return;
// Copy the function's longjmp targets to a module-level list.
- LongjmpTargets.insert(LongjmpTargets.end(), MF->getLongjmpTargets().begin(),
- MF->getLongjmpTargets().end());
+ llvm::append_range(LongjmpTargets, MF->getLongjmpTargets());
}
/// Returns true if this function's address is escaped in a way that might make
@@ -78,20 +77,50 @@ static bool isPossibleIndirectCallTarget(const Function *F) {
return false;
}
+MCSymbol *WinCFGuard::lookupImpSymbol(const MCSymbol *Sym) {
+ if (Sym->getName().startswith("__imp_"))
+ return nullptr;
+ return Asm->OutContext.lookupSymbol(Twine("__imp_") + Sym->getName());
+}
+
void WinCFGuard::endModule() {
const Module *M = Asm->MMI->getModule();
- std::vector<const Function *> Functions;
- for (const Function &F : *M)
- if (isPossibleIndirectCallTarget(&F))
- Functions.push_back(&F);
- if (Functions.empty() && LongjmpTargets.empty())
+ std::vector<const MCSymbol *> GFIDsEntries;
+ std::vector<const MCSymbol *> GIATsEntries;
+ for (const Function &F : *M) {
+ if (isPossibleIndirectCallTarget(&F)) {
+ // If F is a dllimport and has an "__imp_" symbol already defined, add the
+ // "__imp_" symbol to the .giats section.
+ if (F.hasDLLImportStorageClass()) {
+ if (MCSymbol *impSym = lookupImpSymbol(Asm->getSymbol(&F))) {
+ GIATsEntries.push_back(impSym);
+ }
+ }
+ // Add the function's symbol to the .gfids section.
+ // Note: For dllimport functions, MSVC sometimes does not add this symbol
+ // to the .gfids section, but only adds the corresponding "__imp_" symbol
+ // to the .giats section. Here we always add the symbol to the .gfids
+ // section, since this does not introduce security risks.
+ GFIDsEntries.push_back(Asm->getSymbol(&F));
+ }
+ }
+
+ if (GFIDsEntries.empty() && GIATsEntries.empty() && LongjmpTargets.empty())
return;
+
+ // Emit the symbol index of each GFIDs entry to form the .gfids section.
auto &OS = *Asm->OutStreamer;
OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection());
- for (const Function *F : Functions)
- OS.EmitCOFFSymbolIndex(Asm->getSymbol(F));
+ for (const MCSymbol *S : GFIDsEntries)
+ OS.EmitCOFFSymbolIndex(S);
+
+ // Emit the symbol index of each GIATs entry to form the .giats section.
+ OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection());
+ for (const MCSymbol *S : GIATsEntries) {
+ OS.EmitCOFFSymbolIndex(S);
+ }
- // Emit the symbol index of each longjmp target.
+ // Emit the symbol index of each longjmp target to form the .gljmp section.
OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection());
for (const MCSymbol *S : LongjmpTargets) {
OS.EmitCOFFSymbolIndex(S);
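The WinCFGuard change above boils down to a small decision per function: every possible indirect-call target goes into .gfids, and a dllimport target whose "__imp_" thunk symbol already exists additionally contributes that thunk to .giats. The following simplified sketch replaces MC symbol lookup with a plain set; GuardEntries and classify are illustrative names, not LLVM API.

#include <set>
#include <string>
#include <vector>

struct GuardEntries {
  std::vector<std::string> GFIDs; // address-taken function symbols
  std::vector<std::string> GIATs; // __imp_ import-thunk symbols
};

// Classify one indirect-call-target candidate for the CF Guard tables.
static void classify(const std::string &Sym, bool IsDLLImport,
                     const std::set<std::string> &KnownSymbols,
                     GuardEntries &Out) {
  if (IsDLLImport && KnownSymbols.count("__imp_" + Sym))
    Out.GIATs.push_back("__imp_" + Sym);
  // The function symbol always lands in .gfids; emitting it for dllimport
  // functions as well is harmless.
  Out.GFIDs.push_back(Sym);
}

int main() {
  std::set<std::string> Known = {"__imp_ExternalFn"};
  GuardEntries Out;
  classify("LocalFn", /*IsDLLImport=*/false, Known, Out);
  classify("ExternalFn", /*IsDLLImport=*/true, Known, Out);
  return Out.GFIDs.size() == 2 && Out.GIATs.size() == 1 ? 0 : 1;
}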
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
index 494a153b05ba..0e472af52c8f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -24,6 +24,7 @@ class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler {
/// Target of directive emission.
AsmPrinter *Asm;
std::vector<const MCSymbol *> LongjmpTargets;
+ MCSymbol *lookupImpSymbol(const MCSymbol *Sym);
public:
WinCFGuard(AsmPrinter *A);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index cd8077e7d548..3a9c9df79783 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -137,8 +137,8 @@ void WinException::endFunction(const MachineFunction *MF) {
endFuncletImpl();
- // endFunclet will emit the necessary .xdata tables for x64 SEH.
- if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets())
+ // endFunclet will emit the necessary .xdata tables for table-based SEH.
+ if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets())
return;
if (shouldEmitPersonality || shouldEmitLSDA) {
@@ -151,7 +151,7 @@ void WinException::endFunction(const MachineFunction *MF) {
// Emit the tables appropriate to the personality function in use. If we
// don't recognize the personality, assume it uses an Itanium-style LSDA.
- if (Per == EHPersonality::MSVC_Win64SEH)
+ if (Per == EHPersonality::MSVC_TableSEH)
emitCSpecificHandlerTable(MF);
else if (Per == EHPersonality::MSVC_X86SEH)
emitExceptHandlerTable(MF);
@@ -258,31 +258,35 @@ void WinException::endFuncletImpl() {
if (F.hasPersonalityFn())
Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
- // On funclet exit, we emit a fake "function" end marker, so that the call
- // to EmitWinEHHandlerData below can calculate the size of the funclet or
- // function.
- if (isAArch64) {
- MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
- Asm->OutStreamer->getCurrentSectionOnly());
- Asm->OutStreamer->SwitchSection(XData);
- }
-
- // Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
-
if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality &&
!CurrentFuncletEntry->isCleanupFuncletEntry()) {
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+
// If this is a C++ catch funclet (or the parent function),
// emit a reference to the LSDA for the parent function.
StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
Twine("$cppxdata$", FuncLinkageName));
Asm->OutStreamer->emitValue(create32bitRef(FuncInfoXData), 4);
- } else if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets() &&
+ } else if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets() &&
!CurrentFuncletEntry->isEHFuncletEntry()) {
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+
// If this is the parent function in Win64 SEH, emit the LSDA immediately
// following .seh_handlerdata.
emitCSpecificHandlerTable(MF);
+ } else if (shouldEmitPersonality || shouldEmitLSDA) {
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+ // In these cases, no further info is written to the .xdata section
+ // right here, but is written by e.g. emitExceptionTable in endFunction()
+ // above.
+ } else {
+ // No need to emit the EH handler data right here if nothing needs
+ // writing to the .xdata section; it will be emitted for all
+ // functions that need it in the end anyway.
}
// Switch back to the funclet start .text section now that we are done
@@ -339,22 +343,24 @@ int WinException::getFrameIndexOffset(int FrameIndex,
const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering();
Register UnusedReg;
if (Asm->MAI->usesWindowsCFI()) {
- int Offset =
+ StackOffset Offset =
TFI.getFrameIndexReferencePreferSP(*Asm->MF, FrameIndex, UnusedReg,
/*IgnoreSPUpdates*/ true);
assert(UnusedReg ==
Asm->MF->getSubtarget()
.getTargetLowering()
->getStackPointerRegisterToSaveRestore());
- return Offset;
+ return Offset.getFixed();
}
// For 32-bit, offsets should be relative to the end of the EH registration
// node. For 64-bit, it's relative to SP at the end of the prologue.
assert(FuncInfo.EHRegNodeEndOffset != INT_MAX);
- int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg);
- Offset += FuncInfo.EHRegNodeEndOffset;
- return Offset;
+ StackOffset Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg);
+ Offset += StackOffset::getFixed(FuncInfo.EHRegNodeEndOffset);
+ assert(!Offset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ return Offset.getFixed();
}
namespace {
@@ -951,7 +957,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
int FI = FuncInfo.EHRegNodeFrameIndex;
if (FI != INT_MAX) {
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
- Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI);
+ Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI).getFixed();
}
MCContext &Ctx = Asm->OutContext;
@@ -1015,7 +1021,8 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
Register UnusedReg;
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
int SSPIdx = MFI.getStackProtectorIndex();
- GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg);
+ GSCookieOffset =
+ TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg).getFixed();
}
// Retrieve the EH Guard slot.
@@ -1025,7 +1032,8 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
Register UnusedReg;
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
int EHGuardIdx = FuncInfo.EHGuardFrameIndex;
- EHCookieOffset = TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg);
+ EHCookieOffset =
+ TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg).getFixed();
}
AddComment("GSCookieOffset");
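Several WinException call sites above now go through getFixed() because frame-index offsets are returned as a StackOffset with both a fixed and a scalable component, and the Windows EH tables can only encode the fixed part. A rough illustration of that split, using a local stand-in type rather than llvm::StackOffset:

#include <cassert>
#include <cstdint>

// Local stand-in for a fixed+scalable offset pair.
struct Offset {
  int64_t Fixed;
  int64_t Scalable; // multiples of the runtime scalable-vector size
  int64_t getFixed() const { return Fixed; }
  int64_t getScalable() const { return Scalable; }
};

static Offset addFixed(Offset O, int64_t Bytes) {
  O.Fixed += Bytes;
  return O;
}

int main() {
  Offset FrameIndexOffset{-32, 0};                  // e.g. a slot 32 bytes below SP
  Offset Adjusted = addFixed(FrameIndexOffset, 16); // add EHRegNodeEndOffset
  // EH tables cannot express scalable components, so they must be zero here.
  assert(Adjusted.getScalable() == 0);
  return Adjusted.getFixed() == -16 ? 0 : 1;
}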
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
index c61531c5141a..4026022caa07 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1507,8 +1507,8 @@ void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
bool expanded = expandAtomicOpToLibcall(
I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
- (void)expanded;
- assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Load");
+ if (!expanded)
+ report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}
void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
@@ -1520,8 +1520,8 @@ void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
bool expanded = expandAtomicOpToLibcall(
I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
- (void)expanded;
- assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Store");
+ if (!expanded)
+ report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}
void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
@@ -1535,8 +1535,8 @@ void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
Libcalls);
- (void)expanded;
- assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor CAS");
+ if (!expanded)
+ report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
@@ -1685,6 +1685,11 @@ bool AtomicExpand::expandAtomicOpToLibcall(
return false;
}
+ if (!TLI->getLibcallName(RTLibType)) {
+ // This target does not implement the requested atomic libcall so give up.
+ return false;
+ }
+
// Build up the function call. There's two kinds. First, the sized
// variants. These calls are going to be one of the following (with
// N=1,2,4,8,16):
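The new early return in expandAtomicOpToLibcall above separates two cases: the target simply has no such libcall (return false so the caller can handle it), versus the expansion itself failing unexpectedly (now a fatal error rather than an assert). A toy illustration of that control flow; the table contents and the names getLibcallName and expandToLibcall here are stand-ins, not the real TargetLowering interface.

#include <cstdio>
#include <cstdlib>
#include <map>
#include <string>

// Pretend libcall table: an empty name means "not implemented on this target".
static const char *getLibcallName(const std::string &Key) {
  static const std::map<std::string, const char *> Table = {
      {"atomic_load_4", "__atomic_load_4"}, {"atomic_load_16", ""}};
  auto It = Table.find(Key);
  return It != Table.end() && It->second[0] ? It->second : nullptr;
}

// Returns false when the target lacks the libcall; callers can then fall back
// to another strategy. A genuine failure would abort instead.
static bool expandToLibcall(const std::string &Key) {
  const char *Name = getLibcallName(Key);
  if (!Name)
    return false; // give up quietly, as the new check in AtomicExpand does
  std::printf("emit call to %s\n", Name);
  return true;
}

int main() {
  if (!expandToLibcall("atomic_load_4"))
    std::abort(); // unexpected: this libcall should exist
  expandToLibcall("atomic_load_16"); // no 16-byte libcall: handled gracefully
  return 0;
}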
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
index a35c4d813acc..7499ea8b42d4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -1,4 +1,4 @@
-//===-- BBSectionsPrepare.cpp ---=========---------------------------------===//
+//===-- BasicBlockSections.cpp ---=========--------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// BBSectionsPrepare implementation.
+// BasicBlockSections implementation.
//
// The purpose of this pass is to assign sections to basic blocks when
// -fbasic-block-sections= option is used. Further, with profile information
@@ -48,19 +48,11 @@
// Basic Block Labels
// ==================
//
-// With -fbasic-block-sections=labels, or when a basic block is placed in a
-// unique section, it is labelled with a symbol. This allows easy mapping of
-// virtual addresses from PMU profiles back to the corresponding basic blocks.
-// Since the number of basic blocks is large, the labeling bloats the symbol
-// table sizes and the string table sizes significantly. While the binary size
-// does increase, it does not affect performance as the symbol table is not
-// loaded in memory during run-time. The string table size bloat is kept very
-// minimal using a unary naming scheme that uses string suffix compression. The
-// basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ... This
-// turns out to be very good for string table sizes and the bloat in the string
-// table size for a very large binary is ~8 %. The naming also allows using
-// the --symbol-ordering-file option in LLD to arbitrarily reorder the
-// sections.
+// With -fbasic-block-sections=labels, we emit the offsets of BB addresses of
+// every function into the .llvm_bb_addr_map section. Along with the function
+// symbols, this allows for mapping of virtual addresses in PMU profiles back to
+// the corresponding basic blocks. This logic is implemented in AsmPrinter. This
+// pass only assigns the BBSectionType of every function to ``labels``.
//
//===----------------------------------------------------------------------===//
@@ -69,6 +61,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -86,6 +79,15 @@ using llvm::StringMap;
using llvm::StringRef;
using namespace llvm;
+// Placing the cold clusters in a separate section mitigates against poor
+// profiles and allows optimizations such as hugepage mapping to be applied at a
+// section granularity. Defaults to ".text.split." which is recognized by lld
+// via the `-z keep-text-section-prefix` flag.
+cl::opt<std::string> llvm::BBSectionsColdTextPrefix(
+ "bbsections-cold-text-prefix",
+ cl::desc("The text prefix to use for cold basic block clusters"),
+ cl::init(".text.split."), cl::Hidden);
+
namespace {
// This struct represents the cluster information for a machine basic block.
@@ -100,7 +102,7 @@ struct BBClusterInfo {
using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo, 4>>;
-class BBSectionsPrepare : public MachineFunctionPass {
+class BasicBlockSections : public MachineFunctionPass {
public:
static char ID;
@@ -119,13 +121,13 @@ public:
// name for which we have mapping in ProgramBBClusterInfo.
StringMap<StringRef> FuncAliasMap;
- BBSectionsPrepare(const MemoryBuffer *Buf)
+ BasicBlockSections(const MemoryBuffer *Buf)
: MachineFunctionPass(ID), MBuf(Buf) {
- initializeBBSectionsPreparePass(*PassRegistry::getPassRegistry());
+ initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
};
- BBSectionsPrepare() : MachineFunctionPass(ID) {
- initializeBBSectionsPreparePass(*PassRegistry::getPassRegistry());
+ BasicBlockSections() : MachineFunctionPass(ID) {
+ initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
}
StringRef getPassName() const override {
@@ -144,8 +146,8 @@ public:
} // end anonymous namespace
-char BBSectionsPrepare::ID = 0;
-INITIALIZE_PASS(BBSectionsPrepare, "bbsections-prepare",
+char BasicBlockSections::ID = 0;
+INITIALIZE_PASS(BasicBlockSections, "bbsections-prepare",
"Prepares for basic block sections, by splitting functions "
"into clusters of basic blocks.",
false, false)
@@ -226,9 +228,9 @@ static bool getBBClusterInfoForFunction(
// and "Cold" succeeding all other clusters.
// FuncBBClusterInfo represent the cluster information for basic blocks. If this
// is empty, it means unique sections for all basic blocks in the function.
-static bool assignSectionsAndSortBasicBlocks(
- MachineFunction &MF,
- const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
+static void
+assignSections(MachineFunction &MF,
+ const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
assert(MF.hasBBSections() && "BB Sections is not set for function.");
// This variable stores the section ID of the cluster containing eh_pads (if
// all eh_pads are one cluster). If more than one cluster contain eh_pads, we
@@ -271,12 +273,69 @@ static bool assignSectionsAndSortBasicBlocks(
for (auto &MBB : MF)
if (MBB.isEHPad())
MBB.setSectionID(EHPadsSectionID.getValue());
+}
+void llvm::sortBasicBlocksAndUpdateBranches(
+ MachineFunction &MF, MachineBasicBlockComparator MBBCmp) {
SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs(
MF.getNumBlockIDs());
for (auto &MBB : MF)
PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
+ MF.sort(MBBCmp);
+
+ // Set IsBeginSection and IsEndSection according to the assigned section IDs.
+ MF.assignBeginEndSections();
+
+ // After reordering basic blocks, we must update basic block branches to
+ // insert explicit fallthrough branches when required and optimize branches
+ // when possible.
+ updateBranches(MF, PreLayoutFallThroughs);
+}
+
+// If the exception section begins with a landing pad, that landing pad will
+// assume a zero offset (relative to @LPStart) in the LSDA. However, a value of
+// zero implies "no landing pad." This function inserts a NOP just before the EH
+// pad label to ensure a nonzero offset. Returns true if padding is not needed.
+static bool avoidZeroOffsetLandingPad(MachineFunction &MF) {
+ for (auto &MBB : MF) {
+ if (MBB.isBeginSection() && MBB.isEHPad()) {
+ MachineBasicBlock::iterator MI = MBB.begin();
+ while (!MI->isEHLabel())
+ ++MI;
+ MCInst Noop;
+ MF.getSubtarget().getInstrInfo()->getNoop(Noop);
+ BuildMI(MBB, MI, DebugLoc(),
+ MF.getSubtarget().getInstrInfo()->get(Noop.getOpcode()));
+ return false;
+ }
+ }
+ return true;
+}
+
+bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
+ auto BBSectionsType = MF.getTarget().getBBSectionsType();
+ assert(BBSectionsType != BasicBlockSection::None &&
+ "BB Sections not enabled!");
+ // Renumber blocks before sorting them for basic block sections. This is
+ // useful during sorting, basic blocks in the same section will retain the
+ // default order. This renumbering should also be done for basic block
+ // labels to match the profiles with the correct blocks.
+ MF.RenumberBlocks();
+
+ if (BBSectionsType == BasicBlockSection::Labels) {
+ MF.setBBSectionsType(BBSectionsType);
+ return true;
+ }
+
+ std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
+ if (BBSectionsType == BasicBlockSection::List &&
+ !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo,
+ FuncBBClusterInfo))
+ return true;
+ MF.setBBSectionsType(BBSectionsType);
+ assignSections(MF, FuncBBClusterInfo);
+
// We make sure that the cluster including the entry basic block precedes all
// other clusters.
auto EntryBBSectionID = MF.front().getSectionID();
@@ -300,7 +359,8 @@ static bool assignSectionsAndSortBasicBlocks(
// contiguous and ordered accordingly. Furthermore, clusters are ordered in
// increasing order of their section IDs, with the exception and the
// cold section placed at the end of the function.
- MF.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
+ auto Comparator = [&](const MachineBasicBlock &X,
+ const MachineBasicBlock &Y) {
auto XSectionID = X.getSectionID();
auto YSectionID = Y.getSectionID();
if (XSectionID != YSectionID)
@@ -311,43 +371,10 @@ static bool assignSectionsAndSortBasicBlocks(
return FuncBBClusterInfo[X.getNumber()]->PositionInCluster <
FuncBBClusterInfo[Y.getNumber()]->PositionInCluster;
return X.getNumber() < Y.getNumber();
- });
-
- // Set IsBeginSection and IsEndSection according to the assigned section IDs.
- MF.assignBeginEndSections();
-
- // After reordering basic blocks, we must update basic block branches to
- // insert explicit fallthrough branches when required and optimize branches
- // when possible.
- updateBranches(MF, PreLayoutFallThroughs);
-
- return true;
-}
-
-bool BBSectionsPrepare::runOnMachineFunction(MachineFunction &MF) {
- auto BBSectionsType = MF.getTarget().getBBSectionsType();
- assert(BBSectionsType != BasicBlockSection::None &&
- "BB Sections not enabled!");
- // Renumber blocks before sorting them for basic block sections. This is
- // useful during sorting, basic blocks in the same section will retain the
- // default order. This renumbering should also be done for basic block
- // labels to match the profiles with the correct blocks.
- MF.RenumberBlocks();
-
- if (BBSectionsType == BasicBlockSection::Labels) {
- MF.setBBSectionsType(BBSectionsType);
- MF.createBBLabels();
- return true;
- }
+ };
- std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
- if (BBSectionsType == BasicBlockSection::List &&
- !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo,
- FuncBBClusterInfo))
- return true;
- MF.setBBSectionsType(BBSectionsType);
- MF.createBBLabels();
- assignSectionsAndSortBasicBlocks(MF, FuncBBClusterInfo);
+ sortBasicBlocksAndUpdateBranches(MF, Comparator);
+ avoidZeroOffsetLandingPad(MF);
return true;
}
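The comparator constructed in runOnMachineFunction above orders blocks first by their assigned section ID, then by their position within the cluster, and finally by their original block number. A compact standalone analogue over a plain vector; the Block fields and the example data are invented for illustration and the real pass also handles exception and cold sections specially.

#include <algorithm>
#include <vector>

struct Block {
  int Number;            // original block number
  int SectionID;         // cluster / section this block was assigned to
  int PositionInCluster; // only meaningful inside an explicitly clustered section
};

int main() {
  std::vector<Block> Blocks = {
      {0, 0, 0}, {3, 1, -1}, {1, 0, 2}, {2, 0, 1}};

  std::stable_sort(Blocks.begin(), Blocks.end(),
                   [](const Block &X, const Block &Y) {
                     if (X.SectionID != Y.SectionID)
                       return X.SectionID < Y.SectionID;
                     // Within the clustered section, honor the cluster order;
                     // elsewhere keep the original block order.
                     if (X.SectionID == 0)
                       return X.PositionInCluster < Y.PositionInCluster;
                     return X.Number < Y.Number;
                   });

  // Expected order: 0, 2, 1 (section 0 by cluster position), then 3 (section 1).
  return Blocks.front().Number == 0 && Blocks.back().Number == 3 ? 0 : 1;
}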
@@ -438,7 +465,7 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf,
return Error::success();
}
-bool BBSectionsPrepare::doInitialization(Module &M) {
+bool BasicBlockSections::doInitialization(Module &M) {
if (!MBuf)
return false;
if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap))
@@ -446,12 +473,12 @@ bool BBSectionsPrepare::doInitialization(Module &M) {
return false;
}
-void BBSectionsPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
MachineFunctionPass *
-llvm::createBBSectionsPreparePass(const MemoryBuffer *Buf) {
- return new BBSectionsPrepare(Buf);
+llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) {
+ return new BasicBlockSections(Buf);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index c6d5aa37834f..fd3f465fb390 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -18,16 +18,12 @@
#include "BranchFolding.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -64,7 +60,6 @@
#include <cstddef>
#include <iterator>
#include <numeric>
-#include <vector>
using namespace llvm;
@@ -139,17 +134,18 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
MF.getSubtarget().getRegisterInfo());
}
-BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
+BranchFolder::BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo,
- ProfileSummaryInfo *PSI,
- unsigned MinTailLength)
+ ProfileSummaryInfo *PSI, unsigned MinTailLength)
: EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {
if (MinCommonTailLength == 0)
MinCommonTailLength = TailMergeSize;
switch (FlagEnableTailMerge) {
- case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_UNSET:
+ EnableTailMerge = DefaultEnableTailMerge;
+ break;
case cl::BOU_TRUE: EnableTailMerge = true; break;
case cl::BOU_FALSE: EnableTailMerge = false; break;
}
@@ -1407,7 +1403,7 @@ ReoptimizeBlock:
LLVM_DEBUG(dbgs() << "\nMerging into block: " << PrevBB
<< "From MBB: " << *MBB);
// Remove redundant DBG_VALUEs first.
- if (PrevBB.begin() != PrevBB.end()) {
+ if (!PrevBB.empty()) {
MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
--PrevBBIter;
MachineBasicBlock::iterator MBBIter = MBB->begin();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
index 49c6bcae2db4..2a4ea92a92aa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
@@ -32,8 +32,7 @@ class TargetRegisterInfo;
class LLVM_LIBRARY_VISIBILITY BranchFolder {
public:
- explicit BranchFolder(bool defaultEnableTailMerge,
- bool CommonHoist,
+ explicit BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo,
ProfileSummaryInfo *PSI,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
index 5a3ec1a36f96..366c303614d6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -507,25 +507,31 @@ bool BranchRelaxation::relaxBranchInstructions() {
Next = std::next(J);
MachineInstr &MI = *J;
- if (MI.isConditionalBranch()) {
- MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
- if (!isBlockInRange(MI, *DestBB)) {
- if (Next != MBB.end() && Next->isConditionalBranch()) {
- // If there are multiple conditional branches, this isn't an
- // analyzable block. Split later terminators into a new block so
- // each one will be analyzable.
-
- splitBlockBeforeInstr(*Next, DestBB);
- } else {
- fixupConditionalBranch(MI);
- ++NumConditionalRelaxed;
- }
+ if (!MI.isConditionalBranch())
+ continue;
+
+ if (MI.getOpcode() == TargetOpcode::FAULTING_OP)
+ // FAULTING_OP's destination is not encoded in the instruction stream
+ // and thus never needs to be relaxed.
+ continue;
+
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+ if (!isBlockInRange(MI, *DestBB)) {
+ if (Next != MBB.end() && Next->isConditionalBranch()) {
+ // If there are multiple conditional branches, this isn't an
+ // analyzable block. Split later terminators into a new block so
+ // each one will be analyzable.
+
+ splitBlockBeforeInstr(*Next, DestBB);
+ } else {
+ fixupConditionalBranch(MI);
+ ++NumConditionalRelaxed;
+ }
- Changed = true;
+ Changed = true;
- // This may have modified all of the terminators, so start over.
- Next = MBB.getFirstTerminator();
- }
+ // This may have modified all of the terminators, so start over.
+ Next = MBB.getFirstTerminator();
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
index b01a264dd97d..b11db3e65770 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -118,7 +118,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
if (!MO.isRenamable())
return false;
- Register OriginalReg = MO.getReg();
+ MCRegister OriginalReg = MO.getReg().asMCReg();
// Update only undef operands that have reg units that are mapped to one root.
for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
@@ -171,8 +171,8 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
unsigned Pref) {
- Register reg = MI->getOperand(OpIdx).getReg();
- unsigned Clearance = RDA->getClearance(MI, reg);
+ MCRegister Reg = MI->getOperand(OpIdx).getReg().asMCReg();
+ unsigned Clearance = RDA->getClearance(MI, Reg);
LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
if (Pref > Clearance) {
@@ -186,17 +186,24 @@ bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
void BreakFalseDeps::processDefs(MachineInstr *MI) {
assert(!MI->isDebugInstr() && "Won't process debug values");
+ const MCInstrDesc &MCID = MI->getDesc();
+
// Break dependence on undef uses. Do this before updating LiveRegs below.
// This can remove a false dependence with no additional instructions.
- unsigned OpNum;
- unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
- if (Pref) {
- bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
- // We don't need to bother trying to break a dependency if this
- // instruction has a true dependency on that register through another
- // operand - we'll have to wait for it to be available regardless.
- if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
- UndefReads.push_back(std::make_pair(MI, OpNum));
+ for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef())
+ continue;
+
+ unsigned Pref = TII->getUndefRegClearance(*MI, i, TRI);
+ if (Pref) {
+ bool HadTrueDependency = pickBestRegisterForUndef(MI, i, Pref);
+ // We don't need to bother trying to break a dependency if this
+ // instruction has a true dependency on that register through another
+ // operand - we'll have to wait for it to be available regardless.
+ if (!HadTrueDependency && shouldBreakDependence(MI, i, Pref))
+ UndefReads.push_back(std::make_pair(MI, i));
+ }
}
// The code below allows the target to create a new instruction to break the
@@ -204,7 +211,6 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) {
if (MF->getFunction().hasMinSize())
return;
- const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
i != e; ++i) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 5d6ee09c8438..16f380c1eb62 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -28,66 +28,59 @@ using namespace llvm;
#define DEBUG_TYPE "calcspillweights"
-void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
- MachineFunction &MF,
- VirtRegMap *VRM,
- const MachineLoopInfo &MLI,
- const MachineBlockFrequencyInfo &MBFI,
- VirtRegAuxInfo::NormalizingFn norm) {
+void VirtRegAuxInfo::calculateSpillWeightsAndHints() {
LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
<< "********** Function: " << MF.getName() << '\n');
MachineRegisterInfo &MRI = MF.getRegInfo();
- VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm);
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ unsigned Reg = Register::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
- VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg));
+ calculateSpillWeightAndHint(LIS.getInterval(Reg));
}
}
// Return the preferred allocation register for reg, given a COPY instruction.
-static Register copyHint(const MachineInstr *mi, unsigned reg,
- const TargetRegisterInfo &tri,
- const MachineRegisterInfo &mri) {
- unsigned sub, hsub;
- Register hreg;
- if (mi->getOperand(0).getReg() == reg) {
- sub = mi->getOperand(0).getSubReg();
- hreg = mi->getOperand(1).getReg();
- hsub = mi->getOperand(1).getSubReg();
+static Register copyHint(const MachineInstr *MI, unsigned Reg,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
+ unsigned Sub, HSub;
+ Register HReg;
+ if (MI->getOperand(0).getReg() == Reg) {
+ Sub = MI->getOperand(0).getSubReg();
+ HReg = MI->getOperand(1).getReg();
+ HSub = MI->getOperand(1).getSubReg();
} else {
- sub = mi->getOperand(1).getSubReg();
- hreg = mi->getOperand(0).getReg();
- hsub = mi->getOperand(0).getSubReg();
+ Sub = MI->getOperand(1).getSubReg();
+ HReg = MI->getOperand(0).getReg();
+ HSub = MI->getOperand(0).getSubReg();
}
- if (!hreg)
+ if (!HReg)
return 0;
- if (Register::isVirtualRegister(hreg))
- return sub == hsub ? hreg : Register();
+ if (Register::isVirtualRegister(HReg))
+ return Sub == HSub ? HReg : Register();
- const TargetRegisterClass *rc = mri.getRegClass(reg);
- Register CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
+ const TargetRegisterClass *rc = MRI.getRegClass(Reg);
+ MCRegister CopiedPReg = HSub ? TRI.getSubReg(HReg, HSub) : HReg.asMCReg();
if (rc->contains(CopiedPReg))
return CopiedPReg;
// Check if reg:sub matches so that a super register could be hinted.
- if (sub)
- return tri.getMatchingSuperReg(CopiedPReg, sub, rc);
+ if (Sub)
+ return TRI.getMatchingSuperReg(CopiedPReg, Sub, rc);
return 0;
}
// Check if all values in LI are rematerializable
-static bool isRematerializable(const LiveInterval &LI,
- const LiveIntervals &LIS,
- VirtRegMap *VRM,
+static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS,
+ const VirtRegMap &VRM,
const TargetInstrInfo &TII) {
- unsigned Reg = LI.reg;
- unsigned Original = VRM ? VRM->getOriginal(Reg) : 0;
+ unsigned Reg = LI.reg();
+ unsigned Original = VRM.getOriginal(Reg);
for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I != E; ++I) {
const VNInfo *VNI = *I;
@@ -102,31 +95,28 @@ static bool isRematerializable(const LiveInterval &LI,
// Trace copies introduced by live range splitting. The inline
// spiller can rematerialize through these copies, so the spill
// weight must reflect this.
- if (VRM) {
- while (MI->isFullCopy()) {
- // The copy destination must match the interval register.
- if (MI->getOperand(0).getReg() != Reg)
- return false;
-
- // Get the source register.
- Reg = MI->getOperand(1).getReg();
-
- // If the original (pre-splitting) registers match this
- // copy came from a split.
- if (!Register::isVirtualRegister(Reg) ||
- VRM->getOriginal(Reg) != Original)
- return false;
-
- // Follow the copy live-in value.
- const LiveInterval &SrcLI = LIS.getInterval(Reg);
- LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
- VNI = SrcQ.valueIn();
- assert(VNI && "Copy from non-existing value");
- if (VNI->isPHIDef())
- return false;
- MI = LIS.getInstructionFromIndex(VNI->def);
- assert(MI && "Dead valno in interval");
- }
+ while (MI->isFullCopy()) {
+ // The copy destination must match the interval register.
+ if (MI->getOperand(0).getReg() != Reg)
+ return false;
+
+ // Get the source register.
+ Reg = MI->getOperand(1).getReg();
+
+ // If the original (pre-splitting) registers match, this
+ // copy came from a split.
+ if (!Register::isVirtualRegister(Reg) || VRM.getOriginal(Reg) != Original)
+ return false;
+
+ // Follow the copy live-in value.
+ const LiveInterval &SrcLI = LIS.getInterval(Reg);
+ LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
+ VNI = SrcQ.valueIn();
+ assert(VNI && "Copy from non-existing value");
+ if (VNI->isPHIDef())
+ return false;
+ MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
}
if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis()))
@@ -135,43 +125,55 @@ static bool isRematerializable(const LiveInterval &LI,
return true;
}
-void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
- float weight = weightCalcHelper(li);
+void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) {
+ float Weight = weightCalcHelper(LI);
// Check if unspillable.
- if (weight < 0)
+ if (Weight < 0)
return;
- li.weight = weight;
+ LI.setWeight(Weight);
}
-float VirtRegAuxInfo::futureWeight(LiveInterval &li, SlotIndex start,
- SlotIndex end) {
- return weightCalcHelper(li, &start, &end);
+float VirtRegAuxInfo::futureWeight(LiveInterval &LI, SlotIndex Start,
+ SlotIndex End) {
+ return weightCalcHelper(LI, &Start, &End);
}
-float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
- SlotIndex *end) {
- MachineRegisterInfo &mri = MF.getRegInfo();
- const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo();
- MachineBasicBlock *mbb = nullptr;
- MachineLoop *loop = nullptr;
- bool isExiting = false;
- float totalWeight = 0;
- unsigned numInstr = 0; // Number of instructions using li
- SmallPtrSet<MachineInstr*, 8> visited;
-
- std::pair<unsigned, unsigned> TargetHint = mri.getRegAllocationHint(li.reg);
+float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
+ SlotIndex *End) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineBasicBlock *MBB = nullptr;
+ MachineLoop *Loop = nullptr;
+ bool IsExiting = false;
+ float TotalWeight = 0;
+ unsigned NumInstr = 0; // Number of instructions using LI
+ SmallPtrSet<MachineInstr *, 8> Visited;
+
+ std::pair<Register, Register> TargetHint = MRI.getRegAllocationHint(LI.reg());
+
+ if (LI.isSpillable()) {
+ Register Reg = LI.reg();
+ Register Original = VRM.getOriginal(Reg);
+ const LiveInterval &OrigInt = LIS.getInterval(Original);
+ // LI comes from a split of OrigInt. If OrigInt was marked
+ // as not spillable, make sure the new interval is marked
+ // as not spillable as well.
+ if (!OrigInt.isSpillable())
+ LI.markNotSpillable();
+ }
// Don't recompute spill weight for an unspillable register.
- bool Spillable = li.isSpillable();
+ bool IsSpillable = LI.isSpillable();
- bool localSplitArtifact = start && end;
+ bool IsLocalSplitArtifact = Start && End;
// Do not update future local split artifacts.
- bool updateLI = !localSplitArtifact;
+ bool ShouldUpdateLI = !IsLocalSplitArtifact;
- if (localSplitArtifact) {
- MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*end);
- assert(localMBB == LIS.getMBBFromIndex(*start) &&
+ if (IsLocalSplitArtifact) {
+ MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*End);
+ assert(localMBB == LIS.getMBBFromIndex(*Start) &&
"start and end are expected to be in the same basic block");
// Local split artifact will have 2 additional copy instructions and they
@@ -179,116 +181,119 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
// localLI = COPY other
// ...
// other = COPY localLI
- totalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB);
- totalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB);
+ TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB);
+ TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB);
- numInstr += 2;
+ NumInstr += 2;
}
// CopyHint is a sortable hint derived from a COPY instruction.
struct CopyHint {
- unsigned Reg;
- float Weight;
- bool IsPhys;
- CopyHint(unsigned R, float W, bool P) :
- Reg(R), Weight(W), IsPhys(P) {}
- bool operator<(const CopyHint &rhs) const {
+ const Register Reg;
+ const float Weight;
+ CopyHint(Register R, float W) : Reg(R), Weight(W) {}
+ bool operator<(const CopyHint &Rhs) const {
// Always prefer any physreg hint.
- if (IsPhys != rhs.IsPhys)
- return (IsPhys && !rhs.IsPhys);
- if (Weight != rhs.Weight)
- return (Weight > rhs.Weight);
- return Reg < rhs.Reg; // Tie-breaker.
+ if (Reg.isPhysical() != Rhs.Reg.isPhysical())
+ return Reg.isPhysical();
+ if (Weight != Rhs.Weight)
+ return (Weight > Rhs.Weight);
+ return Reg.id() < Rhs.Reg.id(); // Tie-breaker.
}
};
- std::set<CopyHint> CopyHints;
+ std::set<CopyHint> CopyHints;
+ DenseMap<unsigned, float> Hint;
for (MachineRegisterInfo::reg_instr_nodbg_iterator
- I = mri.reg_instr_nodbg_begin(li.reg),
- E = mri.reg_instr_nodbg_end();
+ I = MRI.reg_instr_nodbg_begin(LI.reg()),
+ E = MRI.reg_instr_nodbg_end();
I != E;) {
- MachineInstr *mi = &*(I++);
+ MachineInstr *MI = &*(I++);
// For local split artifacts, we are interested only in instructions between
// the expected start and end of the range.
- SlotIndex si = LIS.getInstructionIndex(*mi);
- if (localSplitArtifact && ((si < *start) || (si > *end)))
+ SlotIndex SI = LIS.getInstructionIndex(*MI);
+ if (IsLocalSplitArtifact && ((SI < *Start) || (SI > *End)))
continue;
- numInstr++;
- if (mi->isIdentityCopy() || mi->isImplicitDef())
+ NumInstr++;
+ if (MI->isIdentityCopy() || MI->isImplicitDef())
continue;
- if (!visited.insert(mi).second)
+ if (!Visited.insert(MI).second)
continue;
- float weight = 1.0f;
- if (Spillable) {
+ // For terminators that produce values, ask the backend if the register is
+ // not spillable.
+ if (TII.isUnspillableTerminator(MI) && MI->definesRegister(LI.reg())) {
+ LI.markNotSpillable();
+ return -1.0f;
+ }
+
+ float Weight = 1.0f;
+ if (IsSpillable) {
// Get loop info for mi.
- if (mi->getParent() != mbb) {
- mbb = mi->getParent();
- loop = Loops.getLoopFor(mbb);
- isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ if (MI->getParent() != MBB) {
+ MBB = MI->getParent();
+ Loop = Loops.getLoopFor(MBB);
+ IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
}
// Calculate instr weight.
- bool reads, writes;
- std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
- weight = LiveIntervals::getSpillWeight(writes, reads, &MBFI, *mi);
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
+ Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI);
// Give extra weight to what looks like a loop induction variable update.
- if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
- weight *= 3;
+ if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB))
+ Weight *= 3;
- totalWeight += weight;
+ TotalWeight += Weight;
}
// Get allocation hints from copies.
- if (!mi->isCopy())
+ if (!MI->isCopy())
continue;
- Register hint = copyHint(mi, li.reg, tri, mri);
- if (!hint)
+ Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
+ if (!HintReg)
continue;
// Force hweight onto the stack so that x86 doesn't add hidden precision,
// making the comparison incorrectly pass (i.e., 1 > 1 == true??).
//
// FIXME: we probably shouldn't use floats at all.
- volatile float hweight = Hint[hint] += weight;
- if (Register::isVirtualRegister(hint) || mri.isAllocatable(hint))
- CopyHints.insert(
- CopyHint(hint, hweight, Register::isPhysicalRegister(hint)));
+ volatile float HWeight = Hint[HintReg] += Weight;
+ if (HintReg.isVirtual() || MRI.isAllocatable(HintReg))
+ CopyHints.insert(CopyHint(HintReg, HWeight));
}
- Hint.clear();
-
// Pass all the sorted copy hints to mri.
- if (updateLI && CopyHints.size()) {
+ if (ShouldUpdateLI && CopyHints.size()) {
// Remove a generic hint if previously added by target.
if (TargetHint.first == 0 && TargetHint.second)
- mri.clearSimpleHint(li.reg);
+ MRI.clearSimpleHint(LI.reg());
- std::set<unsigned> HintedRegs;
+ std::set<Register> HintedRegs;
for (auto &Hint : CopyHints) {
if (!HintedRegs.insert(Hint.Reg).second ||
(TargetHint.first != 0 && Hint.Reg == TargetHint.second))
// Don't add the same reg twice or the target-type hint again.
continue;
- mri.addRegAllocationHint(li.reg, Hint.Reg);
+ MRI.addRegAllocationHint(LI.reg(), Hint.Reg);
}
// Weakly boost the spill weight of hinted registers.
- totalWeight *= 1.01F;
+ TotalWeight *= 1.01F;
}
// If the live interval was already unspillable, leave it that way.
- if (!Spillable)
+ if (!IsSpillable)
return -1.0;
// Mark li as unspillable if all live ranges are tiny and the interval
// is not live at any reg mask. If the interval is live at a reg mask
// spilling may be required.
- if (updateLI && li.isZeroLength(LIS.getSlotIndexes()) &&
- !li.isLiveAtIndexes(LIS.getRegMaskSlots())) {
- li.markNotSpillable();
+ if (ShouldUpdateLI && LI.isZeroLength(LIS.getSlotIndexes()) &&
+ !LI.isLiveAtIndexes(LIS.getRegMaskSlots())) {
+ LI.markNotSpillable();
return -1.0;
}
@@ -296,10 +301,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
// it is a preferred candidate for spilling.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
- if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
- totalWeight *= 0.5F;
+ if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
+ TotalWeight *= 0.5F;
- if (localSplitArtifact)
- return normalize(totalWeight, start->distance(*end), numInstr);
- return normalize(totalWeight, li.getSize(), numInstr);
+ if (IsLocalSplitArtifact)
+ return normalize(TotalWeight, Start->distance(*End), NumInstr);
+ return normalize(TotalWeight, LI.getSize(), NumInstr);
}
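With this change the spill-weight driver lives on VirtRegAuxInfo instead of being a free function, so a register allocator builds the helper once and calls the argument-free entry point. A minimal sketch of the expected call site, assuming the constructor still takes the same analyses the old free function received (the constructor itself is not shown in this hunk):

  // Hedged sketch; the constructor parameter order is an assumption.
  VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI);
  VRAI.calculateSpillWeightsAndHints(); // walks every virtual register itself

Passing VirtRegMap by const reference rather than a possibly-null pointer is also what lets isRematerializable above drop its VRM null checks.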
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
index 3d8c2c8b00aa..c9246f6e8754 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -62,6 +63,11 @@ void CCState::MarkAllocated(MCPhysReg Reg) {
UsedRegs[*AI / 32] |= 1 << (*AI & 31);
}
+void CCState::MarkUnallocated(MCPhysReg Reg) {
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI / 32] &= ~(1 << (*AI & 31));
+}
+
bool CCState::IsShadowAllocatedReg(MCRegister Reg) const {
if (!isAllocated(Reg))
return false;
@@ -184,14 +190,17 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
}
}
+void CCState::ensureMaxAlignment(Align Alignment) {
+ if (!AnalyzingMustTailForwardedRegs)
+ MF.getFrameInfo().ensureMaxAlignment(Alignment);
+}
+
static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
if (VT.isVector())
return true; // Assume -msse-regparm might be in effect.
if (!VT.isInteger())
return false;
- if (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall)
- return true;
- return false;
+ return (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall);
}
void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
@@ -207,8 +216,8 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
// Allocate something of this value type repeatedly until we get assigned a
// location in memory.
- bool HaveRegParm = true;
- while (HaveRegParm) {
+ bool HaveRegParm;
+ do {
if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
@@ -217,7 +226,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
llvm_unreachable(nullptr);
}
HaveRegParm = Locs.back().isRegLoc();
- }
+ } while (HaveRegParm);
// Copy all the registers from the value locations we added.
assert(NumLocs < Locs.size() && "CC assignment failed to add location");
@@ -248,7 +257,7 @@ void CCState::analyzeMustTailForwardedRegisters(
const TargetLowering *TL = MF.getSubtarget().getTargetLowering();
const TargetRegisterClass *RC = TL->getRegClassFor(RegVT);
for (MCPhysReg PReg : RemainingRegs) {
- unsigned VReg = MF.addLiveIn(PReg, RC);
+ Register VReg = MF.addLiveIn(PReg, RC);
Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT));
}
}
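MarkUnallocated is the exact inverse of MarkAllocated: it clears the bit for the register and every alias in the 32-bit-word bitmap backing UsedRegs. A small self-contained illustration of that word/bit arithmetic (plain C++, not the CCState API):

  #include <cstdint>
  #include <vector>

  // Each register occupies one bit; bit i lives in word i/32 at position i%32.
  void markAllocated(std::vector<uint32_t> &Used, unsigned Reg) {
    Used[Reg / 32] |= 1u << (Reg & 31);
  }
  void markUnallocated(std::vector<uint32_t> &Used, unsigned Reg) {
    Used[Reg / 32] &= ~(1u << (Reg & 31));
  }

The do/while rewrite of getRemainingRegParmsForType further down keeps the same behaviour but makes it explicit that the assignment callback always runs at least once.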
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index 7a8c022c82da..d2400d0371e3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -20,16 +20,17 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
- initializeBBSectionsPreparePass(Registry);
+ initializeBasicBlockSectionsPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
initializeCFGuardLongjmpPass(Registry);
initializeCFIInstrInserterPass(Registry);
+ initializeCheckDebugMachineModulePass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeDebugifyMachineModulePass(Registry);
initializeDetectDeadLanesPass(Registry);
- initializeDwarfEHPreparePass(Registry);
+ initializeDwarfEHPrepareLegacyPassPass(Registry);
initializeEarlyIfConverterPass(Registry);
initializeEarlyIfPredicatorPass(Registry);
initializeEarlyMachineLICMPass(Registry);
@@ -98,7 +99,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
initializeSafeStackLegacyPassPass(Registry);
- initializeScalarizeMaskedMemIntrinPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSjLjEHPreparePass(Registry);
initializeSlotIndexesPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
new file mode 100644
index 000000000000..7f37f2069a3b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
@@ -0,0 +1,25 @@
+//===--- CodeGenPassBuilder.cpp --------------------------------------- ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CodeGenPassBuilder.h"
+
+using namespace llvm;
+
+namespace llvm {
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ AnalysisKey PASS_NAME::Key;
+#include "llvm/CodeGen/MachinePassRegistry.def"
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ AnalysisKey PASS_NAME::Key;
+#include "llvm/CodeGen/MachinePassRegistry.def"
+} // namespace llvm
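The new file only stamps out AnalysisKey definitions for the not-yet-implemented passes listed in MachinePassRegistry.def, using the usual .def X-macro trick: the consumer defines the macro, includes the .def file, and the preprocessor expands one definition per entry (the .def typically #undefs its macros at the end, which is why the same include can appear twice with different macro definitions). A generic sketch of the pattern with hypothetical names, not LLVM's:

  // MyPasses.def (hypothetical):
  //   MY_PASS("dead-code", DeadCodePass)
  //   MY_PASS("constant-fold", ConstantFoldPass)

  // Consumer translation unit:
  #define MY_PASS(NAME, PASS_NAME) AnalysisKey PASS_NAME::Key;
  #include "MyPasses.def"   // expands to one Key definition per entry
  #undef MY_PASS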
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index e8b8e6c93cf0..b2bc75c19709 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -376,6 +376,7 @@ class TypePromotionTransaction;
return *DT;
}
+ void removeAllAssertingVHReferences(Value *V);
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -383,6 +384,7 @@ class TypePromotionTransaction;
void eliminateMostlyEmptyBlock(BasicBlock *BB);
bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
bool isPreheader);
+ bool makeBitReverse(Instruction &I);
bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
bool optimizeInst(Instruction *I, bool &ModifiedDT);
bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
@@ -437,7 +439,11 @@ char CodeGenPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
@@ -466,13 +472,21 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
OptSize = F.hasOptSize();
if (ProfileGuidedSectionPrefix) {
- if (PSI->isFunctionHotInCallGraph(&F, *BFI))
- F.setSectionPrefix(".hot");
- else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
- F.setSectionPrefix(".unlikely");
+ // The hot attribute overrides profile-count-based hotness, while
+ // profile-count-based hotness overrides the cold attribute.
+ // This is conservative behavior.
+ if (F.hasFnAttribute(Attribute::Hot) ||
+ PSI->isFunctionHotInCallGraph(&F, *BFI))
+ F.setSectionPrefix("hot");
+ // If PSI shows this function is not hot, place it in the unlikely section
+ // if (1) PSI shows this is a cold function, or (2) the function has the
+ // cold attribute.
+ else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
+ F.hasFnAttribute(Attribute::Cold))
+ F.setSectionPrefix("unlikely");
else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
PSI->isFunctionHotnessUnknown(F))
- F.setSectionPrefix(".unknown");
+ F.setSectionPrefix("unknown");
}
/// This optimization identifies DIV instructions that can be
@@ -538,6 +552,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LargeOffsetGEPID.clear();
}
+ NewGEPBases.clear();
SunkAddrs.clear();
if (!DisableBranchOpts) {
@@ -547,13 +562,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// are removed.
SmallSetVector<BasicBlock*, 8> WorkList;
for (BasicBlock &BB : F) {
- SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
+ SmallVector<BasicBlock *, 2> Successors(successors(&BB));
MadeChange |= ConstantFoldTerminator(&BB, true);
if (!MadeChange) continue;
for (SmallVectorImpl<BasicBlock*>::iterator
II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_begin(*II) == pred_end(*II))
+ if (pred_empty(*II))
WorkList.insert(*II);
}
@@ -561,13 +576,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= !WorkList.empty();
while (!WorkList.empty()) {
BasicBlock *BB = WorkList.pop_back_val();
- SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+ SmallVector<BasicBlock*, 2> Successors(successors(BB));
DeleteDeadBlock(BB);
for (SmallVectorImpl<BasicBlock*>::iterator
II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_begin(*II) == pred_end(*II))
+ if (pred_empty(*II))
WorkList.insert(*II);
}
@@ -601,6 +616,33 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
+/// An instruction is about to be deleted, so remove all references to it in our
+/// GEP-tracking data structures.
+void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
+ LargeOffsetGEPMap.erase(V);
+ NewGEPBases.erase(V);
+
+ auto GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP)
+ return;
+
+ LargeOffsetGEPID.erase(GEP);
+
+ auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
+ if (VecI == LargeOffsetGEPMap.end())
+ return;
+
+ auto &GEPVector = VecI->second;
+ const auto &I =
+ llvm::find_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
+ if (I == GEPVector.end())
+ return;
+
+ GEPVector.erase(I);
+ if (GEPVector.empty())
+ LargeOffsetGEPMap.erase(VecI);
+}
+
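The reason this hook exists: CodeGenPrepare keeps GEPs in side tables (LargeOffsetGEPMap, NewGEPBases, LargeOffsetGEPID), and the callers below pass it as the about-to-delete callback of RecursivelyDeleteTriviallyDeadInstructions so those tables never hold dangling pointers. A stripped-down illustration of the callback pattern in plain C++:

  #include <functional>
  #include <set>

  std::set<int *> SideTable; // stands in for the GEP-tracking maps above

  // Deleter that lets bookkeeping run before the object goes away.
  void destroy(int *V, const std::function<void(int *)> &AboutToDelete) {
    AboutToDelete(V); // e.g. SideTable.erase(V)
    delete V;
  }

  // usage: destroy(P, [](int *V) { SideTable.erase(V); });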
// Verify BFI has been updated correctly by recomputing BFI and comparing them.
void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
DominatorTree NewDT(F);
@@ -619,9 +661,10 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
// Use a temporary array to avoid iterator being invalidated when
// deleting blocks.
SmallVector<WeakTrackingVH, 16> Blocks;
- for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
+ for (auto &Block : llvm::drop_begin(F))
Blocks.push_back(&Block);
+ SmallSet<WeakTrackingVH, 16> Preds;
for (auto &Block : Blocks) {
auto *BB = cast_or_null<BasicBlock>(Block);
if (!BB)
@@ -640,8 +683,16 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
// Merge BB into SinglePred and delete it.
MergeBlockIntoPredecessor(BB);
+ Preds.insert(SinglePred);
}
}
+
+ // (Repeatedly) merging blocks into their predecessors can create redundant
+ // debug intrinsics.
+ for (auto &Pred : Preds)
+ if (auto *BB = cast_or_null<BasicBlock>(Pred))
+ RemoveRedundantDbgInstrs(BB);
+
return Changed;
}
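Merging a block into its predecessor can leave duplicate dbg.value intrinsics behind, so the loop now records each merge target in Preds and runs RemoveRedundantDbgInstrs once per affected block afterwards, rather than after every merge. The shape of that "collect now, clean up once" pattern in plain C++:

  #include <set>
  #include <vector>

  bool mutate(int Item);   // may dirty the same item several times
  void cleanup(int Item);  // comparatively expensive, run once per item

  void process(std::vector<int> &Items) {
    std::set<int> Touched;
    for (int I : Items)
      if (mutate(I))
        Touched.insert(I);
    for (int I : Touched)  // each dirtied item is cleaned exactly once
      cleanup(I);
  }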
@@ -686,7 +737,7 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
while (!LoopList.empty()) {
Loop *L = LoopList.pop_back_val();
- LoopList.insert(LoopList.end(), L->begin(), L->end());
+ llvm::append_range(LoopList, *L);
if (BasicBlock *Preheader = L->getLoopPreheader())
Preheaders.insert(Preheader);
}
@@ -696,7 +747,7 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
// as we remove them.
// Note that this intentionally skips the entry block.
SmallVector<WeakTrackingVH, 16> Blocks;
- for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
+ for (auto &Block : llvm::drop_begin(F))
Blocks.push_back(&Block);
for (auto &Block : Blocks) {
@@ -2011,7 +2062,14 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::assume: {
+ Value *Operand = II->getOperand(0);
II->eraseFromParent();
+ // Prune the operand, it's most likely dead.
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ RecursivelyDeleteTriviallyDeadInstructions(
+ Operand, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
+ });
return true;
}
@@ -2172,8 +2230,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
EVI = dyn_cast<ExtractValueInst>(V);
if (EVI) {
V = EVI->getOperand(0);
- if (!std::all_of(EVI->idx_begin(), EVI->idx_end(),
- [](unsigned idx) { return idx == 0; }))
+ if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
return false;
}
@@ -2192,13 +2249,12 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
// Skip over debug and the bitcast.
do {
++BI;
- } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI);
+ } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI ||
+ isa<PseudoProbeInst>(BI));
if (&*BI != RetI)
return false;
} else {
- BasicBlock::iterator BI = BB->begin();
- while (isa<DbgInfoIntrinsic>(BI)) ++BI;
- if (&*BI != RetI)
+ if (BB->getFirstNonPHIOrDbg(true) != RetI)
return false;
}
@@ -2223,18 +2279,12 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
if (!VisitedBBs.insert(*PI).second)
continue;
-
- BasicBlock::InstListType &InstList = (*PI)->getInstList();
- BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
- BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
- do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
- if (RI == RE)
- continue;
-
- CallInst *CI = dyn_cast<CallInst>(&*RI);
- if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
- attributesPermitTailCall(F, CI, RetI, *TLI))
- TailCallBBs.push_back(*PI);
+ if (Instruction *I = (*PI)->rbegin()->getPrevNonDebugInstruction(true)) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
+ attributesPermitTailCall(F, CI, RetI, *TLI))
+ TailCallBBs.push_back(*PI);
+ }
}
}
@@ -2258,7 +2308,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
}
// If we eliminated all predecessors of the block, delete the block now.
- if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
+ if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
BB->eraseFromParent();
return Changed;
@@ -3109,9 +3159,7 @@ public:
/// \returns whether the element is actually removed, i.e. was in the
/// collection before the operation.
bool erase(PHINode *Ptr) {
- auto it = NodeMap.find(Ptr);
- if (it != NodeMap.end()) {
- NodeMap.erase(Ptr);
+ if (NodeMap.erase(Ptr)) {
SkipRemovedElements(FirstValidElement);
return true;
}
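This hunk relies on erase(key) returning the number of elements removed, so the separate find/erase pair collapses into a single call. For example, with a std::map:

  #include <map>

  std::map<int, int> NodeMap;

  bool eraseIfPresent(int Key) {
    if (NodeMap.erase(Key)) { // erase returns 0 or 1 for unique-key maps
      // post-removal fixup goes here
      return true;
    }
    return false;
  }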
@@ -3666,8 +3714,7 @@ private:
PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
Map[Current] = PHI;
ST.insertNewPhi(PHI);
- for (Value *P : CurrentPhi->incoming_values())
- Worklist.push_back(P);
+ append_range(Worklist, CurrentPhi->incoming_values());
}
}
}
@@ -4289,7 +4336,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned SrcAS
= AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
- if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
return matchAddr(AddrInst->getOperand(0), Depth);
return false;
}
@@ -4921,8 +4968,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
- for (Value *IncValue : P->incoming_values())
- worklist.push_back(IncValue);
+ append_range(worklist, P->incoming_values());
PhiOrSelectSeen = true;
continue;
}
@@ -5236,20 +5282,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// If we have no uses, recursively delete the value and all dead instructions
// using it.
if (Repl->use_empty()) {
- // This can cause recursive deletion, which can invalidate our iterator.
- // Use a WeakTrackingVH to hold onto it in case this happens.
- Value *CurValue = &*CurInstIterator;
- WeakTrackingVH IterHandle(CurValue);
- BasicBlock *BB = CurInstIterator->getParent();
-
- RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
-
- if (IterHandle != CurValue) {
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
- }
+ resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
+ RecursivelyDeleteTriviallyDeadInstructions(
+ Repl, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
+ });
}
++NumMemoryInsts;
return true;
@@ -5270,92 +5307,112 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
///
/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
/// followed by a GEP with an all zeroes vector index. This will enable
-/// SelectionDAGBuilder to use a the scalar GEP as the uniform base and have a
+/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
/// zero index.
bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
Value *Ptr) {
- const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (!GEP || !GEP->hasIndices())
- return false;
+ Value *NewAddr;
- // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
- // FIXME: We should support this by sinking the GEP.
- if (MemoryInst->getParent() != GEP->getParent())
- return false;
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ // Don't optimize GEPs that don't have indices.
+ if (!GEP->hasIndices())
+ return false;
- SmallVector<Value *, 2> Ops(GEP->op_begin(), GEP->op_end());
+ // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
+ // FIXME: We should support this by sinking the GEP.
+ if (MemoryInst->getParent() != GEP->getParent())
+ return false;
- bool RewriteGEP = false;
+ SmallVector<Value *, 2> Ops(GEP->operands());
- if (Ops[0]->getType()->isVectorTy()) {
- Ops[0] = const_cast<Value *>(getSplatValue(Ops[0]));
- if (!Ops[0])
- return false;
- RewriteGEP = true;
- }
+ bool RewriteGEP = false;
- unsigned FinalIndex = Ops.size() - 1;
+ if (Ops[0]->getType()->isVectorTy()) {
+ Ops[0] = getSplatValue(Ops[0]);
+ if (!Ops[0])
+ return false;
+ RewriteGEP = true;
+ }
- // Ensure all but the last index is 0.
- // FIXME: This isn't strictly required. All that's required is that they are
- // all scalars or splats.
- for (unsigned i = 1; i < FinalIndex; ++i) {
- auto *C = dyn_cast<Constant>(Ops[i]);
- if (!C)
- return false;
- if (isa<VectorType>(C->getType()))
- C = C->getSplatValue();
- auto *CI = dyn_cast_or_null<ConstantInt>(C);
- if (!CI || !CI->isZero())
- return false;
- // Scalarize the index if needed.
- Ops[i] = CI;
- }
-
- // Try to scalarize the final index.
- if (Ops[FinalIndex]->getType()->isVectorTy()) {
- if (Value *V = const_cast<Value *>(getSplatValue(Ops[FinalIndex]))) {
- auto *C = dyn_cast<ConstantInt>(V);
- // Don't scalarize all zeros vector.
- if (!C || !C->isZero()) {
- Ops[FinalIndex] = V;
- RewriteGEP = true;
+ unsigned FinalIndex = Ops.size() - 1;
+
+ // Ensure all but the last index is 0.
+ // FIXME: This isn't strictly required. All that's required is that they are
+ // all scalars or splats.
+ for (unsigned i = 1; i < FinalIndex; ++i) {
+ auto *C = dyn_cast<Constant>(Ops[i]);
+ if (!C)
+ return false;
+ if (isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
+ auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (!CI || !CI->isZero())
+ return false;
+ // Scalarize the index if needed.
+ Ops[i] = CI;
+ }
+
+ // Try to scalarize the final index.
+ if (Ops[FinalIndex]->getType()->isVectorTy()) {
+ if (Value *V = getSplatValue(Ops[FinalIndex])) {
+ auto *C = dyn_cast<ConstantInt>(V);
+ // Don't scalarize all zeros vector.
+ if (!C || !C->isZero()) {
+ Ops[FinalIndex] = V;
+ RewriteGEP = true;
+ }
}
}
- }
- // If we made any changes or the we have extra operands, we need to generate
- // new instructions.
- if (!RewriteGEP && Ops.size() == 2)
- return false;
-
- unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements();
+ // If we made any changes or we have extra operands, we need to generate
+ // new instructions.
+ if (!RewriteGEP && Ops.size() == 2)
+ return false;
- IRBuilder<> Builder(MemoryInst);
+ auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
- Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
+ IRBuilder<> Builder(MemoryInst);
- Value *NewAddr;
+ Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
- // If the final index isn't a vector, emit a scalar GEP containing all ops
- // and a vector GEP with all zeroes final index.
- if (!Ops[FinalIndex]->getType()->isVectorTy()) {
- NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front());
- auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts);
- NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy));
- } else {
- Value *Base = Ops[0];
- Value *Index = Ops[FinalIndex];
+ // If the final index isn't a vector, emit a scalar GEP containing all ops
+ // and a vector GEP with all zeroes final index.
+ if (!Ops[FinalIndex]->getType()->isVectorTy()) {
+ NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front());
+ auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
+ NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy));
+ } else {
+ Value *Base = Ops[0];
+ Value *Index = Ops[FinalIndex];
+
+ // Create a scalar GEP if there are more than 2 operands.
+ if (Ops.size() != 2) {
+ // Replace the last index with 0.
+ Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
+ Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front());
+ }
- // Create a scalar GEP if there are more than 2 operands.
- if (Ops.size() != 2) {
- // Replace the last index with 0.
- Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
- Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front());
+ // Now create the GEP with scalar pointer and vector index.
+ NewAddr = Builder.CreateGEP(Base, Index);
}
+ } else if (!isa<Constant>(Ptr)) {
+ // Not a GEP; maybe it's a splat and we can create a GEP to enable
+ // SelectionDAGBuilder to use it as a uniform base.
+ Value *V = getSplatValue(Ptr);
+ if (!V)
+ return false;
+
+ auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
- // Now create the GEP with scalar pointer and vector index.
- NewAddr = Builder.CreateGEP(Base, Index);
+ IRBuilder<> Builder(MemoryInst);
+
+ // Emit a vector GEP with a scalar pointer and all 0s vector index.
+ Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
+ auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
+ NewAddr = Builder.CreateGEP(V, Constant::getNullValue(IndexTy));
+ } else {
+ // Constant; SelectionDAGBuilder knows to check if it's a splat.
+ return false;
}
MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
@@ -5363,7 +5420,9 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
// If we have no uses, recursively delete the value and all dead instructions
// using it.
if (Ptr->use_empty())
- RecursivelyDeleteTriviallyDeadInstructions(Ptr, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(
+ Ptr, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
return true;
}
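The net effect of the rewrite, for a splatted non-GEP pointer operand, is to hand SelectionDAGBuilder a uniform scalar base plus an all-zero vector index. A hedged sketch of that path using only the calls visible in this hunk (variable names are illustrative):

  // Ptr is a vector-of-pointers splat of a single scalar pointer V.
  Value *V = getSplatValue(Ptr);
  auto EC = cast<VectorType>(Ptr->getType())->getElementCount();
  IRBuilder<> Builder(MemoryInst);
  Type *IdxTy = DL->getIndexType(V->getType()->getScalarType());
  Value *ZeroIdx = Constant::getNullValue(VectorType::get(IdxTy, EC));
  Value *NewAddr = Builder.CreateGEP(V, ZeroIdx); // scalar base, zero vector index
  MemoryInst->replaceUsesOfWith(Ptr, NewAddr);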
@@ -5752,6 +5811,12 @@ bool CodeGenPrepare::optimizePhiType(
Visited.insert(I);
SmallPtrSet<Instruction *, 4> Defs;
SmallPtrSet<Instruction *, 4> Uses;
+ // This works by adding extra bitcasts between load/stores and removing
+ // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)),
+ // we can get into a situation where we remove a bitcast in one iteration
+ // just to add it again in the next. We need to ensure that at least one
+ // bitcast we remove is anchored to something that will not change back.
+ bool AnyAnchored = false;
while (!Worklist.empty()) {
Instruction *II = Worklist.pop_back_val();
@@ -5768,6 +5833,8 @@ bool CodeGenPrepare::optimizePhiType(
Worklist.push_back(OpPhi);
}
} else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
+ if (!OpLoad->isSimple())
+ return false;
if (!Defs.count(OpLoad)) {
Defs.insert(OpLoad);
Worklist.push_back(OpLoad);
@@ -5785,9 +5852,12 @@ bool CodeGenPrepare::optimizePhiType(
if (!Defs.count(OpBC)) {
Defs.insert(OpBC);
Worklist.push_back(OpBC);
+ AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
+ !isa<ExtractElementInst>(OpBC->getOperand(0));
}
- } else if (!isa<UndefValue>(V))
+ } else if (!isa<UndefValue>(V)) {
return false;
+ }
}
}
@@ -5802,7 +5872,7 @@ bool CodeGenPrepare::optimizePhiType(
Worklist.push_back(OpPhi);
}
} else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
- if (OpStore->getOperand(0) != II)
+ if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
return false;
Uses.insert(OpStore);
} else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
@@ -5811,12 +5881,15 @@ bool CodeGenPrepare::optimizePhiType(
if (OpBC->getType() != ConvertTy)
return false;
Uses.insert(OpBC);
- } else
+ AnyAnchored |=
+ any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
+ } else {
return false;
+ }
}
}
- if (!ConvertTy || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
+ if (!ConvertTy || !AnyAnchored || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
return false;
LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
@@ -5827,11 +5900,13 @@ bool CodeGenPrepare::optimizePhiType(
ValueToValueMap ValMap;
ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy);
for (Instruction *D : Defs) {
- if (isa<BitCastInst>(D))
+ if (isa<BitCastInst>(D)) {
ValMap[D] = D->getOperand(0);
- else
+ DeletedInstrs.insert(D);
+ } else {
ValMap[D] =
new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
+ }
}
for (PHINode *Phi : PhiNodes)
ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
@@ -5842,15 +5917,17 @@ bool CodeGenPrepare::optimizePhiType(
for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
Phi->getIncomingBlock(i));
+ Visited.insert(NewPhi);
}
// And finally pipe up the stores and bitcasts
for (Instruction *U : Uses) {
if (isa<BitCastInst>(U)) {
DeletedInstrs.insert(U);
U->replaceAllUsesWith(ValMap[U->getOperand(0)]);
- } else
+ } else {
U->setOperand(0,
new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
+ }
}
// Save the removed phis to be deleted later.
@@ -6445,9 +6522,7 @@ bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
- // If branch conversion isn't desirable, exit early.
- if (DisableSelectToBranch || OptSize ||
- llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()))
+ if (DisableSelectToBranch)
return false;
// Find all consecutive select instructions that share the same condition.
@@ -6483,7 +6558,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
SelectKind = TargetLowering::ScalarValSelect;
if (TLI->isSelectSupported(SelectKind) &&
- !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
+ (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
+ llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
return false;
// The DominatorTree needs to be rebuilt by any consumers after this
@@ -6621,6 +6697,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
/// in MVE takes a GPR (integer) register, and the instruction that incorporate
/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register.
bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+ // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
m_Undef(), m_ZeroMask())))
return false;
@@ -6640,14 +6717,12 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
Builder.SetInsertPoint(SVI);
Value *BC1 = Builder.CreateBitCast(
cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
- Value *Insert = Builder.CreateInsertElement(UndefValue::get(NewVecType), BC1,
- (uint64_t)0);
- Value *Shuffle = Builder.CreateShuffleVector(
- Insert, UndefValue::get(NewVecType), SVI->getShuffleMask());
+ Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
SVI->replaceAllUsesWith(BC2);
- RecursivelyDeleteTriviallyDeadInstructions(SVI);
+ RecursivelyDeleteTriviallyDeadInstructions(
+ SVI, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); });
// Also hoist the bitcast up to its operand if it they are not in the same
// block.
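CreateVectorSplat produces the same insertelement-into-lane-0 plus zero-mask shufflevector sequence that the deleted lines built by hand, so the replacement is behaviour-preserving. Roughly:

  // Hand-rolled form removed above:
  //   %ins   = insertelement <N x T> undef, T %s, i64 0
  //   %splat = shufflevector <N x T> %ins, <N x T> undef, <N x i32> zeroinitializer
  // One-call form added above:
  Value *Splat = Builder.CreateVectorSplat(NumElts, Scalar);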
@@ -6920,10 +6995,10 @@ class VectorPromoteHelper {
if (UseSplat)
return ConstantVector::getSplat(EC, Val);
- if (!EC.Scalable) {
+ if (!EC.isScalable()) {
SmallVector<Constant *, 4> ConstVec;
UndefValue *UndefVal = UndefValue::get(Val->getType());
- for (unsigned Idx = 0; Idx != EC.Min; ++Idx) {
+ for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
if (Idx == ExtractIdx)
ConstVec.push_back(Val);
else
@@ -7604,11 +7679,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
/// Given an OR instruction, check to see if this is a bitreverse
/// idiom. If so, insert the new intrinsic and return true.
-static bool makeBitReverse(Instruction &I, const DataLayout &DL,
- const TargetLowering &TLI) {
+bool CodeGenPrepare::makeBitReverse(Instruction &I) {
if (!I.getType()->isIntegerTy() ||
- !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
- TLI.getValueType(DL, I.getType(), true)))
+ !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
+ TLI->getValueType(*DL, I.getType(), true)))
return false;
SmallVector<Instruction*, 4> Insts;
@@ -7616,7 +7690,8 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL,
return false;
Instruction *LastInst = Insts.back();
I.replaceAllUsesWith(LastInst);
- RecursivelyDeleteTriviallyDeadInstructions(&I);
+ RecursivelyDeleteTriviallyDeadInstructions(
+ &I, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); });
return true;
}
@@ -7638,7 +7713,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
while (MadeBitReverse) {
MadeBitReverse = false;
for (auto &I : reverse(BB)) {
- if (makeBitReverse(I, *DL, *TLI)) {
+ if (makeBitReverse(I)) {
MadeBitReverse = MadeChange = true;
break;
}
@@ -7757,9 +7832,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
// %cond2 = icmp|fcmp|binary instruction ...
// %cond.or = or|and i1 %cond1, cond2
// br i1 %cond.or label %dest1, label %dest2"
- BinaryOperator *LogicOp;
+ Instruction *LogicOp;
BasicBlock *TBB, *FBB;
- if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
+ if (!match(BB.getTerminator(),
+ m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
continue;
auto *Br1 = cast<BranchInst>(BB.getTerminator());
@@ -7772,17 +7848,22 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
unsigned Opc;
Value *Cond1, *Cond2;
- if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
- m_OneUse(m_Value(Cond2)))))
+ if (match(LogicOp,
+ m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
Opc = Instruction::And;
- else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
- m_OneUse(m_Value(Cond2)))))
+ else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
+ m_OneUse(m_Value(Cond2)))))
Opc = Instruction::Or;
else
continue;
- if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
- !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
+ auto IsGoodCond = [](Value *Cond) {
+ return match(
+ Cond,
+ m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
+ m_LogicalOr(m_Value(), m_Value()))));
+ };
+ if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
continue;
LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
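m_LogicalAnd and m_LogicalOr match both the plain i1 bitwise form and the short-circuit select form, which is what lets this loop split conditions that earlier passes have already turned into selects. A hedged matcher sketch (the captured value names are illustrative):

  using namespace llvm::PatternMatch;
  Value *A, *B;
  // Matches "and i1 %a, %b" as well as "select i1 %a, i1 %b, i1 false".
  if (match(Cond, m_LogicalAnd(m_Value(A), m_Value(B)))) {
    // treat as Instruction::And
  }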
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index 12dadf97e02c..97c110afdda4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -58,6 +58,7 @@ CGOPT(bool, EnableNoInfsFPMath)
CGOPT(bool, EnableNoNaNsFPMath)
CGOPT(bool, EnableNoSignedZerosFPMath)
CGOPT(bool, EnableNoTrappingFPMath)
+CGOPT(bool, EnableAIXExtendedAltivecABI)
CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath)
CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math)
CGOPT(bool, EnableHonorSignDependentRoundingFPMath)
@@ -74,7 +75,12 @@ CGOPT(bool, UseCtors)
CGOPT(bool, RelaxELFRelocations)
CGOPT_EXP(bool, DataSections)
CGOPT_EXP(bool, FunctionSections)
+CGOPT(bool, IgnoreXCOFFVisibility)
+CGOPT(bool, XCOFFTracebackTable)
CGOPT(std::string, BBSections)
+CGOPT(std::string, StackProtectorGuard)
+CGOPT(unsigned, StackProtectorGuardOffset)
+CGOPT(std::string, StackProtectorGuardReg)
CGOPT(unsigned, TLSSize)
CGOPT(bool, EmulatedTLS)
CGOPT(bool, UniqueSectionNames)
@@ -84,7 +90,10 @@ CGOPT(DebuggerKind, DebuggerTuningOpt)
CGOPT(bool, EnableStackSizeSection)
CGOPT(bool, EnableAddrsig)
CGOPT(bool, EmitCallSiteInfo)
+CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
+CGOPT(bool, PseudoProbeForProfiling)
+CGOPT(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
@@ -276,6 +285,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(DontPlaceZerosInBSS);
+ static cl::opt<bool> EnableAIXExtendedAltivecABI(
+ "vec-extabi", cl::desc("Enable the AIX Extended Altivec ABI."),
+ cl::init(false));
+ CGBINDOPT(EnableAIXExtendedAltivecABI);
+
static cl::opt<bool> EnableGuaranteedTailCallOpt(
"tailcallopt",
cl::desc(
@@ -331,13 +345,40 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(FunctionSections);
+ static cl::opt<bool> IgnoreXCOFFVisibility(
+ "ignore-xcoff-visibility",
+ cl::desc("Not emit the visibility attribute for asm in AIX OS or give "
+ "all symbols 'unspecified' visibility in XCOFF object file"),
+ cl::init(false));
+ CGBINDOPT(IgnoreXCOFFVisibility);
+
+ static cl::opt<bool> XCOFFTracebackTable(
+ "xcoff-traceback-table", cl::desc("Emit the XCOFF traceback table"),
+ cl::init(true));
+ CGBINDOPT(XCOFFTracebackTable);
+
static cl::opt<std::string> BBSections(
- "basicblock-sections",
+ "basic-block-sections",
cl::desc("Emit basic blocks into separate sections"),
cl::value_desc("all | <function list (file)> | labels | none"),
cl::init("none"));
CGBINDOPT(BBSections);
+ static cl::opt<std::string> StackProtectorGuard(
+ "stack-protector-guard", cl::desc("Stack protector guard mode"),
+ cl::init("none"));
+ CGBINDOPT(StackProtectorGuard);
+
+ static cl::opt<std::string> StackProtectorGuardReg(
+ "stack-protector-guard-reg", cl::desc("Stack protector guard register"),
+ cl::init("none"));
+ CGBINDOPT(StackProtectorGuardReg);
+
+ static cl::opt<unsigned> StackProtectorGuardOffset(
+ "stack-protector-guard-offset", cl::desc("Stack protector guard offset"),
+ cl::init((unsigned)-1));
+ CGBINDOPT(StackProtectorGuardOffset);
+
static cl::opt<unsigned> TLSSize(
"tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0));
CGBINDOPT(TLSSize);
@@ -352,7 +393,7 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
CGBINDOPT(UniqueSectionNames);
static cl::opt<bool> UniqueBasicBlockSectionNames(
- "unique-bb-section-names",
+ "unique-basic-block-section-names",
cl::desc("Give unique names to every basic block section"),
cl::init(false));
CGBINDOPT(UniqueBasicBlockSectionNames);
@@ -400,6 +441,24 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableDebugEntryValues);
+ static cl::opt<bool> PseudoProbeForProfiling(
+ "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"),
+ cl::init(false));
+ CGBINDOPT(PseudoProbeForProfiling);
+
+ static cl::opt<bool> ValueTrackingVariableLocations(
+ "experimental-debug-variable-locations",
+ cl::desc("Use experimental new value-tracking variable locations"),
+ cl::init(false));
+ CGBINDOPT(ValueTrackingVariableLocations);
+
+ static cl::opt<bool> EnableMachineFunctionSplitter(
+ "split-machine-functions",
+ cl::desc("Split out cold basic blocks from machine functions based on "
+ "profile information"),
+ cl::init(false));
+ CGBINDOPT(EnableMachineFunctionSplitter);
+
static cl::opt<bool> ForceDwarfFrameSection(
"force-dwarf-frame-section",
cl::desc("Always emit a debug frame section."), cl::init(false));
@@ -436,9 +495,28 @@ codegen::getBBSectionsMode(llvm::TargetOptions &Options) {
}
}
+llvm::StackProtectorGuards
+codegen::getStackProtectorGuardMode(llvm::TargetOptions &Options) {
+ if (getStackProtectorGuard() == "tls")
+ return StackProtectorGuards::TLS;
+ if (getStackProtectorGuard() == "global")
+ return StackProtectorGuards::Global;
+ if (getStackProtectorGuard() != "none") {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+ MemoryBuffer::getFile(getStackProtectorGuard());
+ if (!MBOrErr)
+ errs() << "error illegal stack protector guard mode: "
+ << MBOrErr.getError().message() << "\n";
+ else
+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
+ }
+ return StackProtectorGuards::None;
+}
+
// Common utility function tightly tied to the options listed here. Initializes
// a TargetOptions object with CodeGen flags and returns it.
-TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() {
+TargetOptions
+codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
TargetOptions Options;
Options.AllowFPOpFusion = getFuseFPOps();
Options.UnsafeFPMath = getEnableUnsafeFPMath();
@@ -456,25 +534,35 @@ TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() {
getEnableHonorSignDependentRoundingFPMath();
if (getFloatABIForCalls() != FloatABI::Default)
Options.FloatABIType = getFloatABIForCalls();
+ Options.EnableAIXExtendedAltivecABI = getEnableAIXExtendedAltivecABI();
Options.NoZerosInBSS = getDontPlaceZerosInBSS();
Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
Options.StackAlignmentOverride = getOverrideStackAlignment();
Options.StackSymbolOrdering = getStackSymbolOrdering();
Options.UseInitArray = !getUseCtors();
Options.RelaxELFRelocations = getRelaxELFRelocations();
- Options.DataSections = getDataSections();
+ Options.DataSections =
+ getExplicitDataSections().getValueOr(TheTriple.hasDefaultDataSections());
Options.FunctionSections = getFunctionSections();
+ Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility();
+ Options.XCOFFTracebackTable = getXCOFFTracebackTable();
Options.BBSections = getBBSectionsMode(Options);
Options.UniqueSectionNames = getUniqueSectionNames();
Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames();
+ Options.StackProtectorGuard = getStackProtectorGuardMode(Options);
+ Options.StackProtectorGuardOffset = getStackProtectorGuardOffset();
+ Options.StackProtectorGuardReg = getStackProtectorGuardReg();
Options.TLSSize = getTLSSize();
Options.EmulatedTLS = getEmulatedTLS();
Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0;
Options.ExceptionModel = getExceptionModel();
Options.EmitStackSizeSection = getEnableStackSizeSection();
+ Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter();
Options.EmitAddrsig = getEnableAddrsig();
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
+ Options.PseudoProbeForProfiling = getPseudoProbeForProfiling();
+ Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
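Because the data-sections default now depends on the target, InitTargetOptionsFromCodeGenFlags takes the Triple. A minimal sketch of an updated caller (the triple-construction line is an assumption, not part of this diff):

  Triple TheTriple(sys::getDefaultTargetTriple());
  TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);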
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index d1529b08f708..93467e9d09b8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -48,6 +49,8 @@ namespace {
private:
bool isDead(const MachineInstr *MI) const;
+
+ bool eliminateDeadMI(MachineFunction &MF);
};
}
char DeadMachineInstructionElim::ID = 0;
@@ -107,7 +110,13 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ bool AnyChanges = eliminateDeadMI(MF);
+ while (AnyChanges && eliminateDeadMI(MF))
+ ;
+ return AnyChanges;
+}
+bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
bool AnyChanges = false;
MRI = &MF.getRegInfo();
TRI = MF.getSubtarget().getRegisterInfo();
@@ -116,22 +125,24 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
// be cleaned up.
- for (MachineBasicBlock &MBB : make_range(MF.rbegin(), MF.rend())) {
+ for (MachineBasicBlock *MBB : post_order(&MF)) {
// Start out assuming that reserved registers are live out of this block.
LivePhysRegs = MRI->getReservedRegs();
// Add live-ins from successors to LivePhysRegs. Normally, physregs are not
// live across blocks, but some targets (x86) can have flags live out of a
// block.
- for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
- E = MBB.succ_end(); S != E; S++)
+ for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+ E = MBB->succ_end();
+ S != E; S++)
for (const auto &LI : (*S)->liveins())
LivePhysRegs.set(LI.PhysReg);
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
- for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(),
- MIE = MBB.rend(); MII != MIE; ) {
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
+ MIE = MBB->rend();
+ MII != MIE;) {
MachineInstr *MI = &*MII++;
// If the instruction is dead, delete it!
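runOnMachineFunction now wraps the single sweep in a fixed-point loop: deleting an instruction can make a def in an already-visited block dead, so the sweep repeats until a full pass deletes nothing. The driver shape, as a plain C++ sketch:

  bool eliminateOnce(); // one bottom-up sweep; returns true if anything was deleted

  bool run() {
    bool Changed = eliminateOnce();
    while (Changed && eliminateOnce())
      ; // re-sweep until a whole pass is a no-op
    return Changed;
  }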
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 6d5306c1dc0c..03fe5f155291 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -25,11 +25,7 @@
//
//===----------------------------------------------------------------------===//
-#include <deque>
-#include <vector>
-
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -40,6 +36,7 @@
#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <deque>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index c75c957bff8a..97e0162f35a1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -44,67 +45,44 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
- class DwarfEHPrepare : public FunctionPass {
- // RewindFunction - _Unwind_Resume or the target equivalent.
- FunctionCallee RewindFunction = nullptr;
+class DwarfEHPrepare {
+ CodeGenOpt::Level OptLevel;
- CodeGenOpt::Level OptLevel;
- DominatorTree *DT = nullptr;
- const TargetLowering *TLI = nullptr;
-
- bool InsertUnwindResumeCalls(Function &Fn);
- Value *GetExceptionObject(ResumeInst *RI);
- size_t
- pruneUnreachableResumes(Function &Fn,
- SmallVectorImpl<ResumeInst *> &Resumes,
- SmallVectorImpl<LandingPadInst *> &CleanupLPads);
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ FunctionCallee &RewindFunction;
- public:
- static char ID; // Pass identification, replacement for typeid.
-
- DwarfEHPrepare(CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
- : FunctionPass(ID), OptLevel(OptLevel) {}
+ Function &F;
+ const TargetLowering &TLI;
+ DomTreeUpdater *DTU;
+ const TargetTransformInfo *TTI;
- bool runOnFunction(Function &Fn) override;
+ /// Return the exception object from the value passed into
+ /// the 'resume' instruction (typically an aggregate). Clean up any dead
+ /// instructions, including the 'resume' instruction.
+ Value *GetExceptionObject(ResumeInst *RI);
- bool doFinalization(Module &M) override {
- RewindFunction = nullptr;
- return false;
- }
+ /// Replace resumes that are not reachable from a cleanup landing pad with
+ /// unreachable and then simplify those blocks.
+ size_t
+ pruneUnreachableResumes(SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads);
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ /// Convert the ResumeInsts that are still present
+ /// into calls to the appropriate _Unwind_Resume function.
+ bool InsertUnwindResumeCalls();
- StringRef getPassName() const override {
- return "Exception handling preparation";
- }
- };
+public:
+ DwarfEHPrepare(CodeGenOpt::Level OptLevel_, FunctionCallee &RewindFunction_,
+ Function &F_, const TargetLowering &TLI_, DomTreeUpdater *DTU_,
+ const TargetTransformInfo *TTI_)
+ : OptLevel(OptLevel_), RewindFunction(RewindFunction_), F(F_), TLI(TLI_),
+ DTU(DTU_), TTI(TTI_) {}
-} // end anonymous namespace
+ bool run();
+};
-char DwarfEHPrepare::ID = 0;
+} // namespace
-INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE,
- "Prepare DWARF exceptions", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(DwarfEHPrepare, DEBUG_TYPE,
- "Prepare DWARF exceptions", false, false)
-
-FunctionPass *llvm::createDwarfEHPass(CodeGenOpt::Level OptLevel) {
- return new DwarfEHPrepare(OptLevel);
-}
-
-void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetPassConfig>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- if (OptLevel != CodeGenOpt::None)
- AU.addRequired<DominatorTreeWrapperPass>();
-}
-
-/// GetExceptionObject - Return the exception object from the value passed into
-/// the 'resume' instruction (typically an aggregate). Clean up any dead
-/// instructions, including the 'resume' instruction.
Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
Value *V = RI->getOperand(0);
Value *ExnObj = nullptr;
@@ -142,16 +120,16 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
return ExnObj;
}
-/// Replace resumes that are not reachable from a cleanup landing pad with
-/// unreachable and then simplify those blocks.
size_t DwarfEHPrepare::pruneUnreachableResumes(
- Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<ResumeInst *> &Resumes,
SmallVectorImpl<LandingPadInst *> &CleanupLPads) {
+ assert(DTU && "Should have DomTreeUpdater here.");
+
BitVector ResumeReachable(Resumes.size());
size_t ResumeIndex = 0;
for (auto *RI : Resumes) {
for (auto *LP : CleanupLPads) {
- if (isPotentiallyReachable(LP, RI, nullptr, DT)) {
+ if (isPotentiallyReachable(LP, RI, nullptr, &DTU->getDomTree())) {
ResumeReachable.set(ResumeIndex);
break;
}
@@ -163,9 +141,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
if (ResumeReachable.all())
return Resumes.size();
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
- LLVMContext &Ctx = Fn.getContext();
+ LLVMContext &Ctx = F.getContext();
// Otherwise, insert unreachable instructions and call simplifycfg.
size_t ResumesLeft = 0;
@@ -177,19 +153,17 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
BasicBlock *BB = RI->getParent();
new UnreachableInst(Ctx, RI);
RI->eraseFromParent();
- simplifyCFG(BB, TTI);
+ simplifyCFG(BB, *TTI, RequireAndPreserveDomTree ? DTU : nullptr);
}
}
Resumes.resize(ResumesLeft);
return ResumesLeft;
}
-/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
-/// into calls to the appropriate _Unwind_Resume function.
-bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
- SmallVector<ResumeInst*, 16> Resumes;
- SmallVector<LandingPadInst*, 16> CleanupLPads;
- for (BasicBlock &BB : Fn) {
+bool DwarfEHPrepare::InsertUnwindResumeCalls() {
+ SmallVector<ResumeInst *, 16> Resumes;
+ SmallVector<LandingPadInst *, 16> CleanupLPads;
+ for (BasicBlock &BB : F) {
if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator()))
Resumes.push_back(RI);
if (auto *LP = BB.getLandingPadInst())
@@ -201,25 +175,25 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
return false;
// Check the personality, don't do anything if it's scope-based.
- EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn());
+ EHPersonality Pers = classifyEHPersonality(F.getPersonalityFn());
if (isScopedEHPersonality(Pers))
return false;
- LLVMContext &Ctx = Fn.getContext();
+ LLVMContext &Ctx = F.getContext();
size_t ResumesLeft = Resumes.size();
if (OptLevel != CodeGenOpt::None)
- ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads);
+ ResumesLeft = pruneUnreachableResumes(Resumes, CleanupLPads);
if (ResumesLeft == 0)
return true; // We pruned them all.
// Find the rewind function if we didn't already.
if (!RewindFunction) {
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
- Type::getInt8PtrTy(Ctx), false);
- const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy);
+ FunctionType *FTy =
+ FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false);
+ const char *RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy);
}
// Create the basic block where the _Unwind_Resume call will live.
@@ -232,22 +206,27 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
// Call the _Unwind_Resume function.
CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
- CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME));
// We never expect _Unwind_Resume to return.
+ CI->setDoesNotReturn();
new UnreachableInst(Ctx, UnwindBB);
return true;
}
- BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
- PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft,
- "exn.obj", UnwindBB);
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(Resumes.size());
+
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &F);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj",
+ UnwindBB);
// Extract the exception object from the ResumeInst and add it to the PHI node
// that feeds the _Unwind_Resume call.
for (ResumeInst *RI : Resumes) {
BasicBlock *Parent = RI->getParent();
BranchInst::Create(UnwindBB, Parent);
+ Updates.push_back({DominatorTree::Insert, Parent, UnwindBB});
Value *ExnObj = GetExceptionObject(RI);
PN->addIncoming(ExnObj, Parent);
@@ -257,21 +236,100 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
// Call the function.
CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
- CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME));
// We never expect _Unwind_Resume to return.
+ CI->setDoesNotReturn();
new UnreachableInst(Ctx, UnwindBB);
+
+ if (DTU && RequireAndPreserveDomTree)
+ DTU->applyUpdates(Updates);
+
return true;
}
-bool DwarfEHPrepare::runOnFunction(Function &Fn) {
- const TargetMachine &TM =
- getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
- DT = OptLevel != CodeGenOpt::None
- ? &getAnalysis<DominatorTreeWrapperPass>().getDomTree() : nullptr;
- TLI = TM.getSubtargetImpl(Fn)->getTargetLowering();
- bool Changed = InsertUnwindResumeCalls(Fn);
- DT = nullptr;
- TLI = nullptr;
+bool DwarfEHPrepare::run() {
+ assert(((OptLevel == CodeGenOpt::None || !RequireAndPreserveDomTree) ||
+ (DTU &&
+ DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
+ "Original domtree is invalid?");
+
+ bool Changed = InsertUnwindResumeCalls();
+
+ assert(((OptLevel == CodeGenOpt::None || !RequireAndPreserveDomTree) ||
+ (DTU &&
+ DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
+ "Original domtree is invalid?");
+
return Changed;
}
+
+static bool prepareDwarfEH(CodeGenOpt::Level OptLevel,
+ FunctionCallee &RewindFunction, Function &F,
+ const TargetLowering &TLI, DominatorTree *DT,
+ const TargetTransformInfo *TTI) {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+
+ return DwarfEHPrepare(OptLevel, RewindFunction, F, TLI, DT ? &DTU : nullptr,
+ TTI)
+ .run();
+}
+
+namespace {
+
+class DwarfEHPrepareLegacyPass : public FunctionPass {
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ FunctionCallee RewindFunction = nullptr;
+
+ CodeGenOpt::Level OptLevel;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ DwarfEHPrepareLegacyPass(CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
+ : FunctionPass(ID), OptLevel(OptLevel) {}
+
+ bool runOnFunction(Function &F) override {
+ const TargetMachine &TM =
+ getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ const TargetLowering &TLI = *TM.getSubtargetImpl(F)->getTargetLowering();
+ DominatorTree *DT = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
+ if (OptLevel != CodeGenOpt::None) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ }
+ return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (OptLevel != CodeGenOpt::None) {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (RequireAndPreserveDomTree)
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+ }
+
+ StringRef getPassName() const override {
+ return "Exception handling preparation";
+ }
+};
+
+} // end anonymous namespace
+
+char DwarfEHPrepareLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DwarfEHPrepareLegacyPass, DEBUG_TYPE,
+ "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(DwarfEHPrepareLegacyPass, DEBUG_TYPE,
+ "Prepare DWARF exceptions", false, false)
+
+FunctionPass *llvm::createDwarfEHPass(CodeGenOpt::Level OptLevel) {
+ return new DwarfEHPrepareLegacyPass(OptLevel);
+}
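
Note: the hunk above splits the pass into a plain transform class plus a thin legacy-pass wrapper that gathers analyses and forwards them. Below is a minimal, LLVM-free sketch of that wrapper pattern; all class and function names here are illustrative stand-ins, not LLVM's.

    // Minimal sketch of the refactoring pattern applied above: the transform
    // becomes a plain class, and the legacy pass is reduced to a thin wrapper
    // that collects the analyses it needs and forwards them.
    #include <iostream>

    struct Analyses {            // stands in for the DomTree/TTI/TLI lookups
      bool HaveDomTree = true;
    };

    class TransformImpl {        // analogous to the standalone DwarfEHPrepare
      const Analyses &A;
    public:
      explicit TransformImpl(const Analyses &A) : A(A) {}
      bool run() {               // analogous to DwarfEHPrepare::run()
        std::cout << "running with domtree=" << A.HaveDomTree << '\n';
        return true;
      }
    };

    class LegacyPassWrapper {    // analogous to DwarfEHPrepareLegacyPass
    public:
      bool runOnFunction() {
        Analyses A;              // in the real pass: getAnalysis<...>()
        return TransformImpl(A).run();
      }
    };

    int main() { return LegacyPassWrapper().runOnFunction() ? 0 : 1; }

In the sketch, the transform itself carries no pass-manager state, mirroring how run() above only sees the analyses it was handed.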
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 96d4efb856c1..cf7d93d6a33a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
@@ -264,7 +265,8 @@ bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {
// Remember clobbered regunits.
if (MO.isDef() && Register::isPhysicalRegister(Reg))
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units)
ClobberedRegUnits.set(*Units);
if (!MO.readsReg() || !Register::isVirtualRegister(Reg))
@@ -363,7 +365,7 @@ bool SSAIfConv::findInsertionPoint() {
// Keep track of live regunits before the current position.
// Only track RegUnits that are also in ClobberedRegUnits.
LiveRegUnits.clear();
- SmallVector<unsigned, 8> Reads;
+ SmallVector<MCRegister, 8> Reads;
MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
MachineBasicBlock::iterator I = Head->end();
MachineBasicBlock::iterator B = Head->begin();
@@ -385,11 +387,12 @@ bool SSAIfConv::findInsertionPoint() {
continue;
// I clobbers Reg, so it isn't live before I.
if (MO.isDef())
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units)
LiveRegUnits.erase(*Units);
// Unless I reads Reg.
if (MO.readsReg())
- Reads.push_back(Reg);
+ Reads.push_back(Reg.asMCReg());
}
// Anything read by I is live before I.
while (!Reads.empty())
@@ -794,6 +797,17 @@ static unsigned adjCycles(unsigned Cyc, int Delta) {
return Cyc + Delta;
}
+namespace {
+/// Helper class to simplify emission of cycle counts into optimization remarks.
+struct Cycles {
+ const char *Key;
+ unsigned Value;
+};
+template <typename Remark> Remark &operator<<(Remark &R, Cycles C) {
+ return R << ore::NV(C.Key, C.Value) << (C.Value == 1 ? " cycle" : " cycles");
+}
+} // anonymous namespace
+
/// Apply cost model and heuristics to the if-conversion in IfConv.
/// Return true if the conversion is a good idea.
///
@@ -814,6 +828,9 @@ bool EarlyIfConverter::shouldConvertIf() {
// Set a somewhat arbitrary limit on the critical path extension we accept.
unsigned CritLimit = SchedModel.MispredictPenalty/2;
+ MachineBasicBlock &MBB = *IfConv.Head;
+ MachineOptimizationRemarkEmitter MORE(*MBB.getParent(), nullptr);
+
// If-conversion only makes sense when there is unexploited ILP. Compute the
// maximum-ILP resource length of the trace after if-conversion. Compare it
// to the shortest critical path.
@@ -825,6 +842,17 @@ bool EarlyIfConverter::shouldConvertIf() {
<< ", minimal critical path " << MinCrit << '\n');
if (ResLength > MinCrit + CritLimit) {
LLVM_DEBUG(dbgs() << "Not enough available ILP.\n");
+ MORE.emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion",
+ MBB.findDebugLoc(MBB.back()), &MBB);
+ R << "did not if-convert branch: the resulting critical path ("
+ << Cycles{"ResLength", ResLength}
+ << ") would extend the shorter leg's critical path ("
+ << Cycles{"MinCrit", MinCrit} << ") by more than the threshold of "
+ << Cycles{"CritLimit", CritLimit}
+ << ", which cannot be hidden by available ILP.";
+ return R;
+ });
return false;
}
@@ -839,6 +867,14 @@ bool EarlyIfConverter::shouldConvertIf() {
// Look at all the tail phis, and compute the critical path extension caused
// by inserting select instructions.
MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
+ struct CriticalPathInfo {
+ unsigned Extra; // Count of extra cycles that the component adds.
+ unsigned Depth; // Absolute depth of the component in cycles.
+ };
+ CriticalPathInfo Cond{};
+ CriticalPathInfo TBlock{};
+ CriticalPathInfo FBlock{};
+ bool ShouldConvert = true;
for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);
@@ -850,9 +886,11 @@ bool EarlyIfConverter::shouldConvertIf() {
if (CondDepth > MaxDepth) {
unsigned Extra = CondDepth - MaxDepth;
LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ if (Extra > Cond.Extra)
+ Cond = {Extra, CondDepth};
if (Extra > CritLimit) {
LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
- return false;
+ ShouldConvert = false;
}
}
@@ -861,9 +899,11 @@ bool EarlyIfConverter::shouldConvertIf() {
if (TDepth > MaxDepth) {
unsigned Extra = TDepth - MaxDepth;
LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ if (Extra > TBlock.Extra)
+ TBlock = {Extra, TDepth};
if (Extra > CritLimit) {
LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
- return false;
+ ShouldConvert = false;
}
}
@@ -872,13 +912,63 @@ bool EarlyIfConverter::shouldConvertIf() {
if (FDepth > MaxDepth) {
unsigned Extra = FDepth - MaxDepth;
LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ if (Extra > FBlock.Extra)
+ FBlock = {Extra, FDepth};
if (Extra > CritLimit) {
LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
- return false;
+ ShouldConvert = false;
}
}
}
- return true;
+
+ // Organize by "short" and "long" legs, since the diagnostics get confusing
+ // when referring to the "true" and "false" sides of the branch, given that
+ // those don't always correlate with what the user wrote in source-terms.
+ const CriticalPathInfo Short = TBlock.Extra > FBlock.Extra ? FBlock : TBlock;
+ const CriticalPathInfo Long = TBlock.Extra > FBlock.Extra ? TBlock : FBlock;
+
+ if (ShouldConvert) {
+ MORE.emit([&]() {
+ MachineOptimizationRemark R(DEBUG_TYPE, "IfConversion",
+ MBB.back().getDebugLoc(), &MBB);
+ R << "performing if-conversion on branch: the condition adds "
+ << Cycles{"CondCycles", Cond.Extra} << " to the critical path";
+ if (Short.Extra > 0)
+ R << ", and the short leg adds another "
+ << Cycles{"ShortCycles", Short.Extra};
+ if (Long.Extra > 0)
+ R << ", and the long leg adds another "
+ << Cycles{"LongCycles", Long.Extra};
+ R << ", each staying under the threshold of "
+ << Cycles{"CritLimit", CritLimit} << ".";
+ return R;
+ });
+ } else {
+ MORE.emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion",
+ MBB.back().getDebugLoc(), &MBB);
+ R << "did not if-convert branch: the condition would add "
+ << Cycles{"CondCycles", Cond.Extra} << " to the critical path";
+ if (Cond.Extra > CritLimit)
+ R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
+ if (Short.Extra > 0) {
+ R << ", and the short leg would add another "
+ << Cycles{"ShortCycles", Short.Extra};
+ if (Short.Extra > CritLimit)
+ R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
+ }
+ if (Long.Extra > 0) {
+ R << ", and the long leg would add another "
+ << Cycles{"LongCycles", Long.Extra};
+ if (Long.Extra > CritLimit)
+ R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
+ }
+ R << ".";
+ return R;
+ });
+ }
+
+ return ShouldConvert;
}
/// Attempt repeated if-conversion on MBB, return true if successful.
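
Note: the Cycles helper introduced above is just a key/value pair with a streaming operator that pluralizes the unit. A standalone sketch of the same idea follows, with std::ostream standing in for the remark types and the ore::NV wrapper elided.

    // Standalone sketch of the Cycles streaming helper used in the remarks
    // above: a small struct plus a templated operator<< that appends the
    // correctly pluralized unit.
    #include <iostream>

    struct Cycles {
      const char *Key;
      unsigned Value;
    };

    template <typename Stream> Stream &operator<<(Stream &OS, Cycles C) {
      return OS << C.Key << '=' << C.Value
                << (C.Value == 1 ? " cycle" : " cycles");
    }

    int main() {
      std::cout << "critical path: " << Cycles{"CritLimit", 1} << ", "
                << Cycles{"ResLength", 12} << '\n';
      // prints: critical path: CritLimit=1 cycle, ResLength=12 cycles
    }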
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
index 45f21c1085dd..a4c9f02dc64d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for reduction intrinsics, allowing targets
-// to enable the experimental intrinsics until just before codegen.
+// to enable the intrinsics until just before codegen.
//
//===----------------------------------------------------------------------===//
@@ -30,49 +30,49 @@ namespace {
unsigned getOpcode(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::vector_reduce_fadd:
return Instruction::FAdd;
- case Intrinsic::experimental_vector_reduce_v2_fmul:
+ case Intrinsic::vector_reduce_fmul:
return Instruction::FMul;
- case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::vector_reduce_add:
return Instruction::Add;
- case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::vector_reduce_mul:
return Instruction::Mul;
- case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::vector_reduce_and:
return Instruction::And;
- case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::vector_reduce_or:
return Instruction::Or;
- case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::vector_reduce_xor:
return Instruction::Xor;
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
return Instruction::ICmp;
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
return Instruction::FCmp;
default:
llvm_unreachable("Unexpected ID");
}
}
-RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
+RecurKind getRK(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::experimental_vector_reduce_smax:
- return RecurrenceDescriptor::MRK_SIntMax;
- case Intrinsic::experimental_vector_reduce_smin:
- return RecurrenceDescriptor::MRK_SIntMin;
- case Intrinsic::experimental_vector_reduce_umax:
- return RecurrenceDescriptor::MRK_UIntMax;
- case Intrinsic::experimental_vector_reduce_umin:
- return RecurrenceDescriptor::MRK_UIntMin;
- case Intrinsic::experimental_vector_reduce_fmax:
- return RecurrenceDescriptor::MRK_FloatMax;
- case Intrinsic::experimental_vector_reduce_fmin:
- return RecurrenceDescriptor::MRK_FloatMin;
+ case Intrinsic::vector_reduce_smax:
+ return RecurKind::SMax;
+ case Intrinsic::vector_reduce_smin:
+ return RecurKind::SMin;
+ case Intrinsic::vector_reduce_umax:
+ return RecurKind::UMax;
+ case Intrinsic::vector_reduce_umin:
+ return RecurKind::UMin;
+ case Intrinsic::vector_reduce_fmax:
+ return RecurKind::FMax;
+ case Intrinsic::vector_reduce_fmin:
+ return RecurKind::FMin;
default:
- return RecurrenceDescriptor::MRK_Invalid;
+ return RecurKind::None;
}
}
@@ -83,19 +83,19 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
default: break;
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
if (TTI->shouldExpandReduction(II))
Worklist.push_back(II);
@@ -108,7 +108,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
FastMathFlags FMF =
isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
Intrinsic::ID ID = II->getIntrinsicID();
- RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
+ RecurKind RK = getRK(ID);
Value *Rdx = nullptr;
IRBuilder<> Builder(II);
@@ -116,42 +116,54 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
Builder.setFastMathFlags(FMF);
switch (ID) {
default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- case Intrinsic::experimental_vector_reduce_v2_fmul: {
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
// FMFs must be attached to the call, otherwise it's an ordered reduction
// and it can't be handled by generating a shuffle sequence.
Value *Acc = II->getArgOperand(0);
Value *Vec = II->getArgOperand(1);
if (!FMF.allowReassoc())
- Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
+ Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK);
else {
if (!isPowerOf2_32(
cast<FixedVectorType>(Vec->getType())->getNumElements()))
continue;
- Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
Acc, Rdx, "bin.rdx");
}
break;
}
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin: {
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin: {
Value *Vec = II->getArgOperand(0);
if (!isPowerOf2_32(
cast<FixedVectorType>(Vec->getType())->getNumElements()))
continue;
- Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ break;
+ }
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin: {
+ // FIXME: We only expand 'fast' reductions here because the underlying
+ // code in createMinMaxOp() assumes that comparisons use 'fast'
+ // semantics.
+ Value *Vec = II->getArgOperand(0);
+ if (!isPowerOf2_32(
+ cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
+ !FMF.isFast())
+ continue;
+
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
break;
}
}
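
Note: for the power-of-two cases guarded by isPowerOf2_32 above, the shuffle expansion folds the upper half of the vector onto the lower half until one lane remains. Below is a scalar, LLVM-free sketch of that reduction shape, illustrative only and assuming a power-of-two element count.

    // Sketch of the log2(N)-step reduction the shuffle expansion produces:
    // each step adds the upper half onto the lower half and halves the width.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    static int shuffleReduceAdd(std::vector<int> V) {
      for (std::size_t Width = V.size() / 2; Width >= 1; Width /= 2)
        for (std::size_t I = 0; I < Width; ++I)
          V[I] += V[I + Width];   // the "bin.rdx" step on each halved vector
      return V[0];
    }

    int main() {
      std::cout << shuffleReduceAdd({1, 2, 3, 4, 5, 6, 7, 8}) << '\n'; // 36
    }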
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 27319804049d..f8f99b7e87f2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -46,6 +46,20 @@ static cl::opt<bool> FixupSCSExtendSlotSize(
cl::desc("Allow spill in spill slot of greater size than register size"),
cl::Hidden);
+static cl::opt<bool> PassGCPtrInCSR(
+ "fixup-allow-gcptr-in-csr", cl::Hidden, cl::init(false),
+ cl::desc("Allow passing GC Pointer arguments in callee saved registers"));
+
+static cl::opt<bool> EnableCopyProp(
+ "fixup-scs-enable-copy-propagation", cl::Hidden, cl::init(true),
+ cl::desc("Enable simple copy propagation during register reloading"));
+
+// This is a purely debugging option.
+// It may be handy for investigating statepoint spilling issues.
+static cl::opt<unsigned> MaxStatepointsWithRegs(
+ "fixup-max-csr-statepoints", cl::Hidden,
+ cl::desc("Max number of statepoints allowed to pass GC Ptrs in registers"));
+
namespace {
class FixupStatepointCallerSaved : public MachineFunctionPass {
@@ -67,6 +81,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
+
} // End anonymous namespace.
char FixupStatepointCallerSaved::ID = 0;
@@ -83,7 +98,101 @@ static unsigned getRegisterSize(const TargetRegisterInfo &TRI, Register Reg) {
return TRI.getSpillSize(*RC);
}
+// Try to eliminate redundant copy to register which we're going to
+// spill, i.e. try to change:
+// X = COPY Y
+// SPILL X
+// to
+// SPILL Y
+// If there are no uses of X between copy and STATEPOINT, that COPY
+// may be eliminated.
+// Reg - register we're about to spill.
+// RI - on entry, points to the statepoint;
+//      on successful copy propagation, set to the new spill point.
+// IsKill - set to true if the COPY is a kill (there are no uses of Y).
+// Returns either the found copy source register or the original one.
+static Register performCopyPropagation(Register Reg,
+ MachineBasicBlock::iterator &RI,
+ bool &IsKill, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
+ // First check if statepoint itself uses Reg in non-meta operands.
+ int Idx = RI->findRegisterUseOperandIdx(Reg, false, &TRI);
+ if (Idx >= 0 && (unsigned)Idx < StatepointOpers(&*RI).getNumDeoptArgsIdx()) {
+ IsKill = false;
+ return Reg;
+ }
+
+ if (!EnableCopyProp)
+ return Reg;
+
+ MachineBasicBlock *MBB = RI->getParent();
+ MachineBasicBlock::reverse_iterator E = MBB->rend();
+ MachineInstr *Def = nullptr, *Use = nullptr;
+ for (auto It = ++(RI.getReverse()); It != E; ++It) {
+ if (It->readsRegister(Reg, &TRI) && !Use)
+ Use = &*It;
+ if (It->modifiesRegister(Reg, &TRI)) {
+ Def = &*It;
+ break;
+ }
+ }
+
+ if (!Def)
+ return Reg;
+
+ auto DestSrc = TII.isCopyInstr(*Def);
+ if (!DestSrc || DestSrc->Destination->getReg() != Reg)
+ return Reg;
+
+ Register SrcReg = DestSrc->Source->getReg();
+
+ if (getRegisterSize(TRI, Reg) != getRegisterSize(TRI, SrcReg))
+ return Reg;
+
+ LLVM_DEBUG(dbgs() << "spillRegisters: perform copy propagation "
+ << printReg(Reg, &TRI) << " -> " << printReg(SrcReg, &TRI)
+ << "\n");
+
+ // Insert spill immediately after Def
+ RI = ++MachineBasicBlock::iterator(Def);
+ IsKill = DestSrc->Source->isKill();
+
+ // There are no uses of original register between COPY and STATEPOINT.
+ // There can't be any after STATEPOINT, so we can eliminate Def.
+ if (!Use) {
+ LLVM_DEBUG(dbgs() << "spillRegisters: removing dead copy " << *Def);
+ Def->eraseFromParent();
+ }
+ return SrcReg;
+}
+
namespace {
+// Pair {Register, FrameIndex}
+using RegSlotPair = std::pair<Register, int>;
+
+// Keeps track of what reloads were inserted in MBB.
+class RegReloadCache {
+ using ReloadSet = SmallSet<RegSlotPair, 8>;
+ DenseMap<const MachineBasicBlock *, ReloadSet> Reloads;
+
+public:
+ RegReloadCache() = default;
+
+ // Record reload of Reg from FI in block MBB
+ void recordReload(Register Reg, int FI, const MachineBasicBlock *MBB) {
+ RegSlotPair RSP(Reg, FI);
+ auto Res = Reloads[MBB].insert(RSP);
+ (void)Res;
+ assert(Res.second && "reload already exists");
+ }
+
+ // Does basic block MBB contain a reload of Reg from FI?
+ bool hasReload(Register Reg, int FI, const MachineBasicBlock *MBB) {
+ RegSlotPair RSP(Reg, FI);
+ return Reloads.count(MBB) && Reloads[MBB].count(RSP);
+ }
+};
+
// Cache used frame indexes during statepoint re-write to re-use them in
// processing next statepoint instruction.
// Two strategies. One is to preserve the size of spill slot while another one
@@ -105,24 +214,62 @@ private:
// size will be increased.
DenseMap<unsigned, FrameIndexesPerSize> Cache;
+ // Keeps track of slots reserved for the shared landing pad processing.
+ // Initialized from GlobalIndices for the current EHPad.
+ SmallSet<int, 8> ReservedSlots;
+
+ // Landing pad can be destination of several statepoints. Every register
+ // defined by such statepoints must be spilled to the same stack slot.
+ // This map keeps that information.
+ DenseMap<const MachineBasicBlock *, SmallVector<RegSlotPair, 8>>
+ GlobalIndices;
+
+ FrameIndexesPerSize &getCacheBucket(unsigned Size) {
+ // In FixupSCSExtendSlotSize mode the bucket with 0 index is used
+ // for all sizes.
+ return Cache[FixupSCSExtendSlotSize ? 0 : Size];
+ }
+
public:
FrameIndexesCache(MachineFrameInfo &MFI, const TargetRegisterInfo &TRI)
: MFI(MFI), TRI(TRI) {}
// Reset the current state of used frame indexes. After invocation of
- // this function all frame indexes are available for allocation.
- void reset() {
+ // this function all frame indexes are available for allocation with
+ // the exception of slots reserved for landing pad processing (if any).
+ void reset(const MachineBasicBlock *EHPad) {
for (auto &It : Cache)
It.second.Index = 0;
+
+ ReservedSlots.clear();
+ if (EHPad && GlobalIndices.count(EHPad))
+ for (auto &RSP : GlobalIndices[EHPad])
+ ReservedSlots.insert(RSP.second);
}
+
// Get frame index to spill the register.
- int getFrameIndex(Register Reg) {
+ int getFrameIndex(Register Reg, MachineBasicBlock *EHPad) {
+ // Check if slot for Reg is already reserved at EHPad.
+ auto It = GlobalIndices.find(EHPad);
+ if (It != GlobalIndices.end()) {
+ auto &Vec = It->second;
+ auto Idx = llvm::find_if(
+ Vec, [Reg](RegSlotPair &RSP) { return Reg == RSP.first; });
+ if (Idx != Vec.end()) {
+ int FI = Idx->second;
+ LLVM_DEBUG(dbgs() << "Found global FI " << FI << " for register "
+ << printReg(Reg, &TRI) << " at "
+ << printMBBReference(*EHPad) << "\n");
+ assert(ReservedSlots.count(FI) && "using unreserved slot");
+ return FI;
+ }
+ }
+
unsigned Size = getRegisterSize(TRI, Reg);
- // In FixupSCSExtendSlotSize mode the bucket with 0 index is used
- // for all sizes.
- unsigned Bucket = FixupSCSExtendSlotSize ? 0 : Size;
- FrameIndexesPerSize &Line = Cache[Bucket];
- if (Line.Index < Line.Slots.size()) {
+ FrameIndexesPerSize &Line = getCacheBucket(Size);
+ while (Line.Index < Line.Slots.size()) {
int FI = Line.Slots[Line.Index++];
+ if (ReservedSlots.count(FI))
+ continue;
// If all sizes are kept together we probably need to extend the
// spill slot size.
if (MFI.getObjectSize(FI) < Size) {
@@ -136,15 +283,25 @@ public:
NumSpillSlotsAllocated++;
Line.Slots.push_back(FI);
++Line.Index;
+
+ // Remember assignment {Reg, FI} for EHPad
+ if (EHPad) {
+ GlobalIndices[EHPad].push_back(std::make_pair(Reg, FI));
+ LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling reg "
+ << printReg(Reg, &TRI) << " at landing pad "
+ << printMBBReference(*EHPad) << "\n");
+ }
+
return FI;
}
+
// Sort all registers to spill in descending order. In the
// FixupSCSExtendSlotSize mode it will minimize the total frame size.
// In non FixupSCSExtendSlotSize mode we can skip this step.
void sortRegisters(SmallVectorImpl<Register> &Regs) {
if (!FixupSCSExtendSlotSize)
return;
- llvm::sort(Regs.begin(), Regs.end(), [&](Register &A, Register &B) {
+ llvm::sort(Regs, [&](Register &A, Register &B) {
return getRegisterSize(TRI, A) > getRegisterSize(TRI, B);
});
}
@@ -156,6 +313,8 @@ private:
// statepoint instruction.
MachineInstr &MI;
MachineFunction &MF;
+ // If non-null then statepoint is invoke, and this points to the landing pad.
+ MachineBasicBlock *EHPad;
const TargetRegisterInfo &TRI;
const TargetInstrInfo &TII;
MachineFrameInfo &MFI;
@@ -163,36 +322,77 @@ private:
const uint32_t *Mask;
// Cache of frame indexes used on previous instruction processing.
FrameIndexesCache &CacheFI;
+ bool AllowGCPtrInCSR;
// Operands with physical registers requiring spilling.
SmallVector<unsigned, 8> OpsToSpill;
// Set of register to spill.
SmallVector<Register, 8> RegsToSpill;
+ // Set of registers to reload after statepoint.
+ SmallVector<Register, 8> RegsToReload;
// Map Register to Frame Slot index.
DenseMap<Register, int> RegToSlotIdx;
public:
StatepointState(MachineInstr &MI, const uint32_t *Mask,
- FrameIndexesCache &CacheFI)
+ FrameIndexesCache &CacheFI, bool AllowGCPtrInCSR)
: MI(MI), MF(*MI.getMF()), TRI(*MF.getSubtarget().getRegisterInfo()),
TII(*MF.getSubtarget().getInstrInfo()), MFI(MF.getFrameInfo()),
- Mask(Mask), CacheFI(CacheFI) {}
+ Mask(Mask), CacheFI(CacheFI), AllowGCPtrInCSR(AllowGCPtrInCSR) {
+
+ // Find statepoint's landing pad, if any.
+ EHPad = nullptr;
+ MachineBasicBlock *MBB = MI.getParent();
+ // An invoke statepoint must be the last statepoint in the block.
+ bool Last = std::none_of(++MI.getIterator(), MBB->end().getInstrIterator(),
+ [](MachineInstr &I) {
+ return I.getOpcode() == TargetOpcode::STATEPOINT;
+ });
+
+ if (!Last)
+ return;
+
+ auto IsEHPad = [](MachineBasicBlock *B) { return B->isEHPad(); };
+
+ assert(llvm::count_if(MBB->successors(), IsEHPad) < 2 && "multiple EHPads");
+
+ auto It = llvm::find_if(MBB->successors(), IsEHPad);
+ if (It != MBB->succ_end())
+ EHPad = *It;
+ }
+
+ MachineBasicBlock *getEHPad() const { return EHPad; }
+
// Return true if register is callee saved.
bool isCalleeSaved(Register Reg) { return (Mask[Reg / 32] >> Reg % 32) & 1; }
+
// Iterates over statepoint meta args to find caller saver registers.
// Also cache the size of found registers.
// Returns true if caller save registers found.
bool findRegistersToSpill() {
+ SmallSet<Register, 8> GCRegs;
+ // All GC pointer operands assigned to registers produce new value.
+ // Since they're tied to their defs, it is enough to collect def registers.
+ for (const auto &Def : MI.defs())
+ GCRegs.insert(Def.getReg());
+
SmallSet<Register, 8> VisitedRegs;
for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
EndIdx = MI.getNumOperands();
Idx < EndIdx; ++Idx) {
MachineOperand &MO = MI.getOperand(Idx);
- if (!MO.isReg() || MO.isImplicit())
+ // Leave `undef` operands as is, StackMaps will rewrite them
+ // into a constant.
+ if (!MO.isReg() || MO.isImplicit() || MO.isUndef())
continue;
Register Reg = MO.getReg();
assert(Reg.isPhysical() && "Only physical regs are expected");
- if (isCalleeSaved(Reg))
+
+ if (isCalleeSaved(Reg) && (AllowGCPtrInCSR || !is_contained(GCRegs, Reg)))
continue;
+
+ LLVM_DEBUG(dbgs() << "Will spill " << printReg(Reg, &TRI) << " at index "
+ << Idx << "\n");
+
if (VisitedRegs.insert(Reg).second)
RegsToSpill.push_back(Reg);
OpsToSpill.push_back(Idx);
@@ -200,30 +400,109 @@ public:
CacheFI.sortRegisters(RegsToSpill);
return !RegsToSpill.empty();
}
+
// Spill all caller saved registers right before statepoint instruction.
// Remember frame index where register is spilled.
void spillRegisters() {
for (Register Reg : RegsToSpill) {
- int FI = CacheFI.getFrameIndex(Reg);
+ int FI = CacheFI.getFrameIndex(Reg, EHPad);
const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(*MI.getParent(), MI, Reg, true /*is_Kill*/, FI,
- RC, &TRI);
+
NumSpilledRegisters++;
RegToSlotIdx[Reg] = FI;
+
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, &TRI) << " to FI " << FI
+ << "\n");
+
+ // Perform trivial copy propagation
+ bool IsKill = true;
+ MachineBasicBlock::iterator InsertBefore(MI);
+ Reg = performCopyPropagation(Reg, InsertBefore, IsKill, TII, TRI);
+
+ LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore);
+ TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI,
+ RC, &TRI);
+ }
+ }
+
+ void insertReloadBefore(unsigned Reg, MachineBasicBlock::iterator It,
+ MachineBasicBlock *MBB) {
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+ int FI = RegToSlotIdx[Reg];
+ if (It != MBB->end()) {
+ TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI);
+ return;
}
+
+ // To insert reload at the end of MBB, insert it before last instruction
+ // and then swap them.
+ assert(!MBB->empty() && "Empty block");
+ --It;
+ TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI);
+ MachineInstr *Reload = It->getPrevNode();
+ int Dummy = 0;
+ (void)Dummy;
+ assert(TII.isLoadFromStackSlot(*Reload, Dummy) == Reg);
+ assert(Dummy == FI);
+ MBB->remove(Reload);
+ MBB->insertAfter(It, Reload);
}
+
+ // Insert reloads of (relocated) registers spilled in statepoint.
+ void insertReloads(MachineInstr *NewStatepoint, RegReloadCache &RC) {
+ MachineBasicBlock *MBB = NewStatepoint->getParent();
+ auto InsertPoint = std::next(NewStatepoint->getIterator());
+
+ for (auto Reg : RegsToReload) {
+ insertReloadBefore(Reg, InsertPoint, MBB);
+ LLVM_DEBUG(dbgs() << "Reloading " << printReg(Reg, &TRI) << " from FI "
+ << RegToSlotIdx[Reg] << " after statepoint\n");
+
+ if (EHPad && !RC.hasReload(Reg, RegToSlotIdx[Reg], EHPad)) {
+ RC.recordReload(Reg, RegToSlotIdx[Reg], EHPad);
+ auto EHPadInsertPoint = EHPad->SkipPHIsLabelsAndDebug(EHPad->begin());
+ insertReloadBefore(Reg, EHPadInsertPoint, EHPad);
+ LLVM_DEBUG(dbgs() << "...also reload at EHPad "
+ << printMBBReference(*EHPad) << "\n");
+ }
+ }
+ }
+
// Re-write statepoint machine instruction to replace caller saved operands
// with indirect memory location (frame index).
- void rewriteStatepoint() {
+ MachineInstr *rewriteStatepoint() {
MachineInstr *NewMI =
MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
MachineInstrBuilder MIB(MF, NewMI);
+ unsigned NumOps = MI.getNumOperands();
+
+ // New indices for the remaining defs.
+ SmallVector<unsigned, 8> NewIndices;
+ unsigned NumDefs = MI.getNumDefs();
+ for (unsigned I = 0; I < NumDefs; ++I) {
+ MachineOperand &DefMO = MI.getOperand(I);
+ assert(DefMO.isReg() && DefMO.isDef() && "Expected Reg Def operand");
+ Register Reg = DefMO.getReg();
+ if (!AllowGCPtrInCSR) {
+ assert(is_contained(RegsToSpill, Reg));
+ RegsToReload.push_back(Reg);
+ } else {
+ if (isCalleeSaved(Reg)) {
+ NewIndices.push_back(NewMI->getNumOperands());
+ MIB.addReg(Reg, RegState::Define);
+ } else {
+ NewIndices.push_back(NumOps);
+ RegsToReload.push_back(Reg);
+ }
+ }
+ }
+
// Add End marker.
OpsToSpill.push_back(MI.getNumOperands());
unsigned CurOpIdx = 0;
- for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ for (unsigned I = NumDefs; I < MI.getNumOperands(); ++I) {
MachineOperand &MO = MI.getOperand(I);
if (I == OpsToSpill[CurOpIdx]) {
int FI = RegToSlotIdx[MO.getReg()];
@@ -234,23 +513,38 @@ public:
MIB.addFrameIndex(FI);
MIB.addImm(0);
++CurOpIdx;
- } else
+ } else {
MIB.add(MO);
+ unsigned OldDef;
+ if (AllowGCPtrInCSR && MI.isRegTiedToDefOperand(I, &OldDef)) {
+ assert(OldDef < NumDefs);
+ assert(NewIndices[OldDef] < NumOps);
+ MIB->tieOperands(NewIndices[OldDef], MIB->getNumOperands() - 1);
+ }
+ }
}
assert(CurOpIdx == (OpsToSpill.size() - 1) && "Not all operands processed");
// Add mem operands.
NewMI->setMemRefs(MF, MI.memoperands());
for (auto It : RegToSlotIdx) {
+ Register R = It.first;
int FrameIndex = It.second;
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
- auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- getRegisterSize(TRI, It.first),
- MFI.getObjectAlign(FrameIndex));
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
+ if (is_contained(RegsToReload, R))
+ Flags |= MachineMemOperand::MOStore;
+ auto *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, getRegisterSize(TRI, R),
+ MFI.getObjectAlign(FrameIndex));
NewMI->addMemOperand(MF, MMO);
}
+
// Insert new statepoint and erase old one.
MI.getParent()->insert(MI, NewMI);
+
+ LLVM_DEBUG(dbgs() << "rewritten statepoint to : " << *NewMI << "\n");
MI.eraseFromParent();
+ return NewMI;
}
};
@@ -259,28 +553,33 @@ private:
MachineFunction &MF;
const TargetRegisterInfo &TRI;
FrameIndexesCache CacheFI;
+ RegReloadCache ReloadCache;
public:
StatepointProcessor(MachineFunction &MF)
: MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()),
CacheFI(MF.getFrameInfo(), TRI) {}
- bool process(MachineInstr &MI) {
+ bool process(MachineInstr &MI, bool AllowGCPtrInCSR) {
StatepointOpers SO(&MI);
uint64_t Flags = SO.getFlags();
// Do nothing for LiveIn, it supports all registers.
if (Flags & (uint64_t)StatepointFlags::DeoptLiveIn)
return false;
+ LLVM_DEBUG(dbgs() << "\nMBB " << MI.getParent()->getNumber() << " "
+ << MI.getParent()->getName() << " : process statepoint "
+ << MI);
CallingConv::ID CC = SO.getCallingConv();
const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC);
- CacheFI.reset();
- StatepointState SS(MI, Mask, CacheFI);
+ StatepointState SS(MI, Mask, CacheFI, AllowGCPtrInCSR);
+ CacheFI.reset(SS.getEHPad());
if (!SS.findRegistersToSpill())
return false;
SS.spillRegisters();
- SS.rewriteStatepoint();
+ auto *NewStatepoint = SS.rewriteStatepoint();
+ SS.insertReloads(NewStatepoint, ReloadCache);
return true;
}
};
@@ -305,7 +604,14 @@ bool FixupStatepointCallerSaved::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
StatepointProcessor SPP(MF);
- for (MachineInstr *I : Statepoints)
- Changed |= SPP.process(*I);
+ unsigned NumStatepoints = 0;
+ bool AllowGCPtrInCSR = PassGCPtrInCSR;
+ for (MachineInstr *I : Statepoints) {
+ ++NumStatepoints;
+ if (MaxStatepointsWithRegs.getNumOccurrences() &&
+ NumStatepoints >= MaxStatepointsWithRegs)
+ AllowGCPtrInCSR = false;
+ Changed |= SPP.process(*I, AllowGCPtrInCSR);
+ }
return Changed;
}
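
Note: the RegReloadCache added above exists so that a landing pad reached from several invoke statepoints gets at most one reload per (register, frame-index) pair. Below is a minimal stand-in using plain containers, with integer ids in place of the MachineBasicBlock, Register, and frame-index types.

    // LLVM-free sketch of the reload-cache idea: remember which
    // (block, register, frame-index) reloads were already emitted so a shared
    // landing pad gets exactly one reload per slot.
    #include <iostream>
    #include <set>
    #include <tuple>
    #include <utility>

    using BlockId = int;
    using RegSlotPair = std::pair<int, int>; // {register, frame index}

    class ReloadCache {
      std::set<std::tuple<BlockId, int, int>> Reloads;
    public:
      // Returns true if this is the first reload of RS.first from RS.second
      // in Block (and records it), false if it was already present.
      bool tryRecord(BlockId Block, RegSlotPair RS) {
        return Reloads.insert(std::make_tuple(Block, RS.first, RS.second)).second;
      }
    };

    int main() {
      ReloadCache RC;
      std::cout << RC.tryRecord(/*EHPad=*/7, {/*Reg=*/3, /*FI=*/1}) << '\n'; // 1
      std::cout << RC.tryRecord(7, {3, 1}) << '\n'; // 0: already reloaded there
    }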
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
index c6730aa6b00d..e2ee0c97f94d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -296,7 +296,10 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
} else {
Register FrameReg; // FIXME: surely GCRoot ought to store the
// register that the offset is from?
- RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg);
+ auto FrameOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg);
+ assert(!FrameOffset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ RI->StackOffset = FrameOffset.getFixed();
++RI;
}
}
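
Note: the change above reflects getFrameIndexReference now returning an offset with separate fixed and scalable parts, of which GC root lowering can only consume the fixed one. A tiny illustrative model follows; the Offset type here is a stand-in, not LLVM's StackOffset.

    // Stand-in for the split fixed/scalable offset handling above.
    #include <cassert>
    #include <iostream>

    struct Offset {
      long Fixed;
      long Scalable;   // multiple of the scalable vector length
      long getFixed() const { return Fixed; }
      long getScalable() const { return Scalable; }
    };

    int main() {
      Offset FrameOffset{-16, 0};  // what the frame lowering hands back
      assert(!FrameOffset.getScalable() && "scalable component not supported");
      std::cout << "stack offset: " << FrameOffset.getFixed() << '\n';
    }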
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index c4d8777615d2..2fa208fbfaaf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -59,6 +59,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
case TargetOpcode::G_UNMERGE_VALUES:
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_PTR_ADD:
+ case TargetOpcode::G_EXTRACT:
return true;
}
return false;
@@ -366,23 +367,30 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const {
return *this;
}
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDReg(Register Reg) const {
+ LLT Ty = MRI.getType(Reg);
+ if (Ty.isValid())
+ addNodeIDRegType(Ty);
+
+ if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
+ if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>())
+ addNodeIDRegType(RB);
+ else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
+ addNodeIDRegType(RC);
+ }
+ return *this;
+}
+
const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
const MachineOperand &MO) const {
if (MO.isReg()) {
Register Reg = MO.getReg();
if (!MO.isDef())
addNodeIDRegNum(Reg);
- LLT Ty = MRI.getType(Reg);
- if (Ty.isValid())
- addNodeIDRegType(Ty);
-
- if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
- if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>())
- addNodeIDRegType(RB);
- else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
- addNodeIDRegType(RC);
- }
+ // Profile the register properties.
+ addNodeIDReg(Reg);
assert(!MO.isImplicit() && "Unhandled case");
} else if (MO.isImm())
ID.AddInteger(MO.getImm());
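
Note: addNodeIDReg above folds a register's type and, when present, its bank or class constraint into the CSE profile, so two defs only unify when those properties agree. Below is a rough, LLVM-free sketch of that keying idea using string stand-ins and a hash-combine; it models the shape of the key, not the FoldingSetNodeID API.

    // Sketch: fold the register's type and bank/class constraint into the key.
    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <string>

    struct RegInfo {
      std::string Type;        // stands in for the LLT
      std::string BankOrClass; // stands in for RegBank / TargetRegisterClass
    };

    static std::size_t profileReg(std::size_t Seed, const RegInfo &R) {
      auto Mix = [&Seed](const std::string &S) {
        Seed ^= std::hash<std::string>{}(S) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2);
      };
      if (!R.Type.empty())
        Mix(R.Type);
      if (!R.BankOrClass.empty())
        Mix(R.BankOrClass);
      return Seed;
    }

    int main() {
      RegInfo A{"s64", "gpr"}, B{"s64", "fpr"};
      // Different bank constraints give different keys, so the defs don't CSE.
      std::cout << (profileReg(0, A) == profileReg(0, B)) << '\n'; // 0
    }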
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 88173dc4d302..2c86f06a602d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;
@@ -41,8 +42,14 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
if (MI) {
CSEInfo->countOpcodeHit(MI->getOpcode());
auto CurrPos = getInsertPt();
- if (!dominates(MI, CurrPos))
+ auto MII = MachineBasicBlock::iterator(MI);
+ if (MII == CurrPos) {
+ // Move the insert point ahead of the instruction so any future uses of
+ // this builder will have the def ready.
+ setInsertPt(*CurMBB, std::next(MII));
+ } else if (!dominates(MI, CurrPos)) {
CurMBB->splice(CurrPos, CurMBB, MI);
+ }
return MachineInstrBuilder(getMF(), MI);
}
return MachineInstrBuilder();
@@ -61,6 +68,11 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op,
case DstOp::DstType::Ty_RC:
B.addNodeIDRegType(Op.getRegClass());
break;
+ case DstOp::DstType::Ty_Reg: {
+ // Regs can have LLT&(RB|RC). If those exist, profile them as well.
+ B.addNodeIDReg(Op.getReg());
+ break;
+ }
default:
B.addNodeIDRegType(Op.getLLTTy(*getMRI()));
break;
@@ -70,6 +82,9 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op,
void CSEMIRBuilder::profileSrcOp(const SrcOp &Op,
GISelInstProfileBuilder &B) const {
switch (Op.getSrcOpKind()) {
+ case SrcOp::SrcType::Ty_Imm:
+ B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm()));
+ break;
case SrcOp::SrcType::Ty_Predicate:
B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate()));
break;
@@ -115,7 +130,7 @@ bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) {
if (DstOps.size() == 1)
return true; // always possible to emit copy to just 1 vreg.
- return std::all_of(DstOps.begin(), DstOps.end(), [](const DstOp &Op) {
+ return llvm::all_of(DstOps, [](const DstOp &Op) {
DstOp::DstType DT = Op.getDstOpKind();
return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC;
});
@@ -131,6 +146,21 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg)
return buildCopy(Op.getReg(), MIB.getReg(0));
}
+
+ // If we didn't generate a copy then we're re-using an existing node directly
+ // instead of emitting any code. Merge the debug location we wanted to emit
+ // into the instruction we're CSE'ing with. Debug locations aren't part of the
+ // profile so we don't need to recompute it.
+ if (getDebugLoc()) {
+ GISelChangeObserver *Observer = getState().Observer;
+ if (Observer)
+ Observer->changingInstr(*MIB);
+ MIB->setDebugLoc(
+ DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc()));
+ if (Observer)
+ Observer->changedInstr(*MIB);
+ }
+
return MIB;
}
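
Note: when the builder reuses an existing instruction, the hunk above merges the requested debug location into the cached one via DILocation::getMergedLocation. A much-simplified model of that merge follows; the real API is considerably smarter, and this sketch keeps only the equal-or-unknown behaviour.

    // Simplified model: identical locations are kept, differing ones collapse
    // to an "unknown" (line 0) location so the reused def does not claim to
    // originate from only one of its use sites.
    #include <iostream>

    struct DebugLoc {
      unsigned Line, Col;
      bool operator==(const DebugLoc &O) const {
        return Line == O.Line && Col == O.Col;
      }
    };

    static DebugLoc mergeLocations(DebugLoc A, DebugLoc B) {
      return A == B ? A : DebugLoc{0, 0};
    }

    int main() {
      DebugLoc Cached{10, 4}, Wanted{12, 8};
      DebugLoc Merged = mergeLocations(Cached, Wanted);
      std::cout << Merged.Line << ':' << Merged.Col << '\n'; // 0:0
    }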
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 1be0ca441205..803e1527a4f0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -30,6 +30,51 @@ using namespace llvm;
void CallLowering::anchor() {}
+/// Helper function which updates \p Flags when \p AttrFn returns true.
+static void
+addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
+ const std::function<bool(Attribute::AttrKind)> &AttrFn) {
+ if (AttrFn(Attribute::SExt))
+ Flags.setSExt();
+ if (AttrFn(Attribute::ZExt))
+ Flags.setZExt();
+ if (AttrFn(Attribute::InReg))
+ Flags.setInReg();
+ if (AttrFn(Attribute::StructRet))
+ Flags.setSRet();
+ if (AttrFn(Attribute::Nest))
+ Flags.setNest();
+ if (AttrFn(Attribute::ByVal))
+ Flags.setByVal();
+ if (AttrFn(Attribute::Preallocated))
+ Flags.setPreallocated();
+ if (AttrFn(Attribute::InAlloca))
+ Flags.setInAlloca();
+ if (AttrFn(Attribute::Returned))
+ Flags.setReturned();
+ if (AttrFn(Attribute::SwiftSelf))
+ Flags.setSwiftSelf();
+ if (AttrFn(Attribute::SwiftError))
+ Flags.setSwiftError();
+}
+
+ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
+ unsigned ArgIdx) const {
+ ISD::ArgFlagsTy Flags;
+ addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) {
+ return Call.paramHasAttr(ArgIdx, Attr);
+ });
+ return Flags;
+}
+
+void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
+ const AttributeList &Attrs,
+ unsigned OpIdx) const {
+ addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
+ return Attrs.hasAttribute(OpIdx, Attr);
+ });
+}
+
bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
ArrayRef<Register> ResRegs,
ArrayRef<ArrayRef<Register>> ArgRegs,
@@ -37,6 +82,29 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
std::function<unsigned()> GetCalleeReg) const {
CallLoweringInfo Info;
const DataLayout &DL = MIRBuilder.getDataLayout();
+ MachineFunction &MF = MIRBuilder.getMF();
+ bool CanBeTailCalled = CB.isTailCall() &&
+ isInTailCallPosition(CB, MF.getTarget()) &&
+ (MF.getFunction()
+ .getFnAttribute("disable-tail-calls")
+ .getValueAsString() != "true");
+
+ CallingConv::ID CallConv = CB.getCallingConv();
+ Type *RetTy = CB.getType();
+ bool IsVarArg = CB.getFunctionType()->isVarArg();
+
+ SmallVector<BaseArgInfo, 4> SplitArgs;
+ getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
+ Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);
+
+ if (!Info.CanLowerReturn) {
+ // Callee requires sret demotion.
+ insertSRetOutgoingArgument(MIRBuilder, CB, Info);
+
+ // The sret demotion isn't compatible with tail-calls, since the sret
+ // argument points into the caller's stack frame.
+ CanBeTailCalled = false;
+ }
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
@@ -44,9 +112,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
unsigned i = 0;
unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
for (auto &Arg : CB.args()) {
- ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
+ ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i),
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
+
+ // If we have an explicit sret argument that is an Instruction (i.e., it
+ // might point to function-local memory), we can't meaningfully tail-call.
+ if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg))
+ CanBeTailCalled = false;
+
Info.OrigArgs.push_back(OrigArg);
++i;
}
@@ -59,21 +133,16 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
else
Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
- Info.OrigRet = ArgInfo{ResRegs, CB.getType(), ISD::ArgFlagsTy{}};
+ Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}};
if (!Info.OrigRet.Ty->isVoidTy())
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
- MachineFunction &MF = MIRBuilder.getMF();
Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
- Info.CallConv = CB.getCallingConv();
+ Info.CallConv = CallConv;
Info.SwiftErrorVReg = SwiftErrorVReg;
Info.IsMustTailCall = CB.isMustTailCall();
- Info.IsTailCall =
- CB.isTailCall() && isInTailCallPosition(CB, MF.getTarget()) &&
- (MF.getFunction()
- .getFnAttribute("disable-tail-calls")
- .getValueAsString() != "true");
- Info.IsVarArg = CB.getFunctionType()->isVarArg();
+ Info.IsTailCall = CanBeTailCalled;
+ Info.IsVarArg = IsVarArg;
return lowerCall(MIRBuilder, Info);
}
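
Note: addFlagsUsingAttrFn above routes every flag through a single predicate callback, so the call-site and attribute-list entry points share one flag-mapping table. Below is a small LLVM-free sketch of the same shape, with string attribute names as stand-ins for Attribute::AttrKind.

    // Sketch: one helper sets flags from any attribute source by taking a
    // query callback, so multiple callers reuse the same mapping.
    #include <functional>
    #include <iostream>
    #include <set>
    #include <string>

    struct ArgFlags {
      bool SExt = false, ZExt = false, SRet = false;
    };

    static void addFlagsUsingAttrFn(
        ArgFlags &Flags, const std::function<bool(const std::string &)> &Has) {
      if (Has("signext")) Flags.SExt = true;
      if (Has("zeroext")) Flags.ZExt = true;
      if (Has("sret"))    Flags.SRet = true;
    }

    int main() {
      std::set<std::string> CallSiteAttrs = {"zeroext"};
      ArgFlags F;
      addFlagsUsingAttrFn(F, [&](const std::string &A) {
        return CallSiteAttrs.count(A) > 0;
      });
      std::cout << F.ZExt << ' ' << F.SExt << '\n'; // 1 0
    }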
@@ -83,24 +152,7 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const FuncInfoTy &FuncInfo) const {
auto &Flags = Arg.Flags[0];
const AttributeList &Attrs = FuncInfo.getAttributes();
- if (Attrs.hasAttribute(OpIdx, Attribute::ZExt))
- Flags.setZExt();
- if (Attrs.hasAttribute(OpIdx, Attribute::SExt))
- Flags.setSExt();
- if (Attrs.hasAttribute(OpIdx, Attribute::InReg))
- Flags.setInReg();
- if (Attrs.hasAttribute(OpIdx, Attribute::StructRet))
- Flags.setSRet();
- if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf))
- Flags.setSwiftSelf();
- if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError))
- Flags.setSwiftError();
- if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
- Flags.setByVal();
- if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated))
- Flags.setPreallocated();
- if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
- Flags.setInAlloca();
+ addArgFlagsFromAttributes(Flags, Attrs, OpIdx);
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
@@ -117,8 +169,6 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
FrameAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
Flags.setByValAlign(FrameAlign);
}
- if (Attrs.hasAttribute(OpIdx, Attribute::Nest))
- Flags.setNest();
Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
}
@@ -195,99 +245,97 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
EVT CurVT = EVT::getEVT(Args[i].Ty);
- if (!CurVT.isSimple() ||
- Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(),
- CCValAssign::Full, Args[i], Args[i].Flags[0],
- CCInfo)) {
- MVT NewVT = TLI->getRegisterTypeForCallingConv(
- F.getContext(), F.getCallingConv(), EVT(CurVT));
-
- // If we need to split the type over multiple regs, check it's a scenario
- // we currently support.
- unsigned NumParts = TLI->getNumRegistersForCallingConv(
- F.getContext(), F.getCallingConv(), CurVT);
- if (NumParts > 1) {
- // For now only handle exact splits.
- if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits())
- return false;
- }
+ if (CurVT.isSimple() &&
+ !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(),
+ CCValAssign::Full, Args[i], Args[i].Flags[0],
+ CCInfo))
+ continue;
+
+ MVT NewVT = TLI->getRegisterTypeForCallingConv(
+ F.getContext(), F.getCallingConv(), EVT(CurVT));
+
+ // If we need to split the type over multiple regs, check it's a scenario
+ // we currently support.
+ unsigned NumParts = TLI->getNumRegistersForCallingConv(
+ F.getContext(), F.getCallingConv(), CurVT);
+
+ if (NumParts == 1) {
+ // Try to use the register type if we couldn't assign the VT.
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[0], CCInfo))
+ return false;
+ continue;
+ }
- // For incoming arguments (physregs to vregs), we could have values in
- // physregs (or memlocs) which we want to extract and copy to vregs.
- // During this, we might have to deal with the LLT being split across
- // multiple regs, so we have to record this information for later.
- //
- // If we have outgoing args, then we have the opposite case. We have a
- // vreg with an LLT which we want to assign to a physical location, and
- // we might have to record that the value has to be split later.
- if (Handler.isIncomingArgumentHandler()) {
- if (NumParts == 1) {
- // Try to use the register type if we couldn't assign the VT.
- if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
- Args[i].Flags[0], CCInfo))
- return false;
+ assert(NumParts > 1);
+ // For now only handle exact splits.
+ if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits())
+ return false;
+
+ // For incoming arguments (physregs to vregs), we could have values in
+ // physregs (or memlocs) which we want to extract and copy to vregs.
+ // During this, we might have to deal with the LLT being split across
+ // multiple regs, so we have to record this information for later.
+ //
+ // If we have outgoing args, then we have the opposite case. We have a
+ // vreg with an LLT which we want to assign to a physical location, and
+ // we might have to record that the value has to be split later.
+ if (Handler.isIncomingArgumentHandler()) {
+ // We're handling an incoming arg which is split over multiple regs.
+ // E.g. passing an s128 on AArch64.
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ Args[i].OrigRegs.push_back(Args[i].Regs[0]);
+ Args[i].Regs.clear();
+ Args[i].Flags.clear();
+ LLT NewLLT = getLLTForMVT(NewVT);
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ Register Reg =
+ MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (Part == 0) {
+ Flags.setSplit();
} else {
- // We're handling an incoming arg which is split over multiple regs.
- // E.g. passing an s128 on AArch64.
- ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
- Args[i].OrigRegs.push_back(Args[i].Regs[0]);
- Args[i].Regs.clear();
- Args[i].Flags.clear();
- LLT NewLLT = getLLTForMVT(NewVT);
- // For each split register, create and assign a vreg that will store
- // the incoming component of the larger value. These will later be
- // merged to form the final vreg.
- for (unsigned Part = 0; Part < NumParts; ++Part) {
- Register Reg =
- MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
- ISD::ArgFlagsTy Flags = OrigFlags;
- if (Part == 0) {
- Flags.setSplit();
- } else {
- Flags.setOrigAlign(Align(1));
- if (Part == NumParts - 1)
- Flags.setSplitEnd();
- }
- Args[i].Regs.push_back(Reg);
- Args[i].Flags.push_back(Flags);
- if (Handler.assignArg(i + Part, NewVT, NewVT, CCValAssign::Full,
- Args[i], Args[i].Flags[Part], CCInfo)) {
- // Still couldn't assign this smaller part type for some reason.
- return false;
- }
- }
+ Flags.setOrigAlign(Align(1));
+ if (Part == NumParts - 1)
+ Flags.setSplitEnd();
}
- } else {
- // Handling an outgoing arg that might need to be split.
- if (NumParts < 2)
- return false; // Don't know how to deal with this type combination.
-
- // This type is passed via multiple registers in the calling convention.
- // We need to extract the individual parts.
- Register LargeReg = Args[i].Regs[0];
- LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
- auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
- assert(Unmerge->getNumOperands() == NumParts + 1);
- ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
- // We're going to replace the regs and flags with the split ones.
- Args[i].Regs.clear();
- Args[i].Flags.clear();
- for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) {
- ISD::ArgFlagsTy Flags = OrigFlags;
- if (PartIdx == 0) {
- Flags.setSplit();
- } else {
- Flags.setOrigAlign(Align(1));
- if (PartIdx == NumParts - 1)
- Flags.setSplitEnd();
- }
- Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
- Args[i].Flags.push_back(Flags);
- if (Handler.assignArg(i + PartIdx, NewVT, NewVT, CCValAssign::Full,
- Args[i], Args[i].Flags[PartIdx], CCInfo))
- return false;
+ Args[i].Regs.push_back(Reg);
+ Args[i].Flags.push_back(Flags);
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[Part], CCInfo)) {
+ // Still couldn't assign this smaller part type for some reason.
+ return false;
}
}
+ } else {
+ // This type is passed via multiple registers in the calling convention.
+ // We need to extract the individual parts.
+ Register LargeReg = Args[i].Regs[0];
+ LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
+ auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
+ assert(Unmerge->getNumOperands() == NumParts + 1);
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ // We're going to replace the regs and flags with the split ones.
+ Args[i].Regs.clear();
+ Args[i].Flags.clear();
+ for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) {
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (PartIdx == 0) {
+ Flags.setSplit();
+ } else {
+ Flags.setOrigAlign(Align(1));
+ if (PartIdx == NumParts - 1)
+ Flags.setSplitEnd();
+ }
+ Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
+ Args[i].Flags.push_back(Flags);
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full,
+ Args[i], Args[i].Flags[PartIdx], CCInfo))
+ return false;
+ }
}
}
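
For illustration only, a standalone sketch (separate from the patch itself; the PartFlags type and helper name are invented) of how the per-part flags are derived above when one value is split across several registers: the first part keeps the original alignment and is marked Split, later parts drop to alignment 1, and the last part is marked SplitEnd.

#include <cassert>
#include <vector>

// Simplified stand-in for the relevant bits of ISD::ArgFlagsTy.
struct PartFlags {
  bool Split = false;     // first part of a split value
  bool SplitEnd = false;  // last part of a split value
  unsigned OrigAlign = 1; // alignment carried by the first part only
};

std::vector<PartFlags> makeSplitFlags(unsigned NumParts, unsigned OrigAlign) {
  assert(NumParts > 1 && "only used when a value is split over several regs");
  std::vector<PartFlags> Flags(NumParts);
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    Flags[Part].OrigAlign = (Part == 0) ? OrigAlign : 1;
    if (Part == 0)
      Flags[Part].Split = true;
    else if (Part == NumParts - 1)
      Flags[Part].SplitEnd = true;
  }
  return Flags;
}
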
@@ -313,85 +361,239 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
EVT VAVT = VA.getValVT();
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
- if (VA.isRegLoc()) {
- if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) {
- if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) {
- // Expected to be multiple regs for a single incoming arg.
- unsigned NumArgRegs = Args[i].Regs.size();
- if (NumArgRegs < 2)
- return false;
-
- assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
- "Too many regs for number of args");
- for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
- // There should be Regs.size() ArgLocs per argument.
- VA = ArgLocs[j + Part];
- Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
- }
- j += NumArgRegs - 1;
- // Merge the split registers into the expected larger result vreg
- // of the original call.
- MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
- continue;
- }
- const LLT VATy(VAVT.getSimpleVT());
- Register NewReg =
- MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
- Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
- // If it's a vector type, we either need to truncate the elements
- // or do an unmerge to get the lower block of elements.
- if (VATy.isVector() &&
- VATy.getNumElements() > OrigVT.getVectorNumElements()) {
- // Just handle the case where the VA type is 2 * original type.
- if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
- LLVM_DEBUG(dbgs()
- << "Incoming promoted vector arg has too many elts");
- return false;
- }
- auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
- MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0));
- } else {
- MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
+ // Expected to be multiple regs for a single incoming arg.
+ // There should be Regs.size() ArgLocs per argument.
+ unsigned NumArgRegs = Args[i].Regs.size();
+
+ assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
+ "Too many regs for number of args");
+ for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
+ // There should be Regs.size() ArgLocs per argument.
+ VA = ArgLocs[j + Part];
+ if (VA.isMemLoc()) {
+ // Don't currently support loading/storing a type that needs to be split
+ // to the stack. Should be easy, just not implemented yet.
+ if (NumArgRegs > 1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Load/store a split arg to/from the stack not implemented yet\n");
+ return false;
}
- } else if (!Handler.isIncomingArgumentHandler()) {
- assert((j + (Args[i].Regs.size() - 1)) < ArgLocs.size() &&
- "Too many regs for number of args");
- // This is an outgoing argument that might have been split.
- for (unsigned Part = 0; Part < Args[i].Regs.size(); ++Part) {
- // There should be Regs.size() ArgLocs per argument.
- VA = ArgLocs[j + Part];
- Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
+
+ // FIXME: Use correct address space for pointer size
+ EVT LocVT = VA.getValVT();
+ unsigned MemSize = LocVT == MVT::iPTR ? DL.getPointerSize()
+ : LocVT.getStoreSize();
+ unsigned Offset = VA.getLocMemOffset();
+ MachinePointerInfo MPO;
+ Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO);
+ Handler.assignValueToAddress(Args[i], StackAddr,
+ MemSize, MPO, VA);
+ continue;
+ }
+
+ assert(VA.isRegLoc() && "custom loc should have been handled already");
+
+ // GlobalISel does not currently work for scalable vectors.
+ if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() ||
+ !Handler.isIncomingArgumentHandler()) {
+ // This is an argument that might have been split. There should be
+ // Regs.size() ArgLocs per argument.
+
+ // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge
+ // to the original register after handling all of the parts.
+ Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
+ continue;
+ }
+
+ // This ArgLoc covers multiple pieces, so we need to split it.
+ const LLT VATy(VAVT.getSimpleVT());
+ Register NewReg =
+ MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
+ Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
+ // If it's a vector type, we either need to truncate the elements
+ // or do an unmerge to get the lower block of elements.
+ if (VATy.isVector() &&
+ VATy.getNumElements() > OrigVT.getVectorNumElements()) {
+ // Just handle the case where the VA type is 2 * original type.
+ if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
+ LLVM_DEBUG(dbgs()
+ << "Incoming promoted vector arg has too many elts");
+ return false;
}
- j += Args[i].Regs.size() - 1;
+ auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
+ MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0));
} else {
- Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
}
- } else if (VA.isMemLoc()) {
- // Don't currently support loading/storing a type that needs to be split
- // to the stack. Should be easy, just not implemented yet.
- if (Args[i].Regs.size() > 1) {
- LLVM_DEBUG(
- dbgs()
- << "Load/store a split arg to/from the stack not implemented yet");
- return false;
+ }
+
+ // Now that all pieces have been handled, re-pack any arguments into any
+ // wider, original registers.
+ if (Handler.isIncomingArgumentHandler()) {
+ if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) {
+ assert(NumArgRegs >= 2);
+
+ // Merge the split registers into the expected larger result vreg
+ // of the original call.
+ MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
}
+ }
- EVT LocVT = VA.getValVT();
- unsigned MemSize = LocVT == MVT::iPTR ? DL.getPointerSize()
- : LocVT.getStoreSize();
+ j += NumArgRegs - 1;
+ }
- unsigned Offset = VA.getLocMemOffset();
- MachinePointerInfo MPO;
- Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO);
- Handler.assignValueToAddress(Args[i], StackAddr, MemSize, MPO, VA);
- } else {
- // FIXME: Support byvals and other weirdness
+ return true;
+}
+
+void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs, Register DemoteReg,
+ int FI) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const DataLayout &DL = MF.getDataLayout();
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+
+ assert(VRegs.size() == SplitVTs.size());
+
+ unsigned NumValues = SplitVTs.size();
+ Align BaseAlign = DL.getPrefTypeAlign(RetTy);
+ Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
+ LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL);
+
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+
+ for (unsigned I = 0; I < NumValues; ++I) {
+ Register Addr;
+ MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
+ auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MRI.getType(VRegs[I]).getSizeInBytes(),
+ commonAlignment(BaseAlign, Offsets[I]));
+ MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
+ }
+}
+
+void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs,
+ Register DemoteReg) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const DataLayout &DL = MF.getDataLayout();
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+
+ assert(VRegs.size() == SplitVTs.size());
+
+ unsigned NumValues = SplitVTs.size();
+ Align BaseAlign = DL.getPrefTypeAlign(RetTy);
+ unsigned AS = DL.getAllocaAddrSpace();
+ LLT OffsetLLTy =
+ getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL);
+
+ MachinePointerInfo PtrInfo(AS);
+
+ for (unsigned I = 0; I < NumValues; ++I) {
+ Register Addr;
+ MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
+ auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MRI.getType(VRegs[I]).getSizeInBytes(),
+ commonAlignment(BaseAlign, Offsets[I]));
+ MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
+ }
+}
+
+void CallLowering::insertSRetIncomingArgument(
+ const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg,
+ MachineRegisterInfo &MRI, const DataLayout &DL) const {
+ unsigned AS = DL.getAllocaAddrSpace();
+ DemoteReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(AS, DL.getPointerSizeInBits(AS)));
+
+ Type *PtrTy = PointerType::get(F.getReturnType(), AS);
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs);
+
+ // NOTE: Assume that a pointer won't get split into more than one VT.
+ assert(ValueVTs.size() == 1);
+
+ ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()));
+ setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F);
+ DemoteArg.Flags[0].setSRet();
+ SplitArgs.insert(SplitArgs.begin(), DemoteArg);
+}
+
+void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
+ const CallBase &CB,
+ CallLoweringInfo &Info) const {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ Type *RetTy = CB.getType();
+ unsigned AS = DL.getAllocaAddrSpace();
+ LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
+
+ int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject(
+ DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false);
+
+ Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0);
+ ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS));
+ setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB);
+ DemoteArg.Flags[0].setSRet();
+
+ Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg);
+ Info.DemoteStackIndex = FI;
+ Info.DemoteRegister = DemoteReg;
+}
+
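
A rough source-level analogy for the sret demotion implemented above (illustrative only; Big, produce_lowered and call_produce are invented names): a return value that cannot be passed in registers is instead returned through a hidden pointer to caller-allocated stack memory, with the stores and loads corresponding to insertSRetStores and insertSRetLoads.

#include <cstdint>

struct Big { uint64_t Vals[8]; }; // too large to return in registers

// Conceptually, `Big produce();` becomes:
void produce_lowered(Big *SRet /* hidden sret argument */) {
  for (unsigned I = 0; I < 8; ++I)
    SRet->Vals[I] = I; // stores of the split return value
}

// and a call site `Big B = produce();` becomes:
uint64_t call_produce() {
  Big Slot;               // stack object created in the caller
  produce_lowered(&Slot); // pointer passed as the demoted first argument
  return Slot.Vals[0];    // caller reads the value back from the slot
}
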
+bool CallLowering::checkReturn(CCState &CCInfo,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ CCAssignFn *Fn) const {
+ for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
+ MVT VT = MVT::getVT(Outs[I].Ty);
+ if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo))
return false;
- }
}
return true;
}
+void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy,
+ AttributeList Attrs,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ const DataLayout &DL) const {
+ LLVMContext &Context = RetTy->getContext();
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+
+ SmallVector<EVT, 4> SplitVTs;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs);
+ addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex);
+
+ for (EVT VT : SplitVTs) {
+ unsigned NumParts =
+ TLI->getNumRegistersForCallingConv(Context, CallConv, VT);
+ MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT);
+ Type *PartTy = EVT(RegVT).getTypeForEVT(Context);
+
+ for (unsigned I = 0; I < NumParts; ++I) {
+ Outs.emplace_back(PartTy, Flags);
+ }
+ }
+}
+
+bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const {
+ const auto &F = MF.getFunction();
+ Type *ReturnType = F.getReturnType();
+ CallingConv::ID CallConv = F.getCallingConv();
+
+ SmallVector<BaseArgInfo, 4> SplitArgs;
+ getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs,
+ MF.getDataLayout());
+ return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg());
+}
+
bool CallLowering::analyzeArgInfo(CCState &CCState,
SmallVectorImpl<ArgInfo> &Args,
CCAssignFn &AssignFnFixed,
@@ -409,6 +611,58 @@ bool CallLowering::analyzeArgInfo(CCState &CCState,
return true;
}
+bool CallLowering::parametersInCSRMatch(
+ const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
+ const SmallVectorImpl<CCValAssign> &OutLocs,
+ const SmallVectorImpl<ArgInfo> &OutArgs) const {
+ for (unsigned i = 0; i < OutLocs.size(); ++i) {
+ auto &ArgLoc = OutLocs[i];
+ // If it's not a register, it's fine.
+ if (!ArgLoc.isRegLoc())
+ continue;
+
+ MCRegister PhysReg = ArgLoc.getLocReg();
+
+ // Only look at callee-saved registers.
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg))
+ continue;
+
+ LLVM_DEBUG(
+ dbgs()
+ << "... Call has an argument passed in a callee-saved register.\n");
+
+ // Check if it was copied from.
+ const ArgInfo &OutInfo = OutArgs[i];
+
+ if (OutInfo.Regs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs() << "... Cannot handle arguments in multiple registers.\n");
+ return false;
+ }
+
+ // Check if we copy the register, walking through copies from virtual
+ // registers. Note that getDefIgnoringCopies does not ignore copies from
+ // physical registers.
+ MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
+ if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
+ LLVM_DEBUG(
+ dbgs()
+ << "... Parameter was not copied into a VReg, cannot tail call.\n");
+ return false;
+ }
+
+ // Got a copy. Verify that it's the same as the register we want.
+ Register CopyRHS = RegDef->getOperand(1).getReg();
+ if (CopyRHS != PhysReg) {
+ LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
+ "VReg, cannot tail call.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
MachineFunction &MF,
SmallVectorImpl<ArgInfo> &InArgs,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index b4562a5c6601..f1071d96e5a3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -130,8 +130,6 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
WrapperObserver.addObserver(CSEInfo);
RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
for (MachineBasicBlock *MBB : post_order(&MF)) {
- if (MBB->empty())
- continue;
for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) {
MachineInstr *CurMI = &*MII;
++MII;
@@ -155,5 +153,8 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
MFChanged |= Changed;
} while (Changed);
+ assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) &&
+ "CSEInfo is not consistent. Likely missing calls to "
+ "observer on mutations"));
return MFChanged;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 194961ae3b21..a9353bdfb780 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -34,7 +35,6 @@ static cl::opt<bool>
cl::desc("Force all indexed operations to be "
"legal for the GlobalISel combiner"));
-
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B, GISelKnownBits *KB,
MachineDominatorTree *MDT,
@@ -44,6 +44,75 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
(void)this->KB;
}
+const TargetLowering &CombinerHelper::getTargetLowering() const {
+ return *Builder.getMF().getSubtarget().getTargetLowering();
+}
+
+/// \returns The little endian in-memory byte position of byte \p I in a
+/// \p ByteWidth bytes wide type.
+///
+/// E.g. Given a 4-byte type x, x[0] -> byte 0
+static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
+ assert(I < ByteWidth && "I must be in [0, ByteWidth)");
+ return I;
+}
+
+/// \returns The big endian in-memory byte position of byte \p I in a
+/// \p ByteWidth bytes wide type.
+///
+/// E.g. Given a 4-byte type x, x[0] -> byte 3
+static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
+ assert(I < ByteWidth && "I must be in [0, ByteWidth)");
+ return ByteWidth - I - 1;
+}
+
+/// Given a map from byte offsets in memory to indices in a load/store,
+/// determine if that map corresponds to a little or big endian byte pattern.
+///
+/// \param MemOffset2Idx maps memory offsets to address offsets.
+/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
+///
+/// \returns true if the map corresponds to a big endian byte pattern, false
+/// if it corresponds to a little endian byte pattern, and None otherwise.
+///
+/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
+/// are as follows:
+///
+/// AddrOffset Little endian Big endian
+/// 0 0 3
+/// 1 1 2
+/// 2 2 1
+/// 3 3 0
+static Optional<bool>
+isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ int64_t LowestIdx) {
+ // Need at least two byte positions to decide on endianness.
+ unsigned Width = MemOffset2Idx.size();
+ if (Width < 2)
+ return None;
+ bool BigEndian = true, LittleEndian = true;
+  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
+ auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
+ if (MemOffsetAndIdx == MemOffset2Idx.end())
+ return None;
+ const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
+ assert(Idx >= 0 && "Expected non-negative byte offset?");
+ LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
+ BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
+ if (!BigEndian && !LittleEndian)
+ return None;
+ }
+
+ assert((BigEndian != LittleEndian) &&
+ "Pattern cannot be both big and little endian!");
+ return BigEndian;
+}
+
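
A self-contained sketch of the endianness classification above, using standard-library containers instead of SmallDenseMap and Optional (the function name is invented; the semantics mirror littleEndianByteAt, bigEndianByteAt and isBigEndian):

#include <cstdint>
#include <map>
#include <optional>

std::optional<bool>
classifyEndianness(const std::map<int64_t, int64_t> &MemOffset2Idx,
                   int64_t LowestIdx) {
  const unsigned Width = MemOffset2Idx.size();
  if (Width < 2)
    return std::nullopt; // need at least two bytes to decide
  bool Big = true, Little = true;
  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
    auto It = MemOffset2Idx.find(MemOffset);
    if (It == MemOffset2Idx.end())
      return std::nullopt;
    const int64_t Idx = It->second - LowestIdx;
    Little &= (Idx == int64_t(MemOffset));          // byte I at offset I
    Big &= (Idx == int64_t(Width - MemOffset - 1)); // byte I at offset W-I-1
    if (!Big && !Little)
      return std::nullopt;
  }
  return Big; // e.g. {0->0, 1->1, 2->2, 3->3} yields false (little endian)
}
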
+bool CombinerHelper::isLegalOrBeforeLegalizer(
+ const LegalityQuery &Query) const {
+ return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
+}
+
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
Register ToReg) const {
Observer.changingAllUsesOfReg(MRI, FromReg);
@@ -555,13 +624,13 @@ bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
return false;
-
- // Loop through the basic block until we find one of the instructions.
- MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
- for (; &*I != &DefMI && &*I != &UseMI; ++I)
- return &*I == &DefMI;
-
- llvm_unreachable("Block must contain instructions");
+ const MachineBasicBlock &MBB = *DefMI.getParent();
+ auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
+ return &MI == &DefMI || &MI == &UseMI;
+ });
+ if (DefOrUse == MBB.end())
+ llvm_unreachable("Block must contain both DefMI and UseMI!");
+ return &*DefOrUse == &DefMI;
}
bool CombinerHelper::dominates(const MachineInstr &DefMI,
@@ -576,20 +645,97 @@ bool CombinerHelper::dominates(const MachineInstr &DefMI,
return isPredecessor(DefMI, UseMI);
}
-bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) {
+bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
Register SrcReg = MI.getOperand(1).getReg();
- unsigned SrcSignBits = KB->computeNumSignBits(SrcReg);
- unsigned NumSextBits =
- MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() -
- MI.getOperand(2).getImm();
- return SrcSignBits >= NumSextBits;
+ Register LoadUser = SrcReg;
+
+ if (MRI.getType(SrcReg).isVector())
+ return false;
+
+ Register TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
+ LoadUser = TruncSrc;
+
+ uint64_t SizeInBits = MI.getOperand(2).getImm();
+ // If the source is a G_SEXTLOAD from the same bit width, then we don't
+ // need any extend at all, just a truncate.
+ if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
+ const auto &MMO = **LoadMI->memoperands_begin();
+ // If truncating more than the original extended value, abort.
+ if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
+ return false;
+ if (MMO.getSizeInBits() == SizeInBits)
+ return true;
+ }
+ return false;
}
-bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) {
+bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
- MachineIRBuilder MIB(MI);
- MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchSextInRegOfLoad(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+ // Only supports scalars for now.
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
+ if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
+ return false;
+
+ // If the sign extend extends from a narrower width than the load's width,
+ // then we can narrow the load width when we combine to a G_SEXTLOAD.
+ auto &MMO = **LoadDef->memoperands_begin();
+ // Don't do this for non-simple loads.
+ if (MMO.isAtomic() || MMO.isVolatile())
+ return false;
+
+ // Avoid widening the load at all.
+ unsigned NewSizeBits =
+ std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
+
+ // Don't generate G_SEXTLOADs with a < 1 byte width.
+ if (NewSizeBits < 8)
+ return false;
+ // Don't bother creating a non-power-2 sextload, it will likely be broken up
+ // anyway for most targets.
+ if (!isPowerOf2_32(NewSizeBits))
+ return false;
+ MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
+ return true;
+}
+
+bool CombinerHelper::applySextInRegOfLoad(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register LoadReg;
+ unsigned ScalarSizeBits;
+ std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
+ auto *LoadDef = MRI.getVRegDef(LoadReg);
+ assert(LoadDef && "Expected a load reg");
+
+ // If we have the following:
+ // %ld = G_LOAD %ptr, (load 2)
+ // %ext = G_SEXT_INREG %ld, 8
+ // ==>
+ // %ld = G_SEXTLOAD %ptr (load 1)
+
+ auto &MMO = **LoadDef->memoperands_begin();
+ Builder.setInstrAndDebugLoc(MI);
+ auto &MF = Builder.getMF();
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
+ Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
+ LoadDef->getOperand(1).getReg(), *NewMMO);
MI.eraseFromParent();
return true;
}
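
The equivalence that matchSextInRegOfLoad/applySextInRegOfLoad rely on, written out with plain integer types (a sketch only, and only for the little-endian layout where the byte at the load address is the value's low byte):

#include <cstdint>
#include <cstring>

int32_t loadThenSextInReg(const void *P) {
  uint16_t Wide;                        // G_LOAD %ptr (load 2)
  std::memcpy(&Wide, P, sizeof(Wide));
  return int32_t(int8_t(Wide & 0xff));  // G_SEXT_INREG %ld, 8
}

int32_t narrowSextLoad(const void *P) {
  int8_t Narrow;                        // G_SEXTLOAD %ptr (load 1)
  std::memcpy(&Narrow, P, sizeof(Narrow));
  return int32_t(Narrow);               // same value as loadThenSextInReg
}
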
@@ -611,7 +757,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
return false;
LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
-
+  // FIXME: The following use traversal needs a bail-out for pathological cases.
for (auto &Use : MRI.use_nodbg_instructions(Base)) {
if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
continue;
@@ -738,6 +884,11 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS
Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
return false;
+ // For now, no targets actually support these opcodes so don't waste time
+ // running these unless we're forced to for testing.
+ if (!ForceLegalIndexing)
+ return false;
+
MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
@@ -790,14 +941,12 @@ void CombinerHelper::applyCombineIndexedLoadStore(
LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
}
-bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
+bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::G_BR)
return false;
// Try to match the following:
// bb1:
- // %c(s32) = G_ICMP pred, %a, %b
- // %c1(s1) = G_TRUNC %c(s32)
// G_BRCOND %c1, %bb2
// G_BR %bb3
// bb2:
@@ -807,7 +956,7 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
// The above pattern does not have a fall through to the successor bb2, always
// resulting in a branch no matter which path is taken. Here we try to find
// and replace that pattern with conditional branch to bb3 and otherwise
- // fallthrough to bb2.
+ // fallthrough to bb2. This is generally better for branch predictors.
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::iterator BrIt(MI);
@@ -819,43 +968,38 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
return false;
- // Check that the next block is the conditional branch target.
- if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
- return false;
-
- MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
- if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
- !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
- return false;
- return true;
-}
-
-bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
- if (!matchElideBrByInvertingCond(MI))
- return false;
- applyElideBrByInvertingCond(MI);
- return true;
+ // Check that the next block is the conditional branch target. Also make sure
+ // that it isn't the same as the G_BR's target (otherwise, this will loop.)
+ MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
+ return BrCondTarget != MI.getOperand(0).getMBB() &&
+ MBB->isLayoutSuccessor(BrCondTarget);
}
-void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
+void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
MachineBasicBlock::iterator BrIt(MI);
MachineInstr *BrCond = &*std::prev(BrIt);
- MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
- CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
- (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
+ Builder.setInstrAndDebugLoc(*BrCond);
+ LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
+ // FIXME: Does int/fp matter for this? If so, we might need to restrict
+ // this to i1 only since we might not know for sure what kind of
+ // compare generated the condition value.
+ auto True = Builder.buildConstant(
+ Ty, getICmpTrueVal(getTargetLowering(), false, false));
+ auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
- // Invert the G_ICMP condition.
- Observer.changingInstr(*CmpMI);
- CmpMI->getOperand(1).setPredicate(InversePred);
- Observer.changedInstr(*CmpMI);
+ auto *FallthroughBB = BrCond->getOperand(1).getMBB();
+ Observer.changingInstr(MI);
+ MI.getOperand(0).setMBB(FallthroughBB);
+ Observer.changedInstr(MI);
- // Change the conditional branch target.
+ // Change the conditional branch to use the inverted condition and
+ // new target block.
Observer.changingInstr(*BrCond);
+ BrCond->getOperand(0).setReg(Xor.getReg(0));
BrCond->getOperand(1).setMBB(BrTarget);
Observer.changedInstr(*BrCond);
- MI.eraseFromParent();
}
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
@@ -946,8 +1090,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
unsigned NumBits = Ty.getScalarSizeInBits();
auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
if (!Ty.isVector() && ValVRegAndVal) {
- unsigned KnownVal = ValVRegAndVal->Value;
- APInt Scalar = APInt(8, KnownVal);
+ APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
APInt SplatVal = APInt::getSplat(NumBits, Scalar);
return MIB.buildConstant(Ty, SplatVal).getReg(0);
}
@@ -1299,13 +1442,11 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
}
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+ const unsigned Opc = MI.getOpcode();
// This combine is fairly complex so it's not written with a separate
// matcher function.
- assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
- Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
- assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
- ID == Intrinsic::memset) &&
- "Expected a memcpy like intrinsic");
+ assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
+ Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
auto MMOIt = MI.memoperands_begin();
const MachineMemOperand *MemOp = *MMOIt;
@@ -1316,11 +1457,11 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
Align DstAlign = MemOp->getBaseAlign();
Align SrcAlign;
- Register Dst = MI.getOperand(1).getReg();
- Register Src = MI.getOperand(2).getReg();
- Register Len = MI.getOperand(3).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
- if (ID != Intrinsic::memset) {
+ if (Opc != TargetOpcode::G_MEMSET) {
assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
MemOp = *(++MMOIt);
SrcAlign = MemOp->getBaseAlign();
@@ -1330,7 +1471,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
if (!LenVRegAndVal)
return false; // Leave it to the legalizer to lower it to a libcall.
- unsigned KnownLen = LenVRegAndVal->Value;
+ unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
if (KnownLen == 0) {
MI.eraseFromParent();
@@ -1340,15 +1481,78 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
if (MaxLen && KnownLen > MaxLen)
return false;
- if (ID == Intrinsic::memcpy)
+ if (Opc == TargetOpcode::G_MEMCPY)
return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (ID == Intrinsic::memmove)
+ if (Opc == TargetOpcode::G_MEMMOVE)
return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (ID == Intrinsic::memset)
+ if (Opc == TargetOpcode::G_MEMSET)
return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
return false;
}
+static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
+ const Register Op,
+ const MachineRegisterInfo &MRI) {
+ const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
+ if (!MaybeCst)
+ return None;
+
+ APFloat V = MaybeCst->getValueAPF();
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_FNEG: {
+ V.changeSign();
+ return V;
+ }
+ case TargetOpcode::G_FABS: {
+ V.clearSign();
+ return V;
+ }
+ case TargetOpcode::G_FPTRUNC:
+ break;
+ case TargetOpcode::G_FSQRT: {
+ bool Unused;
+ V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
+ V = APFloat(sqrt(V.convertToDouble()));
+ break;
+ }
+ case TargetOpcode::G_FLOG2: {
+ bool Unused;
+ V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
+ V = APFloat(log2(V.convertToDouble()));
+ break;
+ }
+ }
+ // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
+ // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
+ // and `G_FLOG2` reach here.
+ bool Unused;
+ V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
+ return V;
+}
+
+bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
+ return Cst.hasValue();
+}
+
+bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst) {
+ assert(Cst.hasValue() && "Optional is unexpectedly empty!");
+ Builder.setInstrAndDebugLoc(MI);
+ MachineFunction &MF = Builder.getMF();
+ auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.buildFConstant(DstReg, *FPVal);
+ MI.eraseFromParent();
+ return true;
+}
+
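
A simplified scalar analogue of constantFoldFpUnary above (the FpUnaryOp enum is invented for the sketch; the real code folds G_FNEG/G_FABS/G_FSQRT/G_FLOG2 on APFloat constants and then rebuilds a G_FCONSTANT):

#include <cmath>
#include <optional>

enum class FpUnaryOp { Neg, Abs, Sqrt, Log2 };

std::optional<double> foldFpUnary(FpUnaryOp Op, std::optional<double> Cst) {
  if (!Cst)
    return std::nullopt; // operand is not a known constant, nothing to fold
  switch (Op) {
  case FpUnaryOp::Neg:  return -*Cst;
  case FpUnaryOp::Abs:  return std::fabs(*Cst);
  case FpUnaryOp::Sqrt: return std::sqrt(*Cst);
  case FpUnaryOp::Log2: return std::log2(*Cst);
  }
  return std::nullopt;
}
// A non-empty result means the whole instruction can be replaced by a constant.
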
bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
PtrAddChain &MatchInfo) {
// We're trying to match the following pattern:
@@ -1377,7 +1581,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
return false;
// Pass the combined immediate to the apply function.
- MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
+ MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
MatchInfo.Base = Base;
return true;
}
@@ -1395,15 +1599,211 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
+ RegisterImmPair &MatchInfo) {
+ // We're trying to match the following pattern with any of
+ // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
+ // %t1 = SHIFT %base, G_CONSTANT imm1
+ // %root = SHIFT %t1, G_CONSTANT imm2
+ // -->
+ // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
+
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+ Opcode == TargetOpcode::G_USHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+ Register Shl2 = MI.getOperand(1).getReg();
+ Register Imm1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
+ if (Shl2Def->getOpcode() != Opcode)
+ return false;
+
+ Register Base = Shl2Def->getOperand(1).getReg();
+ Register Imm2 = Shl2Def->getOperand(2).getReg();
+ auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ if (!MaybeImm2Val)
+ return false;
+
+ // Pass the combined immediate to the apply function.
+ MatchInfo.Imm =
+ (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
+ MatchInfo.Reg = Base;
+
+ // There is no simple replacement for a saturating unsigned left shift that
+ // exceeds the scalar size.
+ if (Opcode == TargetOpcode::G_USHLSAT &&
+ MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
+ return false;
+
+ return true;
+}
+
+bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
+ RegisterImmPair &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+ Opcode == TargetOpcode::G_USHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+ Builder.setInstrAndDebugLoc(MI);
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
+ auto Imm = MatchInfo.Imm;
+
+ if (Imm >= ScalarSizeInBits) {
+ // Any logical shift that exceeds scalar size will produce zero.
+ if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
+ Builder.buildConstant(MI.getOperand(0), 0);
+ MI.eraseFromParent();
+ return true;
+ }
+ // Arithmetic shift and saturating signed left shift have no effect beyond
+ // scalar size.
+ Imm = ScalarSizeInBits - 1;
+ }
+
+ LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
+ Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(MatchInfo.Reg);
+ MI.getOperand(2).setReg(NewImm);
+ Observer.changedInstr(MI);
+ return true;
+}
+
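
An illustrative 32-bit scalar version of the shift-chain fold above (helper names invented): two constant shifts of the same kind collapse into one, with the summed amount handled the same way applyShiftImmedChain handles it.

#include <cassert>
#include <cstdint>

// (X lshr Imm1) lshr Imm2  ==  X lshr (Imm1 + Imm2)
uint32_t foldLshrChain(uint32_t X, unsigned Imm1, unsigned Imm2) {
  assert(Imm1 < 32 && Imm2 < 32);
  unsigned Sum = Imm1 + Imm2;
  if (Sum >= 32)
    return 0;      // a logical shift past the bit width produces zero
  return X >> Sum;
}

// (X ashr Imm1) ashr Imm2  ==  X ashr min(Imm1 + Imm2, 31)
int32_t foldAshrChain(int32_t X, unsigned Imm1, unsigned Imm2) {
  assert(Imm1 < 32 && Imm2 < 32);
  unsigned Sum = Imm1 + Imm2;
  if (Sum >= 32)
    Sum = 31;      // arithmetic shifts saturate at width - 1
  return X >> Sum; // assumes the usual arithmetic right shift for signed ints
}
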
+bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo) {
+ // We're trying to match the following pattern with any of
+ // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
+ // with any of G_AND/G_OR/G_XOR logic instructions.
+ // %t1 = SHIFT %X, G_CONSTANT C0
+ // %t2 = LOGIC %t1, %Y
+ // %root = SHIFT %t2, G_CONSTANT C1
+ // -->
+ // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
+ // %t4 = SHIFT %Y, G_CONSTANT C1
+ // %root = LOGIC %t3, %t4
+ unsigned ShiftOpcode = MI.getOpcode();
+ assert((ShiftOpcode == TargetOpcode::G_SHL ||
+ ShiftOpcode == TargetOpcode::G_ASHR ||
+ ShiftOpcode == TargetOpcode::G_LSHR ||
+ ShiftOpcode == TargetOpcode::G_USHLSAT ||
+ ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
+
+ // Match a one-use bitwise logic op.
+ Register LogicDest = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(LogicDest))
+ return false;
+
+ MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
+ unsigned LogicOpcode = LogicMI->getOpcode();
+ if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
+ LogicOpcode != TargetOpcode::G_XOR)
+ return false;
+
+ // Find a matching one-use shift by constant.
+ const Register C1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
+
+ auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
+ // Shift should match previous one and should be a one-use.
+ if (MI->getOpcode() != ShiftOpcode ||
+ !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+ return false;
+
+ // Must be a constant.
+ auto MaybeImmVal =
+ getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ ShiftVal = MaybeImmVal->Value.getSExtValue();
+ return true;
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
+ MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
+ Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
+ MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
+ uint64_t C0Val;
+
+ if (matchFirstShift(LogicMIOp1, C0Val)) {
+ MatchInfo.LogicNonShiftReg = LogicMIReg2;
+ MatchInfo.Shift2 = LogicMIOp1;
+ } else if (matchFirstShift(LogicMIOp2, C0Val)) {
+ MatchInfo.LogicNonShiftReg = LogicMIReg1;
+ MatchInfo.Shift2 = LogicMIOp2;
+ } else
+ return false;
+
+ MatchInfo.ValSum = C0Val + C1Val;
+
+ // The fold is not valid if the sum of the shift values exceeds bitwidth.
+ if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
+ return false;
+
+ MatchInfo.Logic = LogicMI;
+ return true;
+}
+
+bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
+ Opcode == TargetOpcode::G_SSHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
+
+ LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
+ LLT DestType = MRI.getType(MI.getOperand(0).getReg());
+ Builder.setInstrAndDebugLoc(MI);
+
+ Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
+
+ Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
+ Register Shift1 =
+ Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
+
+ Register Shift2Const = MI.getOperand(2).getReg();
+ Register Shift2 = Builder
+ .buildInstr(Opcode, {DestType},
+ {MatchInfo.LogicNonShiftReg, Shift2Const})
+ .getReg(0);
+
+ Register Dest = MI.getOperand(0).getReg();
+ Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
+
+ // These were one use so it's safe to remove them.
+ MatchInfo.Shift2->eraseFromParent();
+ MatchInfo.Logic->eraseFromParent();
+
+ MI.eraseFromParent();
+ return true;
+}
+
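
The identity behind matchShiftOfShiftedLogic/applyShiftOfShiftedLogic above, spelled out for an unsigned left shift and AND (sketch only; it holds because shifts distribute over bitwise AND/OR/XOR, provided C0 + C1 stays below the bit width):

#include <cassert>
#include <cstdint>

// ((X << C0) & Y) << C1  ==  (X << (C0 + C1)) & (Y << C1)
void checkShiftOfShiftedAnd(uint32_t X, uint32_t Y, unsigned C0, unsigned C1) {
  assert(C0 + C1 < 32 && "the fold only fires below the bit width");
  uint32_t Before = ((X << C0) & Y) << C1;       // two shifts plus the logic op
  uint32_t After = (X << (C0 + C1)) & (Y << C1); // folded form built above
  assert(Before == After);
}
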
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
auto MaybeImmVal =
getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
- if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
+ if (!MaybeImmVal)
return false;
- ShiftVal = Log2_64(MaybeImmVal->Value);
- return true;
+
+ ShiftVal = MaybeImmVal->Value.exactLogBase2();
+ return (static_cast<int32_t>(ShiftVal) != -1);
}
bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
@@ -1419,6 +1819,254 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
return true;
}
+// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
+bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
+ RegisterImmPair &MatchData) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+
+ Register LHS = MI.getOperand(1).getReg();
+
+ Register ExtSrc;
+ if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
+ !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
+ !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
+ return false;
+
+ // TODO: Should handle vector splat.
+ Register RHS = MI.getOperand(2).getReg();
+ auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (!MaybeShiftAmtVal)
+ return false;
+
+ if (LI) {
+ LLT SrcTy = MRI.getType(ExtSrc);
+
+ // We only really care about the legality with the shifted value. We can
+ // pick any type the constant shift amount, so ask the target what to
+    // pick any type for the constant shift amount, so ask the target what to
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
+ return false;
+ }
+
+ int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+ MatchData.Reg = ExtSrc;
+ MatchData.Imm = ShiftAmt;
+
+ unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
+ return MinLeadingZeros >= ShiftAmt;
+}
+
+bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
+ const RegisterImmPair &MatchData) {
+ Register ExtSrcReg = MatchData.Reg;
+ int64_t ShiftAmtVal = MatchData.Imm;
+
+ LLT ExtSrcTy = MRI.getType(ExtSrcReg);
+ Builder.setInstrAndDebugLoc(MI);
+ auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
+ auto NarrowShift =
+ Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
+ Builder.buildZExt(MI.getOperand(0), NarrowShift);
+ MI.eraseFromParent();
+ return true;
+}
+
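
What matchCombineShlOfExtend checks, shown with fixed-width integers (a sketch; the real code consults known bits for the leading-zero count): widening before or after the shift is the same as long as the shift cannot push set bits out of the narrow type.

#include <cassert>
#include <cstdint>

// zext32(shl16(X, S)) == shl32(zext32(X), S) when X has at least S leading
// zeros in its 16-bit representation.
void checkShlOfZext(uint16_t X, unsigned S) {
  assert(S < 16);
  bool EnoughLeadingZeros = (X >> (16 - S)) == 0; // nothing gets shifted out
  if (!EnoughLeadingZeros)
    return; // the combine would not fire here
  uint32_t Narrow = uint32_t(uint16_t(X << S)); // shift in 16 bits, then zext
  uint32_t Wide = uint32_t(X) << S;             // zext first, then shift
  assert(Narrow == Wide);
}
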
+static Register peekThroughBitcast(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
+ ;
+
+ return Reg;
+}
+
+bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
+ MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ Register SrcReg =
+ peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
+
+ MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+ if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
+ SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
+ SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
+ return false;
+
+ // Check the source type of the merge.
+ LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
+ LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
+ if (SrcMergeTy != Dst0Ty && !SameSize)
+ return false;
+ // They are the same now (modulo a bitcast).
+ // We can collect all the src registers.
+ for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
+ ++Idx)
+ Operands.push_back(SrcInstr->getOperand(Idx).getReg());
+ return true;
+}
+
+bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
+ MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ assert((MI.getNumOperands() - 1 == Operands.size()) &&
+ "Not enough operands to replace all defs");
+ unsigned NumElems = MI.getNumOperands() - 1;
+
+ LLT SrcTy = MRI.getType(Operands[0]);
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ bool CanReuseInputDirectly = DstTy == SrcTy;
+ Builder.setInstrAndDebugLoc(MI);
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ Register SrcReg = Operands[Idx];
+ if (CanReuseInputDirectly)
+ replaceRegWith(MRI, DstReg, SrcReg);
+ else
+ Builder.buildCast(DstReg, SrcReg);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts) {
+ unsigned SrcIdx = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(SrcIdx).getReg();
+ MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+ if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
+ SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
+ return false;
+ // Break down the big constant in smaller ones.
+ const MachineOperand &CstVal = SrcInstr->getOperand(1);
+ APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
+ ? CstVal.getCImm()->getValue()
+ : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+
+ LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned ShiftAmt = Dst0Ty.getSizeInBits();
+ // Unmerge a constant.
+ for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
+ Csts.emplace_back(Val.trunc(ShiftAmt));
+ Val = Val.lshr(ShiftAmt);
+ }
+
+ return true;
+}
+
+bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ assert((MI.getNumOperands() - 1 == Csts.size()) &&
+ "Not enough operands to replace all defs");
+ unsigned NumElems = MI.getNumOperands() - 1;
+ Builder.setInstrAndDebugLoc(MI);
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ Builder.buildConstant(DstReg, Csts[Idx]);
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
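
What matchCombineUnmergeConstant/applyCombineUnmergeConstant compute, written for a 64-bit constant split into 16-bit destinations (standalone sketch; the real code uses APInt::trunc and APInt::lshr and works for any widths):

#include <cstdint>
#include <vector>

// Break a wide constant into narrow constants, lowest piece first, matching
// the numbering of the unmerge's destination registers.
std::vector<uint16_t> unmergeConstant64(uint64_t Val, unsigned NumPieces) {
  std::vector<uint16_t> Pieces;
  for (unsigned Idx = 0; Idx < NumPieces; ++Idx) {
    Pieces.push_back(uint16_t(Val)); // Val.trunc(ShiftAmt)
    Val >>= 16;                      // Val = Val.lshr(ShiftAmt)
  }
  return Pieces;
}
// unmergeConstant64(0x0123456789abcdefULL, 4)
//   -> {0xcdef, 0x89ab, 0x4567, 0x0123}
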
+bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ // Check that all the lanes are dead except the first one.
+ for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
+ if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
+ return false;
+ }
+ return true;
+}
+
+bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+ Builder.setInstrAndDebugLoc(MI);
+ Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
+ // Truncating a vector is going to truncate every single lane,
+  // Truncating a vector truncates every single lane, whereas we want the
+  // low bits of the value as a whole.
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ SrcReg =
+ Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ if (Dst0Ty.isVector()) {
+ auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
+ Builder.buildCast(Dst0Reg, MIB);
+ } else
+ Builder.buildTrunc(Dst0Reg, SrcReg);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ // G_ZEXT on vector applies to each lane, so it will
+ // affect all destinations. Therefore we won't be able
+ // to simplify the unmerge to just the first definition.
+ if (Dst0Ty.isVector())
+ return false;
+ Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ return false;
+
+ Register ZExtSrcReg;
+ if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
+ return false;
+
+ // Finally we can replace the first definition with
+ // a zext of the source if the definition is big enough to hold
+ // all of ZExtSrc bits.
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
+ return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
+}
+
+bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+
+ MachineInstr *ZExtInstr =
+ MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
+ assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
+ "Expecting a G_ZEXT");
+
+ Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+
+ if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
+ Builder.buildZExt(Dst0Reg, ZExtSrcReg);
+ } else {
+ assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
+ "ZExt src doesn't fit in destination");
+ replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
+ }
+
+ Register ZeroReg;
+ for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
+ if (!ZeroReg)
+ ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
+ replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
unsigned TargetShiftSize,
unsigned &ShiftVal) {
@@ -1440,7 +2088,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
if (!MaybeImmVal)
return false;
- ShiftVal = MaybeImmVal->Value;
+ ShiftVal = MaybeImmVal->Value.getSExtValue();
return ShiftVal >= Size / 2 && ShiftVal < Size;
}
@@ -1529,6 +2177,296 @@ bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
return false;
}
+bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register SrcReg = MI.getOperand(1).getReg();
+ return mi_match(SrcReg, MRI,
+ m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
+}
+
+bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstr(MI);
+ Builder.buildCopy(DstReg, Reg);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
+ Register SrcReg = MI.getOperand(1).getReg();
+ return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
+}
+
+bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstr(MI);
+ Builder.buildZExtOrTrunc(DstReg, Reg);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineAddP2IToPtrAdd(
+ MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ADD);
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT IntTy = MRI.getType(LHS);
+
+ // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
+ // instruction.
+ PtrReg.second = false;
+ for (Register SrcReg : {LHS, RHS}) {
+ if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
+ // Don't handle cases where the integer is implicitly converted to the
+ // pointer width.
+ LLT PtrTy = MRI.getType(PtrReg.first);
+ if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
+ return true;
+ }
+
+ PtrReg.second = true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::applyCombineAddP2IToPtrAdd(
+ MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ const bool DoCommute = PtrReg.second;
+ if (DoCommute)
+ std::swap(LHS, RHS);
+ LHS = PtrReg.first;
+
+ LLT PtrTy = MRI.getType(LHS);
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
+ Builder.buildPtrToInt(Dst, PtrAdd);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
+ int64_t &NewCst) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
+
+ if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
+ int64_t Cst;
+ if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
+ NewCst = Cst + *RHSCst;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
+ int64_t &NewCst) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
+ Register Dst = MI.getOperand(0).getReg();
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildConstant(Dst, NewCst);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ return mi_match(SrcReg, MRI,
+ m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
+}
+
+bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+ Register DstReg = MI.getOperand(0).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, Reg);
+ return true;
+}
+
+bool CombinerHelper::matchCombineExtOfExt(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ MI.getOpcode() == TargetOpcode::G_SEXT ||
+ MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+ "Expected a G_[ASZ]EXT");
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
+ unsigned Opc = MI.getOpcode();
+ unsigned SrcOpc = SrcMI->getOpcode();
+ if (Opc == SrcOpc ||
+ (Opc == TargetOpcode::G_ANYEXT &&
+ (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
+ (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
+ MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::applyCombineExtOfExt(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ MI.getOpcode() == TargetOpcode::G_SEXT ||
+ MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+ "Expected a G_[ASZ]EXT");
+
+ Register Reg = std::get<0>(MatchInfo);
+ unsigned SrcExtOp = std::get<1>(MatchInfo);
+
+ // Combine exts with the same opcode.
+ if (MI.getOpcode() == SrcExtOp) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Reg);
+ Observer.changedInstr(MI);
+ return true;
+ }
+
+ // Combine:
+ // - anyext([sz]ext x) to [sz]ext x
+ // - sext(zext x) to zext x
+ if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ (MI.getOpcode() == TargetOpcode::G_SEXT &&
+ SrcExtOp == TargetOpcode::G_ZEXT)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
+ MI.getFlags());
+ MI.eraseFromParent();
+ return true;
+}
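
A minimal sketch of the arithmetic behind this rewrite (plain C++, not LLVM API code): multiplying by -1 and subtracting from zero are the same operation, so the G_MUL can become a G_SUB.

#include <cassert>
#include <cstdint>

int main() {
  // Values chosen to avoid signed overflow; INT32_MIN * -1 would be undefined.
  for (int32_t x : {-7, 0, 42})
    assert(x * -1 == 0 - x);
  return 0;
}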
+
+bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
+ Register SrcReg = MI.getOperand(1).getReg();
+ return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
+}
+
+bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Src = MI.getOperand(1).getReg();
+ Register AbsSrc;
+ return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
+}
+
+bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Register Dst = MI.getOperand(0).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, Dst, Src);
+ return true;
+}
+
+bool CombinerHelper::matchCombineTruncOfExt(
+ MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ unsigned SrcOpc = SrcMI->getOpcode();
+ if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
+ SrcOpc == TargetOpcode::G_ZEXT) {
+ MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::applyCombineTruncOfExt(
+ MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register SrcReg = MatchInfo.first;
+ unsigned SrcExtOp = MatchInfo.second;
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(DstReg);
+ if (SrcTy == DstTy) {
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, SrcReg);
+ return true;
+ }
+ Builder.setInstrAndDebugLoc(MI);
+ if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
+ Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
+ else
+ Builder.buildTrunc(DstReg, SrcReg);
+ MI.eraseFromParent();
+ return true;
+}
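
The three cases above can be checked with ordinary integer conversions; a plain C++ sketch follows (illustrative only, the variable names are made up). Truncating back to the original width cancels the extension, truncating to a still-wider type keeps the original extension kind, and truncating below the original width needs only a single truncate.

#include <cassert>
#include <cstdint>

int main() {
  int16_t x = -5;
  // trunc.s16(sext.s32(x)) == x            -- same type, becomes a copy
  assert(static_cast<int16_t>(static_cast<int32_t>(x)) == x);
  // trunc.s32(sext.s64(x)) == sext.s32(x)  -- still an extension of the same kind
  assert(static_cast<int32_t>(static_cast<int64_t>(x)) == static_cast<int32_t>(x));
  // trunc.s8(sext.s32(x))  == trunc.s8(x)  -- one narrower truncate
  assert(static_cast<int8_t>(static_cast<int32_t>(x)) == static_cast<int8_t>(x));
  return 0;
}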
+
+bool CombinerHelper::matchCombineTruncOfShl(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register ShiftSrc;
+ Register ShiftAmt;
+
+ if (MRI.hasOneNonDBGUse(SrcReg) &&
+ mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
+ isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_SHL,
+ {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
+ KnownBits Known = KB->getKnownBits(ShiftAmt);
+ unsigned Size = DstTy.getSizeInBits();
+ if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool CombinerHelper::applyCombineTruncOfShl(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+
+ Register ShiftSrc = MatchInfo.first;
+ Register ShiftAmt = MatchInfo.second;
+ Builder.setInstrAndDebugLoc(MI);
+ auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
+ Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
return MO.isReg() &&
@@ -1555,6 +2493,22 @@ bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
MRI);
}
+bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ if (auto MaybeCstCmp =
+ getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
+ OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
+ return true;
+ }
+ return false;
+}
+
bool CombinerHelper::eraseInst(MachineInstr &MI) {
MI.eraseFromParent();
return true;
@@ -1651,6 +2605,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
return true;
}
+bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+ Register Replacement) {
+ assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+ Register OldReg = MI.getOperand(0).getReg();
+ assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+ MI.eraseFromParent();
+ replaceRegWith(MRI, OldReg, Replacement);
+ return true;
+}
+
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SELECT);
// Match (cond ? x : x)
@@ -1671,6 +2635,18 @@ bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
MRI);
}
+bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ return MO.isReg() &&
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+}
+
+bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
+}
+
bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
@@ -1706,9 +2682,7 @@ bool CombinerHelper::matchSimplifyAddToSub(
// ((0-A) + B) -> B - A
// (A + (0-B)) -> A - B
auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
- int64_t Cst;
- if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
- Cst != 0)
+ if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
return false;
NewLHS = MaybeNewLHS;
return true;
@@ -1717,6 +2691,67 @@ bool CombinerHelper::matchSimplifyAddToSub(
return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
+bool CombinerHelper::matchCombineInsertVecElts(
+ MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
+ "Invalid opcode");
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
+ unsigned NumElts = DstTy.getNumElements();
+ // If this MI is part of a sequence of insert_vec_elts, then
+ // don't do the combine in the middle of the sequence.
+ if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
+ TargetOpcode::G_INSERT_VECTOR_ELT)
+ return false;
+ MachineInstr *CurrInst = &MI;
+ MachineInstr *TmpInst;
+ int64_t IntImm;
+ Register TmpReg;
+ MatchInfo.resize(NumElts);
+ while (mi_match(
+ CurrInst->getOperand(0).getReg(), MRI,
+ m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
+ if (IntImm >= NumElts)
+ return false;
+ if (!MatchInfo[IntImm])
+ MatchInfo[IntImm] = TmpReg;
+ CurrInst = TmpInst;
+ }
+ // Variable index.
+ if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+ return false;
+ if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
+ for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
+ if (!MatchInfo[I - 1].isValid())
+ MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
+ }
+ return true;
+ }
+ // If we didn't end in a G_IMPLICIT_DEF, bail out.
+ return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+}
+
+bool CombinerHelper::applyCombineInsertVecElts(
+ MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+ Builder.setInstr(MI);
+ Register UndefReg;
+ auto GetUndef = [&]() {
+ if (UndefReg)
+ return UndefReg;
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
+ return UndefReg;
+ };
+ for (unsigned I = 0; I < MatchInfo.size(); ++I) {
+ if (!MatchInfo[I])
+ MatchInfo[I] = GetUndef();
+ }
+ Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
+ MI.eraseFromParent();
+ return true;
+}
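
A rough scalar analogue of this combine in plain C++ (illustrative only, not the GlobalISel representation): a chain of per-lane insertions starting from an unspecified vector carries the same information as building the vector from those scalars in one step, which is what the build_vector above materializes.

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> v{};                       // stands in for the G_IMPLICIT_DEF base
  v[0] = 10; v[1] = 20; v[2] = 30; v[3] = 40;   // chain of G_INSERT_VECTOR_ELT
  std::array<int, 4> built{10, 20, 30, 40};     // single G_BUILD_VECTOR
  assert(v == built);
  return 0;
}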
+
bool CombinerHelper::applySimplifyAddToSub(
MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
Builder.setInstr(MI);
@@ -1727,6 +2762,812 @@ bool CombinerHelper::applySimplifyAddToSub(
return true;
}
+bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
+ MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
+ // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
+ //
+ // Creates the new hand + logic instruction (but does not insert them.)
+ //
+ // On success, MatchInfo is populated with the new instructions. These are
+ // inserted in applyHoistLogicOpWithSameOpcodeHands.
+ unsigned LogicOpcode = MI.getOpcode();
+ assert(LogicOpcode == TargetOpcode::G_AND ||
+ LogicOpcode == TargetOpcode::G_OR ||
+ LogicOpcode == TargetOpcode::G_XOR);
+ MachineIRBuilder MIB(MI);
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ // Don't recompute anything.
+ if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
+ return false;
+
+ // Make sure we have (hand x, ...), (hand y, ...)
+ MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
+ MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
+ if (!LeftHandInst || !RightHandInst)
+ return false;
+ unsigned HandOpcode = LeftHandInst->getOpcode();
+ if (HandOpcode != RightHandInst->getOpcode())
+ return false;
+ if (!LeftHandInst->getOperand(1).isReg() ||
+ !RightHandInst->getOperand(1).isReg())
+ return false;
+
+ // Make sure the types match up, and if we're doing this post-legalization,
+ // we end up with legal types.
+ Register X = LeftHandInst->getOperand(1).getReg();
+ Register Y = RightHandInst->getOperand(1).getReg();
+ LLT XTy = MRI.getType(X);
+ LLT YTy = MRI.getType(Y);
+ if (XTy != YTy)
+ return false;
+ if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ return false;
+
+ // Optional extra source register.
+ Register ExtraHandOpSrcReg;
+ switch (HandOpcode) {
+ default:
+ return false;
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT: {
+ // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
+ break;
+ }
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_SHL: {
+ // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
+ MachineOperand &ZOp = LeftHandInst->getOperand(2);
+ if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
+ return false;
+ ExtraHandOpSrcReg = ZOp.getReg();
+ break;
+ }
+ }
+
+ // Record the steps to build the new instructions.
+ //
+ // Steps to build (logic x, y)
+ auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
+ OperandBuildSteps LogicBuildSteps = {
+ [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
+ InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
+
+ // Steps to build hand (logic x, y), ...z
+ OperandBuildSteps HandBuildSteps = {
+ [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
+ if (ExtraHandOpSrcReg.isValid())
+ HandBuildSteps.push_back(
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
+ InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
+
+ MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
+ return true;
+}
+
+bool CombinerHelper::applyBuildInstructionSteps(
+ MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
+ assert(MatchInfo.InstrsToBuild.size() &&
+ "Expected at least one instr to build?");
+ Builder.setInstr(MI);
+ for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
+ assert(InstrToBuild.Opcode && "Expected a valid opcode?");
+ assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
+ MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
+ for (auto &OperandFn : InstrToBuild.OperandFns)
+ OperandFn(Instr);
+ }
+ MI.eraseFromParent();
+ return true;
+}
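
The hoisting pattern above rests on the fact that applying the same "hand" operation to both operands commutes with the bitwise logic op. A small C++ check of the two shapes handled (extensions, and shifts by a shared amount), illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t x = 0x5A, y = 0x3C;
  unsigned z = 3;
  // and (zext x), (zext y) == zext (and x, y)
  assert((uint32_t(x) & uint32_t(y)) == uint32_t(uint8_t(x & y)));
  // or (shl x, z), (shl y, z) == shl (or x, y), z
  assert(((unsigned(x) << z) | (unsigned(y) << z)) == (unsigned(x | y) << z));
  return 0;
}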
+
+bool CombinerHelper::matchAshrShlToSextInreg(
+ MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ int64_t ShlCst, AshrCst;
+ Register Src;
+ // FIXME: detect splat constant vectors.
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
+ return false;
+ if (ShlCst != AshrCst)
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
+ return false;
+ MatchInfo = std::make_tuple(Src, ShlCst);
+ return true;
+}
+bool CombinerHelper::applyAshShlToSextInreg(
+ MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ Register Src;
+ int64_t ShiftAmt;
+ std::tie(Src, ShiftAmt) = MatchInfo;
+ unsigned Size = MRI.getType(Src).getScalarSizeInBits();
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
+ MI.eraseFromParent();
+ return true;
+}
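
A quick numeric check of the shift pair recognized here (plain C++ sketch, not LLVM code; two's-complement conversion and arithmetic right shift are assumed, as all mainstream targets provide and C++20 guarantees): shifting left and then arithmetic-shifting right by the same amount sign-extends the low (width - amount) bits, which is exactly G_SEXT_INREG.

#include <cassert>
#include <cstdint>

int main() {
  int32_t x = 0x000000B5;          // low byte 0xB5 has its own sign bit set
  const unsigned c = 24;           // keep the low 32 - 24 = 8 bits
  int32_t viaShifts = (int32_t)((uint32_t)x << c) >> c;  // G_SHL then G_ASHR
  int32_t viaSextInReg = (int32_t)(int8_t)x;             // G_SEXT_INREG %x, 8
  assert(viaShifts == viaSextInReg);                     // both evaluate to -75
  return 0;
}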
+
+bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
+ Register &Replacement) {
+ // Given
+ //
+ // %y:_(sN) = G_SOMETHING
+ // %x:_(sN) = G_SOMETHING
+ // %res:_(sN) = G_AND %x, %y
+ //
+ // Eliminate the G_AND when it is known that x & y == x or x & y == y.
+ //
+ // Patterns like this can appear as a result of legalization. E.g.
+ //
+ // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
+ // %one:_(s32) = G_CONSTANT i32 1
+ // %and:_(s32) = G_AND %cmp, %one
+ //
+ // In this case, G_ICMP only produces a single bit, so x & 1 == x.
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ if (!KB)
+ return false;
+
+ Register AndDst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(AndDst);
+
+ // FIXME: This should be removed once GISelKnownBits supports vectors.
+ if (DstTy.isVector())
+ return false;
+
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownBits LHSBits = KB->getKnownBits(LHS);
+ KnownBits RHSBits = KB->getKnownBits(RHS);
+
+ // Check that x & Mask == x.
+ // x & 1 == x, always
+ // x & 0 == x, only if x is also 0
+ // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
+ //
+ // Check if we can replace AndDst with the LHS of the G_AND
+ if (canReplaceReg(AndDst, LHS, MRI) &&
+ (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ Replacement = LHS;
+ return true;
+ }
+
+ // Check if we can replace AndDst with the RHS of the G_AND
+ if (canReplaceReg(AndDst, RHS, MRI) &&
+ (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ Replacement = RHS;
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
+ // Given
+ //
+ // %y:_(sN) = G_SOMETHING
+ // %x:_(sN) = G_SOMETHING
+ // %res:_(sN) = G_OR %x, %y
+ //
+ // Eliminate the G_OR when it is known that x | y == x or x | y == y.
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+ if (!KB)
+ return false;
+
+ Register OrDst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(OrDst);
+
+ // FIXME: This should be removed once GISelKnownBits supports vectors.
+ if (DstTy.isVector())
+ return false;
+
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownBits LHSBits = KB->getKnownBits(LHS);
+ KnownBits RHSBits = KB->getKnownBits(RHS);
+
+ // Check that x | Mask == x.
+ // x | 0 == x, always
+ // x | 1 == x, only if x is also 1
+ // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
+ //
+ // Check if we can replace OrDst with the LHS of the G_OR
+ if (canReplaceReg(OrDst, LHS, MRI) &&
+ (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ Replacement = LHS;
+ return true;
+ }
+
+ // Check if we can replace OrDst with the RHS of the G_OR
+ if (canReplaceReg(OrDst, RHS, MRI) &&
+ (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ Replacement = RHS;
+ return true;
+ }
+
+ return false;
+}
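
A small numeric illustration of the known-bits conditions used in the two matchers above (plain C++, not LLVM code): the AND is redundant when every bit is either known zero in one operand or known one in the mask, and the OR is redundant when the other operand contributes only known-zero bits.

#include <cassert>
#include <cstdint>

int main() {
  // A comparison-like value: only bit 0 can ever be set, so x & 1 == x.
  uint32_t cmp = 1, one = 1;
  assert((cmp & one) == cmp);
  // OR-ing in a value whose bits are all known zero changes nothing.
  uint32_t x = 0x12345678, zero = 0;
  assert((x | zero) == x);
  return 0;
}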
+
+bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
+ // If the input is already sign extended, just drop the extension.
+ Register Src = MI.getOperand(1).getReg();
+ unsigned ExtBits = MI.getOperand(2).getImm();
+ unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
+ return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
+}
+
+static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
+ int64_t Cst, bool IsVector, bool IsFP) {
+ // For i1, Cst will always be -1 regardless of boolean contents.
+ return (ScalarSizeBits == 1 && Cst == -1) ||
+ isConstTrueVal(TLI, Cst, IsVector, IsFP);
+}
+
+bool CombinerHelper::matchNotCmp(MachineInstr &MI,
+ SmallVectorImpl<Register> &RegsToNegate) {
+ assert(MI.getOpcode() == TargetOpcode::G_XOR);
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
+ Register XorSrc;
+ Register CstReg;
+ // We match xor(src, true) here.
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
+ return false;
+
+ if (!MRI.hasOneNonDBGUse(XorSrc))
+ return false;
+
+ // Check that XorSrc is the root of a tree of comparisons combined with ANDs
+ // and ORs. The suffix of RegsToNegate starting from index I is used as a
+ // work list of tree nodes to visit.
+ RegsToNegate.push_back(XorSrc);
+ // Remember whether the comparisons are all integer or all floating point.
+ bool IsInt = false;
+ bool IsFP = false;
+ for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
+ Register Reg = RegsToNegate[I];
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return false;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ switch (Def->getOpcode()) {
+ default:
+ // Don't match if the tree contains anything other than ANDs, ORs and
+ // comparisons.
+ return false;
+ case TargetOpcode::G_ICMP:
+ if (IsFP)
+ return false;
+ IsInt = true;
+ // When we apply the combine we will invert the predicate.
+ break;
+ case TargetOpcode::G_FCMP:
+ if (IsInt)
+ return false;
+ IsFP = true;
+ // When we apply the combine we will invert the predicate.
+ break;
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ // Implement De Morgan's laws:
+ // ~(x & y) -> ~x | ~y
+ // ~(x | y) -> ~x & ~y
+ // When we apply the combine we will change the opcode and recursively
+ // negate the operands.
+ RegsToNegate.push_back(Def->getOperand(1).getReg());
+ RegsToNegate.push_back(Def->getOperand(2).getReg());
+ break;
+ }
+ }
+
+ // Now we know whether the comparisons are integer or floating point, check
+ // the constant in the xor.
+ int64_t Cst;
+ if (Ty.isVector()) {
+ MachineInstr *CstDef = MRI.getVRegDef(CstReg);
+ auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
+ if (!MaybeCst)
+ return false;
+ if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
+ return false;
+ } else {
+ if (!mi_match(CstReg, MRI, m_ICst(Cst)))
+ return false;
+ if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
+ return false;
+ }
+
+ return true;
+}
+
+bool CombinerHelper::applyNotCmp(MachineInstr &MI,
+ SmallVectorImpl<Register> &RegsToNegate) {
+ for (Register Reg : RegsToNegate) {
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ Observer.changingInstr(*Def);
+ // For each comparison, invert the opcode. For each AND and OR, change the
+ // opcode.
+ switch (Def->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case TargetOpcode::G_ICMP:
+ case TargetOpcode::G_FCMP: {
+ MachineOperand &PredOp = Def->getOperand(1);
+ CmpInst::Predicate NewP = CmpInst::getInversePredicate(
+ (CmpInst::Predicate)PredOp.getPredicate());
+ PredOp.setPredicate(NewP);
+ break;
+ }
+ case TargetOpcode::G_AND:
+ Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
+ break;
+ case TargetOpcode::G_OR:
+ Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+ break;
+ }
+ Observer.changedInstr(*Def);
+ }
+
+ replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return true;
+}
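
The rewrite above is De Morgan's law applied to a tree of comparisons; a plain C++ sketch of the equivalence (illustrative only):

#include <cassert>

int main() {
  for (int a : {1, 5})
    for (int b : {2, 5}) {
      bool original  = !((a < b) && (a != 3));
      bool rewritten = (a >= b) || (a == 3);  // inverted predicates, AND turned into OR
      assert(original == rewritten);
    }
  return 0;
}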
+
+bool CombinerHelper::matchXorOfAndWithSameReg(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ // Match (xor (and x, y), y) (or any of its commuted cases)
+ assert(MI.getOpcode() == TargetOpcode::G_XOR);
+ Register &X = MatchInfo.first;
+ Register &Y = MatchInfo.second;
+ Register AndReg = MI.getOperand(1).getReg();
+ Register SharedReg = MI.getOperand(2).getReg();
+
+ // Find a G_AND on either side of the G_XOR.
+ // Look for one of
+ //
+ // (xor (and x, y), SharedReg)
+ // (xor SharedReg, (and x, y))
+ if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
+ std::swap(AndReg, SharedReg);
+ if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
+ return false;
+ }
+
+ // Only do this if we'll eliminate the G_AND.
+ if (!MRI.hasOneNonDBGUse(AndReg))
+ return false;
+
+ // We can combine if SharedReg is the same as either the LHS or RHS of the
+ // G_AND.
+ if (Y != SharedReg)
+ std::swap(X, Y);
+ return Y == SharedReg;
+}
+
+bool CombinerHelper::applyXorOfAndWithSameReg(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ // Fold (xor (and x, y), y) -> (and (not x), y)
+ Builder.setInstrAndDebugLoc(MI);
+ Register X, Y;
+ std::tie(X, Y) = MatchInfo;
+ auto Not = Builder.buildNot(MRI.getType(X), X);
+ Observer.changingInstr(MI);
+ MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+ MI.getOperand(1).setReg(Not->getOperand(0).getReg());
+ MI.getOperand(2).setReg(Y);
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ const DataLayout &DL = Builder.getMF().getDataLayout();
+
+ if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
+ return false;
+
+ if (Ty.isPointer()) {
+ auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
+ return ConstVal && *ConstVal == 0;
+ }
+
+ assert(Ty.isVector() && "Expecting a vector type");
+ const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ return isBuildVectorAllZeros(*VecMI, MRI);
+}
+
+bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
+ MI.eraseFromParent();
+ return true;
+}
+
+/// The second source operand is known to be a power of 2.
+bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Pow2Src1 = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ Builder.setInstrAndDebugLoc(MI);
+
+ // Fold (urem x, pow2) -> (and x, pow2-1)
+ auto NegOne = Builder.buildConstant(Ty, -1);
+ auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
+ Builder.buildAnd(DstReg, Src0, Add);
+ MI.eraseFromParent();
+ return true;
+}
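
The fold relies on the usual power-of-two identity; a minimal C++ check (illustrative only, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t pow2 = 8;
  for (uint32_t x : {0u, 7u, 8u, 123u})
    assert(x % pow2 == (x & (pow2 - 1)));   // (urem x, pow2) -> (and x, pow2 - 1)
  return 0;
}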
+
+Optional<SmallVector<Register, 8>>
+CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
+ assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
+ // We want to detect if Root is part of a tree which represents a bunch
+ // of loads being merged into a larger load. We'll try to recognize patterns
+ // like, for example:
+ //
+ // Reg Reg
+ // \ /
+ // OR_1 Reg
+ // \ /
+ // OR_2
+ // \ Reg
+ // .. /
+ // Root
+ //
+ // Reg Reg Reg Reg
+ // \ / \ /
+ // OR_1 OR_2
+ // \ /
+ // \ /
+ // ...
+ // Root
+ //
+ // Each "Reg" may have been produced by a load + some arithmetic. This
+ // function will save each of them.
+ SmallVector<Register, 8> RegsToVisit;
+ SmallVector<const MachineInstr *, 7> Ors = {Root};
+
+ // In the "worst" case, we're dealing with a load for each byte. So, there
+ // are at most #bytes - 1 ORs.
+ const unsigned MaxIter =
+ MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
+ for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
+ if (Ors.empty())
+ break;
+ const MachineInstr *Curr = Ors.pop_back_val();
+ Register OrLHS = Curr->getOperand(1).getReg();
+ Register OrRHS = Curr->getOperand(2).getReg();
+
+ // In the combine, we want to eliminate the entire tree.
+ if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
+ return None;
+
+ // If it's a G_OR, save it and continue to walk. If it's not, then it's
+ // something that may be a load + arithmetic.
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
+ Ors.push_back(Or);
+ else
+ RegsToVisit.push_back(OrLHS);
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
+ Ors.push_back(Or);
+ else
+ RegsToVisit.push_back(OrRHS);
+ }
+
+ // We're going to try and merge each register into a wider power-of-2 type,
+ // so we ought to have an even number of registers.
+ if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
+ return None;
+ return RegsToVisit;
+}
+
+/// Helper function for findLoadOffsetsForLoadOrCombine.
+///
+/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
+/// and then moving that value into a specific byte offset.
+///
+/// e.g. x[i] << 24
+///
+/// \returns The load instruction and the byte offset it is moved into.
+static Optional<std::pair<MachineInstr *, int64_t>>
+matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
+ const MachineRegisterInfo &MRI) {
+ assert(MRI.hasOneNonDBGUse(Reg) &&
+ "Expected Reg to only have one non-debug use?");
+ Register MaybeLoad;
+ int64_t Shift;
+ if (!mi_match(Reg, MRI,
+ m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
+ Shift = 0;
+ MaybeLoad = Reg;
+ }
+
+ if (Shift % MemSizeInBits != 0)
+ return None;
+
+ // TODO: Handle other types of loads.
+ auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI);
+ if (!Load)
+ return None;
+
+ const auto &MMO = **Load->memoperands_begin();
+ if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits)
+ return None;
+
+ return std::make_pair(Load, Shift / MemSizeInBits);
+}
+
+Optional<std::pair<MachineInstr *, int64_t>>
+CombinerHelper::findLoadOffsetsForLoadOrCombine(
+ SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
+
+ // Each load found for the pattern. There should be one for each RegsToVisit.
+ SmallSetVector<const MachineInstr *, 8> Loads;
+
+ // The lowest index used in any load. (The lowest "i" for each x[i].)
+ int64_t LowestIdx = INT64_MAX;
+
+ // The load which uses the lowest index.
+ MachineInstr *LowestIdxLoad = nullptr;
+
+ // Keeps track of the load indices we see. We shouldn't see any indices twice.
+ SmallSet<int64_t, 8> SeenIdx;
+
+ // Ensure each load is in the same MBB.
+ // TODO: Support multiple MachineBasicBlocks.
+ MachineBasicBlock *MBB = nullptr;
+ const MachineMemOperand *MMO = nullptr;
+
+ // Earliest instruction-order load in the pattern.
+ MachineInstr *EarliestLoad = nullptr;
+
+ // Latest instruction-order load in the pattern.
+ MachineInstr *LatestLoad = nullptr;
+
+ // Base pointer which every load should share.
+ Register BasePtr;
+
+ // We want to find a load for each register. Each load should have some
+ // appropriate bit twiddling arithmetic. During this loop, we will also keep
+ // track of the load which uses the lowest index. Later, we will check if we
+ // can use its pointer in the final, combined load.
+ for (auto Reg : RegsToVisit) {
+ // Find the load, and find the position that it will end up in (e.g. a
+ // shifted) value.
+ auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
+ if (!LoadAndPos)
+ return None;
+ MachineInstr *Load;
+ int64_t DstPos;
+ std::tie(Load, DstPos) = *LoadAndPos;
+
+ // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
+ // it is difficult to check for stores/calls/etc between loads.
+ MachineBasicBlock *LoadMBB = Load->getParent();
+ if (!MBB)
+ MBB = LoadMBB;
+ if (LoadMBB != MBB)
+ return None;
+
+ // Make sure that the MachineMemOperands of every seen load are compatible.
+ const MachineMemOperand *LoadMMO = *Load->memoperands_begin();
+ if (!MMO)
+ MMO = LoadMMO;
+ if (MMO->getAddrSpace() != LoadMMO->getAddrSpace())
+ return None;
+
+ // Find out what the base pointer and index for the load is.
+ Register LoadPtr;
+ int64_t Idx;
+ if (!mi_match(Load->getOperand(1).getReg(), MRI,
+ m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
+ LoadPtr = Load->getOperand(1).getReg();
+ Idx = 0;
+ }
+
+ // Don't combine things like a[i], a[i] -> a bigger load.
+ if (!SeenIdx.insert(Idx).second)
+ return None;
+
+ // Every load must share the same base pointer; don't combine things like:
+ //
+ // a[i], b[i + 1] -> a bigger load.
+ if (!BasePtr.isValid())
+ BasePtr = LoadPtr;
+ if (BasePtr != LoadPtr)
+ return None;
+
+ if (Idx < LowestIdx) {
+ LowestIdx = Idx;
+ LowestIdxLoad = Load;
+ }
+
+ // Keep track of the byte offset that this load ends up at. If we have seen
+ // the byte offset, then stop here. We do not want to combine:
+ //
+ // a[i] << 16, a[i + k] << 16 -> a bigger load.
+ if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
+ return None;
+ Loads.insert(Load);
+
+ // Keep track of the position of the earliest/latest loads in the pattern.
+ // We will check that there are no load fold barriers between them later
+ // on.
+ //
+ // FIXME: Is there a better way to check for load fold barriers?
+ if (!EarliestLoad || dominates(*Load, *EarliestLoad))
+ EarliestLoad = Load;
+ if (!LatestLoad || dominates(*LatestLoad, *Load))
+ LatestLoad = Load;
+ }
+
+ // We found a load for each register. Let's check if each load satisfies the
+ // pattern.
+ assert(Loads.size() == RegsToVisit.size() &&
+ "Expected to find a load for each register?");
+ assert(EarliestLoad != LatestLoad && EarliestLoad &&
+ LatestLoad && "Expected at least two loads?");
+
+ // Check if there are any stores, calls, etc. between any of the loads. If
+ // there are, then we can't safely perform the combine.
+ //
+ // MaxIter is chosen based off the (worst case) number of iterations it
+ // typically takes to succeed in the LLVM test suite plus some padding.
+ //
+ // FIXME: Is there a better way to check for load fold barriers?
+ const unsigned MaxIter = 20;
+ unsigned Iter = 0;
+ for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
+ LatestLoad->getIterator())) {
+ if (Loads.count(&MI))
+ continue;
+ if (MI.isLoadFoldBarrier())
+ return None;
+ if (Iter++ == MaxIter)
+ return None;
+ }
+
+ return std::make_pair(LowestIdxLoad, LowestIdx);
+}
+
+bool CombinerHelper::matchLoadOrCombine(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+ MachineFunction &MF = *MI.getMF();
+ // Assuming a little-endian target, transform:
+ // s8 *a = ...
+ // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+ // =>
+ // s32 val = *((i32)a)
+ //
+ // s8 *a = ...
+ // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+ // =>
+ // s32 val = BSWAP(*((s32)a))
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (Ty.isVector())
+ return false;
+
+ // We need to combine at least two loads into this type. Since the smallest
+ // possible load is into a byte, we need at least a 16-bit wide type.
+ const unsigned WideMemSizeInBits = Ty.getSizeInBits();
+ if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
+ return false;
+
+ // Match a collection of non-OR instructions in the pattern.
+ auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
+ if (!RegsToVisit)
+ return false;
+
+ // We have a collection of non-OR instructions. Figure out how wide each of
+ // the small loads should be based off of the number of potential loads we
+ // found.
+ const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
+ if (NarrowMemSizeInBits % 8 != 0)
+ return false;
+
+ // Check if each register feeding into each OR is a load from the same
+ // base pointer + some arithmetic.
+ //
+ // e.g. a[0], a[1] << 8, a[2] << 16, etc.
+ //
+ // Also verify that each of these ends up putting a[i] into the same memory
+ // offset as a load into a wide type would.
+ SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
+ MachineInstr *LowestIdxLoad;
+ int64_t LowestIdx;
+ auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
+ MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
+ if (!MaybeLoadInfo)
+ return false;
+ std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
+
+ // We have a bunch of loads being OR'd together. Using the addresses + offsets
+ // we found before, check if this corresponds to a big or little endian byte
+ // pattern. If it does, then we can represent it using a load + possibly a
+ // BSWAP.
+ bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
+ Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
+ if (!IsBigEndian.hasValue())
+ return false;
+ bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
+ if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
+ return false;
+
+ // Make sure that the load from the lowest index produces offset 0 in the
+ // final value.
+ //
+ // This ensures that we won't combine something like this:
+ //
+ // load x[i] -> byte 2
+ // load x[i+1] -> byte 0 ---> wide_load x[i]
+ // load x[i+2] -> byte 1
+ const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
+ const unsigned ZeroByteOffset =
+ *IsBigEndian
+ ? bigEndianByteAt(NumLoadsInTy, 0)
+ : littleEndianByteAt(NumLoadsInTy, 0);
+ auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
+ if (ZeroOffsetIdx == MemOffset2Idx.end() ||
+ ZeroOffsetIdx->second != LowestIdx)
+ return false;
+
+ // We will reuse the pointer from the load which ends up at byte offset 0. It
+ // may not use index 0.
+ Register Ptr = LowestIdxLoad->getOperand(1).getReg();
+ const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
+ LegalityQuery::MemDesc MMDesc;
+ MMDesc.SizeInBits = WideMemSizeInBits;
+ MMDesc.AlignInBits = MMO.getAlign().value() * 8;
+ MMDesc.Ordering = MMO.getOrdering();
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
+ return false;
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
+
+ // Load must be allowed and fast on the target.
+ LLVMContext &C = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ bool Fast = false;
+ if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
+ !Fast)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
+ MIB.buildLoad(LoadDst, Ptr, *NewMMO);
+ if (NeedsBSwap)
+ MIB.buildBSwap(Dst, LoadDst);
+ };
+ return true;
+}
+
+bool CombinerHelper::applyLoadOrCombine(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+ MatchInfo(Builder);
+ MI.eraseFromParent();
+ return true;
+}
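
A host-side C++ analogue of the pattern this pair of functions recognizes (illustrative only; the array and the GCC/Clang __builtin_bswap32 call are assumptions, not part of the patch): OR-ing byte loads shifted into place reproduces a single wide load, up to a byte swap when the byte order is reversed.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint8_t a[4] = {0x11, 0x22, 0x33, 0x44};
  uint32_t narrow = (uint32_t)a[0] | ((uint32_t)a[1] << 8) |
                    ((uint32_t)a[2] << 16) | ((uint32_t)a[3] << 24);
  uint32_t wide;
  std::memcpy(&wide, a, sizeof(wide));                    // the single wide load
  // Little-endian hosts take the first branch; big-endian hosts need the swap.
  assert(narrow == wide || narrow == __builtin_bswap32(wide));
  return 0;
}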
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
index bdaa6378e901..59f4d60a41d8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
void GISelChangeObserver::changingAllUsesOfReg(
- const MachineRegisterInfo &MRI, unsigned Reg) {
+ const MachineRegisterInfo &MRI, Register Reg) {
for (auto &ChangingMI : MRI.use_instructions(Reg)) {
changingInstr(ChangingMI);
ChangingAllUsesOfReg.insert(&ChangingMI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 0e9c6e4fab9f..2de20489e1d1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -11,7 +11,6 @@
//
//===------------------
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -95,6 +94,25 @@ dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
<< "\n";
}
+/// Compute known bits for the intersection of \p Src0 and \p Src1
+void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ // Test src1 first, since we canonicalize simpler expressions to the RHS.
+ computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
+
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ return;
+
+ KnownBits Known2;
+ computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
+
+ // Only known if known in both the LHS and RHS.
+ Known = KnownBits::commonBits(Known, Known2);
+}
+
void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
const APInt &DemandedElts,
unsigned Depth) {
@@ -182,8 +200,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
// For COPYs we don't do anything, don't increase the depth.
computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
// If we reach a point where we don't know anything
// just stop looking through the operands.
if (Known.One == 0 && Known.Zero == 0)
@@ -200,8 +217,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
auto CstVal = getConstantVRegVal(R, MRI);
if (!CstVal)
break;
- Known.One = *CstVal;
- Known.Zero = ~Known.One;
+ Known = KnownBits::makeConstant(*CstVal);
break;
}
case TargetOpcode::G_FRAME_INDEX: {
@@ -268,33 +284,50 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Depth + 1);
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
Depth + 1);
- // If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conservative estimate for high known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- unsigned TrailZ =
- Known.countMinTrailingZeros() + Known2.countMinTrailingZeros();
- unsigned LeadZ =
- std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(),
- BitWidth) -
- BitWidth;
-
- Known.resetAll();
- Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
- Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
+ Known = KnownBits::computeForMul(Known, Known2);
break;
}
case TargetOpcode::G_SELECT: {
- computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts,
+ computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
+ Known, DemandedElts, Depth + 1);
+ break;
+ }
+ case TargetOpcode::G_SMIN: {
+ // TODO: Handle clamp pattern with number of sign bits
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Depth + 1);
- // If we don't know any bits, early out.
- if (Known.isUnknown())
- break;
- computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
Depth + 1);
- // Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::smin(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_SMAX: {
+ // TODO: Handle clamp pattern with number of sign bits
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::smax(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_UMIN: {
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
+ DemandedElts, Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
+ DemandedElts, Depth + 1);
+ Known = KnownBits::umin(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_UMAX: {
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
+ DemandedElts, Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
+ DemandedElts, Depth + 1);
+ Known = KnownBits::umax(Known, KnownRHS);
break;
}
case TargetOpcode::G_FCMP:
@@ -314,61 +347,56 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = Known.sext(BitWidth);
break;
}
+ case TargetOpcode::G_SEXT_INREG: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ Known = Known.sextInReg(MI.getOperand(2).getImm());
+ break;
+ }
case TargetOpcode::G_ANYEXT: {
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Depth + 1);
- Known = Known.zext(BitWidth);
+ Known = Known.anyext(BitWidth);
break;
}
case TargetOpcode::G_LOAD: {
- if (MI.hasOneMemOperand()) {
- const MachineMemOperand *MMO = *MI.memoperands_begin();
- if (const MDNode *Ranges = MMO->getRanges()) {
- computeKnownBitsFromRangeMetadata(*Ranges, Known);
- }
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ if (const MDNode *Ranges = MMO->getRanges()) {
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
}
+
break;
}
case TargetOpcode::G_ZEXTLOAD: {
// Everything above the retrieved bits is zero
- if (MI.hasOneMemOperand())
- Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
+ Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
break;
}
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_SHL: {
- KnownBits RHSKnown;
+ case TargetOpcode::G_ASHR: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+ Depth + 1);
computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
Depth + 1);
- if (!RHSKnown.isConstant()) {
- LLVM_DEBUG(
- MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg());
- dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI);
- break;
- }
- uint64_t Shift = RHSKnown.getConstant().getZExtValue();
- LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n');
-
- computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Known = KnownBits::ashr(LHSKnown, RHSKnown);
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
Depth + 1);
-
- switch (Opcode) {
- case TargetOpcode::G_ASHR:
- Known.Zero = Known.Zero.ashr(Shift);
- Known.One = Known.One.ashr(Shift);
- break;
- case TargetOpcode::G_LSHR:
- Known.Zero = Known.Zero.lshr(Shift);
- Known.One = Known.One.lshr(Shift);
- Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift);
- break;
- case TargetOpcode::G_SHL:
- Known.Zero = Known.Zero.shl(Shift);
- Known.One = Known.One.shl(Shift);
- Known.Zero.setBits(0, Shift);
- break;
- }
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::lshr(LHSKnown, RHSKnown);
+ break;
+ }
+ case TargetOpcode::G_SHL: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::shl(LHSKnown, RHSKnown);
break;
}
case TargetOpcode::G_INTTOPTR:
@@ -390,6 +418,48 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known.Zero.setBitsFrom(SrcBitWidth);
break;
}
+ case TargetOpcode::G_MERGE_VALUES: {
+ unsigned NumOps = MI.getNumOperands();
+ unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+
+ for (unsigned I = 0; I != NumOps - 1; ++I) {
+ KnownBits SrcOpKnown;
+ computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
+ DemandedElts, Depth + 1);
+ Known.insertBits(SrcOpKnown, I * OpSize);
+ }
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ unsigned NumOps = MI.getNumOperands();
+ Register SrcReg = MI.getOperand(NumOps - 1).getReg();
+ if (MRI.getType(SrcReg).isVector())
+ return; // TODO: Handle vectors.
+
+ KnownBits SrcOpKnown;
+ computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);
+
+ // Figure out the result operand index
+ unsigned DstIdx = 0;
+ for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
+ ++DstIdx)
+ ;
+
+ Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
+ break;
+ }
+ case TargetOpcode::G_BSWAP: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known.byteSwap();
+ break;
+ }
+ case TargetOpcode::G_BITREVERSE: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known.reverseBits();
+ break;
+ }
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -399,6 +469,17 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
ComputeKnownBitsCache[R] = Known;
}
+/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
+unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ // Test src1 first, since we canonicalize simpler expressions to the RHS.
+ unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
+ if (Src1SignBits == 1)
+ return 1;
+ return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
+}
+
unsigned GISelKnownBits::computeNumSignBits(Register R,
const APInt &DemandedElts,
unsigned Depth) {
@@ -442,15 +523,30 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
}
+ case TargetOpcode::G_SEXT_INREG: {
+ // Max of the input and what this extends.
+ Register Src = MI.getOperand(1).getReg();
+ unsigned SrcBits = MI.getOperand(2).getImm();
+ unsigned InRegBits = TyBits - SrcBits + 1;
+ return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
+ }
case TargetOpcode::G_SEXTLOAD: {
- Register Dst = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(Dst);
- // TODO: add vector support
- if (Ty.isVector())
- break;
- if (MI.hasOneMemOperand())
- return Ty.getSizeInBits() - (*MI.memoperands_begin())->getSizeInBits();
- break;
+ // FIXME: We need an in-memory type representation.
+ if (DstTy.isVector())
+ return 1;
+
+ // e.g. i16->i32 = '17' bits known.
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return TyBits - MMO->getSizeInBits() + 1;
+ }
+ case TargetOpcode::G_ZEXTLOAD: {
+ // FIXME: We need an in-memory type representation.
+ if (DstTy.isVector())
+ return 1;
+
+ // e.g. i16->i32 = '16' bits known.
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return TyBits - MMO->getSizeInBits();
}
case TargetOpcode::G_TRUNC: {
Register Src = MI.getOperand(1).getReg();
@@ -464,6 +560,11 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
return NumSrcSignBits - (NumSrcBits - DstTyBits);
break;
}
+ case TargetOpcode::G_SELECT: {
+ return computeNumSignBitsMin(MI.getOperand(2).getReg(),
+ MI.getOperand(3).getReg(), DemandedElts,
+ Depth + 1);
+ }
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
default: {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 8f6643b2f193..b7883cbc3120 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -29,9 +29,11 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -48,11 +50,13 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -70,6 +74,7 @@
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
+#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
@@ -90,6 +95,8 @@ INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
@@ -110,7 +117,8 @@ static void reportTranslationError(MachineFunction &MF,
ORE.emit(R);
}
-IRTranslator::IRTranslator() : MachineFunctionPass(ID) { }
+IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
+ : MachineFunctionPass(ID), OptLevel(optlevel) {}
#ifndef NDEBUG
namespace {
@@ -154,13 +162,17 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ if (OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
IRTranslator::ValueToVRegInfo::VRegListT &
IRTranslator::allocateVRegs(const Value &Val) {
- assert(!VMap.contains(Val) && "Value already allocated in VMap");
+ auto VRegsIt = VMap.findVRegs(Val);
+ if (VRegsIt != VMap.vregs_end())
+ return *VRegsIt->second;
auto *Regs = VMap.getVRegs(Val);
auto *Offsets = VMap.getOffsets(Val);
SmallVector<LLT, 4> SplitTys;
@@ -222,8 +234,9 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
}
int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
- if (FrameIndices.find(&AI) != FrameIndices.end())
- return FrameIndices[&AI];
+ auto MapEntry = FrameIndices.find(&AI);
+ if (MapEntry != FrameIndices.end())
+ return MapEntry->second;
uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
uint64_t Size =
@@ -293,25 +306,8 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
return true;
}
-bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
- // -0.0 - X --> G_FNEG
- if (isa<Constant>(U.getOperand(0)) &&
- U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
- Register Op1 = getOrCreateVReg(*U.getOperand(1));
- Register Res = getOrCreateVReg(U);
- uint16_t Flags = 0;
- if (isa<Instruction>(U)) {
- const Instruction &I = cast<Instruction>(U);
- Flags = MachineInstr::copyFlagsFromInstruction(I);
- }
- // Negate the last operand of the FSUB
- MIRBuilder.buildFNeg(Res, Op1, Flags);
- return true;
- }
- return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
-}
-
-bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder) {
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
uint16_t Flags = 0;
@@ -319,10 +315,14 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
}
- MIRBuilder.buildFNeg(Res, Op0, Flags);
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
return true;
}
+bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
+}
+
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
auto *CI = dyn_cast<CmpInst>(&U);
@@ -368,31 +368,289 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
- return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
+ return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
+}
+
+void IRTranslator::emitBranchForMergedCondition(
+ const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+ BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ CmpInst::Predicate Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
+ Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+ }
+
+ SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
+ BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
+ CurBuilder->getDebugLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+ return;
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+ SwitchCG::CaseBlock CB(
+ Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
+ nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+}
+
+static bool isValInBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+void IRTranslator::findMergedConditions(
+ const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond) {
+ using namespace PatternMatch;
+ assert((Opc == Instruction::And || Opc == Instruction::Or) &&
+ "Expected Opc to be AND/OR");
+ // Skip over a NOT that is not part of the tree itself, and remember to invert
+ // the op and operands at the next level.
+ Value *NotCond;
+ if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+ isValInBlock(NotCond, CurBB->getBasicBlock())) {
+ findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
+ }
+
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ const Value *BOpOp0, *BOpOp1;
+ // Compute the effective opcode for Cond, taking into account whether it needs
+ // to be inverted, e.g.
+ // and (not (or A, B)), C
+ // gets lowered as
+ // and (and (not A, not B), C)
+ Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
+ if (BOp) {
+ BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::And
+ : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::Or
+ : (Instruction::BinaryOps)0);
+ if (InvertCond) {
+ if (BOpc == Instruction::And)
+ BOpc = Instruction::Or;
+ else if (BOpc == Instruction::Or)
+ BOpc = Instruction::And;
+ }
+ }
+
+ // If this node is not part of the or/and tree, emit it as a branch.
+ // Note that all nodes in the tree should have the same opcode.
+ bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
+ if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
+ !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
+ !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
+ emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
+ InvertCond);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI(CurBB);
+ MachineBasicBlock *TmpBB =
+ MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+ // A/(1+B) and 2B/(1+B). This choice assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals TrueProb for TmpBB,
+ // but the math is more complicated.
+
+ auto NewTrueProb = TProb / 2;
+ auto NewFalseProb = TProb / 2 + FProb;
+ // Emit the LHS condition.
+ findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
+
+ // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+ SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+ // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+ // TrueProb for BB1 * FalseProb for TmpBB.
+
+ auto NewTrueProb = TProb + FProb / 2;
+ auto NewFalseProb = FProb / 2;
+ // Emit the LHS condition.
+ findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
+
+ // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+ SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
+ }
+}
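A minimal worked check of the two probability splits described in the comments above, assuming the original true/false probabilities A and B are normalized so that A + B = 1 (which gives the identities A/2 + B = (1+B)/2 and A + B/2 = (1+A)/2):

  Or case, BB1 = (A/2, A/2 + B), TmpBB = (A/(1+B), 2B/(1+B)):
    TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB)
      = A/2 + (A/2 + B) * A/(1+B)
      = A/2 + ((1+B)/2) * A/(1+B)
      = A/2 + A/2 = A

  And case, BB1 = (A + B/2, B/2), TmpBB = (2A/(1+A), B/(1+A)):
    FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB)
      = B/2 + (A + B/2) * B/(1+A)
      = B/2 + ((1+A)/2) * B/(1+A)
      = B/2 + B/2 = B

Both match the requirements stated in the comment blocks, and normalizing {A/2, B} (respectively {A, B/2}) just before the recursive calls reproduces the TmpBB probabilities quoted above.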
+
+bool IRTranslator::shouldEmitAsBranches(
+ const std::vector<SwitchCG::CaseBlock> &Cases) {
+ // For multiple cases, it's better to emit as branches.
+ if (Cases.size() != 2)
+ return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
+ Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
+ Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
}
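To make the null-check special case above concrete, here is a minimal sketch of the source shape it recognizes, with hypothetical function names: both cases compare against the same null constant with ICMP_NE, and the false edge of the first test falls through to the second test's block, so shouldEmitAsBranches returns false and the pair is kept together for the combined (X|Y) != 0 form noted in the comment.

#include <cstdint>

// Two ICMP_NE compares against the same null RHS, or'd together.
bool eitherNonNull(int *X, int *Y) {
  return X != nullptr || Y != nullptr;
}

// The combined form the comment refers to: a single test of the or'd bits.
bool eitherNonNullCombined(int *X, int *Y) {
  return (reinterpret_cast<uintptr_t>(X) | reinterpret_cast<uintptr_t>(Y)) != 0;
}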
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
const BranchInst &BrInst = cast<BranchInst>(U);
- unsigned Succ = 0;
- if (!BrInst.isUnconditional()) {
- // We want a G_BRCOND to the true BB followed by an unconditional branch.
- Register Tst = getOrCreateVReg(*BrInst.getCondition());
- const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
- MachineBasicBlock &TrueBB = getMBB(TrueTgt);
- MIRBuilder.buildBrCond(Tst, TrueBB);
+ auto &CurMBB = MIRBuilder.getMBB();
+ auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
+
+ if (BrInst.isUnconditional()) {
+ // If the unconditional target is the layout successor, fall through.
+ if (!CurMBB.isLayoutSuccessor(Succ0MBB))
+ MIRBuilder.buildBr(*Succ0MBB);
+
+ // Link successors.
+ for (const BasicBlock *Succ : successors(&BrInst))
+ CurMBB.addSuccessor(&getMBB(*Succ));
+ return true;
}
- const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
- MachineBasicBlock &TgtBB = getMBB(BrTgt);
- MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ // If this condition is one of the special cases we handle, do the special
+ // lowering now.
+ const Value *CondVal = BrInst.getCondition();
+ MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
- // If the unconditional target is the layout successor, fallthrough.
- if (!CurBB.isLayoutSuccessor(&TgtBB))
- MIRBuilder.buildBr(TgtBB);
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
- // Link successors.
- for (const BasicBlock *Succ : successors(&BrInst))
- CurBB.addSuccessor(&getMBB(*Succ));
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive (exceptions for multi-use logic ops,
+ // unpredictable branches, and vector extracts because those jumps are likely
+ // expensive for any target), this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ using namespace PatternMatch;
+ const Instruction *CondI = dyn_cast<Instruction>(CondVal);
+ if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
+ !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
+ Value *Vec;
+ const Value *BOp0, *BOp1;
+ if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::And;
+ else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::Or;
+
+ if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+ findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
+ getEdgeProbability(&CurMBB, Succ0MBB),
+ getEdgeProbability(&CurMBB, Succ1MBB),
+ /*InvertCond=*/false);
+ assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (shouldEmitAsBranches(SL->SwitchCases)) {
+ // Emit the branch for this block.
+ emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
+ SL->SwitchCases.erase(SL->SwitchCases.begin());
+ return true;
+ }
+
+ // Okay, we decided not to do this; remove any inserted MBBs and clear
+ // SwitchCases.
+ for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
+ MF->erase(SL->SwitchCases[I].ThisBB);
+
+ SL->SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
+ ConstantInt::getTrue(MF->getFunction().getContext()),
+ nullptr, Succ0MBB, Succ1MBB, &CurMBB,
+ CurBuilder->getDebugLoc());
+
+ // Use emitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ emitSwitchCase(CB, &CurMBB, *CurBuilder);
return true;
}
@@ -457,6 +715,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
}
SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
+ SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
@@ -577,8 +836,22 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
const LLT i1Ty = LLT::scalar(1);
// Build the compare.
if (!CB.CmpMHS) {
- Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
- Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
+ // For conditional branch lowering, we might try to do something silly like
+ // emit a G_ICMP to compare an existing G_ICMP i1 result with true. If so,
+ // just re-use the existing condition vreg.
+ if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
+ CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+ Cond = CondLHS;
+ } else {
+ Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+ if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
+ Cond =
+ MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ else
+ Cond =
+ MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ }
} else {
assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
"Can only handle SLE ranges");
@@ -611,17 +884,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
CB.ThisBB->normalizeSuccProbs();
- // if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock())
- addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
- CB.ThisBB);
-
- // If the lhs block is the next block, invert the condition so that we can
- // fall through to the lhs instead of the rhs block.
- if (CB.TrueBB == CB.ThisBB->getNextNode()) {
- std::swap(CB.TrueBB, CB.FalseBB);
- auto True = MIB.buildConstant(i1Ty, 1);
- Cond = MIB.buildXor(i1Ty, Cond, True).getReg(0);
- }
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
+ CB.ThisBB);
MIB.buildBrCond(Cond, *CB.TrueBB);
MIB.buildBr(*CB.FalseBB);
@@ -734,6 +998,156 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
return true;
}
+void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ MachineIRBuilder &MIB = *CurBuilder;
+ MIB.setMBB(*SwitchBB);
+
+ // Subtract the minimum value.
+ Register SwitchOpReg = getOrCreateVReg(*B.SValue);
+
+ LLT SwitchOpTy = MRI->getType(SwitchOpReg);
+ Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
+ auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
+
+ // Ensure that the type will fit the mask value.
+ LLT MaskTy = SwitchOpTy;
+ for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
+ if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
+ // Switch table case ranges are encoded into a series of masks.
+ // Just use the pointer type; it's guaranteed to fit.
+ MaskTy = LLT::scalar(64);
+ break;
+ }
+ }
+ Register SubReg = RangeSub.getReg(0);
+ if (SwitchOpTy != MaskTy)
+ SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);
+
+ B.RegVT = getMVTForLLT(MaskTy);
+ B.Reg = SubReg;
+
+ MachineBasicBlock *MBB = B.Cases[0].ThisBB;
+
+ if (!B.OmitRangeCheck)
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+
+ SwitchBB->normalizeSuccProbs();
+
+ if (!B.OmitRangeCheck) {
+ // Conditional branch to the default block.
+ auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
+ auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
+ RangeSub, RangeCst);
+ MIB.buildBrCond(RangeCmp, *B.Default);
+ }
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (MBB != SwitchBB->getNextNode())
+ MIB.buildBr(*MBB);
+}
+
+void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
+ MachineBasicBlock *NextMBB,
+ BranchProbability BranchProbToNext,
+ Register Reg, SwitchCG::BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ MachineIRBuilder &MIB = *CurBuilder;
+ MIB.setMBB(*SwitchBB);
+
+ LLT SwitchTy = getLLTForMVT(BB.RegVT);
+ Register Cmp;
+ unsigned PopCount = countPopulation(B.Mask);
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
+ auto MaskTrailingZeros =
+ MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
+ Cmp =
+ MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
+ .getReg(0);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ auto MaskTrailingOnes =
+ MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
+ Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
+ .getReg(0);
+ } else {
+ // Make desired shift.
+ auto CstOne = MIB.buildConstant(SwitchTy, 1);
+ auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg);
+
+ // Emit bit tests and jumps.
+ auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
+ auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
+ auto CstZero = MIB.buildConstant(SwitchTy, 0);
+ Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
+ .getReg(0);
+ }
+
+ // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+ addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+ // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+ addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+ // The sum of B.ExtraProb and BranchProbToNext is not guaranteed to be one,
+ // as they are relative probabilities (and thus work more like weights), so
+ // we need to normalize them so that they sum to one.
+ SwitchBB->normalizeSuccProbs();
+
+ // Record the fact that the IR edge from the header to the bit test target
+ // will go through our new block. Needed for PHIs to have nodes added.
+ addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
+ SwitchBB);
+
+ MIB.buildBrCond(Cmp, *B.TargetBB);
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (NextMBB != SwitchBB->getNextNode())
+ MIB.buildBr(*NextMBB);
+}
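A minimal standalone sketch of the test emitBitTestCase builds, using hypothetical case values: switch values {1, 3, 4} share one destination and B.First == 1, so the header's subtraction yields v = x - 1 and the mask covers the adjusted values {0, 2, 3}. The general path shifts a 1 by v and ANDs with the mask; the single-bit path collapses to one equality compare.

#include <cstdint>

// General case: shift 1 by the adjusted value, AND with the mask, compare != 0.
bool bitTestTaken(uint64_t v) {
  const uint64_t Mask = 0b1101;              // adjusted values {0, 2, 3}
  return (((uint64_t)1 << v) & Mask) != 0;
}

// When the mask has exactly one set bit (PopCount == 1), the same test reduces
// to comparing v with the bit position, i.e. countTrailingZeros(Mask).
bool singleBitTaken(uint64_t v) {
  // Mask == 0b0100: only adjusted value 2 branches, and countTrailingZeros of
  // the mask is 2, so the test is a single equality compare.
  return v == 2;
}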
+
+bool IRTranslator::lowerBitTestWorkItem(
+ SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+ BranchProbability DefaultProb, BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable) {
+ using namespace SwitchCG;
+ MachineFunction *CurMF = SwitchMBB->getParent();
+ // FIXME: Optimize away range check based on pivot comparisons.
+ BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
+ // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases)
+ CurMF->insert(BBI, BTC.ThisBB);
+
+ // Fill in fields of the BitTestBlock.
+ BTB->Parent = CurMBB;
+ BTB->Default = Fallthrough;
+
+ BTB->DefaultProb = UnhandledProbs;
+ // If the cases in the bit test don't form a contiguous range, we evenly
+ // distribute the probability on the edge to Fallthrough between the two
+ // successors of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ if (FallthroughUnreachable) {
+ // Skip the range check if the fallthrough block is unreachable.
+ BTB->OmitRangeCheck = true;
+ }
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
+ emitBitTestHeader(*BTB, SwitchMBB);
+ BTB->Emitted = true;
+ }
+ return true;
+}
+
bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
Value *Cond,
MachineBasicBlock *SwitchMBB,
@@ -794,9 +1208,15 @@ bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
switch (I->Kind) {
case CC_BitTests: {
- LLVM_DEBUG(dbgs() << "Switch to bit test optimization unimplemented");
- return false; // Bit tests currently unimplemented.
+ if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
+ DefaultProb, UnhandledProbs, I, Fallthrough,
+ FallthroughUnreachable)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
+ return false;
+ }
+ break;
}
+
case CC_JumpTable: {
if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
UnhandledProbs, I, Fallthrough,
@@ -1137,16 +1557,33 @@ bool IRTranslator::translateGetElementPtr(const User &U,
bool IRTranslator::translateMemFunc(const CallInst &CI,
MachineIRBuilder &MIRBuilder,
- Intrinsic::ID ID) {
+ unsigned Opcode) {
// If the source is undef, then just emit a nop.
if (isa<UndefValue>(CI.getArgOperand(1)))
return true;
- ArrayRef<Register> Res;
- auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true);
- for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI)
- ICall.addUse(getOrCreateVReg(**AI));
+ SmallVector<Register, 3> SrcRegs;
+
+ unsigned MinPtrSize = UINT_MAX;
+ for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
+ Register SrcReg = getOrCreateVReg(**AI);
+ LLT SrcTy = MRI->getType(SrcReg);
+ if (SrcTy.isPointer())
+ MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize);
+ SrcRegs.push_back(SrcReg);
+ }
+
+ LLT SizeTy = LLT::scalar(MinPtrSize);
+
+ // The size operand should be the minimum of the pointer sizes.
+ Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
+ if (MRI->getType(SizeOpReg) != SizeTy)
+ SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);
+
+ auto ICall = MIRBuilder.buildInstr(Opcode);
+ for (Register SrcReg : SrcRegs)
+ ICall.addUse(SrcReg);
Align DstAlign;
Align SrcAlign;
@@ -1175,7 +1612,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
ICall.addMemOperand(MF->getMachineMemOperand(
MachinePointerInfo(CI.getArgOperand(0)),
MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
- if (ID != Intrinsic::memset)
+ if (Opcode != TargetOpcode::G_MEMSET)
ICall.addMemOperand(MF->getMachineMemOperand(
MachinePointerInfo(CI.getArgOperand(1)),
MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));
@@ -1214,6 +1651,16 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
return true;
}
+bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
+ MachineIRBuilder &MIRBuilder) {
+ Register Dst = getOrCreateVReg(CI);
+ Register Src0 = getOrCreateVReg(*CI.getOperand(0));
+ Register Src1 = getOrCreateVReg(*CI.getOperand(1));
+ uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
+ MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
+ return true;
+}
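These opcodes carry the fixed-point semantics of the corresponding intrinsics: the third operand is the scale (the number of fractional bits), and for the multiply flavours the result is the full-width product shifted right by the scale, with the *_sat variants additionally saturating to the destination range. A minimal Q8 sketch with illustrative values (the helper name is hypothetical):

#include <cstdint>

// Q8 fixed point: the integer v represents the rational value v / 2^8.
int32_t smulFixQ8(int32_t a, int32_t b) {
  const unsigned Scale = 8;                       // number of fractional bits
  return (int32_t)(((int64_t)a * b) >> Scale);    // full product, then shift
}

// Example: 1.5 is 384 in Q8 and 2.0 is 512; smulFixQ8(384, 512) == 768 == 3.0.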
+
unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
switch (ID) {
default:
@@ -1264,10 +1711,14 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FNEARBYINT;
case Intrinsic::pow:
return TargetOpcode::G_FPOW;
+ case Intrinsic::powi:
+ return TargetOpcode::G_FPOWI;
case Intrinsic::rint:
return TargetOpcode::G_FRINT;
case Intrinsic::round:
return TargetOpcode::G_INTRINSIC_ROUND;
+ case Intrinsic::roundeven:
+ return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
case Intrinsic::sin:
return TargetOpcode::G_FSIN;
case Intrinsic::sqrt:
@@ -1278,6 +1729,31 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_READCYCLECOUNTER;
case Intrinsic::ptrmask:
return TargetOpcode::G_PTRMASK;
+ case Intrinsic::lrint:
+ return TargetOpcode::G_INTRINSIC_LRINT;
+ // FADD/FMUL require checking the FMF, so are handled elsewhere.
+ case Intrinsic::vector_reduce_fmin:
+ return TargetOpcode::G_VECREDUCE_FMIN;
+ case Intrinsic::vector_reduce_fmax:
+ return TargetOpcode::G_VECREDUCE_FMAX;
+ case Intrinsic::vector_reduce_add:
+ return TargetOpcode::G_VECREDUCE_ADD;
+ case Intrinsic::vector_reduce_mul:
+ return TargetOpcode::G_VECREDUCE_MUL;
+ case Intrinsic::vector_reduce_and:
+ return TargetOpcode::G_VECREDUCE_AND;
+ case Intrinsic::vector_reduce_or:
+ return TargetOpcode::G_VECREDUCE_OR;
+ case Intrinsic::vector_reduce_xor:
+ return TargetOpcode::G_VECREDUCE_XOR;
+ case Intrinsic::vector_reduce_smax:
+ return TargetOpcode::G_VECREDUCE_SMAX;
+ case Intrinsic::vector_reduce_smin:
+ return TargetOpcode::G_VECREDUCE_SMIN;
+ case Intrinsic::vector_reduce_umax:
+ return TargetOpcode::G_VECREDUCE_UMAX;
+ case Intrinsic::vector_reduce_umin:
+ return TargetOpcode::G_VECREDUCE_UMIN;
}
return Intrinsic::not_intrinsic;
}
@@ -1370,7 +1846,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// Get the underlying objects for the location passed on the lifetime
// marker.
SmallVector<const Value *, 4> Allocas;
- GetUnderlyingObjects(CI.getArgOperand(1), Allocas, *DL);
+ getUnderlyingObjects(CI.getArgOperand(1), Allocas);
// Iterate over each underlying object, creating lifetime markers for each
// static alloca. Quit if we find a non-static alloca.
@@ -1484,6 +1960,37 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
case Intrinsic::ssub_sat:
return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
+ case Intrinsic::ushl_sat:
+ return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
+ case Intrinsic::sshl_sat:
+ return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
+ case Intrinsic::umin:
+ return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
+ case Intrinsic::umax:
+ return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
+ case Intrinsic::smin:
+ return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
+ case Intrinsic::smax:
+ return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
+ case Intrinsic::abs:
+ // TODO: Preserve "int min is poison" arg in GMIR?
+ return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
+ case Intrinsic::smul_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
+ case Intrinsic::umul_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
+ case Intrinsic::smul_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
+ case Intrinsic::umul_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
+ case Intrinsic::sdiv_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
+ case Intrinsic::udiv_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
+ case Intrinsic::sdiv_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
+ case Intrinsic::udiv_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
@@ -1507,10 +2014,24 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
}
return true;
}
+ case Intrinsic::convert_from_fp16:
+ // FIXME: This intrinsic should probably be removed from the IR.
+ MIRBuilder.buildFPExt(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ case Intrinsic::convert_to_fp16:
+ // FIXME: This intrinsic should probably be removed from the IR.
+ MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
case Intrinsic::memcpy:
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
case Intrinsic::memmove:
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
case Intrinsic::memset:
- return translateMemFunc(CI, MIRBuilder, ID);
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
Register Reg = getOrCreateVReg(CI);
@@ -1593,7 +2114,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
}
case Intrinsic::invariant_end:
return true;
+ case Intrinsic::expect:
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group: {
+ // Drop the intrinsic, but forward the value.
+ MIRBuilder.buildCopy(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ }
case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
// Discard annotate attributes, assumptions, and artificial side-effects.
@@ -1613,6 +2145,68 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
}
+ case Intrinsic::localescape: {
+ MachineBasicBlock &EntryMBB = MF->front();
+ StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());
+
+ // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
+ // is the same on all targets.
+ for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
+ Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Skip null pointers. They represent a hole in index space.
+
+ int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
+ MCSymbol *FrameAllocSym =
+ MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
+ Idx);
+
+ // This should be inserted at the start of the entry block.
+ auto LocalEscape =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
+ .addSym(FrameAllocSym)
+ .addFrameIndex(FI);
+
+ EntryMBB.insert(EntryMBB.begin(), LocalEscape);
+ }
+
+ return true;
+ }
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
+ // Need to check for the reassoc flag to decide whether we want a
+ // sequential reduction opcode or not.
+ Register Dst = getOrCreateVReg(CI);
+ Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
+ Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
+ unsigned Opc = 0;
+ if (!CI.hasAllowReassoc()) {
+ // The sequential ordering case.
+ Opc = ID == Intrinsic::vector_reduce_fadd
+ ? TargetOpcode::G_VECREDUCE_SEQ_FADD
+ : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
+ MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ }
+ // We split the operation into a separate G_FADD/G_FMUL + the reduce,
+ // since the associativity doesn't matter.
+ unsigned ScalarOpc;
+ if (ID == Intrinsic::vector_reduce_fadd) {
+ Opc = TargetOpcode::G_VECREDUCE_FADD;
+ ScalarOpc = TargetOpcode::G_FADD;
+ } else {
+ Opc = TargetOpcode::G_VECREDUCE_FMUL;
+ ScalarOpc = TargetOpcode::G_FMUL;
+ }
+ LLT DstTy = MRI->getType(Dst);
+ auto Rdx = MIRBuilder.buildInstr(
+ Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
+ MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
+ MachineInstr::copyFlagsFromInstruction(CI));
+
+ return true;
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -1722,10 +2316,6 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
MIB->copyIRFlags(CI);
for (auto &Arg : enumerate(CI.arg_operands())) {
- // Some intrinsics take metadata parameters. Reject them.
- if (isa<MetadataAsValue>(Arg.value()))
- return false;
-
// If this is required to be an immediate, don't materialize it in a
// register.
if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
@@ -1738,6 +2328,11 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
} else {
MIB.addFPImm(cast<ConstantFP>(Arg.value()));
}
+ } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
+ auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
+ if (!MDN) // This was probably an MDString.
+ return false;
+ MIB.addMetadata(MDN);
} else {
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
if (VRegs.size() > 1)
@@ -1762,6 +2357,62 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
+bool IRTranslator::findUnwindDestinations(
+ const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ EHPersonality Personality = classifyEHPersonality(
+ EHPadBB->getParent()->getFunction().getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
+ bool IsSEH = isAsynchronousEHPersonality(Personality);
+
+ if (IsWasmCXX) {
+ // Ignore this for now.
+ return false;
+ }
+
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ BasicBlock *NewEHPadBB = nullptr;
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
+ break;
+ }
+ if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ break;
+ }
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob);
+ // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsSEH)
+ UnwindDests.back().first->setIsEHScopeEntry();
+ }
+ NewEHPadBB = CatchSwitch->getUnwindDest();
+ } else {
+ continue;
+ }
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI && NewEHPadBB)
+ Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+ EHPadBB = NewEHPadBB;
+ }
+ return true;
+}
+
bool IRTranslator::translateInvoke(const User &U,
MachineIRBuilder &MIRBuilder) {
const InvokeInst &I = cast<InvokeInst>(U);
@@ -1787,7 +2438,7 @@ bool IRTranslator::translateInvoke(const User &U,
return false;
// FIXME: support Windows exception handling.
- if (!isa<LandingPadInst>(EHPadBB->front()))
+ if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
return false;
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
@@ -1801,14 +2452,28 @@ bool IRTranslator::translateInvoke(const User &U,
MCSymbol *EndSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
- // FIXME: track probabilities.
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
+ BranchProbability EHPadBBProb =
+ BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
+ : BranchProbability::getZero();
+
+ if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests))
+ return false;
+
MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
&ReturnMBB = getMBB(*ReturnBB);
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, &ReturnMBB);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+ }
+ InvokeMBB->normalizeSuccProbs();
+
MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
- MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
- MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
MIRBuilder.buildBr(ReturnMBB);
-
return true;
}
@@ -1846,6 +2511,12 @@ bool IRTranslator::translateLandingPad(const User &U,
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
.addSym(MF->addLandingPad(&MBB));
+ // If the unwinder does not preserve all registers, ensure that the
+ // function marks the clobbered registers as used.
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
+ MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
+
LLT Ty = getLLTForType(*LP.getType(), *DL);
Register Undef = MRI->createGenericVirtualRegister(Ty);
MIRBuilder.buildUndef(Undef);
@@ -2184,8 +2855,8 @@ bool IRTranslator::translate(const Instruction &Inst) {
// We only emit constants into the entry block from here. To prevent jumpy
// debug behaviour, set the line to 0.
if (const DebugLoc &DL = Inst.getDebugLoc())
- EntryBuilder->setDebugLoc(
- DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
+ EntryBuilder->setDebugLoc(DILocation::get(
+ Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt()));
else
EntryBuilder->setDebugLoc(DebugLoc());
@@ -2263,6 +2934,57 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
}
void IRTranslator::finalizeBasicBlock() {
+ for (auto &BTB : SL->BitTestCases) {
+ // Emit header first, if it wasn't already emitted.
+ if (!BTB.Emitted)
+ emitBitTestHeader(BTB, BTB.Parent);
+
+ BranchProbability UnhandledProb = BTB.Prob;
+ for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
+ UnhandledProb -= BTB.Cases[j].ExtraProb;
+ // Set the current basic block to the MBB we wish to insert the code into.
+ MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
+ // If all cases cover a contiguous range, it is not necessary to jump to
+ // the default block after the last bit test fails. This is because the
+ // range check during bit test header creation has guaranteed that every
+ // case here doesn't go outside the range. In this case, there is no need
+ // to perform the last bit test, as it will always be true. Instead, make
+ // the second-to-last bit-test fall through to the target of the last bit
+ // test, and delete the last bit test.
+
+ MachineBasicBlock *NextMBB;
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range: fall through to the
+ // target of the final bit test.
+ NextMBB = BTB.Cases[j + 1].TargetBB;
+ } else if (j + 1 == ej) {
+ // For the last bit test, fall through to Default.
+ NextMBB = BTB.Default;
+ } else {
+ // Otherwise, fall through to the next bit test.
+ NextMBB = BTB.Cases[j + 1].ThisBB;
+ }
+
+ emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
+
+ // FIXME delete this block below?
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Since we're not going to use the final bit test, remove it.
+ BTB.Cases.pop_back();
+ break;
+ }
+ }
+ // This is "default" BB. We have two jumps to it. From "header" BB and from
+ // last "case" BB, unless the latter was skipped.
+ CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
+ BTB.Default->getBasicBlock()};
+ addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
+ if (!BTB.ContiguousRange) {
+ addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
+ }
+ }
+ SL->BitTestCases.clear();
+
for (auto &JTCase : SL->JTCases) {
// Emit header first, if it wasn't already emitted.
if (!JTCase.first.Emitted)
@@ -2271,6 +2993,10 @@ void IRTranslator::finalizeBasicBlock() {
emitJumpTable(JTCase.second, JTCase.second.MBB);
}
SL->JTCases.clear();
+
+ for (auto &SwCase : SL->SwitchCases)
+ emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
+ SL->SwitchCases.clear();
}
void IRTranslator::finalizeFunction() {
@@ -2332,14 +3058,23 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
+ const TargetMachine &TM = MF->getTarget();
+ TM.resetTargetOptions(F);
+ EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
FuncInfo.MF = MF;
- FuncInfo.BPI = nullptr;
+ if (EnableOpts)
+ FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ else
+ FuncInfo.BPI = nullptr;
+
+ FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
+
const auto &TLI = *MF->getSubtarget().getTargetLowering();
- const TargetMachine &TM = MF->getTarget();
+
SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
SL->init(TLI, TM, *DL);
- EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+
assert(PendingPHIs.empty() && "stale PHIs");
@@ -2407,7 +3142,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
}
}
- if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) {
+ if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 1e2a82615da8..bb4d41cfd69f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -562,6 +562,11 @@ bool InlineAsmLowering::lowerInlineAsm(
}
unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs);
+ if (OpInfo.Regs.front().isVirtual()) {
+ // Put the register class of the virtual registers in the flag word.
+ const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
Inst.addImm(Flag);
if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder))
return false;
@@ -657,6 +662,7 @@ bool InlineAsmLowering::lowerAsmOperandForConstraint(
default:
return false;
case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // immediate integer with a known value.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
assert(CI->getBitWidth() <= 64 &&
"expected immediate to fit into 64-bits");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index f32278d07052..25fae5487187 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -41,7 +41,7 @@ static cl::opt<std::string>
cl::desc("Record GlobalISel rule coverage files of this "
"prefix if instrumentation was generated"));
#else
-static const std::string CoveragePrefix = "";
+static const std::string CoveragePrefix;
#endif
char InstructionSelect::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 2fedc034d315..4fec9e628ddb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -33,24 +33,12 @@ InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers)
InstructionSelector::InstructionSelector() = default;
-bool InstructionSelector::constrainOperandRegToRegClass(
- MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC,
- const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const {
- MachineBasicBlock &MBB = *I.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC,
- I.getOperand(OpIdx));
-}
-
bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
if (MO.isReg() && MO.getReg())
if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI))
- return VRegVal->Value == Value;
+ return VRegVal->Value.getSExtValue() == Value;
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index a83742f2138f..1993f6033291 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -10,6 +10,17 @@
//
//===----------------------------------------------------------------------===//
+// Enable optimizations to work around MSVC debug mode bug in 32-bit:
+// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html
+// FIXME: Remove this when the issue is closed.
+#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)
+// We have to disable runtime checks in order to enable optimizations. This is
+// done for the entire file because the problem is actually observed in STL
+// template functions.
+#pragma runtime_checks("", off)
+#pragma optimize("gs", on)
+#endif
+
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
using namespace llvm;
@@ -24,7 +35,7 @@ LegalityPredicates::typeInSet(unsigned TypeIdx,
std::initializer_list<LLT> TypesInit) {
SmallVector<LLT, 4> Types = TypesInit;
return [=](const LegalityQuery &Query) {
- return std::find(Types.begin(), Types.end(), Query.Types[TypeIdx]) != Types.end();
+ return llvm::is_contained(Types, Query.Types[TypeIdx]);
};
}
@@ -34,7 +45,7 @@ LegalityPredicate LegalityPredicates::typePairInSet(
SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit;
return [=](const LegalityQuery &Query) {
std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]};
- return std::find(Types.begin(), Types.end(), Match) != Types.end();
+ return llvm::is_contained(Types, Match);
};
}
@@ -46,11 +57,10 @@ LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
Query.MMODescrs[MMOIdx].SizeInBits,
Query.MMODescrs[MMOIdx].AlignInBits};
- return std::find_if(
- TypesAndMemDesc.begin(), TypesAndMemDesc.end(),
- [=](const TypePairAndMemDesc &Entry) ->bool {
- return Match.isCompatible(Entry);
- }) != TypesAndMemDesc.end();
+ return llvm::any_of(TypesAndMemDesc,
+ [=](const TypePairAndMemDesc &Entry) -> bool {
+ return Match.isCompatible(Entry);
+ });
};
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index fcbecf90a845..f3ba3f080198 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -43,6 +43,16 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
};
}
+LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ const LLT NewTy = Query.Types[FromTypeIdx];
+ const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits());
+ return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy));
+ };
+}
+
LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
unsigned Min) {
return [=](const LegalityQuery &Query) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 1d7be54de3b0..5ba9367cac8a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -284,7 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
WrapperObserver)) {
WorkListObserver.printNewInstrs();
for (auto *DeadMI : DeadInstructions) {
- LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n");
+ LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI);
RemoveDeadInstFromLists(DeadMI);
DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 244e7a9583d6..66871ca3b926 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -29,6 +30,7 @@
using namespace llvm;
using namespace LegalizeActions;
+using namespace MIPatternMatch;
/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
@@ -75,6 +77,8 @@ static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
return Type::getFloatTy(Ctx);
case 64:
return Type::getDoubleTy(Ctx);
+ case 80:
+ return Type::getX86_FP80Ty(Ctx);
case 128:
return Type::getFP128Ty(Ctx);
default:
@@ -86,16 +90,15 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF,
GISelChangeObserver &Observer,
MachineIRBuilder &Builder)
: MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
- LI(*MF.getSubtarget().getLegalizerInfo()) {
- MIRBuilder.setChangeObserver(Observer);
-}
+ LI(*MF.getSubtarget().getLegalizerInfo()),
+ TLI(*MF.getSubtarget().getTargetLowering()) { }
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
GISelChangeObserver &Observer,
MachineIRBuilder &B)
- : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI) {
- MIRBuilder.setChangeObserver(Observer);
-}
+ : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
+ TLI(*MF.getSubtarget().getTargetLowering()) { }
+
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
@@ -237,22 +240,21 @@ void LegalizerHelper::insertParts(Register DstReg,
}
}
-/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
+/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+ const int StartIdx = Regs.size();
const int NumResults = MI.getNumOperands() - 1;
- Regs.resize(NumResults);
+ Regs.resize(Regs.size() + NumResults);
for (int I = 0; I != NumResults; ++I)
- Regs[I] = MI.getOperand(I).getReg();
+ Regs[StartIdx + I] = MI.getOperand(I).getReg();
}
-LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
- LLT NarrowTy, Register SrcReg) {
+void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
+ LLT GCDTy, Register SrcReg) {
LLT SrcTy = MRI.getType(SrcReg);
-
- LLT GCDTy = getGCDType(DstTy, getGCDType(SrcTy, NarrowTy));
if (SrcTy == GCDTy) {
// If the source already evenly divides the result type, we don't need to do
// anything.
@@ -262,7 +264,13 @@ LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
getUnmergeResults(Parts, *Unmerge);
}
+}
+LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
+ LLT NarrowTy, Register SrcReg) {
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+ extractGCDType(Parts, GCDTy, SrcReg);
return GCDTy;
}
@@ -376,7 +384,14 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
}
if (LCMTy.isVector()) {
- MIRBuilder.buildExtract(DstReg, Remerge, 0);
+ unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
+ SmallVector<Register, 8> UnmergeDefs(NumDefs);
+ UnmergeDefs[0] = DstReg;
+ for (unsigned I = 1; I != NumDefs; ++I)
+ UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
+
+ MIRBuilder.buildUnmerge(UnmergeDefs,
+ MIRBuilder.buildMerge(LCMTy, RemergeRegs));
return;
}
@@ -384,7 +399,7 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
}
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
-#define RTLIBCASE(LibcallPrefix) \
+#define RTLIBCASE_INT(LibcallPrefix) \
do { \
switch (Size) { \
case 32: \
@@ -398,19 +413,33 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
} \
} while (0)
- assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+#define RTLIBCASE(LibcallPrefix) \
+ do { \
+ switch (Size) { \
+ case 32: \
+ return RTLIB::LibcallPrefix##32; \
+ case 64: \
+ return RTLIB::LibcallPrefix##64; \
+ case 80: \
+ return RTLIB::LibcallPrefix##80; \
+ case 128: \
+ return RTLIB::LibcallPrefix##128; \
+ default: \
+ llvm_unreachable("unexpected size"); \
+ } \
+ } while (0)
switch (Opcode) {
case TargetOpcode::G_SDIV:
- RTLIBCASE(SDIV_I);
+ RTLIBCASE_INT(SDIV_I);
case TargetOpcode::G_UDIV:
- RTLIBCASE(UDIV_I);
+ RTLIBCASE_INT(UDIV_I);
case TargetOpcode::G_SREM:
- RTLIBCASE(SREM_I);
+ RTLIBCASE_INT(SREM_I);
case TargetOpcode::G_UREM:
- RTLIBCASE(UREM_I);
+ RTLIBCASE_INT(UREM_I);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
- RTLIBCASE(CTLZ_I);
+ RTLIBCASE_INT(CTLZ_I);
case TargetOpcode::G_FADD:
RTLIBCASE(ADD_F);
case TargetOpcode::G_FSUB:
@@ -453,13 +482,16 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(RINT_F);
case TargetOpcode::G_FNEARBYINT:
RTLIBCASE(NEARBYINT_F);
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ RTLIBCASE(ROUNDEVEN_F);
}
llvm_unreachable("Unknown libcall function");
}
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
-static bool isLibCallInTailPosition(MachineInstr &MI) {
+static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
+ MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const Function &F = MBB.getParent()->getFunction();
@@ -479,7 +511,6 @@ static bool isLibCallInTailPosition(MachineInstr &MI) {
return false;
// Only tail call if the following instruction is a standard return.
- auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
return false;
@@ -531,12 +562,11 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
SmallVector<CallLowering::ArgInfo, 3> Args;
// Add all the args, except for the last which is an imm denoting 'tail'.
- for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
+ for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
Register Reg = MI.getOperand(i).getReg();
// Need derive an IR type for call lowering.
@@ -551,31 +581,28 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
- Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
RTLIB::Libcall RTLibcall;
- switch (ID) {
- case Intrinsic::memcpy:
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_MEMCPY:
RTLibcall = RTLIB::MEMCPY;
break;
- case Intrinsic::memset:
- RTLibcall = RTLIB::MEMSET;
- break;
- case Intrinsic::memmove:
+ case TargetOpcode::G_MEMMOVE:
RTLibcall = RTLIB::MEMMOVE;
break;
+ case TargetOpcode::G_MEMSET:
+ RTLibcall = RTLIB::MEMSET;
+ break;
default:
return LegalizerHelper::UnableToLegalize;
}
const char *Name = TLI.getLibcallName(RTLibcall);
- MIRBuilder.setInstrAndDebugLoc(MI);
-
CallLowering::CallLoweringInfo Info;
Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
Info.Callee = MachineOperand::CreateES(Name);
Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
- Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
- isLibCallInTailPosition(MI);
+ Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
+ isLibCallInTailPosition(MIRBuilder.getTII(), MI);
std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
if (!CLI.lowerCall(MIRBuilder, Info))
@@ -668,10 +695,11 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FRINT:
- case TargetOpcode::G_FNEARBYINT: {
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
- if (!HLTy || (Size != 32 && Size != 64 && Size != 128)) {
- LLVM_DEBUG(dbgs() << "No libcall available for size " << Size << ".\n");
+ if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
+ LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
return UnableToLegalize;
}
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
@@ -720,6 +748,13 @@ LegalizerHelper::libcall(MachineInstr &MI) {
return Status;
break;
}
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET: {
+ LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
+ MI.eraseFromParent();
+ return Result;
+ }
}
MI.eraseFromParent();
@@ -900,7 +935,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
case TargetOpcode::G_INSERT:
return narrowScalarInsert(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_LOAD: {
- const auto &MMO = **MI.memoperands_begin();
+ auto &MMO = **MI.memoperands_begin();
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
@@ -908,7 +943,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- auto &MMO = **MI.memoperands_begin();
MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
MIRBuilder.buildAnyExt(DstReg, TmpReg);
MI.eraseFromParent();
@@ -925,10 +959,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = **MI.memoperands_begin();
- if (MMO.getSizeInBits() == NarrowSize) {
+ unsigned MemSize = MMO.getSizeInBits();
+
+ if (MemSize == NarrowSize) {
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
- } else {
+ } else if (MemSize < NarrowSize) {
MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
+ } else if (MemSize > NarrowSize) {
+ // FIXME: Need to split the load.
+ return UnableToLegalize;
}
if (ZExt)
@@ -1024,6 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+
unsigned NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs(NumParts);
SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
@@ -1204,6 +1248,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_PTRMASK: {
if (TypeIdx != 1)
return UnableToLegalize;
@@ -1212,6 +1257,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FPTOSI:
+ return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_FPEXT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
}
}
@@ -1272,10 +1327,8 @@ void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
unsigned OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
- Register DstExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildExtract(MO, DstExt, 0);
- MO.setReg(DstExt);
+ MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
}
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
@@ -1443,6 +1496,40 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
+Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
+ Register WideReg = MRI.createGenericVirtualRegister(WideTy);
+ LLT OrigTy = MRI.getType(OrigReg);
+ LLT LCMTy = getLCMType(WideTy, OrigTy);
+
+ const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
+ const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
+
+ Register UnmergeSrc = WideReg;
+
+ // Create a merge to the LCM type, padding with undef
+ // %0:_(<3 x s32>) = G_FOO => <4 x s32>
+ // =>
+ // %1:_(<4 x s32>) = G_FOO
+ // %2:_(<4 x s32>) = G_IMPLICIT_DEF
+ // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
+ // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
+ if (NumMergeParts > 1) {
+ Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
+ SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
+ MergeParts[0] = WideReg;
+ UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
+ }
+
+ // Unmerge to the original register and pad with dead defs.
+ SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
+ UnmergeResults[0] = OrigReg;
+ for (int I = 1; I != NumUnmergeParts; ++I)
+ UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
+
+ MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
+ return WideReg;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
@@ -1512,35 +1599,60 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
- // Create a sequence of unmerges to the original results. since we may have
- // widened the source, we will need to pad the results with dead defs to cover
- // the source register.
- // e.g. widen s16 to s32:
- // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48)
+ // Create a sequence of unmerges and merges to the original results. Since we
+ // may have widened the source, we will need to pad the results with dead defs
+ // to cover the source register.
+ // e.g. widen s48 to s64:
+ // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
//
// =>
- // %4:_(s64) = G_ANYEXT %0:_(s48)
- // %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge
- // %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs
- // %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def
-
+ // %4:_(s192) = G_ANYEXT %0:_(s96)
+ // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
+ // ; unpack to GCD type, with extra dead defs
+ // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
+ // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
+ // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
+ // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
+ // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
+ const LLT GCDTy = getGCDType(WideTy, DstTy);
const int NumUnmerge = Unmerge->getNumOperands() - 1;
- const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
-
- for (int I = 0; I != NumUnmerge; ++I) {
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
-
- for (int J = 0; J != PartsPerUnmerge; ++J) {
- int Idx = I * PartsPerUnmerge + J;
- if (Idx < NumDst)
- MIB.addDef(MI.getOperand(Idx).getReg());
- else {
- // Create dead def for excess components.
- MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+ const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
+
+ // Directly unmerge to the destination without going through a GCD type
+ // if possible
+ if (PartsPerRemerge == 1) {
+ const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
+
+ for (int I = 0; I != NumUnmerge; ++I) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+
+ for (int J = 0; J != PartsPerUnmerge; ++J) {
+ int Idx = I * PartsPerUnmerge + J;
+ if (Idx < NumDst)
+ MIB.addDef(MI.getOperand(Idx).getReg());
+ else {
+ // Create dead def for excess components.
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+ }
}
+
+ MIB.addUse(Unmerge.getReg(I));
}
+ } else {
+ SmallVector<Register, 16> Parts;
+ for (int J = 0; J != NumUnmerge; ++J)
+ extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
+
+ SmallVector<Register, 8> RemergeParts;
+ for (int I = 0; I != NumDst; ++I) {
+ for (int J = 0; J < PartsPerRemerge; ++J) {
+ const int Idx = I * PartsPerRemerge + J;
+ RemergeParts.emplace_back(Parts[Idx]);
+ }
- MIB.addUse(Unmerge.getReg(I));
+ MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
+ RemergeParts.clear();
+ }
}
MI.eraseFromParent();
@@ -1590,8 +1702,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
Src = MIRBuilder.buildAnyExt(WideTy, Src);
ShiftTy = WideTy;
- } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
- return UnableToLegalize;
+ }
auto LShr = MIRBuilder.buildLShr(
ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
@@ -1629,7 +1740,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
- if (TypeIdx != 0)
+ if (TypeIdx != 0 || WideTy.isVector())
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
@@ -1639,14 +1750,45 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
+LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx == 1)
+ return UnableToLegalize; // TODO
+ unsigned Op = MI.getOpcode();
+ unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO
+ ? TargetOpcode::G_ADD
+ : TargetOpcode::G_SUB;
+ unsigned ExtOpcode =
+ Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO
+ ? TargetOpcode::G_ZEXT
+ : TargetOpcode::G_SEXT;
+ auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
+ auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
+ // Do the arithmetic in the larger type.
+ auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt});
+ LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
+ auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
+ auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
+ // There is no overflow if the ExtOp is the same as NewOp.
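+ // e.g. (illustrative) G_UADDO on s8 widened to s32: 200 + 100 = 300 in s32;
+ // trunc to s8 gives 44 and zext back gives 44 != 300, so overflow is set.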
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
+ // Now trunc the NewOp to the original result.
+ MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
- MI.getOpcode() == TargetOpcode::G_SSUBSAT;
+ MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
+ MI.getOpcode() == TargetOpcode::G_SSHLSAT;
+ bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
+ MI.getOpcode() == TargetOpcode::G_USHLSAT;
// We can convert this to:
// 1. Any extend iN to iM
// 2. SHL by M-N
- // 3. [US][ADD|SUB]SAT
+ // 3. [US][ADD|SUB|SHL]SAT
// 4. L/ASHR by M-N
//
// It may be more efficient to lower this to a min and a max operation in
@@ -1657,11 +1799,14 @@ LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx,
unsigned NewBits = WideTy.getScalarSizeInBits();
unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
+ // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
+ // must not left-shift the RHS, so that the shift amount is preserved.
auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
- auto RHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
+ auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
+ : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
- auto ShiftR = MIRBuilder.buildShl(WideTy, RHS, ShiftK);
+ auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
{ShiftL, ShiftR}, MI.getFlags());
@@ -1689,34 +1834,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return widenScalarMergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_UNMERGE_VALUES:
return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
case TargetOpcode::G_UADDO:
- case TargetOpcode::G_USUBO: {
- if (TypeIdx == 1)
- return UnableToLegalize; // TODO
- auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
- auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
- unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
- ? TargetOpcode::G_ADD
- : TargetOpcode::G_SUB;
- // Do the arithmetic in the larger type.
- auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
- LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
- APInt Mask =
- APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
- auto AndOp = MIRBuilder.buildAnd(
- WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
- // There is no overflow if the AndOp is the same as NewOp.
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
- // Now trunc the NewOp to the original result.
- MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
- MI.eraseFromParent();
- return Legalized;
- }
+ case TargetOpcode::G_USUBO:
+ return widenScalarAddoSubo(MI, TypeIdx, WideTy);
case TargetOpcode::G_SADDSAT:
case TargetOpcode::G_SSUBSAT:
+ case TargetOpcode::G_SSHLSAT:
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:
- return widenScalarAddSubSat(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_USHLSAT:
+ return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
@@ -1908,21 +2037,25 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SITOFP:
- if (TypeIdx != 1)
- return UnableToLegalize;
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+
Observer.changedInstr(MI);
return Legalized;
-
case TargetOpcode::G_UITOFP:
- if (TypeIdx != 1)
- return UnableToLegalize;
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+
Observer.changedInstr(MI);
return Legalized;
-
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
@@ -1936,7 +2069,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return UnableToLegalize;
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- if (!isPowerOf2_32(Ty.getSizeInBits()))
+ if (!Ty.isScalar())
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -2134,6 +2267,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPOW:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
assert(TypeIdx == 0);
Observer.changingInstr(MI);
@@ -2143,6 +2277,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_FPOWI: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
case TargetOpcode::G_INTTOPTR:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -2169,8 +2312,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
// Avoid changing the result vector type if the source element type was
// requested.
if (TypeIdx == 1) {
- auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
- MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
+ MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
} else {
widenScalarDst(MI, WideTy, 0);
}
@@ -2273,6 +2415,376 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) {
return UnableToLegalize;
}
+/// Figure out the bit offset into a register when coercing a vector index for
+/// the wide element type. This is only for the case when promoting a vector to
+/// one with larger elements.
+///
+/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
+/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
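+///
+/// e.g. (illustrative) extracting s8 element %idx from a <16 x s8> viewed as
+/// <4 x s32>: the element ratio is 4, so %offset_idx = %idx & 3 and
+/// %offset_bits = %offset_idx << 3, the bit position within the wide s32.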
+static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
+ Register Idx,
+ unsigned NewEltSize,
+ unsigned OldEltSize) {
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ LLT IdxTy = B.getMRI()->getType(Idx);
+
+ // Now figure out the amount we need to shift to get the target bits.
+ auto OffsetMask = B.buildConstant(
+ IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
+ auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
+ return B.buildShl(IdxTy, OffsetIdx,
+ B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
+}
+
+/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
+/// is casting to a vector with a smaller element size, perform multiple element
+/// extracts and merge the results. If this is coercing to a vector with larger
+/// elements, index the bitcasted vector and extract the target element with bit
+/// operations. This is intended to force the indexing in the native register
+/// size for architectures that can dynamically index the register file.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register Idx = MI.getOperand(2).getReg();
+ LLT SrcVecTy = MRI.getType(SrcVec);
+ LLT IdxTy = MRI.getType(Idx);
+
+ LLT SrcEltTy = SrcVecTy.getElementType();
+ unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
+ unsigned OldNumElts = SrcVecTy.getNumElements();
+
+ LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
+ Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
+
+ const unsigned NewEltSize = NewEltTy.getSizeInBits();
+ const unsigned OldEltSize = SrcEltTy.getSizeInBits();
+ if (NewNumElts > OldNumElts) {
+ // Decreasing the vector element size
+ //
+ // e.g. i64 = extract_vector_elt x:v2i64, y:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ //
+ // i64 = bitcast
+ // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
+ // (i32 (extract_vector_elt castx, (2 * y + 1)))
+ //
+ if (NewNumElts % OldNumElts != 0)
+ return UnableToLegalize;
+
+ // Type of the intermediate result vector.
+ const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
+ LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);
+
+ auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
+
+ SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
+ auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
+
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
+ auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
+ auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
+ NewOps[I] = Elt.getReg(0);
+ }
+
+ auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
+ MIRBuilder.buildBitcast(Dst, NewVec);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (NewNumElts < OldNumElts) {
+ if (NewEltSize % OldEltSize != 0)
+ return UnableToLegalize;
+
+ // This only depends on powers of 2 because we use bit tricks to figure out
+ // the bit offset we need to shift to get the target element. A general
+ // expansion could emit division/multiply.
+ if (!isPowerOf2_32(NewEltSize / OldEltSize))
+ return UnableToLegalize;
+
+ // Increasing the vector element size.
+ // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
+ //
+ // =>
+ //
+ // %cast = G_BITCAST %vec
+ // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
+ // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
+ // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
+ // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
+ // %elt_bits = G_LSHR %wide_elt, %offset_bits
+ // %elt = G_TRUNC %elt_bits
+
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
+
+ // Divide to get the index in the wider element type.
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
+
+ Register WideElt = CastVec;
+ if (CastTy.isVector()) {
+ WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
+ ScaledIdx).getReg(0);
+ }
+
+ // Compute the bit offset into the register of the target element.
+ Register OffsetBits = getBitcastWiderVectorElementOffset(
+ MIRBuilder, Idx, NewEltSize, OldEltSize);
+
+ // Shift the wide element to get the target element.
+ auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
+ MIRBuilder.buildTrunc(Dst, ExtractedBits);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+/// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
+/// OffsetBits, while preserving the other bits in \p TargetReg.
+///
+/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
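+///
+/// e.g. (illustrative) for a 32-bit TargetReg = 0xAABBCCDD, an 8-bit
+/// InsertReg = 0x11 and Offset = 8: the shifted mask is 0xFF00, the cleared
+/// target is 0xAABB00DD, and the result is 0xAABB11DD.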
+static Register buildBitFieldInsert(MachineIRBuilder &B,
+ Register TargetReg, Register InsertReg,
+ Register OffsetBits) {
+ LLT TargetTy = B.getMRI()->getType(TargetReg);
+ LLT InsertTy = B.getMRI()->getType(InsertReg);
+ auto ZextVal = B.buildZExt(TargetTy, InsertReg);
+ auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
+
+ // Produce a bitmask of the value to insert
+ auto EltMask = B.buildConstant(
+ TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
+ InsertTy.getSizeInBits()));
+ // Shift it into position
+ auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
+ auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
+
+ // Clear out the bits in the wide element
+ auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
+
+ // The value to insert has all zeros already, so stick it into the masked
+ // wide element.
+ return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
+}
+
+/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
+/// is increasing the element size, perform the indexing in the target element
+/// type, and use bit operations to insert at the element position. This is
+/// intended for architectures that can dynamically index the register file and
+/// want to force indexing in the native register size.
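+///
+/// e.g. (illustrative) inserting an s8 into a <16 x s8> through a <4 x s32>
+/// cast: divide the index by 4 to pick the containing s32 element, extract it,
+/// splice the s8 into the right byte with shift/and/or, insert the s32 back at
+/// the scaled index, and bitcast the result.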
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register Val = MI.getOperand(2).getReg();
+ Register Idx = MI.getOperand(3).getReg();
+
+ LLT VecTy = MRI.getType(Dst);
+ LLT IdxTy = MRI.getType(Idx);
+
+ LLT VecEltTy = VecTy.getElementType();
+ LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
+ const unsigned NewEltSize = NewEltTy.getSizeInBits();
+ const unsigned OldEltSize = VecEltTy.getSizeInBits();
+
+ unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
+ unsigned OldNumElts = VecTy.getNumElements();
+
+ Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
+ if (NewNumElts < OldNumElts) {
+ if (NewEltSize % OldEltSize != 0)
+ return UnableToLegalize;
+
+ // This only depends on powers of 2 because we use bit tricks to figure out
+ // the bit offset we need to shift to get the target element. A general
+ // expansion could emit division/multiply.
+ if (!isPowerOf2_32(NewEltSize / OldEltSize))
+ return UnableToLegalize;
+
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
+
+ // Divide to get the index in the wider element type.
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
+
+ Register ExtractedElt = CastVec;
+ if (CastTy.isVector()) {
+ ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
+ ScaledIdx).getReg(0);
+ }
+
+ // Compute the bit offset into the register of the target element.
+ Register OffsetBits = getBitcastWiderVectorElementOffset(
+ MIRBuilder, Idx, NewEltSize, OldEltSize);
+
+ Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
+ Val, OffsetBits);
+ if (CastTy.isVector()) {
+ InsertedElt = MIRBuilder.buildInsertVectorElement(
+ CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
+ }
+
+ MIRBuilder.buildBitcast(Dst, InsertedElt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerLoad(MachineInstr &MI) {
+ // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
+ Register DstReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ auto &MMO = **MI.memoperands_begin();
+
+ if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
+ if (MI.getOpcode() == TargetOpcode::G_LOAD) {
+ // This load needs splitting into power of 2 sized loads.
+ if (DstTy.isVector())
+ return UnableToLegalize;
+ if (isPowerOf2_32(DstTy.getSizeInBits()))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Our strategy here is to generate anyextending loads for the smaller
+ // types up to the next power-of-2 result type, and then combine the two
+ // larger results, before truncating back down to the non-pow-2 type.
+ // E.g. v1 = i24 load =>
+ // v2 = i32 zextload (2 byte)
+ // v3 = i32 load (1 byte)
+ // v4 = i32 shl v3, 16
+ // v5 = i32 or v4, v2
+ // v1 = i24 trunc v5
+ // By doing this we generate the correct truncate which should get
+ // combined away as an artifact with a matching extend.
+ uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
+ uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
+ &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
+ LLT AnyExtTy = LLT::scalar(AnyExtSize);
+ Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+ Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+ auto LargeLoad = MIRBuilder.buildLoadInstr(
+ TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
+
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
+ auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
+ *SmallMMO);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
+ auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+ MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (DstTy.isScalar()) {
+ Register TmpReg =
+ MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
+ MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case TargetOpcode::G_LOAD:
+ MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
+ break;
+ case TargetOpcode::G_SEXTLOAD:
+ MIRBuilder.buildSExt(DstReg, TmpReg);
+ break;
+ case TargetOpcode::G_ZEXTLOAD:
+ MIRBuilder.buildZExt(DstReg, TmpReg);
+ break;
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerStore(MachineInstr &MI) {
+ // Lower a non-power of 2 store into multiple pow-2 stores.
+ // E.g. split an i24 store into an i16 store + i8 store.
+ // We do this by first extending the stored value to the next largest power
+ // of 2 type, and then using truncating stores to store the components.
+ // By doing this, as with G_LOAD, we generate an extend that can be
+ // artifact-combined away instead of leaving behind extracts.
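+ // e.g. (illustrative) an s24 store of %v at %p becomes:
+ // %ext:_(s32) = G_ANYEXT %v
+ // %hi:_(s32) = G_LSHR %ext, 16
+ // G_STORE %ext, %p ; truncating 2-byte store
+ // G_STORE %hi, %p + 2 ; truncating 1-byte store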
+ Register SrcReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+ if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
+ return UnableToLegalize;
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+ if (isPowerOf2_32(SrcTy.getSizeInBits()))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Extend to the next pow-2.
+ const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
+ auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
+
+ // Obtain the smaller value by shifting away the larger value.
+ uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
+ uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
+ auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
+ auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
+
+ // Generate the PtrAdd and truncating stores.
+ LLT PtrTy = MRI.getType(PtrReg);
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO =
+ MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+ MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
+ MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
switch (MI.getOpcode()) {
@@ -2321,13 +2833,24 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
+ return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
default:
return UnableToLegalize;
}
}
+// Legalize an instruction by changing the opcode in place.
+void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
+ Observer.changingInstr(MI);
+ MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
+ Observer.changedInstr(MI);
+}
+
LegalizerHelper::LegalizeResult
-LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
using namespace TargetOpcode;
switch(MI.getOpcode()) {
@@ -2337,6 +2860,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerBitcast(MI);
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
auto Quot =
MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
{MI.getOperand(1), MI.getOperand(2)});
@@ -2349,6 +2873,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case TargetOpcode::G_SADDO:
case TargetOpcode::G_SSUBO:
return lowerSADDO_SSUBO(MI);
+ case TargetOpcode::G_UMULH:
+ case TargetOpcode::G_SMULH:
+ return lowerSMULH_UMULH(MI);
case TargetOpcode::G_SMULO:
case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
@@ -2357,6 +2884,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
Register Overflow = MI.getOperand(1).getReg();
Register LHS = MI.getOperand(2).getReg();
Register RHS = MI.getOperand(3).getReg();
+ LLT Ty = MRI.getType(Res);
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
? TargetOpcode::G_SMULH
@@ -2386,31 +2914,29 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case TargetOpcode::G_FNEG: {
+ Register Res = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Res);
+
// TODO: Handle vector types once we are able to
// represent them.
if (Ty.isVector())
return UnableToLegalize;
- Register Res = MI.getOperand(0).getReg();
- LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
- if (!ZeroTy)
- return UnableToLegalize;
- ConstantFP &ZeroForNegation =
- *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
- auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
+ auto SignMask =
+ MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
Register SubByReg = MI.getOperand(1).getReg();
- Register ZeroReg = Zero.getReg(0);
- MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
+ MIRBuilder.buildXor(Res, SubByReg, SignMask);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_FSUB: {
+ Register Res = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Res);
+
// Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
// First, check if G_FNEG is marked as Lower. If so, we may
// end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
return UnableToLegalize;
- Register Res = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
Register Neg = MRI.createGenericVirtualRegister(Ty);
@@ -2425,6 +2951,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerFFloor(MI);
case TargetOpcode::G_INTRINSIC_ROUND:
return lowerIntrinsicRound(MI);
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ // Since round even is the assumed rounding mode for unconstrained FP
+ // operations, rint and roundeven are the same operation.
+ changeOpcode(MI, TargetOpcode::G_FRINT);
+ return Legalized;
+ }
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Register OldValRes = MI.getOperand(0).getReg();
Register SuccessRes = MI.getOperand(1).getReg();
@@ -2439,145 +2971,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
- case TargetOpcode::G_ZEXTLOAD: {
- // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
- Register DstReg = MI.getOperand(0).getReg();
- Register PtrReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- auto &MMO = **MI.memoperands_begin();
-
- if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
- if (MI.getOpcode() == TargetOpcode::G_LOAD) {
- // This load needs splitting into power of 2 sized loads.
- if (DstTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(DstTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
- // Our strategy here is to generate anyextending loads for the smaller
- // types up to next power-2 result type, and then combine the two larger
- // result values together, before truncating back down to the non-pow-2
- // type.
- // E.g. v1 = i24 load =>
- // v2 = i32 zextload (2 byte)
- // v3 = i32 load (1 byte)
- // v4 = i32 shl v3, 16
- // v5 = i32 or v4, v2
- // v1 = i24 trunc v5
- // By doing this we generate the correct truncate which should get
- // combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
- uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
-
- MachineFunction &MF = MIRBuilder.getMF();
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
- &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
-
- LLT PtrTy = MRI.getType(PtrReg);
- unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
- LLT AnyExtTy = LLT::scalar(AnyExtSize);
- Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
- Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
- auto LargeLoad = MIRBuilder.buildLoadInstr(
- TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
-
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
- auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
- *SmallMMO);
-
- auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
- auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
- auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
- MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
- MI.eraseFromParent();
- return Legalized;
- }
- MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
- MI.eraseFromParent();
- return Legalized;
- }
-
- if (DstTy.isScalar()) {
- Register TmpReg =
- MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
- MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode");
- case TargetOpcode::G_LOAD:
- MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
- break;
- case TargetOpcode::G_SEXTLOAD:
- MIRBuilder.buildSExt(DstReg, TmpReg);
- break;
- case TargetOpcode::G_ZEXTLOAD:
- MIRBuilder.buildZExt(DstReg, TmpReg);
- break;
- }
- MI.eraseFromParent();
- return Legalized;
- }
-
- return UnableToLegalize;
- }
- case TargetOpcode::G_STORE: {
- // Lower a non-power of 2 store into multiple pow-2 stores.
- // E.g. split an i24 store into an i16 store + i8 store.
- // We do this by first extending the stored value to the next largest power
- // of 2 type, and then using truncating stores to store the components.
- // By doing this, likewise with G_LOAD, generate an extend that can be
- // artifact-combined away instead of leaving behind extracts.
- Register SrcReg = MI.getOperand(0).getReg();
- Register PtrReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- MachineMemOperand &MMO = **MI.memoperands_begin();
- if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
- return UnableToLegalize;
- if (SrcTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(SrcTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
- // Extend to the next pow-2.
- const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
- auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
-
- // Obtain the smaller value by shifting away the larger value.
- uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
- uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
- auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
- auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
-
- // Generate the PtrAdd and truncating stores.
- LLT PtrTy = MRI.getType(PtrReg);
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
-
- MachineFunction &MF = MIRBuilder.getMF();
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO =
- MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
- MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
- MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
- MI.eraseFromParent();
- return Legalized;
- }
+ case TargetOpcode::G_ZEXTLOAD:
+ return lowerLoad(MI);
+ case TargetOpcode::G_STORE:
+ return lowerStore(MI);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTPOP:
- return lowerBitCount(MI, TypeIdx, Ty);
+ return lowerBitCount(MI);
case G_UADDO: {
Register Res = MI.getOperand(0).getReg();
Register CarryOut = MI.getOperand(1).getReg();
@@ -2639,22 +3042,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case G_UITOFP:
- return lowerUITOFP(MI, TypeIdx, Ty);
+ return lowerUITOFP(MI);
case G_SITOFP:
- return lowerSITOFP(MI, TypeIdx, Ty);
+ return lowerSITOFP(MI);
case G_FPTOUI:
- return lowerFPTOUI(MI, TypeIdx, Ty);
+ return lowerFPTOUI(MI);
case G_FPTOSI:
return lowerFPTOSI(MI);
case G_FPTRUNC:
- return lowerFPTRUNC(MI, TypeIdx, Ty);
+ return lowerFPTRUNC(MI);
+ case G_FPOWI:
+ return lowerFPOWI(MI);
case G_SMIN:
case G_SMAX:
case G_UMIN:
case G_UMAX:
- return lowerMinMax(MI, TypeIdx, Ty);
+ return lowerMinMax(MI);
case G_FCOPYSIGN:
- return lowerFCopySign(MI, TypeIdx, Ty);
+ return lowerFCopySign(MI);
case G_FMINNUM:
case G_FMAXNUM:
return lowerFMinNumMaxNum(MI);
@@ -2677,6 +3082,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case G_EXTRACT_VECTOR_ELT:
+ case G_INSERT_VECTOR_ELT:
+ return lowerExtractInsertVectorElt(MI);
case G_SHUFFLE_VECTOR:
return lowerShuffleVector(MI);
case G_DYN_STACKALLOC:
@@ -2692,33 +3100,123 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case G_READ_REGISTER:
case G_WRITE_REGISTER:
return lowerReadWriteRegister(MI);
+ case G_UADDSAT:
+ case G_USUBSAT: {
+ // Try to make a reasonable guess about which lowering strategy to use. The
+ // target can override this by requesting custom lowering and calling the
+ // implementation functions directly.
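+ // e.g. a min/max based form (illustrative sketch): G_UADDSAT a, b can be
+ // built as a + umin(~a, b), and G_USUBSAT a, b as a - umin(a, b).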
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (LI.isLegalOrCustom({G_UMIN, Ty}))
+ return lowerAddSubSatToMinMax(MI);
+ return lowerAddSubSatToAddoSubo(MI);
+ }
+ case G_SADDSAT:
+ case G_SSUBSAT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ // FIXME: It would probably make more sense to see if G_SADDO is preferred,
+ // since it's a shorter expansion. However, we would need to figure out the
+ // preferred boolean type for the carry out for the query.
+ if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
+ return lowerAddSubSatToMinMax(MI);
+ return lowerAddSubSatToAddoSubo(MI);
+ }
+ case G_SSHLSAT:
+ case G_USHLSAT:
+ return lowerShlSat(MI);
+ case G_ABS: {
+ // Expand %res = G_ABS %a into:
+ // %v1 = G_ASHR %a, scalar_size-1
+ // %v2 = G_ADD %a, %v1
+ // %res = G_XOR %v2, %v1
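+ // e.g. (illustrative) for %a = -5 (s32): %v1 = -1, %v2 = -6, %res = -6 ^ -1
+ // = 5; for a non-negative %a, %v1 = 0 and the expansion leaves %a unchanged.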
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register OpReg = MI.getOperand(1).getReg();
+ auto ShiftAmt =
+ MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
+ auto Shift =
+ MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
+ auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
+ MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
+ MI.eraseFromParent();
+ return Legalized;
}
+ case G_SELECT:
+ return lowerSelect(MI);
+ }
+}
+
+Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
+ Align MinAlign) const {
+ // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
+ // datalayout for the preferred alignment. Also there should be a target hook
+ // for this to allow targets to reduce the alignment and ignore the
+ // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
+ // the type.
+ return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
+}
+
+MachineInstrBuilder
+LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
+ MachinePointerInfo &PtrInfo) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
+
+ unsigned AddrSpace = DL.getAllocaAddrSpace();
+ LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+
+ PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
+ return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
+}
+
+static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
+ LLT VecTy) {
+ int64_t IdxVal;
+ if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
+ return IdxReg;
+
+ LLT IdxTy = B.getMRI()->getType(IdxReg);
+ unsigned NElts = VecTy.getNumElements();
+ if (isPowerOf2_32(NElts)) {
+ APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
+ return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
+ }
+
+ return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
+ .getReg(0);
+}
+
+Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
+ Register Index) {
+ LLT EltTy = VecTy.getElementType();
+
+ // Calculate the element offset and add it to the pointer.
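+ // e.g. (illustrative) for a <4 x s32> vector, a clamped index of 2 yields a
+ // byte offset of 2 * 4 = 8 added to the vector pointer.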
+ unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
+ assert(EltSize * 8 == EltTy.getSizeInBits() &&
+ "Converting bits to bytes lost precision");
+
+ Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
+
+ LLT IdxTy = MRI.getType(Index);
+ auto Mul = MIRBuilder.buildMul(IdxTy, Index,
+ MIRBuilder.buildConstant(IdxTy, EltSize));
+
+ LLT PtrTy = MRI.getType(VecPtr);
+ return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
}
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
- SmallVector<Register, 2> DstRegs;
-
- unsigned NarrowSize = NarrowTy.getSizeInBits();
Register DstReg = MI.getOperand(0).getReg();
- unsigned Size = MRI.getType(DstReg).getSizeInBits();
- int NumParts = Size / NarrowSize;
- // FIXME: Don't know how to handle the situation where the small vectors
- // aren't all the same size yet.
- if (Size % NarrowSize != 0)
- return UnableToLegalize;
+ LLT DstTy = MRI.getType(DstReg);
+ LLT LCMTy = getLCMType(DstTy, NarrowTy);
- for (int i = 0; i < NumParts; ++i) {
- Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUndef(TmpReg);
- DstRegs.push_back(TmpReg);
- }
+ unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
+ SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
MI.eraseFromParent();
return Legalized;
}
@@ -2839,7 +3337,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
return UnableToLegalize;
- NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
+ NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
} else {
NumParts = DstTy.getNumElements();
NarrowTy1 = SrcTy.getElementType();
@@ -3112,63 +3610,116 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
return Legalized;
}
+// Handle FewerElementsVector for a G_BUILD_VECTOR or G_CONCAT_VECTORS that
+// produces a vector.
+//
+// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
+// undef as necessary.
+//
+// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
+// -> <2 x s16>
+//
+// %4:_(s16) = G_IMPLICIT_DEF
+// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
+// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
+// %7:_(<2 x s16>) = G_IMPLICIT_DEF
+// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
+// %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
- unsigned TypeIdx,
- LLT NarrowTy) {
- assert(TypeIdx == 0 && "not a vector type index");
+LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = DstTy.getElementType();
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
- int DstNumElts = DstTy.getNumElements();
- int NarrowNumElts = NarrowTy.getNumElements();
- int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
- LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);
+ // Break into a common type
+ SmallVector<Register, 16> Parts;
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
+ extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
- SmallVector<Register, 8> ConcatOps;
- SmallVector<Register, 8> SubBuildVector;
+ // Build the requested new merge, padding with undef.
+ LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
+ TargetOpcode::G_ANYEXT);
- Register UndefReg;
- if (WidenedDstTy != DstTy)
- UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
+ // Pack into the original result register.
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
- // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
- // necessary.
- //
- // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
- // -> <2 x s16>
- //
- // %4:_(s16) = G_IMPLICIT_DEF
- // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
- // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
- // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
- // %3:_(<3 x s16>) = G_EXTRACT %7, 0
- for (int I = 0; I != NumConcat; ++I) {
- for (int J = 0; J != NarrowNumElts; ++J) {
- int SrcIdx = NarrowNumElts * I + J;
-
- if (SrcIdx < DstNumElts) {
- Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
- SubBuildVector.push_back(SrcReg);
- } else
- SubBuildVector.push_back(UndefReg);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowVecTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register InsertVal;
+ bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
+
+ assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
+ if (IsInsert)
+ InsertVal = MI.getOperand(2).getReg();
+
+ Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
+
+ // TODO: Handle total scalarization case.
+ if (!NarrowVecTy.isVector())
+ return UnableToLegalize;
+
+ LLT VecTy = MRI.getType(SrcVec);
+
+ // If the index is a constant, we can really break this down as you would
+ // expect, and index into the target size pieces.
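+ // e.g. (illustrative) an <8 x s32> source split into <2 x s32> pieces: a
+ // constant index of 5 selects piece 5 / 2 = 2 at local index 5 % 2 = 1.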
+ int64_t IdxVal;
+ if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+ // Avoid out of bounds indexing the pieces.
+ if (IdxVal >= VecTy.getNumElements()) {
+ MIRBuilder.buildUndef(DstReg);
+ MI.eraseFromParent();
+ return Legalized;
}
- auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
- ConcatOps.push_back(BuildVec.getReg(0));
- SubBuildVector.clear();
- }
+ SmallVector<Register, 8> VecParts;
+ LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
+
+ // Build a sequence of NarrowTy pieces in VecParts for this operand.
+ LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
+ TargetOpcode::G_ANYEXT);
+
+ unsigned NewNumElts = NarrowVecTy.getNumElements();
- if (DstTy == WidenedDstTy)
- MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
- else {
- auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
- MIRBuilder.buildExtract(DstReg, Concat, 0);
+ LLT IdxTy = MRI.getType(Idx);
+ int64_t PartIdx = IdxVal / NewNumElts;
+ auto NewIdx =
+ MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
+
+ if (IsInsert) {
+ LLT PartTy = MRI.getType(VecParts[PartIdx]);
+
+ // Use the adjusted index to insert into one of the subvectors.
+ auto InsertPart = MIRBuilder.buildInsertVectorElement(
+ PartTy, VecParts[PartIdx], InsertVal, NewIdx);
+ VecParts[PartIdx] = InsertPart.getReg(0);
+
+ // Recombine the inserted subvector with the others to reform the result
+ // vector.
+ buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
+ } else {
+ MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
}
- MI.eraseFromParent();
- return Legalized;
+ // With a variable index, we can't perform the operation in a smaller type, so
+ // we're forced to expand this.
+ //
+ // TODO: We could emit a chain of compare/select to figure out which piece to
+ // index.
+ return lowerExtractInsertVectorElt(MI);
}
LegalizerHelper::LegalizeResult
@@ -3214,7 +3765,8 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
if (NumParts == -1)
return UnableToLegalize;
- const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+ LLT PtrTy = MRI.getType(AddrReg);
+ const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
unsigned TotalSize = ValTy.getSizeInBits();
@@ -3412,6 +3964,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_ADD:
case G_SUB:
case G_MUL:
+ case G_PTR_ADD:
case G_SMULH:
case G_UMULH:
case G_FADD:
@@ -3435,6 +3988,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FFLOOR:
case G_FRINT:
case G_INTRINSIC_ROUND:
+ case G_INTRINSIC_ROUNDEVEN:
case G_INTRINSIC_TRUNC:
case G_FCOS:
case G_FSIN:
@@ -3466,6 +4020,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_SHL:
case G_LSHR:
case G_ASHR:
+ case G_SSHLSAT:
+ case G_USHLSAT:
case G_CTLZ:
case G_CTLZ_ZERO_UNDEF:
case G_CTTZ:
@@ -3496,7 +4052,15 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_UNMERGE_VALUES:
return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
case G_BUILD_VECTOR:
- return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
+ assert(TypeIdx == 0 && "not a vector type index");
+ return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
+ case G_CONCAT_VECTORS:
+ if (TypeIdx != 1) // TODO: This probably does work as expected already.
+ return UnableToLegalize;
+ return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
+ case G_EXTRACT_VECTOR_ELT:
+ case G_INSERT_VECTOR_ELT:
+ return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
case G_LOAD:
case G_STORE:
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
@@ -3920,6 +4484,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
+
+ Register Src = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src);
+
+ // If all finite floats fit into the narrowed integer type, we can just swap
+ // out the result type. This is practically only useful for conversions from
+ // half to at least 16-bits, so just handle the one case.
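+ // e.g. (illustrative) the largest finite half value is 65504, which fits in
+ // an unsigned 16-bit integer and in a signed integer of at least 17 bits.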
+ if (SrcTy.getScalarType() != LLT::scalar(16) ||
+ NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16))
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0,
+ IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
if (TypeIdx != 1)
@@ -4268,9 +4857,9 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
- auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ const auto &TII = MIRBuilder.getTII();
auto isSupported = [this](const LegalityQuery &Q) {
auto QAction = LI.getAction(Q).Action;
return QAction == Legal || QAction == Libcall || QAction == Custom;
@@ -4358,15 +4947,15 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// unless the target has ctlz but not ctpop, in which case we use:
// { return 32 - nlz(~x & (x-1)); }
// Ref: "Hacker's Delight" by Henry Warren
- auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
- auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1);
+ auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
+ auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
auto MIBTmp = MIRBuilder.buildAnd(
- Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1));
- if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
- isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
- auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
+ SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
+ if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
+ isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
+ auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
- MIRBuilder.buildCTLZ(Ty, MIBTmp));
+ MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
MI.eraseFromParent();
return Legalized;
}
@@ -4375,6 +4964,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case TargetOpcode::G_CTPOP: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(SrcReg);
unsigned Size = Ty.getSizeInBits();
MachineIRBuilder &B = MIRBuilder;
@@ -4384,11 +4975,11 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// B2Count = val - { (val >> 1) & 0x55555555 }
// since it gives same result in blocks of 2 with one instruction less.
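// e.g. (illustrative) for a 2-bit block 0b11: 3 - 1 = 2 set bits; for 0b10:
// 2 - 1 = 1 set bit.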
auto C_1 = B.buildConstant(Ty, 1);
- auto B2Set1LoTo1Hi = B.buildLShr(Ty, MI.getOperand(1).getReg(), C_1);
+ auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
- auto B2Count = B.buildSub(Ty, MI.getOperand(1).getReg(), B2Count1Hi);
+ auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
// In order to get count in blocks of 4 add values from adjacent block of 2.
// B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
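
The comment above is the start of the classic parallel (SWAR) population count; the remaining steps fall outside this hunk's context. For reference, the full 32-bit idiom the legalizer emits, with the masks splatted to the register width (a standalone C++ sketch, not taken from the patch):

#include <cassert>
#include <cstdint>

// Classic SWAR population count; the legalizer builds the same sequence with
// the 0x55/0x33/0x0F masks splatted across the full type width.
static uint32_t popcount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                 // pairs of bits -> 2-bit counts
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // 2-bit -> 4-bit counts
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // 4-bit -> byte counts
  return (v * 0x01010101u) >> 24;                   // sum the bytes
}

int main() {
  assert(popcount32(0) == 0);
  assert(popcount32(0xFFFFFFFFu) == 32);
  assert(popcount32(0x12345678u) == 13);
  return 0;
}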
@@ -4487,8 +5078,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -4516,8 +5106,7 @@ LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -4563,8 +5152,7 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -4781,7 +5369,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
@@ -4796,6 +5384,20 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
+// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
+// multiplication tree.
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Dst);
+
+ auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
+ MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
+ MI.eraseFromParent();
+ return Legalized;
+}
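
lowerFPOWI simply rewrites powi(x, n) as pow(x, sitofp(n)); the TODO notes that a constant exponent could instead be expanded into a multiplication tree, as SelectionDAG does. A standalone C++ sketch of both forms (illustrative only, not part of the patch):

#include <cassert>
#include <cmath>

// The lowering above: powi(x, n) becomes pow(x, (double)n).
static double powi_lowered(double X, int N) {
  return std::pow(X, static_cast<double>(N));
}

// What the TODO refers to: a constant exponent can be expanded into a
// square-and-multiply tree instead of a pow libcall.
static double powi_by_squaring(double X, unsigned N) {
  double R = 1.0;
  while (N) {
    if (N & 1)
      R *= X;
    X *= X;
    N >>= 1;
  }
  return R;
}

int main() {
  assert(std::fabs(powi_lowered(2.0, 10) - 1024.0) < 1e-9);
  assert(powi_by_squaring(3.0, 4) == 81.0);
  return 0;
}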
+
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_SMIN:
@@ -4811,8 +5413,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
}
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src0 = MI.getOperand(1).getReg();
Register Src1 = MI.getOperand(2).getReg();
@@ -4828,7 +5429,7 @@ LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src0 = MI.getOperand(1).getReg();
Register Src1 = MI.getOperand(2).getReg();
@@ -5050,6 +5651,71 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
return Legalized;
}
+/// Lower a vector extract or insert by writing the vector to a stack temporary
+/// and reloading the element or vector.
+///
+/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
+/// =>
+/// %stack_temp = G_FRAME_INDEX
+/// G_STORE %vec, %stack_temp
+/// %idx = clamp(%idx, %vec.getNumElements())
+/// %element_ptr = G_PTR_ADD %stack_temp, %idx
+/// %dst = G_LOAD %element_ptr
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register InsertVal;
+ if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+ InsertVal = MI.getOperand(2).getReg();
+
+ Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
+
+ LLT VecTy = MRI.getType(SrcVec);
+ LLT EltTy = VecTy.getElementType();
+ if (!EltTy.isByteSized()) { // Not implemented.
+ LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
+ return UnableToLegalize;
+ }
+
+ unsigned EltBytes = EltTy.getSizeInBytes();
+ Align VecAlign = getStackTemporaryAlignment(VecTy);
+ Align EltAlign;
+
+ MachinePointerInfo PtrInfo;
+ auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
+ VecAlign, PtrInfo);
+ MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
+
+ // Get the pointer to the element, and be sure not to hit undefined behavior
+ // if the index is out of bounds.
+ Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
+
+ int64_t IdxVal;
+ if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+ int64_t Offset = IdxVal * EltBytes;
+ PtrInfo = PtrInfo.getWithOffset(Offset);
+ EltAlign = commonAlignment(VecAlign, Offset);
+ } else {
+ // We lose information with a variable offset.
+ EltAlign = getStackTemporaryAlignment(EltTy);
+ PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
+ }
+
+ if (InsertVal) {
+ // Write the inserted element
+ MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
+
+ // Reload the whole vector.
+ MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
+ } else {
+ MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
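
A scalar model of the stack-slot lowering above, assuming a 4 x i32 vector (illustrative only, not part of the patch): the vector is spilled to a temporary, the index is clamped so an out-of-range value cannot touch memory outside the slot, and the element is read or written through the computed address.

#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>

static int extractElt(std::array<int, 4> Vec, std::size_t Idx) {
  Idx = std::min(Idx, Vec.size() - 1); // clamp: OOB indices stay inside the slot
  return Vec[Idx];                     // G_LOAD from the element pointer
}

static std::array<int, 4> insertElt(std::array<int, 4> Vec, int Val,
                                    std::size_t Idx) {
  // The whole vector lives in the stack slot; write one element, reload all.
  Idx = std::min(Idx, Vec.size() - 1);
  Vec[Idx] = Val;
  return Vec;
}

int main() {
  std::array<int, 4> V{10, 20, 30, 40};
  assert(extractElt(V, 2) == 30);
  assert(extractElt(V, 99) == 40); // clamped to the last element
  assert(insertElt(V, 7, 1)[1] == 7);
  return 0;
}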
+
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
@@ -5120,7 +5786,6 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
LLT PtrTy = MRI.getType(Dst);
LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
@@ -5266,6 +5931,185 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Res);
+ bool IsSigned;
+ bool IsAdd;
+ unsigned BaseOp;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected addsat/subsat opcode");
+ case TargetOpcode::G_UADDSAT:
+ IsSigned = false;
+ IsAdd = true;
+ BaseOp = TargetOpcode::G_ADD;
+ break;
+ case TargetOpcode::G_SADDSAT:
+ IsSigned = true;
+ IsAdd = true;
+ BaseOp = TargetOpcode::G_ADD;
+ break;
+ case TargetOpcode::G_USUBSAT:
+ IsSigned = false;
+ IsAdd = false;
+ BaseOp = TargetOpcode::G_SUB;
+ break;
+ case TargetOpcode::G_SSUBSAT:
+ IsSigned = true;
+ IsAdd = false;
+ BaseOp = TargetOpcode::G_SUB;
+ break;
+ }
+
+ if (IsSigned) {
+ // sadd.sat(a, b) ->
+ // hi = 0x7fffffff - smax(a, 0)
+ // lo = 0x80000000 - smin(a, 0)
+ // a + smin(smax(lo, b), hi)
+ // ssub.sat(a, b) ->
+ // lo = smax(a, -1) - 0x7fffffff
+ // hi = smin(a, -1) - 0x80000000
+ // a - smin(smax(lo, b), hi)
+ // TODO: AMDGPU can use a "median of 3" instruction here:
+ // a +/- med3(lo, b, hi)
+ uint64_t NumBits = Ty.getScalarSizeInBits();
+ auto MaxVal =
+ MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
+ auto MinVal =
+ MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
+ MachineInstrBuilder Hi, Lo;
+ if (IsAdd) {
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
+ Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
+ } else {
+ auto NegOne = MIRBuilder.buildConstant(Ty, -1);
+ Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
+ MaxVal);
+ Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
+ MinVal);
+ }
+ auto RHSClamped =
+ MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
+ MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
+ } else {
+ // uadd.sat(a, b) -> a + umin(~a, b)
+ // usub.sat(a, b) -> a - umin(a, b)
+ Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
+ auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
+ MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
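
A 32-bit model of the min/max-based saturation above (illustrative only, not part of the patch): for the signed case the RHS is clamped into the range that cannot overflow the LHS before a plain add, and the unsigned add case is a + umin(~a, b). The sketch uses 64-bit intermediates only to keep the C++ free of signed-overflow UB; the generated MIR works in the original width.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>

static int32_t sadd_sat(int32_t A, int32_t B) {
  int64_t Hi = (int64_t)std::numeric_limits<int32_t>::max() - std::max(A, 0);
  int64_t Lo = (int64_t)std::numeric_limits<int32_t>::min() - std::min(A, 0);
  int64_t Clamped = std::min(std::max((int64_t)B, Lo), Hi);
  return A + (int32_t)Clamped; // cannot overflow after clamping
}

static uint32_t uadd_sat(uint32_t A, uint32_t B) {
  return A + std::min<uint32_t>(~A, B); // uadd.sat(a, b) -> a + umin(~a, b)
}

int main() {
  assert(sadd_sat(INT32_MAX, 5) == INT32_MAX);
  assert(sadd_sat(-3, INT32_MIN) == INT32_MIN);
  assert(uadd_sat(0xFFFFFFF0u, 0x100u) == 0xFFFFFFFFu);
  return 0;
}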
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Res);
+ LLT BoolTy = Ty.changeElementSize(1);
+ bool IsSigned;
+ bool IsAdd;
+ unsigned OverflowOp;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected addsat/subsat opcode");
+ case TargetOpcode::G_UADDSAT:
+ IsSigned = false;
+ IsAdd = true;
+ OverflowOp = TargetOpcode::G_UADDO;
+ break;
+ case TargetOpcode::G_SADDSAT:
+ IsSigned = true;
+ IsAdd = true;
+ OverflowOp = TargetOpcode::G_SADDO;
+ break;
+ case TargetOpcode::G_USUBSAT:
+ IsSigned = false;
+ IsAdd = false;
+ OverflowOp = TargetOpcode::G_USUBO;
+ break;
+ case TargetOpcode::G_SSUBSAT:
+ IsSigned = true;
+ IsAdd = false;
+ OverflowOp = TargetOpcode::G_SSUBO;
+ break;
+ }
+
+ auto OverflowRes =
+ MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
+ Register Tmp = OverflowRes.getReg(0);
+ Register Ov = OverflowRes.getReg(1);
+ MachineInstrBuilder Clamp;
+ if (IsSigned) {
+ // sadd.sat(a, b) ->
+ // {tmp, ov} = saddo(a, b)
+ // ov ? (tmp >>s 31) + 0x80000000 : r
+ // ssub.sat(a, b) ->
+ // {tmp, ov} = ssubo(a, b)
+ // ov ? (tmp >>s 31) + 0x80000000 : r
+ uint64_t NumBits = Ty.getScalarSizeInBits();
+ auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
+ auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
+ auto MinVal =
+ MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
+ Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
+ } else {
+ // uadd.sat(a, b) ->
+ // {tmp, ov} = uaddo(a, b)
+ // ov ? 0xffffffff : tmp
+ // usub.sat(a, b) ->
+ // {tmp, ov} = usubo(a, b)
+ // ov ? 0 : tmp
+ Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
+ }
+ MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
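
A model of the overflow-flag based lowering above (illustrative only, not part of the patch), assuming the GCC/Clang __builtin_add_overflow builtin: compute the wrapped result plus an overflow bit, then select either the result or the saturated value. The legalizer derives the signed clamp with an arithmetic shift (tmp >>s 31) + 0x80000000; the sketch spells that out with a sign test.

#include <cassert>
#include <cstdint>

// uadd.sat: {tmp, ov} = uaddo(a, b); result = ov ? 0xffffffff : tmp
static uint32_t uadd_sat(uint32_t A, uint32_t B) {
  uint32_t Tmp;
  bool Ov = __builtin_add_overflow(A, B, &Tmp);
  return Ov ? 0xFFFFFFFFu : Tmp;
}

// sadd.sat: on overflow, saturate toward the sign opposite the wrapped result.
static int32_t sadd_sat(int32_t A, int32_t B) {
  int32_t Tmp;
  bool Ov = __builtin_add_overflow(A, B, &Tmp);
  uint32_t Sign = Tmp < 0 ? 0xFFFFFFFFu : 0u; // models (tmp >>s 31)
  uint32_t Clamp = Sign + 0x80000000u;        // INT_MAX or INT_MIN bit pattern
  return Ov ? (int32_t)Clamp : Tmp;
}

int main() {
  assert(uadd_sat(0xFFFFFFF0u, 0x100u) == 0xFFFFFFFFu);
  assert(sadd_sat(2000000000, 2000000000) == INT32_MAX);
  assert(sadd_sat(-2000000000, -2000000000) == INT32_MIN);
  return 0;
}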
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerShlSat(MachineInstr &MI) {
+ assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
+ MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
+ "Expected shlsat opcode!");
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Res);
+ LLT BoolTy = Ty.changeElementSize(1);
+
+ unsigned BW = Ty.getScalarSizeInBits();
+ auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
+ auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
+ : MIRBuilder.buildLShr(Ty, Result, RHS);
+
+ MachineInstrBuilder SatVal;
+ if (IsSigned) {
+ auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
+ auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
+ auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
+ MIRBuilder.buildConstant(Ty, 0));
+ SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
+ } else {
+ SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
+ }
+ auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
+ MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
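
A 32-bit model of the unsigned case of lowerShlSat (illustrative only, not part of the patch): shift, shift back, and if the round-trip lost bits, return the saturated value. The signed case in the code above is the same idea, except the saturation value is chosen between the signed minimum and maximum based on the sign of the LHS.

#include <cassert>
#include <cstdint>

static uint32_t ushl_sat(uint32_t X, unsigned Amt) { // assumes Amt < 32
  uint32_t Res = X << Amt;
  uint32_t Orig = Res >> Amt;
  return (Orig != X) ? 0xFFFFFFFFu : Res; // lost bits -> saturate
}

int main() {
  assert(ushl_sat(1, 4) == 16);
  assert(ushl_sat(0x40000000u, 2) == 0xFFFFFFFFu); // would need 33 bits
  return 0;
}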
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
@@ -5345,8 +6189,6 @@ LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
MachineFunction &MF = MIRBuilder.getMF();
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetLowering *TLI = STI.getTargetLowering();
bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
int NameOpIdx = IsRead ? 1 : 0;
@@ -5357,7 +6199,7 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
const MDString *RegStr = cast<MDString>(
cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
- Register PhysReg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
+ Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
if (!PhysReg.isValid())
return UnableToLegalize;
@@ -5369,3 +6211,63 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
+ unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+ Register Result = MI.getOperand(0).getReg();
+ LLT OrigTy = MRI.getType(Result);
+ auto SizeInBits = OrigTy.getScalarSizeInBits();
+ LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
+
+ auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
+ auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
+ auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
+ unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
+ auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
+ MIRBuilder.buildTrunc(Result, Shifted);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
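
A scalar model of the mulh lowering above for 32-bit operands (illustrative only, not part of the patch): extend both operands to twice the width, multiply, shift the product right by the original width, and truncate.

#include <cassert>
#include <cstdint>

static int32_t smulh(int32_t A, int32_t B) {
  return (int32_t)(((int64_t)A * (int64_t)B) >> 32);
}
static uint32_t umulh(uint32_t A, uint32_t B) {
  return (uint32_t)(((uint64_t)A * (uint64_t)B) >> 32);
}

int main() {
  assert(umulh(0x80000000u, 4) == 2); // 2^31 * 4 = 2^33, high half is 2
  assert(smulh(-1, 1) == -1);         // high half of -1 is all ones
  return 0;
}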
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
+ // Implement vector G_SELECT in terms of XOR, AND, OR.
+ Register DstReg = MI.getOperand(0).getReg();
+ Register MaskReg = MI.getOperand(1).getReg();
+ Register Op1Reg = MI.getOperand(2).getReg();
+ Register Op2Reg = MI.getOperand(3).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT MaskTy = MRI.getType(MaskReg);
+ LLT Op1Ty = MRI.getType(Op1Reg);
+ if (!DstTy.isVector())
+ return UnableToLegalize;
+
+ // Vector selects can have a scalar predicate. If so, splat it into a vector
+ // and return so that later legalization attempts can try again.
+ if (MaskTy.isScalar()) {
+ Register MaskElt = MaskReg;
+ if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
+ MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
+ // Generate a vector splat idiom to be pattern matched later.
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(ShufSplat.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
+ return UnableToLegalize;
+ }
+
+ auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
+ auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
+ auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
+ MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
+ MI.eraseFromParent();
+ return Legalized;
+}
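
A scalar, per-lane model of the vector-select lowering above (illustrative only, not part of the patch): with an all-ones/all-zeros mask in each lane, select = (m & a) | (~m & b). This is also why a scalar predicate is sign-extended before being splatted: the per-lane mask must be all ones, not just 1.

#include <array>
#include <cassert>
#include <cstdint>

static std::array<uint32_t, 4> vselect(const std::array<uint32_t, 4> &Mask,
                                       const std::array<uint32_t, 4> &A,
                                       const std::array<uint32_t, 4> &B) {
  std::array<uint32_t, 4> R{};
  for (unsigned I = 0; I < 4; ++I)
    R[I] = (Mask[I] & A[I]) | (~Mask[I] & B[I]); // XOR/AND/OR form of select
  return R;
}

int main() {
  std::array<uint32_t, 4> M{0xFFFFFFFFu, 0, 0xFFFFFFFFu, 0};
  std::array<uint32_t, 4> A{1, 2, 3, 4}, B{10, 20, 30, 40};
  assert((vselect(M, A, B) == std::array<uint32_t, 4>{1, 20, 3, 40}));
  return 0;
}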
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 4abd0c4df97a..30acac14bc5f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -105,6 +105,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
const std::pair<unsigned, LLT> &Mutation) {
switch (Rule.getAction()) {
+ case Legal:
case Custom:
case Lower:
case MoreElements:
@@ -122,7 +123,7 @@ static bool mutationIsSane(const LegalizeRule &Rule,
std::pair<unsigned, LLT> Mutation) {
// If the user wants a custom mutation, then we can't really say much about
// it. Return true, and trust that they're doing the right thing.
- if (Rule.getAction() == Custom)
+ if (Rule.getAction() == Custom || Rule.getAction() == Legal)
return true;
const unsigned TypeIdx = Mutation.first;
@@ -147,7 +148,8 @@ static bool mutationIsSane(const LegalizeRule &Rule,
if (NewTy.getNumElements() <= OldElts)
return false;
}
- }
+ } else if (Rule.getAction() == MoreElements)
+ return false;
// Make sure the element type didn't change.
return NewTy.getScalarType() == OldTy.getScalarType();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index a07416d08614..30c00c63f6f4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -56,6 +57,20 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
return InsertMBB == Def.getParent();
}
+bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const {
+ MachineInstr *MI = Op.getParent();
+ if (!MI->isPHI())
+ return false;
+
+ Register SrcReg = Op.getReg();
+ for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) {
+ auto &MO = MI->getOperand(Idx);
+ if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg)
+ return true;
+ }
+ return false;
+}
+
bool Localizer::localizeInterBlock(MachineFunction &MF,
LocalizedSetVecT &LocalizedInstrs) {
bool Changed = false;
@@ -93,6 +108,14 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
LocalizedInstrs.insert(&MI);
continue;
}
+
+ // If the use is a phi operand that's not unique, don't try to localize.
+ // If we do, we can cause unnecessary instruction bloat by duplicating
+ // into each predecessor block, when the existing one is sufficient and
+ // allows for easier optimization later.
+ if (isNonUniquePhiValue(MOUse))
+ continue;
+
LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
Changed = true;
auto MBBAndReg = std::make_pair(InsertMBB, Reg);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 10f696d6a3b3..67ef02a4e7b2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -9,8 +9,8 @@
/// This file implements the MachineIRBuilder class.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
-
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -106,8 +106,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
} else if (auto *CFP = dyn_cast<ConstantFP>(&C)) {
MIB.addFPImm(CFP);
} else {
- // Insert %noreg if we didn't find a usable constant and had to drop it.
- MIB.addReg(0U);
+ // Insert $noreg if we didn't find a usable constant and had to drop it.
+ MIB.addReg(Register());
}
MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
@@ -162,6 +162,11 @@ MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
.addJumpTableIndex(JTI);
}
+void MachineIRBuilder::validateUnaryOp(const LLT Res, const LLT Op0) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0) && "type mismatch");
+}
+
void MachineIRBuilder::validateBinaryOp(const LLT Res, const LLT Op0,
const LLT Op1) {
assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
@@ -312,17 +317,29 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
return buildFConstant(Res, *CFP);
}
-MachineInstrBuilder MachineIRBuilder::buildBrCond(Register Tst,
+MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst,
MachineBasicBlock &Dest) {
- assert(getMRI()->getType(Tst).isScalar() && "invalid operand type");
+ assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
- return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
+ auto MIB = buildInstr(TargetOpcode::G_BRCOND);
+ Tst.addSrcToMIB(MIB);
+ MIB.addMBB(&Dest);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildLoad(const DstOp &Res,
- const SrcOp &Addr,
- MachineMemOperand &MMO) {
- return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
+MachineInstrBuilder
+MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(
+ TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes()));
+ MachineMemOperand *MMO =
+ getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return buildLoad(Dst, Addr, *MMO);
}
MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
@@ -369,6 +386,21 @@ MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val,
return MIB;
}
+MachineInstrBuilder
+MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(
+ TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes()));
+ MachineMemOperand *MMO =
+ getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return buildStore(Val, Addr, *MMO);
+}
+
MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::G_ANYEXT, Res, Op);
@@ -603,6 +635,35 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
}
+MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res,
+ const SrcOp &Src) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
+ assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() &&
+ "Expected Src to match Dst elt ty");
+ auto UndefVec = buildUndef(DstTy);
+ auto Zero = buildConstant(LLT::scalar(64), 0);
+ auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero);
+ SmallVector<int, 16> ZeroMask(DstTy.getNumElements());
+ return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
+ const SrcOp &Src1,
+ const SrcOp &Src2,
+ ArrayRef<int> Mask) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
+ LLT Src1Ty = Src1.getLLTTy(*getMRI());
+ LLT Src2Ty = Src2.getLLTTy(*getMRI());
+ assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
+ assert(DstTy.getElementType() == Src1Ty.getElementType() &&
+ DstTy.getElementType() == Src2Ty.getElementType());
+ (void)Src1Ty;
+ (void)Src2Ty;
+ ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask);
+ return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2})
+ .addShuffleMask(MaskAlloc);
+}
+
MachineInstrBuilder
MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) {
// Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
@@ -925,6 +986,14 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI()));
break;
}
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_ABS:
+ // All these are unary ops.
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ validateUnaryOp(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()));
+ break;
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
@@ -953,7 +1022,9 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
}
case TargetOpcode::G_SHL:
case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR: {
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_USHLSAT:
+ case TargetOpcode::G_SSHLSAT: {
assert(DstOps.size() == 1 && "Invalid Dst");
assert(SrcOps.size() == 2 && "Invalid Srcs");
validateShiftOp(DstOps[0].getLLTTy(*getMRI()),
@@ -1018,11 +1089,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
case TargetOpcode::G_UNMERGE_VALUES: {
assert(!DstOps.empty() && "Invalid trivial sequence");
assert(SrcOps.size() == 1 && "Invalid src for Unmerge");
- assert(std::all_of(DstOps.begin(), DstOps.end(),
- [&, this](const DstOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- DstOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(DstOps,
+ [&, this](const DstOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ DstOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in output list");
assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
@@ -1032,11 +1103,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
case TargetOpcode::G_MERGE_VALUES: {
assert(!SrcOps.empty() && "invalid trivial sequence");
assert(DstOps.size() == 1 && "Invalid Dst");
- assert(std::all_of(SrcOps.begin(), SrcOps.end(),
- [&, this](const SrcOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
@@ -1083,11 +1154,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid DstOps");
assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
"Res type must be a vector");
- assert(std::all_of(SrcOps.begin(), SrcOps.end(),
- [&, this](const SrcOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
@@ -1100,11 +1171,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid DstOps");
assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
"Res type must be a vector");
- assert(std::all_of(SrcOps.begin(), SrcOps.end(),
- [&, this](const SrcOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in input list");
if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits())
@@ -1115,12 +1186,12 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid DstOps");
assert((!SrcOps.empty() || SrcOps.size() < 2) &&
"Must have at least 2 operands");
- assert(std::all_of(SrcOps.begin(), SrcOps.end(),
- [&, this](const SrcOp &Op) {
- return (Op.getLLTTy(*getMRI()).isVector() &&
- Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI()));
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return (Op.getLLTTy(*getMRI()).isVector() &&
+ Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI()));
+ }) &&
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 255ea693b5c4..e2a963747101 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -421,8 +421,7 @@ RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
// Then the alternative mapping, if any.
InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
- for (const InstructionMapping *AltMapping : AltMappings)
- PossibleMappings.push_back(AltMapping);
+ append_range(PossibleMappings, AltMappings);
#ifndef NDEBUG
for (const InstructionMapping *Mapping : PossibleMappings)
assert(Mapping->verify(MI) && "Mapping is invalid");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 8a7fb4fbbf2d..cd2483224489 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -11,8 +11,11 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -20,13 +23,16 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "globalisel-utils"
using namespace llvm;
+using namespace MIPatternMatch;
Register llvm::constrainRegToClass(MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
@@ -42,7 +48,7 @@ Register llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt,
- const TargetRegisterClass &RegClass, const MachineOperand &RegMO) {
+ const TargetRegisterClass &RegClass, MachineOperand &RegMO) {
Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
@@ -63,6 +69,13 @@ Register llvm::constrainOperandRegClass(
TII.get(TargetOpcode::COPY), Reg)
.addReg(ConstrainedReg);
}
+ if (GISelChangeObserver *Observer = MF.getObserver()) {
+ Observer->changingInstr(*RegMO.getParent());
+ }
+ RegMO.setReg(ConstrainedReg);
+ if (GISelChangeObserver *Observer = MF.getObserver()) {
+ Observer->changedInstr(*RegMO.getParent());
+ }
} else {
if (GISelChangeObserver *Observer = MF.getObserver()) {
if (!RegMO.isDef()) {
@@ -80,7 +93,7 @@ Register llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
- const MachineOperand &RegMO, unsigned OpIdx) {
+ MachineOperand &RegMO, unsigned OpIdx) {
Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
@@ -150,8 +163,7 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
// If the operand is a vreg, we should constrain its regclass, and only
// insert COPYs if that's impossible.
// constrainOperandRegClass does that for us.
- MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
- MO, OpI));
+ constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), MO, OpI);
// Tie uses to defs as indicated in MCInstrDesc if this hasn't already been
// done.
@@ -180,6 +192,14 @@ bool llvm::canReplaceReg(Register DstReg, Register SrcReg,
bool llvm::isTriviallyDead(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
+ // FIXME: This logic is mostly duplicated with
+ // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in
+ // MachineInstr::isLabel?
+
+ // Don't delete frame allocation labels.
+ if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE)
+ return false;
+
// If we can move an instruction, we can remove it. Otherwise, it has
// a side-effect of some sort.
bool SawStore = false;
@@ -242,8 +262,8 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
reportGISelFailure(MF, TPC, MORE, R);
}
-Optional<int64_t> llvm::getConstantVRegVal(Register VReg,
- const MachineRegisterInfo &MRI) {
+Optional<APInt> llvm::getConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
Optional<ValueAndVReg> ValAndVReg =
getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false);
assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
@@ -253,9 +273,17 @@ Optional<int64_t> llvm::getConstantVRegVal(Register VReg,
return ValAndVReg->Value;
}
+Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
+ Optional<APInt> Val = getConstantVRegVal(VReg, MRI);
+ if (Val && Val->getBitWidth() <= 64)
+ return Val->getSExtValue();
+ return None;
+}
+
Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
- bool HandleFConstant) {
+ bool HandleFConstant, bool LookThroughAnyExt) {
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
MachineInstr *MI;
auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
@@ -282,6 +310,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
LookThroughInstrs) {
switch (MI->getOpcode()) {
+ case TargetOpcode::G_ANYEXT:
+ if (!LookThroughAnyExt)
+ return None;
+ LLVM_FALLTHROUGH;
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
@@ -315,6 +347,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
case TargetOpcode::G_TRUNC:
Val = Val.trunc(OpcodeAndSize.second);
break;
+ case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_SEXT:
Val = Val.sext(OpcodeAndSize.second);
break;
@@ -324,13 +357,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
}
}
- if (Val.getBitWidth() > 64)
- return None;
-
- return ValueAndVReg{Val.getSExtValue(), VReg};
+ return ValueAndVReg{Val, VReg};
}
-const llvm::ConstantFP *
+const ConstantFP *
llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
MachineInstr *MI = MRI.getVRegDef(VReg);
if (TargetOpcode::G_FCONSTANT != MI->getOpcode())
@@ -338,15 +368,8 @@ llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
return MI->getOperand(1).getFPImm();
}
-namespace {
-struct DefinitionAndSourceRegister {
- llvm::MachineInstr *MI;
- Register Reg;
-};
-} // namespace
-
-static llvm::Optional<DefinitionAndSourceRegister>
-getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
+Optional<DefinitionAndSourceRegister>
+llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
Register DefSrcReg = Reg;
auto *DefMI = MRI.getVRegDef(Reg);
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
@@ -355,7 +378,7 @@ getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
while (DefMI->getOpcode() == TargetOpcode::COPY) {
Register SrcReg = DefMI->getOperand(1).getReg();
auto SrcTy = MRI.getType(SrcReg);
- if (!SrcTy.isValid() || SrcTy != DstTy)
+ if (!SrcTy.isValid())
break;
DefMI = MRI.getVRegDef(SrcReg);
DefSrcReg = SrcReg;
@@ -363,8 +386,8 @@ getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
return DefinitionAndSourceRegister{DefMI, DefSrcReg};
}
-llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
- const MachineRegisterInfo &MRI) {
+MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI) {
Optional<DefinitionAndSourceRegister> DefSrcReg =
getDefSrcRegIgnoringCopies(Reg, MRI);
return DefSrcReg ? DefSrcReg->MI : nullptr;
@@ -377,8 +400,8 @@ Register llvm::getSrcRegIgnoringCopies(Register Reg,
return DefSrcReg ? DefSrcReg->Reg : Register();
}
-llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
- const MachineRegisterInfo &MRI) {
+MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
+ const MachineRegisterInfo &MRI) {
MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI);
return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr;
}
@@ -407,9 +430,8 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
if (!MaybeOp1Cst)
return None;
- LLT Ty = MRI.getType(Op1);
- APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
- APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true);
+ const APInt &C1 = *MaybeOp1Cst;
+ const APInt &C2 = *MaybeOp2Cst;
switch (Opcode) {
default:
break;
@@ -458,7 +480,8 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (!DefMI)
return false;
- if (DefMI->getFlag(MachineInstr::FmNoNans))
+ const TargetMachine& TM = DefMI->getMF()->getTarget();
+ if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
return true;
if (SNaN) {
@@ -489,75 +512,304 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
return Align(1);
}
+Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ MCRegister PhysReg,
+ const TargetRegisterClass &RC,
+ LLT RegTy) {
+ DebugLoc DL; // FIXME: Is no location the right choice?
+ MachineBasicBlock &EntryMBB = MF.front();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register LiveIn = MRI.getLiveInVirtReg(PhysReg);
+ if (LiveIn) {
+ MachineInstr *Def = MRI.getVRegDef(LiveIn);
+ if (Def) {
+ // FIXME: Should the verifier check this is in the entry block?
+ assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block");
+ return LiveIn;
+ }
+
+ // It's possible the incoming argument register and copy were added during
+ // lowering, but later deleted due to being/becoming dead. If this happens,
+ // re-insert the copy.
+ } else {
+ // The live in register was not present, so add it.
+ LiveIn = MF.addLiveIn(PhysReg, &RC);
+ if (RegTy.isValid())
+ MRI.setType(LiveIn, RegTy);
+ }
+
+ BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn)
+ .addReg(PhysReg);
+ if (!EntryMBB.isLiveIn(PhysReg))
+ EntryMBB.addLiveIn(PhysReg);
+ return LiveIn;
+}
+
Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm,
const MachineRegisterInfo &MRI) {
auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
if (MaybeOp1Cst) {
- LLT Ty = MRI.getType(Op1);
- APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
switch (Opcode) {
default:
break;
- case TargetOpcode::G_SEXT_INREG:
- return C1.trunc(Imm).sext(C1.getBitWidth());
+ case TargetOpcode::G_SEXT_INREG: {
+ LLT Ty = MRI.getType(Op1);
+ return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits());
+ }
}
}
return None;
}
+bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB) {
+ Optional<DefinitionAndSourceRegister> DefSrcReg =
+ getDefSrcRegIgnoringCopies(Reg, MRI);
+ if (!DefSrcReg)
+ return false;
+
+ const MachineInstr &MI = *DefSrcReg->MI;
+ const LLT Ty = MRI.getType(Reg);
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONSTANT: {
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+ const ConstantInt *CI = MI.getOperand(1).getCImm();
+ return CI->getValue().zextOrTrunc(BitWidth).isPowerOf2();
+ }
+ case TargetOpcode::G_SHL: {
+ // A left-shift of a constant one will have exactly one bit set because
+ // shifting the bit off the end is undefined.
+
+ // TODO: Constant splat
+ if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (*ConstLHS == 1)
+ return true;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (ConstLHS->isSignMask())
+ return true;
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // TODO: Are all operands of a build vector constant powers of two?
+ if (!KB)
+ return false;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to computeKnownBits to catch other known cases.
+ KnownBits Known = KB->getKnownBits(Reg);
+ return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
+}
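
The KnownBits fallback at the end of isKnownToBeAPowerOfTwo asks for countMaxPopulation() == 1 and countMinPopulation() == 1, i.e. at most one bit can be set and at least one must be, which is exactly one set bit. All of the earlier special cases reduce to the usual power-of-two test, sketched here in standalone C++ (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// x is a power of two iff it is non-zero and clearing its lowest set bit
// leaves nothing behind.
static bool isPowerOfTwo(uint64_t X) {
  return X != 0 && (X & (X - 1)) == 0;
}

int main() {
  assert(isPowerOfTwo(1) && isPowerOfTwo(64) && isPowerOfTwo(1ull << 40));
  assert(!isPowerOfTwo(0) && !isPowerOfTwo(12));
  return 0;
}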
+
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
-LLT llvm::getLCMType(LLT Ty0, LLT Ty1) {
- if (!Ty0.isVector() && !Ty1.isVector()) {
- unsigned Mul = Ty0.getSizeInBits() * Ty1.getSizeInBits();
- int GCDSize = greatestCommonDivisor(Ty0.getSizeInBits(),
- Ty1.getSizeInBits());
- return LLT::scalar(Mul / GCDSize);
- }
+static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) {
+ unsigned Mul = OrigSize * TargetSize;
+ unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize);
+ return Mul / GCDSize;
+}
+
+LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
+ const unsigned OrigSize = OrigTy.getSizeInBits();
+ const unsigned TargetSize = TargetTy.getSizeInBits();
+
+ if (OrigSize == TargetSize)
+ return OrigTy;
+
+ if (OrigTy.isVector()) {
+ const LLT OrigElt = OrigTy.getElementType();
- if (Ty0.isVector() && !Ty1.isVector()) {
- assert(Ty0.getElementType() == Ty1 && "not yet handled");
- return Ty0;
+ if (TargetTy.isVector()) {
+ const LLT TargetElt = TargetTy.getElementType();
+
+ if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
+ int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(),
+ TargetTy.getNumElements());
+ // Prefer the original element type.
+ int Mul = OrigTy.getNumElements() * TargetTy.getNumElements();
+ return LLT::vector(Mul / GCDElts, OrigTy.getElementType());
+ }
+ } else {
+ if (OrigElt.getSizeInBits() == TargetSize)
+ return OrigTy;
+ }
+
+ unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+ return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt);
}
- if (Ty1.isVector() && !Ty0.isVector()) {
- assert(Ty1.getElementType() == Ty0 && "not yet handled");
- return Ty1;
+ if (TargetTy.isVector()) {
+ unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+ return LLT::vector(LCMSize / OrigSize, OrigTy);
}
- if (Ty0.isVector() && Ty1.isVector()) {
- assert(Ty0.getElementType() == Ty1.getElementType() && "not yet handled");
+ unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+
+ // Preserve pointer types.
+ if (LCMSize == OrigSize)
+ return OrigTy;
+ if (LCMSize == TargetSize)
+ return TargetTy;
- int GCDElts = greatestCommonDivisor(Ty0.getNumElements(),
- Ty1.getNumElements());
+ return LLT::scalar(LCMSize);
+}
- int Mul = Ty0.getNumElements() * Ty1.getNumElements();
- return LLT::vector(Mul / GCDElts, Ty0.getElementType());
+LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
+ const unsigned OrigSize = OrigTy.getSizeInBits();
+ const unsigned TargetSize = TargetTy.getSizeInBits();
+
+ if (OrigSize == TargetSize)
+ return OrigTy;
+
+ if (OrigTy.isVector()) {
+ LLT OrigElt = OrigTy.getElementType();
+ if (TargetTy.isVector()) {
+ LLT TargetElt = TargetTy.getElementType();
+ if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
+ int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
+ TargetTy.getNumElements());
+ return LLT::scalarOrVector(GCD, OrigElt);
+ }
+ } else {
+ // If the source is a vector of pointers, return a pointer element.
+ if (OrigElt.getSizeInBits() == TargetSize)
+ return OrigElt;
+ }
+
+ unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
+ if (GCD == OrigElt.getSizeInBits())
+ return OrigElt;
+
+ // If we can't produce the original element type, we have to use a smaller
+ // scalar.
+ if (GCD < OrigElt.getSizeInBits())
+ return LLT::scalar(GCD);
+ return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt);
+ }
+
+ if (TargetTy.isVector()) {
+ // Try to preserve the original element type.
+ LLT TargetElt = TargetTy.getElementType();
+ if (TargetElt.getSizeInBits() == OrigSize)
+ return OrigTy;
}
- llvm_unreachable("not yet handled");
+ unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
+ return LLT::scalar(GCD);
}
-LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
- if (OrigTy.isVector() && TargetTy.isVector()) {
- assert(OrigTy.getElementType() == TargetTy.getElementType());
- int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
- TargetTy.getNumElements());
- return LLT::scalarOrVector(GCD, OrigTy.getElementType());
+Optional<int> llvm::getSplatIndex(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Only G_SHUFFLE_VECTOR can have a splat index!");
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; });
+
+ // If all elements are undefined, this shuffle can be considered a splat.
+ // Return 0 for better potential for callers to simplify.
+ if (FirstDefinedIdx == Mask.end())
+ return 0;
+
+ // Make sure all remaining elements are either undef or the same
+ // as the first non-undef value.
+ int SplatValue = *FirstDefinedIdx;
+ if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()),
+ [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; }))
+ return None;
+
+ return SplatValue;
+}
+
+static bool isBuildVectorOp(unsigned Opcode) {
+ return Opcode == TargetOpcode::G_BUILD_VECTOR ||
+ Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC;
+}
+
+// TODO: Handle mixed undef elements.
+static bool isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue) {
+ if (!isBuildVectorOp(MI.getOpcode()))
+ return false;
+
+ const unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I != NumOps; ++I) {
+ Register Element = MI.getOperand(I).getReg();
+ if (!mi_match(Element, MRI, m_SpecificICst(SplatValue)))
+ return false;
}
- if (OrigTy.isVector() && !TargetTy.isVector()) {
- assert(OrigTy.getElementType() == TargetTy);
- return TargetTy;
+ return true;
+}
+
+Optional<int64_t>
+llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ if (!isBuildVectorOp(MI.getOpcode()))
+ return None;
+
+ const unsigned NumOps = MI.getNumOperands();
+ Optional<int64_t> Scalar;
+ for (unsigned I = 1; I != NumOps; ++I) {
+ Register Element = MI.getOperand(I).getReg();
+ int64_t ElementValue;
+ if (!mi_match(Element, MRI, m_ICst(ElementValue)))
+ return None;
+ if (!Scalar)
+ Scalar = ElementValue;
+ else if (*Scalar != ElementValue)
+ return None;
}
- assert(!OrigTy.isVector() && !TargetTy.isVector() &&
- "GCD type of vector and scalar not implemented");
+ return Scalar;
+}
+
+bool llvm::isBuildVectorAllZeros(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return isBuildVectorConstantSplat(MI, MRI, 0);
+}
- int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
- TargetTy.getSizeInBits());
- return LLT::scalar(GCD);
+bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return isBuildVectorConstantSplat(MI, MRI, -1);
+}
+
+bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
+ bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ return Val & 0x1;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return Val == 1;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return Val == -1;
+ }
+ llvm_unreachable("Invalid boolean contents");
+}
+
+int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
+ bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return 1;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return -1;
+ }
+ llvm_unreachable("Invalid boolean contents");
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index 1e20c02ba160..6c1ce4c1efb0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -223,8 +223,9 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// FIXME: Find better heuristics
llvm::stable_sort(
Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
- return DL.getTypeAllocSize(GV1->getValueType()) <
- DL.getTypeAllocSize(GV2->getValueType());
+ // We don't support scalable global variables.
+ return DL.getTypeAllocSize(GV1->getValueType()).getFixedSize() <
+ DL.getTypeAllocSize(GV2->getValueType()).getFixedSize();
});
// If we want to just blindly group all globals together, do so.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
index 0ba7e920e507..810b10c9c82a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -165,7 +165,7 @@ namespace {
Value *InitLoopCount();
// Insert the set_loop_iteration intrinsic.
- void InsertIterationSetup(Value *LoopCountInit);
+ Value *InsertIterationSetup(Value *LoopCountInit);
// Insert the loop_decrement intrinsic.
void InsertLoopDec();
@@ -187,7 +187,7 @@ namespace {
const DataLayout &DL,
OptimizationRemarkEmitter *ORE) :
SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
- ExitCount(Info.ExitCount),
+ TripCount(Info.TripCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
LoopDecrement(Info.LoopDecrement),
@@ -202,7 +202,7 @@ namespace {
OptimizationRemarkEmitter *ORE = nullptr;
Loop *L = nullptr;
Module *M = nullptr;
- const SCEV *ExitCount = nullptr;
+ const SCEV *TripCount = nullptr;
Type *CountType = nullptr;
BranchInst *ExitBranch = nullptr;
Value *LoopDecrement = nullptr;
@@ -234,7 +234,7 @@ bool HardwareLoops::runOnFunction(Function &F) {
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
Loop *L = *I;
- if (!L->getParentLoop())
+ if (L->isOutermost())
TryConvertLoop(L);
}
@@ -298,7 +298,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
}
assert(
- (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
+ (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) &&
"Hardware Loop must have set exit info.");
BasicBlock *Preheader = L->getLoopPreheader();
@@ -325,11 +325,11 @@ void HardwareLoop::Create() {
return;
}
- InsertIterationSetup(LoopCountInit);
+ Value *Setup = InsertIterationSetup(LoopCountInit);
if (UsePHICounter || ForceHardwareLoopPHI) {
Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
- Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec);
+ Value *EltsRem = InsertPHICounter(Setup, LoopDec);
LoopDec->setOperand(0, EltsRem);
UpdateBranch(LoopDec);
} else
@@ -383,18 +383,13 @@ Value *HardwareLoop::InitLoopCount() {
// loop counter and tests that is not zero?
SCEVExpander SCEVE(SE, DL, "loopcnt");
- if (!ExitCount->getType()->isPointerTy() &&
- ExitCount->getType() != CountType)
- ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
-
- ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
// If we're trying to use the 'test and set' form of the intrinsic, we need
// to replace a conditional branch that is controlling entry to the loop. It
// is likely (guaranteed?) that the preheader has an unconditional branch to
// the loop header, so also check if it has a single predecessor.
- if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
- SE.getZero(ExitCount->getType()))) {
+ if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount,
+ SE.getZero(TripCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
UseLoopGuard |= ForceGuardLoopEntry;
} else
@@ -402,16 +397,23 @@ Value *HardwareLoop::InitLoopCount() {
BasicBlock *BB = L->getLoopPreheader();
if (UseLoopGuard && BB->getSinglePredecessor() &&
- cast<BranchInst>(BB->getTerminator())->isUnconditional())
- BB = BB->getSinglePredecessor();
+ cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
+ BasicBlock *Predecessor = BB->getSinglePredecessor();
+ // If it's not safe to create a while loop then don't force it and create a
+ // do-while loop instead
+ if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE))
+ UseLoopGuard = false;
+ else
+ BB = Predecessor;
+ }
- if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
- LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
- << *ExitCount << "\n");
+ if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) {
+ LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount "
+ << *TripCount << "\n");
return nullptr;
}
- Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
+ Value *Count = SCEVE.expandCodeFor(TripCount, CountType,
BB->getTerminator());
// FIXME: We've expanded Count where we hope to insert the counter setting
@@ -430,11 +432,13 @@ Value *HardwareLoop::InitLoopCount() {
return Count;
}
-void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
+Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
Type *Ty = LoopCountInit->getType();
- Intrinsic::ID ID = UseLoopGuard ?
- Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations;
+ bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
+ Intrinsic::ID ID = UseLoopGuard ? Intrinsic::test_set_loop_iterations
+ : (UsePhi ? Intrinsic::start_loop_iterations
+ : Intrinsic::set_loop_iterations);
Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit);
@@ -450,6 +454,7 @@ void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
}
LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
<< *SetCount << "\n");
+ return UseLoopGuard ? LoopCountInit : SetCount;
}
void HardwareLoop::InsertLoopDec() {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
index 1a5c5d685017..37be2eabf5fe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
@@ -751,7 +751,7 @@ bool IfConverter::CountDuplicatedInstructions(
// A pred-clobbering instruction in the shared portion prevents
// if-conversion.
std::vector<MachineOperand> PredDefs;
- if (TII->DefinesPredicate(*TIB, PredDefs))
+ if (TII->ClobbersPredicate(*TIB, PredDefs, false))
return false;
// If we get all the way to the branch instructions, don't count them.
if (!TIB->isBranch())
@@ -1146,7 +1146,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI,
// FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
// still potentially predicable.
std::vector<MachineOperand> PredDefs;
- if (TII->DefinesPredicate(MI, PredDefs))
+ if (TII->ClobbersPredicate(MI, PredDefs, true))
BBI.ClobbersPred = true;
if (!TII->isPredicable(MI)) {
@@ -2264,8 +2264,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
if (ToBBI.IsBrAnalyzable)
ToBBI.BB->normalizeSuccProbs();
- SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(),
- FromMBB.succ_end());
+ SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.successors());
MachineBasicBlock *NBB = getNextBlock(FromMBB);
MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
// The edge probability from ToBBI.BB to FromMBB, which is only needed when
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 16c9bfc672af..5cdaa9b74e80 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -200,10 +200,16 @@ class ImplicitNullChecks : public MachineFunctionPass {
unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts);
+ /// Returns true if \p DependenceMI can clobber the liveIns in NullSucc block
+ /// if it was hoisted to the NullCheck block. This is used by caller
+ /// canHoistInst to decide if DependenceMI can be hoisted safely.
+ bool canDependenceHoistingClobberLiveIns(MachineInstr *DependenceMI,
+ MachineBasicBlock *NullSucc);
+
/// Return true if \p FaultingMI can be hoisted from after the
/// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
- /// non-null value if we also need to (and legally can) hoist a depedency.
- bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg,
+ /// non-null value if we also need to (and legally can) hoist a dependency.
+ bool canHoistInst(MachineInstr *FaultingMI,
ArrayRef<MachineInstr *> InstsSeenSoFar,
MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
@@ -275,12 +281,12 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A,
// between A and B here -- for instance, we should not be dealing with heap
// load-store dependencies here.
- for (auto MOA : A->operands()) {
+ for (const auto &MOA : A->operands()) {
if (!(MOA.isReg() && MOA.getReg()))
continue;
Register RegA = MOA.getReg();
- for (auto MOB : B->operands()) {
+ for (const auto &MOB : B->operands()) {
if (!(MOB.isReg() && MOB.getReg()))
continue;
@@ -347,11 +353,9 @@ ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI,
return AR_MayAlias;
continue;
}
- llvm::AliasResult AAResult =
- AA->alias(MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
- MMO1->getAAInfo()),
- MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
- MMO2->getAAInfo()));
+ llvm::AliasResult AAResult = AA->alias(
+ MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
+ MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo()));
if (AAResult != NoAlias)
return AR_MayAlias;
}
@@ -363,23 +367,105 @@ ImplicitNullChecks::SuitabilityResult
ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts) {
- int64_t Offset;
- bool OffsetIsScalable;
- const MachineOperand *BaseOp;
+ // Implementation restriction for faulting_op insertion
+ // TODO: This could be relaxed if we find a test case which warrants it.
+ if (MI.getDesc().getNumDefs() > 1)
+ return SR_Unsuitable;
+ if (!MI.mayLoadOrStore() || MI.isPredicable())
+ return SR_Unsuitable;
+ auto AM = TII->getAddrModeFromMemoryOp(MI, TRI);
+ if (!AM)
+ return SR_Unsuitable;
+ auto AddrMode = *AM;
+ const Register BaseReg = AddrMode.BaseReg, ScaledReg = AddrMode.ScaledReg;
+ int64_t Displacement = AddrMode.Displacement;
- if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) ||
- !BaseOp->isReg() || BaseOp->getReg() != PointerReg)
+ // We need the base of the memory instruction to be same as the register
+ // where the null check is performed (i.e. PointerReg).
+ if (BaseReg != PointerReg && ScaledReg != PointerReg)
+ return SR_Unsuitable;
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ unsigned PointerRegSizeInBits = TRI->getRegSizeInBits(PointerReg, MRI);
+ // Bail out if the sizes of BaseReg, ScaledReg and PointerReg are not the
+ // same.
+ if ((BaseReg &&
+ TRI->getRegSizeInBits(BaseReg, MRI) != PointerRegSizeInBits) ||
+ (ScaledReg &&
+ TRI->getRegSizeInBits(ScaledReg, MRI) != PointerRegSizeInBits))
return SR_Unsuitable;
- // FIXME: This algorithm assumes instructions have fixed-size offsets.
- if (OffsetIsScalable)
+ // Returns true if RegUsedInAddr is used for calculating the displacement
+ // depending on addressing mode. Also calculates the Displacement.
+ auto CalculateDisplacementFromAddrMode = [&](Register RegUsedInAddr,
+ int64_t Multiplier) {
+ // The register can be NoRegister, which is defined as zero for all targets.
+ // Consider instruction of interest as `movq 8(,%rdi,8), %rax`. Here the
+ // ScaledReg is %rdi, while there is no BaseReg.
+ if (!RegUsedInAddr)
+ return false;
+ assert(Multiplier && "expected to be non-zero!");
+ MachineInstr *ModifyingMI = nullptr;
+ for (auto It = std::next(MachineBasicBlock::const_reverse_iterator(&MI));
+ It != MI.getParent()->rend(); It++) {
+ const MachineInstr *CurrMI = &*It;
+ if (CurrMI->modifiesRegister(RegUsedInAddr, TRI)) {
+ ModifyingMI = const_cast<MachineInstr *>(CurrMI);
+ break;
+ }
+ }
+ if (!ModifyingMI)
+ return false;
+ // Check for the const value defined in register by ModifyingMI. This means
+ // all other previous values for that register have been invalidated.
+ int64_t ImmVal;
+ if (!TII->getConstValDefinedInReg(*ModifyingMI, RegUsedInAddr, ImmVal))
+ return false;
+ // Calculate the reg size in bits, since this is needed for bailing out in
+ // case of overflow.
+ int32_t RegSizeInBits = TRI->getRegSizeInBits(RegUsedInAddr, MRI);
+ APInt ImmValC(RegSizeInBits, ImmVal, true /*IsSigned*/);
+ APInt MultiplierC(RegSizeInBits, Multiplier);
+ assert(MultiplierC.isStrictlyPositive() &&
+ "expected to be a positive value!");
+ bool IsOverflow;
+ // Sign of the product depends on the sign of the ImmVal, since Multiplier
+ // is always positive.
+ APInt Product = ImmValC.smul_ov(MultiplierC, IsOverflow);
+ if (IsOverflow)
+ return false;
+ APInt DisplacementC(64, Displacement, true /*isSigned*/);
+ DisplacementC = Product.sadd_ov(DisplacementC, IsOverflow);
+ if (IsOverflow)
+ return false;
+
+ // We only handle displacements up to 64 bits wide.
+ if (DisplacementC.getActiveBits() > 64)
+ return false;
+ Displacement = DisplacementC.getSExtValue();
+ return true;
+ };
+
+ // If a register used in the address is constant, fold its effect into the
+ // displacement for ease of analysis.
+ bool BaseRegIsConstVal = false, ScaledRegIsConstVal = false;
+ if (CalculateDisplacementFromAddrMode(BaseReg, 1))
+ BaseRegIsConstVal = true;
+ if (CalculateDisplacementFromAddrMode(ScaledReg, AddrMode.Scale))
+ ScaledRegIsConstVal = true;
+
+ // The register which is not null checked should be part of the Displacement
+ // calculation, otherwise we do not know whether the Displacement is made up
+ // of some symbolic values.
+ // This matters because we do not want to incorrectly assume that the load
+ // falls in the zeroth faulting page in the "sane offset check" below.
+ if ((BaseReg && BaseReg != PointerReg && !BaseRegIsConstVal) ||
+ (ScaledReg && ScaledReg != PointerReg && !ScaledRegIsConstVal))
return SR_Unsuitable;
// We want the mem access to be issued at a sane offset from PointerReg,
// so that if PointerReg is null then the access reliably page faults.
- if (!(MI.mayLoadOrStore() && !MI.isPredicable() &&
- -PageSize < Offset && Offset < PageSize))
+ if (!(-PageSize < Displacement && Displacement < PageSize))
return SR_Unsuitable;
// Finally, check whether the current memory access aliases with previous one.
@@ -393,8 +479,39 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
return SR_Suitable;
}
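To make the constant-folding of address registers above concrete, here is a small standalone sketch (plain C++, not the pass's own code) of folding a register with a known constant value into the displacement, with the same signed-overflow bail-outs the pass performs via APInt::smul_ov and APInt::sadd_ov. The function name and the use of compiler builtins are illustrative assumptions, not LLVM API.

#include <cstdint>
#include <optional>

// Fold RegVal * Multiplier into Disp, or give up on signed overflow.
std::optional<int64_t> foldConstIntoDisplacement(int64_t Disp, int64_t RegVal,
                                                 int64_t Multiplier) {
  int64_t Product, NewDisp;
  if (__builtin_mul_overflow(RegVal, Multiplier, &Product))
    return std::nullopt; // product overflows: treat as unsuitable
  if (__builtin_add_overflow(Disp, Product, &NewDisp))
    return std::nullopt; // sum overflows: treat as unsuitable
  return NewDisp;
}

// For `movq 8(,%rdi,8), %rax` with %rdi known to hold 4:
// foldConstIntoDisplacement(8, 4, 8) == 40, which is then range-checked
// against PageSize before the null check can be made implicit.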
+bool ImplicitNullChecks::canDependenceHoistingClobberLiveIns(
+ MachineInstr *DependenceMI, MachineBasicBlock *NullSucc) {
+ for (const auto &DependenceMO : DependenceMI->operands()) {
+ if (!(DependenceMO.isReg() && DependenceMO.getReg()))
+ continue;
+
+ // Make sure that we won't clobber any live ins to the sibling block by
+ // hoisting Dependency. For instance, we can't hoist INST to before the
+ // null check (even if it is safe, and does not violate any dependencies in
+ // the non_null_block) if %rdx is live in to _null_block.
+ //
+ // test %rcx, %rcx
+ // je _null_block
+ // _non_null_block:
+ // %rdx = INST
+ // ...
+ //
+ // This restriction does not apply to the faulting load inst because in
+ // case the pointer loaded from is in the null page, the load will not
+ // semantically execute or affect machine state. That is, if the load
+ // was loading into %rax and it faults, the value of %rax should stay the
+ // same as it would have been had the load not executed and we'd have
+ // branched to NullSucc directly.
+ if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg()))
+ return true;
+
+ }
+
+ // The dependence does not clobber live-ins in NullSucc block.
+ return false;
+}
+
bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
- unsigned PointerReg,
ArrayRef<MachineInstr *> InstsSeenSoFar,
MachineBasicBlock *NullSucc,
MachineInstr *&Dependence) {
@@ -419,37 +536,8 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
if (DependenceMI->mayLoadOrStore())
return false;
- for (auto &DependenceMO : DependenceMI->operands()) {
- if (!(DependenceMO.isReg() && DependenceMO.getReg()))
- continue;
-
- // Make sure that we won't clobber any live ins to the sibling block by
- // hoisting Dependency. For instance, we can't hoist INST to before the
- // null check (even if it safe, and does not violate any dependencies in
- // the non_null_block) if %rdx is live in to _null_block.
- //
- // test %rcx, %rcx
- // je _null_block
- // _non_null_block:
- // %rdx = INST
- // ...
- //
- // This restriction does not apply to the faulting load inst because in
- // case the pointer loaded from is in the null page, the load will not
- // semantically execute, and affect machine state. That is, if the load
- // was loading into %rax and it faults, the value of %rax should stay the
- // same as it would have been had the load not have executed and we'd have
- // branched to NullSucc directly.
- if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg()))
- return false;
-
- // The Dependency can't be re-defining the base register -- then we won't
- // get the memory operation on the address we want. This is already
- // checked in \c IsSuitableMemoryOp.
- assert(!(DependenceMO.isDef() &&
- TRI->regsOverlap(DependenceMO.getReg(), PointerReg)) &&
- "Should have been checked before!");
- }
+ if (canDependenceHoistingClobberLiveIns(DependenceMI, NullSucc))
+ return false;
auto DepDepResult =
computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr});
@@ -486,9 +574,9 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
MBP.Predicate == MachineBranchPredicate::PRED_EQ)))
return false;
- // If we cannot erase the test instruction itself, then making the null check
- // implicit does not buy us much.
- if (!MBP.SingleUseCondition)
+ // If there is a separate condition generation instruction, we choose not to
+ // transform unless we can remove both the condition and the consuming branch.
+ if (MBP.ConditionDef && !MBP.SingleUseCondition)
return false;
MachineBasicBlock *NotNullSucc, *NullSucc;
@@ -506,32 +594,34 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
if (NotNullSucc->pred_size() != 1)
return false;
- // To prevent the invalid transformation of the following code:
- //
- // mov %rax, %rcx
- // test %rax, %rax
- // %rax = ...
- // je throw_npe
- // mov(%rcx), %r9
- // mov(%rax), %r10
- //
- // into:
- //
- // mov %rax, %rcx
- // %rax = ....
- // faulting_load_op("movl (%rax), %r10", throw_npe)
- // mov(%rcx), %r9
- //
- // we must ensure that there are no instructions between the 'test' and
- // conditional jump that modify %rax.
const Register PointerReg = MBP.LHS.getReg();
- assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block");
-
- for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
- if (I->modifiesRegister(PointerReg, TRI))
- return false;
+ if (MBP.ConditionDef) {
+ // To prevent the invalid transformation of the following code:
+ //
+ // mov %rax, %rcx
+ // test %rax, %rax
+ // %rax = ...
+ // je throw_npe
+ // mov(%rcx), %r9
+ // mov(%rax), %r10
+ //
+ // into:
+ //
+ // mov %rax, %rcx
+ // %rax = ....
+ // faulting_load_op("movl (%rax), %r10", throw_npe)
+ // mov(%rcx), %r9
+ //
+ // we must ensure that there are no instructions between the 'test' and
+ // conditional jump that modify %rax.
+ assert(MBP.ConditionDef->getParent() == &MBB &&
+ "Should be in basic block");
+ for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
+ if (I->modifiesRegister(PointerReg, TRI))
+ return false;
+ }
// Starting with a code fragment like:
//
// test %rax, %rax
@@ -597,17 +687,15 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
if (SR == SR_Impossible)
return false;
if (SR == SR_Suitable &&
- canHoistInst(&MI, PointerReg, InstsSeenSoFar, NullSucc, Dependence)) {
+ canHoistInst(&MI, InstsSeenSoFar, NullSucc, Dependence)) {
NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
NullSucc, Dependence);
return true;
}
- // If MI re-defines the PointerReg then we cannot move further.
- if (llvm::any_of(MI.operands(), [&](MachineOperand &MO) {
- return MO.isReg() && MO.getReg() && MO.isDef() &&
- TRI->regsOverlap(MO.getReg(), PointerReg);
- }))
+ // If MI re-defines the PointerReg in a way that changes the value of
+ // PointerReg if it was null, then we cannot move further.
+ if (!TII->preservesZeroValueInReg(&MI, PointerReg, TRI))
return false;
InstsSeenSoFar.push_back(&MI);
}
@@ -712,9 +800,11 @@ void ImplicitNullChecks::rewriteNullChecks(
}
NC.getMemOperation()->eraseFromParent();
- NC.getCheckOperation()->eraseFromParent();
+ if (auto *CheckOp = NC.getCheckOperation())
+ CheckOp->eraseFromParent();
- // Insert an *unconditional* branch to not-null successor.
+ // Insert an *unconditional* branch to not-null successor - we expect
+ // block placement to remove fallthroughs later.
TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
/*Cond=*/None, DL);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index 41eef2fed840..876e1d3f932a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -153,7 +153,7 @@ public:
unsigned Original);
bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot);
void hoistAllSpills();
- void LRE_DidCloneVirtReg(unsigned, unsigned) override;
+ void LRE_DidCloneVirtReg(Register, Register) override;
};
class InlineSpiller : public Spiller {
@@ -269,6 +269,14 @@ static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
return Register();
}
+static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) {
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg()))
+ LIS.getInterval(MO.getReg());
+ }
+}
+
/// isSnippet - Identify if a live interval is a snippet that should be spilled.
/// It is assumed that SnipLI is a virtual register with the same original as
/// Edit->getReg().
@@ -289,8 +297,9 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
// Check that all uses satisfy our criteria.
for (MachineRegisterInfo::reg_instr_nodbg_iterator
- RI = MRI.reg_instr_nodbg_begin(SnipLI.reg),
- E = MRI.reg_instr_nodbg_end(); RI != E; ) {
+ RI = MRI.reg_instr_nodbg_begin(SnipLI.reg()),
+ E = MRI.reg_instr_nodbg_end();
+ RI != E;) {
MachineInstr &MI = *RI++;
// Allow copies to/from Reg.
@@ -299,11 +308,11 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
// Allow stack slot loads.
int FI;
- if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
+ if (SnipLI.reg() == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
continue;
// Allow stack slot stores.
- if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
+ if (SnipLI.reg() == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
continue;
// Allow a single additional instruction.
@@ -409,14 +418,21 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
MII = DefMI;
++MII;
}
+ MachineInstrSpan MIS(MII, MBB);
// Insert spill without kill flag immediately after def.
TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
MRI.getRegClass(SrcReg), &TRI);
+ LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
+ for (const MachineInstr &MI : make_range(MIS.begin(), MII))
+ getVDefInterval(MI, LIS);
--MII; // Point to store instruction.
- LIS.InsertMachineInstrInMaps(*MII);
LLVM_DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
- HSpiller.addToMergeableSpills(*MII, StackSlot, Original);
+ // If only 1 store instruction is required for the spill, add it to the
+ // mergeable list. In X86 AMX, 2 instructions are required to store.
+ // We disable the merge for this case.
+ if (MIS.begin() == MII)
+ HSpiller.addToMergeableSpills(*MII, StackSlot, Original);
++NumSpills;
return true;
}
@@ -432,7 +448,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
do {
LiveInterval *LI;
std::tie(LI, VNI) = WorkList.pop_back_val();
- Register Reg = LI->reg;
+ Register Reg = LI->reg();
LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@'
<< VNI->def << " in " << *LI << '\n');
@@ -511,7 +527,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
if (!SnippetCopies.count(MI))
continue;
LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
- assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
+ assert(isRegToSpill(SnipLI.reg()) && "Unexpected register in copy");
VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
assert(SnipVNI && "Snippet undefined before copy");
WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
@@ -556,7 +572,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg,
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Analyze instruction
SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
- VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg, &Ops);
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg(), &Ops);
if (!RI.Reads)
return false;
@@ -568,7 +584,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg())
MO.setIsUndef();
}
LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);
@@ -608,7 +624,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// If we can't guarantee that we'll be able to actually assign the new vreg,
// we can't remat.
- if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI)) {
+ if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg(), MI)) {
markValueUsed(&VirtReg, ParentVNI);
LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
return false;
@@ -633,7 +649,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Replace operands
for (const auto &OpPair : Ops) {
MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
- if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) {
MO.setReg(NewVReg);
MO.setIsKill();
}
@@ -810,6 +826,14 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
bool WasCopy = MI->isCopy();
Register ImpReg;
+ // TII::foldMemoryOperand will do what we need here for statepoint
+ // (fold load into use and remove corresponding def). We will replace
+ // uses of removed def with loads (spillAroundUses).
+ // For that to work we need to untie def and use to pass it through
+ // foldMemoryOperand and signal foldPatchpoint that it is allowed to
+ // fold them.
+ bool UntieRegs = MI->getOpcode() == TargetOpcode::STATEPOINT;
+
// Spill subregs if the target allows it.
// We always want to spill subregs for stackmap/patchpoint pseudos.
bool SpillSubRegs = TII.isSubregFoldable() ||
@@ -835,7 +859,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
if (LoadMI && MO.isDef())
return false;
// Tied use operands should not be passed to foldMemoryOperand.
- if (!MI->isRegTiedToDefOperand(Idx))
+ if (UntieRegs || !MI->isRegTiedToDefOperand(Idx))
FoldOps.push_back(Idx);
}
@@ -846,11 +870,31 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
MachineInstrSpan MIS(MI, MI->getParent());
+ SmallVector<std::pair<unsigned, unsigned> > TiedOps;
+ if (UntieRegs)
+ for (unsigned Idx : FoldOps) {
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (!MO.isTied())
+ continue;
+ unsigned Tied = MI->findTiedOperandIdx(Idx);
+ if (MO.isUse())
+ TiedOps.emplace_back(Tied, Idx);
+ else {
+ assert(MO.isDef() && "Tied to not use and def?");
+ TiedOps.emplace_back(Idx, Tied);
+ }
+ MI->untieRegOperand(Idx);
+ }
+
MachineInstr *FoldMI =
LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
: TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
- if (!FoldMI)
+ if (!FoldMI) {
+ // Re-tie operands.
+ for (auto Tied : TiedOps)
+ MI->tieOperands(Tied.first, Tied.second);
return false;
+ }
// Remove LIS for any dead defs in the original MI not in FoldMI.
for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) {
@@ -869,7 +913,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
// FoldMI does not define this physreg. Remove the LI segment.
assert(MO->isDead() && "Cannot fold physreg def");
SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
- LIS.removePhysRegDefAt(Reg, Idx);
+ LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
}
int FI;
@@ -906,7 +950,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
++NumFolded;
else if (Ops.front().second == 0) {
++NumSpills;
- HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original);
+ // If only 1 store instruction is required for the spill, add it to the
+ // mergeable list. In X86 AMX, 2 instructions are required to store.
+ // We disable the merge for this case.
+ if (std::distance(MIS.begin(), MIS.end()) <= 1)
+ HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original);
} else
++NumReloads;
return true;
@@ -953,6 +1001,7 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill,
MachineInstrSpan MIS(MI, &MBB);
MachineBasicBlock::iterator SpillBefore = std::next(MI);
bool IsRealSpill = isRealSpill(*MI);
+
if (IsRealSpill)
TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot,
MRI.getRegClass(NewVReg), &TRI);
@@ -966,11 +1015,16 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill,
MachineBasicBlock::iterator Spill = std::next(MI);
LIS.InsertMachineInstrRangeInMaps(Spill, MIS.end());
+ for (const MachineInstr &MI : make_range(Spill, MIS.end()))
+ getVDefInterval(MI, LIS);
LLVM_DEBUG(
dumpMachineInstrRangeWithSlotIndex(Spill, MIS.end(), LIS, "spill"));
++NumSpills;
- if (IsRealSpill)
+ // If only 1 store instruction is required for the spill, add it to the
+ // mergeable list. In X86 AMX, 2 instructions are required to store.
+ // We disable the merge for this case.
+ if (IsRealSpill && std::distance(Spill, MIS.end()) <= 1)
HSpiller.addToMergeableSpills(*Spill, StackSlot, Original);
}
@@ -1160,7 +1214,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
// save a copy of LiveInterval in StackSlotToOrigLI because the original
// LiveInterval may be cleared after all its references are spilled.
if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) {
- auto LI = std::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight);
+ auto LI = std::make_unique<LiveInterval>(OrigLI.reg(), OrigLI.weight());
LI->assign(OrigLI, Allocator);
StackSlotToOrigLI[StackSlot] = std::move(LI);
}
@@ -1188,7 +1242,7 @@ bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
MachineBasicBlock &BB, Register &LiveReg) {
SlotIndex Idx;
- Register OrigReg = OrigLI.reg;
+ Register OrigReg = OrigLI.reg();
MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB);
if (MI != BB.end())
Idx = LIS.getInstructionIndex(*MI);
@@ -1516,10 +1570,13 @@ void HoistSpillHelper::hoistAllSpills() {
for (auto const &Insert : SpillsToIns) {
MachineBasicBlock *BB = Insert.first;
Register LiveReg = Insert.second;
- MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB);
- TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
+ MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB);
+ MachineInstrSpan MIS(MII, BB);
+ TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot,
MRI.getRegClass(LiveReg), &TRI);
- LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI);
+ LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
+ for (const MachineInstr &MI : make_range(MIS.begin(), MII))
+ getVDefInterval(MI, LIS);
++NumSpills;
}
@@ -1539,11 +1596,13 @@ void HoistSpillHelper::hoistAllSpills() {
/// For VirtReg clone, the \p New register should have the same physreg or
/// stackslot as the \p old register.
-void HoistSpillHelper::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+void HoistSpillHelper::LRE_DidCloneVirtReg(Register New, Register Old) {
if (VRM.hasPhys(Old))
VRM.assignVirt2Phys(New, VRM.getPhys(Old));
else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT)
VRM.assignVirt2StackSlot(New, VRM.getStackSlot(Old));
else
llvm_unreachable("VReg should be assigned either physreg or stackslot");
+ if (VRM.hasShape(Old))
+ VRM.assignVirt2Shape(New, VRM.getShape(Old));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
index 7b50dac4cd1a..a56485cdbc67 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -12,19 +12,15 @@
#include "InterferenceCache.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
-#include <cstdlib>
#include <tuple>
using namespace llvm;
@@ -64,8 +60,8 @@ void InterferenceCache::init(MachineFunction *mf,
Entries[i].clear(mf, indexes, lis);
}
-InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
- unsigned E = PhysRegEntries[PhysReg];
+InterferenceCache::Entry *InterferenceCache::get(MCRegister PhysReg) {
+ unsigned char E = PhysRegEntries[PhysReg.id()];
if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
if (!Entries[E].valid(LIUArray, TRI))
Entries[E].revalidate(LIUArray, TRI);
@@ -101,7 +97,7 @@ void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray,
RegUnits[i].VirtTag = LIUArray[*Units].getTag();
}
-void InterferenceCache::Entry::reset(unsigned physReg,
+void InterferenceCache::Entry::reset(MCRegister physReg,
LiveIntervalUnion *LIUArray,
const TargetRegisterInfo *TRI,
const MachineFunction *MF) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
index 9019e9f61fa0..ace1691c1363 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
@@ -44,7 +44,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
/// of PhysReg in all basic blocks.
class Entry {
/// PhysReg - The register currently represented.
- unsigned PhysReg = 0;
+ MCRegister PhysReg = 0;
/// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
/// change.
@@ -102,13 +102,13 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
assert(!hasRefs() && "Cannot clear cache entry with references");
- PhysReg = 0;
+ PhysReg = MCRegister::NoRegister;
MF = mf;
Indexes = indexes;
LIS = lis;
}
- unsigned getPhysReg() const { return PhysReg; }
+ MCRegister getPhysReg() const { return PhysReg; }
void addRef(int Delta) { RefCount += Delta; }
@@ -120,10 +120,8 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
/// reset - Initialize entry to represent physReg's aliases.
- void reset(unsigned physReg,
- LiveIntervalUnion *LIUArray,
- const TargetRegisterInfo *TRI,
- const MachineFunction *MF);
+ void reset(MCRegister physReg, LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI, const MachineFunction *MF);
/// get - Return an up to date BlockInterference.
BlockInterference *get(unsigned MBBNum) {
@@ -154,7 +152,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
Entry Entries[CacheEntries];
// get - Get a valid entry for PhysReg.
- Entry *get(unsigned PhysReg);
+ Entry *get(MCRegister PhysReg);
public:
InterferenceCache() = default;
@@ -207,11 +205,11 @@ public:
~Cursor() { setEntry(nullptr); }
/// setPhysReg - Point this cursor to PhysReg's interference.
- void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+ void setPhysReg(InterferenceCache &Cache, MCRegister PhysReg) {
// Release reference before getting a new one. That guarantees we can
// actually have CacheEntries live cursors.
setEntry(nullptr);
- if (PhysReg)
+ if (PhysReg.isValid())
setEntry(Cache.get(PhysReg));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c4d83547a06c..b22e6faeb91c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -22,8 +22,8 @@
//
// E.g. An interleaved load (Factor = 2):
// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
-// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
-// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
+// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
+// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
//
// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
// intrinsic in ARM backend.
@@ -66,6 +66,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <utility>
@@ -118,6 +119,15 @@ private:
/// replacements are also performed.
bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
ArrayRef<ShuffleVectorInst *> Shuffles);
+
+ /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
+ /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
+ /// interleaving load. Any newly created shuffles that operate on \p LI will
+ /// be added to \p Shuffles. Returns true if any changes to the IR have been
+ /// made.
+ bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
+ SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
+ LoadInst *LI);
};
} // end anonymous namespace.
@@ -283,67 +293,97 @@ bool InterleavedAccess::lowerInterleavedLoad(
if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
return false;
+ // Check if all users of this load are shufflevectors. If we encounter any
+ // users that are extractelement instructions or binary operators, we save
+ // them to later check if they can be modified to extract from one of the
+ // shufflevectors instead of the load.
+
SmallVector<ShuffleVectorInst *, 4> Shuffles;
SmallVector<ExtractElementInst *, 4> Extracts;
+ // BinOpShuffles need to be handled a single time in case both operands of the
+ // binop are the same load.
+ SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;
- // Check if all users of this load are shufflevectors. If we encounter any
- // users that are extractelement instructions, we save them to later check if
- // they can be modifed to extract from one of the shufflevectors instead of
- // the load.
- for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
- auto *Extract = dyn_cast<ExtractElementInst>(*UI);
+ for (auto *User : LI->users()) {
+ auto *Extract = dyn_cast<ExtractElementInst>(User);
if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
Extracts.push_back(Extract);
continue;
}
- ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
+ auto *BI = dyn_cast<BinaryOperator>(User);
+ if (BI && BI->hasOneUse()) {
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(*BI->user_begin())) {
+ BinOpShuffles.insert(SVI);
+ continue;
+ }
+ }
+ auto *SVI = dyn_cast<ShuffleVectorInst>(User);
if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
return false;
Shuffles.push_back(SVI);
}
- if (Shuffles.empty())
+ if (Shuffles.empty() && BinOpShuffles.empty())
return false;
unsigned Factor, Index;
unsigned NumLoadElements =
cast<FixedVectorType>(LI->getType())->getNumElements();
+ auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
// Check if the first shufflevector is DE-interleave shuffle.
- if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
- MaxFactor, NumLoadElements))
+ if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
+ NumLoadElements))
return false;
// Holds the corresponding index for each DE-interleave shuffle.
SmallVector<unsigned, 4> Indices;
- Indices.push_back(Index);
- Type *VecTy = Shuffles[0]->getType();
+ Type *VecTy = FirstSVI->getType();
// Check if other shufflevectors are also DE-interleaved of the same type
// and factor as the first shufflevector.
- for (unsigned i = 1; i < Shuffles.size(); i++) {
- if (Shuffles[i]->getType() != VecTy)
+ for (auto *Shuffle : Shuffles) {
+ if (Shuffle->getType() != VecTy)
return false;
-
- if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor,
+ if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
Index))
return false;
+ assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
Indices.push_back(Index);
}
+ for (auto *Shuffle : BinOpShuffles) {
+ if (Shuffle->getType() != VecTy)
+ return false;
+ if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
+ Index))
+ return false;
+
+ assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
+
+ if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
+ Indices.push_back(Index);
+ if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
+ Indices.push_back(Index);
+ }
// Try and modify users of the load that are extractelement instructions to
// use the shufflevector instructions instead of the load.
if (!tryReplaceExtracts(Extracts, Shuffles))
return false;
+ bool BinOpShuffleChanged =
+ replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
+
LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
// Try to create target specific intrinsics to replace the load and shuffles.
- if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
- return false;
+ if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
+ // If Extracts is not empty, tryReplaceExtracts made changes earlier.
+ return !Extracts.empty() || BinOpShuffleChanged;
+ }
for (auto SVI : Shuffles)
DeadInsts.push_back(SVI);
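The Factor/Index bookkeeping above can be pictured with a toy version of the mask check: a de-interleaving shuffle of factor F and index I selects lanes I, I+F, I+2F, and so on. The sketch below is illustrative only (a hypothetical isStridedMask, not the isDeInterleaveMaskOfFactor helper itself).

#include <cassert>
#include <vector>

// Returns true when Mask picks every Factor-th lane starting at Index.
bool isStridedMask(const std::vector<int> &Mask, unsigned Factor,
                   unsigned Index) {
  for (unsigned I = 0; I < Mask.size(); ++I)
    if (Mask[I] != int(I * Factor + Index))
      return false;
  return true;
}

int main() {
  assert(isStridedMask({0, 2, 4, 6}, 2, 0));  // %v0 in the file header example
  assert(isStridedMask({1, 3, 5, 7}, 2, 1));  // %v1 in the file header example
  assert(!isStridedMask({0, 1, 2, 3}, 2, 0)); // not a de-interleave of factor 2
  return 0;
}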
@@ -352,6 +392,39 @@ bool InterleavedAccess::lowerInterleavedLoad(
return true;
}
+bool InterleavedAccess::replaceBinOpShuffles(
+ ArrayRef<ShuffleVectorInst *> BinOpShuffles,
+ SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
+ for (auto *SVI : BinOpShuffles) {
+ BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
+ Type *BIOp0Ty = BI->getOperand(0)->getType();
+ ArrayRef<int> Mask = SVI->getShuffleMask();
+ assert(all_of(Mask, [&](int Idx) {
+ return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
+ }));
+
+ auto *NewSVI1 =
+ new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
+ Mask, SVI->getName(), SVI);
+ auto *NewSVI2 = new ShuffleVectorInst(
+ BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
+ SVI->getName(), SVI);
+ Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2,
+ BI->getName(), SVI);
+ SVI->replaceAllUsesWith(NewBI);
+ LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
+ << "\n With : " << *NewSVI1 << "\n And : "
+ << *NewSVI2 << "\n And : " << *NewBI << "\n");
+ RecursivelyDeleteTriviallyDeadInstructions(SVI);
+ if (NewSVI1->getOperand(0) == LI)
+ Shuffles.push_back(NewSVI1);
+ if (NewSVI2->getOperand(0) == LI)
+ Shuffles.push_back(NewSVI2);
+ }
+
+ return !BinOpShuffles.empty();
+}
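replaceBinOpShuffles relies on a shuffle commuting with an elementwise binary operator: shuffle(binop(x, y)) produces the same lanes as binop(shuffle(x), shuffle(y)), but the latter form exposes de-interleaving shuffles directly on the load. Below is a scalar sketch of that identity, illustrative only, using plain arrays rather than IR.

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> X{1, 2, 3, 4}, Y{10, 20, 30, 40};
  std::array<int, 2> Mask{0, 2}; // de-interleave: keep even lanes

  // shuffle(add(X, Y), Mask)
  std::array<int, 4> Sum;
  for (int I = 0; I < 4; ++I)
    Sum[I] = X[I] + Y[I];
  std::array<int, 2> A;
  for (int I = 0; I < 2; ++I)
    A[I] = Sum[Mask[I]];

  // add(shuffle(X, Mask), shuffle(Y, Mask))
  std::array<int, 2> B;
  for (int I = 0; I < 2; ++I)
    B[I] = X[Mask[I]] + Y[Mask[I]];

  assert(A == B); // same values either way
  return 0;
}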
+
bool InterleavedAccess::tryReplaceExtracts(
ArrayRef<ExtractElementInst *> Extracts,
ArrayRef<ShuffleVectorInst *> Shuffles) {
@@ -421,7 +494,7 @@ bool InterleavedAccess::lowerInterleavedStore(
if (!SI->isSimple())
return false;
- ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
+ auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
return false;
@@ -461,10 +534,10 @@ bool InterleavedAccess::runOnFunction(Function &F) {
bool Changed = false;
for (auto &I : instructions(F)) {
- if (LoadInst *LI = dyn_cast<LoadInst>(&I))
+ if (auto *LI = dyn_cast<LoadInst>(&I))
Changed |= lowerInterleavedLoad(LI, DeadInsts);
- if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ if (auto *SI = dyn_cast<StoreInst>(&I))
Changed |= lowerInterleavedStore(SI, DeadInsts);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index f7131926ee65..ff3f93d51ea8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -1104,10 +1104,8 @@ InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) {
// All LIs are within the same BB. Select the first for a reference.
BasicBlock *BB = (*LIs.begin())->getParent();
- BasicBlock::iterator FLI =
- std::find_if(BB->begin(), BB->end(), [&LIs](Instruction &I) -> bool {
- return is_contained(LIs, &I);
- });
+ BasicBlock::iterator FLI = llvm::find_if(
+ *BB, [&LIs](Instruction &I) -> bool { return is_contained(LIs, &I); });
assert(FLI != BB->end());
return cast<LoadInst>(FLI);
@@ -1130,8 +1128,8 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
std::set<Instruction *> Is;
std::set<Instruction *> SVIs;
- unsigned InterleavedCost;
- unsigned InstructionCost = 0;
+ InstructionCost InterleavedCost;
+ InstructionCost InstructionCost = 0;
// Get the interleave factor
unsigned Factor = InterleavedLoad.size();
@@ -1174,6 +1172,10 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
}
}
+ // We need to have a valid cost in order to proceed.
+ if (!InstructionCost.isValid())
+ return false;
+
// We know that all LoadInst are within the same BB. This guarantees that
// either everything or nothing is loaded.
LoadInst *First = findFirstLoad(LIs);
@@ -1236,8 +1238,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
Mask.push_back(i + j * Factor);
Builder.SetInsertPoint(VI.SVI);
- auto SVI = Builder.CreateShuffleVector(LI, UndefValue::get(LI->getType()),
- Mask, "interleaved.shuffle");
+ auto SVI = Builder.CreateShuffleVector(LI, Mask, "interleaved.shuffle");
VI.SVI->replaceAllUsesWith(SVI);
i++;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
index e37c21e76597..55089d3b90d0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -329,6 +329,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
break; // Strip out these intrinsics
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index b485f2cf7261..f9b7bf613ff6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -40,13 +40,16 @@ static cl::opt<bool> EnableTrapUnreachable("trap-unreachable",
void LLVMTargetMachine::initAsmInfo() {
MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
+ assert(MRI && "Unable to create reg info");
MII.reset(TheTarget.createMCInstrInfo());
+ assert(MII && "Unable to create instruction info");
// FIXME: Having an MCSubtargetInfo on the target machine is a hack due
// to some backends having subtarget feature dependent module level
// code generation. This is similar to the hack in the AsmPrinter for
// module level assembly etc.
STI.reset(TheTarget.createMCSubtargetInfo(
getTargetTriple().str(), getTargetCPU(), getTargetFeatureString()));
+ assert(STI && "Unable to create subtarget info");
MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(
*MRI, getTargetTriple().str(), Options.MCOptions);
@@ -58,6 +61,9 @@ void LLVMTargetMachine::initAsmInfo() {
"Make sure you include the correct TargetSelect.h"
"and that InitializeAllTargetMCs() is being invoked!");
+ if (Options.BinutilsVersion.first > 0)
+ TmpAsmInfo->setBinutilsVersion(Options.BinutilsVersion);
+
if (Options.DisableIntegratedAS)
TmpAsmInfo->setUseIntegratedAssembler(false);
@@ -118,6 +124,24 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
raw_pwrite_stream *DwoOut,
CodeGenFileType FileType,
MCContext &Context) {
+ Expected<std::unique_ptr<MCStreamer>> MCStreamerOrErr =
+ createMCStreamer(Out, DwoOut, FileType, Context);
+ if (auto Err = MCStreamerOrErr.takeError())
+ return true;
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*this, std::move(*MCStreamerOrErr));
+ if (!Printer)
+ return true;
+
+ PM.add(Printer);
+ return false;
+}
+
+Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
+ raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
+ MCContext &Context) {
if (Options.MCOptions.MCSaveTempLabels)
Context.setAllowTemporaryLabels(false);
@@ -152,10 +176,14 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
+ if (!MCE)
+ return make_error<StringError>("createMCCodeEmitter failed",
+ inconvertibleErrorCode());
MCAsmBackend *MAB =
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
- if (!MCE || !MAB)
- return true;
+ if (!MAB)
+ return make_error<StringError>("createMCAsmBackend failed",
+ inconvertibleErrorCode());
Triple T(getTargetTriple().str());
AsmStreamer.reset(getTarget().createMCObjectStreamer(
@@ -174,14 +202,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
break;
}
- // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
- FunctionPass *Printer =
- getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
- if (!Printer)
- return true;
-
- PM.add(Printer);
- return false;
+ return std::move(AsmStreamer);
}
bool LLVMTargetMachine::addPassesToEmitFile(
@@ -196,20 +217,14 @@ bool LLVMTargetMachine::addPassesToEmitFile(
if (!PassConfig)
return true;
- if (!TargetPassConfig::willCompleteCodeGenPipeline()) {
- if (this->getTargetTriple().isOSAIX()) {
- // On AIX, we might manifest MCSymbols during SDAG lowering. For MIR
- // testing to be meaningful, we need to ensure that the symbols created
- // are MCSymbolXCOFF variants, which requires that
- // the TargetLoweringObjectFile instance has been initialized.
- MCContext &Ctx = MMIWP->getMMI().getContext();
- const_cast<TargetLoweringObjectFile &>(*this->getObjFileLowering())
- .Initialize(Ctx, *this);
- }
- PM.add(createPrintMIRPass(Out));
- } else if (addAsmPrinter(PM, Out, DwoOut, FileType,
- MMIWP->getMMI().getContext()))
- return true;
+ if (TargetPassConfig::willCompleteCodeGenPipeline()) {
+ if (addAsmPrinter(PM, Out, DwoOut, FileType, MMIWP->getMMI().getContext()))
+ return true;
+ } else {
+ // MIR printing is redundant with -filetype=null.
+ if (FileType != CGFT_Null)
+ PM.add(createPrintMIRPass(Out));
+ }
PM.add(createFreeMachineFunctionPass());
return false;
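The split of addAsmPrinter into createMCStreamer changes the failure path from a bare boolean to llvm::Expected, so the reason for a failed streamer creation can be propagated to the caller. A minimal sketch of that pattern follows, with hypothetical makeThing/useThing stand-ins rather than the real MC hooks.

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

using namespace llvm;

static Expected<std::unique_ptr<int>> makeThing(bool Fail) {
  if (Fail)
    return make_error<StringError>("makeThing failed",
                                   inconvertibleErrorCode());
  return std::make_unique<int>(42);
}

static bool useThing(bool Fail) {
  Expected<std::unique_ptr<int>> ThingOrErr = makeThing(Fail);
  if (Error Err = ThingOrErr.takeError()) {
    errs() << toString(std::move(Err)) << '\n'; // report the reason
    return true; // mirror addAsmPrinter's "true means failure" convention
  }
  std::unique_ptr<int> Thing = std::move(*ThingOrErr);
  return !Thing;
}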
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
index 690b429832a5..8139c2cbb6cd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -324,7 +324,7 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
Set = std::make_unique<BlockSetT>();
getMachineBasicBlocks(DL, *Set);
}
- return Set->count(MBB) != 0;
+ return Set->contains(MBB);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
new file mode 100644
index 000000000000..18ffe8ba0669
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -0,0 +1,3363 @@
+//===- InstrRefBasedImpl.cpp - Tracking Debug Value MIs -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file InstrRefBasedImpl.cpp
+///
+/// This is a separate implementation of LiveDebugValues, see
+/// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information.
+///
+/// This pass propagates variable locations between basic blocks, resolving
+/// control flow conflicts between them. The problem is much like SSA
+/// construction, where each DBG_VALUE instruction assigns the *value* that
+/// a variable has, and every instruction where the variable is in scope uses
+/// that variable. The resulting map of instruction-to-value is then translated
+/// into a register (or spill) location for each variable over each instruction.
+///
+/// This pass determines which DBG_VALUE dominates which instructions, or if
+/// none do, where values must be merged (like PHI nodes). The added
+/// complication is that because codegen has already finished, a PHI node may
+/// be needed for a variable location to be correct, but no register or spill
+/// slot merges the necessary values. In these circumstances, the variable
+/// location is dropped.
+///
+/// What makes this analysis non-trivial is loops: we cannot tell in advance
+/// whether a variable location is live throughout a loop, or whether its
+/// location is clobbered (or redefined by another DBG_VALUE), without
+/// exploring all the way through.
+///
+/// To make this simpler we perform two kinds of analysis. First, we identify
+/// every value defined by every instruction (ignoring those that only move
+/// another value), then compute a map of which values are available for each
+/// instruction. This is stronger than a reaching-def analysis, as we create
+/// PHI values where other values merge.
+///
+/// Secondly, for each variable, we effectively re-construct SSA using each
+/// DBG_VALUE as a def. The DBG_VALUEs read a value-number computed by the
+/// first analysis from the location they refer to. We can then compute the
+/// dominance frontiers of where a variable has a value, and create PHI nodes
+/// where they merge.
+/// This isn't precisely SSA-construction though, because the function shape
+/// is pre-defined. If a variable location requires a PHI node, but no
+/// PHI for the relevant values is present in the function (as computed by the
+/// first analysis), the location must be dropped.
+///
+/// Once both are complete, we can pass back over all instructions knowing:
+/// * What _value_ each variable should contain, either defined by an
+/// instruction or where control flow merges
+/// * What the location of that value is (if any).
+/// Allowing us to create appropriate live-in DBG_VALUEs, and DBG_VALUEs when
+/// a value moves location. After this pass runs, all variable locations within
+/// a block should be specified by DBG_VALUEs within that block, allowing
+/// DbgEntityHistoryCalculator to focus on individual blocks.
+///
+/// This pass is able to go fast because the size of the first
+/// reaching-definition analysis is proportional to the working-set size of
+/// the function, which the compiler tries to keep small. (It's also
+/// proportional to the number of blocks). Additionally, we repeatedly perform
+/// the second reaching-definition analysis with only the variables and blocks
+/// in a single lexical scope, exploiting their locality.
+///
+/// Determining where PHIs happen is trickier with this approach, and it comes
+/// to a head in the major problem for LiveDebugValues: is a value live-through
+/// a loop, or not? Your garden-variety dataflow analysis aims to build a set of
+/// facts about a function, however this analysis needs to generate new value
+/// numbers at joins.
+///
+/// To do this, consider a lattice of all definition values, from instructions
+/// and from PHIs. Each PHI is characterised by the RPO number of the block it
+/// occurs in. Each value pair A, B can be ordered by RPO(A) < RPO(B):
+/// with non-PHI values at the top, and any PHI value in the last block (by RPO
+/// order) at the bottom.
+///
+/// (Awkwardly: lower down the _lattice_ means a greater RPO _number_. Below,
+/// "rank" always refers to the former).
+///
+/// At any join, for each register, we consider:
+/// * All incoming values, and
+/// * The PREVIOUS live-in value at this join.
+/// If all incoming values agree: that's the live-in value. If they do not, the
+/// incoming values are ranked according to the partial order, and the NEXT
+/// LOWEST rank after the PREVIOUS live-in value is picked (multiple values of
+/// the same rank are ignored as conflicting). If there are no candidate values,
+/// or if the rank of the live-in would be lower than the rank of the current
+/// block's PHIs, create a new PHI value.
+///
+/// Intuitively: if it's not immediately obvious what value a join should result
+/// in, we iteratively descend from instruction-definitions down through PHI
+/// values, getting closer to the current block each time. If the current block
+/// is a loop head, this ordering is effectively searching outer levels of
+/// loops, to find a value that's live-through the current loop.
+///
+/// If there is no value that's live-through this loop, a PHI is created for
+/// this location instead. We can't use a lower-ranked PHI because by definition
+/// it doesn't dominate the current block. We can't create a PHI value any
+/// earlier, because we risk creating a PHI value at a location where values do
+/// not in fact merge, thus misrepresenting the truth, and not making the true
+/// live-through value for variable locations.
+///
+/// This algorithm applies to both calculating the availability of values in
+/// the first analysis, and the location of variables in the second. However
+/// for the second we add an extra dimension of pain: creating a variable
+/// location PHI is only valid if, for each incoming edge,
+/// * There is a value for the variable on the incoming edge, and
+/// * All the edges have that value in the same register.
+/// Or put another way: we can only create a variable-location PHI if there is
+/// a matching machine-location PHI, each input to which is the variables value
+/// in the predecessor block.
+///
+/// To accommodate this difference, each point on the lattice is split in
+/// two: a "proposed" PHI and "definite" PHI. Any PHI that can immediately
+/// have a location determined are "definite" PHIs, and no further work is
+/// needed. Otherwise, a location that all non-backedge predecessors agree
+/// on is picked and propagated as a "proposed" PHI value. If that PHI value
+/// is truly live-through, it'll appear on the loop backedges on the next
+/// dataflow iteration, after which the block live-in moves to be a "definite"
+/// PHI. If it's not truly live-through, the variable value will be downgraded
+/// further as we explore the lattice, or remains "proposed" and is considered
+/// invalid once dataflow completes.
+///
+/// ### Terminology
+///
+/// A machine location is a register or spill slot, a value is something that's
+/// defined by an instruction or PHI node, while a variable value is the value
+/// assigned to a variable. A variable location is a machine location, that must
+/// contain the appropriate variable value. A value that is a PHI node is
+/// occasionally called an mphi.
+///
+/// The first dataflow problem is the "machine value location" problem,
+/// because we're determining which machine locations contain which values.
+/// The "locations" are constant: what's unknown is what value they contain.
+///
+/// The second dataflow problem (the one for variables) is the "variable value
+/// problem", because it's determining what values a variable has, rather than
+/// what location those values are placed in. Unfortunately, it's not that
+/// simple, because producing a PHI value always involves picking a location.
+/// This is an imperfection that we just have to accept, at least for now.
+///
+/// TODO:
+/// Overlapping fragments
+/// Entry values
+/// Add back DEBUG statements for debugging this
+/// Collect statistics
+///
+//===----------------------------------------------------------------------===//
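As a rough illustration of the join rule described above (and only that: a loose standalone sketch under the stated reading of the prose, not the pass's actual data structures), reduce each candidate value to an integer rank, where a smaller rank means higher up the lattice, i.e. non-PHI defs first and PHIs ordered by the RPO number of their block.

#include <optional>
#include <vector>

// Returns the rank chosen as the live-in, or nullopt to mean
// "create a new PHI in this block".
std::optional<int> pickLiveIn(const std::vector<int> &Incoming,
                              int PrevLiveIn, int ThisBlockPHI) {
  // If every incoming value agrees, that value is the live-in.
  bool AllAgree = !Incoming.empty();
  for (int R : Incoming)
    AllAgree &= (R == Incoming.front());
  if (AllAgree)
    return Incoming.front();

  // Otherwise descend: pick the next rank below the previous live-in,
  // ignoring ranks that several incoming values share (they conflict).
  std::optional<int> Best;
  for (int R : Incoming) {
    if (R <= PrevLiveIn)
      continue;
    int Same = 0;
    for (int S : Incoming)
      Same += (S == R);
    if (Same == 1 && (!Best || R < *Best))
      Best = R;
  }

  // No candidate, or we would sink past this block's own PHIs: make a PHI.
  if (!Best || *Best >= ThisBlockPHI)
    return std::nullopt;
  return Best;
}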
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <queue>
+#include <tuple>
+#include <utility>
+#include <vector>
+#include <limits.h>
+#include <limits>
+
+#include "LiveDebugValues.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "livedebugvalues"
+
+STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
+STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed");
+
+// Act more like the VarLoc implementation, by propagating some locations too
+// far and ignoring some transfers.
+static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden,
+ cl::desc("Act like old LiveDebugValues did"),
+ cl::init(false));
+
+// Rely on isStoreToStackSlotPostFE and similar to observe all stack spills.
+static cl::opt<bool>
+ ObserveAllStackops("observe-all-stack-ops", cl::Hidden,
+ cl::desc("Allow non-kill spill and restores"),
+ cl::init(false));
+
+namespace {
+
+// The location at which a spilled value resides. It consists of a register and
+// an offset.
+struct SpillLoc {
+ unsigned SpillBase;
+ StackOffset SpillOffset;
+ bool operator==(const SpillLoc &Other) const {
+ return std::make_pair(SpillBase, SpillOffset) ==
+ std::make_pair(Other.SpillBase, Other.SpillOffset);
+ }
+ bool operator<(const SpillLoc &Other) const {
+ return std::make_tuple(SpillBase, SpillOffset.getFixed(),
+ SpillOffset.getScalable()) <
+ std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
+ Other.SpillOffset.getScalable());
+ }
+};
+
+class LocIdx {
+ unsigned Location;
+
+ // Default constructor is private, initializing to an illegal location number.
+ // Use only for "not an entry" elements in IndexedMaps.
+ LocIdx() : Location(UINT_MAX) { }
+
+public:
+ #define NUM_LOC_BITS 24
+ LocIdx(unsigned L) : Location(L) {
+ assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
+ }
+
+ static LocIdx MakeIllegalLoc() {
+ return LocIdx();
+ }
+
+ bool isIllegal() const {
+ return Location == UINT_MAX;
+ }
+
+ uint64_t asU64() const {
+ return Location;
+ }
+
+ bool operator==(unsigned L) const {
+ return Location == L;
+ }
+
+ bool operator==(const LocIdx &L) const {
+ return Location == L.Location;
+ }
+
+ bool operator!=(unsigned L) const {
+ return !(*this == L);
+ }
+
+ bool operator!=(const LocIdx &L) const {
+ return !(*this == L);
+ }
+
+ bool operator<(const LocIdx &Other) const {
+ return Location < Other.Location;
+ }
+};
+
+class LocIdxToIndexFunctor {
+public:
+ using argument_type = LocIdx;
+ unsigned operator()(const LocIdx &L) const {
+ return L.asU64();
+ }
+};
+
+/// Unique identifier for a value defined by an instruction, as a value type.
+/// Casts back and forth to a uint64_t. Probably replaceable with something less
+/// bit-constrained. Each value identifies the instruction and machine location
+/// where the value is defined, although there may be no corresponding machine
+/// operand for it (ex: regmasks clobbering values). The instructions are
+/// one-based, and definitions that are PHIs have instruction number zero.
+///
+/// The obvious limits (functions of more than ~1M blocks, or blocks of more
+/// than ~1M instructions) are problematic; but by that point we should
+/// probably have bailed out of trying to analyse the function.
+class ValueIDNum {
+ uint64_t BlockNo : 20; /// The block where the def happens.
+  uint64_t InstNo : 20; /// The instruction where the def happens;
+                         /// one-based, the distance from the start of the block.
+ uint64_t LocNo : NUM_LOC_BITS; /// The machine location where the def happens.
+
+public:
+ // XXX -- temporarily enabled while the live-in / live-out tables are moved
+ // to something more type-y
+ ValueIDNum() : BlockNo(0xFFFFF),
+ InstNo(0xFFFFF),
+ LocNo(0xFFFFFF) { }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc)
+ : BlockNo(Block), InstNo(Inst), LocNo(Loc) { }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc)
+ : BlockNo(Block), InstNo(Inst), LocNo(Loc.asU64()) { }
+
+ uint64_t getBlock() const { return BlockNo; }
+ uint64_t getInst() const { return InstNo; }
+ uint64_t getLoc() const { return LocNo; }
+ bool isPHI() const { return InstNo == 0; }
+
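+  // A ValueIDNum packs into a uint64_t as [ BlockNo:20 | InstNo:20 | LocNo:24 ],
+  // with the block number in the most significant bits; asU64 and fromU64
+  // convert to and from that layout.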
+ uint64_t asU64() const {
+ uint64_t TmpBlock = BlockNo;
+ uint64_t TmpInst = InstNo;
+ return TmpBlock << 44ull | TmpInst << NUM_LOC_BITS | LocNo;
+ }
+
+ static ValueIDNum fromU64(uint64_t v) {
+    uint64_t L = v & ((1ull << NUM_LOC_BITS) - 1);
+ return {v >> 44ull, ((v >> NUM_LOC_BITS) & 0xFFFFF), L};
+ }
+
+ bool operator<(const ValueIDNum &Other) const {
+ return asU64() < Other.asU64();
+ }
+
+ bool operator==(const ValueIDNum &Other) const {
+ return std::tie(BlockNo, InstNo, LocNo) ==
+ std::tie(Other.BlockNo, Other.InstNo, Other.LocNo);
+ }
+
+ bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
+
+ std::string asString(const std::string &mlocname) const {
+ return Twine("Value{bb: ")
+ .concat(Twine(BlockNo).concat(
+ Twine(", inst: ")
+ .concat((InstNo ? Twine(InstNo) : Twine("live-in"))
+ .concat(Twine(", loc: ").concat(Twine(mlocname)))
+ .concat(Twine("}")))))
+ .str();
+ }
+
+ static ValueIDNum EmptyValue;
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
+/// the value, and a boolean of whether or not it's indirect.
+class DbgValueProperties {
+public:
+ DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
+ : DIExpr(DIExpr), Indirect(Indirect) {}
+
+ /// Extract properties from an existing DBG_VALUE instruction.
+ DbgValueProperties(const MachineInstr &MI) {
+ assert(MI.isDebugValue());
+ DIExpr = MI.getDebugExpression();
+ Indirect = MI.getOperand(1).isImm();
+ }
+
+ bool operator==(const DbgValueProperties &Other) const {
+ return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
+ }
+
+ bool operator!=(const DbgValueProperties &Other) const {
+ return !(*this == Other);
+ }
+
+ const DIExpression *DIExpr;
+ bool Indirect;
+};
+
+/// Tracker for what values are in machine locations. Listens to the Things
+/// being Done by various instructions, and maintains a table of what machine
+/// locations have what values (as defined by a ValueIDNum).
+///
+/// There are potentially a much larger number of machine locations on the
+/// target machine than the actual working-set size of the function. On x86 for
+/// example, we're extremely unlikely to want to track values through control
+/// or debug registers. To avoid doing so, MLocTracker has several layers of
+/// indirection going on, with two kinds of ``location'':
+/// * A LocID uniquely identifies a register or spill location, with a
+/// predictable value.
+/// * A LocIdx is a key (in the database sense) for a LocID and a ValueIDNum.
+/// Whenever a location is def'd or used by a MachineInstr, we automagically
+/// create a new LocIdx for a location, but not otherwise. This ensures we only
+/// account for locations that are actually used or defined. The cost is another
+/// vector lookup (of LocID -> LocIdx) over any other implementation. This is
+/// fairly cheap, and the compiler tries to reduce the working-set at any one
+/// time in the function anyway.
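+/// For example, a register's LocID is simply its target register number, but
+/// it is only allocated a LocIdx (and a slot in the value table) the first
+/// time an instruction reads or defines it.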
+///
+/// Register mask operands completely blow this out of the water; I've just
+/// piled hacks on top of hacks to get around that.
+class MLocTracker {
+public:
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const TargetLowering &TLI;
+
+ /// IndexedMap type, mapping from LocIdx to ValueIDNum.
+ using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
+
+ /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
+ /// packed, entries only exist for locations that are being tracked.
+ LocToValueType LocIdxToIDNum;
+
+ /// "Map" of machine location IDs (i.e., raw register or spill number) to the
+ /// LocIdx key / number for that location. There are always at least as many
+ /// as the number of registers on the target -- if the value in the register
+ /// is not being tracked, then the LocIdx value will be zero. New entries are
+ /// appended if a new spill slot begins being tracked.
+  /// This map and the corresponding reverse map persist for the analysis of
+  /// the whole function, and are necessary for decoding various vectors of
+  /// values.
+ std::vector<LocIdx> LocIDToLocIdx;
+
+ /// Inverse map of LocIDToLocIdx.
+ IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
+
+ /// Unique-ification of spill slots. Used to number them -- their LocID
+ /// number is the index in SpillLocs minus one plus NumRegs.
+ UniqueVector<SpillLoc> SpillLocs;
+
+ // If we discover a new machine location, assign it an mphi with this
+ // block number.
+ unsigned CurBB;
+
+ /// Cached local copy of the number of registers the target has.
+ unsigned NumRegs;
+
+ /// Collection of register mask operands that have been observed. Second part
+ /// of pair indicates the instruction that they happened in. Used to
+ /// reconstruct where defs happened if we start tracking a location later
+ /// on.
+ SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
+
+ /// Iterator for locations and the values they contain. Dereferencing
+ /// produces a struct/pair containing the LocIdx key for this location,
+ /// and a reference to the value currently stored. Simplifies the process
+ /// of seeking a particular location.
+ class MLocIterator {
+ LocToValueType &ValueMap;
+ LocIdx Idx;
+
+ public:
+ class value_type {
+ public:
+ value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) { }
+ const LocIdx Idx; /// Read-only index of this location.
+ ValueIDNum &Value; /// Reference to the stored value at this location.
+ };
+
+ MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
+ : ValueMap(ValueMap), Idx(Idx) { }
+
+ bool operator==(const MLocIterator &Other) const {
+ assert(&ValueMap == &Other.ValueMap);
+ return Idx == Other.Idx;
+ }
+
+ bool operator!=(const MLocIterator &Other) const {
+ return !(*this == Other);
+ }
+
+ void operator++() {
+ Idx = LocIdx(Idx.asU64() + 1);
+ }
+
+ value_type operator*() {
+ return value_type(Idx, ValueMap[LocIdx(Idx)]);
+ }
+ };
+
+ MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI, const TargetLowering &TLI)
+ : MF(MF), TII(TII), TRI(TRI), TLI(TLI),
+ LocIdxToIDNum(ValueIDNum::EmptyValue),
+ LocIdxToLocID(0) {
+ NumRegs = TRI.getNumRegs();
+ reset();
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure
+
+    // Always track SP. This prevents the implicit clobbering performed by
+    // regmasks from affecting its value. (LiveDebugValues disbelieves calls
+    // and regmasks that claim to clobber SP).
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
+ if (SP) {
+ unsigned ID = getLocID(SP, false);
+ (void)lookupOrTrackRegister(ID);
+ }
+ }
+
+ /// Produce location ID number for indexing LocIDToLocIdx. Takes the register
+  /// or spill number, and a flag for whether it's a spill or not.
+ unsigned getLocID(Register RegOrSpill, bool isSpill) {
+ return (isSpill) ? RegOrSpill.id() + NumRegs - 1 : RegOrSpill.id();
+ }
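+  // For example, with N registers on the target, register R has LocID R, the
+  // first spill slot (SpillLocs ID 1) has LocID N, the second N + 1, and so on.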
+
+ /// Accessor for reading the value at Idx.
+ ValueIDNum getNumAtPos(LocIdx Idx) const {
+ assert(Idx.asU64() < LocIdxToIDNum.size());
+ return LocIdxToIDNum[Idx];
+ }
+
+ unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
+
+ /// Reset all locations to contain a PHI value at the designated block. Used
+  /// sometimes for actual PHI values, at other times to indicate the block entry
+ /// value (before any more information is known).
+ void setMPhis(unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ for (auto Location : locations())
+ Location.Value = {CurBB, 0, Location.Idx};
+ }
+
+ /// Load values for each location from array of ValueIDNums. Take current
+ /// bbnum just in case we read a value from a hitherto untouched register.
+ void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ // Iterate over all tracked locations, and load each locations live-in
+ // value into our local index.
+ for (auto Location : locations())
+ Location.Value = Locs[Location.Idx.asU64()];
+ }
+
+  /// Wipe any unnecessary location records after traversing a block.
+ void reset(void) {
+ // We could reset all the location values too; however either loadFromArray
+ // or setMPhis should be called before this object is re-used. Just
+ // clear Masks, they're definitely not needed.
+ Masks.clear();
+ }
+
+ /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
+ /// the information in this pass uninterpretable.
+ void clear(void) {
+ reset();
+ LocIDToLocIdx.clear();
+ LocIdxToLocID.clear();
+ LocIdxToIDNum.clear();
+ //SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from 0
+ SpillLocs = decltype(SpillLocs)();
+
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ }
+
+  /// Set a location to a certain value.
+ void setMLoc(LocIdx L, ValueIDNum Num) {
+ assert(L.asU64() < LocIdxToIDNum.size());
+ LocIdxToIDNum[L] = Num;
+ }
+
+ /// Create a LocIdx for an untracked register ID. Initialize it to either an
+ /// mphi value representing a live-in, or a recent register mask clobber.
+ LocIdx trackRegister(unsigned ID) {
+ assert(ID != 0);
+ LocIdx NewIdx = LocIdx(LocIdxToIDNum.size());
+ LocIdxToIDNum.grow(NewIdx);
+ LocIdxToLocID.grow(NewIdx);
+
+ // Default: it's an mphi.
+ ValueIDNum ValNum = {CurBB, 0, NewIdx};
+ // Was this reg ever touched by a regmask?
+ for (const auto &MaskPair : reverse(Masks)) {
+ if (MaskPair.first->clobbersPhysReg(ID)) {
+ // There was an earlier def we skipped.
+ ValNum = {CurBB, MaskPair.second, NewIdx};
+ break;
+ }
+ }
+
+ LocIdxToIDNum[NewIdx] = ValNum;
+ LocIdxToLocID[NewIdx] = ID;
+ return NewIdx;
+ }
+
+ LocIdx lookupOrTrackRegister(unsigned ID) {
+ LocIdx &Index = LocIDToLocIdx[ID];
+ if (Index.isIllegal())
+ Index = trackRegister(ID);
+ return Index;
+ }
+
+ /// Record a definition of the specified register at the given block / inst.
+ /// This doesn't take a ValueIDNum, because the definition and its location
+ /// are synonymous.
+ void defReg(Register R, unsigned BB, unsigned Inst) {
+ unsigned ID = getLocID(R, false);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ ValueIDNum ValueID = {BB, Inst, Idx};
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ /// Set a register to a value number. To be used if the value number is
+ /// known in advance.
+ void setReg(Register R, ValueIDNum ValueID) {
+ unsigned ID = getLocID(R, false);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ ValueIDNum readReg(Register R) {
+ unsigned ID = getLocID(R, false);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ return LocIdxToIDNum[Idx];
+ }
+
+ /// Reset a register value to zero / empty. Needed to replicate the
+ /// VarLoc implementation where a copy to/from a register effectively
+ /// clears the contents of the source register. (Values can only have one
+ /// machine location in VarLocBasedImpl).
+ void wipeRegister(Register R) {
+ unsigned ID = getLocID(R, false);
+ LocIdx Idx = LocIDToLocIdx[ID];
+ LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
+ }
+
+ /// Determine the LocIdx of an existing register.
+ LocIdx getRegMLoc(Register R) {
+ unsigned ID = getLocID(R, false);
+ return LocIDToLocIdx[ID];
+ }
+
+ /// Record a RegMask operand being executed. Defs any register we currently
+ /// track, stores a pointer to the mask in case we have to account for it
+ /// later.
+ void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID) {
+    // Look up SP, so that the loop below never clobbers its location.
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
+
+    // Def any register we track that isn't preserved. The regmask
+ // terminates the liveness of a register, meaning its value can't be
+ // relied upon -- we represent this by giving it a new value.
+ for (auto Location : locations()) {
+ unsigned ID = LocIdxToLocID[Location.Idx];
+ // Don't clobber SP, even if the mask says it's clobbered.
+ if (ID < NumRegs && ID != SP && MO->clobbersPhysReg(ID))
+ defReg(ID, CurBB, InstID);
+ }
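+    // Remember this regmask, so that any register we start tracking later in
+    // the block can account for the clobber (see trackRegister).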
+ Masks.push_back(std::make_pair(MO, InstID));
+ }
+
+ /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
+ LocIdx getOrTrackSpillLoc(SpillLoc L) {
+ unsigned SpillID = SpillLocs.idFor(L);
+ if (SpillID == 0) {
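+      // First time this spill slot is seen: allocate a fresh LocIdx for it.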
+ SpillID = SpillLocs.insert(L);
+      unsigned LocID = getLocID(SpillID, true);
+      LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx
+      LocIdxToIDNum.grow(Idx);
+      LocIdxToLocID.grow(Idx);
+      LocIDToLocIdx.push_back(Idx);
+      LocIdxToLocID[Idx] = LocID;
+ return Idx;
+ } else {
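+      // This spill slot is already tracked: return its existing LocIdx.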
+      unsigned LocID = getLocID(SpillID, true);
+      LocIdx Idx = LocIDToLocIdx[LocID];
+ return Idx;
+ }
+ }
+
+ /// Set the value stored in a spill slot.
+ void setSpill(SpillLoc L, ValueIDNum ValueID) {
+ LocIdx Idx = getOrTrackSpillLoc(L);
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ /// Read whatever value is in a spill slot, or None if it isn't tracked.
+ Optional<ValueIDNum> readSpill(SpillLoc L) {
+ unsigned SpillID = SpillLocs.idFor(L);
+ if (SpillID == 0)
+ return None;
+
+ unsigned LocID = getLocID(SpillID, true);
+ LocIdx Idx = LocIDToLocIdx[LocID];
+ return LocIdxToIDNum[Idx];
+ }
+
+ /// Determine the LocIdx of a spill slot. Return None if it previously
+ /// hasn't had a value assigned.
+ Optional<LocIdx> getSpillMLoc(SpillLoc L) {
+ unsigned SpillID = SpillLocs.idFor(L);
+ if (SpillID == 0)
+ return None;
+ unsigned LocNo = getLocID(SpillID, true);
+ return LocIDToLocIdx[LocNo];
+ }
+
+ /// Return true if Idx is a spill machine location.
+ bool isSpill(LocIdx Idx) const {
+ return LocIdxToLocID[Idx] >= NumRegs;
+ }
+
+ MLocIterator begin() {
+ return MLocIterator(LocIdxToIDNum, 0);
+ }
+
+ MLocIterator end() {
+ return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
+ }
+
+ /// Return a range over all locations currently tracked.
+ iterator_range<MLocIterator> locations() {
+ return llvm::make_range(begin(), end());
+ }
+
+ std::string LocIdxToName(LocIdx Idx) const {
+ unsigned ID = LocIdxToLocID[Idx];
+ if (ID >= NumRegs)
+ return Twine("slot ").concat(Twine(ID - NumRegs)).str();
+ else
+ return TRI.getRegAsmName(ID).str();
+ }
+
+ std::string IDAsString(const ValueIDNum &Num) const {
+ std::string DefName = LocIdxToName(Num.getLoc());
+ return Num.asString(DefName);
+ }
+
+ LLVM_DUMP_METHOD
+ void dump() {
+ for (auto Location : locations()) {
+ std::string MLocName = LocIdxToName(Location.Value.getLoc());
+ std::string DefName = Location.Value.asString(MLocName);
+ dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n";
+ }
+ }
+
+ LLVM_DUMP_METHOD
+ void dump_mloc_map() {
+ for (auto Location : locations()) {
+ std::string foo = LocIdxToName(Location.Idx);
+ dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n";
+ }
+ }
+
+ /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
+  /// information in \p Properties, for variable \p Var. Don't insert it anywhere,
+ /// just return the builder for it.
+ MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
+ const DbgValueProperties &Properties) {
+ DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
+ Var.getVariable()->getScope(),
+ const_cast<DILocation *>(Var.getInlinedAt()));
+ auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE));
+
+ const DIExpression *Expr = Properties.DIExpr;
+ if (!MLoc) {
+ // No location -> DBG_VALUE $noreg
+ MIB.addReg(0, RegState::Debug);
+ MIB.addReg(0, RegState::Debug);
+ } else if (LocIdxToLocID[*MLoc] >= NumRegs) {
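+      // Spill location: fold the spill offset into the expression, and refer
+      // to the slot indirectly through its base register.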
+ unsigned LocID = LocIdxToLocID[*MLoc];
+ const SpillLoc &Spill = SpillLocs[LocID - NumRegs + 1];
+
+ auto *TRI = MF.getSubtarget().getRegisterInfo();
+ Expr = TRI->prependOffsetExpression(Expr, DIExpression::ApplyOffset,
+ Spill.SpillOffset);
+ unsigned Base = Spill.SpillBase;
+ MIB.addReg(Base, RegState::Debug);
+ MIB.addImm(0);
+ } else {
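+      // Register location: emit the register, then either an immediate zero
+      // (indirect) or $noreg (direct) as the second operand.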
+ unsigned LocID = LocIdxToLocID[*MLoc];
+ MIB.addReg(LocID, RegState::Debug);
+ if (Properties.Indirect)
+ MIB.addImm(0);
+ else
+ MIB.addReg(0, RegState::Debug);
+ }
+
+ MIB.addMetadata(Var.getVariable());
+ MIB.addMetadata(Expr);
+ return MIB;
+ }
+};
+
+/// Class recording the (high level) _value_ of a variable. Identifies either
+/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
+/// This class also stores meta-information about how the value is qualified.
+/// Used to reason about variable values when performing the second
+/// (DebugVariable specific) dataflow analysis.
+class DbgValue {
+public:
+ union {
+ /// If Kind is Def, the value number that this value is based on.
+ ValueIDNum ID;
+ /// If Kind is Const, the MachineOperand defining this value.
+ MachineOperand MO;
+ /// For a NoVal DbgValue, which block it was generated in.
+ unsigned BlockNo;
+ };
+ /// Qualifiers for the ValueIDNum above.
+ DbgValueProperties Properties;
+
+ typedef enum {
+ Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
+ Def, // This value is defined by an inst, or is a PHI value.
+ Const, // A constant value contained in the MachineOperand field.
+ Proposed, // This is a tentative PHI value, which may be confirmed or
+ // invalidated later.
+ NoVal // Empty DbgValue, generated during dataflow. BlockNo stores
+ // which block this was generated in.
+ } KindT;
+ /// Discriminator for whether this is a constant or an in-program value.
+ KindT Kind;
+
+ DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind)
+ : ID(Val), Properties(Prop), Kind(Kind) {
+ assert(Kind == Def || Kind == Proposed);
+ }
+
+ DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind)
+ : BlockNo(BlockNo), Properties(Prop), Kind(Kind) {
+ assert(Kind == NoVal);
+ }
+
+ DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind)
+ : MO(MO), Properties(Prop), Kind(Kind) {
+ assert(Kind == Const);
+ }
+
+ DbgValue(const DbgValueProperties &Prop, KindT Kind)
+ : Properties(Prop), Kind(Kind) {
+ assert(Kind == Undef &&
+ "Empty DbgValue constructor must pass in Undef kind");
+ }
+
+ void dump(const MLocTracker *MTrack) const {
+ if (Kind == Const) {
+ MO.dump();
+ } else if (Kind == NoVal) {
+ dbgs() << "NoVal(" << BlockNo << ")";
+ } else if (Kind == Proposed) {
+ dbgs() << "VPHI(" << MTrack->IDAsString(ID) << ")";
+ } else {
+ assert(Kind == Def);
+ dbgs() << MTrack->IDAsString(ID);
+ }
+ if (Properties.Indirect)
+ dbgs() << " indir";
+ if (Properties.DIExpr)
+ dbgs() << " " << *Properties.DIExpr;
+ }
+
+ bool operator==(const DbgValue &Other) const {
+ if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties))
+ return false;
+ else if (Kind == Proposed && ID != Other.ID)
+ return false;
+ else if (Kind == Def && ID != Other.ID)
+ return false;
+ else if (Kind == NoVal && BlockNo != Other.BlockNo)
+ return false;
+ else if (Kind == Const)
+ return MO.isIdenticalTo(Other.MO);
+
+ return true;
+ }
+
+ bool operator!=(const DbgValue &Other) const { return !(*this == Other); }
+};
+
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
+using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+/// Collection of DBG_VALUEs observed when traversing a block. Records each
+/// variable and the value the DBG_VALUE refers to. Requires the machine value
+/// location dataflow algorithm to have run already, so that values can be
+/// identified.
+class VLocTracker {
+public:
+ /// Map DebugVariable to the latest Value it's defined to have.
+ /// Needs to be a MapVector because we determine order-in-the-input-MIR from
+ /// the order in this container.
+ /// We only retain the last DbgValue in each block for each variable, to
+  /// determine the block's live-out variable value. The Vars container forms the
+ /// transfer function for this block, as part of the dataflow analysis. The
+ /// movement of values between locations inside of a block is handled at a
+ /// much later stage, in the TransferTracker class.
+ MapVector<DebugVariable, DbgValue> Vars;
+ DenseMap<DebugVariable, const DILocation *> Scopes;
+ MachineBasicBlock *MBB;
+
+public:
+ VLocTracker() {}
+
+ void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+ Optional<ValueIDNum> ID) {
+ assert(MI.isDebugValue() || MI.isDebugRef());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
+ : DbgValue(Properties, DbgValue::Undef);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+ }
+
+ void defVar(const MachineInstr &MI, const MachineOperand &MO) {
+ // Only DBG_VALUEs can define constant-valued variables.
+ assert(MI.isDebugValue());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValueProperties Properties(MI);
+ DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+ }
+};
+
+/// Tracker for converting machine value locations and variable values into
+/// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs
+/// specifying block live-in locations and transfers within blocks.
+///
+/// Operating on a per-block basis, this class takes a (pre-loaded) MLocTracker
+/// and must be initialized with the set of variable values that are live-in to
+/// the block. The caller then repeatedly calls process(). TransferTracker picks
+/// out variable locations for the live-in variable values (if there _is_ a
+/// location) and creates the corresponding DBG_VALUEs. Then, as the block is
+/// stepped through, transfers of values between machine locations are
+/// identified and if profitable, a DBG_VALUE created.
+///
+/// This is where debug use-before-defs would be resolved: a variable with an
+/// unavailable value could materialize in the middle of a block, when the
+/// value becomes available. Or, we could detect clobbers and re-specify the
+/// variable in a backup location. (XXX these are unimplemented).
+class TransferTracker {
+public:
+ const TargetInstrInfo *TII;
+ /// This machine location tracker is assumed to always contain the up-to-date
+ /// value mapping for all machine locations. TransferTracker only reads
+ /// information from it. (XXX make it const?)
+ MLocTracker *MTracker;
+ MachineFunction &MF;
+
+ /// Record of all changes in variable locations at a block position. Awkwardly
+ /// we allow inserting either before or after the point: MBB != nullptr
+ /// indicates it's before, otherwise after.
+ struct Transfer {
+    MachineBasicBlock::iterator Pos; /// Position to insert DBG_VALUEs.
+    MachineBasicBlock *MBB; /// Non-null if we should insert before Pos; null to insert after.
+ SmallVector<MachineInstr *, 4> Insts; /// Vector of DBG_VALUEs to insert.
+ };
+
+ typedef struct {
+ LocIdx Loc;
+ DbgValueProperties Properties;
+ } LocAndProperties;
+
+ /// Collection of transfers (DBG_VALUEs) to be inserted.
+ SmallVector<Transfer, 32> Transfers;
+
+ /// Local cache of what-value-is-in-what-LocIdx. Used to identify differences
+  /// between TransferTracker's view of variable locations and MLocTracker's. For
+ /// example, MLocTracker observes all clobbers, but TransferTracker lazily
+ /// does not.
+ std::vector<ValueIDNum> VarLocs;
+
+  /// Map from LocIdxes to which DebugVariables are based on that location.
+  /// Maintained while stepping through the block. Not accurate if
+ /// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx].
+ std::map<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;
+
+  /// Map from DebugVariable to its current location and qualifying meta
+ /// information. To be used in conjunction with ActiveMLocs to construct
+ /// enough information for the DBG_VALUEs for a particular LocIdx.
+ DenseMap<DebugVariable, LocAndProperties> ActiveVLocs;
+
+ /// Temporary cache of DBG_VALUEs to be entered into the Transfers collection.
+ SmallVector<MachineInstr *, 4> PendingDbgValues;
+
+ /// Record of a use-before-def: created when a value that's live-in to the
+ /// current block isn't available in any machine location, but it will be
+ /// defined in this block.
+ struct UseBeforeDef {
+ /// Value of this variable, def'd in block.
+ ValueIDNum ID;
+ /// Identity of this variable.
+ DebugVariable Var;
+ /// Additional variable properties.
+ DbgValueProperties Properties;
+ };
+
+ /// Map from instruction index (within the block) to the set of UseBeforeDefs
+ /// that become defined at that instruction.
+ DenseMap<unsigned, SmallVector<UseBeforeDef, 1>> UseBeforeDefs;
+
+ /// The set of variables that are in UseBeforeDefs and can become a location
+ /// once the relevant value is defined. An element being erased from this
+ /// collection prevents the use-before-def materializing.
+ DenseSet<DebugVariable> UseBeforeDefVariables;
+
+ const TargetRegisterInfo &TRI;
+ const BitVector &CalleeSavedRegs;
+
+ TransferTracker(const TargetInstrInfo *TII, MLocTracker *MTracker,
+ MachineFunction &MF, const TargetRegisterInfo &TRI,
+ const BitVector &CalleeSavedRegs)
+ : TII(TII), MTracker(MTracker), MF(MF), TRI(TRI),
+ CalleeSavedRegs(CalleeSavedRegs) {}
+
+ /// Load object with live-in variable values. \p mlocs contains the live-in
+ /// values in each machine location, while \p vlocs the live-in variable
+ /// values. This method picks variable locations for the live-in variables,
+ /// creates DBG_VALUEs and puts them in #Transfers, then prepares the other
+ /// object fields to track variable locations as we step through the block.
+ /// FIXME: could just examine mloctracker instead of passing in \p mlocs?
+ void loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs,
+ SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs,
+ unsigned NumLocs) {
+ ActiveMLocs.clear();
+ ActiveVLocs.clear();
+ VarLocs.clear();
+ VarLocs.reserve(NumLocs);
+ UseBeforeDefs.clear();
+ UseBeforeDefVariables.clear();
+
+ auto isCalleeSaved = [&](LocIdx L) {
+ unsigned Reg = MTracker->LocIdxToLocID[L];
+ if (Reg >= MTracker->NumRegs)
+ return false;
+ for (MCRegAliasIterator RAI(Reg, &TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+ };
+
+ // Map of the preferred location for each value.
+ std::map<ValueIDNum, LocIdx> ValueToLoc;
+
+ // Produce a map of value numbers to the current machine locs they live
+ // in. When emulating VarLocBasedImpl, there should only be one
+ // location; when not, we get to pick.
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+ ValueIDNum &VNum = MLocs[Idx.asU64()];
+ VarLocs.push_back(VNum);
+ auto it = ValueToLoc.find(VNum);
+ // In order of preference, pick:
+ // * Callee saved registers,
+ // * Other registers,
+ // * Spill slots.
+ if (it == ValueToLoc.end() || MTracker->isSpill(it->second) ||
+ (!isCalleeSaved(it->second) && isCalleeSaved(Idx.asU64()))) {
+ // Insert, or overwrite if insertion failed.
+ auto PrefLocRes = ValueToLoc.insert(std::make_pair(VNum, Idx));
+ if (!PrefLocRes.second)
+ PrefLocRes.first->second = Idx;
+ }
+ }
+
+ // Now map variables to their picked LocIdxes.
+ for (auto Var : VLocs) {
+ if (Var.second.Kind == DbgValue::Const) {
+ PendingDbgValues.push_back(
+ emitMOLoc(Var.second.MO, Var.first, Var.second.Properties));
+ continue;
+ }
+
+ // If the value has no location, we can't make a variable location.
+ const ValueIDNum &Num = Var.second.ID;
+ auto ValuesPreferredLoc = ValueToLoc.find(Num);
+ if (ValuesPreferredLoc == ValueToLoc.end()) {
+ // If it's a def that occurs in this block, register it as a
+ // use-before-def to be resolved as we step through the block.
+ if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI())
+ addUseBeforeDef(Var.first, Var.second.Properties, Num);
+ continue;
+ }
+
+ LocIdx M = ValuesPreferredLoc->second;
+ auto NewValue = LocAndProperties{M, Var.second.Properties};
+ auto Result = ActiveVLocs.insert(std::make_pair(Var.first, NewValue));
+ if (!Result.second)
+ Result.first->second = NewValue;
+ ActiveMLocs[M].insert(Var.first);
+ PendingDbgValues.push_back(
+ MTracker->emitLoc(M, Var.first, Var.second.Properties));
+ }
+ flushDbgValues(MBB.begin(), &MBB);
+ }
+
+ /// Record that \p Var has value \p ID, a value that becomes available
+ /// later in the function.
+ void addUseBeforeDef(const DebugVariable &Var,
+ const DbgValueProperties &Properties, ValueIDNum ID) {
+ UseBeforeDef UBD = {ID, Var, Properties};
+ UseBeforeDefs[ID.getInst()].push_back(UBD);
+ UseBeforeDefVariables.insert(Var);
+ }
+
+ /// After the instruction at index \p Inst and position \p pos has been
+ /// processed, check whether it defines a variable value in a use-before-def.
+ /// If so, and the variable value hasn't changed since the start of the
+ /// block, create a DBG_VALUE.
+ void checkInstForNewValues(unsigned Inst, MachineBasicBlock::iterator pos) {
+ auto MIt = UseBeforeDefs.find(Inst);
+ if (MIt == UseBeforeDefs.end())
+ return;
+
+ for (auto &Use : MIt->second) {
+ LocIdx L = Use.ID.getLoc();
+
+      // If something goes very wrong, we might end up labelling a COPY
+      // instruction or similar with an instruction number, even though it
+      // doesn't define a new value but merely moves one. In case this
+      // happens, discard.
+ if (MTracker->LocIdxToIDNum[L] != Use.ID)
+ continue;
+
+ // If a different debug instruction defined the variable value / location
+ // since the start of the block, don't materialize this use-before-def.
+ if (!UseBeforeDefVariables.count(Use.Var))
+ continue;
+
+ PendingDbgValues.push_back(MTracker->emitLoc(L, Use.Var, Use.Properties));
+ }
+ flushDbgValues(pos, nullptr);
+ }
+
+ /// Helper to move created DBG_VALUEs into Transfers collection.
+ void flushDbgValues(MachineBasicBlock::iterator Pos, MachineBasicBlock *MBB) {
+ if (PendingDbgValues.size() > 0) {
+ Transfers.push_back({Pos, MBB, PendingDbgValues});
+ PendingDbgValues.clear();
+ }
+ }
+
+ /// Change a variable value after encountering a DBG_VALUE inside a block.
+ void redefVar(const MachineInstr &MI) {
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValueProperties Properties(MI);
+
+ const MachineOperand &MO = MI.getOperand(0);
+
+ // Ignore non-register locations, we don't transfer those.
+ if (!MO.isReg() || MO.getReg() == 0) {
+ auto It = ActiveVLocs.find(Var);
+ if (It != ActiveVLocs.end()) {
+ ActiveMLocs[It->second.Loc].erase(Var);
+ ActiveVLocs.erase(It);
+ }
+ // Any use-before-defs no longer apply.
+ UseBeforeDefVariables.erase(Var);
+ return;
+ }
+
+ Register Reg = MO.getReg();
+ LocIdx NewLoc = MTracker->getRegMLoc(Reg);
+ redefVar(MI, Properties, NewLoc);
+ }
+
+ /// Handle a change in variable location within a block. Terminate the
+  /// variable's current location, and record the value it now refers to, so
+ /// that we can detect location transfers later on.
+ void redefVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+ Optional<LocIdx> OptNewLoc) {
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ // Any use-before-defs no longer apply.
+ UseBeforeDefVariables.erase(Var);
+
+    // Erase any previous location.
+ auto It = ActiveVLocs.find(Var);
+ if (It != ActiveVLocs.end())
+ ActiveMLocs[It->second.Loc].erase(Var);
+
+ // If there _is_ no new location, all we had to do was erase.
+ if (!OptNewLoc)
+ return;
+ LocIdx NewLoc = *OptNewLoc;
+
+ // Check whether our local copy of values-by-location in #VarLocs is out of
+ // date. Wipe old tracking data for the location if it's been clobbered in
+ // the meantime.
+ if (MTracker->getNumAtPos(NewLoc) != VarLocs[NewLoc.asU64()]) {
+ for (auto &P : ActiveMLocs[NewLoc]) {
+ ActiveVLocs.erase(P);
+ }
+ ActiveMLocs[NewLoc.asU64()].clear();
+ VarLocs[NewLoc.asU64()] = MTracker->getNumAtPos(NewLoc);
+ }
+
+ ActiveMLocs[NewLoc].insert(Var);
+ if (It == ActiveVLocs.end()) {
+ ActiveVLocs.insert(
+ std::make_pair(Var, LocAndProperties{NewLoc, Properties}));
+ } else {
+ It->second.Loc = NewLoc;
+ It->second.Properties = Properties;
+ }
+ }
+
+ /// Explicitly terminate variable locations based on \p mloc. Creates undef
+ /// DBG_VALUEs for any variables that were located there, and clears
+ /// #ActiveMLoc / #ActiveVLoc tracking information for that location.
+ void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos) {
+ assert(MTracker->isSpill(MLoc));
+ auto ActiveMLocIt = ActiveMLocs.find(MLoc);
+ if (ActiveMLocIt == ActiveMLocs.end())
+ return;
+
+ VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue;
+
+ for (auto &Var : ActiveMLocIt->second) {
+ auto ActiveVLocIt = ActiveVLocs.find(Var);
+ // Create an undef. We can't feed in a nullptr DIExpression alas,
+      // so use the variable's last expression. Pass None as the location.
+ const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr;
+ DbgValueProperties Properties(Expr, false);
+ PendingDbgValues.push_back(MTracker->emitLoc(None, Var, Properties));
+ ActiveVLocs.erase(ActiveVLocIt);
+ }
+ flushDbgValues(Pos, nullptr);
+
+ ActiveMLocIt->second.clear();
+ }
+
+ /// Transfer variables based on \p Src to be based on \p Dst. This handles
+ /// both register copies as well as spills and restores. Creates DBG_VALUEs
+ /// describing the movement.
+ void transferMlocs(LocIdx Src, LocIdx Dst, MachineBasicBlock::iterator Pos) {
+ // Does Src still contain the value num we expect? If not, it's been
+ // clobbered in the meantime, and our variable locations are stale.
+ if (VarLocs[Src.asU64()] != MTracker->getNumAtPos(Src))
+ return;
+
+ // assert(ActiveMLocs[Dst].size() == 0);
+ //^^^ Legitimate scenario on account of un-clobbered slot being assigned to?
+ ActiveMLocs[Dst] = ActiveMLocs[Src];
+ VarLocs[Dst.asU64()] = VarLocs[Src.asU64()];
+
+ // For each variable based on Src; create a location at Dst.
+ for (auto &Var : ActiveMLocs[Src]) {
+ auto ActiveVLocIt = ActiveVLocs.find(Var);
+ assert(ActiveVLocIt != ActiveVLocs.end());
+ ActiveVLocIt->second.Loc = Dst;
+
+ assert(Dst != 0);
+ MachineInstr *MI =
+ MTracker->emitLoc(Dst, Var, ActiveVLocIt->second.Properties);
+ PendingDbgValues.push_back(MI);
+ }
+ ActiveMLocs[Src].clear();
+ flushDbgValues(Pos, nullptr);
+
+ // XXX XXX XXX "pretend to be old LDV" means dropping all tracking data
+ // about the old location.
+ if (EmulateOldLDV)
+ VarLocs[Src.asU64()] = ValueIDNum::EmptyValue;
+ }
+
+ MachineInstrBuilder emitMOLoc(const MachineOperand &MO,
+ const DebugVariable &Var,
+ const DbgValueProperties &Properties) {
+ DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
+ Var.getVariable()->getScope(),
+ const_cast<DILocation *>(Var.getInlinedAt()));
+ auto MIB = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE));
+ MIB.add(MO);
+ if (Properties.Indirect)
+ MIB.addImm(0);
+ else
+ MIB.addReg(0);
+ MIB.addMetadata(Var.getVariable());
+ MIB.addMetadata(Properties.DIExpr);
+ return MIB;
+ }
+};
+
+class InstrRefBasedLDV : public LDVImpl {
+private:
+ using FragmentInfo = DIExpression::FragmentInfo;
+ using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+
+ // Helper while building OverlapMap, a map of all fragments seen for a given
+ // DILocalVariable.
+ using VarToFragments =
+ DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
+ /// Machine location/value transfer function, a mapping of which locations
+ /// are assigned which new values.
+ using MLocTransferMap = std::map<LocIdx, ValueIDNum>;
+
+ /// Live in/out structure for the variable values: a per-block map of
+ /// variables to their values. XXX, better name?
+ using LiveIdxT =
+ DenseMap<const MachineBasicBlock *, DenseMap<DebugVariable, DbgValue> *>;
+
+ using VarAndLoc = std::pair<DebugVariable, DbgValue>;
+
+ /// Type for a live-in value: the predecessor block, and its value.
+ using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
+
+ /// Vector (per block) of a collection (inner smallvector) of live-ins.
+ /// Used as the result type for the variable value dataflow problem.
+ using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
+
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ const TargetFrameLowering *TFI;
+ BitVector CalleeSavedRegs;
+ LexicalScopes LS;
+ TargetPassConfig *TPC;
+
+ /// Object to track machine locations as we step through a block. Could
+ /// probably be a field rather than a pointer, as it's always used.
+ MLocTracker *MTracker;
+
+ /// Number of the current block LiveDebugValues is stepping through.
+ unsigned CurBB;
+
+ /// Number of the current instruction LiveDebugValues is evaluating.
+ unsigned CurInst;
+
+ /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
+ /// steps through a block. Reads the values at each location from the
+ /// MLocTracker object.
+ VLocTracker *VTracker;
+
+ /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
+ /// between locations during stepping, creates new DBG_VALUEs when values move
+ /// location.
+ TransferTracker *TTracker;
+
+ /// Blocks which are artificial, i.e. blocks which exclusively contain
+ /// instructions without DebugLocs, or with line 0 locations.
+ SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
+ // Mapping of blocks to and from their RPOT order.
+ DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
+ DenseMap<unsigned, unsigned> BBNumToRPO;
+
+ /// Pair of MachineInstr, and its 1-based offset into the containing block.
+ using InstAndNum = std::pair<const MachineInstr *, unsigned>;
+ /// Map from debug instruction number to the MachineInstr labelled with that
+ /// number, and its location within the function. Used to transform
+ /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
+ std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+
+ // Map of overlapping variable fragments.
+ OverlapMap OverlapFragments;
+ VarToFragments SeenFragments;
+
+ /// Tests whether this instruction is a spill to a stack slot.
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+
+ /// Decide if @MI is a spill instruction and return true if it is. We use 2
+ /// criteria to make this decision:
+ /// - Is this instruction a store to a spill slot?
+ /// - Is there a register operand that is both used and killed?
+ /// TODO: Store optimization can fold spills into other stores (including
+ /// other spills). We do not handle this yet (more than one memory operand).
+ bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+
+  /// If a given instruction is identified as a restore from a spill slot,
+  /// return the spill slot and set \p Reg to the restored register.
+ Optional<SpillLoc> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg);
+
+ /// Given a spill instruction, extract the register and offset used to
+ /// address the spill slot in a target independent way.
+ SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
+
+ /// Observe a single instruction while stepping through a block.
+ void process(MachineInstr &MI);
+
+ /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugValue(const MachineInstr &MI);
+
+ /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugInstrRef(MachineInstr &MI);
+
+  /// Examines whether \p MI is a copy instruction, and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferRegisterCopy(MachineInstr &MI);
+
+  /// Examines whether \p MI is a stack spill or restore instruction, and
+ /// notifies trackers. \returns true if MI was recognized and processed.
+ bool transferSpillOrRestoreInst(MachineInstr &MI);
+
+ /// Examines \p MI for any registers that it defines, and notifies trackers.
+ void transferRegisterDef(MachineInstr &MI);
+
+ /// Copy one location to the other, accounting for movement of subregisters
+ /// too.
+ void performCopy(Register Src, Register Dst);
+
+ void accumulateFragmentMap(MachineInstr &MI);
+
+ /// Step through the function, recording register definitions and movements
+ /// in an MLocTracker. Convert the observations into a per-block transfer
+ /// function in \p MLocTransfer, suitable for using with the machine value
+ /// location dataflow problem.
+ void
+ produceMLocTransferFunction(MachineFunction &MF,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+ unsigned MaxNumBlocks);
+
+ /// Solve the machine value location dataflow problem. Takes as input the
+ /// transfer functions in \p MLocTransfer. Writes the output live-in and
+ /// live-out arrays to the (initialized to zero) multidimensional arrays in
+ /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
+ /// number, the inner by LocIdx.
+ void mlocDataflow(ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+ /// Perform a control flow join (lattice value meet) of the values in machine
+ /// locations at \p MBB. Follows the algorithm described in the file-comment,
+ /// reading live-outs of predecessors from \p OutLocs, the current live ins
+ /// from \p InLocs, and assigning the newly computed live ins back into
+ /// \p InLocs. \returns two bools -- the first indicates whether a change
+ /// was made, the second whether a lattice downgrade occurred. If the latter
+ /// is true, revisiting this block is necessary.
+ std::tuple<bool, bool>
+ mlocJoin(MachineBasicBlock &MBB,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ ValueIDNum **OutLocs, ValueIDNum *InLocs);
+
+ /// Solve the variable value dataflow problem, for a single lexical scope.
+ /// Uses the algorithm from the file comment to resolve control flow joins,
+ /// although there are extra hacks, see vlocJoin. Reads the
+ /// locations of values from the \p MInLocs and \p MOutLocs arrays (see
+ /// mlocDataflow) and reads the variable values transfer function from
+ /// \p AllTheVlocs. Live-in and Live-out variable values are stored locally,
+ /// with the live-ins permanently stored to \p Output once the fixedpoint is
+ /// reached.
+ /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
+ /// that we should be tracking.
+ /// \p AssignBlocks contains the set of blocks that aren't in \p Scope, but
+ /// which do contain DBG_VALUEs, which VarLocBasedImpl tracks locations
+ /// through.
+ void vlocDataflow(const LexicalScope *Scope, const DILocation *DILoc,
+ const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
+ LiveInsT &Output, ValueIDNum **MOutLocs,
+ ValueIDNum **MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs);
+
+ /// Compute the live-ins to a block, considering control flow merges according
+ /// to the method in the file comment. Live out and live in variable values
+ /// are stored in \p VLOCOutLocs and \p VLOCInLocs. The live-ins for \p MBB
+  /// are computed and stored into \p VLOCInLocs.
+ /// \p InLocsT Output argument, storage for calculated live-ins.
+ /// \returns two bools -- the first indicates whether a change
+ /// was made, the second whether a lattice downgrade occurred. If the latter
+ /// is true, revisiting this block is necessary.
+ std::tuple<bool, bool>
+ vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
+ SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited,
+ unsigned BBNum, const SmallSet<DebugVariable, 4> &AllVars,
+ ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
+ SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+ DenseMap<DebugVariable, DbgValue> &InLocsT);
+
+ /// Continue exploration of the variable-value lattice, as explained in the
+ /// file-level comment. \p OldLiveInLocation contains the current
+ /// exploration position, from which we need to descend further. \p Values
+ /// contains the set of live-in values, \p CurBlockRPONum the RPO number of
+ /// the current block, and \p CandidateLocations a set of locations that
+ /// should be considered as PHI locations, if we reach the bottom of the
+ /// lattice. \returns true if we should downgrade; the value is the agreeing
+ /// value number in a non-backedge predecessor.
+ bool vlocDowngradeLattice(const MachineBasicBlock &MBB,
+ const DbgValue &OldLiveInLocation,
+ const SmallVectorImpl<InValueT> &Values,
+ unsigned CurBlockRPONum);
+
+ /// For the given block and live-outs feeding into it, try to find a
+ /// machine location where they all join. If a solution for all predecessors
+ /// can't be found, a location where all non-backedge-predecessors join
+ /// will be returned instead. While this method finds a join location, this
+ /// says nothing as to whether it should be used.
+ /// \returns Pair of value ID if found, and true when the correct value
+ /// is available on all predecessor edges, or false if it's only available
+ /// for non-backedge predecessors.
+ std::tuple<Optional<ValueIDNum>, bool>
+ pickVPHILoc(MachineBasicBlock &MBB, const DebugVariable &Var,
+ const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ ValueIDNum **MInLocs,
+ const SmallVectorImpl<MachineBasicBlock *> &BlockOrders);
+
+ /// Given the solutions to the two dataflow problems, machine value locations
+ /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
+ /// TransferTracker class over the function to produce live-in and transfer
+ /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
+ /// order given by AllVarsNumbering -- this could be any stable order, but
+ /// right now "order of appearence in function, when explored in RPO", so
+ /// that we can compare explictly against VarLocBasedImpl.
+ void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
+ ValueIDNum **MInLocs,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering);
+
+  /// Boilerplate computation of some initial sets, artificial blocks and
+ /// RPOT block ordering.
+ void initialSetup(MachineFunction &MF);
+
+ bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override;
+
+public:
+ /// Default construct and initialize the pass.
+ InstrRefBasedLDV();
+
+ LLVM_DUMP_METHOD
+ void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+
+ bool isCalleeSaved(LocIdx L) {
+ unsigned Reg = MTracker->LocIdxToLocID[L];
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+ }
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+
+/// Default construct and initialize the pass.
+InstrRefBasedLDV::InstrRefBasedLDV() {}
+
+//===----------------------------------------------------------------------===//
+// Debug Range Extension Implementation
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+// Something to restore in the future.
+// void InstrRefBasedLDV::printVarLocInMBB(..)
+#endif
+
+SpillLoc
+InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
+ assert(MI.hasOneMemOperand() &&
+ "Spill instruction does not have exactly one memory operand?");
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ assert(PVal->kind() == PseudoSourceValue::FixedStack &&
+ "Inconsistent memory operand in spill instruction");
+ int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
+ const MachineBasicBlock *MBB = MI.getParent();
+ Register Reg;
+ StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+ return {Reg, Offset};
+}
+
+/// End all previous ranges related to @MI and start a new range from @MI
+/// if it is a DBG_VALUE instr.
+bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
+ if (!MI.isDebugValue())
+ return false;
+
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ const DILocation *DebugLoc = MI.getDebugLoc();
+ const DILocation *InlinedAt = DebugLoc->getInlinedAt();
+ assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
+ "Expected inlined-at fields to agree");
+
+ DebugVariable V(Var, Expr, InlinedAt);
+ DbgValueProperties Properties(MI);
+
+ // If there are no instructions in this lexical scope, do no location tracking
+  // at all; this variable shouldn't get a legitimate location range.
+ auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get());
+ if (Scope == nullptr)
+    return true; // Handled by doing nothing.
+
+ const MachineOperand &MO = MI.getOperand(0);
+
+ // MLocTracker needs to know that this register is read, even if it's only
+ // read by a debug inst.
+ if (MO.isReg() && MO.getReg() != 0)
+ (void)MTracker->readReg(MO.getReg());
+
+ // If we're preparing for the second analysis (variables), the machine value
+ // locations are already solved, and we report this DBG_VALUE and the value
+ // it refers to to VLocTracker.
+ if (VTracker) {
+ if (MO.isReg()) {
+ // Feed defVar the new variable location, or if this is a
+ // DBG_VALUE $noreg, feed defVar None.
+ if (MO.getReg())
+ VTracker->defVar(MI, Properties, MTracker->readReg(MO.getReg()));
+ else
+ VTracker->defVar(MI, Properties, None);
+ } else if (MI.getOperand(0).isImm() || MI.getOperand(0).isFPImm() ||
+ MI.getOperand(0).isCImm()) {
+ VTracker->defVar(MI, MI.getOperand(0));
+ }
+ }
+
+ // If performing final tracking of transfers, report this variable definition
+ // to the TransferTracker too.
+ if (TTracker)
+ TTracker->redefVar(MI);
+ return true;
+}
+
+bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) {
+ if (!MI.isDebugRef())
+ return false;
+
+ // Only handle this instruction when we are building the variable value
+ // transfer function.
+ if (!VTracker)
+ return false;
+
+ unsigned InstNo = MI.getOperand(0).getImm();
+ unsigned OpNo = MI.getOperand(1).getImm();
+
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ const DILocation *DebugLoc = MI.getDebugLoc();
+ const DILocation *InlinedAt = DebugLoc->getInlinedAt();
+ assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
+ "Expected inlined-at fields to agree");
+
+ DebugVariable V(Var, Expr, InlinedAt);
+
+ auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get());
+ if (Scope == nullptr)
+ return true; // Handled by doing nothing. This variable is never in scope.
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+
+ // Various optimizations may have happened to the value during codegen,
+ // recorded in the value substitution table. Apply any substitutions to
+ // the instruction / operand number in this DBG_INSTR_REF.
+ auto Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo));
+ while (Sub != MF.DebugValueSubstitutions.end()) {
+ InstNo = Sub->second.first;
+ OpNo = Sub->second.second;
+ Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo));
+ }
+
+ // Default machine value number is <None> -- if no instruction defines
+ // the corresponding value, it must have been optimized out.
+ Optional<ValueIDNum> NewID = None;
+
+ // Try to lookup the instruction number, and find the machine value number
+ // that it defines.
+ auto InstrIt = DebugInstrNumToInstr.find(InstNo);
+ if (InstrIt != DebugInstrNumToInstr.end()) {
+ const MachineInstr &TargetInstr = *InstrIt->second.first;
+ uint64_t BlockNo = TargetInstr.getParent()->getNumber();
+
+ // Pick out the designated operand.
+ assert(OpNo < TargetInstr.getNumOperands());
+ const MachineOperand &MO = TargetInstr.getOperand(OpNo);
+
+ // Today, this can only be a register.
+ assert(MO.isReg() && MO.isDef());
+
+ unsigned LocID = MTracker->getLocID(MO.getReg(), false);
+ LocIdx L = MTracker->LocIDToLocIdx[LocID];
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ }
+
+  // Now we have a value number or None. Tell the variable value tracker about
+ // it. The rest of this LiveDebugValues implementation acts exactly the same
+ // for DBG_INSTR_REFs as DBG_VALUEs (just, the former can refer to values that
+ // aren't immediately available).
+ DbgValueProperties Properties(Expr, false);
+ VTracker->defVar(MI, Properties, NewID);
+
+ // If we're on the final pass through the function, decompose this INSTR_REF
+ // into a plain DBG_VALUE.
+ if (!TTracker)
+ return true;
+
+ // Pick a location for the machine value number, if such a location exists.
+ // (This information could be stored in TransferTracker to make it faster).
+ Optional<LocIdx> FoundLoc = None;
+ for (auto Location : MTracker->locations()) {
+ LocIdx CurL = Location.Idx;
+ ValueIDNum ID = MTracker->LocIdxToIDNum[CurL];
+ if (NewID && ID == NewID) {
+ // If this is the first location with that value, pick it. Otherwise,
+ // consider whether it's a "longer term" location.
+ if (!FoundLoc) {
+ FoundLoc = CurL;
+ continue;
+ }
+
+ if (MTracker->isSpill(CurL))
+ FoundLoc = CurL; // Spills are a longer term location.
+ else if (!MTracker->isSpill(*FoundLoc) &&
+ !MTracker->isSpill(CurL) &&
+ !isCalleeSaved(*FoundLoc) &&
+ isCalleeSaved(CurL))
+ FoundLoc = CurL; // Callee saved regs are longer term than normal.
+ }
+ }
+
+ // Tell transfer tracker that the variable value has changed.
+ TTracker->redefVar(MI, Properties, FoundLoc);
+
+ // If there was a value with no location; but the value is defined in a
+ // later instruction in this block, this is a block-local use-before-def.
+ if (!FoundLoc && NewID && NewID->getBlock() == CurBB &&
+ NewID->getInst() > CurInst)
+ TTracker->addUseBeforeDef(V, {MI.getDebugExpression(), false}, *NewID);
+
+ // Produce a DBG_VALUE representing what this DBG_INSTR_REF meant.
+ // This DBG_VALUE is potentially a $noreg / undefined location, if
+ // FoundLoc is None.
+ // (XXX -- could morph the DBG_INSTR_REF in the future).
+ MachineInstr *DbgMI = MTracker->emitLoc(FoundLoc, V, Properties);
+ TTracker->PendingDbgValues.push_back(DbgMI);
+ TTracker->flushDbgValues(MI.getIterator(), nullptr);
+
+ return true;
+}
+
+void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
+ // Meta instructions do not affect the debug liveness of any register they
+ // define.
+ if (MI.isImplicitDef()) {
+ // Except when there's an implicit def, and the location it's defining has
+ // no value number. The whole point of an implicit def is to announce that
+ // the register is live, without being specific about its value. So define
+ // a value if there isn't one already.
+ ValueIDNum Num = MTracker->readReg(MI.getOperand(0).getReg());
+ // Has a legitimate value -> ignore the implicit def.
+ if (Num.getLoc() != 0)
+ return;
+ // Otherwise, def it here.
+ } else if (MI.isMetaInstruction())
+ return;
+
+ MachineFunction *MF = MI.getMF();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+
+ // Find the regs killed by MI, and find regmasks of preserved regs.
+ // Max out the number of statically allocated elements in `DeadRegs`, as this
+ // prevents fallback to std::set::count() operations.
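+ // For example, at a call instruction the (implicit) defs of return-value
+ // registers and other clobbered operands land in DeadRegs along with their
+ // aliases, the stack pointer def is deliberately skipped, and the call's
+ // regmask operand is recorded separately so that only currently-tracked
+ // registers need to be invalidated by it.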
+ SmallSet<uint32_t, 32> DeadRegs;
+ SmallVector<const uint32_t *, 4> RegMasks;
+ SmallVector<const MachineOperand *, 4> RegMaskPtrs;
+ for (const MachineOperand &MO : MI.operands()) {
+ // Determine whether the operand is a register def.
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ Register::isPhysicalRegister(MO.getReg()) &&
+ !(MI.isCall() && MO.getReg() == SP)) {
+ // Remove ranges of all aliased registers.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ // FIXME: Can we break out of this loop early if no insertion occurs?
+ DeadRegs.insert(*RAI);
+ } else if (MO.isRegMask()) {
+ RegMasks.push_back(MO.getRegMask());
+ RegMaskPtrs.push_back(&MO);
+ }
+ }
+
+ // Tell MLocTracker about all definitions, of regmasks and otherwise.
+ for (uint32_t DeadReg : DeadRegs)
+ MTracker->defReg(DeadReg, CurBB, CurInst);
+
+ for (auto *MO : RegMaskPtrs)
+ MTracker->writeRegMask(MO, CurBB, CurInst);
+}
+
+void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
+ ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
+
+ MTracker->setReg(DstRegNum, SrcValue);
+
+ // In all circumstances, re-def the super registers. It's definitely a new
+ // value now. This doesn't uniquely identify the composition of subregs; for
+ // example, two identical values in subregisters composed in different
+ // places would not get equal value numbers.
+ for (MCSuperRegIterator SRI(DstRegNum, TRI); SRI.isValid(); ++SRI)
+ MTracker->defReg(*SRI, CurBB, CurInst);
+
+ // If we're emulating VarLocBasedImpl, just define all the subregisters.
+ // DBG_VALUEs of them will expect to be tracked from the DBG_VALUE, not
+ // through prior copies.
+ if (EmulateOldLDV) {
+ for (MCSubRegIndexIterator DRI(DstRegNum, TRI); DRI.isValid(); ++DRI)
+ MTracker->defReg(DRI.getSubReg(), CurBB, CurInst);
+ return;
+ }
+
+ // Otherwise, actually copy subregisters from one location to another.
+ // XXX: in addition, any subregisters of DstRegNum that don't line up with
+ // the source register should be def'd.
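+ // Worked example (register names purely illustrative): copying $eax into
+ // $ebx when $eax holds value {bb=1, inst=5, loc=L($eax)} sets $ebx to that
+ // value, re-defs $rbx (and any other super-reg), and gives the matching
+ // subregister $bx the value {bb=1, inst=5, loc=L($ax)} -- the same def, read
+ // through the source subregister's location.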
+ for (MCSubRegIndexIterator SRI(SrcRegNum, TRI); SRI.isValid(); ++SRI) {
+ unsigned SrcSubReg = SRI.getSubReg();
+ unsigned SubRegIdx = SRI.getSubRegIndex();
+ unsigned DstSubReg = TRI->getSubReg(DstRegNum, SubRegIdx);
+ if (!DstSubReg)
+ continue;
+
+ // Do the copy. There are two matching subregisters; the source value should
+ // have been def'd when the super-reg was, but its location might not be
+ // tracked yet.
+ // This will force SrcSubReg to be tracked, if it isn't yet.
+ (void)MTracker->readReg(SrcSubReg);
+ LocIdx SrcL = MTracker->getRegMLoc(SrcSubReg);
+ assert(SrcL.asU64());
+ (void)MTracker->readReg(DstSubReg);
+ LocIdx DstL = MTracker->getRegMLoc(DstSubReg);
+ assert(DstL.asU64());
+ (void)DstL;
+ ValueIDNum CpyValue = {SrcValue.getBlock(), SrcValue.getInst(), SrcL};
+
+ MTracker->setReg(DstSubReg, CpyValue);
+ }
+}
+
+bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF) {
+ // TODO: Handle multiple stores folded into one.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
+ return false; // This is not a spill instruction, since no valid size was
+ // returned from either function.
+
+ return true;
+}
+
+bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ if (!isSpillInstruction(MI, MF))
+ return false;
+
+ // XXX FIXME: On x86, isStoreToStackSlotPostFE returns '1' instead of an
+ // actual register number.
+ if (ObserveAllStackops) {
+ int FI;
+ Reg = TII->isStoreToStackSlotPostFE(MI, FI);
+ return Reg != 0;
+ }
+
+ auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
+ if (!MO.isReg() || !MO.isUse()) {
+ Reg = 0;
+ return false;
+ }
+ Reg = MO.getReg();
+ return MO.isKill();
+ };
+
+ for (const MachineOperand &MO : MI.operands()) {
+ // In a spill instruction generated by the InlineSpiller, the spilled
+ // register has its kill flag set.
+ if (isKilledReg(MO, Reg))
+ return true;
+ if (Reg != 0) {
+ // Check whether next instruction kills the spilled register.
+ // FIXME: Current solution does not cover search for killed register in
+ // bundles and instructions further down the chain.
+ auto NextI = std::next(MI.getIterator());
+ // Skip next instruction that points to basic block end iterator.
+ if (MI.getParent()->end() == NextI)
+ continue;
+ unsigned RegNext;
+ for (const MachineOperand &MONext : NextI->operands()) {
+ // Return true if we came across the register from the
+ // previous spill instruction that is killed in NextI.
+ if (isKilledReg(MONext, RegNext) && RegNext == Reg)
+ return true;
+ }
+ }
+ }
+ // Return false if we didn't find a spilled register.
+ return false;
+}
+
+Optional<SpillLoc>
+InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ if (!MI.hasOneMemOperand())
+ return None;
+
+ // FIXME: Handle folded restore instructions with more than one memory
+ // operand.
+ if (MI.getRestoreSize(TII)) {
+ Reg = MI.getOperand(0).getReg();
+ return extractSpillBaseRegAndOffset(MI);
+ }
+ return None;
+}
+
+bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
+ // XXX -- it's too difficult to implement VarLocBasedImpl's stack location
+ // limitations under the new model. Therefore, when comparing them, compare
+ // versions that don't attempt spills or restores at all.
+ if (EmulateOldLDV)
+ return false;
+
+ MachineFunction *MF = MI.getMF();
+ unsigned Reg;
+ Optional<SpillLoc> Loc;
+
+ LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
+
+ // First, if there are any DBG_VALUEs pointing at a spill slot that is
+ // written to, terminate that variable location. The value in memory
+ // will have changed. DbgEntityHistoryCalculator doesn't try to detect this.
+ if (isSpillInstruction(MI, MF)) {
+ Loc = extractSpillBaseRegAndOffset(MI);
+
+ if (TTracker) {
+ Optional<LocIdx> MLoc = MTracker->getSpillMLoc(*Loc);
+ if (MLoc)
+ TTracker->clobberMloc(*MLoc, MI.getIterator());
+ }
+ }
+
+ // Try to recognise spill and restore instructions that may transfer a value.
+ if (isLocationSpill(MI, MF, Reg)) {
+ Loc = extractSpillBaseRegAndOffset(MI);
+ auto ValueID = MTracker->readReg(Reg);
+
+ // If the location is empty, produce a phi, signifying it's the live-in value.
+ if (ValueID.getLoc() == 0)
+ ValueID = {CurBB, 0, MTracker->getRegMLoc(Reg)};
+
+ MTracker->setSpill(*Loc, ValueID);
+ auto OptSpillLocIdx = MTracker->getSpillMLoc(*Loc);
+ assert(OptSpillLocIdx && "Spill slot set but has no LocIdx?");
+ LocIdx SpillLocIdx = *OptSpillLocIdx;
+
+ // Tell TransferTracker about this spill, produce DBG_VALUEs for it.
+ if (TTracker)
+ TTracker->transferMlocs(MTracker->getRegMLoc(Reg), SpillLocIdx,
+ MI.getIterator());
+ } else {
+ if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+ return false;
+
+ // Is there a value to be restored?
+ auto OptValueID = MTracker->readSpill(*Loc);
+ if (OptValueID) {
+ ValueIDNum ValueID = *OptValueID;
+ LocIdx SpillLocIdx = *MTracker->getSpillMLoc(*Loc);
+ // XXX -- can we recover sub-registers of this value? Until we can, first
+ // overwrite all defs of the register being restored to.
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+
+ // Now override the reg we're restoring to.
+ MTracker->setReg(Reg, ValueID);
+
+ // Report this restore to the transfer tracker too.
+ if (TTracker)
+ TTracker->transferMlocs(SpillLocIdx, MTracker->getRegMLoc(Reg),
+ MI.getIterator());
+ } else {
+ // There isn't anything in the location; not clear if this is a code path
+ // that still runs. Def this register anyway just in case.
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+
+ // Force the spill slot to be tracked.
+ LocIdx L = MTracker->getOrTrackSpillLoc(*Loc);
+
+ // Set the restored value to be a machine phi number, signifying that it's
+ // whatever the spill's live-in value is in this block. Definitely has
+ // a LocIdx due to the getOrTrackSpillLoc call above.
+ ValueIDNum ValueID = {CurBB, 0, L};
+ MTracker->setReg(Reg, ValueID);
+ MTracker->setSpill(*Loc, ValueID);
+ }
+ }
+ return true;
+}
+
+bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
+ auto DestSrc = TII->isCopyInstr(MI);
+ if (!DestSrc)
+ return false;
+
+ const MachineOperand *DestRegOp = DestSrc->Destination;
+ const MachineOperand *SrcRegOp = DestSrc->Source;
+
+ auto isCalleeSavedReg = [&](unsigned Reg) {
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+ };
+
+ Register SrcReg = SrcRegOp->getReg();
+ Register DestReg = DestRegOp->getReg();
+
+ // Ignore identity copies. Yep, these make it as far as LiveDebugValues.
+ if (SrcReg == DestReg)
+ return true;
+
+ // For emulating VarLocBasedImpl:
+ // We want to recognize instructions where the destination register is a
+ // callee-saved register. If a register that could be clobbered by a call
+ // were tracked instead, there would be a good chance that it is going to be
+ // clobbered soon. It is more likely that the previous register, which is
+ // callee saved, is going to stay unclobbered for longer, even if it is
+ // killed.
+ //
+ // For InstrRefBasedImpl, we can track multiple locations per value, so
+ // ignore this condition.
+ if (EmulateOldLDV && !isCalleeSavedReg(DestReg))
+ return false;
+
+ // VarLocBasedImpl only followed killing copies.
+ if (EmulateOldLDV && !SrcRegOp->isKill())
+ return false;
+
+ // Copy MTracker info, including subregs if available.
+ InstrRefBasedLDV::performCopy(SrcReg, DestReg);
+
+ // Only produce a transfer of DBG_VALUE within a block where old LDV
+ // would have. We might make use of the additional value tracking in some
+ // other way, later.
+ if (TTracker && isCalleeSavedReg(DestReg) && SrcRegOp->isKill())
+ TTracker->transferMlocs(MTracker->getRegMLoc(SrcReg),
+ MTracker->getRegMLoc(DestReg), MI.getIterator());
+
+ // VarLocBasedImpl would quit tracking the old location after copying.
+ if (EmulateOldLDV && SrcReg != DestReg)
+ MTracker->defReg(SrcReg, CurBB, CurInst);
+
+ return true;
+}
+
+/// Accumulate a mapping between each DILocalVariable fragment and other
+/// fragments of that DILocalVariable which overlap. This reduces work during
+/// the data-flow stage from "Find any overlapping fragments" to "Check if the
+/// known-to-overlap fragments are present".
+/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for
+/// fragment usage.
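+/// For example (hypothetical fragments): seeing fragments (offset 0, size 64)
+/// and then (offset 32, size 32) of the same variable records each in the
+/// other's overlap list, while a later (offset 64, size 32) fragment overlaps
+/// neither and starts with an empty list.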
+void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
+ DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ FragmentInfo ThisFragment = MIVar.getFragmentOrDefault();
+
+ // If this is the first sighting of this variable, then we are guaranteed
+ // there are currently no overlapping fragments either. Initialize the set
+ // of seen fragments, record no overlaps for the current one, and return.
+ auto SeenIt = SeenFragments.find(MIVar.getVariable());
+ if (SeenIt == SeenFragments.end()) {
+ SmallSet<FragmentInfo, 4> OneFragment;
+ OneFragment.insert(ThisFragment);
+ SeenFragments.insert({MIVar.getVariable(), OneFragment});
+
+ OverlapFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
+ return;
+ }
+
+ // If this particular Variable/Fragment pair already exists in the overlap
+ // map, it has already been accounted for.
+ auto IsInOLapMap =
+ OverlapFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
+ if (!IsInOLapMap.second)
+ return;
+
+ auto &ThisFragmentsOverlaps = IsInOLapMap.first->second;
+ auto &AllSeenFragments = SeenIt->second;
+
+ // Otherwise, examine all other seen fragments for this variable, with "this"
+ // fragment being a previously unseen fragment. Record any pair of
+ // overlapping fragments.
+ for (auto &ASeenFragment : AllSeenFragments) {
+ // Does this previously seen fragment overlap?
+ if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) {
+ // Yes: Mark the current fragment as being overlapped.
+ ThisFragmentsOverlaps.push_back(ASeenFragment);
+ // Mark the previously seen fragment as being overlapped by the current
+ // one.
+ auto ASeenFragmentsOverlaps =
+ OverlapFragments.find({MIVar.getVariable(), ASeenFragment});
+ assert(ASeenFragmentsOverlaps != OverlapFragments.end() &&
+ "Previously seen var fragment has no vector of overlaps");
+ ASeenFragmentsOverlaps->second.push_back(ThisFragment);
+ }
+ }
+
+ AllSeenFragments.insert(ThisFragment);
+}
+
+void InstrRefBasedLDV::process(MachineInstr &MI) {
+ // Try to interpret an MI as a debug or transfer instruction. Only if it's
+ // none of these should we interpret its register defs as new value
+ // definitions.
+ if (transferDebugValue(MI))
+ return;
+ if (transferDebugInstrRef(MI))
+ return;
+ if (transferRegisterCopy(MI))
+ return;
+ if (transferSpillOrRestoreInst(MI))
+ return;
+ transferRegisterDef(MI);
+}
+
+void InstrRefBasedLDV::produceMLocTransferFunction(
+ MachineFunction &MF, SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+ unsigned MaxNumBlocks) {
+ // Because we try to optimize around register mask operands by ignoring regs
+ // that aren't currently tracked, we set up something ugly for later: RegMask
+ // operands that are seen earlier than the first use of a register still need
+ // to clobber that register in the transfer function. But this information
+ // isn't actively recorded. Instead, we track each RegMask used in each block,
+ // and accumulate the clobbered but untracked registers in each block into
+ // the following bitvector. Later, if new values are tracked, we can add
+ // appropriate clobbers.
+ SmallVector<BitVector, 32> BlockMasks;
+ BlockMasks.resize(MaxNumBlocks);
+
+ // Reserve one bit per register for the masks described above.
+ unsigned BVWords = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ for (auto &BV : BlockMasks)
+ BV.resize(TRI->getNumRegs(), true);
+
+ // Step through all instructions and inhale the transfer function.
+ for (auto &MBB : MF) {
+ // Object fields that are read by trackers to know where we are in the
+ // function.
+ CurBB = MBB.getNumber();
+ CurInst = 1;
+
+ // Set all machine locations to a PHI value. For transfer function
+ // production only, this signifies the live-in value to the block.
+ MTracker->reset();
+ MTracker->setMPhis(CurBB);
+
+ // Step through each instruction in this block.
+ for (auto &MI : MBB) {
+ process(MI);
+ // Also accumulate fragment map.
+ if (MI.isDebugValue())
+ accumulateFragmentMap(MI);
+
+ // Create a map from the instruction number (if present) to the
+ // MachineInstr and its position.
+ if (uint64_t InstrNo = MI.peekDebugInstrNum()) {
+ auto InstrAndPos = std::make_pair(&MI, CurInst);
+ auto InsertResult =
+ DebugInstrNumToInstr.insert(std::make_pair(InstrNo, InstrAndPos));
+
+ // There should never be duplicate instruction numbers.
+ assert(InsertResult.second);
+ (void)InsertResult;
+ }
+
+ ++CurInst;
+ }
+
+ // Produce the transfer function, a map of machine location to new value. If
+ // any machine location has the live-in phi value from the start of the
+ // block, it's live-through and doesn't need recording in the transfer
+ // function.
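+ // Illustrative entry (register name hypothetical): if this block defines
+ // $rbx at its third instruction and nothing redefines it afterwards, the
+ // transfer map gains LocIdx($rbx) -> ValueIDNum{CurBB, 3, LocIdx($rbx)};
+ // locations still holding their live-in PHI value are left out, marking
+ // them as live-through.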
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+ ValueIDNum &P = Location.Value;
+ if (P.isPHI() && P.getLoc() == Idx.asU64())
+ continue;
+
+ // Insert-or-update.
+ auto &TransferMap = MLocTransfer[CurBB];
+ auto Result = TransferMap.insert(std::make_pair(Idx.asU64(), P));
+ if (!Result.second)
+ Result.first->second = P;
+ }
+
+ // Accumulate any bitmask operands into the clobbered reg mask for this
+ // block.
+ for (auto &P : MTracker->Masks) {
+ BlockMasks[CurBB].clearBitsNotInMask(P.first->getRegMask(), BVWords);
+ }
+ }
+
+ // Compute a bitvector of all the registers that are tracked in this function.
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+ BitVector UsedRegs(TRI->getNumRegs());
+ for (auto Location : MTracker->locations()) {
+ unsigned ID = MTracker->LocIdxToLocID[Location.Idx];
+ if (ID >= TRI->getNumRegs() || ID == SP)
+ continue;
+ UsedRegs.set(ID);
+ }
+
+ // Check that any regmask-clobber of a register that gets tracked is not
+ // live-through in the transfer function. It needs to be clobbered at the
+ // very least.
+ for (unsigned int I = 0; I < MaxNumBlocks; ++I) {
+ BitVector &BV = BlockMasks[I];
+ BV.flip();
+ BV &= UsedRegs;
+ // This produces all the bits that we clobber, but also use. Check that
+ // they're all clobbered or at least set in the designated transfer
+ // elem.
+ for (unsigned Bit : BV.set_bits()) {
+ unsigned ID = MTracker->getLocID(Bit, false);
+ LocIdx Idx = MTracker->LocIDToLocIdx[ID];
+ auto &TransferMap = MLocTransfer[I];
+
+ // Install a value representing the fact that this location is effectively
+ // written to in this block. As there's no reserved value, instead use
+ // a value number that is never generated. Pick the value number for the
+ // first instruction in the block def'ing this location, which we know
+ // this block never used anyway.
+ ValueIDNum NotGeneratedNum = ValueIDNum(I, 1, Idx);
+ auto Result =
+ TransferMap.insert(std::make_pair(Idx.asU64(), NotGeneratedNum));
+ if (!Result.second) {
+ ValueIDNum &ValueID = Result.first->second;
+ if (ValueID.getBlock() == I && ValueID.isPHI())
+ // It was left as live-through. Set it to clobbered.
+ ValueID = NotGeneratedNum;
+ }
+ }
+ }
+}
+
+std::tuple<bool, bool>
+InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ ValueIDNum **OutLocs, ValueIDNum *InLocs) {
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
+ bool Changed = false;
+ bool DowngradeOccurred = false;
+
+ // Collect predecessors that have been visited. Anything that hasn't been
+ // visited yet is a backedge on the first iteration, and the meet of its
+ // lattice value for all locations will be unaffected.
+ SmallVector<const MachineBasicBlock *, 8> BlockOrders;
+ for (auto Pred : MBB.predecessors()) {
+ if (Visited.count(Pred)) {
+ BlockOrders.push_back(Pred);
+ }
+ }
+
+ // Visit predecessors in RPOT order.
+ auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+ return BBToOrder.find(A)->second < BBToOrder.find(B)->second;
+ };
+ llvm::sort(BlockOrders, Cmp);
+
+ // Skip entry block.
+ if (BlockOrders.size() == 0)
+ return std::tuple<bool, bool>(false, false);
+
+ // Step through all machine locations, then look at each predecessor and
+ // detect disagreements.
+ unsigned ThisBlockRPO = BBToOrder.find(&MBB)->second;
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+ // Pick out the first predecessor's live-out value for this location. It's
+ // guaranteed not to be a backedge, as we order by RPO.
+ ValueIDNum BaseVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+
+ // Some flags for whether there's a disagreement, and whether it's a
+ // disagreement with a backedge or not.
+ bool Disagree = false;
+ bool NonBackEdgeDisagree = false;
+
+ // Loop around everything that wasn't 'base'.
+ for (unsigned int I = 1; I < BlockOrders.size(); ++I) {
+ auto *MBB = BlockOrders[I];
+ if (BaseVal != OutLocs[MBB->getNumber()][Idx.asU64()]) {
+ // Live-out of a predecessor disagrees with the first predecessor.
+ Disagree = true;
+
+ // Test whether it's a disagreement in the backedges or not.
+ if (BBToOrder.find(MBB)->second < ThisBlockRPO) // might be self b/e
+ NonBackEdgeDisagree = true;
+ }
+ }
+
+ bool OverRide = false;
+ if (Disagree && !NonBackEdgeDisagree) {
+ // Only the backedges disagree. Consider demoting the livein
+ // lattice value, as per the file level comment. The value we consider
+ // demoting to is the value that the non-backedge predecessors agree on.
+ // The order of values is that non-PHIs are \top, a PHI at this block
+ // \bot, and phis between the two are ordered by their RPO number.
+ // If there's no agreement, or we've already demoted to this PHI value
+ // before, replace with a PHI value at this block.
+
+ // Calculate order numbers: zero means normal def, nonzero means RPO
+ // number.
+ unsigned BaseBlockRPONum = BBNumToRPO[BaseVal.getBlock()] + 1;
+ if (!BaseVal.isPHI())
+ BaseBlockRPONum = 0;
+
+ ValueIDNum &InLocID = InLocs[Idx.asU64()];
+ unsigned InLocRPONum = BBNumToRPO[InLocID.getBlock()] + 1;
+ if (!InLocID.isPHI())
+ InLocRPONum = 0;
+
+ // Should we ignore the disagreeing backedges, and override with the
+ // value the other predecessors agree on (in "base")?
+ unsigned ThisBlockRPONum = BBNumToRPO[MBB.getNumber()] + 1;
+ if (BaseBlockRPONum > InLocRPONum && BaseBlockRPONum < ThisBlockRPONum) {
+ // Override.
+ OverRide = true;
+ DowngradeOccurred = true;
+ }
+ }
+ // else: if we disagree in the non-backedges, then this is definitely
+ // a control flow merge where different values merge. Make it a PHI.
+
+ // Generate a phi...
+ ValueIDNum PHI = {(uint64_t)MBB.getNumber(), 0, Idx};
+ ValueIDNum NewVal = (Disagree && !OverRide) ? PHI : BaseVal;
+ if (InLocs[Idx.asU64()] != NewVal) {
+ Changed |= true;
+ InLocs[Idx.asU64()] = NewVal;
+ }
+ }
+
+ // TODO: Reimplement NumInserted and NumRemoved.
+ return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+}
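+
+// Worked example for the join above (block numbers hypothetical): a loop head
+// bb2 with predecessors bb1 (forward edge) and bb3 (backedge). If bb1's
+// live-out for a location is a plain def D while bb3's live-out disagrees,
+// only the backedge disagrees, so the downgrade logic may keep D as the
+// live-in instead of immediately placing a PHI. If two non-backedge
+// predecessors disagreed instead, the PHI value {bb2, 0, Idx} is installed
+// straight away.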
+
+void InstrRefBasedLDV::mlocDataflow(
+ ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist, Pending;
+
+ // We track what is on the current and pending worklist to avoid inserting
+ // the same thing twice. We could avoid this with a custom priority queue,
+ // but this is probably not worth it.
+ SmallPtrSet<MachineBasicBlock *, 16> OnPending, OnWorklist;
+
+ // Initialize worklist with every block to be visited.
+ for (unsigned int I = 0; I < BBToOrder.size(); ++I) {
+ Worklist.push(I);
+ OnWorklist.insert(OrderToBB[I]);
+ }
+
+ MTracker->reset();
+
+ // Set inlocs for entry block -- each as a PHI at the entry block. Represents
+ // the incoming value to the function.
+ MTracker->setMPhis(0);
+ for (auto Location : MTracker->locations())
+ MInLocs[0][Location.Idx.asU64()] = Location.Value;
+
+ SmallPtrSet<const MachineBasicBlock *, 16> Visited;
+ while (!Worklist.empty() || !Pending.empty()) {
+ // Vector for storing the evaluated block transfer function.
+ SmallVector<std::pair<LocIdx, ValueIDNum>, 32> ToRemap;
+
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
+ CurBB = MBB->getNumber();
+ Worklist.pop();
+
+ // Join the values in all predecessor blocks.
+ bool InLocsChanged, DowngradeOccurred;
+ std::tie(InLocsChanged, DowngradeOccurred) =
+ mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
+ InLocsChanged |= Visited.insert(MBB).second;
+
+ // If a downgrade occurred, book us in for re-examination on the next
+ // iteration.
+ if (DowngradeOccurred && OnPending.insert(MBB).second)
+ Pending.push(BBToOrder[MBB]);
+
+ // Don't examine the transfer function if we've visited this block at
+ // least once, and the inlocs haven't changed.
+ if (!InLocsChanged)
+ continue;
+
+ // Load the current set of live-ins into MLocTracker.
+ MTracker->loadFromArray(MInLocs[CurBB], CurBB);
+
+ // Each element of the transfer function can be a new def, or a read of
+ // a live-in value. Evaluate each element, and store to "ToRemap".
+ ToRemap.clear();
+ for (auto &P : MLocTransfer[CurBB]) {
+ if (P.second.getBlock() == CurBB && P.second.isPHI()) {
+ // This is a movement of whatever was live in. Read it.
+ ValueIDNum NewID = MTracker->getNumAtPos(P.second.getLoc());
+ ToRemap.push_back(std::make_pair(P.first, NewID));
+ } else {
+ // It's a def. Just set it.
+ assert(P.second.getBlock() == CurBB);
+ ToRemap.push_back(std::make_pair(P.first, P.second));
+ }
+ }
+
+ // Commit the transfer function changes into mloc tracker, which
+ // transforms the contents of the MLocTracker into the live-outs.
+ for (auto &P : ToRemap)
+ MTracker->setMLoc(P.first, P.second);
+
+ // Now copy out-locs from mloc tracker into out-loc vector, checking
+ // whether changes have occurred. These changes can have come from both
+ // the transfer function, and mlocJoin.
+ bool OLChanged = false;
+ for (auto Location : MTracker->locations()) {
+ OLChanged |= MOutLocs[CurBB][Location.Idx.asU64()] != Location.Value;
+ MOutLocs[CurBB][Location.Idx.asU64()] = Location.Value;
+ }
+
+ MTracker->reset();
+
+ // No need to examine successors again if out-locs didn't change.
+ if (!OLChanged)
+ continue;
+
+ // All successors should be visited: put any back-edges on the pending
+ // list for the next dataflow iteration, and any other successors to be
+ // visited this iteration, if they're not going to be already.
+ for (auto s : MBB->successors()) {
+ // Does branching to this successor represent a back-edge?
+ if (BBToOrder[s] > BBToOrder[MBB]) {
+ // No: visit it during this dataflow iteration.
+ if (OnWorklist.insert(s).second)
+ Worklist.push(BBToOrder[s]);
+ } else {
+ // Yes: visit it on the next iteration.
+ if (OnPending.insert(s).second)
+ Pending.push(BBToOrder[s]);
+ }
+ }
+ }
+
+ Worklist.swap(Pending);
+ std::swap(OnPending, OnWorklist);
+ OnPending.clear();
+ // At this point, Pending must be empty, since it was just swapped with the
+ // empty worklist.
+ assert(Pending.empty() && "Pending should be empty");
+ }
+
+ // Once all the live-ins don't change on mlocJoin(), we've reached a
+ // fixedpoint.
+}
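+
+// The driver above is a standard RPO worklist schedule; as a sketch (names
+// here are informal, not part of this pass):
+//   while worklist or pending non-empty:
+//     while worklist non-empty:
+//       take the lowest-RPO block; join predecessor out-locs into its in-locs;
+//       apply the block's transfer function to produce out-locs;
+//       if out-locs changed: forward-edge successors go on the worklist for
+//       this sweep, backedge successors go on the pending queue for the next;
+//     swap worklist and pending.
+// Live-ins only ever move down a finite lattice, so the loop terminates.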
+
+bool InstrRefBasedLDV::vlocDowngradeLattice(
+ const MachineBasicBlock &MBB, const DbgValue &OldLiveInLocation,
+ const SmallVectorImpl<InValueT> &Values, unsigned CurBlockRPONum) {
+ // Ranking value preference: see the file level comment. The highest rank is
+ // a plain def, followed by PHI values in reverse post-order. Numerically,
+ // we assign all defs the rank '0', all PHIs their block's RPO number plus
+ // one, and consider the lowest value the highest ranked.
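+ // For example (RPO numbers hypothetical): a plain def ranks 0, while a PHI
+ // in the block with RPO number 3 ranks 4; the lower number is the higher
+ // (more preferred) rank. A candidate is accepted below only if it sits
+ // strictly lower in the lattice than the old live-in, but not as low as a
+ // PHI at this block or beyond.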
+ int OldLiveInRank = BBNumToRPO[OldLiveInLocation.ID.getBlock()] + 1;
+ if (!OldLiveInLocation.ID.isPHI())
+ OldLiveInRank = 0;
+
+ // Allow any unresolvable conflict to be over-ridden.
+ if (OldLiveInLocation.Kind == DbgValue::NoVal) {
+ // Although if it was an unresolvable conflict from _this_ block, then
+ // all other seeking of downgrades and PHIs must have failed beforehand.
+ if (OldLiveInLocation.BlockNo == (unsigned)MBB.getNumber())
+ return false;
+ OldLiveInRank = INT_MIN;
+ }
+
+ auto &InValue = *Values[0].second;
+
+ if (InValue.Kind == DbgValue::Const || InValue.Kind == DbgValue::NoVal)
+ return false;
+
+ unsigned ThisRPO = BBNumToRPO[InValue.ID.getBlock()];
+ int ThisRank = ThisRPO + 1;
+ if (!InValue.ID.isPHI())
+ ThisRank = 0;
+
+ // Too far down the lattice?
+ if (ThisRPO >= CurBlockRPONum)
+ return false;
+
+ // Higher in the lattice than what we've already explored?
+ if (ThisRank <= OldLiveInRank)
+ return false;
+
+ return true;
+}
+
+std::tuple<Optional<ValueIDNum>, bool> InstrRefBasedLDV::pickVPHILoc(
+ MachineBasicBlock &MBB, const DebugVariable &Var, const LiveIdxT &LiveOuts,
+ ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ const SmallVectorImpl<MachineBasicBlock *> &BlockOrders) {
+ // Collect, for each predecessor, the set of locations where its live-out
+ // value can be found.
+ SmallVector<SmallVector<LocIdx, 4>, 8> Locs;
+ unsigned NumLocs = MTracker->getNumLocs();
+ unsigned BackEdgesStart = 0;
+
+ for (auto p : BlockOrders) {
+ // Pick out where backedges start in the list of predecessors. Relies on
+ // BlockOrders being sorted by RPO.
+ if (BBToOrder[p] < BBToOrder[&MBB])
+ ++BackEdgesStart;
+
+ // For each predecessor, create a new set of locations.
+ Locs.resize(Locs.size() + 1);
+ unsigned ThisBBNum = p->getNumber();
+ auto LiveOutMap = LiveOuts.find(p);
+ if (LiveOutMap == LiveOuts.end())
+ // This predecessor isn't in scope, it must have no live-in/live-out
+ // locations.
+ continue;
+
+ auto It = LiveOutMap->second->find(Var);
+ if (It == LiveOutMap->second->end())
+ // There's no value recorded for this variable in this predecessor,
+ // leave an empty set of locations.
+ continue;
+
+ const DbgValue &OutVal = It->second;
+
+ if (OutVal.Kind == DbgValue::Const || OutVal.Kind == DbgValue::NoVal)
+ // Consts and no-values cannot have locations we can join on.
+ continue;
+
+ assert(OutVal.Kind == DbgValue::Proposed || OutVal.Kind == DbgValue::Def);
+ ValueIDNum ValToLookFor = OutVal.ID;
+
+ // Search the live-outs of the predecessor for the specified value.
+ for (unsigned int I = 0; I < NumLocs; ++I) {
+ if (MOutLocs[ThisBBNum][I] == ValToLookFor)
+ Locs.back().push_back(LocIdx(I));
+ }
+ }
+
+ // If there were no locations at all, return an empty result.
+ if (Locs.empty())
+ return std::tuple<Optional<ValueIDNum>, bool>(None, false);
+
+ // Lambda for seeking a common location within a range of location-sets.
+ using LocsIt = SmallVector<SmallVector<LocIdx, 4>, 8>::iterator;
+ auto SeekLocation =
+ [&Locs](llvm::iterator_range<LocsIt> SearchRange) -> Optional<LocIdx> {
+ // Starting with the first set of locations, take the intersection with
+ // subsequent sets.
+ SmallVector<LocIdx, 4> base = Locs[0];
+ for (auto &S : SearchRange) {
+ SmallVector<LocIdx, 4> new_base;
+ std::set_intersection(base.begin(), base.end(), S.begin(), S.end(),
+ std::inserter(new_base, new_base.begin()));
+ base = new_base;
+ }
+ if (base.empty())
+ return None;
+
+ // We now have a set of LocIdxes that contain the right output value in
+ // each of the predecessors. Pick the lowest; if there's a register loc,
+ // that'll be it.
+ return *base.begin();
+ };
+
+ // Search for a common location for all predecessors. If we can't, then fall
+ // back to only finding a common location between non-backedge predecessors.
+ bool ValidForAllLocs = true;
+ auto TheLoc = SeekLocation(Locs);
+ if (!TheLoc) {
+ ValidForAllLocs = false;
+ TheLoc =
+ SeekLocation(make_range(Locs.begin(), Locs.begin() + BackEdgesStart));
+ }
+
+ if (!TheLoc)
+ return std::tuple<Optional<ValueIDNum>, bool>(None, false);
+
+ // Return a PHI-value-number for the found location.
+ LocIdx L = *TheLoc;
+ ValueIDNum PHIVal = {(unsigned)MBB.getNumber(), 0, L};
+ return std::tuple<Optional<ValueIDNum>, bool>(PHIVal, ValidForAllLocs);
+}
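+
+// Worked example for pickVPHILoc above (locations hypothetical): if the
+// variable's live-out value is found in locations {L1, L3} in one predecessor
+// and {L1, L2} in another, the intersection is {L1} and the result is the PHI
+// value {MBB, 0, L1}, valid for all predecessors. If the full intersection
+// were empty, the search is retried over just the non-backedge predecessors,
+// and the returned flag notes that the PHI is not valid on the backedges.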
+
+std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
+ MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
+ SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited, unsigned BBNum,
+ const SmallSet<DebugVariable, 4> &AllVars, ValueIDNum **MOutLocs,
+ ValueIDNum **MInLocs,
+ SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
+ SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+ DenseMap<DebugVariable, DbgValue> &InLocsT) {
+ bool DowngradeOccurred = false;
+
+ // To emulate VarLocBasedImpl, process this block if it's not in scope but
+ // _does_ assign a variable value. No live-ins for this scope are transferred
+ // in though, so we can return immediately.
+ if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) {
+ if (VLOCVisited)
+ return std::tuple<bool, bool>(true, false);
+ return std::tuple<bool, bool>(false, false);
+ }
+
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
+ bool Changed = false;
+
+ // Find any live-ins computed in a prior iteration.
+ auto ILSIt = VLOCInLocs.find(&MBB);
+ assert(ILSIt != VLOCInLocs.end());
+ auto &ILS = *ILSIt->second;
+
+ // Order predecessors by RPOT order, for exploring them in that order.
+ SmallVector<MachineBasicBlock *, 8> BlockOrders;
+ for (auto p : MBB.predecessors())
+ BlockOrders.push_back(p);
+
+ auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+ return BBToOrder[A] < BBToOrder[B];
+ };
+
+ llvm::sort(BlockOrders, Cmp);
+
+ unsigned CurBlockRPONum = BBToOrder[&MBB];
+
+ // Force a re-visit to loop heads in the first dataflow iteration.
+ // FIXME: if we could "propose" Const values this wouldn't be needed,
+ // because they'd need to be confirmed before being emitted.
+ if (!BlockOrders.empty() &&
+ BBToOrder[BlockOrders[BlockOrders.size() - 1]] >= CurBlockRPONum &&
+ VLOCVisited)
+ DowngradeOccurred = true;
+
+ auto ConfirmValue = [&InLocsT](const DebugVariable &DV, DbgValue VR) {
+ auto Result = InLocsT.insert(std::make_pair(DV, VR));
+ (void)Result;
+ assert(Result.second);
+ };
+
+ auto ConfirmNoVal = [&ConfirmValue, &MBB](const DebugVariable &Var, const DbgValueProperties &Properties) {
+ DbgValue NoLocPHIVal(MBB.getNumber(), Properties, DbgValue::NoVal);
+
+ ConfirmValue(Var, NoLocPHIVal);
+ };
+
+ // Attempt to join the values for each variable.
+ for (auto &Var : AllVars) {
+ // Collect all the DbgValues for this variable.
+ SmallVector<InValueT, 8> Values;
+ bool Bail = false;
+ unsigned BackEdgesStart = 0;
+ for (auto p : BlockOrders) {
+ // If the predecessor isn't in scope / to be explored, we'll never be
+ // able to join any locations.
+ if (!BlocksToExplore.contains(p)) {
+ Bail = true;
+ break;
+ }
+
+ // Don't attempt to handle unvisited predecessors: they're implicitly
+ // "unknown"s in the lattice.
+ if (VLOCVisited && !VLOCVisited->count(p))
+ continue;
+
+ // If the predecessor's OutLocs is absent, there's not much we can do.
+ auto OL = VLOCOutLocs.find(p);
+ if (OL == VLOCOutLocs.end()) {
+ Bail = true;
+ break;
+ }
+
+ // No live-out value for this predecessor also means we can't produce
+ // a joined value.
+ auto VIt = OL->second->find(Var);
+ if (VIt == OL->second->end()) {
+ Bail = true;
+ break;
+ }
+
+ // Keep track of where back-edges begin in the Values vector. Relies on
+ // BlockOrders being sorted by RPO.
+ unsigned ThisBBRPONum = BBToOrder[p];
+ if (ThisBBRPONum < CurBlockRPONum)
+ ++BackEdgesStart;
+
+ Values.push_back(std::make_pair(p, &VIt->second));
+ }
+
+ // If there were no values, or one of the predecessors couldn't have a
+ // value, then give up immediately. It's not safe to produce a live-in
+ // value.
+ if (Bail || Values.size() == 0)
+ continue;
+
+ // Enumeration identifying the current state of the predecessors' values.
+ enum {
+ Unset = 0,
+ Agreed, // All preds agree on the variable value.
+ PropDisagree, // All preds agree, but the value kind is Proposed in some.
+ BEDisagree, // Only back-edges disagree on variable value.
+ PHINeeded, // Non-back-edge predecessors have conflicting values.
+ NoSolution // Conflicting Value metadata makes solution impossible.
+ } OurState = Unset;
+
+ // All (non-entry) blocks have at least one non-backedge predecessor.
+ // Pick the variable value from the first of these, to compare against
+ // all others.
+ const DbgValue &FirstVal = *Values[0].second;
+ const ValueIDNum &FirstID = FirstVal.ID;
+
+ // Scan for variable values that can't be resolved: if they have different
+ // DIExpressions, different indirectness, or are mixed constants /
+ // non-constants.
+ for (auto &V : Values) {
+ if (V.second->Properties != FirstVal.Properties)
+ OurState = NoSolution;
+ if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const)
+ OurState = NoSolution;
+ }
+
+ // Flags diagnosing _how_ the values disagree.
+ bool NonBackEdgeDisagree = false;
+ bool DisagreeOnPHINess = false;
+ bool IDDisagree = false;
+ bool Disagree = false;
+ if (OurState == Unset) {
+ for (auto &V : Values) {
+ if (*V.second == FirstVal)
+ continue; // No disagreement.
+
+ Disagree = true;
+
+ // Flag whether the value number actually disagrees.
+ if (V.second->ID != FirstID)
+ IDDisagree = true;
+
+ // Distinguish whether disagreement happens in backedges or not.
+ // Relies on Values (and BlockOrders) being sorted by RPO.
+ unsigned ThisBBRPONum = BBToOrder[V.first];
+ if (ThisBBRPONum < CurBlockRPONum)
+ NonBackEdgeDisagree = true;
+
+ // Is there a difference in whether the value is definite or only
+ // proposed?
+ if (V.second->Kind != FirstVal.Kind &&
+ (V.second->Kind == DbgValue::Proposed ||
+ V.second->Kind == DbgValue::Def) &&
+ (FirstVal.Kind == DbgValue::Proposed ||
+ FirstVal.Kind == DbgValue::Def))
+ DisagreeOnPHINess = true;
+ }
+
+ // Collect those flags together and determine an overall state describing
+ // to what extent the predecessors agree on a live-in value.
+ if (!Disagree)
+ OurState = Agreed;
+ else if (!IDDisagree && DisagreeOnPHINess)
+ OurState = PropDisagree;
+ else if (!NonBackEdgeDisagree)
+ OurState = BEDisagree;
+ else
+ OurState = PHINeeded;
+ }
+
+ // An extra indicator: if we only disagree on whether the value is a
+ // Def, or proposed, then also flag whether that disagreement happens
+ // in backedges only.
+ bool PropOnlyInBEs = Disagree && !IDDisagree && DisagreeOnPHINess &&
+ !NonBackEdgeDisagree && FirstVal.Kind == DbgValue::Def;
+
+ const auto &Properties = FirstVal.Properties;
+
+ auto OldLiveInIt = ILS.find(Var);
+ const DbgValue *OldLiveInLocation =
+ (OldLiveInIt != ILS.end()) ? &OldLiveInIt->second : nullptr;
+
+ bool OverRide = false;
+ if (OurState == BEDisagree && OldLiveInLocation) {
+ // Only backedges disagree: we can consider downgrading. If there was a
+ // previous live-in value, use it to work out whether the current
+ // incoming value represents a lattice downgrade or not.
+ OverRide =
+ vlocDowngradeLattice(MBB, *OldLiveInLocation, Values, CurBlockRPONum);
+ }
+
+ // Use the current state of predecessor agreement and other flags to work
+ // out what to do next. Possibilities include:
+ // * Accept a value all predecessors agree on, or accept one that
+ // represents a step down the exploration lattice,
+ // * Use a PHI value number, if one can be found,
+ // * Propose a PHI value number, and see if it gets confirmed later,
+ // * Emit a 'NoVal' value, indicating we couldn't resolve anything.
+ if (OurState == Agreed) {
+ // Easiest solution: all predecessors agree on the variable value.
+ ConfirmValue(Var, FirstVal);
+ } else if (OurState == BEDisagree && OverRide) {
+ // Only backedges disagree, and the other predecessors have produced
+ // a new live-in value further down the exploration lattice.
+ DowngradeOccurred = true;
+ ConfirmValue(Var, FirstVal);
+ } else if (OurState == PropDisagree) {
+ // Predecessors agree on value, but some say it's only a proposed value.
+ // Propagate it as proposed: unless it was proposed in this block, in
+ // which case we're able to confirm the value.
+ if (FirstID.getBlock() == (uint64_t)MBB.getNumber() && FirstID.isPHI()) {
+ ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
+ } else if (PropOnlyInBEs) {
+ // If only backedges disagree, a higher (in RPO) block confirmed this
+ // location, and we need to propagate it into this loop.
+ ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
+ } else {
+ // Otherwise: a Def meeting a Proposed is still a Proposed.
+ ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Proposed));
+ }
+ } else if (OurState == PHINeeded || OurState == BEDisagree) {
+ // Predecessors disagree and can't be downgraded: this can only be
+ // solved with a PHI. Use pickVPHILoc to go look for one.
+ Optional<ValueIDNum> VPHI;
+ bool AllEdgesVPHI = false;
+ std::tie(VPHI, AllEdgesVPHI) =
+ pickVPHILoc(MBB, Var, VLOCOutLocs, MOutLocs, MInLocs, BlockOrders);
+
+ if (VPHI && AllEdgesVPHI) {
+ // There's a PHI value that's valid for all predecessors -- we can use
+ // it. If any of the non-backedge predecessors have proposed values
+ // though, this PHI is also only proposed, until the predecessors are
+ // confirmed.
+ DbgValue::KindT K = DbgValue::Def;
+ for (unsigned int I = 0; I < BackEdgesStart; ++I)
+ if (Values[I].second->Kind == DbgValue::Proposed)
+ K = DbgValue::Proposed;
+
+ ConfirmValue(Var, DbgValue(*VPHI, Properties, K));
+ } else if (VPHI) {
+ // There's a PHI value, but it's only legal for backedges. Leave this
+ // as a proposed PHI value: it might come back on the backedges,
+ // and allow us to confirm it in the future.
+ DbgValue NoBEValue = DbgValue(*VPHI, Properties, DbgValue::Proposed);
+ ConfirmValue(Var, NoBEValue);
+ } else {
+ ConfirmNoVal(Var, Properties);
+ }
+ } else {
+ // Otherwise: we don't know. Emit a "phi but no real loc" phi.
+ ConfirmNoVal(Var, Properties);
+ }
+ }
+
+ // Store newly calculated in-locs into VLOCInLocs, if they've changed.
+ Changed = ILS != InLocsT;
+ if (Changed)
+ ILS = InLocsT;
+
+ return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+}
+
+void InstrRefBasedLDV::vlocDataflow(
+ const LexicalScope *Scope, const DILocation *DILoc,
+ const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
+ ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs) {
+ // This method is much like mlocDataflow, but focuses on a single
+ // LexicalScope at a time. Pick out a set of blocks and variables that are
+ // to have their value assignments solved, then run our dataflow algorithm
+ // until a fixedpoint is reached.
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist, Pending;
+ SmallPtrSet<MachineBasicBlock *, 16> OnWorklist, OnPending;
+
+ // The set of blocks we'll be examining.
+ SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore;
+
+ // The order in which to examine them (RPO).
+ SmallVector<MachineBasicBlock *, 8> BlockOrders;
+
+ // RPO ordering function.
+ auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+ return BBToOrder[A] < BBToOrder[B];
+ };
+
+ LS.getMachineBasicBlocks(DILoc, BlocksToExplore);
+
+ // A separate container to distinguish "blocks we're exploring" versus
+ // "blocks that are potentially in scope". See comment at start of vlocJoin.
+ SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore;
+
+ // Old LiveDebugValues tracks variable locations that come out of blocks
+ // not in scope, where DBG_VALUEs occur. This is something we could
+ // legitimately ignore, but let's allow it for now.
+ if (EmulateOldLDV)
+ BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
+
+ // We also need to propagate variable values through any artificial blocks
+ // that immediately follow blocks in scope.
+ DenseSet<const MachineBasicBlock *> ToAdd;
+
+ // Helper lambda: For a given block in scope, perform a depth first search
+ // of all the artificial successors, adding them to the ToAdd collection.
+ auto AccumulateArtificialBlocks =
+ [this, &ToAdd, &BlocksToExplore,
+ &InScopeBlocks](const MachineBasicBlock *MBB) {
+ // Depth-first-search state: each node is a block and which successor
+ // we're currently exploring.
+ SmallVector<std::pair<const MachineBasicBlock *,
+ MachineBasicBlock::const_succ_iterator>,
+ 8>
+ DFS;
+
+ // Find any artificial successors not already tracked.
+ for (auto *succ : MBB->successors()) {
+ if (BlocksToExplore.count(succ) || InScopeBlocks.count(succ))
+ continue;
+ if (!ArtificialBlocks.count(succ))
+ continue;
+ DFS.push_back(std::make_pair(succ, succ->succ_begin()));
+ ToAdd.insert(succ);
+ }
+
+ // Search all those blocks, depth first.
+ while (!DFS.empty()) {
+ const MachineBasicBlock *CurBB = DFS.back().first;
+ MachineBasicBlock::const_succ_iterator &CurSucc = DFS.back().second;
+ // Walk back if we've explored this block's successors to the end.
+ if (CurSucc == CurBB->succ_end()) {
+ DFS.pop_back();
+ continue;
+ }
+
+ // If the current successor is artificial and unexplored, descend into
+ // it.
+ if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
+ DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
+ ToAdd.insert(*CurSucc);
+ continue;
+ }
+
+ ++CurSucc;
+ }
+ };
+
+ // Search in-scope blocks and those containing a DBG_VALUE from this scope
+ // for artificial successors.
+ for (auto *MBB : BlocksToExplore)
+ AccumulateArtificialBlocks(MBB);
+ for (auto *MBB : InScopeBlocks)
+ AccumulateArtificialBlocks(MBB);
+
+ BlocksToExplore.insert(ToAdd.begin(), ToAdd.end());
+ InScopeBlocks.insert(ToAdd.begin(), ToAdd.end());
+
+ // Single block scope: not interesting! No propagation at all. Note that
+ // this could probably go above ArtificialBlocks without damage, but
+ // that then produces output differences from original-live-debug-values,
+ // which propagates from a single block into many artificial ones.
+ if (BlocksToExplore.size() == 1)
+ return;
+
+ // Pick out the relevant blocks, in RPO order, and sort them.
+ for (auto *MBB : BlocksToExplore)
+ BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB));
+
+ llvm::sort(BlockOrders, Cmp);
+ unsigned NumBlocks = BlockOrders.size();
+
+ // Allocate some vectors for storing the live ins and live outs. Large.
+ SmallVector<DenseMap<DebugVariable, DbgValue>, 32> LiveIns, LiveOuts;
+ LiveIns.resize(NumBlocks);
+ LiveOuts.resize(NumBlocks);
+
+ // Produce by-MBB indexes of live-in/live-outs, to ease lookup within
+ // vlocJoin.
+ LiveIdxT LiveOutIdx, LiveInIdx;
+ LiveOutIdx.reserve(NumBlocks);
+ LiveInIdx.reserve(NumBlocks);
+ for (unsigned I = 0; I < NumBlocks; ++I) {
+ LiveOutIdx[BlockOrders[I]] = &LiveOuts[I];
+ LiveInIdx[BlockOrders[I]] = &LiveIns[I];
+ }
+
+ for (auto *MBB : BlockOrders) {
+ Worklist.push(BBToOrder[MBB]);
+ OnWorklist.insert(MBB);
+ }
+
+ // Iterate over all the blocks we selected, propagating variable values.
+ bool FirstTrip = true;
+ SmallPtrSet<const MachineBasicBlock *, 16> VLOCVisited;
+ while (!Worklist.empty() || !Pending.empty()) {
+ while (!Worklist.empty()) {
+ auto *MBB = OrderToBB[Worklist.top()];
+ CurBB = MBB->getNumber();
+ Worklist.pop();
+
+ DenseMap<DebugVariable, DbgValue> JoinedInLocs;
+
+ // Join values from predecessors. Updates LiveInIdx, and writes output
+ // into JoinedInLocs.
+ bool InLocsChanged, DowngradeOccurred;
+ std::tie(InLocsChanged, DowngradeOccurred) = vlocJoin(
+ *MBB, LiveOutIdx, LiveInIdx, (FirstTrip) ? &VLOCVisited : nullptr,
+ CurBB, VarsWeCareAbout, MOutLocs, MInLocs, InScopeBlocks,
+ BlocksToExplore, JoinedInLocs);
+
+ bool FirstVisit = VLOCVisited.insert(MBB).second;
+
+ // Always explore transfer function if inlocs changed, or if we've not
+ // visited this block before.
+ InLocsChanged |= FirstVisit;
+
+ // If a downgrade occurred, book us in for re-examination on the next
+ // iteration.
+ if (DowngradeOccurred && OnPending.insert(MBB).second)
+ Pending.push(BBToOrder[MBB]);
+
+ if (!InLocsChanged)
+ continue;
+
+ // Do transfer function.
+ auto &VTracker = AllTheVLocs[MBB->getNumber()];
+ for (auto &Transfer : VTracker.Vars) {
+ // Is this a var we're mangling in this scope?
+ if (VarsWeCareAbout.count(Transfer.first)) {
+ // Erase on empty transfer (DBG_VALUE $noreg).
+ if (Transfer.second.Kind == DbgValue::Undef) {
+ JoinedInLocs.erase(Transfer.first);
+ } else {
+ // Insert new variable value; or overwrite.
+ auto NewValuePair = std::make_pair(Transfer.first, Transfer.second);
+ auto Result = JoinedInLocs.insert(NewValuePair);
+ if (!Result.second)
+ Result.first->second = Transfer.second;
+ }
+ }
+ }
+
+ // Did the live-out locations change?
+ bool OLChanged = JoinedInLocs != *LiveOutIdx[MBB];
+
+ // If they haven't changed, there's no need to explore further.
+ if (!OLChanged)
+ continue;
+
+ // Commit to the live-out record.
+ *LiveOutIdx[MBB] = JoinedInLocs;
+
+ // We should visit all successors. Ensure we'll visit any non-backedge
+ // successors during this dataflow iteration; book backedge successors
+ // to be visited next time around.
+ for (auto s : MBB->successors()) {
+ // Ignore out of scope / not-to-be-explored successors.
+ if (LiveInIdx.find(s) == LiveInIdx.end())
+ continue;
+
+ if (BBToOrder[s] > BBToOrder[MBB]) {
+ if (OnWorklist.insert(s).second)
+ Worklist.push(BBToOrder[s]);
+ } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) {
+ Pending.push(BBToOrder[s]);
+ }
+ }
+ }
+ Worklist.swap(Pending);
+ std::swap(OnWorklist, OnPending);
+ OnPending.clear();
+ assert(Pending.empty());
+ FirstTrip = false;
+ }
+
+ // Dataflow done. Now what? Save live-ins. Ignore any that are still marked
+ // as being variable-PHIs, because those did not have their machine-PHI
+ // value confirmed. Such variable values are places that could have been
+ // PHIs, but are not.
+ for (auto *MBB : BlockOrders) {
+ auto &VarMap = *LiveInIdx[MBB];
+ for (auto &P : VarMap) {
+ if (P.second.Kind == DbgValue::Proposed ||
+ P.second.Kind == DbgValue::NoVal)
+ continue;
+ Output[MBB->getNumber()].push_back(P);
+ }
+ }
+
+ BlockOrders.clear();
+ BlocksToExplore.clear();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void InstrRefBasedLDV::dump_mloc_transfer(
+ const MLocTransferMap &mloc_transfer) const {
+ for (auto &P : mloc_transfer) {
+ std::string LocName = MTracker->LocIdxToName(P.first);
+ std::string ValueName = MTracker->IDAsString(P.second);
+ dbgs() << "Loc " << LocName << " --> " << ValueName << "\n";
+ }
+}
+#endif
+
+void InstrRefBasedLDV::emitLocations(
+ MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MInLocs,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
+ TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs);
+ unsigned NumLocs = MTracker->getNumLocs();
+
+ // For each block, load in the machine value locations and variable value
+ // live-ins, then step through each instruction in the block. New DBG_VALUEs
+ // to be inserted will be created along the way.
+ for (MachineBasicBlock &MBB : MF) {
+ unsigned bbnum = MBB.getNumber();
+ MTracker->reset();
+ MTracker->loadFromArray(MInLocs[bbnum], bbnum);
+ TTracker->loadInlocs(MBB, MInLocs[bbnum], SavedLiveIns[MBB.getNumber()],
+ NumLocs);
+
+ CurBB = bbnum;
+ CurInst = 1;
+ for (auto &MI : MBB) {
+ process(MI);
+ TTracker->checkInstForNewValues(CurInst, MI.getIterator());
+ ++CurInst;
+ }
+ }
+
+ // We have to insert DBG_VALUEs in a consistent order, otherwise they appear
+ // in DWARF in different orders. Use the order that they appear when walking
+ // through each block / each instruction, stored in AllVarsNumbering.
+ auto OrderDbgValues = [&](const MachineInstr *A,
+ const MachineInstr *B) -> bool {
+ DebugVariable VarA(A->getDebugVariable(), A->getDebugExpression(),
+ A->getDebugLoc()->getInlinedAt());
+ DebugVariable VarB(B->getDebugVariable(), B->getDebugExpression(),
+ B->getDebugLoc()->getInlinedAt());
+ return AllVarsNumbering.find(VarA)->second <
+ AllVarsNumbering.find(VarB)->second;
+ };
+
+ // Go through all the transfers recorded in the TransferTracker -- this is
+ // both the live-ins to a block, and any movements of values that happen
+ // in the middle.
+ for (auto &P : TTracker->Transfers) {
+ // Sort them according to appearance order.
+ llvm::sort(P.Insts, OrderDbgValues);
+ // Insert either before or after the designated point...
+ if (P.MBB) {
+ MachineBasicBlock &MBB = *P.MBB;
+ for (auto *MI : P.Insts) {
+ MBB.insert(P.Pos, MI);
+ }
+ } else {
+ MachineBasicBlock &MBB = *P.Pos->getParent();
+ for (auto *MI : P.Insts) {
+ MBB.insertAfter(P.Pos, MI);
+ }
+ }
+ }
+}
+
+void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
+ // Build some useful data structures.
+ auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
+ if (const DebugLoc &DL = MI.getDebugLoc())
+ return DL.getLine() != 0;
+ return false;
+ };
+ // Collect a set of all the artificial blocks.
+ for (auto &MBB : MF)
+ if (none_of(MBB.instrs(), hasNonArtificialLocation))
+ ArtificialBlocks.insert(&MBB);
+
+ // Compute mappings of block <=> RPO order.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ unsigned int RPONumber = 0;
+ for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
+ OrderToBB[RPONumber] = *RI;
+ BBToOrder[*RI] = RPONumber;
+ BBNumToRPO[(*RI)->getNumber()] = RPONumber;
+ ++RPONumber;
+ }
+}
+
+/// Calculate the liveness information for the given machine function and
+/// extend ranges across basic blocks.
+bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
+ TargetPassConfig *TPC) {
+ // No subprogram means this function contains no debuginfo.
+ if (!MF.getFunction().getSubprogram())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
+ this->TPC = TPC;
+
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TFI = MF.getSubtarget().getFrameLowering();
+ TFI->getCalleeSaves(MF, CalleeSavedRegs);
+ LS.initialize(MF);
+
+ MTracker =
+ new MLocTracker(MF, *TII, *TRI, *MF.getSubtarget().getTargetLowering());
+ VTracker = nullptr;
+ TTracker = nullptr;
+
+ SmallVector<MLocTransferMap, 32> MLocTransfer;
+ SmallVector<VLocTracker, 8> vlocs;
+ LiveInsT SavedLiveIns;
+
+ int MaxNumBlocks = -1;
+ for (auto &MBB : MF)
+ MaxNumBlocks = std::max(MBB.getNumber(), MaxNumBlocks);
+ assert(MaxNumBlocks >= 0);
+ ++MaxNumBlocks;
+
+ MLocTransfer.resize(MaxNumBlocks);
+ vlocs.resize(MaxNumBlocks);
+ SavedLiveIns.resize(MaxNumBlocks);
+
+ initialSetup(MF);
+
+ produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks);
+
+ // Allocate and initialize two array-of-arrays for the live-in and live-out
+ // machine values. The outer dimension is the block number; while the inner
+ // dimension is a LocIdx from MLocTracker.
+ ValueIDNum **MOutLocs = new ValueIDNum *[MaxNumBlocks];
+ ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks];
+ unsigned NumLocs = MTracker->getNumLocs();
+ for (int i = 0; i < MaxNumBlocks; ++i) {
+ MOutLocs[i] = new ValueIDNum[NumLocs];
+ MInLocs[i] = new ValueIDNum[NumLocs];
+ }
+
+ // Solve the machine value dataflow problem using the MLocTransfer function,
+ // storing the computed live-ins / live-outs into the array-of-arrays. We use
+ // both live-ins and live-outs for decision making in the variable value
+ // dataflow problem.
+ mlocDataflow(MInLocs, MOutLocs, MLocTransfer);
+
+ // Walk back through each block / instruction, collecting DBG_VALUE
+ // instructions and recording what machine value their operands refer to.
+ for (auto &OrderPair : OrderToBB) {
+ MachineBasicBlock &MBB = *OrderPair.second;
+ CurBB = MBB.getNumber();
+ VTracker = &vlocs[CurBB];
+ VTracker->MBB = &MBB;
+ MTracker->loadFromArray(MInLocs[CurBB], CurBB);
+ CurInst = 1;
+ for (auto &MI : MBB) {
+ process(MI);
+ ++CurInst;
+ }
+ MTracker->reset();
+ }
+
+ // Number all variables in the order that they appear, to be used as a stable
+ // insertion order later.
+ DenseMap<DebugVariable, unsigned> AllVarsNumbering;
+
+ // Map from one LexicalScope to all the variables in that scope.
+ DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>> ScopeToVars;
+
+ // Map from one lexical scope to all blocks in that scope.
+ DenseMap<const LexicalScope *, SmallPtrSet<MachineBasicBlock *, 4>>
+ ScopeToBlocks;
+
+ // Store a DILocation that describes a scope.
+ DenseMap<const LexicalScope *, const DILocation *> ScopeToDILocation;
+
+ // To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise
+ // the order is unimportant; it just has to be stable.
+ for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
+ auto *MBB = OrderToBB[I];
+ auto *VTracker = &vlocs[MBB->getNumber()];
+ // Collect each variable with a DBG_VALUE in this block.
+ for (auto &idx : VTracker->Vars) {
+ const auto &Var = idx.first;
+ const DILocation *ScopeLoc = VTracker->Scopes[Var];
+ assert(ScopeLoc != nullptr);
+ auto *Scope = LS.findLexicalScope(ScopeLoc);
+
+ // No insts in scope -> shouldn't have been recorded.
+ assert(Scope != nullptr);
+
+ AllVarsNumbering.insert(std::make_pair(Var, AllVarsNumbering.size()));
+ ScopeToVars[Scope].insert(Var);
+ ScopeToBlocks[Scope].insert(VTracker->MBB);
+ ScopeToDILocation[Scope] = ScopeLoc;
+ }
+ }
+
+ // OK. Iterate over scopes: there might be something to be said for
+ // ordering them by size/locality, but that's for the future. For each scope,
+ // solve the variable value problem, producing a map of variables to values
+ // in SavedLiveIns.
+ for (auto &P : ScopeToVars) {
+ vlocDataflow(P.first, ScopeToDILocation[P.first], P.second,
+ ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
+ vlocs);
+ }
+
+ // Using the computed value locations and variable values for each block,
+ // create the DBG_VALUE instructions representing the extended variable
+ // locations.
+ emitLocations(MF, SavedLiveIns, MInLocs, AllVarsNumbering);
+
+ for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) {
+ delete[] MOutLocs[Idx];
+ delete[] MInLocs[Idx];
+ }
+ delete[] MOutLocs;
+ delete[] MInLocs;
+
+ // Did we actually make any changes? If we created any DBG_VALUEs, then yes.
+ bool Changed = TTracker->Transfers.size() != 0;
+
+ delete MTracker;
+ delete TTracker;
+ MTracker = nullptr;
+ VTracker = nullptr;
+ TTracker = nullptr;
+
+ ArtificialBlocks.clear();
+ OrderToBB.clear();
+ BBToOrder.clear();
+ BBNumToRPO.clear();
+ DebugInstrNumToInstr.clear();
+
+ return Changed;
+}
+
+LDVImpl *llvm::makeInstrRefBasedLiveDebugValues() {
+ return new InstrRefBasedLDV();
+}
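
The driver above hand-rolls the per-block live-in/live-out storage with new[]/delete[]. As a rough illustration only (stand-in types, not the LLVM code), the same two-dimensional shape with automatic cleanup:

#include <cstdint>
#include <vector>

using ValueIDNum = uint64_t; // stand-in for the real ValueIDNum

int main() {
  int MaxNumBlocks = 8;  // assumed: highest block number plus one
  unsigned NumLocs = 16; // assumed: number of tracked machine locations
  // Outer index: block number; inner index: location index (LocIdx).
  std::vector<std::vector<ValueIDNum>> MInLocs(
      MaxNumBlocks, std::vector<ValueIDNum>(NumLocs));
  std::vector<std::vector<ValueIDNum>> MOutLocs = MInLocs;
  // ... a dataflow solver would fill these in; storage is freed when the
  // vectors go out of scope, with no explicit delete[] loop needed.
  (void)MOutLocs;
  return 0;
}
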
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
new file mode 100644
index 000000000000..770c46ec8436
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -0,0 +1,97 @@
+//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LiveDebugValues.h"
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+
+/// \file LiveDebugValues.cpp
+///
+/// The LiveDebugValues pass extends the range of variable locations
+/// (specified by DBG_VALUE instructions) from single blocks to successors
+/// and any other code locations where the variable location is valid.
+/// There are currently two implementations: the "VarLoc" implementation
+/// explicitly tracks the location of a variable, while the "InstrRef"
+/// implementation tracks the values defined by instructions through locations.
+///
+/// This file implements neither; it merely registers the pass and allows the
+/// user to pick which implementation will be used to propagate variable
+/// locations.
+
+#define DEBUG_TYPE "livedebugvalues"
+
+using namespace llvm;
+
+/// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or
+/// InstrRefBasedLDV to perform location propagation, via the LDVImpl
+/// base class.
+class LiveDebugValues : public MachineFunctionPass {
+public:
+ static char ID;
+
+ LiveDebugValues();
+ ~LiveDebugValues() {
+ if (TheImpl)
+ delete TheImpl;
+ }
+
+ /// Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ LDVImpl *TheImpl;
+ TargetPassConfig *TPC;
+};
+
+char LiveDebugValues::ID = 0;
+
+char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
+
+INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false,
+ false)
+
+/// Default construct and initialize the pass.
+LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
+ initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
+ TheImpl = nullptr;
+}
+
+bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
+ if (!TheImpl) {
+ TPC = getAnalysisIfAvailable<TargetPassConfig>();
+
+ bool InstrRefBased = false;
+ if (TPC) {
+ auto &TM = TPC->getTM<TargetMachine>();
+ InstrRefBased = TM.Options.ValueTrackingVariableLocations;
+ }
+
+ if (InstrRefBased)
+ TheImpl = llvm::makeInstrRefBasedLiveDebugValues();
+ else
+ TheImpl = llvm::makeVarLocBasedLiveDebugValues();
+ }
+
+ return TheImpl->ExtendRanges(MF, TPC);
+}
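
The wrapper above constructs one LDVImpl on first use, chosen from a TargetMachine option, and reuses it for every subsequent function. A self-contained sketch of that selection pattern, with illustrative names in place of the LLVM classes:

#include <iostream>
#include <memory>

struct ImplBase {
  virtual bool extendRanges() = 0;
  virtual ~ImplBase() = default;
};
struct VarLocImpl : ImplBase {
  bool extendRanges() override { std::cout << "VarLoc\n"; return false; }
};
struct InstrRefImpl : ImplBase {
  bool extendRanges() override { std::cout << "InstrRef\n"; return false; }
};

struct WrapperPass {
  bool UseInstrRef = false;          // stands in for the target-machine option
  std::unique_ptr<ImplBase> TheImpl; // built lazily, reused across functions

  bool runOnFunction() {
    if (!TheImpl) {
      if (UseInstrRef)
        TheImpl = std::make_unique<InstrRefImpl>();
      else
        TheImpl = std::make_unique<VarLocImpl>();
    }
    return TheImpl->extendRanges();
  }
};

int main() {
  WrapperPass P;
  P.runOnFunction(); // picks VarLoc on the first call
  P.runOnFunction(); // reuses the same implementation
  return 0;
}
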
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
new file mode 100644
index 000000000000..6b05bc68d74d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -0,0 +1,32 @@
+//===- LiveDebugValues.h - Tracking Debug Value MIs -----------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+namespace llvm {
+
+// Inline namespace for types / symbols shared between different
+// LiveDebugValues implementations.
+inline namespace SharedLiveDebugValues {
+
+// Expose a base class for LiveDebugValues interfaces to inherit from. This
+// allows the generic LiveDebugValues pass to call into the implementation
+// through a common handle.
+class LDVImpl {
+public:
+ virtual bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) = 0;
+ virtual ~LDVImpl() {}
+};
+
+} // namespace SharedLiveDebugValues
+
+// Factory functions for LiveDebugValues implementations.
+extern LDVImpl *makeVarLocBasedLiveDebugValues();
+extern LDVImpl *makeInstrRefBasedLiveDebugValues();
+} // namespace llvm
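
The header above puts the shared LDVImpl interface in an inline namespace, so both implementations and the wrapper pass can refer to it through the enclosing llvm namespace. A small sketch of how inline-namespace lookup behaves:

#include <iostream>

namespace outer {
inline namespace shared {
struct Base {
  virtual ~Base() = default;
};
} // namespace shared
} // namespace outer

int main() {
  outer::Base A;         // members of an inline namespace are visible here
  outer::shared::Base B; // ...and can still be named explicitly
  (void)A;
  (void)B;
  std::cout << "ok\n";
  return 0;
}
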
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 07a275b546f6..e2daa46fe6b9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -1,4 +1,4 @@
-//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===//
+//===- VarLocBasedImpl.cpp - Tracking Debug Value MIs with VarLoc class----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
///
-/// \file LiveDebugValues.cpp
+/// \file VarLocBasedImpl.cpp
///
/// LiveDebugValues is an optimistic "available expressions" dataflow
/// algorithm. The set of expressions is the set of machine locations
@@ -17,7 +17,12 @@
/// DebugVariable, and continues until that location is clobbered or
/// re-specified by a different DBG_VALUE for the same DebugVariable.
///
-/// The cannonical "available expressions" problem doesn't have expression
+/// The output of LiveDebugValues is additional DBG_VALUE instructions,
+/// placed to extend variable locations as far as they're available. This file
+/// and the VarLocBasedLDV class are an implementation that explicitly tracks
+/// locations, using the VarLoc class.
+///
+/// The canonical "available expressions" problem doesn't have expression
/// clobbering, instead when a variable is re-assigned, any expressions using
/// that variable get invalidated. LiveDebugValues can map onto "available
/// expressions" by having every register represented by a variable, which is
@@ -101,6 +106,8 @@
///
//===----------------------------------------------------------------------===//
+#include "LiveDebugValues.h"
+
#include "llvm/ADT/CoalescingBitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -138,6 +145,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
@@ -264,11 +272,12 @@ struct LocIndex {
}
};
-class LiveDebugValues : public MachineFunctionPass {
+class VarLocBasedLDV : public LDVImpl {
private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
const TargetFrameLowering *TFI;
+ TargetPassConfig *TPC;
BitVector CalleeSavedRegs;
LexicalScopes LS;
VarLocSet::Allocator Alloc;
@@ -284,7 +293,7 @@ private:
// register and an offset.
struct SpillLoc {
unsigned SpillBase;
- int SpillOffset;
+ StackOffset SpillOffset;
bool operator==(const SpillLoc &Other) const {
return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset;
}
@@ -315,21 +324,20 @@ private:
/// The value location. Stored separately to avoid repeatedly
/// extracting it from MI.
- union {
+ union LocUnion {
uint64_t RegNo;
SpillLoc SpillLocation;
uint64_t Hash;
int64_t Immediate;
const ConstantFP *FPImm;
const ConstantInt *CImm;
+ LocUnion() : Hash(0) {}
} Loc;
VarLoc(const MachineInstr &MI, LexicalScopes &LS)
: Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt()),
Expr(MI.getDebugExpression()), MI(MI) {
- static_assert((sizeof(Loc) == sizeof(uint64_t)),
- "hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
if (int RegNo = isDbgValueDescribedByReg(MI)) {
@@ -405,7 +413,7 @@ private:
/// Take the variable described by DBG_VALUE MI, and create a VarLoc
/// locating it in the specified spill location.
static VarLoc CreateSpillLoc(const MachineInstr &MI, unsigned SpillBase,
- int SpillOffset, LexicalScopes &LS) {
+ StackOffset SpillOffset, LexicalScopes &LS) {
VarLoc VL(MI, LS);
assert(VL.Kind == RegisterKind);
VL.Kind = SpillLocKind;
@@ -442,7 +450,8 @@ private:
// Use the original DBG_VALUEs expression to build the spilt location
// on top of. FIXME: spill locations created before this pass runs
// are not recognized, and not handled here.
- auto *SpillExpr = DIExpression::prepend(
+ auto *TRI = MF.getSubtarget().getRegisterInfo();
+ auto *SpillExpr = TRI->prependOffsetExpression(
DIExpr, DIExpression::ApplyOffset, Loc.SpillLocation.SpillOffset);
unsigned Base = Loc.SpillLocation.SpillBase;
return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr);
@@ -457,7 +466,7 @@ private:
llvm_unreachable(
"Tried to produce DBG_VALUE for invalid or backup VarLoc");
}
- llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum");
+ llvm_unreachable("Unrecognized VarLocBasedLDV.VarLoc.Kind enum");
}
/// Is the Loc field a constant or constant object?
@@ -511,7 +520,9 @@ private:
break;
case SpillLocKind:
Out << printReg(Loc.SpillLocation.SpillBase, TRI);
- Out << "[" << Loc.SpillLocation.SpillOffset << "]";
+ Out << "[" << Loc.SpillLocation.SpillOffset.getFixed() << " + "
+ << Loc.SpillLocation.SpillOffset.getScalable() << "x vscale"
+ << "]";
break;
case ImmediateKind:
Out << Loc.Immediate;
@@ -534,14 +545,46 @@ private:
#endif
bool operator==(const VarLoc &Other) const {
- return Kind == Other.Kind && Var == Other.Var &&
- Loc.Hash == Other.Loc.Hash && Expr == Other.Expr;
+ if (Kind != Other.Kind || !(Var == Other.Var) || Expr != Other.Expr)
+ return false;
+
+ switch (Kind) {
+ case SpillLocKind:
+ return Loc.SpillLocation == Other.Loc.SpillLocation;
+ case RegisterKind:
+ case ImmediateKind:
+ case EntryValueKind:
+ case EntryValueBackupKind:
+ case EntryValueCopyBackupKind:
+ return Loc.Hash == Other.Loc.Hash;
+ default:
+ llvm_unreachable("Invalid kind");
+ }
}
/// This operator guarantees that VarLocs are sorted by Variable first.
bool operator<(const VarLoc &Other) const {
- return std::tie(Var, Kind, Loc.Hash, Expr) <
- std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr);
+ switch (Kind) {
+ case SpillLocKind:
+ return std::make_tuple(Var, Kind, Loc.SpillLocation.SpillBase,
+ Loc.SpillLocation.SpillOffset.getFixed(),
+ Loc.SpillLocation.SpillOffset.getScalable(),
+ Expr) <
+ std::make_tuple(
+ Other.Var, Other.Kind, Other.Loc.SpillLocation.SpillBase,
+ Other.Loc.SpillLocation.SpillOffset.getFixed(),
+ Other.Loc.SpillLocation.SpillOffset.getScalable(),
+ Other.Expr);
+ case RegisterKind:
+ case ImmediateKind:
+ case EntryValueKind:
+ case EntryValueBackupKind:
+ case EntryValueCopyBackupKind:
+ return std::tie(Var, Kind, Loc.Hash, Expr) <
+ std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr);
+ default:
+ llvm_unreachable("Invalid kind");
+ }
}
};
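
With SpillLoc now carrying a StackOffset (a fixed plus a scalable part), the location union no longer fits the 64-bit Hash member, so the hunk above compares per kind: spill locations field by field, everything else via Hash. A standalone sketch of the tuple-based ordering used for spill locations, with plain structs standing in for the LLVM types:

#include <cassert>
#include <cstdint>
#include <tuple>

struct Offset { int64_t Fixed; int64_t Scalable; }; // stand-in for StackOffset
struct SpillLoc { unsigned SpillBase; Offset SpillOffset; };

static bool lessThan(const SpillLoc &A, const SpillLoc &B) {
  // Lexicographic order over (base register, fixed offset, scalable offset).
  return std::make_tuple(A.SpillBase, A.SpillOffset.Fixed,
                         A.SpillOffset.Scalable) <
         std::make_tuple(B.SpillBase, B.SpillOffset.Fixed,
                         B.SpillOffset.Scalable);
}

int main() {
  SpillLoc X{31, {8, 0}};
  SpillLoc Y{31, {8, 2}}; // same base and fixed part, larger scalable part
  assert(lessThan(X, Y) && !lessThan(Y, X));
  return 0;
}
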
@@ -793,30 +836,18 @@ private:
/// had their instruction creation deferred.
void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs);
- bool ExtendRanges(MachineFunction &MF);
+ bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override;
public:
- static char ID;
-
/// Default construct and initialize the pass.
- LiveDebugValues();
+ VarLocBasedLDV();
- /// Tell the pass manager which passes we depend on and what
- /// information we preserve.
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
+ ~VarLocBasedLDV();
/// Print to ostream with a message.
void printVarLocInMBB(const MachineFunction &MF, const VarLocInMBB &V,
const VarLocMap &VarLocIDs, const char *msg,
raw_ostream &Out) const;
-
- /// Calculate the liveness information for the given machine function.
- bool runOnMachineFunction(MachineFunction &MF) override;
};
} // end anonymous namespace
@@ -825,31 +856,16 @@ public:
// Implementation
//===----------------------------------------------------------------------===//
-char LiveDebugValues::ID = 0;
-
-char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
-
-INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis",
- false, false)
+VarLocBasedLDV::VarLocBasedLDV() { }
-/// Default construct and initialize the pass.
-LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
- initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
-}
-
-/// Tell the pass manager which passes we depend on and what information we
-/// preserve.
-void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
+VarLocBasedLDV::~VarLocBasedLDV() { }
/// Erase a variable from the set of open ranges, and additionally erase any
-/// fragments that may overlap it. If the VarLoc is a buckup location, erase
+/// fragments that may overlap it. If the VarLoc is a backup location, erase
/// the variable from the EntryValuesBackupVars set, indicating we should stop
 /// tracking its backup entry location. Otherwise, if the VarLoc is a primary
/// location, erase the variable from the Vars set.
-void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) {
+void VarLocBasedLDV::OpenRangesSet::erase(const VarLoc &VL) {
// Erasure helper.
auto DoErase = [VL, this](DebugVariable VarToErase) {
auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
@@ -875,15 +891,15 @@ void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) {
auto MapIt = OverlappingFragments.find({Var.getVariable(), ThisFragment});
if (MapIt != OverlappingFragments.end()) {
for (auto Fragment : MapIt->second) {
- LiveDebugValues::OptFragmentInfo FragmentHolder;
+ VarLocBasedLDV::OptFragmentInfo FragmentHolder;
if (!DebugVariable::isDefaultFragment(Fragment))
- FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment);
+ FragmentHolder = VarLocBasedLDV::OptFragmentInfo(Fragment);
DoErase({Var.getVariable(), FragmentHolder, Var.getInlinedAt()});
}
}
}
-void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet,
+void VarLocBasedLDV::OpenRangesSet::erase(const VarLocSet &KillSet,
const VarLocMap &VarLocIDs) {
VarLocs.intersectWithComplement(KillSet);
for (uint64_t ID : KillSet) {
@@ -893,7 +909,7 @@ void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet,
}
}
-void LiveDebugValues::OpenRangesSet::insert(LocIndex VarLocID,
+void VarLocBasedLDV::OpenRangesSet::insert(LocIndex VarLocID,
const VarLoc &VL) {
auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
VarLocs.set(VarLocID.getAsRawInteger());
@@ -903,7 +919,7 @@ void LiveDebugValues::OpenRangesSet::insert(LocIndex VarLocID,
/// Return the Loc ID of an entry value backup location, if it exists for the
/// variable.
llvm::Optional<LocIndex>
-LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
+VarLocBasedLDV::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
auto It = EntryValuesBackupVars.find(Var);
if (It != EntryValuesBackupVars.end())
return It->second;
@@ -911,7 +927,7 @@ LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
return llvm::None;
}
-void LiveDebugValues::collectIDsForRegs(VarLocSet &Collected,
+void VarLocBasedLDV::collectIDsForRegs(VarLocSet &Collected,
const DefinedRegsSet &Regs,
const VarLocSet &CollectFrom) const {
assert(!Regs.empty() && "Nothing to collect");
@@ -937,7 +953,7 @@ void LiveDebugValues::collectIDsForRegs(VarLocSet &Collected,
}
}
-void LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom,
+void VarLocBasedLDV::getUsedRegs(const VarLocSet &CollectFrom,
SmallVectorImpl<uint32_t> &UsedRegs) const {
// All register-based VarLocs are assigned indices greater than or equal to
// FirstRegIndex.
@@ -967,7 +983,7 @@ void LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom,
//===----------------------------------------------------------------------===//
#ifndef NDEBUG
-void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
+void VarLocBasedLDV::printVarLocInMBB(const MachineFunction &MF,
const VarLocInMBB &V,
const VarLocMap &VarLocIDs,
const char *msg,
@@ -991,8 +1007,8 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
}
#endif
-LiveDebugValues::VarLoc::SpillLoc
-LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
+VarLocBasedLDV::VarLoc::SpillLoc
+VarLocBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
auto MMOI = MI.memoperands_begin();
@@ -1002,14 +1018,14 @@ LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
const MachineBasicBlock *MBB = MI.getParent();
Register Reg;
- int Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+ StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
return {Reg, Offset};
}
/// Try to salvage the debug entry value if we encounter a new debug value
/// describing the same parameter, otherwise stop tracking the value. Return
/// true if we should stop tracking the entry value, otherwise return false.
-bool LiveDebugValues::removeEntryValue(const MachineInstr &MI,
+bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
const VarLoc &EntryVL) {
@@ -1061,7 +1077,7 @@ bool LiveDebugValues::removeEntryValue(const MachineInstr &MI,
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
-void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
+void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs) {
if (!MI.isDebugValue())
@@ -1112,7 +1128,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
}
/// Turn the entry value backup locations into primary locations.
-void LiveDebugValues::emitEntryValues(MachineInstr &MI,
+void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers,
@@ -1150,7 +1166,7 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI,
/// new VarLoc. If \p NewReg is different than default zero value then the
/// new location will be register location created by the copy like instruction,
/// otherwise it is variable's location on the stack.
-void LiveDebugValues::insertTransferDebugPair(
+void VarLocBasedLDV::insertTransferDebugPair(
MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers,
VarLocMap &VarLocIDs, LocIndex OldVarID, TransferKind Kind,
Register NewReg) {
@@ -1217,7 +1233,7 @@ void LiveDebugValues::insertTransferDebugPair(
}
/// A definition of a register may mark the end of a range.
-void LiveDebugValues::transferRegisterDef(
+void VarLocBasedLDV::transferRegisterDef(
MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
TransferMap &Transfers) {
@@ -1278,14 +1294,14 @@ void LiveDebugValues::transferRegisterDef(
collectIDsForRegs(KillSet, DeadRegs, OpenRanges.getVarLocs());
OpenRanges.erase(KillSet, VarLocIDs);
- if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ if (TPC) {
auto &TM = TPC->getTM<TargetMachine>();
if (TM.Options.ShouldEmitDebugEntryValues())
emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet);
}
}
-bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
+bool VarLocBasedLDV::isSpillInstruction(const MachineInstr &MI,
MachineFunction *MF) {
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
@@ -1298,7 +1314,7 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
return true;
}
-bool LiveDebugValues::isLocationSpill(const MachineInstr &MI,
+bool VarLocBasedLDV::isLocationSpill(const MachineInstr &MI,
MachineFunction *MF, Register &Reg) {
if (!isSpillInstruction(MI, MF))
return false;
@@ -1338,8 +1354,8 @@ bool LiveDebugValues::isLocationSpill(const MachineInstr &MI,
return false;
}
-Optional<LiveDebugValues::VarLoc::SpillLoc>
-LiveDebugValues::isRestoreInstruction(const MachineInstr &MI,
+Optional<VarLocBasedLDV::VarLoc::SpillLoc>
+VarLocBasedLDV::isRestoreInstruction(const MachineInstr &MI,
MachineFunction *MF, Register &Reg) {
if (!MI.hasOneMemOperand())
return None;
@@ -1360,7 +1376,7 @@ LiveDebugValues::isRestoreInstruction(const MachineInstr &MI,
/// the DBG_VALUE without inserting it and keep track of it in \p Transfers.
/// It will be inserted into the BB when we're done iterating over the
/// instructions.
-void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
+void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers) {
@@ -1449,7 +1465,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
/// If \p MI is a register copy instruction, that copies a previously tracked
/// value from one register to another register that is callee saved, we
/// create new DBG_VALUE instruction described with copy destination register.
-void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
+void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers) {
@@ -1519,7 +1535,7 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
}
/// Terminate all open ranges at the end of the current basic block.
-bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB,
+bool VarLocBasedLDV::transferTerminator(MachineBasicBlock *CurMBB,
OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs,
const VarLocMap &VarLocIDs) {
@@ -1551,7 +1567,7 @@ bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB,
/// Variable which are known to exist.
/// \param OverlappingFragments The overlap map being constructed, from one
/// Var/Fragment pair to a vector of fragments known to overlap.
-void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
+void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI,
VarToFragments &SeenFragments,
OverlapMap &OverlappingFragments) {
DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
@@ -1603,7 +1619,7 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
}
/// This routine creates OpenRanges.
-void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
+void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers) {
transferDebugValue(MI, OpenRanges, VarLocIDs);
transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers);
@@ -1614,7 +1630,7 @@ void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
/// This routine joins the analysis results of all incoming edges in @MBB by
/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same
/// source variable in all the predecessors of @MBB reside in the same location.
-bool LiveDebugValues::join(
+bool VarLocBasedLDV::join(
MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
@@ -1697,7 +1713,7 @@ bool LiveDebugValues::join(
return Changed;
}
-void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
+void VarLocBasedLDV::flushPendingLocs(VarLocInMBB &PendingInLocs,
VarLocMap &VarLocIDs) {
// PendingInLocs records all locations propagated into blocks, which have
// not had DBG_VALUE insts created. Go through and create those insts now.
@@ -1721,7 +1737,7 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
}
}
-bool LiveDebugValues::isEntryValueCandidate(
+bool VarLocBasedLDV::isEntryValueCandidate(
const MachineInstr &MI, const DefinedRegsSet &DefinedRegs) const {
assert(MI.isDebugValue() && "This must be DBG_VALUE.");
@@ -1770,11 +1786,11 @@ static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
/// This routine records the entry values of function parameters. The values
 /// could be used as backup values. If we lose track of some unmodified
 /// parameters, the backup values will be used as primary locations.
-void LiveDebugValues::recordEntryValue(const MachineInstr &MI,
+void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI,
const DefinedRegsSet &DefinedRegs,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs) {
- if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ if (TPC) {
auto &TM = TPC->getTM<TargetMachine>();
if (!TM.Options.ShouldEmitDebugEntryValues())
return;
@@ -1800,9 +1816,25 @@ void LiveDebugValues::recordEntryValue(const MachineInstr &MI,
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
-bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
+bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
+ if (!MF.getFunction().getSubprogram())
+    // VarLocBasedLDV will already have removed all DBG_VALUEs.
+ return false;
+
+ // Skip functions from NoDebug compilation units.
+ if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return false;
+
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TFI = MF.getSubtarget().getFrameLowering();
+ TFI->getCalleeSaves(MF, CalleeSavedRegs);
+ this->TPC = TPC;
+ LS.initialize(MF);
+
bool Changed = false;
bool OLChanged = false;
bool MBBJoined = false;
@@ -1840,8 +1872,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
MachineBasicBlock &First_MBB = *(MF.begin());
for (auto &MI : First_MBB) {
collectRegDefs(MI, DefinedRegs, TRI);
- if (MI.isDebugValue())
- recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs);
+ if (MI.isDebugValue())
+ recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs);
}
// Initialize per-block structures and scan for fragment overlaps.
@@ -1878,7 +1910,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
if (MI.isDebugValue())
++NumInputDbgValues;
if (NumInputDbgValues > InputDbgValueLimit) {
- LLVM_DEBUG(dbgs() << "Disabling LiveDebugValues: " << MF.getName()
+ LLVM_DEBUG(dbgs() << "Disabling VarLocBasedLDV: " << MF.getName()
<< " has " << RPONumber << " basic blocks and "
<< NumInputDbgValues
<< " input DBG_VALUEs, exceeding limits.\n");
@@ -1955,22 +1987,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
return Changed;
}
-bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
- if (!MF.getFunction().getSubprogram())
- // LiveDebugValues will already have removed all DBG_VALUEs.
- return false;
-
- // Skip functions from NoDebug compilation units.
- if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
- DICompileUnit::NoDebug)
- return false;
-
- TRI = MF.getSubtarget().getRegisterInfo();
- TII = MF.getSubtarget().getInstrInfo();
- TFI = MF.getSubtarget().getFrameLowering();
- TFI->getCalleeSaves(MF, CalleeSavedRegs);
- LS.initialize(MF);
-
- bool Changed = ExtendRanges(MF);
- return Changed;
+LDVImpl *llvm::makeVarLocBasedLiveDebugValues() {
+  return new VarLocBasedLDV();
}
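
Throughout this file the implementation now receives TargetPassConfig from the wrapper pass instead of calling getAnalysisIfAvailable itself, and guards entry-value emission on that pointer. A minimal sketch of the injected-config pattern, with illustrative names rather than LLVM's:

#include <iostream>

struct Config {
  bool EmitEntryValues = true;
};

struct Impl {
  const Config *Cfg = nullptr; // may legitimately be null

  bool extendRanges(const Config *C) {
    Cfg = C; // injected by the caller rather than looked up internally
    if (Cfg && Cfg->EmitEntryValues)
      std::cout << "emitting entry values\n";
    return false;
  }
};

int main() {
  Config C;
  Impl I;
  I.extendRanges(&C);      // normal case: the wrapper passes its config through
  I.extendRanges(nullptr); // still safe when the caller has no config
  return 0;
}
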
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 158e873370b1..2325341070a3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -54,7 +54,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -96,6 +95,7 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) {
enum : unsigned { UndefLocNo = ~0U };
+namespace {
/// Describes a debug variable value by location number and expression along
/// with some flags about the original usage of the location.
class DbgVariableValue {
@@ -136,6 +136,7 @@ private:
unsigned WasIndirect : 1;
const DIExpression *Expression = nullptr;
};
+} // namespace
/// Map of where a user value is live to that value.
using LocMap = IntervalMap<SlotIndex, DbgVariableValue, 4>;
@@ -394,6 +395,11 @@ class LDVImpl {
LiveIntervals *LIS;
const TargetRegisterInfo *TRI;
+ using StashedInstrRef =
+ std::tuple<unsigned, unsigned, const DILocalVariable *,
+ const DIExpression *, DebugLoc>;
+ std::map<SlotIndex, std::vector<StashedInstrRef>> StashedInstrReferences;
+
/// Whether emitDebugValues is called.
bool EmitDone = false;
@@ -430,6 +436,16 @@ class LDVImpl {
/// \returns True if the DBG_VALUE instruction should be deleted.
bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
+ /// Track a DBG_INSTR_REF. This needs to be removed from the MachineFunction
+ /// during regalloc -- but there's no need to maintain live ranges, as we
+ /// refer to a value rather than a location.
+ ///
+ /// \param MI DBG_INSTR_REF instruction
+ /// \param Idx Last valid SlotIndex before instruction
+ ///
+  /// \returns True if the DBG_INSTR_REF instruction should be deleted.
+ bool handleDebugInstrRef(MachineInstr &MI, SlotIndex Idx);
+
/// Add DBG_LABEL instruction to UserLabel.
///
/// \param MI DBG_LABEL instruction
@@ -458,6 +474,7 @@ public:
/// Release all memory.
void clear() {
MF = nullptr;
+ StashedInstrReferences.clear();
userValues.clear();
userLabels.clear();
virtRegToEqClass.clear();
@@ -665,6 +682,19 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
return true;
}
+bool LDVImpl::handleDebugInstrRef(MachineInstr &MI, SlotIndex Idx) {
+ assert(MI.isDebugRef());
+ unsigned InstrNum = MI.getOperand(0).getImm();
+ unsigned OperandNum = MI.getOperand(1).getImm();
+ auto *Var = MI.getDebugVariable();
+ auto *Expr = MI.getDebugExpression();
+ auto &DL = MI.getDebugLoc();
+ StashedInstrRef Stashed =
+ std::make_tuple(InstrNum, OperandNum, Var, Expr, DL);
+ StashedInstrReferences[Idx].push_back(Stashed);
+ return true;
+}
+
bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
// DBG_LABEL label
if (MI.getNumOperands() != 1 || !MI.getOperand(0).isMetadata()) {
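
The new handleDebugInstrRef above strips DBG_INSTR_REFs out of the function and stashes their payload keyed by SlotIndex, to be re-inserted after register allocation (see the emitDebugValues hunk further down). A standalone sketch of that bookkeeping, with simple stand-ins for SlotIndex and the stashed tuple:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>

using Slot = uint32_t; // stand-in for SlotIndex
// (instruction number, operand number, variable name) stands in for the real
// (InstrNum, OperandNum, DILocalVariable *, DIExpression *, DebugLoc) tuple.
using StashedInstrRef = std::tuple<unsigned, unsigned, std::string>;

int main() {
  std::map<Slot, std::vector<StashedInstrRef>> StashedInstrReferences;
  // "Collect" phase: remember each reference at the slot it occupied.
  StashedInstrReferences[40].emplace_back(7, 0, "x");
  StashedInstrReferences[40].emplace_back(9, 0, "y"); // same slot, order kept
  StashedInstrReferences[16].emplace_back(3, 0, "z");

  // "Emit" phase: walk slots in order and re-materialize each reference.
  for (const auto &P : StashedInstrReferences)
    for (const auto &S : P.second)
      std::cout << "slot " << P.first << ": instr " << std::get<0>(S)
                << " op " << std::get<1>(S) << " var " << std::get<2>(S)
                << "\n";
  return 0;
}
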
@@ -712,6 +742,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
// Only handle DBG_VALUE in handleDebugValue(). Skip all other
// kinds of debug instructions.
if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) ||
+ (MBBI->isDebugRef() && handleDebugInstrRef(*MBBI, Idx)) ||
(MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) {
MBBI = MBB->erase(MBBI);
Changed = true;
@@ -775,12 +806,12 @@ void UserValue::addDefsFromCopies(
if (Kills.empty())
return;
// Don't track copies from physregs, there are too many uses.
- if (!Register::isVirtualRegister(LI->reg))
+ if (!Register::isVirtualRegister(LI->reg()))
return;
// Collect all the (vreg, valno) pairs that are copies of LI.
SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
- for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) {
+ for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg())) {
MachineInstr *MI = MO.getParent();
// Copies of the full value.
if (MO.getSubReg() || !MI->isCopy())
@@ -991,10 +1022,10 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
return Changed;
}
-static void removeDebugValues(MachineFunction &mf) {
+static void removeDebugInstrs(MachineFunction &mf) {
for (MachineBasicBlock &MBB : mf) {
for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) {
- if (!MBBI->isDebugValue()) {
+ if (!MBBI->isDebugInstr()) {
++MBBI;
continue;
}
@@ -1007,7 +1038,7 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
if (!EnableLDV)
return false;
if (!mf.getFunction().getSubprogram()) {
- removeDebugValues(mf);
+ removeDebugInstrs(mf);
return false;
}
if (!pImpl)
@@ -1064,7 +1095,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs,
LII->start < LocMapI.stop()) {
// Overlapping correct location. Allocate NewLocNo now.
if (NewLocNo == UndefLocNo) {
- MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
+ MachineOperand MO = MachineOperand::CreateReg(LI->reg(), false);
MO.setSubReg(locations[OldLocNo].getSubReg());
NewLocNo = getLocationNo(MO);
DidChange = true;
@@ -1434,6 +1465,28 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
LLVM_DEBUG(userLabel->print(dbgs(), TRI));
userLabel->emitDebugLabel(*LIS, *TII);
}
+
+ LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n");
+
+  // Re-insert any DBG_INSTR_REFs back in the positions they came from.
+  // Ordering within a slot is preserved by the vector.
+ auto Slots = LIS->getSlotIndexes();
+ const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
+ for (auto &P : StashedInstrReferences) {
+ const SlotIndex &Idx = P.first;
+ auto *MBB = Slots->getMBBFromIndex(Idx);
+ MachineBasicBlock::iterator insertPos = findInsertLocation(MBB, Idx, *LIS);
+ for (auto &Stashed : P.second) {
+ auto MIB = BuildMI(*MF, std::get<4>(Stashed), RefII);
+ MIB.addImm(std::get<0>(Stashed));
+ MIB.addImm(std::get<1>(Stashed));
+ MIB.addMetadata(std::get<2>(Stashed));
+ MIB.addMetadata(std::get<3>(Stashed));
+ MachineInstr *New = MIB;
+ MBB->insert(insertPos, New);
+ }
+ }
+
EmitDone = true;
}
@@ -1442,10 +1495,6 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
}
-bool LiveDebugVariables::doInitialization(Module &M) {
- return Pass::doInitialization(M);
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LiveDebugVariables::dump() const {
if (pImpl)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
index 74e738ec3e56..07dd3a83866f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -56,7 +56,6 @@ private:
bool runOnMachineFunction(MachineFunction &) override;
void releaseMemory() override;
void getAnalysisUsage(AnalysisUsage &) const override;
- bool doInitialization(Module &) override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
index 930dc116205a..ce0e58772068 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
@@ -951,9 +951,9 @@ void LiveInterval::refineSubRanges(
MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
// Now that the subrange is split in half, make sure we
// only keep in the subranges the VNIs that touch the related half.
- stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI,
+ stripValuesNotDefiningMask(reg(), *MatchingRange, Matching, Indexes, TRI,
ComposeSubRegIdx);
- stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI,
+ stripValuesNotDefiningMask(reg(), SR, SR.LaneMask, Indexes, TRI,
ComposeSubRegIdx);
}
Apply(*MatchingRange);
@@ -977,11 +977,11 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
LaneBitmask LaneMask,
const MachineRegisterInfo &MRI,
const SlotIndexes &Indexes) const {
- assert(Register::isVirtualRegister(reg));
- LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg);
+ assert(Register::isVirtualRegister(reg()));
+ LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg());
assert((VRegMask & LaneMask).any());
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
- for (const MachineOperand &MO : MRI.def_operands(reg)) {
+ for (const MachineOperand &MO : MRI.def_operands(reg())) {
if (!MO.isUndef())
continue;
unsigned SubReg = MO.getSubReg();
@@ -1043,12 +1043,12 @@ void LiveInterval::SubRange::print(raw_ostream &OS) const {
}
void LiveInterval::print(raw_ostream &OS) const {
- OS << printReg(reg) << ' ';
+ OS << printReg(reg()) << ' ';
super::print(OS);
// Print subranges
for (const SubRange &SR : subranges())
OS << SR;
- OS << " weight:" << weight;
+ OS << " weight:" << Weight;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1087,7 +1087,7 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
// Make sure SubRanges are fine and LaneMasks are disjunct.
LaneBitmask Mask;
- LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg)
+ LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg())
: LaneBitmask::getAll();
for (const SubRange &SR : subranges()) {
// Subrange lanemask should be disjunct to any previous subrange masks.
@@ -1361,8 +1361,9 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) {
void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
MachineRegisterInfo &MRI) {
// Rewrite instructions.
- for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
- RE = MRI.reg_end(); RI != RE;) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg()),
+ RE = MRI.reg_end();
+ RI != RE;) {
MachineOperand &MO = *RI;
MachineInstr *MI = RI->getParent();
++RI;
@@ -1382,7 +1383,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
if (!VNI)
continue;
if (unsigned EqClass = getEqClass(VNI))
- MO.setReg(LIV[EqClass-1]->reg);
+ MO.setReg(LIV[EqClass - 1]->reg());
}
// Distribute subregister liveranges.
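
The LiveInterval hunks above (and the files that follow) switch callers from the public reg member to a reg() accessor, alongside the renamed Weight member. A tiny illustrative sketch of that encapsulation step; the type and the weight accessor here are stand-ins, not the real LiveInterval API:

#include <cassert>

class LiveIntervalLike {
  unsigned Reg; // previously a public member named `reg`
  float Weight; // previously named `weight`
public:
  LiveIntervalLike(unsigned R, float W) : Reg(R), Weight(W) {}
  unsigned reg() const { return Reg; }
  float weight() const { return Weight; }
};

int main() {
  LiveIntervalLike LI(42, 1.0f);
  assert(LI.reg() == 42); // callers now go through the accessor
  assert(LI.weight() == 1.0f);
  return 0;
}
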
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
index 30c2d74a71c5..2756086cb8b1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
@@ -60,7 +60,7 @@ void LiveIntervalCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
// Visit all def operands. If the same instruction has multiple defs of Reg,
// createDeadDef() will deduplicate.
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
- unsigned Reg = LI.reg;
+ unsigned Reg = LI.reg();
for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
if (!MO.isDef() && !MO.readsReg())
continue;
@@ -127,7 +127,7 @@ void LiveIntervalCalc::constructMainRangeFromSubranges(LiveInterval &LI) {
}
}
resetLiveOutMap();
- extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI);
+ extendToUses(MainRange, LI.reg(), LaneBitmask::getAll(), &LI);
}
void LiveIntervalCalc::createDeadDefs(LiveRange &LR, Register Reg) {
@@ -202,4 +202,4 @@ void LiveIntervalCalc::extendToUses(LiveRange &LR, Register Reg,
// reading Reg multiple times. That is OK, extend() is idempotent.
extend(LR, UseIdx, Reg, Undefs);
}
-}
\ No newline at end of file
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 43fa8f2d7157..7ccb8df4bc05 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -85,8 +85,8 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
return;
}
for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
- OS << " [" << SI.start() << ' ' << SI.stop() << "):"
- << printReg(SI.value()->reg, TRI);
+ OS << " [" << SI.start() << ' ' << SI.stop()
+ << "):" << printReg(SI.value()->reg(), TRI);
}
OS << '\n';
}
@@ -95,10 +95,20 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
// Verify the live intervals in this union and add them to the visited set.
void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
- VisitedVRegs.set(SI.value()->reg);
+ VisitedVRegs.set(SI.value()->reg());
}
#endif //!NDEBUG
+LiveInterval *LiveIntervalUnion::getOneVReg() const {
+ if (empty())
+ return nullptr;
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ // return the first valid live interval
+ return SI.value();
+ }
+ return nullptr;
+}
+
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index e8ee0599e1a2..a32b486240c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -159,7 +160,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
// Dump the virtregs.
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (hasInterval(Reg))
OS << getInterval(Reg) << '\n';
}
@@ -183,7 +184,7 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
}
#endif
-LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+LiveInterval *LiveIntervals::createInterval(Register reg) {
float Weight = Register::isPhysicalRegister(reg) ? huge_valf : 0.0F;
return new LiveInterval(reg, Weight);
}
@@ -193,13 +194,13 @@ bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LICalc && "LICalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
+ LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg()));
return computeDeadValues(LI, nullptr);
}
void LiveIntervals::computeVirtRegs() {
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
LiveInterval &LI = createEmptyInterval(Reg);
@@ -225,6 +226,15 @@ void LiveIntervals::computeRegMasks() {
RegMaskBits.push_back(Mask);
}
+ // Unwinders may clobber additional registers.
+ // FIXME: This functionality can possibly be merged into
+ // MachineBasicBlock::getBeginClobberMask().
+ if (MBB.isEHPad())
+ if (auto *Mask = TRI->getCustomEHPadPreservedMask(*MBB.getParent())) {
+ RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB));
+ RegMaskBits.push_back(Mask);
+ }
+
for (const MachineInstr &MI : MBB) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
@@ -277,7 +287,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
bool IsRootReserved = true;
for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
Super.isValid(); ++Super) {
- unsigned Reg = *Super;
+ MCRegister Reg = *Super;
if (!MRI->reg_empty(Reg))
LICalc->createDeadDefs(LR, Reg);
// A register unit is considered reserved if all its roots and all their
@@ -296,7 +306,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
Super.isValid(); ++Super) {
- unsigned Reg = *Super;
+ MCRegister Reg = *Super;
if (!MRI->reg_empty(Reg))
LICalc->extendToUses(LR, Reg);
}
@@ -362,7 +372,7 @@ static void createSegmentsForValues(LiveRange &LR,
void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
ShrinkToUsesWorkList &WorkList,
- unsigned Reg, LaneBitmask LaneMask) {
+ Register Reg, LaneBitmask LaneMask) {
// Keep track of the PHIs that are in use.
SmallPtrSet<VNInfo*, 8> UsedPHIs;
// Blocks that have already been added to WorkList as live-out.
@@ -444,13 +454,13 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n');
- assert(Register::isVirtualRegister(li->reg) &&
+ assert(Register::isVirtualRegister(li->reg()) &&
"Can only shrink virtual registers");
// Shrink subregister live ranges.
bool NeedsCleanup = false;
for (LiveInterval::SubRange &S : li->subranges()) {
- shrinkToUses(S, li->reg);
+ shrinkToUses(S, li->reg());
if (S.empty())
NeedsCleanup = true;
}
@@ -460,8 +470,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Find all the values used, including PHI kills.
ShrinkToUsesWorkList WorkList;
- // Visit all instructions reading li->reg.
- unsigned Reg = li->reg;
+ // Visit all instructions reading li->reg().
+ Register Reg = li->reg();
for (MachineInstr &UseMI : MRI->reg_instructions(Reg)) {
if (UseMI.isDebugValue() || !UseMI.readsVirtualRegister(Reg))
continue;
@@ -514,7 +524,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// Is the register live before? Otherwise we may have to add a read-undef
// flag for subregister defs.
- unsigned VReg = LI.reg;
+ Register VReg = LI.reg();
if (MRI->shouldTrackSubRegLiveness(VReg)) {
if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
MachineInstr *MI = getInstructionFromIndex(Def);
@@ -534,7 +544,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// This is a dead def. Make sure the instruction knows.
MachineInstr *MI = getInstructionFromIndex(Def);
assert(MI && "No instruction defining live value");
- MI->addRegisterDead(LI.reg, TRI);
+ MI->addRegisterDead(LI.reg(), TRI);
if (HaveDeadDef)
MayHaveSplitComponents = true;
HaveDeadDef = true;
@@ -548,7 +558,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
return MayHaveSplitComponents;
}
-void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
+void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, Register Reg) {
LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n');
assert(Register::isVirtualRegister(Reg) &&
"Can only shrink virtual registers");
@@ -697,7 +707,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
LiveRange::const_iterator>, 4> SRs;
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
const LiveInterval &LI = getInterval(Reg);
@@ -868,14 +878,12 @@ float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineBasicBlock *MBB) {
- BlockFrequency Freq = MBFI->getBlockFreq(MBB);
- const float Scale = 1.0f / MBFI->getEntryFreq();
- return (isDef + isUse) * (Freq.getFrequency() * Scale);
+ return (isDef + isUse) * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
}
LiveRange::Segment
-LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) {
- LiveInterval& Interval = createEmptyInterval(reg);
+LiveIntervals::addSegmentToEndOfBlock(Register Reg, MachineInstr &startInst) {
+ LiveInterval &Interval = createEmptyInterval(Reg);
VNInfo *VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getVNInfoAllocator());
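
The getSpillWeight change above folds the manual 1/entry-frequency scaling into a single call returning the block frequency relative to the entry block. A worked numeric sketch of the quantity being computed, with made-up frequencies:

#include <cassert>
#include <cstdint>

// Relative frequency = raw block frequency / entry-block frequency.
static float relativeToEntry(uint64_t BlockFreq, uint64_t EntryFreq) {
  return static_cast<float>(BlockFreq) / static_cast<float>(EntryFreq);
}

int main() {
  bool IsDef = true, IsUse = false;
  uint64_t EntryFreq = 1u << 14;       // assumed entry-block frequency
  uint64_t BlockFreq = 3 * (1u << 14); // a block three times as hot
  float Weight = (IsDef + IsUse) * relativeToEntry(BlockFreq, EntryFreq);
  assert(Weight == 3.0f);
  return 0;
}
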
@@ -1030,7 +1038,8 @@ public:
// For physregs, only update the regunits that actually have a
// precomputed live range.
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
+ ++Units)
if (LiveRange *LR = getRegUnitLI(*Units))
updateRange(*LR, *Units, LaneBitmask::getNone());
}
@@ -1041,7 +1050,7 @@ public:
private:
/// Update a single live range, assuming an instruction has been moved from
/// OldIdx to NewIdx.
- void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
+ void updateRange(LiveRange &LR, Register Reg, LaneBitmask LaneMask) {
if (!Updated.insert(&LR).second)
return;
LLVM_DEBUG({
@@ -1238,7 +1247,7 @@ private:
/// Update LR to reflect an instruction has been moved upwards from OldIdx
/// to NewIdx (NewIdx < OldIdx).
- void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
+ void handleMoveUp(LiveRange &LR, Register Reg, LaneBitmask LaneMask) {
LiveRange::iterator E = LR.end();
// Segment going into OldIdx.
LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex());
@@ -1420,7 +1429,7 @@ private:
}
// Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg,
+ SlotIndex findLastUseBefore(SlotIndex Before, Register Reg,
LaneBitmask LaneMask) {
if (Register::isVirtualRegister(Reg)) {
SlotIndex LastUse = Before;
@@ -1533,17 +1542,17 @@ void LiveIntervals::handleMoveIntoNewBundle(MachineInstr &BundleStart,
void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
const MachineBasicBlock::iterator End,
- const SlotIndex endIdx,
- LiveRange &LR, const unsigned Reg,
+ const SlotIndex EndIdx, LiveRange &LR,
+ const Register Reg,
LaneBitmask LaneMask) {
- LiveInterval::iterator LII = LR.find(endIdx);
+ LiveInterval::iterator LII = LR.find(EndIdx);
SlotIndex lastUseIdx;
if (LII == LR.begin()) {
// This happens when the function is called for a subregister that only
// occurs _after_ the range that is to be repaired.
return;
}
- if (LII != LR.end() && LII->start < endIdx)
+ if (LII != LR.end() && LII->start < EndIdx)
lastUseIdx = LII->end;
else
--LII;
@@ -1637,11 +1646,11 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
while (End != MBB->end() && !Indexes->hasIndex(*End))
++End;
- SlotIndex endIdx;
+ SlotIndex EndIdx;
if (End == MBB->end())
- endIdx = getMBBEndIdx(MBB).getPrevSlot();
+ EndIdx = getMBBEndIdx(MBB).getPrevSlot();
else
- endIdx = getInstructionIndex(*End);
+ EndIdx = getInstructionIndex(*End);
Indexes->repairIndexesInRange(MBB, Begin, End);
@@ -1670,13 +1679,13 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
continue;
for (LiveInterval::SubRange &S : LI.subranges())
- repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask);
+ repairOldRegInRange(Begin, End, EndIdx, S, Reg, S.LaneMask);
- repairOldRegInRange(Begin, End, endIdx, LI, Reg);
+ repairOldRegInRange(Begin, End, EndIdx, LI, Reg);
}
}
-void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) {
+void LiveIntervals::removePhysRegDefAt(MCRegister Reg, SlotIndex Pos) {
for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
if (LiveRange *LR = getCachedRegUnit(*Unit))
if (VNInfo *VNI = LR->getVNInfoAt(Pos))
@@ -1709,7 +1718,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
if (NumComp <= 1)
return;
LLVM_DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
- unsigned Reg = LI.reg;
+ Register Reg = LI.reg();
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
for (unsigned I = 1; I < NumComp; ++I) {
Register NewVReg = MRI->createVirtualRegister(RegClass);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 9de77c19a23a..037cb5426235 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -188,7 +187,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
MachineInstr *DefMI = nullptr, *UseMI = nullptr;
// Check that there is a single def and a single use.
- for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) {
+ for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg())) {
MachineInstr *MI = MO.getParent();
if (MO.isDef()) {
if (DefMI && DefMI != MI)
@@ -224,7 +223,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
<< " into single use: " << *UseMI);
SmallVector<unsigned, 8> Ops;
- if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
+ if (UseMI->readsWritesVirtualRegister(LI->reg(), &Ops).second)
return false;
MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS);
@@ -236,7 +235,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (UseMI->shouldUpdateCallSiteInfo())
UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI);
UseMI->eraseFromParent();
- DefMI->addRegisterDead(LI->reg, nullptr);
+ DefMI->addRegisterDead(LI->reg(), nullptr);
Dead.push_back(DefMI);
++NumDCEFoldedLoads;
return true;
@@ -316,7 +315,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
else if (MOI->isDef())
- LIS.removePhysRegDefAt(Reg, Idx);
+ LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
continue;
}
LiveInterval &LI = LIS.getInterval(Reg);
@@ -332,7 +331,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// Remove defined value.
if (MOI->isDef()) {
if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
- TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
+ TheDelegate->LRE_WillShrinkVirtReg(LI.reg());
LIS.removeVRegDefAt(LI, Idx);
if (LI.empty())
RegsToErase.push_back(Reg);
@@ -369,7 +368,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
pop_back();
DeadRemats->insert(MI);
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
- MI->substituteRegister(Dest, NewLI.reg, 0, TRI);
+ MI->substituteRegister(Dest, NewLI.reg(), 0, TRI);
MI->getOperand(0).setIsDead(true);
} else {
if (TheDelegate)
@@ -409,7 +408,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
ToShrink.pop_back();
if (foldAsLoad(LI, Dead))
continue;
- unsigned VReg = LI->reg;
+ unsigned VReg = LI->reg();
if (TheDelegate)
TheDelegate->LRE_WillShrinkVirtReg(VReg);
if (!LIS.shrinkToUses(LI, &Dead))
@@ -436,15 +435,15 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
if (!SplitLIs.empty())
++NumFracRanges;
- unsigned Original = VRM ? VRM->getOriginal(VReg) : 0;
+ Register Original = VRM ? VRM->getOriginal(VReg) : Register();
for (const LiveInterval *SplitLI : SplitLIs) {
// If LI is an original interval that hasn't been split yet, make the new
// intervals their own originals instead of referring to LI. The original
// interval must contain all the split products, and LI doesn't.
if (Original != VReg && Original != 0)
- VRM->setIsSplitFromReg(SplitLI->reg, Original);
+ VRM->setIsSplitFromReg(SplitLI->reg(), Original);
if (TheDelegate)
- TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg);
+ TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg(), VReg);
}
}
}
@@ -463,14 +462,14 @@ void
LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
const MachineLoopInfo &Loops,
const MachineBlockFrequencyInfo &MBFI) {
- VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI);
+ VirtRegAuxInfo VRAI(MF, LIS, *VRM, Loops, MBFI);
for (unsigned I = 0, Size = size(); I < Size; ++I) {
LiveInterval &LI = LIS.getInterval(get(I));
- if (MRI.recomputeRegClass(LI.reg))
+ if (MRI.recomputeRegClass(LI.reg()))
LLVM_DEBUG({
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- dbgs() << "Inflated " << printReg(LI.reg) << " to "
- << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n';
+ dbgs() << "Inflated " << printReg(LI.reg()) << " to "
+ << TRI->getRegClassName(MRI.getRegClass(LI.reg())) << '\n';
});
VRAI.calculateSpillWeightAndHint(LI);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 26439a656917..7fa14fd902ef 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -156,7 +156,8 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
// If MI has side effects, it should become a barrier for code motion.
// IOM is rebuild from the next instruction to prevent later
// instructions from being moved before this MI.
- if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
+ if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() &&
+ Next != MBB.end()) {
BuildInstOrderMap(Next, IOM);
SawStore = false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 08f046420fa1..a69aa6557e46 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -78,7 +78,7 @@ void LiveRegMatrix::releaseMemory() {
template <typename Callable>
static bool foreachUnit(const TargetRegisterInfo *TRI,
- LiveInterval &VRegInterval, unsigned PhysReg,
+ LiveInterval &VRegInterval, MCRegister PhysReg,
Callable Func) {
if (VRegInterval.hasSubRanges()) {
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
@@ -101,11 +101,11 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,
return false;
}
-void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
- LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI) << " to "
+void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) {
+ LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg(), TRI) << " to "
<< printReg(PhysReg, TRI) << ':');
- assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
- VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg(), PhysReg);
foreachUnit(
TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) {
@@ -119,10 +119,10 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
}
void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
- Register PhysReg = VRM->getPhys(VirtReg.reg);
- LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from "
- << printReg(PhysReg, TRI) << ':');
- VRM->clearVirt(VirtReg.reg);
+ Register PhysReg = VRM->getPhys(VirtReg.reg());
+ LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg(), TRI)
+ << " from " << printReg(PhysReg, TRI) << ':');
+ VRM->clearVirt(VirtReg.reg());
foreachUnit(TRI, VirtReg, PhysReg,
[&](unsigned Unit, const LiveRange &Range) {
@@ -135,7 +135,7 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
LLVM_DEBUG(dbgs() << '\n');
}
-bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const {
+bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const {
for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
if (!Matrix[*Unit].empty())
return true;
@@ -144,12 +144,12 @@ bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const {
}
bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
- unsigned PhysReg) {
+ MCRegister PhysReg) {
// Check if the cached information is valid.
// The same BitVector can be reused for all PhysRegs.
// We could cache multiple VirtRegs if it becomes necessary.
- if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) {
- RegMaskVirtReg = VirtReg.reg;
+ if (RegMaskVirtReg != VirtReg.reg() || RegMaskTag != UserTag) {
+ RegMaskVirtReg = VirtReg.reg();
RegMaskTag = UserTag;
RegMaskUsable.clear();
LIS->checkRegMaskInterference(VirtReg, RegMaskUsable);
@@ -162,10 +162,10 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
}
bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
- unsigned PhysReg) {
+ MCRegister PhysReg) {
if (VirtReg.empty())
return false;
- CoalescerPair CP(VirtReg.reg, PhysReg, *TRI);
+ CoalescerPair CP(VirtReg.reg(), PhysReg, *TRI);
bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
const LiveRange &Range) {
@@ -176,14 +176,14 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
}
LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
- unsigned RegUnit) {
+ MCRegister RegUnit) {
LiveIntervalUnion::Query &Q = Queries[RegUnit];
Q.init(UserTag, LR, Matrix[RegUnit]);
return Q;
}
LiveRegMatrix::InterferenceKind
-LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
+LiveRegMatrix::checkInterference(LiveInterval &VirtReg, MCRegister PhysReg) {
if (VirtReg.empty())
return IK_Free;
@@ -197,9 +197,9 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
// Check the matrix for virtual register interference.
bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
- [&](unsigned Unit, const LiveRange &LR) {
- return query(LR, Unit).checkInterference();
- });
+ [&](MCRegister Unit, const LiveRange &LR) {
+ return query(LR, Unit).checkInterference();
+ });
if (Interference)
return IK_VirtReg;
@@ -207,7 +207,7 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
}
bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
- unsigned PhysReg) {
+ MCRegister PhysReg) {
// Construct artificial live range containing only one segment [Start, End).
VNInfo valno(0, Start);
LiveRange::Segment Seg(Start, End, &valno);
@@ -221,3 +221,13 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
}
return false;
}
+
+Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
+ LiveInterval *VRegInterval = nullptr;
+ for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
+ if ((VRegInterval = Matrix[*Unit].getOneVReg()))
+ return VRegInterval->reg();
+ }
+
+ return MCRegister::NoRegister;
+}
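
LiveRegMatrix now takes MCRegister for its physical-register parameters instead of unsigned, and the new getOneVReg helper above reports a virtual register currently assigned to a physreg. A small sketch of querying and assigning through the typed interface, assuming LLVM 12; the tryAssign wrapper is illustrative:

#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/MC/MCRegister.h"

using namespace llvm;

// Assign VirtReg to PhysReg only if the matrix reports no interference.
// PhysReg is an MCRegister rather than a plain unsigned, which documents at
// the type level that only physical registers are meaningful here.
static bool tryAssign(LiveInterval &VirtReg, MCRegister PhysReg,
                      LiveRegMatrix &Matrix) {
  if (Matrix.checkInterference(VirtReg, PhysReg) != LiveRegMatrix::IK_Free)
    return false;
  Matrix.assign(VirtReg, PhysReg);
  return true;
}
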
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
index b2731aa0e7db..ea2075bc139d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -11,15 +11,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveRegUnits.h"
-
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index 6610491dd111..49b880c30936 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -82,17 +82,15 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
#endif
/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
-LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
- assert(Register::isVirtualRegister(RegIdx) &&
- "getVarInfo: not a virtual register!");
- VirtRegInfo.grow(RegIdx);
- return VirtRegInfo[RegIdx];
+LiveVariables::VarInfo &LiveVariables::getVarInfo(Register Reg) {
+ assert(Reg.isVirtual() && "getVarInfo: not a virtual register!");
+ VirtRegInfo.grow(Reg);
+ return VirtRegInfo[Reg];
}
-void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
- MachineBasicBlock *DefBlock,
- MachineBasicBlock *MBB,
- std::vector<MachineBasicBlock*> &WorkList) {
+void LiveVariables::MarkVirtRegAliveInBlock(
+ VarInfo &VRInfo, MachineBasicBlock *DefBlock, MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &WorkList) {
unsigned BBNum = MBB->getNumber();
// Check to see if this basic block is one of the killing blocks. If so,
@@ -118,7 +116,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
MachineBasicBlock *DefBlock,
MachineBasicBlock *MBB) {
- std::vector<MachineBasicBlock*> WorkList;
+ SmallVector<MachineBasicBlock *, 16> WorkList;
MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
while (!WorkList.empty()) {
@@ -128,13 +126,13 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
}
}
-void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+void LiveVariables::HandleVirtRegUse(Register Reg, MachineBasicBlock *MBB,
MachineInstr &MI) {
- assert(MRI->getVRegDef(reg) && "Register use before def!");
+ assert(MRI->getVRegDef(Reg) && "Register use before def!");
unsigned BBNum = MBB->getNumber();
- VarInfo& VRInfo = getVarInfo(reg);
+ VarInfo &VRInfo = getVarInfo(Reg);
// Check to see if this basic block is already a kill block.
if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
@@ -165,7 +163,8 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
// where there is a use in a PHI node that's a predecessor to the defining
// block. We don't want to mark all predecessors as having the value "alive"
// in this case.
- if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+ if (MBB == MRI->getVRegDef(Reg)->getParent())
+ return;
// Add a new kill entry for this basic block. If this virtual register is
// already marked as alive in this basic block, that means it is alive in at
@@ -176,10 +175,10 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
// Update all dominating blocks to mark them as "known live".
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
E = MBB->pred_end(); PI != E; ++PI)
- MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(Reg)->getParent(), *PI);
}
-void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr &MI) {
+void LiveVariables::HandleVirtRegDef(Register Reg, MachineInstr &MI) {
VarInfo &VRInfo = getVarInfo(Reg);
if (VRInfo.AliveBlocks.empty())
@@ -189,8 +188,9 @@ void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr &MI) {
/// FindLastPartialDef - Return the last partial def of the specified register.
/// Also returns the sub-registers that're defined by the instruction.
-MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
- SmallSet<unsigned,4> &PartDefRegs) {
+MachineInstr *
+LiveVariables::FindLastPartialDef(Register Reg,
+ SmallSet<unsigned, 4> &PartDefRegs) {
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
MachineInstr *LastDef = nullptr;
@@ -228,7 +228,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
/// implicit defs to a machine instruction if there was an earlier def of its
/// super-register.
-void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) {
+void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
MachineInstr *LastDef = PhysRegDef[Reg];
// If there was a previous use or a "full" def all is well.
if (!LastDef && !PhysRegUse[Reg]) {
@@ -278,7 +278,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) {
/// FindLastRefOrPartRef - Return the last reference or partial reference of
/// the specified register.
-MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+MachineInstr *LiveVariables::FindLastRefOrPartRef(Register Reg) {
MachineInstr *LastDef = PhysRegDef[Reg];
MachineInstr *LastUse = PhysRegUse[Reg];
if (!LastDef && !LastUse)
@@ -308,7 +308,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
return LastRefOrPartRef;
}
-bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
MachineInstr *LastDef = PhysRegDef[Reg];
MachineInstr *LastUse = PhysRegUse[Reg];
if (!LastDef && !LastUse)
@@ -440,7 +440,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) {
}
}
-void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
SmallVectorImpl<unsigned> &Defs) {
// What parts of the register are previously defined?
SmallSet<unsigned, 32> Live;
@@ -486,7 +486,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
while (!Defs.empty()) {
- unsigned Reg = Defs.back();
+ Register Reg = Defs.back();
Defs.pop_back();
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs) {
@@ -653,7 +653,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Convert and transfer the dead / killed information we have gathered into
// VirtRegInfo onto MI's.
for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
- const unsigned Reg = Register::index2VirtReg(i);
+ const Register Reg = Register::index2VirtReg(i);
for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
@@ -666,7 +666,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// other part of the code generator if this happens.
#ifndef NDEBUG
for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
- assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+ assert(Visited.contains(&*i) && "unreachable basic block found");
#endif
PhysRegDef.clear();
@@ -678,7 +678,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
/// replaceKillInstruction - Update register kill info by replacing a kill
/// instruction with a new one.
-void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr &OldMI,
+void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI,
MachineInstr &NewMI) {
VarInfo &VI = getVarInfo(Reg);
std::replace(VI.Kills.begin(), VI.Kills.end(), &OldMI, &NewMI);
@@ -718,8 +718,7 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
}
bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
- unsigned Reg,
- MachineRegisterInfo &MRI) {
+ Register Reg, MachineRegisterInfo &MRI) {
unsigned Num = MBB.getNumber();
// Reg is live-through.
@@ -735,7 +734,7 @@ bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
return findKill(&MBB);
}
-bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+bool LiveVariables::isLiveOut(Register Reg, const MachineBasicBlock &MBB) {
LiveVariables::VarInfo &VI = getVarInfo(Reg);
SmallPtrSet<const MachineBasicBlock *, 8> Kills;
@@ -793,7 +792,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
// Update info for all live variables
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
// If the Defs is defined in the successor it can't be live in BB.
if (Defs.count(Reg))
@@ -819,7 +818,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
SparseBitVector<> &BV = LiveInSets[SuccBB->getNumber()];
for (auto R = BV.begin(), E = BV.end(); R != E; R++) {
- unsigned VirtReg = Register::index2VirtReg(*R);
+ Register VirtReg = Register::index2VirtReg(*R);
LiveVariables::VarInfo &VI = getVarInfo(VirtReg);
VI.AliveBlocks.set(NumNew);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 204fb556d810..ec6e693e8a46 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -117,7 +117,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
// If the target doesn't want/need this pass, or if there are no locals
// to consider, early exit.
- if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+ if (LocalObjectCount == 0 || !TRI->requiresVirtualBaseRegisters(MF))
return true;
// Make sure we have enough space to store the local offsets.
@@ -416,15 +416,16 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
- LLVM_DEBUG(dbgs() << " Materializing base register " << BaseReg
+ LLVM_DEBUG(dbgs() << " Materializing base register"
<< " at frame local offset "
- << LocalOffset + InstrOffset << "\n");
+ << LocalOffset + InstrOffset);
// Tell the target to insert the instruction to initialize
// the base register.
// MachineBasicBlock::iterator InsertionPt = Entry->begin();
- TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
- InstrOffset);
+ BaseReg = TRI->materializeFrameBaseRegister(Entry, FrameIdx, InstrOffset);
+
+ LLVM_DEBUG(dbgs() << " into " << printReg(BaseReg, TRI) << '\n');
// The base register already includes any offset specified
// by the instruction, so account for that so it doesn't get
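
The LocalStackSlotAllocation hunk above reflects a TargetRegisterInfo API change: materializeFrameBaseRegister now creates the virtual base register itself and returns it, instead of filling in a register the caller pre-allocated. A hedged before/after sketch, assuming LLVM 12; the materializeBase wrapper is illustrative:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// LLVM 11 style (removed above): the caller created BaseReg and the target
// initialized it in place:
//   TRI.materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, InstrOffset);
//
// LLVM 12 style: the target chooses the register class and returns the vreg.
static Register materializeBase(const TargetRegisterInfo &TRI,
                                MachineBasicBlock *Entry, int FrameIdx,
                                int64_t InstrOffset) {
  return TRI.materializeFrameBaseRegister(Entry, FrameIdx, InstrOffset);
}
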
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
index 33752a1f9230..2bda586db8c7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/raw_ostream.h"
@@ -58,3 +59,18 @@ LLT llvm::getLLTForMVT(MVT Ty) {
return LLT::vector(Ty.getVectorNumElements(),
Ty.getVectorElementType().getSizeInBits());
}
+
+const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
+ assert(Ty.isScalar() && "Expected a scalar type.");
+ switch (Ty.getSizeInBits()) {
+ case 16:
+ return APFloat::IEEEhalf();
+ case 32:
+ return APFloat::IEEEsingle();
+ case 64:
+ return APFloat::IEEEdouble();
+ case 128:
+ return APFloat::IEEEquad();
+ }
+ llvm_unreachable("Invalid FP type size.");
+}
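
getFltSemanticForLLT, added above, maps a 16/32/64/128-bit scalar LLT to the matching IEEE APFloat semantics. A minimal usage sketch, assuming LLVM 12 headers; the fpZeroFor helper is illustrative:

#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/LowLevelType.h"

using namespace llvm;

// Build a floating-point zero with the semantics implied by a scalar LLT.
// getFltSemanticForLLT asserts on non-scalar types and is unreachable for
// unsupported sizes, mirroring the switch added in the hunk above.
static APFloat fpZeroFor(LLT Ty) {
  const fltSemantics &Sem = getFltSemanticForLLT(Ty);
  return APFloat::getZero(Sem);
}

// e.g. fpZeroFor(LLT::scalar(32)) produces an IEEE single-precision 0.0.
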
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
index 36b863178b47..a06d1d6255c7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -15,8 +15,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
@@ -44,6 +44,7 @@ private:
GlobalVariable *to) {
to->setLinkage(from->getLinkage());
to->setVisibility(from->getVisibility());
+ to->setDSOLocal(from->isDSOLocal());
if (from->hasComdat()) {
to->setComdat(M.getOrInsertComdat(to->getName()));
to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
index 5110f75ebb42..4755defec793 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
@@ -30,6 +30,18 @@ void MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
MergedBBFreq[MBB] = F;
}
+Optional<uint64_t>
+MBFIWrapper::getBlockProfileCount(const MachineBasicBlock *MBB) const {
+ auto I = MergedBBFreq.find(MBB);
+
+ // Modified block frequency also impacts profile count. So we should compute
+ // profile count from new block frequency if it has been changed.
+ if (I != MergedBBFreq.end())
+ return MBFI.getProfileCountFromFreq(I->second.getFrequency());
+
+ return MBFI.getBlockProfileCount(MBB);
+}
+
raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS,
const MachineBasicBlock *MBB) const {
return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 9eddb8626f60..8ef6aca602a1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -85,9 +85,7 @@ static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
return {};
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
std::vector<MachineBasicBlock *> RPOList;
- for (auto MBB : RPOT) {
- RPOList.push_back(MBB);
- }
+ append_range(RPOList, RPOT);
return RPOList;
}
@@ -108,7 +106,7 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions,
OS.flush();
// Trim the assignment, or start from the beginning in the case of a store.
- const size_t i = S.find("=");
+ const size_t i = S.find('=');
StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
}
@@ -198,8 +196,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
if (II->getOperand(i).isReg()) {
if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
- if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
- PhysRegDefs.end()) {
+ if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
continue;
}
}
@@ -276,9 +273,9 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
// Sort the defs for users of multiple defs lexographically.
for (const auto &E : MultiUserLookup) {
- auto UseI =
- std::find_if(MBB->instr_begin(), MBB->instr_end(),
- [&](MachineInstr &MI) -> bool { return &MI == E.second; });
+ auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
+ return &MI == E.second;
+ });
if (UseI == MBB->instr_end())
continue;
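
Several hunks above swap hand-written iterator loops for LLVM's STLExtras range helpers: append_range, is_contained, and the range form of find_if (the MachineBlockPlacement changes later in this diff do the same with erase_if and erase_value). A self-contained sketch of these idioms, assuming LLVM's ADT headers; the data is illustrative:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

static void stlExtrasIdioms() {
  SmallVector<int, 8> Dst;
  const int Src[] = {1, 2, 3, 4};

  append_range(Dst, Src);               // push_back each element of Src
  bool HasThree = is_contained(Dst, 3); // replaces find(...) != end()
  auto *FirstEven =
      find_if(Dst, [](int V) { return (V % 2) == 0; }); // range-based find_if
  erase_if(Dst, [](int V) { return V > 2; }); // remove_if + erase in one call

  (void)HasThree;
  (void)FirstEven;
}
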
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 98af46dc4872..b86fd6b41318 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -212,11 +212,12 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("contract", MIToken::kw_contract)
.Case("afn", MIToken::kw_afn)
.Case("reassoc", MIToken::kw_reassoc)
- .Case("nuw" , MIToken::kw_nuw)
- .Case("nsw" , MIToken::kw_nsw)
- .Case("exact" , MIToken::kw_exact)
+ .Case("nuw", MIToken::kw_nuw)
+ .Case("nsw", MIToken::kw_nsw)
+ .Case("exact", MIToken::kw_exact)
.Case("nofpexcept", MIToken::kw_nofpexcept)
.Case("debug-location", MIToken::kw_debug_location)
+ .Case("debug-instr-number", MIToken::kw_debug_instr_number)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
.Case("rel_offset", MIToken::kw_cfi_rel_offset)
@@ -231,7 +232,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("undefined", MIToken::kw_cfi_undefined)
.Case("register", MIToken::kw_cfi_register)
.Case("window_save", MIToken::kw_cfi_window_save)
- .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state)
+ .Case("negate_ra_sign_state",
+ MIToken::kw_cfi_aarch64_negate_ra_sign_state)
.Case("blockaddress", MIToken::kw_blockaddress)
.Case("intrinsic", MIToken::kw_intrinsic)
.Case("target-index", MIToken::kw_target_index)
@@ -247,6 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("dereferenceable", MIToken::kw_dereferenceable)
.Case("invariant", MIToken::kw_invariant)
.Case("align", MIToken::kw_align)
+ .Case("basealign", MIToken::kw_align)
.Case("addrspace", MIToken::kw_addrspace)
.Case("stack", MIToken::kw_stack)
.Case("got", MIToken::kw_got)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
index ef16da94d21b..452eda721331 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -74,6 +74,7 @@ struct MIToken {
kw_exact,
kw_nofpexcept,
kw_debug_location,
+ kw_debug_instr_number,
kw_cfi_same_value,
kw_cfi_offset,
kw_cfi_rel_offset,
@@ -103,6 +104,7 @@ struct MIToken {
kw_non_temporal,
kw_invariant,
kw_align,
+ kw_basealign,
kw_addrspace,
kw_stack,
kw_got,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index ded31cd08fb5..fe979b981886 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -369,10 +369,7 @@ static void initSlots2Values(const Function &F,
const Value* PerFunctionMIParsingState::getIRValue(unsigned Slot) {
if (Slots2Values.empty())
initSlots2Values(MF.getFunction(), Slots2Values);
- auto ValueInfo = Slots2Values.find(Slot);
- if (ValueInfo == Slots2Values.end())
- return nullptr;
- return ValueInfo->second;
+ return Slots2Values.lookup(Slot);
}
namespace {
@@ -984,6 +981,7 @@ bool MIParser::parse(MachineInstr *&MI) {
Token.isNot(MIToken::kw_post_instr_symbol) &&
Token.isNot(MIToken::kw_heap_alloc_marker) &&
Token.isNot(MIToken::kw_debug_location) &&
+ Token.isNot(MIToken::kw_debug_instr_number) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
auto Loc = Token.location();
Optional<unsigned> TiedDefIdx;
@@ -1014,6 +1012,19 @@ bool MIParser::parse(MachineInstr *&MI) {
if (parseHeapAllocMarker(HeapAllocMarker))
return true;
+ unsigned InstrNum = 0;
+ if (Token.is(MIToken::kw_debug_instr_number)) {
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after 'debug-instr-number'");
+ if (getUnsigned(InstrNum))
+ return true;
+ lex();
+ // Lex past trailing comma if present.
+ if (Token.is(MIToken::comma))
+ lex();
+ }
+
DebugLoc DebugLocation;
if (Token.is(MIToken::kw_debug_location)) {
lex();
@@ -1070,6 +1081,8 @@ bool MIParser::parse(MachineInstr *&MI) {
MI->setHeapAllocMarker(MF, HeapAllocMarker);
if (!MemOperands.empty())
MI->setMemRefs(MF, MemOperands);
+ if (InstrNum)
+ MI->setDebugInstrNum(InstrNum);
return false;
}
@@ -2713,7 +2726,7 @@ bool MIParser::parseOffset(int64_t &Offset) {
}
bool MIParser::parseAlignment(unsigned &Alignment) {
- assert(Token.is(MIToken::kw_align));
+ assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign));
lex();
if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
return error("expected an integer literal after 'align'");
@@ -3061,6 +3074,12 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
while (consumeIfPresent(MIToken::comma)) {
switch (Token.kind()) {
case MIToken::kw_align:
+ // align is printed if it is different than size.
+ if (parseAlignment(BaseAlignment))
+ return true;
+ break;
+ case MIToken::kw_basealign:
+ // basealign is printed if it is different than align.
if (parseAlignment(BaseAlignment))
return true;
break;
@@ -3153,10 +3172,7 @@ static void initSlots2BasicBlocks(
static const BasicBlock *getIRBlockFromSlot(
unsigned Slot,
const DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
- auto BlockInfo = Slots2BasicBlocks.find(Slot);
- if (BlockInfo == Slots2BasicBlocks.end())
- return nullptr;
- return BlockInfo->second;
+ return Slots2BasicBlocks.lookup(Slot);
}
const BasicBlock *MIParser::getIRBlock(unsigned Slot) {
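
The parser hunks above teach MIR about an optional per-instruction 'debug-instr-number N' attribute used by instruction-referencing debug info; the value round-trips through MachineInstr::setDebugInstrNum and peekDebugInstrNum. A minimal sketch of that round trip, assuming LLVM 12; the numberInstruction helper is illustrative:

#include "llvm/CodeGen/MachineInstr.h"
#include <cassert>

using namespace llvm;

// Tag MI with instruction number Num. The MIRPrinter changes later in this
// diff then emit " debug-instr-number <Num>" for it, and the MIParser hunks
// above restore the value with setDebugInstrNum when the .mir file is
// reloaded.
static void numberInstruction(MachineInstr &MI, unsigned Num) {
  assert(Num != 0 && "0 is the 'not numbered' sentinel value");
  MI.setDebugInstrNum(Num);
  assert(MI.peekDebugInstrNum() == Num);
}
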
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 2e0b0e745e9e..ffa9aeb21edb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -161,6 +161,9 @@ private:
SMRange SourceRange);
void computeFunctionProperties(MachineFunction &MF);
+
+ void setupDebugValueTracking(MachineFunction &MF,
+ PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF);
};
} // end namespace llvm
@@ -322,9 +325,14 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
static bool isSSA(const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg))
return false;
+
+ // Subregister defs are invalid in SSA.
+ const MachineOperand *RegDef = MRI.getOneDef(Reg);
+ if (RegDef && RegDef->getSubReg() != 0)
+ return false;
}
return true;
}
@@ -397,6 +405,23 @@ bool MIRParserImpl::initializeCallSiteInfo(
return false;
}
+void MIRParserImpl::setupDebugValueTracking(
+ MachineFunction &MF, PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ // Compute the value of the "next instruction number" field.
+ unsigned MaxInstrNum = 0;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ MaxInstrNum = std::max((unsigned)MI.peekDebugInstrNum(), MaxInstrNum);
+ MF.setDebugInstrNumberingCount(MaxInstrNum);
+
+ // Load any substitutions.
+ for (auto &Sub : YamlMF.DebugValueSubstitutions) {
+ MF.makeDebugValueSubstitution(std::make_pair(Sub.SrcInst, Sub.SrcOp),
+ std::make_pair(Sub.DstInst, Sub.DstOp));
+ }
+}
+
bool
MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MachineFunction &MF) {
@@ -446,10 +471,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
}
// Check Basic Block Section Flags.
if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) {
- MF.createBBLabels();
MF.setBBSectionsType(BasicBlockSection::Labels);
} else if (MF.hasBBSections()) {
- MF.createBBLabels();
MF.assignBeginEndSections();
}
PFS.SM = &SM;
@@ -507,6 +530,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (initializeCallSiteInfo(PFS, YamlMF))
return false;
+ setupDebugValueTracking(MF, PFS, YamlMF);
+
MF.getSubtarget().mirFileLoaded(MF);
MF.verify();
@@ -634,6 +659,12 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
// Compute MachineRegisterInfo::UsedPhysRegMask
for (const MachineBasicBlock &MBB : MF) {
+ // Make sure MRI knows about registers clobbered by unwinder.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (MBB.isEHPad())
+ if (auto *RegMask = TRI->getCustomEHPadPreservedMask(MF))
+ MRI.addPhysRegsUsedFromRegMask(RegMask);
+
for (const MachineInstr &MI : MBB) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
index fa23df6288e9..eae174019b56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -220,6 +220,10 @@ void MIRPrinter::print(const MachineFunction &MF) {
convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
convertStackObjects(YamlMF, MF, MST);
convertCallSiteObjects(YamlMF, MF, MST);
+ for (auto &Sub : MF.DebugValueSubstitutions)
+ YamlMF.DebugValueSubstitutions.push_back({Sub.first.first, Sub.first.second,
+ Sub.second.first,
+ Sub.second.second});
if (const auto *ConstantPool = MF.getConstantPool())
convert(YamlMF, *ConstantPool);
if (const auto *JumpTableInfo = MF.getJumpTableInfo())
@@ -363,9 +367,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
ModuleSlotTracker &MST) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
// Process fixed stack objects.
+ assert(YMF.FixedStackObjects.empty());
+ SmallVector<int, 32> FixedStackObjectsIdx;
+ const int BeginIdx = MFI.getObjectIndexBegin();
+ if (BeginIdx < 0)
+ FixedStackObjectsIdx.reserve(-BeginIdx);
+
unsigned ID = 0;
- for (int I = MFI.getObjectIndexBegin(); I < 0; ++I, ++ID) {
+ for (int I = BeginIdx; I < 0; ++I, ++ID) {
+ FixedStackObjectsIdx.push_back(-1); // Fill index for possible dead.
if (MFI.isDeadObjectIndex(I))
continue;
@@ -380,14 +392,22 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
+ // Save the ID's position in FixedStackObjects storage vector.

+ FixedStackObjectsIdx[ID] = YMF.FixedStackObjects.size();
YMF.FixedStackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(
std::make_pair(I, FrameIndexOperand::createFixed(ID)));
}
// Process ordinary stack objects.
+ assert(YMF.StackObjects.empty());
+ SmallVector<unsigned, 32> StackObjectsIdx;
+ const int EndIdx = MFI.getObjectIndexEnd();
+ if (EndIdx > 0)
+ StackObjectsIdx.reserve(EndIdx);
ID = 0;
- for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I, ++ID) {
+ for (int I = 0; I < EndIdx; ++I, ++ID) {
+ StackObjectsIdx.push_back(-1); // Fill index for possible dead.
if (MFI.isDeadObjectIndex(I))
continue;
@@ -395,7 +415,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.ID = ID;
if (const auto *Alloca = MFI.getObjectAllocation(I))
YamlObject.Name.Value = std::string(
- Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>");
+ Alloca->hasName() ? Alloca->getName() : "");
YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
? yaml::MachineStackObject::SpillSlot
: MFI.isVariableSizedObjectIndex(I)
@@ -406,41 +426,42 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.Alignment = MFI.getObjectAlign(I);
YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
+ // Save the ID's position in StackObjects storage vector.
+ StackObjectsIdx[ID] = YMF.StackObjects.size();
YMF.StackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(std::make_pair(
I, FrameIndexOperand::create(YamlObject.Name.Value, ID)));
}
for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
- if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(CSInfo.getFrameIdx()))
+ const int FrameIdx = CSInfo.getFrameIdx();
+ if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(FrameIdx))
continue;
yaml::StringValue Reg;
printRegMIR(CSInfo.getReg(), Reg, TRI);
if (!CSInfo.isSpilledToReg()) {
- auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
- assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ assert(FrameIdx >= MFI.getObjectIndexBegin() &&
+ FrameIdx < MFI.getObjectIndexEnd() &&
"Invalid stack object index");
- const FrameIndexOperand &StackObject = StackObjectInfo->second;
- if (StackObject.IsFixed) {
- YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
- CSInfo.isRestored();
+ if (FrameIdx < 0) { // Negative index means fixed objects.
+ auto &Object =
+ YMF.FixedStackObjects
+ [FixedStackObjectsIdx[FrameIdx + MFI.getNumFixedObjects()]];
+ Object.CalleeSavedRegister = Reg;
+ Object.CalleeSavedRestored = CSInfo.isRestored();
} else {
- YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
- CSInfo.isRestored();
+ auto &Object = YMF.StackObjects[StackObjectsIdx[FrameIdx]];
+ Object.CalleeSavedRegister = Reg;
+ Object.CalleeSavedRestored = CSInfo.isRestored();
}
}
}
for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
auto LocalObject = MFI.getLocalFrameObjectMap(I);
- auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first);
- assert(StackObjectInfo != StackObjectOperandMapping.end() &&
- "Invalid stack object index");
- const FrameIndexOperand &StackObject = StackObjectInfo->second;
- assert(!StackObject.IsFixed && "Expected a locally mapped stack object");
- YMF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
+ assert(LocalObject.first >= 0 && "Expected a locally mapped stack object");
+ YMF.StackObjects[StackObjectsIdx[LocalObject.first]].LocalOffset =
+ LocalObject.second;
}
// Print the stack object references in the frame information class after
@@ -454,15 +475,16 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
// Print the debug variable information.
for (const MachineFunction::VariableDbgInfo &DebugVar :
MF.getVariableDbgInfo()) {
- auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot);
- assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ assert(DebugVar.Slot >= MFI.getObjectIndexBegin() &&
+ DebugVar.Slot < MFI.getObjectIndexEnd() &&
"Invalid stack object index");
- const FrameIndexOperand &StackObject = StackObjectInfo->second;
- if (StackObject.IsFixed) {
- auto &Object = YMF.FixedStackObjects[StackObject.ID];
+ if (DebugVar.Slot < 0) { // Negative index means fixed objects.
+ auto &Object =
+ YMF.FixedStackObjects[FixedStackObjectsIdx[DebugVar.Slot +
+ MFI.getNumFixedObjects()]];
printStackObjectDbgInfo(DebugVar, Object, MST);
} else {
- auto &Object = YMF.StackObjects[StackObject.ID];
+ auto &Object = YMF.StackObjects[StackObjectsIdx[DebugVar.Slot]];
printStackObjectDbgInfo(DebugVar, Object, MST);
}
}
@@ -608,58 +630,10 @@ bool MIPrinter::canPredictSuccessors(const MachineBasicBlock &MBB) const {
void MIPrinter::print(const MachineBasicBlock &MBB) {
assert(MBB.getNumber() >= 0 && "Invalid MBB number");
- OS << "bb." << MBB.getNumber();
- bool HasAttributes = false;
- if (const auto *BB = MBB.getBasicBlock()) {
- if (BB->hasName()) {
- OS << "." << BB->getName();
- } else {
- HasAttributes = true;
- OS << " (";
- int Slot = MST.getLocalSlot(BB);
- if (Slot == -1)
- OS << "<ir-block badref>";
- else
- OS << (Twine("%ir-block.") + Twine(Slot)).str();
- }
- }
- if (MBB.hasAddressTaken()) {
- OS << (HasAttributes ? ", " : " (");
- OS << "address-taken";
- HasAttributes = true;
- }
- if (MBB.isEHPad()) {
- OS << (HasAttributes ? ", " : " (");
- OS << "landing-pad";
- HasAttributes = true;
- }
- if (MBB.isEHFuncletEntry()) {
- OS << (HasAttributes ? ", " : " (");
- OS << "ehfunclet-entry";
- HasAttributes = true;
- }
- if (MBB.getAlignment() != Align(1)) {
- OS << (HasAttributes ? ", " : " (");
- OS << "align " << MBB.getAlignment().value();
- HasAttributes = true;
- }
- if (MBB.getSectionID() != MBBSectionID(0)) {
- OS << (HasAttributes ? ", " : " (");
- OS << "bbsections ";
- switch (MBB.getSectionID().Type) {
- case MBBSectionID::SectionType::Exception:
- OS << "Exception";
- break;
- case MBBSectionID::SectionType::Cold:
- OS << "Cold";
- break;
- default:
- OS << MBB.getSectionID().Number;
- }
- HasAttributes = true;
- }
- if (HasAttributes)
- OS << ")";
+ MBB.printName(OS,
+ MachineBasicBlock::PrintNameIr |
+ MachineBasicBlock::PrintNameAttributes,
+ &MST);
OS << ":\n";
bool HasLineAttributes = false;
@@ -818,6 +792,13 @@ void MIPrinter::print(const MachineInstr &MI) {
NeedComma = true;
}
+ if (auto Num = MI.peekDebugInstrNum()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " debug-instr-number " << Num;
+ NeedComma = true;
+ }
+
if (PrintLocations) {
if (const DebugLoc &DL = MI.getDebugLoc()) {
if (NeedComma)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index 54441301d65b..3d4f66f31174 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -8,6 +8,7 @@
#include "MIRVRegNamerUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineStableHash.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
@@ -15,6 +16,11 @@ using namespace llvm;
#define DEBUG_TYPE "mir-vregnamer-utils"
+static cl::opt<bool>
+ UseStableNamerHash("mir-vreg-namer-use-stable-hash", cl::init(false),
+ cl::Hidden,
+ cl::desc("Use Stable Hashing for MIR VReg Renaming"));
+
using VRegRenameMap = std::map<unsigned, unsigned>;
bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) {
@@ -52,6 +58,14 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
std::string S;
raw_string_ostream OS(S);
+ if (UseStableNamerHash) {
+ auto Hash = stableHashValue(MI, /* HashVRegs */ true,
+ /* HashConstantPoolIndices */ true,
+ /* HashMemOperands */ true);
+ assert(Hash && "Expected non-zero Hash");
+ return std::to_string(Hash).substr(0, 5);
+ }
+
// Gets a hashable artifact from a given MachineOperand (ie an unsigned).
auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned {
switch (MO.getType()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 2d4b60435d96..b4187af02975 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -60,38 +60,25 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
if (!CachedMCSymbol) {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
- auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
- assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
-
- // We emit a non-temporary symbol for every basic block if we have BBLabels
- // or -- with basic block sections -- when a basic block begins a section.
- // With basic block symbols, we use a unary encoding which can
- // compress the symbol names significantly. For basic block sections where
- // this block is the first in a cluster, we use a non-temp descriptive name.
- // Otherwise we fall back to use temp label.
- if (MF->hasBBLabels()) {
- auto Iter = MF->getBBSectionsSymbolPrefix().begin();
- if (getNumber() < 0 ||
- getNumber() >= (int)MF->getBBSectionsSymbolPrefix().size())
- report_fatal_error("Unreachable MBB: " + Twine(getNumber()));
- // The basic blocks for function foo are named a.BB.foo, aa.BB.foo, and
- // so on.
- std::string Prefix(Iter + 1, Iter + getNumber() + 1);
- std::reverse(Prefix.begin(), Prefix.end());
- CachedMCSymbol =
- Ctx.getOrCreateSymbol(Twine(Prefix) + ".BB." + Twine(MF->getName()));
- } else if (MF->hasBBSections() && isBeginSection()) {
+ // We emit a non-temporary symbol -- with a descriptive name -- if it begins
+ // a section (with basic block sections). Otherwise we fall back to use temp
+ // label.
+ if (MF->hasBBSections() && isBeginSection()) {
SmallString<5> Suffix;
if (SectionID == MBBSectionID::ColdSectionID) {
Suffix += ".cold";
} else if (SectionID == MBBSectionID::ExceptionSectionID) {
Suffix += ".eh";
} else {
- Suffix += "." + std::to_string(SectionID.Number);
+ // For symbols that represent basic block sections, we add ".__part." to
+ // allow tools like symbolizers to know that this represents a part of
+ // the original function.
+ Suffix = (Suffix + Twine(".__part.") + Twine(SectionID.Number)).str();
}
CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix);
} else {
+ const StringRef Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
"_" + Twine(getNumber()));
@@ -100,6 +87,17 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
return CachedMCSymbol;
}
+MCSymbol *MachineBasicBlock::getEndSymbol() const {
+ if (!CachedEndMCSymbol) {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+ CachedEndMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB_END" +
+ Twine(MF->getFunctionNumber()) +
+ "_" + Twine(getNumber()));
+ }
+ return CachedEndMCSymbol;
+}
raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
MBB.print(OS);
@@ -271,6 +269,10 @@ bool MachineBasicBlock::hasEHPadSuccessor() const {
return false;
}
+bool MachineBasicBlock::isEntryBlock() const {
+ return getParent()->begin() == getIterator();
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
print(dbgs());
@@ -338,39 +340,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes && PrintSlotIndexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
- OS << "bb." << getNumber();
- bool HasAttributes = false;
- if (const auto *BB = getBasicBlock()) {
- if (BB->hasName()) {
- OS << "." << BB->getName();
- } else {
- HasAttributes = true;
- OS << " (";
- int Slot = MST.getLocalSlot(BB);
- if (Slot == -1)
- OS << "<ir-block badref>";
- else
- OS << (Twine("%ir-block.") + Twine(Slot)).str();
- }
- }
-
- if (hasAddressTaken()) {
- OS << (HasAttributes ? ", " : " (");
- OS << "address-taken";
- HasAttributes = true;
- }
- if (isEHPad()) {
- OS << (HasAttributes ? ", " : " (");
- OS << "landing-pad";
- HasAttributes = true;
- }
- if (getAlignment() != Align(1)) {
- OS << (HasAttributes ? ", " : " (");
- OS << "align " << Log2(getAlignment());
- HasAttributes = true;
- }
- if (HasAttributes)
- OS << ")";
+ printName(OS, PrintNameIr | PrintNameAttributes, &MST);
OS << ":\n";
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -383,11 +353,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes) OS << '\t';
// Don't indent(2), align with previous line attributes.
OS << "; predecessors: ";
- for (auto I = pred_begin(), E = pred_end(); I != E; ++I) {
- if (I != pred_begin())
- OS << ", ";
- OS << printMBBReference(**I);
- }
+ ListSeparator LS;
+ for (auto *Pred : predecessors())
+ OS << LS << printMBBReference(*Pred);
OS << '\n';
HasLineAttributes = true;
}
@@ -396,10 +364,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes) OS << '\t';
// Print the successors
OS.indent(2) << "successors: ";
+ ListSeparator LS;
for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
- if (I != succ_begin())
- OS << ", ";
- OS << printMBBReference(**I);
+ OS << LS << printMBBReference(**I);
if (!Probs.empty())
OS << '('
<< format("0x%08" PRIx32, getSuccProbability(I).getNumerator())
@@ -408,11 +375,10 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (!Probs.empty() && IsStandalone) {
// Print human readable probabilities as comments.
OS << "; ";
+ ListSeparator LS;
for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
const BranchProbability &BP = getSuccProbability(I);
- if (I != succ_begin())
- OS << ", ";
- OS << printMBBReference(**I) << '('
+ OS << LS << printMBBReference(**I) << '('
<< format("%.2f%%",
rint(((double)BP.getNumerator() / BP.getDenominator()) *
100.0 * 100.0) /
@@ -429,12 +395,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes) OS << '\t';
OS.indent(2) << "liveins: ";
- bool First = true;
+ ListSeparator LS;
for (const auto &LI : liveins()) {
- if (!First)
- OS << ", ";
- First = false;
- OS << printReg(LI.PhysReg, TRI);
+ OS << LS << printReg(LI.PhysReg, TRI);
if (!LI.LaneMask.all())
OS << ":0x" << PrintLaneMask(LI.LaneMask);
}
@@ -478,9 +441,99 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
}
+/// Print the basic block's name as:
+///
+/// bb.{number}[.{ir-name}] [(attributes...)]
+///
+/// The {ir-name} is only printed when the \ref PrintNameIr flag is passed
+/// (which is the default). If the IR block has no name, it is identified
+/// numerically using the attribute syntax as "(%ir-block.{ir-slot})".
+///
+/// When the \ref PrintNameAttributes flag is passed, additional attributes
+/// of the block are printed when set.
+///
+/// \param printNameFlags Combination of \ref PrintNameFlag flags indicating
+/// the parts to print.
+/// \param moduleSlotTracker Optional ModuleSlotTracker. This method will
+/// incorporate its own tracker when necessary to
+/// determine the block's IR name.
+void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
+ ModuleSlotTracker *moduleSlotTracker) const {
+ os << "bb." << getNumber();
+ bool hasAttributes = false;
+
+ if (printNameFlags & PrintNameIr) {
+ if (const auto *bb = getBasicBlock()) {
+ if (bb->hasName()) {
+ os << '.' << bb->getName();
+ } else {
+ hasAttributes = true;
+ os << " (";
+
+ int slot = -1;
+
+ if (moduleSlotTracker) {
+ slot = moduleSlotTracker->getLocalSlot(bb);
+ } else if (bb->getParent()) {
+ ModuleSlotTracker tmpTracker(bb->getModule(), false);
+ tmpTracker.incorporateFunction(*bb->getParent());
+ slot = tmpTracker.getLocalSlot(bb);
+ }
+
+ if (slot == -1)
+ os << "<ir-block badref>";
+ else
+ os << (Twine("%ir-block.") + Twine(slot)).str();
+ }
+ }
+ }
+
+ if (printNameFlags & PrintNameAttributes) {
+ if (hasAddressTaken()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "address-taken";
+ hasAttributes = true;
+ }
+ if (isEHPad()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "landing-pad";
+ hasAttributes = true;
+ }
+ if (isEHFuncletEntry()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "ehfunclet-entry";
+ hasAttributes = true;
+ }
+ if (getAlignment() != Align(1)) {
+ os << (hasAttributes ? ", " : " (");
+ os << "align " << getAlignment().value();
+ hasAttributes = true;
+ }
+ if (getSectionID() != MBBSectionID(0)) {
+ os << (hasAttributes ? ", " : " (");
+ os << "bbsections ";
+ switch (getSectionID().Type) {
+ case MBBSectionID::SectionType::Exception:
+ os << "Exception";
+ break;
+ case MBBSectionID::SectionType::Cold:
+ os << "Cold";
+ break;
+ default:
+ os << getSectionID().Number;
+ }
+ hasAttributes = true;
+ }
+ }
+
+ if (hasAttributes)
+ os << ')';
+}
+
void MachineBasicBlock::printAsOperand(raw_ostream &OS,
bool /*PrintType*/) const {
- OS << "%bb." << getNumber();
+ OS << '%';
+ printName(OS, 0);
}
void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
@@ -530,7 +583,7 @@ void MachineBasicBlock::sortUniqueLiveIns() {
Register
MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) {
assert(getParent() && "MBB must be inserted in function");
- assert(PhysReg.isPhysical() && "Expected physreg");
+ assert(Register::isPhysicalRegister(PhysReg) && "Expected physreg");
assert(RC && "Register class is required");
assert((isEHPad() || this == &getParent()->front()) &&
"Only the entry block and landing pads can have physreg live ins");
@@ -696,7 +749,7 @@ void MachineBasicBlock::splitSuccessor(MachineBasicBlock *Old,
bool NormalizeSuccProbs) {
succ_iterator OldI = llvm::find(successors(), Old);
assert(OldI != succ_end() && "Old is not a successor of this block!");
- assert(llvm::find(successors(), New) == succ_end() &&
+ assert(!llvm::is_contained(successors(), New) &&
"New is already a successor of this block!");
// Add a new successor with equal probability as the original one. Note
@@ -775,7 +828,7 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
succ_iterator I) {
- if (Orig->Probs.empty())
+ if (!Orig->Probs.empty())
addSuccessor(*I, Orig->getSuccProbability(I));
else
addSuccessorWithoutProb(*I);
@@ -891,6 +944,47 @@ bool MachineBasicBlock::canFallThrough() {
return getFallThrough() != nullptr;
}
+MachineBasicBlock *MachineBasicBlock::splitAt(MachineInstr &MI,
+ bool UpdateLiveIns,
+ LiveIntervals *LIS) {
+ MachineBasicBlock::iterator SplitPoint(&MI);
+ ++SplitPoint;
+
+ if (SplitPoint == end()) {
+ // Don't bother with a new block.
+ return this;
+ }
+
+ MachineFunction *MF = getParent();
+
+ LivePhysRegs LiveRegs;
+ if (UpdateLiveIns) {
+ // Make sure we add any physregs we define in the block as liveins to the
+ // new block.
+ MachineBasicBlock::iterator Prev(&MI);
+ LiveRegs.init(*MF->getSubtarget().getRegisterInfo());
+ LiveRegs.addLiveOuts(*this);
+ for (auto I = rbegin(), E = Prev.getReverse(); I != E; ++I)
+ LiveRegs.stepBackward(*I);
+ }
+
+ MachineBasicBlock *SplitBB = MF->CreateMachineBasicBlock(getBasicBlock());
+
+ MF->insert(++MachineFunction::iterator(this), SplitBB);
+ SplitBB->splice(SplitBB->begin(), this, SplitPoint, end());
+
+ SplitBB->transferSuccessorsAndUpdatePHIs(this);
+ addSuccessor(SplitBB);
+
+ if (UpdateLiveIns)
+ addLiveIns(*SplitBB, LiveRegs);
+
+ if (LIS)
+ LIS->insertMBBInMaps(SplitBB);
+
+ return SplitBB;
+}
+
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *Succ, Pass &P,
std::vector<SparseBitVector<>> *LiveInSets) {
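
MachineBasicBlock gains splitAt above: it splits a block immediately after a given instruction, transfers the successors, optionally recomputes live-ins for the tail block, and can keep LiveIntervals up to date. A hedged usage sketch, assuming LLVM 12; the splitAfter wrapper is illustrative:

#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Split MI's parent block right after MI. The returned block holds the
// instructions that followed MI (or it is the original block when MI was
// already the last instruction). Pass a LiveIntervals pointer only when that
// analysis must be kept in sync; nullptr is accepted otherwise.
static MachineBasicBlock *splitAfter(MachineInstr &MI,
                                     LiveIntervals *LIS = nullptr) {
  MachineBasicBlock *MBB = MI.getParent();
  return MBB->splitAt(MI, /*UpdateLiveIns=*/true, LIS);
}
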
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 1168b01a835f..54e0a14e0555 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -241,16 +241,21 @@ MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
return MBFI ? MBFI->getProfileCountFromFreq(F, Freq) : None;
}
-bool
-MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) {
+bool MachineBlockFrequencyInfo::isIrrLoopHeader(
+ const MachineBasicBlock *MBB) const {
assert(MBFI && "Expected analysis to be available");
return MBFI->isIrrLoopHeader(MBB);
}
-void MachineBlockFrequencyInfo::setBlockFreq(const MachineBasicBlock *MBB,
- uint64_t Freq) {
+void MachineBlockFrequencyInfo::onEdgeSplit(
+ const MachineBasicBlock &NewPredecessor,
+ const MachineBasicBlock &NewSuccessor,
+ const MachineBranchProbabilityInfo &MBPI) {
assert(MBFI && "Expected analysis to be available");
- MBFI->setBlockFreq(MBB, Freq);
+ auto NewSuccFreq = MBFI->getBlockFreq(&NewPredecessor) *
+ MBPI.getEdgeProbability(&NewPredecessor, &NewSuccessor);
+
+ MBFI->setBlockFreq(&NewSuccessor, NewSuccFreq.getFrequency());
}
const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
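
onEdgeSplit, added above, replaces the ad-hoc setBlockFreq interface: when an edge Pred -> Succ is split through a new block, the new block's frequency is derived inside the analysis as freq(Pred) * P(Pred -> Succ). A short sketch of the call site, assuming LLVM 12; the updateFreqAfterSplit helper and block names are illustrative:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"

using namespace llvm;

// After a critical edge Pred -> Succ has been split through NewBB, let the
// analysis derive NewBB's frequency as freq(Pred) * P(Pred -> NewBB) instead
// of having the pass compute and set a raw frequency by hand.
static void updateFreqAfterSplit(MachineBlockFrequencyInfo &MBFI,
                                 const MachineBranchProbabilityInfo &MBPI,
                                 const MachineBasicBlock &Pred,
                                 const MachineBasicBlock &NewBB) {
  MBFI.onEdgeSplit(Pred, NewBB, MBPI);
}
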
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 783d22fafee9..048baa460e49 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -177,6 +177,14 @@ static cl::opt<unsigned> TailDupPlacementPenalty(
cl::init(2),
cl::Hidden);
+// Heuristic for tail duplication if profile count is used in cost model.
+static cl::opt<unsigned> TailDupProfilePercentThreshold(
+ "tail-dup-profile-percent-threshold",
+ cl::desc("If profile count information is used in tail duplication cost "
+ "model, the gained fall through number from tail duplication "
+ "should be at least this percent of hot count."),
+ cl::init(50), cl::Hidden);
+
// Heuristic for triangle chains.
static cl::opt<unsigned> TriangleChainCount(
"triangle-chain-count",
@@ -377,6 +385,10 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Partial tail duplication threshold.
BlockFrequency DupThreshold;
+ /// True: use block profile count to compute tail duplication cost.
+ /// False: use block frequency to compute tail duplication cost.
+ bool UseProfileCount;
+
/// Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -402,6 +414,19 @@ class MachineBlockPlacement : public MachineFunctionPass {
SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
#endif
+ /// Get block profile count or frequency according to UseProfileCount.
+ /// The return value is used to model tail duplication cost.
+ BlockFrequency getBlockCountOrFrequency(const MachineBasicBlock *BB) {
+ if (UseProfileCount) {
+ auto Count = MBFI->getBlockProfileCount(BB);
+ if (Count)
+ return *Count;
+ else
+ return 0;
+ } else
+ return MBFI->getBlockFreq(BB);
+ }
+
/// Scale the DupThreshold according to basic block size.
BlockFrequency scaleThreshold(MachineBasicBlock *BB);
void initDupThreshold();
@@ -424,10 +449,6 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBasicBlock *BB, const BlockChain &Chain,
const BlockFilterSet *BlockFilter,
SmallVector<MachineBasicBlock *, 4> &Successors);
- bool shouldPredBlockBeOutlined(
- const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
- const BlockChain &Chain, const BlockFilterSet *BlockFilter,
- BranchProbability SuccProb, BranchProbability HotProb);
bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
BlockFilterSet *BlockFilter);
void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
@@ -1652,11 +1673,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
// some code complexity) into the loop below.
- WorkList.erase(llvm::remove_if(WorkList,
- [&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
- WorkList.end());
+ llvm::erase_if(WorkList, [&](MachineBasicBlock *BB) {
+ return BlockToChain.lookup(BB) == &Chain;
+ });
if (WorkList.empty())
return nullptr;
@@ -2287,6 +2306,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
if (Bottom == ExitingBB)
return;
+ // The entry block should always be the first BB in a function.
+ if (Top->isEntryBlock())
+ return;
+
bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
// If the header has viable fallthrough, check whether the current loop
@@ -2361,6 +2384,11 @@ void MachineBlockPlacement::rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
auto RotationPos = LoopChain.end();
+ MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
+
+ // The entry block should always be the first BB in a function.
+ if (ChainHeaderBB->isEntryBlock())
+ return;
BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2379,7 +2407,6 @@ void MachineBlockPlacement::rotateLoopWithProfile(
// chain head is not the loop header. As we only consider natural loops with
// single header, this computation can be done only once.
BlockFrequency HeaderFallThroughCost(0);
- MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
for (auto *Pred : ChainHeaderBB->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (!LoopBlockSet.count(Pred) &&
@@ -2516,10 +2543,14 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
MBPI->getEdgeProbability(LoopPred, L.getHeader());
for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ if (LoopBlockSet.count(LoopBB))
+ continue;
auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
continue;
- LoopBlockSet.insert(LoopBB);
+ BlockChain *Chain = BlockToChain[LoopBB];
+ for (MachineBasicBlock *ChainBB : *Chain)
+ LoopBlockSet.insert(ChainBB);
}
} else
LoopBlockSet.insert(L.block_begin(), L.block_end());
@@ -3011,12 +3042,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
if (RemBB->isEHPad())
RemoveList = EHPadWorkList;
- RemoveList.erase(
- llvm::remove_if(RemoveList,
- [RemBB](MachineBasicBlock *BB) {
- return BB == RemBB;
- }),
- RemoveList.end());
+ llvm::erase_value(RemoveList, RemBB);
}
// Handle the filter set
@@ -3120,7 +3146,7 @@ bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
// Compute the number of reduced taken branches if Pred falls through to BB
// instead of another successor. Then compare it with threshold.
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
BlockFrequency Gain = PredFreq * (BBProb - BestProb);
return Gain > scaleThreshold(BB);
}
@@ -3134,8 +3160,8 @@ void MachineBlockPlacement::findDuplicateCandidates(
MachineBasicBlock *Fallthrough = nullptr;
BranchProbability DefaultBranchProb = BranchProbability::getZero();
BlockFrequency BBDupThreshold(scaleThreshold(BB));
- SmallVector<MachineBasicBlock *, 8> Preds(BB->pred_begin(), BB->pred_end());
- SmallVector<MachineBasicBlock *, 8> Succs(BB->succ_begin(), BB->succ_end());
+ SmallVector<MachineBasicBlock *, 8> Preds(BB->predecessors());
+ SmallVector<MachineBasicBlock *, 8> Succs(BB->successors());
// Sort for highest frequency.
auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
@@ -3194,7 +3220,7 @@ void MachineBlockPlacement::findDuplicateCandidates(
// it. But it can beneficially fall through to BB, and duplicate BB into other
// predecessors.
for (MachineBasicBlock *Pred : Preds) {
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
if (!TailDup.canTailDuplicate(BB, Pred)) {
// BB can't be duplicated into Pred, but it is possible to be layout
@@ -3243,6 +3269,15 @@ void MachineBlockPlacement::initDupThreshold() {
if (!F->getFunction().hasProfileData())
return;
+ // We prefer to use the profile count.
+ uint64_t HotThreshold = PSI->getOrCompHotCountThreshold();
+ if (HotThreshold != UINT64_MAX) {
+ UseProfileCount = true;
+ DupThreshold = HotThreshold * TailDupProfilePercentThreshold / 100;
+ return;
+ }
+
+ // Profile count is not available, so use block frequency instead.
BlockFrequency MaxFreq = 0;
for (MachineBasicBlock &MBB : *F) {
BlockFrequency Freq = MBFI->getBlockFreq(&MBB);
@@ -3250,10 +3285,9 @@ void MachineBlockPlacement::initDupThreshold() {
MaxFreq = Freq;
}
- // FIXME: we may use profile count instead of frequency,
- // and need more fine tuning.
BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
DupThreshold = MaxFreq * ThresholdProb;
+ UseProfileCount = false;
}
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
@@ -3326,8 +3360,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
unsigned TailMergeSize = TailDupSize + 1;
- BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
- *MBPI, PSI, TailMergeSize);
+ BranchFolder BF(/*DefaultEnableTailMerge=*/true, /*CommonHoist=*/false,
+ *MBFI, *MBPI, PSI, TailMergeSize);
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), MLI,
/*AfterPlacement=*/true)) {
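The MachineBlockPlacement hunks above change the tail-duplication cost model: when ProfileSummaryInfo provides a hot-count threshold, DupThreshold becomes HotThreshold * tail-dup-profile-percent-threshold / 100 and predecessor cost is measured in profile counts rather than block frequencies. The following standalone sketch is not part of the patch; it uses plain integers and doubles in place of BlockFrequency and BranchProbability to illustrate the resulting gain test.

// Standalone sketch of the tail-duplication gain test added above.
// Counts, thresholds and probabilities are plain scalars here; the real pass
// uses BlockFrequency and BranchProbability.
#include <cstdint>
#include <iostream>

struct DupModel {
  uint64_t Threshold;   // analogous to DupThreshold
  bool UseProfileCount; // true when PSI provides a hot-count threshold

  static DupModel fromHotCount(uint64_t HotThreshold, unsigned Percent) {
    return {HotThreshold * Percent / 100, true};
  }

  // Gain of letting Pred fall through to BB instead of its current best
  // successor: PredCount * (ProbToBB - ProbToBest), compared to Threshold.
  bool worthDuplicating(uint64_t PredCount, double ProbToBB,
                        double ProbToBest) const {
    double Gain = PredCount * (ProbToBB - ProbToBest);
    return Gain > static_cast<double>(Threshold);
  }
};

int main() {
  // Assume PSI reports a hot-count threshold of 10000 and the default
  // -tail-dup-profile-percent-threshold=50, so Threshold = 5000.
  DupModel M = DupModel::fromHotCount(10000, 50);
  std::cout << M.worthDuplicating(20000, 0.6, 0.3) << '\n'; // gain ~6000 -> 1
  std::cout << M.worthDuplicating(20000, 0.4, 0.3) << '\n'; // gain ~2000 -> 0
}

With the default 50% setting, duplication into a predecessor only pays off when the fall-through it gains covers at least half of the hot count.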
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
index 09531276bc10..199fe2dc6454 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
@@ -115,18 +116,18 @@ namespace {
bool PerformTrivialCopyPropagation(MachineInstr *MI,
MachineBasicBlock *MBB);
- bool isPhysDefTriviallyDead(unsigned Reg,
+ bool isPhysDefTriviallyDead(MCRegister Reg,
MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator E) const;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned, 8> &PhysRefs,
+ SmallSet<MCRegister, 8> &PhysRefs,
PhysDefVector &PhysDefs, bool &PhysUseDef) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned, 8> &PhysRefs,
+ SmallSet<MCRegister, 8> &PhysRefs,
PhysDefVector &PhysDefs, bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI);
- bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ bool isProfitableToCSE(Register CSReg, Register Reg,
MachineBasicBlock *CSBB, MachineInstr *MI);
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
@@ -218,10 +219,9 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
return Changed;
}
-bool
-MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
- MachineBasicBlock::const_iterator I,
- MachineBasicBlock::const_iterator E) const {
+bool MachineCSE::isPhysDefTriviallyDead(
+ MCRegister Reg, MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const {
unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) {
// Skip over dbg_value's.
@@ -255,7 +255,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
return false;
}
-static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
+static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
const MachineFunction &MF,
const TargetRegisterInfo &TRI) {
// MachineRegisterInfo::isConstantPhysReg directly called by
@@ -276,7 +276,7 @@ static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
/// instruction does not uses a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned, 8> &PhysRefs,
+ SmallSet<MCRegister, 8> &PhysRefs,
PhysDefVector &PhysDefs,
bool &PhysUseDef) const {
// First, add all uses to PhysRefs.
@@ -289,7 +289,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (Register::isVirtualRegister(Reg))
continue;
// Reading either caller preserved or constant physregs is ok.
- if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
+ if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), *MI->getMF(), *TRI))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
}
@@ -308,12 +308,12 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (Register::isVirtualRegister(Reg))
continue;
// Check against PhysRefs even if the def is "dead".
- if (PhysRefs.count(Reg))
+ if (PhysRefs.count(Reg.asMCReg()))
PhysUseDef = true;
// If the def is dead, it's ok. But the def may not marked "dead". That's
// common since this pass is run before livevariables. We can scan
// forward a few instructions and check if it is obviously dead.
- if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
+ if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
}
@@ -327,7 +327,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
}
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned, 8> &PhysRefs,
+ SmallSet<MCRegister, 8> &PhysRefs,
PhysDefVector &PhysDefs,
bool &NonLocal) const {
// For now conservatively returns false if the common subexpression is
@@ -382,7 +382,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
Register MOReg = MO.getReg();
if (Register::isVirtualRegister(MOReg))
continue;
- if (PhysRefs.count(MOReg))
+ if (PhysRefs.count(MOReg.asMCReg()))
return false;
}
@@ -429,7 +429,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg. CSBB is basic block where CSReg is
/// defined.
-bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
+bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
MachineBasicBlock *CSBB, MachineInstr *MI) {
// FIXME: Heuristics that works around the lack the live range splitting.
@@ -556,7 +556,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// used, then it's not safe to replace it with a common subexpression.
// It's also not safe if the instruction uses physical registers.
bool CrossMBBPhysDef = false;
- SmallSet<unsigned, 8> PhysRefs;
+ SmallSet<MCRegister, 8> PhysRefs;
PhysDefVector PhysDefs;
bool PhysUseDef = false;
if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
@@ -640,7 +640,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// Actually perform the elimination.
if (DoCSE) {
- for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
+ for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
unsigned OldReg = CSEPair.first;
unsigned NewReg = CSEPair.second;
// OldReg may have been unused but is used now, clear the Dead flag
@@ -656,7 +656,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// we should make sure it is not dead at CSMI.
for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
- for (auto PhysDef : PhysDefs)
+ for (const auto &PhysDef : PhysDefs)
if (!MI->getOperand(PhysDef.first).isDead())
CSMI->getOperand(PhysDef.first).setIsDead(false);
@@ -748,8 +748,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
Node = WorkList.pop_back_val();
Scopes.push_back(Node);
OpenChildren[Node] = Node->getNumChildren();
- for (MachineDomTreeNode *Child : Node->children())
- WorkList.push_back(Child);
+ append_range(WorkList, Node->children());
} while (!WorkList.empty());
// Now perform CSE.
@@ -777,11 +776,11 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) {
MI->getNumExplicitDefs() != 1)
return false;
- for (auto def : MI->defs())
+ for (const auto &def : MI->defs())
if (!Register::isVirtualRegister(def.getReg()))
return false;
- for (auto use : MI->uses())
+ for (const auto &use : MI->uses())
if (use.isReg() && !Register::isVirtualRegister(use.getReg()))
return false;
@@ -861,8 +860,7 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
BBs.push_back(DT->getRootNode());
do {
auto Node = BBs.pop_back_val();
- for (MachineDomTreeNode *Child : Node->children())
- BBs.push_back(Child);
+ append_range(BBs, Node->children());
MachineBasicBlock *MBB = Node->getBlock();
Changed |= ProcessBlockPRE(DT, MBB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
new file mode 100644
index 000000000000..bd7f0f862947
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
@@ -0,0 +1,126 @@
+//===- MachineCheckDebugify.cpp - Check debug info ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This checks debug info after mir-debugify (+ pass-to-test). Currently
+/// it simply checks the integrity of line info in DILocation and
+/// DILocalVariable which mir-debugify generated before.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Debugify.h"
+
+#define DEBUG_TYPE "mir-check-debugify"
+
+using namespace llvm;
+
+namespace {
+
+struct CheckDebugMachineModule : public ModulePass {
+ bool runOnModule(Module &M) override {
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+ NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify");
+ if (!NMD) {
+ errs() << "WARNING: Please run mir-debugify to generate "
+ "llvm.mir.debugify metadata first.\n";
+ return false;
+ }
+
+ auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
+ return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
+ ->getZExtValue();
+ };
+ assert(NMD->getNumOperands() == 2 &&
+ "llvm.mir.debugify should have exactly 2 operands!");
+ unsigned NumLines = getDebugifyOperand(0);
+ unsigned NumVars = getDebugifyOperand(1);
+ BitVector MissingLines{NumLines, true};
+ BitVector MissingVars{NumVars, true};
+
+ for (Function &F : M.functions()) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+ if (!MF)
+ continue;
+ for (MachineBasicBlock &MBB : *MF) {
+ // Find missing lines.
+ // TODO: Avoid meta instructions other than dbg_val.
+ for (MachineInstr &MI : MBB) {
+ if (MI.isDebugValue())
+ continue;
+ const DebugLoc DL = MI.getDebugLoc();
+ if (DL && DL.getLine() != 0) {
+ MissingLines.reset(DL.getLine() - 1);
+ continue;
+ }
+
+ if (!DL) {
+ errs() << "WARNING: Instruction with empty DebugLoc in function ";
+ errs() << F.getName() << " --";
+ MI.print(errs());
+ }
+ }
+
+ // Find missing variables.
+ // TODO: Handle DBG_INSTR_REF which is under an experimental option now.
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isDebugValue())
+ continue;
+ const DILocalVariable *LocalVar = MI.getDebugVariable();
+ unsigned Var = ~0U;
+
+ (void)to_integer(LocalVar->getName(), Var, 10);
+ assert(Var <= NumVars && "Unexpected name for DILocalVariable");
+ MissingVars.reset(Var - 1);
+ }
+ }
+ }
+
+ bool Fail = false;
+ for (unsigned Idx : MissingLines.set_bits()) {
+ errs() << "WARNING: Missing line " << Idx + 1 << "\n";
+ Fail = true;
+ }
+
+ for (unsigned Idx : MissingVars.set_bits()) {
+ errs() << "WARNING: Missing variable " << Idx + 1 << "\n";
+ Fail = true;
+ }
+ errs() << "Machine IR debug info check: ";
+ errs() << (Fail ? "FAIL" : "PASS") << "\n";
+
+ return false;
+ }
+
+ CheckDebugMachineModule() : ModulePass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+ static char ID; // Pass identification.
+};
+char CheckDebugMachineModule::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(CheckDebugMachineModule, DEBUG_TYPE,
+ "Machine Check Debug Module", false, false)
+INITIALIZE_PASS_END(CheckDebugMachineModule, DEBUG_TYPE,
+ "Machine Check Debug Module", false, false)
+
+ModulePass *llvm::createCheckDebugMachineModulePass() {
+ return new CheckDebugMachineModule();
+}
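The new MachineCheckDebugify pass reads the two operands of the llvm.mir.debugify module metadata (number of lines, number of variables), clears a bit for every line or variable it still finds attached to an instruction, and warns about whatever remains set. A rough standalone sketch of that bookkeeping follows; it is not part of the patch, uses std::vector<bool> in place of llvm::BitVector, and hard-codes the lines assumed to survive the pass under test.

// Sketch of the line-coverage check mir-check-debugify performs: start with
// every debugify-assigned line marked missing, clear the bit for each line
// still attached to an instruction, then report what is left.
#include <iostream>
#include <vector>

int main() {
  unsigned NumLines = 5;                     // from !llvm.mir.debugify operand 0
  std::vector<bool> Missing(NumLines, true); // stand-in for llvm::BitVector

  // Lines observed on surviving DebugLocs (hypothetical pass output).
  for (unsigned Line : {1u, 2u, 4u})
    Missing[Line - 1] = false;

  bool Fail = false;
  for (unsigned Idx = 0; Idx < NumLines; ++Idx)
    if (Missing[Idx]) {
      std::cout << "WARNING: Missing line " << Idx + 1 << "\n";
      Fail = true;
    }
  std::cout << "Machine IR debug info check: " << (Fail ? "FAIL" : "PASS")
            << "\n";
}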
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
index f241435a0482..e2b6cfe55c16 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
@@ -72,6 +73,7 @@ class MachineCombiner : public MachineFunctionPass {
MachineTraceMetrics::Ensemble *MinInstr;
MachineBlockFrequencyInfo *MBFI;
ProfileSummaryInfo *PSI;
+ RegisterClassInfo RegClassInfo;
TargetSchedModel TSchedModel;
@@ -103,6 +105,10 @@ private:
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineCombinerPattern Pattern, bool SlackIsAccurate);
+ bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineCombinerPattern Pattern);
bool preservesResourceLen(MachineBasicBlock *MBB,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -257,8 +263,9 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
/// The combiner's goal may differ based on which pattern it is attempting
/// to optimize.
enum class CombinerObjective {
- MustReduceDepth, // The data dependency chain must be improved.
- Default // The critical path must not be lengthened.
+ MustReduceDepth, // The data dependency chain must be improved.
+ MustReduceRegisterPressure, // The register pressure must be reduced.
+ Default // The critical path must not be lengthened.
};
static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
@@ -272,6 +279,9 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
return CombinerObjective::MustReduceDepth;
+ case MachineCombinerPattern::REASSOC_XY_BCA:
+ case MachineCombinerPattern::REASSOC_XY_BAC:
+ return CombinerObjective::MustReduceRegisterPressure;
default:
return CombinerObjective::Default;
}
@@ -300,6 +310,18 @@ std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
return {NewRootLatency, RootLatency};
}
+bool MachineCombiner::reduceRegisterPressure(
+ MachineInstr &Root, MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineCombinerPattern Pattern) {
+ // FIXME: For now we don't do any checking for the register pressure
+ // patterns; we treat them as always profitable. We could do better if we
+ // made the RegPressureTracker class aware of the TIE attribute. Then we
+ // could get an accurate comparison of register pressure with DelInstrs or
+ // InsInstrs.
+ return true;
+}
+
/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
/// The new code sequence ends in MI NewRoot. A necessary condition for the new
/// sequence to replace the old sequence is that it cannot lengthen the critical
@@ -438,6 +460,8 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize,
/// \param DelInstrs instruction to delete from \p MBB
/// \param MinInstr is a pointer to the machine trace information
/// \param RegUnits set of live registers, needed to compute instruction depths
+/// \param TII is target instruction info, used to call target hook
+/// \param Pattern is used to call target hook finalizeInsInstrs
/// \param IncrementalUpdate if true, compute instruction depths incrementally,
/// otherwise invalidate the trace
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
@@ -445,7 +469,18 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
SmallVector<MachineInstr *, 16> DelInstrs,
MachineTraceMetrics::Ensemble *MinInstr,
SparseSet<LiveRegUnit> &RegUnits,
+ const TargetInstrInfo *TII,
+ MachineCombinerPattern Pattern,
bool IncrementalUpdate) {
+ // If we want to fix up some placeholder for some target, do it now.
+ // We need this because in genAlternativeCodeSequence, we have not yet decided
+ // whether InsInstrs or DelInstrs is the better pattern, so we do not want to
+ // generate any side effects in the function at that point. For example, the
+ // constant pool entry creation is delayed until here, after InsInstrs has been
+ // selected as the better pattern; otherwise the constant pool entry created
+ // for InsInstrs would not be deleted even if InsInstrs turned out not to be
+ // the better pattern.
+ TII->finalizeInsInstrs(MI, Pattern, InsInstrs);
+
for (auto *InstrPtr : InsInstrs)
MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
@@ -522,6 +557,9 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
+ bool DoRegPressureReduce =
+ TII->shouldReduceRegisterPressure(MBB, &RegClassInfo);
+
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
SmallVector<MachineCombinerPattern, 16> Patterns;
@@ -552,7 +590,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// machine-combiner-verify-pattern-order is enabled, all patterns are
// checked to ensure later patterns do not provide better latency savings.
- if (!TII->getMachineCombinerPatterns(MI, Patterns))
+ if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce))
continue;
if (VerifyPatternOrder)
@@ -588,12 +626,33 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (ML && TII->isThroughputPattern(P))
SubstituteAlways = true;
- if (IncrementalUpdate) {
+ if (IncrementalUpdate && LastUpdate != BlockIter) {
// Update depths since the last incremental update.
MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
LastUpdate = BlockIter;
}
+ if (DoRegPressureReduce &&
+ getCombinerObjective(P) ==
+ CombinerObjective::MustReduceRegisterPressure) {
+ if (MBB->size() > inc_threshold) {
+ // Use incremental depth updates for basic blocks above threshold
+ IncrementalUpdate = true;
+ LastUpdate = BlockIter;
+ }
+ if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
+ // Replace DelInstrs with InsInstrs.
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ RegUnits, TII, P, IncrementalUpdate);
+ Changed |= true;
+
+ // Go back to previous instruction as it may have ILP reassociation
+ // opportunity.
+ BlockIter--;
+ break;
+ }
+ }
+
// Substitute when we optimize for codesize and the new sequence has
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
@@ -601,7 +660,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (SubstituteAlways ||
doSubstitute(NewInstCount, OldInstCount, OptForSize)) {
insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
- RegUnits, IncrementalUpdate);
+ RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
break;
@@ -624,7 +683,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
- RegUnits, IncrementalUpdate);
+ RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
@@ -660,6 +719,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
nullptr;
MinInstr = nullptr;
OptSize = MF.getFunction().hasOptSize();
+ RegClassInfo.runOnMachineFunction(MF);
LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {
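The MachineCombiner hunks add a third combiner objective, MustReduceRegisterPressure, which the REASSOC_XY_BCA and REASSOC_XY_BAC patterns map to; when the target opts in via shouldReduceRegisterPressure, such patterns are substituted without consulting the latency/depth model (reduceRegisterPressure currently always returns true). Below is a simplified sketch of that dispatch; it is not part of the patch and uses a stripped-down pattern enum rather than MachineCombinerPattern.

// Sketch of how the new objective short-circuits the latency cost model.
#include <iostream>

enum class Pattern { REASSOC_XY_AMM_BMM, REASSOC_XY_BCA, REASSOC_XY_BAC, Other };
enum class Objective { MustReduceDepth, MustReduceRegisterPressure, Default };

static Objective getObjective(Pattern P) {
  switch (P) {
  case Pattern::REASSOC_XY_AMM_BMM:
    return Objective::MustReduceDepth;
  case Pattern::REASSOC_XY_BCA:
  case Pattern::REASSOC_XY_BAC:
    return Objective::MustReduceRegisterPressure;
  default:
    return Objective::Default;
  }
}

int main() {
  bool DoRegPressureReduce = true; // the target opted in via its hook
  Pattern P = Pattern::REASSOC_XY_BCA;
  if (DoRegPressureReduce &&
      getObjective(P) == Objective::MustReduceRegisterPressure)
    std::cout << "substitute without consulting the latency model\n";
  else
    std::cout << "fall back to the critical-path / depth checks\n";
}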
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 4c4839ca6522..d8659c1c7853 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -88,18 +88,18 @@ namespace {
class CopyTracker {
struct CopyInfo {
MachineInstr *MI;
- SmallVector<unsigned, 4> DefRegs;
+ SmallVector<MCRegister, 4> DefRegs;
bool Avail;
};
- DenseMap<unsigned, CopyInfo> Copies;
+ DenseMap<MCRegister, CopyInfo> Copies;
public:
/// Mark all of the given registers and their subregisters as unavailable for
/// copying.
- void markRegsUnavailable(ArrayRef<unsigned> Regs,
+ void markRegsUnavailable(ArrayRef<MCRegister> Regs,
const TargetRegisterInfo &TRI) {
- for (unsigned Reg : Regs) {
+ for (MCRegister Reg : Regs) {
// Source of copy is no longer available for propagation.
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
auto CI = Copies.find(*RUI);
@@ -110,30 +110,30 @@ public:
}
/// Remove register from copy maps.
- void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
+ void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI) {
// Since Reg might be a subreg of some registers, only invalidate Reg is not
// enough. We have to find the COPY defines Reg or registers defined by Reg
// and invalidate all of them.
- SmallSet<unsigned, 8> RegsToInvalidate;
+ SmallSet<MCRegister, 8> RegsToInvalidate;
RegsToInvalidate.insert(Reg);
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
auto I = Copies.find(*RUI);
if (I != Copies.end()) {
if (MachineInstr *MI = I->second.MI) {
- RegsToInvalidate.insert(MI->getOperand(0).getReg());
- RegsToInvalidate.insert(MI->getOperand(1).getReg());
+ RegsToInvalidate.insert(MI->getOperand(0).getReg().asMCReg());
+ RegsToInvalidate.insert(MI->getOperand(1).getReg().asMCReg());
}
RegsToInvalidate.insert(I->second.DefRegs.begin(),
I->second.DefRegs.end());
}
}
- for (unsigned InvalidReg : RegsToInvalidate)
+ for (MCRegister InvalidReg : RegsToInvalidate)
for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI)
Copies.erase(*RUI);
}
/// Clobber a single register, removing it from the tracker's copy maps.
- void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
+ void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI) {
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
auto I = Copies.find(*RUI);
if (I != Copies.end()) {
@@ -143,7 +143,7 @@ public:
// When we clobber the destination of a copy, we need to clobber the
// whole register it defined.
if (MachineInstr *MI = I->second.MI)
- markRegsUnavailable({MI->getOperand(0).getReg()}, TRI);
+ markRegsUnavailable({MI->getOperand(0).getReg().asMCReg()}, TRI);
// Now we can erase the copy.
Copies.erase(I);
}
@@ -154,8 +154,8 @@ public:
void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) {
assert(MI->isCopy() && "Tracking non-copy?");
- Register Def = MI->getOperand(0).getReg();
- Register Src = MI->getOperand(1).getReg();
+ MCRegister Def = MI->getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI->getOperand(1).getReg().asMCReg();
// Remember Def is defined by the copy.
for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
@@ -175,8 +175,9 @@ public:
return !Copies.empty();
}
- MachineInstr *findCopyForUnit(unsigned RegUnit, const TargetRegisterInfo &TRI,
- bool MustBeAvailable = false) {
+ MachineInstr *findCopyForUnit(MCRegister RegUnit,
+ const TargetRegisterInfo &TRI,
+ bool MustBeAvailable = false) {
auto CI = Copies.find(RegUnit);
if (CI == Copies.end())
return nullptr;
@@ -185,8 +186,8 @@ public:
return CI->second.MI;
}
- MachineInstr *findCopyDefViaUnit(unsigned RegUnit,
- const TargetRegisterInfo &TRI) {
+ MachineInstr *findCopyDefViaUnit(MCRegister RegUnit,
+ const TargetRegisterInfo &TRI) {
auto CI = Copies.find(RegUnit);
if (CI == Copies.end())
return nullptr;
@@ -196,7 +197,7 @@ public:
return findCopyForUnit(*RUI, TRI, true);
}
- MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg,
+ MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg,
const TargetRegisterInfo &TRI) {
MCRegUnitIterator RUI(Reg, &TRI);
MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI);
@@ -217,7 +218,7 @@ public:
return AvailCopy;
}
- MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg,
+ MachineInstr *findAvailCopy(MachineInstr &DestCopy, MCRegister Reg,
const TargetRegisterInfo &TRI) {
// We check the first RegUnit here, since we'll only be interested in the
// copy if it copies the entire register anyway.
@@ -274,12 +275,10 @@ public:
private:
typedef enum { DebugUse = false, RegularUse = true } DebugType;
- void ClobberRegister(unsigned Reg);
- void ReadRegister(unsigned Reg, MachineInstr &Reader,
- DebugType DT);
+ void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
- bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
+ bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def);
void forwardUses(MachineInstr &MI);
void propagateDefs(MachineInstr &MI);
bool isForwardableRegClassCopy(const MachineInstr &Copy,
@@ -288,6 +287,8 @@ private:
const MachineInstr &UseI,
unsigned UseIdx);
bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
+ bool hasOverlappingMultipleDef(const MachineInstr &MI,
+ const MachineOperand &MODef, Register Def);
/// Candidates for deletion.
SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
@@ -309,7 +310,7 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
"Machine Copy Propagation Pass", false, false)
-void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader,
+void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
DebugType DT) {
// If 'Reg' is defined by a copy, the copy is no longer a candidate
// for elimination. If a copy is "read" by a debug user, record the user
@@ -332,10 +333,10 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader,
/// PreviousCopy. e.g.
/// isNopCopy("ecx = COPY eax", AX, CX) == true
/// isNopCopy("ecx = COPY eax", AH, CL) == false
-static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
- unsigned Def, const TargetRegisterInfo *TRI) {
- Register PreviousSrc = PreviousCopy.getOperand(1).getReg();
- Register PreviousDef = PreviousCopy.getOperand(0).getReg();
+static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src,
+ MCRegister Def, const TargetRegisterInfo *TRI) {
+ MCRegister PreviousSrc = PreviousCopy.getOperand(1).getReg().asMCReg();
+ MCRegister PreviousDef = PreviousCopy.getOperand(0).getReg().asMCReg();
if (Src == PreviousSrc && Def == PreviousDef)
return true;
if (!TRI->isSubRegister(PreviousSrc, Src))
@@ -347,8 +348,8 @@ static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
/// Remove instruction \p Copy if there exists a previous copy that copies the
/// register \p Src to the register \p Def; This may happen indirectly by
/// copying the super registers.
-bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
- unsigned Def) {
+bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
+ MCRegister Src, MCRegister Def) {
// Avoid eliminating a copy from/to a reserved registers as we cannot predict
// the value (Example: The sparc zero register is writable but stays zero).
if (MRI->isReserved(Src) || MRI->isReserved(Def))
@@ -459,6 +460,21 @@ bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
return false;
}
+/// For an MI that has multiple definitions, check whether \p MI has
+/// a definition that overlaps with another of its definitions.
+/// For example, on ARM: umull r9, r9, lr, r0
+/// The umull instruction is unpredictable unless RdHi and RdLo are different.
+bool MachineCopyPropagation::hasOverlappingMultipleDef(
+ const MachineInstr &MI, const MachineOperand &MODef, Register Def) {
+ for (const MachineOperand &MIDef : MI.defs()) {
+ if ((&MIDef != &MODef) && MIDef.isReg() &&
+ TRI->regsOverlap(Def, MIDef.getReg()))
+ return true;
+ }
+
+ return false;
+}
+
/// Look for available copies whose destination register is used by \p MI and
/// replace the use in \p MI with the copy's source register.
void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
@@ -489,7 +505,8 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (!MOUse.isRenamable())
continue;
- MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg(), *TRI);
+ MachineInstr *Copy =
+ Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), *TRI);
if (!Copy)
continue;
@@ -561,13 +578,13 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Analyze copies (which don't overlap themselves).
if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(),
MI->getOperand(1).getReg())) {
- Register Def = MI->getOperand(0).getReg();
- Register Src = MI->getOperand(1).getReg();
-
- assert(!Register::isVirtualRegister(Def) &&
- !Register::isVirtualRegister(Src) &&
+ assert(MI->getOperand(0).getReg().isPhysical() &&
+ MI->getOperand(1).getReg().isPhysical() &&
"MachineCopyPropagation should be run after register allocation!");
+ MCRegister Def = MI->getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
// %ecx = COPY %eax
@@ -589,7 +606,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
forwardUses(*MI);
// Src may have been changed by forwardUses()
- Src = MI->getOperand(1).getReg();
+ Src = MI->getOperand(1).getReg().asMCReg();
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
@@ -597,7 +614,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.readsReg())
continue;
- Register Reg = MO.getReg();
+ MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
ReadRegister(Reg, *MI, RegularUse);
@@ -620,7 +637,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- Register Reg = MO.getReg();
+ MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
Tracker.clobberRegister(Reg, *TRI);
@@ -634,7 +651,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Clobber any earlyclobber regs first.
for (const MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isEarlyClobber()) {
- Register Reg = MO.getReg();
+ MCRegister Reg = MO.getReg().asMCReg();
// If we have a tied earlyclobber, that means it is also read by this
// instruction, so we need to make sure we don't remove it as dead
// later.
@@ -646,7 +663,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
forwardUses(*MI);
// Not a copy.
- SmallVector<unsigned, 2> Defs;
+ SmallVector<Register, 2> Defs;
const MachineOperand *RegMask = nullptr;
for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask())
@@ -657,14 +674,14 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
if (!Reg)
continue;
- assert(!Register::isVirtualRegister(Reg) &&
+ assert(!Reg.isVirtual() &&
"MachineCopyPropagation should be run after register allocation!");
if (MO.isDef() && !MO.isEarlyClobber()) {
- Defs.push_back(Reg);
+ Defs.push_back(Reg.asMCReg());
continue;
} else if (MO.readsReg())
- ReadRegister(Reg, *MI, MO.isDebug() ? DebugUse : RegularUse);
+ ReadRegister(Reg.asMCReg(), *MI, MO.isDebug() ? DebugUse : RegularUse);
}
// The instruction has a register mask operand which means that it clobbers
@@ -676,7 +693,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
MaybeDeadCopies.begin();
DI != MaybeDeadCopies.end();) {
MachineInstr *MaybeDead = *DI;
- Register Reg = MaybeDead->getOperand(0).getReg();
+ MCRegister Reg = MaybeDead->getOperand(0).getReg().asMCReg();
assert(!MRI->isReserved(Reg));
if (!RegMask->clobbersPhysReg(Reg)) {
@@ -701,7 +718,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
}
// Any previous copy definition or reading the Defs is no longer available.
- for (unsigned Reg : Defs)
+ for (MCRegister Reg : Defs)
Tracker.clobberRegister(Reg, *TRI);
}
@@ -716,7 +733,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Update matching debug values, if any.
assert(MaybeDead->isCopy());
- unsigned SrcReg = MaybeDead->getOperand(1).getReg();
+ Register SrcReg = MaybeDead->getOperand(1).getReg();
MRI->updateDbgUsersToReg(SrcReg, CopyDbgUsers[MaybeDead]);
MaybeDead->eraseFromParent();
@@ -768,7 +785,7 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
continue;
MachineInstr *Copy =
- Tracker.findAvailBackwardCopy(MI, MODef.getReg(), *TRI);
+ Tracker.findAvailBackwardCopy(MI, MODef.getReg().asMCReg(), *TRI);
if (!Copy)
continue;
@@ -784,6 +801,9 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
if (hasImplicitOverlap(MI, MODef))
continue;
+ if (hasOverlappingMultipleDef(MI, MODef, Def))
+ continue;
+
LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI)
<< "\n with " << printReg(Def, TRI) << "\n in "
<< MI << " from " << *Copy);
@@ -813,8 +833,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
!TRI->regsOverlap(MI->getOperand(0).getReg(),
MI->getOperand(1).getReg())) {
- Register Def = MI->getOperand(0).getReg();
- Register Src = MI->getOperand(1).getReg();
+ MCRegister Def = MI->getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI->getOperand(1).getReg().asMCReg();
// Unlike forward cp, we don't invoke propagateDefs here,
// just let forward cp do COPY-to-COPY propagation.
@@ -829,7 +849,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
// Invalidate any earlyclobber regs first.
for (const MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isEarlyClobber()) {
- Register Reg = MO.getReg();
+ MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
Tracker.invalidateRegister(Reg, *TRI);
@@ -844,10 +864,10 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
continue;
if (MO.isDef())
- Tracker.invalidateRegister(MO.getReg(), *TRI);
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
if (MO.readsReg())
- Tracker.invalidateRegister(MO.getReg(), *TRI);
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
}
}
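hasOverlappingMultipleDef closes a correctness hole in backward copy propagation: an instruction with several defs (for example ARM's umull, which is unpredictable if RdLo and RdHi are the same register) must not have one def rewritten into a register that overlaps another of its defs. A minimal sketch of the check follows; it is not part of the patch, with plain ints standing in for registers and a trivial stand-in for TRI->regsOverlap.

// Sketch of the new multiple-def overlap guard.
#include <iostream>
#include <vector>

struct Def { int Reg; };

// Hypothetical stand-in for TRI->regsOverlap on flat integer "registers".
static bool regsOverlap(int A, int B) { return A == B; }

static bool hasOverlappingMultipleDef(const std::vector<Def> &Defs,
                                      const Def *Candidate, int NewReg) {
  for (const Def &D : Defs)
    if (&D != Candidate && regsOverlap(NewReg, D.Reg))
      return true;
  return false;
}

int main() {
  std::vector<Def> Defs = {{9}, {10}}; // e.g. RdLo = r9, RdHi = r10
  // Rewriting the first def to r10 would collide with the second def, so the
  // guard fires and propagation is skipped.
  std::cout << hasOverlappingMultipleDef(Defs, &Defs[0], 10) << '\n'; // 1
  std::cout << hasOverlappingMultipleDef(Defs, &Defs[0], 11) << '\n'; // 0
}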
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
index bf57ec0e8c28..599a81847592 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -89,10 +90,11 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
// Do this by introducing debug uses of each register definition. If that is
// not possible (e.g. we have a phi or a meta instruction), emit a constant.
uint64_t NextImm = 0;
+ SmallSet<DILocalVariable *, 16> VarSet;
const MCInstrDesc &DbgValDesc = TII.get(TargetOpcode::DBG_VALUE);
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator FirstNonPHIIt = MBB.getFirstNonPHI();
- for (auto I = MBB.begin(), E = MBB.end(); I != E; ) {
+ for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
MachineInstr &MI = *I;
++I;
@@ -113,6 +115,7 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
Line = EarliestDVI->getDebugLoc().getLine();
DILocalVariable *LocalVar = Line2Var[Line];
assert(LocalVar && "No variable for current line?");
+ VarSet.insert(LocalVar);
// Emit DBG_VALUEs for register definitions.
SmallVector<MachineOperand *, 4> RegDefs;
@@ -132,6 +135,33 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
}
}
+ // Here we save the number of lines and variables into "llvm.mir.debugify".
+ // It is useful for mir-check-debugify.
+ NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify");
+ IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+ if (!NMD) {
+ NMD = M.getOrInsertNamedMetadata("llvm.mir.debugify");
+ auto addDebugifyOperand = [&](unsigned N) {
+ NMD->addOperand(MDNode::get(
+ Ctx, ValueAsMetadata::getConstant(ConstantInt::get(Int32Ty, N))));
+ };
+ // Add number of lines.
+ addDebugifyOperand(NextLine - 1);
+ // Add number of variables.
+ addDebugifyOperand(VarSet.size());
+ } else {
+ assert(NMD->getNumOperands() == 2 &&
+ "llvm.mir.debugify should have exactly 2 operands!");
+ auto setDebugifyOperand = [&](unsigned Idx, unsigned N) {
+ NMD->setOperand(Idx, MDNode::get(Ctx, ValueAsMetadata::getConstant(
+ ConstantInt::get(Int32Ty, N))));
+ };
+ // Set number of lines.
+ setDebugifyOperand(0, NextLine - 1);
+ // Set number of variables.
+ setDebugifyOperand(1, VarSet.size());
+ }
+
return true;
}
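MachineDebugify now records how many debugify lines and how many distinct variables it created in the llvm.mir.debugify named metadata, so that mir-check-debugify can verify them later. A small sketch of that counting follows; it is not part of the patch and uses std::set in place of SmallSet with synthetic line numbers.

// Sketch of the bookkeeping added to MachineDebugify: count distinct
// debugify variables that actually received a DBG_VALUE.
#include <iostream>
#include <set>

int main() {
  unsigned NextLine = 7; // debugify assigned lines 1..6
  std::set<unsigned> VarSet;
  for (unsigned LineWithDbgValue : {1u, 3u, 3u, 5u}) // duplicates collapse
    VarSet.insert(LineWithDbgValue);

  // These two values correspond to the operands stored in !llvm.mir.debugify.
  std::cout << "lines: " << NextLine - 1 << ", variables: " << VarSet.size()
            << "\n"; // lines: 6, variables: 3
}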
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 6d45f08804ed..3f44578b1a2c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -273,20 +273,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
}
DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const {
- if (&FPType == &APFloat::IEEEsingle()) {
- Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
- StringRef Val = Attr.getValueAsString();
- if (!Val.empty())
- return parseDenormalFPAttribute(Val);
-
- // If the f32 variant of the attribute isn't specified, try to use the
- // generic one.
- }
-
- // TODO: Should probably avoid the connection to the IR and store directly
- // in the MachineFunction.
- Attribute Attr = F.getFnAttribute("denormal-fp-math");
- return parseDenormalFPAttribute(Attr.getValueAsString());
+ return F.getDenormalMode(FPType);
}
/// Should we be emitting segmented stack stuff for the function
@@ -341,33 +328,6 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
MBBNumbering.resize(BlockNo);
}
-/// This is used with -fbasic-block-sections or -fbasicblock-labels option.
-/// A unary encoding of basic block labels is done to keep ".strtab" sizes
-/// small.
-void MachineFunction::createBBLabels() {
- const TargetInstrInfo *TII = getSubtarget().getInstrInfo();
- this->BBSectionsSymbolPrefix.resize(getNumBlockIDs(), 'a');
- for (auto MBBI = begin(), E = end(); MBBI != E; ++MBBI) {
- assert(
- (MBBI->getNumber() >= 0 && MBBI->getNumber() < (int)getNumBlockIDs()) &&
- "BasicBlock number was out of range!");
- // 'a' - Normal block.
- // 'r' - Return block.
- // 'l' - Landing Pad.
- // 'L' - Return and landing pad.
- bool isEHPad = MBBI->isEHPad();
- bool isRetBlock = MBBI->isReturnBlock() && !TII->isTailCall(MBBI->back());
- char type = 'a';
- if (isEHPad && isRetBlock)
- type = 'L';
- else if (isEHPad)
- type = 'l';
- else if (isRetBlock)
- type = 'r';
- BBSectionsSymbolPrefix[MBBI->getNumber()] = type;
- }
-}
-
/// This method iterates over the basic blocks and assigns their IsBeginSection
/// and IsEndSection fields. This must be called after MBB layout is finalized
/// and the SectionID's are assigned to MBBs.
@@ -387,9 +347,9 @@ void MachineFunction::assignBeginEndSections() {
/// Allocate a new MachineInstr. Use this instead of `new MachineInstr'.
MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
const DebugLoc &DL,
- bool NoImp) {
+ bool NoImplicit) {
return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
- MachineInstr(*this, MCID, DL, NoImp);
+ MachineInstr(*this, MCID, DL, NoImplicit);
}
/// Create a new MachineInstr which is a copy of the 'Orig' instruction,
@@ -460,6 +420,9 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
void
MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
assert(MBB->getParent() == this && "MBB parent mismatch!");
+ // Clean up any references to MBB in jump tables before deleting it.
+ if (JumpTableInfo)
+ JumpTableInfo->RemoveMBBFromJumpTables(MBB);
MBB->~MachineBasicBlock();
BasicBlockRecycler.Deallocate(Allocator, MBB);
}
@@ -474,6 +437,13 @@ MachineMemOperand *MachineFunction::getMachineMemOperand(
SSID, Ordering, FailureOrdering);
}
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+ const MachineMemOperand *MMO, MachinePointerInfo &PtrInfo, uint64_t Size) {
+ return new (Allocator) MachineMemOperand(
+ PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), AAMDNodes(), nullptr,
+ MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering());
+}
+
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
@@ -485,9 +455,11 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
? commonAlignment(MMO->getBaseAlign(), Offset)
: MMO->getBaseAlign();
+ // Do not preserve ranges, since we don't necessarily know what the high bits
+ // are anymore.
return new (Allocator)
MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size,
- Alignment, AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+ Alignment, MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(),
MMO->getOrdering(), MMO->getFailureOrdering());
}
@@ -896,7 +868,7 @@ try_next:;
// Add the new filter.
int FilterID = -(1 + FilterIds.size());
FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
- FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
+ llvm::append_range(FilterIds, TyIds);
FilterEnds.push_back(FilterIds.size());
FilterIds.push_back(0); // terminator
return FilterID;
@@ -974,6 +946,46 @@ void MachineFunction::moveCallSiteInfo(const MachineInstr *Old,
CallSitesInfo[New] = CSInfo;
}
+void MachineFunction::setDebugInstrNumberingCount(unsigned Num) {
+ DebugInstrNumberingCount = Num;
+}
+
+void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A,
+ DebugInstrOperandPair B) {
+ auto Result = DebugValueSubstitutions.insert(std::make_pair(A, B));
+ (void)Result;
+ assert(Result.second && "Substitution for an already substituted value?");
+}
+
+void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old,
+ MachineInstr &New,
+ unsigned MaxOperand) {
+ // If the Old instruction wasn't tracked at all, there is no work to do.
+ unsigned OldInstrNum = Old.peekDebugInstrNum();
+ if (!OldInstrNum)
+ return;
+
+ // Iterate over all operands looking for defs to create substitutions for.
+ // Avoid creating new instr numbers unless we create a new substitution.
+ // While this has no functional effect, it risks confusing someone reading
+ // MIR output.
+ // Examine all the operands, or the first N specified by the caller.
+ MaxOperand = std::min(MaxOperand, Old.getNumOperands());
+ for (unsigned int I = 0; I < MaxOperand; ++I) {
+ const auto &OldMO = Old.getOperand(I);
+ auto &NewMO = New.getOperand(I);
+ (void)NewMO;
+
+ if (!OldMO.isReg() || !OldMO.isDef())
+ continue;
+ assert(NewMO.isDef());
+
+ unsigned NewInstrNum = New.getDebugInstrNum();
+ makeDebugValueSubstitution(std::make_pair(OldInstrNum, I),
+ std::make_pair(NewInstrNum, I));
+ }
+}
+
/// \}
//===----------------------------------------------------------------------===//
@@ -1038,6 +1050,17 @@ bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
return MadeChange;
}
+/// If MBB is present in any jump tables, remove it.
+bool MachineJumpTableInfo::RemoveMBBFromJumpTables(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+ for (MachineJumpTableEntry &JTE : JumpTables) {
+ auto removeBeginItr = std::remove(JTE.MBBs.begin(), JTE.MBBs.end(), MBB);
+ MadeChange |= (removeBeginItr != JTE.MBBs.end());
+ JTE.MBBs.erase(removeBeginItr, JTE.MBBs.end());
+ }
+ return MadeChange;
+}
+
/// If Old is a target of the jump tables, update the jump table to branch to
/// New instead.
bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
@@ -1084,10 +1107,14 @@ Printable llvm::printJumpTableEntryReference(unsigned Idx) {
void MachineConstantPoolValue::anchor() {}
-Type *MachineConstantPoolEntry::getType() const {
+unsigned MachineConstantPoolValue::getSizeInBytes(const DataLayout &DL) const {
+ return DL.getTypeAllocSize(Ty);
+}
+
+unsigned MachineConstantPoolEntry::getSizeInBytes(const DataLayout &DL) const {
if (isMachineConstantPoolEntry())
- return Val.MachineCPVal->getType();
- return Val.ConstVal->getType();
+ return Val.MachineCPVal->getSizeInBytes(DL);
+ return DL.getTypeAllocSize(Val.ConstVal->getType());
}
bool MachineConstantPoolEntry::needsRelocation() const {
@@ -1100,7 +1127,7 @@ SectionKind
MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
if (needsRelocation())
return SectionKind::getReadOnlyWithRel();
- switch (DL->getTypeAllocSize(getType())) {
+ switch (getSizeInBytes(*DL)) {
case 4:
return SectionKind::getMergeableConst4();
case 8:
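The MachineFunction.cpp hunks also introduce makeDebugValueSubstitution and substituteDebugValuesForInst, which maintain a table mapping a tracked (instruction number, operand index) pair on a replaced instruction to the corresponding pair on its replacement. A rough sketch of that table follows; it is not part of the patch, and std::map stands in for the real DebugValueSubstitutions container.

// Sketch of the debug-value substitution table: when an instruction is
// replaced, each tracked def is remapped from the old (instr-number,
// operand-index) pair to the new one.
#include <cassert>
#include <map>
#include <utility>

using OperandPair = std::pair<unsigned, unsigned>; // (instr number, operand idx)

int main() {
  std::map<OperandPair, OperandPair> Substitutions;
  unsigned OldInstrNum = 4, NewInstrNum = 9;
  for (unsigned OpIdx : {0u}) { // defs of the old instruction
    auto Res =
        Substitutions.insert({{OldInstrNum, OpIdx}, {NewInstrNum, OpIdx}});
    assert(Res.second && "Substitution for an already substituted value?");
    (void)Res;
  }
  // A later debug reference to (4, 0) can now be resolved to (9, 0).
  assert(Substitutions.at({4u, 0u}) == OperandPair(9u, 0u));
}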
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 03149aa7db4a..16cde1f601f9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/GlobalsModRef.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 3645a4e3466b..c31c065b1976 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -14,7 +14,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -44,7 +44,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override {
- if (!llvm::isFunctionInPrintList(MF.getName()))
+ if (!isFunctionInPrintList(MF.getName()))
return false;
OS << "# " << Banner << ":\n";
MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
new file mode 100644
index 000000000000..483809a8ed96
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -0,0 +1,155 @@
+//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Uses profile information to split out cold blocks.
+//
+// This pass splits out cold machine basic blocks from the parent function. This
+// implementation leverages the basic block section framework. Blocks marked
+// cold by this pass are grouped together in a separate section prefixed with
+// ".text.unlikely.*". The linker can then group these together as a cold
+// section. The split part of the function is a contiguous region identified by
+// the symbol "foo.cold". Grouping all cold blocks across functions together
+// decreases fragmentation and improves icache and itlb utilization. Note that
+// the overall changes to the binary size are negligible; only a small number of
+// additional jump instructions may be introduced.
+//
+// For the original RFC of this pass please see
+// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+// FIXME: This cutoff value is CPU dependent and should be moved to
+// TargetTransformInfo once we consider enabling this on other platforms.
+// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
+// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
+// The default was empirically determined to be optimal when considering cutoff
+// values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
+// Intel CPUs.
+static cl::opt<unsigned>
+ PercentileCutoff("mfs-psi-cutoff",
+ cl::desc("Percentile profile summary cutoff used to "
+ "determine cold blocks. Unused if set to zero."),
+ cl::init(999950), cl::Hidden);
+
+static cl::opt<unsigned> ColdCountThreshold(
+ "mfs-count-threshold",
+ cl::desc(
+ "Minimum number of times a block must be executed to be retained."),
+ cl::init(1), cl::Hidden);
+
+namespace {
+
+class MachineFunctionSplitter : public MachineFunctionPass {
+public:
+ static char ID;
+ MachineFunctionSplitter() : MachineFunctionPass(ID) {
+ initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Machine Function Splitter Transformation";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+} // end anonymous namespace
+
+static bool isColdBlock(MachineBasicBlock &MBB,
+ const MachineBlockFrequencyInfo *MBFI,
+ ProfileSummaryInfo *PSI) {
+ Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
+ if (!Count.hasValue())
+ return true;
+
+ if (PercentileCutoff > 0) {
+ return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
+ }
+ return (*Count < ColdCountThreshold);
+}
+
+bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
+ // TODO: We only target functions with profile data. Static information may
+ // also be considered but we don't see performance improvements yet.
+ if (!MF.getFunction().hasProfileData())
+ return false;
+
+ // TODO: We don't split functions where a section attribute has been set
+ // since the split part may not be placed in a contiguous region. It may also
+ // be more beneficial to augment the linker to ensure contiguous layout of
+ // split functions within the same section as specified by the attribute.
+ if (!MF.getFunction().getSection().empty())
+ return false;
+
+ // We don't want to proceed further for cold functions
+ // or functions of unknown hotness. Lukewarm functions have no prefix.
+ Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
+ if (SectionPrefix.hasValue() &&
+ (SectionPrefix.getValue().equals("unlikely") ||
+ SectionPrefix.getValue().equals("unknown"))) {
+ return false;
+ }
+
+ // Renumbering blocks here preserves the order of the blocks as
+ // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
+ // blocks. Preserving the order of blocks is essential to retaining decisions
+ // made by prior passes such as MachineBlockPlacement.
+ MF.RenumberBlocks();
+ MF.setBBSectionsType(BasicBlockSection::Preset);
+ auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+ for (auto &MBB : MF) {
+ // FIXME: We retain the entry block and conservatively keep all landing pad
+ // blocks as part of the original function. Once D73739 is submitted, we can
+ // improve the handling of ehpads.
+ if ((MBB.pred_empty() || MBB.isEHPad()))
+ continue;
+ if (isColdBlock(MBB, MBFI, PSI))
+ MBB.setSectionID(MBBSectionID::ColdSectionID);
+ }
+
+ auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
+ return X.getSectionID().Type < Y.getSectionID().Type;
+ };
+ llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
+
+ return true;
+}
+
+void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+}
+
+char MachineFunctionSplitter::ID = 0;
+INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
+ "Split machine functions using profile information", false,
+ false)
+
+MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
+ return new MachineFunctionSplitter();
+}
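
The cold-block test above reduces to: a block with no profile count is treated as cold; otherwise it is cold if it falls below the -mfs-psi-cutoff percentile, or, when that cutoff is disabled, below the raw -mfs-count-threshold. The following is a minimal standalone C++ sketch of that decision only; the percentile helper and the hot-count numbers inside it are hypothetical stand-ins, not ProfileSummaryInfo.

// Standalone model of the cold-block decision in MachineFunctionSplitter.
// The percentile helper below is a made-up stand-in for ProfileSummaryInfo;
// only the decision structure mirrors the pass.
#include <cstdint>
#include <iostream>
#include <optional>

static unsigned PercentileCutoff = 999950; // -mfs-psi-cutoff
static uint64_t ColdCountThreshold = 1;    // -mfs-count-threshold

// Hypothetical: true if Count falls below the hot-count value associated
// with the given per-million percentile cutoff.
static bool isColdCountNthPercentile(unsigned Cutoff, uint64_t Count) {
  uint64_t HotCountAtCutoff = Cutoff >= 999950 ? 5 : 100; // made-up summary
  return Count < HotCountAtCutoff;
}

static bool isColdBlock(std::optional<uint64_t> Count) {
  if (!Count)            // no profile count: conservatively treat as cold
    return true;
  if (PercentileCutoff > 0)
    return isColdCountNthPercentile(PercentileCutoff, *Count);
  return *Count < ColdCountThreshold;
}

int main() {
  std::cout << isColdBlock(std::nullopt) << ' '  // 1: unknown count
            << isColdBlock(3) << ' '             // 1: below made-up hot count
            << isColdBlock(1000) << '\n';        // 0: hot enough
}
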
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index d4181591deab..b6cfd7dcbfbc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -116,7 +117,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// the MCInstrDesc.
MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
DebugLoc dl, bool NoImp)
- : MCID(&tid), debugLoc(std::move(dl)) {
+ : MCID(&tid), debugLoc(std::move(dl)), DebugInstrNum(0) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -130,10 +131,12 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
addImplicitDefUseOperands(MF);
}
-/// MachineInstr ctor - Copies MachineInstr arg exactly
-///
+/// MachineInstr ctor - Copies MachineInstr arg exactly.
+/// Does not copy the number from debug instruction numbering, to preserve
+/// uniqueness.
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()) {
+ : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()),
+ DebugInstrNum(0) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -147,6 +150,10 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
setFlags(MI.Flags);
}
+void MachineInstr::moveBefore(MachineInstr *MovePos) {
+ MovePos->getParent()->splice(MovePos, getParent(), getIterator());
+}
+
/// getRegInfo - If this instruction is embedded into a MachineFunction,
/// return the MachineRegisterInfo object for the current function, otherwise
/// return null.
@@ -701,11 +708,10 @@ bool MachineInstr::isCandidateForCallSiteEntry(QueryType Type) const {
if (!isCall(Type))
return false;
switch (getOpcode()) {
- case TargetOpcode::PATCHABLE_EVENT_CALL:
- case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
case TargetOpcode::PATCHPOINT:
case TargetOpcode::STACKMAP:
case TargetOpcode::STATEPOINT:
+ case TargetOpcode::FENTRY_CALL:
return false;
}
return true;
@@ -835,27 +841,27 @@ const DILabel *MachineInstr::getDebugLabel() const {
}
const MachineOperand &MachineInstr::getDebugVariableOp() const {
- assert(isDebugValue() && "not a DBG_VALUE");
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
return getOperand(2);
}
MachineOperand &MachineInstr::getDebugVariableOp() {
- assert(isDebugValue() && "not a DBG_VALUE");
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
return getOperand(2);
}
const DILocalVariable *MachineInstr::getDebugVariable() const {
- assert(isDebugValue() && "not a DBG_VALUE");
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
return cast<DILocalVariable>(getOperand(2).getMetadata());
}
MachineOperand &MachineInstr::getDebugExpressionOp() {
- assert(isDebugValue() && "not a DBG_VALUE");
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
return getOperand(3);
}
const DIExpression *MachineInstr::getDebugExpression() const {
- assert(isDebugValue() && "not a DBG_VALUE");
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
return cast<DIExpression>(getOperand(3).getMetadata());
}
@@ -1094,10 +1100,12 @@ void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) {
if (DefIdx < TiedMax)
UseMO.TiedTo = DefIdx + 1;
else {
- // Inline asm can use the group descriptors to find tied operands, but on
- // normal instruction, the tied def must be within the first TiedMax
+ // Inline asm can use the group descriptors to find tied operands,
+ // statepoint tied operands are trivial to match (1-1 reg def with reg use),
+ // but on normal instruction, the tied def must be within the first TiedMax
// operands.
- assert(isInlineAsm() && "DefIdx out of range");
+ assert((isInlineAsm() || getOpcode() == TargetOpcode::STATEPOINT) &&
+ "DefIdx out of range");
UseMO.TiedTo = TiedMax;
}
@@ -1117,7 +1125,7 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
return MO.TiedTo - 1;
// Uses on normal instructions can be out of range.
- if (!isInlineAsm()) {
+ if (!isInlineAsm() && getOpcode() != TargetOpcode::STATEPOINT) {
// Normal tied defs must be in the 0..TiedMax-1 range.
if (MO.isUse())
return TiedMax - 1;
@@ -1130,6 +1138,25 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
llvm_unreachable("Can't find tied use");
}
+ if (getOpcode() == TargetOpcode::STATEPOINT) {
+ // In STATEPOINT defs correspond 1-1 to GC pointer operands passed
+ // on registers.
+ StatepointOpers SO(this);
+ unsigned CurUseIdx = SO.getFirstGCPtrIdx();
+ assert(CurUseIdx != -1U && "only gc pointer statepoint operands can be tied");
+ unsigned NumDefs = getNumDefs();
+ for (unsigned CurDefIdx = 0; CurDefIdx < NumDefs; ++CurDefIdx) {
+ while (!getOperand(CurUseIdx).isReg())
+ CurUseIdx = StackMaps::getNextMetaArgIdx(this, CurUseIdx);
+ if (OpIdx == CurDefIdx)
+ return CurUseIdx;
+ if (OpIdx == CurUseIdx)
+ return CurDefIdx;
+ CurUseIdx = StackMaps::getNextMetaArgIdx(this, CurUseIdx);
+ }
+ llvm_unreachable("Can't find tied use");
+ }
+
// Now deal with inline asm by parsing the operand group descriptor flags.
// Find the beginning of each operand group.
SmallVector<unsigned, 8> GroupIdx;
@@ -1213,7 +1240,7 @@ bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const {
// See if this instruction does a load. If so, we have to guarantee that the
// loaded value doesn't change between the load and its intended
- // destination. The check for isInvariantLoad gives the targe the chance to
+ // destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
if (mayLoad() && !isDereferenceableInvariantLoad(AA))
@@ -1224,47 +1251,21 @@ bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const {
return true;
}
-bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
- bool UseTBAA) const {
- const MachineFunction *MF = getMF();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- const MachineFrameInfo &MFI = MF->getFrameInfo();
-
- // If neither instruction stores to memory, they can't alias in any
- // meaningful way, even if they read from the same address.
- if (!mayStore() && !Other.mayStore())
- return false;
-
- // Both instructions must be memory operations to be able to alias.
- if (!mayLoadOrStore() || !Other.mayLoadOrStore())
- return false;
-
- // Let the target decide if memory accesses cannot possibly overlap.
- if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
- return false;
-
- // FIXME: Need to handle multiple memory operands to support all targets.
- if (!hasOneMemOperand() || !Other.hasOneMemOperand())
- return true;
-
- MachineMemOperand *MMOa = *memoperands_begin();
- MachineMemOperand *MMOb = *Other.memoperands_begin();
-
- // The following interface to AA is fashioned after DAGCombiner::isAlias
- // and operates with MachineMemOperand offset with some important
- // assumptions:
+static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
+ bool UseTBAA, const MachineMemOperand *MMOa,
+ const MachineMemOperand *MMOb) {
+ // The following interface to AA is fashioned after DAGCombiner::isAlias and
+ // operates with MachineMemOperand offset with some important assumptions:
// - LLVM fundamentally assumes flat address spaces.
- // - MachineOperand offset can *only* result from legalization and
- // cannot affect queries other than the trivial case of overlap
- // checking.
- // - These offsets never wrap and never step outside
- // of allocated objects.
+ // - MachineOperand offset can *only* result from legalization and cannot
+ // affect queries other than the trivial case of overlap checking.
+ // - These offsets never wrap and never step outside of allocated objects.
// - There should never be any negative offsets here.
//
// FIXME: Modify API to hide this math from "user"
- // Even before we go to AA we can reason locally about some
- // memory objects. It can save compile time, and possibly catch some
- // corner cases not currently covered.
+ // Even before we go to AA we can reason locally about some memory objects. It
+ // can save compile time, and possibly catch some corner cases not currently
+ // covered.
int64_t OffsetA = MMOa->getOffset();
int64_t OffsetB = MMOb->getOffset();
@@ -1306,20 +1307,63 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
- int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
- : MemoryLocation::UnknownSize;
- int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
- : MemoryLocation::UnknownSize;
+ int64_t OverlapA =
+ KnownWidthA ? WidthA + OffsetA - MinOffset : MemoryLocation::UnknownSize;
+ int64_t OverlapB =
+ KnownWidthB ? WidthB + OffsetB - MinOffset : MemoryLocation::UnknownSize;
AliasResult AAResult = AA->alias(
- MemoryLocation(ValA, OverlapA,
- UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
MemoryLocation(ValB, OverlapB,
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != NoAlias);
}
+bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
+ bool UseTBAA) const {
+ const MachineFunction *MF = getMF();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+
+ // Exclude call instructions, which may alter memory but cannot be handled
+ // by this function.
+ if (isCall() || Other.isCall())
+ return true;
+
+ // If neither instruction stores to memory, they can't alias in any
+ // meaningful way, even if they read from the same address.
+ if (!mayStore() && !Other.mayStore())
+ return false;
+
+ // Both instructions must be memory operations to be able to alias.
+ if (!mayLoadOrStore() || !Other.mayLoadOrStore())
+ return false;
+
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
+ return false;
+
+ // Memory operations without memory operands may access anything. Be
+ // conservative and assume `MayAlias`.
+ if (memoperands_empty() || Other.memoperands_empty())
+ return true;
+
+ // Skip if there are too many memory operands.
+ auto NumChecks = getNumMemOperands() * Other.getNumMemOperands();
+ if (NumChecks > TII->getMemOperandAACheckLimit())
+ return true;
+
+ // Check each pair of memory operands from both instructions; the two
+ // instructions are known not to alias only if no pair of operands may alias.
+ for (auto *MMOa : memoperands())
+ for (auto *MMOb : Other.memoperands())
+ if (MemOperandsHaveAlias(MFI, AA, UseTBAA, MMOa, MMOb))
+ return true;
+
+ return false;
+}
+
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
/// or volatile memory reference, or if the information describing the memory
/// reference is not available. Return false if it is known to have no ordered
@@ -1418,7 +1462,8 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
}
bool MachineInstr::isLoadFoldBarrier() const {
- return mayStore() || isCall() || hasUnmodeledSideEffects();
+ return mayStore() || isCall() ||
+ (hasUnmodeledSideEffects() && !isPseudoProbe());
}
/// allDefsAreDead - Return true if all the defs of this instruction are dead.
@@ -1447,6 +1492,8 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF,
bool MachineInstr::hasComplexRegisterTies() const {
const MCInstrDesc &MCID = getDesc();
+ if (MCID.Opcode == TargetOpcode::STATEPOINT)
+ return true;
for (unsigned I = 0, E = getNumOperands(); I < E; ++I) {
const auto &Operand = getOperand(I);
if (!Operand.isReg() || Operand.isDef())
@@ -1753,6 +1800,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
HeapAllocMarker->printAsOperand(OS, MST);
}
+ if (DebugInstrNum) {
+ if (!FirstOp)
+ OS << ",";
+ OS << " debug-instr-number " << DebugInstrNum;
+ }
+
if (!SkipDebugLoc) {
if (const DebugLoc &DL = getDebugLoc()) {
if (!FirstOp)
@@ -2227,3 +2280,9 @@ MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const {
return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
return None;
}
+
+unsigned MachineInstr::getDebugInstrNum() {
+ if (DebugInstrNum == 0)
+ DebugInstrNum = getParent()->getParent()->getNewDebugInstrNum();
+ return DebugInstrNum;
+}
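
The refactored mayAlias above now answers conservatively ("may alias") when either instruction lacks memory operands or when the number of operand pairs exceeds the target's check limit, and otherwise reports aliasing exactly when some pair of memory operands may overlap. Below is a standalone sketch of that structure; MemOp and the interval-overlap test are simplifications, not MachineMemOperand or the real AA query.

// Standalone model of the pairwise memory-operand alias check added to
// MachineInstr::mayAlias: be conservative when memory operands are missing
// or too numerous; otherwise alias iff some pair of operands overlaps.
#include <cstdint>
#include <iostream>
#include <vector>

struct MemOp { int64_t Offset; int64_t Width; };   // hypothetical

static bool memOperandsHaveAlias(const MemOp &A, const MemOp &B) {
  // Simplified overlap check on [Offset, Offset + Width).
  return A.Offset < B.Offset + B.Width && B.Offset < A.Offset + A.Width;
}

static bool mayAlias(const std::vector<MemOp> &A, const std::vector<MemOp> &B,
                     std::size_t CheckLimit = 16) {
  if (A.empty() || B.empty())
    return true;                       // unknown accesses: be conservative
  if (A.size() * B.size() > CheckLimit)
    return true;                       // too expensive to prove disjointness
  for (const MemOp &MA : A)
    for (const MemOp &MB : B)
      if (memOperandsHaveAlias(MA, MB))
        return true;
  return false;
}

int main() {
  std::vector<MemOp> Load{{0, 4}}, Store{{8, 4}}, Overlap{{2, 4}};
  std::cout << mayAlias(Load, Store) << ' '    // 0: disjoint ranges
            << mayAlias(Load, Overlap) << ' '  // 1: ranges overlap
            << mayAlias({}, Store) << '\n';    // 1: no memory operands
}
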
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
index 5e8a916b3b3b..c06bc39b4940 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -90,7 +91,7 @@ static cl::opt<UseBFI>
DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
cl::desc("Disable hoisting instructions to"
" hotter blocks"),
- cl::init(UseBFI::None), cl::Hidden,
+ cl::init(UseBFI::PGO), cl::Hidden,
cl::values(clEnumValN(UseBFI::None, "none",
"disable the feature"),
clEnumValN(UseBFI::PGO, "pgo",
@@ -145,7 +146,7 @@ namespace {
}
// Track 'estimated' register pressure.
- SmallSet<unsigned, 32> RegSeen;
+ SmallSet<Register, 32> RegSeen;
SmallVector<unsigned, 8> RegPressure;
// Register pressure "limit" per register pressure set. If the pressure
@@ -156,7 +157,7 @@ namespace {
SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
// For each opcode, keep a list of potential CSE instructions.
- DenseMap<unsigned, std::vector<const MachineInstr *>> CSEMap;
+ DenseMap<unsigned, std::vector<MachineInstr *>> CSEMap;
enum {
SpeculateFalse = 0,
@@ -212,7 +213,7 @@ namespace {
BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
SmallVectorImpl<CandidateInfo> &Candidates);
- void AddToLiveIns(unsigned Reg);
+ void AddToLiveIns(MCRegister Reg);
bool IsLICMCandidate(MachineInstr &I);
@@ -221,7 +222,7 @@ namespace {
bool HasLoopPHIUse(const MachineInstr *MI) const;
bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
- unsigned Reg) const;
+ Register Reg) const;
bool IsCheapInstruction(MachineInstr &MI) const;
@@ -245,8 +246,6 @@ namespace {
void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
- void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
-
void SinkIntoLoop();
void InitRegPressure(MachineBasicBlock *BB);
@@ -260,13 +259,12 @@ namespace {
MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
- const MachineInstr *
- LookForDuplicate(const MachineInstr *MI,
- std::vector<const MachineInstr *> &PrevMIs);
+ MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<MachineInstr *> &PrevMIs);
- bool EliminateCSE(
- MachineInstr *MI,
- DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);
+ bool
+ EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);
bool MayCSE(MachineInstr *MI);
@@ -606,7 +604,7 @@ void MachineLICMBase::HoistRegionPostRA() {
/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
-void MachineLICMBase::AddToLiveIns(unsigned Reg) {
+void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
@@ -802,8 +800,13 @@ void MachineLICMBase::SinkIntoLoop() {
I != Preheader->instr_end(); ++I) {
// We need to ensure that we can safely move this instruction into the loop.
// As such, it must not have side-effects, such as those a call has.
- if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
+ LLVM_DEBUG(dbgs() << "LICM: Analysing sink candidate: " << *I);
+ if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) {
+ LLVM_DEBUG(dbgs() << "LICM: Added as sink candidate.\n");
Candidates.push_back(&*I);
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "LICM: Not added as sink candidate.\n");
}
for (MachineInstr *I : Candidates) {
@@ -813,8 +816,11 @@ void MachineLICMBase::SinkIntoLoop() {
if (!MRI->hasOneDef(MO.getReg()))
continue;
bool CanSink = true;
- MachineBasicBlock *B = nullptr;
+ MachineBasicBlock *SinkBlock = nullptr;
+ LLVM_DEBUG(dbgs() << "LICM: Try sinking: " << *I);
+
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+ LLVM_DEBUG(dbgs() << "LICM: Analysing use: "; MI.dump());
// FIXME: Come up with a proper cost model that estimates whether sinking
// the instruction (and thus possibly executing it on every loop
// iteration) is more expensive than a register.
@@ -823,24 +829,40 @@ void MachineLICMBase::SinkIntoLoop() {
CanSink = false;
break;
}
- if (!B) {
- B = MI.getParent();
+ if (!SinkBlock) {
+ SinkBlock = MI.getParent();
+ LLVM_DEBUG(dbgs() << "LICM: Setting sink block to: "
+ << printMBBReference(*SinkBlock) << "\n");
continue;
}
- B = DT->findNearestCommonDominator(B, MI.getParent());
- if (!B) {
+ SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
+ if (!SinkBlock) {
+ LLVM_DEBUG(dbgs() << "LICM: Can't find nearest dominator\n");
CanSink = false;
break;
}
+ LLVM_DEBUG(dbgs() << "LICM: Setting nearest common dom block: " <<
+ printMBBReference(*SinkBlock) << "\n");
+ }
+ if (!CanSink) {
+ LLVM_DEBUG(dbgs() << "LICM: Can't sink instruction.\n");
+ continue;
+ }
+ if (!SinkBlock) {
+ LLVM_DEBUG(dbgs() << "LICM: Not sinking, can't find sink block.\n");
+ continue;
}
- if (!CanSink || !B || B == Preheader)
+ if (SinkBlock == Preheader) {
+ LLVM_DEBUG(dbgs() << "LICM: Not sinking, sink block is the preheader\n");
continue;
+ }
- LLVM_DEBUG(dbgs() << "Sinking to " << printMBBReference(*B) << " from "
- << printMBBReference(*I->getParent()) << ": " << *I);
- B->splice(B->getFirstNonPHI(), Preheader, I);
+ LLVM_DEBUG(dbgs() << "LICM: Sinking to " << printMBBReference(*SinkBlock)
+ << " from " << printMBBReference(*I->getParent())
+ << ": " << *I);
+ SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
- // The instruction is is moved from its basic block, so do not retain the
+ // The instruction is moved from its basic block, so do not retain the
// debug information.
assert(!I->isDebugInstr() && "Should not sink debug inst");
I->setDebugLoc(DebugLoc());
@@ -978,7 +1000,7 @@ static bool isInvariantStore(const MachineInstr &MI,
Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
if (Register::isVirtualRegister(Reg))
return false;
- if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
+ if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
return false;
else
FoundCallerPresReg = true;
@@ -1008,7 +1030,7 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
if (Register::isVirtualRegister(CopySrcReg))
return false;
- if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF))
+ if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
return false;
Register CopyDstReg = MI.getOperand(0).getReg();
@@ -1030,6 +1052,7 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
bool DontMoveAcrossStore = true;
if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
!(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
+ LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
return false;
}
@@ -1040,65 +1063,28 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
// indexed load from a jump table.
// Stores and side effects are already checked by isSafeToMove.
if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
- !IsGuaranteedToExecute(I.getParent()))
+ !IsGuaranteedToExecute(I.getParent())) {
+ LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
+ return false;
+ }
+
+ // The convergent attribute is used on operations that involve inter-thread
+ // communication whose results are implicitly affected by the enclosing
+ // control flow. It is not safe to hoist or sink such operations across
+ // control flow.
+ if (I.isConvergent())
return false;
return true;
}
/// Returns true if the instruction is loop invariant.
-/// I.e., all virtual register operands are defined outside of the loop,
-/// physical registers aren't accessed explicitly, and there are no side
-/// effects that aren't captured by the operands or other flags.
bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
- if (!IsLICMCandidate(I))
+ if (!IsLICMCandidate(I)) {
+ LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
return false;
-
- // The instruction is loop invariant if all of its operands are.
- for (const MachineOperand &MO : I.operands()) {
- if (!MO.isReg())
- continue;
-
- Register Reg = MO.getReg();
- if (Reg == 0) continue;
-
- // Don't hoist an instruction that uses or defines a physical register.
- if (Register::isPhysicalRegister(Reg)) {
- if (MO.isUse()) {
- // If the physreg has no defs anywhere, it's just an ambient register
- // and we can freely move its uses. Alternatively, if it's allocatable,
- // it could get allocated to something with a def during allocation.
- // However, if the physreg is known to always be caller saved/restored
- // then this use is safe to hoist.
- if (!MRI->isConstantPhysReg(Reg) &&
- !(TRI->isCallerPreservedPhysReg(Reg, *I.getMF())))
- return false;
- // Otherwise it's safe to move.
- continue;
- } else if (!MO.isDead()) {
- // A def that isn't dead. We can't move it.
- return false;
- } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
- // If the reg is live into the loop, we can't hoist an instruction
- // which would clobber it.
- return false;
- }
- }
-
- if (!MO.isUse())
- continue;
-
- assert(MRI->getVRegDef(Reg) &&
- "Machine instr not mapped for this vreg?!");
-
- // If the loop contains the definition of an operand, then the instruction
- // isn't loop invariant.
- if (CurLoop->contains(MRI->getVRegDef(Reg)))
- return false;
}
-
- // If we got this far, the instruction is loop invariant!
- return true;
+ return CurLoop->isLoopInvariant(I);
}
/// Return true if the specified instruction is used by a phi node and hoisting
@@ -1138,9 +1124,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
/// Compute operand latency between a def of 'Reg' and an use in the current
/// loop, return true if the target considered it high.
-bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI,
- unsigned DefIdx,
- unsigned Reg) const {
+bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ Register Reg) const {
if (MRI->use_nodbg_empty(Reg))
return false;
@@ -1400,10 +1385,10 @@ void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
/// Find an instruction among PrevMIs that is a duplicate of MI.
/// Return this instruction if it's found.
-const MachineInstr*
+MachineInstr *
MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
- std::vector<const MachineInstr*> &PrevMIs) {
- for (const MachineInstr *PrevMI : PrevMIs)
+ std::vector<MachineInstr *> &PrevMIs) {
+ for (MachineInstr *PrevMI : PrevMIs)
if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
return PrevMI;
@@ -1414,14 +1399,15 @@ MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
/// computes the same value. If it's found, do a RAUW with the definition of
/// the existing instruction rather than hoisting the instruction to the
/// preheader.
-bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
- DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
+bool MachineLICMBase::EliminateCSE(
+ MachineInstr *MI,
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
// the undef property onto uses.
if (CI == CSEMap.end() || MI->isImplicitDef())
return false;
- if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
// Replace virtual registers defined by MI by their counterparts defined
@@ -1461,6 +1447,9 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
Register DupReg = Dup->getOperand(Idx).getReg();
MRI->replaceRegWith(Reg, DupReg);
MRI->clearKillFlags(DupReg);
+ // Clear Dup's dead flag, if any; we reuse it for Reg.
+ if (!MRI->use_nodbg_empty(DupReg))
+ Dup->getOperand(Idx).setIsDead(false);
}
MI->eraseFromParent();
@@ -1474,8 +1463,8 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
/// the loop.
bool MachineLICMBase::MayCSE(MachineInstr *MI) {
unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
- CI = CSEMap.find(Opcode);
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
+ CSEMap.find(Opcode);
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
// the undef property onto uses.
if (CI == CSEMap.end() || MI->isImplicitDef())
@@ -1529,8 +1518,8 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Look for opportunity to CSE the hoisted instruction.
unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
- CI = CSEMap.find(Opcode);
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
+ CSEMap.find(Opcode);
if (!EliminateCSE(MI, CI)) {
// Otherwise, splice the instruction to the preheader.
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
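
The reworked SinkIntoLoop above chooses where to sink by folding the nearest common dominator over the parent blocks of all users, then bails out if no block was found or the result is the preheader. Here is a toy model of that selection, using an explicit immediate-dominator array instead of MachineDominatorTree; the block numbering is hypothetical.

// Toy model of the sink-block selection in MachineLICMBase::SinkIntoLoop:
// fold nearestCommonDominator over the blocks of all users, then reject
// sinking if no block was found or the result is the preheader.
#include <iostream>
#include <optional>
#include <vector>

static const int IDom[] = {-1, 0, 1, 1, 2}; // block -> immediate dominator

static int nearestCommonDominator(int A, int B) {
  auto Depth = [](int N) {
    int D = 0;
    for (; IDom[N] != -1; N = IDom[N])
      ++D;
    return D;
  };
  int DA = Depth(A), DB = Depth(B);
  while (DA > DB) { A = IDom[A]; --DA; }
  while (DB > DA) { B = IDom[B]; --DB; }
  while (A != B) { A = IDom[A]; B = IDom[B]; }
  return A;
}

static std::optional<int> pickSinkBlock(const std::vector<int> &UseBlocks,
                                        int Preheader) {
  std::optional<int> SinkBlock;
  for (int UB : UseBlocks)
    SinkBlock = SinkBlock ? nearestCommonDominator(*SinkBlock, UB) : UB;
  if (!SinkBlock || *SinkBlock == Preheader)
    return std::nullopt;               // nothing useful to sink to
  return SinkBlock;
}

int main() {
  if (auto B = pickSinkBlock({3, 4}, /*Preheader=*/0))
    std::cout << "sink to block " << *B << '\n';   // blocks 3 and 4 meet at 1
}
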
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 0c1439da9b29..78480d0e1488 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -16,11 +16,14 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
@@ -146,6 +149,59 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L,
return Preheader;
}
+bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
+ MachineFunction *MF = I.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+
+ // The instruction is loop invariant if all of its operands are.
+ for (const MachineOperand &MO : I.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // An instruction that uses or defines a physical register cannot, for
+ // example, be hoisted, so mark this as not invariant.
+ if (Register::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ // However, if the physreg is known to always be caller saved/restored
+ // then this use is safe to hoist.
+ if (!MRI->isConstantPhysReg(Reg) &&
+ !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())))
+ return false;
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead can't be moved.
+ return false;
+ } else if (getHeader()->isLiveIn(Reg)) {
+ // If the reg is live into the loop, we can't hoist an instruction
+ // which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (contains(MRI->getVRegDef(Reg)))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineLoop::dump() const {
print(dbgs());
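
MachineLoop::isLoopInvariant above declares an instruction invariant when no operand ties it to the loop: physical-register uses must be constant (or caller-preserved), physical-register defs must be dead, and every virtual-register use must be defined outside the loop. The sketch below models that operand walk with plain data; the Operand struct and the register sets are hypothetical simplifications of the MachineOperand/MachineRegisterInfo queries.

// Simplified model of the operand walk in MachineLoop::isLoopInvariant.
#include <iostream>
#include <unordered_set>
#include <vector>

struct Operand { int Reg; bool IsUse; bool IsPhys; bool IsDeadDef; };

static bool isLoopInvariant(const std::vector<Operand> &Ops,
                            const std::unordered_set<int> &DefinedInLoop,
                            const std::unordered_set<int> &ConstantPhysRegs) {
  for (const Operand &MO : Ops) {
    if (MO.IsPhys) {
      if (MO.IsUse) {
        if (!ConstantPhysRegs.count(MO.Reg))
          return false;                // reads a mutable physreg
      } else if (!MO.IsDeadDef) {
        return false;                  // live physreg def can't be moved
      }
      continue;
    }
    if (MO.IsUse && DefinedInLoop.count(MO.Reg))
      return false;                    // operand defined inside the loop
  }
  return true;
}

int main() {
  std::unordered_set<int> DefinedInLoop{7}, ConstantPhys{100};
  std::cout << isLoopInvariant({{5, true, false, false}}, DefinedInLoop,
                               ConstantPhys)
            << ' '
            << isLoopInvariant({{7, true, false, false}}, DefinedInLoop,
                               ConstantPhys)
            << '\n';                   // prints "1 0"
}
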
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
index 2295e1ca6d4e..fdcc8472f1c2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
@@ -130,14 +130,3 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
return NewBB;
}
-
-bool llvm::isRegLiveInExitBlocks(MachineLoop *Loop, int PhysReg) {
- SmallVector<MachineBasicBlock *, 4> ExitBlocks;
- Loop->getExitBlocks(ExitBlocks);
-
- for (auto *MBB : ExitBlocks)
- if (MBB->isLiveIn(PhysReg))
- return true;
-
- return false;
-}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
index f866c7ca53c6..5565b9cededa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -104,7 +104,8 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
BBCallbacks.back().setMap(this);
Entry.Index = BBCallbacks.size() - 1;
Entry.Fn = BB->getParent();
- MCSymbol *Sym = Context.createTempSymbol(!BB->hasAddressTaken());
+ MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol()
+ : Context.createTempSymbol();
Entry.Symbols.push_back(Sym);
return Entry.Symbols;
}
@@ -143,8 +144,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
BBCallbacks[OldEntry.Index] = nullptr; // Update the callback.
// Otherwise, we need to add the old symbols to the new block's set.
- NewEntry.Symbols.insert(NewEntry.Symbols.end(), OldEntry.Symbols.begin(),
- OldEntry.Symbols.end());
+ llvm::append_range(NewEntry.Symbols, OldEntry.Symbols);
}
void MMIAddrLabelMapCallbackPtr::deleted() {
@@ -170,6 +170,7 @@ void MachineModuleInfo::finalize() {
AddrLabelSymbols = nullptr;
Context.reset();
+ // We don't clear the ExternalContext.
delete ObjFileMMI;
ObjFileMMI = nullptr;
@@ -178,7 +179,8 @@ void MachineModuleInfo::finalize() {
MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
: TM(std::move(MMI.TM)),
Context(MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(),
- MMI.TM.getObjFileLowering(), nullptr, nullptr, false) {
+ MMI.TM.getObjFileLowering(), nullptr, nullptr, false),
+ MachineFunctions(std::move(MMI.MachineFunctions)) {
ObjFileMMI = MMI.ObjFileMMI;
CurCallSite = MMI.CurCallSite;
UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint;
@@ -186,6 +188,7 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
HasSplitStack = MMI.HasSplitStack;
HasNosplitStack = MMI.HasNosplitStack;
AddrLabelSymbols = MMI.AddrLabelSymbols;
+ ExternalContext = MMI.ExternalContext;
TheModule = MMI.TheModule;
}
@@ -195,6 +198,14 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
initialize();
}
+MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM,
+ MCContext *ExtContext)
+ : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
+ TM->getObjFileLowering(), nullptr, nullptr, false),
+ ExternalContext(ExtContext) {
+ initialize();
+}
+
MachineModuleInfo::~MachineModuleInfo() { finalize(); }
//===- Address of Block Management ----------------------------------------===//
@@ -203,7 +214,7 @@ ArrayRef<MCSymbol *>
MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
// Lazily create AddrLabelSymbols.
if (!AddrLabelSymbols)
- AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ AddrLabelSymbols = new MMIAddrLabelMap(getContext());
return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
}
@@ -295,6 +306,12 @@ MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass(
initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
+MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass(
+ const LLVMTargetMachine *TM, MCContext *ExtContext)
+ : ImmutablePass(ID), MMI(TM, ExtContext) {
+ initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
// Handle the Pass registration stuff necessary to use DataLayout's.
INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo",
"Machine Module Information", false, false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index 2b4fd654e46c..9b09f5273298 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -85,7 +85,7 @@ void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx,
}
void MachineOperand::substPhysReg(MCRegister Reg, const TargetRegisterInfo &TRI) {
- assert(Reg.isPhysical());
+ assert(Register::isPhysicalRegister(Reg));
if (getSubReg()) {
Reg = TRI.getSubReg(Reg, getSubReg());
// Note that getSubReg() may return 0 if the sub-register doesn't exist.
@@ -153,22 +153,25 @@ void MachineOperand::removeRegFromUses() {
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
-void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+void MachineOperand::ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags) {
assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
removeRegFromUses();
OpKind = MO_Immediate;
Contents.ImmVal = ImmVal;
+ setTargetFlags(TargetFlags);
}
-void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
+void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm,
+ unsigned TargetFlags) {
assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
removeRegFromUses();
OpKind = MO_FPImmediate;
Contents.CFP = FPImm;
+ setTargetFlags(TargetFlags);
}
void MachineOperand::ChangeToES(const char *SymName,
@@ -197,7 +200,7 @@ void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset,
setTargetFlags(TargetFlags);
}
-void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
+void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into an MCSymbol");
@@ -205,9 +208,10 @@ void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
OpKind = MO_MCSymbol;
Contents.Sym = Sym;
+ setTargetFlags(TargetFlags);
}
-void MachineOperand::ChangeToFrameIndex(int Idx) {
+void MachineOperand::ChangeToFrameIndex(int Idx, unsigned TargetFlags) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into a FrameIndex");
@@ -215,6 +219,7 @@ void MachineOperand::ChangeToFrameIndex(int Idx) {
OpKind = MO_FrameIndex;
setIndex(Idx);
+ setTargetFlags(TargetFlags);
}
void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset,
@@ -415,6 +420,11 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
return nullptr;
}
+const char *MachineOperand::getTargetIndexName() const {
+ const MachineFunction *MF = getMFIfAvailable(*this);
+ return MF ? ::getTargetIndexName(*MF, this->getIndex()) : nullptr;
+}
+
static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
for (const auto &I : Flags) {
@@ -823,7 +833,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "target-index(";
const char *Name = "<unknown>";
if (const MachineFunction *MF = getMFIfAvailable(*this))
- if (const auto *TargetIndexName = getTargetIndexName(*MF, getIndex()))
+ if (const auto *TargetIndexName = ::getTargetIndexName(*MF, getIndex()))
Name = TargetIndexName;
OS << Name << ')';
printOperandOffset(OS, getOffset());
@@ -1142,7 +1152,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
const MIRFormatter *Formatter = TII->getMIRFormatter();
// FIXME: This is not necessarily the correct MIR serialization format for
// a custom pseudo source value, but at least it allows
- // -print-machineinstrs to work on a target with custom pseudo source
+ // MIR printing to work on a target with custom pseudo source
// values.
OS << "custom \"";
Formatter->printCustomPseudoSourceValue(OS, MST, *PVal);
@@ -1152,8 +1162,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
}
MachineOperand::printOperandOffset(OS, getOffset());
- if (getBaseAlign() != getSize())
- OS << ", align " << getBaseAlign().value();
+ if (getAlign() != getSize())
+ OS << ", align " << getAlign().value();
+ if (getAlign() != getBaseAlign())
+ OS << ", basealign " << getBaseAlign().value();
auto AAInfo = getAAInfo();
if (AAInfo.TBAA) {
OS << ", !tbaa ";
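
The printing change above emits ", align" only when the alignment differs from the access size, and additionally emits ", basealign" when the offset-adjusted alignment differs from the alignment of the base pointer. A small sketch of that formatting rule, using plain integers rather than MachineMemOperand (the values are hypothetical):

// Sketch of the adjusted MachineMemOperand tail printing.
#include <iostream>
#include <sstream>
#include <string>

static std::string printMMOTail(unsigned Size, unsigned Align,
                                unsigned BaseAlign) {
  std::ostringstream OS;
  if (Align != Size)
    OS << ", align " << Align;
  if (Align != BaseAlign)
    OS << ", basealign " << BaseAlign;
  return OS.str();
}

int main() {
  std::cout << printMMOTail(4, 4, 4) << '|' << '\n';  // nothing printed
  std::cout << printMMOTail(4, 2, 8) << '\n';         // ", align 2, basealign 8"
}
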
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index f9d099e02995..02998d41d831 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,10 +59,8 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -309,10 +307,8 @@ struct InstructionMapper {
// repeated substring.
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
InstrListForMBB);
- InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
- InstrListForMBB.end());
- UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
- UnsignedVecForMBB.end());
+ llvm::append_range(InstrList, InstrListForMBB);
+ llvm::append_range(UnsignedVec, UnsignedVecForMBB);
}
}
@@ -549,11 +545,10 @@ void MachineOutliner::findCandidates(
// That is, one must either
// * End before the other starts
// * Start after the other ends
- if (std::all_of(
- CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(),
- [&StartIdx, &EndIdx](const Candidate &C) {
- return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
- })) {
+ if (llvm::all_of(CandidatesForRepeatedSeq, [&StartIdx,
+ &EndIdx](const Candidate &C) {
+ return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
+ })) {
// It doesn't overlap with anything, so we can outline it.
// Each sequence is over [StartIt, EndIt].
// Save the candidate and its location.
@@ -656,6 +651,8 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
OriginalMF->getFrameInstructions();
for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
++I) {
+ if (I->isDebugInstr())
+ continue;
MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
if (I->isCFIInstruction()) {
unsigned CFIIndex = NewMI->getOperand(0).getCFIIndex();
@@ -691,7 +688,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
// The live-in set for the outlined function is the union of the live-ins
// from all the outlining points.
- for (MCPhysReg Reg : make_range(CandLiveIns.begin(), CandLiveIns.end()))
+ for (MCPhysReg Reg : CandLiveIns)
LiveIns.addReg(Reg);
}
addLiveIns(MBB, LiveIns);
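
The llvm::all_of rewrite above keeps the same acceptance rule for outlining candidates: a repeated sequence at [StartIdx, EndIdx] is kept only if it does not overlap any already-accepted candidate, i.e. it ends before the other starts or starts after the other ends. A small standalone sketch of that check with a hypothetical Candidate struct:

// Model of the non-overlap test used when collecting outlining candidates.
#include <algorithm>
#include <iostream>
#include <vector>

struct Candidate { unsigned StartIdx, EndIdx; };

static bool canAccept(const std::vector<Candidate> &Accepted,
                      unsigned StartIdx, unsigned EndIdx) {
  return std::all_of(Accepted.begin(), Accepted.end(),
                     [StartIdx, EndIdx](const Candidate &C) {
                       return EndIdx < C.StartIdx || StartIdx > C.EndIdx;
                     });
}

int main() {
  std::vector<Candidate> Accepted{{10, 14}};
  std::cout << canAccept(Accepted, 0, 4) << ' '     // 1: ends before 10
            << canAccept(Accepted, 12, 20) << '\n'; // 0: overlaps [10, 14]
}
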
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
new file mode 100644
index 000000000000..e81575c88935
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
@@ -0,0 +1,121 @@
+//===---------- MachinePassManager.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass management machinery for machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassManager.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/PassManagerImpl.h"
+
+using namespace llvm;
+
+namespace llvm {
+template class AllAnalysesOn<MachineFunction>;
+template class AnalysisManager<MachineFunction>;
+template class PassManager<MachineFunction>;
+
+Error MachineFunctionPassManager::run(Module &M,
+ MachineFunctionAnalysisManager &MFAM) {
+ // MachineModuleAnalysis is a module analysis pass that is never invalidated
+ // because we don't run any module pass in codegen pipeline. This is very
+ // important because the codegen state is stored in MMI which is the analysis
+ // result of MachineModuleAnalysis. MMI should not be recomputed.
+ auto &MMI = MFAM.getResult<MachineModuleAnalysis>(M);
+
+ (void)RequireCodeGenSCCOrder;
+ assert(!RequireCodeGenSCCOrder && "not implemented");
+
+ // Add a PIC to verify machine functions.
+ if (VerifyMachineFunction) {
+ PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(M);
+
+ // No need to pop this callback later since the MIR pipeline is flat, which
+ // means the current pipeline is the top-level pipeline. Callbacks are not
+ // used after the current pipeline finishes.
+ PI.pushBeforeNonSkippedPassCallback([&MFAM](StringRef PassID, Any IR) {
+ assert(any_isa<const MachineFunction *>(IR));
+ const MachineFunction *MF = any_cast<const MachineFunction *>(IR);
+ assert(MF && "Machine function should be valid for printing");
+ std::string Banner = std::string("After ") + std::string(PassID);
+ verifyMachineFunction(&MFAM, Banner, *MF);
+ });
+ }
+
+ if (DebugLogging) {
+ dbgs() << "Starting " << getTypeName<MachineFunction>()
+ << " pass manager run.\n";
+ }
+
+ for (auto &F : InitializationFuncs) {
+ if (auto Err = F(M, MFAM))
+ return Err;
+ }
+
+ unsigned Idx = 0;
+ size_t Size = Passes.size();
+ do {
+ // Run machine module passes
+ for (; MachineModulePasses.count(Idx) && Idx != Size; ++Idx) {
+ if (DebugLogging)
+ dbgs() << "Running pass: " << Passes[Idx]->name() << " on "
+ << M.getName() << '\n';
+ if (auto Err = MachineModulePasses.at(Idx)(M, MFAM))
+ return Err;
+ }
+
+ // Finish running all passes.
+ if (Idx == Size)
+ break;
+
+ // Run machine function passes
+
+ // Get index range of machine function passes.
+ unsigned Begin = Idx;
+ for (; !MachineModulePasses.count(Idx) && Idx != Size; ++Idx)
+ ;
+
+ for (Function &F : M) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ continue;
+
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
+ PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(MF);
+
+ for (unsigned I = Begin, E = Idx; I != E; ++I) {
+ auto *P = Passes[I].get();
+
+ if (!PI.runBeforePass<MachineFunction>(*P, MF))
+ continue;
+
+ // TODO: EmitSizeRemarks
+ PreservedAnalyses PassPA = P->run(MF, MFAM);
+ PI.runAfterPass(*P, MF, PassPA);
+ MFAM.invalidate(MF, PassPA);
+ }
+ }
+ } while (true);
+
+ for (auto &F : FinalizationFuncs) {
+ if (auto Err = F(M, MFAM))
+ return Err;
+ }
+
+ if (DebugLogging) {
+ dbgs() << "Finished " << getTypeName<MachineFunction>()
+ << " pass manager run.\n";
+ }
+
+ return Error::success();
+}
+
+} // namespace llvm
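
The run loop above keeps module-level and function-level passes in one list and alternates between them: run any consecutive machine-module passes, then run the next block of machine-function passes over every function, and repeat until the list is exhausted. A standalone sketch of that control flow, with hypothetical pass and function names standing in for the real pass objects:

// Model of the alternation in MachineFunctionPassManager::run.
#include <cstddef>
#include <iostream>
#include <set>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Passes{"mod-A", "fn-1", "fn-2", "mod-B", "fn-3"};
  std::set<std::size_t> ModulePasses{0, 3};   // indices of module-level passes
  std::vector<std::string> Functions{"f", "g"};

  std::size_t Idx = 0, Size = Passes.size();
  do {
    // Run consecutive machine-module passes.
    for (; Idx != Size && ModulePasses.count(Idx); ++Idx)
      std::cout << "module pass " << Passes[Idx] << '\n';
    if (Idx == Size)
      break;
    // Collect the next run of machine-function passes...
    std::size_t Begin = Idx;
    for (; Idx != Size && !ModulePasses.count(Idx); ++Idx)
      ;
    // ...and run it over every function before returning to module passes.
    for (const std::string &F : Functions)
      for (std::size_t I = Begin; I != Idx; ++I)
        std::cout << "  " << Passes[I] << " on " << F << '\n';
  } while (true);
}
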
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index ef4b02ca9e3e..d0fe29f65ede 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -268,6 +268,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
// Reset the pragma for the next loop in iteration.
disabledByPragma = false;
+ II_setByPragma = 0;
MachineBasicBlock *LBLK = L.getTopBlock();
@@ -441,6 +442,16 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
return SMS.hasNewSchedule();
}
+void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
if (II_setByPragma > 0)
MII = II_setByPragma;
@@ -705,14 +716,13 @@ static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
/// This function calls the code in ValueTracking, but first checks that the
/// instruction has a memory operand.
static void getUnderlyingObjects(const MachineInstr *MI,
- SmallVectorImpl<const Value *> &Objs,
- const DataLayout &DL) {
+ SmallVectorImpl<const Value *> &Objs) {
if (!MI->hasOneMemOperand())
return;
MachineMemOperand *MM = *MI->memoperands_begin();
if (!MM->getValue())
return;
- GetUnderlyingObjects(MM->getValue(), Objs, DL);
+ getUnderlyingObjects(MM->getValue(), Objs);
for (const Value *V : Objs) {
if (!isIdentifiedObject(V)) {
Objs.clear();
@@ -736,7 +746,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
PendingLoads.clear();
else if (MI.mayLoad()) {
SmallVector<const Value *, 4> Objs;
- getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ ::getUnderlyingObjects(&MI, Objs);
if (Objs.empty())
Objs.push_back(UnknownValue);
for (auto V : Objs) {
@@ -745,7 +755,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
}
} else if (MI.mayStore()) {
SmallVector<const Value *, 4> Objs;
- getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ ::getUnderlyingObjects(&MI, Objs);
if (Objs.empty())
Objs.push_back(UnknownValue);
for (auto V : Objs) {
@@ -803,10 +813,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
continue;
}
AliasResult AAResult = AA->alias(
- MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
- MMO1->getAAInfo()),
- MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
- MMO2->getAAInfo()));
+ MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
+ MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo()));
if (AAResult != NoAlias) {
SDep Dep(Load, SDep::Barrier);
@@ -1587,12 +1595,12 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
SmallPtrSet<SUnit *, 8> &Visited) {
if (Cur->isBoundaryNode())
return false;
- if (Exclude.count(Cur) != 0)
+ if (Exclude.contains(Cur))
return false;
- if (DestNodes.count(Cur) != 0)
+ if (DestNodes.contains(Cur))
return true;
if (!Visited.insert(Cur).second)
- return Path.count(Cur) != 0;
+ return Path.contains(Cur);
bool FoundPath = false;
for (auto &SI : Cur->Succs)
FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
@@ -1632,7 +1640,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
if (Register::isVirtualRegister(Reg))
Uses.insert(Reg);
else if (MRI.isAllocatable(Reg))
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units)
Uses.insert(*Units);
}
}
@@ -1645,7 +1654,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
LiveOutRegs.push_back(RegisterMaskPair(Reg,
LaneBitmask::getNone()));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units)
if (!Uses.count(*Units))
LiveOutRegs.push_back(RegisterMaskPair(*Units,
LaneBitmask::getNone()));
@@ -1741,7 +1751,6 @@ void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
}
NodeSets.clear();
LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
- return;
}
/// Add the nodes that do not belong to a recurrence set into groups
@@ -1946,7 +1955,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
for (const auto &I : maxHeight->Succs) {
if (Nodes.count(I.getSUnit()) == 0)
continue;
- if (NodeOrder.count(I.getSUnit()) != 0)
+ if (NodeOrder.contains(I.getSUnit()))
continue;
if (ignoreDependence(I, false))
continue;
@@ -1958,7 +1967,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
continue;
if (Nodes.count(I.getSUnit()) == 0)
continue;
- if (NodeOrder.count(I.getSUnit()) != 0)
+ if (NodeOrder.contains(I.getSUnit()))
continue;
R.insert(I.getSUnit());
}
@@ -1997,7 +2006,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
for (const auto &I : maxDepth->Preds) {
if (Nodes.count(I.getSUnit()) == 0)
continue;
- if (NodeOrder.count(I.getSUnit()) != 0)
+ if (NodeOrder.contains(I.getSUnit()))
continue;
R.insert(I.getSUnit());
}
@@ -2007,7 +2016,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
continue;
if (Nodes.count(I.getSUnit()) == 0)
continue;
- if (NodeOrder.count(I.getSUnit()) != 0)
+ if (NodeOrder.contains(I.getSUnit()))
continue;
R.insert(I.getSUnit());
}
@@ -2270,7 +2279,7 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
/// Return the instruction in the loop that defines the register.
/// If the definition is a Phi, then follow the Phi operand to
/// the instruction in the loop.
-MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
+MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
SmallPtrSet<MachineInstr *, 8> Visited;
MachineInstr *Def = MRI.getVRegDef(Reg);
while (Def->isPHI()) {
@@ -2943,7 +2952,7 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
}
// Replace the old order with the new order.
cycleInstrs.swap(newOrderPhi);
- cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
+ llvm::append_range(cycleInstrs, newOrderI);
SSD->fixupRegisterOverlaps(cycleInstrs);
}
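
The computePath helper touched above is a depth-first search: it reports whether some successor path from Cur reaches a node in DestNodes without passing through Exclude, memoizing answers through Visited and Path. Below is a standalone sketch of that search on a plain adjacency list; boundary-node handling and the SUnit graph are omitted or replaced with hypothetical data.

// Model of the recursive path search used by the pipeliner's node grouping.
#include <iostream>
#include <set>
#include <vector>

using Graph = std::vector<std::vector<int>>;

static bool computePath(const Graph &Succs, int Cur, std::set<int> &Path,
                        const std::set<int> &DestNodes,
                        const std::set<int> &Exclude, std::set<int> &Visited) {
  if (Exclude.count(Cur))
    return false;
  if (DestNodes.count(Cur))
    return true;
  if (!Visited.insert(Cur).second)
    return Path.count(Cur) != 0;       // already explored: reuse the answer
  bool FoundPath = false;
  for (int Succ : Succs[Cur])
    FoundPath |= computePath(Succs, Succ, Path, DestNodes, Exclude, Visited);
  if (FoundPath)
    Path.insert(Cur);
  return FoundPath;
}

int main() {
  Graph Succs{{1, 2}, {3}, {}, {}};    // 0 -> {1, 2}, 1 -> {3}
  std::set<int> Path, Visited, Dest{3}, Exclude{2};
  std::cout << computePath(Succs, 0, Path, Dest, Exclude, Visited) << '\n'; // 1
}
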
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 4c733738840a..5325eda9d478 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -417,17 +417,11 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(Register Reg) const {
}
bool MachineRegisterInfo::hasOneNonDBGUse(Register RegNo) const {
- use_nodbg_iterator UI = use_nodbg_begin(RegNo);
- if (UI == use_nodbg_end())
- return false;
- return ++UI == use_nodbg_end();
+ return hasSingleElement(use_nodbg_operands(RegNo));
}
bool MachineRegisterInfo::hasOneNonDBGUser(Register RegNo) const {
- use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo);
- if (UI == use_instr_nodbg_end())
- return false;
- return ++UI == use_instr_nodbg_end();
+ return hasSingleElement(use_nodbg_instructions(RegNo));
}
/// clearKillFlags - Iterate over all the uses of the given register and
@@ -532,13 +526,6 @@ bool MachineRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
return true;
}
-bool
-MachineRegisterInfo::isCallerPreservedOrConstPhysReg(MCRegister PhysReg) const {
- const TargetRegisterInfo *TRI = getTargetRegisterInfo();
- return isConstantPhysReg(PhysReg) ||
- TRI->isCallerPreservedPhysReg(PhysReg, *MF);
-}
-
/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
/// specified register as undefined which causes the DBG_VALUE to be
/// deleted during LiveDebugVariables analysis.
@@ -630,8 +617,7 @@ void MachineRegisterInfo::disableCalleeSavedRegister(MCRegister Reg) {
// Remove the register (and its aliases from the list).
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI),
- UpdatedCSRs.end());
+ llvm::erase_value(UpdatedCSRs, *AI);
}
const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {
@@ -645,8 +631,7 @@ void MachineRegisterInfo::setCalleeSavedRegs(ArrayRef<MCPhysReg> CSRs) {
if (IsUpdatedCSRsInitialized)
UpdatedCSRs.clear();
- for (MCPhysReg Reg : CSRs)
- UpdatedCSRs.push_back(Reg);
+ append_range(UpdatedCSRs, CSRs);
// Zero value represents the end of the register list
// (no more registers should be pushed).
@@ -660,7 +645,7 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
bool IsRootReserved = true;
for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
Super.isValid(); ++Super) {
- unsigned Reg = *Super;
+ MCRegister Reg = *Super;
if (!isReserved(Reg)) {
IsRootReserved = false;
break;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index b12557d6d326..462082df5d05 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -50,15 +50,18 @@ MachineSSAUpdater::~MachineSSAUpdater() {
}
/// Initialize - Reset this object to get ready for a new set of SSA
-/// updates. ProtoValue is the value used to name PHI nodes.
-void MachineSSAUpdater::Initialize(Register V) {
+/// updates.
+void MachineSSAUpdater::Initialize(const TargetRegisterClass *RC) {
if (!AV)
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
- VR = V;
- VRC = MRI->getRegClass(VR);
+ VRC = RC;
+}
+
+void MachineSSAUpdater::Initialize(Register V) {
+ Initialize(MRI->getRegClass(V));
}
/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index cf75d531deb2..8d51bb26103a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
@@ -73,6 +74,8 @@ using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
+STATISTIC(NumClustered, "Number of load/store pairs clustered");
+
namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
@@ -126,6 +129,15 @@ static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
cl::desc("Enable memop clustering."),
cl::init(true));
+static cl::opt<bool>
+ ForceFastCluster("force-fast-cluster", cl::Hidden,
+ cl::desc("Switch to the fast cluster algorithm, at the cost "
+ "of some fusion opportunities"),
+ cl::init(false));
+static cl::opt<unsigned>
+ FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
+ cl::desc("The threshold for fast cluster"),
+ cl::init(1000));
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
@@ -228,8 +240,13 @@ char PostMachineScheduler::ID = 0;
char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
-INITIALIZE_PASS(PostMachineScheduler, "postmisched",
- "PostRA Machine Instruction Scheduler", false, false)
+INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
+ "PostRA Machine Instruction Scheduler", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
+ "PostRA Machine Instruction Scheduler", false, false)
PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
@@ -1098,7 +1115,7 @@ updateScheduledPressure(const SUnit *SU,
void ScheduleDAGMILive::updatePressureDiffs(
ArrayRef<RegisterMaskPair> LiveUses) {
for (const RegisterMaskPair &P : LiveUses) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
/// FIXME: Currently assuming single-use physregs.
if (!Register::isVirtualRegister(Reg))
continue;
@@ -1298,7 +1315,7 @@ void ScheduleDAGMILive::computeDFSResult() {
/// The cyclic path estimation identifies a def-use pair that crosses the back
/// edge and considers the depth and height of the nodes. For example, consider
/// the following instruction sequence where each instruction has unit latency
-/// and defines an epomymous virtual register:
+/// and defines an eponymous virtual register:
///
/// a->b(a,c)->c(b)->d(c)->exit
///
@@ -1323,7 +1340,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
unsigned MaxCyclicLatency = 0;
// Visit each live out vreg def to find def/use pairs that cross iterations.
for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
if (!Register::isVirtualRegister(Reg))
continue;
const LiveInterval &LI = LIS->getInterval(Reg);
@@ -1527,7 +1544,12 @@ public:
void apply(ScheduleDAGInstrs *DAGInstrs) override;
protected:
- void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
+ void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
+ ScheduleDAGInstrs *DAG);
+ void collectMemOpRecords(std::vector<SUnit> &SUnits,
+ SmallVectorImpl<MemOpInfo> &MemOpRecords);
+ bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
};
class StoreClusterMutation : public BaseMemOpClusterMutation {
@@ -1563,109 +1585,179 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII,
} // end namespace llvm
+// Sort all the loads/stores first, then, for each load/store, check the
+// following loads/stores one by one until reaching the first non-dependent
+// one, and call the target hook to see if they can be clustered.
+// If FastCluster is enabled, we assume that all the loads/stores have been
+// preprocessed and no longer have dependencies on each other.
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
- ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
- SmallVector<MemOpInfo, 32> MemOpRecords;
- for (SUnit *SU : MemOps) {
- const MachineInstr &MI = *SU->getInstr();
- SmallVector<const MachineOperand *, 4> BaseOps;
- int64_t Offset;
- bool OffsetIsScalable;
- unsigned Width;
- if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
- OffsetIsScalable, Width, TRI)) {
- MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
-
- LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
- << Offset << ", OffsetIsScalable: " << OffsetIsScalable
- << ", Width: " << Width << "\n");
- }
-#ifndef NDEBUG
- for (auto *Op : BaseOps)
- assert(Op);
-#endif
- }
- if (MemOpRecords.size() < 2)
- return;
-
- llvm::sort(MemOpRecords);
+ ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
+ ScheduleDAGInstrs *DAG) {
+ // Keep track of the current cluster length and bytes for each SUnit.
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
// At this point, `MemOpRecords` array must hold at least two mem ops. Try to
// cluster mem ops collected within `MemOpRecords` array.
- unsigned ClusterLength = 1;
- unsigned CurrentClusterBytes = MemOpRecords[0].Width;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
// Decision to cluster mem ops is taken based on target dependent logic
auto MemOpa = MemOpRecords[Idx];
- auto MemOpb = MemOpRecords[Idx + 1];
- ++ClusterLength;
- CurrentClusterBytes += MemOpb.Width;
- if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
- CurrentClusterBytes)) {
- // Current mem ops pair could not be clustered, reset cluster length, and
- // go to next pair
- ClusterLength = 1;
- CurrentClusterBytes = MemOpb.Width;
+
+ // Seek the next load/store to try to cluster with.
+ unsigned NextIdx = Idx + 1;
+ for (; NextIdx < End; ++NextIdx)
+ // Skip if MemOpb has already been clustered or has a dependency on
+ // MemOpa.
+ if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
+ (FastCluster ||
+ (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
+ !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
+ break;
+ if (NextIdx == End)
continue;
+
+ auto MemOpb = MemOpRecords[NextIdx];
+ unsigned ClusterLength = 2;
+ unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
+ if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
+ ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
+ CurrentClusterBytes =
+ SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
}
+ if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
+ CurrentClusterBytes))
+ continue;
+
SUnit *SUa = MemOpa.SU;
SUnit *SUb = MemOpb.SU;
if (SUa->NodeNum > SUb->NodeNum)
std::swap(SUa, SUb);
// FIXME: Is this check really required?
- if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
- ClusterLength = 1;
- CurrentClusterBytes = MemOpb.Width;
+ if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
continue;
- }
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
<< SUb->NodeNum << ")\n");
-
- // Copy successor edges from SUa to SUb. Interleaving computation
- // dependent on SUa can prevent load combining due to register reuse.
- // Predecessor edges do not need to be copied from SUb to SUa since
- // nearby loads should have effectively the same inputs.
- for (const SDep &Succ : SUa->Succs) {
- if (Succ.getSUnit() == SUb)
- continue;
- LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
- << ")\n");
- DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+ ++NumClustered;
+
+ if (IsLoad) {
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since
+ // nearby loads should have effectively the same inputs.
+ for (const SDep &Succ : SUa->Succs) {
+ if (Succ.getSUnit() == SUb)
+ continue;
+ LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
+ << ")\n");
+ DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+ }
+ } else {
+ // Copy predecessor edges from SUb to SUa to avoid the SUnits that
+ // SUb depends on being scheduled in between SUb and SUa. Successor
+ // edges do not need to be copied from SUa to SUb since nothing will
+ // depend on stores.
+ // Note that we don't need to worry about memory dependencies: we
+ // won't try to cluster ops that have any memory dependency.
+ for (const SDep &Pred : SUb->Preds) {
+ if (Pred.getSUnit() == SUa)
+ continue;
+ LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
+ << ")\n");
+ DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
+ }
}
+ SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
+ CurrentClusterBytes};
+
LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
<< ", Curr cluster bytes: " << CurrentClusterBytes
<< "\n");
}
}
-/// Callback from DAG postProcessing to create cluster edges for loads.
-void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
- // Map DAG NodeNum to a set of dependent MemOps in store chain.
- DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains;
- for (SUnit &SU : DAG->SUnits) {
+void BaseMemOpClusterMutation::collectMemOpRecords(
+ std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
+ for (auto &SU : SUnits) {
if ((IsLoad && !SU.getInstr()->mayLoad()) ||
(!IsLoad && !SU.getInstr()->mayStore()))
continue;
+ const MachineInstr &MI = *SU.getInstr();
+ SmallVector<const MachineOperand *, 4> BaseOps;
+ int64_t Offset;
+ bool OffsetIsScalable;
+ unsigned Width;
+ if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
+ OffsetIsScalable, Width, TRI)) {
+ MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
+
+ LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
+ << Offset << ", OffsetIsScalable: " << OffsetIsScalable
+ << ", Width: " << Width << "\n");
+ }
+#ifndef NDEBUG
+ for (auto *Op : BaseOps)
+ assert(Op);
+#endif
+ }
+}
+
+bool BaseMemOpClusterMutation::groupMemOps(
+ ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
+ bool FastCluster =
+ ForceFastCluster ||
+ MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
+
+ for (const auto &MemOp : MemOps) {
unsigned ChainPredID = DAG->SUnits.size();
- for (const SDep &Pred : SU.Preds) {
- if (Pred.isCtrl() && !Pred.isArtificial()) {
- ChainPredID = Pred.getSUnit()->NodeNum;
- break;
+ if (FastCluster) {
+ for (const SDep &Pred : MemOp.SU->Preds) {
+ // We only want to cluster mem ops that have the same ctrl (non-data)
+ // pred, so that they have no ctrl dependency on each other. But for
+ // store instrs, we can still cluster them if the pred is a load instr.
+ if ((Pred.isCtrl() &&
+ (IsLoad ||
+ (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
+ !Pred.isArtificial()) {
+ ChainPredID = Pred.getSUnit()->NodeNum;
+ break;
+ }
}
- }
- // Insert the SU to corresponding store chain.
- auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second;
- Chain.push_back(&SU);
+ } else
+ ChainPredID = 0;
+
+ Groups[ChainPredID].push_back(MemOp);
}
+ return FastCluster;
+}
- // Iterate over the store chains.
- for (auto &SCD : StoreChains)
- clusterNeighboringMemOps(SCD.second, DAG);
+/// Callback from DAG postProcessing to create cluster edges for loads/stores.
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
+ // Collect all the clusterable loads/stores
+ SmallVector<MemOpInfo, 32> MemOpRecords;
+ collectMemOpRecords(DAG->SUnits, MemOpRecords);
+
+ if (MemOpRecords.size() < 2)
+ return;
+
+ // If the DAG is too complex, use a heuristic to put loads/stores without
+ // dependencies into the same group, to avoid a compile-time blow up.
+ // Note that some fusion pairs may be lost this way.
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
+ bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
+
+ for (auto &Group : Groups) {
+ // Sort the loads/stores so that we can stop clustering as early as
+ // possible.
+ llvm::sort(Group.second);
+
+ // Try to cluster all the neighboring loads/stores.
+ clusterNeighboringMemOps(Group.second, FastCluster, DAG);
+ }
}
//===----------------------------------------------------------------------===//
@@ -2724,7 +2816,11 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
GenericSchedulerBase::SchedCandidate &Cand,
SchedBoundary &Zone) {
if (Zone.isTop()) {
- if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+ // Prefer the candidate with the lesser depth, but only if one of them has
+ // depth greater than the total latency scheduled so far, otherwise either
+ // of them could be scheduled now with no stall.
+ if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
+ Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
return true;
@@ -2733,7 +2829,11 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
TryCand, Cand, GenericSchedulerBase::TopPathReduce))
return true;
} else {
- if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+ // Prefer the candidate with the lesser height, but only if one of them has
+ // height greater than the total latency scheduled so far, otherwise either
+ // of them could be scheduled now with no stall.
+ if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
+ Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
return true;
@@ -3356,13 +3456,13 @@ ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
return DAG;
}
-static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
return createGenericSchedLive(C);
}
static MachineSchedRegistry
GenericSchedRegistry("converge", "Standard converging scheduler.",
- createConveringSched);
+ createConvergingSched);
//===----------------------------------------------------------------------===//
// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
@@ -3736,7 +3836,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
return true;
}
- static bool isNodeHidden(const SUnit *Node) {
+ static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
if (ViewMISchedCutoff == 0)
return false;
return (Node->Preds.size() > ViewMISchedCutoff
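The grouping step in this MachineScheduler change decides between an exact, reachability-checked clustering pass and a faster approximation whenever the product of memory operations and DAG size grows large. Below is a minimal standalone sketch of that heuristic and of the per-chain grouping it drives; it is plain C++ with illustrative names and a simplified MemOp record, not the LLVM API from the patch.

#include <cstddef>
#include <map>
#include <vector>

// Illustrative stand-in for the patch's MemOpInfo: just the DAG node number
// and the control-flow chain predecessor it was bucketed under.
struct MemOp {
  unsigned NodeNum;
  unsigned ChainPred;
};

// Mirrors the decision in groupMemOps(): switch to the fast path when
// MemOps * SUnits / 1000 exceeds the threshold (1000 by default, the
// fast-cluster-threshold option) or when force-fast-cluster is given.
bool useFastCluster(std::size_t NumMemOps, std::size_t NumSUnits,
                    bool ForceFast, unsigned Threshold = 1000) {
  return ForceFast || NumMemOps * NumSUnits / 1000 > Threshold;
}

// Fast path: group by chain predecessor so ops in one group have no control
// dependency on each other. Exact path: one group, dependencies are checked
// later via DAG reachability queries.
std::map<unsigned, std::vector<MemOp>>
groupByChain(const std::vector<MemOp> &Ops, bool FastCluster) {
  std::map<unsigned, std::vector<MemOp>> Groups;
  for (const MemOp &Op : Ops)
    Groups[FastCluster ? Op.ChainPred : 0].push_back(Op);
  return Groups;
}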
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index 5f958bbc31b7..378df1b75e25 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -34,6 +34,8 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -77,6 +79,18 @@ static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
"splitted critical edge"),
cl::init(40), cl::Hidden);
+static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
+ "machine-sink-load-instrs-threshold",
+ cl::desc("Do not try to find alias store for a load if there is a in-path "
+ "block whose instruction number is higher than this threshold."),
+ cl::init(2000), cl::Hidden);
+
+static cl::opt<unsigned> SinkLoadBlocksThreshold(
+ "machine-sink-load-blocks-threshold",
+ cl::desc("Do not try to find alias store for a load if the block number in "
+ "the straight line is higher than this threshold."),
+ cl::init(20), cl::Hidden);
+
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -94,6 +108,7 @@ namespace {
MachineBlockFrequencyInfo *MBFI;
const MachineBranchProbabilityInfo *MBPI;
AliasAnalysis *AA;
+ RegisterClassInfo RegClassInfo;
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
@@ -127,6 +142,15 @@ namespace {
/// current block.
DenseSet<DebugVariable> SeenDbgVars;
+ std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
+ HasStoreCache;
+ std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
+ std::vector<MachineInstr *>>
+ StoreInstrCache;
+
+ /// Cached BB's register pressure.
+ std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure;
+
public:
static char ID; // Pass identification
@@ -159,6 +183,9 @@ namespace {
MachineBasicBlock *From,
MachineBasicBlock *To);
+ bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
+ MachineInstr &MI);
+
/// Postpone the splitting of the given critical
/// edge (\p From, \p To).
///
@@ -184,12 +211,12 @@ namespace {
/// to the copy source.
void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
MachineBasicBlock *TargetBlock);
- bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
- MachineBasicBlock *DefMBB,
- bool &BreakPHIEdge, bool &LocalUse) const;
+ bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
+ bool &LocalUse) const;
MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
- bool isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
+ bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *SuccToSinkTo,
AllSuccsCache &AllSuccessors);
@@ -200,6 +227,8 @@ namespace {
SmallVector<MachineBasicBlock *, 4> &
GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const;
+
+ std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB);
};
} // end anonymous namespace
@@ -253,12 +282,11 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move def
/// after uses.
-bool
-MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
- MachineBasicBlock *MBB,
- MachineBasicBlock *DefMBB,
- bool &BreakPHIEdge,
- bool &LocalUse) const {
+bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs");
// Ignore debug uses because debug info doesn't affect the code.
@@ -327,6 +355,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ RegClassInfo.runOnMachineFunction(MF);
bool EverMadeChange = false;
@@ -347,11 +376,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
<< printMBBReference(*Pair.first) << " -- "
<< printMBBReference(*NewSucc) << " -- "
<< printMBBReference(*Pair.second) << '\n');
- if (MBFI) {
- auto NewSuccFreq = MBFI->getBlockFreq(Pair.first) *
- MBPI->getEdgeProbability(Pair.first, NewSucc);
- MBFI->setBlockFreq(NewSucc, NewSuccFreq.getFrequency());
- }
+ if (MBFI)
+ MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);
+
MadeChange = true;
++NumSplit;
} else
@@ -362,6 +389,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
EverMadeChange = true;
}
+ HasStoreCache.clear();
+ StoreInstrCache.clear();
+
// Now clear any kill flags for recorded registers.
for (auto I : RegsToClearKillFlags)
MRI->clearKillFlags(I);
@@ -419,6 +449,8 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
SeenDbgUsers.clear();
SeenDbgVars.clear();
+ // Recalculate the BB register pressure after sinking one BB.
+ CachedRegisterPressure.clear();
return MadeChange;
}
@@ -430,7 +462,7 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
- bool SeenBefore = SeenDbgVars.count(Var) != 0;
+ bool SeenBefore = SeenDbgVars.contains(Var);
MachineOperand &MO = MI.getDebugOperand(0);
if (MO.isReg() && MO.getReg().isVirtual())
@@ -561,8 +593,44 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
return true;
}
+std::vector<unsigned> &
+MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
+ // Currently, to save compile time, MBB's register pressure will not change
+ // in one ProcessBlock iteration because of CachedRegisterPressure, but MBB's
+ // register pressure does change after sinking any instructions into it.
+ // FIXME: need an accurate and cheap register pressure estimation model here.
+ auto RP = CachedRegisterPressure.find(&MBB);
+ if (RP != CachedRegisterPressure.end())
+ return RP->second;
+
+ RegionPressure Pressure;
+ RegPressureTracker RPTracker(Pressure);
+
+ // Initialize the register pressure tracker.
+ RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
+ /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
+
+ for (MachineBasicBlock::iterator MII = MBB.instr_end(),
+ MIE = MBB.instr_begin();
+ MII != MIE; --MII) {
+ MachineInstr &MI = *std::prev(MII);
+ if (MI.isDebugValue() || MI.isDebugLabel())
+ continue;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI, false, false);
+ RPTracker.recedeSkipDebugValues();
+ assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
+ RPTracker.recede(RegOpers);
+ }
+
+ RPTracker.closeRegion();
+ auto It = CachedRegisterPressure.insert(
+ std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
+ return It.first->second;
+}
+
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
-bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
+bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *SuccToSinkTo,
AllSuccsCache &AllSuccessors) {
@@ -598,9 +666,73 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
- // If SuccToSinkTo is final destination and it is a post dominator of current
- // block then it is not profitable to sink MI into SuccToSinkTo block.
- return false;
+ MachineLoop *ML = LI->getLoopFor(MBB);
+
+ // If the instruction is not inside a loop, it is not profitable to sink MI to
+ // a post-dominating block SuccToSinkTo.
+ if (!ML)
+ return false;
+
+ auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
+ unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
+ const int *PS = TRI->getRegClassPressureSets(RC);
+ // Get register pressure for block SuccToSinkTo.
+ std::vector<unsigned> BBRegisterPressure =
+ getBBRegisterPressure(*SuccToSinkTo);
+ for (; *PS != -1; PS++)
+ // Check if any register pressure set exceeds its limit in block
+ // SuccToSinkTo after sinking.
+ if (Weight + BBRegisterPressure[*PS] >=
+ TRI->getRegPressureSetLimit(*MBB->getParent(), *PS))
+ return true;
+ return false;
+ };
+
+ // If this instruction is inside a loop and sinking it can shorten the live
+ // ranges of more registers, it is still profitable.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ // Ignore non-register operands.
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Don't handle physical register.
+ if (Register::isPhysicalRegister(Reg))
+ return false;
+
+ // Users for the defs are all dominated by SuccToSinkTo.
+ if (MO.isDef()) {
+ // This def register's live range is shortened after sinking.
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
+ LocalUse))
+ return false;
+ } else {
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ // DefMI is defined outside of the loop. There should be no live range
+ // impact for this operand. A definition outside of the loop means:
+ // 1: the definition is outside of the loop.
+ // 2: the definition is in this loop, but it is a PHI in the loop header.
+ if (LI->getLoopFor(DefMI->getParent()) != ML ||
+ (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
+ continue;
+ // The DefMI is defined inside the loop.
+ // If sinking this operand makes some register pressure set exceed limit,
+ // it is not profitable.
+ if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
+ LLVM_DEBUG(dbgs() << "register pressure exceeds limit, not profitable.");
+ return false;
+ }
+ }
+ }
+
+ // If MI is in a loop and all its operands are live across the whole loop, or
+ // if sinking it does not make any register pressure set exceed its limit, it
+ // is profitable to sink MI.
+ return true;
}
/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
@@ -613,8 +745,7 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
if (Succs != AllSuccessors.end())
return Succs->second;
- SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->succ_begin(),
- MBB->succ_end());
+ SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());
// Handle cases where sinking can happen but where the sink point isn't a
// successor. For example:
@@ -876,6 +1007,97 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
}
}
+/// hasStoreBetween - check if there is a store between straight-line blocks From
+/// and To.
+bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
+ MachineBasicBlock *To, MachineInstr &MI) {
+ // Make sure From and To are in a straight line, which means From dominates To
+ // and To post dominates From.
+ if (!DT->dominates(From, To) || !PDT->dominates(To, From))
+ return true;
+
+ auto BlockPair = std::make_pair(From, To);
+
+ // Has this pair of blocks been queried before, and does it have a definite
+ // cached result?
+ if (HasStoreCache.find(BlockPair) != HasStoreCache.end())
+ return HasStoreCache[BlockPair];
+
+ if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end())
+ return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) {
+ return I->mayAlias(AA, MI, false);
+ });
+
+ bool SawStore = false;
+ bool HasAliasedStore = false;
+ DenseSet<MachineBasicBlock *> HandledBlocks;
+ DenseSet<MachineBasicBlock *> HandledDomBlocks;
+ // Go through all reachable blocks from From.
+ for (MachineBasicBlock *BB : depth_first(From)) {
+ // We insert the instruction at the start of block To, so no need to worry
+ // about stores inside To.
+ // Stores in block From should already have been considered on entry to
+ // function SinkInstruction.
+ if (BB == To || BB == From)
+ continue;
+
+ // We already handled this BB in a previous iteration.
+ if (HandledBlocks.count(BB))
+ continue;
+
+ HandledBlocks.insert(BB);
+ // To post dominates BB, so BB must be on a path from block From.
+ if (PDT->dominates(To, BB)) {
+ if (!HandledDomBlocks.count(BB))
+ HandledDomBlocks.insert(BB);
+
+ // If this BB is too big, or the number of blocks on the straight line
+ // between From and To is too big, stop searching to save compile time.
+ if (BB->size() > SinkLoadInstsPerBlockThreshold ||
+ HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
+ for (auto *DomBB : HandledDomBlocks) {
+ if (DomBB != BB && DT->dominates(DomBB, BB))
+ HasStoreCache[std::make_pair(DomBB, To)] = true;
else if (DomBB != BB && DT->dominates(BB, DomBB))
+ HasStoreCache[std::make_pair(From, DomBB)] = true;
+ }
+ HasStoreCache[BlockPair] = true;
+ return true;
+ }
+
+ for (MachineInstr &I : *BB) {
+ // Treat as alias conservatively for a call or an ordered memory
+ // operation.
+ if (I.isCall() || I.hasOrderedMemoryRef()) {
+ for (auto *DomBB : HandledDomBlocks) {
+ if (DomBB != BB && DT->dominates(DomBB, BB))
+ HasStoreCache[std::make_pair(DomBB, To)] = true;
else if (DomBB != BB && DT->dominates(BB, DomBB))
+ HasStoreCache[std::make_pair(From, DomBB)] = true;
+ }
+ HasStoreCache[BlockPair] = true;
+ return true;
+ }
+
+ if (I.mayStore()) {
+ SawStore = true;
+ // We still have a chance to sink MI if none of the stores in between
+ // alias with MI.
+ // Cache all store instructions, so that we don't need to walk all
+ // blocks reachable from From for the next load instruction.
+ if (I.mayAlias(AA, MI, false))
+ HasAliasedStore = true;
+ StoreInstrCache[BlockPair].push_back(&I);
+ }
+ }
+ }
+ }
+ // If there is no store at all, cache the result.
+ if (!SawStore)
+ HasStoreCache[BlockPair] = false;
+ return HasAliasedStore;
+}
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@@ -936,8 +1158,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
bool TryBreak = false;
- bool store = true;
- if (!MI.isSafeToMove(AA, store)) {
+ bool Store =
+ MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true;
+ if (!MI.isSafeToMove(AA, Store)) {
LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
TryBreak = true;
}
@@ -1268,9 +1491,9 @@ static bool hasRegisterDependency(MachineInstr *MI,
return HasRegDependency;
}
-static SmallSet<unsigned, 4> getRegUnits(unsigned Reg,
- const TargetRegisterInfo *TRI) {
- SmallSet<unsigned, 4> RegUnits;
+static SmallSet<MCRegister, 4> getRegUnits(MCRegister Reg,
+ const TargetRegisterInfo *TRI) {
+ SmallSet<MCRegister, 4> RegUnits;
for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI)
RegUnits.insert(*RI);
return RegUnits;
@@ -1320,8 +1543,8 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
continue;
// Record debug use of each reg unit.
- SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI);
- for (unsigned Reg : Units)
+ SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI);
+ for (MCRegister Reg : Units)
SeenDbgInstrs[Reg].push_back(MI);
}
continue;
@@ -1365,18 +1588,17 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// recorded which reg units that DBG_VALUEs read, if this instruction
// writes any of those units then the corresponding DBG_VALUEs must sink.
SetVector<MachineInstr *> DbgValsToSinkSet;
- SmallVector<MachineInstr *, 4> DbgValsToSink;
for (auto &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI);
- for (unsigned Reg : Units)
+ SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI);
+ for (MCRegister Reg : Units)
for (auto *MI : SeenDbgInstrs.lookup(Reg))
DbgValsToSinkSet.insert(MI);
}
- DbgValsToSink.insert(DbgValsToSink.begin(), DbgValsToSinkSet.begin(),
- DbgValsToSinkSet.end());
+ SmallVector<MachineInstr *, 4> DbgValsToSink(DbgValsToSinkSet.begin(),
+ DbgValsToSinkSet.end());
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
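The new hasStoreBetween() walk above is made affordable by two caches keyed on the (From, To) block pair: a definite yes/no answer, and the list of stores seen on the path so that later loads only repeat the cheap alias check. The following is a rough standalone sketch of that caching scheme, with toy types standing in for MachineBasicBlock and MachineInstr; it is an illustration of the idea, not the patch's code.

#include <map>
#include <utility>
#include <vector>

struct Block {};                 // toy stand-in for MachineBasicBlock
struct Instr { int AliasSet; };  // toy stand-in for MachineInstr

// Toy aliasing test; the real code asks MachineInstr::mayAlias via AA.
static bool mayAlias(const Instr &A, const Instr &B) {
  return A.AliasSet == B.AliasSet;
}

struct StoreBetweenCache {
  using Key = std::pair<const Block *, const Block *>;
  std::map<Key, bool> HasStore;                    // definite answers
  std::map<Key, std::vector<Instr>> StoresOnPath;  // stores seen earlier

  // Returns true if a store that may alias Load lies between From and To;
  // Hit reports whether the caches could answer without a CFG walk.
  bool query(const Block *From, const Block *To, const Instr &Load,
             bool &Hit) const {
    Key K{From, To};
    if (auto It = HasStore.find(K); It != HasStore.end()) {
      Hit = true;
      return It->second;
    }
    if (auto It = StoresOnPath.find(K); It != StoresOnPath.end()) {
      Hit = true;
      for (const Instr &S : It->second)
        if (mayAlias(S, Load))
          return true;
      return false;
    }
    Hit = false;  // caller must walk the blocks (and can then populate us)
    return true;  // be conservative on a miss
  }
};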
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
new file mode 100644
index 000000000000..fb14f0a33209
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -0,0 +1,194 @@
+//===- lib/CodeGen/MachineStableHash.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Stable hashing for MachineInstr and MachineOperand. Useful for getting a
+// hash across runs, modules, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/MIRFormatter.h"
+#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StableHashing.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "machine-stable-hash"
+
+using namespace llvm;
+
+STATISTIC(StableHashBailingMachineBasicBlock,
+ "Number of encountered unsupported MachineOperands that were "
+ "MachineBasicBlocks while computing stable hashes");
+STATISTIC(StableHashBailingConstantPoolIndex,
+ "Number of encountered unsupported MachineOperands that were "
+ "ConstantPoolIndex while computing stable hashes");
+STATISTIC(StableHashBailingTargetIndexNoName,
+ "Number of encountered unsupported MachineOperands that were "
+ "TargetIndex with no name");
+STATISTIC(StableHashBailingGlobalAddress,
+ "Number of encountered unsupported MachineOperands that were "
+ "GlobalAddress while computing stable hashes");
+STATISTIC(StableHashBailingBlockAddress,
+ "Number of encountered unsupported MachineOperands that were "
+ "BlockAddress while computing stable hashes");
+STATISTIC(StableHashBailingMetadataUnsupported,
+ "Number of encountered unsupported MachineOperands that were "
+ "Metadata of an unsupported kind while computing stable hashes");
+
+stable_hash llvm::stableHashValue(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (Register::isVirtualRegister(MO.getReg())) {
+ const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo();
+ return MRI.getVRegDef(MO.getReg())->getOpcode();
+ }
+
+ // Register operands don't have target flags.
+ return stable_hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(),
+ MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ case MachineOperand::MO_FPImmediate: {
+ auto Val = MO.isCImm() ? MO.getCImm()->getValue()
+ : MO.getFPImm()->getValueAPF().bitcastToAPInt();
+ auto ValHash =
+ stable_hash_combine_array(Val.getRawData(), Val.getNumWords());
+ return hash_combine(MO.getType(), MO.getTargetFlags(), ValHash);
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ StableHashBailingMachineBasicBlock++;
+ return 0;
+ case MachineOperand::MO_ConstantPoolIndex:
+ StableHashBailingConstantPoolIndex++;
+ return 0;
+ case MachineOperand::MO_BlockAddress:
+ StableHashBailingBlockAddress++;
+ return 0;
+ case MachineOperand::MO_Metadata:
+ StableHashBailingMetadataUnsupported++;
+ return 0;
+ case MachineOperand::MO_GlobalAddress:
+ StableHashBailingGlobalAddress++;
+ return 0;
+ case MachineOperand::MO_TargetIndex: {
+ if (const char *Name = MO.getTargetIndexName())
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_string(Name),
+ MO.getOffset());
+ StableHashBailingTargetIndexNoName++;
+ return 0;
+ }
+
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getIndex());
+
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ stable_hash_combine_string(MO.getSymbolName()));
+
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+
+ case MachineOperand::MO_ShuffleMask: {
+ std::vector<llvm::stable_hash> ShuffleMaskHashes;
+
+ llvm::transform(
+ MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes),
+ [](int S) -> llvm::stable_hash { return llvm::stable_hash(S); });
+
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_array(ShuffleMaskHashes.data(),
+ ShuffleMaskHashes.size()));
+ }
+ case MachineOperand::MO_MCSymbol: {
+ auto SymbolName = MO.getMCSymbol()->getName();
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_string(SymbolName));
+ }
+ case MachineOperand::MO_CFIIndex:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getCFIIndex());
+ case MachineOperand::MO_IntrinsicID:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getIntrinsicID());
+ case MachineOperand::MO_Predicate:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getPredicate());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+/// A stable hash value for machine instructions.
+/// Returns 0 if no stable hash could be computed.
+/// The hashing and equality testing functions ignore definitions so this is
+/// useful for CSE, etc.
+stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
+ bool HashConstantPoolIndices,
+ bool HashMemOperands) {
+ // Build up a buffer of hash code components.
+ SmallVector<stable_hash, 16> HashComponents;
+ HashComponents.reserve(MI.getNumOperands() + MI.getNumMemOperands() + 2);
+ HashComponents.push_back(MI.getOpcode());
+ HashComponents.push_back(MI.getFlags());
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!HashVRegs && MO.isReg() && MO.isDef() &&
+ Register::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+
+ if (MO.isCPI()) {
+ HashComponents.push_back(stable_hash_combine(
+ MO.getType(), MO.getTargetFlags(), MO.getIndex()));
+ continue;
+ }
+
+ stable_hash StableHash = stableHashValue(MO);
+ if (!StableHash)
+ return 0;
+ HashComponents.push_back(StableHash);
+ }
+
+ for (const auto *Op : MI.memoperands()) {
+ if (!HashMemOperands)
+ break;
+ HashComponents.push_back(static_cast<unsigned>(Op->getSize()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getFlags()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getOffset()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getOrdering()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getAddrSpace()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getSyncScopeID()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getBaseAlign().value()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getFailureOrdering()));
+ }
+
+ return stable_hash_combine_range(HashComponents.begin(),
+ HashComponents.end());
+}
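The point of this new file is a hash that stays identical across runs and hosts, which rules out pointer values, allocation order, and anything else the process layout can perturb. As a loose illustration only (LLVM's stable_hash_combine helpers are not reproduced here), folding fixed-width value components with FNV-1a gives that property:

#include <cstdint>
#include <vector>

// Fold value components with FNV-1a: the result depends only on the
// component values, so it is stable across runs, unlike pointer-based or
// iteration-order-based hashes.
std::uint64_t stableCombine(const std::vector<std::uint64_t> &Components) {
  std::uint64_t H = 1469598103934665603ULL;  // FNV-1a offset basis
  for (std::uint64_t C : Components) {
    for (int Shift = 0; Shift < 64; Shift += 8) {
      H ^= (C >> Shift) & 0xff;
      H *= 1099511628211ULL;                 // FNV-1a prime
    }
  }
  return H;
}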
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index e6b51b7e1e56..8df23b781ffd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -701,17 +701,15 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
SmallVectorImpl<DataDep> &Deps,
SparseSet<LiveRegUnit> &RegUnits,
const TargetRegisterInfo *TRI) {
- SmallVector<unsigned, 8> Kills;
+ SmallVector<MCRegister, 8> Kills;
SmallVector<unsigned, 8> LiveDefOps;
for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
ME = UseMI->operands_end(); MI != ME; ++MI) {
const MachineOperand &MO = *MI;
- if (!MO.isReg())
- continue;
- Register Reg = MO.getReg();
- if (!Register::isPhysicalRegister(Reg))
+ if (!MO.isReg() || !MO.getReg().isPhysical())
continue;
+ MCRegister Reg = MO.getReg().asMCReg();
// Track live defs and kills for updating RegUnits.
if (MO.isDef()) {
if (MO.isDead())
@@ -734,13 +732,14 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
// Update RegUnits to reflect live registers after UseMI.
// First kills.
- for (unsigned Kill : Kills)
+ for (MCRegister Kill : Kills)
for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
RegUnits.erase(*Units);
// Second, live defs.
for (unsigned DefOp : LiveDefOps) {
- for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
+ for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg().asMCReg(),
+ TRI);
Units.isValid(); ++Units) {
LiveRegUnit &LRU = RegUnits[*Units];
LRU.MI = UseMI;
@@ -766,7 +765,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
assert(TBI.HasValidInstrHeights && "Missing height info");
unsigned MaxLen = 0;
for (const LiveInReg &LIR : TBI.LiveIns) {
- if (!Register::isVirtualRegister(LIR.Reg))
+ if (!LIR.Reg.isVirtual())
continue;
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
// Ignore dependencies outside the current trace.
@@ -912,7 +911,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
continue;
// This is a def of Reg. Remove corresponding entries from RegUnits, and
// update MI Height to consider the physreg dependencies.
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units) {
SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
if (I == RegUnits.end())
continue;
@@ -930,15 +930,15 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
}
// Now we know the height of MI. Update any regunits read.
- for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
- Register Reg = MI.getOperand(ReadOps[i]).getReg();
+ for (size_t I = 0, E = ReadOps.size(); I != E; ++I) {
+ MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg();
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
LiveRegUnit &LRU = RegUnits[*Units];
// Set the height to the highest reader of the unit.
if (LRU.Cycle <= Height && LRU.MI != &MI) {
LRU.Cycle = Height;
LRU.MI = &MI;
- LRU.Op = ReadOps[i];
+ LRU.Op = ReadOps[I];
}
}
}
@@ -979,7 +979,7 @@ void MachineTraceMetrics::Ensemble::
addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace) {
assert(!Trace.empty() && "Trace should contain at least one block");
- unsigned Reg = DefMI->getOperand(DefOp).getReg();
+ Register Reg = DefMI->getOperand(DefOp).getReg();
assert(Register::isVirtualRegister(Reg));
const MachineBasicBlock *DefMBB = DefMI->getParent();
@@ -1027,7 +1027,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
if (MBB) {
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
for (LiveInReg &LI : TBI.LiveIns) {
- if (Register::isVirtualRegister(LI.Reg)) {
+ if (LI.Reg.isVirtual()) {
// For virtual registers, the def latency is included.
unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
if (Height < LI.Height)
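Much of this hunk, and the MachineVerifier changes that follow, swaps raw unsigned register numbers for the Register/MCRegister wrappers so that virtual and physical registers cannot be mixed up silently. A toy model of the idea, assuming the usual LLVM encoding where the top bit marks virtual registers (simplified; the real classes carry more state and API):

#include <cassert>

class Reg {
  unsigned Id = 0;

public:
  explicit Reg(unsigned Id) : Id(Id) {}
  static constexpr unsigned FirstVirtual = 1u << 31;

  bool isVirtual() const { return Id >= FirstVirtual; }
  bool isPhysical() const { return Id != 0 && Id < FirstVirtual; }

  // Only hand out the physical-register view after checking, in the spirit
  // of Register::asMCReg()-style assertions.
  unsigned asPhys() const {
    assert(isPhysical() && "not a physical register");
    return Id;
  }
  unsigned id() const { return Id; }
};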
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index c1a2c4e0bc6e..0f6d9b888f47 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -86,7 +86,7 @@ namespace {
struct MachineVerifier {
MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {}
- unsigned verify(MachineFunction &MF);
+ unsigned verify(const MachineFunction &MF);
Pass *const PASS;
const char *Banner;
@@ -102,10 +102,10 @@ namespace {
bool isFunctionRegBankSelected;
bool isFunctionSelected;
- using RegVector = SmallVector<unsigned, 16>;
+ using RegVector = SmallVector<Register, 16>;
using RegMaskVector = SmallVector<const uint32_t *, 4>;
- using RegSet = DenseSet<unsigned>;
- using RegMap = DenseMap<unsigned, const MachineInstr *>;
+ using RegSet = DenseSet<Register>;
+ using RegMap = DenseMap<Register, const MachineInstr *>;
using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>;
const MachineInstr *FirstNonPHI;
@@ -120,11 +120,10 @@ namespace {
SlotIndex lastIndex;
// Add Reg and any sub-registers to RV
- void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ void addRegWithSubRegs(RegVector &RV, Register Reg) {
RV.push_back(Reg);
- if (Register::isPhysicalRegister(Reg))
- for (const MCPhysReg &SubReg : TRI->subregs(Reg))
- RV.push_back(SubReg);
+ if (Reg.isPhysical())
+ append_range(RV, TRI->subregs(Reg.asMCReg()));
}
struct BBInfo {
@@ -132,7 +131,8 @@ namespace {
bool reachable = false;
// Vregs that must be live in because they are used without being
- // defined. Map value is the user.
+ // defined. Map value is the user. vregsLiveIn doesn't include regs
+ // that are only used by PHI nodes.
RegMap vregsLiveIn;
// Regs killed in MBB. They may be defined again, and will then be in both
@@ -158,8 +158,8 @@ namespace {
// Add register to vregsRequired if it belongs there. Return true if
// anything changed.
- bool addRequired(unsigned Reg) {
- if (!Register::isVirtualRegister(Reg))
+ bool addRequired(Register Reg) {
+ if (!Reg.isVirtual())
return false;
if (regsLiveOut.count(Reg))
return false;
@@ -169,7 +169,7 @@ namespace {
// Same for a full set.
bool addRequired(const RegSet &RS) {
bool Changed = false;
- for (unsigned Reg : RS)
+ for (Register Reg : RS)
Changed |= addRequired(Reg);
return Changed;
}
@@ -183,7 +183,7 @@ namespace {
}
// Live-out registers are either in regsLiveOut or vregsPassed.
- bool isLiveOut(unsigned Reg) const {
+ bool isLiveOut(Register Reg) const {
return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
}
};
@@ -191,13 +191,13 @@ namespace {
// Extra register info per MBB.
DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
- bool isReserved(unsigned Reg) {
- return Reg < regsReserved.size() && regsReserved.test(Reg);
+ bool isReserved(Register Reg) {
+ return Reg.id() < regsReserved.size() && regsReserved.test(Reg.id());
}
- bool isAllocatable(unsigned Reg) const {
- return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) &&
- !regsReserved.test(Reg);
+ bool isAllocatable(Register Reg) const {
+ return Reg.id() < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) &&
+ !regsReserved.test(Reg.id());
}
// Analysis information if available
@@ -225,7 +225,7 @@ namespace {
LLT MOVRegType = LLT{});
void report_context(const LiveInterval &LI) const;
- void report_context(const LiveRange &LR, unsigned VRegUnit,
+ void report_context(const LiveRange &LR, Register VRegUnit,
LaneBitmask LaneMask) const;
void report_context(const LiveRange::Segment &S) const;
void report_context(const VNInfo &VNI) const;
@@ -233,18 +233,19 @@ namespace {
void report_context(MCPhysReg PhysReg) const;
void report_context_liverange(const LiveRange &LR) const;
void report_context_lanemask(LaneBitmask LaneMask) const;
- void report_context_vreg(unsigned VReg) const;
- void report_context_vreg_regunit(unsigned VRegOrUnit) const;
+ void report_context_vreg(Register VReg) const;
+ void report_context_vreg_regunit(Register VRegOrUnit) const;
void verifyInlineAsm(const MachineInstr *MI);
void checkLiveness(const MachineOperand *MO, unsigned MONum);
void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum,
- SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit,
+ SlotIndex UseIdx, const LiveRange &LR,
+ Register VRegOrUnit,
LaneBitmask LaneMask = LaneBitmask::getNone());
void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
- SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
- bool SubRangeCheck = false,
+ SlotIndex DefIdx, const LiveRange &LR,
+ Register VRegOrUnit, bool SubRangeCheck = false,
LaneBitmask LaneMask = LaneBitmask::getNone());
void markReachable(const MachineBasicBlock *MBB);
@@ -255,12 +256,12 @@ namespace {
void verifyLiveVariables();
void verifyLiveIntervals();
void verifyLiveInterval(const LiveInterval&);
- void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned,
+ void verifyLiveRangeValue(const LiveRange &, const VNInfo *, Register,
LaneBitmask);
- void verifyLiveRangeSegment(const LiveRange&,
- const LiveRange::const_iterator I, unsigned,
+ void verifyLiveRangeSegment(const LiveRange &,
+ const LiveRange::const_iterator I, Register,
LaneBitmask);
- void verifyLiveRange(const LiveRange&, unsigned,
+ void verifyLiveRange(const LiveRange &, Register,
LaneBitmask LaneMask = LaneBitmask::getNone());
void verifyStackFrame();
@@ -303,6 +304,19 @@ FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) {
return new MachineVerifierPass(Banner);
}
+void llvm::verifyMachineFunction(MachineFunctionAnalysisManager *,
+ const std::string &Banner,
+ const MachineFunction &MF) {
+ // TODO: Use MFAM after porting below analyses.
+ // LiveVariables *LiveVars;
+ // LiveIntervals *LiveInts;
+ // LiveStacks *LiveStks;
+ // SlotIndexes *Indexes;
+ unsigned FoundErrors = MachineVerifier(nullptr, Banner.c_str()).verify(MF);
+ if (FoundErrors)
+ report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors.");
+}
+
bool MachineFunction::verify(Pass *p, const char *Banner, bool AbortOnErrors)
const {
MachineFunction &MF = const_cast<MachineFunction&>(*this);
@@ -335,7 +349,7 @@ void MachineVerifier::verifyProperties(const MachineFunction &MF) {
report("Function has NoVRegs property but there are VReg operands", &MF);
}
-unsigned MachineVerifier::verify(MachineFunction &MF) {
+unsigned MachineVerifier::verify(const MachineFunction &MF) {
foundErrors = 0;
this->MF = &MF;
@@ -474,7 +488,7 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
errs() << "- instruction: ";
if (Indexes && Indexes->hasIndex(*MI))
errs() << Indexes->getInstructionIndex(*MI) << '\t';
- MI->print(errs(), /*SkipOpers=*/true);
+ MI->print(errs(), /*IsStandalone=*/true);
}
void MachineVerifier::report(const char *msg, const MachineOperand *MO,
@@ -494,7 +508,7 @@ void MachineVerifier::report_context(const LiveInterval &LI) const {
errs() << "- interval: " << LI << '\n';
}
-void MachineVerifier::report_context(const LiveRange &LR, unsigned VRegUnit,
+void MachineVerifier::report_context(const LiveRange &LR, Register VRegUnit,
LaneBitmask LaneMask) const {
report_context_liverange(LR);
report_context_vreg_regunit(VRegUnit);
@@ -518,11 +532,11 @@ void MachineVerifier::report_context(MCPhysReg PReg) const {
errs() << "- p. register: " << printReg(PReg, TRI) << '\n';
}
-void MachineVerifier::report_context_vreg(unsigned VReg) const {
+void MachineVerifier::report_context_vreg(Register VReg) const {
errs() << "- v. register: " << printReg(VReg, TRI) << '\n';
}
-void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const {
+void MachineVerifier::report_context_vreg_regunit(Register VRegOrUnit) const {
if (Register::isVirtualRegister(VRegOrUnit)) {
report_context_vreg(VRegOrUnit);
} else {
@@ -776,9 +790,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
}
// Ensure non-terminators don't follow terminators.
- // Ignore predicated terminators formed by if conversion.
- // FIXME: If conversion shouldn't need to violate this rule.
- if (MI->isTerminator() && !TII->isPredicated(*MI)) {
+ if (MI->isTerminator()) {
if (!FirstTerminator)
FirstTerminator = MI;
} else if (FirstTerminator) {
@@ -992,16 +1004,15 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
case TargetOpcode::G_PHI: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
- if (!DstTy.isValid() ||
- !std::all_of(MI->operands_begin() + 1, MI->operands_end(),
- [this, &DstTy](const MachineOperand &MO) {
- if (!MO.isReg())
- return true;
- LLT Ty = MRI->getType(MO.getReg());
- if (!Ty.isValid() || (Ty != DstTy))
- return false;
- return true;
- }))
+ if (!DstTy.isValid() || !all_of(drop_begin(MI->operands()),
+ [this, &DstTy](const MachineOperand &MO) {
+ if (!MO.isReg())
+ return true;
+ LLT Ty = MRI->getType(MO.getReg());
+ if (!Ty.isValid() || (Ty != DstTy))
+ return false;
+ return true;
+ }))
report("Generic Instruction G_PHI has operands with incompatible/missing "
"types",
MI);
@@ -1343,20 +1354,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
}
- switch (IntrID) {
- case Intrinsic::memcpy:
- if (MI->getNumOperands() != 5)
- report("Expected memcpy intrinsic to have 5 operands", MI);
- break;
- case Intrinsic::memmove:
- if (MI->getNumOperands() != 5)
- report("Expected memmove intrinsic to have 5 operands", MI);
- break;
- case Intrinsic::memset:
- if (MI->getNumOperands() != 5)
- report("Expected memset intrinsic to have 5 operands", MI);
- break;
- }
+
break;
}
case TargetOpcode::G_SEXT_INREG: {
@@ -1434,6 +1432,95 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE: {
+ ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
+ if (MMOs.size() != 2) {
+ report("memcpy/memmove must have 2 memory operands", MI);
+ break;
+ }
+
+ if ((!MMOs[0]->isStore() || MMOs[0]->isLoad()) ||
+ (MMOs[1]->isStore() || !MMOs[1]->isLoad())) {
+ report("wrong memory operand types", MI);
+ break;
+ }
+
+ if (MMOs[0]->getSize() != MMOs[1]->getSize())
+ report("inconsistent memory operand sizes", MI);
+
+ LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcPtrTy = MRI->getType(MI->getOperand(1).getReg());
+
+ if (!DstPtrTy.isPointer() || !SrcPtrTy.isPointer()) {
+ report("memory instruction operand must be a pointer", MI);
+ break;
+ }
+
+ if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
+ report("inconsistent store address space", MI);
+ if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace())
+ report("inconsistent load address space", MI);
+
+ break;
+ }
+ case TargetOpcode::G_MEMSET: {
+ ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
+ if (MMOs.size() != 1) {
+ report("memset must have 1 memory operand", MI);
+ break;
+ }
+
+ if ((!MMOs[0]->isStore() || MMOs[0]->isLoad())) {
+ report("memset memory operand must be a store", MI);
+ break;
+ }
+
+ LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg());
+ if (!DstPtrTy.isPointer()) {
+ report("memset operand must be a pointer", MI);
+ break;
+ }
+
+ if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
+ report("inconsistent memset address space", MI);
+
+ break;
+ }
+ case TargetOpcode::G_VECREDUCE_SEQ_FADD:
+ case TargetOpcode::G_VECREDUCE_SEQ_FMUL: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());
+ LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());
+ if (!DstTy.isScalar())
+ report("Vector reduction requires a scalar destination type", MI);
+ if (!Src1Ty.isScalar())
+ report("Sequential FADD/FMUL vector reduction requires a scalar 1st operand", MI);
+ if (!Src2Ty.isVector())
+ report("Sequential FADD/FMUL vector reduction must have a vector 2nd operand", MI);
+ break;
+ }
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_FMUL:
+ case TargetOpcode::G_VECREDUCE_FMAX:
+ case TargetOpcode::G_VECREDUCE_FMIN:
+ case TargetOpcode::G_VECREDUCE_ADD:
+ case TargetOpcode::G_VECREDUCE_MUL:
+ case TargetOpcode::G_VECREDUCE_AND:
+ case TargetOpcode::G_VECREDUCE_OR:
+ case TargetOpcode::G_VECREDUCE_XOR:
+ case TargetOpcode::G_VECREDUCE_SMAX:
+ case TargetOpcode::G_VECREDUCE_SMIN:
+ case TargetOpcode::G_VECREDUCE_UMAX:
+ case TargetOpcode::G_VECREDUCE_UMIN: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isScalar())
+ report("Vector reduction requires a scalar destination type", MI);
+ if (!SrcTy.isVector())
+ report("Vector reduction requires vector source=", MI);
+ break;
+ }
default:
break;
}
@@ -1461,6 +1548,16 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
if (MI->isInlineAsm())
verifyInlineAsm(MI);
+ // Check that unspillable terminators define a reg and have at most one use.
+ if (TII->isUnspillableTerminator(MI)) {
+ if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
+ report("Unspillable Terminator does not define a reg", MI);
+ Register Def = MI->getOperand(0).getReg();
+ if (Def.isVirtual() &&
+ std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1)
+ report("Unspillable Terminator expected to have at most one use!", MI);
+ }
+
// A fully-formed DBG_VALUE must have a location. Ignore partially formed
// DBG_VALUEs: these are convenient to use in tests, but should never get
// generated.
@@ -1468,6 +1565,11 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
if (!MI->getDebugLoc())
report("Missing DebugLoc for debug instruction", MI);
+ // Meta instructions should never be the subject of debug value tracking;
+ // they don't create a value in the output program at all.
+ if (MI->isMetaInstruction() && MI->peekDebugInstrNum())
+ report("Metadata instruction should not have a value tracking number", MI);
+
// Check the MachineMemOperands for basic consistency.
for (MachineMemOperand *Op : MI->memoperands()) {
if (Op->isLoad() && !MI->mayLoad())
@@ -1543,6 +1645,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
auto VerifyStackMapConstant = [&](unsigned Offset) {
+ if (Offset >= MI->getNumOperands()) {
+ report("stack map constant to STATEPOINT is out of range!", MI);
+ return;
+ }
if (!MI->getOperand(Offset - 1).isImm() ||
MI->getOperand(Offset - 1).getImm() != StackMaps::ConstantOp ||
!MI->getOperand(Offset).isImm())
@@ -1551,6 +1657,25 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
VerifyStackMapConstant(SO.getCCIdx());
VerifyStackMapConstant(SO.getFlagsIdx());
VerifyStackMapConstant(SO.getNumDeoptArgsIdx());
+ VerifyStackMapConstant(SO.getNumGCPtrIdx());
+ VerifyStackMapConstant(SO.getNumAllocaIdx());
+ VerifyStackMapConstant(SO.getNumGcMapEntriesIdx());
+
+ // Verify that all explicit statepoint defs are tied to gc operands as
+ // they are expected to be a relocation of gc operands.
+ unsigned FirstGCPtrIdx = SO.getFirstGCPtrIdx();
+ unsigned LastGCPtrIdx = SO.getNumAllocaIdx() - 2;
+ for (unsigned Idx = 0; Idx < MI->getNumDefs(); Idx++) {
+ unsigned UseOpIdx;
+ if (!MI->isRegTiedToUseOperand(Idx, &UseOpIdx)) {
+ report("STATEPOINT defs expected to be tied", MI);
+ break;
+ }
+ if (UseOpIdx < FirstGCPtrIdx || UseOpIdx > LastGCPtrIdx) {
+ report("STATEPOINT def tied to non-gc operand", MI);
+ break;
+ }
+ }
// TODO: verify we have properly encoded deopt arguments
} break;
@@ -1865,8 +1990,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
- unsigned MONum, SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit,
- LaneBitmask LaneMask) {
+ unsigned MONum, SlotIndex UseIdx,
+ const LiveRange &LR,
+ Register VRegOrUnit,
+ LaneBitmask LaneMask) {
LiveQueryResult LRQ = LR.Query(UseIdx);
// Check if we have a segment at the use, note however that we only need one
// live subregister range, the others may be dead.
@@ -1887,8 +2014,11 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
}
void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
- unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
- bool SubRangeCheck, LaneBitmask LaneMask) {
+ unsigned MONum, SlotIndex DefIdx,
+ const LiveRange &LR,
+ Register VRegOrUnit,
+ bool SubRangeCheck,
+ LaneBitmask LaneMask) {
if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) {
assert(VNI && "NULL valno is not allowed");
if (VNI->def != DefIdx) {
@@ -1932,7 +2062,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
- const unsigned Reg = MO->getReg();
+ const Register Reg = MO->getReg();
// Both use and def operands can read a register.
if (MO->readsReg()) {
@@ -1950,8 +2080,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI);
// Check the cached regunit intervals.
- if (Register::isPhysicalRegister(Reg) && !isReserved(Reg)) {
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (Reg.isPhysical() && !isReserved(Reg)) {
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units) {
if (MRI->isReservedRegUnit(*Units))
continue;
if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units))
@@ -2097,9 +2228,9 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
// Kill any masked registers.
while (!regMasks.empty()) {
const uint32_t *Mask = regMasks.pop_back_val();
- for (unsigned Reg : regsLive)
- if (Register::isPhysicalRegister(Reg) &&
- MachineOperand::clobbersPhysReg(Mask, Reg))
+ for (Register Reg : regsLive)
+ if (Reg.isPhysical() &&
+ MachineOperand::clobbersPhysReg(Mask, Reg.asMCReg()))
regsDead.push_back(Reg);
}
set_subtract(regsLive, regsDead); regsDead.clear();
@@ -2132,7 +2263,7 @@ struct VRegFilter {
// Add elements to the filter itself. \pre Input set \p FromRegSet must have
// no duplicates. Both virtual and physical registers are fine.
template <typename RegSetT> void add(const RegSetT &FromRegSet) {
- SmallVector<unsigned, 0> VRegsBuffer;
+ SmallVector<Register, 0> VRegsBuffer;
filterAndAdd(FromRegSet, VRegsBuffer);
}
// Filter \p FromRegSet through the filter and append passed elements into \p
@@ -2140,13 +2271,13 @@ struct VRegFilter {
// \returns true if anything changed.
template <typename RegSetT>
bool filterAndAdd(const RegSetT &FromRegSet,
- SmallVectorImpl<unsigned> &ToVRegs) {
+ SmallVectorImpl<Register> &ToVRegs) {
unsigned SparseUniverse = Sparse.size();
unsigned NewSparseUniverse = SparseUniverse;
unsigned NewDenseSize = Dense.size();
size_t Begin = ToVRegs.size();
- for (unsigned Reg : FromRegSet) {
- if (!Register::isVirtualRegister(Reg))
+ for (Register Reg : FromRegSet) {
+ if (!Reg.isVirtual())
continue;
unsigned Index = Register::virtReg2Index(Reg);
if (Index < SparseUniverseMax) {
@@ -2170,7 +2301,7 @@ struct VRegFilter {
Sparse.resize(NewSparseUniverse);
Dense.reserve(NewDenseSize);
for (unsigned I = Begin; I < End; ++I) {
- unsigned Reg = ToVRegs[I];
+ Register Reg = ToVRegs[I];
unsigned Index = Register::virtReg2Index(Reg);
if (Index < SparseUniverseMax)
Sparse.set(Index);
@@ -2203,7 +2334,7 @@ private:
// universe). filter_b implicitly contains all physical registers at all times.
class FilteringVRegSet {
VRegFilter Filter;
- SmallVector<unsigned, 0> VRegs;
+ SmallVector<Register, 0> VRegs;
public:
// Set-up the filter_b. \pre Input register set \p RS must have no duplicates.
@@ -2229,63 +2360,28 @@ public:
// can pass through an MBB live, but may not be live every time. It is assumed
// that all vregsPassed sets are empty before the call.
void MachineVerifier::calcRegsPassed() {
- // This is a forward dataflow, doing it in RPO. A standard map serves as a
- // priority (sorting by RPO number) queue, deduplicating worklist, and an RPO
- // number to MBB mapping all at once.
- std::map<unsigned, const MachineBasicBlock *> RPOWorklist;
- DenseMap<const MachineBasicBlock *, unsigned> RPONumbers;
- if (MF->empty()) {
+ if (MF->empty())
// ReversePostOrderTraversal doesn't handle empty functions.
return;
- }
- std::vector<FilteringVRegSet> VRegsPassedSets(MF->size());
- for (const MachineBasicBlock *MBB :
- ReversePostOrderTraversal<const MachineFunction *>(MF)) {
- // Careful with the evaluation order, fetch next number before allocating.
- unsigned Number = RPONumbers.size();
- RPONumbers[MBB] = Number;
- // Set-up the transfer functions for all blocks.
- const BBInfo &MInfo = MBBInfoMap[MBB];
- VRegsPassedSets[Number].addToFilter(MInfo.regsKilled);
- VRegsPassedSets[Number].addToFilter(MInfo.regsLiveOut);
- }
- // First push live-out regs to successors' vregsPassed. Remember the MBBs that
- // have any vregsPassed.
- for (const MachineBasicBlock &MBB : *MF) {
- const BBInfo &MInfo = MBBInfoMap[&MBB];
- if (!MInfo.reachable)
- continue;
- for (const MachineBasicBlock *Succ : MBB.successors()) {
- unsigned SuccNumber = RPONumbers[Succ];
- FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber];
- if (SuccSet.add(MInfo.regsLiveOut))
- RPOWorklist.emplace(SuccNumber, Succ);
- }
- }
- // Iteratively push vregsPassed to successors.
- while (!RPOWorklist.empty()) {
- auto Next = RPOWorklist.begin();
- const MachineBasicBlock *MBB = Next->second;
- RPOWorklist.erase(Next);
- FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[MBB]];
- for (const MachineBasicBlock *Succ : MBB->successors()) {
- if (Succ == MBB)
+ for (const MachineBasicBlock *MB :
+ ReversePostOrderTraversal<const MachineFunction *>(MF)) {
+ FilteringVRegSet VRegs;
+ BBInfo &Info = MBBInfoMap[MB];
+ assert(Info.reachable);
+
+ VRegs.addToFilter(Info.regsKilled);
+ VRegs.addToFilter(Info.regsLiveOut);
+ for (const MachineBasicBlock *Pred : MB->predecessors()) {
+ const BBInfo &PredInfo = MBBInfoMap[Pred];
+ if (!PredInfo.reachable)
continue;
- unsigned SuccNumber = RPONumbers[Succ];
- FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber];
- if (SuccSet.add(MSet))
- RPOWorklist.emplace(SuccNumber, Succ);
+
+ VRegs.add(PredInfo.regsLiveOut);
+ VRegs.add(PredInfo.vregsPassed);
}
- }
- // Copy the results back to BBInfos.
- for (const MachineBasicBlock &MBB : *MF) {
- BBInfo &MInfo = MBBInfoMap[&MBB];
- if (!MInfo.reachable)
- continue;
- const FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[&MBB]];
- MInfo.vregsPassed.reserve(MSet.size());
- MInfo.vregsPassed.insert(MSet.begin(), MSet.end());
+ Info.vregsPassed.reserve(VRegs.size());
+ Info.vregsPassed.insert(VRegs.begin(), VRegs.end());
}
}
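
For readers following the rewritten calcRegsPassed() above, here is a minimal sketch of the same single-sweep dataflow using plain STL containers instead of the verifier's BBInfo/VRegFilter machinery; BlockInfo, Preds and the set representation are assumptions made purely for illustration.

#include <set>
#include <vector>

struct BlockInfo {
  std::set<unsigned> regsKilled, regsLiveOut, vregsPassed;
};

// Blocks must be indexed in reverse post order; Preds[B] lists the
// predecessors of block B. Mirrors the patch: one RPO sweep, with the
// block's own regsKilled/regsLiveOut acting as the filter.
void calcRegsPassedSketch(std::vector<BlockInfo> &Blocks,
                          const std::vector<std::vector<unsigned>> &Preds) {
  for (unsigned B = 0; B < Blocks.size(); ++B) {
    std::set<unsigned> In;
    for (unsigned P : Preds[B]) {
      In.insert(Blocks[P].regsLiveOut.begin(), Blocks[P].regsLiveOut.end());
      In.insert(Blocks[P].vregsPassed.begin(), Blocks[P].vregsPassed.end());
    }
    // Keep only registers the block neither kills nor redefines, matching
    // addToFilter(regsKilled)/addToFilter(regsLiveOut) above.
    for (unsigned R : In)
      if (!Blocks[B].regsKilled.count(R) && !Blocks[B].regsLiveOut.count(R))
        Blocks[B].vregsPassed.insert(R);
  }
}

Contributions that only arrive via back edges are not propagated further in this single sweep, which matches the patch's replacement of the old worklist iteration.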
@@ -2302,6 +2398,23 @@ void MachineVerifier::calcRegsRequired() {
if (PInfo.addRequired(MInfo.vregsLiveIn))
todo.insert(Pred);
}
+
+ // Handle the PHI node.
+ for (const MachineInstr &MI : MBB.phis()) {
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ // Skip operands that are not registers or that do not read the register.
+ if (!MI.getOperand(i).isReg() || !MI.getOperand(i).readsReg())
+ continue;
+
+ // Get register and predecessor for one PHI edge.
+ Register Reg = MI.getOperand(i).getReg();
+ const MachineBasicBlock *Pred = MI.getOperand(i + 1).getMBB();
+
+ BBInfo &PInfo = MBBInfoMap[Pred];
+ if (PInfo.addRequired(Reg))
+ todo.insert(Pred);
+ }
+ }
}
// Iteratively push vregsRequired to predecessors. This will converge to the
@@ -2399,7 +2512,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
// Check for killed virtual registers that should be live out.
for (const auto &MBB : *MF) {
BBInfo &MInfo = MBBInfoMap[&MBB];
- for (unsigned VReg : MInfo.vregsRequired)
+ for (Register VReg : MInfo.vregsRequired)
if (MInfo.regsKilled.count(VReg)) {
report("Virtual register killed in block, but needed live out.", &MBB);
errs() << "Virtual register " << printReg(VReg)
@@ -2409,7 +2522,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
if (!MF->empty()) {
BBInfo &MInfo = MBBInfoMap[&MF->front()];
- for (unsigned VReg : MInfo.vregsRequired) {
+ for (Register VReg : MInfo.vregsRequired) {
report("Virtual register defs don't dominate all uses.", MF);
report_context_vreg(VReg);
}
@@ -2449,12 +2562,27 @@ void MachineVerifier::visitMachineFunctionAfter() {
for (auto CSInfo : MF->getCallSitesInfo())
if (!CSInfo.first->isCall())
report("Call site info referencing instruction that is not call", MF);
+
+ // If there's debug-info, check that we don't have any duplicate value
+ // tracking numbers.
+ if (MF->getFunction().getSubprogram()) {
+ DenseSet<unsigned> SeenNumbers;
+ for (auto &MBB : *MF) {
+ for (auto &MI : MBB) {
+ if (auto Num = MI.peekDebugInstrNum()) {
+ auto Result = SeenNumbers.insert((unsigned)Num);
+ if (!Result.second)
+ report("Instruction has a duplicated value tracking number", &MI);
+ }
+ }
+ }
+ }
}
void MachineVerifier::verifyLiveVariables() {
assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
- for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
for (const auto &MBB : *MF) {
BBInfo &MInfo = MBBInfoMap[&MBB];
@@ -2479,8 +2607,8 @@ void MachineVerifier::verifyLiveVariables() {
void MachineVerifier::verifyLiveIntervals() {
assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
- for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
// Spilling and splitting may leave unused registers around. Skip them.
if (MRI->reg_nodbg_empty(Reg))
@@ -2493,7 +2621,7 @@ void MachineVerifier::verifyLiveIntervals() {
}
const LiveInterval &LI = LiveInts->getInterval(Reg);
- assert(Reg == LI.reg && "Invalid reg to interval mapping");
+ assert(Reg == LI.reg() && "Invalid reg to interval mapping");
verifyLiveInterval(LI);
}
@@ -2504,7 +2632,7 @@ void MachineVerifier::verifyLiveIntervals() {
}
void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
- const VNInfo *VNI, unsigned Reg,
+ const VNInfo *VNI, Register Reg,
LaneBitmask LaneMask) {
if (VNI->isUnused())
return;
@@ -2597,8 +2725,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
const LiveRange::const_iterator I,
- unsigned Reg, LaneBitmask LaneMask)
-{
+ Register Reg,
+ LaneBitmask LaneMask) {
const LiveRange::Segment &S = *I;
const VNInfo *VNI = S.valno;
assert(VNI && "Live segment has no valno");
@@ -2809,7 +2937,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
}
}
-void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
+void MachineVerifier::verifyLiveRange(const LiveRange &LR, Register Reg,
LaneBitmask LaneMask) {
for (const VNInfo *VNI : LR.valnos)
verifyLiveRangeValue(LR, VNI, Reg, LaneMask);
@@ -2819,7 +2947,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
}
void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
- unsigned Reg = LI.reg;
+ Register Reg = LI.reg();
assert(Register::isVirtualRegister(Reg));
verifyLiveRange(LI, Reg);
@@ -2836,10 +2964,10 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
}
if (SR.empty()) {
report("Subrange must not be empty", MF);
- report_context(SR, LI.reg, SR.LaneMask);
+ report_context(SR, LI.reg(), SR.LaneMask);
}
Mask |= SR.LaneMask;
- verifyLiveRange(SR, LI.reg, SR.LaneMask);
+ verifyLiveRange(SR, LI.reg(), SR.LaneMask);
if (!LI.covers(SR)) {
report("A Subrange is not covered by the main range", MF);
report_context(LI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index d85b1b7988ce..095da09ea82b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -11,9 +11,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp
new file mode 100644
index 000000000000..e4cd92ac4868
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp
@@ -0,0 +1,92 @@
+//===- MultiHazardRecognizer.cpp - Scheduler Support ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MultiHazardRecognizer class, which is a wrapper
+// for a set of ScheduleHazardRecognizer instances.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MultiHazardRecognizer.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <functional>
+#include <numeric>
+
+using namespace llvm;
+
+void MultiHazardRecognizer::AddHazardRecognizer(
+ std::unique_ptr<ScheduleHazardRecognizer> &&R) {
+ MaxLookAhead = std::max(MaxLookAhead, R->getMaxLookAhead());
+ Recognizers.push_back(std::move(R));
+}
+
+bool MultiHazardRecognizer::atIssueLimit() const {
+ return llvm::any_of(Recognizers,
+ std::mem_fn(&ScheduleHazardRecognizer::atIssueLimit));
+}
+
+ScheduleHazardRecognizer::HazardType
+MultiHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ for (auto &R : Recognizers) {
+ auto res = R->getHazardType(SU, Stalls);
+ if (res != NoHazard)
+ return res;
+ }
+ return NoHazard;
+}
+
+void MultiHazardRecognizer::Reset() {
+ for (auto &R : Recognizers)
+ R->Reset();
+}
+
+void MultiHazardRecognizer::EmitInstruction(SUnit *SU) {
+ for (auto &R : Recognizers)
+ R->EmitInstruction(SU);
+}
+
+void MultiHazardRecognizer::EmitInstruction(MachineInstr *MI) {
+ for (auto &R : Recognizers)
+ R->EmitInstruction(MI);
+}
+
+unsigned MultiHazardRecognizer::PreEmitNoops(SUnit *SU) {
+ auto MN = [=](unsigned a, std::unique_ptr<ScheduleHazardRecognizer> &R) {
+ return std::max(a, R->PreEmitNoops(SU));
+ };
+ return std::accumulate(Recognizers.begin(), Recognizers.end(), 0u, MN);
+}
+
+unsigned MultiHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+ auto MN = [=](unsigned a, std::unique_ptr<ScheduleHazardRecognizer> &R) {
+ return std::max(a, R->PreEmitNoops(MI));
+ };
+ return std::accumulate(Recognizers.begin(), Recognizers.end(), 0u, MN);
+}
+
+bool MultiHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ auto SPA = [=](std::unique_ptr<ScheduleHazardRecognizer> &R) {
+ return R->ShouldPreferAnother(SU);
+ };
+ return llvm::any_of(Recognizers, SPA);
+}
+
+void MultiHazardRecognizer::AdvanceCycle() {
+ for (auto &R : Recognizers)
+ R->AdvanceCycle();
+}
+
+void MultiHazardRecognizer::RecedeCycle() {
+ for (auto &R : Recognizers)
+ R->RecedeCycle();
+}
+
+void MultiHazardRecognizer::EmitNoop() {
+ for (auto &R : Recognizers)
+ R->EmitNoop();
+}
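
To show how the new wrapper is meant to be composed, here is a hedged sketch: EvenCycleHazard is a made-up toy recognizer, and the only assumption beyond the file above is that MultiHazardRecognizer can be default-constructed and populated through AddHazardRecognizer().

#include "llvm/CodeGen/MultiHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include <memory>

namespace {
// Toy recognizer: reports a hazard whenever it has seen an odd number of
// instructions.
struct EvenCycleHazard : llvm::ScheduleHazardRecognizer {
  unsigned Count = 0;
  HazardType getHazardType(llvm::SUnit *, int) override {
    return (Count & 1) ? Hazard : NoHazard;
  }
  void EmitInstruction(llvm::SUnit *) override { ++Count; }
};
} // namespace

std::unique_ptr<llvm::ScheduleHazardRecognizer> makeCombinedRecognizer() {
  auto Multi = std::make_unique<llvm::MultiHazardRecognizer>();
  // A hazard reported by either part stalls scheduling; MaxLookAhead
  // becomes the maximum over the parts (see AddHazardRecognizer above).
  Multi->AddHazardRecognizer(std::make_unique<EvenCycleHazard>());
  Multi->AddHazardRecognizer(std::make_unique<EvenCycleHazard>());
  return Multi;
}

A target would typically hand such a combined recognizer back from its scheduling hooks, letting each sub-recognizer veto independently.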
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
index 311b87fa9e3b..8148b64d8443 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
@@ -101,10 +101,10 @@ namespace {
// These functions are temporary abstractions around LiveVariables and
// LiveIntervals, so they can go away when LiveVariables does.
- bool isLiveIn(unsigned Reg, const MachineBasicBlock *MBB);
- bool isLiveOutPastPHIs(unsigned Reg, const MachineBasicBlock *MBB);
+ bool isLiveIn(Register Reg, const MachineBasicBlock *MBB);
+ bool isLiveOutPastPHIs(Register Reg, const MachineBasicBlock *MBB);
- using BBVRegPair = std::pair<unsigned, unsigned>;
+ using BBVRegPair = std::pair<unsigned, Register>;
using VRegPHIUse = DenseMap<BBVRegPair, unsigned>;
VRegPHIUse VRegPHIUseCount;
@@ -324,21 +324,43 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Increment use count of the newly created virtual register.
LV->setPHIJoin(IncomingReg);
- // When we are reusing the incoming register, it may already have been
- // killed in this block. The old kill will also have been inserted at
- // AfterPHIsIt, so it appears before the current PHICopy.
- if (reusedIncoming)
- if (MachineInstr *OldKill = VI.findKill(&MBB)) {
- LLVM_DEBUG(dbgs() << "Remove old kill from " << *OldKill);
- LV->removeVirtualRegisterKilled(IncomingReg, *OldKill);
- LLVM_DEBUG(MBB.dump());
+ MachineInstr *OldKill = nullptr;
+ bool IsPHICopyAfterOldKill = false;
+
+ if (reusedIncoming && (OldKill = VI.findKill(&MBB))) {
+ // Calculate whether the PHICopy is after the OldKill.
+ // By default, the PHICopy is inserted as the first non-phi instruction,
+ // so it is before the OldKill. But some Target hooks for
+ // createPHIDestinationCopy() may modify the default insert position of
+ // the PHICopy.
+ for (auto I = MBB.SkipPHIsAndLabels(MBB.begin()), E = MBB.end();
+ I != E; ++I) {
+ if (I == PHICopy)
+ break;
+
+ if (I == OldKill) {
+ IsPHICopyAfterOldKill = true;
+ break;
+ }
}
+ }
- // Add information to LiveVariables to know that the incoming value is
- // killed. Note that because the value is defined in several places (once
- // each for each incoming block), the "def" block and instruction fields
- // for the VarInfo is not filled in.
- LV->addVirtualRegisterKilled(IncomingReg, *PHICopy);
+ // When we are reusing the incoming register and it has been marked killed
+ // by OldKill, if the PHICopy is after the OldKill, we should remove the
+ // killed flag from OldKill.
+ if (IsPHICopyAfterOldKill) {
+ LLVM_DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, *OldKill);
+ LLVM_DEBUG(MBB.dump());
+ }
+
+ // Add information to LiveVariables to know that the first used incoming
+ // value, or the reused incoming value whose PHICopy is after the OldKill,
+ // is killed. Note that because the value is defined in several places
+ // (once for each incoming block), the "def" block and instruction
+ // fields for the VarInfo are not filled in.
+ if (!OldKill || IsPHICopyAfterOldKill)
+ LV->addVirtualRegisterKilled(IncomingReg, *PHICopy);
}
// Since we are going to be deleting the PHI node, if it is the last use of
@@ -372,8 +394,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
}
LiveInterval &DestLI = LIS->getInterval(DestReg);
- assert(DestLI.begin() != DestLI.end() &&
- "PHIs should have nonempty LiveIntervals.");
+ assert(!DestLI.empty() && "PHIs should have nonempty LiveIntervals.");
if (DestLI.endIndex().isDead()) {
// A dead PHI's live range begins and ends at the start of the MBB, but
// the lowered copy, which will still be dead, needs to begin and end at
@@ -420,6 +441,19 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (!MBBsInsertedInto.insert(&opBlock).second)
continue; // If the copy has already been emitted, we're done.
+ MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
+ if (SrcRegDef && TII->isUnspillableTerminator(SrcRegDef)) {
+ assert(SrcRegDef->getOperand(0).isReg() &&
+ SrcRegDef->getOperand(0).isDef() &&
+ "Expected operand 0 to be a reg def!");
+ // Now that the PHI's use has been removed (as the instruction was
+ // removed) there should be no other uses of the SrcReg.
+ assert(MRI->use_empty(SrcReg) &&
+ "Expected a single use from UnspillableTerminator");
+ SrcRegDef->getOperand(0).setReg(IncomingReg);
+ continue;
+ }
+
// Find a safe location to insert the copy, this may be the first terminator
// in the block (or end()).
MachineBasicBlock::iterator InsertPos =
@@ -670,7 +704,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
return Changed;
}
-bool PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock *MBB) {
+bool PHIElimination::isLiveIn(Register Reg, const MachineBasicBlock *MBB) {
assert((LV || LIS) &&
"isLiveIn() requires either LiveVariables or LiveIntervals");
if (LIS)
@@ -679,7 +713,7 @@ bool PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock *MBB) {
return LV->isLiveIn(Reg, *MBB);
}
-bool PHIElimination::isLiveOutPastPHIs(unsigned Reg,
+bool PHIElimination::isLiveOutPastPHIs(Register Reg,
const MachineBasicBlock *MBB) {
assert((LV || LIS) &&
"isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
index 2a72717e711d..016335f420d3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -8,9 +8,9 @@
#include "PHIEliminationUtils.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+
using namespace llvm;
// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
index c19ed1f8f71d..849b667254bd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
@@ -28,6 +28,8 @@ static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
function_ref<std::unique_ptr<TargetMachine>()> TMFactory,
CodeGenFileType FileType) {
std::unique_ptr<TargetMachine> TM = TMFactory();
+ assert(TM && "Failed to create target machine!");
+
legacy::PassManager CodeGenPasses;
if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType))
report_fatal_error("Failed to setup codegen");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 4a66863ea803..34ac396c0471 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -178,6 +178,11 @@ namespace {
}
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
+
/// Track Def -> Use info used for rewriting copies.
using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
@@ -196,41 +201,39 @@ namespace {
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
bool optimizeRecurrence(MachineInstr &PHI);
bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
- bool isMoveImmediate(MachineInstr &MI,
- SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
- bool foldImmediate(MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs);
+ bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs);
/// Finds recurrence cycles, but only ones that formulated around
/// a def operand and a use operand that are tied. If there is a use
/// operand commutable with the tied use operand, find recurrence cycle
/// along that operand as well.
- bool findTargetRecurrence(unsigned Reg,
- const SmallSet<unsigned, 2> &TargetReg,
+ bool findTargetRecurrence(Register Reg,
+ const SmallSet<Register, 2> &TargetReg,
RecurrenceCycle &RC);
/// If copy instruction \p MI is a virtual register copy, track it in
- /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
- /// previously seen as a copy, replace the uses of this copy with the
- /// previously seen copy's destination register.
+ /// the set \p CopyMIs. If this virtual register was previously seen as a
+ /// copy, replace the uses of this copy with the previously seen copy's
+ /// destination register.
bool foldRedundantCopy(MachineInstr &MI,
- SmallSet<unsigned, 4> &CopySrcRegs,
- DenseMap<unsigned, MachineInstr *> &CopyMIs);
+ DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs);
/// Is the register \p Reg a non-allocatable physical register?
- bool isNAPhysCopy(unsigned Reg);
+ bool isNAPhysCopy(Register Reg);
/// If copy instruction \p MI is a non-allocatable virtual<->physical
/// register copy, track it in the \p NAPhysToVirtMIs map. If this
/// non-allocatable physical register was previously copied to a virtual
/// registered and hasn't been clobbered, the virt->phys copy can be
/// deleted.
- bool foldRedundantNAPhysCopy(MachineInstr &MI,
- DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs);
+ bool foldRedundantNAPhysCopy(
+ MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
bool isLoadFoldable(MachineInstr &MI,
- SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
+ SmallSet<Register, 16> &FoldAsLoadDefCandidates);
/// Check whether \p MI is understood by the register coalescer
/// but may require some rewriting.
@@ -291,7 +294,7 @@ namespace {
public:
ValueTrackerResult() = default;
- ValueTrackerResult(unsigned Reg, unsigned SubReg) {
+ ValueTrackerResult(Register Reg, unsigned SubReg) {
addSource(Reg, SubReg);
}
@@ -305,11 +308,11 @@ namespace {
Inst = nullptr;
}
- void addSource(unsigned SrcReg, unsigned SrcSubReg) {
+ void addSource(Register SrcReg, unsigned SrcSubReg) {
RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
}
- void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) {
+ void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
assert(Idx < getNumSources() && "Reg pair source out of index");
RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
}
@@ -320,7 +323,7 @@ namespace {
return RegSrcs[Idx];
}
- unsigned getSrcReg(int Idx) const {
+ Register getSrcReg(int Idx) const {
assert(Idx < getNumSources() && "Reg source out of index");
return RegSrcs[Idx].Reg;
}
@@ -330,7 +333,7 @@ namespace {
return RegSrcs[Idx].SubReg;
}
- bool operator==(const ValueTrackerResult &Other) {
+ bool operator==(const ValueTrackerResult &Other) const {
if (Other.getInst() != getInst())
return false;
@@ -373,7 +376,7 @@ namespace {
unsigned DefSubReg;
/// The register where the value can be found.
- unsigned Reg;
+ Register Reg;
/// MachineRegisterInfo used to perform tracking.
const MachineRegisterInfo &MRI;
@@ -415,11 +418,11 @@ namespace {
/// Indeed, when \p Reg is a physical register that constructor does not
/// know which definition of \p Reg it should track.
/// Use the next constructor to track a physical register.
- ValueTracker(unsigned Reg, unsigned DefSubReg,
+ ValueTracker(Register Reg, unsigned DefSubReg,
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII = nullptr)
: DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
- if (!Register::isPhysicalRegister(Reg)) {
+ if (!Reg.isPhysical()) {
Def = MRI.getVRegDef(Reg);
DefIdx = MRI.def_begin(Reg).getOperandNo();
}
@@ -824,7 +827,7 @@ public:
/// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
/// \return True if the rewriting was possible, false otherwise.
- virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) = 0;
+ virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
};
/// Rewriter for COPY instructions.
@@ -852,7 +855,7 @@ public:
return true;
}
- bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
if (CurrentSrcIdx != 1)
return false;
MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
@@ -897,7 +900,7 @@ public:
return true;
}
- bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
return false;
}
};
@@ -941,7 +944,7 @@ public:
return true;
}
- bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
if (CurrentSrcIdx != 2)
return false;
// We are rewriting the inserted reg.
@@ -988,7 +991,7 @@ public:
return true;
}
- bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
// The only source we can rewrite is the input register.
if (CurrentSrcIdx != 1)
return false;
@@ -1066,7 +1069,7 @@ public:
return MODef.getSubReg() == 0;
}
- bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
// We cannot rewrite out of bound operands.
// Moreover, rewritable sources are at odd positions.
if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
@@ -1312,7 +1315,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
/// We only fold loads to virtual registers and the virtual register defined
/// has a single user.
bool PeepholeOptimizer::isLoadFoldable(
- MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
+ MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
if (!MI.canFoldAsLoad() || !MI.mayLoad())
return false;
const MCInstrDesc &MCID = MI.getDesc();
@@ -1323,7 +1326,7 @@ bool PeepholeOptimizer::isLoadFoldable(
// To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
// loads. It should be checked when processing uses of the load, since
// uses can be removed during peephole.
- if (!MI.getOperand(0).getSubReg() && Register::isVirtualRegister(Reg) &&
+ if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
MRI->hasOneNonDBGUser(Reg)) {
FoldAsLoadDefCandidates.insert(Reg);
return true;
@@ -1332,15 +1335,15 @@ bool PeepholeOptimizer::isLoadFoldable(
}
bool PeepholeOptimizer::isMoveImmediate(
- MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
+ MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs) {
const MCInstrDesc &MCID = MI.getDesc();
if (!MI.isMoveImmediate())
return false;
if (MCID.getNumDefs() != 1)
return false;
Register Reg = MI.getOperand(0).getReg();
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
ImmDefMIs.insert(std::make_pair(Reg, &MI));
ImmDefRegs.insert(Reg);
return true;
@@ -1352,22 +1355,19 @@ bool PeepholeOptimizer::isMoveImmediate(
/// Try folding register operands that are defined by move immediate
/// instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
-bool PeepholeOptimizer::foldImmediate(MachineInstr &MI,
- SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
+bool PeepholeOptimizer::foldImmediate(
+ MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs) {
for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isDef())
continue;
- // Ignore dead implicit defs.
- if (MO.isImplicit() && MO.isDead())
- continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
if (ImmDefRegs.count(Reg) == 0)
continue;
- DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg);
assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) {
++NumImmFold;
@@ -1391,33 +1391,30 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr &MI,
// %2 = COPY %0:sub1
//
// Should replace %2 uses with %1:sub1
-bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
- SmallSet<unsigned, 4> &CopySrcRegs,
- DenseMap<unsigned, MachineInstr *> &CopyMIs) {
+bool PeepholeOptimizer::foldRedundantCopy(
+ MachineInstr &MI, DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs) {
assert(MI.isCopy() && "expected a COPY machine instruction");
Register SrcReg = MI.getOperand(1).getReg();
- if (!Register::isVirtualRegister(SrcReg))
+ unsigned SrcSubReg = MI.getOperand(1).getSubReg();
+ if (!SrcReg.isVirtual())
return false;
Register DstReg = MI.getOperand(0).getReg();
- if (!Register::isVirtualRegister(DstReg))
+ if (!DstReg.isVirtual())
return false;
- if (CopySrcRegs.insert(SrcReg).second) {
+ RegSubRegPair SrcPair(SrcReg, SrcSubReg);
+
+ if (CopyMIs.insert(std::make_pair(SrcPair, &MI)).second) {
// First copy of this reg seen.
- CopyMIs.insert(std::make_pair(SrcReg, &MI));
return false;
}
- MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second;
+ MachineInstr *PrevCopy = CopyMIs.find(SrcPair)->second;
- unsigned SrcSubReg = MI.getOperand(1).getSubReg();
- unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg();
-
- // Can't replace different subregister extracts.
- if (SrcSubReg != PrevSrcSubReg)
- return false;
+ assert(SrcSubReg == PrevCopy->getOperand(1).getSubReg() &&
+ "Unexpected mismatching subreg!");
Register PrevDstReg = PrevCopy->getOperand(0).getReg();
@@ -1435,12 +1432,12 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
return true;
}
-bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) {
- return Register::isPhysicalRegister(Reg) && !MRI->isAllocatable(Reg);
+bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
+ return Reg.isPhysical() && !MRI->isAllocatable(Reg);
}
bool PeepholeOptimizer::foldRedundantNAPhysCopy(
- MachineInstr &MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) {
+ MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
assert(MI.isCopy() && "expected a COPY machine instruction");
if (DisableNAPhysCopyOpt)
@@ -1449,17 +1446,17 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) {
- // %vreg = COPY %physreg
+ // %vreg = COPY $physreg
// Avoid using a datastructure which can track multiple live non-allocatable
// phys->virt copies since LLVM doesn't seem to do this.
NAPhysToVirtMIs.insert({SrcReg, &MI});
return false;
}
- if (!(Register::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
+ if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
return false;
- // %physreg = COPY %vreg
+ // $physreg = COPY %vreg
auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
if (PrevCopy == NAPhysToVirtMIs.end()) {
// We can't remove the copy: there was an intervening clobber of the
@@ -1489,13 +1486,11 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
/// \brief Returns true if \p MO is a virtual register operand.
static bool isVirtualRegisterOperand(MachineOperand &MO) {
- if (!MO.isReg())
- return false;
- return Register::isVirtualRegister(MO.getReg());
+ return MO.isReg() && MO.getReg().isVirtual();
}
bool PeepholeOptimizer::findTargetRecurrence(
- unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
+ Register Reg, const SmallSet<Register, 2> &TargetRegs,
RecurrenceCycle &RC) {
// Recurrence found if Reg is in TargetRegs.
if (TargetRegs.count(Reg))
@@ -1566,7 +1561,7 @@ bool PeepholeOptimizer::findTargetRecurrence(
/// %1 of ADD instruction, the redundant move instruction can be
/// avoided.
bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
- SmallSet<unsigned, 2> TargetRegs;
+ SmallSet<Register, 2> TargetRegs;
for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
MachineOperand &MO = PHI.getOperand(Idx);
assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
@@ -1622,20 +1617,20 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// during the scan, if a MI is not in the set, it is assumed to be located
// after. Newly created MIs have to be inserted in the set as well.
SmallPtrSet<MachineInstr*, 16> LocalMIs;
- SmallSet<unsigned, 4> ImmDefRegs;
- DenseMap<unsigned, MachineInstr*> ImmDefMIs;
- SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
+ SmallSet<Register, 4> ImmDefRegs;
+ DenseMap<Register, MachineInstr *> ImmDefMIs;
+ SmallSet<Register, 16> FoldAsLoadDefCandidates;
// Track when a non-allocatable physical register is copied to a virtual
// register so that useless moves can be removed.
//
- // %physreg is the map index; MI is the last valid `%vreg = COPY %physreg`
- // without any intervening re-definition of %physreg.
- DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
+ // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg`
+ // without any intervening re-definition of $physreg.
+ DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
- // Set of virtual registers that are copied from.
- SmallSet<unsigned, 4> CopySrcRegs;
- DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+ // Set of pairs of virtual registers and their subregs that are copied
+ // from.
+ DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
bool IsLoopHeader = MLI->isLoopHeader(&MBB);
@@ -1646,9 +1641,10 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
++MII;
LocalMIs.insert(MI);
- // Skip debug instructions. They should not affect this peephole optimization.
+ // Skip debug instructions. They should not affect this peephole
+ // optimization.
if (MI->isDebugInstr())
- continue;
+ continue;
if (MI->isPosition())
continue;
@@ -1678,7 +1674,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
} else if (MO.isRegMask()) {
const uint32_t *RegMask = MO.getRegMask();
for (auto &RegMI : NAPhysToVirtMIs) {
- unsigned Def = RegMI.first;
+ Register Def = RegMI.first;
if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
LLVM_DEBUG(dbgs()
<< "NAPhysCopy: invalidating because of " << *MI);
@@ -1723,9 +1719,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- if (MI->isCopy() &&
- (foldRedundantCopy(*MI, CopySrcRegs, CopySrcMIs) ||
- foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
+ if (MI->isCopy() && (foldRedundantCopy(*MI, CopySrcMIs) ||
+ foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
LocalMIs.erase(MI);
LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
MI->eraseFromParent();
@@ -1763,13 +1758,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &MOp = MI->getOperand(i);
if (!MOp.isReg())
continue;
- unsigned FoldAsLoadDefReg = MOp.getReg();
+ Register FoldAsLoadDefReg = MOp.getReg();
if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
// We need to fold load after optimizeCmpInstr, since
// optimizeCmpInstr can enable folding by converting SUB to CMP.
// Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
// we need it for markUsesInDebugValueAsUndef().
- unsigned FoldedReg = FoldAsLoadDefReg;
+ Register FoldedReg = FoldAsLoadDefReg;
MachineInstr *DefMI = nullptr;
if (MachineInstr *FoldMI =
TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
index 4f88f4d3dd6a..82ed386db827 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -82,11 +82,9 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
for (MachineInstr &MI : MBB) {
// If we need to emit noops prior to this instruction, then do so.
unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI);
- for (unsigned i = 0; i != NumPreNoops; ++i) {
- HazardRec->EmitNoop();
- TII->insertNoop(MBB, MachineBasicBlock::iterator(MI));
- ++NumNoops;
- }
+ HazardRec->EmitNoops(NumPreNoops);
+ TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops);
+ NumNoops += NumPreNoops;
HazardRec->EmitInstruction(&MI);
if (HazardRec->atIssueLimit()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 1be9544848ec..80c38f3ec341 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -96,7 +96,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
++I;
IRBuilder<> Builder(CI->getParent(), CI->getIterator());
- SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
+ SmallVector<Value *, 8> Args(CI->args());
CallInst *NewCI = Builder.CreateCall(FCache, Args);
NewCI->setName(CI->getName());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index a489f493d5ee..378aaba2a65f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -620,12 +620,12 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
if (!MFI.hasCalls())
NumLeafFuncWithSpills++;
- for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
insertCSRSaves(*SaveBlock, CSI);
- // Update the live-in information of all the blocks up to the save
- // point.
- updateLiveness(MF);
- }
+
+ // Update the live-in information of all the blocks up to the save point.
+ updateLiveness(MF);
+
for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
insertCSRRestores(*RestoreBlock, CSI);
}
@@ -1077,7 +1077,26 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
+ int64_t OffsetBeforeAlignment = Offset;
Offset = alignTo(Offset, StackAlign, Skew);
+
+ // If we have increased the offset to fulfill the alignment constraints,
+ // then the scavenging spill slots may become harder to reach from the
+ // stack pointer; float them so they stay close.
+ if (OffsetBeforeAlignment != Offset && RS && !EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ LLVM_DEBUG(if (!SFIs.empty()) llvm::dbgs()
+ << "Adjusting emergency spill slots!\n";);
+ int64_t Delta = Offset - OffsetBeforeAlignment;
+ for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end();
+ I != IE; ++I) {
+ LLVM_DEBUG(llvm::dbgs() << "Adjusting offset of emergency spill slot #"
+ << *I << " from " << MFI.getObjectOffset(*I););
+ MFI.setObjectOffset(*I, MFI.getObjectOffset(*I) - Delta);
+ LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(*I) << "\n";);
+ }
+ }
}
// Update frame info to pretend that this is part of the stack...
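
A tiny numeric illustration of the adjustment above, not part of the patch: the simplified alignTo below ignores the Skew parameter, and the slot offset of -16 is a hypothetical value chosen only for the arithmetic.

#include <cassert>
#include <cstdint>

// Simplified round-up, ignoring the Skew parameter used in the real code.
static int64_t alignToSketch(int64_t Value, int64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  int64_t OffsetBeforeAlignment = 72, StackAlign = 32;
  int64_t Offset = alignToSketch(OffsetBeforeAlignment, StackAlign);
  int64_t Delta = Offset - OffsetBeforeAlignment;
  assert(Offset == 96 && Delta == 24);

  // Mirrors MFI.setObjectOffset(FI, oldOffset - Delta): a hypothetical slot
  // at object offset -16 is re-planted at -40, tracking the grown frame.
  int64_t SlotOffset = -16;
  int64_t NewSlotOffset = SlotOffset - Delta;
  assert(NewSlotOffset == -40);
  return 0;
}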
@@ -1209,7 +1228,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
unsigned FrameIdx = MI.getOperand(0).getIndex();
unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
- int64_t Offset =
+ StackOffset Offset =
TFI->getFrameIndexReference(MF, FrameIdx, Reg);
MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
MI.getOperand(0).setIsDebug();
@@ -1236,7 +1255,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
// Make the DBG_VALUE direct.
MI.getDebugOffset().ChangeToRegister(0, false);
}
- DIExpr = DIExpression::prepend(DIExpr, PrependFlags, Offset);
+
+ DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset);
MI.getDebugExpressionOp().setMetadata(DIExpr);
continue;
}
@@ -1252,9 +1272,11 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
"DBG_VALUE machine instruction");
Register Reg;
MachineOperand &Offset = MI.getOperand(i + 1);
- int refOffset = TFI->getFrameIndexReferencePreferSP(
+ StackOffset refOffset = TFI->getFrameIndexReferencePreferSP(
MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
- Offset.setImm(Offset.getImm() + refOffset + SPAdj);
+ assert(!refOffset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj);
MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
new file mode 100644
index 000000000000..9c716a5a37ea
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -0,0 +1,95 @@
+//===- PseudoProbeInserter.cpp - Insert annotation for callsite profiling -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoProbeInserter pass, which inserts pseudo
+// probe annotations for call instructions with a pseudo-probe-specific DWARF
+// discriminator. Such a discriminator indicates that the call instruction
+// comes with a pseudo probe, and the discriminator value holds information
+// to identify the corresponding counter.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include <unordered_map>
+
+#define DEBUG_TYPE "pseudo-probe-inserter"
+
+using namespace llvm;
+
+namespace {
+class PseudoProbeInserter : public MachineFunctionPass {
+public:
+ static char ID;
+
+ PseudoProbeInserter() : MachineFunctionPass(ID) {
+ initializePseudoProbeInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Pseudo Probe Inserter"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.isCall()) {
+ if (DILocation *DL = MI.getDebugLoc()) {
+ auto Value = DL->getDiscriminator();
+ if (DILocation::isPseudoProbeDiscriminator(Value)) {
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::PSEUDO_PROBE))
+ .addImm(getFuncGUID(MF.getFunction().getParent(), DL))
+ .addImm(
+ PseudoProbeDwarfDiscriminator::extractProbeIndex(Value))
+ .addImm(
+ PseudoProbeDwarfDiscriminator::extractProbeType(Value))
+ .addImm(PseudoProbeDwarfDiscriminator::extractProbeAttributes(
+ Value));
+ Changed = true;
+ }
+ }
+ }
+ }
+ }
+
+ return Changed;
+ }
+
+private:
+ uint64_t getFuncGUID(Module *M, DILocation *DL) {
+ auto *SP = DL->getScope()->getSubprogram();
+ auto Name = SP->getLinkageName();
+ if (Name.empty())
+ Name = SP->getName();
+ return Function::getGUID(Name);
+ }
+};
+} // namespace
+
+char PseudoProbeInserter::ID = 0;
+INITIALIZE_PASS_BEGIN(PseudoProbeInserter, DEBUG_TYPE,
+ "Insert pseudo probe annotations for value profiling",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(PseudoProbeInserter, DEBUG_TYPE,
+ "Insert pseudo probe annotations for value profiling",
+ false, false)
+
+FunctionPass *llvm::createPseudoProbeInserter() {
+ return new PseudoProbeInserter();
+}
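
As a brief aside on the discriminator encoding this pass relies on, here is a hedged sketch of decoding a packed pseudo-probe discriminator the same way the pass does before emitting PSEUDO_PROBE; the printf wrapper is an illustrative assumption, and only the PseudoProbeDwarfDiscriminator accessors come from the file above.

#include "llvm/IR/PseudoProbe.h"
#include <cstdint>
#include <cstdio>

void dumpProbeDiscriminator(uint32_t Disc) {
  using D = llvm::PseudoProbeDwarfDiscriminator;
  // The same three accessors feed the PSEUDO_PROBE operands in the pass.
  std::printf("probe index=%u type=%u attributes=%u\n",
              static_cast<unsigned>(D::extractProbeIndex(Disc)),
              static_cast<unsigned>(D::extractProbeType(Disc)),
              static_cast<unsigned>(D::extractProbeAttributes(Disc)));
}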
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
index 437a6b030096..cebb902f0a4a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
@@ -984,11 +984,6 @@ RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
LaneBitmask M = AR.Mask & BR.Mask;
return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef();
}
-#ifndef NDEBUG
-// RegisterRef NAR = PRI.normalize(AR);
-// RegisterRef NBR = PRI.normalize(BR);
-// assert(NAR.Reg != NBR.Reg);
-#endif
// This isn't strictly correct, because the overlap may happen in the
// part masked out.
if (PRI.alias(AR, BR))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
index 0bcd27f8ea45..76bf0c280970 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -23,8 +23,10 @@
// <10.1145/2086696.2086706>. <hal-00647369>
//
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -45,6 +47,7 @@
#include <cstdint>
#include <iterator>
#include <map>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -108,7 +111,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
const RegisterAggr &DefRRs) {
NodeList RDefs; // Return value.
SetVector<NodeId> DefQ;
- SetVector<NodeId> Owners;
+ DenseMap<MachineInstr*, uint32_t> OrdMap;
// Dead defs will be treated as if they were live, since they are actually
// on the data-flow path. They cannot be ignored because even though they
@@ -151,18 +154,9 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
DefQ.insert(RD);
- }
-
- // Remove all non-phi defs that are not aliased to RefRR, and collect
- // the owners of the remaining defs.
- SetVector<NodeId> Defs;
- for (NodeId N : DefQ) {
- auto TA = DFG.addr<DefNode*>(N);
- bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
- if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
- continue;
- Defs.insert(TA.Id);
- Owners.insert(TA.Addr->getOwner(DFG).Id);
+ // Don't visit sibling defs. They share the same reaching def (which
+ // will be visited anyway), but they define something not aliased to
+ // this ref.
}
// Return the MachineBasicBlock containing a given instruction.
@@ -174,38 +168,80 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG);
return BA.Addr->getCode();
};
- // Less(A,B) iff instruction A is further down in the dominator tree than B.
- auto Less = [&Block,this] (NodeId A, NodeId B) -> bool {
+
+ SmallSet<NodeId,32> Defs;
+
+ // Remove all non-phi defs that are not aliased to RefRR, and segregate
+ // the remaining defs into buckets for their containing blocks.
+ std::map<NodeId, NodeAddr<InstrNode*>> Owners;
+ std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks;
+ for (NodeId N : DefQ) {
+ auto TA = DFG.addr<DefNode*>(N);
+ bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
+ if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
+ continue;
+ Defs.insert(TA.Id);
+ NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG);
+ Owners[TA.Id] = IA;
+ Blocks[Block(IA)].push_back(IA.Id);
+ }
+
+ auto Precedes = [this,&OrdMap] (NodeId A, NodeId B) {
if (A == B)
return false;
- auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B);
- MachineBasicBlock *BA = Block(OA), *BB = Block(OB);
- if (BA != BB)
- return MDT.dominates(BB, BA);
- // They are in the same block.
+ NodeAddr<InstrNode*> OA = DFG.addr<InstrNode*>(A);
+ NodeAddr<InstrNode*> OB = DFG.addr<InstrNode*>(B);
bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
- if (StmtA) {
- if (!StmtB) // OB is a phi and phis dominate statements.
- return true;
- MachineInstr *CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
- MachineInstr *CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
- // The order must be linear, so tie-break such equalities.
- if (CA == CB)
- return A < B;
- return MDT.dominates(CB, CA);
- } else {
- // OA is a phi.
- if (StmtB)
- return false;
- // Both are phis. There is no ordering between phis (in terms of
- // the data-flow), so tie-break this via node id comparison.
+ if (StmtA && StmtB) {
+ const MachineInstr *InA = NodeAddr<StmtNode*>(OA).Addr->getCode();
+ const MachineInstr *InB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+ assert(InA->getParent() == InB->getParent());
+ auto FA = OrdMap.find(InA);
+ if (FA != OrdMap.end())
+ return FA->second < OrdMap.find(InB)->second;
+ const MachineBasicBlock *BB = InA->getParent();
+ for (auto It = BB->begin(), E = BB->end(); It != E; ++It) {
+ if (It == InA->getIterator())
+ return true;
+ if (It == InB->getIterator())
+ return false;
+ }
+ llvm_unreachable("InA and InB should be in the same block");
+ }
+ // One of them is a phi node.
+ if (!StmtA && !StmtB) {
+ // Both are phis, which are unordered. Break the tie by id numbers.
return A < B;
}
+ // Only one of them is a phi. Phis always precede statements.
+ return !StmtA;
};
- std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
- llvm::sort(Tmp, Less);
+ auto GetOrder = [&OrdMap] (MachineBasicBlock &B) {
+ uint32_t Pos = 0;
+ for (MachineInstr &In : B)
+ OrdMap.insert({&In, ++Pos});
+ };
+
+ // For each block, sort the nodes in it.
+ std::vector<MachineBasicBlock*> TmpBB;
+ for (auto &Bucket : Blocks) {
+ TmpBB.push_back(Bucket.first);
+ if (Bucket.second.size() > 2)
+ GetOrder(*Bucket.first);
+ llvm::sort(Bucket.second, Precedes);
+ }
+
+ // Sort the blocks with respect to dominance.
+ llvm::sort(TmpBB,
+ [this](auto A, auto B) { return MDT.properlyDominates(A, B); });
+
+ std::vector<NodeId> TmpInst;
+ for (auto I = TmpBB.rbegin(), E = TmpBB.rend(); I != E; ++I) {
+ auto &Bucket = Blocks[*I];
+ TmpInst.insert(TmpInst.end(), Bucket.rbegin(), Bucket.rend());
+ }
// The vector is a list of instructions, so that defs coming from
// the same instruction don't need to be artificially ordered.
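
A minimal sketch of the lazy numbering trick used by Precedes/GetOrder above, written with plain STL types; Instr is a stand-in for MachineInstr introduced only for illustration. The block is numbered once, after which each comparison is a constant-time lookup rather than a walk over the block.

#include <cstdint>
#include <unordered_map>
#include <vector>

struct Instr {}; // stand-in for MachineInstr

using OrderMap = std::unordered_map<const Instr *, uint32_t>;

// Mirrors GetOrder(): assign 1-based positions to a block's instructions.
void numberBlock(const std::vector<Instr> &Block, OrderMap &Ord) {
  uint32_t Pos = 0;
  for (const Instr &I : Block)
    Ord.insert({&I, ++Pos});
}

// Mirrors the fast path of Precedes(): compare cached positions in O(1).
bool precedes(const Instr *A, const Instr *B, const OrderMap &Ord) {
  return Ord.at(A) < Ord.at(B);
}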
@@ -220,6 +256,9 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// *d3<C> If A \incl BuC, and B \incl AuC, then *d2 would be
// covered if we added A first, and A would be covered
// if we added B first.
+ // In this example we want both A and B, because we don't want to give
+ // either one priority over the other, since they belong to the same
+ // statement.
RegisterAggr RRs(DefRRs);
@@ -227,7 +266,8 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
return TA.Addr->getKind() == NodeAttrs::Def &&
Defs.count(TA.Id);
};
- for (NodeId T : Tmp) {
+
+ for (NodeId T : TmpInst) {
if (!FullChain && RRs.hasCoverOf(RefRR))
break;
auto TA = DFG.addr<InstrNode*>(T);
@@ -246,7 +286,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
if (FullChain || IsPhi || !RRs.hasCoverOf(QR))
Ds.push_back(DA);
}
- RDefs.insert(RDefs.end(), Ds.begin(), Ds.end());
+ llvm::append_range(RDefs, Ds);
for (NodeAddr<DefNode*> DA : Ds) {
// When collecting a full chain of definitions, do not consider phi
// defs to actually define a register.
@@ -260,7 +300,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
auto DeadP = [](const NodeAddr<DefNode*> DA) -> bool {
return DA.Addr->getFlags() & NodeAttrs::Dead;
};
- RDefs.resize(std::distance(RDefs.begin(), llvm::remove_if(RDefs, DeadP)));
+ llvm::erase_if(RDefs, DeadP);
return RDefs;
}
@@ -430,13 +470,13 @@ void Liveness::computePhiInfo() {
NodeList Blocks = FA.Addr->members(DFG);
for (NodeAddr<BlockNode*> BA : Blocks) {
auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
- Phis.insert(Phis.end(), Ps.begin(), Ps.end());
+ llvm::append_range(Phis, Ps);
}
// phi use -> (map: reaching phi -> set of registers defined in between)
std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp;
std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
- std::map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it.
+ std::unordered_map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it.
// Go over all phis.
for (NodeAddr<PhiNode*> PhiA : Phis) {
@@ -474,7 +514,7 @@ void Liveness::computePhiInfo() {
NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
uint16_t F = A.Addr->getFlags();
if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
- RegisterRef R = PRI.normalize(A.Addr->getRegRef(DFG));
+ RegisterRef R = A.Addr->getRegRef(DFG);
RealUses[R.Reg].insert({A.Id,R.Mask});
}
UN = A.Addr->getSibling();
@@ -612,6 +652,23 @@ void Liveness::computePhiInfo() {
// is covered, or until reaching the final phi. Only assume that the
// reference reaches the phi in the latter case.
+ // The operation "clearIn" can be expensive. For a given set of intervening
+ // defs, cache the result of subtracting these defs from a given register
+ // ref.
+ using SubMap = std::unordered_map<RegisterRef, RegisterRef>;
+ std::unordered_map<RegisterAggr, SubMap> Subs;
+ auto ClearIn = [] (RegisterRef RR, const RegisterAggr &Mid, SubMap &SM) {
+ if (Mid.empty())
+ return RR;
+ auto F = SM.find(RR);
+ if (F != SM.end())
+ return F->second;
+ RegisterRef S = Mid.clearIn(RR);
+ SM.insert({RR, S});
+ return S;
+ };
+
+ // Go over all phis.
for (unsigned i = 0; i < PhiUQ.size(); ++i) {
auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG);
@@ -619,17 +676,17 @@ void Liveness::computePhiInfo() {
for (NodeAddr<UseNode*> UA : PUs) {
std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
- RegisterRef UR = PRI.normalize(UA.Addr->getRegRef(DFG));
+ RegisterRef UR = UA.Addr->getRegRef(DFG);
for (const std::pair<const NodeId, RegisterAggr> &P : PUM) {
bool Changed = false;
const RegisterAggr &MidDefs = P.second;
-
// Collect the set PropUp of uses that are reached by the current
// phi PA, and are not covered by any intervening def between the
// currently visited use UA and the upward phi P.
if (MidDefs.hasCoverOf(UR))
continue;
+ SubMap &SM = Subs[MidDefs];
// General algorithm:
// for each (R,U) : U is use node of R, U is reached by PA
@@ -649,7 +706,7 @@ void Liveness::computePhiInfo() {
LaneBitmask M = R.Mask & V.second;
if (M.none())
continue;
- if (RegisterRef SS = MidDefs.clearIn(RegisterRef(R.Reg, M))) {
+ if (RegisterRef SS = ClearIn(RegisterRef(R.Reg, M), MidDefs, SM)) {
NodeRefSet &RS = RealUseMap[P.first][SS.Reg];
Changed |= RS.insert({V.first,SS.Mask}).second;
}
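A minimal sketch of the memoization pattern that the cached clearIn above relies on: for a fixed set of intervening defs, the result of subtracting those defs from a register ref is looked up in a per-set cache before the expensive computation is repeated. RegRef, DefSet, expensiveClearIn and cachedClearIn are illustrative stand-ins, not the RDF API.

#include <map>
#include <set>

using RegRef = std::set<int>;
using DefSet = std::set<int>;

// The "expensive" operation: remove every element of Mid from RR.
static RegRef expensiveClearIn(const RegRef &RR, const DefSet &Mid) {
  RegRef Out;
  for (int R : RR)
    if (!Mid.count(R))
      Out.insert(R);
  return Out;
}

static RegRef cachedClearIn(const RegRef &RR, const DefSet &Mid,
                            std::map<RegRef, RegRef> &Cache) {
  if (Mid.empty())
    return RR;                      // nothing to subtract
  auto F = Cache.find(RR);
  if (F != Cache.end())
    return F->second;               // reuse a previously computed result
  RegRef S = expensiveClearIn(RR, Mid);
  Cache.insert({RR, S});
  return S;
}

int main() {
  DefSet Mid = {2, 4};
  std::map<RegRef, RegRef> Cache;   // one cache per intervening-def set
  RegRef A = {1, 2, 3};
  (void)cachedClearIn(A, Mid, Cache);
  (void)cachedClearIn(A, Mid, Cache);  // second call hits the cache
  return 0;
}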
@@ -1073,7 +1130,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
if (UA.Addr->getFlags() & NodeAttrs::Undef)
continue;
- RegisterRef RR = PRI.normalize(UA.Addr->getRegRef(DFG));
+ RegisterRef RR = UA.Addr->getRegRef(DFG);
for (NodeAddr<DefNode*> D : getAllReachingDefs(UA))
if (getBlockWithRef(D.Id) != B)
LiveIn[RR.Reg].insert({D.Id,RR.Mask});
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
index bd8661816e71..8760ba118934 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
@@ -84,18 +84,23 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) {
BitVector PU(TRI.getNumRegUnits());
const uint32_t *MB = RegMasks.get(M);
- for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
- if (!(MB[i/32] & (1u << (i%32))))
+ for (unsigned I = 1, E = TRI.getNumRegs(); I != E; ++I) {
+ if (!(MB[I / 32] & (1u << (I % 32))))
continue;
- for (MCRegUnitIterator U(i, &TRI); U.isValid(); ++U)
+ for (MCRegUnitIterator U(MCRegister::from(I), &TRI); U.isValid(); ++U)
PU.set(*U);
}
MaskInfos[M].Units = PU.flip();
}
-}
-RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const {
- return RR;
+ AliasInfos.resize(TRI.getNumRegUnits());
+ for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) {
+ BitVector AS(TRI.getNumRegs());
+ for (MCRegUnitRootIterator R(U, &TRI); R.isValid(); ++R)
+ for (MCSuperRegIterator S(*R, &TRI, true); S.isValid(); ++S)
+ AS.set(*S);
+ AliasInfos[U].Regs = AS;
+ }
}
std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
@@ -321,26 +326,17 @@ RegisterRef RegisterAggr::makeRegRef() const {
if (U < 0)
return RegisterRef();
- auto AliasedRegs = [this] (uint32_t Unit, BitVector &Regs) {
- for (MCRegUnitRootIterator R(Unit, &PRI.getTRI()); R.isValid(); ++R)
- for (MCSuperRegIterator S(*R, &PRI.getTRI(), true); S.isValid(); ++S)
- Regs.set(*S);
- };
-
// Find the set of all registers that are aliased to all the units
// in this aggregate.
// Get all the registers aliased to the first unit in the bit vector.
- BitVector Regs(PRI.getTRI().getNumRegs());
- AliasedRegs(U, Regs);
+ BitVector Regs = PRI.getUnitAliases(U);
U = Units.find_next(U);
// For each other unit, intersect it with the set of all registers
// aliased that unit.
while (U >= 0) {
- BitVector AR(PRI.getTRI().getNumRegs());
- AliasedRegs(U, AR);
- Regs &= AR;
+ Regs &= PRI.getUnitAliases(U);
U = Units.find_next(U);
}
@@ -378,3 +374,8 @@ RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG,
Pos = End ? Masks.end() : Masks.begin();
Index = End ? Masks.size() : 0;
}
+
+raw_ostream &rdf::operator<<(raw_ostream &OS, const RegisterAggr &A) {
+ A.print(OS);
+ return OS;
+}
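A minimal sketch of the precomputation idea behind AliasInfos/getUnitAliases in the hunks above: build, once per register unit, a bit vector of the registers aliased to it, and answer later queries by intersecting those precomputed vectors instead of re-walking alias iterators each time. The sizes and tables below are illustrative, not any target's register file.

#include <vector>

int main() {
  const unsigned NumRegs = 8, NumUnits = 4;
  // Precomputation, done once (e.g. in a constructor).
  std::vector<std::vector<bool>> UnitAliases(
      NumUnits, std::vector<bool>(NumRegs, false));
  UnitAliases[0] = {true, true, false, false, true, false, false, false};
  UnitAliases[1] = {false, true, true, false, true, false, false, false};

  // Query: registers aliased to *all* units in {0, 1}.
  std::vector<bool> Regs = UnitAliases[0];
  for (unsigned R = 0; R != NumRegs; ++R)
    Regs[R] = Regs[R] && UnitAliases[1][R];
  return 0;
}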
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 5bd8b4b8e27f..d16e90a7e0b4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -29,7 +29,7 @@ static bool isValidRegUse(const MachineOperand &MO) {
return isValidReg(MO) && MO.isUse();
}
-static bool isValidRegUseOf(const MachineOperand &MO, int PhysReg) {
+static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg) {
return isValidRegUse(MO) && MO.getReg() == PhysReg;
}
@@ -37,7 +37,7 @@ static bool isValidRegDef(const MachineOperand &MO) {
return isValidReg(MO) && MO.isDef();
}
-static bool isValidRegDefOf(const MachineOperand &MO, int PhysReg) {
+static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg) {
return isValidRegDef(MO) && MO.getReg() == PhysReg;
}
@@ -121,7 +121,8 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
for (auto &MO : MI->operands()) {
if (!isValidRegDef(MO))
continue;
- for (MCRegUnitIterator Unit(MO.getReg(), TRI); Unit.isValid(); ++Unit) {
+ for (MCRegUnitIterator Unit(MO.getReg().asMCReg(), TRI); Unit.isValid();
+ ++Unit) {
// This instruction explicitly defines the current reg unit.
LLVM_DEBUG(dbgs() << printReg(*Unit, TRI) << ":\t" << CurInstr
<< '\t' << *MI);
@@ -143,10 +144,9 @@ void ReachingDefAnalysis::reprocessBasicBlock(MachineBasicBlock *MBB) {
"Unexpected basic block number.");
// Count number of non-debug instructions for end of block adjustment.
- int NumInsts = 0;
- for (const MachineInstr &MI : *MBB)
- if (!MI.isDebugInstr())
- NumInsts++;
+ auto NonDbgInsts =
+ instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end());
+ int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
// When reprocessing a block, the only thing we need to do is check whether
// there is now a more recent incoming reaching definition from a predecessor.
@@ -197,10 +197,9 @@ void ReachingDefAnalysis::processBasicBlock(
}
enterBasicBlock(MBB);
- for (MachineInstr &MI : *MBB) {
- if (!MI.isDebugInstr())
- processDefs(&MI);
- }
+ for (MachineInstr &MI :
+ instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end()))
+ processDefs(&MI);
leaveBasicBlock(MBB);
}
@@ -254,7 +253,8 @@ void ReachingDefAnalysis::traverse() {
#endif
}
-int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) const {
+int ReachingDefAnalysis::getReachingDef(MachineInstr *MI,
+ MCRegister PhysReg) const {
 assert(InstIds.count(MI) && "Unexpected machine instruction.");
int InstId = InstIds.lookup(MI);
int DefRes = ReachingDefDefaultVal;
@@ -273,13 +273,16 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) const {
return LatestDef;
}
-MachineInstr* ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI,
- int PhysReg) const {
- return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg));
+MachineInstr *
+ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI,
+ MCRegister PhysReg) const {
+ return hasLocalDefBefore(MI, PhysReg)
+ ? getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg))
+ : nullptr;
}
bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B,
- int PhysReg) const {
+ MCRegister PhysReg) const {
MachineBasicBlock *ParentA = A->getParent();
MachineBasicBlock *ParentB = B->getParent();
if (ParentA != ParentB)
@@ -307,18 +310,19 @@ MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB,
return nullptr;
}
-int
-ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) const {
+int ReachingDefAnalysis::getClearance(MachineInstr *MI,
+ MCRegister PhysReg) const {
 assert(InstIds.count(MI) && "Unexpected machine instruction.");
return InstIds.lookup(MI) - getReachingDef(MI, PhysReg);
}
-bool
-ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI, int PhysReg) const {
+bool ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI,
+ MCRegister PhysReg) const {
return getReachingDef(MI, PhysReg) >= 0;
}
-void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg,
+void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def,
+ MCRegister PhysReg,
InstSet &Uses) const {
MachineBasicBlock *MBB = Def->getParent();
MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def);
@@ -342,12 +346,11 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg,
}
}
-bool
-ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, int PhysReg,
- InstSet &Uses) const {
- for (auto &MI : *MBB) {
- if (MI.isDebugInstr())
- continue;
+bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB,
+ MCRegister PhysReg,
+ InstSet &Uses) const {
+ for (MachineInstr &MI :
+ instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) {
for (auto &MO : MI.operands()) {
if (!isValidRegUseOf(MO, PhysReg))
continue;
@@ -356,12 +359,14 @@ ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, int PhysReg,
Uses.insert(&MI);
}
}
- return isReachingDefLiveOut(&MBB->back(), PhysReg);
+ auto Last = MBB->getLastNonDebugInstr();
+ if (Last == MBB->end())
+ return true;
+ return isReachingDefLiveOut(&*Last, PhysReg);
}
-void
-ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg,
- InstSet &Uses) const {
+void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg,
+ InstSet &Uses) const {
MachineBasicBlock *MBB = MI->getParent();
// Collect the uses that each def touches within the block.
@@ -372,9 +377,7 @@ ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg,
if (LiveOut != MI)
return;
- SmallVector<MachineBasicBlock*, 4> ToVisit;
- ToVisit.insert(ToVisit.begin(), MBB->successors().begin(),
- MBB->successors().end());
+ SmallVector<MachineBasicBlock *, 4> ToVisit(MBB->successors());
SmallPtrSet<MachineBasicBlock*, 4>Visited;
while (!ToVisit.empty()) {
MachineBasicBlock *MBB = ToVisit.back();
@@ -382,22 +385,33 @@ ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg,
if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg))
continue;
if (getLiveInUses(MBB, PhysReg, Uses))
- ToVisit.insert(ToVisit.end(), MBB->successors().begin(),
- MBB->successors().end());
+ llvm::append_range(ToVisit, MBB->successors());
Visited.insert(MBB);
}
}
}
-void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg,
- InstSet &Defs) const {
+void ReachingDefAnalysis::getGlobalReachingDefs(MachineInstr *MI,
+ MCRegister PhysReg,
+ InstSet &Defs) const {
+ if (auto *Def = getUniqueReachingMIDef(MI, PhysReg)) {
+ Defs.insert(Def);
+ return;
+ }
+
+ for (auto *MBB : MI->getParent()->predecessors())
+ getLiveOuts(MBB, PhysReg, Defs);
+}
+
+void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
+ MCRegister PhysReg, InstSet &Defs) const {
SmallPtrSet<MachineBasicBlock*, 2> VisitedBBs;
getLiveOuts(MBB, PhysReg, Defs, VisitedBBs);
}
-void
-ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg,
- InstSet &Defs, BlockSet &VisitedBBs) const {
+void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
+ MCRegister PhysReg, InstSet &Defs,
+ BlockSet &VisitedBBs) const {
if (VisitedBBs.count(MBB))
return;
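A minimal sketch of the predecessor walk used by getGlobalReachingDefs/getLiveOuts above: when no unique local def exists, live-out defs are collected from every predecessor, with a visited set guarding against CFG cycles and a local def terminating the search on that path. Block, LocalDef and collectLiveOuts are illustrative stand-ins, not the ReachingDefAnalysis API.

#include <set>
#include <vector>

struct Block {
  int Id;
  std::vector<Block *> Preds;
  int LocalDef = -1;               // def of the register in this block, if any
};

static void collectLiveOuts(Block *B, std::set<int> &Defs,
                            std::set<Block *> &Visited) {
  if (!Visited.insert(B).second)
    return;                        // already handled (loop in the CFG)
  if (B->LocalDef >= 0) {
    Defs.insert(B->LocalDef);      // a local def ends the search on this path
    return;
  }
  for (Block *P : B->Preds)
    collectLiveOuts(P, Defs, Visited);
}

int main() {
  Block A{0}, B{1}, C{2};
  A.LocalDef = 10;
  B.Preds = {&A};
  C.Preds = {&A, &B};
  std::set<int> Defs;
  std::set<Block *> Visited;
  for (Block *P : C.Preds)
    collectLiveOuts(P, Defs, Visited);
  return 0;                        // Defs now holds {10}
}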
@@ -414,26 +428,25 @@ ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg,
getLiveOuts(Pred, PhysReg, Defs, VisitedBBs);
}
-MachineInstr *ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI,
- int PhysReg) const {
+MachineInstr *
+ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI,
+ MCRegister PhysReg) const {
// If there's a local def before MI, return it.
MachineInstr *LocalDef = getReachingLocalMIDef(MI, PhysReg);
if (LocalDef && InstIds.lookup(LocalDef) < InstIds.lookup(MI))
return LocalDef;
- SmallPtrSet<MachineBasicBlock*, 4> VisitedBBs;
SmallPtrSet<MachineInstr*, 2> Incoming;
- for (auto *Pred : MI->getParent()->predecessors())
- getLiveOuts(Pred, PhysReg, Incoming, VisitedBBs);
-
- // If we have a local def and an incoming instruction, then there's not a
- // unique instruction def.
- if (!Incoming.empty() && LocalDef)
- return nullptr;
- else if (Incoming.size() == 1)
+ MachineBasicBlock *Parent = MI->getParent();
+ for (auto *Pred : Parent->predecessors())
+ getLiveOuts(Pred, PhysReg, Incoming);
+
+ // Check that we have a single incoming value and that it does not
+ // come from the same block as MI - since it would mean that the def
+ // is executed after MI.
+ if (Incoming.size() == 1 && (*Incoming.begin())->getParent() != Parent)
return *Incoming.begin();
- else
- return LocalDef;
+ return nullptr;
}
MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI,
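A minimal sketch of the tightened uniqueness test in the hunk above: a single incoming definition is accepted only if it comes from a different block than the querying instruction, since a def from the same block would execute after the query on the first trip around a loop. Def and uniqueIncoming are illustrative names, not the analysis API.

#include <set>

struct Def { int Block; };

static const Def *uniqueIncoming(const std::set<const Def *> &Incoming,
                                 int QueryBlock) {
  if (Incoming.size() != 1)
    return nullptr;                // zero or several candidates: not unique
  const Def *D = *Incoming.begin();
  // A def from the query's own block cannot be the reaching def.
  return D->Block != QueryBlock ? D : nullptr;
}

int main() {
  Def A{0};
  std::set<const Def *> In = {&A};
  (void)uniqueIncoming(In, /*QueryBlock=*/1);  // &A: def comes from another block
  (void)uniqueIncoming(In, /*QueryBlock=*/0);  // nullptr: def is in the same block
  return 0;
}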
@@ -448,7 +461,8 @@ MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI,
return getUniqueReachingMIDef(MI, MO.getReg());
}
-bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const {
+bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
+ MCRegister PhysReg) const {
MachineBasicBlock *MBB = MI->getParent();
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
@@ -459,18 +473,21 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const {
// Walk backwards through the block to see if the register is live at some
// point.
- for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) {
- LiveRegs.stepBackward(*Last);
+ for (MachineInstr &Last :
+ instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) {
+ LiveRegs.stepBackward(Last);
if (LiveRegs.contains(PhysReg))
- return InstIds.lookup(&*Last) > InstIds.lookup(MI);
+ return InstIds.lookup(&Last) > InstIds.lookup(MI);
}
return false;
}
bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI,
- int PhysReg) const {
+ MCRegister PhysReg) const {
MachineBasicBlock *MBB = MI->getParent();
- if (getReachingDef(MI, PhysReg) != getReachingDef(&MBB->back(), PhysReg))
+ auto Last = MBB->getLastNonDebugInstr();
+ if (Last != MBB->end() &&
+ getReachingDef(MI, PhysReg) != getReachingDef(&*Last, PhysReg))
return true;
if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
@@ -479,17 +496,17 @@ bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI,
return false;
}
-bool
-ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const {
+bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
+ MCRegister PhysReg) const {
MachineBasicBlock *MBB = MI->getParent();
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
if (!LiveRegs.contains(PhysReg))
return false;
- MachineInstr *Last = &MBB->back();
+ auto Last = MBB->getLastNonDebugInstr();
int Def = getReachingDef(MI, PhysReg);
- if (getReachingDef(Last, PhysReg) != Def)
+ if (Last != MBB->end() && getReachingDef(&*Last, PhysReg) != Def)
return false;
// Finally check that the last instruction doesn't redefine the register.
@@ -500,18 +517,22 @@ ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const {
return true;
}
-MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
- int PhysReg) const {
+MachineInstr *
+ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
+ MCRegister PhysReg) const {
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
if (!LiveRegs.contains(PhysReg))
return nullptr;
- MachineInstr *Last = &MBB->back();
- int Def = getReachingDef(Last, PhysReg);
+ auto Last = MBB->getLastNonDebugInstr();
+ if (Last == MBB->end())
+ return nullptr;
+
+ int Def = getReachingDef(&*Last, PhysReg);
for (auto &MO : Last->operands())
if (isValidRegDefOf(MO, PhysReg))
- return Last;
+ return &*Last;
return Def < 0 ? nullptr : getInstFromId(MBB, Def);
}
@@ -528,7 +549,7 @@ static bool mayHaveSideEffects(MachineInstr &MI) {
template<typename Iterator>
bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From,
MachineInstr *To) const {
- if (From->getParent() != To->getParent())
+ if (From->getParent() != To->getParent() || From == To)
return false;
SmallSet<int, 2> Defs;
@@ -557,12 +578,22 @@ bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From,
bool ReachingDefAnalysis::isSafeToMoveForwards(MachineInstr *From,
MachineInstr *To) const {
- return isSafeToMove<MachineBasicBlock::reverse_iterator>(From, To);
+ using Iterator = MachineBasicBlock::iterator;
+ // Walk forwards until we find the instruction.
+ for (auto I = Iterator(From), E = From->getParent()->end(); I != E; ++I)
+ if (&*I == To)
+ return isSafeToMove<Iterator>(From, To);
+ return false;
}
bool ReachingDefAnalysis::isSafeToMoveBackwards(MachineInstr *From,
MachineInstr *To) const {
- return isSafeToMove<MachineBasicBlock::iterator>(From, To);
+ using Iterator = MachineBasicBlock::reverse_iterator;
+ // Walk backwards until we find the instruction.
+ for (auto I = Iterator(From), E = From->getParent()->rend(); I != E; ++I)
+ if (&*I == To)
+ return isSafeToMove<Iterator>(From, To);
+ return false;
}
bool ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI,
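A minimal sketch of the direction check added to isSafeToMoveForwards/Backwards above: before reasoning about the move, scan from the source in the requested direction and only proceed if the destination is actually encountered. The std::list stand-in replaces MachineBasicBlock purely for illustration; the real check additionally rejects From == To.

#include <iterator>
#include <list>

using Inst = int;
using Blk = std::list<Inst>;

// Walk forwards from From; if we reach To, it comes later in the block.
static bool appearsAfter(const Blk &B, Blk::const_iterator From,
                         Blk::const_iterator To) {
  for (auto I = From, E = B.end(); I != E; ++I)
    if (I == To)
      return true;
  return false;
}

int main() {
  Blk B = {1, 2, 3, 4};
  auto From = std::next(B.begin());             // element 2
  auto To = std::next(B.begin(), 3);            // element 4
  bool OkForwards = appearsAfter(B, From, To);  // true
  bool OkBackwards = appearsAfter(B, To, From); // false
  (void)OkForwards;
  (void)OkBackwards;
  return 0;
}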
@@ -612,7 +643,10 @@ ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited,
void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI,
InstSet &Dead) const {
Dead.insert(MI);
- auto IsDead = [this, &Dead](MachineInstr *Def, int PhysReg) {
+ auto IsDead = [this, &Dead](MachineInstr *Def, MCRegister PhysReg) {
+ if (mayHaveSideEffects(*Def))
+ return false;
+
unsigned LiveDefs = 0;
for (auto &MO : Def->operands()) {
if (!isValidRegDef(MO))
@@ -642,18 +676,18 @@ void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI,
}
bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI,
- int PhysReg) const {
+ MCRegister PhysReg) const {
SmallPtrSet<MachineInstr*, 1> Ignore;
return isSafeToDefRegAt(MI, PhysReg, Ignore);
}
-bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, int PhysReg,
+bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
InstSet &Ignore) const {
// Check for any uses of the register after MI.
if (isRegUsedAfter(MI, PhysReg)) {
if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) {
SmallPtrSet<MachineInstr*, 2> Uses;
- getReachingLocalUses(Def, PhysReg, Uses);
+ getGlobalUses(Def, PhysReg, Uses);
for (auto *Use : Uses)
if (!Ignore.count(Use))
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
index d22826853672..aa749ca43e74 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -73,7 +73,7 @@ void RegAllocBase::seedLiveRegs() {
NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName,
TimerGroupDescription, TimePassesIsEnabled);
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
enqueue(&LIS->getInterval(Reg));
@@ -87,13 +87,13 @@ void RegAllocBase::allocatePhysRegs() {
// Continue assigning vregs one at a time to available physical registers.
while (LiveInterval *VirtReg = dequeue()) {
- assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+ assert(!VRM->hasPhys(VirtReg->reg()) && "Register already assigned");
// Unused registers can appear when the spiller coalesces snippets.
- if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ if (MRI->reg_nodbg_empty(VirtReg->reg())) {
LLVM_DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
aboutToRemoveInterval(*VirtReg);
- LIS->removeInterval(VirtReg->reg);
+ LIS->removeInterval(VirtReg->reg());
continue;
}
@@ -104,21 +104,22 @@ void RegAllocBase::allocatePhysRegs() {
// register if possible and populate a list of new live intervals that
// result from splitting.
LLVM_DEBUG(dbgs() << "\nselectOrSplit "
- << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg))
- << ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
+ << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg()))
+ << ':' << *VirtReg << " w=" << VirtReg->weight() << '\n');
using VirtRegVec = SmallVector<Register, 4>;
VirtRegVec SplitVRegs;
- unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+ MCRegister AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
if (AvailablePhysReg == ~0u) {
// selectOrSplit failed to find a register!
// Probably caused by an inline asm.
MachineInstr *MI = nullptr;
for (MachineRegisterInfo::reg_instr_iterator
- I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end();
- I != E; ) {
+ I = MRI->reg_instr_begin(VirtReg->reg()),
+ E = MRI->reg_instr_end();
+ I != E;) {
MI = &*(I++);
if (MI->isInlineAsm())
break;
@@ -133,28 +134,29 @@ void RegAllocBase::allocatePhysRegs() {
report_fatal_error("ran out of registers during register allocation");
}
// Keep going after reporting the error.
- VRM->assignVirt2Phys(VirtReg->reg,
- RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ VRM->assignVirt2Phys(
+ VirtReg->reg(),
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg())).front());
continue;
}
if (AvailablePhysReg)
Matrix->assign(*VirtReg, AvailablePhysReg);
- for (unsigned Reg : SplitVRegs) {
+ for (Register Reg : SplitVRegs) {
assert(LIS->hasInterval(Reg));
LiveInterval *SplitVirtReg = &LIS->getInterval(Reg);
- assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
- if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ assert(!VRM->hasPhys(SplitVirtReg->reg()) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg())) {
assert(SplitVirtReg->empty() && "Non-empty but used interval");
LLVM_DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
aboutToRemoveInterval(*SplitVirtReg);
- LIS->removeInterval(SplitVirtReg->reg);
+ LIS->removeInterval(SplitVirtReg->reg());
continue;
}
LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
- assert(Register::isVirtualRegister(SplitVirtReg->reg) &&
+ assert(Register::isVirtualRegister(SplitVirtReg->reg()) &&
"expect split value in virtual register");
enqueue(SplitVirtReg);
++NumNewQueued;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
index 8e931eaae99a..3144605345e9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
@@ -101,8 +101,8 @@ protected:
// Each call must guarantee forward progress by returning an available PhysReg
// or new set of split live virtual registers. It is up to the splitter to
// converge quickly toward fully spilled live ranges.
- virtual Register selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<Register> &splitLVRs) = 0;
+ virtual MCRegister selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &splitLVRs) = 0;
// Use this group name for NamedRegionTimer.
static const char TimerGroupName[];
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
index 5009bcc0a397..8f2cb48c5d69 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -46,7 +46,7 @@ static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
namespace {
struct CompSpillWeight {
bool operator()(LiveInterval *A, LiveInterval *B) const {
- return A->weight < B->weight;
+ return A->weight() < B->weight();
}
};
}
@@ -72,8 +72,8 @@ class RABasic : public MachineFunctionPass,
// selectOrSplit().
BitVector UsableRegs;
- bool LRE_CanEraseVirtReg(unsigned) override;
- void LRE_WillShrinkVirtReg(unsigned) override;
+ bool LRE_CanEraseVirtReg(Register) override;
+ void LRE_WillShrinkVirtReg(Register) override;
public:
RABasic();
@@ -100,8 +100,8 @@ public:
return LI;
}
- Register selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<Register> &SplitVRegs) override;
+ MCRegister selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &SplitVRegs) override;
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
@@ -111,10 +111,15 @@ public:
MachineFunctionProperties::Property::NoPHIs);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
// Helper for spilling all live virtual registers currently unified under preg
// that interfere with the most recently queried lvr. Return true if spilling
// was successful, and append any new spilled/split intervals to splitLVRs.
- bool spillInterferences(LiveInterval &VirtReg, Register PhysReg,
+ bool spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
SmallVectorImpl<Register> &SplitVRegs);
static char ID;
@@ -141,7 +146,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
false)
-bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) {
+bool RABasic::LRE_CanEraseVirtReg(Register VirtReg) {
LiveInterval &LI = LIS->getInterval(VirtReg);
if (VRM->hasPhys(VirtReg)) {
Matrix->unassign(LI);
@@ -156,7 +161,7 @@ bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) {
return false;
}
-void RABasic::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+void RABasic::LRE_WillShrinkVirtReg(Register VirtReg) {
if (!VRM->hasPhys(VirtReg))
return;
@@ -201,7 +206,7 @@ void RABasic::releaseMemory() {
// Spill or split all live virtual registers currently unified under PhysReg
// that interfere with VirtReg. The newly spilled or split live intervals are
// returned by appending them to SplitVRegs.
-bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg,
+bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
SmallVectorImpl<Register> &SplitVRegs) {
// Record each interference and determine if all are spillable before mutating
// either the union or live intervals.
@@ -213,7 +218,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg,
Q.collectInterferingVRegs();
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
- if (!Intf->isSpillable() || Intf->weight > VirtReg.weight)
+ if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())
return false;
Intfs.push_back(Intf);
}
@@ -227,7 +232,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg,
LiveInterval &Spill = *Intfs[i];
// Skip duplicates.
- if (!VRM->hasPhys(Spill.reg))
+ if (!VRM->hasPhys(Spill.reg()))
continue;
// Deallocate the interfering vreg by removing it from the union.
@@ -253,14 +258,16 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg,
// |vregs| * |machineregs|. And since the number of interference tests is
// minimal, there is no value in caching them outside the scope of
// selectOrSplit().
-Register RABasic::selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<Register> &SplitVRegs) {
+MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &SplitVRegs) {
// Populate a list of physical register spill candidates.
- SmallVector<Register, 8> PhysRegSpillCands;
+ SmallVector<MCRegister, 8> PhysRegSpillCands;
// Check for an available register in this class.
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
- while (Register PhysReg = Order.next()) {
+ auto Order =
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
+ for (MCRegister PhysReg : Order) {
+ assert(PhysReg.isValid());
// Check for interference in PhysReg
switch (Matrix->checkInterference(VirtReg, PhysReg)) {
case LiveRegMatrix::IK_Free:
@@ -279,8 +286,9 @@ Register RABasic::selectOrSplit(LiveInterval &VirtReg,
}
// Try to spill another interfering reg with less spill weight.
- for (SmallVectorImpl<Register>::iterator PhysRegI = PhysRegSpillCands.begin(),
- PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+ for (auto PhysRegI = PhysRegSpillCands.begin(),
+ PhysRegE = PhysRegSpillCands.end();
+ PhysRegI != PhysRegE; ++PhysRegI) {
if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs))
continue;
@@ -310,10 +318,9 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
RegAllocBase::init(getAnalysis<VirtRegMap>(),
getAnalysis<LiveIntervals>(),
getAnalysis<LiveRegMatrix>());
-
- calculateSpillWeightsAndHints(*LIS, *MF, VRM,
- getAnalysis<MachineLoopInfo>(),
- getAnalysis<MachineBlockFrequencyInfo>());
+ VirtRegAuxInfo VRAI(*MF, *LIS, *VRM, getAnalysis<MachineLoopInfo>(),
+ getAnalysis<MachineBlockFrequencyInfo>());
+ VRAI.calculateSpillWeightsAndHints();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index cf3eaba23bee..6e548d4a93c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -56,6 +56,10 @@ STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads , "Number of loads added");
STATISTIC(NumCoalesced, "Number of copies coalesced");
+// FIXME: Remove this switch when all testcases are fixed!
+static cl::opt<bool> IgnoreMissingDefs("rafast-ignore-missing-defs",
+ cl::Hidden);
+
static RegisterRegAlloc
fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
@@ -85,8 +89,9 @@ namespace {
MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
Register VirtReg; ///< Virtual register number.
MCPhysReg PhysReg = 0; ///< Currently held here.
- unsigned short LastOpNum = 0; ///< OpNum on LastUse.
- bool Dirty = false; ///< Register needs spill.
+ bool LiveOut = false; ///< Register is possibly live out.
+ bool Reloaded = false; ///< Register was reloaded.
+ bool Error = false; ///< Could not allocate.
explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
@@ -100,44 +105,51 @@ namespace {
/// available in a physical register.
LiveRegMap LiveVirtRegs;
+ /// Stores assigned virtual registers present in the bundle MI.
+ DenseMap<Register, MCPhysReg> BundleVirtRegsMap;
+
DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
+ /// List of DBG_VALUE that we encountered without the vreg being assigned
+ /// because they were placed after the last use of the vreg.
+ DenseMap<unsigned, SmallVector<MachineInstr *, 1>> DanglingDbgValues;
/// Has a bit set for every virtual register for which it was determined
/// that it is alive across blocks.
BitVector MayLiveAcrossBlocks;
- /// State of a physical register.
- enum RegState {
- /// A disabled register is not available for allocation, but an alias may
- /// be in use. A register can only be moved out of the disabled state if
- /// all aliases are disabled.
- regDisabled,
-
+ /// State of a register unit.
+ enum RegUnitState {
/// A free register is not currently in use and can be allocated
/// immediately without checking aliases.
regFree,
- /// A reserved register has been assigned explicitly (e.g., setting up a
- /// call parameter), and it remains reserved until it is used.
- regReserved
+ /// A pre-assigned register has been assigned before register allocation
+ /// (e.g., setting up a call parameter).
+ regPreAssigned,
+
+ /// Used temporarily in reloadAtBegin() to mark register units that are
+ /// live-in to the basic block.
+ regLiveIn,
/// A register state may also be a virtual register number, indicating
/// that the physical register is currently allocated to a virtual
/// register. In that case, LiveVirtRegs contains the inverse mapping.
};
- /// Maps each physical register to a RegState enum or a virtual register.
- std::vector<unsigned> PhysRegState;
+ /// Maps each physical register to a RegUnitState enum or virtual register.
+ std::vector<unsigned> RegUnitStates;
- SmallVector<Register, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
/// Set of register units that are used in the current instruction, and so
/// cannot be allocated.
RegUnitSet UsedInInstr;
+ RegUnitSet PhysRegUses;
+ SmallVector<uint16_t, 8> DefOperandIndexes;
void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
+ bool isPhysRegFree(MCPhysReg PhysReg) const;
/// Mark a physreg as used in this instruction.
void markRegUsedInInstr(MCPhysReg PhysReg) {
@@ -146,13 +158,29 @@ namespace {
}
/// Check if a physreg or any of its aliases are used in this instruction.
- bool isRegUsedInInstr(MCPhysReg PhysReg) const {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
if (UsedInInstr.count(*Units))
return true;
+ if (LookAtPhysRegUses && PhysRegUses.count(*Units))
+ return true;
+ }
return false;
}
+ /// Mark physical register as being used in a register use operand.
+ /// This is only used by the special livethrough handling code.
+ void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ PhysRegUses.insert(*Units);
+ }
+
+ /// Remove mark of physical register being used in the instruction.
+ void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ UsedInInstr.erase(*Units);
+ }
+
enum : unsigned {
spillClean = 50,
spillDirty = 100,
@@ -178,27 +206,29 @@ namespace {
MachineFunctionProperties::Property::NoVRegs);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
private:
bool runOnMachineFunction(MachineFunction &MF) override;
void allocateBasicBlock(MachineBasicBlock &MBB);
+
+ void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+ Register Reg) const;
+
void allocateInstruction(MachineInstr &MI);
void handleDebugValue(MachineInstr &MI);
- void handleThroughOperands(MachineInstr &MI,
- SmallVectorImpl<Register> &VirtDead);
- bool isLastUseOfLocalReg(const MachineOperand &MO) const;
-
- void addKillFlag(const LiveReg &LRI);
- void killVirtReg(LiveReg &LR);
- void killVirtReg(Register VirtReg);
- void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
- void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg);
-
- void usePhysReg(MachineOperand &MO);
- void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
- RegState NewState);
+ void handleBundle(MachineInstr &MI);
+
+ bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ void freePhysReg(MCPhysReg PhysReg);
+
unsigned calcSpillCost(MCPhysReg PhysReg) const;
- void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
LiveRegMap::iterator findLiveVirtReg(Register VirtReg) {
return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
@@ -208,28 +238,38 @@ namespace {
return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
- void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint);
+ void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint,
+ bool LookAtPhysRegUses = false);
void allocVirtRegUndef(MachineOperand &MO);
- MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
- Register Hint);
- LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
- Register Hint);
- void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut);
- bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+ void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg,
+ MCPhysReg Reg);
+ void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg);
+ void defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ bool LookAtPhysRegUses = false);
+ void useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg);
+
+ MachineBasicBlock::iterator
+ getMBBBeginInsertionPoint(MachineBasicBlock &MBB,
+ SmallSet<Register, 2> &PrologLiveIns) const;
+
+ void reloadAtBegin(MachineBasicBlock &MBB);
+ void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
Register traceCopies(Register VirtReg) const;
Register traceCopyChain(Register Reg) const;
int getStackSpaceFor(Register VirtReg);
void spill(MachineBasicBlock::iterator Before, Register VirtReg,
- MCPhysReg AssignedReg, bool Kill);
+ MCPhysReg AssignedReg, bool Kill, bool LiveOut);
void reload(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg PhysReg);
bool mayLiveOut(Register VirtReg);
bool mayLiveIn(Register VirtReg);
- void dumpState();
+ void dumpState() const;
};
} // end anonymous namespace
@@ -240,7 +280,16 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
false)
void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
- PhysRegState[PhysReg] = NewState;
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
+ RegUnitStates[*UI] = NewState;
+}
+
+bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const {
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ if (RegUnitStates[*UI] != regFree)
+ return false;
+ }
+ return true;
}
/// This allocates space for the specified virtual register to be held on the
@@ -263,6 +312,20 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) {
return FrameIdx;
}
+static bool dominates(MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator A,
+ MachineBasicBlock::const_iterator B) {
+ auto MBBEnd = MBB.end();
+ if (B == MBBEnd)
+ return true;
+
+ MachineBasicBlock::const_iterator I = MBB.begin();
+ for (; &*I != A && &*I != B; ++I)
+ ;
+
+ return &*I == A;
+}
+
/// Returns false if \p VirtReg is known to not live out of the current block.
bool RegAllocFast::mayLiveOut(Register VirtReg) {
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
@@ -270,23 +333,38 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
return !MBB->succ_empty();
}
- // If this block loops back to itself, it would be necessary to check whether
- // the use comes after the def.
+ const MachineInstr *SelfLoopDef = nullptr;
+
+ // If this block loops back to itself, it is necessary to check whether the
+ // use comes after the def.
if (MBB->isSuccessor(MBB)) {
- MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
- return true;
+ SelfLoopDef = MRI->getUniqueVRegDef(VirtReg);
+ if (!SelfLoopDef) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ }
}
// See if the first \p Limit uses of the register are all in the current
// block.
static const unsigned Limit = 8;
unsigned C = 0;
- for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) {
+ for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
if (UseInst.getParent() != MBB || ++C >= Limit) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
// Cannot be live-out if there are no successors.
return !MBB->succ_empty();
}
+
+ if (SelfLoopDef) {
+ // Try to handle some simple cases to avoid spilling and reloading every
+ // value inside a self looping block.
+ if (SelfLoopDef == &UseInst ||
+ !dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ }
+ }
}
return false;
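A minimal sketch of the intra-block check that the self-loop handling above performs: when a block loops back to itself and the value has a unique definition, a use only forces the value to live across the back edge if the def does not come first in a simple linear scan of the block. Names and block contents are illustrative.

#include <vector>

using Inst = int;

// Returns true if A appears no later than B in the block (both assumed present).
static bool dominatesInBlock(const std::vector<Inst> &Block, Inst A, Inst B) {
  for (Inst I : Block) {
    if (I == A)
      return true;   // reached the def first: it dominates the use
    if (I == B)
      return false;  // reached the use first: the def does not dominate it
  }
  return false;
}

int main() {
  std::vector<Inst> Block = {10, 20, 30};   // 20 defines v, 30 uses v
  bool MustLiveAcrossBackEdge =
      !dominatesInBlock(Block, /*Def=*/20, /*Use=*/30);
  (void)MustLiveAcrossBackEdge;  // false: the def precedes this use
  return 0;
}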
@@ -313,7 +391,7 @@ bool RegAllocFast::mayLiveIn(Register VirtReg) {
/// Insert spill instruction for \p AssignedReg before \p Before. Update
/// DBG_VALUEs with \p VirtReg operands with the stack slot.
void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
- MCPhysReg AssignedReg, bool Kill) {
+ MCPhysReg AssignedReg, bool Kill, bool LiveOut) {
LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI)
<< " in " << printReg(AssignedReg, TRI));
int FI = getStackSpaceFor(VirtReg);
@@ -323,15 +401,32 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI);
++NumStores;
- // If this register is used by DBG_VALUE then insert new DBG_VALUE to
- // identify spilled location as the place to find corresponding variable's
- // value.
+ MachineBasicBlock::iterator FirstTerm = MBB->getFirstTerminator();
+
+ // When we spill a virtual register, we will have spill instructions behind
+ // every definition of it, meaning we can switch all the DBG_VALUEs over
+ // to just reference the stack slot.
SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg];
for (MachineInstr *DBG : LRIDbgValues) {
MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI);
assert(NewDV->getParent() == MBB && "dangling parent pointer");
(void)NewDV;
LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV);
+
+ if (LiveOut) {
+ // We need to insert a DBG_VALUE at the end of the block if the spill slot
+ // is live out, but there is another use of the value after the
+ // spill. This will allow LiveDebugValues to see the correct live out
+ // value to propagate to the successors.
+ MachineInstr *ClonedDV = MBB->getParent()->CloneMachineInstr(NewDV);
+ MBB->insert(FirstTerm, ClonedDV);
+ LLVM_DEBUG(dbgs() << "Cloning debug info due to live out spill\n");
+ }
+
+ // Rewrite unassigned dbg_values to use the stack slot.
+ MachineOperand &MO = DBG->getOperand(0);
+ if (MO.isReg() && MO.getReg() == 0)
+ updateDbgValueForSpill(*DBG, FI);
}
// Now that this register is spilled, there should not be any DBG_VALUE
// pointing to this register, because they all point to the spilled value.
@@ -350,100 +445,75 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
++NumLoads;
}
-/// Return true if MO is the only remaining reference to its virtual register,
-/// and it is guaranteed to be a block-local register.
-bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const {
- // If the register has ever been spilled or reloaded, we conservatively assume
- // it is a global register used in multiple blocks.
- if (StackSlotForVirtReg[MO.getReg()] != -1)
- return false;
-
- // Check that the use/def chain has exactly one operand - MO.
- MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
- if (&*I != &MO)
- return false;
- return ++I == MRI->reg_nodbg_end();
-}
+/// Get basic block begin insertion point.
+/// This is not just MBB.begin() because surprisingly we have EH_LABEL
+/// instructions marking the beginning of a basic block. This means we must insert
+/// new instructions after such labels...
+MachineBasicBlock::iterator
+RegAllocFast::getMBBBeginInsertionPoint(
+ MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const {
+ MachineBasicBlock::iterator I = MBB.begin();
+ while (I != MBB.end()) {
+ if (I->isLabel()) {
+ ++I;
+ continue;
+ }
-/// Set kill flags on last use of a virtual register.
-void RegAllocFast::addKillFlag(const LiveReg &LR) {
- if (!LR.LastUse) return;
- MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
- if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
- if (MO.getReg() == LR.PhysReg)
- MO.setIsKill();
- // else, don't do anything we are problably redefining a
- // subreg of this register and given we don't track which
- // lanes are actually dead, we cannot insert a kill flag here.
- // Otherwise we may end up in a situation like this:
- // ... = (MO) physreg:sub1, implicit killed physreg
- // ... <== Here we would allow later pass to reuse physreg:sub1
- // which is potentially wrong.
- // LR:sub0 = ...
- // ... = LR.sub1 <== This is going to use physreg:sub1
- }
-}
+ // Most reloads should be inserted after prolog instructions.
+ if (!TII->isBasicBlockPrologue(*I))
+ break;
-/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(LiveReg &LR) {
- addKillFlag(LR);
- assert(PhysRegState[LR.PhysReg] == LR.VirtReg &&
- "Broken RegState mapping");
- setPhysRegState(LR.PhysReg, regFree);
- LR.PhysReg = 0;
-}
+ // However if a prolog instruction reads a register that needs to be
+ // reloaded, the reload should be inserted before the prolog.
+ for (MachineOperand &MO : I->operands()) {
+ if (MO.isReg())
+ PrologLiveIns.insert(MO.getReg());
+ }
-/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(Register VirtReg) {
- assert(Register::isVirtualRegister(VirtReg) &&
- "killVirtReg needs a virtual register");
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
- killVirtReg(*LRI);
-}
+ ++I;
+ }
-/// This method spills the value specified by VirtReg into the corresponding
-/// stack slot if needed.
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
- Register VirtReg) {
- assert(Register::isVirtualRegister(VirtReg) &&
- "Spilling a physical register is illegal!");
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
- "Spilling unmapped virtual register");
- spillVirtReg(MI, *LRI);
+ return I;
}
-/// Do the actual work of spilling.
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
- assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping");
+/// Reload all currently assigned virtual registers.
+void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
+ if (LiveVirtRegs.empty())
+ return;
- if (LR.Dirty) {
- // If this physreg is used by the instruction, we want to kill it on the
- // instruction, not on the spill.
- bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
- LR.Dirty = false;
+ for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) {
+ MCPhysReg Reg = P.PhysReg;
+ // Set state to live-in. This possibly overrides mappings to virtual
+ // registers but we don't care anymore at this point.
+ setPhysRegState(Reg, regLiveIn);
+ }
- spill(MI, LR.VirtReg, LR.PhysReg, SpillKill);
- if (SpillKill)
- LR.LastUse = nullptr; // Don't kill register again
- }
- killVirtReg(LR);
-}
+ SmallSet<Register, 2> PrologLiveIns;
-/// Spill all dirty virtregs without killing them.
-void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
- if (LiveVirtRegs.empty())
- return;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
// of spilling here is deterministic, if arbitrary.
- for (LiveReg &LR : LiveVirtRegs) {
- if (!LR.PhysReg)
+ MachineBasicBlock::iterator InsertBefore
+ = getMBBBeginInsertionPoint(MBB, PrologLiveIns);
+ for (const LiveReg &LR : LiveVirtRegs) {
+ MCPhysReg PhysReg = LR.PhysReg;
+ if (PhysReg == 0)
continue;
- if (OnlyLiveOut && !mayLiveOut(LR.VirtReg))
+
+ MCRegister FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
+ if (RegUnitStates[FirstUnit] == regLiveIn)
continue;
- spillVirtReg(MI, LR);
+
+ assert((&MBB != &MBB.getParent()->front() || IgnoreMissingDefs) &&
+ "no reload in start block. Missing vreg def?");
+
+ if (PrologLiveIns.count(PhysReg)) {
+ // FIXME: Theoretically this should use an insert point skipping labels
+ // but I'm not sure how labels should interact with prolog instructions
+ // that need reloads.
+ reload(MBB.begin(), LR.VirtReg, PhysReg);
+ } else
+ reload(InsertBefore, LR.VirtReg, PhysReg);
}
LiveVirtRegs.clear();
}
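A minimal sketch of the reload-at-begin flow above: locations that are already live into the block are marked first, and only the remembered assignments whose location was not marked get a reload emitted at the block's insertion point. Assignment, LiveIn and the printf stand in for the real register-unit bookkeeping and instruction insertion.

#include <cstdio>
#include <set>
#include <vector>

struct Assignment { int VirtReg; int PhysReg; };

int main() {
  std::set<int> LiveIn = {3};   // physical registers live into the block
  std::vector<Assignment> Assigned = {{100, 3}, {101, 5}};

  for (const Assignment &A : Assigned) {
    if (LiveIn.count(A.PhysReg))
      continue;                 // value already arrives in its register: no reload
    std::printf("reload v%d into r%d at block begin\n", A.VirtReg, A.PhysReg);
  }
  return 0;
}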
@@ -451,105 +521,73 @@ void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
/// Handle the direct use of a physical register. Check that the register is
/// not used by a virtreg. Kill the physreg, marking it free. This may add
/// implicit kills to MO->getParent() and invalidate MO.
-void RegAllocFast::usePhysReg(MachineOperand &MO) {
- // Ignore undef uses.
- if (MO.isUndef())
- return;
+bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) {
+ assert(Register::isPhysicalRegister(Reg) && "expected physreg");
+ bool displacedAny = displacePhysReg(MI, Reg);
+ setPhysRegState(Reg, regPreAssigned);
+ markRegUsedInInstr(Reg);
+ return displacedAny;
+}
- Register PhysReg = MO.getReg();
- assert(PhysReg.isPhysical() && "Bad usePhysReg operand");
+bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) {
+ bool displacedAny = displacePhysReg(MI, Reg);
+ setPhysRegState(Reg, regPreAssigned);
+ return displacedAny;
+}
- markRegUsedInInstr(PhysReg);
- switch (PhysRegState[PhysReg]) {
- case regDisabled:
- break;
- case regReserved:
- PhysRegState[PhysReg] = regFree;
- LLVM_FALLTHROUGH;
- case regFree:
- MO.setIsKill();
- return;
- default:
- // The physreg was allocated to a virtual register. That means the value we
- // wanted has been clobbered.
- llvm_unreachable("Instruction uses an allocated register");
- }
+/// Mark PhysReg as reserved or free after spilling any virtregs. This is very
+/// similar to defineVirtReg except the physreg is reserved instead of
+/// allocated.
+bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
+ bool displacedAny = false;
- // Maybe a superregister is reserved?
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- MCPhysReg Alias = *AI;
- switch (PhysRegState[Alias]) {
- case regDisabled:
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ unsigned Unit = *UI;
+ switch (unsigned VirtReg = RegUnitStates[Unit]) {
+ default: {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "datastructures in sync");
+ MachineBasicBlock::iterator ReloadBefore =
+ std::next((MachineBasicBlock::iterator)MI.getIterator());
+ reload(ReloadBefore, VirtReg, LRI->PhysReg);
+
+ setPhysRegState(LRI->PhysReg, regFree);
+ LRI->PhysReg = 0;
+ LRI->Reloaded = true;
+ displacedAny = true;
+ break;
+ }
+ case regPreAssigned:
+ RegUnitStates[Unit] = regFree;
+ displacedAny = true;
break;
- case regReserved:
- // Either PhysReg is a subregister of Alias and we mark the
- // whole register as free, or PhysReg is the superregister of
- // Alias and we mark all the aliases as disabled before freeing
- // PhysReg.
- // In the latter case, since PhysReg was disabled, this means that
- // its value is defined only by physical sub-registers. This check
- // is performed by the assert of the default case in this loop.
- // Note: The value of the superregister may only be partial
- // defined, that is why regDisabled is a valid state for aliases.
- assert((TRI->isSuperRegister(PhysReg, Alias) ||
- TRI->isSuperRegister(Alias, PhysReg)) &&
- "Instruction is not using a subregister of a reserved register");
- LLVM_FALLTHROUGH;
case regFree:
- if (TRI->isSuperRegister(PhysReg, Alias)) {
- // Leave the superregister in the working set.
- setPhysRegState(Alias, regFree);
- MO.getParent()->addRegisterKilled(Alias, TRI, true);
- return;
- }
- // Some other alias was in the working set - clear it.
- setPhysRegState(Alias, regDisabled);
break;
- default:
- llvm_unreachable("Instruction uses an alias of an allocated register");
}
}
-
- // All aliases are disabled, bring register into working set.
- setPhysRegState(PhysReg, regFree);
- MO.setIsKill();
+ return displacedAny;
}
-/// Mark PhysReg as reserved or free after spilling any virtregs. This is very
-/// similar to defineVirtReg except the physreg is reserved instead of
-/// allocated.
-void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
- MCPhysReg PhysReg, RegState NewState) {
- markRegUsedInInstr(PhysReg);
- switch (Register VirtReg = PhysRegState[PhysReg]) {
- case regDisabled:
- break;
- default:
- spillVirtReg(MI, VirtReg);
- LLVM_FALLTHROUGH;
+void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
+ LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':');
+
+ MCRegister FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
+ switch (unsigned VirtReg = RegUnitStates[FirstUnit]) {
case regFree:
- case regReserved:
- setPhysRegState(PhysReg, NewState);
+ LLVM_DEBUG(dbgs() << '\n');
return;
- }
-
- // This is a disabled register, disable all aliases.
- setPhysRegState(PhysReg, NewState);
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- MCPhysReg Alias = *AI;
- switch (Register VirtReg = PhysRegState[Alias]) {
- case regDisabled:
- break;
- default:
- spillVirtReg(MI, VirtReg);
- LLVM_FALLTHROUGH;
- case regFree:
- case regReserved:
- setPhysRegState(Alias, regDisabled);
- if (TRI->isSuperRegister(PhysReg, Alias))
- return;
- break;
+ case regPreAssigned:
+ LLVM_DEBUG(dbgs() << '\n');
+ setPhysRegState(PhysReg, regFree);
+ return;
+ default: {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end());
+ LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n');
+ setPhysRegState(LRI->PhysReg, regFree);
+ LRI->PhysReg = 0;
}
+ return;
}
}
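A minimal sketch of the per-register-unit bookkeeping that replaces the old per-register PhysRegState in the hunks above: each unit stores either a sentinel (free / pre-assigned) or the occupying virtual register, and freeing a physical register clears every unit it covers, which also releases aliasing registers. The unit table and the FirstVirtReg encoding are illustrative, not LLVM's.

#include <cstdint>
#include <vector>

enum : uint32_t { regFree = 0, regPreAssigned = 1, FirstVirtReg = 2 };

static const std::vector<uint32_t> &unitsOf(uint32_t PhysReg) {
  // Toy aliasing table: phys reg 0 covers units {0,1}, phys reg 1 covers {1}.
  static const std::vector<uint32_t> Table[] = {{0, 1}, {1}};
  return Table[PhysReg];
}

int main() {
  std::vector<uint32_t> RegUnitStates(2, regFree);

  // Assign a virtual register (any value >= FirstVirtReg) to phys reg 0.
  for (uint32_t U : unitsOf(0))
    RegUnitStates[U] = FirstVirtReg;

  // Freeing phys reg 0 releases every unit it covers, which also makes the
  // aliasing phys reg 1 allocatable again.
  for (uint32_t U : unitsOf(0))
    RegUnitStates[U] = regFree;
  return 0;
}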
@@ -558,57 +596,61 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
/// disabled - it can be allocated directly.
/// \returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
- if (isRegUsedInInstr(PhysReg)) {
- LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
- << " is already used in instr.\n");
- return spillImpossible;
- }
- switch (Register VirtReg = PhysRegState[PhysReg]) {
- case regDisabled:
- break;
- case regFree:
- return 0;
- case regReserved:
- LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
- << printReg(PhysReg, TRI) << " is reserved already.\n");
- return spillImpossible;
- default: {
- LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
- "Missing VirtReg entry");
- return LRI->Dirty ? spillDirty : spillClean;
- }
- }
-
- // This is a disabled register, add up cost of aliases.
- LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");
- unsigned Cost = 0;
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- MCPhysReg Alias = *AI;
- switch (Register VirtReg = PhysRegState[Alias]) {
- case regDisabled:
- break;
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ switch (unsigned VirtReg = RegUnitStates[*UI]) {
case regFree:
- ++Cost;
break;
- case regReserved:
+ case regPreAssigned:
+ LLVM_DEBUG(dbgs() << "Cannot spill pre-assigned "
+ << printReg(PhysReg, TRI) << '\n');
return spillImpossible;
default: {
- LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
- "Missing VirtReg entry");
- Cost += LRI->Dirty ? spillDirty : spillClean;
- break;
+ bool SureSpill = StackSlotForVirtReg[VirtReg] != -1 ||
+ findLiveVirtReg(VirtReg)->LiveOut;
+ return SureSpill ? spillClean : spillDirty;
}
}
}
- return Cost;
+ return 0;
+}
+
+void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
+ Register VirtReg, MCPhysReg Reg) {
+ auto UDBGValIter = DanglingDbgValues.find(VirtReg);
+ if (UDBGValIter == DanglingDbgValues.end())
+ return;
+
+ SmallVectorImpl<MachineInstr*> &Dangling = UDBGValIter->second;
+ for (MachineInstr *DbgValue : Dangling) {
+ assert(DbgValue->isDebugValue());
+ MachineOperand &MO = DbgValue->getOperand(0);
+ if (!MO.isReg())
+ continue;
+
+ // Test whether the physreg survives from the definition to the DBG_VALUE.
+ MCPhysReg SetToReg = Reg;
+ unsigned Limit = 20;
+ for (MachineBasicBlock::iterator I = std::next(Definition.getIterator()),
+ E = DbgValue->getIterator(); I != E; ++I) {
+ if (I->modifiesRegister(Reg, TRI) || --Limit == 0) {
+ LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
+ << '\n');
+ SetToReg = 0;
+ break;
+ }
+ }
+ MO.setReg(SetToReg);
+ if (SetToReg != 0)
+ MO.setIsRenamable();
+ }
+ Dangling.clear();
}
/// This method updates local state so that we know that PhysReg is the
/// proper container for VirtReg now. The physical register must not be used
/// for anything else when this is called.
-void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
+void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR,
+ MCPhysReg PhysReg) {
Register VirtReg = LR.VirtReg;
LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
<< printReg(PhysReg, TRI) << '\n');
@@ -616,6 +658,8 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
assert(PhysReg != 0 && "Trying to assign no register");
LR.PhysReg = PhysReg;
setPhysRegState(PhysReg, VirtReg);
+
+ assignDanglingDebugValues(AtMI, VirtReg, PhysReg);
}
static bool isCoalescable(const MachineInstr &MI) {
@@ -659,11 +703,10 @@ Register RegAllocFast::traceCopies(Register VirtReg) const {
}
/// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
+ Register Hint0, bool LookAtPhysRegUses) {
const Register VirtReg = LR.VirtReg;
-
- assert(Register::isVirtualRegister(VirtReg) &&
- "Can only allocate virtual registers");
+ assert(LR.PhysReg == 0);
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
@@ -671,41 +714,36 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
<< " with hint " << printReg(Hint0, TRI) << '\n');
// Take hint when possible.
- if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) &&
- RC.contains(Hint0)) {
- // Ignore the hint if we would have to spill a dirty register.
- unsigned Cost = calcSpillCost(Hint0);
- if (Cost < spillDirty) {
+ if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0) &&
+ !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) {
+ // Take hint if the register is currently free.
+ if (isPhysRegFree(Hint0)) {
LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
<< '\n');
- if (Cost)
- definePhysReg(MI, Hint0, regFree);
- assignVirtToPhysReg(LR, Hint0);
+ assignVirtToPhysReg(MI, LR, Hint0);
return;
} else {
- LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
- << "occupied\n");
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint0, TRI)
+ << " occupied\n");
}
} else {
Hint0 = Register();
}
+
// Try other hint.
Register Hint1 = traceCopies(VirtReg);
- if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) &&
- RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) {
- // Ignore the hint if we would have to spill a dirty register.
- unsigned Cost = calcSpillCost(Hint1);
- if (Cost < spillDirty) {
+ if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
+ !isRegUsedInInstr(Hint1, LookAtPhysRegUses)) {
+ // Take hint if the register is currently free.
+ if (isPhysRegFree(Hint1)) {
LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
- << '\n');
- if (Cost)
- definePhysReg(MI, Hint1, regFree);
- assignVirtToPhysReg(LR, Hint1);
+ << '\n');
+ assignVirtToPhysReg(MI, LR, Hint1);
return;
} else {
- LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
- << "occupied\n");
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI)
+ << " occupied\n");
}
} else {
Hint1 = Register();
@@ -716,15 +754,20 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
for (MCPhysReg PhysReg : AllocationOrder) {
LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
+ if (isRegUsedInInstr(PhysReg, LookAtPhysRegUses)) {
+ LLVM_DEBUG(dbgs() << "already used in instr.\n");
+ continue;
+ }
+
unsigned Cost = calcSpillCost(PhysReg);
LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n');
// Immediately take a register with cost 0.
if (Cost == 0) {
- assignVirtToPhysReg(LR, PhysReg);
+ assignVirtToPhysReg(MI, LR, PhysReg);
return;
}
- if (PhysReg == Hint1 || PhysReg == Hint0)
+ if (PhysReg == Hint0 || PhysReg == Hint1)
Cost -= spillPrefBonus;
if (Cost < BestCost) {
@@ -740,13 +783,14 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
MI.emitError("inline assembly requires more registers than available");
else
MI.emitError("ran out of registers during register allocation");
- definePhysReg(MI, *AllocationOrder.begin(), regFree);
- assignVirtToPhysReg(LR, *AllocationOrder.begin());
+
+ LR.Error = true;
+ LR.PhysReg = 0;
return;
}
- definePhysReg(MI, BestReg, regFree);
- assignVirtToPhysReg(LR, BestReg);
+ displacePhysReg(MI, BestReg);
+ assignVirtToPhysReg(MI, LR, BestReg);
}
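
allocVirtReg now takes a hint only when that register is currently free, and otherwise picks the cheapest register from the allocation order while skipping registers already used in the instruction. A condensed standalone model of that search (the callbacks and constants stand in for the allocator's real state; they are not the LLVM API):

#include <functional>
#include <vector>

constexpr unsigned ToySpillImpossible = ~0u;
constexpr unsigned ToySpillPrefBonus = 20;

// Take a free hint immediately; otherwise pick the lowest-cost register from
// the allocation order, skipping registers already used in this instruction
// and giving the hinted register a small bonus. Returns 0 on failure.
unsigned pickPhysReg(const std::vector<unsigned> &Order, unsigned Hint,
                     const std::function<unsigned(unsigned)> &SpillCost,
                     const std::function<bool(unsigned)> &UsedInInstr) {
  if (Hint && !UsedInInstr(Hint) && SpillCost(Hint) == 0)
    return Hint;
  unsigned Best = 0, BestCost = ToySpillImpossible;
  for (unsigned Reg : Order) {
    if (UsedInInstr(Reg))
      continue;
    unsigned Cost = SpillCost(Reg);
    if (Cost == 0)
      return Reg;                    // free register: stop searching
    if (Reg == Hint && Cost >= ToySpillPrefBonus)
      Cost -= ToySpillPrefBonus;     // mild preference for the hint
    if (Cost < BestCost) {
      Best = Reg;
      BestCost = Cost;
    }
  }
  return Best;
}

Unlike the removed code, a hint that would require displacing another value is no longer taken outright; it only earns a bonus in the cost comparison.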
void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
@@ -774,347 +818,491 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
MO.setIsRenamable(true);
}
-/// Allocates a register for VirtReg and mark it as dirty.
-MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
- Register VirtReg, Register Hint) {
- assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
+/// Variation of defineVirtReg() with special handling for livethrough regs
+/// (tied or earlyclobber) that may interfere with preassigned uses.
+void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg) {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ if (LRI != LiveVirtRegs.end()) {
+ MCPhysReg PrevReg = LRI->PhysReg;
+ if (PrevReg != 0 && isRegUsedInInstr(PrevReg, true)) {
+ LLVM_DEBUG(dbgs() << "Need new assignment for " << printReg(PrevReg, TRI)
+ << " (tied/earlyclobber resolution)\n");
+ freePhysReg(PrevReg);
+ LRI->PhysReg = 0;
+ allocVirtReg(MI, *LRI, 0, true);
+ MachineBasicBlock::iterator InsertBefore =
+ std::next((MachineBasicBlock::iterator)MI.getIterator());
+ LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to "
+ << printReg(PrevReg, TRI) << '\n');
+ BuildMI(*MBB, InsertBefore, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), PrevReg)
+ .addReg(LRI->PhysReg, llvm::RegState::Kill);
+ }
+ MachineOperand &MO = MI.getOperand(OpNum);
+ if (MO.getSubReg() && !MO.isUndef()) {
+ LRI->LastUse = &MI;
+ }
+ }
+ return defineVirtReg(MI, OpNum, VirtReg, true);
+}
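
defineLiveThroughVirtReg handles a def whose previous assignment collides with a pre-assigned use of the same instruction: the def moves to a freshly allocated register and a COPY back into the old register is inserted right after the instruction. A toy restatement of just that decision (PendingCopy and the parameters are invented for the sketch):

#include <cstdint>
#include <vector>

struct PendingCopy { uint16_t Src, Dst; }; // COPY Dst = Src, placed after MI

// If the register the def previously lived in is also used by this
// instruction, produce the def in a fresh register and queue a copy back
// into the old one; otherwise keep the existing assignment.
uint16_t resolveLiveThrough(uint16_t PrevReg, bool PrevRegUsedInInstr,
                            uint16_t FreshReg,
                            std::vector<PendingCopy> &CopiesAfterMI) {
  if (PrevReg == 0 || !PrevRegUsedInInstr)
    return PrevReg;
  CopiesAfterMI.push_back({FreshReg, PrevReg});
  return FreshReg;
}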
+
+/// Allocates a register for VirtReg definition. Typically the register is
+/// already assigned from a use of the virtreg, however we still need to
+/// perform an allocation if:
+/// - It is a dead definition without any uses.
+/// - The value is live out and all uses are in different basic blocks.
+void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg, bool LookAtPhysRegUses) {
+ assert(VirtReg.isVirtual() && "Not a virtual register");
+ MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- if (!LRI->PhysReg) {
- // If there is no hint, peek at the only use of this register.
- if ((!Hint || !Hint.isPhysical()) &&
- MRI->hasOneNonDBGUse(VirtReg)) {
- const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg);
- // It's a copy, use the destination register as a hint.
- if (UseMI.isCopyLike())
- Hint = UseMI.getOperand(0).getReg();
+ if (New) {
+ if (!MO.isDead()) {
+ if (mayLiveOut(VirtReg)) {
+ LRI->LiveOut = true;
+ } else {
+ // It is a dead def without the dead flag; add the flag now.
+ MO.setIsDead(true);
+ }
}
- allocVirtReg(MI, *LRI, Hint);
- } else if (LRI->LastUse) {
- // Redefining a live register - kill at the last use, unless it is this
- // instruction defining VirtReg multiple times.
- if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
- addKillFlag(*LRI);
}
- assert(LRI->PhysReg && "Register not assigned");
- LRI->LastUse = &MI;
- LRI->LastOpNum = OpNum;
- LRI->Dirty = true;
- markRegUsedInInstr(LRI->PhysReg);
- return LRI->PhysReg;
+ if (LRI->PhysReg == 0)
+ allocVirtReg(MI, *LRI, 0, LookAtPhysRegUses);
+ else {
+ assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) &&
+ "TODO: preassign mismatch");
+ LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI)
+ << " use existing assignment to "
+ << printReg(LRI->PhysReg, TRI) << '\n');
+ }
+
+ MCPhysReg PhysReg = LRI->PhysReg;
+ assert(PhysReg != 0 && "Register not assigned");
+ if (LRI->Reloaded || LRI->LiveOut) {
+ if (!MI.isImplicitDef()) {
+ MachineBasicBlock::iterator SpillBefore =
+ std::next((MachineBasicBlock::iterator)MI.getIterator());
+ LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: "
+ << LRI->Reloaded << '\n');
+ bool Kill = LRI->LastUse == nullptr;
+ spill(SpillBefore, VirtReg, PhysReg, Kill, LRI->LiveOut);
+ LRI->LastUse = nullptr;
+ }
+ LRI->LiveOut = false;
+ LRI->Reloaded = false;
+ }
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ BundleVirtRegsMap[VirtReg] = PhysReg;
+ }
+ markRegUsedInInstr(PhysReg);
+ setPhysReg(MI, MO, PhysReg);
}
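
Since the block is allocated bottom-up, defineVirtReg is also where spills are placed: a value that was reloaded further down or that is live out must be stored to its stack slot immediately after its definition. The standalone predicate below restates that rule (ToyLiveReg is a stand-in, not the allocator's LiveReg):

// Returns true when the caller should insert a store right after the def.
// IMPLICIT_DEFs produce no real value, so no store is emitted for them, but
// the flags are cleared either way for the earlier code still to be visited.
struct ToyLiveReg {
  bool LiveOut = false;
  bool Reloaded = false;
};

bool spillAfterDef(ToyLiveReg &LR, bool IsImplicitDef) {
  if (!LR.LiveOut && !LR.Reloaded)
    return false;
  bool NeedStore = !IsImplicitDef;
  LR.LiveOut = false;
  LR.Reloaded = false;
  return NeedStore;
}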
-/// Make sure VirtReg is available in a physreg and return it.
-RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
- unsigned OpNum,
- Register VirtReg,
- Register Hint) {
- assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
+/// Allocates a register for a VirtReg use.
+void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg) {
+ assert(VirtReg.isVirtual() && "Not a virtual register");
+ MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- MachineOperand &MO = MI.getOperand(OpNum);
- if (!LRI->PhysReg) {
- allocVirtReg(MI, *LRI, Hint);
- reload(MI, VirtReg, LRI->PhysReg);
- } else if (LRI->Dirty) {
- if (isLastUseOfLocalReg(MO)) {
- LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n');
- if (MO.isUse())
- MO.setIsKill();
- else
- MO.setIsDead();
- } else if (MO.isKill()) {
- LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n');
- MO.setIsKill(false);
- } else if (MO.isDead()) {
- LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n');
- MO.setIsDead(false);
+ if (New) {
+ MachineOperand &MO = MI.getOperand(OpNum);
+ if (!MO.isKill()) {
+ if (mayLiveOut(VirtReg)) {
+ LRI->LiveOut = true;
+ } else {
+ // It is a last (killing) use without the kill flag; add the flag now.
+ MO.setIsKill(true);
+ }
}
- } else if (MO.isKill()) {
- // We must remove kill flags from uses of reloaded registers because the
- // register would be killed immediately, and there might be a second use:
- // %foo = OR killed %x, %x
- // This would cause a second reload of %x into a different register.
- LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n');
- MO.setIsKill(false);
- } else if (MO.isDead()) {
- LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n');
- MO.setIsDead(false);
+ } else {
+ assert((!MO.isKill() || LRI->LastUse == &MI) && "Invalid kill flag");
}
- assert(LRI->PhysReg && "Register not assigned");
+
+ // If necessary allocate a register.
+ if (LRI->PhysReg == 0) {
+ assert(!MO.isTied() && "tied op should be allocated");
+ Register Hint;
+ if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) {
+ Hint = MI.getOperand(0).getReg();
+ assert(Hint.isPhysical() &&
+ "Copy destination should already be assigned");
+ }
+ allocVirtReg(MI, *LRI, Hint, false);
+ if (LRI->Error) {
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ setPhysReg(MI, MO, *AllocationOrder.begin());
+ return;
+ }
+ }
+
LRI->LastUse = &MI;
- LRI->LastOpNum = OpNum;
+
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ BundleVirtRegsMap[VirtReg] = LRI->PhysReg;
+ }
markRegUsedInInstr(LRI->PhysReg);
- return *LRI;
+ setPhysReg(MI, MO, LRI->PhysReg);
}
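
useVirtReg performs the mirror-image bookkeeping for a use seen for the first time in the bottom-up walk: a use without a kill flag is either the last use in the block (so the flag is added now) or the value lives across blocks and is marked live-out so it will be spilled at its def. A small standalone restatement (toy types, not MachineOperand):

// Classify a use encountered for the first time while scanning bottom-up.
struct ToyUse {
  bool KillFlag = false;
};

void classifyFirstUse(ToyUse &MO, bool MayLiveOut, bool &LiveOut) {
  if (MO.KillFlag)
    return;             // already known to be the last use
  if (MayLiveOut)
    LiveOut = true;     // value escapes the block, must stay reloadable
  else
    MO.KillFlag = true; // provably the last use: add the missing flag
}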
/// Changes operand OpNum in MI to refer to PhysReg, considering subregs.
/// This may invalidate any operand pointers.
-bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
+void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
MCPhysReg PhysReg) {
- bool Dead = MO.isDead();
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
MO.setIsRenamable(true);
- return MO.isKill() || Dead;
+ return;
}
// Handle subregister index.
- MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : Register());
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : MCRegister());
MO.setIsRenamable(true);
- MO.setSubReg(0);
+ // Note: We leave the subreg number around a little longer in case of defs.
+ // This is so that the register freeing logic in allocateInstruction can still
+ // recognize this as a subregister def. The code there will clear the number.
+ if (!MO.isDef())
+ MO.setSubReg(0);
// A kill flag implies killing the full register. Add corresponding super
// register kill.
if (MO.isKill()) {
MI.addRegisterKilled(PhysReg, TRI, true);
- return true;
+ return;
}
// A <def,read-undef> of a sub-register requires an implicit def of the full
// register.
- if (MO.isDef() && MO.isUndef())
- MI.addRegisterDefined(PhysReg, TRI);
-
- return Dead;
-}
-
-// Handles special instruction operand like early clobbers and tied ops when
-// there are additional physreg defines.
-void RegAllocFast::handleThroughOperands(MachineInstr &MI,
- SmallVectorImpl<Register> &VirtDead) {
- LLVM_DEBUG(dbgs() << "Scanning for through registers:");
- SmallSet<Register, 8> ThroughRegs;
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg()) continue;
- Register Reg = MO.getReg();
- if (!Reg.isVirtual())
- continue;
- if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
- (MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
- if (ThroughRegs.insert(Reg).second)
- LLVM_DEBUG(dbgs() << ' ' << printReg(Reg));
- }
- }
-
- // If any physreg defines collide with preallocated through registers,
- // we must spill and reallocate.
- LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef()) continue;
- Register Reg = MO.getReg();
- if (!Reg || !Reg.isPhysical())
- continue;
- markRegUsedInInstr(Reg);
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- if (ThroughRegs.count(PhysRegState[*AI]))
- definePhysReg(MI, *AI, regFree);
- }
- }
-
- SmallVector<Register, 8> PartialDefs;
- LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
- Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
- continue;
- if (MO.isUse()) {
- if (!MO.isTied()) continue;
- LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
- << ") is tied to operand " << MI.findTiedOperandIdx(I)
- << ".\n");
- LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
- MCPhysReg PhysReg = LR.PhysReg;
- setPhysReg(MI, MO, PhysReg);
- // Note: we don't update the def operand yet. That would cause the normal
- // def-scan to attempt spilling.
- } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) {
- LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n');
- // Reload the register, but don't assign to the operand just yet.
- // That would confuse the later phys-def processing pass.
- LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
- PartialDefs.push_back(LR.PhysReg);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n");
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
- Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
- continue;
- if (!MO.isEarlyClobber())
- continue;
- // Note: defineVirtReg may invalidate MO.
- MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0);
- if (setPhysReg(MI, MI.getOperand(I), PhysReg))
- VirtDead.push_back(Reg);
- }
-
- // Restore UsedInInstr to a state usable for allocating normal virtual uses.
- UsedInInstr.clear();
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
- Register Reg = MO.getReg();
- if (!Reg || !Reg.isPhysical())
- continue;
- LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
- << " as used in instr\n");
- markRegUsedInInstr(Reg);
+ if (MO.isDef() && MO.isUndef()) {
+ if (MO.isDead())
+ MI.addRegisterDead(PhysReg, TRI, true);
+ else
+ MI.addRegisterDefined(PhysReg, TRI);
}
-
- // Also mark PartialDefs as used to avoid reallocation.
- for (Register PartialDef : PartialDefs)
- markRegUsedInInstr(PartialDef);
}
#ifndef NDEBUG
-void RegAllocFast::dumpState() {
- for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
- if (PhysRegState[Reg] == regDisabled) continue;
- dbgs() << " " << printReg(Reg, TRI);
- switch(PhysRegState[Reg]) {
+
+void RegAllocFast::dumpState() const {
+ for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE;
+ ++Unit) {
+ switch (unsigned VirtReg = RegUnitStates[Unit]) {
case regFree:
break;
- case regReserved:
- dbgs() << "*";
+ case regPreAssigned:
+ dbgs() << " " << printRegUnit(Unit, TRI) << "[P]";
break;
+ case regLiveIn:
+ llvm_unreachable("Should not have regLiveIn in map");
default: {
- dbgs() << '=' << printReg(PhysRegState[Reg]);
- LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]);
- assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
- "Missing VirtReg entry");
- if (LRI->Dirty)
- dbgs() << "*";
- assert(LRI->PhysReg == Reg && "Bad inverse map");
+ dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg);
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry");
+ if (I->LiveOut || I->Reloaded) {
+ dbgs() << '[';
+ if (I->LiveOut) dbgs() << 'O';
+ if (I->Reloaded) dbgs() << 'R';
+ dbgs() << ']';
+ }
+ assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present");
break;
}
}
}
dbgs() << '\n';
// Check that LiveVirtRegs is the inverse.
- for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
- e = LiveVirtRegs.end(); i != e; ++i) {
- if (!i->PhysReg)
- continue;
- assert(i->VirtReg.isVirtual() && "Bad map key");
- assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value");
- assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
+ for (const LiveReg &LR : LiveVirtRegs) {
+ Register VirtReg = LR.VirtReg;
+ assert(VirtReg.isVirtual() && "Bad map key");
+ MCPhysReg PhysReg = LR.PhysReg;
+ if (PhysReg != 0) {
+ assert(Register::isPhysicalRegister(PhysReg) &&
+ "mapped to physreg");
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ assert(RegUnitStates[*UI] == VirtReg && "inverse map valid");
+ }
+ }
}
}
#endif
-void RegAllocFast::allocateInstruction(MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // If this is a copy, we may be able to coalesce.
- Register CopySrcReg;
- Register CopyDstReg;
- unsigned CopySrcSub = 0;
- unsigned CopyDstSub = 0;
- if (MI.isCopy()) {
- CopyDstReg = MI.getOperand(0).getReg();
- CopySrcReg = MI.getOperand(1).getReg();
- CopyDstSub = MI.getOperand(0).getSubReg();
- CopySrcSub = MI.getOperand(1).getSubReg();
+/// Count number of defs consumed from each register class by \p Reg
+void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+ Register Reg) const {
+ assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
+
+ if (Reg.isVirtual()) {
+ const TargetRegisterClass *OpRC = MRI->getRegClass(Reg);
+ for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
+ RCIdx != RCIdxEnd; ++RCIdx) {
+ const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx);
+ // FIXME: Consider aliasing sub/super registers.
+ if (OpRC->hasSubClassEq(IdxRC))
+ ++RegClassDefCounts[RCIdx];
+ }
+
+ return;
}
- // Track registers used by instruction.
- UsedInInstr.clear();
+ for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
+ RCIdx != RCIdxEnd; ++RCIdx) {
+ const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx);
+ for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
+ if (IdxRC->contains(*Alias)) {
+ ++RegClassDefCounts[RCIdx];
+ break;
+ }
+ }
+ }
+}
- // First scan.
- // Mark physreg uses and early clobbers as used.
- // Find the end of the virtreg operands
- unsigned VirtOpEnd = 0;
- bool hasTiedOps = false;
- bool hasEarlyClobbers = false;
- bool hasPartialRedefs = false;
- bool hasPhysDefs = false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- // Make sure MRI knows about registers clobbered by regmasks.
- if (MO.isRegMask()) {
- MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
- continue;
+void RegAllocFast::allocateInstruction(MachineInstr &MI) {
+ // The basic algorithm here is:
+ // 1. Mark registers of def operands as free
+ // 2. Allocate registers to use operands and place reload instructions for
+ // registers displaced by the allocation.
+ //
+ // However we need to handle some corner cases:
+ // - pre-assigned defs and uses need to be handled before the other def/use
+ // operands are processed to avoid the allocation heuristics clashing with
+ // the pre-assignment.
+ // - The "free def operands" step has to come last instead of first for tied
+ // operands and early-clobbers.
+
+ UsedInInstr.clear();
+ BundleVirtRegsMap.clear();
+
+ // Scan for special cases; Apply pre-assigned register defs to state.
+ bool HasPhysRegUse = false;
+ bool HasRegMask = false;
+ bool HasVRegDef = false;
+ bool HasDef = false;
+ bool HasEarlyClobber = false;
+ bool NeedToAssignLiveThroughs = false;
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual()) {
+ if (MO.isDef()) {
+ HasDef = true;
+ HasVRegDef = true;
+ if (MO.isEarlyClobber()) {
+ HasEarlyClobber = true;
+ NeedToAssignLiveThroughs = true;
+ }
+ if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef()))
+ NeedToAssignLiveThroughs = true;
+ }
+ } else if (Reg.isPhysical()) {
+ if (!MRI->isReserved(Reg)) {
+ if (MO.isDef()) {
+ HasDef = true;
+ bool displacedAny = definePhysReg(MI, Reg);
+ if (MO.isEarlyClobber())
+ HasEarlyClobber = true;
+ if (!displacedAny)
+ MO.setIsDead(true);
+ }
+ if (MO.readsReg())
+ HasPhysRegUse = true;
+ }
+ }
+ } else if (MO.isRegMask()) {
+ HasRegMask = true;
}
- if (!MO.isReg()) continue;
- Register Reg = MO.getReg();
- if (!Reg) continue;
- if (Register::isVirtualRegister(Reg)) {
- VirtOpEnd = i+1;
- if (MO.isUse()) {
- hasTiedOps = hasTiedOps ||
- MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+ }
+
+ // Allocate virtreg defs.
+ if (HasDef) {
+ if (HasVRegDef) {
+ // Special handling for early clobbers, tied operands or subregister defs:
+ // Compared to "normal" defs these:
+ // - Must not use a register that is pre-assigned for a use operand.
+ // - In order to solve tricky inline assembly constraints we change the
+ // heuristic to figure out a good operand order before doing
+ // assignments.
+ if (NeedToAssignLiveThroughs) {
+ DefOperandIndexes.clear();
+ PhysRegUses.clear();
+
+ // Track number of defs which may consume a register from the class.
+ std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
+ assert(RegClassDefCounts[0] == 0);
+
+ LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (MO.readsReg()) {
+ if (Reg.isPhysical()) {
+ LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI)
+ << '\n');
+ markPhysRegUsedInInstr(Reg);
+ }
+ }
+
+ if (MO.isDef()) {
+ if (Reg.isVirtual())
+ DefOperandIndexes.push_back(I);
+
+ addRegClassDefCounts(RegClassDefCounts, Reg);
+ }
+ }
+
+ llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) {
+ const MachineOperand &MO0 = MI.getOperand(I0);
+ const MachineOperand &MO1 = MI.getOperand(I1);
+ Register Reg0 = MO0.getReg();
+ Register Reg1 = MO1.getReg();
+ const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0);
+ const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1);
+
+ // Identify register classes that are easy to use up completely just in
+ // this instruction.
+ unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size();
+ unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size();
+
+ bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()];
+ bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()];
+ if (SmallClass0 > SmallClass1)
+ return true;
+ if (SmallClass0 < SmallClass1)
+ return false;
+
+ // Allocate early clobbers and livethrough operands first.
+ bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() ||
+ (MO0.getSubReg() == 0 && !MO0.isUndef());
+ bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() ||
+ (MO1.getSubReg() == 0 && !MO1.isUndef());
+ if (Livethrough0 > Livethrough1)
+ return true;
+ if (Livethrough0 < Livethrough1)
+ return false;
+
+ // Tie-break rule: operand index.
+ return I0 < I1;
+ });
+
+ for (uint16_t OpIdx : DefOperandIndexes) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
+ unsigned Reg = MO.getReg();
+ if (MO.isEarlyClobber() || MO.isTied() ||
+ (MO.getSubReg() && !MO.isUndef())) {
+ defineLiveThroughVirtReg(MI, OpIdx, Reg);
+ } else {
+ defineVirtReg(MI, OpIdx, Reg);
+ }
+ }
} else {
- if (MO.isEarlyClobber())
- hasEarlyClobbers = true;
- if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
- hasPartialRedefs = true;
+ // Assign virtual register defs.
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual())
+ defineVirtReg(MI, I, Reg);
+ }
}
- continue;
}
- if (!MRI->isAllocatable(Reg)) continue;
- if (MO.isUse()) {
- usePhysReg(MO);
- } else if (MO.isEarlyClobber()) {
- definePhysReg(MI, Reg,
- (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
- hasEarlyClobbers = true;
- } else
- hasPhysDefs = true;
+
+ // Free registers occupied by defs.
+ // Iterate operands in reverse order, so we see the implicit super register
+ // defs first (we added them earlier in case of <def,read-undef>).
+ for (unsigned I = MI.getNumOperands(); I-- > 0;) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ // subreg defs don't free the full register. We left the subreg number
+ // around as a marker in setPhysReg() to recognize this case here.
+ if (MO.getSubReg() != 0) {
+ MO.setSubReg(0);
+ continue;
+ }
+
+ // Do not free tied operands and early clobbers.
+ if (MO.isTied() || MO.isEarlyClobber())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(Reg.isPhysical());
+ if (MRI->isReserved(Reg))
+ continue;
+ freePhysReg(Reg);
+ unmarkRegUsedInInstr(Reg);
+ }
}
- // The instruction may have virtual register operands that must be allocated
- // the same register at use-time and def-time: early clobbers and tied
- // operands. If there are also physical defs, these registers must avoid
- // both physical defs and uses, making them more constrained than normal
- // operands.
- // Similarly, if there are multiple defs and tied operands, we must make
- // sure the same register is allocated to uses and defs.
- // We didn't detect inline asm tied operands above, so just make this extra
- // pass for all inline asm.
- if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
- (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
- handleThroughOperands(MI, VirtDead);
- // Don't attempt coalescing when we have funny stuff going on.
- CopyDstReg = Register();
- // Pretend we have early clobbers so the use operands get marked below.
- // This is not necessary for the common case of a single tied use.
- hasEarlyClobbers = true;
+ // Displace clobbered registers.
+ if (HasRegMask) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ // MRI bookkeeping.
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
+ // Displace clobbered registers.
+ const uint32_t *Mask = MO.getRegMask();
+ for (LiveRegMap::iterator LRI = LiveVirtRegs.begin(),
+ LRIE = LiveVirtRegs.end(); LRI != LRIE; ++LRI) {
+ MCPhysReg PhysReg = LRI->PhysReg;
+ if (PhysReg != 0 && MachineOperand::clobbersPhysReg(Mask, PhysReg))
+ displacePhysReg(MI, PhysReg);
+ }
+ }
+ }
}
- // Second scan.
- // Allocate virtreg uses.
+ // Apply pre-assigned register uses to state.
+ if (HasPhysRegUse) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical())
+ continue;
+ if (MRI->isReserved(Reg))
+ continue;
+ bool displacedAny = usePhysReg(MI, Reg);
+ if (!displacedAny && !MRI->isReserved(Reg))
+ MO.setIsKill(true);
+ }
+ }
+
+ // Allocate virtreg uses and insert reloads as necessary.
bool HasUndefUse = false;
- for (unsigned I = 0; I != VirtOpEnd; ++I) {
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
+ if (!MO.isReg() || !MO.isUse())
+ continue;
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
- if (MO.isUse()) {
- if (MO.isUndef()) {
- HasUndefUse = true;
- // There is no need to allocate a register for an undef use.
- continue;
- }
- // Populate MayLiveAcrossBlocks in case the use block is allocated before
- // the def block (removing the vreg uses).
- mayLiveIn(Reg);
-
- LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
- MCPhysReg PhysReg = LR.PhysReg;
- CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
- if (setPhysReg(MI, MO, PhysReg))
- killVirtReg(LR);
+ if (MO.isUndef()) {
+ HasUndefUse = true;
+ continue;
}
+
+ // Populate MayLiveAcrossBlocks in case the use block is allocated before
+ // the def block (removing the vreg uses).
+ mayLiveIn(Reg);
+
+ assert(!MO.isInternalRead() && "Bundles not supported");
+ assert(MO.readsReg() && "reading use");
+ useVirtReg(MI, I, Reg);
}
// Allocate undef operands. This is a separate step because in a situation
@@ -1133,76 +1321,40 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
}
- // Track registers defined by instruction - early clobbers and tied uses at
- // this point.
- UsedInInstr.clear();
- if (hasEarlyClobbers) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg()) continue;
- Register Reg = MO.getReg();
- if (!Reg || !Reg.isPhysical())
+ // Free early clobbers.
+ if (HasEarlyClobber) {
+ for (unsigned I = MI.getNumOperands(); I-- > 0; ) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
continue;
- // Look for physreg defs and tied uses.
- if (!MO.isDef() && !MO.isTied()) continue;
- markRegUsedInInstr(Reg);
- }
- }
-
- unsigned DefOpEnd = MI.getNumOperands();
- if (MI.isCall()) {
- // Spill all virtregs before a call. This serves one purpose: If an
- // exception is thrown, the landing pad is going to expect to find
- // registers in their spill slots.
- // Note: although this is appealing to just consider all definitions
- // as call-clobbered, this is not correct because some of those
- // definitions may be used later on and we do not want to reuse
- // those for virtual registers in between.
- LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
- spillAll(MI, /*OnlyLiveOut*/ false);
- }
-
- // Third scan.
- // Mark all physreg defs as used before allocating virtreg defs.
- for (unsigned I = 0; I != DefOpEnd; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
- continue;
- Register Reg = MO.getReg();
-
- if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg))
- continue;
- definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
- }
+ // subreg defs don't free the full register. We left the subreg number
+ // around as a marker in setPhysReg() to recognize this case here.
+ if (MO.getSubReg() != 0) {
+ MO.setSubReg(0);
+ continue;
+ }
- // Fourth scan.
- // Allocate defs and collect dead defs.
- for (unsigned I = 0; I != DefOpEnd; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
- continue;
- Register Reg = MO.getReg();
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(Reg.isPhysical() && "should have register assigned");
+
+ // We sometimes get odd situations like:
+ // early-clobber %x0 = INSTRUCTION %x0
+ // which is semantically questionable as the early-clobber should
+ // apply before the use. But in practice we consider the use to
+ // happen before the early clobber now. Don't free the early clobber
+ // register in this case.
+ if (MI.readsRegister(Reg, TRI))
+ continue;
- // We have already dealt with phys regs in the previous scan.
- if (Reg.isPhysical())
- continue;
- MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
- if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
- VirtDead.push_back(Reg);
- CopyDstReg = Register(); // cancel coalescing;
- } else
- CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
+ freePhysReg(Reg);
+ }
}
- // Kill dead defs after the scan to ensure that multiple defs of the same
- // register are allocated identically. We didn't need to do this for uses
- // because we are crerating our own kill flags, and they are always at the
- // last use.
- for (Register VirtReg : VirtDead)
- killVirtReg(VirtReg);
- VirtDead.clear();
-
LLVM_DEBUG(dbgs() << "<< " << MI);
- if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
+ if (MI.isCopy() && MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+ MI.getNumOperands() == 2) {
LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n");
Coalesced.push_back(&MI);
}
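
For instructions with earlyclobber, tied or subregister defs, the code above first decides an order in which to allocate the def operands. The comparator's intent, restated over invented operand descriptors (the real code derives these fields from MachineOperand, RegisterClassInfo and the per-instruction def counts):

#include <algorithm>
#include <vector>

// Toy operand descriptor for the def-ordering heuristic.
struct ToyDefOp {
  unsigned Index;         // operand index, used as the final tie-break
  unsigned ClassSize;     // # allocatable registers in the operand's class
  unsigned ClassDefCount; // # defs in this instruction drawing on that class
  bool LiveThrough;       // flagged as livethrough by the heuristic
};

// Same ordering idea as the llvm::sort comparator in allocateInstruction:
// classes this single instruction can exhaust go first, then livethrough
// operands, then plain operand order.
void orderDefs(std::vector<ToyDefOp> &Defs) {
  std::sort(Defs.begin(), Defs.end(),
            [](const ToyDefOp &A, const ToyDefOp &B) {
    bool SmallA = A.ClassSize < A.ClassDefCount;
    bool SmallB = B.ClassSize < B.ClassDefCount;
    if (SmallA != SmallB)
      return SmallA;              // scarce classes first
    if (A.LiveThrough != B.LiveThrough)
      return A.LiveThrough;       // livethrough operands before plain defs
    return A.Index < B.Index;
  });
}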
@@ -1219,23 +1371,22 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
if (!Register::isVirtualRegister(Reg))
return;
+ // Already spilled to a stackslot?
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS != -1) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ updateDbgValueForSpill(MI, SS);
+ LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI);
+ return;
+ }
+
// See if this virtual register has already been allocated to a physical
// register or spilled to a stack slot.
LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
setPhysReg(MI, MO, LRI->PhysReg);
} else {
- int SS = StackSlotForVirtReg[Reg];
- if (SS != -1) {
- // Modify DBG_VALUE now that the value is in a spill slot.
- updateDbgValueForSpill(MI, SS);
- LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI);
- return;
- }
-
- // We can't allocate a physreg for a DebugValue, sorry!
- LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
- MO.setReg(Register());
+ DanglingDbgValues[Reg].push_back(&MI);
}
// If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
@@ -1243,25 +1394,46 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
LiveDbgValueMap[Reg].push_back(&MI);
}
+void RegAllocFast::handleBundle(MachineInstr &MI) {
+ MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
+ ++BundledMI;
+ while (BundledMI->isBundledWithPred()) {
+ for (unsigned I = 0; I < BundledMI->getNumOperands(); ++I) {
+ MachineOperand &MO = BundledMI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+
+ DenseMap<Register, MCPhysReg>::iterator DI;
+ DI = BundleVirtRegsMap.find(Reg);
+ assert(DI != BundleVirtRegsMap.end() && "Unassigned virtual register");
+
+ setPhysReg(MI, MO, DI->second);
+ }
+
+ ++BundledMI;
+ }
+}
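
handleBundle replays the assignments made for the BUNDLE header onto every instruction bundled with it. A standalone toy of that rewrite (the operand encoding and the map are invented; the real code walks MachineOperands and BundleVirtRegsMap):

#include <cstdint>
#include <unordered_map>
#include <vector>

// Toy operand encoding: values >= FirstVirt stand for virtual registers,
// smaller values for operands that are already physical.
using ToyOperand = uint32_t;
constexpr ToyOperand FirstVirt = 1u << 16;

// Rewrite every virtual operand inside the bundle to the physreg the BUNDLE
// header chose for it; every bundled vreg must have an assignment.
void rewriteBundle(std::vector<std::vector<ToyOperand>> &BundledInstrs,
                   const std::unordered_map<ToyOperand, ToyOperand> &VRegMap) {
  for (std::vector<ToyOperand> &Operands : BundledInstrs)
    for (ToyOperand &Op : Operands)
      if (Op >= FirstVirt)
        Op = VRegMap.at(Op);
}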
+
void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
- PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+ RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
- MachineBasicBlock::iterator MII = MBB.begin();
-
- // Add live-in registers as live.
- for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins())
- if (MRI->isAllocatable(LI.PhysReg))
- definePhysReg(MII, LI.PhysReg, regReserved);
+ for (MachineBasicBlock *Succ : MBB.successors()) {
+ for (const MachineBasicBlock::RegisterMaskPair &LI : Succ->liveins())
+ setPhysRegState(LI.PhysReg, regPreAssigned);
+ }
- VirtDead.clear();
Coalesced.clear();
- // Otherwise, sequentially allocate each instruction in the MBB.
- for (MachineInstr &MI : MBB) {
+ // Traverse block in reverse order allocating instructions one by one.
+ for (MachineInstr &MI : reverse(MBB)) {
LLVM_DEBUG(
dbgs() << "\n>> " << MI << "Regs:";
dumpState()
@@ -1275,11 +1447,22 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
}
allocateInstruction(MI);
+
+ // Once the BUNDLE header has been assigned registers, the same assignments
+ // need to be applied to the bundled MIs.
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ handleBundle(MI);
+ }
}
+ LLVM_DEBUG(
+ dbgs() << "Begin Regs:";
+ dumpState()
+ );
+
// Spill all physical registers holding virtual registers now.
- LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
- spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true);
+ LLVM_DEBUG(dbgs() << "Loading live registers at begin of block.\n");
+ reloadAtBegin(MBB);
// Erase all the coalesced copies. We are delaying it until now because
// LiveVirtRegs might refer to the instrs.
@@ -1287,6 +1470,20 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
MBB.erase(MI);
NumCoalesced += Coalesced.size();
+ for (auto &UDBGPair : DanglingDbgValues) {
+ for (MachineInstr *DbgValue : UDBGPair.second) {
+ assert(DbgValue->isDebugValue() && "expected DBG_VALUE");
+ MachineOperand &MO = DbgValue->getOperand(0);
+ // Nothing to do if the vreg was spilled in the meantime.
+ if (!MO.isReg())
+ continue;
+ LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
+ << '\n');
+ MO.setReg(0);
+ }
+ }
+ DanglingDbgValues.clear();
+
LLVM_DEBUG(MBB.dump());
}
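
allocateBasicBlock ties the pieces together: the successors' live-ins are pre-assigned so they cannot be handed out, the instructions are processed in reverse, and reloadAtBegin finally inserts reloads for whatever is still live at the top of the block. A schematic standalone driver under those assumptions (the callbacks stand in for the real member functions):

#include <vector>

// Toy block: successor live-in physregs plus stand-ins for MachineInstrs.
struct ToyBlock {
  std::vector<unsigned> SuccLiveInPhysRegs;
  std::vector<unsigned> Instrs;
};

template <typename PreAssignFn, typename AllocFn, typename ReloadFn>
void allocateBlock(const ToyBlock &B, PreAssignFn PreAssign, AllocFn Alloc,
                   ReloadFn ReloadAtBegin) {
  for (unsigned PhysReg : B.SuccLiveInPhysRegs)
    PreAssign(PhysReg);                      // block out live-out registers
  for (auto It = B.Instrs.rbegin(), E = B.Instrs.rend(); It != E; ++It)
    Alloc(*It);                              // bottom-up allocation
  ReloadAtBegin();                           // reload still-live values on entry
}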
@@ -1300,8 +1497,11 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
MFI = &MF.getFrameInfo();
MRI->freezeReservedRegs(MF);
RegClassInfo.runOnMachineFunction(MF);
+ unsigned NumRegUnits = TRI->getNumRegUnits();
UsedInInstr.clear();
- UsedInInstr.setUniverse(TRI->getNumRegUnits());
+ UsedInInstr.setUniverse(NumRegUnits);
+ PhysRegUses.clear();
+ PhysRegUses.setUniverse(NumRegUnits);
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 41cf00261265..166414e4ffa1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -147,7 +147,7 @@ class RAGreedy : public MachineFunctionPass,
// Convenient shortcuts.
using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
- using SmallVirtRegSet = SmallSet<unsigned, 16>;
+ using SmallVirtRegSet = SmallSet<Register, 16>;
// context
MachineFunction *MF;
@@ -172,6 +172,7 @@ class RAGreedy : public MachineFunctionPass,
std::unique_ptr<Spiller> SpillerInstance;
PQueue Queue;
unsigned NextCascade;
+ std::unique_ptr<VirtRegAuxInfo> VRAI;
// Live ranges pass through a number of stages as we try to allocate them.
// Some of the stages may also create new live ranges:
@@ -247,19 +248,19 @@ class RAGreedy : public MachineFunctionPass,
IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
LiveRangeStage getStage(const LiveInterval &VirtReg) const {
- return ExtraRegInfo[VirtReg.reg].Stage;
+ return ExtraRegInfo[VirtReg.reg()].Stage;
}
void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
ExtraRegInfo.resize(MRI->getNumVirtRegs());
- ExtraRegInfo[VirtReg.reg].Stage = Stage;
+ ExtraRegInfo[VirtReg.reg()].Stage = Stage;
}
template<typename Iterator>
void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
ExtraRegInfo.resize(MRI->getNumVirtRegs());
for (;Begin != End; ++Begin) {
- unsigned Reg = *Begin;
+ Register Reg = *Begin;
if (ExtraRegInfo[Reg].Stage == RS_New)
ExtraRegInfo[Reg].Stage = NewStage;
}
@@ -290,8 +291,8 @@ class RAGreedy : public MachineFunctionPass,
public:
using EvictorInfo =
- std::pair<unsigned /* evictor */, unsigned /* physreg */>;
- using EvicteeInfo = llvm::DenseMap<unsigned /* evictee */, EvictorInfo>;
+ std::pair<Register /* evictor */, MCRegister /* physreg */>;
+ using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>;
private:
/// Each Vreg that has been evicted in the last stage of selectOrSplit will
@@ -307,14 +308,14 @@ class RAGreedy : public MachineFunctionPass,
/// longer relevant.
/// \param Evictee The evictee Vreg for whom we want to clear collected
/// eviction info.
- void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); }
+ void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); }
/// Track new eviction.
/// The Evictor vreg has evicted the Evictee vreg from Physreg.
/// \param PhysReg The physical register Evictee was evicted from.
/// \param Evictor The evictor Vreg that evicted Evictee.
/// \param Evictee The evictee Vreg.
- void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
+ void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) {
Evictees[Evictee].first = Evictor;
Evictees[Evictee].second = PhysReg;
}
@@ -323,7 +324,7 @@ class RAGreedy : public MachineFunctionPass,
/// \param Evictee The evictee vreg.
/// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
/// nobody has evicted Evictee from PhysReg.
- EvictorInfo getEvictor(unsigned Evictee) {
+ EvictorInfo getEvictor(Register Evictee) {
if (Evictees.count(Evictee)) {
return Evictees[Evictee];
}
@@ -348,7 +349,7 @@ class RAGreedy : public MachineFunctionPass,
/// Global live range splitting candidate info.
struct GlobalSplitCandidate {
// Register intended for assignment, or 0.
- unsigned PhysReg;
+ MCRegister PhysReg;
// SplitKit interval index for this candidate.
unsigned IntvIdx;
@@ -360,7 +361,7 @@ class RAGreedy : public MachineFunctionPass,
BitVector LiveBundles;
SmallVector<unsigned, 8> ActiveBlocks;
- void reset(InterferenceCache &Cache, unsigned Reg) {
+ void reset(InterferenceCache &Cache, MCRegister Reg) {
PhysReg = Reg;
IntvIdx = 0;
Intf.setPhysReg(Cache, Reg);
@@ -368,12 +369,12 @@ class RAGreedy : public MachineFunctionPass,
ActiveBlocks.clear();
}
- // Set B[i] = C for every live bundle where B[i] was NoCand.
+ // Set B[I] = C for every live bundle where B[I] was NoCand.
unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
unsigned Count = 0;
- for (unsigned i : LiveBundles.set_bits())
- if (B[i] == NoCand) {
- B[i] = C;
+ for (unsigned I : LiveBundles.set_bits())
+ if (B[I] == NoCand) {
+ B[I] = C;
Count++;
}
return Count;
@@ -417,7 +418,8 @@ public:
Spiller &spiller() override { return *SpillerInstance; }
void enqueue(LiveInterval *LI) override;
LiveInterval *dequeue() override;
- Register selectOrSplit(LiveInterval&, SmallVectorImpl<Register>&) override;
+ MCRegister selectOrSplit(LiveInterval &,
+ SmallVectorImpl<Register> &) override;
void aboutToRemoveInterval(LiveInterval &) override;
/// Perform register allocation.
@@ -428,15 +430,20 @@ public:
MachineFunctionProperties::Property::NoPHIs);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
static char ID;
private:
- Register selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned = 0);
+ MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &,
+ SmallVirtRegSet &, unsigned = 0);
- bool LRE_CanEraseVirtReg(unsigned) override;
- void LRE_WillShrinkVirtReg(unsigned) override;
- void LRE_DidCloneVirtReg(unsigned, unsigned) override;
+ bool LRE_CanEraseVirtReg(Register) override;
+ void LRE_WillShrinkVirtReg(Register) override;
+ void LRE_DidCloneVirtReg(Register, Register) override;
void enqueue(PQueue &CurQueue, LiveInterval *LI);
LiveInterval *dequeue(PQueue &CurQueue);
@@ -444,7 +451,7 @@ private:
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
bool growRegion(GlobalSplitCandidate &Cand);
- bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
+ bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order);
bool splitCanCauseLocalSpill(unsigned VirtRegToSplit,
@@ -455,20 +462,20 @@ private:
bool *CanCauseEvictionChain);
bool calcCompactRegion(GlobalSplitCandidate&);
void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
- void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+ void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
Register canReassign(LiveInterval &VirtReg, Register PrevReg);
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
- bool canEvictInterference(LiveInterval&, Register, bool, EvictionCost&,
- const SmallVirtRegSet&);
- bool canEvictInterferenceInRange(LiveInterval &VirtReg, Register oPhysReg,
+ bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
+ const SmallVirtRegSet &);
+ bool canEvictInterferenceInRange(LiveInterval &VirtReg, MCRegister PhysReg,
SlotIndex Start, SlotIndex End,
EvictionCost &MaxCost);
- unsigned getCheapestEvicteeWeight(const AllocationOrder &Order,
- LiveInterval &VirtReg, SlotIndex Start,
- SlotIndex End, float *BestEvictWeight);
- void evictInterference(LiveInterval&, Register,
- SmallVectorImpl<Register>&);
- bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
+ MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
+ LiveInterval &VirtReg, SlotIndex Start,
+ SlotIndex End, float *BestEvictWeight);
+ void evictInterference(LiveInterval &, MCRegister,
+ SmallVectorImpl<Register> &);
+ bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
SmallLISet &RecoloringCandidates,
const SmallVirtRegSet &FixedRegisters);
@@ -478,8 +485,8 @@ private:
unsigned tryEvict(LiveInterval&, AllocationOrder&,
SmallVectorImpl<Register>&, unsigned,
const SmallVirtRegSet&);
- unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<Register>&);
+ MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &);
/// Calculate cost of region splitting.
unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
AllocationOrder &Order,
@@ -492,9 +499,10 @@ private:
SmallVectorImpl<Register> &NewVRegs);
/// Check other options before using a callee-saved register for the first
/// time.
- unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
- Register PhysReg, unsigned &CostPerUseLimit,
- SmallVectorImpl<Register> &NewVRegs);
+ MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg,
+ AllocationOrder &Order, MCRegister PhysReg,
+ unsigned &CostPerUseLimit,
+ SmallVectorImpl<Register> &NewVRegs);
void initializeCSRCost();
unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<Register>&);
@@ -528,8 +536,8 @@ private:
};
using HintsInfo = SmallVector<HintInfo, 4>;
- BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
- void collectHintInfo(unsigned, HintsInfo &);
+ BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister);
+ void collectHintInfo(Register, HintsInfo &);
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
@@ -626,7 +634,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//
-bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
LiveInterval &LI = LIS->getInterval(VirtReg);
if (VRM->hasPhys(VirtReg)) {
Matrix->unassign(LI);
@@ -641,7 +649,7 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
return false;
}
-void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
if (!VRM->hasPhys(VirtReg))
return;
@@ -651,7 +659,7 @@ void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
enqueue(&LI);
}
-void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
// Cloning a register we haven't even heard about yet? Just ignore it.
if (!ExtraRegInfo.inBounds(Old))
return;
@@ -677,9 +685,8 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Prioritize live ranges by size, assigning larger ranges first.
// The queue holds (size, reg) pairs.
const unsigned Size = LI->getSize();
- const unsigned Reg = LI->reg;
- assert(Register::isVirtualRegister(Reg) &&
- "Can only enqueue virtual registers");
+ const Register Reg = LI->reg();
+ assert(Reg.isVirtual() && "Can only enqueue virtual registers");
unsigned Prio;
ExtraRegInfo.grow(Reg);
@@ -756,26 +763,33 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
const SmallVirtRegSet &FixedRegisters) {
- Order.rewind();
Register PhysReg;
- while ((PhysReg = Order.next()))
- if (!Matrix->checkInterference(VirtReg, PhysReg))
- break;
- if (!PhysReg || Order.isHint())
+ for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
+ assert(*I);
+ if (!Matrix->checkInterference(VirtReg, *I)) {
+ if (I.isHint())
+ return *I;
+ else
+ PhysReg = *I;
+ }
+ }
+ if (!PhysReg.isValid())
return PhysReg;
// PhysReg is available, but there may be a better choice.
// If we missed a simple hint, try to cheaply evict interference from the
// preferred register.
- if (Register Hint = MRI->getSimpleHint(VirtReg.reg))
+ if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
if (Order.isHint(Hint)) {
- LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');
+ MCRegister PhysHint = Hint.asMCReg();
+ LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
EvictionCost MaxCost;
MaxCost.setBrokenHints(1);
- if (canEvictInterference(VirtReg, Hint, true, MaxCost, FixedRegisters)) {
- evictInterference(VirtReg, Hint, NewVRegs);
- return Hint;
+ if (canEvictInterference(VirtReg, PhysHint, true, MaxCost,
+ FixedRegisters)) {
+ evictInterference(VirtReg, PhysHint, NewVRegs);
+ return PhysHint;
}
// Record the missed hint, we may be able to recover
// at the end if the surrounding allocation changed.
@@ -800,13 +814,14 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg,
//===----------------------------------------------------------------------===//
Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) {
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
- Register PhysReg;
- while ((PhysReg = Order.next())) {
- if (PhysReg == PrevReg)
+ auto Order =
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
+ MCRegister PhysReg;
+ for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
+ if ((*I).id() == PrevReg.id())
continue;
- MCRegUnitIterator Units(PhysReg, TRI);
+ MCRegUnitIterator Units(*I, TRI);
for (; Units.isValid(); ++Units) {
// Instantiate a "subquery", not to be confused with the Queries array.
LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
@@ -815,7 +830,7 @@ Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) {
}
// If no units have interference, break out with the current PhysReg.
if (!Units.isValid())
- break;
+ PhysReg = *I;
}
if (PhysReg)
LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
@@ -846,8 +861,8 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
if (CanSplit && IsHint && !BreaksHint)
return true;
- if (A.weight > B.weight) {
- LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
+ if (A.weight() > B.weight()) {
+ LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n');
return true;
}
return false;
@@ -862,7 +877,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
bool IsHint, EvictionCost &MaxCost,
const SmallVirtRegSet &FixedRegisters) {
// It is only possible to evict virtual register interference.
@@ -878,7 +893,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
//
// This works out so a register without a cascade number is allowed to evict
// anything, and it can be evicted by anything.
- unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
if (!Cascade)
Cascade = NextCascade;
@@ -890,15 +905,14 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
return false;
// Check if any interfering live range is heavier than MaxWeight.
- for (unsigned i = Q.interferingVRegs().size(); i; --i) {
- LiveInterval *Intf = Q.interferingVRegs()[i - 1];
- assert(Register::isVirtualRegister(Intf->reg) &&
+ for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
+ assert(Register::isVirtualRegister(Intf->reg()) &&
"Only expecting virtual register interference from query");
// Do not allow eviction of a virtual register if we are in the middle
// of last-chance recoloring and this virtual register is one that we
// have scavenged a physical register for.
- if (FixedRegisters.count(Intf->reg))
+ if (FixedRegisters.count(Intf->reg()))
return false;
// Never evict spill products. They cannot split or spill.
@@ -910,12 +924,14 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
//
// Also allow urgent evictions of unspillable ranges from a strictly
// larger allocation order.
- bool Urgent = !VirtReg.isSpillable() &&
- (Intf->isSpillable() ||
- RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
- RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
+ bool Urgent =
+ !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
+ RegClassInfo.getNumAllocatableRegs(
+ MRI->getRegClass(Intf->reg())));
// Only evict older cascades or live ranges without a cascade.
- unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+ unsigned IntfCascade = ExtraRegInfo[Intf->reg()].Cascade;
if (Cascade <= IntfCascade) {
if (!Urgent)
return false;
@@ -924,10 +940,10 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
Cost.BrokenHints += 10;
}
// Would this break a satisfied hint?
- bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
// Update eviction cost.
Cost.BrokenHints += BreaksHint;
- Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
// Abort if this would be too expensive.
if (!(Cost < MaxCost))
return false;
@@ -960,7 +976,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
/// when returning true.
/// \return True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
- Register PhysReg, SlotIndex Start,
+ MCRegister PhysReg, SlotIndex Start,
SlotIndex End,
EvictionCost &MaxCost) {
EvictionCost Cost;
@@ -969,25 +985,23 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// Check if any interfering live range is heavier than MaxWeight.
- for (unsigned i = Q.interferingVRegs().size(); i; --i) {
- LiveInterval *Intf = Q.interferingVRegs()[i - 1];
-
+ for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
// Check if the interference overlaps the segment of interest.
if (!Intf->overlaps(Start, End))
continue;
// Cannot evict non virtual reg interference.
- if (!Register::isVirtualRegister(Intf->reg))
+ if (!Register::isVirtualRegister(Intf->reg()))
return false;
// Never evict spill products. They cannot split or spill.
if (getStage(*Intf) == RS_Done)
return false;
// Would this break a satisfied hint?
- bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
// Update eviction cost.
Cost.BrokenHints += BreaksHint;
- Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
// Abort if this would be too expensive.
if (!(Cost < MaxCost))
return false;
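Both interference loops above accumulate an eviction cost and bail out as soon as !(Cost < MaxCost). A self-contained sketch of that comparison follows; the Cost struct is a stand-in for the allocator's eviction cost, assuming a lexicographic ordering in which broken hints dominate and the heaviest interfering spill weight breaks ties.

#include <cassert>
#include <tuple>

// Stand-in eviction cost: fewer broken hints always wins, and the maximum
// interfering spill weight decides ties.
struct Cost {
  unsigned BrokenHints = 0;
  float MaxWeight = 0.0f;
  bool operator<(const Cost &RHS) const {
    return std::tie(BrokenHints, MaxWeight) <
           std::tie(RHS.BrokenHints, RHS.MaxWeight);
  }
};

int main() {
  // Hypothetical budget: no broken hints allowed, weight must stay under 4.0.
  Cost MaxCost{0, 4.0f};

  Cost C;
  C.MaxWeight = 3.5f;     // heaviest interfering weight seen so far
  assert(C < MaxCost);    // still strictly cheaper: keep scanning

  C.BrokenHints += 1;     // this interference would break a satisfied hint
  assert(!(C < MaxCost)); // no longer cheaper: the loop aborts here
  return 0;
}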
@@ -1012,17 +1026,17 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
/// \param BestEvictweight The eviction cost of that eviction
/// \return The PhysReg which is the best candidate for eviction and the
/// eviction cost in BestEvictweight
-unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
- LiveInterval &VirtReg,
- SlotIndex Start, SlotIndex End,
- float *BestEvictweight) {
+MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
+ LiveInterval &VirtReg,
+ SlotIndex Start, SlotIndex End,
+ float *BestEvictweight) {
EvictionCost BestEvictCost;
BestEvictCost.setMax();
- BestEvictCost.MaxWeight = VirtReg.weight;
- unsigned BestEvicteePhys = 0;
+ BestEvictCost.MaxWeight = VirtReg.weight();
+ MCRegister BestEvicteePhys;
// Go over all physical registers and find the best candidate for eviction
- for (auto PhysReg : Order.getOrder()) {
+ for (MCRegister PhysReg : Order.getOrder()) {
if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End,
BestEvictCost))
@@ -1038,14 +1052,14 @@ unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
/// evictInterference - Evict any interferring registers that prevent VirtReg
/// from being assigned to Physreg. This assumes that canEvictInterference
/// returned true.
-void RAGreedy::evictInterference(LiveInterval &VirtReg, Register PhysReg,
+void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
SmallVectorImpl<Register> &NewVRegs) {
// Make sure that VirtReg has a cascade number, and assign that cascade
// number to every evicted register. These live ranges can then only be
// evicted by a newer cascade, preventing infinite loops.
- unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
if (!Cascade)
- Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+ Cascade = ExtraRegInfo[VirtReg.reg()].Cascade = NextCascade++;
LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
@@ -1064,21 +1078,20 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, Register PhysReg,
}
// Evict them second. This will invalidate the queries.
- for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
- LiveInterval *Intf = Intfs[i];
+ for (LiveInterval *Intf : Intfs) {
// The same VirtReg may be present in multiple RegUnits. Skip duplicates.
- if (!VRM->hasPhys(Intf->reg))
+ if (!VRM->hasPhys(Intf->reg()))
continue;
- LastEvicted.addEviction(PhysReg, VirtReg.reg, Intf->reg);
+ LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());
Matrix->unassign(*Intf);
- assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ assert((ExtraRegInfo[Intf->reg()].Cascade < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
"Cannot decrease cascade number, illegal eviction");
- ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ExtraRegInfo[Intf->reg()].Cascade = Cascade;
++NumEvicted;
- NewVRegs.push_back(Intf->reg);
+ NewVRegs.push_back(Intf->reg());
}
}
@@ -1107,17 +1120,17 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost;
BestCost.setMax();
- unsigned BestPhys = 0;
+ MCRegister BestPhys;
unsigned OrderLimit = Order.getOrder().size();
// When we are just looking for a reduced cost per use, don't break any
// hints, and only evict smaller spill weights.
if (CostPerUseLimit < ~0u) {
BestCost.BrokenHints = 0;
- BestCost.MaxWeight = VirtReg.weight;
+ BestCost.MaxWeight = VirtReg.weight();
// Check if any registers in RC are below CostPerUseLimit.
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
unsigned MinCost = RegClassInfo.getMinCost(RC);
if (MinCost >= CostPerUseLimit) {
LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
@@ -1134,8 +1147,10 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
}
}
- Order.rewind();
- while (MCRegister PhysReg = Order.next(OrderLimit)) {
+ for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
+ ++I) {
+ MCRegister PhysReg = *I;
+ assert(PhysReg);
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
@@ -1156,7 +1171,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
BestPhys = PhysReg;
// Stop if the hint can be used.
- if (Order.isHint())
+ if (I.isHint())
break;
}
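The tryEvict loop above drops the rewind()/next() cursor in favour of an explicit iterator whose I.isHint() answers whether the current candidate is the register hint. The sketch below is a self-contained stand-in, not the real AllocationOrder, just to show how an iterator can carry that query while the body of the loop stays unchanged.

#include <cassert>
#include <utility>
#include <vector>

// Stand-in allocation order: hint registers first, then the regular order.
class Order {
  std::vector<unsigned> Regs;
  unsigned NumHints;

public:
  Order(std::vector<unsigned> Regs, unsigned NumHints)
      : Regs(std::move(Regs)), NumHints(NumHints) {}

  class iterator {
    const Order *O;
    unsigned Pos;

  public:
    iterator(const Order *O, unsigned Pos) : O(O), Pos(Pos) {}
    unsigned operator*() const { return O->Regs[Pos]; }
    iterator &operator++() { ++Pos; return *this; }
    bool operator!=(const iterator &RHS) const { return Pos != RHS.Pos; }
    // The query that replaces Order.isHint() on the implicit cursor.
    bool isHint() const { return Pos < O->NumHints; }
  };

  iterator begin() const { return iterator(this, 0); }
  iterator end() const {
    return iterator(this, static_cast<unsigned>(Regs.size()));
  }
};

int main() {
  Order O({/*hint*/ 5, 1, 2, 3}, /*NumHints=*/1);
  unsigned Visited = 0;
  for (auto I = O.begin(), E = O.end(); I != E; ++I) {
    ++Visited;
    if (I.isHint()) // stop as soon as the hint can be used
      break;
  }
  assert(Visited == 1);
  return 0;
}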
@@ -1183,9 +1198,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
// Reset interference dependent info.
SplitConstraints.resize(UseBlocks.size());
BlockFrequency StaticCost = 0;
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
- SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+ for (unsigned I = 0; I != UseBlocks.size(); ++I) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
BC.Number = BI.MBB->getNumber();
Intf.moveToBlock(BC.Number);
@@ -1256,8 +1271,7 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
unsigned TBS[GroupSize];
unsigned B = 0, T = 0;
- for (unsigned i = 0; i != Blocks.size(); ++i) {
- unsigned Number = Blocks[i];
+ for (unsigned Number : Blocks) {
Intf.moveToBlock(Number);
if (!Intf.hasInterference()) {
@@ -1314,8 +1328,7 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
while (true) {
ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
// Find new through blocks in the periphery of PrefRegBundles.
- for (int i = 0, e = NewBundles.size(); i != e; ++i) {
- unsigned Bundle = NewBundles[i];
+ for (unsigned Bundle : NewBundles) {
// Look at all blocks connected to Bundle in the full graph.
ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
@@ -1367,7 +1380,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
return false;
// Compact regions don't correspond to any physreg.
- Cand.reset(IntfCache, 0);
+ Cand.reset(IntfCache, MCRegister::NoRegister);
LLVM_DEBUG(dbgs() << "Compact region bundles");
@@ -1395,8 +1408,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
}
LLVM_DEBUG({
- for (int i : Cand.LiveBundles.set_bits())
- dbgs() << " EB#" << i;
+ for (int I : Cand.LiveBundles.set_bits())
+ dbgs() << " EB#" << I;
dbgs() << ".\n";
});
return true;
@@ -1407,8 +1420,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
BlockFrequency RAGreedy::calcSpillCost() {
BlockFrequency Cost = 0;
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
unsigned Number = BI.MBB->getNumber();
// We normally only need one spill instruction - a load or a store.
Cost += SpillPlacer->getBlockFrequency(Number);
@@ -1473,20 +1485,20 @@ BlockFrequency RAGreedy::calcSpillCost() {
/// artifact of Evictee.
/// \return True if splitting Evictee may cause a bad eviction chain, false
/// otherwise.
-bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,
+bool RAGreedy::splitCanCauseEvictionChain(Register Evictee,
GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order) {
EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee);
unsigned Evictor = VregEvictorInfo.first;
- unsigned PhysReg = VregEvictorInfo.second;
+ MCRegister PhysReg = VregEvictorInfo.second;
// No actual evictor.
if (!Evictor || !PhysReg)
return false;
float MaxWeight = 0;
- unsigned FutureEvictedPhysReg =
+ MCRegister FutureEvictedPhysReg =
getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);
@@ -1511,10 +1523,9 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,
// Now, check to see if the local interval we will create is going to be
// expensive enough to evict somebody. If so, this may cause a bad eviction
// chain.
- VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI);
float splitArtifactWeight =
- VRAI.futureWeight(LIS->getInterval(Evictee),
- Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
+ VRAI->futureWeight(LIS->getInterval(Evictee),
+ Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight)
return false;
@@ -1548,16 +1559,15 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
// Check if the local interval will evict a cheaper interval.
float CheapestEvictWeight = 0;
- unsigned FutureEvictedPhysReg = getCheapestEvicteeWeight(
+ MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight(
Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(),
Cand.Intf.last(), &CheapestEvictWeight);
// Have we found an interval that can be evicted?
if (FutureEvictedPhysReg) {
- VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI);
float splitArtifactWeight =
- VRAI.futureWeight(LIS->getInterval(VirtRegToSplit),
- Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
+ VRAI->futureWeight(LIS->getInterval(VirtRegToSplit),
+ Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
// Will the weight of the local interval be higher than the cheapest evictee
// weight? If so it will evict it and will not cause a spill.
if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight)
@@ -1578,11 +1588,11 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
bool *CanCauseEvictionChain) {
BlockFrequency GlobalCost = 0;
const BitVector &LiveBundles = Cand.LiveBundles;
- unsigned VirtRegToSplit = SA->getParent().reg;
+ Register VirtRegToSplit = SA->getParent().reg();
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
- SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+ for (unsigned I = 0; I != UseBlocks.size(); ++I) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
unsigned Ins = 0;
@@ -1620,8 +1630,7 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
}
- for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
- unsigned Number = Cand.ActiveBlocks[i];
+ for (unsigned Number : Cand.ActiveBlocks) {
bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
if (!RegIn && !RegOut)
@@ -1679,13 +1688,12 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// Isolate even single instructions when dealing with a proper sub-class.
// That guarantees register class inflation for the stack interval because it
// is all copies.
- unsigned Reg = SA->getParent().reg;
+ Register Reg = SA->getParent().reg();
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
// First handle all the blocks with uses.
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
unsigned Number = BI.MBB->getNumber();
unsigned IntvIn = 0, IntvOut = 0;
SlotIndex IntfIn, IntfOut;
@@ -1730,8 +1738,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
BitVector Todo = SA->getThroughBlocks();
for (unsigned c = 0; c != UsedCands.size(); ++c) {
ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- unsigned Number = Blocks[i];
+ for (unsigned Number : Blocks) {
if (!Todo.test(Number))
continue;
Todo.reset(Number);
@@ -1774,8 +1781,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// - Candidate intervals can be assigned to Cand.PhysReg.
// - Block-local splits are candidates for local splitting.
// - DCE leftovers should go back on the queue.
- for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
- LiveInterval &Reg = LIS->getInterval(LREdit.get(i));
+ for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
+ LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
// Ignore old intervals from DCE.
if (getStage(Reg) != RS_New)
@@ -1783,14 +1790,14 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// Remainder interval. Don't try splitting again, spill if it doesn't
// allocate.
- if (IntvMap[i] == 0) {
+ if (IntvMap[I] == 0) {
setStage(Reg, RS_Spill);
continue;
}
// Global intervals. Allow repeated splitting as long as the number of live
// blocks is strictly decreasing.
- if (IntvMap[i] < NumGlobalIntvs) {
+ if (IntvMap[I] < NumGlobalIntvs) {
if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
<< " blocks as original.\n");
@@ -1808,10 +1815,11 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
MF->verify(this, "After splitting live range around region");
}
-unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<Register> &NewVRegs) {
+MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs) {
if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
- return 0;
+ return MCRegister::NoRegister;
unsigned NumCands = 0;
BlockFrequency SpillCost = calcSpillCost();
BlockFrequency BestCost;
@@ -1841,12 +1849,12 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// current max frequency.
if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
CanCauseEvictionChain) {
- return 0;
+ return MCRegister::NoRegister;
}
// No solutions found, fall back to single block splitting.
if (!HasCompact && BestCand == NoCand)
- return 0;
+ return MCRegister::NoRegister;
return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}
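tryRegionSplit now returns MCRegister and reports failure as MCRegister::NoRegister rather than a bare 0. The sketch below illustrates that pattern with a hypothetical PhysRegHandle type, not the LLVM class: call sites keep testing the result in a boolean context, while the return type and the named sentinel state what kind of value comes back.

#include <cassert>

// Hypothetical stand-in for a typed physical-register handle.
class PhysRegHandle {
  unsigned Id = 0;

public:
  static constexpr unsigned NoRegister = 0u;
  constexpr PhysRegHandle() = default;
  constexpr PhysRegHandle(unsigned Id) : Id(Id) {}
  constexpr bool isValid() const { return Id != NoRegister; }
  explicit constexpr operator bool() const { return isValid(); }
  constexpr unsigned id() const { return Id; }
};

// Failure is a named sentinel instead of a magic 0; success is any valid id.
PhysRegHandle trySomething(bool CanSplit) {
  if (!CanSplit)
    return PhysRegHandle::NoRegister;
  return PhysRegHandle(42); // hypothetical register id
}

int main() {
  assert(!trySomething(false)); // reads like the old "if (!PhysReg)"
  assert(trySomething(true).id() == 42);
  return 0;
}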
@@ -1857,8 +1865,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
unsigned &NumCands, bool IgnoreCSR,
bool *CanCauseEvictionChain) {
unsigned BestCand = NoCand;
- Order.rewind();
- while (unsigned PhysReg = Order.next()) {
+ for (MCPhysReg PhysReg : Order) {
+ assert(PhysReg);
if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
continue;
@@ -1867,12 +1875,12 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
if (NumCands == IntfCache.getMaxCursors()) {
unsigned WorstCount = ~0u;
unsigned Worst = 0;
- for (unsigned i = 0; i != NumCands; ++i) {
- if (i == BestCand || !GlobalCand[i].PhysReg)
+ for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
+ if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
continue;
- unsigned Count = GlobalCand[i].LiveBundles.count();
+ unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
if (Count < WorstCount) {
- Worst = i;
+ Worst = CandIndex;
WorstCount = Count;
}
}
@@ -1923,8 +1931,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
LLVM_DEBUG({
dbgs() << ", total = ";
MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
- for (int i : Cand.LiveBundles.set_bits())
- dbgs() << " EB#" << i;
+ for (int I : Cand.LiveBundles.set_bits())
+ dbgs() << " EB#" << I;
dbgs() << ".\n";
});
if (Cost < BestCost) {
@@ -1942,7 +1950,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
// See splitCanCauseEvictionChain for detailed description of bad
// eviction chain scenarios.
LLVM_DEBUG(dbgs() << "Best split candidate of vreg "
- << printReg(VirtReg.reg, TRI) << " may ");
+ << printReg(VirtReg.reg(), TRI) << " may ");
if (!(*CanCauseEvictionChain))
LLVM_DEBUG(dbgs() << "not ");
LLVM_DEBUG(dbgs() << "cause bad eviction chain\n");
@@ -2001,13 +2009,12 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs) {
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
- Register Reg = VirtReg.reg;
+ Register Reg = VirtReg.reg();
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
SE->splitSingleBlock(BI);
}
@@ -2026,9 +2033,9 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Sort out the new intervals created by splitting. The remainder interval
// goes straight to spilling, the new local ranges get to stay RS_New.
- for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
- LiveInterval &LI = LIS->getInterval(LREdit.get(i));
- if (getStage(LI) == RS_New && IntvMap[i] == 0)
+ for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
+ LiveInterval &LI = LIS->getInterval(LREdit.get(I));
+ if (getStage(LI) == RS_New && IntvMap[I] == 0)
setStage(LI, RS_Spill);
}
@@ -2044,7 +2051,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
- const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC,
+ const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
const RegisterClassInfo &RCI) {
assert(SuperRC && "Invalid register class");
@@ -2067,7 +2074,7 @@ static unsigned getNumAllocatableRegsForConstraints(
unsigned
RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs) {
- const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
+ const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
// There is no point to this if there are no larger sub-classes.
if (!RegClassInfo.isProperSubClass(CurRC))
return 0;
@@ -2091,18 +2098,18 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// the constraints on the virtual register.
// Otherwise, splitting just inserts uncoalescable copies that do not help
// the allocation.
- for (unsigned i = 0; i != Uses.size(); ++i) {
- if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
+ for (const auto &Use : Uses) {
+ if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
if (MI->isFullCopy() ||
SuperRCNumAllocatableRegs ==
- getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
- TRI, RCI)) {
- LLVM_DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
+ getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
+ TII, TRI, RCI)) {
+ LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
continue;
}
SE->openIntv();
- SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
- SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]);
+ SlotIndex SegStart = SE->enterIntvBefore(Use);
+ SlotIndex SegStop = SE->leaveIntvAfter(Use);
SE->useIntv(SegStart, SegStop);
}
@@ -2113,7 +2120,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
- DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
+ DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
ExtraRegInfo.resize(MRI->getNumVirtRegs());
// Assign all new registers to RS_Spill. This was the last chance.
@@ -2128,9 +2135,9 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
-/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
-void RAGreedy::calcGapWeights(unsigned PhysReg,
+void RAGreedy::calcGapWeights(MCRegister PhysReg,
SmallVectorImpl<float> &GapWeight) {
assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
@@ -2169,7 +2176,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
break;
// Update the gaps covered by IntI.
- const float weight = IntI.value()->weight;
+ const float weight = IntI.value()->weight();
for (; Gap != NumGaps; ++Gap) {
GapWeight[Gap] = std::max(GapWeight[Gap], weight);
if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
@@ -2231,8 +2238,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
LLVM_DEBUG({
dbgs() << "tryLocalSplit: ";
- for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- dbgs() << ' ' << Uses[i];
+ for (const auto &Use : Uses)
+ dbgs() << ' ' << Use;
dbgs() << '\n';
});
@@ -2244,25 +2251,25 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
// Constrain to VirtReg's live range.
- unsigned ri =
+ unsigned RI =
llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
- unsigned re = RMS.size();
- for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
- // Look for Uses[i] <= RMS <= Uses[i+1].
- assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
- if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+ unsigned RE = RMS.size();
+ for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
+ // Look for Uses[I] <= RMS <= Uses[I + 1].
+ assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
+ if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
continue;
// Skip a regmask on the same instruction as the last use. It doesn't
// overlap the live range.
- if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+ if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
break;
- LLVM_DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-'
- << Uses[i + 1]);
- RegMaskGaps.push_back(i);
+ LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
+ << Uses[I + 1]);
+ RegMaskGaps.push_back(I);
// Advance RI to the next gap. A regmask on one of the uses counts in
// both gaps.
- while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
- ++ri;
+ while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
+ ++RI;
}
LLVM_DEBUG(dbgs() << '\n');
}
@@ -2297,16 +2304,16 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
(1.0f / MBFI->getEntryFreq());
SmallVector<float, 8> GapWeight;
- Order.rewind();
- while (unsigned PhysReg = Order.next()) {
+ for (MCPhysReg PhysReg : Order) {
+ assert(PhysReg);
// Keep track of the largest spill weight that would need to be evicted in
- // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+ // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
calcGapWeights(PhysReg, GapWeight);
// Remove any gaps with regmask clobbers.
if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
- for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
- GapWeight[RegMaskGaps[i]] = huge_valf;
+ for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
+ GapWeight[RegMaskGaps[I]] = huge_valf;
// Try to find the best sequence of gaps to close.
// The new spill weight must be larger than any gap interference.
@@ -2324,7 +2331,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
- << '-' << Uses[SplitAfter] << " i=" << MaxGap);
+ << '-' << Uses[SplitAfter] << " I=" << MaxGap);
// Stop before the interval gets so big we wouldn't be making progress.
if (!LiveBefore && !LiveAfter) {
@@ -2373,8 +2380,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Recompute the max when necessary.
if (GapWeight[SplitBefore - 1] >= MaxGap) {
MaxGap = GapWeight[SplitBefore];
- for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
- MaxGap = std::max(MaxGap, GapWeight[i]);
+ for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
+ MaxGap = std::max(MaxGap, GapWeight[I]);
}
continue;
}
@@ -2409,7 +2416,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SE->useIntv(SegStart, SegStop);
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
- DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
+ DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
// If the new range has the same number of instructions as before, mark it as
// RS_Split2 so the next split will be forced to make progress. Otherwise,
@@ -2420,10 +2427,10 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (NewGaps >= NumGaps) {
LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: ");
assert(!ProgressRequired && "Didn't make progress when it was required.");
- for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
- if (IntvMap[i] == 1) {
- setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
- LLVM_DEBUG(dbgs() << printReg(LREdit.get(i)));
+ for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
+ if (IntvMap[I] == 1) {
+ setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
+ LLVM_DEBUG(dbgs() << printReg(LREdit.get(I)));
}
LLVM_DEBUG(dbgs() << '\n');
}
@@ -2477,7 +2484,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
// ranges already made dubious progress with region splitting, so they go
// straight to single block splitting.
if (getStage(VirtReg) < RS_Split2) {
- unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
if (PhysReg || !NewVRegs.empty())
return PhysReg;
}
@@ -2507,11 +2514,10 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
-bool
-RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
- SmallLISet &RecoloringCandidates,
- const SmallVirtRegSet &FixedRegisters) {
- const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
+bool RAGreedy::mayRecolorAllInterferences(
+ MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates,
+ const SmallVirtRegSet &FixedRegisters) {
+ const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
@@ -2523,16 +2529,16 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
CutOffInfo |= CO_Interf;
return false;
}
- for (unsigned i = Q.interferingVRegs().size(); i; --i) {
- LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
// If Intf is done and sit on the same register class as VirtReg,
// it would not be recolorable as it is in the same state as VirtReg.
// However, if VirtReg has tied defs and Intf doesn't, then
// there is still a point in examining if it can be recolorable.
if (((getStage(*Intf) == RS_Done &&
- MRI->getRegClass(Intf->reg) == CurRC) &&
- !(hasTiedDef(MRI, VirtReg.reg) && !hasTiedDef(MRI, Intf->reg))) ||
- FixedRegisters.count(Intf->reg)) {
+ MRI->getRegClass(Intf->reg()) == CurRC) &&
+ !(hasTiedDef(MRI, VirtReg.reg()) &&
+ !hasTiedDef(MRI, Intf->reg()))) ||
+ FixedRegisters.count(Intf->reg())) {
LLVM_DEBUG(
dbgs() << "Early abort: the interference is not recolorable.\n");
return false;
@@ -2587,6 +2593,9 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
unsigned Depth) {
+ if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
+ return ~0u;
+
LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
// Ranges must be Done.
assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
@@ -2605,15 +2614,15 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
SmallLISet RecoloringCandidates;
// Record the original mapping virtual register to physical register in case
// the recoloring fails.
- DenseMap<Register, Register> VirtRegToPhysReg;
+ DenseMap<Register, MCRegister> VirtRegToPhysReg;
// Mark VirtReg as fixed, i.e., it will not be recolored past this point in
// this recoloring "session".
- assert(!FixedRegisters.count(VirtReg.reg));
- FixedRegisters.insert(VirtReg.reg);
+ assert(!FixedRegisters.count(VirtReg.reg()));
+ FixedRegisters.insert(VirtReg.reg());
SmallVector<Register, 4> CurrentNewVRegs;
- Order.rewind();
- while (Register PhysReg = Order.next()) {
+ for (MCRegister PhysReg : Order) {
+ assert(PhysReg.isValid());
LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
<< printReg(PhysReg, TRI) << '\n');
RecoloringCandidates.clear();
@@ -2644,7 +2653,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
for (SmallLISet::iterator It = RecoloringCandidates.begin(),
EndIt = RecoloringCandidates.end();
It != EndIt; ++It) {
- Register ItVirtReg = (*It)->reg;
+ Register ItVirtReg = (*It)->reg();
enqueue(RecoloringQueue, *It);
assert(VRM->hasPhys(ItVirtReg) &&
"Interferences are supposed to be with allocated variables");
@@ -2697,10 +2706,10 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
for (SmallLISet::iterator It = RecoloringCandidates.begin(),
EndIt = RecoloringCandidates.end();
It != EndIt; ++It) {
- Register ItVirtReg = (*It)->reg;
+ Register ItVirtReg = (*It)->reg();
if (VRM->hasPhys(ItVirtReg))
Matrix->unassign(**It);
- Register ItPhysReg = VirtRegToPhysReg[ItVirtReg];
+ MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg];
Matrix->assign(**It, ItPhysReg);
}
}
@@ -2724,8 +2733,8 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
while (!RecoloringQueue.empty()) {
LiveInterval *LI = dequeue(RecoloringQueue);
LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
- Register PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
- Depth + 1);
+ MCRegister PhysReg =
+ selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
// When splitting happens, the live-range may actually be empty.
// In that case, this is okay to continue the recoloring even
// if we did not find an alternative color for it. Indeed,
@@ -2743,7 +2752,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
<< " succeeded with: " << printReg(PhysReg, TRI) << '\n');
Matrix->assign(*LI, PhysReg);
- FixedRegisters.insert(LI->reg);
+ FixedRegisters.insert(LI->reg());
}
return true;
}
@@ -2752,12 +2761,12 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
// Main Entry Point
//===----------------------------------------------------------------------===//
-Register RAGreedy::selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<Register> &NewVRegs) {
+MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &NewVRegs) {
CutOffInfo = CO_None;
LLVMContext &Ctx = MF->getFunction().getContext();
SmallVirtRegSet FixedRegisters;
- Register Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+ MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
if (Reg == ~0U && (CutOffInfo != CO_None)) {
uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
if (CutOffEncountered == CO_Depth)
@@ -2782,11 +2791,10 @@ Register RAGreedy::selectOrSplit(LiveInterval &VirtReg,
/// Spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise return 0.
-unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
- AllocationOrder &Order,
- Register PhysReg,
- unsigned &CostPerUseLimit,
- SmallVectorImpl<Register> &NewVRegs) {
+MCRegister
+RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
+ MCRegister PhysReg, unsigned &CostPerUseLimit,
+ SmallVectorImpl<Register> &NewVRegs) {
if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
// We choose spill over using the CSR for the first time if the spill cost
// is lower than CSRCost.
@@ -2851,7 +2859,7 @@ void RAGreedy::initializeCSRCost() {
/// Collect the hint info for \p Reg.
/// The results are stored into \p Out.
/// \p Out is not cleared before being populated.
-void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
+void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
if (!Instr.isFullCopy())
continue;
@@ -2863,9 +2871,8 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
continue;
}
// Get the current assignment.
- Register OtherPhysReg = Register::isPhysicalRegister(OtherReg)
- ? OtherReg
- : VRM->getPhys(OtherReg);
+ MCRegister OtherPhysReg =
+ OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
// Push the collected information.
Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
OtherPhysReg));
@@ -2876,7 +2883,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
/// \p PhysReg was used.
/// \return The cost of \p List for \p PhysReg.
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
- unsigned PhysReg) {
+ MCRegister PhysReg) {
BlockFrequency Cost = 0;
for (const HintInfo &Info : List) {
if (Info.PhysReg != PhysReg)
@@ -2897,11 +2904,11 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
// We have a broken hint, check if it is possible to fix it by
// reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
// some register and PhysReg may be available for the other live-ranges.
- SmallSet<unsigned, 4> Visited;
+ SmallSet<Register, 4> Visited;
SmallVector<unsigned, 2> RecoloringCandidates;
HintsInfo Info;
- unsigned Reg = VirtReg.reg;
- Register PhysReg = VRM->getPhys(Reg);
+ Register Reg = VirtReg.reg();
+ MCRegister PhysReg = VRM->getPhys(Reg);
// Start the recoloring algorithm from the input live-interval, then
// it will propagate to the ones that are copy-related with it.
Visited.insert(Reg);
@@ -2922,7 +2929,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
// Get the live interval mapped with this virtual register to be able
// to check for the interference with the new color.
LiveInterval &LI = LIS->getInterval(Reg);
- Register CurrPhys = VRM->getPhys(Reg);
+ MCRegister CurrPhys = VRM->getPhys(Reg);
// Check that the new color matches the register class constraints and
// that it is free for this live range.
if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
@@ -3003,33 +3010,35 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
for (LiveInterval *LI : SetOfBrokenHints) {
- assert(Register::isVirtualRegister(LI->reg) &&
+ assert(Register::isVirtualRegister(LI->reg()) &&
"Recoloring is possible only for virtual registers");
// Some dead defs may be around (e.g., because of debug uses).
// Ignore those.
- if (!VRM->hasPhys(LI->reg))
+ if (!VRM->hasPhys(LI->reg()))
continue;
tryHintRecoloring(*LI);
}
}
-Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
- SmallVectorImpl<Register> &NewVRegs,
- SmallVirtRegSet &FixedRegisters,
- unsigned Depth) {
+MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ unsigned Depth) {
unsigned CostPerUseLimit = ~0u;
// First try assigning a free register.
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
- if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
+ auto Order =
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
+ if (MCRegister PhysReg =
+ tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
// If VirtReg got an assignment, the eviction info is no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg);
+ LastEvicted.clearEvicteeInfo(VirtReg.reg());
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
// register.
if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
NewVRegs.empty()) {
- Register CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
- CostPerUseLimit, NewVRegs);
+ MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
+ CostPerUseLimit, NewVRegs);
if (CSRReg || !NewVRegs.empty())
// Return now if we decide to use a CSR or create new vregs due to
// pre-splitting.
@@ -3040,7 +3049,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
LiveRangeStage Stage = getStage(VirtReg);
LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
- << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
+ << ExtraRegInfo[VirtReg.reg()].Cascade << '\n');
// Try to evict a less worthy live range, but only for ranges from the primary
// queue. The RS_Split ranges already failed to do this, and they should not
@@ -3049,7 +3058,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
if (Register PhysReg =
tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
FixedRegisters)) {
- Register Hint = MRI->getSimpleHint(VirtReg.reg);
+ Register Hint = MRI->getSimpleHint(VirtReg.reg());
// If VirtReg has a hint and that hint is broken record this
// virtual register as a recoloring candidate for broken hint.
// Indeed, since we evicted a variable in its neighborhood it is
@@ -3059,7 +3068,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
SetOfBrokenHints.insert(&VirtReg);
// If VirtReg evicted someone, the eviction info for it as an evictee is
// no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg);
+ LastEvicted.clearEvicteeInfo(VirtReg.reg());
return PhysReg;
}
@@ -3071,7 +3080,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
if (Stage < RS_Split) {
setStage(VirtReg, RS_Split);
LLVM_DEBUG(dbgs() << "wait for second round\n");
- NewVRegs.push_back(VirtReg.reg);
+ NewVRegs.push_back(VirtReg.reg());
return 0;
}
@@ -3081,7 +3090,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
// If VirtReg got split, the eviction info is no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg);
+ LastEvicted.clearEvicteeInfo(VirtReg.reg());
return PhysReg;
}
}
@@ -3093,14 +3102,16 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
Depth);
// Finally spill VirtReg itself.
- if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
+ if ((EnableDeferredSpilling ||
+ TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
+ getStage(VirtReg) < RS_Memory) {
// TODO: This is experimental and in particular, we do not model
// the live range splitting done by spilling correctly.
// We would need a deep integration with the spiller to do the
// right thing here. Anyway, that is still good for early testing.
setStage(VirtReg, RS_Memory);
LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
- NewVRegs.push_back(VirtReg.reg);
+ NewVRegs.push_back(VirtReg.reg());
} else {
NamedRegionTimer T("spill", "Spiller", TimerGroupName,
TimerGroupDescription, TimePassesIsEnabled);
@@ -3111,7 +3122,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// Tell LiveDebugVariables about the new ranges. Ranges not being covered by
// the new regs are kept in LDV (still mapping to the old register), until
// we rewrite spilled locations in LDV at a later stage.
- DebugVars->splitRegister(VirtReg.reg, LRE.regs(), *LIS);
+ DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);
if (VerifyEnabled)
MF->verify(this, "After spilling");
@@ -3230,7 +3241,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
initializeCSRCost();
- calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI);
+ VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
+
+ VRAI->calculateSpillWeightsAndHints();
LLVM_DEBUG(LIS->dump());
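Most of the churn in the RegAllocGreedy.cpp hunks above is mechanical: LiveInterval's public reg and weight members are now reached through reg() and weight() accessors. The self-contained sketch below shows the shape of that migration at a call site, using a stand-in IntervalLike class rather than the real LiveInterval.

#include <cassert>

// Stand-in for a live interval whose data members moved behind accessors.
class IntervalLike {
  unsigned Reg;
  float Weight;

public:
  IntervalLike(unsigned Reg, float Weight) : Reg(Reg), Weight(Weight) {}
  unsigned reg() const { return Reg; }
  float weight() const { return Weight; }
  void setWeight(float W) { Weight = W; }
};

int main() {
  IntervalLike LI(/*Reg=*/7, /*Weight=*/1.5f);
  assert(LI.reg() == 7);       // was: LI.reg
  assert(LI.weight() == 1.5f); // was: LI.weight
  LI.setWeight(2.0f);          // was: LI.weight = 2.0f
  assert(LI.weight() == 2.0f);
  return 0;
}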
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 7590dbf1b977..7c5af1a0c56e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -140,14 +140,13 @@ public:
MachineFunctionProperties::Property::NoPHIs);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
private:
- using LI2NodeMap = std::map<const LiveInterval *, unsigned>;
- using Node2LIMap = std::vector<const LiveInterval *>;
- using AllowedSet = std::vector<unsigned>;
- using AllowedSetMap = std::vector<AllowedSet>;
- using RegPair = std::pair<unsigned, unsigned>;
- using CoalesceMap = std::map<RegPair, PBQP::PBQPNum>;
- using RegSet = std::set<unsigned>;
+ using RegSet = std::set<Register>;
char *customPassID;
@@ -199,7 +198,7 @@ public:
for (auto NId : G.nodeIds()) {
PBQP::PBQPNum SpillCost =
- LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight;
+ LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight();
if (SpillCost == 0.0)
SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
else
@@ -231,9 +230,9 @@ private:
return false;
if (NRegs < MRegs)
- return D.count(IKey(NRegs, MRegs)) > 0;
+ return D.contains(IKey(NRegs, MRegs));
- return D.count(IKey(MRegs, NRegs)) > 0;
+ return D.contains(IKey(MRegs, NRegs));
}
void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
@@ -290,7 +289,7 @@ private:
// If two intervals end at the same point, we need a way to break the tie or
// the set will assume they're actually equal and refuse to insert a
// "duplicate". Just compare the vregs - fast and guaranteed unique.
- return std::get<0>(I1)->reg < std::get<0>(I2)->reg;
+ return std::get<0>(I1)->reg() < std::get<0>(I2)->reg();
}
static bool isAtLastSegment(const IntervalInfo &I) {
@@ -331,7 +330,7 @@ public:
// Start by building the inactive set.
for (auto NId : G.nodeIds()) {
- unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ Register VReg = G.getNodeMetadata(NId).getVReg();
LiveInterval &LI = LIS.getInterval(VReg);
assert(!LI.empty() && "PBQP graph contains node for empty interval");
Inactive.push(std::make_tuple(&LI, 0, NId));
@@ -413,9 +412,9 @@ private:
PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
bool NodesInterfere = false;
for (unsigned I = 0; I != NRegs.size(); ++I) {
- unsigned PRegN = NRegs[I];
+ MCRegister PRegN = NRegs[I];
for (unsigned J = 0; J != MRegs.size(); ++J) {
- unsigned PRegM = MRegs[J];
+ MCRegister PRegM = MRegs[J];
if (TRI.regsOverlap(PRegN, PRegM)) {
M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
NodesInterfere = true;
@@ -448,11 +447,10 @@ public:
if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
continue;
- unsigned DstReg = CP.getDstReg();
- unsigned SrcReg = CP.getSrcReg();
+ Register DstReg = CP.getDstReg();
+ Register SrcReg = CP.getSrcReg();
- const float Scale = 1.0f / MBFI.getEntryFreq();
- PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale;
+ PBQP::PBQPNum CBenefit = MBFI.getBlockFreqRelativeToEntryBlock(&MBB);
if (CP.isPhys()) {
if (!MF.getRegInfo().isAllocatable(DstReg))
@@ -464,7 +462,7 @@ public:
G.getNodeMetadata(NId).getAllowedRegs();
unsigned PRegOpt = 0;
- while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg)
+ while (PRegOpt < Allowed.size() && Allowed[PRegOpt].id() != DstReg)
++PRegOpt;
if (PRegOpt < Allowed.size()) {
@@ -509,9 +507,9 @@ private:
assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
for (unsigned I = 0; I != Allowed1.size(); ++I) {
- unsigned PReg1 = Allowed1[I];
+ MCRegister PReg1 = Allowed1[I];
for (unsigned J = 0; J != Allowed2.size(); ++J) {
- unsigned PReg2 = Allowed2[J];
+ MCRegister PReg2 = Allowed2[J];
if (PReg1 == PReg2)
CostMat[I + 1][J + 1] -= Benefit;
}
@@ -519,6 +517,20 @@ private:
}
};
+/// PBQP-specific implementation of weight normalization.
+class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo {
+ float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) override {
+ // All intervals have a spill weight that is mostly proportional to the
+ // number of uses, with uses in loops having a bigger weight.
+ return NumInstr * VirtRegAuxInfo::normalize(UseDefFreq, Size, 1);
+ }
+
+public:
+ PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
+ const MachineLoopInfo &Loops,
+ const MachineBlockFrequencyInfo &MBFI)
+ : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {}
+};
} // end anonymous namespace
// Out-of-line destructor/anchor for PBQPRAConstraint.
@@ -558,18 +570,19 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
// Iterate over all live ranges.
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
VRegsToAlloc.insert(Reg);
}
}
-static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
+static bool isACalleeSavedRegister(MCRegister Reg,
+ const TargetRegisterInfo &TRI,
const MachineFunction &MF) {
const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSR[i] != 0; ++i)
- if (TRI.regsOverlap(reg, CSR[i]))
+ if (TRI.regsOverlap(Reg, CSR[i]))
return true;
return false;
}
@@ -583,12 +596,12 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
const TargetRegisterInfo &TRI =
*G.getMetadata().MF.getSubtarget().getRegisterInfo();
- std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
+ std::vector<Register> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
- std::map<unsigned, std::vector<unsigned>> VRegAllowedMap;
+ std::map<Register, std::vector<MCRegister>> VRegAllowedMap;
while (!Worklist.empty()) {
- unsigned VReg = Worklist.back();
+ Register VReg = Worklist.back();
Worklist.pop_back();
LiveInterval &VRegLI = LIS.getInterval(VReg);
@@ -596,8 +609,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
// If this is an empty interval, move it to the EmptyIntervalVRegs set then
// continue.
if (VRegLI.empty()) {
- EmptyIntervalVRegs.insert(VRegLI.reg);
- VRegsToAlloc.erase(VRegLI.reg);
+ EmptyIntervalVRegs.insert(VRegLI.reg());
+ VRegsToAlloc.erase(VRegLI.reg());
continue;
}
@@ -608,10 +621,10 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
// Compute an initial allowed set for the current vreg.
- std::vector<unsigned> VRegAllowed;
+ std::vector<MCRegister> VRegAllowed;
ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
for (unsigned I = 0; I != RawPRegOrder.size(); ++I) {
- unsigned PReg = RawPRegOrder[I];
+ MCRegister PReg(RawPRegOrder[I]);
if (MRI.isReserved(PReg))
continue;
@@ -639,10 +652,11 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
if (VRegAllowed.empty()) {
SmallVector<Register, 8> NewVRegs;
spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
- Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
+ llvm::append_range(Worklist, NewVRegs);
continue;
- } else
- VRegAllowedMap[VReg] = std::move(VRegAllowed);
+ }
+
+ VRegAllowedMap[VReg.id()] = std::move(VRegAllowed);
}
for (auto &KV : VRegAllowedMap) {
@@ -685,7 +699,7 @@ void RegAllocPBQP::spillVReg(Register VReg,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
(void)TRI;
LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
- << LRE.getParent().weight << ", New vregs: ");
+ << LRE.getParent().weight() << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
@@ -693,8 +707,8 @@ void RegAllocPBQP::spillVReg(Register VReg,
I != E; ++I) {
const LiveInterval &LI = LIS.getInterval(*I);
assert(!LI.empty() && "Empty spill range.");
- LLVM_DEBUG(dbgs() << printReg(LI.reg, &TRI) << " ");
- VRegsToAlloc.insert(LI.reg);
+ LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " ");
+ VRegsToAlloc.insert(LI.reg());
}
LLVM_DEBUG(dbgs() << ")\n");
@@ -718,11 +732,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
// Iterate over the nodes mapping the PBQP solution to a register
// assignment.
for (auto NId : G.nodeIds()) {
- unsigned VReg = G.getNodeMetadata(NId).getVReg();
- unsigned AllocOption = Solution.getSelection(NId);
+ Register VReg = G.getNodeMetadata(NId).getVReg();
+ unsigned AllocOpt = Solution.getSelection(NId);
- if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
- unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
+ if (AllocOpt != PBQP::RegAlloc::getSpillOptionIdx()) {
+ MCRegister PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOpt - 1];
LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
<< TRI.getName(PReg) << "\n");
assert(PReg != 0 && "Invalid preg selected.");
@@ -750,12 +764,12 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
I != E; ++I) {
LiveInterval &LI = LIS.getInterval(*I);
- unsigned PReg = MRI.getSimpleHint(LI.reg);
+ Register PReg = MRI.getSimpleHint(LI.reg());
if (PReg == 0) {
- const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg);
+ const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg());
const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
- for (unsigned CandidateReg : RawPRegOrder) {
+ for (MCRegister CandidateReg : RawPRegOrder) {
if (!VRM.getRegInfo().isReserved(CandidateReg)) {
PReg = CandidateReg;
break;
@@ -765,7 +779,7 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
"No un-reserved physical registers in this register class");
}
- VRM.assignVirt2Phys(LI.reg, PReg);
+ VRM.assignVirt2Phys(LI.reg(), PReg);
}
}
@@ -779,13 +793,6 @@ void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
DeadRemats.clear();
}
-static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
- unsigned NumInstr) {
- // All intervals have a spill weight that is mostly proportional to the number
- // of uses, with uses in loops having a bigger weight.
- return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1);
-}
-
bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
LiveIntervals &LIS = getAnalysis<LiveIntervals>();
MachineBlockFrequencyInfo &MBFI =
@@ -793,8 +800,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
VirtRegMap &VRM = getAnalysis<VirtRegMap>();
- calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(),
- MBFI, normalizePBQPSpillWeight);
+ PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), MBFI);
+ VRAI.calculateSpillWeightsAndHints();
std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));
@@ -878,7 +885,7 @@ static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
return Printable([NId, &G](raw_ostream &OS) {
const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
- unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ Register VReg = G.getNodeMetadata(NId).getVReg();
const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')';
});
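The last RegAllocPBQP.cpp hunks replace the free normalizePBQPSpillWeight callback with a PBQPVirtRegAuxInfo subclass that overrides a virtual normalize() hook. Below is a self-contained illustration of that callback-to-override move; the class names and the base normalization rule are assumptions made up for the example, and only the NumInstr scaling mirrors the override shown above.

#include <cassert>

// Base calculator with a virtual normalization hook (stand-in names).
class WeightCalc {
public:
  virtual ~WeightCalc() = default;
  virtual float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) {
    (void)NumInstr;
    return Size ? UseDefFreq / Size : 0.0f; // hypothetical base rule
  }
  float spillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr) {
    return normalize(UseDefFreq, Size, NumInstr);
  }
};

// Mirrors the override above: scale the base result by the instruction count.
class PBQPWeightCalc final : public WeightCalc {
  float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) override {
    return NumInstr * WeightCalc::normalize(UseDefFreq, Size, 1);
  }
};

int main() {
  PBQPWeightCalc Calc;
  // 3 instructions, base rule gives 8 / 4 = 2, so the scaled weight is 6.
  assert(Calc.spillWeight(/*UseDefFreq=*/8.0f, /*Size=*/4, /*NumInstr=*/3) ==
         6.0f);
  return 0;
}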
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 1523bd4d1649..0488db3d09cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -188,7 +188,14 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
}
assert(RC && "Failed to find register class");
compute(RC);
- unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
- return TRI->getRegPressureSetLimit(*MF, Idx) -
- TRI->getRegClassWeight(RC).RegWeight * NReserved;
+ unsigned NAllocatableRegs = getNumAllocatableRegs(RC);
+ unsigned RegPressureSetLimit = TRI->getRegPressureSetLimit(*MF, Idx);
+ // If all the regs are reserved, return raw RegPressureSetLimit.
+ // One example is VRSAVERC in PowerPC.
+ // Avoid returning zero, getRegPressureSetLimit(Idx) assumes computePSetLimit
+ // return non-zero value.
+ if (NAllocatableRegs == 0)
+ return RegPressureSetLimit;
+ unsigned NReserved = RC->getNumRegs() - NAllocatableRegs;
+ return RegPressureSetLimit - TRI->getRegClassWeight(RC).RegWeight * NReserved;
}
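The RegisterClassInfo.cpp change above makes computePSetLimit return the raw pressure-set limit when every register in the class is reserved, so it can never report zero. A tiny self-contained sketch of the arithmetic, with hypothetical numbers rather than real target data:

#include <cassert>

// Sketch of the guarded computation above (stand-in parameters, not the
// LLVM API): with no allocatable registers, fall back to the raw limit.
unsigned computeLimit(unsigned RawLimit, unsigned NumRegs,
                      unsigned NumAllocatable, unsigned RegWeight) {
  if (NumAllocatable == 0)
    return RawLimit; // e.g. a fully reserved class such as VRSAVERC
  unsigned NumReserved = NumRegs - NumAllocatable;
  return RawLimit - RegWeight * NumReserved;
}

int main() {
  // Fully reserved class: previously 32 - 1 * 32 = 0, now the raw limit.
  assert(computeLimit(/*RawLimit=*/32, /*NumRegs=*/32,
                      /*NumAllocatable=*/0, /*RegWeight=*/1) == 32);
  // Partially reserved class: subtract the weight of the reserved registers.
  assert(computeLimit(32, 32, 28, 1) == 28);
  return 0;
}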
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 17160a9f42cd..7fdc85a6e444 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -137,13 +137,13 @@ namespace {
/// ordered-by-slot-index set of DBG_VALUEs, to help quick
/// identification of whether coalescing may change location validity.
using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
- DenseMap<unsigned, std::vector<DbgValueLoc>> DbgVRegToValues;
+ DenseMap<Register, std::vector<DbgValueLoc>> DbgVRegToValues;
/// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached.
/// To avoid repeatedly merging sets of DbgValueLocs, instead record
/// which vregs have been coalesced, and where to. This map is from
/// vreg => {set of vregs merged in}.
- DenseMap<unsigned, SmallVector<unsigned, 4>> DbgMergedVRegNums;
+ DenseMap<Register, SmallVector<Register, 4>> DbgMergedVRegNums;
/// A LaneMask to remember on which subregister live ranges we need to call
/// shrinkToUses() later.
@@ -173,16 +173,16 @@ namespace {
SmallVector<MachineInstr*, 8> DeadDefs;
/// Virtual registers to be considered for register class inflation.
- SmallVector<unsigned, 8> InflateRegs;
+ SmallVector<Register, 8> InflateRegs;
/// The collection of live intervals which should have been updated
/// immediately after rematerialiation but delayed until
/// lateLiveIntervalUpdate is called.
- DenseSet<unsigned> ToBeUpdated;
+ DenseSet<Register> ToBeUpdated;
/// Record how many times the large live interval with many valnos
/// has been tried to join with other live interval.
- DenseMap<unsigned, unsigned long> LargeLIVisitCounter;
+ DenseMap<Register, unsigned long> LargeLIVisitCounter;
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
@@ -211,6 +211,18 @@ namespace {
/// live interval update is costly.
void lateLiveIntervalUpdate();
+ /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
+ /// has no value defined in the predecessors. If the incoming value is the
+ /// same as defined by the copy itself, the value is considered undefined.
+ bool copyValueUndefInPredecessors(LiveRange &S,
+ const MachineBasicBlock *MBB,
+ LiveQueryResult SLRQ);
+
+ /// Set necessary undef flags on subregister uses after pruning out undef
+ /// lane segments from the subrange.
+ void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
+ LaneBitmask PrunedLanes);
+
/// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
/// src/dst of the copy instruction CopyMI. This returns true if the copy
/// was successfully coalesced away. If it is not currently possible to
@@ -285,7 +297,7 @@ namespace {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -351,7 +363,7 @@ namespace {
JoinVals &LHSVals, LiveRange &RHS,
JoinVals &RHSVals);
- void checkMergingChangesDbgValuesImpl(unsigned Reg, LiveRange &OtherRange,
+ void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange,
LiveRange &RegRange, JoinVals &Vals2);
public:
@@ -388,8 +400,8 @@ INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri,
- const MachineInstr *MI, unsigned &Src,
- unsigned &Dst, unsigned &SrcSub,
+ const MachineInstr *MI, Register &Src,
+ Register &Dst, unsigned &SrcSub,
unsigned &DstSub) {
if (MI->isCopy()) {
Dst = MI->getOperand(0).getReg();
@@ -424,12 +436,13 @@ static bool isSplitEdge(const MachineBasicBlock *MBB) {
}
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
- SrcReg = DstReg = 0;
+ SrcReg = DstReg = Register();
SrcIdx = DstIdx = 0;
NewRC = nullptr;
Flipped = CrossClass = false;
- unsigned Src, Dst, SrcSub, DstSub;
+ Register Src, Dst;
+ unsigned SrcSub = 0, DstSub = 0;
if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
return false;
Partial = SrcSub || DstSub;
@@ -523,7 +536,8 @@ bool CoalescerPair::flip() {
bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (!MI)
return false;
- unsigned Src, Dst, SrcSub, DstSub;
+ Register Src, Dst;
+ unsigned SrcSub = 0, DstSub = 0;
if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
return false;
@@ -536,8 +550,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
}
// Now check that Dst matches DstReg.
- if (Register::isPhysicalRegister(DstReg)) {
- if (!Register::isPhysicalRegister(Dst))
+ if (DstReg.isPhysical()) {
+ if (!Dst.isPhysical())
return false;
assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
// DstSub could be set for a physreg from INSERT_SUBREG.
@@ -547,7 +561,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (!SrcSub)
return DstReg == Dst;
// This is a partial register copy. Check that the parts match.
- return TRI.getSubReg(DstReg, SrcSub) == Dst;
+ return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst;
} else {
// DstReg is virtual.
if (DstReg != Dst)
@@ -649,7 +663,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// in IntB, we can merge them.
if (ValS+1 != BS) return false;
- LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));
+ LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI));
SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
// We are about to delete CopyMI, so need to remove it as the 'instruction
@@ -692,13 +706,13 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// If the source instruction was killing the source register before the
// merge, unset the isKill marker given the live range has been extended.
- int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true);
if (UIdx != -1) {
ValSEndInst->getOperand(UIdx).setIsKill(false);
}
// Rewrite the copy.
- CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
+ CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI);
// If the copy instruction was killing the destination register or any
// subrange before the merge trim the live range.
bool RecomputeLiveRange = AS->end == CopyIdx;
@@ -817,7 +831,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return { false, false };
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
- int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg());
assert(DefIdx != -1);
unsigned UseOpIdx;
if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
@@ -838,7 +852,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
Register NewReg = NewDstMO.getReg();
- if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
+ if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill())
return { false, false };
// Make sure there are no other definitions of IntB that would reach the
@@ -848,7 +862,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// If some of the uses of IntA.reg is already coalesced away, return false.
// It's not possible to determine whether it's safe to perform the coalescing.
- for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) {
+ for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) {
MachineInstr *UseMI = MO.getParent();
unsigned OpNo = &MO - &UseMI->getOperand(0);
SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
@@ -870,9 +884,9 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
return { false, false };
- if (Register::isVirtualRegister(IntA.reg) &&
- Register::isVirtualRegister(IntB.reg) &&
- !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
+ if (Register::isVirtualRegister(IntA.reg()) &&
+ Register::isVirtualRegister(IntB.reg()) &&
+ !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
return { false, false };
if (NewMI != DefMI) {
LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
@@ -891,9 +905,10 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// = B
// Update uses of IntA of the specific Val# with IntB.
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg()),
UE = MRI->use_end();
- UI != UE; /* ++UI is below because of possible MI removal */) {
+ UI != UE;
+ /* ++UI is below because of possible MI removal */) {
MachineOperand &UseMO = *UI;
++UI;
if (UseMO.isUndef())
@@ -920,7 +935,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
if (!UseMI->isCopy())
continue;
- if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ if (UseMI->getOperand(0).getReg() != IntB.reg() ||
UseMI->getOperand(0).getSubReg())
continue;
@@ -951,10 +966,10 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
if (!IntA.hasSubRanges()) {
- LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg());
IntA.createSubRangeFrom(Allocator, Mask, IntA);
} else if (!IntB.hasSubRanges()) {
- LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg());
IntB.createSubRangeFrom(Allocator, Mask, IntB);
}
SlotIndex AIdx = CopyIdx.getRegSlot(true);
@@ -1100,8 +1115,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
continue;
}
// Check DefMI is a reverse copy and it is in BB Pred.
- if (DefMI->getOperand(0).getReg() != IntA.reg ||
- DefMI->getOperand(1).getReg() != IntB.reg ||
+ if (DefMI->getOperand(0).getReg() != IntA.reg() ||
+ DefMI->getOperand(1).getReg() != IntB.reg() ||
DefMI->getParent() != Pred) {
CopyLeftBB = Pred;
continue;
@@ -1158,8 +1173,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
// Insert new copy to CopyLeftBB.
MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
- TII->get(TargetOpcode::COPY), IntB.reg)
- .addReg(IntA.reg);
+ TII->get(TargetOpcode::COPY), IntB.reg())
+ .addReg(IntA.reg());
SlotIndex NewCopyIdx =
LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
@@ -1212,7 +1227,10 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
}
++I;
}
- LIS->extendToIndices(SR, EndPoints);
+ SmallVector<SlotIndex, 8> Undefs;
+ IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
+ *LIS->getSlotIndexes());
+ LIS->extendToIndices(SR, EndPoints, Undefs);
}
// If any dead defs were extended, truncate them.
shrinkToUses(&IntB);
@@ -1224,9 +1242,9 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
/// defining a subregister.
-static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
- assert(!Register::isPhysicalRegister(Reg) &&
- "This code cannot handle physreg aliasing");
+static bool definesFullReg(const MachineInstr &MI, Register Reg) {
+ assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
+
for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
continue;
@@ -1242,9 +1260,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MachineInstr *CopyMI,
bool &IsDefCopy) {
IsDefCopy = false;
- unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+ Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
- unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
if (Register::isPhysicalRegister(SrcReg))
return false;
@@ -1291,8 +1309,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
if (!DefMI->isImplicitDef()) {
- if (Register::isPhysicalRegister(DstReg)) {
- unsigned NewDstReg = DstReg;
+ if (DstReg.isPhysical()) {
+ Register NewDstReg = DstReg;
unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
DefMI->getOperand(0).getSubReg());
@@ -1366,7 +1384,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
// We need to remember these so we can add intervals once we insert
// NewMI into SlotIndexes.
- SmallVector<unsigned, 4> NewMIImplDefs;
+ SmallVector<MCRegister, 4> NewMIImplDefs;
for (unsigned i = NewMI.getDesc().getNumOperands(),
e = NewMI.getNumOperands();
i != e; ++i) {
@@ -1374,11 +1392,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (MO.isReg() && MO.isDef()) {
assert(MO.isImplicit() && MO.isDead() &&
Register::isPhysicalRegister(MO.getReg()));
- NewMIImplDefs.push_back(MO.getReg());
+ NewMIImplDefs.push_back(MO.getReg().asMCReg());
}
}
- if (Register::isVirtualRegister(DstReg)) {
+ if (DstReg.isVirtual()) {
unsigned NewIdx = NewMI.getOperand(0).getSubReg();
if (DefRC != nullptr) {
@@ -1513,7 +1531,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
- unsigned Reg = NewMIImplDefs[i];
+ MCRegister Reg = NewMIImplDefs[i];
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
@@ -1571,7 +1589,8 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// Note that we do not query CoalescerPair here but redo isMoveInstr as the
// CoalescerPair may have a new register class with adjusted subreg indices
// at this point.
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ Register SrcReg, DstReg;
+ unsigned SrcSubIdx = 0, DstSubIdx = 0;
if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
return nullptr;
@@ -1696,7 +1715,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
}
-void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg,
+void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
unsigned SubIdx) {
bool DstIsPhys = Register::isPhysicalRegister(DstReg);
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
@@ -1752,7 +1771,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg,
if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
if (!DstInt->hasSubRanges()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
@@ -1802,6 +1821,49 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
return false;
}
+bool RegisterCoalescer::copyValueUndefInPredecessors(
+ LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) {
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
+ if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
+ // If this is a self loop, we may be reading the same value.
+ if (V->id != SLRQ.valueOutOrDead()->id)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
+ Register Reg,
+ LaneBitmask PrunedLanes) {
+ // If we had other instructions in the segment reading the undef sublane
+ // value, we need to mark them with undef.
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ unsigned SubRegIdx = MO.getSubReg();
+ if (SubRegIdx == 0 || MO.isUndef())
+ continue;
+
+ LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
+ MO.setIsUndef();
+ break;
+ }
+ }
+ }
+
+ LI.removeEmptySubRanges();
+
+ // A def of a subregister may be a use of other register lanes. Replacing
+ // such a def with a def of a different register will eliminate the use,
+ // and may cause the recorded live range to be larger than the actual
+ // liveness in the program IR.
+ LIS->shrinkToUses(&LI);
+}
+
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
Again = false;
LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
@@ -1861,16 +1923,35 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
VNInfo *ReadVNI = LRQ.valueIn();
assert(ReadVNI && "No value before copy and no <undef> flag.");
assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
- LI.MergeValueNumberInto(DefVNI, ReadVNI);
+
+ // Track incoming undef lanes we need to eliminate from the subrange.
+ LaneBitmask PrunedLanes;
+ MachineBasicBlock *MBB = CopyMI->getParent();
// Process subregister liveranges.
for (LiveInterval::SubRange &S : LI.subranges()) {
LiveQueryResult SLRQ = S.Query(CopyIdx);
if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
- VNInfo *SReadVNI = SLRQ.valueIn();
- S.MergeValueNumberInto(SDefVNI, SReadVNI);
+ if (VNInfo *SReadVNI = SLRQ.valueIn())
+ SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
+
+ // If this copy introduced an undef subrange from an incoming value,
+ // we need to eliminate the undef live in values from the subrange.
+ if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
+ LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
+ PrunedLanes |= S.LaneMask;
+ S.removeValNo(SDefVNI);
+ }
}
}
+
+ LI.MergeValueNumberInto(DefVNI, ReadVNI);
+ if (PrunedLanes.any()) {
+ LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
+ << PrunedLanes << '\n');
+ setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
+ }
+
LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
}
deleteInstr(CopyMI);
@@ -1885,7 +1966,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (!canJoinPhys(CP)) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
- bool IsDefCopy;
+ bool IsDefCopy = false;
if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
return true;
if (IsDefCopy)
@@ -1924,7 +2005,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// If definition of source is defined by trivial computation, try
// rematerializing it.
- bool IsDefCopy;
+ bool IsDefCopy = false;
if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
return true;
@@ -1938,7 +2019,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (Changed) {
deleteInstr(CopyMI);
if (Shrink) {
- unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
LiveInterval &DstLI = LIS->getInterval(DstReg);
shrinkToUses(&DstLI);
LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
@@ -1991,7 +2072,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
continue;
LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
<< ")\n");
- LIS->shrinkToUses(S, LI.reg);
+ LIS->shrinkToUses(S, LI.reg());
}
LI.removeEmptySubRanges();
}
@@ -2030,8 +2111,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
- unsigned DstReg = CP.getDstReg();
- unsigned SrcReg = CP.getSrcReg();
+ Register DstReg = CP.getDstReg();
+ Register SrcReg = CP.getSrcReg();
assert(CP.isPhys() && "Must be a physreg copy");
assert(MRI->isReserved(DstReg) && "Not a reserved register");
LiveInterval &RHS = LIS->getInterval(SrcReg);
@@ -2128,7 +2209,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
<< printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
- LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
+ LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
// Create a new dead def at the new def location.
for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
LiveRange &LR = LIS->getRegUnit(*UI);
@@ -2219,7 +2300,7 @@ class JoinVals {
LiveRange &LR;
/// (Main) register we work on.
- const unsigned Reg;
+ const Register Reg;
/// Reg (and therefore the values in this liverange) will end up as
/// subregister SubIdx in the coalesced register. Either CP.DstIdx or
@@ -2339,7 +2420,7 @@ class JoinVals {
LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
/// Find the ultimate value that VNI was copied from.
- std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
+ std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;
bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
@@ -2378,7 +2459,7 @@ class JoinVals {
/// Return true if MI uses any of the given Lanes from Reg.
/// This does not include partial redefinitions of Reg.
- bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;
+ bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;
/// Determine if ValNo is a copy of a value number in LR or Other.LR that will
/// be pruned:
@@ -2389,14 +2470,15 @@ class JoinVals {
bool isPrunedValue(unsigned ValNo, JoinVals &Other);
public:
- JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
- SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
+ JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
+ SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
bool TrackSubRegLiveness)
- : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
- SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
- NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
- TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}
+ : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
+ SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
+ NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
+ TRI(TRI), Assignments(LR.getNumValNums(), -1),
+ Vals(LR.getNumValNums()) {}
/// Analyze defs in LR and compute a value mapping in NewVNInfo.
/// Returns false if any conflicts were impossible to resolve.
@@ -2462,9 +2544,9 @@ LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
return L;
}
-std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
- const VNInfo *VNI) const {
- unsigned TrackReg = Reg;
+std::pair<const VNInfo *, Register>
+JoinVals::followCopyChain(const VNInfo *VNI) const {
+ Register TrackReg = Reg;
while (!VNI->isPHIDef()) {
SlotIndex Def = VNI->def;
@@ -2473,7 +2555,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
if (!MI->isFullCopy())
return std::make_pair(VNI, TrackReg);
Register SrcReg = MI->getOperand(1).getReg();
- if (!Register::isVirtualRegister(SrcReg))
+ if (!SrcReg.isVirtual())
return std::make_pair(VNI, TrackReg);
const LiveInterval &LI = LIS->getInterval(SrcReg);
@@ -2518,13 +2600,13 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
const JoinVals &Other) const {
const VNInfo *Orig0;
- unsigned Reg0;
+ Register Reg0;
std::tie(Orig0, Reg0) = followCopyChain(Value0);
if (Orig0 == Value1 && Reg0 == Other.Reg)
return true;
const VNInfo *Orig1;
- unsigned Reg1;
+ Register Reg1;
std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
// If both values are undefined, and the source registers are the same
// register, the values are identical. Filter out cases where only one
@@ -2685,14 +2767,8 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
return CR_Replace;
// Check for simple erasable conflicts.
- if (DefMI->isImplicitDef()) {
- // We need the def for the subregister if there is nothing else live at the
- // subrange at this point.
- if (TrackSubRegLiveness
- && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
- return CR_Replace;
+ if (DefMI->isImplicitDef())
return CR_Erase;
- }
// Include the non-conflict where DefMI is a coalescable copy that kills
// OtherVNI. We still want the copy erased and value numbers merged.
@@ -2881,7 +2957,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
return true;
}
-bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
+bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
LaneBitmask Lanes) const {
if (MI.isDebugInstr())
return false;
@@ -3353,7 +3429,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
if (LI.valnos.size() < LargeIntervalSizeThreshold)
return false;
- auto &Counter = LargeLIVisitCounter[LI.reg];
+ auto &Counter = LargeLIVisitCounter[LI.reg()];
if (Counter < LargeIntervalFreqThreshold) {
Counter++;
return false;
@@ -3456,8 +3532,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Kill flags are going to be wrong if the live ranges were overlapping.
// Eventually, we should simply clear all kill flags when computing live
// ranges. They are reinserted after register allocation.
- MRI->clearKillFlags(LHS.reg);
- MRI->clearKillFlags(RHS.reg);
+ MRI->clearKillFlags(LHS.reg());
+ MRI->clearKillFlags(RHS.reg());
if (!EndPoints.empty()) {
// Recompute the parts of the live range we had to remove because of
@@ -3525,20 +3601,20 @@ void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
JoinVals &LHSVals,
LiveRange &RHS,
JoinVals &RHSVals) {
- auto ScanForDstReg = [&](unsigned Reg) {
+ auto ScanForDstReg = [&](Register Reg) {
checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
};
- auto ScanForSrcReg = [&](unsigned Reg) {
+ auto ScanForSrcReg = [&](Register Reg) {
checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
};
// Scan for potentially unsound DBG_VALUEs: examine first the register number
// Reg, and then any other vregs that may have been merged into it.
- auto PerformScan = [this](unsigned Reg, std::function<void(unsigned)> Func) {
+ auto PerformScan = [this](Register Reg, std::function<void(Register)> Func) {
Func(Reg);
if (DbgMergedVRegNums.count(Reg))
- for (unsigned X : DbgMergedVRegNums[Reg])
+ for (Register X : DbgMergedVRegNums[Reg])
Func(X);
};
@@ -3547,7 +3623,7 @@ void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
PerformScan(CP.getDstReg(), ScanForDstReg);
}
-void RegisterCoalescer::checkMergingChangesDbgValuesImpl(unsigned Reg,
+void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
LiveRange &OtherLR,
LiveRange &RegLR,
JoinVals &RegVals) {
@@ -3673,7 +3749,7 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
}
void RegisterCoalescer::lateLiveIntervalUpdate() {
- for (unsigned reg : ToBeUpdated) {
+ for (Register reg : ToBeUpdated) {
if (!LIS->hasInterval(reg))
continue;
LiveInterval &LI = LIS->getInterval(reg);
@@ -3707,7 +3783,7 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
/// Check if DstReg is a terminal node.
/// I.e., it does not have any affinity other than \p Copy.
-static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
+static bool isTerminalReg(Register DstReg, const MachineInstr &Copy,
const MachineRegisterInfo *MRI) {
assert(Copy.isCopyLike());
// Check if the destination of this copy has any other affinity.
@@ -3721,15 +3797,16 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
assert(Copy.isCopyLike());
if (!UseTerminalRule)
return false;
- unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
+ Register SrcReg, DstReg;
+ unsigned SrcSubReg = 0, DstSubReg = 0;
if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
return false;
// Check if the destination of this copy has any other affinity.
- if (Register::isPhysicalRegister(DstReg) ||
+ if (DstReg.isPhysical() ||
// If SrcReg is a physical register, the copy won't be coalesced.
// Ignoring it may have other side effect (like missing
// rematerialization). So keep it.
- Register::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI))
+ SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI))
return false;
// DstReg is a terminal node. Check if it interferes with any other
@@ -3745,7 +3822,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
// For now, just consider the copies that are in the same block.
if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
continue;
- unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
+ Register OtherSrcReg, OtherReg;
+ unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
OtherSubReg))
return false;
@@ -3930,7 +4008,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
<< " regs.\n");
for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
- unsigned Reg = InflateRegs[i];
+ Register Reg = InflateRegs[i];
if (MRI->reg_nodbg_empty(Reg))
continue;
if (MRI->recomputeRegClass(Reg)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h
index f505d46cd338..f265d93fb0d6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
#define LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
+#include "llvm/CodeGen/Register.h"
+
namespace llvm {
class MachineInstr;
@@ -28,10 +30,10 @@ class TargetRegisterInfo;
/// The register that will be left after coalescing. It can be a
/// virtual or physical register.
- unsigned DstReg = 0;
+ Register DstReg;
/// The virtual register that will be coalesced into dstReg.
- unsigned SrcReg = 0;
+ Register SrcReg;
/// The sub-register index of the old DstReg in the new coalesced register.
unsigned DstIdx = 0;
@@ -59,9 +61,9 @@ class TargetRegisterInfo;
/// Create a CoalescerPair representing a virtreg-to-physreg copy.
/// No need to call setRegisters().
- CoalescerPair(unsigned VirtReg, unsigned PhysReg,
+ CoalescerPair(Register VirtReg, MCRegister PhysReg,
const TargetRegisterInfo &tri)
- : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg) {}
+ : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg) {}
/// Set registers to match the copy instruction MI. Return
/// false if MI is not a coalescable copy instruction.
@@ -92,10 +94,10 @@ class TargetRegisterInfo;
/// Return the register (virtual or physical) that will remain
/// after coalescing.
- unsigned getDstReg() const { return DstReg; }
+ Register getDstReg() const { return DstReg; }
/// Return the virtual register that will be coalesced away.
- unsigned getSrcReg() const { return SrcReg; }
+ Register getSrcReg() const { return SrcReg; }
/// Return the subregister index that DstReg will be coalesced into, or 0.
unsigned getDstIdx() const { return DstIdx; }
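The RegisterCoalescer changes above are mostly a mechanical migration from raw unsigned register numbers to the typed llvm::Register wrapper. A minimal, illustration-only sketch of the API this relies on, assuming the LLVM 12 headers brought in by this merge and an invented physical-register number, might look like:

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/Register.h"

using llvm::Register;

// Exercises the pieces of the Register API that the coalescer now leans on.
static unsigned registerSketch() {
  // A default-constructed Register is "no register", which is why the patch
  // writes `SrcReg = DstReg = Register()` instead of assigning 0.
  Register NoReg;

  // Virtual registers are built from a zero-based index and answer isVirtual().
  Register VReg = Register::index2VirtReg(0);

  // Physical registers are handed to MC-layer interfaces via asMCReg(), as in
  // the NewMIImplDefs and removePhysRegDefAt() changes above.
  Register Phys(1); // assumption: 1 names some target physical register
  unsigned MCNum = Phys.isPhysical() ? unsigned(Phys.asMCReg()) : 0;

  // Register hashes like the unsigned it wraps, so DenseMap/DenseSet keys
  // switched over in this file keep working unchanged.
  llvm::DenseMap<Register, unsigned> Counts;
  Counts[VReg] = (VReg.isVirtual() && !NoReg.isValid()) ? 1u : 0u;

  return Counts[VReg] + MCNum;
}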
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
index ecbc4ed63ef6..8f1fc103e869 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -62,7 +62,7 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
- const MachineRegisterInfo &MRI, unsigned Reg,
+ const MachineRegisterInfo &MRI, Register Reg,
LaneBitmask PrevMask, LaneBitmask NewMask) {
//assert((NewMask & !PrevMask) == 0 && "Must not add bits");
if (NewMask.any() || PrevMask.none())
@@ -152,7 +152,7 @@ void RegPressureDelta::dump() const {
#endif
-void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
+void RegPressureTracker::increaseRegPressure(Register RegUnit,
LaneBitmask PreviousMask,
LaneBitmask NewMask) {
if (PreviousMask.any() || NewMask.none())
@@ -167,7 +167,7 @@ void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
}
}
-void RegPressureTracker::decreaseRegPressure(unsigned RegUnit,
+void RegPressureTracker::decreaseRegPressure(Register RegUnit,
LaneBitmask PreviousMask,
LaneBitmask NewMask) {
decreaseSetPressure(CurrSetPressure, *MRI, RegUnit, PreviousMask, NewMask);
@@ -360,7 +360,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0);
assert(isBottomClosed() && "need bottom-up tracking to intialize.");
for (const RegisterMaskPair &Pair : P.LiveOutRegs) {
- unsigned RegUnit = Pair.RegUnit;
+ Register RegUnit = Pair.RegUnit;
if (Register::isVirtualRegister(RegUnit)
&& !RPTracker.hasUntiedDef(RegUnit))
increaseSetPressure(LiveThruPressure, *MRI, RegUnit,
@@ -369,7 +369,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
}
static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
- unsigned RegUnit) {
+ Register RegUnit) {
auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
@@ -380,7 +380,7 @@ static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
- unsigned RegUnit = Pair.RegUnit;
+ Register RegUnit = Pair.RegUnit;
assert(Pair.LaneMask.any());
auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
@@ -393,7 +393,7 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
}
static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
- unsigned RegUnit) {
+ Register RegUnit) {
auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
@@ -406,7 +406,7 @@ static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
- unsigned RegUnit = Pair.RegUnit;
+ Register RegUnit = Pair.RegUnit;
assert(Pair.LaneMask.any());
auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
@@ -418,11 +418,12 @@ static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
}
}
-static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit,
- SlotIndex Pos, LaneBitmask SafeDefault,
- bool(*Property)(const LiveRange &LR, SlotIndex Pos)) {
- if (Register::isVirtualRegister(RegUnit)) {
+static LaneBitmask
+getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, Register RegUnit, SlotIndex Pos,
+ LaneBitmask SafeDefault,
+ bool (*Property)(const LiveRange &LR, SlotIndex Pos)) {
+ if (RegUnit.isVirtual()) {
const LiveInterval &LI = LIS.getInterval(RegUnit);
LaneBitmask Result;
if (TrackLaneMasks && LI.hasSubRanges()) {
@@ -448,7 +449,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
const MachineRegisterInfo &MRI,
- bool TrackLaneMasks, unsigned RegUnit,
+ bool TrackLaneMasks, Register RegUnit,
SlotIndex Pos) {
return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos,
LaneBitmask::getAll(),
@@ -457,7 +458,6 @@ static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
});
}
-
namespace {
/// Collect this instruction's unique uses and defs into SmallVectors for
@@ -517,12 +517,13 @@ class RegisterOperandsCollector {
}
}
- void pushReg(unsigned Reg,
+ void pushReg(Register Reg,
SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll()));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
+ ++Units)
addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
@@ -549,15 +550,16 @@ class RegisterOperandsCollector {
}
}
- void pushRegLanes(unsigned Reg, unsigned SubRegIdx,
+ void pushRegLanes(Register Reg, unsigned SubRegIdx,
SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
LaneBitmask LaneMask = SubRegIdx != 0
? TRI.getSubRegIndexLaneMask(SubRegIdx)
: MRI.getMaxLaneMaskForVReg(Reg);
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
+ ++Units)
addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
@@ -580,7 +582,7 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
const LiveIntervals &LIS) {
SlotIndex SlotIdx = LIS.getInstructionIndex(MI);
for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) {
- unsigned Reg = RI->RegUnit;
+ Register Reg = RI->RegUnit;
const LiveRange *LR = getLiveRange(LIS, Reg);
if (LR != nullptr) {
LiveQueryResult LRQ = LR->Query(SlotIdx);
@@ -605,7 +607,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
Pos.getDeadSlot());
// If the def is all that is live after the instruction, then in case
// of a subregister def we need a read-undef flag.
- unsigned RegUnit = I->RegUnit;
+ Register RegUnit = I->RegUnit;
if (Register::isVirtualRegister(RegUnit) &&
AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none())
AddFlagsMI->setRegisterDefReadUndef(RegUnit);
@@ -631,7 +633,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
}
if (AddFlagsMI != nullptr) {
for (const RegisterMaskPair &P : DeadDefs) {
- unsigned RegUnit = P.RegUnit;
+ Register RegUnit = P.RegUnit;
if (!Register::isVirtualRegister(RegUnit))
continue;
LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit,
@@ -667,7 +669,7 @@ void PressureDiffs::addInstruction(unsigned Idx,
}
/// Add a change in pressure to the pressure diff of a given instruction.
-void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
+void PressureDiff::addPressureChange(Register RegUnit, bool IsDec,
const MachineRegisterInfo *MRI) {
PSetIterator PSetI = MRI->getPressureSets(RegUnit);
int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
@@ -714,7 +716,7 @@ void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair,
SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) {
assert(Pair.LaneMask.any());
- unsigned RegUnit = Pair.RegUnit;
+ Register RegUnit = Pair.RegUnit;
auto I = llvm::find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
return Other.RegUnit == RegUnit;
});
@@ -742,13 +744,13 @@ void RegPressureTracker::discoverLiveOut(RegisterMaskPair Pair) {
void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
for (const RegisterMaskPair &P : DeadDefs) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
LaneBitmask BumpedMask = LiveMask | P.LaneMask;
increaseRegPressure(Reg, LiveMask, BumpedMask);
}
for (const RegisterMaskPair &P : DeadDefs) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
LaneBitmask BumpedMask = LiveMask | P.LaneMask;
decreaseRegPressure(Reg, BumpedMask, LiveMask);
@@ -770,7 +772,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
// Kill liveness at live defs.
// TODO: consider earlyclobbers?
for (const RegisterMaskPair &Def : RegOpers.Defs) {
- unsigned Reg = Def.RegUnit;
+ Register Reg = Def.RegUnit;
LaneBitmask PreviousMask = LiveRegs.erase(Def);
LaneBitmask NewMask = PreviousMask & ~Def.LaneMask;
@@ -800,7 +802,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
// Generate liveness for uses.
for (const RegisterMaskPair &Use : RegOpers.Uses) {
- unsigned Reg = Use.RegUnit;
+ Register Reg = Use.RegUnit;
assert(Use.LaneMask.any());
LaneBitmask PreviousMask = LiveRegs.insert(Use);
LaneBitmask NewMask = PreviousMask | Use.LaneMask;
@@ -840,7 +842,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
}
if (TrackUntiedDefs) {
for (const RegisterMaskPair &Def : RegOpers.Defs) {
- unsigned RegUnit = Def.RegUnit;
+ Register RegUnit = Def.RegUnit;
if (Register::isVirtualRegister(RegUnit) &&
(LiveRegs.contains(RegUnit) & Def.LaneMask).none())
UntiedDefs.insert(RegUnit);
@@ -911,7 +913,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
}
for (const RegisterMaskPair &Use : RegOpers.Uses) {
- unsigned Reg = Use.RegUnit;
+ Register Reg = Use.RegUnit;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
LaneBitmask LiveIn = Use.LaneMask & ~LiveMask;
if (LiveIn.any()) {
@@ -1060,7 +1062,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Kill liveness at live defs.
for (const RegisterMaskPair &P : RegOpers.Defs) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
LaneBitmask LiveLanes = LiveRegs.contains(Reg);
LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg);
LaneBitmask DefLanes = P.LaneMask;
@@ -1069,7 +1071,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
}
// Generate liveness for uses.
for (const RegisterMaskPair &P : RegOpers.Uses) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
LaneBitmask LiveLanes = LiveRegs.contains(Reg);
LaneBitmask LiveAfter = LiveLanes | P.LaneMask;
increaseRegPressure(Reg, LiveLanes, LiveAfter);
@@ -1240,7 +1242,7 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
return LastUseMask;
}
-LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit,
+LaneBitmask RegPressureTracker::getLiveLanesAt(Register RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos,
@@ -1250,7 +1252,7 @@ LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit,
});
}
-LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit,
+LaneBitmask RegPressureTracker::getLastUsedLanes(Register RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit,
@@ -1261,7 +1263,7 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit,
});
}
-LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit,
+LaneBitmask RegPressureTracker::getLiveThroughAt(Register RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos,
@@ -1294,7 +1296,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
if (RequireIntervals) {
for (const RegisterMaskPair &Use : RegOpers.Uses) {
- unsigned Reg = Use.RegUnit;
+ Register Reg = Use.RegUnit;
LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
if (LastUseMask.none())
continue;
@@ -1317,7 +1319,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
// Generate liveness for defs.
for (const RegisterMaskPair &Def : RegOpers.Defs) {
- unsigned Reg = Def.RegUnit;
+ Register Reg = Def.RegUnit;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
LaneBitmask NewMask = LiveMask | Def.LaneMask;
increaseRegPressure(Reg, LiveMask, NewMask);
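The RegisterPressure hunks above change the parameter types to Register but leave the LaneBitmask bookkeeping untouched. A minimal sketch of that PreviousMask/NewMask pattern, with invented masks standing in for TargetRegisterInfo::getSubRegIndexLaneMask() results, might be:

#include "llvm/MC/LaneBitmask.h"

using llvm::LaneBitmask;

// Returns true when adding UseMask makes the register live for the first
// time, mirroring the PreviousMask/NewMask checks in recede()/advance().
static bool becomesLive(LaneBitmask PreviousMask, LaneBitmask UseMask) {
  LaneBitmask NewMask = PreviousMask | UseMask;
  return PreviousMask.none() && NewMask.any();
}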
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
index 41b6de1441d7..a833895c115d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -91,18 +91,18 @@ void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
LiveUnits.addLiveOuts(MBB);
// Move internal iterator at the last instruction of the block.
- if (MBB.begin() != MBB.end()) {
+ if (!MBB.empty()) {
MBBI = std::prev(MBB.end());
Tracking = true;
}
}
-void RegScavenger::addRegUnits(BitVector &BV, Register Reg) {
+void RegScavenger::addRegUnits(BitVector &BV, MCRegister Reg) {
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
BV.set(*RUI);
}
-void RegScavenger::removeRegUnits(BitVector &BV, Register Reg) {
+void RegScavenger::removeRegUnits(BitVector &BV, MCRegister Reg) {
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
BV.reset(*RUI);
}
@@ -134,9 +134,9 @@ void RegScavenger::determineKillsAndDefs() {
}
if (!MO.isReg())
continue;
- Register Reg = MO.getReg();
- if (!Register::isPhysicalRegister(Reg) || isReserved(Reg))
+ if (!MO.getReg().isPhysical() || isReserved(MO.getReg()))
continue;
+ MCRegister Reg = MO.getReg().asMCReg();
if (MO.isUse()) {
// Ignore undef uses.
@@ -154,25 +154,6 @@ void RegScavenger::determineKillsAndDefs() {
}
}
-void RegScavenger::unprocess() {
- assert(Tracking && "Cannot unprocess because we're not tracking");
-
- MachineInstr &MI = *MBBI;
- if (!MI.isDebugInstr()) {
- determineKillsAndDefs();
-
- // Commit the changes.
- setUnused(DefRegUnits);
- setUsed(KillRegUnits);
- }
-
- if (MBBI == MBB->begin()) {
- MBBI = MachineBasicBlock::iterator(nullptr);
- Tracking = false;
- } else
- --MBBI;
-}
-
void RegScavenger::forward() {
// Move ptr forward.
if (!Tracking) {
@@ -592,9 +573,8 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
RestoreAfter);
MCPhysReg Reg = P.first;
MachineBasicBlock::iterator SpillBefore = P.second;
- assert(Reg != 0 && "No register left to scavenge!");
// Found an available register?
- if (SpillBefore == MBB.end()) {
+ if (Reg != 0 && SpillBefore == MBB.end()) {
LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI)
<< '\n');
return Reg;
@@ -603,6 +583,8 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
if (!AllowSpill)
return 0;
+ assert(Reg != 0 && "No register left to scavenge!");
+
MachineBasicBlock::iterator ReloadAfter =
RestoreAfter ? std::next(MBBI) : MBBI;
MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
@@ -652,11 +634,10 @@ static Register scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
// we get a single contiguous lifetime.
//
// Definitions in MRI.def_begin() are unordered, search for the first.
- MachineRegisterInfo::def_iterator FirstDef =
- std::find_if(MRI.def_begin(VReg), MRI.def_end(),
- [VReg, &TRI](const MachineOperand &MO) {
- return !MO.getParent()->readsRegister(VReg, &TRI);
- });
+ MachineRegisterInfo::def_iterator FirstDef = llvm::find_if(
+ MRI.def_operands(VReg), [VReg, &TRI](const MachineOperand &MO) {
+ return !MO.getParent()->readsRegister(VReg, &TRI);
+ });
assert(FirstDef != MRI.def_end() &&
"Must have one definition that does not redefine vreg");
MachineInstr &DefMI = *FirstDef->getParent();
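The scavengeVReg() change above swaps an iterator-pair std::find_if for the range-based llvm::find_if from llvm/ADT/STLExtras.h. A small stand-alone sketch of the same helper over a plain std::vector (the vector and predicate are illustrative only):

#include "llvm/ADT/STLExtras.h"

#include <vector>

// find_if over a whole range instead of a begin()/end() pair, as in the
// rewritten scavengeVReg() above.
static int firstNegative(const std::vector<int> &Values) {
  auto It = llvm::find_if(Values, [](int V) { return V < 0; });
  return It == Values.end() ? 0 : *It;
}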
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 4ee28d6bbb46..0872ec303460 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -130,7 +130,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
return false;
// Create a new VReg for each class.
- unsigned Reg = LI.reg;
+ unsigned Reg = LI.reg();
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
SmallVector<LiveInterval*, 4> Intervals;
Intervals.push_back(&LI);
@@ -175,7 +175,7 @@ bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes,
// across subranges when they are affected by the same MachineOperand.
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
Classes.grow(NumComponents);
- unsigned Reg = LI.reg;
+ unsigned Reg = LI.reg();
for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
if (!MO.isDef() && !MO.readsReg())
continue;
@@ -212,7 +212,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
const SmallVectorImpl<LiveInterval*> &Intervals) const {
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
- unsigned Reg = Intervals[0]->reg;
+ unsigned Reg = Intervals[0]->reg();
for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
E = MRI->reg_nodbg_end(); I != E; ) {
MachineOperand &MO = *I++;
@@ -242,7 +242,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
break;
}
- unsigned VReg = Intervals[ID]->reg;
+ unsigned VReg = Intervals[ID]->reg();
MO.setReg(VReg);
if (MO.isTied() && Reg != VReg) {
@@ -304,7 +304,7 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
const SlotIndexes &Indexes = *LIS->getSlotIndexes();
for (size_t I = 0, E = Intervals.size(); I < E; ++I) {
LiveInterval &LI = *Intervals[I];
- unsigned Reg = LI.reg;
+ unsigned Reg = LI.reg();
LI.removeEmptySubRanges();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
index 55478c232dd7..31797631c97b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -151,7 +151,7 @@ class SafeStack {
Value *getStackGuard(IRBuilder<> &IRB, Function &F);
/// Load stack guard from the frame and check if it has changed.
- void checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
+ void checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI,
AllocaInst *StackGuardSlot, Value *StackGuard);
/// Find all static allocas, dynamic allocas, return instructions and
@@ -160,7 +160,7 @@ class SafeStack {
void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
SmallVectorImpl<AllocaInst *> &DynamicAllocas,
SmallVectorImpl<Argument *> &ByValArguments,
- SmallVectorImpl<ReturnInst *> &Returns,
+ SmallVectorImpl<Instruction *> &Returns,
SmallVectorImpl<Instruction *> &StackRestorePoints);
/// Calculate the allocation size of a given alloca. Returns 0 if the
@@ -168,15 +168,13 @@ class SafeStack {
uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
/// Allocate space for all static allocas in \p StaticAllocas,
- /// replace allocas with pointers into the unsafe stack and generate code to
- /// restore the stack pointer before all return instructions in \p Returns.
+ /// replace allocas with pointers into the unsafe stack.
///
/// \returns A pointer to the top of the unsafe stack after all unsafe static
/// allocas are allocated.
Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F,
ArrayRef<AllocaInst *> StaticAllocas,
ArrayRef<Argument *> ByValArguments,
- ArrayRef<ReturnInst *> Returns,
Instruction *BasePointer,
AllocaInst *StackGuardSlot);
@@ -383,7 +381,7 @@ void SafeStack::findInsts(Function &F,
SmallVectorImpl<AllocaInst *> &StaticAllocas,
SmallVectorImpl<AllocaInst *> &DynamicAllocas,
SmallVectorImpl<Argument *> &ByValArguments,
- SmallVectorImpl<ReturnInst *> &Returns,
+ SmallVectorImpl<Instruction *> &Returns,
SmallVectorImpl<Instruction *> &StackRestorePoints) {
for (Instruction &I : instructions(&F)) {
if (auto AI = dyn_cast<AllocaInst>(&I)) {
@@ -401,7 +399,10 @@ void SafeStack::findInsts(Function &F,
DynamicAllocas.push_back(AI);
}
} else if (auto RI = dyn_cast<ReturnInst>(&I)) {
- Returns.push_back(RI);
+ if (CallInst *CI = I.getParent()->getTerminatingMustTailCall())
+ Returns.push_back(CI);
+ else
+ Returns.push_back(RI);
} else if (auto CI = dyn_cast<CallInst>(&I)) {
// setjmps require stack restore.
if (CI->getCalledFunction() && CI->canReturnTwice())
@@ -465,7 +466,7 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
return DynamicTop;
}
-void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
+void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI,
AllocaInst *StackGuardSlot, Value *StackGuard) {
Value *V = IRB.CreateLoad(StackPtrTy, StackGuardSlot);
Value *Cmp = IRB.CreateICmpNE(StackGuard, V);
@@ -490,8 +491,8 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
/// prologue into a local variable and restore it in the epilogue.
Value *SafeStack::moveStaticAllocasToUnsafeStack(
IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas,
- ArrayRef<Argument *> ByValArguments, ArrayRef<ReturnInst *> Returns,
- Instruction *BasePointer, AllocaInst *StackGuardSlot) {
+ ArrayRef<Argument *> ByValArguments, Instruction *BasePointer,
+ AllocaInst *StackGuardSlot) {
if (StaticAllocas.empty() && ByValArguments.empty())
return BasePointer;
@@ -759,7 +760,7 @@ bool SafeStack::run() {
SmallVector<AllocaInst *, 16> StaticAllocas;
SmallVector<AllocaInst *, 4> DynamicAllocas;
SmallVector<Argument *, 4> ByValArguments;
- SmallVector<ReturnInst *, 4> Returns;
+ SmallVector<Instruction *, 4> Returns;
// Collect all points where stack gets unwound and needs to be restored
// This is only necessary because the runtime (setjmp and unwind code) is
@@ -788,7 +789,8 @@ bool SafeStack::run() {
// Calls must always have a debug location, or else inlining breaks. So
// we explicitly set a artificial debug location here.
if (DISubprogram *SP = F.getSubprogram())
- IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP));
+ IRB.SetCurrentDebugLocation(
+ DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP));
if (SafeStackUsePointerAddress) {
FunctionCallee Fn = F.getParent()->getOrInsertFunction(
"__safestack_pointer_address", StackPtrTy->getPointerTo(0));
@@ -812,7 +814,7 @@ bool SafeStack::run() {
StackGuardSlot = IRB.CreateAlloca(StackPtrTy, nullptr);
IRB.CreateStore(StackGuard, StackGuardSlot);
- for (ReturnInst *RI : Returns) {
+ for (Instruction *RI : Returns) {
IRBuilder<> IRBRet(RI);
checkStackGuard(IRBRet, F, *RI, StackGuardSlot, StackGuard);
}
@@ -820,9 +822,8 @@ bool SafeStack::run() {
// The top of the unsafe stack after all unsafe static allocas are
// allocated.
- Value *StaticTop =
- moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, ByValArguments,
- Returns, BasePointer, StackGuardSlot);
+ Value *StaticTop = moveStaticAllocasToUnsafeStack(
+ IRB, F, StaticAllocas, ByValArguments, BasePointer, StackGuardSlot);
// Safe stack object that stores the current unsafe stack top. It is updated
// as unsafe dynamic (non-constant-sized) allocas are allocated and freed.
@@ -838,7 +839,7 @@ bool SafeStack::run() {
DynamicAllocas);
// Restore the unsafe stack pointer before each return.
- for (ReturnInst *RI : Returns) {
+ for (Instruction *RI : Returns) {
IRB.SetInsertPoint(RI);
IRB.CreateStore(BasePointer, UnsafeStackPtr);
}
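The SafeStack changes widen the collected return points from ReturnInst to Instruction so that a block ending in a musttail call restores the unsafe stack pointer before the call rather than before the ret. A hedged sketch of that selection, using only the getTerminatingMustTailCall() query shown in the hunk:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

// Pick where the unsafe-stack epilogue belongs for a returning block: before
// the terminating musttail call when there is one, otherwise before the ret.
static llvm::Instruction *epilogueInsertionPoint(llvm::ReturnInst &RI) {
  if (llvm::CallInst *CI = RI.getParent()->getTerminatingMustTailCall())
    return CI;
  return &RI;
}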
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
index c823454f825c..5d61b3a146b4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "SafeStackLayout.h"
-#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -141,10 +140,10 @@ void StackLayout::computeLayout() {
// Sort objects by size (largest first) to reduce fragmentation.
if (StackObjects.size() > 2)
- std::stable_sort(StackObjects.begin() + 1, StackObjects.end(),
- [](const StackObject &a, const StackObject &b) {
- return a.Size > b.Size;
- });
+ llvm::stable_sort(drop_begin(StackObjects),
+ [](const StackObject &a, const StackObject &b) {
+ return a.Size > b.Size;
+ });
for (auto &Obj : StackObjects)
layoutObject(Obj);
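SafeStackLayout now uses llvm::stable_sort over drop_begin(StackObjects) instead of explicit iterators: the first object stays pinned in place and the rest are sorted by descending size. An illustration-only sketch with plain integers standing in for StackObject sizes:

#include "llvm/ADT/STLExtras.h"

#include <vector>

// Keep element 0 in place and sort the remainder in descending order, the
// same shape as the stable_sort(drop_begin(StackObjects)) call above.
static void sortTailDescending(std::vector<int> &Sizes) {
  if (Sizes.size() > 2)
    llvm::stable_sort(llvm::drop_begin(Sizes),
                      [](int A, int B) { return A > B; });
}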
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 10da2d421797..5899da777fe9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -154,7 +154,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
} else if (const Value *V = MMO->getValue()) {
SmallVector<Value *, 4> Objs;
- if (!getUnderlyingObjectsForCodeGen(V, Objs, DL))
+ if (!getUnderlyingObjectsForCodeGen(V, Objs))
return false;
for (Value *V : Objs) {
@@ -199,7 +199,10 @@ void ScheduleDAGInstrs::exitRegion() {
}
void ScheduleDAGInstrs::addSchedBarrierDeps() {
- MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
+ MachineInstr *ExitMI =
+ RegionEnd != BB->end()
+ ? &*skipDebugInstructionsBackward(RegionEnd, RegionBegin)
+ : nullptr;
ExitSU.setInstr(ExitMI);
// Add dependencies on the defs and uses of the instruction.
if (ExitMI) {
@@ -241,8 +244,6 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
!DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
- if (!Uses.contains(*Alias))
- continue;
for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
SUnit *UseSU = I->SU;
if (UseSU == SU)
@@ -513,6 +514,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
+ assert(!MI->isDebugInstr());
+
const MachineOperand &MO = MI->getOperand(OperIdx);
Register Reg = MO.getReg();
@@ -804,7 +807,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
DbgMI = nullptr;
}
- if (MI.isDebugValue()) {
+ if (MI.isDebugValue() || MI.isDebugRef()) {
DbgMI = &MI;
continue;
}
@@ -1184,7 +1187,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
else if (SU == &ExitSU)
oss << "<exit>";
else
- SU->getInstr()->print(oss, /*SkipOpers=*/true);
+ SU->getInstr()->print(oss, /*IsStandalone=*/true);
return oss.str();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index a113c30f851b..05b2a3764cca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -35,7 +35,7 @@ namespace llvm {
return true;
}
- static bool isNodeHidden(const SUnit *Node) {
+ static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
return (Node->NumPreds > 10 || Node->NumSuccs > 10);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ec384d2a7c56..7f2add81e80d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,12 +24,14 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -410,9 +412,11 @@ namespace {
SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitADDCARRY(SDNode *N);
+ SDValue visitSADDO_CARRY(SDNode *N);
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitSUBCARRY(SDNode *N);
+ SDValue visitSSUBO_CARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
@@ -464,6 +468,7 @@ namespace {
SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
+ SDValue visitSTRICT_FADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
SDValue visitFMA(SDNode *N);
@@ -539,6 +544,7 @@ namespace {
SDValue convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC);
+ SDValue foldSignChangeInBitcast(SDNode *N);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
@@ -586,7 +592,7 @@ namespace {
const SDLoc &DL);
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
- SDValue MatchStoreCombine(StoreSDNode *N);
+ SDValue mergeTruncStores(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -641,14 +647,18 @@ namespace {
// Classify the origin of a stored value.
enum class StoreSource { Unknown, Constant, Extract, Load };
StoreSource getStoreSource(SDValue StoreVal) {
- if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal))
+ switch (StoreVal.getOpcode()) {
+ case ISD::Constant:
+ case ISD::ConstantFP:
return StoreSource::Constant;
- if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::EXTRACT_SUBVECTOR:
return StoreSource::Extract;
- if (isa<LoadSDNode>(StoreVal))
+ case ISD::LOAD:
return StoreSource::Load;
- return StoreSource::Unknown;
+ default:
+ return StoreSource::Unknown;
+ }
}
/// This is a helper function for visitMUL to check the profitability
@@ -752,9 +762,7 @@ namespace {
/// is legal or custom before legalizing operations, and whether it is
/// legal (but not custom) after legalization.
bool hasOperation(unsigned Opcode, EVT VT) {
- if (LegalOperations)
- return TLI.isOperationLegal(Opcode, VT);
- return TLI.isOperationLegalOrCustom(Opcode, VT);
+ return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
}
public:
@@ -924,23 +932,40 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-// Returns the SDNode if it is a constant float BuildVector
-// or constant float.
-static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
- if (isa<ConstantFPSDNode>(N))
- return N.getNode();
- if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
- return N.getNode();
- return nullptr;
+static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
+ if (!ScalarTy.isSimple())
+ return false;
+
+ uint64_t MaskForTy = 0ULL;
+ switch (ScalarTy.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ MaskForTy = 0xFFULL;
+ break;
+ case MVT::i16:
+ MaskForTy = 0xFFFFULL;
+ break;
+ case MVT::i32:
+ MaskForTy = 0xFFFFFFFFULL;
+ break;
+ default:
+ return false;
+ break;
+ }
+
+ APInt Val;
+ if (ISD::isConstantSplatVector(N, Val))
+ return Val.getLimitedValue() == MaskForTy;
+
+ return false;
}
-// Determines if it is a constant integer or a build vector of constant
+// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
- if (N.getOpcode() != ISD::BUILD_VECTOR)
+ if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
return false;
unsigned BitWidth = N.getScalarValueSizeInBits();
for (const SDValue &Op : N->op_values()) {
@@ -1554,9 +1579,15 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
DAG.ReplaceAllUsesWith(N, &RV);
}
- // Push the new node and any users onto the worklist
- AddToWorklist(RV.getNode());
- AddUsersToWorklist(RV.getNode());
+ // Push the new node and any users onto the worklist. Omit this if the
+ // new node is the EntryToken (e.g. if a store managed to get optimized
+ // out), because re-visiting the EntryToken and its users will not uncover
+ // any additional opportunities, but there may be a large number of such
+ // users, potentially causing compile time explosion.
+ if (RV.getOpcode() != ISD::EntryToken) {
+ AddToWorklist(RV.getNode());
+ AddUsersToWorklist(RV.getNode());
+ }
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
@@ -1589,8 +1620,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::ADDCARRY: return visitADDCARRY(N);
+ case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::SUBCARRY: return visitSUBCARRY(N);
+ case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -1646,6 +1679,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
+ case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA(N);
@@ -1805,6 +1839,10 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
+ // Don't simplify the token factor if the node itself has too many operands.
+ if (N->getNumOperands() > TokenFactorInlineLimit)
+ return SDValue();
+
// If the sole user is a token factor, we should make sure we have a
// chance to merge them together. This prevents TF chains from inhibiting
// optimizations.
@@ -1890,7 +1928,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
// If this is an Op, we can remove the op from the list. Remark any
// search associated with it as from the current OpNumber.
- if (SeenOps.count(Op) != 0) {
+ if (SeenOps.contains(Op)) {
Changed = true;
DidPruneOps = true;
unsigned OrigOpNumber = 0;
@@ -2002,6 +2040,62 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
+/// Return true if 'Use' is a load or a store that uses N as its base pointer
+/// and that N may be folded in the load / store addressing mode.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ unsigned AS;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ AM.HasBaseReg = true;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ AM.HasBaseReg = true;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
+ VT.getTypeForEVT(*DAG.getContext()), AS);
+}
+
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
"Unexpected binary operator");
@@ -2021,12 +2115,12 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
SDValue CT = Sel.getOperand(1);
if (!isConstantOrConstantVector(CT, true) &&
- !isConstantFPBuildVectorOrConstantFP(CT))
+ !DAG.isConstantFPBuildVectorOrConstantFP(CT))
return SDValue();
SDValue CF = Sel.getOperand(2);
if (!isConstantOrConstantVector(CF, true) &&
- !isConstantFPBuildVectorOrConstantFP(CF))
+ !DAG.isConstantFPBuildVectorOrConstantFP(CF))
return SDValue();
// Bail out if any constants are opaque because we can't constant fold those.
@@ -2043,19 +2137,10 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!CanFoldNonConst &&
!isConstantOrConstantVector(CBO, true) &&
- !isConstantFPBuildVectorOrConstantFP(CBO))
+ !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
return SDValue();
- EVT VT = Sel.getValueType();
-
- // In case of shift value and shift amount may have different VT. For instance
- // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
- // swapped operands and value types do not match. NB: x86 is fine if operands
- // are not swapped with shift amount VT being not bigger than shifted value.
- // TODO: that is possible to check for a shift operation, correct VTs and
- // still perform optimization on x86 if needed.
- if (SelOpNo && VT != CBO.getValueType())
- return SDValue();
+ EVT VT = BO->getValueType(0);
// We have a select-of-constants followed by a binary operator with a
// constant. Eliminate the binop by pulling the constant math into the select.
@@ -2065,14 +2150,14 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
: DAG.getNode(BinOpcode, DL, VT, CT, CBO);
if (!CanFoldNonConst && !NewCT.isUndef() &&
!isConstantOrConstantVector(NewCT, true) &&
- !isConstantFPBuildVectorOrConstantFP(NewCT))
+ !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
return SDValue();
SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
: DAG.getNode(BinOpcode, DL, VT, CF, CBO);
if (!CanFoldNonConst && !NewCF.isUndef() &&
!isConstantOrConstantVector(NewCF, true) &&
- !isConstantFPBuildVectorOrConstantFP(NewCF))
+ !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
@@ -2402,8 +2487,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
- APInt C0 = N0->getConstantOperandAPInt(0);
- APInt C1 = N1->getConstantOperandAPInt(0);
+ const APInt &C0 = N0->getConstantOperandAPInt(0);
+ const APInt &C1 = N1->getConstantOperandAPInt(0);
return DAG.getVScale(DL, VT, C0 + C1);
}
@@ -2411,9 +2496,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if ((N0.getOpcode() == ISD::ADD) &&
(N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
(N1.getOpcode() == ISD::VSCALE)) {
- auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
- auto VS1 = N1->getConstantOperandAPInt(0);
- auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
+ const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+ const APInt &VS1 = N1->getConstantOperandAPInt(0);
+ SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
}
@@ -2631,36 +2716,18 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
return SDValue();
}
-static SDValue flipBoolean(SDValue V, const SDLoc &DL,
- SelectionDAG &DAG, const TargetLowering &TLI) {
- EVT VT = V.getValueType();
-
- SDValue Cst;
- switch (TLI.getBooleanContents(VT)) {
- case TargetLowering::ZeroOrOneBooleanContent:
- case TargetLowering::UndefinedBooleanContent:
- Cst = DAG.getConstant(1, DL, VT);
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- Cst = DAG.getAllOnesConstant(DL, VT);
- break;
- }
-
- return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
-}
-
/**
* Flips a boolean if it is cheaper to compute. If the Force parameter is set,
* then the flip also occurs if computing the inverse is the same cost.
* This function returns an empty SDValue in case it cannot flip the boolean
* without increasing the cost of the computation. If you want to flip a boolean
- * no matter what, use flipBoolean.
+ * no matter what, use DAG.getLogicalNOT.
*/
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
const TargetLowering &TLI,
bool Force) {
if (Force && isa<ConstantSDNode>(V))
- return flipBoolean(V, SDLoc(V), DAG, TLI);
+ return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
if (V.getOpcode() != ISD::XOR)
return SDValue();
@@ -2687,7 +2754,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
if (IsFlip)
return V.getOperand(0);
if (Force)
- return flipBoolean(V, SDLoc(V), DAG, TLI);
+ return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
return SDValue();
}
@@ -2724,8 +2791,8 @@ SDValue DAGCombiner::visitADDO(SDNode *N) {
if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
DAG.getConstant(0, DL, VT), N0.getOperand(0));
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ return CombineTo(
+ N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
if (SDValue Combined = visitUADDOLike(N0, N1, N))
@@ -2820,6 +2887,28 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+ // fold (saddo_carry x, y, false) -> (saddo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
/**
* If we are facing some sort of diamond carry propagation pattern try to
* break it up to generate something like:
@@ -3005,8 +3094,8 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDLoc DL(N);
SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
N0.getOperand(0), NotC);
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ return CombineTo(
+ N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
// Iff the flag result is dead:
@@ -3111,6 +3200,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// 0 - X --> X if X is 0 or the minimum signed value.
return N1;
}
+
+ // Convert 0 - abs(x).
+ SDValue Result;
+ if (N1->getOpcode() == ISD::ABS &&
+ !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
+ TLI.expandABS(N1.getNode(), Result, DAG, true))
+ return Result;
}
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
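The new 0 - abs(x) fold above hands the work to TLI.expandABS when ISD::ABS is not legal. A minimal standalone sketch of one common branchless expansion of -abs(x) (illustrative only; the exact sequence expandABS emits is target and implementation dependent, not part of the patch):

#include <cassert>
#include <cstdint>
#include <cstdlib>

static int32_t negAbs(int32_t x) {
  int32_t sign = x >> 31;   // 0 for non-negative x, -1 for negative x
  return sign - (x ^ sign); // == -abs(x), and stays in range even for INT32_MIN
}

int main() {
  for (int32_t x : {0, 1, -1, 123456, -123456, 2147483647}) // INT32_MAX included
    assert(negAbs(x) == -std::abs((long long)x));
  // INT32_MIN: plain abs() would overflow, but the negated form is representable.
  assert(negAbs(INT32_MIN) == INT32_MIN);
  return 0;
}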
@@ -3306,12 +3402,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
SDValue S0 = N1.getOperand(0);
- if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
- unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
- if (C->getAPIntValue() == (OpSizeInBits - 1))
+ if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
- }
}
}
@@ -3342,7 +3436,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
if (N1.getOpcode() == ISD::VSCALE) {
- APInt IntVal = N1.getConstantOperandAPInt(0);
+ const APInt &IntVal = N1.getConstantOperandAPInt(0);
return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
}
@@ -3501,6 +3595,21 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (ssubo_carry x, y, false) -> (ssubo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
+ return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// UMULFIXSAT here.
SDValue DAGCombiner::visitMULFIX(SDNode *N) {
@@ -3606,19 +3715,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
- // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
+ // Try to transform:
+ // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
// mul x, (2^N + 1) --> add (shl x, N), x
// mul x, (2^N - 1) --> sub (shl x, N), x
// Examples: x * 33 --> (x << 5) + x
// x * 15 --> (x << 4) - x
// x * -33 --> -((x << 5) + x)
// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
+ // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
+ // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
+ // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
+ // Examples: x * 0x8800 --> (x << 15) + (x << 11)
+ // x * 0xf800 --> (x << 16) - (x << 11)
+ // x * -0x8800 --> -((x << 15) + (x << 11))
+ // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
unsigned MathOp = ISD::DELETED_NODE;
APInt MulC = ConstValue1.abs();
+ // The constant `2` should be treated as (2^0 + 1).
+ unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
+ MulC.lshrInPlace(TZeros);
if ((MulC - 1).isPowerOf2())
MathOp = ISD::ADD;
else if ((MulC + 1).isPowerOf2())
@@ -3627,12 +3747,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (MathOp != ISD::DELETED_NODE) {
unsigned ShAmt =
MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
+ ShAmt += TZeros;
assert(ShAmt < VT.getScalarSizeInBits() &&
"multiply-by-constant generated out of bounds shift");
SDLoc DL(N);
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
- SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
+ SDValue R =
+ TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(TZeros, DL, VT)))
+ : DAG.getNode(MathOp, DL, VT, Shl, N0);
if (ConstValue1.isNegative())
R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
return R;
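The decomposition added above rests on plain shift/add identities over wrapping arithmetic; a small standalone check of the constants named in the comment (illustrative sketch, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 7u, 0x1234u, 0xdeadbeefu}) {
    // mul x, (2^N + 1) --> add (shl x, N), x
    assert(x * 33u == (x << 5) + x);
    // mul x, (2^N - 1) --> sub (shl x, N), x
    assert(x * 15u == (x << 4) - x);
    // mul x, (2^N + 2^M) --> add (shl x, N), (shl x, M)
    assert(x * 0x8800u == (x << 15) + (x << 11));
    // mul x, (2^N - 2^M) --> sub (shl x, N), (shl x, M)
    assert(x * 0xf800u == (x << 16) - (x << 11));
  }
  return 0;
}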
@@ -3684,11 +3809,42 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
- APInt C0 = N0.getConstantOperandAPInt(0);
- APInt C1 = NC1->getAPIntValue();
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(SDLoc(N), VT, C0 * C1);
}
+ // Fold ((mul x, 0/undef) -> 0,
+ // (mul x, 1) -> x)
+ // -> and(x, mask)
+ // We can replace vectors with '0' and '1' factors with a clearing mask.
+ if (VT.isFixedLengthVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallBitVector ClearMask;
+ ClearMask.reserve(NumElts);
+ auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
+ if (!V || V->isNullValue()) {
+ ClearMask.push_back(true);
+ return true;
+ }
+ ClearMask.push_back(false);
+ return V->isOne();
+ };
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
+ ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
+ assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
+ SDLoc DL(N);
+ EVT LegalSVT = N1.getOperand(0).getValueType();
+ SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
+ SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (ClearMask[I])
+ Mask[I] = Zero;
+ return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
+ }
+ }
+
// reassociate mul
if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
@@ -4108,9 +4264,9 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
- SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
if (DAG.isKnownToBeAPowerOfTwo(N1)) {
// fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
@@ -4118,6 +4274,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (N1.getOpcode() == ISD::SHL &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
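Both urem folds here depend on the identity x urem 2^k == x & (2^k - 1) for unsigned x; a quick standalone check (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 5u, 1000u, 0xffffffffu})
    for (uint32_t k : {1u, 3u, 8u, 31u}) {
      uint32_t pow2 = 1u << k;
      assert(x % pow2 == (x & (pow2 - 1))); // fold (urem x, pow2) -> (and x, pow2-1)
    }
  return 0;
}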
@@ -4186,7 +4343,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
- if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
+ if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
+ !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
@@ -4242,7 +4400,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
- if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
+ if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
+ !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
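The fallback in the two hunks above (for both MULHS and MULHU) produces the high half through a multiply in the doubled-width type followed by a shift; in scalar form (illustrative sketch, not part of the patch):

#include <cassert>
#include <cstdint>

static uint32_t mulhu32(uint32_t a, uint32_t b) {
  return (uint32_t)(((uint64_t)a * b) >> 32); // widen, multiply, shift
}

static int32_t mulhs32(int32_t a, int32_t b) {
  return (int32_t)(((int64_t)a * b) >> 32); // sign-extend before multiplying
}

int main() {
  assert(mulhu32(0xffffffffu, 0xffffffffu) == 0xfffffffeu);
  assert(mulhs32(-2, 3) == -1);     // high half of -6 is all ones
  assert(mulhs32(1 << 30, 8) == 2); // 2^33 >> 32 == 2
  return 0;
}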
@@ -4448,6 +4607,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
}
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -4916,8 +5079,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
if (!LDST->isSimple())
return false;
+ EVT LdStMemVT = LDST->getMemoryVT();
+
+ // Bail out when changing the scalable property, since we can't be sure that
+ // we're actually narrowing here.
+ if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
+ return false;
+
// Verify that we are actually reducing a load width here.
- if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
+ if (LdStMemVT.bitsLT(MemVT))
return false;
// Ensure that this isn't going to produce an unsupported memory access.
@@ -5272,6 +5442,31 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return N1;
if (ISD::isBuildVectorAllOnes(N1.getNode()))
return N0;
+
+ // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
+ auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
+ auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
+ if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
+ N0.hasOneUse() && N1.hasOneUse()) {
+ EVT LoadVT = MLoad->getMemoryVT();
+ EVT ExtVT = VT;
+ if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
+ // For this AND to be a zero extension of the masked load the elements
+ // of the BuildVec must mask the bottom bits of the extended element
+ // type
+ if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
+ uint64_t ElementSize =
+ LoadVT.getVectorElementType().getScalarSizeInBits();
+ if (Splat->getAPIntValue().isMask(ElementSize)) {
+ return DAG.getMaskedLoad(
+ ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
+ MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
+ LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
+ ISD::ZEXTLOAD, MLoad->isExpandingLoad());
+ }
+ }
+ }
+ }
}
// fold (and c1, c2) -> c1&c2
@@ -5440,6 +5635,28 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
+ // fold (and (masked_gather x)) -> (zext_masked_gather x)
+ if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
+ EVT MemVT = GN0->getMemoryVT();
+ EVT ScalarVT = MemVT.getScalarType();
+
+ if (SDValue(GN0, 0).hasOneUse() &&
+ isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
+ TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+ SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
+ GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
+
+ SDValue ZExtLoad = DAG.getMaskedGather(
+ DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
+ GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
+
+ CombineTo(N, ZExtLoad);
+ AddToWorklist(ZExtLoad.getNode());
+ // Avoid recheck of N.
+ return SDValue(N, 0);
+ }
+ }
+
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
// fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
@@ -5534,6 +5751,31 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
return V;
+ // Recognize the following pattern:
+ //
+ // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
+ //
+ // where bitmask is a mask that clears the upper bits of AndVT. The
+ // number of bits in bitmask must be a power of two.
+ auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
+ if (LHS->getOpcode() != ISD::SIGN_EXTEND)
+ return false;
+
+ auto *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C)
+ return false;
+
+ if (!C->getAPIntValue().isMask(
+ LHS.getOperand(0).getValueType().getFixedSizeInBits()))
+ return false;
+
+ return true;
+ };
+
+ // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
+ if (IsAndZeroExtMask(N0, N1))
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+
return SDValue();
}
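The IsAndZeroExtMask fold is justified by the fact that sign-extending a narrow value and then masking with exactly the narrow width's mask yields the zero-extended value; a standalone check (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int i = -128; i <= 127; ++i) {
    int8_t narrow = (int8_t)i;
    uint32_t sext_then_mask = (uint32_t)(int32_t)narrow & 0xffu; // (and (sign_extend x), mask)
    uint32_t zext = (uint32_t)(uint8_t)narrow;                   // (zero_extend x)
    assert(sext_then_mask == zext);
  }
  return 0;
}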
@@ -6275,8 +6517,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
+//
+// The IsRotate flag should be set when the LHS of both shifts is the same.
+// Otherwise if matching a general funnel shift, it should be clear.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG, bool IsRotate) {
// If EltSize is a power of 2 then:
//
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -6308,8 +6553,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// always invokes undefined behavior for 32-bit X.
//
// Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
+ //
+ // NOTE: We can only do this when matching an AND and not a general
+ // funnel shift.
unsigned MaskLoBits = 0;
- if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+ if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
unsigned Bits = Log2_64(EltSize);
@@ -6399,7 +6647,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
- if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
+ /*IsRotate*/ true)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg);
@@ -6428,7 +6677,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
// fold (or (shl x0, (*ext (sub 32, y))),
// (srl x1, (*ext y))) ->
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
- if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
+ if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
HasPos ? Pos : Neg);
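matchRotateSub reasons about shift amounts modulo the element width; for a power-of-two width, the or-of-opposing-shifts form with masked amounts is a rotate for every amount, including zero. A scalar check (illustrative sketch, not part of the patch):

#include <cassert>
#include <cstdint>

// Reference rotate-right built from a 64-bit "x:x" concatenation.
static uint32_t rotr32_ref(uint32_t x, unsigned y) {
  uint64_t doubled = ((uint64_t)x << 32) | x;
  return (uint32_t)(doubled >> (y & 31));
}

int main() {
  uint32_t x = 0x80000001u;
  for (unsigned y = 0; y != 64; ++y) {
    // The masked or-of-shifts form the combiner matches: rotr x, y.
    uint32_t viaShifts = (x << ((32u - y) & 31u)) | (x >> (y & 31u));
    assert(viaShifts == rotr32_ref(x, y));
  }
  return 0;
}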
@@ -6782,11 +7031,11 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
return None;
}
-static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
+static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
return i;
}
-static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
+static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
return BW - i - 1;
}
@@ -6803,8 +7052,8 @@ static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
bool BigEndian = true, LittleEndian = true;
for (unsigned i = 0; i < Width; i++) {
int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
- LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
- BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
+ LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
+ BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
if (!BigEndian && !LittleEndian)
return None;
}
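The renamed byte-index helpers encode where byte i of an N-byte value sits in memory under each endianness; a standalone check against the host layout (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>
#include <cstring>

static unsigned littleEndianByteAt(unsigned BW, unsigned i) { return i; }
static unsigned bigEndianByteAt(unsigned BW, unsigned i) { return BW - i - 1; }

int main() {
  const uint32_t Val = 0x04030201u; // byte i of Val, counting from the LSB, is i + 1
  uint8_t Mem[4];
  std::memcpy(Mem, &Val, sizeof(Val));
  for (unsigned i = 0; i != 4; ++i) {
    // On a little-endian host the first mapping holds, on a big-endian host
    // the second one does; isBigEndian() above decides which fits the offsets.
    assert(Mem[littleEndianByteAt(4, i)] == i + 1 ||
           Mem[bigEndianByteAt(4, i)] == i + 1);
  }
  return 0;
}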
@@ -6847,80 +7096,98 @@ static SDValue stripTruncAndExt(SDValue Value) {
/// p[3] = (val >> 0) & 0xFF;
/// =>
/// *((i32)p) = BSWAP(val);
-SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
- // Collect all the stores in the chain.
- SDValue Chain;
- SmallVector<StoreSDNode *, 8> Stores;
- for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
- // TODO: Allow unordered atomics when wider type is legal (see D66309)
- if (Store->getMemoryVT() != MVT::i8 ||
- !Store->isSimple() || Store->isIndexed())
+SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
+ // The matching looks for "store (trunc x)" patterns that appear early but are
+ // likely to be replaced by truncating store nodes during combining.
+ // TODO: If there is evidence that running this later would help, this
+ // limitation could be removed. Legality checks may need to be added
+ // for the created store and optional bswap/rotate.
+ if (LegalOperations)
+ return SDValue();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ // TODO: Allow unordered atomics when wider type is legal (see D66309)
+ EVT MemVT = N->getMemoryVT();
+ if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
+ !N->isSimple() || N->isIndexed())
+ return SDValue();
+
+ // Collect all of the stores in the chain.
+ SDValue Chain = N->getChain();
+ SmallVector<StoreSDNode *, 8> Stores = {N};
+ while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
+ // All stores must be the same size to ensure that we are writing all of the
+ // bytes in the wide value.
+ // TODO: We could allow multiple sizes by tracking each stored byte.
+ if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
+ Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
}
- // Handle the simple type only.
- unsigned Width = Stores.size();
- EVT VT = EVT::getIntegerVT(
- *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ // There is no reason to continue if we do not have at least a pair of stores.
+ if (Stores.size() < 2)
return SDValue();
- if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
+ // Handle simple types only.
+ LLVMContext &Context = *DAG.getContext();
+ unsigned NumStores = Stores.size();
+ unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
+ unsigned WideNumBits = NumStores * NarrowNumBits;
+ EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
+ if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
return SDValue();
- // Check if all the bytes of the combined value we are looking at are stored
- // to the same base address. Collect bytes offsets from Base address into
- // ByteOffsets.
- SDValue CombinedValue;
- SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
+ // Check if all bytes of the source value that we are looking at are stored
+ // to the same base address. Collect offsets from Base address into OffsetMap.
+ SDValue SourceValue;
+ SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
Optional<BaseIndexOffset> Base;
for (auto Store : Stores) {
- // All the stores store different byte of the CombinedValue. A truncate is
- // required to get that byte value.
+ // All the stores store different parts of the CombinedValue. A truncate is
+ // required to get the partial value.
SDValue Trunc = Store->getValue();
if (Trunc.getOpcode() != ISD::TRUNCATE)
return SDValue();
- // A shift operation is required to get the right byte offset, except the
- // first byte.
+ // Other than the first/last part, a shift operation is required to get the
+ // offset.
int64_t Offset = 0;
- SDValue Value = Trunc.getOperand(0);
- if (Value.getOpcode() == ISD::SRL ||
- Value.getOpcode() == ISD::SRA) {
- auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1));
- // Trying to match the following pattern. The shift offset must be
- // a constant and a multiple of 8. It is the byte offset in "y".
+ SDValue WideVal = Trunc.getOperand(0);
+ if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
+ isa<ConstantSDNode>(WideVal.getOperand(1))) {
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
//
- // x = srl y, offset
+ // x = srl y, ShiftAmtC
// i8 z = trunc x
// store z, ...
- if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
+ uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
+ if (ShiftAmtC % NarrowNumBits != 0)
return SDValue();
- Offset = ShiftOffset->getSExtValue()/8;
- Value = Value.getOperand(0);
+ Offset = ShiftAmtC / NarrowNumBits;
+ WideVal = WideVal.getOperand(0);
}
- // Stores must share the same combined value with different offsets.
- if (!CombinedValue)
- CombinedValue = Value;
- else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
+ // Stores must share the same source value with different offsets.
+ // Truncate and extends should be stripped to get the single source value.
+ if (!SourceValue)
+ SourceValue = WideVal;
+ else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
return SDValue();
-
- // The trunc and all the extend operation should be stripped to get the
- // real value we are stored.
- else if (CombinedValue.getValueType() != VT) {
- if (Value.getValueType() == VT ||
- Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
- CombinedValue = Value;
- // Give up if the combined value type is smaller than the store size.
- if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
+ else if (SourceValue.getValueType() != WideVT) {
+ if (WideVal.getValueType() == WideVT ||
+ WideVal.getScalarValueSizeInBits() >
+ SourceValue.getScalarValueSizeInBits())
+ SourceValue = WideVal;
+ // Give up if the source value type is smaller than the store size.
+ if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
return SDValue();
}
- // Stores must share the same base address
+ // Stores must share the same base address.
BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
@@ -6928,60 +7195,78 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
- // Remember the first byte store
+ // Remember the first store.
if (ByteOffsetFromBase < FirstOffset) {
FirstStore = Store;
FirstOffset = ByteOffsetFromBase;
}
// Map the offset in the store and the offset in the combined value, and
// early return if it has been set before.
- if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
+ if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
return SDValue();
- ByteOffsets[Offset] = ByteOffsetFromBase;
+ OffsetMap[Offset] = ByteOffsetFromBase;
}
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
assert(FirstStore && "First store must be set");
- // Check if the bytes of the combined value we are looking at match with
- // either big or little endian value store.
- Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
- if (!IsBigEndian.hasValue())
- return SDValue();
-
- // The node we are looking at matches with the pattern, check if we can
- // replace it with a single bswap if needed and store.
-
- // If the store needs byte swap check if the target supports it
- bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
-
- // Before legalize we can introduce illegal bswaps which will be later
- // converted to an explicit bswap sequence. This way we end up with a single
- // store and byte shuffling instead of several stores and byte shuffling.
- if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
- return SDValue();
-
// Check that a store of the wide type is both allowed and fast on the target
+ const DataLayout &Layout = DAG.getDataLayout();
bool Fast = false;
- bool Allowed =
- TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- *FirstStore->getMemOperand(), &Fast);
+ bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
+ *FirstStore->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
- if (VT != CombinedValue.getValueType()) {
- assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
- "Get unexpected store value to combine");
- CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
- CombinedValue);
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStores; ++i)
+ if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
+ if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(Layout.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
+ NeedRotate = true;
+ else
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ if (WideVT != SourceValue.getValueType()) {
+ assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
+ "Unexpected store value to merge");
+ SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
}
- if (NeedsBswap)
- CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
+ // Before legalize we can introduce illegal bswaps/rotates which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // store and byte shuffling instead of several stores and byte shuffling.
+ if (NeedBswap) {
+ SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
+ } else if (NeedRotate) {
+ assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
+ SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
+ SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
+ }
SDValue NewStore =
- DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
- FirstStore->getPointerInfo(), FirstStore->getAlignment());
+ DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
+ FirstStore->getPointerInfo(), FirstStore->getAlign());
// Rely on other DAG combine rules to remove the other individual stores.
DAG.ReplaceAllUsesWith(N, NewStore.getNode());
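The rewritten mergeTruncStores collapses the byte-store pattern from the function comment into one wide store, adding a BSWAP (or, for two parts, a rotate) when the offsets match the opposite endianness. The byte-store case in plain C++ (illustrative sketch assuming a little-endian host, not part of the patch):

#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t bswap32(uint32_t v) {
  return (v >> 24) | ((v >> 8) & 0xff00u) | ((v << 8) & 0xff0000u) | (v << 24);
}

int main() {
  uint32_t val = 0x11223344u;
  uint8_t a[4], b[4];

  // The original IR shape: four truncating byte stores of shifted values.
  a[0] = (uint8_t)(val >> 24);
  a[1] = (uint8_t)(val >> 16);
  a[2] = (uint8_t)(val >> 8);
  a[3] = (uint8_t)(val >> 0);

  // The merged form on a little-endian host: one i32 store of BSWAP(val).
  uint32_t swapped = bswap32(val);
  std::memcpy(b, &swapped, sizeof(swapped));

  assert(std::memcmp(a, b, 4) == 0);
  return 0;
}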
@@ -7036,8 +7321,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
"can only analyze providers for individual bytes not bit");
unsigned LoadByteWidth = LoadBitWidth / 8;
return IsBigEndianTarget
- ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
- : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
+ : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
};
Optional<BaseIndexOffset> Base;
@@ -7164,10 +7449,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
if (!Allowed || !Fast)
return SDValue();
- SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
- SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), MemVT,
- FirstLoad->getAlignment());
+ SDValue NewLoad =
+ DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
+ Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
// Transfer chain users from old loads to the new load.
for (LoadSDNode *L : Loads)
@@ -7337,9 +7622,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (N0.hasOneUse()) {
// FIXME Can we handle multiple uses? Could we token factor the chain
// results from the new/old setcc?
- SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
- N0.getOperand(0),
- N0Opcode == ISD::STRICT_FSETCCS);
+ SDValue SetCC =
+ DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
+ N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
CombineTo(N, SetCC);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
recursivelyDeleteUnusedNodes(N0.getNode());
@@ -7440,12 +7725,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
SDValue S0 = S.getOperand(0);
- if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
- unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
- if (C->getAPIntValue() == (OpSizeInBits - 1))
+ if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
return DAG.getNode(ISD::ABS, DL, VT, S0);
- }
}
}
@@ -7980,10 +8263,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
- auto DL = SDLoc(N);
- APInt C0 = N0.getConstantOperandAPInt(0);
- APInt C1 = NC1->getAPIntValue();
- return DAG.getVScale(DL, VT, C0 << C1);
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = NC1->getAPIntValue();
+ return DAG.getVScale(SDLoc(N), VT, C0 << C1);
}
return SDValue();
@@ -8032,12 +8314,6 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
if (NarrowVT != RightOp.getOperand(0).getValueType())
return SDValue();
- // Only transform into mulh if mulh for the narrow type is cheaper than
- // a multiply followed by a shift. This should also check if mulh is
- // legal for NarrowVT on the target.
- if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
- return SDValue();
-
// Proceed with the transformation if the wide type is twice as large
// as the narrow type.
unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
@@ -8055,6 +8331,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
// we use mulhs. Otherwise, zero extends (zext) use mulhu.
unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
+ // Combine to mulh if mulh is legal/custom for the narrow type on the target.
+ if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
+ return SDValue();
+
SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
RightOp.getOperand(0));
return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
@@ -8556,8 +8836,8 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
RHS->getAddressSpace(), NewAlign,
RHS->getMemOperand()->getFlags(), &Fast) &&
Fast) {
- SDValue NewPtr =
- DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL);
+ SDValue NewPtr = DAG.getMemBasePlusOffset(
+ RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
AddToWorklist(NewPtr.getNode());
SDValue Load = DAG.getLoad(
VT, DL, RHS->getChain(), NewPtr,
@@ -9154,16 +9434,75 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
+bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
+ if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
+ return false;
+
+ // For now we check only the LHS of the add.
+ SDValue LHS = Index.getOperand(0);
+ SDValue SplatVal = DAG.getSplatValue(LHS);
+ if (!SplatVal)
+ return false;
+
+ BasePtr = SplatVal;
+ Index = Index.getOperand(1);
+ return true;
+}
+
+// Fold sext/zext of index into index type.
+bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
+ bool Scaled, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (Index.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue Op = Index.getOperand(0);
+ MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ Index = Op;
+ return true;
+ }
+ }
+
+ if (Index.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Op = Index.getOperand(0);
+ MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ Index = Op;
+ return true;
+ }
+ }
+
+ return false;
+}
+
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
SDValue Chain = MSC->getChain();
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+ SDValue StoreVal = MSC->getValue();
+ SDValue BasePtr = MSC->getBasePtr();
SDLoc DL(N);
// Zap scatters with a zero mask.
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
+ if (refineUniformBase(BasePtr, Index, DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(
+ DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+ MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ }
+
+ if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(
+ DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+ MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ }
+
return SDValue();
}
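refineUniformBase peels a splatted scalar out of the index and uses it as the base pointer; the addresses are unchanged, as this scalar model shows (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t p = 0x1000;                  // the splatted scalar pulled out of the index
  uint64_t offsets[4] = {0, 8, 16, 24}; // the remaining vector index
  for (int i = 0; i != 4; ++i) {
    uint64_t beforeFold = 0 + (p + offsets[i]); // null base, index = splat(p) + offsets
    uint64_t afterFold = p + offsets[i];        // base = p, index = offsets
    assert(beforeFold == afterFold);
  }
  return 0;
}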
@@ -9177,6 +9516,14 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
+ // If this is a masked store with an all ones mask, we can use an unmasked store.
+ // FIXME: Can we do this for indexed, compressing, or truncating stores?
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+ MST->isUnindexed() && !MST->isCompressingStore() &&
+ !MST->isTruncatingStore())
+ return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
+ MST->getBasePtr(), MST->getMemOperand());
+
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
@@ -9187,11 +9534,32 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
+ SDValue Chain = MGT->getChain();
+ SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
+ SDValue PassThru = MGT->getPassThru();
+ SDValue BasePtr = MGT->getBasePtr();
SDLoc DL(N);
// Zap gathers with a zero mask.
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
- return CombineTo(N, MGT->getPassThru(), MGT->getChain());
+ return CombineTo(N, PassThru, MGT->getChain());
+
+ if (refineUniformBase(BasePtr, Index, DAG)) {
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
+ PassThru.getValueType(), DL, Ops,
+ MGT->getMemOperand(), MGT->getIndexType(),
+ MGT->getExtensionType());
+ }
+
+ if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
+ PassThru.getValueType(), DL, Ops,
+ MGT->getMemOperand(), MGT->getIndexType(),
+ MGT->getExtensionType());
+ }
return SDValue();
}
@@ -9205,6 +9573,16 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+ // If this is a masked load with an all ones mask, we can use an unmasked load.
+ // FIXME: Can we do this for indexed, expanding, or extending loads?
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+ MLD->isUnindexed() && !MLD->isExpandingLoad() &&
+ MLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
+ MLD->getBasePtr(), MLD->getMemOperand());
+ return CombineTo(N, NewLd, NewLd.getValue(1));
+ }
+
// Try transforming N to an indexed load.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
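The new MLOAD/MSTORE folds rely on a masked memory operation with an all-ones mask touching every lane, making it equivalent to the ordinary operation; in scalar form (illustrative sketch, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t mem[4] = {10, 20, 30, 40};
  uint32_t passthru[4] = {0, 0, 0, 0};
  bool mask[4] = {true, true, true, true};

  uint32_t masked[4], plain[4];
  for (int i = 0; i != 4; ++i) {
    masked[i] = mask[i] ? mem[i] : passthru[i]; // masked.load semantics
    plain[i] = mem[i];                          // ordinary vector load
  }
  for (int i = 0; i != 4; ++i)
    assert(masked[i] == plain[i]);
  return 0;
}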
@@ -9364,6 +9742,113 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
}
}
+
+ // Match VSELECTs into add with unsigned saturation.
+ if (hasOperation(ISD::UADDSAT, VT)) {
+ // Check if one of the arms of the VSELECT is vector with all bits set.
+ // If it's on the left side invert the predicate to simplify logic below.
+ SDValue Other;
+ ISD::CondCode SatCC = CC;
+ if (ISD::isBuildVectorAllOnes(N1.getNode())) {
+ Other = N2;
+ SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
+ } else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
+ Other = N1;
+ }
+
+ if (Other && Other.getOpcode() == ISD::ADD) {
+ SDValue CondLHS = LHS, CondRHS = RHS;
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ // Canonicalize condition operands.
+ if (SatCC == ISD::SETUGE) {
+ std::swap(CondLHS, CondRHS);
+ SatCC = ISD::SETULE;
+ }
+
+ // We can test against either of the addition operands.
+ // x <= x+y ? x+y : ~0 --> uaddsat x, y
+ // x+y >= x ? x+y : ~0 --> uaddsat x, y
+ if (SatCC == ISD::SETULE && Other == CondRHS &&
+ (OpLHS == CondLHS || OpRHS == CondLHS))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+
+ if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
+ CondLHS == OpLHS) {
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x >= ~C ? x+C : ~0 --> uaddsat x, C
+ auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return Cond->getAPIntValue() == ~Op->getAPIntValue();
+ };
+ if (SatCC == ISD::SETULE &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+
+ // Match VSELECTs into sub with unsigned saturation.
+ if (hasOperation(ISD::USUBSAT, VT)) {
+ // Check if one of the arms of the VSELECT is a zero vector. If it's on
+ // the left side invert the predicate to simplify logic below.
+ SDValue Other;
+ ISD::CondCode SatCC = CC;
+ if (ISD::isBuildVectorAllZeros(N1.getNode())) {
+ Other = N2;
+ SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
+ } else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
+ Other = N1;
+ }
+
+ if (Other && Other.getNumOperands() == 2 && Other.getOperand(0) == LHS) {
+ SDValue CondRHS = RHS;
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ // Look for a general sub with unsigned saturation first.
+ // x >= y ? x-y : 0 --> usubsat x, y
+ // x > y ? x-y : 0 --> usubsat x, y
+ if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
+ Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+
+ if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
+ if (isa<BuildVectorSDNode>(CondRHS)) {
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x > C-1 ? x+-C : 0 --> usubsat x, C
+ auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return (!Op && !Cond) ||
+ (Op && Cond &&
+ Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
+ };
+ if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
+ /*AllowUndefs*/ true)) {
+ OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ OpRHS);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+ }
+
+ // Another special case: If C was a sign bit, the sub has been
+ // canonicalized into a xor.
+ // FIXME: Would it be better to use computeKnownBits to determine
+ // whether it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> usubsat x, C
+ if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
+ if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+ OpRHSConst->getAPIntValue().isSignMask()) {
+ // Note that we have to rebuild the RHS constant here to ensure
+ // we don't rely on particular values of undef lanes.
+ OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+ }
+ }
+ }
}
if (SimplifySelectOps(N, N1, N2))
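The VSELECT patterns matched above are the standard compare-plus-select formulations of unsigned saturating add and subtract; a scalar check over all i8 inputs (illustrative sketch, not part of the patch):

#include <cassert>
#include <cstdint>

static uint8_t uaddsat(uint8_t x, uint8_t y) {
  uint8_t sum = (uint8_t)(x + y);
  return sum >= x ? sum : (uint8_t)0xff; // x <= x+y ? x+y : ~0
}

static uint8_t usubsat(uint8_t x, uint8_t y) {
  return x >= y ? (uint8_t)(x - y) : (uint8_t)0; // x >= y ? x-y : 0
}

int main() {
  for (unsigned x = 0; x != 256; ++x)
    for (unsigned y = 0; y != 256; ++y) {
      unsigned add = x + y, sub = x >= y ? x - y : 0;
      assert(uaddsat((uint8_t)x, (uint8_t)y) == (add > 255 ? 255 : add));
      assert(usubsat((uint8_t)x, (uint8_t)y) == sub);
    }
  return 0;
}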
@@ -9722,14 +10207,14 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue BasePtr = LN0->getBasePtr();
for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
const unsigned Offset = Idx * Stride;
- const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
+ const Align Align = commonAlignment(LN0->getAlign(), Offset);
SDValue SplitLoad = DAG.getExtLoad(
ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);
+ BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
@@ -10146,7 +10631,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- EVT N00VT = N0.getOperand(0).getValueType();
+ EVT N00VT = N00.getValueType();
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
@@ -10240,6 +10725,29 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
+ // fold sext (not i1 X) -> add (zext i1 X), -1
+ // TODO: This could be extended to handle bool vectors.
+ if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
+ (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
+ TLI.isOperationLegal(ISD::ADD, VT)))) {
+ // If we can eliminate the 'not', the sext form should be better
+ if (SDValue NewXor = visitXOR(N0.getNode())) {
+ // Returning N0 is a form of in-visit replacement that may have
+ // invalidated N0.
+ if (NewXor.getNode() == N0.getNode()) {
+ // Return SDValue here as the xor should have already been replaced in
+ // this sext.
+ return SDValue();
+ } else {
+ // Return a new sext with the new xor.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
+ }
+ }
+
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+ }
+
return SDValue();
}
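The new fold sext (not i1 X) -> add (zext i1 X), -1 is an identity on 1-bit values; a standalone check (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (bool x : {false, true}) {
    int32_t sext_of_not = x ? 0 : -1;           // sign-extend !x to i32
    int32_t zext_minus_one = (int32_t)x + (-1); // zext x to i32, then add -1
    assert(sext_of_not == zext_minus_one);
  }
  return 0;
}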
@@ -10507,13 +11015,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getValueType());
}
- // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
SDLoc DL(N);
+ EVT N0VT = N0.getValueType();
+ EVT N00VT = N0.getOperand(0).getValueType();
if (SDValue SCC = SimplifySelectCC(
- DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
- DAG.getConstant(0, DL, VT),
+ DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getBoolConstant(true, DL, N0VT, N00VT),
+ DAG.getBoolConstant(false, DL, N0VT, N00VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
- return SCC;
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
}
// (zext (shl (zext x), cst)) -> (shl (zext x), cst)
@@ -10602,22 +11113,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (load x)) -> (aext (truncate (extload x)))
// None of the supported targets knows how to perform load and any_ext
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ // on vectors in one instruction, so attempt to fold to zext instead.
+ if (VT.isVector()) {
+ // Try to simplify (zext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
+ return foldedExt;
+ } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
bool DoXform = true;
- SmallVector<SDNode*, 4> SetCCs;
+ SmallVector<SDNode *, 4> SetCCs;
if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
- TLI);
+ DoXform =
+ ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
- LN0->getBasePtr(), N0.getValueType(),
- LN0->getMemOperand());
+ LN0->getChain(), LN0->getBasePtr(),
+ N0.getValueType(), LN0->getMemOperand());
ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = N0.hasOneUse();
@@ -10626,8 +11141,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
recursivelyDeleteUnusedNodes(LN0);
} else {
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad);
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
CombineTo(LN0, Trunc, ExtLoad.getValue(1));
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -10832,12 +11347,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
uint64_t ShiftAmt = N01->getZExtValue();
- uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
+ uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
else
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() - ShiftAmt);
+ VT.getScalarSizeInBits() - ShiftAmt);
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
@@ -10864,12 +11379,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue SRL = N0;
if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
ShAmt = ConstShift->getZExtValue();
- unsigned EVTBits = ExtVT.getSizeInBits();
+ unsigned EVTBits = ExtVT.getScalarSizeInBits();
// Is the shift amount a multiple of the size of VT?
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
// Is the load width a multiple of the size of VT?
- if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
+ if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
return SDValue();
}
@@ -10899,7 +11414,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
ShiftMask.countTrailingOnes());
// If the mask is smaller, recompute the type.
- if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
+ if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
ExtVT = MaskedVT;
}
@@ -10930,8 +11445,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
- unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
- unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ unsigned LVTStoreBits =
+ LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
return LVTStoreBits - EVTStoreBits - ShAmt;
};
@@ -10941,13 +11457,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ShAmt = AdjustBigEndianShift(ShAmt);
uint64_t PtrOff = ShAmt / 8;
- unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- SDValue NewPtr =
- DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
+ SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
+ TypeSize::Fixed(PtrOff), DL, Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -10969,13 +11485,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue Result = Load;
if (ShLeftAmt != 0) {
EVT ShImmTy = getShiftAmountTy(Result.getValueType());
- if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
ShImmTy = VT;
// If the shift amount is as large as the result size (but, presumably,
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
- if (ShLeftAmt >= VT.getSizeInBits())
+ if (ShLeftAmt >= VT.getScalarSizeInBits())
Result = DAG.getConstant(0, DL, VT);
else
Result = DAG.getNode(ISD::SHL, DL, VT,
@@ -11125,6 +11641,41 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+ // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
+ // Ignore it if the masked load is already sign-extended.
+ if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
+ if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
+ Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
+ SDValue ExtMaskedLoad = DAG.getMaskedLoad(
+ VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
+ Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
+ Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
+ CombineTo(N, ExtMaskedLoad);
+ CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
+ if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
+ if (SDValue(GN0, 0).hasOneUse() &&
+ ExtVT == GN0->getMemoryVT() &&
+ TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+ SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
+ GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
+
+ SDValue ExtLoad = DAG.getMaskedGather(
+ DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
+ GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
+
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorklist(ExtLoad.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
// Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -11225,10 +11776,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
EVT ExTy = N0.getValueType();
EVT TrTy = N->getValueType(0);
- unsigned NumElem = VecTy.getVectorNumElements();
+ auto EltCnt = VecTy.getVectorElementCount();
unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+ auto NewEltCnt = EltCnt * SizeRatio;
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
SDValue EltNo = N0->getOperand(1);
@@ -11342,8 +11894,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (LN0->isSimple() &&
- LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
+ if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
VT, LN0->getChain(), LN0->getBasePtr(),
LN0->getMemoryVT(),
@@ -11416,8 +11967,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
// Simplify the operands using demanded-bits information.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
@@ -11644,7 +12194,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
*LN0->getMemOperand())) {
SDValue Load =
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), LN0->getAlignment(),
+ LN0->getPointerInfo(), LN0->getAlign(),
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
@@ -11991,7 +12541,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
- SDNodeFlags Flags = N->getFlags();
bool CanFuse = Options.UnsafeFPMath || isContractable(N);
bool CanReassociate =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
@@ -12024,15 +12573,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1), N1, Flags);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
+ N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(0), N1.getOperand(1), N0, Flags);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
+ N1.getOperand(1), N0);
}
// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
@@ -12055,8 +12604,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue B = FMA.getOperand(1);
SDValue C = FMA.getOperand(2).getOperand(0);
SDValue D = FMA.getOperand(2).getOperand(1);
- SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags);
- return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags);
+ SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
}
// Look through FP_EXTEND nodes to do more combining.
@@ -12068,10 +12617,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)), N1, Flags);
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ N1);
}
}
@@ -12083,10 +12631,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)), N0, Flags);
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
+ N0);
}
}
@@ -12094,14 +12641,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (Aggressive) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
- auto FoldFAddFMAFPExtFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
- SDNodeFlags Flags) {
+ auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
+ SDValue Z) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z, Flags), Flags);
+ Z));
};
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
@@ -12112,7 +12658,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
- N1, Flags);
+ N1);
}
}
}
@@ -12122,16 +12668,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- auto FoldFAddFPExtFMAFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
- SDNodeFlags Flags) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z, Flags), Flags);
+ auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
+ SDValue Z) {
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -12142,7 +12686,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
- N1, Flags);
+ N1);
}
}
}
@@ -12158,7 +12702,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
- N0, Flags);
+ N0);
}
}
}
@@ -12177,7 +12721,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
- N0, Flags);
+ N0);
}
}
}
@@ -12235,8 +12779,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
- XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z),
- Flags);
+ XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
}
return SDValue();
};
@@ -12247,7 +12790,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
- YZ.getOperand(1), X, Flags);
+ YZ.getOperand(1), X);
}
return SDValue();
};
@@ -12278,7 +12821,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
- DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
}
// Look through FP_EXTEND nodes to do more combining.
@@ -12291,11 +12834,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
}
}
@@ -12307,13 +12848,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)),
- N0, Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
}
}
@@ -12330,13 +12869,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1, Flags));
+ return DAG.getNode(
+ ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
}
}
}
@@ -12354,13 +12892,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N000.getValueType())) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1, Flags));
+ return DAG.getNode(
+ ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
}
}
}
@@ -12372,13 +12909,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
N0.getOperand(2)->hasOneUse()) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
+ N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
// fold (fsub x, (fma y, z, (fmul u, v)))
@@ -12388,13 +12924,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N20),
- N21, N0, Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
}
@@ -12408,15 +12942,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
@@ -12434,18 +12966,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
@@ -12461,16 +12990,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1200)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1201),
- N0, Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
}
}
@@ -12491,18 +13017,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N100)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1020)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1021),
- N0, Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
}
}
}
@@ -12518,7 +13041,6 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
- const SDNodeFlags Flags = N->getFlags();
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
@@ -12550,56 +13072,56 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
// fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
- auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
+ auto FuseFADD = [&](SDValue X, SDValue Y) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
if (C->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
+ Y);
if (C->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
}
}
return SDValue();
};
- if (SDValue FMA = FuseFADD(N0, N1, Flags))
+ if (SDValue FMA = FuseFADD(N0, N1))
return FMA;
- if (SDValue FMA = FuseFADD(N1, N0, Flags))
+ if (SDValue FMA = FuseFADD(N1, N0))
return FMA;
// fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
// fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
// fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
// fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
- auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
+ auto FuseFSUB = [&](SDValue X, SDValue Y) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
if (C0->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- Y, Flags);
+ Y);
if (C0->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
}
if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
if (C1->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
if (C1->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
+ Y);
}
}
return SDValue();
};
- if (SDValue FMA = FuseFSUB(N0, N1, Flags))
+ if (SDValue FMA = FuseFSUB(N0, N1))
return FMA;
- if (SDValue FMA = FuseFSUB(N1, N0, Flags))
+ if (SDValue FMA = FuseFSUB(N1, N0))
return FMA;
return SDValue();
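The FuseFADD/FuseFSUB rewrites above rely on (x ± 1) * y == x*y ± y, which is exact only in real arithmetic; the fused form rounds once where the original rounds twice, which is one reason these rewrites live on the fused-multiply-add path rather than being applied unconditionally. A standalone sketch comparing the two shapes with std::fma under a small relative tolerance, assuming ordinary IEEE doubles:

    #include <cassert>
    #include <cmath>

    int main() {
      double x = 1.5e8, y = 3.0;
      double mul = (x + 1.0) * y;        // original: fmul (fadd x, 1.0), y
      double fused = std::fma(x, y, y);  // rewritten: fma x, y, y
      // Identical in real arithmetic; in floating point they can differ by a
      // final rounding, so the two are only interchangeable under fast-math.
      assert(std::fabs(mul - fused) <= 1e-9 * std::fabs(mul));
      return 0;
    }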
@@ -12608,12 +13130,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
- bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- const SDNodeFlags Flags = N->getFlags();
+ SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -12625,11 +13148,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
@@ -12644,13 +13167,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
N1, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
// fold (fadd (fneg A), B) -> (fsub B, A)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
N0, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -12662,14 +13185,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
if (isFMulNegTwo(N0)) {
SDValue B = N0.getOperand(0);
- SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
- return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
}
// fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
if (isFMulNegTwo(N1)) {
SDValue B = N1.getOperand(0);
- SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
- return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
}
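The two folds above, fadd (fmul B, -2.0), A --> fsub A, (fadd B, B), are exact rewrites for non-NaN inputs: doubling and negation never round, so both forms perform the same single addition. A standalone bit-level check over a few representative doubles, assuming IEEE denormal handling (no flush-to-zero) on the host:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint64_t bits(double d) {
      uint64_t u;
      std::memcpy(&u, &d, sizeof u);
      return u;
    }

    int main() {
      double samples[] = {0.0, -0.0, 1.0, 0.1, -3.25, 1e300, 5e-324};
      for (double a : samples)
        for (double b : samples) {
          double lhs = b * -2.0 + a; // fadd (fmul B, -2.0), A
          double rhs = a - (b + b);  // fsub A, (fadd B, B)
          // Scaling by 2 and negation are exact, so both sides round identically.
          assert(bits(lhs) == bits(rhs));
        }
      return 0;
    }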
// No FP constant should be created after legalization as Instruction
@@ -12695,9 +13218,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
if (N1CFP && N0.getOpcode() == ISD::FADD &&
- isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
- return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
}
// We can fold chains of FADD's of the same value into multiplications.
@@ -12705,14 +13228,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
- bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
- bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
+ bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
- DAG.getConstantFP(1.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
@@ -12720,20 +13243,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
- DAG.getConstantFP(2.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
}
}
if (N1.getOpcode() == ISD::FMUL) {
- bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
- bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
+ bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
- DAG.getConstantFP(1.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
@@ -12741,28 +13264,28 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
- DAG.getConstantFP(2.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
}
}
if (N0.getOpcode() == ISD::FADD) {
- bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
- return DAG.getNode(ISD::FMUL, DL, VT,
- N1, DAG.getConstantFP(3.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1,
+ DAG.getConstantFP(3.0, DL, VT));
}
}
if (N1.getOpcode() == ISD::FADD) {
- bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, DL, VT,
- N0, DAG.getConstantFP(3.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(3.0, DL, VT));
}
}
@@ -12772,7 +13295,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
- DAG.getConstantFP(4.0, DL, VT), Flags);
+ DAG.getConstantFP(4.0, DL, VT));
}
}
} // enable-unsafe-fp-math
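The chain of folds above ((fadd (fmul x, c), x) -> (fmul x, c+1), x + x + x -> 3*x, and so on) distributes addition over multiplication, which reorders roundings; that is why the whole block sits under the unsafe-fp-math / reassociation check. A small standalone sketch with illustrative values, showing the two shapes agree only up to rounding:

    #include <cassert>
    #include <cmath>

    int main() {
      double x = 0.1, c = 7.0;
      // Exact in real arithmetic, equal only up to rounding in floating point:
      assert(std::fabs((x * c + x) - x * (c + 1.0)) <= 1e-15); // fadd (fmul x, c), x
      assert(std::fabs(((x + x) + x) - 3.0 * x) <= 1e-15);     // fadd (fadd x, x), x
      return 0;
    }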
@@ -12785,6 +13308,33 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N0 = N->getOperand(1);
+ SDValue N1 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT ChainVT = N->getValueType(1);
+ SDLoc DL(N);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+ if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
+ N1, DAG, LegalOperations, ForCodeSize)) {
+ return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+ {Chain, N0, NegN1});
+ }
+
+ // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+ if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
+ N0, DAG, LegalOperations, ForCodeSize)) {
+ return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+ {Chain, N1, NegN0});
+ }
+ return SDValue();
+}
+
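The new visitSTRICT_FADD folds use the fact that IEEE-754 defines a - b as a + (-b), so rewriting strict_fadd A, (fneg B) into strict_fsub A, B does not change the computed value. A standalone bit-exact check of that identity on plain doubles; it does not model the strict-FP chain or exception semantics:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint64_t bits(double d) {
      uint64_t u;
      std::memcpy(&u, &d, sizeof u);
      return u;
    }

    int main() {
      double samples[] = {0.0, -0.0, 1.0, -2.5, 1e-308, 1e308};
      for (double a : samples)
        for (double b : samples)
          // a + (-b) and a - b are the same IEEE-754 operation.
          assert(bits(a + (-b)) == bits(a - b));
      return 0;
    }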
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -12794,6 +13344,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -12805,7 +13356,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -12825,18 +13376,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// (fsub -0.0, N1) -> -N1
- // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
- // FSUB does not specify the sign bit of a NaN. Also note that for
- // the same reason, the inverse transform is not safe, unless fast math
- // flags are in play.
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
- return NegN1;
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
+ // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
+ // flushed to zero, unless all users treat denorms as zero (DAZ).
+ // FIXME: This transform will change the sign of a NaN and the behavior
+ // of a signaling NaN. It is only valid when a NoNaN flag is present.
+ DenormalMode DenormMode = DAG.getDenormalMode(VT);
+ if (DenormMode == DenormalMode::getIEEE()) {
+ if (SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+ return NegN1;
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1);
+ }
}
}
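The guarded fold above turns -0.0 - X into fneg X. Under IEEE denormal handling (the DenormalMode::getIEEE() case) the two agree bit-for-bit for every non-NaN X, including both zeros; with flush-to-zero a denormal X would make the subtraction return zero while fneg still flips the sign bit, which is exactly what the new check rules out. A standalone sketch, assuming the host FPU is not in FTZ/DAZ mode:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint64_t bits(double d) {
      uint64_t u;
      std::memcpy(&u, &d, sizeof u);
      return u;
    }

    int main() {
      double samples[] = {0.0, -0.0, 1.0, -2.5, 5e-324, 1e308};
      for (double x : samples)
        assert(bits(-0.0 - x) == bits(-x)); // -0.0 - x matches fneg x bit-for-bit
      // +0.0 - x is not fneg x when x == +0.0; that case needs the nsz flag.
      assert(bits(0.0 - 0.0) != bits(-0.0)); // 0.0 - 0.0 is +0.0, fneg(+0.0) is -0.0
      return 0;
    }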
@@ -12845,16 +13399,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
if (N0 == N1->getOperand(0))
- return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
// X - (Y + X) -> -Y
if (N0 == N1->getOperand(1))
- return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -12874,6 +13428,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -12887,35 +13442,28 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
// canonicalize constant to RHS
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
- (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
- // fold (fmul A, 0) -> 0
- if (N1CFP && N1CFP->isZero())
- return N1;
- }
-
if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
- if (isConstantFPBuildVectorOrConstantFP(N1) &&
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
N0.getOpcode() == ISD::FMUL) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
// Avoid an infinite loop by making sure that N00 is not a constant
// (the inner multiply has not been constant folded yet).
- if (isConstantFPBuildVectorOrConstantFP(N01) &&
- !isConstantFPBuildVectorOrConstantFP(N00)) {
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
}
}
@@ -12924,14 +13472,14 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
N0.getOperand(0) == N0.getOperand(1)) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
@@ -12950,7 +13498,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
// fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
@@ -13016,10 +13564,11 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
-
// FMA nodes have flags that propagate to the created nodes.
- const SDNodeFlags Flags = N->getFlags();
- bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ bool UnsafeFPMath =
+ Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
@@ -13040,7 +13589,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
+ return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
@@ -13048,51 +13597,45 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP && N1CFP->isZero())
return N2;
}
- // TODO: The FMA node should have flags that propagate to these nodes.
+
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
if (UnsafeFPMath) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
- isConstantFPBuildVectorOrConstantFP(N1) &&
- isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
+ DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
- Flags), Flags);
+ DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
}
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
if (N0.getOpcode() == ISD::FMUL &&
- isConstantFPBuildVectorOrConstantFP(N1) &&
- isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- return DAG.getNode(ISD::FMA, DL, VT,
- N0.getOperand(0),
- DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
- Flags),
+ DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
N2);
}
}
- // (fma x, 1, y) -> (fadd x, y)
// (fma x, -1, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
- // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
AddToWorklist(RHSNeg.getNode());
- // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
@@ -13102,25 +13645,23 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
(N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
ForCodeSize)))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
+ DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
}
}
if (UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1,
- DAG.getConstantFP(1.0, DL, VT), Flags),
- Flags);
+ return DAG.getNode(
+ ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1,
- DAG.getConstantFP(-1.0, DL, VT), Flags),
- Flags);
+ return DAG.getNode(
+ ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
}
}
@@ -13129,7 +13670,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (!TLI.isFNegFree(VT))
if (SDValue Neg = TLI.getCheaperNegatedExpression(
SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags);
+ return DAG.getNode(ISD::FNEG, DL, VT, Neg);
return SDValue();
}
@@ -13150,14 +13691,13 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
return SDValue();
// Skip if current node is a reciprocal/fneg-reciprocal.
- SDValue N0 = N->getOperand(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
return SDValue();
// Exit early if the target does not want this transform or if there can't
// possibly be enough uses of the divisor to make the transform worthwhile.
- SDValue N1 = N->getOperand(1);
unsigned MinUses = TLI.combineRepeatedFPDivisors();
// For splat vectors, scale the number of uses by the splat factor. If we can
@@ -13175,6 +13715,13 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
SetVector<SDNode *> Users;
for (auto *U : N1->uses()) {
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+ // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
+ if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
+ U->getOperand(0) == U->getOperand(1).getOperand(0) &&
+ U->getFlags().hasAllowReassociation() &&
+ U->getFlags().hasNoSignedZeros())
+ continue;
+
// This division is eligible for optimization only if global unsafe math
// is enabled or if this division allows reciprocal formation.
if (UnsafeMath || U->getFlags().hasAllowReciprocal())
@@ -13216,6 +13763,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -13227,7 +13775,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -13252,29 +13800,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getConstantFP(Recip, DL, VT), Flags);
+ DAG.getConstantFP(Recip, DL, VT));
}
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
- Flags)) {
+ if (SDValue RV =
+ buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
- Flags)) {
+ if (SDValue RV =
+ buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
@@ -13289,29 +13837,34 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
if (Sqrt.getNode()) {
// If the other multiply operand is known positive, pull it into the
- // sqrt. That will eliminate the division if we convert to an estimate:
- // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
- // TODO: Also fold the case where A == Z (fabs is missing).
+ // sqrt. That will eliminate the division if we convert to an estimate.
if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
- N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() &&
- Y.getOpcode() == ISD::FABS && Y.hasOneUse()) {
- SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0),
- Y.getOperand(0), Flags);
- SDValue AAZ =
- DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags);
- if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
- return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags);
-
- // Estimate creation failed. Clean up speculatively created nodes.
- recursivelyDeleteUnusedNodes(AAZ.getNode());
+ N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
+ SDValue A;
+ if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
+ A = Y.getOperand(0);
+ else if (Y == Sqrt.getOperand(0))
+ A = Y;
+ if (A) {
+ // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
+ // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
+ SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
+ SDValue AAZ =
+ DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
+ if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
+
+ // Estimate creation failed. Clean up speculatively created nodes.
+ recursivelyDeleteUnusedNodes(AAZ.getNode());
+ }
}
// We found a FSQRT, so try to make this fold:
// X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
- SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags);
+ SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
AddToWorklist(Div.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
}
}
}
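The generalized combine above folds the other multiplicand into the square root using fabs(A) * sqrt(Z) == sqrt(A*A*Z) (and A * sqrt(A) == sqrt(A*A*A)), then replaces the division with a reciprocal-square-root estimate. The real combine uses the target's rsqrt estimate plus refinement; the sketch below only checks the algebraic rearrangement, with 1/sqrt standing in for rsqrt and a loose tolerance:

    #include <cassert>
    #include <cmath>

    int main() {
      double X = 7.0, A = -3.0, Z = 2.5;
      double lhs = X / (std::fabs(A) * std::sqrt(Z)); // X / (fabs(A) * sqrt(Z))
      double rhs = X * (1.0 / std::sqrt(A * A * Z));  // X * rsqrt(A*A*Z), ideal rsqrt
      assert(std::fabs(lhs - rhs) <= 1e-14 * std::fabs(lhs));
      return 0;
    }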
@@ -13322,6 +13875,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
return RV;
}
+ // Fold X/Sqrt(X) -> Sqrt(X)
+ if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
+ (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
+ if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
+ return N1;
+
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
TargetLowering::NegatibleCost CostN0 =
TargetLowering::NegatibleCost::Expensive;
@@ -13334,7 +13893,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags);
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
return SDValue();
}
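The new X/sqrt(X) fold earlier in this function uses x / sqrt(x) == sqrt(x), exact for positive real x; the nsz and reassociation requirements exist because the two sides disagree at zero (0/sqrt(0) is NaN while sqrt(0) is 0) and otherwise agree only up to rounding. A standalone numeric sketch over a few positive doubles:

    #include <cassert>
    #include <cmath>

    int main() {
      double samples[] = {0.25, 1.0, 2.0, 3.5, 1e10};
      for (double x : samples) {
        double lhs = x / std::sqrt(x); // the un-simplified form
        double rhs = std::sqrt(x);     // what the fold returns
        assert(std::fabs(lhs - rhs) <= 1e-15 * rhs);
      }
      return 0;
    }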
@@ -13346,13 +13905,14 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
+ return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -13366,7 +13926,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
// Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
// sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
- if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+ if (!Flags.hasApproximateFuncs() ||
(!Options.NoInfsFPMath && !Flags.hasNoInfs()))
return SDValue();
@@ -13375,6 +13935,10 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
return SDValue();
// FSQRT nodes have flags that propagate to the created nodes.
+ // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
+ // transform the fdiv, we may produce a sub-optimal estimate sequence
+ // because the reciprocal calculation may not have to filter out a
+ // 0.0 input.
return buildSqrtEstimate(N0, Flags);
}
@@ -13398,8 +13962,8 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
- bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
if (N0CFP && N1CFP) // Constant fold
@@ -13446,6 +14010,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
if (!ExponentC)
return SDValue();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Try to convert x ** (1/3) into cube root.
// TODO: Handle the various flavors of long double.
@@ -13472,7 +14037,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
return SDValue();
- return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
+ return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
}
// Try to convert x ** (1/4) and x ** (3/4) into square roots.
@@ -13507,12 +14072,12 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
// pow(X, 0.25) --> sqrt(sqrt(X))
SDLoc DL(N);
- SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
- SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
+ SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
if (ExponentIs025)
return SqrtSqrt;
// pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
- return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
}
return SDValue();
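The pow folds above use x^0.25 == sqrt(sqrt(x)) and x^0.75 == sqrt(x) * sqrt(sqrt(x)); the checks elsewhere in this function (not all shown in this hunk) guard the edge cases where pow and the sqrt chain differ. A standalone sketch over a few ordinary values, with a loose tolerance since libm pow and sqrt round independently:

    #include <cassert>
    #include <cmath>

    int main() {
      double samples[] = {0.5, 1.0, 42.0, 1e100};
      for (double x : samples) {
        double s = std::sqrt(x), ss = std::sqrt(s);
        assert(std::fabs(std::pow(x, 0.25) - ss) <= 1e-14 * ss);         // pow(X, 0.25)
        assert(std::fabs(std::pow(x, 0.75) - s * ss) <= 1e-14 * s * ss); // pow(X, 0.75)
      }
      return 0;
    }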
@@ -13695,7 +14260,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
return DAG.getUNDEF(VT);
// fold (fp_to_sint c1fp) -> c1
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
@@ -13710,7 +14275,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
return DAG.getUNDEF(VT);
// fold (fp_to_uint c1fp) -> c1
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
@@ -13782,7 +14347,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
// fold (fp_extend c1fp) -> c1fp
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
// fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
@@ -13830,7 +14395,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
@@ -13841,7 +14406,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
// fold ftrunc (known rounded int x) -> x
@@ -13865,19 +14430,19 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
}
-// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Constant fold FNEG.
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (SDValue NegN0 =
@@ -13892,51 +14457,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
(DAG.getTarget().Options.NoSignedZerosFPMath ||
N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
- N0.getOperand(0), N->getFlags());
- }
-
- // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
- // constant pool values.
- if (!TLI.isFNegFree(VT) &&
- N0.getOpcode() == ISD::BITCAST &&
- N0.getNode()->hasOneUse()) {
- SDValue Int = N0.getOperand(0);
- EVT IntVT = Int.getValueType();
- if (IntVT.isInteger() && !IntVT.isVector()) {
- APInt SignMask;
- if (N0.getValueType().isVector()) {
- // For a vector, get a mask such as 0x80... per scalar element
- // and splat it.
- SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
- SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
- } else {
- // For a scalar, just generate 0x80...
- SignMask = APInt::getSignMask(IntVT.getSizeInBits());
- }
- SDLoc DL0(N0);
- Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
- DAG.getConstant(SignMask, DL0, IntVT));
- AddToWorklist(Int.getNode());
- return DAG.getBitcast(VT, Int);
- }
- }
-
- // (fneg (fmul c, x)) -> (fmul -c, x)
- if (N0.getOpcode() == ISD::FMUL &&
- (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
- ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
- if (CFP1) {
- APFloat CVal = CFP1->getValueAPF();
- CVal.changeSign();
- if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
- TLI.isOperationLegal(ISD::ConstantFP, VT)))
- return DAG.getNode(
- ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
- N0->getFlags());
- }
+ N0.getOperand(0));
}
+ if (SDValue Cast = foldSignChangeInBitcast(N))
+ return Cast;
+
return SDValue();
}
@@ -13947,6 +14473,11 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
EVT VT = N->getValueType(0);
const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opc = N->getOpcode();
+ bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
+ bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
@@ -13955,10 +14486,39 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
}
// Canonicalize to constant on RHS.
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+ if (N1CFP) {
+ const APFloat &AF = N1CFP->getValueAPF();
+
+ // minnum(X, nan) -> X
+ // maxnum(X, nan) -> X
+ // minimum(X, nan) -> nan
+ // maximum(X, nan) -> nan
+ if (AF.isNaN())
+ return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
+
+ // In the following folds, inf can be replaced with the largest finite
+ // float, if the ninf flag is set.
+ if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
+ // minnum(X, -inf) -> -inf
+ // maxnum(X, +inf) -> +inf
+ // minimum(X, -inf) -> -inf if nnan
+ // maximum(X, +inf) -> +inf if nnan
+ if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
+ return N->getOperand(1);
+
+ // minnum(X, +inf) -> X if nnan
+ // maxnum(X, -inf) -> X if nnan
+ // minimum(X, +inf) -> X
+ // maximum(X, -inf) -> X
+ if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
+ return N->getOperand(0);
+ }
+ }
+
return SDValue();
}
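
The new minnum/maxnum folds above rely on the IEEE-754 minNum/maxNum convention that a quiet NaN operand is ignored, and on infinities dominating only in the matching direction. A minimal standalone sketch (assuming a C++17 host toolchain; std::fmin/std::fmax follow that convention) checks the concrete identities for a non-NaN X, which is the situation the nnan-guarded folds assume:

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double X = 3.5;
  double NaN = std::numeric_limits<double>::quiet_NaN();
  double Inf = std::numeric_limits<double>::infinity();

  // A NaN operand is ignored: minnum(X, nan) -> X, maxnum(X, nan) -> X.
  assert(std::fmin(X, NaN) == X);
  assert(std::fmax(X, NaN) == X);

  // An infinity in the matching direction dominates:
  // minnum(X, -inf) -> -inf, maxnum(X, +inf) -> +inf.
  assert(std::fmin(X, -Inf) == -Inf);
  assert(std::fmax(X, Inf) == Inf);

  // In the opposite direction the infinity is neutral (for non-NaN X):
  // minnum(X, +inf) -> X, maxnum(X, -inf) -> X.
  assert(std::fmin(X, Inf) == X);
  assert(std::fmax(X, -Inf) == X);
  return 0;
}
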
@@ -13983,7 +14543,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fabs c1) -> fabs(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
@@ -13995,28 +14555,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
- // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
- if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
- SDValue Int = N0.getOperand(0);
- EVT IntVT = Int.getValueType();
- if (IntVT.isInteger() && !IntVT.isVector()) {
- APInt SignMask;
- if (N0.getValueType().isVector()) {
- // For a vector, get a mask such as 0x7f... per scalar element
- // and splat it.
- SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
- SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
- } else {
- // For a scalar, just generate 0x7f...
- SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
- }
- SDLoc DL(N0);
- Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
- DAG.getConstant(SignMask, DL, IntVT));
- AddToWorklist(Int.getNode());
- return DAG.getBitcast(N->getValueType(0), Int);
- }
- }
+ if (SDValue Cast = foldSignChangeInBitcast(N))
+ return Cast;
return SDValue();
}
@@ -14026,6 +14566,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
+ // nondeterministic jumps).
+ if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
+ N1->getOperand(0), N2);
+ }
+
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
@@ -14179,63 +14726,6 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
return SDValue();
}
-/// Return true if 'Use' is a load or a store that uses N as its base pointer
-/// and that N may be folded in the load / store addressing mode.
-static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
- SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT;
- unsigned AS;
-
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
- if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
- return false;
- VT = LD->getMemoryVT();
- AS = LD->getAddressSpace();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
- if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
- return false;
- VT = ST->getMemoryVT();
- AS = ST->getAddressSpace();
- } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
- if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
- return false;
- VT = LD->getMemoryVT();
- AS = LD->getAddressSpace();
- } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
- if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
- return false;
- VT = ST->getMemoryVT();
- AS = ST->getAddressSpace();
- } else
- return false;
-
- TargetLowering::AddrMode AM;
- if (N->getOpcode() == ISD::ADD) {
- AM.HasBaseReg = true;
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (Offset)
- // [reg +/- imm]
- AM.BaseOffs = Offset->getSExtValue();
- else
- // [reg +/- reg]
- AM.Scale = 1;
- } else if (N->getOpcode() == ISD::SUB) {
- AM.HasBaseReg = true;
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (Offset)
- // [reg +/- imm]
- AM.BaseOffs = -Offset->getSExtValue();
- else
- // [reg +/- reg]
- AM.Scale = 1;
- } else
- return false;
-
- return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
- VT.getTypeForEVT(*DAG.getContext()), AS);
-}
-
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
bool &IsLoad, bool &IsMasked, SDValue &Ptr,
const TargetLowering &TLI) {
@@ -14464,16 +14954,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Therefore, we have:
// t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
- ConstantSDNode *CN =
- cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
- int X0, X1, Y0, Y1;
+ auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
const APInt &Offset0 = CN->getAPIntValue();
- APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
-
- X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
- Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
- X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
- Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
+ const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
+ int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
+ int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
+ int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
+ int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
@@ -14665,8 +15152,8 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
-static inline int numVectorEltsOrZero(EVT T) {
- return T.isVector() ? T.getVectorNumElements() : 0;
+static inline ElementCount numVectorEltsOrZero(EVT T) {
+ return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
}
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
@@ -14734,6 +15221,24 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
EVT STMemType = ST->getMemoryVT();
EVT STType = ST->getValue().getValueType();
+ // There are two cases to consider here:
+ // 1. The store is fixed width and the load is scalable. In this case we
+ // don't know at compile time if the store completely envelops the load
+ // so we abandon the optimisation.
+ // 2. The store is scalable and the load is fixed width. We could
+ // potentially support a limited number of cases here, but there has been
+ // no cost-benefit analysis to prove it's worth it.
+ bool LdStScalable = LDMemType.isScalableVector();
+ if (LdStScalable != STMemType.isScalableVector())
+ return SDValue();
+
+ // If we are dealing with scalable vectors on a big endian platform the
+ // calculation of offsets below becomes trickier, since we do not know at
+ // compile time the absolute size of the vector. Until we've done more
+ // analysis on big-endian platforms it seems better to bail out for now.
+ if (LdStScalable && DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
int64_t Offset;
@@ -14745,13 +15250,21 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// the stored value). With Offset=n (for n > 0) the loaded value starts at the
// n:th least significant byte of the stored value.
if (DAG.getDataLayout().isBigEndian())
- Offset = ((int64_t)STMemType.getStoreSizeInBits() -
- (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;
+ Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
+ (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
+ 8 -
+ Offset;
// Check that the stored value cover all bits that are loaded.
- bool STCoversLD =
- (Offset >= 0) &&
- (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
+ bool STCoversLD;
+
+ TypeSize LdMemSize = LDMemType.getSizeInBits();
+ TypeSize StMemSize = STMemType.getSizeInBits();
+ if (LdStScalable)
+ STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
+ else
+ STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
+ StMemSize.getFixedSize());
auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
if (LD->isIndexed()) {
@@ -14772,15 +15285,15 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// Memory as copy space (potentially masked).
if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
// Simple case: Direct non-truncating forwarding
- if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
+ if (LDType.getSizeInBits() == LdMemSize)
return ReplaceLd(LD, ST->getValue(), Chain);
// Can we model the truncate and extension with an and mask?
if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
!LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
// Mask to size of LDMemType
auto Mask =
- DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
- STMemType.getSizeInBits()),
+ DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
+ StMemSize.getFixedSize()),
SDLoc(ST), STType);
auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
return ReplaceLd(LD, Val, Chain);
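
The "and mask" path in the hunk above forwards a stored value to a narrower zero-extending load by masking the stored value down to the load's width. A minimal sketch of the underlying memory identity, assuming a little-endian host (the names Stored/Narrow are illustrative only, not from the patch):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  std::uint32_t Stored = 0xAABBCCDDu;
  unsigned char Mem[4];
  std::memcpy(Mem, &Stored, sizeof(Stored)); // the wide store

  std::uint16_t Narrow;
  std::memcpy(&Narrow, Mem, sizeof(Narrow)); // narrow zero-extending load
  std::uint32_t Loaded = Narrow;

  // On a little-endian host the narrow load at offset 0 observes the low
  // bits of the store, so masking the stored value reproduces the load.
  assert(Loaded == (Stored & 0xFFFFu));
  return 0;
}
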
@@ -15603,8 +16116,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
- unsigned NewAlign = St->getAlignment();
-
if (DAG.getDataLayout().isLittleEndian())
StOffset = ByteShift;
else
@@ -15613,8 +16124,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
- Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
- NewAlign = MinAlign(NewAlign, StOffset);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
}
// Truncate down to the new size.
@@ -15623,7 +16133,8 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
++OpsNarrowed;
return DAG
.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
- St->getPointerInfo().getWithOffset(StOffset), NewAlign);
+ St->getPointerInfo().getWithOffset(StOffset),
+ St->getOriginalAlign());
}
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
@@ -15727,7 +16238,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
return SDValue();
- SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
+ SDValue NewPtr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
@@ -16035,9 +16547,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
if (!UseTrunc) {
- NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- FirstInChain->getAlignment());
+ NewStore =
+ DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstInChain->getAlign());
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -16049,8 +16561,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlignment(),
- FirstInChain->getMemOperand()->getFlags());
+ FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
}
// Replace all merged stores with the new store.
@@ -16065,23 +16576,19 @@ void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
SDNode *&RootNode) {
// This holds the base pointer, index, and the offset in bytes from the base
- // pointer.
+ // pointer. We must have a base and an offset. Do not handle stores to undef
+ // base pointers.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
- EVT MemVT = St->getMemoryVT();
-
- SDValue Val = peekThroughBitcasts(St->getValue());
- // We must have a base and an offset.
- if (!BasePtr.getBase().getNode())
- return;
-
- // Do not handle stores to undef base pointers.
- if (BasePtr.getBase().isUndef())
+ if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
return;
+ SDValue Val = peekThroughBitcasts(St->getValue());
StoreSource StoreSrc = getStoreSource(Val);
assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
- BaseIndexOffset LBasePtr;
+
// Match on loadbaseptr if relevant.
+ EVT MemVT = St->getMemoryVT();
+ BaseIndexOffset LBasePtr;
EVT LoadVT;
if (StoreSrc == StoreSource::Load) {
auto *Ld = cast<LoadSDNode>(Val);
@@ -16102,7 +16609,7 @@ void DAGCombiner::getStoreMergeCandidates(
int64_t &Offset) -> bool {
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
- if (!Other->isSimple() || Other->isIndexed())
+ if (!Other->isSimple() || Other->isIndexed())
return false;
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
@@ -16111,37 +16618,38 @@ void DAGCombiner::getStoreMergeCandidates(
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
- if (StoreSrc == StoreSource::Load) {
+ switch (StoreSrc) {
+ case StoreSource::Load: {
if (NoTypeMatch)
return false;
- // The Load's Base Ptr must also match
- if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
- BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
- if (LoadVT != OtherLd->getMemoryVT())
- return false;
- // Loads must only have one use.
- if (!OtherLd->hasNUsesOfValue(1, 0))
- return false;
- // The memory operands must not be volatile/indexed/atomic.
- // TODO: May be able to relax for unordered atomics (see D66309)
- if (!OtherLd->isSimple() ||
- OtherLd->isIndexed())
- return false;
- // Don't mix temporal loads with non-temporal loads.
- if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
- return false;
- if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
- return false;
- } else
+ // The Load's Base Ptr must also match.
+ auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
+ if (!OtherLd)
+ return false;
+ BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
+ if (LoadVT != OtherLd->getMemoryVT())
+ return false;
+ // Loads must only have one use.
+ if (!OtherLd->hasNUsesOfValue(1, 0))
return false;
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!OtherLd->isSimple() || OtherLd->isIndexed())
+ return false;
+ // Don't mix temporal loads with non-temporal loads.
+ if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
+ return false;
+ if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
+ return false;
+ break;
}
- if (StoreSrc == StoreSource::Constant) {
+ case StoreSource::Constant:
if (NoTypeMatch)
return false;
if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
return false;
- }
- if (StoreSrc == StoreSource::Extract) {
+ break;
+ case StoreSource::Extract:
// Do not merge truncated stores here.
if (Other->isTruncatingStore())
return false;
@@ -16150,6 +16658,9 @@ void DAGCombiner::getStoreMergeCandidates(
if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
+ break;
+ default:
+ llvm_unreachable("Unhandled store source for merging");
}
Ptr = BaseIndexOffset::match(Other, DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
@@ -16160,11 +16671,22 @@ void DAGCombiner::getStoreMergeCandidates(
auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
SDNode *RootNode) -> bool {
auto RootCount = StoreRootCountMap.find(StoreNode);
- if (RootCount != StoreRootCountMap.end() &&
- RootCount->second.first == RootNode &&
- RootCount->second.second > StoreMergeDependenceLimit)
- return true;
- return false;
+ return RootCount != StoreRootCountMap.end() &&
+ RootCount->second.first == RootNode &&
+ RootCount->second.second > StoreMergeDependenceLimit;
+ };
+
+ auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
+ // This must be a chain use.
+ if (UseIter.getOperandNo() != 0)
+ return;
+ if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
+ BaseIndexOffset Ptr;
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherStore, RootNode))
+ StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
+ }
};
// We looking for a root node which is an ancestor to all mergable
@@ -16186,31 +16708,21 @@ void DAGCombiner::getStoreMergeCandidates(
RootNode = St->getChain().getNode();
unsigned NumNodesExplored = 0;
- if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
+ const unsigned MaxSearchNodes = 1024;
+ if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
RootNode = Ldn->getChain().getNode();
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
- I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
- if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
+ I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
+ if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
- if (I2.getOperandNo() == 0)
- if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
- BaseIndexOffset Ptr;
- int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
- !OverLimitInDependenceCheck(OtherST, RootNode))
- StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
- }
- } else
+ TryToAddCandidate(I2);
+ }
+ }
+ } else {
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
- I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
- if (I.getOperandNo() == 0)
- if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
- BaseIndexOffset Ptr;
- int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
- !OverLimitInDependenceCheck(OtherST, RootNode))
- StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
- }
+ I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
+ TryToAddCandidate(I);
+ }
}
// We need to check that merging these stores does not cause a loop in
@@ -16580,7 +17092,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ Align FirstStoreAlign = FirstInChain->getAlign();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
// Scan the memory operations on the chain and find the first
@@ -16675,7 +17187,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
// the NumElem refers to array/index size.
unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
NumElem = std::min(LastLegalType, NumElem);
- unsigned FirstLoadAlign = FirstLoad->getAlignment();
+ Align FirstLoadAlign = FirstLoad->getAlign();
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
@@ -16687,8 +17199,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
// can here.
unsigned NumSkip = 1;
while ((NumSkip < LoadNodes.size()) &&
- (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
@@ -16761,11 +17273,10 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), JointMemOpVT,
FirstLoadAlign, LdMMOFlags);
- NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), JointMemOpVT,
- FirstInChain->getAlignment(),
- FirstInChain->getMemOperand()->getFlags());
+ NewStore = DAG.getTruncStore(
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), JointMemOpVT,
+ FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
}
// Transfer chain users from old loads to the new load.
@@ -16967,17 +17478,15 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
- unsigned Alignment = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
- ST->getAlignment(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
- Alignment = MinAlign(Alignment, 4U);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
- Alignment, MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
St0, St1);
}
@@ -17038,7 +17547,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return NewST;
// Try transforming several stores into STORE (BSWAP).
- if (SDValue Store = MatchStoreCombine(ST))
+ if (SDValue Store = mergeTruncStores(ST))
return Store;
if (ST->isUnindexed()) {
@@ -17111,11 +17620,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
!ST1->getBasePtr().isUndef() &&
// BaseIndexOffset and the code below requires knowing the size
// of a vector, so bail out if MemoryVT is scalable.
+ !ST->getMemoryVT().isScalableVector() &&
!ST1->getMemoryVT().isScalableVector()) {
const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
- unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
- unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
+ unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
+ unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
// If this is a store who's preceding store to a subset of the current
// location and no one other node is chained to that store we can
// effectively drop the store. Do not remove stores to undef as they may
@@ -17186,8 +17696,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
// We walk up the chains to find stores.
SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
while (!Chains.empty()) {
- SDValue Chain = Chains.back();
- Chains.pop_back();
+ SDValue Chain = Chains.pop_back_val();
if (!Chain.hasOneUse())
continue;
switch (Chain.getOpcode()) {
@@ -17207,11 +17716,16 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
// TODO: Can relax for unordered atomics (see D66309)
if (!ST->isSimple() || ST->isIndexed())
continue;
+ const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
+ // The bounds of a scalable store are not known until runtime, so this
+ // store cannot be elided.
+ if (StoreSize.isScalable())
+ continue;
const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
// If we store purely within object bounds just before its lifetime ends,
// we can remove the store.
if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
- ST->getMemoryVT().getStoreSizeInBits())) {
+ StoreSize.getFixedSize() * 8)) {
LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
dbgs() << "\nwithin LIFETIME_END of : ";
LifetimeEndBase.dump(); dbgs() << "\n");
@@ -17310,7 +17824,6 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
return SDValue();
// Start to split store.
- unsigned Alignment = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
@@ -17323,13 +17836,12 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
SDValue Ptr = ST->getBasePtr();
// Lower value store.
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
- ST->getAlignment(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
// Higher value store.
- SDValue St1 =
- DAG.getStore(St0, DL, Hi, Ptr,
- ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
- Alignment / 2, MMOFlags, AAInfo);
+ SDValue St1 = DAG.getStore(
+ St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
return St1;
}
@@ -17567,6 +18079,13 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
+
+ // If the vector element type is not a multiple of a byte then we are unable
+ // to correctly compute an address to load only the extracted element as a
+ // scalar.
+ if (!VecEltVT.isByteSized())
+ return SDValue();
+
Align Alignment = OriginalLoad->getAlign();
Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VecEltVT.getTypeForEVT(*DAG.getContext()));
@@ -18202,20 +18721,24 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// operands will all be based off of VecIn1, even those in VecIn2.
unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ uint64_t InVT1Size = InVT1.getFixedSizeInBits();
+ uint64_t InVT2Size = InVT2.getFixedSizeInBits();
+
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
if (InVT1 != VT || InVT2 != VT) {
- if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
+ if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
// If the output vector length is a multiple of both input lengths,
// we can concatenate them and pad the rest with undefs.
- unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
+ unsigned NumConcats = VTSize / InVT1Size;
assert(NumConcats >= 2 && "Concat needs at least two inputs!");
SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
ConcatOps[0] = VecIn1;
ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
VecIn2 = SDValue();
- } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
+ } else if (InVT1Size == VTSize * 2) {
if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
return SDValue();
@@ -18228,7 +18751,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// Since we now have shorter input vectors, adjust the offset of the
// second vector's start.
Vec2Offset = NumElems;
- } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
+ } else if (InVT2Size <= InVT1Size) {
// VecIn1 is wider than the output, and we have another, possibly
// smaller input. Pad the smaller input with undefs, shuffle at the
// input vector width, and extract the output.
@@ -18253,8 +18776,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// when we start sorting the vectors by type.
return SDValue();
}
- } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
- InVT1.getSizeInBits() == VT.getSizeInBits()) {
+ } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
ConcatOps[0] = VecIn2;
VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
@@ -18445,8 +18967,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// Have we seen this input vector before?
// The vectors are expected to be tiny (usually 1 or 2 elements), so using
// a map back from SDValues to numbers isn't worth it.
- unsigned Idx = std::distance(
- VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
+ unsigned Idx = std::distance(VecIn.begin(), find(VecIn, ExtractedFromVec));
if (Idx == VecIn.size())
VecIn.push_back(ExtractedFromVec);
@@ -18904,7 +19425,7 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
// check the other type in the cast to make sure this is really legal.
EVT VT = N->getValueType(0);
EVT SrcEltVT = SrcVT.getVectorElementType();
- unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands();
+ ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (CastOpcode) {
@@ -18941,9 +19462,8 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return DAG.getUNDEF(VT);
// Optimize concat_vectors where all but the first of the vectors are undef.
- if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
- return Op.isUndef();
- })) {
+ if (all_of(drop_begin(N->ops()),
+ [](const SDValue &Op) { return Op.isUndef(); })) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
@@ -19116,15 +19636,16 @@ static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
V.getOperand(0).getValueType() == SubVT &&
- (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
- uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
+ (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
return V.getOperand(SubIdx);
}
return SDValue();
}
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ bool LegalOperations) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = Extract->getOperand(0);
unsigned BinOpcode = BinOp.getOpcode();
@@ -19138,7 +19659,7 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
SDValue Index = Extract->getOperand(1);
EVT SubVT = Extract->getValueType(0);
- if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
+ if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
return SDValue();
SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
@@ -19159,11 +19680,12 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
-static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
+static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
+ bool LegalOperations) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
- if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
+ if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
return V;
// The extract index must be a constant, so we can map it to a concat operand.
@@ -19308,19 +19830,15 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
unsigned Index = ExtIdx->getZExtValue();
- unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorMinNumElements();
- // If the index is a multiple of the extract element count, we can offset the
- // address by the store size multiplied by the subvector index. Otherwise if
- // the scalar type is byte sized, we can just use the index multiplied by
- // the element size in bytes as the offset.
- unsigned Offset;
- if (Index % NumElts == 0)
- Offset = (Index / NumElts) * VT.getStoreSize();
- else if (VT.getScalarType().isByteSized())
- Offset = Index * VT.getScalarType().getStoreSize();
- else
- return SDValue();
+ // The definition of EXTRACT_SUBVECTOR states that the index must be a
+ // multiple of the minimum number of elements in the result type.
+ assert(Index % NumElts == 0 && "The extract subvector index is not a "
+ "multiple of the result's element count");
+
+ // It's fine to use TypeSize here as we know the offset will not be negative.
+ TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
@@ -19329,13 +19847,21 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
SDLoc DL(Extract);
- SDValue BaseAddr = Ld->getBasePtr();
// TODO: Use "BaseIndexOffset" to make this more effective.
- SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+ SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
+
+ uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
- VT.getStoreSize());
+ MachineMemOperand *MMO;
+ if (Offset.isScalable()) {
+ MachinePointerInfo MPI =
+ MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
+ MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
+ } else
+ MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
+ StoreSize);
+
SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
return NewLd;
@@ -19388,8 +19914,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
}
if ((DestNumElts % SrcNumElts) == 0) {
unsigned DestSrcRatio = DestNumElts / SrcNumElts;
- if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) {
- ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio;
+ if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
+ ElementCount NewExtEC =
+ NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
EVT ScalarVT = SrcVT.getScalarType();
if ((ExtIdx % DestSrcRatio) == 0) {
SDLoc DL(N);
@@ -19403,7 +19930,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
- if (NewExtEC == 1 &&
+ if (NewExtEC.isScalar() &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
SDValue NewExtract =
@@ -19508,7 +20035,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
N->getOperand(1));
}
- if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
+ if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
return NarrowBOp;
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
@@ -20286,52 +20813,52 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
- // Canonicalize shuffles according to rules:
- // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
- // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
- // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
- if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
- N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- TLI.isTypeLegal(VT)) {
- // The incoming shuffle must be of the same type as the result of the
- // current shuffle.
- assert(N1->getOperand(0).getValueType() == VT &&
- "Shuffle types don't match");
-
- SDValue SV0 = N1->getOperand(0);
- SDValue SV1 = N1->getOperand(1);
- bool HasSameOp0 = N0 == SV0;
- bool IsSV1Undef = SV1.isUndef();
- if (HasSameOp0 || IsSV1Undef || N0 == SV1)
- // Commute the operands of this shuffle so that next rule
- // will trigger.
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ // Canonicalize shuffles according to rules:
+ // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+ // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+ // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(N1->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = N1->getOperand(0);
+ SDValue SV1 = N1->getOperand(1);
+ bool HasSameOp0 = N0 == SV0;
+ bool IsSV1Undef = SV1.isUndef();
+ if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+ // Commute the operands of this shuffle so merging below will trigger.
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Canonicalize splat shuffles to the RHS to improve merging below.
+ // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(N0)->isSplat() &&
+ !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
return DAG.getCommutedVectorShuffle(*SVN);
+ }
}
- // Try to fold according to rules:
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- // Don't try to fold shuffles with illegal type.
- // Only fold if this shuffle is the only user of the other shuffle.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
- Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
- ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
-
+ // Compute the combined shuffle mask for a shuffle with SV0 as the first
+ // operand, and SV1 as the second operand.
+ // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask).
+ auto MergeInnerShuffle = [NumElts](ShuffleVectorSDNode *SVN,
+ ShuffleVectorSDNode *OtherSVN, SDValue N1,
+ SDValue &SV0, SDValue &SV1,
+ SmallVectorImpl<int> &Mask) -> bool {
// Don't try to fold splats; they're likely to simplify somehow, or they
// might be free.
- if (OtherSV->isSplat())
- return SDValue();
+ if (OtherSVN->isSplat())
+ return false;
- // The incoming shuffle must be of the same type as the result of the
- // current shuffle.
- assert(OtherSV->getOperand(0).getValueType() == VT &&
- "Shuffle types don't match");
+ SV0 = SV1 = SDValue();
+ Mask.clear();
- SDValue SV0, SV1;
- SmallVector<int, 4> Mask;
- // Compute the combined shuffle mask for a shuffle with SV0 as the first
- // operand, and SV1 as the second operand.
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx < 0) {
@@ -20344,15 +20871,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (Idx < (int)NumElts) {
// This shuffle index refers to the inner shuffle N0. Lookup the inner
// shuffle mask to identify which vector is actually referenced.
- Idx = OtherSV->getMaskElt(Idx);
+ Idx = OtherSVN->getMaskElt(Idx);
if (Idx < 0) {
// Propagate Undef.
Mask.push_back(Idx);
continue;
}
-
- CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
- : OtherSV->getOperand(1);
+ CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
+ : OtherSVN->getOperand(1);
} else {
// This shuffle index references an element within N1.
CurrentVec = N1;
@@ -20374,38 +20900,82 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Mask.push_back(Idx);
continue;
}
+ if (!SV1.getNode() || SV1 == CurrentVec) {
+ // Ok. CurrentVec is the right hand side.
+ // Update the mask accordingly.
+ SV1 = CurrentVec;
+ Mask.push_back(Idx + NumElts);
+ continue;
+ }
- // Bail out if we cannot convert the shuffle pair into a single shuffle.
- if (SV1.getNode() && SV1 != CurrentVec)
- return SDValue();
+ // Last chance - see if the vector is another shuffle and if it
+ // uses one of the existing candidate shuffle ops.
+ if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
+ int InnerIdx = CurrentSVN->getMaskElt(Idx);
+ if (InnerIdx < 0) {
+ Mask.push_back(-1);
+ continue;
+ }
+ SDValue InnerVec = (InnerIdx < (int)NumElts)
+ ? CurrentSVN->getOperand(0)
+ : CurrentSVN->getOperand(1);
+ if (InnerVec.isUndef()) {
+ Mask.push_back(-1);
+ continue;
+ }
+ InnerIdx %= NumElts;
+ if (InnerVec == SV0) {
+ Mask.push_back(InnerIdx);
+ continue;
+ }
+ if (InnerVec == SV1) {
+ Mask.push_back(InnerIdx + NumElts);
+ continue;
+ }
+ }
- // Ok. CurrentVec is the right hand side.
- // Update the mask accordingly.
- SV1 = CurrentVec;
- Mask.push_back(Idx + NumElts);
+ // Bail out if we cannot convert the shuffle pair into a single shuffle.
+ return false;
}
+ return true;
+ };
- // Check if all indices in Mask are Undef. In case, propagate Undef.
- bool isUndefMask = true;
- for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
- isUndefMask &= Mask[i] < 0;
+ // Try to fold according to rules:
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // Don't try to fold shuffles with illegal type.
+ // Only fold if this shuffle is the only user of the other shuffle.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+ Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
- if (isUndefMask)
- return DAG.getUNDEF(VT);
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0, SV1;
+ SmallVector<int, 4> Mask;
+ if (MergeInnerShuffle(SVN, OtherSV, N1, SV0, SV1, Mask)) {
+ // Check if all indices in Mask are Undef. In case, propagate Undef.
+ if (llvm::all_of(Mask, [](int M) { return M < 0; }))
+ return DAG.getUNDEF(VT);
- if (!SV0.getNode())
- SV0 = DAG.getUNDEF(VT);
- if (!SV1.getNode())
- SV1 = DAG.getUNDEF(VT);
+ if (!SV0.getNode())
+ SV0 = DAG.getUNDEF(VT);
+ if (!SV1.getNode())
+ SV1 = DAG.getUNDEF(VT);
- // Avoid introducing shuffles with illegal mask.
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
- return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
+ // Avoid introducing shuffles with illegal mask.
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
+ }
}
if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
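
The MergeInnerShuffle lambda introduced above combines shuffle(shuffle(A, B, M0), C, M1) into a single mask when at most two distinct sources survive. The following standalone sketch is a simplified analogue (not the LLVM implementation; the Shuffle struct and mergeShuffles are invented for illustration, and it assumes the inner and outer masks have the same element count) showing the index arithmetic and the bail-out when a third source would be needed:

#include <cstdio>
#include <optional>
#include <vector>

// Sources are identified by small integers (0 = A, 1 = B, 2 = C, ...).
// Element i of shuffle(SrcL, SrcR, Mask) is SrcL[Mask[i]] if Mask[i] < N,
// SrcR[Mask[i] - N] if Mask[i] >= N, and undefined if Mask[i] < 0.
struct Shuffle {
  int SrcL, SrcR;
  std::vector<int> Mask;
};

// Try to rewrite shuffle(shuffle(A, B, M0), C, M1) as one shuffle. Returns
// std::nullopt when the combined mask would need more than two distinct
// sources, mirroring the bail-out in the code above.
std::optional<Shuffle> mergeShuffles(const Shuffle &Inner, int OuterRHS,
                                     const std::vector<int> &OuterMask) {
  const int N = (int)OuterMask.size();
  int SV0 = -1, SV1 = -1;
  std::vector<int> Mask;
  for (int Idx : OuterMask) {
    if (Idx < 0) { Mask.push_back(-1); continue; }
    int Src, Elt;
    if (Idx < N) {
      // Refers into the inner shuffle: look through its mask.
      int InnerIdx = Inner.Mask[Idx];
      if (InnerIdx < 0) { Mask.push_back(-1); continue; }
      Src = InnerIdx < N ? Inner.SrcL : Inner.SrcR;
      Elt = InnerIdx % N;
    } else {
      Src = OuterRHS;
      Elt = Idx - N;
    }
    if (SV0 < 0 || SV0 == Src)      { SV0 = Src; Mask.push_back(Elt); }
    else if (SV1 < 0 || SV1 == Src) { SV1 = Src; Mask.push_back(Elt + N); }
    else return std::nullopt; // would need a third source
  }
  return Shuffle{SV0, SV1, Mask};
}

int main() {
  // shuffle(shuffle(A, B, {0,5,2,7}), C, {1,3,4,6}) over 4-element vectors.
  Shuffle Inner{0, 1, {0, 5, 2, 7}};
  auto Merged = mergeShuffles(Inner, 2, {1, 3, 4, 6});
  if (Merged) {
    std::printf("srcs: %d %d  mask:", Merged->SrcL, Merged->SrcR);
    for (int M : Merged->Mask) std::printf(" %d", M);
    std::printf("\n");
  }
  return 0;
}

Running it reports sources 1 and 2 with mask 1 3 4 6, i.e. the two shuffles collapse into a single shuffle(B, C, {1,3,4,6}).
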
@@ -20490,8 +21060,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(0).getOperand(1) == N2 &&
- N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
- VT.getVectorNumElements() &&
+ N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
+ VT.getVectorElementCount() &&
N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
VT.getSizeInBits()) {
return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
@@ -20508,7 +21078,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
EVT CN1VT = CN1.getValueType();
if (CN0VT.isVector() && CN1VT.isVector() &&
CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
- CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
+ CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
CN0.getValueType(), CN0, CN1, N2);
return DAG.getBitcast(VT, NewINSERT);
@@ -20547,7 +21117,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDLoc DL(N);
SDValue NewIdx;
LLVMContext &Ctx = *DAG.getContext();
- unsigned NumElts = VT.getVectorNumElements();
+ ElementCount NumElts = VT.getVectorElementCount();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
@@ -20555,8 +21125,9 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
} else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
- if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
- NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
+ if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
+ NumElts.divideCoefficientBy(Scale));
NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
}
}
@@ -20588,8 +21159,10 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
// If the input vector is a concatenation, and the insert replaces
// one of the pieces, we can optimize into a single concat_vectors.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
- N0.getOperand(0).getValueType() == N1.getValueType()) {
- unsigned Factor = N1.getValueType().getVectorNumElements();
+ N0.getOperand(0).getValueType() == N1.getValueType() &&
+ N0.getOperand(0).getValueType().isScalableVector() ==
+ N1.getValueType().isScalableVector()) {
+ unsigned Factor = N1.getValueType().getVectorMinNumElements();
SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
Ops[InsIdx / Factor] = N1;
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
@@ -20616,7 +21189,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
// fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
- if (N0->getOpcode() == ISD::AND) {
+ if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
if (AndConst && AndConst->getAPIntValue() == 0xffff) {
return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
@@ -20633,7 +21206,7 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
unsigned Opcode = N->getOpcode();
// VECREDUCE over 1-element vector is just an extract.
- if (VT.getVectorNumElements() == 1) {
+ if (VT.getVectorElementCount().isScalar()) {
SDLoc dl(N);
SDValue Res =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
@@ -20872,7 +21445,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue Z = LHS.getOperand(2);
EVT NarrowVT = X.getValueType();
if (NarrowVT == Y.getValueType() &&
- TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
+ LegalOperations)) {
// (binop undef, undef) may not return undef, so compute that result.
SDLoc DL(N);
SDValue VecC =
@@ -20885,11 +21459,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// Make sure all but the first op are undef or constant.
auto ConcatWithConstantOrUndef = [](SDValue Concat) {
return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
- std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
- [](const SDValue &Op) {
- return Op.isUndef() ||
- ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
- });
+ all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
+ return Op.isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
+ });
};
// The following pattern is likely to emerge with vector reduction ops. Moving
@@ -21111,7 +21684,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// It is safe to replace the two loads if they have different alignments,
// but the new load must be the minimum (most restrictive) alignment of the
// inputs.
- unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
+ Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
if (!RLD->isInvariant())
MMOFlags &= ~MachineMemOperand::MOInvariant;
@@ -21217,6 +21790,46 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
+// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
+SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool IsFabs = N->getOpcode() == ISD::FABS;
+ bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
+
+ if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+
+ // The operand to cast should be integer.
+ if (!IntVT.isInteger() || IntVT.isVector())
+ return SDValue();
+
+ // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
+ // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For vector, create a sign mask (0x80...) or its inverse (for fabs,
+ // 0x7f...) per element and splat it.
+ SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
+ if (IsFabs)
+ SignMask = ~SignMask;
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
+ SignMask = APInt::getSignMask(IntVT.getSizeInBits());
+ if (IsFabs)
+ SignMask = ~SignMask;
+ }
+ SDLoc DL(N0);
+ Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getBitcast(VT, Int);
+}
+
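
foldSignChangeInBitcast, added above, models fneg as flipping the IEEE sign bit and fabs as clearing it on the reinterpreted integer pattern. A standalone sketch of that identity for 32-bit float (assuming the usual IEEE-754 binary32 layout; independent of the patch):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Reinterpret a float as its 32-bit pattern and back, the role the BITCAST
// nodes play in the combine above.
static std::uint32_t toBits(float F) {
  std::uint32_t U;
  std::memcpy(&U, &F, sizeof(U));
  return U;
}
static float fromBits(std::uint32_t U) {
  float F;
  std::memcpy(&F, &U, sizeof(F));
  return F;
}

int main() {
  const std::uint32_t SignMask = 0x80000000u;
  for (float X : {1.5f, -2.25f, 0.0f, -1024.0f}) {
    // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
    assert(fromBits(toBits(X) ^ SignMask) == -X);
    // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
    assert(fromBits(toBits(X) & ~SignMask) == std::fabs(X));
  }
  return 0;
}
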
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
/// in it. This may be a win when the constant is not otherwise available
@@ -21498,9 +22111,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
EVT VT = V.getValueType();
- unsigned EltBits = VT.getScalarSizeInBits();
SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
- SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
+ SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
return LogBase2;
}
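
The simplified BuildLogBase2 above still relies on the identity log2(V) = (EltBits - 1) - ctlz(V) for a power-of-two V. A standalone check of that identity, assuming a C++20 toolchain for std::countl_zero (not part of the patch):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Shift = 0; Shift < 32; ++Shift) {
    std::uint32_t V = std::uint32_t(1) << Shift;
    // (BitWidth - 1) - ctlz(V) recovers the exponent of a power of two.
    unsigned Log2 = 31u - unsigned(std::countl_zero(V));
    assert(Log2 == Shift);
  }
  return 0;
}
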
@@ -21678,37 +22290,21 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
Reciprocal)) {
AddToWorklist(Est.getNode());
- if (Iterations) {
+ if (Iterations)
Est = UseOneConstNR
? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
-
- if (!Reciprocal) {
- // The estimate is now completely wrong if the input was exactly 0.0 or
- // possibly a denormal. Force the answer to 0.0 for those cases.
- SDLoc DL(Op);
- EVT CCVT = getSetCCResultType(VT);
- ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
- DenormalMode DenormMode = DAG.getDenormalMode(VT);
- if (DenormMode.Input == DenormalMode::IEEE) {
- // This is specifically a check for the handling of denormal inputs,
- // not the result.
-
- // fabs(X) < SmallestNormal ? 0.0 : Est
- const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
- APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
- SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
- SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
- SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
- Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
- } else {
- // X == 0.0 ? 0.0 : Est
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
- SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
- Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
- }
- }
+ if (!Reciprocal) {
+ SDLoc DL(Op);
+ // Try the target specific test first.
+ SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
+
+ // The estimate is now completely wrong if the input was exactly 0.0 or
+ // possibly a denormal. Force the answer to 0.0 or value provided by
+ // target for those cases.
+ Est = DAG.getNode(
+ Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+ Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
}
return Est;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index f5948d2a20dc..0ff77d4ba1ab 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -113,11 +113,6 @@ using namespace PatternMatch;
#define DEBUG_TYPE "isel"
-// FIXME: Remove this after the feature has proven reliable.
-static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values",
- cl::init(true), cl::Hidden,
- cl::desc("Sink local values in FastISel"));
-
STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
"target-independent selector");
STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
@@ -139,7 +134,6 @@ void FastISel::startNewBlock() {
LastLocalValue = EmitStartPt;
}
-/// Flush the local CSE map and sink anything we can.
void FastISel::finishBasicBlock() { flushLocalValueMap(); }
bool FastISel::lowerArguments() {
@@ -164,48 +158,77 @@ bool FastISel::lowerArguments() {
/// Return the defined register if this instruction defines exactly one
/// virtual register and uses no other virtual registers. Otherwise return 0.
-static Register findSinkableLocalRegDef(MachineInstr &MI) {
+static Register findLocalRegDef(MachineInstr &MI) {
Register RegDef;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
if (MO.isDef()) {
if (RegDef)
- return 0;
+ return Register();
RegDef = MO.getReg();
} else if (MO.getReg().isVirtual()) {
- // This is another use of a vreg. Don't try to sink it.
+ // This is another use of a vreg. Don't delete it.
return Register();
}
}
return RegDef;
}
+static bool isRegUsedByPhiNodes(Register DefReg,
+ FunctionLoweringInfo &FuncInfo) {
+ for (auto &P : FuncInfo.PHINodesToUpdate)
+ if (P.second == DefReg)
+ return true;
+ return false;
+}
+
void FastISel::flushLocalValueMap() {
- // Try to sink local values down to their first use so that we can give them a
- // better debug location. This has the side effect of shrinking local value
- // live ranges, which helps out fast regalloc.
- if (SinkLocalValues && LastLocalValue != EmitStartPt) {
- // Sink local value materialization instructions between EmitStartPt and
- // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to
- // avoid inserting into the range that we're iterating over.
+ // If FastISel bails out, it could leave local value instructions behind
+ // that aren't used for anything. Detect and erase those.
+ if (LastLocalValue != EmitStartPt) {
+ // Save the first instruction after local values, for later.
+ MachineBasicBlock::iterator FirstNonValue(LastLocalValue);
+ ++FirstNonValue;
+
MachineBasicBlock::reverse_iterator RE =
EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
: FuncInfo.MBB->rend();
MachineBasicBlock::reverse_iterator RI(LastLocalValue);
-
- InstOrderMap OrderMap;
for (; RI != RE;) {
MachineInstr &LocalMI = *RI;
+ // Increment before erasing what it points to.
++RI;
- bool Store = true;
- if (!LocalMI.isSafeToMove(nullptr, Store))
+ Register DefReg = findLocalRegDef(LocalMI);
+ if (!DefReg)
continue;
- Register DefReg = findSinkableLocalRegDef(LocalMI);
- if (DefReg == 0)
+ if (FuncInfo.RegsWithFixups.count(DefReg))
continue;
+ bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
+ if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
+ if (EmitStartPt == &LocalMI)
+ EmitStartPt = EmitStartPt->getPrevNode();
+ LLVM_DEBUG(dbgs() << "removing dead local value materialization"
+ << LocalMI);
+ LocalMI.eraseFromParent();
+ }
+ }
- sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap);
+ if (FirstNonValue != FuncInfo.MBB->end()) {
+ // See if there are any local value instructions left. If so, we want to
+ // make sure the first one has a debug location; if it doesn't, use the
+ // first non-value instruction's debug location.
+
+ // If EmitStartPt is non-null, this block had copies at the top before
+ // FastISel started doing anything; it points to the last one, so the
+ // first local value instruction is the one after EmitStartPt.
+ // If EmitStartPt is null, the first local value instruction is at the
+ // top of the block.
+ MachineBasicBlock::iterator FirstLocalValue =
+ EmitStartPt ? ++MachineBasicBlock::iterator(EmitStartPt)
+ : FuncInfo.MBB->begin();
+ if (FirstLocalValue != FirstNonValue && !FirstLocalValue->getDebugLoc())
+ FirstLocalValue->setDebugLoc(FirstNonValue->getDebugLoc());
}
}
@@ -213,132 +236,6 @@ void FastISel::flushLocalValueMap() {
LastLocalValue = EmitStartPt;
recomputeInsertPt();
SavedInsertPt = FuncInfo.InsertPt;
- LastFlushPoint = FuncInfo.InsertPt;
-}
-
-static bool isRegUsedByPhiNodes(Register DefReg,
- FunctionLoweringInfo &FuncInfo) {
- for (auto &P : FuncInfo.PHINodesToUpdate)
- if (P.second == DefReg)
- return true;
- return false;
-}
-
-static bool isTerminatingEHLabel(MachineBasicBlock *MBB, MachineInstr &MI) {
- // Ignore non-EH labels.
- if (!MI.isEHLabel())
- return false;
-
- // Any EH label outside a landing pad must be for an invoke. Consider it a
- // terminator.
- if (!MBB->isEHPad())
- return true;
-
- // If this is a landingpad, the first non-phi instruction will be an EH_LABEL.
- // Don't consider that label to be a terminator.
- return MI.getIterator() != MBB->getFirstNonPHI();
-}
-
-/// Build a map of instruction orders. Return the first terminator and its
-/// order. Consider EH_LABEL instructions to be terminators as well, since local
-/// values for phis after invokes must be materialized before the call.
-void FastISel::InstOrderMap::initialize(
- MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) {
- unsigned Order = 0;
- for (MachineInstr &I : *MBB) {
- if (!FirstTerminator &&
- (I.isTerminator() || isTerminatingEHLabel(MBB, I))) {
- FirstTerminator = &I;
- FirstTerminatorOrder = Order;
- }
- Orders[&I] = Order++;
-
- // We don't need to order instructions past the last flush point.
- if (I.getIterator() == LastFlushPoint)
- break;
- }
-}
-
-void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
- Register DefReg,
- InstOrderMap &OrderMap) {
- // If this register is used by a register fixup, MRI will not contain all
- // the uses until after register fixups, so don't attempt to sink or DCE
- // this instruction. Register fixups typically come from no-op cast
- // instructions, which replace the cast instruction vreg with the local
- // value vreg.
- if (FuncInfo.RegsWithFixups.count(DefReg))
- return;
-
- // We can DCE this instruction if there are no uses and it wasn't a
- // materialized for a successor PHI node.
- bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
- if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
- if (EmitStartPt == &LocalMI)
- EmitStartPt = EmitStartPt->getPrevNode();
- LLVM_DEBUG(dbgs() << "removing dead local value materialization "
- << LocalMI);
- OrderMap.Orders.erase(&LocalMI);
- LocalMI.eraseFromParent();
- return;
- }
-
- // Number the instructions if we haven't yet so we can efficiently find the
- // earliest use.
- if (OrderMap.Orders.empty())
- OrderMap.initialize(FuncInfo.MBB, LastFlushPoint);
-
- // Find the first user in the BB.
- MachineInstr *FirstUser = nullptr;
- unsigned FirstOrder = std::numeric_limits<unsigned>::max();
- for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
- auto I = OrderMap.Orders.find(&UseInst);
- assert(I != OrderMap.Orders.end() &&
- "local value used by instruction outside local region");
- unsigned UseOrder = I->second;
- if (UseOrder < FirstOrder) {
- FirstOrder = UseOrder;
- FirstUser = &UseInst;
- }
- }
-
- // The insertion point will be the first terminator or the first user,
- // whichever came first. If there was no terminator, this must be a
- // fallthrough block and the insertion point is the end of the block.
- MachineBasicBlock::instr_iterator SinkPos;
- if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
- FirstOrder = OrderMap.FirstTerminatorOrder;
- SinkPos = OrderMap.FirstTerminator->getIterator();
- } else if (FirstUser) {
- SinkPos = FirstUser->getIterator();
- } else {
- assert(UsedByPHI && "must be users if not used by a phi");
- SinkPos = FuncInfo.MBB->instr_end();
- }
-
- // Collect all DBG_VALUEs before the new insertion position so that we can
- // sink them.
- SmallVector<MachineInstr *, 1> DbgValues;
- for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
- if (!DbgVal.isDebugValue())
- continue;
- unsigned UseOrder = OrderMap.Orders[&DbgVal];
- if (UseOrder < FirstOrder)
- DbgValues.push_back(&DbgVal);
- }
-
- // Sink LocalMI before SinkPos and assign it the same DebugLoc.
- LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
- FuncInfo.MBB->remove(&LocalMI);
- FuncInfo.MBB->insert(SinkPos, &LocalMI);
- if (SinkPos != FuncInfo.MBB->end())
- LocalMI.setDebugLoc(SinkPos->getDebugLoc());
-
- // Sink any debug values that we've collected.
- for (MachineInstr *DI : DbgValues) {
- FuncInfo.MBB->remove(DI);
- FuncInfo.MBB->insert(SinkPos, DI);
- }
}
bool FastISel::hasTrivialKill(const Value *V) {
@@ -364,12 +261,16 @@ bool FastISel::hasTrivialKill(const Value *V) {
if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
return false;
+ // Casts and extractvalues may be trivially coalesced by fast-isel.
+ if (I->getOpcode() == Instruction::BitCast ||
+ I->getOpcode() == Instruction::PtrToInt ||
+ I->getOpcode() == Instruction::IntToPtr ||
+ I->getOpcode() == Instruction::ExtractValue)
+ return false;
+
// Only instructions with a single use in the same basic block are considered
// to have trivial kills.
return I->hasOneUse() &&
- !(I->getOpcode() == Instruction::BitCast ||
- I->getOpcode() == Instruction::PtrToInt ||
- I->getOpcode() == Instruction::IntToPtr) &&
cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
}
@@ -446,7 +347,7 @@ Register FastISel::materializeConstant(const Value *V, MVT VT) {
getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
if (IntegerReg)
Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
- /*Kill=*/false);
+ /*Op0IsKill=*/false);
}
}
} else if (const auto *Op = dyn_cast<Operator>(V)) {
@@ -560,8 +461,6 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
"Invalid iterator!");
while (I != E) {
- if (LastFlushPoint == I)
- LastFlushPoint = E;
if (SavedInsertPt == I)
SavedInsertPt = E;
if (EmitStartPt == I)
@@ -578,12 +477,9 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
}
FastISel::SavePoint FastISel::enterLocalValueArea() {
- MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
- DebugLoc OldDL = DbgLoc;
+ SavePoint OldInsertPt = FuncInfo.InsertPt;
recomputeInsertPt();
- DbgLoc = DebugLoc();
- SavePoint SP = {OldInsertPt, OldDL};
- return SP;
+ return OldInsertPt;
}
void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
@@ -591,8 +487,7 @@ void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
LastLocalValue = &*std::prev(FuncInfo.InsertPt);
// Restore the previous insert position.
- FuncInfo.InsertPt = OldInsertPt.InsertPt;
- DbgLoc = OldInsertPt.DL;
+ FuncInfo.InsertPt = OldInsertPt;
}
bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
@@ -1316,11 +1211,6 @@ bool FastISel::selectCall(const User *I) {
// Handle simple inline asms.
if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) {
- // If the inline asm has side effects, then make sure that no local value
- // lives across by flushing the local value map.
- if (IA->hasSideEffects())
- flushLocalValueMap();
-
// Don't attempt to handle constraints.
if (!IA->getConstraintString().empty())
return false;
@@ -1350,15 +1240,6 @@ bool FastISel::selectCall(const User *I) {
if (const auto *II = dyn_cast<IntrinsicInst>(Call))
return selectIntrinsicCall(II);
- // Usually, it does not make sense to initialize a value,
- // make an unrelated function call and use the value, because
- // it tends to be spilled on the stack. So, we move the pointer
- // to the last local value to the beginning of the block, so that
- // all the values which have already been materialized,
- // appear after the call. It also makes sense to skip intrinsics
- // since they tend to be inlined.
- flushLocalValueMap();
-
return lowerCall(Call);
}
@@ -1375,6 +1256,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::sideeffect:
// Neither does the assume intrinsic; it's also OK not to codegen its operand.
case Intrinsic::assume:
+ // Neither does the llvm.experimental.noalias.scope.decl intrinsic
+ case Intrinsic::experimental_noalias_scope_decl:
return true;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
@@ -1643,6 +1526,11 @@ void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
}
bool FastISel::selectInstruction(const Instruction *I) {
+ // Flush the local value map before starting each instruction.
+ // This improves locality and debugging, and can reduce spills.
+ // Reuse of values across IR instructions is relatively uncommon.
+ flushLocalValueMap();
+
MachineInstr *SavedLastLocalValue = getLastLocalValue();
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
@@ -1789,13 +1677,13 @@ bool FastISel::selectFNeg(const User *I, const Value *In) {
return false;
Register IntResultReg = fastEmit_ri_(
- IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true,
+ IntVT.getSimpleVT(), ISD::XOR, IntReg, /*Op0IsKill=*/true,
UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
if (!IntResultReg)
return false;
ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST,
- IntResultReg, /*IsKill=*/true);
+ IntResultReg, /*Op0IsKill=*/true);
if (!ResultReg)
return false;
@@ -1851,13 +1739,8 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return selectBinaryOp(I, ISD::FADD);
case Instruction::Sub:
return selectBinaryOp(I, ISD::SUB);
- case Instruction::FSub: {
- // FNeg is currently represented in LLVM IR as a special case of FSub.
- Value *X;
- if (match(I, m_FNeg(m_Value(X))))
- return selectFNeg(I, X);
+ case Instruction::FSub:
return selectBinaryOp(I, ISD::FSUB);
- }
case Instruction::Mul:
return selectBinaryOp(I, ISD::MUL);
case Instruction::FMul:
@@ -2353,9 +2236,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
- // Set the DebugLoc for the copy. Prefer the location of the operand
- // if there is one; use the location of the PHI otherwise.
- DbgLoc = PN.getDebugLoc();
+ // Set the DebugLoc for the copy. Use the location of the operand if
+ // there is one; otherwise no location, flushLocalValueMap will fix it.
+ DbgLoc = DebugLoc();
if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
DbgLoc = Inst->getDebugLoc();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 5cf83cff3a90..32a4f60df097 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -197,7 +197,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Look for inline asm that clobbers the SP register.
if (auto *Call = dyn_cast<CallBase>(&I)) {
if (Call->isInlineAsm()) {
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI,
@@ -360,7 +360,7 @@ void FunctionLoweringInfo::clear() {
RegFixups.clear();
RegsWithFixups.clear();
StatepointStackSlots.clear();
- StatepointSpillMaps.clear();
+ StatepointRelocationMaps.clear();
PreferredExtendType.clear();
}
@@ -458,8 +458,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
APInt Val = CI->getValue().zextOrTrunc(BitWidth);
DestLOI.NumSignBits = Val.getNumSignBits();
- DestLOI.Known.Zero = ~Val;
- DestLOI.Known.One = Val;
+ DestLOI.Known = KnownBits::makeConstant(Val);
} else {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
"CopyToReg node was created.");
@@ -509,8 +508,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
return;
}
DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
- DestLOI.Known.Zero &= SrcLOI->Known.Zero;
- DestLOI.Known.One &= SrcLOI->Known.One;
+ DestLOI.Known = KnownBits::commonBits(DestLOI.Known, SrcLOI->Known);
}
}
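
The two KnownBits helpers introduced above encapsulate straightforward bit logic: makeConstant marks every bit as known, and commonBits keeps a bit as known only when both inputs agree on it. Restated over plain 64-bit masks as an illustration (the real class wraps APInt):

    #include <cstdint>

    struct KnownBitsSketch {
      uint64_t Zero = 0; // bits known to be 0
      uint64_t One = 0;  // bits known to be 1
    };

    // Every bit of a constant is known.
    KnownBitsSketch makeConstant(uint64_t v) { return {~v, v}; }

    // Merging facts from two sources: a bit stays known only if both agree.
    KnownBitsSketch commonBits(KnownBitsSketch a, KnownBitsSketch b) {
      return {a.Zero & b.Zero, a.One & b.One};
    }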
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 0e4e99214aa2..a5978711b871 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -200,6 +201,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
II.isVariadic() && II.variadicOpsAreDefs();
unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs();
+ if (Node->getMachineOpcode() == TargetOpcode::STATEPOINT)
+ NumVRegs = NumResults;
for (unsigned i = 0; i < NumVRegs; ++i) {
// If the specific node value is only used by a CopyToReg and the dest reg
// is a vreg in the same register class, use the CopyToReg'd destination
@@ -693,6 +696,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
return &*MIB;
}
+ // Attempt to produce a DBG_INSTR_REF if we've been asked to.
+ if (EmitDebugInstrRefs)
+ if (auto *InstrRef = EmitDbgInstrRef(SD, VRBaseMap))
+ return InstrRef;
+
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
@@ -760,6 +768,63 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
}
MachineInstr *
+InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ // Instruction referencing is still in a prototype state: for now we're only
+ // going to support SDNodes within a block. Copies are not supported, they
+ // don't actually define a value.
+ if (SD->getKind() != SDDbgValue::SDNODE)
+ return nullptr;
+
+ SDNode *Node = SD->getSDNode();
+ SDValue Op = SDValue(Node, SD->getResNo());
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
+ if (I==VRBaseMap.end())
+ return nullptr; // undef value: let EmitDbgValue produce a DBG_VALUE $noreg.
+
+ MDNode *Var = SD->getVariable();
+ MDNode *Expr = SD->getExpression();
+ DebugLoc DL = SD->getDebugLoc();
+
+ // Try to pick out a defining instruction at this point.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ MachineInstr *ResultInstr = nullptr;
+
+ // No definition corresponds to scenarios where a vreg is live-in to a block,
+ // and doesn't have a defining instruction (yet). This can be patched up
+ // later; at this early stage of implementation, fall back to using DBG_VALUE.
+ if (!MRI->hasOneDef(VReg))
+ return nullptr;
+
+ MachineInstr &DefMI = *MRI->def_instr_begin(VReg);
+ // Some target specific opcodes can become copies. As stated above, we're
+ // ignoring those for now.
+ if (DefMI.isCopy() || DefMI.getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ return nullptr;
+
+ const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
+ auto MIB = BuildMI(*MF, DL, RefII);
+
+ // Find the operand which defines the specified VReg.
+ unsigned OperandIdx = 0;
+ for (const auto &MO : DefMI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == VReg)
+ break;
+ ++OperandIdx;
+ }
+ assert(OperandIdx < DefMI.getNumOperands());
+
+ // Make the DBG_INSTR_REF refer to that instruction, and that operand.
+ unsigned InstrNum = DefMI.getDebugInstrNum();
+ MIB.addImm(InstrNum);
+ MIB.addImm(OperandIdx);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+ ResultInstr = &*MIB;
+ return ResultInstr;
+}
+
+MachineInstr *
InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) {
MDNode *Label = SD->getLabel();
DebugLoc DL = SD->getDebugLoc();
@@ -821,6 +886,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
NumDefs = NumResults;
}
ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
+ } else if (Opc == TargetOpcode::STATEPOINT) {
+ NumDefs = NumResults;
}
unsigned NumImpUses = 0;
@@ -970,6 +1037,22 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
+ // STATEPOINT is too 'dynamic' to have meaningful machine description.
+ // We have to manually tie operands.
+ if (Opc == TargetOpcode::STATEPOINT && NumDefs > 0) {
+ assert(!HasPhysRegOuts && "STATEPOINT mishandled");
+ MachineInstr *MI = MIB;
+ unsigned Def = 0;
+ int First = StatepointOpers(MI).getFirstGCPtrIdx();
+ assert(First > 0 && "Statepoint has Defs but no GC ptr list");
+ unsigned Use = (unsigned)First;
+ while (Def < NumDefs) {
+ if (MI->getOperand(Use).isReg())
+ MI->tieOperands(Def++, Use);
+ Use = StackMaps::getNextMetaArgIdx(MI, Use);
+ }
+ }
+
// Run post-isel target hook to adjust this instruction if needed.
if (II.hasPostISelHook())
TLI->AdjustInstrPostInstrSelection(*MIB, Node);
@@ -1042,6 +1125,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
+ case ISD::PSEUDO_PROBE: {
+ unsigned TarOp = TargetOpcode::PSEUDO_PROBE;
+ auto Guid = cast<PseudoProbeSDNode>(Node)->getGuid();
+ auto Index = cast<PseudoProbeSDNode>(Node)->getIndex();
+ auto Attr = cast<PseudoProbeSDNode>(Node)->getAttributes();
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addImm(Guid)
+ .addImm(Index)
+ .addImm((uint8_t)PseudoProbeType::Block)
+ .addImm(Attr);
+ break;
+ }
+
case ISD::INLINEASM:
case ISD::INLINEASM_BR: {
unsigned NumOps = Node->getNumOperands();
@@ -1157,10 +1254,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
-InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
MachineBasicBlock::iterator insertpos)
: MF(mbb->getParent()), MRI(&MF->getRegInfo()),
TII(MF->getSubtarget().getInstrInfo()),
TRI(MF->getSubtarget().getRegisterInfo()),
TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
- InsertPos(insertpos) {}
+ InsertPos(insertpos) {
+ EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index c3567eae9161..09658b8143fe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -26,6 +26,7 @@ class MCInstrDesc;
class SDDbgLabel;
class SDDbgValue;
class TargetLowering;
+class TargetMachine;
class LLVM_LIBRARY_VISIBILITY InstrEmitter {
MachineFunction *MF;
@@ -37,6 +38,9 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
MachineBasicBlock *MBB;
MachineBasicBlock::iterator InsertPos;
+ /// Should we try to produce DBG_INSTR_REF instructions?
+ bool EmitDebugInstrRefs;
+
/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
/// implicit physical register output.
void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
@@ -109,6 +113,11 @@ public:
MachineInstr *EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap);
+ /// Attempt to emit a dbg_value as a DBG_INSTR_REF. May fail and return
+ /// nullptr, in which case we fall back to plain EmitDbgValue.
+ MachineInstr *EmitDbgInstrRef(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
/// Generate machine instruction for a dbg_label node.
MachineInstr *EmitDbgLabel(SDDbgLabel *SD);
@@ -130,7 +139,8 @@ public:
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
- InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+ InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos);
private:
void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 6a6004c158bb..62d7191036ca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -173,14 +173,17 @@ private:
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
+ SDValue ExpandFNEG(SDNode *Node) const;
SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
+ SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl);
SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
+ SDValue ExpandPARITY(SDValue Op, const SDLoc &dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
SDValue ExpandInsertToVectorThroughStack(SDValue Op);
@@ -428,7 +431,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
LLVM_DEBUG(dbgs() << "Optimizing float store operations\n");
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
- // FIXME: We shouldn't do this for TargetConstantFP's.
// FIXME: move this to the DAG Combiner! Note that we can't regress due
// to phase ordering between legalized code and the dag combiner. This
// probably means that we need to integrate dag combiner and legalizer
@@ -436,10 +438,16 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
// We generally can't do this one for long doubles.
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDLoc dl(ST);
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+
+ // Don't optimise TargetConstantFP
+ if (Value.getOpcode() == ISD::TargetConstantFP)
+ return SDValue();
+
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
if (CFP->getValueType(0) == MVT::f32 &&
TLI.isTypeLegal(MVT::i32)) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().
@@ -470,7 +478,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), dl);
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
ST->getOriginalAlign(), MMOFlags, AAInfo);
@@ -479,7 +487,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
}
}
}
- return SDValue(nullptr, 0);
+ return SDValue();
}
void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
@@ -540,28 +548,29 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n");
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
- unsigned StWidth = StVT.getSizeInBits();
+ TypeSize StWidth = StVT.getSizeInBits();
+ TypeSize StSize = StVT.getStoreSizeInBits();
auto &DL = DAG.getDataLayout();
- if (StWidth != StVT.getStoreSizeInBits()) {
+ if (StWidth != StSize) {
// Promote to a byte-sized store with upper bits zero if not
// storing an integral number of bytes. For example, promote
// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
- StVT.getStoreSizeInBits());
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedSize());
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
ST->getOriginalAlign(), MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
- } else if (StWidth & (StWidth - 1)) {
+ } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedSize())) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned LogStWidth = Log2_32(StWidth);
+ unsigned StWidthBits = StWidth.getFixedSize();
+ unsigned LogStWidth = Log2_32(StWidthBits);
assert(LogStWidth < 32);
unsigned RoundWidth = 1 << LogStWidth;
- assert(RoundWidth < StWidth);
- unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(RoundWidth < StWidthBits);
+ unsigned ExtraWidth = StWidthBits - RoundWidth;
assert(ExtraWidth < RoundWidth);
assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
"Store size not an integral number of bytes!");
@@ -578,7 +587,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Hi = DAG.getNode(
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth, dl,
@@ -718,7 +727,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n");
EVT SrcVT = LD->getMemoryVT();
- unsigned SrcWidth = SrcVT.getSizeInBits();
+ TypeSize SrcWidth = SrcVT.getSizeInBits();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
@@ -764,14 +773,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Value = Result;
Chain = Ch;
- } else if (SrcWidth & (SrcWidth - 1)) {
+ } else if (!isPowerOf2_64(SrcWidth.getKnownMinSize())) {
// If not loading a power-of-2 number of bits, expand as two loads.
assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned LogSrcWidth = Log2_32(SrcWidth);
+ unsigned SrcWidthBits = SrcWidth.getFixedSize();
+ unsigned LogSrcWidth = Log2_32(SrcWidthBits);
assert(LogSrcWidth < 32);
unsigned RoundWidth = 1 << LogSrcWidth;
- assert(RoundWidth < SrcWidth);
- unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(RoundWidth < SrcWidthBits);
+ unsigned ExtraWidth = SrcWidthBits - RoundWidth;
assert(ExtraWidth < RoundWidth);
assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
"Load size not an integral number of bytes!");
@@ -790,7 +800,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -818,7 +828,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -1103,6 +1113,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
// They'll be converted to Copy(To/From)Reg.
Action = TargetLowering::Legal;
break;
+ case ISD::UBSANTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+ // replace ISD::UBSANTRAP with ISD::TRAP
+ SDValue NewVal;
+ NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(),
+ Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
case ISD::DEBUGTRAP:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
if (Action == TargetLowering::Expand) {
@@ -1118,10 +1140,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
- case ISD::USUBSAT: {
+ case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
- }
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -1159,6 +1184,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(0).getValueType());
break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(), Node->getOperand(1).getValueType());
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1411,6 +1440,12 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
SmallVector<SDValue, 8> Stores;
unsigned TypeByteSize = MemVT.getSizeInBits() / 8;
assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
+
+ // If the destination vector element type of a BUILD_VECTOR is narrower than
+ // the source element type, only store the bits necessary.
+ bool Truncate = isa<BuildVectorSDNode>(Node) &&
+ MemVT.bitsLT(Node->getOperand(0).getValueType());
+
// Store (in the right endianness) the elements to memory.
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
// Ignore undef elements.
@@ -1418,11 +1453,9 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
unsigned Offset = TypeByteSize*i;
- SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, Offset, dl);
+ SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, TypeSize::Fixed(Offset), dl);
- // If the destination vector element type is narrower than the source
- // element type, only store the bits necessary.
- if (MemVT.bitsLT(Node->getOperand(i).getValueType()))
+ if (Truncate)
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
Node->getOperand(i), Idx,
PtrInfo.getWithOffset(Offset), MemVT));
@@ -1448,7 +1481,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
const SDLoc &DL,
SDValue Value) const {
EVT FloatVT = Value.getValueType();
- unsigned NumBits = FloatVT.getSizeInBits();
+ unsigned NumBits = FloatVT.getScalarSizeInBits();
State.FloatVT = FloatVT;
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
// Convert to an integer of the same size.
@@ -1480,8 +1513,9 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
State.IntPointerInfo = State.FloatPointerInfo;
} else {
// Advance the pointer so that the loaded byte will contain the sign bit.
- unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1;
- IntPtr = DAG.getMemBasePlusOffset(StackPtr, ByteOffset, DL);
+ unsigned ByteOffset = (NumBits / 8) - 1;
+ IntPtr =
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(ByteOffset), DL);
State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
ByteOffset);
}
@@ -1489,7 +1523,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
State.IntPtr = IntPtr;
State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr,
State.IntPointerInfo, MVT::i8);
- State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
+ State.SignMask = APInt::getOneBitSet(LoadTy.getScalarSizeInBits(), 7);
State.SignBit = 7;
}
@@ -1544,7 +1578,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
// Get the signbit at the right position for MagAsInt.
int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
EVT ShiftVT = IntVT;
- if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
+ if (SignBit.getScalarValueSizeInBits() <
+ ClearedSign.getScalarValueSizeInBits()) {
SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
ShiftVT = MagVT;
}
@@ -1555,7 +1590,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT);
SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst);
}
- if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
+ if (SignBit.getScalarValueSizeInBits() >
+ ClearedSign.getScalarValueSizeInBits()) {
SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
}
@@ -1564,6 +1600,22 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
return modifySignAsInt(MagAsInt, DL, CopiedSign);
}
+SDValue SelectionDAGLegalize::ExpandFNEG(SDNode *Node) const {
+ // Get the sign bit as an integer.
+ SDLoc DL(Node);
+ FloatSignAsInt SignAsInt;
+ getSignAsIntValue(SignAsInt, DL, Node->getOperand(0));
+ EVT IntVT = SignAsInt.IntValue.getValueType();
+
+ // Flip the sign.
+ SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+ SDValue SignFlip =
+ DAG.getNode(ISD::XOR, DL, IntVT, SignAsInt.IntValue, SignMask);
+
+ // Convert back to float.
+ return modifySignAsInt(SignAsInt, DL, SignFlip);
+}
+
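
ExpandFNEG added above, like the existing ExpandFABS and ExpandFCOPYSIGN, reduces to integer operations on the sign bit. Written out for f32 as a standalone sketch (a plain bit_cast stands in for getSignAsIntValue, which also has a load/store path for types without a same-sized legal integer; helper names are illustrative):

    #include <bit>
    #include <cstdint>

    float fnegBits(float x) {   // flip the sign bit
      return std::bit_cast<float>(std::bit_cast<uint32_t>(x) ^ 0x80000000u);
    }
    float fabsBits(float x) {   // clear the sign bit
      return std::bit_cast<float>(std::bit_cast<uint32_t>(x) & 0x7fffffffu);
    }
    float copysignBits(float mag, float sgn) {  // splice the sign of sgn onto mag
      uint32_t m = std::bit_cast<uint32_t>(mag) & 0x7fffffffu;
      uint32_t s = std::bit_cast<uint32_t>(sgn) & 0x80000000u;
      return std::bit_cast<float>(m | s);
    }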
SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
SDLoc DL(Node);
SDValue Value = Node->getOperand(0);
@@ -1587,7 +1639,7 @@ SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SmallVectorImpl<SDValue> &Results) {
- unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
SDLoc dl(Node);
@@ -1681,21 +1733,41 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETUO:
+ if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
+ CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR;
+ break;
+ }
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
+ NeedInvert = true;
+ LLVM_FALLTHROUGH;
case ISD::SETO:
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
&& "If SETO is expanded, SETOEQ must be legal!");
CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
- case ISD::SETUO:
- assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT)
- && "If SETUO is expanded, SETUNE must be legal!");
- CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ // If the SETUO or SETO CC isn't legal, we might be able to use
+ // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
+ // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
+ // the operands.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ if (!TLI.isCondCodeLegal(CC2, OpVT) &&
+ (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
+ TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
+ CC1 = ISD::SETOGT;
+ CC2 = ISD::SETOLT;
+ Opc = ISD::OR;
+ NeedInvert = ((unsigned)CCCode & 0x8U);
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETOLT:
case ISD::SETOLE:
- case ISD::SETONE:
- case ISD::SETUEQ:
case ISD::SETUNE:
case ISD::SETUGT:
case ISD::SETUGE:
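
The SETONE/SETUEQ fallback above only needs ordered-greater and ordered-less: their OR is true exactly for ordered, unequal operands (SETONE), and inverting it yields equal-or-unordered (SETUEQ). As plain comparisons (illustrative only):

    bool setONE(double a, double b) { return (a > b) || (a < b); }
    bool setUEQ(double a, double b) { return !((a > b) || (a < b)); }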
@@ -1727,12 +1799,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
// If we aren't the ordered or unorder operation,
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain,
+ IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain,
+ IsSignaling);
} else {
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
- SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain,
+ IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain,
+ IsSignaling);
}
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
@@ -1758,27 +1834,34 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
EVT DestVT, const SDLoc &dl,
SDValue Chain) {
+ unsigned SrcSize = SrcOp.getValueSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType);
+
+ // Don't convert with stack if the load/store is expensive.
+ if ((SrcSize > SlotSize &&
+ !TLI.isTruncStoreLegalOrCustom(SrcOp.getValueType(), SlotVT)) ||
+ (SlotSize < DestSize &&
+ !TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, DestVT, SlotVT)))
+ return SDValue();
+
// Create the stack frame object.
- unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
+ Align SrcAlign = DAG.getDataLayout().getPrefTypeAlign(
SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
- SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT.getStoreSize(), SrcAlign);
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
int SPFI = StackPtrFI->getIndex();
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
- unsigned SrcSize = SrcOp.getValueSizeInBits();
- unsigned SlotSize = SlotVT.getSizeInBits();
- unsigned DestSize = DestVT.getSizeInBits();
- Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
- unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType);
-
// Emit a store to the stack slot. Use a truncstore if the input value is
// later than DestVT.
SDValue Store;
- if (SrcSize > SlotSize)
+ if (SrcSize > SlotSize)
Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
SlotVT, SrcAlign);
else {
@@ -1790,7 +1873,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
// Result is a load from the stack slot.
if (SlotSize == DestSize)
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
-
+
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
DestAlign);
@@ -2111,7 +2194,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
if (Node->isStrictFPOpcode()) {
EVT RetVT = Node->getValueType(0);
- SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
+ SmallVector<SDValue, 4> Ops(drop_begin(Node->ops()));
TargetLowering::MakeLibCallOptions CallOptions;
// FIXME: This doesn't support tail calls.
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
@@ -2361,7 +2444,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
// TODO: Should any fast-math-flags be set for the created nodes?
LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
- if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+ if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64) &&
+ (DestVT.bitsLE(MVT::f64) ||
+ TLI.isOperationLegal(Node->isStrictFPOpcode() ? ISD::STRICT_FP_EXTEND
+ : ISD::FP_EXTEND,
+ DestVT))) {
LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
"expansion\n");
@@ -2388,7 +2475,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot,
MachinePointerInfo());
// Store the hi of the constructed double.
- SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
+ SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl);
SDValue Store2 =
DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo());
MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
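
The "constructed double" in this path is the classic bit trick: write the 32-bit integer into the low word and the exponent pattern 0x43300000 into the high word, which encodes 2^52 + x exactly, then subtract the bias (the signed variant maps through unsigned space with a different bias). A standalone sketch of the unsigned case:

    #include <bit>
    #include <cstdint>

    // Build the double 2^52 + x by bit manipulation (0x43300000 is the high
    // word of 2^52), then subtract 2^52; the result is exactly x.
    double u32ToF64ViaBits(uint32_t x) {
      uint64_t bits = 0x4330000000000000ull | x;
      return std::bit_cast<double>(bits) - 0x1p52;
    }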
@@ -2423,16 +2510,24 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
}
return Result;
}
- // Code below here assumes !isSigned without checking again.
- assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+
+ if (isSigned)
+ return SDValue();
// TODO: Generalize this for use with other types.
- if ((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) {
- LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32\n");
+ if (((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) ||
+ (SrcVT == MVT::i64 && DestVT == MVT::f64)) {
+ LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32/f64\n");
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundisf in compiler_rt. That method
// should be valid for i32->f32 as well.
+ // More generally this transform should be valid if there are 3 more bits
+ // in the integer type than the significand. Rounding uses the first bit
+ // after the width of the significand and the OR of all bits after that. So
+ // we need to be able to OR the shifted out bit into one of the bits that
+ // participate in the OR.
+
// TODO: This really should be implemented using a branch rather than a
// select. We happen to get lucky and machinesink does the right
// thing most of the time. This would be a good candidate for a
@@ -2476,6 +2571,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
return DAG.getSelect(dl, DestVT, SignBitTest, Slow, Fast);
}
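
The compiler_rt-style conversion referred to above: when the top bit of the unsigned value is set, halve it while ORing the dropped bit back in so rounding still observes it, convert as signed, and double the result. A standalone sketch of the algorithm (the DAG form expresses the doubling as an fadd of the value with itself and the branch as a select):

    #include <cstdint>

    float u64ToF32(uint64_t x) {
      if (static_cast<int64_t>(x) >= 0)             // fits in a signed convert
        return static_cast<float>(static_cast<int64_t>(x));
      uint64_t halved = (x >> 1) | (x & 1);         // keep the lost bit sticky
      float f = static_cast<float>(static_cast<int64_t>(halved));
      return f + f;
    }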
+ // Don't expand it if there isn't cheap fadd.
+ if (!TLI.isOperationLegalOrCustom(
+ Node->isStrictFPOpcode() ? ISD::STRICT_FADD : ISD::FADD, DestVT))
+ return SDValue();
+
// The following optimization is valid only if every value in SrcVT (when
// treated as signed) is representable in DestVT. Check that the mantissa
// size of DestVT is >= than the number of bits in SrcVT -1.
@@ -2502,7 +2602,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
// offset depending on the data type.
uint64_t FF;
switch (SrcVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unsupported integer type!");
+ default:
+ return SDValue();
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
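
The table entries are powers of two encoded as f32 constants (0x47800000 is 2^16, for example): the expansion converts the value as if it were signed and then adds the matching 2^N back when the sign bit was set, selecting between 0.0 and the constant-pool entry. Shown for u16 -> float, where every signed i16 is exactly representable so the precondition stated above holds (illustrative helper):

    #include <cstdint>

    float u16ToF32(uint16_t x) {
      float f = static_cast<float>(static_cast<int16_t>(x)); // signed convert
      if (static_cast<int16_t>(x) < 0)
        f += 65536.0f;                                       // add 2^16 back
      return f;
    }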
@@ -2657,6 +2758,30 @@ void SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
Results.push_back(Operation.getValue(1));
}
+/// Promote FP_TO_*INT_SAT operation to a larger result type. At this point
+/// the result and operand types are legal and there must be a legal
+/// FP_TO_*INT_SAT operation for a larger result type.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT_SAT(SDNode *Node,
+ const SDLoc &dl) {
+ unsigned Opcode = Node->getOpcode();
+
+ // Scan for the appropriate larger type to use.
+ EVT NewOutTy = Node->getValueType(0);
+ while (true) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy + 1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ if (TLI.isOperationLegalOrCustom(Opcode, NewOutTy))
+ break;
+ }
+
+ // Saturation width is determined by second operand, so we don't have to
+ // perform any fixup and can directly truncate the result.
+ SDValue Result = DAG.getNode(Opcode, dl, NewOutTy, Node->getOperand(0),
+ Node->getOperand(1));
+ return DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Result);
+}
+
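
The truncate at the end of PromoteLegalFP_TO_INT_SAT is lossless because the saturation bounds are taken from the requested width (the second operand), so the wide result already fits. A standalone sketch of "saturate to i8, computed in i32" (hypothetical helper following the fptosi.sat semantics of clamping and mapping NaN to 0):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    int8_t fpToSIntSat8(double x) {
      if (std::isnan(x))
        return 0;                                    // NaN saturates to 0
      int32_t wide = static_cast<int32_t>(std::clamp(x, -128.0, 127.0));
      return static_cast<int8_t>(wide);              // truncation cannot change it
    }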
/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts.
SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
EVT VT = Op.getValueType();
@@ -2773,6 +2898,28 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
}
}
+/// Open code the operations for PARITY of the specified operation.
+SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ // If CTPOP is legal, use it. Otherwise use shifts and xor.
+ SDValue Result;
+ if (TLI.isOperationLegal(ISD::CTPOP, VT)) {
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ } else {
+ Result = Op;
+ for (unsigned i = Log2_32_Ceil(Sz); i != 0;) {
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, Result,
+ DAG.getConstant(1ULL << (--i), dl, ShVT));
+ Result = DAG.getNode(ISD::XOR, dl, VT, Result, Shift);
+ }
+ }
+
+ return DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(1, dl, VT));
+}
+
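
The shift/xor fallback in ExpandPARITY folds the value onto itself log2(width) times and keeps the low bit; with a legal CTPOP the same answer is simply popcount(x) & 1. Written out for 32 bits (illustrative):

    #include <cstdint>

    uint32_t parity32(uint32_t x) {
      for (unsigned shift = 16; shift != 0; shift >>= 1)
        x ^= x >> shift;
      return x & 1;
    }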
bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Trying to expand node\n");
SmallVector<SDValue, 8> Results;
@@ -2804,6 +2951,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::BSWAP:
Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
break;
+ case ISD::PARITY:
+ Results.push_back(ExpandPARITY(Node->getOperand(0), dl));
+ break;
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
case ISD::FRAME_TO_ARGS_OFFSET:
@@ -2948,18 +3098,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
// We fall back to use stack operation when the FP_ROUND operation
// isn't available.
- Tmp1 = EmitStackConvert(Node->getOperand(1),
- Node->getValueType(0),
- Node->getValueType(0), dl, Node->getOperand(0));
- ReplaceNode(Node, Tmp1.getNode());
- LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
- return true;
+ if ((Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0),
+ Node->getValueType(0), dl,
+ Node->getOperand(0)))) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
+ return true;
+ }
+ break;
case ISD::FP_ROUND:
case ISD::BITCAST:
- Tmp1 = EmitStackConvert(Node->getOperand(0),
- Node->getValueType(0),
- Node->getValueType(0), dl);
- Results.push_back(Tmp1);
+ if ((Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl)))
+ Results.push_back(Tmp1);
break;
case ISD::STRICT_FP_EXTEND:
// When strict mode is enforced we can't do expansion because it
@@ -2974,17 +3125,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
// We fall back to use stack operation when the FP_EXTEND operation
// isn't available.
- Tmp1 = EmitStackConvert(Node->getOperand(1),
- Node->getOperand(1).getValueType(),
- Node->getValueType(0), dl, Node->getOperand(0));
- ReplaceNode(Node, Tmp1.getNode());
- LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
- return true;
+ if ((Tmp1 = EmitStackConvert(
+ Node->getOperand(1), Node->getOperand(1).getValueType(),
+ Node->getValueType(0), dl, Node->getOperand(0)))) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
+ return true;
+ }
+ break;
case ISD::FP_EXTEND:
- Tmp1 = EmitStackConvert(Node->getOperand(0),
- Node->getOperand(0).getValueType(),
- Node->getValueType(0), dl);
- Results.push_back(Tmp1);
+ if ((Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl)))
+ Results.push_back(Tmp1);
break;
case ISD::SIGN_EXTEND_INREG: {
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
@@ -3029,10 +3182,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LLVM_FALLTHROUGH;
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
- Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2);
- Results.push_back(Tmp1);
- if (Node->isStrictFPOpcode())
- Results.push_back(Tmp2);
+ if ((Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2))) {
+ Results.push_back(Tmp1);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Tmp2);
+ }
break;
case ISD::FP_TO_SINT:
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
@@ -3059,6 +3213,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
return true;
}
break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Results.push_back(TLI.expandFP_TO_INT_SAT(Node, DAG));
+ break;
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
@@ -3187,7 +3345,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::STACKSAVE:
// Expand to CopyFromReg if the target set
// StackPointerRegisterToSaveRestore.
- if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ if (Register SP = TLI.getStackPointerRegisterToSaveRestore()) {
Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
Node->getValueType(0)));
Results.push_back(Results[0].getValue(1));
@@ -3199,7 +3357,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::STACKRESTORE:
// Expand to CopyToReg if the target set
// StackPointerRegisterToSaveRestore.
- if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ if (Register SP = TLI.getStackPointerRegisterToSaveRestore()) {
Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
Node->getOperand(1)));
} else {
@@ -3214,12 +3372,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(ExpandFCOPYSIGN(Node));
break;
case ISD::FNEG:
- // Expand Y = FNEG(X) -> Y = SUB -0.0, X
- Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
- // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
- Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
- Node->getOperand(0));
- Results.push_back(Tmp1);
+ Results.push_back(ExpandFNEG(Node));
break;
case ISD::FABS:
Results.push_back(ExpandFABS(Node));
@@ -3315,7 +3468,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Check to see if this FP immediate is already legal.
// If this is a legal constant, turn it into a TargetConstantFP node.
if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0),
- DAG.getMachineFunction().getFunction().hasOptSize()))
+ DAG.shouldOptForSize()))
Results.push_back(ExpandConstantFP(CFP, true));
break;
}
@@ -3394,7 +3547,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SmallVector<SDValue, 4> Halves;
EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext());
assert(TLI.isTypeLegal(HalfType));
- if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves,
+ if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, dl, LHS, RHS, Halves,
HalfType, DAG,
TargetLowering::MulExpansionKind::Always)) {
for (unsigned i = 0; i < 2; ++i) {
@@ -3463,7 +3616,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
case ISD::ROTL:
case ISD::ROTR:
- if (TLI.expandROT(Node, Tmp1, DAG))
+ if (TLI.expandROT(Node, true /*AllowVectorOps*/, Tmp1, DAG))
Results.push_back(Tmp1);
break;
case ISD::SADDSAT:
@@ -3472,6 +3625,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::USUBSAT:
Results.push_back(TLI.expandAddSubSat(Node, DAG));
break;
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ Results.push_back(TLI.expandShlSat(Node, DAG));
+ break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -3809,16 +3966,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
- assert(!NeedInvert && "Don't know how to invert BR_CC!");
-
// If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
// node.
if (Tmp4.getNode()) {
+ assert(!NeedInvert && "Don't know how to invert BR_CC!");
+
Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
Tmp4, Tmp2, Tmp3, Node->getOperand(4));
} else {
Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType());
- Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp4 = DAG.getCondCode(NeedInvert ? ISD::SETEQ : ISD::SETNE);
Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4,
Tmp2, Tmp3, Node->getOperand(4));
}
@@ -3899,6 +4056,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
== TargetLowering::Legal)
return true;
break;
+ case ISD::STRICT_FSUB: {
+ if (TLI.getStrictFPOperationAction(
+ ISD::STRICT_FSUB, Node->getValueType(0)) == TargetLowering::Legal)
+ return true;
+ if (TLI.getStrictFPOperationAction(
+ ISD::STRICT_FADD, Node->getValueType(0)) != TargetLowering::Legal)
+ break;
+
+ EVT VT = Node->getValueType(0);
+ const SDNodeFlags Flags = Node->getFlags();
+ SDValue Neg = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(2), Flags);
+ SDValue Fadd = DAG.getNode(ISD::STRICT_FADD, dl, Node->getVTList(),
+ {Node->getOperand(0), Node->getOperand(1), Neg},
+ Flags);
+
+ Results.push_back(Fadd);
+ Results.push_back(Fadd.getValue(1));
+ break;
+ }
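
The STRICT_FSUB fallback above is plain algebra: a - b is computed as a + (-b), and since FNEG is an exact sign-bit flip the sum rounds identically to the subtraction. In ordinary C++ terms (illustrative only; the strict-FP chain handling has no scalar-code analogue):

    double fsubViaFadd(double a, double b) { return a + (-b); }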
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT:
case ISD::STRICT_LROUND:
@@ -3967,12 +4145,23 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_CMP_SWAP: {
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
+ AtomicOrdering Order = cast<AtomicSDNode>(Node)->getOrdering();
+ RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
EVT RetVT = Node->getValueType(0);
- SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
TargetLowering::MakeLibCallOptions CallOptions;
+ SmallVector<SDValue, 4> Ops;
+ if (TLI.getLibcallName(LC)) {
+ // If outline atomic available, prepare its arguments and expand.
+ Ops.append(Node->op_begin() + 2, Node->op_end());
+ Ops.push_back(Node->getOperand(1));
+
+ } else {
+ LC = RTLIB::getSYNC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unexpected atomic op or value type!");
+ // Arguments for expansion to sync libcall
+ Ops.append(Node->op_begin() + 1, Node->op_end());
+ }
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
Ops, CallOptions,
SDLoc(Node),
@@ -4220,11 +4409,131 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandLibCall(LC, Node, false));
break;
}
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: {
+ // TODO - Common the code with DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP
+ bool IsStrict = Node->isStrictFPOpcode();
+ bool Signed = Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ EVT SVT = Node->getOperand(IsStrict ? 1 : 0).getValueType();
+ EVT RVT = Node->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(Node);
+
+ // Even if the input is legal, no libcall may exactly match, e.g. we don't
+ // have i1 -> fp conversions. So, it needs to be promoted to a larger type,
+ // e.g. i13 -> fp. Then, look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++t) {
+ NVT = (MVT::SimpleValueType)t;
+ // The source needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT)
+ : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, Node->getOperand(IsStrict ? 1 : 0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(Signed);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, dl, Chain);
+ Results.push_back(Tmp.first);
+ if (IsStrict)
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT: {
+ // TODO - Common the code with DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT.
+ bool IsStrict = Node->isStrictFPOpcode();
+ bool Signed = Node->getOpcode() == ISD::FP_TO_SINT ||
+ Node->getOpcode() == ISD::STRICT_FP_TO_SINT;
+
+ SDValue Op = Node->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
+ EVT RVT = Node->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(Node);
+
+ // Even if the result is legal, no libcall may exactly match, e.g. we don't
+ // have fp -> i1 conversions. So, it needs to be promoted to a larger type,
+ // e.g. fp -> i32. Then, look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ NVT = (MVT::SimpleValueType)IntVT;
+ // The type needs to be big enough to hold the result.
+ if (NVT.bitsGE(RVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT)
+ : RTLIB::getFPTOUINT(SVT, NVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl, Chain);
+
+ // Truncate the result if the libcall returns a larger type.
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first));
+ if (IsStrict)
+ Results.push_back(Tmp.second);
+ break;
+ }
+
+ case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND: {
+ // X = FP_ROUND(Y, TRUNC)
+ // TRUNC is a flag, which is always an integer that is zero or one.
+ // If TRUNC is 0, this is a normal rounding; if it is 1, this FP_ROUND
+ // is known to not change the value of Y.
+ // We can only expand it into a libcall if TRUNC is 0.
+ bool IsStrict = Node->isStrictFPOpcode();
+ SDValue Op = Node->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ EVT VT = Node->getValueType(0);
+ assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))
+ ->isNullValue() &&
+ "Unable to expand as libcall if it is not normal rounding");
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, SDLoc(Node), Chain);
+ Results.push_back(Tmp.first);
+ if (IsStrict)
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::FP_EXTEND: {
+ Results.push_back(
+ ExpandLibCall(RTLIB::getFPEXT(Node->getOperand(0).getValueType(),
+ Node->getValueType(0)),
+ Node, false));
+ break;
+ }
+ case ISD::STRICT_FP_EXTEND:
case ISD::STRICT_FP_TO_FP16: {
RTLIB::Libcall LC =
- RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
- assert(LC != RTLIB::UNKNOWN_LIBCALL &&
- "Unable to expand strict_fp_to_fp16");
+ Node->getOpcode() == ISD::STRICT_FP_TO_FP16
+ ? RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16)
+ : RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
+ Node->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
TargetLowering::MakeLibCallOptions CallOptions;
std::pair<SDValue, SDValue> Tmp =
TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1),
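The new int-to-fp and fp-to-int libcall cases both scan MVT's integer types for the narrowest one at least as wide as the value being converted, since the conversion helpers only exist for standard widths (there is no i13-to-float routine). For int-to-fp the argument is then sign- or zero-extended to that width; for fp-to-int the call is made at the wider width and the result truncated. The width search, reduced to plain integers as a sketch (the listed widths are an assumption about which helpers exist):

    // Narrowest helper-supported width that can hold 'bits' bits; 0 if none.
    unsigned pickLibcallWidth(unsigned bits) {
      for (unsigned w : {32u, 64u, 128u})  // widths such helpers typically cover
        if (w >= bits)
          return w;
      return 0;
    }
    // e.g. i13 -> float extends to pickLibcallWidth(13) == 32 bits and calls
    // the i32 -> float helper; fp -> i13 calls fp -> i32 and truncates.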
@@ -4321,7 +4630,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
OVT = Node->getOperand(0).getSimpleValueType();
}
if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
- Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
+ Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS)
OVT = Node->getOperand(1).getSimpleValueType();
if (Node->getOpcode() == ISD::BR_CC)
OVT = Node->getOperand(2).getSimpleValueType();
@@ -4381,6 +4692,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::STRICT_FP_TO_SINT:
PromoteLegalFP_TO_INT(Node, dl, Results);
break;
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
+ Results.push_back(PromoteLegalFP_TO_INT_SAT(Node, dl));
+ break;
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
@@ -4515,13 +4830,29 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::SETCC: {
+ case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
unsigned ExtOp = ISD::FP_EXTEND;
if (NVT.isInteger()) {
- ISD::CondCode CCCode =
- cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
}
+ if (Node->isStrictFPOpcode()) {
+ SDValue InChain = Node->getOperand(0);
+ std::tie(Tmp1, std::ignore) =
+ DAG.getStrictFPExtendOrRound(Node->getOperand(1), InChain, dl, NVT);
+ std::tie(Tmp2, std::ignore) =
+ DAG.getStrictFPExtendOrRound(Node->getOperand(2), InChain, dl, NVT);
+ SmallVector<SDValue, 2> TmpChains = {Tmp1.getValue(1), Tmp2.getValue(1)};
+ SDValue OutChain = DAG.getTokenFactor(dl, TmpChains);
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ Results.push_back(DAG.getNode(Node->getOpcode(), dl, VTs,
+ {OutChain, Tmp1, Tmp2, Node->getOperand(3)},
+ Node->getFlags()));
+ Results.push_back(Results.back().getValue(1));
+ break;
+ }
Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 7e8ad28f9b14..966645e3256d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -134,6 +134,16 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAX:
+ R = SoftenFloatRes_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ R = SoftenFloatRes_VECREDUCE_SEQ(N);
+ break;
}
// If R is null, the sub-method took care of registering the result.
@@ -772,6 +782,16 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
return Tmp.first;
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE(SDNode *N) {
+ // Expand and soften recursively.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE_SEQ(SDNode *N) {
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG));
+ return SDValue();
+}
//===----------------------------------------------------------------------===//
// Convert Float Operand to Integer
@@ -799,6 +819,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = SoftenFloatOp_FP_TO_XINT_SAT(N); break;
case ISD::STRICT_LROUND:
case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break;
case ISD::STRICT_LLROUND:
@@ -890,6 +913,24 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
0);
}
+// Even if the result type is legal, no libcall may exactly match (e.g. we
+// don't have FP-to-i8 conversions). This helper method looks for an
+// appropriate promoted libcall.
+static RTLIB::Libcall findFPToIntLibcall(EVT SrcVT, EVT RetVT, EVT &Promoted,
+ bool Signed) {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ Promoted = (MVT::SimpleValueType)IntVT;
+ // The type needs to be big enough to hold the result.
+ if (Promoted.bitsGE(RetVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SrcVT, Promoted)
+ : RTLIB::getFPTOUINT(SrcVT, Promoted);
+ }
+ return LC;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
bool Signed = N->getOpcode() == ISD::FP_TO_SINT ||
@@ -905,16 +946,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
// a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly
// match, eg. we don't have fp -> i8 conversions.
// Look for an appropriate libcall.
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
- IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
- ++IntVT) {
- NVT = (MVT::SimpleValueType)IntVT;
- // The type needs to big enough to hold the result.
- if (NVT.bitsGE(RVT))
- LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT);
- }
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
+ RTLIB::Libcall LC = findFPToIntLibcall(SVT, RVT, NVT, Signed);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && NVT.isSimple() &&
+ "Unsupported FP_TO_XINT!");
Op = GetSoftenedFloat(Op);
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
@@ -934,6 +968,11 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
return SDValue();
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT_SAT(SDNode *N) {
+ SDValue Res = TLI.expandFP_TO_INT_SAT(N, DAG);
+ return Res;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
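The saturating conversions handed to TLI.expandFP_TO_INT_SAT above follow the llvm.fptosi.sat / llvm.fptoui.sat contract: out-of-range inputs clamp to the integer range instead of producing poison, and NaN maps to zero. A scalar model of the signed case, as a sketch of the semantics rather than of the expansion itself:

    #include <cmath>
    #include <cstdint>

    // fptosi.sat-style conversion to i32: clamp instead of overflowing,
    // NaN becomes 0. INT32_MIN and INT32_MAX are exactly representable in
    // double, so the boundary compares are exact.
    int32_t fptosi_sat_i32(double x) {
      if (std::isnan(x)) return 0;
      if (x <= (double)INT32_MIN) return INT32_MIN;
      if (x >= (double)INT32_MAX) return INT32_MAX;
      return (int32_t)x;   // in range: ordinary truncation toward zero
    }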
@@ -1200,6 +1239,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
case ISD::STRICT_FREM:
@@ -1272,7 +1313,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::FMIN_F32, RTLIB::FMIN_F64,
RTLIB::FMIN_F80, RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128), Lo, Hi);
@@ -1598,21 +1639,31 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- SDValue Src = N->getOperand(0);
+ bool Strict = N->isStrictFPOpcode();
+ SDValue Src = N->getOperand(Strict ? 1 : 0);
EVT SrcVT = Src.getValueType();
- bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
SDLoc dl(N);
+ SDValue Chain = Strict ? N->getOperand(0) : DAG.getEntryNode();
+
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(N->getFlags().hasNoFPExcept());
// First do an SINT_TO_FP, whether the original was signed or unsigned.
// When promoting partial word types to i32 we must honor the signedness,
// though.
if (SrcVT.bitsLE(MVT::i32)) {
// The integer can be represented exactly in an f64.
- Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
- MVT::i32, Src);
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
APInt(NVT.getSizeInBits(), 0)), dl, NVT);
- Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ if (Strict) {
+ Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, MVT::Other),
+ {Chain, Src}, Flags);
+ Chain = Hi.getValue(1);
+ } else
+ Hi = DAG.getNode(N->getOpcode(), dl, NVT, Src);
} else {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (SrcVT.bitsLE(MVT::i64)) {
@@ -1627,14 +1678,25 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first;
- GetPairElements(Hi, Lo, Hi);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain);
+ if (Strict)
+ Chain = Tmp.second;
+ GetPairElements(Tmp.first, Lo, Hi);
}
- if (isSigned)
+ // No need to complement for unsigned 32-bit integers
+ if (isSigned || SrcVT.bitsLE(MVT::i32)) {
+ if (Strict)
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
return;
+ }
// Unsigned - fix up the SINT_TO_FP value just calculated.
+ // FIXME: For unsigned i128 to ppc_fp128 conversion, we need to carefully
+ // keep semantics correctness if the integer is not exactly representable
+ // here. See ExpandLegalINT_TO_FP.
Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
SrcVT = Src.getValueType();
@@ -1658,11 +1720,16 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
break;
}
- // TODO: Are there fast-math-flags to propagate to this FADD?
- Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
- DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble(),
- APInt(128, Parts)),
- dl, MVT::ppcf128));
+ // TODO: Are there other fast-math-flags to propagate to this FADD?
+ SDValue NewLo = DAG.getConstantFP(
+ APFloat(APFloat::PPCDoubleDouble(), APInt(128, Parts)), dl, MVT::ppcf128);
+ if (Strict) {
+ Lo = DAG.getNode(ISD::STRICT_FADD, dl, DAG.getVTList(VT, MVT::Other),
+ {Chain, Hi, NewLo}, Flags);
+ Chain = Lo.getValue(1);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+ } else
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, NewLo);
Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT),
Lo, Hi, ISD::SETLT);
GetPairElements(Lo, Lo, Hi);
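For the unsigned case the hunk converts as if the source were signed and then adds back the appropriate 2^N constant whenever the source, reinterpreted as signed, is negative, now threading the chain through STRICT_FADD; the FIXME notes the remaining exactness caveat for i128 inputs. The same trick on an ordinary double, ignoring that double-rounding corner case:

    #include <cstdint>

    // u64 -> double via the signed conversion plus a 2^64 fix-up when the
    // input's top bit was set (i.e. the signed view came out negative).
    double u64_to_double(uint64_t u) {
      double d = (double)(int64_t)u;
      if ((int64_t)u < 0)
        d += 18446744073709551616.0;   // 2^64
      return d;
    }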
@@ -1702,14 +1769,16 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
case ISD::STRICT_FP_TO_SINT:
- case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
case ISD::STRICT_FP_TO_UINT:
- case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_XINT(N); break;
case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break;
case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break;
case ISD::LRINT: Res = ExpandFloatOp_LRINT(N); break;
case ISD::LLRINT: Res = ExpandFloatOp_LLRINT(N); break;
case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
@@ -1735,7 +1804,8 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
SDValue &NewRHS,
ISD::CondCode &CCCode,
- const SDLoc &dl) {
+ const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling) {
SDValue LHSLo, LHSHi, RHSLo, RHSHi;
GetExpandedFloat(NewLHS, LHSLo, LHSHi);
GetExpandedFloat(NewRHS, RHSLo, RHSHi);
@@ -1747,25 +1817,32 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
// BNE crN, L:
// FCMPU crN, lo1, lo2
// The following can be improved, but not that much.
- SDValue Tmp1, Tmp2, Tmp3;
- Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, ISD::SETOEQ);
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()),
- LHSLo, RHSLo, CCCode);
+ SDValue Tmp1, Tmp2, Tmp3, OutputChain;
+ Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi,
+ RHSHi, ISD::SETOEQ, Chain, IsSignaling);
+ OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue();
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, CCCode, OutputChain, IsSignaling);
+ OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue();
Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
- Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, ISD::SETUNE);
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, CCCode);
+ Tmp1 =
+ DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi,
+ ISD::SETUNE, OutputChain, IsSignaling);
+ OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue();
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi,
+ RHSHi, CCCode, OutputChain, IsSignaling);
+ OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue();
Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
NewRHS = SDValue(); // LHS is the result, not a compare.
+ Chain = OutputChain;
}
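The expanded compare is decomposed so that Tmp3 handles the case where the high parts are (ordered) equal and the low parts decide, while the second Tmp1 & Tmp2 term handles the unequal or unordered high parts, with the chains now threaded through so signaling compares keep their exception side effects. Folding that AND/OR form back into a branch for readability, a sketch of SETLT on a high/low pair of doubles:

    // SETLT on a double-double value: the high parts decide unless they are
    // (ordered) equal, in which case the low parts break the tie. Unordered
    // high parts fall into the else branch, where '<' correctly yields false.
    bool dd_setlt(double hi1, double lo1, double hi2, double lo2) {
      if (hi1 == hi2)
        return lo1 < lo2;
      return hi1 < hi2;
    }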
SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
- FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+ SDValue Chain;
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain);
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -1820,38 +1897,23 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
return SDValue();
}
-SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_XINT(SDNode *N) {
EVT RVT = N->getValueType(0);
SDLoc dl(N);
bool IsStrict = N->isStrictFPOpcode();
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
- RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- TargetLowering::MakeLibCallOptions CallOptions;
- std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
- CallOptions, dl, Chain);
- if (!IsStrict)
- return Tmp.first;
-
- ReplaceValueWith(SDValue(N, 1), Tmp.second);
- ReplaceValueWith(SDValue(N, 0), Tmp.first);
- return SDValue();
-}
-SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
- EVT RVT = N->getValueType(0);
- SDLoc dl(N);
-
- bool IsStrict = N->isStrictFPOpcode();
- SDValue Op = N->getOperand(IsStrict ? 1 : 0);
- SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
- RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ EVT NVT;
+ RTLIB::Libcall LC = findFPToIntLibcall(Op.getValueType(), RVT, NVT, Signed);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && NVT.isSimple() &&
+ "Unsupported FP_TO_XINT!");
TargetLowering::MakeLibCallOptions CallOptions;
- std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
- CallOptions, dl, Chain);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl, Chain);
if (!IsStrict)
return Tmp.first;
@@ -1863,7 +1925,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
- FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+ SDValue Chain;
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain);
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -1879,20 +1942,25 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
- SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
- ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
- FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue NewLHS = N->getOperand(IsStrict ? 1 : 0);
+ SDValue NewRHS = N->getOperand(IsStrict ? 2 : 1);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain,
+ N->getOpcode() == ISD::STRICT_FSETCCS);
- // If ExpandSetCCOperands returned a scalar, use it.
- if (!NewRHS.getNode()) {
- assert(NewLHS.getValueType() == N->getValueType(0) &&
- "Unexpected setcc expansion!");
- return NewLHS;
+ // FloatExpandSetCCOperands always returned a scalar.
+ assert(!NewRHS.getNode() && "Expect to return scalar");
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ if (Chain) {
+ ReplaceValueWith(SDValue(N, 0), NewLHS);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+ return SDValue();
}
-
- // Otherwise, update N to have the operands specified.
- return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
- DAG.getCondCode(CCCode)), 0);
+ return NewLHS;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
@@ -2013,6 +2081,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break;
case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
@@ -2056,6 +2127,13 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) {
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op);
}
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N,
+ unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op,
+ N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
SDValue Op = GetPromotedFloat(N->getOperand(0));
EVT VT = N->getValueType(0);
@@ -2191,6 +2269,16 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAX:
+ R = PromoteFloatRes_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ R = PromoteFloatRes_VECREDUCE_SEQ(N);
+ break;
}
if (R.getNode())
@@ -2422,6 +2510,20 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
N->getValueType(0)));
}
+SDValue DAGTypeLegalizer::PromoteFloatRes_VECREDUCE(SDNode *N) {
+ // Expand and promote recursively.
+ // TODO: This is non-optimal, but dealing with the concurrently happening
+ // vector-legalization is non-trivial. We could do something similar to
+ // PromoteFloatRes_EXTRACT_VECTOR_ELT here.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_VECREDUCE_SEQ(SDNode *N) {
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG));
+ return SDValue();
+}
+
SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -2530,6 +2632,16 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAX:
+ R = SoftPromoteHalfRes_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ R = SoftPromoteHalfRes_VECREDUCE_SEQ(N);
+ break;
}
if (R.getNode())
@@ -2722,6 +2834,18 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) {
return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) {
+ // Expand and soften recursively.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N) {
+ // Expand and soften.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG));
+ return SDValue();
+}
+
//===----------------------------------------------------------------------===//
// Half Operand Soft Promotion
//===----------------------------------------------------------------------===//
@@ -2753,6 +2877,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: Res = SoftPromoteHalfOp_FP_EXTEND(N); break;
case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break;
@@ -2822,6 +2949,20 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res);
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+
+ Op = GetSoftPromotedHalf(Op);
+
+ SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res,
+ N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N,
unsigned OpNo) {
assert(OpNo == 0 && "Can only soften the comparison values");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 74071f763dbf..4a686bc227de 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -62,7 +62,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
- case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::PARITY:
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break;
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
@@ -81,7 +82,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SMIN:
case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UMIN:
- case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+ case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;
case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
case ISD::SIGN_EXTEND_INREG:
@@ -122,6 +123,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = PromoteIntRes_FP_TO_XINT_SAT(N); break;
+
case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break;
@@ -151,10 +156,15 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break;
+
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
- case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
+ case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT: Res = PromoteIntRes_ADDSUBSHLSAT(N); break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
@@ -205,6 +215,16 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FREEZE:
Res = PromoteIntRes_FREEZE(N);
break;
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Res = PromoteIntRes_Rotate(N);
+ break;
+
+ case ISD::FSHL:
+ case ISD::FSHR:
+ Res = PromoteIntRes_FunnelShift(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -491,10 +511,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
NVT));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
- // Zero extend to the promoted type and do the count there.
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
+ // Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
@@ -559,8 +579,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
SDValue Res;
if (N->isStrictFPOpcode()) {
- Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other },
- { N->getOperand(0), N->getOperand(1) });
+ Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -580,6 +600,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
DAG.getValueType(N->getValueType(0).getScalarType()));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) {
+ // Promote the result type, while keeping the original width in Op1.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0),
+ N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -663,12 +691,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
assert(NVT == ExtPassThru.getValueType() &&
"Gather result type and the passThru argument type should be the same");
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ ExtType = ISD::EXTLOAD;
+
SDLoc dl(N);
SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
- N->getMemOperand(), N->getIndexType());
+ N->getMemOperand(), N->getIndexType(),
+ ExtType);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -700,11 +733,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
// If the promoted type is legal, we can convert this to:
// 1. ANY_EXTEND iN to iM
// 2. SHL by M-N
- // 3. [US][ADD|SUB]SAT
+ // 3. [US][ADD|SUB|SHL]SAT
// 4. L/ASHR by M-N
// Else it is more efficient to convert this to a min and a max
// operation in the higher precision arithmetic.
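Steps 1-4 in the comment shift the narrow operands into the top bits of the promoted type so that the wide saturating operation clamps at exactly the narrow type's bounds, then shift the result back down; the same path now serves the new SSHLSAT/USHLSAT, where only the value operand is pre-shifted. A worked i8-in-i32 example of the add case, with a hand-rolled 32-bit saturating add standing in for the legal wide operation (and relying on the usual arithmetic right shift for negatives):

    #include <cstdint>

    // Stand-in for a legal 32-bit saturating add (GCC/Clang builtin used).
    int32_t sadd_sat32(int32_t a, int32_t b) {
      int32_t s;
      if (__builtin_add_overflow(a, b, &s))
        return a < 0 ? INT32_MIN : INT32_MAX;
      return s;
    }

    // i8 saturating add via the 4-step promotion described above:
    // extend, shift into the top byte, saturate at i32, shift back down.
    int8_t sadd_sat8(int8_t a, int8_t b) {
      int32_t wa = (int32_t)a * (1 << 24);   // ANY_EXTEND + SHL by 24
      int32_t wb = (int32_t)b * (1 << 24);
      int32_t ws = sadd_sat32(wa, wb);       // saturate in the wide type
      return (int8_t)(ws >> 24);             // ASHR by 24 for the signed case
    }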
@@ -714,9 +747,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
unsigned OldBits = Op1.getScalarValueSizeInBits();
unsigned Opcode = N->getOpcode();
+ bool IsShift = Opcode == ISD::USHLSAT || Opcode == ISD::SSHLSAT;
SDValue Op1Promoted, Op2Promoted;
- if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
+ if (IsShift) {
+ Op1Promoted = GetPromotedInteger(Op1);
+ Op2Promoted = ZExtPromotedInteger(Op2);
+ } else if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
Op1Promoted = ZExtPromotedInteger(Op1);
Op2Promoted = ZExtPromotedInteger(Op2);
} else {
@@ -726,20 +763,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
EVT PromotedType = Op1Promoted.getValueType();
unsigned NewBits = PromotedType.getScalarSizeInBits();
- if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+ // Shift cannot use a min/max expansion; we can't detect overflow if all of
+ // the bits have been shifted out.
+ if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
case ISD::SSUBSAT:
+ case ISD::SSHLSAT:
ShiftOp = ISD::SRA;
break;
case ISD::UADDSAT:
case ISD::USUBSAT:
+ case ISD::USHLSAT:
ShiftOp = ISD::SRL;
break;
default:
llvm_unreachable("Expected opcode to be signed or unsigned saturation "
- "addition or subtraction");
+ "addition, subtraction or left shift");
}
unsigned SHLAmount = NewBits - OldBits;
@@ -747,8 +788,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
Op1Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
- Op2Promoted =
- DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+ if (!IsShift)
+ Op2Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
SDValue Result =
DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
@@ -1076,6 +1118,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
+ // It doesn't matter if we sign extend or zero extend in the inputs. So do
+ // whatever is best for the target.
+ SDValue LHS = SExtOrZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtOrZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
// The input value must be properly sign extended.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
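The new UMIN/UMAX path uses SExtOrZExtPromotedInteger because either extension works as long as both operands get the same one: extension is monotone with respect to unsigned comparison, so the wide min/max truncates back to the narrow answer. A quick check of that claim on i8:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t a = 0xff, b = 0x01;
      // Zero-extend both, or sign-extend both; the truncated umax is the same.
      uint32_t viaZext = std::max<uint32_t>(a, b);
      uint32_t viaSext = std::max<uint32_t>((uint32_t)(int32_t)(int8_t)a,
                                            (uint32_t)(int32_t)(int8_t)b);
      assert((uint8_t)viaZext == 0xff && (uint8_t)viaSext == 0xff);
      return 0;
    }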
@@ -1094,6 +1145,60 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
+ // Lower the rotate to shifts and ORs which can be promoted.
+ SDValue Res;
+ TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
+ SDValue Hi = GetPromotedInteger(N->getOperand(0));
+ SDValue Lo = GetPromotedInteger(N->getOperand(1));
+ SDValue Amount = GetPromotedInteger(N->getOperand(2));
+
+ SDLoc DL(N);
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT VT = Lo.getValueType();
+ unsigned Opcode = N->getOpcode();
+ bool IsFSHR = Opcode == ISD::FSHR;
+ unsigned OldBits = OldVT.getScalarSizeInBits();
+ unsigned NewBits = VT.getScalarSizeInBits();
+
+ // Amount has to be interpreted modulo the old bit width.
+ Amount =
+ DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT));
+
+ // If the promoted type is twice the size (or more), then we use the
+ // traditional funnel 'double' shift codegen. This isn't necessary if the
+ // shift amount is constant.
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) &&
+ !TLI.isOperationLegalOrCustom(Opcode, VT)) {
+ SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
+ Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift);
+ Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
+ SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
+ Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount);
+ if (!IsFSHR)
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift);
+ return Res;
+ }
+
+ // Shift Lo up to occupy the upper bits of the promoted type.
+ SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT);
+ Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset);
+
+ // Increase Amount to shift the result into the lower bits of the promoted
+ // type.
+ if (IsFSHR)
+ Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset);
+
+ return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res;
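When the promoted type is at least twice as wide, the hunk uses the 'double shift' formulas from its comment: concatenate the two narrow values, shift the pair by z mod bw, and read the answer out of the right position; otherwise it pre-shifts Lo into the upper bits and, for FSHR, adds the offset to the amount. The concatenation form for i8 computed in i32:

    #include <cstdint>

    // fshl/fshr on i8 via the double-shift formulas in the comment above;
    // note fshl(x,x,z) is rotl and fshr(x,x,z) is rotr.
    uint8_t fshl8(uint8_t x, uint8_t y, unsigned z) {
      uint32_t cat = ((uint32_t)x << 8) | y;     // (aext(x) << bw) | zext(y)
      return (uint8_t)((cat << (z % 8)) >> 8);   // top byte of the 16-bit window
    }
    uint8_t fshr8(uint8_t x, uint8_t y, unsigned z) {
      uint32_t cat = ((uint32_t)x << 8) | y;
      return (uint8_t)(cat >> (z % 8));          // bottom byte of the window
    }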
@@ -1181,7 +1286,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
}
// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
-// the third operand of ADDE/SUBE nodes is carry flag, which differs from
+// the third operand of ADDE/SUBE nodes is carry flag, which differs from
// the ADDCARRY/SUBCARRY nodes in that the third operand is carry Boolean.
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
@@ -1212,6 +1317,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
return SDValue(Res.getNode(), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
+ unsigned ResNo) {
+ assert(ResNo == 1 && "Don't know how to promote other results yet.");
+ return PromoteIntRes_Overflow(N);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
@@ -1394,6 +1505,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::ROTL:
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
@@ -1620,8 +1733,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
EVT OpTy = N->getOperand(1).getValueType();
if (N->getOpcode() == ISD::VSELECT)
- if (SDValue Res = WidenVSELECTAndMask(N))
- return Res;
+ if (SDValue Res = WidenVSELECTMask(N))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ Res, N->getOperand(1), N->getOperand(2));
// Promote all the way up to the canonical SetCC type.
EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
@@ -1763,6 +1877,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
unsigned OpNo) {
+ bool TruncateStore = N->isTruncatingStore();
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
if (OpNo == 2) {
// The Mask
@@ -1775,9 +1890,17 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
else
NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
- } else
+
+ N->setIndexType(TLI.getCanonicalIndexType(N->getIndexType(),
+ N->getMemoryVT(), NewOps[OpNo]));
+ } else {
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ TruncateStore = true;
+ }
+
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(),
+ SDLoc(N), NewOps, N->getMemOperand(),
+ N->getIndexType(), TruncateStore);
}
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
@@ -1921,6 +2044,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break;
case ISD::CTLZ_ZERO_UNDEF:
@@ -1933,6 +2057,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break;
case ISD::STRICT_LLROUND:
case ISD::STRICT_LLRINT:
case ISD::LLROUND:
@@ -2009,6 +2135,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDCARRY:
case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break;
+
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
@@ -2025,6 +2154,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBSAT:
case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break;
+
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -2044,6 +2176,16 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ ExpandIntRes_Rotate(N, Lo, Hi);
+ break;
+
+ case ISD::FSHL:
+ case ISD::FSHR:
+ ExpandIntRes_FunnelShift(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2055,12 +2197,22 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
+ AtomicOrdering order = cast<AtomicSDNode>(Node)->getOrdering();
+ // Lower to an outline atomic libcall if outline atomics are enabled,
+ // or to a sync libcall otherwise.
+ RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, order, VT);
EVT RetVT = Node->getValueType(0);
- SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
TargetLowering::MakeLibCallOptions CallOptions;
+ SmallVector<SDValue, 4> Ops;
+ if (TLI.getLibcallName(LC)) {
+ Ops.append(Node->op_begin() + 2, Node->op_end());
+ Ops.push_back(Node->getOperand(1));
+ } else {
+ LC = RTLIB::getSYNC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unexpected atomic op or value type!");
+ Ops.append(Node->op_begin() + 1, Node->op_end());
+ }
return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
Node->getOperand(0));
}
@@ -2619,6 +2771,26 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+
+ // We need to use an unsigned carry op for the lo part.
+ unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? ISD::ADDCARRY
+ : ISD::SUBCARRY;
+ Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
+
+ // Legalize the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -2700,6 +2872,17 @@ void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_PARITY(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ // parity(HiLo) -> parity(Lo^Hi)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo =
+ DAG.getNode(ISD::PARITY, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, Lo, Hi));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
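Parity is the XOR-reduction of all bits, so the parity of the concatenated Hi:Lo value equals the parity of Lo ^ Hi; the expansion above therefore folds the halves together, takes one narrow PARITY, and zeroes the high half of the result. The identity on 64-bit halves:

    #include <cstdint>

    // parity(Hi:Lo) == parity(Lo ^ Hi); __builtin_parityll is the GCC/Clang
    // builtin for 64-bit parity.
    unsigned parity128(uint64_t lo, uint64_t hi) {
      return __builtin_parityll(lo ^ hi);
    }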
@@ -2717,16 +2900,38 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ GetExpandedInteger(N0, Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we
+ // use in LegalizeDAG. The ADD part of the expansion is based on
+ // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that
+ // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded
+ // if needed. Shift expansion has a special case for filling with sign bits
+ // so that we will only end up with one SRA.
+ bool HasAddCarry = TLI.isOperationLegalOrCustom(
+ ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (HasAddCarry) {
+ EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ SDValue Sign =
+ DAG.getNode(ISD::SRA, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
+ SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign);
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
+ Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
+ Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
+ return;
+ }
+
// abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, dl, VT), N0);
SDValue NegLo, NegHi;
SplitInteger(Neg, NegLo, NegHi);
- GetExpandedInteger(N0, Lo, Hi);
- EVT NVT = Lo.getValueType();
SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
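The new fast path above computes abs with the classic sign-mask sequence, deriving the mask from the high half with a single SRA and propagating the add across the halves with UADDO/ADDCARRY; the old compare-and-select form remains as the fallback. The single-word version of the trick, relying on the usual arithmetic right shift for negatives:

    #include <cstdint>

    // abs via sign mask: sign is 0 or all-ones; adding it and XOR-ing with it
    // negates negative inputs and leaves non-negative ones alone. Done in
    // unsigned arithmetic so INT64_MIN wraps to 2^63 instead of overflowing.
    uint64_t abs64(int64_t x) {
      uint64_t sign = (uint64_t)(x >> 63);
      return ((uint64_t)x + sign) ^ sign;
    }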
@@ -2859,6 +3064,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Res = TLI.expandFP_TO_INT_SAT(N, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
@@ -2929,7 +3140,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
return;
}
-
+
if (ISD::isNormalLoad(N)) {
ExpandRes_NormalLoad(N, Lo, Hi);
return;
@@ -2983,7 +3194,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
N->getOriginalAlign(), MMOFlags, AAInfo);
@@ -3007,7 +3218,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -3147,6 +3358,12 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
SplitInteger(Result, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_SHLSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Result = TLI.expandShlSat(N, DAG);
+ SplitInteger(Result, Lo, Hi);
+}
+
/// This performs an expansion of the integer result for a fixed point
/// multiplication. The default expansion performs rounding down towards
/// negative infinity, though targets that do care about rounding should specify
@@ -3385,40 +3602,66 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue RHS = Node->getOperand(1);
SDLoc dl(Node);
- // Expand the result by simply replacing it with the equivalent
- // non-overflow-checking operation.
- SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
- ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
- LHS, RHS);
- SplitInteger(Sum, Lo, Hi);
+ SDValue Ovf;
- // Compute the overflow.
- //
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Sum >= 0
- //
- // Add:
- // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
- // Sub:
- // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
- //
- EVT OType = Node->getValueType(1);
- SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+ unsigned CarryOp;
+ switch(Node->getOpcode()) {
+ default: llvm_unreachable("Node has unexpected Opcode");
+ case ISD::SADDO: CarryOp = ISD::SADDO_CARRY; break;
+ case ISD::SSUBO: CarryOp = ISD::SSUBO_CARRY; break;
+ }
+
+ bool HasCarryOp = TLI.isOperationLegalOrCustom(
+ CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
- SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
- SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
- SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
- Node->getOpcode() == ISD::SADDO ?
- ISD::SETEQ : ISD::SETNE);
+ if (HasCarryOp) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1));
+
+ Lo = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::UADDO : ISD::USUBO, dl, VTList, { LHSL, RHSL });
+ Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
+
+ Ovf = Hi.getValue(1);
+ } else {
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
- SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
- SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
- SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Ovf = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ }
// Use the calculated overflow everywhere.
- ReplaceValueWith(SDValue(Node, 1), Cmp);
+ ReplaceValueWith(SDValue(Node, 1), Ovf);
}
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
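When SADDO_CARRY/SSUBO_CARRY are not legal, the fallback keeps the sign-bit overflow rule spelled out in the comment block: a signed addition overflows exactly when both operands have the same sign and the sum's sign differs. The rule on a single 64-bit word (the expansion applies it across the split Hi/Lo pair):

    #include <cstdint>

    // SADDO-style check: wraparound add, then the sign test from the comment.
    bool saddo64(int64_t l, int64_t r, int64_t &sum) {
      sum = (int64_t)((uint64_t)l + (uint64_t)r);  // two's-complement wrap
      bool lNeg = l < 0, rNeg = r < 0, sNeg = sum < 0;
      return (lNeg == rNeg) && (lNeg != sNeg);
    }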
@@ -3874,6 +4117,22 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
SplitInteger(Res, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Lower the rotate to shifts and ORs which can be expanded.
+ SDValue Res;
+ TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Lower the funnel shift to shifts and ORs which can be expanded.
+ SDValue Res;
+ TLI.expandFunnelShift(N, Res, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
//===----------------------------------------------------------------------===//
@@ -4246,7 +4505,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, N->getOriginalAlign(), MMOFlags, AAInfo);
@@ -4281,7 +4540,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -4586,8 +4845,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
+
+ EVT ResVT = N->getValueType(0);
unsigned NumElems = N->getNumOperands();
+ if (ResVT.isScalableVector()) {
+ SDValue ResVec = DAG.getUNDEF(ResVT);
+
+ for (unsigned OpIdx = 0; OpIdx < NumElems; ++OpIdx) {
+ SDValue Op = N->getOperand(OpIdx);
+ unsigned OpNumElts = Op.getValueType().getVectorMinNumElements();
+ ResVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ResVec, Op,
+ DAG.getIntPtrConstant(OpIdx * OpNumElts, dl));
+ }
+
+ return ResVec;
+ }
+
EVT RetSclrTy = N->getValueType(0).getVectorElementType();
SmallVector<SDValue, 8> NewOps;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index ae087d3bbd8c..a59f03854775 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -663,8 +663,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
// Process the list of nodes that need to be reanalyzed.
while (!NodesToAnalyze.empty()) {
- SDNode *N = NodesToAnalyze.back();
- NodesToAnalyze.pop_back();
+ SDNode *N = NodesToAnalyze.pop_back_val();
if (N->getNodeId() != DAGTypeLegalizer::NewNode)
// The node was analyzed while reanalyzing an earlier node - it is safe
// to skip. Note that this is not a morphing node - otherwise it would
@@ -753,7 +752,10 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
// Note that in some cases vector operation operands may be greater than
// the vector element type. For example BUILD_VECTOR of type <1 x i1> with
// a constant i8 operand.
- assert(Result.getValueSizeInBits() >= Op.getScalarValueSizeInBits() &&
+
+ // We don't currently support the scalarization of scalable vector types.
+ assert(Result.getValueSizeInBits().getFixedSize() >=
+ Op.getScalarValueSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
@@ -955,11 +957,12 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
for (unsigned i = 0, e = Results.size(); i != e; ++i) {
- // If this is a chain output just replace it.
- if (Results[i].getValueType() == MVT::Other)
- ReplaceValueWith(SDValue(N, i), Results[i]);
- else
+ // If this is a chain output or already widened just replace it.
+ bool WasWidened = SDValue(N, i).getValueType() != Results[i].getValueType();
+ if (WasWidened)
SetWidenedVector(SDValue(N, i), Results[i]);
+ else
+ ReplaceValueWith(SDValue(N, i), Results[i]);
}
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 0fa6d653a836..630a0a9adaf7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -311,10 +311,11 @@ private:
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
SDValue PromoteIntRes_CTLZ(SDNode *N);
- SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTPOP_PARITY(SDNode *N);
SDValue PromoteIntRes_CTTZ(SDNode *N);
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
@@ -331,22 +332,26 @@ private:
SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_UMINUMAX(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue PromoteIntRes_SRA(SDNode *N);
SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SADDSUBO_CARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_VSCALE(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
+ SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N);
SDValue PromoteIntRes_MULFIX(SDNode *N);
SDValue PromoteIntRes_DIVFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
SDValue PromoteIntRes_ABS(SDNode *N);
+ SDValue PromoteIntRes_Rotate(SDNode *N);
+ SDValue PromoteIntRes_FunnelShift(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -420,6 +425,7 @@ private:
void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -427,8 +433,10 @@ private:
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -442,12 +450,16 @@ private:
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SHLSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -541,6 +553,8 @@ private:
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+ SDValue SoftenFloatRes_VECREDUCE(SDNode *N);
+ SDValue SoftenFloatRes_VECREDUCE_SEQ(SDNode *N);
// Convert Float Operand to Integer.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
@@ -549,6 +563,7 @@ private:
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_XINT_SAT(SDNode *N);
SDValue SoftenFloatOp_LROUND(SDNode *N);
SDValue SoftenFloatOp_LLROUND(SDNode *N);
SDValue SoftenFloatOp_LRINT(SDNode *N);
@@ -617,8 +632,7 @@ private:
SDValue ExpandFloatOp_BR_CC(SDNode *N);
SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
- SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
- SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_XINT(SDNode *N);
SDValue ExpandFloatOp_LROUND(SDNode *N);
SDValue ExpandFloatOp_LLROUND(SDNode *N);
SDValue ExpandFloatOp_LRINT(SDNode *N);
@@ -628,7 +642,8 @@ private:
SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
- ISD::CondCode &CCCode, const SDLoc &dl);
+ ISD::CondCode &CCCode, const SDLoc &dl,
+ SDValue &Chain, bool IsSignaling = false);
//===--------------------------------------------------------------------===//
// Float promotion support: LegalizeFloatTypes.cpp
@@ -658,12 +673,15 @@ private:
SDValue PromoteFloatRes_UNDEF(SDNode *N);
SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
+ SDValue PromoteFloatRes_VECREDUCE(SDNode *N);
+ SDValue PromoteFloatRes_VECREDUCE_SEQ(SDNode *N);
bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
@@ -695,12 +713,15 @@ private:
SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N);
SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N);
SDValue SoftPromoteHalfRes_UNDEF(SDNode *N);
+ SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N);
+ SDValue SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N);
bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_BITCAST(SDNode *N);
SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo);
@@ -745,6 +766,7 @@ private:
SDValue ScalarizeVecRes_SETCC(SDNode *N);
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue ScalarizeVecRes_FIX(SDNode *N);
@@ -760,7 +782,10 @@ private:
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_FP_EXTEND(SDNode *N);
+ SDValue ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N);
SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
+ SDValue ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -778,8 +803,8 @@ private:
// Helper function for incrementing the pointer when splitting
// memory operations
- void IncrementPointer(MemSDNode *N, EVT MemVT,
- MachinePointerInfo &MPI, SDValue &Ptr);
+ void IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI,
+ SDValue &Ptr, uint64_t *ScaledOffset = nullptr);
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned ResNo);
@@ -806,20 +831,23 @@ private:
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VECREDUCE_SEQ(SDNode *N);
SDValue SplitVecOp_UnaryOp(SDNode *N);
SDValue SplitVecOp_TruncateHelper(SDNode *N);
SDValue SplitVecOp_BITCAST(SDNode *N);
+ SDValue SplitVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
@@ -831,6 +859,7 @@ private:
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
+ SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -862,9 +891,9 @@ private:
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
- SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_ScalarOp(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
- SDValue WidenVSELECTAndMask(SDNode *N);
+ SDValue WidenVSELECTMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
SDValue WidenVecRes_STRICT_FSETCC(SDNode* N);
@@ -879,9 +908,9 @@ private:
SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
+ SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
- SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
@@ -901,8 +930,10 @@ private:
SDValue WidenVecOp_VSELECT(SDNode *N);
SDValue WidenVecOp_Convert(SDNode *N);
+ SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
SDValue WidenVecOp_VECREDUCE(SDNode *N);
+ SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
/// Helper function to generate a set of operations to perform
/// a vector operation for a wider type.
@@ -934,13 +965,6 @@ private:
/// ST: store of a widen value
void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
- /// Helper function to generate a set of stores to store a truncate widen
- /// vector into non-widen memory.
- /// StChain: list of chains for the stores we have generated
- /// ST: store of a widen value
- void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
- StoreSDNode *ST);
-
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
/// When FillWithZeroes is "on" the vector will be widened with zeroes.
@@ -980,8 +1004,6 @@ private:
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVSETCC(const SDNode *N);
-
//===--------------------------------------------------------------------===//
// Generic Expansion: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 9cd3b8f76d6c..81cc2bf10d25 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -175,7 +175,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
- StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
+ StackPtr =
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
@@ -266,7 +267,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Hi = DAG.getLoad(
NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize),
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo);
@@ -481,7 +482,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
St->getOriginalAlign(), St->getMemOperand()->getFlags(),
AAInfo);
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Hi = DAG.getStore(
Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize),
St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo);
@@ -514,8 +515,8 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Cond = N->getOperand(0);
CL = CH = Cond;
if (Cond.getValueType().isVector()) {
- if (SDValue Res = WidenVSELECTAndMask(N))
- std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
+ if (SDValue Res = WidenVSELECTMask(N))
+ std::tie(CL, CH) = DAG.SplitVector(Res, dl);
// Check if there are already splitted versions of the vector available and
// use those instead of splitting the mask operand again.
else if (getTypeAction(Cond.getValueType()) ==
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 6409f924920d..4015a5a0ce70 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -143,7 +143,6 @@ class VectorLegalizer {
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- SDValue ExpandStrictFPOp(SDNode *Node);
void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -454,6 +453,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
case ISD::SMULFIX:
@@ -487,6 +490,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType());
+ break;
}
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
@@ -794,7 +802,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::ROTL:
case ISD::ROTR:
- if (TLI.expandROT(Node, Tmp, DAG)) {
+ if (TLI.expandROT(Node, false /*AllowVectorOps*/, Tmp, DAG)) {
Results.push_back(Tmp);
return;
}
@@ -806,6 +814,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::UADDO:
case ISD::USUBO:
ExpandUADDSUBO(Node, Results);
@@ -868,6 +885,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VECREDUCE_FMIN:
Results.push_back(TLI.expandVecReduce(Node, DAG));
return;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
+ return;
case ISD::SREM:
case ISD::UREM:
ExpandREM(Node, Results);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c81d03cac81b..57cb364f1939 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -129,6 +129,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
case ISD::FPOW:
case ISD::FREM:
@@ -144,9 +146,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
R = ScalarizeVecRes_BinOp(N);
break;
case ISD::FMA:
+ case ISD::FSHL:
+ case ISD::FSHR:
R = ScalarizeVecRes_TernaryOp(N);
break;
@@ -156,6 +162,11 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
R = ScalarizeVecRes_StrictFPOp(N);
break;
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
+ R = ScalarizeVecRes_FP_TO_XINT_SAT(N);
+ break;
+
case ISD::UADDO:
case ISD::SADDO:
case ISD::USUBO:
@@ -510,6 +521,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
return GetScalarizedVector(N->getOperand(Op));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N) {
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ SDLoc dl(N);
+
+ // Handle case where result is scalarized but operand is not
+ if (getTypeAction(SrcVT) == TargetLowering::TypeScalarizeVector)
+ Src = GetScalarizedVector(Src);
+ else
+ Src = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, SrcVT.getVectorElementType(), Src,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ EVT DstVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(N->getOpcode(), dl, DstVT, Src, N->getOperand(1));
+}
+
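
Editor's sketch (not part of the patch): FP_TO_SINT_SAT / FP_TO_UINT_SAT convert with saturation instead of producing poison on out-of-range inputs. A hypothetical scalar helper for the signed 32-bit case, assuming NaN maps to zero as the saturating conversion is documented to do:

#include <cmath>
#include <cstdint>
#include <limits>

// Saturating double -> int32_t: out-of-range values clamp to the integer
// limits and NaN becomes 0 (assumed FP_TO_SINT_SAT semantics).
static int32_t fpToSIntSat32(double X) {
  if (std::isnan(X))
    return 0;
  if (X <= static_cast<double>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();
  if (X >= static_cast<double>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(X);
}
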
SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
@@ -552,72 +580,80 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
dbgs() << "\n");
SDValue Res = SDValue();
- if (!Res.getNode()) {
- switch (N->getOpcode()) {
- default:
+ switch (N->getOpcode()) {
+ default:
#ifndef NDEBUG
- dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n";
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
#endif
- report_fatal_error("Do not know how to scalarize this operator's "
- "operand!\n");
- case ISD::BITCAST:
- Res = ScalarizeVecOp_BITCAST(N);
- break;
- case ISD::ANY_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::TRUNCATE:
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- Res = ScalarizeVecOp_UnaryOp(N);
- break;
- case ISD::STRICT_SINT_TO_FP:
- case ISD::STRICT_UINT_TO_FP:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
- break;
- case ISD::CONCAT_VECTORS:
- Res = ScalarizeVecOp_CONCAT_VECTORS(N);
- break;
- case ISD::EXTRACT_VECTOR_ELT:
- Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
- break;
- case ISD::VSELECT:
- Res = ScalarizeVecOp_VSELECT(N);
- break;
- case ISD::SETCC:
- Res = ScalarizeVecOp_VSETCC(N);
- break;
- case ISD::STORE:
- Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
- break;
- case ISD::STRICT_FP_ROUND:
- Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
- break;
- case ISD::FP_ROUND:
- Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
- break;
- case ISD::VECREDUCE_FADD:
- case ISD::VECREDUCE_FMUL:
- case ISD::VECREDUCE_ADD:
- case ISD::VECREDUCE_MUL:
- case ISD::VECREDUCE_AND:
- case ISD::VECREDUCE_OR:
- case ISD::VECREDUCE_XOR:
- case ISD::VECREDUCE_SMAX:
- case ISD::VECREDUCE_SMIN:
- case ISD::VECREDUCE_UMAX:
- case ISD::VECREDUCE_UMIN:
- case ISD::VECREDUCE_FMAX:
- case ISD::VECREDUCE_FMIN:
- Res = ScalarizeVecOp_VECREDUCE(N);
- break;
- }
+ report_fatal_error("Do not know how to scalarize this operator's "
+ "operand!\n");
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Res = ScalarizeVecOp_UnaryOp(N);
+ break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
+ break;
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
+ case ISD::VSELECT:
+ Res = ScalarizeVecOp_VSELECT(N);
+ break;
+ case ISD::SETCC:
+ Res = ScalarizeVecOp_VSETCC(N);
+ break;
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::STRICT_FP_ROUND:
+ Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
+ break;
+ case ISD::FP_ROUND:
+ Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
+ break;
+ case ISD::STRICT_FP_EXTEND:
+ Res = ScalarizeVecOp_STRICT_FP_EXTEND(N);
+ break;
+ case ISD::FP_EXTEND:
+ Res = ScalarizeVecOp_FP_EXTEND(N);
+ break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = ScalarizeVecOp_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Res = ScalarizeVecOp_VECREDUCE_SEQ(N);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -762,6 +798,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
/// If the value to round is a vector that needs to be scalarized, it must be
/// <1 x ty>. Convert the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Wrong operand for scalarization!");
SDValue Elt = GetScalarizedVector(N->getOperand(0));
SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
N->getValueType(0).getVectorElementType(), Elt,
@@ -787,7 +824,36 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
// handled all replacements since caller can only handle a single result.
ReplaceValueWith(SDValue(N, 0), Res);
return SDValue();
-}
+}
+
+/// If the value to extend is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_EXTEND(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SDValue Res = DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
+ N->getValueType(0).getVectorElementType(), Elt);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
+/// If the value to extend is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N),
+ {N->getValueType(0).getVectorElementType(), MVT::Other},
+ {N->getOperand(0), Elt});
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ // Do our own replacement and return SDValue() to tell the caller that we
+ // handled all replacements since caller can only handle a single result.
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
SDValue Res = GetScalarizedVector(N->getOperand(0));
@@ -797,6 +863,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) {
+ SDValue AccOp = N->getOperand(0);
+ SDValue VecOp = N->getOperand(1);
+
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(N->getOpcode());
+
+ SDValue Op = GetScalarizedVector(VecOp);
+ return DAG.getNode(BaseOpc, SDLoc(N), N->getValueType(0),
+ AccOp, Op, N->getFlags());
+}
+
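
Editor's sketch (not part of the patch): VECREDUCE_SEQ_FADD/FMUL are ordered reductions seeded by a scalar accumulator, so with a <1 x ty> operand they collapse to a single application of the base binary op on (accumulator, element), which is what the scalarized node becomes. A hypothetical loop-based helper for the FADD flavour:

#include <vector>

// Ordered (sequential) FADD reduction: fold elements strictly left-to-right,
// starting from the accumulator. For a one-element vector this is Acc + V[0].
static float vecreduceSeqFAdd(float Acc, const std::vector<float> &V) {
  for (float Elt : V)
    Acc = Acc + Elt;
  return Acc;
}
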
//===----------------------------------------------------------------------===//
// Result Vector Splitting
//===----------------------------------------------------------------------===//
@@ -836,7 +913,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
- case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SPLAT_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ SplitVecRes_ScalarOp(N, Lo, Hi);
+ break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
@@ -939,9 +1019,15 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::ROTL:
+ case ISD::ROTR:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
+ case ISD::FSHL:
+ case ISD::FSHR:
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
@@ -951,6 +1037,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
+ SplitVecRes_FP_TO_XINT_SAT(N, Lo, Hi);
+ break;
+
case ISD::UADDO:
case ISD::SADDO:
case ISD::USUBO:
@@ -977,21 +1068,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
}
void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
- MachinePointerInfo &MPI,
- SDValue &Ptr) {
+ MachinePointerInfo &MPI, SDValue &Ptr,
+ uint64_t *ScaledOffset) {
SDLoc DL(N);
unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8;
if (MemVT.isScalableVector()) {
+ SDNodeFlags Flags;
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement);
+ Flags.setNoUnsignedWrap(true);
+ if (ScaledOffset)
+ *ScaledOffset += IncrementSize;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement,
+ Flags);
} else {
MPI = N->getPointerInfo().getWithOffset(IncrementSize);
// Increment the pointer to the other half.
- Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::Fixed(IncrementSize));
}
}
@@ -1200,7 +1296,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
- StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
+ StackPtr =
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr,
@@ -1448,14 +1545,16 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
unsigned IdxVal = CIdx->getZExtValue();
- unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
- if (IdxVal < LoNumElts)
+ unsigned LoNumElts = Lo.getValueType().getVectorMinNumElements();
+ if (IdxVal < LoNumElts) {
Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
Lo.getValueType(), Lo, Elt, Idx);
- else
+ return;
+ } else if (!Vec.getValueType().isScalableVector()) {
Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl));
- return;
+ return;
+ }
}
// See if the target wants to custom expand this node.
@@ -1468,7 +1567,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
if (VecVT.getScalarSizeInBits() < 8) {
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
- VecVT.getVectorNumElements());
+ VecVT.getVectorElementCount());
Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
// Extend the element type to match if needed.
if (EltVT.bitsGT(Elt.getValueType()))
@@ -1493,7 +1592,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Store = DAG.getTruncStore(
Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT,
- commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8));
+ commonAlignment(SmallestAlign,
+ EltVT.getFixedSizeInBits() / 8));
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
@@ -1502,12 +1602,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign);
// Increment the pointer to the other part.
- unsigned IncrementSize = LoVT.getSizeInBits() / 8;
- StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
+ auto Load = cast<LoadSDNode>(Lo);
+ MachinePointerInfo MPI = Load->getPointerInfo();
+ IncrementPointer(Load, LoVT, MPI, StackPtr);
- // Load the Hi part from the stack slot.
- Hi = DAG.getLoad(HiVT, dl, Store, StackPtr,
- PtrInfo.getWithOffset(IncrementSize), SmallestAlign);
+ Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, MPI, SmallestAlign);
// If we adjusted the original type, we need to truncate the results.
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
@@ -1517,13 +1616,18 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
-void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
- Hi = DAG.getUNDEF(HiVT);
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0));
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ Hi = DAG.getUNDEF(HiVT);
+ } else {
+ assert(N->getOpcode() == ISD::SPLAT_VECTOR && "Unexpected opcode");
+ Hi = Lo;
+ }
}
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
@@ -1611,9 +1715,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
else
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
+ unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MLD->getAAInfo(), MLD->getRanges());
+ MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment,
+ MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
MMO, MLD->getAddressingMode(), ExtType,
@@ -1627,12 +1732,18 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
// Generate hi masked load.
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
- unsigned HiOffset = LoMemVT.getStoreSize();
+ unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
+
+ MachinePointerInfo MPI;
+ if (LoMemVT.isScalableVector())
+ MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace());
+ else
+ MPI = MLD->getPointerInfo().getWithOffset(
+ LoMemVT.getStoreSize().getFixedSize());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
- MLD->getAAInfo(), MLD->getRanges());
+ MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(),
+ MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
@@ -1662,7 +1773,9 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue PassThru = MGT->getPassThru();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
+ EVT MemoryVT = MGT->getMemoryVT();
Align Alignment = MGT->getOriginalAlign();
+ ISD::LoadExtType ExtType = MGT->getExtensionType();
// Split Mask operand
SDValue MaskLo, MaskHi;
@@ -1675,6 +1788,10 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
}
+ EVT LoMemVT, HiMemVT;
+ // Split MemoryVT
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
SDValue PassThruLo, PassThruHi;
if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(PassThru, PassThruLo, PassThruHi);
@@ -1693,12 +1810,12 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
MGT->getRanges());
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
- Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
- MMO, MGT->getIndexType());
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
+ MMO, MGT->getIndexType(), ExtType);
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
- Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
- MMO, MGT->getIndexType());
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
+ MMO, MGT->getIndexType(), ExtType);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1786,8 +1903,8 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
// more effectively move in the right direction and prevent falling down
// to scalarization in many cases due to the input vector being split too
// far.
- if ((SrcVT.getVectorMinNumElements() & 1) == 0 &&
- SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
+ if (SrcVT.getVectorElementCount().isKnownEven() &&
+ SrcVT.getScalarSizeInBits() * 2 < DestVT.getScalarSizeInBits()) {
LLVMContext &Ctx = *DAG.getContext();
EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx);
@@ -1942,6 +2059,22 @@ void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
ReplaceValueWith(SDValue(N, 1), Chain);
}
+void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT DstVTLo, DstVTHi;
+ std::tie(DstVTLo, DstVTHi) = DAG.GetSplitDestVTs(N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue SrcLo, SrcHi;
+ EVT SrcVT = N->getOperand(0).getValueType();
+ if (getTypeAction(SrcVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), SrcLo, SrcHi);
+ else
+ std::tie(SrcLo, SrcHi) = DAG.SplitVectorOperand(N, 0);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, DstVTLo, SrcLo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1));
+}
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
@@ -1959,92 +2092,95 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
return false;
- if (!Res.getNode()) {
- switch (N->getOpcode()) {
- default:
+ switch (N->getOpcode()) {
+ default:
#ifndef NDEBUG
- dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n";
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
#endif
- report_fatal_error("Do not know how to split this operator's "
- "operand!\n");
-
- case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
- case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
- case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
- case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
- case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
- case ISD::TRUNCATE:
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
+ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::TRUNCATE:
+ Res = SplitVecOp_TruncateHelper(N);
+ break;
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSTORE:
+ Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSCATTER:
+ Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
+ break;
+ case ISD::MGATHER:
+ Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
+ break;
+ case ISD::VSELECT:
+ Res = SplitVecOp_VSELECT(N, OpNo);
+ break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ if (N->getValueType(0).bitsLT(
+ N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
- break;
- case ISD::STRICT_FP_ROUND:
- case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
- case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
- case ISD::STORE:
- Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
- break;
- case ISD::MSTORE:
- Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
- break;
- case ISD::MSCATTER:
- Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
- break;
- case ISD::MGATHER:
- Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
- break;
- case ISD::VSELECT:
- Res = SplitVecOp_VSELECT(N, OpNo);
- break;
- case ISD::STRICT_SINT_TO_FP:
- case ISD::STRICT_UINT_TO_FP:
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- if (N->getValueType(0).bitsLT(
- N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
- Res = SplitVecOp_TruncateHelper(N);
- else
- Res = SplitVecOp_UnaryOp(N);
- break;
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- case ISD::CTTZ:
- case ISD::CTLZ:
- case ISD::CTPOP:
- case ISD::STRICT_FP_EXTEND:
- case ISD::FP_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- case ISD::FTRUNC:
- case ISD::FCANONICALIZE:
+ else
Res = SplitVecOp_UnaryOp(N);
- break;
+ break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = SplitVecOp_FP_TO_XINT_SAT(N);
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::FTRUNC:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
- case ISD::ANY_EXTEND_VECTOR_INREG:
- case ISD::SIGN_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND_VECTOR_INREG:
- Res = SplitVecOp_ExtVecInRegOp(N);
- break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = SplitVecOp_ExtVecInRegOp(N);
+ break;
- case ISD::VECREDUCE_FADD:
- case ISD::VECREDUCE_FMUL:
- case ISD::VECREDUCE_ADD:
- case ISD::VECREDUCE_MUL:
- case ISD::VECREDUCE_AND:
- case ISD::VECREDUCE_OR:
- case ISD::VECREDUCE_XOR:
- case ISD::VECREDUCE_SMAX:
- case ISD::VECREDUCE_SMIN:
- case ISD::VECREDUCE_UMAX:
- case ISD::VECREDUCE_UMIN:
- case ISD::VECREDUCE_FMAX:
- case ISD::VECREDUCE_FMIN:
- Res = SplitVecOp_VECREDUCE(N, OpNo);
- break;
- }
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = SplitVecOp_VECREDUCE(N, OpNo);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Res = SplitVecOp_VECREDUCE_SEQ(N);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2112,36 +2248,35 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
EVT LoOpVT, HiOpVT;
std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
- bool NoNaN = N->getFlags().hasNoNaNs();
- unsigned CombineOpc = 0;
- switch (N->getOpcode()) {
- case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break;
- case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break;
- case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break;
- case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break;
- case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break;
- case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break;
- case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break;
- case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break;
- case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break;
- case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
- case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
- case ISD::VECREDUCE_FMAX:
- CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
- break;
- case ISD::VECREDUCE_FMIN:
- CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
- break;
- default:
- llvm_unreachable("Unexpected reduce ISD node");
- }
-
// Use the appropriate scalar instruction on the split subvectors before
// reducing the now partially reduced smaller vector.
+ unsigned CombineOpc = ISD::getVecReduceBaseOpcode(N->getOpcode());
SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags());
return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags());
}
+SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE_SEQ(SDNode *N) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+
+ SDValue AccOp = N->getOperand(0);
+ SDValue VecOp = N->getOperand(1);
+ SDNodeFlags Flags = N->getFlags();
+
+ EVT VecVT = VecOp.getValueType();
+ assert(VecVT.isVector() && "Can only split reduce vector operand");
+ GetSplitVector(VecOp, Lo, Hi);
+ EVT LoOpVT, HiOpVT;
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
+
+ // Reduce low half.
+ SDValue Partial = DAG.getNode(N->getOpcode(), dl, ResVT, AccOp, Lo, Flags);
+
+ // Reduce high half, using low half result as initial value.
+ return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, Hi, Flags);
+}
+
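
Editor's sketch (not part of the patch): the split preserves the ordered semantics because feeding the low-half result back in as the accumulator of the high half visits the elements in the same left-to-right order as reducing the whole vector at once. A hypothetical standalone check:

#include <cassert>
#include <vector>

// Ordered FADD reduction, folding elements strictly left-to-right.
static float seqFAdd(float Acc, const std::vector<float> &V) {
  for (float Elt : V)
    Acc = Acc + Elt;
  return Acc;
}

int main() {
  std::vector<float> Lo = {1.0f, 2.0f}, Hi = {3.0f, 4.0f};
  std::vector<float> Whole = {1.0f, 2.0f, 3.0f, 4.0f};
  // Reduce the low half, then reduce the high half seeded with that partial
  // result; this matches reducing the unsplit vector in one pass.
  float Split = seqFAdd(seqFAdd(0.5f, Lo), Hi);
  assert(Split == seqFAdd(0.5f, Whole));
  return 0;
}
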
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
@@ -2191,9 +2326,36 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
JoinIntegers(Lo, Hi));
}
+SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Invalid OpNo; can only split SubVec.");
+ // We know that the result type is legal.
+ EVT ResVT = N->getValueType(0);
+
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+
+ SDValue Lo, Hi;
+ GetSplitVector(SubVec, Lo, Hi);
+
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
+
+ SDValue FirstInsertion =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx);
+ SDValue SecondInsertion =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi,
+ DAG.getVectorIdxConstant(IdxVal + LoElts, dl));
+
+ return SecondInsertion;
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
// We know that the extracted result type is legal.
EVT SubVT = N->getValueType(0);
+
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
SDValue Lo, Hi;
@@ -2229,13 +2391,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Lo, Hi;
GetSplitVector(Vec, Lo, Hi);
- uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
if (IdxVal < LoElts)
return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
- return SDValue(DAG.UpdateNodeOperands(N, Hi,
- DAG.getConstant(IdxVal - LoElts, SDLoc(N),
- Idx.getValueType())), 0);
+ else if (!Vec.getValueType().isScalableVector())
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
+ DAG.getConstant(IdxVal - LoElts, SDLoc(N),
+ Idx.getValueType())), 0);
}
// See if the target wants to custom expand this node.
@@ -2248,7 +2411,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
if (VecVT.getScalarSizeInBits() < 8) {
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
- VecVT.getVectorNumElements());
+ VecVT.getVectorElementCount());
Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
}
@@ -2278,7 +2441,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getExtLoad(
ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT,
- commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8));
+ commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
}
SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
@@ -2304,6 +2467,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Mask = MGT->getMask();
SDValue PassThru = MGT->getPassThru();
Align Alignment = MGT->getOriginalAlign();
+ ISD::LoadExtType ExtType = MGT->getExtensionType();
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
@@ -2334,12 +2498,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
MGT->getRanges());
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
- SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
- OpsLo, MMO, MGT->getIndexType());
+ SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
+ OpsLo, MMO, MGT->getIndexType(), ExtType);
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
- SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
- OpsHi, MMO, MGT->getIndexType());
+ SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
+ OpsHi, MMO, MGT->getIndexType(), ExtType);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2393,9 +2557,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
SDValue Lo, Hi, Res;
+ unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, N->getAAInfo(), N->getRanges());
+ N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment,
+ N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -2409,11 +2574,20 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
N->isCompressingStore());
- unsigned HiOffset = LoMemVT.getStoreSize();
+ MachinePointerInfo MPI;
+ if (LoMemVT.isScalableVector()) {
+ Alignment = commonAlignment(
+ Alignment, LoMemVT.getSizeInBits().getKnownMinSize() / 8);
+ MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
+ } else
+ MPI = N->getPointerInfo().getWithOffset(
+ LoMemVT.getStoreSize().getFixedSize());
+
+ unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
- HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges());
+ MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(),
+ N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -2435,11 +2609,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
SDValue Index = N->getIndex();
SDValue Scale = N->getScale();
SDValue Data = N->getValue();
+ EVT MemoryVT = N->getMemoryVT();
Align Alignment = N->getOriginalAlign();
SDLoc DL(N);
// Split all operands
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
SDValue DataLo, DataHi;
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
// Split Data operand
@@ -2470,15 +2648,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
- Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
- DL, OpsLo, MMO, N->getIndexType());
+ Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT,
+ DL, OpsLo, MMO, N->getIndexType(),
+ N->isTruncatingStore());
// The order of the Scatter operation after split is well defined. The "Hi"
// part comes after the "Lo". So these two operations should be chained one
// after another.
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO, N->getIndexType());
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT,
+ DL, OpsHi, MMO, N->getIndexType(),
+ N->isTruncatingStore());
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2604,7 +2784,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
EVT::getFloatingPointVT(InElementSize/2) :
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
- NumElements/2);
+ NumElements.divideCoefficientBy(2));
SDValue HalfLo;
SDValue HalfHi;
@@ -2683,7 +2863,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
- InVT.getVectorNumElements());
+ InVT.getVectorElementCount());
if (N->isStrictFPOpcode()) {
Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
@@ -2709,6 +2889,22 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
}
+SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT NewResVT =
+ EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorElementCount());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
//===----------------------------------------------------------------------===//
// Result Vector Widening
@@ -2739,7 +2935,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
- case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ Res = WidenVecRes_ScalarOp(N);
+ break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::VSELECT:
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
@@ -2764,6 +2963,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::OR:
case ISD::SUB:
case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINIMUM:
@@ -2776,6 +2978,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SADDSAT:
case ISD::USUBSAT:
case ISD::SSUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::ROTL:
+ case ISD::ROTR:
Res = WidenVecRes_Binary(N);
break;
@@ -2824,12 +3030,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_POWI(N);
break;
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- Res = WidenVecRes_Shift(N);
- break;
-
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -2849,6 +3049,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = WidenVecRes_FP_TO_XINT_SAT(N);
+ break;
+
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -2896,6 +3101,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Unary(N);
break;
case ISD::FMA:
+ case ISD::FSHL:
+ case ISD::FSHR:
Res = WidenVecRes_Ternary(N);
break;
}
@@ -3261,19 +3468,34 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ LLVMContext &Ctx = *DAG.getContext();
SDValue InOp = N->getOperand(0);
SDLoc DL(N);
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
- EVT InEltVT = InVT.getVectorElementType();
- EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
unsigned Opcode = N->getOpcode();
- unsigned InVTNumElts = InVT.getVectorNumElements();
const SDNodeFlags Flags = N->getFlags();
+
+ // Handle the case of ZERO_EXTEND where the promoted InVT element size does
+ // not equal that of WidenVT.
+ if (N->getOpcode() == ISD::ZERO_EXTEND &&
+ getTypeAction(InVT) == TargetLowering::TypePromoteInteger &&
+ TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() !=
+ WidenVT.getScalarSizeInBits()) {
+ InOp = ZExtPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits())
+ Opcode = ISD::TRUNCATE;
+ }
+
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts);
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
@@ -3341,6 +3563,27 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getBuildVector(WidenVT, DL, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) {
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ElementCount WidenNumElts = WidenVT.getVectorElementCount();
+
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // Also widen the input.
+ if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
+ Src = GetWidenedVector(Src);
+ SrcVT = Src.getValueType();
+ }
+
+ // Input and output not widened to the same size, give up.
+ if (WidenNumElts != SrcVT.getVectorElementCount())
+ return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
+
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1));
+}
+
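The FP_TO_SINT_SAT / FP_TO_UINT_SAT handling above widens the vector but passes the extra saturation-width operand (N->getOperand(1)) through unchanged. Per element, the operation is a saturating float-to-int conversion: out-of-range values clamp to the target range and NaN becomes zero. A rough scalar model of the signed case, with i32/double widths chosen only for illustration:

#include <cmath>
#include <cstdint>

// Sketch of what FP_TO_SINT_SAT computes per vector element, not the exact
// legalizer expansion: clamp at the signed bounds, map NaN to zero,
// otherwise truncate toward zero.
static int32_t fptosi_sat_i32(double X) {
  if (std::isnan(X))
    return 0;                 // NaN saturates to 0 by definition.
  if (X <= (double)INT32_MIN)
    return INT32_MIN;         // Clamp at the lower bound.
  if (X >= (double)INT32_MAX)
    return INT32_MAX;         // Clamp at the upper bound.
  return (int32_t)X;          // In range: ordinary truncation.
}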
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
SDValue InOp = N->getOperand(1);
SDLoc DL(N);
@@ -3447,25 +3690,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
}
-SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue InOp = GetWidenedVector(N->getOperand(0));
- SDValue ShOp = N->getOperand(1);
-
- EVT ShVT = ShOp.getValueType();
- if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
- ShOp = GetWidenedVector(ShOp);
- ShVT = ShOp.getValueType();
- }
- EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
- ShVT.getVectorElementType(),
- WidenVT.getVectorNumElements());
- if (ShVT != ShWidenVT)
- ShOp = ModifyToType(ShOp, ShWidenVT);
-
- return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
-}
-
SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
// Unary op widening.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -3820,9 +4044,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Index = ModifyToType(Index, WideIndexVT);
SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
Scale };
+
+ // Widen the MemoryType
+ EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getMemoryVT().getScalarType(), NumElts);
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
- N->getMemoryVT(), dl, Ops,
- N->getMemOperand(), N->getIndexType());
+ WideMemVT, dl, Ops, N->getMemOperand(),
+ N->getIndexType(), N->getExtensionType());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -3830,10 +4058,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
return Res;
}
-SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N),
- WidenVT, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
}
// Return true if this is a SETCC node or a strict version of it.
@@ -3953,11 +4180,11 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
return Mask;
}
-// This method tries to handle VSELECT and its mask by legalizing operands
-// (which may require widening) and if needed adjusting the mask vector type
-// to match that of the VSELECT. Without it, many cases end up with
-// scalarization of the SETCC, with many unnecessary instructions.
-SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
+// This method tries to handle some special cases for the vselect mask
+// and, if needed, adjusts the mask vector type to match that of the VSELECT.
+// Without it, many cases end up with scalarization of the SETCC, with many
+// unnecessary instructions.
+SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
LLVMContext &Ctx = *DAG.getContext();
SDValue Cond = N->getOperand(0);
@@ -4004,14 +4231,9 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
return SDValue();
}
- // Get the VT and operands for VSELECT, and widen if needed.
- SDValue VSelOp1 = N->getOperand(1);
- SDValue VSelOp2 = N->getOperand(2);
- if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) {
+ // Widen the vselect result type if needed.
+ if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector)
VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);
- VSelOp1 = GetWidenedVector(VSelOp1);
- VSelOp2 = GetWidenedVector(VSelOp2);
- }
// The mask of the VSELECT should have integer elements.
EVT ToMaskVT = VSelVT;
@@ -4060,7 +4282,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
} else
return SDValue();
- return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2);
+ return Mask;
}
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
@@ -4070,8 +4292,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
if (CondVT.isVector()) {
- if (SDValue Res = WidenVSELECTAndMask(N))
- return Res;
+ if (SDValue WideCond = WidenVSELECTMask(N)) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ WidenVT, WideCond, InOp1, InOp2);
+ }
EVT CondEltVT = CondVT.getVectorElementType();
EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
@@ -4278,6 +4505,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
Res = WidenVecOp_Convert(N);
break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = WidenVecOp_FP_TO_XINT_SAT(N);
+ break;
+
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -4293,6 +4525,10 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_FMIN:
Res = WidenVecOp_VECREDUCE(N);
break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Res = WidenVecOp_VECREDUCE_SEQ(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -4447,6 +4683,28 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
return DAG.getBuildVector(VT, dl, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecOp_FP_TO_XINT_SAT(SDNode *N) {
+ EVT DstVT = N->getValueType(0);
+ SDValue Src = GetWidenedVector(N->getOperand(0));
+ EVT SrcVT = Src.getValueType();
+ ElementCount WideNumElts = SrcVT.getVectorElementCount();
+ SDLoc dl(N);
+
+ // See if a widened result type would be legal, if so widen the node.
+ EVT WideDstVT = EVT::getVectorVT(*DAG.getContext(),
+ DstVT.getVectorElementType(), WideNumElts);
+ if (TLI.isTypeLegal(WideDstVT)) {
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), dl, WideDstVT, Src, N->getOperand(1));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ // Give up and unroll.
+ return DAG.UnrollVectorOp(N);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = GetWidenedVector(N->getOperand(0));
@@ -4547,11 +4805,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
if (!ST->getMemoryVT().getScalarType().isByteSized())
return TLI.scalarizeVectorStore(ST, DAG);
- SmallVector<SDValue, 16> StChain;
if (ST->isTruncatingStore())
- GenWidenVectorTruncStores(StChain, ST);
- else
- GenWidenVectorStores(StChain, ST);
+ return TLI.scalarizeVectorStore(ST, DAG);
+
+ SmallVector<SDValue, 16> StChain;
+ GenWidenVectorStores(StChain, ST);
if (StChain.size() == 1)
return StChain[0];
@@ -4613,7 +4871,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
Scale};
SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
- MG->getMemOperand(), MG->getIndexType());
+ MG->getMemOperand(), MG->getIndexType(),
+ MG->getExtensionType());
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
return SDValue();
@@ -4625,6 +4884,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
SDValue Mask = MSC->getMask();
SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
+ EVT WideMemVT = MSC->getMemoryVT();
if (OpNo == 1) {
DataOp = GetWidenedVector(DataOp);
@@ -4641,6 +4901,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
MaskVT.getVectorElementType(), NumElts);
Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ // Widen the MemoryType
+ WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ MSC->getMemoryVT().getScalarType(), NumElts);
} else if (OpNo == 4) {
// Just widen the index. It's allowed to have extra elements.
Index = GetWidenedVector(Index);
@@ -4649,9 +4913,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
Scale};
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
- MSC->getMemoryVT(), SDLoc(N), Ops,
- MSC->getMemOperand(), MSC->getIndexType());
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N),
+ Ops, MSC->getMemOperand(), MSC->getIndexType(),
+ MSC->isTruncatingStore());
}
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
@@ -4730,45 +4994,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
EVT OrigVT = N->getOperand(0).getValueType();
EVT WideVT = Op.getValueType();
EVT ElemVT = OrigVT.getVectorElementType();
+ SDNodeFlags Flags = N->getFlags();
- SDValue NeutralElem;
- switch (N->getOpcode()) {
- case ISD::VECREDUCE_ADD:
- case ISD::VECREDUCE_OR:
- case ISD::VECREDUCE_XOR:
- case ISD::VECREDUCE_UMAX:
- NeutralElem = DAG.getConstant(0, dl, ElemVT);
- break;
- case ISD::VECREDUCE_MUL:
- NeutralElem = DAG.getConstant(1, dl, ElemVT);
- break;
- case ISD::VECREDUCE_AND:
- case ISD::VECREDUCE_UMIN:
- NeutralElem = DAG.getAllOnesConstant(dl, ElemVT);
- break;
- case ISD::VECREDUCE_SMAX:
- NeutralElem = DAG.getConstant(
- APInt::getSignedMinValue(ElemVT.getSizeInBits()), dl, ElemVT);
- break;
- case ISD::VECREDUCE_SMIN:
- NeutralElem = DAG.getConstant(
- APInt::getSignedMaxValue(ElemVT.getSizeInBits()), dl, ElemVT);
- break;
- case ISD::VECREDUCE_FADD:
- NeutralElem = DAG.getConstantFP(0.0, dl, ElemVT);
- break;
- case ISD::VECREDUCE_FMUL:
- NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
- break;
- case ISD::VECREDUCE_FMAX:
- NeutralElem = DAG.getConstantFP(
- -std::numeric_limits<double>::infinity(), dl, ElemVT);
- break;
- case ISD::VECREDUCE_FMIN:
- NeutralElem = DAG.getConstantFP(
- std::numeric_limits<double>::infinity(), dl, ElemVT);
- break;
- }
+ unsigned Opc = N->getOpcode();
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc);
+ SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags);
+ assert(NeutralElem && "Neutral element must exist");
// Pad the vector with the neutral element.
unsigned OrigElts = OrigVT.getVectorNumElements();
@@ -4777,7 +5008,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
DAG.getVectorIdxConstant(Idx, dl));
- return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags());
+ return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
+ SDLoc dl(N);
+ SDValue AccOp = N->getOperand(0);
+ SDValue VecOp = N->getOperand(1);
+ SDValue Op = GetWidenedVector(VecOp);
+
+ EVT OrigVT = VecOp.getValueType();
+ EVT WideVT = Op.getValueType();
+ EVT ElemVT = OrigVT.getVectorElementType();
+ SDNodeFlags Flags = N->getFlags();
+
+ unsigned Opc = N->getOpcode();
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc);
+ SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags);
+
+ // Pad the vector with the neutral element.
+ unsigned OrigElts = OrigVT.getVectorNumElements();
+ unsigned WideElts = WideVT.getVectorNumElements();
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
+ DAG.getVectorIdxConstant(Idx, dl));
+
+ return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
}
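Both WidenVecOp_VECREDUCE and the new WidenVecOp_VECREDUCE_SEQ pad the widened lanes with the neutral element obtained from ISD::getVecReduceBaseOpcode plus DAG.getNeutralElement; the sequential variant additionally threads the scalar accumulator through as the first operand of the reduce node. A stand-alone sketch of why the neutral element makes the padding safe, using a plain integer add reduction:

#include <numeric>
#include <vector>

// Sketch: reducing a widened vector is only correct if the extra lanes hold
// the neutral element of the base operation (0 for ADD, 1 for MUL, all-ones
// for AND, ...), so padding them cannot change the reduced value.
int reduceAddWidened(std::vector<int> V, unsigned WideLanes) {
  const int Neutral = 0;            // neutral element of ADD
  V.resize(WideLanes, Neutral);     // fill the widened lanes
  return std::accumulate(V.begin(), V.end(), 0); // reduce across all lanes
}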
SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
@@ -4820,7 +5076,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
// If we have one element to load/store, return it.
EVT RetVT = WidenEltVT;
- if (Width == WidenEltWidth)
+ if (!Scalable && Width == WidenEltWidth)
return RetVT;
// See if there is larger legal integer than the element type to load/store.
@@ -4866,11 +5122,14 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
- if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ if (RetVT.getFixedSizeInBits() < MemVTWidth || MemVT == WidenVT)
return MemVT;
}
}
+ if (Scalable)
+ report_fatal_error("Using element-wise loads and stores for widening "
+ "operations is not supported for scalable vectors");
return RetVT;
}
@@ -4913,10 +5172,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// element type or scalar loads and then recombines it to the widen vector
// type.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
- unsigned WidenWidth = WidenVT.getSizeInBits();
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
// Load information
@@ -4925,23 +5184,25 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
- int LdWidth = LdVT.getSizeInBits();
- int WidthDiff = WidenWidth - LdWidth;
+ TypeSize LdWidth = LdVT.getSizeInBits();
+ TypeSize WidenWidth = WidenVT.getSizeInBits();
+ TypeSize WidthDiff = WidenWidth - LdWidth;
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
// Find the vector type that can load from.
- EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
- int NewVTWidth = NewVT.getSizeInBits();
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinSize());
+ TypeSize NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
- if (LdWidth <= NewVTWidth) {
+ if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) {
if (!NewVT.isVector()) {
- unsigned NumElts = WidenWidth / NewVTWidth;
+ unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
@@ -4949,8 +5210,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
if (NewVT == WidenVT)
return LdOp;
- assert(WidenWidth % NewVTWidth == 0);
- unsigned NumConcat = WidenWidth / NewVTWidth;
+ // TODO: We don't currently have any tests that exercise this code path.
+ assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0);
+ unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
SmallVector<SDValue, 16> ConcatOps(NumConcat);
SDValue UndefVal = DAG.getUNDEF(NewVT);
ConcatOps[0] = LdOp;
@@ -4963,35 +5225,30 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
SmallVector<SDValue, 16> LdOps;
LdOps.push_back(LdOp);
- LdWidth -= NewVTWidth;
- unsigned Offset = 0;
-
- while (LdWidth > 0) {
- unsigned Increment = NewVTWidth / 8;
- Offset += Increment;
- BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
+ uint64_t ScaledOffset = 0;
+ MachinePointerInfo MPI = LD->getPointerInfo();
+ do {
+ LdWidth -= NewVTWidth;
+ IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr,
+ &ScaledOffset);
- SDValue L;
- if (LdWidth < NewVTWidth) {
+ if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) {
// The current type we are using is too large. Find a better size.
- NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinSize());
NewVTWidth = NewVT.getSizeInBits();
- L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
- LD->getPointerInfo().getWithOffset(Offset),
- LD->getOriginalAlign(), MMOFlags, AAInfo);
- LdChain.push_back(L.getValue(1));
- } else {
- L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
- LD->getPointerInfo().getWithOffset(Offset),
- LD->getOriginalAlign(), MMOFlags, AAInfo);
- LdChain.push_back(L.getValue(1));
}
+ Align NewAlign = ScaledOffset == 0
+ ? LD->getOriginalAlign()
+ : commonAlignment(LD->getAlign(), ScaledOffset);
+ SDValue L =
+ DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
+ LdChain.push_back(L.getValue(1));
+
LdOps.push_back(L);
LdOp = L;
-
- LdWidth -= NewVTWidth;
- }
+ } while (TypeSize::isKnownGT(LdWidth, NewVTWidth));
// Build the vector from the load operations.
unsigned End = LdOps.size();
@@ -5015,13 +5272,18 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
}
ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
}
+
ConcatOps[--Idx] = LdOps[i];
for (--i; i >= 0; --i) {
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
// Create a larger vector.
- unsigned NumOps = NewLdTy.getSizeInBits() / LdTy.getSizeInBits();
- assert(NewLdTy.getSizeInBits() % LdTy.getSizeInBits() == 0);
+ TypeSize LdTySize = LdTy.getSizeInBits();
+ TypeSize NewLdTySize = NewLdTy.getSizeInBits();
+ assert(NewLdTySize.isScalable() == LdTySize.isScalable() &&
+ NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinSize()));
+ unsigned NumOps =
+ NewLdTySize.getKnownMinSize() / LdTySize.getKnownMinSize();
SmallVector<SDValue, 16> WidenOps(NumOps);
unsigned j = 0;
for (; j != End-Idx; ++j)
@@ -5042,7 +5304,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
makeArrayRef(&ConcatOps[Idx], End - Idx));
// We need to fill the rest with undefs to build the vector.
- unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ unsigned NumOps =
+ WidenWidth.getKnownMinSize() / LdTy.getSizeInBits().getKnownMinSize();
SmallVector<SDValue, 16> WidenOps(NumOps);
SDValue UndefVal = DAG.getUNDEF(LdTy);
{
@@ -5065,6 +5328,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
// Load information
SDValue Chain = LD->getChain();
@@ -5072,6 +5336,10 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
+ if (LdVT.isScalableVector())
+ report_fatal_error("Generating widen scalable extending vector loads is "
+ "not yet supported");
+
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
unsigned NumElts = LdVT.getVectorNumElements();
@@ -5086,7 +5354,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
- SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
+ SDValue NewBasePtr =
+ DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::Fixed(Offset));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -5114,99 +5383,66 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
SDLoc dl(ST);
EVT StVT = ST->getMemoryVT();
- unsigned StWidth = StVT.getSizeInBits();
+ TypeSize StWidth = StVT.getSizeInBits();
EVT ValVT = ValOp.getValueType();
- unsigned ValWidth = ValVT.getSizeInBits();
+ TypeSize ValWidth = ValVT.getSizeInBits();
EVT ValEltVT = ValVT.getVectorElementType();
- unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ unsigned ValEltWidth = ValEltVT.getFixedSizeInBits();
assert(StVT.getVectorElementType() == ValEltVT);
+ assert(StVT.isScalableVector() == ValVT.isScalableVector() &&
+ "Mismatch between store and value types");
int Idx = 0; // current index to store
- unsigned Offset = 0; // offset from base to store
- while (StWidth != 0) {
+
+ MachinePointerInfo MPI = ST->getPointerInfo();
+ uint64_t ScaledOffset = 0;
+ while (StWidth.isNonZero()) {
// Find the largest vector type we can store with.
- EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
- unsigned NewVTWidth = NewVT.getSizeInBits();
- unsigned Increment = NewVTWidth / 8;
+ EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ TypeSize NewVTWidth = NewVT.getSizeInBits();
+
if (NewVT.isVector()) {
- unsigned NumVTElts = NewVT.getVectorNumElements();
+ unsigned NumVTElts = NewVT.getVectorMinNumElements();
do {
+ Align NewAlign = ScaledOffset == 0
+ ? ST->getOriginalAlign()
+ : commonAlignment(ST->getAlign(), ScaledOffset);
SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
DAG.getVectorIdxConstant(Idx, dl));
- StChain.push_back(DAG.getStore(
- Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
- ST->getOriginalAlign(), MMOFlags, AAInfo));
+ SDValue PartStore = DAG.getStore(Chain, dl, EOp, BasePtr, MPI, NewAlign,
+ MMOFlags, AAInfo);
+ StChain.push_back(PartStore);
+
StWidth -= NewVTWidth;
- Offset += Increment;
Idx += NumVTElts;
- BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
- } while (StWidth != 0 && StWidth >= NewVTWidth);
+ IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
+ &ScaledOffset);
+ } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
} else {
// Cast the vector to the scalar type we can store.
- unsigned NumElts = ValWidth / NewVTWidth;
+ unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
// Readjust index position based on new vector type.
- Idx = Idx * ValEltWidth / NewVTWidth;
+ Idx = Idx * ValEltWidth / NewVTWidth.getFixedSize();
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getVectorIdxConstant(Idx++, dl));
- StChain.push_back(DAG.getStore(
- Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
- ST->getOriginalAlign(), MMOFlags, AAInfo));
+ SDValue PartStore =
+ DAG.getStore(Chain, dl, EOp, BasePtr, MPI, ST->getOriginalAlign(),
+ MMOFlags, AAInfo);
+ StChain.push_back(PartStore);
+
StWidth -= NewVTWidth;
- Offset += Increment;
- BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
- } while (StWidth != 0 && StWidth >= NewVTWidth);
+ IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
+ } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
// Restore index back to be relative to the original widen element type.
- Idx = Idx * NewVTWidth / ValEltWidth;
+ Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;
}
}
}
-void
-DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
- StoreSDNode *ST) {
- // For extension loads, it may not be more efficient to truncate the vector
- // and then store it. Instead, we extract each element and then store it.
- SDValue Chain = ST->getChain();
- SDValue BasePtr = ST->getBasePtr();
- MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
- AAMDNodes AAInfo = ST->getAAInfo();
- SDValue ValOp = GetWidenedVector(ST->getValue());
- SDLoc dl(ST);
-
- EVT StVT = ST->getMemoryVT();
- EVT ValVT = ValOp.getValueType();
-
- // It must be true that the wide vector type is bigger than where we need to
- // store.
- assert(StVT.isVector() && ValOp.getValueType().isVector());
- assert(StVT.bitsLT(ValOp.getValueType()));
-
- // For truncating stores, we can not play the tricks of chopping legal vector
- // types and bitcast it to the right type. Instead, we unroll the store.
- EVT StEltVT = StVT.getVectorElementType();
- EVT ValEltVT = ValVT.getVectorElementType();
- unsigned Increment = ValEltVT.getSizeInBits() / 8;
- unsigned NumElts = StVT.getVectorNumElements();
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getVectorIdxConstant(0, dl));
- StChain.push_back(
- DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT,
- ST->getOriginalAlign(), MMOFlags, AAInfo));
- unsigned Offset = Increment;
- for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
- SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getVectorIdxConstant(0, dl));
- StChain.push_back(DAG.getTruncStore(
- Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset),
- StEltVT, ST->getOriginalAlign(), MMOFlags, AAInfo));
- }
-}
-
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
/// FillWithZeroes specifies that the vector should be widened with zeroes.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 2902c96c7658..0022e5ec31f0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -760,7 +760,7 @@ void ScheduleDAGLinearize::Schedule() {
MachineBasicBlock*
ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(BB, InsertPos);
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
DenseMap<SDValue, Register> VRBaseMap;
LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 72e68a5045c6..7a5e8ac6075e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1838,13 +1838,16 @@ protected:
template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
- std::vector<SUnit *>::iterator Best = Q.begin();
- for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
- if (Picker(*Best, *I))
- Best = I;
- SUnit *V = *Best;
- if (Best != std::prev(Q.end()))
- std::swap(*Best, Q.back());
+ unsigned BestIdx = 0;
+ // Only compute the cost for the first 1000 items in the queue, to avoid
+ // excessive compile-times for very large queues.
+ for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
+ I++)
+ if (Picker(Q[BestIdx], Q[I]))
+ BestIdx = I;
+ SUnit *V = Q[BestIdx];
+ if (BestIdx + 1 != Q.size())
+ std::swap(Q[BestIdx], Q.back());
Q.pop_back();
return V;
}
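The popFromQueueImpl change switches from an iterator scan over the whole queue to an index scan capped at the first 1000 entries, trading a possibly better pick for bounded compile time on very large queues. A generic sketch of that bounded best-pick-and-pop pattern; the template names and the Cap default are illustrative:

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// Sketch: find the "best" element among at most the first Cap entries and
// swap it to the back so it can be popped cheaply.
template <typename T, typename ShouldReplace>
T popBestBounded(std::vector<T> &Q, ShouldReplace Replace,
                 std::size_t Cap = 1000) {
  std::size_t BestIdx = 0;
  for (std::size_t I = 1, E = std::min(Q.size(), Cap); I != E; ++I)
    if (Replace(Q[BestIdx], Q[I]))  // Picker-style predicate: true means the
      BestIdx = I;                  // current best should be replaced by Q[I].
  T V = Q[BestIdx];
  if (BestIdx + 1 != Q.size())
    std::swap(Q[BestIdx], Q.back());
  Q.pop_back();
  return V;
}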
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index ce20d506586f..debfdda90e1e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -125,8 +125,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
PhysReg = Reg;
} else if (Def->isMachineOpcode()) {
const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
- if (ResNo >= II.getNumDefs() &&
- II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
+ if (ResNo >= II.getNumDefs() && II.hasImplicitDefOfPhysReg(Reg))
PhysReg = Reg;
}
@@ -173,7 +172,7 @@ static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
// Don't add glue to something that already has a glue value.
if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
- SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
+ SmallVector<EVT, 4> VTs(N->values());
if (AddGlue)
VTs.push_back(MVT::Glue);
@@ -830,7 +829,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
/// not necessarily refer to returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(BB, InsertPos);
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
DenseMap<SDValue, Register> VRBaseMap;
DenseMap<SUnit*, Register> CopyVRBaseMap;
SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
@@ -1034,7 +1033,29 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
}
InsertPos = Emitter.getInsertPos();
- return Emitter.getBlock();
+ // In some cases, DBG_VALUEs might be inserted after the first terminator,
+ // which results in an invalid MBB. If that happens, move the DBG_VALUEs
+ // before the first terminator.
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ auto FirstTerm = InsertBB->getFirstTerminator();
+ if (FirstTerm != InsertBB->end()) {
+ assert(!FirstTerm->isDebugValue() &&
+ "first terminator cannot be a debug value");
+ for (MachineInstr &MI : make_early_inc_range(
+ make_range(std::next(FirstTerm), InsertBB->end()))) {
+ if (!MI.isDebugValue())
+ continue;
+
+ if (&MI == InsertPos)
+ InsertPos = std::prev(InsertPos->getIterator());
+
+ // The DBG_VALUE was referencing a value produced by a terminator. By
+ // moving the DBG_VALUE, the referenced value also needs invalidating.
+ MI.getOperand(0).ChangeToRegister(0, false);
+ MI.moveBefore(&*FirstTerm);
+ }
+ }
+ return InsertBB;
}
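The EmitSchedule fix walks the instructions after the block's first terminator and moves any DBG_VALUE back in front of it, also clearing its register operand because the referenced value is defined by the terminator. A container-based sketch of just the reordering part; std::list and the Instr fields are illustrative stand-ins for the MachineBasicBlock API, and the operand invalidation is omitted:

#include <algorithm>
#include <iterator>
#include <list>
#include <string>

struct Instr { std::string Text; bool IsDebug; bool IsTerminator; };

// Sketch: splice every "debug" entry that ended up after the first
// terminator back in front of it, so the block stays well formed.
void hoistDebugBeforeTerminator(std::list<Instr> &Block) {
  auto FirstTerm = std::find_if(Block.begin(), Block.end(),
                                [](const Instr &I) { return I.IsTerminator; });
  if (FirstTerm == Block.end())
    return;
  for (auto It = std::next(FirstTerm); It != Block.end();) {
    auto Cur = It++;                 // advance before moving Cur
    if (Cur->IsDebug)
      Block.splice(FirstTerm, Block, Cur); // move it before the terminator
  }
}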
/// Return the basic block label.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 592c09c10fb0..2090762e2ff4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -138,6 +139,15 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
//===----------------------------------------------------------------------===//
bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ unsigned EltSize =
+ N->getValueType(0).getVectorElementType().getSizeInBits();
+ if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ SplatVal = Op0->getAPIntValue().truncOrSelf(EltSize);
+ return true;
+ }
+ }
+
auto *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
@@ -154,11 +164,16 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
// specializations of the more general isConstantSplatVector()?
-bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
+ if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
+ APInt SplatVal;
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue();
+ }
+
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
unsigned i = 0, e = N->getNumOperands();
@@ -198,11 +213,16 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
return true;
}
-bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
+ if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
+ APInt SplatVal;
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue();
+ }
+
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
bool IsAllUndef = true;
@@ -235,6 +255,14 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
return true;
}
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ return isConstantSplatVectorAllOnes(N, /*BuildVectorOnly*/ true);
+}
+
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ return isConstantSplatVectorAllZeros(N, /*BuildVectorOnly*/ true);
+}
+
bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
@@ -278,7 +306,8 @@ bool ISD::matchUnaryPredicate(SDValue Op,
return Match(Cst);
// FIXME: Add support for vector UNDEF cases?
- if (ISD::BUILD_VECTOR != Op.getOpcode())
+ if (ISD::BUILD_VECTOR != Op.getOpcode() &&
+ ISD::SPLAT_VECTOR != Op.getOpcode())
return false;
EVT SVT = Op.getValueType().getScalarType();
@@ -332,6 +361,76 @@ bool ISD::matchBinaryPredicate(
return true;
}
+ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
+ switch (VecReduceOpcode) {
+ default:
+ llvm_unreachable("Expected VECREDUCE opcode");
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_SEQ_FADD:
+ return ISD::FADD;
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ return ISD::FMUL;
+ case ISD::VECREDUCE_ADD:
+ return ISD::ADD;
+ case ISD::VECREDUCE_MUL:
+ return ISD::MUL;
+ case ISD::VECREDUCE_AND:
+ return ISD::AND;
+ case ISD::VECREDUCE_OR:
+ return ISD::OR;
+ case ISD::VECREDUCE_XOR:
+ return ISD::XOR;
+ case ISD::VECREDUCE_SMAX:
+ return ISD::SMAX;
+ case ISD::VECREDUCE_SMIN:
+ return ISD::SMIN;
+ case ISD::VECREDUCE_UMAX:
+ return ISD::UMAX;
+ case ISD::VECREDUCE_UMIN:
+ return ISD::UMIN;
+ case ISD::VECREDUCE_FMAX:
+ return ISD::FMAXNUM;
+ case ISD::VECREDUCE_FMIN:
+ return ISD::FMINNUM;
+ }
+}
+
+bool ISD::isVPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \
+ case ISD::SDOPC: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+/// The operand position of the vector mask.
+Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return None;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, ...) \
+ case ISD::SDOPC: \
+ return MASKPOS;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+/// The operand position of the explicit vector length parameter.
+Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return None;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
+ case ISD::SDOPC: \
+ return EVLPOS;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
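isVPOpcode, getVPMaskIdx and getVPExplicitVectorLengthIdx are generated by re-expanding the BEGIN_REGISTER_VP_SDNODE records in VPIntrinsics.def with different macro bodies, the usual .def/X-macro technique. A self-contained sketch of the technique; the shared record list is inlined as FOR_EACH_COLOR instead of living in a separate .def file, and all names are made up for illustration:

// Each expansion site defines the per-record macro to produce what it needs,
// then expands the same record list.
#define FOR_EACH_COLOR(X)                                                    \
  X(Red, 0xFF0000)                                                           \
  X(Green, 0x00FF00)                                                         \
  X(Blue, 0x0000FF)

enum class Color {
#define COLOR_ENUM(NAME, RGB) NAME,
  FOR_EACH_COLOR(COLOR_ENUM)
#undef COLOR_ENUM
};

constexpr unsigned colorToRGB(Color C) {
  switch (C) {
#define COLOR_CASE(NAME, RGB)                                                \
  case Color::NAME:                                                          \
    return RGB;
    FOR_EACH_COLOR(COLOR_CASE)
#undef COLOR_CASE
  }
  return 0;
}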
ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
@@ -536,6 +635,11 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset());
}
break;
+ case ISD::PSEUDO_PROBE:
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid());
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex());
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getAttributes());
+ break;
case ISD::JumpTable:
case ISD::TargetJumpTable:
ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
@@ -1229,7 +1333,7 @@ SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
bool isT, bool isO) {
EVT EltVT = VT.getScalarType();
assert((EltVT.getSizeInBits() >= 64 ||
- (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
"getConstant with a uint64_t value that doesn't fit in the type!");
return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);
}
@@ -1251,10 +1355,10 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
// inserted value (the type does not need to match the vector element type).
// Any extra bits introduced will be truncated away.
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
- TargetLowering::TypePromoteInteger) {
- EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
- APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
- Elt = ConstantInt::get(*getContext(), NewVal);
+ TargetLowering::TypePromoteInteger) {
+ EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
+ Elt = ConstantInt::get(*getContext(), NewVal);
}
// In other cases the element type is illegal and needs to be expanded, for
// example v2i64 on MIPS32. In this case, find the nearest legal type, split
@@ -1264,7 +1368,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
// only legalize if the DAG tells us we must produce legal types.
else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
TLI->getTypeAction(*getContext(), EltVT) ==
- TargetLowering::TypeExpandInteger) {
+ TargetLowering::TypeExpandInteger) {
const APInt &NewVal = Elt->getValue();
EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
@@ -1278,9 +1382,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
SmallVector<SDValue, 2> EltParts;
for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
- EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
- .zextOrTrunc(ViaEltSizeInBits), DL,
- ViaEltVT, isT, isO));
+ EltParts.push_back(getConstant(
+ NewVal.lshr(i * ViaEltSizeInBits).zextOrTrunc(ViaEltSizeInBits), DL,
+ ViaEltVT, isT, isO));
}
// EltParts is currently in little endian order. If we actually want
@@ -1297,9 +1401,10 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
- Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
+ llvm::append_range(Ops, EltParts);
- SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+ SDValue V =
+ getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
return V;
}
@@ -1380,7 +1485,9 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
}
SDValue Result(N, 0);
- if (VT.isVector())
+ if (VT.isScalableVector())
+ Result = getSplatVector(VT, DL, Result);
+ else if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
NewSDValueDbgMsg(Result, "Creating fp constant: ", this);
return Result;
@@ -2023,7 +2130,14 @@ Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) {
SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) {
MachineFrameInfo &MFI = MF->getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, false);
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ int StackID = 0;
+ if (Bytes.isScalable())
+ StackID = TFI->getStackIDForScalableVectors();
+ // The stack id gives an indication of whether the object is scalable or
+ // not, so it's safe to pass in the minimum size here.
+ int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinSize(), Alignment,
+ false, nullptr, StackID);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
@@ -2035,7 +2149,14 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
- TypeSize Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
+ TypeSize VT1Size = VT1.getStoreSize();
+ TypeSize VT2Size = VT2.getStoreSize();
+ assert(VT1Size.isScalable() == VT2Size.isScalable() &&
+ "Don't know how to choose the maximum size when creating a stack "
+ "temporary");
+ TypeSize Bytes =
+ VT1Size.getKnownMinSize() > VT2Size.getKnownMinSize() ? VT1Size : VT2Size;
+
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
const DataLayout &DL = getDataLayout();
@@ -2204,6 +2325,10 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
EVT VT = V.getValueType();
+
+ if (VT.isScalableVector())
+ return SDValue();
+
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
@@ -2221,7 +2346,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
default:
return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
*this, 0);
- break;
case ISD::Constant: {
const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
APInt NewVal = CVal & DemandedBits;
@@ -2247,18 +2371,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
V.getOperand(1));
}
break;
- case ISD::AND: {
- // X & -1 -> X (ignoring bits which aren't demanded).
- // Also handle the case where masked out bits in X are known to be zero.
- if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) {
- const APInt &AndVal = RHSC->getAPIntValue();
- if (DemandedBits.isSubsetOf(AndVal) ||
- DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero |
- AndVal))
- return V.getOperand(0);
- }
- break;
- }
}
return SDValue();
}
@@ -2298,17 +2410,23 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
/// sense to specify which elements are demanded or undefined, therefore
/// they are simply ignored.
bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
- APInt &UndefElts) {
+ APInt &UndefElts, unsigned Depth) {
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
if (!VT.isScalableVector() && !DemandedElts)
return false; // No demanded elts, better to assume we don't know anything.
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
// Deal with some common cases here that work for both fixed and scalable
// vector types.
switch (V.getOpcode()) {
case ISD::SPLAT_VECTOR:
+ UndefElts = V.getOperand(0).isUndef()
+ ? APInt::getAllOnesValue(DemandedElts.getBitWidth())
+ : APInt(DemandedElts.getBitWidth(), 0);
return true;
case ISD::ADD:
case ISD::SUB:
@@ -2316,13 +2434,17 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
APInt UndefLHS, UndefRHS;
SDValue LHS = V.getOperand(0);
SDValue RHS = V.getOperand(1);
- if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
- isSplatValue(RHS, DemandedElts, UndefRHS)) {
+ if (isSplatValue(LHS, DemandedElts, UndefLHS, Depth + 1) &&
+ isSplatValue(RHS, DemandedElts, UndefRHS, Depth + 1)) {
UndefElts = UndefLHS | UndefRHS;
return true;
}
break;
}
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1);
}
// We don't support other cases than those above for scalable vectors at
@@ -2377,7 +2499,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt UndefSrcElts;
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts)) {
+ if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) {
UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
return true;
}
@@ -2574,15 +2696,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// We know all of the bits for a constant!
- Known.One = C->getAPIntValue();
- Known.Zero = ~Known.One;
- return Known;
+ return KnownBits::makeConstant(C->getAPIntValue());
}
if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
// We know all of the bits for a constant fp!
- Known.One = C->getValueAPF().bitcastToAPInt();
- Known.Zero = ~Known.One;
- return Known;
+ return KnownBits::makeConstant(C->getValueAPF().bitcastToAPInt());
}
if (Depth >= MaxRecursionDepth)
@@ -2617,8 +2735,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
// Known bits are the values that are shared by every demanded element.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -2655,8 +2772,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -2664,8 +2780,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
break;
}
@@ -2681,8 +2796,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -2710,8 +2824,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
if (!!DemandedSrcElts) {
Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
break;
}
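Several of the computeKnownBits hunks replace the repeated "Known.One &= Known2.One; Known.Zero &= Known2.Zero;" pattern with KnownBits::commonBits, which keeps only the bits on which both operands agree. A stand-alone sketch of that idea with plain 64-bit masks; KnownBits64 is an illustrative stand-in, not the real APInt-based KnownBits class:

#include <cstdint>

// A "known bits" pair tracks bits proven one and bits proven zero.
struct KnownBits64 {
  uint64_t One;   // bits known to be 1
  uint64_t Zero;  // bits known to be 0
};

// The bits shared by two possible values are exactly those known in both,
// hence the bitwise AND of each mask, which is what commonBits computes.
KnownBits64 commonBits(KnownBits64 A, KnownBits64 B) {
  KnownBits64 R;
  R.One = A.One & B.One;
  R.Zero = A.Zero & B.Zero;
  return R;
}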
@@ -2830,35 +2943,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::MUL: {
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- // If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conservative estimate for high known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- unsigned TrailZ = Known.countMinTrailingZeros() +
- Known2.countMinTrailingZeros();
- unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
- Known2.countMinLeadingZeros(),
- BitWidth) - BitWidth;
-
- Known.resetAll();
- Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
- Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
+ Known = KnownBits::computeForMul(Known, Known2);
break;
}
case ISD::UDIV: {
- // For the purposes of computing leading zeros we can conservatively
- // treat a udiv as a logical right shift by the power of 2 known to
- // be less than the denominator.
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- unsigned LeadZ = Known2.countMinLeadingZeros();
-
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
- if (RHSMaxLeadingZeros != BitWidth)
- LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
-
- Known.Zero.setHighBits(LeadZ);
+ Known = KnownBits::udiv(Known, Known2);
break;
}
case ISD::SELECT:
@@ -2870,8 +2961,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SELECT_CC:
Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1);
@@ -2881,8 +2971,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SMULO:
case ISD::UMULO:
@@ -2911,19 +3000,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::SHL:
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
- unsigned Shift = ShAmt->getZExtValue();
- Known.Zero <<= Shift;
- Known.One <<= Shift;
- // Low bits are known zero.
- Known.Zero.setLowBits(Shift);
- break;
- }
-
- // No matter the shift amount, the trailing zeros will stay zero.
- Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros());
- Known.One.clearAllBits();
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::shl(Known, Known2);
// Minimum shift low bits are known zero.
if (const APInt *ShMinAmt =
@@ -2932,19 +3010,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::SRL:
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
- unsigned Shift = ShAmt->getZExtValue();
- Known.Zero.lshrInPlace(Shift);
- Known.One.lshrInPlace(Shift);
- // High bits are known zero.
- Known.Zero.setHighBits(Shift);
- break;
- }
-
- // No matter the shift amount, the leading zeros will stay zero.
- Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros());
- Known.One.clearAllBits();
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::lshr(Known, Known2);
// Minimum shift high bits are known zero.
if (const APInt *ShMinAmt =
@@ -2952,13 +3019,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setHighBits(ShMinAmt->getZExtValue());
break;
case ISD::SRA:
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
- Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- unsigned Shift = ShAmt->getZExtValue();
- // Sign extend known zero/one bit (else is unknown).
- Known.Zero.ashrInPlace(Shift);
- Known.One.ashrInPlace(Shift);
- }
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::ashr(Known, Known2);
+ // TODO: Add minimum shift high known sign bits.
break;
case ISD::FSHL:
case ISD::FSHR:
@@ -2993,38 +3057,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
break;
case ISD::SIGN_EXTEND_INREG: {
- EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- unsigned EBits = EVT.getScalarSizeInBits();
-
- // Sign extension. Compute the demanded bits in the result that are not
- // present in the input.
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
-
- APInt InSignMask = APInt::getSignMask(EBits);
- APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
-
- // If the sign extended bits are demanded, we know that the sign
- // bit is demanded.
- InSignMask = InSignMask.zext(BitWidth);
- if (NewBits.getBoolValue())
- InputDemandedBits |= InSignMask;
-
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known.One &= InputDemandedBits;
- Known.Zero &= InputDemandedBits;
-
- // If the sign bit of the input is known set or clear, then we know the
- // top bits of the result.
- if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear
- Known.Zero |= NewBits;
- Known.One &= ~NewBits;
- } else if (Known.One.intersects(InSignMask)) { // Input sign bit known set
- Known.One |= NewBits;
- Known.Zero &= ~NewBits;
- } else { // Input sign bit unknown
- Known.Zero &= ~NewBits;
- Known.One &= ~NewBits;
- }
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ Known = Known.sextInReg(EVT.getScalarSizeInBits());
break;
}
case ISD::CTTZ:
@@ -3052,6 +3087,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
break;
}
+ case ISD::PARITY: {
+ // Parity returns 0 everywhere but the LSB.
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
const Constant *Cst = TLI->getTargetConstantFromLoad(LD);
@@ -3095,13 +3135,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
} else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
- const APInt &Value = CInt->getValue();
- Known.One = Value;
- Known.Zero = ~Value;
+ Known = KnownBits::makeConstant(CInt->getValue());
} else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
- APInt Value = CFP->getValueAPF().bitcastToAPInt();
- Known.One = Value;
- Known.Zero = ~Value;
+ Known =
+ KnownBits::makeConstant(CFP->getValueAPF().bitcastToAPInt());
}
}
}
@@ -3241,53 +3278,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
break;
}
- case ISD::SREM:
- if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
- const APInt &RA = Rem->getAPIntValue().abs();
- if (RA.isPowerOf2()) {
- APInt LowBits = RA - 1;
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- // The low bits of the first operand are unchanged by the srem.
- Known.Zero = Known2.Zero & LowBits;
- Known.One = Known2.One & LowBits;
-
- // If the first operand is non-negative or has all low bits zero, then
- // the upper bits are all zero.
- if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
- Known.Zero |= ~LowBits;
-
- // If the first operand is negative and not all low bits are zero, then
- // the upper bits are all one.
- if (Known2.isNegative() && LowBits.intersects(Known2.One))
- Known.One |= ~LowBits;
- assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?");
- }
- }
+ case ISD::SREM: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::srem(Known, Known2);
break;
+ }
case ISD::UREM: {
- if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
- const APInt &RA = Rem->getAPIntValue();
- if (RA.isPowerOf2()) {
- APInt LowBits = (RA - 1);
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- // The upper bits are all zero, the lower ones are unchanged.
- Known.Zero = Known2.Zero | ~LowBits;
- Known.One = Known2.One & LowBits;
- break;
- }
- }
-
- // Since the result is less than or equal to either operand, any leading
- // zero bits in either operand must also exist in the result.
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-
- uint32_t Leaders =
- std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
- Known.resetAll();
- Known.Zero.setHighBits(Leaders);
+ Known = KnownBits::urem(Known, Known2);
break;
}
case ISD::EXTRACT_ELEMENT: {
@@ -3307,6 +3307,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
+ // computeKnownBits not yet implemented for scalable vectors.
+ if (VecVT.isScalableVector())
+ break;
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
@@ -3347,73 +3350,39 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setAllBits();
if (DemandedVal) {
Known2 = computeKnownBits(InVal, Depth + 1);
- Known.One &= Known2.One.zextOrTrunc(BitWidth);
- Known.Zero &= Known2.Zero.zextOrTrunc(BitWidth);
+ Known = KnownBits::commonBits(Known, Known2.zextOrTrunc(BitWidth));
}
if (!!DemandedVecElts) {
Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
break;
}
case ISD::BITREVERSE: {
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known.Zero = Known2.Zero.reverseBits();
- Known.One = Known2.One.reverseBits();
+ Known = Known2.reverseBits();
break;
}
case ISD::BSWAP: {
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known.Zero = Known2.Zero.byteSwap();
- Known.One = Known2.One.byteSwap();
+ Known = Known2.byteSwap();
break;
}
case ISD::ABS: {
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- // If the source's MSB is zero then we know the rest of the bits already.
- if (Known2.isNonNegative()) {
- Known.Zero = Known2.Zero;
- Known.One = Known2.One;
- break;
- }
-
- // We only know that the absolute values's MSB will be zero iff there is
- // a set bit that isn't the sign bit (otherwise it could be INT_MIN).
- Known2.One.clearSignBit();
- if (Known2.One.getBoolValue()) {
- Known.Zero = APInt::getSignMask(BitWidth);
- break;
- }
+ Known = Known2.abs();
break;
}
case ISD::UMIN: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-
- // UMIN - we know that the result will have the maximum of the
- // known zero leading bits of the inputs.
- unsigned LeadZero = Known.countMinLeadingZeros();
- LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros());
-
- Known.Zero &= Known2.Zero;
- Known.One &= Known2.One;
- Known.Zero.setHighBits(LeadZero);
+ Known = KnownBits::umin(Known, Known2);
break;
}
case ISD::UMAX: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-
- // UMAX - we know that the result will have the maximum of the
- // known one leading bits of the inputs.
- unsigned LeadOne = Known.countMinLeadingOnes();
- LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes());
-
- Known.Zero &= Known2.Zero;
- Known.One &= Known2.One;
- Known.One.setHighBits(LeadOne);
+ Known = KnownBits::umax(Known, Known2);
break;
}
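The removed leading-bit reasoning for UMIN/UMAX now lives in KnownBits::umin/umax. The core fact it relied on, sketched standalone: the unsigned minimum can never have fewer leading zeros than either operand (and dually, the maximum never has fewer leading ones).

#include <algorithm>
#include <cassert>
#include <cstdint>

static int leadingZeros(uint32_t X) {
  int N = 0;
  for (uint32_t Bit = 0x80000000u; Bit && !(X & Bit); Bit >>= 1)
    ++N;
  return N;
}

int main() {
  // umin(A, B) <= A and <= B, so its leading-zero count is at least the
  // larger of the two operands' leading-zero counts.
  uint32_t A = 0x00001234u, B = 0x0FFFFFFFu;
  assert(leadingZeros(std::min(A, B)) >=
         std::max(leadingZeros(A), leadingZeros(B)));
}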
case ISD::SMIN:
@@ -3447,12 +3416,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
}
- // Fallback - just get the shared known bits of the operands.
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- if (Known.isUnknown()) break; // Early-out
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known.Zero &= Known2.Zero;
- Known.One &= Known2.One;
+ if (IsMax)
+ Known = KnownBits::smax(Known, Known2);
+ else
+ Known = KnownBits::smin(Known, Known2);
break;
}
case ISD::FrameIndex:
@@ -4395,11 +4364,16 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
for (SDValue Op : Elts)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
- if (SVT.bitsGT(VT.getScalarType()))
- for (SDValue &Op : Elts)
- Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
- ? DAG.getZExtOrTrunc(Op, DL, SVT)
- : DAG.getSExtOrTrunc(Op, DL, SVT);
+ if (SVT.bitsGT(VT.getScalarType())) {
+ for (SDValue &Op : Elts) {
+ if (Op.isUndef())
+ Op = DAG.getUNDEF(SVT);
+ else
+ Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, DL, SVT)
+ : DAG.getSExtOrTrunc(Op, DL, SVT);
+ }
+ }
SDValue V = DAG.getBuildVector(VT, DL, Elts);
NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG);
@@ -4425,6 +4399,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue Operand) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, Operand, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue Operand, const SDNodeFlags Flags) {
// Constant fold unary operations with an integer constant operand. Even
// opaque constant will be folded, because the folding of unary operations
@@ -4625,8 +4607,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
if (Operand.getValueType() == VT) return Operand; // noop conversion.
assert((!VT.isVector() ||
- VT.getVectorNumElements() ==
- Operand.getValueType().getVectorNumElements()) &&
+ VT.getVectorElementCount() ==
+ Operand.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid fpext node, dst < src!");
@@ -4811,6 +4793,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::VSCALE:
assert(VT == Operand.getValueType() && "Unexpected VT!");
break;
+ case ISD::CTPOP:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return Operand;
+ break;
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return getNOT(DL, Operand, Operand.getValueType());
+ break;
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_OR, DL, VT, Operand);
+ break;
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_UMIN:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_AND, DL, VT, Operand);
+ break;
}
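For the new i1 folds above, a tiny truth-table check may help (editor's sketch): on a 1-bit value the population count is the value itself, and counting leading or trailing zeros gives the logical NOT.

#include <cassert>

int main() {
  for (unsigned X : {0u, 1u}) {
    unsigned Pop  = X;           // popcount of an i1 is the bit itself
    unsigned Ctlz = X ? 0u : 1u; // ctlz/cttz of an i1: 0 for 1, width (1) for 0
    assert(Pop == X);
    assert(Ctlz == (X ^ 1u));    // i.e. NOT X, matching the getNOT() fold
  }
}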
SDNode *N;
@@ -5233,6 +5234,14 @@ SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) {
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, N1, N2, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
@@ -5312,10 +5321,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::MULHS:
case ISD::SDIV:
case ISD::SREM:
- case ISD::SMIN:
- case ISD::SMAX:
- case ISD::UMIN:
- case ISD::UMAX:
case ISD::SADDSAT:
case ISD::SSUBSAT:
case ISD::UADDSAT:
@@ -5324,6 +5329,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
break;
+ case ISD::SMIN:
+ case ISD::UMAX:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::OR, DL, VT, N1, N2);
+ break;
+ case ISD::SMAX:
+ case ISD::UMIN:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::AND, DL, VT, N1, N2);
+ break;
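Why SMIN/UMAX map to OR and SMAX/UMIN map to AND on i1 vectors, as an editor's truth-table sketch (treating the i1 bit as 0/1 unsigned and 0/-1 signed):

#include <algorithm>
#include <cassert>

int main() {
  for (int A : {0, 1})
    for (int B : {0, 1}) {
      int SA = A ? -1 : 0, SB = B ? -1 : 0;               // signed view of the bit
      assert(std::max(A, B) == (A | B));                  // UMAX == OR
      assert(std::min(A, B) == (A & B));                  // UMIN == AND
      assert((std::min(SA, SB) != 0) == ((A | B) != 0));  // SMIN == OR
      assert((std::max(SA, SB) != 0) == ((A & B) != 0));  // SMAX == AND
    }
}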
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -5365,8 +5386,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmount().
- assert(N2.getValueType().getScalarSizeInBits().getFixedSize() >=
- Log2_32_Ceil(VT.getScalarSizeInBits().getFixedSize()) &&
+ assert(N2.getValueType().getScalarSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarSizeInBits()) &&
"Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
@@ -5562,6 +5583,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
(VT.getVectorMinNumElements() + N2C->getZExtValue()) <=
N1VT.getVectorMinNumElements()) &&
"Extract subvector overflow!");
+ assert(N2C->getAPIntValue().getBitWidth() ==
+ TLI->getVectorIdxTy(getDataLayout())
+ .getSizeInBits()
+ .getFixedSize() &&
+ "Constant index for EXTRACT_SUBVECTOR has an invalid size");
// Trivial extraction.
if (VT == N1VT)
@@ -5573,8 +5599,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
// the concat have the same type as the extract.
- if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
- N1.getNumOperands() > 0 && VT == N1.getOperand(0).getValueType()) {
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0 &&
+ VT == N1.getOperand(0).getValueType()) {
unsigned Factor = VT.getVectorMinNumElements();
return N1.getOperand(N2C->getZExtValue() / Factor);
}
@@ -5671,6 +5697,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, N1, N2, N3, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3,
const SDNodeFlags Flags) {
// Perform various simplifications.
@@ -5940,11 +5974,20 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
return SDValue(nullptr, 0);
}
-SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, int64_t Offset,
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset,
const SDLoc &DL,
const SDNodeFlags Flags) {
EVT VT = Base.getValueType();
- return getMemBasePlusOffset(Base, getConstant(Offset, DL, VT), DL, Flags);
+ SDValue Index;
+
+ if (Offset.isScalable())
+ Index = getVScale(DL, Base.getValueType(),
+ APInt(Base.getValueSizeInBits().getFixedSize(),
+ Offset.getKnownMinSize()));
+ else
+ Index = getConstant(Offset.getFixedSize(), DL, VT);
+
+ return getMemBasePlusOffset(Base, Index, DL, Flags);
}
SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset,
@@ -6039,7 +6082,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SrcAlign = Alignment;
assert(SrcAlign && "SrcAlign must be set");
ConstantDataArraySlice Slice;
- bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
+ // If marked as volatile, perform a copy even when marked as constant.
+ bool CopyFromConstant = !isVol && isMemSrcFromConstant(Src, Slice);
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
const MemOp Op = isZeroConstant
@@ -6111,8 +6155,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
if (Value.getNode()) {
Store = DAG.getStore(
- Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags);
+ Chain, dl, Value,
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
OutChains.push_back(Store);
}
}
@@ -6132,16 +6177,17 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
- Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
- DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), VT,
- commonAlignment(*SrcAlign, SrcOff).value(),
- SrcMMOFlags);
+ Value = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, NVT, Chain,
+ DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ SrcPtrInfo.getWithOffset(SrcOff), VT,
+ commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags);
OutLoadChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
- Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), VT, Alignment.value(), MMOFlags);
+ Chain, dl, Value,
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
OutStoreChains.push_back(Store);
}
SrcOff += VTSize;
@@ -6261,9 +6307,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
- Value = DAG.getLoad(
- VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), SrcAlign->value(), SrcMMOFlags);
+ Value =
+ DAG.getLoad(VT, dl, Chain,
+ DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -6275,9 +6322,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Store;
- Store = DAG.getStore(
- Chain, dl, LoadValues[i], DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags);
+ Store =
+ DAG.getStore(Chain, dl, LoadValues[i],
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -6375,8 +6423,9 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
}
assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(
- Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Alignment.value(),
+ Chain, dl, Value,
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment,
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
@@ -6390,7 +6439,7 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
unsigned AS) {
// Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
// pointer operands can be losslessly bitcasted to pointers of address space 0
- if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
+ if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) {
report_fatal_error("cannot lower memory intrinsic in address space " +
Twine(AS));
}
@@ -6882,6 +6931,30 @@ SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getPseudoProbeNode(const SDLoc &Dl, SDValue Chain,
+ uint64_t Guid, uint64_t Index,
+ uint32_t Attr) {
+ const unsigned Opcode = ISD::PSEUDO_PROBE;
+ const auto VTs = getVTList(MVT::Other);
+ SDValue Ops[] = {Chain};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ ID.AddInteger(Guid);
+ ID.AddInteger(Index);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, Dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<PseudoProbeSDNode>(
+ Opcode, Dl.getIROrder(), Dl.getDebugLoc(), VTs, Guid, Index, Attr);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
@@ -6962,7 +7035,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
assert(VT.isVector() == MemVT.isVector() &&
"Cannot use an ext load to convert to or from a vector!");
assert((!VT.isVector() ||
- VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+ VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&
"Cannot use an ext load to change the number of vector elements!");
}
@@ -7041,8 +7114,7 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
- LD->getMemoryVT(), LD->getAlignment(), MMOFlags,
- LD->getAAInfo());
+ LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo());
}
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
@@ -7112,7 +7184,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo);
+ PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
+ Alignment, AAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
@@ -7133,7 +7206,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
assert(VT.isVector() == SVT.isVector() &&
"Cannot use trunc store to convert to or from a vector!");
assert((!VT.isVector() ||
- VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
"Cannot use trunc store to change the number of vector elements!");
SDVTList VTs = getVTList(MVT::Other);
@@ -7285,14 +7358,15 @@ SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl,
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
- ISD::MemIndexType IndexType) {
+ ISD::MemIndexType IndexType,
+ ISD::LoadExtType ExtTy) {
assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
- dl.getIROrder(), VTs, VT, MMO, IndexType));
+ dl.getIROrder(), VTs, VT, MMO, IndexType, ExtTy));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -7300,17 +7374,22 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
return SDValue(E, 0);
}
+ IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]);
auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO, IndexType);
+ VTs, VT, MMO, IndexType, ExtTy);
createOperands(N, Ops);
assert(N->getPassThru().getValueType() == N->getValueType(0) &&
"Incompatible type of the PassThru value in MaskedGatherSDNode");
- assert(N->getMask().getValueType().getVectorNumElements() ==
- N->getValueType(0).getVectorNumElements() &&
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValueType(0).getVectorElementCount() &&
"Vector width mismatch between mask and data");
- assert(N->getIndex().getValueType().getVectorNumElements() >=
- N->getValueType(0).getVectorNumElements() &&
+ assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValueType(0).getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValueType(0).getVectorElementCount()) &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
@@ -7326,29 +7405,37 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
- ISD::MemIndexType IndexType) {
+ ISD::MemIndexType IndexType,
+ bool IsTrunc) {
assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
- dl.getIROrder(), VTs, VT, MMO, IndexType));
+ dl.getIROrder(), VTs, VT, MMO, IndexType, IsTrunc));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
+
+ IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]);
auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO, IndexType);
+ VTs, VT, MMO, IndexType, IsTrunc);
createOperands(N, Ops);
- assert(N->getMask().getValueType().getVectorNumElements() ==
- N->getValue().getValueType().getVectorNumElements() &&
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValue().getValueType().getVectorElementCount() &&
"Vector width mismatch between mask and data");
- assert(N->getIndex().getValueType().getVectorNumElements() >=
- N->getValue().getValueType().getVectorNumElements() &&
+ assert(
+ N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValue().getValueType().getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValue().getValueType().getVectorElementCount()) &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
@@ -7452,6 +7539,11 @@ SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y,
if (YC->getValueAPF().isExactlyValue(1.0))
return X;
+ // X * 0.0 --> 0.0
+ if (Opcode == ISD::FMUL && Flags.hasNoNaNs() && Flags.hasNoSignedZeros())
+ if (YC->getValueAPF().isZero())
+ return getConstantFP(0.0, SDLoc(Y), Y.getValueType());
+
return SDValue();
}
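Editor's note on why both flags are required for the new X * 0.0 fold: without no-NaNs a NaN (or infinite) operand yields NaN, and without no-signed-zeros a negative operand yields -0.0, so neither case may be folded to +0.0. A two-line demonstration:

#include <cassert>
#include <cmath>

int main() {
  assert(std::isnan(std::nan("") * 0.0)); // needs the nnan flag to ignore
  assert(std::signbit(-1.0 * 0.0));       // needs the nsz flag to ignore
}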
@@ -7478,6 +7570,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, Ops, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
unsigned NumOps = Ops.size();
switch (NumOps) {
@@ -7549,6 +7649,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VTList, Ops, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops);
@@ -8245,6 +8353,14 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
/// getNodeIfExists - Get the specified node if it's already available, or
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNodeIfExists(Opcode, VTList, Ops, Flags);
+}
+
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
ArrayRef<SDValue> Ops,
const SDNodeFlags Flags) {
if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
@@ -8259,6 +8375,19 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
return nullptr;
}
+/// doesNodeExist - Check if a node exists without modifying its flags.
+bool SelectionDAG::doesNodeExist(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
+ if (FindNodeOrInsertPos(ID, SDLoc(), IP))
+ return true;
+ }
+ return false;
+}
+
/// getDbgValue - Creates a SDDbgValue node.
///
/// SDNode
@@ -8676,21 +8805,31 @@ namespace {
} // end anonymous namespace
-void SelectionDAG::updateDivergence(SDNode * N)
-{
- if (TLI->isSDNodeAlwaysUniform(N))
- return;
- bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+bool SelectionDAG::calculateDivergence(SDNode *N) {
+ if (TLI->isSDNodeAlwaysUniform(N)) {
+ assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, DA) &&
+ "Conflicting divergence information!");
+ return false;
+ }
+ if (TLI->isSDNodeSourceOfDivergence(N, FLI, DA))
+ return true;
for (auto &Op : N->ops()) {
- if (Op.Val.getValueType() != MVT::Other)
- IsDivergent |= Op.getNode()->isDivergent();
+ if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent())
+ return true;
}
- if (N->SDNodeBits.IsDivergent != IsDivergent) {
- N->SDNodeBits.IsDivergent = IsDivergent;
- for (auto U : N->uses()) {
- updateDivergence(U);
+ return false;
+}
+
+void SelectionDAG::updateDivergence(SDNode *N) {
+ SmallVector<SDNode *, 16> Worklist(1, N);
+ do {
+ N = Worklist.pop_back_val();
+ bool IsDivergent = calculateDivergence(N);
+ if (N->SDNodeBits.IsDivergent != IsDivergent) {
+ N->SDNodeBits.IsDivergent = IsDivergent;
+ llvm::append_range(Worklist, N->uses());
}
- }
+ } while (!Worklist.empty());
}
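The rewrite above turns the recursive divergence update into an explicit worklist. The generic shape, reduced to a toy node type (editor's sketch, not the SelectionDAG classes):

#include <vector>

struct ToyNode {
  bool Divergent = false;
  std::vector<ToyNode *> Users;
};

// Recompute a node's bit; only when it changes, push its users instead of
// recursing into them. This keeps stack depth constant on deep DAGs.
static void propagate(ToyNode *Start, bool (*Recompute)(ToyNode *)) {
  std::vector<ToyNode *> Worklist{Start};
  while (!Worklist.empty()) {
    ToyNode *N = Worklist.back();
    Worklist.pop_back();
    bool NewBit = Recompute(N);
    if (N->Divergent != NewBit) {
      N->Divergent = NewBit;
      Worklist.insert(Worklist.end(), N->Users.begin(), N->Users.end());
    }
  }
}

int main() {
  ToyNode A, B;
  A.Users = {&B};
  propagate(&A, [](ToyNode *) { return true; });
  return (A.Divergent && B.Divergent) ? 0 : 1;
}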
void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
@@ -8716,26 +8855,9 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
void SelectionDAG::VerifyDAGDiverence() {
std::vector<SDNode *> TopoOrder;
CreateTopologicalOrder(TopoOrder);
- const TargetLowering &TLI = getTargetLoweringInfo();
- DenseMap<const SDNode *, bool> DivergenceMap;
- for (auto &N : allnodes()) {
- DivergenceMap[&N] = false;
- }
- for (auto N : TopoOrder) {
- bool IsDivergent = DivergenceMap[N];
- bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA);
- for (auto &Op : N->ops()) {
- if (Op.Val.getValueType() != MVT::Other)
- IsSDNodeDivergent |= DivergenceMap[Op.getNode()];
- }
- if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) {
- DivergenceMap[N] = true;
- }
- }
- for (auto &N : allnodes()) {
- (void)N;
- assert(DivergenceMap[&N] == N.isDivergent() &&
- "Divergence bit inconsistency detected\n");
+ for (auto *N : TopoOrder) {
+ assert(calculateDivergence(N) == N->isDivergent() &&
+ "Divergence bit inconsistency detected");
}
}
#endif
@@ -8904,25 +9026,32 @@ void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) {
DbgInfo->add(DB);
}
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
- SDValue NewMemOp) {
- assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
+ SDValue NewMemOpChain) {
+ assert(isa<MemSDNode>(NewMemOpChain) && "Expected a memop node");
+ assert(NewMemOpChain.getValueType() == MVT::Other && "Expected a token VT");
// The new memory operation must have the same position as the old load in
// terms of memory dependency. Create a TokenFactor for the old load and new
// memory operation and update uses of the old load's output chain to use that
// TokenFactor.
- SDValue OldChain = SDValue(OldLoad, 1);
- SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
- if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1))
- return NewChain;
+ if (OldChain == NewMemOpChain || OldChain.use_empty())
+ return NewMemOpChain;
- SDValue TokenFactor =
- getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain);
+ SDValue TokenFactor = getNode(ISD::TokenFactor, SDLoc(OldChain), MVT::Other,
+ OldChain, NewMemOpChain);
ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
- UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
+ UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewMemOpChain);
return TokenFactor;
}
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+ SDValue NewMemOp) {
+ assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
+ SDValue OldChain = SDValue(OldLoad, 1);
+ SDValue NewMemOpChain = NewMemOp.getValue(1);
+ return makeEquivalentMemoryOrdering(OldChain, NewMemOpChain);
+}
+
SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
Function **OutFunction) {
assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol");
@@ -9006,6 +9135,18 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
+ // SplatVectors can truncate their operands. Ignore that case here unless
+ // AllowTruncation is set.
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ EVT VecEltVT = N->getValueType(0).getVectorElementType();
+ if (auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ EVT CVT = CN->getValueType(0);
+ assert(CVT.bitsGE(VecEltVT) && "Illegal splat_vector element extension");
+ if (AllowTruncation || CVT == VecEltVT)
+ return CN;
+ }
+ }
+
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
@@ -9059,6 +9200,10 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
return CN;
}
+ if (N.getOpcode() == ISD::SPLAT_VECTOR)
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0)))
+ return CN;
+
return nullptr;
}
@@ -9220,8 +9365,7 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
bool Seen = false;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDNode *User = *I;
- if (llvm::any_of(Nodes,
- [&User](const SDNode *Node) { return User == Node; }))
+ if (llvm::is_contained(Nodes, User))
Seen = true;
else
return false;
@@ -9232,7 +9376,7 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
/// isOperand - Return true if this node is an operand of N.
bool SDValue::isOperandOf(const SDNode *N) const {
- return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; });
+ return is_contained(N->op_values(), *this);
}
bool SDNode::isOperandOf(const SDNode *N) const {
@@ -9616,24 +9760,24 @@ std::pair<EVT, EVT>
SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
bool *HiIsEmpty) const {
EVT EltTp = VT.getVectorElementType();
- bool IsScalable = VT.isScalableVector();
// Examples:
// custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty)
// custom VL=9 with enveloping VL=8/8 yields 8/1
// custom VL=10 with enveloping VL=8/8 yields 8/2
// etc.
- unsigned VTNumElts = VT.getVectorNumElements();
- unsigned EnvNumElts = EnvVT.getVectorNumElements();
+ ElementCount VTNumElts = VT.getVectorElementCount();
+ ElementCount EnvNumElts = EnvVT.getVectorElementCount();
+ assert(VTNumElts.isScalable() == EnvNumElts.isScalable() &&
+ "Mixing fixed width and scalable vectors when enveloping a type");
EVT LoVT, HiVT;
- if (VTNumElts > EnvNumElts) {
+ if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) {
LoVT = EnvVT;
- HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts,
- IsScalable);
+ HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts);
*HiIsEmpty = false;
} else {
// Flag that hi type has zero storage size, but return split envelop type
// (this would be easier if vector types with zero elements were allowed).
- LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts, IsScalable);
+ LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts);
HiVT = EnvVT;
*HiIsEmpty = true;
}
@@ -9768,16 +9912,16 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
BitVector *UndefElements) const {
+ unsigned NumOps = getNumOperands();
if (UndefElements) {
UndefElements->clear();
- UndefElements->resize(getNumOperands());
+ UndefElements->resize(NumOps);
}
- assert(getNumOperands() == DemandedElts.getBitWidth() &&
- "Unexpected vector size");
+ assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size");
if (!DemandedElts)
return SDValue();
SDValue Splatted;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0; i != NumOps; ++i) {
if (!DemandedElts[i])
continue;
SDValue Op = getOperand(i);
@@ -9806,6 +9950,58 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
return getSplatValue(DemandedElts, UndefElements);
}
+bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts,
+ SmallVectorImpl<SDValue> &Sequence,
+ BitVector *UndefElements) const {
+ unsigned NumOps = getNumOperands();
+ Sequence.clear();
+ if (UndefElements) {
+ UndefElements->clear();
+ UndefElements->resize(NumOps);
+ }
+ assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size");
+ if (!DemandedElts || NumOps < 2 || !isPowerOf2_32(NumOps))
+ return false;
+
+ // Set the undefs even if we don't find a sequence (like getSplatValue).
+ if (UndefElements)
+ for (unsigned I = 0; I != NumOps; ++I)
+ if (DemandedElts[I] && getOperand(I).isUndef())
+ (*UndefElements)[I] = true;
+
+ // Iteratively widen the sequence length looking for repetitions.
+ for (unsigned SeqLen = 1; SeqLen < NumOps; SeqLen *= 2) {
+ Sequence.append(SeqLen, SDValue());
+ for (unsigned I = 0; I != NumOps; ++I) {
+ if (!DemandedElts[I])
+ continue;
+ SDValue &SeqOp = Sequence[I % SeqLen];
+ SDValue Op = getOperand(I);
+ if (Op.isUndef()) {
+ if (!SeqOp)
+ SeqOp = Op;
+ continue;
+ }
+ if (SeqOp && !SeqOp.isUndef() && SeqOp != Op) {
+ Sequence.clear();
+ break;
+ }
+ SeqOp = Op;
+ }
+ if (!Sequence.empty())
+ return true;
+ }
+
+ assert(Sequence.empty() && "Failed to empty non-repeating sequence pattern");
+ return false;
+}
+
+bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
+ BitVector *UndefElements) const {
+ APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ return getRepeatedSequence(DemandedElts, Sequence, UndefElements);
+}
+
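A standalone model of the search loop above (editor's sketch on plain ints; the real routine additionally handles undef and demanded elements): try pattern lengths 1, 2, 4, ... and accept the first one where every element matches the element at index modulo length.

#include <cassert>
#include <vector>

static bool findRepeat(const std::vector<int> &Ops, std::vector<int> &Seq) {
  size_t N = Ops.size();
  if (N < 2 || (N & (N - 1)) != 0) // need a power-of-two element count
    return false;
  for (size_t Len = 1; Len < N; Len *= 2) {
    bool Match = true;
    for (size_t I = 0; I < N && Match; ++I)
      Match = Ops[I] == Ops[I % Len];
    if (Match) {
      Seq.assign(Ops.begin(), Ops.begin() + Len);
      return true;
    }
  }
  return false;
}

int main() {
  std::vector<int> Seq;
  assert(findRepeat({1, 2, 1, 2}, Seq) && Seq == std::vector<int>({1, 2}));
  assert(!findRepeat({1, 2, 3, 4}, Seq));
}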
ConstantSDNode *
BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts,
BitVector *UndefElements) const {
@@ -9878,7 +10074,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
// Returns the SDNode if it is a constant integer BuildVector
// or constant integer.
-SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
+SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) const {
if (isa<ConstantSDNode>(N))
return N.getNode();
if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
@@ -9889,10 +10085,15 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (GA->getOpcode() == ISD::GlobalAddress &&
TLI->isOffsetFoldingLegal(GA))
return GA;
+ if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
+ isa<ConstantSDNode>(N.getOperand(0)))
+ return N.getNode();
return nullptr;
}
-SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
+// Returns the SDNode if it is a constant float BuildVector
+// or constant float.
+SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const {
if (isa<ConstantFPSDNode>(N))
return N.getNode();
@@ -9914,13 +10115,14 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
Ops[I].setUser(Node);
Ops[I].setInitial(Vals[I]);
if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence.
- IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent();
+ IsDivergent |= Ops[I].getNode()->isDivergent();
}
Node->NumOperands = Vals.size();
Node->OperandList = Ops;
- IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
- if (!TLI->isSDNodeAlwaysUniform(Node))
+ if (!TLI->isSDNodeAlwaysUniform(Node)) {
+ IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
Node->SDNodeBits.IsDivergent = IsDivergent;
+ }
checkForCycles(Node);
}
@@ -9937,6 +10139,44 @@ SDValue SelectionDAG::getTokenFactor(const SDLoc &DL,
return getNode(ISD::TokenFactor, DL, MVT::Other, Vals);
}
+SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDNodeFlags Flags) {
+ switch (Opcode) {
+ default:
+ return SDValue();
+ case ISD::ADD:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::UMAX:
+ return getConstant(0, DL, VT);
+ case ISD::MUL:
+ return getConstant(1, DL, VT);
+ case ISD::AND:
+ case ISD::UMIN:
+ return getAllOnesConstant(DL, VT);
+ case ISD::SMAX:
+ return getConstant(APInt::getSignedMinValue(VT.getSizeInBits()), DL, VT);
+ case ISD::SMIN:
+ return getConstant(APInt::getSignedMaxValue(VT.getSizeInBits()), DL, VT);
+ case ISD::FADD:
+ return getConstantFP(-0.0, DL, VT);
+ case ISD::FMUL:
+ return getConstantFP(1.0, DL, VT);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF.
+ const fltSemantics &Semantics = EVTToAPFloatSemantics(VT);
+ APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) :
+ !Flags.hasNoInfs() ? APFloat::getInf(Semantics) :
+ APFloat::getLargest(Semantics);
+ if (Opcode == ISD::FMAXNUM)
+ NeutralAF.changeSign();
+
+ return getConstantFP(NeutralAF, DL, VT);
+ }
+ }
+}
+
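Spot-checking a few of the integer identities the new getNeutralElement helper relies on (editor's sketch; the FP cases depend on fast-math flags, as the comment in the hunk notes):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  uint32_t X = 0xDEADBEEFu;
  assert((X + 0u) == X && (X | 0u) == X && (X ^ 0u) == X);        // ADD/OR/XOR: 0
  assert((X * 1u) == X);                                          // MUL: 1
  assert((X & ~0u) == X);                                         // AND: all ones
  assert(std::max<uint32_t>(X, 0) == X);                          // UMAX: 0
  assert(std::min<uint32_t>(X, 0xFFFFFFFFu) == X);                // UMIN: all ones
  int32_t S = -42;
  assert(std::max(S, std::numeric_limits<int32_t>::min()) == S);  // SMAX: INT_MIN
  assert(std::min(S, std::numeric_limits<int32_t>::max()) == S);  // SMIN: INT_MAX
}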
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 3a53ab9717a4..20c7d771bfb6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -96,18 +97,28 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
int64_t PtrDiff;
if (NumBytes0.hasValue() && NumBytes1.hasValue() &&
BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
+ // If the size of memory access is unknown, do not use it to analysis.
+      // If the size of a memory access is unknown, do not use it for alias
+      // analysis. One example of an unknown-size memory access is a load or
+      // store of scalable vector objects on the stack.
+ // One example of unknown size memory access is to load/store scalable
+ // vector objects on the stack.
// BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
// following situations arise:
- IsAlias = !(
- // [----BasePtr0----]
- // [---BasePtr1--]
- // ========PtrDiff========>
- (*NumBytes0 <= PtrDiff) ||
- // [----BasePtr0----]
- // [---BasePtr1--]
- // =====(-PtrDiff)====>
- (PtrDiff + *NumBytes1 <= 0)); // i.e. *NumBytes1 < -PtrDiff.
- return true;
+ if (PtrDiff >= 0 &&
+ *NumBytes0 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // ========PtrDiff========>
+ IsAlias = !(*NumBytes0 <= PtrDiff);
+ return true;
+ }
+ if (PtrDiff < 0 &&
+ *NumBytes1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // =====(-PtrDiff)====>
+ IsAlias = !((PtrDiff + *NumBytes1) <= 0);
+ return true;
+ }
+ return false;
}
// If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
// able to calculate their relative offset if at least one arises
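The restructured check above is the usual interval-overlap test, now guarded against unknown access sizes. Its core, as a standalone sketch (ignoring the UnknownSize bail-out):

#include <cassert>
#include <cstdint>

// BasePtr1 sits PtrDiff bytes past BasePtr0. The accesses are disjoint only
// when one of them ends at or before the other begins.
static bool mayAlias(int64_t PtrDiff, int64_t Bytes0, int64_t Bytes1) {
  if (PtrDiff >= 0)
    return !(Bytes0 <= PtrDiff);   // [0,Bytes0) vs [PtrDiff,PtrDiff+Bytes1)
  return !(PtrDiff + Bytes1 <= 0); // second access starts before the first
}

int main() {
  assert(!mayAlias(8, 8, 4));  // back-to-back accesses do not overlap
  assert(mayAlias(4, 8, 4));   // second access starts inside the first
  assert(!mayAlias(-4, 8, 4)); // second access ends where the first begins
}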
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d2930391f87a..a6bd774934ac 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -14,15 +14,12 @@
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
@@ -40,7 +37,6 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -53,17 +49,14 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -75,13 +68,11 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
@@ -99,31 +90,22 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <algorithm>
-#include <cassert>
#include <cstddef>
-#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <numeric>
#include <tuple>
-#include <utility>
-#include <vector>
using namespace llvm;
using namespace PatternMatch;
@@ -422,10 +404,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
- assert((PartEVT.getVectorElementCount().Min >
- ValueVT.getVectorElementCount().Min) &&
- (PartEVT.getVectorElementCount().Scalable ==
- ValueVT.getVectorElementCount().Scalable) &&
+ assert((PartEVT.getVectorElementCount().getKnownMinValue() >
+ ValueVT.getVectorElementCount().getKnownMinValue()) &&
+ (PartEVT.getVectorElementCount().isScalable() ==
+ ValueVT.getVectorElementCount().isScalable()) &&
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getVectorIdxConstant(0, DL));
@@ -453,7 +435,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// are the same size, this is an obvious bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
+ } else if (ValueVT.bitsLT(PartEVT)) {
// Bitcast Val back the original type and extract the corresponding
// vector we want.
unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
@@ -683,14 +665,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else {
- if (ValueVT.getVectorNumElements() == 1) {
+ if (ValueVT.getVectorElementCount().isScalar()) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getVectorIdxConstant(0, DL));
} else {
- assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
+ uint64_t ValueSize = ValueVT.getFixedSizeInBits();
+ assert(PartVT.getFixedSizeInBits() > ValueSize &&
"lossy conversion of vector to scalar type");
- EVT IntermediateType =
- EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
Val = DAG.getBitcast(IntermediateType, Val);
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
}
@@ -723,15 +705,15 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
"Mixing scalable and fixed vectors when copying in parts");
- ElementCount DestEltCnt;
+ Optional<ElementCount> DestEltCnt;
if (IntermediateVT.isVector())
DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
else
- DestEltCnt = ElementCount(NumIntermediates, false);
+ DestEltCnt = ElementCount::getFixed(NumIntermediates);
EVT BuiltVectorTy = EVT::getVectorVT(
- *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt);
+ *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue());
if (ValueVT != BuiltVectorTy) {
if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
Val = Widened;
@@ -975,7 +957,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
// shouldn't try to apply any sort of splitting logic to them.
assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
"No 1:1 mapping from clobbers to regs?");
- unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
(void)SP;
for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
@@ -998,14 +980,14 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
-SmallVector<std::pair<unsigned, unsigned>, 4>
+SmallVector<std::pair<unsigned, TypeSize>, 4>
RegsForValue::getRegsAndSizes() const {
- SmallVector<std::pair<unsigned, unsigned>, 4> OutVec;
+ SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
unsigned I = 0;
for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
unsigned RegCount = std::get<0>(CountAndVT);
MVT RegisterVT = std::get<1>(CountAndVT);
- unsigned RegisterSize = RegisterVT.getSizeInBits();
+ TypeSize RegisterSize = RegisterVT.getSizeInBits();
for (unsigned E = I + RegCount; I != E; ++I)
OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
}
@@ -1114,25 +1096,6 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
visit(I.getOpcode(), I);
- if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
- // ConstrainedFPIntrinsics handle their own FMF.
- if (!isa<ConstrainedFPIntrinsic>(&I)) {
- // Propagate the fast-math-flags of this IR instruction to the DAG node that
- // maps to this instruction.
- // TODO: We could handle all flags (nsw, etc) here.
- // TODO: If an IR instruction maps to >1 node, only the final node will have
- // flags set.
- if (SDNode *Node = getNodeForIRValue(&I)) {
- SDNodeFlags IncomingFlags;
- IncomingFlags.copyFMF(*FPMO);
- if (!Node->getFlags().isDefined())
- Node->setFlags(IncomingFlags);
- else
- Node->intersectFlagsWith(IncomingFlags);
- }
- }
- }
-
if (!I.isTerminator() && !HasTailCall &&
!isa<GCStatepointInst>(I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
@@ -1178,7 +1141,7 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
if (isMatchingDbgValue(DDI))
salvageUnresolvedDbgValue(DDI);
- DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
+ erase_if(DDIV, isMatchingDbgValue);
}
}
@@ -1551,6 +1514,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
return DAG.getBlockAddress(BA, VT);
+ if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
+ return getValue(Equiv->getGlobalValue());
+
VectorType *VecTy = cast<VectorType>(V->getType());
// Now that we know the number and type of the elements, get that number of
@@ -1671,10 +1637,32 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
}
}
-// For wasm, there's alwyas a single catch pad attached to a catchswitch, and
-// the control flow always stops at the single catch pad, as it does for a
-// cleanup pad. In case the exception caught is not of the types the catch pad
-// catches, it will be rethrown by a rethrow.
+// In wasm EH, even though a catchpad may not catch an exception if a tag does
+// not match, it is OK to add only the first unwind destination catchpad to the
+// successors, because there will be at least one invoke instruction within the
+// catch scope that points to the next unwind destination, if one exists, so
+// CFGSort cannot mess up the BB sorting order.
+// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
+// call within them, and catchpads only consisting of 'catch (...)' have a
+// '__cxa_end_catch' call within them, both of which generate invokes in case
+// the next unwind destination exists, i.e., the next unwind destination is not
+// the caller.)
+//
+// Having at most one EH pad successor is also simpler and helps later
+// transformations.
+//
+// For example,
+// current:
+// invoke void @foo to ... unwind label %catch.dispatch
+// catch.dispatch:
+// %0 = catchswitch within ... [label %catch.start] unwind label %next
+// catch.start:
+// ...
+// ... in this BB or some other child BB dominated by this BB there will be an
+// invoke that points to 'next' BB as an unwind destination
+//
+// next: ; We don't need to add this to 'current' BB's successor
+// ...
static void findWasmUnwindDestinations(
FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
BranchProbability Prob,
@@ -1837,7 +1825,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
for (unsigned i = 0; i != NumValues; ++i) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
- SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
+ SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
+ TypeSize::Fixed(Offsets[i]));
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
@@ -2118,14 +2107,19 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
}
const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ const Value *BOpOp0, *BOpOp1;
// Compute the effective opcode for Cond, taking into account whether it needs
// to be inverted, e.g.
// and (not (or A, B)), C
// gets lowered as
// and (and (not A, not B), C)
- unsigned BOpc = 0;
+ Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
if (BOp) {
- BOpc = BOp->getOpcode();
+ BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::And
+ : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::Or
+ : (Instruction::BinaryOps)0);
if (InvertCond) {
if (BOpc == Instruction::And)
BOpc = Instruction::Or;
@@ -2135,11 +2129,11 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
}
// If this node is not part of the or/and tree, emit it as a branch.
- if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
- BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
- BOp->getParent() != CurBB->getBasicBlock() ||
- !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
- !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ // Note that all nodes in the tree should have same opcode.
+ bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
+ if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOpOp0, CurBB->getBasicBlock()) ||
+ !InBlock(BOpOp1, CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
TProb, FProb, InvertCond);
return;
@@ -2175,15 +2169,15 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewTrueProb = TProb / 2;
auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb, InvertCond);
+ FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1], InvertCond);
+ FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -2208,15 +2202,15 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewTrueProb = TProb + FProb / 2;
auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb, InvertCond);
+ FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1], InvertCond);
+ FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
}
}
@@ -2293,16 +2287,20 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// je foo
// cmp D, E
// jle foo
- if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- Instruction::BinaryOps Opcode = BOp->getOpcode();
- Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1);
- if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
- !I.hasMetadata(LLVMContext::MD_unpredictable) &&
- (Opcode == Instruction::And || Opcode == Instruction::Or) &&
- !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
- match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
- FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
- Opcode,
+ const Instruction *BOp = dyn_cast<Instruction>(CondVal);
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
+ BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) {
+ Value *Vec;
+ const Value *BOp0, *BOp1;
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
+ if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::And;
+ else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::Or;
+
+ if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
/*InvertCond=*/false);
@@ -2551,7 +2549,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
- unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
+ Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
@@ -2809,7 +2807,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
break;
- case Intrinsic::wasm_rethrow_in_catch: {
+ case Intrinsic::wasm_rethrow: {
// This is usually done in visitTargetIntrinsic, but this intrinsic is
// special because it can be invoked, so we manually lower it to a DAG
// node here.
@@ -2817,7 +2815,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
Ops.push_back(getRoot()); // inchain
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Ops.push_back(
- DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(),
+ DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
@@ -2999,20 +2997,6 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
-void SelectionDAGBuilder::visitFSub(const User &I) {
- // -0.0 - X --> fneg
- Type *Ty = I.getType();
- if (isa<Constant>(I.getOperand(0)) &&
- I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
- Op2.getValueType(), Op2));
- return;
- }
-
- visitBinary(I, ISD::FSUB);
-}
-
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
@@ -3028,9 +3012,10 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
}
- if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
+ if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
Flags.setExact(ExactOp->isExact());
- }
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -3140,10 +3125,14 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
- auto *FPMO = dyn_cast<FPMathOperator>(&I);
- if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
+ auto *FPMO = cast<FPMathOperator>(&I);
+ if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
+ SDNodeFlags Flags;
+ Flags.copyFMF(*FPMO);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
@@ -3173,6 +3162,11 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
bool IsUnaryAbs = false;
+ bool Negate = false;
+
+ SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
// Min/max matching is only viable if all output VTs are the same.
if (is_splat(ValueVTs)) {
@@ -3233,12 +3227,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
break;
}
break;
+ case SPF_NABS:
+ Negate = true;
+ LLVM_FALLTHROUGH;
case SPF_ABS:
IsUnaryAbs = true;
Opc = ISD::ABS;
break;
- case SPF_NABS:
- // TODO: we need to produce sub(0, abs(X)).
default: break;
}
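// A minimal standalone sketch (not from the LLVM tree) of the SPF_NABS rewrite
// above, nabs(x) = sub(0, abs(x)). Unsigned arithmetic emulates the wraparound
// behaviour of ISD::ABS/ISD::SUB, since signed overflow is undefined in C++;
// a two's-complement narrowing conversion is assumed. Even INT32_MIN
// round-trips: nabs(INT32_MIN) == INT32_MIN.
#include <cassert>
#include <cstdint>

static int32_t wrapAbs(int32_t X) {            // ISD::ABS-style wraparound abs
  uint32_t U = static_cast<uint32_t>(X);
  return static_cast<int32_t>(X < 0 ? 0u - U : U);
}

static int32_t nabs(int32_t X) {               // sub(0, abs(X))
  return static_cast<int32_t>(0u - static_cast<uint32_t>(wrapAbs(X)));
}

int main() {
  assert(nabs(5) == -5);
  assert(nabs(-7) == -7);
  assert(nabs(0) == 0);
  assert(nabs(INT32_MIN) == INT32_MIN);        // wraparound case
  return 0;
}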
@@ -3265,10 +3260,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
if (IsUnaryAbs) {
for (unsigned i = 0; i != NumValues; ++i) {
+ SDLoc dl = getCurSDLoc();
+ EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i);
Values[i] =
- DAG.getNode(OpCode, getCurSDLoc(),
- LHSVal.getNode()->getValueType(LHSVal.getResNo() + i),
- SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
+ if (Negate)
+ Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
+ Values[i]);
}
} else {
for (unsigned i = 0; i != NumValues; ++i) {
@@ -3277,7 +3275,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
Values[i] = DAG.getNode(
OpCode, getCurSDLoc(),
- LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops);
+ LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags);
}
}
@@ -3419,7 +3417,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
unsigned SrcAS = SV->getType()->getPointerAddressSpace();
unsigned DestAS = I.getType()->getPointerAddressSpace();
- if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
setValue(&I, N);
@@ -3747,20 +3745,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(Op0);
SDLoc dl = getCurSDLoc();
auto &TLI = DAG.getTargetLoweringInfo();
- MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
- MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
bool IsVectorGEP = I.getType()->isVectorTy();
ElementCount VectorElementCount =
IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
- : ElementCount(0, false);
+ : ElementCount::getFixed(0);
if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
- if (VectorElementCount.Scalable)
+ if (VectorElementCount.isScalable())
N = DAG.getSplatVector(VT, dl, N);
else
N = DAG.getSplatBuildVector(VT, dl, N);
@@ -3833,7 +3829,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (!IdxN.getValueType().isVector() && IsVectorGEP) {
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
VectorElementCount);
- if (VectorElementCount.Scalable)
+ if (VectorElementCount.isScalable())
IdxN = DAG.getSplatVector(VT, dl, IdxN);
else
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
@@ -3874,6 +3870,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
}
}
+ MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
+ MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
+ if (IsVectorGEP) {
+ PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount);
+ PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount);
+ }
+
if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
@@ -4170,7 +4173,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
Root = Chain;
ChainI = 0;
}
- SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags);
+ SDValue Add =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags);
SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
@@ -4332,12 +4336,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
- IndexType = ISD::SIGNED_SCALED;
+ IndexType = ISD::SIGNED_UNSCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
- Ops, MMO, IndexType);
+ Ops, MMO, IndexType, false);
DAG.setRoot(Scatter);
setValue(&I, Scatter);
}
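// A minimal standalone sketch (not from the LLVM tree) of the index-type
// change above: the non-uniform-base fallback now marks the index as
// SIGNED_UNSCALED (with Base = 0 and Scale = 1), i.e. each lane's "index"
// already carries the full pointer value. Plain integers stand in for
// addresses here.
#include <cassert>
#include <cstdint>

static uint64_t scaledAddr(uint64_t Base, int64_t Index, uint64_t Scale) {
  return Base + static_cast<uint64_t>(Index) * Scale;   // SIGNED_SCALED
}

static uint64_t unscaledAddr(uint64_t Base, int64_t Index) {
  return Base + static_cast<uint64_t>(Index);           // SIGNED_UNSCALED
}

int main() {
  // With Base = 0 and Scale = 1 the per-lane pointer is passed through as-is,
  // so scaled and unscaled agree; for a real scale they do not.
  assert(scaledAddr(0, 0x1000, 1) == unscaledAddr(0, 0x1000));
  assert(scaledAddr(0x2000, 4, 8) == 0x2020);
  assert(unscaledAddr(0x2000, 4) == 0x2004);
  return 0;
}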
@@ -4385,7 +4389,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
// Do not serialize masked loads of constant memory with anything.
MemoryLocation ML;
if (VT.isScalableVector())
- ML = MemoryLocation(PtrOperand);
+ ML = MemoryLocation::getAfter(PtrOperand);
else
ML = MemoryLocation(PtrOperand, LocationSize::precise(
DAG.getDataLayout().getTypeStoreSize(I.getType())),
@@ -4443,12 +4447,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
- IndexType = ISD::SIGNED_SCALED;
+ IndexType = ISD::SIGNED_UNSCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
- Ops, MMO, IndexType);
+ Ops, MMO, IndexType, ISD::NON_EXTLOAD);
PendingLoads.push_back(Gather.getValue(1));
setValue(&I, Gather);
@@ -4875,7 +4879,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -4891,13 +4895,13 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
}
// No special expansion.
- return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
+ return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
}
/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -4990,13 +4994,13 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
}
// No special expansion.
- return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
+ return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags);
}
/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -5087,13 +5091,13 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
}
// No special expansion.
- return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
+ return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags);
}
/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -5177,25 +5181,26 @@ static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
}
// No special expansion.
- return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
+ return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags);
}
/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
return getLimitedPrecisionExp2(Op, dl, DAG);
// No special expansion.
- return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
+ return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags);
}
/// visitPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode with x == 10.0f.
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const TargetLowering &TLI) {
+ SelectionDAG &DAG, const TargetLowering &TLI,
+ SDNodeFlags Flags) {
bool IsExp10 = false;
if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -5218,7 +5223,7 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
}
// No special expansion.
- return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags);
}
/// ExpandPowI - Expand a llvm.powi intrinsic.
@@ -5343,7 +5348,7 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>
static void
-getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
+getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
const SDValue &N) {
switch (N.getOpcode()) {
case ISD::CopyFromReg: {
@@ -5454,7 +5459,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
- SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes;
+ SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
if (!Op && N.getNode()) {
getUnderlyingArgRegs(ArgRegsAndSizes, N);
Register Reg;
@@ -5484,8 +5489,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Op) {
// Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
- auto splitMultiRegDbgValue
- = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) {
+ auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
+ SplitRegs) {
unsigned Offset = 0;
for (auto RegAndSize : SplitRegs) {
// If the expression is already a fragment, the current register
@@ -5639,6 +5644,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DebugLoc dl = getCurDebugLoc();
SDValue Res;
+ SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
+
switch (Intrinsic) {
default:
// By default, turn this into a target intrinsic node.
@@ -6053,23 +6062,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(1)), DAG));
return;
case Intrinsic::log:
- setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log2:
- setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ setValue(&I,
+ expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log10:
- setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ setValue(&I,
+ expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp:
- setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp2:
- setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ setValue(&I,
+ expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)), DAG, TLI));
+ getValue(I.getArgOperand(1)), DAG, TLI, Flags));
return;
case Intrinsic::sqrt:
case Intrinsic::fabs:
@@ -6102,7 +6114,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, DAG.getNode(Opcode, sdl,
getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
+ getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::lround:
@@ -6127,44 +6139,47 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
+ getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maxnum:
setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
+ getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::minimum:
setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
+ getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maximum:
setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
+ getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
+ getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::fma:
- setValue(&I, DAG.getNode(ISD::FMA, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)),
- getValue(I.getArgOperand(2))));
+ setValue(&I, DAG.getNode(
+ ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)), Flags));
return;
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#include "llvm/IR/VPIntrinsics.def"
+ visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
+ return;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -6173,17 +6188,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
- getValue(I.getArgOperand(2))));
+ getValue(I.getArgOperand(2)), Flags));
} else {
// TODO: Intrinsic calls should have fast-math-flags.
- SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)));
+ SDValue Mul = DAG.getNode(
+ ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags);
SDValue Add = DAG.getNode(ISD::FADD, sdl,
getValue(I.getArgOperand(0)).getValueType(),
- Mul,
- getValue(I.getArgOperand(2)));
+ Mul, getValue(I.getArgOperand(2)), Flags);
setValue(&I, Add);
}
return;
@@ -6201,6 +6214,20 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
getValue(I.getArgOperand(0)))));
return;
+ case Intrinsic::fptosi_sat: {
+ EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32);
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, Type,
+ getValue(I.getArgOperand(0)), SatW));
+ return;
+ }
+ case Intrinsic::fptoui_sat: {
+ EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32);
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, Type,
+ getValue(I.getArgOperand(0)), SatW));
+ return;
+ }
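// A minimal standalone sketch (not from the LLVM tree) of the saturating
// semantics the new FP_TO_SINT_SAT/FP_TO_UINT_SAT cases carry, shown for a
// 32-bit signed result: out-of-range values clamp to the extremes and NaN
// becomes 0, unlike a plain fptosi/fptoui, which is undefined for such inputs.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

static int32_t fptosiSat32(double X) {
  if (std::isnan(X)) return 0;
  if (X <= static_cast<double>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();
  if (X >= static_cast<double>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(X);                // truncate toward zero
}

int main() {
  assert(fptosiSat32(1.9) == 1);
  assert(fptosiSat32(-1e20) == std::numeric_limits<int32_t>::min());
  assert(fptosiSat32(1e20) == std::numeric_limits<int32_t>::max());
  assert(fptosiSat32(std::nan("")) == 0);
  return 0;
}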
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
@@ -6253,62 +6280,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Y = getValue(I.getArgOperand(1));
SDValue Z = getValue(I.getArgOperand(2));
EVT VT = X.getValueType();
- SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
- SDValue Zero = DAG.getConstant(0, sdl, VT);
- SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
- auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
- if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
- setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
- return;
- }
-
- // When X == Y, this is rotate. If the data type has a power-of-2 size, we
- // avoid the select that is necessary in the general case to filter out
- // the 0-shift possibility that leads to UB.
- if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) {
+ if (X == Y) {
auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
- if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
- setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
- return;
- }
-
- // Some targets only rotate one way. Try the opposite direction.
- RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL;
- if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
- // Negate the shift amount because it is safe to ignore the high bits.
- SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
- setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
- return;
- }
-
- // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
- // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW))
- SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
- SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
- SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
- SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
- setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
- return;
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ } else {
+ auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
+ setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
}
-
- // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
- // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
- SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt);
- SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
- SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
- SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
-
- // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
- // and that is undefined. We must compare and select to avoid UB.
- EVT CCVT = MVT::i1;
- if (VT.isVector())
- CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
-
- // For fshl, 0-shift returns the 1st arg (X).
- // For fshr, 0-shift returns the 2nd arg (Y).
- SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
- setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
return;
}
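// A minimal standalone sketch (not from the LLVM tree) of the funnel-shift
// semantics the simplified lowering above now leaves to the FSHL/FSHR and
// ROTL/ROTR nodes: the shift amount is taken modulo the bit width, a zero
// shift returns the first operand, and X == Y turns the funnel shift into a
// rotate. The zero-shift case is split out because a C++ shift by 32 would be
// undefined.
#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  uint32_t Sh = Z % 32;
  if (Sh == 0) return X;                  // fshl with zero shift returns X
  return (X << Sh) | (Y >> (32 - Sh));    // high bits from X, low bits from Y
}

static uint32_t rotl32(uint32_t X, uint32_t Z) {
  return fshl32(X, X, Z);                 // X == Y is a rotate
}

int main() {
  assert(fshl32(0xAABBCCDDu, 0x11223344u, 8) == 0xBBCCDD11u);
  assert(fshl32(0x12345678u, 0x9ABCDEF0u, 0) == 0x12345678u);
  assert(rotl32(0x80000001u, 1) == 0x00000003u);
  assert(rotl32(0xDEADBEEFu, 32) == 0xDEADBEEFu);  // amount reduced mod 32
  return 0;
}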
case Intrinsic::sadd_sat: {
@@ -6335,6 +6314,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
+ case Intrinsic::sshl_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::ushl_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
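// A minimal standalone sketch (not from the LLVM tree) of the unsigned
// saturating-shift semantics behind the new USHLSAT lowering, for i32: if any
// set bit would be shifted out, the result saturates to UINT32_MAX instead of
// wrapping. An in-range shift amount (S < 32) is assumed, as the IR intrinsic
// requires.
#include <cassert>
#include <cstdint>

static uint32_t ushlSat32(uint32_t X, unsigned S) {
  uint32_t Shifted = X << S;
  if ((Shifted >> S) != X)          // some bits were lost: overflow
    return UINT32_MAX;
  return Shifted;
}

int main() {
  assert(ushlSat32(3, 4) == 48);                   // no overflow, plain shift
  assert(ushlSat32(0x40000000u, 1) == 0x80000000u);
  assert(ushlSat32(0x40000000u, 2) == UINT32_MAX); // overflow saturates
  return 0;
}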
case Intrinsic::smul_fix:
case Intrinsic::umul_fix:
case Intrinsic::smul_fix_sat:
@@ -6357,6 +6348,36 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Op1, Op2, Op3, DAG, TLI));
return;
}
+ case Intrinsic::smax: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::smin: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::umax: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::umin: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::abs: {
+ // TODO: Preserve "int min is poison" arg in SDAG?
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1));
+ return;
+ }
case Intrinsic::stacksave: {
SDValue Op = getRoot();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -6375,7 +6396,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
// Result type for @llvm.get.dynamic.area.offset should match PtrTy for
// target.
- if (PtrTy.getSizeInBits() < ResTy.getSizeInBits())
+ if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
" intrinsic!");
Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
@@ -6393,7 +6414,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
} else {
EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
- unsigned Align = DL->getPrefTypeAlignment(Global->getType());
+ Align Align = DL->getPrefTypeAlign(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
MachinePointerInfo(Global, 0), Align,
MachineMemOperand::MOVolatile);
@@ -6424,9 +6445,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), FI),
- /* Alignment = */ 0, MachineMemOperand::MOVolatile);
+ Res = DAG.getStore(
+ Chain, sdl, Src, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ MaybeAlign(), MachineMemOperand::MOVolatile);
setValue(&I, Res);
DAG.setRoot(Res);
return;
@@ -6444,10 +6466,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return;
+
case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
- // Discard annotate attributes, assumptions, and artificial side-effects.
+ // Discard annotate attributes, noalias scope declarations, assumptions, and
+ // artificial side-effects.
return;
case Intrinsic::codeview_annotation: {
@@ -6508,6 +6533,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, getValue(I.getArgOperand(0)));
return;
+ case Intrinsic::ubsantrap:
case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName =
@@ -6515,12 +6541,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
.getAttribute(AttributeList::FunctionIndex, "trap-func-name")
.getValueAsString();
if (TrapFuncName.empty()) {
- ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
- ISD::TRAP : ISD::DEBUGTRAP;
- DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot()));
+ switch (Intrinsic) {
+ case Intrinsic::trap:
+ DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
+ break;
+ case Intrinsic::debugtrap:
+ DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
+ break;
+ case Intrinsic::ubsantrap:
+ DAG.setRoot(DAG.getNode(
+ ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
+ DAG.getTargetConstant(
+ cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
+ MVT::i32)));
+ break;
+ default: llvm_unreachable("unknown trap intrinsic");
+ }
return;
}
TargetLowering::ArgListTy Args;
+ if (Intrinsic == Intrinsic::ubsantrap) {
+ Args.push_back(TargetLoweringBase::ArgListEntry());
+ Args[0].Val = I.getArgOperand(0);
+ Args[0].Node = getValue(Args[0].Val);
+ Args[0].Ty = Args[0].Val->getType();
+ }
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
@@ -6557,7 +6602,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
EVT OverflowVT = MVT::i1;
if (ResultVT.isVector())
OverflowVT = EVT::getVectorVT(
- *Context, OverflowVT, ResultVT.getVectorNumElements());
+ *Context, OverflowVT, ResultVT.getVectorElementCount());
SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
@@ -6595,7 +6640,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
Value *const ObjectPtr = I.getArgOperand(1);
SmallVector<const Value *, 4> Allocas;
- GetUnderlyingObjects(ObjectPtr, Allocas, *DL);
+ getUnderlyingObjects(ObjectPtr, Allocas);
for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(),
E = Allocas.end(); Object != E; ++Object) {
@@ -6622,6 +6667,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
return;
}
+ case Intrinsic::pseudoprobe: {
+ auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
+ auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr);
+ DAG.setRoot(Res);
+ return;
+ }
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
@@ -6732,7 +6785,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// specific calling convention, and only for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
- if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ if (Triple.getArch() != Triple::x86_64)
return;
SDLoc DL = getCurSDLoc();
@@ -6763,7 +6816,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// specific calling convention, and only for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
- if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ if (Triple.getArch() != Triple::x86_64)
return;
SDLoc DL = getCurSDLoc();
@@ -6797,19 +6850,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
LowerDeoptimizeCall(&I);
return;
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
return;
@@ -6897,36 +6950,57 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::get_active_lane_mask: {
auto DL = getCurSDLoc();
SDValue Index = getValue(I.getOperand(0));
- SDValue BTC = getValue(I.getOperand(1));
+ SDValue TripCount = getValue(I.getOperand(1));
Type *ElementTy = I.getOperand(0)->getType();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned VecWidth = VT.getVectorNumElements();
- SmallVector<SDValue, 16> OpsBTC;
+ SmallVector<SDValue, 16> OpsTripCount;
SmallVector<SDValue, 16> OpsIndex;
SmallVector<SDValue, 16> OpsStepConstants;
for (unsigned i = 0; i < VecWidth; i++) {
- OpsBTC.push_back(BTC);
+ OpsTripCount.push_back(TripCount);
OpsIndex.push_back(Index);
- OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy)));
+ OpsStepConstants.push_back(
+ DAG.getConstant(i, DL, EVT::getEVT(ElementTy)));
}
- EVT CCVT = MVT::i1;
- CCVT = EVT::getVectorVT(I.getContext(), CCVT, VecWidth);
+ EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth);
- auto VecTy = MVT::getVT(FixedVectorType::get(ElementTy, VecWidth));
+ auto VecTy = EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth));
SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex);
SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
SDValue VectorInduction = DAG.getNode(
ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
- SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC);
+ SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount);
SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
- VectorBTC, ISD::CondCode::SETULE);
+ VectorTripCount, ISD::CondCode::SETULT);
setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
SetCC));
return;
}
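// A minimal standalone sketch (not from the LLVM tree) of the per-lane test in
// the reworked get.active.lane.mask lowering above: operand 1 is now the trip
// count rather than the backedge-taken count, so the comparison becomes
// Index + i < TripCount (SETULT instead of SETULE), and lanes whose index
// addition wrapped are forced to false via the UADDO overflow bit.
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<bool> activeLaneMask(uint32_t Index, uint32_t TripCount,
                                        unsigned VecWidth) {
  std::vector<bool> Mask(VecWidth);
  for (unsigned i = 0; i < VecWidth; ++i) {
    uint32_t Sum = Index + i;                  // UADDO result
    bool Overflow = Sum < Index;               // UADDO carry bit
    Mask[i] = !Overflow && (Sum < TripCount);  // AND(NOT(carry), SETULT)
  }
  return Mask;
}

int main() {
  // Trip count 10, starting at index 8: only lanes 0 and 1 are active.
  auto M = activeLaneMask(8, 10, 4);
  assert(M[0] && M[1] && !M[2] && !M[3]);
  // Wrapping lanes are inactive even though the wrapped sum compares < TripCount.
  auto W = activeLaneMask(UINT32_MAX, 10, 4);
  assert(!W[0] && !W[1] && !W[2] && !W[3]);
  return 0;
}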
+ case Intrinsic::experimental_vector_insert: {
+ auto DL = getCurSDLoc();
+
+ SDValue Vec = getValue(I.getOperand(0));
+ SDValue SubVec = getValue(I.getOperand(1));
+ SDValue Index = getValue(I.getOperand(2));
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
+ Index));
+ return;
+ }
+ case Intrinsic::experimental_vector_extract: {
+ auto DL = getCurSDLoc();
+
+ SDValue Vec = getValue(I.getOperand(0));
+ SDValue Index = getValue(I.getOperand(1));
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
+ return;
+ }
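// A minimal standalone sketch (not from the LLVM tree) of the semantics the
// new experimental.vector.insert/extract cases map onto via
// INSERT_SUBVECTOR/EXTRACT_SUBVECTOR: copy a subvector into, or out of, a
// larger fixed-width vector at a given element index.
#include <array>
#include <cassert>
#include <cstddef>

template <size_t N, size_t M>
static std::array<int, N> insertSubvector(std::array<int, N> Vec,
                                          const std::array<int, M> &Sub,
                                          size_t Index) {
  for (size_t i = 0; i < M; ++i)   // assumes Index + M <= N
    Vec[Index + i] = Sub[i];
  return Vec;
}

template <size_t M, size_t N>
static std::array<int, M> extractSubvector(const std::array<int, N> &Vec,
                                           size_t Index) {
  std::array<int, M> Sub{};
  for (size_t i = 0; i < M; ++i)   // assumes Index + M <= N
    Sub[i] = Vec[Index + i];
  return Sub;
}

int main() {
  std::array<int, 4> V{1, 2, 3, 4};
  auto R = insertSubvector(V, std::array<int, 2>{9, 8}, 2);  // {1, 2, 9, 8}
  assert(R[2] == 9 && R[3] == 8);
  auto S = extractSubvector<2>(R, 1);                        // {2, 9}
  assert(S[0] == 2 && S[1] == 9);
  return 0;
}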
}
}
@@ -7042,6 +7116,41 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
setValue(&FPI, FPResult);
}
+static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
+ Optional<unsigned> ResOPC;
+ switch (VPIntrin.getIntrinsicID()) {
+#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN:
+#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID;
+#define END_REGISTER_VP_INTRINSIC(...) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+
+ if (!ResOPC.hasValue())
+ llvm_unreachable(
+ "Inconsistency: no SDNode available for this VPIntrinsic!");
+
+ return ResOPC.getValue();
+}
+
+void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
+ const VPIntrinsic &VPIntrin) {
+ unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
+
+ SmallVector<EVT, 4> ValueVTs;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ // Request operands.
+ SmallVector<SDValue, 7> OpValues;
+ for (int i = 0; i < (int)VPIntrin.getNumArgOperands(); ++i)
+ OpValues.push_back(getValue(VPIntrin.getArgOperand(i)));
+
+ SDLoc DL = getCurSDLoc();
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ setValue(&VPIntrin, Result);
+}
+
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
@@ -7258,9 +7367,9 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
}
SDValue Ptr = Builder.getValue(PtrVal);
- SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root,
- Ptr, MachinePointerInfo(PtrVal),
- /* Alignment = */ 1);
+ SDValue LoadVal =
+ Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr,
+ MachinePointerInfo(PtrVal), Align(1));
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -7281,12 +7390,12 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
setValue(&I, Value);
}
-/// See if we can lower a memcmp call into an optimized form. If so, return
+/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
-bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
@@ -7537,8 +7646,12 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
if (!I.onlyReadsMemory())
return false;
+ SDNodeFlags Flags;
+ Flags.copyFMF(cast<FPMathOperator>(I));
+
SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp));
+ setValue(&I,
+ DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags));
return true;
}
@@ -7553,10 +7666,13 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
if (!I.onlyReadsMemory())
return false;
+ SDNodeFlags Flags;
+ Flags.copyFMF(cast<FPMathOperator>(I));
+
SDValue Tmp0 = getValue(I.getArgOperand(0));
SDValue Tmp1 = getValue(I.getArgOperand(1));
EVT VT = Tmp0.getValueType();
- setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1));
+ setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags));
return true;
}
@@ -7590,6 +7706,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
+ case LibFunc_bcmp:
+ if (visitMemCmpBCmpCall(I))
+ return;
+ break;
case LibFunc_copysign:
case LibFunc_copysignf:
case LibFunc_copysignl:
@@ -7691,7 +7811,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
break;
case LibFunc_memcmp:
- if (visitMemCmpCall(I))
+ if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_mempcpy:
@@ -8111,10 +8231,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpInfo.ConstraintVT =
- OpInfo
- .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
- .getSimpleVT();
+ EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
+ DAG.getDataLayout());
+ OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
} else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
@@ -8376,7 +8495,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) {
InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(
ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
- AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ llvm::append_range(AsmNodeOperands, Ops);
break;
}
@@ -8956,57 +9075,59 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
SDLoc dl = getCurSDLoc();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Res;
- FastMathFlags FMF;
- if (isa<FPMathOperator>(I))
- FMF = I.getFastMathFlags();
+ SDNodeFlags SDFlags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
+ SDFlags.copyFMF(*FPMO);
switch (Intrinsic) {
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- if (FMF.allowReassoc())
+ case Intrinsic::vector_reduce_fadd:
+ if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
- DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
+ DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags),
+ SDFlags);
else
- Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
+ Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags);
break;
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- if (FMF.allowReassoc())
+ case Intrinsic::vector_reduce_fmul:
+ if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
- DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
+ DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags),
+ SDFlags);
else
- Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
+ Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags);
break;
- case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::vector_reduce_add:
Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::vector_reduce_mul:
Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::vector_reduce_and:
Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::vector_reduce_or:
Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::vector_reduce_xor:
Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::vector_reduce_smax:
Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::vector_reduce_smin:
Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::vector_reduce_umax:
Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::vector_reduce_umin:
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_fmax:
- Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1);
+ case Intrinsic::vector_reduce_fmax:
+ Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
break;
- case Intrinsic::experimental_vector_reduce_fmin:
- Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1);
+ case Intrinsic::vector_reduce_fmin:
+ Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
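// A minimal standalone sketch (not from the LLVM tree) of why the renamed
// reductions above keep two flavours: without the reassoc flag the lowering
// uses the ordered VECREDUCE_SEQ_* nodes, with it an unordered VECREDUCE_*
// plus a final FADD/FMUL with the start value. Floating-point addition is not
// associative, so the two orders can give different results.
#include <cstdio>

static float seqReduceFAdd(float Start, const float *V, int N) {
  float Acc = Start;                 // strict left-to-right order
  for (int i = 0; i < N; ++i)
    Acc = Acc + V[i];
  return Acc;
}

static float reassocReduceFAdd(float Start, const float *V, int N) {
  // One possible reassociation: reduce the vector pairwise, then add Start.
  // N == 4 is assumed for brevity.
  float Lo = V[0] + V[1];
  float Hi = V[2] + V[3];
  return Start + (Lo + Hi);
}

int main() {
  const float V[4] = {1e8f, 1.0f, -1e8f, 1.0f};
  float Seq = seqReduceFAdd(0.0f, V, 4);
  float Fast = reassocReduceFAdd(0.0f, V, 4);
  // Under IEEE-754 single precision this prints 1 vs 0.
  std::printf("sequential=%g reassociated=%g\n", Seq, Fast);
  return 0;
}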
@@ -9093,6 +9214,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.IsSRet = true;
Entry.IsNest = false;
Entry.IsByVal = false;
+ Entry.IsByRef = false;
Entry.IsReturned = false;
Entry.IsSwiftSelf = false;
Entry.IsSwiftError = false;
@@ -9213,6 +9335,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
+ if (Args[i].IsByRef)
+ Flags.setByRef();
if (Args[i].IsPreallocated) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
@@ -9418,11 +9542,33 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
return std::make_pair(Res, CLI.Chain);
}
+/// Places new result values for the node in Results (their number
+/// and types must exactly match those of the original return values of
+/// the node), or leaves Results empty, which indicates that the node is not
+/// to be custom lowered after all.
void TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
- if (SDValue Res = LowerOperation(SDValue(N, 0), DAG))
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+ if (!Res.getNode())
+ return;
+
+ // If the original node has one result, take the return value from
+ // LowerOperation as is. It might not be result number 0.
+ if (N->getNumValues() == 1) {
Results.push_back(Res);
+ return;
+ }
+
+ // If the original node has multiple results, then the return node should
+ // have the same number of results.
+ assert((N->getNumValues() == Res->getNumValues()) &&
+ "Lowering returned the wrong number of results!");
+
+ // Place the new result values based on N's result numbers.
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ Results.push_back(Res.getValue(I));
}
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9514,8 +9660,9 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
// We will look through cast uses, so ignore them completely.
if (I.isCast())
continue;
- // Ignore debug info intrinsics, they don't escape or store to allocas.
- if (isa<DbgInfoIntrinsic>(I))
+ // Ignore debug info and pseudo op intrinsics, they don't escape or store
+ // to allocas.
+ if (I.isDebugOrPseudoInst())
continue;
// This is an unknown instruction. Assume it escapes or writes to all
// static alloca operands.
@@ -9545,7 +9692,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
// initializes the alloca. Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
- if (!Arg || Arg->hasPassPointeeByValueAttr() ||
+ if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
@@ -9726,6 +9873,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setSwiftError();
if (Arg.hasAttribute(Attribute::ByVal))
Flags.setByVal();
+ if (Arg.hasAttribute(Attribute::ByRef))
+ Flags.setByRef();
if (Arg.hasAttribute(Attribute::InAlloca)) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
@@ -9744,27 +9893,31 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
- if (F.getCallingConv() == CallingConv::X86_INTR) {
- // IA Interrupt passes frame (1st parameter) by value in the stack.
- if (ArgNo == 0)
- Flags.setByVal();
- }
- if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
- Type *ElementTy = Arg.getParamByValType();
- // For ByVal, size and alignment should be passed from FE. BE will
- // guess if this info is not there but there are cases it cannot get
- // right.
- unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType());
- Flags.setByValSize(FrameSize);
+ Type *ArgMemTy = nullptr;
+ if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
+ Flags.isByRef()) {
+ if (!ArgMemTy)
+ ArgMemTy = Arg.getPointeeInMemoryValueType();
- unsigned FrameAlign;
- if (Arg.getParamAlignment())
- FrameAlign = Arg.getParamAlignment();
- else
- FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(Align(FrameAlign));
+ uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy);
+
+ // For in-memory arguments, size and alignment should be passed from FE.
+ // BE will guess if this info is not there but there are cases it cannot
+ // get right.
+ MaybeAlign MemAlign = Arg.getParamAlign();
+ if (!MemAlign)
+ MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
+
+ if (Flags.isByRef()) {
+ Flags.setByRefSize(MemSize);
+ Flags.setByRefAlign(*MemAlign);
+ } else {
+ Flags.setByValSize(MemSize);
+ Flags.setByValAlign(*MemAlign);
+ }
}
+
if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
if (NeedsRegBlock)
@@ -10641,8 +10794,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
{PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
- SwitchWorkListItem W = WorkList.back();
- WorkList.pop_back();
+ SwitchWorkListItem W = WorkList.pop_back_val();
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f0b7fb0d5229..8f6e98c40161 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,7 +18,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
@@ -26,7 +25,6 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Statepoint.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
@@ -39,6 +37,7 @@
namespace llvm {
+class AAResults;
class AllocaInst;
class AtomicCmpXchgInst;
class AtomicRMWInst;
@@ -63,6 +62,7 @@ class FunctionLoweringInfo;
class GCFunctionInfo;
class GCRelocateInst;
class GCResultInst;
+class GCStatepointInst;
class IndirectBrInst;
class InvokeInst;
class LandingPadInst;
@@ -388,7 +388,7 @@ public:
SelectionDAG &DAG;
const DataLayout *DL = nullptr;
- AliasAnalysis *AA = nullptr;
+ AAResults *AA = nullptr;
const TargetLibraryInfo *LibInfo;
class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
@@ -442,7 +442,7 @@ public:
SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
SwiftError(swifterror) {}
- void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
+ void init(GCFunctionInfo *gfi, AAResults *AA,
const TargetLibraryInfo *li);
/// Clear out the current SelectionDAG and the associated state and prepare
@@ -518,13 +518,6 @@ public:
SDValue getValue(const Value *V);
- /// Return the SDNode for the specified IR value if it exists.
- SDNode *getNodeForIRValue(const Value *V) {
- if (NodeMap.find(V) == NodeMap.end())
- return nullptr;
- return NodeMap[V].getNode();
- }
-
SDValue getNonRegisterValue(const Value *V);
SDValue getValueImpl(const Value *V);
@@ -692,7 +685,7 @@ private:
void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
- void visitFSub(const User &I);
+ void visitFSub(const User &I) { visitBinary(I, ISD::FSUB); }
void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
@@ -747,7 +740,7 @@ private:
void visitFence(const FenceInst &I);
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
- bool visitMemCmpCall(const CallInst &I);
+ bool visitMemCmpBCmpCall(const CallInst &I);
bool visitMemPCpyCall(const CallInst &I);
bool visitMemChrCall(const CallInst &I);
bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
@@ -766,6 +759,7 @@ private:
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+ void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
void visitVAStart(const CallInst &I);
void visitVAArg(const VAArgInst &I);
@@ -902,7 +896,7 @@ struct RegsForValue {
}
/// Return a list of registers and their sizes.
- SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const;
+ SmallVector<std::pair<unsigned, TypeSize>, 4> getRegsAndSizes() const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 42e3016e65b8..d867f3e09e9c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -293,6 +293,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
case ISD::ADDCARRY: return "addcarry";
+ case ISD::SADDO_CARRY: return "saddo_carry";
case ISD::SADDO: return "saddo";
case ISD::UADDO: return "uaddo";
case ISD::SSUBO: return "ssubo";
@@ -302,6 +303,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SUBC: return "subc";
case ISD::SUBE: return "sube";
case ISD::SUBCARRY: return "subcarry";
+ case ISD::SSUBO_CARRY: return "ssubo_carry";
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
@@ -310,6 +312,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UADDSAT: return "uaddsat";
case ISD::SSUBSAT: return "ssubsat";
case ISD::USUBSAT: return "usubsat";
+ case ISD::SSHLSAT: return "sshlsat";
+ case ISD::USHLSAT: return "ushlsat";
case ISD::SMULFIX: return "smulfix";
case ISD::SMULFIXSAT: return "smulfixsat";
@@ -344,6 +348,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint";
+ case ISD::FP_TO_SINT_SAT: return "fp_to_sint_sat";
+ case ISD::FP_TO_UINT_SAT: return "fp_to_uint_sat";
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP: return "fp16_to_fp";
@@ -390,8 +396,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STACKRESTORE: return "stackrestore";
case ISD::TRAP: return "trap";
case ISD::DEBUGTRAP: return "debugtrap";
+ case ISD::UBSANTRAP: return "ubsantrap";
case ISD::LIFETIME_START: return "lifetime.start";
case ISD::LIFETIME_END: return "lifetime.end";
+ case ISD::PSEUDO_PROBE:
+ return "pseudoprobe";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
case ISD::GC_TRANSITION_END: return "gc_transition.end";
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
@@ -410,6 +419,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
case ISD::CTLZ: return "ctlz";
case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+ case ISD::PARITY: return "parity";
// Trampolines
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
@@ -447,9 +457,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETFALSE2: return "setfalse2";
}
case ISD::VECREDUCE_FADD: return "vecreduce_fadd";
- case ISD::VECREDUCE_STRICT_FADD: return "vecreduce_strict_fadd";
+ case ISD::VECREDUCE_SEQ_FADD: return "vecreduce_seq_fadd";
case ISD::VECREDUCE_FMUL: return "vecreduce_fmul";
- case ISD::VECREDUCE_STRICT_FMUL: return "vecreduce_strict_fmul";
+ case ISD::VECREDUCE_SEQ_FMUL: return "vecreduce_seq_fmul";
case ISD::VECREDUCE_ADD: return "vecreduce_add";
case ISD::VECREDUCE_MUL: return "vecreduce_mul";
case ISD::VECREDUCE_AND: return "vecreduce_and";
@@ -461,6 +471,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
+
+ // Vector Predication
+#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
+ case ISD::SDID: \
+ return #NAME;
+#include "llvm/IR/VPIntrinsics.def"
}
}
@@ -730,7 +746,38 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ", compressing";
OS << ">";
- } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ } else if (const auto *MGather = dyn_cast<MaskedGatherSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *MGather->getMemOperand(), G);
+
+ bool doExt = true;
+ switch (MGather->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << MGather->getMemoryVT().getEVTString();
+
+ auto Signed = MGather->isIndexSigned() ? "signed" : "unsigned";
+ auto Scaled = MGather->isIndexScaled() ? "scaled" : "unscaled";
+ OS << ", " << Signed << " " << Scaled << " offset";
+
+ OS << ">";
+ } else if (const auto *MScatter = dyn_cast<MaskedScatterSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *MScatter->getMemOperand(), G);
+
+ if (MScatter->isTruncatingStore())
+ OS << ", trunc to " << MScatter->getMemoryVT().getEVTString();
+
+ auto Signed = MScatter->isIndexSigned() ? "signed" : "unsigned";
+ auto Scaled = MScatter->isIndexScaled() ? "scaled" : "unscaled";
+ OS << ", " << Signed << " " << Scaled << " offset";
+
+ OS << ">";
+ } else if (const MemSDNode *M = dyn_cast<MemSDNode>(this)) {
OS << "<";
printMemOperand(OS, *M->getMemOperand(), G);
OS << ">";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1f0432196a2d..d17dd1c5eccb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -75,6 +75,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -778,6 +779,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
if (ViewDAGCombine1 && MatchFilterBB)
CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -788,16 +794,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
-#endif
-
LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
if (ViewLegalizeTypesDAGs && MatchFilterBB)
@@ -810,16 +816,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
-#endif
-
LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
// Only allow creation of legal node types.
CurDAG->NewNodesMustHaveLegalTypes = true;
@@ -834,15 +840,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
-#endif
-
LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
}
{
@@ -857,6 +863,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
{
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
GroupDescription, TimePassesIsEnabled);
@@ -868,6 +879,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
@@ -898,16 +914,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
-#endif
-
LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
if (ViewDAGCombine2 && MatchFilterBB)
CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -918,16 +934,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
-#endif
-
LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
@@ -1251,6 +1267,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
+ // If the unwinder does not preserve all registers, ensure that the
+ // function marks the clobbered registers as used.
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
+ MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
+
if (Pers == EHPersonality::Wasm_CXX) {
if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI()))
mapWasmLandingPadIndex(MBB, CPI);
@@ -1669,9 +1691,9 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
/// terminator, but additionally the copies that move the vregs into the
/// physical registers.
static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB) {
+FindSplitPointForStackProtector(MachineBasicBlock *BB,
+ const TargetInstrInfo &TII) {
MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
- //
if (SplitPoint == BB->begin())
return SplitPoint;
@@ -1679,6 +1701,31 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) {
MachineBasicBlock::iterator Previous = SplitPoint;
--Previous;
+ if (TII.isTailCall(*SplitPoint) &&
+ Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // Call frames cannot be nested, so if this frame is describing the tail
+ // call itself, then we must insert before the sequence even starts. For
+ // example:
+ // <split point>
+ // ADJCALLSTACKDOWN ...
+ // <Moves>
+ // ADJCALLSTACKUP ...
+ // TAILJMP somewhere
+ // On the other hand, it could be an unrelated call in which case this tail call
+ // has to register moves of its own and should be the split point. For example:
+ // ADJCALLSTACKDOWN
+ // CALL something_else
+ // ADJCALLSTACKUP
+ // <split point>
+ // TAILJMP somewhere
+ do {
+ --Previous;
+ if (Previous->isCall())
+ return SplitPoint;
+ } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
+
+ return Previous;
+ }
+
while (MIIsInTerminatorSequence(*Previous)) {
SplitPoint = Previous;
if (Previous == Start)
@@ -1718,7 +1765,7 @@ SelectionDAGISel::FinishBasicBlock() {
// Add load and check to the basicblock.
FuncInfo->MBB = ParentMBB;
FuncInfo->InsertPt =
- FindSplitPointForStackProtector(ParentMBB);
+ FindSplitPointForStackProtector(ParentMBB, *TII);
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
@@ -1737,7 +1784,7 @@ SelectionDAGISel::FinishBasicBlock() {
// register allocation issues caused by us splitting the parent mbb. The
// register allocator will clean up said virtual copies later on.
MachineBasicBlock::iterator SplitPoint =
- FindSplitPointForStackProtector(ParentMBB);
+ FindSplitPointForStackProtector(ParentMBB, *TII);
// Splice the terminator of ParentMBB into SuccessMBB.
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
@@ -2072,7 +2119,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
- Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ llvm::append_range(Ops, SelOps);
i += 2;
}
}
@@ -2272,7 +2319,7 @@ void SelectionDAGISel::Select_FREEZE(SDNode *N) {
}
/// GetVBR - decode a vbr encoding whose top bit is set.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
assert(Val >= 128 && "Not a VBR");
Val &= 127; // Remove first vbr bit.
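
GetVBR above consumes a variable-width integer from the matcher table: the asserted top bit marks a multi-byte value and the low seven bits carry payload. A self-contained sketch of a decoder in that style follows; the continuation-bit scheme and least-significant-group-first ordering are illustrative assumptions, not a copy of the generated matcher-table format.

  #include <cassert>
  #include <cstdint>
  #include <cstdio>
  #include <vector>

  // Sketch of a VBR-style decoder: each byte carries 7 payload bits, and a
  // set high bit means "more bytes follow". The first byte holds the low
  // seven bits of the value.
  static uint64_t decodeVBR(const std::vector<uint8_t> &Table, unsigned &Idx) {
    uint64_t Val = Table[Idx++];
    if (Val < 128)
      return Val;      // single-byte case, no continuation bit
    Val &= 127;        // strip the continuation bit
    unsigned Shift = 7;
    uint8_t Next;
    do {
      Next = Table[Idx++];
      Val |= uint64_t(Next & 127) << Shift;
      Shift += 7;
    } while (Next & 128);
    return Val;
  }

  int main() {
    // 300 = 0b1_0010_1100: low 7 bits are 0x2C (with continuation), then 0x02.
    std::vector<uint8_t> Table = {0x80 | 0x2C, 0x02};
    unsigned Idx = 0;
    uint64_t V = decodeVBR(Table, Idx);
    assert(V == 300 && Idx == 2);
    std::printf("%llu\n", (unsigned long long)V);
    return 0;
  }
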
@@ -2331,7 +2378,7 @@ void SelectionDAGISel::UpdateChains(
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode != NodeToMatch && ChainNode->use_empty() &&
- !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
+ !llvm::is_contained(NowDeadNodes, ChainNode))
NowDeadNodes.push_back(ChainNode);
}
}
@@ -2469,10 +2516,9 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
}
/// CheckSame - Implements OP_CheckSame.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
-CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N,
- const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes) {
// Accept if it is exactly the same as a previously recorded node.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
@@ -2480,11 +2526,10 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
/// CheckChildSame - Implements OP_CheckChildXSame.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
-CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N,
- const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes,
- unsigned ChildNo) {
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame(
+ const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes,
+ unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
return false; // Match fails if out of range child #.
return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
@@ -2492,20 +2537,20 @@ CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
const SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
const SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
@@ -2513,7 +2558,7 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return N->getOpcode() == Opc;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2523,7 +2568,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL,
unsigned ChildNo) {
@@ -2533,14 +2578,14 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
(ISD::CondCode)MatcherTable[MatcherIndex++];
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
if (2 >= N.getNumOperands())
@@ -2548,7 +2593,7 @@ CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2));
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2559,7 +2604,7 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2570,7 +2615,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return C && C->getSExtValue() == Val;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
@@ -2578,7 +2623,7 @@ CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2591,9 +2636,9 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
-CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const SelectionDAGISel &SDISel) {
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
@@ -2786,6 +2831,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::ANNOTATION_LABEL:
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
+ case ISD::PSEUDO_PROBE:
NodeToMatch->setNodeId(-1); // Mark selected.
return;
case ISD::AssertSext:
@@ -3181,10 +3227,12 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
case OPC_CheckImmAllOnesV:
- if (!ISD::isBuildVectorAllOnes(N.getNode())) break;
+ if (!ISD::isConstantSplatVectorAllOnes(N.getNode()))
+ break;
continue;
case OPC_CheckImmAllZerosV:
- if (!ISD::isBuildVectorAllZeros(N.getNode())) break;
+ if (!ISD::isConstantSplatVectorAllZeros(N.getNode()))
+ break;
continue;
case OPC_CheckFoldableChainNode: {
@@ -3489,7 +3537,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
auto &Chain = ChainNodesMatched;
assert((!E || !is_contained(Chain, N)) &&
"Chain node replaced during MorphNode");
- Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end());
+ llvm::erase_value(Chain, N);
});
Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList,
Ops, EmitNodeInfo));
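
Several hunks in this file swap the classic begin()/end() idioms for the range helpers in llvm/ADT/STLExtras.h: append_range for insert-at-end, is_contained for std::count/std::find membership tests, and erase_value for the erase/remove pattern. A standalone sketch of the equivalences, written against the standard library only so it builds without LLVM headers:

  #include <algorithm>
  #include <cassert>
  #include <vector>

  // Standard-library equivalents of the three idioms the diff rewrites.
  // The llvm::* helpers wrap exactly these begin()/end() patterns.
  template <typename Container, typename Range>
  void append_range_like(Container &C, const Range &R) {
    C.insert(C.end(), std::begin(R), std::end(R));            // llvm::append_range
  }

  template <typename Range, typename T>
  bool is_contained_like(const Range &R, const T &V) {
    return std::find(std::begin(R), std::end(R), V) != std::end(R); // llvm::is_contained
  }

  template <typename Container, typename T>
  void erase_value_like(Container &C, const T &V) {
    C.erase(std::remove(C.begin(), C.end(), V), C.end());     // llvm::erase_value
  }

  int main() {
    std::vector<int> Ops = {1, 2};
    append_range_like(Ops, std::vector<int>{3, 4});
    assert(is_contained_like(Ops, 3));
    erase_value_like(Ops, 2);
    assert(Ops == (std::vector<int>{1, 3, 4}));
    return 0;
  }
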
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 2cb57c1d1ccc..0172646c22ec 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -14,12 +14,10 @@
#include "StatepointLowering.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -30,7 +28,6 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -67,6 +64,20 @@ cl::opt<bool> UseRegistersForDeoptValues(
"use-registers-for-deopt-values", cl::Hidden, cl::init(false),
cl::desc("Allow using registers for non pointer deopt args"));
+cl::opt<bool> UseRegistersForGCPointersInLandingPad(
+ "use-registers-for-gc-values-in-landing-pad", cl::Hidden, cl::init(false),
+ cl::desc("Allow using registers for gc pointer in landing pad"));
+
+cl::opt<unsigned> MaxRegistersForGCPointers(
+ "max-registers-for-gc-values", cl::Hidden, cl::init(0),
+ cl::desc("Max number of VRegs allowed to pass GC pointer meta args in"));
+
+cl::opt<bool> AlwaysSpillBase("statepoint-always-spill-base", cl::Hidden,
+ cl::init(true),
+ cl::desc("Force spilling of base GC pointers"));
+
+typedef FunctionLoweringInfo::StatepointRelocationRecord RecordType;
+
static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
SelectionDAGBuilder &Builder, uint64_t Value) {
SDLoc L = Builder.getCurSDLoc();
@@ -156,14 +167,18 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
// Spill location is known for gc relocates
if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
- const auto &SpillMap =
- Builder.FuncInfo.StatepointSpillMaps[Relocate->getStatepoint()];
+ const auto &RelocationMap =
+ Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()];
- auto It = SpillMap.find(Relocate->getDerivedPtr());
- if (It == SpillMap.end())
+ auto It = RelocationMap.find(Relocate->getDerivedPtr());
+ if (It == RelocationMap.end())
return None;
- return It->second;
+ auto &Record = It->second;
+ if (Record.type != RecordType::Spill)
+ return None;
+
+ return Record.payload.FI;
}
// Look through bitcast instructions.
@@ -221,7 +236,6 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
return None;
}
-
/// Return true if-and-only-if the given SDValue can be lowered as either a
/// constant argument or a stack reference. The key point is that the value
/// doesn't need to be spilled or tracked as a vreg use.
@@ -242,7 +256,6 @@ static bool willLowerDirectly(SDValue Incoming) {
Incoming.isUndef());
}
-
/// Try to find existing copies of the incoming values in stack slots used for
/// statepoint spilling. If we can find a spill slot for the incoming value,
/// mark that slot as allocated, and reuse the same slot for this safepoint.
@@ -388,7 +401,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
StoreMMO);
MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
-
+
Builder.StatepointLowering.setLocation(Incoming, Loc);
}
@@ -485,7 +498,10 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
/// will be set to the last value spilled (if any were).
static void
lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
- SmallVectorImpl<MachineMemOperand*> &MemRefs, SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SmallVectorImpl<MachineMemOperand *> &MemRefs,
+ SmallVectorImpl<SDValue> &GCPtrs,
+ DenseMap<SDValue, int> &LowerAsVReg,
+ SelectionDAGBuilder::StatepointLoweringInfo &SI,
SelectionDAGBuilder &Builder) {
// Lower the deopt and gc arguments for this statepoint. Layout will be:
// deopt argument length, deopt arguments.., gc arguments...
@@ -531,6 +547,66 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
const bool LiveInDeopt =
SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
+ // Decide which derived pointers will go on VRegs
+ unsigned MaxVRegPtrs = MaxRegistersForGCPointers.getValue();
+
+ // Pointers used on the exceptional path of an invoke statepoint.
+ // We cannot assign them to VRegs.
+ SmallSet<SDValue, 8> LPadPointers;
+ if (!UseRegistersForGCPointersInLandingPad)
+ if (auto *StInvoke = dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) {
+ LandingPadInst *LPI = StInvoke->getLandingPadInst();
+ for (auto *Relocate : SI.GCRelocates)
+ if (Relocate->getOperand(0) == LPI) {
+ LPadPointers.insert(Builder.getValue(Relocate->getBasePtr()));
+ LPadPointers.insert(Builder.getValue(Relocate->getDerivedPtr()));
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Deciding how to lower GC Pointers:\n");
+
+ // List of unique lowered GC Pointer values.
+ SmallSetVector<SDValue, 16> LoweredGCPtrs;
+ // Map lowered GC Pointer value to the index in above vector
+ DenseMap<SDValue, unsigned> GCPtrIndexMap;
+
+ unsigned CurNumVRegs = 0;
+
+ auto canPassGCPtrOnVReg = [&](SDValue SD) {
+ if (SD.getValueType().isVector())
+ return false;
+ if (LPadPointers.count(SD))
+ return false;
+ return !willLowerDirectly(SD);
+ };
+
+ auto processGCPtr = [&](const Value *V) {
+ SDValue PtrSD = Builder.getValue(V);
+ if (!LoweredGCPtrs.insert(PtrSD))
+ return; // skip duplicates
+ GCPtrIndexMap[PtrSD] = LoweredGCPtrs.size() - 1;
+
+ assert(!LowerAsVReg.count(PtrSD) && "must not have been seen");
+ if (LowerAsVReg.size() == MaxVRegPtrs)
+ return;
+ assert(V->getType()->isVectorTy() == PtrSD.getValueType().isVector() &&
+ "IR and SD types disagree");
+ if (!canPassGCPtrOnVReg(PtrSD)) {
+ LLVM_DEBUG(dbgs() << "direct/spill "; PtrSD.dump(&Builder.DAG));
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "vreg "; PtrSD.dump(&Builder.DAG));
+ LowerAsVReg[PtrSD] = CurNumVRegs++;
+ };
+
+ // Process derived pointers first to give them more chance to go on VReg.
+ for (const Value *V : SI.Ptrs)
+ processGCPtr(V);
+ for (const Value *V : SI.Bases)
+ processGCPtr(V);
+
+ LLVM_DEBUG(dbgs() << LowerAsVReg.size() << " pointers will go in vregs\n");
+
auto isGCValue = [&](const Value *V) {
auto *Ty = V->getType();
if (!Ty->isPtrOrPtrVectorTy())
@@ -542,7 +618,9 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
};
auto requireSpillSlot = [&](const Value *V) {
- return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V);
+ if (isGCValue(V))
+ return !LowerAsVReg.count(Builder.getValue(V));
+ return !(LiveInDeopt || UseRegistersForDeoptValues);
};
// Before we actually start lowering (and allocating spill slots for values),
@@ -554,9 +632,17 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
if (requireSpillSlot(V))
reservePreviousStackSlotForValue(V, Builder);
}
- for (unsigned i = 0; i < SI.Bases.size(); ++i) {
- reservePreviousStackSlotForValue(SI.Bases[i], Builder);
- reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
+
+ for (const Value *V : SI.Ptrs) {
+ SDValue SDV = Builder.getValue(V);
+ if (!LowerAsVReg.count(SDV))
+ reservePreviousStackSlotForValue(V, Builder);
+ }
+
+ for (const Value *V : SI.Bases) {
+ SDValue SDV = Builder.getValue(V);
+ if (!LowerAsVReg.count(SDV))
+ reservePreviousStackSlotForValue(V, Builder);
}
// First, prefix the list with the number of unique values to be
@@ -567,6 +653,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// The vm state arguments are lowered in an opaque manner. We do not know
// what type of values are contained within.
+ LLVM_DEBUG(dbgs() << "Lowering deopt state\n");
for (const Value *V : SI.DeoptState) {
SDValue Incoming;
// If this is a function argument at a static frame index, generate it as
@@ -578,78 +665,56 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
}
if (!Incoming.getNode())
Incoming = Builder.getValue(V);
+ LLVM_DEBUG(dbgs() << "Value " << *V
+ << " requireSpillSlot = " << requireSpillSlot(V) << "\n");
lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs,
Builder);
}
- // Finally, go ahead and lower all the gc arguments. There's no prefixed
- // length for this one. After lowering, we'll have the base and pointer
- // arrays interwoven with each (lowered) base pointer immediately followed by
- // it's (lowered) derived pointer. i.e
- // (base[0], ptr[0], base[1], ptr[1], ...)
- for (unsigned i = 0; i < SI.Bases.size(); ++i) {
- const Value *Base = SI.Bases[i];
- lowerIncomingStatepointValue(Builder.getValue(Base),
- /*RequireSpillSlot*/ true, Ops, MemRefs,
+ // Finally, go ahead and lower all the gc arguments.
+ pushStackMapConstant(Ops, Builder, LoweredGCPtrs.size());
+ for (SDValue SDV : LoweredGCPtrs)
+ lowerIncomingStatepointValue(SDV, !LowerAsVReg.count(SDV), Ops, MemRefs,
Builder);
- const Value *Ptr = SI.Ptrs[i];
- lowerIncomingStatepointValue(Builder.getValue(Ptr),
- /*RequireSpillSlot*/ true, Ops, MemRefs,
- Builder);
- }
+ // Copy to out vector. LoweredGCPtrs will be empty after this point.
+ GCPtrs = LoweredGCPtrs.takeVector();
// If there are any explicit spill slots passed to the statepoint, record
// them, but otherwise do not do anything special. These are user provided
// allocas and give control over placement to the consumer. In this case,
// it is the contents of the slot which may get updated, not the pointer to
// the alloca
+ SmallVector<SDValue, 4> Allocas;
for (Value *V : SI.GCArgs) {
SDValue Incoming = Builder.getValue(V);
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
// This handles allocas as arguments to the statepoint
assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
"Incoming value is a frame index!");
- Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
- Builder.getFrameIndexTy()));
+ Allocas.push_back(Builder.DAG.getTargetFrameIndex(
+ FI->getIndex(), Builder.getFrameIndexTy()));
auto &MF = Builder.DAG.getMachineFunction();
auto *MMO = getMachineMemOperand(MF, *FI);
MemRefs.push_back(MMO);
}
}
+ pushStackMapConstant(Ops, Builder, Allocas.size());
+ Ops.append(Allocas.begin(), Allocas.end());
- // Record computed locations for all lowered values.
- // This can not be embedded in lowering loops as we need to record *all*
- // values, while previous loops account only values with unique SDValues.
- const Instruction *StatepointInstr = SI.StatepointInstr;
- auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr];
-
- for (const GCRelocateInst *Relocate : SI.GCRelocates) {
- const Value *V = Relocate->getDerivedPtr();
- SDValue SDV = Builder.getValue(V);
- SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
-
- if (Loc.getNode()) {
- SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
- } else {
- // Record value as visited, but not spilled. This is case for allocas
- // and constants. For this values we can avoid emitting spill load while
- // visiting corresponding gc_relocate.
- // Actually we do not need to record them in this map at all.
- // We do this only to check that we are not relocating any unvisited
- // value.
- SpillMap[V] = None;
-
- // Default llvm mechanisms for exporting values which are used in
- // different basic blocks does not work for gc relocates.
- // Note that it would be incorrect to teach llvm that all relocates are
- // uses of the corresponding values so that it would automatically
- // export them. Relocates of the spilled values does not use original
- // value.
- if (Relocate->getParent() != StatepointInstr->getParent())
- Builder.ExportFromCurrentBlock(V);
- }
+ // Now construct the GC base/derived map.
+ pushStackMapConstant(Ops, Builder, SI.Ptrs.size());
+ SDLoc L = Builder.getCurSDLoc();
+ for (unsigned i = 0; i < SI.Ptrs.size(); ++i) {
+ SDValue Base = Builder.getValue(SI.Bases[i]);
+ assert(GCPtrIndexMap.count(Base) && "base not found in index map");
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(GCPtrIndexMap[Base], L, MVT::i64));
+ SDValue Derived = Builder.getValue(SI.Ptrs[i]);
+ assert(GCPtrIndexMap.count(Derived) && "derived not found in index map");
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(GCPtrIndexMap[Derived], L, MVT::i64));
}
}
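
The rewritten GC-argument lowering above first deduplicates the pointers (derived pointers are processed before bases so they get first pick of the VReg budget), then emits each base/derived pair as a pair of indices into that deduplicated list. A distilled model of the bookkeeping, using plain standard-library containers in place of SDValues (the values "b", "p", "q" are placeholders):

  #include <cstdio>
  #include <map>
  #include <string>
  #include <vector>

  int main() {
    std::vector<std::string> Bases = {"b", "b"};
    std::vector<std::string> Ptrs = {"p", "q"}; // derived pointers

    std::vector<std::string> Unique;            // ~ LoweredGCPtrs
    std::map<std::string, unsigned> Index;      // ~ GCPtrIndexMap
    auto process = [&](const std::string &V) {
      if (Index.count(V))
        return; // skip duplicates
      Index[V] = static_cast<unsigned>(Unique.size());
      Unique.push_back(V);
    };
    for (const auto &P : Ptrs)   // derived first, as in the patch
      process(P);
    for (const auto &B : Bases)
      process(B);

    std::printf("unique gc pointers (%zu):", Unique.size());
    for (const auto &U : Unique)
      std::printf(" %s", U.c_str());
    std::printf("\nbase/derived index pairs:");
    for (unsigned i = 0; i < Ptrs.size(); ++i)
      std::printf(" (%u,%u)", Index[Bases[i]], Index[Ptrs[i]]);
    std::printf("\n");
    // Prints: unique gc pointers (3): p q b
    //         base/derived index pairs: (2,0) (2,1)
    return 0;
  }
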
@@ -665,6 +730,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
assert(SI.Bases.size() == SI.Ptrs.size() &&
SI.Ptrs.size() <= SI.GCRelocates.size());
+ LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
#ifndef NDEBUG
for (auto *Reloc : SI.GCRelocates)
if (Reloc->getParent() == SI.StatepointInstr->getParent())
@@ -672,9 +738,16 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
#endif
// Lower statepoint vmstate and gcstate arguments
+
+ // All lowered meta args.
SmallVector<SDValue, 10> LoweredMetaArgs;
+ // Lowered GC pointers (subset of above).
+ SmallVector<SDValue, 16> LoweredGCArgs;
SmallVector<MachineMemOperand*, 16> MemRefs;
- lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this);
+ // Maps derived pointer SDValue to statepoint result of relocated pointer.
+ DenseMap<SDValue, int> LowerAsVReg;
+ lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, LoweredGCArgs, LowerAsVReg,
+ SI, *this);
// Now that we've emitted the spills, we need to update the root so that the
// call sequence is ordered correctly.
@@ -774,7 +847,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
pushStackMapConstant(Ops, *this, Flags);
// Insert all vmstate and gcstate arguments
- Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());
+ llvm::append_range(Ops, LoweredMetaArgs);
// Add register mask from call node
Ops.push_back(*RegMaskIt);
@@ -788,12 +861,79 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// Compute return values. Provide a glue output since we consume one as
// input. This allows someone else to chain off us as needed.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<EVT, 8> NodeTys;
+ for (auto SD : LoweredGCArgs) {
+ if (!LowerAsVReg.count(SD))
+ continue;
+ NodeTys.push_back(SD.getValueType());
+ }
+ LLVM_DEBUG(dbgs() << "Statepoint has " << NodeTys.size() << " results\n");
+ assert(NodeTys.size() == LowerAsVReg.size() && "Inconsistent GC Ptr lowering");
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Glue);
+ unsigned NumResults = NodeTys.size();
MachineSDNode *StatepointMCNode =
DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
+ // For values lowered to tied-defs, create the virtual registers. Note that
+ // for simplicity, we *always* create a vreg even within a single block.
+ DenseMap<SDValue, Register> VirtRegs;
+ for (const auto *Relocate : SI.GCRelocates) {
+ Value *Derived = Relocate->getDerivedPtr();
+ SDValue SD = getValue(Derived);
+ if (!LowerAsVReg.count(SD))
+ continue;
+
+ // Handle multiple gc.relocates of the same input efficiently.
+ if (VirtRegs.count(SD))
+ continue;
+
+ SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]);
+
+ auto *RetTy = Relocate->getType();
+ Register Reg = FuncInfo.CreateRegs(RetTy);
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), Reg, RetTy, None);
+ SDValue Chain = DAG.getRoot();
+ RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr);
+ PendingExports.push_back(Chain);
+
+ VirtRegs[SD] = Reg;
+ }
+
+ // Record for later use how each relocation was lowered. This is needed to
+ // allow later gc.relocates to mirror the lowering chosen.
+ const Instruction *StatepointInstr = SI.StatepointInstr;
+ auto &RelocationMap = FuncInfo.StatepointRelocationMaps[StatepointInstr];
+ for (const GCRelocateInst *Relocate : SI.GCRelocates) {
+ const Value *V = Relocate->getDerivedPtr();
+ SDValue SDV = getValue(V);
+ SDValue Loc = StatepointLowering.getLocation(SDV);
+
+ RecordType Record;
+ if (LowerAsVReg.count(SDV)) {
+ Record.type = RecordType::VReg;
+ assert(VirtRegs.count(SDV));
+ Record.payload.Reg = VirtRegs[SDV];
+ } else if (Loc.getNode()) {
+ Record.type = RecordType::Spill;
+ Record.payload.FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ } else {
+ Record.type = RecordType::NoRelocate;
+ // If we didn't relocate a value, we'll essentially end up inserting an
+ // additional use of the original value when lowering the gc.relocate.
+ // We need to make sure the value is available at the new use, which
+ // might be in another block.
+ if (Relocate->getParent() != StatepointInstr->getParent())
+ ExportFromCurrentBlock(V);
+ }
+ RelocationMap[V] = Record;
+ }
+
+
+
SDNode *SinkNode = StatepointMCNode;
// Build the GC_TRANSITION_END node if necessary.
@@ -804,7 +944,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
SmallVector<SDValue, 8> TEOps;
// Add chain
- TEOps.push_back(SDValue(StatepointMCNode, 0));
+ TEOps.push_back(SDValue(StatepointMCNode, NumResults - 2));
// Add GC transition arguments
for (const Value *V : SI.GCTransitionArgs) {
@@ -814,7 +954,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
}
// Add glue
- TEOps.push_back(SDValue(StatepointMCNode, 1));
+ TEOps.push_back(SDValue(StatepointMCNode, NumResults - 1));
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -825,12 +965,18 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
}
// Replace original call
- DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
+ // Call: ch,glue = CALL ...
+ // Statepoint: [gc relocates],ch,glue = STATEPOINT ...
+ unsigned NumSinkValues = SinkNode->getNumValues();
+ SDValue StatepointValues[2] = {SDValue(SinkNode, NumSinkValues - 2),
+ SDValue(SinkNode, NumSinkValues - 1)};
+ DAG.ReplaceAllUsesWith(CallNode, StatepointValues);
// Remove original call node
DAG.DeleteNode(CallNode);
- // DON'T set the root - under the assumption that it's already set past the
- // inserted node we created.
+ // Since we always emit CopyToRegs (even for local relocates), we must
+ // update root, so that they are emitted before any local uses.
+ (void)getControlRoot();
// TODO: A better future implementation would be to emit a single variable
// argument, variable return value STATEPOINT node here and then hookup the
@@ -927,7 +1073,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
setValue(&I, ReturnValue);
return;
}
-
+
// Result value will be used in a different basic block so we need to export
// it now. Default exporting mechanism will not work here because statepoint
// call has a different type than the actual call. It means that by default
@@ -1024,6 +1170,28 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
#endif
const Value *DerivedPtr = Relocate.getDerivedPtr();
+ auto &RelocationMap =
+ FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()];
+ auto SlotIt = RelocationMap.find(DerivedPtr);
+ assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value");
+ const RecordType &Record = SlotIt->second;
+
+ // If relocation was done via virtual register..
+ if (Record.type == RecordType::VReg) {
+ Register InReg = Record.payload.Reg;
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), InReg, Relocate.getType(),
+ None); // This is not an ABI copy.
+ // We generate copy to/from regs even for local uses, hence we must
+ // chain with current root to ensure proper ordering of copies w.r.t.
+ // statepoint.
+ SDValue Chain = DAG.getRoot();
+ SDValue Relocation = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
+ Chain, nullptr, nullptr);
+ setValue(&Relocate, Relocation);
+ return;
+ }
+
SDValue SD = getValue(DerivedPtr);
if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) {
@@ -1033,19 +1201,17 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
return;
}
- auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()];
- auto SlotIt = SpillMap.find(DerivedPtr);
- assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value");
- Optional<int> DerivedPtrLocation = SlotIt->second;
// We didn't need to spill these special cases (constants and allocas).
// See the handling in spillIncomingValueForStatepoint for detail.
- if (!DerivedPtrLocation) {
+ if (Record.type == RecordType::NoRelocate) {
setValue(&Relocate, SD);
return;
}
- unsigned Index = *DerivedPtrLocation;
+ assert(Record.type == RecordType::Spill);
+
+ unsigned Index = Record.payload.FI;
SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
// All the reloads are independent and are reading memory only modified by
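
The statepoint lowering now records, for every derived pointer, whether it ended up in a virtual register, in a spill slot, or needed no relocation, and visitGCRelocate replays that decision. The shape implied by the uses above (Record.type, Record.payload.Reg, Record.payload.FI) is roughly a tagged union; the sketch below is an illustrative approximation, not the actual FunctionLoweringInfo::StatepointRelocationRecord declaration:

  #include <cassert>

  // Illustrative approximation of the relocation record used above.
  struct RelocationRecord {
    enum Kind { VReg, Spill, NoRelocate } type = NoRelocate;
    union {
      unsigned Reg; // virtual register holding the relocated pointer
      int FI;       // frame index of the spill slot
    } payload;
  };

  int main() {
    RelocationRecord R;
    R.type = RelocationRecord::Spill;
    R.payload.FI = 42;
    assert(R.type == RelocationRecord::Spill && R.payload.FI == 42);

    R.type = RelocationRecord::VReg;
    R.payload.Reg = 7; // e.g. a vreg created for the relocated value
    assert(R.payload.Reg == 7);
    return 0;
  }
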
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8b3e6189a07f..b0ad86899d25 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -93,7 +93,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
SDValue Value = OutVals[I];
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
- MCRegister ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
+ Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
return false;
}
@@ -250,7 +250,7 @@ bool TargetLowering::findOptimalMemOpLowering(
bool Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
- VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 1,
MachineMemOperand::MONone, &Fast) &&
Fast)
VTSize = Size;
@@ -912,8 +912,14 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
- Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
- Known.Zero = ~Known.One;
+ Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
+ return false;
+ }
+
+ if (Op.getOpcode() == ISD::ConstantFP) {
+ // We know all of the bits for a floating point constant!
+ Known = KnownBits::makeConstant(
+ cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
return false;
}
@@ -1009,10 +1015,8 @@ bool TargetLowering::SimplifyDemandedBits(
Depth + 1))
return true;
- if (!!DemandedVecElts) {
- Known.One &= KnownVec.One;
- Known.Zero &= KnownVec.Zero;
- }
+ if (!!DemandedVecElts)
+ Known = KnownBits::commonBits(Known, KnownVec);
return false;
}
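
KnownBits::commonBits, used repeatedly in these hunks, keeps only the facts both operands agree on, which is exactly the One &= / Zero &= pairs it replaces. A small worked example over plain 8-bit masks (a stand-in for the KnownBits class, where a set mask bit means that bit of the value is known):

  #include <cassert>
  #include <cstdint>

  // Minimal stand-in for KnownBits: a bit is known zero/one when the matching
  // mask bit is set. commonBits keeps only what both inputs agree on.
  struct Known8 {
    uint8_t Zero, One;
  };

  static Known8 commonBits(Known8 A, Known8 B) {
    return {uint8_t(A.Zero & B.Zero), uint8_t(A.One & B.One)};
  }

  int main() {
    Known8 A{0xF0, 0x0C}; // value is 0b0000_11??: high nibble zero, bits 3..2 one
    Known8 B{0xF1, 0x0A}; // value is 0b0000_1?10: bit 0 also zero, bits 3 and 1 one
    Known8 C = commonBits(A, B);
    assert(C.Zero == 0xF0); // only the high nibble is known zero in both
    assert(C.One == 0x08);  // only bit 3 is known one in both
    return 0;
  }
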
@@ -1037,14 +1041,10 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.setAllBits();
Known.One.setAllBits();
- if (!!DemandedSubElts) {
- Known.One &= KnownSub.One;
- Known.Zero &= KnownSub.Zero;
- }
- if (!!DemandedSrcElts) {
- Known.One &= KnownSrc.One;
- Known.Zero &= KnownSrc.Zero;
- }
+ if (!!DemandedSubElts)
+ Known = KnownBits::commonBits(Known, KnownSub);
+ if (!!DemandedSrcElts)
+ Known = KnownBits::commonBits(Known, KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
@@ -1101,10 +1101,8 @@ bool TargetLowering::SimplifyDemandedBits(
Known2, TLO, Depth + 1))
return true;
// Known bits are shared by every demanded subvector element.
- if (!!DemandedSubElts) {
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
- }
+ if (!!DemandedSubElts)
+ Known = KnownBits::commonBits(Known, Known2);
}
break;
}
@@ -1142,15 +1140,13 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
Depth + 1))
return true;
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
if (!!DemandedRHS) {
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
Depth + 1))
return true;
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
// Attempt to avoid multi-use ops if we don't need anything from them.
@@ -1325,15 +1321,15 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// If all of the unknown bits are known to be zero on one side or the other
- // (but not both) turn this into an *inclusive* or.
+ // turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
if (C) {
- // If one side is a constant, and all of the known set bits on the other
- // side are also set in the constant, turn this into an AND, as we know
+ // If one side is a constant, and all of the set bits in the constant are
+ // also known set on the other side, turn this into an AND, as we know
// the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
// NB: it is okay if more bits are known than are requested
@@ -1377,8 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
@@ -1395,8 +1390,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
@@ -1728,6 +1722,32 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
+ case ISD::UMIN: {
+ // Check if one arg is always less than (or equal) to the other arg.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+ KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
+ Known = KnownBits::umin(Known0, Known1);
+ if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
+ return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
+ if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
+ return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
+ break;
+ }
+ case ISD::UMAX: {
+ // Check if one arg is always greater than (or equal) to the other arg.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+ KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
+ Known = KnownBits::umax(Known0, Known1);
+ if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
+ return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
+ if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
+ return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
+ break;
+ }
case ISD::BITREVERSE: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.reverseBits();
@@ -1748,6 +1768,17 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero = Known2.Zero.byteSwap();
break;
}
+ case ISD::CTPOP: {
+ // If only 1 bit is demanded, replace with PARITY as long as we're before
+ // op legalization.
+ // FIXME: Limit to scalars for now.
+ if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
+ Op.getOperand(0)));
+
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -1858,6 +1889,11 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::SIGN_EXTEND:
@@ -1906,6 +1942,11 @@ bool TargetLowering::SimplifyDemandedBits(
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::ANY_EXTEND:
@@ -1945,7 +1986,8 @@ bool TargetLowering::SimplifyDemandedBits(
// zero/one bits live out.
unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
APInt TruncMask = DemandedBits.zext(OperandBitWidth);
- if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
Known = Known.trunc(BitWidth);
@@ -1968,9 +2010,9 @@ bool TargetLowering::SimplifyDemandedBits(
// undesirable.
break;
- SDValue ShAmt = Src.getOperand(1);
- auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
- if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
+ const APInt *ShAmtC =
+ TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
+ if (!ShAmtC || ShAmtC->uge(BitWidth))
break;
uint64_t ShVal = ShAmtC->getZExtValue();
@@ -1982,12 +2024,12 @@ bool TargetLowering::SimplifyDemandedBits(
if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
- if (TLO.LegalTypes())
- ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
+ SDValue NewShAmt = TLO.DAG.getConstant(
+ ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
- Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
}
break;
}
@@ -2012,10 +2054,14 @@ bool TargetLowering::SimplifyDemandedBits(
case ISD::EXTRACT_VECTOR_ELT: {
SDValue Src = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
- unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
unsigned EltBitWidth = Src.getScalarValueSizeInBits();
+ if (SrcEltCnt.isScalable())
+ return false;
+
// Demand the bits from every vector element without a constant index.
+ unsigned NumSrcElts = SrcEltCnt.getFixedValue();
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
@@ -2229,9 +2275,13 @@ bool TargetLowering::SimplifyDemandedBits(
if (C->isOpaque())
return false;
}
- // TODO: Handle float bits as well.
if (VT.isInteger())
return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+ if (VT.isFloatingPoint())
+ return TLO.CombineTo(
+ Op,
+ TLO.DAG.getConstantFP(
+ APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
}
return false;
@@ -2593,13 +2643,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero, TLO, Depth + 1))
return true;
- KnownUndef.clearBit(Idx);
- if (Scl.isUndef())
- KnownUndef.setBit(Idx);
+ KnownUndef.setBitVal(Idx, Scl.isUndef());
- KnownZero.clearBit(Idx);
- if (isNullConstant(Scl) || isNullFPConstant(Scl))
- KnownZero.setBit(Idx);
+ KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
break;
}
@@ -3347,6 +3393,74 @@ SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
+static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
+ SDValue N0, const APInt &C1,
+ ISD::CondCode Cond, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ // Look through truncs that don't change the value of a ctpop.
+ // FIXME: Add vector support? Need to be careful with setcc result type below.
+ SDValue CTPOP = N0;
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
+ N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
+ return SDValue();
+
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // If this is a vector CTPOP, keep the CTPOP if it is legal.
+ // TODO: Should we check if CTPOP is legal(or custom) for scalars?
+ if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
+ return SDValue();
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
+ unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
+ if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
+ return SDValue();
+ if (C1 == 0 && (Cond == ISD::SETULT))
+ return SDValue(); // This is handled elsewhere.
+
+ unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
+
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ SDValue Result = CTOp;
+ for (unsigned i = 0; i < Passes; i++) {
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
+ Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
+ }
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
+ }
+
+ // If ctpop is not supported, expand a power-of-2 comparison based on it.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
+ // For scalars, keep CTPOP if it is legal or custom.
+ if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
+ return SDValue();
+ // This is based on X86's custom lowering for CTPOP which produces more
+ // instructions than the expansion here.
+
+ // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
+ // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
+ SDValue Zero = DAG.getConstant(0, dl, CTVT);
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ assert(CTVT.isInteger());
+ ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
+ SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
+ SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+ unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
+ return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
+ }
+
+ return SDValue();
+}
+
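
simplifySetCCWithCTPOP rests on the classic clear-the-lowest-set-bit trick: x & (x - 1) drops one set bit, so (ctpop x) u< 2 is equivalent to (x & (x - 1)) == 0, and (ctpop x) == 1 to x != 0 && (x & (x - 1)) == 0, matching the comments above. An exhaustive check of both identities over all 16-bit values, in plain C++ independent of the DAG types:

  #include <cassert>
  #include <cstdint>

  // Portable popcount so the check does not rely on compiler builtins.
  static unsigned popcount16(uint16_t X) {
    unsigned N = 0;
    while (X) {
      X &= X - 1; // clear the lowest set bit
      ++N;
    }
    return N;
  }

  int main() {
    for (uint32_t I = 0; I <= 0xFFFF; ++I) {
      uint16_t X = uint16_t(I);
      // (ctpop x) u< 2  <=>  (x & (x-1)) == 0
      assert((popcount16(X) < 2) == ((X & uint16_t(X - 1)) == 0));
      // (ctpop x) == 1  <=>  x != 0 && (x & (x-1)) == 0
      assert((popcount16(X) == 1) == (X != 0 && (X & uint16_t(X - 1)) == 0));
    }
    return 0;
  }
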
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -3363,8 +3477,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Ensure that the constant occurs on the RHS and fold constant comparisons.
// TODO: Handle non-splat vector constants. All undef causes trouble.
+ // FIXME: We can't yet fold constant scalable vector splats, so avoid an
+ // infinite loop here when we encounter one.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
if (isConstOrConstSplat(N0) &&
+ (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -3376,75 +3493,46 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
- DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
- !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
+ DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
+ !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
- if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ if (auto *N1C = isConstOrConstSplat(N1)) {
const APInt &C1 = N1C->getAPIntValue();
+ // Optimize some CTPOP cases.
+ if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
+ return V;
+
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- const APInt &ShAmt = N0.getConstantOperandAPInt(1);
- if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- ShAmt == Log2_32(N0.getValueSizeInBits())) {
- if ((C1 == 0) == (Cond == ISD::SETEQ)) {
- // (srl (ctlz x), 5) == 0 -> X != 0
- // (srl (ctlz x), 5) != 1 -> X != 0
- Cond = ISD::SETNE;
- } else {
- // (srl (ctlz x), 5) != 0 -> X == 0
- // (srl (ctlz x), 5) == 1 -> X == 0
- Cond = ISD::SETEQ;
+ isPowerOf2_32(N0.getScalarValueSizeInBits())) {
+ if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
+ Cond);
}
- SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
- return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
- Zero, Cond);
}
}
+ }
- SDValue CTPOP = N0;
- // Look through truncs that don't change the value of a ctpop.
- if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
- CTPOP = N0.getOperand(0);
-
- if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
- (N0 == CTPOP ||
- N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
- EVT CTVT = CTPOP.getValueType();
- SDValue CTOp = CTPOP.getOperand(0);
-
- // (ctpop x) u< 2 -> (x & x-1) == 0
- // (ctpop x) u> 1 -> (x & x-1) != 0
- if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
- SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
- SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
- SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
- ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
- return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
- }
-
- // If ctpop is not supported, expand a power-of-2 comparison based on it.
- if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
- // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
- // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
- SDValue Zero = DAG.getConstant(0, dl, CTVT);
- SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
- assert(CTVT.isInteger());
- ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
- SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
- SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
- SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
- SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
- unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
- return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
- }
- }
+ // FIXME: Support vectors.
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
// (zext x) == C --> x == (trunc C)
// (sext x) == C --> x == (trunc C)
@@ -3578,11 +3666,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
- Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl);
- unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
- SDValue NewLoad = DAG.getLoad(
- newVT, dl, Lod->getChain(), Ptr,
- Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
+ SDValue NewLoad =
+ DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
+ Lod->getOriginalAlign());
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
@@ -3647,7 +3736,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
break; // todo, be more careful with signed comparisons
}
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
+ OpVT)) {
EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
EVT ExtDstTy = N0.getValueType();
@@ -3656,26 +3747,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the constant doesn't fit into the number of bits for the source of
// the sign extension, it is impossible for both sides to be equal.
if (C1.getMinSignedBits() > ExtSrcTyBits)
- return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
+ return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
- SDValue ZextOp;
- EVT Op0Ty = N0.getOperand(0).getValueType();
- if (Op0Ty == ExtSrcTy) {
- ZextOp = N0.getOperand(0);
- } else {
- APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
- ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
- DAG.getConstant(Imm, dl, Op0Ty));
- }
+ assert(ExtDstTy == N0.getOperand(0).getValueType() &&
+ ExtDstTy != ExtSrcTy && "Unexpected types!");
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
+ DAG.getConstant(Imm, dl, ExtDstTy));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
// Otherwise, make this a use of a zext.
return DAG.getSetCC(dl, VT, ZextOp,
- DAG.getConstant(C1 & APInt::getLowBitsSet(
- ExtDstTyBits,
- ExtSrcTyBits),
- dl, ExtDstTy),
- Cond);
+ DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
} else if ((N1C->isNullValue() || N1C->isOne()) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
@@ -3699,8 +3782,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
- isa<ConstantSDNode>(N0.getOperand(1)) &&
- cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
+ isOneConstant(N0.getOperand(1))) {
// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
// can only do this if the top bits are known zero.
unsigned BitWidth = N0.getValueSizeInBits();
@@ -3744,9 +3826,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
}
}
- if (Op0.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(Op0.getOperand(1)) &&
- cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
+ if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
@@ -3884,6 +3964,67 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
VT, N0, N1, Cond, DCI, dl))
return CC;
+
+ // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
+ // For example, when high 32-bits of i64 X are known clear:
+ // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
+ // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
+ bool CmpZero = N1C->getAPIntValue().isNullValue();
+ bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
+ if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
+ // Match or(lo,shl(hi,bw/2)) pattern.
+ auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
+ unsigned EltBits = V.getScalarValueSizeInBits();
+ if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
+ return false;
+ SDValue LHS = V.getOperand(0);
+ SDValue RHS = V.getOperand(1);
+ APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
+      // Unshifted element must have zero upper bits.
+ if (RHS.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(RHS.getOperand(1)) &&
+ RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
+ DAG.MaskedValueIsZero(LHS, HiBits)) {
+ Lo = LHS;
+ Hi = RHS.getOperand(0);
+ return true;
+ }
+ if (LHS.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
+ DAG.MaskedValueIsZero(RHS, HiBits)) {
+ Lo = RHS;
+ Hi = LHS.getOperand(0);
+ return true;
+ }
+ return false;
+ };
+
+ auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
+ unsigned EltBits = N0.getScalarValueSizeInBits();
+ unsigned HalfBits = EltBits / 2;
+ APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
+ SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
+ SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
+ SDValue NewN0 =
+ DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
+ SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
+ return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
+ };
+
+ SDValue Lo, Hi;
+ if (IsConcat(N0, Lo, Hi))
+ return MergeConcat(Lo, Hi);
+
+ if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
+ SDValue Lo0, Lo1, Hi0, Hi1;
+ if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
+ IsConcat(N0.getOperand(1), Lo1, Hi1)) {
+ return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
+ DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
+ }
+ }
+ }
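A standalone sanity check of the identity in the comments above, not part of the patch; the constants are arbitrary, and the high half of X is kept clear as the fold requires:

#include <cassert>
#include <cstdint>

int main() {
  // High 32 bits of X are known to be zero, as MaskedValueIsZero requires.
  uint64_t X = 0x00000000DEADBEEFULL;
  uint32_t Y = 0x12345678u;
  uint64_t Concat = X | (uint64_t(Y) << 32);

  // all bits clear: (X | (Y<<32)) == 0   <->   (X | Y) == 0
  assert((Concat == 0) == (((X & 0xFFFFFFFFu) | Y) == 0));

  // all bits set:   (X | (Y<<32)) == -1  <->   (X & Y) == 0xFFFFFFFF
  assert((Concat == ~0ULL) == ((X & Y) == 0xFFFFFFFFULL));

  // The all-ones case with both halves actually all ones.
  uint64_t X1 = 0x00000000FFFFFFFFULL;
  uint32_t Y1 = 0xFFFFFFFFu;
  assert((X1 | (uint64_t(Y1) << 32)) == ~0ULL && (X1 & Y1) == 0xFFFFFFFFULL);
  return 0;
}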
}
// If we have "setcc X, C0", check to see if we can shrink the immediate
@@ -3891,20 +4032,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// TODO: Support this for vectors after legalize ops.
if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
// SETUGT X, SINTMAX -> SETLT X, 0
- if (Cond == ISD::SETUGT &&
- C1 == APInt::getSignedMaxValue(OperandBitSize))
+ // SETUGE X, SINTMIN -> SETLT X, 0
+ if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
+ (Cond == ISD::SETUGE && C1.isMinSignedValue()))
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(0, dl, N1.getValueType()),
ISD::SETLT);
// SETULT X, SINTMIN -> SETGT X, -1
- if (Cond == ISD::SETULT &&
- C1 == APInt::getSignedMinValue(OperandBitSize)) {
- SDValue ConstMinusOne =
- DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
- N1.getValueType());
- return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
- }
+ // SETULE X, SINTMAX -> SETGT X, -1
+ if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
+ (Cond == ISD::SETULE && C1.isMaxSignedValue()))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getAllOnesConstant(dl, N1.getValueType()),
+ ISD::SETGT);
}
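The four rewrites above all reduce an unsigned compare against a signed boundary to a sign test; a quick i32 check, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t I = -1000; I <= 1000; ++I) {
    int32_t X = int32_t(I);
    uint32_t U = uint32_t(X);
    // SETUGT X, SINTMAX -> SETLT X, 0
    assert((U > uint32_t(INT32_MAX)) == (X < 0));
    // SETUGE X, SINTMIN -> SETLT X, 0
    assert((U >= uint32_t(INT32_MIN)) == (X < 0));
    // SETULT X, SINTMIN -> SETGT X, -1
    assert((U < uint32_t(INT32_MIN)) == (X > -1));
    // SETULE X, SINTMAX -> SETGT X, -1
    assert((U <= uint32_t(INT32_MAX)) == (X > -1));
  }
  return 0;
}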
}
@@ -3915,8 +4056,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &C1 = N1C->getAPIntValue();
EVT ShValTy = N0.getValueType();
- // Fold bit comparisons when we can.
- if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ // Fold bit comparisons when we can. This will result in an
+ // incorrect value when boolean false is negative one, unless
+ // the bitsize is 1 in which case the false value is the same
+ // in practice regardless of the representation.
+ if ((VT.getSizeInBits() == 1 ||
+ getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
(VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
@@ -4312,8 +4458,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
}
SDValue TargetLowering::LowerAsmOutputForConstraint(
- SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
- SelectionDAG &DAG) const {
+ SDValue &Chain, SDValue &Flag, const SDLoc &DL,
+ const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
return SDValue();
}
@@ -4887,9 +5033,15 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
return SDValue();
SDValue Shift, Factor;
- if (VT.isVector()) {
+ if (VT.isFixedLengthVector()) {
Shift = DAG.getBuildVector(ShVT, dl, Shifts);
Factor = DAG.getBuildVector(VT, dl, Factors);
+ } else if (VT.isScalableVector()) {
+ assert(Shifts.size() == 1 && Factors.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
+ Factor = DAG.getSplatVector(VT, dl, Factors[0]);
} else {
Shift = Shifts[0];
Factor = Factors[0];
@@ -4982,11 +5134,20 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue MagicFactor, Factor, Shift, ShiftMask;
- if (VT.isVector()) {
+ if (VT.isFixedLengthVector()) {
MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
Factor = DAG.getBuildVector(VT, dl, Factors);
Shift = DAG.getBuildVector(ShVT, dl, Shifts);
ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
+ } else if (VT.isScalableVector()) {
+ assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
+ Shifts.size() == 1 && ShiftMasks.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
+ Factor = DAG.getSplatVector(VT, dl, Factors[0]);
+ Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
+ ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
} else {
MagicFactor = MagicFactors[0];
Factor = Factors[0];
@@ -5100,11 +5261,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue PreShift, PostShift, MagicFactor, NPQFactor;
- if (VT.isVector()) {
+ if (VT.isFixedLengthVector()) {
PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
+ } else if (VT.isScalableVector()) {
+ assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
+ NPQFactors.size() == 1 && PostShifts.size() == 1 &&
+ "Expected matchUnaryPredicate to return one for scalable vectors");
+ PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
+ MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
+ NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
+ PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
} else {
PreShift = PreShifts[0];
MagicFactor = MagicFactors[0];
@@ -5156,8 +5325,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
Created.push_back(Q.getNode());
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
SDValue One = DAG.getConstant(1, dl, VT);
- SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
+ SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
@@ -5584,7 +5755,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
return SDValue();
SDValue PVal, AVal, KVal, QVal;
- if (VT.isVector()) {
+ if (VT.isFixedLengthVector()) {
if (HadOneDivisor) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep '0'`s.
@@ -5603,6 +5774,15 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
AVal = DAG.getBuildVector(VT, DL, AAmts);
KVal = DAG.getBuildVector(ShVT, DL, KAmts);
QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else if (VT.isScalableVector()) {
+ assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
+ QAmts.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
+ AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
+ KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
+ QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
} else {
PVal = PAmts[0];
AVal = AAmts[0];
@@ -5697,6 +5877,28 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
return false;
}
+SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
+ const DenormalMode &Mode) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+  // Test for denormal inputs, since the estimate would be wrong for them.
+ if (Mode.Input == DenormalMode::IEEE) {
+ // This is specifically a check for the handling of denormal inputs,
+ // not the result.
+
+ // Test = fabs(X) < SmallestNormal
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+ SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+ SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+ return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+ }
+ // Test = X == 0.0
+ return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+}
+
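For reference (not part of the patch), a scalar model of the test built by getSqrtInputTest under IEEE denormal handling; the names below are ad hoc:

#include <cassert>
#include <cfloat>
#include <cmath>

// Test = fabs(X) < SmallestNormal: the input (not the result) needs the
// zero/denormal fixup exactly when its magnitude is below the smallest
// normal value.
static bool sqrtEstimateNeedsFixup(double X) {
  return std::fabs(X) < DBL_MIN;   // DBL_MIN is the smallest normal double
}

int main() {
  assert(sqrtEstimateNeedsFixup(0.0));
  assert(sqrtEstimateNeedsFixup(DBL_MIN / 4.0)); // a denormal input
  assert(!sqrtEstimateNeedsFixup(1.0));
  return 0;
}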
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOps, bool OptForSize,
NegatibleCost &Cost,
@@ -5733,6 +5935,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
+ // Because getNegatedExpression can delete nodes we need a handle to keep
+ // temporary nodes alive in case the recursion manages to create an identical
+ // node.
+ std::list<HandleSDNode> Handles;
+
switch (Opcode) {
case ISD::ConstantFP: {
// Don't invert constant FP values after legalization unless the target says
@@ -5801,11 +6008,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // Prevent this node from being deleted by the next call.
+ if (NegX)
+ Handles.emplace_back(NegX);
+
// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+ // We're done with the handles.
+ Handles.clear();
+
// Negate the X if its cost is less or equal than Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
@@ -5850,11 +6064,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // Prevent this node from being deleted by the next call.
+ if (NegX)
+ Handles.emplace_back(NegX);
+
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+ // We're done with the handles.
+ Handles.clear();
+
// Negate the X if its cost is less or equal than Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
@@ -5892,15 +6113,25 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
if (!NegZ)
break;
+ // Prevent this node from being deleted by the next two calls.
+ Handles.emplace_back(NegZ);
+
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // Prevent this node from being deleted by the next call.
+ if (NegX)
+ Handles.emplace_back(NegX);
+
// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+ // We're done with the handles.
+ Handles.clear();
+
// Negate the X if its cost is less or equal than Y.
if (NegX && (CostX <= CostY)) {
Cost = std::min(CostX, CostZ);
@@ -5941,7 +6172,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
// Legalization Utilities
//===----------------------------------------------------------------------===//
-bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
+bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
SDValue LHS, SDValue RHS,
SmallVectorImpl<SDValue> &Result,
EVT HiLoVT, SelectionDAG &DAG,
@@ -5964,8 +6195,6 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
unsigned OuterBitSize = VT.getScalarSizeInBits();
unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
- unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
- unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
// LL, LH, RL, and RH must be either all NULL or all set to a value.
assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
@@ -6014,8 +6243,9 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
}
}
- if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
- RHSSB > InnerBitSize) {
+ if (!VT.isVector() && Opcode == ISD::MUL &&
+ DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
+ DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
// The input values are both sign-extended.
// TODO non-MUL case?
if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
@@ -6129,7 +6359,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
SDValue LL, SDValue LH, SDValue RL,
SDValue RH) const {
SmallVector<SDValue, 2> Result;
- bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
+ bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
N->getOperand(0), N->getOperand(1), Result, HiLoVT,
DAG, Kind, LL, LH, RL, RH);
if (Ok) {
@@ -6141,7 +6371,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
}
// Check that (every element of) Z is undef or not an exact multiple of BW.
-static bool isNonZeroModBitWidth(SDValue Z, unsigned BW) {
+static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
return ISD::matchUnaryPredicate(
Z,
[=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
@@ -6168,9 +6398,35 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
EVT ShVT = Z.getValueType();
+ // If a funnel shift in the other direction is more supported, use it.
+ unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
+ if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
+ isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
+ if (isNonZeroModBitWidthOrUndef(Z, BW)) {
+ // fshl X, Y, Z -> fshr X, Y, -Z
+ // fshr X, Y, Z -> fshl X, Y, -Z
+ SDValue Zero = DAG.getConstant(0, DL, ShVT);
+ Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
+ } else {
+ // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
+ // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ if (IsFSHL) {
+ Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
+ X = DAG.getNode(ISD::SRL, DL, VT, X, One);
+ } else {
+ X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
+ Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
+ }
+ Z = DAG.getNOT(DL, Z, ShVT);
+ }
+ Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
+ return true;
+ }
+
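A standalone check of the reversal identity in the comments above, on a single 32-bit lane; the reference fshl/fshr helpers are only a scalar model, not part of the patch:

#include <cassert>
#include <cstdint>

// Reference funnel shifts on i32; the shift amount is taken modulo 32.
static uint32_t fshl(uint32_t X, uint32_t Y, unsigned Z) {
  Z %= 32;
  return Z ? (X << Z) | (Y >> (32 - Z)) : X;
}
static uint32_t fshr(uint32_t X, uint32_t Y, unsigned Z) {
  Z %= 32;
  return Z ? (X << (32 - Z)) | (Y >> Z) : Y;
}

int main() {
  uint32_t X = 0x12345678u, Y = 0x9ABCDEF0u;
  // With Z known non-zero modulo the bit width:
  //   fshl X, Y, Z  ==  fshr X, Y, -Z   (negation taken modulo 32)
  for (unsigned Z = 1; Z < 32; ++Z)
    assert(fshl(X, Y, Z) == fshr(X, Y, 32 - Z));
  return 0;
}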
SDValue ShX, ShY;
SDValue ShAmt, InvShAmt;
- if (isNonZeroModBitWidth(Z, BW)) {
+ if (isNonZeroModBitWidthOrUndef(Z, BW)) {
// fshl: X << C | Y >> (BW - C)
// fshr: X << (BW - C) | Y >> C
// where C = Z % BW is not zero
@@ -6210,8 +6466,8 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
}
// TODO: Merge with expandFunnelShift.
-bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
+ SDValue &Result, SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool IsLeft = Node->getOpcode() == ISD::ROTL;
@@ -6222,36 +6478,47 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
EVT ShVT = Op1.getValueType();
SDValue Zero = DAG.getConstant(0, DL, ShVT);
- assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
- "Expecting the type bitwidth to be a power of 2");
-
// If a rotate in the other direction is supported, use it.
unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
- if (isOperationLegalOrCustom(RevRot, VT)) {
+ if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
return true;
}
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
- !isOperationLegalOrCustom(ISD::SRL, VT) ||
- !isOperationLegalOrCustom(ISD::SUB, VT) ||
- !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ if (!AllowVectorOps && VT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
return false;
- // Otherwise,
- // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and -c, w-1)))
- // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and -c, w-1)))
- //
unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
- SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
- SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
- SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
- Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
- DAG.getNode(HsOpc, DL, VT, Op0, And1));
+ SDValue ShVal;
+ SDValue HsVal;
+ if (isPowerOf2_32(EltSizeInBits)) {
+ // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
+ // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
+ SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
+ ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
+ SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
+ HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
+ } else {
+ // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
+ // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+ SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
+ ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
+ SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ HsVal =
+ DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
+ }
+ Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
return true;
}
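Both rotate expansions above can be checked against a plain rotate on a 32-bit lane; a small sketch, not part of the patch:

#include <cassert>
#include <cstdint>

static uint32_t rotlRef(uint32_t X, unsigned C) {
  C %= 32;
  return C ? (X << C) | (X >> (32 - C)) : X;
}

int main() {
  uint32_t X = 0x80000001u;
  for (unsigned C = 0; C < 64; ++C) {
    // Power-of-two width:  x << (c & (w-1)) | x >> (-c & (w-1))
    uint32_t Masked = (X << (C & 31)) | (X >> ((0u - C) & 31));
    // General width:       x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    uint32_t General = (X << (C % 32)) | ((X >> 1) >> (31 - (C % 32)));
    assert(Masked == rotlRef(X, C) && General == rotlRef(X, C));
  }
  return 0;
}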
@@ -6270,7 +6537,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
if (Node->isStrictFPOpcode())
// When a NaN is converted to an integer a trap is allowed. We can't
// use this expansion here because it would eliminate that trap. Other
- // traps are also allowed and cannot be eliminated. See
+ // traps are also allowed and cannot be eliminated. See
// IEEE 754-2008 sec 5.8.
return false;
@@ -6341,7 +6608,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
// Only expand vector types if we have the appropriate vector bit operations.
- unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
+ unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
ISD::FP_TO_SINT;
if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
@@ -6356,14 +6623,19 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
if (Node->isStrictFPOpcode()) {
- Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
- { Node->getOperand(0), Src });
+ Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), Src });
Chain = Result.getValue(1);
} else
Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
return true;
}
+  // Don't expand it if there isn't a cheap fsub instruction.
+ if (!isOperationLegalOrCustom(
+ Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
+ return false;
+
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
SDValue Sel;
@@ -6395,9 +6667,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
DAG.getConstant(SignMask, dl, DstVT));
SDValue SInt;
if (Node->isStrictFPOpcode()) {
- SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
+ SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
{ Chain, Src, FltOfs });
- SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
{ Val.getValue(1), Val });
Chain = SInt.getValue(1);
} else {
@@ -6426,8 +6698,13 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue &Chain,
SelectionDAG &DAG) const {
- unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
- SDValue Src = Node->getOperand(OpNo);
+ // This transform is not correct for converting 0 when rounding mode is set
+ // to round toward negative infinity which will produce -0.0. So disable under
+ // strictfp.
+ if (Node->isStrictFPOpcode())
+ return false;
+
+ SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -6446,9 +6723,10 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
// Implementation of unsigned i64 to f64 following the algorithm in
- // __floatundidf in compiler_rt. This implementation has the advantage
- // of performing rounding correctly, both in the default rounding mode
- // and in all alternate rounding modes.
+ // __floatundidf in compiler_rt. This implementation performs rounding
+ // correctly in all rounding modes with the exception of converting 0
+ // when rounding toward negative infinity. In that case the fsub will produce
+ // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
@@ -6462,18 +6740,9 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
- if (Node->isStrictFPOpcode()) {
- SDValue HiSub =
- DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
- {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
- Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
- {HiSub.getValue(1), LoFlt, HiSub});
- Chain = Result.getValue(1);
- } else {
- SDValue HiSub =
- DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
- Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
- }
+ SDValue HiSub =
+ DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
return true;
}
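Not part of the patch: the same __floatundidf-style bit trick written as standalone C++, to make the magic constants above concrete. 0x433... is the bit pattern of 2^52 and 0x453... of 2^84.

#include <cassert>
#include <cstdint>
#include <cstring>

static double bitsToDouble(uint64_t Bits) {
  double D;
  std::memcpy(&D, &Bits, sizeof D);
  return D;
}

// LoFlt = 2^52 + lo32(X), HiFlt = 2^84 + hi32(X) * 2^32; subtracting
// (2^84 + 2^52) and adding the halves reassembles X with one correctly
// rounded addition (default rounding mode).
static double u64ToF64(uint64_t X) {
  double LoFlt = bitsToDouble((X & 0xFFFFFFFFu) | 0x4330000000000000ULL);
  double HiFlt = bitsToDouble((X >> 32) | 0x4530000000000000ULL);
  double HiSub = HiFlt - bitsToDouble(0x4530000000100000ULL); // 2^84 + 2^52
  return LoFlt + HiSub;
}

int main() {
  assert(u64ToF64(0) == 0.0);
  assert(u64ToF64(1) == 1.0);
  assert(u64ToF64(1ULL << 63) == 9223372036854775808.0);
  assert(u64ToF64(~0ULL) == 18446744073709551616.0); // 2^64 - 1 rounds up
  return 0;
}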
@@ -6483,6 +6752,11 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
EVT VT = Node->getValueType(0);
+
+ if (VT.isScalableVector())
+ report_fatal_error(
+ "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
+
if (isOperationLegalOrCustom(NewOp, VT)) {
SDValue Quiet0 = Node->getOperand(0);
SDValue Quiet1 = Node->getOperand(1);
@@ -6706,23 +6980,58 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG, bool IsNegative) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Op = N->getOperand(0);
+ // abs(x) -> smax(x,sub(0,x))
+ if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::SMAX, VT)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+ return true;
+ }
+
+ // abs(x) -> umin(x,sub(0,x))
+ if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::UMIN, VT)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+ return true;
+ }
+
+ // 0 - abs(x) -> smin(x, sub(0,x))
+ if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::SMIN, VT)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+ return true;
+ }
+
// Only expand vector types if we have the appropriate vector operations.
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
- !isOperationLegalOrCustom(ISD::ADD, VT) ||
- !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ if (VT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRA, VT) ||
+ (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
+ (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return false;
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
+ if (!IsNegative) {
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
+ Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
+ } else {
+ // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+ Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
+ }
return true;
}
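A scalar sketch of the identities used above, not part of the patch; the shift fallback assumes an arithmetic right shift, matching ISD::SRA:

#include <algorithm>
#include <cassert>
#include <cstdint>

// abs(x) -> smax(x, 0 - x), and the fallback
//   Shift = x >> (bits - 1);  abs(x) = (x + Shift) ^ Shift
static int32_t absViaSmax(int32_t X) { return std::max(X, int32_t(0 - X)); }
static int32_t absViaSra(int32_t X) {
  int32_t Shift = X >> 31;   // 0 for non-negative X, -1 for negative X
  return (X + Shift) ^ Shift;
}

int main() {
  const int32_t Vals[] = {0, 1, -1, 7, -7, 123456, -123456};
  for (int32_t X : Vals) {
    assert(absViaSmax(X) == (X < 0 ? -X : X));
    assert(absViaSra(X) == (X < 0 ? -X : X));
  }
  return 0;
}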
@@ -6736,6 +7045,9 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
EVT DstVT = LD->getValueType(0);
ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (SrcVT.isScalableVector())
+ report_fatal_error("Cannot scalarize scalable vector loads");
+
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
@@ -6762,7 +7074,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
// the codegen worse.
SDValue Load =
DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
- LD->getPointerInfo(), SrcIntVT, LD->getAlignment(),
+ LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
SmallVector<SDValue, 8> Vals;
@@ -6799,10 +7111,10 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SDValue ScalarLoad =
DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
+ SrcEltVT, LD->getOriginalAlign(),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
- BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
+ BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -6823,6 +7135,9 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
+ if (StVT.isScalableVector())
+ report_fatal_error("Cannot scalarize scalable vector stores");
+
// The type of the data we want to save
EVT RegVT = Value.getValueType();
EVT RegSclVT = RegVT.getScalarType();
@@ -6859,7 +7174,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
}
return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
- ST->getAlignment(), ST->getMemOperand()->getFlags(),
+ ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
ST->getAAInfo());
}
@@ -6873,13 +7188,14 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
DAG.getVectorIdxConstant(Idx, SL));
- SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
+ SDValue Ptr =
+ DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(
Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
- MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
- ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
+ ST->getAAInfo());
Stores.push_back(Store);
}
@@ -6944,7 +7260,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
// Load one integer register's worth from the original location.
SDValue Load = DAG.getLoad(
RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
- MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(
@@ -6963,8 +7279,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
SDValue Load =
DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(Offset), MemVT,
- MinAlign(LD->getAlignment(), Offset),
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
@@ -6994,7 +7310,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
NumBits >>= 1;
- unsigned Alignment = LD->getAlignment();
+ Align Alignment = LD->getOriginalAlign();
unsigned IncrementSize = NumBits / 8;
ISD::LoadExtType HiExtType = LD->getExtensionType();
@@ -7009,21 +7325,21 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- NewLoadedVT, MinAlign(Alignment, IncrementSize),
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
} else {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- NewLoadedVT, MinAlign(Alignment, IncrementSize),
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
}
// aggregate the two parts
@@ -7047,7 +7363,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
SDValue Ptr = ST->getBasePtr();
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
- int Alignment = ST->getAlignment();
+ Align Alignment = ST->getOriginalAlign();
auto &MF = DAG.getMachineFunction();
EVT StoreMemVT = ST->getMemoryVT();
@@ -7104,7 +7420,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
- MinAlign(ST->getAlignment(), Offset),
+ ST->getOriginalAlign(),
ST->getMemOperand()->getFlags()));
// Increment the pointers.
Offset += RegBytes;
@@ -7126,7 +7442,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
Stores.push_back(
DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
- MinAlign(ST->getAlignment(), Offset),
+ ST->getOriginalAlign(),
ST->getMemOperand()->getFlags(), ST->getAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
@@ -7137,8 +7453,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
"Unaligned store of unknown type.");
// Get the half-size VT
EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
- int NumBits = NewStoredVT.getSizeInBits();
- int IncrementSize = NumBits / 8;
+ unsigned NumBits = NewStoredVT.getFixedSizeInBits();
+ unsigned IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
SDValue ShiftAmount = DAG.getConstant(
@@ -7153,8 +7469,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
ST->getMemOperand()->getFlags());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
- Alignment = MinAlign(Alignment, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Store2 = DAG.getTruncStore(
Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
@@ -7173,9 +7488,12 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
SDValue Increment;
EVT AddrVT = Addr.getValueType();
EVT MaskVT = Mask.getValueType();
- assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
+ assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
"Incompatible types of Data and Mask");
if (IsCompressedMemory) {
+ if (DataVT.isScalableVector())
+ report_fatal_error(
+ "Cannot currently handle compressed memory with scalable vectors");
// Incrementing the pointer according to number of '1's in the mask.
EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
@@ -7191,6 +7509,10 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
AddrVT);
Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
+ } else if (DataVT.isScalableVector()) {
+ Increment = DAG.getVScale(DL, AddrVT,
+ APInt(AddrVT.getFixedSizeInBits(),
+ DataVT.getStoreSize().getKnownMinSize()));
} else
Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
@@ -7201,16 +7523,26 @@ static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
SDValue Idx,
EVT VecVT,
const SDLoc &dl) {
- if (isa<ConstantSDNode>(Idx))
+ if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
return Idx;
EVT IdxVT = Idx.getValueType();
- unsigned NElts = VecVT.getVectorNumElements();
- if (isPowerOf2_32(NElts)) {
- APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
- Log2_32(NElts));
- return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
- DAG.getConstant(Imm, dl, IdxVT));
+ unsigned NElts = VecVT.getVectorMinNumElements();
+ if (VecVT.isScalableVector()) {
+ SDValue VS = DAG.getVScale(dl, IdxVT,
+ APInt(IdxVT.getFixedSizeInBits(),
+ NElts));
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, IdxVT, VS,
+ DAG.getConstant(1, dl, IdxVT));
+
+ return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
+ } else {
+ if (isPowerOf2_32(NElts)) {
+ APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
+ Log2_32(NElts));
+ return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
+ DAG.getConstant(Imm, dl, IdxVT));
+ }
}
return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
@@ -7227,8 +7559,8 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
EVT EltVT = VecVT.getVectorElementType();
// Calculate the element offset and add it to the pointer.
- unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
- assert(EltSize * 8 == EltVT.getSizeInBits() &&
+ unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
+ assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
"Converting bits to bytes lost precision");
Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
@@ -7306,6 +7638,65 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
return SDValue();
}
+// Convert redundant addressing modes (e.g. scaling is redundant
+// when accessing bytes).
+ISD::MemIndexType
+TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
+ SDValue Offsets) const {
+ bool IsScaledIndex =
+ (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
+ bool IsSignedIndex =
+ (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
+
+ // Scaling is unimportant for bytes, canonicalize to unscaled.
+ if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
+ IsScaledIndex = false;
+ IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
+ }
+
+ return IndexType;
+}
+
+SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ EVT VT = Op0.getValueType();
+ unsigned Opcode = Node->getOpcode();
+ SDLoc DL(Node);
+
+ // umin(x,y) -> sub(x,usubsat(x,y))
+ if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::USUBSAT, VT)) {
+ return DAG.getNode(ISD::SUB, DL, VT, Op0,
+ DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
+ }
+
+ // umax(x,y) -> add(x,usubsat(y,x))
+ if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
+ isOperationLegal(ISD::USUBSAT, VT)) {
+ return DAG.getNode(ISD::ADD, DL, VT, Op0,
+ DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
+ }
+
+ // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
+ ISD::CondCode CC;
+ switch (Opcode) {
+ default: llvm_unreachable("How did we get here?");
+ case ISD::SMAX: CC = ISD::SETGT; break;
+ case ISD::SMIN: CC = ISD::SETLT; break;
+ case ISD::UMAX: CC = ISD::SETUGT; break;
+ case ISD::UMIN: CC = ISD::SETULT; break;
+ }
+
+ // FIXME: Should really try to split the vector in case it's legal on a
+ // subvector.
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(Node);
+
+ SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+}
+
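Not part of the patch: a brute-force check of the usubsat-based min/max rewrites above, on i8 values.

#include <cassert>
#include <cstdint>

static uint8_t usubsat(uint8_t A, uint8_t B) { return A > B ? A - B : 0; }

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t X = uint8_t(A), Y = uint8_t(B);
      uint8_t Min = X < Y ? X : Y, Max = X > Y ? X : Y;
      // umin(x,y) -> sub(x, usubsat(x,y))
      assert(uint8_t(X - usubsat(X, Y)) == Min);
      // umax(x,y) -> add(x, usubsat(y,x))
      assert(uint8_t(X + usubsat(Y, X)) == Max);
    }
  return 0;
}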
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
unsigned Opcode = Node->getOpcode();
SDValue LHS = Node->getOperand(0);
@@ -7317,12 +7708,13 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
assert(VT.isInteger() && "Expected operands to be integers");
// usub.sat(a, b) -> umax(a, b) - b
- if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
+ if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
}
- if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
+ // uadd.sat(a, b) -> umin(a, ~b) + b
+ if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
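The two rewrites in this hunk can be verified exhaustively on i8; a standalone check, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t X = uint8_t(A), Y = uint8_t(B);
      uint8_t USubSat = X > Y ? uint8_t(X - Y) : 0;
      uint8_t UAddSat = A + B > 255 ? 255 : uint8_t(A + B);
      // usub.sat(a, b) -> umax(a, b) - b
      uint8_t UMax = X > Y ? X : Y;
      assert(uint8_t(UMax - Y) == USubSat);
      // uadd.sat(a, b) -> umin(a, ~b) + b
      uint8_t NotY = uint8_t(~Y);
      uint8_t UMin = X < NotY ? X : NotY;
      assert(uint8_t(UMin + Y) == UAddSat);
    }
  return 0;
}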
@@ -7347,6 +7739,11 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
"addition or subtraction node.");
}
+ // FIXME: Should really try to split the vector in case it's legal on a
+ // subvector.
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(Node);
+
unsigned BitWidth = LHS.getScalarValueSizeInBits();
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
@@ -7386,6 +7783,41 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
}
}
+SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ bool IsSigned = Opcode == ISD::SSHLSAT;
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ SDLoc dl(Node);
+
+ assert((Node->getOpcode() == ISD::SSHLSAT ||
+ Node->getOpcode() == ISD::USHLSAT) &&
+ "Expected a SHLSAT opcode");
+ assert(VT == RHS.getValueType() && "Expected operands to be the same type");
+ assert(VT.isInteger() && "Expected operands to be integers");
+
+ // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
+
+ unsigned BW = VT.getScalarSizeInBits();
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
+ SDValue Orig =
+ DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
+
+ SDValue SatVal;
+ if (IsSigned) {
+ SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
+ SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
+ SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
+ SatMin, SatMax, ISD::SETLT);
+ } else {
+ SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
+ }
+ Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);
+
+ return Result;
+}
+
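Not part of the patch: scalar i8 versions of the saturating shift-left expansion above ("if (LHS << RHS) >> RHS != LHS, saturate"); two's-complement truncation is assumed for the signed case.

#include <cassert>
#include <cstdint>

static uint8_t ushlSat(uint8_t X, unsigned S) {   // S < 8
  uint8_t Shifted = uint8_t(X << S);
  return uint8_t(Shifted >> S) == X ? Shifted : UINT8_MAX;
}

static int8_t sshlSat(int8_t X, unsigned S) {     // S < 8
  int8_t Shifted = int8_t(uint8_t(X) << S);       // shift without signed-overflow UB
  if (int8_t(Shifted >> S) == X)                  // arithmetic shift back
    return Shifted;
  return X < 0 ? INT8_MIN : INT8_MAX;             // saturate by the sign of LHS
}

int main() {
  assert(ushlSat(3, 2) == 12);
  assert(ushlSat(0x90, 1) == UINT8_MAX);
  assert(sshlSat(3, 3) == 24);
  assert(sshlSat(5, 5) == INT8_MAX);
  assert(sshlSat(-5, 5) == INT8_MIN);
  return 0;
}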
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
assert((Node->getOpcode() == ISD::SMULFIX ||
@@ -7759,7 +8191,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
if (isSigned) {
// The high part is obtained by SRA'ing all but one of the bits of low
// part.
- unsigned LoSize = VT.getSizeInBits();
+ unsigned LoSize = VT.getFixedSizeInBits();
HiLHS =
DAG.getNode(ISD::SRA, dl, VT, LHS,
DAG.getConstant(LoSize - 1, dl,
@@ -7818,7 +8250,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
// Truncate the result if SetCC returns a larger type than needed.
EVT RType = Node->getValueType(1);
- if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
+ if (RType.bitsLT(Overflow.getValueType()))
Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
@@ -7828,32 +8260,14 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
- bool NoNaN = Node->getFlags().hasNoNaNs();
- unsigned BaseOpcode = 0;
- switch (Node->getOpcode()) {
- default: llvm_unreachable("Expected VECREDUCE opcode");
- case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
- case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
- case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break;
- case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
- case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
- case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
- case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
- case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
- case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
- case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
- case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
- case ISD::VECREDUCE_FMAX:
- BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
- break;
- case ISD::VECREDUCE_FMIN:
- BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
- break;
- }
-
+ unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
SDValue Op = Node->getOperand(0);
EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
+ report_fatal_error(
+ "Expanding reductions for scalable vectors is undefined.");
+
// Try to use a shuffle reduction for power of two vectors.
if (VT.isPow2VectorType()) {
while (VT.getVectorNumElements() > 1) {
@@ -7884,6 +8298,33 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
return Res;
}
+SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue AccOp = Node->getOperand(0);
+ SDValue VecOp = Node->getOperand(1);
+ SDNodeFlags Flags = Node->getFlags();
+
+ EVT VT = VecOp.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ if (VT.isScalableVector())
+ report_fatal_error(
+ "Expanding reductions for scalable vectors is undefined.");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Ops;
+ DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
+
+ unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
+
+ SDValue Res = AccOp;
+ for (unsigned i = 0; i < NumElts; i++)
+ Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
+
+ return Res;
+}
+
bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
@@ -7906,3 +8347,105 @@ bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
}
return false;
}
+
+SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
+ SelectionDAG &DAG) const {
+ bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
+ SDLoc dl(SDValue(Node, 0));
+ SDValue Src = Node->getOperand(0);
+
+ // DstVT is the result type, while SatVT is the size to which we saturate
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+
+ unsigned SatWidth = Node->getConstantOperandVal(1);
+ unsigned DstWidth = DstVT.getScalarSizeInBits();
+ assert(SatWidth <= DstWidth &&
+ "Expected saturation width smaller than result width");
+
+ // Determine minimum and maximum integer values and their corresponding
+ // floating-point values.
+ APInt MinInt, MaxInt;
+ if (IsSigned) {
+ MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
+ MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
+ } else {
+ MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
+ MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
+ }
+
+ // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
+ // libcall emission cannot handle this. Large result types will fail.
+ if (SrcVT == MVT::f16) {
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
+ SrcVT = Src.getValueType();
+ }
+
+ APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
+ APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
+
+ APFloat::opStatus MinStatus =
+ MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
+ APFloat::opStatus MaxStatus =
+ MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
+ !(MaxStatus & APFloat::opStatus::opInexact);
+
+ SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
+ SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
+
+ // If the integer bounds are exactly representable as floats and min/max are
+ // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
+ // of comparisons and selects.
+ bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
+ isOperationLegal(ISD::FMAXNUM, SrcVT);
+ if (AreExactFloatBounds && MinMaxLegal) {
+ SDValue Clamped = Src;
+
+ // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
+ Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
+ // Clamp by MaxFloat from above. NaN cannot occur.
+ Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
+ // Convert clamped value to integer.
+ SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
+ dl, DstVT, Clamped);
+
+ // In the unsigned case we're done, because we mapped NaN to MinFloat,
+ // which will cast to zero.
+ if (!IsSigned)
+ return FpToInt;
+
+ // Otherwise, select 0 if Src is NaN.
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
+ return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
+ ISD::CondCode::SETUO);
+ }
+
+ SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
+ SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
+
+ // Result of direct conversion. The assumption here is that the operation is
+ // non-trapping and it's fine to apply it to an out-of-range value if we
+ // select it away later.
+ SDValue FpToInt =
+ DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
+
+ SDValue Select = FpToInt;
+
+ // If Src ULT MinFloat, select MinInt. In particular, this also selects
+ // MinInt if Src is NaN.
+ Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
+ ISD::CondCode::SETULT);
+ // If Src OGT MaxFloat, select MaxInt.
+ Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
+ ISD::CondCode::SETOGT);
+
+ // In the unsigned case we are done, because we mapped NaN to MinInt, which
+ // is already zero.
+ if (!IsSigned)
+ return Select;
+
+ // Otherwise, select 0 if Src is NaN.
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
+ return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
+}
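Not part of the patch: a scalar model of the min+max+fptoi path above for f64 -> saturating i32 (both i32 bounds are exactly representable as doubles, and NaN maps to 0):

#include <cassert>
#include <cmath>
#include <cstdint>

static int32_t fpToSIntSat32(double X) {
  // Clamp by MinFloat from below; fmax returns the non-NaN operand, so a
  // NaN input becomes MinFloat here and is handled by the final select.
  double Clamped = std::fmax(X, -2147483648.0);
  // Clamp by MaxFloat from above. NaN can no longer occur.
  Clamped = std::fmin(Clamped, 2147483647.0);
  int32_t FpToInt = static_cast<int32_t>(Clamped);
  // Otherwise, select 0 if the original input was NaN.
  return std::isnan(X) ? 0 : FpToInt;
}

int main() {
  assert(fpToSIntSat32(42.9) == 42);
  assert(fpToSIntSat32(1e12) == INT32_MAX);
  assert(fpToSIntSat32(-1e12) == INT32_MIN);
  assert(fpToSIntSat32(NAN) == 0);
  return 0;
}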
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
index ce43fb1fbd4b..f89069e9f728 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -144,7 +144,7 @@ class ShrinkWrap : public MachineFunctionPass {
unsigned FrameDestroyOpcode;
/// Stack pointer register, used by llvm.{savestack,restorestack}
- unsigned SP;
+ Register SP;
/// Entry block.
const MachineBasicBlock *Entry;
@@ -331,11 +331,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
Save = &MBB;
else
Save = MDT->findNearestCommonDominator(Save, &MBB);
-
- if (!Save) {
- LLVM_DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");
- return;
- }
+ assert(Save);
if (!Restore)
Restore = &MBB;
@@ -381,7 +377,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
// C. Save and Restore are in the same loop.
bool SaveDominatesRestore = false;
bool RestorePostDominatesSave = false;
- while (Save && Restore &&
+ while (Restore &&
(!(SaveDominatesRestore = MDT->dominates(Save, Restore)) ||
!(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) ||
// Post-dominance is not enough in loops to ensure that all uses/defs
@@ -412,8 +408,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
Restore = MPDT->findNearestCommonDominator(Restore, Save);
// Fix (C).
- if (Save && Restore &&
- (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
+ if (Restore && (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) {
// Push Save outside of this loop if immediate dominator is different
// from save block. If immediate dominator is not different, bail out.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 0683058f177e..d2fd4a6d8fd9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -142,7 +142,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
/// instruction with those returned by the personality function.
void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
Value *SelVal) {
- SmallVector<Value *, 8> UseWorkList(LPI->user_begin(), LPI->user_end());
+ SmallVector<Value *, 8> UseWorkList(LPI->users());
while (!UseWorkList.empty()) {
Value *Val = UseWorkList.pop_back_val();
auto *EVI = dyn_cast<ExtractValueInst>(Val);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
index 36a0ddf67b19..4bb50a285497 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -27,10 +27,7 @@
//===----------------------------------------------------------------------===//
#include "SpillPlacement.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -39,7 +36,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/BlockFrequency.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
index 8dec620536a7..a6a3149ae25b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
@@ -12,28 +12,18 @@
//===----------------------------------------------------------------------===//
#include "SplitKit.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalCalc.h"
-#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -41,10 +31,8 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BlockFrequency.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -53,7 +41,6 @@
#include <iterator>
#include <limits>
#include <tuple>
-#include <utility>
using namespace llvm;
@@ -181,7 +168,7 @@ void SplitAnalysis::analyzeUses() {
// Get use slots form the use-def chain.
const MachineRegisterInfo &MRI = MF.getRegInfo();
- for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg))
+ for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg()))
if (!MO.isUndef())
UseSlots.push_back(LIS.getInstructionIndex(*MO.getParent()).getRegSlot());
@@ -346,7 +333,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
}
bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
- unsigned OrigReg = VRM.getOriginal(CurLI->reg);
+ unsigned OrigReg = VRM.getOriginal(CurLI->reg());
const LiveInterval &Orig = LIS.getInterval(OrigReg);
assert(!Orig.empty() && "Splitting empty interval?");
LiveInterval::const_iterator I = Orig.find(Idx);
@@ -412,10 +399,18 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const {
}
#endif
+LiveInterval::SubRange &SplitEditor::getSubRangeForMaskExact(LaneBitmask LM,
+ LiveInterval &LI) {
+ for (LiveInterval::SubRange &S : LI.subranges())
+ if (S.LaneMask == LM)
+ return S;
+ llvm_unreachable("SubRange for this mask not found");
+}
+
LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM,
LiveInterval &LI) {
for (LiveInterval::SubRange &S : LI.subranges())
- if (S.LaneMask == LM)
+ if ((S.LaneMask & LM) == LM)
return S;
llvm_unreachable("SubRange for this mask not found");
}
@@ -446,7 +441,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
LaneBitmask LM;
for (const MachineOperand &DefOp : DefMI->defs()) {
Register R = DefOp.getReg();
- if (R != LI.reg)
+ if (R != LI.reg())
continue;
if (unsigned SR = DefOp.getSubReg())
LM |= TRI.getSubRegIndexLaneMask(SR);
@@ -517,7 +512,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) {
VFP = ValueForcePair(nullptr, true);
}
-SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
@@ -543,7 +538,7 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
return Def;
}
-SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
+SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
LaneBitmask LaneMask, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
@@ -649,7 +644,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
- unsigned Reg = LI->reg;
+ Register Reg = LI->reg();
bool DidRemat = false;
if (OrigVNI) {
LiveRangeEdit::Remat RM(ParentVNI);
@@ -662,16 +657,25 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
}
if (!DidRemat) {
LaneBitmask LaneMask;
- if (LI->hasSubRanges()) {
+ if (OrigLI.hasSubRanges()) {
LaneMask = LaneBitmask::getNone();
- for (LiveInterval::SubRange &S : LI->subranges())
- LaneMask |= S.LaneMask;
+ for (LiveInterval::SubRange &S : OrigLI.subranges()) {
+ if (S.liveAt(UseIdx))
+ LaneMask |= S.LaneMask;
+ }
} else {
LaneMask = LaneBitmask::getAll();
}
- ++NumCopies;
- Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx);
+ if (LaneMask.none()) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::IMPLICIT_DEF);
+ MachineInstr *ImplicitDef = BuildMI(MBB, I, DebugLoc(), Desc, Reg);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ Def = Indexes.insertMachineInstrInMaps(*ImplicitDef, Late).getRegSlot();
+ } else {
+ ++NumCopies;
+ Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx);
+ }
}
// Define the value in Reg.
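
The hunk above changes defFromParent to look at the lanes of the original interval that are actually live at the use point, and to emit an IMPLICIT_DEF when none are. A minimal sketch of that decision, using a toy lane model instead of LiveInterval and SlotIndex (names are illustrative, not LLVM's):

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy stand-in for a subrange: a lane mask plus "is it live at the use?".
struct ToySubRange {
  uint64_t LaneMask;
  bool LiveAtUse;
};

enum class DefKind { ImplicitDef, Copy };

// Accumulate only the lanes live at the use point; if nothing is live there
// is nothing to copy, so an IMPLICIT_DEF is emitted instead of a COPY.
DefKind chooseDef(const std::vector<ToySubRange> &Subs) {
  if (Subs.empty())
    return DefKind::Copy; // no subranges: all lanes are treated as live
  uint64_t LiveLanes = 0;
  for (const ToySubRange &S : Subs)
    if (S.LiveAtUse)
      LiveLanes |= S.LaneMask;
  return LiveLanes == 0 ? DefKind::ImplicitDef : DefKind::Copy;
}

int main() {
  std::vector<ToySubRange> Dead = {{0x3, false}, {0xC, false}};
  std::vector<ToySubRange> Live = {{0x3, false}, {0xC, true}};
  std::printf("no live lanes  -> %s\n",
              chooseDef(Dead) == DefKind::ImplicitDef ? "IMPLICIT_DEF" : "COPY");
  std::printf("some live lane -> %s\n",
              chooseDef(Live) == DefKind::ImplicitDef ? "IMPLICIT_DEF" : "COPY");
  return 0;
}
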
@@ -994,9 +998,7 @@ void SplitEditor::computeRedundantBackCopies(
}
if (!DominatedVNIs.empty()) {
forceRecompute(0, *ParentVNI);
- for (auto VNI : DominatedVNIs) {
- BackCopies.push_back(VNI);
- }
+ append_range(BackCopies, DominatedVNIs);
DominatedVNIs.clear();
}
}
@@ -1257,8 +1259,8 @@ void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC,
LiveInterval &PLI = Edit->getParent();
// Need the cast because the inputs to ?: would otherwise be deemed
// "incompatible": SubRange vs LiveInterval.
- LiveRange &PSR = !LM.all() ? getSubRangeForMask(LM, PLI)
- : static_cast<LiveRange&>(PLI);
+ LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI)
+ : static_cast<LiveRange &>(PLI);
if (PSR.liveAt(LastUse))
LIC.extend(LR, End, /*PhysReg=*/0, Undefs);
}
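
The "Need the cast" comment above reflects a general C++ rule rather than anything SplitKit-specific: the two arms of ?: must share a common type, and when both classes derive from the same base it is enough to cast one arm to a base reference. A small self-contained sketch with toy class names (not the LLVM types):

// Toy hierarchy standing in for SubRange and LiveInterval, which both
// derive from LiveRange in the real code.
struct RangeBase {};
struct SubRangeLike : RangeBase {};
struct IntervalLike : RangeBase {};

RangeBase &pick(bool UseSub, SubRangeLike &S, IntervalLike &I) {
  // Without the cast the two arms have unrelated class types and the
  // conditional expression is ill-formed; casting one arm to the common
  // base gives both arms a common type.
  return UseSub ? S : static_cast<RangeBase &>(I);
}

int main() {
  SubRangeLike S;
  IntervalLike I;
  RangeBase &R = pick(true, S, I);
  (void)R;
  return 0;
}
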
@@ -1293,7 +1295,7 @@ void SplitEditor::extendPHIKillRanges() {
continue;
unsigned RegIdx = RegAssign.lookup(V->def);
LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
- LiveInterval::SubRange &S = getSubRangeForMask(PS.LaneMask, LI);
+ LiveInterval::SubRange &S = getSubRangeForMaskExact(PS.LaneMask, LI);
if (removeDeadSegment(V->def, S))
continue;
@@ -1342,7 +1344,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// Rewrite to the mapped register at Idx.
unsigned RegIdx = RegAssign.lookup(Idx);
LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
- MO.setReg(LI.reg);
+ MO.setReg(LI.reg());
LLVM_DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent())
<< '\t' << Idx << ':' << RegIdx << '\t' << *MI);
@@ -1402,7 +1404,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
}
}
- for (unsigned R : *Edit) {
+ for (Register R : *Edit) {
LiveInterval &LI = LIS.getInterval(R);
if (!LI.hasSubRanges())
continue;
@@ -1424,7 +1426,7 @@ void SplitEditor::deleteRematVictims() {
continue;
MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
assert(MI && "Missing instruction for dead def");
- MI->addRegisterDead(LI->reg, &TRI);
+ MI->addRegisterDead(LI->reg(), &TRI);
if (!MI->allDefsAreDead())
continue;
@@ -1521,7 +1523,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
deleteRematVictims();
// Get rid of unused values and set phi-kill flags.
- for (unsigned Reg : *Edit) {
+ for (Register Reg : *Edit) {
LiveInterval &LI = LIS.getInterval(Reg);
LI.removeEmptySubRanges();
LI.RenumberValues();
@@ -1538,13 +1540,13 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
ConnectedVNInfoEqClasses ConEQ(LIS);
for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
// Don't use iterators, they are invalidated by create() below.
- unsigned VReg = Edit->get(i);
+ Register VReg = Edit->get(i);
LiveInterval &LI = LIS.getInterval(VReg);
SmallVector<LiveInterval*, 8> SplitLIs;
LIS.splitSeparateComponents(LI, SplitLIs);
- unsigned Original = VRM.getOriginal(VReg);
+ Register Original = VRM.getOriginal(VReg);
for (LiveInterval *SplitLI : SplitLIs)
- VRM.setIsSplitFromReg(SplitLI->reg, Original);
+ VRM.setIsSplitFromReg(SplitLI->reg(), Original);
// The new intervals all map back to i.
if (LRMap)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
index 3ab5f2585f34..a94518f5a4fc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
@@ -345,10 +345,17 @@ private:
return LICalc[SpillMode != SM_Partition && RegIdx != 0];
}
- /// Find a subrange corresponding to the lane mask @p LM in the live
+ /// Find a subrange corresponding to the exact lane mask @p LM in the live
/// interval @p LI. The interval @p LI is assumed to contain such a subrange.
/// This function is used to find corresponding subranges between the
/// original interval and the new intervals.
+ LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM,
+ LiveInterval &LI);
+
+ /// Find a subrange corresponding to the lane mask @p LM, or a superset of it,
+ /// in the live interval @p LI. The interval @p LI is assumed to contain such
+ /// a subrange. This function is used to find corresponding subranges between
+ /// the original interval and the new intervals.
LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI);
/// Add a segment to the interval LI for the value number VNI. If LI has
@@ -432,11 +439,11 @@ private:
/// Add a copy instruction copying \p FromReg to \p ToReg before
/// \p InsertBefore. This can be invoked with a \p LaneMask which may make it
/// necessary to construct a sequence of copies to cover it exactly.
- SlotIndex buildCopy(unsigned FromReg, unsigned ToReg, LaneBitmask LaneMask,
+ SlotIndex buildCopy(Register FromReg, Register ToReg, LaneBitmask LaneMask,
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
bool Late, unsigned RegIdx);
- SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+ SlotIndex buildSingleSubRegCopy(Register FromReg, Register ToReg,
MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
index d720d93c306d..af58204f6db5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
@@ -373,6 +373,36 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
// before visiting the memcpy block (which will contain the lifetime start
// for "b" then it will appear that 'b' has a degenerate lifetime.
//
+// Handle Windows Exception with LifetimeStartOnFirstUse:
+// -----------------
+//
+// There was a bug when using LifetimeStartOnFirstUse on win32. Consider:
+// class Type1 {
+// ...
+// ~Type1(){ write memory;}
+// }
+// ...
+// try{
+// Type1 V
+// ...
+// } catch (Type2 X){
+// ...
+// }
+// For variable X in catch(X), we put the slot pX = &(&X) into
+// ConservativeSlots to prevent LifetimeStartOnFirstUse. The reason is that
+// pX may be merged with object V, whose destructor may run after the EH
+// runtime has implicitly written pX. All of this happens inside the C++ EH
+// runtime libraries (through CxxThrowException) and cannot readily be
+// detected at the IR level.
+//
+// The load of pX, which has no visible store in the IR, is usually the first
+// LOAD MI in the EH pad, something like:
+// bb.x.catch.i (landing-pad, ehfunclet-entry):
+// ; predecessors: %bb...
+// successors: %bb...
+// %n:gr32 = MOV32rm %stack.pX ...
+// ...
+// The Type2** %stack.pX is only ever written by the EH runtime libraries, so
+// we check StoreSlots to screen it out.
namespace {
@@ -434,6 +464,9 @@ class StackColoring : public MachineFunctionPass {
/// slots lifetime-start-on-first-use is disabled).
BitVector ConservativeSlots;
+ /// Record the FI slots referenced by a 'may write to memory'.
+ BitVector StoreSlots;
+
/// Number of iterations taken during data flow analysis.
unsigned NumIterations;
@@ -629,10 +662,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
InterestingSlots.resize(NumSlot);
ConservativeSlots.clear();
ConservativeSlots.resize(NumSlot);
+ StoreSlots.clear();
+ StoreSlots.resize(NumSlot);
// number of start and end lifetime ops for each slot
SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
+ SmallVector<int, 8> NumLoadInCatchPad(NumSlot, 0);
// Step 1: collect markers and populate the "InterestingSlots"
// and "ConservativeSlots" sets.
@@ -687,6 +723,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
if (! BetweenStartEnd.test(Slot)) {
ConservativeSlots.set(Slot);
}
+ // Record which slots are stored to so the catch-object slot can be
+ // screened out. For more information, see "Handle Windows Exception with
+ // LifetimeStartOnFirstUse" at the head of this file.
+ if (MI.mayStore())
+ StoreSlots.set(Slot);
+ if (MF->getWinEHFuncInfo() && MBB->isEHPad() && MI.mayLoad())
+ NumLoadInCatchPad[Slot] += 1;
}
}
}
@@ -697,11 +740,14 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
return 0;
}
- // PR27903: slots with multiple start or end lifetime ops are not
+ // 1) PR27903: slots with multiple start or end lifetime ops are not
// safe to enable for "lifetime-start-on-first-use".
- for (unsigned slot = 0; slot < NumSlot; ++slot)
- if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
+ // 2) Slots for variable X in catch(X) on Windows are also not safe.
+ for (unsigned slot = 0; slot < NumSlot; ++slot) {
+ if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1 ||
+ (NumLoadInCatchPad[slot] > 1 && !StoreSlots.test(slot)))
ConservativeSlots.set(slot);
+ }
LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
// Step 2: compute begin/end sets for each block
@@ -1048,7 +1094,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
if (MMO->getAAInfo()) {
if (const Value *MMOV = MMO->getValue()) {
SmallVector<Value *, 4> Objs;
- getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
+ getUnderlyingObjectsForCodeGen(MMOV, Objs);
if (Objs.empty())
MayHaveConflictingAAMD = true;
@@ -1241,7 +1287,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// This is a simple greedy algorithm for merging allocas. First, sort the
// slots, placing the largest slots first. Next, perform an n^2 scan and look
- // for disjoint slots. When you find disjoint slots, merge the samller one
+ // for disjoint slots. When you find disjoint slots, merge the smaller one
// into the bigger one and update the live interval. Remove the small alloca
// and continue.
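
Putting the new pieces of this StackColoring change together: per-slot statistics are collected during the marker scan, and a slot is forced into ConservativeSlots if it has multiple lifetime markers or looks like a catch-object slot (loaded in an EH pad but never visibly stored). The sketch below is a boiled-down, self-contained model of that rule, with illustrative field names rather than the pass's actual data structures.

#include <cstdio>

// Per-slot statistics as a hypothetical marker scan might collect them.
struct SlotInfo {
  int NumStartLifetimes = 0;
  int NumEndLifetimes = 0;
  int NumLoadsInCatchPad = 0; // loads of this slot inside an EH pad
  bool SeenStore = false;     // any mayStore reference to this slot
};

// Lifetime-start-on-first-use is disabled for slots with multiple lifetime
// markers (PR27903) and for the Windows catch-object pattern: read in the
// EH pad but never written in the visible IR.
bool isConservative(const SlotInfo &S) {
  if (S.NumStartLifetimes > 1 || S.NumEndLifetimes > 1)
    return true;
  return S.NumLoadsInCatchPad > 1 && !S.SeenStore;
}

int main() {
  SlotInfo CatchObj;               // written only by the EH runtime
  CatchObj.NumLoadsInCatchPad = 2;
  SlotInfo Ordinary;               // a normal slot with a store in the IR
  Ordinary.NumLoadsInCatchPad = 2;
  Ordinary.SeenStore = true;
  std::printf("catch object conservative: %d\n", isConservative(CatchObj));
  std::printf("ordinary slot conservative: %d\n", isConservative(Ordinary));
  return 0;
}
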
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
index 1e060ecbeb43..faf07e90c39c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
@@ -45,6 +45,14 @@ static cl::opt<int> StackMapVersion(
const char *StackMaps::WSMP = "Stack Maps: ";
+static uint64_t getConstMetaVal(const MachineInstr &MI, unsigned Idx) {
+ assert(MI.getOperand(Idx).isImm() &&
+ MI.getOperand(Idx).getImm() == StackMaps::ConstantOp);
+ const auto &MO = MI.getOperand(Idx + 1);
+ assert(MO.isImm());
+ return MO.getImm();
+}
+
StackMapOpers::StackMapOpers(const MachineInstr *MI)
: MI(MI) {
assert(getVarIdx() <= MI->getNumOperands() &&
@@ -83,11 +91,89 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
return ScratchIdx;
}
+unsigned StatepointOpers::getNumGcMapEntriesIdx() {
+ // Take the index of the number of allocas and skip all alloca records.
+ unsigned CurIdx = getNumAllocaIdx();
+ unsigned NumAllocas = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ while (NumAllocas--)
+ CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+ return CurIdx + 1; // skip <StackMaps::ConstantOp>
+}
+
+unsigned StatepointOpers::getNumAllocaIdx() {
+ // Take the index of the number of gc pointers and skip all gc pointer records.
+ unsigned CurIdx = getNumGCPtrIdx();
+ unsigned NumGCPtrs = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ while (NumGCPtrs--)
+ CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+ return CurIdx + 1; // skip <StackMaps::ConstantOp>
+}
+
+unsigned StatepointOpers::getNumGCPtrIdx() {
+ // Take the index of the number of deopt args and skip all deopt records.
+ unsigned CurIdx = getNumDeoptArgsIdx();
+ unsigned NumDeoptArgs = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ while (NumDeoptArgs--) {
+ CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+ }
+ return CurIdx + 1; // skip <StackMaps::ConstantOp>
+}
+
+int StatepointOpers::getFirstGCPtrIdx() {
+ unsigned NumGCPtrsIdx = getNumGCPtrIdx();
+ unsigned NumGCPtrs = getConstMetaVal(*MI, NumGCPtrsIdx - 1);
+ if (NumGCPtrs == 0)
+ return -1;
+ ++NumGCPtrsIdx; // skip <num gc ptrs>
+ assert(NumGCPtrsIdx < MI->getNumOperands());
+ return (int)NumGCPtrsIdx;
+}
+
+unsigned StatepointOpers::getGCPointerMap(
+ SmallVectorImpl<std::pair<unsigned, unsigned>> &GCMap) {
+ unsigned CurIdx = getNumGcMapEntriesIdx();
+ unsigned GCMapSize = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ for (unsigned N = 0; N < GCMapSize; ++N) {
+ unsigned B = MI->getOperand(CurIdx++).getImm();
+ unsigned D = MI->getOperand(CurIdx++).getImm();
+ GCMap.push_back(std::make_pair(B, D));
+ }
+
+ return GCMapSize;
+}
+
StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
if (StackMapVersion != 3)
llvm_unreachable("Unsupported stackmap version!");
}
+unsigned StackMaps::getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx) {
+ assert(CurIdx < MI->getNumOperands() && "Bad meta arg index");
+ const auto &MO = MI->getOperand(CurIdx);
+ if (MO.isImm()) {
+ switch (MO.getImm()) {
+ default:
+ llvm_unreachable("Unrecognized operand type.");
+ case StackMaps::DirectMemRefOp:
+ CurIdx += 2;
+ break;
+ case StackMaps::IndirectMemRefOp:
+ CurIdx += 3;
+ break;
+ case StackMaps::ConstantOp:
+ ++CurIdx;
+ break;
+ }
+ }
+ ++CurIdx;
+ assert(CurIdx < MI->getNumOperands() && "points past operand list");
+ return CurIdx;
+}
+
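
getNextMetaArgIdx above encodes how many operand slots each stackmap record occupies: a DirectMemRefOp marker is followed by two payload operands, an IndirectMemRefOp by three, a ConstantOp by one, and a plain register stands alone. A toy, self-contained walker over a flat operand list (marker values and operand contents are made up for the example):

#include <cstdio>
#include <vector>

// Negative values play the role of the marker immediates; anything else is
// a plain register or payload slot.
const int DirectMemRef = -1, IndirectMemRef = -2, Constant = -3;

// Advance past one meta argument: marker + 2 payloads for DirectMemRef,
// marker + 3 for IndirectMemRef, marker + 1 for Constant, and a single slot
// for a bare register.
unsigned nextMetaArgIdx(const std::vector<int> &Ops, unsigned CurIdx) {
  switch (Ops[CurIdx]) {
  case DirectMemRef:   return CurIdx + 3;
  case IndirectMemRef: return CurIdx + 4;
  case Constant:       return CurIdx + 2;
  default:             return CurIdx + 1; // plain register
  }
}

int main() {
  // <reg> <ConstantOp, 42> <IndirectMemRefOp, size, base, offset> <reg>
  std::vector<int> Ops = {7, Constant, 42, IndirectMemRef, 8, 9, 0, 5};
  for (unsigned I = 0; I < Ops.size(); I = nextMetaArgIdx(Ops, I))
    std::printf("meta argument starts at operand %u\n", I);
  // prints indices 0, 1, 3 and 7
  return 0;
}
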
/// Go up the super-register chain until we hit a valid dwarf register number.
static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
int RegNum = TRI->getDwarfRegNum(Reg, false);
@@ -148,6 +234,12 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
if (MOI->isImplicit())
return ++MOI;
+ if (MOI->isUndef()) {
+ // Record an `undef` register as a constant, using the same value ISel uses.
+ Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, 0xFEFEFEFE);
+ return ++MOI;
+ }
+
assert(Register::isPhysicalRegister(MOI->getReg()) &&
"Virtreg operands should have been rewritten before now.");
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
@@ -286,14 +378,82 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
}
}
- LiveOuts.erase(
- llvm::remove_if(LiveOuts,
- [](const LiveOutReg &LO) { return LO.Reg == 0; }),
- LiveOuts.end());
+ llvm::erase_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; });
return LiveOuts;
}
+// See statepoint MI format description in StatepointOpers' class comment
+// in include/llvm/CodeGen/StackMaps.h
+void StackMaps::parseStatepointOpers(const MachineInstr &MI,
+ MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ LocationVec &Locations,
+ LiveOutVec &LiveOuts) {
+ LLVM_DEBUG(dbgs() << "record statepoint : " << MI << "\n");
+ StatepointOpers SO(&MI);
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // CC
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Flags
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Num Deopts
+
+ // Record Deopt Args.
+ unsigned NumDeoptArgs = Locations.back().Offset;
+ assert(Locations.back().Type == Location::Constant);
+ assert(NumDeoptArgs == SO.getNumDeoptArgs());
+
+ while (NumDeoptArgs--)
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
+
+ // Record gc base/derived pairs
+ assert(MOI->isImm() && MOI->getImm() == StackMaps::ConstantOp);
+ ++MOI;
+ assert(MOI->isImm());
+ unsigned NumGCPointers = MOI->getImm();
+ ++MOI;
+ if (NumGCPointers) {
+ // Map logical index of GC ptr to MI operand index.
+ SmallVector<unsigned, 8> GCPtrIndices;
+ unsigned GCPtrIdx = (unsigned)SO.getFirstGCPtrIdx();
+ assert((int)GCPtrIdx != -1);
+ assert(MOI - MI.operands_begin() == GCPtrIdx + 0LL);
+ while (NumGCPointers--) {
+ GCPtrIndices.push_back(GCPtrIdx);
+ GCPtrIdx = StackMaps::getNextMetaArgIdx(&MI, GCPtrIdx);
+ }
+
+ SmallVector<std::pair<unsigned, unsigned>, 8> GCPairs;
+ unsigned NumGCPairs = SO.getGCPointerMap(GCPairs);
+ (void)NumGCPairs;
+ LLVM_DEBUG(dbgs() << "NumGCPairs = " << NumGCPairs << "\n");
+
+ auto MOB = MI.operands_begin();
+ for (auto &P : GCPairs) {
+ assert(P.first < GCPtrIndices.size() && "base pointer index not found");
+ assert(P.second < GCPtrIndices.size() &&
+ "derived pointer index not found");
+ unsigned BaseIdx = GCPtrIndices[P.first];
+ unsigned DerivedIdx = GCPtrIndices[P.second];
+ LLVM_DEBUG(dbgs() << "Base : " << BaseIdx << " Derived : " << DerivedIdx
+ << "\n");
+ (void)parseOperand(MOB + BaseIdx, MOE, Locations, LiveOuts);
+ (void)parseOperand(MOB + DerivedIdx, MOE, Locations, LiveOuts);
+ }
+
+ MOI = MOB + GCPtrIdx;
+ }
+
+ // Record gc allocas
+ assert(MOI < MOE);
+ assert(MOI->isImm() && MOI->getImm() == StackMaps::ConstantOp);
+ ++MOI;
+ unsigned NumAllocas = MOI->getImm();
+ ++MOI;
+ while (NumAllocas--) {
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
+ assert(MOI < MOE);
+ }
+}
+
void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOI,
@@ -311,9 +471,11 @@ void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
}
// Parse operands.
- while (MOI != MOE) {
- MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
- }
+ if (MI.getOpcode() == TargetOpcode::STATEPOINT)
+ parseStatepointOpers(MI, MOI, MOE, Locations, LiveOuts);
+ else
+ while (MOI != MOE)
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
// Move large constants into the constant pool.
for (auto &Loc : Locations) {
@@ -394,8 +556,6 @@ void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint");
StatepointOpers opers(&MI);
- // Record all the deopt and gc operands (they're contiguous and run from the
- // initial index to the end of the operand list)
const unsigned StartIdx = opers.getVarIdx();
recordStackMapOpers(L, MI, opers.getID(), MI.operands_begin() + StartIdx,
MI.operands_end(), false);
@@ -404,7 +564,7 @@ void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) {
/// Emit the stackmap header.
///
/// Header {
-/// uint8 : Stack Map Version (currently 2)
+/// uint8 : Stack Map Version (currently 3)
/// uint8 : Reserved (expected to be 0)
/// uint16 : Reserved (expected to be 0)
/// }
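
The header documented above is three fixed-width fields. As a purely illustrative aid (the real emitter writes the fields one at a time through the AsmPrinter; it does not use a struct), the layout can be pictured as:

#include <cstdint>
#include <cstdio>

// Illustrative in-memory view of the stackmap header described above.
struct StackMapHeader {
  uint8_t  Version;    // currently 3
  uint8_t  Reserved0;  // expected to be 0
  uint16_t Reserved1;  // expected to be 0
};

int main() {
  StackMapHeader H{3, 0, 0};
  std::printf("header is %zu bytes, version %u\n", sizeof(H),
              (unsigned)H.Version);
  return 0;
}
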
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
index a343791807e6..10c6dcbdb049 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -170,7 +170,8 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// If this instruction accesses memory make sure it doesn't access beyond
// the bounds of the allocated object.
Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
- if (MemLoc.hasValue() && MemLoc->Size.getValue() > AllocSize)
+ if (MemLoc.hasValue() && MemLoc->Size.hasValue() &&
+ MemLoc->Size.getValue() > AllocSize)
return true;
switch (I->getOpcode()) {
case Instruction::Store:
@@ -191,7 +192,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// Ignore intrinsics that do not become real instructions.
// TODO: Narrow this to intrinsics that have store-like effects.
const auto *CI = cast<CallInst>(I);
- if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
+ if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd())
return true;
break;
}
@@ -251,10 +252,9 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
static const CallInst *findStackProtectorIntrinsic(Function &F) {
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (CI->getCalledFunction() ==
- Intrinsic::getDeclaration(F.getParent(), Intrinsic::stackprotector))
- return CI;
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::stackprotector)
+ return II;
return nullptr;
}
@@ -274,7 +274,6 @@ static const CallInst *findStackProtectorIntrinsic(Function &F) {
bool StackProtector::RequiresStackProtector() {
bool Strong = false;
bool NeedsProtector = false;
- HasPrologue = findStackProtectorIntrinsic(*F);
if (F->hasFnAttribute(Attribute::SafeStack))
return false;
@@ -295,8 +294,6 @@ bool StackProtector::RequiresStackProtector() {
Strong = true; // Use the same heuristic as strong to determine SSPLayout
} else if (F->hasFnAttribute(Attribute::StackProtectStrong))
Strong = true;
- else if (HasPrologue)
- NeedsProtector = true;
else if (!F->hasFnAttribute(Attribute::StackProtect))
return false;
@@ -381,7 +378,10 @@ bool StackProtector::RequiresStackProtector() {
static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
IRBuilder<> &B,
bool *SupportsSelectionDAGSP = nullptr) {
- if (Value *Guard = TLI->getIRStackGuard(B))
+ Value *Guard = TLI->getIRStackGuard(B);
+ auto GuardMode = TLI->getTargetMachine().Options.StackProtectorGuard;
+ if ((GuardMode == llvm::StackProtectorGuards::TLS ||
+ GuardMode == llvm::StackProtectorGuards::None) && Guard)
return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard");
// Use SelectionDAG SSP handling, since there isn't an IR guard.
@@ -470,21 +470,36 @@ bool StackProtector::InsertStackProtectors() {
// instrumentation has already been generated.
HasIRCheck = true;
+ // If we're instrumenting a block with a musttail call, the check has to be
+ // inserted before the call rather than between it and the return. The
+ // verifier guarantees that a musttail call is either directly before the
+ // return or separated from it only by a single bitcast of the return value,
+ // so those are the only two cases we need to handle here.
+ Instruction *CheckLoc = RI;
+ Instruction *Prev = RI->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
+ CheckLoc = Prev;
+ else if (Prev) {
+ Prev = Prev->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
+ CheckLoc = Prev;
+ }
+
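
A minimal model of the CheckLoc selection above, assuming debug instructions have already been skipped (the real code uses getPrevNonDebugInstruction for that) and using toy instruction kinds instead of llvm::Instruction:

#include <cstdio>
#include <vector>

enum class Kind { Other, Bitcast, MustTailCall, Ret };

// Pick the index the stack-protector check must be inserted before: the
// musttail call if the return is preceded by one, optionally through a
// single bitcast of the return value, otherwise the return itself.
size_t chooseCheckLoc(const std::vector<Kind> &Block) {
  size_t RetIdx = Block.size() - 1; // the block ends with the return
  if (RetIdx >= 1 && Block[RetIdx - 1] == Kind::MustTailCall)
    return RetIdx - 1;
  if (RetIdx >= 2 && Block[RetIdx - 1] == Kind::Bitcast &&
      Block[RetIdx - 2] == Kind::MustTailCall)
    return RetIdx - 2;
  return RetIdx;
}

int main() {
  std::vector<Kind> A = {Kind::Other, Kind::MustTailCall, Kind::Ret};
  std::vector<Kind> B = {Kind::Other, Kind::MustTailCall, Kind::Bitcast,
                         Kind::Ret};
  std::vector<Kind> C = {Kind::Other, Kind::Ret};
  std::printf("A: check before index %zu\n", chooseCheckLoc(A)); // 1 (call)
  std::printf("B: check before index %zu\n", chooseCheckLoc(B)); // 1 (call)
  std::printf("C: check before index %zu\n", chooseCheckLoc(C)); // 1 (ret)
  return 0;
}
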
// Generate epilogue instrumentation. The epilogue instrumentation can be
// function-based or inlined depending on which mechanism the target is
// providing.
if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
// Generate the function-based epilogue instrumentation.
// The target provides a guard check function, generate a call to it.
- IRBuilder<> B(RI);
+ IRBuilder<> B(CheckLoc);
LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
CallInst *Call = B.CreateCall(GuardCheck, {Guard});
Call->setAttributes(GuardCheck->getAttributes());
Call->setCallingConv(GuardCheck->getCallingConv());
} else {
// Generate the epilogue with inline instrumentation.
- // If we do not support SelectionDAG based tail calls, generate IR level
- // tail calls.
+ // If we do not support SelectionDAG based calls, generate IR level
+ // calls.
//
// For each block with a return instruction, convert this:
//
@@ -514,7 +529,8 @@ bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = CreateFailBB();
// Split the basic block before the return instruction.
- BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");
+ BasicBlock *NewBB =
+ BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return");
// Update the dominator tree if we need to.
if (DT && DT->isReachableFromEntry(BB)) {
@@ -556,7 +572,9 @@ BasicBlock *StackProtector::CreateFailBB() {
LLVMContext &Context = F->getContext();
BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F);
IRBuilder<> B(FailBB);
- B.SetCurrentDebugLocation(DebugLoc::get(0, 0, F->getSubprogram()));
+ if (F->getSubprogram())
+ B.SetCurrentDebugLocation(
+ DILocation::get(Context, 0, 0, F->getSubprogram()));
if (Trip.isOSOpenBSD()) {
FunctionCallee StackChkFail = M->getOrInsertFunction(
"__stack_smash_handler", Type::getVoidTy(Context),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index 3cc5d30ebad7..a6f8974f3343 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -145,7 +145,7 @@ namespace {
// their weight.
struct IntervalSorter {
bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
- return LHS->weight > RHS->weight;
+ return LHS->weight() > RHS->weight();
}
};
@@ -174,7 +174,8 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
continue;
LiveInterval &li = LS->getInterval(FI);
if (!MI.isDebugValue())
- li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI);
+ li.incrementWeight(
+ LiveIntervals::getSpillWeight(false, true, MBFI, MI));
}
for (MachineInstr::mmo_iterator MMOI = MI.memoperands_begin(),
EE = MI.memoperands_end();
@@ -222,7 +223,7 @@ void StackSlotColoring::InitializeSlots() {
for (auto *I : Intervals) {
LiveInterval &li = I->second;
LLVM_DEBUG(li.dump());
- int FI = Register::stackSlot2Index(li.reg);
+ int FI = Register::stackSlot2Index(li.reg());
if (MFI->isDeadObjectIndex(FI))
continue;
@@ -269,7 +270,7 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
int StackSlotColoring::ColorSlot(LiveInterval *li) {
int Color = -1;
bool Share = false;
- int FI = Register::stackSlot2Index(li->reg);
+ int FI = Register::stackSlot2Index(li->reg());
uint8_t StackID = MFI->getStackID(FI);
if (!DisableSharing) {
@@ -331,12 +332,12 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
bool Changed = false;
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = Register::stackSlot2Index(li->reg);
+ int SS = Register::stackSlot2Index(li->reg());
int NewSS = ColorSlot(li);
assert(NewSS >= 0 && "Stack coloring failed?");
SlotMapping[SS] = NewSS;
RevMap[NewSS].push_back(SS);
- SlotWeights[NewSS] += li->weight;
+ SlotWeights[NewSS] += li->weight();
UsedColors.set(NewSS);
Changed |= (SS != NewSS);
}
@@ -344,8 +345,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = Register::stackSlot2Index(li->reg);
- li->weight = SlotWeights[SS];
+ int SS = Register::stackSlot2Index(li->reg());
+ li->setWeight(SlotWeights[SS]);
}
// Sort them by new weight.
llvm::stable_sort(SSIntervals, IntervalSorter());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
index dd0b9d4c2e48..4408011c95c0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -202,8 +202,8 @@ void SwiftErrorValueTracking::propagateVRegs() {
// downward defs.
bool needPHI =
VRegs.size() >= 1 &&
- std::find_if(
- VRegs.begin(), VRegs.end(),
+ llvm::find_if(
+ VRegs,
[&](const std::pair<const MachineBasicBlock *, Register> &V)
-> bool { return V.second != VRegs[0].second; }) !=
VRegs.end();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 078c9691f8dc..dfcec32d9537 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -11,8 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index f9773f74a7bd..575bf555c489 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -720,8 +720,7 @@ bool TailDuplicator::duplicateSimpleBB(
SmallVectorImpl<MachineInstr *> &Copies) {
SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(),
TailBB->succ_end());
- SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
- TailBB->pred_end());
+ SmallVector<MachineBasicBlock *, 8> Preds(TailBB->predecessors());
bool Changed = false;
for (MachineBasicBlock *PredBB : Preds) {
if (PredBB->hasEHPadSuccessor() || PredBB->mayHaveInlineAsmBr())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index f8b482c04a58..b0594ec086b2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -41,9 +41,9 @@ bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const
/// frame of the specified index, along with the frame register used
/// (in output arg FrameReg). This is the default implementation which
/// is overridden for some targets.
-int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- Register &FrameReg) const {
+StackOffset
+TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
@@ -52,8 +52,9 @@ int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
// something different.
FrameReg = RI->getFrameRegister(MF);
- return MFI.getObjectOffset(FI) + MFI.getStackSize() -
- getOffsetOfLocalArea() + MFI.getOffsetAdjustment();
+ return StackOffset::getFixed(MFI.getObjectOffset(FI) + MFI.getStackSize() -
+ getOffsetOfLocalArea() +
+ MFI.getOffsetAdjustment());
}
bool TargetFrameLowering::needsFrameIndexResolution(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 24f3f96d0b1d..165860ef1aa8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -69,6 +69,15 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
llvm_unreachable("Target didn't implement insertNoop!");
}
+/// insertNoops - Insert noops into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned Quantity) const {
+ for (unsigned i = 0; i < Quantity; ++i)
+ insertNoop(MBB, MI);
+}
+
static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
return strncmp(Str, MAI.getCommentString().data(),
MAI.getCommentString().size()) == 0;
@@ -471,6 +480,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops, int FrameIndex,
const TargetInstrInfo &TII) {
unsigned StartIdx = 0;
+ unsigned NumDefs = 0;
switch (MI.getOpcode()) {
case TargetOpcode::STACKMAP: {
// StackMapLiveValues are foldable
@@ -486,16 +496,25 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
case TargetOpcode::STATEPOINT: {
// For statepoints, fold deopt and gc arguments, but not call arguments.
StartIdx = StatepointOpers(&MI).getVarIdx();
+ NumDefs = MI.getNumDefs();
break;
}
default:
llvm_unreachable("unexpected stackmap opcode");
}
+ unsigned DefToFoldIdx = MI.getNumOperands();
+
// Return false if any operands requested for folding are not foldable (not
// part of the stackmap's live values).
for (unsigned Op : Ops) {
- if (Op < StartIdx)
+ if (Op < NumDefs) {
+ assert(DefToFoldIdx == MI.getNumOperands() && "Folding multiple defs");
+ DefToFoldIdx = Op;
+ } else if (Op < StartIdx) {
+ return nullptr;
+ }
+ if (MI.getOperand(Op).isTied())
return nullptr;
}
@@ -505,11 +524,16 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
// No need to fold return, the meta data, and function arguments
for (unsigned i = 0; i < StartIdx; ++i)
- MIB.add(MI.getOperand(i));
+ if (i != DefToFoldIdx)
+ MIB.add(MI.getOperand(i));
- for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
+ for (unsigned i = StartIdx, e = MI.getNumOperands(); i < e; ++i) {
MachineOperand &MO = MI.getOperand(i);
+ unsigned TiedTo = e;
+ (void)MI.isRegTiedToDefOperand(i, &TiedTo);
+
if (is_contained(Ops, i)) {
+ assert(TiedTo == e && "Cannot fold tied operands");
unsigned SpillSize;
unsigned SpillOffset;
// Compute the spill slot size and offset.
@@ -523,9 +547,15 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
MIB.addImm(SpillSize);
MIB.addFrameIndex(FrameIndex);
MIB.addImm(SpillOffset);
- }
- else
+ } else {
MIB.add(MO);
+ if (TiedTo < e) {
+ assert(TiedTo < NumDefs && "Bad tied operand");
+ if (TiedTo > DefToFoldIdx)
+ --TiedTo;
+ NewMI->tieOperands(TiedTo, NewMI->getNumOperands() - 1);
+ }
+ }
}
return NewMI;
}
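
The tied-operand bookkeeping above boils down to one renumbering rule: once the folded def is dropped from the new instruction, any tied-def index above the dropped one shifts down by one, while indices below it are unchanged. A tiny sketch of just that arithmetic (hypothetical helper name, not an LLVM API):

#include <cassert>
#include <cstdio>

// Recompute which def a use is tied to after the operand at DefToFoldIdx has
// been removed from the front of the operand list. Defs keep their relative
// order, so only indices past the removed one shift down by one.
unsigned remapTiedDef(unsigned TiedTo, unsigned DefToFoldIdx) {
  assert(TiedTo != DefToFoldIdx && "cannot stay tied to the folded def");
  return TiedTo > DefToFoldIdx ? TiedTo - 1 : TiedTo;
}

int main() {
  // Two defs (indices 0 and 1); def 0 is folded into a frame index.
  std::printf("was tied to def 1 -> now def %u\n", remapTiedDef(1, 0)); // 0
  // If def 1 had been folded instead, a tie to def 0 is unchanged.
  std::printf("was tied to def 0 -> still def %u\n", remapTiedDef(0, 1)); // 0
  return 0;
}
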
@@ -748,8 +778,8 @@ bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
// instruction is known to not increase the critical path, then don't match
// that pattern.
bool TargetInstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
bool Commute;
if (isReassociationCandidate(Root, Commute)) {
// We found a sequence of instructions that may be suitable for a
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 42c1fa8af0e6..28c8bd0a7ded 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -135,23 +135,28 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);
// For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
- if (TT.getArch() == Triple::ppc || TT.isPPC64()) {
+ if (TT.isPPC()) {
setLibcallName(RTLIB::ADD_F128, "__addkf3");
setLibcallName(RTLIB::SUB_F128, "__subkf3");
setLibcallName(RTLIB::MUL_F128, "__mulkf3");
setLibcallName(RTLIB::DIV_F128, "__divkf3");
+ setLibcallName(RTLIB::POWI_F128, "__powikf2");
setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
+ setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti");
setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti");
setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf");
setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
+ setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf");
setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
setLibcallName(RTLIB::UNE_F128, "__nekf2");
setLibcallName(RTLIB::OGE_F128, "__gekf2");
@@ -224,6 +229,10 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f16) {
if (RetVT == MVT::f32)
return FPEXT_F16_F32;
+ if (RetVT == MVT::f64)
+ return FPEXT_F16_F64;
+ if (RetVT == MVT::f128)
+ return FPEXT_F16_F128;
} else if (OpVT == MVT::f32) {
if (RetVT == MVT::f64)
return FPEXT_F32_F64;
@@ -285,7 +294,14 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F16_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F16_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F16_I128;
+ } else if (OpVT == MVT::f32) {
if (RetVT == MVT::i32)
return FPTOSINT_F32_I32;
if (RetVT == MVT::i64)
@@ -327,7 +343,14 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F16_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F16_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F16_I128;
+ } else if (OpVT == MVT::f32) {
if (RetVT == MVT::i32)
return FPTOUINT_F32_I32;
if (RetVT == MVT::i64)
@@ -370,6 +393,8 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f16)
+ return SINTTOFP_I32_F16;
if (RetVT == MVT::f32)
return SINTTOFP_I32_F32;
if (RetVT == MVT::f64)
@@ -381,6 +406,8 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::ppcf128)
return SINTTOFP_I32_PPCF128;
} else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f16)
+ return SINTTOFP_I64_F16;
if (RetVT == MVT::f32)
return SINTTOFP_I64_F32;
if (RetVT == MVT::f64)
@@ -392,6 +419,8 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::ppcf128)
return SINTTOFP_I64_PPCF128;
} else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f16)
+ return SINTTOFP_I128_F16;
if (RetVT == MVT::f32)
return SINTTOFP_I128_F32;
if (RetVT == MVT::f64)
@@ -410,6 +439,8 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f16)
+ return UINTTOFP_I32_F16;
if (RetVT == MVT::f32)
return UINTTOFP_I32_F32;
if (RetVT == MVT::f64)
@@ -421,6 +452,8 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::ppcf128)
return UINTTOFP_I32_PPCF128;
} else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f16)
+ return UINTTOFP_I64_F16;
if (RetVT == MVT::f32)
return UINTTOFP_I64_F32;
if (RetVT == MVT::f64)
@@ -432,6 +465,8 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::ppcf128)
return UINTTOFP_I64_PPCF128;
} else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f16)
+ return UINTTOFP_I128_F16;
if (RetVT == MVT::f32)
return UINTTOFP_I128_F32;
if (RetVT == MVT::f64)
@@ -446,6 +481,83 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
return UNKNOWN_LIBCALL;
}
+RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
+ MVT VT) {
+ unsigned ModeN, ModelN;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ ModeN = 0;
+ break;
+ case MVT::i16:
+ ModeN = 1;
+ break;
+ case MVT::i32:
+ ModeN = 2;
+ break;
+ case MVT::i64:
+ ModeN = 3;
+ break;
+ case MVT::i128:
+ ModeN = 4;
+ break;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+
+ switch (Order) {
+ case AtomicOrdering::Monotonic:
+ ModelN = 0;
+ break;
+ case AtomicOrdering::Acquire:
+ ModelN = 1;
+ break;
+ case AtomicOrdering::Release:
+ ModelN = 2;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ ModelN = 3;
+ break;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+
+#define LCALLS(A, B) \
+ { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
+#define LCALL5(A) \
+ LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
+ switch (Opc) {
+ case ISD::ATOMIC_CMP_SWAP: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_SWAP: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_ADD: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_OR: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_CLR: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_XOR: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)};
+ return LC[ModeN][ModelN];
+ }
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+#undef LCALLS
+#undef LCALL5
+}
+
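
getOUTLINE_ATOMIC above is a two-level table lookup: the value type selects a row (i8 through i128) and the atomic ordering selects a column (relaxed, acquire, release, acq_rel, with seq_cst sharing the acq_rel column). A hedged, self-contained sketch of the same indexing scheme; the helper names in the table are illustrative strings, not the actual RTLIB entries:

#include <cstdio>

// Map an access size in bytes to the row index used by the LC[5][4] tables
// above (i8, i16, i32, i64, i128); -1 means no outlined helper exists.
int sizeClass(unsigned Bytes) {
  switch (Bytes) {
  case 1:  return 0;
  case 2:  return 1;
  case 4:  return 2;
  case 8:  return 3;
  case 16: return 4;
  default: return -1;
  }
}

enum class Order { Relaxed, Acquire, Release, AcqRel, SeqCst };

// Map a memory ordering to the column index; seq_cst shares the acq_rel
// column, as in the switch above.
int orderClass(Order O) {
  switch (O) {
  case Order::Relaxed: return 0;
  case Order::Acquire: return 1;
  case Order::Release: return 2;
  case Order::AcqRel:
  case Order::SeqCst:  return 3;
  }
  return -1;
}

int main() {
  static const char *Swp[5][4] = {
      {"swp1_relax", "swp1_acq", "swp1_rel", "swp1_acq_rel"},
      {"swp2_relax", "swp2_acq", "swp2_rel", "swp2_acq_rel"},
      {"swp4_relax", "swp4_acq", "swp4_rel", "swp4_acq_rel"},
      {"swp8_relax", "swp8_acq", "swp8_rel", "swp8_acq_rel"},
      {"swp16_relax", "swp16_acq", "swp16_rel", "swp16_acq_rel"}};
  int Row = sizeClass(4), Col = orderClass(Order::SeqCst);
  std::printf("4-byte seq_cst swap -> %s\n", Swp[Row][Col]); // swp4_acq_rel
  return 0;
}
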
RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
#define OP_TO_LIBCALL(Name, Enum) \
case Name: \
@@ -615,7 +727,7 @@ void TargetLoweringBase::initActions() {
std::end(TargetDAGCombineArray), 0);
for (MVT VT : MVT::fp_valuetypes()) {
- MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits().getFixedSize());
+ MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits());
if (IntVT.isValid()) {
setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
@@ -657,6 +769,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::UADDSAT, VT, Expand);
setOperationAction(ISD::SSUBSAT, VT, Expand);
setOperationAction(ISD::USUBSAT, VT, Expand);
+ setOperationAction(ISD::SSHLSAT, VT, Expand);
+ setOperationAction(ISD::USHLSAT, VT, Expand);
setOperationAction(ISD::SMULFIX, VT, Expand);
setOperationAction(ISD::SMULFIXSAT, VT, Expand);
setOperationAction(ISD::UMULFIX, VT, Expand);
@@ -665,6 +779,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SDIVFIXSAT, VT, Expand);
setOperationAction(ISD::UDIVFIX, VT, Expand);
setOperationAction(ISD::UDIVFIXSAT, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -678,6 +794,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ADDCARRY, VT, Expand);
setOperationAction(ISD::SUBCARRY, VT, Expand);
setOperationAction(ISD::SETCCCARRY, VT, Expand);
+ setOperationAction(ISD::SADDO_CARRY, VT, Expand);
+ setOperationAction(ISD::SSUBO_CARRY, VT, Expand);
// ADDC/ADDE/SUBC/SUBE default to expand.
setOperationAction(ISD::ADDC, VT, Expand);
@@ -690,6 +808,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::BITREVERSE, VT, Expand);
+ setOperationAction(ISD::PARITY, VT, Expand);
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
@@ -728,6 +847,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_SEQ_FMUL, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -772,6 +893,8 @@ void TargetLoweringBase::initActions() {
// On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
// here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
+ setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
}
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
@@ -801,6 +924,11 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
}
}
+bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
+}
+
void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
// If the command-line option was specified, ignore this request.
if (!JumpIsExpensiveOverride.getNumOccurrences())
@@ -823,9 +951,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
"Promote may not follow Expand or Promote");
if (LA == TypeSplitVector)
- return LegalizeKind(LA,
- EVT::getVectorVT(Context, SVT.getVectorElementType(),
- SVT.getVectorElementCount() / 2));
+ return LegalizeKind(LA, EVT(SVT).getHalfNumVectorElementsVT(Context));
if (LA == TypeScalarizeVector)
return LegalizeKind(LA, SVT.getVectorElementType());
return LegalizeKind(LA, NVT);
@@ -856,10 +982,10 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
EVT EltVT = VT.getVectorElementType();
// Vectors with only one element are always scalarized.
- if (NumElts == 1)
+ if (NumElts.isScalar())
return LegalizeKind(TypeScalarizeVector, EltVT);
- if (VT.getVectorElementCount() == ElementCount(1, true))
+ if (VT.getVectorElementCount() == ElementCount::getScalable(1))
report_fatal_error("Cannot legalize this vector");
// Try to widen vector elements until the element type is a power of two and
@@ -869,7 +995,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// Vectors with a number of elements that is not a power of two are always
// widened, for example <3 x i8> -> <4 x i8>.
if (!VT.isPow2VectorType()) {
- NumElts = NumElts.NextPowerOf2();
+ NumElts = NumElts.coefficientNextPowerOf2();
EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
return LegalizeKind(TypeWidenVector, NVT);
}
@@ -881,7 +1007,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// <4 x i140> -> <2 x i140>
if (LK.first == TypeExpandInteger)
return LegalizeKind(TypeSplitVector,
- EVT::getVectorVT(Context, EltVT, NumElts / 2));
+ VT.getHalfNumVectorElementsVT(Context));
// Promote the integer element types until a legal vector type is found
// or until the element integer type is too big. If a legal type was not
@@ -918,7 +1044,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// If there is no wider legal type, split the vector.
while (true) {
// Round up to the next power of 2.
- NumElts = NumElts.NextPowerOf2();
+ NumElts = NumElts.coefficientNextPowerOf2();
// If there is no simple vector type with this many elements then there
// cannot be a larger legal vector type. Note that this assumes that
@@ -941,7 +1067,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
}
// Vectors with illegal element types are expanded.
- EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorElementCount() / 2);
+ EVT NVT = EVT::getVectorVT(Context, EltVT,
+ VT.getVectorElementCount().divideCoefficientBy(2));
return LegalizeKind(TypeSplitVector, NVT);
}
@@ -957,23 +1084,24 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
// Scalable vectors cannot be scalarized, so splitting or widening is
// required.
- if (VT.isScalableVector() && !isPowerOf2_32(EC.Min))
+ if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue()))
llvm_unreachable(
"Splitting or widening of non-power-of-2 MVTs is not implemented.");
// FIXME: We don't support non-power-of-2-sized vectors for now.
// Ideally we could break down into LHS/RHS like LegalizeDAG does.
- if (!isPowerOf2_32(EC.Min)) {
+ if (!isPowerOf2_32(EC.getKnownMinValue())) {
// Split EC to unit size (scalable property is preserved).
- NumVectorRegs = EC.Min;
- EC = EC / NumVectorRegs;
+ NumVectorRegs = EC.getKnownMinValue();
+ EC = ElementCount::getFixed(1);
}
// Divide the input until we get to a supported size. This will
// always end up with an EC that represent a scalar or a scalable
// scalar.
- while (EC.Min > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
- EC.Min >>= 1;
+ while (EC.getKnownMinValue() > 1 &&
+ !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
+ EC = EC.divideCoefficientBy(2);
NumVectorRegs <<= 1;
}
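
The loop above halves the (known-minimum) element count until the vector type becomes legal, doubling the number of registers each time. A toy version with a made-up legality rule shows the resulting split:

#include <cstdio>

// Toy legality rule: pretend the target supports at most 4-element vectors.
bool isLegalElementCount(unsigned EC) { return EC <= 4; }

int main() {
  unsigned EC = 16;           // start with a 16-element vector
  unsigned NumVectorRegs = 1;
  // Halve the element count until a supported size is reached, doubling the
  // register count each time, mirroring the shape of the loop above.
  while (EC > 1 && !isLegalElementCount(EC)) {
    EC /= 2;
    NumVectorRegs <<= 1;
  }
  std::printf("split into %u registers of %u elements\n", NumVectorRegs, EC);
  // prints: split into 4 registers of 4 elements
  return 0;
}
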
@@ -984,7 +1112,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
NewVT = EltTy;
IntermediateVT = NewVT;
- unsigned LaneSizeInBits = NewVT.getScalarSizeInBits().getFixedSize();
+ unsigned LaneSizeInBits = NewVT.getScalarSizeInBits();
// Convert sizes such as i33 to i64.
if (!isPowerOf2_32(LaneSizeInBits))
@@ -993,8 +1121,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
MVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs *
- (LaneSizeInBits / DestVT.getScalarSizeInBits().getFixedSize());
+ return NumVectorRegs * (LaneSizeInBits / DestVT.getScalarSizeInBits());
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
@@ -1041,9 +1168,19 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// Inherit previous memory operands.
MIB.cloneMemRefs(*MI);
- for (auto &MO : MI->operands()) {
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
if (!MO.isFI()) {
+ // Index of the Def operand this Use is tied to.
+ // Since Defs come before Uses, if a Use is tied, the index of its Def
+ // must be smaller than the index of that Use.
+ // Also, Defs preserve their position in the new MI.
+ unsigned TiedTo = i;
+ if (MO.isReg() && MO.isTied())
+ TiedTo = MI->findTiedOperandIdx(i);
MIB.add(MO);
+ if (TiedTo < i)
+ MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1);
continue;
}
@@ -1090,36 +1227,6 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
return MBB;
}
-MachineBasicBlock *
-TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL &&
- "Called emitXRayCustomEvent on the wrong MI!");
- auto &MF = *MI.getMF();
- auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
- for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
- MIB.add(MI.getOperand(OpIdx));
-
- MBB->insert(MachineBasicBlock::iterator(MI), MIB);
- MI.eraseFromParent();
- return MBB;
-}
-
-MachineBasicBlock *
-TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL &&
- "Called emitXRayTypedEvent on the wrong MI!");
- auto &MF = *MI.getMF();
- auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
- for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
- MIB.add(MI.getOperand(OpIdx));
-
- MBB->insert(MachineBasicBlock::iterator(MI), MIB);
- MI.eraseFromParent();
- return MBB;
-}
-
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
// This function is in TargetLowering because it uses RegClassForVT which would
@@ -1282,7 +1389,7 @@ void TargetLoweringBase::computeRegisterProperties(
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
- if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
+ if (SVT.getScalarSizeInBits() > EltVT.getFixedSizeInBits() &&
SVT.getVectorElementCount() == EC && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
@@ -1298,13 +1405,15 @@ void TargetLoweringBase::computeRegisterProperties(
}
case TypeWidenVector:
- if (isPowerOf2_32(EC.Min)) {
+ if (isPowerOf2_32(EC.getKnownMinValue())) {
// Try to widen the vector.
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
if (SVT.getVectorElementType() == EltVT &&
SVT.isScalableVector() == IsScalable &&
- SVT.getVectorElementCount().Min > EC.Min && isTypeLegal(SVT)) {
+ SVT.getVectorElementCount().getKnownMinValue() >
+ EC.getKnownMinValue() &&
+ isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1348,10 +1457,10 @@ void TargetLoweringBase::computeRegisterProperties(
ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
else if (PreferredAction == TypeSplitVector)
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
- else if (EC.Min > 1)
+ else if (EC.getKnownMinValue() > 1)
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
else
- ValueTypeActions.setTypeAction(VT, EC.Scalable
+ ValueTypeActions.setTypeAction(VT, EC.isScalable()
? TypeScalarizeScalableVector
: TypeScalarizeVector);
} else {
@@ -1409,7 +1518,8 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
// This handles things like <2 x float> -> <4 x float> and
// <4 x i1> -> <4 x i32>.
LegalizeTypeAction TA = getTypeAction(Context, VT);
- if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+ if (EltCnt.getKnownMinValue() != 1 &&
+ (TA == TypeWidenVector || TA == TypePromoteInteger)) {
EVT RegisterEVT = getTypeToTransformTo(Context, VT);
if (isTypeLegal(RegisterEVT)) {
IntermediateVT = RegisterEVT;
@@ -1426,7 +1536,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
// Scalable vectors cannot be scalarized, so handle the legalisation of the
// types like done elsewhere in SelectionDAG.
- if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) {
+ if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) {
LegalizeKind LK;
EVT PartVT = VT;
do {
@@ -1435,15 +1545,15 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
PartVT = LK.second;
} while (LK.first != TypeLegal);
- NumIntermediates =
- VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min;
+ NumIntermediates = VT.getVectorElementCount().getKnownMinValue() /
+ PartVT.getVectorElementCount().getKnownMinValue();
// FIXME: This code needs to be extended to handle more complex vector
// breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
// supported cases are vectors that are broken down into equal parts
// such as nxv6i64 -> 3 x nxv2i64.
- assert(NumIntermediates * PartVT.getVectorElementCount().Min ==
- VT.getVectorElementCount().Min &&
+ assert((PartVT.getVectorElementCount() * NumIntermediates) ==
+ VT.getVectorElementCount() &&
"Expected an integer multiple of PartVT");
IntermediateVT = PartVT;
RegisterVT = getRegisterType(Context, IntermediateVT);
@@ -1452,16 +1562,16 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
// FIXME: We don't support non-power-of-2-sized vectors for now. Ideally
// we could break down into LHS/RHS like LegalizeDAG does.
- if (!isPowerOf2_32(EltCnt.Min)) {
- NumVectorRegs = EltCnt.Min;
- EltCnt.Min = 1;
+ if (!isPowerOf2_32(EltCnt.getKnownMinValue())) {
+ NumVectorRegs = EltCnt.getKnownMinValue();
+ EltCnt = ElementCount::getFixed(1);
}
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
- while (EltCnt.Min > 1 &&
+ while (EltCnt.getKnownMinValue() > 1 &&
!isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) {
- EltCnt.Min >>= 1;
+ EltCnt = EltCnt.divideCoefficientBy(2);
NumVectorRegs <<= 1;
}
@@ -1479,7 +1589,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
TypeSize NewVTSize = NewVT.getSizeInBits();
// Convert sizes such as i33 to i64.
if (!isPowerOf2_32(NewVTSize.getKnownMinSize()))
- NewVTSize = NewVTSize.NextPowerOf2();
+ NewVTSize = NewVTSize.coefficientNextPowerOf2();
return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
}
@@ -1616,6 +1726,14 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
MMO.getFlags(), Fast);
}
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, LLT Ty,
+ const MachineMemOperand &MMO,
+ bool *Fast) const {
+ return allowsMemoryAccess(Context, DL, getMVTForLLT(Ty), MMO.getAddrSpace(),
+ MMO.getAlign(), MMO.getFlags(), Fast);
+}
+
BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
return BranchProbability(MinPercentageForPredictableBranch, 100);
}
@@ -1838,10 +1956,14 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
// Currently only support "standard" __stack_chk_guard.
// TODO: add LOAD_STACK_GUARD support.
void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
- if (!M.getNamedValue("__stack_chk_guard"))
- new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
- GlobalVariable::ExternalLinkage,
- nullptr, "__stack_chk_guard");
+ if (!M.getNamedValue("__stack_chk_guard")) {
+ auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
+ GlobalVariable::ExternalLinkage, nullptr,
+ "__stack_chk_guard");
+ if (TM.getRelocationModel() == Reloc::Static &&
+ !TM.getTargetTriple().isWindowsGNUEnvironment())
+ GV->setDSOLocal(true);
+ }
}
// Currently only support "standard" __stack_chk_guard.
@@ -1925,7 +2047,7 @@ static bool parseRefinementStep(StringRef In, size_t &Position,
// step parameter.
if (RefStepString.size() == 1) {
char RefStepChar = RefStepString[0];
- if (RefStepChar >= '0' && RefStepChar <= '9') {
+ if (isDigit(RefStepChar)) {
Value = RefStepChar - '0';
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 27bebe503ce6..fe64b38cf0be 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -21,6 +21,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -39,6 +40,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -104,10 +106,14 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,
// ELF
//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileELF::TargetLoweringObjectFileELF()
+ : TargetLoweringObjectFile() {
+ SupportDSOLocalEquivalentLowering = true;
+}
+
void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
const TargetMachine &TgtM) {
TargetLoweringObjectFile::Initialize(Ctx, TgtM);
- TM = &TgtM;
CodeModel::Model CM = TgtM.getCodeModel();
InitializeELF(TgtM.Options.UseInitArray);
@@ -122,6 +128,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// Fallthrough if not using EHABI
LLVM_FALLTHROUGH;
case Triple::ppc:
+ case Triple::ppcle:
case Triple::x86:
PersonalityEncoding = isPositionIndependent()
? dwarf::DW_EH_PE_indirect |
@@ -174,11 +181,20 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
if (isPositionIndependent()) {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
+ // ILP32 uses sdata4 instead of sdata8
+ if (TgtM.getTargetTriple().getEnvironment() == Triple::GNUILP32) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ }
} else {
PersonalityEncoding = dwarf::DW_EH_PE_absptr;
LSDAEncoding = dwarf::DW_EH_PE_absptr;
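
For AArch64, the hunk above selects sdata4 exception-table encodings when the triple's environment is GNUILP32 and keeps sdata8 for LP64. A DWARF EH pointer encoding is a single byte that ORs a data format with an application modifier; the sketch below uses the commonly published constant values (the values are an assumption of this sketch, the authoritative list lives in llvm/BinaryFormat/Dwarf.h).

#include <cstdint>
#include <cstdio>

// Common DWARF exception-handling pointer-encoding constants.
enum : uint8_t {
  DW_EH_PE_absptr   = 0x00,
  DW_EH_PE_sdata4   = 0x0b,
  DW_EH_PE_sdata8   = 0x0c,
  DW_EH_PE_pcrel    = 0x10,
  DW_EH_PE_indirect = 0x80,
};

// Mirrors the decision above: PIC + ILP32 -> 4-byte pc-relative data,
// PIC + LP64 -> 8-byte, non-PIC -> absolute pointers.
static uint8_t ttypeEncoding(bool PositionIndependent, bool ILP32) {
  if (!PositionIndependent)
    return DW_EH_PE_absptr;
  uint8_t Data = ILP32 ? DW_EH_PE_sdata4 : DW_EH_PE_sdata8;
  return DW_EH_PE_indirect | DW_EH_PE_pcrel | Data;
}

int main() {
  std::printf("PIC ILP32: 0x%02x\n", ttypeEncoding(true, true));   // 0x9b
  std::printf("PIC LP64 : 0x%02x\n", ttypeEncoding(true, false));  // 0x9c
}
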
@@ -310,6 +326,29 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
}
}
+ if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+ // Emit a descriptor for every function including functions that have an
+ // available external linkage. We may not want this for imported functions
+ // that have code in another thinLTO module but we don't have a good way to
+ // tell them apart from inline functions defined in header files. Therefore
+ // we put each descriptor in a separate comdat section and rely on the
+ // linker to deduplicate.
+ for (const auto *Operand : FuncInfo->operands()) {
+ const auto *MD = cast<MDNode>(Operand);
+ auto *GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0));
+ auto *Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
+ auto *Name = cast<MDString>(MD->getOperand(2));
+ auto *S = C.getObjectFileInfo()->getPseudoProbeDescSection(
+ TM->getFunctionSections() ? Name->getString() : StringRef());
+
+ Streamer.SwitchSection(S);
+ Streamer.emitInt64(GUID->getZExtValue());
+ Streamer.emitInt64(Hash->getZExtValue());
+ Streamer.emitULEB128IntValue(Name->getString().size());
+ Streamer.emitBytes(Name->getString());
+ }
+ }
+
unsigned Version = 0;
unsigned Flags = 0;
StringRef Section;
@@ -324,46 +363,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
Streamer.AddBlankLine();
}
- SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
- M.getModuleFlagsMetadata(ModuleFlags);
-
- MDNode *CFGProfile = nullptr;
-
- for (const auto &MFE : ModuleFlags) {
- StringRef Key = MFE.Key->getString();
- if (Key == "CG Profile") {
- CFGProfile = cast<MDNode>(MFE.Val);
- break;
- }
- }
-
- if (!CFGProfile)
- return;
-
- auto GetSym = [this](const MDOperand &MDO) -> MCSymbol * {
- if (!MDO)
- return nullptr;
- auto V = cast<ValueAsMetadata>(MDO);
- const Function *F = cast<Function>(V->getValue());
- return TM->getSymbol(F);
- };
-
- for (const auto &Edge : CFGProfile->operands()) {
- MDNode *E = cast<MDNode>(Edge);
- const MCSymbol *From = GetSym(E->getOperand(0));
- const MCSymbol *To = GetSym(E->getOperand(1));
- // Skip null functions. This can happen if functions are dead stripped after
- // the CGProfile pass has been run.
- if (!From || !To)
- continue;
- uint64_t Count = cast<ConstantAsMetadata>(E->getOperand(2))
- ->getValue()
- ->getUniqueInteger()
- .getZExtValue();
- Streamer.emitCGProfileEntry(
- MCSymbolRefExpr::create(From, MCSymbolRefExpr::VK_None, C),
- MCSymbolRefExpr::create(To, MCSymbolRefExpr::VK_None, C), Count);
- }
+ emitCGProfileMetadata(Streamer, M);
}
MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
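
The new pseudo-probe block above streams one descriptor per function into a .pseudo_probe_desc section: a 64-bit GUID, a 64-bit hash, the name length as ULEB128, then the raw name bytes. The sketch below serializes the same record shape into a plain byte buffer; only the field order is taken from the emit calls above, while the names, values and little-endian choice are illustrative.

#include <cstdint>
#include <string>
#include <vector>

// Minimal ULEB128 encoder, as used for the name-length field.
static void emitULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80;   // more bytes follow
    Out.push_back(Byte);
  } while (V);
}

static void emitInt64(uint64_t V, std::vector<uint8_t> &Out) {
  for (int I = 0; I < 8; ++I)   // little-endian, matching a typical ELF target
    Out.push_back(uint8_t(V >> (8 * I)));
}

// One pseudo-probe descriptor: GUID, hash, uleb128(name size), name bytes.
static void emitProbeDesc(uint64_t GUID, uint64_t Hash, const std::string &Name,
                          std::vector<uint8_t> &Section) {
  emitInt64(GUID, Section);
  emitInt64(Hash, Section);
  emitULEB128(Name.size(), Section);
  Section.insert(Section.end(), Name.begin(), Name.end());
}

int main() {
  std::vector<uint8_t> Section;
  emitProbeDesc(0x1234abcd5678ef01ULL, 42, "foo", Section);
  return Section.size() == 8 + 8 + 1 + 3 ? 0 : 1;
}
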
@@ -436,7 +436,8 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF,
/*AddSegmentInfo=*/false) ||
Name == getInstrProfSectionName(IPSK_covfun, Triple::ELF,
- /*AddSegmentInfo=*/false))
+ /*AddSegmentInfo=*/false) ||
+ Name == ".llvmbc" || Name == ".llvmcmd")
return SectionKind::getMetadata();
if (Name.empty() || Name[0] != '.') return K;
@@ -614,7 +615,7 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
bool HasPrefix = false;
if (const auto *F = dyn_cast<Function>(GO)) {
if (Optional<StringRef> Prefix = F->getSectionPrefix()) {
- Name += *Prefix;
+ raw_svector_ostream(Name) << '.' << *Prefix;
HasPrefix = true;
}
}
@@ -680,11 +681,12 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
// MD_associated in a unique section.
unsigned UniqueID = MCContext::GenericSectionID;
const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM);
- if (LinkedToSym) {
+ if (GO->getMetadata(LLVMContext::MD_associated)) {
UniqueID = NextUniqueID++;
Flags |= ELF::SHF_LINK_ORDER;
} else {
- if (getContext().getAsmInfo()->useIntegratedAssembler()) {
+ if (getContext().getAsmInfo()->useIntegratedAssembler() ||
+ getContext().getAsmInfo()->binutilsIsAtLeast(2, 35)) {
// Symbols must be placed into sections with compatible entry
// sizes. Generate unique sections for symbols that have not
// been assigned to compatible sections.
@@ -735,8 +737,9 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
assert(Section->getLinkedToSymbol() == LinkedToSym &&
"Associated symbol mismatch between sections");
- if (!getContext().getAsmInfo()->useIntegratedAssembler()) {
- // If we are not using the integrated assembler then this symbol might have
+ if (!(getContext().getAsmInfo()->useIntegratedAssembler() ||
+ getContext().getAsmInfo()->binutilsIsAtLeast(2, 35))) {
+ // If we are using GNU as before 2.35, then this symbol might have
// been placed in an incompatible mergeable section. Emit an error if this
// is the case to avoid creating broken output.
if ((Section->getFlags() & ELF::SHF_MERGE) &&
@@ -831,6 +834,43 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
/* AssociatedSymbol */ nullptr);
}
+MCSection *
+TargetLoweringObjectFileELF::getSectionForLSDA(const Function &F,
+ const TargetMachine &TM) const {
+ // If neither COMDAT nor function sections, use the monolithic LSDA section.
+ // Re-use this path if LSDASection is null as in the Arm EHABI.
+ if (!LSDASection || (!F.hasComdat() && !TM.getFunctionSections()))
+ return LSDASection;
+
+ const auto *LSDA = cast<MCSectionELF>(LSDASection);
+ unsigned Flags = LSDA->getFlags();
+ StringRef Group;
+ if (F.hasComdat()) {
+ Group = F.getComdat()->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+
+ // Append the function name as the suffix like GCC, assuming
+ // -funique-section-names applies to .gcc_except_table sections.
+ if (TM.getUniqueSectionNames())
+ return getContext().getELFSection(LSDA->getName() + "." + F.getName(),
+ LSDA->getType(), Flags, 0, Group,
+ MCSection::NonUniqueID, nullptr);
+
+ // Allocate a unique ID if function sections && (integrated assembler or GNU
+ // as>=2.35). Note we could use SHF_LINK_ORDER to facilitate --gc-sections but
+ // that would require that we know the linker is a modern LLD (12.0 or later).
+ // GNU ld as of 2.35 does not support mixed SHF_LINK_ORDER &
+ // non-SHF_LINK_ORDER components in an output section
+ // https://sourceware.org/bugzilla/show_bug.cgi?id=26256
+ unsigned ID = TM.getFunctionSections() &&
+ getContext().getAsmInfo()->useIntegratedAssembler()
+ ? NextUniqueID++
+ : MCSection::NonUniqueID;
+ return getContext().getELFSection(LSDA->getName(), LSDA->getType(), Flags, 0,
+ Group, ID, nullptr);
+}
+
bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
bool UsesLabelDifference, const Function &F) const {
// We can always create relative relocations, so use another section
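
getSectionForLSDA above has three outcomes: the single .gcc_except_table for the common case, a uniquely named .gcc_except_table.<function> section when unique section names are enabled, and a same-named section with a fresh unique ID when only the integrated assembler can cope with several sections of one name. The sketch below models just that decision, with plain strings standing in for MCSection and booleans standing in for the TargetMachine/MCAsmInfo queries; it is not the LLVM code itself.

#include <cstdio>
#include <string>

struct LSDAChoice {
  std::string SectionName;  // ".gcc_except_table" or a per-function variant
  bool UniqueID;            // request a fresh section ID instead of a new name
  bool ComdatGroup;         // place into the function's comdat group
};

LSDAChoice selectLSDA(const std::string &Func, bool HasComdat,
                      bool FunctionSections, bool UniqueSectionNames,
                      bool IntegratedAsm) {
  if (!HasComdat && !FunctionSections)
    return {".gcc_except_table", false, false};        // monolithic section
  if (UniqueSectionNames)
    return {".gcc_except_table." + Func, false, HasComdat};
  // Unique IDs only when the assembler can emit several same-named sections.
  return {".gcc_except_table", FunctionSections && IntegratedAsm, HasComdat};
}

int main() {
  LSDAChoice C = selectLSDA("foo", /*HasComdat=*/false,
                            /*FunctionSections=*/true,
                            /*UniqueSectionNames=*/true,
                            /*IntegratedAsm=*/true);
  std::printf("%s unique-id=%d comdat=%d\n", C.SectionName.c_str(),
              C.UniqueID, C.ComdatGroup);
}
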
@@ -865,14 +905,14 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
assert(MBB.isBeginSection() && "Basic block does not start a section!");
unsigned UniqueID = MCContext::GenericSectionID;
- // For cold sections use the .text.unlikely prefix along with the parent
+ // For cold sections use the .text.split. prefix along with the parent
// function name. All cold blocks for the same function go to the same
// section. Similarly all exception blocks are grouped by symbol name
// under the .text.eh prefix. For regular sections, we either use a unique
// name, or a unique ID for the section.
SmallString<128> Name;
if (MBB.getSectionID() == MBBSectionID::ColdSectionID) {
- Name += ".text.unlikely.";
+ Name += BBSectionsColdTextPrefix;
Name += MBB.getParent()->getName();
} else if (MBB.getSectionID() == MBBSectionID::ExceptionSectionID) {
Name += ".text.eh.";
@@ -888,7 +928,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
}
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
- std::string GroupName = "";
+ std::string GroupName;
if (F.hasComdat()) {
Flags |= ELF::SHF_GROUP;
GroupName = F.getComdat()->getName().str();
@@ -968,6 +1008,20 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
}
+const MCExpr *TargetLoweringObjectFileELF::lowerDSOLocalEquivalent(
+ const DSOLocalEquivalent *Equiv, const TargetMachine &TM) const {
+ assert(supportDSOLocalEquivalentLowering());
+
+ const auto *GV = Equiv->getGlobalValue();
+
+ // A PLT entry is not needed for dso_local globals.
+ if (GV->isDSOLocal() || GV->isImplicitDSOLocal())
+ return MCSymbolRefExpr::create(TM.getSymbol(GV), getContext());
+
+ return MCSymbolRefExpr::create(TM.getSymbol(GV), PLTRelativeVariantKind,
+ getContext());
+}
+
MCSection *TargetLoweringObjectFileELF::getSectionForCommandLines() const {
// Use ".GCC.command.line" since this feature is to support clang's
// -frecord-gcc-switches which in turn attempts to mimic GCC's switch of the
@@ -1515,6 +1569,10 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
MCSymbol *Sym = TM.getSymbol(ComdatGV);
StringRef COMDATSymName = Sym->getName();
+ if (const auto *F = dyn_cast<Function>(GO))
+ if (Optional<StringRef> Prefix = F->getSectionPrefix())
+ raw_svector_ostream(Name) << '$' << *Prefix;
+
// Append "$symbol" to the section name *before* IR-level mangling is
// applied when targeting mingw. This is what GCC does, and the ld.bfd
// COFF linker will not properly handle comdats otherwise.
@@ -1590,6 +1648,31 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
Module &M) const {
+ emitLinkerDirectives(Streamer, M);
+
+ unsigned Version = 0;
+ unsigned Flags = 0;
+ StringRef Section;
+
+ GetObjCImageInfo(M, Version, Flags, Section);
+ if (!Section.empty()) {
+ auto &C = getContext();
+ auto *S = C.getCOFFSection(Section,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ Streamer.SwitchSection(S);
+ Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+ Streamer.emitInt32(Version);
+ Streamer.emitInt32(Flags);
+ Streamer.AddBlankLine();
+ }
+
+ emitCGProfileMetadata(Streamer, M);
+}
+
+void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
+ MCStreamer &Streamer, Module &M) const {
if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
// Emit the linker options to the linker .drectve section. According to the
// spec, this section is a space-separated string containing flags for
@@ -1606,28 +1689,51 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
}
}
- unsigned Version = 0;
- unsigned Flags = 0;
- StringRef Section;
-
- GetObjCImageInfo(M, Version, Flags, Section);
- if (Section.empty())
- return;
+ // Emit /EXPORT: flags for each exported global as necessary.
+ std::string Flags;
+ for (const GlobalValue &GV : M.global_values()) {
+ raw_string_ostream OS(Flags);
+ emitLinkerFlagsForGlobalCOFF(OS, &GV, getTargetTriple(), getMangler());
+ OS.flush();
+ if (!Flags.empty()) {
+ Streamer.SwitchSection(getDrectveSection());
+ Streamer.emitBytes(Flags);
+ }
+ Flags.clear();
+ }
- auto &C = getContext();
- auto *S = C.getCOFFSection(
- Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
- Streamer.SwitchSection(S);
- Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
- Streamer.emitInt32(Version);
- Streamer.emitInt32(Flags);
- Streamer.AddBlankLine();
+ // Emit /INCLUDE: flags for each used global as necessary.
+ if (const auto *LU = M.getNamedGlobal("llvm.used")) {
+ assert(LU->hasInitializer() && "expected llvm.used to have an initializer");
+ assert(isa<ArrayType>(LU->getValueType()) &&
+ "expected llvm.used to be an array type");
+ if (const auto *A = cast<ConstantArray>(LU->getInitializer())) {
+ for (const Value *Op : A->operands()) {
+ const auto *GV = cast<GlobalValue>(Op->stripPointerCasts());
+ // Global symbols with internal or private linkage are not visible to
+ // the linker, and thus would cause an error when the linker tried to
+ // preserve the symbol due to the `/include:` directive.
+ if (GV->hasLocalLinkage())
+ continue;
+
+ raw_string_ostream OS(Flags);
+ emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler());
+ OS.flush();
+
+ if (!Flags.empty()) {
+ Streamer.SwitchSection(getDrectveSection());
+ Streamer.emitBytes(Flags);
+ }
+ Flags.clear();
+ }
+ }
+ }
}
void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFile::Initialize(Ctx, TM);
+ this->TM = &TM;
const Triple &T = TM.getTargetTriple();
if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
StaticCtorSection =
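
The refactored emitLinkerDirectives above gathers two kinds of linker directives for the COFF .drectve section: export flags for dllexport-style globals and include flags for externally visible entries of llvm.used, skipping local-linkage symbols the linker cannot see. Below is a sketch of the string assembly only; the /EXPORT: and /include: spellings follow the surrounding comments, and the Global record is an illustrative stand-in for the IR queries.

#include <iostream>
#include <string>
#include <vector>

struct Global {
  std::string Name;
  bool DllExport;      // would come from the IR dllexport attribute
  bool LocalLinkage;   // internal/private symbols are invisible to the linker
  bool InLlvmUsed;     // listed in the llvm.used array
};

// Build the space-separated .drectve payload, mirroring the two loops above.
std::string buildDrectve(const std::vector<Global> &Globals) {
  std::string Flags;
  for (const Global &G : Globals) {
    if (G.DllExport)
      Flags += " /EXPORT:" + G.Name;
    // llvm.used entries with local linkage are skipped: an include directive
    // naming a symbol the linker cannot see would otherwise be an error.
    if (G.InLlvmUsed && !G.LocalLinkage)
      Flags += " /include:" + G.Name;
  }
  return Flags;
}

int main() {
  std::cout << buildDrectve({{"foo", true, false, false},
                             {"keepme", false, false, true},
                             {"hidden", false, true, true}})
            << "\n";   // " /EXPORT:foo /include:keepme"
}
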
@@ -1702,16 +1808,6 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
cast<MCSectionCOFF>(StaticDtorSection));
}
-void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
- raw_ostream &OS, const GlobalValue *GV) const {
- emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler());
-}
-
-void TargetLoweringObjectFileCOFF::emitLinkerFlagsForUsed(
- raw_ostream &OS, const GlobalValue *GV) const {
- emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler());
-}
-
const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
@@ -1882,7 +1978,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
if (const auto *F = dyn_cast<Function>(GO)) {
const auto &OptionalPrefix = F->getSectionPrefix();
if (OptionalPrefix)
- Name += *OptionalPrefix;
+ raw_svector_ostream(Name) << '.' << *OptionalPrefix;
}
if (EmitUniqueSection && UniqueSectionNames) {
@@ -1970,14 +2066,36 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection(
//===----------------------------------------------------------------------===//
// XCOFF
//===----------------------------------------------------------------------===//
+bool TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(
+ const MachineFunction *MF) {
+ if (!MF->getLandingPads().empty())
+ return true;
+
+ const Function &F = MF->getFunction();
+ if (!F.hasPersonalityFn() || !F.needsUnwindTableEntry())
+ return false;
+
+ const Function *Per =
+ dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ if (isNoOpWithoutInvoke(classifyEHPersonality(Per)))
+ return false;
+
+ return true;
+}
+
+MCSymbol *
+TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) {
+ return MF->getMMI().getContext().getOrCreateSymbol(
+ "__ehinfo." + Twine(MF->getFunctionNumber()));
+}
+
MCSymbol *
TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
const TargetMachine &TM) const {
- if (TM.getDataSections())
- report_fatal_error("XCOFF unique data sections not yet implemented");
-
// We always use a qualname symbol for a GV that represents
// a declaration, a function descriptor, or a common symbol.
+ // If a GV represents a GlobalVariable and -fdata-sections is enabled, we
+ // also return a qualname so that a label symbol could be avoided.
// It is inherently ambiguous when the GO represents the address of a
// function, as the GO could either represent a function descriptor or a
// function entry point. We choose to always return a function descriptor
@@ -1992,21 +2110,34 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
return cast<MCSectionXCOFF>(
getSectionForFunctionDescriptor(cast<Function>(GO), TM))
->getQualNameSymbol();
- if (GOKind.isCommon() || GOKind.isBSSLocal())
+ if ((TM.getDataSections() && !GO->hasSection()) || GOKind.isCommon() ||
+ GOKind.isBSSLocal())
return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM))
->getQualNameSymbol();
}
// For all other cases, fall back to getSymbol to return the unqualified name.
- // This could change for a GV that is a GlobalVariable when we decide to
- // support -fdata-sections since we could avoid having label symbols if the
- // linkage name is applied to the csect symbol.
return nullptr;
}
MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- report_fatal_error("XCOFF explicit sections not yet implemented.");
+ if (!GO->hasSection())
+ report_fatal_error("#pragma clang section is not yet supported");
+
+ StringRef SectionName = GO->getSection();
+ XCOFF::StorageMappingClass MappingClass;
+ if (Kind.isText())
+ MappingClass = XCOFF::XMC_PR;
+ else if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS())
+ MappingClass = XCOFF::XMC_RW;
+ else if (Kind.isReadOnly())
+ MappingClass = XCOFF::XMC_RO;
+ else
+ report_fatal_error("XCOFF other section types not yet implemented.");
+
+ return getContext().getXCOFFSection(SectionName, MappingClass, XCOFF::XTY_SD,
+ Kind, /* MultiSymbolsAllowed*/ true);
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
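
The new getExplicitSectionGlobal above classifies a user-specified section by SectionKind into an XCOFF storage mapping class: XMC_PR for text, XMC_RW for writable or BSS-like data and read-only data with relocations, XMC_RO for plain read-only data, and a fatal error otherwise. A compact sketch of that classification follows; the enums are illustrative stand-ins, not the real SectionKind or XCOFF:: constants.

#include <cstdio>
#include <stdexcept>

enum class Kind { Text, Data, ReadOnlyWithRel, BSS, ReadOnly, Other };
enum class XMC { PR, RW, RO };   // stand-ins for XCOFF::XMC_* classes

// Same ordering as the patched code: the writable kinds are matched before
// the plain read-only case, so read-only-with-relocations lands in XMC_RW.
XMC mappingClassFor(Kind K) {
  switch (K) {
  case Kind::Text:            return XMC::PR;
  case Kind::Data:
  case Kind::ReadOnlyWithRel:
  case Kind::BSS:             return XMC::RW;
  case Kind::ReadOnly:        return XMC::RO;
  default:
    throw std::runtime_error("XCOFF section kind not yet implemented");
  }
}

int main() {
  std::printf("%d\n", int(mappingClassFor(Kind::ReadOnlyWithRel)));  // XMC::RW
}
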
@@ -2016,30 +2147,23 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
SmallString<128> Name;
getNameWithPrefix(Name, GO, TM);
- XCOFF::StorageClass SC =
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
// Externals go into a csect of type ER.
return getContext().getXCOFFSection(
Name, isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA, XCOFF::XTY_ER,
- SC, SectionKind::getMetadata());
+ SectionKind::getMetadata());
}
MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- assert(!TM.getFunctionSections() && !TM.getDataSections() &&
- "XCOFF unique sections not yet implemented.");
-
// Common symbols go into a csect with matching name which will get mapped
// into the .bss section.
if (Kind.isBSSLocal() || Kind.isCommon()) {
SmallString<128> Name;
getNameWithPrefix(Name, GO, TM);
- XCOFF::StorageClass SC =
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
return getContext().getXCOFFSection(
Name, Kind.isBSSLocal() ? XCOFF::XMC_BS : XCOFF::XMC_RW, XCOFF::XTY_CM,
- SC, Kind, /* BeginSymbolName */ nullptr);
+ Kind);
}
if (Kind.isMergeableCString()) {
@@ -2051,40 +2175,65 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
SmallString<128> Name;
Name = SizeSpec + utostr(Alignment.value());
+ if (TM.getDataSections())
+ getNameWithPrefix(Name, GO, TM);
+
return getContext().getXCOFFSection(
- Name, XCOFF::XMC_RO, XCOFF::XTY_SD,
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO),
- Kind, /* BeginSymbolName */ nullptr);
+ Name, XCOFF::XMC_RO, XCOFF::XTY_SD, Kind,
+ /* MultiSymbolsAllowed*/ !TM.getDataSections());
}
- if (Kind.isText())
+ if (Kind.isText()) {
+ if (TM.getFunctionSections()) {
+ return cast<MCSymbolXCOFF>(getFunctionEntryPointSymbol(GO, TM))
+ ->getRepresentedCsect();
+ }
return TextSection;
+ }
- if (Kind.isData() || Kind.isReadOnlyWithRel())
- // TODO: We may put this under option control, because user may want to
- // have read-only data with relocations placed into a read-only section by
- // the compiler.
- return DataSection;
-
- // Zero initialized data must be emitted to the .data section because external
- // linkage control sections that get mapped to the .bss section will be linked
- // as tentative defintions, which is only appropriate for SectionKind::Common.
- if (Kind.isBSS())
+ // TODO: We may put Kind.isReadOnlyWithRel() under option control, because
+ // user may want to have read-only data with relocations placed into a
+ // read-only section by the compiler.
+ // For BSS kind, zero initialized data must be emitted to the .data section
+ // because external linkage control sections that get mapped to the .bss
+ // section will be linked as tentative definitions, which is only appropriate
+ // for SectionKind::Common.
+ if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS()) {
+ if (TM.getDataSections()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(Name, XCOFF::XMC_RW, XCOFF::XTY_SD,
+ SectionKind::getData());
+ }
return DataSection;
+ }
- if (Kind.isReadOnly())
+ if (Kind.isReadOnly()) {
+ if (TM.getDataSections()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(Name, XCOFF::XMC_RO, XCOFF::XTY_SD,
+ SectionKind::getReadOnly());
+ }
return ReadOnlySection;
+ }
report_fatal_error("XCOFF other section types not yet implemented.");
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForJumpTable(
const Function &F, const TargetMachine &TM) const {
- assert (!TM.getFunctionSections() && "Unique sections not supported on XCOFF"
- " yet.");
assert (!F.getComdat() && "Comdat not supported on XCOFF.");
- //TODO: Enable emiting jump table to unique sections when we support it.
- return ReadOnlySection;
+
+ if (!TM.getFunctionSections())
+ return ReadOnlySection;
+
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ SmallString<128> NameStr(".rodata.jmp..");
+ getNameWithPrefix(NameStr, &F, TM);
+ return getContext().getXCOFFSection(NameStr, XCOFF::XMC_RO, XCOFF::XTY_SD,
+ SectionKind::getReadOnly());
}
bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
@@ -2104,19 +2253,23 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant(
void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx,
const TargetMachine &TgtM) {
TargetLoweringObjectFile::Initialize(Ctx, TgtM);
- TTypeEncoding = 0;
+ TTypeEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel |
+ (TgtM.getTargetTriple().isArch32Bit() ? dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_sdata8);
PersonalityEncoding = 0;
LSDAEncoding = 0;
+ CallSiteEncoding = dwarf::DW_EH_PE_udata4;
}
MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection(
- unsigned Priority, const MCSymbol *KeySym) const {
- report_fatal_error("XCOFF ctor section not yet implemented.");
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("no static constructor section on AIX");
}
MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection(
- unsigned Priority, const MCSymbol *KeySym) const {
- report_fatal_error("XCOFF dtor section not yet implemented.");
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("no static destructor section on AIX");
}
const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(
@@ -2125,9 +2278,11 @@ const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(
report_fatal_error("XCOFF not yet implemented.");
}
-XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(
- const GlobalObject *GO) {
- switch (GO->getLinkage()) {
+XCOFF::StorageClass
+TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(const GlobalValue *GV) {
+ assert(!isa<GlobalIFunc>(GV) && "GlobalIFunc is not supported on AIX.");
+
+ switch (GV->getLinkage()) {
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
return XCOFF::C_HIDEXT;
@@ -2149,10 +2304,32 @@ XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(
}
MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol(
- const Function *F, const TargetMachine &TM) const {
+ const GlobalValue *Func, const TargetMachine &TM) const {
+ assert(
+ (isa<Function>(Func) ||
+ (isa<GlobalAlias>(Func) &&
+ isa_and_nonnull<Function>(cast<GlobalAlias>(Func)->getBaseObject()))) &&
+ "Func must be a function or an alias which has a function as base "
+ "object.");
+
SmallString<128> NameStr;
NameStr.push_back('.');
- getNameWithPrefix(NameStr, F, TM);
+ getNameWithPrefix(NameStr, Func, TM);
+
+ // When -function-sections is enabled and explicit section is not specified,
+ // it's not necessary to emit a function entry point label any more. We will
+ // use the function entry point csect instead. For function declarations, the
+ // undefined symbols get treated as csects with the XTY_ER property.
+ if (((TM.getFunctionSections() && !Func->hasSection()) ||
+ Func->isDeclaration()) &&
+ isa<Function>(Func)) {
+ return getContext()
+ .getXCOFFSection(NameStr, XCOFF::XMC_PR,
+ Func->isDeclaration() ? XCOFF::XTY_ER : XCOFF::XTY_SD,
+ SectionKind::getText())
+ ->getQualNameSymbol();
+ }
+
return getContext().getOrCreateSymbol(NameStr);
}
@@ -2161,13 +2338,15 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor(
SmallString<128> NameStr;
getNameWithPrefix(NameStr, F, TM);
return getContext().getXCOFFSection(NameStr, XCOFF::XMC_DS, XCOFF::XTY_SD,
- getStorageClassForGlobal(F),
SectionKind::getData());
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
- const MCSymbol *Sym) const {
+ const MCSymbol *Sym, const TargetMachine &TM) const {
+ // Use TE storage-mapping class when large code model is enabled so that
+ // the chance of needing -bbigtoc is decreased.
return getContext().getXCOFFSection(
- cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), XCOFF::XMC_TC,
- XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData());
+ cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(),
+ TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC,
+ XCOFF::XTY_SD, SectionKind::getData());
}
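
The final hunk of this file moves TOC-entry csects to the XMC_TE storage mapping class under the large code model, while the small and medium models keep XMC_TC. A trivial sketch of the selection, with stand-in enums rather than the LLVM types:

#include <cstdio>

enum class CodeModel { Small, Medium, Large };
enum class XMC { TC, TE };   // stand-ins for XCOFF::XMC_TC / XCOFF::XMC_TE

// TOC entries switch to the TE class under the large code model.
XMC tocEntryClass(CodeModel CM) {
  return CM == CodeModel::Large ? XMC::TE : XMC::TC;
}

int main() { std::printf("%d\n", int(tocEntryClass(CodeModel::Large))); }

Per the comment in the hunk, the intent of choosing TE is to let the linker place such entries outside the displacement-limited primary TOC region, reducing the chance that a module needs -bbigtoc.
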
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 4866d4c171c0..0731cf9b28f4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -47,7 +47,11 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const {
}
/// NOTE: There are targets that still do not support the debug entry values
-/// production.
+/// production, and that is controlled with the SupportsDebugEntryValues flag.
+/// In addition, the SCE debugger does not have the feature implemented, so prefer
+/// not to emit the debug entry values in that case.
+/// EnableDebugEntryValues can be used for testing purposes.
bool TargetOptions::ShouldEmitDebugEntryValues() const {
- return SupportsDebugEntryValues || EnableDebugEntryValues;
+ return (SupportsDebugEntryValues && DebuggerTuning != DebuggerKind::SCE) ||
+ EnableDebugEntryValues;
}
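
The updated predicate above makes debug entry values conditional on both target support and debugger tuning: a target that sets SupportsDebugEntryValues emits them unless tuning for the SCE debugger, and the EnableDebugEntryValues testing flag force-enables them regardless. A sketch of the predicate with the inputs flattened to plain values:

#include <cstdio>

enum class DebuggerKind { Default, GDB, LLDB, SCE };

// Mirrors the shape of TargetOptions::ShouldEmitDebugEntryValues() after
// this patch; the parameters stand in for the TargetOptions members.
bool shouldEmitDebugEntryValues(bool SupportsDebugEntryValues,
                                bool EnableDebugEntryValues,
                                DebuggerKind Tuning) {
  return (SupportsDebugEntryValues && Tuning != DebuggerKind::SCE) ||
         EnableDebugEntryValues;   // the testing override always wins
}

int main() {
  // Supported target, but tuned for the SCE debugger: suppressed unless forced.
  std::printf("%d %d\n",
              shouldEmitDebugEntryValues(true, false, DebuggerKind::SCE),
              shouldEmitDebugEntryValues(true, true, DebuggerKind::SCE));
}
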
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index e0fdb0cefcb8..e844d03854e2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -41,6 +42,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -120,16 +122,22 @@ static cl::opt<cl::boolOrDefault> DebugifyAndStripAll(
"Debugify MIR before and Strip debug after "
"each pass except those known to be unsafe when debug info is present"),
cl::ZeroOrMore);
-enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault };
+static cl::opt<cl::boolOrDefault> DebugifyCheckAndStripAll(
+ "debugify-check-and-strip-all-safe", cl::Hidden,
+ cl::desc(
+ "Debugify MIR before, by checking and stripping the debug info after, "
+ "each pass except those known to be unsafe when debug info is present"),
+ cl::ZeroOrMore);
// Enable or disable the MachineOutliner.
static cl::opt<RunOutliner> EnableMachineOutliner(
"enable-machine-outliner", cl::desc("Enable the machine outliner"),
- cl::Hidden, cl::ValueOptional, cl::init(TargetDefault),
- cl::values(clEnumValN(AlwaysOutline, "always",
+ cl::Hidden, cl::ValueOptional, cl::init(RunOutliner::TargetDefault),
+ cl::values(clEnumValN(RunOutliner::AlwaysOutline, "always",
"Run on all functions guaranteed to be beneficial"),
- clEnumValN(NeverOutline, "never", "Disable all outlining"),
+ clEnumValN(RunOutliner::NeverOutline, "never",
+ "Disable all outlining"),
// Sentinel value for unspecified option.
- clEnumValN(AlwaysOutline, "", "")));
+ clEnumValN(RunOutliner::AlwaysOutline, "", "")));
// Enable or disable FastISel. Both options are needed, because
// FastISel is enabled by default with -fast, and we wish to be
// able to enable or disable fast-isel independently from -O0.
@@ -141,9 +149,11 @@ static cl::opt<cl::boolOrDefault> EnableGlobalISelOption(
"global-isel", cl::Hidden,
cl::desc("Enable the \"global\" instruction selector"));
-static cl::opt<std::string> PrintMachineInstrs(
- "print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"),
- cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden);
+// FIXME: remove this after switching to NPM or GlobalISel, whichever gets there
+// first...
+static cl::opt<bool>
+ PrintAfterISel("print-after-isel", cl::init(false), cl::Hidden,
+ cl::desc("Print machine instrs after ISel"));
static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
"global-isel-abort", cl::Hidden,
@@ -170,7 +180,6 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
cl::desc("Run live interval analysis earlier in the pipeline"));
// Experimental option to use CFL-AA in codegen
-enum class CFLAAType { None, Steensgaard, Andersen, Both };
static cl::opt<CFLAAType> UseCFLAA(
"use-cfl-aa-in-codegen", cl::init(CFLAAType::None), cl::Hidden,
cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"),
@@ -210,6 +219,17 @@ static cl::opt<std::string>
cl::desc("Stop compilation before a specific pass"),
cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+/// Enable the machine function splitter pass.
+static cl::opt<bool> EnableMachineFunctionSplitter(
+ "enable-split-machine-functions", cl::Hidden,
+ cl::desc("Split out cold blocks from machine functions based on profile "
+ "information."));
+
+/// Disable the expand reductions pass for testing.
+static cl::opt<bool> DisableExpandReductions(
+ "disable-expand-reductions", cl::init(false), cl::Hidden,
+ cl::desc("Disable the expand reduction intrinsics pass from running"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -294,12 +314,11 @@ struct InsertedPass {
AnalysisID TargetPassID;
IdentifyingPassPtr InsertedPassID;
bool VerifyAfter;
- bool PrintAfter;
InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter, bool PrintAfter)
+ bool VerifyAfter)
: TargetPassID(TargetPassID), InsertedPassID(InsertedPassID),
- VerifyAfter(VerifyAfter), PrintAfter(PrintAfter) {}
+ VerifyAfter(VerifyAfter) {}
Pass *getInsertedPass() const {
assert(InsertedPassID.isValid() && "Illegal Pass ID!");
@@ -397,6 +416,145 @@ void TargetPassConfig::setStartStopPasses() {
Started = (StartAfter == nullptr) && (StartBefore == nullptr);
}
+CGPassBuilderOption llvm::getCGPassBuilderOption() {
+ CGPassBuilderOption Opt;
+
+#define SET_OPTION(Option) \
+ if (Option.getNumOccurrences()) \
+ Opt.Option = Option;
+
+ SET_OPTION(EnableFastISelOption)
+ SET_OPTION(EnableGlobalISelAbort)
+ SET_OPTION(EnableGlobalISelOption)
+ SET_OPTION(EnableIPRA)
+ SET_OPTION(OptimizeRegAlloc)
+ SET_OPTION(VerifyMachineCode)
+
+#define SET_BOOLEAN_OPTION(Option) Opt.Option = Option;
+
+ SET_BOOLEAN_OPTION(EarlyLiveIntervals)
+ SET_BOOLEAN_OPTION(EnableBlockPlacementStats)
+ SET_BOOLEAN_OPTION(EnableImplicitNullChecks)
+ SET_BOOLEAN_OPTION(EnableMachineOutliner)
+ SET_BOOLEAN_OPTION(MISchedPostRA)
+ SET_BOOLEAN_OPTION(UseCFLAA)
+ SET_BOOLEAN_OPTION(DisableMergeICmps)
+ SET_BOOLEAN_OPTION(DisableLSR)
+ SET_BOOLEAN_OPTION(DisableConstantHoisting)
+ SET_BOOLEAN_OPTION(DisableCGP)
+ SET_BOOLEAN_OPTION(DisablePartialLibcallInlining)
+ SET_BOOLEAN_OPTION(PrintLSR)
+ SET_BOOLEAN_OPTION(PrintISelInput)
+ SET_BOOLEAN_OPTION(PrintGCInfo)
+
+ return Opt;
+}
+
+static void registerPartialPipelineCallback(PassInstrumentationCallbacks &PIC,
+ LLVMTargetMachine &LLVMTM) {
+ StringRef StartBefore;
+ StringRef StartAfter;
+ StringRef StopBefore;
+ StringRef StopAfter;
+
+ unsigned StartBeforeInstanceNum = 0;
+ unsigned StartAfterInstanceNum = 0;
+ unsigned StopBeforeInstanceNum = 0;
+ unsigned StopAfterInstanceNum = 0;
+
+ std::tie(StartBefore, StartBeforeInstanceNum) =
+ getPassNameAndInstanceNum(StartBeforeOpt);
+ std::tie(StartAfter, StartAfterInstanceNum) =
+ getPassNameAndInstanceNum(StartAfterOpt);
+ std::tie(StopBefore, StopBeforeInstanceNum) =
+ getPassNameAndInstanceNum(StopBeforeOpt);
+ std::tie(StopAfter, StopAfterInstanceNum) =
+ getPassNameAndInstanceNum(StopAfterOpt);
+
+ if (StartBefore.empty() && StartAfter.empty() && StopBefore.empty() &&
+ StopAfter.empty())
+ return;
+
+ std::tie(StartBefore, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StartBefore);
+ std::tie(StartAfter, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StartAfter);
+ std::tie(StopBefore, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StopBefore);
+ std::tie(StopAfter, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StopAfter);
+ if (!StartBefore.empty() && !StartAfter.empty())
+ report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
+ Twine(StartAfterOptName) + Twine(" specified!"));
+ if (!StopBefore.empty() && !StopAfter.empty())
+ report_fatal_error(Twine(StopBeforeOptName) + Twine(" and ") +
+ Twine(StopAfterOptName) + Twine(" specified!"));
+
+ PIC.registerShouldRunOptionalPassCallback(
+ [=, EnableCurrent = StartBefore.empty() && StartAfter.empty(),
+ EnableNext = Optional<bool>(), StartBeforeCount = 0u,
+ StartAfterCount = 0u, StopBeforeCount = 0u,
+ StopAfterCount = 0u](StringRef P, Any) mutable {
+ bool StartBeforePass = !StartBefore.empty() && P.contains(StartBefore);
+ bool StartAfterPass = !StartAfter.empty() && P.contains(StartAfter);
+ bool StopBeforePass = !StopBefore.empty() && P.contains(StopBefore);
+ bool StopAfterPass = !StopAfter.empty() && P.contains(StopAfter);
+
+ // Implement -start-after/-stop-after
+ if (EnableNext) {
+ EnableCurrent = *EnableNext;
+ EnableNext.reset();
+ }
+
+ // Using PIC.registerAfterPassCallback won't work because if this
+ // callback returns false, AfterPassCallback is also skipped.
+ if (StartAfterPass && StartAfterCount++ == StartAfterInstanceNum) {
+ assert(!EnableNext && "Error: assign to EnableNext more than once");
+ EnableNext = true;
+ }
+ if (StopAfterPass && StopAfterCount++ == StopAfterInstanceNum) {
+ assert(!EnableNext && "Error: assign to EnableNext more than once");
+ EnableNext = false;
+ }
+
+ if (StartBeforePass && StartBeforeCount++ == StartBeforeInstanceNum)
+ EnableCurrent = true;
+ if (StopBeforePass && StopBeforeCount++ == StopBeforeInstanceNum)
+ EnableCurrent = false;
+ return EnableCurrent;
+ });
+}
+
+void llvm::registerCodeGenCallback(PassInstrumentationCallbacks &PIC,
+ LLVMTargetMachine &LLVMTM) {
+
+ // Register a callback for disabling passes.
+ PIC.registerShouldRunOptionalPassCallback([](StringRef P, Any) {
+
+#define DISABLE_PASS(Option, Name) \
+ if (Option && P.contains(#Name)) \
+ return false;
+ DISABLE_PASS(DisableBlockPlacement, MachineBlockPlacementPass)
+ DISABLE_PASS(DisableBranchFold, BranchFolderPass)
+ DISABLE_PASS(DisableCopyProp, MachineCopyPropagationPass)
+ DISABLE_PASS(DisableEarlyIfConversion, EarlyIfConverterPass)
+ DISABLE_PASS(DisableEarlyTailDup, EarlyTailDuplicatePass)
+ DISABLE_PASS(DisableMachineCSE, MachineCSEPass)
+ DISABLE_PASS(DisableMachineDCE, DeadMachineInstructionElimPass)
+ DISABLE_PASS(DisableMachineLICM, EarlyMachineLICMPass)
+ DISABLE_PASS(DisableMachineSink, MachineSinkingPass)
+ DISABLE_PASS(DisablePostRAMachineLICM, MachineLICMPass)
+ DISABLE_PASS(DisablePostRAMachineSink, PostRAMachineSinkingPass)
+ DISABLE_PASS(DisablePostRASched, PostRASchedulerPass)
+ DISABLE_PASS(DisableSSC, StackSlotColoringPass)
+ DISABLE_PASS(DisableTailDuplicate, TailDuplicatePass)
+
+ return true;
+ });
+
+ registerPartialPipelineCallback(PIC, LLVMTM);
+}
+
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
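
registerPartialPipelineCallback above reimplements -start-before/-start-after/-stop-before/-stop-after for the codegen pipeline as a stateful should-run callback: instance counters pick the Nth occurrence of a pass name, the *-before options flip the current pass directly, and the *-after options arm EnableNext so the flip only takes effect on the following pass. Below is a stripped-down model of that state machine covering just the *-after options, driven over a made-up pass list; there is no PassInstrumentation and no instance counting here.

#include <iostream>
#include <optional>
#include <string>

struct PartialPipelineGate {
  std::string StartAfter, StopAfter;   // empty string == option not given
  bool EnableCurrent;
  std::optional<bool> EnableNext;

  PartialPipelineGate(std::string SA, std::string SP)
      : StartAfter(std::move(SA)), StopAfter(std::move(SP)),
        EnableCurrent(StartAfter.empty()) {}

  bool shouldRun(const std::string &Pass) {
    if (EnableNext) {                  // decision armed by the previous pass
      EnableCurrent = *EnableNext;
      EnableNext.reset();
    }
    if (!StartAfter.empty() && Pass == StartAfter)
      EnableNext = true;               // run everything after this pass
    if (!StopAfter.empty() && Pass == StopAfter)
      EnableNext = false;              // skip everything after this pass
    return EnableCurrent;
  }
};

int main() {
  PartialPipelineGate Gate("isel", "regalloc");
  // Prints: irpasses/isel skipped, sched/regalloc run, emit skipped.
  for (const char *P : {"irpasses", "isel", "sched", "regalloc", "emit"})
    std::cout << P << ": " << (Gate.shouldRun(P) ? "run" : "skip") << "\n";
}
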
@@ -411,9 +569,6 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
- if (StringRef(PrintMachineInstrs.getValue()).equals(""))
- TM.Options.PrintMachineCode = true;
-
if (EnableIPRA.getNumOccurrences())
TM.Options.EnableIPRA = EnableIPRA;
else {
@@ -437,14 +592,13 @@ CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
/// Insert InsertedPassID pass after TargetPassID.
void TargetPassConfig::insertPass(AnalysisID TargetPassID,
IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter, bool PrintAfter) {
+ bool VerifyAfter) {
assert(((!InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getID()) ||
(InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
"Insert a pass after itself!");
- Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter,
- PrintAfter);
+ Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter);
}
/// createPassConfig - Create a pass configuration object to be used by
@@ -522,7 +676,7 @@ bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const {
/// a later pass or that it should stop after an earlier pass, then do not add
/// the pass. Finally, compare the current pass against the StartAfter
/// and StopAfter options and change the Started/Stopped flags accordingly.
-void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
+void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
assert(!Initialized && "PassConfig is immutable");
// Cache the Pass ID here in case the pass manager finds this pass is
@@ -540,17 +694,16 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
addMachinePrePasses();
std::string Banner;
// Construct banner message before PM->add() as that may delete the pass.
- if (AddingMachinePasses && (printAfter || verifyAfter))
+ if (AddingMachinePasses && verifyAfter)
Banner = std::string("After ") + std::string(P->getPassName());
PM->add(P);
if (AddingMachinePasses)
- addMachinePostPasses(Banner, /*AllowPrint*/ printAfter,
- /*AllowVerify*/ verifyAfter);
+ addMachinePostPasses(Banner, /*AllowVerify*/ verifyAfter);
// Add the passes after the pass P if there is any.
- for (auto IP : Impl->InsertedPasses) {
+ for (const auto &IP : Impl->InsertedPasses) {
if (IP.TargetPassID == PassID)
- addPass(IP.getInsertedPass(), IP.VerifyAfter, IP.PrintAfter);
+ addPass(IP.getInsertedPass(), IP.VerifyAfter);
}
} else {
delete P;
@@ -570,8 +723,7 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
///
/// addPass cannot return a pointer to the pass instance because it is internal
/// to the PassManager and the instance we create here may already be freed.
-AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter,
- bool printAfter) {
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) {
IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
if (!FinalPtr.isValid())
@@ -586,7 +738,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter,
llvm_unreachable("Pass ID not registered");
}
AnalysisID FinalID = P->getPassID();
- addPass(P, verifyAfter, printAfter); // Ends the lifetime of P.
+ addPass(P, verifyAfter); // Ends the lifetime of P.
return FinalID;
}
@@ -597,7 +749,7 @@ void TargetPassConfig::printAndVerify(const std::string &Banner) {
}
void TargetPassConfig::addPrintPass(const std::string &Banner) {
- if (TM->shouldPrintMachineCode())
+ if (PrintAfterISel)
PM->add(createMachineFunctionPrinterPass(dbgs(), Banner));
}
@@ -619,18 +771,26 @@ void TargetPassConfig::addStripDebugPass() {
PM->add(createStripDebugMachineModulePass(/*OnlyDebugified=*/true));
}
+void TargetPassConfig::addCheckDebugPass() {
+ PM->add(createCheckDebugMachineModulePass());
+}
+
void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) {
- if (AllowDebugify && DebugifyAndStripAll == cl::BOU_TRUE && DebugifyIsSafe)
+ if (AllowDebugify && DebugifyIsSafe &&
+ (DebugifyAndStripAll == cl::BOU_TRUE ||
+ DebugifyCheckAndStripAll == cl::BOU_TRUE))
addDebugifyPass();
}
void TargetPassConfig::addMachinePostPasses(const std::string &Banner,
- bool AllowPrint, bool AllowVerify,
- bool AllowStrip) {
- if (DebugifyAndStripAll == cl::BOU_TRUE && DebugifyIsSafe)
- addStripDebugPass();
- if (AllowPrint)
- addPrintPass(Banner);
+ bool AllowVerify, bool AllowStrip) {
+ if (DebugifyIsSafe) {
+ if (DebugifyCheckAndStripAll == cl::BOU_TRUE) {
+ addCheckDebugPass();
+ addStripDebugPass();
+ } else if (DebugifyAndStripAll == cl::BOU_TRUE)
+ addStripDebugPass();
+ }
if (AllowVerify)
addVerifyPass(Banner);
}
@@ -707,10 +867,12 @@ void TargetPassConfig::addIRPasses() {
// Add scalarization of target's unsupported masked memory intrinsics pass.
// the unsupported intrinsic will be replaced with a chain of basic blocks,
// that stores/loads element one-by-one if the appropriate mask bit is set.
- addPass(createScalarizeMaskedMemIntrinPass());
+ addPass(createScalarizeMaskedMemIntrinLegacyPass());
// Expand reduction intrinsics into shuffle sequences if the target wants to.
- addPass(createExpandReductionsPass());
+ // Allow disabling it for testing purposes.
+ if (!DisableExpandReductions)
+ addPass(createExpandReductionsPass());
}
/// Turn exception handling constructs into something the code generators can
@@ -730,6 +892,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
LLVM_FALLTHROUGH;
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
+ case ExceptionHandling::AIX:
addPass(createDwarfEHPass(getOptLevel()));
break;
case ExceptionHandling::WinEH:
@@ -879,7 +1042,7 @@ bool TargetPassConfig::addISelPasses() {
addPass(createLowerEmuTLSPass());
addPass(createPreISelIntrinsicLoweringPass());
- addPass(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+ PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
addIRPasses();
addCodeGenPrepare();
addPassesToHandleExceptions();
@@ -916,20 +1079,6 @@ static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
void TargetPassConfig::addMachinePasses() {
AddingMachinePasses = true;
- // Insert a machine instr printer pass after the specified pass.
- StringRef PrintMachineInstrsPassName = PrintMachineInstrs.getValue();
- if (!PrintMachineInstrsPassName.equals("") &&
- !PrintMachineInstrsPassName.equals("option-unspecified")) {
- if (const PassInfo *TPI = getPassInfo(PrintMachineInstrsPassName)) {
- const PassRegistry *PR = PassRegistry::getPassRegistry();
- const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
- assert(IPI && "failed to get \"machineinstr-printer\" PassInfo!");
- const char *TID = (const char *)(TPI->getTypeInfo());
- const char *IID = (const char *)(IPI->getTypeInfo());
- insertPass(TID, IID);
- }
- }
-
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
addMachineSSAOptimization();
@@ -1000,7 +1149,7 @@ void TargetPassConfig::addMachinePasses() {
// GC
if (addGCPasses()) {
if (PrintGCInfo)
- addPass(createGCInfoPrinter(dbgs()), false, false);
+ addPass(createGCInfoPrinter(dbgs()), false);
}
// Basic block placement.
@@ -1028,20 +1177,31 @@ void TargetPassConfig::addMachinePasses() {
addPass(&LiveDebugValuesID, false);
if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
- EnableMachineOutliner != NeverOutline) {
- bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline);
- bool AddOutliner = RunOnAllFunctions ||
- TM->Options.SupportsDefaultOutlining;
+ EnableMachineOutliner != RunOutliner::NeverOutline) {
+ bool RunOnAllFunctions =
+ (EnableMachineOutliner == RunOutliner::AlwaysOutline);
+ bool AddOutliner =
+ RunOnAllFunctions || TM->Options.SupportsDefaultOutlining;
if (AddOutliner)
addPass(createMachineOutlinerPass(RunOnAllFunctions));
}
- if (TM->getBBSectionsType() != llvm::BasicBlockSection::None)
- addPass(llvm::createBBSectionsPreparePass(TM->getBBSectionsFuncListBuf()));
+ // Machine function splitter uses the basic block sections feature. Both
+ // cannot be enabled at the same time.
+ if (TM->Options.EnableMachineFunctionSplitter ||
+ EnableMachineFunctionSplitter) {
+ addPass(createMachineFunctionSplitterPass());
+ } else if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) {
+ addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf()));
+ }
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
+ // Insert pseudo probe annotation for callsite profiling
+ if (TM->Options.PseudoProbeForProfiling)
+ addPass(createPseudoProbeInserter());
+
AddingMachinePasses = false;
}
@@ -1148,7 +1308,7 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
return createTargetRegisterAllocator(Optimized);
}
-bool TargetPassConfig::addRegAssignmentFast() {
+bool TargetPassConfig::addRegAssignAndRewriteFast() {
if (RegAlloc != &useDefaultRegisterAllocator &&
RegAlloc != &createFastRegisterAllocator)
report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
@@ -1157,7 +1317,7 @@ bool TargetPassConfig::addRegAssignmentFast() {
return true;
}
-bool TargetPassConfig::addRegAssignmentOptimized() {
+bool TargetPassConfig::addRegAssignAndRewriteOptimized() {
// Add the selected register allocation pass.
addPass(createRegAllocPass(true));
@@ -1167,12 +1327,6 @@ bool TargetPassConfig::addRegAssignmentOptimized() {
// Finally rewrite virtual registers.
addPass(&VirtRegRewriterID);
- // Perform stack slot coloring and post-ra machine LICM.
- //
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- addPass(&StackSlotColoringID);
-
return true;
}
@@ -1188,7 +1342,7 @@ void TargetPassConfig::addFastRegAlloc() {
addPass(&PHIEliminationID, false);
addPass(&TwoAddressInstructionPassID, false);
- addRegAssignmentFast();
+ addRegAssignAndRewriteFast();
}
/// Add standard target-independent passes that are tightly coupled with
@@ -1205,6 +1359,11 @@ void TargetPassConfig::addOptimizedRegAlloc() {
// LiveVariables can be removed completely, and LiveIntervals can be directly
// computed. (We still either need to regenerate kill flags after regalloc, or
// preferably fix the scavenger to not depend on them).
+ // FIXME: UnreachableMachineBlockElim is a dependent pass of LiveVariables.
+ // When LiveVariables is removed this has to be removed/moved as well.
+ // Explicit addition of UnreachableMachineBlockElim allows stopping before or
+ // after it with -stop-before/-stop-after.
+ addPass(&UnreachableMachineBlockElimID, false);
addPass(&LiveVariablesID, false);
// Edge splitting is smarter with machine loop info.
@@ -1226,7 +1385,13 @@ void TargetPassConfig::addOptimizedRegAlloc() {
// PreRA instruction scheduling.
addPass(&MachineSchedulerID);
- if (addRegAssignmentOptimized()) {
+ if (addRegAssignAndRewriteOptimized()) {
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(&StackSlotColoringID);
+
// Allow targets to expand pseudo instructions depending on the choice of
// registers before MachineCopyPropagation.
addPostRewrite();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index e2ef12d8ac77..5fd7eef5808f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -68,7 +69,7 @@ bool TargetRegisterInfo::shouldRegionSplitForVirtReg(
const MachineFunction &MF, const LiveInterval &VirtReg) const {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg);
+ MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg());
if (MI && TII->isTriviallyReMaterializable(*MI) &&
VirtReg.size() > HugeSizeForSplit)
return false;
@@ -532,6 +533,56 @@ TargetRegisterInfo::lookThruCopyLike(Register SrcReg,
}
}
+Register TargetRegisterInfo::lookThruSingleUseCopyChain(
+ Register SrcReg, const MachineRegisterInfo *MRI) const {
+ while (true) {
+ const MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ // Found the real definition, return it if it has a single use.
+ if (!MI->isCopyLike())
+ return MRI->hasOneNonDBGUse(SrcReg) ? SrcReg : Register();
+
+ Register CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+ assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ // Continue only if the next definition in the chain is for a virtual
+ // register that has a single use.
+ if (!CopySrcReg.isVirtual() || !MRI->hasOneNonDBGUse(CopySrcReg))
+ return Register();
+
+ SrcReg = CopySrcReg;
+ }
+}
+
+void TargetRegisterInfo::getOffsetOpcodes(
+ const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const {
+ assert(!Offset.getScalable() && "Scalable offsets are not handled");
+ DIExpression::appendOffset(Ops, Offset.getFixed());
+}
+
+DIExpression *
+TargetRegisterInfo::prependOffsetExpression(const DIExpression *Expr,
+ unsigned PrependFlags,
+ const StackOffset &Offset) const {
+ assert((PrependFlags &
+ ~(DIExpression::DerefBefore | DIExpression::DerefAfter |
+ DIExpression::StackValue | DIExpression::EntryValue)) == 0 &&
+ "Unsupported prepend flag");
+ SmallVector<uint64_t, 16> OffsetExpr;
+ if (PrependFlags & DIExpression::DerefBefore)
+ OffsetExpr.push_back(dwarf::DW_OP_deref);
+ getOffsetOpcodes(Offset, OffsetExpr);
+ if (PrependFlags & DIExpression::DerefAfter)
+ OffsetExpr.push_back(dwarf::DW_OP_deref);
+ return DIExpression::prependOpcodes(Expr, OffsetExpr,
+ PrependFlags & DIExpression::StackValue,
+ PrependFlags & DIExpression::EntryValue);
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void TargetRegisterInfo::dumpReg(Register Reg, unsigned SubRegIndex,
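
prependOffsetExpression above builds a DWARF expression prefix in three steps: an optional DW_OP_deref when DerefBefore is requested, the offset opcodes produced by getOffsetOpcodes, and an optional trailing DW_OP_deref for DerefAfter, which DIExpression::prependOpcodes then splices in front of the variable's existing expression. The sketch below assembles the same opcode vector with raw DWARF constants; DW_OP_deref = 0x06 and DW_OP_plus_uconst = 0x23 are assumed values, and only non-negative fixed offsets are handled.

#include <cstdint>
#include <cstdio>
#include <vector>

constexpr uint64_t DW_OP_deref = 0x06;
constexpr uint64_t DW_OP_plus_uconst = 0x23;

enum PrependFlags : unsigned { DerefBefore = 1, DerefAfter = 2 };

// Offset opcodes for a fixed, non-negative frame offset; the real
// DIExpression::appendOffset also handles negative and scalable offsets.
static void appendOffset(std::vector<uint64_t> &Ops, uint64_t Offset) {
  if (Offset) {
    Ops.push_back(DW_OP_plus_uconst);
    Ops.push_back(Offset);
  }
}

std::vector<uint64_t> buildOffsetPrefix(unsigned Flags, uint64_t Offset) {
  std::vector<uint64_t> Ops;
  if (Flags & DerefBefore)
    Ops.push_back(DW_OP_deref);   // load the pointer first, then offset it
  appendOffset(Ops, Offset);
  if (Flags & DerefAfter)
    Ops.push_back(DW_OP_deref);   // offset first, then load through it
  return Ops;
}

int main() {
  for (uint64_t Op : buildOffsetPrefix(DerefBefore, 16))
    std::printf("0x%llx ", (unsigned long long)Op);   // 0x6 0x23 0x10
  std::printf("\n");
}
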
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index 63766df4d2be..e4520d8ccb1e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -15,13 +15,12 @@
using namespace llvm;
TargetSubtargetInfo::TargetSubtargetInfo(
- const Triple &TT, StringRef CPU, StringRef FS,
+ const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD,
- const MCWriteProcResEntry *WPR,
- const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
- const InstrStage *IS, const unsigned *OC, const unsigned *FP)
- : MCSubtargetInfo(TT, CPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {
-}
+ const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL,
+ const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC,
+ const unsigned *FP)
+ : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {}
TargetSubtargetInfo::~TargetSubtargetInfo() = default;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 615ff4b8789c..2a9132bd2fe0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -111,37 +111,35 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
// A map from virtual registers to physical registers which are likely targets
// to be coalesced to due to copies from physical registers to virtual
// registers. e.g. v1024 = move r0.
- DenseMap<unsigned, unsigned> SrcRegMap;
+ DenseMap<Register, Register> SrcRegMap;
// A map from virtual registers to physical registers which are likely targets
// to be coalesced to due to copies to physical registers from virtual
// registers. e.g. r1 = move v1024.
- DenseMap<unsigned, unsigned> DstRegMap;
+ DenseMap<Register, Register> DstRegMap;
- bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen);
+ bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen);
- bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
+ bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef);
- bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ bool isProfitableToCommute(Register RegA, Register RegB, Register RegC,
MachineInstr *MI, unsigned Dist);
bool commuteInstruction(MachineInstr *MI, unsigned DstIdx,
unsigned RegBIdx, unsigned RegCIdx, unsigned Dist);
- bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+ bool isProfitableToConv3Addr(Register RegA, Register RegB);
bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned RegA, unsigned RegB, unsigned Dist);
+ MachineBasicBlock::iterator &nmi, Register RegA,
+ Register RegB, unsigned Dist);
- bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI);
+ bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI);
bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg);
+ MachineBasicBlock::iterator &nmi, Register Reg);
bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg);
+ MachineBasicBlock::iterator &nmi, Register Reg);
bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
@@ -153,7 +151,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
unsigned BaseOpIdx,
bool BaseOpKilled,
unsigned Dist);
- void scanUses(unsigned DstReg);
+ void scanUses(Register DstReg);
void processCopy(MachineInstr *MI);
@@ -199,10 +197,10 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
-static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
+static bool isPlainlyKilled(MachineInstr *MI, Register Reg, LiveIntervals *LIS);
/// Return the MachineInstr* if it is the single def of the Reg in current BB.
-static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
+static MachineInstr *getSingleDef(Register Reg, MachineBasicBlock *BB,
const MachineRegisterInfo *MRI) {
MachineInstr *Ret = nullptr;
for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
@@ -223,9 +221,9 @@ static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
/// %Tmp2 = copy %ToReg;
/// MaxLen specifies the maximum length of the copy chain the func
/// can walk through.
-bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
+bool TwoAddressInstructionPass::isRevCopyChain(Register FromReg, Register ToReg,
int Maxlen) {
- unsigned TmpReg = FromReg;
+ Register TmpReg = FromReg;
for (int i = 0; i < Maxlen; i++) {
MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
if (!Def || !Def->isCopy())
@@ -243,7 +241,7 @@ bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
/// in the MBB that defines the specified register and the two-address
/// instruction which is being processed. It also returns the last def location
/// by reference.
-bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
+bool TwoAddressInstructionPass::noUseAfterLastDef(Register Reg, unsigned Dist,
unsigned &LastDef) {
LastDef = 0;
unsigned LastUse = Dist;
@@ -267,8 +265,8 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
/// instruction. It also returns the source and destination registers and
/// whether they are physical registers by reference.
static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
- unsigned &SrcReg, unsigned &DstReg,
- bool &IsSrcPhys, bool &IsDstPhys) {
+ Register &SrcReg, Register &DstReg, bool &IsSrcPhys,
+ bool &IsDstPhys) {
SrcReg = 0;
DstReg = 0;
if (MI.isCopy()) {
@@ -277,19 +275,20 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
} else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(2).getReg();
- } else
+ } else {
return false;
+ }
- IsSrcPhys = Register::isPhysicalRegister(SrcReg);
- IsDstPhys = Register::isPhysicalRegister(DstReg);
+ IsSrcPhys = SrcReg.isPhysical();
+ IsDstPhys = DstReg.isPhysical();
return true;
}
/// Test if the given register value, which is used by the
/// given instruction, is killed by the given instruction.
-static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
+static bool isPlainlyKilled(MachineInstr *MI, Register Reg,
LiveIntervals *LIS) {
- if (LIS && Register::isVirtualRegister(Reg) && !LIS->isNotInMIMap(*MI)) {
+ if (LIS && Reg.isVirtual() && !LIS->isNotInMIMap(*MI)) {
// FIXME: Sometimes tryInstructionTransform() will add instructions and
// test whether they can be folded before keeping them. In this case it
// sets a kill before recursively calling tryInstructionTransform() again.
@@ -328,20 +327,17 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
///
/// If allowFalsePositives is true then likely kills are treated as kills even
/// if it can't be proven that they are kills.
-static bool isKilled(MachineInstr &MI, unsigned Reg,
- const MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII,
- LiveIntervals *LIS,
- bool allowFalsePositives) {
+static bool isKilled(MachineInstr &MI, Register Reg,
+ const MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ LiveIntervals *LIS, bool allowFalsePositives) {
MachineInstr *DefMI = &MI;
while (true) {
// All uses of physical registers are likely to be kills.
- if (Register::isPhysicalRegister(Reg) &&
- (allowFalsePositives || MRI->hasOneUse(Reg)))
+ if (Reg.isPhysical() && (allowFalsePositives || MRI->hasOneUse(Reg)))
return true;
if (!isPlainlyKilled(DefMI, Reg, LIS))
return false;
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
return true;
MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
// If there are multiple defs, we can't do a simple analysis, so just
@@ -350,7 +346,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
return true;
DefMI = Begin->getParent();
bool IsSrcPhys, IsDstPhys;
- unsigned SrcReg, DstReg;
+ Register SrcReg, DstReg;
// If the def is something other than a copy, then it isn't going to
// be coalesced, so follow the kill flag.
if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
@@ -361,7 +357,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
/// Return true if the specified MI uses the specified register as a two-address
/// use. If so, return the destination register by reference.
-static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) {
for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
@@ -377,19 +373,17 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
/// Given a register, if has a single in-basic block use, return the use
/// instruction if it's a copy or a two-address use.
-static
-MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
- MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII,
- bool &IsCopy,
- unsigned &DstReg, bool &IsDstPhys) {
+static MachineInstr *
+findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ bool &IsCopy, Register &DstReg, bool &IsDstPhys) {
if (!MRI->hasOneNonDBGUse(Reg))
// None or more than one use.
return nullptr;
MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
if (UseMI.getParent() != MBB)
return nullptr;
- unsigned SrcReg;
+ Register SrcReg;
bool IsSrcPhys;
if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
IsCopy = true;
@@ -397,7 +391,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
}
IsDstPhys = false;
if (isTwoAddrUse(UseMI, Reg, DstReg)) {
- IsDstPhys = Register::isPhysicalRegister(DstReg);
+ IsDstPhys = DstReg.isPhysical();
return &UseMI;
}
return nullptr;
@@ -405,22 +399,22 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
/// Return the physical register the specified virtual register might be mapped
/// to.
-static unsigned
-getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
- while (Register::isVirtualRegister(Reg)) {
- DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+static MCRegister getMappedReg(Register Reg,
+ DenseMap<Register, Register> &RegMap) {
+ while (Reg.isVirtual()) {
+ DenseMap<Register, Register>::iterator SI = RegMap.find(Reg);
if (SI == RegMap.end())
return 0;
Reg = SI->second;
}
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
return Reg;
return 0;
}
/// Return true if the two registers are equal or aliased.
-static bool
-regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+static bool regsAreCompatible(Register RegA, Register RegB,
+ const TargetRegisterInfo *TRI) {
if (RegA == RegB)
return true;
if (!RegA || !RegB)
@@ -429,7 +423,7 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
}
// Returns true if Reg is equal or aliased to at least one register in Set.
-static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg,
+static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg,
const TargetRegisterInfo *TRI) {
for (unsigned R : Set)
if (TRI->regsOverlap(R, Reg))
@@ -440,10 +434,11 @@ static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg,
/// Return true if it's potentially profitable to commute the two-address
/// instruction that's being processed.
-bool
-TwoAddressInstructionPass::
-isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
- MachineInstr *MI, unsigned Dist) {
+bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
+ Register RegB,
+ Register RegC,
+ MachineInstr *MI,
+ unsigned Dist) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -465,7 +460,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// insert => %reg1030 = COPY %reg1029
// %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags
- if (!isPlainlyKilled(MI, regC, LIS))
+ if (!isPlainlyKilled(MI, RegC, LIS))
return false;
// Ok, we have something like:
@@ -478,10 +473,10 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// %reg1026 = ADD %reg1024, %reg1025
// r0 = MOV %reg1026
// Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
- unsigned ToRegA = getMappedReg(regA, DstRegMap);
+ MCRegister ToRegA = getMappedReg(RegA, DstRegMap);
if (ToRegA) {
- unsigned FromRegB = getMappedReg(regB, SrcRegMap);
- unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ MCRegister FromRegB = getMappedReg(RegB, SrcRegMap);
+ MCRegister FromRegC = getMappedReg(RegC, SrcRegMap);
bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI);
bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI);
@@ -499,16 +494,16 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
return false;
}
- // If there is a use of regC between its last def (could be livein) and this
+ // If there is a use of RegC between its last def (could be livein) and this
// instruction, then bail.
unsigned LastDefC = 0;
- if (!noUseAfterLastDef(regC, Dist, LastDefC))
+ if (!noUseAfterLastDef(RegC, Dist, LastDefC))
return false;
- // If there is a use of regB between its last def (could be livein) and this
+ // If there is a use of RegB between its last def (could be livein) and this
// instruction, then go ahead and make this transformation.
unsigned LastDefB = 0;
- if (!noUseAfterLastDef(regB, Dist, LastDefB))
+ if (!noUseAfterLastDef(RegB, Dist, LastDefB))
return true;
// Look for situation like this:
@@ -526,14 +521,14 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// To more generally minimize register copies, ideally the logic of two addr
// instruction pass should be integrated with register allocation pass where
// interference graph is available.
- if (isRevCopyChain(regC, regA, MaxDataFlowEdge))
+ if (isRevCopyChain(RegC, RegA, MaxDataFlowEdge))
return true;
- if (isRevCopyChain(regB, regA, MaxDataFlowEdge))
+ if (isRevCopyChain(RegB, RegA, MaxDataFlowEdge))
return false;
// Since there are no intervening uses for both registers, then commute
- // if the def of regC is closer. Its live interval is shorter.
+ // if the def of RegC is closer. Its live interval is shorter.
return LastDefB && LastDefC && LastDefC > LastDefB;
}
@@ -559,7 +554,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
"instruction unless it was requested.");
// Update source register map.
- unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ MCRegister FromRegC = getMappedReg(RegC, SrcRegMap);
if (FromRegC) {
Register RegA = MI->getOperand(DstIdx).getReg();
SrcRegMap[RegA] = FromRegC;
@@ -570,28 +565,26 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
/// Return true if it is profitable to convert the given 2-address instruction
/// to a 3-address one.
-bool
-TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
+bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
+ Register RegB) {
// Look for situations like this:
// %reg1024 = MOV r1
// %reg1025 = MOV r0
// %reg1026 = ADD %reg1024, %reg1025
// r2 = MOV %reg1026
// Turn ADD into a 3-address instruction to avoid a copy.
- unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+ MCRegister FromRegB = getMappedReg(RegB, SrcRegMap);
if (!FromRegB)
return false;
- unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ MCRegister ToRegA = getMappedReg(RegA, DstRegMap);
return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
}
/// Convert the specified two-address instruction into a three address one.
/// Return true if this transformation was successful.
-bool
-TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned RegA, unsigned RegB,
- unsigned Dist) {
+bool TwoAddressInstructionPass::convertInstTo3Addr(
+ MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
+ Register RegA, Register RegB, unsigned Dist) {
// FIXME: Why does convertToThreeAddress() need an iterator reference?
MachineFunction::iterator MFI = MBB->getIterator();
MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV);
@@ -606,6 +599,24 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
if (LIS)
LIS->ReplaceMachineInstrInMaps(*mi, *NewMI);
+ // If the old instruction is debug value tracked, an update is required.
+ if (auto OldInstrNum = mi->peekDebugInstrNum()) {
+ // Sanity check.
+ assert(mi->getNumExplicitDefs() == 1);
+ assert(NewMI->getNumExplicitDefs() == 1);
+
+ // Find the old and new def location.
+ auto OldIt = mi->defs().begin();
+ auto NewIt = NewMI->defs().begin();
+ unsigned OldIdx = mi->getOperandNo(OldIt);
+ unsigned NewIdx = NewMI->getOperandNo(NewIt);
+
+ // Record that one def has been replaced by the other.
+ unsigned NewInstrNum = NewMI->getDebugInstrNum();
+ MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
+ std::make_pair(NewInstrNum, NewIdx));
+ }
+
MBB->erase(mi); // Nuke the old inst.
DistanceMap.insert(std::make_pair(NewMI, Dist));
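The block added above keeps instruction-referencing debug info valid when the two-address instruction is erased: the old instruction number and def operand index are mapped to the replacement's. A condensed sketch of that pattern, assuming a single explicit def on each side as the asserts require; the helper name is hypothetical:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include <utility>
using namespace llvm;

// Illustrative only: record that NewMI's def replaces OldMI's def for
// instruction-referencing debug users.
static void recordDefSubstitution(MachineFunction &MF, MachineInstr &OldMI,
                                  MachineInstr &NewMI) {
  if (unsigned OldInstrNum = OldMI.peekDebugInstrNum()) {
    unsigned OldIdx = OldMI.getOperandNo(OldMI.defs().begin());
    unsigned NewIdx = NewMI.getOperandNo(NewMI.defs().begin());
    MF.makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
                                  std::make_pair(NewMI.getDebugInstrNum(),
                                                 NewIdx));
  }
}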
@@ -620,13 +631,12 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
/// Scan forward recursively for only uses, update maps if the use is a copy or
/// a two-address instruction.
-void
-TwoAddressInstructionPass::scanUses(unsigned DstReg) {
- SmallVector<unsigned, 4> VirtRegPairs;
+void TwoAddressInstructionPass::scanUses(Register DstReg) {
+ SmallVector<Register, 4> VirtRegPairs;
bool IsDstPhys;
bool IsCopy = false;
- unsigned NewReg = 0;
- unsigned Reg = DstReg;
+ Register NewReg;
+ Register Reg = DstReg;
while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
NewReg, IsDstPhys)) {
if (IsCopy && !Processed.insert(UseMI).second)
@@ -682,13 +692,13 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
return;
bool IsSrcPhys, IsDstPhys;
- unsigned SrcReg, DstReg;
+ Register SrcReg, DstReg;
if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
return;
- if (IsDstPhys && !IsSrcPhys)
+ if (IsDstPhys && !IsSrcPhys) {
DstRegMap.insert(std::make_pair(SrcReg, DstReg));
- else if (!IsDstPhys && IsSrcPhys) {
+ } else if (!IsDstPhys && IsSrcPhys) {
bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
if (!isNew)
assert(SrcRegMap[DstReg] == SrcReg &&
@@ -703,10 +713,9 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
/// If there is one more local instruction that reads 'Reg' and it kills 'Reg,
/// consider moving the instruction below the kill instruction in order to
/// eliminate the need for the copy.
-bool TwoAddressInstructionPass::
-rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg) {
+bool TwoAddressInstructionPass::rescheduleMIBelowKill(
+ MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
+ Register Reg) {
// Bail immediately if we don't have LV or LIS available. We use them to find
// kills efficiently.
if (!LV && !LIS)
@@ -743,7 +752,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// Don't move pass calls, etc.
return false;
- unsigned DstReg;
+ Register DstReg;
if (isTwoAddrUse(*KillMI, Reg, DstReg))
return false;
@@ -755,9 +764,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// FIXME: Needs more sophisticated heuristics.
return false;
- SmallVector<unsigned, 2> Uses;
- SmallVector<unsigned, 2> Kills;
- SmallVector<unsigned, 2> Defs;
+ SmallVector<Register, 2> Uses;
+ SmallVector<Register, 2> Kills;
+ SmallVector<Register, 2> Defs;
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
@@ -792,8 +801,8 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
for (MachineInstr &OtherMI : make_range(End, KillPos)) {
- // Debug instructions cannot be counted against the limit.
- if (OtherMI.isDebugInstr())
+ // Debug or pseudo instructions cannot be counted against the limit.
+ if (OtherMI.isDebugOrPseudoInstr())
continue;
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
@@ -872,7 +881,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
/// Return true if the re-scheduling will put the given instruction too close
/// to the defs of its register dependencies.
-bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
+bool TwoAddressInstructionPass::isDefTooClose(Register Reg, unsigned Dist,
MachineInstr *MI) {
for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike())
@@ -893,10 +902,9 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
/// If there is one more local instruction that reads 'Reg' and it kills 'Reg,
/// consider moving the kill instruction above the current two-address
/// instruction in order to eliminate the need for the copy.
-bool TwoAddressInstructionPass::
-rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg) {
+bool TwoAddressInstructionPass::rescheduleKillAboveMI(
+ MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
+ Register Reg) {
// Bail immediately if we don't have LV or LIS available. We use them to find
// kills efficiently.
if (!LV && !LIS)
@@ -928,7 +936,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Don't mess with copies, they may be coalesced later.
return false;
- unsigned DstReg;
+ Register DstReg;
if (isTwoAddrUse(*KillMI, Reg, DstReg))
return false;
@@ -936,10 +944,10 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
if (!KillMI->isSafeToMove(AA, SeenStore))
return false;
- SmallSet<unsigned, 2> Uses;
- SmallSet<unsigned, 2> Kills;
- SmallSet<unsigned, 2> Defs;
- SmallSet<unsigned, 2> LiveDefs;
+ SmallVector<Register, 2> Uses;
+ SmallVector<Register, 2> Kills;
+ SmallVector<Register, 2> Defs;
+ SmallVector<Register, 2> LiveDefs;
for (const MachineOperand &MO : KillMI->operands()) {
if (!MO.isReg())
continue;
@@ -952,13 +960,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS));
if (MOReg == Reg && !isKill)
return false;
- Uses.insert(MOReg);
+ Uses.push_back(MOReg);
if (isKill && MOReg != Reg)
- Kills.insert(MOReg);
- } else if (Register::isPhysicalRegister(MOReg)) {
- Defs.insert(MOReg);
+ Kills.push_back(MOReg);
+ } else if (MOReg.isPhysical()) {
+ Defs.push_back(MOReg);
if (!MO.isDead())
- LiveDefs.insert(MOReg);
+ LiveDefs.push_back(MOReg);
}
}
@@ -966,8 +974,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
unsigned NumVisited = 0;
for (MachineInstr &OtherMI :
make_range(mi, MachineBasicBlock::iterator(KillMI))) {
- // Debug instructions cannot be counted against the limit.
- if (OtherMI.isDebugInstr())
+ // Debug or pseudo instructions cannot be counted against the limit.
+ if (OtherMI.isDebugOrPseudoInstr())
continue;
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
@@ -976,7 +984,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
OtherMI.isBranch() || OtherMI.isTerminator())
// Don't move pass calls, etc.
return false;
- SmallVector<unsigned, 2> OtherDefs;
+ SmallVector<Register, 2> OtherDefs;
for (const MachineOperand &MO : OtherMI.operands()) {
if (!MO.isReg())
continue;
@@ -984,11 +992,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
if (!MOReg)
continue;
if (MO.isUse()) {
- if (Defs.count(MOReg))
+ if (regOverlapsSet(Defs, MOReg, TRI))
// Moving KillMI can clobber the physical register if the def has
// not been seen.
return false;
- if (Kills.count(MOReg))
+ if (regOverlapsSet(Kills, MOReg, TRI))
// Don't want to extend other live ranges and update kills.
return false;
if (&OtherMI != MI && MOReg == Reg &&
@@ -1001,13 +1009,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
}
for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
- unsigned MOReg = OtherDefs[i];
- if (Uses.count(MOReg))
+ Register MOReg = OtherDefs[i];
+ if (regOverlapsSet(Uses, MOReg, TRI))
return false;
- if (Register::isPhysicalRegister(MOReg) && LiveDefs.count(MOReg))
+ if (MOReg.isPhysical() && regOverlapsSet(LiveDefs, MOReg, TRI))
return false;
// Physical register def is seen.
- Defs.erase(MOReg);
+ llvm::erase_value(Defs, MOReg);
}
}
@@ -1125,11 +1133,10 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
Register regA = MI.getOperand(DstIdx).getReg();
Register regB = MI.getOperand(SrcIdx).getReg();
- assert(Register::isVirtualRegister(regB) &&
- "cannot make instruction into two-address form");
+ assert(regB.isVirtual() && "cannot make instruction into two-address form");
bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
- if (Register::isVirtualRegister(regA))
+ if (regA.isVirtual())
scanUses(regA);
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
@@ -1245,7 +1252,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (LV) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
if (MO.isUse()) {
if (MO.isKill()) {
if (NewMIs[0]->killsRegister(MO.getReg()))
@@ -1330,7 +1337,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
// Deal with undef uses immediately - simply rewrite the src operand.
if (SrcMO.isUndef() && !DstMO.getSubReg()) {
// Constrain the DstReg register class if required.
- if (Register::isVirtualRegister(DstReg))
+ if (DstReg.isVirtual())
if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
TRI, *MF))
MRI->constrainRegClass(DstReg, RC);
@@ -1360,7 +1367,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
bool AllUsesCopied = true;
unsigned LastCopiedReg = 0;
SlotIndex LastCopyIdx;
- unsigned RegB = 0;
+ Register RegB = 0;
unsigned SubRegB = 0;
for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
unsigned SrcIdx = TiedPairs[tpi].first;
@@ -1383,8 +1390,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
LastCopiedReg = RegA;
- assert(Register::isVirtualRegister(RegB) &&
- "cannot make instruction into two-address form");
+ assert(RegB.isVirtual() && "cannot make instruction into two-address form");
#ifndef NDEBUG
// First, verify that we don't have a use of "a" in the instruction
@@ -1404,7 +1410,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
MIB.addReg(RegB, 0, SubRegB);
const TargetRegisterClass *RC = MRI->getRegClass(RegB);
if (SubRegB) {
- if (Register::isVirtualRegister(RegA)) {
+ if (RegA.isVirtual()) {
assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA),
SubRegB) &&
"tied subregister must be a truncation");
@@ -1425,7 +1431,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (LIS) {
LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot();
- if (Register::isVirtualRegister(RegA)) {
+ if (RegA.isVirtual()) {
LiveInterval &LI = LIS->getInterval(RegA);
VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
SlotIndex endIdx =
@@ -1445,7 +1451,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
// Make sure regA is a legal regclass for the SrcIdx operand.
- if (Register::isVirtualRegister(RegA) && Register::isVirtualRegister(RegB))
+ if (RegA.isVirtual() && RegB.isVirtual())
MRI->constrainRegClass(RegA, RC);
MO.setReg(RegA);
// The getMatchingSuper asserts guarantee that the register class projected
@@ -1649,7 +1655,7 @@ void TwoAddressInstructionPass::
eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
Register DstReg = MI.getOperand(0).getReg();
- if (MI.getOperand(0).getSubReg() || Register::isPhysicalRegister(DstReg) ||
+ if (MI.getOperand(0).getSubReg() || DstReg.isPhysical() ||
!(MI.getNumOperands() & 1)) {
LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
llvm_unreachable(nullptr);
@@ -1699,7 +1705,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
DefEmitted = true;
// Update LiveVariables' kill info.
- if (LV && isKill && !Register::isPhysicalRegister(SrcReg))
+ if (LV && isKill && !SrcReg.isPhysical())
LV->replaceKillInstruction(SrcReg, MI, *CopyMI);
LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
index 807babdcaf25..a42095d8718a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -134,8 +134,9 @@ public:
Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
Sources(sources), Sinks(sinks), SafeWrap(wrap) {
ExtTy = IntegerType::get(Ctx, PromotedWidth);
- assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits()
- && "Original type not smaller than extended type");
+ assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() <
+ ExtTy->getPrimitiveSizeInBits().getFixedSize() &&
+ "Original type not smaller than extended type");
}
void Mutate();
@@ -809,7 +810,7 @@ bool TypePromotion::isLegalToPromote(Value *V) {
bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
Type *OrigTy = V->getType();
- TypeSize = OrigTy->getPrimitiveSizeInBits();
+ TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize();
SafeToPromote.clear();
SafeWrap.clear();
@@ -980,15 +981,14 @@ bool TypePromotion::runOnFunction(Function &F) {
if (TLI->getTypeAction(ICmp->getContext(), SrcVT) !=
TargetLowering::TypePromoteInteger)
break;
-
EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT);
- if (RegisterBitWidth < PromotedVT.getSizeInBits()) {
+ if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
<< "for promoted type\n");
break;
}
- MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits());
+ MadeChange |= TryToPromote(I, PromotedVT.getFixedSizeInBits());
break;
}
}
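These hunks adjust TypePromotion to Type::getPrimitiveSizeInBits() returning a TypeSize rather than a plain integer; getFixedSize() asserts the size is not scalable before yielding the scalar width. A minimal sketch with a hypothetical helper name:

#include "llvm/IR/Type.h"
#include <cstdint>
using namespace llvm;

// Illustrative only: extracting a fixed bit width from the TypeSize that
// Type::getPrimitiveSizeInBits() returns.
static uint64_t fixedBitWidth(Type *Ty) {
  TypeSize TS = Ty->getPrimitiveSizeInBits();
  // getFixedSize() asserts !TS.isScalable(); fine for the scalar integer
  // types this pass promotes.
  return TS.getFixedSize();
}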
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
index 66bcdd9b2c4a..978357d8f539 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -49,8 +49,7 @@ EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements,
EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) {
EVT ResultVT;
- ResultVT.LLVMTy =
- VectorType::get(VT.getTypeForEVT(Context), {EC.Min, EC.Scalable});
+ ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), EC);
assert(ResultVT.isExtended() && "Type is not extended!");
return ResultVT;
}
@@ -123,13 +122,13 @@ EVT EVT::getExtendedVectorElementType() const {
unsigned EVT::getExtendedVectorNumElements() const {
assert(isExtended() && "Type is not extended!");
ElementCount EC = cast<VectorType>(LLVMTy)->getElementCount();
- if (EC.Scalable) {
+ if (EC.isScalable()) {
WithColor::warning()
<< "The code that requested the fixed number of elements has made the "
"assumption that this vector is not scalable. This assumption was "
"not correct, and this may lead to broken code\n";
}
- return EC.Min;
+ return EC.getKnownMinValue();
}
ElementCount EVT::getExtendedVectorElementCount() const {
@@ -151,23 +150,25 @@ std::string EVT::getEVTString() const {
switch (V.SimpleTy) {
default:
if (isVector())
- return (isScalableVector() ? "nxv" : "v")
- + utostr(getVectorElementCount().Min)
- + getVectorElementType().getEVTString();
+ return (isScalableVector() ? "nxv" : "v") +
+ utostr(getVectorElementCount().getKnownMinValue()) +
+ getVectorElementType().getEVTString();
if (isInteger())
return "i" + utostr(getSizeInBits());
if (isFloatingPoint())
return "f" + utostr(getSizeInBits());
llvm_unreachable("Invalid EVT!");
- case MVT::bf16: return "bf16";
- case MVT::ppcf128: return "ppcf128";
- case MVT::isVoid: return "isVoid";
- case MVT::Other: return "ch";
- case MVT::Glue: return "glue";
- case MVT::x86mmx: return "x86mmx";
- case MVT::Metadata:return "Metadata";
- case MVT::Untyped: return "Untyped";
- case MVT::exnref : return "exnref";
+ case MVT::bf16: return "bf16";
+ case MVT::ppcf128: return "ppcf128";
+ case MVT::isVoid: return "isVoid";
+ case MVT::Other: return "ch";
+ case MVT::Glue: return "glue";
+ case MVT::x86mmx: return "x86mmx";
+ case MVT::x86amx: return "x86amx";
+ case MVT::Metadata: return "Metadata";
+ case MVT::Untyped: return "Untyped";
+ case MVT::funcref: return "funcref";
+ case MVT::externref: return "externref";
}
}
@@ -194,6 +195,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::f128: return Type::getFP128Ty(Context);
case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::x86amx: return Type::getX86_AMXTy(Context);
case MVT::v1i1:
return FixedVectorType::get(Type::getInt1Ty(Context), 1);
case MVT::v2i1:
@@ -292,6 +294,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt64Ty(Context), 16);
case MVT::v32i64:
return FixedVectorType::get(Type::getInt64Ty(Context), 32);
+ case MVT::v64i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 64);
+ case MVT::v128i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 128);
+ case MVT::v256i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 256);
case MVT::v1i128:
return FixedVectorType::get(Type::getInt128Ty(Context), 1);
case MVT::v2f16:
@@ -307,9 +315,9 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v32f16:
return FixedVectorType::get(Type::getHalfTy(Context), 32);
case MVT::v64f16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 64);
+ return FixedVectorType::get(Type::getHalfTy(Context), 64);
case MVT::v128f16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 128);
+ return FixedVectorType::get(Type::getHalfTy(Context), 128);
case MVT::v2bf16:
return FixedVectorType::get(Type::getBFloatTy(Context), 2);
case MVT::v3bf16:
@@ -366,6 +374,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getDoubleTy(Context), 16);
case MVT::v32f64:
return FixedVectorType::get(Type::getDoubleTy(Context), 32);
+ case MVT::v64f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 64);
+ case MVT::v128f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 128);
+ case MVT::v256f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 256);
case MVT::nxv1i1:
return ScalableVectorType::get(Type::getInt1Ty(Context), 1);
case MVT::nxv2i1:
@@ -488,6 +502,7 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
case Type::DoubleTyID: return MVT(MVT::f64);
case Type::X86_FP80TyID: return MVT(MVT::f80);
case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
+ case Type::X86_AMXTyID: return MVT(MVT::x86amx);
case Type::FP128TyID: return MVT(MVT::f128);
case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
case Type::PointerTyID: return MVT(MVT::iPTR);
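The ValueTypes changes stop reaching into ElementCount's Min and Scalable fields and use its accessors instead. A small sketch, assuming the ElementCount named constructors available in this release:

#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Illustrative only: the accessor API the hunks above move to, instead of
// touching EC.Min / EC.Scalable directly.
static unsigned knownMinElements(ElementCount EC) {
  if (EC.isScalable()) {
    // <vscale x N x ty>: N is only a known minimum at compile time.
  }
  return EC.getKnownMinValue();
}
// e.g. knownMinElements(ElementCount::getFixed(4)) == 4
//      knownMinElements(ElementCount::getScalable(4)) == 4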
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
index 2c83f13b651b..5e0ff9d9092c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -68,6 +68,7 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
Virt2PhysMap.clear();
Virt2StackSlotMap.clear();
Virt2SplitMap.clear();
+ Virt2ShapeMap.clear();
grow();
return false;
@@ -104,7 +105,7 @@ bool VirtRegMap::hasPreferredPhys(Register VirtReg) {
return false;
if (Hint.isVirtual())
Hint = getPhys(Hint);
- return getPhys(VirtReg) == Hint;
+ return Register(getPhys(VirtReg)) == Hint;
}
bool VirtRegMap::hasKnownPreference(Register VirtReg) {
@@ -187,7 +188,7 @@ class VirtRegRewriter : public MachineFunctionPass {
void addLiveInsForSubRanges(const LiveInterval &LI, Register PhysReg) const;
void handleIdentityCopy(MachineInstr &MI) const;
void expandCopyBundle(MachineInstr &MI) const;
- bool subRegLiveThrough(const MachineInstr &MI, Register SuperPhysReg) const;
+ bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
public:
static char ID;
@@ -400,18 +401,18 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
/// after processing the last in the bundle. Does not update LiveIntervals
/// which we shouldn't need for this instruction anymore.
void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
- if (!MI.isCopy())
+ if (!MI.isCopy() && !MI.isKill())
return;
if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
SmallVector<MachineInstr *, 2> MIs({&MI});
- // Only do this when the complete bundle is made out of COPYs.
+ // Only do this when the complete bundle is made out of COPYs and KILLs.
MachineBasicBlock &MBB = *MI.getParent();
for (MachineBasicBlock::reverse_instr_iterator I =
std::next(MI.getReverseIterator()), E = MBB.instr_rend();
I != E && I->isBundledWithSucc(); ++I) {
- if (!I->isCopy())
+ if (!I->isCopy() && !I->isKill())
return;
MIs.push_back(&*I);
}
@@ -452,7 +453,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
// instruction, the bundle will have been completely undone.
if (BundledMI != BundleStart) {
BundledMI->removeFromBundle();
- MBB.insert(FirstMI, BundledMI);
+ MBB.insert(BundleStart, BundledMI);
} else if (BundledMI->isBundledWithSucc()) {
BundledMI->unbundleFromSucc();
BundleStart = &*std::next(BundledMI->getIterator());
@@ -468,7 +469,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
/// \pre \p MI defines a subregister of a virtual register that
/// has been assigned to \p SuperPhysReg.
bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
- Register SuperPhysReg) const {
+ MCRegister SuperPhysReg) const {
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
@@ -515,7 +516,7 @@ void VirtRegRewriter::rewrite() {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
Register VirtReg = MO.getReg();
- Register PhysReg = VRM->getPhys(VirtReg);
+ MCRegister PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
"Instruction uses unmapped VirtReg");
assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 44f4fe2ff9b1..53424556682d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -23,7 +23,7 @@
//
// - After:
// catchpad ...
-// exn = wasm.extract.exception();
+// exn = wasm.catch(WebAssembly::CPP_EXCEPTION);
// // Only add below in case it's not a single catch (...)
// wasm.landingpad.index(index);
// __wasm_lpad_context.lpad_index = index;
@@ -112,7 +112,7 @@ class WasmEHPrepare : public FunctionPass {
Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
Function *LSDAF = nullptr; // wasm.lsda() intrinsic
Function *GetExnF = nullptr; // wasm.get.exception() intrinsic
- Function *ExtractExnF = nullptr; // wasm.extract.exception() intrinsic
+ Function *CatchF = nullptr; // wasm.catch() intrinsic
Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic
FunctionCallee CallPersonalityF =
nullptr; // _Unwind_CallPersonality() wrapper
@@ -124,7 +124,6 @@ class WasmEHPrepare : public FunctionPass {
void setupEHPadFunctions(Function &F);
void prepareEHPad(BasicBlock *BB, bool NeedPersonality, bool NeedLSDA = false,
unsigned Index = 0);
- void prepareTerminateCleanupPad(BasicBlock *BB);
public:
static char ID; // Pass identification, replacement for typeid
@@ -169,7 +168,7 @@ static void eraseDeadBBsAndChildren(const Container &BBs, DomTreeUpdater *DTU) {
SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end());
while (!WL.empty()) {
auto *BB = WL.pop_back_val();
- if (pred_begin(BB) != pred_end(BB))
+ if (!pred_empty(BB))
continue;
WL.append(succ_begin(BB), succ_end(BB));
DeleteDeadBlock(BB, DTU);
@@ -205,7 +204,7 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
continue;
Changed = true;
auto *BB = ThrowI->getParent();
- SmallVector<BasicBlock *, 4> Succs(succ_begin(BB), succ_end(BB));
+ SmallVector<BasicBlock *, 4> Succs(successors(BB));
auto &InstList = BB->getInstList();
InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end());
IRB.SetInsertPoint(BB);
@@ -328,12 +327,9 @@ void WasmEHPrepare::setupEHPadFunctions(Function &F) {
GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception);
GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector);
- // wasm.extract.exception() is the same as wasm.get.exception() but it does
- // not take a token argument. This will be lowered down to EXTRACT_EXCEPTION
- // pseudo instruction in instruction selection, which will be expanded using
- // 'br_on_exn' instruction later.
- ExtractExnF =
- Intrinsic::getDeclaration(&M, Intrinsic::wasm_extract_exception);
+ // wasm.catch() will be lowered down to wasm 'catch' instruction in
+ // instruction selection.
+ CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
// _Unwind_CallPersonality() wrapper function, which calls the personality
CallPersonalityF = M.getOrInsertFunction(
@@ -373,8 +369,13 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
return;
}
- Instruction *ExtractExnCI = IRB.CreateCall(ExtractExnF, {}, "exn");
- GetExnCI->replaceAllUsesWith(ExtractExnCI);
+ // Replace wasm.get.exception intrinsic with wasm.catch intrinsic, which will
+ // be lowered to wasm 'catch' instruction. We do this mainly because
+ // instruction selection cannot handle wasm.get.exception intrinsic's token
+ // argument.
+ Instruction *CatchCI =
+ IRB.CreateCall(CatchF, {IRB.getInt32(WebAssembly::CPP_EXCEPTION)}, "exn");
+ GetExnCI->replaceAllUsesWith(CatchCI);
GetExnCI->eraseFromParent();
// In case it is a catchpad with single catch (...) or a cleanuppad, we don't
@@ -387,7 +388,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
}
return;
}
- IRB.SetInsertPoint(ExtractExnCI->getNextNode());
+ IRB.SetInsertPoint(CatchCI->getNextNode());
// This is to create a map of <landingpad EH label, landingpad index> in
// SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
@@ -403,7 +404,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
// Pseudocode: _Unwind_CallPersonality(exn);
- CallInst *PersCI = IRB.CreateCall(CallPersonalityF, ExtractExnCI,
+ CallInst *PersCI = IRB.CreateCall(CallPersonalityF, CatchCI,
OperandBundleDef("funclet", CPI));
PersCI->setDoesNotThrow();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
index 5a25234ba850..96d256ba57a3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -804,13 +804,9 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
<< "\' to block \'" << NewBlock->getName()
<< "\'.\n");
- BlocksInFunclet.erase(
- std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock),
- BlocksInFunclet.end());
+ llvm::erase_value(BlocksInFunclet, OldBlock);
ColorVector &OldColors = BlockColors[OldBlock];
- OldColors.erase(
- std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB),
- OldColors.end());
+ llvm::erase_value(OldColors, FuncletPadBB);
DEBUG_WITH_TYPE("winehprepare-coloring",
dbgs() << " Removed color \'" << FuncletPadBB->getName()
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
index ab9c0e81ebdc..11d1b309aa64 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -145,20 +145,22 @@ void XRayInstrumentation::prependRetWithPatchableExit(
bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
auto &F = MF.getFunction();
auto InstrAttr = F.getFnAttribute("function-instrument");
- bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
- InstrAttr.isStringAttribute() &&
+ bool AlwaysInstrument = InstrAttr.isStringAttribute() &&
InstrAttr.getValueAsString() == "xray-always";
+ bool NeverInstrument = InstrAttr.isStringAttribute() &&
+ InstrAttr.getValueAsString() == "xray-never";
+ if (NeverInstrument && !AlwaysInstrument)
+ return false;
auto ThresholdAttr = F.getFnAttribute("xray-instruction-threshold");
auto IgnoreLoopsAttr = F.getFnAttribute("xray-ignore-loops");
unsigned int XRayThreshold = 0;
if (!AlwaysInstrument) {
- if (ThresholdAttr.hasAttribute(Attribute::None) ||
- !ThresholdAttr.isStringAttribute())
+ if (!ThresholdAttr.isStringAttribute())
return false; // XRay threshold attribute not found.
if (ThresholdAttr.getValueAsString().getAsInteger(10, XRayThreshold))
return false; // Invalid value for threshold.
- bool IgnoreLoops = !IgnoreLoopsAttr.hasAttribute(Attribute::None);
+ bool IgnoreLoops = IgnoreLoopsAttr.isValid();
// Count the number of MachineInstr`s in MachineFunction
int64_t MICount = 0;
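With the Attribute::None checks gone, the XRay pass relies on isStringAttribute() and isValid() when reading its string function attributes. A small sketch of reading a numeric threshold the same way; the helper is hypothetical, the attribute name is taken from the hunk:

#include "llvm/IR/Function.h"
using namespace llvm;

// Illustrative only: reading a numeric string attribute the way the pass
// above reads "xray-instruction-threshold".
static bool readThreshold(const Function &F, unsigned &Threshold) {
  Attribute A = F.getFnAttribute("xray-instruction-threshold");
  if (!A.isStringAttribute())
    return false; // attribute absent or not a string attribute
  // getAsInteger returns true on parse failure.
  return !A.getValueAsString().getAsInteger(10, Threshold);
}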
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp
index 12b19e77a422..d20f6dd8f338 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -52,9 +52,8 @@ static uint64_t getDebugInfoSize(DWARFContext &Dwarf) {
/// Similar to DWARFUnitSection::getUnitForOffset(), but returning our
/// CompileUnit object instead.
static CompileUnit *getUnitForOffset(const UnitListTy &Units, uint64_t Offset) {
- auto CU = std::upper_bound(
- Units.begin(), Units.end(), Offset,
- [](uint64_t LHS, const std::unique_ptr<CompileUnit> &RHS) {
+ auto CU = llvm::upper_bound(
+ Units, Offset, [](uint64_t LHS, const std::unique_ptr<CompileUnit> &RHS) {
return LHS < RHS->getOrigUnit().getNextUnitOffset();
});
return CU != Units.end() ? CU->get() : nullptr;
@@ -63,7 +62,7 @@ static CompileUnit *getUnitForOffset(const UnitListTy &Units, uint64_t Offset) {
/// Resolve the DIE attribute reference that has been extracted in \p RefValue.
/// The resulting DIE might be in another CompileUnit which is stored into \p
/// ReferencedCU. \returns null if resolving fails for any reason.
-DWARFDie DWARFLinker::resolveDIEReference(const DwarfFile &File,
+DWARFDie DWARFLinker::resolveDIEReference(const DWARFFile &File,
const UnitListTy &Units,
const DWARFFormValue &RefValue,
const DWARFDie &DIE,
@@ -242,70 +241,49 @@ static void analyzeImportedModule(
}
}
-/// Recursive helper to build the global DeclContext information and
-/// gather the child->parent relationships in the original compile unit.
-///
-/// \return true when this DIE and all of its children are only
-/// forward declarations to types defined in external clang modules
-/// (i.e., forward declarations that are children of a DW_TAG_module).
-static bool analyzeContextInfo(
- const DWARFDie &DIE, unsigned ParentIdx, CompileUnit &CU,
- DeclContext *CurrentDeclContext, UniquingStringPool &StringPool,
- DeclContextTree &Contexts, uint64_t ModulesEndOffset,
- swiftInterfacesMap *ParseableSwiftInterfaces,
- std::function<void(const Twine &, const DWARFDie &)> ReportWarning,
- bool InImportedModule = false) {
- unsigned MyIdx = CU.getOrigUnit().getDIEIndex(DIE);
- CompileUnit::DIEInfo &Info = CU.getInfo(MyIdx);
+/// The distinct types of work performed by the work loop in
+/// analyzeContextInfo.
+enum class ContextWorklistItemType : uint8_t {
+ AnalyzeContextInfo,
+ UpdateChildPruning,
+ UpdatePruning,
+};
- // Clang imposes an ODR on modules(!) regardless of the language:
- // "The module-id should consist of only a single identifier,
- // which provides the name of the module being defined. Each
- // module shall have a single definition."
- //
- // This does not extend to the types inside the modules:
- // "[I]n C, this implies that if two structs are defined in
- // different submodules with the same name, those two types are
- // distinct types (but may be compatible types if their
- // definitions match)."
- //
- // We treat non-C++ modules like namespaces for this reason.
- if (DIE.getTag() == dwarf::DW_TAG_module && ParentIdx == 0 &&
- dwarf::toString(DIE.find(dwarf::DW_AT_name), "") !=
- CU.getClangModuleName()) {
- InImportedModule = true;
- analyzeImportedModule(DIE, CU, ParseableSwiftInterfaces, ReportWarning);
- }
-
- Info.ParentIdx = ParentIdx;
- bool InClangModule = CU.isClangModule() || InImportedModule;
- if (CU.hasODR() || InClangModule) {
- if (CurrentDeclContext) {
- auto PtrInvalidPair = Contexts.getChildDeclContext(
- *CurrentDeclContext, DIE, CU, StringPool, InClangModule);
- CurrentDeclContext = PtrInvalidPair.getPointer();
- Info.Ctxt =
- PtrInvalidPair.getInt() ? nullptr : PtrInvalidPair.getPointer();
- if (Info.Ctxt)
- Info.Ctxt->setDefinedInClangModule(InClangModule);
- } else
- Info.Ctxt = CurrentDeclContext = nullptr;
- }
+/// This class represents an item in the work list. The type defines what kind
+/// of work needs to be performed when processing the current item. Everything
+/// but the Type and Die fields are optional based on the type.
+struct ContextWorklistItem {
+ DWARFDie Die;
+ unsigned ParentIdx;
+ union {
+ CompileUnit::DIEInfo *OtherInfo;
+ DeclContext *Context;
+ };
+ ContextWorklistItemType Type;
+ bool InImportedModule;
+
+ ContextWorklistItem(DWARFDie Die, ContextWorklistItemType T,
+ CompileUnit::DIEInfo *OtherInfo = nullptr)
+ : Die(Die), ParentIdx(0), OtherInfo(OtherInfo), Type(T),
+ InImportedModule(false) {}
+
+ ContextWorklistItem(DWARFDie Die, DeclContext *Context, unsigned ParentIdx,
+ bool InImportedModule)
+ : Die(Die), ParentIdx(ParentIdx), Context(Context),
+ Type(ContextWorklistItemType::AnalyzeContextInfo),
+ InImportedModule(InImportedModule) {}
+};
- Info.Prune = InImportedModule;
- if (DIE.hasChildren())
- for (auto Child : DIE.children())
- Info.Prune &= analyzeContextInfo(Child, MyIdx, CU, CurrentDeclContext,
- StringPool, Contexts, ModulesEndOffset,
- ParseableSwiftInterfaces, ReportWarning,
- InImportedModule);
+static bool updatePruning(const DWARFDie &Die, CompileUnit &CU,
+ uint64_t ModulesEndOffset) {
+ CompileUnit::DIEInfo &Info = CU.getInfo(Die);
// Prune this DIE if it is either a forward declaration inside a
// DW_TAG_module or a DW_TAG_module that contains nothing but
// forward declarations.
- Info.Prune &= (DIE.getTag() == dwarf::DW_TAG_module) ||
- (isTypeTag(DIE.getTag()) &&
- dwarf::toUnsigned(DIE.find(dwarf::DW_AT_declaration), 0));
+ Info.Prune &= (Die.getTag() == dwarf::DW_TAG_module) ||
+ (isTypeTag(Die.getTag()) &&
+ dwarf::toUnsigned(Die.find(dwarf::DW_AT_declaration), 0));
// Only prune forward declarations inside a DW_TAG_module for which a
// definition exists elsewhere.
@@ -318,6 +296,100 @@ static bool analyzeContextInfo(
return Info.Prune;
}
+static void updateChildPruning(const DWARFDie &Die, CompileUnit &CU,
+ CompileUnit::DIEInfo &ChildInfo) {
+ CompileUnit::DIEInfo &Info = CU.getInfo(Die);
+ Info.Prune &= ChildInfo.Prune;
+}
+
+/// Recursive helper to build the global DeclContext information and
+/// gather the child->parent relationships in the original compile unit.
+///
+/// This function uses the same work list approach as lookForDIEsToKeep.
+///
+/// \return true when this DIE and all of its children are only
+/// forward declarations to types defined in external clang modules
+/// (i.e., forward declarations that are children of a DW_TAG_module).
+static bool analyzeContextInfo(
+ const DWARFDie &DIE, unsigned ParentIdx, CompileUnit &CU,
+ DeclContext *CurrentDeclContext, DeclContextTree &Contexts,
+ uint64_t ModulesEndOffset, swiftInterfacesMap *ParseableSwiftInterfaces,
+ std::function<void(const Twine &, const DWARFDie &)> ReportWarning,
+ bool InImportedModule = false) {
+ // LIFO work list.
+ std::vector<ContextWorklistItem> Worklist;
+ Worklist.emplace_back(DIE, CurrentDeclContext, ParentIdx, InImportedModule);
+
+ while (!Worklist.empty()) {
+ ContextWorklistItem Current = Worklist.back();
+ Worklist.pop_back();
+
+ switch (Current.Type) {
+ case ContextWorklistItemType::UpdatePruning:
+ updatePruning(Current.Die, CU, ModulesEndOffset);
+ continue;
+ case ContextWorklistItemType::UpdateChildPruning:
+ updateChildPruning(Current.Die, CU, *Current.OtherInfo);
+ continue;
+ case ContextWorklistItemType::AnalyzeContextInfo:
+ break;
+ }
+
+ unsigned Idx = CU.getOrigUnit().getDIEIndex(Current.Die);
+ CompileUnit::DIEInfo &Info = CU.getInfo(Idx);
+
+ // Clang imposes an ODR on modules(!) regardless of the language:
+ // "The module-id should consist of only a single identifier,
+ // which provides the name of the module being defined. Each
+ // module shall have a single definition."
+ //
+ // This does not extend to the types inside the modules:
+ // "[I]n C, this implies that if two structs are defined in
+ // different submodules with the same name, those two types are
+ // distinct types (but may be compatible types if their
+ // definitions match)."
+ //
+ // We treat non-C++ modules like namespaces for this reason.
+ if (Current.Die.getTag() == dwarf::DW_TAG_module &&
+ Current.ParentIdx == 0 &&
+ dwarf::toString(Current.Die.find(dwarf::DW_AT_name), "") !=
+ CU.getClangModuleName()) {
+ Current.InImportedModule = true;
+ analyzeImportedModule(Current.Die, CU, ParseableSwiftInterfaces,
+ ReportWarning);
+ }
+
+ Info.ParentIdx = Current.ParentIdx;
+ bool InClangModule = CU.isClangModule() || Current.InImportedModule;
+ if (CU.hasODR() || InClangModule) {
+ if (Current.Context) {
+ auto PtrInvalidPair = Contexts.getChildDeclContext(
+ *Current.Context, Current.Die, CU, InClangModule);
+ Current.Context = PtrInvalidPair.getPointer();
+ Info.Ctxt =
+ PtrInvalidPair.getInt() ? nullptr : PtrInvalidPair.getPointer();
+ if (Info.Ctxt)
+ Info.Ctxt->setDefinedInClangModule(InClangModule);
+ } else
+ Info.Ctxt = Current.Context = nullptr;
+ }
+
+ Info.Prune = Current.InImportedModule;
+ // Add children in reverse order to the worklist to effectively process
+ // them in order.
+ Worklist.emplace_back(Current.Die, ContextWorklistItemType::UpdatePruning);
+ for (auto Child : reverse(Current.Die.children())) {
+ CompileUnit::DIEInfo &ChildInfo = CU.getInfo(Child);
+ Worklist.emplace_back(
+ Current.Die, ContextWorklistItemType::UpdateChildPruning, &ChildInfo);
+ Worklist.emplace_back(Child, Current.Context, Idx,
+ Current.InImportedModule);
+ }
+ }
+
+ return CU.getInfo(DIE).Prune;
+}
+
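The rewritten analyzeContextInfo above replaces recursion with an explicit LIFO work list: post-processing items (UpdatePruning, UpdateChildPruning) are pushed before the children they depend on, so they only run once the subtree is finished, and children are pushed in reverse to preserve source order. A stripped-down sketch of that pattern with hypothetical Node/Item types, not the DWARF structures used above:

#include <vector>

// Generic sketch of the recursion-to-worklist pattern: a post-visit item is
// pushed before the children, so it pops only after the whole subtree.
struct Node {
  std::vector<Node *> Children;
  bool Prune = true;
};

static void analyzeTree(Node *Root) {
  struct Item {
    Node *N;
    Node *Parent;
    bool PostVisit;
  };
  std::vector<Item> Worklist;
  Worklist.push_back({Root, nullptr, false});
  while (!Worklist.empty()) {
    Item Cur = Worklist.back();
    Worklist.pop_back();
    if (Cur.PostVisit) {
      // Runs after every descendant of Cur.N: fold the child's result into
      // its parent, much like UpdateChildPruning above.
      if (Cur.Parent)
        Cur.Parent->Prune &= Cur.N->Prune;
      continue;
    }
    // Pre-visit work on Cur.N goes here (AnalyzeContextInfo above).
    Worklist.push_back({Cur.N, Cur.Parent, true});
    // Push children in reverse so they pop in source order (LIFO list).
    for (auto It = Cur.N->Children.rbegin(), E = Cur.N->Children.rend();
         It != E; ++It)
      Worklist.push_back({*It, Cur.N, false});
  }
}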
static bool dieNeedsChildrenToBeMeaningful(uint32_t Tag) {
switch (Tag) {
default:
@@ -347,32 +419,10 @@ void DWARFLinker::cleanupAuxiliarryData(LinkContext &Context) {
DIEAlloc.Reset();
}
-/// Get the starting and ending (exclusive) offset for the
-/// attribute with index \p Idx descibed by \p Abbrev. \p Offset is
-/// supposed to point to the position of the first attribute described
-/// by \p Abbrev.
-/// \return [StartOffset, EndOffset) as a pair.
-static std::pair<uint64_t, uint64_t>
-getAttributeOffsets(const DWARFAbbreviationDeclaration *Abbrev, unsigned Idx,
- uint64_t Offset, const DWARFUnit &Unit) {
- DataExtractor Data = Unit.getDebugInfoExtractor();
-
- for (unsigned I = 0; I < Idx; ++I)
- DWARFFormValue::skipValue(Abbrev->getFormByIndex(I), Data, &Offset,
- Unit.getFormParams());
-
- uint64_t End = Offset;
- DWARFFormValue::skipValue(Abbrev->getFormByIndex(Idx), Data, &End,
- Unit.getFormParams());
-
- return std::make_pair(Offset, End);
-}
-
/// Check if a variable describing DIE should be kept.
/// \returns updated TraversalFlags.
unsigned DWARFLinker::shouldKeepVariableDIE(AddressesMap &RelocMgr,
const DWARFDie &DIE,
- CompileUnit &Unit,
CompileUnit::DIEInfo &MyInfo,
unsigned Flags) {
const auto *Abbrev = DIE.getAbbreviationDeclarationPtr();
@@ -384,24 +434,12 @@ unsigned DWARFLinker::shouldKeepVariableDIE(AddressesMap &RelocMgr,
return Flags | TF_Keep;
}
- Optional<uint32_t> LocationIdx =
- Abbrev->findAttributeIndex(dwarf::DW_AT_location);
- if (!LocationIdx)
- return Flags;
-
- uint64_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode());
- const DWARFUnit &OrigUnit = Unit.getOrigUnit();
- uint64_t LocationOffset, LocationEndOffset;
- std::tie(LocationOffset, LocationEndOffset) =
- getAttributeOffsets(Abbrev, *LocationIdx, Offset, OrigUnit);
-
// See if there is a relocation to a valid debug map entry inside
// this variable's location. The order is important here. We want to
// always check if the variable has a valid relocation, so that the
// DIEInfo is filled. However, we don't want a static variable in a
// function to force us to keep the enclosing function.
- if (!RelocMgr.hasValidRelocationAt(LocationOffset, LocationEndOffset,
- MyInfo) ||
+ if (!RelocMgr.hasLiveMemoryLocation(DIE, MyInfo) ||
(Flags & TF_InFunctionScope))
return Flags;
@@ -420,26 +458,16 @@ unsigned DWARFLinker::shouldKeepVariableDIE(AddressesMap &RelocMgr,
/// \returns updated TraversalFlags.
unsigned DWARFLinker::shouldKeepSubprogramDIE(
AddressesMap &RelocMgr, RangesTy &Ranges, const DWARFDie &DIE,
- const DwarfFile &File, CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo,
+ const DWARFFile &File, CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo,
unsigned Flags) {
- const auto *Abbrev = DIE.getAbbreviationDeclarationPtr();
-
Flags |= TF_InFunctionScope;
- Optional<uint32_t> LowPcIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_low_pc);
- if (!LowPcIdx)
+ auto LowPc = dwarf::toAddress(DIE.find(dwarf::DW_AT_low_pc));
+ if (!LowPc)
return Flags;
- uint64_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode());
- DWARFUnit &OrigUnit = Unit.getOrigUnit();
- uint64_t LowPcOffset, LowPcEndOffset;
- std::tie(LowPcOffset, LowPcEndOffset) =
- getAttributeOffsets(Abbrev, *LowPcIdx, Offset, OrigUnit);
-
- auto LowPc = dwarf::toAddress(DIE.find(dwarf::DW_AT_low_pc));
assert(LowPc.hasValue() && "low_pc attribute is not an address.");
- if (!LowPc ||
- !RelocMgr.hasValidRelocationAt(LowPcOffset, LowPcEndOffset, MyInfo))
+ if (!RelocMgr.hasLiveAddressRange(DIE, MyInfo))
return Flags;
if (Options.Verbose) {
@@ -453,6 +481,8 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE(
if (DIE.getTag() == dwarf::DW_TAG_label) {
if (Unit.hasLabelAt(*LowPc))
return Flags;
+
+ DWARFUnit &OrigUnit = Unit.getOrigUnit();
// FIXME: dsymutil-classic compat. dsymutil-classic doesn't consider labels
// that don't fall into the CU's aranges. This is wrong IMO. Debug info
// generation bugs aside, this is really wrong in the case of labels, where
@@ -482,14 +512,14 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE(
/// Check if a DIE should be kept.
/// \returns updated TraversalFlags.
unsigned DWARFLinker::shouldKeepDIE(AddressesMap &RelocMgr, RangesTy &Ranges,
- const DWARFDie &DIE, const DwarfFile &File,
+ const DWARFDie &DIE, const DWARFFile &File,
CompileUnit &Unit,
CompileUnit::DIEInfo &MyInfo,
unsigned Flags) {
switch (DIE.getTag()) {
case dwarf::DW_TAG_constant:
case dwarf::DW_TAG_variable:
- return shouldKeepVariableDIE(RelocMgr, DIE, Unit, MyInfo, Flags);
+ return shouldKeepVariableDIE(RelocMgr, DIE, MyInfo, Flags);
case dwarf::DW_TAG_subprogram:
case dwarf::DW_TAG_label:
return shouldKeepSubprogramDIE(RelocMgr, Ranges, DIE, File, Unit, MyInfo,
@@ -522,8 +552,7 @@ static void updateChildIncompleteness(const DWARFDie &Die, CompileUnit &CU,
return;
}
- unsigned Idx = CU.getOrigUnit().getDIEIndex(Die);
- CompileUnit::DIEInfo &MyInfo = CU.getInfo(Idx);
+ CompileUnit::DIEInfo &MyInfo = CU.getInfo(Die);
if (ChildInfo.Incomplete || ChildInfo.Prune)
MyInfo.Incomplete = true;
@@ -545,8 +574,7 @@ static void updateRefIncompleteness(const DWARFDie &Die, CompileUnit &CU,
return;
}
- unsigned Idx = CU.getOrigUnit().getDIEIndex(Die);
- CompileUnit::DIEInfo &MyInfo = CU.getInfo(Idx);
+ CompileUnit::DIEInfo &MyInfo = CU.getInfo(Die);
if (MyInfo.Incomplete)
return;
@@ -578,8 +606,7 @@ void DWARFLinker::lookForChildDIEsToKeep(
for (auto Child : reverse(Die.children())) {
// Add a worklist item before every child to calculate incompleteness right
// after the current child is processed.
- unsigned Idx = CU.getOrigUnit().getDIEIndex(Child);
- CompileUnit::DIEInfo &ChildInfo = CU.getInfo(Idx);
+ CompileUnit::DIEInfo &ChildInfo = CU.getInfo(Child);
Worklist.emplace_back(Die, CU, WorklistItemType::UpdateChildIncompleteness,
&ChildInfo);
Worklist.emplace_back(Child, CU, Flags);
@@ -590,7 +617,7 @@ void DWARFLinker::lookForChildDIEsToKeep(
/// kept. All DIEs referenced though attributes should be kept.
void DWARFLinker::lookForRefDIEsToKeep(
const DWARFDie &Die, CompileUnit &CU, unsigned Flags,
- const UnitListTy &Units, const DwarfFile &File,
+ const UnitListTy &Units, const DWARFFile &File,
SmallVectorImpl<WorklistItem> &Worklist) {
bool UseOdr = (Flags & DWARFLinker::TF_DependencyWalk)
? (Flags & DWARFLinker::TF_ODR)
@@ -614,8 +641,7 @@ void DWARFLinker::lookForRefDIEsToKeep(
CompileUnit *ReferencedCU;
if (auto RefDie =
resolveDIEReference(File, Units, Val, Die, ReferencedCU)) {
- uint32_t RefIdx = ReferencedCU->getOrigUnit().getDIEIndex(RefDie);
- CompileUnit::DIEInfo &Info = ReferencedCU->getInfo(RefIdx);
+ CompileUnit::DIEInfo &Info = ReferencedCU->getInfo(RefDie);
bool IsModuleRef = Info.Ctxt && Info.Ctxt->getCanonicalDIEOffset() &&
Info.Ctxt->isDefinedInClangModule();
// If the referenced DIE has a DeclContext that has already been
@@ -649,8 +675,7 @@ void DWARFLinker::lookForRefDIEsToKeep(
for (auto &P : reverse(ReferencedDIEs)) {
// Add a worklist item before every child to calculate incompleteness right
// after the current child is processed.
- uint32_t RefIdx = P.second.getOrigUnit().getDIEIndex(P.first);
- CompileUnit::DIEInfo &Info = P.second.getInfo(RefIdx);
+ CompileUnit::DIEInfo &Info = P.second.getInfo(P.first);
Worklist.emplace_back(Die, CU, WorklistItemType::UpdateRefIncompleteness,
&Info);
Worklist.emplace_back(P.first, P.second,
@@ -700,15 +725,14 @@ void DWARFLinker::lookForParentDIEsToKeep(
/// The return value indicates whether the DIE is incomplete.
void DWARFLinker::lookForDIEsToKeep(AddressesMap &AddressesMap,
RangesTy &Ranges, const UnitListTy &Units,
- const DWARFDie &Die, const DwarfFile &File,
+ const DWARFDie &Die, const DWARFFile &File,
CompileUnit &Cu, unsigned Flags) {
// LIFO work list.
SmallVector<WorklistItem, 4> Worklist;
Worklist.emplace_back(Die, Cu, Flags);
while (!Worklist.empty()) {
- WorklistItem Current = Worklist.back();
- Worklist.pop_back();
+ WorklistItem Current = Worklist.pop_back_val();
// Look at the worklist type to decide what kind of work to perform.
switch (Current.Type) {
@@ -819,9 +843,12 @@ void DWARFLinker::assignAbbrev(DIEAbbrev &Abbrev) {
unsigned DWARFLinker::DIECloner::cloneStringAttribute(
DIE &Die, AttributeSpec AttrSpec, const DWARFFormValue &Val,
const DWARFUnit &U, OffsetsStringPool &StringPool, AttributesInfo &Info) {
+ Optional<const char *> String = Val.getAsCString();
+ if (!String)
+ return 0;
+
// Switch everything to out of line strings.
- const char *String = *Val.getAsCString();
- auto StringEntry = StringPool.getEntry(String);
+ auto StringEntry = StringPool.getEntry(*String);
// Update attributes info.
if (AttrSpec.Attr == dwarf::DW_AT_name)
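
cloneStringAttribute now refuses to dereference a missing string instead of asserting on a bad form value. The same guard pattern in plain C++, with a toy interning pool whose names are illustrative only:

#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>

// Toy interning pool: returns a stable id for each distinct string.
struct StringPool {
  std::unordered_map<std::string, unsigned> Ids;
  unsigned getEntry(const std::string &S) {
    return Ids.emplace(S, static_cast<unsigned>(Ids.size())).first->second;
  }
};

// Mirrors the new control flow: clone nothing when the form value does not
// actually decode to a string.
unsigned cloneString(const std::optional<std::string> &Val, StringPool &Pool) {
  if (!Val)
    return 0;                  // skip the attribute entirely
  (void)Pool.getEntry(*Val);   // intern the string out of line
  return 4;                    // pretend a DWARF32 strp offset: 4 bytes
}

int main() {
  StringPool Pool;
  std::cout << cloneString(std::nullopt, Pool) << ' '
            << cloneString(std::string("main"), Pool) << '\n'; // 0 4
}
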
@@ -838,7 +865,7 @@ unsigned DWARFLinker::DIECloner::cloneStringAttribute(
unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
DIE &Die, const DWARFDie &InputDIE, AttributeSpec AttrSpec,
- unsigned AttrSize, const DWARFFormValue &Val, const DwarfFile &File,
+ unsigned AttrSize, const DWARFFormValue &Val, const DWARFFile &File,
CompileUnit &Unit) {
const DWARFUnit &U = Unit.getOrigUnit();
uint64_t Ref = *Val.getAsReference();
@@ -854,8 +881,7 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
if (!RefDie || AttrSpec.Attr == dwarf::DW_AT_sibling)
return 0;
- unsigned Idx = RefUnit->getOrigUnit().getDIEIndex(RefDie);
- CompileUnit::DIEInfo &RefInfo = RefUnit->getInfo(Idx);
+ CompileUnit::DIEInfo &RefInfo = RefUnit->getInfo(RefDie);
// If we already have emitted an equivalent DeclContext, just point
// at it.
@@ -910,7 +936,7 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
}
void DWARFLinker::DIECloner::cloneExpression(
- DataExtractor &Data, DWARFExpression Expression, const DwarfFile &File,
+ DataExtractor &Data, DWARFExpression Expression, const DWARFFile &File,
CompileUnit &Unit, SmallVectorImpl<uint8_t> &OutputBuffer) {
using Encoding = DWARFExpression::Operation::Encoding;
@@ -947,8 +973,7 @@ void DWARFLinker::DIECloner::cloneExpression(
// DW_OP_reinterpret, which is currently not supported.
if (RefOffset > 0 || Op.getCode() != dwarf::DW_OP_convert) {
auto RefDie = Unit.getOrigUnit().getDIEForOffset(RefOffset);
- uint32_t RefIdx = Unit.getOrigUnit().getDIEIndex(RefDie);
- CompileUnit::DIEInfo &Info = Unit.getInfo(RefIdx);
+ CompileUnit::DIEInfo &Info = Unit.getInfo(RefDie);
if (DIE *Clone = Info.Clone)
Offset = Clone->getOffset();
else
@@ -975,7 +1000,7 @@ void DWARFLinker::DIECloner::cloneExpression(
}
unsigned DWARFLinker::DIECloner::cloneBlockAttribute(
- DIE &Die, const DwarfFile &File, CompileUnit &Unit, AttributeSpec AttrSpec,
+ DIE &Die, const DWARFFile &File, CompileUnit &Unit, AttributeSpec AttrSpec,
const DWARFFormValue &Val, unsigned AttrSize, bool IsLittleEndian) {
DIEValueList *Attr;
DIEValue Value;
@@ -1032,6 +1057,7 @@ unsigned DWARFLinker::DIECloner::cloneBlockAttribute(
unsigned DWARFLinker::DIECloner::cloneAddressAttribute(
DIE &Die, AttributeSpec AttrSpec, const DWARFFormValue &Val,
const CompileUnit &Unit, AttributesInfo &Info) {
+ dwarf::Form Form = AttrSpec.Form;
uint64_t Addr = *Val.getAsAddress();
if (LLVM_UNLIKELY(Linker.Options.Update)) {
@@ -1081,13 +1107,24 @@ unsigned DWARFLinker::DIECloner::cloneAddressAttribute(
Addr = (Info.OrigCallPc ? Info.OrigCallPc : Addr) + Info.PCOffset;
}
+ // If this is an indexed address emit the relocated address.
+ if (Form == dwarf::DW_FORM_addrx) {
+ if (llvm::Expected<uint64_t> RelocAddr =
+ ObjFile.Addresses->relocateIndexedAddr(Addr)) {
+ Addr = *RelocAddr;
+ Form = dwarf::DW_FORM_addr;
+ } else {
+ Linker.reportWarning(toString(RelocAddr.takeError()), ObjFile);
+ }
+ }
+
Die.addValue(DIEAlloc, static_cast<dwarf::Attribute>(AttrSpec.Attr),
- static_cast<dwarf::Form>(AttrSpec.Form), DIEInteger(Addr));
+ static_cast<dwarf::Form>(Form), DIEInteger(Addr));
return Unit.getOrigUnit().getAddressByteSize();
}
unsigned DWARFLinker::DIECloner::cloneScalarAttribute(
- DIE &Die, const DWARFDie &InputDIE, const DwarfFile &File,
+ DIE &Die, const DWARFDie &InputDIE, const DWARFFile &File,
CompileUnit &Unit, AttributeSpec AttrSpec, const DWARFFormValue &Val,
unsigned AttrSize, AttributesInfo &Info) {
uint64_t Value;
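
The DW_FORM_addrx handling added in the hunk above resolves an index into the address pool to a concrete relocated address and rewrites the form to DW_FORM_addr; if resolution fails it keeps the raw value and only warns. A self-contained sketch of that fallback, where the table and error type are simplified stand-ins for the LLVM classes:

#include <cstdint>
#include <iostream>
#include <string>
#include <variant>
#include <vector>

enum class Form { Addr, Addrx };

// Stand-in for the per-object address table read from .debug_addr.
struct AddressTable {
  std::vector<uint64_t> Entries;
  std::variant<uint64_t, std::string> relocateIndexedAddr(uint64_t Idx) const {
    if (Idx >= Entries.size())
      return std::string("index out of range");
    return Entries[Idx];
  }
};

// Resolve an indexed address if possible; otherwise keep the raw value and
// report the problem, much as the cloner does with a warning.
void cloneAddress(Form &F, uint64_t &Addr, const AddressTable &Table) {
  if (F != Form::Addrx)
    return;
  auto R = Table.relocateIndexedAddr(Addr);
  if (auto *Resolved = std::get_if<uint64_t>(&R)) {
    Addr = *Resolved;
    F = Form::Addr; // the output no longer needs the index table
  } else {
    std::cerr << "warning: " << std::get<std::string>(R) << '\n';
  }
}

int main() {
  AddressTable Table{{0x1000, 0x2000}};
  Form F = Form::Addrx;
  uint64_t Addr = 1;
  cloneAddress(F, Addr, Table);
  std::cout << std::hex << Addr << '\n'; // 2000
}
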
@@ -1155,7 +1192,7 @@ unsigned DWARFLinker::DIECloner::cloneScalarAttribute(
/// value \p Val, and add it to \p Die.
/// \returns the size of the cloned attribute.
unsigned DWARFLinker::DIECloner::cloneAttribute(
- DIE &Die, const DWARFDie &InputDIE, const DwarfFile &File,
+ DIE &Die, const DWARFDie &InputDIE, const DWARFFile &File,
CompileUnit &Unit, OffsetsStringPool &StringPool, const DWARFFormValue &Val,
const AttributeSpec AttrSpec, unsigned AttrSize, AttributesInfo &Info,
bool IsLittleEndian) {
@@ -1164,6 +1201,11 @@ unsigned DWARFLinker::DIECloner::cloneAttribute(
switch (AttrSpec.Form) {
case dwarf::DW_FORM_strp:
case dwarf::DW_FORM_string:
+ case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_strx3:
+ case dwarf::DW_FORM_strx4:
return cloneStringAttribute(Die, AttrSpec, Val, U, StringPool, Info);
case dwarf::DW_FORM_ref_addr:
case dwarf::DW_FORM_ref1:
@@ -1180,6 +1222,7 @@ unsigned DWARFLinker::DIECloner::cloneAttribute(
return cloneBlockAttribute(Die, File, Unit, AttrSpec, Val, AttrSize,
IsLittleEndian);
case dwarf::DW_FORM_addr:
+ case dwarf::DW_FORM_addrx:
return cloneAddressAttribute(Die, AttrSpec, Val, Unit, Info);
case dwarf::DW_FORM_data1:
case dwarf::DW_FORM_data2:
@@ -1193,9 +1236,10 @@ unsigned DWARFLinker::DIECloner::cloneAttribute(
return cloneScalarAttribute(Die, InputDIE, File, Unit, AttrSpec, Val,
AttrSize, Info);
default:
- Linker.reportWarning(
- "Unsupported attribute form in cloneAttribute. Dropping.", File,
- &InputDIE);
+ Linker.reportWarning("Unsupported attribute form " +
+ dwarf::FormEncodingString(AttrSpec.Form) +
+ " in cloneAttribute. Dropping.",
+ File, &InputDIE);
}
return 0;
@@ -1259,6 +1303,9 @@ shouldSkipAttribute(DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
case dwarf::DW_AT_high_pc:
case dwarf::DW_AT_ranges:
return SkipPC;
+ case dwarf::DW_AT_str_offsets_base:
+ // FIXME: Use the string offset table with Dwarf 5.
+ return true;
case dwarf::DW_AT_location:
case dwarf::DW_AT_frame_base:
// FIXME: for some reason dsymutil-classic keeps the location attributes
@@ -1273,7 +1320,7 @@ shouldSkipAttribute(DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
}
DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
- const DwarfFile &File, CompileUnit &Unit,
+ const DWARFFile &File, CompileUnit &Unit,
OffsetsStringPool &StringPool,
int64_t PCOffset, uint32_t OutOffset,
unsigned Flags, bool IsLittleEndian,
@@ -1483,7 +1530,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
/// to point at the new entries.
void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit,
DWARFContext &OrigDwarf,
- const DwarfFile &File) const {
+ const DWARFFile &File) const {
DWARFDebugRangeList RangeList;
const auto &FunctionRanges = Unit.getFunctionRanges();
unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
@@ -1551,7 +1598,7 @@ static void insertLineSequence(std::vector<DWARFDebugLine::Row> &Seq,
return;
if (!Rows.empty() && Rows.back().Address < Seq.front().Address) {
- Rows.insert(Rows.end(), Seq.begin(), Seq.end());
+ llvm::append_range(Rows, Seq);
Seq.clear();
return;
}
@@ -1590,7 +1637,7 @@ static void patchStmtList(DIE &Die, DIEInteger Offset) {
/// are present in the binary.
void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
DWARFContext &OrigDwarf,
- const DwarfFile &File) {
+ const DWARFFile &File) {
DWARFDie CUDie = Unit.getOrigUnit().getUnitDIE();
auto StmtList = dwarf::toSectionOffset(CUDie.find(dwarf::DW_AT_stmt_list));
if (!StmtList)
@@ -1790,7 +1837,7 @@ void DWARFLinker::emitDwarfAcceleratorEntriesForUnit(CompileUnit &Unit) {
/// This is actually pretty easy as the data of the CIEs and FDEs can
/// be considered as black boxes and moved as is. The only thing to do
/// is to patch the addresses in the headers.
-void DWARFLinker::patchFrameInfoForObject(const DwarfFile &File,
+void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
RangesTy &Ranges,
DWARFContext &OrigDwarf,
unsigned AddrSize) {
@@ -1887,7 +1934,7 @@ void DWARFLinker::DIECloner::copyAbbrev(
uint32_t DWARFLinker::DIECloner::hashFullyQualifiedName(DWARFDie DIE,
CompileUnit &U,
- const DwarfFile &File,
+ const DWARFFile &File,
int ChildRecurseDepth) {
const char *Name = nullptr;
DWARFUnit *OrigUnit = &U.getOrigUnit();
@@ -1951,11 +1998,13 @@ static std::string remapPath(StringRef Path,
return p.str().str();
}
-bool DWARFLinker::registerModuleReference(
- DWARFDie CUDie, const DWARFUnit &Unit, const DwarfFile &File,
- OffsetsStringPool &StringPool, UniquingStringPool &UniquingStringPool,
- DeclContextTree &ODRContexts, uint64_t ModulesEndOffset, unsigned &UnitID,
- bool IsLittleEndian, unsigned Indent, bool Quiet) {
+bool DWARFLinker::registerModuleReference(DWARFDie CUDie, const DWARFUnit &Unit,
+ const DWARFFile &File,
+ OffsetsStringPool &StringPool,
+ DeclContextTree &ODRContexts,
+ uint64_t ModulesEndOffset,
+ unsigned &UnitID, bool IsLittleEndian,
+ unsigned Indent, bool Quiet) {
std::string PCMfile = dwarf::toString(
CUDie.find({dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), "");
if (PCMfile.empty())
@@ -1999,10 +2048,9 @@ bool DWARFLinker::registerModuleReference(
// shouldn't run into an infinite loop, so mark it as processed now.
ClangModules.insert({PCMfile, DwoId});
- if (Error E =
- loadClangModule(CUDie, PCMfile, Name, DwoId, File, StringPool,
- UniquingStringPool, ODRContexts, ModulesEndOffset,
- UnitID, IsLittleEndian, Indent + 2, Quiet)) {
+ if (Error E = loadClangModule(CUDie, PCMfile, Name, DwoId, File, StringPool,
+ ODRContexts, ModulesEndOffset, UnitID,
+ IsLittleEndian, Indent + 2, Quiet)) {
consumeError(std::move(E));
return false;
}
@@ -2011,10 +2059,9 @@ bool DWARFLinker::registerModuleReference(
Error DWARFLinker::loadClangModule(
DWARFDie CUDie, StringRef Filename, StringRef ModuleName, uint64_t DwoId,
- const DwarfFile &File, OffsetsStringPool &StringPool,
- UniquingStringPool &UniquingStringPool, DeclContextTree &ODRContexts,
- uint64_t ModulesEndOffset, unsigned &UnitID, bool IsLittleEndian,
- unsigned Indent, bool Quiet) {
+ const DWARFFile &File, OffsetsStringPool &StringPool,
+ DeclContextTree &ODRContexts, uint64_t ModulesEndOffset, unsigned &UnitID,
+ bool IsLittleEndian, unsigned Indent, bool Quiet) {
/// Using a SmallString<0> because loadClangModule() is recursive.
SmallString<0> Path(Options.PrependPath);
if (sys::path::is_relative(Filename))
@@ -2038,9 +2085,9 @@ Error DWARFLinker::loadClangModule(
auto CUDie = CU->getUnitDIE(false);
if (!CUDie)
continue;
- if (!registerModuleReference(
- CUDie, *CU, File, StringPool, UniquingStringPool, ODRContexts,
- ModulesEndOffset, UnitID, IsLittleEndian, Indent, Quiet)) {
+ if (!registerModuleReference(CUDie, *CU, File, StringPool, ODRContexts,
+ ModulesEndOffset, UnitID, IsLittleEndian,
+ Indent, Quiet)) {
if (Unit) {
std::string Err =
(Filename +
@@ -2068,9 +2115,8 @@ Error DWARFLinker::loadClangModule(
Unit = std::make_unique<CompileUnit>(*CU, UnitID++, !Options.NoODR,
ModuleName);
Unit->setHasInterestingContent();
- analyzeContextInfo(CUDie, 0, *Unit, &ODRContexts.getRoot(),
- UniquingStringPool, ODRContexts, ModulesEndOffset,
- Options.ParseableSwiftInterfaces,
+ analyzeContextInfo(CUDie, 0, *Unit, &ODRContexts.getRoot(), ODRContexts,
+ ModulesEndOffset, Options.ParseableSwiftInterfaces,
[&](const Twine &Warning, const DWARFDie &DIE) {
reportWarning(Warning, File, &DIE);
});
@@ -2096,17 +2142,19 @@ Error DWARFLinker::loadClangModule(
}
uint64_t DWARFLinker::DIECloner::cloneAllCompileUnits(
- DWARFContext &DwarfContext, const DwarfFile &File,
+ DWARFContext &DwarfContext, const DWARFFile &File,
OffsetsStringPool &StringPool, bool IsLittleEndian) {
uint64_t OutputDebugInfoSize =
Linker.Options.NoOutput ? 0 : Emitter->getDebugInfoSectionSize();
const uint64_t StartOutputDebugInfoSize = OutputDebugInfoSize;
for (auto &CurrentUnit : CompileUnits) {
+ const uint16_t DwarfVersion = CurrentUnit->getOrigUnit().getVersion();
+ const uint32_t UnitHeaderSize = DwarfVersion >= 5 ? 12 : 11;
auto InputDIE = CurrentUnit->getOrigUnit().getUnitDIE();
CurrentUnit->setStartOffset(OutputDebugInfoSize);
if (!InputDIE) {
- OutputDebugInfoSize = CurrentUnit->computeNextUnitOffset();
+ OutputDebugInfoSize = CurrentUnit->computeNextUnitOffset(DwarfVersion);
continue;
}
if (CurrentUnit->getInfo(0).Keep) {
@@ -2114,11 +2162,11 @@ uint64_t DWARFLinker::DIECloner::cloneAllCompileUnits(
// already has a DIE inside of it.
CurrentUnit->createOutputDIE();
cloneDIE(InputDIE, File, *CurrentUnit, StringPool, 0 /* PC offset */,
- 11 /* Unit Header size */, 0, IsLittleEndian,
+ UnitHeaderSize, 0, IsLittleEndian,
CurrentUnit->getOutputUnitDIE());
}
- OutputDebugInfoSize = CurrentUnit->computeNextUnitOffset();
+ OutputDebugInfoSize = CurrentUnit->computeNextUnitOffset(DwarfVersion);
if (!Linker.Options.NoOutput) {
assert(Emitter);
@@ -2159,12 +2207,14 @@ uint64_t DWARFLinker::DIECloner::cloneAllCompileUnits(
if (!CurrentUnit->getOutputUnitDIE())
continue;
+ unsigned DwarfVersion = CurrentUnit->getOrigUnit().getVersion();
+
assert(Emitter->getDebugInfoSectionSize() ==
CurrentUnit->getStartOffset());
- Emitter->emitCompileUnitHeader(*CurrentUnit);
+ Emitter->emitCompileUnitHeader(*CurrentUnit, DwarfVersion);
Emitter->emitDIE(*CurrentUnit->getOutputUnitDIE());
assert(Emitter->getDebugInfoSectionSize() ==
- CurrentUnit->computeNextUnitOffset());
+ CurrentUnit->computeNextUnitOffset(DwarfVersion));
}
}
@@ -2190,7 +2240,7 @@ void DWARFLinker::updateAccelKind(DWARFContext &Dwarf) {
}
}
-bool DWARFLinker::emitPaperTrailWarnings(const DwarfFile &File,
+bool DWARFLinker::emitPaperTrailWarnings(const DWARFFile &File,
OffsetsStringPool &StringPool) {
if (File.Warnings.empty())
@@ -2267,7 +2317,7 @@ void DWARFLinker::copyInvariantDebugSection(DWARFContext &Dwarf) {
"debug_aranges");
}
-void DWARFLinker::addObjectFile(DwarfFile &File) {
+void DWARFLinker::addObjectFile(DWARFFile &File) {
ObjectContexts.emplace_back(LinkContext(File));
if (ObjectContexts.back().File.Dwarf)
@@ -2284,10 +2334,6 @@ bool DWARFLinker::link() {
// parallel loop.
unsigned NumObjects = ObjectContexts.size();
- // This Dwarf string pool which is only used for uniquing. This one should
- // never be used for offsets as its not thread-safe or predictable.
- UniquingStringPool UniquingStringPool(nullptr, true);
-
// This Dwarf string pool which is used for emission. It must be used
// serially as the order of calling getStringOffset matters for
// reproducibility.
@@ -2357,7 +2403,7 @@ bool DWARFLinker::link() {
}
if (CUDie && !LLVM_UNLIKELY(Options.Update))
registerModuleReference(CUDie, *CU, OptContext.File, OffsetsStringPool,
- UniquingStringPool, ODRContexts, 0, UnitID,
+ ODRContexts, 0, UnitID,
OptContext.File.Dwarf->isLittleEndian());
}
}
@@ -2399,8 +2445,8 @@ bool DWARFLinker::link() {
auto CUDie = CU->getUnitDIE(false);
if (!CUDie || LLVM_UNLIKELY(Options.Update) ||
!registerModuleReference(CUDie, *CU, Context.File, OffsetsStringPool,
- UniquingStringPool, ODRContexts,
- ModulesEndOffset, UnitID, Quiet)) {
+ ODRContexts, ModulesEndOffset, UnitID,
+ Quiet)) {
Context.CompileUnits.push_back(std::make_unique<CompileUnit>(
*CU, UnitID++, !Options.NoODR && !Options.Update, ""));
}
@@ -2412,9 +2458,8 @@ bool DWARFLinker::link() {
if (!CUDie)
continue;
analyzeContextInfo(CurrentUnit->getOrigUnit().getUnitDIE(), 0,
- *CurrentUnit, &ODRContexts.getRoot(),
- UniquingStringPool, ODRContexts, ModulesEndOffset,
- Options.ParseableSwiftInterfaces,
+ *CurrentUnit, &ODRContexts.getRoot(), ODRContexts,
+ ModulesEndOffset, Options.ParseableSwiftInterfaces,
[&](const Twine &Warning, const DWARFDie &DIE) {
reportWarning(Warning, Context.File, &DIE);
});
@@ -2544,7 +2589,7 @@ bool DWARFLinker::link() {
std::vector<std::pair<StringRef, DebugInfoSize>> Sorted;
for (auto &E : SizeByObject)
Sorted.emplace_back(E.first(), E.second);
- llvm::sort(Sorted.begin(), Sorted.end(), [](auto &LHS, auto &RHS) {
+ llvm::sort(Sorted, [](auto &LHS, auto &RHS) {
return LHS.second.Output > RHS.second.Output;
});
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
index f59a9023c690..925ab3d295c2 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
@@ -36,7 +36,7 @@ StringRef CompileUnit::getSysRoot() {
}
return SysRoot;
}
-
+
void CompileUnit::markEverythingAsKept() {
unsigned Idx = 0;
@@ -69,10 +69,10 @@ void CompileUnit::markEverythingAsKept() {
}
}
-uint64_t CompileUnit::computeNextUnitOffset() {
+uint64_t CompileUnit::computeNextUnitOffset(uint16_t DwarfVersion) {
NextUnitOffset = StartOffset;
if (NewUnit) {
- NextUnitOffset += 11 /* Header size */;
+ NextUnitOffset += (DwarfVersion >= 5) ? 12 : 11; // Header size
NextUnitOffset += NewUnit->getUnitDie().getSize();
}
return NextUnitOffset;
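
computeNextUnitOffset and the cloner now size the compile-unit header from the unit's DWARF version: 11 bytes through v4, 12 bytes from v5 because of the extra unit_type octet. A tiny illustration of the arithmetic (DWARF32 assumed):

#include <cstdint>
#include <iostream>

// DWARF32 compile-unit header size:
//   v2-v4: length(4) + version(2) + abbrev_offset(4) + address_size(1) = 11
//   v5:    length(4) + version(2) + unit_type(1) + address_size(1)
//          + abbrev_offset(4)                                         = 12
constexpr uint32_t unitHeaderSize(uint16_t DwarfVersion) {
  return DwarfVersion >= 5 ? 12 : 11;
}

static_assert(unitHeaderSize(4) == 11);
static_assert(unitHeaderSize(5) == 12);

int main() {
  uint64_t NextUnitOffset = 0x40;            // start of this unit
  NextUnitOffset += unitHeaderSize(5);       // header
  NextUnitOffset += 0x95;                    // size of the unit DIE tree
  std::cout << std::hex << NextUnitOffset << '\n'; // e1
}
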
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp
index c9a5da6676b3..d9b3c4235b4d 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp
@@ -40,9 +40,9 @@ bool DeclContext::setLastSeenDIE(CompileUnit &U, const DWARFDie &Die) {
return true;
}
-PointerIntPair<DeclContext *, 1> DeclContextTree::getChildDeclContext(
- DeclContext &Context, const DWARFDie &DIE, CompileUnit &U,
- UniquingStringPool &StringPool, bool InClangModule) {
+PointerIntPair<DeclContext *, 1>
+DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE,
+ CompileUnit &U, bool InClangModule) {
unsigned Tag = DIE.getTag();
// FIXME: dsymutil-classic compat: We should bail out here if we
@@ -80,27 +80,20 @@ PointerIntPair<DeclContext *, 1> DeclContextTree::getChildDeclContext(
break;
}
- const char *Name = DIE.getLinkageName();
- const char *ShortName = DIE.getShortName();
-
- if (!Name)
- Name = ShortName;
-
StringRef NameRef;
- StringRef ShortNameRef;
StringRef FileRef;
- if (Name)
- NameRef = StringPool.internString(Name);
- else if (Tag == dwarf::DW_TAG_namespace)
+ if (const char *LinkageName = DIE.getLinkageName())
+ NameRef = StringPool.internString(LinkageName);
+ else if (const char *ShortName = DIE.getShortName())
+ NameRef = StringPool.internString(ShortName);
+
+ bool IsAnonymousNamespace = NameRef.empty() && Tag == dwarf::DW_TAG_namespace;
+ if (IsAnonymousNamespace) {
// FIXME: For dsymutil-classic compatibility. I think uniquing within
// anonymous namespaces is wrong. There is no ODR guarantee there.
- NameRef = StringPool.internString("(anonymous namespace)");
-
- if (ShortName && ShortName != Name)
- ShortNameRef = StringPool.internString(ShortName);
- else
- ShortNameRef = NameRef;
+ NameRef = "(anonymous namespace)";
+ }
if (Tag != dwarf::DW_TAG_class_type && Tag != dwarf::DW_TAG_structure_type &&
Tag != dwarf::DW_TAG_union_type &&
@@ -121,7 +114,7 @@ PointerIntPair<DeclContext *, 1> DeclContextTree::getChildDeclContext(
// module-defined types do not have a file and line.
ByteSize = dwarf::toUnsigned(DIE.find(dwarf::DW_AT_byte_size),
std::numeric_limits<uint64_t>::max());
- if (Tag != dwarf::DW_TAG_namespace || !Name) {
+ if (Tag != dwarf::DW_TAG_namespace || IsAnonymousNamespace) {
if (unsigned FileNum =
dwarf::toUnsigned(DIE.find(dwarf::DW_AT_decl_file), 0)) {
if (const auto *LT = U.getOrigUnit().getContext().getLineTableForUnit(
@@ -129,29 +122,14 @@ PointerIntPair<DeclContext *, 1> DeclContextTree::getChildDeclContext(
// FIXME: dsymutil-classic compatibility. I'd rather not
// unique anything in anonymous namespaces, but if we do, then
// verify that the file and line correspond.
- if (!Name && Tag == dwarf::DW_TAG_namespace)
+ if (IsAnonymousNamespace)
FileNum = 1;
if (LT->hasFileAtIndex(FileNum)) {
Line = dwarf::toUnsigned(DIE.find(dwarf::DW_AT_decl_line), 0);
// Cache the resolved paths based on the index in the line table,
- // because calling realpath is expansive.
- StringRef ResolvedPath = U.getResolvedPath(FileNum);
- if (!ResolvedPath.empty()) {
- FileRef = ResolvedPath;
- } else {
- std::string File;
- bool FoundFileName = LT->getFileNameByIndex(
- FileNum, U.getOrigUnit().getCompilationDir(),
- DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
- File);
- (void)FoundFileName;
- assert(FoundFileName && "Must get file name from line table");
- // Second level of caching, this time based on the file's parent
- // path.
- FileRef = PathResolver.resolve(File, StringPool);
- U.setResolvedPath(FileNum, FileRef);
- }
+ // because calling realpath is expensive.
+ FileRef = getResolvedPath(U, FileNum, *LT);
}
}
}
@@ -175,7 +153,7 @@ PointerIntPair<DeclContext *, 1> DeclContextTree::getChildDeclContext(
// FIXME: dsymutil-classic compatibility: when we don't have a name,
// use the filename.
- if (Tag == dwarf::DW_TAG_namespace && NameRef == "(anonymous namespace)")
+ if (IsAnonymousNamespace)
Hash = hash_combine(Hash, FileRef);
// Now look if this context already exists.
@@ -210,4 +188,28 @@ PointerIntPair<DeclContext *, 1> DeclContextTree::getChildDeclContext(
return PointerIntPair<DeclContext *, 1>(*ContextIter);
}
+StringRef
+DeclContextTree::getResolvedPath(CompileUnit &CU, unsigned FileNum,
+ const DWARFDebugLine::LineTable &LineTable) {
+ std::pair<unsigned, unsigned> Key = {CU.getUniqueID(), FileNum};
+
+ ResolvedPathsMap::const_iterator It = ResolvedPaths.find(Key);
+ if (It == ResolvedPaths.end()) {
+ std::string FileName;
+ bool FoundFileName = LineTable.getFileNameByIndex(
+ FileNum, CU.getOrigUnit().getCompilationDir(),
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FileName);
+ (void)FoundFileName;
+ assert(FoundFileName && "Must get file name from line table");
+
+ // Second level of caching, this time based on the file's parent
+ // path.
+ StringRef ResolvedPath = PathResolver.resolve(FileName, StringPool);
+
+ It = ResolvedPaths.insert(std::make_pair(Key, ResolvedPath)).first;
+ }
+
+ return It->second;
+}
+
} // namespace llvm
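
getResolvedPath centralizes the realpath caching that previously lived inline in getChildDeclContext: a file name is resolved at most once per (compile unit, file index) pair. A minimal version of that cache using only the standard library; the resolver here is a placeholder for the expensive path canonicalization:

#include <iostream>
#include <map>
#include <string>
#include <utility>

// Pretend "resolving" a file is expensive; count the calls to show the
// cache short-circuits repeats.
static int ResolveCalls = 0;
std::string expensiveResolve(unsigned CUId, unsigned FileNum) {
  ++ResolveCalls;
  return "/abs/cu" + std::to_string(CUId) + "/file" + std::to_string(FileNum);
}

class PathCache {
  std::map<std::pair<unsigned, unsigned>, std::string> ResolvedPaths;

public:
  const std::string &getResolvedPath(unsigned CUId, unsigned FileNum) {
    auto Key = std::make_pair(CUId, FileNum);
    auto It = ResolvedPaths.find(Key);
    if (It == ResolvedPaths.end())
      It = ResolvedPaths.emplace(Key, expensiveResolve(CUId, FileNum)).first;
    return It->second;
  }
};

int main() {
  PathCache Cache;
  Cache.getResolvedPath(7, 1);
  Cache.getResolvedPath(7, 1); // served from the cache
  std::cout << ResolveCalls << '\n'; // 1
}
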
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index e900335f24b3..c0043ae39efe 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -121,16 +121,23 @@ void DwarfStreamer::switchToDebugInfoSection(unsigned DwarfVersion) {
/// Emit the compilation unit header for \p Unit in the debug_info section.
///
-/// A Dwarf section header is encoded as:
+/// A Dwarf 4 section header is encoded as:
/// uint32_t Unit length (omitting this field)
/// uint16_t Version
/// uint32_t Abbreviation table offset
/// uint8_t Address size
-///
/// Leading to a total of 11 bytes.
-void DwarfStreamer::emitCompileUnitHeader(CompileUnit &Unit) {
- unsigned Version = Unit.getOrigUnit().getVersion();
- switchToDebugInfoSection(Version);
+///
+/// A Dwarf 5 section header is encoded as:
+/// uint32_t Unit length (omitting this field)
+/// uint16_t Version
+/// uint8_t Unit type
+/// uint8_t Address size
+/// uint32_t Abbreviation table offset
+/// Leading to a total of 12 bytes.
+void DwarfStreamer::emitCompileUnitHeader(CompileUnit &Unit,
+ unsigned DwarfVersion) {
+ switchToDebugInfoSection(DwarfVersion);
/// The start of the unit within its section.
Unit.setLabelBegin(Asm->createTempSymbol("cu_begin"));
@@ -140,13 +147,22 @@ void DwarfStreamer::emitCompileUnitHeader(CompileUnit &Unit) {
// been computed in CompileUnit::computeOffsets(). Subtract 4 to that size to
// account for the length field.
Asm->emitInt32(Unit.getNextUnitOffset() - Unit.getStartOffset() - 4);
- Asm->emitInt16(Version);
-
- // We share one abbreviations table across all units so it's always at the
- // start of the section.
- Asm->emitInt32(0);
- Asm->emitInt8(Unit.getOrigUnit().getAddressByteSize());
- DebugInfoSectionSize += 11;
+ Asm->emitInt16(DwarfVersion);
+
+ if (DwarfVersion >= 5) {
+ Asm->emitInt8(dwarf::DW_UT_compile);
+ Asm->emitInt8(Unit.getOrigUnit().getAddressByteSize());
+ // We share one abbreviations table across all units so it's always at the
+ // start of the section.
+ Asm->emitInt32(0);
+ DebugInfoSectionSize += 12;
+ } else {
+ // We share one abbreviations table across all units so it's always at the
+ // start of the section.
+ Asm->emitInt32(0);
+ Asm->emitInt8(Unit.getOrigUnit().getAddressByteSize());
+ DebugInfoSectionSize += 11;
+ }
// Remember this CU.
EmittedUnits.push_back({Unit.getUniqueID(), Unit.getLabelBegin()});
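
The v5 header also orders its fields differently than v4 (unit_type and address_size come before the abbreviation offset), which is why the streamer now branches on the version. A byte-level sketch of the two layouts, assuming DWARF32 and little-endian output purely for illustration:

#include <cstdint>
#include <iostream>
#include <vector>

static void emitLE(std::vector<uint8_t> &Out, uint64_t V, unsigned Bytes) {
  for (unsigned I = 0; I < Bytes; ++I)
    Out.push_back(static_cast<uint8_t>(V >> (8 * I)));
}

// Emits everything after the initial length field, mirroring the two header
// layouts described in the doc comment above (11 vs. 12 bytes with length).
std::vector<uint8_t> emitCUHeaderTail(uint16_t Version, uint8_t AddrSize,
                                      uint32_t AbbrevOffset) {
  std::vector<uint8_t> Out;
  emitLE(Out, Version, 2);
  if (Version >= 5) {
    Out.push_back(0x01);             // DW_UT_compile
    Out.push_back(AddrSize);
    emitLE(Out, AbbrevOffset, 4);
  } else {
    emitLE(Out, AbbrevOffset, 4);
    Out.push_back(AddrSize);
  }
  return Out;
}

int main() {
  std::cout << emitCUHeaderTail(4, 8, 0).size() << ' '   // 7 (+4 length = 11)
            << emitCUHeaderTail(5, 8, 0).size() << '\n'; // 8 (+4 length = 12)
}
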
@@ -211,6 +227,16 @@ void DwarfStreamer::emitStrings(const NonRelocatableStringpool &Pool) {
// Emit a null terminator.
Asm->emitInt8(0);
}
+
+#if 0
+ if (DwarfVersion >= 5) {
+ // Emit an empty string offset section.
+ Asm->OutStreamer->SwitchSection(MOFI->getDwarfStrOffSection());
+ Asm->emitDwarfUnitLength(4, "Length of String Offsets Set");
+ Asm->emitInt16(DwarfVersion);
+ Asm->emitInt16(0);
+ }
+#endif
}
void DwarfStreamer::emitDebugNames(
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index 49761b9dce88..c272985cf2d4 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -273,7 +273,6 @@ Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value,
void CodeViewRecordIO::emitEncodedSignedInteger(const int64_t &Value,
const Twine &Comment) {
- assert(Value < 0 && "Encoded integer is not signed!");
if (Value >= std::numeric_limits<int8_t>::min()) {
Streamer->emitIntValue(LF_CHAR, 2);
emitComment(Comment);
@@ -322,7 +321,6 @@ void CodeViewRecordIO::emitEncodedUnsignedInteger(const uint64_t &Value,
}
Error CodeViewRecordIO::writeEncodedSignedInteger(const int64_t &Value) {
- assert(Value < 0 && "Encoded integer is not signed!");
if (Value >= std::numeric_limits<int8_t>::min()) {
if (auto EC = Writer->writeInteger<uint16_t>(LF_CHAR))
return EC;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
index be8c32d5b294..9bc69abea102 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
@@ -47,10 +47,9 @@ Error DebugFrameDataSubsection::commit(BinaryStreamWriter &Writer) const {
}
std::vector<FrameData> SortedFrames(Frames.begin(), Frames.end());
- std::sort(SortedFrames.begin(), SortedFrames.end(),
- [](const FrameData &LHS, const FrameData &RHS) {
- return LHS.RvaStart < RHS.RvaStart;
- });
+ llvm::sort(SortedFrames, [](const FrameData &LHS, const FrameData &RHS) {
+ return LHS.RvaStart < RHS.RvaStart;
+ });
if (auto EC = Writer.writeArray(makeArrayRef(SortedFrames)))
return EC;
return Error::success();
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/EnumTables.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
index 82f6713a88f5..949707bf5475 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -39,6 +39,14 @@ static const EnumEntry<uint16_t> RegisterNames_X86[] = {
#undef CV_REGISTERS_X86
};
+static const EnumEntry<uint16_t> RegisterNames_ARM[] = {
+#define CV_REGISTERS_ARM
+#define CV_REGISTER(name, val) CV_ENUM_CLASS_ENT(RegisterId, name),
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_ARM
+};
+
static const EnumEntry<uint16_t> RegisterNames_ARM64[] = {
#define CV_REGISTERS_ARM64
#define CV_REGISTER(name, val) CV_ENUM_CLASS_ENT(RegisterId, name),
@@ -434,7 +442,9 @@ ArrayRef<EnumEntry<TypeLeafKind>> getTypeLeafNames() {
}
ArrayRef<EnumEntry<uint16_t>> getRegisterNames(CPUType Cpu) {
- if (Cpu == CPUType::ARM64) {
+ if (Cpu == CPUType::ARMNT) {
+ return makeArrayRef(RegisterNames_ARM);
+ } else if (Cpu == CPUType::ARM64) {
return makeArrayRef(RegisterNames_ARM64);
}
return makeArrayRef(RegisterNames_X86);
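
getRegisterNames now selects a third table for 32-bit ARM while keeping the x86 names as the default for every other CPU. The shape of that selection with placeholder tables, not the real register lists:

#include <iostream>
#include <string>
#include <vector>

enum class CPUType { X86, ARMNT, ARM64 };

static const std::vector<std::string> RegisterNames_X86 = {"eax", "ebx"};
static const std::vector<std::string> RegisterNames_ARM = {"r0", "r1"};
static const std::vector<std::string> RegisterNames_ARM64 = {"x0", "x1"};

// Same dispatch as the patched function: an explicit case per known CPU,
// with the x86 table as the historical default.
const std::vector<std::string> &getRegisterNames(CPUType Cpu) {
  if (Cpu == CPUType::ARMNT)
    return RegisterNames_ARM;
  if (Cpu == CPUType::ARM64)
    return RegisterNames_ARM64;
  return RegisterNames_X86;
}

int main() {
  std::cout << getRegisterNames(CPUType::ARMNT).front() << '\n'; // r0
}
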
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index 06b20ba33eec..c0fc3e0ef65a 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -172,10 +172,10 @@ Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) {
if (PartialOffsets.empty())
return fullScanForType(TI);
- auto Next = std::upper_bound(PartialOffsets.begin(), PartialOffsets.end(), TI,
- [](TypeIndex Value, const TypeIndexOffset &IO) {
- return Value < IO.Type;
- });
+ auto Next = llvm::upper_bound(PartialOffsets, TI,
+ [](TypeIndex Value, const TypeIndexOffset &IO) {
+ return Value < IO.Type;
+ });
assert(Next != PartialOffsets.begin());
auto Prev = std::prev(Next);
@@ -185,7 +185,7 @@ Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) {
// They've asked us to fetch a type index, but the entry we found in the
// partial offsets array has already been visited. Since we visit an entire
// block every time, that means this record should have been previously
- // discovered. Ultimately, this means this is a request for a non-existant
+ // discovered. Ultimately, this means this is a request for a non-existent
// type index.
return make_error<CodeViewError>("Invalid type index");
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordName.cpp
index 47b5498181b7..1ca899789bef 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordName.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordName.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/CodeView/RecordName.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h"
@@ -77,9 +78,10 @@ Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArgListRecord &Args) {
uint32_t Size = Indices.size();
Name = "(";
for (uint32_t I = 0; I < Size; ++I) {
- assert(Indices[I] < CurrentTypeIndex);
-
- Name.append(Types.getTypeName(Indices[I]));
+ if (Indices[I] < CurrentTypeIndex)
+ Name.append(Types.getTypeName(Indices[I]));
+ else
+ Name.append("<unknown 0x" + utohexstr(Indices[I].getIndex()) + ">");
if (I + 1 != Size)
Name.append(", ");
}
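
Instead of asserting that every argument's type index precedes the record being named, the printer now degrades to an "<unknown 0x...>" placeholder, so malformed or forward-referencing type streams still yield a readable name. A standalone version of that formatting, with indices simplified to small integers:

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Build "(T1, T2, ...)" but never index past what has been seen so far;
// out-of-range indices become a hex placeholder instead of tripping an assert.
std::string formatArgList(const std::vector<uint32_t> &Indices,
                          uint32_t CurrentTypeIndex,
                          const std::vector<std::string> &KnownNames) {
  std::ostringstream Name;
  Name << '(';
  for (size_t I = 0; I < Indices.size(); ++I) {
    if (Indices[I] < CurrentTypeIndex)
      Name << KnownNames[Indices[I]];
    else
      Name << "<unknown 0x" << std::hex << Indices[I] << std::dec << ">";
    if (I + 1 != Indices.size())
      Name << ", ";
  }
  Name << ')';
  return Name.str();
}

int main() {
  std::cout << formatArgList({0, 5}, 2, {"int", "char"}) << '\n';
  // (int, <unknown 0x5>)
}
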
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
index e7f032f9c670..63ce302a4e09 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
@@ -34,7 +34,7 @@ StringRef llvm::codeview::getBytesAsCString(ArrayRef<uint8_t> LeafData) {
}
Error llvm::codeview::consume(BinaryStreamReader &Reader, APSInt &Num) {
- // Used to avoid overload ambiguity on APInt construtor.
+ // Used to avoid overload ambiguity on APInt constructor.
bool FalseVal = false;
uint16_t Short;
if (auto EC = Reader.readInteger(Short))
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index e84e1c9cea78..682747a2b81f 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -5,8 +5,9 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Endian.h"
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
index bb71c86a0609..7ac376156146 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -232,7 +232,7 @@ Error TypeRecordMapping::visitMemberBegin(CVMemberRecord &Record) {
// The largest possible subrecord is one in which there is a record prefix,
// followed by the subrecord, followed by a continuation, and that entire
- // sequence spaws `MaxRecordLength` bytes. So the record's length is
+ // sequence spawns `MaxRecordLength` bytes. So the record's length is
// calculated as follows.
constexpr uint32_t ContinuationLength = 8;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index 8c4b640bcd19..587a68142a4a 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -38,7 +38,7 @@ namespace {
/// 0x1000.
///
/// Type records are only allowed to use type indices smaller than their own, so
-/// a type stream is effectively a topologically sorted DAG. Cycles occuring in
+/// a type stream is effectively a topologically sorted DAG. Cycles occurring in
/// the type graph of the source program are resolved with forward declarations
/// of composite types. This class implements the following type stream merging
/// algorithm, which relies on this DAG structure:
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
index ddf307de2221..25d2e852a7fe 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
@@ -18,8 +18,9 @@ void DWARFAddressRange::dump(raw_ostream &OS, uint32_t AddressSize,
const DWARFObject *Obj) const {
OS << (DumpOpts.DisplayRawContents ? " " : "[");
- OS << format("0x%*.*" PRIx64 ", ", AddressSize * 2, AddressSize * 2, LowPC)
- << format("0x%*.*" PRIx64, AddressSize * 2, AddressSize * 2, HighPC);
+ DWARFFormValue::dumpAddress(OS, AddressSize, LowPC);
+ OS << ", ";
+ DWARFFormValue::dumpAddress(OS, AddressSize, HighPC);
OS << (DumpOpts.DisplayRawContents ? "" : ")");
if (Obj)
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
index 9bd134105c9b..2b08120ef4dc 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
@@ -22,9 +22,10 @@ void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
<< ", version = " << format("0x%04x", getVersion());
if (getVersion() >= 5)
OS << ", unit_type = " << dwarf::UnitTypeString(getUnitType());
- OS << ", abbr_offset = "
- << format("0x%04" PRIx64, getAbbreviations()->getOffset())
- << ", addr_size = " << format("0x%02x", getAddressByteSize());
+ OS << ", abbr_offset = " << format("0x%04" PRIx64, getAbbrOffset());
+ if (!getAbbreviations())
+ OS << " (invalid)";
+ OS << ", addr_size = " << format("0x%02x", getAddressByteSize());
if (getVersion() >= 5 && getUnitType() != dwarf::DW_UT_compile)
OS << ", DWO_id = " << format("0x%016" PRIx64, *getDWOId());
OS << " (next unit at " << format("0x%08" PRIx64, getNextUnitOffset())
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index bf6219497770..749d738af9c1 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -255,7 +255,7 @@ static void dumpRnglistsSection(
break;
Offset = TableOffset + Length;
} else {
- Rnglists.dump(OS, LookupPooledAddress, DumpOpts);
+ Rnglists.dump(rnglistData, OS, LookupPooledAddress, DumpOpts);
}
}
}
@@ -316,7 +316,7 @@ static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts,
return;
}
- Header.dump(OS, DumpOpts);
+ Header.dump(Data, OS, DumpOpts);
uint64_t EndOffset = Header.length() + Header.getHeaderOffset();
Data.setAddressSize(Header.getAddrSize());
@@ -457,7 +457,7 @@ void DWARFContext::dump(
shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame,
DObj->getFrameSection().Data)) {
if (Expected<const DWARFDebugFrame *> DF = getDebugFrame())
- (*DF)->dump(OS, getRegisterInfo(), *Off);
+ (*DF)->dump(OS, DumpOpts, getRegisterInfo(), *Off);
else
RecoverableErrorHandler(DF.takeError());
}
@@ -466,7 +466,7 @@ void DWARFContext::dump(
shouldDump(Explicit, ".eh_frame", DIDT_ID_DebugFrame,
DObj->getEHFrameSection().Data)) {
if (Expected<const DWARFDebugFrame *> DF = getEHFrame())
- (*DF)->dump(OS, getRegisterInfo(), *Off);
+ (*DF)->dump(OS, DumpOpts, getRegisterInfo(), *Off);
else
RecoverableErrorHandler(DF.takeError());
}
@@ -502,7 +502,8 @@ void DWARFContext::dump(
0);
DWARFDebugArangeSet set;
while (arangesData.isValidOffset(offset)) {
- if (Error E = set.extract(arangesData, &offset)) {
+ if (Error E =
+ set.extract(arangesData, &offset, DumpOpts.WarningHandler)) {
RecoverableErrorHandler(std::move(E));
break;
}
@@ -525,12 +526,29 @@ void DWARFContext::dump(
}
};
+ auto DumpStrSection = [&](StringRef Section) {
+ DataExtractor StrData(Section, isLittleEndian(), 0);
+ uint64_t Offset = 0;
+ uint64_t StrOffset = 0;
+ while (StrData.isValidOffset(Offset)) {
+ Error Err = Error::success();
+ const char *CStr = StrData.getCStr(&Offset, &Err);
+ if (Err) {
+ DumpOpts.WarningHandler(std::move(Err));
+ return;
+ }
+ OS << format("0x%8.8" PRIx64 ": \"", StrOffset);
+ OS.write_escaped(CStr);
+ OS << "\"\n";
+ StrOffset = Offset;
+ }
+ };
+
if (const auto *Off = shouldDump(Explicit, ".debug_line", DIDT_ID_DebugLine,
DObj->getLineSection().Data)) {
DWARFDataExtractor LineData(*DObj, DObj->getLineSection(), isLittleEndian(),
0);
- DWARFDebugLine::SectionParser Parser(LineData, *this, compile_units(),
- type_units());
+ DWARFDebugLine::SectionParser Parser(LineData, *this, normal_units());
DumpLineSection(Parser, DumpOpts, *Off);
}
@@ -539,8 +557,7 @@ void DWARFContext::dump(
DObj->getLineDWOSection().Data)) {
DWARFDataExtractor LineData(*DObj, DObj->getLineDWOSection(),
isLittleEndian(), 0);
- DWARFDebugLine::SectionParser Parser(LineData, *this, dwo_compile_units(),
- dwo_type_units());
+ DWARFDebugLine::SectionParser Parser(LineData, *this, dwo_units());
DumpLineSection(Parser, DumpOpts, *Off);
}
@@ -555,37 +572,16 @@ void DWARFContext::dump(
}
if (shouldDump(Explicit, ".debug_str", DIDT_ID_DebugStr,
- DObj->getStrSection())) {
- DataExtractor strData(DObj->getStrSection(), isLittleEndian(), 0);
- uint64_t offset = 0;
- uint64_t strOffset = 0;
- while (const char *s = strData.getCStr(&offset)) {
- OS << format("0x%8.8" PRIx64 ": \"%s\"\n", strOffset, s);
- strOffset = offset;
- }
- }
+ DObj->getStrSection()))
+ DumpStrSection(DObj->getStrSection());
+
if (shouldDump(ExplicitDWO, ".debug_str.dwo", DIDT_ID_DebugStr,
- DObj->getStrDWOSection())) {
- DataExtractor strDWOData(DObj->getStrDWOSection(), isLittleEndian(), 0);
- uint64_t offset = 0;
- uint64_t strDWOOffset = 0;
- while (const char *s = strDWOData.getCStr(&offset)) {
- OS << format("0x%8.8" PRIx64 ": \"%s\"\n", strDWOOffset, s);
- strDWOOffset = offset;
- }
- }
+ DObj->getStrDWOSection()))
+ DumpStrSection(DObj->getStrDWOSection());
+
if (shouldDump(Explicit, ".debug_line_str", DIDT_ID_DebugLineStr,
- DObj->getLineStrSection())) {
- DataExtractor strData(DObj->getLineStrSection(), isLittleEndian(), 0);
- uint64_t offset = 0;
- uint64_t strOffset = 0;
- while (const char *s = strData.getCStr(&offset)) {
- OS << format("0x%8.8" PRIx64 ": \"", strOffset);
- OS.write_escaped(s);
- OS << "\"\n";
- strOffset = offset;
- }
- }
+ DObj->getLineStrSection()))
+ DumpStrSection(DObj->getLineStrSection());
if (shouldDump(Explicit, ".debug_addr", DIDT_ID_DebugAddr,
DObj->getAddrSection().Data)) {
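
Earlier in this hunk the three near-identical loops over .debug_str, .debug_str.dwo and .debug_line_str collapse into one DumpStrSection helper that escapes the output and reports decoding errors rather than silently stopping. The same loop in plain C++ over a raw buffer (escaping omitted here for brevity):

#include <cstdio>
#include <cstring>

// Walk a buffer of NUL-terminated strings, printing "offset: \"text\"".
// A missing final NUL is reported once instead of being read past the end.
void dumpStrings(const char *Data, size_t Size) {
  size_t Offset = 0;
  while (Offset < Size) {
    const void *Nul = memchr(Data + Offset, '\0', Size - Offset);
    if (!Nul) {
      std::fprintf(stderr, "warning: unterminated string at 0x%zx\n", Offset);
      return;
    }
    std::printf("0x%08zx: \"%s\"\n", Offset, Data + Offset);
    Offset = static_cast<size_t>(static_cast<const char *>(Nul) - Data) + 1;
  }
}

int main() {
  const char Section[] = "main\0foo.c"; // sizeof includes the final NUL
  dumpStrings(Section, sizeof(Section));
}
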
@@ -1038,7 +1034,9 @@ DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) {
static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU,
uint64_t Address,
FunctionNameKind Kind,
+ DILineInfoSpecifier::FileLineInfoKind FileNameKind,
std::string &FunctionName,
+ std::string &StartFile,
uint32_t &StartLine) {
// The address may correspond to instruction in some inlined function,
// so we have to build the chain of inlined functions and take the
@@ -1055,6 +1053,11 @@ static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU,
FunctionName = Name;
FoundResult = true;
}
+ std::string DeclFile = DIE.getDeclFile(FileNameKind);
+ if (!DeclFile.empty()) {
+ StartFile = DeclFile;
+ FoundResult = true;
+ }
if (auto DeclLineResult = DIE.getDeclLine()) {
StartLine = DeclLineResult;
FoundResult = true;
@@ -1226,8 +1229,9 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address,
if (!CU)
return Result;
- getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind,
- Result.FunctionName, Result.StartLine);
+ getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, Spec.FLIKind,
+ Result.FunctionName,
+ Result.StartFileName, Result.StartLine);
if (Spec.FLIKind != FileLineInfoKind::None) {
if (const DWARFLineTable *LineTable = getLineTableForUnit(CU)) {
LineTable->getFileLineInfoForAddress(
@@ -1246,15 +1250,17 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange(
return Lines;
uint32_t StartLine = 0;
+ std::string StartFileName;
std::string FunctionName(DILineInfo::BadString);
- getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind,
- FunctionName, StartLine);
+ getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, Spec.FLIKind,
+ FunctionName, StartFileName, StartLine);
// If the Specifier says we don't need FileLineInfo, just
// return the top-most function at the starting address.
if (Spec.FLIKind == FileLineInfoKind::None) {
DILineInfo Result;
Result.FunctionName = FunctionName;
+ Result.StartFileName = StartFileName;
Result.StartLine = StartLine;
Lines.push_back(std::make_pair(Address.Address, Result));
return Lines;
@@ -1278,6 +1284,7 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange(
Result.FunctionName = FunctionName;
Result.Line = Row.Line;
Result.Column = Row.Column;
+ Result.StartFileName = StartFileName;
Result.StartLine = StartLine;
Lines.push_back(std::make_pair(Row.Address.Address, Result));
}
@@ -1320,6 +1327,7 @@ DWARFContext::getInliningInfoForAddress(object::SectionedAddress Address,
Frame.FunctionName = Name;
if (auto DeclLineResult = FunctionDIE.getDeclLine())
Frame.StartLine = DeclLineResult;
+ Frame.StartFileName = FunctionDIE.getDeclFile(Spec.FLIKind);
if (Spec.FLIKind != FileLineInfoKind::None) {
if (i == 0) {
// For the topmost frame, initialize the line table of this
@@ -1701,16 +1709,17 @@ public:
// FIXME: Use the other dwo range section when we emit it.
RangesDWOSection.Data = Data;
}
- } else if (Name == "debug_info") {
+ } else if (InfoSectionMap *Sections =
+ StringSwitch<InfoSectionMap *>(Name)
+ .Case("debug_info", &InfoSections)
+ .Case("debug_info.dwo", &InfoDWOSections)
+ .Case("debug_types", &TypesSections)
+ .Case("debug_types.dwo", &TypesDWOSections)
+ .Default(nullptr)) {
// Find debug_info and debug_types data by section rather than name as
// there are multiple, comdat grouped, of these sections.
- InfoSections[Section].Data = Data;
- } else if (Name == "debug_info.dwo") {
- InfoDWOSections[Section].Data = Data;
- } else if (Name == "debug_types") {
- TypesSections[Section].Data = Data;
- } else if (Name == "debug_types.dwo") {
- TypesDWOSections[Section].Data = Data;
+ DWARFSectionMap &S = (*Sections)[Section];
+ S.Data = Data;
}
if (RelocatedSection == Obj.section_end())
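
The if/else-if chain over the debug_info/debug_types section names in the hunk above becomes a single lookup that yields the destination map (or nothing), so every matching section is filed with one shared statement. Roughly, in standard C++ with toy types:

#include <iostream>
#include <map>
#include <string>

using SectionMap = std::map<int, std::string>; // section id -> contents

// One lookup replaces four separate branches with duplicated bodies;
// unknown names fall through untouched.
SectionMap *pickInfoSectionMap(const std::string &Name, SectionMap &Info,
                               SectionMap &InfoDWO, SectionMap &Types,
                               SectionMap &TypesDWO) {
  if (Name == "debug_info")
    return &Info;
  if (Name == "debug_info.dwo")
    return &InfoDWO;
  if (Name == "debug_types")
    return &Types;
  if (Name == "debug_types.dwo")
    return &TypesDWO;
  return nullptr;
}

int main() {
  SectionMap Info, InfoDWO, Types, TypesDWO;
  struct { int Id; std::string Name, Data; } Inputs[] = {
      {1, "debug_info", "cu bytes"}, {2, "debug_types.dwo", "tu bytes"}};
  for (const auto &In : Inputs)
    if (SectionMap *S =
            pickInfoSectionMap(In.Name, Info, InfoDWO, Types, TypesDWO))
      (*S)[In.Id] = In.Data; // shared body, whichever map was chosen
  std::cout << Info.size() << ' ' << TypesDWO.size() << '\n'; // 1 1
}
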
@@ -1771,7 +1780,7 @@ public:
// Symbol to [address, section index] cache mapping.
std::map<SymbolRef, SymInfo> AddrCache;
- bool (*Supports)(uint64_t);
+ SupportsRelocation Supports;
RelocationResolver Resolver;
std::tie(Supports, Resolver) = getRelocationResolver(Obj);
for (const RelocationRef &Reloc : Section.relocations()) {
@@ -1989,6 +1998,6 @@ uint8_t DWARFContext::getCUAddrSize() {
// first compile unit. In practice the address size field is repeated across
// various DWARF headers (at least in version 5) to make it easier to dump
// them independently, not to enable varying the address size.
- unit_iterator_range CUs = compile_units();
+ auto CUs = compile_units();
return CUs.empty() ? 0 : (*CUs.begin())->getAddressByteSize();
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
index 886fe1dff976..da6f6ad903f4 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
@@ -53,14 +53,16 @@ uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint64_t *Off,
ErrorAsOutParameter ErrAsOut(Err);
Optional<RelocAddrEntry> E = Obj->find(*Section, *Off);
- uint64_t A = getUnsigned(Off, Size, Err);
+ uint64_t LocData = getUnsigned(Off, Size, Err);
if (!E || (Err && *Err))
- return A;
+ return LocData;
if (SecNdx)
*SecNdx = E->SectionIndex;
- uint64_t R = E->Resolver(E->Reloc, E->SymbolValue, A);
+
+ uint64_t R =
+ object::resolveRelocation(E->Resolver, E->Reloc, E->SymbolValue, LocData);
if (E->Reloc2)
- R = E->Resolver(*E->Reloc2, E->SymbolValue2, R);
+ R = object::resolveRelocation(E->Resolver, *E->Reloc2, E->SymbolValue2, R);
return R;
}
@@ -104,10 +106,10 @@ DWARFDataExtractor::getEncodedPointer(uint64_t *Offset, uint8_t Encoding,
Result = getSigned(Offset, 2);
break;
case dwarf::DW_EH_PE_sdata4:
- Result = getSigned(Offset, 4);
+ Result = SignExtend64<32>(getRelocatedValue(4, Offset));
break;
case dwarf::DW_EH_PE_sdata8:
- Result = getSigned(Offset, 8);
+ Result = getRelocatedValue(8, Offset);
break;
default:
return None;
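
sdata4 values are now read through the relocation path and then sign-extended from 32 to 64 bits, rather than read as a plain signed field. The sign-extension step on its own, as a standalone equivalent of the SignExtend64<32>(...) call used above:

#include <cassert>
#include <cstdint>
#include <iostream>

// Sign-extend the low B bits of V to a full 64-bit value.  Assumes
// two's-complement arithmetic shift, which C++20 guarantees.
template <unsigned B> constexpr uint64_t signExtend64(uint64_t V) {
  static_assert(B > 0 && B <= 64);
  return static_cast<uint64_t>(static_cast<int64_t>(V << (64 - B)) >>
                               (64 - B));
}

int main() {
  // A relocated 4-byte field holding -8 comes back as 0xFFFFFFF8.
  uint64_t Raw = 0xFFFFFFF8u;
  assert(signExtend64<32>(Raw) == static_cast<uint64_t>(-8));
  std::cout << static_cast<int64_t>(signExtend64<32>(Raw)) << '\n'; // -8
}
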
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index c3b039b05f30..598e3ecee30e 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -20,9 +21,11 @@ using namespace llvm;
void DWARFDebugArangeSet::Descriptor::dump(raw_ostream &OS,
uint32_t AddressSize) const {
- OS << format("[0x%*.*" PRIx64 ", ", AddressSize * 2, AddressSize * 2, Address)
- << format(" 0x%*.*" PRIx64 ")", AddressSize * 2, AddressSize * 2,
- getEndAddress());
+ OS << '[';
+ DWARFFormValue::dumpAddress(OS, AddressSize, Address);
+ OS << ", ";
+ DWARFFormValue::dumpAddress(OS, AddressSize, getEndAddress());
+ OS << ')';
}
void DWARFDebugArangeSet::clear() {
@@ -32,7 +35,8 @@ void DWARFDebugArangeSet::clear() {
}
Error DWARFDebugArangeSet::extract(DWARFDataExtractor data,
- uint64_t *offset_ptr) {
+ uint64_t *offset_ptr,
+ function_ref<void(Error)> WarningHandler) {
assert(data.isValidOffset(*offset_ptr));
ArangeDescriptors.clear();
Offset = *offset_ptr;
@@ -141,11 +145,11 @@ Error DWARFDebugArangeSet::extract(DWARFDataExtractor data,
if (arangeDescriptor.Length == 0 && arangeDescriptor.Address == 0) {
if (*offset_ptr == end_offset)
return ErrorSuccess();
- return createStringError(
+ WarningHandler(createStringError(
errc::invalid_argument,
"address range table at offset 0x%" PRIx64
" has a premature terminator entry at offset 0x%" PRIx64,
- Offset, EntryOffset);
+ Offset, EntryOffset));
}
ArangeDescriptors.push_back(arangeDescriptor);
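
A premature terminator tuple in .debug_aranges used to abort extraction with a hard error; it is now routed through a caller-supplied warning handler so the rest of the set can still be read. The control-flow change in miniature, with the descriptor layout reduced to two fields:

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Descriptor { uint64_t Address, Length; };

// Collect (address, length) pairs.  A (0, 0) entry before the end of the
// input is only reported; extraction keeps going, as the patch does.
std::vector<Descriptor>
extractSet(const std::vector<Descriptor> &Raw,
           const std::function<void(const std::string &)> &WarningHandler) {
  std::vector<Descriptor> Out;
  for (size_t I = 0; I < Raw.size(); ++I) {
    bool IsTerminator = Raw[I].Address == 0 && Raw[I].Length == 0;
    if (IsTerminator) {
      if (I + 1 == Raw.size())
        return Out; // normal end of the set
      WarningHandler("premature terminator entry at index " +
                     std::to_string(I));
    }
    Out.push_back(Raw[I]);
  }
  return Out;
}

int main() {
  auto Warn = [](const std::string &Msg) {
    std::cerr << "warning: " << Msg << '\n';
  };
  auto Set = extractSet({{0x1000, 0x10}, {0, 0}, {0x2000, 0x20}}, Warn);
  std::cout << Set.size() << '\n'; // 3: entries after the early terminator survive
}
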
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
index e8ed63075055..e0db469752cd 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
@@ -28,7 +28,8 @@ void DWARFDebugAranges::extract(
DWARFDebugArangeSet Set;
while (DebugArangesData.isValidOffset(Offset)) {
- if (Error E = Set.extract(DebugArangesData, &Offset)) {
+ if (Error E =
+ Set.extract(DebugArangesData, &Offset, RecoverableErrorHandler)) {
RecoverableErrorHandler(std::move(E));
return;
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 0a1b75592290..b74ecac681f3 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataExtractor.h"
@@ -29,6 +30,18 @@
using namespace llvm;
using namespace dwarf;
+static void printRegister(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH,
+ unsigned RegNum) {
+ if (MRI) {
+ if (Optional<unsigned> LLVMRegNum = MRI->getLLVMRegNum(RegNum, IsEH)) {
+ if (const char *RegName = MRI->getName(*LLVMRegNum)) {
+ OS << RegName;
+ return;
+ }
+ }
+ }
+ OS << "reg" << RegNum;
+}
// See DWARF standard v3, section 7.23
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
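
The printRegister helper added above prefers the target's symbolic register name and only falls back to the bare "regN" spelling when no register info is available or the number has no mapping. The fallback chain on its own, with a toy lookup table standing in for MCRegisterInfo:

#include <iostream>
#include <map>
#include <optional>
#include <string>

struct ToyRegisterInfo {
  std::map<unsigned, std::string> DwarfToName; // DWARF reg number -> name
  std::optional<std::string> getName(unsigned RegNum) const {
    auto It = DwarfToName.find(RegNum);
    if (It == DwarfToName.end())
      return std::nullopt;
    return It->second;
  }
};

void printRegister(std::ostream &OS, const ToyRegisterInfo *MRI,
                   unsigned RegNum) {
  if (MRI)
    if (std::optional<std::string> Name = MRI->getName(RegNum)) {
      OS << *Name;
      return;
    }
  OS << "reg" << RegNum; // last resort, same spelling as before the patch
}

int main() {
  ToyRegisterInfo MRI{{{7, "rsp"}, {6, "rbp"}}};
  printRegister(std::cout, &MRI, 7);    // rsp
  std::cout << ' ';
  printRegister(std::cout, &MRI, 99);   // reg99
  std::cout << ' ';
  printRegister(std::cout, nullptr, 7); // reg7
  std::cout << '\n';
}
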
@@ -221,9 +234,10 @@ ArrayRef<CFIProgram::OperandType[2]> CFIProgram::getOperandTypes() {
}
/// Print \p Opcode's operand number \p OperandIdx which has value \p Operand.
-void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI,
- bool IsEH, const Instruction &Instr,
- unsigned OperandIdx, uint64_t Operand) const {
+void CFIProgram::printOperand(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const MCRegisterInfo *MRI, bool IsEH,
+ const Instruction &Instr, unsigned OperandIdx,
+ uint64_t Operand) const {
assert(OperandIdx < 2);
uint8_t Opcode = Instr.Opcode;
OperandType Type = getOperandTypes()[Opcode][OperandIdx];
@@ -268,17 +282,19 @@ void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI,
OS << format(" %" PRId64 "*data_alignment_factor" , Operand);
break;
case OT_Register:
- OS << format(" reg%" PRId64, Operand);
+ OS << ' ';
+ printRegister(OS, MRI, IsEH, Operand);
break;
case OT_Expression:
assert(Instr.Expression && "missing DWARFExpression object");
OS << " ";
- Instr.Expression->print(OS, MRI, nullptr, IsEH);
+ Instr.Expression->print(OS, DumpOpts, MRI, nullptr, IsEH);
break;
}
}
-void CFIProgram::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH,
+void CFIProgram::dump(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const MCRegisterInfo *MRI, bool IsEH,
unsigned IndentLevel) const {
for (const auto &Instr : Instructions) {
uint8_t Opcode = Instr.Opcode;
@@ -287,7 +303,7 @@ void CFIProgram::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH,
OS.indent(2 * IndentLevel);
OS << CallFrameString(Opcode, Arch) << ":";
for (unsigned i = 0; i < Instr.Ops.size(); ++i)
- printOperand(OS, MRI, IsEH, Instr, i, Instr.Ops[i]);
+ printOperand(OS, DumpOpts, MRI, IsEH, Instr, i, Instr.Ops[i]);
OS << '\n';
}
}
@@ -304,7 +320,8 @@ constexpr uint64_t getCIEId(bool IsDWARF64, bool IsEH) {
return DW_CIE_ID;
}
-void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
+void CIE::dump(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const MCRegisterInfo *MRI, bool IsEH) const {
// A CIE with a zero length is a terminator entry in the .eh_frame section.
if (IsEH && Length == 0) {
OS << format("%08" PRIx64, Offset) << " ZERO terminator\n";
@@ -336,11 +353,12 @@ void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
OS << "\n";
}
OS << "\n";
- CFIs.dump(OS, MRI, IsEH);
+ CFIs.dump(OS, DumpOpts, MRI, IsEH);
OS << "\n";
}
-void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
+void FDE::dump(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const MCRegisterInfo *MRI, bool IsEH) const {
OS << format("%08" PRIx64, Offset)
<< format(" %0*" PRIx64, IsDWARF64 ? 16 : 8, Length)
<< format(" %0*" PRIx64, IsDWARF64 && !IsEH ? 16 : 8, CIEPointer)
@@ -354,7 +372,7 @@ void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
OS << " Format: " << FormatString(IsDWARF64) << "\n";
if (LSDAAddress)
OS << format(" LSDA Address: %016" PRIx64 "\n", *LSDAAddress);
- CFIs.dump(OS, MRI, IsEH);
+ CFIs.dump(OS, DumpOpts, MRI, IsEH);
OS << "\n";
}
@@ -521,9 +539,9 @@ Error DWARFDebugFrame::parse(DWARFDataExtractor Data) {
"parsing FDE data at 0x%" PRIx64
" failed due to missing CIE",
StartOffset);
- if (auto Val = Data.getEncodedPointer(
- &Offset, Cie->getFDEPointerEncoding(),
- EHFrameAddress ? EHFrameAddress + Offset : 0)) {
+ if (auto Val =
+ Data.getEncodedPointer(&Offset, Cie->getFDEPointerEncoding(),
+ EHFrameAddress + Offset)) {
InitialLocation = *Val;
}
if (auto Val = Data.getEncodedPointer(
@@ -583,15 +601,16 @@ FrameEntry *DWARFDebugFrame::getEntryAtOffset(uint64_t Offset) const {
return nullptr;
}
-void DWARFDebugFrame::dump(raw_ostream &OS, const MCRegisterInfo *MRI,
+void DWARFDebugFrame::dump(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const MCRegisterInfo *MRI,
Optional<uint64_t> Offset) const {
if (Offset) {
if (auto *Entry = getEntryAtOffset(*Offset))
- Entry->dump(OS, MRI, IsEH);
+ Entry->dump(OS, DumpOpts, MRI, IsEH);
return;
}
OS << "\n";
for (const auto &Entry : Entries)
- Entry->dump(OS, MRI, IsEH);
+ Entry->dump(OS, DumpOpts, MRI, IsEH);
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 87eab34d58ee..2b7d0c3363a1 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -38,7 +38,8 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
AbbrevDecl = nullptr;
return true;
}
- AbbrevDecl = U.getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
+ if (const auto *AbbrevSet = U.getAbbreviations())
+ AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
if (nullptr == AbbrevDecl) {
// Restore the original offset.
*OffsetPtr = Offset;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 3ca21e97888c..bda41b1f34e9 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -79,6 +79,18 @@ bool DWARFDebugLine::Prologue::hasFileAtIndex(uint64_t FileIndex) const {
return FileIndex != 0 && FileIndex <= FileNames.size();
}
+Optional<uint64_t> DWARFDebugLine::Prologue::getLastValidFileIndex() const {
+ if (FileNames.empty())
+ return None;
+ uint16_t DwarfVersion = getVersion();
+ assert(DwarfVersion != 0 &&
+ "line table prologue has no dwarf version information");
+ // In DWARF v5 the file names are 0-indexed.
+ if (DwarfVersion >= 5)
+ return FileNames.size() - 1;
+ return FileNames.size();
+}
+
const llvm::DWARFDebugLine::FileNameEntry &
DWARFDebugLine::Prologue::getFileNameEntry(uint64_t Index) const {
uint16_t DwarfVersion = getVersion();
@@ -771,6 +783,18 @@ Error DWARFDebugLine::LineTable::parse(
*OS << '\n';
Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0);
}
+ bool TombstonedAddress = false;
+ auto EmitRow = [&] {
+ if (!TombstonedAddress) {
+ if (Verbose) {
+ *OS << "\n";
+ OS->indent(12);
+ }
+ if (OS)
+ State.Row.dump(*OS);
+ State.appendRowToMatrix();
+ }
+ };
while (*OffsetPtr < EndOffset) {
DataExtractor::Cursor Cursor(*OffsetPtr);
@@ -822,13 +846,7 @@ Error DWARFDebugLine::LineTable::parse(
// No need to test the Cursor is valid here, since it must be to get
// into this code path - if it were invalid, the default case would be
// followed.
- if (Verbose) {
- *OS << "\n";
- OS->indent(12);
- }
- if (OS)
- State.Row.dump(*OS);
- State.appendRowToMatrix();
+ EmitRow();
State.resetRowAndSequence();
break;
@@ -870,13 +888,20 @@ Error DWARFDebugLine::LineTable::parse(
State.Row.Address.Address = TableData.getRelocatedAddress(
Cursor, &State.Row.Address.SectionIndex);
+ uint64_t Tombstone =
+ dwarf::computeTombstoneAddress(OpcodeAddressSize);
+ TombstonedAddress = State.Row.Address.Address == Tombstone;
+
// Restore the address size if the extractor already had it.
if (ExtractorAddressSize != 0)
TableData.setAddressSize(ExtractorAddressSize);
}
- if (Cursor && Verbose)
- *OS << format(" (0x%16.16" PRIx64 ")", State.Row.Address.Address);
+ if (Cursor && Verbose) {
+ *OS << " (";
+ DWARFFormValue::dumpAddress(*OS, OpcodeAddressSize, State.Row.Address.Address);
+ *OS << ')';
+ }
}
break;
@@ -969,13 +994,7 @@ Error DWARFDebugLine::LineTable::parse(
case DW_LNS_copy:
// Takes no arguments. Append a row to the matrix using the
// current values of the state-machine registers.
- if (Verbose) {
- *OS << "\n";
- OS->indent(12);
- }
- if (OS)
- State.Row.dump(*OS);
- State.appendRowToMatrix();
+ EmitRow();
break;
case DW_LNS_advance_pc:
@@ -1140,15 +1159,9 @@ Error DWARFDebugLine::LineTable::parse(
ParsingState::AddrAndLineDelta Delta =
State.handleSpecialOpcode(Opcode, OpcodeOffset);
- if (Verbose) {
- *OS << "address += " << Delta.Address << ", line += " << Delta.Line
- << "\n";
- OS->indent(12);
- }
- if (OS)
- State.Row.dump(*OS);
-
- State.appendRowToMatrix();
+ if (Verbose)
+ *OS << "address += " << Delta.Address << ", line += " << Delta.Line;
+ EmitRow();
*OffsetPtr = Cursor.tell();
}
@@ -1406,25 +1419,20 @@ bool DWARFDebugLine::LineTable::getFileLineInfoForAddress(
// Therefore, collect up handles on all the Units that point into the
// line-table section.
static DWARFDebugLine::SectionParser::LineToUnitMap
-buildLineToUnitMap(DWARFDebugLine::SectionParser::cu_range CUs,
- DWARFDebugLine::SectionParser::tu_range TUs) {
+buildLineToUnitMap(DWARFUnitVector::iterator_range Units) {
DWARFDebugLine::SectionParser::LineToUnitMap LineToUnit;
- for (const auto &CU : CUs)
- if (auto CUDIE = CU->getUnitDIE())
+ for (const auto &U : Units)
+ if (auto CUDIE = U->getUnitDIE())
if (auto StmtOffset = toSectionOffset(CUDIE.find(DW_AT_stmt_list)))
- LineToUnit.insert(std::make_pair(*StmtOffset, &*CU));
- for (const auto &TU : TUs)
- if (auto TUDIE = TU->getUnitDIE())
- if (auto StmtOffset = toSectionOffset(TUDIE.find(DW_AT_stmt_list)))
- LineToUnit.insert(std::make_pair(*StmtOffset, &*TU));
+ LineToUnit.insert(std::make_pair(*StmtOffset, &*U));
return LineToUnit;
}
-DWARFDebugLine::SectionParser::SectionParser(DWARFDataExtractor &Data,
- const DWARFContext &C,
- cu_range CUs, tu_range TUs)
+DWARFDebugLine::SectionParser::SectionParser(
+ DWARFDataExtractor &Data, const DWARFContext &C,
+ DWARFUnitVector::iterator_range Units)
: DebugLineData(Data), Context(C) {
- LineToUnit = buildLineToUnitMap(CUs, TUs);
+ LineToUnit = buildLineToUnitMap(Units);
if (!DebugLineData.isValidOffset(Offset))
Done = true;
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index f38126364401..cdffb36741c8 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -106,15 +106,16 @@ DWARFLocationInterpreter::Interpret(const DWARFLocationEntry &E) {
}
}
-static void dumpExpression(raw_ostream &OS, ArrayRef<uint8_t> Data,
- bool IsLittleEndian, unsigned AddressSize,
- const MCRegisterInfo *MRI, DWARFUnit *U) {
+static void dumpExpression(raw_ostream &OS, DIDumpOptions DumpOpts,
+ ArrayRef<uint8_t> Data, bool IsLittleEndian,
+ unsigned AddressSize, const MCRegisterInfo *MRI,
+ DWARFUnit *U) {
DWARFDataExtractor Extractor(Data, IsLittleEndian, AddressSize);
// Note. We do not pass any format to DWARFExpression, even if the
// corresponding unit is known. For now, there is only one operation,
// DW_OP_call_ref, which depends on the format; it is rarely used, and
// is unexpected in location tables.
- DWARFExpression(Extractor, AddressSize).print(OS, MRI, U);
+ DWARFExpression(Extractor, AddressSize).print(OS, DumpOpts, MRI, U);
}
bool DWARFLocationTable::dumpLocationList(uint64_t *Offset, raw_ostream &OS,
@@ -154,8 +155,8 @@ bool DWARFLocationTable::dumpLocationList(uint64_t *Offset, raw_ostream &OS,
E.Kind != dwarf::DW_LLE_base_addressx &&
E.Kind != dwarf::DW_LLE_end_of_list) {
OS << ": ";
- dumpExpression(OS, E.Loc, Data.isLittleEndian(), Data.getAddressSize(),
- MRI, U);
+ dumpExpression(OS, DumpOpts, E.Loc, Data.isLittleEndian(),
+ Data.getAddressSize(), MRI, U);
}
return true;
});
@@ -259,7 +260,6 @@ void DWARFDebugLoc::dumpRawEntry(const DWARFLocationEntry &Entry,
Value1 = Entry.Value1;
break;
case dwarf::DW_LLE_end_of_list:
- Value0 = Value1 = 0;
return;
default:
llvm_unreachable("Not possible in DWARF4!");
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
index f920d69cc43f..80ffd81b3403 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -40,7 +40,7 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const {
unsigned IndLevel = 0;
for (const auto &Macros : MacroLists) {
OS << format("0x%08" PRIx64 ":\n", Macros.Offset);
- if (Macros.Header.Version >= 5)
+ if (Macros.IsDebugMacro)
Macros.Header.dumpMacroHeader(OS);
for (const Entry &E : Macros.Macros) {
// There should not be DW_MACINFO_end_file when IndLevel is Zero. However,
@@ -52,8 +52,10 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const {
OS << " ";
IndLevel += (E.Type == DW_MACINFO_start_file);
// Based on which version we are handling choose appropriate macro forms.
- if (Macros.Header.Version >= 5)
- WithColor(OS, HighlightColor::Macro).get() << MacroString(E.Type);
+ if (Macros.IsDebugMacro)
+ WithColor(OS, HighlightColor::Macro).get()
+ << (Macros.Header.Version < 5 ? GnuMacroString(E.Type)
+ : MacroString(E.Type));
else
WithColor(OS, HighlightColor::Macro).get() << MacinfoString(E.Type);
switch (E.Type) {
@@ -67,6 +69,9 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const {
// DW_MACRO_start_file == DW_MACINFO_start_file
// DW_MACRO_end_file == DW_MACINFO_end_file
// For readability/uniformity we are using DW_MACRO_*.
+ //
+ // The GNU .debug_macro extension's entries have the same encoding
+ // as DWARF 5's DW_MACRO_* entries, so we only use the latter here.
case DW_MACRO_define:
case DW_MACRO_undef:
case DW_MACRO_define_strp:
@@ -97,7 +102,7 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const {
}
Error DWARFDebugMacro::parseImpl(
- Optional<DWARFUnitVector::iterator_range> Units,
+ Optional<DWARFUnitVector::compile_unit_range> Units,
Optional<DataExtractor> StringExtractor, DWARFDataExtractor Data,
bool IsMacro) {
uint64_t Offset = 0;
@@ -118,6 +123,7 @@ Error DWARFDebugMacro::parseImpl(
MacroLists.emplace_back();
M = &MacroLists.back();
M->Offset = Offset;
+ M->IsDebugMacro = IsMacro;
if (IsMacro) {
auto Err = M->Header.parseMacroHeader(Data, &Offset);
if (Err)
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index 1a1857d8cd79..dc7da5d9348f 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -70,6 +70,9 @@ void DWARFDebugRangeList::dump(raw_ostream &OS) const {
DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges(
llvm::Optional<object::SectionedAddress> BaseAddr) const {
DWARFAddressRangesVector Res;
+ // debug_addr can't use the max integer tombstone because that's used for the
+ // base address specifier entry - so use max-1.
+ uint64_t Tombstone = dwarf::computeTombstoneAddress(AddressSize) - 1;
for (const RangeListEntry &RLE : Entries) {
if (RLE.isBaseAddressSelectionEntry(AddressSize)) {
BaseAddr = {RLE.EndAddress, RLE.SectionIndex};
@@ -78,12 +81,16 @@ DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges(
DWARFAddressRange E;
E.LowPC = RLE.StartAddress;
+ if (E.LowPC == Tombstone)
+ continue;
E.HighPC = RLE.EndAddress;
E.SectionIndex = RLE.SectionIndex;
// Base address of a range list entry is determined by the closest preceding
// base address selection entry in the same range list. It defaults to the
// base address of the compilation unit if there is no such entry.
if (BaseAddr) {
+ if (BaseAddr->Address == Tombstone)
+ continue;
E.LowPC += BaseAddr->Address;
E.HighPC += BaseAddr->Address;
if (E.SectionIndex == -1ULL)
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
index 9ae4c5b73ebe..d12acca1962e 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
@@ -16,114 +17,87 @@
using namespace llvm;
-Error RangeListEntry::extract(DWARFDataExtractor Data, uint64_t End,
- uint64_t *OffsetPtr) {
+Error RangeListEntry::extract(DWARFDataExtractor Data, uint64_t *OffsetPtr) {
Offset = *OffsetPtr;
SectionIndex = -1ULL;
// The caller should guarantee that we have at least 1 byte available, so
// we just assert instead of revalidate.
- assert(*OffsetPtr < End &&
+ assert(*OffsetPtr < Data.size() &&
"not enough space to extract a rangelist encoding");
uint8_t Encoding = Data.getU8(OffsetPtr);
+ DataExtractor::Cursor C(*OffsetPtr);
switch (Encoding) {
case dwarf::DW_RLE_end_of_list:
Value0 = Value1 = 0;
break;
// TODO: Support other encodings.
case dwarf::DW_RLE_base_addressx: {
- uint64_t PreviousOffset = *OffsetPtr - 1;
- Value0 = Data.getULEB128(OffsetPtr);
- if (End < *OffsetPtr)
- return createStringError(
- errc::invalid_argument,
- "read past end of table when reading "
- "DW_RLE_base_addressx encoding at offset 0x%" PRIx64,
- PreviousOffset);
+ Value0 = Data.getULEB128(C);
break;
}
case dwarf::DW_RLE_startx_endx:
- return createStringError(errc::not_supported,
- "unsupported rnglists encoding DW_RLE_startx_endx at "
- "offset 0x%" PRIx64,
- *OffsetPtr - 1);
+ Value0 = Data.getULEB128(C);
+ Value1 = Data.getULEB128(C);
+ break;
case dwarf::DW_RLE_startx_length: {
- uint64_t PreviousOffset = *OffsetPtr - 1;
- Value0 = Data.getULEB128(OffsetPtr);
- Value1 = Data.getULEB128(OffsetPtr);
- if (End < *OffsetPtr)
- return createStringError(
- errc::invalid_argument,
- "read past end of table when reading "
- "DW_RLE_startx_length encoding at offset 0x%" PRIx64,
- PreviousOffset);
+ Value0 = Data.getULEB128(C);
+ Value1 = Data.getULEB128(C);
break;
}
case dwarf::DW_RLE_offset_pair: {
- uint64_t PreviousOffset = *OffsetPtr - 1;
- Value0 = Data.getULEB128(OffsetPtr);
- Value1 = Data.getULEB128(OffsetPtr);
- if (End < *OffsetPtr)
- return createStringError(errc::invalid_argument,
- "read past end of table when reading "
- "DW_RLE_offset_pair encoding at offset 0x%" PRIx64,
- PreviousOffset);
+ Value0 = Data.getULEB128(C);
+ Value1 = Data.getULEB128(C);
break;
}
case dwarf::DW_RLE_base_address: {
- if ((End - *OffsetPtr) < Data.getAddressSize())
- return createStringError(errc::invalid_argument,
- "insufficient space remaining in table for "
- "DW_RLE_base_address encoding at offset 0x%" PRIx64,
- *OffsetPtr - 1);
- Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex);
+ Value0 = Data.getRelocatedAddress(C, &SectionIndex);
break;
}
case dwarf::DW_RLE_start_end: {
- if ((End - *OffsetPtr) < unsigned(Data.getAddressSize() * 2))
- return createStringError(errc::invalid_argument,
- "insufficient space remaining in table for "
- "DW_RLE_start_end encoding "
- "at offset 0x%" PRIx64,
- *OffsetPtr - 1);
- Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex);
- Value1 = Data.getRelocatedAddress(OffsetPtr);
+ Value0 = Data.getRelocatedAddress(C, &SectionIndex);
+ Value1 = Data.getRelocatedAddress(C);
break;
}
case dwarf::DW_RLE_start_length: {
- uint64_t PreviousOffset = *OffsetPtr - 1;
- Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex);
- Value1 = Data.getULEB128(OffsetPtr);
- if (End < *OffsetPtr)
- return createStringError(errc::invalid_argument,
- "read past end of table when reading "
- "DW_RLE_start_length encoding at offset 0x%" PRIx64,
- PreviousOffset);
+ Value0 = Data.getRelocatedAddress(C, &SectionIndex);
+ Value1 = Data.getULEB128(C);
break;
}
default:
+ consumeError(C.takeError());
return createStringError(errc::not_supported,
- "unknown rnglists encoding 0x%" PRIx32
- " at offset 0x%" PRIx64,
- uint32_t(Encoding), *OffsetPtr - 1);
+ "unknown rnglists encoding 0x%" PRIx32
+ " at offset 0x%" PRIx64,
+ uint32_t(Encoding), Offset);
+ }
+
+ if (!C) {
+ consumeError(C.takeError());
+ return createStringError(
+ errc::invalid_argument,
+ "read past end of table when reading %s encoding at offset 0x%" PRIx64,
+ dwarf::RLEString(Encoding).data(), Offset);
}
+ *OffsetPtr = C.tell();
EntryKind = Encoding;
return Error::success();
}
DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
llvm::Optional<object::SectionedAddress> BaseAddr, DWARFUnit &U) const {
- return getAbsoluteRanges(BaseAddr, [&](uint32_t Index) {
- return U.getAddrOffsetSectionItem(Index);
- });
+ return getAbsoluteRanges(
+ BaseAddr, U.getAddressByteSize(),
+ [&](uint32_t Index) { return U.getAddrOffsetSectionItem(Index); });
}
DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
- Optional<object::SectionedAddress> BaseAddr,
+ Optional<object::SectionedAddress> BaseAddr, uint8_t AddressByteSize,
function_ref<Optional<object::SectionedAddress>(uint32_t)>
LookupPooledAddress) const {
DWARFAddressRangesVector Res;
+ uint64_t Tombstone = dwarf::computeTombstoneAddress(AddressByteSize);
for (const RangeListEntry &RLE : Entries) {
if (RLE.EntryKind == dwarf::DW_RLE_end_of_list)
break;
@@ -146,8 +120,12 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
switch (RLE.EntryKind) {
case dwarf::DW_RLE_offset_pair:
E.LowPC = RLE.Value0;
+ if (E.LowPC == Tombstone)
+ continue;
E.HighPC = RLE.Value1;
if (BaseAddr) {
+ if (BaseAddr->Address == Tombstone)
+ continue;
E.LowPC += BaseAddr->Address;
E.HighPC += BaseAddr->Address;
}
@@ -169,11 +147,26 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
E.HighPC = E.LowPC + RLE.Value1;
break;
}
+ case dwarf::DW_RLE_startx_endx: {
+ auto Start = LookupPooledAddress(RLE.Value0);
+ if (!Start)
+ Start = {0, -1ULL};
+ auto End = LookupPooledAddress(RLE.Value1);
+ if (!End)
+ End = {0, -1ULL};
+ // FIXME: Some error handling if Start.SectionIndex != End.SectionIndex
+ E.SectionIndex = Start->SectionIndex;
+ E.LowPC = Start->Address;
+ E.HighPC = End->Address;
+ break;
+ }
default:
// Unsupported encodings should have been reported during extraction,
// so we should not run into any here.
llvm_unreachable("Unsupported range list encoding");
}
+ if (E.LowPC == Tombstone)
+ continue;
Res.push_back(E);
}
return Res;
@@ -206,6 +199,8 @@ void RangeListEntry::dump(
OS << ": ";
}
+ uint64_t Tombstone = dwarf::computeTombstoneAddress(AddrSize);
+
switch (EntryKind) {
case dwarf::DW_RLE_end_of_list:
OS << (DumpOpts.Verbose ? "" : "<End of list>");
@@ -217,7 +212,7 @@ void RangeListEntry::dump(
CurrentBase = Value0;
if (!DumpOpts.Verbose)
return;
- OS << format(" 0x%*.*" PRIx64, AddrSize * 2, AddrSize * 2, Value0);
+ DWARFFormValue::dumpAddress(OS << ' ', AddrSize, Value0);
break;
}
case dwarf::DW_RLE_base_address:
@@ -225,7 +220,7 @@ void RangeListEntry::dump(
CurrentBase = Value0;
if (!DumpOpts.Verbose)
return;
- OS << format(" 0x%*.*" PRIx64, AddrSize * 2, AddrSize * 2, Value0);
+ DWARFFormValue::dumpAddress(OS << ' ', AddrSize, Value0);
break;
case dwarf::DW_RLE_start_length:
PrintRawEntry(OS, *this, AddrSize, DumpOpts);
@@ -233,8 +228,11 @@ void RangeListEntry::dump(
break;
case dwarf::DW_RLE_offset_pair:
PrintRawEntry(OS, *this, AddrSize, DumpOpts);
- DWARFAddressRange(Value0 + CurrentBase, Value1 + CurrentBase)
- .dump(OS, AddrSize, DumpOpts);
+ if (CurrentBase != Tombstone)
+ DWARFAddressRange(Value0 + CurrentBase, Value1 + CurrentBase)
+ .dump(OS, AddrSize, DumpOpts);
+ else
+ OS << "dead code";
break;
case dwarf::DW_RLE_start_end:
DWARFAddressRange(Value0, Value1).dump(OS, AddrSize, DumpOpts);
@@ -247,6 +245,17 @@ void RangeListEntry::dump(
DWARFAddressRange(Start, Start + Value1).dump(OS, AddrSize, DumpOpts);
break;
}
+ case dwarf::DW_RLE_startx_endx: {
+ PrintRawEntry(OS, *this, AddrSize, DumpOpts);
+ uint64_t Start = 0;
+ if (auto SA = LookupPooledAddress(Value0))
+ Start = SA->Address;
+ uint64_t End = 0;
+ if (auto SA = LookupPooledAddress(Value1))
+ End = SA->Address;
+ DWARFAddressRange(Start, End).dump(OS, AddrSize, DumpOpts);
+ break;
+ }
default:
llvm_unreachable("Unsupported range list encoding");
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 81a6b5dcd5e7..5a55f3a04148 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -69,7 +69,7 @@ static void dumpRanges(const DWARFObject &Obj, raw_ostream &OS,
}
}
-static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
+static void dumpLocation(raw_ostream &OS, const DWARFFormValue &FormValue,
DWARFUnit *U, unsigned Indent,
DIDumpOptions DumpOpts) {
DWARFContext &Ctx = U->getContext();
@@ -80,7 +80,7 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
DataExtractor Data(StringRef((const char *)Expr.data(), Expr.size()),
Ctx.isLittleEndian(), 0);
DWARFExpression(Data, U->getAddressByteSize(), U->getFormParams().Format)
- .print(OS, MRI, U);
+ .print(OS, DumpOpts, MRI, U);
return;
}
@@ -113,7 +113,6 @@ static void dumpTypeTagName(raw_ostream &OS, dwarf::Tag T) {
}
static void dumpArrayType(raw_ostream &OS, const DWARFDie &D) {
- Optional<uint64_t> Bound;
for (const DWARFDie &C : D.children())
if (C.getTag() == DW_TAG_subrange_type) {
Optional<uint64_t> LB;
@@ -231,21 +230,22 @@ static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) {
}
static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
- uint64_t *OffsetPtr, dwarf::Attribute Attr,
- dwarf::Form Form, unsigned Indent,
+ const DWARFAttribute &AttrValue, unsigned Indent,
DIDumpOptions DumpOpts) {
if (!Die.isValid())
return;
const char BaseIndent[] = " ";
OS << BaseIndent;
OS.indent(Indent + 2);
+ dwarf::Attribute Attr = AttrValue.Attr;
WithColor(OS, HighlightColor::Attribute) << formatv("{0}", Attr);
+ dwarf::Form Form = AttrValue.Value.getForm();
if (DumpOpts.Verbose || DumpOpts.ShowForm)
OS << formatv(" [{0}]", Form);
DWARFUnit *U = Die.getDwarfUnit();
- DWARFFormValue FormValue = DWARFFormValue::createFromUnit(Form, U, OffsetPtr);
+ const DWARFFormValue &FormValue = AttrValue.Value;
OS << "\t(";
@@ -269,13 +269,23 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
WithColor(OS, Color) << Name;
else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line)
OS << *FormValue.getAsUnsignedConstant();
- else if (Attr == DW_AT_high_pc && !DumpOpts.ShowForm && !DumpOpts.Verbose &&
- FormValue.getAsUnsignedConstant()) {
+ else if (Attr == DW_AT_low_pc &&
+ (FormValue.getAsAddress() ==
+ dwarf::computeTombstoneAddress(U->getAddressByteSize()))) {
+ if (DumpOpts.Verbose) {
+ FormValue.dump(OS, DumpOpts);
+ OS << " (";
+ }
+ OS << "dead code";
+ if (DumpOpts.Verbose)
+ OS << ')';
+ } else if (Attr == DW_AT_high_pc && !DumpOpts.ShowForm && !DumpOpts.Verbose &&
+ FormValue.getAsUnsignedConstant()) {
if (DumpOpts.ShowAddresses) {
// Print the actual address rather than the offset.
uint64_t LowPC, HighPC, Index;
if (Die.getLowAndHighPC(LowPC, HighPC, Index))
- OS << format("0x%016" PRIx64, HighPC);
+ DWARFFormValue::dumpAddress(OS, U->getAddressByteSize(), HighPC);
else
FormValue.dump(OS, DumpOpts);
}
@@ -368,8 +378,7 @@ DWARFDie::findRecursively(ArrayRef<dwarf::Attribute> Attrs) const {
Seen.insert(*this);
while (!Worklist.empty()) {
- DWARFDie Die = Worklist.back();
- Worklist.pop_back();
+ DWARFDie Die = Worklist.pop_back_val();
if (!Die.isValid())
continue;
@@ -416,6 +425,9 @@ Optional<uint64_t> DWARFDie::getLocBaseAttribute() const {
}
Optional<uint64_t> DWARFDie::getHighPC(uint64_t LowPC) const {
+ uint64_t Tombstone = dwarf::computeTombstoneAddress(U->getAddressByteSize());
+ if (LowPC == Tombstone)
+ return None;
if (auto FormValue = find(DW_AT_high_pc)) {
if (auto Address = FormValue->getAsAddress()) {
// High PC is an address.
@@ -467,8 +479,7 @@ void DWARFDie::collectChildrenAddressRanges(
return;
if (isSubprogramDIE()) {
if (auto DIERangesOrError = getAddressRanges())
- Ranges.insert(Ranges.end(), DIERangesOrError.get().begin(),
- DIERangesOrError.get().end());
+ llvm::append_range(Ranges, DIERangesOrError.get());
else
llvm::consumeError(DIERangesOrError.takeError());
}
@@ -558,6 +569,17 @@ uint64_t DWARFDie::getDeclLine() const {
return toUnsigned(findRecursively(DW_AT_decl_line), 0);
}
+std::string
+DWARFDie::getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const {
+ std::string FileName;
+ if (auto DeclFile = toUnsigned(findRecursively(DW_AT_decl_file))) {
+ if (const auto *LT = U->getContext().getLineTableForUnit(U)) {
+ LT->getFileNameByIndex(*DeclFile, U->getCompilationDir(), Kind, FileName);
+ }
+ }
+ return FileName;
+}
+
void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
uint32_t &CallColumn,
uint32_t &CallDiscriminator) const {
@@ -610,16 +632,8 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent,
OS << '\n';
// Dump all data in the DIE for the attributes.
- for (const auto &AttrSpec : AbbrevDecl->attributes()) {
- if (AttrSpec.Form == DW_FORM_implicit_const) {
- // We are dumping .debug_info section ,
- // implicit_const attribute values are not really stored here,
- // but in .debug_abbrev section. So we just skip such attrs.
- continue;
- }
- dumpAttribute(OS, *this, &offset, AttrSpec.Attr, AttrSpec.Form,
- Indent, DumpOpts);
- }
+ for (const DWARFAttribute &AttrValue : attributes())
+ dumpAttribute(OS, *this, AttrValue, Indent, DumpOpts);
DWARFDie child = getFirstChild();
if (DumpOpts.ShowChildren && DumpOpts.ChildRecurseDepth > 0 && child) {
@@ -702,10 +716,16 @@ void DWARFDie::attribute_iterator::updateForIndex(
// Add the previous byte size of any previous attribute value.
AttrValue.Offset += AttrValue.ByteSize;
uint64_t ParseOffset = AttrValue.Offset;
- auto U = Die.getDwarfUnit();
- assert(U && "Die must have valid DWARF unit");
- AttrValue.Value = DWARFFormValue::createFromUnit(
- AbbrDecl.getFormByIndex(Index), U, &ParseOffset);
+ if (AbbrDecl.getAttrIsImplicitConstByIndex(Index))
+ AttrValue.Value = DWARFFormValue::createFromSValue(
+ AbbrDecl.getFormByIndex(Index),
+ AbbrDecl.getAttrImplicitConstValueByIndex(Index));
+ else {
+ auto U = Die.getDwarfUnit();
+ assert(U && "Die must have valid DWARF unit");
+ AttrValue.Value = DWARFFormValue::createFromUnit(
+ AbbrDecl.getFormByIndex(Index), U, &ParseOffset);
+ }
AttrValue.ByteSize = ParseOffset - AttrValue.Offset;
} else {
assert(Index == NumAttrs && "Indexes should be [0, NumAttrs) only");
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
index de5e11e084f4..811716111be5 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -204,11 +204,15 @@ bool DWARFExpression::Operation::extract(DataExtractor Data,
}
static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS,
- uint64_t Operands[2], unsigned Operand) {
+ DIDumpOptions DumpOpts, uint64_t Operands[2],
+ unsigned Operand) {
assert(Operand < 2 && "operand out of bounds");
auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
if (Die && Die.getTag() == dwarf::DW_TAG_base_type) {
- OS << format(" (0x%08" PRIx64 ")", U->getOffset() + Operands[Operand]);
+ OS << " (";
+ if (DumpOpts.Verbose)
+ OS << format("0x%08" PRIx64 " -> ", Operands[Operand]);
+ OS << format("0x%08" PRIx64 ")", U->getOffset() + Operands[Operand]);
if (auto Name = Die.find(dwarf::DW_AT_name))
OS << " \"" << Name->getAsCString() << "\"";
} else {
@@ -217,7 +221,8 @@ static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS,
}
}
-static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS, uint8_t Opcode,
+static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS,
+ DIDumpOptions DumpOpts, uint8_t Opcode,
uint64_t Operands[2],
const MCRegisterInfo *MRI, bool isEH) {
if (!MRI)
@@ -243,7 +248,7 @@ static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS, uint8_t Opcode,
OS << ' ' << RegName;
if (Opcode == DW_OP_regval_type)
- prettyPrintBaseTypeRef(U, OS, Operands, 1);
+ prettyPrintBaseTypeRef(U, OS, DumpOpts, Operands, 1);
return true;
}
}
@@ -251,11 +256,10 @@ static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS, uint8_t Opcode,
return false;
}
-bool DWARFExpression::Operation::print(raw_ostream &OS,
+bool DWARFExpression::Operation::print(raw_ostream &OS, DIDumpOptions DumpOpts,
const DWARFExpression *Expr,
const MCRegisterInfo *RegInfo,
- DWARFUnit *U,
- bool isEH) {
+ DWARFUnit *U, bool isEH) {
if (Error) {
OS << "<decoding error>";
return false;
@@ -269,7 +273,7 @@ bool DWARFExpression::Operation::print(raw_ostream &OS,
(Opcode >= DW_OP_reg0 && Opcode <= DW_OP_reg31) ||
Opcode == DW_OP_bregx || Opcode == DW_OP_regx ||
Opcode == DW_OP_regval_type)
- if (prettyPrintRegisterOp(U, OS, Opcode, Operands, RegInfo, isEH))
+ if (prettyPrintRegisterOp(U, OS, DumpOpts, Opcode, Operands, RegInfo, isEH))
return true;
for (unsigned Operand = 0; Operand < 2; ++Operand) {
@@ -286,7 +290,7 @@ bool DWARFExpression::Operation::print(raw_ostream &OS,
if (Opcode == DW_OP_convert && Operands[Operand] == 0)
OS << " 0x0";
else
- prettyPrintBaseTypeRef(U, OS, Operands, Operand);
+ prettyPrintBaseTypeRef(U, OS, DumpOpts, Operands, Operand);
} else if (Size == Operation::WasmLocationArg) {
assert(Operand == 1);
switch (Operands[0]) {
@@ -311,11 +315,12 @@ bool DWARFExpression::Operation::print(raw_ostream &OS,
return true;
}
-void DWARFExpression::print(raw_ostream &OS, const MCRegisterInfo *RegInfo,
- DWARFUnit *U, bool IsEH) const {
+void DWARFExpression::print(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const MCRegisterInfo *RegInfo, DWARFUnit *U,
+ bool IsEH) const {
uint32_t EntryValExprSize = 0;
for (auto &Op : *this) {
- if (!Op.print(OS, this, RegInfo, U, IsEH)) {
+ if (!Op.print(OS, DumpOpts, this, RegInfo, U, IsEH)) {
uint64_t FailOffset = Op.getEndOffset();
while (FailOffset < Data.getData().size())
OS << format(" %02x", Data.getU8(&FailOffset));
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index a7da5acc380b..2559765876d9 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -168,6 +168,7 @@ bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData,
case DW_FORM_line_strp:
case DW_FORM_GNU_ref_alt:
case DW_FORM_GNU_strp_alt:
+ case DW_FORM_implicit_const:
if (Optional<uint8_t> FixedSize =
dwarf::getFixedFormByteSize(Form, Params)) {
*OffsetPtr += *FixedSize;
@@ -345,6 +346,9 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
case DW_FORM_ref_sig8:
Value.uval = Data.getU64(OffsetPtr, &Err);
break;
+ case DW_FORM_implicit_const:
+ // Value has been already set by DWARFFormValue::createFromSValue.
+ break;
default:
// DWARFFormValue::skipValue() will have caught this and caused all
// DWARF DIEs to fail to be parsed, so this code is not reachable.
@@ -358,10 +362,16 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
return !errorToBool(std::move(Err));
}
+void DWARFFormValue::dumpAddress(raw_ostream &OS, uint8_t AddressSize,
+ uint64_t Address) {
+ uint8_t HexDigits = AddressSize * 2;
+ OS << format("0x%*.*" PRIx64, HexDigits, HexDigits, Address);
+}
+
void DWARFFormValue::dumpSectionedAddress(raw_ostream &OS,
DIDumpOptions DumpOpts,
object::SectionedAddress SA) const {
- OS << format("0x%016" PRIx64, SA.Address);
+ dumpAddress(OS, U->getAddressByteSize(), SA.Address);
dumpAddressSection(U->getContext().getDWARFObj(), OS, DumpOpts,
SA.SectionIndex);
}
@@ -476,6 +486,7 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
break;
case DW_FORM_sdata:
+ case DW_FORM_implicit_const:
OS << Value.sval;
break;
case DW_FORM_udata:
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index 252b58e5a591..ace7000f07b2 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -71,8 +71,8 @@ void DWARFGdbIndex::dumpSymbolTable(raw_ostream &OS) const {
StringRef Name = ConstantPoolStrings.substr(
ConstantPoolOffset - StringPoolOffset + E.NameOffset);
- auto CuVector = std::find_if(
- ConstantPoolVectors.begin(), ConstantPoolVectors.end(),
+ auto CuVector = llvm::find_if(
+ ConstantPoolVectors,
[&](const std::pair<uint32_t, SmallVector<uint32_t, 0>> &V) {
return V.first == E.VecOffset;
});
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
index 2124a49bef60..c876af1e9b51 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
@@ -71,12 +71,12 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
") than there is space for",
SectionName.data(), HeaderOffset, HeaderData.OffsetEntryCount);
Data.setAddressSize(HeaderData.AddrSize);
- for (uint32_t I = 0; I < HeaderData.OffsetEntryCount; ++I)
- Offsets.push_back(Data.getRelocatedValue(OffsetByteSize, OffsetPtr));
+ *OffsetPtr += HeaderData.OffsetEntryCount * OffsetByteSize;
return Error::success();
}
-void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
+void DWARFListTableHeader::dump(DataExtractor Data, raw_ostream &OS,
+ DIDumpOptions DumpOpts) const {
if (DumpOpts.Verbose)
OS << format("0x%8.8" PRIx64 ": ", HeaderOffset);
int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(Format);
@@ -91,7 +91,8 @@ void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
if (HeaderData.OffsetEntryCount > 0) {
OS << "offsets: [";
- for (const auto &Off : Offsets) {
+ for (uint32_t I = 0; I < HeaderData.OffsetEntryCount; ++I) {
+ auto Off = *getOffsetEntry(Data, I);
OS << format("\n0x%0*" PRIx64, OffsetDumpWidth, Off);
if (DumpOpts.Verbose)
OS << format(" => 0x%08" PRIx64,
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
index c219f34bbc31..a301b65dd444 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
@@ -36,9 +36,10 @@ void DWARFTypeUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
<< ", version = " << format("0x%04x", getVersion());
if (getVersion() >= 5)
OS << ", unit_type = " << dwarf::UnitTypeString(getUnitType());
- OS << ", abbr_offset = "
- << format("0x%04" PRIx64, getAbbreviations()->getOffset())
- << ", addr_size = " << format("0x%02x", getAddressByteSize())
+ OS << ", abbr_offset = " << format("0x%04" PRIx64, getAbbrOffset());
+ if (!getAbbreviations())
+ OS << " (invalid)";
+ OS << ", addr_size = " << format("0x%02x", getAddressByteSize())
<< ", name = '" << Name << "'"
<< ", type_signature = " << format("0x%016" PRIx64, getTypeHash())
<< ", type_offset = " << format("0x%04" PRIx64, getTypeOffset())
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index a6d44f04e468..8493950a29b2 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -181,31 +181,6 @@ DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
StringOffsetSection(SOS), AddrOffsetSection(AOS), isLittleEndian(LE),
IsDWO(IsDWO), UnitVector(UnitVector) {
clear();
- if (IsDWO) {
- // If we are reading a package file, we need to adjust the location list
- // data based on the index entries.
- StringRef Data = Header.getVersion() >= 5
- ? Context.getDWARFObj().getLoclistsDWOSection().Data
- : LocSection->Data;
- if (auto *IndexEntry = Header.getIndexEntry())
- if (const auto *C = IndexEntry->getContribution(
- Header.getVersion() >= 5 ? DW_SECT_LOCLISTS : DW_SECT_EXT_LOC))
- Data = Data.substr(C->Offset, C->Length);
-
- DWARFDataExtractor DWARFData(Data, isLittleEndian, getAddressByteSize());
- LocTable =
- std::make_unique<DWARFDebugLoclists>(DWARFData, Header.getVersion());
- } else if (Header.getVersion() >= 5) {
- LocTable = std::make_unique<DWARFDebugLoclists>(
- DWARFDataExtractor(Context.getDWARFObj(),
- Context.getDWARFObj().getLoclistsSection(),
- isLittleEndian, getAddressByteSize()),
- Header.getVersion());
- } else {
- LocTable = std::make_unique<DWARFDebugLoc>(
- DWARFDataExtractor(Context.getDWARFObj(), *LocSection, isLittleEndian,
- getAddressByteSize()));
- }
}
DWARFUnit::~DWARFUnit() = default;
@@ -219,13 +194,12 @@ Optional<object::SectionedAddress>
DWARFUnit::getAddrOffsetSectionItem(uint32_t Index) const {
if (IsDWO) {
auto R = Context.info_section_units();
- auto I = R.begin();
// Surprising if a DWO file has more than one skeleton unit in it - this
// probably shouldn't be valid, but if a use case is found, here's where to
// support it (probably have to linearly search for the matching skeleton CU
// here)
- if (I != R.end() && std::next(I) == R.end())
- return (*I)->getAddrOffsetSectionItem(Index);
+ if (hasSingleElement(R))
+ return (*R.begin())->getAddrOffsetSectionItem(Index);
}
if (!AddrOffsetSectionBase)
return None;
@@ -492,8 +466,8 @@ Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
// describe address ranges.
if (getVersion() >= 5) {
// In case of DWP, the base offset from the index has to be added.
- uint64_t ContributionBaseOffset = 0;
if (IsDWO) {
+ uint64_t ContributionBaseOffset = 0;
if (auto *IndexEntry = Header.getIndexEntry())
if (auto *Contrib = IndexEntry->getContribution(DW_SECT_RNGLISTS))
ContributionBaseOffset = Contrib->Offset;
@@ -503,67 +477,36 @@ Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
DWARFListTableHeader::getHeaderSize(Header.getFormat()));
} else
setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
- toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0));
- if (RangeSection->Data.size()) {
- // Parse the range list table header. Individual range lists are
- // extracted lazily.
- DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
- isLittleEndian, 0);
- auto TableOrError = parseListTableHeader<DWARFDebugRnglistTable>(
- RangesDA, RangeSectionBase, Header.getFormat());
- if (!TableOrError)
- return createStringError(errc::invalid_argument,
- "parsing a range list table: " +
- toString(TableOrError.takeError()));
-
- RngListTable = TableOrError.get();
-
- // In a split dwarf unit, there is no DW_AT_rnglists_base attribute.
- // Adjust RangeSectionBase to point past the table header.
- if (IsDWO && RngListTable)
- RangeSectionBase =
- ContributionBaseOffset + RngListTable->getHeaderSize();
- }
+ toSectionOffset(UnitDie.find(DW_AT_rnglists_base),
+ DWARFListTableHeader::getHeaderSize(
+ Header.getFormat())));
+ }
- // In a split dwarf unit, there is no DW_AT_loclists_base attribute.
- // Setting LocSectionBase to point past the table header.
- if (IsDWO) {
- auto &DWOSection = Context.getDWARFObj().getLoclistsDWOSection();
- if (DWOSection.Data.empty())
- return Error::success();
- setLocSection(&DWOSection,
- DWARFListTableHeader::getHeaderSize(Header.getFormat()));
- } else if (auto X = UnitDie.find(DW_AT_loclists_base)) {
- setLocSection(&Context.getDWARFObj().getLoclistsSection(),
- toSectionOffset(X, 0));
- } else {
- return Error::success();
- }
+ if (IsDWO) {
+ // If we are reading a package file, we need to adjust the location list
+ // data based on the index entries.
+ StringRef Data = Header.getVersion() >= 5
+ ? Context.getDWARFObj().getLoclistsDWOSection().Data
+ : Context.getDWARFObj().getLocDWOSection().Data;
+ if (auto *IndexEntry = Header.getIndexEntry())
+ if (const auto *C = IndexEntry->getContribution(
+ Header.getVersion() >= 5 ? DW_SECT_LOCLISTS : DW_SECT_EXT_LOC))
+ Data = Data.substr(C->Offset, C->Length);
- if (LocSection) {
- if (IsDWO)
- LoclistTableHeader.emplace(".debug_loclists.dwo", "locations");
- else
- LoclistTableHeader.emplace(".debug_loclists", "locations");
-
- uint64_t HeaderSize = DWARFListTableHeader::getHeaderSize(Header.getFormat());
- uint64_t Offset = getLocSectionBase();
- DWARFDataExtractor Data(Context.getDWARFObj(), *LocSection,
- isLittleEndian, getAddressByteSize());
- if (Offset < HeaderSize)
- return createStringError(errc::invalid_argument,
- "did not detect a valid"
- " list table with base = 0x%" PRIx64 "\n",
- Offset);
- Offset -= HeaderSize;
- if (auto *IndexEntry = Header.getIndexEntry())
- if (const auto *Contrib = IndexEntry->getContribution(DW_SECT_LOCLISTS))
- Offset += Contrib->Offset;
- if (Error E = LoclistTableHeader->extract(Data, &Offset))
- return createStringError(errc::invalid_argument,
- "parsing a loclist table: " +
- toString(std::move(E)));
- }
+ DWARFDataExtractor DWARFData(Data, isLittleEndian, getAddressByteSize());
+ LocTable =
+ std::make_unique<DWARFDebugLoclists>(DWARFData, Header.getVersion());
+ LocSectionBase = DWARFListTableHeader::getHeaderSize(Header.getFormat());
+ } else if (getVersion() >= 5) {
+ LocTable = std::make_unique<DWARFDebugLoclists>(
+ DWARFDataExtractor(Context.getDWARFObj(),
+ Context.getDWARFObj().getLoclistsSection(),
+ isLittleEndian, getAddressByteSize()),
+ getVersion());
+ } else {
+ LocTable = std::make_unique<DWARFDebugLoc>(DWARFDataExtractor(
+ Context.getDWARFObj(), Context.getDWARFObj().getLocSection(),
+ isLittleEndian, getAddressByteSize()));
}
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
@@ -606,19 +549,8 @@ bool DWARFUnit::parseDWO() {
if (AddrOffsetSectionBase)
DWO->setAddrOffsetSection(AddrOffsetSection, *AddrOffsetSectionBase);
if (getVersion() >= 5) {
- DWO->setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0);
- DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
- isLittleEndian, 0);
- if (auto TableOrError = parseListTableHeader<DWARFDebugRnglistTable>(
- RangesDA, RangeSectionBase, Header.getFormat()))
- DWO->RngListTable = TableOrError.get();
- else
- Context.getRecoverableErrorHandler()(createStringError(
- errc::invalid_argument, "parsing a range list table: %s",
- toString(TableOrError.takeError()).c_str()));
-
- if (DWO->RngListTable)
- DWO->RangeSectionBase = DWO->RngListTable->getHeaderSize();
+ DWO->setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(),
+ DWARFListTableHeader::getHeaderSize(getFormat()));
} else {
auto DWORangesBase = UnitDie.getRangesBaseAttribute();
DWO->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0);
@@ -642,17 +574,13 @@ DWARFUnit::findRnglistFromOffset(uint64_t Offset) {
return std::move(E);
return RangeList.getAbsoluteRanges(getBaseAddress());
}
- if (RngListTable) {
- DWARFDataExtractor RangesData(Context.getDWARFObj(), *RangeSection,
- isLittleEndian, RngListTable->getAddrSize());
- auto RangeListOrError = RngListTable->findList(RangesData, Offset);
- if (RangeListOrError)
- return RangeListOrError.get().getAbsoluteRanges(getBaseAddress(), *this);
- return RangeListOrError.takeError();
- }
-
- return createStringError(errc::invalid_argument,
- "missing or invalid range list table");
+ DWARFDataExtractor RangesData(Context.getDWARFObj(), *RangeSection,
+ isLittleEndian, Header.getAddressByteSize());
+ DWARFDebugRnglistTable RnglistTable;
+ auto RangeListOrError = RnglistTable.findList(RangesData, Offset);
+ if (RangeListOrError)
+ return RangeListOrError.get().getAbsoluteRanges(getBaseAddress(), *this);
+ return RangeListOrError.takeError();
}
Expected<DWARFAddressRangesVector>
@@ -660,12 +588,10 @@ DWARFUnit::findRnglistFromIndex(uint32_t Index) {
if (auto Offset = getRnglistOffset(Index))
return findRnglistFromOffset(*Offset);
- if (RngListTable)
- return createStringError(errc::invalid_argument,
- "invalid range list table index %d", Index);
-
return createStringError(errc::invalid_argument,
- "missing or invalid range list table");
+ "invalid range list table index %d (possibly "
+ "missing the entire range list table)",
+ Index);
}
Expected<DWARFAddressRangesVector> DWARFUnit::collectAddressRanges() {
@@ -995,11 +921,35 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
// Prior to DWARF v5, we derive the contribution size from the
// index table (in a package file). In a .dwo file it is simply
// the length of the string offsets section.
- if (!IndexEntry)
- return {Optional<StrOffsetsContributionDescriptor>(
- {0, StringOffsetSection.Data.size(), 4, Header.getFormat()})};
+ StrOffsetsContributionDescriptor Desc;
if (C)
- return {Optional<StrOffsetsContributionDescriptor>(
- {C->Offset, C->Length, 4, Header.getFormat()})};
+ Desc = StrOffsetsContributionDescriptor(C->Offset, C->Length, 4,
+ Header.getFormat());
+ else if (!IndexEntry && !StringOffsetSection.Data.empty())
+ Desc = StrOffsetsContributionDescriptor(0, StringOffsetSection.Data.size(),
+ 4, Header.getFormat());
+ else
+ return None;
+ auto DescOrError = Desc.validateContributionSize(DA);
+ if (!DescOrError)
+ return DescOrError.takeError();
+ return *DescOrError;
+}
+
+Optional<uint64_t> DWARFUnit::getRnglistOffset(uint32_t Index) {
+ DataExtractor RangesData(RangeSection->Data, isLittleEndian,
+ getAddressByteSize());
+ DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
+ isLittleEndian, 0);
+ if (Optional<uint64_t> Off = llvm::DWARFListTableHeader::getOffsetEntry(
+ RangesData, RangeSectionBase, getFormat(), Index))
+ return *Off + RangeSectionBase;
+ return None;
+}
+
+Optional<uint64_t> DWARFUnit::getLoclistOffset(uint32_t Index) {
+ if (Optional<uint64_t> Off = llvm::DWARFListTableHeader::getOffsetEntry(
+ LocTable->getData(), LocSectionBase, getFormat(), Index))
+ return *Off + LocSectionBase;
return None;
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 3a83317a73a3..ac624ec8b80f 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -172,6 +172,15 @@ unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
}
+ if (Die.hasChildren()) {
+ if (Die.getFirstChild().isValid() &&
+ Die.getFirstChild().getTag() == DW_TAG_null) {
+ warn() << dwarf::TagString(Die.getTag())
+ << " has DW_CHILDREN_yes but DIE has no children: ";
+ Die.dump(OS);
+ }
+ }
+
NumUnitErrors += verifyDebugInfoCallSite(Die);
}
@@ -538,6 +547,39 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
}
break;
}
+ case DW_AT_call_file:
+ case DW_AT_decl_file: {
+ if (auto FileIdx = AttrValue.Value.getAsUnsignedConstant()) {
+ DWARFUnit *U = Die.getDwarfUnit();
+ const auto *LT = U->getContext().getLineTableForUnit(U);
+ if (LT) {
+ if (!LT->hasFileAtIndex(*FileIdx)) {
+ bool IsZeroIndexed = LT->Prologue.getVersion() >= 5;
+ if (Optional<uint64_t> LastFileIdx = LT->getLastValidFileIndex()) {
+ ReportError("DIE has " + AttributeString(Attr) +
+ " with an invalid file index " +
+ llvm::formatv("{0}", *FileIdx) +
+ " (valid values are [" + (IsZeroIndexed ? "0-" : "1-") +
+ llvm::formatv("{0}", *LastFileIdx) + "])");
+ } else {
+ ReportError("DIE has " + AttributeString(Attr) +
+ " with an invalid file index " +
+ llvm::formatv("{0}", *FileIdx) +
+ " (the file table in the prologue is empty)");
+ }
+ }
+ } else {
+ ReportError("DIE has " + AttributeString(Attr) +
+ " that references a file with index " +
+ llvm::formatv("{0}", *FileIdx) +
+ " and the compile unit has no line table");
+ }
+ } else {
+ ReportError("DIE has " + AttributeString(Attr) +
+ " with invalid encoding");
+ }
+ break;
+ }
default:
break;
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index 7d9b72c6283d..2001478e8047 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -169,7 +169,7 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
Finalized = true;
// Sort function infos so we can emit sorted functions.
- llvm::sort(Funcs.begin(), Funcs.end());
+ llvm::sort(Funcs);
// Don't let the string table indexes change by finalizing in order.
StrTab.finalizeInOrder();
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
index c6fe764ab7e0..f946dd4860ac 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
@@ -204,8 +204,7 @@ Error MSFBuilder::setStreamSize(uint32_t Idx, uint32_t Size) {
if (auto EC = allocateBlocks(AddedBlocks, AddedBlockList))
return EC;
auto &CurrentBlocks = StreamData[Idx].second;
- CurrentBlocks.insert(CurrentBlocks.end(), AddedBlockList.begin(),
- AddedBlockList.end());
+ llvm::append_range(CurrentBlocks, AddedBlockList);
} else if (OldBlocks > NewBlocks) {
// For shrinking, free all the Blocks in the Block map, update the stream
// data, then shrink the directory.
@@ -268,8 +267,7 @@ Expected<MSFLayout> MSFBuilder::generateLayout() {
ExtraBlocks.resize(NumExtraBlocks);
if (auto EC = allocateBlocks(NumExtraBlocks, ExtraBlocks))
return std::move(EC);
- DirectoryBlocks.insert(DirectoryBlocks.end(), ExtraBlocks.begin(),
- ExtraBlocks.end());
+ llvm::append_range(DirectoryBlocks, ExtraBlocks);
} else if (NumDirectoryBlocks < DirectoryBlocks.size()) {
uint32_t NumUnnecessaryBlocks = DirectoryBlocks.size() - NumDirectoryBlocks;
for (auto B :
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index 73801ea1dd1b..b6f11a942a26 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -74,7 +74,7 @@ void DbiModuleDescriptorBuilder::addSymbolsInBulk(
if (BulkSymbols.empty())
return;
- Symbols.push_back(BulkSymbols);
+ Symbols.push_back(SymbolListWrapper(BulkSymbols));
// Symbols written to a PDB file are required to be 4 byte aligned. The same
// is not true of object files.
assert(BulkSymbols.size() % alignOf(CodeViewContainer::Pdb) == 0 &&
@@ -82,6 +82,18 @@ void DbiModuleDescriptorBuilder::addSymbolsInBulk(
SymbolByteSize += BulkSymbols.size();
}
+void DbiModuleDescriptorBuilder::addUnmergedSymbols(void *SymSrc,
+ uint32_t SymLength) {
+ assert(SymLength > 0);
+ Symbols.push_back(SymbolListWrapper(SymSrc, SymLength));
+
+ // Symbols written to a PDB file are required to be 4 byte aligned. The same
+ // is not true of object files.
+ assert(SymLength % alignOf(CodeViewContainer::Pdb) == 0 &&
+ "Invalid Symbol alignment!");
+ SymbolByteSize += SymLength;
+}
+
void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) {
SourceFiles.push_back(std::string(Path));
}
@@ -131,9 +143,7 @@ Error DbiModuleDescriptorBuilder::finalizeMsfLayout() {
return Error::success();
}
-Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
- const msf::MSFLayout &MsfLayout,
- WritableBinaryStreamRef MsfBuffer) {
+Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter) {
// We write the Modi record to the `ModiWriter`, but we additionally write its
// symbol stream to a brand new stream.
if (auto EC = ModiWriter.writeObject(Layout))
@@ -144,34 +154,55 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
return EC;
if (auto EC = ModiWriter.padToAlignment(sizeof(uint32_t)))
return EC;
+ return Error::success();
+}
- if (Layout.ModDiStream != kInvalidStreamIndex) {
- auto NS = WritableMappedBlockStream::createIndexedStream(
- MsfLayout, MsfBuffer, Layout.ModDiStream, MSF.getAllocator());
- WritableBinaryStreamRef Ref(*NS);
- BinaryStreamWriter SymbolWriter(Ref);
- // Write the symbols.
- if (auto EC =
- SymbolWriter.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC))
- return EC;
- for (ArrayRef<uint8_t> Syms : Symbols) {
- if (auto EC = SymbolWriter.writeBytes(Syms))
+Error DbiModuleDescriptorBuilder::commitSymbolStream(
+ const msf::MSFLayout &MsfLayout, WritableBinaryStreamRef MsfBuffer) {
+ if (Layout.ModDiStream == kInvalidStreamIndex)
+ return Error::success();
+
+ auto NS = WritableMappedBlockStream::createIndexedStream(
+ MsfLayout, MsfBuffer, Layout.ModDiStream, MSF.getAllocator());
+ WritableBinaryStreamRef Ref(*NS);
+ BinaryStreamWriter SymbolWriter(Ref);
+ // Write the symbols.
+ if (auto EC = SymbolWriter.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC))
+ return EC;
+ for (const SymbolListWrapper &Sym : Symbols) {
+ if (Sym.NeedsToBeMerged) {
+ assert(MergeSymsCallback);
+ if (auto EC = MergeSymsCallback(MergeSymsCtx, Sym.SymPtr, SymbolWriter))
return EC;
- }
- assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 &&
- "Invalid debug section alignment!");
- // TODO: Write C11 Line data
- for (const auto &Builder : C13Builders) {
- if (auto EC = Builder.commit(SymbolWriter, CodeViewContainer::Pdb))
+ } else {
+ if (auto EC = SymbolWriter.writeBytes(Sym.asArray()))
return EC;
}
+ }
+
+ // Apply the string table fixups.
+ auto SavedOffset = SymbolWriter.getOffset();
+ for (const StringTableFixup &Fixup : StringTableFixups) {
+ SymbolWriter.setOffset(Fixup.SymOffsetOfReference);
+ if (auto E = SymbolWriter.writeInteger<uint32_t>(Fixup.StrTabOffset))
+ return E;
+ }
+ SymbolWriter.setOffset(SavedOffset);
- // TODO: Figure out what GlobalRefs substream actually is and populate it.
- if (auto EC = SymbolWriter.writeInteger<uint32_t>(0))
+ assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 &&
+ "Invalid debug section alignment!");
+ // TODO: Write C11 Line data
+ for (const auto &Builder : C13Builders) {
+ if (auto EC = Builder.commit(SymbolWriter, CodeViewContainer::Pdb))
return EC;
- if (SymbolWriter.bytesRemaining() > 0)
- return make_error<RawError>(raw_error_code::stream_too_long);
}
+
+ // TODO: Figure out what GlobalRefs substream actually is and populate it.
+ if (auto EC = SymbolWriter.writeInteger<uint32_t>(0))
+ return EC;
+ if (SymbolWriter.bytesRemaining() > 0)
+ return make_error<RawError>(raw_error_code::stream_too_long);
+
return Error::success();
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 627aef7506fd..98a8acaffd60 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -18,6 +18,7 @@
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Parallel.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -394,10 +395,17 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
return EC;
for (auto &M : ModiList) {
- if (auto EC = M->commit(Writer, Layout, MsfBuffer))
+ if (auto EC = M->commit(Writer))
return EC;
}
+ // Commit symbol streams. This is a lot of data, so do it in parallel.
+ if (auto EC = parallelForEachError(
+ ModiList, [&](std::unique_ptr<DbiModuleDescriptorBuilder> &M) {
+ return M->commitSymbolStream(Layout, MsfBuffer);
+ }))
+ return EC;
+
if (!SectionContribs.empty()) {
if (auto EC = Writer.writeEnum(DbiSecContribVer60))
return EC;
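
The DBI stream builder now commits the per-module symbol streams in parallel via parallelForEachError(), which runs the lambda over every module and folds any failures into a single Error. A rough standard-library-only stand-in for that shape (the error type and task payload below are placeholders, not the LLVM API):

    #include <functional>
    #include <future>
    #include <optional>
    #include <string>
    #include <vector>

    // Run every task concurrently and report the first failure, joining all
    // futures either way so no task is left running.
    std::optional<std::string> commitAllParallel(
        const std::vector<std::function<std::optional<std::string>()>> &Tasks) {
      std::vector<std::future<std::optional<std::string>>> Futures;
      Futures.reserve(Tasks.size());
      for (const auto &T : Tasks)
        Futures.push_back(std::async(std::launch::async, T));
      std::optional<std::string> FirstError;
      for (auto &F : Futures)
        if (auto Err = F.get(); Err && !FirstError)
          FirstError = Err;
      return FirstError;
    }
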
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 4e58489f1401..52df26b67916 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -162,7 +162,7 @@ static int gsiRecordCmp(StringRef S1, StringRef S2) {
if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
return memcmp(S1.data(), S2.data(), LS);
- // Both strings are ascii, perform a case-insenstive comparison.
+ // Both strings are ascii, perform a case-insensitive comparison.
return S1.compare_lower(S2.data());
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index 4a88391494cd..1d873b87b347 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -116,7 +116,7 @@ StringMap<uint32_t> NamedStreamMap::entries() const {
uint32_t NamedStreamMap::appendStringData(StringRef S) {
uint32_t Offset = NamesBuffer.size();
- NamesBuffer.insert(NamesBuffer.end(), S.begin(), S.end());
+ llvm::append_range(NamesBuffer, S);
NamesBuffer.push_back('\0');
return Offset;
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp
new file mode 100644
index 000000000000..feede1dbc958
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp
@@ -0,0 +1,41 @@
+//==- NativeEnumSymbols.cpp - Native Symbol Enumerator impl ------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h"
+
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeEnumSymbols::NativeEnumSymbols(NativeSession &PDBSession,
+ std::vector<SymIndexId> Symbols)
+ : Symbols(std::move(Symbols)), Index(0), Session(PDBSession) {}
+
+uint32_t NativeEnumSymbols::getChildCount() const {
+ return static_cast<uint32_t>(Symbols.size());
+}
+
+std::unique_ptr<PDBSymbol>
+NativeEnumSymbols::getChildAtIndex(uint32_t N) const {
+ if (N < Symbols.size()) {
+ return Session.getSymbolCache().getSymbolById(Symbols[N]);
+ }
+ return nullptr;
+}
+
+std::unique_ptr<PDBSymbol> NativeEnumSymbols::getNext() {
+ return getChildAtIndex(Index++);
+}
+
+void NativeEnumSymbols::reset() { Index = 0; }
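
NativeEnumSymbols is a plain forward cursor over a vector of symbol IDs, resolving each ID through the session's symbol cache on demand. The same cursor shape in isolation (IdEnumerator is an illustrative name, not an LLVM class):

    #include <cstdint>
    #include <optional>
    #include <vector>

    class IdEnumerator {
      std::vector<uint32_t> Ids;
      size_t Index = 0;

    public:
      explicit IdEnumerator(std::vector<uint32_t> Ids) : Ids(std::move(Ids)) {}
      size_t count() const { return Ids.size(); }
      // Mirrors getNext()/getChildAtIndex(): an out-of-range index yields
      // "no symbol" instead of failing.
      std::optional<uint32_t> next() {
        if (Index >= Ids.size())
          return std::nullopt;
        return Ids[Index++];
      }
      void reset() { Index = 0; }
    };
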
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp
index 2537daa7493c..7f3b35c297b4 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp
@@ -8,7 +8,9 @@
#include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h"
#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
@@ -18,8 +20,10 @@ using namespace llvm::pdb;
NativeFunctionSymbol::NativeFunctionSymbol(NativeSession &Session,
SymIndexId Id,
- const codeview::ProcSym &Sym)
- : NativeRawSymbol(Session, PDB_SymType::Data, Id), Sym(Sym) {}
+ const codeview::ProcSym &Sym,
+ uint32_t Offset)
+ : NativeRawSymbol(Session, PDB_SymType::Function, Id), Sym(Sym),
+ RecordOffset(Offset) {}
NativeFunctionSymbol::~NativeFunctionSymbol() {}
@@ -42,10 +46,6 @@ std::string NativeFunctionSymbol::getName() const {
return std::string(Sym.Name);
}
-PDB_SymType NativeFunctionSymbol::getSymTag() const {
- return PDB_SymType::Function;
-}
-
uint64_t NativeFunctionSymbol::getLength() const { return Sym.CodeSize; }
uint32_t NativeFunctionSymbol::getRelativeVirtualAddress() const {
@@ -55,3 +55,89 @@ uint32_t NativeFunctionSymbol::getRelativeVirtualAddress() const {
uint64_t NativeFunctionSymbol::getVirtualAddress() const {
return Session.getVAFromSectOffset(Sym.Segment, Sym.CodeOffset);
}
+
+static bool inlineSiteContainsAddress(InlineSiteSym &IS,
+ uint32_t OffsetInFunc) {
+ // Returns true if inline site contains the offset.
+ bool Found = false;
+ uint32_t CodeOffset = 0;
+ for (auto &Annot : IS.annotations()) {
+ switch (Annot.OpCode) {
+ case BinaryAnnotationsOpCode::CodeOffset:
+ case BinaryAnnotationsOpCode::ChangeCodeOffset:
+ case BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset:
+ CodeOffset += Annot.U1;
+ if (OffsetInFunc >= CodeOffset)
+ Found = true;
+ break;
+ case BinaryAnnotationsOpCode::ChangeCodeLength:
+ CodeOffset += Annot.U1;
+ if (Found && OffsetInFunc < CodeOffset)
+ return true;
+ Found = false;
+ break;
+ case BinaryAnnotationsOpCode::ChangeCodeLengthAndCodeOffset:
+ CodeOffset += Annot.U2;
+ if (OffsetInFunc >= CodeOffset && OffsetInFunc < CodeOffset + Annot.U1)
+ return true;
+ Found = false;
+ break;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+NativeFunctionSymbol::findInlineFramesByVA(uint64_t VA) const {
+ uint16_t Modi;
+ if (!Session.moduleIndexForVA(VA, Modi))
+ return nullptr;
+
+ Expected<ModuleDebugStreamRef> ModS = Session.getModuleDebugStream(Modi);
+ if (!ModS) {
+ consumeError(ModS.takeError());
+ return nullptr;
+ }
+ CVSymbolArray Syms = ModS->getSymbolArray();
+
+ // Search for inline sites. There should be one matching top level inline
+ // site. Then search in its nested inline sites.
+ std::vector<SymIndexId> Frames;
+ uint32_t CodeOffset = VA - getVirtualAddress();
+ auto Start = Syms.at(RecordOffset);
+ auto End = Syms.at(Sym.End);
+ while (Start != End) {
+ bool Found = false;
+ // Find matching inline site within Start and End.
+ for (; Start != End; ++Start) {
+ if (Start->kind() != S_INLINESITE)
+ continue;
+
+ InlineSiteSym IS =
+ cantFail(SymbolDeserializer::deserializeAs<InlineSiteSym>(*Start));
+ if (inlineSiteContainsAddress(IS, CodeOffset)) {
+ // Insert frames in reverse order.
+ SymIndexId Id = Session.getSymbolCache().getOrCreateInlineSymbol(
+ IS, getVirtualAddress(), Modi, Start.offset());
+ Frames.insert(Frames.begin(), Id);
+
+ // Update offsets to search within this inline site.
+ ++Start;
+ End = Syms.at(IS.End);
+ Found = true;
+ break;
+ }
+
+ Start = Syms.at(IS.End);
+ if (Start == End)
+ break;
+ }
+
+ if (!Found)
+ break;
+ }
+
+ return std::make_unique<NativeEnumSymbols>(Session, std::move(Frames));
+}
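
inlineSiteContainsAddress() treats the S_INLINESITE binary annotations as a delta-encoded list of (code offset, length) ranges and reports whether the function-relative offset falls inside one of them; findInlineFramesByVA() then narrows the [Start, End) symbol window to each matching site's nested records. A simplified, self-contained model of the range check (Annotation and the Op enum are illustrative, not the CodeView types):

    #include <cstdint>
    #include <vector>

    enum class Op {
      ChangeCodeOffset,              // start of a new range
      ChangeCodeLength,              // close the current range
      ChangeCodeLengthAndCodeOffset  // offset delta plus length in one op
    };

    struct Annotation {
      Op OpCode;
      uint32_t U1; // length, or offset delta, depending on OpCode
      uint32_t U2; // offset delta for the combined op
    };

    bool containsOffset(const std::vector<Annotation> &Annots, uint32_t Offset) {
      bool Started = false;
      uint32_t CodeOffset = 0;
      for (const Annotation &A : Annots) {
        switch (A.OpCode) {
        case Op::ChangeCodeOffset:
          CodeOffset += A.U1;
          Started = Offset >= CodeOffset;
          break;
        case Op::ChangeCodeLength:
          CodeOffset += A.U1;
          if (Started && Offset < CodeOffset)
            return true;
          Started = false;
          break;
        case Op::ChangeCodeLengthAndCodeOffset:
          CodeOffset += A.U2;
          if (Offset >= CodeOffset && Offset < CodeOffset + A.U1)
            return true;
          Started = false;
          break;
        }
      }
      return false;
    }
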
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp
new file mode 100644
index 000000000000..8314353c3890
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp
@@ -0,0 +1,177 @@
+//===- NativeInlineSiteSymbol.cpp - info about inline sites -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h"
+
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeInlineSiteSymbol::NativeInlineSiteSymbol(
+ NativeSession &Session, SymIndexId Id, const codeview::InlineSiteSym &Sym,
+ uint64_t ParentAddr)
+ : NativeRawSymbol(Session, PDB_SymType::InlineSite, Id), Sym(Sym),
+ ParentAddr(ParentAddr) {}
+
+NativeInlineSiteSymbol::~NativeInlineSiteSymbol() {}
+
+void NativeInlineSiteSymbol::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+ dumpSymbolField(OS, "name", getName(), Indent);
+}
+
+static Optional<InlineeSourceLine>
+findInlineeByTypeIndex(TypeIndex Id, ModuleDebugStreamRef &ModS) {
+ for (const auto &SS : ModS.getSubsectionsArray()) {
+ if (SS.kind() != DebugSubsectionKind::InlineeLines)
+ continue;
+
+ DebugInlineeLinesSubsectionRef InlineeLines;
+ BinaryStreamReader Reader(SS.getRecordData());
+ if (auto EC = InlineeLines.initialize(Reader)) {
+ consumeError(std::move(EC));
+ continue;
+ }
+
+ for (const InlineeSourceLine &Line : InlineeLines)
+ if (Line.Header->Inlinee == Id)
+ return Line;
+ }
+ return None;
+}
+
+std::string NativeInlineSiteSymbol::getName() const {
+ auto Tpi = Session.getPDBFile().getPDBTpiStream();
+ if (!Tpi) {
+ consumeError(Tpi.takeError());
+ return "";
+ }
+ auto Ipi = Session.getPDBFile().getPDBIpiStream();
+ if (!Ipi) {
+ consumeError(Ipi.takeError());
+ return "";
+ }
+
+ LazyRandomTypeCollection &Types = Tpi->typeCollection();
+ LazyRandomTypeCollection &Ids = Ipi->typeCollection();
+ CVType InlineeType = Ids.getType(Sym.Inlinee);
+ std::string QualifiedName;
+ if (InlineeType.kind() == LF_MFUNC_ID) {
+ MemberFuncIdRecord MFRecord;
+ cantFail(TypeDeserializer::deserializeAs<MemberFuncIdRecord>(InlineeType,
+ MFRecord));
+ TypeIndex ClassTy = MFRecord.getClassType();
+ QualifiedName.append(std::string(Types.getTypeName(ClassTy)));
+ QualifiedName.append("::");
+ } else if (InlineeType.kind() == LF_FUNC_ID) {
+ FuncIdRecord FRecord;
+ cantFail(
+ TypeDeserializer::deserializeAs<FuncIdRecord>(InlineeType, FRecord));
+ TypeIndex ParentScope = FRecord.getParentScope();
+ if (!ParentScope.isNoneType()) {
+ QualifiedName.append(std::string(Ids.getTypeName(ParentScope)));
+ QualifiedName.append("::");
+ }
+ }
+
+ QualifiedName.append(std::string(Ids.getTypeName(Sym.Inlinee)));
+ return QualifiedName;
+}
+
+void NativeInlineSiteSymbol::getLineOffset(uint32_t OffsetInFunc,
+ uint32_t &LineOffset,
+ uint32_t &FileOffset) const {
+ LineOffset = 0;
+ FileOffset = 0;
+ uint32_t CodeOffset = 0;
+ for (const auto &Annot : Sym.annotations()) {
+ switch (Annot.OpCode) {
+ case BinaryAnnotationsOpCode::CodeOffset:
+ case BinaryAnnotationsOpCode::ChangeCodeOffset:
+ case BinaryAnnotationsOpCode::ChangeCodeLength:
+ CodeOffset += Annot.U1;
+ break;
+ case BinaryAnnotationsOpCode::ChangeCodeLengthAndCodeOffset:
+ CodeOffset += Annot.U2;
+ break;
+ case BinaryAnnotationsOpCode::ChangeLineOffset:
+ case BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset:
+ CodeOffset += Annot.U1;
+ LineOffset += Annot.S1;
+ break;
+ case BinaryAnnotationsOpCode::ChangeFile:
+ FileOffset = Annot.U1;
+ break;
+ default:
+ break;
+ }
+
+ if (CodeOffset >= OffsetInFunc)
+ return;
+ }
+}
+
+std::unique_ptr<IPDBEnumLineNumbers>
+NativeInlineSiteSymbol::findInlineeLinesByVA(uint64_t VA,
+ uint32_t Length) const {
+ uint16_t Modi;
+ if (!Session.moduleIndexForVA(VA, Modi))
+ return nullptr;
+
+ Expected<ModuleDebugStreamRef> ModS = Session.getModuleDebugStream(Modi);
+ if (!ModS) {
+ consumeError(ModS.takeError());
+ return nullptr;
+ }
+
+ Expected<DebugChecksumsSubsectionRef> Checksums =
+ ModS->findChecksumsSubsection();
+ if (!Checksums) {
+ consumeError(Checksums.takeError());
+ return nullptr;
+ }
+
+ // Get the line number offset and source file offset.
+ uint32_t SrcLineOffset;
+ uint32_t SrcFileOffset;
+ getLineOffset(VA - ParentAddr, SrcLineOffset, SrcFileOffset);
+
+ // Get line info from inlinee line table.
+ Optional<InlineeSourceLine> Inlinee =
+ findInlineeByTypeIndex(Sym.Inlinee, ModS.get());
+
+ if (!Inlinee)
+ return nullptr;
+
+ uint32_t SrcLine = Inlinee->Header->SourceLineNum + SrcLineOffset;
+ uint32_t SrcCol = 0; // Inline sites don't seem to have column info.
+ uint32_t FileChecksumOffset =
+ (SrcFileOffset == 0) ? Inlinee->Header->FileID : SrcFileOffset;
+
+ auto ChecksumIter = Checksums->getArray().at(FileChecksumOffset);
+ uint32_t SrcFileId =
+ Session.getSymbolCache().getOrCreateSourceFile(*ChecksumIter);
+
+ uint32_t LineSect, LineOff;
+ Session.addressForVA(VA, LineSect, LineOff);
+ NativeLineNumber LineNum(Session, SrcLine, SrcCol, LineSect, LineOff, Length,
+ SrcFileId, Modi);
+ auto SrcFile = Session.getSymbolCache().getSourceFileById(SrcFileId);
+ std::vector<NativeLineNumber> Lines{LineNum};
+
+ return std::make_unique<NativeEnumLineNumbers>(std::move(Lines));
+}
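
findInlineeLinesByVA() recovers the source line for an inline frame arithmetically: the inlinee-lines subsection supplies the callee's starting line, getLineOffset() accumulates the ChangeLineOffset deltas walked up to the queried code offset, and the reported line is their sum (the file offset falls back to the header's FileID when no ChangeFile annotation was seen). For a hypothetical site whose header says SourceLineNum = 120 and whose annotations accumulate a +3 line delta before the offset:

    SrcLine = Inlinee->Header->SourceLineNum + SrcLineOffset
            = 120 + 3
            = 123   (hypothetical values, for illustration only)
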
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp
index 2535e09baf62..155ed0cdb828 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp
@@ -15,9 +15,10 @@ NativeLineNumber::NativeLineNumber(const NativeSession &Session,
const codeview::LineInfo Line,
uint32_t ColumnNumber, uint32_t Section,
uint32_t Offset, uint32_t Length,
- uint32_t SrcFileId)
+ uint32_t SrcFileId, uint32_t CompilandId)
: Session(Session), Line(Line), ColumnNumber(ColumnNumber),
- Section(Section), Offset(Offset), Length(Length), SrcFileId(SrcFileId) {}
+ Section(Section), Offset(Offset), Length(Length), SrcFileId(SrcFileId),
+ CompilandId(CompilandId) {}
uint32_t NativeLineNumber::getLineNumber() const { return Line.getStartLine(); }
@@ -45,6 +46,6 @@ uint32_t NativeLineNumber::getLength() const { return Length; }
uint32_t NativeLineNumber::getSourceFileId() const { return SrcFileId; }
-uint32_t NativeLineNumber::getCompilandId() const { return 0; }
+uint32_t NativeLineNumber::getCompilandId() const { return CompilandId; }
bool NativeLineNumber::isStatement() const { return Line.isStatement(); }
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp
index 7086af7e67a2..1265e688b867 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp
@@ -18,7 +18,7 @@ using namespace llvm::pdb;
NativePublicSymbol::NativePublicSymbol(NativeSession &Session, SymIndexId Id,
const codeview::PublicSym32 &Sym)
- : NativeRawSymbol(Session, PDB_SymType::Data, Id), Sym(Sym) {}
+ : NativeRawSymbol(Session, PDB_SymType::PublicSymbol, Id), Sym(Sym) {}
NativePublicSymbol::~NativePublicSymbol() {}
@@ -39,10 +39,6 @@ std::string NativePublicSymbol::getName() const {
return std::string(Sym.Name);
}
-PDB_SymType NativePublicSymbol::getSymTag() const {
- return PDB_SymType::PublicSymbol;
-}
-
uint32_t NativePublicSymbol::getRelativeVirtualAddress() const {
return Session.getRVAFromSectOffset(Sym.Segment, Sym.Offset);
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
index ac8449df44ff..5d7946cdc2f7 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -13,6 +13,7 @@
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
@@ -56,7 +57,7 @@ static DbiStream *getDbiStreamPtr(PDBFile &File) {
NativeSession::NativeSession(std::unique_ptr<PDBFile> PdbFile,
std::unique_ptr<BumpPtrAllocator> Allocator)
: Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)),
- Cache(*this, getDbiStreamPtr(*Pdb)) {}
+ Cache(*this, getDbiStreamPtr(*Pdb)), AddrToModuleIndex(IMapAllocator) {}
NativeSession::~NativeSession() = default;
@@ -255,6 +256,9 @@ std::unique_ptr<PDBSymbol> NativeSession::findSymbolByRVA(uint32_t RVA,
std::unique_ptr<PDBSymbol>
NativeSession::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset,
PDB_SymType Type) {
+ if (AddrToModuleIndex.empty())
+ parseSectionContribs();
+
return Cache.findSymbolBySectOffset(Sect, Offset, Type);
}
@@ -272,14 +276,14 @@ NativeSession::findLineNumbersByAddress(uint64_t Address,
std::unique_ptr<IPDBEnumLineNumbers>
NativeSession::findLineNumbersByRVA(uint32_t RVA, uint32_t Length) const {
- return findLineNumbersByAddress(getLoadAddress() + RVA, Length);
+ return Cache.findLineNumbersByVA(getLoadAddress() + RVA, Length);
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeSession::findLineNumbersBySectOffset(uint32_t Section, uint32_t Offset,
uint32_t Length) const {
uint64_t VA = getVAFromSectOffset(Section, Offset);
- return findLineNumbersByAddress(VA, Length);
+ return Cache.findLineNumbersByVA(VA, Length);
}
std::unique_ptr<IPDBEnumSourceFiles>
@@ -386,3 +390,74 @@ uint64_t NativeSession::getVAFromSectOffset(uint32_t Section,
uint32_t Offset) const {
return LoadAddress + getRVAFromSectOffset(Section, Offset);
}
+
+bool NativeSession::moduleIndexForVA(uint64_t VA, uint16_t &ModuleIndex) const {
+ ModuleIndex = 0;
+ auto Iter = AddrToModuleIndex.find(VA);
+ if (Iter == AddrToModuleIndex.end())
+ return false;
+ ModuleIndex = Iter.value();
+ return true;
+}
+
+bool NativeSession::moduleIndexForSectOffset(uint32_t Sect, uint32_t Offset,
+ uint16_t &ModuleIndex) const {
+ ModuleIndex = 0;
+ auto Iter = AddrToModuleIndex.find(getVAFromSectOffset(Sect, Offset));
+ if (Iter == AddrToModuleIndex.end())
+ return false;
+ ModuleIndex = Iter.value();
+ return true;
+}
+
+void NativeSession::parseSectionContribs() {
+ auto Dbi = Pdb->getPDBDbiStream();
+ if (!Dbi)
+ return;
+
+ class Visitor : public ISectionContribVisitor {
+ NativeSession &Session;
+ IMap &AddrMap;
+
+ public:
+ Visitor(NativeSession &Session, IMap &AddrMap)
+ : Session(Session), AddrMap(AddrMap) {}
+ void visit(const SectionContrib &C) override {
+ if (C.Size == 0)
+ return;
+
+ uint64_t VA = Session.getVAFromSectOffset(C.ISect, C.Off);
+ uint64_t End = VA + C.Size;
+
+ // Ignore overlapping sections based on the assumption that a valid
+ // PDB file should not have overlaps.
+ if (!AddrMap.overlaps(VA, End))
+ AddrMap.insert(VA, End, C.Imod);
+ }
+ void visit(const SectionContrib2 &C) override { visit(C.Base); }
+ };
+
+ Visitor V(*this, AddrToModuleIndex);
+ Dbi->visitSectionContributions(V);
+}
+
+Expected<ModuleDebugStreamRef>
+NativeSession::getModuleDebugStream(uint32_t Index) const {
+ auto *Dbi = getDbiStreamPtr(*Pdb);
+ assert(Dbi && "Dbi stream not present");
+
+ DbiModuleDescriptor Modi = Dbi->modules().getModuleDescriptor(Index);
+
+ uint16_t ModiStream = Modi.getModuleStreamIndex();
+ if (ModiStream == kInvalidStreamIndex)
+ return make_error<RawError>("Module stream not present");
+
+ std::unique_ptr<msf::MappedBlockStream> ModStreamData =
+ Pdb->createIndexedStream(ModiStream);
+
+ ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
+ if (auto EC = ModS.reload())
+ return std::move(EC);
+
+ return std::move(ModS);
+}
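
parseSectionContribs() walks the DBI section contributions once and records each contribution's [VA, VA + Size) range against its module index in an interval map, so moduleIndexForVA() becomes a single lookup instead of a per-query scan. A minimal stand-in using a std::map keyed by range start (overlap handling omitted; the patch above simply skips overlapping contributions):

    #include <cstdint>
    #include <map>
    #include <optional>
    #include <utility>

    class AddrToModuleMap {
      // range start VA -> {range end VA (exclusive), module index}
      std::map<uint64_t, std::pair<uint64_t, uint16_t>> Ranges;

    public:
      void insert(uint64_t Start, uint64_t End, uint16_t Modi) {
        Ranges.emplace(Start, std::make_pair(End, Modi));
      }
      // Find the range whose start is <= VA, then check VA falls before its end.
      std::optional<uint16_t> lookup(uint64_t VA) const {
        auto It = Ranges.upper_bound(VA);
        if (It == Ranges.begin())
          return std::nullopt;
        --It;
        if (VA < It->second.first)
          return It->second.second;
        return std::nullopt;
      }
    };
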
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp
index 6473207e058a..fd813dee6b9f 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp
@@ -1,4 +1,4 @@
-//===- NativeSourceFile.cpp - Native line number implementaiton -*- C++ -*-===//
+//===- NativeSourceFile.cpp - Native line number implementation -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
index b0be7f76e86e..917ec14e58d6 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
@@ -120,7 +120,7 @@ PDB_UdtType NativeTypeUDT::getUdtKind() const {
case TypeRecordKind::Interface:
return PDB_UdtType::Interface;
default:
- llvm_unreachable("Unexected udt kind");
+ llvm_unreachable("Unexpected udt kind");
}
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
index 9f15907b519e..fd9a0deb54d6 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
@@ -1,5 +1,6 @@
#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
@@ -10,8 +11,10 @@
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
#include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
@@ -68,7 +71,7 @@ static const struct BuiltinTypeEntry {
};
SymbolCache::SymbolCache(NativeSession &Session, DbiStream *Dbi)
- : Session(Session), Dbi(Dbi), AddrToModuleIndex(IMapAllocator) {
+ : Session(Session), Dbi(Dbi) {
// Id 0 is reserved for the invalid symbol.
Cache.push_back(nullptr);
SourceFiles.push_back(nullptr);
@@ -101,14 +104,15 @@ SymbolCache::createGlobalsEnumerator(codeview::SymbolKind Kind) {
}
SymIndexId SymbolCache::createSimpleType(TypeIndex Index,
- ModifierOptions Mods) {
+ ModifierOptions Mods) const {
if (Index.getSimpleMode() != codeview::SimpleTypeMode::Direct)
return createSymbol<NativeTypePointer>(Index);
const auto Kind = Index.getSimpleKind();
- const auto It = std::find_if(
- std::begin(BuiltinTypes), std::end(BuiltinTypes),
- [Kind](const BuiltinTypeEntry &Builtin) { return Builtin.Kind == Kind; });
+ const auto It =
+ llvm::find_if(BuiltinTypes, [Kind](const BuiltinTypeEntry &Builtin) {
+ return Builtin.Kind == Kind;
+ });
if (It == std::end(BuiltinTypes))
return 0;
return createSymbol<NativeTypeBuiltin>(Mods, It->Type, It->Size);
@@ -116,7 +120,7 @@ SymIndexId SymbolCache::createSimpleType(TypeIndex Index,
SymIndexId
SymbolCache::createSymbolForModifiedType(codeview::TypeIndex ModifierTI,
- codeview::CVType CVT) {
+ codeview::CVType CVT) const {
ModifierRecord Record;
if (auto EC = TypeDeserializer::deserializeAs<ModifierRecord>(CVT, Record)) {
consumeError(std::move(EC));
@@ -146,7 +150,7 @@ SymbolCache::createSymbolForModifiedType(codeview::TypeIndex ModifierTI,
return 0;
}
-SymIndexId SymbolCache::findSymbolByTypeIndex(codeview::TypeIndex Index) {
+SymIndexId SymbolCache::findSymbolByTypeIndex(codeview::TypeIndex Index) const {
// First see if it's already in our cache.
const auto Entry = TypeIndexToSymbolId.find(Index);
if (Entry != TypeIndexToSymbolId.end())
@@ -243,7 +247,7 @@ SymbolCache::getSymbolById(SymIndexId SymbolId) const {
return nullptr;
// Make sure to handle the case where we've inserted a placeholder symbol
- // for types we don't yet suppport.
+ // for types we don't yet support.
NativeRawSymbol *NRS = Cache[SymbolId].get();
if (!NRS)
return nullptr;
@@ -288,39 +292,36 @@ SymIndexId SymbolCache::getOrCreateGlobalSymbolByOffset(uint32_t Offset) {
return Id;
}
-Expected<ModuleDebugStreamRef>
-SymbolCache::getModuleDebugStream(uint32_t Index) const {
- assert(Dbi && "Dbi stream not present");
-
- DbiModuleDescriptor Modi = Dbi->modules().getModuleDescriptor(Index);
-
- uint16_t ModiStream = Modi.getModuleStreamIndex();
- if (ModiStream == kInvalidStreamIndex)
- return make_error<RawError>("Module stream not present");
-
- std::unique_ptr<msf::MappedBlockStream> ModStreamData =
- Session.getPDBFile().createIndexedStream(ModiStream);
-
- ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
- if (auto EC = ModS.reload())
- return std::move(EC);
+SymIndexId SymbolCache::getOrCreateInlineSymbol(InlineSiteSym Sym,
+ uint64_t ParentAddr,
+ uint16_t Modi,
+ uint32_t RecordOffset) const {
+ auto Iter = SymTabOffsetToSymbolId.find({Modi, RecordOffset});
+ if (Iter != SymTabOffsetToSymbolId.end())
+ return Iter->second;
- return std::move(ModS);
+ SymIndexId Id = createSymbol<NativeInlineSiteSymbol>(Sym, ParentAddr);
+ SymTabOffsetToSymbolId.insert({{Modi, RecordOffset}, Id});
+ return Id;
}
std::unique_ptr<PDBSymbol>
SymbolCache::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset,
PDB_SymType Type) {
- if (AddrToModuleIndex.empty())
- parseSectionContribs();
-
switch (Type) {
case PDB_SymType::Function:
return findFunctionSymbolBySectOffset(Sect, Offset);
case PDB_SymType::PublicSymbol:
return findPublicSymbolBySectOffset(Sect, Offset);
+ case PDB_SymType::Compiland: {
+ uint16_t Modi;
+ if (!Session.moduleIndexForSectOffset(Sect, Offset, Modi))
+ return nullptr;
+ return getOrCreateCompiland(Modi);
+ }
case PDB_SymType::None: {
- // FIXME: Implement for PDB_SymType::Data.
+ // FIXME: Implement for PDB_SymType::Data. The symbolizer calls this but
+ // only uses it to find the symbol length.
if (auto Sym = findFunctionSymbolBySectOffset(Sect, Offset))
return Sym;
return nullptr;
@@ -332,18 +333,19 @@ SymbolCache::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset,
std::unique_ptr<PDBSymbol>
SymbolCache::findFunctionSymbolBySectOffset(uint32_t Sect, uint32_t Offset) {
- auto Iter = AddressToFunctionSymId.find({Sect, Offset});
- if (Iter != AddressToFunctionSymId.end())
+ auto Iter = AddressToSymbolId.find({Sect, Offset});
+ if (Iter != AddressToSymbolId.end())
return getSymbolById(Iter->second);
if (!Dbi)
return nullptr;
- auto Modi = getModuleIndexForAddr(Session.getVAFromSectOffset(Sect, Offset));
- if (!Modi)
+ uint16_t Modi;
+ if (!Session.moduleIndexForSectOffset(Sect, Offset, Modi))
return nullptr;
- auto ExpectedModS = getModuleDebugStream(*Modi);
+ Expected<ModuleDebugStreamRef> ExpectedModS =
+ Session.getModuleDebugStream(Modi);
if (!ExpectedModS) {
consumeError(ExpectedModS.takeError());
return nullptr;
@@ -357,8 +359,14 @@ SymbolCache::findFunctionSymbolBySectOffset(uint32_t Sect, uint32_t Offset) {
auto PS = cantFail(SymbolDeserializer::deserializeAs<ProcSym>(*I));
if (Sect == PS.Segment && Offset >= PS.CodeOffset &&
Offset < PS.CodeOffset + PS.CodeSize) {
- SymIndexId Id = createSymbol<NativeFunctionSymbol>(PS);
- AddressToFunctionSymId.insert({{Sect, Offset}, Id});
+ // Check if the symbol is already cached.
+ auto Found = AddressToSymbolId.find({PS.Segment, PS.CodeOffset});
+ if (Found != AddressToSymbolId.end())
+ return getSymbolById(Found->second);
+
+ // Otherwise, create a new symbol.
+ SymIndexId Id = createSymbol<NativeFunctionSymbol>(PS, I.offset());
+ AddressToSymbolId.insert({{PS.Segment, PS.CodeOffset}, Id});
return getSymbolById(Id);
}
@@ -418,9 +426,16 @@ SymbolCache::findPublicSymbolBySectOffset(uint32_t Sect, uint32_t Offset) {
consumeError(Sym.takeError());
return nullptr;
}
+
+ // Check if the symbol is already cached.
auto PS = cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym.get()));
+ auto Found = AddressToPublicSymId.find({PS.Segment, PS.Offset});
+ if (Found != AddressToPublicSymId.end())
+ return getSymbolById(Found->second);
+
+ // Otherwise, create a new symbol.
SymIndexId Id = createSymbol<NativePublicSymbol>(PS);
- AddressToPublicSymId.insert({{Sect, Offset}, Id});
+ AddressToPublicSymId.insert({{PS.Segment, PS.Offset}, Id});
return getSymbolById(Id);
}
@@ -435,7 +450,8 @@ SymbolCache::findLineTable(uint16_t Modi) const {
// If there is an error or there are no lines, just return the
// empty vector.
- Expected<ModuleDebugStreamRef> ExpectedModS = getModuleDebugStream(Modi);
+ Expected<ModuleDebugStreamRef> ExpectedModS =
+ Session.getModuleDebugStream(Modi);
if (!ExpectedModS) {
consumeError(ExpectedModS.takeError());
return ModuleLineTable;
@@ -466,11 +482,19 @@ SymbolCache::findLineTable(uint16_t Modi) const {
auto ColIt = Group.Columns.begin();
auto ColsEnd = Group.Columns.end();
+ // Add a line to mark the beginning of this section.
+ uint64_t StartAddr =
+ Session.getVAFromSectOffset(RelocSegment, RelocOffset);
+ LineInfo FirstLine(Group.LineNumbers.front().Flags);
+ uint32_t ColNum =
+ (Lines.hasColumnInfo()) ? Group.Columns.front().StartColumn : 0;
+ Entries.push_back({StartAddr, FirstLine, ColNum, Group.NameIndex, false});
+
for (const LineNumberEntry &LN : Group.LineNumbers) {
uint64_t VA =
Session.getVAFromSectOffset(RelocSegment, RelocOffset + LN.Offset);
LineInfo Line(LN.Flags);
- uint32_t ColNum = 0;
+ ColNum = 0;
if (Lines.hasColumnInfo() && ColIt != ColsEnd) {
ColNum = ColIt->StartColumn;
@@ -480,36 +504,31 @@ SymbolCache::findLineTable(uint16_t Modi) const {
}
// Add a terminal entry line to mark the end of this subsection.
- uint64_t VA = Session.getVAFromSectOffset(
- RelocSegment, RelocOffset + Lines.header()->CodeSize);
+ uint64_t EndAddr = StartAddr + Lines.header()->CodeSize;
LineInfo LastLine(Group.LineNumbers.back().Flags);
- uint32_t ColNum =
- (Lines.hasColumnInfo()) ? Group.Columns.back().StartColumn : 0;
- Entries.push_back({VA, LastLine, ColNum, Group.NameIndex, true});
+ ColNum = (Lines.hasColumnInfo()) ? Group.Columns.back().StartColumn : 0;
+ Entries.push_back({EndAddr, LastLine, ColNum, Group.NameIndex, true});
EntryList.push_back(Entries);
}
}
// Sort EntryList, and add flattened contents to the line table.
- std::sort(EntryList.begin(), EntryList.end(),
- [](const std::vector<LineTableEntry> &LHS,
- const std::vector<LineTableEntry> &RHS) {
- return LHS[0].Addr < RHS[0].Addr;
- });
+ llvm::sort(EntryList, [](const std::vector<LineTableEntry> &LHS,
+ const std::vector<LineTableEntry> &RHS) {
+ return LHS[0].Addr < RHS[0].Addr;
+ });
for (size_t I = 0; I < EntryList.size(); ++I)
- ModuleLineTable.insert(ModuleLineTable.end(), EntryList[I].begin(),
- EntryList[I].end());
+ llvm::append_range(ModuleLineTable, EntryList[I]);
return ModuleLineTable;
}
std::unique_ptr<IPDBEnumLineNumbers>
SymbolCache::findLineNumbersByVA(uint64_t VA, uint32_t Length) const {
- Optional<uint16_t> MaybeModi = getModuleIndexForAddr(VA);
- if (!MaybeModi)
+ uint16_t Modi;
+ if (!Session.moduleIndexForVA(VA, Modi))
return nullptr;
- uint16_t Modi = *MaybeModi;
std::vector<LineTableEntry> Lines = findLineTable(Modi);
if (Lines.empty())
@@ -528,7 +547,8 @@ SymbolCache::findLineNumbersByVA(uint64_t VA, uint32_t Length) const {
--LineIter;
}
- Expected<ModuleDebugStreamRef> ExpectedModS = getModuleDebugStream(Modi);
+ Expected<ModuleDebugStreamRef> ExpectedModS =
+ Session.getModuleDebugStream(Modi);
if (!ExpectedModS) {
consumeError(ExpectedModS.takeError());
return nullptr;
@@ -542,34 +562,8 @@ SymbolCache::findLineNumbersByVA(uint64_t VA, uint32_t Length) const {
// Populate a vector of NativeLineNumbers that have addresses in the given
// address range.
- Optional<uint16_t> EndModi = getModuleIndexForAddr(VA + Length);
- if (!EndModi)
- return nullptr;
std::vector<NativeLineNumber> LineNumbers;
- while (Modi <= *EndModi) {
- // If we reached the end of the current module, increment Modi and get the
- // new line table and checksums array.
- if (LineIter == Lines.end()) {
- ++Modi;
-
- ExpectedModS = getModuleDebugStream(Modi);
- if (!ExpectedModS) {
- consumeError(ExpectedModS.takeError());
- break;
- }
- ExpectedChecksums = ExpectedModS->findChecksumsSubsection();
- if (!ExpectedChecksums) {
- consumeError(ExpectedChecksums.takeError());
- break;
- }
-
- Lines = findLineTable(Modi);
- LineIter = Lines.begin();
-
- if (Lines.empty())
- continue;
- }
-
+ while (LineIter != Lines.end()) {
if (LineIter->IsTerminalEntry) {
++LineIter;
continue;
@@ -587,7 +581,7 @@ SymbolCache::findLineNumbersByVA(uint64_t VA, uint32_t Length) const {
ExpectedChecksums->getArray().at(LineIter->FileNameIndex);
uint32_t SrcFileId = getOrCreateSourceFile(*ChecksumIter);
NativeLineNumber LineNum(Session, LineIter->Line, LineIter->ColumnNumber,
- LineSect, LineOff, LineLength, SrcFileId);
+ LineSect, LineOff, LineLength, SrcFileId, Modi);
LineNumbers.push_back(LineNum);
++LineIter;
}
@@ -636,39 +630,4 @@ SymbolCache::getOrCreateSourceFile(const FileChecksumEntry &Checksums) const {
return Id;
}
-void SymbolCache::parseSectionContribs() {
- if (!Dbi)
- return;
-
- class Visitor : public ISectionContribVisitor {
- NativeSession &Session;
- IMap &AddrMap;
-
- public:
- Visitor(NativeSession &Session, IMap &AddrMap)
- : Session(Session), AddrMap(AddrMap) {}
- void visit(const SectionContrib &C) override {
- if (C.Size == 0)
- return;
-
- uint64_t VA = Session.getVAFromSectOffset(C.ISect, C.Off);
- uint64_t End = VA + C.Size;
-
- // Ignore overlapping sections based on the assumption that a valid
- // PDB file should not have overlaps.
- if (!AddrMap.overlaps(VA, End))
- AddrMap.insert(VA, End, C.Imod);
- }
- void visit(const SectionContrib2 &C) override { visit(C.Base); }
- };
- Visitor V(Session, AddrToModuleIndex);
- Dbi->visitSectionContributions(V);
-}
-
-Optional<uint16_t> SymbolCache::getModuleIndexForAddr(uint64_t Addr) const {
- auto Iter = AddrToModuleIndex.find(Addr);
- if (Iter == AddrToModuleIndex.end())
- return None;
- return Iter.value();
-}
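
The rebuilt line table is a flat, address-sorted vector: each subsection contributes a start entry, its real line entries, and a terminal entry that only closes the range, and findLineNumbersByVA() locates the last entry at or before the queried VA. The lookup shape in isolation (Entry is illustrative, not the LLVM LineTableEntry):

    #include <algorithm>
    #include <cstdint>
    #include <optional>
    #include <vector>

    struct Entry {
      uint64_t Addr;
      uint32_t Line;
      bool IsTerminal; // closes the preceding range, carries no new line
    };

    // Table must be sorted by Addr, as the flattened line table above is.
    std::optional<uint32_t> lineForVA(const std::vector<Entry> &Table,
                                      uint64_t VA) {
      auto It = std::upper_bound(
          Table.begin(), Table.end(), VA,
          [](uint64_t V, const Entry &E) { return V < E.Addr; });
      if (It == Table.begin())
        return std::nullopt; // VA precedes the first entry
      --It;
      if (It->IsTerminal)
        return std::nullopt; // VA falls past the end of a contribution
      return It->Line;
    }
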
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index 51a1f0a544e3..5f4f497690b6 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cstdint>
+#include <numeric>
using namespace llvm;
using namespace llvm::msf;
@@ -41,39 +42,68 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) {
VerHeader = Version;
}
+void TpiStreamBuilder::updateTypeIndexOffsets(ArrayRef<uint16_t> Sizes) {
+ // If we just crossed an 8KB threshold, add a type index offset.
+ for (uint16_t Size : Sizes) {
+ size_t NewSize = TypeRecordBytes + Size;
+ constexpr size_t EightKB = 8 * 1024;
+ if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecordCount == 0) {
+ TypeIndexOffsets.push_back(
+ {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex +
+ TypeRecordCount),
+ ulittle32_t(TypeRecordBytes)});
+ }
+ ++TypeRecordCount;
+ TypeRecordBytes = NewSize;
+ }
+}
+
void TpiStreamBuilder::addTypeRecord(ArrayRef<uint8_t> Record,
Optional<uint32_t> Hash) {
- // If we just crossed an 8KB threshold, add a type index offset.
assert(((Record.size() & 3) == 0) &&
"The type record's size is not a multiple of 4 bytes which will "
"cause misalignment in the output TPI stream!");
- size_t NewSize = TypeRecordBytes + Record.size();
- constexpr size_t EightKB = 8 * 1024;
- if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) {
- TypeIndexOffsets.push_back(
- {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex +
- TypeRecords.size()),
- ulittle32_t(TypeRecordBytes)});
- }
- TypeRecordBytes = NewSize;
+ assert(Record.size() <= codeview::MaxRecordLength);
+ uint16_t OneSize = (uint16_t)Record.size();
+ updateTypeIndexOffsets(makeArrayRef(&OneSize, 1));
- TypeRecords.push_back(Record);
+ TypeRecBuffers.push_back(Record);
+ // FIXME: Require it.
if (Hash)
TypeHashes.push_back(*Hash);
}
+void TpiStreamBuilder::addTypeRecords(ArrayRef<uint8_t> Types,
+ ArrayRef<uint16_t> Sizes,
+ ArrayRef<uint32_t> Hashes) {
+ // Ignore empty type buffers. There should be no hashes or sizes in this case.
+ if (Types.empty()) {
+ assert(Sizes.empty() && Hashes.empty());
+ return;
+ }
+
+ assert(((Types.size() & 3) == 0) &&
+ "The type record's size is not a multiple of 4 bytes which will "
+ "cause misalignment in the output TPI stream!");
+ assert(Sizes.size() == Hashes.size() && "sizes and hashes should be in sync");
+ assert(std::accumulate(Sizes.begin(), Sizes.end(), 0U) == Types.size() &&
+ "sizes of type records should sum to the size of the types");
+ updateTypeIndexOffsets(Sizes);
+
+ TypeRecBuffers.push_back(Types);
+ llvm::append_range(TypeHashes, Hashes);
+}
+
Error TpiStreamBuilder::finalize() {
if (Header)
return Error::success();
TpiStreamHeader *H = Allocator.Allocate<TpiStreamHeader>();
- uint32_t Count = TypeRecords.size();
-
H->Version = VerHeader;
H->HeaderSize = sizeof(TpiStreamHeader);
H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex;
- H->TypeIndexEnd = H->TypeIndexBegin + Count;
+ H->TypeIndexEnd = H->TypeIndexBegin + TypeRecordCount;
H->TypeRecordBytes = TypeRecordBytes;
H->HashStreamIndex = HashStreamIndex;
@@ -104,7 +134,7 @@ uint32_t TpiStreamBuilder::calculateSerializedLength() {
}
uint32_t TpiStreamBuilder::calculateHashBufferSize() const {
- assert((TypeRecords.size() == TypeHashes.size() || TypeHashes.empty()) &&
+ assert((TypeRecordCount == TypeHashes.size() || TypeHashes.empty()) &&
"either all or no type records should have hashes");
return TypeHashes.size() * sizeof(ulittle32_t);
}
@@ -155,7 +185,7 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (auto EC = Writer.writeObject(*Header))
return EC;
- for (auto Rec : TypeRecords) {
+ for (auto Rec : TypeRecBuffers) {
assert(!Rec.empty() && "Attempting to write an empty type record shifts "
"all offsets in the TPI stream!");
assert(((Rec.size() & 3) == 0) &&
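
updateTypeIndexOffsets() factors out the 8 KiB bucketing so that both the single-record path and the new bulk addTypeRecords() path record an (index, byte offset) pair whenever the running stream size crosses an 8 KiB boundary, letting readers binary-search for a type index without scanning the whole stream. The same bookkeeping as a standalone sketch (names are illustrative; 0x1000 is the usual CodeView first non-simple type index):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct IndexOffset {
      uint32_t TypeIndex;
      uint32_t ByteOffset;
    };

    void appendRecordSizes(const std::vector<uint16_t> &Sizes,
                           uint32_t &RecordCount, size_t &RecordBytes,
                           std::vector<IndexOffset> &Offsets) {
      constexpr size_t EightKB = 8 * 1024;
      constexpr uint32_t FirstNonSimpleIndex = 0x1000; // CodeView convention
      for (uint16_t Size : Sizes) {
        size_t NewSize = RecordBytes + Size;
        // Emit an offset for the very first record and on every 8 KiB crossing.
        if (NewSize / EightKB > RecordBytes / EightKB || RecordCount == 0)
          Offsets.push_back({FirstNonSimpleIndex + RecordCount,
                             static_cast<uint32_t>(RecordBytes)});
        ++RecordCount;
        RecordBytes = NewSize;
      }
    }
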
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBContext.cpp
index e452f1d4ced7..0ebb70e010d5 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBContext.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBContext.cpp
@@ -86,8 +86,43 @@ DIInliningInfo
PDBContext::getInliningInfoForAddress(object::SectionedAddress Address,
DILineInfoSpecifier Specifier) {
DIInliningInfo InlineInfo;
- DILineInfo Frame = getLineInfoForAddress(Address, Specifier);
- InlineInfo.addFrame(Frame);
+ DILineInfo CurrentLine = getLineInfoForAddress(Address, Specifier);
+
+ // Find the function at this address.
+ std::unique_ptr<PDBSymbol> ParentFunc =
+ Session->findSymbolByAddress(Address.Address, PDB_SymType::Function);
+ if (!ParentFunc) {
+ InlineInfo.addFrame(CurrentLine);
+ return InlineInfo;
+ }
+
+ auto Frames = ParentFunc->findInlineFramesByVA(Address.Address);
+ if (!Frames || Frames->getChildCount() == 0) {
+ InlineInfo.addFrame(CurrentLine);
+ return InlineInfo;
+ }
+
+ while (auto Frame = Frames->getNext()) {
+ uint32_t Length = 1;
+ auto LineNumbers = Frame->findInlineeLinesByVA(Address.Address, Length);
+ if (!LineNumbers || LineNumbers->getChildCount() == 0)
+ break;
+
+ std::unique_ptr<IPDBLineNumber> Line = LineNumbers->getNext();
+ assert(Line);
+
+ DILineInfo LineInfo;
+ LineInfo.FunctionName = Frame->getName();
+ auto SourceFile = Session->getSourceFileById(Line->getSourceFileId());
+ if (SourceFile &&
+ Specifier.FLIKind != DILineInfoSpecifier::FileLineInfoKind::None)
+ LineInfo.FileName = SourceFile->getFileName();
+ LineInfo.Line = Line->getLineNumber();
+ LineInfo.Column = Line->getColumnNumber();
+ InlineInfo.addFrame(LineInfo);
+ }
+
+ InlineInfo.addFrame(CurrentLine);
return InlineInfo;
}
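
PDBContext::getInliningInfoForAddress() now reports the full inline chain: findInlineFramesByVA() prepends each nested site as it descends, so the enumerator yields the deepest inline frame first, and the physical function's own line info is appended last, matching DIInliningInfo's innermost-frame-first layout. For a hypothetical call chain outer() -> middle() -> leaf() fully inlined at the queried address, the resulting frames would be:

    frame 0: leaf    (deepest inline site)
    frame 1: middle  (inline site)
    frame 2: outer   (containing function, from getLineInfoForAddress)
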
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
index 354a99476c4b..25962e5152eb 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -118,7 +118,21 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_DataKind &Data) {
raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
const llvm::codeview::CPURegister &CpuReg) {
- if (CpuReg.Cpu == llvm::codeview::CPUType::ARM64) {
+ if (CpuReg.Cpu == llvm::codeview::CPUType::ARMNT) {
+ switch (CpuReg.Reg) {
+#define CV_REGISTERS_ARM
+#define CV_REGISTER(name, val) \
+ case codeview::RegisterId::name: \
+ OS << #name; \
+ return OS;
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_ARM
+
+ default:
+ break;
+ }
+ } else if (CpuReg.Cpu == llvm::codeview::CPUType::ARM64) {
switch (CpuReg.Reg) {
#define CV_REGISTERS_ARM64
#define CV_REGISTER(name, val) \
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
index 8eb3311b09e3..d51091d80933 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
@@ -1,4 +1,4 @@
-//===- PDBInterfaceAnchors.h - defines class anchor funcions ----*- C++ -*-===//
+//===- PDBInterfaceAnchors.h - defines class anchor functions ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
// Class anchors are necessary per the LLVM Coding style guide, to ensure that
// the vtable is only generated in this object file, and not in every object
-// file that incldues the corresponding header.
+// file that includes the corresponding header.
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/IPDBDataStream.h"
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
index 34c8ac41d45b..d6bc7ee9c951 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
@@ -161,10 +161,27 @@ PDBSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name,
}
std::unique_ptr<IPDBEnumSymbols>
+PDBSymbol::findInlineFramesByVA(uint64_t VA) const {
+ return RawSymbol->findInlineFramesByVA(VA);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
PDBSymbol::findInlineFramesByRVA(uint32_t RVA) const {
return RawSymbol->findInlineFramesByRVA(RVA);
}
+std::unique_ptr<IPDBEnumLineNumbers>
+PDBSymbol::findInlineeLinesByVA(uint64_t VA, uint32_t Length) const {
+ return RawSymbol->findInlineeLinesByVA(VA, Length);
+}
+
+std::unique_ptr<IPDBEnumLineNumbers>
+PDBSymbol::findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const {
+ return RawSymbol->findInlineeLinesByRVA(RVA, Length);
+}
+
+std::string PDBSymbol::getName() const { return RawSymbol->getName(); }
+
std::unique_ptr<IPDBEnumSymbols>
PDBSymbol::getChildStats(TagStats &Stats) const {
std::unique_ptr<IPDBEnumSymbols> Result(findAllChildren());
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/UDTLayout.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/UDTLayout.cpp
index a8e1d0a619ca..55854bb49888 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/UDTLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/UDTLayout.cpp
@@ -289,10 +289,10 @@ void UDTLayoutBase::addChildToLayout(std::unique_ptr<LayoutItemBase> Child) {
UsedBytes |= ChildBytes;
if (ChildBytes.count() > 0) {
- auto Loc = std::upper_bound(LayoutItems.begin(), LayoutItems.end(), Begin,
- [](uint32_t Off, const LayoutItemBase *Item) {
- return (Off < Item->getOffsetInParent());
- });
+ auto Loc = llvm::upper_bound(
+ LayoutItems, Begin, [](uint32_t Off, const LayoutItemBase *Item) {
+ return (Off < Item->getOffsetInParent());
+ });
LayoutItems.insert(Loc, Child.get());
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
index 10352237763c..01dc31d84965 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -84,8 +84,10 @@ void DIPrinter::print(const DILineInfo &Info, bool Inlined) {
return;
}
OS << " Filename: " << Filename << "\n";
- if (Info.StartLine)
- OS << "Function start line: " << Info.StartLine << "\n";
+ if (Info.StartLine) {
+ OS << " Function start filename: " << Info.StartFileName << "\n";
+ OS << " Function start line: " << Info.StartLine << "\n";
+ }
OS << " Line: " << Info.Line << "\n";
OS << " Column: " << Info.Column << "\n";
if (Info.Discriminator)
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 84524195fa8a..93d05e4e27bf 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -12,24 +12,15 @@
#include "SymbolizableObjectFile.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolSize.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Error.h"
#include <algorithm>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <system_error>
-#include <utility>
-#include <vector>
using namespace llvm;
using namespace object;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
index 0ba304ee4c61..be3c66df056f 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -15,12 +15,12 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
-#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Error.h"
#include <cstdint>
-#include <map>
#include <memory>
#include <string>
-#include <system_error>
+#include <utility>
+#include <vector>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 051195934aaf..741860562e16 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Config/config.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/DebugInfo/PDB/PDBContext.h"
@@ -286,10 +287,8 @@ bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
}
template <typename ELFT>
-Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> *Obj) {
- if (!Obj)
- return {};
- auto PhdrsOrErr = Obj->program_headers();
+Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> &Obj) {
+ auto PhdrsOrErr = Obj.program_headers();
if (!PhdrsOrErr) {
consumeError(PhdrsOrErr.takeError());
return {};
@@ -298,7 +297,7 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> *Obj) {
if (P.p_type != ELF::PT_NOTE)
continue;
Error Err = Error::success();
- for (auto N : Obj->notes(P, Err))
+ for (auto N : Obj.notes(P, Err))
if (N.getType() == ELF::NT_GNU_BUILD_ID && N.getName() == ELF::ELF_NOTE_GNU)
return N.getDesc();
consumeError(std::move(Err));
@@ -556,9 +555,9 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
#if 0
using namespace pdb;
std::unique_ptr<IPDBSession> Session;
- PDB_ReaderType ReaderType = Opts.UseNativePDBReader
- ? PDB_ReaderType::Native
- : PDB_ReaderType::DIA;
+
+ PDB_ReaderType ReaderType =
+ Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native;
if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(),
Session)) {
Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
@@ -596,10 +595,8 @@ StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
if (Front != '?') {
size_t AtPos = SymbolName.rfind('@');
if (AtPos != StringRef::npos &&
- std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(),
- [](char C) { return C >= '0' && C <= '9'; })) {
+ all_of(drop_begin(SymbolName, AtPos + 1), isDigit))
SymbolName = SymbolName.substr(0, AtPos);
- }
}
// Remove any ending '@' for vectorcall.
diff --git a/contrib/llvm-project/llvm/lib/Demangle/Demangle.cpp b/contrib/llvm-project/llvm/lib/Demangle/Demangle.cpp
index 71dafa0b2e43..000f75b6a977 100644
--- a/contrib/llvm-project/llvm/lib/Demangle/Demangle.cpp
+++ b/contrib/llvm-project/llvm/lib/Demangle/Demangle.cpp
@@ -31,6 +31,6 @@ std::string llvm::demangle(const std::string &MangledName) {
return MangledName;
std::string Ret = Demangled;
- free(Demangled);
+ std::free(Demangled);
return Ret;
}
diff --git a/contrib/llvm-project/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/contrib/llvm-project/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index 9cee975231a2..8b15ffcee778 100644
--- a/contrib/llvm-project/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -649,5 +649,4 @@ void SpecialTableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
TargetName->output(OS, Flags);
OS << "'}";
}
- return;
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index d8bd671c6661..c8bbf0bcdfda 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -53,11 +53,6 @@ ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
std::shared_ptr<LegacyJITSymbolResolver> Resolver,
std::unique_ptr<TargetMachine> TM) = nullptr;
-ExecutionEngine *(*ExecutionEngine::OrcMCJITReplacementCtor)(
- std::string *ErrorStr, std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<LegacyJITSymbolResolver> Resolver,
- std::unique_ptr<TargetMachine> TM) = nullptr;
-
ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M,
std::string *ErrorStr) =nullptr;
@@ -476,8 +471,7 @@ EngineBuilder::EngineBuilder() : EngineBuilder(nullptr) {}
EngineBuilder::EngineBuilder(std::unique_ptr<Module> M)
: M(std::move(M)), WhichEngine(EngineKind::Either), ErrorStr(nullptr),
- OptLevel(CodeGenOpt::Default), MemMgr(nullptr), Resolver(nullptr),
- UseOrcMCJITReplacement(false) {
+ OptLevel(CodeGenOpt::Default), MemMgr(nullptr), Resolver(nullptr) {
// IR module verification is enabled by default in debug builds, and disabled
// by default in release builds.
#ifndef NDEBUG
@@ -540,12 +534,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
}
ExecutionEngine *EE = nullptr;
- if (ExecutionEngine::OrcMCJITReplacementCtor && UseOrcMCJITReplacement) {
- EE = ExecutionEngine::OrcMCJITReplacementCtor(ErrorStr, std::move(MemMgr),
- std::move(Resolver),
- std::move(TheTM));
- EE->addModule(std::move(M));
- } else if (ExecutionEngine::MCJITCtor)
+ if (ExecutionEngine::MCJITCtor)
EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, std::move(MemMgr),
std::move(Resolver), std::move(TheTM));
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 1ebc820a8b49..3f75012f5cf9 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "IntelJITEventsWrapper.h"
+#include "ittnotify.h"
#include "llvm-c/ExecutionEngine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -36,6 +37,86 @@ using namespace llvm::object;
namespace {
+class IntelIttnotifyInfo {
+ std::string ModuleName;
+ std::vector<std::string> SectionNamesVector;
+ std::vector<__itt_section_info> SectionInfoVector;
+ __itt_module_object *ModuleObject;
+ IntelJITEventsWrapper &WrapperRef;
+
+public:
+ IntelIttnotifyInfo(IntelJITEventsWrapper &Wrapper)
+ : ModuleObject(NULL), WrapperRef(Wrapper){};
+ ~IntelIttnotifyInfo() { delete ModuleObject; };
+
+ void setModuleName(const char *Name) { ModuleName = std::string(Name); }
+
+ const char *getModuleName() { return ModuleName.c_str(); }
+
+ void setModuleObject(__itt_module_object *ModuleObj) {
+ ModuleObject = ModuleObj;
+ }
+
+ __itt_module_object *getModuleObject() { return ModuleObject; }
+
+ __itt_section_info *getSectionInfoVectorBegin() {
+ if (SectionInfoVector.size())
+ return &SectionInfoVector[0];
+ return NULL;
+ }
+
+ void reportSection(llvm::IttEventType EventType, const char *SectionName,
+ unsigned int SectionSize) {
+ WrapperRef.iJitIttNotifyInfo(EventType, SectionName, SectionSize);
+ }
+
+ int fillSectionInformation(const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
+
+ int SectionCounter = 0;
+
+ for (auto &Section : Obj.sections()) {
+ uint64_t SectionLoadAddr = L.getSectionLoadAddress(Section);
+ if (SectionLoadAddr) {
+ object::ELFSectionRef ElfSection(Section);
+
+ __itt_section_info SectionInfo;
+ memset(&SectionInfo, 0, sizeof(SectionInfo));
+ SectionInfo.start_addr = reinterpret_cast<void *>(SectionLoadAddr);
+ SectionInfo.file_offset = ElfSection.getOffset();
+ SectionInfo.flags = ElfSection.getFlags();
+
+ StringRef SectionName("");
+ auto SectionNameOrError = ElfSection.getName();
+ if (SectionNameOrError)
+ SectionName = *SectionNameOrError;
+
+ SectionNamesVector.push_back(SectionName.str());
+ SectionInfo.size = ElfSection.getSize();
+ reportSection(llvm::LoadBinarySection, SectionName.str().c_str(),
+ SectionInfo.size);
+
+ if (ElfSection.isBSS()) {
+ SectionInfo.type = itt_section_type_bss;
+ } else if (ElfSection.isData()) {
+ SectionInfo.type = itt_section_type_data;
+ } else if (ElfSection.isText()) {
+ SectionInfo.type = itt_section_type_text;
+ }
+ SectionInfoVector.push_back(SectionInfo);
+ ++SectionCounter;
+ }
+ }
+ // Hereinafter: don't change SectionNamesVector content to avoid vector
+ // reallocation - reallocation invalidates all the references, pointers, and
+ // iterators referring to the elements in the sequence.
+ for (int I = 0; I < SectionCounter; ++I) {
+ SectionInfoVector[I].name = SectionNamesVector[I].c_str();
+ }
+ return SectionCounter;
+ }
+};
+
class IntelJITEventListener : public JITEventListener {
typedef DenseMap<void*, unsigned int> MethodIDMap;
@@ -48,6 +129,8 @@ class IntelJITEventListener : public JITEventListener {
ObjectMap LoadedObjectMap;
std::map<ObjectKey, OwningBinary<ObjectFile>> DebugObjects;
+ std::map<ObjectKey, std::unique_ptr<IntelIttnotifyInfo>> KeyToIttnotify;
+
public:
IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) {
Wrapper.reset(libraryWrapper);
@@ -95,146 +178,205 @@ static iJIT_Method_Load FunctionDescToIntelJITFormat(
return Result;
}
+int getBackwardCompatibilityMode() {
+
+ char *BackwardCompatibilityEnv = getenv("INTEL_JIT_BACKWARD_COMPATIBILITY");
+ int BackwardCompatibilityMode = 0;
+ if (BackwardCompatibilityEnv) {
+ StringRef(BackwardCompatibilityEnv)
+ .getAsInteger(10, BackwardCompatibilityMode);
+ }
+ return BackwardCompatibilityMode;
+}
+
void IntelJITEventListener::notifyObjectLoaded(
ObjectKey Key, const ObjectFile &Obj,
const RuntimeDyld::LoadedObjectInfo &L) {
- OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
- const ObjectFile *DebugObj = DebugObjOwner.getBinary();
- if (!DebugObj)
- return;
-
- // Get the address of the object image for use as a unique identifier
- const void* ObjData = DebugObj->getData().data();
- std::unique_ptr<DIContext> Context = DWARFContext::create(*DebugObj);
- MethodAddressVector Functions;
-
- // Use symbol info to iterate functions in the object.
- for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(*DebugObj)) {
- SymbolRef Sym = P.first;
- std::vector<LineNumberInfo> LineInfo;
- std::string SourceFileName;
-
- Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
- if (!SymTypeOrErr) {
- // TODO: Actually report errors helpfully.
- consumeError(SymTypeOrErr.takeError());
- continue;
+ int BackwardCompatibilityMode = getBackwardCompatibilityMode();
+ if (BackwardCompatibilityMode == 0) {
+ if (Obj.isELF()) {
+ std::unique_ptr<IntelIttnotifyInfo> ModuleIttnotify =
+ std::make_unique<IntelIttnotifyInfo>(*Wrapper);
+ ModuleIttnotify->setModuleName(
+ StringRef(llvm::utohexstr(
+ MD5Hash(Obj.getMemoryBufferRef().getBuffer()), true))
+ .str()
+ .c_str());
+
+ __itt_module_object *ModuleObject = new __itt_module_object();
+ ModuleObject->module_name = ModuleIttnotify->getModuleName();
+ ModuleObject->module_size = Obj.getMemoryBufferRef().getBufferSize();
+ Wrapper->iJitIttNotifyInfo(llvm::LoadBinaryModule,
+ ModuleObject->module_name,
+ ModuleObject->module_size);
+ ModuleObject->module_type = __itt_module_type_elf;
+ ModuleObject->section_number =
+ ModuleIttnotify->fillSectionInformation(Obj, L);
+ ModuleObject->module_buffer =
+ (void *)const_cast<char *>(Obj.getMemoryBufferRef().getBufferStart());
+ ModuleObject->module_id =
+ __itt_id_make((void *)&(*ModuleObject), ModuleObject->module_size);
+ ModuleObject->section_array =
+ ModuleIttnotify->getSectionInfoVectorBegin();
+ ModuleIttnotify->setModuleObject(ModuleObject);
+
+ __itt_module_load_with_sections(ModuleObject);
+
+ KeyToIttnotify[Key] = std::move(ModuleIttnotify);
}
- SymbolRef::Type SymType = *SymTypeOrErr;
- if (SymType != SymbolRef::ST_Function)
- continue;
-
- Expected<StringRef> Name = Sym.getName();
- if (!Name) {
- // TODO: Actually report errors helpfully.
- consumeError(Name.takeError());
- continue;
+ } else if (BackwardCompatibilityMode == 1) {
+
+ OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
+ const ObjectFile *DebugObj = DebugObjOwner.getBinary();
+ if (!DebugObj)
+ return;
+
+ // Get the address of the object image for use as a unique identifier
+ const void *ObjData = DebugObj->getData().data();
+ std::unique_ptr<DIContext> Context = DWARFContext::create(*DebugObj);
+ MethodAddressVector Functions;
+
+ // Use symbol info to iterate functions in the object.
+ for (const std::pair<SymbolRef, uint64_t> &P :
+ computeSymbolSizes(*DebugObj)) {
+ SymbolRef Sym = P.first;
+ std::vector<LineNumberInfo> LineInfo;
+ std::string SourceFileName;
+
+ Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
+ if (!SymTypeOrErr) {
+ // TODO: Actually report errors helpfully.
+ consumeError(SymTypeOrErr.takeError());
+ continue;
+ }
+ SymbolRef::Type SymType = *SymTypeOrErr;
+ if (SymType != SymbolRef::ST_Function)
+ continue;
+
+ Expected<StringRef> Name = Sym.getName();
+ if (!Name) {
+ // TODO: Actually report errors helpfully.
+ consumeError(Name.takeError());
+ continue;
+ }
+
+ Expected<uint64_t> AddrOrErr = Sym.getAddress();
+ if (!AddrOrErr) {
+ // TODO: Actually report errors helpfully.
+ consumeError(AddrOrErr.takeError());
+ continue;
+ }
+ uint64_t Addr = *AddrOrErr;
+ uint64_t Size = P.second;
+
+ auto SecOrErr = Sym.getSection();
+ if (!SecOrErr) {
+ // TODO: Actually report errors helpfully.
+ consumeError(SecOrErr.takeError());
+ continue;
+ }
+ object::section_iterator Sec = *SecOrErr;
+ if (Sec == Obj.section_end())
+ continue;
+ uint64_t Index = Sec->getIndex();
+
+ // Record this address in a local vector
+ Functions.push_back((void *)Addr);
+
+ // Build the function loaded notification message
+ iJIT_Method_Load FunctionMessage =
+ FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size);
+ DILineInfoTable Lines =
+ Context->getLineInfoForAddressRange({Addr, Index}, Size);
+ DILineInfoTable::iterator Begin = Lines.begin();
+ DILineInfoTable::iterator End = Lines.end();
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ LineInfo.push_back(
+ DILineInfoToIntelJITFormat((uintptr_t)Addr, It->first, It->second));
+ }
+ if (LineInfo.size() == 0) {
+ FunctionMessage.source_file_name = 0;
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+ } else {
+ // Source line information for the address range is provided as
+ // a code offset for the start of the corresponding sub-range and
+ // a source line. JIT API treats offsets in LineNumberInfo structures
+ // as the end of the corresponding code region. The start of the code
+ // is taken from the previous element. Need to shift the elements.
+
+ LineNumberInfo last = LineInfo.back();
+ last.Offset = FunctionMessage.method_size;
+ LineInfo.push_back(last);
+ for (size_t i = LineInfo.size() - 2; i > 0; --i)
+ LineInfo[i].LineNumber = LineInfo[i - 1].LineNumber;
+
+ SourceFileName = Lines.front().second.FileName;
+ FunctionMessage.source_file_name =
+ const_cast<char *>(SourceFileName.c_str());
+ FunctionMessage.line_number_size = LineInfo.size();
+ FunctionMessage.line_number_table = &*LineInfo.begin();
+ }
+
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
+ MethodIDs[(void *)Addr] = FunctionMessage.method_id;
}
- Expected<uint64_t> AddrOrErr = Sym.getAddress();
- if (!AddrOrErr) {
- // TODO: Actually report errors helpfully.
- consumeError(AddrOrErr.takeError());
- continue;
- }
- uint64_t Addr = *AddrOrErr;
- uint64_t Size = P.second;
-
- auto SecOrErr = Sym.getSection();
- if (!SecOrErr) {
- // TODO: Actually report errors helpfully.
- consumeError(SecOrErr.takeError());
- continue;
- }
- object::section_iterator Sec = *SecOrErr;
- if (Sec == Obj.section_end())
- continue;
- uint64_t Index = Sec->getIndex();
-
- // Record this address in a local vector
- Functions.push_back((void*)Addr);
-
- // Build the function loaded notification message
- iJIT_Method_Load FunctionMessage =
- FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size);
- DILineInfoTable Lines =
- Context->getLineInfoForAddressRange({Addr, Index}, Size);
- DILineInfoTable::iterator Begin = Lines.begin();
- DILineInfoTable::iterator End = Lines.end();
- for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
- LineInfo.push_back(
- DILineInfoToIntelJITFormat((uintptr_t)Addr, It->first, It->second));
- }
- if (LineInfo.size() == 0) {
- FunctionMessage.source_file_name = 0;
- FunctionMessage.line_number_size = 0;
- FunctionMessage.line_number_table = 0;
- } else {
- // Source line information for the address range is provided as
- // a code offset for the start of the corresponding sub-range and
- // a source line. JIT API treats offsets in LineNumberInfo structures
- // as the end of the corresponding code region. The start of the code
- // is taken from the previous element. Need to shift the elements.
-
- LineNumberInfo last = LineInfo.back();
- last.Offset = FunctionMessage.method_size;
- LineInfo.push_back(last);
- for (size_t i = LineInfo.size() - 2; i > 0; --i)
- LineInfo[i].LineNumber = LineInfo[i - 1].LineNumber;
-
- SourceFileName = Lines.front().second.FileName;
- FunctionMessage.source_file_name =
- const_cast<char *>(SourceFileName.c_str());
- FunctionMessage.line_number_size = LineInfo.size();
- FunctionMessage.line_number_table = &*LineInfo.begin();
- }
-
- Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
- &FunctionMessage);
- MethodIDs[(void*)Addr] = FunctionMessage.method_id;
+ // To support object unload notification, we need to keep a list of
+ // registered function addresses for each loaded object. We will
+ // use the MethodIDs map to get the registered ID for each function.
+ LoadedObjectMap[ObjData] = Functions;
+ DebugObjects[Key] = std::move(DebugObjOwner);
}
-
- // To support object unload notification, we need to keep a list of
- // registered function addresses for each loaded object. We will
- // use the MethodIDs map to get the registered ID for each function.
- LoadedObjectMap[ObjData] = Functions;
- DebugObjects[Key] = std::move(DebugObjOwner);
}
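Aside (illustrative sketch, not part of the patch): the offset-shifting block in the legacy path above is easier to follow on concrete numbers. The values below are invented for illustration and use a stand-in struct:

#include <cstdio>
#include <vector>

struct LineEntry { unsigned Offset; unsigned LineNumber; };

int main() {
  // DWARF gives (start offset, line); the JIT API wants each Offset to mark
  // the END of the region carrying the line, so append a terminator at the
  // method size and shift the line numbers down by one slot.
  std::vector<LineEntry> LineInfo = {{0, 10}, {16, 12}};
  unsigned MethodSize = 32;

  LineEntry Last = LineInfo.back();
  Last.Offset = MethodSize;
  LineInfo.push_back(Last);
  for (size_t I = LineInfo.size() - 2; I > 0; --I)
    LineInfo[I].LineNumber = LineInfo[I - 1].LineNumber;

  // Prints: (0,10) (16,10) (32,12), i.e. bytes [0,16) map to line 10 and
  // bytes [16,32) map to line 12.
  for (const LineEntry &E : LineInfo)
    std::printf("(%u,%u) ", E.Offset, E.LineNumber);
  std::printf("\n");
  return 0;
}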
void IntelJITEventListener::notifyFreeingObject(ObjectKey Key) {
- // This object may not have been registered with the listener. If it wasn't,
- // bail out.
- if (DebugObjects.find(Key) == DebugObjects.end())
- return;
-
- // Get the address of the object image for use as a unique identifier
- const ObjectFile &DebugObj = *DebugObjects[Key].getBinary();
- const void* ObjData = DebugObj.getData().data();
-
- // Get the object's function list from LoadedObjectMap
- ObjectMap::iterator OI = LoadedObjectMap.find(ObjData);
- if (OI == LoadedObjectMap.end())
- return;
- MethodAddressVector& Functions = OI->second;
-
- // Walk the function list, unregistering each function
- for (MethodAddressVector::iterator FI = Functions.begin(),
- FE = Functions.end();
- FI != FE;
- ++FI) {
- void* FnStart = const_cast<void*>(*FI);
- MethodIDMap::iterator MI = MethodIDs.find(FnStart);
- if (MI != MethodIDs.end()) {
- Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
- &MI->second);
- MethodIDs.erase(MI);
+
+ int BackwardCompatibilityMode = getBackwardCompatibilityMode();
+ if (BackwardCompatibilityMode == 0) {
+ if (KeyToIttnotify.find(Key) == KeyToIttnotify.end())
+ return;
+ __itt_module_unload_with_sections(KeyToIttnotify[Key]->getModuleObject());
+ Wrapper->iJitIttNotifyInfo(
+ llvm::UnloadBinaryModule,
+ KeyToIttnotify[Key]->getModuleObject()->module_name,
+ KeyToIttnotify[Key]->getModuleObject()->module_size);
+ KeyToIttnotify.erase(Key);
+ } else if (BackwardCompatibilityMode == 1) {
+ // This object may not have been registered with the listener. If it wasn't,
+ // bail out.
+ if (DebugObjects.find(Key) == DebugObjects.end())
+ return;
+
+ // Get the address of the object image for use as a unique identifier
+ const ObjectFile &DebugObj = *DebugObjects[Key].getBinary();
+ const void *ObjData = DebugObj.getData().data();
+
+ // Get the object's function list from LoadedObjectMap
+ ObjectMap::iterator OI = LoadedObjectMap.find(ObjData);
+ if (OI == LoadedObjectMap.end())
+ return;
+ MethodAddressVector &Functions = OI->second;
+
+ // Walk the function list, unregistering each function
+ for (MethodAddressVector::iterator FI = Functions.begin(),
+ FE = Functions.end();
+ FI != FE; ++FI) {
+ void *FnStart = const_cast<void *>(*FI);
+ MethodIDMap::iterator MI = MethodIDs.find(FnStart);
+ if (MI != MethodIDs.end()) {
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
+ &MI->second);
+ MethodIDs.erase(MI);
+ }
}
- }
- // Erase the object from LoadedObjectMap
- LoadedObjectMap.erase(OI);
- DebugObjects.erase(Key);
+ // Erase the object from LoadedObjectMap
+ LoadedObjectMap.erase(OI);
+ DebugObjects.erase(Key);
+ }
}
} // anonymous namespace.
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
index 68699c6a2200..088b33b798a3 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
@@ -21,10 +21,18 @@
namespace llvm {
+typedef enum {
+ LoadBinaryModule,
+ LoadBinarySection,
+ UnloadBinaryModule,
+ UnloadBinarySection
+} IttEventType;
+
class IntelJITEventsWrapper {
// Function pointer types for testing implementation of Intel jitprofiling
// library
typedef int (*NotifyEventPtr)(iJIT_JVM_EVENT, void*);
+ typedef int (*IttnotifyInfoPtr)(IttEventType, const char *, unsigned int);
typedef void (*RegisterCallbackExPtr)(void *, iJIT_ModeChangedEx );
typedef iJIT_IsProfilingActiveFlags (*IsProfilingActivePtr)(void);
typedef void (*FinalizeThreadPtr)(void);
@@ -32,6 +40,7 @@ class IntelJITEventsWrapper {
typedef unsigned int (*GetNewMethodIDPtr)(void);
NotifyEventPtr NotifyEventFunc;
+ IttnotifyInfoPtr IttnotifyInfoFunc;
RegisterCallbackExPtr RegisterCallbackExFunc;
IsProfilingActivePtr IsProfilingActiveFunc;
GetNewMethodIDPtr GetNewMethodIDFunc;
@@ -42,23 +51,22 @@ public:
}
IntelJITEventsWrapper()
- : NotifyEventFunc(::iJIT_NotifyEvent),
- RegisterCallbackExFunc(::iJIT_RegisterCallbackEx),
- IsProfilingActiveFunc(::iJIT_IsProfilingActive),
- GetNewMethodIDFunc(::iJIT_GetNewMethodID) {
- }
+ : NotifyEventFunc(::iJIT_NotifyEvent), IttnotifyInfoFunc(0),
+ RegisterCallbackExFunc(::iJIT_RegisterCallbackEx),
+ IsProfilingActiveFunc(::iJIT_IsProfilingActive),
+ GetNewMethodIDFunc(::iJIT_GetNewMethodID) {}
IntelJITEventsWrapper(NotifyEventPtr NotifyEventImpl,
- RegisterCallbackExPtr RegisterCallbackExImpl,
- IsProfilingActivePtr IsProfilingActiveImpl,
- FinalizeThreadPtr FinalizeThreadImpl,
- FinalizeProcessPtr FinalizeProcessImpl,
- GetNewMethodIDPtr GetNewMethodIDImpl)
- : NotifyEventFunc(NotifyEventImpl),
- RegisterCallbackExFunc(RegisterCallbackExImpl),
- IsProfilingActiveFunc(IsProfilingActiveImpl),
- GetNewMethodIDFunc(GetNewMethodIDImpl) {
- }
+ IttnotifyInfoPtr IttnotifyInfoImpl,
+ RegisterCallbackExPtr RegisterCallbackExImpl,
+ IsProfilingActivePtr IsProfilingActiveImpl,
+ FinalizeThreadPtr FinalizeThreadImpl,
+ FinalizeProcessPtr FinalizeProcessImpl,
+ GetNewMethodIDPtr GetNewMethodIDImpl)
+ : NotifyEventFunc(NotifyEventImpl), IttnotifyInfoFunc(IttnotifyInfoImpl),
+ RegisterCallbackExFunc(RegisterCallbackExImpl),
+ IsProfilingActiveFunc(IsProfilingActiveImpl),
+ GetNewMethodIDFunc(GetNewMethodIDImpl) {}
// Sends an event announcing that a function has been emitted
// return values are event-specific. See Intel documentation for details.
@@ -68,6 +76,13 @@ public:
return NotifyEventFunc(EventType, EventSpecificData);
}
+ int iJitIttNotifyInfo(IttEventType EventType, const char *Name,
+ unsigned int Size) {
+ if (!IttnotifyInfoFunc)
+ return -1;
+ return IttnotifyInfoFunc(EventType, Name, Size);
+ }
+
// Registers a callback function to receive notice of profiling state changes
void iJIT_RegisterCallbackEx(void *UserData,
iJIT_ModeChangedEx NewModeCallBackFuncEx) {
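Aside (illustrative sketch, not part of the patch): IttnotifyInfoFunc defaults to null and iJitIttNotifyInfo simply returns -1 in that case, so the new hook stays optional for existing wrapper users. The same pattern in isolation, with hypothetical names and no dependency on the jitprofiling headers:

#include <cstdio>

typedef int (*InfoHookPtr)(const char *Name, unsigned Size);

struct Wrapper {
  InfoHookPtr InfoHook = nullptr;     // optional hook, null by default
  int notifyInfo(const char *Name, unsigned Size) {
    if (!InfoHook)
      return -1;                      // hook not installed: report and keep going
    return InfoHook(Name, Size);
  }
};

static int CountingHook(const char *Name, unsigned Size) {
  std::printf("section %s: %u bytes\n", Name, Size);
  return 0;
}

int main() {
  Wrapper W;
  W.notifyInfo(".text", 64);          // returns -1, no hook installed
  W.InfoHook = &CountingHook;         // e.g. a unit-test counter
  W.notifyInfo(".text", 64);          // forwarded to the hook
  return 0;
}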
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index cb1b35d62388..3aa77557862e 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -419,7 +419,7 @@ static GenericValue lle_X_printf(FunctionType *FT,
char Buffer[10000];
std::vector<GenericValue> NewArgs;
NewArgs.push_back(PTOGV((void*)&Buffer[0]));
- NewArgs.insert(NewArgs.end(), Args.begin(), Args.end());
+ llvm::append_range(NewArgs, Args);
GenericValue GV = lle_X_sprintf(FT, NewArgs);
outs() << Buffer;
return GV;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
index f1114e92c360..3602601287f4 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
@@ -10,6 +10,8 @@
#include "EHFrameSupportImpl.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
#include "llvm/Support/DynamicLibrary.h"
#define DEBUG_TYPE "jitlink"
@@ -117,10 +119,10 @@ Error EHFrameSplitter::processBlock(LinkGraph &G, Block &B,
}
EHFrameEdgeFixer::EHFrameEdgeFixer(StringRef EHFrameSectionName,
- Edge::Kind FDEToCIE, Edge::Kind FDEToPCBegin,
- Edge::Kind FDEToLSDA)
- : EHFrameSectionName(EHFrameSectionName), FDEToCIE(FDEToCIE),
- FDEToPCBegin(FDEToPCBegin), FDEToLSDA(FDEToLSDA) {}
+ unsigned PointerSize, Edge::Kind Delta64,
+ Edge::Kind Delta32, Edge::Kind NegDelta32)
+ : EHFrameSectionName(EHFrameSectionName), PointerSize(PointerSize),
+ Delta64(Delta64), Delta32(Delta32), NegDelta32(NegDelta32) {}
Error EHFrameEdgeFixer::operator()(LinkGraph &G) {
auto *EHFrame = G.findSectionByName(EHFrameSectionName);
@@ -133,6 +135,11 @@ Error EHFrameEdgeFixer::operator()(LinkGraph &G) {
return Error::success();
}
+ // Check that we support the graph's pointer size.
+ if (G.getPointerSize() != 4 && G.getPointerSize() != 8)
+ return make_error<JITLinkError>(
+ "EHFrameEdgeFixer only supports 32 and 64 bit targets");
+
LLVM_DEBUG({
dbgs() << "EHFrameEdgeFixer: Processing " << EHFrameSectionName << "...\n";
});
@@ -257,7 +264,6 @@ Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) {
Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
size_t RecordOffset, size_t RecordLength,
size_t CIEDeltaFieldOffset) {
- using namespace dwarf;
LLVM_DEBUG(dbgs() << " Record is CIE\n");
@@ -328,11 +334,12 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
uint8_t LSDAPointerEncoding;
if (auto Err = RecordReader.readInteger(LSDAPointerEncoding))
return Err;
- if (LSDAPointerEncoding != (DW_EH_PE_pcrel | DW_EH_PE_absptr))
+ if (!isSupportedPointerEncoding(LSDAPointerEncoding))
return make_error<JITLinkError>(
"Unsupported LSDA pointer encoding " +
formatv("{0:x2}", LSDAPointerEncoding) + " in CIE at " +
formatv("{0:x16}", CIESymbol.getAddress()));
+ CIEInfo.LSDAPointerEncoding = LSDAPointerEncoding;
break;
}
case 'P': {
@@ -340,7 +347,8 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
if (auto Err = RecordReader.readInteger(PersonalityPointerEncoding))
return Err;
if (PersonalityPointerEncoding !=
- (DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4))
+ (dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4))
return make_error<JITLinkError>(
"Unspported personality pointer "
"encoding " +
@@ -355,12 +363,12 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
uint8_t FDEPointerEncoding;
if (auto Err = RecordReader.readInteger(FDEPointerEncoding))
return Err;
- if (FDEPointerEncoding != (DW_EH_PE_pcrel | DW_EH_PE_absptr))
+ if (!isSupportedPointerEncoding(FDEPointerEncoding))
return make_error<JITLinkError>(
- "Unsupported FDE address pointer "
- "encoding " +
+ "Unsupported FDE pointer encoding " +
formatv("{0:x2}", FDEPointerEncoding) + " in CIE at " +
formatv("{0:x16}", CIESymbol.getAddress()));
+ CIEInfo.FDEPointerEncoding = FDEPointerEncoding;
break;
}
default:
@@ -417,7 +425,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
else
return CIEInfoOrErr.takeError();
assert(CIEInfo->CIESymbol && "CIEInfo has no CIE symbol set");
- B.addEdge(FDEToCIE, RecordOffset + CIEDeltaFieldOffset,
+ B.addEdge(NegDelta32, RecordOffset + CIEDeltaFieldOffset,
*CIEInfo->CIESymbol, 0);
} else {
LLVM_DEBUG({
@@ -444,11 +452,13 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
JITTargetAddress PCBeginFieldOffset = RecordReader.getOffset();
auto PCEdgeItr = BlockEdges.find(RecordOffset + PCBeginFieldOffset);
if (PCEdgeItr == BlockEdges.end()) {
- auto PCBeginDelta = readAbsolutePointer(PC.G, RecordReader);
- if (!PCBeginDelta)
- return PCBeginDelta.takeError();
- JITTargetAddress PCBegin =
- RecordAddress + PCBeginFieldOffset + *PCBeginDelta;
+ auto PCBeginPtrInfo =
+ readEncodedPointer(CIEInfo->FDEPointerEncoding,
+ RecordAddress + PCBeginFieldOffset, RecordReader);
+ if (!PCBeginPtrInfo)
+ return PCBeginPtrInfo.takeError();
+ JITTargetAddress PCBegin = PCBeginPtrInfo->first;
+ Edge::Kind PCBeginEdgeKind = PCBeginPtrInfo->second;
LLVM_DEBUG({
dbgs() << " Adding edge at "
<< formatv("{0:x16}", RecordAddress + PCBeginFieldOffset)
@@ -457,7 +467,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
auto PCBeginSym = getOrCreateSymbol(PC, PCBegin);
if (!PCBeginSym)
return PCBeginSym.takeError();
- B.addEdge(FDEToPCBegin, RecordOffset + PCBeginFieldOffset, *PCBeginSym,
+ B.addEdge(PCBeginEdgeKind, RecordOffset + PCBeginFieldOffset, *PCBeginSym,
0);
PCBeginBlock = &PCBeginSym->getBlock();
} else {
@@ -479,38 +489,42 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
" points at external block");
}
PCBeginBlock = &EI.Target->getBlock();
- if (auto Err = RecordReader.skip(PC.G.getPointerSize()))
+ if (auto Err = RecordReader.skip(
+ getPointerEncodingDataSize(CIEInfo->FDEPointerEncoding)))
return Err;
}
// Add a keep-alive edge from the FDE target to the FDE to ensure that the
// FDE is kept alive if its target is.
assert(PCBeginBlock && "PC-begin block not recorded");
+ LLVM_DEBUG({
+ dbgs() << " Adding keep-alive edge from target at "
+ << formatv("{0:x16}", PCBeginBlock->getAddress()) << " to FDE at "
+ << formatv("{0:x16}", RecordAddress) << "\n";
+ });
PCBeginBlock->addEdge(Edge::KeepAlive, 0, FDESymbol, 0);
}
// Skip over the PC range size field.
- if (auto Err = RecordReader.skip(PC.G.getPointerSize()))
+ if (auto Err = RecordReader.skip(
+ getPointerEncodingDataSize(CIEInfo->FDEPointerEncoding)))
return Err;
if (CIEInfo->FDEsHaveLSDAField) {
uint64_t AugmentationDataSize;
if (auto Err = RecordReader.readULEB128(AugmentationDataSize))
return Err;
- if (AugmentationDataSize != PC.G.getPointerSize())
- return make_error<JITLinkError>(
- "Unexpected FDE augmentation data size (expected " +
- Twine(PC.G.getPointerSize()) + ", got " +
- Twine(AugmentationDataSize) + ") for FDE at " +
- formatv("{0:x16}", RecordAddress));
JITTargetAddress LSDAFieldOffset = RecordReader.getOffset();
auto LSDAEdgeItr = BlockEdges.find(RecordOffset + LSDAFieldOffset);
if (LSDAEdgeItr == BlockEdges.end()) {
- auto LSDADelta = readAbsolutePointer(PC.G, RecordReader);
- if (!LSDADelta)
- return LSDADelta.takeError();
- JITTargetAddress LSDA = RecordAddress + LSDAFieldOffset + *LSDADelta;
+ auto LSDAPointerInfo =
+ readEncodedPointer(CIEInfo->LSDAPointerEncoding,
+ RecordAddress + LSDAFieldOffset, RecordReader);
+ if (!LSDAPointerInfo)
+ return LSDAPointerInfo.takeError();
+ JITTargetAddress LSDA = LSDAPointerInfo->first;
+ Edge::Kind LSDAEdgeKind = LSDAPointerInfo->second;
auto LSDASym = getOrCreateSymbol(PC, LSDA);
if (!LSDASym)
return LSDASym.takeError();
@@ -519,7 +533,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
<< formatv("{0:x16}", RecordAddress + LSDAFieldOffset)
<< " to LSDA at " << formatv("{0:x16}", LSDA) << "\n";
});
- B.addEdge(FDEToLSDA, RecordOffset + LSDAFieldOffset, *LSDASym, 0);
+ B.addEdge(LSDAEdgeKind, RecordOffset + LSDAFieldOffset, *LSDASym, 0);
} else {
LLVM_DEBUG({
auto &EI = LSDAEdgeItr->second;
@@ -530,7 +544,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
dbgs() << " + " << formatv("{0:x16}", EI.Addend);
dbgs() << "\n";
});
- if (auto Err = RecordReader.skip(PC.G.getPointerSize()))
+ if (auto Err = RecordReader.skip(AugmentationDataSize))
return Err;
}
} else {
@@ -581,23 +595,110 @@ EHFrameEdgeFixer::parseAugmentationString(BinaryStreamReader &RecordReader) {
return std::move(AugInfo);
}
-Expected<JITTargetAddress>
-EHFrameEdgeFixer::readAbsolutePointer(LinkGraph &G,
- BinaryStreamReader &RecordReader) {
+bool EHFrameEdgeFixer::isSupportedPointerEncoding(uint8_t PointerEncoding) {
+ using namespace dwarf;
+
+ // We only support PC-rel for now.
+ if ((PointerEncoding & 0x70) != DW_EH_PE_pcrel)
+ return false;
+
+ // readEncodedPointer does not handle indirect.
+ if (PointerEncoding & DW_EH_PE_indirect)
+ return false;
+
+ // Supported datatypes.
+ switch (PointerEncoding & 0xf) {
+ case DW_EH_PE_absptr:
+ case DW_EH_PE_udata4:
+ case DW_EH_PE_udata8:
+ case DW_EH_PE_sdata4:
+ case DW_EH_PE_sdata8:
+ return true;
+ }
+
+ return false;
+}
+
+unsigned EHFrameEdgeFixer::getPointerEncodingDataSize(uint8_t PointerEncoding) {
+ using namespace dwarf;
+
+ assert(isSupportedPointerEncoding(PointerEncoding) &&
+ "Unsupported pointer encoding");
+ switch (PointerEncoding & 0xf) {
+ case DW_EH_PE_absptr:
+ return PointerSize;
+ case DW_EH_PE_udata4:
+ case DW_EH_PE_sdata4:
+ return 4;
+ case DW_EH_PE_udata8:
+ case DW_EH_PE_sdata8:
+ return 8;
+ default:
+ llvm_unreachable("Unsupported encoding");
+ }
+}
+
+Expected<std::pair<JITTargetAddress, Edge::Kind>>
+EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding,
+ JITTargetAddress PointerFieldAddress,
+ BinaryStreamReader &RecordReader) {
static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t),
"Result must be able to hold a uint64_t");
+ assert(isSupportedPointerEncoding(PointerEncoding) &&
+ "Unsupported pointer encoding");
+
+ using namespace dwarf;
+
+ // Isolate data type, remap absptr to udata4 or udata8. This relies on us
+ // having verified that the graph uses 32-bit or 64-bit pointers only at the
+ // start of this pass.
+ uint8_t EffectiveType = PointerEncoding & 0xf;
+ if (EffectiveType == DW_EH_PE_absptr)
+ EffectiveType = (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4;
+
JITTargetAddress Addr;
- if (G.getPointerSize() == 8) {
- if (auto Err = RecordReader.readInteger(Addr))
+ Edge::Kind PointerEdgeKind;
+ switch (EffectiveType) {
+ case DW_EH_PE_udata4: {
+ uint32_t Val;
+ if (auto Err = RecordReader.readInteger(Val))
return std::move(Err);
- } else if (G.getPointerSize() == 4) {
- uint32_t Addr32;
- if (auto Err = RecordReader.readInteger(Addr32))
+ Addr = PointerFieldAddress + Val;
+ PointerEdgeKind = Delta32;
+ break;
+ }
+ case DW_EH_PE_udata8: {
+ uint64_t Val;
+ if (auto Err = RecordReader.readInteger(Val))
+ return std::move(Err);
+ Addr = PointerFieldAddress + Val;
+ PointerEdgeKind = Delta64;
+ break;
+ }
+ case DW_EH_PE_sdata4: {
+ int32_t Val;
+ if (auto Err = RecordReader.readInteger(Val))
+ return std::move(Err);
+ Addr = PointerFieldAddress + Val;
+ PointerEdgeKind = Delta32;
+ break;
+ }
+ case DW_EH_PE_sdata8: {
+ int64_t Val;
+ if (auto Err = RecordReader.readInteger(Val))
return std::move(Err);
- Addr = Addr32;
- } else
- llvm_unreachable("Pointer size is not 32-bit or 64-bit");
- return Addr;
+ Addr = PointerFieldAddress + Val;
+ PointerEdgeKind = Delta64;
+ break;
+ }
+ }
+
+ if (PointerEdgeKind == Edge::Invalid)
+ return make_error<JITLinkError>(
+ "Unspported edge kind for encoded pointer at " +
+ formatv("{0:x}", PointerFieldAddress));
+
+ return std::make_pair(Addr, Delta64);
}
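Aside (illustrative sketch, not part of the patch): an .eh_frame pointer-encoding byte combines an application nibble (high) with a data-type nibble (low). A standalone decode of two common values, with the relevant DW_EH_PE_* constants restated locally so the snippet compiles on its own:

#include <cassert>
#include <cstdint>

// Subset of the DW_EH_PE_* constants used above.
constexpr uint8_t DW_EH_PE_absptr = 0x00;
constexpr uint8_t DW_EH_PE_udata4 = 0x03;
constexpr uint8_t DW_EH_PE_udata8 = 0x04;
constexpr uint8_t DW_EH_PE_sdata4 = 0x0b;
constexpr uint8_t DW_EH_PE_sdata8 = 0x0c;
constexpr uint8_t DW_EH_PE_pcrel  = 0x10;

static unsigned encodingDataSize(uint8_t Enc, unsigned PointerSize) {
  switch (Enc & 0xf) {
  case DW_EH_PE_absptr: return PointerSize;       // native pointer width
  case DW_EH_PE_udata4:
  case DW_EH_PE_sdata4: return 4;
  case DW_EH_PE_udata8:
  case DW_EH_PE_sdata8: return 8;
  default:              return 0;                 // unsupported here
  }
}

int main() {
  // 0x1b = pcrel | sdata4: a signed 32-bit PC-relative delta.
  assert(((DW_EH_PE_pcrel | DW_EH_PE_sdata4) & 0x70) == DW_EH_PE_pcrel);
  assert(encodingDataSize(DW_EH_PE_pcrel | DW_EH_PE_sdata4, 8) == 4);
  // 0x10 = pcrel | absptr: PC-relative at the graph's pointer size.
  assert(encodingDataSize(DW_EH_PE_pcrel | DW_EH_PE_absptr, 8) == 8);
  return 0;
}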
Expected<Symbol &> EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC,
@@ -629,146 +730,21 @@ Expected<Symbol &> EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC,
return PC.G.addAnonymousSymbol(*B, Addr - B->getAddress(), 0, false, false);
}
-// Determine whether we can register EH tables.
-#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \
- !(defined(_AIX) && defined(__ibmxl__)) && !defined(__SEH__) && \
- !defined(__USING_SJLJ_EXCEPTIONS__))
-#define HAVE_EHTABLE_SUPPORT 1
-#else
-#define HAVE_EHTABLE_SUPPORT 0
-#endif
-
-#if HAVE_EHTABLE_SUPPORT
-extern "C" void __register_frame(const void *);
-extern "C" void __deregister_frame(const void *);
-
-Error registerFrameWrapper(const void *P) {
- __register_frame(P);
- return Error::success();
-}
-
-Error deregisterFrameWrapper(const void *P) {
- __deregister_frame(P);
- return Error::success();
-}
-
-#else
-
-// The building compiler does not have __(de)register_frame but
-// it may be found at runtime in a dynamically-loaded library.
-// For example, this happens when building LLVM with Visual C++
-// but using the MingW runtime.
-static Error registerFrameWrapper(const void *P) {
- static void((*RegisterFrame)(const void *)) = 0;
-
- if (!RegisterFrame)
- *(void **)&RegisterFrame =
- llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("__register_frame");
-
- if (RegisterFrame) {
- RegisterFrame(P);
- return Error::success();
- }
-
- return make_error<JITLinkError>("could not register eh-frame: "
- "__register_frame function not found");
-}
-
-static Error deregisterFrameWrapper(const void *P) {
- static void((*DeregisterFrame)(const void *)) = 0;
-
- if (!DeregisterFrame)
- *(void **)&DeregisterFrame =
- llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(
- "__deregister_frame");
-
- if (DeregisterFrame) {
- DeregisterFrame(P);
- return Error::success();
- }
-
- return make_error<JITLinkError>("could not deregister eh-frame: "
- "__deregister_frame function not found");
-}
-#endif
-
-#ifdef __APPLE__
-
-template <typename HandleFDEFn>
-Error walkAppleEHFrameSection(const char *const SectionStart,
- size_t SectionSize,
- HandleFDEFn HandleFDE) {
- const char *CurCFIRecord = SectionStart;
- const char *End = SectionStart + SectionSize;
- uint64_t Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
-
- while (CurCFIRecord != End && Size != 0) {
- const char *OffsetField = CurCFIRecord + (Size == 0xffffffff ? 12 : 4);
- if (Size == 0xffffffff)
- Size = *reinterpret_cast<const uint64_t *>(CurCFIRecord + 4) + 12;
- else
- Size += 4;
- uint32_t Offset = *reinterpret_cast<const uint32_t *>(OffsetField);
-
- LLVM_DEBUG({
- dbgs() << "Registering eh-frame section:\n";
- dbgs() << "Processing " << (Offset ? "FDE" : "CIE") << " @"
- << (void *)CurCFIRecord << ": [";
- for (unsigned I = 0; I < Size; ++I)
- dbgs() << format(" 0x%02" PRIx8, *(CurCFIRecord + I));
- dbgs() << " ]\n";
- });
-
- if (Offset != 0)
- if (auto Err = HandleFDE(CurCFIRecord))
- return Err;
-
- CurCFIRecord += Size;
-
- Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
- }
-
- return Error::success();
-}
-
-#endif // __APPLE__
-
-Error registerEHFrameSection(const void *EHFrameSectionAddr,
- size_t EHFrameSectionSize) {
-#ifdef __APPLE__
- // On Darwin __register_frame has to be called for each FDE entry.
- return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
- EHFrameSectionSize,
- registerFrameWrapper);
-#else
- // On Linux __register_frame takes a single argument:
- // a pointer to the start of the .eh_frame section.
-
- // How can it find the end? Because crtendS.o is linked
- // in and it has an .eh_frame section with four zero chars.
- return registerFrameWrapper(EHFrameSectionAddr);
-#endif
-}
-
-Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
- size_t EHFrameSectionSize) {
-#ifdef __APPLE__
- return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
- EHFrameSectionSize,
- deregisterFrameWrapper);
-#else
- return deregisterFrameWrapper(EHFrameSectionAddr);
-#endif
-}
-
EHFrameRegistrar::~EHFrameRegistrar() {}
-InProcessEHFrameRegistrar &InProcessEHFrameRegistrar::getInstance() {
- static InProcessEHFrameRegistrar Instance;
- return Instance;
+Error InProcessEHFrameRegistrar::registerEHFrames(
+ JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) {
+ return orc::registerEHFrameSection(
+ jitTargetAddressToPointer<void *>(EHFrameSectionAddr),
+ EHFrameSectionSize);
}
-InProcessEHFrameRegistrar::InProcessEHFrameRegistrar() {}
+Error InProcessEHFrameRegistrar::deregisterEHFrames(
+ JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) {
+ return orc::deregisterEHFrameSection(
+ jitTargetAddressToPointer<void *>(EHFrameSectionAddr),
+ EHFrameSectionSize);
+}
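Aside (a hedged usage sketch, not part of the patch): with the getInstance() singleton accessor removed, callers that register frames by hand would construct the registrar directly and pass a JITTargetAddress plus size, matching the two methods defined above. The header path and namespaces are as used elsewhere in JITLink:

#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
#include "llvm/Support/Error.h"

using namespace llvm;
using namespace llvm::jitlink;

// Registers and then deregisters an already-materialized .eh_frame section.
static Error registerThenDeregister(JITTargetAddress Addr, size_t Size) {
  InProcessEHFrameRegistrar Registrar;
  if (Error Err = Registrar.registerEHFrames(Addr, Size))
    return Err;
  return Registrar.deregisterEHFrames(Addr, Size);
}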
LinkGraphPassFunction
createEHFrameRecorderPass(const Triple &TT,
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
index a8cd32c664dc..5e68e72ba18d 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
@@ -40,8 +40,9 @@ private:
/// edges.
class EHFrameEdgeFixer {
public:
- EHFrameEdgeFixer(StringRef EHFrameSectionName, Edge::Kind FDEToCIE,
- Edge::Kind FDEToPCBegin, Edge::Kind FDEToLSDA);
+ EHFrameEdgeFixer(StringRef EHFrameSectionName, unsigned PointerSize,
+ Edge::Kind Delta64, Edge::Kind Delta32,
+ Edge::Kind NegDelta32);
Error operator()(LinkGraph &G);
private:
@@ -57,6 +58,8 @@ private:
CIEInformation(Symbol &CIESymbol) : CIESymbol(&CIESymbol) {}
Symbol *CIESymbol = nullptr;
bool FDEsHaveLSDAField = false;
+ uint8_t FDEPointerEncoding = 0;
+ uint8_t LSDAPointerEncoding = 0;
};
struct EdgeTarget {
@@ -96,14 +99,21 @@ private:
Expected<AugmentationInfo>
parseAugmentationString(BinaryStreamReader &RecordReader);
- Expected<JITTargetAddress>
- readAbsolutePointer(LinkGraph &G, BinaryStreamReader &RecordReader);
+
+ static bool isSupportedPointerEncoding(uint8_t PointerEncoding);
+ unsigned getPointerEncodingDataSize(uint8_t PointerEncoding);
+ Expected<std::pair<JITTargetAddress, Edge::Kind>>
+ readEncodedPointer(uint8_t PointerEncoding,
+ JITTargetAddress PointerFieldAddress,
+ BinaryStreamReader &RecordReader);
+
Expected<Symbol &> getOrCreateSymbol(ParseContext &PC, JITTargetAddress Addr);
StringRef EHFrameSectionName;
- Edge::Kind FDEToCIE;
- Edge::Kind FDEToPCBegin;
- Edge::Kind FDEToLSDA;
+ unsigned PointerSize;
+ Edge::Kind Delta64;
+ Edge::Kind Delta32;
+ Edge::Kind NegDelta32;
};
} // end namespace jitlink
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
index 6160583b13fe..27eb7d576e2d 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
@@ -15,6 +15,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
+#include "llvm/Object/ELF.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -27,24 +28,63 @@ using namespace llvm;
namespace llvm {
namespace jitlink {
-void jitLink_ELF(std::unique_ptr<JITLinkContext> Ctx) {
+Expected<uint16_t> readTargetMachineArch(StringRef Buffer) {
+ const char *Data = Buffer.data();
- // We don't want to do full ELF validation here. We just verify it is elf'ish.
- // Probably should parse into an elf header when we support more than x86 :)
-
- StringRef Data = Ctx->getObjectBuffer().getBuffer();
- if (Data.size() < llvm::ELF::EI_MAG3 + 1) {
- Ctx->notifyFailed(make_error<JITLinkError>("Truncated ELF buffer"));
- return;
+ if (Data[ELF::EI_DATA] == ELF::ELFDATA2LSB) {
+ if (Data[ELF::EI_CLASS] == ELF::ELFCLASS64) {
+ if (auto File = llvm::object::ELF64LEFile::create(Buffer)) {
+ return File->getHeader().e_machine;
+ } else {
+ return File.takeError();
+ }
+ } else if (Data[ELF::EI_CLASS] == ELF::ELFCLASS32) {
+ if (auto File = llvm::object::ELF32LEFile::create(Buffer)) {
+ return File->getHeader().e_machine;
+ } else {
+ return File.takeError();
+ }
+ }
}
- if (!memcmp(Data.data(), llvm::ELF::ElfMagic, strlen(llvm::ELF::ElfMagic))) {
- if (Data.data()[llvm::ELF::EI_CLASS] == ELF::ELFCLASS64) {
- return jitLink_ELF_x86_64(std::move(Ctx));
- }
+ return ELF::EM_NONE;
+}
+
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) {
+ StringRef Buffer = ObjectBuffer.getBuffer();
+ if (Buffer.size() < ELF::EI_MAG3 + 1)
+ return make_error<JITLinkError>("Truncated ELF buffer");
+
+ if (memcmp(Buffer.data(), ELF::ElfMagic, strlen(ELF::ElfMagic)) != 0)
+ return make_error<JITLinkError>("ELF magic not valid");
+
+ Expected<uint16_t> TargetMachineArch = readTargetMachineArch(Buffer);
+ if (!TargetMachineArch)
+ return TargetMachineArch.takeError();
+
+ switch (*TargetMachineArch) {
+ case ELF::EM_X86_64:
+ return createLinkGraphFromELFObject_x86_64(std::move(ObjectBuffer));
+ default:
+ return make_error<JITLinkError>(
+ "Unsupported target machine architecture in ELF object " +
+ ObjectBuffer.getBufferIdentifier());
}
+}
- Ctx->notifyFailed(make_error<JITLinkError>("ELF magic not valid"));
+void link_ELF(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+ switch (G->getTargetTriple().getArch()) {
+ case Triple::x86_64:
+ link_ELF_x86_64(std::move(G), std::move(Ctx));
+ return;
+ default:
+ Ctx->notifyFailed(make_error<JITLinkError>(
+ "Unsupported target machine architecture in ELF link graph " +
+ G->getName()));
+ return;
+ }
}
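Aside (illustrative sketch, not part of the patch): the dispatch above keys off two e_ident bytes plus e_machine. For a little-endian x86-64 object the raw values are small and fixed; they are restated locally here so the snippet stands alone:

#include <cassert>
#include <cstdint>

int main() {
  // Offsets/values from the ELF specification (mirroring llvm/BinaryFormat/ELF.h).
  constexpr unsigned EI_CLASS = 4, EI_DATA = 5;
  constexpr uint8_t ELFCLASS64 = 2, ELFDATA2LSB = 1;
  constexpr uint16_t EM_X86_64 = 62;   // 0x3e

  // First bytes of a typical x86-64 little-endian object file header.
  const uint8_t Ident[6] = {0x7f, 'E', 'L', 'F', ELFCLASS64, ELFDATA2LSB};

  assert(Ident[EI_CLASS] == ELFCLASS64);   // -> parsed as an ELF64LE file
  assert(Ident[EI_DATA] == ELFDATA2LSB);
  assert(EM_X86_64 == 62);                 // e_machine value that selects the
                                           // x86-64 link-graph builder
  return 0;
}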
} // end namespace jitlink
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
index 505f03590b6b..2a6b3eb19ded 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
@@ -11,16 +11,204 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
-#include "JITLinkGeneric.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Endian.h"
+
+#include "BasicGOTAndStubsBuilder.h"
+#include "EHFrameSupportImpl.h"
+#include "JITLinkGeneric.h"
#define DEBUG_TYPE "jitlink"
using namespace llvm;
using namespace llvm::jitlink;
+using namespace llvm::jitlink::ELF_x86_64_Edges;
+
+namespace {
+
+class ELF_x86_64_GOTAndStubsBuilder
+ : public BasicGOTAndStubsBuilder<ELF_x86_64_GOTAndStubsBuilder> {
+public:
+ static const uint8_t NullGOTEntryContent[8];
+ static const uint8_t StubContent[6];
+
+ ELF_x86_64_GOTAndStubsBuilder(LinkGraph &G)
+ : BasicGOTAndStubsBuilder<ELF_x86_64_GOTAndStubsBuilder>(G) {}
+
+ bool isGOTEdge(Edge &E) const {
+ return E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad;
+ }
+
+ Symbol &createGOTEntry(Symbol &Target) {
+ auto &GOTEntryBlock = G.createContentBlock(
+ getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0);
+ GOTEntryBlock.addEdge(Pointer64, 0, Target, 0);
+ return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false);
+ }
+
+ void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
+ assert((E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad) &&
+ "Not a GOT edge?");
+ // If this is a PCRel32GOT then change it to an ordinary PCRel32. If it is
+ // a PCRel32GOTLoad then leave it as-is for now. We will use the kind to
+ // check for GOT optimization opportunities in the
+ // optimizeELF_x86_64_GOTAndStubs pass below.
+ if (E.getKind() == PCRel32GOT)
+ E.setKind(PCRel32);
+
+ E.setTarget(GOTEntry);
+ // Leave the edge addend as-is.
+ }
+
+ bool isExternalBranchEdge(Edge &E) {
+ return E.getKind() == Branch32 && !E.getTarget().isDefined();
+ }
+
+ Symbol &createStub(Symbol &Target) {
+ auto &StubContentBlock =
+ G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0);
+ // Re-use GOT entries for stub targets.
+ auto &GOTEntrySymbol = getGOTEntrySymbol(Target);
+ StubContentBlock.addEdge(PCRel32, 2, GOTEntrySymbol, -4);
+ return G.addAnonymousSymbol(StubContentBlock, 0, 6, true, false);
+ }
+
+ void fixExternalBranchEdge(Edge &E, Symbol &Stub) {
+ assert(E.getKind() == Branch32 && "Not a Branch32 edge?");
+
+ // Set the edge kind to Branch32ToStub. We will use this to check for stub
+ // optimization opportunities in the optimizeELF_x86_64_GOTAndStubs pass
+ // below.
+ E.setKind(Branch32ToStub);
+ E.setTarget(Stub);
+ }
+
+private:
+ Section &getGOTSection() {
+ if (!GOTSection)
+ GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ);
+ return *GOTSection;
+ }
+
+ Section &getStubsSection() {
+ if (!StubsSection) {
+ auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
+ sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+ StubsSection = &G.createSection("$__STUBS", StubsProt);
+ }
+ return *StubsSection;
+ }
+
+ StringRef getGOTEntryBlockContent() {
+ return StringRef(reinterpret_cast<const char *>(NullGOTEntryContent),
+ sizeof(NullGOTEntryContent));
+ }
+
+ StringRef getStubBlockContent() {
+ return StringRef(reinterpret_cast<const char *>(StubContent),
+ sizeof(StubContent));
+ }
+
+ Section *GOTSection = nullptr;
+ Section *StubsSection = nullptr;
+};
+
+const char *const DwarfSectionNames[] = {
+#define HANDLE_DWARF_SECTION(ENUM_NAME, ELF_NAME, CMDLINE_NAME, OPTION) \
+ ELF_NAME,
+#include "llvm/BinaryFormat/Dwarf.def"
+#undef HANDLE_DWARF_SECTION
+};
+
+} // namespace
+
+const uint8_t ELF_x86_64_GOTAndStubsBuilder::NullGOTEntryContent[8] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+const uint8_t ELF_x86_64_GOTAndStubsBuilder::StubContent[6] = {
+ 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00};
static const char *CommonSectionName = "__common";
+static Error optimizeELF_x86_64_GOTAndStubs(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n");
+
+ for (auto *B : G.blocks())
+ for (auto &E : B->edges())
+ if (E.getKind() == PCRel32GOTLoad) {
+ // Replace GOT load with LEA only for MOVQ instructions.
+ constexpr uint8_t MOVQRIPRel[] = {0x48, 0x8b};
+ if (E.getOffset() < 3 ||
+ strncmp(B->getContent().data() + E.getOffset() - 3,
+ reinterpret_cast<const char *>(MOVQRIPRel), 2) != 0)
+ continue;
+
+ auto &GOTBlock = E.getTarget().getBlock();
+ assert(GOTBlock.getSize() == G.getPointerSize() &&
+ "GOT entry block should be pointer sized");
+ assert(GOTBlock.edges_size() == 1 &&
+ "GOT entry should only have one outgoing edge");
+
+ auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
+ JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
+ JITTargetAddress TargetAddr = GOTTarget.getAddress();
+
+ int64_t Displacement = TargetAddr - EdgeAddr + 4;
+ if (Displacement >= std::numeric_limits<int32_t>::min() &&
+ Displacement <= std::numeric_limits<int32_t>::max()) {
+ // Change the edge kind as we don't go through GOT anymore. This is
+ // for formal correctness only. Technically, the two relocation kinds
+ // are resolved the same way.
+ E.setKind(PCRel32);
+ E.setTarget(GOTTarget);
+ auto *BlockData = reinterpret_cast<uint8_t *>(
+ const_cast<char *>(B->getContent().data()));
+ BlockData[E.getOffset() - 2] = 0x8d;
+ LLVM_DEBUG({
+ dbgs() << " Replaced GOT load wih LEA:\n ";
+ printEdge(dbgs(), *B, E, getELFX86RelocationKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ }
+ } else if (E.getKind() == Branch32ToStub) {
+ auto &StubBlock = E.getTarget().getBlock();
+ assert(StubBlock.getSize() ==
+ sizeof(ELF_x86_64_GOTAndStubsBuilder::StubContent) &&
+ "Stub block should be stub sized");
+ assert(StubBlock.edges_size() == 1 &&
+ "Stub block should only have one outgoing edge");
+
+ auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock();
+ assert(GOTBlock.getSize() == G.getPointerSize() &&
+ "GOT block should be pointer sized");
+ assert(GOTBlock.edges_size() == 1 &&
+ "GOT block should only have one outgoing edge");
+
+ auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
+ JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
+ JITTargetAddress TargetAddr = GOTTarget.getAddress();
+
+ int64_t Displacement = TargetAddr - EdgeAddr + 4;
+ if (Displacement >= std::numeric_limits<int32_t>::min() &&
+ Displacement <= std::numeric_limits<int32_t>::max()) {
+ E.setKind(Branch32);
+ E.setTarget(GOTTarget);
+ LLVM_DEBUG({
+ dbgs() << " Replaced stub branch with direct branch:\n ";
+ printEdge(dbgs(), *B, E, getELFX86RelocationKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ }
+ }
+
+ return Error::success();
+}
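Aside (illustrative sketch, not part of the patch): the GOT-load relaxation above works because REX.W MOVQ (0x48 0x8b) and REX.W LEAQ (0x48 0x8d) share the same RIP-relative operand layout, so patching one opcode byte retargets the instruction without resizing it, and it is only legal when the real target lies within a signed 32-bit displacement of the fixup. A standalone sketch of that byte rewrite and range check:

#include <cassert>
#include <cstdint>
#include <limits>

// movq sym@GOTPCREL(%rip), %rax  ->  leaq sym(%rip), %rax, when in range.
static bool relaxGOTLoad(uint8_t *Insn, int64_t Displacement) {
  if (Insn[0] != 0x48 || Insn[1] != 0x8b)           // not a REX.W movq
    return false;
  if (Displacement < std::numeric_limits<int32_t>::min() ||
      Displacement > std::numeric_limits<int32_t>::max())
    return false;                                    // target too far away
  Insn[1] = 0x8d;                                    // movq -> leaq
  return true;
}

int main() {
  uint8_t Near[7] = {0x48, 0x8b, 0x05, 0, 0, 0, 0};  // displacement patched later
  assert(relaxGOTLoad(Near, 0x1000));                // small delta: rewritten
  assert(Near[1] == 0x8d);
  uint8_t Far[7] = {0x48, 0x8b, 0x05, 0, 0, 0, 0};
  assert(!relaxGOTLoad(Far, int64_t(1) << 40));      // out of int32 range: kept
  return 0;
}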
+
+static bool isDwarfSection(StringRef SectionName) {
+ for (auto &DwarfSectionName : DwarfSectionNames)
+ if (SectionName == DwarfSectionName)
+ return true;
+ return false;
+}
namespace llvm {
namespace jitlink {
@@ -35,7 +223,8 @@ private:
// Find a better way
using SymbolTable = object::ELFFile<object::ELF64LE>::Elf_Shdr;
// For now we just assume
- std::map<int32_t, Symbol *> JITSymbolTable;
+ using SymbolMap = std::map<int32_t, Symbol *>;
+ SymbolMap JITSymbolTable;
Section &getCommonSection() {
if (!CommonSection) {
@@ -51,6 +240,16 @@ private:
switch (Type) {
case ELF::R_X86_64_PC32:
return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32;
+ case ELF::R_X86_64_PC64:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::Delta64;
+ case ELF::R_X86_64_64:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::Pointer64;
+ case ELF::R_X86_64_GOTPCREL:
+ case ELF::R_X86_64_GOTPCRELX:
+ case ELF::R_X86_64_REX_GOTPCRELX:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32GOTLoad;
+ case ELF::R_X86_64_PLT32:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::Branch32;
}
return make_error<JITLinkError>("Unsupported x86-64 relocation:" +
formatv("{0:d}", Type));
@@ -62,7 +261,7 @@ private:
object::ELFFile<object::ELF64LE>::Elf_Shdr_Range sections;
SymbolTable SymTab;
- bool isRelocatable() { return Obj.getHeader()->e_type == llvm::ELF::ET_REL; }
+ bool isRelocatable() { return Obj.getHeader().e_type == llvm::ELF::ET_REL; }
support::endianness
getEndianness(const object::ELFFile<object::ELF64LE> &Obj) {
@@ -71,7 +270,7 @@ private:
// This could also just become part of a template
unsigned getPointerSize(const object::ELFFile<object::ELF64LE> &Obj) {
- return Obj.getHeader()->getFileClass() == ELF::ELFCLASS64 ? 8 : 4;
+ return Obj.getHeader().getFileClass() == ELF::ELFCLASS64 ? 8 : 4;
}
// We don't technically need this right now
@@ -95,16 +294,12 @@ private:
auto StrTabSec = Obj.getSection(SecRef.sh_link);
if (!StrTabSec)
return StrTabSec.takeError();
- auto StringTable = Obj.getStringTable(*StrTabSec);
+ auto StringTable = Obj.getStringTable(**StrTabSec);
if (!StringTable)
return StringTable.takeError();
for (auto SymRef : *Symbols) {
Optional<StringRef> Name;
- uint64_t Size = 0;
-
- // FIXME: Read size.
- (void)Size;
if (auto NameOrErr = SymRef.getName(*StringTable))
Name = *NameOrErr;
@@ -112,16 +307,13 @@ private:
return NameOrErr.takeError();
LLVM_DEBUG({
- dbgs() << " ";
- if (!Name)
- dbgs() << "<anonymous symbol>";
- else
- dbgs() << *Name;
- dbgs() << ": value = " << formatv("{0:x16}", SymRef.getValue())
+ dbgs() << " value = " << formatv("{0:x16}", SymRef.getValue())
<< ", type = " << formatv("{0:x2}", SymRef.getType())
- << ", binding = " << SymRef.getBinding()
- << ", size =" << Size;
- dbgs() << "\n";
+ << ", binding = " << formatv("{0:x2}", SymRef.getBinding())
+ << ", size = "
+ << formatv("{0:x16}", static_cast<uint64_t>(SymRef.st_size))
+ << ", info = " << formatv("{0:x2}", SymRef.st_info)
+ << " :" << (Name ? *Name : "<anonymous symbol>") << "\n";
});
}
}
@@ -131,9 +323,19 @@ private:
Error createNormalizedSections() {
LLVM_DEBUG(dbgs() << "Creating normalized sections...\n");
for (auto &SecRef : sections) {
- auto Name = Obj.getSectionName(&SecRef);
+ auto Name = Obj.getSectionName(SecRef);
if (!Name)
return Name.takeError();
+
+ // Skip Dwarf sections.
+ if (isDwarfSection(*Name)) {
+ LLVM_DEBUG({
+ dbgs() << *Name
+ << " is a debug section: No graph section will be created.\n";
+ });
+ continue;
+ }
+
sys::Memory::ProtectionFlags Prot;
if (SecRef.sh_flags & ELF::SHF_EXECINSTR) {
Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
@@ -147,8 +349,8 @@ private:
uint64_t Flags = SecRef.sh_flags;
uint64_t Alignment = SecRef.sh_addralign;
const char *Data = nullptr;
- // TODO: figure out what it is that has 0 size no name and address
- // 0000-0000
+ // For now we just use this to skip the "undefined" section; this probably
+ // needs revisiting.
if (Size == 0)
continue;
@@ -158,13 +360,13 @@ private:
LLVM_DEBUG({
dbgs() << " " << *Name << ": " << formatv("{0:x16}", Address) << " -- "
<< formatv("{0:x16}", Address + Size) << ", align: " << Alignment
- << " Flags:" << Flags << "\n";
+ << " Flags: " << formatv("{0:x}", Flags) << "\n";
});
if (SecRef.sh_type != ELF::SHT_NOBITS) {
// .sections() already checks that the data is not beyond the end of
// file
- auto contents = Obj.getSectionContentsAsArray<char>(&SecRef);
+ auto contents = Obj.getSectionContentsAsArray<char>(SecRef);
if (!contents)
return contents.takeError();
@@ -178,6 +380,9 @@ private:
if (SecRef.sh_type == ELF::SHT_SYMTAB)
// TODO: Dynamic?
SymTab = SecRef;
+ } else {
+ auto &Section = G->createSection(*Name, Prot);
+ G->createZeroFillBlock(Section, Size, Address, Alignment, 0);
}
}
@@ -196,21 +401,34 @@ private:
return make_error<llvm::StringError>("Shouldn't have REL in x64",
llvm::inconvertibleErrorCode());
- auto RelSectName = Obj.getSectionName(&SecRef);
+ auto RelSectName = Obj.getSectionName(SecRef);
if (!RelSectName)
return RelSectName.takeError();
- // Deal with .eh_frame later
- if (*RelSectName == StringRef(".rela.eh_frame"))
- continue;
+
+ LLVM_DEBUG({
+ dbgs() << "Adding relocations from section " << *RelSectName << "\n";
+ });
auto UpdateSection = Obj.getSection(SecRef.sh_info);
if (!UpdateSection)
return UpdateSection.takeError();
- auto UpdateSectionName = Obj.getSectionName(*UpdateSection);
+ auto UpdateSectionName = Obj.getSectionName(**UpdateSection);
if (!UpdateSectionName)
return UpdateSectionName.takeError();
+ // Don't process relocations for debug sections.
+ if (isDwarfSection(*UpdateSectionName)) {
+ LLVM_DEBUG({
+ dbgs() << " Target is dwarf section " << *UpdateSectionName
+ << ". Skipping.\n";
+ });
+ continue;
+ } else
+ LLVM_DEBUG({
+ dbgs() << " For target section " << *UpdateSectionName << "\n";
+ });
+
auto JITSection = G->findSectionByName(*UpdateSectionName);
if (!JITSection)
return make_error<llvm::StringError>(
@@ -218,7 +436,7 @@ private:
*UpdateSectionName,
llvm::inconvertibleErrorCode());
- auto Relocations = Obj.relas(&SecRef);
+ auto Relocations = Obj.relas(SecRef);
if (!Relocations)
return Relocations.takeError();
@@ -229,13 +447,22 @@ private:
dbgs() << "Relocation Type: " << Type << "\n"
<< "Name: " << Obj.getRelocationTypeName(Type) << "\n";
});
-
- auto Symbol = Obj.getRelocationSymbol(&Rela, &SymTab);
+ auto SymbolIndex = Rela.getSymbol(false);
+ auto Symbol = Obj.getRelocationSymbol(Rela, &SymTab);
if (!Symbol)
return Symbol.takeError();
auto BlockToFix = *(JITSection->blocks().begin());
- auto TargetSymbol = JITSymbolTable[(*Symbol)->st_shndx];
+ auto *TargetSymbol = JITSymbolTable[SymbolIndex];
+
+ if (!TargetSymbol) {
+ return make_error<llvm::StringError>(
+ "Could not find symbol at given index, did you add it to "
+ "JITSymbolTable? index: " + std::to_string(SymbolIndex)
+ + ", shndx: " + std::to_string((*Symbol)->st_shndx) +
+ " Size of table: " + std::to_string(JITSymbolTable.size()),
+ llvm::inconvertibleErrorCode());
+ }
uint64_t Addend = Rela.r_addend;
JITTargetAddress FixupAddress =
(*UpdateSection)->sh_addr + Rela.r_offset;
@@ -251,8 +478,8 @@ private:
LLVM_DEBUG({
Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol,
Addend);
- // TODO a mapping of KIND => type then call getRelocationTypeName4
- printEdge(dbgs(), *BlockToFix, GE, StringRef(""));
+ printEdge(dbgs(), *BlockToFix, GE,
+ getELFX86RelocationKindName(*Kind));
dbgs() << "\n";
});
BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(),
@@ -284,25 +511,31 @@ private:
auto StrTabSec = Obj.getSection(SecRef.sh_link);
if (!StrTabSec)
return StrTabSec.takeError();
- auto StringTable = Obj.getStringTable(*StrTabSec);
+ auto StringTable = Obj.getStringTable(**StrTabSec);
if (!StringTable)
return StringTable.takeError();
- auto Name = Obj.getSectionName(&SecRef);
+ auto Name = Obj.getSectionName(SecRef);
if (!Name)
return Name.takeError();
+
+ LLVM_DEBUG(dbgs() << "Processing symbol section " << *Name << ":\n");
+
auto Section = G->findSectionByName(*Name);
if (!Section)
- return make_error<llvm::StringError>("Could not find a section",
+ return make_error<llvm::StringError>("Could not find a section " +
+ *Name,
llvm::inconvertibleErrorCode());
// we only have one for now
auto blocks = Section->blocks();
if (blocks.empty())
return make_error<llvm::StringError>("Section has no block",
llvm::inconvertibleErrorCode());
-
+ int SymbolIndex = -1;
for (auto SymRef : *Symbols) {
+ ++SymbolIndex;
auto Type = SymRef.getType();
- if (Type == ELF::STT_NOTYPE || Type == ELF::STT_FILE)
+
+ if (Type == ELF::STT_FILE || SymbolIndex == 0)
continue;
// these should do it for now
// if(Type != ELF::STT_NOTYPE &&
@@ -312,68 +545,119 @@ private:
// Type != ELF::STT_COMMON) {
// continue;
// }
- std::pair<Linkage, Scope> bindings;
auto Name = SymRef.getName(*StringTable);
// I am not sure if this is going to hold as an invariant. Revisit.
if (!Name)
return Name.takeError();
- // TODO: weak and hidden
- if (SymRef.isExternal())
- bindings = {Linkage::Strong, Scope::Default};
- else
- bindings = {Linkage::Strong, Scope::Local};
+
+ if (SymRef.isCommon()) {
+ // Symbols in SHN_COMMON refer to uninitialized data. The st_value
+ // field holds alignment constraints.
+ Symbol &S =
+ G->addCommonSymbol(*Name, Scope::Default, getCommonSection(), 0,
+ SymRef.st_size, SymRef.getValue(), false);
+ JITSymbolTable[SymbolIndex] = &S;
+ continue;
+ }
+
+ // Map Visibility and Binding to Scope and Linkage:
+ Linkage L = Linkage::Strong;
+ Scope S = Scope::Default;
+
+ switch (SymRef.getBinding()) {
+ case ELF::STB_LOCAL:
+ S = Scope::Local;
+ break;
+ case ELF::STB_GLOBAL:
+ // Nothing to do here.
+ break;
+ case ELF::STB_WEAK:
+ L = Linkage::Weak;
+ break;
+ default:
+ return make_error<StringError>("Unrecognized symbol binding for " +
+ *Name,
+ inconvertibleErrorCode());
+ }
+
+ switch (SymRef.getVisibility()) {
+ case ELF::STV_DEFAULT:
+ case ELF::STV_PROTECTED:
+ // FIXME: Make STV_DEFAULT symbols pre-emptible? This probably needs
+ // Orc support.
+ // Otherwise nothing to do here.
+ break;
+ case ELF::STV_HIDDEN:
+ // Default scope -> Hidden scope. No effect on local scope.
+ if (S == Scope::Default)
+ S = Scope::Hidden;
+ break;
+ case ELF::STV_INTERNAL:
+ return make_error<StringError>("Unrecognized symbol visibility for " +
+ *Name,
+ inconvertibleErrorCode());
+ }
if (SymRef.isDefined() &&
- (Type == ELF::STT_FUNC || Type == ELF::STT_OBJECT)) {
+ (Type == ELF::STT_FUNC || Type == ELF::STT_OBJECT ||
+ Type == ELF::STT_SECTION)) {
auto DefinedSection = Obj.getSection(SymRef.st_shndx);
if (!DefinedSection)
return DefinedSection.takeError();
- auto sectName = Obj.getSectionName(*DefinedSection);
+ auto sectName = Obj.getSectionName(**DefinedSection);
if (!sectName)
return Name.takeError();
+ // Skip debug section symbols.
+ if (isDwarfSection(*sectName))
+ continue;
+
auto JitSection = G->findSectionByName(*sectName);
if (!JitSection)
return make_error<llvm::StringError>(
- "Could not find a section", llvm::inconvertibleErrorCode());
+ "Could not find the JitSection " + *sectName,
+ llvm::inconvertibleErrorCode());
auto bs = JitSection->blocks();
if (bs.empty())
return make_error<llvm::StringError>(
"Section has no block", llvm::inconvertibleErrorCode());
- auto B = *bs.begin();
- LLVM_DEBUG({ dbgs() << " " << *Name << ": "; });
-
- auto &S = G->addDefinedSymbol(
- *B, SymRef.getValue(), *Name, SymRef.st_size, bindings.first,
- bindings.second, SymRef.getType() == ELF::STT_FUNC, false);
- JITSymbolTable[SymRef.st_shndx] = &S;
- }
- //TODO: The following has to be implmented.
+ auto *B = *bs.begin();
+ LLVM_DEBUG({ dbgs() << " " << *Name << " at index " << SymbolIndex << "\n"; });
+ if (SymRef.getType() == ELF::STT_SECTION)
+ *Name = *sectName;
+ auto &Sym = G->addDefinedSymbol(
+ *B, SymRef.getValue(), *Name, SymRef.st_size, L, S,
+ SymRef.getType() == ELF::STT_FUNC, false);
+ JITSymbolTable[SymbolIndex] = &Sym;
+ } else if (SymRef.isUndefined() && SymRef.isExternal()) {
+ auto &Sym = G->addExternalSymbol(*Name, SymRef.st_size, L);
+ JITSymbolTable[SymbolIndex] = &Sym;
+ } else
+ LLVM_DEBUG({
+ dbgs()
+ << "Not creating graph symbol for normalized symbol at index "
+ << SymbolIndex << ", \"" << *Name << "\"\n";
+ });
+
+ // TODO: The following has to be implemented.
// leaving commented out to save time for future patches
/*
G->addAbsoluteSymbol(*Name, SymRef.getValue(), SymRef.st_size,
Linkage::Strong, Scope::Default, false);
-
- if(SymRef.isCommon()) {
- G->addCommonSymbol(*Name, Scope::Default, getCommonSection(), 0, 0,
- SymRef.getValue(), false);
- }
-
-
- //G->addExternalSymbol(*Name, SymRef.st_size, Linkage::Strong);
- */
+ */
}
}
return Error::success();
}
public:
- ELFLinkGraphBuilder_x86_64(std::string filename,
+ ELFLinkGraphBuilder_x86_64(StringRef FileName,
const object::ELFFile<object::ELF64LE> &Obj)
- : G(std::make_unique<LinkGraph>(filename, getPointerSize(Obj),
- getEndianness(Obj))),
+ : G(std::make_unique<LinkGraph>(FileName.str(),
+ Triple("x86_64-unknown-linux"),
+ getPointerSize(Obj), getEndianness(Obj))),
Obj(Obj) {}
Expected<std::unique_ptr<LinkGraph>> buildGraph() {
@@ -409,55 +693,121 @@ class ELFJITLinker_x86_64 : public JITLinker<ELFJITLinker_x86_64> {
public:
ELFJITLinker_x86_64(std::unique_ptr<JITLinkContext> Ctx,
+ std::unique_ptr<LinkGraph> G,
PassConfiguration PassConfig)
- : JITLinker(std::move(Ctx), std::move(PassConfig)) {}
+ : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {}
private:
- StringRef getEdgeKindName(Edge::Kind R) const override { return StringRef(); }
-
- Expected<std::unique_ptr<LinkGraph>>
- buildGraph(MemoryBufferRef ObjBuffer) override {
- auto ELFObj = object::ObjectFile::createELFObjectFile(ObjBuffer);
- if (!ELFObj)
- return ELFObj.takeError();
-
- auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF64LE>>(**ELFObj);
- std::string fileName(ELFObj->get()->getFileName());
- return ELFLinkGraphBuilder_x86_64(std::move(fileName),
- *ELFObjFile.getELFFile())
- .buildGraph();
+ StringRef getEdgeKindName(Edge::Kind R) const override {
+ return getELFX86RelocationKindName(R);
+ }
+
+ static Error targetOutOfRangeError(const Block &B, const Edge &E) {
+ std::string ErrMsg;
+ {
+ raw_string_ostream ErrStream(ErrMsg);
+ ErrStream << "Relocation target out of range: ";
+ printEdge(ErrStream, B, E, getELFX86RelocationKindName(E.getKind()));
+ ErrStream << "\n";
+ }
+ return make_error<JITLinkError>(std::move(ErrMsg));
}
Error applyFixup(Block &B, const Edge &E, char *BlockWorkingMem) const {
using namespace ELF_x86_64_Edges;
+ using namespace llvm::support;
char *FixupPtr = BlockWorkingMem + E.getOffset();
JITTargetAddress FixupAddress = B.getAddress() + E.getOffset();
switch (E.getKind()) {
-
+ case ELFX86RelocationKind::Branch32:
+ case ELFX86RelocationKind::Branch32ToStub:
case ELFX86RelocationKind::PCRel32:
+ case ELFX86RelocationKind::PCRel32GOTLoad: {
+ int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
+ if (Value < std::numeric_limits<int32_t>::min() ||
+ Value > std::numeric_limits<int32_t>::max())
+ return targetOutOfRangeError(B, E);
+ *(little32_t *)FixupPtr = Value;
+ break;
+ }
+ case ELFX86RelocationKind::Pointer64: {
+ int64_t Value = E.getTarget().getAddress() + E.getAddend();
+ *(ulittle64_t *)FixupPtr = Value;
+ break;
+ }
+ case ELFX86RelocationKind::Delta64: {
int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
- // verify
- *(support::little32_t *)FixupPtr = Value;
+ *(little64_t *)FixupPtr = Value;
break;
}
+ }
return Error::success();
}
};
-void jitLink_ELF_x86_64(std::unique_ptr<JITLinkContext> Ctx) {
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_x86_64(MemoryBufferRef ObjectBuffer) {
+ LLVM_DEBUG({
+ dbgs() << "Building jitlink graph for new input "
+ << ObjectBuffer.getBufferIdentifier() << "...\n";
+ });
+
+ auto ELFObj = object::ObjectFile::createELFObjectFile(ObjectBuffer);
+ if (!ELFObj)
+ return ELFObj.takeError();
+
+ auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF64LE>>(**ELFObj);
+ return ELFLinkGraphBuilder_x86_64((*ELFObj)->getFileName(),
+ ELFObjFile.getELFFile())
+ .buildGraph();
+}
+
+void link_ELF_x86_64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
PassConfiguration Config;
- Triple TT("x86_64-linux");
- // Construct a JITLinker and run the link function.
- // Add a mark-live pass.
- if (auto MarkLive = Ctx->getMarkLivePass(TT))
- Config.PrePrunePasses.push_back(std::move(MarkLive));
- else
- Config.PrePrunePasses.push_back(markAllSymbolsLive);
-
- if (auto Err = Ctx->modifyPassConfig(TT, Config))
+
+ if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) {
+
+ Config.PrePrunePasses.push_back(EHFrameSplitter(".eh_frame"));
+ Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
+ ".eh_frame", G->getPointerSize(), Delta64, Delta32, NegDelta32));
+
+ // Construct a JITLinker and run the link function.
+ // Add a mark-live pass.
+ if (auto MarkLive = Ctx->getMarkLivePass(G->getTargetTriple()))
+ Config.PrePrunePasses.push_back(std::move(MarkLive));
+ else
+ Config.PrePrunePasses.push_back(markAllSymbolsLive);
+
+ // Add an in-place GOT/Stubs pass.
+ Config.PostPrunePasses.push_back([](LinkGraph &G) -> Error {
+ ELF_x86_64_GOTAndStubsBuilder(G).run();
+ return Error::success();
+ });
+
+ // Add GOT/Stubs optimizer pass.
+ Config.PreFixupPasses.push_back(optimizeELF_x86_64_GOTAndStubs);
+ }
+
+ if (auto Err = Ctx->modifyPassConfig(G->getTargetTriple(), Config))
return Ctx->notifyFailed(std::move(Err));
- ELFJITLinker_x86_64::link(std::move(Ctx), std::move(Config));
+ ELFJITLinker_x86_64::link(std::move(Ctx), std::move(G), std::move(Config));
+}
+StringRef getELFX86RelocationKindName(Edge::Kind R) {
+ switch (R) {
+ case PCRel32:
+ return "PCRel32";
+ case Pointer64:
+ return "Pointer64";
+ case PCRel32GOTLoad:
+ return "PCRel32GOTLoad";
+ case Branch32:
+ return "Branch32";
+ case Branch32ToStub:
+ return "Branch32ToStub";
+ }
+ return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
}
} // end namespace jitlink
} // end namespace llvm
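The applyFixup switch above now covers Branch32, Branch32ToStub, PCRel32, PCRel32GOTLoad, Pointer64 and Delta64, and every PC-relative case checks that target + addend - fixup address fits in a signed 32-bit value before writing it little-endian. A standalone sketch of that range check, not taken from the patch, with plain integers standing in for JITLink's Block and Edge types:

#include <cstdint>
#include <limits>
#include <optional>

// Compute the value a 32-bit PC-relative fixup would write, or return
// std::nullopt when the delta does not fit in int32_t (the
// "Relocation target out of range" case above).
std::optional<int32_t> computePCRel32(uint64_t TargetAddress, int64_t Addend,
                                      uint64_t FixupAddress) {
  int64_t Value = static_cast<int64_t>(TargetAddress) + Addend -
                  static_cast<int64_t>(FixupAddress);
  if (Value < std::numeric_limits<int32_t>::min() ||
      Value > std::numeric_limits<int32_t>::max())
    return std::nullopt;
  return static_cast<int32_t>(Value);
}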
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
index 5105ec495148..93dfba9c759b 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -64,7 +64,7 @@ const char *getGenericEdgeKindName(Edge::Kind K) {
case Edge::KeepAlive:
return "Keep-Alive";
default:
- llvm_unreachable("Unrecognized relocation kind");
+ return "<Unrecognized edge kind>";
}
}
@@ -93,6 +93,7 @@ const char *getScopeName(Scope S) {
raw_ostream &operator<<(raw_ostream &OS, const Block &B) {
return OS << formatv("{0:x16}", B.getAddress()) << " -- "
<< formatv("{0:x16}", B.getAddress() + B.getSize()) << ": "
+ << "size = " << formatv("{0:x}", B.getSize()) << ", "
<< (B.isZeroFill() ? "zero-fill" : "content")
<< ", align = " << B.getAlignment()
<< ", align-ofs = " << B.getAlignmentOffset()
@@ -126,10 +127,10 @@ raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) {
break;
}
OS << (Sym.isLive() ? '+' : '-')
- << ", size = " << formatv("{0:x8}", Sym.getSize())
+ << ", size = " << formatv("{0:x}", Sym.getSize())
<< ", addr = " << formatv("{0:x16}", Sym.getAddress()) << " ("
<< formatv("{0:x16}", Sym.getAddressable().getAddress()) << " + "
- << formatv("{0:x8}", Sym.getOffset());
+ << formatv("{0:x}", Sym.getOffset());
if (Sym.isDefined())
OS << " " << Sym.getBlock().getSection().getName();
OS << ")>";
@@ -139,8 +140,33 @@ raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) {
void printEdge(raw_ostream &OS, const Block &B, const Edge &E,
StringRef EdgeKindName) {
OS << "edge@" << formatv("{0:x16}", B.getAddress() + E.getOffset()) << ": "
- << formatv("{0:x16}", B.getAddress()) << " + " << E.getOffset() << " -- "
- << EdgeKindName << " -> " << E.getTarget() << " + " << E.getAddend();
+ << formatv("{0:x16}", B.getAddress()) << " + "
+ << formatv("{0:x}", E.getOffset()) << " -- " << EdgeKindName << " -> ";
+
+ auto &TargetSym = E.getTarget();
+ if (TargetSym.hasName())
+ OS << TargetSym.getName();
+ else {
+ auto &TargetBlock = TargetSym.getBlock();
+ auto &TargetSec = TargetBlock.getSection();
+ JITTargetAddress SecAddress = ~JITTargetAddress(0);
+ for (auto *B : TargetSec.blocks())
+ if (B->getAddress() < SecAddress)
+ SecAddress = B->getAddress();
+
+ JITTargetAddress SecDelta = TargetSym.getAddress() - SecAddress;
+ OS << formatv("{0:x16}", TargetSym.getAddress()) << " (section "
+ << TargetSec.getName();
+ if (SecDelta)
+ OS << " + " << formatv("{0:x}", SecDelta);
+ OS << " / block " << formatv("{0:x16}", TargetBlock.getAddress());
+ if (TargetSym.getOffset())
+ OS << " + " << formatv("{0:x}", TargetSym.getOffset());
+ OS << ")";
+ }
+
+ if (E.getAddend() != 0)
+ OS << " + " << E.getAddend();
}
Section::~Section() {
@@ -296,15 +322,27 @@ Error markAllSymbolsLive(LinkGraph &G) {
return Error::success();
}
-void jitLink(std::unique_ptr<JITLinkContext> Ctx) {
- auto Magic = identify_magic(Ctx->getObjectBuffer().getBuffer());
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromObject(MemoryBufferRef ObjectBuffer) {
+ auto Magic = identify_magic(ObjectBuffer.getBuffer());
switch (Magic) {
case file_magic::macho_object:
- return jitLink_MachO(std::move(Ctx));
+ return createLinkGraphFromMachOObject(std::move(ObjectBuffer));
case file_magic::elf_relocatable:
- return jitLink_ELF(std::move(Ctx));
+ return createLinkGraphFromELFObject(std::move(ObjectBuffer));
+ default:
+ return make_error<JITLinkError>("Unsupported file format");
+ };
+}
+
+void link(std::unique_ptr<LinkGraph> G, std::unique_ptr<JITLinkContext> Ctx) {
+ switch (G->getTargetTriple().getObjectFormat()) {
+ case Triple::MachO:
+ return link_MachO(std::move(G), std::move(Ctx));
+ case Triple::ELF:
+ return link_ELF(std::move(G), std::move(Ctx));
default:
- Ctx->notifyFailed(make_error<JITLinkError>("Unsupported file format"));
+ Ctx->notifyFailed(make_error<JITLinkError>("Unsupported object format"));
};
}
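The old single-shot jitLink() entry point is replaced here by two phases: createLinkGraphFromObject builds a LinkGraph from the buffer by sniffing the file magic, and link() dispatches on the graph's object format. A rough sketch of how a caller might drive the two phases; MyJITLinkContext is a hypothetical client-provided JITLinkContext implementation, not something this patch adds:

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include <memory>

// Sketch only: MyJITLinkContext is assumed to subclass
// llvm::jitlink::JITLinkContext and is defined elsewhere.
void linkObject(llvm::MemoryBufferRef ObjBuffer,
                std::unique_ptr<MyJITLinkContext> Ctx) {
  using namespace llvm::jitlink;
  // Phase 1: parse the buffer into a LinkGraph (dispatch on file magic,
  // as createLinkGraphFromObject does above).
  auto G = createLinkGraphFromObject(ObjBuffer);
  if (!G)
    return Ctx->notifyFailed(G.takeError());
  // Phase 2: hand graph and context to the format-specific linker
  // (dispatch on the graph's target triple, as link() does above).
  link(std::move(*G), std::move(Ctx));
}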
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
index 1d76a49939dc..7a5e014f223d 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -24,15 +24,6 @@ JITLinkerBase::~JITLinkerBase() {}
void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
- LLVM_DEBUG({ dbgs() << "Building jitlink graph for new input...\n"; });
-
- // Build the link graph.
- if (auto GraphOrErr = buildGraph(Ctx->getObjectBuffer()))
- G = std::move(*GraphOrErr);
- else
- return Ctx->notifyFailed(GraphOrErr.takeError());
- assert(G && "Graph should have been created by buildGraph above");
-
LLVM_DEBUG({
dbgs() << "Starting link phase 1 for graph " << G->getName() << "\n";
});
@@ -64,10 +55,22 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
if (auto Err = allocateSegments(Layout))
return Ctx->notifyFailed(std::move(Err));
+ LLVM_DEBUG({
+ dbgs() << "Link graph \"" << G->getName()
+ << "\" before post-allocation passes:\n";
+ dumpGraph(dbgs());
+ });
+
+ // Run post-allocation passes.
+ if (auto Err = runPasses(Passes.PostAllocationPasses))
+ return Ctx->notifyFailed(std::move(Err));
+
// Notify client that the defined symbols have been assigned addresses.
LLVM_DEBUG(
{ dbgs() << "Resolving symbols defined in " << G->getName() << "\n"; });
- Ctx->notifyResolved(*G);
+
+ if (auto Err = Ctx->notifyResolved(*G))
+ return Ctx->notifyFailed(std::move(Err));
auto ExternalSymbols = getExternalSymbolNames();
@@ -117,11 +120,11 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName()
- << "\" before post-allocation passes:\n";
+ << "\" before pre-fixup passes:\n";
dumpGraph(dbgs());
});
- if (auto Err = runPasses(Passes.PostAllocationPasses))
+ if (auto Err = runPasses(Passes.PreFixupPasses))
return deallocateAndBailOut(std::move(Err));
LLVM_DEBUG({
@@ -261,7 +264,8 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) {
}
LLVM_DEBUG(dbgs() << " }\n");
- if (auto AllocOrErr = Ctx->getMemoryManager().allocate(Segments))
+ if (auto AllocOrErr =
+ Ctx->getMemoryManager().allocate(Ctx->getJITLinkDylib(), Segments))
Alloc = std::move(*AllocOrErr);
else
return AllocOrErr.takeError();
@@ -332,12 +336,6 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) {
dbgs() << " " << Sym->getName() << ": "
<< formatv("{0:x16}", Sym->getAddress()) << "\n";
});
- assert(llvm::all_of(G->external_symbols(),
- [](Symbol *Sym) {
- return Sym->getAddress() != 0 ||
- Sym->getLinkage() == Linkage::Weak;
- }) &&
- "All strong external symbols should have been resolved by now");
}
void JITLinkerBase::copyBlockContentToWorkingMemory(
@@ -445,16 +443,19 @@ void prune(LinkGraph &G) {
VisitedBlocks.insert(&B);
for (auto &E : Sym->getBlock().edges()) {
- if (E.getTarget().isDefined() && !E.getTarget().isLive()) {
- E.getTarget().setLive(true);
+ // If the edge target is a defined symbol that is being newly marked live
+ // then add it to the worklist.
+ if (E.getTarget().isDefined() && !E.getTarget().isLive())
Worklist.push_back(&E.getTarget());
- }
+
+ // Mark the target live.
+ E.getTarget().setLive(true);
}
}
- // Collect all the symbols to remove, then remove them.
+ // Collect all defined symbols to remove, then remove them.
{
- LLVM_DEBUG(dbgs() << "Dead-stripping symbols:\n");
+ LLVM_DEBUG(dbgs() << "Dead-stripping defined symbols:\n");
std::vector<Symbol *> SymbolsToRemove;
for (auto *Sym : G.defined_symbols())
if (!Sym->isLive())
@@ -477,6 +478,19 @@ void prune(LinkGraph &G) {
G.removeBlock(*B);
}
}
+
+ // Collect all external symbols to remove, then remove them.
+ {
+ LLVM_DEBUG(dbgs() << "Removing unused external symbols:\n");
+ std::vector<Symbol *> SymbolsToRemove;
+ for (auto *Sym : G.external_symbols())
+ if (!Sym->isLive())
+ SymbolsToRemove.push_back(Sym);
+ for (auto *Sym : SymbolsToRemove) {
+ LLVM_DEBUG(dbgs() << " " << *Sym << "...\n");
+ G.removeExternalSymbol(*Sym);
+ }
+ }
}
} // end namespace jitlink
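The prune() changes above mark every edge target live while only queueing newly-live defined symbols for further traversal, and afterwards sweep both dead defined symbols and, newly, unreferenced external symbols. A standalone sketch of that worklist propagation, with a hypothetical Node type standing in for JITLink's Symbol and Edge:

#include <vector>

// Hypothetical stand-in for a graph node (a JITLink Symbol).
struct Node {
  bool Live = false;
  bool Defined = true;          // external symbols are "not defined"
  std::vector<Node *> Targets;  // outgoing edges
};

// Propagate liveness from the given roots, mirroring prune() above:
// every reachable target is marked live, but only defined targets are
// queued for further traversal.
void markLive(std::vector<Node *> Roots) {
  std::vector<Node *> Worklist = std::move(Roots);
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    N->Live = true;
    for (Node *T : N->Targets) {
      if (T->Defined && !T->Live)
        Worklist.push_back(T);
      T->Live = true;
    }
  }
  // Anything still !Live afterwards, defined or external, can be removed.
}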
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
index 87e5e8bbc98d..1d28f5006b2b 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
@@ -32,9 +32,11 @@ namespace jitlink {
/// remaining linker work) to allow them to be performed asynchronously.
class JITLinkerBase {
public:
- JITLinkerBase(std::unique_ptr<JITLinkContext> Ctx, PassConfiguration Passes)
- : Ctx(std::move(Ctx)), Passes(std::move(Passes)) {
+ JITLinkerBase(std::unique_ptr<JITLinkContext> Ctx,
+ std::unique_ptr<LinkGraph> G, PassConfiguration Passes)
+ : Ctx(std::move(Ctx)), G(std::move(G)), Passes(std::move(Passes)) {
assert(this->Ctx && "Ctx can not be null");
+ assert(this->G && "G can not be null");
}
virtual ~JITLinkerBase();
@@ -50,8 +52,7 @@ protected:
using SegmentLayoutMap = DenseMap<unsigned, SegmentLayout>;
// Phase 1:
- // 1.1: Build link graph
- // 1.2: Run pre-prune passes
+ // 1.1: Run pre-prune passes
// 1.2: Prune graph
// 1.3: Run post-prune passes
// 1.4: Sort blocks into segments
@@ -72,11 +73,6 @@ protected:
// 3.1: Call OnFinalized callback, handing off allocation.
void linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err);
- // Build a graph from the given object buffer.
- // To be implemented by the client.
- virtual Expected<std::unique_ptr<LinkGraph>>
- buildGraph(MemoryBufferRef ObjBuffer) = 0;
-
// For debug dumping of the link graph.
virtual StringRef getEdgeKindName(Edge::Kind K) const = 0;
@@ -113,8 +109,8 @@ private:
void dumpGraph(raw_ostream &OS);
std::unique_ptr<JITLinkContext> Ctx;
- PassConfiguration Passes;
std::unique_ptr<LinkGraph> G;
+ PassConfiguration Passes;
std::unique_ptr<JITLinkMemoryManager::Allocation> Alloc;
};
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index 68ec9d79af9b..fbbb29e9164a 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -17,7 +17,8 @@ JITLinkMemoryManager::~JITLinkMemoryManager() = default;
JITLinkMemoryManager::Allocation::~Allocation() = default;
Expected<std::unique_ptr<JITLinkMemoryManager::Allocation>>
-InProcessMemoryManager::allocate(const SegmentsRequestMap &Request) {
+InProcessMemoryManager::allocate(const JITLinkDylib *JD,
+ const SegmentsRequestMap &Request) {
using AllocationMap = DenseMap<unsigned, sys::MemoryBlock>;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
index b3e45868ab22..e9327df6da41 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
@@ -27,39 +27,29 @@ using namespace llvm;
namespace llvm {
namespace jitlink {
-void jitLink_MachO(std::unique_ptr<JITLinkContext> Ctx) {
-
- // We don't want to do full MachO validation here. Just parse enough of the
- // header to find out what MachO linker to use.
-
- StringRef Data = Ctx->getObjectBuffer().getBuffer();
- if (Data.size() < 4) {
- StringRef BufferName = Ctx->getObjectBuffer().getBufferIdentifier();
- Ctx->notifyFailed(make_error<JITLinkError>("Truncated MachO buffer \"" +
- BufferName + "\""));
- return;
- }
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromMachOObject(MemoryBufferRef ObjectBuffer) {
+ StringRef Data = ObjectBuffer.getBuffer();
+ if (Data.size() < 4)
+ return make_error<JITLinkError>("Truncated MachO buffer \"" +
+ ObjectBuffer.getBufferIdentifier() + "\"");
uint32_t Magic;
memcpy(&Magic, Data.data(), sizeof(uint32_t));
LLVM_DEBUG({
dbgs() << "jitLink_MachO: magic = " << format("0x%08" PRIx32, Magic)
- << ", identifier = \""
- << Ctx->getObjectBuffer().getBufferIdentifier() << "\"\n";
+ << ", identifier = \"" << ObjectBuffer.getBufferIdentifier()
+ << "\"\n";
});
- if (Magic == MachO::MH_MAGIC || Magic == MachO::MH_CIGAM) {
- Ctx->notifyFailed(
- make_error<JITLinkError>("MachO 32-bit platforms not supported"));
- return;
- } else if (Magic == MachO::MH_MAGIC_64 || Magic == MachO::MH_CIGAM_64) {
+ if (Magic == MachO::MH_MAGIC || Magic == MachO::MH_CIGAM)
+ return make_error<JITLinkError>("MachO 32-bit platforms not supported");
+ else if (Magic == MachO::MH_MAGIC_64 || Magic == MachO::MH_CIGAM_64) {
- if (Data.size() < sizeof(MachO::mach_header_64)) {
- StringRef BufferName = Ctx->getObjectBuffer().getBufferIdentifier();
- Ctx->notifyFailed(make_error<JITLinkError>("Truncated MachO buffer \"" +
- BufferName + "\""));
- return;
- }
+ if (Data.size() < sizeof(MachO::mach_header_64))
+ return make_error<JITLinkError>("Truncated MachO buffer \"" +
+ ObjectBuffer.getBufferIdentifier() +
+ "\"");
// Read the CPU type from the header.
uint32_t CPUType;
@@ -74,15 +64,27 @@ void jitLink_MachO(std::unique_ptr<JITLinkContext> Ctx) {
switch (CPUType) {
case MachO::CPU_TYPE_ARM64:
- return jitLink_MachO_arm64(std::move(Ctx));
+ return createLinkGraphFromMachOObject_arm64(std::move(ObjectBuffer));
case MachO::CPU_TYPE_X86_64:
- return jitLink_MachO_x86_64(std::move(Ctx));
+ return createLinkGraphFromMachOObject_x86_64(std::move(ObjectBuffer));
}
+ return make_error<JITLinkError>("MachO-64 CPU type not valid");
+ } else
+ return make_error<JITLinkError>("Unrecognized MachO magic value");
+}
+
+void link_MachO(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+
+ switch (G->getTargetTriple().getArch()) {
+ case Triple::aarch64:
+ return link_MachO_arm64(std::move(G), std::move(Ctx));
+ case Triple::x86_64:
+ return link_MachO_x86_64(std::move(G), std::move(Ctx));
+ default:
Ctx->notifyFailed(make_error<JITLinkError>("MachO-64 CPU type not valid"));
return;
}
-
- Ctx->notifyFailed(make_error<JITLinkError>("MachO magic not valid"));
}
} // end namespace jitlink
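createLinkGraphFromMachOObject above only peeks at the first four bytes to decide between the 32-bit (rejected) and 64-bit builders before doing any full MachO parsing. A standalone sketch of that header sniff; the magic constants are the usual MachO header values written out as literals rather than references to the llvm::MachO enum:

#include <cstddef>
#include <cstdint>
#include <cstring>

enum class MachOKind { TooShort, MachO32, MachO64, Unknown };

// Classify a buffer the same way the hunk above does: require at least
// four bytes, then compare the leading magic value.
MachOKind classifyMachO(const char *Data, std::size_t Size) {
  if (Size < sizeof(uint32_t))
    return MachOKind::TooShort;                       // "Truncated MachO buffer"
  uint32_t Magic;
  std::memcpy(&Magic, Data, sizeof(uint32_t));
  if (Magic == 0xfeedfaceU || Magic == 0xcefaedfeU)   // MH_MAGIC / MH_CIGAM
    return MachOKind::MachO32;                        // rejected: 32-bit unsupported
  if (Magic == 0xfeedfacfU || Magic == 0xcffaedfeU)   // MH_MAGIC_64 / MH_CIGAM_64
    return MachOKind::MachO64;                        // proceed to CPU-type dispatch
  return MachOKind::Unknown;                          // "Unrecognized MachO magic value"
}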
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
index fa3f403b717c..4602154eb579 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
@@ -45,10 +45,12 @@ Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
return std::move(G);
}
-MachOLinkGraphBuilder::MachOLinkGraphBuilder(const object::MachOObjectFile &Obj)
+MachOLinkGraphBuilder::MachOLinkGraphBuilder(const object::MachOObjectFile &Obj,
+ Triple TT)
: Obj(Obj),
G(std::make_unique<LinkGraph>(std::string(Obj.getFileName()),
- getPointerSize(Obj), getEndianness(Obj))) {}
+ std::move(TT), getPointerSize(Obj),
+ getEndianness(Obj))) {}
void MachOLinkGraphBuilder::addCustomSectionParser(
StringRef SectionName, SectionParserFunction Parser) {
@@ -64,10 +66,8 @@ Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) {
}
Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) {
- if (Type & MachO::N_PEXT)
- return Scope::Hidden;
if (Type & MachO::N_EXT) {
- if (Name.startswith("l"))
+ if ((Type & MachO::N_PEXT) || Name.startswith("l"))
return Scope::Hidden;
else
return Scope::Default;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
index dd3bcf27494c..26e6859de91d 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -16,10 +16,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/Object/MachO.h"
#include "EHFrameSupportImpl.h"
#include "JITLinkGeneric.h"
-#include "llvm/Object/MachO.h"
#include <list>
@@ -81,7 +81,7 @@ protected:
using SectionParserFunction = std::function<Error(NormalizedSection &S)>;
- MachOLinkGraphBuilder(const object::MachOObjectFile &Obj);
+ MachOLinkGraphBuilder(const object::MachOObjectFile &Obj, Triple TT);
LinkGraph &getGraph() const { return *G; }
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index 463845a5b8cb..8366e9658539 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -26,7 +26,7 @@ namespace {
class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder {
public:
MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj)
- : MachOLinkGraphBuilder(Obj),
+ : MachOLinkGraphBuilder(Obj, Triple("arm64-apple-darwin")),
NumSymbols(Obj.getSymtabLoadCommand().nsyms) {}
private:
@@ -148,10 +148,11 @@ private:
else
return ToSymbolOrErr.takeError();
} else {
- if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue))
- ToSymbol = &*ToSymbolOrErr;
- else
- return ToSymbolOrErr.takeError();
+ auto ToSymbolSec = findSectionByIndex(UnsignedRI.r_symbolnum - 1);
+ if (!ToSymbolSec)
+ return ToSymbolSec.takeError();
+ ToSymbol = getSymbolByAddress(ToSymbolSec->Address);
+ assert(ToSymbol && "No symbol for section");
FixupValue -= ToSymbol->getAddress();
}
@@ -181,6 +182,8 @@ private:
using namespace support;
auto &Obj = getObject();
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+
for (auto &S : Obj.sections()) {
JITTargetAddress SectionAddress = S.getAddress();
@@ -199,8 +202,8 @@ private:
getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
if (!NSec.GraphSection) {
LLVM_DEBUG({
- dbgs() << "Skipping relocations for MachO section " << NSec.SegName
- << "/" << NSec.SectName
+ dbgs() << " Skipping relocations for MachO section "
+ << NSec.SegName << "/" << NSec.SectName
<< " which has no associated graph section\n";
});
continue;
@@ -221,9 +224,10 @@ private:
JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address;
LLVM_DEBUG({
- dbgs() << "Processing " << getMachOARM64RelocationKindName(*Kind)
- << " relocation at " << format("0x%016" PRIx64, FixupAddress)
- << "\n";
+ auto &NSec =
+ getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
+ dbgs() << " " << NSec.SectName << " + "
+ << formatv("{0:x8}", RI.r_address) << ":\n";
});
// Find the block that the fixup points to.
@@ -252,7 +256,7 @@ private:
// If this is an Addend relocation then process it and move to the
// paired reloc.
- Addend = RI.r_symbolnum;
+ Addend = SignExtend64(RI.r_symbolnum, 24);
if (RelItr == RelEnd)
return make_error<JITLinkError>("Unpaired Addend reloc at " +
@@ -268,11 +272,12 @@ private:
return make_error<JITLinkError>(
"Invalid relocation pair: Addend + " +
getMachOARM64RelocationKindName(*Kind));
- else
- LLVM_DEBUG({
- dbgs() << " pair is " << getMachOARM64RelocationKindName(*Kind)
- << "`\n";
- });
+
+ LLVM_DEBUG({
+ dbgs() << " Addend: value = " << formatv("{0:x6}", Addend)
+ << ", pair is " << getMachOARM64RelocationKindName(*Kind)
+ << "\n";
+ });
// Find the address of the value to fix up.
JITTargetAddress PairedFixupAddress =
@@ -335,6 +340,11 @@ private:
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
return TargetSymbolOrErr.takeError();
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ uint32_t EncodedAddend = (Instr & 0x003FFC00) >> 10;
+ if (EncodedAddend != 0)
+ return make_error<JITLinkError>("GOTPAGEOFF12 target has non-zero "
+ "encoded addend");
break;
}
case GOTPageOffset12: {
@@ -377,6 +387,7 @@ private:
}
LLVM_DEBUG({
+ dbgs() << " ";
Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol,
Addend);
printEdge(dbgs(), *BlockToFix, GE,
@@ -490,22 +501,15 @@ class MachOJITLinker_arm64 : public JITLinker<MachOJITLinker_arm64> {
public:
MachOJITLinker_arm64(std::unique_ptr<JITLinkContext> Ctx,
+ std::unique_ptr<LinkGraph> G,
PassConfiguration PassConfig)
- : JITLinker(std::move(Ctx), std::move(PassConfig)) {}
+ : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {}
private:
StringRef getEdgeKindName(Edge::Kind R) const override {
return getMachOARM64RelocationKindName(R);
}
- Expected<std::unique_ptr<LinkGraph>>
- buildGraph(MemoryBufferRef ObjBuffer) override {
- auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer);
- if (!MachOObj)
- return MachOObj.takeError();
- return MachOLinkGraphBuilder_arm64(**MachOObj).buildGraph();
- }
-
static Error targetOutOfRangeError(const Block &B, const Edge &E) {
std::string ErrMsg;
{
@@ -518,23 +522,17 @@ private:
}
static unsigned getPageOffset12Shift(uint32_t Instr) {
- constexpr uint32_t LDRLiteralMask = 0x3ffffc00;
+ constexpr uint32_t LoadStoreImm12Mask = 0x3b000000;
+ constexpr uint32_t Vec128Mask = 0x04800000;
- // Check for a GPR LDR immediate with a zero embedded literal.
- // If found, the top two bits contain the shift.
- if ((Instr & LDRLiteralMask) == 0x39400000)
- return Instr >> 30;
+ if ((Instr & LoadStoreImm12Mask) == 0x39000000) {
+ uint32_t ImplicitShift = Instr >> 30;
+ if (ImplicitShift == 0)
+ if ((Instr & Vec128Mask) == Vec128Mask)
+ ImplicitShift = 4;
- // Check for a Neon LDR immediate of size 64-bit or less with a zero
- // embedded literal. If found, the top two bits contain the shift.
- if ((Instr & LDRLiteralMask) == 0x3d400000)
- return Instr >> 30;
-
- // Check for a Neon LDR immediate of size 128-bit with a zero embedded
- // literal.
- constexpr uint32_t SizeBitsMask = 0xc0000000;
- if ((Instr & (LDRLiteralMask | SizeBitsMask)) == 0x3dc00000)
- return 4;
+ return ImplicitShift;
+ }
return 0;
}
@@ -581,10 +579,12 @@ private:
}
case Page21:
case GOTPage21: {
- assert(E.getAddend() == 0 && "PAGE21/GOTPAGE21 with non-zero addend");
+ assert((E.getKind() != GOTPage21 || E.getAddend() == 0) &&
+ "GOTPAGE21 with non-zero addend");
uint64_t TargetPage =
- E.getTarget().getAddress() & ~static_cast<uint64_t>(4096 - 1);
- uint64_t PCPage = B.getAddress() & ~static_cast<uint64_t>(4096 - 1);
+ (E.getTarget().getAddress() + E.getAddend()) &
+ ~static_cast<uint64_t>(4096 - 1);
+ uint64_t PCPage = FixupAddress & ~static_cast<uint64_t>(4096 - 1);
int64_t PageDelta = TargetPage - PCPage;
if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1))
@@ -600,8 +600,8 @@ private:
break;
}
case PageOffset12: {
- assert(E.getAddend() == 0 && "PAGEOFF12 with non-zero addend");
- uint64_t TargetOffset = E.getTarget().getAddress() & 0xfff;
+ uint64_t TargetOffset =
+ (E.getTarget().getAddress() + E.getAddend()) & 0xfff;
uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
unsigned ImmShift = getPageOffset12Shift(RawInstr);
@@ -674,13 +674,22 @@ private:
uint64_t NullValue = 0;
};
-void jitLink_MachO_arm64(std::unique_ptr<JITLinkContext> Ctx) {
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromMachOObject_arm64(MemoryBufferRef ObjectBuffer) {
+ auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjectBuffer);
+ if (!MachOObj)
+ return MachOObj.takeError();
+ return MachOLinkGraphBuilder_arm64(**MachOObj).buildGraph();
+}
+
+void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+
PassConfiguration Config;
- Triple TT("arm64-apple-ios");
- if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+ if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) {
// Add a mark-live pass.
- if (auto MarkLive = Ctx->getMarkLivePass(TT))
+ if (auto MarkLive = Ctx->getMarkLivePass(G->getTargetTriple()))
Config.PrePrunePasses.push_back(std::move(MarkLive));
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
@@ -692,11 +701,11 @@ void jitLink_MachO_arm64(std::unique_ptr<JITLinkContext> Ctx) {
});
}
- if (auto Err = Ctx->modifyPassConfig(TT, Config))
+ if (auto Err = Ctx->modifyPassConfig(G->getTargetTriple(), Config))
return Ctx->notifyFailed(std::move(Err));
// Construct a JITLinker and run the link function.
- MachOJITLinker_arm64::link(std::move(Ctx), std::move(Config));
+ MachOJITLinker_arm64::link(std::move(Ctx), std::move(G), std::move(Config));
}
StringRef getMachOARM64RelocationKindName(Edge::Kind R) {
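The Page21/PageOffset12 cases above now fold the addend into the page computation and take the page of the fixup address rather than the containing block's address. A standalone sketch of the ADRP-style arithmetic, using plain integers in place of the Edge and Block types:

#include <cstdint>
#include <optional>

// ADRP/ADD-style page arithmetic from the Page21 and PageOffset12 cases:
// the ADRP fixup encodes the 4 KiB-page delta between fixup and target
// (addend included), and the paired :lo12: fixup encodes the low twelve
// bits of the target address.
std::optional<int64_t> page21Delta(uint64_t TargetAddress, int64_t Addend,
                                   uint64_t FixupAddress) {
  uint64_t TargetPage = (TargetAddress + Addend) & ~uint64_t(4096 - 1);
  uint64_t PCPage = FixupAddress & ~uint64_t(4096 - 1);
  int64_t PageDelta = int64_t(TargetPage - PCPage);
  // ADRP reaches +/- 1 GiB; anything beyond that is a link error.
  if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1))
    return std::nullopt;
  return PageDelta;
}

uint64_t pageOffset12(uint64_t TargetAddress, int64_t Addend) {
  return (TargetAddress + Addend) & 0xfff;
}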
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index a91bc3b6033c..bde4a19e71ba 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -26,7 +26,7 @@ namespace {
class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder {
public:
MachOLinkGraphBuilder_x86_64(const object::MachOObjectFile &Obj)
- : MachOLinkGraphBuilder(Obj) {}
+ : MachOLinkGraphBuilder(Obj, Triple("x86_64-apple-darwin")) {}
private:
static Expected<MachOX86RelocationKind>
@@ -150,10 +150,11 @@ private:
else
return ToSymbolOrErr.takeError();
} else {
- if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue))
- ToSymbol = &*ToSymbolOrErr;
- else
- return ToSymbolOrErr.takeError();
+ auto ToSymbolSec = findSectionByIndex(UnsignedRI.r_symbolnum - 1);
+ if (!ToSymbolSec)
+ return ToSymbolSec.takeError();
+ ToSymbol = getSymbolByAddress(ToSymbolSec->Address);
+ assert(ToSymbol && "No symbol for section");
FixupValue -= ToSymbol->getAddress();
}
@@ -183,6 +184,8 @@ private:
using namespace support;
auto &Obj = getObject();
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+
for (auto &S : Obj.sections()) {
JITTargetAddress SectionAddress = S.getAddress();
@@ -201,8 +204,8 @@ private:
getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
if (!NSec.GraphSection) {
LLVM_DEBUG({
- dbgs() << "Skipping relocations for MachO section " << NSec.SegName
- << "/" << NSec.SectName
+ dbgs() << " Skipping relocations for MachO section "
+ << NSec.SegName << "/" << NSec.SectName
<< " which has no associated graph section\n";
});
continue;
@@ -224,8 +227,10 @@ private:
JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address;
LLVM_DEBUG({
- dbgs() << "Processing relocation at "
- << format("0x%016" PRIx64, FixupAddress) << "\n";
+ auto &NSec =
+ getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
+ dbgs() << " " << NSec.SectName << " + "
+ << formatv("{0:x8}", RI.r_address) << ":\n";
});
// Find the block that the fixup points to.
@@ -334,12 +339,16 @@ private:
assert(TargetSymbol && "No target symbol from parsePairRelocation?");
break;
}
+ case PCRel32TLV:
+ return make_error<JITLinkError>(
+ "MachO TLV relocations not yet supported");
default:
llvm_unreachable("Special relocation kind should not appear in "
"mach-o file");
}
LLVM_DEBUG({
+ dbgs() << " ";
Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol,
Addend);
printEdge(dbgs(), *BlockToFix, GE,
@@ -539,22 +548,15 @@ class MachOJITLinker_x86_64 : public JITLinker<MachOJITLinker_x86_64> {
public:
MachOJITLinker_x86_64(std::unique_ptr<JITLinkContext> Ctx,
+ std::unique_ptr<LinkGraph> G,
PassConfiguration PassConfig)
- : JITLinker(std::move(Ctx), std::move(PassConfig)) {}
+ : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {}
private:
StringRef getEdgeKindName(Edge::Kind R) const override {
return getMachOX86RelocationKindName(R);
}
- Expected<std::unique_ptr<LinkGraph>>
- buildGraph(MemoryBufferRef ObjBuffer) override {
- auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer);
- if (!MachOObj)
- return MachOObj.takeError();
- return MachOLinkGraphBuilder_x86_64(**MachOObj).buildGraph();
- }
-
static Error targetOutOfRangeError(const Block &B, const Edge &E) {
std::string ErrMsg;
{
@@ -651,18 +653,27 @@ private:
uint64_t NullValue = 0;
};
-void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx) {
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromMachOObject_x86_64(MemoryBufferRef ObjectBuffer) {
+ auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjectBuffer);
+ if (!MachOObj)
+ return MachOObj.takeError();
+ return MachOLinkGraphBuilder_x86_64(**MachOObj).buildGraph();
+}
+
+void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+
PassConfiguration Config;
- Triple TT("x86_64-apple-macosx");
- if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+ if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) {
// Add eh-frame passes.
Config.PrePrunePasses.push_back(EHFrameSplitter("__eh_frame"));
- Config.PrePrunePasses.push_back(
- EHFrameEdgeFixer("__eh_frame", NegDelta32, Delta64, Delta64));
+ Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
+ "__eh_frame", G->getPointerSize(), Delta64, Delta32, NegDelta32));
// Add a mark-live pass.
- if (auto MarkLive = Ctx->getMarkLivePass(TT))
+ if (auto MarkLive = Ctx->getMarkLivePass(G->getTargetTriple()))
Config.PrePrunePasses.push_back(std::move(MarkLive));
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
@@ -674,14 +685,14 @@ void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx) {
});
// Add GOT/Stubs optimizer pass.
- Config.PostAllocationPasses.push_back(optimizeMachO_x86_64_GOTAndStubs);
+ Config.PreFixupPasses.push_back(optimizeMachO_x86_64_GOTAndStubs);
}
- if (auto Err = Ctx->modifyPassConfig(TT, Config))
+ if (auto Err = Ctx->modifyPassConfig(G->getTargetTriple(), Config))
return Ctx->notifyFailed(std::move(Err));
// Construct a JITLinker and run the link function.
- MachOJITLinker_x86_64::link(std::move(Ctx), std::move(Config));
+ MachOJITLinker_x86_64::link(std::move(Ctx), std::move(G), std::move(Config));
}
StringRef getMachOX86RelocationKindName(Edge::Kind R) {
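The x86-64 MachO pipeline now keys its passes off G->getTargetTriple() and runs the GOT/stubs optimizer in the new PreFixupPasses slot. Custom passes are still just Error-returning callables pushed onto a PassConfiguration list; a small illustrative example in the same style as the lambda passes above, not part of the patch:

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Support/Debug.h"

// Append a custom post-prune pass to a PassConfiguration. Purely
// illustrative: the pass just lists the symbols that survived pruning.
void addDumpPass(llvm::jitlink::PassConfiguration &Config) {
  Config.PostPrunePasses.push_back(
      [](llvm::jitlink::LinkGraph &G) -> llvm::Error {
        for (auto *Sym : G.defined_symbols())
          llvm::dbgs() << "kept symbol: " << Sym->getName() << "\n";
        return llvm::Error::success();
      });
}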
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index 83b64b5171c0..52e7eda90310 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -102,22 +102,22 @@ class MCJIT : public ExecutionEngine {
}
bool hasModuleBeenAddedButNotLoaded(Module *M) {
- return AddedModules.count(M) != 0;
+ return AddedModules.contains(M);
}
bool hasModuleBeenLoaded(Module *M) {
// If the module is in either the "loaded" or "finalized" sections it
// has been loaded.
- return (LoadedModules.count(M) != 0 ) || (FinalizedModules.count(M) != 0);
+ return LoadedModules.contains(M) || FinalizedModules.contains(M);
}
bool hasModuleBeenFinalized(Module *M) {
- return FinalizedModules.count(M) != 0;
+ return FinalizedModules.contains(M);
}
bool ownsModule(Module* M) {
- return (AddedModules.count(M) != 0) || (LoadedModules.count(M) != 0) ||
- (FinalizedModules.count(M) != 0);
+ return AddedModules.contains(M) || LoadedModules.contains(M) ||
+ FinalizedModules.contains(M);
}
void markModuleAsLoaded(Module *M) {
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index 9e38dc36faae..68878f6729e9 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -73,22 +73,21 @@ class PartitioningIRMaterializationUnit : public IRMaterializationUnit {
public:
PartitioningIRMaterializationUnit(ExecutionSession &ES,
const IRSymbolMapper::ManglingOptions &MO,
- ThreadSafeModule TSM, VModuleKey K,
+ ThreadSafeModule TSM,
CompileOnDemandLayer &Parent)
- : IRMaterializationUnit(ES, MO, std::move(TSM), std::move(K)),
- Parent(Parent) {}
+ : IRMaterializationUnit(ES, MO, std::move(TSM)), Parent(Parent) {}
PartitioningIRMaterializationUnit(
- ThreadSafeModule TSM, VModuleKey K, SymbolFlagsMap SymbolFlags,
+ ThreadSafeModule TSM, SymbolFlagsMap SymbolFlags,
SymbolStringPtr InitSymbol, SymbolNameToDefinitionMap SymbolToDefinition,
CompileOnDemandLayer &Parent)
- : IRMaterializationUnit(std::move(TSM), std::move(K),
- std::move(SymbolFlags), std::move(InitSymbol),
+ : IRMaterializationUnit(std::move(TSM), std::move(SymbolFlags),
+ std::move(InitSymbol),
std::move(SymbolToDefinition)),
Parent(Parent) {}
private:
- void materialize(MaterializationResponsibility R) override {
+ void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
Parent.emitPartition(std::move(R), std::move(TSM),
std::move(SymbolToDefinition));
}
@@ -128,15 +127,15 @@ void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) {
void CompileOnDemandLayer::setImplMap(ImplSymbolMap *Imp) {
this->AliaseeImpls = Imp;
}
-void CompileOnDemandLayer::emit(MaterializationResponsibility R,
- ThreadSafeModule TSM) {
+void CompileOnDemandLayer::emit(
+ std::unique_ptr<MaterializationResponsibility> R, ThreadSafeModule TSM) {
assert(TSM && "Null module");
auto &ES = getExecutionSession();
// Sort the callables and non-callables, build re-exports and lodge the
// actual module with the implementation dylib.
- auto &PDR = getPerDylibResources(R.getTargetJITDylib());
+ auto &PDR = getPerDylibResources(R->getTargetJITDylib());
SymbolAliasMap NonCallables;
SymbolAliasMap Callables;
@@ -145,7 +144,7 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R,
cleanUpModule(M);
});
- for (auto &KV : R.getSymbols()) {
+ for (auto &KV : R->getSymbols()) {
auto &Name = KV.first;
auto &Flags = KV.second;
if (Flags.isCallable())
@@ -158,19 +157,29 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R,
// implementation dylib.
if (auto Err = PDR.getImplDylib().define(
std::make_unique<PartitioningIRMaterializationUnit>(
- ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(),
- *this))) {
+ ES, *getManglingOptions(), std::move(TSM), *this))) {
ES.reportError(std::move(Err));
- R.failMaterialization();
+ R->failMaterialization();
return;
}
if (!NonCallables.empty())
- R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables),
- JITDylibLookupFlags::MatchAllSymbols));
- if (!Callables.empty())
- R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(),
- std::move(Callables), AliaseeImpls));
+ if (auto Err =
+ R->replace(reexports(PDR.getImplDylib(), std::move(NonCallables),
+ JITDylibLookupFlags::MatchAllSymbols))) {
+ getExecutionSession().reportError(std::move(Err));
+ R->failMaterialization();
+ return;
+ }
+ if (!Callables.empty()) {
+ if (auto Err = R->replace(
+ lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(),
+ std::move(Callables), AliaseeImpls))) {
+ getExecutionSession().reportError(std::move(Err));
+ R->failMaterialization();
+ return;
+ }
+ }
}
CompileOnDemandLayer::PerDylibResources &
@@ -247,7 +256,7 @@ void CompileOnDemandLayer::expandPartition(GlobalValueSet &Partition) {
}
void CompileOnDemandLayer::emitPartition(
- MaterializationResponsibility R, ThreadSafeModule TSM,
+ std::unique_ptr<MaterializationResponsibility> R, ThreadSafeModule TSM,
IRMaterializationUnit::SymbolNameToDefinitionMap Defs) {
// FIXME: Need a 'notify lazy-extracting/emitting' callback to tie the
@@ -257,8 +266,8 @@ void CompileOnDemandLayer::emitPartition(
auto &ES = getExecutionSession();
GlobalValueSet RequestedGVs;
- for (auto &Name : R.getRequestedSymbols()) {
- if (Name == R.getInitializerSymbol())
+ for (auto &Name : R->getRequestedSymbols()) {
+ if (Name == R->getInitializerSymbol())
TSM.withModuleDo([&](Module &M) {
for (auto &GV : getStaticInitGVs(M))
RequestedGVs.insert(&GV);
@@ -285,9 +294,14 @@ void CompileOnDemandLayer::emitPartition(
// If the partition is empty, return the whole module to the symbol table.
if (GVsToExtract->empty()) {
- R.replace(std::make_unique<PartitioningIRMaterializationUnit>(
- std::move(TSM), R.getVModuleKey(), R.getSymbols(),
- R.getInitializerSymbol(), std::move(Defs), *this));
+ if (auto Err =
+ R->replace(std::make_unique<PartitioningIRMaterializationUnit>(
+ std::move(TSM), R->getSymbols(), R->getInitializerSymbol(),
+ std::move(Defs), *this))) {
+ getExecutionSession().reportError(std::move(Err));
+ R->failMaterialization();
+ return;
+ }
return;
}
@@ -308,7 +322,7 @@ void CompileOnDemandLayer::emitPartition(
IRSymbolMapper::add(ES, *getManglingOptions(),
PromotedGlobals, SymbolFlags);
- if (auto Err = R.defineMaterializing(SymbolFlags))
+ if (auto Err = R->defineMaterializing(SymbolFlags))
return std::move(Err);
}
@@ -348,12 +362,16 @@ void CompileOnDemandLayer::emitPartition(
if (!ExtractedTSM) {
ES.reportError(ExtractedTSM.takeError());
- R.failMaterialization();
+ R->failMaterialization();
return;
}
- R.replace(std::make_unique<PartitioningIRMaterializationUnit>(
- ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(), *this));
+ if (auto Err = R->replace(std::make_unique<PartitioningIRMaterializationUnit>(
+ ES, *getManglingOptions(), std::move(TSM), *this))) {
+ ES.reportError(std::move(Err));
+ R->failMaterialization();
+ return;
+ }
BaseLayer.emit(std::move(R), std::move(*ExtractedTSM));
}
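Because MaterializationResponsibility::replace and delegate now return Error or Expected values, every call site in this hunk gains the same report-and-fail handling. A hypothetical helper, not part of the patch, that captures that shape:

#include "llvm/ExecutionEngine/Orc/Core.h"
#include <utility>

// If an Error-returning operation on a MaterializationResponsibility
// fails, report it and fail materialization so the affected symbols do
// not stay stuck in the materializing state.
template <typename OpFn>
bool tryOrFail(llvm::orc::ExecutionSession &ES,
               llvm::orc::MaterializationResponsibility &R, OpFn &&Op) {
  if (llvm::Error Err = Op()) {
    ES.reportError(std::move(Err));
    R.failMaterialization();
    return false;
  }
  return true;
}
// Possible use at a call site like the ones above:
//   if (!tryOrFail(ES, *R, [&] { return R->replace(std::move(MU)); }))
//     return;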
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
index bad13cfebbc6..dfb558808c32 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -11,18 +11,19 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
-#include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
#include <condition_variable>
-#if LLVM_ENABLE_THREADS
#include <future>
-#endif
#define DEBUG_TYPE "orc"
namespace llvm {
namespace orc {
+char ResourceTrackerDefunct::ID = 0;
char FailedToMaterialize::ID = 0;
char SymbolsNotFound::ID = 0;
char SymbolsCouldNotBeRemoved::ID = 0;
@@ -34,6 +35,45 @@ RegisterDependenciesFunction NoDependenciesToRegister =
void MaterializationUnit::anchor() {}
+ResourceTracker::ResourceTracker(JITDylibSP JD) {
+ assert((reinterpret_cast<uintptr_t>(JD.get()) & 0x1) == 0 &&
+ "JITDylib must be two byte aligned");
+ JD->Retain();
+ JDAndFlag.store(reinterpret_cast<uintptr_t>(JD.get()));
+}
+
+ResourceTracker::~ResourceTracker() {
+ getJITDylib().getExecutionSession().destroyResourceTracker(*this);
+ getJITDylib().Release();
+}
+
+Error ResourceTracker::remove() {
+ return getJITDylib().getExecutionSession().removeResourceTracker(*this);
+}
+
+void ResourceTracker::transferTo(ResourceTracker &DstRT) {
+ getJITDylib().getExecutionSession().transferResourceTracker(DstRT, *this);
+}
+
+void ResourceTracker::makeDefunct() {
+ uintptr_t Val = JDAndFlag.load();
+ Val |= 0x1U;
+ JDAndFlag.store(Val);
+}
+
+ResourceManager::~ResourceManager() {}
+
+ResourceTrackerDefunct::ResourceTrackerDefunct(ResourceTrackerSP RT)
+ : RT(std::move(RT)) {}
+
+std::error_code ResourceTrackerDefunct::convertToErrorCode() const {
+ return orcError(OrcErrorCode::UnknownORCError);
+}
+
+void ResourceTrackerDefunct::log(raw_ostream &OS) const {
+ OS << "Resource tracker " << (void *)RT.get() << " became defunct";
+}
+
FailedToMaterialize::FailedToMaterialize(
std::shared_ptr<SymbolDependenceMap> Symbols)
: Symbols(std::move(Symbols)) {
@@ -137,8 +177,6 @@ void AsynchronousSymbolQuery::handleComplete() {
TmpNotifyComplete(std::move(ResolvedSymbols));
}
-bool AsynchronousSymbolQuery::canStillFail() { return !!NotifyComplete; }
-
void AsynchronousSymbolQuery::handleFailed(Error Err) {
assert(QueryRegistrations.empty() && ResolvedSymbols.empty() &&
OutstandingSymbolsCount == 0 &&
@@ -181,156 +219,9 @@ void AsynchronousSymbolQuery::detach() {
QueryRegistrations.clear();
}
-MaterializationResponsibility::~MaterializationResponsibility() {
- assert(SymbolFlags.empty() &&
- "All symbols should have been explicitly materialized or failed");
-}
-
-SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const {
- return JD->getRequestedSymbols(SymbolFlags);
-}
-
-Error MaterializationResponsibility::notifyResolved(const SymbolMap &Symbols) {
- LLVM_DEBUG({
- dbgs() << "In " << JD->getName() << " resolving " << Symbols << "\n";
- });
-#ifndef NDEBUG
- for (auto &KV : Symbols) {
- auto WeakFlags = JITSymbolFlags::Weak | JITSymbolFlags::Common;
- auto I = SymbolFlags.find(KV.first);
- assert(I != SymbolFlags.end() &&
- "Resolving symbol outside this responsibility set");
- assert(!I->second.hasMaterializationSideEffectsOnly() &&
- "Can't resolve materialization-side-effects-only symbol");
- assert((KV.second.getFlags() & ~WeakFlags) == (I->second & ~WeakFlags) &&
- "Resolving symbol with incorrect flags");
- }
-#endif
-
- return JD->resolve(Symbols);
-}
-
-Error MaterializationResponsibility::notifyEmitted() {
-
- LLVM_DEBUG({
- dbgs() << "In " << JD->getName() << " emitting " << SymbolFlags << "\n";
- });
-
- if (auto Err = JD->emit(SymbolFlags))
- return Err;
-
- SymbolFlags.clear();
- return Error::success();
-}
-
-Error MaterializationResponsibility::defineMaterializing(
- SymbolFlagsMap NewSymbolFlags) {
-
- LLVM_DEBUG({
- dbgs() << "In " << JD->getName() << " defining materializing symbols "
- << NewSymbolFlags << "\n";
- });
- if (auto AcceptedDefs = JD->defineMaterializing(std::move(NewSymbolFlags))) {
- // Add all newly accepted symbols to this responsibility object.
- for (auto &KV : *AcceptedDefs)
- SymbolFlags.insert(KV);
- return Error::success();
- } else
- return AcceptedDefs.takeError();
-}
-
-void MaterializationResponsibility::failMaterialization() {
-
- LLVM_DEBUG({
- dbgs() << "In " << JD->getName() << " failing materialization for "
- << SymbolFlags << "\n";
- });
-
- JITDylib::FailedSymbolsWorklist Worklist;
-
- for (auto &KV : SymbolFlags)
- Worklist.push_back(std::make_pair(JD.get(), KV.first));
- SymbolFlags.clear();
-
- JD->notifyFailed(std::move(Worklist));
-}
-
-void MaterializationResponsibility::replace(
- std::unique_ptr<MaterializationUnit> MU) {
-
- // If the replacement MU is empty then return.
- if (MU->getSymbols().empty())
- return;
-
- for (auto &KV : MU->getSymbols()) {
- assert(SymbolFlags.count(KV.first) &&
- "Replacing definition outside this responsibility set");
- SymbolFlags.erase(KV.first);
- }
-
- if (MU->getInitializerSymbol() == InitSymbol)
- InitSymbol = nullptr;
-
- LLVM_DEBUG(JD->getExecutionSession().runSessionLocked([&]() {
- dbgs() << "In " << JD->getName() << " replacing symbols with " << *MU
- << "\n";
- }););
-
- JD->replace(std::move(MU));
-}
-
-MaterializationResponsibility
-MaterializationResponsibility::delegate(const SymbolNameSet &Symbols,
- VModuleKey NewKey) {
-
- if (NewKey == VModuleKey())
- NewKey = K;
-
- SymbolStringPtr DelegatedInitSymbol;
- SymbolFlagsMap DelegatedFlags;
-
- for (auto &Name : Symbols) {
- auto I = SymbolFlags.find(Name);
- assert(I != SymbolFlags.end() &&
- "Symbol is not tracked by this MaterializationResponsibility "
- "instance");
-
- DelegatedFlags[Name] = std::move(I->second);
- if (Name == InitSymbol)
- std::swap(InitSymbol, DelegatedInitSymbol);
-
- SymbolFlags.erase(I);
- }
-
- return MaterializationResponsibility(JD, std::move(DelegatedFlags),
- std::move(DelegatedInitSymbol),
- std::move(NewKey));
-}
-
-void MaterializationResponsibility::addDependencies(
- const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies) {
- LLVM_DEBUG({
- dbgs() << "Adding dependencies for " << Name << ": " << Dependencies
- << "\n";
- });
- assert(SymbolFlags.count(Name) &&
- "Symbol not covered by this MaterializationResponsibility instance");
- JD->addDependencies(Name, Dependencies);
-}
-
-void MaterializationResponsibility::addDependenciesForAll(
- const SymbolDependenceMap &Dependencies) {
- LLVM_DEBUG({
- dbgs() << "Adding dependencies for all symbols in " << SymbolFlags << ": "
- << Dependencies << "\n";
- });
- for (auto &KV : SymbolFlags)
- JD->addDependencies(KV.first, Dependencies);
-}
-
AbsoluteSymbolsMaterializationUnit::AbsoluteSymbolsMaterializationUnit(
- SymbolMap Symbols, VModuleKey K)
- : MaterializationUnit(extractFlags(Symbols), nullptr, std::move(K)),
+ SymbolMap Symbols)
+ : MaterializationUnit(extractFlags(Symbols), nullptr),
Symbols(std::move(Symbols)) {}
StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
@@ -338,10 +229,10 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
}
void AbsoluteSymbolsMaterializationUnit::materialize(
- MaterializationResponsibility R) {
+ std::unique_ptr<MaterializationResponsibility> R) {
// No dependencies, so these calls can't fail.
- cantFail(R.notifyResolved(Symbols));
- cantFail(R.notifyEmitted());
+ cantFail(R->notifyResolved(Symbols));
+ cantFail(R->notifyEmitted());
}
void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD,
@@ -360,26 +251,25 @@ AbsoluteSymbolsMaterializationUnit::extractFlags(const SymbolMap &Symbols) {
ReExportsMaterializationUnit::ReExportsMaterializationUnit(
JITDylib *SourceJD, JITDylibLookupFlags SourceJDLookupFlags,
- SymbolAliasMap Aliases, VModuleKey K)
- : MaterializationUnit(extractFlags(Aliases), nullptr, std::move(K)),
- SourceJD(SourceJD), SourceJDLookupFlags(SourceJDLookupFlags),
- Aliases(std::move(Aliases)) {}
+ SymbolAliasMap Aliases)
+ : MaterializationUnit(extractFlags(Aliases), nullptr), SourceJD(SourceJD),
+ SourceJDLookupFlags(SourceJDLookupFlags), Aliases(std::move(Aliases)) {}
StringRef ReExportsMaterializationUnit::getName() const {
return "<Reexports>";
}
void ReExportsMaterializationUnit::materialize(
- MaterializationResponsibility R) {
+ std::unique_ptr<MaterializationResponsibility> R) {
- auto &ES = R.getTargetJITDylib().getExecutionSession();
- JITDylib &TgtJD = R.getTargetJITDylib();
+ auto &ES = R->getTargetJITDylib().getExecutionSession();
+ JITDylib &TgtJD = R->getTargetJITDylib();
JITDylib &SrcJD = SourceJD ? *SourceJD : TgtJD;
// Find the set of requested aliases and aliasees. Return any unrequested
// aliases back to the JITDylib so as to not prematurely materialize any
// aliasees.
- auto RequestedSymbols = R.getRequestedSymbols();
+ auto RequestedSymbols = R->getRequestedSymbols();
SymbolAliasMap RequestedAliases;
for (auto &Name : RequestedSymbols) {
@@ -398,19 +288,27 @@ void ReExportsMaterializationUnit::materialize(
});
if (!Aliases.empty()) {
- if (SourceJD)
- R.replace(reexports(*SourceJD, std::move(Aliases), SourceJDLookupFlags));
- else
- R.replace(symbolAliases(std::move(Aliases)));
+ auto Err = SourceJD ? R->replace(reexports(*SourceJD, std::move(Aliases),
+ SourceJDLookupFlags))
+ : R->replace(symbolAliases(std::move(Aliases)));
+
+ if (Err) {
+ // FIXME: Should this be reported / treated as failure to materialize?
+ // Or should this be treated as a sanctioned bailing-out?
+ ES.reportError(std::move(Err));
+ R->failMaterialization();
+ return;
+ }
}
// The OnResolveInfo struct will hold the aliases and responsibility for each
// query in the list.
struct OnResolveInfo {
- OnResolveInfo(MaterializationResponsibility R, SymbolAliasMap Aliases)
+ OnResolveInfo(std::unique_ptr<MaterializationResponsibility> R,
+ SymbolAliasMap Aliases)
: R(std::move(R)), Aliases(std::move(Aliases)) {}
- MaterializationResponsibility R;
+ std::unique_ptr<MaterializationResponsibility> R;
SymbolAliasMap Aliases;
};
@@ -450,8 +348,15 @@ void ReExportsMaterializationUnit::materialize(
assert(!QuerySymbols.empty() && "Alias cycle detected!");
- auto QueryInfo = std::make_shared<OnResolveInfo>(
- R.delegate(ResponsibilitySymbols), std::move(QueryAliases));
+ auto NewR = R->delegate(ResponsibilitySymbols);
+ if (!NewR) {
+ ES.reportError(NewR.takeError());
+ R->failMaterialization();
+ return;
+ }
+
+ auto QueryInfo = std::make_shared<OnResolveInfo>(std::move(*NewR),
+ std::move(QueryAliases));
QueryInfos.push_back(
make_pair(std::move(QuerySymbols), std::move(QueryInfo)));
}
@@ -480,12 +385,12 @@ void ReExportsMaterializationUnit::materialize(
for (auto &KV : QueryInfo->Aliases)
if (SrcJDDeps.count(KV.second.Aliasee)) {
PerAliasDeps = {KV.second.Aliasee};
- QueryInfo->R.addDependencies(KV.first, PerAliasDepsMap);
+ QueryInfo->R->addDependencies(KV.first, PerAliasDepsMap);
}
};
auto OnComplete = [QueryInfo](Expected<SymbolMap> Result) {
- auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession();
+ auto &ES = QueryInfo->R->getTargetJITDylib().getExecutionSession();
if (Result) {
SymbolMap ResolutionMap;
for (auto &KV : QueryInfo->Aliases) {
@@ -499,19 +404,19 @@ void ReExportsMaterializationUnit::materialize(
ResolutionMap[KV.first] = JITEvaluatedSymbol(
(*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags);
}
- if (auto Err = QueryInfo->R.notifyResolved(ResolutionMap)) {
+ if (auto Err = QueryInfo->R->notifyResolved(ResolutionMap)) {
ES.reportError(std::move(Err));
- QueryInfo->R.failMaterialization();
+ QueryInfo->R->failMaterialization();
return;
}
- if (auto Err = QueryInfo->R.notifyEmitted()) {
+ if (auto Err = QueryInfo->R->notifyEmitted()) {
ES.reportError(std::move(Err));
- QueryInfo->R.failMaterialization();
+ QueryInfo->R->failMaterialization();
return;
}
} else {
ES.reportError(Result.takeError());
- QueryInfo->R.failMaterialization();
+ QueryInfo->R->failMaterialization();
}
};
@@ -538,20 +443,16 @@ ReExportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) {
return SymbolFlags;
}
-Expected<SymbolAliasMap>
-buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) {
+Expected<SymbolAliasMap> buildSimpleReexportsAliasMap(JITDylib &SourceJD,
+ SymbolNameSet Symbols) {
SymbolLookupSet LookupSet(Symbols);
- auto Flags = SourceJD.lookupFlags(
- LookupKind::Static, JITDylibLookupFlags::MatchAllSymbols, LookupSet);
+ auto Flags = SourceJD.getExecutionSession().lookupFlags(
+ LookupKind::Static, {{&SourceJD, JITDylibLookupFlags::MatchAllSymbols}},
+ SymbolLookupSet(std::move(Symbols)));
if (!Flags)
return Flags.takeError();
- if (!LookupSet.empty()) {
- LookupSet.sortByName();
- return make_error<SymbolsNotFound>(LookupSet.getSymbolNames());
- }
-
SymbolAliasMap Result;
for (auto &Name : Symbols) {
assert(Flags->count(Name) && "Missing entry in flags map");
@@ -561,19 +462,100 @@ buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) {
return Result;
}
+class InProgressLookupState {
+public:
+ InProgressLookupState(LookupKind K, JITDylibSearchOrder SearchOrder,
+ SymbolLookupSet LookupSet, SymbolState RequiredState)
+ : K(K), SearchOrder(std::move(SearchOrder)),
+ LookupSet(std::move(LookupSet)), RequiredState(RequiredState) {
+ DefGeneratorCandidates = this->LookupSet;
+ }
+ virtual ~InProgressLookupState() {}
+ virtual void complete(std::unique_ptr<InProgressLookupState> IPLS) = 0;
+ virtual void fail(Error Err) = 0;
+
+ LookupKind K;
+ JITDylibSearchOrder SearchOrder;
+ SymbolLookupSet LookupSet;
+ SymbolState RequiredState;
+
+ std::unique_lock<std::mutex> GeneratorLock;
+ size_t CurSearchOrderIndex = 0;
+ bool NewJITDylib = true;
+ SymbolLookupSet DefGeneratorCandidates;
+ SymbolLookupSet DefGeneratorNonCandidates;
+ std::vector<std::weak_ptr<DefinitionGenerator>> CurDefGeneratorStack;
+};
+
+class InProgressLookupFlagsState : public InProgressLookupState {
+public:
+ InProgressLookupFlagsState(
+ LookupKind K, JITDylibSearchOrder SearchOrder, SymbolLookupSet LookupSet,
+ unique_function<void(Expected<SymbolFlagsMap>)> OnComplete)
+ : InProgressLookupState(K, std::move(SearchOrder), std::move(LookupSet),
+ SymbolState::NeverSearched),
+ OnComplete(std::move(OnComplete)) {}
+
+ void complete(std::unique_ptr<InProgressLookupState> IPLS) override {
+ GeneratorLock = {}; // Unlock and release.
+ auto &ES = SearchOrder.front().first->getExecutionSession();
+ ES.OL_completeLookupFlags(std::move(IPLS), std::move(OnComplete));
+ }
+
+ void fail(Error Err) override {
+ GeneratorLock = {}; // Unlock and release.
+ OnComplete(std::move(Err));
+ }
+
+private:
+ unique_function<void(Expected<SymbolFlagsMap>)> OnComplete;
+};
+
+class InProgressFullLookupState : public InProgressLookupState {
+public:
+ InProgressFullLookupState(LookupKind K, JITDylibSearchOrder SearchOrder,
+ SymbolLookupSet LookupSet,
+ SymbolState RequiredState,
+ std::shared_ptr<AsynchronousSymbolQuery> Q,
+ RegisterDependenciesFunction RegisterDependencies)
+ : InProgressLookupState(K, std::move(SearchOrder), std::move(LookupSet),
+ RequiredState),
+ Q(std::move(Q)), RegisterDependencies(std::move(RegisterDependencies)) {
+ }
+
+ void complete(std::unique_ptr<InProgressLookupState> IPLS) override {
+ GeneratorLock = {}; // Unlock and release.
+ auto &ES = SearchOrder.front().first->getExecutionSession();
+ ES.OL_completeLookup(std::move(IPLS), std::move(Q),
+ std::move(RegisterDependencies));
+ }
+
+ void fail(Error Err) override {
+ GeneratorLock = {};
+ Q->detach();
+ Q->handleFailed(std::move(Err));
+ }
+
+private:
+ std::shared_ptr<AsynchronousSymbolQuery> Q;
+ RegisterDependenciesFunction RegisterDependencies;
+};
+
ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD,
JITDylibLookupFlags SourceJDLookupFlags,
SymbolPredicate Allow)
: SourceJD(SourceJD), SourceJDLookupFlags(SourceJDLookupFlags),
Allow(std::move(Allow)) {}
-Error ReexportsGenerator::tryToGenerate(LookupKind K, JITDylib &JD,
+Error ReexportsGenerator::tryToGenerate(LookupState &LS, LookupKind K,
+ JITDylib &JD,
JITDylibLookupFlags JDLookupFlags,
const SymbolLookupSet &LookupSet) {
assert(&JD != &SourceJD && "Cannot re-export from the same dylib");
// Use lookupFlags to find the subset of symbols that match our lookup.
- auto Flags = SourceJD.lookupFlags(K, JDLookupFlags, LookupSet);
+ auto Flags = JD.getExecutionSession().lookupFlags(
+ K, {{&SourceJD, JDLookupFlags}}, LookupSet);
if (!Flags)
return Flags.takeError();
@@ -590,19 +572,63 @@ Error ReexportsGenerator::tryToGenerate(LookupKind K, JITDylib &JD,
return JD.define(reexports(SourceJD, AliasMap, SourceJDLookupFlags));
}
-JITDylib::DefinitionGenerator::~DefinitionGenerator() {}
+LookupState::LookupState(std::unique_ptr<InProgressLookupState> IPLS)
+ : IPLS(std::move(IPLS)) {}
-void JITDylib::removeGenerator(DefinitionGenerator &G) {
+void LookupState::reset(InProgressLookupState *IPLS) { this->IPLS.reset(IPLS); }
+
+LookupState::LookupState() = default;
+LookupState::LookupState(LookupState &&) = default;
+LookupState &LookupState::operator=(LookupState &&) = default;
+LookupState::~LookupState() = default;
+
+void LookupState::continueLookup(Error Err) {
+ assert(IPLS && "Cannot call continueLookup on empty LookupState");
+ auto &ES = IPLS->SearchOrder.begin()->first->getExecutionSession();
+ ES.OL_applyQueryPhase1(std::move(IPLS), std::move(Err));
+}
+
+DefinitionGenerator::~DefinitionGenerator() {}
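
The standalone DefinitionGenerator interface that replaces JITDylib::DefinitionGenerator takes a LookupState as its first argument so that generators can suspend a lookup and resume it later. As an illustration only (not part of this change; it assumes the usual Core.h declarations), a minimal synchronous generator against the new signature could look like:

    class EverythingIsDummyGenerator : public DefinitionGenerator {
    public:
      Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
                          JITDylibLookupFlags JDLookupFlags,
                          const SymbolLookupSet &LookupSet) override {
        // Synchronous generators ignore LS; asynchronous ones move it
        // somewhere safe and call continueLookup() on it later.
        SymbolMap NewDefs;
        for (auto &KV : LookupSet)
          NewDefs[KV.first] =
              JITEvaluatedSymbol(0x1000 /* placeholder address */,
                                 JITSymbolFlags::Exported);
        return JD.define(absoluteSymbols(std::move(NewDefs)));
      }
    };

Such a generator would be attached with JD.addGenerator(std::make_unique<EverythingIsDummyGenerator>()).
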
+
+Error JITDylib::clear() {
+ std::vector<ResourceTrackerSP> TrackersToRemove;
ES.runSessionLocked([&]() {
- auto I = std::find_if(DefGenerators.begin(), DefGenerators.end(),
- [&](const std::unique_ptr<DefinitionGenerator> &H) {
- return H.get() == &G;
- });
- assert(I != DefGenerators.end() && "Generator not found");
- DefGenerators.erase(I);
+ for (auto &KV : TrackerSymbols)
+ TrackersToRemove.push_back(KV.first);
+ TrackersToRemove.push_back(getDefaultResourceTracker());
+ });
+
+ Error Err = Error::success();
+ for (auto &RT : TrackersToRemove)
+ Err = joinErrors(std::move(Err), RT->remove());
+ return Err;
+}
+
+ResourceTrackerSP JITDylib::getDefaultResourceTracker() {
+ return ES.runSessionLocked([this] {
+ if (!DefaultTracker)
+ DefaultTracker = new ResourceTracker(this);
+ return DefaultTracker;
+ });
+}
+
+ResourceTrackerSP JITDylib::createResourceTracker() {
+ return ES.runSessionLocked([this] {
+ ResourceTrackerSP RT = new ResourceTracker(this);
+ return RT;
});
}
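
clear(), getDefaultResourceTracker() and createResourceTracker() are the JITDylib side of the new ResourceTracker-based removal scheme: definitions are added under a tracker, and removing the tracker removes them. A hedged usage sketch (illustrative only; ES and JD are assumed to be an existing ExecutionSession and JITDylib, and FooAddr a placeholder address):

    auto RT = JD.createResourceTracker();
    JITTargetAddress FooAddr = 0x1000; // placeholder address
    if (auto Err = JD.define(absoluteSymbols(
                                 {{ES.intern("foo"),
                                   JITEvaluatedSymbol(FooAddr,
                                                      JITSymbolFlags::Exported)}}),
                             RT))
      ES.reportError(std::move(Err));
    // Later: removing the tracker removes "foo" and fails any queries that
    // are still waiting on it.
    if (auto Err = RT->remove())
      ES.reportError(std::move(Err));
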
+void JITDylib::removeGenerator(DefinitionGenerator &G) {
+ std::lock_guard<std::mutex> Lock(GeneratorsMutex);
+ auto I = llvm::find_if(DefGenerators,
+ [&](const std::shared_ptr<DefinitionGenerator> &H) {
+ return H.get() == &G;
+ });
+ assert(I != DefGenerators.end() && "Generator not found");
+ DefGenerators.erase(I);
+}
+
Expected<SymbolFlagsMap>
JITDylib::defineMaterializing(SymbolFlagsMap SymbolFlags) {
@@ -652,11 +678,18 @@ JITDylib::defineMaterializing(SymbolFlagsMap SymbolFlags) {
});
}
-void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
+Error JITDylib::replace(MaterializationResponsibility &FromMR,
+ std::unique_ptr<MaterializationUnit> MU) {
assert(MU != nullptr && "Can not replace with a null MaterializationUnit");
+ std::unique_ptr<MaterializationUnit> MustRunMU;
+ std::unique_ptr<MaterializationResponsibility> MustRunMR;
+
+ auto Err =
+ ES.runSessionLocked([&, this]() -> Error {
+ auto RT = getTracker(FromMR);
- auto MustRunMU =
- ES.runSessionLocked([&, this]() -> std::unique_ptr<MaterializationUnit> {
+ if (RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(std::move(RT));
#ifndef NDEBUG
for (auto &KV : MU->getSymbols()) {
@@ -671,18 +704,27 @@ void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
}
#endif // NDEBUG
+ // If the tracker is defunct we need to bail out immediately.
+
// If any symbol has pending queries against it then we need to
// materialize MU immediately.
for (auto &KV : MU->getSymbols()) {
auto MII = MaterializingInfos.find(KV.first);
if (MII != MaterializingInfos.end()) {
- if (MII->second.hasQueriesPending())
- return std::move(MU);
+ if (MII->second.hasQueriesPending()) {
+ MustRunMR = ES.createMaterializationResponsibility(
+ *RT, std::move(MU->SymbolFlags), std::move(MU->InitSymbol));
+ MustRunMU = std::move(MU);
+ return Error::success();
+ }
}
}
// Otherwise, make MU responsible for all the symbols.
- auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
+ auto RTI = MRTrackers.find(&FromMR);
+ assert(RTI != MRTrackers.end() && "No tracker for FromMR");
+ auto UMI =
+ std::make_shared<UnmaterializedInfo>(std::move(MU), RTI->second);
for (auto &KV : UMI->MU->getSymbols()) {
auto SymI = Symbols.find(KV.first);
assert(SymI->second.getState() == SymbolState::Materializing &&
@@ -700,14 +742,36 @@ void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
UMIEntry = UMI;
}
- return nullptr;
+ return Error::success();
});
+ if (Err)
+ return Err;
+
if (MustRunMU) {
- auto MR =
- MustRunMU->createMaterializationResponsibility(shared_from_this());
- ES.dispatchMaterialization(std::move(MustRunMU), std::move(MR));
+ assert(MustRunMR && "MustRunMU set implies MustRunMR set");
+ ES.dispatchMaterialization(std::move(MustRunMU), std::move(MustRunMR));
+ } else {
+ assert(!MustRunMR && "MustRunMU unset implies MustRunMR unset");
}
+
+ return Error::success();
+}
+
+Expected<std::unique_ptr<MaterializationResponsibility>>
+JITDylib::delegate(MaterializationResponsibility &FromMR,
+ SymbolFlagsMap SymbolFlags, SymbolStringPtr InitSymbol) {
+
+ return ES.runSessionLocked(
+ [&]() -> Expected<std::unique_ptr<MaterializationResponsibility>> {
+ auto RT = getTracker(FromMR);
+
+ if (RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(std::move(RT));
+
+ return ES.createMaterializationResponsibility(
+ *RT, std::move(SymbolFlags), std::move(InitSymbol));
+ });
}
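
delegate() above is the session-locked backend for MaterializationResponsibility::delegate, which now yields an Expected-wrapped unique_ptr because the owning tracker may already be defunct. A hedged fragment from inside a hypothetical custom MaterializationUnit::materialize (R is the std::unique_ptr<MaterializationResponsibility> argument; the symbol chosen is simply the first one this unit covers):

    // Split one symbol off into its own responsibility object so it can be
    // materialized separately.
    SymbolNameSet ToDelegate({R->getSymbols().begin()->first});
    auto DelegatedR = R->delegate(ToDelegate);
    if (!DelegatedR) {
      // Most likely ResourceTrackerDefunct: give up on the whole unit.
      consumeError(DelegatedR.takeError()); // or report via the session
      R->failMaterialization();
      return;
    }
    // *DelegatedR now owns the split-off symbol; R keeps the rest.
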
SymbolNameSet
@@ -808,89 +872,93 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name,
Symbols[Name].setFlags(Symbols[Name].getFlags() | JITSymbolFlags::HasError);
}
-Error JITDylib::resolve(const SymbolMap &Resolved) {
- SymbolNameSet SymbolsInErrorState;
+Error JITDylib::resolve(MaterializationResponsibility &MR,
+ const SymbolMap &Resolved) {
AsynchronousSymbolQuerySet CompletedQueries;
- ES.runSessionLocked([&, this]() {
- struct WorklistEntry {
- SymbolTable::iterator SymI;
- JITEvaluatedSymbol ResolvedSym;
- };
-
- std::vector<WorklistEntry> Worklist;
- Worklist.reserve(Resolved.size());
+ if (auto Err = ES.runSessionLocked([&, this]() -> Error {
+ auto RTI = MRTrackers.find(&MR);
+ assert(RTI != MRTrackers.end() && "No resource tracker for MR?");
+ if (RTI->second->isDefunct())
+ return make_error<ResourceTrackerDefunct>(RTI->second);
- // Build worklist and check for any symbols in the error state.
- for (const auto &KV : Resolved) {
+ struct WorklistEntry {
+ SymbolTable::iterator SymI;
+ JITEvaluatedSymbol ResolvedSym;
+ };
- assert(!KV.second.getFlags().hasError() &&
- "Resolution result can not have error flag set");
+ SymbolNameSet SymbolsInErrorState;
+ std::vector<WorklistEntry> Worklist;
+ Worklist.reserve(Resolved.size());
- auto SymI = Symbols.find(KV.first);
+ // Build worklist and check for any symbols in the error state.
+ for (const auto &KV : Resolved) {
- assert(SymI != Symbols.end() && "Symbol not found");
- assert(!SymI->second.hasMaterializerAttached() &&
- "Resolving symbol with materializer attached?");
- assert(SymI->second.getState() == SymbolState::Materializing &&
- "Symbol should be materializing");
- assert(SymI->second.getAddress() == 0 &&
- "Symbol has already been resolved");
+ assert(!KV.second.getFlags().hasError() &&
+ "Resolution result can not have error flag set");
- if (SymI->second.getFlags().hasError())
- SymbolsInErrorState.insert(KV.first);
- else {
- auto Flags = KV.second.getFlags();
- Flags &= ~(JITSymbolFlags::Weak | JITSymbolFlags::Common);
- assert(Flags == (SymI->second.getFlags() &
- ~(JITSymbolFlags::Weak | JITSymbolFlags::Common)) &&
- "Resolved flags should match the declared flags");
-
- Worklist.push_back(
- {SymI, JITEvaluatedSymbol(KV.second.getAddress(), Flags)});
- }
- }
+ auto SymI = Symbols.find(KV.first);
- // If any symbols were in the error state then bail out.
- if (!SymbolsInErrorState.empty())
- return;
+ assert(SymI != Symbols.end() && "Symbol not found");
+ assert(!SymI->second.hasMaterializerAttached() &&
+ "Resolving symbol with materializer attached?");
+ assert(SymI->second.getState() == SymbolState::Materializing &&
+ "Symbol should be materializing");
+ assert(SymI->second.getAddress() == 0 &&
+ "Symbol has already been resolved");
+
+ if (SymI->second.getFlags().hasError())
+ SymbolsInErrorState.insert(KV.first);
+ else {
+ auto Flags = KV.second.getFlags();
+ Flags &= ~(JITSymbolFlags::Weak | JITSymbolFlags::Common);
+ assert(Flags ==
+ (SymI->second.getFlags() &
+ ~(JITSymbolFlags::Weak | JITSymbolFlags::Common)) &&
+ "Resolved flags should match the declared flags");
+
+ Worklist.push_back(
+ {SymI, JITEvaluatedSymbol(KV.second.getAddress(), Flags)});
+ }
+ }
- while (!Worklist.empty()) {
- auto SymI = Worklist.back().SymI;
- auto ResolvedSym = Worklist.back().ResolvedSym;
- Worklist.pop_back();
+ // If any symbols were in the error state then bail out.
+ if (!SymbolsInErrorState.empty()) {
+ auto FailedSymbolsDepMap = std::make_shared<SymbolDependenceMap>();
+ (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState);
+ return make_error<FailedToMaterialize>(
+ std::move(FailedSymbolsDepMap));
+ }
- auto &Name = SymI->first;
+ while (!Worklist.empty()) {
+ auto SymI = Worklist.back().SymI;
+ auto ResolvedSym = Worklist.back().ResolvedSym;
+ Worklist.pop_back();
- // Resolved symbols can not be weak: discard the weak flag.
- JITSymbolFlags ResolvedFlags = ResolvedSym.getFlags();
- SymI->second.setAddress(ResolvedSym.getAddress());
- SymI->second.setFlags(ResolvedFlags);
- SymI->second.setState(SymbolState::Resolved);
+ auto &Name = SymI->first;
- auto MII = MaterializingInfos.find(Name);
- if (MII == MaterializingInfos.end())
- continue;
+ // Resolved symbols can not be weak: discard the weak flag.
+ JITSymbolFlags ResolvedFlags = ResolvedSym.getFlags();
+ SymI->second.setAddress(ResolvedSym.getAddress());
+ SymI->second.setFlags(ResolvedFlags);
+ SymI->second.setState(SymbolState::Resolved);
- auto &MI = MII->second;
- for (auto &Q : MI.takeQueriesMeeting(SymbolState::Resolved)) {
- Q->notifySymbolMetRequiredState(Name, ResolvedSym);
- Q->removeQueryDependence(*this, Name);
- if (Q->isComplete())
- CompletedQueries.insert(std::move(Q));
- }
- }
- });
+ auto MII = MaterializingInfos.find(Name);
+ if (MII == MaterializingInfos.end())
+ continue;
- assert((SymbolsInErrorState.empty() || CompletedQueries.empty()) &&
- "Can't fail symbols and completed queries at the same time");
+ auto &MI = MII->second;
+ for (auto &Q : MI.takeQueriesMeeting(SymbolState::Resolved)) {
+ Q->notifySymbolMetRequiredState(Name, ResolvedSym);
+ Q->removeQueryDependence(*this, Name);
+ if (Q->isComplete())
+ CompletedQueries.insert(std::move(Q));
+ }
+ }
- // If we failed any symbols then return an error.
- if (!SymbolsInErrorState.empty()) {
- auto FailedSymbolsDepMap = std::make_shared<SymbolDependenceMap>();
- (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState);
- return make_error<FailedToMaterialize>(std::move(FailedSymbolsDepMap));
- }
+ return Error::success();
+ }))
+ return Err;
// Otherwise notify all the completed queries.
for (auto &Q : CompletedQueries) {
@@ -901,139 +969,145 @@ Error JITDylib::resolve(const SymbolMap &Resolved) {
return Error::success();
}
-Error JITDylib::emit(const SymbolFlagsMap &Emitted) {
+Error JITDylib::emit(MaterializationResponsibility &MR,
+ const SymbolFlagsMap &Emitted) {
AsynchronousSymbolQuerySet CompletedQueries;
- SymbolNameSet SymbolsInErrorState;
DenseMap<JITDylib *, SymbolNameVector> ReadySymbols;
- ES.runSessionLocked([&, this]() {
- std::vector<SymbolTable::iterator> Worklist;
+ if (auto Err = ES.runSessionLocked([&, this]() -> Error {
+ auto RTI = MRTrackers.find(&MR);
+ assert(RTI != MRTrackers.end() && "No resource tracker for MR?");
+ if (RTI->second->isDefunct())
+ return make_error<ResourceTrackerDefunct>(RTI->second);
- // Scan to build worklist, record any symbols in the erorr state.
- for (const auto &KV : Emitted) {
- auto &Name = KV.first;
+ SymbolNameSet SymbolsInErrorState;
+ std::vector<SymbolTable::iterator> Worklist;
- auto SymI = Symbols.find(Name);
- assert(SymI != Symbols.end() && "No symbol table entry for Name");
+ // Scan to build worklist, record any symbols in the error state.
+ for (const auto &KV : Emitted) {
+ auto &Name = KV.first;
- if (SymI->second.getFlags().hasError())
- SymbolsInErrorState.insert(Name);
- else
- Worklist.push_back(SymI);
- }
+ auto SymI = Symbols.find(Name);
+ assert(SymI != Symbols.end() && "No symbol table entry for Name");
- // If any symbols were in the error state then bail out.
- if (!SymbolsInErrorState.empty())
- return;
+ if (SymI->second.getFlags().hasError())
+ SymbolsInErrorState.insert(Name);
+ else
+ Worklist.push_back(SymI);
+ }
- // Otherwise update dependencies and move to the emitted state.
- while (!Worklist.empty()) {
- auto SymI = Worklist.back();
- Worklist.pop_back();
+ // If any symbols were in the error state then bail out.
+ if (!SymbolsInErrorState.empty()) {
+ auto FailedSymbolsDepMap = std::make_shared<SymbolDependenceMap>();
+ (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState);
+ return make_error<FailedToMaterialize>(
+ std::move(FailedSymbolsDepMap));
+ }
- auto &Name = SymI->first;
- auto &SymEntry = SymI->second;
+ // Otherwise update dependencies and move to the emitted state.
+ while (!Worklist.empty()) {
+ auto SymI = Worklist.back();
+ Worklist.pop_back();
- // Move symbol to the emitted state.
- assert(((SymEntry.getFlags().hasMaterializationSideEffectsOnly() &&
- SymEntry.getState() == SymbolState::Materializing) ||
- SymEntry.getState() == SymbolState::Resolved) &&
- "Emitting from state other than Resolved");
- SymEntry.setState(SymbolState::Emitted);
+ auto &Name = SymI->first;
+ auto &SymEntry = SymI->second;
- auto MII = MaterializingInfos.find(Name);
+ // Move symbol to the emitted state.
+ assert(((SymEntry.getFlags().hasMaterializationSideEffectsOnly() &&
+ SymEntry.getState() == SymbolState::Materializing) ||
+ SymEntry.getState() == SymbolState::Resolved) &&
+ "Emitting from state other than Resolved");
+ SymEntry.setState(SymbolState::Emitted);
- // If this symbol has no MaterializingInfo then it's trivially ready.
- // Update its state and continue.
- if (MII == MaterializingInfos.end()) {
- SymEntry.setState(SymbolState::Ready);
- continue;
- }
+ auto MII = MaterializingInfos.find(Name);
+
+ // If this symbol has no MaterializingInfo then it's trivially ready.
+ // Update its state and continue.
+ if (MII == MaterializingInfos.end()) {
+ SymEntry.setState(SymbolState::Ready);
+ continue;
+ }
+
+ auto &MI = MII->second;
+
+ // For each dependant, transfer this node's emitted dependencies to
+ // it. If the dependant node is ready (i.e. has no unemitted
+ // dependencies) then notify any pending queries.
+ for (auto &KV : MI.Dependants) {
+ auto &DependantJD = *KV.first;
+ auto &DependantJDReadySymbols = ReadySymbols[&DependantJD];
+ for (auto &DependantName : KV.second) {
+ auto DependantMII =
+ DependantJD.MaterializingInfos.find(DependantName);
+ assert(DependantMII != DependantJD.MaterializingInfos.end() &&
+ "Dependant should have MaterializingInfo");
+
+ auto &DependantMI = DependantMII->second;
+
+ // Remove the dependant's dependency on this node.
+ assert(DependantMI.UnemittedDependencies.count(this) &&
+ "Dependant does not have an unemitted dependencies record "
+ "for "
+ "this JITDylib");
+ assert(DependantMI.UnemittedDependencies[this].count(Name) &&
+ "Dependant does not count this symbol as a dependency?");
+
+ DependantMI.UnemittedDependencies[this].erase(Name);
+ if (DependantMI.UnemittedDependencies[this].empty())
+ DependantMI.UnemittedDependencies.erase(this);
+
+ // Transfer unemitted dependencies from this node to the
+ // dependant.
+ DependantJD.transferEmittedNodeDependencies(DependantMI,
+ DependantName, MI);
+
+ auto DependantSymI = DependantJD.Symbols.find(DependantName);
+ assert(DependantSymI != DependantJD.Symbols.end() &&
+ "Dependant has no entry in the Symbols table");
+ auto &DependantSymEntry = DependantSymI->second;
+
+ // If the dependant is emitted and this node was the last of its
+ // unemitted dependencies then the dependant node is now ready, so
+ // notify any pending queries on the dependant node.
+ if (DependantSymEntry.getState() == SymbolState::Emitted &&
+ DependantMI.UnemittedDependencies.empty()) {
+ assert(DependantMI.Dependants.empty() &&
+ "Dependants should be empty by now");
+
+ // Since this dependant is now ready, we erase its
+ // MaterializingInfo and update its materializing state.
+ DependantSymEntry.setState(SymbolState::Ready);
+ DependantJDReadySymbols.push_back(DependantName);
+
+ for (auto &Q :
+ DependantMI.takeQueriesMeeting(SymbolState::Ready)) {
+ Q->notifySymbolMetRequiredState(
+ DependantName, DependantSymI->second.getSymbol());
+ if (Q->isComplete())
+ CompletedQueries.insert(Q);
+ Q->removeQueryDependence(DependantJD, DependantName);
+ }
+ }
+ }
+ }
- auto &MI = MII->second;
-
- // For each dependant, transfer this node's emitted dependencies to
- // it. If the dependant node is ready (i.e. has no unemitted
- // dependencies) then notify any pending queries.
- for (auto &KV : MI.Dependants) {
- auto &DependantJD = *KV.first;
- auto &DependantJDReadySymbols = ReadySymbols[&DependantJD];
- for (auto &DependantName : KV.second) {
- auto DependantMII =
- DependantJD.MaterializingInfos.find(DependantName);
- assert(DependantMII != DependantJD.MaterializingInfos.end() &&
- "Dependant should have MaterializingInfo");
-
- auto &DependantMI = DependantMII->second;
-
- // Remove the dependant's dependency on this node.
- assert(DependantMI.UnemittedDependencies.count(this) &&
- "Dependant does not have an unemitted dependencies record for "
- "this JITDylib");
- assert(DependantMI.UnemittedDependencies[this].count(Name) &&
- "Dependant does not count this symbol as a dependency?");
-
- DependantMI.UnemittedDependencies[this].erase(Name);
- if (DependantMI.UnemittedDependencies[this].empty())
- DependantMI.UnemittedDependencies.erase(this);
-
- // Transfer unemitted dependencies from this node to the dependant.
- DependantJD.transferEmittedNodeDependencies(DependantMI,
- DependantName, MI);
-
- auto DependantSymI = DependantJD.Symbols.find(DependantName);
- assert(DependantSymI != DependantJD.Symbols.end() &&
- "Dependant has no entry in the Symbols table");
- auto &DependantSymEntry = DependantSymI->second;
-
- // If the dependant is emitted and this node was the last of its
- // unemitted dependencies then the dependant node is now ready, so
- // notify any pending queries on the dependant node.
- if (DependantSymEntry.getState() == SymbolState::Emitted &&
- DependantMI.UnemittedDependencies.empty()) {
- assert(DependantMI.Dependants.empty() &&
- "Dependants should be empty by now");
-
- // Since this dependant is now ready, we erase its MaterializingInfo
- // and update its materializing state.
- DependantSymEntry.setState(SymbolState::Ready);
- DependantJDReadySymbols.push_back(DependantName);
-
- for (auto &Q : DependantMI.takeQueriesMeeting(SymbolState::Ready)) {
- Q->notifySymbolMetRequiredState(
- DependantName, DependantSymI->second.getSymbol());
+ auto &ThisJDReadySymbols = ReadySymbols[this];
+ MI.Dependants.clear();
+ if (MI.UnemittedDependencies.empty()) {
+ SymI->second.setState(SymbolState::Ready);
+ ThisJDReadySymbols.push_back(Name);
+ for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) {
+ Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
if (Q->isComplete())
CompletedQueries.insert(Q);
- Q->removeQueryDependence(DependantJD, DependantName);
+ Q->removeQueryDependence(*this, Name);
}
}
}
- }
- auto &ThisJDReadySymbols = ReadySymbols[this];
- MI.Dependants.clear();
- if (MI.UnemittedDependencies.empty()) {
- SymI->second.setState(SymbolState::Ready);
- ThisJDReadySymbols.push_back(Name);
- for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) {
- Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
- if (Q->isComplete())
- CompletedQueries.insert(Q);
- Q->removeQueryDependence(*this, Name);
- }
- }
- }
- });
-
- assert((SymbolsInErrorState.empty() || CompletedQueries.empty()) &&
- "Can't fail symbols and completed queries at the same time");
-
- // If we failed any symbols then return an error.
- if (!SymbolsInErrorState.empty()) {
- auto FailedSymbolsDepMap = std::make_shared<SymbolDependenceMap>();
- (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState);
- return make_error<FailedToMaterialize>(std::move(FailedSymbolsDepMap));
- }
+ return Error::success();
+ }))
+ return Err;
// Otherwise notify all the completed queries.
for (auto &Q : CompletedQueries) {
@@ -1044,120 +1118,122 @@ Error JITDylib::emit(const SymbolFlagsMap &Emitted) {
return Error::success();
}
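
resolve() and emit() are now reached through a MaterializationResponsibility and check the owning ResourceTracker before touching the symbol table. From a materializer's point of view the sequence is unchanged in spirit; a hedged fragment (R is the std::unique_ptr<MaterializationResponsibility> passed to a custom materialize(), and SymAddrs is a SymbolMap the materializer has filled with resolved addresses):

    if (auto Err = R->notifyResolved(SymAddrs)) {
      // e.g. the tracker was removed while this unit was being compiled.
      consumeError(std::move(Err)); // or report via the session
      R->failMaterialization();
      return;
    }
    if (auto Err = R->notifyEmitted()) {
      consumeError(std::move(Err));
      R->failMaterialization();
      return;
    }
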
-void JITDylib::notifyFailed(FailedSymbolsWorklist Worklist) {
- AsynchronousSymbolQuerySet FailedQueries;
- auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
-
- // Failing no symbols is a no-op.
- if (Worklist.empty())
- return;
-
- auto &ES = Worklist.front().first->getExecutionSession();
-
+void JITDylib::unlinkMaterializationResponsibility(
+ MaterializationResponsibility &MR) {
ES.runSessionLocked([&]() {
- while (!Worklist.empty()) {
- assert(Worklist.back().first && "Failed JITDylib can not be null");
- auto &JD = *Worklist.back().first;
- auto Name = std::move(Worklist.back().second);
- Worklist.pop_back();
-
- (*FailedSymbolsMap)[&JD].insert(Name);
-
- assert(JD.Symbols.count(Name) && "No symbol table entry for Name");
- auto &Sym = JD.Symbols[Name];
-
- // Move the symbol into the error state.
- // Note that this may be redundant: The symbol might already have been
- // moved to this state in response to the failure of a dependence.
- Sym.setFlags(Sym.getFlags() | JITSymbolFlags::HasError);
-
- // FIXME: Come up with a sane mapping of state to
- // presence-of-MaterializingInfo so that we can assert presence / absence
- // here, rather than testing it.
- auto MII = JD.MaterializingInfos.find(Name);
+ auto I = MRTrackers.find(&MR);
+ assert(I != MRTrackers.end() && "MaterializationResponsibility not linked");
+ MRTrackers.erase(I);
+ });
+}
- if (MII == JD.MaterializingInfos.end())
- continue;
+std::pair<JITDylib::AsynchronousSymbolQuerySet,
+ std::shared_ptr<SymbolDependenceMap>>
+JITDylib::failSymbols(FailedSymbolsWorklist Worklist) {
+ AsynchronousSymbolQuerySet FailedQueries;
+ auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
- auto &MI = MII->second;
-
- // Move all dependants to the error state and disconnect from them.
- for (auto &KV : MI.Dependants) {
- auto &DependantJD = *KV.first;
- for (auto &DependantName : KV.second) {
- assert(DependantJD.Symbols.count(DependantName) &&
- "No symbol table entry for DependantName");
- auto &DependantSym = DependantJD.Symbols[DependantName];
- DependantSym.setFlags(DependantSym.getFlags() |
- JITSymbolFlags::HasError);
-
- assert(DependantJD.MaterializingInfos.count(DependantName) &&
- "No MaterializingInfo for dependant");
- auto &DependantMI = DependantJD.MaterializingInfos[DependantName];
-
- auto UnemittedDepI = DependantMI.UnemittedDependencies.find(&JD);
- assert(UnemittedDepI != DependantMI.UnemittedDependencies.end() &&
- "No UnemittedDependencies entry for this JITDylib");
- assert(UnemittedDepI->second.count(Name) &&
- "No UnemittedDependencies entry for this symbol");
- UnemittedDepI->second.erase(Name);
- if (UnemittedDepI->second.empty())
- DependantMI.UnemittedDependencies.erase(UnemittedDepI);
-
- // If this symbol is already in the emitted state then we need to
- // take responsibility for failing its queries, so add it to the
- // worklist.
- if (DependantSym.getState() == SymbolState::Emitted) {
- assert(DependantMI.Dependants.empty() &&
- "Emitted symbol should not have dependants");
- Worklist.push_back(std::make_pair(&DependantJD, DependantName));
- }
- }
- }
- MI.Dependants.clear();
-
- // Disconnect from all unemitted depenencies.
- for (auto &KV : MI.UnemittedDependencies) {
- auto &UnemittedDepJD = *KV.first;
- for (auto &UnemittedDepName : KV.second) {
- auto UnemittedDepMII =
- UnemittedDepJD.MaterializingInfos.find(UnemittedDepName);
- assert(UnemittedDepMII != UnemittedDepJD.MaterializingInfos.end() &&
- "Missing MII for unemitted dependency");
- assert(UnemittedDepMII->second.Dependants.count(&JD) &&
- "JD not listed as a dependant of unemitted dependency");
- assert(UnemittedDepMII->second.Dependants[&JD].count(Name) &&
- "Name is not listed as a dependant of unemitted dependency");
- UnemittedDepMII->second.Dependants[&JD].erase(Name);
- if (UnemittedDepMII->second.Dependants[&JD].empty())
- UnemittedDepMII->second.Dependants.erase(&JD);
+ while (!Worklist.empty()) {
+ assert(Worklist.back().first && "Failed JITDylib can not be null");
+ auto &JD = *Worklist.back().first;
+ auto Name = std::move(Worklist.back().second);
+ Worklist.pop_back();
+
+ (*FailedSymbolsMap)[&JD].insert(Name);
+
+ assert(JD.Symbols.count(Name) && "No symbol table entry for Name");
+ auto &Sym = JD.Symbols[Name];
+
+ // Move the symbol into the error state.
+ // Note that this may be redundant: The symbol might already have been
+ // moved to this state in response to the failure of a dependence.
+ Sym.setFlags(Sym.getFlags() | JITSymbolFlags::HasError);
+
+ // FIXME: Come up with a sane mapping of state to
+ // presence-of-MaterializingInfo so that we can assert presence / absence
+ // here, rather than testing it.
+ auto MII = JD.MaterializingInfos.find(Name);
+
+ if (MII == JD.MaterializingInfos.end())
+ continue;
+
+ auto &MI = MII->second;
+
+ // Move all dependants to the error state and disconnect from them.
+ for (auto &KV : MI.Dependants) {
+ auto &DependantJD = *KV.first;
+ for (auto &DependantName : KV.second) {
+ assert(DependantJD.Symbols.count(DependantName) &&
+ "No symbol table entry for DependantName");
+ auto &DependantSym = DependantJD.Symbols[DependantName];
+ DependantSym.setFlags(DependantSym.getFlags() |
+ JITSymbolFlags::HasError);
+
+ assert(DependantJD.MaterializingInfos.count(DependantName) &&
+ "No MaterializingInfo for dependant");
+ auto &DependantMI = DependantJD.MaterializingInfos[DependantName];
+
+ auto UnemittedDepI = DependantMI.UnemittedDependencies.find(&JD);
+ assert(UnemittedDepI != DependantMI.UnemittedDependencies.end() &&
+ "No UnemittedDependencies entry for this JITDylib");
+ assert(UnemittedDepI->second.count(Name) &&
+ "No UnemittedDependencies entry for this symbol");
+ UnemittedDepI->second.erase(Name);
+ if (UnemittedDepI->second.empty())
+ DependantMI.UnemittedDependencies.erase(UnemittedDepI);
+
+ // If this symbol is already in the emitted state then we need to
+ // take responsibility for failing its queries, so add it to the
+ // worklist.
+ if (DependantSym.getState() == SymbolState::Emitted) {
+ assert(DependantMI.Dependants.empty() &&
+ "Emitted symbol should not have dependants");
+ Worklist.push_back(std::make_pair(&DependantJD, DependantName));
}
}
- MI.UnemittedDependencies.clear();
-
- // Collect queries to be failed for this MII.
- AsynchronousSymbolQueryList ToDetach;
- for (auto &Q : MII->second.pendingQueries()) {
- // Add the query to the list to be failed and detach it.
- FailedQueries.insert(Q);
- ToDetach.push_back(Q);
+ }
+ MI.Dependants.clear();
+
+ // Disconnect from all unemitted dependencies.
+ for (auto &KV : MI.UnemittedDependencies) {
+ auto &UnemittedDepJD = *KV.first;
+ for (auto &UnemittedDepName : KV.second) {
+ auto UnemittedDepMII =
+ UnemittedDepJD.MaterializingInfos.find(UnemittedDepName);
+ assert(UnemittedDepMII != UnemittedDepJD.MaterializingInfos.end() &&
+ "Missing MII for unemitted dependency");
+ assert(UnemittedDepMII->second.Dependants.count(&JD) &&
+ "JD not listed as a dependant of unemitted dependency");
+ assert(UnemittedDepMII->second.Dependants[&JD].count(Name) &&
+ "Name is not listed as a dependant of unemitted dependency");
+ UnemittedDepMII->second.Dependants[&JD].erase(Name);
+ if (UnemittedDepMII->second.Dependants[&JD].empty())
+ UnemittedDepMII->second.Dependants.erase(&JD);
}
- for (auto &Q : ToDetach)
- Q->detach();
-
- assert(MI.Dependants.empty() &&
- "Can not delete MaterializingInfo with dependants still attached");
- assert(MI.UnemittedDependencies.empty() &&
- "Can not delete MaterializingInfo with unemitted dependencies "
- "still attached");
- assert(!MI.hasQueriesPending() &&
- "Can not delete MaterializingInfo with queries pending");
- JD.MaterializingInfos.erase(MII);
}
- });
+ MI.UnemittedDependencies.clear();
+
+ // Collect queries to be failed for this MII.
+ AsynchronousSymbolQueryList ToDetach;
+ for (auto &Q : MII->second.pendingQueries()) {
+ // Add the query to the list to be failed and detach it.
+ FailedQueries.insert(Q);
+ ToDetach.push_back(Q);
+ }
+ for (auto &Q : ToDetach)
+ Q->detach();
- for (auto &Q : FailedQueries)
- Q->handleFailed(make_error<FailedToMaterialize>(FailedSymbolsMap));
+ assert(MI.Dependants.empty() &&
+ "Can not delete MaterializingInfo with dependants still attached");
+ assert(MI.UnemittedDependencies.empty() &&
+ "Can not delete MaterializingInfo with unemitted dependencies "
+ "still attached");
+ assert(!MI.hasQueriesPending() &&
+ "Can not delete MaterializingInfo with queries pending");
+ JD.MaterializingInfos.erase(MII);
+ }
+
+ return std::make_pair(std::move(FailedQueries), std::move(FailedSymbolsMap));
}
void JITDylib::setLinkOrder(JITDylibSearchOrder NewLinkOrder,
@@ -1168,8 +1244,7 @@ void JITDylib::setLinkOrder(JITDylibSearchOrder NewLinkOrder,
if (NewLinkOrder.empty() || NewLinkOrder.front().first != this)
LinkOrder.push_back(
std::make_pair(this, JITDylibLookupFlags::MatchAllSymbols));
- LinkOrder.insert(LinkOrder.end(), NewLinkOrder.begin(),
- NewLinkOrder.end());
+ llvm::append_range(LinkOrder, NewLinkOrder);
} else
LinkOrder = std::move(NewLinkOrder);
});
@@ -1192,10 +1267,10 @@ void JITDylib::replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD,
void JITDylib::removeFromLinkOrder(JITDylib &JD) {
ES.runSessionLocked([&]() {
- auto I = std::find_if(LinkOrder.begin(), LinkOrder.end(),
- [&](const JITDylibSearchOrder::value_type &KV) {
- return KV.first == &JD;
- });
+ auto I = llvm::find_if(LinkOrder,
+ [&](const JITDylibSearchOrder::value_type &KV) {
+ return KV.first == &JD;
+ });
if (I != LinkOrder.end())
LinkOrder.erase(I);
});
@@ -1257,279 +1332,6 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
});
}
-Expected<SymbolFlagsMap>
-JITDylib::lookupFlags(LookupKind K, JITDylibLookupFlags JDLookupFlags,
- SymbolLookupSet LookupSet) {
- return ES.runSessionLocked([&, this]() -> Expected<SymbolFlagsMap> {
- SymbolFlagsMap Result;
- lookupFlagsImpl(Result, K, JDLookupFlags, LookupSet);
-
- // Run any definition generators.
- for (auto &DG : DefGenerators) {
-
- // Bail out early if we found everything.
- if (LookupSet.empty())
- break;
-
- // Run this generator.
- if (auto Err = DG->tryToGenerate(K, *this, JDLookupFlags, LookupSet))
- return std::move(Err);
-
- // Re-try the search.
- lookupFlagsImpl(Result, K, JDLookupFlags, LookupSet);
- }
-
- return Result;
- });
-}
-
-void JITDylib::lookupFlagsImpl(SymbolFlagsMap &Result, LookupKind K,
- JITDylibLookupFlags JDLookupFlags,
- SymbolLookupSet &LookupSet) {
-
- LookupSet.forEachWithRemoval(
- [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) -> bool {
- auto I = Symbols.find(Name);
- if (I == Symbols.end())
- return false;
- assert(!Result.count(Name) && "Symbol already present in Flags map");
- Result[Name] = I->second.getFlags();
- return true;
- });
-}
-
-Error JITDylib::lodgeQuery(MaterializationUnitList &MUs,
- std::shared_ptr<AsynchronousSymbolQuery> &Q,
- LookupKind K, JITDylibLookupFlags JDLookupFlags,
- SymbolLookupSet &Unresolved) {
- assert(Q && "Query can not be null");
-
- if (auto Err = lodgeQueryImpl(MUs, Q, K, JDLookupFlags, Unresolved))
- return Err;
-
- // Run any definition generators.
- for (auto &DG : DefGenerators) {
-
- // Bail out early if we have resolved everything.
- if (Unresolved.empty())
- break;
-
- // Run the generator.
- if (auto Err = DG->tryToGenerate(K, *this, JDLookupFlags, Unresolved))
- return Err;
-
- // Lodge query. This can not fail as any new definitions were added
- // by the generator under the session locked. Since they can't have
- // started materializing yet they can not have failed.
- cantFail(lodgeQueryImpl(MUs, Q, K, JDLookupFlags, Unresolved));
- }
-
- return Error::success();
-}
-
-Error JITDylib::lodgeQueryImpl(MaterializationUnitList &MUs,
- std::shared_ptr<AsynchronousSymbolQuery> &Q,
- LookupKind K, JITDylibLookupFlags JDLookupFlags,
- SymbolLookupSet &Unresolved) {
-
- return Unresolved.forEachWithRemoval(
- [&](const SymbolStringPtr &Name,
- SymbolLookupFlags SymLookupFlags) -> Expected<bool> {
- // Search for name in symbols. If not found then continue without
- // removal.
- auto SymI = Symbols.find(Name);
- if (SymI == Symbols.end())
- return false;
-
- // If we match against a materialization-side-effects only symbol then
- // make sure it is weakly-referenced. Otherwise bail out with an error.
- if (SymI->second.getFlags().hasMaterializationSideEffectsOnly() &&
- SymLookupFlags != SymbolLookupFlags::WeaklyReferencedSymbol)
- return make_error<SymbolsNotFound>(SymbolNameVector({Name}));
-
- // If this is a non exported symbol and we're matching exported symbols
- // only then skip this symbol without removal.
- if (!SymI->second.getFlags().isExported() &&
- JDLookupFlags == JITDylibLookupFlags::MatchExportedSymbolsOnly)
- return false;
-
- // If we matched against this symbol but it is in the error state then
- // bail out and treat it as a failure to materialize.
- if (SymI->second.getFlags().hasError()) {
- auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
- (*FailedSymbolsMap)[this] = {Name};
- return make_error<FailedToMaterialize>(std::move(FailedSymbolsMap));
- }
-
- // If this symbol already meets the required state for then notify the
- // query, then remove the symbol and continue.
- if (SymI->second.getState() >= Q->getRequiredState()) {
- Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
- return true;
- }
-
- // Otherwise this symbol does not yet meet the required state. Check
- // whether it has a materializer attached, and if so prepare to run it.
- if (SymI->second.hasMaterializerAttached()) {
- assert(SymI->second.getAddress() == 0 &&
- "Symbol not resolved but already has address?");
- auto UMII = UnmaterializedInfos.find(Name);
- assert(UMII != UnmaterializedInfos.end() &&
- "Lazy symbol should have UnmaterializedInfo");
- auto MU = std::move(UMII->second->MU);
- assert(MU != nullptr && "Materializer should not be null");
-
- // Move all symbols associated with this MaterializationUnit into
- // materializing state.
- for (auto &KV : MU->getSymbols()) {
- auto SymK = Symbols.find(KV.first);
- SymK->second.setMaterializerAttached(false);
- SymK->second.setState(SymbolState::Materializing);
- UnmaterializedInfos.erase(KV.first);
- }
-
- // Add MU to the list of MaterializationUnits to be materialized.
- MUs.push_back(std::move(MU));
- }
-
- // Add the query to the PendingQueries list and continue, deleting the
- // element.
- assert(SymI->second.getState() != SymbolState::NeverSearched &&
- SymI->second.getState() != SymbolState::Ready &&
- "By this line the symbol should be materializing");
- auto &MI = MaterializingInfos[Name];
- MI.addQuery(Q);
- Q->addQueryDependence(*this, Name);
- return true;
- });
-}
-
-Expected<SymbolNameSet>
-JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
- SymbolNameSet Names) {
- assert(Q && "Query can not be null");
-
- ES.runOutstandingMUs();
-
- bool QueryComplete = false;
- std::vector<std::unique_ptr<MaterializationUnit>> MUs;
-
- SymbolLookupSet Unresolved(Names);
- auto Err = ES.runSessionLocked([&, this]() -> Error {
- QueryComplete = lookupImpl(Q, MUs, Unresolved);
-
- // Run any definition generators.
- for (auto &DG : DefGenerators) {
-
- // Bail out early if we have resolved everything.
- if (Unresolved.empty())
- break;
-
- assert(!QueryComplete && "query complete but unresolved symbols remain?");
- if (auto Err = DG->tryToGenerate(LookupKind::Static, *this,
- JITDylibLookupFlags::MatchAllSymbols,
- Unresolved))
- return Err;
-
- if (!Unresolved.empty())
- QueryComplete = lookupImpl(Q, MUs, Unresolved);
- }
- return Error::success();
- });
-
- if (Err)
- return std::move(Err);
-
- assert((MUs.empty() || !QueryComplete) &&
- "If action flags are set, there should be no work to do (so no MUs)");
-
- if (QueryComplete)
- Q->handleComplete();
-
- // FIXME: Swap back to the old code below once RuntimeDyld works with
- // callbacks from asynchronous queries.
- // Add MUs to the OutstandingMUs list.
- {
- std::lock_guard<std::recursive_mutex> Lock(ES.OutstandingMUsMutex);
- auto ThisJD = shared_from_this();
- for (auto &MU : MUs) {
- auto MR = MU->createMaterializationResponsibility(ThisJD);
- ES.OutstandingMUs.push_back(make_pair(std::move(MU), std::move(MR)));
- }
- }
- ES.runOutstandingMUs();
-
- // Dispatch any required MaterializationUnits for materialization.
- // for (auto &MU : MUs)
- // ES.dispatchMaterialization(*this, std::move(MU));
-
- SymbolNameSet RemainingSymbols;
- for (auto &KV : Unresolved)
- RemainingSymbols.insert(KV.first);
-
- return RemainingSymbols;
-}
-
-bool JITDylib::lookupImpl(
- std::shared_ptr<AsynchronousSymbolQuery> &Q,
- std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
- SymbolLookupSet &Unresolved) {
- bool QueryComplete = false;
-
- std::vector<SymbolStringPtr> ToRemove;
- Unresolved.forEachWithRemoval(
- [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) -> bool {
- // Search for the name in Symbols. Skip without removing if not found.
- auto SymI = Symbols.find(Name);
- if (SymI == Symbols.end())
- return false;
-
- // If the symbol is already in the required state then notify the query
- // and remove.
- if (SymI->second.getState() >= Q->getRequiredState()) {
- Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
- if (Q->isComplete())
- QueryComplete = true;
- return true;
- }
-
- // If the symbol is lazy, get the MaterialiaztionUnit for it.
- if (SymI->second.hasMaterializerAttached()) {
- assert(SymI->second.getAddress() == 0 &&
- "Lazy symbol should not have a resolved address");
- auto UMII = UnmaterializedInfos.find(Name);
- assert(UMII != UnmaterializedInfos.end() &&
- "Lazy symbol should have UnmaterializedInfo");
- auto MU = std::move(UMII->second->MU);
- assert(MU != nullptr && "Materializer should not be null");
-
- // Kick all symbols associated with this MaterializationUnit into
- // materializing state.
- for (auto &KV : MU->getSymbols()) {
- auto SymK = Symbols.find(KV.first);
- assert(SymK != Symbols.end() && "Missing symbol table entry");
- SymK->second.setState(SymbolState::Materializing);
- SymK->second.setMaterializerAttached(false);
- UnmaterializedInfos.erase(KV.first);
- }
-
- // Add MU to the list of MaterializationUnits to be materialized.
- MUs.push_back(std::move(MU));
- }
-
- // Add the query to the PendingQueries list.
- assert(SymI->second.getState() != SymbolState::NeverSearched &&
- SymI->second.getState() != SymbolState::Ready &&
- "By this line the symbol should be materializing");
- auto &MI = MaterializingInfos[Name];
- MI.addQuery(Q);
- Q->addQueryDependence(*this, Name);
- return true;
- });
-
- return QueryComplete;
-}
-
void JITDylib::dump(raw_ostream &OS) {
ES.runSessionLocked([&, this]() {
OS << "JITDylib \"" << JITDylibName << "\" (ES: "
@@ -1589,11 +1391,10 @@ void JITDylib::MaterializingInfo::addQuery(
void JITDylib::MaterializingInfo::removeQuery(
const AsynchronousSymbolQuery &Q) {
// FIXME: Implement 'find_as' for shared_ptr<T>/T*.
- auto I =
- std::find_if(PendingQueries.begin(), PendingQueries.end(),
- [&Q](const std::shared_ptr<AsynchronousSymbolQuery> &V) {
- return V.get() == &Q;
- });
+ auto I = llvm::find_if(
+ PendingQueries, [&Q](const std::shared_ptr<AsynchronousSymbolQuery> &V) {
+ return V.get() == &Q;
+ });
assert(I != PendingQueries.end() &&
"Query is not attached to this MaterializingInfo");
PendingQueries.erase(I);
@@ -1618,6 +1419,137 @@ JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
LinkOrder.push_back({this, JITDylibLookupFlags::MatchAllSymbols});
}
+ResourceTrackerSP JITDylib::getTracker(MaterializationResponsibility &MR) {
+ auto I = MRTrackers.find(&MR);
+ assert(I != MRTrackers.end() && "MR is not linked");
+ assert(I->second && "Linked tracker is null");
+ return I->second;
+}
+
+std::pair<JITDylib::AsynchronousSymbolQuerySet,
+ std::shared_ptr<SymbolDependenceMap>>
+JITDylib::removeTracker(ResourceTracker &RT) {
+ // Note: Should be called under the session lock.
+
+ SymbolNameVector SymbolsToRemove;
+ std::vector<std::pair<JITDylib *, SymbolStringPtr>> SymbolsToFail;
+
+ if (&RT == DefaultTracker.get()) {
+ SymbolNameSet TrackedSymbols;
+ for (auto &KV : TrackerSymbols)
+ for (auto &Sym : KV.second)
+ TrackedSymbols.insert(Sym);
+
+ for (auto &KV : Symbols) {
+ auto &Sym = KV.first;
+ if (!TrackedSymbols.count(Sym))
+ SymbolsToRemove.push_back(Sym);
+ }
+
+ DefaultTracker.reset();
+ } else {
+ /// Check for a non-default tracker.
+ auto I = TrackerSymbols.find(&RT);
+ if (I != TrackerSymbols.end()) {
+ SymbolsToRemove = std::move(I->second);
+ TrackerSymbols.erase(I);
+ }
+ // ... if not found this tracker was already defunct. Nothing to do.
+ }
+
+ for (auto &Sym : SymbolsToRemove) {
+ assert(Symbols.count(Sym) && "Symbol not in symbol table");
+
+ // If there is a MaterializingInfo then collect any queries to fail.
+ auto MII = MaterializingInfos.find(Sym);
+ if (MII != MaterializingInfos.end())
+ SymbolsToFail.push_back({this, Sym});
+ }
+
+ AsynchronousSymbolQuerySet QueriesToFail;
+ auto Result = failSymbols(std::move(SymbolsToFail));
+
+ // Removed symbols should be taken out of the table altogether.
+ for (auto &Sym : SymbolsToRemove) {
+ auto I = Symbols.find(Sym);
+ assert(I != Symbols.end() && "Symbol not present in table");
+
+ // Remove Materializer if present.
+ if (I->second.hasMaterializerAttached()) {
+ // FIXME: Should this discard the symbols?
+ UnmaterializedInfos.erase(Sym);
+ } else {
+ assert(!UnmaterializedInfos.count(Sym) &&
+ "Symbol has materializer attached");
+ }
+
+ Symbols.erase(I);
+ }
+
+ return Result;
+}
+
+void JITDylib::transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT) {
+ assert(&DstRT != &SrcRT && "No-op transfers shouldn't call transferTracker");
+ assert(&DstRT.getJITDylib() == this && "DstRT is not for this JITDylib");
+ assert(&SrcRT.getJITDylib() == this && "SrcRT is not for this JITDylib");
+
+ // Update trackers for any not-yet materialized units.
+ for (auto &KV : UnmaterializedInfos) {
+ if (KV.second->RT == &SrcRT)
+ KV.second->RT = &DstRT;
+ }
+
+ // Update trackers for any active materialization responsibilities.
+ for (auto &KV : MRTrackers) {
+ if (KV.second == &SrcRT)
+ KV.second = &DstRT;
+ }
+
+ // If we're transferring to the default tracker we just need to delete the
+ // tracked symbols for the source tracker.
+ if (&DstRT == DefaultTracker.get()) {
+ TrackerSymbols.erase(&SrcRT);
+ return;
+ }
+
+ // If we're transferring from the default tracker we need to find all
+ // currently untracked symbols.
+ if (&SrcRT == DefaultTracker.get()) {
+ assert(!TrackerSymbols.count(&SrcRT) &&
+ "Default tracker should not appear in TrackerSymbols");
+
+ SymbolNameVector SymbolsToTrack;
+
+ SymbolNameSet CurrentlyTrackedSymbols;
+ for (auto &KV : TrackerSymbols)
+ for (auto &Sym : KV.second)
+ CurrentlyTrackedSymbols.insert(Sym);
+
+ for (auto &KV : Symbols) {
+ auto &Sym = KV.first;
+ if (!CurrentlyTrackedSymbols.count(Sym))
+ SymbolsToTrack.push_back(Sym);
+ }
+
+ TrackerSymbols[&DstRT] = std::move(SymbolsToTrack);
+ return;
+ }
+
+ auto &DstTrackedSymbols = TrackerSymbols[&DstRT];
+
+ // Finally, if neither SrcRT nor DstRT is the default tracker then just
+ // append SrcRT's tracked symbols to DstRT's.
+ auto SI = TrackerSymbols.find(&SrcRT);
+ if (SI == TrackerSymbols.end())
+ return;
+
+ DstTrackedSymbols.reserve(DstTrackedSymbols.size() + SI->second.size());
+ for (auto &Sym : SI->second)
+ DstTrackedSymbols.push_back(std::move(Sym));
+ TrackerSymbols.erase(SI);
+}
+
Error JITDylib::defineImpl(MaterializationUnit &MU) {
LLVM_DEBUG({ dbgs() << " " << MU.getSymbols() << "\n"; });
@@ -1685,6 +1617,22 @@ Error JITDylib::defineImpl(MaterializationUnit &MU) {
return Error::success();
}
+void JITDylib::installMaterializationUnit(
+ std::unique_ptr<MaterializationUnit> MU, ResourceTracker &RT) {
+
+ /// defineImpl succeeded.
+ if (&RT != DefaultTracker.get()) {
+ auto &TS = TrackerSymbols[&RT];
+ TS.reserve(TS.size() + MU->getSymbols().size());
+ for (auto &KV : MU->getSymbols())
+ TS.push_back(KV.first);
+ }
+
+ auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU), &RT);
+ for (auto &KV : UMI->MU->getSymbols())
+ UnmaterializedInfos[KV.first] = UMI;
+}
+
void JITDylib::detachQueryHelper(AsynchronousSymbolQuery &Q,
const SymbolNameSet &QuerySymbols) {
for (auto &QuerySymbol : QuerySymbols) {
@@ -1773,7 +1721,39 @@ Expected<DenseMap<JITDylib *, SymbolMap>> Platform::lookupInitSymbols(
}
ExecutionSession::ExecutionSession(std::shared_ptr<SymbolStringPool> SSP)
- : SSP(SSP ? std::move(SSP) : std::make_shared<SymbolStringPool>()) {
+ : SSP(SSP ? std::move(SSP) : std::make_shared<SymbolStringPool>()) {}
+
+Error ExecutionSession::endSession() {
+ LLVM_DEBUG(dbgs() << "Ending ExecutionSession " << this << "\n");
+
+ std::vector<JITDylibSP> JITDylibsToClose = runSessionLocked([&] {
+ SessionOpen = false;
+ return std::move(JDs);
+ });
+
+ // TODO: notify platform? run static deinits?
+
+ Error Err = Error::success();
+ for (auto &JD : JITDylibsToClose)
+ Err = joinErrors(std::move(Err), JD->clear());
+ return Err;
+}
+
+void ExecutionSession::registerResourceManager(ResourceManager &RM) {
+ runSessionLocked([&] { ResourceManagers.push_back(&RM); });
+}
+
+void ExecutionSession::deregisterResourceManager(ResourceManager &RM) {
+ runSessionLocked([&] {
+ assert(!ResourceManagers.empty() && "No managers registered");
+ if (ResourceManagers.back() == &RM)
+ ResourceManagers.pop_back();
+ else {
+ auto I = llvm::find(ResourceManagers, &RM);
+ assert(I != ResourceManagers.end() && "RM not registered");
+ ResourceManagers.erase(I);
+ }
+ });
}
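
endSession() gives teardown a defined order: each JITDylib is cleared via its trackers, which in turn notifies any registered ResourceManagers before pending queries are failed. A minimal shutdown sketch (ES is an existing ExecutionSession):

    if (auto Err = ES.endSession())
      ES.reportError(std::move(Err));
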
JITDylib *ExecutionSession::getJITDylibByName(StringRef Name) {
@@ -1788,8 +1768,7 @@ JITDylib *ExecutionSession::getJITDylibByName(StringRef Name) {
JITDylib &ExecutionSession::createBareJITDylib(std::string Name) {
assert(!getJITDylibByName(Name) && "JITDylib with that name already exists");
return runSessionLocked([&, this]() -> JITDylib & {
- JDs.push_back(
- std::shared_ptr<JITDylib>(new JITDylib(*this, std::move(Name))));
+ JDs.push_back(new JITDylib(*this, std::move(Name)));
return *JDs.back();
});
}
@@ -1802,86 +1781,80 @@ Expected<JITDylib &> ExecutionSession::createJITDylib(std::string Name) {
return JD;
}
-void ExecutionSession::legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err) {
- assert(!!Err && "Error should be in failure state");
+std::vector<JITDylibSP> JITDylib::getDFSLinkOrder(ArrayRef<JITDylibSP> JDs) {
+ if (JDs.empty())
+ return {};
- bool SendErrorToQuery;
- runSessionLocked([&]() {
- Q.detach();
- SendErrorToQuery = Q.canStillFail();
+ auto &ES = JDs.front()->getExecutionSession();
+ return ES.runSessionLocked([&]() {
+ DenseSet<JITDylib *> Visited;
+ std::vector<JITDylibSP> Result;
+
+ for (auto &JD : JDs) {
+
+ if (Visited.count(JD.get()))
+ continue;
+
+ SmallVector<JITDylibSP, 64> WorkStack;
+ WorkStack.push_back(JD);
+ Visited.insert(JD.get());
+
+ while (!WorkStack.empty()) {
+ Result.push_back(std::move(WorkStack.back()));
+ WorkStack.pop_back();
+
+ for (auto &KV : llvm::reverse(Result.back()->LinkOrder)) {
+ auto &JD = *KV.first;
+ if (Visited.count(&JD))
+ continue;
+ Visited.insert(&JD);
+ WorkStack.push_back(&JD);
+ }
+ }
+ }
+ return Result;
});
+}
- if (SendErrorToQuery)
- Q.handleFailed(std::move(Err));
- else
- reportError(std::move(Err));
+std::vector<JITDylibSP>
+JITDylib::getReverseDFSLinkOrder(ArrayRef<JITDylibSP> JDs) {
+ auto Tmp = getDFSLinkOrder(JDs);
+ std::reverse(Tmp.begin(), Tmp.end());
+ return Tmp;
}
-Expected<SymbolMap> ExecutionSession::legacyLookup(
- LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names,
- SymbolState RequiredState,
- RegisterDependenciesFunction RegisterDependencies) {
-#if LLVM_ENABLE_THREADS
- // In the threaded case we use promises to return the results.
- std::promise<SymbolMap> PromisedResult;
- Error ResolutionError = Error::success();
- auto NotifyComplete = [&](Expected<SymbolMap> R) {
- if (R)
- PromisedResult.set_value(std::move(*R));
- else {
- ErrorAsOutParameter _(&ResolutionError);
- ResolutionError = R.takeError();
- PromisedResult.set_value(SymbolMap());
- }
- };
-#else
- SymbolMap Result;
- Error ResolutionError = Error::success();
+std::vector<JITDylibSP> JITDylib::getDFSLinkOrder() {
+ return getDFSLinkOrder({this});
+}
- auto NotifyComplete = [&](Expected<SymbolMap> R) {
- ErrorAsOutParameter _(&ResolutionError);
- if (R)
- Result = std::move(*R);
- else
- ResolutionError = R.takeError();
- };
-#endif
+std::vector<JITDylibSP> JITDylib::getReverseDFSLinkOrder() {
+ return getReverseDFSLinkOrder({this});
+}
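
getDFSLinkOrder/getReverseDFSLinkOrder make the link-order traversal reusable by platform code, for example to run initializers dependencies-first. A small sketch (JD is an existing JITDylib):

    // Visit JD and everything it links against, least-dependent first.
    for (auto &NextJD : JD.getReverseDFSLinkOrder())
      errs() << NextJD->getName() << "\n";
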
- auto Query = std::make_shared<AsynchronousSymbolQuery>(
- SymbolLookupSet(Names), RequiredState, std::move(NotifyComplete));
- // FIXME: This should be run session locked along with the registration code
- // and error reporting below.
- SymbolNameSet UnresolvedSymbols = AsyncLookup(Query, std::move(Names));
-
- // If the query was lodged successfully then register the dependencies,
- // otherwise fail it with an error.
- if (UnresolvedSymbols.empty())
- RegisterDependencies(Query->QueryRegistrations);
- else {
- bool DeliverError = runSessionLocked([&]() {
- Query->detach();
- return Query->canStillFail();
- });
- auto Err = make_error<SymbolsNotFound>(std::move(UnresolvedSymbols));
- if (DeliverError)
- Query->handleFailed(std::move(Err));
- else
- reportError(std::move(Err));
- }
+void ExecutionSession::lookupFlags(
+ LookupKind K, JITDylibSearchOrder SearchOrder, SymbolLookupSet LookupSet,
+ unique_function<void(Expected<SymbolFlagsMap>)> OnComplete) {
-#if LLVM_ENABLE_THREADS
- auto ResultFuture = PromisedResult.get_future();
- auto Result = ResultFuture.get();
- if (ResolutionError)
- return std::move(ResolutionError);
- return std::move(Result);
+ OL_applyQueryPhase1(std::make_unique<InProgressLookupFlagsState>(
+ K, std::move(SearchOrder), std::move(LookupSet),
+ std::move(OnComplete)),
+ Error::success());
+}
-#else
- if (ResolutionError)
- return std::move(ResolutionError);
+Expected<SymbolFlagsMap>
+ExecutionSession::lookupFlags(LookupKind K, JITDylibSearchOrder SearchOrder,
+ SymbolLookupSet LookupSet) {
- return Result;
-#endif
+ std::promise<MSVCPExpected<SymbolFlagsMap>> ResultP;
+ OL_applyQueryPhase1(std::make_unique<InProgressLookupFlagsState>(
+ K, std::move(SearchOrder), std::move(LookupSet),
+ [&ResultP](Expected<SymbolFlagsMap> Result) {
+ ResultP.set_value(std::move(Result));
+ }),
+ Error::success());
+
+ auto ResultF = ResultP.get_future();
+ return ResultF.get();
}
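
The blocking lookupFlags overload above simply wraps the asynchronous one in a promise/future pair. A hedged usage sketch (ES and JD are an existing ExecutionSession and JITDylib; "foo" is a placeholder name):

    auto Foo = ES.intern("foo");
    auto Flags = ES.lookupFlags(
        LookupKind::Static,
        {{&JD, JITDylibLookupFlags::MatchExportedSymbolsOnly}},
        SymbolLookupSet(Foo, SymbolLookupFlags::WeaklyReferencedSymbol));
    if (!Flags)
      ES.reportError(Flags.takeError());
    else if (Flags->count(Foo))
      errs() << "foo is visible in JD\n";
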
void ExecutionSession::lookup(
@@ -1900,94 +1873,17 @@ void ExecutionSession::lookup(
// lookup can be re-entered recursively if running on a single thread. Run any
// outstanding MUs in case this query depends on them, otherwise this lookup
// will starve waiting for a result from an MU that is stuck in the queue.
- runOutstandingMUs();
+ dispatchOutstandingMUs();
auto Unresolved = std::move(Symbols);
- std::map<JITDylib *, MaterializationUnitList> CollectedMUsMap;
auto Q = std::make_shared<AsynchronousSymbolQuery>(Unresolved, RequiredState,
std::move(NotifyComplete));
- bool QueryComplete = false;
-
- auto LodgingErr = runSessionLocked([&]() -> Error {
- auto LodgeQuery = [&]() -> Error {
- for (auto &KV : SearchOrder) {
- assert(KV.first && "JITDylibList entries must not be null");
- assert(!CollectedMUsMap.count(KV.first) &&
- "JITDylibList should not contain duplicate entries");
-
- auto &JD = *KV.first;
- auto JDLookupFlags = KV.second;
- if (auto Err = JD.lodgeQuery(CollectedMUsMap[&JD], Q, K, JDLookupFlags,
- Unresolved))
- return Err;
- }
-
- // Strip any weakly referenced symbols that were not found.
- Unresolved.forEachWithRemoval(
- [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) {
- if (Flags == SymbolLookupFlags::WeaklyReferencedSymbol) {
- Q->dropSymbol(Name);
- return true;
- }
- return false;
- });
-
- if (!Unresolved.empty())
- return make_error<SymbolsNotFound>(Unresolved.getSymbolNames());
-
- return Error::success();
- };
- if (auto Err = LodgeQuery()) {
- // Query failed.
+ auto IPLS = std::make_unique<InProgressFullLookupState>(
+ K, SearchOrder, std::move(Unresolved), RequiredState, std::move(Q),
+ std::move(RegisterDependencies));
- // Disconnect the query from its dependencies.
- Q->detach();
-
- // Replace the MUs.
- for (auto &KV : CollectedMUsMap)
- for (auto &MU : KV.second)
- KV.first->replace(std::move(MU));
-
- return Err;
- }
-
- // Query lodged successfully.
-
- // Record whether this query is fully ready / resolved. We will use
- // this to call handleFullyResolved/handleFullyReady outside the session
- // lock.
- QueryComplete = Q->isComplete();
-
- // Call the register dependencies function.
- if (RegisterDependencies && !Q->QueryRegistrations.empty())
- RegisterDependencies(Q->QueryRegistrations);
-
- return Error::success();
- });
-
- if (LodgingErr) {
- Q->handleFailed(std::move(LodgingErr));
- return;
- }
-
- if (QueryComplete)
- Q->handleComplete();
-
- // Move the MUs to the OutstandingMUs list, then materialize.
- {
- std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
-
- for (auto &KV : CollectedMUsMap) {
- auto JD = KV.first->shared_from_this();
- for (auto &MU : KV.second) {
- auto MR = MU->createMaterializationResponsibility(JD);
- OutstandingMUs.push_back(std::make_pair(std::move(MU), std::move(MR)));
- }
- }
- }
-
- runOutstandingMUs();
+ OL_applyQueryPhase1(std::move(IPLS), Error::success());
}
Expected<SymbolMap>
@@ -2077,10 +1973,11 @@ void ExecutionSession::dump(raw_ostream &OS) {
});
}
-void ExecutionSession::runOutstandingMUs() {
+void ExecutionSession::dispatchOutstandingMUs() {
+ LLVM_DEBUG(dbgs() << "Dispatching MaterializationUnits...\n");
while (1) {
Optional<std::pair<std::unique_ptr<MaterializationUnit>,
- MaterializationResponsibility>>
+ std::unique_ptr<MaterializationResponsibility>>>
JMU;
{
@@ -2095,8 +1992,777 @@ void ExecutionSession::runOutstandingMUs() {
break;
assert(JMU->first && "No MU?");
+ LLVM_DEBUG(dbgs() << " Dispatching \"" << JMU->first->getName() << "\"\n");
dispatchMaterialization(std::move(JMU->first), std::move(JMU->second));
}
+ LLVM_DEBUG(dbgs() << "Done dispatching MaterializationUnits.\n");
+}
+
+Error ExecutionSession::removeResourceTracker(ResourceTracker &RT) {
+ LLVM_DEBUG({
+ dbgs() << "In " << RT.getJITDylib().getName() << " removing tracker "
+ << formatv("{0:x}", RT.getKeyUnsafe()) << "\n";
+ });
+ std::vector<ResourceManager *> CurrentResourceManagers;
+
+ JITDylib::AsynchronousSymbolQuerySet QueriesToFail;
+ std::shared_ptr<SymbolDependenceMap> FailedSymbols;
+
+ runSessionLocked([&] {
+ CurrentResourceManagers = ResourceManagers;
+ RT.makeDefunct();
+ std::tie(QueriesToFail, FailedSymbols) = RT.getJITDylib().removeTracker(RT);
+ });
+
+ Error Err = Error::success();
+
+ for (auto *L : reverse(CurrentResourceManagers))
+ Err =
+ joinErrors(std::move(Err), L->handleRemoveResources(RT.getKeyUnsafe()));
+
+ for (auto &Q : QueriesToFail)
+ Q->handleFailed(make_error<FailedToMaterialize>(FailedSymbols));
+
+ return Err;
+}
+
+void ExecutionSession::transferResourceTracker(ResourceTracker &DstRT,
+ ResourceTracker &SrcRT) {
+ LLVM_DEBUG({
+ dbgs() << "In " << SrcRT.getJITDylib().getName()
+ << " transfering resources from tracker "
+ << formatv("{0:x}", SrcRT.getKeyUnsafe()) << " to tracker "
+ << formatv("{0:x}", DstRT.getKeyUnsafe()) << "\n";
+ });
+
+ // No-op transfers are allowed and do not invalidate the source.
+ if (&DstRT == &SrcRT)
+ return;
+
+ assert(&DstRT.getJITDylib() == &SrcRT.getJITDylib() &&
+ "Can't transfer resources between JITDylibs");
+ runSessionLocked([&]() {
+ SrcRT.makeDefunct();
+ auto &JD = DstRT.getJITDylib();
+ JD.transferTracker(DstRT, SrcRT);
+ for (auto *L : reverse(ResourceManagers))
+ L->handleTransferResources(DstRT.getKeyUnsafe(), SrcRT.getKeyUnsafe());
+ });
+}
+
+void ExecutionSession::destroyResourceTracker(ResourceTracker &RT) {
+ runSessionLocked([&]() {
+ LLVM_DEBUG({
+ dbgs() << "In " << RT.getJITDylib().getName() << " destroying tracker "
+ << formatv("{0:x}", RT.getKeyUnsafe()) << "\n";
+ });
+ if (!RT.isDefunct())
+ transferResourceTracker(*RT.getJITDylib().getDefaultResourceTracker(),
+ RT);
+ });
+}
+
+Error ExecutionSession::IL_updateCandidatesFor(
+ JITDylib &JD, JITDylibLookupFlags JDLookupFlags,
+ SymbolLookupSet &Candidates, SymbolLookupSet *NonCandidates) {
+ return Candidates.forEachWithRemoval(
+ [&](const SymbolStringPtr &Name,
+ SymbolLookupFlags SymLookupFlags) -> Expected<bool> {
+            // Search for the symbol. If not found then continue without
+            // removal.
+ auto SymI = JD.Symbols.find(Name);
+ if (SymI == JD.Symbols.end())
+ return false;
+
+ // If this is a non-exported symbol and we're matching exported
+ // symbols only then remove this symbol from the candidates list.
+ //
+ // If we're tracking non-candidates then add this to the non-candidate
+ // list.
+ if (!SymI->second.getFlags().isExported() &&
+ JDLookupFlags == JITDylibLookupFlags::MatchExportedSymbolsOnly) {
+ if (NonCandidates)
+ NonCandidates->add(Name, SymLookupFlags);
+ return true;
+ }
+
+ // If we match against a materialization-side-effects only symbol
+ // then make sure it is weakly-referenced. Otherwise bail out with
+ // an error.
+ // FIXME: Use a "materialization-side-effects-only symbols must be
+ // weakly referenced" specific error here to reduce confusion.
+ if (SymI->second.getFlags().hasMaterializationSideEffectsOnly() &&
+ SymLookupFlags != SymbolLookupFlags::WeaklyReferencedSymbol)
+ return make_error<SymbolsNotFound>(SymbolNameVector({Name}));
+
+ // If we matched against this symbol but it is in the error state
+ // then bail out and treat it as a failure to materialize.
+ if (SymI->second.getFlags().hasError()) {
+ auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
+ (*FailedSymbolsMap)[&JD] = {Name};
+ return make_error<FailedToMaterialize>(std::move(FailedSymbolsMap));
+ }
+
+ // Otherwise this is a match. Remove it from the candidate set.
+ return true;
+ });
+}
+
+void ExecutionSession::OL_applyQueryPhase1(
+ std::unique_ptr<InProgressLookupState> IPLS, Error Err) {
+
+ LLVM_DEBUG({
+ dbgs() << "Entering OL_applyQueryPhase1:\n"
+ << " Lookup kind: " << IPLS->K << "\n"
+ << " Search order: " << IPLS->SearchOrder
+ << ", Current index = " << IPLS->CurSearchOrderIndex
+ << (IPLS->NewJITDylib ? " (entering new JITDylib)" : "") << "\n"
+ << " Lookup set: " << IPLS->LookupSet << "\n"
+ << " Definition generator candidates: "
+ << IPLS->DefGeneratorCandidates << "\n"
+ << " Definition generator non-candidates: "
+ << IPLS->DefGeneratorNonCandidates << "\n";
+ });
+
+ // FIXME: We should attach the query as we go: This provides a result in a
+ // single pass in the common case where all symbols have already reached the
+ // required state. The query could be detached again in the 'fail' method on
+ // IPLS. Phase 2 would be reduced to collecting and dispatching the MUs.
+
+ while (IPLS->CurSearchOrderIndex != IPLS->SearchOrder.size()) {
+
+ // If we've been handed an error or received one back from a generator then
+ // fail the query. We don't need to unlink: At this stage the query hasn't
+ // actually been lodged.
+ if (Err)
+ return IPLS->fail(std::move(Err));
+
+ // Get the next JITDylib and lookup flags.
+ auto &KV = IPLS->SearchOrder[IPLS->CurSearchOrderIndex];
+ auto &JD = *KV.first;
+ auto JDLookupFlags = KV.second;
+
+ LLVM_DEBUG({
+ dbgs() << "Visiting \"" << JD.getName() << "\" (" << JDLookupFlags
+ << ") with lookup set " << IPLS->LookupSet << ":\n";
+ });
+
+ // If we've just reached a new JITDylib then perform some setup.
+ if (IPLS->NewJITDylib) {
+
+ // Acquire the generator lock for this JITDylib.
+ IPLS->GeneratorLock = std::unique_lock<std::mutex>(JD.GeneratorsMutex);
+
+      // Add any non-candidates from the previous JITDylib (if any) back onto
+      // the list of definition candidates for this JITDylib, and reset the
+      // definition non-candidates to the empty set.
+ SymbolLookupSet Tmp;
+ std::swap(IPLS->DefGeneratorNonCandidates, Tmp);
+ IPLS->DefGeneratorCandidates.append(std::move(Tmp));
+
+ LLVM_DEBUG({
+ dbgs() << " First time visiting " << JD.getName()
+ << ", resetting candidate sets and building generator stack\n";
+ });
+
+ // Build the definition generator stack for this JITDylib.
+ for (auto &DG : reverse(JD.DefGenerators))
+ IPLS->CurDefGeneratorStack.push_back(DG);
+
+ // Flag that we've done our initialization.
+ IPLS->NewJITDylib = false;
+ }
+
+ // Remove any generation candidates that are already defined (and match) in
+ // this JITDylib.
+ runSessionLocked([&] {
+ // Update the list of candidates (and non-candidates) for definition
+ // generation.
+ LLVM_DEBUG(dbgs() << " Updating candidate set...\n");
+ Err = IL_updateCandidatesFor(
+ JD, JDLookupFlags, IPLS->DefGeneratorCandidates,
+ JD.DefGenerators.empty() ? nullptr
+ : &IPLS->DefGeneratorNonCandidates);
+ LLVM_DEBUG({
+ dbgs() << " Remaining candidates = " << IPLS->DefGeneratorCandidates
+ << "\n";
+ });
+ });
+
+ // If we encountered an error while filtering generation candidates then
+ // bail out.
+ if (Err)
+ return IPLS->fail(std::move(Err));
+
+    // Apply any definition generators on the stack.
+ LLVM_DEBUG({
+ if (IPLS->CurDefGeneratorStack.empty())
+ LLVM_DEBUG(dbgs() << " No generators to run for this JITDylib.\n");
+ else if (IPLS->DefGeneratorCandidates.empty())
+ LLVM_DEBUG(dbgs() << " No candidates to generate.\n");
+ else
+ dbgs() << " Running " << IPLS->CurDefGeneratorStack.size()
+ << " remaining generators for "
+ << IPLS->DefGeneratorCandidates.size() << " candidates\n";
+ });
+ while (!IPLS->CurDefGeneratorStack.empty() &&
+ !IPLS->DefGeneratorCandidates.empty()) {
+ auto DG = IPLS->CurDefGeneratorStack.back().lock();
+ IPLS->CurDefGeneratorStack.pop_back();
+
+ if (!DG)
+ return IPLS->fail(make_error<StringError>(
+ "DefinitionGenerator removed while lookup in progress",
+ inconvertibleErrorCode()));
+
+ auto K = IPLS->K;
+ auto &LookupSet = IPLS->DefGeneratorCandidates;
+
+      // Run the generator. If the generator takes ownership of the
+      // LookupState then this will break the loop.
+ {
+ LLVM_DEBUG(dbgs() << " Attempting to generate " << LookupSet << "\n");
+ LookupState LS(std::move(IPLS));
+ Err = DG->tryToGenerate(LS, K, JD, JDLookupFlags, LookupSet);
+ IPLS = std::move(LS.IPLS);
+ }
+
+ // If there was an error then fail the query.
+ if (Err) {
+ LLVM_DEBUG({
+ dbgs() << " Error attempting to generate " << LookupSet << "\n";
+ });
+ assert(IPLS && "LS cannot be retained if error is returned");
+ return IPLS->fail(std::move(Err));
+ }
+
+      // Otherwise, if the LookupState was captured then break the loop.
+ if (!IPLS) {
+ LLVM_DEBUG(
+ { dbgs() << " LookupState captured. Exiting phase1 for now.\n"; });
+ return;
+ }
+
+ // Otherwise if we're continuing around the loop then update candidates
+ // for the next round.
+ runSessionLocked([&] {
+ LLVM_DEBUG(dbgs() << " Updating candidate set post-generation\n");
+ Err = IL_updateCandidatesFor(
+ JD, JDLookupFlags, IPLS->DefGeneratorCandidates,
+ JD.DefGenerators.empty() ? nullptr
+ : &IPLS->DefGeneratorNonCandidates);
+ });
+
+ // If updating candidates failed then fail the query.
+ if (Err) {
+ LLVM_DEBUG(dbgs() << " Error encountered while updating candidates\n");
+ return IPLS->fail(std::move(Err));
+ }
+ }
+
+ // If we get here then we've moved on to the next JITDylib.
+ LLVM_DEBUG(dbgs() << "Phase 1 moving to next JITDylib.\n");
+ ++IPLS->CurSearchOrderIndex;
+ IPLS->NewJITDylib = true;
+ }
+
+ // Remove any weakly referenced candidates that could not be found/generated.
+ IPLS->DefGeneratorCandidates.remove_if(
+ [](const SymbolStringPtr &Name, SymbolLookupFlags SymLookupFlags) {
+ return SymLookupFlags == SymbolLookupFlags::WeaklyReferencedSymbol;
+ });
+
+ // If we get here then we've finished searching all JITDylibs.
+ // If we matched all symbols then move to phase 2, otherwise fail the query
+ // with a SymbolsNotFound error.
+ if (IPLS->DefGeneratorCandidates.empty()) {
+ LLVM_DEBUG(dbgs() << "Phase 1 succeeded.\n");
+ IPLS->complete(std::move(IPLS));
+ } else {
+ LLVM_DEBUG(dbgs() << "Phase 1 failed with unresolved symbols.\n");
+ IPLS->fail(make_error<SymbolsNotFound>(
+ IPLS->DefGeneratorCandidates.getSymbolNames()));
+ }
+}
+
+void ExecutionSession::OL_completeLookup(
+ std::unique_ptr<InProgressLookupState> IPLS,
+ std::shared_ptr<AsynchronousSymbolQuery> Q,
+ RegisterDependenciesFunction RegisterDependencies) {
+
+ LLVM_DEBUG({
+ dbgs() << "Entering OL_completeLookup:\n"
+ << " Lookup kind: " << IPLS->K << "\n"
+ << " Search order: " << IPLS->SearchOrder
+ << ", Current index = " << IPLS->CurSearchOrderIndex
+ << (IPLS->NewJITDylib ? " (entering new JITDylib)" : "") << "\n"
+ << " Lookup set: " << IPLS->LookupSet << "\n"
+ << " Definition generator candidates: "
+ << IPLS->DefGeneratorCandidates << "\n"
+ << " Definition generator non-candidates: "
+ << IPLS->DefGeneratorNonCandidates << "\n";
+ });
+
+ bool QueryComplete = false;
+ DenseMap<JITDylib *, JITDylib::UnmaterializedInfosList> CollectedUMIs;
+
+ auto LodgingErr = runSessionLocked([&]() -> Error {
+ for (auto &KV : IPLS->SearchOrder) {
+ auto &JD = *KV.first;
+ auto JDLookupFlags = KV.second;
+ LLVM_DEBUG({
+ dbgs() << "Visiting \"" << JD.getName() << "\" (" << JDLookupFlags
+ << ") with lookup set " << IPLS->LookupSet << ":\n";
+ });
+
+ auto Err = IPLS->LookupSet.forEachWithRemoval(
+ [&](const SymbolStringPtr &Name,
+ SymbolLookupFlags SymLookupFlags) -> Expected<bool> {
+ LLVM_DEBUG({
+ dbgs() << " Attempting to match \"" << Name << "\" ("
+ << SymLookupFlags << ")... ";
+ });
+
+              // Search for the symbol. If not found then continue without
+              // removal.
+ auto SymI = JD.Symbols.find(Name);
+ if (SymI == JD.Symbols.end()) {
+ LLVM_DEBUG(dbgs() << "skipping: not present\n");
+ return false;
+ }
+
+ // If this is a non-exported symbol and we're matching exported
+ // symbols only then skip this symbol without removal.
+ if (!SymI->second.getFlags().isExported() &&
+ JDLookupFlags ==
+ JITDylibLookupFlags::MatchExportedSymbolsOnly) {
+ LLVM_DEBUG(dbgs() << "skipping: not exported\n");
+ return false;
+ }
+
+ // If we match against a materialization-side-effects only symbol
+ // then make sure it is weakly-referenced. Otherwise bail out with
+ // an error.
+ // FIXME: Use a "materialization-side-effects-only symbols must be
+ // weakly referenced" specific error here to reduce confusion.
+ if (SymI->second.getFlags().hasMaterializationSideEffectsOnly() &&
+ SymLookupFlags != SymbolLookupFlags::WeaklyReferencedSymbol) {
+ LLVM_DEBUG({
+ dbgs() << "error: "
+                            "required, but symbol is materialization-side-effects-only\n";
+ });
+ return make_error<SymbolsNotFound>(SymbolNameVector({Name}));
+ }
+
+ // If we matched against this symbol but it is in the error state
+ // then bail out and treat it as a failure to materialize.
+ if (SymI->second.getFlags().hasError()) {
+ LLVM_DEBUG(dbgs() << "error: symbol is in error state\n");
+ auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
+ (*FailedSymbolsMap)[&JD] = {Name};
+ return make_error<FailedToMaterialize>(
+ std::move(FailedSymbolsMap));
+ }
+
+ // Otherwise this is a match.
+
+            // If this symbol is already in the required state then notify the
+ // query, remove the symbol and continue.
+ if (SymI->second.getState() >= Q->getRequiredState()) {
+ LLVM_DEBUG(dbgs()
+ << "matched, symbol already in required state\n");
+ Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
+ return true;
+ }
+
+ // Otherwise this symbol does not yet meet the required state. Check
+ // whether it has a materializer attached, and if so prepare to run
+ // it.
+ if (SymI->second.hasMaterializerAttached()) {
+ assert(SymI->second.getAddress() == 0 &&
+ "Symbol not resolved but already has address?");
+ auto UMII = JD.UnmaterializedInfos.find(Name);
+ assert(UMII != JD.UnmaterializedInfos.end() &&
+ "Lazy symbol should have UnmaterializedInfo");
+
+ auto UMI = UMII->second;
+ assert(UMI->MU && "Materializer should not be null");
+ assert(UMI->RT && "Tracker should not be null");
+ LLVM_DEBUG({
+ dbgs() << "matched, preparing to dispatch MU@" << UMI->MU.get()
+ << " (" << UMI->MU->getName() << ")\n";
+ });
+
+ // Move all symbols associated with this MaterializationUnit into
+ // materializing state.
+ for (auto &KV : UMI->MU->getSymbols()) {
+ auto SymK = JD.Symbols.find(KV.first);
+ assert(SymK != JD.Symbols.end() &&
+ "No entry for symbol covered by MaterializationUnit");
+ SymK->second.setMaterializerAttached(false);
+ SymK->second.setState(SymbolState::Materializing);
+ JD.UnmaterializedInfos.erase(KV.first);
+ }
+
+ // Add MU to the list of MaterializationUnits to be materialized.
+ CollectedUMIs[&JD].push_back(std::move(UMI));
+ } else
+            LLVM_DEBUG(dbgs() << "matched, registering query\n");
+
+ // Add the query to the PendingQueries list and continue, deleting
+ // the element from the lookup set.
+ assert(SymI->second.getState() != SymbolState::NeverSearched &&
+ SymI->second.getState() != SymbolState::Ready &&
+ "By this line the symbol should be materializing");
+ auto &MI = JD.MaterializingInfos[Name];
+ MI.addQuery(Q);
+ Q->addQueryDependence(JD, Name);
+
+ return true;
+ });
+
+ // Handle failure.
+ if (Err) {
+
+ LLVM_DEBUG({
+ dbgs() << "Lookup failed. Detaching query and replacing MUs.\n";
+ });
+
+ // Detach the query.
+ Q->detach();
+
+ // Replace the MUs.
+ for (auto &KV : CollectedUMIs) {
+ auto &JD = *KV.first;
+ for (auto &UMI : KV.second)
+ for (auto &KV2 : UMI->MU->getSymbols()) {
+ assert(!JD.UnmaterializedInfos.count(KV2.first) &&
+ "Unexpected materializer in map");
+ auto SymI = JD.Symbols.find(KV2.first);
+ assert(SymI != JD.Symbols.end() && "Missing symbol entry");
+ assert(SymI->second.getState() == SymbolState::Materializing &&
+ "Can not replace symbol that is not materializing");
+ assert(!SymI->second.hasMaterializerAttached() &&
+ "MaterializerAttached flag should not be set");
+ SymI->second.setMaterializerAttached(true);
+ JD.UnmaterializedInfos[KV2.first] = UMI;
+ }
+ }
+
+ return Err;
+ }
+ }
+
+    LLVM_DEBUG(dbgs() << "Stripping unmatched weakly-referenced symbols\n");
+ IPLS->LookupSet.forEachWithRemoval(
+ [&](const SymbolStringPtr &Name, SymbolLookupFlags SymLookupFlags) {
+ if (SymLookupFlags == SymbolLookupFlags::WeaklyReferencedSymbol) {
+ Q->dropSymbol(Name);
+ return true;
+ } else
+ return false;
+ });
+
+ if (!IPLS->LookupSet.empty()) {
+ LLVM_DEBUG(dbgs() << "Failing due to unresolved symbols\n");
+ return make_error<SymbolsNotFound>(IPLS->LookupSet.getSymbolNames());
+ }
+
+ // Record whether the query completed.
+ QueryComplete = Q->isComplete();
+
+ LLVM_DEBUG({
+ dbgs() << "Query successfully "
+ << (QueryComplete ? "completed" : "lodged") << "\n";
+ });
+
+ // Move the collected MUs to the OutstandingMUs list.
+ if (!CollectedUMIs.empty()) {
+ std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
+
+ LLVM_DEBUG(dbgs() << "Adding MUs to dispatch:\n");
+ for (auto &KV : CollectedUMIs) {
+ auto &JD = *KV.first;
+ LLVM_DEBUG({
+ dbgs() << " For " << JD.getName() << ": Adding " << KV.second.size()
+ << " MUs.\n";
+ });
+ for (auto &UMI : KV.second) {
+ std::unique_ptr<MaterializationResponsibility> MR(
+ new MaterializationResponsibility(
+ &JD, std::move(UMI->MU->SymbolFlags),
+ std::move(UMI->MU->InitSymbol)));
+ JD.MRTrackers[MR.get()] = UMI->RT;
+ OutstandingMUs.push_back(
+ std::make_pair(std::move(UMI->MU), std::move(MR)));
+ }
+ }
+ } else
+ LLVM_DEBUG(dbgs() << "No MUs to dispatch.\n");
+
+ if (RegisterDependencies && !Q->QueryRegistrations.empty()) {
+ LLVM_DEBUG(dbgs() << "Registering dependencies\n");
+ RegisterDependencies(Q->QueryRegistrations);
+ } else
+ LLVM_DEBUG(dbgs() << "No dependencies to register\n");
+
+ return Error::success();
+ });
+
+ if (LodgingErr) {
+ LLVM_DEBUG(dbgs() << "Failing query\n");
+ Q->detach();
+ Q->handleFailed(std::move(LodgingErr));
+ return;
+ }
+
+ if (QueryComplete) {
+ LLVM_DEBUG(dbgs() << "Completing query\n");
+ Q->handleComplete();
+ }
+
+ dispatchOutstandingMUs();
+}
+
+void ExecutionSession::OL_completeLookupFlags(
+ std::unique_ptr<InProgressLookupState> IPLS,
+ unique_function<void(Expected<SymbolFlagsMap>)> OnComplete) {
+
+ auto Result = runSessionLocked([&]() -> Expected<SymbolFlagsMap> {
+ LLVM_DEBUG({
+ dbgs() << "Entering OL_completeLookupFlags:\n"
+ << " Lookup kind: " << IPLS->K << "\n"
+ << " Search order: " << IPLS->SearchOrder
+ << ", Current index = " << IPLS->CurSearchOrderIndex
+ << (IPLS->NewJITDylib ? " (entering new JITDylib)" : "") << "\n"
+ << " Lookup set: " << IPLS->LookupSet << "\n"
+ << " Definition generator candidates: "
+ << IPLS->DefGeneratorCandidates << "\n"
+ << " Definition generator non-candidates: "
+ << IPLS->DefGeneratorNonCandidates << "\n";
+ });
+
+ SymbolFlagsMap Result;
+
+ // Attempt to find flags for each symbol.
+ for (auto &KV : IPLS->SearchOrder) {
+ auto &JD = *KV.first;
+ auto JDLookupFlags = KV.second;
+ LLVM_DEBUG({
+ dbgs() << "Visiting \"" << JD.getName() << "\" (" << JDLookupFlags
+ << ") with lookup set " << IPLS->LookupSet << ":\n";
+ });
+
+ IPLS->LookupSet.forEachWithRemoval([&](const SymbolStringPtr &Name,
+ SymbolLookupFlags SymLookupFlags) {
+ LLVM_DEBUG({
+ dbgs() << " Attempting to match \"" << Name << "\" ("
+ << SymLookupFlags << ")... ";
+ });
+
+ // Search for the symbol. If not found then continue without removing
+ // from the lookup set.
+ auto SymI = JD.Symbols.find(Name);
+ if (SymI == JD.Symbols.end()) {
+ LLVM_DEBUG(dbgs() << "skipping: not present\n");
+ return false;
+ }
+
+ // If this is a non-exported symbol then it doesn't match. Skip it.
+ if (!SymI->second.getFlags().isExported() &&
+ JDLookupFlags == JITDylibLookupFlags::MatchExportedSymbolsOnly) {
+ LLVM_DEBUG(dbgs() << "skipping: not exported\n");
+ return false;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "matched, \"" << Name << "\" -> " << SymI->second.getFlags()
+ << "\n";
+ });
+ Result[Name] = SymI->second.getFlags();
+ return true;
+ });
+ }
+
+ // Remove any weakly referenced symbols that haven't been resolved.
+ IPLS->LookupSet.remove_if(
+ [](const SymbolStringPtr &Name, SymbolLookupFlags SymLookupFlags) {
+ return SymLookupFlags == SymbolLookupFlags::WeaklyReferencedSymbol;
+ });
+
+ if (!IPLS->LookupSet.empty()) {
+ LLVM_DEBUG(dbgs() << "Failing due to unresolved symbols\n");
+ return make_error<SymbolsNotFound>(IPLS->LookupSet.getSymbolNames());
+ }
+
+    LLVM_DEBUG(dbgs() << "Succeeded, result = " << Result << "\n");
+ return Result;
+ });
+
+ // Run the callback on the result.
+ LLVM_DEBUG(dbgs() << "Sending result to handler.\n");
+ OnComplete(std::move(Result));
+}
+
+void ExecutionSession::OL_destroyMaterializationResponsibility(
+ MaterializationResponsibility &MR) {
+
+ assert(MR.SymbolFlags.empty() &&
+ "All symbols should have been explicitly materialized or failed");
+ MR.JD->unlinkMaterializationResponsibility(MR);
+}
+
+SymbolNameSet ExecutionSession::OL_getRequestedSymbols(
+ const MaterializationResponsibility &MR) {
+ return MR.JD->getRequestedSymbols(MR.SymbolFlags);
+}
+
+Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR,
+ const SymbolMap &Symbols) {
+ LLVM_DEBUG({
+ dbgs() << "In " << MR.JD->getName() << " resolving " << Symbols << "\n";
+ });
+#ifndef NDEBUG
+ for (auto &KV : Symbols) {
+ auto WeakFlags = JITSymbolFlags::Weak | JITSymbolFlags::Common;
+ auto I = MR.SymbolFlags.find(KV.first);
+ assert(I != MR.SymbolFlags.end() &&
+ "Resolving symbol outside this responsibility set");
+ assert(!I->second.hasMaterializationSideEffectsOnly() &&
+ "Can't resolve materialization-side-effects-only symbol");
+ assert((KV.second.getFlags() & ~WeakFlags) == (I->second & ~WeakFlags) &&
+ "Resolving symbol with incorrect flags");
+ }
+#endif
+
+ return MR.JD->resolve(MR, Symbols);
+}
+
+Error ExecutionSession::OL_notifyEmitted(MaterializationResponsibility &MR) {
+ LLVM_DEBUG({
+ dbgs() << "In " << MR.JD->getName() << " emitting " << MR.SymbolFlags << "\n";
+ });
+
+ if (auto Err = MR.JD->emit(MR, MR.SymbolFlags))
+ return Err;
+
+ MR.SymbolFlags.clear();
+ return Error::success();
+}
+
+Error ExecutionSession::OL_defineMaterializing(
+ MaterializationResponsibility &MR, SymbolFlagsMap NewSymbolFlags) {
+
+ LLVM_DEBUG({
+ dbgs() << "In " << MR.JD->getName() << " defining materializing symbols "
+ << NewSymbolFlags << "\n";
+ });
+ if (auto AcceptedDefs = MR.JD->defineMaterializing(std::move(NewSymbolFlags))) {
+ // Add all newly accepted symbols to this responsibility object.
+ for (auto &KV : *AcceptedDefs)
+ MR.SymbolFlags.insert(KV);
+ return Error::success();
+ } else
+ return AcceptedDefs.takeError();
+}
+
+void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) {
+
+ LLVM_DEBUG({
+ dbgs() << "In " << MR.JD->getName() << " failing materialization for "
+ << MR.SymbolFlags << "\n";
+ });
+
+ JITDylib::FailedSymbolsWorklist Worklist;
+
+ for (auto &KV : MR.SymbolFlags)
+ Worklist.push_back(std::make_pair(MR.JD.get(), KV.first));
+ MR.SymbolFlags.clear();
+
+ if (Worklist.empty())
+ return;
+
+ JITDylib::AsynchronousSymbolQuerySet FailedQueries;
+ std::shared_ptr<SymbolDependenceMap> FailedSymbols;
+
+ runSessionLocked([&]() {
+ auto RTI = MR.JD->MRTrackers.find(&MR);
+    assert(RTI != MR.JD->MRTrackers.end() && "No tracker for this MR");
+ if (RTI->second->isDefunct())
+ return;
+
+ std::tie(FailedQueries, FailedSymbols) =
+ JITDylib::failSymbols(std::move(Worklist));
+ });
+
+ for (auto &Q : FailedQueries)
+ Q->handleFailed(make_error<FailedToMaterialize>(FailedSymbols));
+}
+
+Error ExecutionSession::OL_replace(MaterializationResponsibility &MR,
+ std::unique_ptr<MaterializationUnit> MU) {
+ for (auto &KV : MU->getSymbols()) {
+ assert(MR.SymbolFlags.count(KV.first) &&
+ "Replacing definition outside this responsibility set");
+ MR.SymbolFlags.erase(KV.first);
+ }
+
+ if (MU->getInitializerSymbol() == MR.InitSymbol)
+ MR.InitSymbol = nullptr;
+
+ LLVM_DEBUG(MR.JD->getExecutionSession().runSessionLocked([&]() {
+ dbgs() << "In " << MR.JD->getName() << " replacing symbols with " << *MU
+ << "\n";
+ }););
+
+ return MR.JD->replace(MR, std::move(MU));
+}
+
+Expected<std::unique_ptr<MaterializationResponsibility>>
+ExecutionSession::OL_delegate(MaterializationResponsibility &MR,
+ const SymbolNameSet &Symbols) {
+
+ SymbolStringPtr DelegatedInitSymbol;
+ SymbolFlagsMap DelegatedFlags;
+
+ for (auto &Name : Symbols) {
+ auto I = MR.SymbolFlags.find(Name);
+ assert(I != MR.SymbolFlags.end() &&
+ "Symbol is not tracked by this MaterializationResponsibility "
+ "instance");
+
+ DelegatedFlags[Name] = std::move(I->second);
+ if (Name == MR.InitSymbol)
+ std::swap(MR.InitSymbol, DelegatedInitSymbol);
+
+ MR.SymbolFlags.erase(I);
+ }
+
+ return MR.JD->delegate(MR, std::move(DelegatedFlags),
+ std::move(DelegatedInitSymbol));
+}
+
+void ExecutionSession::OL_addDependencies(
+ MaterializationResponsibility &MR, const SymbolStringPtr &Name,
+ const SymbolDependenceMap &Dependencies) {
+ LLVM_DEBUG({
+ dbgs() << "Adding dependencies for " << Name << ": " << Dependencies
+ << "\n";
+ });
+ assert(MR.SymbolFlags.count(Name) &&
+ "Symbol not covered by this MaterializationResponsibility instance");
+ MR.JD->addDependencies(Name, Dependencies);
+}
+
+void ExecutionSession::OL_addDependenciesForAll(
+ MaterializationResponsibility &MR,
+ const SymbolDependenceMap &Dependencies) {
+ LLVM_DEBUG({
+ dbgs() << "Adding dependencies for all symbols in " << MR.SymbolFlags << ": "
+ << Dependencies << "\n";
+ });
+ for (auto &KV : MR.SymbolFlags)
+ MR.JD->addDependencies(KV.first, Dependencies);
}
#ifndef NDEBUG
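
The tracker removal and transfer paths added to Core.cpp above are driven from client code through the ResourceTracker API. A minimal sketch, assuming a hypothetical symbol name and address; only the ORC calls themselves (createResourceTracker, define with a tracker, ResourceTracker::remove) come from the new API:

#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

static Error addAndRemove(ExecutionSession &ES, JITDylib &JD,
                          JITTargetAddress Addr) {
  // Track the definitions added below so they can be removed later.
  auto RT = JD.createResourceTracker();

  SymbolMap Syms;
  Syms[ES.intern("hypothetical_symbol")] =
      JITEvaluatedSymbol(Addr, JITSymbolFlags::Exported);
  if (auto Err = JD.define(absoluteSymbols(std::move(Syms)), RT))
    return Err;

  // Funnels into ExecutionSession::removeResourceTracker above: resource
  // managers are notified and any still-pending queries are failed.
  return RT->remove();
}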
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index 4d255cd66c1b..6a1a41a13a1b 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -21,32 +21,6 @@
namespace llvm {
namespace orc {
-int runAsMain(int (*Main)(int, char *[]), ArrayRef<std::string> Args,
- Optional<StringRef> ProgramName) {
- std::vector<std::unique_ptr<char[]>> ArgVStorage;
- std::vector<char *> ArgV;
-
- ArgVStorage.reserve(Args.size() + (ProgramName ? 1 : 0));
- ArgV.reserve(Args.size() + 1 + (ProgramName ? 1 : 0));
-
- if (ProgramName) {
- ArgVStorage.push_back(std::make_unique<char[]>(ProgramName->size() + 1));
- llvm::copy(*ProgramName, &ArgVStorage.back()[0]);
- ArgVStorage.back()[ProgramName->size()] = '\0';
- ArgV.push_back(ArgVStorage.back().get());
- }
-
- for (auto &Arg : Args) {
- ArgVStorage.push_back(std::make_unique<char[]>(Arg.size() + 1));
- llvm::copy(Arg, &ArgVStorage.back()[0]);
- ArgVStorage.back()[Arg.size()] = '\0';
- ArgV.push_back(ArgVStorage.back().get());
- }
- ArgV.push_back(nullptr);
-
- return Main(Args.size() + !!ProgramName, ArgV.data());
-}
-
CtorDtorIterator::CtorDtorIterator(const GlobalVariable *GV, bool End)
: InitList(
GV ? dyn_cast_or_null<ConstantArray>(GV->getInitializer()) : nullptr),
@@ -261,8 +235,8 @@ DynamicLibrarySearchGenerator::Load(const char *FileName, char GlobalPrefix,
}
Error DynamicLibrarySearchGenerator::tryToGenerate(
- LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags,
- const SymbolLookupSet &Symbols) {
+ LookupState &LS, LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags, const SymbolLookupSet &Symbols) {
orc::SymbolMap NewSymbols;
bool HasGlobalPrefix = (GlobalPrefix != '\0');
@@ -322,7 +296,8 @@ StaticLibraryDefinitionGenerator::Load(ObjectLayer &L, const char *FileName,
auto ObjTT = Obj.getTriple();
if (ObjTT.getArch() == TT.getArch() &&
ObjTT.getSubArch() == TT.getSubArch() &&
- ObjTT.getVendor() == TT.getVendor()) {
+ (TT.getVendor() == Triple::UnknownVendor ||
+ ObjTT.getVendor() == TT.getVendor())) {
// We found a match. Create an instance from a buffer covering this
// slice.
auto SliceBuffer = MemoryBuffer::getFileSlice(FileName, Obj.getSize(),
@@ -364,8 +339,8 @@ StaticLibraryDefinitionGenerator::Create(
}
Error StaticLibraryDefinitionGenerator::tryToGenerate(
- LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags,
- const SymbolLookupSet &Symbols) {
+ LookupState &LS, LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags, const SymbolLookupSet &Symbols) {
// Don't materialize symbols from static archives unless this is a static
// lookup.
@@ -396,8 +371,7 @@ Error StaticLibraryDefinitionGenerator::tryToGenerate(
MemoryBufferRef ChildBufferRef(ChildBufferInfo.first,
ChildBufferInfo.second);
- if (auto Err = L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef, false),
- VModuleKey()))
+ if (auto Err = L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef, false)))
return Err;
}
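
The generator changes above thread a LookupState through tryToGenerate. A minimal sketch of a custom generator written against the new signature, using a hypothetical placeholder address; a real generator could instead retain the LookupState and resume the lookup later via LookupState::continueLookup:

#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

class StubGenerator : public DefinitionGenerator {
public:
  Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
                      JITDylibLookupFlags JDLookupFlags,
                      const SymbolLookupSet &LookupSet) override {
    // Synchronous case: define every requested symbol as an absolute symbol
    // at a placeholder address and return without capturing LS.
    SymbolMap NewSymbols;
    for (auto &KV : LookupSet)
      NewSymbols[KV.first] =
          JITEvaluatedSymbol(0x1000, JITSymbolFlags::Exported);
    if (NewSymbols.empty())
      return Error::success();
    return JD.define(absoluteSymbols(std::move(NewSymbols)));
  }
};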
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
index 023940dc8298..aadc437c80c4 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
@@ -25,7 +25,7 @@ void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) {
this->NotifyCompiled = std::move(NotifyCompiled);
}
-void IRCompileLayer::emit(MaterializationResponsibility R,
+void IRCompileLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
ThreadSafeModule TSM) {
assert(TSM && "Module must not be null");
@@ -33,13 +33,13 @@ void IRCompileLayer::emit(MaterializationResponsibility R,
{
std::lock_guard<std::mutex> Lock(IRLayerMutex);
if (NotifyCompiled)
- NotifyCompiled(R.getVModuleKey(), std::move(TSM));
+ NotifyCompiled(*R, std::move(TSM));
else
TSM = ThreadSafeModule();
}
BaseLayer.emit(std::move(R), std::move(*Obj));
} else {
- R.failMaterialization();
+ R->failMaterialization();
getExecutionSession().reportError(Obj.takeError());
}
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
index 511248f83b25..d5b11349277c 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
@@ -17,14 +17,14 @@ IRTransformLayer::IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer,
: IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer),
Transform(std::move(Transform)) {}
-void IRTransformLayer::emit(MaterializationResponsibility R,
+void IRTransformLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
ThreadSafeModule TSM) {
assert(TSM && "Module must not be null");
- if (auto TransformedTSM = Transform(std::move(TSM), R))
+ if (auto TransformedTSM = Transform(std::move(TSM), *R))
BaseLayer.emit(std::move(R), std::move(*TransformedTSM));
else {
- R.failMaterialization();
+ R->failMaterialization();
getExecutionSession().reportError(TransformedTSM.takeError());
}
}
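
With the change above, the transform function receives the MaterializationResponsibility by reference while the layer keeps ownership of the unique_ptr. A minimal sketch of installing a pass-through transform under that interface:

#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"

using namespace llvm;
using namespace llvm::orc;

static void installTransform(IRTransformLayer &TL) {
  TL.setTransform(
      [](ThreadSafeModule TSM,
         MaterializationResponsibility &R) -> Expected<ThreadSafeModule> {
        // Inspect or rewrite the module here; R identifies the symbols being
        // materialized. Returning the module unchanged makes this a no-op.
        return std::move(TSM);
      });
}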
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 031b1afefc9d..1cfcf8ae943d 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -25,20 +25,20 @@ public:
using CompileFunction = JITCompileCallbackManager::CompileFunction;
CompileCallbackMaterializationUnit(SymbolStringPtr Name,
- CompileFunction Compile, VModuleKey K)
+ CompileFunction Compile)
: MaterializationUnit(SymbolFlagsMap({{Name, JITSymbolFlags::Exported}}),
- nullptr, std::move(K)),
+ nullptr),
Name(std::move(Name)), Compile(std::move(Compile)) {}
StringRef getName() const override { return "<Compile Callbacks>"; }
private:
- void materialize(MaterializationResponsibility R) override {
+ void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
SymbolMap Result;
Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported);
// No dependencies, so these calls cannot fail.
- cantFail(R.notifyResolved(Result));
- cantFail(R.notifyEmitted());
+ cantFail(R->notifyResolved(Result));
+ cantFail(R->notifyEmitted());
}
void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
@@ -54,8 +54,8 @@ private:
namespace llvm {
namespace orc {
+TrampolinePool::~TrampolinePool() {}
void IndirectStubsManager::anchor() {}
-void TrampolinePool::anchor() {}
Expected<JITTargetAddress>
JITCompileCallbackManager::getCompileCallback(CompileFunction Compile) {
@@ -65,10 +65,9 @@ JITCompileCallbackManager::getCompileCallback(CompileFunction Compile) {
std::lock_guard<std::mutex> Lock(CCMgrMutex);
AddrToSymbol[*TrampolineAddr] = CallbackName;
- cantFail(CallbacksJD.define(
- std::make_unique<CompileCallbackMaterializationUnit>(
- std::move(CallbackName), std::move(Compile),
- ES.allocateVModule())));
+ cantFail(
+ CallbacksJD.define(std::make_unique<CompileCallbackMaterializationUnit>(
+ std::move(CallbackName), std::move(Compile))));
return *TrampolineAddr;
} else
return TrampolineAddr.takeError();
@@ -149,7 +148,7 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
}
case Triple::x86_64: {
- if ( T.getOS() == Triple::OSType::Win32 ) {
+ if (T.getOS() == Triple::OSType::Win32) {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
return CCMgrT::Create(ES, ErrorHandlerAddress);
} else {
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 713a48fbf3eb..c368c1e37134 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -11,8 +11,10 @@
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
-#include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
@@ -87,8 +89,9 @@ class GenericLLVMIRPlatform : public Platform {
public:
GenericLLVMIRPlatform(GenericLLVMIRPlatformSupport &S) : S(S) {}
Error setupJITDylib(JITDylib &JD) override;
- Error notifyAdding(JITDylib &JD, const MaterializationUnit &MU) override;
- Error notifyRemoving(JITDylib &JD, VModuleKey K) override {
+ Error notifyAdding(ResourceTracker &RT,
+ const MaterializationUnit &MU) override;
+ Error notifyRemoving(ResourceTracker &RT) override {
// Noop -- Nothing to do (yet).
return Error::success();
}
@@ -186,7 +189,8 @@ public:
return J.addIRModule(JD, ThreadSafeModule(std::move(M), std::move(Ctx)));
}
- Error notifyAdding(JITDylib &JD, const MaterializationUnit &MU) {
+ Error notifyAdding(ResourceTracker &RT, const MaterializationUnit &MU) {
+ auto &JD = RT.getJITDylib();
if (auto &InitSym = MU.getInitializerSymbol())
InitSymbols[&JD].add(InitSym, SymbolLookupFlags::WeaklyReferencedSymbol);
else {
@@ -235,7 +239,7 @@ public:
});
for (auto DeinitFnAddr : *Deinitializers) {
LLVM_DEBUG({
- dbgs() << " Running init " << formatv("{0:x16}", DeinitFnAddr)
+ dbgs() << " Running deinit " << formatv("{0:x16}", DeinitFnAddr)
<< "...\n";
});
auto *DeinitFn = jitTargetAddressToFunction<void (*)()>(DeinitFnAddr);
@@ -260,23 +264,23 @@ private:
return std::move(Err);
DenseMap<JITDylib *, SymbolLookupSet> LookupSymbols;
- std::vector<JITDylib *> DFSLinkOrder;
+ std::vector<JITDylibSP> DFSLinkOrder;
getExecutionSession().runSessionLocked([&]() {
- DFSLinkOrder = getDFSLinkOrder(JD);
-
- for (auto *NextJD : DFSLinkOrder) {
- auto IFItr = InitFunctions.find(NextJD);
- if (IFItr != InitFunctions.end()) {
- LookupSymbols[NextJD] = std::move(IFItr->second);
- InitFunctions.erase(IFItr);
- }
+ DFSLinkOrder = JD.getDFSLinkOrder();
+
+ for (auto &NextJD : DFSLinkOrder) {
+ auto IFItr = InitFunctions.find(NextJD.get());
+ if (IFItr != InitFunctions.end()) {
+ LookupSymbols[NextJD.get()] = std::move(IFItr->second);
+ InitFunctions.erase(IFItr);
}
- });
+ }
+ });
LLVM_DEBUG({
dbgs() << "JITDylib init order is [ ";
- for (auto *JD : llvm::reverse(DFSLinkOrder))
+ for (auto &JD : llvm::reverse(DFSLinkOrder))
dbgs() << "\"" << JD->getName() << "\" ";
dbgs() << "]\n";
dbgs() << "Looking up init functions:\n";
@@ -310,26 +314,26 @@ private:
auto LLJITRunAtExits = J.mangleAndIntern("__lljit_run_atexits");
DenseMap<JITDylib *, SymbolLookupSet> LookupSymbols;
- std::vector<JITDylib *> DFSLinkOrder;
+ std::vector<JITDylibSP> DFSLinkOrder;
ES.runSessionLocked([&]() {
- DFSLinkOrder = getDFSLinkOrder(JD);
-
- for (auto *NextJD : DFSLinkOrder) {
- auto &JDLookupSymbols = LookupSymbols[NextJD];
- auto DIFItr = DeInitFunctions.find(NextJD);
- if (DIFItr != DeInitFunctions.end()) {
- LookupSymbols[NextJD] = std::move(DIFItr->second);
- DeInitFunctions.erase(DIFItr);
- }
- JDLookupSymbols.add(LLJITRunAtExits,
+ DFSLinkOrder = JD.getDFSLinkOrder();
+
+ for (auto &NextJD : DFSLinkOrder) {
+ auto &JDLookupSymbols = LookupSymbols[NextJD.get()];
+ auto DIFItr = DeInitFunctions.find(NextJD.get());
+ if (DIFItr != DeInitFunctions.end()) {
+ LookupSymbols[NextJD.get()] = std::move(DIFItr->second);
+ DeInitFunctions.erase(DIFItr);
+ }
+ JDLookupSymbols.add(LLJITRunAtExits,
SymbolLookupFlags::WeaklyReferencedSymbol);
}
});
LLVM_DEBUG({
dbgs() << "JITDylib deinit order is [ ";
- for (auto *JD : DFSLinkOrder)
+ for (auto &JD : DFSLinkOrder)
dbgs() << "\"" << JD->getName() << "\" ";
dbgs() << "]\n";
dbgs() << "Looking up deinit functions:\n";
@@ -343,8 +347,8 @@ private:
return LookupResult.takeError();
std::vector<JITTargetAddress> DeInitializers;
- for (auto *NextJD : DFSLinkOrder) {
- auto DeInitsItr = LookupResult->find(NextJD);
+ for (auto &NextJD : DFSLinkOrder) {
+ auto DeInitsItr = LookupResult->find(NextJD.get());
assert(DeInitsItr != LookupResult->end() &&
"Every JD should have at least __lljit_run_atexits");
@@ -360,46 +364,23 @@ private:
return DeInitializers;
}
- // Returns a DFS traversal order of the JITDylibs reachable (via
- // links-against edges) from JD, starting with JD itself.
- static std::vector<JITDylib *> getDFSLinkOrder(JITDylib &JD) {
- std::vector<JITDylib *> DFSLinkOrder;
- std::vector<JITDylib *> WorkStack({&JD});
- DenseSet<JITDylib *> Visited;
-
- while (!WorkStack.empty()) {
- auto &NextJD = *WorkStack.back();
- WorkStack.pop_back();
- if (Visited.count(&NextJD))
- continue;
- Visited.insert(&NextJD);
- DFSLinkOrder.push_back(&NextJD);
- NextJD.withLinkOrderDo([&](const JITDylibSearchOrder &LinkOrder) {
- for (auto &KV : LinkOrder)
- WorkStack.push_back(KV.first);
- });
- }
-
- return DFSLinkOrder;
- }
-
/// Issue lookups for all init symbols required to initialize JD (and any
/// JITDylibs that it depends on).
Error issueInitLookups(JITDylib &JD) {
DenseMap<JITDylib *, SymbolLookupSet> RequiredInitSymbols;
- std::vector<JITDylib *> DFSLinkOrder;
+ std::vector<JITDylibSP> DFSLinkOrder;
getExecutionSession().runSessionLocked([&]() {
- DFSLinkOrder = getDFSLinkOrder(JD);
-
- for (auto *NextJD : DFSLinkOrder) {
- auto ISItr = InitSymbols.find(NextJD);
- if (ISItr != InitSymbols.end()) {
- RequiredInitSymbols[NextJD] = std::move(ISItr->second);
- InitSymbols.erase(ISItr);
- }
+ DFSLinkOrder = JD.getDFSLinkOrder();
+
+ for (auto &NextJD : DFSLinkOrder) {
+ auto ISItr = InitSymbols.find(NextJD.get());
+ if (ISItr != InitSymbols.end()) {
+ RequiredInitSymbols[NextJD.get()] = std::move(ISItr->second);
+ InitSymbols.erase(ISItr);
}
- });
+ }
+ });
return Platform::lookupInitSymbols(getExecutionSession(),
RequiredInitSymbols)
@@ -468,9 +449,9 @@ Error GenericLLVMIRPlatform::setupJITDylib(JITDylib &JD) {
return S.setupJITDylib(JD);
}
-Error GenericLLVMIRPlatform::notifyAdding(JITDylib &JD,
+Error GenericLLVMIRPlatform::notifyAdding(ResourceTracker &RT,
const MaterializationUnit &MU) {
- return S.notifyAdding(JD, MU);
+ return S.notifyAdding(RT, MU);
}
Expected<ThreadSafeModule>
@@ -927,7 +908,7 @@ LLJIT::PlatformSupport::~PlatformSupport() {}
Error LLJITBuilderState::prepareForConstruction() {
- LLVM_DEBUG(dbgs() << "Preparing to create LLIT instance...\n");
+ LLVM_DEBUG(dbgs() << "Preparing to create LLJIT instance...\n");
if (!JTMB) {
LLVM_DEBUG({
@@ -973,12 +954,18 @@ Error LLJITBuilderState::prepareForConstruction() {
JTMB->setRelocationModel(Reloc::PIC_);
JTMB->setCodeModel(CodeModel::Small);
CreateObjectLinkingLayer =
- [](ExecutionSession &ES,
- const Triple &) -> std::unique_ptr<ObjectLayer> {
- auto ObjLinkingLayer = std::make_unique<ObjectLinkingLayer>(
- ES, std::make_unique<jitlink::InProcessMemoryManager>());
+ [TPC = this->TPC](
+ ExecutionSession &ES,
+ const Triple &) -> Expected<std::unique_ptr<ObjectLayer>> {
+ std::unique_ptr<ObjectLinkingLayer> ObjLinkingLayer;
+ if (TPC)
+ ObjLinkingLayer =
+ std::make_unique<ObjectLinkingLayer>(ES, TPC->getMemMgr());
+ else
+ ObjLinkingLayer = std::make_unique<ObjectLinkingLayer>(
+ ES, std::make_unique<jitlink::InProcessMemoryManager>());
ObjLinkingLayer->addPlugin(std::make_unique<EHFrameRegistrationPlugin>(
- jitlink::InProcessEHFrameRegistrar::getInstance()));
+ ES, std::make_unique<jitlink::InProcessEHFrameRegistrar>()));
return std::move(ObjLinkingLayer);
};
}
@@ -990,23 +977,33 @@ Error LLJITBuilderState::prepareForConstruction() {
LLJIT::~LLJIT() {
if (CompileThreads)
CompileThreads->wait();
+ if (auto Err = ES->endSession())
+ ES->reportError(std::move(Err));
}
-Error LLJIT::addIRModule(JITDylib &JD, ThreadSafeModule TSM) {
+Error LLJIT::addIRModule(ResourceTrackerSP RT, ThreadSafeModule TSM) {
assert(TSM && "Can not add null module");
if (auto Err =
TSM.withModuleDo([&](Module &M) { return applyDataLayout(M); }))
return Err;
- return InitHelperTransformLayer->add(JD, std::move(TSM),
- ES->allocateVModule());
+ return InitHelperTransformLayer->add(std::move(RT), std::move(TSM));
}
-Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {
+Error LLJIT::addIRModule(JITDylib &JD, ThreadSafeModule TSM) {
+ return addIRModule(JD.getDefaultResourceTracker(), std::move(TSM));
+}
+
+Error LLJIT::addObjectFile(ResourceTrackerSP RT,
+ std::unique_ptr<MemoryBuffer> Obj) {
assert(Obj && "Can not add null object");
- return ObjTransformLayer.add(JD, std::move(Obj), ES->allocateVModule());
+ return ObjTransformLayer->add(std::move(RT), std::move(Obj));
+}
+
+Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {
+ return addObjectFile(JD.getDefaultResourceTracker(), std::move(Obj));
}
Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
@@ -1015,7 +1012,7 @@ Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols), Name);
}
-std::unique_ptr<ObjectLayer>
+Expected<std::unique_ptr<ObjectLayer>>
LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
// If the config state provided an ObjectLinkingLayer factory then use it.
@@ -1061,9 +1058,7 @@ LLJIT::createCompileFunction(LLJITBuilderState &S,
LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
: ES(S.ES ? std::move(S.ES) : std::make_unique<ExecutionSession>()), Main(),
- DL(""), TT(S.JTMB->getTargetTriple()),
- ObjLinkingLayer(createObjectLinkingLayer(S, *ES)),
- ObjTransformLayer(*this->ES, *ObjLinkingLayer) {
+ DL(""), TT(S.JTMB->getTargetTriple()) {
ErrorAsOutParameter _(&Err);
@@ -1083,6 +1078,15 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
return;
}
+ auto ObjLayer = createObjectLinkingLayer(S, *ES);
+ if (!ObjLayer) {
+ Err = ObjLayer.takeError();
+ return;
+ }
+ ObjLinkingLayer = std::move(*ObjLayer);
+ ObjTransformLayer =
+ std::make_unique<ObjectTransformLayer>(*ES, *ObjLinkingLayer);
+
{
auto CompileFunction = createCompileFunction(S, std::move(*S.JTMB));
if (!CompileFunction) {
@@ -1090,7 +1094,7 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
return;
}
CompileLayer = std::make_unique<IRCompileLayer>(
- *ES, ObjTransformLayer, std::move(*CompileFunction));
+ *ES, *ObjTransformLayer, std::move(*CompileFunction));
TransformLayer = std::make_unique<IRTransformLayer>(*ES, *CompileLayer);
InitHelperTransformLayer =
std::make_unique<IRTransformLayer>(*ES, *TransformLayer);
@@ -1102,15 +1106,17 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
std::make_unique<ThreadPool>(hardware_concurrency(S.NumCompileThreads));
ES->setDispatchMaterialization(
[this](std::unique_ptr<MaterializationUnit> MU,
- MaterializationResponsibility MR) {
- // FIXME: Switch to move capture once ThreadPool uses unique_function.
- auto SharedMU = std::shared_ptr<MaterializationUnit>(std::move(MU));
- auto SharedMR =
- std::make_shared<MaterializationResponsibility>(std::move(MR));
- auto Work = [SharedMU, SharedMR]() mutable {
- SharedMU->materialize(std::move(*SharedMR));
- };
- CompileThreads->async(std::move(Work));
+ std::unique_ptr<MaterializationResponsibility> MR) {
+ // FIXME: We should be able to use move-capture here, but ThreadPool's
+ // AsyncTaskTys are std::functions rather than unique_functions
+ // (because MSVC's std::packaged_tasks don't support move-only types).
+ // Fix this when all the above gets sorted out.
+ CompileThreads->async(
+ [UnownedMU = MU.release(), UnownedMR = MR.release()]() mutable {
+ std::unique_ptr<MaterializationUnit> MU(UnownedMU);
+ std::unique_ptr<MaterializationResponsibility> MR(UnownedMR);
+ MU->materialize(std::move(MR));
+ });
});
}
@@ -1172,7 +1178,7 @@ Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) {
[&](Module &M) -> Error { return applyDataLayout(M); }))
return Err;
- return CODLayer->add(JD, std::move(TSM), ES->allocateVModule());
+ return CODLayer->add(JD, std::move(TSM));
}
LLLazyJIT::LLLazyJIT(LLLazyJITBuilderState &S, Error &Err) : LLJIT(S, Err) {
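
The ResourceTracker overloads of addIRModule/addObjectFile added above make LLJIT modules removable. A usage sketch, assuming the caller already holds a ThreadSafeModule; the lookup step in between is elided:

#include "llvm/ExecutionEngine/Orc/LLJIT.h"

using namespace llvm;
using namespace llvm::orc;

static Error addRemovableModule(LLJIT &J, ThreadSafeModule TSM) {
  auto &JD = J.getMainJITDylib();
  auto RT = JD.createResourceTracker();

  // New overload: associate the module's resources with RT.
  if (auto Err = J.addIRModule(RT, std::move(TSM)))
    return Err;

  // ... look up and run code from the module ...

  // Later: release everything materialized for this module.
  return RT->remove();
}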
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Layer.cpp
index 61e7ab5ae68b..5e27e343d23b 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Layer.cpp
@@ -22,16 +22,18 @@ namespace orc {
IRLayer::~IRLayer() {}
-Error IRLayer::add(JITDylib &JD, ThreadSafeModule TSM, VModuleKey K) {
+Error IRLayer::add(ResourceTrackerSP RT, ThreadSafeModule TSM) {
+ assert(RT && "RT can not be null");
+ auto &JD = RT->getJITDylib();
return JD.define(std::make_unique<BasicIRLayerMaterializationUnit>(
- *this, *getManglingOptions(), std::move(TSM), std::move(K)));
+ *this, *getManglingOptions(), std::move(TSM)),
+ std::move(RT));
}
IRMaterializationUnit::IRMaterializationUnit(
ExecutionSession &ES, const IRSymbolMapper::ManglingOptions &MO,
- ThreadSafeModule TSM, VModuleKey K)
- : MaterializationUnit(SymbolFlagsMap(), nullptr, std::move(K)),
- TSM(std::move(TSM)) {
+ ThreadSafeModule TSM)
+ : MaterializationUnit(SymbolFlagsMap(), nullptr), TSM(std::move(TSM)) {
assert(this->TSM && "Module must not be null");
@@ -83,26 +85,22 @@ IRMaterializationUnit::IRMaterializationUnit(
if (!llvm::empty(getStaticInitGVs(M))) {
size_t Counter = 0;
- while (true) {
+ do {
std::string InitSymbolName;
raw_string_ostream(InitSymbolName)
<< "$." << M.getModuleIdentifier() << ".__inits." << Counter++;
InitSymbol = ES.intern(InitSymbolName);
- if (SymbolFlags.count(InitSymbol))
- continue;
- SymbolFlags[InitSymbol] =
- JITSymbolFlags::MaterializationSideEffectsOnly;
- break;
- }
+ } while (SymbolFlags.count(InitSymbol));
+
+ SymbolFlags[InitSymbol] = JITSymbolFlags::MaterializationSideEffectsOnly;
}
});
}
IRMaterializationUnit::IRMaterializationUnit(
- ThreadSafeModule TSM, VModuleKey K, SymbolFlagsMap SymbolFlags,
+ ThreadSafeModule TSM, SymbolFlagsMap SymbolFlags,
SymbolStringPtr InitSymbol, SymbolNameToDefinitionMap SymbolToDefinition)
- : MaterializationUnit(std::move(SymbolFlags), std::move(InitSymbol),
- std::move(K)),
+ : MaterializationUnit(std::move(SymbolFlags), std::move(InitSymbol)),
TSM(std::move(TSM)), SymbolToDefinition(std::move(SymbolToDefinition)) {}
StringRef IRMaterializationUnit::getName() const {
@@ -129,14 +127,12 @@ void IRMaterializationUnit::discard(const JITDylib &JD,
}
BasicIRLayerMaterializationUnit::BasicIRLayerMaterializationUnit(
- IRLayer &L, const IRSymbolMapper::ManglingOptions &MO, ThreadSafeModule TSM,
- VModuleKey K)
- : IRMaterializationUnit(L.getExecutionSession(), MO, std::move(TSM),
- std::move(K)),
- L(L), K(std::move(K)) {}
+ IRLayer &L, const IRSymbolMapper::ManglingOptions &MO, ThreadSafeModule TSM)
+ : IRMaterializationUnit(L.getExecutionSession(), MO, std::move(TSM)), L(L) {
+}
void BasicIRLayerMaterializationUnit::materialize(
- MaterializationResponsibility R) {
+ std::unique_ptr<MaterializationResponsibility> R) {
// Throw away the SymbolToDefinition map: it's not usable after we hand
// off the module.
@@ -147,8 +143,8 @@ void BasicIRLayerMaterializationUnit::materialize(
TSM = cloneToNewContext(TSM);
#ifndef NDEBUG
- auto &ES = R.getTargetJITDylib().getExecutionSession();
- auto &N = R.getTargetJITDylib().getName();
+ auto &ES = R->getTargetJITDylib().getExecutionSession();
+ auto &N = R->getTargetJITDylib().getName();
#endif // NDEBUG
LLVM_DEBUG(ES.runSessionLocked(
@@ -163,17 +159,17 @@ ObjectLayer::ObjectLayer(ExecutionSession &ES) : ES(ES) {}
ObjectLayer::~ObjectLayer() {}
-Error ObjectLayer::add(JITDylib &JD, std::unique_ptr<MemoryBuffer> O,
- VModuleKey K) {
- auto ObjMU = BasicObjectLayerMaterializationUnit::Create(*this, std::move(K),
- std::move(O));
+Error ObjectLayer::add(ResourceTrackerSP RT, std::unique_ptr<MemoryBuffer> O) {
+ assert(RT && "RT can not be null");
+ auto ObjMU = BasicObjectLayerMaterializationUnit::Create(*this, std::move(O));
if (!ObjMU)
return ObjMU.takeError();
- return JD.define(std::move(*ObjMU));
+ auto &JD = RT->getJITDylib();
+ return JD.define(std::move(*ObjMU), std::move(RT));
}
Expected<std::unique_ptr<BasicObjectLayerMaterializationUnit>>
-BasicObjectLayerMaterializationUnit::Create(ObjectLayer &L, VModuleKey K,
+BasicObjectLayerMaterializationUnit::Create(ObjectLayer &L,
std::unique_ptr<MemoryBuffer> O) {
auto ObjSymInfo =
getObjectSymbolInfo(L.getExecutionSession(), O->getMemBufferRef());
@@ -186,15 +182,14 @@ BasicObjectLayerMaterializationUnit::Create(ObjectLayer &L, VModuleKey K,
return std::unique_ptr<BasicObjectLayerMaterializationUnit>(
new BasicObjectLayerMaterializationUnit(
- L, K, std::move(O), std::move(SymbolFlags), std::move(InitSymbol)));
+ L, std::move(O), std::move(SymbolFlags), std::move(InitSymbol)));
}
BasicObjectLayerMaterializationUnit::BasicObjectLayerMaterializationUnit(
- ObjectLayer &L, VModuleKey K, std::unique_ptr<MemoryBuffer> O,
- SymbolFlagsMap SymbolFlags, SymbolStringPtr InitSymbol)
- : MaterializationUnit(std::move(SymbolFlags), std::move(InitSymbol),
- std::move(K)),
- L(L), O(std::move(O)) {}
+ ObjectLayer &L, std::unique_ptr<MemoryBuffer> O, SymbolFlagsMap SymbolFlags,
+ SymbolStringPtr InitSymbol)
+ : MaterializationUnit(std::move(SymbolFlags), std::move(InitSymbol)), L(L),
+ O(std::move(O)) {}
StringRef BasicObjectLayerMaterializationUnit::getName() const {
if (O)
@@ -203,7 +198,7 @@ StringRef BasicObjectLayerMaterializationUnit::getName() const {
}
void BasicObjectLayerMaterializationUnit::materialize(
- MaterializationResponsibility R) {
+ std::unique_ptr<MaterializationResponsibility> R) {
L.emit(std::move(R), std::move(O));
}
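
With materialize() now taking std::unique_ptr<MaterializationResponsibility>, custom MaterializationUnits follow the same pattern as BasicObjectLayerMaterializationUnit above. A minimal sketch, using a hypothetical constant symbol; not upstream code:

#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

class ConstantMU : public MaterializationUnit {
public:
  ConstantMU(SymbolStringPtr Name, JITTargetAddress Addr)
      : MaterializationUnit(SymbolFlagsMap({{Name, JITSymbolFlags::Exported}}),
                            nullptr),
        Name(std::move(Name)), Addr(Addr) {}

  StringRef getName() const override { return "<ConstantMU>"; }

private:
  void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
    SymbolMap Result;
    Result[Name] = JITEvaluatedSymbol(Addr, JITSymbolFlags::Exported);
    // No dependencies are registered, so these calls cannot fail.
    cantFail(R->notifyResolved(Result));
    cantFail(R->notifyEmitted());
  }

  void discard(const JITDylib &JD, const SymbolStringPtr &Sym) override {}

  SymbolStringPtr Name;
  JITTargetAddress Addr;
};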
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
index 153f6b80784f..e1f494415e86 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -75,27 +75,31 @@ void LazyCallThroughManager::resolveTrampolineLandingAddress(
if (!Entry)
return NotifyLandingResolved(reportCallThroughError(Entry.takeError()));
- ES.lookup(
- LookupKind::Static,
- makeJITDylibSearchOrder(Entry->SourceJD,
- JITDylibLookupFlags::MatchAllSymbols),
- SymbolLookupSet({Entry->SymbolName}), SymbolState::Ready,
- [this, TrampolineAddr, SymbolName = Entry->SymbolName,
- NotifyLandingResolved = std::move(NotifyLandingResolved)](
- Expected<SymbolMap> Result) mutable {
- if (Result) {
- assert(Result->size() == 1 && "Unexpected result size");
- assert(Result->count(SymbolName) && "Unexpected result value");
- JITTargetAddress LandingAddr = (*Result)[SymbolName].getAddress();
-
- if (auto Err = notifyResolved(TrampolineAddr, LandingAddr))
- NotifyLandingResolved(reportCallThroughError(std::move(Err)));
- else
- NotifyLandingResolved(LandingAddr);
- } else
- NotifyLandingResolved(reportCallThroughError(Result.takeError()));
- },
- NoDependenciesToRegister);
+  // Declaring SLS and the callback outside the call to ES.lookup is a
+  // workaround for build failures on AIX and z/OS.
+ SymbolLookupSet SLS({Entry->SymbolName});
+ auto Callback = [this, TrampolineAddr, SymbolName = Entry->SymbolName,
+ NotifyLandingResolved = std::move(NotifyLandingResolved)](
+ Expected<SymbolMap> Result) mutable {
+ if (Result) {
+ assert(Result->size() == 1 && "Unexpected result size");
+ assert(Result->count(SymbolName) && "Unexpected result value");
+ JITTargetAddress LandingAddr = (*Result)[SymbolName].getAddress();
+
+ if (auto Err = notifyResolved(TrampolineAddr, LandingAddr))
+ NotifyLandingResolved(reportCallThroughError(std::move(Err)));
+ else
+ NotifyLandingResolved(LandingAddr);
+ } else {
+ NotifyLandingResolved(reportCallThroughError(Result.takeError()));
+ }
+ };
+
+ ES.lookup(LookupKind::Static,
+ makeJITDylibSearchOrder(Entry->SourceJD,
+ JITDylibLookupFlags::MatchAllSymbols),
+ std::move(SLS), SymbolState::Ready, std::move(Callback),
+ NoDependenciesToRegister);
}
Expected<std::unique_ptr<LazyCallThroughManager>>
@@ -139,9 +143,8 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
LazyReexportsMaterializationUnit::LazyReexportsMaterializationUnit(
LazyCallThroughManager &LCTManager, IndirectStubsManager &ISManager,
- JITDylib &SourceJD, SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc,
- VModuleKey K)
- : MaterializationUnit(extractFlags(CallableAliases), nullptr, std::move(K)),
+ JITDylib &SourceJD, SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc)
+ : MaterializationUnit(extractFlags(CallableAliases), nullptr),
LCTManager(LCTManager), ISManager(ISManager), SourceJD(SourceJD),
CallableAliases(std::move(CallableAliases)), AliaseeTable(SrcJDLoc) {}
@@ -150,8 +153,8 @@ StringRef LazyReexportsMaterializationUnit::getName() const {
}
void LazyReexportsMaterializationUnit::materialize(
- MaterializationResponsibility R) {
- auto RequestedSymbols = R.getRequestedSymbols();
+ std::unique_ptr<MaterializationResponsibility> R) {
+ auto RequestedSymbols = R->getRequestedSymbols();
SymbolAliasMap RequestedAliases;
for (auto &RequestedSymbol : RequestedSymbols) {
@@ -162,8 +165,13 @@ void LazyReexportsMaterializationUnit::materialize(
}
if (!CallableAliases.empty())
- R.replace(lazyReexports(LCTManager, ISManager, SourceJD,
- std::move(CallableAliases), AliaseeTable));
+ if (auto Err = R->replace(lazyReexports(LCTManager, ISManager, SourceJD,
+ std::move(CallableAliases),
+ AliaseeTable))) {
+ R->getExecutionSession().reportError(std::move(Err));
+ R->failMaterialization();
+ return;
+ }
IndirectStubsManager::StubInitsMap StubInits;
for (auto &Alias : RequestedAliases) {
@@ -178,7 +186,7 @@ void LazyReexportsMaterializationUnit::materialize(
if (!CallThroughTrampoline) {
SourceJD.getExecutionSession().reportError(
CallThroughTrampoline.takeError());
- R.failMaterialization();
+ R->failMaterialization();
return;
}
@@ -191,7 +199,7 @@ void LazyReexportsMaterializationUnit::materialize(
if (auto Err = ISManager.createStubs(StubInits)) {
SourceJD.getExecutionSession().reportError(std::move(Err));
- R.failMaterialization();
+ R->failMaterialization();
return;
}
@@ -200,8 +208,8 @@ void LazyReexportsMaterializationUnit::materialize(
Stubs[Alias.first] = ISManager.findStub(*Alias.first, false);
// No registered dependencies, so these calls cannot fail.
- cantFail(R.notifyResolved(Stubs));
- cantFail(R.notifyEmitted());
+ cantFail(R->notifyResolved(Stubs));
+ cantFail(R->notifyEmitted());
}
void LazyReexportsMaterializationUnit::discard(const JITDylib &JD,
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Legacy.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
deleted file mode 100644
index 67b804c37287..000000000000
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-//===------- Legacy.cpp - Adapters for ExecutionEngine API interop --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ExecutionEngine/Orc/Legacy.h"
-
-namespace llvm {
-namespace orc {
-
-void SymbolResolver::anchor() {}
-
-JITSymbolResolverAdapter::JITSymbolResolverAdapter(
- ExecutionSession &ES, SymbolResolver &R, MaterializationResponsibility *MR)
- : ES(ES), R(R), MR(MR) {}
-
-void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols,
- OnResolvedFunction OnResolved) {
- SymbolNameSet InternedSymbols;
- for (auto &S : Symbols)
- InternedSymbols.insert(ES.intern(S));
-
- auto OnResolvedWithUnwrap = [OnResolved = std::move(OnResolved)](
- Expected<SymbolMap> InternedResult) mutable {
- if (!InternedResult) {
- OnResolved(InternedResult.takeError());
- return;
- }
-
- LookupResult Result;
- for (auto &KV : *InternedResult)
- Result[*KV.first] = std::move(KV.second);
- OnResolved(Result);
- };
-
- auto Q = std::make_shared<AsynchronousSymbolQuery>(
- SymbolLookupSet(InternedSymbols), SymbolState::Resolved,
- std::move(OnResolvedWithUnwrap));
-
- auto Unresolved = R.lookup(Q, InternedSymbols);
- if (Unresolved.empty()) {
- if (MR)
- MR->addDependenciesForAll(Q->QueryRegistrations);
- } else
- ES.legacyFailQuery(*Q, make_error<SymbolsNotFound>(std::move(Unresolved)));
-}
-
-Expected<JITSymbolResolverAdapter::LookupSet>
-JITSymbolResolverAdapter::getResponsibilitySet(const LookupSet &Symbols) {
- SymbolNameSet InternedSymbols;
- for (auto &S : Symbols)
- InternedSymbols.insert(ES.intern(S));
-
- auto InternedResult = R.getResponsibilitySet(InternedSymbols);
- LookupSet Result;
- for (auto &S : InternedResult) {
- ResolvedStrings.insert(S);
- Result.insert(*S);
- }
-
- return Result;
-}
-
-} // End namespace orc.
-} // End namespace llvm.
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index 15c3aa79a2a8..17b9465a0541 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -159,7 +159,9 @@ Error MachOPlatform::setupJITDylib(JITDylib &JD) {
return ObjLinkingLayer.add(JD, std::move(ObjBuffer));
}
-Error MachOPlatform::notifyAdding(JITDylib &JD, const MaterializationUnit &MU) {
+Error MachOPlatform::notifyAdding(ResourceTracker &RT,
+ const MaterializationUnit &MU) {
+ auto &JD = RT.getJITDylib();
const auto &InitSym = MU.getInitializerSymbol();
if (!InitSym)
return Error::success();
@@ -173,7 +175,7 @@ Error MachOPlatform::notifyAdding(JITDylib &JD, const MaterializationUnit &MU) {
return Error::success();
}
-Error MachOPlatform::notifyRemoving(JITDylib &JD, VModuleKey K) {
+Error MachOPlatform::notifyRemoving(ResourceTracker &RT) {
llvm_unreachable("Not supported yet");
}
@@ -185,19 +187,19 @@ MachOPlatform::getInitializerSequence(JITDylib &JD) {
<< JD.getName() << "\n";
});
- std::vector<JITDylib *> DFSLinkOrder;
+ std::vector<JITDylibSP> DFSLinkOrder;
while (true) {
DenseMap<JITDylib *, SymbolLookupSet> NewInitSymbols;
ES.runSessionLocked([&]() {
- DFSLinkOrder = getDFSLinkOrder(JD);
+ DFSLinkOrder = JD.getDFSLinkOrder();
- for (auto *InitJD : DFSLinkOrder) {
- auto RISItr = RegisteredInitSymbols.find(InitJD);
+ for (auto &InitJD : DFSLinkOrder) {
+ auto RISItr = RegisteredInitSymbols.find(InitJD.get());
if (RISItr != RegisteredInitSymbols.end()) {
- NewInitSymbols[InitJD] = std::move(RISItr->second);
+ NewInitSymbols[InitJD.get()] = std::move(RISItr->second);
RegisteredInitSymbols.erase(RISItr);
}
}
@@ -229,14 +231,14 @@ MachOPlatform::getInitializerSequence(JITDylib &JD) {
InitializerSequence FullInitSeq;
{
std::lock_guard<std::mutex> Lock(InitSeqsMutex);
- for (auto *InitJD : reverse(DFSLinkOrder)) {
+ for (auto &InitJD : reverse(DFSLinkOrder)) {
LLVM_DEBUG({
dbgs() << "MachOPlatform: Appending inits for \"" << InitJD->getName()
<< "\" to sequence\n";
});
- auto ISItr = InitSeqs.find(InitJD);
+ auto ISItr = InitSeqs.find(InitJD.get());
if (ISItr != InitSeqs.end()) {
- FullInitSeq.emplace_back(InitJD, std::move(ISItr->second));
+ FullInitSeq.emplace_back(InitJD.get(), std::move(ISItr->second));
InitSeqs.erase(ISItr);
}
}
@@ -247,39 +249,19 @@ MachOPlatform::getInitializerSequence(JITDylib &JD) {
Expected<MachOPlatform::DeinitializerSequence>
MachOPlatform::getDeinitializerSequence(JITDylib &JD) {
- std::vector<JITDylib *> DFSLinkOrder = getDFSLinkOrder(JD);
+ std::vector<JITDylibSP> DFSLinkOrder = JD.getDFSLinkOrder();
DeinitializerSequence FullDeinitSeq;
{
std::lock_guard<std::mutex> Lock(InitSeqsMutex);
- for (auto *DeinitJD : DFSLinkOrder) {
- FullDeinitSeq.emplace_back(DeinitJD, MachOJITDylibDeinitializers());
+ for (auto &DeinitJD : DFSLinkOrder) {
+ FullDeinitSeq.emplace_back(DeinitJD.get(), MachOJITDylibDeinitializers());
}
}
return FullDeinitSeq;
}
-std::vector<JITDylib *> MachOPlatform::getDFSLinkOrder(JITDylib &JD) {
- std::vector<JITDylib *> Result, WorkStack({&JD});
- DenseSet<JITDylib *> Visited;
-
- while (!WorkStack.empty()) {
- auto *NextJD = WorkStack.back();
- WorkStack.pop_back();
- if (Visited.count(NextJD))
- continue;
- Visited.insert(NextJD);
- Result.push_back(NextJD);
- NextJD->withLinkOrderDo([&](const JITDylibSearchOrder &LO) {
- for (auto &KV : LO)
- WorkStack.push_back(KV.first);
- });
- }
-
- return Result;
-}
-
void MachOPlatform::registerInitInfo(
JITDylib &JD, JITTargetAddress ObjCImageInfoAddr,
MachOJITDylibInitializers::SectionExtent ModInits,
@@ -319,13 +301,16 @@ void MachOPlatform::InitScraperPlugin::modifyPassConfig(
MaterializationResponsibility &MR, const Triple &TT,
jitlink::PassConfiguration &Config) {
+ if (!MR.getInitializerSymbol())
+ return;
+
Config.PrePrunePasses.push_back([this, &MR](jitlink::LinkGraph &G) -> Error {
JITLinkSymbolVector InitSectionSymbols;
preserveInitSectionIfPresent(InitSectionSymbols, G, "__mod_init_func");
preserveInitSectionIfPresent(InitSectionSymbols, G, "__objc_selrefs");
preserveInitSectionIfPresent(InitSectionSymbols, G, "__objc_classlist");
- if (!InitSymbolDeps.empty()) {
+ if (!InitSectionSymbols.empty()) {
std::lock_guard<std::mutex> Lock(InitScraperMutex);
InitSymbolDeps[&MR] = std::move(InitSectionSymbols);
}
@@ -343,10 +328,8 @@ void MachOPlatform::InitScraperPlugin::modifyPassConfig(
JITTargetAddress ObjCImageInfoAddr = 0;
if (auto *ObjCImageInfoSec = G.findSectionByName("__objc_image_info")) {
- if (auto Addr = jitlink::SectionRange(*ObjCImageInfoSec).getStart()) {
+ if (auto Addr = jitlink::SectionRange(*ObjCImageInfoSec).getStart())
ObjCImageInfoAddr = Addr;
- dbgs() << "Recorded __objc_imageinfo @ " << formatv("{0:x16}", Addr);
- }
}
// Record __mod_init_func.
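[Editor's sketch, not part of the patch] Two of the MachOPlatform changes above are worth noting: the platform hooks now receive a ResourceTracker rather than a JITDylib/VModuleKey pair, and the hand-rolled getDFSLinkOrder() helper is replaced by JITDylib::getDFSLinkOrder(), which returns ref-counted JITDylibSP handles. A minimal sketch of walking the new link order, assuming the LLVM 12 headers; dumpLinkOrder is a hypothetical helper:

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
using namespace llvm::orc;

// Sketch only: elements are JITDylibSP smart pointers now, so they are
// dereferenced rather than used as raw JITDylib*.
static void dumpLinkOrder(JITDylib &JD) {
  for (auto &Next : JD.getDFSLinkOrder())
    dbgs() << Next->getName() << "\n";
}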
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
deleted file mode 100644
index 5b4345b870bb..000000000000
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===---------- NullResolver.cpp - Reject symbol lookup requests ----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ExecutionEngine/Orc/NullResolver.h"
-
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-namespace orc {
-
-SymbolNameSet NullResolver::getResponsibilitySet(const SymbolNameSet &Symbols) {
- return Symbols;
-}
-
-SymbolNameSet
-NullResolver::lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
- SymbolNameSet Symbols) {
- assert(Symbols.empty() && "Null resolver: Symbols must be empty");
- return Symbols;
-}
-
-JITSymbol NullLegacyResolver::findSymbol(const std::string &Name) {
- llvm_unreachable("Unexpected cross-object symbol reference");
-}
-
-JITSymbol
-NullLegacyResolver::findSymbolInLogicalDylib(const std::string &Name) {
- llvm_unreachable("Unexpected cross-object symbol reference");
-}
-
-} // End namespace orc.
-} // End namespace llvm.
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
index 02066b458dfc..26f77acd91fc 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -24,34 +24,34 @@ namespace orc {
class ObjectLinkingLayerJITLinkContext final : public JITLinkContext {
public:
- ObjectLinkingLayerJITLinkContext(ObjectLinkingLayer &Layer,
- MaterializationResponsibility MR,
- std::unique_ptr<MemoryBuffer> ObjBuffer)
- : Layer(Layer), MR(std::move(MR)), ObjBuffer(std::move(ObjBuffer)) {}
+ ObjectLinkingLayerJITLinkContext(
+ ObjectLinkingLayer &Layer,
+ std::unique_ptr<MaterializationResponsibility> MR,
+ std::unique_ptr<MemoryBuffer> ObjBuffer)
+ : JITLinkContext(&MR->getTargetJITDylib()), Layer(Layer),
+ MR(std::move(MR)), ObjBuffer(std::move(ObjBuffer)) {}
~ObjectLinkingLayerJITLinkContext() {
// If there is an object buffer return function then use it to
// return ownership of the buffer.
- if (Layer.ReturnObjectBuffer)
+ if (Layer.ReturnObjectBuffer && ObjBuffer)
Layer.ReturnObjectBuffer(std::move(ObjBuffer));
}
- JITLinkMemoryManager &getMemoryManager() override { return *Layer.MemMgr; }
-
- MemoryBufferRef getObjectBuffer() const override {
- return ObjBuffer->getMemBufferRef();
- }
+ JITLinkMemoryManager &getMemoryManager() override { return Layer.MemMgr; }
void notifyFailed(Error Err) override {
+ for (auto &P : Layer.Plugins)
+ Err = joinErrors(std::move(Err), P->notifyFailed(*MR));
Layer.getExecutionSession().reportError(std::move(Err));
- MR.failMaterialization();
+ MR->failMaterialization();
}
void lookup(const LookupMap &Symbols,
std::unique_ptr<JITLinkAsyncLookupContinuation> LC) override {
JITDylibSearchOrder LinkOrder;
- MR.getTargetJITDylib().withLinkOrderDo(
+ MR->getTargetJITDylib().withLinkOrderDo(
[&](const JITDylibSearchOrder &LO) { LinkOrder = LO; });
auto &ES = Layer.getExecutionSession();
@@ -71,9 +71,8 @@ public:
}
// OnResolve -- De-intern the symbols and pass the result to the linker.
- auto OnResolve = [this, LookupContinuation = std::move(LC)](
- Expected<SymbolMap> Result) mutable {
- auto Main = Layer.getExecutionSession().intern("_main");
+ auto OnResolve = [LookupContinuation =
+ std::move(LC)](Expected<SymbolMap> Result) mutable {
if (!Result)
LookupContinuation->run(Result.takeError());
else {
@@ -86,8 +85,8 @@ public:
for (auto &KV : InternalNamedSymbolDeps) {
SymbolDependenceMap InternalDeps;
- InternalDeps[&MR.getTargetJITDylib()] = std::move(KV.second);
- MR.addDependencies(KV.first, InternalDeps);
+ InternalDeps[&MR->getTargetJITDylib()] = std::move(KV.second);
+ MR->addDependencies(KV.first, InternalDeps);
}
ES.lookup(LookupKind::Static, LinkOrder, std::move(LookupSet),
@@ -97,7 +96,7 @@ public:
});
}
- void notifyResolved(LinkGraph &G) override {
+ Error notifyResolved(LinkGraph &G) override {
auto &ES = Layer.getExecutionSession();
SymbolFlagsMap ExtraSymbolsToClaim;
@@ -116,7 +115,7 @@ public:
InternedResult[InternedName] =
JITEvaluatedSymbol(Sym->getAddress(), Flags);
- if (AutoClaim && !MR.getSymbols().count(InternedName)) {
+ if (AutoClaim && !MR->getSymbols().count(InternedName)) {
assert(!ExtraSymbolsToClaim.count(InternedName) &&
"Duplicate symbol to claim?");
ExtraSymbolsToClaim[InternedName] = Flags;
@@ -134,7 +133,7 @@ public:
Flags |= JITSymbolFlags::Weak;
InternedResult[InternedName] =
JITEvaluatedSymbol(Sym->getAddress(), Flags);
- if (AutoClaim && !MR.getSymbols().count(InternedName)) {
+ if (AutoClaim && !MR->getSymbols().count(InternedName)) {
assert(!ExtraSymbolsToClaim.count(InternedName) &&
"Duplicate symbol to claim?");
ExtraSymbolsToClaim[InternedName] = Flags;
@@ -142,19 +141,19 @@ public:
}
if (!ExtraSymbolsToClaim.empty())
- if (auto Err = MR.defineMaterializing(ExtraSymbolsToClaim))
- return notifyFailed(std::move(Err));
+ if (auto Err = MR->defineMaterializing(ExtraSymbolsToClaim))
+ return Err;
{
- // Check that InternedResult matches up with MR.getSymbols().
+ // Check that InternedResult matches up with MR->getSymbols().
// This guards against faulty transformations / compilers / object caches.
// First check that there aren't any missing symbols.
size_t NumMaterializationSideEffectsOnlySymbols = 0;
SymbolNameVector ExtraSymbols;
SymbolNameVector MissingSymbols;
- for (auto &KV : MR.getSymbols()) {
+ for (auto &KV : MR->getSymbols()) {
// If this is a materialization-side-effects only symbol then bump
// the counter and make sure it's *not* defined, otherwise make
@@ -169,49 +168,42 @@ public:
}
// If there were missing symbols then report the error.
- if (!MissingSymbols.empty()) {
- ES.reportError(make_error<MissingSymbolDefinitions>(
- G.getName(), std::move(MissingSymbols)));
- MR.failMaterialization();
- return;
- }
+ if (!MissingSymbols.empty())
+ return make_error<MissingSymbolDefinitions>(G.getName(),
+ std::move(MissingSymbols));
// If there are more definitions than expected, add them to the
// ExtraSymbols vector.
if (InternedResult.size() >
- MR.getSymbols().size() - NumMaterializationSideEffectsOnlySymbols) {
+ MR->getSymbols().size() - NumMaterializationSideEffectsOnlySymbols) {
for (auto &KV : InternedResult)
- if (!MR.getSymbols().count(KV.first))
+ if (!MR->getSymbols().count(KV.first))
ExtraSymbols.push_back(KV.first);
}
// If there were extra definitions then report the error.
- if (!ExtraSymbols.empty()) {
- ES.reportError(make_error<UnexpectedSymbolDefinitions>(
- G.getName(), std::move(ExtraSymbols)));
- MR.failMaterialization();
- return;
- }
+ if (!ExtraSymbols.empty())
+ return make_error<UnexpectedSymbolDefinitions>(G.getName(),
+ std::move(ExtraSymbols));
}
- if (auto Err = MR.notifyResolved(InternedResult)) {
- Layer.getExecutionSession().reportError(std::move(Err));
- MR.failMaterialization();
- return;
- }
- Layer.notifyLoaded(MR);
+ if (auto Err = MR->notifyResolved(InternedResult))
+ return Err;
+
+ Layer.notifyLoaded(*MR);
+ return Error::success();
}
void notifyFinalized(
std::unique_ptr<JITLinkMemoryManager::Allocation> A) override {
- if (auto Err = Layer.notifyEmitted(MR, std::move(A))) {
+ if (auto Err = Layer.notifyEmitted(*MR, std::move(A))) {
Layer.getExecutionSession().reportError(std::move(Err));
- MR.failMaterialization();
+ MR->failMaterialization();
return;
}
- if (auto Err = MR.notifyEmitted()) {
+ if (auto Err = MR->notifyEmitted()) {
Layer.getExecutionSession().reportError(std::move(Err));
- MR.failMaterialization();
+ MR->failMaterialization();
}
}
@@ -222,10 +214,11 @@ public:
Error modifyPassConfig(const Triple &TT, PassConfiguration &Config) override {
// Add passes to mark duplicate defs as should-discard, and to walk the
// link graph to build the symbol dependence graph.
- Config.PrePrunePasses.push_back(
- [this](LinkGraph &G) { return externalizeWeakAndCommonSymbols(G); });
+ Config.PrePrunePasses.push_back([this](LinkGraph &G) {
+ return claimOrExternalizeWeakAndCommonSymbols(G);
+ });
- Layer.modifyPassConfig(MR, TT, Config);
+ Layer.modifyPassConfig(*MR, TT, Config);
Config.PostPrunePasses.push_back(
[this](LinkGraph &G) { return computeNamedSymbolDependencies(G); });
@@ -241,19 +234,38 @@ private:
using LocalSymbolNamedDependenciesMap =
DenseMap<const Symbol *, LocalSymbolNamedDependencies>;
- Error externalizeWeakAndCommonSymbols(LinkGraph &G) {
+ Error claimOrExternalizeWeakAndCommonSymbols(LinkGraph &G) {
auto &ES = Layer.getExecutionSession();
- for (auto *Sym : G.defined_symbols())
+
+ SymbolFlagsMap NewSymbolsToClaim;
+ std::vector<std::pair<SymbolStringPtr, Symbol *>> NameToSym;
+
+ auto ProcessSymbol = [&](Symbol *Sym) {
if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) {
- if (!MR.getSymbols().count(ES.intern(Sym->getName())))
- G.makeExternal(*Sym);
+ auto Name = ES.intern(Sym->getName());
+ if (!MR->getSymbols().count(ES.intern(Sym->getName()))) {
+ JITSymbolFlags SF = JITSymbolFlags::Weak;
+ if (Sym->getScope() == Scope::Default)
+ SF |= JITSymbolFlags::Exported;
+ NewSymbolsToClaim[Name] = SF;
+ NameToSym.push_back(std::make_pair(std::move(Name), Sym));
+ }
}
+ };
+ for (auto *Sym : G.defined_symbols())
+ ProcessSymbol(Sym);
for (auto *Sym : G.absolute_symbols())
- if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) {
- if (!MR.getSymbols().count(ES.intern(Sym->getName())))
- G.makeExternal(*Sym);
- }
+ ProcessSymbol(Sym);
+
+ // Attempt to claim all weak defs that we're not already responsible for.
+ // This cannot fail -- any clashes will just result in rejection of our
+ // claim, at which point we'll externalize that symbol.
+ cantFail(MR->defineMaterializing(std::move(NewSymbolsToClaim)));
+
+ for (auto &KV : NameToSym)
+ if (!MR->getSymbols().count(KV.first))
+ G.makeExternal(*KV.second);
return Error::success();
}
@@ -261,13 +273,13 @@ private:
Error markResponsibilitySymbolsLive(LinkGraph &G) const {
auto &ES = Layer.getExecutionSession();
for (auto *Sym : G.defined_symbols())
- if (Sym->hasName() && MR.getSymbols().count(ES.intern(Sym->getName())))
+ if (Sym->hasName() && MR->getSymbols().count(ES.intern(Sym->getName())))
Sym->setLive(true);
return Error::success();
}
Error computeNamedSymbolDependencies(LinkGraph &G) {
- auto &ES = MR.getTargetJITDylib().getExecutionSession();
+ auto &ES = MR->getTargetJITDylib().getExecutionSession();
auto LocalDeps = computeLocalDeps(G);
// Compute dependencies for symbols defined in the JITLink graph.
@@ -314,7 +326,7 @@ private:
}
for (auto &P : Layer.Plugins) {
- auto SyntheticLocalDeps = P->getSyntheticSymbolLocalDependencies(MR);
+ auto SyntheticLocalDeps = P->getSyntheticSymbolLocalDependencies(*MR);
if (SyntheticLocalDeps.empty())
continue;
@@ -434,12 +446,12 @@ private:
SymbolDeps.erase(&SourceJD);
}
- MR.addDependencies(Name, SymbolDeps);
+ MR->addDependencies(Name, SymbolDeps);
}
}
ObjectLinkingLayer &Layer;
- MaterializationResponsibility MR;
+ std::unique_ptr<MaterializationResponsibility> MR;
std::unique_ptr<MemoryBuffer> ObjBuffer;
DenseMap<SymbolStringPtr, SymbolNameSet> ExternalNamedSymbolDeps;
DenseMap<SymbolStringPtr, SymbolNameSet> InternalNamedSymbolDeps;
@@ -447,20 +459,39 @@ private:
ObjectLinkingLayer::Plugin::~Plugin() {}
+ObjectLinkingLayer::ObjectLinkingLayer(ExecutionSession &ES,
+ JITLinkMemoryManager &MemMgr)
+ : ObjectLayer(ES), MemMgr(MemMgr) {
+ ES.registerResourceManager(*this);
+}
+
ObjectLinkingLayer::ObjectLinkingLayer(
ExecutionSession &ES, std::unique_ptr<JITLinkMemoryManager> MemMgr)
- : ObjectLayer(ES), MemMgr(std::move(MemMgr)) {}
+ : ObjectLayer(ES), MemMgr(*MemMgr), MemMgrOwnership(std::move(MemMgr)) {
+ ES.registerResourceManager(*this);
+}
ObjectLinkingLayer::~ObjectLinkingLayer() {
- if (auto Err = removeAllModules())
- getExecutionSession().reportError(std::move(Err));
+ assert(Allocs.empty() && "Layer destroyed with resources still attached");
+ getExecutionSession().deregisterResourceManager(*this);
}
-void ObjectLinkingLayer::emit(MaterializationResponsibility R,
+void ObjectLinkingLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
std::unique_ptr<MemoryBuffer> O) {
assert(O && "Object must not be null");
- jitLink(std::make_unique<ObjectLinkingLayerJITLinkContext>(
- *this, std::move(R), std::move(O)));
+ auto ObjBuffer = O->getMemBufferRef();
+ auto Ctx = std::make_unique<ObjectLinkingLayerJITLinkContext>(
+ *this, std::move(R), std::move(O));
+ if (auto G = createLinkGraphFromObject(std::move(ObjBuffer)))
+ link(std::move(*G), std::move(Ctx));
+ else
+ Ctx->notifyFailed(G.takeError());
+}
+
+void ObjectLinkingLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
+ std::unique_ptr<LinkGraph> G) {
+ link(std::move(G), std::make_unique<ObjectLinkingLayerJITLinkContext>(
+ *this, std::move(R), nullptr));
}
void ObjectLinkingLayer::modifyPassConfig(MaterializationResponsibility &MR,
@@ -484,63 +515,56 @@ Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR,
if (Err)
return Err;
- {
- std::lock_guard<std::mutex> Lock(LayerMutex);
- UntrackedAllocs.push_back(std::move(Alloc));
- }
-
- return Error::success();
+ return MR.withResourceKeyDo(
+ [&](ResourceKey K) { Allocs[K].push_back(std::move(Alloc)); });
}
-Error ObjectLinkingLayer::removeModule(VModuleKey K) {
+Error ObjectLinkingLayer::handleRemoveResources(ResourceKey K) {
+
Error Err = Error::success();
for (auto &P : Plugins)
- Err = joinErrors(std::move(Err), P->notifyRemovingModule(K));
-
- AllocPtr Alloc;
+ Err = joinErrors(std::move(Err), P->notifyRemovingResources(K));
+
+ std::vector<AllocPtr> AllocsToRemove;
+ getExecutionSession().runSessionLocked([&] {
+ auto I = Allocs.find(K);
+ if (I != Allocs.end()) {
+ std::swap(AllocsToRemove, I->second);
+ Allocs.erase(I);
+ }
+ });
- {
- std::lock_guard<std::mutex> Lock(LayerMutex);
- auto AllocItr = TrackedAllocs.find(K);
- Alloc = std::move(AllocItr->second);
- TrackedAllocs.erase(AllocItr);
+ while (!AllocsToRemove.empty()) {
+ Err = joinErrors(std::move(Err), AllocsToRemove.back()->deallocate());
+ AllocsToRemove.pop_back();
}
- assert(Alloc && "No allocation for key K");
-
- return joinErrors(std::move(Err), Alloc->deallocate());
+ return Err;
}
-Error ObjectLinkingLayer::removeAllModules() {
-
- Error Err = Error::success();
-
- for (auto &P : Plugins)
- Err = joinErrors(std::move(Err), P->notifyRemovingAllModules());
-
- std::vector<AllocPtr> Allocs;
- {
- std::lock_guard<std::mutex> Lock(LayerMutex);
- Allocs = std::move(UntrackedAllocs);
-
- for (auto &KV : TrackedAllocs)
- Allocs.push_back(std::move(KV.second));
-
- TrackedAllocs.clear();
+void ObjectLinkingLayer::handleTransferResources(ResourceKey DstKey,
+ ResourceKey SrcKey) {
+ auto I = Allocs.find(SrcKey);
+ if (I != Allocs.end()) {
+ auto &SrcAllocs = I->second;
+ auto &DstAllocs = Allocs[DstKey];
+ DstAllocs.reserve(DstAllocs.size() + SrcAllocs.size());
+ for (auto &Alloc : SrcAllocs)
+ DstAllocs.push_back(std::move(Alloc));
+
+ // Erase SrcKey entry using value rather than iterator I: I may have been
+ // invalidated when we looked up DstKey.
+ Allocs.erase(SrcKey);
}
- while (!Allocs.empty()) {
- Err = joinErrors(std::move(Err), Allocs.back()->deallocate());
- Allocs.pop_back();
- }
-
- return Err;
+ for (auto &P : Plugins)
+ P->notifyTransferringResources(DstKey, SrcKey);
}
EHFrameRegistrationPlugin::EHFrameRegistrationPlugin(
- EHFrameRegistrar &Registrar)
- : Registrar(Registrar) {}
+ ExecutionSession &ES, std::unique_ptr<EHFrameRegistrar> Registrar)
+ : ES(ES), Registrar(std::move(Registrar)) {}
void EHFrameRegistrationPlugin::modifyPassConfig(
MaterializationResponsibility &MR, const Triple &TT,
@@ -559,65 +583,70 @@ void EHFrameRegistrationPlugin::modifyPassConfig(
Error EHFrameRegistrationPlugin::notifyEmitted(
MaterializationResponsibility &MR) {
- std::lock_guard<std::mutex> Lock(EHFramePluginMutex);
-
- auto EHFrameRangeItr = InProcessLinks.find(&MR);
- if (EHFrameRangeItr == InProcessLinks.end())
- return Error::success();
-
- auto EHFrameRange = EHFrameRangeItr->second;
- assert(EHFrameRange.Addr &&
- "eh-frame addr to register can not be null");
-
- InProcessLinks.erase(EHFrameRangeItr);
- if (auto Key = MR.getVModuleKey())
- TrackedEHFrameRanges[Key] = EHFrameRange;
- else
- UntrackedEHFrameRanges.push_back(EHFrameRange);
- return Registrar.registerEHFrames(EHFrameRange.Addr, EHFrameRange.Size);
-}
+ EHFrameRange EmittedRange;
+ {
+ std::lock_guard<std::mutex> Lock(EHFramePluginMutex);
-Error EHFrameRegistrationPlugin::notifyRemovingModule(VModuleKey K) {
- std::lock_guard<std::mutex> Lock(EHFramePluginMutex);
+ auto EHFrameRangeItr = InProcessLinks.find(&MR);
+ if (EHFrameRangeItr == InProcessLinks.end())
+ return Error::success();
- auto EHFrameRangeItr = TrackedEHFrameRanges.find(K);
- if (EHFrameRangeItr == TrackedEHFrameRanges.end())
- return Error::success();
-
- auto EHFrameRange = EHFrameRangeItr->second;
- assert(EHFrameRange.Addr && "Tracked eh-frame range must not be null");
+ EmittedRange = EHFrameRangeItr->second;
+ assert(EmittedRange.Addr && "eh-frame addr to register can not be null");
+ InProcessLinks.erase(EHFrameRangeItr);
+ }
- TrackedEHFrameRanges.erase(EHFrameRangeItr);
+ if (auto Err = MR.withResourceKeyDo(
+ [&](ResourceKey K) { EHFrameRanges[K].push_back(EmittedRange); }))
+ return Err;
- return Registrar.deregisterEHFrames(EHFrameRange.Addr, EHFrameRange.Size);
+ return Registrar->registerEHFrames(EmittedRange.Addr, EmittedRange.Size);
}
-Error EHFrameRegistrationPlugin::notifyRemovingAllModules() {
+Error EHFrameRegistrationPlugin::notifyFailed(
+ MaterializationResponsibility &MR) {
std::lock_guard<std::mutex> Lock(EHFramePluginMutex);
+ InProcessLinks.erase(&MR);
+ return Error::success();
+}
- std::vector<EHFrameRange> EHFrameRanges =
- std::move(UntrackedEHFrameRanges);
- EHFrameRanges.reserve(EHFrameRanges.size() + TrackedEHFrameRanges.size());
-
- for (auto &KV : TrackedEHFrameRanges)
- EHFrameRanges.push_back(KV.second);
+Error EHFrameRegistrationPlugin::notifyRemovingResources(ResourceKey K) {
+ std::vector<EHFrameRange> RangesToRemove;
- TrackedEHFrameRanges.clear();
+ ES.runSessionLocked([&] {
+ auto I = EHFrameRanges.find(K);
+ if (I != EHFrameRanges.end()) {
+ RangesToRemove = std::move(I->second);
+ EHFrameRanges.erase(I);
+ }
+ });
Error Err = Error::success();
-
- while (!EHFrameRanges.empty()) {
- auto EHFrameRange = EHFrameRanges.back();
- assert(EHFrameRange.Addr && "Untracked eh-frame range must not be null");
- EHFrameRanges.pop_back();
- Err = joinErrors(std::move(Err),
- Registrar.deregisterEHFrames(EHFrameRange.Addr,
- EHFrameRange.Size));
+ while (!RangesToRemove.empty()) {
+ auto RangeToRemove = RangesToRemove.back();
+ RangesToRemove.pop_back();
+ assert(RangeToRemove.Addr && "Untracked eh-frame range must not be null");
+ Err = joinErrors(
+ std::move(Err),
+ Registrar->deregisterEHFrames(RangeToRemove.Addr, RangeToRemove.Size));
}
return Err;
}
+void EHFrameRegistrationPlugin::notifyTransferringResources(
+ ResourceKey DstKey, ResourceKey SrcKey) {
+ auto SI = EHFrameRanges.find(SrcKey);
+ if (SI != EHFrameRanges.end()) {
+ auto &SrcRanges = SI->second;
+ auto &DstRanges = EHFrameRanges[DstKey];
+ DstRanges.reserve(DstRanges.size() + SrcRanges.size());
+ for (auto &SrcRange : SrcRanges)
+ DstRanges.push_back(std::move(SrcRange));
+ EHFrameRanges.erase(SI);
+ }
+}
+
} // End namespace orc.
} // End namespace llvm.
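[Editor's sketch, not part of the patch] The ObjectLinkingLayer rewrite above switches allocation tracking from per-VModuleKey maps to per-ResourceKey vectors managed through the new ResourceManager hooks (handleRemoveResources / handleTransferResources), and EHFrameRegistrationPlugin is now constructed from the ExecutionSession plus an owned EHFrameRegistrar. A minimal sketch of wiring the layer up against these interfaces; the header paths and the in-process JITLink helper classes are assumptions based on the LLVM 12 tree, not part of this patch:

#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include <memory>

using namespace llvm;
using namespace llvm::orc;

// Sketch only: build an ObjectLinkingLayer that owns its memory manager and
// registers the eh-frame plugin via the new (ES, unique_ptr<EHFrameRegistrar>)
// constructor. The layer registers itself with ES as a ResourceManager.
static std::unique_ptr<ObjectLinkingLayer> buildObjLayer(ExecutionSession &ES) {
  auto Layer = std::make_unique<ObjectLinkingLayer>(
      ES, std::make_unique<jitlink::InProcessMemoryManager>());
  Layer->addPlugin(std::make_unique<EHFrameRegistrationPlugin>(
      ES, std::make_unique<jitlink::InProcessEHFrameRegistrar>()));
  return Layer;
}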
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
index d18eb38a4142..a57662e10a79 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
@@ -17,8 +17,9 @@ ObjectTransformLayer::ObjectTransformLayer(ExecutionSession &ES,
TransformFunction Transform)
: ObjectLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
-void ObjectTransformLayer::emit(MaterializationResponsibility R,
- std::unique_ptr<MemoryBuffer> O) {
+void ObjectTransformLayer::emit(
+ std::unique_ptr<MaterializationResponsibility> R,
+ std::unique_ptr<MemoryBuffer> O) {
assert(O && "Module must not be null");
// If there is a transform set then apply it.
@@ -26,7 +27,7 @@ void ObjectTransformLayer::emit(MaterializationResponsibility R,
if (auto TransformedObj = Transform(std::move(O)))
O = std::move(*TransformedObj);
else {
- R.failMaterialization();
+ R->failMaterialization();
getExecutionSession().reportError(TransformedObj.takeError());
return;
}
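[Editor's sketch, not part of the patch] ObjectTransformLayer::emit() likewise takes the MaterializationResponsibility behind a unique_ptr now; when the transform returns an Error, the layer fails the materialization as shown in the hunk. A minimal sketch of a transform plugged into the layer, assuming the LLVM 12 headers; identityTransform and attachTransform are hypothetical, only the constructor shape matches the context above:

#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>

using namespace llvm;
using namespace llvm::orc;

// Sketch only: a pass-through transform. Returning an Error here is what
// drives the R->failMaterialization() path in the hunk above.
static Expected<std::unique_ptr<MemoryBuffer>>
identityTransform(std::unique_ptr<MemoryBuffer> Obj) {
  // A real transform could rewrite or instrument the object buffer here.
  return std::move(Obj);
}

static void attachTransform(ExecutionSession &ES, ObjectLayer &BaseLayer) {
  ObjectTransformLayer TransformLayer(ES, BaseLayer, identityTransform);
  // Objects emitted through TransformLayer pass through identityTransform
  // before reaching BaseLayer.
  (void)TransformLayer;
}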
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
deleted file mode 100644
index 28c8479abba4..000000000000
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-//===----------- OrcCBindings.cpp - C bindings for the Orc APIs -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "OrcCBindingsStack.h"
-#include "llvm-c/OrcBindings.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
-
-using namespace llvm;
-
-LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM) {
- TargetMachine *TM2(unwrap(TM));
-
- Triple T(TM2->getTargetTriple());
-
- auto IndirectStubsMgrBuilder =
- orc::createLocalIndirectStubsManagerBuilder(T);
-
- OrcCBindingsStack *JITStack =
- new OrcCBindingsStack(*TM2, std::move(IndirectStubsMgrBuilder));
-
- return wrap(JITStack);
-}
-
-const char *LLVMOrcGetErrorMsg(LLVMOrcJITStackRef JITStack) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- return J.getErrorMessage().c_str();
-}
-
-void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledName,
- const char *SymbolName) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- std::string Mangled = J.mangle(SymbolName);
- *MangledName = new char[Mangled.size() + 1];
- strcpy(*MangledName, Mangled.c_str());
-}
-
-void LLVMOrcDisposeMangledSymbol(char *MangledName) { delete[] MangledName; }
-
-LLVMErrorRef LLVMOrcCreateLazyCompileCallback(
- LLVMOrcJITStackRef JITStack, LLVMOrcTargetAddress *RetAddr,
- LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- if (auto Addr = J.createLazyCompileCallback(Callback, CallbackCtx)) {
- *RetAddr = *Addr;
- return LLVMErrorSuccess;
- } else
- return wrap(Addr.takeError());
-}
-
-LLVMErrorRef LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress InitAddr) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- return wrap(J.createIndirectStub(StubName, InitAddr));
-}
-
-LLVMErrorRef LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress NewAddr) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- return wrap(J.setIndirectStubPointer(StubName, NewAddr));
-}
-
-LLVMErrorRef LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle,
- LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- std::unique_ptr<Module> M(unwrap(Mod));
- if (auto Handle =
- J.addIRModuleEager(std::move(M), SymbolResolver, SymbolResolverCtx)) {
- *RetHandle = *Handle;
- return LLVMErrorSuccess;
- } else
- return wrap(Handle.takeError());
-}
-
-LLVMErrorRef LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle,
- LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- std::unique_ptr<Module> M(unwrap(Mod));
- if (auto Handle =
- J.addIRModuleLazy(std::move(M), SymbolResolver, SymbolResolverCtx)) {
- *RetHandle = *Handle;
- return LLVMErrorSuccess;
- } else
- return wrap(Handle.takeError());
-}
-
-LLVMErrorRef LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle,
- LLVMMemoryBufferRef Obj,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- std::unique_ptr<MemoryBuffer> O(unwrap(Obj));
- if (auto Handle =
- J.addObject(std::move(O), SymbolResolver, SymbolResolverCtx)) {
- *RetHandle = *Handle;
- return LLVMErrorSuccess;
- } else
- return wrap(Handle.takeError());
-}
-
-LLVMErrorRef LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle H) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- return wrap(J.removeModule(H));
-}
-
-LLVMErrorRef LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- const char *SymbolName) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- if (auto Addr = J.findSymbolAddress(SymbolName, true)) {
- *RetAddr = *Addr;
- return LLVMErrorSuccess;
- } else
- return wrap(Addr.takeError());
-}
-
-LLVMErrorRef LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- LLVMOrcModuleHandle H,
- const char *SymbolName) {
- OrcCBindingsStack &J = *unwrap(JITStack);
- if (auto Addr = J.findSymbolAddressIn(H, SymbolName, true)) {
- *RetAddr = *Addr;
- return LLVMErrorSuccess;
- } else
- return wrap(Addr.takeError());
-}
-
-LLVMErrorRef LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) {
- auto *J = unwrap(JITStack);
- auto Err = J->shutdown();
- delete J;
- return wrap(std::move(Err));
-}
-
-void LLVMOrcRegisterJITEventListener(LLVMOrcJITStackRef JITStack, LLVMJITEventListenerRef L)
-{
- unwrap(JITStack)->RegisterJITEventListener(unwrap(L));
-}
-
-void LLVMOrcUnregisterJITEventListener(LLVMOrcJITStackRef JITStack, LLVMJITEventListenerRef L)
-{
- unwrap(JITStack)->UnregisterJITEventListener(unwrap(L));
-}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
deleted file mode 100644
index 87bb4398765d..000000000000
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ /dev/null
@@ -1,534 +0,0 @@
-//===- OrcCBindingsStack.h - Orc JIT stack for C bindings -----*- C++ -*---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H
-#define LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H
-
-#include "llvm-c/OrcBindings.h"
-#include "llvm-c/TargetMachine.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
-#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
-#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
-#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
-#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
-#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
-#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CBindingWrapping.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include <algorithm>
-#include <cstdint>
-#include <functional>
-#include <map>
-#include <memory>
-#include <set>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
-class OrcCBindingsStack;
-
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcCBindingsStack, LLVMOrcJITStackRef)
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef)
-
-namespace detail {
-
-// FIXME: Kill this off once the Layer concept becomes an interface.
-class GenericLayer {
-public:
- virtual ~GenericLayer() = default;
-
- virtual JITSymbol findSymbolIn(orc::VModuleKey K, const std::string &Name,
- bool ExportedSymbolsOnly) = 0;
- virtual Error removeModule(orc::VModuleKey K) = 0;
- };
-
- template <typename LayerT> class GenericLayerImpl : public GenericLayer {
- public:
- GenericLayerImpl(LayerT &Layer) : Layer(Layer) {}
-
- JITSymbol findSymbolIn(orc::VModuleKey K, const std::string &Name,
- bool ExportedSymbolsOnly) override {
- return Layer.findSymbolIn(K, Name, ExportedSymbolsOnly);
- }
-
- Error removeModule(orc::VModuleKey K) override {
- return Layer.removeModule(K);
- }
-
- private:
- LayerT &Layer;
- };
-
- template <>
- class GenericLayerImpl<orc::LegacyRTDyldObjectLinkingLayer> : public GenericLayer {
- private:
- using LayerT = orc::LegacyRTDyldObjectLinkingLayer;
- public:
- GenericLayerImpl(LayerT &Layer) : Layer(Layer) {}
-
- JITSymbol findSymbolIn(orc::VModuleKey K, const std::string &Name,
- bool ExportedSymbolsOnly) override {
- return Layer.findSymbolIn(K, Name, ExportedSymbolsOnly);
- }
-
- Error removeModule(orc::VModuleKey K) override {
- return Layer.removeObject(K);
- }
-
- private:
- LayerT &Layer;
- };
-
- template <typename LayerT>
- std::unique_ptr<GenericLayerImpl<LayerT>> createGenericLayer(LayerT &Layer) {
- return std::make_unique<GenericLayerImpl<LayerT>>(Layer);
- }
-
-} // end namespace detail
-
-class OrcCBindingsStack {
-public:
-
- using CompileCallbackMgr = orc::JITCompileCallbackManager;
- using ObjLayerT = orc::LegacyRTDyldObjectLinkingLayer;
- using CompileLayerT = orc::LegacyIRCompileLayer<ObjLayerT, orc::SimpleCompiler>;
- using CODLayerT =
- orc::LegacyCompileOnDemandLayer<CompileLayerT, CompileCallbackMgr>;
-
- using CallbackManagerBuilder =
- std::function<std::unique_ptr<CompileCallbackMgr>()>;
-
- using IndirectStubsManagerBuilder = CODLayerT::IndirectStubsManagerBuilderT;
-
-private:
-
- using OwningObject = object::OwningBinary<object::ObjectFile>;
-
- class CBindingsResolver : public orc::SymbolResolver {
- public:
- CBindingsResolver(OrcCBindingsStack &Stack,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx)
- : Stack(Stack), ExternalResolver(std::move(ExternalResolver)),
- ExternalResolverCtx(std::move(ExternalResolverCtx)) {}
-
- orc::SymbolNameSet
- getResponsibilitySet(const orc::SymbolNameSet &Symbols) override {
- orc::SymbolNameSet Result;
-
- for (auto &S : Symbols) {
- if (auto Sym = findSymbol(std::string(*S))) {
- if (!Sym.getFlags().isStrong())
- Result.insert(S);
- } else if (auto Err = Sym.takeError()) {
- Stack.reportError(std::move(Err));
- return orc::SymbolNameSet();
- }
- }
-
- return Result;
- }
-
- orc::SymbolNameSet
- lookup(std::shared_ptr<orc::AsynchronousSymbolQuery> Query,
- orc::SymbolNameSet Symbols) override {
- orc::SymbolNameSet UnresolvedSymbols;
-
- for (auto &S : Symbols) {
- if (auto Sym = findSymbol(std::string(*S))) {
- if (auto Addr = Sym.getAddress()) {
- Query->notifySymbolMetRequiredState(
- S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
- } else {
- Stack.ES.legacyFailQuery(*Query, Addr.takeError());
- return orc::SymbolNameSet();
- }
- } else if (auto Err = Sym.takeError()) {
- Stack.ES.legacyFailQuery(*Query, std::move(Err));
- return orc::SymbolNameSet();
- } else
- UnresolvedSymbols.insert(S);
- }
-
- if (Query->isComplete())
- Query->handleComplete();
-
- return UnresolvedSymbols;
- }
-
- private:
- JITSymbol findSymbol(const std::string &Name) {
- // Search order:
- // 1. JIT'd symbols.
- // 2. Runtime overrides.
- // 3. External resolver (if present).
-
- if (Stack.CODLayer) {
- if (auto Sym = Stack.CODLayer->findSymbol(Name, true))
- return Sym;
- else if (auto Err = Sym.takeError())
- return Sym.takeError();
- } else {
- if (auto Sym = Stack.CompileLayer.findSymbol(Name, true))
- return Sym;
- else if (auto Err = Sym.takeError())
- return Sym.takeError();
- }
-
- if (auto Sym = Stack.CXXRuntimeOverrides.searchOverrides(Name))
- return Sym;
-
- if (ExternalResolver)
- return JITSymbol(ExternalResolver(Name.c_str(), ExternalResolverCtx),
- JITSymbolFlags::Exported);
-
- return JITSymbol(nullptr);
- }
-
- OrcCBindingsStack &Stack;
- LLVMOrcSymbolResolverFn ExternalResolver;
- void *ExternalResolverCtx = nullptr;
- };
-
-public:
- OrcCBindingsStack(TargetMachine &TM,
- IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
- : CCMgr(createCompileCallbackManager(TM, ES)), DL(TM.createDataLayout()),
- IndirectStubsMgr(IndirectStubsMgrBuilder()),
- ObjectLayer(
- AcknowledgeORCv1Deprecation, ES,
- [this](orc::VModuleKey K) {
- auto ResolverI = Resolvers.find(K);
- assert(ResolverI != Resolvers.end() &&
- "No resolver for module K");
- auto Resolver = std::move(ResolverI->second);
- Resolvers.erase(ResolverI);
- return ObjLayerT::Resources{
- std::make_shared<SectionMemoryManager>(), Resolver};
- },
- nullptr,
- [this](orc::VModuleKey K, const object::ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &LoadedObjInfo) {
- this->notifyFinalized(K, Obj, LoadedObjInfo);
- },
- [this](orc::VModuleKey K, const object::ObjectFile &Obj) {
- this->notifyFreed(K, Obj);
- }),
- CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
- orc::SimpleCompiler(TM)),
- CODLayer(createCODLayer(ES, CompileLayer, CCMgr.get(),
- std::move(IndirectStubsMgrBuilder), Resolvers)),
- CXXRuntimeOverrides(
- AcknowledgeORCv1Deprecation,
- [this](const std::string &S) { return mangle(S); }) {}
-
- Error shutdown() {
- // Run any destructors registered with __cxa_atexit.
- CXXRuntimeOverrides.runDestructors();
- // Run any IR destructors.
- for (auto &DtorRunner : IRStaticDestructorRunners)
- if (auto Err = DtorRunner.runViaLayer(*this))
- return Err;
- return Error::success();
- }
-
- std::string mangle(StringRef Name) {
- std::string MangledName;
- {
- raw_string_ostream MangledNameStream(MangledName);
- Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
- }
- return MangledName;
- }
-
- template <typename PtrTy>
- static PtrTy fromTargetAddress(JITTargetAddress Addr) {
- return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
- }
-
- Expected<JITTargetAddress>
- createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback,
- void *CallbackCtx) {
- auto WrappedCallback = [=]() -> JITTargetAddress {
- return Callback(wrap(this), CallbackCtx);
- };
-
- return CCMgr->getCompileCallback(std::move(WrappedCallback));
- }
-
- Error createIndirectStub(StringRef StubName, JITTargetAddress Addr) {
- return IndirectStubsMgr->createStub(StubName, Addr,
- JITSymbolFlags::Exported);
- }
-
- Error setIndirectStubPointer(StringRef Name, JITTargetAddress Addr) {
- return IndirectStubsMgr->updatePointer(Name, Addr);
- }
-
- template <typename LayerT>
- Expected<orc::VModuleKey>
- addIRModule(LayerT &Layer, std::unique_ptr<Module> M,
- std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
-
- // Attach a data-layout if one isn't already present.
- if (M->getDataLayout().isDefault())
- M->setDataLayout(DL);
-
- // Record the static constructors and destructors. We have to do this before
- // we hand over ownership of the module to the JIT.
- std::vector<std::string> CtorNames, DtorNames;
- for (auto Ctor : orc::getConstructors(*M))
- CtorNames.push_back(mangle(Ctor.Func->getName()));
- for (auto Dtor : orc::getDestructors(*M))
- DtorNames.push_back(mangle(Dtor.Func->getName()));
-
- // Add the module to the JIT.
- auto K = ES.allocateVModule();
- Resolvers[K] = std::make_shared<CBindingsResolver>(*this, ExternalResolver,
- ExternalResolverCtx);
- if (auto Err = Layer.addModule(K, std::move(M)))
- return std::move(Err);
-
- KeyLayers[K] = detail::createGenericLayer(Layer);
-
- // Run the static constructors, and save the static destructor runner for
- // execution when the JIT is torn down.
- orc::LegacyCtorDtorRunner<OrcCBindingsStack> CtorRunner(
- AcknowledgeORCv1Deprecation, std::move(CtorNames), K);
- if (auto Err = CtorRunner.runViaLayer(*this))
- return std::move(Err);
-
- IRStaticDestructorRunners.emplace_back(AcknowledgeORCv1Deprecation,
- std::move(DtorNames), K);
-
- return K;
- }
-
- Expected<orc::VModuleKey>
- addIRModuleEager(std::unique_ptr<Module> M,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
- return addIRModule(CompileLayer, std::move(M),
- std::make_unique<SectionMemoryManager>(),
- std::move(ExternalResolver), ExternalResolverCtx);
- }
-
- Expected<orc::VModuleKey>
- addIRModuleLazy(std::unique_ptr<Module> M,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
- if (!CODLayer)
- return make_error<StringError>("Can not add lazy module: No compile "
- "callback manager available",
- inconvertibleErrorCode());
-
- return addIRModule(*CODLayer, std::move(M),
- std::make_unique<SectionMemoryManager>(),
- std::move(ExternalResolver), ExternalResolverCtx);
- }
-
- Error removeModule(orc::VModuleKey K) {
- // FIXME: Should error release the module key?
- if (auto Err = KeyLayers[K]->removeModule(K))
- return Err;
- ES.releaseVModule(K);
- KeyLayers.erase(K);
- return Error::success();
- }
-
- Expected<orc::VModuleKey> addObject(std::unique_ptr<MemoryBuffer> ObjBuffer,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
- if (auto Obj = object::ObjectFile::createObjectFile(
- ObjBuffer->getMemBufferRef())) {
-
- auto K = ES.allocateVModule();
- Resolvers[K] = std::make_shared<CBindingsResolver>(
- *this, ExternalResolver, ExternalResolverCtx);
-
- if (auto Err = ObjectLayer.addObject(K, std::move(ObjBuffer)))
- return std::move(Err);
-
- KeyLayers[K] = detail::createGenericLayer(ObjectLayer);
-
- return K;
- } else
- return Obj.takeError();
- }
-
- JITSymbol findSymbol(const std::string &Name,
- bool ExportedSymbolsOnly) {
- if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly))
- return Sym;
- if (CODLayer)
- return CODLayer->findSymbol(mangle(Name), ExportedSymbolsOnly);
- return CompileLayer.findSymbol(mangle(Name), ExportedSymbolsOnly);
- }
-
- JITSymbol findSymbolIn(orc::VModuleKey K, const std::string &Name,
- bool ExportedSymbolsOnly) {
- assert(KeyLayers.count(K) && "looking up symbol in unknown module");
- return KeyLayers[K]->findSymbolIn(K, mangle(Name), ExportedSymbolsOnly);
- }
-
- Expected<JITTargetAddress> findSymbolAddress(const std::string &Name,
- bool ExportedSymbolsOnly) {
- if (auto Sym = findSymbol(Name, ExportedSymbolsOnly)) {
- // Successful lookup, non-null symbol:
- if (auto AddrOrErr = Sym.getAddress())
- return *AddrOrErr;
- else
- return AddrOrErr.takeError();
- } else if (auto Err = Sym.takeError()) {
- // Lookup failure - report error.
- return std::move(Err);
- }
-
- // Symbol not found. Return 0.
- return 0;
- }
-
- Expected<JITTargetAddress> findSymbolAddressIn(orc::VModuleKey K,
- const std::string &Name,
- bool ExportedSymbolsOnly) {
- if (auto Sym = findSymbolIn(K, Name, ExportedSymbolsOnly)) {
- // Successful lookup, non-null symbol:
- if (auto AddrOrErr = Sym.getAddress())
- return *AddrOrErr;
- else
- return AddrOrErr.takeError();
- } else if (auto Err = Sym.takeError()) {
- // Lookup failure - report error.
- return std::move(Err);
- }
-
- // Symbol not found. Return 0.
- return 0;
- }
-
- const std::string &getErrorMessage() const { return ErrMsg; }
-
- void RegisterJITEventListener(JITEventListener *L) {
- if (!L)
- return;
- EventListeners.push_back(L);
- }
-
- void UnregisterJITEventListener(JITEventListener *L) {
- if (!L)
- return;
-
- auto I = find(reverse(EventListeners), L);
- if (I != EventListeners.rend()) {
- std::swap(*I, EventListeners.back());
- EventListeners.pop_back();
- }
- }
-
-private:
- using ResolverMap =
- std::map<orc::VModuleKey, std::shared_ptr<orc::SymbolResolver>>;
-
- static std::unique_ptr<CompileCallbackMgr>
- createCompileCallbackManager(TargetMachine &TM, orc::ExecutionSession &ES) {
- auto CCMgr = createLocalCompileCallbackManager(TM.getTargetTriple(), ES, 0);
- if (!CCMgr) {
- // FIXME: It would be good if we could report this somewhere, but we don't
- // have an instance yet.
- logAllUnhandledErrors(CCMgr.takeError(), errs(), "ORC error: ");
- return nullptr;
- }
- return std::move(*CCMgr);
- }
-
- static std::unique_ptr<CODLayerT>
- createCODLayer(orc::ExecutionSession &ES, CompileLayerT &CompileLayer,
- CompileCallbackMgr *CCMgr,
- IndirectStubsManagerBuilder IndirectStubsMgrBuilder,
- ResolverMap &Resolvers) {
- // If there is no compile callback manager available we can not create a
- // compile on demand layer.
- if (!CCMgr)
- return nullptr;
-
- return std::make_unique<CODLayerT>(
- AcknowledgeORCv1Deprecation, ES, CompileLayer,
- [&Resolvers](orc::VModuleKey K) {
- auto ResolverI = Resolvers.find(K);
- assert(ResolverI != Resolvers.end() && "No resolver for module K");
- return ResolverI->second;
- },
- [&Resolvers](orc::VModuleKey K,
- std::shared_ptr<orc::SymbolResolver> Resolver) {
- assert(!Resolvers.count(K) && "Resolver already present");
- Resolvers[K] = std::move(Resolver);
- },
- [](Function &F) { return std::set<Function *>({&F}); }, *CCMgr,
- std::move(IndirectStubsMgrBuilder), false);
- }
-
- void reportError(Error Err) {
- // FIXME: Report errors on the execution session.
- logAllUnhandledErrors(std::move(Err), errs(), "ORC error: ");
- };
-
- void notifyFinalized(orc::VModuleKey K,
- const object::ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &LoadedObjInfo) {
- uint64_t Key = static_cast<uint64_t>(
- reinterpret_cast<uintptr_t>(Obj.getData().data()));
- for (auto &Listener : EventListeners)
- Listener->notifyObjectLoaded(Key, Obj, LoadedObjInfo);
- }
-
- void notifyFreed(orc::VModuleKey K, const object::ObjectFile &Obj) {
- uint64_t Key = static_cast<uint64_t>(
- reinterpret_cast<uintptr_t>(Obj.getData().data()));
- for (auto &Listener : EventListeners)
- Listener->notifyFreeingObject(Key);
- }
-
- orc::ExecutionSession ES;
- std::unique_ptr<CompileCallbackMgr> CCMgr;
-
- std::vector<JITEventListener *> EventListeners;
-
- DataLayout DL;
- SectionMemoryManager CCMgrMemMgr;
-
- std::unique_ptr<orc::IndirectStubsManager> IndirectStubsMgr;
-
- ObjLayerT ObjectLayer;
- CompileLayerT CompileLayer;
- std::unique_ptr<CODLayerT> CODLayer;
-
- std::map<orc::VModuleKey, std::unique_ptr<detail::GenericLayer>> KeyLayers;
-
- orc::LegacyLocalCXXRuntimeOverrides CXXRuntimeOverrides;
- std::vector<orc::LegacyCtorDtorRunner<OrcCBindingsStack>> IRStaticDestructorRunners;
- std::string ErrMsg;
-
- ResolverMap Resolvers;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
deleted file mode 100644
index 772a9c2c4ab2..000000000000
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-//===-------- OrcMCJITReplacement.cpp - Orc-based MCJIT replacement -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "OrcMCJITReplacement.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
-
-namespace {
-
-static struct RegisterJIT {
- RegisterJIT() { llvm::orc::OrcMCJITReplacement::Register(); }
-} JITRegistrator;
-
-}
-
-extern "C" void LLVMLinkInOrcMCJITReplacement() {}
-
-namespace llvm {
-namespace orc {
-
-GenericValue
-OrcMCJITReplacement::runFunction(Function *F,
- ArrayRef<GenericValue> ArgValues) {
- assert(F && "Function *F was null at entry to run()");
-
- void *FPtr = getPointerToFunction(F);
- assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
- FunctionType *FTy = F->getFunctionType();
- Type *RetTy = FTy->getReturnType();
-
- assert((FTy->getNumParams() == ArgValues.size() ||
- (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
- "Wrong number of arguments passed into function!");
- assert(FTy->getNumParams() == ArgValues.size() &&
- "This doesn't support passing arguments through varargs (yet)!");
-
- // Handle some common cases first. These cases correspond to common `main'
- // prototypes.
- if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
- switch (ArgValues.size()) {
- case 3:
- if (FTy->getParamType(0)->isIntegerTy(32) &&
- FTy->getParamType(1)->isPointerTy() &&
- FTy->getParamType(2)->isPointerTy()) {
- int (*PF)(int, char **, const char **) =
- (int (*)(int, char **, const char **))(intptr_t)FPtr;
-
- // Call the function.
- GenericValue rv;
- rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
- (char **)GVTOP(ArgValues[1]),
- (const char **)GVTOP(ArgValues[2])));
- return rv;
- }
- break;
- case 2:
- if (FTy->getParamType(0)->isIntegerTy(32) &&
- FTy->getParamType(1)->isPointerTy()) {
- int (*PF)(int, char **) = (int (*)(int, char **))(intptr_t)FPtr;
-
- // Call the function.
- GenericValue rv;
- rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
- (char **)GVTOP(ArgValues[1])));
- return rv;
- }
- break;
- case 1:
- if (FTy->getNumParams() == 1 && FTy->getParamType(0)->isIntegerTy(32)) {
- GenericValue rv;
- int (*PF)(int) = (int (*)(int))(intptr_t)FPtr;
- rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
- return rv;
- }
- break;
- }
- }
-
- // Handle cases where no arguments are passed first.
- if (ArgValues.empty()) {
- GenericValue rv;
- switch (RetTy->getTypeID()) {
- default:
- llvm_unreachable("Unknown return type for function call!");
- case Type::IntegerTyID: {
- unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
- if (BitWidth == 1)
- rv.IntVal = APInt(BitWidth, ((bool (*)())(intptr_t)FPtr)());
- else if (BitWidth <= 8)
- rv.IntVal = APInt(BitWidth, ((char (*)())(intptr_t)FPtr)());
- else if (BitWidth <= 16)
- rv.IntVal = APInt(BitWidth, ((short (*)())(intptr_t)FPtr)());
- else if (BitWidth <= 32)
- rv.IntVal = APInt(BitWidth, ((int (*)())(intptr_t)FPtr)());
- else if (BitWidth <= 64)
- rv.IntVal = APInt(BitWidth, ((int64_t (*)())(intptr_t)FPtr)());
- else
- llvm_unreachable("Integer types > 64 bits not supported");
- return rv;
- }
- case Type::VoidTyID:
- rv.IntVal = APInt(32, ((int (*)())(intptr_t)FPtr)());
- return rv;
- case Type::FloatTyID:
- rv.FloatVal = ((float (*)())(intptr_t)FPtr)();
- return rv;
- case Type::DoubleTyID:
- rv.DoubleVal = ((double (*)())(intptr_t)FPtr)();
- return rv;
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- llvm_unreachable("long double not supported yet");
- case Type::PointerTyID:
- return PTOGV(((void *(*)())(intptr_t)FPtr)());
- }
- }
-
- llvm_unreachable("Full-featured argument passing not supported yet!");
-}
-
-void OrcMCJITReplacement::runStaticConstructorsDestructors(bool isDtors) {
- auto &CtorDtorsMap = isDtors ? UnexecutedDestructors : UnexecutedConstructors;
-
- for (auto &KV : CtorDtorsMap)
- cantFail(LegacyCtorDtorRunner<LazyEmitLayerT>(
- AcknowledgeORCv1Deprecation, std::move(KV.second), KV.first)
- .runViaLayer(LazyEmitLayer));
-
- CtorDtorsMap.clear();
-}
-
-} // End namespace orc.
-} // End namespace llvm.
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
deleted file mode 100644
index 139572bd6977..000000000000
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ /dev/null
@@ -1,502 +0,0 @@
-//===- OrcMCJITReplacement.h - Orc based MCJIT replacement ------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Orc based MCJIT replacement.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_EXECUTIONENGINE_ORC_ORCMCJITREPLACEMENT_H
-#define LLVM_LIB_EXECUTIONENGINE_ORC_ORCMCJITREPLACEMENT_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
-#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
-#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
-#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
-#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
-#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Object/Archive.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <map>
-#include <memory>
-#include <set>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
-class ObjectCache;
-
-namespace orc {
-
-class OrcMCJITReplacement : public ExecutionEngine {
-
- // OrcMCJITReplacement needs to do a little extra book-keeping to ensure that
- // Orc's automatic finalization doesn't kick in earlier than MCJIT clients are
- // expecting - see finalizeMemory.
- class MCJITReplacementMemMgr : public MCJITMemoryManager {
- public:
- MCJITReplacementMemMgr(OrcMCJITReplacement &M,
- std::shared_ptr<MCJITMemoryManager> ClientMM)
- : M(M), ClientMM(std::move(ClientMM)) {}
-
- uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID,
- StringRef SectionName) override {
- uint8_t *Addr =
- ClientMM->allocateCodeSection(Size, Alignment, SectionID,
- SectionName);
- M.SectionsAllocatedSinceLastLoad.insert(Addr);
- return Addr;
- }
-
- uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, StringRef SectionName,
- bool IsReadOnly) override {
- uint8_t *Addr = ClientMM->allocateDataSection(Size, Alignment, SectionID,
- SectionName, IsReadOnly);
- M.SectionsAllocatedSinceLastLoad.insert(Addr);
- return Addr;
- }
-
- void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
- uintptr_t RODataSize, uint32_t RODataAlign,
- uintptr_t RWDataSize,
- uint32_t RWDataAlign) override {
- return ClientMM->reserveAllocationSpace(CodeSize, CodeAlign,
- RODataSize, RODataAlign,
- RWDataSize, RWDataAlign);
- }
-
- bool needsToReserveAllocationSpace() override {
- return ClientMM->needsToReserveAllocationSpace();
- }
-
- void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
- size_t Size) override {
- return ClientMM->registerEHFrames(Addr, LoadAddr, Size);
- }
-
- void deregisterEHFrames() override {
- return ClientMM->deregisterEHFrames();
- }
-
- void notifyObjectLoaded(RuntimeDyld &RTDyld,
- const object::ObjectFile &O) override {
- return ClientMM->notifyObjectLoaded(RTDyld, O);
- }
-
- void notifyObjectLoaded(ExecutionEngine *EE,
- const object::ObjectFile &O) override {
- return ClientMM->notifyObjectLoaded(EE, O);
- }
-
- bool finalizeMemory(std::string *ErrMsg = nullptr) override {
- // Each set of objects loaded will be finalized exactly once, but since
- // symbol lookup during relocation may recursively trigger the
- // loading/relocation of other modules, and since we're forwarding all
- // finalizeMemory calls to a single underlying memory manager, we need to
- // defer forwarding the call on until all necessary objects have been
- // loaded. Otherwise, during the relocation of a leaf object, we will end
- // up finalizing memory, causing a crash further up the stack when we
- // attempt to apply relocations to finalized memory.
- // To avoid finalizing too early, look at how many objects have been
- // loaded but not yet finalized. This is a bit of a hack that relies on
- // the fact that we're lazily emitting object files: The only way you can
- // get more than one set of objects loaded but not yet finalized is if
- // they were loaded during relocation of another set.
- if (M.UnfinalizedSections.size() == 1)
- return ClientMM->finalizeMemory(ErrMsg);
- return false;
- }
-
- private:
- OrcMCJITReplacement &M;
- std::shared_ptr<MCJITMemoryManager> ClientMM;
- };
-
- class LinkingORCResolver : public orc::SymbolResolver {
- public:
- LinkingORCResolver(OrcMCJITReplacement &M) : M(M) {}
-
- SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) override {
- SymbolNameSet Result;
-
- for (auto &S : Symbols) {
- if (auto Sym = M.findMangledSymbol(*S)) {
- if (!Sym.getFlags().isStrong())
- Result.insert(S);
- } else if (auto Err = Sym.takeError()) {
- M.reportError(std::move(Err));
- return SymbolNameSet();
- } else {
- if (auto Sym2 =
- M.ClientResolver->findSymbolInLogicalDylib(std::string(*S))) {
- if (!Sym2.getFlags().isStrong())
- Result.insert(S);
- } else if (auto Err = Sym2.takeError()) {
- M.reportError(std::move(Err));
- return SymbolNameSet();
- } else
- Result.insert(S);
- }
- }
-
- return Result;
- }
-
- SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
- SymbolNameSet Symbols) override {
- SymbolNameSet UnresolvedSymbols;
- bool NewSymbolsResolved = false;
-
- for (auto &S : Symbols) {
- if (auto Sym = M.findMangledSymbol(*S)) {
- if (auto Addr = Sym.getAddress()) {
- Query->notifySymbolMetRequiredState(
- S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
- NewSymbolsResolved = true;
- } else {
- M.ES.legacyFailQuery(*Query, Addr.takeError());
- return SymbolNameSet();
- }
- } else if (auto Err = Sym.takeError()) {
- M.ES.legacyFailQuery(*Query, std::move(Err));
- return SymbolNameSet();
- } else {
- if (auto Sym2 = M.ClientResolver->findSymbol(std::string(*S))) {
- if (auto Addr = Sym2.getAddress()) {
- Query->notifySymbolMetRequiredState(
- S, JITEvaluatedSymbol(*Addr, Sym2.getFlags()));
- NewSymbolsResolved = true;
- } else {
- M.ES.legacyFailQuery(*Query, Addr.takeError());
- return SymbolNameSet();
- }
- } else if (auto Err = Sym2.takeError()) {
- M.ES.legacyFailQuery(*Query, std::move(Err));
- return SymbolNameSet();
- } else
- UnresolvedSymbols.insert(S);
- }
- }
-
- if (NewSymbolsResolved && Query->isComplete())
- Query->handleComplete();
-
- return UnresolvedSymbols;
- }
-
- private:
- OrcMCJITReplacement &M;
- };
-
-private:
- static ExecutionEngine *
- createOrcMCJITReplacement(std::string *ErrorMsg,
- std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<LegacyJITSymbolResolver> Resolver,
- std::unique_ptr<TargetMachine> TM) {
- return new OrcMCJITReplacement(std::move(MemMgr), std::move(Resolver),
- std::move(TM));
- }
-
- void reportError(Error Err) {
- logAllUnhandledErrors(std::move(Err), errs(), "MCJIT error: ");
- }
-
-public:
- OrcMCJITReplacement(std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<LegacyJITSymbolResolver> ClientResolver,
- std::unique_ptr<TargetMachine> TM)
- : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)),
- MemMgr(
- std::make_shared<MCJITReplacementMemMgr>(*this, std::move(MemMgr))),
- Resolver(std::make_shared<LinkingORCResolver>(*this)),
- ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this),
- NotifyFinalized(*this),
- ObjectLayer(
- AcknowledgeORCv1Deprecation, ES,
- [this](VModuleKey K) {
- return ObjectLayerT::Resources{this->MemMgr, this->Resolver};
- },
- NotifyObjectLoaded, NotifyFinalized),
- CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
- SimpleCompiler(*this->TM),
- [this](VModuleKey K, std::unique_ptr<Module> M) {
- Modules.push_back(std::move(M));
- }),
- LazyEmitLayer(AcknowledgeORCv1Deprecation, CompileLayer) {}
-
- static void Register() {
- OrcMCJITReplacementCtor = createOrcMCJITReplacement;
- }
-
- void addModule(std::unique_ptr<Module> M) override {
- // If this module doesn't have a DataLayout attached then attach the
- // default.
- if (M->getDataLayout().isDefault()) {
- M->setDataLayout(getDataLayout());
- } else {
- assert(M->getDataLayout() == getDataLayout() && "DataLayout Mismatch");
- }
-
- // Rename, bump linkage and record static constructors and destructors.
- // We have to do this before we hand over ownership of the module to the
- // JIT.
- std::vector<std::string> CtorNames, DtorNames;
- {
- unsigned CtorId = 0, DtorId = 0;
- for (auto Ctor : orc::getConstructors(*M)) {
- std::string NewCtorName = ("__ORCstatic_ctor." + Twine(CtorId++)).str();
- Ctor.Func->setName(NewCtorName);
- Ctor.Func->setLinkage(GlobalValue::ExternalLinkage);
- Ctor.Func->setVisibility(GlobalValue::HiddenVisibility);
- CtorNames.push_back(mangle(NewCtorName));
- }
- for (auto Dtor : orc::getDestructors(*M)) {
- std::string NewDtorName = ("__ORCstatic_dtor." + Twine(DtorId++)).str();
- dbgs() << "Found dtor: " << NewDtorName << "\n";
- Dtor.Func->setName(NewDtorName);
- Dtor.Func->setLinkage(GlobalValue::ExternalLinkage);
- Dtor.Func->setVisibility(GlobalValue::HiddenVisibility);
- DtorNames.push_back(mangle(NewDtorName));
- }
- }
-
- auto K = ES.allocateVModule();
-
- UnexecutedConstructors[K] = std::move(CtorNames);
- UnexecutedDestructors[K] = std::move(DtorNames);
-
- cantFail(LazyEmitLayer.addModule(K, std::move(M)));
- }
-
- void addObjectFile(std::unique_ptr<object::ObjectFile> O) override {
- cantFail(ObjectLayer.addObject(
- ES.allocateVModule(), MemoryBuffer::getMemBufferCopy(O->getData())));
- }
-
- void addObjectFile(object::OwningBinary<object::ObjectFile> O) override {
- std::unique_ptr<object::ObjectFile> Obj;
- std::unique_ptr<MemoryBuffer> ObjBuffer;
- std::tie(Obj, ObjBuffer) = O.takeBinary();
- cantFail(ObjectLayer.addObject(ES.allocateVModule(), std::move(ObjBuffer)));
- }
-
- void addArchive(object::OwningBinary<object::Archive> A) override {
- Archives.push_back(std::move(A));
- }
-
- bool removeModule(Module *M) override {
- auto I = Modules.begin();
- for (auto E = Modules.end(); I != E; ++I)
- if (I->get() == M)
- break;
- if (I == Modules.end())
- return false;
- Modules.erase(I);
- return true;
- }
-
- uint64_t getSymbolAddress(StringRef Name) {
- return cantFail(findSymbol(Name).getAddress());
- }
-
- JITSymbol findSymbol(StringRef Name) {
- return findMangledSymbol(mangle(Name));
- }
-
- void finalizeObject() override {
- // This is deprecated - Aim to remove in ExecutionEngine.
- // REMOVE IF POSSIBLE - Doesn't make sense for New JIT.
- }
-
- void mapSectionAddress(const void *LocalAddress,
- uint64_t TargetAddress) override {
- for (auto &P : UnfinalizedSections)
- if (P.second.count(LocalAddress))
- ObjectLayer.mapSectionAddress(P.first, LocalAddress, TargetAddress);
- }
-
- uint64_t getGlobalValueAddress(const std::string &Name) override {
- return getSymbolAddress(Name);
- }
-
- uint64_t getFunctionAddress(const std::string &Name) override {
- return getSymbolAddress(Name);
- }
-
- void *getPointerToFunction(Function *F) override {
- uint64_t FAddr = getSymbolAddress(F->getName());
- return reinterpret_cast<void *>(static_cast<uintptr_t>(FAddr));
- }
-
- void *getPointerToNamedFunction(StringRef Name,
- bool AbortOnFailure = true) override {
- uint64_t Addr = getSymbolAddress(Name);
- if (!Addr && AbortOnFailure)
- llvm_unreachable("Missing symbol!");
- return reinterpret_cast<void *>(static_cast<uintptr_t>(Addr));
- }
-
- GenericValue runFunction(Function *F,
- ArrayRef<GenericValue> ArgValues) override;
-
- void setObjectCache(ObjectCache *NewCache) override {
- CompileLayer.getCompiler().setObjectCache(NewCache);
- }
-
- void setProcessAllSections(bool ProcessAllSections) override {
- ObjectLayer.setProcessAllSections(ProcessAllSections);
- }
-
- void runStaticConstructorsDestructors(bool isDtors) override;
-
-private:
- JITSymbol findMangledSymbol(StringRef Name) {
- if (auto Sym = LazyEmitLayer.findSymbol(std::string(Name), false))
- return Sym;
- if (auto Sym = ClientResolver->findSymbol(std::string(Name)))
- return Sym;
- if (auto Sym = scanArchives(Name))
- return Sym;
-
- return nullptr;
- }
-
- JITSymbol scanArchives(StringRef Name) {
- for (object::OwningBinary<object::Archive> &OB : Archives) {
- object::Archive *A = OB.getBinary();
- // Look for our symbols in each Archive
- auto OptionalChildOrErr = A->findSym(Name);
- if (!OptionalChildOrErr)
- report_fatal_error(OptionalChildOrErr.takeError());
- auto &OptionalChild = *OptionalChildOrErr;
- if (OptionalChild) {
- // FIXME: Support nested archives?
- Expected<std::unique_ptr<object::Binary>> ChildBinOrErr =
- OptionalChild->getAsBinary();
- if (!ChildBinOrErr) {
- // TODO: Actually report errors helpfully.
- consumeError(ChildBinOrErr.takeError());
- continue;
- }
- std::unique_ptr<object::Binary> &ChildBin = ChildBinOrErr.get();
- if (ChildBin->isObject()) {
- cantFail(ObjectLayer.addObject(
- ES.allocateVModule(),
- MemoryBuffer::getMemBufferCopy(ChildBin->getData())));
- if (auto Sym = ObjectLayer.findSymbol(Name, true))
- return Sym;
- }
- }
- }
- return nullptr;
- }
-
- class NotifyObjectLoadedT {
- public:
- using LoadedObjInfoListT =
- std::vector<std::unique_ptr<RuntimeDyld::LoadedObjectInfo>>;
-
- NotifyObjectLoadedT(OrcMCJITReplacement &M) : M(M) {}
-
- void operator()(VModuleKey K, const object::ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &Info) const {
- M.UnfinalizedSections[K] = std::move(M.SectionsAllocatedSinceLastLoad);
- M.SectionsAllocatedSinceLastLoad = SectionAddrSet();
- M.MemMgr->notifyObjectLoaded(&M, Obj);
- }
- private:
- OrcMCJITReplacement &M;
- };
-
- class NotifyFinalizedT {
- public:
- NotifyFinalizedT(OrcMCJITReplacement &M) : M(M) {}
-
- void operator()(VModuleKey K, const object::ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &Info) {
- M.UnfinalizedSections.erase(K);
- }
-
- private:
- OrcMCJITReplacement &M;
- };
-
- std::string mangle(StringRef Name) {
- std::string MangledName;
- {
- raw_string_ostream MangledNameStream(MangledName);
- Mang.getNameWithPrefix(MangledNameStream, Name, getDataLayout());
- }
- return MangledName;
- }
-
- using ObjectLayerT = LegacyRTDyldObjectLinkingLayer;
- using CompileLayerT = LegacyIRCompileLayer<ObjectLayerT, orc::SimpleCompiler>;
- using LazyEmitLayerT = LazyEmittingLayer<CompileLayerT>;
-
- ExecutionSession ES;
-
- std::unique_ptr<TargetMachine> TM;
- std::shared_ptr<MCJITReplacementMemMgr> MemMgr;
- std::shared_ptr<LinkingORCResolver> Resolver;
- std::shared_ptr<LegacyJITSymbolResolver> ClientResolver;
- Mangler Mang;
-
- // IMPORTANT: ShouldDelete *must* come before LocalModules: The shared_ptr
- // delete blocks in LocalModules refer to the ShouldDelete map, so
- // LocalModules needs to be destructed before ShouldDelete.
- std::map<Module*, bool> ShouldDelete;
-
- NotifyObjectLoadedT NotifyObjectLoaded;
- NotifyFinalizedT NotifyFinalized;
-
- ObjectLayerT ObjectLayer;
- CompileLayerT CompileLayer;
- LazyEmitLayerT LazyEmitLayer;
-
- std::map<VModuleKey, std::vector<std::string>> UnexecutedConstructors;
- std::map<VModuleKey, std::vector<std::string>> UnexecutedDestructors;
-
- // We need to store ObjLayerT::ObjSetHandles for each of the object sets
- // that have been emitted but not yet finalized so that we can forward the
- // mapSectionAddress calls appropriately.
- using SectionAddrSet = std::set<const void *>;
- SectionAddrSet SectionsAllocatedSinceLastLoad;
- std::map<VModuleKey, SectionAddrSet> UnfinalizedSections;
-
- std::vector<object::OwningBinary<object::Archive>> Archives;
-};
-
-} // end namespace orc
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_EXECUTIONENGINE_ORC_MCJITREPLACEMENT_H
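
The header deleted above was the last piece of the ORCv1-era MCJIT shim; with it gone, LLJIT is the supported way to get MCJIT-style behaviour on top of ORCv2. A minimal C++ sketch of that replacement pattern (Ctx and M are assumed to be a pre-built LLVMContext and Module; native-target initialization and error reporting are omitted):

  #include "llvm/ExecutionEngine/JITSymbol.h"
  #include "llvm/ExecutionEngine/Orc/LLJIT.h"
  #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
  #include "llvm/IR/Module.h"

  using namespace llvm;
  using namespace llvm::orc;

  // Ctx and M are assumed to exist; the native target must already be initialized.
  Expected<int> runMain(std::unique_ptr<LLVMContext> Ctx, std::unique_ptr<Module> M) {
    auto J = LLJITBuilder().create();              // replaces EngineBuilder + MCJIT
    if (!J)
      return J.takeError();
    if (auto Err = (*J)->addIRModule(ThreadSafeModule(std::move(M), std::move(Ctx))))
      return std::move(Err);
    auto MainSym = (*J)->lookup("main");           // mangled lookup in the main JITDylib
    if (!MainSym)
      return MainSym.takeError();
    return jitTargetAddressToFunction<int (*)()>(MainSym->getAddress())();
  }
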
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
index 5933c2e666d1..834d4cc8f514 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
@@ -6,11 +6,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm-c/LLJIT.h"
#include "llvm-c/Orc.h"
+#include "llvm-c/OrcEE.h"
#include "llvm-c/TargetMachine.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
using namespace llvm;
using namespace llvm::orc;
@@ -18,6 +22,8 @@ using namespace llvm::orc;
namespace llvm {
namespace orc {
+class InProgressLookupState;
+
class OrcV2CAPIHelper {
public:
using PoolEntry = SymbolStringPtr::PoolEntry;
@@ -29,58 +35,278 @@ public:
return Result;
}
+ static SymbolStringPtr retainSymbolStringPtr(PoolEntryPtr P) {
+ return SymbolStringPtr(P);
+ }
+
static PoolEntryPtr getRawPoolEntryPtr(const SymbolStringPtr &S) {
return S.S;
}
+ static void retainPoolEntry(PoolEntryPtr P) {
+ SymbolStringPtr S(P);
+ S.S = nullptr;
+ }
+
static void releasePoolEntry(PoolEntryPtr P) {
SymbolStringPtr S;
S.S = P;
}
+
+ static InProgressLookupState *extractLookupState(LookupState &LS) {
+ return LS.IPLS.release();
+ }
+
+ static void resetLookupState(LookupState &LS, InProgressLookupState *IPLS) {
+ return LS.reset(IPLS);
+ }
};
-} // end namespace orc
-} // end namespace llvm
+} // namespace orc
+} // namespace llvm
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionSession, LLVMOrcExecutionSessionRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(SymbolStringPool, LLVMOrcSymbolStringPoolRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcV2CAPIHelper::PoolEntry,
LLVMOrcSymbolStringPoolEntryRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MaterializationUnit,
+ LLVMOrcMaterializationUnitRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JITDylib, LLVMOrcJITDylibRef)
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JITDylib::DefinitionGenerator,
- LLVMOrcJITDylibDefinitionGeneratorRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ResourceTracker, LLVMOrcResourceTrackerRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DefinitionGenerator,
+ LLVMOrcDefinitionGeneratorRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(InProgressLookupState, LLVMOrcLookupStateRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ThreadSafeContext,
LLVMOrcThreadSafeContextRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ThreadSafeModule, LLVMOrcThreadSafeModuleRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JITTargetMachineBuilder,
LLVMOrcJITTargetMachineBuilderRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ObjectLayer, LLVMOrcObjectLayerRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLJITBuilder, LLVMOrcLLJITBuilderRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLJIT, LLVMOrcLLJITRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef)
+namespace llvm {
+namespace orc {
+
+class CAPIDefinitionGenerator final : public DefinitionGenerator {
+public:
+ CAPIDefinitionGenerator(
+ void *Ctx,
+ LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate)
+ : Ctx(Ctx), TryToGenerate(TryToGenerate) {}
+
+ Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &LookupSet) override {
+
+ // Take the lookup state.
+ LLVMOrcLookupStateRef LSR = ::wrap(OrcV2CAPIHelper::extractLookupState(LS));
+
+ // Translate the lookup kind.
+ LLVMOrcLookupKind CLookupKind;
+ switch (K) {
+ case LookupKind::Static:
+ CLookupKind = LLVMOrcLookupKindStatic;
+ break;
+ case LookupKind::DLSym:
+ CLookupKind = LLVMOrcLookupKindDLSym;
+ break;
+ }
+
+ // Translate the JITDylibSearchFlags.
+ LLVMOrcJITDylibLookupFlags CJDLookupFlags;
+ switch (JDLookupFlags) {
+ case JITDylibLookupFlags::MatchExportedSymbolsOnly:
+ CJDLookupFlags = LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly;
+ break;
+ case JITDylibLookupFlags::MatchAllSymbols:
+ CJDLookupFlags = LLVMOrcJITDylibLookupFlagsMatchAllSymbols;
+ break;
+ }
+
+ // Translate the lookup set.
+ std::vector<LLVMOrcCLookupSetElement> CLookupSet;
+ CLookupSet.reserve(LookupSet.size());
+ for (auto &KV : LookupSet) {
+ LLVMOrcSymbolLookupFlags SLF;
+ LLVMOrcSymbolStringPoolEntryRef Name =
+ ::wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(KV.first));
+ switch (KV.second) {
+ case SymbolLookupFlags::RequiredSymbol:
+ SLF = LLVMOrcSymbolLookupFlagsRequiredSymbol;
+ break;
+ case SymbolLookupFlags::WeaklyReferencedSymbol:
+ SLF = LLVMOrcSymbolLookupFlagsWeaklyReferencedSymbol;
+ break;
+ }
+ CLookupSet.push_back({Name, SLF});
+ }
+
+ // Run the C TryToGenerate function.
+ auto Err = unwrap(TryToGenerate(::wrap(this), Ctx, &LSR, CLookupKind,
+ ::wrap(&JD), CJDLookupFlags,
+ CLookupSet.data(), CLookupSet.size()));
+
+ // Restore the lookup state.
+ OrcV2CAPIHelper::resetLookupState(LS, ::unwrap(LSR));
+
+ return Err;
+ }
+
+private:
+ void *Ctx;
+ LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+void LLVMOrcExecutionSessionSetErrorReporter(
+ LLVMOrcExecutionSessionRef ES, LLVMOrcErrorReporterFunction ReportError,
+ void *Ctx) {
+ unwrap(ES)->setErrorReporter(
+ [=](Error Err) { ReportError(Ctx, wrap(std::move(Err))); });
+}
+
+LLVMOrcSymbolStringPoolRef
+LLVMOrcExecutionSessionGetSymbolStringPool(LLVMOrcExecutionSessionRef ES) {
+ return wrap(unwrap(ES)->getSymbolStringPool().get());
+}
+
+void LLVMOrcSymbolStringPoolClearDeadEntries(LLVMOrcSymbolStringPoolRef SSP) {
+ unwrap(SSP)->clearDeadEntries();
+}
+
LLVMOrcSymbolStringPoolEntryRef
LLVMOrcExecutionSessionIntern(LLVMOrcExecutionSessionRef ES, const char *Name) {
return wrap(
OrcV2CAPIHelper::releaseSymbolStringPtr(unwrap(ES)->intern(Name)));
}
+void LLVMOrcRetainSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S) {
+ OrcV2CAPIHelper::retainPoolEntry(unwrap(S));
+}
+
void LLVMOrcReleaseSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S) {
OrcV2CAPIHelper::releasePoolEntry(unwrap(S));
}
-void LLVMOrcDisposeJITDylibDefinitionGenerator(
- LLVMOrcJITDylibDefinitionGeneratorRef DG) {
- delete unwrap(DG);
+const char *LLVMOrcSymbolStringPoolEntryStr(LLVMOrcSymbolStringPoolEntryRef S) {
+ return unwrap(S)->getKey().data();
+}
+
+LLVMOrcResourceTrackerRef
+LLVMOrcJITDylibCreateResourceTracker(LLVMOrcJITDylibRef JD) {
+ auto RT = unwrap(JD)->createResourceTracker();
+ // Retain the pointer for the C API client.
+ RT->Retain();
+ return wrap(RT.get());
+}
+
+LLVMOrcResourceTrackerRef
+LLVMOrcJITDylibGetDefaultResourceTracker(LLVMOrcJITDylibRef JD) {
+ auto RT = unwrap(JD)->getDefaultResourceTracker();
+ // Retain the pointer for the C API client.
+ return wrap(RT.get());
+}
+
+void LLVMOrcReleaseResourceTracker(LLVMOrcResourceTrackerRef RT) {
+ ResourceTrackerSP TmpRT(unwrap(RT));
+ TmpRT->Release();
+}
+
+void LLVMOrcResourceTrackerTransferTo(LLVMOrcResourceTrackerRef SrcRT,
+ LLVMOrcResourceTrackerRef DstRT) {
+ ResourceTrackerSP TmpRT(unwrap(SrcRT));
+ TmpRT->transferTo(*unwrap(DstRT));
+}
+
+LLVMErrorRef LLVMOrcResourceTrackerRemove(LLVMOrcResourceTrackerRef RT) {
+ ResourceTrackerSP TmpRT(unwrap(RT));
+ return wrap(TmpRT->remove());
+}
+
+void LLVMOrcDisposeDefinitionGenerator(LLVMOrcDefinitionGeneratorRef DG) {
+ std::unique_ptr<DefinitionGenerator> TmpDG(unwrap(DG));
+}
+
+void LLVMOrcDisposeMaterializationUnit(LLVMOrcMaterializationUnitRef MU) {
+ std::unique_ptr<MaterializationUnit> TmpMU(unwrap(MU));
+}
+
+LLVMOrcMaterializationUnitRef
+LLVMOrcAbsoluteSymbols(LLVMOrcCSymbolMapPairs Syms, size_t NumPairs) {
+ SymbolMap SM;
+ for (size_t I = 0; I != NumPairs; ++I) {
+ JITSymbolFlags Flags;
+
+ if (Syms[I].Sym.Flags.GenericFlags & LLVMJITSymbolGenericFlagsExported)
+ Flags |= JITSymbolFlags::Exported;
+ if (Syms[I].Sym.Flags.GenericFlags & LLVMJITSymbolGenericFlagsWeak)
+ Flags |= JITSymbolFlags::Weak;
+
+ Flags.getTargetFlags() = Syms[I].Sym.Flags.TargetFlags;
+
+ SM[OrcV2CAPIHelper::retainSymbolStringPtr(unwrap(Syms[I].Name))] =
+ JITEvaluatedSymbol(Syms[I].Sym.Address, Flags);
+ }
+
+ return wrap(absoluteSymbols(std::move(SM)).release());
+}
+
+LLVMOrcJITDylibRef
+LLVMOrcExecutionSessionCreateBareJITDylib(LLVMOrcExecutionSessionRef ES,
+ const char *Name) {
+ return wrap(&unwrap(ES)->createBareJITDylib(Name));
+}
+
+LLVMErrorRef
+LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES,
+ LLVMOrcJITDylibRef *Result,
+ const char *Name) {
+ auto JD = unwrap(ES)->createJITDylib(Name);
+ if (!JD)
+ return wrap(JD.takeError());
+ *Result = wrap(&*JD);
+ return LLVMErrorSuccess;
+}
+
+LLVMOrcJITDylibRef
+LLVMOrcExecutionSessionGetJITDylibByName(LLVMOrcExecutionSessionRef ES,
+ const char *Name) {
+ return wrap(unwrap(ES)->getJITDylibByName(Name));
+}
+
+LLVMErrorRef LLVMOrcJITDylibDefine(LLVMOrcJITDylibRef JD,
+ LLVMOrcMaterializationUnitRef MU) {
+ std::unique_ptr<MaterializationUnit> TmpMU(unwrap(MU));
+
+ if (auto Err = unwrap(JD)->define(TmpMU)) {
+ TmpMU.release();
+ return wrap(std::move(Err));
+ }
+ return LLVMErrorSuccess;
+}
+
+LLVMErrorRef LLVMOrcJITDylibClear(LLVMOrcJITDylibRef JD) {
+ return wrap(unwrap(JD)->clear());
}
void LLVMOrcJITDylibAddGenerator(LLVMOrcJITDylibRef JD,
- LLVMOrcJITDylibDefinitionGeneratorRef DG) {
- unwrap(JD)->addGenerator(
- std::unique_ptr<JITDylib::DefinitionGenerator>(unwrap(DG)));
+ LLVMOrcDefinitionGeneratorRef DG) {
+ unwrap(JD)->addGenerator(std::unique_ptr<DefinitionGenerator>(unwrap(DG)));
+}
+
+LLVMOrcDefinitionGeneratorRef LLVMOrcCreateCustomCAPIDefinitionGenerator(
+ LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx) {
+ auto DG = std::make_unique<CAPIDefinitionGenerator>(Ctx, F);
+ return wrap(DG.release());
}
LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
- LLVMOrcJITDylibDefinitionGeneratorRef *Result, char GlobalPrefix,
+ LLVMOrcDefinitionGeneratorRef *Result, char GlobalPrefix,
LLVMOrcSymbolPredicate Filter, void *FilterCtx) {
assert(Result && "Result can not be null");
assert((Filter || !FilterCtx) &&
@@ -89,7 +315,7 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
DynamicLibrarySearchGenerator::SymbolPredicate Pred;
if (Filter)
Pred = [=](const SymbolStringPtr &Name) -> bool {
- return Filter(wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(Name)), FilterCtx);
+ return Filter(FilterCtx, wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(Name)));
};
auto ProcessSymsGenerator =
@@ -143,7 +369,7 @@ LLVMErrorRef LLVMOrcJITTargetMachineBuilderDetectHost(
}
LLVMOrcJITTargetMachineBuilderRef
-LLVMOrcJITTargetMachineBuilderFromTargetMachine(LLVMTargetMachineRef TM) {
+LLVMOrcJITTargetMachineBuilderCreateFromTargetMachine(LLVMTargetMachineRef TM) {
auto *TemplateTM = unwrap(TM);
auto JTMB =
@@ -167,6 +393,10 @@ void LLVMOrcDisposeJITTargetMachineBuilder(
delete unwrap(JTMB);
}
+void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
+ delete unwrap(ObjLayer);
+}
+
LLVMOrcLLJITBuilderRef LLVMOrcCreateLLJITBuilder(void) {
return wrap(new LLJITBuilder());
}
@@ -180,6 +410,17 @@ void LLVMOrcLLJITBuilderSetJITTargetMachineBuilder(
unwrap(Builder)->setJITTargetMachineBuilder(*unwrap(JTMB));
}
+void LLVMOrcLLJITBuilderSetObjectLinkingLayerCreator(
+ LLVMOrcLLJITBuilderRef Builder,
+ LLVMOrcLLJITBuilderObjectLinkingLayerCreatorFunction F, void *Ctx) {
+ unwrap(Builder)->setObjectLinkingLayerCreator(
+ [=](ExecutionSession &ES, const Triple &TT) {
+ auto TTStr = TT.str();
+ return std::unique_ptr<ObjectLayer>(
+ unwrap(F(Ctx, wrap(&ES), TTStr.c_str())));
+ });
+}
+
LLVMErrorRef LLVMOrcCreateLLJIT(LLVMOrcLLJITRef *Result,
LLVMOrcLLJITBuilderRef Builder) {
assert(Result && "Result can not be null");
@@ -232,10 +473,27 @@ LLVMErrorRef LLVMOrcLLJITAddObjectFile(LLVMOrcLLJITRef J, LLVMOrcJITDylibRef JD,
*unwrap(JD), std::unique_ptr<MemoryBuffer>(unwrap(ObjBuffer))));
}
+LLVMErrorRef LLVMOrcLLJITAddObjectFileWithRT(LLVMOrcLLJITRef J,
+ LLVMOrcResourceTrackerRef RT,
+ LLVMMemoryBufferRef ObjBuffer) {
+ return wrap(unwrap(J)->addObjectFile(
+ ResourceTrackerSP(unwrap(RT)),
+ std::unique_ptr<MemoryBuffer>(unwrap(ObjBuffer))));
+}
+
LLVMErrorRef LLVMOrcLLJITAddLLVMIRModule(LLVMOrcLLJITRef J,
LLVMOrcJITDylibRef JD,
LLVMOrcThreadSafeModuleRef TSM) {
- return wrap(unwrap(J)->addIRModule(*unwrap(JD), std::move(*unwrap(TSM))));
+ std::unique_ptr<ThreadSafeModule> TmpTSM(unwrap(TSM));
+ return wrap(unwrap(J)->addIRModule(*unwrap(JD), std::move(*TmpTSM)));
+}
+
+LLVMErrorRef LLVMOrcLLJITAddLLVMIRModuleWithRT(LLVMOrcLLJITRef J,
+ LLVMOrcResourceTrackerRef RT,
+ LLVMOrcThreadSafeModuleRef TSM) {
+ std::unique_ptr<ThreadSafeModule> TmpTSM(unwrap(TSM));
+ return wrap(unwrap(J)->addIRModule(ResourceTrackerSP(unwrap(RT)),
+ std::move(*TmpTSM)));
}
LLVMErrorRef LLVMOrcLLJITLookup(LLVMOrcLLJITRef J,
@@ -252,3 +510,20 @@ LLVMErrorRef LLVMOrcLLJITLookup(LLVMOrcLLJITRef J,
*Result = Sym->getAddress();
return LLVMErrorSuccess;
}
+
+LLVMOrcObjectLayerRef
+LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager(
+ LLVMOrcExecutionSessionRef ES) {
+ assert(ES && "ES must not be null");
+ return wrap(new RTDyldObjectLinkingLayer(
+ *unwrap(ES), [] { return std::make_unique<SectionMemoryManager>(); }));
+}
+
+void LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener(
+ LLVMOrcObjectLayerRef RTDyldObjLinkingLayer,
+ LLVMJITEventListenerRef Listener) {
+ assert(RTDyldObjLinkingLayer && "RTDyldObjLinkingLayer must not be null");
+ assert(Listener && "Listener must not be null");
+ reinterpret_cast<RTDyldObjectLinkingLayer *>(unwrap(RTDyldObjLinkingLayer))
+ ->registerJITEventListener(*unwrap(Listener));
+}
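
The C bindings above grow to cover resource trackers, materialization units, custom definition generators, and an RTDyld-based object layer; note also that the symbol-predicate callback now receives its context pointer first, and that LLVMOrcJITTargetMachineBuilderFromTargetMachine gains a "Create" in its name. A minimal sketch of the new resource-tracker flow, assuming J (LLVMOrcLLJITRef) and TSM (LLVMOrcThreadSafeModuleRef) were created elsewhere:

  // Attach the module to its own tracker so it can be removed independently later.
  LLVMOrcJITDylibRef MainJD = LLVMOrcLLJITGetMainJITDylib(J);
  LLVMOrcResourceTrackerRef RT = LLVMOrcJITDylibCreateResourceTracker(MainJD);

  if (LLVMErrorRef Err = LLVMOrcLLJITAddLLVMIRModuleWithRT(J, RT, TSM)) {
    // report Err (e.g. via LLVMGetErrorMessage)
  }

  // ... run JIT'd code ...

  // Remove everything added under RT, then drop the reference retained for the C client.
  if (LLVMErrorRef Err = LLVMOrcResourceTrackerRemove(RT)) {
    // report Err
  }
  LLVMOrcReleaseResourceTracker(RT);
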
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index 21925726072e..0ad666ebbebd 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -18,7 +18,7 @@ class JITDylibSearchOrderResolver : public JITSymbolResolver {
public:
JITDylibSearchOrderResolver(MaterializationResponsibility &MR) : MR(MR) {}
- void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) {
+ void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) override {
auto &ES = MR.getTargetJITDylib().getExecutionSession();
SymbolLookupSet InternedSymbols;
@@ -55,7 +55,7 @@ public:
RegisterDependencies);
}
- Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) {
+ Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override {
LookupSet Result;
for (auto &KV : MR.getSymbols()) {
@@ -77,35 +77,26 @@ namespace orc {
RTDyldObjectLinkingLayer::RTDyldObjectLinkingLayer(
ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager)
- : ObjectLayer(ES), GetMemoryManager(GetMemoryManager) {}
+ : ObjectLayer(ES), GetMemoryManager(GetMemoryManager) {
+ ES.registerResourceManager(*this);
+}
RTDyldObjectLinkingLayer::~RTDyldObjectLinkingLayer() {
- std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
- for (auto &MemMgr : MemMgrs) {
- for (auto *L : EventListeners)
- L->notifyFreeingObject(
- static_cast<uint64_t>(reinterpret_cast<uintptr_t>(MemMgr.get())));
- MemMgr->deregisterEHFrames();
- }
+ assert(MemMgrs.empty() && "Layer destroyed with resources still attached");
}
-void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
- std::unique_ptr<MemoryBuffer> O) {
+void RTDyldObjectLinkingLayer::emit(
+ std::unique_ptr<MaterializationResponsibility> R,
+ std::unique_ptr<MemoryBuffer> O) {
assert(O && "Object must not be null");
- // This method launches an asynchronous link step that will fulfill our
- // materialization responsibility. We need to switch R to be heap
- // allocated before that happens so it can live as long as the asynchronous
- // link needs it to (i.e. it must be able to outlive this method).
- auto SharedR = std::make_shared<MaterializationResponsibility>(std::move(R));
-
auto &ES = getExecutionSession();
auto Obj = object::ObjectFile::createObjectFile(*O);
if (!Obj) {
getExecutionSession().reportError(Obj.takeError());
- SharedR->failMaterialization();
+ R->failMaterialization();
return;
}
@@ -121,7 +112,7 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
continue;
} else {
ES.reportError(SymType.takeError());
- R.failMaterialization();
+ R->failMaterialization();
return;
}
@@ -129,7 +120,7 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
if (!SymFlagsOrErr) {
// TODO: Test this error.
ES.reportError(SymFlagsOrErr.takeError());
- R.failMaterialization();
+ R->failMaterialization();
return;
}
@@ -139,46 +130,44 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
InternalSymbols->insert(*SymName);
else {
ES.reportError(SymName.takeError());
- R.failMaterialization();
+ R->failMaterialization();
return;
}
}
}
}
- auto K = R.getVModuleKey();
- RuntimeDyld::MemoryManager *MemMgr = nullptr;
+ auto MemMgr = GetMemoryManager();
+ auto &MemMgrRef = *MemMgr;
- // Create a record a memory manager for this object.
- {
- auto Tmp = GetMemoryManager();
- std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
- MemMgrs.push_back(std::move(Tmp));
- MemMgr = MemMgrs.back().get();
- }
+ // Switch to shared ownership of MR so that it can be captured by both
+ // lambdas below.
+ std::shared_ptr<MaterializationResponsibility> SharedR(std::move(R));
JITDylibSearchOrderResolver Resolver(*SharedR);
jitLinkForORC(
object::OwningBinary<object::ObjectFile>(std::move(*Obj), std::move(O)),
- *MemMgr, Resolver, ProcessAllSections,
- [this, K, SharedR, MemMgr, InternalSymbols](
+ MemMgrRef, Resolver, ProcessAllSections,
+ [this, SharedR, &MemMgrRef, InternalSymbols](
const object::ObjectFile &Obj,
- std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ RuntimeDyld::LoadedObjectInfo &LoadedObjInfo,
std::map<StringRef, JITEvaluatedSymbol> ResolvedSymbols) {
- return onObjLoad(K, *SharedR, Obj, MemMgr, std::move(LoadedObjInfo),
+ return onObjLoad(*SharedR, Obj, MemMgrRef, LoadedObjInfo,
ResolvedSymbols, *InternalSymbols);
},
- [this, K, SharedR, MemMgr](object::OwningBinary<object::ObjectFile> Obj,
- Error Err) mutable {
- onObjEmit(K, *SharedR, std::move(Obj), MemMgr, std::move(Err));
+ [this, SharedR, MemMgr = std::move(MemMgr)](
+ object::OwningBinary<object::ObjectFile> Obj,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ Error Err) mutable {
+ onObjEmit(*SharedR, std::move(Obj), std::move(MemMgr),
+ std::move(LoadedObjInfo), std::move(Err));
});
}
void RTDyldObjectLinkingLayer::registerJITEventListener(JITEventListener &L) {
std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
- assert(llvm::none_of(EventListeners,
- [&](JITEventListener *O) { return O == &L; }) &&
+ assert(!llvm::is_contained(EventListeners, &L) &&
"Listener has already been registered");
EventListeners.push_back(&L);
}
@@ -191,9 +180,9 @@ void RTDyldObjectLinkingLayer::unregisterJITEventListener(JITEventListener &L) {
}
Error RTDyldObjectLinkingLayer::onObjLoad(
- VModuleKey K, MaterializationResponsibility &R,
- const object::ObjectFile &Obj, RuntimeDyld::MemoryManager *MemMgr,
- std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ MaterializationResponsibility &R, const object::ObjectFile &Obj,
+ RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::LoadedObjectInfo &LoadedObjInfo,
std::map<StringRef, JITEvaluatedSymbol> Resolved,
std::set<StringRef> &InternalSymbols) {
SymbolFlagsMap ExtraSymbolsToClaim;
@@ -274,19 +263,16 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
}
if (NotifyLoaded)
- NotifyLoaded(K, Obj, *LoadedObjInfo);
-
- std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
- assert(!LoadedObjInfos.count(MemMgr) && "Duplicate loaded info for MemMgr");
- LoadedObjInfos[MemMgr] = std::move(LoadedObjInfo);
+ NotifyLoaded(R, Obj, LoadedObjInfo);
return Error::success();
}
void RTDyldObjectLinkingLayer::onObjEmit(
- VModuleKey K, MaterializationResponsibility &R,
+ MaterializationResponsibility &R,
object::OwningBinary<object::ObjectFile> O,
- RuntimeDyld::MemoryManager *MemMgr, Error Err) {
+ std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo, Error Err) {
if (Err) {
getExecutionSession().reportError(std::move(Err));
R.failMaterialization();
@@ -306,27 +292,60 @@ void RTDyldObjectLinkingLayer::onObjEmit(
// Run EventListener notifyLoaded callbacks.
{
std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
- auto LOIItr = LoadedObjInfos.find(MemMgr);
- assert(LOIItr != LoadedObjInfos.end() && "LoadedObjInfo missing");
for (auto *L : EventListeners)
- L->notifyObjectLoaded(
- static_cast<uint64_t>(reinterpret_cast<uintptr_t>(MemMgr)), *Obj,
- *LOIItr->second);
- LoadedObjInfos.erase(MemMgr);
+ L->notifyObjectLoaded(pointerToJITTargetAddress(MemMgr.get()), *Obj,
+ *LoadedObjInfo);
}
if (NotifyEmitted)
- NotifyEmitted(K, std::move(ObjBuffer));
+ NotifyEmitted(R, std::move(ObjBuffer));
+
+ if (auto Err = R.withResourceKeyDo(
+ [&](ResourceKey K) { MemMgrs[K].push_back(std::move(MemMgr)); })) {
+ getExecutionSession().reportError(std::move(Err));
+ R.failMaterialization();
+ }
}
-LegacyRTDyldObjectLinkingLayer::LegacyRTDyldObjectLinkingLayer(
- ExecutionSession &ES, ResourcesGetter GetResources,
- NotifyLoadedFtor NotifyLoaded, NotifyFinalizedFtor NotifyFinalized,
- NotifyFreedFtor NotifyFreed)
- : ES(ES), GetResources(std::move(GetResources)),
- NotifyLoaded(std::move(NotifyLoaded)),
- NotifyFinalized(std::move(NotifyFinalized)),
- NotifyFreed(std::move(NotifyFreed)), ProcessAllSections(false) {}
+Error RTDyldObjectLinkingLayer::handleRemoveResources(ResourceKey K) {
+
+ std::vector<MemoryManagerUP> MemMgrsToRemove;
+
+ getExecutionSession().runSessionLocked([&] {
+ auto I = MemMgrs.find(K);
+ if (I != MemMgrs.end()) {
+ std::swap(MemMgrsToRemove, I->second);
+ MemMgrs.erase(I);
+ }
+ });
+
+ {
+ std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
+ for (auto &MemMgr : MemMgrsToRemove) {
+ for (auto *L : EventListeners)
+ L->notifyFreeingObject(pointerToJITTargetAddress(MemMgr.get()));
+ MemMgr->deregisterEHFrames();
+ }
+ }
+
+ return Error::success();
+}
+
+void RTDyldObjectLinkingLayer::handleTransferResources(ResourceKey DstKey,
+ ResourceKey SrcKey) {
+ auto I = MemMgrs.find(SrcKey);
+ if (I != MemMgrs.end()) {
+ auto &SrcMemMgrs = I->second;
+ auto &DstMemMgrs = MemMgrs[DstKey];
+ DstMemMgrs.reserve(DstMemMgrs.size() + SrcMemMgrs.size());
+ for (auto &MemMgr : SrcMemMgrs)
+ DstMemMgrs.push_back(std::move(MemMgr));
+
+ // Erase SrcKey entry using value rather than iterator I: I may have been
+ // invalidated when we looked up DstKey.
+ MemMgrs.erase(SrcKey);
+ }
+}
} // End namespace orc.
} // End namespace llvm.
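
With the rewrite above, RTDyldObjectLinkingLayer registers itself as a ResourceManager: memory managers are recorded per ResourceKey in onObjEmit() rather than per VModuleKey, so freeing JIT'd objects now happens through resource-tracker removal instead of layer destruction. A short C++ sketch of the resulting usage, assuming J is an LLJIT instance built on this layer and Obj a std::unique_ptr<MemoryBuffer> holding an object file:

  auto &JD = J->getMainJITDylib();
  auto RT = JD.createResourceTracker();

  if (auto Err = J->addObjectFile(RT, std::move(Obj)))
    return Err;                       // linked via RTDyldObjectLinkingLayer::emit()

  // Removing the tracker routes through handleRemoveResources(): the layer
  // notifies JITEventListeners, deregisters EH frames, and destroys the
  // RuntimeDyld::MemoryManager recorded for this object in onObjEmit().
  if (auto Err = RT->remove())
    return Err;
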
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/OrcError.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp
index cc99e154fbec..fdad90cbcfb7 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/OrcError.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
@@ -71,7 +71,7 @@ public:
};
static ManagedStatic<OrcErrorCategory> OrcErrCat;
-}
+} // namespace
namespace llvm {
namespace orc {
@@ -84,9 +84,8 @@ std::error_code orcError(OrcErrorCode ErrCode) {
return std::error_code(static_cast<UT>(ErrCode), *OrcErrCat);
}
-
DuplicateDefinition::DuplicateDefinition(std::string SymbolName)
- : SymbolName(std::move(SymbolName)) {}
+ : SymbolName(std::move(SymbolName)) {}
std::error_code DuplicateDefinition::convertToErrorCode() const {
return orcError(OrcErrorCode::DuplicateDefinition);
@@ -101,7 +100,7 @@ const std::string &DuplicateDefinition::getSymbolName() const {
}
JITSymbolNotFound::JITSymbolNotFound(std::string SymbolName)
- : SymbolName(std::move(SymbolName)) {}
+ : SymbolName(std::move(SymbolName)) {}
std::error_code JITSymbolNotFound::convertToErrorCode() const {
typedef std::underlying_type<OrcErrorCode>::type UT;
@@ -117,5 +116,5 @@ const std::string &JITSymbolNotFound::getSymbolName() const {
return SymbolName;
}
-}
-}
+} // namespace orc
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/RPCError.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp
index 3cf78fd9f7ba..a55cb220f218 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/RPCError.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp
@@ -10,21 +10,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ExecutionEngine/Orc/RPC/RPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
-#include <system_error>
#include <string>
+#include <system_error>
-char llvm::orc::rpc::RPCFatalError::ID = 0;
-char llvm::orc::rpc::ConnectionClosed::ID = 0;
-char llvm::orc::rpc::ResponseAbandoned::ID = 0;
-char llvm::orc::rpc::CouldNotNegotiate::ID = 0;
+char llvm::orc::shared::RPCFatalError::ID = 0;
+char llvm::orc::shared::ConnectionClosed::ID = 0;
+char llvm::orc::shared::ResponseAbandoned::ID = 0;
+char llvm::orc::shared::CouldNotNegotiate::ID = 0;
namespace llvm {
namespace orc {
-namespace rpc {
+namespace shared {
std::error_code ConnectionClosed::convertToErrorCode() const {
return orcError(OrcErrorCode::RPCConnectionClosed);
@@ -53,7 +53,6 @@ void CouldNotNegotiate::log(raw_ostream &OS) const {
OS << "Could not negotiate RPC function " << Signature;
}
-
-} // end namespace rpc
+} // end namespace shared
} // end namespace orc
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.cpp
new file mode 100644
index 000000000000..52d11f0741d4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.cpp
@@ -0,0 +1,44 @@
+//===---------- TargetProcessControlTypes.cpp - Shared TPC types ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// TargetProcessControl types.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+
+namespace llvm {
+namespace orc {
+namespace tpctypes {
+
+WrapperFunctionResult WrapperFunctionResult::from(StringRef S) {
+ CWrapperFunctionResult R;
+ zeroInit(R);
+ R.Size = S.size();
+ if (R.Size > sizeof(uint64_t)) {
+ R.Data.ValuePtr = new uint8_t[R.Size];
+ memcpy(R.Data.ValuePtr, S.data(), R.Size);
+ R.Destroy = destroyWithDeleteArray;
+ } else
+ memcpy(R.Data.Value, S.data(), R.Size);
+ return R;
+}
+
+void WrapperFunctionResult::destroyWithFree(CWrapperFunctionResultData Data,
+ uint64_t Size) {
+ free(Data.ValuePtr);
+}
+
+void WrapperFunctionResult::destroyWithDeleteArray(
+ CWrapperFunctionResultData Data, uint64_t Size) {
+ delete[] Data.ValuePtr;
+}
+
+} // end namespace tpctypes
+} // end namespace orc
+} // end namespace llvm
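
WrapperFunctionResult::from() above implements a small-buffer optimization: payloads of at most sizeof(uint64_t) bytes are copied into the inline Data.Value buffer, while larger payloads go to a heap array with destroyWithDeleteArray recorded, so the plain C struct can be handed across the target-process boundary and released correctly. A tiny sketch of producing such a result (the payload string is only illustrative):

  using namespace llvm::orc::tpctypes;

  // <= 8 bytes stays inline; anything larger is copied to a new uint8_t[].
  WrapperFunctionResult R = WrapperFunctionResult::from("ok");
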
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp
index 7240c1ed0ce9..c2fa4466eab6 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp
@@ -106,11 +106,10 @@ BlockFreqQuery::ResultTy BlockFreqQuery::operator()(Function &F) {
assert(IBBs.size() == BBFreqs.size() && "BB Count Mismatch");
- llvm::sort(BBFreqs.begin(), BBFreqs.end(),
- [](decltype(BBFreqs)::const_reference BBF,
- decltype(BBFreqs)::const_reference BBS) {
- return BBF.second > BBS.second ? true : false;
- });
+ llvm::sort(BBFreqs, [](decltype(BBFreqs)::const_reference BBF,
+ decltype(BBFreqs)::const_reference BBS) {
+ return BBF.second > BBS.second ? true : false;
+ });
// ignoring number of direct calls in a BB
auto Topk = numBBToGet(BBFreqs.size());
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Speculation.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
index 0530b1a97b67..0b4755fe23cf 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
@@ -16,9 +16,6 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Verifier.h"
-#include "llvm/Support/Debug.h"
-
-#include <vector>
namespace llvm {
@@ -58,7 +55,7 @@ Error Speculator::addSpeculationRuntime(JITDylib &JD,
// If two modules, share the same LLVMContext, different threads must
// not access them concurrently without locking the associated LLVMContext
// this implementation follows this contract.
-void IRSpeculationLayer::emit(MaterializationResponsibility R,
+void IRSpeculationLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
ThreadSafeModule TSM) {
assert(TSM && "Speculation Layer received Null Module ?");
@@ -130,7 +127,7 @@ void IRSpeculationLayer::emit(MaterializationResponsibility R,
assert(Mutator.GetInsertBlock()->getParent() == &Fn &&
"IR builder association mismatch?");
S.registerSymbols(internToJITSymbols(IRNames.getValue()),
- &R.getTargetJITDylib());
+ &R->getTargetJITDylib());
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp
new file mode 100644
index 000000000000..bbf3ada1d4ba
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp
@@ -0,0 +1,70 @@
+//===---------------- TPCDynamicLibrarySearchGenerator.cpp ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h"
+
+namespace llvm {
+namespace orc {
+
+Expected<std::unique_ptr<TPCDynamicLibrarySearchGenerator>>
+TPCDynamicLibrarySearchGenerator::Load(TargetProcessControl &TPC,
+ const char *LibraryPath,
+ SymbolPredicate Allow) {
+ auto Handle = TPC.loadDylib(LibraryPath);
+ if (!Handle)
+ return Handle.takeError();
+
+ return std::make_unique<TPCDynamicLibrarySearchGenerator>(TPC, *Handle,
+ std::move(Allow));
+}
+
+Error TPCDynamicLibrarySearchGenerator::tryToGenerate(
+ LookupState &LS, LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags, const SymbolLookupSet &Symbols) {
+
+ if (Symbols.empty())
+ return Error::success();
+
+ SymbolLookupSet LookupSymbols;
+
+ for (auto &KV : Symbols) {
+ // Skip symbols that don't match the filter.
+ if (Allow && !Allow(KV.first))
+ continue;
+ LookupSymbols.add(KV.first, SymbolLookupFlags::WeaklyReferencedSymbol);
+ }
+
+ SymbolMap NewSymbols;
+
+ TargetProcessControl::LookupRequest Request(H, LookupSymbols);
+ auto Result = TPC.lookupSymbols(Request);
+ if (!Result)
+ return Result.takeError();
+
+ assert(Result->size() == 1 && "Results for more than one library returned");
+ assert(Result->front().size() == LookupSymbols.size() &&
+ "Result has incorrect number of elements");
+
+ auto ResultI = Result->front().begin();
+ for (auto &KV : LookupSymbols) {
+ if (*ResultI)
+ NewSymbols[KV.first] =
+ JITEvaluatedSymbol(*ResultI, JITSymbolFlags::Exported);
+ ++ResultI;
+ }
+
+ // If there were no resolved symbols bail out.
+ if (NewSymbols.empty())
+ return Error::success();
+
+ // Define resolved symbols.
+ return JD.define(absoluteSymbols(std::move(NewSymbols)));
+}
+
+} // end namespace orc
+} // end namespace llvm
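
TPCDynamicLibrarySearchGenerator is the TargetProcessControl counterpart of DynamicLibrarySearchGenerator: unresolved lookups are forwarded to a dylib loaded in the target process (which may be the current process or a remote one) and any hits are defined as absolute symbols. A minimal C++ sketch of attaching one to a JITDylib, assuming TPC and JD exist; the library path is purely illustrative:

  auto Gen = llvm::orc::TPCDynamicLibrarySearchGenerator::Load(
      TPC, "/usr/lib/libm.so", /*Allow=*/{});
  if (!Gen)
    return Gen.takeError();
  JD.addGenerator(std::move(*Gen));
  // Lookups in JD that would otherwise fail now fall back to the loaded dylib,
  // following tryToGenerate() above.
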
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCEHFrameRegistrar.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCEHFrameRegistrar.cpp
new file mode 100644
index 000000000000..4f901ce6d445
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCEHFrameRegistrar.cpp
@@ -0,0 +1,80 @@
+//===------ TPCEHFrameRegistrar.cpp - TPC-based eh-frame registration -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+
+namespace llvm {
+namespace orc {
+
+Expected<std::unique_ptr<TPCEHFrameRegistrar>>
+TPCEHFrameRegistrar::Create(TargetProcessControl &TPC) {
+ // FIXME: Proper mangling here -- we really need to decouple linker mangling
+ // from DataLayout.
+
+ // Find the addresses of the registration/deregistration functions in the
+ // target process.
+ auto ProcessHandle = TPC.loadDylib(nullptr);
+ if (!ProcessHandle)
+ return ProcessHandle.takeError();
+
+ std::string RegisterWrapperName, DeregisterWrapperName;
+ if (TPC.getTargetTriple().isOSBinFormatMachO()) {
+ RegisterWrapperName += '_';
+ DeregisterWrapperName += '_';
+ }
+ RegisterWrapperName += "llvm_orc_registerEHFrameSectionWrapper";
+ DeregisterWrapperName += "llvm_orc_deregisterEHFrameSectionWrapper";
+
+ SymbolLookupSet RegistrationSymbols;
+ RegistrationSymbols.add(TPC.intern(RegisterWrapperName));
+ RegistrationSymbols.add(TPC.intern(DeregisterWrapperName));
+
+ auto Result = TPC.lookupSymbols({{*ProcessHandle, RegistrationSymbols}});
+ if (!Result)
+ return Result.takeError();
+
+ assert(Result->size() == 1 && "Unexpected number of dylibs in result");
+ assert((*Result)[0].size() == 2 &&
+ "Unexpected number of addresses in result");
+
+ auto RegisterEHFrameWrapperFnAddr = (*Result)[0][0];
+ auto DeregisterEHFrameWrapperFnAddr = (*Result)[0][1];
+
+ return std::make_unique<TPCEHFrameRegistrar>(
+ TPC, RegisterEHFrameWrapperFnAddr, DeregisterEHFrameWrapperFnAddr);
+}
+
+Error TPCEHFrameRegistrar::registerEHFrames(JITTargetAddress EHFrameSectionAddr,
+ size_t EHFrameSectionSize) {
+ constexpr size_t ArgBufferSize = sizeof(uint64_t) + sizeof(uint64_t);
+ uint8_t ArgBuffer[ArgBufferSize];
+ BinaryStreamWriter ArgWriter(
+ MutableArrayRef<uint8_t>(ArgBuffer, ArgBufferSize),
+ support::endianness::big);
+ cantFail(ArgWriter.writeInteger(static_cast<uint64_t>(EHFrameSectionAddr)));
+ cantFail(ArgWriter.writeInteger(static_cast<uint64_t>(EHFrameSectionSize)));
+
+ return TPC.runWrapper(RegisterEHFrameWrapperFnAddr, ArgBuffer).takeError();
+}
+
+Error TPCEHFrameRegistrar::deregisterEHFrames(
+ JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) {
+ constexpr size_t ArgBufferSize = sizeof(uint64_t) + sizeof(uint64_t);
+ uint8_t ArgBuffer[ArgBufferSize];
+ BinaryStreamWriter ArgWriter(
+ MutableArrayRef<uint8_t>(ArgBuffer, ArgBufferSize),
+ support::endianness::big);
+ cantFail(ArgWriter.writeInteger(static_cast<uint64_t>(EHFrameSectionAddr)));
+ cantFail(ArgWriter.writeInteger(static_cast<uint64_t>(EHFrameSectionSize)));
+
+ return TPC.runWrapper(DeregisterEHFrameWrapperFnAddr, ArgBuffer).takeError();
+}
+
+} // end namespace orc
+} // end namespace llvm
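
TPCEHFrameRegistrar lets a JIT that targets another process register eh-frame sections there: it resolves the llvm_orc_registerEHFrameSectionWrapper and llvm_orc_deregisterEHFrameSectionWrapper functions in the target, then invokes them via TPC.runWrapper() with a big-endian (address, size) argument buffer. A hedged sketch of the typical wiring into a JITLink object layer; the EHFrameRegistrationPlugin constructor used here is assumed from the LLVM 12 tree, not shown in this diff:

  // Assumes ES is the ExecutionSession, TPC the TargetProcessControl, and
  // ObjLayer an llvm::orc::ObjectLinkingLayer (the JITLink-based layer).
  auto Registrar = llvm::orc::TPCEHFrameRegistrar::Create(TPC);
  if (!Registrar)
    return Registrar.takeError();
  ObjLayer.addPlugin(std::make_unique<llvm::orc::EHFrameRegistrationPlugin>(
      ES, std::move(*Registrar)));
  // Each object linked by ObjLayer now has its eh-frame section registered in
  // the target process, so exceptions can unwind through JIT'd frames.
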
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp
new file mode 100644
index 000000000000..7989ec41952d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp
@@ -0,0 +1,423 @@
+//===------ TargetProcessControl.cpp -- Target process control APIs -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h"
+
+#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <future>
+
+using namespace llvm;
+using namespace llvm::orc;
+
+namespace llvm {
+namespace orc {
+
+class TPCIndirectionUtilsAccess {
+public:
+ using IndirectStubInfo = TPCIndirectionUtils::IndirectStubInfo;
+ using IndirectStubInfoVector = TPCIndirectionUtils::IndirectStubInfoVector;
+
+ static Expected<IndirectStubInfoVector>
+ getIndirectStubs(TPCIndirectionUtils &TPCIU, unsigned NumStubs) {
+ return TPCIU.getIndirectStubs(NumStubs);
+ };
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+namespace {
+
+class TPCTrampolinePool : public TrampolinePool {
+public:
+ TPCTrampolinePool(TPCIndirectionUtils &TPCIU);
+ Error deallocatePool();
+
+protected:
+ Error grow() override;
+
+ using Allocation = jitlink::JITLinkMemoryManager::Allocation;
+
+ TPCIndirectionUtils &TPCIU;
+ unsigned TrampolineSize = 0;
+ unsigned TrampolinesPerPage = 0;
+ std::vector<std::unique_ptr<Allocation>> TrampolineBlocks;
+};
+
+class TPCIndirectStubsManager : public IndirectStubsManager,
+ private TPCIndirectionUtilsAccess {
+public:
+ TPCIndirectStubsManager(TPCIndirectionUtils &TPCIU) : TPCIU(TPCIU) {}
+
+ Error deallocateStubs();
+
+ Error createStub(StringRef StubName, JITTargetAddress StubAddr,
+ JITSymbolFlags StubFlags) override;
+
+ Error createStubs(const StubInitsMap &StubInits) override;
+
+ JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override;
+
+ JITEvaluatedSymbol findPointer(StringRef Name) override;
+
+ Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override;
+
+private:
+ using StubInfo = std::pair<IndirectStubInfo, JITSymbolFlags>;
+
+ std::mutex ISMMutex;
+ TPCIndirectionUtils &TPCIU;
+ StringMap<StubInfo> StubInfos;
+};
+
+TPCTrampolinePool::TPCTrampolinePool(TPCIndirectionUtils &TPCIU)
+ : TPCIU(TPCIU) {
+ auto &TPC = TPCIU.getTargetProcessControl();
+ auto &ABI = TPCIU.getABISupport();
+
+ TrampolineSize = ABI.getTrampolineSize();
+ TrampolinesPerPage =
+ (TPC.getPageSize() - ABI.getPointerSize()) / TrampolineSize;
+}
+
+Error TPCTrampolinePool::deallocatePool() {
+ Error Err = Error::success();
+ for (auto &Alloc : TrampolineBlocks)
+ Err = joinErrors(std::move(Err), Alloc->deallocate());
+ return Err;
+}
+
+Error TPCTrampolinePool::grow() {
+ assert(AvailableTrampolines.empty() &&
+ "Grow called with trampolines still available");
+
+ auto ResolverAddress = TPCIU.getResolverBlockAddress();
+ assert(ResolverAddress && "Resolver address can not be null");
+
+ auto &TPC = TPCIU.getTargetProcessControl();
+ constexpr auto TrampolinePagePermissions =
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC);
+ auto PageSize = TPC.getPageSize();
+ jitlink::JITLinkMemoryManager::SegmentsRequestMap Request;
+ Request[TrampolinePagePermissions] = {PageSize, static_cast<size_t>(PageSize),
+ 0};
+ auto Alloc = TPC.getMemMgr().allocate(nullptr, Request);
+
+ if (!Alloc)
+ return Alloc.takeError();
+
+ unsigned NumTrampolines = TrampolinesPerPage;
+
+ auto WorkingMemory = (*Alloc)->getWorkingMemory(TrampolinePagePermissions);
+ auto TargetAddress = (*Alloc)->getTargetMemory(TrampolinePagePermissions);
+
+ TPCIU.getABISupport().writeTrampolines(WorkingMemory.data(), TargetAddress,
+ ResolverAddress, NumTrampolines);
+
+ auto TargetAddr = (*Alloc)->getTargetMemory(TrampolinePagePermissions);
+ for (unsigned I = 0; I < NumTrampolines; ++I)
+ AvailableTrampolines.push_back(TargetAddr + (I * TrampolineSize));
+
+ if (auto Err = (*Alloc)->finalize())
+ return Err;
+
+ TrampolineBlocks.push_back(std::move(*Alloc));
+
+ return Error::success();
+}
+
+Error TPCIndirectStubsManager::createStub(StringRef StubName,
+ JITTargetAddress StubAddr,
+ JITSymbolFlags StubFlags) {
+ StubInitsMap SIM;
+ SIM[StubName] = std::make_pair(StubAddr, StubFlags);
+ return createStubs(SIM);
+}
+
+Error TPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) {
+ auto AvailableStubInfos = getIndirectStubs(TPCIU, StubInits.size());
+ if (!AvailableStubInfos)
+ return AvailableStubInfos.takeError();
+
+ {
+ std::lock_guard<std::mutex> Lock(ISMMutex);
+ unsigned ASIdx = 0;
+ for (auto &SI : StubInits) {
+ auto &A = (*AvailableStubInfos)[ASIdx++];
+ StubInfos[SI.first()] = std::make_pair(A, SI.second.second);
+ }
+ }
+
+ auto &MemAccess = TPCIU.getTargetProcessControl().getMemoryAccess();
+ switch (TPCIU.getABISupport().getPointerSize()) {
+ case 4: {
+ unsigned ASIdx = 0;
+ std::vector<tpctypes::UInt32Write> PtrUpdates;
+ for (auto &SI : StubInits)
+ PtrUpdates.push_back({(*AvailableStubInfos)[ASIdx++].PointerAddress,
+ static_cast<uint32_t>(SI.second.first)});
+ return MemAccess.writeUInt32s(PtrUpdates);
+ }
+ case 8: {
+ unsigned ASIdx = 0;
+ std::vector<tpctypes::UInt64Write> PtrUpdates;
+ for (auto &SI : StubInits)
+ PtrUpdates.push_back({(*AvailableStubInfos)[ASIdx++].PointerAddress,
+ static_cast<uint64_t>(SI.second.first)});
+ return MemAccess.writeUInt64s(PtrUpdates);
+ }
+ default:
+ return make_error<StringError>("Unsupported pointer size",
+ inconvertibleErrorCode());
+ }
+}
+
+JITEvaluatedSymbol TPCIndirectStubsManager::findStub(StringRef Name,
+ bool ExportedStubsOnly) {
+ std::lock_guard<std::mutex> Lock(ISMMutex);
+ auto I = StubInfos.find(Name);
+ if (I == StubInfos.end())
+ return nullptr;
+ return {I->second.first.StubAddress, I->second.second};
+}
+
+JITEvaluatedSymbol TPCIndirectStubsManager::findPointer(StringRef Name) {
+ std::lock_guard<std::mutex> Lock(ISMMutex);
+ auto I = StubInfos.find(Name);
+ if (I == StubInfos.end())
+ return nullptr;
+ return {I->second.first.PointerAddress, I->second.second};
+}
+
+Error TPCIndirectStubsManager::updatePointer(StringRef Name,
+ JITTargetAddress NewAddr) {
+
+ JITTargetAddress PtrAddr = 0;
+ {
+ std::lock_guard<std::mutex> Lock(ISMMutex);
+ auto I = StubInfos.find(Name);
+ if (I == StubInfos.end())
+ return make_error<StringError>("Unknown stub name",
+ inconvertibleErrorCode());
+ PtrAddr = I->second.first.PointerAddress;
+ }
+
+ auto &MemAccess = TPCIU.getTargetProcessControl().getMemoryAccess();
+ switch (TPCIU.getABISupport().getPointerSize()) {
+ case 4: {
+ tpctypes::UInt32Write PUpdate(PtrAddr, NewAddr);
+ return MemAccess.writeUInt32s(PUpdate);
+ }
+ case 8: {
+ tpctypes::UInt64Write PUpdate(PtrAddr, NewAddr);
+ return MemAccess.writeUInt64s(PUpdate);
+ }
+ default:
+ return make_error<StringError>("Unsupported pointer size",
+ inconvertibleErrorCode());
+ }
+}
+
+} // end anonymous namespace.
+
+namespace llvm {
+namespace orc {
+
+TPCIndirectionUtils::ABISupport::~ABISupport() {}
+
+Expected<std::unique_ptr<TPCIndirectionUtils>>
+TPCIndirectionUtils::Create(TargetProcessControl &TPC) {
+ const auto &TT = TPC.getTargetTriple();
+ switch (TT.getArch()) {
+ default:
+ return make_error<StringError>(
+ std::string("No TPCIndirectionUtils available for ") + TT.str(),
+ inconvertibleErrorCode());
+ case Triple::aarch64:
+ case Triple::aarch64_32:
+ return CreateWithABI<OrcAArch64>(TPC);
+
+ case Triple::x86:
+ return CreateWithABI<OrcI386>(TPC);
+
+ case Triple::mips:
+ return CreateWithABI<OrcMips32Be>(TPC);
+
+ case Triple::mipsel:
+ return CreateWithABI<OrcMips32Le>(TPC);
+
+ case Triple::mips64:
+ case Triple::mips64el:
+ return CreateWithABI<OrcMips64>(TPC);
+
+ case Triple::x86_64:
+ if (TT.getOS() == Triple::OSType::Win32)
+ return CreateWithABI<OrcX86_64_Win32>(TPC);
+ else
+ return CreateWithABI<OrcX86_64_SysV>(TPC);
+ }
+}
+
+Error TPCIndirectionUtils::cleanup() {
+ Error Err = Error::success();
+
+ for (auto &A : IndirectStubAllocs)
+ Err = joinErrors(std::move(Err), A->deallocate());
+
+ if (TP)
+ Err = joinErrors(std::move(Err),
+ static_cast<TPCTrampolinePool &>(*TP).deallocatePool());
+
+ if (ResolverBlock)
+ Err = joinErrors(std::move(Err), ResolverBlock->deallocate());
+
+ return Err;
+}
+
+Expected<JITTargetAddress>
+TPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr,
+ JITTargetAddress ReentryCtxAddr) {
+ assert(ABI && "ABI can not be null");
+ constexpr auto ResolverBlockPermissions =
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC);
+ auto ResolverSize = ABI->getResolverCodeSize();
+
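+ // Reserve a single read/execute segment sized for the ABI's resolver code,
+ // write the resolver into its working memory, then finalize the allocation
+ // in the target.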
+ jitlink::JITLinkMemoryManager::SegmentsRequestMap Request;
+ Request[ResolverBlockPermissions] = {TPC.getPageSize(),
+ static_cast<size_t>(ResolverSize), 0};
+ auto Alloc = TPC.getMemMgr().allocate(nullptr, Request);
+ if (!Alloc)
+ return Alloc.takeError();
+
+ auto WorkingMemory = (*Alloc)->getWorkingMemory(ResolverBlockPermissions);
+ ResolverBlockAddr = (*Alloc)->getTargetMemory(ResolverBlockPermissions);
+ ABI->writeResolverCode(WorkingMemory.data(), ResolverBlockAddr, ReentryFnAddr,
+ ReentryCtxAddr);
+
+ if (auto Err = (*Alloc)->finalize())
+ return std::move(Err);
+
+ ResolverBlock = std::move(*Alloc);
+ return ResolverBlockAddr;
+}
+
+std::unique_ptr<IndirectStubsManager>
+TPCIndirectionUtils::createIndirectStubsManager() {
+ return std::make_unique<TPCIndirectStubsManager>(*this);
+}
+
+TrampolinePool &TPCIndirectionUtils::getTrampolinePool() {
+ if (!TP)
+ TP = std::make_unique<TPCTrampolinePool>(*this);
+ return *TP;
+}
+
+LazyCallThroughManager &TPCIndirectionUtils::createLazyCallThroughManager(
+ ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr) {
+ assert(!LCTM &&
+ "createLazyCallThroughManager can not have been called before");
+ LCTM = std::make_unique<LazyCallThroughManager>(ES, ErrorHandlerAddr,
+ &getTrampolinePool());
+ return *LCTM;
+}
+
+TPCIndirectionUtils::TPCIndirectionUtils(TargetProcessControl &TPC,
+ std::unique_ptr<ABISupport> ABI)
+ : TPC(TPC), ABI(std::move(ABI)) {
+ assert(this->ABI && "ABI can not be null");
+
+ assert(TPC.getPageSize() > getABISupport().getStubSize() &&
+ "Stubs larger than one page are not supported");
+}
+
+Expected<TPCIndirectionUtils::IndirectStubInfoVector>
+TPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) {
+
+ std::lock_guard<std::mutex> Lock(TPCUIMutex);
+
+ // If there aren't enough stubs available then allocate some more.
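+ // Stub and pointer blocks are reserved in page-sized chunks (read/execute
+ // for stubs, read/write for pointers), so this may create more stubs than
+ // requested; the extras remain in AvailableIndirectStubs for later calls.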
+ if (NumStubs > AvailableIndirectStubs.size()) {
+ auto NumStubsToAllocate = NumStubs;
+ auto PageSize = TPC.getPageSize();
+ auto StubBytes = alignTo(NumStubsToAllocate * ABI->getStubSize(), PageSize);
+ NumStubsToAllocate = StubBytes / ABI->getStubSize();
+ auto PointerBytes =
+ alignTo(NumStubsToAllocate * ABI->getPointerSize(), PageSize);
+
+ constexpr auto StubPagePermissions =
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC);
+ constexpr auto PointerPagePermissions =
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE);
+
+ jitlink::JITLinkMemoryManager::SegmentsRequestMap Request;
+ Request[StubPagePermissions] = {PageSize, static_cast<size_t>(StubBytes),
+ 0};
+ Request[PointerPagePermissions] = {PageSize, 0, PointerBytes};
+ auto Alloc = TPC.getMemMgr().allocate(nullptr, Request);
+ if (!Alloc)
+ return Alloc.takeError();
+
+ auto StubTargetAddr = (*Alloc)->getTargetMemory(StubPagePermissions);
+ auto PointerTargetAddr = (*Alloc)->getTargetMemory(PointerPagePermissions);
+
+ ABI->writeIndirectStubsBlock(
+ (*Alloc)->getWorkingMemory(StubPagePermissions).data(), StubTargetAddr,
+ PointerTargetAddr, NumStubsToAllocate);
+
+ if (auto Err = (*Alloc)->finalize())
+ return std::move(Err);
+
+ for (unsigned I = 0; I != NumStubsToAllocate; ++I) {
+ AvailableIndirectStubs.push_back(
+ IndirectStubInfo(StubTargetAddr, PointerTargetAddr));
+ StubTargetAddr += ABI->getStubSize();
+ PointerTargetAddr += ABI->getPointerSize();
+ }
+
+ IndirectStubAllocs.push_back(std::move(*Alloc));
+ }
+
+ assert(NumStubs <= AvailableIndirectStubs.size() &&
+ "Sufficient stubs should have been allocated above");
+
+ IndirectStubInfoVector Result;
+ while (NumStubs--) {
+ Result.push_back(AvailableIndirectStubs.back());
+ AvailableIndirectStubs.pop_back();
+ }
+
+ return std::move(Result);
+}
+
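+// Re-entry function used by the resolver block in the in-process case: it
+// recovers the LazyCallThroughManager from the context address and blocks
+// until the landing address for the trampoline that was hit is resolved.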
+static JITTargetAddress reentry(JITTargetAddress LCTMAddr,
+ JITTargetAddress TrampolineAddr) {
+ auto &LCTM = *jitTargetAddressToPointer<LazyCallThroughManager *>(LCTMAddr);
+ std::promise<JITTargetAddress> LandingAddrP;
+ auto LandingAddrF = LandingAddrP.get_future();
+ LCTM.resolveTrampolineLandingAddress(
+ TrampolineAddr,
+ [&](JITTargetAddress Addr) { LandingAddrP.set_value(Addr); });
+ return LandingAddrF.get();
+}
+
+Error setUpInProcessLCTMReentryViaTPCIU(TPCIndirectionUtils &TPCIU) {
+ auto &LCTM = TPCIU.getLazyCallThroughManager();
+ return TPCIU
+ .writeResolverBlock(pointerToJITTargetAddress(&reentry),
+ pointerToJITTargetAddress(&LCTM))
+ .takeError();
+}
+
+} // end namespace orc
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp
new file mode 100644
index 000000000000..aff7296cb6e3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp
@@ -0,0 +1,208 @@
+//===--------- RegisterEHFrames.cpp - Register EH frame sections ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "llvm/Support/FormatVariadic.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::orc::tpctypes;
+
+namespace llvm {
+namespace orc {
+
+#if defined(HAVE_REGISTER_FRAME) && defined(HAVE_DEREGISTER_FRAME) && \
+ !defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+
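+// config.h reports that the toolchain provides __register_frame and
+// __deregister_frame (HAVE_REGISTER_FRAME/HAVE_DEREGISTER_FRAME) and neither
+// SEH nor SjLj exception handling is in use, so declare and call them
+// directly.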
+extern "C" void __register_frame(const void *);
+extern "C" void __deregister_frame(const void *);
+
+Error registerFrameWrapper(const void *P) {
+ __register_frame(P);
+ return Error::success();
+}
+
+Error deregisterFrameWrapper(const void *P) {
+ __deregister_frame(P);
+ return Error::success();
+}
+
+#else
+
+// The building compiler does not have __(de)register_frame but
+// it may be found at runtime in a dynamically-loaded library.
+// For example, this happens when building LLVM with Visual C++
+// but using the MinGW runtime.
+static Error registerFrameWrapper(const void *P) {
+ static void((*RegisterFrame)(const void *)) = 0;
+
+ if (!RegisterFrame)
+ *(void **)&RegisterFrame =
+ llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("__register_frame");
+
+ if (RegisterFrame) {
+ RegisterFrame(P);
+ return Error::success();
+ }
+
+ return make_error<StringError>("could not register eh-frame: "
+ "__register_frame function not found",
+ inconvertibleErrorCode());
+}
+
+static Error deregisterFrameWrapper(const void *P) {
+ static void((*DeregisterFrame)(const void *)) = 0;
+
+ if (!DeregisterFrame)
+ *(void **)&DeregisterFrame =
+ llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(
+ "__deregister_frame");
+
+ if (DeregisterFrame) {
+ DeregisterFrame(P);
+ return Error::success();
+ }
+
+ return make_error<StringError>("could not deregister eh-frame: "
+ "__deregister_frame function not found",
+ inconvertibleErrorCode());
+}
+#endif
+
+#ifdef __APPLE__
+
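+// On Darwin __register_frame registers a single FDE rather than a whole
+// .eh_frame section, so walk the section and pass each FDE (records whose
+// CIE-pointer field is non-zero) to the supplied handler, skipping CIEs.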
+template <typename HandleFDEFn>
+Error walkAppleEHFrameSection(const char *const SectionStart,
+ size_t SectionSize, HandleFDEFn HandleFDE) {
+ const char *CurCFIRecord = SectionStart;
+ const char *End = SectionStart + SectionSize;
+ uint64_t Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
+
+ while (CurCFIRecord != End && Size != 0) {
+ const char *OffsetField = CurCFIRecord + (Size == 0xffffffff ? 12 : 4);
+ if (Size == 0xffffffff)
+ Size = *reinterpret_cast<const uint64_t *>(CurCFIRecord + 4) + 12;
+ else
+ Size += 4;
+ uint32_t Offset = *reinterpret_cast<const uint32_t *>(OffsetField);
+
+ LLVM_DEBUG({
+ dbgs() << "Registering eh-frame section:\n";
+ dbgs() << "Processing " << (Offset ? "FDE" : "CIE") << " @"
+ << (void *)CurCFIRecord << ": [";
+ for (unsigned I = 0; I < Size; ++I)
+ dbgs() << format(" 0x%02" PRIx8, *(CurCFIRecord + I));
+ dbgs() << " ]\n";
+ });
+
+ if (Offset != 0)
+ if (auto Err = HandleFDE(CurCFIRecord))
+ return Err;
+
+ CurCFIRecord += Size;
+
+ Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
+ }
+
+ return Error::success();
+}
+
+#endif // __APPLE__
+
+Error registerEHFrameSection(const void *EHFrameSectionAddr,
+ size_t EHFrameSectionSize) {
+#ifdef __APPLE__
+ // On Darwin __register_frame has to be called for each FDE entry.
+ return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
+ EHFrameSectionSize, registerFrameWrapper);
+#else
+ // On Linux __register_frame takes a single argument:
+ // a pointer to the start of the .eh_frame section.
+
+ // __register_frame can find the end because crtendS.o is linked in and
+ // its .eh_frame section ends with four zero bytes (a zero terminator).
+ return registerFrameWrapper(EHFrameSectionAddr);
+#endif
+}
+
+Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
+ size_t EHFrameSectionSize) {
+#ifdef __APPLE__
+ return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
+ EHFrameSectionSize, deregisterFrameWrapper);
+#else
+ return deregisterFrameWrapper(EHFrameSectionAddr);
+#endif
+}
+
+} // end namespace orc
+} // end namespace llvm
+
+extern "C" CWrapperFunctionResult
+llvm_orc_registerEHFrameSectionWrapper(uint8_t *Data, uint64_t Size) {
+ if (Size != sizeof(uint64_t) + sizeof(uint64_t))
+ return WrapperFunctionResult::from(
+ "Invalid arguments to llvm_orc_registerEHFrameSectionWrapper")
+ .release();
+
+ uint64_t EHFrameSectionAddr;
+ uint64_t EHFrameSectionSize;
+
+ {
+ BinaryStreamReader ArgReader(ArrayRef<uint8_t>(Data, Size),
+ support::endianness::big);
+ cantFail(ArgReader.readInteger(EHFrameSectionAddr));
+ cantFail(ArgReader.readInteger(EHFrameSectionSize));
+ }
+
+ if (auto Err = registerEHFrameSection(
+ jitTargetAddressToPointer<void *>(EHFrameSectionAddr),
+ EHFrameSectionSize)) {
+ auto ErrMsg = toString(std::move(Err));
+ return WrapperFunctionResult::from(ErrMsg).release();
+ }
+ return WrapperFunctionResult().release();
+}
+
+extern "C" CWrapperFunctionResult
+llvm_orc_deregisterEHFrameSectionWrapper(uint8_t *Data, uint64_t Size) {
+ if (Size != sizeof(uint64_t) + sizeof(uint64_t))
+ return WrapperFunctionResult::from(
+ "Invalid arguments to llvm_orc_registerEHFrameSectionWrapper")
+ .release();
+
+ uint64_t EHFrameSectionAddr;
+ uint64_t EHFrameSectionSize;
+
+ {
+ BinaryStreamReader ArgReader(ArrayRef<uint8_t>(Data, Size),
+ support::endianness::big);
+ cantFail(ArgReader.readInteger(EHFrameSectionAddr));
+ cantFail(ArgReader.readInteger(EHFrameSectionSize));
+ }
+
+ if (auto Err = deregisterEHFrameSection(
+ jitTargetAddressToPointer<void *>(EHFrameSectionAddr),
+ EHFrameSectionSize)) {
+ auto ErrMsg = toString(std::move(Err));
+ return WrapperFunctionResult::from(ErrMsg).release();
+ }
+ return WrapperFunctionResult().release();
+}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.cpp
new file mode 100644
index 000000000000..a8e6c049cf4b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.cpp
@@ -0,0 +1,43 @@
+//===--- TargetExecutionUtils.cpp - Execution utils for target processes --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
+
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+int runAsMain(int (*Main)(int, char *[]), ArrayRef<std::string> Args,
+ Optional<StringRef> ProgramName) {
+ std::vector<std::unique_ptr<char[]>> ArgVStorage;
+ std::vector<char *> ArgV;
+
+ ArgVStorage.reserve(Args.size() + (ProgramName ? 1 : 0));
+ ArgV.reserve(Args.size() + 1 + (ProgramName ? 1 : 0));
+
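+ // main() expects a mutable argv array terminated by a null pointer, so
+ // copy the program name (if any) and each argument into owned,
+ // NUL-terminated buffers before building the pointer vector.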
+ if (ProgramName) {
+ ArgVStorage.push_back(std::make_unique<char[]>(ProgramName->size() + 1));
+ llvm::copy(*ProgramName, &ArgVStorage.back()[0]);
+ ArgVStorage.back()[ProgramName->size()] = '\0';
+ ArgV.push_back(ArgVStorage.back().get());
+ }
+
+ for (const auto &Arg : Args) {
+ ArgVStorage.push_back(std::make_unique<char[]>(Arg.size() + 1));
+ llvm::copy(Arg, &ArgVStorage.back()[0]);
+ ArgVStorage.back()[Arg.size()] = '\0';
+ ArgV.push_back(ArgVStorage.back().get());
+ }
+ ArgV.push_back(nullptr);
+
+ return Main(Args.size() + !!ProgramName, ArgV.data());
+}
+
+} // End namespace orc.
+} // End namespace llvm.
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp
new file mode 100644
index 000000000000..7bf874e88c26
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp
@@ -0,0 +1,153 @@
+//===------ TargetProcessControl.cpp -- Target process control APIs -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h"
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Process.h"
+
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+
+TargetProcessControl::MemoryAccess::~MemoryAccess() {}
+
+TargetProcessControl::~TargetProcessControl() {}
+
+SelfTargetProcessControl::SelfTargetProcessControl(
+ std::shared_ptr<SymbolStringPool> SSP, Triple TargetTriple,
+ unsigned PageSize, std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr)
+ : TargetProcessControl(std::move(SSP)) {
+
+ OwnedMemMgr = std::move(MemMgr);
+ if (!OwnedMemMgr)
+ OwnedMemMgr = std::make_unique<jitlink::InProcessMemoryManager>();
+
+ this->TargetTriple = std::move(TargetTriple);
+ this->PageSize = PageSize;
+ this->MemMgr = OwnedMemMgr.get();
+ this->MemAccess = this;
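+ // MachO object files prefix C symbol names with '_'; remember that here so
+ // lookupSymbols can strip it before querying the host's dynamic loader.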
+ if (this->TargetTriple.isOSBinFormatMachO())
+ GlobalManglingPrefix = '_';
+}
+
+Expected<std::unique_ptr<SelfTargetProcessControl>>
+SelfTargetProcessControl::Create(
+ std::shared_ptr<SymbolStringPool> SSP,
+ std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr) {
+ auto PageSize = sys::Process::getPageSize();
+ if (!PageSize)
+ return PageSize.takeError();
+
+ Triple TT(sys::getProcessTriple());
+
+ return std::make_unique<SelfTargetProcessControl>(
+ std::move(SSP), std::move(TT), *PageSize, std::move(MemMgr));
+}
+
+Expected<tpctypes::DylibHandle>
+SelfTargetProcessControl::loadDylib(const char *DylibPath) {
+ std::string ErrMsg;
+ auto Dylib = std::make_unique<sys::DynamicLibrary>(
+ sys::DynamicLibrary::getPermanentLibrary(DylibPath, &ErrMsg));
+ if (!Dylib->isValid())
+ return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
+ DynamicLibraries.push_back(std::move(Dylib));
+ return pointerToJITTargetAddress(DynamicLibraries.back().get());
+}
+
+Expected<std::vector<tpctypes::LookupResult>>
+SelfTargetProcessControl::lookupSymbols(ArrayRef<LookupRequest> Request) {
+ std::vector<tpctypes::LookupResult> R;
+
+ for (auto &Elem : Request) {
+ auto *Dylib = jitTargetAddressToPointer<sys::DynamicLibrary *>(Elem.Handle);
+ assert(llvm::any_of(DynamicLibraries,
+ [=](const std::unique_ptr<sys::DynamicLibrary> &DL) {
+ return DL.get() == Dylib;
+ }) &&
+ "Invalid handle");
+
+ R.push_back(std::vector<JITTargetAddress>());
+ for (auto &KV : Elem.Symbols) {
+ auto &Sym = KV.first;
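+ // Drop the global mangling prefix (e.g. the MachO '_'), if any, since
+ // DynamicLibrary::getAddressOfSymbol expects unprefixed names.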
+ std::string Tmp((*Sym).data() + !!GlobalManglingPrefix,
+ (*Sym).size() - !!GlobalManglingPrefix);
+ void *Addr = Dylib->getAddressOfSymbol(Tmp.c_str());
+ if (!Addr && KV.second == SymbolLookupFlags::RequiredSymbol) {
+ // FIXME: Collect all failing symbols before erroring out.
+ SymbolNameVector MissingSymbols;
+ MissingSymbols.push_back(Sym);
+ return make_error<SymbolsNotFound>(std::move(MissingSymbols));
+ }
+ R.back().push_back(pointerToJITTargetAddress(Addr));
+ }
+ }
+
+ return R;
+}
+
+Expected<int32_t>
+SelfTargetProcessControl::runAsMain(JITTargetAddress MainFnAddr,
+ ArrayRef<std::string> Args) {
+ using MainTy = int (*)(int, char *[]);
+ return orc::runAsMain(jitTargetAddressToFunction<MainTy>(MainFnAddr), Args);
+}
+
+Expected<tpctypes::WrapperFunctionResult>
+SelfTargetProcessControl::runWrapper(JITTargetAddress WrapperFnAddr,
+ ArrayRef<uint8_t> ArgBuffer) {
+ using WrapperFnTy =
+ tpctypes::CWrapperFunctionResult (*)(const uint8_t *Data, uint64_t Size);
+ auto *WrapperFn = jitTargetAddressToFunction<WrapperFnTy>(WrapperFnAddr);
+ return WrapperFn(ArgBuffer.data(), ArgBuffer.size());
+}
+
+Error SelfTargetProcessControl::disconnect() { return Error::success(); }
+
+void SelfTargetProcessControl::writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) {
+ for (auto &W : Ws)
+ *jitTargetAddressToPointer<uint8_t *>(W.Address) = W.Value;
+ OnWriteComplete(Error::success());
+}
+
+void SelfTargetProcessControl::writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws,
+ WriteResultFn OnWriteComplete) {
+ for (auto &W : Ws)
+ *jitTargetAddressToPointer<uint16_t *>(W.Address) = W.Value;
+ OnWriteComplete(Error::success());
+}
+
+void SelfTargetProcessControl::writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws,
+ WriteResultFn OnWriteComplete) {
+ for (auto &W : Ws)
+ *jitTargetAddressToPointer<uint32_t *>(W.Address) = W.Value;
+ OnWriteComplete(Error::success());
+}
+
+void SelfTargetProcessControl::writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws,
+ WriteResultFn OnWriteComplete) {
+ for (auto &W : Ws)
+ *jitTargetAddressToPointer<uint64_t *>(W.Address) = W.Value;
+ OnWriteComplete(Error::success());
+}
+
+void SelfTargetProcessControl::writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws,
+ WriteResultFn OnWriteComplete) {
+ for (auto &W : Ws)
+ memcpy(jitTargetAddressToPointer<char *>(W.Address), W.Buffer.data(),
+ W.Buffer.size());
+ OnWriteComplete(Error::success());
+}
+
+} // end namespace orc
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
index 1f4e6f132115..2e128dd23744 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
@@ -15,7 +15,7 @@
namespace llvm {
namespace orc {
-ThreadSafeModule cloneToNewContext(ThreadSafeModule &TSM,
+ThreadSafeModule cloneToNewContext(const ThreadSafeModule &TSM,
GVPredicate ShouldCloneDef,
GVModifier UpdateClonedDefSource) {
assert(TSM && "Can not clone null module");
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index 46604ff4000c..b6ccd02405c1 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -31,16 +31,8 @@ namespace llvm {
RTDyldMemoryManager::~RTDyldMemoryManager() {}
-// Determine whether we can register EH tables.
-#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \
- !(defined(_AIX) && defined(__ibmxl__)) && !defined(__SEH__) && \
- !defined(__USING_SJLJ_EXCEPTIONS__))
-#define HAVE_EHTABLE_SUPPORT 1
-#else
-#define HAVE_EHTABLE_SUPPORT 0
-#endif
-
-#if HAVE_EHTABLE_SUPPORT
+#if defined(HAVE_REGISTER_FRAME) && defined(HAVE_DEREGISTER_FRAME) && \
+ !defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
extern "C" void __register_frame(void *);
extern "C" void __deregister_frame(void *);
#else
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 04f541b59557..e49e6e541f15 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -1206,16 +1206,19 @@ Error RuntimeDyldImpl::resolveExternalSymbols() {
void RuntimeDyldImpl::finalizeAsync(
std::unique_ptr<RuntimeDyldImpl> This,
- unique_function<void(object::OwningBinary<object::ObjectFile>, Error)>
+ unique_function<void(object::OwningBinary<object::ObjectFile>,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>, Error)>
OnEmitted,
- object::OwningBinary<object::ObjectFile> O) {
+ object::OwningBinary<object::ObjectFile> O,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> Info) {
auto SharedThis = std::shared_ptr<RuntimeDyldImpl>(std::move(This));
auto PostResolveContinuation =
- [SharedThis, OnEmitted = std::move(OnEmitted), O = std::move(O)](
+ [SharedThis, OnEmitted = std::move(OnEmitted), O = std::move(O),
+ Info = std::move(Info)](
Expected<JITSymbolResolver::LookupResult> Result) mutable {
if (!Result) {
- OnEmitted(std::move(O), Result.takeError());
+ OnEmitted(std::move(O), std::move(Info), Result.takeError());
return;
}
@@ -1229,11 +1232,11 @@ void RuntimeDyldImpl::finalizeAsync(
SharedThis->registerEHFrames();
std::string ErrMsg;
if (SharedThis->MemMgr.finalizeMemory(&ErrMsg))
- OnEmitted(std::move(O),
+ OnEmitted(std::move(O), std::move(Info),
make_error<StringError>(std::move(ErrMsg),
inconvertibleErrorCode()));
else
- OnEmitted(std::move(O), Error::success());
+ OnEmitted(std::move(O), std::move(Info), Error::success());
};
JITSymbolResolver::LookupSet Symbols;
@@ -1425,12 +1428,12 @@ void jitLinkForORC(
object::OwningBinary<object::ObjectFile> O,
RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver,
bool ProcessAllSections,
- unique_function<
- Error(const object::ObjectFile &Obj,
- std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObj,
- std::map<StringRef, JITEvaluatedSymbol>)>
+ unique_function<Error(const object::ObjectFile &Obj,
+ RuntimeDyld::LoadedObjectInfo &LoadedObj,
+ std::map<StringRef, JITEvaluatedSymbol>)>
OnLoaded,
- unique_function<void(object::OwningBinary<object::ObjectFile>, Error)>
+ unique_function<void(object::OwningBinary<object::ObjectFile>,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>, Error)>
OnEmitted) {
RuntimeDyld RTDyld(MemMgr, Resolver);
@@ -1439,17 +1442,17 @@ void jitLinkForORC(
auto Info = RTDyld.loadObject(*O.getBinary());
if (RTDyld.hasError()) {
- OnEmitted(std::move(O), make_error<StringError>(RTDyld.getErrorString(),
- inconvertibleErrorCode()));
+ OnEmitted(std::move(O), std::move(Info),
+ make_error<StringError>(RTDyld.getErrorString(),
+ inconvertibleErrorCode()));
return;
}
- if (auto Err =
- OnLoaded(*O.getBinary(), std::move(Info), RTDyld.getSymbolTable()))
- OnEmitted(std::move(O), std::move(Err));
+ if (auto Err = OnLoaded(*O.getBinary(), *Info, RTDyld.getSymbolTable()))
+ OnEmitted(std::move(O), std::move(Info), std::move(Err));
RuntimeDyldImpl::finalizeAsync(std::move(RTDyld.Dyld), std::move(OnEmitted),
- std::move(O));
+ std::move(O), std::move(Info));
}
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index e5e512672daa..2fbe707ce8df 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -352,7 +352,7 @@ private:
RemainingExpr = RemainingExpr.substr(1).ltrim();
uint64_t StubAddr;
- std::string ErrorMsg = "";
+ std::string ErrorMsg;
std::tie(StubAddr, ErrorMsg) = Checker.getStubOrGOTAddrFor(
StubContainerName, Symbol, PCtx.IsInsideLoad, IsStubAddr);
@@ -389,7 +389,7 @@ private:
RemainingExpr = RemainingExpr.substr(1).ltrim();
uint64_t StubAddr;
- std::string ErrorMsg = "";
+ std::string ErrorMsg;
std::tie(StubAddr, ErrorMsg) = Checker.getSectionAddr(
FileName, SectionName, PCtx.IsInsideLoad);
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 7ed8a718ed3c..28e1faab5ac7 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -57,13 +57,6 @@ namespace {
template <class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> {
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
- typedef Elf_Shdr_Impl<ELFT> Elf_Shdr;
- typedef Elf_Sym_Impl<ELFT> Elf_Sym;
- typedef Elf_Rel_Impl<ELFT, false> Elf_Rel;
- typedef Elf_Rel_Impl<ELFT, true> Elf_Rela;
-
- typedef Elf_Ehdr_Impl<ELFT> Elf_Ehdr;
-
typedef typename ELFT::uint addr_type;
DyldELFObject(ELFObjectFile<ELFT> &&Obj);
@@ -961,7 +954,8 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type,
(uint32_t)(Addend & 0xffffffffL));
break;
- case Triple::ppc:
+ case Triple::ppc: // Fall through.
+ case Triple::ppcle:
resolvePPC32Relocation(Section, Offset, Value, Type, Addend);
break;
case Triple::ppc64: // Fall through.
@@ -1676,30 +1670,33 @@ RuntimeDyldELF::processRelocationRef(
if (Value.SymbolName) {
// This is a call to an external function.
// Look for an existing stub.
- SectionEntry &Section = Sections[SectionID];
+ SectionEntry *Section = &Sections[SectionID];
StubMap::const_iterator i = Stubs.find(Value);
uintptr_t StubAddress;
if (i != Stubs.end()) {
- StubAddress = uintptr_t(Section.getAddress()) + i->second;
+ StubAddress = uintptr_t(Section->getAddress()) + i->second;
LLVM_DEBUG(dbgs() << " Stub function found\n");
} else {
// Create a new stub function (equivalent to a PLT entry).
LLVM_DEBUG(dbgs() << " Create a new stub function\n");
- uintptr_t BaseAddress = uintptr_t(Section.getAddress());
+ uintptr_t BaseAddress = uintptr_t(Section->getAddress());
uintptr_t StubAlignment = getStubAlignment();
StubAddress =
- (BaseAddress + Section.getStubOffset() + StubAlignment - 1) &
+ (BaseAddress + Section->getStubOffset() + StubAlignment - 1) &
-StubAlignment;
unsigned StubOffset = StubAddress - BaseAddress;
Stubs[Value] = StubOffset;
createStubFunction((uint8_t *)StubAddress);
// Bump our stub offset counter
- Section.advanceStubOffset(getMaxStubSize());
+ Section->advanceStubOffset(getMaxStubSize());
// Allocate a GOT Entry
uint64_t GOTOffset = allocateGOTEntries(1);
+ // Allocating the GOT entry may create a new Section, which can
+ // invalidate the Section pointer, so reload it.
+ Section = &Sections[SectionID];
// The load of the GOT address has an addend of -4
resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4,
@@ -1712,7 +1709,7 @@ RuntimeDyldELF::processRelocationRef(
}
// Make the target call a call into the stub table.
- resolveRelocation(Section, Offset, StubAddress, ELF::R_X86_64_PC32,
+ resolveRelocation(*Section, Offset, StubAddress, ELF::R_X86_64_PC32,
Addend);
} else {
RelocationEntry RE(SectionID, Offset, ELF::R_X86_64_PC32, Value.Addend,
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index a9346536fd09..d34fae9aaf0c 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -535,9 +535,12 @@ public:
static void finalizeAsync(
std::unique_ptr<RuntimeDyldImpl> This,
- unique_function<void(object::OwningBinary<object::ObjectFile>, Error)>
+ unique_function<void(object::OwningBinary<object::ObjectFile>,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>,
+ Error)>
OnEmitted,
- object::OwningBinary<object::ObjectFile> O);
+ object::OwningBinary<object::ObjectFile> O,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> Info);
void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index ebe3ca33d308..9df3e2e3c3bf 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -113,11 +113,10 @@ public:
// The MemoryManager can make sure this is always true by forcing the
// memory layout to be: CodeSection < ReadOnlySection < ReadWriteSection.
const uint64_t ImageBase = getImageBase();
- if (Value < ImageBase || ((Value - ImageBase) > UINT32_MAX)) {
- llvm::errs() << "IMAGE_REL_AMD64_ADDR32NB relocation requires an"
- << "ordered section layout.\n";
- write32BitOffset(Target, 0, 0);
- } else {
+ if (Value < ImageBase || ((Value - ImageBase) > UINT32_MAX))
+ report_fatal_error("IMAGE_REL_AMD64_ADDR32NB relocation requires an "
+ "ordered section layout");
+ else {
write32BitOffset(Target, RE.Addend, Value - ImageBase);
}
break;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
index 925049b2a1b4..6690dd07d99b 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -112,6 +112,15 @@ uint8_t *SectionMemoryManager::allocateSection(
// Save this address as the basis for our next request
MemGroup.Near = MB;
+ // Copy the address to all the other groups, if they have not
+ // been initialized.
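+ // Near is only an allocation hint, so this simply biases later code,
+ // rodata and rwdata requests toward this block, keeping the different
+ // section kinds close together.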
+ if (CodeMem.Near.base() == 0)
+ CodeMem.Near = MB;
+ if (RODataMem.Near.base() == 0)
+ RODataMem.Near = MB;
+ if (RWDataMem.Near.base() == 0)
+ RWDataMem.Near = MB;
+
// Remember that we allocated this memory
MemGroup.AllocatedMem.push_back(MB);
Addr = (uintptr_t)MB.base();
@@ -152,8 +161,7 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) {
}
// Make read-only data memory read-only.
- ec = applyMemoryGroupPermissions(RODataMem,
- sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+ ec = applyMemoryGroupPermissions(RODataMem, sys::Memory::MF_READ);
if (ec) {
if (ErrMsg) {
*ErrMsg = ec.message();
@@ -210,11 +218,9 @@ SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
}
// Remove all blocks which are now empty
- MemGroup.FreeMem.erase(remove_if(MemGroup.FreeMem,
- [](FreeMemBlock &FreeMB) {
- return FreeMB.Free.allocatedSize() == 0;
- }),
- MemGroup.FreeMem.end());
+ erase_if(MemGroup.FreeMem, [](FreeMemBlock &FreeMB) {
+ return FreeMB.Free.allocatedSize() == 0;
+ });
return std::error_code();
}
diff --git a/contrib/llvm-project/llvm/lib/Support/FileCheck.cpp b/contrib/llvm-project/llvm/lib/FileCheck/FileCheck.cpp
index d0e79c675bcb..3169afaed58c 100644
--- a/contrib/llvm-project/llvm/lib/Support/FileCheck.cpp
+++ b/contrib/llvm-project/llvm/lib/FileCheck/FileCheck.cpp
@@ -13,14 +13,16 @@
// as well as various unittests.
//===----------------------------------------------------------------------===//
-#include "llvm/Support/FileCheck.h"
+#include "llvm/FileCheck/FileCheck.h"
#include "FileCheckImpl.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CheckedArithmetic.h"
#include "llvm/Support/FormatVariadic.h"
#include <cstdint>
#include <list>
+#include <set>
#include <tuple>
#include <utility>
@@ -42,16 +44,28 @@ StringRef ExpressionFormat::toString() const {
llvm_unreachable("unknown expression format");
}
-Expected<StringRef> ExpressionFormat::getWildcardRegex() const {
+Expected<std::string> ExpressionFormat::getWildcardRegex() const {
+ auto CreatePrecisionRegex = [this](StringRef S) {
+ return (S + Twine('{') + Twine(Precision) + "}").str();
+ };
+
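+ // With a precision N the wildcard accepts the zero-padded form of a value:
+ // at least N digits, with no leading zero when the value needs more than N
+ // digits.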
switch (Value) {
case Kind::Unsigned:
- return StringRef("[0-9]+");
+ if (Precision)
+ return CreatePrecisionRegex("([1-9][0-9]*)?[0-9]");
+ return std::string("[0-9]+");
case Kind::Signed:
- return StringRef("-?[0-9]+");
+ if (Precision)
+ return CreatePrecisionRegex("-?([1-9][0-9]*)?[0-9]");
+ return std::string("-?[0-9]+");
case Kind::HexUpper:
- return StringRef("[0-9A-F]+");
+ if (Precision)
+ return CreatePrecisionRegex("([1-9A-F][0-9A-F]*)?[0-9A-F]");
+ return std::string("[0-9A-F]+");
case Kind::HexLower:
- return StringRef("[0-9a-f]+");
+ if (Precision)
+ return CreatePrecisionRegex("([1-9a-f][0-9a-f]*)?[0-9a-f]");
+ return std::string("[0-9a-f]+");
default:
return createStringError(std::errc::invalid_argument,
"trying to match value with invalid format");
@@ -60,27 +74,47 @@ Expected<StringRef> ExpressionFormat::getWildcardRegex() const {
Expected<std::string>
ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const {
+ uint64_t AbsoluteValue;
+ StringRef SignPrefix = IntegerValue.isNegative() ? "-" : "";
+
if (Value == Kind::Signed) {
Expected<int64_t> SignedValue = IntegerValue.getSignedValue();
if (!SignedValue)
return SignedValue.takeError();
- return itostr(*SignedValue);
+ if (*SignedValue < 0)
+ AbsoluteValue = cantFail(IntegerValue.getAbsolute().getUnsignedValue());
+ else
+ AbsoluteValue = *SignedValue;
+ } else {
+ Expected<uint64_t> UnsignedValue = IntegerValue.getUnsignedValue();
+ if (!UnsignedValue)
+ return UnsignedValue.takeError();
+ AbsoluteValue = *UnsignedValue;
}
- Expected<uint64_t> UnsignedValue = IntegerValue.getUnsignedValue();
- if (!UnsignedValue)
- return UnsignedValue.takeError();
+ std::string AbsoluteValueStr;
switch (Value) {
case Kind::Unsigned:
- return utostr(*UnsignedValue);
+ case Kind::Signed:
+ AbsoluteValueStr = utostr(AbsoluteValue);
+ break;
case Kind::HexUpper:
- return utohexstr(*UnsignedValue, /*LowerCase=*/false);
case Kind::HexLower:
- return utohexstr(*UnsignedValue, /*LowerCase=*/true);
+ AbsoluteValueStr = utohexstr(AbsoluteValue, Value == Kind::HexLower);
+ break;
default:
return createStringError(std::errc::invalid_argument,
"trying to match value with invalid format");
}
+
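+ // Zero-pad the digits (after any '-' sign) so the substituted value has at
+ // least Precision digits.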
+ if (Precision > AbsoluteValueStr.size()) {
+ unsigned LeadingZeros = Precision - AbsoluteValueStr.size();
+ return (Twine(SignPrefix) + std::string(LeadingZeros, '0') +
+ AbsoluteValueStr)
+ .str();
+ }
+
+ return (Twine(SignPrefix) + AbsoluteValueStr).str();
}
Expected<ExpressionValue>
@@ -719,41 +753,59 @@ Expected<std::unique_ptr<Expression>> Pattern::parseNumericSubstitutionBlock(
StringRef DefExpr = StringRef();
DefinedNumericVariable = None;
ExpressionFormat ExplicitFormat = ExpressionFormat();
+ unsigned Precision = 0;
// Parse format specifier (NOTE: ',' is also an argument separator).
size_t FormatSpecEnd = Expr.find(',');
size_t FunctionStart = Expr.find('(');
if (FormatSpecEnd != StringRef::npos && FormatSpecEnd < FunctionStart) {
- Expr = Expr.ltrim(SpaceChars);
- if (!Expr.consume_front("%"))
+ StringRef FormatExpr = Expr.take_front(FormatSpecEnd);
+ Expr = Expr.drop_front(FormatSpecEnd + 1);
+ FormatExpr = FormatExpr.trim(SpaceChars);
+ if (!FormatExpr.consume_front("%"))
return ErrorDiagnostic::get(
- SM, Expr, "invalid matching format specification in expression");
-
- // Check for unknown matching format specifier and set matching format in
- // class instance representing this expression.
- SMLoc fmtloc = SMLoc::getFromPointer(Expr.data());
- switch (popFront(Expr)) {
- case 'u':
- ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::Unsigned);
- break;
- case 'd':
- ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::Signed);
- break;
- case 'x':
- ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexLower);
- break;
- case 'X':
- ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexUpper);
- break;
- default:
- return ErrorDiagnostic::get(SM, fmtloc,
- "invalid format specifier in expression");
+ SM, FormatExpr,
+ "invalid matching format specification in expression");
+
+ // Parse precision.
+ if (FormatExpr.consume_front(".")) {
+ if (FormatExpr.consumeInteger(10, Precision))
+ return ErrorDiagnostic::get(SM, FormatExpr,
+ "invalid precision in format specifier");
}
- Expr = Expr.ltrim(SpaceChars);
- if (!Expr.consume_front(","))
+ if (!FormatExpr.empty()) {
+ // Check for unknown matching format specifier and set matching format in
+ // class instance representing this expression.
+ SMLoc FmtLoc = SMLoc::getFromPointer(FormatExpr.data());
+ switch (popFront(FormatExpr)) {
+ case 'u':
+ ExplicitFormat =
+ ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);
+ break;
+ case 'd':
+ ExplicitFormat =
+ ExpressionFormat(ExpressionFormat::Kind::Signed, Precision);
+ break;
+ case 'x':
+ ExplicitFormat =
+ ExpressionFormat(ExpressionFormat::Kind::HexLower, Precision);
+ break;
+ case 'X':
+ ExplicitFormat =
+ ExpressionFormat(ExpressionFormat::Kind::HexUpper, Precision);
+ break;
+ default:
+ return ErrorDiagnostic::get(SM, FmtLoc,
+ "invalid format specifier in expression");
+ }
+ }
+
+ FormatExpr = FormatExpr.ltrim(SpaceChars);
+ if (!FormatExpr.empty())
return ErrorDiagnostic::get(
- SM, Expr, "invalid matching format specification in expression");
+ SM, FormatExpr,
+ "invalid matching format specification in expression");
}
// Save variable definition expression if any.
@@ -813,7 +865,7 @@ Expected<std::unique_ptr<Expression>> Pattern::parseNumericSubstitutionBlock(
Format = *ImplicitFormat;
}
if (!Format)
- Format = ExpressionFormat(ExpressionFormat::Kind::Unsigned);
+ Format = ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);
std::unique_ptr<Expression> ExpressionPointer =
std::make_unique<Expression>(std::move(ExpressionASTPointer), Format);
@@ -865,6 +917,12 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
return false;
}
+ // If literal check, set fixed string.
+ if (CheckTy.isLiteralMatch()) {
+ FixedStr = PatternStr;
+ return false;
+ }
+
// Check to see if this is a fixed string, or if it has regex pieces.
if (!MatchFullLinesHere &&
(PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
@@ -947,12 +1005,12 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
bool IsLegacyLineExpr = false;
StringRef DefName;
StringRef SubstStr;
- StringRef MatchRegexp;
+ std::string MatchRegexp;
size_t SubstInsertIdx = RegExStr.size();
// Parse string variable or legacy @LINE expression.
if (!IsNumBlock) {
- size_t VarEndIdx = MatchStr.find(":");
+ size_t VarEndIdx = MatchStr.find(':');
size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
if (SpacePos != StringRef::npos) {
SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
@@ -991,7 +1049,7 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
return true;
}
DefName = Name;
- MatchRegexp = MatchStr;
+ MatchRegexp = MatchStr.str();
} else {
if (IsPseudo) {
MatchStr = OrigMatchStr;
@@ -1218,7 +1276,7 @@ Expected<size_t> Pattern::match(StringRef Buffer, size_t &MatchLen,
Format.valueFromStringRepr(MatchedValue, SM);
if (!Value)
return Value.takeError();
- DefinedNumericVariable->setValue(*Value);
+ DefinedNumericVariable->setValue(*Value, MatchedValue);
}
// Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
@@ -1247,7 +1305,9 @@ unsigned Pattern::computeMatchDistance(StringRef Buffer) const {
}
void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
- SMRange MatchRange) const {
+ SMRange Range,
+ FileCheckDiag::MatchType MatchTy,
+ std::vector<FileCheckDiag> *Diags) const {
// Print what we know about substitutions.
if (!Substitutions.empty()) {
for (const auto &Substitution : Substitutions) {
@@ -1280,29 +1340,85 @@ void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
OS.write_escaped(*MatchedValue) << "\"";
}
- if (MatchRange.isValid())
- SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
- {MatchRange});
+ // We report only the start of the match/search range to suggest we are
+ // reporting the substitutions as set at the start of the match/search.
+ // Indicating a non-zero-length range might instead seem to imply that the
+ // substitution matches or was captured from exactly that range.
+ if (Diags)
+ Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy,
+ SMRange(Range.Start, Range.Start), OS.str());
else
- SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
- SourceMgr::DK_Note, OS.str());
+ SM.PrintMessage(Range.Start, SourceMgr::DK_Note, OS.str());
}
}
}
+void Pattern::printVariableDefs(const SourceMgr &SM,
+ FileCheckDiag::MatchType MatchTy,
+ std::vector<FileCheckDiag> *Diags) const {
+ if (VariableDefs.empty() && NumericVariableDefs.empty())
+ return;
+ // Build list of variable captures.
+ struct VarCapture {
+ StringRef Name;
+ SMRange Range;
+ };
+ SmallVector<VarCapture, 2> VarCaptures;
+ for (const auto &VariableDef : VariableDefs) {
+ VarCapture VC;
+ VC.Name = VariableDef.first;
+ StringRef Value = Context->GlobalVariableTable[VC.Name];
+ SMLoc Start = SMLoc::getFromPointer(Value.data());
+ SMLoc End = SMLoc::getFromPointer(Value.data() + Value.size());
+ VC.Range = SMRange(Start, End);
+ VarCaptures.push_back(VC);
+ }
+ for (const auto &VariableDef : NumericVariableDefs) {
+ VarCapture VC;
+ VC.Name = VariableDef.getKey();
+ StringRef StrValue = VariableDef.getValue()
+ .DefinedNumericVariable->getStringValue()
+ .getValue();
+ SMLoc Start = SMLoc::getFromPointer(StrValue.data());
+ SMLoc End = SMLoc::getFromPointer(StrValue.data() + StrValue.size());
+ VC.Range = SMRange(Start, End);
+ VarCaptures.push_back(VC);
+ }
+ // Sort variable captures by the order in which they matched the input.
+ // Ranges shouldn't be overlapping, so we can just compare the start.
+ llvm::sort(VarCaptures, [](const VarCapture &A, const VarCapture &B) {
+ assert(A.Range.Start != B.Range.Start &&
+ "unexpected overlapping variable captures");
+ return A.Range.Start.getPointer() < B.Range.Start.getPointer();
+ });
+ // Create notes for the sorted captures.
+ for (const VarCapture &VC : VarCaptures) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "captured var \"" << VC.Name << "\"";
+ if (Diags)
+ Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, VC.Range, OS.str());
+ else
+ SM.PrintMessage(VC.Range.Start, SourceMgr::DK_Note, OS.str(), VC.Range);
+ }
+}
+
static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
const SourceMgr &SM, SMLoc Loc,
Check::FileCheckType CheckTy,
StringRef Buffer, size_t Pos, size_t Len,
std::vector<FileCheckDiag> *Diags,
- bool AdjustPrevDiag = false) {
+ bool AdjustPrevDiags = false) {
SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos);
SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len);
SMRange Range(Start, End);
if (Diags) {
- if (AdjustPrevDiag)
- Diags->rbegin()->MatchTy = MatchTy;
- else
+ if (AdjustPrevDiags) {
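+ // A single directive may now produce several diagnostics (substitution
+ // and capture notes share its CheckLoc), so retag all of them rather than
+ // just the last one.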
+ SMLoc CheckLoc = Diags->rbegin()->CheckLoc;
+ for (auto I = Diags->rbegin(), E = Diags->rend();
+ I != E && I->CheckLoc == CheckLoc; ++I)
+ I->MatchTy = MatchTy;
+ } else
Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range);
}
return Range;
@@ -1455,8 +1571,8 @@ StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB,
FileCheckDiag::FileCheckDiag(const SourceMgr &SM,
const Check::FileCheckType &CheckTy,
SMLoc CheckLoc, MatchType MatchTy,
- SMRange InputRange)
- : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy) {
+ SMRange InputRange, StringRef Note)
+ : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy), Note(Note) {
auto Start = SM.getLineAndColumn(InputRange.Start);
auto End = SM.getLineAndColumn(InputRange.End);
InputStartLine = Start.first;
@@ -1477,26 +1593,43 @@ Check::FileCheckType &Check::FileCheckType::setCount(int C) {
return *this;
}
+std::string Check::FileCheckType::getModifiersDescription() const {
+ if (Modifiers.none())
+ return "";
+ std::string Ret;
+ raw_string_ostream OS(Ret);
+ OS << '{';
+ if (isLiteralMatch())
+ OS << "LITERAL";
+ OS << '}';
+ return OS.str();
+}
+
std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
+ // Append directive modifiers.
+ auto WithModifiers = [this, Prefix](StringRef Str) -> std::string {
+ return (Prefix + Str + getModifiersDescription()).str();
+ };
+
switch (Kind) {
case Check::CheckNone:
return "invalid";
case Check::CheckPlain:
if (Count > 1)
- return Prefix.str() + "-COUNT";
- return std::string(Prefix);
+ return WithModifiers("-COUNT");
+ return WithModifiers("");
case Check::CheckNext:
- return Prefix.str() + "-NEXT";
+ return WithModifiers("-NEXT");
case Check::CheckSame:
- return Prefix.str() + "-SAME";
+ return WithModifiers("-SAME");
case Check::CheckNot:
- return Prefix.str() + "-NOT";
+ return WithModifiers("-NOT");
case Check::CheckDAG:
- return Prefix.str() + "-DAG";
+ return WithModifiers("-DAG");
case Check::CheckLabel:
- return Prefix.str() + "-LABEL";
+ return WithModifiers("-LABEL");
case Check::CheckEmpty:
- return Prefix.str() + "-EMPTY";
+ return WithModifiers("-EMPTY");
case Check::CheckComment:
return std::string(Prefix);
case Check::CheckEOF:
@@ -1514,25 +1647,45 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) {
if (Buffer.size() <= Prefix.size())
return {Check::CheckNone, StringRef()};
- char NextChar = Buffer[Prefix.size()];
-
- StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
-
+ StringRef Rest = Buffer.drop_front(Prefix.size());
// Check for comment.
- if (Req.CommentPrefixes.end() != std::find(Req.CommentPrefixes.begin(),
- Req.CommentPrefixes.end(),
- Prefix)) {
- if (NextChar == ':')
+ if (llvm::is_contained(Req.CommentPrefixes, Prefix)) {
+ if (Rest.consume_front(":"))
return {Check::CheckComment, Rest};
// Ignore a comment prefix if it has a suffix like "-NOT".
return {Check::CheckNone, StringRef()};
}
- // Verify that the : is present after the prefix.
- if (NextChar == ':')
+ auto ConsumeModifiers = [&](Check::FileCheckType Ret)
+ -> std::pair<Check::FileCheckType, StringRef> {
+ if (Rest.consume_front(":"))
+ return {Ret, Rest};
+ if (!Rest.consume_front("{"))
+ return {Check::CheckNone, StringRef()};
+
+ // Parse the modifiers, separated by commas.
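+ // e.g. "CHECK{LITERAL}:" or "CHECK-NEXT{LITERAL}:". LITERAL makes the
+ // pattern match as a fixed string, with no regex or substitution blocks.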
+ do {
+ // Allow whitespace in modifiers list.
+ Rest = Rest.ltrim();
+ if (Rest.consume_front("LITERAL"))
+ Ret.setLiteralMatch();
+ else
+ return {Check::CheckNone, Rest};
+ // Allow whitespace in modifiers list.
+ Rest = Rest.ltrim();
+ } while (Rest.consume_front(","));
+ if (!Rest.consume_front("}:"))
+ return {Check::CheckNone, Rest};
+ return {Ret, Rest};
+ };
+
+ // Verify that the prefix is followed by directive modifiers or a colon.
+ if (Rest.consume_front(":"))
return {Check::CheckPlain, Rest};
+ if (Rest.front() == '{')
+ return ConsumeModifiers(Check::CheckPlain);
- if (NextChar != '-')
+ if (!Rest.consume_front("-"))
return {Check::CheckNone, StringRef()};
if (Rest.consume_front("COUNT-")) {
@@ -1542,29 +1695,12 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) {
return {Check::CheckBadCount, Rest};
if (Count <= 0 || Count > INT32_MAX)
return {Check::CheckBadCount, Rest};
- if (!Rest.consume_front(":"))
+ if (Rest.front() != ':' && Rest.front() != '{')
return {Check::CheckBadCount, Rest};
- return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest};
+ return ConsumeModifiers(
+ Check::FileCheckType(Check::CheckPlain).setCount(Count));
}
- if (Rest.consume_front("NEXT:"))
- return {Check::CheckNext, Rest};
-
- if (Rest.consume_front("SAME:"))
- return {Check::CheckSame, Rest};
-
- if (Rest.consume_front("NOT:"))
- return {Check::CheckNot, Rest};
-
- if (Rest.consume_front("DAG:"))
- return {Check::CheckDAG, Rest};
-
- if (Rest.consume_front("LABEL:"))
- return {Check::CheckLabel, Rest};
-
- if (Rest.consume_front("EMPTY:"))
- return {Check::CheckEmpty, Rest};
-
// You can't combine -NOT with another suffix.
if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
@@ -1572,6 +1708,24 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) {
Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
return {Check::CheckBadNot, Rest};
+ if (Rest.consume_front("NEXT"))
+ return ConsumeModifiers(Check::CheckNext);
+
+ if (Rest.consume_front("SAME"))
+ return ConsumeModifiers(Check::CheckSame);
+
+ if (Rest.consume_front("NOT"))
+ return ConsumeModifiers(Check::CheckNot);
+
+ if (Rest.consume_front("DAG"))
+ return ConsumeModifiers(Check::CheckDAG);
+
+ if (Rest.consume_front("LABEL"))
+ return ConsumeModifiers(Check::CheckLabel);
+
+ if (Rest.consume_front("EMPTY"))
+ return ConsumeModifiers(Check::CheckEmpty);
+
return {Check::CheckNone, Rest};
}
@@ -1717,8 +1871,10 @@ bool FileCheck::readCheckFile(
// found.
unsigned LineNumber = 1;
- bool FoundUsedCheckPrefix = false;
- while (1) {
+ std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(),
+ Req.CheckPrefixes.end());
+ const size_t DistinctPrefixes = PrefixesNotFound.size();
+ while (true) {
Check::FileCheckType CheckTy;
// See if a prefix occurs in the memory buffer.
@@ -1729,7 +1885,7 @@ bool FileCheck::readCheckFile(
if (UsedPrefix.empty())
break;
if (CheckTy != Check::CheckComment)
- FoundUsedCheckPrefix = true;
+ PrefixesNotFound.erase(UsedPrefix);
assert(UsedPrefix.data() == Buffer.data() &&
"Failed to move Buffer's start forward, or pointed prefix outside "
@@ -1822,14 +1978,19 @@ bool FileCheck::readCheckFile(
// When there are no used prefixes we report an error except in the case that
// no prefix is specified explicitly but -implicit-check-not is specified.
- if (!FoundUsedCheckPrefix &&
+ const bool NoPrefixesFound = PrefixesNotFound.size() == DistinctPrefixes;
+ const bool SomePrefixesUnexpectedlyNotUsed =
+ !Req.AllowUnusedPrefixes && !PrefixesNotFound.empty();
+ if ((NoPrefixesFound || SomePrefixesUnexpectedlyNotUsed) &&
(ImplicitNegativeChecks.empty() || !Req.IsDefaultCheckPrefix)) {
errs() << "error: no check strings found with prefix"
- << (Req.CheckPrefixes.size() > 1 ? "es " : " ");
- for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) {
- if (I != 0)
+ << (PrefixesNotFound.size() > 1 ? "es " : " ");
+ bool First = true;
+ for (StringRef MissingPrefix : PrefixesNotFound) {
+ if (!First)
errs() << ", ";
- errs() << "\'" << Req.CheckPrefixes[I] << ":'";
+ errs() << "\'" << MissingPrefix << ":'";
+ First = false;
}
errs() << '\n';
return true;
@@ -1863,10 +2024,15 @@ static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
// diagnostics.
PrintDiag = !Diags;
}
- SMRange MatchRange = ProcessMatchResult(
- ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected
- : FileCheckDiag::MatchFoundButExcluded,
- SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags);
+ FileCheckDiag::MatchType MatchTy = ExpectedMatch
+ ? FileCheckDiag::MatchFoundAndExpected
+ : FileCheckDiag::MatchFoundButExcluded;
+ SMRange MatchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(),
+ Buffer, MatchPos, MatchLen, Diags);
+ if (Diags) {
+ Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, Diags);
+ Pat.printVariableDefs(SM, MatchTy, Diags);
+ }
if (!PrintDiag)
return;
@@ -1881,7 +2047,8 @@ static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
{MatchRange});
- Pat.printSubstitutions(SM, Buffer, MatchRange);
+ Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, nullptr);
+ Pat.printVariableDefs(SM, MatchTy, nullptr);
}
static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
@@ -1914,10 +2081,13 @@ static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
// If the current position is at the end of a line, advance to the start of
// the next line.
Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
- SMRange SearchRange = ProcessMatchResult(
- ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
- : FileCheckDiag::MatchNoneAndExcluded,
- SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
+ FileCheckDiag::MatchType MatchTy = ExpectedMatch
+ ? FileCheckDiag::MatchNoneButExpected
+ : FileCheckDiag::MatchNoneAndExcluded;
+ SMRange SearchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(),
+ Buffer, 0, Buffer.size(), Diags);
+ if (Diags)
+ Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, Diags);
if (!PrintDiag) {
consumeError(std::move(MatchErrors));
return;
@@ -1945,7 +2115,7 @@ static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");
// Allow the pattern to print additional information if desired.
- Pat.printSubstitutions(SM, Buffer);
+ Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, nullptr);
if (ExpectedMatch)
Pat.printFuzzyMatch(SM, Buffer, Diags);
@@ -2130,6 +2300,7 @@ bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
const std::vector<const Pattern *> &NotStrings,
const FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) const {
+ bool DirectiveFail = false;
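+ // Keep scanning after a failure so that every CHECK-NOT violation in the
+ // range is reported, not just the first one.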
for (const Pattern *Pat : NotStrings) {
assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
@@ -2145,11 +2316,10 @@ bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen,
Req, Diags);
-
- return true;
+ DirectiveFail = true;
}
- return false;
+ return DirectiveFail;
}
size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
@@ -2248,8 +2418,12 @@ size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
SM.PrintMessage(OldStart, SourceMgr::DK_Note,
"match discarded, overlaps earlier DAG match here",
{OldRange});
- } else
- Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
+ } else {
+ SMLoc CheckLoc = Diags->rbegin()->CheckLoc;
+ for (auto I = Diags->rbegin(), E = Diags->rend();
+ I != E && I->CheckLoc == CheckLoc; ++I)
+ I->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
+ }
}
MatchPos = MI->End;
}
diff --git a/contrib/llvm-project/llvm/lib/Support/FileCheckImpl.h b/contrib/llvm-project/llvm/lib/FileCheck/FileCheckImpl.h
index 6ca67ec2964c..05b2a529002f 100644
--- a/contrib/llvm-project/llvm/lib/Support/FileCheckImpl.h
+++ b/contrib/llvm-project/llvm/lib/FileCheck/FileCheckImpl.h
@@ -12,13 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_SUPPORT_FILECHECKIMPL_H
-#define LLVM_LIB_SUPPORT_FILECHECKIMPL_H
+#ifndef LLVM_LIB_FILECHECK_FILECHECKIMPL_H
+#define LLVM_LIB_FILECHECK_FILECHECKIMPL_H
-#include "llvm/Support/FileCheck.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/FileCheck/FileCheck.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SourceMgr.h"
#include <map>
@@ -53,15 +53,17 @@ struct ExpressionFormat {
private:
Kind Value;
+ unsigned Precision = 0;
public:
/// Evaluates a format to true if it can be used in a match.
explicit operator bool() const { return Value != Kind::NoFormat; }
/// Define format equality: formats are equal if neither is NoFormat and
- /// their kinds are the same.
+ /// their kinds and precision are the same.
bool operator==(const ExpressionFormat &Other) const {
- return Value != Kind::NoFormat && Value == Other.Value;
+ return Value != Kind::NoFormat && Value == Other.Value &&
+ Precision == Other.Precision;
}
bool operator!=(const ExpressionFormat &Other) const {
@@ -76,12 +78,14 @@ public:
StringRef toString() const;
ExpressionFormat() : Value(Kind::NoFormat){};
- explicit ExpressionFormat(Kind Value) : Value(Value){};
+ explicit ExpressionFormat(Kind Value) : Value(Value), Precision(0){};
+ explicit ExpressionFormat(Kind Value, unsigned Precision)
+ : Value(Value), Precision(Precision){};
- /// \returns a wildcard regular expression StringRef that matches any value
- /// in the format represented by this instance, or an error if the format is
- /// NoFormat.
- Expected<StringRef> getWildcardRegex() const;
+ /// \returns a wildcard regular expression string that matches any value in
+ /// the format represented by this instance and no other value, or an error
+ /// if the format is NoFormat.
+ Expected<std::string> getWildcardRegex() const;
/// \returns the string representation of \p Value in the format represented
/// by this instance, or an error if conversion to this format failed or the
@@ -261,6 +265,10 @@ private:
/// Value of numeric variable, if defined, or None otherwise.
Optional<ExpressionValue> Value;
+ /// The input buffer's string from which Value was parsed, or None. See
+ /// comments on getStringValue for a discussion of the None case.
+ Optional<StringRef> StrValue;
+
/// Line number where this variable is defined, or None if defined before
/// input is parsed. Used to determine whether a variable is defined on the
/// same line as a given use.
@@ -284,12 +292,28 @@ public:
/// \returns this variable's value.
Optional<ExpressionValue> getValue() const { return Value; }
- /// Sets value of this numeric variable to \p NewValue.
- void setValue(ExpressionValue NewValue) { Value = NewValue; }
+ /// \returns the input buffer's string from which this variable's value was
+ /// parsed, or None if the value is not yet defined or was not parsed from the
+ /// input buffer. For example, the value of @LINE is not parsed from the
+ /// input buffer, and some numeric variables are parsed from the command
+ /// line instead.
+ Optional<StringRef> getStringValue() const { return StrValue; }
+
+ /// Sets value of this numeric variable to \p NewValue, and sets the input
+ /// buffer string from which it was parsed to \p NewStrValue. See comments on
+ /// getStringValue for a discussion of when the latter can be None.
+ void setValue(ExpressionValue NewValue,
+ Optional<StringRef> NewStrValue = None) {
+ Value = NewValue;
+ StrValue = NewStrValue;
+ }
/// Clears value of this numeric variable, regardless of whether it is
/// currently defined or not.
- void clearValue() { Value = None; }
+ void clearValue() {
+ Value = None;
+ StrValue = None;
+ }
/// \returns the line number where this variable is defined, if any, or None
/// if defined before input is parsed.
@@ -683,13 +707,16 @@ public:
/// Prints the value of successful substitutions or the name of the undefined
/// string or numeric variables preventing a successful substitution.
void printSubstitutions(const SourceMgr &SM, StringRef Buffer,
- SMRange MatchRange = None) const;
+ SMRange MatchRange, FileCheckDiag::MatchType MatchTy,
+ std::vector<FileCheckDiag> *Diags) const;
void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
std::vector<FileCheckDiag> *Diags) const;
bool hasVariable() const {
return !(Substitutions.empty() && VariableDefs.empty());
}
+ void printVariableDefs(const SourceMgr &SM, FileCheckDiag::MatchType MatchTy,
+ std::vector<FileCheckDiag> *Diags) const;
Check::FileCheckType getCheckTy() const { return CheckTy; }
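
The ExpressionFormat change above adds a Precision member and tightens operator== so two formats compare equal only when neither is NoFormat and both kind and precision agree. A self-contained sketch of just that comparison rule, with illustrative names rather than the FileCheck implementation:

    #include <cassert>

    enum class FmtKind { NoFormat, Unsigned, HexLower };

    struct Fmt {
      FmtKind Kind;
      unsigned Precision;
      bool operator==(const Fmt &Other) const {
        return Kind != FmtKind::NoFormat && Kind == Other.Kind &&
               Precision == Other.Precision;
      }
    };

    int main() {
      assert((Fmt{FmtKind::HexLower, 4} == Fmt{FmtKind::HexLower, 4}));
      assert(!(Fmt{FmtKind::HexLower, 4} == Fmt{FmtKind::HexLower, 8}));
      assert(!(Fmt{FmtKind::NoFormat, 0} == Fmt{FmtKind::NoFormat, 0}));
      return 0;
    }
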
diff --git a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPContext.cpp
index c44e858ab5ed..11d8da097c6c 100644
--- a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPContext.cpp
+++ b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPContext.cpp
@@ -14,7 +14,9 @@
#include "llvm/Frontend/OpenMP/OMPContext.h"
#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -40,6 +42,7 @@ OMPContext::OMPContext(bool IsDeviceCompilation, Triple TargetTriple) {
case Triple::mips64:
case Triple::mips64el:
case Triple::ppc:
+ case Triple::ppcle:
case Triple::ppc64:
case Triple::ppc64le:
case Triple::x86:
@@ -57,9 +60,13 @@ OMPContext::OMPContext(bool IsDeviceCompilation, Triple TargetTriple) {
// Add the appropriate device architecture trait based on the triple.
#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \
- if (TraitSelector::TraitSelectorEnum == TraitSelector::device_arch) \
+ if (TraitSelector::TraitSelectorEnum == TraitSelector::device_arch) { \
if (TargetTriple.getArch() == TargetTriple.getArchTypeForLLVMName(Str)) \
- ActiveTraits.set(unsigned(TraitProperty::Enum));
+ ActiveTraits.set(unsigned(TraitProperty::Enum)); \
+ if (StringRef(Str) == StringRef("x86_64") && \
+ TargetTriple.getArch() == Triple::x86_64) \
+ ActiveTraits.set(unsigned(TraitProperty::Enum)); \
+ }
#include "llvm/Frontend/OpenMP/OMPKinds.def"
// TODO: What exactly do we want to see as device ISA trait?
@@ -175,11 +182,11 @@ static int isVariantApplicableInContextHelper(
LLVM_DEBUG({
if (MK == MK_ALL)
dbgs() << "[" << DEBUG_TYPE << "] Property "
- << getOpenMPContextTraitPropertyName(Property)
+ << getOpenMPContextTraitPropertyName(Property, "")
<< " was not in the OpenMP context but match kind is all.\n";
if (MK == MK_NONE)
dbgs() << "[" << DEBUG_TYPE << "] Property "
- << getOpenMPContextTraitPropertyName(Property)
+ << getOpenMPContextTraitPropertyName(Property, "")
<< " was in the OpenMP context but match kind is none.\n";
});
return false;
@@ -198,6 +205,14 @@ static int isVariantApplicableInContextHelper(
continue;
bool IsActiveTrait = Ctx.ActiveTraits.test(unsigned(Property));
+
+ // We overwrite the isa trait as it is actually up to the OMPContext hook to
+ // check the raw string(s).
+ if (Property == TraitProperty::device_isa___ANY)
+ IsActiveTrait = llvm::all_of(VMI.ISATraits, [&](StringRef RawString) {
+ return Ctx.matchesISATrait(RawString);
+ });
+
Optional<bool> Result = HandleTrait(Property, IsActiveTrait);
if (Result.hasValue())
return Result.getValue();
@@ -225,7 +240,7 @@ static int isVariantApplicableInContextHelper(
if (!FoundInOrder) {
LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] Construct property "
- << getOpenMPContextTraitPropertyName(Property)
+ << getOpenMPContextTraitPropertyName(Property, "")
<< " was not nested properly.\n");
return false;
}
@@ -425,8 +440,12 @@ StringRef llvm::omp::getOpenMPContextTraitSelectorName(TraitSelector Kind) {
llvm_unreachable("Unknown trait selector!");
}
-TraitProperty llvm::omp::getOpenMPContextTraitPropertyKind(TraitSet Set,
- StringRef S) {
+TraitProperty llvm::omp::getOpenMPContextTraitPropertyKind(
+ TraitSet Set, TraitSelector Selector, StringRef S) {
+ // Special handling for `device={isa(...)}` as we accept anything here. It is
+ // up to the target to decide if the feature is available.
+ if (Set == TraitSet::device && Selector == TraitSelector::device_isa)
+ return TraitProperty::device_isa___ANY;
#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \
if (Set == TraitSet::TraitSetEnum && Str == S) \
return TraitProperty::Enum;
@@ -444,7 +463,10 @@ llvm::omp::getOpenMPContextTraitPropertyForSelector(TraitSelector Selector) {
#include "llvm/Frontend/OpenMP/OMPKinds.def"
.Default(TraitProperty::invalid);
}
-StringRef llvm::omp::getOpenMPContextTraitPropertyName(TraitProperty Kind) {
+StringRef llvm::omp::getOpenMPContextTraitPropertyName(TraitProperty Kind,
+ StringRef RawString) {
+ if (Kind == TraitProperty::device_isa___ANY)
+ return RawString;
switch (Kind) {
#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \
case TraitProperty::Enum: \
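
The OMPContext changes above route device={isa(...)} selectors to a catch-all device_isa___ANY property and treat it as active only when every raw ISA string is accepted by the target hook matchesISATrait. A standalone sketch of that all-of check, with hypothetical names:

    #include <algorithm>
    #include <functional>
    #include <string>
    #include <vector>

    // The isa selector counts as active only if every raw ISA string passes
    // the target-provided predicate, mirroring the llvm::all_of call above.
    bool isaTraitActive(const std::vector<std::string> &ISATraits,
                        const std::function<bool(const std::string &)> &Matches) {
      return std::all_of(ISATraits.begin(), ISATraits.end(), Matches);
    }
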
diff --git a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 6c72cd01ce6e..1f67aecb57e9 100644
--- a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -15,7 +15,7 @@
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
@@ -126,7 +126,7 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
void OpenMPIRBuilder::initialize() { initializeTypes(M); }
-void OpenMPIRBuilder::finalize() {
+void OpenMPIRBuilder::finalize(bool AllowExtractorSinking) {
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
SmallVector<BasicBlock *, 32> Blocks;
for (OutlineInfo &OI : OutlineInfos) {
@@ -169,6 +169,25 @@ void OpenMPIRBuilder::finalize() {
BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
+ if (AllowExtractorSinking) {
+ // Move instructions from the to-be-deleted ArtificialEntry to the entry
+ // basic block of the parallel region. CodeExtractor may have sunk
+ // allocas/bitcasts for values that are solely used in the outlined
+ // region and do not escape.
+ assert(!ArtificialEntry.empty() &&
+ "Expected instructions to sink in the outlined region");
+ for (BasicBlock::iterator It = ArtificialEntry.begin(),
+ End = ArtificialEntry.end();
+ It != End;) {
+ Instruction &I = *It;
+ It++;
+
+ if (I.isTerminator())
+ continue;
+
+ I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
+ }
+ }
OI.EntryBB->moveBefore(&ArtificialEntry);
ArtificialEntry.eraseFromParent();
}
@@ -214,7 +233,15 @@ Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
GV->setAlignment(Align(8));
Ident = GV;
}
- return Ident;
+ return Builder.CreatePointerCast(Ident, IdentPtr);
+}
+
+Type *OpenMPIRBuilder::getLanemaskType() {
+ LLVMContext &Ctx = M.getContext();
+ Triple triple(M.getTargetTriple());
+
+ // This test is adequate until deviceRTL has finer grained lane widths
+ return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
}
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
@@ -263,8 +290,10 @@ OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
DILocation *DIL = Loc.DL.get();
if (!DIL)
return getOrCreateDefaultSrcLocStr();
- StringRef FileName =
- !DIL->getFilename().empty() ? DIL->getFilename() : M.getName();
+ StringRef FileName = M.getName();
+ if (DIFile *DIF = DIL->getFile())
+ if (Optional<StringRef> Source = DIF->getSource())
+ FileName = *Source;
StringRef Function = DIL->getScope()->getSubprogram()->getName();
Function =
!Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
@@ -279,7 +308,7 @@ Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
}
OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::CreateBarrier(const LocationDescription &Loc, Directive DK,
+OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
bool ForceSimpleCall, bool CheckCancelFlag) {
if (!updateToLocation(Loc))
return Loc.IP;
@@ -334,7 +363,7 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
}
OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::CreateCancel(const LocationDescription &Loc,
+OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
Value *IfCondition,
omp::Directive CanceledDirective) {
if (!updateToLocation(Loc))
@@ -411,10 +440,11 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(
Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
}
-IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
- const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
- PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
- Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
+IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
+ const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
+ BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
+ FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
+ omp::ProcBindKind ProcBind, bool IsCancellable) {
if (!updateToLocation(Loc))
return Loc.IP;
@@ -443,11 +473,17 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
BasicBlock *InsertBB = Builder.GetInsertBlock();
Function *OuterFn = InsertBB->getParent();
+ // Save the outer alloca block because the insertion iterator may get
+ // invalidated and we still need this later.
+ BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
+
// Vector to remember instructions we used only during the modeling but which
// we want to delete at the end.
SmallVector<Instruction *, 4> ToBeDeleted;
- Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
+ // Change the location to the outer alloca insertion point to create and
+ // initialize the allocas we pass into the parallel region.
+ Builder.restoreIP(OuterAllocaIP);
AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
@@ -499,16 +535,17 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
// Generate the privatization allocas in the block that will become the entry
// of the outlined function.
- InsertPointTy AllocaIP(PRegEntryBB,
- PRegEntryBB->getTerminator()->getIterator());
- Builder.restoreIP(AllocaIP);
+ Builder.SetInsertPoint(PRegEntryBB->getTerminator());
+ InsertPointTy InnerAllocaIP = Builder.saveIP();
+
AllocaInst *PrivTIDAddr =
Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
// Add some fake uses for OpenMP provided arguments.
ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
- ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));
+ Instruction *ZeroAddrUse = Builder.CreateLoad(ZeroAddr, "zero.addr.use");
+ ToBeDeleted.push_back(ZeroAddrUse);
// ThenBB
// |
@@ -530,7 +567,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
// Let the caller create the body.
assert(BodyGenCB && "Expected body generation callback!");
InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
- BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
+ BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
@@ -677,11 +714,37 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
if (&V == TIDAddr || &V == ZeroAddr)
return;
- SmallVector<Use *, 8> Uses;
+ SetVector<Use *> Uses;
for (Use &U : V.uses())
if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
if (ParallelRegionBlockSet.count(UserI->getParent()))
- Uses.push_back(&U);
+ Uses.insert(&U);
+
+ // __kmpc_fork_call expects extra arguments as pointers. If the input
+ // already has a pointer type, everything is fine. Otherwise, store the
+ // value onto stack and load it back inside the to-be-outlined region. This
+ // will ensure only the pointer will be passed to the function.
+ // FIXME: if there are more than 15 trailing arguments, they must be
+ // additionally packed in a struct.
+ Value *Inner = &V;
+ if (!V.getType()->isPointerTy()) {
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
+
+ Builder.restoreIP(OuterAllocaIP);
+ Value *Ptr =
+ Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
+
+ // Store to stack at end of the block that currently branches to the entry
+ // block of the to-be-outlined region.
+ Builder.SetInsertPoint(InsertBB,
+ InsertBB->getTerminator()->getIterator());
+ Builder.CreateStore(&V, Ptr);
+
+ // Load back next to allocations in the to-be-outlined region.
+ Builder.restoreIP(InnerAllocaIP);
+ Inner = Builder.CreateLoad(Ptr);
+ }
Value *ReplacementValue = nullptr;
CallInst *CI = dyn_cast<CallInst>(&V);
@@ -689,7 +752,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
ReplacementValue = PrivTID;
} else {
Builder.restoreIP(
- PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
+ PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
assert(ReplacementValue &&
"Expected copy/create callback to set replacement value!");
if (ReplacementValue == &V)
@@ -700,10 +763,28 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
UPtr->set(ReplacementValue);
};
+ // Reset the inner alloca insertion as it will be used for loading the values
+ // wrapped into pointers before passing them into the to-be-outlined region.
+ // Configure it to insert immediately after the fake use of zero address so
+ // that they are available in the generated body and so that the
+ // OpenMP-related values (thread ID and zero address pointers) remain leading
+ // in the argument list.
+ InnerAllocaIP = IRBuilder<>::InsertPoint(
+ ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
+
+ // Reset the outer alloca insertion point to the entry of the relevant block
+ // in case it was invalidated.
+ OuterAllocaIP = IRBuilder<>::InsertPoint(
+ OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
+
for (Value *Input : Inputs) {
LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
PrivHelper(*Input);
}
+ LLVM_DEBUG({
+ for (Value *Output : Outputs)
+ LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
+ });
assert(Outputs.empty() &&
"OpenMP outlining should not produce live-out values!");
@@ -730,7 +811,7 @@ void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
}
-void OpenMPIRBuilder::CreateFlush(const LocationDescription &Loc) {
+void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
if (!updateToLocation(Loc))
return;
emitFlush(Loc);
@@ -748,7 +829,7 @@ void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
Args);
}
-void OpenMPIRBuilder::CreateTaskwait(const LocationDescription &Loc) {
+void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
if (!updateToLocation(Loc))
return;
emitTaskwaitImpl(Loc);
@@ -765,14 +846,14 @@ void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
Args);
}
-void OpenMPIRBuilder::CreateTaskyield(const LocationDescription &Loc) {
+void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
if (!updateToLocation(Loc))
return;
emitTaskyieldImpl(Loc);
}
OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::CreateMaster(const LocationDescription &Loc,
+OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
FinalizeCallbackTy FiniCB) {
@@ -795,7 +876,597 @@ OpenMPIRBuilder::CreateMaster(const LocationDescription &Loc,
/*Conditional*/ true, /*hasFinalize*/ true);
}
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::CreateCritical(
+CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
+ DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
+ BasicBlock *PostInsertBefore, const Twine &Name) {
+ Module *M = F->getParent();
+ LLVMContext &Ctx = M->getContext();
+ Type *IndVarTy = TripCount->getType();
+
+ // Create the basic block structure.
+ BasicBlock *Preheader =
+ BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
+ BasicBlock *Header =
+ BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
+ BasicBlock *Cond =
+ BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
+ BasicBlock *Body =
+ BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
+ BasicBlock *Latch =
+ BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
+ BasicBlock *Exit =
+ BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
+ BasicBlock *After =
+ BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
+
+ // Use specified DebugLoc for new instructions.
+ Builder.SetCurrentDebugLocation(DL);
+
+ Builder.SetInsertPoint(Preheader);
+ Builder.CreateBr(Header);
+
+ Builder.SetInsertPoint(Header);
+ PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
+ IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
+ Builder.CreateBr(Cond);
+
+ Builder.SetInsertPoint(Cond);
+ Value *Cmp =
+ Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
+ Builder.CreateCondBr(Cmp, Body, Exit);
+
+ Builder.SetInsertPoint(Body);
+ Builder.CreateBr(Latch);
+
+ Builder.SetInsertPoint(Latch);
+ Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
+ "omp_" + Name + ".next", /*HasNUW=*/true);
+ Builder.CreateBr(Header);
+ IndVarPHI->addIncoming(Next, Latch);
+
+ Builder.SetInsertPoint(Exit);
+ Builder.CreateBr(After);
+
+ // Remember and return the canonical control flow.
+ LoopInfos.emplace_front();
+ CanonicalLoopInfo *CL = &LoopInfos.front();
+
+ CL->Preheader = Preheader;
+ CL->Header = Header;
+ CL->Cond = Cond;
+ CL->Body = Body;
+ CL->Latch = Latch;
+ CL->Exit = Exit;
+ CL->After = After;
+
+ CL->IsValid = true;
+
+#ifndef NDEBUG
+ CL->assertOK();
+#endif
+ return CL;
+}
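
At the source level, the skeleton built by createLoopSkeleton corresponds to a simple counted loop over an unsigned trip count. The sketch below is an analogy for orientation only, with the generated omp_<name>.* blocks named in comments; it is not what the builder emits verbatim:

    #include <cstdint>

    void canonicalLoopShape(uint64_t TripCount, void (*Body)(uint64_t)) {
      // preheader: falls through into the header.
      for (uint64_t IV = 0;   // header: IV is the PHI node starting at 0
           IV < TripCount;    // cond: unsigned compare against the trip count
           ++IV) {            // inc (latch): IV = IV + 1, branch back to header
        Body(IV);             // body: the insertion point handed to the callback
      }
      // exit -> after: execution resumes here once the loop finishes.
    }
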
+
+CanonicalLoopInfo *
+OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
+ LoopBodyGenCallbackTy BodyGenCB,
+ Value *TripCount, const Twine &Name) {
+ BasicBlock *BB = Loc.IP.getBlock();
+ BasicBlock *NextBB = BB->getNextNode();
+
+ CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
+ NextBB, NextBB, Name);
+ BasicBlock *After = CL->getAfter();
+
+ // If location is not set, don't connect the loop.
+ if (updateToLocation(Loc)) {
+ // Split the loop at the insertion point: Branch to the preheader and move
+ // every following instruction to after the loop (the After BB). Also, the
+ // new successor is the loop's after block.
+ Builder.CreateBr(CL->Preheader);
+ After->getInstList().splice(After->begin(), BB->getInstList(),
+ Builder.GetInsertPoint(), BB->end());
+ After->replaceSuccessorsPhiUsesWith(BB, After);
+ }
+
+ // Emit the body content. We do it after connecting the loop to the CFG so
+ // that the callback does not encounter degenerate BBs.
+ BodyGenCB(CL->getBodyIP(), CL->getIndVar());
+
+#ifndef NDEBUG
+ CL->assertOK();
+#endif
+ return CL;
+}
+
+CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
+ const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+ InsertPointTy ComputeIP, const Twine &Name) {
+
+ // Consider the following difficulties (assuming 8-bit signed integers):
+ // * Adding \p Step to the loop counter which passes \p Stop may overflow:
+ // DO I = 1, 100, 50
+ // * A \p Step of INT_MIN cannot be normalized to a positive direction:
+ // DO I = 100, 0, -128
+
+ // Start, Stop and Step must be of the same integer type.
+ auto *IndVarTy = cast<IntegerType>(Start->getType());
+ assert(IndVarTy == Stop->getType() && "Stop type mismatch");
+ assert(IndVarTy == Step->getType() && "Step type mismatch");
+
+ LocationDescription ComputeLoc =
+ ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
+ updateToLocation(ComputeLoc);
+
+ ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
+ ConstantInt *One = ConstantInt::get(IndVarTy, 1);
+
+ // Like Step, but always positive.
+ Value *Incr = Step;
+
+ // Distance between Start and Stop; always positive.
+ Value *Span;
+
+ // Condition checking whether no iterations are executed at all, e.g. because
+ // UB < LB.
+ Value *ZeroCmp;
+
+ if (IsSigned) {
+ // Ensure that increment is positive. If not, negate and invert LB and UB.
+ Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
+ Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
+ Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
+ Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
+ Span = Builder.CreateSub(UB, LB, "", false, true);
+ ZeroCmp = Builder.CreateICmp(
+ InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
+ } else {
+ Span = Builder.CreateSub(Stop, Start, "", true);
+ ZeroCmp = Builder.CreateICmp(
+ InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
+ }
+
+ Value *CountIfLooping;
+ if (InclusiveStop) {
+ CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
+ } else {
+ // Avoid incrementing past stop since it could overflow.
+ Value *CountIfTwo = Builder.CreateAdd(
+ Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
+ Value *OneCmp = Builder.CreateICmp(
+ InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
+ CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
+ }
+ Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
+ "omp_" + Name + ".tripcount");
+
+ auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
+ Builder.restoreIP(CodeGenIP);
+ Value *Span = Builder.CreateMul(IV, Step);
+ Value *IndVar = Builder.CreateAdd(Span, Start);
+ BodyGenCB(Builder.saveIP(), IndVar);
+ };
+ LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
+ return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
+}
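
The trip-count arithmetic above divides the span between Start and Stop instead of stepping the counter past Stop, which could overflow. A simplified plain-integer sketch of the unsigned, positive-step case:

    #include <cstdint>

    // Simplified unsigned case, assuming Step > 0.
    uint64_t tripCount(uint64_t Start, uint64_t Stop, uint64_t Step,
                       bool InclusiveStop) {
      uint64_t Span = Stop - Start; // the builder emits this subtraction with nuw
      bool NoIterations = InclusiveStop ? Stop < Start : Stop <= Start;
      if (NoIterations)
        return 0;
      if (InclusiveStop)
        return Span / Step + 1;
      // Exclusive stop: never add Step past Stop, since that could wrap around.
      return Span <= Step ? 1 : (Span - 1) / Step + 1;
    }
    // For example, tripCount(0, 10, 3, false) == 4, covering 0, 3, 6 and 9.
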
+
+// Returns an LLVM function to call for initializing loop bounds using OpenMP
+// static scheduling depending on `type`. Only i32 and i64 are supported by the
+// runtime. Always interpret integers as unsigned similarly to
+// CanonicalLoopInfo.
+static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
+ OpenMPIRBuilder &OMPBuilder) {
+ unsigned Bitwidth = Ty->getIntegerBitWidth();
+ if (Bitwidth == 32)
+ return OMPBuilder.getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
+ if (Bitwidth == 64)
+ return OMPBuilder.getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
+ llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
+// Sets the number of loop iterations to the given value. This value must be
+// valid in the condition block (i.e., defined in the preheader) and is
+// interpreted as an unsigned integer.
+void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
+ Instruction *CmpI = &CLI->getCond()->front();
+ assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
+ CmpI->setOperand(1, TripCount);
+ CLI->assertOK();
+}
+
+CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
+ const LocationDescription &Loc, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
+ // Set up the source location value for OpenMP runtime.
+ if (!updateToLocation(Loc))
+ return nullptr;
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *SrcLoc = getOrCreateIdent(SrcLocStr);
+
+ // Declare useful OpenMP runtime functions.
+ Value *IV = CLI->getIndVar();
+ Type *IVTy = IV->getType();
+ FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
+ FunctionCallee StaticFini =
+ getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
+
+ // Allocate space for computed loop bounds as expected by the "init" function.
+ Builder.restoreIP(AllocaIP);
+ Type *I32Type = Type::getInt32Ty(M.getContext());
+ Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
+ Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
+ Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
+ Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
+
+ // At the end of the preheader, prepare for calling the "init" function by
+ // storing the current loop bounds into the allocated space. A canonical loop
+ // always iterates from 0 to trip-count with step 1. Note that "init" expects
+ // and produces an inclusive upper bound.
+ Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
+ Constant *Zero = ConstantInt::get(IVTy, 0);
+ Constant *One = ConstantInt::get(IVTy, 1);
+ Builder.CreateStore(Zero, PLowerBound);
+ Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
+ Builder.CreateStore(UpperBound, PUpperBound);
+ Builder.CreateStore(One, PStride);
+
+ if (!Chunk)
+ Chunk = One;
+
+ Value *ThreadNum = getOrCreateThreadID(SrcLoc);
+
+ // TODO: extract scheduling type and map it to OMP constant. This is currently
+ // happening in kmp.h and its ilk and needs to be moved to OpenMP.td first.
+ constexpr int StaticSchedType = 34;
+ Constant *SchedulingType = ConstantInt::get(I32Type, StaticSchedType);
+
+ // Call the "init" function and update the trip count of the loop with the
+ // value it produced.
+ Builder.CreateCall(StaticInit,
+ {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
+ PUpperBound, PStride, One, Chunk});
+ Value *LowerBound = Builder.CreateLoad(PLowerBound);
+ Value *InclusiveUpperBound = Builder.CreateLoad(PUpperBound);
+ Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
+ Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
+ setCanonicalLoopTripCount(CLI, TripCount);
+
+ // Update all uses of the induction variable except the one in the condition
+ // block that compares it with the actual upper bound, and the increment in
+ // the latch block.
+ // TODO: this can eventually move to CanonicalLoopInfo or to a new
+ // CanonicalLoopInfoUpdater interface.
+ Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
+ Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
+ IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
+ auto *Instr = dyn_cast<Instruction>(U.getUser());
+ return !Instr ||
+ (Instr->getParent() != CLI->getCond() &&
+ Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
+ });
+
+ // In the "exit" block, call the "fini" function.
+ Builder.SetInsertPoint(CLI->getExit(),
+ CLI->getExit()->getTerminator()->getIterator());
+ Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
+
+ // Add the barrier if requested.
+ if (NeedsBarrier)
+ createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+ omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
+ /* CheckCancelFlag */ false);
+
+ CLI->assertOK();
+ return CLI;
+}
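
For a single thread, the static workshare rewrite above amounts to iterating the inclusive [LB, UB] slice returned by the runtime and shifting the canonical induction variable by LB. A hedged sketch of that per-thread view, with the runtime call names and ABI deliberately omitted:

    #include <cstdint>

    void runThreadSlice(uint64_t LB, uint64_t UB, void (*Body)(uint64_t)) {
      uint64_t TripCount = UB - LB + 1; // the "init" call produces inclusive bounds
      for (uint64_t IV = 0; IV < TripCount; ++IV)
        Body(IV + LB); // corresponds to UpdatedIV = IV + LowerBound in the hunk
    }
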
+
+/// Make \p Source branch to \p Target.
+///
+/// Handles two situations:
+/// * \p Source already has an unconditional branch.
+/// * \p Source is a degenerate block (no terminator because the BB is
+/// the current head of the IR construction).
+static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
+ if (Instruction *Term = Source->getTerminator()) {
+ auto *Br = cast<BranchInst>(Term);
+ assert(!Br->isConditional() &&
+ "BB's terminator must be an unconditional branch (or degenerate)");
+ BasicBlock *Succ = Br->getSuccessor(0);
+ Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
+ Br->setSuccessor(0, Target);
+ return;
+ }
+
+ auto *NewBr = BranchInst::Create(Target, Source);
+ NewBr->setDebugLoc(DL);
+}
+
+/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
+/// after this \p OldTarget will be orphaned.
+static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
+ BasicBlock *NewTarget, DebugLoc DL) {
+ for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
+ redirectTo(Pred, NewTarget, DL);
+}
+
+/// Determine which blocks in \p BBs are reachable from outside and remove the
+/// ones that are not reachable from the function.
+static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
+ SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
+ auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
+ for (Use &U : BB->uses()) {
+ auto *UseInst = dyn_cast<Instruction>(U.getUser());
+ if (!UseInst)
+ continue;
+ if (BBsToErase.count(UseInst->getParent()))
+ continue;
+ return true;
+ }
+ return false;
+ };
+
+ while (true) {
+ bool Changed = false;
+ for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
+ if (HasRemainingUses(BB)) {
+ BBsToErase.erase(BB);
+ Changed = true;
+ }
+ }
+ if (!Changed)
+ break;
+ }
+
+ SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
+ DeleteDeadBlocks(BBVec);
+}
+
+std::vector<CanonicalLoopInfo *>
+OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
+ ArrayRef<Value *> TileSizes) {
+ assert(TileSizes.size() == Loops.size() &&
+ "Must pass as many tile sizes as there are loops");
+ int NumLoops = Loops.size();
+ assert(NumLoops >= 1 && "At least one loop to tile required");
+
+ CanonicalLoopInfo *OutermostLoop = Loops.front();
+ CanonicalLoopInfo *InnermostLoop = Loops.back();
+ Function *F = OutermostLoop->getBody()->getParent();
+ BasicBlock *InnerEnter = InnermostLoop->getBody();
+ BasicBlock *InnerLatch = InnermostLoop->getLatch();
+
+ // Collect original trip counts and induction variables to be accessible by
+ // index. Also, the structure of the original loops is not preserved during
+ // the construction of the tiled loops, so do it before we scavenge the BBs of
+ // any original CanonicalLoopInfo.
+ SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
+ for (CanonicalLoopInfo *L : Loops) {
+ OrigTripCounts.push_back(L->getTripCount());
+ OrigIndVars.push_back(L->getIndVar());
+ }
+
+ // Collect the code between loop headers. These may contain SSA definitions
+ // that are used in the loop nest body. To be usable within the innermost
+ // body, these BasicBlocks will be sunk into the loop nest body. That is,
+ // these instructions may be executed more often than before the tiling.
+ // TODO: It would be sufficient to only sink them into the body of the
+ // corresponding tile loop.
+ SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
+ for (int i = 0; i < NumLoops - 1; ++i) {
+ CanonicalLoopInfo *Surrounding = Loops[i];
+ CanonicalLoopInfo *Nested = Loops[i + 1];
+
+ BasicBlock *EnterBB = Surrounding->getBody();
+ BasicBlock *ExitBB = Nested->getHeader();
+ InbetweenCode.emplace_back(EnterBB, ExitBB);
+ }
+
+ // Compute the trip counts of the floor loops.
+ Builder.SetCurrentDebugLocation(DL);
+ Builder.restoreIP(OutermostLoop->getPreheaderIP());
+ SmallVector<Value *, 4> FloorCount, FloorRems;
+ for (int i = 0; i < NumLoops; ++i) {
+ Value *TileSize = TileSizes[i];
+ Value *OrigTripCount = OrigTripCounts[i];
+ Type *IVType = OrigTripCount->getType();
+
+ Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
+ Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
+
+ // 0 if the tilesize divides the tripcount, 1 otherwise.
+ // 1 means we need an additional iteration for a partial tile.
+ //
+ // Unfortunately we cannot just use the roundup-formula
+ // (tripcount + tilesize - 1)/tilesize
+ // because the summation might overflow. We do not want to introduce undefined
+ // behavior when the untiled loop nest did not.
+ Value *FloorTripOverflow =
+ Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
+
+ FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
+ FloorTripCount =
+ Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
+ "omp_floor" + Twine(i) + ".tripcount", true);
+
+ // Remember some values for later use.
+ FloorCount.push_back(FloorTripCount);
+ FloorRems.push_back(FloorTripRem);
+ }
+
+ // Generate the new loop nest, from the outermost to the innermost.
+ std::vector<CanonicalLoopInfo *> Result;
+ Result.reserve(NumLoops * 2);
+
+ // The basic block of the surrounding loop that enters the generated loop
+ // nest.
+ BasicBlock *Enter = OutermostLoop->getPreheader();
+
+ // The basic block of the surrounding loop where the inner code should
+ // continue.
+ BasicBlock *Continue = OutermostLoop->getAfter();
+
+ // Where the next loop basic block should be inserted.
+ BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
+
+ auto EmbeddNewLoop =
+ [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
+ Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
+ CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
+ DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
+ redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
+ redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
+
+ // Setup the position where the next embedded loop connects to this loop.
+ Enter = EmbeddedLoop->getBody();
+ Continue = EmbeddedLoop->getLatch();
+ OutroInsertBefore = EmbeddedLoop->getLatch();
+ return EmbeddedLoop;
+ };
+
+ auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
+ const Twine &NameBase) {
+ for (auto P : enumerate(TripCounts)) {
+ CanonicalLoopInfo *EmbeddedLoop =
+ EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
+ Result.push_back(EmbeddedLoop);
+ }
+ };
+
+ EmbeddNewLoops(FloorCount, "floor");
+
+ // Within the innermost floor loop, emit the code that computes the tile
+ // loop trip counts.
+ Builder.SetInsertPoint(Enter->getTerminator());
+ SmallVector<Value *, 4> TileCounts;
+ for (int i = 0; i < NumLoops; ++i) {
+ CanonicalLoopInfo *FloorLoop = Result[i];
+ Value *TileSize = TileSizes[i];
+
+ Value *FloorIsEpilogue =
+ Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
+ Value *TileTripCount =
+ Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
+
+ TileCounts.push_back(TileTripCount);
+ }
+
+ // Create the tile loops.
+ EmbeddNewLoops(TileCounts, "tile");
+
+ // Insert the inbetween code into the body.
+ BasicBlock *BodyEnter = Enter;
+ BasicBlock *BodyEntered = nullptr;
+ for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
+ BasicBlock *EnterBB = P.first;
+ BasicBlock *ExitBB = P.second;
+
+ if (BodyEnter)
+ redirectTo(BodyEnter, EnterBB, DL);
+ else
+ redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
+
+ BodyEnter = nullptr;
+ BodyEntered = ExitBB;
+ }
+
+ // Append the original loop nest body into the generated loop nest body.
+ if (BodyEnter)
+ redirectTo(BodyEnter, InnerEnter, DL);
+ else
+ redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
+ redirectAllPredecessorsTo(InnerLatch, Continue, DL);
+
+ // Replace the original induction variable with an induction variable computed
+ // from the tile and floor induction variables.
+ Builder.restoreIP(Result.back()->getBodyIP());
+ for (int i = 0; i < NumLoops; ++i) {
+ CanonicalLoopInfo *FloorLoop = Result[i];
+ CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
+ Value *OrigIndVar = OrigIndVars[i];
+ Value *Size = TileSizes[i];
+
+ Value *Scale =
+ Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
+ Value *Shift =
+ Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
+ OrigIndVar->replaceAllUsesWith(Shift);
+ }
+
+ // Remove unused parts of the original loops.
+ SmallVector<BasicBlock *, 12> OldControlBBs;
+ OldControlBBs.reserve(6 * Loops.size());
+ for (CanonicalLoopInfo *Loop : Loops)
+ Loop->collectControlBlocks(OldControlBBs);
+ removeUnusedBlocksFromParent(OldControlBBs);
+
+#ifndef NDEBUG
+ for (CanonicalLoopInfo *GenL : Result)
+ GenL->assertOK();
+#endif
+ return Result;
+}
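
The floor/tile arithmetic above computes ceil(TripCount / TileSize) without the overflow-prone (TripCount + TileSize - 1) / TileSize form, gives the final partial tile only the remainder, and reconstructs the original induction variable as TileSize * Floor + Tile. A plain one-dimensional sketch of that arithmetic, not the IR-level construction:

    #include <cstdint>

    // Assumes TileSize > 0.
    void tiledLoop(uint64_t TripCount, uint64_t TileSize,
                   void (*Body)(uint64_t)) {
      uint64_t FloorRem = TripCount % TileSize;
      uint64_t FloorCount = TripCount / TileSize + (FloorRem != 0 ? 1 : 0);
      for (uint64_t Floor = 0; Floor < FloorCount; ++Floor) {
        uint64_t TileCount =
            (FloorRem != 0 && Floor + 1 == FloorCount) ? FloorRem : TileSize;
        for (uint64_t Tile = 0; Tile < TileCount; ++Tile)
          Body(TileSize * Floor + Tile); // the reconstructed original IV
      }
    }
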
+
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
+ llvm::Value *BufSize, llvm::Value *CpyBuf,
+ llvm::Value *CpyFn, llvm::Value *DidIt) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+
+ llvm::Value *DidItLD = Builder.CreateLoad(DidIt);
+
+ Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
+ Builder.CreateCall(Fn, Args);
+
+ return Builder.saveIP();
+}
+
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
+ BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
+
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ // If needed (i.e. not null), initialize `DidIt` with 0
+ if (DidIt) {
+ Builder.CreateStore(Builder.getInt32(0), DidIt);
+ }
+
+ Directive OMPD = Directive::OMPD_single;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Value *Args[] = {Ident, ThreadId};
+
+ Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
+ Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
+
+ Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
+ Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
+
+ // generates the following:
+ // if (__kmpc_single()) {
+ // .... single region ...
+ // __kmpc_end_single
+ // }
+
+ return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
+ /*Conditional*/ true, /*hasFinalize*/ true);
+}
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
@@ -959,7 +1630,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
ExitCall->getIterator());
}
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::CreateCopyinClauseBlocks(
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
if (!IP.isSet())
@@ -1009,7 +1680,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::CreateCopyinClauseBlocks(
return Builder.saveIP();
}
-CallInst *OpenMPIRBuilder::CreateOMPAlloc(const LocationDescription &Loc,
+CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
Value *Size, Value *Allocator,
std::string Name) {
IRBuilder<>::InsertPointGuard IPG(Builder);
@@ -1025,7 +1696,7 @@ CallInst *OpenMPIRBuilder::CreateOMPAlloc(const LocationDescription &Loc,
return Builder.CreateCall(Fn, Args, Name);
}
-CallInst *OpenMPIRBuilder::CreateOMPFree(const LocationDescription &Loc,
+CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
Value *Addr, Value *Allocator,
std::string Name) {
IRBuilder<>::InsertPointGuard IPG(Builder);
@@ -1039,7 +1710,7 @@ CallInst *OpenMPIRBuilder::CreateOMPFree(const LocationDescription &Loc,
return Builder.CreateCall(Fn, Args, Name);
}
-CallInst *OpenMPIRBuilder::CreateCachedThreadPrivate(
+CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
const LocationDescription &Loc, llvm::Value *Pointer,
llvm::ConstantInt *Size, const llvm::Twine &Name) {
IRBuilder<>::InsertPointGuard IPG(Builder);
@@ -1120,7 +1791,7 @@ void OpenMPIRBuilder::initializeTypes(Module &M) {
VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
VarName##Ptr = PointerType::getUnqual(VarName);
#define OMP_STRUCT_TYPE(VarName, StructName, ...) \
- T = M.getTypeByName(StructName); \
+ T = StructType::getTypeByName(Ctx, StructName); \
if (!T) \
T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
VarName = T; \
@@ -1144,3 +1815,102 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks(
Worklist.push_back(SuccBB);
}
}
+
+void CanonicalLoopInfo::collectControlBlocks(
+ SmallVectorImpl<BasicBlock *> &BBs) {
+ // We only count those BBs as control blocks for which we do not need to
+ // reverse the CFG, i.e. not the loop body which can contain arbitrary control
+ // flow. For consistency, this also means we do not add the Body block, which
+ // is just the entry to the body code.
+ BBs.reserve(BBs.size() + 6);
+ BBs.append({Preheader, Header, Cond, Latch, Exit, After});
+}
+
+void CanonicalLoopInfo::assertOK() const {
+#ifndef NDEBUG
+ if (!IsValid)
+ return;
+
+ // Verify standard control-flow we use for OpenMP loops.
+ assert(Preheader);
+ assert(isa<BranchInst>(Preheader->getTerminator()) &&
+ "Preheader must terminate with unconditional branch");
+ assert(Preheader->getSingleSuccessor() == Header &&
+ "Preheader must jump to header");
+
+ assert(Header);
+ assert(isa<BranchInst>(Header->getTerminator()) &&
+ "Header must terminate with unconditional branch");
+ assert(Header->getSingleSuccessor() == Cond &&
+ "Header must jump to exiting block");
+
+ assert(Cond);
+ assert(Cond->getSinglePredecessor() == Header &&
+ "Exiting block only reachable from header");
+
+ assert(isa<BranchInst>(Cond->getTerminator()) &&
+ "Exiting block must terminate with conditional branch");
+ assert(size(successors(Cond)) == 2 &&
+ "Exiting block must have two successors");
+ assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
+ "Exiting block's first successor jump to the body");
+ assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
+ "Exiting block's second successor must exit the loop");
+
+ assert(Body);
+ assert(Body->getSinglePredecessor() == Cond &&
+ "Body only reachable from exiting block");
+ assert(!isa<PHINode>(Body->front()));
+
+ assert(Latch);
+ assert(isa<BranchInst>(Latch->getTerminator()) &&
+ "Latch must terminate with unconditional branch");
+ assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
+ // TODO: To support simple redirecting of the end of the body code that has
+ // multiple predecessors; introduce another auxiliary basic block like preheader and after.
+ assert(Latch->getSinglePredecessor() != nullptr);
+ assert(!isa<PHINode>(Latch->front()));
+
+ assert(Exit);
+ assert(isa<BranchInst>(Exit->getTerminator()) &&
+ "Exit block must terminate with unconditional branch");
+ assert(Exit->getSingleSuccessor() == After &&
+ "Exit block must jump to after block");
+
+ assert(After);
+ assert(After->getSinglePredecessor() == Exit &&
+ "After block only reachable from exit block");
+ assert(After->empty() || !isa<PHINode>(After->front()));
+
+ Instruction *IndVar = getIndVar();
+ assert(IndVar && "Canonical induction variable not found?");
+ assert(isa<IntegerType>(IndVar->getType()) &&
+ "Induction variable must be an integer");
+ assert(cast<PHINode>(IndVar)->getParent() == Header &&
+ "Induction variable must be a PHI in the loop header");
+ assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
+ assert(
+ cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
+ assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
+
+ auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
+ assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
+ assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
+ assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
+ assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
+ ->isOne());
+
+ Value *TripCount = getTripCount();
+ assert(TripCount && "Loop trip count not found?");
+ assert(IndVar->getType() == TripCount->getType() &&
+ "Trip count and induction variable must have the same type");
+
+ auto *CmpI = cast<CmpInst>(&Cond->front());
+ assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
+ "Exit condition must be a signed less-than comparison");
+ assert(CmpI->getOperand(0) == IndVar &&
+ "Exit condition must compare the induction variable");
+ assert(CmpI->getOperand(1) == TripCount &&
+ "Exit condition must compare with the trip count");
+#endif
+}
diff --git a/contrib/llvm-project/llvm/lib/FuzzMutate/IRMutator.cpp b/contrib/llvm-project/llvm/lib/FuzzMutate/IRMutator.cpp
index 2fc65981f1db..33b90097ab2c 100644
--- a/contrib/llvm-project/llvm/lib/FuzzMutate/IRMutator.cpp
+++ b/contrib/llvm-project/llvm/lib/FuzzMutate/IRMutator.cpp
@@ -197,3 +197,46 @@ void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
Inst.replaceAllUsesWith(RS.getSelection());
Inst.eraseFromParent();
}
+
+void InstModificationIRStrategy::mutate(Instruction &Inst,
+ RandomIRBuilder &IB) {
+ SmallVector<std::function<void()>, 8> Modifications;
+ CmpInst *CI = nullptr;
+ GetElementPtrInst *GEP = nullptr;
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::Sub:
+ case Instruction::Shl:
+ Modifications.push_back([&Inst]() { Inst.setHasNoSignedWrap(true); }),
+ Modifications.push_back([&Inst]() { Inst.setHasNoSignedWrap(false); });
+ Modifications.push_back([&Inst]() { Inst.setHasNoUnsignedWrap(true); });
+ Modifications.push_back([&Inst]() { Inst.setHasNoUnsignedWrap(false); });
+
+ break;
+ case Instruction::ICmp:
+ CI = cast<ICmpInst>(&Inst);
+ Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_EQ); });
+ Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_NE); });
+ Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_UGT); });
+ Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_UGE); });
+ Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_ULT); });
+ Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_ULE); });
+ Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_SGT); });
+ Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_SGE); });
+ Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_SLT); });
+ Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_SLE); });
+ break;
+ case Instruction::GetElementPtr:
+ GEP = cast<GetElementPtrInst>(&Inst);
+ Modifications.push_back([GEP]() { GEP->setIsInBounds(true); });
+ Modifications.push_back([GEP]() { GEP->setIsInBounds(false); });
+ break;
+ }
+
+ auto RS = makeSampler(IB.Rand, Modifications);
+ if (RS)
+ RS.getSelection()();
+}
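
The new InstModificationIRStrategy gathers the legal in-place tweaks for an instruction as closures and applies one picked at random via a sampler. A standalone sketch of that pattern using only the standard library, with illustrative names:

    #include <functional>
    #include <random>
    #include <vector>

    // Collect the applicable tweaks as closures, then apply one chosen
    // uniformly at random; adding a new kind of tweak is just another push_back.
    void applyRandomModification(std::vector<std::function<void()>> &Modifications,
                                 std::mt19937 &Rng) {
      if (Modifications.empty())
        return; // mirrors the "if (RS)" guard above: nothing applicable
      std::uniform_int_distribution<size_t> Pick(0, Modifications.size() - 1);
      Modifications[Pick(Rng)]();
    }
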
diff --git a/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
index fd08310316b3..69abf8769e4b 100644
--- a/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
@@ -116,6 +116,15 @@ struct OrderMap {
} // end anonymous namespace
+/// Look for a value that might be wrapped as metadata, e.g. a value in a
+/// metadata operand. Returns the input value as-is if it is not wrapped.
+static const Value *skipMetadataWrapper(const Value *V) {
+ if (const auto *MAV = dyn_cast<MetadataAsValue>(V))
+ if (const auto *VAM = dyn_cast<ValueAsMetadata>(MAV->getMetadata()))
+ return VAM->getValue();
+ return V;
+}
+
static void orderValue(const Value *V, OrderMap &OM) {
if (OM.lookup(V).first)
return;
@@ -132,8 +141,6 @@ static void orderValue(const Value *V, OrderMap &OM) {
}
static OrderMap orderModule(const Module *M) {
- // This needs to match the order used by ValueEnumerator::ValueEnumerator()
- // and ValueEnumerator::incorporateFunction().
OrderMap OM;
for (const GlobalVariable &G : M->globals()) {
@@ -167,10 +174,12 @@ static OrderMap orderModule(const Module *M) {
for (const BasicBlock &BB : F) {
orderValue(&BB, OM);
for (const Instruction &I : BB) {
- for (const Value *Op : I.operands())
+ for (const Value *Op : I.operands()) {
+ Op = skipMetadataWrapper(Op);
if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
isa<InlineAsm>(*Op))
orderValue(Op, OM);
+ }
orderValue(&I, OM);
}
}
@@ -284,9 +293,11 @@ static UseListOrderStack predictUseListOrder(const Module *M) {
predictValueUseListOrder(&A, &F, OM, Stack);
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
- for (const Value *Op : I.operands())
+ for (const Value *Op : I.operands()) {
+ Op = skipMetadataWrapper(Op);
if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues.
predictValueUseListOrder(Op, &F, OM, Stack);
+ }
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
predictValueUseListOrder(&I, &F, OM, Stack);
@@ -388,6 +399,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::AMDGPU_PS: Out << "amdgpu_ps"; break;
case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break;
case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
+ case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break;
}
}
@@ -597,6 +609,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
case Type::LabelTyID: OS << "label"; return;
case Type::MetadataTyID: OS << "metadata"; return;
case Type::X86_MMXTyID: OS << "x86_mmx"; return;
+ case Type::X86_AMXTyID: OS << "x86_amx"; return;
case Type::TokenTyID: OS << "token"; return;
case Type::IntegerTyID:
OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
@@ -656,9 +669,9 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
VectorType *PTy = cast<VectorType>(Ty);
ElementCount EC = PTy->getElementCount();
OS << "<";
- if (EC.Scalable)
+ if (EC.isScalable())
OS << "vscale x ";
- OS << EC.Min << " x ";
+ OS << EC.getKnownMinValue() << " x ";
print(PTy->getElementType(), OS);
OS << '>';
return;
@@ -1355,9 +1368,8 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
// "Inf" or NaN, that atof will accept, but the lexer will not. Check
// that the string matches the "[-+]?[0-9]" regex.
//
- assert(((StrVal[0] >= '0' && StrVal[0] <= '9') ||
- ((StrVal[0] == '-' || StrVal[0] == '+') &&
- (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
+ assert((isDigit(StrVal[0]) || ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ isDigit(StrVal[1]))) &&
"[-+]?[0-9] regex does not match!");
// Reparse stringized version!
if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) {
@@ -1373,9 +1385,19 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
"assuming that double is 64 bits!");
APFloat apf = APF;
// Floats are represented in ASCII IR as double, convert.
- if (!isDouble)
+ // FIXME: We should allow 32-bit hex float and remove this.
+ if (!isDouble) {
+ // A signaling NaN is quieted on conversion, so we need to recreate the
+ // expected value after convert (quiet bit of the payload is clear).
+ bool IsSNAN = apf.isSignaling();
apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
- &ignored);
+ &ignored);
+ if (IsSNAN) {
+ APInt Payload = apf.bitcastToAPInt();
+ apf = APFloat::getSNaN(APFloat::IEEEdouble(), apf.isNegative(),
+ &Payload);
+ }
+ }
Out << format_hex(apf.bitcastToAPInt().getZExtValue(), 0, /*Upper=*/true);
return;
}
@@ -1433,6 +1455,13 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
return;
}
+ if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV)) {
+ Out << "dso_local_equivalent ";
+ WriteAsOperandInternal(Out, Equiv->getGlobalValue(), &TypePrinter, Machine,
+ Context);
+ return;
+ }
+
if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
Type *ETy = CA->getType()->getElementType();
Out << '[';
@@ -1511,7 +1540,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
}
if (isa<ConstantVector>(CV) || isa<ConstantDataVector>(CV)) {
- auto *CVVTy = cast<VectorType>(CV->getType());
+ auto *CVVTy = cast<FixedVectorType>(CV->getType());
Type *ETy = CVVTy->getElementType();
Out << '<';
TypePrinter.print(ETy, Out);
@@ -1539,6 +1568,11 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
return;
}
+ if (isa<PoisonValue>(CV)) {
+ Out << "poison";
+ return;
+ }
+
if (isa<UndefValue>(CV)) {
Out << "undef";
return;
@@ -1889,6 +1923,57 @@ static void writeDISubrange(raw_ostream &Out, const DISubrange *N,
Out << ")";
}
+static void writeDIGenericSubrange(raw_ostream &Out, const DIGenericSubrange *N,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DIGenericSubrange(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+
+ auto IsConstant = [&](Metadata *Bound) -> bool {
+ if (auto *BE = dyn_cast_or_null<DIExpression>(Bound)) {
+ return BE->isSignedConstant();
+ }
+ return false;
+ };
+
+ auto GetConstant = [&](Metadata *Bound) -> int64_t {
+ assert(IsConstant(Bound) && "Expected constant");
+ auto *BE = dyn_cast_or_null<DIExpression>(Bound);
+ return static_cast<int64_t>(BE->getElement(1));
+ };
+
+ auto *Count = N->getRawCountNode();
+ if (IsConstant(Count))
+ Printer.printInt("count", GetConstant(Count),
+ /* ShouldSkipZero */ false);
+ else
+ Printer.printMetadata("count", Count, /*ShouldSkipNull */ true);
+
+ auto *LBound = N->getRawLowerBound();
+ if (IsConstant(LBound))
+ Printer.printInt("lowerBound", GetConstant(LBound),
+ /* ShouldSkipZero */ false);
+ else
+ Printer.printMetadata("lowerBound", LBound, /*ShouldSkipNull */ true);
+
+ auto *UBound = N->getRawUpperBound();
+ if (IsConstant(UBound))
+ Printer.printInt("upperBound", GetConstant(UBound),
+ /* ShouldSkipZero */ false);
+ else
+ Printer.printMetadata("upperBound", UBound, /*ShouldSkipNull */ true);
+
+ auto *Stride = N->getRawStride();
+ if (IsConstant(Stride))
+ Printer.printInt("stride", GetConstant(Stride),
+ /* ShouldSkipZero */ false);
+ else
+ Printer.printMetadata("stride", Stride, /*ShouldSkipNull */ true);
+
+ Out << ")";
+}
+
static void writeDIEnumerator(raw_ostream &Out, const DIEnumerator *N,
TypePrinting *, SlotTracker *, const Module *) {
Out << "!DIEnumerator(";
@@ -1916,6 +2001,23 @@ static void writeDIBasicType(raw_ostream &Out, const DIBasicType *N,
Out << ")";
}
+static void writeDIStringType(raw_ostream &Out, const DIStringType *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DIStringType(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ if (N->getTag() != dwarf::DW_TAG_string_type)
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("stringLength", N->getRawStringLength());
+ Printer.printMetadata("stringLengthExpression", N->getRawStringLengthExp());
+ Printer.printInt("size", N->getSizeInBits());
+ Printer.printInt("align", N->getAlignInBits());
+ Printer.printDwarfEnum("encoding", N->getEncoding(),
+ dwarf::AttributeEncodingString);
+ Out << ")";
+}
+
static void writeDIDerivedType(raw_ostream &Out, const DIDerivedType *N,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
@@ -1962,6 +2064,13 @@ static void writeDICompositeType(raw_ostream &Out, const DICompositeType *N,
Printer.printString("identifier", N->getIdentifier());
Printer.printMetadata("discriminator", N->getRawDiscriminator());
Printer.printMetadata("dataLocation", N->getRawDataLocation());
+ Printer.printMetadata("associated", N->getRawAssociated());
+ Printer.printMetadata("allocated", N->getRawAllocated());
+ if (auto *RankConst = N->getRankConst())
+ Printer.printInt("rank", RankConst->getSExtValue(),
+ /* ShouldSkipZero */ false);
+ else
+ Printer.printMetadata("rank", N->getRawRank(), /*ShouldSkipNull */ true);
Out << ")";
}
@@ -2136,6 +2245,7 @@ static void writeDIModule(raw_ostream &Out, const DIModule *N,
Printer.printString("apinotes", N->getAPINotesFile());
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLineNo());
+ Printer.printBool("isDecl", N->getIsDecl(), /* Default */ false);
Out << ")";
}
@@ -3087,7 +3197,7 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
printTypeIdInfo(*TIdInfo);
auto PrintRange = [&](const ConstantRange &Range) {
- Out << "[" << Range.getLower() << ", " << Range.getSignedMax() << "]";
+ Out << "[" << Range.getSignedMin() << ", " << Range.getSignedMax() << "]";
};
if (!FS->paramAccesses().empty()) {
@@ -3103,7 +3213,7 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
FieldSeparator IFS;
for (auto &Call : PS.Calls) {
Out << IFS;
- Out << "(callee: ^" << Machine.getGUIDSlot(Call.Callee);
+ Out << "(callee: ^" << Machine.getGUIDSlot(Call.Callee.getGUID());
Out << ", param: " << Call.ParamNo;
Out << ", offset: ";
PrintRange(Call.Offsets);
@@ -4269,11 +4379,17 @@ void AssemblyWriter::writeAttribute(const Attribute &Attr, bool InAttrGroup) {
}
assert((Attr.hasAttribute(Attribute::ByVal) ||
+ Attr.hasAttribute(Attribute::StructRet) ||
+ Attr.hasAttribute(Attribute::ByRef) ||
Attr.hasAttribute(Attribute::Preallocated)) &&
"unexpected type attr");
if (Attr.hasAttribute(Attribute::ByVal)) {
Out << "byval";
+ } else if (Attr.hasAttribute(Attribute::StructRet)) {
+ Out << "sret";
+ } else if (Attr.hasAttribute(Attribute::ByRef)) {
+ Out << "byref";
} else {
Out << "preallocated";
}
@@ -4366,7 +4482,7 @@ void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
void BasicBlock::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
bool ShouldPreserveUseListOrder,
bool IsForDebug) const {
- SlotTracker SlotTable(this->getModule());
+ SlotTracker SlotTable(this->getParent());
formatted_raw_ostream OS(ROS);
AssemblyWriter W(OS, SlotTable, this->getModule(), AAW,
IsForDebug,
diff --git a/contrib/llvm-project/llvm/lib/IR/Assumptions.cpp b/contrib/llvm-project/llvm/lib/IR/Assumptions.cpp
new file mode 100644
index 000000000000..1bd8b7f51e67
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/IR/Assumptions.cpp
@@ -0,0 +1,36 @@
+//===- Assumptions.cpp ------ Collection of helpers for assumptions -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+bool llvm::hasAssumption(Function &F,
+ const KnownAssumptionString &AssumptionStr) {
+ const Attribute &A = F.getFnAttribute(AssumptionAttrKey);
+ if (!A.isValid())
+ return false;
+ assert(A.isStringAttribute() && "Expected a string attribute!");
+
+ SmallVector<StringRef, 8> Strings;
+ A.getValueAsString().split(Strings, ",");
+
+ return llvm::any_of(Strings, [=](StringRef Assumption) {
+ return Assumption == AssumptionStr;
+ });
+}
+
+StringSet<> llvm::KnownAssumptionStrings({
+ "omp_no_openmp", // OpenMP 5.1
+ "omp_no_openmp_routines", // OpenMP 5.1
+ "omp_no_parallelism", // OpenMP 5.1
+});
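
A hedged usage sketch for the new helper in this file; the wrapper name and the way a KnownAssumptionString is built from a literal are assumptions for illustration only:

    #include "llvm/IR/Assumptions.h"
    #include "llvm/IR/Function.h"

    // Returns true if F carries the "omp_no_openmp" assumption in its
    // assumptions string attribute (AssumptionAttrKey).
    static bool hasNoOpenMPAssumption(llvm::Function &F) {
      return llvm::hasAssumption(F, llvm::KnownAssumptionString("omp_no_openmp"));
    }
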
diff --git a/contrib/llvm-project/llvm/lib/IR/AttributeImpl.h b/contrib/llvm-project/llvm/lib/IR/AttributeImpl.h
index 5c334348cde3..c69fe3fe0827 100644
--- a/contrib/llvm-project/llvm/lib/IR/AttributeImpl.h
+++ b/contrib/llvm-project/llvm/lib/IR/AttributeImpl.h
@@ -121,7 +121,10 @@ protected:
public:
EnumAttributeImpl(Attribute::AttrKind Kind)
- : AttributeImpl(EnumAttrEntry), Kind(Kind) {}
+ : AttributeImpl(EnumAttrEntry), Kind(Kind) {
+ assert(Kind != Attribute::AttrKind::None &&
+ "Can't create a None attribute!");
+ }
Attribute::AttrKind getEnumKind() const { return Kind; }
};
@@ -251,6 +254,8 @@ public:
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
std::string getAsString(bool InAttrGrp) const;
Type *getByValType() const;
+ Type *getStructRetType() const;
+ Type *getByRefType() const;
Type *getPreallocatedType() const;
using iterator = const Attribute *;
diff --git a/contrib/llvm-project/llvm/lib/IR/Attributes.cpp b/contrib/llvm-project/llvm/lib/IR/Attributes.cpp
index f67d96a854f4..c4629decc6d9 100644
--- a/contrib/llvm-project/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Attributes.cpp
@@ -172,6 +172,14 @@ Attribute Attribute::getWithByValType(LLVMContext &Context, Type *Ty) {
return get(Context, ByVal, Ty);
}
+Attribute Attribute::getWithStructRetType(LLVMContext &Context, Type *Ty) {
+ return get(Context, StructRet, Ty);
+}
+
+Attribute Attribute::getWithByRefType(LLVMContext &Context, Type *Ty) {
+ return get(Context, ByRef, Ty);
+}
+
Attribute Attribute::getWithPreallocatedType(LLVMContext &Context, Type *Ty) {
return get(Context, Preallocated, Ty);
}
@@ -363,6 +371,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "noalias";
if (hasAttribute(Attribute::NoBuiltin))
return "nobuiltin";
+ if (hasAttribute(Attribute::NoCallback))
+ return "nocallback";
if (hasAttribute(Attribute::NoCapture))
return "nocapture";
if (hasAttribute(Attribute::NoDuplicate))
@@ -393,6 +403,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "nocf_check";
if (hasAttribute(Attribute::NoRecurse))
return "norecurse";
+ if (hasAttribute(Attribute::NoProfile))
+ return "noprofile";
if (hasAttribute(Attribute::NoUnwind))
return "nounwind";
if (hasAttribute(Attribute::OptForFuzzing))
@@ -429,8 +441,6 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "shadowcallstack";
if (hasAttribute(Attribute::StrictFP))
return "strictfp";
- if (hasAttribute(Attribute::StructRet))
- return "sret";
if (hasAttribute(Attribute::SanitizeThread))
return "sanitize_thread";
if (hasAttribute(Attribute::SanitizeMemory))
@@ -441,14 +451,19 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "zeroext";
if (hasAttribute(Attribute::Cold))
return "cold";
+ if (hasAttribute(Attribute::Hot))
+ return "hot";
if (hasAttribute(Attribute::ImmArg))
return "immarg";
if (hasAttribute(Attribute::NoUndef))
return "noundef";
+ if (hasAttribute(Attribute::MustProgress))
+ return "mustprogress";
- if (hasAttribute(Attribute::ByVal)) {
+ const bool IsByVal = hasAttribute(Attribute::ByVal);
+ if (IsByVal || hasAttribute(Attribute::StructRet)) {
std::string Result;
- Result += "byval";
+ Result += IsByVal ? "byval" : "sret";
if (Type *Ty = getValueAsType()) {
raw_string_ostream OS(Result);
Result += '(';
@@ -459,9 +474,9 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return Result;
}
- if (hasAttribute(Attribute::Preallocated)) {
- std::string Result;
- Result += "preallocated";
+ const bool IsByRef = hasAttribute(Attribute::ByRef);
+ if (IsByRef || hasAttribute(Attribute::Preallocated)) {
+ std::string Result = IsByRef ? "byref" : "preallocated";
raw_string_ostream OS(Result);
Result += '(';
getValueAsType()->print(OS, false, true);
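
To make the new printing concrete, a small sketch that builds the typed sret/byref attributes added earlier in this file and prints them via getAsString(); STy stands for some struct type and the "%struct.S" spelling in the comments is illustrative:

    #include "llvm/IR/Attributes.h"
    #include "llvm/Support/raw_ostream.h"

    static void printTypedAttrs(llvm::LLVMContext &Ctx, llvm::Type *STy) {
      using namespace llvm;
      Attribute SRet  = Attribute::getWithStructRetType(Ctx, STy);
      Attribute ByRef = Attribute::getWithByRefType(Ctx, STy);
      // With this change these render as e.g. "sret(%struct.S)" and
      // "byref(%struct.S)" instead of the old bare "sret".
      errs() << SRet.getAsString() << '\n' << ByRef.getAsString() << '\n';
    }
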
@@ -742,10 +757,18 @@ uint64_t AttributeSet::getDereferenceableOrNullBytes() const {
return SetNode ? SetNode->getDereferenceableOrNullBytes() : 0;
}
+Type *AttributeSet::getByRefType() const {
+ return SetNode ? SetNode->getByRefType() : nullptr;
+}
+
Type *AttributeSet::getByValType() const {
return SetNode ? SetNode->getByValType() : nullptr;
}
+Type *AttributeSet::getStructRetType() const {
+ return SetNode ? SetNode->getStructRetType() : nullptr;
+}
+
Type *AttributeSet::getPreallocatedType() const {
return SetNode ? SetNode->getPreallocatedType() : nullptr;
}
@@ -842,6 +865,12 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
case Attribute::ByVal:
Attr = Attribute::getWithByValType(C, B.getByValType());
break;
+ case Attribute::StructRet:
+ Attr = Attribute::getWithStructRetType(C, B.getStructRetType());
+ break;
+ case Attribute::ByRef:
+ Attr = Attribute::getWithByRefType(C, B.getByRefType());
+ break;
case Attribute::Preallocated:
Attr = Attribute::getWithPreallocatedType(C, B.getPreallocatedType());
break;
@@ -925,14 +954,25 @@ MaybeAlign AttributeSetNode::getStackAlignment() const {
Type *AttributeSetNode::getByValType() const {
if (auto A = findEnumAttribute(Attribute::ByVal))
return A->getValueAsType();
- return 0;
+ return nullptr;
+}
+
+Type *AttributeSetNode::getStructRetType() const {
+ if (auto A = findEnumAttribute(Attribute::StructRet))
+ return A->getValueAsType();
+ return nullptr;
+}
+
+Type *AttributeSetNode::getByRefType() const {
+ if (auto A = findEnumAttribute(Attribute::ByRef))
+ return A->getValueAsType();
+ return nullptr;
}
Type *AttributeSetNode::getPreallocatedType() const {
- for (const auto &I : *this)
- if (I.hasAttribute(Attribute::Preallocated))
- return I.getValueAsType();
- return 0;
+ if (auto A = findEnumAttribute(Attribute::Preallocated))
+ return A->getValueAsType();
+ return nullptr;
}
uint64_t AttributeSetNode::getDereferenceableBytes() const {
@@ -970,7 +1010,7 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
/// Map from AttributeList index to the internal array index. Adding one happens
/// to work, because -1 wraps around to 0.
-static constexpr unsigned attrIdxToArrayIdx(unsigned Index) {
+static unsigned attrIdxToArrayIdx(unsigned Index) {
return Index + 1;
}
@@ -983,9 +1023,7 @@ AttributeListImpl::AttributeListImpl(ArrayRef<AttributeSet> Sets)
// Initialize AvailableFunctionAttrs and AvailableSomewhereAttrs
// summary bitsets.
- static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U,
- "function should be stored in slot 0");
- for (const auto &I : Sets[0])
+ for (const auto &I : Sets[attrIdxToArrayIdx(AttributeList::FunctionIndex)])
if (!I.isStringAttribute())
AvailableFunctionAttrs.addAttribute(I.getKindAsEnum());
@@ -1073,10 +1111,10 @@ AttributeList::get(LLVMContext &C,
return LHS.first < RHS.first;
}) &&
"Misordered Attributes list!");
- assert(llvm::none_of(Attrs,
- [](const std::pair<unsigned, Attribute> &Pair) {
- return Pair.second.hasAttribute(Attribute::None);
- }) &&
+ assert(llvm::all_of(Attrs,
+ [](const std::pair<unsigned, Attribute> &Pair) {
+ return Pair.second.isValid();
+ }) &&
"Pointless attribute!");
// Create a vector of (unsigned, AttributeSetNode*) pairs from the attributes

@@ -1164,7 +1202,7 @@ AttributeList AttributeList::get(LLVMContext &C, AttributeSet FnAttrs,
if (NumSets > 2) {
// Drop the empty argument attribute sets at the end.
ArgAttrs = ArgAttrs.take_front(NumSets - 2);
- AttrSets.insert(AttrSets.end(), ArgAttrs.begin(), ArgAttrs.end());
+ llvm::append_range(AttrSets, ArgAttrs);
}
return getImpl(C, AttrSets);
@@ -1236,9 +1274,11 @@ AttributeList AttributeList::get(LLVMContext &C,
AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
Attribute::AttrKind Kind) const {
if (hasAttribute(Index, Kind)) return *this;
- AttrBuilder B;
- B.addAttribute(Kind);
- return addAttributes(C, Index, B);
+ AttributeSet Attrs = getAttributes(Index);
+ // TODO: Insert at correct position and avoid sort.
+ SmallVector<Attribute, 8> NewAttrs(Attrs.begin(), Attrs.end());
+ NewAttrs.push_back(Attribute::get(C, Kind));
+ return setAttributes(C, Index, AttributeSet::get(C, NewAttrs));
}
AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
@@ -1256,6 +1296,16 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
return addAttributes(C, Index, B);
}
+AttributeList AttributeList::setAttributes(LLVMContext &C, unsigned Index,
+ AttributeSet Attrs) const {
+ Index = attrIdxToArrayIdx(Index);
+ SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
+ if (Index >= AttrSets.size())
+ AttrSets.resize(Index + 1);
+ AttrSets[Index] = Attrs;
+ return AttributeList::getImpl(C, AttrSets);
+}
+
AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
const AttrBuilder &B) const {
if (!B.hasAttributes())
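
A hypothetical caller of the new setAttributes() entry point, shown only to make the index handling concrete; NewFnAttrs is assumed to be an AttributeSet assembled elsewhere:

    static void replaceFnAttrs(llvm::LLVMContext &Ctx, llvm::Function &F,
                               llvm::AttributeSet NewFnAttrs) {
      using namespace llvm;
      AttributeList AL = F.getAttributes();
      // FunctionIndex is mapped to array slot 0 by attrIdxToArrayIdx() above.
      AL = AL.setAttributes(Ctx, AttributeList::FunctionIndex, NewFnAttrs);
      F.setAttributes(AL);
    }
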
@@ -1273,16 +1323,9 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
"Attempt to change alignment!");
#endif
- Index = attrIdxToArrayIdx(Index);
- SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
- if (Index >= AttrSets.size())
- AttrSets.resize(Index + 1);
-
- AttrBuilder Merged(AttrSets[Index]);
+ AttrBuilder Merged(getAttributes(Index));
Merged.merge(B);
- AttrSets[Index] = AttributeSet::get(C, Merged);
-
- return getImpl(C, AttrSets);
+ return setAttributes(C, Index, AttributeSet::get(C, Merged));
}
AttributeList AttributeList::addParamAttribute(LLVMContext &C,
@@ -1452,6 +1495,14 @@ Type *AttributeList::getParamByValType(unsigned Index) const {
return getAttributes(Index+FirstArgIndex).getByValType();
}
+Type *AttributeList::getParamStructRetType(unsigned Index) const {
+ return getAttributes(Index + FirstArgIndex).getStructRetType();
+}
+
+Type *AttributeList::getParamByRefType(unsigned Index) const {
+ return getAttributes(Index + FirstArgIndex).getByRefType();
+}
+
Type *AttributeList::getParamPreallocatedType(unsigned Index) const {
return getAttributes(Index + FirstArgIndex).getPreallocatedType();
}
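
The new per-parameter accessors compose with the existing byval one; a rough sketch of querying whichever pointee type attribute a parameter carries (the helper name is made up):

    static llvm::Type *getParamPointeeType(const llvm::Function &F, unsigned ArgNo) {
      using namespace llvm;
      AttributeList AL = F.getAttributes();
      if (Type *T = AL.getParamByValType(ArgNo))
        return T;
      if (Type *T = AL.getParamStructRetType(ArgNo))
        return T;
      if (Type *T = AL.getParamByRefType(ArgNo))
        return T;
      return nullptr; // no type attribute on this parameter
    }
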
@@ -1537,17 +1588,11 @@ void AttrBuilder::clear() {
DerefBytes = DerefOrNullBytes = 0;
AllocSizeArgs = 0;
ByValType = nullptr;
+ StructRetType = nullptr;
+ ByRefType = nullptr;
PreallocatedType = nullptr;
}
-AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
- assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!");
- assert(!Attribute::doesAttrKindHaveArgument(Val) &&
- "Adding integer attribute without adding a value!");
- Attrs[Val] = true;
- return *this;
-}
-
AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
if (Attr.isStringAttribute()) {
addAttribute(Attr.getKindAsString(), Attr.getValueAsString());
@@ -1563,6 +1608,10 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
StackAlignment = Attr.getStackAlignment();
else if (Kind == Attribute::ByVal)
ByValType = Attr.getValueAsType();
+ else if (Kind == Attribute::StructRet)
+ StructRetType = Attr.getValueAsType();
+ else if (Kind == Attribute::ByRef)
+ ByRefType = Attr.getValueAsType();
else if (Kind == Attribute::Preallocated)
PreallocatedType = Attr.getValueAsType();
else if (Kind == Attribute::Dereferenceable)
@@ -1589,6 +1638,10 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
StackAlignment.reset();
else if (Val == Attribute::ByVal)
ByValType = nullptr;
+ else if (Val == Attribute::StructRet)
+ StructRetType = nullptr;
+ else if (Val == Attribute::ByRef)
+ ByRefType = nullptr;
else if (Val == Attribute::Preallocated)
PreallocatedType = nullptr;
else if (Val == Attribute::Dereferenceable)
@@ -1679,6 +1732,18 @@ AttrBuilder &AttrBuilder::addByValAttr(Type *Ty) {
return *this;
}
+AttrBuilder &AttrBuilder::addStructRetAttr(Type *Ty) {
+ Attrs[Attribute::StructRet] = true;
+ StructRetType = Ty;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addByRefAttr(Type *Ty) {
+ Attrs[Attribute::ByRef] = true;
+ ByRefType = Ty;
+ return *this;
+}
+
AttrBuilder &AttrBuilder::addPreallocatedAttr(Type *Ty) {
Attrs[Attribute::Preallocated] = true;
PreallocatedType = Ty;
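
For completeness, a minimal AttrBuilder sketch for the new byref helper; STy is a hypothetical pointee type and the builder is materialized into an AttributeSet straight away:

    static llvm::AttributeSet makeByRefParamAttrs(llvm::LLVMContext &Ctx,
                                                  llvm::Type *STy) {
      using namespace llvm;
      AttrBuilder B;
      B.addByRefAttr(STy);               // records byref(STy)
      B.addAttribute(Attribute::NoUndef);
      return AttributeSet::get(Ctx, B);
    }
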
@@ -1705,6 +1770,12 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
if (!ByValType)
ByValType = B.ByValType;
+ if (!StructRetType)
+ StructRetType = B.StructRetType;
+
+ if (!ByRefType)
+ ByRefType = B.ByRefType;
+
if (!PreallocatedType)
PreallocatedType = B.PreallocatedType;
@@ -1736,6 +1807,12 @@ AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) {
if (B.ByValType)
ByValType = nullptr;
+ if (B.StructRetType)
+ StructRetType = nullptr;
+
+ if (B.ByRefType)
+ ByRefType = nullptr;
+
if (B.PreallocatedType)
PreallocatedType = nullptr;
@@ -1788,7 +1865,7 @@ bool AttrBuilder::hasAlignmentAttr() const {
return Alignment != 0;
}
-bool AttrBuilder::operator==(const AttrBuilder &B) {
+bool AttrBuilder::operator==(const AttrBuilder &B) const {
if (Attrs != B.Attrs)
return false;
@@ -1799,6 +1876,7 @@ bool AttrBuilder::operator==(const AttrBuilder &B) {
return Alignment == B.Alignment && StackAlignment == B.StackAlignment &&
DerefBytes == B.DerefBytes && ByValType == B.ByValType &&
+ StructRetType == B.StructRetType && ByRefType == B.ByRefType &&
PreallocatedType == B.PreallocatedType;
}
@@ -1821,14 +1899,20 @@ AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) {
.addAttribute(Attribute::NoAlias)
.addAttribute(Attribute::NoCapture)
.addAttribute(Attribute::NonNull)
+ .addAlignmentAttr(1) // the int here is ignored
.addDereferenceableAttr(1) // the int here is ignored
.addDereferenceableOrNullAttr(1) // the int here is ignored
.addAttribute(Attribute::ReadNone)
.addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::StructRet)
.addAttribute(Attribute::InAlloca)
.addPreallocatedAttr(Ty)
- .addByValAttr(Ty);
+ .addByValAttr(Ty)
+ .addStructRetAttr(Ty)
+ .addByRefAttr(Ty);
+
+ // Some attributes can apply to all "values" but there are no `void` values.
+ if (Ty->isVoidTy())
+ Incompatible.addAttribute(Attribute::NoUndef);
return Incompatible;
}
@@ -1866,6 +1950,16 @@ static void setOR(Function &Caller, const Function &Callee) {
/// If the inlined function had a higher stack protection level than the
/// calling function, then bump up the caller's stack protection level.
static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) {
+#ifndef NDEBUG
+ if (!Callee.hasFnAttribute(Attribute::AlwaysInline)) {
+ assert(!(!Callee.hasStackProtectorFnAttr() &&
+ Caller.hasStackProtectorFnAttr()) &&
+ "stack protected caller but callee requested no stack protector");
+ assert(!(!Caller.hasStackProtectorFnAttr() &&
+ Callee.hasStackProtectorFnAttr()) &&
+ "stack protected callee but caller requested no stack protector");
+ }
+#endif
// If upgrading the SSP attribute, clear out the old SSP Attributes first.
// Having multiple SSP attributes doesn't actually hurt, but it adds useless
// clutter to the IR.
@@ -1901,21 +1995,19 @@ static void adjustCallerStackProbes(Function &Caller, const Function &Callee) {
/// that is no larger.
static void
adjustCallerStackProbeSize(Function &Caller, const Function &Callee) {
- if (Callee.hasFnAttribute("stack-probe-size")) {
- uint64_t CalleeStackProbeSize;
- Callee.getFnAttribute("stack-probe-size")
- .getValueAsString()
- .getAsInteger(0, CalleeStackProbeSize);
- if (Caller.hasFnAttribute("stack-probe-size")) {
- uint64_t CallerStackProbeSize;
- Caller.getFnAttribute("stack-probe-size")
- .getValueAsString()
- .getAsInteger(0, CallerStackProbeSize);
+ Attribute CalleeAttr = Callee.getFnAttribute("stack-probe-size");
+ if (CalleeAttr.isValid()) {
+ Attribute CallerAttr = Caller.getFnAttribute("stack-probe-size");
+ if (CallerAttr.isValid()) {
+ uint64_t CallerStackProbeSize, CalleeStackProbeSize;
+ CallerAttr.getValueAsString().getAsInteger(0, CallerStackProbeSize);
+ CalleeAttr.getValueAsString().getAsInteger(0, CalleeStackProbeSize);
+
if (CallerStackProbeSize > CalleeStackProbeSize) {
- Caller.addFnAttr(Callee.getFnAttribute("stack-probe-size"));
+ Caller.addFnAttr(CalleeAttr);
}
} else {
- Caller.addFnAttr(Callee.getFnAttribute("stack-probe-size"));
+ Caller.addFnAttr(CalleeAttr);
}
}
}
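
The refactor above leans on Attribute::isValid() instead of a separate hasFnAttribute() probe; the same pattern in isolation, with a made-up attribute name:

    // Returns false if "my-string-attr" is absent or does not parse as an integer.
    static bool readUIntFnAttr(const llvm::Function &F, uint64_t &Out) {
      llvm::Attribute A = F.getFnAttribute("my-string-attr");
      if (!A.isValid())
        return false;
      // StringRef::getAsInteger returns true on parse failure.
      return !A.getValueAsString().getAsInteger(0, Out);
    }
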
@@ -1931,18 +2023,15 @@ adjustCallerStackProbeSize(Function &Caller, const Function &Callee) {
/// handled as part of inline cost analysis.
static void
adjustMinLegalVectorWidth(Function &Caller, const Function &Callee) {
- if (Caller.hasFnAttribute("min-legal-vector-width")) {
- if (Callee.hasFnAttribute("min-legal-vector-width")) {
- uint64_t CallerVectorWidth;
- Caller.getFnAttribute("min-legal-vector-width")
- .getValueAsString()
- .getAsInteger(0, CallerVectorWidth);
- uint64_t CalleeVectorWidth;
- Callee.getFnAttribute("min-legal-vector-width")
- .getValueAsString()
- .getAsInteger(0, CalleeVectorWidth);
+ Attribute CallerAttr = Caller.getFnAttribute("min-legal-vector-width");
+ if (CallerAttr.isValid()) {
+ Attribute CalleeAttr = Callee.getFnAttribute("min-legal-vector-width");
+ if (CalleeAttr.isValid()) {
+ uint64_t CallerVectorWidth, CalleeVectorWidth;
+ CallerAttr.getValueAsString().getAsInteger(0, CallerVectorWidth);
+ CalleeAttr.getValueAsString().getAsInteger(0, CalleeVectorWidth);
if (CallerVectorWidth < CalleeVectorWidth)
- Caller.addFnAttr(Callee.getFnAttribute("min-legal-vector-width"));
+ Caller.addFnAttr(CalleeAttr);
} else {
// If the callee doesn't have the attribute then we don't know anything
// and must drop the attribute from the caller.
@@ -2009,7 +2098,25 @@ bool AttributeFuncs::areInlineCompatible(const Function &Caller,
return hasCompatibleFnAttrs(Caller, Callee);
}
+bool AttributeFuncs::areOutlineCompatible(const Function &A,
+ const Function &B) {
+ return hasCompatibleFnAttrs(A, B);
+}
+
void AttributeFuncs::mergeAttributesForInlining(Function &Caller,
const Function &Callee) {
mergeFnAttrs(Caller, Callee);
}
+
+void AttributeFuncs::mergeAttributesForOutlining(Function &Base,
+ const Function &ToMerge) {
+
+ // We merge functions so that they meet the most general case.
+ // For example, if the NoNansFPMathAttr is set in one function, but not in
+ // the other, in the merged function we can say that the NoNansFPMathAttr
+ // is not set.
+ // However if we have the SpeculativeLoadHardeningAttr set true in one
+ // function, but not the other, we make sure that the function retains
+ // that aspect in the merged function.
+ mergeFnAttrs(Base, ToMerge);
+}
diff --git a/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
index 1e8fdb506619..7d83cf5dcf1d 100644
--- a/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
@@ -68,6 +69,19 @@ static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
return true;
}
+// Upgrade the declaration of fp compare intrinsics that change return type
+// from scalar to vXi1 mask.
+static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
+ Function *&NewFn) {
+ // Check if the return type is a vector.
+ if (F->getReturnType()->isVectorTy())
+ return false;
+
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+ return true;
+}
+
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// All of the intrinsics matches below should be marked with which llvm
// version started autoupgrading them. At some point in the future we would
@@ -241,7 +255,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
- Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
+ Name.startswith("avx512.cmp.p") || // Added in 12.0
Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
@@ -456,6 +470,24 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
if (Name == "avx2.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
+ if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
+ NewFn);
+ if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
+ NewFn);
+ if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
+ NewFn);
+ if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
+ NewFn);
+ if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
+ NewFn);
+ if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
+ NewFn);
// frcz.ss/sd may need to have an argument dropped. Added in 3.2
if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
@@ -601,6 +633,63 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
return true;
}
}
+
+ // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
+ // respectively
+ if ((Name.startswith("arm.neon.bfdot.") ||
+ Name.startswith("aarch64.neon.bfdot.")) &&
+ Name.endswith("i8")) {
+ Intrinsic::ID IID =
+ StringSwitch<Intrinsic::ID>(Name)
+ .Cases("arm.neon.bfdot.v2f32.v8i8",
+ "arm.neon.bfdot.v4f32.v16i8",
+ Intrinsic::arm_neon_bfdot)
+ .Cases("aarch64.neon.bfdot.v2f32.v8i8",
+ "aarch64.neon.bfdot.v4f32.v16i8",
+ Intrinsic::aarch64_neon_bfdot)
+ .Default(Intrinsic::not_intrinsic);
+ if (IID == Intrinsic::not_intrinsic)
+ break;
+
+ size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
+ assert((OperandWidth == 64 || OperandWidth == 128) &&
+ "Unexpected operand width");
+ LLVMContext &Ctx = F->getParent()->getContext();
+ std::array<Type *, 2> Tys {{
+ F->getReturnType(),
+ FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
+ }};
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
+ return true;
+ }
+
+ // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
+ // and accept v8bf16 instead of v16i8
+ if ((Name.startswith("arm.neon.bfm") ||
+ Name.startswith("aarch64.neon.bfm")) &&
+ Name.endswith(".v4f32.v16i8")) {
+ Intrinsic::ID IID =
+ StringSwitch<Intrinsic::ID>(Name)
+ .Case("arm.neon.bfmmla.v4f32.v16i8",
+ Intrinsic::arm_neon_bfmmla)
+ .Case("arm.neon.bfmlalb.v4f32.v16i8",
+ Intrinsic::arm_neon_bfmlalb)
+ .Case("arm.neon.bfmlalt.v4f32.v16i8",
+ Intrinsic::arm_neon_bfmlalt)
+ .Case("aarch64.neon.bfmmla.v4f32.v16i8",
+ Intrinsic::aarch64_neon_bfmmla)
+ .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
+ Intrinsic::aarch64_neon_bfmlalb)
+ .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
+ Intrinsic::aarch64_neon_bfmlalt)
+ .Default(Intrinsic::not_intrinsic);
+ if (IID == Intrinsic::not_intrinsic)
+ break;
+
+ std::array<Type *, 0> Tys;
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
+ return true;
+ }
break;
}
@@ -629,18 +718,42 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
case 'e': {
SmallVector<StringRef, 2> Groups;
- static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
+ static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
if (R.match(Name, &Groups)) {
+ Intrinsic::ID ID;
+ ID = StringSwitch<Intrinsic::ID>(Groups[1])
+ .Case("add", Intrinsic::vector_reduce_add)
+ .Case("mul", Intrinsic::vector_reduce_mul)
+ .Case("and", Intrinsic::vector_reduce_and)
+ .Case("or", Intrinsic::vector_reduce_or)
+ .Case("xor", Intrinsic::vector_reduce_xor)
+ .Case("smax", Intrinsic::vector_reduce_smax)
+ .Case("smin", Intrinsic::vector_reduce_smin)
+ .Case("umax", Intrinsic::vector_reduce_umax)
+ .Case("umin", Intrinsic::vector_reduce_umin)
+ .Case("fmax", Intrinsic::vector_reduce_fmax)
+ .Case("fmin", Intrinsic::vector_reduce_fmin)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic) {
+ rename(F);
+ auto Args = F->getFunctionType()->params();
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
+ return true;
+ }
+ }
+ static const Regex R2(
+ "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
+ Groups.clear();
+ if (R2.match(Name, &Groups)) {
Intrinsic::ID ID = Intrinsic::not_intrinsic;
if (Groups[1] == "fadd")
- ID = Intrinsic::experimental_vector_reduce_v2_fadd;
+ ID = Intrinsic::vector_reduce_fadd;
if (Groups[1] == "fmul")
- ID = Intrinsic::experimental_vector_reduce_v2_fmul;
-
+ ID = Intrinsic::vector_reduce_fmul;
if (ID != Intrinsic::not_intrinsic) {
rename(F);
auto Args = F->getFunctionType()->params();
- Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
+ Type *Tys[] = {Args[1]};
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
return true;
}
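
For reference, roughly what the renamed integer reductions resolve to once the regex above has matched; the Module, IRBuilder and vector value are assumed to exist and the usual LLVM headers to be included:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"

    static llvm::Value *emitReduceAdd(llvm::Module &M, llvm::IRBuilder<> &Builder,
                                      llvm::Value *Vec) {
      using namespace llvm;
      // llvm.vector.reduce.add is now overloaded only on the vector operand.
      Function *Red = Intrinsic::getDeclaration(&M, Intrinsic::vector_reduce_add,
                                                {Vec->getType()});
      return Builder.CreateCall(Red, {Vec});
    }
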
@@ -824,6 +937,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
return true;
}
+ } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::ptr_annotation,
+ F->arg_begin()->getType());
+ return true;
}
break;
@@ -834,6 +953,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
+ case 'v': {
+ if (Name == "var.annotation" && F->arg_size() == 4) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::var_annotation);
+ return true;
+ }
+ break;
+ }
+
case 'x':
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
@@ -900,7 +1029,7 @@ GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
Value *Op, unsigned Shift) {
- auto *ResultTy = cast<VectorType>(Op->getType());
+ auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
@@ -934,7 +1063,7 @@ static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
unsigned Shift) {
- auto *ResultTy = cast<VectorType>(Op->getType());
+ auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
@@ -966,19 +1095,19 @@ static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
unsigned NumElts) {
+ assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
llvm::VectorType *MaskTy = FixedVectorType::get(
Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
- // If we have less than 8 elements, then the starting mask was an i8 and
- // we need to extract down to the right number of elements.
- if (NumElts < 8) {
+ // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
+ // i8 and we need to extract down to the right number of elements.
+ if (NumElts <= 4) {
int Indices[4];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
- Mask = Builder.CreateShuffleVector(Mask, Mask,
- makeArrayRef(Indices, NumElts),
- "extract");
+ Mask = Builder.CreateShuffleVector(
+ Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
}
return Mask;
@@ -992,7 +1121,7 @@ static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
return Op0;
Mask = getX86MaskVec(Builder, Mask,
- cast<VectorType>(Op0->getType())->getNumElements());
+ cast<FixedVectorType>(Op0->getType())->getNumElements());
return Builder.CreateSelect(Mask, Op0, Op1);
}
@@ -1019,7 +1148,7 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
bool IsVALIGN) {
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
- unsigned NumElts = cast<VectorType>(Op0->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
@@ -1120,15 +1249,11 @@ static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
-static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
- bool IsSigned, bool IsAddition) {
+static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
+ Intrinsic::ID IID) {
Type *Ty = CI.getType();
Value *Op0 = CI.getOperand(0);
Value *Op1 = CI.getOperand(1);
-
- Intrinsic::ID IID =
- IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
- : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
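
Concretely, the generic helper above turns the legacy pmaxs*/pmins*/pmaxu*/pminu* and saturating add/sub names into plain calls to the new generic intrinsics, along the lines of this standalone sketch:

    static llvm::Value *emitSignedMax(llvm::Module &M, llvm::IRBuilder<> &Builder,
                                      llvm::Value *A, llvm::Value *B) {
      using namespace llvm;
      // Overloaded only on the (vector) operand type, e.g. llvm.smax.v8i16.
      Function *SMax = Intrinsic::getDeclaration(&M, Intrinsic::smax, A->getType());
      return Builder.CreateCall(SMax, {A, B});
    }
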
@@ -1150,7 +1275,7 @@ static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
// Funnel shifts amounts are treated as modulo and types are all power-of-2 so
// we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
- unsigned NumElts = cast<VectorType>(Ty)->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);
}
@@ -1220,7 +1345,7 @@ static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
// Funnel shifts amounts are treated as modulo and types are all power-of-2 so
// we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
- unsigned NumElts = cast<VectorType>(Ty)->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);
}
@@ -1257,7 +1382,7 @@ static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
return Builder.CreateAlignedStore(Data, Ptr, Alignment);
// Convert the mask from an integer type to a vector of i1.
- unsigned NumElts = cast<VectorType>(Data->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}
@@ -1280,35 +1405,19 @@ static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
// Convert the mask from an integer type to a vector of i1.
- unsigned NumElts = cast<VectorType>(Passthru->getType())->getNumElements();
+ unsigned NumElts =
+ cast<FixedVectorType>(Passthru->getType())->getNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedLoad(Ptr, Alignment, Mask, Passthru);
}
static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
+ Type *Ty = CI.getType();
Value *Op0 = CI.getArgOperand(0);
- llvm::Type *Ty = Op0->getType();
- Value *Zero = llvm::Constant::getNullValue(Ty);
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
- Value *Neg = Builder.CreateNeg(Op0);
- Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
-
+ Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
+ Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
if (CI.getNumArgOperands() == 3)
- Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
-
- return Res;
-}
-
-static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
- ICmpInst::Predicate Pred) {
- Value *Op0 = CI.getArgOperand(0);
- Value *Op1 = CI.getArgOperand(1);
- Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
- Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
-
- if (CI.getNumArgOperands() == 4)
- Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
-
+ Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
return Res;
}
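
The rewritten upgradeAbs() above emits the generic llvm.abs intrinsic; in isolation that looks roughly like the following, where the trailing i1 false keeps the old non-poison behaviour for INT_MIN:

    static llvm::Value *emitAbs(llvm::Module &M, llvm::IRBuilder<> &Builder,
                                llvm::Value *X) {
      using namespace llvm;
      Function *Abs = Intrinsic::getDeclaration(&M, Intrinsic::abs, X->getType());
      // Second operand is the is_int_min_poison flag.
      return Builder.CreateCall(Abs, {X, Builder.getInt1(false)});
    }
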
@@ -1344,7 +1453,7 @@ static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
Value *Mask) {
- unsigned NumElts = cast<VectorType>(Vec->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
if (Mask) {
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
@@ -1367,7 +1476,7 @@ static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
- unsigned NumElts = cast<VectorType>(Op0->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
Value *Cmp;
if (CC == 3) {
@@ -1422,7 +1531,7 @@ static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
Value* Op = CI.getArgOperand(0);
Type* ReturnOp = CI.getType();
- unsigned NumElts = cast<VectorType>(CI.getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
Value *Mask = getX86MaskVec(Builder, Op, NumElts);
return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
@@ -1676,7 +1785,6 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
(Pos = AsmStr->find("# marker")) != std::string::npos) {
AsmStr->replace(Pos, 1, ";");
}
- return;
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
@@ -1871,8 +1979,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateICmp(Pred, Rep, Zero);
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
} else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
- unsigned NumElts =
- cast<VectorType>(CI->getArgOperand(1)->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
+ ->getNumElements();
Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
@@ -2000,38 +2108,36 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getOperand(0), CI->getArgOperand(1) });
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
- Type *OpTy = CI->getArgOperand(0)->getType();
+ } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
+ SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+ CI->arg_operands().end());
+ Type *OpTy = Args[0]->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
unsigned EltWidth = OpTy->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_cmp_ps_128;
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_cmp_ps_256;
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_cmp_ps_512;
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_cmp_pd_128;
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_cmp_pd_256;
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_cmp_pd_512;
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
else
llvm_unreachable("Unexpected intrinsic");
- SmallVector<Value *, 4> Args;
- Args.push_back(CI->getArgOperand(0));
- Args.push_back(CI->getArgOperand(1));
- Args.push_back(CI->getArgOperand(2));
- if (CI->getNumArgOperands() == 5)
- Args.push_back(CI->getArgOperand(4));
+ Value *Mask = Constant::getAllOnesValue(CI->getType());
+ if (VecWidth == 512)
+ std::swap(Mask, Args.back());
+ Args.push_back(Mask);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Args);
- Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
- } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
- Name[16] != 'p') {
+ } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
// Integer compare intrinsics.
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
@@ -2057,25 +2163,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name == "sse41.pmaxsd" ||
Name.startswith("avx2.pmaxs") ||
Name.startswith("avx512.mask.pmaxs"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
} else if (IsX86 && (Name == "sse2.pmaxu.b" ||
Name == "sse41.pmaxuw" ||
Name == "sse41.pmaxud" ||
Name.startswith("avx2.pmaxu") ||
Name.startswith("avx512.mask.pmaxu"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
} else if (IsX86 && (Name == "sse41.pminsb" ||
Name == "sse2.pmins.w" ||
Name == "sse41.pminsd" ||
Name.startswith("avx2.pmins") ||
Name.startswith("avx512.mask.pmins"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
} else if (IsX86 && (Name == "sse2.pminu.b" ||
Name == "sse41.pminuw" ||
Name == "sse41.pminud" ||
Name.startswith("avx2.pminu") ||
Name.startswith("avx512.mask.pminu"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
} else if (IsX86 && (Name == "sse2.pmulu.dq" ||
Name == "avx2.pmulu.dq" ||
Name == "avx512.pmulu.dq.512" ||
@@ -2122,9 +2228,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name == "avx.cvt.ps2.pd.256" ||
Name == "avx512.mask.cvtps2pd.128" ||
Name == "avx512.mask.cvtps2pd.256")) {
- auto *DstTy = cast<VectorType>(CI->getType());
+ auto *DstTy = cast<FixedVectorType>(CI->getType());
Rep = CI->getArgOperand(0);
- auto *SrcTy = cast<VectorType>(Rep->getType());
+ auto *SrcTy = cast<FixedVectorType>(Rep->getType());
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts < SrcTy->getNumElements()) {
@@ -2154,9 +2260,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
Name.startswith("vcvtph2ps."))) {
- auto *DstTy = cast<VectorType>(CI->getType());
+ auto *DstTy = cast<FixedVectorType>(CI->getType());
Rep = CI->getArgOperand(0);
- auto *SrcTy = cast<VectorType>(Rep->getType());
+ auto *SrcTy = cast<FixedVectorType>(Rep->getType());
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts != SrcTy->getNumElements()) {
assert(NumDstElts == 4 && "Unexpected vector size");
@@ -2177,7 +2283,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->getArgOperand(1),CI->getArgOperand(2),
/*Aligned*/true);
} else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
- auto *ResultTy = cast<VectorType>(CI->getType());
+ auto *ResultTy = cast<FixedVectorType>(CI->getType());
Type *PtrTy = ResultTy->getElementType();
// Cast the pointer to element type.
@@ -2199,8 +2305,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
llvm::PointerType::getUnqual(PtrTy));
- Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
- ResultTy->getNumElements());
+ Value *MaskVec =
+ getX86MaskVec(Builder, CI->getArgOperand(2),
+ cast<FixedVectorType>(ResultTy)->getNumElements());
Function *CSt = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_compressstore,
@@ -2208,7 +2315,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
} else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
Name.startswith("avx512.mask.expand."))) {
- auto *ResultTy = cast<VectorType>(CI->getType());
+ auto *ResultTy = cast<FixedVectorType>(CI->getType());
Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
ResultTy->getNumElements());
@@ -2288,7 +2395,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
} else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
Name.startswith("avx512.vbroadcast.s"))) {
// Replace broadcasts with a series of insertelements.
- auto *VecTy = cast<VectorType>(CI->getType());
+ auto *VecTy = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecTy->getElementType();
unsigned EltNum = VecTy->getNumElements();
Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
@@ -2305,8 +2412,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx2.pmovzx") ||
Name.startswith("avx512.mask.pmovsx") ||
Name.startswith("avx512.mask.pmovzx"))) {
- VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
- VectorType *DstTy = cast<VectorType>(CI->getType());
+ auto *DstTy = cast<FixedVectorType>(CI->getType());
unsigned NumDstElts = DstTy->getNumElements();
// Extract a subvector of the first NumDstElts lanes and sign/zero extend.
@@ -2314,8 +2420,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
for (unsigned i = 0; i != NumDstElts; ++i)
ShuffleMask[i] = i;
- Value *SV = Builder.CreateShuffleVector(
- CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
+ Value *SV =
+ Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
@@ -2342,12 +2448,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
PointerType::getUnqual(VT));
Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
if (NumSrcElts == 2)
- Rep = Builder.CreateShuffleVector(
- Load, UndefValue::get(Load->getType()), ArrayRef<int>{0, 1, 0, 1});
+ Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
else
- Rep =
- Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
- ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
+ Rep = Builder.CreateShuffleVector(
+ Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
} else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
Name.startswith("avx512.mask.shuf.f"))) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
@@ -2373,8 +2477,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
}else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
Name.startswith("avx512.mask.broadcasti"))) {
unsigned NumSrcElts =
- cast<VectorType>(CI->getArgOperand(0)->getType())->getNumElements();
- unsigned NumDstElts = cast<VectorType>(CI->getType())->getNumElements();
+ cast<FixedVectorType>(CI->getArgOperand(0)->getType())
+ ->getNumElements();
+ unsigned NumDstElts =
+ cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 8> ShuffleMask(NumDstElts);
for (unsigned i = 0; i != NumDstElts; ++i)
@@ -2393,30 +2499,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Op = CI->getArgOperand(0);
ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
- Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
- Constant::getNullValue(MaskTy));
+ SmallVector<int, 8> M;
+ ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
+ Rep = Builder.CreateShuffleVector(Op, M);
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("sse2.padds.") ||
- Name.startswith("sse2.psubs.") ||
Name.startswith("avx2.padds.") ||
- Name.startswith("avx2.psubs.") ||
Name.startswith("avx512.padds.") ||
+ Name.startswith("avx512.mask.padds."))) {
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
+ } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
+ Name.startswith("avx2.psubs.") ||
Name.startswith("avx512.psubs.") ||
- Name.startswith("avx512.mask.padds.") ||
Name.startswith("avx512.mask.psubs."))) {
- bool IsAdd = Name.contains(".padds");
- Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
} else if (IsX86 && (Name.startswith("sse2.paddus.") ||
- Name.startswith("sse2.psubus.") ||
Name.startswith("avx2.paddus.") ||
+ Name.startswith("avx512.mask.paddus."))) {
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
+ } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
Name.startswith("avx2.psubus.") ||
- Name.startswith("avx512.mask.paddus.") ||
Name.startswith("avx512.mask.psubus."))) {
- bool IsAdd = Name.contains(".paddus");
- Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
@@ -2463,7 +2570,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- VectorType *VecTy = cast<VectorType>(CI->getType());
+ auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
@@ -2477,21 +2584,22 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- unsigned DstNumElts = cast<VectorType>(CI->getType())->getNumElements();
- unsigned SrcNumElts = cast<VectorType>(Op1->getType())->getNumElements();
+ unsigned DstNumElts =
+ cast<FixedVectorType>(CI->getType())->getNumElements();
+ unsigned SrcNumElts =
+ cast<FixedVectorType>(Op1->getType())->getNumElements();
unsigned Scale = DstNumElts / SrcNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm % Scale;
// Extend the second operand into a vector the size of the destination.
- Value *UndefV = UndefValue::get(Op1->getType());
SmallVector<int, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
Idxs[i] = SrcNumElts;
- Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
+ Rep = Builder.CreateShuffleVector(Op1, Idxs);
// Insert the second operand into the first operand.
@@ -2521,8 +2629,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- unsigned DstNumElts = cast<VectorType>(CI->getType())->getNumElements();
- unsigned SrcNumElts = cast<VectorType>(Op0->getType())->getNumElements();
+ unsigned DstNumElts =
+ cast<FixedVectorType>(CI->getType())->getNumElements();
+ unsigned SrcNumElts =
+ cast<FixedVectorType>(Op0->getType())->getNumElements();
unsigned Scale = SrcNumElts / DstNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
@@ -2545,7 +2655,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.perm.di."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- VectorType *VecTy = cast<VectorType>(CI->getType());
+ auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<int, 8> Idxs(NumElts);
@@ -2569,7 +2679,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned HalfSize = NumElts / 2;
SmallVector<int, 8> ShuffleMask(NumElts);
@@ -2599,7 +2709,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.pshuf.d."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- VectorType *VecTy = cast<VectorType>(CI->getType());
+ auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
// Calculate the size of each index in the immediate.
unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
@@ -2621,7 +2731,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.pshufl.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
@@ -2640,7 +2750,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.pshufh.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
@@ -2659,7 +2769,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned HalfLaneElts = NumLaneElts / 2;
@@ -2684,7 +2794,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.movshdup") ||
Name.startswith("avx512.mask.movsldup"))) {
Value *Op0 = CI->getArgOperand(0);
- unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned Offset = 0;
@@ -2706,7 +2816,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.unpckl."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
- int NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<int, 64> Idxs(NumElts);
@@ -2722,7 +2832,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.unpckh."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
- int NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<int, 64> Idxs(NumElts);
@@ -3290,7 +3400,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Ops);
} else {
- int NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
@@ -3547,28 +3657,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
DefaultCase();
return;
}
- case Intrinsic::experimental_vector_reduce_v2_fmul: {
- SmallVector<Value *, 2> Args;
- if (CI->isFast())
- Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
- else
- Args.push_back(CI->getOperand(0));
- Args.push_back(CI->getOperand(1));
- NewCall = Builder.CreateCall(NewFn, Args);
- cast<Instruction>(NewCall)->copyFastMathFlags(CI);
- break;
- }
- case Intrinsic::experimental_vector_reduce_v2_fadd: {
- SmallVector<Value *, 2> Args;
- if (CI->isFast())
- Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
- else
- Args.push_back(CI->getOperand(0));
- Args.push_back(CI->getOperand(1));
- NewCall = Builder.CreateCall(NewFn, Args);
- cast<Instruction>(NewCall)->copyFastMathFlags(CI);
- break;
- }
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
@@ -3589,6 +3677,30 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
break;
}
+ case Intrinsic::arm_neon_bfdot:
+ case Intrinsic::arm_neon_bfmmla:
+ case Intrinsic::arm_neon_bfmlalb:
+ case Intrinsic::arm_neon_bfmlalt:
+ case Intrinsic::aarch64_neon_bfdot:
+ case Intrinsic::aarch64_neon_bfmmla:
+ case Intrinsic::aarch64_neon_bfmlalb:
+ case Intrinsic::aarch64_neon_bfmlalt: {
+ SmallVector<Value *, 3> Args;
+ assert(CI->getNumArgOperands() == 3 &&
+ "Mismatch between function args and call args");
+ size_t OperandWidth =
+ CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
+ assert((OperandWidth == 64 || OperandWidth == 128) &&
+ "Unexpected operand width");
+ Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
+ auto Iter = CI->arg_operands().begin();
+ Args.push_back(*Iter++);
+ Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
+ Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
+ }
+
case Intrinsic::bitreverse:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
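Illustrative sketch (not part of the merged diff) of the bfloat vector type the upgrade above bitcasts the NEON bf16 operands to; the helper name is invented, and it assumes the LLVM 12 headers from this merge:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    // 64-bit operands become <4 x bfloat>, 128-bit operands <8 x bfloat>,
    // since each bfloat lane is 16 bits wide.
    llvm::FixedVectorType *bf16OperandType(llvm::LLVMContext &C,
                                           unsigned OperandWidth) {
      return llvm::FixedVectorType::get(llvm::Type::getBFloatTy(C),
                                        OperandWidth / 16);
    }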
@@ -3634,6 +3746,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
return;
+ case Intrinsic::ptr_annotation:
+ // Upgrade from versions that lacked the annotation attribute argument.
+ assert(CI->getNumArgOperands() == 4 &&
+ "Before LLVM 12.0 this intrinsic took four arguments");
+ // Create a new call with an added null annotation attribute argument.
+ NewCall = Builder.CreateCall(
+ NewFn,
+ {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
+ CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
+ NewCall->takeName(CI);
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
+ return;
+
+ case Intrinsic::var_annotation:
+ // Upgrade from versions that lacked the annotation attribute argument.
+ assert(CI->getNumArgOperands() == 4 &&
+ "Before LLVM 12.0 this intrinsic took four arguments");
+ // Create a new call with an added null annotation attribute argument.
+ NewCall = Builder.CreateCall(
+ NewFn,
+ {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
+ CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
+ CI->eraseFromParent();
+ return;
+
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
@@ -3691,11 +3829,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Replace the original call result with the first result of the new call.
Value *TSC = Builder.CreateExtractValue(NewCall, 0);
- std::string Name = std::string(CI->getName());
- if (!Name.empty()) {
- CI->setName(Name + ".old");
- NewCall->setName(Name);
- }
+ NewCall->takeName(CI);
CI->replaceAllUsesWith(TSC);
CI->eraseFromParent();
return;
@@ -3718,6 +3852,27 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
break;
}
+ case Intrinsic::x86_avx512_mask_cmp_pd_128:
+ case Intrinsic::x86_avx512_mask_cmp_pd_256:
+ case Intrinsic::x86_avx512_mask_cmp_pd_512:
+ case Intrinsic::x86_avx512_mask_cmp_ps_128:
+ case Intrinsic::x86_avx512_mask_cmp_ps_256:
+ case Intrinsic::x86_avx512_mask_cmp_ps_512: {
+ SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+ CI->arg_operands().end());
+ unsigned NumElts =
+ cast<FixedVectorType>(Args[0]->getType())->getNumElements();
+ Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
+
+ NewCall = Builder.CreateCall(NewFn, Args);
+ Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
+
+ NewCall->takeName(CI);
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return;
+ }
+
case Intrinsic::thread_pointer: {
NewCall = Builder.CreateCall(NewFn, {});
break;
@@ -3766,11 +3921,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
}
assert(NewCall && "Should have either set this variable or returned through "
"the default case");
- std::string Name = std::string(CI->getName());
- if (!Name.empty()) {
- CI->setName(Name + ".old");
- NewCall->setName(Name);
- }
+ NewCall->takeName(CI);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
}
@@ -3784,8 +3935,8 @@ void llvm::UpgradeCallsToIntrinsic(Function *F) {
if (UpgradeIntrinsicFunction(F, NewFn)) {
// Replace all users of the old function with the new function or new
// instructions. This is not a range loop because the call is deleted.
- for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
- if (CallInst *CI = dyn_cast<CallInst>(*UI++))
+ for (User *U : make_early_inc_range(F->users()))
+ if (CallInst *CI = dyn_cast<CallInst>(U))
UpgradeIntrinsicCall(CI, NewFn);
// Remove old function, no longer used, from the module.
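Illustrative sketch (not part of the merged diff) of the make_early_inc_range idiom the hunk above switches to; the helper name is invented:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"

    // Erase unused direct calls to F. The range advances its iterator before
    // yielding each user, so erasing the current call is safe mid-loop.
    void eraseUnusedDirectCalls(llvm::Function *F) {
      for (llvm::User *U : llvm::make_early_inc_range(F->users()))
        if (auto *CI = llvm::dyn_cast<llvm::CallInst>(U))
          if (CI->use_empty() && CI->getCalledFunction() == F)
            CI->eraseFromParent();
    }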
@@ -3921,8 +4072,8 @@ void llvm::UpgradeARCRuntime(Module &M) {
Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
- for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) {
- CallInst *CI = dyn_cast<CallInst>(*I++);
+ for (User *U : make_early_inc_range(Fn->users())) {
+ CallInst *CI = dyn_cast<CallInst>(U);
if (!CI || CI->getCalledFunction() != Fn)
continue;
@@ -3963,7 +4114,7 @@ void llvm::UpgradeARCRuntime(Module &M) {
// Create a call instruction that calls the new function.
CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
- NewCall->setName(CI->getName());
+ NewCall->takeName(CI);
// Bitcast the return value back to the type of the old call.
Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
@@ -4202,6 +4353,13 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
StrictFPUpgradeVisitor SFPV;
SFPV.visit(F);
}
+
+ if (F.getCallingConv() == CallingConv::X86_INTR &&
+ !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) {
+ Type *ByValTy = cast<PointerType>(F.getArg(0)->getType())->getElementType();
+ Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy);
+ F.addParamAttr(0, NewAttr);
+ }
}
static bool isOldLoopArgument(Metadata *MD) {
@@ -4267,11 +4425,17 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
}
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
- std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
+ Triple T(TT);
+ // For AMDGPU we upgrade older DataLayouts to include the default globals
+ // address space of 1.
+ if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
+ return DL.empty() ? std::string("G1") : (DL + "-G1").str();
+ }
+ std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
// If X86, and the datalayout matches the expected format, add pointer size
// address spaces to the datalayout.
- if (!Triple(TT).isX86() || DL.contains(AddrSpaces))
+ if (!T.isX86() || DL.contains(AddrSpaces))
return std::string(DL);
SmallVector<StringRef, 4> Groups;
@@ -4279,9 +4443,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
if (!R.match(DL, &Groups))
return std::string(DL);
- SmallString<1024> Buf;
- std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str();
- return Res;
+ return (Groups[1] + AddrSpaces + Groups[3]).str();
}
void llvm::UpgradeAttributes(AttrBuilder &B) {
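Illustrative sketch (not part of the merged diff): how the upgraded llvm::UpgradeDataLayoutString behaves for an AMDGPU triple whose datalayout predates the default globals address space. The function name and the sample strings are made up; it assumes the LLVM 12 headers from this merge:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/AutoUpgrade.h"
    #include <cassert>

    void exampleAMDGPUDataLayoutUpgrade() {
      // An old AMDGPU layout without a "-G" component gains the default "G1".
      std::string Upgraded =
          llvm::UpgradeDataLayoutString("e-p:64:64", "amdgcn-amd-amdhsa");
      assert(Upgraded == "e-p:64:64-G1");
      // An empty layout string becomes just "G1".
      assert(llvm::UpgradeDataLayoutString("", "amdgcn-amd-amdhsa") == "G1");
    }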
diff --git a/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp
index 64f1d3f3100c..00ef10dd53af 100644
--- a/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp
@@ -97,18 +97,20 @@ void BasicBlock::setParent(Function *parent) {
iterator_range<filter_iterator<BasicBlock::const_iterator,
std::function<bool(const Instruction &)>>>
-BasicBlock::instructionsWithoutDebug() const {
- std::function<bool(const Instruction &)> Fn = [](const Instruction &I) {
- return !isa<DbgInfoIntrinsic>(I);
+BasicBlock::instructionsWithoutDebug(bool SkipPseudoOp) const {
+ std::function<bool(const Instruction &)> Fn = [=](const Instruction &I) {
+ return !isa<DbgInfoIntrinsic>(I) &&
+ !(SkipPseudoOp && isa<PseudoProbeInst>(I));
};
return make_filter_range(*this, Fn);
}
-iterator_range<filter_iterator<BasicBlock::iterator,
- std::function<bool(Instruction &)>>>
-BasicBlock::instructionsWithoutDebug() {
- std::function<bool(Instruction &)> Fn = [](Instruction &I) {
- return !isa<DbgInfoIntrinsic>(I);
+iterator_range<
+ filter_iterator<BasicBlock::iterator, std::function<bool(Instruction &)>>>
+BasicBlock::instructionsWithoutDebug(bool SkipPseudoOp) {
+ std::function<bool(Instruction &)> Fn = [=](Instruction &I) {
+ return !isa<DbgInfoIntrinsic>(I) &&
+ !(SkipPseudoOp && isa<PseudoProbeInst>(I));
};
return make_filter_range(*this, Fn);
}
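Illustrative sketch (not part of the merged diff) of the widened iteration API above; the helper name is invented and SkipPseudoOp is passed explicitly for clarity:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"

    // Count "real" instructions, ignoring llvm.dbg.* intrinsics and, with the
    // new flag, pseudo probe instructions as well.
    unsigned countRealInstructions(llvm::BasicBlock &BB) {
      unsigned N = 0;
      for (llvm::Instruction &I :
           BB.instructionsWithoutDebug(/*SkipPseudoOp=*/true)) {
        (void)I;
        ++N;
      }
      return N;
    }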
@@ -128,15 +130,11 @@ iplist<BasicBlock>::iterator BasicBlock::eraseFromParent() {
return getParent()->getBasicBlockList().erase(getIterator());
}
-/// Unlink this basic block from its current function and
-/// insert it into the function that MovePos lives in, right before MovePos.
void BasicBlock::moveBefore(BasicBlock *MovePos) {
MovePos->getParent()->getBasicBlockList().splice(
MovePos->getIterator(), getParent()->getBasicBlockList(), getIterator());
}
-/// Unlink this basic block from its current function and
-/// insert it into the function that MovePos lives in, right after MovePos.
void BasicBlock::moveAfter(BasicBlock *MovePos) {
MovePos->getParent()->getBasicBlockList().splice(
++MovePos->getIterator(), getParent()->getBasicBlockList(),
@@ -218,14 +216,21 @@ const Instruction* BasicBlock::getFirstNonPHI() const {
return nullptr;
}
-const Instruction* BasicBlock::getFirstNonPHIOrDbg() const {
- for (const Instruction &I : *this)
- if (!isa<PHINode>(I) && !isa<DbgInfoIntrinsic>(I))
- return &I;
+const Instruction *BasicBlock::getFirstNonPHIOrDbg(bool SkipPseudoOp) const {
+ for (const Instruction &I : *this) {
+ if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ if (SkipPseudoOp && isa<PseudoProbeInst>(I))
+ continue;
+
+ return &I;
+ }
return nullptr;
}
-const Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() const {
+const Instruction *
+BasicBlock::getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp) const {
for (const Instruction &I : *this) {
if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I))
continue;
@@ -233,6 +238,9 @@ const Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() const {
if (I.isLifetimeStartOrEnd())
continue;
+ if (SkipPseudoOp && isa<PseudoProbeInst>(I))
+ continue;
+
return &I;
}
return nullptr;
@@ -253,8 +261,6 @@ void BasicBlock::dropAllReferences() {
I.dropAllReferences();
}
-/// If this basic block has a single predecessor block,
-/// return the block, otherwise return a null pointer.
const BasicBlock *BasicBlock::getSinglePredecessor() const {
const_pred_iterator PI = pred_begin(this), E = pred_end(this);
if (PI == E) return nullptr; // No preds.
@@ -263,11 +269,6 @@ const BasicBlock *BasicBlock::getSinglePredecessor() const {
return (PI == E) ? ThePred : nullptr /*multiple preds*/;
}
-/// If this basic block has a unique predecessor block,
-/// return the block, otherwise return a null pointer.
-/// Note that unique predecessor doesn't mean single edge, there can be
-/// multiple edges from the unique predecessor to this block (for example
-/// a switch statement with multiple cases having the same destination).
const BasicBlock *BasicBlock::getUniquePredecessor() const {
const_pred_iterator PI = pred_begin(this), E = pred_end(this);
if (PI == E) return nullptr; // No preds.
@@ -317,38 +318,31 @@ iterator_range<BasicBlock::phi_iterator> BasicBlock::phis() {
return make_range<phi_iterator>(P, nullptr);
}
-/// Update PHI nodes in this BasicBlock before removal of predecessor \p Pred.
-/// Note that this function does not actually remove the predecessor.
-///
-/// If \p KeepOneInputPHIs is true then don't remove PHIs that are left with
-/// zero or one incoming values, and don't simplify PHIs with all incoming
-/// values the same.
void BasicBlock::removePredecessor(BasicBlock *Pred,
bool KeepOneInputPHIs) {
// Use hasNUsesOrMore to bound the cost of this assertion for complex CFGs.
- assert((hasNUsesOrMore(16) ||
- find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) &&
+ assert((hasNUsesOrMore(16) || llvm::is_contained(predecessors(this), Pred)) &&
"Pred is not a predecessor!");
// Return early if there are no PHI nodes to update.
- if (!isa<PHINode>(begin()))
+ if (empty() || !isa<PHINode>(begin()))
return;
+
unsigned NumPreds = cast<PHINode>(front()).getNumIncomingValues();
+ for (PHINode &Phi : make_early_inc_range(phis())) {
+ Phi.removeIncomingValue(Pred, !KeepOneInputPHIs);
+ if (KeepOneInputPHIs)
+ continue;
+
+ // If we have a single predecessor, removeIncomingValue may have erased the
+ // PHI node itself.
+ if (NumPreds == 1)
+ continue;
- // Update all PHI nodes.
- for (iterator II = begin(); isa<PHINode>(II);) {
- PHINode *PN = cast<PHINode>(II++);
- PN->removeIncomingValue(Pred, !KeepOneInputPHIs);
- if (!KeepOneInputPHIs) {
- // If we have a single predecessor, removeIncomingValue erased the PHI
- // node itself.
- if (NumPreds > 1) {
- if (Value *PNV = PN->hasConstantValue()) {
- // Replace the PHI node with its constant value.
- PN->replaceAllUsesWith(PNV);
- PN->eraseFromParent();
- }
- }
+ // Try to replace the PHI node with a constant value.
+ if (Value *PhiConstant = Phi.hasConstantValue()) {
+ Phi.replaceAllUsesWith(PhiConstant);
+ Phi.eraseFromParent();
}
}
}
@@ -378,18 +372,11 @@ bool BasicBlock::isLegalToHoistInto() const {
return !Term->isExceptionalTerminator();
}
-/// This splits a basic block into two at the specified
-/// instruction. Note that all instructions BEFORE the specified iterator stay
-/// as part of the original basic block, an unconditional branch is added to
-/// the new BB, and the rest of the instructions in the BB are moved to the new
-/// BB, including the old terminator. This invalidates the iterator.
-///
-/// Note that this only works on well formed basic blocks (must have a
-/// terminator), and 'I' must not be the end of instruction list (which would
-/// cause a degenerate basic block to be formed, having a terminator inside of
-/// the basic block).
-///
-BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
+BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName,
+ bool Before) {
+ if (Before)
+ return splitBasicBlockBefore(I, BBName);
+
assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
assert(I != InstList.end() &&
"Trying to get me to create degenerate basic block!");
@@ -416,6 +403,40 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
return New;
}
+BasicBlock *BasicBlock::splitBasicBlockBefore(iterator I, const Twine &BBName) {
+ assert(getTerminator() &&
+ "Can't use splitBasicBlockBefore on degenerate BB!");
+ assert(I != InstList.end() &&
+ "Trying to get me to create degenerate basic block!");
+
+ assert((!isa<PHINode>(*I) || getSinglePredecessor()) &&
+ "cannot split on multi incoming phis");
+
+ BasicBlock *New = BasicBlock::Create(getContext(), BBName, getParent(), this);
+ // Save DebugLoc of split point before invalidating iterator.
+ DebugLoc Loc = I->getDebugLoc();
+ // Move all of the specified instructions from the original basic block into
+ // the new basic block.
+ New->getInstList().splice(New->end(), this->getInstList(), begin(), I);
+
+ // Loop through all of the predecessors of the 'this' block (which will be
+ // the predecessors of the New block), redirect each predecessor's terminator
+ // from the 'this' block to the New block, and update any PHI nodes in the
+ // 'this' block so that their incoming edges come from the New block rather
+ // than from the old predecessors.
+ for (BasicBlock *Pred : predecessors(this)) {
+ Instruction *TI = Pred->getTerminator();
+ TI->replaceSuccessorWith(this, New);
+ this->replacePhiUsesWith(Pred, New);
+ }
+ // Add a branch instruction from "New" to "this" Block.
+ BranchInst *BI = BranchInst::Create(this, New);
+ BI->setDebugLoc(Loc);
+
+ return New;
+}
+
void BasicBlock::replacePhiUsesWith(BasicBlock *Old, BasicBlock *New) {
// N.B. This might not be a complete BasicBlock, so don't assume
// that it ends with a non-phi instruction.
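Illustrative sketch (not part of the merged diff) of the new Before flag; the helper and block name are made up, and the usual splitBasicBlock preconditions (a terminator, a valid non-end iterator) are assumed to hold:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"

    // Split so that the instructions *before* SplitPt move into a new block
    // that branches into the original one; equivalent to calling
    // splitBasicBlockBefore directly.
    llvm::BasicBlock *splitBefore(llvm::Instruction *SplitPt) {
      llvm::BasicBlock *BB = SplitPt->getParent();
      return BB->splitBasicBlock(SplitPt->getIterator(), "split.before",
                                 /*Before=*/true);
    }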
@@ -443,13 +464,10 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
this->replaceSuccessorsPhiUsesWith(this, New);
}
-/// Return true if this basic block is a landing pad. I.e., it's
-/// the destination of the 'unwind' edge of an invoke instruction.
bool BasicBlock::isLandingPad() const {
return isa<LandingPadInst>(getFirstNonPHI());
}
-/// Return the landingpad instruction associated with the landing pad.
const LandingPadInst *BasicBlock::getLandingPadInst() const {
return dyn_cast<LandingPadInst>(getFirstNonPHI());
}
diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
index c20d0955f3d8..95dd55237e5f 100644
--- a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
@@ -522,6 +522,9 @@ static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy,
Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
Type *DestTy) {
+ if (isa<PoisonValue>(V))
+ return PoisonValue::get(DestTy);
+
if (isa<UndefValue>(V)) {
// zext(undef) = 0, because the top bits will be zero.
// sext(undef) = 0, because the top bits will all be the same.
@@ -532,7 +535,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
return UndefValue::get(DestTy);
}
- if (V->isNullValue() && !DestTy->isX86_MMXTy() &&
+ if (V->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy() &&
opc != Instruction::AddrSpaceCast)
return Constant::getNullValue(DestTy);
@@ -759,7 +762,9 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
Constant *V2Element = ConstantExpr::getExtractElement(V2,
ConstantInt::get(Ty, i));
auto *Cond = cast<Constant>(CondV->getOperand(i));
- if (V1Element == V2Element) {
+ if (isa<PoisonValue>(Cond)) {
+ V = PoisonValue::get(V1Element->getType());
+ } else if (V1Element == V2Element) {
V = V1Element;
} else if (isa<UndefValue>(Cond)) {
V = isa<UndefValue>(V1Element) ? V1Element : V2Element;
@@ -775,14 +780,45 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
return ConstantVector::get(Result);
}
+ if (isa<PoisonValue>(Cond))
+ return PoisonValue::get(V1->getType());
+
if (isa<UndefValue>(Cond)) {
if (isa<UndefValue>(V1)) return V1;
return V2;
}
- if (isa<UndefValue>(V1)) return V2;
- if (isa<UndefValue>(V2)) return V1;
+
if (V1 == V2) return V1;
+ if (isa<PoisonValue>(V1))
+ return V2;
+ if (isa<PoisonValue>(V2))
+ return V1;
+
+ // If the true or false value is undef, we can fold to the other value as
+ // long as the other value isn't poison.
+ auto NotPoison = [](Constant *C) {
+ if (isa<PoisonValue>(C))
+ return false;
+
+ // TODO: We can analyze ConstExpr by opcode to determine if there is any
+ // possibility of poison.
+ if (isa<ConstantExpr>(C))
+ return false;
+
+ if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(C) ||
+ isa<ConstantPointerNull>(C) || isa<Function>(C))
+ return true;
+
+ if (C->getType()->isVectorTy())
+ return !C->containsPoisonElement() && !C->containsConstantExpression();
+
+ // TODO: Recursively analyze aggregates or other constants.
+ return false;
+ };
+ if (isa<UndefValue>(V1) && NotPoison(V2)) return V2;
+ if (isa<UndefValue>(V2) && NotPoison(V1)) return V1;
+
if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
if (TrueVal->getOpcode() == Instruction::Select)
if (TrueVal->getOperand(0) == Cond)
@@ -801,9 +837,13 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
Constant *Idx) {
auto *ValVTy = cast<VectorType>(Val->getType());
+ // extractelt poison, C -> poison
+ // extractelt C, undef -> poison
+ if (isa<PoisonValue>(Val) || isa<UndefValue>(Idx))
+ return PoisonValue::get(ValVTy->getElementType());
+
// extractelt undef, C -> undef
- // extractelt C, undef -> undef
- if (isa<UndefValue>(Val) || isa<UndefValue>(Idx))
+ if (isa<UndefValue>(Val))
return UndefValue::get(ValVTy->getElementType());
auto *CIdx = dyn_cast<ConstantInt>(Idx);
@@ -811,9 +851,9 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
return nullptr;
if (auto *ValFVTy = dyn_cast<FixedVectorType>(Val->getType())) {
- // ee({w,x,y,z}, wrong_value) -> undef
+ // ee({w,x,y,z}, wrong_value) -> poison
if (CIdx->uge(ValFVTy->getNumElements()))
- return UndefValue::get(ValFVTy->getElementType());
+ return PoisonValue::get(ValFVTy->getElementType());
}
// ee (gep (ptr, idx0, ...), idx) -> gep (ee (ptr, idx), ee (idx0, idx), ...)
@@ -833,6 +873,15 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
}
return CE->getWithOperands(Ops, ValVTy->getElementType(), false,
Ops[0]->getType()->getPointerElementType());
+ } else if (CE->getOpcode() == Instruction::InsertElement) {
+ if (const auto *IEIdx = dyn_cast<ConstantInt>(CE->getOperand(2))) {
+ if (APSInt::isSameValue(APSInt(IEIdx->getValue()),
+ APSInt(CIdx->getValue()))) {
+ return CE->getOperand(1);
+ } else {
+ return ConstantExpr::getExtractElement(CE->getOperand(0), CIdx);
+ }
+ }
}
}
@@ -853,7 +902,7 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
Constant *Elt,
Constant *Idx) {
if (isa<UndefValue>(Idx))
- return UndefValue::get(Val->getType());
+ return PoisonValue::get(Val->getType());
ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
if (!CIdx) return nullptr;
@@ -890,7 +939,8 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
ArrayRef<int> Mask) {
auto *V1VTy = cast<VectorType>(V1->getType());
unsigned MaskNumElts = Mask.size();
- ElementCount MaskEltCount = {MaskNumElts, isa<ScalableVectorType>(V1VTy)};
+ auto MaskEltCount =
+ ElementCount::get(MaskNumElts, isa<ScalableVectorType>(V1VTy));
Type *EltTy = V1VTy->getElementType();
// Undefined shuffle mask -> undefined value.
@@ -901,7 +951,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
// If the mask is all zeros this is a splat, no need to go through all
// elements.
if (all_of(Mask, [](int Elt) { return Elt == 0; }) &&
- !MaskEltCount.Scalable) {
+ !MaskEltCount.isScalable()) {
Type *Ty = IntegerType::get(V1->getContext(), 32);
Constant *Elt =
ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, 0));
@@ -912,7 +962,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
if (isa<ScalableVectorType>(V1VTy))
return nullptr;
- unsigned SrcNumElts = V1VTy->getElementCount().Min;
+ unsigned SrcNumElts = V1VTy->getElementCount().getKnownMinValue();
// Loop over the shuffle mask, evaluating each element.
SmallVector<Constant*, 32> Result;
@@ -1054,6 +1104,17 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
return C1;
}
+ // Binary operations propagate poison.
+ // FIXME: Currently, or/and i1 poison aren't folded into poison because
+ // it causes miscompilation when combined with another optimization in
+ // InstCombine (select i1 -> and/or). The select fold is wrong, but
+ // fixing it requires an effort, so temporarily disable this until it is
+ // fixed.
+ bool PoisonFold = !C1->getType()->isIntegerTy(1) ||
+ (Opcode != Instruction::Or && Opcode != Instruction::And);
+ if (PoisonFold && (isa<PoisonValue>(C1) || isa<PoisonValue>(C2)))
+ return PoisonValue::get(C1->getType());
+
// Handle scalar UndefValue and scalable vector UndefValue. Fixed-length
// vectors are always evaluated per element.
bool IsScalableVector = isa<ScalableVectorType>(C1->getType());
@@ -1378,12 +1439,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
return ConstantFP::get(C1->getContext(), C3V);
}
}
- } else if (IsScalableVector) {
- // Do not iterate on scalable vector. The number of elements is unknown at
- // compile-time.
- // FIXME: this branch can potentially be removed
- return nullptr;
- } else if (auto *VTy = dyn_cast<FixedVectorType>(C1->getType())) {
+ } else if (auto *VTy = dyn_cast<VectorType>(C1->getType())) {
// Fast path for splatted constants.
if (Constant *C2Splat = C2->getSplatValue()) {
if (Instruction::isIntDivRem(Opcode) && C2Splat->isNullValue())
@@ -1395,22 +1451,24 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
}
}
- // Fold each element and create a vector constant from those constants.
- SmallVector<Constant*, 16> Result;
- Type *Ty = IntegerType::get(VTy->getContext(), 32);
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- Constant *ExtractIdx = ConstantInt::get(Ty, i);
- Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx);
- Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx);
+ if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+ // Fold each element and create a vector constant from those constants.
+ SmallVector<Constant*, 16> Result;
+ Type *Ty = IntegerType::get(FVTy->getContext(), 32);
+ for (unsigned i = 0, e = FVTy->getNumElements(); i != e; ++i) {
+ Constant *ExtractIdx = ConstantInt::get(Ty, i);
+ Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx);
+ Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx);
- // If any element of a divisor vector is zero, the whole op is undef.
- if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue())
- return UndefValue::get(VTy);
+ // If any element of a divisor vector is zero, the whole op is undef.
+ if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue())
+ return UndefValue::get(VTy);
- Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
- }
+ Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
+ }
- return ConstantVector::get(Result);
+ return ConstantVector::get(Result);
+ }
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
@@ -1724,9 +1782,15 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
case Instruction::FPToSI:
break; // We can't evaluate floating point casts or truncations.
+ case Instruction::BitCast:
+ // If this is a global value cast, check to see if the RHS is also a
+ // GlobalValue.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0))
+ if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2))
+ return areGlobalsPotentiallyEqual(GV, GV2);
+ LLVM_FALLTHROUGH;
case Instruction::UIToFP:
case Instruction::SIToFP:
- case Instruction::BitCast:
case Instruction::ZExt:
case Instruction::SExt:
// We can't evaluate floating point casts or truncations.
@@ -1889,6 +1953,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
return Constant::getAllOnesValue(ResultTy);
// Handle some degenerate cases first
+ if (isa<PoisonValue>(C1) || isa<PoisonValue>(C2))
+ return PoisonValue::get(ResultTy);
+
if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
CmpInst::Predicate Predicate = CmpInst::Predicate(pred);
bool isIntegerPredicate = ICmpInst::isIntPredicate(Predicate);
@@ -2009,11 +2076,6 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
}
} else if (auto *C1VTy = dyn_cast<VectorType>(C1->getType())) {
- // Do not iterate on scalable vector. The number of elements is unknown at
- // compile-time.
- if (isa<ScalableVectorType>(C1VTy))
- return nullptr;
-
// Fast path for splatted constants.
if (Constant *C1Splat = C1->getSplatValue())
if (Constant *C2Splat = C2->getSplatValue())
@@ -2021,16 +2083,22 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
C1VTy->getElementCount(),
ConstantExpr::getCompare(pred, C1Splat, C2Splat));
+ // Do not iterate on scalable vector. The number of elements is unknown at
+ // compile-time.
+ if (isa<ScalableVectorType>(C1VTy))
+ return nullptr;
+
// If we can constant fold the comparison of each element, constant fold
// the whole vector comparison.
SmallVector<Constant*, 4> ResElts;
Type *Ty = IntegerType::get(C1->getContext(), 32);
// Compare the elements, producing an i1 result or constant expr.
- for (unsigned i = 0, e = C1VTy->getElementCount().Min; i != e; ++i) {
+ for (unsigned I = 0, E = C1VTy->getElementCount().getKnownMinValue();
+ I != E; ++I) {
Constant *C1E =
- ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i));
+ ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, I));
Constant *C2E =
- ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i));
+ ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, I));
ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E));
}
@@ -2273,6 +2341,9 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
Type *GEPTy = GetElementPtrInst::getGEPReturnType(
PointeeTy, C, makeArrayRef((Value *const *)Idxs.data(), Idxs.size()));
+ if (isa<PoisonValue>(C))
+ return PoisonValue::get(GEPTy);
+
if (isa<UndefValue>(C))
return UndefValue::get(GEPTy);
diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp b/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp
index eabaaa203927..4b0ad1bd25a0 100644
--- a/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Compiler.h"
@@ -412,6 +413,21 @@ bool ConstantRange::contains(const ConstantRange &Other) const {
return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower());
}
+unsigned ConstantRange::getActiveBits() const {
+ if (isEmptySet())
+ return 0;
+
+ return getUnsignedMax().getActiveBits();
+}
+
+unsigned ConstantRange::getMinSignedBits() const {
+ if (isEmptySet())
+ return 0;
+
+ return std::max(getSignedMin().getMinSignedBits(),
+ getSignedMax().getMinSignedBits());
+}
+
ConstantRange ConstantRange::subtract(const APInt &Val) const {
assert(Val.getBitWidth() == getBitWidth() && "Wrong bit width");
// If the set is empty or full, don't modify the endpoints.
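Illustrative sketch (not part of the merged diff) of the two new bit-width queries; the helper name and the sample range are made up:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include <cassert>

    void bitWidthQueries() {
      llvm::ConstantRange CR(llvm::APInt(32, 1), llvm::APInt(32, 100)); // {1..99}
      assert(CR.getActiveBits() == 7);    // unsigned max 99 fits in 7 bits
      assert(CR.getMinSignedBits() == 8); // 99 as a signed value needs 8 bits
    }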
@@ -835,6 +851,54 @@ ConstantRange ConstantRange::overflowingBinaryOp(Instruction::BinaryOps BinOp,
}
}
+bool ConstantRange::isIntrinsicSupported(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::umin:
+ case Intrinsic::umax:
+ case Intrinsic::smin:
+ case Intrinsic::smax:
+ case Intrinsic::abs:
+ return true;
+ default:
+ return false;
+ }
+}
+
+ConstantRange ConstantRange::intrinsic(Intrinsic::ID IntrinsicID,
+ ArrayRef<ConstantRange> Ops) {
+ switch (IntrinsicID) {
+ case Intrinsic::uadd_sat:
+ return Ops[0].uadd_sat(Ops[1]);
+ case Intrinsic::usub_sat:
+ return Ops[0].usub_sat(Ops[1]);
+ case Intrinsic::sadd_sat:
+ return Ops[0].sadd_sat(Ops[1]);
+ case Intrinsic::ssub_sat:
+ return Ops[0].ssub_sat(Ops[1]);
+ case Intrinsic::umin:
+ return Ops[0].umin(Ops[1]);
+ case Intrinsic::umax:
+ return Ops[0].umax(Ops[1]);
+ case Intrinsic::smin:
+ return Ops[0].smin(Ops[1]);
+ case Intrinsic::smax:
+ return Ops[0].smax(Ops[1]);
+ case Intrinsic::abs: {
+ const APInt *IntMinIsPoison = Ops[1].getSingleElement();
+ assert(IntMinIsPoison && "Must be known (immarg)");
+ assert(IntMinIsPoison->getBitWidth() == 1 && "Must be boolean");
+ return Ops[0].abs(IntMinIsPoison->getBoolValue());
+ }
+ default:
+ assert(!isIntrinsicSupported(IntrinsicID) && "Shouldn't be supported");
+ llvm_unreachable("Unsupported intrinsic");
+ }
+}
+
ConstantRange
ConstantRange::add(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
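Illustrative sketch (not part of the merged diff) of the new range-intrinsic entry points; the function name and the concrete 8-bit ranges are made up:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/Intrinsics.h"
    #include <cassert>

    llvm::ConstantRange saturatingAddRange() {
      using namespace llvm;
      ConstantRange A(APInt(8, 200), APInt(8, 250)); // {200, ..., 249}
      ConstantRange B(APInt(8, 10), APInt(8, 20));   // {10, ..., 19}
      assert(ConstantRange::isIntrinsicSupported(Intrinsic::uadd_sat));
      // Dispatches to ConstantRange::uadd_sat under the hood.
      return ConstantRange::intrinsic(Intrinsic::uadd_sat, {A, B});
    }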
@@ -1191,6 +1255,16 @@ ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
return ConstantRange(std::move(Lower), std::move(Upper));
}
+ConstantRange ConstantRange::binaryNot() const {
+ if (isEmptySet())
+ return getEmpty();
+
+ if (isWrappedSet())
+ return getFull();
+
+ return ConstantRange(APInt::getAllOnesValue(getBitWidth())).sub(*this);
+}
+
ConstantRange
ConstantRange::binaryAnd(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
@@ -1229,6 +1303,12 @@ ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const {
if (isSingleElement() && Other.isSingleElement())
return {*getSingleElement() ^ *Other.getSingleElement()};
+ // Special-case binary complement, since we can give a precise answer.
+ if (Other.isSingleElement() && Other.getSingleElement()->isAllOnesValue())
+ return binaryNot();
+ if (isSingleElement() && getSingleElement()->isAllOnesValue())
+ return Other.binaryNot();
+
// TODO: replace this with something less conservative
return getFull();
}
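Illustrative sketch (not part of the merged diff): xor with an all-ones singleton now folds through the new binaryNot; the helper name is invented:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"

    // ~x for every x in CR, computed as CR ^ {all-ones}; equivalent to
    // CR.binaryNot(), i.e. (all-ones) - CR element-wise.
    llvm::ConstantRange bitwiseNot(const llvm::ConstantRange &CR) {
      llvm::APInt AllOnes = llvm::APInt::getAllOnesValue(CR.getBitWidth());
      return CR.binaryXor(llvm::ConstantRange(AllOnes));
    }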
@@ -1418,7 +1498,7 @@ ConstantRange ConstantRange::inverse() const {
return ConstantRange(Upper, Lower);
}
-ConstantRange ConstantRange::abs() const {
+ConstantRange ConstantRange::abs(bool IntMinIsPoison) const {
if (isEmptySet())
return getEmpty();
@@ -1430,12 +1510,23 @@ ConstantRange ConstantRange::abs() const {
else
Lo = APIntOps::umin(Lower, -Upper + 1);
- // SignedMin is included in the result range.
- return ConstantRange(Lo, APInt::getSignedMinValue(getBitWidth()) + 1);
+ // If SignedMin is not poison, then it is included in the result range.
+ if (IntMinIsPoison)
+ return ConstantRange(Lo, APInt::getSignedMinValue(getBitWidth()));
+ else
+ return ConstantRange(Lo, APInt::getSignedMinValue(getBitWidth()) + 1);
}
APInt SMin = getSignedMin(), SMax = getSignedMax();
+ // Skip SignedMin if it is poison.
+ if (IntMinIsPoison && SMin.isMinSignedValue()) {
+ // The range may become empty if it *only* contains SignedMin.
+ if (SMax.isMinSignedValue())
+ return getEmpty();
+ ++SMin;
+ }
+
// All non-negative.
if (SMin.isNonNegative())
return *this;
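Illustrative sketch (not part of the merged diff): the effect of the new IntMinIsPoison flag on a full i8 range; the helper name is invented:

    #include "llvm/IR/ConstantRange.h"

    llvm::ConstantRange absWithoutIntMin() {
      // Full 8-bit range, i.e. every value including INT8_MIN.
      llvm::ConstantRange Full = llvm::ConstantRange::getFull(8);
      // Per the code above: with IntMinIsPoison=true the result is [0, 127];
      // with false it also contains 128, the unsigned magnitude of INT8_MIN.
      return Full.abs(/*IntMinIsPoison=*/true);
    }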
diff --git a/contrib/llvm-project/llvm/lib/IR/Constants.cpp b/contrib/llvm-project/llvm/lib/IR/Constants.cpp
index cbbcca20ea51..9f05917cf7cc 100644
--- a/contrib/llvm-project/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Constants.cpp
@@ -161,7 +161,7 @@ bool Constant::isNotOneValue() const {
// Check that vectors don't contain 1
if (auto *VTy = dyn_cast<VectorType>(this->getType())) {
- unsigned NumElts = VTy->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = this->getAggregateElement(i);
if (!Elt || !Elt->isNotOneValue())
@@ -211,7 +211,7 @@ bool Constant::isNotMinSignedValue() const {
// Check that vectors don't contain INT_MIN
if (auto *VTy = dyn_cast<VectorType>(this->getType())) {
- unsigned NumElts = VTy->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = this->getAggregateElement(i);
if (!Elt || !Elt->isNotMinSignedValue())
@@ -227,7 +227,7 @@ bool Constant::isNotMinSignedValue() const {
bool Constant::isFiniteNonZeroFP() const {
if (auto *CFP = dyn_cast<ConstantFP>(this))
return CFP->getValueAPF().isFiniteNonZero();
- auto *VTy = dyn_cast<VectorType>(getType());
+ auto *VTy = dyn_cast<FixedVectorType>(getType());
if (!VTy)
return false;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
@@ -304,23 +304,42 @@ bool Constant::isElementWiseEqual(Value *Y) const {
return isa<UndefValue>(CmpEq) || match(CmpEq, m_One());
}
-bool Constant::containsUndefElement() const {
- if (auto *VTy = dyn_cast<VectorType>(getType())) {
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- if (isa<UndefValue>(getAggregateElement(i)))
+static bool
+containsUndefinedElement(const Constant *C,
+ function_ref<bool(const Constant *)> HasFn) {
+ if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
+ if (HasFn(C))
+ return true;
+ if (isa<ConstantAggregateZero>(C))
+ return false;
+ if (isa<ScalableVectorType>(C->getType()))
+ return false;
+
+ for (unsigned i = 0, e = cast<FixedVectorType>(VTy)->getNumElements();
+ i != e; ++i)
+ if (HasFn(C->getAggregateElement(i)))
return true;
}
return false;
}
+bool Constant::containsUndefOrPoisonElement() const {
+ return containsUndefinedElement(
+ this, [&](const auto *C) { return isa<UndefValue>(C); });
+}
+
+bool Constant::containsPoisonElement() const {
+ return containsUndefinedElement(
+ this, [&](const auto *C) { return isa<PoisonValue>(C); });
+}
+
bool Constant::containsConstantExpression() const {
- if (auto *VTy = dyn_cast<VectorType>(getType())) {
+ if (auto *VTy = dyn_cast<FixedVectorType>(getType())) {
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
if (isa<ConstantExpr>(getAggregateElement(i)))
return true;
}
-
return false;
}
@@ -400,16 +419,23 @@ Constant *Constant::getAllOnesValue(Type *Ty) {
}
Constant *Constant::getAggregateElement(unsigned Elt) const {
- if (const ConstantAggregate *CC = dyn_cast<ConstantAggregate>(this))
+ if (const auto *CC = dyn_cast<ConstantAggregate>(this))
return Elt < CC->getNumOperands() ? CC->getOperand(Elt) : nullptr;
- if (const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(this))
+ // FIXME: getNumElements() will fail for non-fixed vector types.
+ if (isa<ScalableVectorType>(getType()))
+ return nullptr;
+
+ if (const auto *CAZ = dyn_cast<ConstantAggregateZero>(this))
return Elt < CAZ->getNumElements() ? CAZ->getElementValue(Elt) : nullptr;
- if (const UndefValue *UV = dyn_cast<UndefValue>(this))
+ if (const auto *PV = dyn_cast<PoisonValue>(this))
+ return Elt < PV->getNumElements() ? PV->getElementValue(Elt) : nullptr;
+
+ if (const auto *UV = dyn_cast<UndefValue>(this))
return Elt < UV->getNumElements() ? UV->getElementValue(Elt) : nullptr;
- if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
+ if (const auto *CDS = dyn_cast<ConstantDataSequential>(this))
return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt)
: nullptr;
return nullptr;
@@ -501,9 +527,15 @@ void llvm::deleteConstant(Constant *C) {
case Constant::BlockAddressVal:
delete static_cast<BlockAddress *>(C);
break;
+ case Constant::DSOLocalEquivalentVal:
+ delete static_cast<DSOLocalEquivalent *>(C);
+ break;
case Constant::UndefValueVal:
delete static_cast<UndefValue *>(C);
break;
+ case Constant::PoisonValueVal:
+ delete static_cast<PoisonValue *>(C);
+ break;
case Constant::ConstantExprVal:
if (isa<UnaryConstantExpr>(C))
delete static_cast<UnaryConstantExpr *>(C);
@@ -646,10 +678,17 @@ bool Constant::needsRelocation() const {
return false;
// Relative pointers do not need to be dynamically relocated.
- if (auto *LHSGV = dyn_cast<GlobalValue>(LHSOp0->stripPointerCasts()))
- if (auto *RHSGV = dyn_cast<GlobalValue>(RHSOp0->stripPointerCasts()))
+ if (auto *RHSGV =
+ dyn_cast<GlobalValue>(RHSOp0->stripInBoundsConstantOffsets())) {
+ auto *LHS = LHSOp0->stripInBoundsConstantOffsets();
+ if (auto *LHSGV = dyn_cast<GlobalValue>(LHS)) {
if (LHSGV->isDSOLocal() && RHSGV->isDSOLocal())
return false;
+ } else if (isa<DSOLocalEquivalent>(LHS)) {
+ if (RHSGV->isDSOLocal())
+ return false;
+ }
+ }
}
}
}
@@ -729,6 +768,52 @@ Constant *Constant::replaceUndefsWith(Constant *C, Constant *Replacement) {
return ConstantVector::get(NewC);
}
+Constant *Constant::mergeUndefsWith(Constant *C, Constant *Other) {
+ assert(C && Other && "Expected non-nullptr constant arguments");
+ if (match(C, m_Undef()))
+ return C;
+
+ Type *Ty = C->getType();
+ if (match(Other, m_Undef()))
+ return UndefValue::get(Ty);
+
+ auto *VTy = dyn_cast<FixedVectorType>(Ty);
+ if (!VTy)
+ return C;
+
+ Type *EltTy = VTy->getElementType();
+ unsigned NumElts = VTy->getNumElements();
+ assert(isa<FixedVectorType>(Other->getType()) &&
+ cast<FixedVectorType>(Other->getType())->getNumElements() == NumElts &&
+ "Type mismatch");
+
+ bool FoundExtraUndef = false;
+ SmallVector<Constant *, 32> NewC(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I) {
+ NewC[I] = C->getAggregateElement(I);
+ Constant *OtherEltC = Other->getAggregateElement(I);
+ assert(NewC[I] && OtherEltC && "Unknown vector element");
+ if (!match(NewC[I], m_Undef()) && match(OtherEltC, m_Undef())) {
+ NewC[I] = UndefValue::get(EltTy);
+ FoundExtraUndef = true;
+ }
+ }
+ if (FoundExtraUndef)
+ return ConstantVector::get(NewC);
+ return C;
+}
+
+bool Constant::isManifestConstant() const {
+ if (isa<ConstantData>(this))
+ return true;
+ if (isa<ConstantAggregate>(this) || isa<ConstantExpr>(this)) {
+ for (const Value *Op : operand_values())
+ if (!cast<Constant>(Op)->isManifestConstant())
+ return false;
+ return true;
+ }
+ return false;
+}
//===----------------------------------------------------------------------===//
// ConstantInt
@@ -753,6 +838,10 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
return pImpl->TheFalseVal;
}
+ConstantInt *ConstantInt::getBool(LLVMContext &Context, bool V) {
+ return V ? getTrue(Context) : getFalse(Context);
+}
+
Constant *ConstantInt::getTrue(Type *Ty) {
assert(Ty->isIntOrIntVectorTy(1) && "Type not i1 or vector of i1.");
ConstantInt *TrueC = ConstantInt::getTrue(Ty->getContext());
@@ -769,6 +858,10 @@ Constant *ConstantInt::getFalse(Type *Ty) {
return FalseC;
}
+Constant *ConstantInt::getBool(Type *Ty, bool V) {
+ return V ? getTrue(Ty) : getFalse(Ty);
+}
+
// Get a ConstantInt from an APInt.
ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
// get an existing value or the insertion position
@@ -830,30 +923,12 @@ void ConstantInt::destroyConstantImpl() {
// ConstantFP
//===----------------------------------------------------------------------===//
-static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
- if (Ty->isHalfTy())
- return &APFloat::IEEEhalf();
- if (Ty->isBFloatTy())
- return &APFloat::BFloat();
- if (Ty->isFloatTy())
- return &APFloat::IEEEsingle();
- if (Ty->isDoubleTy())
- return &APFloat::IEEEdouble();
- if (Ty->isX86_FP80Ty())
- return &APFloat::x87DoubleExtended();
- else if (Ty->isFP128Ty())
- return &APFloat::IEEEquad();
-
- assert(Ty->isPPC_FP128Ty() && "Unknown FP format");
- return &APFloat::PPCDoubleDouble();
-}
-
Constant *ConstantFP::get(Type *Ty, double V) {
LLVMContext &Context = Ty->getContext();
APFloat FV(V);
bool ignored;
- FV.convert(*TypeToFloatSemantics(Ty->getScalarType()),
+ FV.convert(Ty->getScalarType()->getFltSemantics(),
APFloat::rmNearestTiesToEven, &ignored);
Constant *C = get(Context, FV);
@@ -879,7 +954,7 @@ Constant *ConstantFP::get(Type *Ty, const APFloat &V) {
Constant *ConstantFP::get(Type *Ty, StringRef Str) {
LLVMContext &Context = Ty->getContext();
- APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
+ APFloat FV(Ty->getScalarType()->getFltSemantics(), Str);
Constant *C = get(Context, FV);
// For vectors, broadcast the value.
@@ -890,7 +965,7 @@ Constant *ConstantFP::get(Type *Ty, StringRef Str) {
}
Constant *ConstantFP::getNaN(Type *Ty, bool Negative, uint64_t Payload) {
- const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
+ const fltSemantics &Semantics = Ty->getScalarType()->getFltSemantics();
APFloat NaN = APFloat::getNaN(Semantics, Negative, Payload);
Constant *C = get(Ty->getContext(), NaN);
@@ -901,7 +976,7 @@ Constant *ConstantFP::getNaN(Type *Ty, bool Negative, uint64_t Payload) {
}
Constant *ConstantFP::getQNaN(Type *Ty, bool Negative, APInt *Payload) {
- const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
+ const fltSemantics &Semantics = Ty->getScalarType()->getFltSemantics();
APFloat NaN = APFloat::getQNaN(Semantics, Negative, Payload);
Constant *C = get(Ty->getContext(), NaN);
@@ -912,7 +987,7 @@ Constant *ConstantFP::getQNaN(Type *Ty, bool Negative, APInt *Payload) {
}
Constant *ConstantFP::getSNaN(Type *Ty, bool Negative, APInt *Payload) {
- const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
+ const fltSemantics &Semantics = Ty->getScalarType()->getFltSemantics();
APFloat NaN = APFloat::getSNaN(Semantics, Negative, Payload);
Constant *C = get(Ty->getContext(), NaN);
@@ -923,7 +998,7 @@ Constant *ConstantFP::getSNaN(Type *Ty, bool Negative, APInt *Payload) {
}
Constant *ConstantFP::getNegativeZero(Type *Ty) {
- const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
+ const fltSemantics &Semantics = Ty->getScalarType()->getFltSemantics();
APFloat NegZero = APFloat::getZero(Semantics, /*Negative=*/true);
Constant *C = get(Ty->getContext(), NegZero);
@@ -949,24 +1024,7 @@ ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
std::unique_ptr<ConstantFP> &Slot = pImpl->FPConstants[V];
if (!Slot) {
- Type *Ty;
- if (&V.getSemantics() == &APFloat::IEEEhalf())
- Ty = Type::getHalfTy(Context);
- else if (&V.getSemantics() == &APFloat::BFloat())
- Ty = Type::getBFloatTy(Context);
- else if (&V.getSemantics() == &APFloat::IEEEsingle())
- Ty = Type::getFloatTy(Context);
- else if (&V.getSemantics() == &APFloat::IEEEdouble())
- Ty = Type::getDoubleTy(Context);
- else if (&V.getSemantics() == &APFloat::x87DoubleExtended())
- Ty = Type::getX86_FP80Ty(Context);
- else if (&V.getSemantics() == &APFloat::IEEEquad())
- Ty = Type::getFP128Ty(Context);
- else {
- assert(&V.getSemantics() == &APFloat::PPCDoubleDouble() &&
- "Unknown FP format");
- Ty = Type::getPPC_FP128Ty(Context);
- }
+ Type *Ty = Type::getFloatingPointTy(Context, V.getSemantics());
Slot.reset(new ConstantFP(Ty, V));
}
@@ -974,7 +1032,7 @@ ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
}
Constant *ConstantFP::getInfinity(Type *Ty, bool Negative) {
- const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
+ const fltSemantics &Semantics = Ty->getScalarType()->getFltSemantics();
Constant *C = get(Ty->getContext(), APFloat::getInf(Semantics, Negative));
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
@@ -985,7 +1043,7 @@ Constant *ConstantFP::getInfinity(Type *Ty, bool Negative) {
ConstantFP::ConstantFP(Type *Ty, const APFloat &V)
: ConstantData(Ty, ConstantFPVal), Val(V) {
- assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
+ assert(&V.getSemantics() == &Ty->getFltSemantics() &&
"FP type Mismatch");
}
@@ -1029,7 +1087,7 @@ unsigned ConstantAggregateZero::getNumElements() const {
if (auto *AT = dyn_cast<ArrayType>(Ty))
return AT->getNumElements();
if (auto *VT = dyn_cast<VectorType>(Ty))
- return VT->getNumElements();
+ return cast<FixedVectorType>(VT)->getNumElements();
return Ty->getStructNumElements();
}
@@ -1064,11 +1122,37 @@ unsigned UndefValue::getNumElements() const {
if (auto *AT = dyn_cast<ArrayType>(Ty))
return AT->getNumElements();
if (auto *VT = dyn_cast<VectorType>(Ty))
- return VT->getNumElements();
+ return cast<FixedVectorType>(VT)->getNumElements();
return Ty->getStructNumElements();
}
//===----------------------------------------------------------------------===//
+// PoisonValue Implementation
+//===----------------------------------------------------------------------===//
+
+PoisonValue *PoisonValue::getSequentialElement() const {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(getType()))
+ return PoisonValue::get(ATy->getElementType());
+ return PoisonValue::get(cast<VectorType>(getType())->getElementType());
+}
+
+PoisonValue *PoisonValue::getStructElement(unsigned Elt) const {
+ return PoisonValue::get(getType()->getStructElementType(Elt));
+}
+
+PoisonValue *PoisonValue::getElementValue(Constant *C) const {
+ if (isa<ArrayType>(getType()) || isa<VectorType>(getType()))
+ return getSequentialElement();
+ return getStructElement(cast<ConstantInt>(C)->getZExtValue());
+}
+
+PoisonValue *PoisonValue::getElementValue(unsigned Idx) const {
+ if (isa<ArrayType>(getType()) || isa<VectorType>(getType()))
+ return getSequentialElement();
+ return getStructElement(Idx);
+}
+
+//===----------------------------------------------------------------------===//
// ConstantXXX Classes
//===----------------------------------------------------------------------===//
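Illustrative sketch (not part of the merged diff) of the new poison constant next to undef; the helper name is invented:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"

    llvm::Constant *examplePoison(llvm::LLVMContext &Ctx) {
      llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
      // Uniqued per type, like undef; prints as 'poison' in textual IR and is
      // what the new constant folds above propagate.
      llvm::Constant *U = llvm::UndefValue::get(I32);
      (void)U; // undef remains the weaker "any value" constant
      return llvm::PoisonValue::get(I32);
    }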
@@ -1246,7 +1330,7 @@ Constant *ConstantStruct::get(StructType *ST, ArrayRef<Constant*> V) {
ConstantVector::ConstantVector(VectorType *T, ArrayRef<Constant *> V)
: ConstantAggregate(T, ConstantVectorVal, V) {
- assert(V.size() == T->getNumElements() &&
+ assert(V.size() == cast<FixedVectorType>(T)->getNumElements() &&
"Invalid initializer for constant vector");
}
@@ -1267,17 +1351,20 @@ Constant *ConstantVector::getImpl(ArrayRef<Constant*> V) {
Constant *C = V[0];
bool isZero = C->isNullValue();
bool isUndef = isa<UndefValue>(C);
+ bool isPoison = isa<PoisonValue>(C);
if (isZero || isUndef) {
for (unsigned i = 1, e = V.size(); i != e; ++i)
if (V[i] != C) {
- isZero = isUndef = false;
+ isZero = isUndef = isPoison = false;
break;
}
}
if (isZero)
return ConstantAggregateZero::get(T);
+ if (isPoison)
+ return PoisonValue::get(T);
if (isUndef)
return UndefValue::get(T);
@@ -1292,14 +1379,14 @@ Constant *ConstantVector::getImpl(ArrayRef<Constant*> V) {
}
Constant *ConstantVector::getSplat(ElementCount EC, Constant *V) {
- if (!EC.Scalable) {
+ if (!EC.isScalable()) {
// If this splat is compatible with ConstantDataVector, use it instead of
// ConstantVector.
if ((isa<ConstantFP>(V) || isa<ConstantInt>(V)) &&
ConstantDataSequential::isElementTypeCompatible(V->getType()))
- return ConstantDataVector::getSplat(EC.Min, V);
+ return ConstantDataVector::getSplat(EC.getKnownMinValue(), V);
- SmallVector<Constant *, 32> Elts(EC.Min, V);
+ SmallVector<Constant *, 32> Elts(EC.getKnownMinValue(), V);
return get(Elts);
}
@@ -1316,7 +1403,7 @@ Constant *ConstantVector::getSplat(ElementCount EC, Constant *V) {
Constant *UndefV = UndefValue::get(VTy);
V = ConstantExpr::getInsertElement(UndefV, V, ConstantInt::get(I32Ty, 0));
// Build shuffle mask to perform the splat.
- SmallVector<int, 8> Zeros(EC.Min, 0);
+ SmallVector<int, 8> Zeros(EC.getKnownMinValue(), 0);
// Splat.
return ConstantExpr::getShuffleVector(V, UndefV, Zeros);
}
@@ -1441,6 +1528,8 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
OnlyIfReducedTy);
case Instruction::ExtractValue:
return ConstantExpr::getExtractValue(Ops[0], getIndices(), OnlyIfReducedTy);
+ case Instruction::FNeg:
+ return ConstantExpr::getFNeg(Ops[0]);
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], getShuffleMask(),
OnlyIfReducedTy);
@@ -1601,7 +1690,7 @@ Constant *Constant::getSplatValue(bool AllowUndefs) const {
ConstantInt *Index = dyn_cast<ConstantInt>(IElt->getOperand(2));
if (Index && Index->getValue() == 0 &&
- std::all_of(Mask.begin(), Mask.end(), [](int I) { return I == 0; }))
+ llvm::all_of(Mask, [](int I) { return I == 0; }))
return SplatVal;
}
}
@@ -1673,7 +1762,26 @@ UndefValue *UndefValue::get(Type *Ty) {
/// Remove the constant from the constant table.
void UndefValue::destroyConstantImpl() {
// Free the constant and any dangling references to it.
- getContext().pImpl->UVConstants.erase(getType());
+ if (getValueID() == UndefValueVal) {
+ getContext().pImpl->UVConstants.erase(getType());
+ } else if (getValueID() == PoisonValueVal) {
+ getContext().pImpl->PVConstants.erase(getType());
+ }
+ llvm_unreachable("Not an undef or a poison!");
+}
+
+PoisonValue *PoisonValue::get(Type *Ty) {
+ std::unique_ptr<PoisonValue> &Entry = Ty->getContext().pImpl->PVConstants[Ty];
+ if (!Entry)
+ Entry.reset(new PoisonValue(Ty));
+
+ return Entry.get();
+}
+
+/// Remove the constant from the constant table.
+void PoisonValue::destroyConstantImpl() {
+ // Free the constant and any dangling references to it.
+ getContext().pImpl->PVConstants.erase(getType());
}
BlockAddress *BlockAddress::get(BasicBlock *BB) {
@@ -1754,6 +1862,58 @@ Value *BlockAddress::handleOperandChangeImpl(Value *From, Value *To) {
return nullptr;
}
+DSOLocalEquivalent *DSOLocalEquivalent::get(GlobalValue *GV) {
+ DSOLocalEquivalent *&Equiv = GV->getContext().pImpl->DSOLocalEquivalents[GV];
+ if (!Equiv)
+ Equiv = new DSOLocalEquivalent(GV);
+
+ assert(Equiv->getGlobalValue() == GV &&
+ "DSOLocalFunction does not match the expected global value");
+ return Equiv;
+}
+
+DSOLocalEquivalent::DSOLocalEquivalent(GlobalValue *GV)
+ : Constant(GV->getType(), Value::DSOLocalEquivalentVal, &Op<0>(), 1) {
+ setOperand(0, GV);
+}
+
+/// Remove the constant from the constant table.
+void DSOLocalEquivalent::destroyConstantImpl() {
+ const GlobalValue *GV = getGlobalValue();
+ GV->getContext().pImpl->DSOLocalEquivalents.erase(GV);
+}
+
+Value *DSOLocalEquivalent::handleOperandChangeImpl(Value *From, Value *To) {
+ assert(From == getGlobalValue() && "Changing value does not match operand.");
+ assert(isa<Constant>(To) && "Can only replace the operands with a constant");
+
+ // The replacement is with another global value.
+ if (const auto *ToObj = dyn_cast<GlobalValue>(To)) {
+ DSOLocalEquivalent *&NewEquiv =
+ getContext().pImpl->DSOLocalEquivalents[ToObj];
+ if (NewEquiv)
+ return llvm::ConstantExpr::getBitCast(NewEquiv, getType());
+ }
+
+ // If the argument is replaced with a null value, just replace this constant
+ // with a null value.
+ if (cast<Constant>(To)->isNullValue())
+ return To;
+
+ // The replacement could be a bitcast or an alias to another function. We can
+ // replace it with a bitcast to the dso_local_equivalent of that function.
+ auto *Func = cast<Function>(To->stripPointerCastsAndAliases());
+ DSOLocalEquivalent *&NewEquiv = getContext().pImpl->DSOLocalEquivalents[Func];
+ if (NewEquiv)
+ return llvm::ConstantExpr::getBitCast(NewEquiv, getType());
+
+ // Replace this with the new one.
+ getContext().pImpl->DSOLocalEquivalents.erase(getGlobalValue());
+ NewEquiv = this;
+ setOperand(0, Func);
+ return nullptr;
+}
+
//---- ConstantExpr::get() implementations.
//
@@ -2002,8 +2162,8 @@ Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy,
"PtrToInt destination must be integer or integer vector");
assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
if (isa<VectorType>(C->getType()))
- assert(cast<VectorType>(C->getType())->getNumElements() ==
- cast<VectorType>(DstTy)->getNumElements() &&
+ assert(cast<FixedVectorType>(C->getType())->getNumElements() ==
+ cast<FixedVectorType>(DstTy)->getNumElements() &&
"Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::PtrToInt, C, DstTy, OnlyIfReduced);
}
@@ -2016,8 +2176,8 @@ Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy,
"IntToPtr destination must be a pointer or pointer vector");
assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
if (isa<VectorType>(C->getType()))
- assert(cast<VectorType>(C->getType())->getNumElements() ==
- cast<VectorType>(DstTy)->getNumElements() &&
+ assert(cast<VectorType>(C->getType())->getElementCount() ==
+ cast<VectorType>(DstTy)->getElementCount() &&
"Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::IntToPtr, C, DstTy, OnlyIfReduced);
}
@@ -2048,7 +2208,8 @@ Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
Type *MidTy = PointerType::get(DstElemTy, SrcScalarTy->getAddressSpace());
if (VectorType *VT = dyn_cast<VectorType>(DstTy)) {
// Handle vectors of pointers.
- MidTy = FixedVectorType::get(MidTy, VT->getNumElements());
+ MidTy = FixedVectorType::get(MidTy,
+ cast<FixedVectorType>(VT)->getNumElements());
}
C = getBitCast(C, MidTy);
}
@@ -2245,7 +2406,7 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
unsigned AS = C->getType()->getPointerAddressSpace();
Type *ReqTy = DestTy->getPointerTo(AS);
- ElementCount EltCount = {0, false};
+ auto EltCount = ElementCount::getFixed(0);
if (VectorType *VecTy = dyn_cast<VectorType>(C->getType()))
EltCount = VecTy->getElementCount();
else
@@ -2253,7 +2414,7 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
if (VectorType *VecTy = dyn_cast<VectorType>(Idx->getType()))
EltCount = VecTy->getElementCount();
- if (EltCount.Min != 0)
+ if (EltCount.isNonZero())
ReqTy = VectorType::get(ReqTy, EltCount);
if (OnlyIfReducedTy == ReqTy)
@@ -2273,7 +2434,7 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
if (GTI.isStruct() && Idx->getType()->isVectorTy()) {
Idx = Idx->getSplatValue();
- } else if (GTI.isSequential() && EltCount.Min != 0 &&
+ } else if (GTI.isSequential() && EltCount.isNonZero() &&
!Idx->getType()->isVectorTy()) {
Idx = ConstantVector::getSplat(EltCount, Idx);
}
@@ -2549,6 +2710,11 @@ Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
return get(Instruction::Xor, C1, C2);
}
+Constant *ConstantExpr::getUMin(Constant *C1, Constant *C2) {
+ Constant *Cmp = ConstantExpr::getICmp(CmpInst::ICMP_ULT, C1, C2);
+ return getSelect(Cmp, C1, C2);
+}
+
Constant *ConstantExpr::getShl(Constant *C1, Constant *C2,
bool HasNUW, bool HasNSW) {
unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
@@ -2566,6 +2732,35 @@ Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
isExact ? PossiblyExactOperator::IsExact : 0);
}
+Constant *ConstantExpr::getExactLogBase2(Constant *C) {
+ Type *Ty = C->getType();
+ const APInt *IVal;
+ if (match(C, m_APInt(IVal)) && IVal->isPowerOf2())
+ return ConstantInt::get(Ty, IVal->logBase2());
+
+ // FIXME: We can extract the power of 2 from a splat constant for scalable vectors.
+ auto *VecTy = dyn_cast<FixedVectorType>(Ty);
+ if (!VecTy)
+ return nullptr;
+
+ SmallVector<Constant *, 4> Elts;
+ for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
+ Constant *Elt = C->getAggregateElement(I);
+ if (!Elt)
+ return nullptr;
+ // Note that log2(iN undef) is *NOT* iN undef, because log2(iN undef) u< N.
+ if (isa<UndefValue>(Elt)) {
+ Elts.push_back(Constant::getNullValue(Ty->getScalarType()));
+ continue;
+ }
+ if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2())
+ return nullptr;
+ Elts.push_back(ConstantInt::get(Ty->getScalarType(), IVal->logBase2()));
+ }
+
+ return ConstantVector::get(Elts);
+}
+
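
A short sketch of the new getExactLogBase2 helper (illustrative only, not part of the patch; Ctx is an assumed LLVMContext):

  // Returns the exact log2 of a power-of-two constant, or nullptr otherwise.
  Type *Int32Ty = Type::getInt32Ty(Ctx);
  Constant *Sixteen = ConstantInt::get(Int32Ty, 16);
  Constant *Log = ConstantExpr::getExactLogBase2(Sixteen); // i32 4
  assert(ConstantExpr::getExactLogBase2(ConstantInt::get(Int32Ty, 6)) == nullptr);
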
Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty,
bool AllowRHSConstant) {
assert(Instruction::isBinaryOp(Opcode) && "Only binops allowed");
@@ -2690,7 +2885,7 @@ bool ConstantDataSequential::isElementTypeCompatible(Type *Ty) {
unsigned ConstantDataSequential::getNumElements() const {
if (ArrayType *AT = dyn_cast<ArrayType>(getType()))
return AT->getNumElements();
- return cast<VectorType>(getType())->getNumElements();
+ return cast<FixedVectorType>(getType())->getNumElements();
}
@@ -2739,56 +2934,58 @@ Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) {
// body but different types. For example, 0,0,0,1 could be a 4 element array
// of i8, or a 1-element array of i32. They'll both end up in the same
// StringMap bucket, linked up by their Next pointers. Walk the list.
- ConstantDataSequential **Entry = &Slot.second;
- for (ConstantDataSequential *Node = *Entry; Node;
- Entry = &Node->Next, Node = *Entry)
- if (Node->getType() == Ty)
- return Node;
+ std::unique_ptr<ConstantDataSequential> *Entry = &Slot.second;
+ for (; *Entry; Entry = &(*Entry)->Next)
+ if ((*Entry)->getType() == Ty)
+ return Entry->get();
// Okay, we didn't get a hit. Create a node of the right class, link it in,
// and return it.
- if (isa<ArrayType>(Ty))
- return *Entry = new ConstantDataArray(Ty, Slot.first().data());
+ if (isa<ArrayType>(Ty)) {
+ // Use reset because std::make_unique can't access the constructor.
+ Entry->reset(new ConstantDataArray(Ty, Slot.first().data()));
+ return Entry->get();
+ }
assert(isa<VectorType>(Ty));
- return *Entry = new ConstantDataVector(Ty, Slot.first().data());
+ // Use reset because std::make_unique can't access the constructor.
+ Entry->reset(new ConstantDataVector(Ty, Slot.first().data()));
+ return Entry->get();
}
void ConstantDataSequential::destroyConstantImpl() {
// Remove the constant from the StringMap.
- StringMap<ConstantDataSequential*> &CDSConstants =
- getType()->getContext().pImpl->CDSConstants;
+ StringMap<std::unique_ptr<ConstantDataSequential>> &CDSConstants =
+ getType()->getContext().pImpl->CDSConstants;
- StringMap<ConstantDataSequential*>::iterator Slot =
- CDSConstants.find(getRawDataValues());
+ auto Slot = CDSConstants.find(getRawDataValues());
assert(Slot != CDSConstants.end() && "CDS not found in uniquing table");
- ConstantDataSequential **Entry = &Slot->getValue();
+ std::unique_ptr<ConstantDataSequential> *Entry = &Slot->getValue();
// Remove the entry from the hash table.
if (!(*Entry)->Next) {
// If there is only one value in the bucket (common case) it must be this
// entry, and removing the entry should remove the bucket completely.
- assert((*Entry) == this && "Hash mismatch in ConstantDataSequential");
+ assert(Entry->get() == this && "Hash mismatch in ConstantDataSequential");
getContext().pImpl->CDSConstants.erase(Slot);
- } else {
- // Otherwise, there are multiple entries linked off the bucket, unlink the
- // node we care about but keep the bucket around.
- for (ConstantDataSequential *Node = *Entry; ;
- Entry = &Node->Next, Node = *Entry) {
- assert(Node && "Didn't find entry in its uniquing hash table!");
- // If we found our entry, unlink it from the list and we're done.
- if (Node == this) {
- *Entry = Node->Next;
- break;
- }
- }
+ return;
}
- // If we were part of a list, make sure that we don't delete the list that is
- // still owned by the uniquing map.
- Next = nullptr;
+ // Otherwise, there are multiple entries linked off the bucket, unlink the
+ // node we care about but keep the bucket around.
+ while (true) {
+ std::unique_ptr<ConstantDataSequential> &Node = *Entry;
+ assert(Node && "Didn't find entry in its uniquing hash table!");
+ // If we found our entry, unlink it from the list and we're done.
+ if (Node.get() == this) {
+ Node = std::move(Node->Next);
+ return;
+ }
+
+ Entry = &Node->Next;
+ }
}
/// getFP() constructors - Return a constant of array type with a float
@@ -2938,7 +3135,7 @@ Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
return getFP(V->getType(), Elts);
}
}
- return ConstantVector::getSplat({NumElts, false}, V);
+ return ConstantVector::getSplat(ElementCount::getFixed(NumElts), V);
}
@@ -3248,7 +3445,7 @@ Value *ConstantExpr::handleOperandChangeImpl(Value *From, Value *ToV) {
}
Instruction *ConstantExpr::getAsInstruction() const {
- SmallVector<Value *, 4> ValueOperands(op_begin(), op_end());
+ SmallVector<Value *, 4> ValueOperands(operands());
ArrayRef<Value*> Ops(ValueOperands);
switch (getOpcode()) {
diff --git a/contrib/llvm-project/llvm/lib/IR/Core.cpp b/contrib/llvm-project/llvm/lib/IR/Core.cpp
index c1f7329034e0..039b34ace6ab 100644
--- a/contrib/llvm-project/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Core.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/Core.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -146,6 +145,11 @@ LLVMAttributeRef LLVMCreateEnumAttribute(LLVMContextRef C, unsigned KindID,
return wrap(Attribute::getWithByValType(Ctx, NULL));
}
+ if (AttrKind == Attribute::AttrKind::StructRet) {
+ // Same as byval.
+ return wrap(Attribute::getWithStructRetType(Ctx, NULL));
+ }
+
return wrap(Attribute::get(Ctx, AttrKind, Val));
}
@@ -160,6 +164,18 @@ uint64_t LLVMGetEnumAttributeValue(LLVMAttributeRef A) {
return Attr.getValueAsInt();
}
+LLVMAttributeRef LLVMCreateTypeAttribute(LLVMContextRef C, unsigned KindID,
+ LLVMTypeRef type_ref) {
+ auto &Ctx = *unwrap(C);
+ auto AttrKind = (Attribute::AttrKind)KindID;
+ return wrap(Attribute::get(Ctx, AttrKind, unwrap(type_ref)));
+}
+
+LLVMTypeRef LLVMGetTypeAttributeValue(LLVMAttributeRef A) {
+ auto Attr = unwrap(A);
+ return wrap(Attr.getValueAsType());
+}
+
LLVMAttributeRef LLVMCreateStringAttribute(LLVMContextRef C,
const char *K, unsigned KLength,
const char *V, unsigned VLength) {
@@ -190,6 +206,10 @@ LLVMBool LLVMIsStringAttribute(LLVMAttributeRef A) {
return unwrap(A).isStringAttribute();
}
+LLVMBool LLVMIsTypeAttribute(LLVMAttributeRef A) {
+ return unwrap(A).isTypeAttribute();
+}
+
char *LLVMGetDiagInfoDescription(LLVMDiagnosticInfoRef DI) {
std::string MsgStorage;
raw_string_ostream Stream(MsgStorage);
@@ -507,6 +527,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
return LLVMVectorTypeKind;
case Type::X86_MMXTyID:
return LLVMX86_MMXTypeKind;
+ case Type::X86_AMXTyID:
+ return LLVMX86_AMXTypeKind;
case Type::TokenTyID:
return LLVMTokenTypeKind;
case Type::ScalableVectorTyID:
@@ -618,6 +640,9 @@ LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) {
LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
}
+LLVMTypeRef LLVMX86AMXTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_AMXTy(*unwrap(C));
+}
LLVMTypeRef LLVMHalfType(void) {
return LLVMHalfTypeInContext(LLVMGetGlobalContext());
@@ -643,6 +668,9 @@ LLVMTypeRef LLVMPPCFP128Type(void) {
LLVMTypeRef LLVMX86MMXType(void) {
return LLVMX86MMXTypeInContext(LLVMGetGlobalContext());
}
+LLVMTypeRef LLVMX86AMXType(void) {
+ return LLVMX86AMXTypeInContext(LLVMGetGlobalContext());
+}
/*--.. Operations on function types ........................................--*/
@@ -734,7 +762,11 @@ LLVMBool LLVMIsLiteralStruct(LLVMTypeRef StructTy) {
}
LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
- return wrap(unwrap(M)->getTypeByName(Name));
+ return wrap(StructType::getTypeByName(unwrap(M)->getContext(), Name));
+}
+
+LLVMTypeRef LLVMGetTypeByName2(LLVMContextRef C, const char *Name) {
+ return wrap(StructType::getTypeByName(*unwrap(C), Name));
}
/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
@@ -759,6 +791,11 @@ LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) {
return wrap(FixedVectorType::get(unwrap(ElementType), ElementCount));
}
+LLVMTypeRef LLVMScalableVectorType(LLVMTypeRef ElementType,
+ unsigned ElementCount) {
+ return wrap(ScalableVectorType::get(unwrap(ElementType), ElementCount));
+}
+
LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) {
auto *Ty = unwrap<Type>(WrappedTy);
if (auto *PTy = dyn_cast<PointerType>(Ty))
@@ -781,7 +818,7 @@ unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) {
}
unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) {
- return unwrap<VectorType>(VectorTy)->getNumElements();
+ return unwrap<VectorType>(VectorTy)->getElementCount().getKnownMinValue();
}
/*--.. Operations on other types ...........................................--*/
@@ -816,6 +853,7 @@ LLVMTypeRef LLVMTypeOf(LLVMValueRef Val) {
LLVMValueKind LLVMGetValueKind(LLVMValueRef Val) {
switch(unwrap(Val)->getValueID()) {
+#define LLVM_C_API 1
#define HANDLE_VALUE(Name) \
case Value::Name##Val: \
return LLVM##Name##ValueKind;
@@ -925,6 +963,7 @@ LLVMValueMetadataEntry *
LLVMInstructionGetAllMetadataOtherThanDebugLoc(LLVMValueRef Value,
size_t *NumEntries) {
return llvm_getMetadata(NumEntries, [&Value](MetadataEntries &Entries) {
+ Entries.clear();
unwrap<Instruction>(Value)->getAllMetadata(Entries);
});
}
@@ -1034,6 +1073,10 @@ LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) {
return wrap(UndefValue::get(unwrap(Ty)));
}
+LLVMValueRef LLVMGetPoison(LLVMTypeRef Ty) {
+ return wrap(PoisonValue::get(unwrap(Ty)));
+}
+
LLVMBool LLVMIsConstant(LLVMValueRef Ty) {
return isa<Constant>(unwrap(Ty));
}
@@ -1048,6 +1091,10 @@ LLVMBool LLVMIsUndef(LLVMValueRef Val) {
return isa<UndefValue>(unwrap(Val));
}
+LLVMBool LLVMIsPoison(LLVMValueRef Val) {
+ return isa<PoisonValue>(unwrap(Val));
+}
+
LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
return wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty)));
}
@@ -2034,6 +2081,7 @@ void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) {
LLVMValueMetadataEntry *LLVMGlobalCopyAllMetadata(LLVMValueRef Value,
size_t *NumEntries) {
return llvm_getMetadata(NumEntries, [&Value](MetadataEntries &Entries) {
+ Entries.clear();
if (Instruction *Instr = dyn_cast<Instruction>(unwrap(Value))) {
Instr->getAllMetadata(Entries);
} else {
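
A hedged sketch exercising a few of the C API entry points added in this file (illustrative only, not part of the patch; Ctx is an assumed LLVMContextRef owned by the caller and "struct.foo" is a hypothetical type name):

  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMTypeRef SVec = LLVMScalableVectorType(I32, 4);         // <vscale x 4 x i32>
  LLVMValueRef Poison = LLVMGetPoison(I32);                  // poison i32
  LLVMTypeRef Named = LLVMGetTypeByName2(Ctx, "struct.foo"); // NULL if absent
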
diff --git a/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
index 45cbbb3a6037..5104dc349d0b 100644
--- a/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
@@ -267,6 +267,12 @@ DIBasicType *DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
0, Encoding, Flags);
}
+DIStringType *DIBuilder::createStringType(StringRef Name, uint64_t SizeInBits) {
+ assert(!Name.empty() && "Unable to create type without name");
+ return DIStringType::get(VMContext, dwarf::DW_TAG_string_type, Name,
+ SizeInBits, 0);
+}
+
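
A minimal sketch of the new createStringType hook (illustrative only, not part of the patch; DIB is an assumed DIBuilder and the type name is hypothetical):

  // Emits a DIStringType node, e.g. for a Fortran CHARACTER(LEN=8) entity.
  DIStringType *StrTy = DIB.createStringType("character(len=8)", /*SizeInBits=*/64);
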
DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) {
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, FromTy, 0,
0, 0, None, DINode::FlagZero);
@@ -519,12 +525,24 @@ DICompositeType *DIBuilder::createEnumerationType(
return CTy;
}
-DICompositeType *DIBuilder::createArrayType(uint64_t Size,
- uint32_t AlignInBits, DIType *Ty,
- DINodeArray Subscripts) {
- auto *R = DICompositeType::get(VMContext, dwarf::DW_TAG_array_type, "",
- nullptr, 0, nullptr, Ty, Size, AlignInBits, 0,
- DINode::FlagZero, Subscripts, 0, nullptr);
+DICompositeType *DIBuilder::createArrayType(
+ uint64_t Size, uint32_t AlignInBits, DIType *Ty, DINodeArray Subscripts,
+ PointerUnion<DIExpression *, DIVariable *> DL,
+ PointerUnion<DIExpression *, DIVariable *> AS,
+ PointerUnion<DIExpression *, DIVariable *> AL,
+ PointerUnion<DIExpression *, DIVariable *> RK) {
+ auto *R = DICompositeType::get(
+ VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0,
+ nullptr, Ty, Size, AlignInBits, 0, DINode::FlagZero,
+ Subscripts, 0, nullptr, nullptr, "", nullptr,
+ DL.is<DIExpression *>() ? (Metadata *)DL.get<DIExpression *>()
+ : (Metadata *)DL.get<DIVariable *>(),
+ AS.is<DIExpression *>() ? (Metadata *)AS.get<DIExpression *>()
+ : (Metadata *)AS.get<DIVariable *>(),
+ AL.is<DIExpression *>() ? (Metadata *)AL.get<DIExpression *>()
+ : (Metadata *)AL.get<DIVariable *>(),
+ RK.is<DIExpression *>() ? (Metadata *)RK.get<DIExpression *>()
+ : (Metadata *)RK.get<DIVariable *>());
trackIfUnresolved(R);
return R;
}
@@ -643,6 +661,18 @@ DISubrange *DIBuilder::getOrCreateSubrange(Metadata *CountNode, Metadata *LB,
return DISubrange::get(VMContext, CountNode, LB, UB, Stride);
}
+DIGenericSubrange *DIBuilder::getOrCreateGenericSubrange(
+ DIGenericSubrange::BoundType CountNode, DIGenericSubrange::BoundType LB,
+ DIGenericSubrange::BoundType UB, DIGenericSubrange::BoundType Stride) {
+ auto ConvToMetadata = [&](DIGenericSubrange::BoundType Bound) -> Metadata * {
+ return Bound.is<DIExpression *>() ? (Metadata *)Bound.get<DIExpression *>()
+ : (Metadata *)Bound.get<DIVariable *>();
+ };
+ return DIGenericSubrange::get(VMContext, ConvToMetadata(CountNode),
+ ConvToMetadata(LB), ConvToMetadata(UB),
+ ConvToMetadata(Stride));
+}
+
static void checkGlobalVariableScope(DIScope *Context) {
#ifndef NDEBUG
if (auto *CT =
@@ -844,9 +874,10 @@ DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name,
DIModule *DIBuilder::createModule(DIScope *Scope, StringRef Name,
StringRef ConfigurationMacros,
StringRef IncludePath, StringRef APINotesFile,
- DIFile *File, unsigned LineNo) {
+ DIFile *File, unsigned LineNo, bool IsDecl) {
return DIModule::get(VMContext, File, getNonCompileUnitScope(Scope), Name,
- ConfigurationMacros, IncludePath, APINotesFile, LineNo);
+ ConfigurationMacros, IncludePath, APINotesFile, LineNo,
+ IsDecl);
}
DILexicalBlockFile *DIBuilder::createLexicalBlockFile(DIScope *Scope,
diff --git a/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp b/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp
index c44737c5bfc2..274ea0aa5fd1 100644
--- a/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TypeSize.h"
@@ -64,7 +65,8 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
StructAlignment = std::max(TyAlign, StructAlignment);
MemberOffsets[i] = StructSize;
- StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item
+ // Consume space for this data item
+ StructSize += DL.getTypeAllocSize(Ty).getFixedValue();
}
// Add padding to the end of the struct so that it could be put in an array
@@ -181,6 +183,7 @@ void DataLayout::reset(StringRef Desc) {
AllocaAddrSpace = 0;
StackNaturalAlign.reset();
ProgramAddrSpace = 0;
+ DefaultGlobalsAddrSpace = 0;
FunctionPtrAlign.reset();
TheFunctionPtrAlignType = FunctionPtrAlignType::Independent;
ManglingMode = MM_None;
@@ -188,57 +191,80 @@ void DataLayout::reset(StringRef Desc) {
// Default alignments
for (const LayoutAlignElem &E : DefaultAlignments) {
- setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, E.PrefAlign,
- E.TypeBitWidth);
+ if (Error Err = setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign,
+ E.PrefAlign, E.TypeBitWidth))
+ return report_fatal_error(std::move(Err));
}
- setPointerAlignment(0, Align(8), Align(8), 8, 8);
+ if (Error Err = setPointerAlignment(0, Align(8), Align(8), 8, 8))
+ return report_fatal_error(std::move(Err));
- parseSpecifier(Desc);
+ if (Error Err = parseSpecifier(Desc))
+ return report_fatal_error(std::move(Err));
+}
+
+Expected<DataLayout> DataLayout::parse(StringRef LayoutDescription) {
+ DataLayout Layout("");
+ if (Error Err = Layout.parseSpecifier(LayoutDescription))
+ return std::move(Err);
+ return Layout;
+}
+
+static Error reportError(const Twine &Message) {
+ return createStringError(inconvertibleErrorCode(), Message);
}
/// Checked version of split, to ensure mandatory subparts.
-static std::pair<StringRef, StringRef> split(StringRef Str, char Separator) {
+static Error split(StringRef Str, char Separator,
+ std::pair<StringRef, StringRef> &Split) {
assert(!Str.empty() && "parse error, string can't be empty here");
- std::pair<StringRef, StringRef> Split = Str.split(Separator);
+ Split = Str.split(Separator);
if (Split.second.empty() && Split.first != Str)
- report_fatal_error("Trailing separator in datalayout string");
+ return reportError("Trailing separator in datalayout string");
if (!Split.second.empty() && Split.first.empty())
- report_fatal_error("Expected token before separator in datalayout string");
- return Split;
+ return reportError("Expected token before separator in datalayout string");
+ return Error::success();
}
/// Get an unsigned integer, including error checks.
-static unsigned getInt(StringRef R) {
- unsigned Result;
+template <typename IntTy> static Error getInt(StringRef R, IntTy &Result) {
bool error = R.getAsInteger(10, Result); (void)error;
if (error)
- report_fatal_error("not a number, or does not fit in an unsigned int");
- return Result;
+ return reportError("not a number, or does not fit in an unsigned int");
+ return Error::success();
}
-/// Convert bits into bytes. Assert if not a byte width multiple.
-static unsigned inBytes(unsigned Bits) {
- if (Bits % 8)
- report_fatal_error("number of bits must be a byte width multiple");
- return Bits / 8;
+/// Get an unsigned integer representing the number of bits and convert it into
+/// bytes. Error out if not a byte width multiple.
+template <typename IntTy>
+static Error getIntInBytes(StringRef R, IntTy &Result) {
+ if (Error Err = getInt<IntTy>(R, Result))
+ return Err;
+ if (Result % 8)
+ return reportError("number of bits must be a byte width multiple");
+ Result /= 8;
+ return Error::success();
}
-static unsigned getAddrSpace(StringRef R) {
- unsigned AddrSpace = getInt(R);
+static Error getAddrSpace(StringRef R, unsigned &AddrSpace) {
+ if (Error Err = getInt(R, AddrSpace))
+ return Err;
if (!isUInt<24>(AddrSpace))
- report_fatal_error("Invalid address space, must be a 24-bit integer");
- return AddrSpace;
+ return reportError("Invalid address space, must be a 24-bit integer");
+ return Error::success();
}
-void DataLayout::parseSpecifier(StringRef Desc) {
+Error DataLayout::parseSpecifier(StringRef Desc) {
StringRepresentation = std::string(Desc);
while (!Desc.empty()) {
// Split at '-'.
- std::pair<StringRef, StringRef> Split = split(Desc, '-');
+ std::pair<StringRef, StringRef> Split;
+ if (Error Err = split(Desc, '-', Split))
+ return Err;
Desc = Split.second;
// Split at ':'.
- Split = split(Split.first, ':');
+ if (Error Err = split(Split.first, ':', Split))
+ return Err;
// Aliases used below.
StringRef &Tok = Split.first; // Current token.
@@ -246,11 +272,14 @@ void DataLayout::parseSpecifier(StringRef Desc) {
if (Tok == "ni") {
do {
- Split = split(Rest, ':');
+ if (Error Err = split(Rest, ':', Split))
+ return Err;
Rest = Split.second;
- unsigned AS = getInt(Split.first);
+ unsigned AS;
+ if (Error Err = getInt(Split.first, AS))
+ return Err;
if (AS == 0)
- report_fatal_error("Address space 0 can never be non-integral");
+ return reportError("Address space 0 can never be non-integral");
NonIntegralAddressSpaces.push_back(AS);
} while (!Rest.empty());
@@ -273,28 +302,36 @@ void DataLayout::parseSpecifier(StringRef Desc) {
break;
case 'p': {
// Address space.
- unsigned AddrSpace = Tok.empty() ? 0 : getInt(Tok);
+ unsigned AddrSpace = 0;
+ if (!Tok.empty())
+ if (Error Err = getInt(Tok, AddrSpace))
+ return Err;
if (!isUInt<24>(AddrSpace))
- report_fatal_error("Invalid address space, must be a 24bit integer");
+ return reportError("Invalid address space, must be a 24bit integer");
// Size.
if (Rest.empty())
- report_fatal_error(
+ return reportError(
"Missing size specification for pointer in datalayout string");
- Split = split(Rest, ':');
- unsigned PointerMemSize = inBytes(getInt(Tok));
+ if (Error Err = split(Rest, ':', Split))
+ return Err;
+ unsigned PointerMemSize;
+ if (Error Err = getIntInBytes(Tok, PointerMemSize))
+ return Err;
if (!PointerMemSize)
- report_fatal_error("Invalid pointer size of 0 bytes");
+ return reportError("Invalid pointer size of 0 bytes");
// ABI alignment.
if (Rest.empty())
- report_fatal_error(
+ return reportError(
"Missing alignment specification for pointer in datalayout string");
- Split = split(Rest, ':');
- unsigned PointerABIAlign = inBytes(getInt(Tok));
+ if (Error Err = split(Rest, ':', Split))
+ return Err;
+ unsigned PointerABIAlign;
+ if (Error Err = getIntInBytes(Tok, PointerABIAlign))
+ return Err;
if (!isPowerOf2_64(PointerABIAlign))
- report_fatal_error(
- "Pointer ABI alignment must be a power of 2");
+ return reportError("Pointer ABI alignment must be a power of 2");
// Size of index used in GEP for address calculation.
// The parameter is optional. By default it is equal to size of pointer.
@@ -303,23 +340,28 @@ void DataLayout::parseSpecifier(StringRef Desc) {
// Preferred alignment.
unsigned PointerPrefAlign = PointerABIAlign;
if (!Rest.empty()) {
- Split = split(Rest, ':');
- PointerPrefAlign = inBytes(getInt(Tok));
+ if (Error Err = split(Rest, ':', Split))
+ return Err;
+ if (Error Err = getIntInBytes(Tok, PointerPrefAlign))
+ return Err;
if (!isPowerOf2_64(PointerPrefAlign))
- report_fatal_error(
- "Pointer preferred alignment must be a power of 2");
+ return reportError(
+ "Pointer preferred alignment must be a power of 2");
// Now read the index. It is the second optional parameter here.
if (!Rest.empty()) {
- Split = split(Rest, ':');
- IndexSize = inBytes(getInt(Tok));
+ if (Error Err = split(Rest, ':', Split))
+ return Err;
+ if (Error Err = getIntInBytes(Tok, IndexSize))
+ return Err;
if (!IndexSize)
- report_fatal_error("Invalid index size of 0 bytes");
+ return reportError("Invalid index size of 0 bytes");
}
}
- setPointerAlignment(AddrSpace, assumeAligned(PointerABIAlign),
- assumeAligned(PointerPrefAlign), PointerMemSize,
- IndexSize);
+ if (Error Err = setPointerAlignment(
+ AddrSpace, assumeAligned(PointerABIAlign),
+ assumeAligned(PointerPrefAlign), PointerMemSize, IndexSize))
+ return Err;
break;
}
case 'i':
@@ -336,61 +378,75 @@ void DataLayout::parseSpecifier(StringRef Desc) {
}
// Bit size.
- unsigned Size = Tok.empty() ? 0 : getInt(Tok);
+ unsigned Size = 0;
+ if (!Tok.empty())
+ if (Error Err = getInt(Tok, Size))
+ return Err;
if (AlignType == AGGREGATE_ALIGN && Size != 0)
- report_fatal_error(
+ return reportError(
"Sized aggregate specification in datalayout string");
// ABI alignment.
if (Rest.empty())
- report_fatal_error(
+ return reportError(
"Missing alignment specification in datalayout string");
- Split = split(Rest, ':');
- const unsigned ABIAlign = inBytes(getInt(Tok));
+ if (Error Err = split(Rest, ':', Split))
+ return Err;
+ unsigned ABIAlign;
+ if (Error Err = getIntInBytes(Tok, ABIAlign))
+ return Err;
if (AlignType != AGGREGATE_ALIGN && !ABIAlign)
- report_fatal_error(
+ return reportError(
"ABI alignment specification must be >0 for non-aggregate types");
if (!isUInt<16>(ABIAlign))
- report_fatal_error("Invalid ABI alignment, must be a 16bit integer");
+ return reportError("Invalid ABI alignment, must be a 16bit integer");
if (ABIAlign != 0 && !isPowerOf2_64(ABIAlign))
- report_fatal_error("Invalid ABI alignment, must be a power of 2");
+ return reportError("Invalid ABI alignment, must be a power of 2");
// Preferred alignment.
unsigned PrefAlign = ABIAlign;
if (!Rest.empty()) {
- Split = split(Rest, ':');
- PrefAlign = inBytes(getInt(Tok));
+ if (Error Err = split(Rest, ':', Split))
+ return Err;
+ if (Error Err = getIntInBytes(Tok, PrefAlign))
+ return Err;
}
if (!isUInt<16>(PrefAlign))
- report_fatal_error(
+ return reportError(
"Invalid preferred alignment, must be a 16bit integer");
if (PrefAlign != 0 && !isPowerOf2_64(PrefAlign))
- report_fatal_error("Invalid preferred alignment, must be a power of 2");
+ return reportError("Invalid preferred alignment, must be a power of 2");
- setAlignment(AlignType, assumeAligned(ABIAlign), assumeAligned(PrefAlign),
- Size);
+ if (Error Err = setAlignment(AlignType, assumeAligned(ABIAlign),
+ assumeAligned(PrefAlign), Size))
+ return Err;
break;
}
case 'n': // Native integer types.
while (true) {
- unsigned Width = getInt(Tok);
+ unsigned Width;
+ if (Error Err = getInt(Tok, Width))
+ return Err;
if (Width == 0)
- report_fatal_error(
+ return reportError(
"Zero width native integer type in datalayout string");
LegalIntWidths.push_back(Width);
if (Rest.empty())
break;
- Split = split(Rest, ':');
+ if (Error Err = split(Rest, ':', Split))
+ return Err;
}
break;
case 'S': { // Stack natural alignment.
- uint64_t Alignment = inBytes(getInt(Tok));
+ uint64_t Alignment;
+ if (Error Err = getIntInBytes(Tok, Alignment))
+ return Err;
if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment))
- report_fatal_error("Alignment is neither 0 nor a power of 2");
+ return reportError("Alignment is neither 0 nor a power of 2");
StackNaturalAlign = MaybeAlign(Alignment);
break;
}
@@ -403,34 +459,44 @@ void DataLayout::parseSpecifier(StringRef Desc) {
TheFunctionPtrAlignType = FunctionPtrAlignType::MultipleOfFunctionAlign;
break;
default:
- report_fatal_error("Unknown function pointer alignment type in "
+ return reportError("Unknown function pointer alignment type in "
"datalayout string");
}
Tok = Tok.substr(1);
- uint64_t Alignment = inBytes(getInt(Tok));
+ uint64_t Alignment;
+ if (Error Err = getIntInBytes(Tok, Alignment))
+ return Err;
if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment))
- report_fatal_error("Alignment is neither 0 nor a power of 2");
+ return reportError("Alignment is neither 0 nor a power of 2");
FunctionPtrAlign = MaybeAlign(Alignment);
break;
}
case 'P': { // Function address space.
- ProgramAddrSpace = getAddrSpace(Tok);
+ if (Error Err = getAddrSpace(Tok, ProgramAddrSpace))
+ return Err;
break;
}
case 'A': { // Default stack/alloca address space.
- AllocaAddrSpace = getAddrSpace(Tok);
+ if (Error Err = getAddrSpace(Tok, AllocaAddrSpace))
+ return Err;
+ break;
+ }
+ case 'G': { // Default address space for global variables.
+ if (Error Err = getAddrSpace(Tok, DefaultGlobalsAddrSpace))
+ return Err;
break;
}
case 'm':
if (!Tok.empty())
- report_fatal_error("Unexpected trailing characters after mangling specifier in datalayout string");
+ return reportError("Unexpected trailing characters after mangling "
+ "specifier in datalayout string");
if (Rest.empty())
- report_fatal_error("Expected mangling specifier in datalayout string");
+ return reportError("Expected mangling specifier in datalayout string");
if (Rest.size() > 1)
- report_fatal_error("Unknown mangling specifier in datalayout string");
+ return reportError("Unknown mangling specifier in datalayout string");
switch(Rest[0]) {
default:
- report_fatal_error("Unknown mangling in datalayout string");
+ return reportError("Unknown mangling in datalayout string");
case 'e':
ManglingMode = MM_ELF;
break;
@@ -452,10 +518,12 @@ void DataLayout::parseSpecifier(StringRef Desc) {
}
break;
default:
- report_fatal_error("Unknown specifier in datalayout string");
+ return reportError("Unknown specifier in datalayout string");
break;
}
}
+
+ return Error::success();
}
DataLayout::DataLayout(const Module *M) {
@@ -469,6 +537,7 @@ bool DataLayout::operator==(const DataLayout &Other) const {
AllocaAddrSpace == Other.AllocaAddrSpace &&
StackNaturalAlign == Other.StackNaturalAlign &&
ProgramAddrSpace == Other.ProgramAddrSpace &&
+ DefaultGlobalsAddrSpace == Other.DefaultGlobalsAddrSpace &&
FunctionPtrAlign == Other.FunctionPtrAlign &&
TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType &&
ManglingMode == Other.ManglingMode &&
@@ -487,17 +556,17 @@ DataLayout::findAlignmentLowerBound(AlignTypeEnum AlignType,
});
}
-void DataLayout::setAlignment(AlignTypeEnum align_type, Align abi_align,
- Align pref_align, uint32_t bit_width) {
+Error DataLayout::setAlignment(AlignTypeEnum align_type, Align abi_align,
+ Align pref_align, uint32_t bit_width) {
// AlignmentsTy::ABIAlign and AlignmentsTy::PrefAlign were once stored as
// uint16_t, it is unclear if there are requirements for alignment to be less
// than 2^16 other than storage. In the meantime we leave the restriction as
// an assert. See D67400 for context.
assert(Log2(abi_align) < 16 && Log2(pref_align) < 16 && "Alignment too big");
if (!isUInt<24>(bit_width))
- report_fatal_error("Invalid bit width, must be a 24bit integer");
+ return reportError("Invalid bit width, must be a 24bit integer");
if (pref_align < abi_align)
- report_fatal_error(
+ return reportError(
"Preferred alignment cannot be less than the ABI alignment");
AlignmentsTy::iterator I = findAlignmentLowerBound(align_type, bit_width);
@@ -511,24 +580,35 @@ void DataLayout::setAlignment(AlignTypeEnum align_type, Align abi_align,
Alignments.insert(I, LayoutAlignElem::get(align_type, abi_align,
pref_align, bit_width));
}
-}
+ return Error::success();
+}
+
+const PointerAlignElem &
+DataLayout::getPointerAlignElem(uint32_t AddressSpace) const {
+ if (AddressSpace != 0) {
+ auto I = lower_bound(Pointers, AddressSpace,
+ [](const PointerAlignElem &A, uint32_t AddressSpace) {
+ return A.AddressSpace < AddressSpace;
+ });
+ if (I != Pointers.end() && I->AddressSpace == AddressSpace)
+ return *I;
+ }
-DataLayout::PointersTy::iterator
-DataLayout::findPointerLowerBound(uint32_t AddressSpace) {
- return std::lower_bound(Pointers.begin(), Pointers.end(), AddressSpace,
- [](const PointerAlignElem &A, uint32_t AddressSpace) {
- return A.AddressSpace < AddressSpace;
- });
+ assert(Pointers[0].AddressSpace == 0);
+ return Pointers[0];
}
-void DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign,
- Align PrefAlign, uint32_t TypeByteWidth,
- uint32_t IndexWidth) {
+Error DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign,
+ Align PrefAlign, uint32_t TypeByteWidth,
+ uint32_t IndexWidth) {
if (PrefAlign < ABIAlign)
- report_fatal_error(
+ return reportError(
"Preferred alignment cannot be less than the ABI alignment");
- PointersTy::iterator I = findPointerLowerBound(AddrSpace);
+ auto I = lower_bound(Pointers, AddrSpace,
+ [](const PointerAlignElem &A, uint32_t AddressSpace) {
+ return A.AddressSpace < AddressSpace;
+ });
if (I == Pointers.end() || I->AddressSpace != AddrSpace) {
Pointers.insert(I, PointerAlignElem::get(AddrSpace, ABIAlign, PrefAlign,
TypeByteWidth, IndexWidth));
@@ -538,49 +618,19 @@ void DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign,
I->TypeByteWidth = TypeByteWidth;
I->IndexWidth = IndexWidth;
}
+ return Error::success();
}
-/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
-/// preferred if ABIInfo = false) the layout wants for the specified datatype.
-Align DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, uint32_t BitWidth,
- bool ABIInfo, Type *Ty) const {
- AlignmentsTy::const_iterator I = findAlignmentLowerBound(AlignType, BitWidth);
- // See if we found an exact match. Of if we are looking for an integer type,
- // but don't have an exact match take the next largest integer. This is where
- // the lower_bound will point to when it fails an exact match.
- if (I != Alignments.end() && I->AlignType == (unsigned)AlignType &&
- (I->TypeBitWidth == BitWidth || AlignType == INTEGER_ALIGN))
- return ABIInfo ? I->ABIAlign : I->PrefAlign;
-
- if (AlignType == INTEGER_ALIGN) {
- // If we didn't have a larger value try the largest value we have.
- if (I != Alignments.begin()) {
- --I; // Go to the previous entry and see if its an integer.
- if (I->AlignType == INTEGER_ALIGN)
- return ABIInfo ? I->ABIAlign : I->PrefAlign;
- }
- } else if (AlignType == VECTOR_ALIGN) {
- // By default, use natural alignment for vector types. This is consistent
- // with what clang and llvm-gcc do.
- unsigned Alignment =
- getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
- // We're only calculating a natural alignment, so it doesn't have to be
- // based on the full size for scalable vectors. Using the minimum element
- // count should be enough here.
- Alignment *= cast<VectorType>(Ty)->getElementCount().Min;
- Alignment = PowerOf2Ceil(Alignment);
- return Align(Alignment);
- }
-
- // If we still couldn't find a reasonable default alignment, fall back
- // to a simple heuristic that the alignment is the first power of two
- // greater-or-equal to the store size of the type. This is a reasonable
- // approximation of reality, and if the user wanted something less
- // less conservative, they should have specified it explicitly in the data
- // layout.
- unsigned Alignment = getTypeStoreSize(Ty);
- Alignment = PowerOf2Ceil(Alignment);
- return Align(Alignment);
+Align DataLayout::getIntegerAlignment(uint32_t BitWidth,
+ bool abi_or_pref) const {
+ auto I = findAlignmentLowerBound(INTEGER_ALIGN, BitWidth);
+ // If we don't have an exact match, use alignment of next larger integer
+ // type. If there is none, use alignment of largest integer type by going
+ // back one element.
+ if (I == Alignments.end() || I->AlignType != INTEGER_ALIGN)
+ --I;
+ assert(I->AlignType == INTEGER_ALIGN && "Must be integer alignment");
+ return abi_or_pref ? I->ABIAlign : I->PrefAlign;
}
namespace {
@@ -642,30 +692,15 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
}
Align DataLayout::getPointerABIAlignment(unsigned AS) const {
- PointersTy::const_iterator I = findPointerLowerBound(AS);
- if (I == Pointers.end() || I->AddressSpace != AS) {
- I = findPointerLowerBound(0);
- assert(I->AddressSpace == 0);
- }
- return I->ABIAlign;
+ return getPointerAlignElem(AS).ABIAlign;
}
Align DataLayout::getPointerPrefAlignment(unsigned AS) const {
- PointersTy::const_iterator I = findPointerLowerBound(AS);
- if (I == Pointers.end() || I->AddressSpace != AS) {
- I = findPointerLowerBound(0);
- assert(I->AddressSpace == 0);
- }
- return I->PrefAlign;
+ return getPointerAlignElem(AS).PrefAlign;
}
unsigned DataLayout::getPointerSize(unsigned AS) const {
- PointersTy::const_iterator I = findPointerLowerBound(AS);
- if (I == Pointers.end() || I->AddressSpace != AS) {
- I = findPointerLowerBound(0);
- assert(I->AddressSpace == 0);
- }
- return I->TypeByteWidth;
+ return getPointerAlignElem(AS).TypeByteWidth;
}
unsigned DataLayout::getMaxPointerSize() const {
@@ -684,12 +719,7 @@ unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
}
unsigned DataLayout::getIndexSize(unsigned AS) const {
- PointersTy::const_iterator I = findPointerLowerBound(AS);
- if (I == Pointers.end() || I->AddressSpace != AS) {
- I = findPointerLowerBound(0);
- assert(I->AddressSpace == 0);
- }
- return I->IndexWidth;
+ return getPointerAlignElem(AS).IndexWidth;
}
unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const {
@@ -708,8 +738,6 @@ unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const {
== false) for the requested type \a Ty.
*/
Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
- AlignTypeEnum AlignType;
-
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
switch (Ty->getTypeID()) {
// Early escape for the non-numeric types.
@@ -730,12 +758,15 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
// Get the layout annotation... which is lazily created on demand.
const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
- const Align Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
+ const LayoutAlignElem &AggregateAlign = Alignments[0];
+ assert(AggregateAlign.AlignType == AGGREGATE_ALIGN &&
+ "Aggregate alignment must be first alignment entry");
+ const Align Align =
+ abi_or_pref ? AggregateAlign.ABIAlign : AggregateAlign.PrefAlign;
return std::max(Align, Layout->getAlignment());
}
case Type::IntegerTyID:
- AlignType = INTEGER_ALIGN;
- break;
+ return getIntegerAlignment(Ty->getIntegerBitWidth(), abi_or_pref);
case Type::HalfTyID:
case Type::BFloatTyID:
case Type::FloatTyID:
@@ -744,22 +775,47 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
// same size and alignment, so they look the same here.
case Type::PPC_FP128TyID:
case Type::FP128TyID:
- case Type::X86_FP80TyID:
- AlignType = FLOAT_ALIGN;
- break;
+ case Type::X86_FP80TyID: {
+ unsigned BitWidth = getTypeSizeInBits(Ty).getFixedSize();
+ auto I = findAlignmentLowerBound(FLOAT_ALIGN, BitWidth);
+ if (I != Alignments.end() && I->AlignType == FLOAT_ALIGN &&
+ I->TypeBitWidth == BitWidth)
+ return abi_or_pref ? I->ABIAlign : I->PrefAlign;
+
+ // If we still couldn't find a reasonable default alignment, fall back
+ // to a simple heuristic that the alignment is the first power of two
+ // greater-or-equal to the store size of the type. This is a reasonable
+ // approximation of reality, and if the user wanted something less
+ // conservative, they should have specified it explicitly in the data
+ // layout.
+ return Align(PowerOf2Ceil(BitWidth / 8));
+ }
case Type::X86_MMXTyID:
case Type::FixedVectorTyID:
- case Type::ScalableVectorTyID:
- AlignType = VECTOR_ALIGN;
- break;
+ case Type::ScalableVectorTyID: {
+ unsigned BitWidth = getTypeSizeInBits(Ty).getKnownMinSize();
+ auto I = findAlignmentLowerBound(VECTOR_ALIGN, BitWidth);
+ if (I != Alignments.end() && I->AlignType == VECTOR_ALIGN &&
+ I->TypeBitWidth == BitWidth)
+ return abi_or_pref ? I->ABIAlign : I->PrefAlign;
+
+ // By default, use natural alignment for vector types. This is consistent
+ // with what clang and llvm-gcc do.
+ // TODO: This should probably not be using the alloc size.
+ unsigned Alignment =
+ getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ // We're only calculating a natural alignment, so it doesn't have to be
+ // based on the full size for scalable vectors. Using the minimum element
+ // count should be enough here.
+ Alignment *= cast<VectorType>(Ty)->getElementCount().getKnownMinValue();
+ Alignment = PowerOf2Ceil(Alignment);
+ return Align(Alignment);
+ }
+ case Type::X86_AMXTyID:
+ return Align(64);
default:
llvm_unreachable("Bad type for getAlignment!!!");
}
-
- // If we're dealing with a scalable vector, we just need the known minimum
- // size for determining alignment. If not, we'll get the exact size.
- return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty).getKnownMinSize(),
- abi_or_pref, Ty);
}
/// TODO: Remove this function once the transition to Align is over.
@@ -771,12 +827,6 @@ Align DataLayout::getABITypeAlign(Type *Ty) const {
return getAlignment(Ty, true);
}
-/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
-/// an integer type of the specified bitwidth.
-Align DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const {
- return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, nullptr);
-}
-
/// TODO: Remove this function once the transition to Align is over.
unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const {
return getPrefTypeAlign(Ty).value();
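
A hedged sketch of consuming the new Expected-based DataLayout::parse entry point added in this file (illustrative only, not part of the patch; the layout string is just an example):

  // Malformed datalayout strings now come back as llvm::Error values instead
  // of aborting inside the parser.
  Expected<DataLayout> DLOrErr = DataLayout::parse("e-m:e-i64:64-n32:64-S128");
  if (!DLOrErr)
    report_fatal_error(toString(DLOrErr.takeError()));
  const DataLayout &DL = *DLOrErr;
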
diff --git a/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
index 190b220dc9aa..d7656b9dd1f8 100644
--- a/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
@@ -652,7 +652,8 @@ bool llvm::stripNonLineTableDebugInfo(Module &M) {
MDNode *InlinedAt = DL.getInlinedAt();
Scope = remap(Scope);
InlinedAt = remap(InlinedAt);
- return DebugLoc::get(DL.getLine(), DL.getCol(), Scope, InlinedAt);
+ return DILocation::get(M.getContext(), DL.getLine(), DL.getCol(),
+ Scope, InlinedAt);
};
if (I.getDebugLoc() != DebugLoc())
@@ -696,6 +697,38 @@ void Instruction::applyMergedLocation(const DILocation *LocA,
setDebugLoc(DILocation::getMergedLocation(LocA, LocB));
}
+void Instruction::updateLocationAfterHoist() { dropLocation(); }
+
+void Instruction::dropLocation() {
+ const DebugLoc &DL = getDebugLoc();
+ if (!DL)
+ return;
+
+ // If this isn't a call, drop the location to allow a location from a
+ // preceding instruction to propagate.
+ if (!isa<CallBase>(this)) {
+ setDebugLoc(DebugLoc());
+ return;
+ }
+
+ // Set a line 0 location for calls to preserve scope information in case
+ // inlining occurs.
+ DISubprogram *SP = getFunction()->getSubprogram();
+ if (SP)
+ // If a function scope is available, set it on the line 0 location. When
+ // hoisting a call to a predecessor block, using the function scope avoids
+ // making it look like the callee was reached earlier than it should be.
+ setDebugLoc(DILocation::get(getContext(), 0, 0, SP));
+ else
+ // The parent function has no scope. Go ahead and drop the location. If
+ // the parent function is inlined, and the callee has a subprogram, the
+ // inliner will attach a location to the call.
+ //
+ // One alternative is to set a line 0 location with the existing scope and
+ // inlinedAt info. The location might be sensitive to when inlining occurs.
+ setDebugLoc(DebugLoc());
+}
+
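
An illustrative sketch of a typical caller of the new dropLocation hook (not part of the patch; I and PredBB are hypothetical Instruction* and BasicBlock* values):

  // After hoisting, non-call instructions lose their location entirely, while
  // calls keep a line-0 location in the enclosing subprogram scope.
  I->moveBefore(PredBB->getTerminator());
  I->dropLocation();
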
//===----------------------------------------------------------------------===//
// LLVM C API implementations.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp b/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp
index 110d94116f10..77bba9f7ed0e 100644
--- a/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -133,7 +133,7 @@ const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
}
Optional<unsigned> DILocation::encodeDiscriminator(unsigned BD, unsigned DF, unsigned CI) {
- SmallVector<unsigned, 3> Components = {BD, DF, CI};
+ std::array<unsigned, 3> Components = {BD, DF, CI};
uint64_t RemainingWork = 0U;
// We use RemainingWork to figure out if we have no remaining components to
// encode. For example: if BD != 0 but DF == 0 && CI == 0, we don't need to
@@ -435,6 +435,84 @@ DISubrange::BoundType DISubrange::getStride() const {
return BoundType();
}
+DIGenericSubrange *DIGenericSubrange::getImpl(LLVMContext &Context,
+ Metadata *CountNode, Metadata *LB,
+ Metadata *UB, Metadata *Stride,
+ StorageType Storage,
+ bool ShouldCreate) {
+ DEFINE_GETIMPL_LOOKUP(DIGenericSubrange, (CountNode, LB, UB, Stride));
+ Metadata *Ops[] = {CountNode, LB, UB, Stride};
+ DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(DIGenericSubrange, Ops);
+}
+
+DIGenericSubrange::BoundType DIGenericSubrange::getCount() const {
+ Metadata *CB = getRawCountNode();
+ if (!CB)
+ return BoundType();
+
+ assert((isa<DIVariable>(CB) || isa<DIExpression>(CB)) &&
+ "Count must be signed constant or DIVariable or DIExpression");
+
+ if (auto *MD = dyn_cast<DIVariable>(CB))
+ return BoundType(MD);
+
+ if (auto *MD = dyn_cast<DIExpression>(CB))
+ return BoundType(MD);
+
+ return BoundType();
+}
+
+DIGenericSubrange::BoundType DIGenericSubrange::getLowerBound() const {
+ Metadata *LB = getRawLowerBound();
+ if (!LB)
+ return BoundType();
+
+ assert((isa<DIVariable>(LB) || isa<DIExpression>(LB)) &&
+ "LowerBound must be signed constant or DIVariable or DIExpression");
+
+ if (auto *MD = dyn_cast<DIVariable>(LB))
+ return BoundType(MD);
+
+ if (auto *MD = dyn_cast<DIExpression>(LB))
+ return BoundType(MD);
+
+ return BoundType();
+}
+
+DIGenericSubrange::BoundType DIGenericSubrange::getUpperBound() const {
+ Metadata *UB = getRawUpperBound();
+ if (!UB)
+ return BoundType();
+
+ assert((isa<DIVariable>(UB) || isa<DIExpression>(UB)) &&
+ "UpperBound must be signed constant or DIVariable or DIExpression");
+
+ if (auto *MD = dyn_cast<DIVariable>(UB))
+ return BoundType(MD);
+
+ if (auto *MD = dyn_cast<DIExpression>(UB))
+ return BoundType(MD);
+
+ return BoundType();
+}
+
+DIGenericSubrange::BoundType DIGenericSubrange::getStride() const {
+ Metadata *ST = getRawStride();
+ if (!ST)
+ return BoundType();
+
+ assert((isa<DIVariable>(ST) || isa<DIExpression>(ST)) &&
+ "Stride must be signed constant or DIVariable or DIExpression");
+
+ if (auto *MD = dyn_cast<DIVariable>(ST))
+ return BoundType(MD);
+
+ if (auto *MD = dyn_cast<DIExpression>(ST))
+ return BoundType(MD);
+
+ return BoundType();
+}
+
DIEnumerator *DIEnumerator::getImpl(LLVMContext &Context, const APInt &Value,
bool IsUnsigned, MDString *Name,
StorageType Storage, bool ShouldCreate) {
@@ -470,6 +548,20 @@ Optional<DIBasicType::Signedness> DIBasicType::getSignedness() const {
}
}
+DIStringType *DIStringType::getImpl(LLVMContext &Context, unsigned Tag,
+ MDString *Name, Metadata *StringLength,
+ Metadata *StringLengthExp,
+ uint64_t SizeInBits, uint32_t AlignInBits,
+ unsigned Encoding, StorageType Storage,
+ bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DIStringType, (Tag, Name, StringLength, StringLengthExp,
+ SizeInBits, AlignInBits, Encoding));
+ Metadata *Ops[] = {nullptr, nullptr, Name, StringLength, StringLengthExp};
+ DEFINE_GETIMPL_STORE(DIStringType, (Tag, SizeInBits, AlignInBits, Encoding),
+ Ops);
+}
+
DIDerivedType *DIDerivedType::getImpl(
LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
@@ -493,18 +585,20 @@ DICompositeType *DICompositeType::getImpl(
uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags,
Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, MDString *Identifier, Metadata *Discriminator,
- Metadata *DataLocation, StorageType Storage, bool ShouldCreate) {
+ Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
+ Metadata *Rank, StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
// Keep this in sync with buildODRType.
- DEFINE_GETIMPL_LOOKUP(DICompositeType,
- (Tag, Name, File, Line, Scope, BaseType, SizeInBits,
- AlignInBits, OffsetInBits, Flags, Elements,
- RuntimeLang, VTableHolder, TemplateParams, Identifier,
- Discriminator, DataLocation));
+ DEFINE_GETIMPL_LOOKUP(
+ DICompositeType,
+ (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
+ OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams,
+ Identifier, Discriminator, DataLocation, Associated, Allocated, Rank));
Metadata *Ops[] = {File, Scope, Name, BaseType,
Elements, VTableHolder, TemplateParams, Identifier,
- Discriminator, DataLocation};
+ Discriminator, DataLocation, Associated, Allocated,
+ Rank};
DEFINE_GETIMPL_STORE(DICompositeType, (Tag, Line, RuntimeLang, SizeInBits,
AlignInBits, OffsetInBits, Flags),
Ops);
@@ -516,7 +610,8 @@ DICompositeType *DICompositeType::buildODRType(
uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator,
- Metadata *DataLocation) {
+ Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
+ Metadata *Rank) {
assert(!Identifier.getString().empty() && "Expected valid identifier");
if (!Context.isODRUniquingDebugTypes())
return nullptr;
@@ -526,7 +621,7 @@ DICompositeType *DICompositeType::buildODRType(
Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
VTableHolder, TemplateParams, &Identifier, Discriminator,
- DataLocation);
+ DataLocation, Associated, Allocated, Rank);
// Only mutate CT if it's a forward declaration and the new operands aren't.
assert(CT->getRawIdentifier() == &Identifier && "Wrong ODR identifier?");
@@ -538,7 +633,8 @@ DICompositeType *DICompositeType::buildODRType(
Flags);
Metadata *Ops[] = {File, Scope, Name, BaseType,
Elements, VTableHolder, TemplateParams, &Identifier,
- Discriminator, DataLocation};
+ Discriminator, DataLocation, Associated, Allocated,
+ Rank};
assert((std::end(Ops) - std::begin(Ops)) == (int)CT->getNumOperands() &&
"Mismatched number of operands");
for (unsigned I = 0, E = CT->getNumOperands(); I != E; ++I)
@@ -553,7 +649,8 @@ DICompositeType *DICompositeType::getODRType(
uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator,
- Metadata *DataLocation) {
+ Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
+ Metadata *Rank) {
assert(!Identifier.getString().empty() && "Expected valid identifier");
if (!Context.isODRUniquingDebugTypes())
return nullptr;
@@ -562,7 +659,8 @@ DICompositeType *DICompositeType::getODRType(
CT = DICompositeType::getDistinct(
Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder,
- TemplateParams, &Identifier, Discriminator, DataLocation);
+ TemplateParams, &Identifier, Discriminator, DataLocation, Associated,
+ Allocated, Rank);
return CT;
}
@@ -828,14 +926,14 @@ DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *File,
Metadata *Scope, MDString *Name,
MDString *ConfigurationMacros,
MDString *IncludePath, MDString *APINotesFile,
- unsigned LineNo, StorageType Storage,
+ unsigned LineNo, bool IsDecl, StorageType Storage,
bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIModule, (File, Scope, Name, ConfigurationMacros,
- IncludePath, APINotesFile, LineNo));
+ IncludePath, APINotesFile, LineNo, IsDecl));
Metadata *Ops[] = {File, Scope, Name, ConfigurationMacros,
IncludePath, APINotesFile};
- DEFINE_GETIMPL_STORE(DIModule, (LineNo), Ops);
+ DEFINE_GETIMPL_STORE(DIModule, (LineNo, IsDecl), Ops);
}
DITemplateTypeParameter *
@@ -1016,6 +1114,7 @@ bool DIExpression::isValid() const {
return I->get() == expr_op_begin()->get() && I->getArg(0) == 1 &&
getNumElements() == 2;
}
+ case dwarf::DW_OP_LLVM_implicit_pointer:
case dwarf::DW_OP_LLVM_convert:
case dwarf::DW_OP_LLVM_tag_offset:
case dwarf::DW_OP_constu:
@@ -1040,6 +1139,8 @@ bool DIExpression::isValid() const {
case dwarf::DW_OP_regx:
case dwarf::DW_OP_bregx:
case dwarf::DW_OP_push_object_address:
+ case dwarf::DW_OP_over:
+ case dwarf::DW_OP_consts:
break;
}
}
@@ -1313,6 +1414,15 @@ bool DIExpression::isConstant() const {
return true;
}
+bool DIExpression::isSignedConstant() const {
+ // Recognize DW_OP_consts C
+ if (getNumElements() != 2)
+ return false;
+ if (getElement(0) != dwarf::DW_OP_consts)
+ return false;
+ return true;
+}
+
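
A small sketch of what isSignedConstant() recognizes (illustrative only, not part of the patch; Ctx is an assumed LLVMContext):

  // A two-element DW_OP_consts expression, as used for signed bounds.
  DIExpression *E = DIExpression::get(Ctx, {dwarf::DW_OP_consts, 16});
  assert(E->isSignedConstant());
  assert(!DIExpression::get(Ctx, {dwarf::DW_OP_constu, 16})->isSignedConstant());
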
DIExpression::ExtOps DIExpression::getExtOps(unsigned FromSize, unsigned ToSize,
bool Signed) {
dwarf::TypeKind TK = Signed ? dwarf::DW_ATE_signed : dwarf::DW_ATE_unsigned;
diff --git a/contrib/llvm-project/llvm/lib/IR/DebugLoc.cpp b/contrib/llvm-project/llvm/lib/IR/DebugLoc.cpp
index e945cbcba782..993f3a39e6ff 100644
--- a/contrib/llvm-project/llvm/lib/IR/DebugLoc.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DebugLoc.cpp
@@ -50,7 +50,7 @@ DebugLoc DebugLoc::getFnDebugLoc() const {
// FIXME: Add a method on \a DILocation that does this work.
const MDNode *Scope = getInlinedAtScope();
if (auto *SP = getDISubprogram(Scope))
- return DebugLoc::get(SP->getScopeLine(), 0, SP);
+ return DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP);
return DebugLoc();
}
@@ -68,21 +68,9 @@ void DebugLoc::setImplicitCode(bool ImplicitCode) {
}
}
-DebugLoc DebugLoc::get(unsigned Line, unsigned Col, const MDNode *Scope,
- const MDNode *InlinedAt, bool ImplicitCode) {
- // If no scope is available, this is an unknown location.
- if (!Scope)
- return DebugLoc();
-
- return DILocation::get(Scope->getContext(), Line, Col,
- const_cast<MDNode *>(Scope),
- const_cast<MDNode *>(InlinedAt), ImplicitCode);
-}
-
DebugLoc DebugLoc::appendInlinedAt(const DebugLoc &DL, DILocation *InlinedAt,
LLVMContext &Ctx,
- DenseMap<const MDNode *, MDNode *> &Cache,
- bool ReplaceLast) {
+ DenseMap<const MDNode *, MDNode *> &Cache) {
SmallVector<DILocation *, 3> InlinedAtLocations;
DILocation *Last = InlinedAt;
DILocation *CurInlinedAt = DL;
@@ -95,8 +83,6 @@ DebugLoc DebugLoc::appendInlinedAt(const DebugLoc &DL, DILocation *InlinedAt,
break;
}
- if (ReplaceLast && !IA->getInlinedAt())
- break;
InlinedAtLocations.push_back(IA);
CurInlinedAt = IA;
}
diff --git a/contrib/llvm-project/llvm/lib/IR/DiagnosticInfo.cpp b/contrib/llvm-project/llvm/lib/IR/DiagnosticInfo.cpp
index 6528c723fbfa..8820a79421c3 100644
--- a/contrib/llvm-project/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DiagnosticInfo.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -213,6 +214,20 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key,
unsigned long long N)
: Key(std::string(Key)), Val(utostr(N)) {}
+DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key,
+ ElementCount EC)
+ : Key(std::string(Key)) {
+ raw_string_ostream OS(Val);
+ EC.print(OS);
+}
+
+DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key,
+ InstructionCost C)
+ : Key(std::string(Key)) {
+ raw_string_ostream OS(Val);
+ C.print(OS);
+}
+
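
A hedged sketch of the new remark argument payloads (illustrative only, not part of the patch; R is an assumed optimization remark being streamed and ore::NV is the usual Argument alias from OptimizationRemarkEmitter.h):

  // ElementCount and InstructionCost now print directly into remarks.
  R << "chose VF " << ore::NV("VF", ElementCount::getFixed(4))
    << " at cost " << ore::NV("Cost", InstructionCost(12));
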
DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, DebugLoc Loc)
: Key(std::string(Key)), Loc(Loc) {
if (Loc) {
@@ -368,16 +383,5 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
return OS.str();
}
-DiagnosticInfoMisExpect::DiagnosticInfoMisExpect(const Instruction *Inst,
- Twine &Msg)
- : DiagnosticInfoWithLocationBase(DK_MisExpect, DS_Warning,
- *Inst->getParent()->getParent(),
- Inst->getDebugLoc()),
- Msg(Msg) {}
-
-void DiagnosticInfoMisExpect::print(DiagnosticPrinter &DP) const {
- DP << getLocationStr() << ": " << getMsg();
-}
-
void OptimizationRemarkAnalysisFPCommute::anchor() {}
void OptimizationRemarkAnalysisAliasing::anchor() {}
diff --git a/contrib/llvm-project/llvm/lib/IR/Dominators.cpp b/contrib/llvm-project/llvm/lib/IR/Dominators.cpp
index bb1cc347dcb1..fbc28c202aec 100644
--- a/contrib/llvm-project/llvm/lib/IR/Dominators.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Dominators.cpp
@@ -90,9 +90,11 @@ template void llvm::DomTreeBuilder::DeleteEdge<DomTreeBuilder::BBPostDomTree>(
DomTreeBuilder::BBPostDomTree &DT, BasicBlock *From, BasicBlock *To);
template void llvm::DomTreeBuilder::ApplyUpdates<DomTreeBuilder::BBDomTree>(
- DomTreeBuilder::BBDomTree &DT, DomTreeBuilder::BBUpdates);
+ DomTreeBuilder::BBDomTree &DT, DomTreeBuilder::BBDomTreeGraphDiff &,
+ DomTreeBuilder::BBDomTreeGraphDiff *);
template void llvm::DomTreeBuilder::ApplyUpdates<DomTreeBuilder::BBPostDomTree>(
- DomTreeBuilder::BBPostDomTree &DT, DomTreeBuilder::BBUpdates);
+ DomTreeBuilder::BBPostDomTree &DT, DomTreeBuilder::BBPostDomTreeGraphDiff &,
+ DomTreeBuilder::BBPostDomTreeGraphDiff *);
template bool llvm::DomTreeBuilder::Verify<DomTreeBuilder::BBDomTree>(
const DomTreeBuilder::BBDomTree &DT,
@@ -113,8 +115,15 @@ bool DominatorTree::invalidate(Function &F, const PreservedAnalyses &PA,
// dominates - Return true if Def dominates a use in User. This performs
// the special checks necessary if Def and User are in the same basic block.
// Note that Def doesn't dominate a use in Def itself!
-bool DominatorTree::dominates(const Instruction *Def,
+bool DominatorTree::dominates(const Value *DefV,
const Instruction *User) const {
+ const Instruction *Def = dyn_cast<Instruction>(DefV);
+ if (!Def) {
+ assert((isa<Argument>(DefV) || isa<Constant>(DefV)) &&
+ "Should be called with an instruction, argument or constant");
+ return true; // Arguments and constants dominate everything.
+ }
+
const BasicBlock *UseBB = User->getParent();
const BasicBlock *DefBB = Def->getParent();
@@ -248,7 +257,14 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const {
return dominates(BBE, UseBB);
}
-bool DominatorTree::dominates(const Instruction *Def, const Use &U) const {
+bool DominatorTree::dominates(const Value *DefV, const Use &U) const {
+ const Instruction *Def = dyn_cast<Instruction>(DefV);
+ if (!Def) {
+ assert((isa<Argument>(DefV) || isa<Constant>(DefV)) &&
+ "Should be called with an instruction, argument or constant");
+ return true; // Arguments and constants dominate everything.
+ }
+
Instruction *UserInst = cast<Instruction>(U.getUser());
const BasicBlock *DefBB = Def->getParent();
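Note: dominates() now takes the definition as a plain Value, folding in the argument/constant special case that callers previously had to write themselves. A small hedged sketch; the helper name is made up:

    #include "llvm/IR/Dominators.h"

    using namespace llvm;

    // Valid only for instructions, arguments and constants, per the assert in
    // the new overloads; arguments and constants trivially dominate every use.
    static bool canUseAt(const DominatorTree &DT, const Value *Def,
                         const Instruction *User) {
      return DT.dominates(Def, User);
    }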
diff --git a/contrib/llvm-project/llvm/lib/IR/Function.cpp b/contrib/llvm-project/llvm/lib/IR/Function.cpp
index 10d535e3ab11..17247123f87f 100644
--- a/contrib/llvm-project/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Function.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsS390.h"
+#include "llvm/IR/IntrinsicsVE.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/IntrinsicsXCore.h"
@@ -86,9 +87,11 @@ void Argument::setParent(Function *parent) {
Parent = parent;
}
-bool Argument::hasNonNullAttr() const {
+bool Argument::hasNonNullAttr(bool AllowUndefOrPoison) const {
if (!getType()->isPointerTy()) return false;
- if (getParent()->hasParamAttribute(getArgNo(), Attribute::NonNull))
+ if (getParent()->hasParamAttribute(getArgNo(), Attribute::NonNull) &&
+ (AllowUndefOrPoison ||
+ getParent()->hasParamAttribute(getArgNo(), Attribute::NoUndef)))
return true;
else if (getDereferenceableBytes() > 0 &&
!NullPointerIsDefined(getParent(),
@@ -102,6 +105,12 @@ bool Argument::hasByValAttr() const {
return hasAttribute(Attribute::ByVal);
}
+bool Argument::hasByRefAttr() const {
+ if (!getType()->isPointerTy())
+ return false;
+ return hasAttribute(Attribute::ByRef);
+}
+
bool Argument::hasSwiftSelfAttr() const {
return getParent()->hasParamAttribute(getArgNo(), Attribute::SwiftSelf);
}
@@ -121,7 +130,7 @@ bool Argument::hasPreallocatedAttr() const {
return hasAttribute(Attribute::Preallocated);
}
-bool Argument::hasPassPointeeByValueAttr() const {
+bool Argument::hasPassPointeeByValueCopyAttr() const {
if (!getType()->isPointerTy()) return false;
AttributeList Attrs = getParent()->getAttributes();
return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) ||
@@ -129,27 +138,54 @@ bool Argument::hasPassPointeeByValueAttr() const {
Attrs.hasParamAttribute(getArgNo(), Attribute::Preallocated);
}
-uint64_t Argument::getPassPointeeByValueCopySize(const DataLayout &DL) const {
- AttributeSet ParamAttrs
- = getParent()->getAttributes().getParamAttributes(getArgNo());
+bool Argument::hasPointeeInMemoryValueAttr() const {
+ if (!getType()->isPointerTy())
+ return false;
+ AttributeList Attrs = getParent()->getAttributes();
+ return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) ||
+ Attrs.hasParamAttribute(getArgNo(), Attribute::StructRet) ||
+ Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca) ||
+ Attrs.hasParamAttribute(getArgNo(), Attribute::Preallocated) ||
+ Attrs.hasParamAttribute(getArgNo(), Attribute::ByRef);
+}
+/// For a byval, sret, inalloca, or preallocated parameter, get the in-memory
+/// parameter type.
+static Type *getMemoryParamAllocType(AttributeSet ParamAttrs, Type *ArgTy) {
// FIXME: All the type carrying attributes are mutually exclusive, so there
// should be a single query to get the stored type that handles any of them.
if (Type *ByValTy = ParamAttrs.getByValType())
- return DL.getTypeAllocSize(ByValTy);
+ return ByValTy;
+ if (Type *ByRefTy = ParamAttrs.getByRefType())
+ return ByRefTy;
if (Type *PreAllocTy = ParamAttrs.getPreallocatedType())
- return DL.getTypeAllocSize(PreAllocTy);
+ return PreAllocTy;
- // FIXME: inalloca always depends on pointee element type. It's also possible
- // for byval to miss it.
+ // FIXME: sret and inalloca always depends on pointee element type. It's also
+ // possible for byval to miss it.
if (ParamAttrs.hasAttribute(Attribute::InAlloca) ||
ParamAttrs.hasAttribute(Attribute::ByVal) ||
+ ParamAttrs.hasAttribute(Attribute::StructRet) ||
ParamAttrs.hasAttribute(Attribute::Preallocated))
- return DL.getTypeAllocSize(cast<PointerType>(getType())->getElementType());
+ return cast<PointerType>(ArgTy)->getElementType();
+ return nullptr;
+}
+
+uint64_t Argument::getPassPointeeByValueCopySize(const DataLayout &DL) const {
+ AttributeSet ParamAttrs =
+ getParent()->getAttributes().getParamAttributes(getArgNo());
+ if (Type *MemTy = getMemoryParamAllocType(ParamAttrs, getType()))
+ return DL.getTypeAllocSize(MemTy);
return 0;
}
+Type *Argument::getPointeeInMemoryValueType() const {
+ AttributeSet ParamAttrs =
+ getParent()->getAttributes().getParamAttributes(getArgNo());
+ return getMemoryParamAllocType(ParamAttrs, getType());
+}
+
unsigned Argument::getParamAlignment() const {
assert(getType()->isPointerTy() && "Only pointers have alignments");
return getParent()->getParamAlignment(getArgNo());
@@ -165,6 +201,16 @@ Type *Argument::getParamByValType() const {
return getParent()->getParamByValType(getArgNo());
}
+Type *Argument::getParamStructRetType() const {
+ assert(getType()->isPointerTy() && "Only pointers have sret types");
+ return getParent()->getParamStructRetType(getArgNo());
+}
+
+Type *Argument::getParamByRefType() const {
+ assert(getType()->isPointerTy() && "Only pointers have byval types");
+ return getParent()->getParamByRefType(getArgNo());
+}
+
uint64_t Argument::getDereferenceableBytes() const {
assert(getType()->isPointerTy() &&
"Only pointers have dereferenceable bytes");
@@ -534,6 +580,21 @@ void Function::addDereferenceableOrNullParamAttr(unsigned ArgNo,
setAttributes(PAL);
}
+DenormalMode Function::getDenormalMode(const fltSemantics &FPType) const {
+ if (&FPType == &APFloat::IEEEsingle()) {
+ Attribute Attr = getFnAttribute("denormal-fp-math-f32");
+ StringRef Val = Attr.getValueAsString();
+ if (!Val.empty())
+ return parseDenormalFPAttribute(Val);
+
+ // If the f32 variant of the attribute isn't specified, try to use the
+ // generic one.
+ }
+
+ Attribute Attr = getFnAttribute("denormal-fp-math");
+ return parseDenormalFPAttribute(Attr.getValueAsString());
+}
+
const std::string &Function::getGC() const {
assert(hasGC() && "Function has no collector");
return getContext().getGC(*this);
@@ -551,6 +612,12 @@ void Function::clearGC() {
setValueSubclassDataBit(14, false);
}
+bool Function::hasStackProtectorFnAttr() const {
+ return hasFnAttribute(Attribute::StackProtect) ||
+ hasFnAttribute(Attribute::StackProtectStrong) ||
+ hasFnAttribute(Attribute::StackProtectReq);
+}
+
/// Copy all additional attributes (those not needed to create a Function) from
/// the Function Src to this one.
void Function::copyAttributesFrom(const Function *Src) {
@@ -582,6 +649,14 @@ static const char * const IntrinsicNameTable[] = {
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_INTRINSIC_TARGET_DATA
+bool Function::isTargetIntrinsic(Intrinsic::ID IID) {
+ return IID > TargetInfos[0].Count;
+}
+
+bool Function::isTargetIntrinsic() const {
+ return isTargetIntrinsic(IntID);
+}
+
/// Find the segment of \c IntrinsicNameTable for intrinsics with the same
/// target as \c Name, or the generic table if \c Name is not target specific.
///
@@ -674,9 +749,10 @@ static std::string getMangledTypeStr(Type* Ty) {
Result += "f";
} else if (VectorType* VTy = dyn_cast<VectorType>(Ty)) {
ElementCount EC = VTy->getElementCount();
- if (EC.Scalable)
+ if (EC.isScalable())
Result += "nx";
- Result += "v" + utostr(EC.Min) + getMangledTypeStr(VTy->getElementType());
+ Result += "v" + utostr(EC.getKnownMinValue()) +
+ getMangledTypeStr(VTy->getElementType());
} else if (Ty) {
switch (Ty->getTypeID()) {
default: llvm_unreachable("Unhandled type");
@@ -690,6 +766,7 @@ static std::string getMangledTypeStr(Type* Ty) {
case Type::FP128TyID: Result += "f128"; break;
case Type::PPC_FP128TyID: Result += "ppcf128"; break;
case Type::X86_MMXTyID: Result += "x86mmx"; break;
+ case Type::X86_AMXTyID: Result += "x86amx"; break;
case Type::IntegerTyID:
Result += "i" + utostr(cast<IntegerType>(Ty)->getBitWidth());
break;
@@ -707,6 +784,8 @@ StringRef Intrinsic::getName(ID id) {
std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
assert(id < num_intrinsics && "Invalid intrinsic ID!");
+ assert((Tys.empty() || Intrinsic::isOverloaded(id)) &&
+ "This version of getName is for overloaded intrinsics only");
std::string Result(IntrinsicNameTable[id]);
for (Type *Ty : Tys) {
Result += "." + getMangledTypeStr(Ty);
@@ -770,7 +849,10 @@ enum IIT_Info {
IIT_SUBDIVIDE4_ARG = 45,
IIT_VEC_OF_BITCASTS_TO_INT = 46,
IIT_V128 = 47,
- IIT_BF16 = 48
+ IIT_BF16 = 48,
+ IIT_STRUCT9 = 49,
+ IIT_V256 = 50,
+ IIT_AMX = 51
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -793,6 +875,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_MMX:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0));
return;
+ case IIT_AMX:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::AMX, 0));
+ return;
case IIT_TOKEN:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Token, 0));
return;
@@ -864,6 +949,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
OutputTable.push_back(IITDescriptor::getVector(128, IsScalableVector));
DecodeIITType(NextElt, Infos, Info, OutputTable);
return;
+ case IIT_V256:
+ OutputTable.push_back(IITDescriptor::getVector(256, IsScalableVector));
+ DecodeIITType(NextElt, Infos, Info, OutputTable);
+ return;
case IIT_V512:
OutputTable.push_back(IITDescriptor::getVector(512, IsScalableVector));
DecodeIITType(NextElt, Infos, Info, OutputTable);
@@ -932,6 +1021,7 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_EMPTYSTRUCT:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
return;
+ case IIT_STRUCT9: ++StructElts; LLVM_FALLTHROUGH;
case IIT_STRUCT8: ++StructElts; LLVM_FALLTHROUGH;
case IIT_STRUCT7: ++StructElts; LLVM_FALLTHROUGH;
case IIT_STRUCT6: ++StructElts; LLVM_FALLTHROUGH;
@@ -1025,6 +1115,7 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::Void: return Type::getVoidTy(Context);
case IITDescriptor::VarArg: return Type::getVoidTy(Context);
case IITDescriptor::MMX: return Type::getX86_MMXTy(Context);
+ case IITDescriptor::AMX: return Type::getX86_AMXTy(Context);
case IITDescriptor::Token: return Type::getTokenTy(Context);
case IITDescriptor::Metadata: return Type::getMetadataTy(Context);
case IITDescriptor::Half: return Type::getHalfTy(Context);
@@ -1162,7 +1253,7 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
// There can never be multiple globals with the same name of different types,
// because intrinsics must be a specific type.
return cast<Function>(
- M->getOrInsertFunction(getName(id, Tys),
+ M->getOrInsertFunction(Tys.empty() ? getName(id) : getName(id, Tys),
getType(M->getContext(), id, Tys))
.getCallee());
}
@@ -1204,6 +1295,7 @@ static bool matchIntrinsicType(
case IITDescriptor::Void: return !Ty->isVoidTy();
case IITDescriptor::VarArg: return true;
case IITDescriptor::MMX: return !Ty->isX86_MMXTy();
+ case IITDescriptor::AMX: return !Ty->isX86_AMXTy();
case IITDescriptor::Token: return !Ty->isTokenTy();
case IITDescriptor::Metadata: return !Ty->isMetadataTy();
case IITDescriptor::Half: return !Ty->isHalfTy();
@@ -1356,10 +1448,10 @@ static bool matchIntrinsicType(
// Verify the overloaded type "matches" the Ref type.
// i.e. Ty is a vector with the same width as Ref.
// Composed of pointers to the same element type as Ref.
- VectorType *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
- VectorType *ThisArgVecTy = dyn_cast<VectorType>(Ty);
+ auto *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
+ auto *ThisArgVecTy = dyn_cast<VectorType>(Ty);
if (!ThisArgVecTy || !ReferenceType ||
- (ReferenceType->getNumElements() != ThisArgVecTy->getNumElements()))
+ (ReferenceType->getElementCount() != ThisArgVecTy->getElementCount()))
return true;
PointerType *ThisArgEltTy =
dyn_cast<PointerType>(ThisArgVecTy->getElementType());
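Note: a short sketch exercising a few of the Argument/Function helpers introduced above (the in-memory parameter queries, the stricter nonnull check, the per-type denormal mode). The helper name is illustrative; the attribute spellings are those used in the patch:

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/FloatingPointMode.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    static void inspect(const Function &F) {
      // "denormal-fp-math-f32" is consulted first for f32, then the generic
      // "denormal-fp-math" attribute.
      DenormalMode F32Mode = F.getDenormalMode(APFloat::IEEEsingle());
      (void)F32Mode;

      for (const Argument &A : F.args()) {
        // byval/sret/inalloca/preallocated/byref all imply an in-memory type.
        if (A.hasPointeeInMemoryValueAttr())
          if (Type *MemTy = A.getPointeeInMemoryValueType())
            (void)MemTy;
        // With AllowUndefOrPoison = false, nonnull alone no longer suffices;
        // the argument must also be noundef.
        bool NonNull = A.hasNonNullAttr(/*AllowUndefOrPoison=*/false);
        (void)NonNull;
      }
    }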
diff --git a/contrib/llvm-project/llvm/lib/IR/Globals.cpp b/contrib/llvm-project/llvm/lib/IR/Globals.cpp
index ed946ef3fd12..c2cbe7ddddf8 100644
--- a/contrib/llvm-project/llvm/lib/IR/Globals.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Globals.cpp
@@ -352,11 +352,15 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
LinkageTypes Link, Constant *InitVal,
const Twine &Name, GlobalVariable *Before,
- ThreadLocalMode TLMode, unsigned AddressSpace,
+ ThreadLocalMode TLMode,
+ Optional<unsigned> AddressSpace,
bool isExternallyInitialized)
: GlobalObject(Ty, Value::GlobalVariableVal,
OperandTraits<GlobalVariable>::op_begin(this),
- InitVal != nullptr, Link, Name, AddressSpace),
+ InitVal != nullptr, Link, Name,
+ AddressSpace
+ ? *AddressSpace
+ : M.getDataLayout().getDefaultGlobalsAddressSpace()),
isConstantGlobal(constant),
isExternallyInitializedConstant(isExternallyInitialized) {
assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) &&
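Note: the address space parameter is now Optional, so a caller that passes None gets the data layout's default globals address space instead of hard-coding 0. A hedged sketch with an invented helper:

    #include "llvm/ADT/None.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static GlobalVariable *makeInternalGlobal(Module &M, Type *Ty,
                                              Constant *Init) {
      return new GlobalVariable(M, Ty, /*isConstant=*/true,
                                GlobalValue::InternalLinkage, Init, "g",
                                /*InsertBefore=*/nullptr,
                                GlobalValue::NotThreadLocal,
                                /*AddressSpace=*/None); // DataLayout default
    }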
diff --git a/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp
index a82f15895782..91ca984b755c 100644
--- a/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp
@@ -72,13 +72,25 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
static CallInst *createCallHelper(Function *Callee, ArrayRef<Value *> Ops,
IRBuilderBase *Builder,
const Twine &Name = "",
- Instruction *FMFSource = nullptr) {
- CallInst *CI = Builder->CreateCall(Callee, Ops, Name);
+ Instruction *FMFSource = nullptr,
+ ArrayRef<OperandBundleDef> OpBundles = {}) {
+ CallInst *CI = Builder->CreateCall(Callee, Ops, OpBundles, Name);
if (FMFSource)
CI->copyFastMathFlags(FMFSource);
return CI;
}
+Value *IRBuilderBase::CreateVScale(Constant *Scaling, const Twine &Name) {
+ Module *M = GetInsertBlock()->getParent()->getParent();
+ assert(isa<ConstantInt>(Scaling) && "Expected constant integer");
+ Function *TheFn =
+ Intrinsic::getDeclaration(M, Intrinsic::vscale, {Scaling->getType()});
+ CallInst *CI = createCallHelper(TheFn, {}, this, Name);
+ return cast<ConstantInt>(Scaling)->getSExtValue() == 1
+ ? CI
+ : CreateMul(CI, Scaling);
+}
+
CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size,
MaybeAlign Align, bool isVolatile,
MDNode *TBAATag, MDNode *ScopeTag,
@@ -135,22 +147,21 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
return CI;
}
-CallInst *IRBuilderBase::CreateMemCpy(Value *Dst, MaybeAlign DstAlign,
- Value *Src, MaybeAlign SrcAlign,
- Value *Size, bool isVolatile,
- MDNode *TBAATag, MDNode *TBAAStructTag,
- MDNode *ScopeTag, MDNode *NoAliasTag) {
+CallInst *IRBuilderBase::CreateMemTransferInst(
+ Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src,
+ MaybeAlign SrcAlign, Value *Size, bool isVolatile, MDNode *TBAATag,
+ MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) {
Dst = getCastedInt8PtrValue(Dst);
Src = getCastedInt8PtrValue(Src);
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
- Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+ Function *TheFn = Intrinsic::getDeclaration(M, IntrID, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
- auto* MCI = cast<MemCpyInst>(CI);
+ auto* MCI = cast<MemTransferInst>(CI);
if (DstAlign)
MCI->setDestAlignment(*DstAlign);
if (SrcAlign)
@@ -324,78 +335,57 @@ static CallInst *getReductionIntrinsic(IRBuilderBase *Builder, Intrinsic::ID ID,
CallInst *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) {
Module *M = GetInsertBlock()->getParent()->getParent();
Value *Ops[] = {Acc, Src};
- Type *Tys[] = {Acc->getType(), Src->getType()};
- auto Decl = Intrinsic::getDeclaration(
- M, Intrinsic::experimental_vector_reduce_v2_fadd, Tys);
+ auto Decl = Intrinsic::getDeclaration(M, Intrinsic::vector_reduce_fadd,
+ {Src->getType()});
return createCallHelper(Decl, Ops, this);
}
CallInst *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) {
Module *M = GetInsertBlock()->getParent()->getParent();
Value *Ops[] = {Acc, Src};
- Type *Tys[] = {Acc->getType(), Src->getType()};
- auto Decl = Intrinsic::getDeclaration(
- M, Intrinsic::experimental_vector_reduce_v2_fmul, Tys);
+ auto Decl = Intrinsic::getDeclaration(M, Intrinsic::vector_reduce_fmul,
+ {Src->getType()});
return createCallHelper(Decl, Ops, this);
}
CallInst *IRBuilderBase::CreateAddReduce(Value *Src) {
- return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_add,
- Src);
+ return getReductionIntrinsic(this, Intrinsic::vector_reduce_add, Src);
}
CallInst *IRBuilderBase::CreateMulReduce(Value *Src) {
- return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_mul,
- Src);
+ return getReductionIntrinsic(this, Intrinsic::vector_reduce_mul, Src);
}
CallInst *IRBuilderBase::CreateAndReduce(Value *Src) {
- return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_and,
- Src);
+ return getReductionIntrinsic(this, Intrinsic::vector_reduce_and, Src);
}
CallInst *IRBuilderBase::CreateOrReduce(Value *Src) {
- return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_or,
- Src);
+ return getReductionIntrinsic(this, Intrinsic::vector_reduce_or, Src);
}
CallInst *IRBuilderBase::CreateXorReduce(Value *Src) {
- return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_xor,
- Src);
+ return getReductionIntrinsic(this, Intrinsic::vector_reduce_xor, Src);
}
CallInst *IRBuilderBase::CreateIntMaxReduce(Value *Src, bool IsSigned) {
- auto ID = IsSigned ? Intrinsic::experimental_vector_reduce_smax
- : Intrinsic::experimental_vector_reduce_umax;
+ auto ID =
+ IsSigned ? Intrinsic::vector_reduce_smax : Intrinsic::vector_reduce_umax;
return getReductionIntrinsic(this, ID, Src);
}
CallInst *IRBuilderBase::CreateIntMinReduce(Value *Src, bool IsSigned) {
- auto ID = IsSigned ? Intrinsic::experimental_vector_reduce_smin
- : Intrinsic::experimental_vector_reduce_umin;
+ auto ID =
+ IsSigned ? Intrinsic::vector_reduce_smin : Intrinsic::vector_reduce_umin;
return getReductionIntrinsic(this, ID, Src);
}
-CallInst *IRBuilderBase::CreateFPMaxReduce(Value *Src, bool NoNaN) {
- auto Rdx = getReductionIntrinsic(
- this, Intrinsic::experimental_vector_reduce_fmax, Src);
- if (NoNaN) {
- FastMathFlags FMF;
- FMF.setNoNaNs();
- Rdx->setFastMathFlags(FMF);
- }
- return Rdx;
+CallInst *IRBuilderBase::CreateFPMaxReduce(Value *Src) {
+ return getReductionIntrinsic(this, Intrinsic::vector_reduce_fmax, Src);
}
-CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src, bool NoNaN) {
- auto Rdx = getReductionIntrinsic(
- this, Intrinsic::experimental_vector_reduce_fmin, Src);
- if (NoNaN) {
- FastMathFlags FMF;
- FMF.setNoNaNs();
- Rdx->setFastMathFlags(FMF);
- }
- return Rdx;
+CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src) {
+ return getReductionIntrinsic(this, Intrinsic::vector_reduce_fmin, Src);
}
CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
@@ -450,14 +440,23 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) {
return createCallHelper(TheFn, Ops, this);
}
-CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
+CallInst *
+IRBuilderBase::CreateAssumption(Value *Cond,
+ ArrayRef<OperandBundleDef> OpBundles) {
assert(Cond->getType() == getInt1Ty() &&
"an assumption condition must be of type i1");
Value *Ops[] = { Cond };
Module *M = BB->getParent()->getParent();
Function *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume);
- return createCallHelper(FnAssume, Ops, this);
+ return createCallHelper(FnAssume, Ops, this, "", nullptr, OpBundles);
+}
+
+Instruction *IRBuilderBase::CreateNoAliasScopeDeclaration(Value *Scope) {
+ Module *M = BB->getModule();
+ auto *FnIntrinsic = Intrinsic::getDeclaration(
+ M, Intrinsic::experimental_noalias_scope_decl, {});
+ return createCallHelper(FnIntrinsic, {Scope}, this);
}
/// Create a call to a Masked Load intrinsic.
@@ -523,8 +522,8 @@ CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id,
CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, Align Alignment,
Value *Mask, Value *PassThru,
const Twine &Name) {
- auto PtrsTy = cast<VectorType>(Ptrs->getType());
- auto PtrTy = cast<PointerType>(PtrsTy->getElementType());
+ auto *PtrsTy = cast<FixedVectorType>(Ptrs->getType());
+ auto *PtrTy = cast<PointerType>(PtrsTy->getElementType());
unsigned NumElts = PtrsTy->getNumElements();
auto *DataTy = FixedVectorType::get(PtrTy->getElementType(), NumElts);
@@ -553,8 +552,8 @@ CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, Align Alignment,
/// be accessed in memory
CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs,
Align Alignment, Value *Mask) {
- auto PtrsTy = cast<VectorType>(Ptrs->getType());
- auto DataTy = cast<VectorType>(Data->getType());
+ auto *PtrsTy = cast<FixedVectorType>(Ptrs->getType());
+ auto *DataTy = cast<FixedVectorType>(Data->getType());
unsigned NumElts = PtrsTy->getNumElements();
#ifndef NDEBUG
@@ -586,7 +585,7 @@ getStatepointArgs(IRBuilderBase &B, uint64_t ID, uint32_t NumPatchBytes,
Args.push_back(ActualCallee);
Args.push_back(B.getInt32(CallArgs.size()));
Args.push_back(B.getInt32(Flags));
- Args.insert(Args.end(), CallArgs.begin(), CallArgs.end());
+ llvm::append_range(Args, CallArgs);
// GC Transition and Deopt args are now always handled via operand bundle.
// They will be removed from the signature of gc.statepoint shortly.
Args.push_back(B.getInt32(0));
@@ -603,18 +602,17 @@ getStatepointBundles(Optional<ArrayRef<T1>> TransitionArgs,
std::vector<OperandBundleDef> Rval;
if (DeoptArgs) {
SmallVector<Value*, 16> DeoptValues;
- DeoptValues.insert(DeoptValues.end(), DeoptArgs->begin(), DeoptArgs->end());
+ llvm::append_range(DeoptValues, *DeoptArgs);
Rval.emplace_back("deopt", DeoptValues);
}
if (TransitionArgs) {
SmallVector<Value*, 16> TransitionValues;
- TransitionValues.insert(TransitionValues.end(),
- TransitionArgs->begin(), TransitionArgs->end());
+ llvm::append_range(TransitionValues, *TransitionArgs);
Rval.emplace_back("gc-transition", TransitionValues);
}
if (GCArgs.size()) {
SmallVector<Value*, 16> LiveValues;
- LiveValues.insert(LiveValues.end(), GCArgs.begin(), GCArgs.end());
+ llvm::append_range(LiveValues, GCArgs);
Rval.emplace_back("gc-live", LiveValues);
}
return Rval;
@@ -660,10 +658,10 @@ CallInst *IRBuilderBase::CreateGCStatepointCall(
CallInst *IRBuilderBase::CreateGCStatepointCall(
uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, uint32_t Flags,
- ArrayRef<Use> CallArgs, Optional<ArrayRef<Use>> TransitionArgs,
+ ArrayRef<Value *> CallArgs, Optional<ArrayRef<Use>> TransitionArgs,
Optional<ArrayRef<Use>> DeoptArgs, ArrayRef<Value *> GCArgs,
const Twine &Name) {
- return CreateGCStatepointCallCommon<Use, Use, Use, Value *>(
+ return CreateGCStatepointCallCommon<Value *, Use, Use, Value *>(
this, ID, NumPatchBytes, ActualCallee, Flags, CallArgs, TransitionArgs,
DeoptArgs, GCArgs, Name);
}
@@ -718,9 +716,9 @@ InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags,
- ArrayRef<Use> InvokeArgs, Optional<ArrayRef<Use>> TransitionArgs,
+ ArrayRef<Value *> InvokeArgs, Optional<ArrayRef<Use>> TransitionArgs,
Optional<ArrayRef<Use>> DeoptArgs, ArrayRef<Value *> GCArgs, const Twine &Name) {
- return CreateGCStatepointInvokeCommon<Use, Use, Use, Value *>(
+ return CreateGCStatepointInvokeCommon<Value *, Use, Use, Value *>(
this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, Flags,
InvokeArgs, TransitionArgs, DeoptArgs, GCArgs, Name);
}
@@ -997,18 +995,24 @@ Value *IRBuilderBase::CreateStripInvariantGroup(Value *Ptr) {
Value *IRBuilderBase::CreateVectorSplat(unsigned NumElts, Value *V,
const Twine &Name) {
- assert(NumElts > 0 && "Cannot splat to an empty vector!");
+ auto EC = ElementCount::getFixed(NumElts);
+ return CreateVectorSplat(EC, V, Name);
+}
+
+Value *IRBuilderBase::CreateVectorSplat(ElementCount EC, Value *V,
+ const Twine &Name) {
+ assert(EC.isNonZero() && "Cannot splat to an empty vector!");
- // First insert it into an undef vector so we can shuffle it.
+ // First insert it into a poison vector so we can shuffle it.
Type *I32Ty = getInt32Ty();
- Value *Undef = UndefValue::get(FixedVectorType::get(V->getType(), NumElts));
- V = CreateInsertElement(Undef, V, ConstantInt::get(I32Ty, 0),
+ Value *Poison = PoisonValue::get(VectorType::get(V->getType(), EC));
+ V = CreateInsertElement(Poison, V, ConstantInt::get(I32Ty, 0),
Name + ".splatinsert");
// Shuffle the value across the desired number of elements.
- Value *Zeros =
- ConstantAggregateZero::get(FixedVectorType::get(I32Ty, NumElts));
- return CreateShuffleVector(V, Undef, Zeros, Name + ".splat");
+ SmallVector<int, 16> Zeros;
+ Zeros.resize(EC.getKnownMinValue());
+ return CreateShuffleVector(V, Zeros, Name + ".splat");
}
Value *IRBuilderBase::CreateExtractInteger(
@@ -1043,9 +1047,7 @@ Value *IRBuilderBase::CreatePreserveArrayAccessIndex(
Value *LastIndexV = getInt32(LastIndex);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
- SmallVector<Value *, 4> IdxList;
- for (unsigned I = 0; I < Dimension; ++I)
- IdxList.push_back(Zero);
+ SmallVector<Value *, 4> IdxList(Dimension, Zero);
IdxList.push_back(LastIndexV);
Type *ResultType =
@@ -1108,63 +1110,37 @@ Value *IRBuilderBase::CreatePreserveStructAccessIndex(
return Fn;
}
-CallInst *IRBuilderBase::CreateAlignmentAssumptionHelper(
- const DataLayout &DL, Value *PtrValue, Value *Mask, Type *IntPtrTy,
- Value *OffsetValue, Value **TheCheck) {
- Value *PtrIntValue = CreatePtrToInt(PtrValue, IntPtrTy, "ptrint");
-
- if (OffsetValue) {
- bool IsOffsetZero = false;
- if (const auto *CI = dyn_cast<ConstantInt>(OffsetValue))
- IsOffsetZero = CI->isZero();
-
- if (!IsOffsetZero) {
- if (OffsetValue->getType() != IntPtrTy)
- OffsetValue = CreateIntCast(OffsetValue, IntPtrTy, /*isSigned*/ true,
- "offsetcast");
- PtrIntValue = CreateSub(PtrIntValue, OffsetValue, "offsetptr");
- }
- }
-
- Value *Zero = ConstantInt::get(IntPtrTy, 0);
- Value *MaskedPtr = CreateAnd(PtrIntValue, Mask, "maskedptr");
- Value *InvCond = CreateICmpEQ(MaskedPtr, Zero, "maskcond");
- if (TheCheck)
- *TheCheck = InvCond;
-
- return CreateAssumption(InvCond);
+CallInst *IRBuilderBase::CreateAlignmentAssumptionHelper(const DataLayout &DL,
+ Value *PtrValue,
+ Value *AlignValue,
+ Value *OffsetValue) {
+ SmallVector<Value *, 4> Vals({PtrValue, AlignValue});
+ if (OffsetValue)
+ Vals.push_back(OffsetValue);
+ OperandBundleDefT<Value *> AlignOpB("align", Vals);
+ return CreateAssumption(ConstantInt::getTrue(getContext()), {AlignOpB});
}
-CallInst *IRBuilderBase::CreateAlignmentAssumption(
- const DataLayout &DL, Value *PtrValue, unsigned Alignment,
- Value *OffsetValue, Value **TheCheck) {
+CallInst *IRBuilderBase::CreateAlignmentAssumption(const DataLayout &DL,
+ Value *PtrValue,
+ unsigned Alignment,
+ Value *OffsetValue) {
assert(isa<PointerType>(PtrValue->getType()) &&
"trying to create an alignment assumption on a non-pointer?");
assert(Alignment != 0 && "Invalid Alignment");
auto *PtrTy = cast<PointerType>(PtrValue->getType());
Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace());
-
- Value *Mask = ConstantInt::get(IntPtrTy, Alignment - 1);
- return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
- OffsetValue, TheCheck);
+ Value *AlignValue = ConstantInt::get(IntPtrTy, Alignment);
+ return CreateAlignmentAssumptionHelper(DL, PtrValue, AlignValue, OffsetValue);
}
-CallInst *IRBuilderBase::CreateAlignmentAssumption(
- const DataLayout &DL, Value *PtrValue, Value *Alignment,
- Value *OffsetValue, Value **TheCheck) {
+CallInst *IRBuilderBase::CreateAlignmentAssumption(const DataLayout &DL,
+ Value *PtrValue,
+ Value *Alignment,
+ Value *OffsetValue) {
assert(isa<PointerType>(PtrValue->getType()) &&
"trying to create an alignment assumption on a non-pointer?");
- auto *PtrTy = cast<PointerType>(PtrValue->getType());
- Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace());
-
- if (Alignment->getType() != IntPtrTy)
- Alignment = CreateIntCast(Alignment, IntPtrTy, /*isSigned*/ false,
- "alignmentcast");
-
- Value *Mask = CreateSub(Alignment, ConstantInt::get(IntPtrTy, 1), "mask");
-
- return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
- OffsetValue, TheCheck);
+ return CreateAlignmentAssumptionHelper(DL, PtrValue, Alignment, OffsetValue);
}
IRBuilderDefaultInserter::~IRBuilderDefaultInserter() {}
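Note: a hedged sketch of two of the IRBuilder changes above, the ElementCount overload of CreateVectorSplat and the operand-bundle form of alignment assumptions. The builder position and values are assumed to come from surrounding code:

    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    static void example(IRBuilderBase &B, Value *Scalar, Value *Ptr,
                        const DataLayout &DL) {
      // Scalable splats no longer need a hand-rolled insert/shuffle sequence.
      Value *Splat =
          B.CreateVectorSplat(ElementCount::getScalable(4), Scalar, "vs");
      (void)Splat;

      // Emitted as llvm.assume with an "align" operand bundle instead of the
      // old ptrtoint/and/icmp pattern; the TheCheck out-parameter is gone.
      B.CreateAlignmentAssumption(DL, Ptr, /*Alignment=*/16);
    }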
diff --git a/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp
index 03657ff8d9d4..8d6fe1eb6134 100644
--- a/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp
@@ -11,13 +11,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
PrintModulePass::PrintModulePass() : OS(dbgs()) {}
diff --git a/contrib/llvm-project/llvm/lib/IR/Instruction.cpp b/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
index bfbd801cb7a7..8e52dd3ddc71 100644
--- a/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
@@ -55,9 +55,6 @@ Instruction::~Instruction() {
// instructions in a BasicBlock are deleted).
if (isUsedByMetadata())
ValueAsMetadata::handleRAUW(this, UndefValue::get(getType()));
-
- if (hasMetadataHashEntry())
- clearMetadataHashEntries();
}
@@ -488,6 +485,7 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
if (!std::equal(op_begin(), op_end(), I->op_begin()))
return false;
+ // WARNING: this logic must be kept in sync with EliminateDuplicatePHINodes()!
if (const PHINode *thisPHI = dyn_cast<PHINode>(this)) {
const PHINode *otherPHI = cast<PHINode>(I);
return std::equal(thisPHI->block_begin(), thisPHI->block_end(),
@@ -635,6 +633,16 @@ bool Instruction::isSafeToRemove() const {
!this->isTerminator();
}
+bool Instruction::willReturn() const {
+ if (const auto *CB = dyn_cast<CallBase>(this))
+ // FIXME: Temporarily assume that all side-effect free intrinsics will
+ // return. Remove this workaround once all intrinsics are appropriately
+ // annotated.
+ return CB->hasFnAttr(Attribute::WillReturn) ||
+ (isa<IntrinsicInst>(CB) && CB->onlyReadsMemory());
+ return true;
+}
+
bool Instruction::isLifetimeStartOrEnd() const {
auto II = dyn_cast<IntrinsicInst>(this);
if (!II)
@@ -643,16 +651,22 @@ bool Instruction::isLifetimeStartOrEnd() const {
return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end;
}
-const Instruction *Instruction::getNextNonDebugInstruction() const {
+bool Instruction::isDebugOrPseudoInst() const {
+ return isa<DbgInfoIntrinsic>(this) || isa<PseudoProbeInst>(this);
+}
+
+const Instruction *
+Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const {
for (const Instruction *I = getNextNode(); I; I = I->getNextNode())
- if (!isa<DbgInfoIntrinsic>(I))
+ if (!isa<DbgInfoIntrinsic>(I) && !(SkipPseudoOp && isa<PseudoProbeInst>(I)))
return I;
return nullptr;
}
-const Instruction *Instruction::getPrevNonDebugInstruction() const {
+const Instruction *
+Instruction::getPrevNonDebugInstruction(bool SkipPseudoOp) const {
for (const Instruction *I = getPrevNode(); I; I = I->getPrevNode())
- if (!isa<DbgInfoIntrinsic>(I))
+ if (!isa<DbgInfoIntrinsic>(I) && !(SkipPseudoOp && isa<PseudoProbeInst>(I)))
return I;
return nullptr;
}
@@ -672,6 +686,13 @@ bool Instruction::isAssociative() const {
}
}
+bool Instruction::isCommutative() const {
+ if (auto *II = dyn_cast<IntrinsicInst>(this))
+ return II->isCommutative();
+ // TODO: Should allow icmp/fcmp?
+ return isCommutative(getOpcode());
+}
+
unsigned Instruction::getNumSuccessors() const {
switch (getOpcode()) {
#define HANDLE_TERM_INST(N, OPC, CLASS) \
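Note: a small sketch of the new instruction-level queries added above (willReturn and the SkipPseudoOp flag); the helper names are illustrative:

    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    static const Instruction *nextRealInst(const Instruction *I) {
      // Skips debug intrinsics and, when requested, llvm.pseudoprobe.
      return I->getNextNonDebugInstruction(/*SkipPseudoOp=*/true);
    }

    static bool looksRemovable(const Instruction *I) {
      // willReturn() currently also trusts side-effect-free intrinsics, per
      // the FIXME in the implementation above.
      return I->use_empty() && !I->mayHaveSideEffects() && I->willReturn() &&
             !I->isTerminator();
    }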
diff --git a/contrib/llvm-project/llvm/lib/IR/Instructions.cpp b/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
index 2f17a0d73af4..d6b4a4f5030f 100644
--- a/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
@@ -49,13 +49,14 @@ using namespace llvm;
// AllocaInst Class
//===----------------------------------------------------------------------===//
-Optional<uint64_t>
+Optional<TypeSize>
AllocaInst::getAllocationSizeInBits(const DataLayout &DL) const {
- uint64_t Size = DL.getTypeAllocSizeInBits(getAllocatedType());
+ TypeSize Size = DL.getTypeAllocSizeInBits(getAllocatedType());
if (isArrayAllocation()) {
auto *C = dyn_cast<ConstantInt>(getArraySize());
if (!C)
return None;
+ assert(!Size.isScalable() && "Array elements cannot have a scalable size");
Size *= C->getZExtValue();
}
return Size;
@@ -321,16 +322,6 @@ Value *CallBase::getReturnedArgOperand() const {
return nullptr;
}
-bool CallBase::hasRetAttr(Attribute::AttrKind Kind) const {
- if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
- return true;
-
- // Look at the callee, if available.
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
- return false;
-}
-
/// Determine whether the argument or parameter has the given attribute.
bool CallBase::paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
assert(ArgNo < getNumArgOperands() && "Param index out of bounds!");
@@ -409,7 +400,7 @@ CallBase::BundleOpInfo &CallBase::getBundleOpInfoForOperand(unsigned OpIdx) {
bundle_op_iterator Begin = bundle_op_info_begin();
bundle_op_iterator End = bundle_op_info_end();
- bundle_op_iterator Current;
+ bundle_op_iterator Current = Begin;
while (Begin != End) {
unsigned ScaledOperandPerBundle =
@@ -515,6 +506,18 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
return NewCI;
}
+CallInst *CallInst::CreateWithReplacedBundle(CallInst *CI, OperandBundleDef OpB,
+ Instruction *InsertPt) {
+ SmallVector<OperandBundleDef, 2> OpDefs;
+ for (unsigned i = 0, e = CI->getNumOperandBundles(); i < e; ++i) {
+ auto ChildOB = CI->getOperandBundleAt(i);
+ if (ChildOB.getTagName() != OpB.getTag())
+ OpDefs.emplace_back(ChildOB);
+ }
+ OpDefs.emplace_back(OpB);
+ return CallInst::Create(CI, OpDefs, InsertPt);
+}
+
// Update profile weight for call instruction by scaling it using the ratio
// of S/T. The meaning of "branch_weights" meta data for call instruction is
// transfered to represent call count.
@@ -548,8 +551,9 @@ void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
->getValue()
.getZExtValue());
Val *= APS;
- Vals.push_back(MDB.createConstant(ConstantInt::get(
- Type::getInt64Ty(getContext()), Val.udiv(APT).getLimitedValue())));
+ Vals.push_back(MDB.createConstant(
+ ConstantInt::get(Type::getInt32Ty(getContext()),
+ Val.udiv(APT).getLimitedValue(UINT32_MAX))));
} else if (ProfDataName->getString().equals("VP"))
for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
// The first value is the key of the value profile, which will not change.
@@ -826,6 +830,18 @@ InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef<OperandBundleDef> OpB,
return NewII;
}
+InvokeInst *InvokeInst::CreateWithReplacedBundle(InvokeInst *II,
+ OperandBundleDef OpB,
+ Instruction *InsertPt) {
+ SmallVector<OperandBundleDef, 2> OpDefs;
+ for (unsigned i = 0, e = II->getNumOperandBundles(); i < e; ++i) {
+ auto ChildOB = II->getOperandBundleAt(i);
+ if (ChildOB.getTagName() != OpB.getTag())
+ OpDefs.emplace_back(ChildOB);
+ }
+ OpDefs.emplace_back(OpB);
+ return InvokeInst::Create(II, OpDefs, InsertPt);
+}
LandingPadInst *InvokeInst::getLandingPadInst() const {
return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI());
@@ -1919,7 +1935,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, ArrayRef<int> Mask,
}
void ShuffleVectorInst::commute() {
- int NumOpElts = cast<VectorType>(Op<0>()->getType())->getNumElements();
+ int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
int NumMaskElts = ShuffleMask.size();
SmallVector<int, 16> NewMask(NumMaskElts);
for (int i = 0; i != NumMaskElts; ++i) {
@@ -1943,7 +1959,8 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
return false;
// Make sure the mask elements make sense.
- int V1Size = cast<VectorType>(V1->getType())->getElementCount().Min;
+ int V1Size =
+ cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
for (int Elem : Mask)
if (Elem != UndefMaskElem && Elem >= V1Size * 2)
return false;
@@ -1973,7 +1990,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
return true;
if (const auto *MV = dyn_cast<ConstantVector>(Mask)) {
- unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
+ unsigned V1Size = cast<FixedVectorType>(V1->getType())->getNumElements();
for (Value *Op : MV->operands()) {
if (auto *CI = dyn_cast<ConstantInt>(Op)) {
if (CI->uge(V1Size*2))
@@ -1986,8 +2003,9 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
}
if (const auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
- unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
- for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i)
+ unsigned V1Size = cast<FixedVectorType>(V1->getType())->getNumElements();
+ for (unsigned i = 0, e = cast<FixedVectorType>(MaskTy)->getNumElements();
+ i != e; ++i)
if (CDS->getElementAsInteger(i) >= V1Size*2)
return false;
return true;
@@ -1998,12 +2016,26 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
SmallVectorImpl<int> &Result) {
- unsigned NumElts = cast<VectorType>(Mask->getType())->getElementCount().Min;
+ ElementCount EC = cast<VectorType>(Mask->getType())->getElementCount();
+
if (isa<ConstantAggregateZero>(Mask)) {
- Result.resize(NumElts, 0);
+ Result.resize(EC.getKnownMinValue(), 0);
return;
}
- Result.reserve(NumElts);
+
+ Result.reserve(EC.getKnownMinValue());
+
+ if (EC.isScalable()) {
+ assert((isa<ConstantAggregateZero>(Mask) || isa<UndefValue>(Mask)) &&
+ "Scalable vector shuffle mask must be undef or zeroinitializer");
+ int MaskVal = isa<UndefValue>(Mask) ? -1 : 0;
+ for (unsigned I = 0; I < EC.getKnownMinValue(); ++I)
+ Result.emplace_back(MaskVal);
+ return;
+ }
+
+ unsigned NumElts = EC.getKnownMinValue();
+
if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
for (unsigned i = 0; i != NumElts; ++i)
Result.push_back(CDS->getElementAsInteger(i));
@@ -2185,8 +2217,14 @@ bool ShuffleVectorInst::isExtractSubvectorMask(ArrayRef<int> Mask,
bool ShuffleVectorInst::isIdentityWithPadding() const {
if (isa<UndefValue>(Op<2>()))
return false;
- int NumOpElts = cast<VectorType>(Op<0>()->getType())->getNumElements();
- int NumMaskElts = cast<VectorType>(getType())->getNumElements();
+
+ // FIXME: Not currently possible to express a shuffle mask for a scalable
+ // vector for this case.
+ if (isa<ScalableVectorType>(getType()))
+ return false;
+
+ int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
+ int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
if (NumMaskElts <= NumOpElts)
return false;
@@ -2208,7 +2246,7 @@ bool ShuffleVectorInst::isIdentityWithExtract() const {
return false;
// FIXME: Not currently possible to express a shuffle mask for a scalable
- // vector for this case
+ // vector for this case.
if (isa<ScalableVectorType>(getType()))
return false;
@@ -2226,8 +2264,13 @@ bool ShuffleVectorInst::isConcat() const {
isa<UndefValue>(Op<2>()))
return false;
- int NumOpElts = cast<VectorType>(Op<0>()->getType())->getNumElements();
- int NumMaskElts = getType()->getNumElements();
+ // FIXME: Not currently possible to express a shuffle mask for a scalable
+ // vector for this case.
+ if (isa<ScalableVectorType>(getType()))
+ return false;
+
+ int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
+ int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
if (NumMaskElts != NumOpElts * 2)
return false;
@@ -2614,6 +2657,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode,
Type *SrcTy,
Type *DestTy,
const DataLayout &DL) {
+ assert(castIsValid(Opcode, SrcTy, DestTy) && "method precondition");
switch (Opcode) {
default: llvm_unreachable("Invalid CastOp");
case Instruction::Trunc:
@@ -2968,8 +3012,8 @@ CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
"Invalid cast");
assert(Ty->isVectorTy() == S->getType()->isVectorTy() && "Invalid cast");
assert((!Ty->isVectorTy() ||
- cast<VectorType>(Ty)->getNumElements() ==
- cast<VectorType>(S->getType())->getNumElements()) &&
+ cast<VectorType>(Ty)->getElementCount() ==
+ cast<VectorType>(S->getType())->getElementCount()) &&
"Invalid cast");
if (Ty->isIntOrIntVectorTy())
@@ -2987,8 +3031,8 @@ CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
"Invalid cast");
assert(Ty->isVectorTy() == S->getType()->isVectorTy() && "Invalid cast");
assert((!Ty->isVectorTy() ||
- cast<VectorType>(Ty)->getNumElements() ==
- cast<VectorType>(S->getType())->getNumElements()) &&
+ cast<VectorType>(Ty)->getElementCount() ==
+ cast<VectorType>(S->getType())->getElementCount()) &&
"Invalid cast");
if (Ty->isIntOrIntVectorTy())
@@ -3088,63 +3132,6 @@ CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
return Create(opcode, C, Ty, Name, InsertAtEnd);
}
-// Check whether it is valid to call getCastOpcode for these types.
-// This routine must be kept in sync with getCastOpcode.
-bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
- if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType())
- return false;
-
- if (SrcTy == DestTy)
- return true;
-
- if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
- if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
- if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
- // An element by element cast. Valid if casting the elements is valid.
- SrcTy = SrcVecTy->getElementType();
- DestTy = DestVecTy->getElementType();
- }
-
- // Get the bit sizes, we'll need these
- TypeSize SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
- TypeSize DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
-
- // Run through the possibilities ...
- if (DestTy->isIntegerTy()) { // Casting to integral
- if (SrcTy->isIntegerTy()) // Casting from integral
- return true;
- if (SrcTy->isFloatingPointTy()) // Casting from floating pt
- return true;
- if (SrcTy->isVectorTy()) // Casting from vector
- return DestBits == SrcBits;
- // Casting from something else
- return SrcTy->isPointerTy();
- }
- if (DestTy->isFloatingPointTy()) { // Casting to floating pt
- if (SrcTy->isIntegerTy()) // Casting from integral
- return true;
- if (SrcTy->isFloatingPointTy()) // Casting from floating pt
- return true;
- if (SrcTy->isVectorTy()) // Casting from vector
- return DestBits == SrcBits;
- // Casting from something else
- return false;
- }
- if (DestTy->isVectorTy()) // Casting to vector
- return DestBits == SrcBits;
- if (DestTy->isPointerTy()) { // Casting to pointer
- if (SrcTy->isPointerTy()) // Casting from pointer
- return true;
- return SrcTy->isIntegerTy(); // Casting from integral
- }
- if (DestTy->isX86_MMXTy()) {
- if (SrcTy->isVectorTy())
- return DestBits == SrcBits; // 64-bit vector to MMX
- return false;
- } // Casting to something else
- return false;
-}
-
bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) {
if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType())
return false;
@@ -3206,7 +3193,6 @@ bool CastInst::isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy,
// castIsValid( getCastOpcode(Val, Ty), Val, Ty)
// should not assert in castIsValid. In other words, this produces a "correct"
// casting opcode for the arguments passed to it.
-// This routine must be kept in sync with isCastable.
Instruction::CastOps
CastInst::getCastOpcode(
const Value *Src, bool SrcIsSigned, Type *DestTy, bool DestIsSigned) {
@@ -3221,7 +3207,7 @@ CastInst::getCastOpcode(
// FIXME: Check address space sizes here
if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
- if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
+ if (SrcVecTy->getElementCount() == DestVecTy->getElementCount()) {
// An element by element cast. Find the appropriate opcode based on the
// element types.
SrcTy = SrcVecTy->getElementType();
@@ -3311,10 +3297,7 @@ CastInst::getCastOpcode(
/// it in one place and to eliminate the redundant code for getting the sizes
/// of the types involved.
bool
-CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
- // Check for type sanity on the arguments
- Type *SrcTy = S->getType();
-
+CastInst::castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy) {
if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() ||
SrcTy->isAggregateType() || DstTy->isAggregateType())
return false;
@@ -3330,9 +3313,9 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
// scalar types means that checking that vector lengths match also checks that
// scalars are not being converted to vectors or vectors to scalars).
ElementCount SrcEC = SrcIsVec ? cast<VectorType>(SrcTy)->getElementCount()
- : ElementCount(0, false);
+ : ElementCount::getFixed(0);
ElementCount DstEC = DstIsVec ? cast<VectorType>(DstTy)->getElementCount()
- : ElementCount(0, false);
+ : ElementCount::getFixed(0);
// Switch on the opcode provided
switch (op) {
@@ -3390,9 +3373,9 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
if (SrcIsVec && DstIsVec)
return SrcEC == DstEC;
if (SrcIsVec)
- return SrcEC == ElementCount(1, false);
+ return SrcEC == ElementCount::getFixed(1);
if (DstIsVec)
- return DstEC == ElementCount(1, false);
+ return DstEC == ElementCount::getFixed(1);
return true;
}
@@ -3643,10 +3626,12 @@ bool CmpInst::isCommutative() const {
return cast<FCmpInst>(this)->isCommutative();
}
-bool CmpInst::isEquality() const {
- if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
- return IC->isEquality();
- return cast<FCmpInst>(this)->isEquality();
+bool CmpInst::isEquality(Predicate P) {
+ if (ICmpInst::isIntPredicate(P))
+ return ICmpInst::isEquality(P);
+ if (FCmpInst::isFPPredicate(P))
+ return FCmpInst::isEquality(P);
+ llvm_unreachable("Unsupported predicate kind");
}
CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
@@ -3740,29 +3725,6 @@ ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
}
}
-CmpInst::Predicate CmpInst::getFlippedStrictnessPredicate(Predicate pred) {
- switch (pred) {
- default: llvm_unreachable("Unknown or unsupported cmp predicate!");
- case ICMP_SGT: return ICMP_SGE;
- case ICMP_SLT: return ICMP_SLE;
- case ICMP_SGE: return ICMP_SGT;
- case ICMP_SLE: return ICMP_SLT;
- case ICMP_UGT: return ICMP_UGE;
- case ICMP_ULT: return ICMP_ULE;
- case ICMP_UGE: return ICMP_UGT;
- case ICMP_ULE: return ICMP_ULT;
-
- case FCMP_OGT: return FCMP_OGE;
- case FCMP_OLT: return FCMP_OLE;
- case FCMP_OGE: return FCMP_OGT;
- case FCMP_OLE: return FCMP_OLT;
- case FCMP_UGT: return FCMP_UGE;
- case FCMP_ULT: return FCMP_ULE;
- case FCMP_UGE: return FCMP_UGT;
- case FCMP_ULE: return FCMP_ULT;
- }
-}
-
CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
switch (pred) {
default: llvm_unreachable("Unknown cmp predicate!");
@@ -3793,22 +3755,97 @@ CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
}
}
+bool CmpInst::isNonStrictPredicate(Predicate pred) {
+ switch (pred) {
+ case ICMP_SGE:
+ case ICMP_SLE:
+ case ICMP_UGE:
+ case ICMP_ULE:
+ case FCMP_OGE:
+ case FCMP_OLE:
+ case FCMP_UGE:
+ case FCMP_ULE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool CmpInst::isStrictPredicate(Predicate pred) {
+ switch (pred) {
+ case ICMP_SGT:
+ case ICMP_SLT:
+ case ICMP_UGT:
+ case ICMP_ULT:
+ case FCMP_OGT:
+ case FCMP_OLT:
+ case FCMP_UGT:
+ case FCMP_ULT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+CmpInst::Predicate CmpInst::getStrictPredicate(Predicate pred) {
+ switch (pred) {
+ case ICMP_SGE:
+ return ICMP_SGT;
+ case ICMP_SLE:
+ return ICMP_SLT;
+ case ICMP_UGE:
+ return ICMP_UGT;
+ case ICMP_ULE:
+ return ICMP_ULT;
+ case FCMP_OGE:
+ return FCMP_OGT;
+ case FCMP_OLE:
+ return FCMP_OLT;
+ case FCMP_UGE:
+ return FCMP_UGT;
+ case FCMP_ULE:
+ return FCMP_ULT;
+ default:
+ return pred;
+ }
+}
+
CmpInst::Predicate CmpInst::getNonStrictPredicate(Predicate pred) {
switch (pred) {
- case ICMP_SGT: return ICMP_SGE;
- case ICMP_SLT: return ICMP_SLE;
- case ICMP_UGT: return ICMP_UGE;
- case ICMP_ULT: return ICMP_ULE;
- case FCMP_OGT: return FCMP_OGE;
- case FCMP_OLT: return FCMP_OLE;
- case FCMP_UGT: return FCMP_UGE;
- case FCMP_ULT: return FCMP_ULE;
- default: return pred;
+ case ICMP_SGT:
+ return ICMP_SGE;
+ case ICMP_SLT:
+ return ICMP_SLE;
+ case ICMP_UGT:
+ return ICMP_UGE;
+ case ICMP_ULT:
+ return ICMP_ULE;
+ case FCMP_OGT:
+ return FCMP_OGE;
+ case FCMP_OLT:
+ return FCMP_OLE;
+ case FCMP_UGT:
+ return FCMP_UGE;
+ case FCMP_ULT:
+ return FCMP_ULE;
+ default:
+ return pred;
}
}
+CmpInst::Predicate CmpInst::getFlippedStrictnessPredicate(Predicate pred) {
+ assert(CmpInst::isRelational(pred) && "Call only with relational predicate!");
+
+ if (isStrictPredicate(pred))
+ return getNonStrictPredicate(pred);
+ if (isNonStrictPredicate(pred))
+ return getStrictPredicate(pred);
+
+ llvm_unreachable("Unknown predicate!");
+}
+
CmpInst::Predicate CmpInst::getSignedPredicate(Predicate pred) {
- assert(CmpInst::isUnsigned(pred) && "Call only with signed predicates!");
+ assert(CmpInst::isUnsigned(pred) && "Call only with unsigned predicates!");
switch (pred) {
default:
@@ -3824,6 +3861,23 @@ CmpInst::Predicate CmpInst::getSignedPredicate(Predicate pred) {
}
}
+CmpInst::Predicate CmpInst::getUnsignedPredicate(Predicate pred) {
+ assert(CmpInst::isSigned(pred) && "Call only with signed predicates!");
+
+ switch (pred) {
+ default:
+ llvm_unreachable("Unknown predicate!");
+ case CmpInst::ICMP_SLT:
+ return CmpInst::ICMP_ULT;
+ case CmpInst::ICMP_SLE:
+ return CmpInst::ICMP_ULE;
+ case CmpInst::ICMP_SGT:
+ return CmpInst::ICMP_UGT;
+ case CmpInst::ICMP_SGE:
+ return CmpInst::ICMP_UGE;
+ }
+}
+
bool CmpInst::isUnsigned(Predicate predicate) {
switch (predicate) {
default: return false;
@@ -3840,6 +3894,18 @@ bool CmpInst::isSigned(Predicate predicate) {
}
}
+CmpInst::Predicate CmpInst::getFlippedSignednessPredicate(Predicate pred) {
+ assert(CmpInst::isRelational(pred) &&
+ "Call only with non-equality predicates!");
+
+ if (isSigned(pred))
+ return getUnsignedPredicate(pred);
+ if (isUnsigned(pred))
+ return getSignedPredicate(pred);
+
+ llvm_unreachable("Unknown predicate!");
+}
+
bool CmpInst::isOrdered(Predicate predicate) {
switch (predicate) {
default: return false;
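Note: getAllocationSizeInBits() now returns an Optional<TypeSize>, so scalable allocas must be handled explicitly, and the new CmpInst predicate helpers compose signedness and strictness flips. A hedged sketch; the helpers and the size limit are illustrative:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static bool fitsInBits(const AllocaInst &AI, const DataLayout &DL,
                           uint64_t Limit) {
      Optional<TypeSize> Size = AI.getAllocationSizeInBits(DL);
      if (!Size || Size->isScalable())
        return false;                   // unknown or vscale-dependent size
      return Size->getFixedSize() <= Limit;
    }

    static CmpInst::Predicate unsignedForm(CmpInst::Predicate P) {
      // Valid only for signed/unsigned integer relational predicates,
      // e.g. ICMP_SLT -> ICMP_ULT.
      return CmpInst::getFlippedSignednessPredicate(P);
    }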
diff --git a/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp b/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
index c4e06cd979ed..3d1ea2853591 100644
--- a/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
@@ -208,7 +208,7 @@ Optional<int> VPIntrinsic::GetMaskParamPos(Intrinsic::ID IntrinsicID) {
default:
return None;
-#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
return MASKPOS;
#include "llvm/IR/VPIntrinsics.def"
@@ -220,7 +220,7 @@ Optional<int> VPIntrinsic::GetVectorLengthParamPos(Intrinsic::ID IntrinsicID) {
default:
return None;
-#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
return VLENPOS;
#include "llvm/IR/VPIntrinsics.def"
@@ -232,7 +232,7 @@ bool VPIntrinsic::IsVPIntrinsic(Intrinsic::ID ID) {
default:
return false;
-#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
break;
#include "llvm/IR/VPIntrinsics.def"
@@ -242,25 +242,26 @@ bool VPIntrinsic::IsVPIntrinsic(Intrinsic::ID ID) {
// Equivalent non-predicated opcode
unsigned VPIntrinsic::GetFunctionalOpcodeForVP(Intrinsic::ID ID) {
+ unsigned FunctionalOC = Instruction::Call;
switch (ID) {
default:
- return Instruction::Call;
-
-#define HANDLE_VP_TO_OC(VPID, OC) \
- case Intrinsic::VPID: \
- return Instruction::OC;
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define HANDLE_VP_TO_OPC(OPC) FunctionalOC = Instruction::OPC;
+#define END_REGISTER_VP_INTRINSIC(...) break;
#include "llvm/IR/VPIntrinsics.def"
}
+
+ return FunctionalOC;
}
-Intrinsic::ID VPIntrinsic::GetForOpcode(unsigned OC) {
- switch (OC) {
+Intrinsic::ID VPIntrinsic::GetForOpcode(unsigned IROPC) {
+ switch (IROPC) {
default:
return Intrinsic::not_intrinsic;
-#define HANDLE_VP_TO_OC(VPID, OC) \
- case Instruction::OC: \
- return Intrinsic::VPID;
+#define HANDLE_VP_TO_OPC(OPC) case Instruction::OPC:
+#define END_REGISTER_VP_INTRINSIC(VPID) return Intrinsic::VPID;
#include "llvm/IR/VPIntrinsics.def"
}
}
@@ -280,8 +281,8 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const {
// the operation. This function returns true when this is detected statically
// in the IR.
- // Check whether "W == vscale * EC.Min"
- if (EC.Scalable) {
+ // Check whether "W == vscale * EC.getKnownMinValue()"
+ if (EC.isScalable()) {
// Undig the DL
auto ParMod = this->getModule();
if (!ParMod)
@@ -291,8 +292,8 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const {
// Compare vscale patterns
uint64_t VScaleFactor;
if (match(VLParam, m_c_Mul(m_ConstantInt(VScaleFactor), m_VScale(DL))))
- return VScaleFactor >= EC.Min;
- return (EC.Min == 1) && match(VLParam, m_VScale(DL));
+ return VScaleFactor >= EC.getKnownMinValue();
+ return (EC.getKnownMinValue() == 1) && match(VLParam, m_VScale(DL));
}
// standard SIMD operation
@@ -301,7 +302,7 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const {
return false;
uint64_t VLNum = VLConst->getZExtValue();
- if (VLNum >= EC.Min)
+ if (VLNum >= EC.getKnownMinValue())
return true;
return false;
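Note: a sketch of the VPIntrinsic helpers whose tables are now driven by the BEGIN/END_REGISTER_VP_INTRINSIC macros above. VPI is assumed to be an already-matched vector-predicated call; the helper name is made up:

    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    static void inspectVP(const VPIntrinsic &VPI) {
      if (Optional<int> MaskPos =
              VPIntrinsic::GetMaskParamPos(VPI.getIntrinsicID()))
        (void)*MaskPos;                 // position of the %mask operand
      // True when the %evl operand provably covers the whole, possibly
      // scalable, vector (e.g. %evl == vscale * known minimum length).
      bool FullLength = VPI.canIgnoreVectorLengthParam();
      (void)FullLength;
    }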
diff --git a/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp
index 7ebca5274369..4f292101256d 100644
--- a/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp
@@ -146,11 +146,16 @@ bool LLVMContext::getDiagnosticsHotnessRequested() const {
return pImpl->DiagnosticsHotnessRequested;
}
-void LLVMContext::setDiagnosticsHotnessThreshold(uint64_t Threshold) {
+void LLVMContext::setDiagnosticsHotnessThreshold(Optional<uint64_t> Threshold) {
pImpl->DiagnosticsHotnessThreshold = Threshold;
}
+
uint64_t LLVMContext::getDiagnosticsHotnessThreshold() const {
- return pImpl->DiagnosticsHotnessThreshold;
+ return pImpl->DiagnosticsHotnessThreshold.getValueOr(UINT64_MAX);
+}
+
+bool LLVMContext::isDiagnosticsHotnessThresholdSetFromPSI() const {
+ return !pImpl->DiagnosticsHotnessThreshold.hasValue();
}
remarks::RemarkStreamer *LLVMContext::getMainRemarkStreamer() {
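Note: the hotness threshold is now Optional; leaving it unset requests a threshold derived from profile summary info, which isDiagnosticsHotnessThresholdSetFromPSI() reports. A small sketch with an invented helper:

    #include "llvm/ADT/None.h"
    #include "llvm/IR/LLVMContext.h"

    using namespace llvm;

    static void configureRemarks(LLVMContext &Ctx, bool LetPSIDecide) {
      if (LetPSIDecide)
        Ctx.setDiagnosticsHotnessThreshold(None); // fill in from PSI later
      else
        Ctx.setDiagnosticsHotnessThreshold(100);  // explicit cutoff
      // Reads back as UINT64_MAX while still unset.
      (void)Ctx.getDiagnosticsHotnessThreshold();
      (void)Ctx.isDiagnosticsHotnessThresholdSetFromPSI();
    }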
diff --git a/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.cpp b/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.cpp
index f197b3e67d30..e998138ec3cb 100644
--- a/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.cpp
@@ -35,6 +35,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
FP128Ty(C, Type::FP128TyID),
PPC_FP128Ty(C, Type::PPC_FP128TyID),
X86_MMXTy(C, Type::X86_MMXTyID),
+ X86_AMXTy(C, Type::X86_AMXTyID),
Int1Ty(C, 1),
Int8Ty(C, 8),
Int16Ty(C, 16),
@@ -50,11 +51,10 @@ LLVMContextImpl::~LLVMContextImpl() {
delete *OwnedModules.begin();
#ifndef NDEBUG
- // Check for metadata references from leaked Instructions.
- for (auto &Pair : InstructionMetadata)
+ // Check for metadata references from leaked Values.
+ for (auto &Pair : ValueMetadata)
Pair.first->dump();
- assert(InstructionMetadata.empty() &&
- "Instructions with metadata have been leaked");
+ assert(ValueMetadata.empty() && "Values with metadata have been leaked");
#endif
// Drop references for MDNodes. Do this before Values get deleted to avoid
@@ -98,11 +98,9 @@ LLVMContextImpl::~LLVMContextImpl() {
CAZConstants.clear();
CPNConstants.clear();
UVConstants.clear();
+ PVConstants.clear();
IntConstants.clear();
FPConstants.clear();
-
- for (auto &CDSConstant : CDSConstants)
- delete CDSConstant.second;
CDSConstants.clear();
// Destroy attribute node lists.
@@ -129,8 +127,15 @@ LLVMContextImpl::~LLVMContextImpl() {
}
void LLVMContextImpl::dropTriviallyDeadConstantArrays() {
- SmallSetVector<ConstantArray *, 4> WorkList(ArrayConstants.begin(),
- ArrayConstants.end());
+ SmallSetVector<ConstantArray *, 4> WorkList;
+
+ // When ArrayConstants are of substantial size and only a few of them are
+ // dead, seeding WorkList with all elements of ArrayConstants can be
+ // wasteful. Instead, seed WorkList only with the elements that have no
+ // uses.
+ for (ConstantArray *C : ArrayConstants)
+ if (C->use_empty())
+ WorkList.insert(C);
while (!WorkList.empty()) {
ConstantArray *C = WorkList.pop_back_val();
@@ -171,7 +176,7 @@ unsigned MDNodeOpsKey::calculateHash(MDNode *N, unsigned Offset) {
unsigned Hash = hash_combine_range(N->op_begin() + Offset, N->op_end());
#ifndef NDEBUG
{
- SmallVector<Metadata *, 8> MDs(N->op_begin() + Offset, N->op_end());
+ SmallVector<Metadata *, 8> MDs(drop_begin(N->operands(), Offset));
unsigned RawHash = calculateHash(MDs);
assert(Hash == RawHash &&
"Expected hash of MDOperand to equal hash of Metadata*");
@@ -215,19 +220,8 @@ void LLVMContextImpl::getSyncScopeNames(
SSNs[SSE.second] = SSE.first();
}
-/// Singleton instance of the OptBisect class.
-///
-/// This singleton is accessed via the LLVMContext::getOptPassGate() function.
-/// It provides a mechanism to disable passes and individual optimizations at
-/// compile time based on a command line option (-opt-bisect-limit) in order to
-/// perform a bisecting search for optimization-related problems.
-///
-/// Even if multiple LLVMContext objects are created, they will all return the
-/// same instance of OptBisect in order to provide a single bisect count. Any
-/// code that uses the OptBisect object should be serialized when bisection is
-/// enabled in order to enable a consistent bisect count.
-static ManagedStatic<OptBisect> OptBisector;
-
+/// Gets the OptPassGate for this LLVMContextImpl, which defaults to the
+/// singleton OptBisect if not explicitly set.
OptPassGate &LLVMContextImpl::getOptPassGate() const {
if (!OPG)
OPG = &(*OptBisector);
diff --git a/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h
index 1c7d8746d242..05fd1814e2dc 100644
--- a/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h
+++ b/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h
@@ -57,27 +57,7 @@ class Type;
class Value;
class ValueHandleBase;
-struct DenseMapAPIntKeyInfo {
- static inline APInt getEmptyKey() {
- APInt V(nullptr, 0);
- V.U.VAL = 0;
- return V;
- }
-
- static inline APInt getTombstoneKey() {
- APInt V(nullptr, 0);
- V.U.VAL = 1;
- return V;
- }
-
- static unsigned getHashValue(const APInt &Key) {
- return static_cast<unsigned>(hash_value(Key));
- }
-
- static bool isEqual(const APInt &LHS, const APInt &RHS) {
- return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
- }
-};
+using DenseMapAPIntKeyInfo = DenseMapInfo<APInt>;
struct DenseMapAPFloatKeyInfo {
static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); }
@@ -366,6 +346,36 @@ template <> struct MDNodeKeyImpl<DISubrange> {
}
};
+template <> struct MDNodeKeyImpl<DIGenericSubrange> {
+ Metadata *CountNode;
+ Metadata *LowerBound;
+ Metadata *UpperBound;
+ Metadata *Stride;
+
+ MDNodeKeyImpl(Metadata *CountNode, Metadata *LowerBound, Metadata *UpperBound,
+ Metadata *Stride)
+ : CountNode(CountNode), LowerBound(LowerBound), UpperBound(UpperBound),
+ Stride(Stride) {}
+ MDNodeKeyImpl(const DIGenericSubrange *N)
+ : CountNode(N->getRawCountNode()), LowerBound(N->getRawLowerBound()),
+ UpperBound(N->getRawUpperBound()), Stride(N->getRawStride()) {}
+
+ bool isKeyOf(const DIGenericSubrange *RHS) const {
+ return (CountNode == RHS->getRawCountNode()) &&
+ (LowerBound == RHS->getRawLowerBound()) &&
+ (UpperBound == RHS->getRawUpperBound()) &&
+ (Stride == RHS->getRawStride());
+ }
+
+ unsigned getHashValue() const {
+ auto *MD = dyn_cast_or_null<ConstantAsMetadata>(CountNode);
+ if (CountNode && MD)
+ return hash_combine(cast<ConstantInt>(MD->getValue())->getSExtValue(),
+ LowerBound, UpperBound, Stride);
+ return hash_combine(CountNode, LowerBound, UpperBound, Stride);
+ }
+};
+
template <> struct MDNodeKeyImpl<DIEnumerator> {
APInt Value;
MDString *Name;
@@ -417,6 +427,37 @@ template <> struct MDNodeKeyImpl<DIBasicType> {
}
};
+template <> struct MDNodeKeyImpl<DIStringType> {
+ unsigned Tag;
+ MDString *Name;
+ Metadata *StringLength;
+ Metadata *StringLengthExp;
+ uint64_t SizeInBits;
+ uint32_t AlignInBits;
+ unsigned Encoding;
+
+ MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *StringLength,
+ Metadata *StringLengthExp, uint64_t SizeInBits,
+ uint32_t AlignInBits, unsigned Encoding)
+ : Tag(Tag), Name(Name), StringLength(StringLength),
+ StringLengthExp(StringLengthExp), SizeInBits(SizeInBits),
+ AlignInBits(AlignInBits), Encoding(Encoding) {}
+ MDNodeKeyImpl(const DIStringType *N)
+ : Tag(N->getTag()), Name(N->getRawName()),
+ StringLength(N->getRawStringLength()),
+ StringLengthExp(N->getRawStringLengthExp()),
+ SizeInBits(N->getSizeInBits()), AlignInBits(N->getAlignInBits()),
+ Encoding(N->getEncoding()) {}
+
+ bool isKeyOf(const DIStringType *RHS) const {
+ return Tag == RHS->getTag() && Name == RHS->getRawName() &&
+ SizeInBits == RHS->getSizeInBits() &&
+ AlignInBits == RHS->getAlignInBits() &&
+ Encoding == RHS->getEncoding();
+ }
+ unsigned getHashValue() const { return hash_combine(Tag, Name, Encoding); }
+};
+
template <> struct MDNodeKeyImpl<DIDerivedType> {
unsigned Tag;
MDString *Name;
@@ -525,6 +566,9 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
MDString *Identifier;
Metadata *Discriminator;
Metadata *DataLocation;
+ Metadata *Associated;
+ Metadata *Allocated;
+ Metadata *Rank;
MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
@@ -532,13 +576,15 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams,
MDString *Identifier, Metadata *Discriminator,
- Metadata *DataLocation)
+ Metadata *DataLocation, Metadata *Associated,
+ Metadata *Allocated, Metadata *Rank)
: Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
BaseType(BaseType), SizeInBits(SizeInBits), OffsetInBits(OffsetInBits),
AlignInBits(AlignInBits), Flags(Flags), Elements(Elements),
RuntimeLang(RuntimeLang), VTableHolder(VTableHolder),
TemplateParams(TemplateParams), Identifier(Identifier),
- Discriminator(Discriminator), DataLocation(DataLocation) {}
+ Discriminator(Discriminator), DataLocation(DataLocation),
+ Associated(Associated), Allocated(Allocated), Rank(Rank) {}
MDNodeKeyImpl(const DICompositeType *N)
: Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Scope(N->getRawScope()),
@@ -549,7 +595,9 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
TemplateParams(N->getRawTemplateParams()),
Identifier(N->getRawIdentifier()),
Discriminator(N->getRawDiscriminator()),
- DataLocation(N->getRawDataLocation()) {}
+ DataLocation(N->getRawDataLocation()),
+ Associated(N->getRawAssociated()), Allocated(N->getRawAllocated()),
+ Rank(N->getRawRank()) {}
bool isKeyOf(const DICompositeType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
@@ -564,7 +612,9 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
TemplateParams == RHS->getRawTemplateParams() &&
Identifier == RHS->getRawIdentifier() &&
Discriminator == RHS->getRawDiscriminator() &&
- DataLocation == RHS->getRawDataLocation();
+ DataLocation == RHS->getRawDataLocation() &&
+ Associated == RHS->getRawAssociated() &&
+ Allocated == RHS->getRawAllocated() && Rank == RHS->getRawRank();
}
unsigned getHashValue() const {
@@ -841,25 +891,28 @@ template <> struct MDNodeKeyImpl<DIModule> {
MDString *IncludePath;
MDString *APINotesFile;
unsigned LineNo;
+ bool IsDecl;
MDNodeKeyImpl(Metadata *File, Metadata *Scope, MDString *Name,
MDString *ConfigurationMacros, MDString *IncludePath,
- MDString *APINotesFile, unsigned LineNo)
+ MDString *APINotesFile, unsigned LineNo, bool IsDecl)
: File(File), Scope(Scope), Name(Name),
ConfigurationMacros(ConfigurationMacros), IncludePath(IncludePath),
- APINotesFile(APINotesFile), LineNo(LineNo) {}
+ APINotesFile(APINotesFile), LineNo(LineNo), IsDecl(IsDecl) {}
MDNodeKeyImpl(const DIModule *N)
: File(N->getRawFile()), Scope(N->getRawScope()), Name(N->getRawName()),
ConfigurationMacros(N->getRawConfigurationMacros()),
IncludePath(N->getRawIncludePath()),
- APINotesFile(N->getRawAPINotesFile()), LineNo(N->getLineNo()) {}
+ APINotesFile(N->getRawAPINotesFile()), LineNo(N->getLineNo()),
+ IsDecl(N->getIsDecl()) {}
bool isKeyOf(const DIModule *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
ConfigurationMacros == RHS->getRawConfigurationMacros() &&
IncludePath == RHS->getRawIncludePath() &&
APINotesFile == RHS->getRawAPINotesFile() &&
- File == RHS->getRawFile() && LineNo == RHS->getLineNo();
+ File == RHS->getRawFile() && LineNo == RHS->getLineNo() &&
+ IsDecl == RHS->getIsDecl();
}
unsigned getHashValue() const {
@@ -1204,72 +1257,55 @@ template <class NodeTy> struct MDNodeInfo {
#define HANDLE_MDNODE_LEAF(CLASS) using CLASS##Info = MDNodeInfo<CLASS>;
#include "llvm/IR/Metadata.def"
-/// Map-like storage for metadata attachments.
-class MDAttachmentMap {
- SmallVector<std::pair<unsigned, TrackingMDNodeRef>, 2> Attachments;
-
+/// Multimap-like storage for metadata attachments.
+class MDAttachments {
public:
- bool empty() const { return Attachments.empty(); }
- size_t size() const { return Attachments.size(); }
-
- /// Get a particular attachment (if any).
- MDNode *lookup(unsigned ID) const;
-
- /// Set an attachment to a particular node.
- ///
- /// Set the \c ID attachment to \c MD, replacing the current attachment at \c
- /// ID (if anyway).
- void set(unsigned ID, MDNode &MD);
-
- /// Remove an attachment.
- ///
- /// Remove the attachment at \c ID, if any.
- bool erase(unsigned ID);
-
- /// Copy out all the attachments.
- ///
- /// Copies all the current attachments into \c Result, sorting by attachment
- /// ID. This function does \em not clear \c Result.
- void getAll(SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const;
-
- /// Erase matching attachments.
- ///
- /// Erases all attachments matching the \c shouldRemove predicate.
- template <class PredTy> void remove_if(PredTy shouldRemove) {
- Attachments.erase(llvm::remove_if(Attachments, shouldRemove),
- Attachments.end());
- }
-};
-
-/// Multimap-like storage for metadata attachments for globals. This differs
-/// from MDAttachmentMap in that it allows multiple attachments per metadata
-/// kind.
-class MDGlobalAttachmentMap {
struct Attachment {
unsigned MDKind;
TrackingMDNodeRef Node;
};
+
+private:
SmallVector<Attachment, 1> Attachments;
public:
bool empty() const { return Attachments.empty(); }
-
- /// Appends all attachments with the given ID to \c Result in insertion order.
- /// If the global has no attachments with the given ID, or if ID is invalid,
- /// leaves Result unchanged.
- void get(unsigned ID, SmallVectorImpl<MDNode *> &Result) const;
+ size_t size() const { return Attachments.size(); }
/// Returns the first attachment with the given ID or nullptr if no such
/// attachment exists.
MDNode *lookup(unsigned ID) const;
- void insert(unsigned ID, MDNode &MD);
- bool erase(unsigned ID);
+ /// Appends all attachments with the given ID to \c Result in insertion order.
+ /// If the global has no attachments with the given ID, or if ID is invalid,
+ /// leaves Result unchanged.
+ void get(unsigned ID, SmallVectorImpl<MDNode *> &Result) const;
/// Appends all attachments for the global to \c Result, sorting by attachment
/// ID. Attachments with the same ID appear in insertion order. This function
/// does \em not clear \c Result.
void getAll(SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const;
+
+ /// Set an attachment to a particular node.
+ ///
+ /// Set the \c ID attachment to \c MD, replacing the current attachments at \c
+ /// ID (if any).
+ void set(unsigned ID, MDNode *MD);
+
+ /// Adds an attachment to a particular node.
+ void insert(unsigned ID, MDNode &MD);
+
+ /// Remove attachments with the given ID.
+ ///
+ /// Remove the attachments at \c ID, if any.
+ bool erase(unsigned ID);
+
+ /// Erase matching attachments.
+ ///
+ /// Erases all attachments matching the \c shouldRemove predicate.
+ template <class PredTy> void remove_if(PredTy shouldRemove) {
+ llvm::erase_if(Attachments, shouldRemove);
+ }
};
class LLVMContextImpl {
@@ -1289,7 +1325,26 @@ public:
std::unique_ptr<DiagnosticHandler> DiagHandler;
bool RespectDiagnosticFilters = false;
bool DiagnosticsHotnessRequested = false;
- uint64_t DiagnosticsHotnessThreshold = 0;
+ /// The minimum hotness value a diagnostic needs in order to be included in
+ /// optimization diagnostics.
+ ///
/// The threshold is an Optional value, which maps to one of three states:
/// 1). 0 => threshold disabled. All remarks will be printed.
/// 2). positive int => manual threshold set by the user. Remarks whose hotness
/// exceeds the threshold will be printed.
/// 3). None => 'auto' threshold requested by the user. The actual value is not
/// available on the command line, but will be synced with the
/// hotness threshold from the profile summary during
/// compilation.
+ ///
+ /// States 1 and 2 are terminal. The only allowed transition is from 3 to 2,
+ /// when the threshold is first synced with the profile summary. This ensures
+ /// that the threshold is set only once and stays constant.
+ ///
+ /// If threshold option is not specified, it is disabled (0) by default.
+ Optional<uint64_t> DiagnosticsHotnessThreshold = 0;
+
/// The specialized remark streamer used by LLVM's OptimizationRemarkEmitter.
std::unique_ptr<LLVMRemarkStreamer> LLVMRS;
@@ -1342,10 +1397,15 @@ public:
DenseMap<Type *, std::unique_ptr<UndefValue>> UVConstants;
- StringMap<ConstantDataSequential*> CDSConstants;
+ DenseMap<Type *, std::unique_ptr<PoisonValue>> PVConstants;
+
+ StringMap<std::unique_ptr<ConstantDataSequential>> CDSConstants;
DenseMap<std::pair<const Function *, const BasicBlock *>, BlockAddress *>
BlockAddresses;
+
+ DenseMap<const GlobalValue *, DSOLocalEquivalent *> DSOLocalEquivalents;
+
ConstantUniqueMap<ConstantExpr> ExprConstants;
ConstantUniqueMap<InlineAsm> InlineAsms;
@@ -1358,7 +1418,7 @@ public:
// Basic type instances.
Type VoidTy, LabelTy, HalfTy, BFloatTy, FloatTy, DoubleTy, MetadataTy,
TokenTy;
- Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
+ Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy, X86_AMXTy;
IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
BumpPtrAllocator Alloc;
@@ -1387,11 +1447,8 @@ public:
/// CustomMDKindNames - Map to hold the metadata string to ID mapping.
StringMap<unsigned> CustomMDKindNames;
- /// Collection of per-instruction metadata used in this context.
- DenseMap<const Instruction *, MDAttachmentMap> InstructionMetadata;
-
- /// Collection of per-GlobalObject metadata used in this context.
- DenseMap<const GlobalObject *, MDGlobalAttachmentMap> GlobalObjectMetadata;
+ /// Collection of metadata used in this context.
+ DenseMap<const Value *, MDAttachments> ValueMetadata;
/// Collection of per-GlobalObject sections used in this context.
DenseMap<const GlobalObject *, StringRef> GlobalObjectSections;
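A standalone sketch of the multimap-like semantics the new MDAttachments class adopts (several attachments may share a kind ID; set() replaces, insert() appends), using std::vector and a stand-in Node type instead of SmallVector and TrackingMDNodeRef; all names here are illustrative, not LLVM's.

#include <algorithm>
#include <cstddef>
#include <vector>

// Stand-in node type; in LLVM this would be an MDNode held by TrackingMDNodeRef.
struct Node {};

// Multimap-like storage for (kind, node) attachments.
class Attachments {
  struct Entry { unsigned Kind; Node *N; };
  std::vector<Entry> Items;

public:
  bool empty() const { return Items.empty(); }
  std::size_t size() const { return Items.size(); }

  // First attachment with the given ID, or nullptr.
  Node *lookup(unsigned ID) const {
    for (const Entry &E : Items)
      if (E.Kind == ID)
        return E.N;
    return nullptr;
  }

  // Append another attachment with the same ID (multimap behaviour).
  void insert(unsigned ID, Node &N) { Items.push_back({ID, &N}); }

  // Replace all attachments at ID with a single node (or drop them if null).
  void set(unsigned ID, Node *N) {
    erase(ID);
    if (N)
      insert(ID, *N);
  }

  // Remove every attachment with the given ID; returns true if any existed.
  bool erase(unsigned ID) {
    auto OldSize = Items.size();
    Items.erase(std::remove_if(Items.begin(), Items.end(),
                               [ID](const Entry &E) { return E.Kind == ID; }),
                Items.end());
    return OldSize != Items.size();
  }
};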
diff --git a/contrib/llvm-project/llvm/lib/IR/LLVMRemarkStreamer.cpp b/contrib/llvm-project/llvm/lib/IR/LLVMRemarkStreamer.cpp
index 96001ab42c38..18b47611c97c 100644
--- a/contrib/llvm-project/llvm/lib/IR/LLVMRemarkStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/LLVMRemarkStreamer.cpp
@@ -92,12 +92,11 @@ char LLVMRemarkSetupFormatError::ID = 0;
Expected<std::unique_ptr<ToolOutputFile>> llvm::setupLLVMOptimizationRemarks(
LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses,
StringRef RemarksFormat, bool RemarksWithHotness,
- unsigned RemarksHotnessThreshold) {
+ Optional<uint64_t> RemarksHotnessThreshold) {
if (RemarksWithHotness)
Context.setDiagnosticsHotnessRequested(true);
- if (RemarksHotnessThreshold)
- Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold);
+ Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold);
if (RemarksFilename.empty())
return nullptr;
@@ -137,16 +136,14 @@ Expected<std::unique_ptr<ToolOutputFile>> llvm::setupLLVMOptimizationRemarks(
return std::move(RemarksFile);
}
-Error llvm::setupLLVMOptimizationRemarks(LLVMContext &Context, raw_ostream &OS,
- StringRef RemarksPasses,
- StringRef RemarksFormat,
- bool RemarksWithHotness,
- unsigned RemarksHotnessThreshold) {
+Error llvm::setupLLVMOptimizationRemarks(
+ LLVMContext &Context, raw_ostream &OS, StringRef RemarksPasses,
+ StringRef RemarksFormat, bool RemarksWithHotness,
+ Optional<uint64_t> RemarksHotnessThreshold) {
if (RemarksWithHotness)
Context.setDiagnosticsHotnessRequested(true);
- if (RemarksHotnessThreshold)
- Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold);
+ Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold);
Expected<remarks::Format> Format = remarks::parseFormat(RemarksFormat);
if (Error E = Format.takeError())
diff --git a/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
index 4189aea46294..4547c3a01239 100644
--- a/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
@@ -20,6 +20,8 @@
#include "llvm/IR/LegacyPassNameParser.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/IR/StructuralHash.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -48,7 +50,7 @@ namespace {
enum PassDebugLevel {
Disabled, Arguments, Structure, Executions, Details
};
-}
+} // namespace
static cl::opt<enum PassDebugLevel>
PassDebugging("debug-pass", cl::Hidden,
@@ -60,80 +62,6 @@ PassDebugging("debug-pass", cl::Hidden,
clEnumVal(Executions, "print pass name before it is executed"),
clEnumVal(Details , "print pass details when it is executed")));
-namespace {
-typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser>
-PassOptionList;
-}
-
-// Print IR out before/after specified passes.
-static PassOptionList
-PrintBefore("print-before",
- llvm::cl::desc("Print IR before specified passes"),
- cl::Hidden);
-
-static PassOptionList
-PrintAfter("print-after",
- llvm::cl::desc("Print IR after specified passes"),
- cl::Hidden);
-
-static cl::opt<bool> PrintBeforeAll("print-before-all",
- llvm::cl::desc("Print IR before each pass"),
- cl::init(false), cl::Hidden);
-static cl::opt<bool> PrintAfterAll("print-after-all",
- llvm::cl::desc("Print IR after each pass"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
- PrintModuleScope("print-module-scope",
- cl::desc("When printing IR for print-[before|after]{-all} "
- "always print a module IR"),
- cl::init(false), cl::Hidden);
-
-static cl::list<std::string>
- PrintFuncsList("filter-print-funcs", cl::value_desc("function names"),
- cl::desc("Only print IR for functions whose name "
- "match this for all print-[before|after][-all] "
- "options"),
- cl::CommaSeparated, cl::Hidden);
-
-/// This is a helper to determine whether to print IR before or
-/// after a pass.
-
-bool llvm::shouldPrintBeforePass() {
- return PrintBeforeAll || !PrintBefore.empty();
-}
-
-bool llvm::shouldPrintAfterPass() {
- return PrintAfterAll || !PrintAfter.empty();
-}
-
-static bool ShouldPrintBeforeOrAfterPass(StringRef PassID,
- PassOptionList &PassesToPrint) {
- for (auto *PassInf : PassesToPrint) {
- if (PassInf)
- if (PassInf->getPassArgument() == PassID) {
- return true;
- }
- }
- return false;
-}
-
-bool llvm::shouldPrintBeforePass(StringRef PassID) {
- return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PassID, PrintBefore);
-}
-
-bool llvm::shouldPrintAfterPass(StringRef PassID) {
- return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PassID, PrintAfter);
-}
-
-bool llvm::forcePrintModuleIR() { return PrintModuleScope; }
-
-bool llvm::isFunctionInPrintList(StringRef FunctionName) {
- static std::unordered_set<std::string> PrintFuncNames(PrintFuncsList.begin(),
- PrintFuncsList.end());
- return PrintFuncNames.empty() ||
- PrintFuncNames.count(std::string(FunctionName));
-}
/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
/// or higher is specified.
bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
@@ -209,8 +137,7 @@ void PMDataManager::emitInstrCountChangedRemark(
// remarks. Since it's possible that the first function in the module
// doesn't actually contain a basic block, we have to go and find one that's
// suitable for emitting remarks.
- auto It = std::find_if(M.begin(), M.end(),
- [](const Function &Fn) { return !Fn.empty(); });
+ auto It = llvm::find_if(M, [](const Function &Fn) { return !Fn.empty(); });
// Didn't find a function. Quit.
if (It == M.end())
@@ -641,7 +568,12 @@ PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) {
PDepth = P->getResolver()->getPMDataManager().getDepth();
for (Pass *AP : AnalysisPasses) {
- LastUser[AP] = P;
+ // Record P as the new last user of AP.
+ auto &LastUserOfAP = LastUser[AP];
+ if (LastUserOfAP)
+ InversedLastUser[LastUserOfAP].erase(AP);
+ LastUserOfAP = P;
+ InversedLastUser[P].insert(AP);
if (P == AP)
continue;
@@ -671,31 +603,25 @@ PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) {
if (P->getResolver())
setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass());
-
// If AP is the last user of other passes then make P last user of
// such passes.
- for (auto LU : LastUser) {
- if (LU.second == AP)
- // DenseMap iterator is not invalidated here because
- // this is just updating existing entries.
- LastUser[LU.first] = P;
- }
+ auto &LastUsedByAP = InversedLastUser[AP];
+ for (Pass *L : LastUsedByAP)
+ LastUser[L] = P;
+ InversedLastUser[P].insert(LastUsedByAP.begin(), LastUsedByAP.end());
+ LastUsedByAP.clear();
}
}
/// Collect passes whose last user is P
void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
Pass *P) {
- DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
- InversedLastUser.find(P);
+ auto DMI = InversedLastUser.find(P);
if (DMI == InversedLastUser.end())
return;
- SmallPtrSet<Pass *, 8> &LU = DMI->second;
- for (Pass *LUP : LU) {
- LastUses.push_back(LUP);
- }
-
+ auto &LU = DMI->second;
+ LastUses.append(LU.begin(), LU.end());
}
AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
@@ -929,11 +855,6 @@ void PMTopLevelManager::initializeAllAnalysisInfo() {
// Initialize other pass managers
for (PMDataManager *IPM : IndirectPassManagers)
IPM->initializeAnalysisInfo();
-
- for (auto LU : LastUser) {
- SmallPtrSet<Pass *, 8> &L = InversedLastUser[LU.second];
- L.insert(LU.first);
- }
}
/// Destructor
@@ -1189,12 +1110,6 @@ void PMDataManager::collectRequiredAndUsedAnalyses(
UP.push_back(AnalysisPass);
else
RP_NotAvail.push_back(RequiredID);
-
- for (const auto &RequiredID : AnUsage->getRequiredTransitiveSet())
- if (Pass *AnalysisPass = findAnalysisPass(RequiredID, true))
- UP.push_back(AnalysisPass);
- else
- RP_NotAvail.push_back(RequiredID);
}
// All Required analyses should be available to the pass as it runs! Here
@@ -1236,6 +1151,8 @@ Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
// Print list of passes that are last used by P.
void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
+ if (PassDebugging < Details)
+ return;
SmallVector<Pass *, 12> LUses;
@@ -1391,8 +1308,8 @@ PMDataManager::~PMDataManager() {
//===----------------------------------------------------------------------===//
// NOTE: Is this the right place to define this method ?
// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist.
-Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
- return PM.findAnalysisPass(ID, dir);
+Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID) const {
+ return PM.findAnalysisPass(ID, true);
}
std::tuple<Pass *, bool>
@@ -1475,7 +1392,6 @@ void FPPassManager::dumpPassStructure(unsigned Offset) {
}
}
-
/// Execute all of the passes scheduled for execution by invoking
/// runOnFunction method. Keep track of whether any of the passes modifies
/// the function, and if so, return true.
@@ -1513,7 +1429,19 @@ bool FPPassManager::runOnFunction(Function &F) {
{
PassManagerPrettyStackEntry X(FP, F);
TimeRegion PassTimer(getPassTimer(FP));
+#ifdef EXPENSIVE_CHECKS
+ uint64_t RefHash = StructuralHash(F);
+#endif
LocalChanged |= FP->runOnFunction(F);
+
+#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
+ if (!LocalChanged && (RefHash != StructuralHash(F))) {
+ llvm::errs() << "Pass modifies its input and doesn't report it: "
+ << FP->getPassName() << "\n";
+ llvm_unreachable("Pass modifies its input and doesn't report it");
+ }
+#endif
+
if (EmitICRemark) {
unsigned NewSize = F.getInstructionCount();
@@ -1537,7 +1465,8 @@ bool FPPassManager::runOnFunction(Function &F) {
dumpUsedSet(FP);
verifyPreservedAnalysis(FP);
- removeNotPreservedAnalysis(FP);
+ if (LocalChanged)
+ removeNotPreservedAnalysis(FP);
recordAvailableAnalysis(FP);
removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
}
@@ -1614,7 +1543,17 @@ MPPassManager::runOnModule(Module &M) {
PassManagerPrettyStackEntry X(MP, M);
TimeRegion PassTimer(getPassTimer(MP));
+#ifdef EXPENSIVE_CHECKS
+ uint64_t RefHash = StructuralHash(M);
+#endif
+
LocalChanged |= MP->runOnModule(M);
+
+#ifdef EXPENSIVE_CHECKS
+ assert((LocalChanged || (RefHash == StructuralHash(M))) &&
+ "Pass modifies its input and doesn't report it.");
+#endif
+
if (EmitICRemark) {
// Update the size of the module.
unsigned ModuleCount = M.getInstructionCount();
@@ -1636,7 +1575,8 @@ MPPassManager::runOnModule(Module &M) {
dumpUsedSet(MP);
verifyPreservedAnalysis(MP);
- removeNotPreservedAnalysis(MP);
+ if (LocalChanged)
+ removeNotPreservedAnalysis(MP);
recordAvailableAnalysis(MP);
removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
}
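The EXPENSIVE_CHECKS hunks above hash the IR before and after each legacy pass and trap when a pass mutates its input without reporting a change. A minimal standalone sketch of that guard follows, with std::hash standing in for llvm::StructuralHash; the function names are invented for illustration.

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <string>

// Placeholder for llvm::StructuralHash(F): any stable digest of the unit
// works for the purpose of this check.
uint64_t structuralHash(const std::string &Unit) {
  return std::hash<std::string>{}(Unit);
}

// Runs a transformation and verifies its "changed" report against the hash,
// mirroring the pattern guarded by EXPENSIVE_CHECKS in runOnFunction.
bool runAndCheck(std::string &Unit,
                 const std::function<bool(std::string &)> &Transform,
                 const char *PassName) {
  const uint64_t RefHash = structuralHash(Unit);
  const bool Changed = Transform(Unit);
  if (!Changed && RefHash != structuralHash(Unit)) {
    std::fprintf(stderr,
                 "Pass modifies its input and doesn't report it: %s\n",
                 PassName);
    std::abort();
  }
  return Changed;
}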
diff --git a/contrib/llvm-project/llvm/lib/IR/MDBuilder.cpp b/contrib/llvm-project/llvm/lib/IR/MDBuilder.cpp
index 40d70f43132d..35af8490287b 100644
--- a/contrib/llvm-project/llvm/lib/IR/MDBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/MDBuilder.cpp
@@ -151,24 +151,20 @@ MDNode *MDBuilder::mergeCallbackEncodings(MDNode *ExistingCallbacks,
}
MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
- // To ensure uniqueness the root node is self-referential.
- auto Dummy = MDNode::getTemporary(Context, None);
-
- SmallVector<Metadata *, 3> Args(1, Dummy.get());
+ SmallVector<Metadata *, 3> Args(1, nullptr);
if (Extra)
Args.push_back(Extra);
if (!Name.empty())
Args.push_back(createString(Name));
- MDNode *Root = MDNode::get(Context, Args);
+ MDNode *Root = MDNode::getDistinct(Context, Args);
// At this point we have
- // !0 = metadata !{} <- dummy
- // !1 = metadata !{metadata !0} <- root
- // Replace the dummy operand with the root node itself and delete the dummy.
+ // !0 = distinct !{null} <- root
+ // Replace the reserved operand with the root node itself.
Root->replaceOperandWith(0, Root);
// We now have
- // !1 = metadata !{metadata !1} <- self-referential root
+ // !0 = distinct !{!0} <- root
return Root;
}
@@ -310,14 +306,12 @@ MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) {
return MDNode::get(Context, Vals);
}
-MDNode *MDBuilder::createMisExpect(uint64_t Index, uint64_t LikleyWeight,
- uint64_t UnlikleyWeight) {
- auto *IntType = Type::getInt64Ty(Context);
- Metadata *Vals[] = {
- createString("misexpect"),
- createConstant(ConstantInt::get(IntType, Index)),
- createConstant(ConstantInt::get(IntType, LikleyWeight)),
- createConstant(ConstantInt::get(IntType, UnlikleyWeight)),
- };
- return MDNode::get(Context, Vals);
+MDNode *MDBuilder::createPseudoProbeDesc(uint64_t GUID, uint64_t Hash,
+ Function *F) {
+ auto *Int64Ty = Type::getInt64Ty(Context);
+ SmallVector<Metadata *, 3> Ops(3);
+ Ops[0] = createConstant(ConstantInt::get(Int64Ty, GUID));
+ Ops[1] = createConstant(ConstantInt::get(Int64Ty, Hash));
+ Ops[2] = createString(F->getName());
+ return MDNode::get(Context, Ops);
}
diff --git a/contrib/llvm-project/llvm/lib/IR/Mangler.cpp b/contrib/llvm-project/llvm/lib/IR/Mangler.cpp
index 0d66e321c396..674ba3cdaa24 100644
--- a/contrib/llvm-project/llvm/lib/IR/Mangler.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Mangler.cpp
@@ -12,6 +12,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
@@ -100,7 +101,7 @@ static void addByteCountSuffix(raw_ostream &OS, const Function *F,
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
AI != AE; ++AI) {
// 'Dereference' type in case of byval or inalloca parameter attribute.
- uint64_t AllocSize = AI->hasPassPointeeByValueAttr() ?
+ uint64_t AllocSize = AI->hasPassPointeeByValueCopyAttr() ?
AI->getPassPointeeByValueCopySize(DL) :
DL.getTypeAllocSize(AI->getType());
@@ -184,6 +185,25 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
getNameWithPrefix(OS, GV, CannotUsePrivateLabel);
}
+// Check if the name needs quotes to be safe for the linker to interpret.
+static bool canBeUnquotedInDirective(char C) {
+ return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '@';
+}
+
+static bool canBeUnquotedInDirective(StringRef Name) {
+ if (Name.empty())
+ return false;
+
+ // If any of the characters in the string is an unacceptable character, force
+ // quotes.
+ for (char C : Name) {
+ if (!canBeUnquotedInDirective(C))
+ return false;
+ }
+
+ return true;
+}
+
void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
const Triple &TT, Mangler &Mangler) {
if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
@@ -194,6 +214,9 @@ void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
else
OS << " -export:";
+ bool NeedQuotes = GV->hasName() && !canBeUnquotedInDirective(GV->getName());
+ if (NeedQuotes)
+ OS << "\"";
if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) {
std::string Flag;
raw_string_ostream FlagOS(Flag);
@@ -206,6 +229,8 @@ void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
} else {
Mangler.getNameWithPrefix(OS, GV, false);
}
+ if (NeedQuotes)
+ OS << "\"";
if (!GV->getValueType()->isFunctionTy()) {
if (TT.isWindowsMSVCEnvironment())
@@ -221,6 +246,11 @@ void llvm::emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
return;
OS << " /INCLUDE:";
+ bool NeedQuotes = GV->hasName() && !canBeUnquotedInDirective(GV->getName());
+ if (NeedQuotes)
+ OS << "\"";
M.getNameWithPrefix(OS, GV, false);
+ if (NeedQuotes)
+ OS << "\"";
}
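The quoting added here protects -export: and /INCLUDE: directives when a symbol name contains characters the linker would misparse. Below is a standalone sketch of the same check over std::string_view, assuming the same accepted character set (alphanumerics plus _ $ . @); the helper names are illustrative.

#include <cctype>
#include <string>
#include <string_view>

// A character is safe in an unquoted linker directive if it is alphanumeric
// or one of _ $ . @ (the set accepted by canBeUnquotedInDirective above).
static bool isSafeDirectiveChar(unsigned char C) {
  return std::isalnum(C) || C == '_' || C == '$' || C == '.' || C == '@';
}

static bool needsQuotes(std::string_view Name) {
  if (Name.empty())
    return true;
  for (unsigned char C : Name)
    if (!isSafeDirectiveChar(C))
      return true;
  return false;
}

// Builds e.g.  -export:foo   or   -export:"weird name"
std::string makeExportDirective(std::string_view Name) {
  std::string Out = " -export:";
  const bool Quote = needsQuotes(Name);
  if (Quote) Out += '"';
  Out.append(Name);
  if (Quote) Out += '"';
  return Out;
}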
diff --git a/contrib/llvm-project/llvm/lib/IR/Metadata.cpp b/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
index ce89009e86eb..7ca538995db2 100644
--- a/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/IR/Metadata.h"
#include "LLVMContextImpl.h"
#include "MetadataImpl.h"
#include "SymbolTableListTraitsImpl.h"
@@ -38,7 +39,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TrackingMDRef.h"
#include "llvm/IR/Type.h"
@@ -640,10 +641,7 @@ void MDNode::resolveCycles() {
}
static bool hasSelfReference(MDNode *N) {
- for (Metadata *MD : N->operands())
- if (MD == N)
- return true;
- return false;
+ return llvm::is_contained(N->operands(), N);
}
MDNode *MDNode::replaceWithPermanentImpl() {
@@ -925,7 +923,32 @@ MDNode *MDNode::getMostGenericAliasScope(MDNode *A, MDNode *B) {
if (!A || !B)
return nullptr;
- return concatenate(A, B);
+ // Take the intersection of domains then union the scopes
+ // within those domains
+ SmallPtrSet<const MDNode *, 16> ADomains;
+ SmallPtrSet<const MDNode *, 16> IntersectDomains;
+ SmallSetVector<Metadata *, 4> MDs;
+ for (const MDOperand &MDOp : A->operands())
+ if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
+ if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+ ADomains.insert(Domain);
+
+ for (const MDOperand &MDOp : B->operands())
+ if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
+ if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+ if (ADomains.contains(Domain)) {
+ IntersectDomains.insert(Domain);
+ MDs.insert(MDOp);
+ }
+
+ for (const MDOperand &MDOp : A->operands())
+ if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
+ if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+ if (IntersectDomains.contains(Domain))
+ MDs.insert(MDOp);
+
+ return MDs.empty() ? nullptr
+ : getOrSelfReference(A->getContext(), MDs.getArrayRef());
}
MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
@@ -1101,87 +1124,158 @@ StringRef NamedMDNode::getName() const { return StringRef(Name); }
//===----------------------------------------------------------------------===//
// Instruction Metadata method implementations.
//
-void MDAttachmentMap::set(unsigned ID, MDNode &MD) {
- for (auto &I : Attachments)
- if (I.first == ID) {
- I.second.reset(&MD);
- return;
- }
- Attachments.emplace_back(std::piecewise_construct, std::make_tuple(ID),
- std::make_tuple(&MD));
+
+MDNode *MDAttachments::lookup(unsigned ID) const {
+ for (const auto &A : Attachments)
+ if (A.MDKind == ID)
+ return A.Node;
+ return nullptr;
+}
+
+void MDAttachments::get(unsigned ID, SmallVectorImpl<MDNode *> &Result) const {
+ for (const auto &A : Attachments)
+ if (A.MDKind == ID)
+ Result.push_back(A.Node);
}
-bool MDAttachmentMap::erase(unsigned ID) {
+void MDAttachments::getAll(
+ SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
+ for (const auto &A : Attachments)
+ Result.emplace_back(A.MDKind, A.Node);
+
+ // Sort the resulting array so it is stable with respect to metadata IDs. We
+ // need to preserve the original insertion order though.
+ if (Result.size() > 1)
+ llvm::stable_sort(Result, less_first());
+}
+
+void MDAttachments::set(unsigned ID, MDNode *MD) {
+ erase(ID);
+ if (MD)
+ insert(ID, *MD);
+}
+
+void MDAttachments::insert(unsigned ID, MDNode &MD) {
+ Attachments.push_back({ID, TrackingMDNodeRef(&MD)});
+}
+
+bool MDAttachments::erase(unsigned ID) {
if (empty())
return false;
- // Common case is one/last value.
- if (Attachments.back().first == ID) {
+ // Common case is one value.
+ if (Attachments.size() == 1 && Attachments.back().MDKind == ID) {
Attachments.pop_back();
return true;
}
- for (auto I = Attachments.begin(), E = std::prev(Attachments.end()); I != E;
- ++I)
- if (I->first == ID) {
- *I = std::move(Attachments.back());
- Attachments.pop_back();
- return true;
- }
+ auto OldSize = Attachments.size();
+ llvm::erase_if(Attachments,
+ [ID](const Attachment &A) { return A.MDKind == ID; });
+ return OldSize != Attachments.size();
+}
- return false;
+MDNode *Value::getMetadata(unsigned KindID) const {
+ if (!hasMetadata())
+ return nullptr;
+ const auto &Info = getContext().pImpl->ValueMetadata[this];
+ assert(!Info.empty() && "bit out of sync with hash table");
+ return Info.lookup(KindID);
}
-MDNode *MDAttachmentMap::lookup(unsigned ID) const {
- for (const auto &I : Attachments)
- if (I.first == ID)
- return I.second;
- return nullptr;
+MDNode *Value::getMetadata(StringRef Kind) const {
+ if (!hasMetadata())
+ return nullptr;
+ const auto &Info = getContext().pImpl->ValueMetadata[this];
+ assert(!Info.empty() && "bit out of sync with hash table");
+ return Info.lookup(getContext().getMDKindID(Kind));
}
-void MDAttachmentMap::getAll(
- SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
- Result.append(Attachments.begin(), Attachments.end());
+void Value::getMetadata(unsigned KindID, SmallVectorImpl<MDNode *> &MDs) const {
+ if (hasMetadata())
+ getContext().pImpl->ValueMetadata[this].get(KindID, MDs);
+}
- // Sort the resulting array so it is stable.
- if (Result.size() > 1)
- array_pod_sort(Result.begin(), Result.end());
+void Value::getMetadata(StringRef Kind, SmallVectorImpl<MDNode *> &MDs) const {
+ if (hasMetadata())
+ getMetadata(getContext().getMDKindID(Kind), MDs);
}
-void MDGlobalAttachmentMap::insert(unsigned ID, MDNode &MD) {
- Attachments.push_back({ID, TrackingMDNodeRef(&MD)});
+void Value::getAllMetadata(
+ SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const {
+ if (hasMetadata()) {
+ assert(getContext().pImpl->ValueMetadata.count(this) &&
+ "bit out of sync with hash table");
+ const auto &Info = getContext().pImpl->ValueMetadata.find(this)->second;
+ assert(!Info.empty() && "Shouldn't have called this");
+ Info.getAll(MDs);
+ }
}
-MDNode *MDGlobalAttachmentMap::lookup(unsigned ID) const {
- for (const auto &A : Attachments)
- if (A.MDKind == ID)
- return A.Node;
- return nullptr;
+void Value::setMetadata(unsigned KindID, MDNode *Node) {
+ assert(isa<Instruction>(this) || isa<GlobalObject>(this));
+
+ // Handle the case when we're adding/updating metadata on a value.
+ if (Node) {
+ auto &Info = getContext().pImpl->ValueMetadata[this];
+ assert(!Info.empty() == HasMetadata && "bit out of sync with hash table");
+ if (Info.empty())
+ HasMetadata = true;
+ Info.set(KindID, Node);
+ return;
+ }
+
+ // Otherwise, we're removing metadata from an instruction.
+ assert((HasMetadata == (getContext().pImpl->ValueMetadata.count(this) > 0)) &&
+ "bit out of sync with hash table");
+ if (!HasMetadata)
+ return; // Nothing to remove!
+ auto &Info = getContext().pImpl->ValueMetadata[this];
+
+ // Handle removal of an existing value.
+ Info.erase(KindID);
+ if (!Info.empty())
+ return;
+ getContext().pImpl->ValueMetadata.erase(this);
+ HasMetadata = false;
}
-void MDGlobalAttachmentMap::get(unsigned ID,
- SmallVectorImpl<MDNode *> &Result) const {
- for (const auto &A : Attachments)
- if (A.MDKind == ID)
- Result.push_back(A.Node);
+void Value::setMetadata(StringRef Kind, MDNode *Node) {
+ if (!Node && !HasMetadata)
+ return;
+ setMetadata(getContext().getMDKindID(Kind), Node);
}
-bool MDGlobalAttachmentMap::erase(unsigned ID) {
- auto I = std::remove_if(Attachments.begin(), Attachments.end(),
- [ID](const Attachment &A) { return A.MDKind == ID; });
- bool Changed = I != Attachments.end();
- Attachments.erase(I, Attachments.end());
- return Changed;
+void Value::addMetadata(unsigned KindID, MDNode &MD) {
+ assert(isa<Instruction>(this) || isa<GlobalObject>(this));
+ if (!HasMetadata)
+ HasMetadata = true;
+ getContext().pImpl->ValueMetadata[this].insert(KindID, MD);
}
-void MDGlobalAttachmentMap::getAll(
- SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
- for (const auto &A : Attachments)
- Result.emplace_back(A.MDKind, A.Node);
+void Value::addMetadata(StringRef Kind, MDNode &MD) {
+ addMetadata(getContext().getMDKindID(Kind), MD);
+}
- // Sort the resulting array so it is stable with respect to metadata IDs. We
- // need to preserve the original insertion order though.
- llvm::stable_sort(Result, less_first());
+bool Value::eraseMetadata(unsigned KindID) {
+ // Nothing to unset.
+ if (!HasMetadata)
+ return false;
+
+ auto &Store = getContext().pImpl->ValueMetadata[this];
+ bool Changed = Store.erase(KindID);
+ if (Store.empty())
+ clearMetadata();
+ return Changed;
+}
+
+void Value::clearMetadata() {
+ if (!HasMetadata)
+ return;
+ assert(getContext().pImpl->ValueMetadata.count(this) &&
+ "bit out of sync with hash table");
+ getContext().pImpl->ValueMetadata.erase(this);
+ HasMetadata = false;
}
void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
@@ -1195,29 +1289,28 @@ MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
}
void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
- if (!hasMetadataHashEntry())
+ if (!Value::hasMetadata())
return; // Nothing to remove!
- auto &InstructionMetadata = getContext().pImpl->InstructionMetadata;
-
- SmallSet<unsigned, 4> KnownSet;
- KnownSet.insert(KnownIDs.begin(), KnownIDs.end());
- if (KnownSet.empty()) {
+ if (KnownIDs.empty()) {
// Just drop our entry at the store.
- InstructionMetadata.erase(this);
- setHasMetadataHashEntry(false);
+ clearMetadata();
return;
}
- auto &Info = InstructionMetadata[this];
- Info.remove_if([&KnownSet](const std::pair<unsigned, TrackingMDNodeRef> &I) {
- return !KnownSet.count(I.first);
+ SmallSet<unsigned, 4> KnownSet;
+ KnownSet.insert(KnownIDs.begin(), KnownIDs.end());
+
+ auto &MetadataStore = getContext().pImpl->ValueMetadata;
+ auto &Info = MetadataStore[this];
+ assert(!Info.empty() && "bit out of sync with hash table");
+ Info.remove_if([&KnownSet](const MDAttachments::Attachment &I) {
+ return !KnownSet.count(I.MDKind);
});
if (Info.empty()) {
// Drop our entry at the store.
- InstructionMetadata.erase(this);
- setHasMetadataHashEntry(false);
+ clearMetadata();
}
}
@@ -1231,33 +1324,28 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
return;
}
- // Handle the case when we're adding/updating metadata on an instruction.
- if (Node) {
- auto &Info = getContext().pImpl->InstructionMetadata[this];
- assert(!Info.empty() == hasMetadataHashEntry() &&
- "HasMetadata bit is wonked");
- if (Info.empty())
- setHasMetadataHashEntry(true);
- Info.set(KindID, *Node);
- return;
- }
+ Value::setMetadata(KindID, Node);
+}
- // Otherwise, we're removing metadata from an instruction.
- assert((hasMetadataHashEntry() ==
- (getContext().pImpl->InstructionMetadata.count(this) > 0)) &&
- "HasMetadata bit out of date!");
- if (!hasMetadataHashEntry())
- return; // Nothing to remove!
- auto &Info = getContext().pImpl->InstructionMetadata[this];
+void Instruction::addAnnotationMetadata(StringRef Name) {
+ MDBuilder MDB(getContext());
- // Handle removal of an existing value.
- Info.erase(KindID);
-
- if (!Info.empty())
- return;
+ auto *Existing = getMetadata(LLVMContext::MD_annotation);
+ SmallVector<Metadata *, 4> Names;
+ bool AppendName = true;
+ if (Existing) {
+ auto *Tuple = cast<MDTuple>(Existing);
+ for (auto &N : Tuple->operands()) {
+ if (cast<MDString>(N.get())->getString() == Name)
+ AppendName = false;
+ Names.push_back(N.get());
+ }
+ }
+ if (AppendName)
+ Names.push_back(MDB.createString(Name));
- getContext().pImpl->InstructionMetadata.erase(this);
- setHasMetadataHashEntry(false);
+ MDNode *MD = MDTuple::get(getContext(), Names);
+ setMetadata(LLVMContext::MD_annotation, MD);
}
void Instruction::setAAMetadata(const AAMDNodes &N) {
@@ -1271,13 +1359,7 @@ MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
// Handle 'dbg' as a special case since it is not stored in the hash table.
if (KindID == LLVMContext::MD_dbg)
return DbgLoc.getAsMDNode();
-
- if (!hasMetadataHashEntry())
- return nullptr;
- auto &Info = getContext().pImpl->InstructionMetadata[this];
- assert(!Info.empty() && "bit out of sync with hash table");
-
- return Info.lookup(KindID);
+ return Value::getMetadata(KindID);
}
void Instruction::getAllMetadataImpl(
@@ -1288,27 +1370,8 @@ void Instruction::getAllMetadataImpl(
if (DbgLoc) {
Result.push_back(
std::make_pair((unsigned)LLVMContext::MD_dbg, DbgLoc.getAsMDNode()));
- if (!hasMetadataHashEntry())
- return;
}
-
- assert(hasMetadataHashEntry() &&
- getContext().pImpl->InstructionMetadata.count(this) &&
- "Shouldn't have called this");
- const auto &Info = getContext().pImpl->InstructionMetadata.find(this)->second;
- assert(!Info.empty() && "Shouldn't have called this");
- Info.getAll(Result);
-}
-
-void Instruction::getAllMetadataOtherThanDebugLocImpl(
- SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
- Result.clear();
- assert(hasMetadataHashEntry() &&
- getContext().pImpl->InstructionMetadata.count(this) &&
- "Shouldn't have called this");
- const auto &Info = getContext().pImpl->InstructionMetadata.find(this)->second;
- assert(!Info.empty() && "Shouldn't have called this");
- Info.getAll(Result);
+ Value::getAllMetadata(Result);
}
bool Instruction::extractProfMetadata(uint64_t &TrueVal,
@@ -1372,84 +1435,6 @@ bool Instruction::extractProfTotalWeight(uint64_t &TotalVal) const {
return false;
}
-void Instruction::clearMetadataHashEntries() {
- assert(hasMetadataHashEntry() && "Caller should check");
- getContext().pImpl->InstructionMetadata.erase(this);
- setHasMetadataHashEntry(false);
-}
-
-void GlobalObject::getMetadata(unsigned KindID,
- SmallVectorImpl<MDNode *> &MDs) const {
- if (hasMetadata())
- getContext().pImpl->GlobalObjectMetadata[this].get(KindID, MDs);
-}
-
-void GlobalObject::getMetadata(StringRef Kind,
- SmallVectorImpl<MDNode *> &MDs) const {
- if (hasMetadata())
- getMetadata(getContext().getMDKindID(Kind), MDs);
-}
-
-void GlobalObject::addMetadata(unsigned KindID, MDNode &MD) {
- if (!hasMetadata())
- setHasMetadataHashEntry(true);
-
- getContext().pImpl->GlobalObjectMetadata[this].insert(KindID, MD);
-}
-
-void GlobalObject::addMetadata(StringRef Kind, MDNode &MD) {
- addMetadata(getContext().getMDKindID(Kind), MD);
-}
-
-bool GlobalObject::eraseMetadata(unsigned KindID) {
- // Nothing to unset.
- if (!hasMetadata())
- return false;
-
- auto &Store = getContext().pImpl->GlobalObjectMetadata[this];
- bool Changed = Store.erase(KindID);
- if (Store.empty())
- clearMetadata();
- return Changed;
-}
-
-void GlobalObject::getAllMetadata(
- SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const {
- MDs.clear();
-
- if (!hasMetadata())
- return;
-
- getContext().pImpl->GlobalObjectMetadata[this].getAll(MDs);
-}
-
-void GlobalObject::clearMetadata() {
- if (!hasMetadata())
- return;
- getContext().pImpl->GlobalObjectMetadata.erase(this);
- setHasMetadataHashEntry(false);
-}
-
-void GlobalObject::setMetadata(unsigned KindID, MDNode *N) {
- eraseMetadata(KindID);
- if (N)
- addMetadata(KindID, *N);
-}
-
-void GlobalObject::setMetadata(StringRef Kind, MDNode *N) {
- setMetadata(getContext().getMDKindID(Kind), N);
-}
-
-MDNode *GlobalObject::getMetadata(unsigned KindID) const {
- if (hasMetadata())
- return getContext().pImpl->GlobalObjectMetadata[this].lookup(KindID);
- return nullptr;
-}
-
-MDNode *GlobalObject::getMetadata(StringRef Kind) const {
- return getMetadata(getContext().getMDKindID(Kind));
-}
-
void GlobalObject::copyMetadata(const GlobalObject *Other, unsigned Offset) {
SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
Other->getAllMetadata(MDs);
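The rewritten metadata plumbing above keeps one attachment list per Value in a context-owned map, plus a HasMetadata bit on the value itself as a cheap fast path. A standalone sketch of that layout with std::unordered_map and stub types follows; ValueStub, MDNodeStub and ContextStub are invented for illustration and the map here is a plain map rather than a multimap.

#include <cassert>
#include <unordered_map>
#include <utility>
#include <vector>

struct MDNodeStub {};          // stand-in for llvm::MDNode

struct ValueStub {
  bool HasMetadata = false;    // mirrors the bit kept in sync with the map
};

class ContextStub {
  std::unordered_map<const ValueStub *,
                     std::vector<std::pair<unsigned, MDNodeStub *>>>
      ValueMetadata;

public:
  void setMetadata(ValueStub &V, unsigned KindID, MDNodeStub *Node) {
    if (Node) {
      auto &Info = ValueMetadata[&V];
      V.HasMetadata = true;
      // Replace an existing attachment of this kind, or add a new one.
      for (auto &P : Info)
        if (P.first == KindID) { P.second = Node; return; }
      Info.emplace_back(KindID, Node);
      return;
    }
    // Removing: only touch the map if the bit says something is there.
    if (!V.HasMetadata)
      return;
    auto It = ValueMetadata.find(&V);
    assert(It != ValueMetadata.end() && "bit out of sync with hash table");
    auto &Info = It->second;
    for (auto I = Info.begin(); I != Info.end(); ++I)
      if (I->first == KindID) { Info.erase(I); break; }
    if (Info.empty()) {
      ValueMetadata.erase(It);
      V.HasMetadata = false;
    }
  }

  MDNodeStub *getMetadata(const ValueStub &V, unsigned KindID) const {
    if (!V.HasMetadata)
      return nullptr;
    const auto &Info = ValueMetadata.at(&V);
    for (const auto &P : Info)
      if (P.first == KindID)
        return P.second;
    return nullptr;
  }
};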
diff --git a/contrib/llvm-project/llvm/lib/IR/Module.cpp b/contrib/llvm-project/llvm/lib/IR/Module.cpp
index 3ea181a9b48d..b4f10e2e2d23 100644
--- a/contrib/llvm-project/llvm/lib/IR/Module.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Module.cpp
@@ -582,7 +582,7 @@ void Module::setProfileSummary(Metadata *M, ProfileSummary::Kind Kind) {
setModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M);
}
-Metadata *Module::getProfileSummary(bool IsCS) {
+Metadata *Module::getProfileSummary(bool IsCS) const {
return (IsCS ? getModuleFlag("CSProfileSummary")
: getModuleFlag("ProfileSummary"));
}
@@ -601,13 +601,6 @@ void Module::setSemanticInterposition(bool SI) {
addModuleFlag(ModFlagBehavior::Error, "SemanticInterposition", SI);
}
-bool Module::noSemanticInterposition() const {
- // Conservatively require an explicit zero value for now.
- Metadata *MF = getModuleFlag("SemanticInterposition");
- auto *Val = cast_or_null<ConstantAsMetadata>(MF);
- return Val && cast<ConstantInt>(Val->getValue())->getZExtValue() == 0;
-}
-
void Module::setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB) {
OwnedMemoryBuffer = std::move(MB);
}
diff --git a/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp b/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp
index 91612eafada7..5d21ca759f35 100644
--- a/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -163,7 +163,9 @@ bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const {
return false;
}
-static void propagateAttributesToRefs(GlobalValueSummary *S) {
+static void
+propagateAttributesToRefs(GlobalValueSummary *S,
+ DenseSet<ValueInfo> &MarkedNonReadWriteOnly) {
// If reference is not readonly or writeonly then referenced summary is not
// read/writeonly either. Note that:
// - All references from GlobalVarSummary are conservatively considered as
@@ -174,6 +176,11 @@ static void propagateAttributesToRefs(GlobalValueSummary *S) {
// for them.
for (auto &VI : S->refs()) {
assert(VI.getAccessSpecifier() == 0 || isa<FunctionSummary>(S));
+ if (!VI.getAccessSpecifier()) {
+ if (!MarkedNonReadWriteOnly.insert(VI).second)
+ continue;
+ } else if (MarkedNonReadWriteOnly.contains(VI))
+ continue;
for (auto &Ref : VI.getSummaryList())
// If a reference to an alias is not read/writeonly then the aliasee
// is not read/writeonly
@@ -216,11 +223,24 @@ void ModuleSummaryIndex::propagateAttributes(
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
if (!PropagateAttrs)
return;
+ DenseSet<ValueInfo> MarkedNonReadWriteOnly;
for (auto &P : *this)
for (auto &S : P.second.SummaryList) {
- if (!isGlobalValueLive(S.get()))
+ if (!isGlobalValueLive(S.get())) {
+ // computeDeadSymbols should have marked all copies live. Note that
+ // it is possible that there is a GUID collision between internal
+ // symbols that have the same name but live in different files whose
+ // paths are not distinguishing enough. Because computeDeadSymbols should
+ // conservatively mark all copies live, we can assert here that all are
+ // dead if any copy is dead.
+ assert(llvm::none_of(
+ P.second.SummaryList,
+ [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
+ return isGlobalValueLive(Summary.get());
+ }));
// We don't examine references from dead objects
- continue;
+ break;
+ }
// Global variable can't be marked read/writeonly if it is not eligible
// to import since we need to ensure that all external references get
@@ -240,7 +260,7 @@ void ModuleSummaryIndex::propagateAttributes(
GVS->setReadOnly(false);
GVS->setWriteOnly(false);
}
- propagateAttributesToRefs(S.get());
+ propagateAttributesToRefs(S.get(), MarkedNonReadWriteOnly);
}
setWithAttributePropagation();
if (llvm::AreStatisticsEnabled())
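The new MarkedNonReadWriteOnly set memoizes reference processing so each referenced value is demoted at most once, however many live summaries point at it. A standalone sketch of that gate with std::unordered_set and stand-in types; Ref and demoteReadWriteOnly are illustrative placeholders for ValueInfo and the per-reference summary-list walk.

#include <unordered_set>
#include <vector>

// Stand-ins for ValueInfo and the per-reference work (illustrative only).
struct Ref { int Id; bool HasAccessSpecifier; };
void demoteReadWriteOnly(int /*Id*/) { /* walk summary list, clear bits */ }

// The gate added by the hunk above: each referenced value goes through the
// demotion walk at most once, no matter how many summaries refer to it.
void propagateToRefs(const std::vector<Ref> &Refs,
                     std::unordered_set<int> &MarkedNonReadWriteOnly) {
  for (const Ref &R : Refs) {
    if (!R.HasAccessSpecifier) {
      // Plain reference: record it; skip if an earlier summary already did.
      if (!MarkedNonReadWriteOnly.insert(R.Id).second)
        continue;
    } else if (MarkedNonReadWriteOnly.count(R.Id)) {
      // Already demoted via a plain reference elsewhere; nothing left to do.
      continue;
    }
    demoteReadWriteOnly(R.Id);
  }
}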
diff --git a/contrib/llvm-project/llvm/lib/IR/Operator.cpp b/contrib/llvm-project/llvm/lib/IR/Operator.cpp
index 0f70fc37dee2..69181f35827b 100644
--- a/contrib/llvm-project/llvm/lib/IR/Operator.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Operator.cpp
@@ -61,10 +61,17 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const {
bool GEPOperator::accumulateConstantOffset(
const DataLayout &DL, APInt &Offset,
function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
- assert(Offset.getBitWidth() ==
- DL.getIndexSizeInBits(getPointerAddressSpace()) &&
- "The offset bit width does not match DL specification.");
+ assert(Offset.getBitWidth() ==
+ DL.getIndexSizeInBits(getPointerAddressSpace()) &&
+ "The offset bit width does not match DL specification.");
+ SmallVector<const Value *> Index(value_op_begin() + 1, value_op_end());
+ return GEPOperator::accumulateConstantOffset(getSourceElementType(), Index,
+ DL, Offset, ExternalAnalysis);
+}
+bool GEPOperator::accumulateConstantOffset(
+ Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL,
+ APInt &Offset, function_ref<bool(Value &, APInt &)> ExternalAnalysis) {
bool UsedExternalAnalysis = false;
auto AccumulateOffset = [&](APInt Index, uint64_t Size) -> bool {
Index = Index.sextOrTrunc(Offset.getBitWidth());
@@ -85,9 +92,10 @@ bool GEPOperator::accumulateConstantOffset(
}
return true;
};
-
- for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
- GTI != GTE; ++GTI) {
+ auto begin = generic_gep_type_iterator<decltype(Index.begin())>::begin(
+ SourceType, Index.begin());
+ auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
+ for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
// Scalable vectors are multiplied by a runtime constant.
bool ScalableType = false;
if (isa<ScalableVectorType>(GTI.getIndexedType()))
diff --git a/contrib/llvm-project/llvm/lib/IR/OptBisect.cpp b/contrib/llvm-project/llvm/lib/IR/OptBisect.cpp
index 3104b90f3070..dc85e1316d48 100644
--- a/contrib/llvm-project/llvm/lib/IR/OptBisect.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/OptBisect.cpp
@@ -54,3 +54,5 @@ bool OptBisect::checkPass(const StringRef PassName,
printPassMessage(PassName, CurBisectNum, TargetDesc, ShouldRun);
return ShouldRun;
}
+
+ManagedStatic<OptBisect> llvm::OptBisector;
diff --git a/contrib/llvm-project/llvm/lib/IR/Pass.cpp b/contrib/llvm-project/llvm/lib/IR/Pass.cpp
index a815da2bdc51..755ea57c63fd 100644
--- a/contrib/llvm-project/llvm/lib/IR/Pass.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Pass.cpp
@@ -62,7 +62,7 @@ bool ModulePass::skipModule(Module &M) const {
}
bool Pass::mustPreserveAnalysisID(char &AID) const {
- return Resolver->getAnalysisIfAvailable(&AID, true) != nullptr;
+ return Resolver->getAnalysisIfAvailable(&AID) != nullptr;
}
// dumpPassStructure - Implement the -debug-pass=Structure option
@@ -259,22 +259,23 @@ void AnalysisUsage::setPreservesCFG() {
AnalysisUsage &AnalysisUsage::addPreserved(StringRef Arg) {
const PassInfo *PI = Pass::lookupPassInfo(Arg);
// If the pass exists, preserve it. Otherwise silently do nothing.
- if (PI) Preserved.push_back(PI->getTypeInfo());
+ if (PI)
+ pushUnique(Preserved, PI->getTypeInfo());
return *this;
}
AnalysisUsage &AnalysisUsage::addRequiredID(const void *ID) {
- Required.push_back(ID);
+ pushUnique(Required, ID);
return *this;
}
AnalysisUsage &AnalysisUsage::addRequiredID(char &ID) {
- Required.push_back(&ID);
+ pushUnique(Required, &ID);
return *this;
}
AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(char &ID) {
- Required.push_back(&ID);
- RequiredTransitive.push_back(&ID);
+ pushUnique(Required, &ID);
+ pushUnique(RequiredTransitive, &ID);
return *this;
}
diff --git a/contrib/llvm-project/llvm/lib/IR/PassInstrumentation.cpp b/contrib/llvm-project/llvm/lib/IR/PassInstrumentation.cpp
index 49cc6ec04d90..56a36db21e28 100644
--- a/contrib/llvm-project/llvm/lib/IR/PassInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/PassInstrumentation.cpp
@@ -12,10 +12,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+void PassInstrumentationCallbacks::addClassToPassName(StringRef ClassName,
+ StringRef PassName) {
+ ClassToPassName[ClassName] = PassName.str();
+}
+
+StringRef
+PassInstrumentationCallbacks::getPassNameForClassName(StringRef ClassName) {
+ return ClassToPassName[ClassName];
+}
+
AnalysisKey PassInstrumentationAnalysis::Key;
+bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials) {
+ size_t Pos = PassID.find('<');
+ StringRef Prefix = PassID;
+ if (Pos != StringRef::npos)
+ Prefix = PassID.substr(0, Pos);
+ return any_of(Specials, [Prefix](StringRef S) { return Prefix.endswith(S); });
+}
+
} // namespace llvm
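isSpecialPass drops the template-argument suffix from a pass class name (e.g. an adaptor instantiated over an inner pass) and matches the remaining prefix against a list of special pass names. A standalone sketch of the same string handling with std::string_view, with the endswith check spelled out by hand; the function name is illustrative.

#include <algorithm>
#include <cstddef>
#include <string_view>
#include <vector>

bool isSpecialPassName(std::string_view PassID,
                       const std::vector<std::string_view> &Specials) {
  // Strip everything from the first '<' so "Adaptor<InnerPass>" is compared
  // as just "Adaptor".
  const std::size_t Pos = PassID.find('<');
  std::string_view Prefix =
      (Pos == std::string_view::npos) ? PassID : PassID.substr(0, Pos);
  return std::any_of(Specials.begin(), Specials.end(),
                     [Prefix](std::string_view S) {
                       return Prefix.size() >= S.size() &&
                              Prefix.substr(Prefix.size() - S.size()) == S;
                     });
}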
diff --git a/contrib/llvm-project/llvm/lib/IR/PassManager.cpp b/contrib/llvm-project/llvm/lib/IR/PassManager.cpp
index 624827ff8cd9..4cf7ab2a602b 100644
--- a/contrib/llvm-project/llvm/lib/IR/PassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/PassManager.cpp
@@ -91,6 +91,54 @@ bool FunctionAnalysisManagerModuleProxy::Result::invalidate(
}
} // namespace llvm
+PreservedAnalyses ModuleToFunctionPassAdaptor::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ // Request PassInstrumentation from the analysis manager; it will be used to
+ // run instrumentation callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
+
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, skip its execution completely if asked to (callback returns
+ // false).
+ if (!PI.runBeforePass<Function>(*Pass, F))
+ continue;
+
+ PreservedAnalyses PassPA;
+ {
+ TimeTraceScope TimeScope(Pass->name(), F.getName());
+ PassPA = Pass->run(F, FAM);
+ }
+
+ PI.runAfterPass(*Pass, F, PassPA);
+
+ // We know that the function pass couldn't have invalidated any other
+ // function's analyses (that's the contract of a function pass), so
+ // directly handle the function analysis manager's invalidation here.
+ FAM.invalidate(F, PassPA);
+
+ // Then intersect the preserved set so that invalidation of module
+ // analyses will eventually occur when the module pass completes.
+ PA.intersect(std::move(PassPA));
+ }
+
+ // The FunctionAnalysisManagerModuleProxy is preserved because (we assume)
+ // the function passes we ran didn't add or remove any functions.
+ //
+ // We also preserve all analyses on Functions, because we did all the
+ // invalidation we needed to do above.
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ return PA;
+}
+
AnalysisSetKey CFGAnalyses::SetKey;
AnalysisSetKey PreservedAnalyses::AllAnalysesKey;
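Editor's note, not part of the upstream diff: the run() method above is normally reached by wrapping a function pass with createModuleToFunctionPassAdaptor(); a sketch under the usual PassBuilder setup, with an illustrative driver function name.

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"

void runInstCombinePerFunction(llvm::Module &M) {
  llvm::PassBuilder PB;
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  llvm::ModulePassManager MPM;
  // The adaptor iterates the module's function definitions, wraps every run
  // in PassInstrumentation callbacks, and intersects the preserved analyses,
  // exactly as in ModuleToFunctionPassAdaptor::run() above.
  MPM.addPass(llvm::createModuleToFunctionPassAdaptor(llvm::InstCombinePass()));
  MPM.run(M, MAM);
}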
diff --git a/contrib/llvm-project/llvm/lib/IR/PassRegistry.cpp b/contrib/llvm-project/llvm/lib/IR/PassRegistry.cpp
index 0572c4fe5237..94f607afec47 100644
--- a/contrib/llvm-project/llvm/lib/IR/PassRegistry.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/PassRegistry.cpp
@@ -40,14 +40,12 @@ PassRegistry::~PassRegistry() = default;
const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
sys::SmartScopedReader<true> Guard(Lock);
- MapType::const_iterator I = PassInfoMap.find(TI);
- return I != PassInfoMap.end() ? I->second : nullptr;
+ return PassInfoMap.lookup(TI);
}
const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
sys::SmartScopedReader<true> Guard(Lock);
- StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
- return I != PassInfoStringMap.end() ? I->second : nullptr;
+ return PassInfoStringMap.lookup(Arg);
}
//===----------------------------------------------------------------------===//
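Editor's note, not part of the upstream diff: the rewrite above relies on DenseMap/StringMap lookup() returning a value-initialized mapped type (nullptr for pointer values) when the key is missing; a small standalone illustration.

#include "llvm/ADT/DenseMap.h"

const char *demoLookup(const llvm::DenseMap<int, const char *> &M) {
  // Equivalent to:
  //   auto I = M.find(42);
  //   return I != M.end() ? I->second : nullptr;
  return M.lookup(42);
}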
diff --git a/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp b/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp
index 25275e5733ac..d0c1517f480b 100644
--- a/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp
@@ -16,9 +16,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/PassTimingInfo.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -26,9 +24,8 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/Support/Timer.h"
+#include "llvm/Support/TypeName.h"
#include "llvm/Support/raw_ostream.h"
-#include <memory>
#include <string>
using namespace llvm;
@@ -38,11 +35,17 @@ using namespace llvm;
namespace llvm {
bool TimePassesIsEnabled = false;
+bool TimePassesPerRun = false;
static cl::opt<bool, true> EnableTiming(
"time-passes", cl::location(TimePassesIsEnabled), cl::Hidden,
cl::desc("Time each pass, printing elapsed time for each on exit"));
+static cl::opt<bool, true> EnableTimingPerRun(
+ "time-passes-per-run", cl::location(TimePassesPerRun), cl::Hidden,
+ cl::desc("Time each pass run, printing elapsed time for each run on exit"),
+ cl::callback([](const bool &) { TimePassesIsEnabled = true; }));
+
namespace {
namespace legacy {
@@ -168,6 +171,13 @@ void reportAndResetTimings(raw_ostream *OutStream) {
/// Returns the timer for the specified pass invocation of \p PassID.
/// Each time it creates a new timer.
Timer &TimePassesHandler::getPassTimer(StringRef PassID) {
+ if (!PerRun) {
+ TimerVector &Timers = TimingData[PassID];
+ if (Timers.size() == 0)
+ Timers.emplace_back(new Timer(PassID, PassID, TG));
+ return *Timers.front();
+ }
+
// Take a vector of Timers created for this \p PassID and append
// one more timer to it.
TimerVector &Timers = TimingData[PassID];
@@ -182,8 +192,12 @@ Timer &TimePassesHandler::getPassTimer(StringRef PassID) {
return *T;
}
-TimePassesHandler::TimePassesHandler(bool Enabled)
- : TG("pass", "... Pass execution timing report ..."), Enabled(Enabled) {}
+TimePassesHandler::TimePassesHandler(bool Enabled, bool PerRun)
+ : TG("pass", "... Pass execution timing report ..."), Enabled(Enabled),
+ PerRun(PerRun) {}
+
+TimePassesHandler::TimePassesHandler()
+ : TimePassesHandler(TimePassesIsEnabled, TimePassesPerRun) {}
void TimePassesHandler::setOutStream(raw_ostream &Out) {
OutStream = &Out;
@@ -234,30 +248,20 @@ void TimePassesHandler::stopTimer(StringRef PassID) {
MyTimer->stopTimer();
}
-static bool matchPassManager(StringRef PassID) {
- size_t prefix_pos = PassID.find('<');
- if (prefix_pos == StringRef::npos)
- return false;
- StringRef Prefix = PassID.substr(0, prefix_pos);
- return Prefix.endswith("PassManager") || Prefix.endswith("PassAdaptor") ||
- Prefix.endswith("AnalysisManagerProxy");
-}
-
-bool TimePassesHandler::runBeforePass(StringRef PassID) {
- if (matchPassManager(PassID))
- return true;
+void TimePassesHandler::runBeforePass(StringRef PassID) {
+ if (isSpecialPass(PassID,
+ {"PassManager", "PassAdaptor", "AnalysisManagerProxy"}))
+ return;
startTimer(PassID);
LLVM_DEBUG(dbgs() << "after runBeforePass(" << PassID << ")\n");
LLVM_DEBUG(dump());
-
- // we are not going to skip this pass, thus return true.
- return true;
}
void TimePassesHandler::runAfterPass(StringRef PassID) {
- if (matchPassManager(PassID))
+ if (isSpecialPass(PassID,
+ {"PassManager", "PassAdaptor", "AnalysisManagerProxy"}))
return;
stopTimer(PassID);
@@ -270,12 +274,16 @@ void TimePassesHandler::registerCallbacks(PassInstrumentationCallbacks &PIC) {
if (!Enabled)
return;
- PIC.registerBeforePassCallback(
- [this](StringRef P, Any) { return this->runBeforePass(P); });
+ PIC.registerBeforeNonSkippedPassCallback(
+ [this](StringRef P, Any) { this->runBeforePass(P); });
PIC.registerAfterPassCallback(
- [this](StringRef P, Any) { this->runAfterPass(P); });
+ [this](StringRef P, Any, const PreservedAnalyses &) {
+ this->runAfterPass(P);
+ });
PIC.registerAfterPassInvalidatedCallback(
- [this](StringRef P) { this->runAfterPass(P); });
+ [this](StringRef P, const PreservedAnalyses &) {
+ this->runAfterPass(P);
+ });
PIC.registerBeforeAnalysisCallback(
[this](StringRef P, Any) { this->runBeforePass(P); });
PIC.registerAfterAnalysisCallback(
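Editor's note, not part of the upstream diff: a sketch of wiring the handler into a PassInstrumentationCallbacks instance with the new per-run mode, using the constructor, setOutStream() and registerCallbacks() shown above; the wrapper function name is illustrative.

#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/raw_ostream.h"

void attachPassTimers(llvm::PassInstrumentationCallbacks &PIC) {
  // Enabled = true, PerRun = true: one Timer per pass *run* rather than one
  // aggregated Timer per pass name (the -time-passes-per-run behaviour).
  // Static so the handler outlives the pipeline and prints its report on exit.
  static llvm::TimePassesHandler Timing(/*Enabled=*/true, /*PerRun=*/true);
  Timing.setOutStream(llvm::errs());
  Timing.registerCallbacks(PIC);
}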
diff --git a/contrib/llvm-project/llvm/lib/IR/PrintPasses.cpp b/contrib/llvm-project/llvm/lib/IR/PrintPasses.cpp
new file mode 100644
index 000000000000..83b8c93e766f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/IR/PrintPasses.cpp
@@ -0,0 +1,88 @@
+//===- PrintPasses.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/Support/CommandLine.h"
+#include <unordered_set>
+
+using namespace llvm;
+
+// Print IR out before/after specified passes.
+static cl::list<std::string>
+ PrintBefore("print-before",
+ llvm::cl::desc("Print IR before specified passes"),
+ cl::CommaSeparated, cl::Hidden);
+
+static cl::list<std::string>
+ PrintAfter("print-after", llvm::cl::desc("Print IR after specified passes"),
+ cl::CommaSeparated, cl::Hidden);
+
+static cl::opt<bool> PrintBeforeAll("print-before-all",
+ llvm::cl::desc("Print IR before each pass"),
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> PrintAfterAll("print-after-all",
+ llvm::cl::desc("Print IR after each pass"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+ PrintModuleScope("print-module-scope",
+ cl::desc("When printing IR for print-[before|after]{-all} "
+ "always print a module IR"),
+ cl::init(false), cl::Hidden);
+
+static cl::list<std::string>
+ PrintFuncsList("filter-print-funcs", cl::value_desc("function names"),
+ cl::desc("Only print IR for functions whose name "
+ "match this for all print-[before|after][-all] "
+ "options"),
+ cl::CommaSeparated, cl::Hidden);
+
+/// This is a helper to determine whether to print IR before or
+/// after a pass.
+
+bool llvm::shouldPrintBeforeSomePass() {
+ return PrintBeforeAll || !PrintBefore.empty();
+}
+
+bool llvm::shouldPrintAfterSomePass() {
+ return PrintAfterAll || !PrintAfter.empty();
+}
+
+static bool shouldPrintBeforeOrAfterPass(StringRef PassID,
+ ArrayRef<std::string> PassesToPrint) {
+ return llvm::is_contained(PassesToPrint, PassID);
+}
+
+bool llvm::shouldPrintBeforeAll() { return PrintBeforeAll; }
+
+bool llvm::shouldPrintAfterAll() { return PrintAfterAll; }
+
+bool llvm::shouldPrintBeforePass(StringRef PassID) {
+ return PrintBeforeAll || shouldPrintBeforeOrAfterPass(PassID, PrintBefore);
+}
+
+bool llvm::shouldPrintAfterPass(StringRef PassID) {
+ return PrintAfterAll || shouldPrintBeforeOrAfterPass(PassID, PrintAfter);
+}
+
+std::vector<std::string> llvm::printBeforePasses() {
+ return std::vector<std::string>(PrintBefore);
+}
+
+std::vector<std::string> llvm::printAfterPasses() {
+ return std::vector<std::string>(PrintAfter);
+}
+
+bool llvm::forcePrintModuleIR() { return PrintModuleScope; }
+
+bool llvm::isFunctionInPrintList(StringRef FunctionName) {
+ static std::unordered_set<std::string> PrintFuncNames(PrintFuncsList.begin(),
+ PrintFuncsList.end());
+ return PrintFuncNames.empty() ||
+ PrintFuncNames.count(std::string(FunctionName));
+}
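Editor's note, not part of the upstream diff: a sketch of how instrumentation code can consult these helpers, assuming the declarations in llvm/IR/PrintPasses.h; the function name is illustrative.

#include "llvm/IR/Function.h"
#include "llvm/IR/PrintPasses.h"
#include "llvm/Support/raw_ostream.h"

void maybePrintAfter(llvm::StringRef PassName, llvm::Function &F) {
  // Honors -print-after / -print-after-all and -filter-print-funcs.
  if (!llvm::shouldPrintAfterPass(PassName) ||
      !llvm::isFunctionInPrintList(F.getName()))
    return;
  // With -print-module-scope the caller would print the whole module instead.
  if (!llvm::forcePrintModuleIR())
    F.print(llvm::errs());
}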
diff --git a/contrib/llvm-project/llvm/lib/IR/ProfileSummary.cpp b/contrib/llvm-project/llvm/lib/IR/ProfileSummary.cpp
index ac6bcd9fe3af..453a278a7f3f 100644
--- a/contrib/llvm-project/llvm/lib/IR/ProfileSummary.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ProfileSummary.cpp
@@ -259,7 +259,7 @@ void ProfileSummary::printSummary(raw_ostream &OS) {
void ProfileSummary::printDetailedSummary(raw_ostream &OS) {
OS << "Detailed summary:\n";
- for (auto Entry : DetailedSummary) {
+ for (const auto &Entry : DetailedSummary) {
OS << Entry.NumCounts << " blocks with count >= " << Entry.MinCount
<< " account for "
<< format("%0.6g", (float)Entry.Cutoff / Scale * 100)
diff --git a/contrib/llvm-project/llvm/lib/IR/PseudoProbe.cpp b/contrib/llvm-project/llvm/lib/IR/PseudoProbe.cpp
new file mode 100644
index 000000000000..80d2963938d4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/IR/PseudoProbe.cpp
@@ -0,0 +1,99 @@
+//===- PseudoProbe.cpp - Pseudo Probe Helpers -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the helpers to manipulate pseudo probe IR intrinsic
+// calls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+Optional<PseudoProbe> extractProbeFromDiscriminator(const Instruction &Inst) {
+ assert(isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst) &&
+ "Only call instructions should have pseudo probe encodes as their "
+ "Dwarf discriminators");
+ if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
+ const DILocation *DIL = DLoc;
+ auto Discriminator = DIL->getDiscriminator();
+ if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
+ PseudoProbe Probe;
+ Probe.Id =
+ PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
+ Probe.Type =
+ PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
+ Probe.Attr =
+ PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator);
+ Probe.Factor =
+ PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) /
+ (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor;
+ return Probe;
+ }
+ }
+ return None;
+}
+
+Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
+ if (const auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
+ PseudoProbe Probe;
+ Probe.Id = II->getIndex()->getZExtValue();
+ Probe.Type = (uint32_t)PseudoProbeType::Block;
+ Probe.Attr = II->getAttributes()->getZExtValue();
+ Probe.Factor = II->getFactor()->getZExtValue() /
+ (float)PseudoProbeFullDistributionFactor;
+ return Probe;
+ }
+
+ if (isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst))
+ return extractProbeFromDiscriminator(Inst);
+
+ return None;
+}
+
+void setProbeDistributionFactor(Instruction &Inst, float Factor) {
+ assert(Factor >= 0 && Factor <= 1 &&
+ "Distribution factor must be in [0, 1.0]");
+ if (auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
+ IRBuilder<> Builder(&Inst);
+ uint64_t IntFactor = PseudoProbeFullDistributionFactor;
+ if (Factor < 1)
+ IntFactor *= Factor;
+ auto OrigFactor = II->getFactor()->getZExtValue();
+ if (IntFactor != OrigFactor)
+ II->replaceUsesOfWith(II->getFactor(), Builder.getInt64(IntFactor));
+ } else if (isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst)) {
+ if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
+ const DILocation *DIL = DLoc;
+ auto Discriminator = DIL->getDiscriminator();
+ if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
+ auto Index =
+ PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
+ auto Type =
+ PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
+ auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(
+ Discriminator);
+ // Round small factors to 0 to avoid over-counting.
+ uint32_t IntFactor =
+ PseudoProbeDwarfDiscriminator::FullDistributionFactor;
+ if (Factor < 1)
+ IntFactor *= Factor;
+ uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+ Index, Type, Attr, IntFactor);
+ DIL = DIL->cloneWithDiscriminator(V);
+ Inst.setDebugLoc(DIL);
+ }
+ }
+ }
+}
+} // namespace llvm
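Editor's note, not part of the upstream diff: a sketch of reading probes back out of a function with the extractProbe() helper defined above, assuming the declarations in llvm/IR/PseudoProbe.h; dumpProbes is an illustrative name.

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/Support/raw_ostream.h"

void dumpProbes(const llvm::Function &F) {
  for (const llvm::BasicBlock &BB : F)
    for (const llvm::Instruction &I : BB)
      // Covers both llvm.pseudoprobe intrinsics and probes encoded in the
      // DWARF discriminator of plain call instructions.
      if (llvm::Optional<llvm::PseudoProbe> P = llvm::extractProbe(I))
        llvm::errs() << "probe " << P->Id << " factor " << P->Factor << "\n";
}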
diff --git a/contrib/llvm-project/llvm/lib/IR/ReplaceConstant.cpp b/contrib/llvm-project/llvm/lib/IR/ReplaceConstant.cpp
new file mode 100644
index 000000000000..7efa525d427e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/IR/ReplaceConstant.cpp
@@ -0,0 +1,70 @@
+//===- ReplaceConstant.cpp - Replace LLVM constant expression--------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a utility function for replacing LLVM constant
+// expressions by instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/ReplaceConstant.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/NoFolder.h"
+
+namespace llvm {
+// Replace a constant expression by instructions with equivalent operations at
+// a specified location.
+Instruction *createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
+ IRBuilder<NoFolder> Builder(Instr);
+ unsigned OpCode = CE->getOpcode();
+ switch (OpCode) {
+ case Instruction::GetElementPtr: {
+ SmallVector<Value *, 4> CEOpVec(CE->operands());
+ ArrayRef<Value *> CEOps(CEOpVec);
+ return dyn_cast<Instruction>(
+ Builder.CreateInBoundsGEP(cast<GEPOperator>(CE)->getSourceElementType(),
+ CEOps[0], CEOps.slice(1)));
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return dyn_cast<Instruction>(
+ Builder.CreateBinOp((Instruction::BinaryOps)OpCode, CE->getOperand(0),
+ CE->getOperand(1), CE->getName()));
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return dyn_cast<Instruction>(
+ Builder.CreateCast((Instruction::CastOps)OpCode, CE->getOperand(0),
+ CE->getType(), CE->getName()));
+ default:
+ llvm_unreachable("Unhandled constant expression!\n");
+ }
+}
+} // namespace llvm
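Editor's note, not part of the upstream diff: a sketch of expanding a constant-expression operand with the new helper, assuming the declaration in llvm/IR/ReplaceConstant.h; expandConstExprOperand is an illustrative name.

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/ReplaceConstant.h"

void expandConstExprOperand(llvm::Instruction &I, unsigned OpIdx) {
  if (auto *CE = llvm::dyn_cast<llvm::ConstantExpr>(I.getOperand(OpIdx))) {
    // Materialize the constant expression as a real instruction right before
    // I, then make I use the new instruction instead of the ConstantExpr.
    llvm::Instruction *NewI = llvm::createReplacementInstr(CE, &I);
    I.setOperand(OpIdx, NewI);
  }
}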
diff --git a/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp b/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp
index 6bf7caa50a12..8ed31b6668e0 100644
--- a/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -561,8 +561,7 @@ GCPtrTracker::GCPtrTracker(const Function &F, const DominatorTree &DT,
}
BasicBlockState *GCPtrTracker::getBasicBlockState(const BasicBlock *BB) {
- auto it = BlockMap.find(BB);
- return it != BlockMap.end() ? it->second : nullptr;
+ return BlockMap.lookup(BB);
}
const BasicBlockState *GCPtrTracker::getBasicBlockState(
diff --git a/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp b/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp
new file mode 100644
index 000000000000..5a6e07451326
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp
@@ -0,0 +1,84 @@
+//===-- StructuralHash.cpp - IR Hash for expensive checks -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifdef EXPENSIVE_CHECKS
+
+#include "llvm/IR/StructuralHash.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+namespace {
+namespace details {
+
+// Basic hashing mechanism to detect structural change to the IR, used to verify
+// pass return status consistency with actual change. Loosely copied from
+// llvm/lib/Transforms/Utils/FunctionComparator.cpp
+
+class StructuralHash {
+ uint64_t Hash = 0x6acaa36bef8325c5ULL;
+
+ void update(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+
+public:
+ StructuralHash() = default;
+
+ void update(const Function &F) {
+ if (F.empty())
+ return;
+
+ update(F.isVarArg());
+ update(F.arg_size());
+
+ SmallVector<const BasicBlock *, 8> BBs;
+ SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
+
+ BBs.push_back(&F.getEntryBlock());
+ VisitedBBs.insert(BBs[0]);
+ while (!BBs.empty()) {
+ const BasicBlock *BB = BBs.pop_back_val();
+ update(45798); // Block header
+ for (auto &Inst : *BB)
+ update(Inst.getOpcode());
+
+ const Instruction *Term = BB->getTerminator();
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+ continue;
+ BBs.push_back(Term->getSuccessor(i));
+ }
+ }
+ }
+
+ void update(const Module &M) {
+ for (const Function &F : M)
+ update(F);
+ }
+
+ uint64_t getHash() const { return Hash; }
+};
+
+} // namespace details
+
+} // namespace
+
+uint64_t llvm::StructuralHash(const Function &F) {
+ details::StructuralHash H;
+ H.update(F);
+ return H.getHash();
+}
+
+uint64_t llvm::StructuralHash(const Module &M) {
+ details::StructuralHash H;
+ H.update(M);
+ return H.getHash();
+}
+
+#endif
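Editor's note, not part of the upstream diff: in expensive-checks builds this hash is used to catch passes that mutate the IR while reporting that they made no change; a sketch of that pattern, with an illustrative helper name.

#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/Function.h"
#include "llvm/IR/StructuralHash.h"
#include <cassert>
#include <cstdint>

template <typename PassRunner>
void checkNoChangeClaim(llvm::Function &F, PassRunner RunPass) {
  uint64_t Before = llvm::StructuralHash(F);
  bool Changed = RunPass(F); // the pass reports whether it modified F
  assert((Changed || Before == llvm::StructuralHash(F)) &&
         "pass mutated the IR but claimed it made no change");
  (void)Before;
  (void)Changed;
}
#endif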
diff --git a/contrib/llvm-project/llvm/lib/IR/Type.cpp b/contrib/llvm-project/llvm/lib/IR/Type.cpp
index d869a6e07cca..bade7dc325f4 100644
--- a/contrib/llvm-project/llvm/lib/IR/Type.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Type.cpp
@@ -49,6 +49,7 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
case LabelTyID : return getLabelTy(C);
case MetadataTyID : return getMetadataTy(C);
case X86_MMXTyID : return getX86_MMXTy(C);
+ case X86_AMXTyID : return getX86_AMXTy(C);
case TokenTyID : return getTokenTy(C);
default:
return nullptr;
@@ -81,6 +82,14 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const {
Ty->getPrimitiveSizeInBits().getFixedSize() == 64)
return true;
+ // 8192-bit fixed width vector types can be losslessly converted to x86amx.
+ if (((isa<FixedVectorType>(this)) && Ty->isX86_AMXTy()) &&
+ getPrimitiveSizeInBits().getFixedSize() == 8192)
+ return true;
+ if ((isX86_AMXTy() && isa<FixedVectorType>(Ty)) &&
+ Ty->getPrimitiveSizeInBits().getFixedSize() == 8192)
+ return true;
+
// At this point we have only various mismatches of the first class types
// remaining and ptr->ptr. Just select the lossless conversions. Everything
// else is not lossless. Conservatively assume we can't losslessly convert
@@ -120,6 +129,7 @@ TypeSize Type::getPrimitiveSizeInBits() const {
case Type::FP128TyID: return TypeSize::Fixed(128);
case Type::PPC_FP128TyID: return TypeSize::Fixed(128);
case Type::X86_MMXTyID: return TypeSize::Fixed(64);
+ case Type::X86_AMXTyID: return TypeSize::Fixed(8192);
case Type::IntegerTyID:
return TypeSize::Fixed(cast<IntegerType>(this)->getBitWidth());
case Type::FixedVectorTyID:
@@ -128,7 +138,7 @@ TypeSize Type::getPrimitiveSizeInBits() const {
ElementCount EC = VTy->getElementCount();
TypeSize ETS = VTy->getElementType()->getPrimitiveSizeInBits();
assert(!ETS.isScalable() && "Vector type should have fixed-width elements");
- return {ETS.getFixedSize() * EC.Min, EC.Scalable};
+ return {ETS.getFixedSize() * EC.getKnownMinValue(), EC.isScalable()};
}
default: return TypeSize::Fixed(0);
}
@@ -179,6 +189,7 @@ Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; }
Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; }
Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; }
Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; }
+Type *Type::getX86_AMXTy(LLVMContext &C) { return &C.pImpl->X86_AMXTy; }
IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; }
IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; }
@@ -223,6 +234,10 @@ PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
return getX86_MMXTy(C)->getPointerTo(AS);
}
+PointerType *Type::getX86_AMXPtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_AMXTy(C)->getPointerTo(AS);
+}
+
PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
return getIntNTy(C, N)->getPointerTo(AS);
}
@@ -275,11 +290,6 @@ IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
return Entry;
}
-bool IntegerType::isPowerOf2ByteWidth() const {
- unsigned BitWidth = getBitWidth();
- return (BitWidth > 7) && isPowerOf2_32(BitWidth);
-}
-
APInt IntegerType::getMask() const {
return APInt::getAllOnesValue(getBitWidth());
}
@@ -380,6 +390,18 @@ StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
return ST;
}
+bool StructType::containsScalableVectorType() const {
+ for (Type *Ty : elements()) {
+ if (isa<ScalableVectorType>(Ty))
+ return true;
+ if (auto *STy = dyn_cast<StructType>(Ty))
+ if (STy->containsScalableVectorType())
+ return true;
+ }
+
+ return false;
+}
+
void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
assert(isOpaque() && "Struct body already set!");
@@ -499,9 +521,14 @@ bool StructType::isSized(SmallPtrSetImpl<Type*> *Visited) const {
// Okay, our struct is sized if all of the elements are, but if one of the
// elements is opaque, the struct isn't sized *yet*, but may become sized in
// the future, so just bail out without caching.
- for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
- if (!(*I)->isSized(Visited))
+ for (Type *Ty : elements()) {
+ // If the struct contains a scalable vector type, don't consider it sized.
+ // This prevents it from being used in loads/stores/allocas/GEPs.
+ if (isa<ScalableVectorType>(Ty))
+ return false;
+ if (!Ty->isSized(Visited))
return false;
+ }
// Here we cheat a bit and cast away const-ness. The goal is to memoize when
// we find a sized type, as types can only move from opaque to sized, not the
@@ -521,7 +548,7 @@ StringRef StructType::getName() const {
bool StructType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
- !ElemTy->isTokenTy() && !isa<ScalableVectorType>(ElemTy);
+ !ElemTy->isTokenTy();
}
bool StructType::isLayoutIdentical(StructType *Other) const {
@@ -533,10 +560,6 @@ bool StructType::isLayoutIdentical(StructType *Other) const {
return elements() == Other->elements();
}
-StructType *Module::getTypeByName(StringRef Name) const {
- return getContext().pImpl->NamedStructTypes.lookup(Name);
-}
-
Type *StructType::getTypeAtIndex(const Value *V) const {
unsigned Idx = (unsigned)cast<Constant>(V)->getUniqueInteger().getZExtValue();
assert(indexValid(Idx) && "Invalid structure index!");
@@ -557,6 +580,10 @@ bool StructType::indexValid(const Value *V) const {
return CU && CU->getZExtValue() < getNumElements();
}
+StructType *StructType::getTypeByName(LLVMContext &C, StringRef Name) {
+ return C.pImpl->NamedStructTypes.lookup(Name);
+}
+
//===----------------------------------------------------------------------===//
// ArrayType Implementation
//===----------------------------------------------------------------------===//
@@ -598,10 +625,10 @@ VectorType::VectorType(Type *ElType, unsigned EQ, Type::TypeID TID)
}
VectorType *VectorType::get(Type *ElementType, ElementCount EC) {
- if (EC.Scalable)
- return ScalableVectorType::get(ElementType, EC.Min);
+ if (EC.isScalable())
+ return ScalableVectorType::get(ElementType, EC.getKnownMinValue());
else
- return FixedVectorType::get(ElementType, EC.Min);
+ return FixedVectorType::get(ElementType, EC.getKnownMinValue());
}
bool VectorType::isValidElementType(Type *ElemTy) {
@@ -619,7 +646,7 @@ FixedVectorType *FixedVectorType::get(Type *ElementType, unsigned NumElts) {
"be an integer, floating point, or "
"pointer type.");
- ElementCount EC(NumElts, false);
+ auto EC = ElementCount::getFixed(NumElts);
LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
VectorType *&Entry = ElementType->getContext()
@@ -641,7 +668,7 @@ ScalableVectorType *ScalableVectorType::get(Type *ElementType,
"be an integer, floating point, or "
"pointer type.");
- ElementCount EC(MinNumElts, true);
+ auto EC = ElementCount::getScalable(MinNumElts);
LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
VectorType *&Entry = ElementType->getContext()
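Editor's note, not part of the upstream diff: the named-struct lookup moved from Module to a static StructType method in this release; a sketch of the corresponding caller migration, with an illustrative function name.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"

llvm::StructType *findNamedStruct(llvm::Module &M, llvm::StringRef Name) {
  // Previously: return M.getTypeByName(Name);
  return llvm::StructType::getTypeByName(M.getContext(), Name);
}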
diff --git a/contrib/llvm-project/llvm/lib/IR/Use.cpp b/contrib/llvm-project/llvm/lib/IR/Use.cpp
index dc0716b85372..99049c0232aa 100644
--- a/contrib/llvm-project/llvm/lib/IR/Use.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Use.cpp
@@ -17,24 +17,17 @@ void Use::swap(Use &RHS) {
if (Val == RHS.Val)
return;
- if (Val)
- removeFromList();
-
- Value *OldVal = Val;
- if (RHS.Val) {
- RHS.removeFromList();
- Val = RHS.Val;
- Val->addUse(*this);
- } else {
- Val = nullptr;
- }
-
- if (OldVal) {
- RHS.Val = OldVal;
- RHS.Val->addUse(RHS);
- } else {
- RHS.Val = nullptr;
- }
+ std::swap(Val, RHS.Val);
+ std::swap(Next, RHS.Next);
+ std::swap(Prev, RHS.Prev);
+
+ *Prev = this;
+ if (Next)
+ Next->Prev = &Next;
+
+ *RHS.Prev = &RHS;
+ if (RHS.Next)
+ RHS.Next->Prev = &RHS.Next;
}
unsigned Use::getOperandNo() const {
diff --git a/contrib/llvm-project/llvm/lib/IR/User.cpp b/contrib/llvm-project/llvm/lib/IR/User.cpp
index 7da592f40127..9105c6fbd230 100644
--- a/contrib/llvm-project/llvm/lib/IR/User.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/User.cpp
@@ -29,7 +29,7 @@ void User::replaceUsesOfWith(Value *From, Value *To) {
// The side effects of this setOperand call include linking to
// "To", adding "this" to the uses list of To, and
// most importantly, removing "this" from the use list of "From".
- setOperand(i, To); // Fix it now...
+ setOperand(i, To);
}
}
diff --git a/contrib/llvm-project/llvm/lib/IR/Value.cpp b/contrib/llvm-project/llvm/lib/IR/Value.cpp
index efb8d53e8964..572f37a32410 100644
--- a/contrib/llvm-project/llvm/lib/IR/Value.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Value.cpp
@@ -51,9 +51,9 @@ static inline Type *checkType(Type *Ty) {
}
Value::Value(Type *ty, unsigned scid)
- : VTy(checkType(ty)), UseList(nullptr), SubclassID(scid),
- HasValueHandle(0), SubclassOptionalData(0), SubclassData(0),
- NumUserOperands(0), IsUsedByMD(false), HasName(false) {
+ : VTy(checkType(ty)), UseList(nullptr), SubclassID(scid), HasValueHandle(0),
+ SubclassOptionalData(0), SubclassData(0), NumUserOperands(0),
+ IsUsedByMD(false), HasName(false), HasMetadata(false) {
static_assert(ConstantFirstVal == 0, "!(SubclassID < ConstantFirstVal)");
// FIXME: Why isn't this in the subclass gunk??
// Note, we cannot call isa<CallInst> before the CallInst has been
@@ -77,6 +77,10 @@ Value::~Value() {
if (isUsedByMetadata())
ValueAsMetadata::handleDeletion(this);
+ // Remove associated metadata from context.
+ if (HasMetadata)
+ clearMetadata();
+
#ifndef NDEBUG // Only in -g mode...
// Check to make sure that there are no uses of this value that are still
// around when the value is destroyed. If there are, then we have a dangling
@@ -147,6 +151,14 @@ bool Value::hasNUsesOrMore(unsigned N) const {
return hasNItemsOrMore(use_begin(), use_end(), N);
}
+bool Value::hasOneUser() const {
+ if (use_empty())
+ return false;
+ if (hasOneUse())
+ return true;
+ return std::equal(++user_begin(), user_end(), user_begin());
+}
+
static bool isUnDroppableUser(const User *U) { return !U->isDroppable(); }
Use *Value::getSingleUndroppableUse() {
@@ -175,21 +187,34 @@ void Value::dropDroppableUses(
for (Use &U : uses())
if (U.getUser()->isDroppable() && ShouldDrop(&U))
ToBeEdited.push_back(&U);
- for (Use *U : ToBeEdited) {
- U->removeFromList();
- if (auto *Assume = dyn_cast<IntrinsicInst>(U->getUser())) {
- assert(Assume->getIntrinsicID() == Intrinsic::assume);
- unsigned OpNo = U->getOperandNo();
- if (OpNo == 0)
- Assume->setOperand(0, ConstantInt::getTrue(Assume->getContext()));
- else {
- Assume->setOperand(OpNo, UndefValue::get(U->get()->getType()));
- CallInst::BundleOpInfo &BOI = Assume->getBundleOpInfoForOperand(OpNo);
- BOI.Tag = getContext().pImpl->getOrInsertBundleTag("ignore");
- }
- } else
- llvm_unreachable("unkown droppable use");
+ for (Use *U : ToBeEdited)
+ dropDroppableUse(*U);
+}
+
+void Value::dropDroppableUsesIn(User &Usr) {
+ assert(Usr.isDroppable() && "Expected a droppable user!");
+ for (Use &UsrOp : Usr.operands()) {
+ if (UsrOp.get() == this)
+ dropDroppableUse(UsrOp);
+ }
+}
+
+void Value::dropDroppableUse(Use &U) {
+ U.removeFromList();
+ if (auto *Assume = dyn_cast<IntrinsicInst>(U.getUser())) {
+ assert(Assume->getIntrinsicID() == Intrinsic::assume);
+ unsigned OpNo = U.getOperandNo();
+ if (OpNo == 0)
+ U.set(ConstantInt::getTrue(Assume->getContext()));
+ else {
+ U.set(UndefValue::get(U.get()->getType()));
+ CallInst::BundleOpInfo &BOI = Assume->getBundleOpInfoForOperand(OpNo);
+ BOI.Tag = Assume->getContext().pImpl->getOrInsertBundleTag("ignore");
+ }
+ return;
}
+
+ llvm_unreachable("unkown droppable use");
}
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
@@ -405,6 +430,18 @@ void Value::takeName(Value *V) {
ST->reinsertValue(this);
}
+#ifndef NDEBUG
+std::string Value::getNameOrAsOperand() const {
+ if (!getName().empty())
+ return std::string(getName());
+
+ std::string BBName;
+ raw_string_ostream OS(BBName);
+ printAsOperand(OS, false);
+ return OS.str();
+}
+#endif
+
void Value::assertModuleIsMaterializedImpl() const {
#ifndef NDEBUG
const GlobalValue *GV = dyn_cast<GlobalValue>(this);
@@ -691,11 +728,16 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
CanBeNull = false;
if (const Argument *A = dyn_cast<Argument>(this)) {
DerefBytes = A->getDereferenceableBytes();
- if (DerefBytes == 0 && (A->hasByValAttr() || A->hasStructRetAttr())) {
- Type *PT = cast<PointerType>(A->getType())->getElementType();
- if (PT->isSized())
- DerefBytes = DL.getTypeStoreSize(PT).getKnownMinSize();
+ if (DerefBytes == 0) {
+ // Handle byval/byref/inalloca/preallocated arguments
+ if (Type *ArgMemTy = A->getPointeeInMemoryValueType()) {
+ if (ArgMemTy->isSized()) {
+ // FIXME: Why isn't this the type alloc size?
+ DerefBytes = DL.getTypeStoreSize(ArgMemTy).getKnownMinSize();
+ }
+ }
}
+
if (DerefBytes == 0) {
DerefBytes = A->getDereferenceableOrNullBytes();
CanBeNull = true;
@@ -783,7 +825,7 @@ Align Value::getPointerAlignment(const DataLayout &DL) const {
const MaybeAlign Alignment = A->getParamAlign();
if (!Alignment && A->hasStructRetAttr()) {
// An sret parameter has at least the ABI alignment of the return type.
- Type *EltTy = cast<PointerType>(A->getType())->getElementType();
+ Type *EltTy = A->getParamStructRetType();
if (EltTy->isSized())
return DL.getABITypeAlign(EltTy);
}
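Editor's note, not part of the upstream diff: hasOneUser() differs from hasOneUse() in that a value referenced twice by the same instruction has two uses but a single user; a small illustrative sketch.

#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"

void describeUses(const llvm::Value &V) {
  if (V.hasOneUse())
    llvm::errs() << V.getName() << ": exactly one use\n";
  else if (V.hasOneUser())
    llvm::errs() << V.getName() << ": several uses, all from one user\n";
}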
diff --git a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
index c518ae87ea9b..6dd299ee9845 100644
--- a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
@@ -115,6 +115,11 @@
using namespace llvm;
+static cl::opt<bool> VerifyNoAliasScopeDomination(
+ "verify-noalias-scope-decl-dom", cl::Hidden, cl::init(false),
+ cl::desc("Ensure that llvm.experimental.noalias.scope.decl for identical "
+ "scopes are not dominating"));
+
namespace llvm {
struct VerifierSupport {
@@ -282,6 +287,9 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// Whether the current function has a DISubprogram attached to it.
bool HasDebugInfo = false;
+ /// The current source language.
+ dwarf::SourceLanguage CurrentSourceLang = dwarf::DW_LANG_lo_user;
+
/// Whether source was present on the first DIFile encountered in each CU.
DenseMap<const DICompileUnit *, bool> HasSourceDebugInfo;
@@ -310,6 +318,8 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
TBAAVerifier TBAAVerifyHelper;
+ SmallVector<IntrinsicInst *, 4> NoAliasScopeDecls;
+
void checkAtomicMemAccessSize(Type *Ty, const Instruction *I);
public:
@@ -357,6 +367,8 @@ public:
LandingPadResultTy = nullptr;
SawFrameEscape = false;
SiblingFuncletInfo.clear();
+ verifyNoAliasScopeDecl();
+ NoAliasScopeDecls.clear();
return !Broken;
}
@@ -424,6 +436,7 @@ private:
void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
void visitProfMetadata(Instruction &I, MDNode *MD);
+ void visitAnnotationMetadata(MDNode *Annotation);
template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -501,8 +514,6 @@ private:
void verifySwiftErrorCall(CallBase &Call, const Value *SwiftErrorVal);
void verifySwiftErrorValue(const Value *SwiftErrorVal);
void verifyMustTailCall(CallInst &CI);
- bool performTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty, int VT,
- unsigned ArgNo, std::string &Suffix);
bool verifyAttributeCount(AttributeList Attrs, unsigned Params);
void verifyAttributeTypes(AttributeSet Attrs, bool IsFunction,
const Value *V);
@@ -534,6 +545,9 @@ private:
/// Verify all-or-nothing property of DIFile source attribute within a CU.
void verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F);
+
+ /// Verify the llvm.experimental.noalias.scope.decl declarations
+ void verifyNoAliasScopeDecl();
};
} // end anonymous namespace
@@ -589,7 +603,8 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
Assert(!GV.isDSOLocal(),
"GlobalValue with DLLImport Storage is dso_local!", &GV);
- Assert((GV.isDeclaration() && GV.hasExternalLinkage()) ||
+ Assert((GV.isDeclaration() &&
+ (GV.hasExternalLinkage() || GV.hasExternalWeakLinkage())) ||
GV.hasAvailableExternallyLinkage(),
"Global is marked as dllimport, but not external", &GV);
}
@@ -699,12 +714,16 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
}
// Scalable vectors cannot be global variables, since we don't know
- // the runtime size. If the global is a struct or an array containing
- // scalable vectors, that will be caught by the isValidElementType methods
- // in StructType or ArrayType instead.
+ // the runtime size. If the global is an array containing scalable vectors,
+ // that will be caught by the isValidElementType methods in StructType or
+ // ArrayType instead.
Assert(!isa<ScalableVectorType>(GV.getValueType()),
"Globals cannot contain scalable vectors", &GV);
+ if (auto *STy = dyn_cast<StructType>(GV.getValueType()))
+ Assert(!STy->containsScalableVectorType(),
+ "Globals cannot contain scalable vectors", &GV);
+
if (!GV.hasInitializer()) {
visitGlobalValue(GV);
return;
@@ -894,7 +913,9 @@ void Verifier::visitDIScope(const DIScope &N) {
void Verifier::visitDISubrange(const DISubrange &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N);
- AssertDI(N.getRawCountNode() || N.getRawUpperBound(),
+ bool HasAssumedSizedArraySupport = dwarf::isFortran(CurrentSourceLang);
+ AssertDI(HasAssumedSizedArraySupport || N.getRawCountNode() ||
+ N.getRawUpperBound(),
"Subrange must contain count or upperBound", &N);
AssertDI(!N.getRawCountNode() || !N.getRawUpperBound(),
"Subrange can have any one of count or upperBound", &N);
@@ -920,14 +941,43 @@ void Verifier::visitDISubrange(const DISubrange &N) {
"Stride must be signed constant or DIVariable or DIExpression", &N);
}
+void Verifier::visitDIGenericSubrange(const DIGenericSubrange &N) {
+ AssertDI(N.getTag() == dwarf::DW_TAG_generic_subrange, "invalid tag", &N);
+ AssertDI(N.getRawCountNode() || N.getRawUpperBound(),
+ "GenericSubrange must contain count or upperBound", &N);
+ AssertDI(!N.getRawCountNode() || !N.getRawUpperBound(),
+ "GenericSubrange can have any one of count or upperBound", &N);
+ auto *CBound = N.getRawCountNode();
+ AssertDI(!CBound || isa<DIVariable>(CBound) || isa<DIExpression>(CBound),
+ "Count must be signed constant or DIVariable or DIExpression", &N);
+ auto *LBound = N.getRawLowerBound();
+ AssertDI(LBound, "GenericSubrange must contain lowerBound", &N);
+ AssertDI(isa<DIVariable>(LBound) || isa<DIExpression>(LBound),
+ "LowerBound must be signed constant or DIVariable or DIExpression",
+ &N);
+ auto *UBound = N.getRawUpperBound();
+ AssertDI(!UBound || isa<DIVariable>(UBound) || isa<DIExpression>(UBound),
+ "UpperBound must be signed constant or DIVariable or DIExpression",
+ &N);
+ auto *Stride = N.getRawStride();
+ AssertDI(Stride, "GenericSubrange must contain stride", &N);
+ AssertDI(isa<DIVariable>(Stride) || isa<DIExpression>(Stride),
+ "Stride must be signed constant or DIVariable or DIExpression", &N);
+}
+
void Verifier::visitDIEnumerator(const DIEnumerator &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_enumerator, "invalid tag", &N);
}
void Verifier::visitDIBasicType(const DIBasicType &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_base_type ||
- N.getTag() == dwarf::DW_TAG_unspecified_type,
+ N.getTag() == dwarf::DW_TAG_unspecified_type ||
+ N.getTag() == dwarf::DW_TAG_string_type,
"invalid tag", &N);
+}
+
+void Verifier::visitDIStringType(const DIStringType &N) {
+ AssertDI(N.getTag() == dwarf::DW_TAG_string_type, "invalid tag", &N);
AssertDI(!(N.isBigEndian() && N.isLittleEndian()) ,
"has conflicting flags", &N);
}
@@ -1020,12 +1070,6 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
if (auto *Params = N.getRawTemplateParams())
visitTemplateParams(N, *Params);
- if (N.getTag() == dwarf::DW_TAG_class_type ||
- N.getTag() == dwarf::DW_TAG_union_type) {
- AssertDI(N.getFile() && !N.getFile()->getFilename().empty(),
- "class/union requires a filename", &N, N.getFile());
- }
-
if (auto *D = N.getRawDiscriminator()) {
AssertDI(isa<DIDerivedType>(D) && N.getTag() == dwarf::DW_TAG_variant_part,
"discriminator can only appear on variant part");
@@ -1035,6 +1079,21 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
"dataLocation can only appear in array type");
}
+
+ if (N.getRawAssociated()) {
+ AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
+ "associated can only appear in array type");
+ }
+
+ if (N.getRawAllocated()) {
+ AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
+ "allocated can only appear in array type");
+ }
+
+ if (N.getRawRank()) {
+ AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
+ "rank can only appear in array type");
+ }
}
void Verifier::visitDISubroutineType(const DISubroutineType &N) {
@@ -1084,6 +1143,8 @@ void Verifier::visitDICompileUnit(const DICompileUnit &N) {
AssertDI(!N.getFile()->getFilename().empty(), "invalid filename", &N,
N.getFile());
+ CurrentSourceLang = (dwarf::SourceLanguage)N.getSourceLanguage();
+
verifySourceDebugInfo(N, *N.getFile());
AssertDI((N.getEmissionKind() <= DICompileUnit::LastEmissionKind),
@@ -1525,8 +1586,8 @@ void Verifier::visitModuleFlagCGProfileEntry(const MDOperand &MDO) {
if (!FuncMDO)
return;
auto F = dyn_cast<ValueAsMetadata>(FuncMDO);
- Assert(F && isa<Function>(F->getValue()), "expected a Function or null",
- FuncMDO);
+ Assert(F && isa<Function>(F->getValue()->stripPointerCasts()),
+ "expected a Function or null", FuncMDO);
};
auto Node = dyn_cast_or_null<MDNode>(MDO);
Assert(Node && Node->getNumOperands() == 3, "expected a MDNode triple", MDO);
@@ -1544,6 +1605,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
case Attribute::NoReturn:
case Attribute::NoSync:
case Attribute::WillReturn:
+ case Attribute::NoCallback:
case Attribute::NoCfCheck:
case Attribute::NoUnwind:
case Attribute::NoInline:
@@ -1572,6 +1634,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
case Attribute::Builtin:
case Attribute::NoBuiltin:
case Attribute::Cold:
+ case Attribute::Hot:
case Attribute::OptForFuzzing:
case Attribute::OptimizeNone:
case Attribute::JumpTable:
@@ -1585,6 +1648,8 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
case Attribute::Speculatable:
case Attribute::StrictFP:
case Attribute::NullPointerIsValid:
+ case Attribute::MustProgress:
+ case Attribute::NoProfile:
return true;
default:
break;
@@ -1652,9 +1717,10 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
AttrCount += Attrs.hasAttribute(Attribute::StructRet) ||
Attrs.hasAttribute(Attribute::InReg);
AttrCount += Attrs.hasAttribute(Attribute::Nest);
+ AttrCount += Attrs.hasAttribute(Attribute::ByRef);
Assert(AttrCount <= 1,
"Attributes 'byval', 'inalloca', 'preallocated', 'inreg', 'nest', "
- "and 'sret' are incompatible!",
+ "'byref', and 'sret' are incompatible!",
V);
Assert(!(Attrs.hasAttribute(Attribute::InAlloca) &&
@@ -1699,17 +1765,6 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
"'noinline and alwaysinline' are incompatible!",
V);
- if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
- Assert(Attrs.getByValType() == cast<PointerType>(Ty)->getElementType(),
- "Attribute 'byval' type does not match parameter!", V);
- }
-
- if (Attrs.hasAttribute(Attribute::Preallocated)) {
- Assert(Attrs.getPreallocatedType() ==
- cast<PointerType>(Ty)->getElementType(),
- "Attribute 'preallocated' type does not match parameter!", V);
- }
-
AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
Assert(!AttrBuilder(Attrs).overlaps(IncompatibleAttrs),
"Wrong types for attribute: " +
@@ -1720,9 +1775,10 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
SmallPtrSet<Type*, 4> Visited;
if (!PTy->getElementType()->isSized(&Visited)) {
Assert(!Attrs.hasAttribute(Attribute::ByVal) &&
- !Attrs.hasAttribute(Attribute::InAlloca) &&
- !Attrs.hasAttribute(Attribute::Preallocated),
- "Attributes 'byval', 'inalloca', and 'preallocated' do not "
+ !Attrs.hasAttribute(Attribute::ByRef) &&
+ !Attrs.hasAttribute(Attribute::InAlloca) &&
+ !Attrs.hasAttribute(Attribute::Preallocated),
+ "Attributes 'byval', 'byref', 'inalloca', and 'preallocated' do not "
"support unsized types!",
V);
}
@@ -1731,10 +1787,28 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
"Attribute 'swifterror' only applies to parameters "
"with pointer to pointer type!",
V);
+
+ if (Attrs.hasAttribute(Attribute::ByRef)) {
+ Assert(Attrs.getByRefType() == PTy->getElementType(),
+ "Attribute 'byref' type does not match parameter!", V);
+ }
+
+ if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
+ Assert(Attrs.getByValType() == PTy->getElementType(),
+ "Attribute 'byval' type does not match parameter!", V);
+ }
+
+ if (Attrs.hasAttribute(Attribute::Preallocated)) {
+ Assert(Attrs.getPreallocatedType() == PTy->getElementType(),
+ "Attribute 'preallocated' type does not match parameter!", V);
+ }
} else {
Assert(!Attrs.hasAttribute(Attribute::ByVal),
"Attribute 'byval' only applies to parameters with pointer type!",
V);
+ Assert(!Attrs.hasAttribute(Attribute::ByRef),
+ "Attribute 'byref' only applies to parameters with pointer type!",
+ V);
Assert(!Attrs.hasAttribute(Attribute::SwiftError),
"Attribute 'swifterror' only applies to parameters "
"with pointer type!",
@@ -1765,10 +1839,11 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
!RetAttrs.hasAttribute(Attribute::Returned) &&
!RetAttrs.hasAttribute(Attribute::InAlloca) &&
!RetAttrs.hasAttribute(Attribute::Preallocated) &&
+ !RetAttrs.hasAttribute(Attribute::ByRef) &&
!RetAttrs.hasAttribute(Attribute::SwiftSelf) &&
!RetAttrs.hasAttribute(Attribute::SwiftError)),
- "Attributes 'byval', 'inalloca', 'preallocated', 'nest', 'sret', "
- "'nocapture', 'nofree', "
+ "Attributes 'byval', 'inalloca', 'preallocated', 'byref', "
+ "'nest', 'sret', 'nocapture', 'nofree', "
"'returned', 'swiftself', and 'swifterror' do not apply to return "
"values!",
V);
@@ -2102,39 +2177,22 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
Call);
const int NumTransitionArgs =
cast<ConstantInt>(NumTransitionArgsV)->getZExtValue();
- Assert(NumTransitionArgs >= 0,
- "gc.statepoint number of transition arguments must be positive", Call);
+ Assert(NumTransitionArgs == 0,
+ "gc.statepoint w/inline transition bundle is deprecated", Call);
const int EndTransitionArgsInx = EndCallArgsInx + 1 + NumTransitionArgs;
- // We're migrating away from inline operands to operand bundles, enforce
- // the either/or property during transition.
- if (Call.getOperandBundle(LLVMContext::OB_gc_transition)) {
- Assert(NumTransitionArgs == 0,
- "can't use both deopt operands and deopt bundle on a statepoint");
- }
-
const Value *NumDeoptArgsV = Call.getArgOperand(EndTransitionArgsInx + 1);
Assert(isa<ConstantInt>(NumDeoptArgsV),
"gc.statepoint number of deoptimization arguments "
"must be constant integer",
Call);
const int NumDeoptArgs = cast<ConstantInt>(NumDeoptArgsV)->getZExtValue();
- Assert(NumDeoptArgs >= 0,
- "gc.statepoint number of deoptimization arguments "
- "must be positive",
- Call);
+ Assert(NumDeoptArgs == 0,
+ "gc.statepoint w/inline deopt operands is deprecated", Call);
- // We're migrating away from inline operands to operand bundles, enforce
- // the either/or property during transition.
- if (Call.getOperandBundle(LLVMContext::OB_deopt)) {
- Assert(NumDeoptArgs == 0,
- "can't use both deopt operands and deopt bundle on a statepoint");
- }
-
- const int ExpectedNumArgs =
- 7 + NumCallArgs + NumTransitionArgs + NumDeoptArgs;
- Assert(ExpectedNumArgs <= (int)Call.arg_size(),
- "gc.statepoint too few arguments according to length fields", Call);
+ const int ExpectedNumArgs = 7 + NumCallArgs;
+ Assert(ExpectedNumArgs == (int)Call.arg_size(),
+ "gc.statepoint too many arguments", Call);
// Check that the only uses of this gc.statepoint are gc.result or
// gc.relocate calls which are tied to this statepoint and thus part
@@ -2280,6 +2338,11 @@ void Verifier::visitFunction(const Function &F) {
default:
case CallingConv::C:
break;
+ case CallingConv::X86_INTR: {
+ Assert(F.arg_empty() || Attrs.hasParamAttribute(0, Attribute::ByVal),
+ "Calling convention parameter requires byval", &F);
+ break;
+ }
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
Assert(F.getReturnType()->isVoidTy(),
@@ -2292,6 +2355,28 @@ void Verifier::visitFunction(const Function &F) {
case CallingConv::AMDGPU_CS:
Assert(!F.hasStructRetAttr(),
"Calling convention does not allow sret", &F);
+ if (F.getCallingConv() != CallingConv::SPIR_KERNEL) {
+ const unsigned StackAS = DL.getAllocaAddrSpace();
+ unsigned i = 0;
+ for (const Argument &Arg : F.args()) {
+ Assert(!Attrs.hasParamAttribute(i, Attribute::ByVal),
+ "Calling convention disallows byval", &F);
+ Assert(!Attrs.hasParamAttribute(i, Attribute::Preallocated),
+ "Calling convention disallows preallocated", &F);
+ Assert(!Attrs.hasParamAttribute(i, Attribute::InAlloca),
+ "Calling convention disallows inalloca", &F);
+
+ if (Attrs.hasParamAttribute(i, Attribute::ByRef)) {
+ // FIXME: Should also disallow LDS and GDS, but we don't have the enum
+ // value here.
+ Assert(Arg.getType()->getPointerAddressSpace() != StackAS,
+ "Calling convention disallows stack byref", &F);
+ }
+
+ ++i;
+ }
+ }
+
LLVM_FALLTHROUGH;
case CallingConv::Fast:
case CallingConv::Cold:
@@ -2394,6 +2479,10 @@ void Verifier::visitFunction(const Function &F) {
"function must have a single !dbg attachment", &F, I.second);
AssertDI(isa<DISubprogram>(I.second),
"function !dbg attachment must be a subprogram", &F, I.second);
+ AssertDI(cast<DISubprogram>(I.second)->isDistinct(),
+ "function definition may only have a distinct !dbg attachment",
+ &F);
+
auto *SP = cast<DISubprogram>(I.second);
const Function *&AttachedTo = DISubprogramAttachments[SP];
AssertDI(!AttachedTo || AttachedTo == &F,
@@ -2488,15 +2577,10 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
// Check constraints that this basic block imposes on all of the PHI nodes in
// it.
if (isa<PHINode>(BB.front())) {
- SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
+ SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
llvm::sort(Preds);
for (const PHINode &PN : BB.phis()) {
- // Ensure that PHI nodes have at least one entry!
- Assert(PN.getNumIncomingValues() != 0,
- "PHI nodes must have at least one entry. If the block is dead, "
- "the PHI should be removed!",
- &PN);
Assert(PN.getNumIncomingValues() == Preds.size(),
"PHINode should have one entry for each predecessor of its "
"parent basic block!",
@@ -2887,8 +2971,8 @@ void Verifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
Assert(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
"AddrSpaceCast must be between different address spaces", &I);
if (auto *SrcVTy = dyn_cast<VectorType>(SrcTy))
- Assert(SrcVTy->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
+ Assert(SrcVTy->getElementCount() ==
+ cast<VectorType>(DestTy)->getElementCount(),
"AddrSpaceCast vector pointer number of elements mismatch", &I);
visitInstruction(I);
}
@@ -3174,17 +3258,19 @@ static bool isTypeCongruent(Type *L, Type *R) {
static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) {
static const Attribute::AttrKind ABIAttrs[] = {
- Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
- Attribute::InReg, Attribute::SwiftSelf, Attribute::SwiftError,
- Attribute::Preallocated};
+ Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
+ Attribute::InReg, Attribute::SwiftSelf, Attribute::SwiftError,
+ Attribute::Preallocated, Attribute::ByRef};
AttrBuilder Copy;
for (auto AK : ABIAttrs) {
if (Attrs.hasParamAttribute(I, AK))
Copy.addAttribute(AK);
}
- // `align` is ABI-affecting only in combination with `byval`.
+
+ // `align` is ABI-affecting only in combination with `byval` or `byref`.
if (Attrs.hasParamAttribute(I, Attribute::Alignment) &&
- Attrs.hasParamAttribute(I, Attribute::ByVal))
+ (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
+ Attrs.hasParamAttribute(I, Attribute::ByRef)))
Copy.addAlignmentAttr(Attrs.getParamAlignment(I));
return Copy;
}
@@ -3420,7 +3506,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
"GEP base pointer is not a vector or a vector of pointers", &GEP);
Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
- SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
+ SmallVector<Value *, 16> Idxs(GEP.indices());
Assert(all_of(
Idxs, [](Value* V) { return V->getType()->isIntOrIntVectorTy(); }),
"GEP indexes must be integers", &GEP);
@@ -4205,6 +4291,14 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
}
}
+void Verifier::visitAnnotationMetadata(MDNode *Annotation) {
+ Assert(isa<MDTuple>(Annotation), "annotation must be a tuple");
+ Assert(Annotation->getNumOperands() >= 1,
+ "annotation must have at least one operand");
+ for (const MDOperand &Op : Annotation->operands())
+ Assert(isa<MDString>(Op.get()), "operands must be strings");
+}
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
@@ -4274,7 +4368,7 @@ void Verifier::visitInstruction(Instruction &I) {
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
- F->getIntrinsicID() == Intrinsic::wasm_rethrow_in_catch,
+ F->getIntrinsicID() == Intrinsic::wasm_rethrow,
"Cannot invoke an intrinsic other than donothing, patchpoint, "
"statepoint, coro_resume or coro_destroy",
&I);
@@ -4365,6 +4459,9 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *MD = I.getMetadata(LLVMContext::MD_prof))
visitProfMetadata(I, MD);
+ if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation))
+ visitAnnotationMetadata(Annotation);
+
if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
AssertDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
visitMDNode(*N, AreDebugLocsAllowed::Yes);
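Editor's note, not part of the upstream diff: a sketch of attaching !annotation metadata of the shape visitAnnotationMetadata() accepts, i.e. an MDTuple whose operands are all MDStrings; the function name and the "example.note" string are illustrative.

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

void tagInstruction(llvm::Instruction &I) {
  llvm::LLVMContext &Ctx = I.getContext();
  llvm::MDNode *Ann =
      llvm::MDTuple::get(Ctx, {llvm::MDString::get(Ctx, "example.note")});
  I.setMetadata(llvm::LLVMContext::MD_annotation, Ann);
}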
@@ -4449,21 +4546,32 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Assert(Elem.Tag->getKey() == "ignore" ||
Attribute::isExistingAttribute(Elem.Tag->getKey()),
"tags must be valid attribute names");
- Assert(Elem.End - Elem.Begin <= 2, "to many arguments");
Attribute::AttrKind Kind =
Attribute::getAttrKindFromName(Elem.Tag->getKey());
+ unsigned ArgCount = Elem.End - Elem.Begin;
+ if (Kind == Attribute::Alignment) {
+ Assert(ArgCount <= 3 && ArgCount >= 2,
+ "alignment assumptions should have 2 or 3 arguments");
+ Assert(Call.getOperand(Elem.Begin)->getType()->isPointerTy(),
+ "first argument should be a pointer");
+ Assert(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(),
+ "second argument should be an integer");
+ if (ArgCount == 3)
+ Assert(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(),
+ "third argument should be an integer if present");
+ return;
+ }
+ Assert(ArgCount <= 2, "too many arguments");
if (Kind == Attribute::None)
break;
if (Attribute::doesAttrKindHaveArgument(Kind)) {
- Assert(Elem.End - Elem.Begin == 2,
- "this attribute should have 2 arguments");
+ Assert(ArgCount == 2, "this attribute should have 2 arguments");
Assert(isa<ConstantInt>(Call.getOperand(Elem.Begin + 1)),
"the second argument should be a constant integral value");
} else if (isFuncOnlyAttr(Kind)) {
- Assert((Elem.End - Elem.Begin) == 0, "this attribute has no argument");
+ Assert((ArgCount) == 0, "this attribute has no argument");
} else if (!isFuncOrArgAttr(Kind)) {
- Assert((Elem.End - Elem.Begin) == 1,
- "this attribute should have one argument");
+ Assert((ArgCount) == 1, "this attribute should have one argument");
}
}
break;
@@ -4774,45 +4882,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"gc.relocate: statepoint base index out of bounds", Call);
Assert(DerivedIndex < Opt->Inputs.size(),
"gc.relocate: statepoint derived index out of bounds", Call);
- } else {
- Assert(BaseIndex < StatepointCall.arg_size(),
- "gc.relocate: statepoint base index out of bounds", Call);
- Assert(DerivedIndex < StatepointCall.arg_size(),
- "gc.relocate: statepoint derived index out of bounds", Call);
-
- // Check that BaseIndex and DerivedIndex fall within the 'gc parameters'
- // section of the statepoint's argument.
- Assert(StatepointCall.arg_size() > 0,
- "gc.statepoint: insufficient arguments");
- Assert(isa<ConstantInt>(StatepointCall.getArgOperand(3)),
- "gc.statement: number of call arguments must be constant integer");
- const uint64_t NumCallArgs =
- cast<ConstantInt>(StatepointCall.getArgOperand(3))->getZExtValue();
- Assert(StatepointCall.arg_size() > NumCallArgs + 5,
- "gc.statepoint: mismatch in number of call arguments");
- Assert(isa<ConstantInt>(StatepointCall.getArgOperand(NumCallArgs + 5)),
- "gc.statepoint: number of transition arguments must be "
- "a constant integer");
- const uint64_t NumTransitionArgs =
- cast<ConstantInt>(StatepointCall.getArgOperand(NumCallArgs + 5))
- ->getZExtValue();
- const uint64_t DeoptArgsStart = 4 + NumCallArgs + 1 + NumTransitionArgs + 1;
- Assert(isa<ConstantInt>(StatepointCall.getArgOperand(DeoptArgsStart)),
- "gc.statepoint: number of deoptimization arguments must be "
- "a constant integer");
- const uint64_t NumDeoptArgs =
- cast<ConstantInt>(StatepointCall.getArgOperand(DeoptArgsStart))
- ->getZExtValue();
- const uint64_t GCParamArgsStart = DeoptArgsStart + 1 + NumDeoptArgs;
- const uint64_t GCParamArgsEnd = StatepointCall.arg_size();
- Assert(GCParamArgsStart <= BaseIndex && BaseIndex < GCParamArgsEnd,
- "gc.relocate: statepoint base index doesn't fall within the "
- "'gc parameters' section of the statepoint call",
- Call);
- Assert(GCParamArgsStart <= DerivedIndex && DerivedIndex < GCParamArgsEnd,
- "gc.relocate: statepoint derived index doesn't fall within the "
- "'gc parameters' section of the statepoint call",
- Call);
}
// Relocated value must be either a pointer type or vector-of-pointer type,
@@ -4941,15 +5010,17 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::sadd_sat:
case Intrinsic::uadd_sat:
case Intrinsic::ssub_sat:
- case Intrinsic::usub_sat: {
+ case Intrinsic::usub_sat:
+ case Intrinsic::sshl_sat:
+ case Intrinsic::ushl_sat: {
Value *Op1 = Call.getArgOperand(0);
Value *Op2 = Call.getArgOperand(1);
Assert(Op1->getType()->isIntOrIntVectorTy(),
- "first operand of [us][add|sub]_sat must be an int type or vector "
- "of ints");
+ "first operand of [us][add|sub|shl]_sat must be an int type or "
+ "vector of ints");
Assert(Op2->getType()->isIntOrIntVectorTy(),
- "second operand of [us][add|sub]_sat must be an int type or vector "
- "of ints");
+ "second operand of [us][add|sub|shl]_sat must be an int type or "
+ "vector of ints");
break;
}
case Intrinsic::smul_fix:
@@ -5002,6 +5073,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Assert(Size % 16 == 0, "bswap must be an even number of bytes", &Call);
break;
}
+ case Intrinsic::invariant_start: {
+ ConstantInt *InvariantSize = dyn_cast<ConstantInt>(Call.getArgOperand(0));
+ Assert(InvariantSize &&
+ (!InvariantSize->isNegative() || InvariantSize->isMinusOne()),
+ "invariant_start parameter must be -1, 0 or a positive number",
+ &Call);
+ break;
+ }
case Intrinsic::matrix_multiply:
case Intrinsic::matrix_transpose:
case Intrinsic::matrix_column_major_load:
@@ -5065,7 +5144,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"Vector element type mismatch of the result and second operand "
"vector!", IF);
- Assert(ResultTy->getNumElements() ==
+ Assert(cast<FixedVectorType>(ResultTy)->getNumElements() ==
NumRows->getZExtValue() * NumColumns->getZExtValue(),
"Result of a matrix operation does not fit in the returned vector!");
@@ -5075,6 +5154,30 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
break;
}
+ case Intrinsic::experimental_vector_insert: {
+ VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
+ VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
+
+ Assert(VecTy->getElementType() == SubVecTy->getElementType(),
+ "experimental_vector_insert parameters must have the same element "
+ "type.",
+ &Call);
+ break;
+ }
+ case Intrinsic::experimental_vector_extract: {
+ VectorType *ResultTy = cast<VectorType>(Call.getType());
+ VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
+
+ Assert(ResultTy->getElementType() == VecTy->getElementType(),
+ "experimental_vector_extract result must have the same element "
+ "type as the input vector.",
+ &Call);
+ break;
+ }
+ case Intrinsic::experimental_noalias_scope_decl: {
+ NoAliasScopeDecls.push_back(cast<IntrinsicInst>(&Call));
+ break;
+ }
};
}
@@ -5151,7 +5254,7 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
Assert(Operand->getType()->isFPOrFPVectorTy(),
"Intrinsic first argument must be floating point", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
- NumSrcElem = OperandT->getNumElements();
+ NumSrcElem = cast<FixedVectorType>(OperandT)->getNumElements();
}
Operand = &FPI;
@@ -5160,7 +5263,7 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
Assert(Operand->getType()->isIntOrIntVectorTy(),
"Intrinsic result must be an integer", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
- Assert(NumSrcElem == OperandT->getNumElements(),
+ Assert(NumSrcElem == cast<FixedVectorType>(OperandT)->getNumElements(),
"Intrinsic first argument and result vector lengths must be equal",
&FPI);
}
@@ -5174,7 +5277,7 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
Assert(Operand->getType()->isIntOrIntVectorTy(),
"Intrinsic first argument must be integer", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
- NumSrcElem = OperandT->getNumElements();
+ NumSrcElem = cast<FixedVectorType>(OperandT)->getNumElements();
}
Operand = &FPI;
@@ -5183,7 +5286,7 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
Assert(Operand->getType()->isFPOrFPVectorTy(),
"Intrinsic result must be a floating point", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
- Assert(NumSrcElem == OperandT->getNumElements(),
+ Assert(NumSrcElem == cast<FixedVectorType>(OperandT)->getNumElements(),
"Intrinsic first argument and result vector lengths must be equal",
&FPI);
}
@@ -5202,9 +5305,8 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
Assert(OperandTy->isVectorTy() == ResultTy->isVectorTy(),
"Intrinsic first argument and result disagree on vector use", &FPI);
if (OperandTy->isVectorTy()) {
- auto *OperandVecTy = cast<VectorType>(OperandTy);
- auto *ResultVecTy = cast<VectorType>(ResultTy);
- Assert(OperandVecTy->getNumElements() == ResultVecTy->getNumElements(),
+ Assert(cast<FixedVectorType>(OperandTy)->getNumElements() ==
+ cast<FixedVectorType>(ResultTy)->getNumElements(),
"Intrinsic first argument and result vector lengths must be equal",
&FPI);
}
@@ -5426,6 +5528,75 @@ void Verifier::verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F) {
"inconsistent use of embedded source");
}
+void Verifier::verifyNoAliasScopeDecl() {
+ if (NoAliasScopeDecls.empty())
+ return;
+
+ // Only a single scope must be declared at a time.
+ for (auto *II : NoAliasScopeDecls) {
+ assert(II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl &&
+ "Not a llvm.experimental.noalias.scope.decl ?");
+ const auto *ScopeListMV = dyn_cast<MetadataAsValue>(
+ II->getOperand(Intrinsic::NoAliasScopeDeclScopeArg));
+ Assert(ScopeListMV != nullptr,
+ "llvm.experimental.noalias.scope.decl must have a MetadataAsValue "
+ "argument",
+ II);
+
+ const auto *ScopeListMD = dyn_cast<MDNode>(ScopeListMV->getMetadata());
+ Assert(ScopeListMD != nullptr, "!id.scope.list must point to an MDNode",
+ II);
+ Assert(ScopeListMD->getNumOperands() == 1,
+ "!id.scope.list must point to a list with a single scope", II);
+ }
+
+ // Only check the domination rule when requested. Once all passes have been
+ // adapted this option can go away.
+ if (!VerifyNoAliasScopeDomination)
+ return;
+
+ // Now sort the intrinsics based on the scope MDNode so that declarations of
+ // the same scopes are next to each other.
+ auto GetScope = [](IntrinsicInst *II) {
+ const auto *ScopeListMV = cast<MetadataAsValue>(
+ II->getOperand(Intrinsic::NoAliasScopeDeclScopeArg));
+ return &cast<MDNode>(ScopeListMV->getMetadata())->getOperand(0);
+ };
+
+ // We are sorting on MDNode pointers here. For valid input IR this is ok.
+ // TODO: Sort on Metadata ID to avoid non-deterministic error messages.
+ auto Compare = [GetScope](IntrinsicInst *Lhs, IntrinsicInst *Rhs) {
+ return GetScope(Lhs) < GetScope(Rhs);
+ };
+
+ llvm::sort(NoAliasScopeDecls, Compare);
+
+ // Go over the intrinsics and check that for the same scope, they are not
+ // dominating each other.
+ auto ItCurrent = NoAliasScopeDecls.begin();
+ while (ItCurrent != NoAliasScopeDecls.end()) {
+ auto CurScope = GetScope(*ItCurrent);
+ auto ItNext = ItCurrent;
+ do {
+ ++ItNext;
+ } while (ItNext != NoAliasScopeDecls.end() &&
+ GetScope(*ItNext) == CurScope);
+
+ // [ItCurrent, ItNext) represents the declarations for the same scope.
+ // Ensure they are not dominating each other, but only if it is not too
+ // expensive.
+ if (ItNext - ItCurrent < 32)
+ for (auto *I : llvm::make_range(ItCurrent, ItNext))
+ for (auto *J : llvm::make_range(ItCurrent, ItNext))
+ if (I != J)
+ Assert(!DT.dominates(I, J),
+ "llvm.experimental.noalias.scope.decl dominates another one "
+ "with the same scope",
+ I);
+ ItCurrent = ItNext;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Implement the public interfaces to this file...
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/IRReader/IRReader.cpp b/contrib/llvm-project/llvm/lib/IRReader/IRReader.cpp
index cb33e40be61b..e7fd835f8ad0 100644
--- a/contrib/llvm-project/llvm/lib/IRReader/IRReader.cpp
+++ b/contrib/llvm-project/llvm/lib/IRReader/IRReader.cpp
@@ -24,10 +24,10 @@ namespace llvm {
extern bool TimePassesIsEnabled;
}
-static const char *const TimeIRParsingGroupName = "irparse";
-static const char *const TimeIRParsingGroupDescription = "LLVM IR Parsing";
-static const char *const TimeIRParsingName = "parse";
-static const char *const TimeIRParsingDescription = "Parse IR";
+const char TimeIRParsingGroupName[] = "irparse";
+const char TimeIRParsingGroupDescription[] = "LLVM IR Parsing";
+const char TimeIRParsingName[] = "parse";
+const char TimeIRParsingDescription[] = "Parse IR";
std::unique_ptr<Module>
llvm::getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer, SMDiagnostic &Err,
diff --git a/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp
new file mode 100644
index 000000000000..255d301362eb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp
@@ -0,0 +1,680 @@
+//===- ELFObjHandler.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------------===/
+
+#include "llvm/InterfaceStub/ELFObjHandler.h"
+#include "llvm/InterfaceStub/ELFStub.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
+
+using llvm::MemoryBufferRef;
+using llvm::object::ELFObjectFile;
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+
+namespace llvm {
+namespace elfabi {
+
+// Simple struct to hold relevant .dynamic entries.
+struct DynamicEntries {
+ uint64_t StrTabAddr = 0;
+ uint64_t StrSize = 0;
+ Optional<uint64_t> SONameOffset;
+ std::vector<uint64_t> NeededLibNames;
+ // Symbol table:
+ uint64_t DynSymAddr = 0;
+ // Hash tables:
+ Optional<uint64_t> ElfHash;
+ Optional<uint64_t> GnuHash;
+};
+
+/// This initializes an ELF file header with information specific to a binary
+/// dynamic shared object.
+/// Offsets, indexes, links, etc. for section and program headers are just
+/// zero-initialized as they will be updated elsewhere.
+///
+/// @param ElfHeader Target ELFT::Ehdr to populate.
+/// @param Machine Target architecture (e_machine from ELF specifications).
+template <class ELFT>
+static void initELFHeader(typename ELFT::Ehdr &ElfHeader, uint16_t Machine) {
+ memset(&ElfHeader, 0, sizeof(ElfHeader));
+ // ELF identification.
+ ElfHeader.e_ident[EI_MAG0] = ElfMagic[EI_MAG0];
+ ElfHeader.e_ident[EI_MAG1] = ElfMagic[EI_MAG1];
+ ElfHeader.e_ident[EI_MAG2] = ElfMagic[EI_MAG2];
+ ElfHeader.e_ident[EI_MAG3] = ElfMagic[EI_MAG3];
+ ElfHeader.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32;
+ bool IsLittleEndian = ELFT::TargetEndianness == support::little;
+ ElfHeader.e_ident[EI_DATA] = IsLittleEndian ? ELFDATA2LSB : ELFDATA2MSB;
+ ElfHeader.e_ident[EI_VERSION] = EV_CURRENT;
+ ElfHeader.e_ident[EI_OSABI] = ELFOSABI_NONE;
+
+ // Remainder of ELF header.
+ ElfHeader.e_type = ET_DYN;
+ ElfHeader.e_machine = Machine;
+ ElfHeader.e_version = EV_CURRENT;
+ ElfHeader.e_ehsize = sizeof(typename ELFT::Ehdr);
+ ElfHeader.e_phentsize = sizeof(typename ELFT::Phdr);
+ ElfHeader.e_shentsize = sizeof(typename ELFT::Shdr);
+}
+
+namespace {
+template <class ELFT> struct OutputSection {
+ using Elf_Shdr = typename ELFT::Shdr;
+ std::string Name;
+ Elf_Shdr Shdr;
+ uint64_t Addr;
+ uint64_t Offset;
+ uint64_t Size;
+ uint64_t Align;
+ uint32_t Index;
+ bool NoBits = true;
+};
+
+template <class T, class ELFT>
+struct ContentSection : public OutputSection<ELFT> {
+ T Content;
+ ContentSection() { this->NoBits = false; }
+};
+
+// This class just wraps StringTableBuilder for the purpose of adding a
+// default constructor.
+class ELFStringTableBuilder : public StringTableBuilder {
+public:
+ ELFStringTableBuilder() : StringTableBuilder(StringTableBuilder::ELF) {}
+};
+
+template <class ELFT> class ELFSymbolTableBuilder {
+public:
+ using Elf_Sym = typename ELFT::Sym;
+
+ ELFSymbolTableBuilder() { Symbols.push_back({}); }
+
+ void add(size_t StNameOffset, uint64_t StSize, uint8_t StBind, uint8_t StType,
+ uint8_t StOther, uint16_t StShndx) {
+ Elf_Sym S{};
+ S.st_name = StNameOffset;
+ S.st_size = StSize;
+ S.st_info = (StBind << 4) | (StType & 0xf);
+ S.st_other = StOther;
+ S.st_shndx = StShndx;
+ Symbols.push_back(S);
+ }
+
+ size_t getSize() const { return Symbols.size() * sizeof(Elf_Sym); }
+
+ void write(uint8_t *Buf) const {
+ memcpy(Buf, Symbols.data(), sizeof(Elf_Sym) * Symbols.size());
+ }
+
+private:
+ llvm::SmallVector<Elf_Sym, 8> Symbols;
+};
+
+template <class ELFT> class ELFDynamicTableBuilder {
+public:
+ using Elf_Dyn = typename ELFT::Dyn;
+
+ size_t addAddr(uint64_t Tag, uint64_t Addr) {
+ Elf_Dyn Entry;
+ Entry.d_tag = Tag;
+ Entry.d_un.d_ptr = Addr;
+ Entries.push_back(Entry);
+ return Entries.size() - 1;
+ }
+
+ void modifyAddr(size_t Index, uint64_t Addr) {
+ Entries[Index].d_un.d_ptr = Addr;
+ }
+
+ size_t addValue(uint64_t Tag, uint64_t Value) {
+ Elf_Dyn Entry;
+ Entry.d_tag = Tag;
+ Entry.d_un.d_val = Value;
+ Entries.push_back(Entry);
+ return Entries.size() - 1;
+ }
+
+ void modifyValue(size_t Index, uint64_t Value) {
+ Entries[Index].d_un.d_val = Value;
+ }
+
+ size_t getSize() const {
+ // Add DT_NULL entry at the end.
+ return (Entries.size() + 1) * sizeof(Elf_Dyn);
+ }
+
+ void write(uint8_t *Buf) const {
+ memcpy(Buf, Entries.data(), sizeof(Elf_Dyn) * Entries.size());
+ // Add DT_NULL entry at the end.
+ memset(Buf + sizeof(Elf_Dyn) * Entries.size(), 0, sizeof(Elf_Dyn));
+ }
+
+private:
+ llvm::SmallVector<Elf_Dyn, 8> Entries;
+};
+
+template <class ELFT> class ELFStubBuilder {
+public:
+ using Elf_Ehdr = typename ELFT::Ehdr;
+ using Elf_Shdr = typename ELFT::Shdr;
+ using Elf_Phdr = typename ELFT::Phdr;
+ using Elf_Sym = typename ELFT::Sym;
+ using Elf_Addr = typename ELFT::Addr;
+ using Elf_Dyn = typename ELFT::Dyn;
+
+ ELFStubBuilder(const ELFStubBuilder &) = delete;
+ ELFStubBuilder(ELFStubBuilder &&) = default;
+
+ explicit ELFStubBuilder(const ELFStub &Stub) {
+ DynSym.Name = ".dynsym";
+ DynSym.Align = sizeof(Elf_Addr);
+ DynStr.Name = ".dynstr";
+ DynStr.Align = 1;
+ DynTab.Name = ".dynamic";
+ DynTab.Align = sizeof(Elf_Addr);
+ ShStrTab.Name = ".shstrtab";
+ ShStrTab.Align = 1;
+
+ // Populate string tables.
+ for (const ELFSymbol &Sym : Stub.Symbols)
+ DynStr.Content.add(Sym.Name);
+ for (const std::string &Lib : Stub.NeededLibs)
+ DynStr.Content.add(Lib);
+ if (Stub.SoName)
+ DynStr.Content.add(Stub.SoName.getValue());
+
+ std::vector<OutputSection<ELFT> *> Sections = {&DynSym, &DynStr, &DynTab,
+ &ShStrTab};
+ const OutputSection<ELFT> *LastSection = Sections.back();
+ // Now set the Index and put sections names into ".shstrtab".
+ uint64_t Index = 1;
+ for (OutputSection<ELFT> *Sec : Sections) {
+ Sec->Index = Index++;
+ ShStrTab.Content.add(Sec->Name);
+ }
+ ShStrTab.Content.finalize();
+ ShStrTab.Size = ShStrTab.Content.getSize();
+ DynStr.Content.finalize();
+ DynStr.Size = DynStr.Content.getSize();
+
+ // Populate dynamic symbol table.
+ for (const ELFSymbol &Sym : Stub.Symbols) {
+ uint8_t Bind = Sym.Weak ? STB_WEAK : STB_GLOBAL;
+ // For non-undefined symbols, value of the shndx is not relevant at link
+ // time as long as it is not SHN_UNDEF. Set shndx to 1, which
+ // points to ".dynsym".
+ uint16_t Shndx = Sym.Undefined ? SHN_UNDEF : 1;
+ DynSym.Content.add(DynStr.Content.getOffset(Sym.Name), Sym.Size, Bind,
+ (uint8_t)Sym.Type, 0, Shndx);
+ }
+ DynSym.Size = DynSym.Content.getSize();
+
+ // Populate dynamic table.
+ size_t DynSymIndex = DynTab.Content.addAddr(DT_SYMTAB, 0);
+ size_t DynStrIndex = DynTab.Content.addAddr(DT_STRTAB, 0);
+ for (const std::string &Lib : Stub.NeededLibs)
+ DynTab.Content.addValue(DT_NEEDED, DynStr.Content.getOffset(Lib));
+ if (Stub.SoName)
+ DynTab.Content.addValue(DT_SONAME,
+ DynStr.Content.getOffset(Stub.SoName.getValue()));
+ DynTab.Size = DynTab.Content.getSize();
+ // Calculate sections' addresses and offsets.
+ uint64_t CurrentOffset = sizeof(Elf_Ehdr);
+ for (OutputSection<ELFT> *Sec : Sections) {
+ Sec->Offset = alignTo(CurrentOffset, Sec->Align);
+ Sec->Addr = Sec->Offset;
+ CurrentOffset = Sec->Offset + Sec->Size;
+ }
+ // Fill Addr back to dynamic table.
+ DynTab.Content.modifyAddr(DynSymIndex, DynSym.Addr);
+ DynTab.Content.modifyAddr(DynStrIndex, DynStr.Addr);
+ // Write section headers of string tables.
+ fillSymTabShdr(DynSym, SHT_DYNSYM);
+ fillStrTabShdr(DynStr, SHF_ALLOC);
+ fillDynTabShdr(DynTab);
+ fillStrTabShdr(ShStrTab);
+
+ // Finish initializing the ELF header.
+ initELFHeader<ELFT>(ElfHeader, Stub.Arch);
+ ElfHeader.e_shstrndx = ShStrTab.Index;
+ ElfHeader.e_shnum = LastSection->Index + 1;
+ ElfHeader.e_shoff =
+ alignTo(LastSection->Offset + LastSection->Size, sizeof(Elf_Addr));
+ }
+
+ size_t getSize() const {
+ return ElfHeader.e_shoff + ElfHeader.e_shnum * sizeof(Elf_Shdr);
+ }
+
+ void write(uint8_t *Data) const {
+ write(Data, ElfHeader);
+ DynSym.Content.write(Data + DynSym.Shdr.sh_offset);
+ DynStr.Content.write(Data + DynStr.Shdr.sh_offset);
+ DynTab.Content.write(Data + DynTab.Shdr.sh_offset);
+ ShStrTab.Content.write(Data + ShStrTab.Shdr.sh_offset);
+ writeShdr(Data, DynSym);
+ writeShdr(Data, DynStr);
+ writeShdr(Data, DynTab);
+ writeShdr(Data, ShStrTab);
+ }
+
+private:
+ Elf_Ehdr ElfHeader;
+ ContentSection<ELFStringTableBuilder, ELFT> DynStr;
+ ContentSection<ELFStringTableBuilder, ELFT> ShStrTab;
+ ContentSection<ELFSymbolTableBuilder<ELFT>, ELFT> DynSym;
+ ContentSection<ELFDynamicTableBuilder<ELFT>, ELFT> DynTab;
+
+ template <class T> static void write(uint8_t *Data, const T &Value) {
+ *reinterpret_cast<T *>(Data) = Value;
+ }
+
+ void fillStrTabShdr(ContentSection<ELFStringTableBuilder, ELFT> &StrTab,
+ uint32_t ShFlags = 0) const {
+ StrTab.Shdr.sh_type = SHT_STRTAB;
+ StrTab.Shdr.sh_flags = ShFlags;
+ StrTab.Shdr.sh_addr = StrTab.Addr;
+ StrTab.Shdr.sh_offset = StrTab.Offset;
+ StrTab.Shdr.sh_info = 0;
+ StrTab.Shdr.sh_size = StrTab.Size;
+ StrTab.Shdr.sh_name = ShStrTab.Content.getOffset(StrTab.Name);
+ StrTab.Shdr.sh_addralign = StrTab.Align;
+ StrTab.Shdr.sh_entsize = 0;
+ StrTab.Shdr.sh_link = 0;
+ }
+ void fillSymTabShdr(ContentSection<ELFSymbolTableBuilder<ELFT>, ELFT> &SymTab,
+ uint32_t ShType) const {
+ SymTab.Shdr.sh_type = ShType;
+ SymTab.Shdr.sh_flags = SHF_ALLOC;
+ SymTab.Shdr.sh_addr = SymTab.Addr;
+ SymTab.Shdr.sh_offset = SymTab.Offset;
+ SymTab.Shdr.sh_info = SymTab.Size / sizeof(Elf_Sym) > 1 ? 1 : 0;
+ SymTab.Shdr.sh_size = SymTab.Size;
+ SymTab.Shdr.sh_name = this->ShStrTab.Content.getOffset(SymTab.Name);
+ SymTab.Shdr.sh_addralign = SymTab.Align;
+ SymTab.Shdr.sh_entsize = sizeof(Elf_Sym);
+ SymTab.Shdr.sh_link = this->DynStr.Index;
+ }
+ void fillDynTabShdr(
+ ContentSection<ELFDynamicTableBuilder<ELFT>, ELFT> &DynTab) const {
+ DynTab.Shdr.sh_type = SHT_DYNAMIC;
+ DynTab.Shdr.sh_flags = SHF_ALLOC;
+ DynTab.Shdr.sh_addr = DynTab.Addr;
+ DynTab.Shdr.sh_offset = DynTab.Offset;
+ DynTab.Shdr.sh_info = 0;
+ DynTab.Shdr.sh_size = DynTab.Size;
+ DynTab.Shdr.sh_name = this->ShStrTab.Content.getOffset(DynTab.Name);
+ DynTab.Shdr.sh_addralign = DynTab.Align;
+ DynTab.Shdr.sh_entsize = sizeof(Elf_Dyn);
+ DynTab.Shdr.sh_link = this->DynStr.Index;
+ }
+ uint64_t shdrOffset(const OutputSection<ELFT> &Sec) const {
+ return ElfHeader.e_shoff + Sec.Index * sizeof(Elf_Shdr);
+ }
+
+ void writeShdr(uint8_t *Data, const OutputSection<ELFT> &Sec) const {
+ write(Data + shdrOffset(Sec), Sec.Shdr);
+ }
+};
+} // end anonymous namespace
+
+/// This function behaves similarly to StringRef::substr(), but attempts to
+/// terminate the returned StringRef at the first null terminator. If no null
+/// terminator is found, an error is returned.
+///
+/// @param Str Source string to create a substring from.
+/// @param Offset The start index of the desired substring.
+static Expected<StringRef> terminatedSubstr(StringRef Str, size_t Offset) {
+ size_t StrEnd = Str.find('\0', Offset);
+ if (StrEnd == StringLiteral::npos) {
+ return createError(
+ "String overran bounds of string table (no null terminator)");
+ }
+
+ size_t StrLen = StrEnd - Offset;
+ return Str.substr(Offset, StrLen);
+}
+
+/// This function takes an error, and appends a string of text to the end of
+/// that error. Since "appending" to an Error isn't supported behavior of an
+/// Error, this function technically creates a new error with the combined
+/// message and consumes the old error.
+///
+/// @param Err Source error.
+/// @param After Text to append at the end of Err's error message.
+Error appendToError(Error Err, StringRef After) {
+ std::string Message;
+ raw_string_ostream Stream(Message);
+ Stream << Err;
+ Stream << " " << After;
+ consumeError(std::move(Err));
+ return createError(Stream.str().c_str());
+}
+
+/// This function populates a DynamicEntries struct using an ELFT::DynRange.
+/// After populating the struct, the members are validated with
+/// some basic sanity checks.
+///
+/// @param Dyn Target DynamicEntries struct to populate.
+/// @param DynTable Source dynamic table.
+template <class ELFT>
+static Error populateDynamic(DynamicEntries &Dyn,
+ typename ELFT::DynRange DynTable) {
+ if (DynTable.empty())
+ return createError("No .dynamic section found");
+
+ // Search .dynamic for relevant entries.
+ bool FoundDynStr = false;
+ bool FoundDynStrSz = false;
+ bool FoundDynSym = false;
+ for (auto &Entry : DynTable) {
+ switch (Entry.d_tag) {
+ case DT_SONAME:
+ Dyn.SONameOffset = Entry.d_un.d_val;
+ break;
+ case DT_STRTAB:
+ Dyn.StrTabAddr = Entry.d_un.d_ptr;
+ FoundDynStr = true;
+ break;
+ case DT_STRSZ:
+ Dyn.StrSize = Entry.d_un.d_val;
+ FoundDynStrSz = true;
+ break;
+ case DT_NEEDED:
+ Dyn.NeededLibNames.push_back(Entry.d_un.d_val);
+ break;
+ case DT_SYMTAB:
+ Dyn.DynSymAddr = Entry.d_un.d_ptr;
+ FoundDynSym = true;
+ break;
+ case DT_HASH:
+ Dyn.ElfHash = Entry.d_un.d_ptr;
+ break;
+ case DT_GNU_HASH:
+ Dyn.GnuHash = Entry.d_un.d_ptr;
+ }
+ }
+
+ if (!FoundDynStr) {
+ return createError(
+ "Couldn't locate dynamic string table (no DT_STRTAB entry)");
+ }
+ if (!FoundDynStrSz) {
+ return createError(
+ "Couldn't determine dynamic string table size (no DT_STRSZ entry)");
+ }
+ if (!FoundDynSym) {
+ return createError(
+ "Couldn't locate dynamic symbol table (no DT_SYMTAB entry)");
+ }
+ if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) {
+ return createStringError(object_error::parse_failed,
+ "DT_SONAME string offset (0x%016" PRIx64
+ ") outside of dynamic string table",
+ *Dyn.SONameOffset);
+ }
+ for (uint64_t Offset : Dyn.NeededLibNames) {
+ if (Offset >= Dyn.StrSize) {
+ return createStringError(object_error::parse_failed,
+ "DT_NEEDED string offset (0x%016" PRIx64
+ ") outside of dynamic string table",
+ Offset);
+ }
+ }
+
+ return Error::success();
+}
+
+/// This function extracts symbol type from a symbol's st_info member and
+/// maps it to an ELFSymbolType enum.
+/// Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are supported.
+/// Other symbol types are mapped to ELFSymbolType::Unknown.
+///
+/// @param Info Binary symbol st_info to extract symbol type from.
+static ELFSymbolType convertInfoToType(uint8_t Info) {
+ Info = Info & 0xf;
+ switch (Info) {
+ case ELF::STT_NOTYPE:
+ return ELFSymbolType::NoType;
+ case ELF::STT_OBJECT:
+ return ELFSymbolType::Object;
+ case ELF::STT_FUNC:
+ return ELFSymbolType::Func;
+ case ELF::STT_TLS:
+ return ELFSymbolType::TLS;
+ default:
+ return ELFSymbolType::Unknown;
+ }
+}
+
+/// This function creates an ELFSymbol and populates all members using
+/// information from a binary ELFT::Sym.
+///
+/// @param SymName The desired name of the ELFSymbol.
+/// @param RawSym ELFT::Sym to extract symbol information from.
+template <class ELFT>
+static ELFSymbol createELFSym(StringRef SymName,
+ const typename ELFT::Sym &RawSym) {
+ ELFSymbol TargetSym{std::string(SymName)};
+ uint8_t Binding = RawSym.getBinding();
+ if (Binding == STB_WEAK)
+ TargetSym.Weak = true;
+ else
+ TargetSym.Weak = false;
+
+ TargetSym.Undefined = RawSym.isUndefined();
+ TargetSym.Type = convertInfoToType(RawSym.st_info);
+
+ if (TargetSym.Type == ELFSymbolType::Func) {
+ TargetSym.Size = 0;
+ } else {
+ TargetSym.Size = RawSym.st_size;
+ }
+ return TargetSym;
+}
+
+/// This function populates an ELFStub with symbols using information read
+/// from an ELF binary.
+///
+/// @param TargetStub ELFStub to add symbols to.
+/// @param DynSym Range of dynamic symbols to add to TargetStub.
+/// @param DynStr StringRef to the dynamic string table.
+template <class ELFT>
+static Error populateSymbols(ELFStub &TargetStub,
+ const typename ELFT::SymRange DynSym,
+ StringRef DynStr) {
+ // Skip the first symbol since it's the NULL symbol.
+ for (auto RawSym : DynSym.drop_front(1)) {
+ // If a symbol does not have global or weak binding, ignore it.
+ uint8_t Binding = RawSym.getBinding();
+ if (!(Binding == STB_GLOBAL || Binding == STB_WEAK))
+ continue;
+ // If a symbol doesn't have default or protected visibility, ignore it.
+ uint8_t Visibility = RawSym.getVisibility();
+ if (!(Visibility == STV_DEFAULT || Visibility == STV_PROTECTED))
+ continue;
+ // Create an ELFSymbol and populate it with information from the symbol
+ // table entry.
+ Expected<StringRef> SymName = terminatedSubstr(DynStr, RawSym.st_name);
+ if (!SymName)
+ return SymName.takeError();
+ ELFSymbol Sym = createELFSym<ELFT>(*SymName, RawSym);
+ TargetStub.Symbols.insert(std::move(Sym));
+ // TODO: Populate symbol warning.
+ }
+ return Error::success();
+}
+
+/// Returns a new ELFStub with all members populated from an ELFObjectFile.
+/// @param ElfObj Source ELFObjectFile.
+template <class ELFT>
+static Expected<std::unique_ptr<ELFStub>>
+buildStub(const ELFObjectFile<ELFT> &ElfObj) {
+ using Elf_Dyn_Range = typename ELFT::DynRange;
+ using Elf_Phdr_Range = typename ELFT::PhdrRange;
+ using Elf_Sym_Range = typename ELFT::SymRange;
+ using Elf_Sym = typename ELFT::Sym;
+ std::unique_ptr<ELFStub> DestStub = std::make_unique<ELFStub>();
+ const ELFFile<ELFT> &ElfFile = ElfObj.getELFFile();
+ // Fetch .dynamic table.
+ Expected<Elf_Dyn_Range> DynTable = ElfFile.dynamicEntries();
+ if (!DynTable) {
+ return DynTable.takeError();
+ }
+
+ // Fetch program headers.
+ Expected<Elf_Phdr_Range> PHdrs = ElfFile.program_headers();
+ if (!PHdrs) {
+ return PHdrs.takeError();
+ }
+
+ // Collect relevant .dynamic entries.
+ DynamicEntries DynEnt;
+ if (Error Err = populateDynamic<ELFT>(DynEnt, *DynTable))
+ return std::move(Err);
+
+ // Get pointer to in-memory location of .dynstr section.
+ Expected<const uint8_t *> DynStrPtr = ElfFile.toMappedAddr(DynEnt.StrTabAddr);
+ if (!DynStrPtr)
+ return appendToError(DynStrPtr.takeError(),
+ "when locating .dynstr section contents");
+
+ StringRef DynStr(reinterpret_cast<const char *>(DynStrPtr.get()),
+ DynEnt.StrSize);
+
+ // Populate Arch from ELF header.
+ DestStub->Arch = ElfFile.getHeader().e_machine;
+
+ // Populate SoName from .dynamic entries and dynamic string table.
+ if (DynEnt.SONameOffset.hasValue()) {
+ Expected<StringRef> NameOrErr =
+ terminatedSubstr(DynStr, *DynEnt.SONameOffset);
+ if (!NameOrErr) {
+ return appendToError(NameOrErr.takeError(), "when reading DT_SONAME");
+ }
+ DestStub->SoName = std::string(*NameOrErr);
+ }
+
+ // Populate NeededLibs from .dynamic entries and dynamic string table.
+ for (uint64_t NeededStrOffset : DynEnt.NeededLibNames) {
+ Expected<StringRef> LibNameOrErr =
+ terminatedSubstr(DynStr, NeededStrOffset);
+ if (!LibNameOrErr) {
+ return appendToError(LibNameOrErr.takeError(), "when reading DT_NEEDED");
+ }
+ DestStub->NeededLibs.push_back(std::string(*LibNameOrErr));
+ }
+
+ // Populate Symbols from .dynsym table and dynamic string table.
+ Expected<uint64_t> SymCount = ElfFile.getDynSymtabSize();
+ if (!SymCount)
+ return SymCount.takeError();
+ if (*SymCount > 0) {
+ // Get pointer to in-memory location of .dynsym section.
+ Expected<const uint8_t *> DynSymPtr =
+ ElfFile.toMappedAddr(DynEnt.DynSymAddr);
+ if (!DynSymPtr)
+ return appendToError(DynSymPtr.takeError(),
+ "when locating .dynsym section contents");
+ Elf_Sym_Range DynSyms = ArrayRef<Elf_Sym>(
+ reinterpret_cast<const Elf_Sym *>(*DynSymPtr), *SymCount);
+ Error SymReadError = populateSymbols<ELFT>(*DestStub, DynSyms, DynStr);
+ if (SymReadError)
+ return appendToError(std::move(SymReadError),
+ "when reading dynamic symbols");
+ }
+
+ return std::move(DestStub);
+}
+
+/// This function opens a file for writing and then writes a binary ELF stub to
+/// the file.
+///
+/// @param FilePath File path for writing the ELF binary.
+/// @param Stub Source ELFStub to generate a binary ELF stub from.
+template <class ELFT>
+static Error writeELFBinaryToFile(StringRef FilePath, const ELFStub &Stub,
+ bool WriteIfChanged) {
+ ELFStubBuilder<ELFT> Builder{Stub};
+ // Write Stub to memory first.
+ std::vector<uint8_t> Buf(Builder.getSize());
+ Builder.write(Buf.data());
+
+ if (WriteIfChanged) {
+ if (ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrError =
+ MemoryBuffer::getFile(FilePath)) {
+ // Compare Stub output with existing Stub file.
+ // If Stub file unchanged, abort updating.
+ if ((*BufOrError)->getBufferSize() == Builder.getSize() &&
+ !memcmp((*BufOrError)->getBufferStart(), Buf.data(),
+ Builder.getSize()))
+ return Error::success();
+ }
+ }
+
+ Expected<std::unique_ptr<FileOutputBuffer>> BufOrError =
+ FileOutputBuffer::create(FilePath, Builder.getSize());
+ if (!BufOrError)
+ return createStringError(errc::invalid_argument,
+ toString(BufOrError.takeError()) +
+ " when trying to open `" + FilePath +
+ "` for writing");
+
+ // Write binary to file.
+ std::unique_ptr<FileOutputBuffer> FileBuf = std::move(*BufOrError);
+ memcpy(FileBuf->getBufferStart(), Buf.data(), Buf.size());
+
+ return FileBuf->commit();
+}
+
+Expected<std::unique_ptr<ELFStub>> readELFFile(MemoryBufferRef Buf) {
+ Expected<std::unique_ptr<Binary>> BinOrErr = createBinary(Buf);
+ if (!BinOrErr) {
+ return BinOrErr.takeError();
+ }
+
+ Binary *Bin = BinOrErr->get();
+ if (auto Obj = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
+ return buildStub(*Obj);
+ } else if (auto Obj = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
+ return buildStub(*Obj);
+ } else if (auto Obj = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
+ return buildStub(*Obj);
+ } else if (auto Obj = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
+ return buildStub(*Obj);
+ }
+ return createStringError(errc::not_supported, "unsupported binary format");
+}
+
+// This function wraps the ELFT writeELFBinaryToFile() so writeBinaryStub()
+// can be called without having to use ELFType templates directly.
+Error writeBinaryStub(StringRef FilePath, const ELFStub &Stub,
+ ELFTarget OutputFormat, bool WriteIfChanged) {
+ if (OutputFormat == ELFTarget::ELF32LE)
+ return writeELFBinaryToFile<ELF32LE>(FilePath, Stub, WriteIfChanged);
+ if (OutputFormat == ELFTarget::ELF32BE)
+ return writeELFBinaryToFile<ELF32BE>(FilePath, Stub, WriteIfChanged);
+ if (OutputFormat == ELFTarget::ELF64LE)
+ return writeELFBinaryToFile<ELF64LE>(FilePath, Stub, WriteIfChanged);
+ if (OutputFormat == ELFTarget::ELF64BE)
+ return writeELFBinaryToFile<ELF64BE>(FilePath, Stub, WriteIfChanged);
+ llvm_unreachable("invalid binary output target");
+}
+
+} // end namespace elfabi
+} // end namespace llvm
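
A side note on the symbol handling in the new ELFObjHandler.cpp above: both ELFSymbolTableBuilder::add() and convertInfoToType() rely on the ELF convention that st_info packs the binding in the high four bits and the type in the low four bits. A self-contained sketch of that packing, in plain C++ with constants taken from the ELF specification:

#include <cassert>
#include <cstdint>

// Pack and unpack an ELF st_info byte: binding in bits 4-7, type in bits 0-3.
static uint8_t makeStInfo(uint8_t Bind, uint8_t Type) {
  return static_cast<uint8_t>((Bind << 4) | (Type & 0xf));
}
static uint8_t stBind(uint8_t Info) { return Info >> 4; }
static uint8_t stType(uint8_t Info) { return Info & 0xf; }

int main() {
  const uint8_t STB_GLOBAL = 1, STT_FUNC = 2; // values from the ELF spec
  uint8_t Info = makeStInfo(STB_GLOBAL, STT_FUNC);
  assert(stBind(Info) == STB_GLOBAL && stType(Info) == STT_FUNC);
  return 0;
}

This is the same `(StBind << 4) | (StType & 0xf)` expression the builder above uses when emitting .dynsym entries.
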
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/ELF/ELFStub.cpp b/contrib/llvm-project/llvm/lib/InterfaceStub/ELFStub.cpp
index f8463497093b..3c637695d8e7 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/ELF/ELFStub.cpp
+++ b/contrib/llvm-project/llvm/lib/InterfaceStub/ELFStub.cpp
@@ -6,7 +6,7 @@
//
//===-----------------------------------------------------------------------===/
-#include "llvm/TextAPI/ELF/ELFStub.h"
+#include "llvm/InterfaceStub/ELFStub.h"
using namespace llvm;
using namespace llvm::elfabi;
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/ELF/TBEHandler.cpp b/contrib/llvm-project/llvm/lib/InterfaceStub/TBEHandler.cpp
index cb597d8896e8..ee95d21ee661 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/ELF/TBEHandler.cpp
+++ b/contrib/llvm-project/llvm/lib/InterfaceStub/TBEHandler.cpp
@@ -6,12 +6,12 @@
//
//===-----------------------------------------------------------------------===/
-#include "llvm/TextAPI/ELF/TBEHandler.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/InterfaceStub/TBEHandler.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/InterfaceStub/ELFStub.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/YAMLTraits.h"
-#include "llvm/TextAPI/ELF/ELFStub.h"
using namespace llvm;
using namespace llvm::elfabi;
@@ -69,28 +69,6 @@ template <> struct ScalarTraits<ELFArchMapper> {
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
};
-/// YAML traits for TbeVersion.
-template <> struct ScalarTraits<VersionTuple> {
- static void output(const VersionTuple &Value, void *,
- llvm::raw_ostream &Out) {
- Out << Value.getAsString();
- }
-
- static StringRef input(StringRef Scalar, void *, VersionTuple &Value) {
- if (Value.tryParse(Scalar))
- return StringRef("Can't parse version: invalid version format.");
-
- if (Value > TBEVersionCurrent)
- return StringRef("Unsupported TBE version.");
-
- // Returning empty StringRef indicates successful parse.
- return StringRef();
- }
-
- // Don't place quotation marks around version value.
- static QuotingType mustQuote(StringRef) { return QuotingType::None; }
-};
-
/// YAML traits for ELFSymbol.
template <> struct MappingTraits<ELFSymbol> {
static void mapping(IO &IO, ELFSymbol &Symbol) {
@@ -149,6 +127,11 @@ Expected<std::unique_ptr<ELFStub>> elfabi::readTBEFromBuffer(StringRef Buf) {
if (std::error_code Err = YamlIn.error())
return createStringError(Err, "YAML failed reading as TBE");
+ if (Stub->TbeVersion > elfabi::TBEVersionCurrent)
+ return make_error<StringError>(
+ "TBE version " + Stub->TbeVersion.getAsString() + " is unsupported.",
+ std::make_error_code(std::errc::invalid_argument));
+
return std::move(Stub);
}
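
The relocated check above rejects TBE inputs whose version exceeds TBEVersionCurrent after the YAML parse succeeds, instead of inside the ScalarTraits parser. A rough standalone illustration of the comparison using llvm::VersionTuple; the version values here are invented for the example:

#include "llvm/Support/VersionTuple.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::VersionTuple Current(1, 0); // stand-in for TBEVersionCurrent
  llvm::VersionTuple FromFile;
  if (FromFile.tryParse("1.2"))     // tryParse() returns true on failure
    return 1;
  if (FromFile > Current)
    llvm::errs() << "TBE version " << FromFile.getAsString()
                 << " is unsupported.\n";
  return 0;
}
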
diff --git a/contrib/llvm-project/llvm/lib/LTO/Caching.cpp b/contrib/llvm-project/llvm/lib/LTO/Caching.cpp
index 46cac3fb1830..75a89e729f43 100644
--- a/contrib/llvm-project/llvm/lib/LTO/Caching.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/Caching.cpp
@@ -13,6 +13,7 @@
#include "llvm/LTO/Caching.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTO.cpp b/contrib/llvm-project/llvm/lib/LTO/LTO.cpp
index 6e1e3998e490..9103d11059e0 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTO.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTO.cpp
@@ -798,7 +798,7 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
for (GlobalValue *GV : Mod.Keep) {
if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) {
if (Function *F = dyn_cast<Function>(GV)) {
- OptimizationRemarkEmitter ORE(F);
+ OptimizationRemarkEmitter ORE(F, nullptr);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "deadfunction", F)
<< ore::NV("Function", F)
<< " not added to the combined module ");
@@ -983,7 +983,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
// Setup optimization remarks.
auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
RegularLTO.CombinedModule->getContext(), Conf.RemarksFilename,
- Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness);
+ Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness,
+ Conf.RemarksHotnessThreshold);
if (!DiagFileOrErr)
return DiagFileOrErr.takeError();
@@ -1107,6 +1108,7 @@ public:
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
virtual Error wait() = 0;
+ virtual unsigned getThreadCount() = 0;
};
namespace {
@@ -1221,6 +1223,10 @@ public:
else
return Error::success();
}
+
+ unsigned getThreadCount() override {
+ return BackendThreadPool.getThreadCount();
+ }
};
} // end anonymous namespace
@@ -1309,6 +1315,10 @@ public:
}
Error wait() override { return Error::success(); }
+
+ // WriteIndexesThinBackend should always return 1 to prevent module
+ // re-ordering and avoid non-determinism in the final link.
+ unsigned getThreadCount() override { return 1; }
};
} // end anonymous namespace
@@ -1443,23 +1453,44 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
auto &ModuleMap =
ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
- // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined
- // module and parallel code generation partitions.
- unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel;
- for (auto &Mod : ModuleMap) {
- if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
- ExportLists[Mod.first],
- ResolvedODR[Mod.first], ThinLTO.ModuleMap))
- return E;
- ++Task;
- }
+ auto ProcessOneModule = [&](int I) -> Error {
+ auto &Mod = *(ModuleMap.begin() + I);
+ // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for
+ // combined module and parallel code generation partitions.
+ return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I,
+ Mod.second, ImportLists[Mod.first],
+ ExportLists[Mod.first], ResolvedODR[Mod.first],
+ ThinLTO.ModuleMap);
+ };
+ if (BackendProc->getThreadCount() == 1) {
+ // Process the modules in the order they were provided on the command-line.
+ // It is important for this codepath to be used for WriteIndexesThinBackend,
+ // to ensure the emitted LinkedObjectsFile lists ThinLTO objects in the same
+ // order as the inputs, which otherwise would affect the final link order.
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I)
+ if (Error E = ProcessOneModule(I))
+ return E;
+ } else {
+ // When executing in parallel, process largest bitsize modules first to
+ // improve parallelism, and avoid starving the thread pool near the end.
+ // This saves about 15 sec on a 36-core machine while linking `clang.exe` (out
+ // of 100 sec).
+ std::vector<BitcodeModule *> ModulesVec;
+ ModulesVec.reserve(ModuleMap.size());
+ for (auto &Mod : ModuleMap)
+ ModulesVec.push_back(&Mod.second);
+ for (int I : generateModulesOrdering(ModulesVec))
+ if (Error E = ProcessOneModule(I))
+ return E;
+ }
return BackendProc->wait();
}
Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(
LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses,
- StringRef RemarksFormat, bool RemarksWithHotness, int Count) {
+ StringRef RemarksFormat, bool RemarksWithHotness,
+ Optional<uint64_t> RemarksHotnessThreshold, int Count) {
std::string Filename = std::string(RemarksFilename);
// For ThinLTO, file.opt.<format> becomes
// file.opt.<format>.thin.<num>.<format>.
@@ -1469,7 +1500,8 @@ Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(
.str();
auto ResultOrErr = llvm::setupLLVMOptimizationRemarks(
- Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness);
+ Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness,
+ RemarksHotnessThreshold);
if (Error E = ResultOrErr.takeError())
return std::move(E);
@@ -1495,3 +1527,18 @@ lto::setupStatsFile(StringRef StatsFilename) {
StatsFile->keep();
return std::move(StatsFile);
}
+
+// Compute the ordering in which we will process the inputs: the rough heuristic
+// here is to sort them by size so that the largest modules get scheduled as soon
+// as possible. This is purely a compile-time optimization.
+std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
+ std::vector<int> ModulesOrdering;
+ ModulesOrdering.resize(R.size());
+ std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+ llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
+ auto LSize = R[LeftIndex]->getBuffer().size();
+ auto RSize = R[RightIndex]->getBuffer().size();
+ return LSize > RSize;
+ });
+ return ModulesOrdering;
+}
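
generateModulesOrdering() above is a generic largest-first index sort over the bitcode buffers. Stripped of the LLVM types, the same heuristic can be sketched as follows; Sizes stands in for the module buffer sizes, and this is an illustration rather than the library API:

#include <algorithm>
#include <numeric>
#include <vector>

// Return indices of Sizes sorted so the largest entries come first, the same
// scheduling heuristic generateModulesOrdering() applies to bitcode modules.
static std::vector<int> largestFirstOrdering(const std::vector<size_t> &Sizes) {
  std::vector<int> Order(Sizes.size());
  std::iota(Order.begin(), Order.end(), 0);
  std::sort(Order.begin(), Order.end(),
            [&](int L, int R) { return Sizes[L] > Sizes[R]; });
  return Order;
}

As the runThinLTO() hunk above shows, this ordering is used only when the backend reports more than one thread; the single-threaded WriteIndexesThinBackend path keeps command-line order to stay deterministic.
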
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp b/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp
index 0c395f9bbf28..1796d6ba60cc 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp
@@ -50,6 +50,30 @@
using namespace llvm;
using namespace lto;
+#define DEBUG_TYPE "lto-backend"
+
+enum class LTOBitcodeEmbedding {
+ DoNotEmbed = 0,
+ EmbedOptimized = 1,
+ EmbedPostMergePreOptimized = 2
+};
+
+static cl::opt<LTOBitcodeEmbedding> EmbedBitcode(
+ "lto-embed-bitcode", cl::init(LTOBitcodeEmbedding::DoNotEmbed),
+ cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none",
+ "Do not embed"),
+ clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized",
+ "Embed after all optimization passes"),
+ clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized,
+ "post-merge-pre-opt",
+ "Embed post merge, but before optimizations")),
+ cl::desc("Embed LLVM bitcode in object files produced by LTO"));
+
+static cl::opt<bool> ThinLTOAssumeMerged(
+ "thinlto-assume-merged", cl::init(false),
+ cl::desc("Assume the input has already undergone ThinLTO function "
+ "importing and the other pre-optimization pipeline changes."));
+
LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
errs() << "failed to open " << Path << ": " << Msg << '\n';
errs().flush();
@@ -152,9 +176,7 @@ static void RegisterPassPlugins(ArrayRef<std::string> PassPlugins,
}
}
-namespace {
-
-std::unique_ptr<TargetMachine>
+static std::unique_ptr<TargetMachine>
createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) {
StringRef TheTriple = M.getTargetTriple();
SubtargetFeatures Features;
@@ -175,9 +197,11 @@ createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) {
else
CodeModel = M.getCodeModel();
- return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
+ std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine(
TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
CodeModel, Conf.CGOptLevel));
+ assert(TM && "Failed to create target machine");
+ return TM;
}
static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
@@ -197,9 +221,9 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
}
PassInstrumentationCallbacks PIC;
- StandardInstrumentations SI;
+ StandardInstrumentations SI(Conf.DebugPassManager);
SI.registerCallbacks(PIC);
- PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC);
+ PassBuilder PB(Conf.DebugPassManager, TM, Conf.PTO, PGOOpt, &PIC);
AAManager AA;
// Parse a custom AA pipeline if asked to.
@@ -213,6 +237,12 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
CGSCCAnalysisManager CGAM(Conf.DebugPassManager);
ModuleAnalysisManager MAM(Conf.DebugPassManager);
+ std::unique_ptr<TargetLibraryInfoImpl> TLII(
+ new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
+ if (Conf.Freestanding)
+ TLII->disableAllFunctions();
+ FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
// Register the AA manager first so that our version is the one used.
FAM.registerPass([&] { return std::move(AA); });
@@ -224,7 +254,9 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
ModulePassManager MPM(Conf.DebugPassManager);
- // FIXME (davide): verify the input.
+
+ if (!Conf.DisableVerify)
+ MPM.addPass(VerifierPass());
PassBuilder::OptimizationLevel OL;
@@ -246,20 +278,21 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
}
if (IsThinLTO)
- MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager,
- ImportSummary);
+ MPM.addPass(PB.buildThinLTODefaultPipeline(OL, ImportSummary));
else
- MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager, ExportSummary);
- MPM.run(Mod, MAM);
+ MPM.addPass(PB.buildLTODefaultPipeline(OL, ExportSummary));
- // FIXME (davide): verify the output.
+ if (!Conf.DisableVerify)
+ MPM.addPass(VerifierPass());
+
+ MPM.run(Mod, MAM);
}
static void runNewPMCustomPasses(const Config &Conf, Module &Mod,
TargetMachine *TM, std::string PipelineDesc,
std::string AAPipelineDesc,
bool DisableVerify) {
- PassBuilder PB(TM);
+ PassBuilder PB(Conf.DebugPassManager, TM);
AAManager AA;
// Parse a custom AA pipeline if asked to.
@@ -275,6 +308,12 @@ static void runNewPMCustomPasses(const Config &Conf, Module &Mod,
CGSCCAnalysisManager CGAM;
ModuleAnalysisManager MAM;
+ std::unique_ptr<TargetLibraryInfoImpl> TLII(
+ new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
+ if (Conf.Freestanding)
+ TLII->disableAllFunctions();
+ FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
// Register the AA manager first so that our version is the one used.
FAM.registerPass([&] { return std::move(AA); });
@@ -308,6 +347,8 @@ static void runOldPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
PassManagerBuilder PMB;
PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
+ if (Conf.Freestanding)
+ PMB.LibraryInfo->disableAllFunctions();
PMB.Inliner = createFunctionInliningPass();
PMB.ExportSummary = ExportSummary;
PMB.ImportSummary = ImportSummary;
@@ -331,9 +372,27 @@ static void runOldPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
passes.run(Mod);
}
-bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
- bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary) {
+bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
+ bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary,
+ const std::vector<uint8_t> &CmdArgs) {
+ if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) {
+ // FIXME: the motivation for capturing post-merge bitcode and command line
+ // is replicating the compilation environment from bitcode, without needing
+ // to understand the dependencies (the functions to be imported). This
+ // assumes a clang-based invocation, in which case we have the command
+ // line.
+ // It's not very clear how the above motivation would map in the
+ // linker-based case, so we currently don't plumb the command line args in
+ // that case.
+ if (CmdArgs.empty())
+ LLVM_DEBUG(
+ dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but "
+ "command line arguments are not available");
+ llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
+ /*EmbedBitcode*/ true, /*EmbedCmdline*/ true,
+ /*Cmdline*/ CmdArgs);
+ }
// FIXME: Plumb the combined index into the new pass manager.
if (!Conf.OptPipeline.empty())
runNewPMCustomPasses(Conf, Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
@@ -346,30 +405,17 @@ bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
}
-static cl::opt<bool> EmbedBitcode(
- "lto-embed-bitcode", cl::init(false),
- cl::desc("Embed LLVM bitcode in object files produced by LTO"));
-
-static void EmitBitcodeSection(Module &M, const Config &Conf) {
- if (!EmbedBitcode)
- return;
- SmallVector<char, 0> Buffer;
- raw_svector_ostream OS(Buffer);
- WriteBitcodeToFile(M, OS);
-
- std::unique_ptr<MemoryBuffer> Buf(
- new SmallVectorMemoryBuffer(std::move(Buffer)));
- llvm::EmbedBitcodeInModule(M, Buf->getMemBufferRef(), /*EmbedBitcode*/ true,
- /*EmbedMarker*/ false, /*CmdArgs*/ nullptr);
-}
-
-void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
- unsigned Task, Module &Mod,
- const ModuleSummaryIndex &CombinedIndex) {
+static void codegen(const Config &Conf, TargetMachine *TM,
+ AddStreamFn AddStream, unsigned Task, Module &Mod,
+ const ModuleSummaryIndex &CombinedIndex) {
if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
return;
- EmitBitcodeSection(Mod, Conf);
+ if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized)
+ llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
+ /*EmbedBitcode*/ true,
+ /*EmbedCmdline*/ false,
+ /*CmdArgs*/ std::vector<uint8_t>());
std::unique_ptr<ToolOutputFile> DwoOut;
SmallString<1024> DwoFile(Conf.SplitDwarfOutput);
@@ -396,6 +442,8 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
legacy::PassManager CodeGenPasses;
CodeGenPasses.add(
createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex));
+ if (Conf.PreCodeGenPassesHook)
+ Conf.PreCodeGenPassesHook(CodeGenPasses);
if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
DwoOut ? &DwoOut->os() : nullptr,
Conf.CGFileType))
@@ -406,10 +454,11 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
DwoOut->keep();
}
-void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream,
- unsigned ParallelCodeGenParallelismLevel,
- std::unique_ptr<Module> Mod,
- const ModuleSummaryIndex &CombinedIndex) {
+static void splitCodeGen(const Config &C, TargetMachine *TM,
+ AddStreamFn AddStream,
+ unsigned ParallelCodeGenParallelismLevel,
+ std::unique_ptr<Module> Mod,
+ const ModuleSummaryIndex &CombinedIndex) {
ThreadPool CodegenThreadPool(
heavyweight_hardware_concurrency(ParallelCodeGenParallelismLevel));
unsigned ThreadCount = 0;
@@ -457,7 +506,8 @@ void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream,
CodegenThreadPool.wait();
}
-Expected<const Target *> initAndLookupTarget(const Config &C, Module &Mod) {
+static Expected<const Target *> initAndLookupTarget(const Config &C,
+ Module &Mod) {
if (!C.OverrideTriple.empty())
Mod.setTargetTriple(C.OverrideTriple);
else if (Mod.getTargetTriple().empty())
@@ -469,7 +519,6 @@ Expected<const Target *> initAndLookupTarget(const Config &C, Module &Mod) {
return make_error<StringError>(Msg, inconvertibleErrorCode());
return T;
}
-}
Error lto::finalizeOptimizationRemarks(
std::unique_ptr<ToolOutputFile> DiagOutputFile) {
@@ -494,7 +543,8 @@ Error lto::backend(const Config &C, AddStreamFn AddStream,
if (!C.CodeGenOnly) {
if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false,
- /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr))
+ /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr,
+ /*CmdArgs*/ std::vector<uint8_t>()))
return Error::success();
}
@@ -532,7 +582,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
Module &Mod, const ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
- MapVector<StringRef, BitcodeModule> &ModuleMap) {
+ MapVector<StringRef, BitcodeModule> &ModuleMap,
+ const std::vector<uint8_t> &CmdArgs) {
Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
if (!TOrErr)
return TOrErr.takeError();
@@ -542,7 +593,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
// Setup optimization remarks.
auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
Mod.getContext(), Conf.RemarksFilename, Conf.RemarksPasses,
- Conf.RemarksFormat, Conf.RemarksWithHotness, Task);
+ Conf.RemarksFormat, Conf.RemarksWithHotness, Conf.RemarksHotnessThreshold,
+ Task);
if (!DiagFileOrErr)
return DiagFileOrErr.takeError();
auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
@@ -559,6 +611,21 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ auto OptimizeAndCodegen =
+ [&](Module &Mod, TargetMachine *TM,
+ std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) {
+ if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
+ /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
+ CmdArgs))
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+
+ codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ };
+
+ if (ThinLTOAssumeMerged)
+ return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile));
+
// When linking an ELF shared object, dso_local should be dropped. We
// conservatively do this for -fpic.
bool ClearDSOLocalOnDeclarations =
@@ -599,10 +666,81 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
- if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true,
- /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
- return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile));
+}
+
+BitcodeModule *lto::findThinLTOModule(MutableArrayRef<BitcodeModule> BMs) {
+ if (ThinLTOAssumeMerged && BMs.size() == 1)
+ return BMs.begin();
- codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
- return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ for (BitcodeModule &BM : BMs) {
+ Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
+ if (LTOInfo && LTOInfo->IsThinLTO)
+ return &BM;
+ }
+ return nullptr;
+}
+
+Expected<BitcodeModule> lto::findThinLTOModule(MemoryBufferRef MBRef) {
+ Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef);
+ if (!BMsOrErr)
+ return BMsOrErr.takeError();
+
+ // The bitcode file may contain multiple modules; we want the one that is
+ // marked as being the ThinLTO module.
+ if (const BitcodeModule *Bm = lto::findThinLTOModule(*BMsOrErr))
+ return *Bm;
+
+ return make_error<StringError>("Could not find module summary",
+ inconvertibleErrorCode());
+}
+
+bool lto::loadReferencedModules(
+ const Module &M, const ModuleSummaryIndex &CombinedIndex,
+ FunctionImporter::ImportMapTy &ImportList,
+ MapVector<llvm::StringRef, llvm::BitcodeModule> &ModuleMap,
+ std::vector<std::unique_ptr<llvm::MemoryBuffer>>
+ &OwnedImportsLifetimeManager) {
+ if (ThinLTOAssumeMerged)
+ return true;
+ // We can simply import the values mentioned in the combined index, since
+ // we should only invoke this using the individual indexes written out
+ // via a WriteIndexesThinBackend.
+ for (const auto &GlobalList : CombinedIndex) {
+ // Ignore entries for undefined references.
+ if (GlobalList.second.SummaryList.empty())
+ continue;
+
+ auto GUID = GlobalList.first;
+ for (const auto &Summary : GlobalList.second.SummaryList) {
+ // Skip the summaries for the importing module. These are included to
+ // e.g. record required linkage changes.
+ if (Summary->modulePath() == M.getModuleIdentifier())
+ continue;
+ // Add an entry to provoke importing by thinBackend.
+ ImportList[Summary->modulePath()].insert(GUID);
+ }
+ }
+
+ for (auto &I : ImportList) {
+ ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MBOrErr =
+ llvm::MemoryBuffer::getFile(I.first());
+ if (!MBOrErr) {
+ errs() << "Error loading imported file '" << I.first()
+ << "': " << MBOrErr.getError().message() << "\n";
+ return false;
+ }
+
+ Expected<BitcodeModule> BMOrErr = findThinLTOModule(**MBOrErr);
+ if (!BMOrErr) {
+ handleAllErrors(BMOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+ errs() << "Error loading imported file '" << I.first()
+ << "': " << EIB.message() << '\n';
+ });
+ return false;
+ }
+ ModuleMap.insert({I.first(), *BMOrErr});
+ OwnedImportsLifetimeManager.push_back(std::move(*MBOrErr));
+ }
+ return true;
}
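
The new -lto-embed-bitcode flag above follows the usual enum-valued cl::opt pattern. For reference, a minimal self-contained instance of that pattern; the option name and enum below are hypothetical and not part of LLVM:

#include "llvm/Support/CommandLine.h"

using namespace llvm;

enum class EmbedMode { DoNotEmbed, Optimized, PreOpt };

// Hypothetical example flag; the real option added above is -lto-embed-bitcode.
static cl::opt<EmbedMode> ExampleEmbedMode(
    "example-embed-mode", cl::init(EmbedMode::DoNotEmbed),
    cl::values(clEnumValN(EmbedMode::DoNotEmbed, "none", "Do not embed"),
               clEnumValN(EmbedMode::Optimized, "optimized",
                          "Embed after optimization"),
               clEnumValN(EmbedMode::PreOpt, "pre-opt",
                          "Embed before optimization")),
    cl::desc("Example of an enum-valued cl::opt"));

With the real flag, the hunks above embed the post-merge module for the post-merge-pre-opt value and the fully optimized module, just before code generation, for the optimized value.
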
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp
index 25ab1404b4e1..027e197e1e0d 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -43,6 +43,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@@ -87,6 +88,14 @@ cl::opt<bool> RemarksWithHotness(
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
+cl::opt<Optional<uint64_t>, false, remarks::HotnessThresholdParser>
+ RemarksHotnessThreshold(
+ "lto-pass-remarks-hotness-threshold",
+ cl::desc("Minimum profile count required for an "
+ "optimization remark to be output."
+ " Use 'auto' to apply the threshold from profile summary."),
+ cl::value_desc("uint or 'auto'"), cl::init(0), cl::Hidden);
+
cl::opt<std::string>
RemarksFilename("lto-pass-remarks-output",
cl::desc("Output filename for pass remarks"),
@@ -317,22 +326,15 @@ LTOCodeGenerator::compileOptimized() {
return std::move(*BufferOrErr);
}
-bool LTOCodeGenerator::compile_to_file(const char **Name, bool DisableVerify,
- bool DisableInline,
- bool DisableGVNLoadPRE,
- bool DisableVectorization) {
- if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE,
- DisableVectorization))
+bool LTOCodeGenerator::compile_to_file(const char **Name) {
+ if (!optimize())
return false;
return compileOptimizedToFile(Name);
}
-std::unique_ptr<MemoryBuffer>
-LTOCodeGenerator::compile(bool DisableVerify, bool DisableInline,
- bool DisableGVNLoadPRE, bool DisableVectorization) {
- if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE,
- DisableVectorization))
+std::unique_ptr<MemoryBuffer> LTOCodeGenerator::compile() {
+ if (!optimize())
return nullptr;
return compileOptimized();
@@ -359,7 +361,7 @@ bool LTOCodeGenerator::determineTarget() {
// Construct LTOModule, hand over ownership of module and target. Use MAttr as
// the default set of features.
- SubtargetFeatures Features(MAttr);
+ SubtargetFeatures Features(join(MAttrs, ""));
Features.getDefaultSubtargetFeatures(Triple);
FeatureStr = Features.getString();
// Set a default CPU for Darwin triples.
@@ -368,16 +370,21 @@ bool LTOCodeGenerator::determineTarget() {
MCpu = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
MCpu = "yonah";
+ else if (Triple.isArm64e())
+ MCpu = "apple-a12";
else if (Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32)
MCpu = "cyclone";
}
TargetMach = createTargetMachine();
+ assert(TargetMach && "Unable to create target machine");
+
return true;
}
std::unique_ptr<TargetMachine> LTOCodeGenerator::createTargetMachine() {
+ assert(MArch && "MArch is not set!");
return std::unique_ptr<TargetMachine>(MArch->createTargetMachine(
TripleStr, MCpu, FeatureStr, Options, RelocModel, None, CGOptLevel));
}
@@ -466,8 +473,6 @@ void LTOCodeGenerator::applyScopeRestrictions() {
internalizeModule(*MergedModule, mustPreserveGV);
- MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1);
-
ScopeRestrictionsDone = true;
}
@@ -522,15 +527,13 @@ void LTOCodeGenerator::finishOptimizationRemarks() {
}
/// Optimize merged modules using various IPO passes
-bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
- bool DisableGVNLoadPRE,
- bool DisableVectorization) {
+bool LTOCodeGenerator::optimize() {
if (!this->determineTarget())
return false;
- auto DiagFileOrErr =
- lto::setupLLVMOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
- RemarksFormat, RemarksWithHotness);
+ auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
+ Context, RemarksFilename, RemarksPasses, RemarksFormat,
+ RemarksWithHotness, RemarksHotnessThreshold);
if (!DiagFileOrErr) {
errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
report_fatal_error("Can't get an output file for the remarks");
@@ -559,6 +562,9 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
// Mark which symbols can not be internalized
this->applyScopeRestrictions();
+ // Write LTOPostLink flag for passes that require all the modules.
+ MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1);
+
// Instantiate the pass manager to organize the passes.
legacy::PassManager passes;
@@ -570,11 +576,9 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
Triple TargetTriple(TargetMach->getTargetTriple());
PassManagerBuilder PMB;
- PMB.DisableGVNLoadPRE = DisableGVNLoadPRE;
- PMB.LoopVectorize = !DisableVectorization;
- PMB.SLPVectorize = !DisableVectorization;
- if (!DisableInline)
- PMB.Inliner = createFunctionInliningPass();
+ PMB.LoopVectorize = true;
+ PMB.SLPVectorize = true;
+ PMB.Inliner = createFunctionInliningPass();
PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple);
if (Freestanding)
PMB.LibraryInfo->disableAllFunctions();
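
The new -lto-pass-remarks-hotness-threshold option above accepts either an unsigned integer or the literal 'auto', the latter meaning that the threshold is taken from the profile summary. A sketch of how such a value could be parsed into an optional threshold, with an empty optional standing for 'auto'; the real remarks::HotnessThresholdParser may differ in detail:

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <string>

    // Parse "uint or 'auto'": nullopt means "derive the cutoff from the
    // profile summary later"; returns false on malformed input.
    bool parseHotnessThreshold(const std::string &Arg,
                               std::optional<std::uint64_t> &Out) {
      if (Arg == "auto") {
        Out = std::nullopt;
        return true;
      }
      if (Arg.empty() || Arg[0] == '-')
        return false;
      try {
        std::size_t Pos = 0;
        unsigned long long V = std::stoull(Arg, &Pos);
        if (Pos != Arg.size())
          return false;
        Out = static_cast<std::uint64_t>(V);
        return true;
      } catch (...) {
        return false;
      }
    }
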
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm-project/llvm/lib/LTO/LTOModule.cpp
index ebe779aea62e..1119622578df 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTOModule.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTOModule.cpp
@@ -46,6 +46,7 @@ using namespace llvm::object;
LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef,
llvm::TargetMachine *TM)
: Mod(std::move(M)), MBRef(MBRef), _target(TM) {
+ assert(_target && "target machine is null");
SymTab.addModule(Mod.get());
}
@@ -221,6 +222,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options,
CPU = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
CPU = "yonah";
+ else if (Triple.isArm64e())
+ CPU = "apple-a12";
else if (Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32)
CPU = "cyclone";
diff --git a/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index d0a1e1889c61..38f49693b62e 100644
--- a/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -37,6 +37,7 @@
#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
@@ -75,6 +76,8 @@ extern cl::opt<bool> LTODiscardValueNames;
extern cl::opt<std::string> RemarksFilename;
extern cl::opt<std::string> RemarksPasses;
extern cl::opt<bool> RemarksWithHotness;
+extern cl::opt<Optional<uint64_t>, false, remarks::HotnessThresholdParser>
+ RemarksHotnessThreshold;
extern cl::opt<std::string> RemarksFormat;
}
@@ -269,16 +272,26 @@ addUsedSymbolToPreservedGUID(const lto::InputFile &File,
}
// Convert the PreservedSymbols map from "Name" based to "GUID" based.
+static void computeGUIDPreservedSymbols(const lto::InputFile &File,
+ const StringSet<> &PreservedSymbols,
+ const Triple &TheTriple,
+ DenseSet<GlobalValue::GUID> &GUIDs) {
+ // Iterate the symbols in the input file and if the input has preserved symbol
+ // compute the GUID for the symbol.
+ for (const auto &Sym : File.symbols()) {
+ if (PreservedSymbols.count(Sym.getName()) && !Sym.getIRName().empty())
+ GUIDs.insert(GlobalValue::getGUID(GlobalValue::getGlobalIdentifier(
+ Sym.getIRName(), GlobalValue::ExternalLinkage, "")));
+ }
+}
+
static DenseSet<GlobalValue::GUID>
-computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
+computeGUIDPreservedSymbols(const lto::InputFile &File,
+ const StringSet<> &PreservedSymbols,
const Triple &TheTriple) {
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
- for (auto &Entry : PreservedSymbols) {
- StringRef Name = Entry.first();
- if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
- Name = Name.drop_front();
- GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
- }
+ computeGUIDPreservedSymbols(File, PreservedSymbols, TheTriple,
+ GUIDPreservedSymbols);
return GUIDPreservedSymbols;
}
@@ -565,9 +578,12 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
Features.getDefaultSubtargetFeatures(TheTriple);
std::string FeatureStr = Features.getString();
- return std::unique_ptr<TargetMachine>(
+ std::unique_ptr<TargetMachine> TM(
TheTarget->createTargetMachine(TheTriple.str(), MCpu, FeatureStr, Options,
RelocModel, None, CGOptLevel));
+ assert(TM && "Cannot create target machine");
+
+ return TM;
}
/**
@@ -652,7 +668,7 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
- PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
// Add used symbol to the preserved symbols.
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
@@ -702,7 +718,7 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
- PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
@@ -737,7 +753,7 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
- PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
@@ -770,7 +786,7 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
- PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
@@ -808,7 +824,7 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols =
- computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+ computeGUIDPreservedSymbols(File, PreservedSymbols, TMBuilder.TheTriple);
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
@@ -972,8 +988,10 @@ void ThinLTOCodeGenerator::run() {
// Convert the preserved symbols set from string to GUID, this is needed for
// computing the caching hash and the internalization.
- auto GUIDPreservedSymbols =
- computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+ DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
+ for (const auto &M : Modules)
+ computeGUIDPreservedSymbols(*M, PreservedSymbols, TMBuilder.TheTriple,
+ GUIDPreservedSymbols);
// Add used symbol from inputs to the preserved symbols.
for (const auto &M : Modules)
@@ -1042,19 +1060,11 @@ void ThinLTOCodeGenerator::run() {
ModuleToDefinedGVSummaries[ModuleIdentifier];
}
- // Compute the ordering we will process the inputs: the rough heuristic here
- // is to sort them per size so that the largest module get schedule as soon as
- // possible. This is purely a compile-time optimization.
- std::vector<int> ModulesOrdering;
- ModulesOrdering.resize(Modules.size());
- std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
- llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
- auto LSize =
- Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size();
- auto RSize =
- Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size();
- return LSize > RSize;
- });
+ std::vector<BitcodeModule *> ModulesVec;
+ ModulesVec.reserve(Modules.size());
+ for (auto &Mod : Modules)
+ ModulesVec.push_back(&Mod->getSingleBitcodeModule());
+ std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec);
// Parallel optimizer + codegen
{
@@ -1097,7 +1107,7 @@ void ThinLTOCodeGenerator::run() {
Context.enableDebugTypeODRUniquing();
auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
Context, RemarksFilename, RemarksPasses, RemarksFormat,
- RemarksWithHotness, count);
+ RemarksWithHotness, RemarksHotnessThreshold, count);
if (!DiagFileOrErr) {
errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
report_fatal_error("ThinLTO: Can't get an output file for the "
diff --git a/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp b/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp
index 055689b16e8f..6a2f84bb48a0 100644
--- a/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp
+++ b/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/TypeFinder.h"
+#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Support/Error.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <utility>
@@ -242,15 +243,6 @@ Type *TypeMapTy::get(Type *Ty, SmallPtrSet<StructType *, 8> &Visited) {
bool IsUniqued = !isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral();
if (!IsUniqued) {
- StructType *STy = cast<StructType>(Ty);
- // This is actually a type from the destination module, this can be reached
- // when this type is loaded in another module, added to DstStructTypesSet,
- // and then we reach the same type in another module where it has not been
- // added to MappedTypes. (PR37684)
- if (STy->getContext().isODRUniquingDebugTypes() && !STy->isOpaque() &&
- DstStructTypesSet.hasType(STy))
- return *Entry = STy;
-
#ifndef NDEBUG
for (auto &Pair : MappedTypes) {
assert(!(Pair.first != Ty && Pair.second == Ty) &&
@@ -258,7 +250,7 @@ Type *TypeMapTy::get(Type *Ty, SmallPtrSet<StructType *, 8> &Visited) {
}
#endif
- if (!Visited.insert(STy).second) {
+ if (!Visited.insert(cast<StructType>(Ty)).second) {
StructType *DTy = StructType::create(Ty->getContext());
return *Entry = DTy;
}
@@ -579,6 +571,13 @@ Value *IRLinker::materialize(Value *V, bool ForIndirectSymbol) {
if (!SGV)
return nullptr;
+ // When linking a global from other modules than source & dest, skip
+ // materializing it because it would be mapped later when its containing
+ // module is linked. Linking it now would potentially pull in many types that
+ // may not be mapped properly.
+ if (SGV->getParent() != &DstM && SGV->getParent() != SrcM.get())
+ return nullptr;
+
Expected<Constant *> NewProto = linkGlobalValueProto(SGV, ForIndirectSymbol);
if (!NewProto) {
setError(NewProto.takeError());
@@ -640,14 +639,14 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
AttributeList IRLinker::mapAttributeTypes(LLVMContext &C, AttributeList Attrs) {
for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
- if (Attrs.hasAttribute(i, Attribute::ByVal)) {
- Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
- if (!Ty)
- continue;
-
- Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
- Attrs = Attrs.addAttribute(
- C, i, Attribute::getWithByValType(C, TypeMap.get(Ty)));
+ for (Attribute::AttrKind TypedAttr :
+ {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) {
+ if (Attrs.hasAttribute(i, TypedAttr)) {
+ if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
+ Attrs = Attrs.replaceAttributeType(C, i, TypedAttr, TypeMap.get(Ty));
+ break;
+ }
+ }
}
}
return Attrs;
@@ -798,11 +797,11 @@ void IRLinker::computeTypeMapping() {
}
auto STTypePrefix = getTypeNamePrefix(ST->getName());
- if (STTypePrefix.size()== ST->getName().size())
+ if (STTypePrefix.size() == ST->getName().size())
continue;
// Check to see if the destination module has a struct with the prefix name.
- StructType *DST = DstM.getTypeByName(STTypePrefix);
+ StructType *DST = StructType::getTypeByName(ST->getContext(), STTypePrefix);
if (!DST)
continue;
@@ -844,6 +843,38 @@ static void getArrayElements(const Constant *C,
Expected<Constant *>
IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
const GlobalVariable *SrcGV) {
+ // Check that both variables have compatible properties.
+ if (DstGV && !DstGV->isDeclaration() && !SrcGV->isDeclaration()) {
+ if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
+ return stringErr(
+ "Linking globals named '" + SrcGV->getName() +
+ "': can only link appending global with another appending "
+ "global!");
+
+ if (DstGV->isConstant() != SrcGV->isConstant())
+ return stringErr("Appending variables linked with different const'ness!");
+
+ if (DstGV->getAlignment() != SrcGV->getAlignment())
+ return stringErr(
+ "Appending variables with different alignment need to be linked!");
+
+ if (DstGV->getVisibility() != SrcGV->getVisibility())
+ return stringErr(
+ "Appending variables with different visibility need to be linked!");
+
+ if (DstGV->hasGlobalUnnamedAddr() != SrcGV->hasGlobalUnnamedAddr())
+ return stringErr(
+ "Appending variables with different unnamed_addr need to be linked!");
+
+ if (DstGV->getSection() != SrcGV->getSection())
+ return stringErr(
+ "Appending variables with different section name need to be linked!");
+ }
+
+ // Do not need to do anything if source is a declaration.
+ if (SrcGV->isDeclaration())
+ return DstGV;
+
Type *EltTy = cast<ArrayType>(TypeMap.get(SrcGV->getValueType()))
->getElementType();
@@ -869,44 +900,20 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
}
uint64_t DstNumElements = 0;
- if (DstGV) {
+ if (DstGV && !DstGV->isDeclaration()) {
ArrayType *DstTy = cast<ArrayType>(DstGV->getValueType());
DstNumElements = DstTy->getNumElements();
- if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
- return stringErr(
- "Linking globals named '" + SrcGV->getName() +
- "': can only link appending global with another appending "
- "global!");
-
// Check to see that they two arrays agree on type.
if (EltTy != DstTy->getElementType())
return stringErr("Appending variables with different element types!");
- if (DstGV->isConstant() != SrcGV->isConstant())
- return stringErr("Appending variables linked with different const'ness!");
-
- if (DstGV->getAlignment() != SrcGV->getAlignment())
- return stringErr(
- "Appending variables with different alignment need to be linked!");
-
- if (DstGV->getVisibility() != SrcGV->getVisibility())
- return stringErr(
- "Appending variables with different visibility need to be linked!");
-
- if (DstGV->hasGlobalUnnamedAddr() != SrcGV->hasGlobalUnnamedAddr())
- return stringErr(
- "Appending variables with different unnamed_addr need to be linked!");
-
- if (DstGV->getSection() != SrcGV->getSection())
- return stringErr(
- "Appending variables with different section name need to be linked!");
}
SmallVector<Constant *, 16> SrcElements;
getArrayElements(SrcGV->getInitializer(), SrcElements);
if (IsNewStructor) {
- auto It = remove_if(SrcElements, [this](Constant *E) {
+ erase_if(SrcElements, [this](Constant *E) {
auto *Key =
dyn_cast<GlobalValue>(E->getAggregateElement(2)->stripPointerCasts());
if (!Key)
@@ -914,7 +921,6 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
GlobalValue *DGV = getLinkedToGlobal(Key);
return !shouldLink(DGV, *Key);
});
- SrcElements.erase(It, SrcElements.end());
}
uint64_t NewSize = DstNumElements + SrcElements.size();
ArrayType *NewType = ArrayType::get(EltTy, NewSize);
@@ -930,9 +936,10 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
Constant *Ret = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));
- Mapper.scheduleMapAppendingVariable(*NG,
- DstGV ? DstGV->getInitializer() : nullptr,
- IsOldStructor, SrcElements);
+ Mapper.scheduleMapAppendingVariable(
+ *NG,
+ (DstGV && !DstGV->isDeclaration()) ? DstGV->getInitializer() : nullptr,
+ IsOldStructor, SrcElements);
// Replace any uses of the two global variables with uses of the new
// global.
@@ -985,8 +992,7 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
DGV = nullptr;
// Handle the ultra special appending linkage case first.
- assert(!DGV || SGV->hasAppendingLinkage() == DGV->hasAppendingLinkage());
- if (SGV->hasAppendingLinkage())
+ if (SGV->hasAppendingLinkage() || (DGV && DGV->hasAppendingLinkage()))
return linkAppendingVarProto(cast_or_null<GlobalVariable>(DGV),
cast<GlobalVariable>(SGV));
@@ -1126,14 +1132,13 @@ void IRLinker::prepareCompileUnitsForImport() {
assert(CU && "Expected valid compile unit");
// Enums, macros, and retained types don't need to be listed on the
// imported DICompileUnit. This means they will only be imported
- // if reached from the mapped IR. Do this by setting their value map
- // entries to nullptr, which will automatically prevent their importing
- // when reached from the DICompileUnit during metadata mapping.
- ValueMap.MD()[CU->getRawEnumTypes()].reset(nullptr);
- ValueMap.MD()[CU->getRawMacros()].reset(nullptr);
- ValueMap.MD()[CU->getRawRetainedTypes()].reset(nullptr);
+ // if reached from the mapped IR.
+ CU->replaceEnumTypes(nullptr);
+ CU->replaceMacros(nullptr);
+ CU->replaceRetainedTypes(nullptr);
+
// The original definition (or at least its debug info - if the variable is
- // internalized an optimized away) will remain in the source module, so
+ // internalized and optimized away) will remain in the source module, so
// there's no need to import them.
// If LLVM ever does more advanced optimizations on global variables
// (removing/localizing write operations, for instance) that can track
@@ -1141,7 +1146,7 @@ void IRLinker::prepareCompileUnitsForImport() {
// with care when it comes to debug info size. Emitting small CUs containing
// only a few imported entities into every destination module may be very
// size inefficient.
- ValueMap.MD()[CU->getRawGlobalVariables()].reset(nullptr);
+ CU->replaceGlobalVariables(nullptr);
// Imported entities only need to be mapped in if they have local
// scope, as those might correspond to an imported entity inside a
@@ -1174,7 +1179,7 @@ void IRLinker::prepareCompileUnitsForImport() {
else
// If there were no local scope imported entities, we can map
// the whole list to nullptr.
- ValueMap.MD()[CU->getRawImportedEntities()].reset(nullptr);
+ CU->replaceImportedEntities(nullptr);
}
}
}
@@ -1433,7 +1438,7 @@ Error IRLinker::run() {
if (!SrcM->getTargetTriple().empty()&&
!SrcTriple.isCompatibleWith(DstTriple))
- emitWarning("Linking two modules of different target triples: " +
+ emitWarning("Linking two modules of different target triples: '" +
SrcM->getModuleIdentifier() + "' is '" +
SrcM->getTargetTriple() + "' whereas '" +
DstM.getModuleIdentifier() + "' is '" + DstM.getTargetTriple() +
@@ -1441,17 +1446,6 @@ Error IRLinker::run() {
DstM.setTargetTriple(SrcTriple.merge(DstTriple));
- // Append the module inline asm string.
- if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) {
- std::string SrcModuleInlineAsm = adjustInlineAsm(SrcM->getModuleInlineAsm(),
- SrcTriple);
- if (DstM.getModuleInlineAsm().empty())
- DstM.setModuleInlineAsm(SrcModuleInlineAsm);
- else
- DstM.setModuleInlineAsm(DstM.getModuleInlineAsm() + "\n" +
- SrcModuleInlineAsm);
- }
-
// Loop over all of the linked values to compute type mappings.
computeTypeMapping();
@@ -1482,6 +1476,24 @@ Error IRLinker::run() {
// are properly remapped.
linkNamedMDNodes();
+ if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) {
+ // Append the module inline asm string.
+ DstM.appendModuleInlineAsm(adjustInlineAsm(SrcM->getModuleInlineAsm(),
+ SrcTriple));
+ } else if (IsPerformingImport) {
+ // Import any symver directives for symbols in DstM.
+ ModuleSymbolTable::CollectAsmSymvers(*SrcM,
+ [&](StringRef Name, StringRef Alias) {
+ if (DstM.getNamedValue(Name)) {
+ SmallString<256> S(".symver ");
+ S += Name;
+ S += ", ";
+ S += Alias;
+ DstM.appendModuleInlineAsm(S);
+ }
+ });
+ }
+
// Merge the module flags into the DstM module.
return linkModuleFlagsMetadata();
}
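
The import branch added at the end of IRLinker::run() collects ".symver name, alias" directives from the source module's inline assembly and re-emits them into the destination module, but only for symbols the destination actually names. A simplified standalone sketch of that filtering step, on plain strings rather than the ModuleSymbolTable API:

    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    // Keep only the symver directives whose symbol exists in the destination
    // module and join them into one inline-asm blob.
    std::string buildSymverAsm(
        const std::vector<std::pair<std::string, std::string>> &Symvers,
        const std::set<std::string> &NamesInDst) {
      std::string Asm;
      for (const auto &SV : Symvers) {
        if (!NamesInDst.count(SV.first))
          continue;
        Asm += ".symver " + SV.first + ", " + SV.second + "\n";
      }
      return Asm;
    }
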
diff --git a/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
index 35d6290e901b..98793c587388 100644
--- a/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
@@ -248,7 +248,7 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
}
// We always have to add Src if it has appending linkage.
- if (Src.hasAppendingLinkage()) {
+ if (Src.hasAppendingLinkage() || Dest.hasAppendingLinkage()) {
LinkFromSrc = true;
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
index 1ca9d0fe1e18..2d810ffd350b 100644
--- a/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
@@ -464,7 +464,7 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) {
uint64_t ELFWriter::SymbolValue(const MCSymbol &Sym,
const MCAsmLayout &Layout) {
- if (Sym.isCommon() && (Sym.isTargetCommon() || Sym.isExternal()))
+ if (Sym.isCommon())
return Sym.getCommonAlignment();
uint64_t Res;
@@ -1024,9 +1024,13 @@ void ELFWriter::writeSection(const SectionIndexMapTy &SectionIndexMap,
}
if (Section.getFlags() & ELF::SHF_LINK_ORDER) {
+ // If the value in the associated metadata is not a definition, Sym will be
+ // undefined. Represent this with sh_link=0.
const MCSymbol *Sym = Section.getLinkedToSymbol();
- const MCSectionELF *Sec = cast<MCSectionELF>(&Sym->getSection());
- sh_link = SectionIndexMap.lookup(Sec);
+ if (Sym && Sym->isInSection()) {
+ const MCSectionELF *Sec = cast<MCSectionELF>(&Sym->getSection());
+ sh_link = SectionIndexMap.lookup(Sec);
+ }
}
WriteSecHdrEntry(StrTabBuilder.getOffset(Section.getName()),
@@ -1254,9 +1258,9 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
// The presence of symbol versions causes undefined symbols and
// versions declared with @@@ to be renamed.
- for (const std::pair<StringRef, const MCSymbol *> &P : Asm.Symvers) {
- StringRef AliasName = P.first;
- const auto &Symbol = cast<MCSymbolELF>(*P.second);
+ for (const MCAssembler::Symver &S : Asm.Symvers) {
+ StringRef AliasName = S.Name;
+ const auto &Symbol = cast<MCSymbolELF>(*S.Sym);
size_t Pos = AliasName.find('@');
assert(Pos != StringRef::npos);
@@ -1274,25 +1278,23 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
// Aliases defined with .symvar copy the binding from the symbol they alias.
// This is the first place we are able to copy this information.
- Alias->setExternal(Symbol.isExternal());
Alias->setBinding(Symbol.getBinding());
+ Alias->setVisibility(Symbol.getVisibility());
Alias->setOther(Symbol.getOther());
if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
continue;
- // FIXME: Get source locations for these errors or diagnose them earlier.
if (Symbol.isUndefined() && Rest.startswith("@@") &&
!Rest.startswith("@@@")) {
- Asm.getContext().reportError(SMLoc(), "versioned symbol " + AliasName +
- " must be defined");
+ Asm.getContext().reportError(S.Loc, "default version symbol " +
+ AliasName + " must be defined");
continue;
}
if (Renames.count(&Symbol) && Renames[&Symbol] != Alias) {
- Asm.getContext().reportError(
- SMLoc(), llvm::Twine("multiple symbol versions defined for ") +
- Symbol.getName());
+ Asm.getContext().reportError(S.Loc, Twine("multiple versions for ") +
+ Symbol.getName());
continue;
}
@@ -1390,9 +1392,21 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
if (C != 0)
return true;
- // It looks like gold has a bug (http://sourceware.org/PR16794) and can
- // only handle section relocations to mergeable sections if using RELA.
- if (!hasRelocationAddend())
+ // gold<2.34 incorrectly ignored the addend for R_386_GOTOFF (9)
+ // (http://sourceware.org/PR16794).
+ if (TargetObjectWriter->getEMachine() == ELF::EM_386 &&
+ Type == ELF::R_386_GOTOFF)
+ return true;
+
+ // ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so
+ // it doesn't know that an R_MIPS_HI16 with implicit addend 1 and an
+ // R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in
+ // range of a MergeInputSection. We could introduce a new RelExpr member
+ // (like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12)
+ // but the complexity is unnecessary given that GNU as keeps the original
+ // symbol for this case as well.
+ if (TargetObjectWriter->getEMachine() == ELF::EM_MIPS &&
+ !hasRelocationAddend())
return true;
}
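
For reference, the alias names handled in executePostLayoutBinding() above encode the version after an '@': "name@ver" binds an extra version, "name@@ver" marks the default version (which the new diagnostic requires to be defined), and "name@@@ver" is accepted whether or not the symbol is defined. A standalone sketch of that split, on plain strings rather than MCSymbolELF:

    #include <string>

    struct VersionedName {
      std::string Name;      // part before the first '@'
      std::string Rest;      // the '@'/'@@'/'@@@' suffix including the version
      bool IsDefaultVersion; // "@@" but not "@@@"
    };

    VersionedName splitVersionedName(const std::string &Alias) {
      VersionedName R;
      std::string::size_type Pos = Alias.find('@');
      R.Name = Alias.substr(0, Pos);
      R.Rest = (Pos == std::string::npos) ? std::string() : Alias.substr(Pos);
      R.IsDefaultVersion =
          R.Rest.rfind("@@", 0) == 0 && R.Rest.rfind("@@@", 0) != 0;
      return R;
    }
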
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp
index cf110345df3d..0d32e71c2d8f 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp
@@ -54,10 +54,17 @@ std::unique_ptr<MCObjectWriter>
MCAsmBackend::createDwoObjectWriter(raw_pwrite_stream &OS,
raw_pwrite_stream &DwoOS) const {
auto TW = createObjectTargetWriter();
- if (TW->getFormat() != Triple::ELF)
- report_fatal_error("dwo only supported with ELF");
- return createELFDwoObjectWriter(cast<MCELFObjectTargetWriter>(std::move(TW)),
- OS, DwoOS, Endian == support::little);
+ switch (TW->getFormat()) {
+ case Triple::ELF:
+ return createELFDwoObjectWriter(
+ cast<MCELFObjectTargetWriter>(std::move(TW)), OS, DwoOS,
+ Endian == support::little);
+ case Triple::Wasm:
+ return createWasmDwoObjectWriter(
+ cast<MCWasmObjectTargetWriter>(std::move(TW)), OS, DwoOS);
+ default:
+ report_fatal_error("dwo only supported with ELF and Wasm");
+ }
}
Optional<MCFixupKind> MCAsmBackend::getFixupKind(StringRef Name) const {
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp
index 9767ee6c1133..620d3e7cffc3 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp
@@ -28,6 +28,12 @@ static cl::opt<DefaultOnOff> DwarfExtendedLoc(
clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
cl::init(Default));
+cl::opt<cl::boolOrDefault> UseLEB128Directives(
+ "use-leb128-directives", cl::Hidden,
+ cl::desc(
+ "Disable the usage of LEB128 directives, and generate .byte instead."),
+ cl::init(cl::BOU_UNSET));
+
MCAsmInfo::MCAsmInfo() {
SeparatorString = ";";
CommentString = "#";
@@ -51,6 +57,8 @@ MCAsmInfo::MCAsmInfo() {
WeakDirective = "\t.weak\t";
if (DwarfExtendedLoc != Default)
SupportsExtendedDwarfLocDirective = DwarfExtendedLoc == Enable;
+ if (UseLEB128Directives != cl::BOU_UNSET)
+ HasLEB128Directives = UseLEB128Directives == cl::BOU_TRUE;
// FIXME: Clang's logic should be synced with the logic used to initialize
// this member and the two implementations should be merged.
@@ -101,8 +109,7 @@ MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
}
bool MCAsmInfo::isAcceptableChar(char C) const {
- return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
- (C >= '0' && C <= '9') || C == '_' || C == '$' || C == '.' || C == '@';
+ return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '@';
}
bool MCAsmInfo::isValidUnquotedName(StringRef Name) const {
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmInfoXCOFF.cpp
index b5c5bb3ace8e..2f8bc6a49bb7 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmInfoXCOFF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmInfoXCOFF.cpp
@@ -8,9 +8,12 @@
#include "llvm/MC/MCAsmInfoXCOFF.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+extern cl::opt<cl::boolOrDefault> UseLEB128Directives;
+
void MCAsmInfoXCOFF::anchor() {}
MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
@@ -20,10 +23,14 @@ MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
PrivateLabelPrefix = "L..";
SupportsQuotedNames = false;
UseDotAlignForAlignment = true;
+ if (UseLEB128Directives == cl::BOU_UNSET)
+ HasLEB128Directives = false;
ZeroDirective = "\t.space\t";
ZeroDirectiveSupportsNonZeroValue = false;
AsciiDirective = nullptr; // not supported
AscizDirective = nullptr; // not supported
+ ByteListDirective = "\t.byte\t";
+ CharacterLiteralSyntax = ACLS_SingleQuotePrefix;
// Use .vbyte for data definition to avoid directives that apply an implicit
// alignment.
@@ -35,6 +42,8 @@ MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
HasDotTypeDotSizeDirective = false;
UseIntegratedAssembler = false;
NeedsFunctionDescriptors = true;
+
+ ExceptionsType = ExceptionHandling::AIX;
}
bool MCAsmInfoXCOFF::isAcceptableChar(char C) const {
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmMacro.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmMacro.cpp
index 186a68b02a29..bc95f98f2957 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmMacro.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmMacro.cpp
@@ -38,6 +38,11 @@ void MCAsmMacro::dump(raw_ostream &OS) const {
OS << " ";
P.dump();
}
+ if (!Locals.empty()) {
+ OS << " Locals:\n";
+ for (StringRef L : Locals)
+ OS << " " << L << '\n';
+ }
OS << " (BEGIN BODY)" << Body << "(END BODY)\n";
}
#endif
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
index 6a8572e57922..10d72553fe6d 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -144,6 +145,11 @@ public:
const MCSymbol *Aliasee) override;
void emitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) override;
+
+ StringRef getMnemonic(MCInst &MI) override {
+ return InstPrinter->getMnemonic(&MI).first;
+ }
+
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void emitAssemblerFlag(MCAssemblerFlag Flag) override;
@@ -345,6 +351,10 @@ public:
void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
+ uint64_t Attr,
+ const MCPseudoProbeInlineStack &InlineStack) override;
+
void emitBundleAlignMode(unsigned AlignPow2) override;
void emitBundleLock(bool AlignToEnd) override;
void emitBundleUnlock() override;
@@ -580,6 +590,7 @@ static const char *getPlatformName(MachO::PlatformType Type) {
case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator";
case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator";
case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
+ case MachO::PLATFORM_DRIVERKIT: return "driverkit";
}
llvm_unreachable("Invalid Mach-O platform type");
}
@@ -796,15 +807,16 @@ void MCAsmStreamer::emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym,
OS << ',' << Log2_32(ByteAlignment);
EmitEOL();
+
+ // Print symbol's rename (original name contains invalid character(s)) if
+ // there is one.
+ MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(CsectSym);
+ if (XSym->hasRename())
+ emitXCOFFRenameDirective(XSym, XSym->getSymbolTableName());
}
void MCAsmStreamer::emitXCOFFSymbolLinkageWithVisibility(
MCSymbol *Symbol, MCSymbolAttr Linkage, MCSymbolAttr Visibility) {
- // Print symbol's rename (original name contains invalid character(s)) if
- // there is one.
- if (cast<MCSymbolXCOFF>(Symbol)->hasRename())
- emitXCOFFRenameDirective(Symbol,
- cast<MCSymbolXCOFF>(Symbol)->getSymbolTableName());
switch (Linkage) {
case MCSA_Global:
@@ -839,6 +851,12 @@ void MCAsmStreamer::emitXCOFFSymbolLinkageWithVisibility(
report_fatal_error("unexpected value for Visibility type");
}
EmitEOL();
+
+ // Print symbol's rename (original name contains invalid character(s)) if
+ // there is one.
+ if (cast<MCSymbolXCOFF>(Symbol)->hasRename())
+ emitXCOFFRenameDirective(Symbol,
+ cast<MCSymbolXCOFF>(Symbol)->getSymbolTableName());
}
void MCAsmStreamer::emitXCOFFRenameDirective(const MCSymbol *Name,
@@ -868,12 +886,6 @@ void MCAsmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
void MCAsmStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
- // Print symbol's rename (original name contains invalid character(s)) if
- // there is one.
- MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(Symbol);
- if (XSym && XSym->hasRename())
- emitXCOFFRenameDirective(XSym, XSym->getSymbolTableName());
-
OS << "\t.comm\t";
Symbol->print(OS, MAI);
OS << ',' << Size;
@@ -885,6 +897,13 @@ void MCAsmStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
OS << ',' << Log2_32(ByteAlignment);
}
EmitEOL();
+
+ // Print symbol's rename (original name contains invalid character(s)) if
+ // there is one.
+ MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(Symbol);
+ if (XSym && XSym->hasRename())
+ emitXCOFFRenameDirective(XSym, XSym->getSymbolTableName());
+
}
void MCAsmStreamer::emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -962,6 +981,47 @@ void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
static inline char toOctal(int X) { return (X&7)+'0'; }
+static void PrintByteList(StringRef Data, raw_ostream &OS,
+ MCAsmInfo::AsmCharLiteralSyntax ACLS) {
+ assert(!Data.empty() && "Cannot generate an empty list.");
+ const auto printCharacterInOctal = [&OS](unsigned char C) {
+ OS << '0';
+ OS << toOctal(C >> 6);
+ OS << toOctal(C >> 3);
+ OS << toOctal(C >> 0);
+ };
+ const auto printOneCharacterFor = [printCharacterInOctal](
+ auto printOnePrintingCharacter) {
+ return [printCharacterInOctal, printOnePrintingCharacter](unsigned char C) {
+ if (isPrint(C)) {
+ printOnePrintingCharacter(static_cast<char>(C));
+ return;
+ }
+ printCharacterInOctal(C);
+ };
+ };
+ const auto printCharacterList = [Data, &OS](const auto &printOneCharacter) {
+ const auto BeginPtr = Data.begin(), EndPtr = Data.end();
+ for (const unsigned char C : make_range(BeginPtr, EndPtr - 1)) {
+ printOneCharacter(C);
+ OS << ',';
+ }
+ printOneCharacter(*(EndPtr - 1));
+ };
+ switch (ACLS) {
+ case MCAsmInfo::ACLS_Unknown:
+ printCharacterList(printCharacterInOctal);
+ return;
+ case MCAsmInfo::ACLS_SingleQuotePrefix:
+ printCharacterList(printOneCharacterFor([&OS](char C) {
+ const char AsmCharLitBuf[2] = {'\'', C};
+ OS << StringRef(AsmCharLitBuf, sizeof(AsmCharLitBuf));
+ }));
+ return;
+ }
+ llvm_unreachable("Invalid AsmCharLiteralSyntax value!");
+}
+
static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
OS << '"';
@@ -1000,33 +1060,42 @@ void MCAsmStreamer::emitBytes(StringRef Data) {
"Cannot emit contents before setting section!");
if (Data.empty()) return;
- // If only single byte is provided or no ascii or asciz directives is
- // supported, emit as vector of 8bits data.
- if (Data.size() == 1 ||
- !(MAI->getAscizDirective() || MAI->getAsciiDirective())) {
- if (MCTargetStreamer *TS = getTargetStreamer()) {
- TS->emitRawBytes(Data);
+ const auto emitAsString = [this](StringRef Data) {
+ // If the data ends with 0 and the target supports .asciz, use it, otherwise
+ // use .ascii or a byte-list directive
+ if (MAI->getAscizDirective() && Data.back() == 0) {
+ OS << MAI->getAscizDirective();
+ Data = Data.substr(0, Data.size() - 1);
+ } else if (LLVM_LIKELY(MAI->getAsciiDirective())) {
+ OS << MAI->getAsciiDirective();
+ } else if (MAI->getByteListDirective()) {
+ OS << MAI->getByteListDirective();
+ PrintByteList(Data, OS, MAI->characterLiteralSyntax());
+ EmitEOL();
+ return true;
} else {
- const char *Directive = MAI->getData8bitsDirective();
- for (const unsigned char C : Data.bytes()) {
- OS << Directive << (unsigned)C;
- EmitEOL();
- }
+ return false;
}
+
+ PrintQuotedString(Data, OS);
+ EmitEOL();
+ return true;
+ };
+
+ if (Data.size() != 1 && emitAsString(Data))
return;
- }
- // If the data ends with 0 and the target supports .asciz, use it, otherwise
- // use .ascii
- if (MAI->getAscizDirective() && Data.back() == 0) {
- OS << MAI->getAscizDirective();
- Data = Data.substr(0, Data.size()-1);
- } else {
- OS << MAI->getAsciiDirective();
+ // Only single byte is provided or no ascii, asciz, or byte-list directives
+ // are applicable. Emit as vector of individual 8bits data elements.
+ if (MCTargetStreamer *TS = getTargetStreamer()) {
+ TS->emitRawBytes(Data);
+ return;
+ }
+ const char *Directive = MAI->getData8bitsDirective();
+ for (const unsigned char C : Data.bytes()) {
+ OS << Directive << (unsigned)C;
+ EmitEOL();
}
-
- PrintQuotedString(Data, OS);
- EmitEOL();
}
void MCAsmStreamer::emitBinaryData(StringRef Data) {
@@ -1813,8 +1882,11 @@ void MCAsmStreamer::EmitWinCFIEndProc(SMLoc Loc) {
EmitEOL();
}
-// TODO: Implement
void MCAsmStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
+ MCStreamer::EmitWinCFIFuncletOrFuncEnd(Loc);
+
+ OS << "\t.seh_endfunclet";
+ EmitEOL();
}
void MCAsmStreamer::EmitWinCFIStartChained(SMLoc Loc) {
@@ -2055,6 +2127,18 @@ void MCAsmStreamer::emitInstruction(const MCInst &Inst,
EmitEOL();
}
+void MCAsmStreamer::emitPseudoProbe(
+ uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr,
+ const MCPseudoProbeInlineStack &InlineStack) {
+ OS << "\t.pseudoprobe\t" << Guid << " " << Index << " " << Type << " "
+ << Attr;
+ // Emit inline stack like
+ // @ GUIDmain:3 @ GUIDCaller:1 @ GUIDDirectCaller:11
+ for (const auto &Site : InlineStack)
+ OS << " @ " << std::get<0>(Site) << ":" << std::get<1>(Site);
+ EmitEOL();
+}
+
void MCAsmStreamer::emitBundleAlignMode(unsigned AlignPow2) {
OS << "\t.bundle_align_mode " << AlignPow2;
EmitEOL();
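
The byte-list path added above covers the case where neither .asciz nor .ascii applies; for XCOFF the directive is "\t.byte\t" with single-quote character literals, so printable bytes are written as 'c and anything else as a leading 0 followed by three octal digits. A standalone sketch reproducing that rendering for the single-quote syntax:

    #include <cctype>
    #include <string>

    // Render a byte string as a .byte operand list in the single-quote
    // syntax: printable bytes become 'c, others become 0ddd (octal), and
    // entries are comma separated.
    std::string toByteList(const std::string &Data) {
      std::string Out;
      for (std::string::size_type I = 0; I < Data.size(); ++I) {
        unsigned char C = static_cast<unsigned char>(Data[I]);
        if (std::isprint(C)) {
          Out += '\'';
          Out += static_cast<char>(C);
        } else {
          Out += '0';
          Out += static_cast<char>('0' + ((C >> 6) & 7));
          Out += static_cast<char>('0' + ((C >> 3) & 7));
          Out += static_cast<char>('0' + (C & 7));
        }
        if (I + 1 != Data.size())
          Out += ',';
      }
      return Out;
    }

For example, toByteList(std::string("Hi\0", 3)) returns "'H,'i,0000".
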
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp
index 3ca8714b7817..ce296d7faa45 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp
@@ -62,8 +62,8 @@ STATISTIC(EmittedAlignFragments,
"Number of emitted assembler fragments - align");
STATISTIC(EmittedFillFragments,
"Number of emitted assembler fragments - fill");
-STATISTIC(EmittedOrgFragments,
- "Number of emitted assembler fragments - org");
+STATISTIC(EmittedNopsFragments, "Number of emitted assembler fragments - nops");
+STATISTIC(EmittedOrgFragments, "Number of emitted assembler fragments - org");
STATISTIC(evaluateFixup, "Number of evaluated fixups");
STATISTIC(FragmentLayouts, "Number of fragment layouts");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
@@ -312,6 +312,9 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
return Size;
}
+ case MCFragment::FT_Nops:
+ return cast<MCNopsFragment>(F).getNumBytes();
+
case MCFragment::FT_LEB:
return cast<MCLEBFragment>(F).getContents().size();
@@ -380,6 +383,8 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
return cast<MCCVInlineLineTableFragment>(F).getContents().size();
case MCFragment::FT_CVDefRange:
return cast<MCCVDefRangeFragment>(F).getContents().size();
+ case MCFragment::FT_PseudoProbe:
+ return cast<MCPseudoProbeAddrFragment>(F).getContents().size();
case MCFragment::FT_Dummy:
llvm_unreachable("Should not have been added");
}
@@ -613,6 +618,45 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
break;
}
+ case MCFragment::FT_Nops: {
+ ++stats::EmittedNopsFragments;
+ const MCNopsFragment &NF = cast<MCNopsFragment>(F);
+ int64_t NumBytes = NF.getNumBytes();
+ int64_t ControlledNopLength = NF.getControlledNopLength();
+ int64_t MaximumNopLength = Asm.getBackend().getMaximumNopSize();
+
+ assert(NumBytes > 0 && "Expected positive NOPs fragment size");
+ assert(ControlledNopLength >= 0 && "Expected non-negative NOP size");
+
+ if (ControlledNopLength > MaximumNopLength) {
+ Asm.getContext().reportError(NF.getLoc(),
+ "illegal NOP size " +
+ std::to_string(ControlledNopLength) +
+ ". (expected within [0, " +
+ std::to_string(MaximumNopLength) + "])");
+ // Clamp the NOP length as reportError does not stop the execution
+ // immediately.
+ ControlledNopLength = MaximumNopLength;
+ }
+
+ // Use maximum value if the size of each NOP is not specified
+ if (!ControlledNopLength)
+ ControlledNopLength = MaximumNopLength;
+
+ while (NumBytes) {
+ uint64_t NumBytesToEmit =
+ (uint64_t)std::min(NumBytes, ControlledNopLength);
+ assert(NumBytesToEmit && "try to emit empty NOP instruction");
+ if (!Asm.getBackend().writeNopData(OS, NumBytesToEmit)) {
+ report_fatal_error("unable to write nop sequence of the remaining " +
+ Twine(NumBytesToEmit) + " bytes");
+ break;
+ }
+ NumBytes -= NumBytesToEmit;
+ }
+ break;
+ }
+
case MCFragment::FT_LEB: {
const MCLEBFragment &LF = cast<MCLEBFragment>(F);
OS << LF.getContents();
@@ -662,6 +706,11 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
OS << DRF.getContents();
break;
}
+ case MCFragment::FT_PseudoProbe: {
+ const MCPseudoProbeAddrFragment &PF = cast<MCPseudoProbeAddrFragment>(F);
+ OS << PF.getContents();
+ break;
+ }
case MCFragment::FT_Dummy:
llvm_unreachable("Should not have been added");
}
@@ -712,6 +761,8 @@ void MCAssembler::writeSectionData(raw_ostream &OS, const MCSection *Sec,
assert((cast<MCFillFragment>(F).getValue() == 0) &&
"Invalid fill in virtual section!");
break;
+ case MCFragment::FT_Org:
+ break;
}
}
@@ -724,7 +775,8 @@ void MCAssembler::writeSectionData(raw_ostream &OS, const MCSection *Sec,
for (const MCFragment &F : *Sec)
writeFragment(OS, *this, Layout, F);
- assert(OS.tell() - Start == Layout.getSectionAddressSize(Sec));
+ assert(getContext().hadError() ||
+ OS.tell() - Start == Layout.getSectionAddressSize(Sec));
}
std::tuple<MCValue, uint64_t, bool>
@@ -870,6 +922,12 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
Contents = DF.getContents();
break;
}
+ case MCFragment::FT_PseudoProbe: {
+ MCPseudoProbeAddrFragment &PF = cast<MCPseudoProbeAddrFragment>(Frag);
+ Fixups = PF.getFixups();
+ Contents = PF.getContents();
+ break;
+ }
}
for (const MCFixup &Fixup : Fixups) {
uint64_t FixedValue;
@@ -1061,10 +1119,9 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
} else {
uint32_t Offset;
uint32_t Size;
- bool SetDelta = MCDwarfLineAddr::FixedEncode(Context,
- getDWARFLinetableParams(),
- LineDelta, AddrDelta,
- OSE, &Offset, &Size);
+ bool SetDelta;
+ std::tie(Offset, Size, SetDelta) =
+ MCDwarfLineAddr::fixedEncode(Context, LineDelta, AddrDelta, OSE);
// Add Fixups for address delta or new address.
const MCExpr *FixupExpr;
if (SetDelta) {
@@ -1125,6 +1182,27 @@ bool MCAssembler::relaxCVDefRange(MCAsmLayout &Layout,
return OldSize != F.getContents().size();
}
+bool MCAssembler::relaxPseudoProbeAddr(MCAsmLayout &Layout,
+ MCPseudoProbeAddrFragment &PF) {
+ uint64_t OldSize = PF.getContents().size();
+ int64_t AddrDelta;
+ bool Abs = PF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
+ assert(Abs && "We created a pseudo probe with an invalid expression");
+ (void)Abs;
+ SmallVectorImpl<char> &Data = PF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ PF.getFixups().clear();
+
+ // Relocations should not be needed in general except on RISC-V which we are
+ // not targeted for now.
+ assert(!getBackend().requiresDiffExpressionRelocations() &&
+ "cannot relax relocations");
+ // AddrDelta is a signed integer
+ encodeSLEB128(AddrDelta, OSE, OldSize);
+ return OldSize != Data.size();
+}
+
bool MCAssembler::relaxFragment(MCAsmLayout &Layout, MCFragment &F) {
switch(F.getKind()) {
default:
@@ -1146,6 +1224,8 @@ bool MCAssembler::relaxFragment(MCAsmLayout &Layout, MCFragment &F) {
return relaxCVInlineLineTable(Layout, cast<MCCVInlineLineTableFragment>(F));
case MCFragment::FT_CVDefRange:
return relaxCVDefRange(Layout, cast<MCCVDefRangeFragment>(F));
+ case MCFragment::FT_PseudoProbe:
+ return relaxPseudoProbeAddr(Layout, cast<MCPseudoProbeAddrFragment>(F));
}
}
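
The FT_Nops emission above clamps the requested per-NOP size to the backend's maximum (reporting an error but continuing), treats a size of 0 as "use the maximum", and then peels off NOP chunks until the requested byte count is covered. A standalone sketch of that splitting arithmetic, as a hypothetical free function rather than the writeFragment() code:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Split a ".nops NumBytes, ControlledNopLength" request into the sizes of
    // the individual NOPs that would be emitted.
    std::vector<std::int64_t> splitNops(std::int64_t NumBytes,
                                        std::int64_t ControlledNopLength,
                                        std::int64_t MaximumNopLength) {
      if (ControlledNopLength > MaximumNopLength)
        ControlledNopLength = MaximumNopLength; // diagnosed, then clamped
      if (ControlledNopLength == 0)
        ControlledNopLength = MaximumNopLength; // unspecified: use the maximum

      std::vector<std::int64_t> Chunks;
      while (NumBytes > 0) {
        std::int64_t N = std::min(NumBytes, ControlledNopLength);
        Chunks.push_back(N);
        NumBytes -= N;
      }
      return Chunks;
    }

For example, splitNops(10, 4, 15) returns {4, 4, 2}.
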
diff --git a/contrib/llvm-project/llvm/lib/MC/MCCodeView.cpp b/contrib/llvm-project/llvm/lib/MC/MCCodeView.cpp
index 7849196432b8..3da1a9c3e331 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCCodeView.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCCodeView.cpp
@@ -563,10 +563,7 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
int LineDelta = CurSourceLoc.Line - LastSourceLoc.Line;
unsigned EncodedLineDelta = encodeSignedNumber(LineDelta);
unsigned CodeDelta = computeLabelDiff(Layout, LastLabel, Loc.getLabel());
- if (CodeDelta == 0 && LineDelta != 0) {
- compressAnnotation(BinaryAnnotationsOpCode::ChangeLineOffset, Buffer);
- compressAnnotation(EncodedLineDelta, Buffer);
- } else if (EncodedLineDelta < 0x8 && CodeDelta <= 0xf) {
+ if (EncodedLineDelta < 0x8 && CodeDelta <= 0xf) {
// The ChangeCodeOffsetAndLineOffset combination opcode is used when the
// encoded line delta uses 3 or fewer set bits and the code offset fits
// in one nibble.
diff --git a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
index a0f9212f3b14..9dab8a6c0910 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
@@ -90,6 +90,7 @@ void MCContext::reset() {
ELFAllocator.DestroyAll();
MachOAllocator.DestroyAll();
XCOFFAllocator.DestroyAll();
+ MCInstAllocator.DestroyAll();
MCSubtargetAllocator.DestroyAll();
InlineAsmUsedLabelNames.clear();
@@ -127,6 +128,14 @@ void MCContext::reset() {
}
//===----------------------------------------------------------------------===//
+// MCInst Management
+//===----------------------------------------------------------------------===//
+
+MCInst *MCContext::createMCInst() {
+ return new (MCInstAllocator.Allocate()) MCInst;
+}
+
+//===----------------------------------------------------------------------===//
// Symbol Manipulation
//===----------------------------------------------------------------------===//
@@ -223,11 +232,16 @@ MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix,
llvm_unreachable("Infinite loop");
}
-MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix,
- bool CanBeUnnamed) {
+MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix) {
+ SmallString<128> NameSV;
+ raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name;
+ return createSymbol(NameSV, AlwaysAddSuffix, true);
+}
+
+MCSymbol *MCContext::createNamedTempSymbol(const Twine &Name) {
SmallString<128> NameSV;
raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name;
- return createSymbol(NameSV, AlwaysAddSuffix, CanBeUnnamed);
+ return createSymbol(NameSV, true, false);
}
MCSymbol *MCContext::createLinkerPrivateTempSymbol() {
@@ -236,8 +250,10 @@ MCSymbol *MCContext::createLinkerPrivateTempSymbol() {
return createSymbol(NameSV, true, false);
}
-MCSymbol *MCContext::createTempSymbol(bool CanBeUnnamed) {
- return createTempSymbol("tmp", true, CanBeUnnamed);
+MCSymbol *MCContext::createTempSymbol() { return createTempSymbol("tmp"); }
+
+MCSymbol *MCContext::createNamedTempSymbol() {
+ return createNamedTempSymbol("tmp");
}
unsigned MCContext::NextInstance(unsigned LocalLabelVal) {
@@ -258,7 +274,7 @@ MCSymbol *MCContext::getOrCreateDirectionalLocalSymbol(unsigned LocalLabelVal,
unsigned Instance) {
MCSymbol *&Sym = LocalSymbols[std::make_pair(LocalLabelVal, Instance)];
if (!Sym)
- Sym = createTempSymbol(false);
+ Sym = createNamedTempSymbol();
return Sym;
}
@@ -635,7 +651,7 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind,
StringRef CachedName = Entry.first.SectionName;
- MCSymbol *Begin = createSymbol(CachedName, false, false);
+ MCSymbol *Begin = createSymbol(CachedName, true, false);
cast<MCSymbolWasm>(Begin)->setType(wasm::WASM_SYMBOL_TYPE_SECTION);
MCSectionWasm *Result = new (WasmAllocator.Allocate())
@@ -650,18 +666,21 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind,
return Result;
}
-MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section,
- XCOFF::StorageMappingClass SMC,
- XCOFF::SymbolType Type,
- XCOFF::StorageClass SC,
- SectionKind Kind,
- const char *BeginSymName) {
+MCSectionXCOFF *
+MCContext::getXCOFFSection(StringRef Section, XCOFF::StorageMappingClass SMC,
+ XCOFF::SymbolType Type, SectionKind Kind,
+ bool MultiSymbolsAllowed, const char *BeginSymName) {
// Do the lookup. If we have a hit, return it.
auto IterBool = XCOFFUniquingMap.insert(
std::make_pair(XCOFFSectionKey{Section.str(), SMC}, nullptr));
auto &Entry = *IterBool.first;
- if (!IterBool.second)
- return Entry.second;
+ if (!IterBool.second) {
+ MCSectionXCOFF *ExistedEntry = Entry.second;
+ if (ExistedEntry->isMultiSymbolsAllowed() != MultiSymbolsAllowed)
+ report_fatal_error("section's multiply symbols policy does not match");
+
+ return ExistedEntry;
+ }
// Otherwise, return a new section.
StringRef CachedName = Entry.first.SectionName;
@@ -675,8 +694,8 @@ MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section,
// QualName->getUnqualifiedName() and CachedName are the same except when
// CachedName contains invalid character(s) such as '$' for an XCOFF symbol.
MCSectionXCOFF *Result = new (XCOFFAllocator.Allocate())
- MCSectionXCOFF(QualName->getUnqualifiedName(), SMC, Type, SC, Kind,
- QualName, Begin, CachedName);
+ MCSectionXCOFF(QualName->getUnqualifiedName(), SMC, Type, Kind, QualName,
+ Begin, CachedName, MultiSymbolsAllowed);
Entry.second = Result;
auto *F = new MCDataFragment();
@@ -813,13 +832,13 @@ void MCContext::reportError(SMLoc Loc, const Twine &Msg) {
// If we have a source manager use it. Otherwise, try using the inline source
// manager.
- // If that fails, use the generic report_fatal_error().
+ // If that fails, construct a temporary SourceMgr.
if (SrcMgr)
SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg);
else if (InlineSrcMgr)
InlineSrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg);
else
- report_fatal_error(Msg, false);
+ SourceMgr().PrintMessage(Loc, SourceMgr::DK_Error, Msg);
}
void MCContext::reportWarning(SMLoc Loc, const Twine &Msg) {
diff --git a/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp
index 7f72d062b7ac..f86d4266a1eb 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp
@@ -46,10 +46,8 @@
using namespace llvm;
MCSymbol *mcdwarf::emitListsTableHeaderStart(MCStreamer &S) {
- MCSymbol *Start =
- S.getContext().createTempSymbol("debug_list_header_start", true, true);
- MCSymbol *End =
- S.getContext().createTempSymbol("debug_list_header_end", true, true);
+ MCSymbol *Start = S.getContext().createTempSymbol("debug_list_header_start");
+ MCSymbol *End = S.getContext().createTempSymbol("debug_list_header_end");
auto DwarfFormat = S.getContext().getDwarfFormat();
if (DwarfFormat == dwarf::DWARF64) {
S.AddComment("DWARF64 mark");
@@ -768,11 +766,10 @@ void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params,
}
}
-bool MCDwarfLineAddr::FixedEncode(MCContext &Context,
- MCDwarfLineTableParams Params,
- int64_t LineDelta, uint64_t AddrDelta,
- raw_ostream &OS,
- uint32_t *Offset, uint32_t *Size) {
+std::tuple<uint32_t, uint32_t, bool>
+MCDwarfLineAddr::fixedEncode(MCContext &Context, int64_t LineDelta,
+ uint64_t AddrDelta, raw_ostream &OS) {
+ uint32_t Offset, Size;
if (LineDelta != INT64_MAX) {
OS << char(dwarf::DW_LNS_advance_line);
encodeSLEB128(LineDelta, OS);
@@ -792,15 +789,15 @@ bool MCDwarfLineAddr::FixedEncode(MCContext &Context,
encodeULEB128(1 + AddrSize, OS);
OS << char(dwarf::DW_LNE_set_address);
// Generate fixup for the address.
- *Offset = OS.tell();
- *Size = AddrSize;
+ Offset = OS.tell();
+ Size = AddrSize;
SetDelta = false;
OS.write_zeros(AddrSize);
} else {
OS << char(dwarf::DW_LNS_fixed_advance_pc);
// Generate fixup for 2-bytes address delta.
- *Offset = OS.tell();
- *Size = 2;
+ Offset = OS.tell();
+ Size = 2;
SetDelta = true;
OS << char(0);
OS << char(0);
@@ -814,7 +811,7 @@ bool MCDwarfLineAddr::FixedEncode(MCContext &Context,
OS << char(dwarf::DW_LNS_copy);
}
- return SetDelta;
+ return std::make_tuple(Offset, Size, SetDelta);
}
// Utility function to write a tuple for .debug_abbrev.
@@ -1140,7 +1137,7 @@ static MCSymbol *emitGenDwarfRanges(MCStreamer *MCOS) {
MCSymbol *EndSymbol = mcdwarf::emitListsTableHeaderStart(*MCOS);
MCOS->AddComment("Offset entry count");
MCOS->emitInt32(0);
- RangesSymbol = context.createTempSymbol("debug_rnglist0_start", true, true);
+ RangesSymbol = context.createTempSymbol("debug_rnglist0_start");
MCOS->emitLabel(RangesSymbol);
for (MCSection *Sec : Sections) {
const MCSymbol *StartSymbol = Sec->getBeginSymbol();
@@ -1157,7 +1154,7 @@ static MCSymbol *emitGenDwarfRanges(MCStreamer *MCOS) {
MCOS->emitLabel(EndSymbol);
} else {
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection());
- RangesSymbol = context.createTempSymbol("debug_ranges_start", true, true);
+ RangesSymbol = context.createTempSymbol("debug_ranges_start");
MCOS->emitLabel(RangesSymbol);
for (MCSection *Sec : Sections) {
const MCSymbol *StartSymbol = Sec->getBeginSymbol();
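
Both of the encodings touched above bottom out in LEB128: fixedEncode() writes the line delta with encodeSLEB128, and the pseudo-probe relaxation in MCAssembler.cpp does the same for its address delta (padding to the previous size via the OldSize argument). As a refresher, signed LEB128 stores 7 bits per byte, sets the high bit on every byte except the last, and stops once only sign bits remain; a minimal sketch of the unpadded form:

    #include <cstdint>
    #include <vector>

    std::vector<std::uint8_t> sleb128Encode(std::int64_t Value) {
      std::vector<std::uint8_t> Out;
      bool More = true;
      while (More) {
        std::uint8_t Byte = Value & 0x7f;
        Value >>= 7; // arithmetic shift keeps the sign
        // Finished when the remaining bits are all copies of the sign bit and
        // that sign already appears in bit 6 of the byte just produced.
        if ((Value == 0 && !(Byte & 0x40)) || (Value == -1 && (Byte & 0x40)))
          More = false;
        else
          Byte |= 0x80;
        Out.push_back(Byte);
      }
      return Out;
    }

For example, sleb128Encode(-2) returns {0x7e} and sleb128Encode(64) returns {0xc0, 0x00}.
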
diff --git a/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
index 49d863f258bf..db45aef7d506 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
@@ -221,23 +221,35 @@ bool MCELFStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
case MCSA_ELF_TypeGnuUniqueObject:
Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_OBJECT));
Symbol->setBinding(ELF::STB_GNU_UNIQUE);
- Symbol->setExternal(true);
break;
case MCSA_Global:
+ // For `.weak x; .global x`, GNU as sets the binding to STB_WEAK while we
+ // traditionally set the binding to STB_GLOBAL. This is error-prone, so we
+ // error on such cases. Note that we also disallow changing the binding from .local.
+ if (Symbol->isBindingSet() && Symbol->getBinding() != ELF::STB_GLOBAL)
+ getContext().reportError(getStartTokLoc(),
+ Symbol->getName() +
+ " changed binding to STB_GLOBAL");
Symbol->setBinding(ELF::STB_GLOBAL);
- Symbol->setExternal(true);
break;
case MCSA_WeakReference:
case MCSA_Weak:
+ // For `.global x; .weak x`, both MC and GNU as set the binding to STB_WEAK.
+ // We emit a warning for now but may switch to an error in the future.
+ if (Symbol->isBindingSet() && Symbol->getBinding() != ELF::STB_WEAK)
+ getContext().reportWarning(
+ getStartTokLoc(), Symbol->getName() + " changed binding to STB_WEAK");
Symbol->setBinding(ELF::STB_WEAK);
- Symbol->setExternal(true);
break;
case MCSA_Local:
+ if (Symbol->isBindingSet() && Symbol->getBinding() != ELF::STB_LOCAL)
+ getContext().reportError(getStartTokLoc(),
+ Symbol->getName() +
+ " changed binding to STB_LOCAL");
Symbol->setBinding(ELF::STB_LOCAL);
- Symbol->setExternal(false);
break;
case MCSA_ELF_TypeFunction:
@@ -292,10 +304,8 @@ void MCELFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
auto *Symbol = cast<MCSymbolELF>(S);
getAssembler().registerSymbol(*Symbol);
- if (!Symbol->isBindingSet()) {
+ if (!Symbol->isBindingSet())
Symbol->setBinding(ELF::STB_GLOBAL);
- Symbol->setExternal(true);
- }
Symbol->setType(ELF::STT_OBJECT);
@@ -326,7 +336,8 @@ void MCELFStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
void MCELFStreamer::emitELFSymverDirective(StringRef AliasName,
const MCSymbol *Aliasee) {
- getAssembler().Symvers.push_back({AliasName, Aliasee});
+ getAssembler().Symvers.push_back(
+ MCAssembler::Symver{AliasName, Aliasee, getStartTokLoc()});
}
void MCELFStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
@@ -335,7 +346,6 @@ void MCELFStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
// FIXME: Should this be caught and done earlier?
getAssembler().registerSymbol(*Symbol);
Symbol->setBinding(ELF::STB_LOCAL);
- Symbol->setExternal(false);
emitCommonSymbol(Symbol, Size, ByteAlignment);
}
@@ -433,15 +443,18 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO:
case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HI:
case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA:
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL:
case MCSymbolRefExpr::VK_PPC_GOT_DTPREL:
case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO:
case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HI:
case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HA:
case MCSymbolRefExpr::VK_PPC_TLS:
+ case MCSymbolRefExpr::VK_PPC_TLS_PCREL:
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD:
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO:
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HI:
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA:
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL:
case MCSymbolRefExpr::VK_PPC_TLSGD:
case MCSymbolRefExpr::VK_PPC_GOT_TLSLD:
case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO:
@@ -479,10 +492,8 @@ void MCELFStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE) {
// Not a temporary, reference it as a weak undefined.
bool Created;
getAssembler().registerSymbol(*S, &Created);
- if (Created) {
+ if (Created)
cast<MCSymbolELF>(S)->setBinding(ELF::STB_WEAK);
- cast<MCSymbolELF>(S)->setExternal(true);
- }
}
void MCELFStreamer::finalizeCGProfile() {
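The binding checks added here turn previously silent directive combinations into diagnostics. A small illustrative snippet (not from the patch; GNU-syntax ELF assembly with made-up symbol names):

    .weak   x
    .globl  x        # error: x changed binding to STB_GLOBAL

    .globl  y
    .weak   y        # warning: y changed binding to STB_WEAK (may become an error)

    .globl  z
    .local  z        # error: z changed binding to STB_LOCAL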
diff --git a/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp b/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
index ecf63b10f73f..3b123a46d9dc 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
@@ -47,6 +47,8 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
auto Value = cast<MCConstantExpr>(*this).getValue();
auto PrintInHex = cast<MCConstantExpr>(*this).useHexFormat();
auto SizeInBytes = cast<MCConstantExpr>(*this).getSizeInBytes();
+ if (Value < 0 && MAI && !MAI->supportsSignedData())
+ PrintInHex = true;
if (PrintInHex)
switch (SizeInBytes) {
default:
@@ -83,8 +85,13 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
} else
Sym.print(OS, MAI);
- if (SRE.getKind() != MCSymbolRefExpr::VK_None)
- SRE.printVariantKind(OS);
+ const MCSymbolRefExpr::VariantKind Kind = SRE.getKind();
+ if (Kind != MCSymbolRefExpr::VK_None) {
+ if (MAI && MAI->useParensForSymbolVariant()) // ARM
+ OS << '(' << MCSymbolRefExpr::getVariantKindName(Kind) << ')';
+ else
+ OS << '@' << MCSymbolRefExpr::getVariantKindName(Kind);
+ }
return;
}
@@ -143,6 +150,7 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
case MCBinaryExpr::Mul: OS << '*'; break;
case MCBinaryExpr::NE: OS << "!="; break;
case MCBinaryExpr::Or: OS << '|'; break;
+ case MCBinaryExpr::OrNot: OS << '!'; break;
case MCBinaryExpr::Shl: OS << "<<"; break;
case MCBinaryExpr::Sub: OS << '-'; break;
case MCBinaryExpr::Xor: OS << '^'; break;
@@ -194,8 +202,7 @@ const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx,
MCSymbolRefExpr::MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind,
const MCAsmInfo *MAI, SMLoc Loc)
: MCExpr(MCExpr::SymbolRef, Loc,
- encodeSubclassData(Kind, MAI->useParensForSymbolVariant(),
- MAI->hasSubsectionsViaSymbols())),
+ encodeSubclassData(Kind, MAI->hasSubsectionsViaSymbols())),
Symbol(Symbol) {
assert(Symbol);
}
@@ -246,6 +253,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_SIZE: return "SIZE";
case VK_WEAKREF: return "WEAKREF";
case VK_X86_ABS8: return "ABS8";
+ case VK_X86_PLTOFF: return "PLTOFF";
case VK_ARM_NONE: return "none";
case VK_ARM_GOT_PREL: return "GOT_PREL";
case VK_ARM_TARGET1: return "target1";
@@ -319,9 +327,18 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha";
case VK_PPC_GOT_PCREL:
return "got@pcrel";
+ case VK_PPC_GOT_TLSGD_PCREL:
+ return "got@tlsgd@pcrel";
+ case VK_PPC_GOT_TLSLD_PCREL:
+ return "got@tlsld@pcrel";
+ case VK_PPC_GOT_TPREL_PCREL:
+ return "got@tprel@pcrel";
+ case VK_PPC_TLS_PCREL:
+ return "tls@pcrel";
case VK_PPC_TLSLD: return "tlsld";
case VK_PPC_LOCAL: return "local";
case VK_PPC_NOTOC: return "notoc";
+ case VK_PPC_PCREL_OPT: return "<<invalid>>";
case VK_COFF_IMGREL32: return "IMGREL";
case VK_Hexagon_LO16: return "LO16";
case VK_Hexagon_HI16: return "HI16";
@@ -334,6 +351,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Hexagon_IE_GOT: return "IEGOT";
case VK_WASM_TYPEINDEX: return "TYPEINDEX";
case VK_WASM_MBREL: return "MBREL";
+ case VK_WASM_TLSREL: return "TLSREL";
case VK_WASM_TBREL: return "TBREL";
case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
@@ -393,6 +411,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("secrel32", VK_SECREL)
.Case("size", VK_SIZE)
.Case("abs8", VK_X86_ABS8)
+ .Case("pltoff", VK_X86_PLTOFF)
.Case("l", VK_PPC_LO)
.Case("h", VK_PPC_HI)
.Case("ha", VK_PPC_HA)
@@ -450,6 +469,10 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI)
.Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA)
.Case("got@pcrel", VK_PPC_GOT_PCREL)
+ .Case("got@tlsgd@pcrel", VK_PPC_GOT_TLSGD_PCREL)
+ .Case("got@tlsld@pcrel", VK_PPC_GOT_TLSLD_PCREL)
+ .Case("got@tprel@pcrel", VK_PPC_GOT_TPREL_PCREL)
+ .Case("tls@pcrel", VK_PPC_TLS_PCREL)
.Case("notoc", VK_PPC_NOTOC)
.Case("gdgot", VK_Hexagon_GD_GOT)
.Case("gdplt", VK_Hexagon_GD_PLT)
@@ -470,6 +493,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("typeindex", VK_WASM_TYPEINDEX)
.Case("tbrel", VK_WASM_TBREL)
.Case("mbrel", VK_WASM_MBREL)
+ .Case("tlsrel", VK_WASM_TLSREL)
.Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
.Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
.Case("rel32@lo", VK_AMDGPU_REL32_LO)
@@ -494,13 +518,6 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Default(VK_Invalid);
}
-void MCSymbolRefExpr::printVariantKind(raw_ostream &OS) const {
- if (useParensForSymbolVariant())
- OS << '(' << MCSymbolRefExpr::getVariantKindName(getKind()) << ')';
- else
- OS << '@' << MCSymbolRefExpr::getVariantKindName(getKind());
-}
-
/* *** */
void MCTargetExpr::anchor() {}
@@ -575,12 +592,7 @@ static void AttemptToFoldSymbolOffsetDifference(
if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
return;
- MCFragment *FA = SA.getFragment();
- MCFragment *FB = SB.getFragment();
- if (FA == FB && !SA.isVariable() && !SA.isUnset() && !SB.isVariable() &&
- !SB.isUnset()) {
- Addend += (SA.getOffset() - SB.getOffset());
-
+ auto FinalizeFolding = [&]() {
// Pointers to Thumb symbols need to have their low-bit set to allow
// for interworking.
if (Asm->isThumbFunc(&SA))
@@ -594,11 +606,17 @@ static void AttemptToFoldSymbolOffsetDifference(
// Clear the symbol expr pointers to indicate we have folded these
// operands.
A = B = nullptr;
- return;
- }
+ };
- if (!Layout)
- return;
+ const MCFragment *FA = SA.getFragment();
+ const MCFragment *FB = SB.getFragment();
+ // If both symbols are in the same fragment, return the difference of their
+ // offsets
+ if (FA == FB && !SA.isVariable() && !SA.isUnset() && !SB.isVariable() &&
+ !SB.isUnset()) {
+ Addend += SA.getOffset() - SB.getOffset();
+ return FinalizeFolding();
+ }
const MCSection &SecA = *FA->getParent();
const MCSection &SecB = *FB->getParent();
@@ -606,30 +624,46 @@ static void AttemptToFoldSymbolOffsetDifference(
if ((&SecA != &SecB) && !Addrs)
return;
- // One of the symbol involved is part of a fragment being laid out. Quit now
- // to avoid a self loop.
- if (!Layout->canGetFragmentOffset(FA) || !Layout->canGetFragmentOffset(FB))
- return;
+ if (Layout) {
+ // One of the symbols involved is part of a fragment being laid out. Quit now
+ // to avoid a self loop.
+ if (!Layout->canGetFragmentOffset(FA) || !Layout->canGetFragmentOffset(FB))
+ return;
+
+ // Eagerly evaluate when layout is finalized.
+ Addend += Layout->getSymbolOffset(A->getSymbol()) -
+ Layout->getSymbolOffset(B->getSymbol());
+ if (Addrs && (&SecA != &SecB))
+ Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
+
+ FinalizeFolding();
+ } else {
+ // When layout is not finalized, our ability to resolve differences between
+ // symbols is limited to specific cases where the fragments between two
+ // symbols (including the fragments the symbols are defined in) are
+ // fixed-size fragments so the difference can be calculated. For example,
+ // this is important when the Subtarget is changed and a new MCDataFragment
+ // is created, as in: foo: instr; .arch_extension ext; instr; .if . - foo.
+ if (SA.isVariable() || SA.isUnset() || SB.isVariable() || SB.isUnset() ||
+ FA->getKind() != MCFragment::FT_Data ||
+ FB->getKind() != MCFragment::FT_Data ||
+ FA->getSubsectionNumber() != FB->getSubsectionNumber())
+ return;
+ // Try to find a constant displacement from FA to FB, add the displacement
+ // between the offset in FA of SA and the offset in FB of SB.
+ int64_t Displacement = SA.getOffset() - SB.getOffset();
+ for (auto FI = FB->getIterator(), FE = SecA.end(); FI != FE; ++FI) {
+ if (&*FI == FA) {
+ Addend += Displacement;
+ return FinalizeFolding();
+ }
- // Eagerly evaluate.
- Addend += Layout->getSymbolOffset(A->getSymbol()) -
- Layout->getSymbolOffset(B->getSymbol());
- if (Addrs && (&SecA != &SecB))
- Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
-
- // Pointers to Thumb symbols need to have their low-bit set to allow
- // for interworking.
- if (Asm->isThumbFunc(&SA))
- Addend |= 1;
-
- // If symbol is labeled as micromips, we set low-bit to ensure
- // correct offset in .gcc_except_table
- if (Asm->getBackend().isMicroMips(&SA))
- Addend |= 1;
-
- // Clear the symbol expr pointers to indicate we have folded these
- // operands.
- A = B = nullptr;
+ if (FI->getKind() != MCFragment::FT_Data)
+ return;
+ Displacement += cast<MCDataFragment>(FI)->getContents().size();
+ }
+ }
}
static bool canFold(const MCAssembler *Asm, const MCSymbolRefExpr *A,
@@ -770,13 +804,30 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
case SymbolRef: {
const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
const MCSymbol &Sym = SRE->getSymbol();
+ const auto Kind = SRE->getKind();
// Evaluate recursively if this is a variable.
- if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None &&
+ if (Sym.isVariable() && (Kind == MCSymbolRefExpr::VK_None || Layout) &&
canExpand(Sym, InSet)) {
bool IsMachO = SRE->hasSubsectionsViaSymbols();
if (Sym.getVariableValue()->evaluateAsRelocatableImpl(
Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) {
+ if (Kind != MCSymbolRefExpr::VK_None) {
+ if (Res.isAbsolute()) {
+ Res = MCValue::get(SRE, nullptr, 0);
+ return true;
+ }
+ // If the reference has a variant kind, we can only handle expressions
+ // which evaluate exactly to a single unadorned symbol. Attach the
+ // original VariantKind to SymA of the result.
+ if (Res.getRefKind() != MCSymbolRefExpr::VK_None || !Res.getSymA() ||
+ Res.getSymB() || Res.getConstant())
+ return false;
+ Res =
+ MCValue::get(MCSymbolRefExpr::create(&Res.getSymA()->getSymbol(),
+ Kind, Asm->getContext()),
+ Res.getSymB(), Res.getConstant(), Res.getRefKind());
+ }
if (!IsMachO)
return true;
@@ -917,6 +968,7 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
case MCBinaryExpr::Mul: Result = LHS * RHS; break;
case MCBinaryExpr::NE: Result = LHS != RHS; break;
case MCBinaryExpr::Or: Result = LHS | RHS; break;
+ case MCBinaryExpr::OrNot: Result = LHS | ~RHS; break;
case MCBinaryExpr::Shl: Result = uint64_t(LHS) << uint64_t(RHS); break;
case MCBinaryExpr::Sub: Result = LHS - RHS; break;
case MCBinaryExpr::Xor: Result = LHS ^ RHS; break;
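The restructured AttemptToFoldSymbolOffsetDifference above can now fold a difference such as `. - foo` before layout, provided only fixed-size data fragments lie between the two symbols. A hedged sketch of the pattern the comment describes (ARM syntax; the extension name is illustrative):

    foo:
        nop
        .arch_extension crc    @ a new MCDataFragment starts here
        nop
        .if . - foo            @ difference spans fragments, yet folds before layout
        nop
        .endif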
diff --git a/contrib/llvm-project/llvm/lib/MC/MCFragment.cpp b/contrib/llvm-project/llvm/lib/MC/MCFragment.cpp
index 8e90e07a4dbf..0f8543f51096 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCFragment.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCFragment.cpp
@@ -279,6 +279,9 @@ void MCFragment::destroy() {
case FT_Fill:
delete cast<MCFillFragment>(this);
return;
+ case FT_Nops:
+ delete cast<MCNopsFragment>(this);
+ return;
case FT_Relaxable:
delete cast<MCRelaxableFragment>(this);
return;
@@ -306,6 +309,9 @@ void MCFragment::destroy() {
case FT_CVDefRange:
delete cast<MCCVDefRangeFragment>(this);
return;
+ case FT_PseudoProbe:
+ delete cast<MCPseudoProbeAddrFragment>(this);
+ return;
case FT_Dummy:
delete cast<MCDummyFragment>(this);
return;
@@ -336,6 +342,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
case MCFragment::FT_CompactEncodedInst:
OS << "MCCompactEncodedInstFragment"; break;
case MCFragment::FT_Fill: OS << "MCFillFragment"; break;
+ case MCFragment::FT_Nops:
+ OS << "MCFNopsFragment";
+ break;
case MCFragment::FT_Relaxable: OS << "MCRelaxableFragment"; break;
case MCFragment::FT_Org: OS << "MCOrgFragment"; break;
case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
@@ -345,6 +354,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
case MCFragment::FT_SymbolId: OS << "MCSymbolIdFragment"; break;
case MCFragment::FT_CVInlineLines: OS << "MCCVInlineLineTableFragment"; break;
case MCFragment::FT_CVDefRange: OS << "MCCVDefRangeTableFragment"; break;
+ case MCFragment::FT_PseudoProbe:
+ OS << "MCPseudoProbe";
+ break;
case MCFragment::FT_Dummy: OS << "MCDummyFragment"; break;
}
@@ -408,6 +420,12 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
<< " NumValues:" << FF->getNumValues();
break;
}
+ case MCFragment::FT_Nops: {
+ const auto *NF = cast<MCNopsFragment>(this);
+ OS << " NumBytes:" << NF->getNumBytes()
+ << " ControlledNopLength:" << NF->getControlledNopLength();
+ break;
+ }
case MCFragment::FT_Relaxable: {
const auto *F = cast<MCRelaxableFragment>(this);
OS << "\n ";
@@ -472,6 +490,12 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
}
break;
}
+ case MCFragment::FT_PseudoProbe: {
+ const auto *OF = cast<MCPseudoProbeAddrFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << OF->getAddrDelta();
+ break;
+ }
case MCFragment::FT_Dummy:
break;
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp
index b9b4416fde21..398de873fe0b 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -497,6 +497,10 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
+
+ PseudoProbeSection = Ctx->getELFSection(".pseudo_probe", DebugSecType, 0);
+ PseudoProbeDescSection =
+ Ctx->getELFSection(".pseudo_probe_desc", DebugSecType, 0);
}
void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
@@ -754,6 +758,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
+ GIATsSection = Ctx->getCOFFSection(".giats$y",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+
GLJMPSection = Ctx->getCOFFSection(".gljmp$y",
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
@@ -798,6 +807,10 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
DwarfFrameSection = Ctx->getWasmSection(".debug_frame", SectionKind::getMetadata());
DwarfPubNamesSection = Ctx->getWasmSection(".debug_pubnames", SectionKind::getMetadata());
DwarfPubTypesSection = Ctx->getWasmSection(".debug_pubtypes", SectionKind::getMetadata());
+ DwarfGnuPubNamesSection =
+ Ctx->getWasmSection(".debug_gnu_pubnames", SectionKind::getMetadata());
+ DwarfGnuPubTypesSection =
+ Ctx->getWasmSection(".debug_gnu_pubtypes", SectionKind::getMetadata());
DwarfDebugNamesSection =
Ctx->getWasmSection(".debug_names", SectionKind::getMetadata());
@@ -810,6 +823,37 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
DwarfLoclistsSection =
Ctx->getWasmSection(".debug_loclists", SectionKind::getMetadata());
+ // Fission Sections
+ DwarfInfoDWOSection =
+ Ctx->getWasmSection(".debug_info.dwo", SectionKind::getMetadata());
+ DwarfTypesDWOSection =
+ Ctx->getWasmSection(".debug_types.dwo", SectionKind::getMetadata());
+ DwarfAbbrevDWOSection =
+ Ctx->getWasmSection(".debug_abbrev.dwo", SectionKind::getMetadata());
+ DwarfStrDWOSection =
+ Ctx->getWasmSection(".debug_str.dwo", SectionKind::getMetadata());
+ DwarfLineDWOSection =
+ Ctx->getWasmSection(".debug_line.dwo", SectionKind::getMetadata());
+ DwarfLocDWOSection =
+ Ctx->getWasmSection(".debug_loc.dwo", SectionKind::getMetadata());
+ DwarfStrOffDWOSection =
+ Ctx->getWasmSection(".debug_str_offsets.dwo", SectionKind::getMetadata());
+ DwarfRnglistsDWOSection =
+ Ctx->getWasmSection(".debug_rnglists.dwo", SectionKind::getMetadata());
+ DwarfMacinfoDWOSection =
+ Ctx->getWasmSection(".debug_macinfo.dwo", SectionKind::getMetadata());
+ DwarfMacroDWOSection =
+ Ctx->getWasmSection(".debug_macro.dwo", SectionKind::getMetadata());
+
+ DwarfLoclistsDWOSection =
+ Ctx->getWasmSection(".debug_loclists.dwo", SectionKind::getMetadata());
+
+ // DWP Sections
+ DwarfCUIndexSection =
+ Ctx->getWasmSection(".debug_cu_index", SectionKind::getMetadata(), 0);
+ DwarfTUIndexSection =
+ Ctx->getWasmSection(".debug_tu_index", SectionKind::getMetadata(), 0);
+
// Wasm uses the data section for LSDA.
// TODO Consider putting each function's exception table in a separate
// section, as in -function-sections, to facilitate lld's --gc-sections.
@@ -826,23 +870,31 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
// csect for program code.
TextSection = Ctx->getXCOFFSection(
".text", XCOFF::StorageMappingClass::XMC_PR, XCOFF::XTY_SD,
- XCOFF::C_HIDEXT, SectionKind::getText());
+ SectionKind::getText(), /* MultiSymbolsAllowed*/ true);
DataSection = Ctx->getXCOFFSection(
".data", XCOFF::StorageMappingClass::XMC_RW, XCOFF::XTY_SD,
- XCOFF::C_HIDEXT, SectionKind::getData());
+ SectionKind::getData(), /* MultiSymbolsAllowed*/ true);
ReadOnlySection = Ctx->getXCOFFSection(
".rodata", XCOFF::StorageMappingClass::XMC_RO, XCOFF::XTY_SD,
- XCOFF::C_HIDEXT, SectionKind::getReadOnly());
+ SectionKind::getReadOnly(), /* MultiSymbolsAllowed*/ true);
- TOCBaseSection = Ctx->getXCOFFSection(
- "TOC", XCOFF::StorageMappingClass::XMC_TC0, XCOFF::XTY_SD,
- XCOFF::C_HIDEXT, SectionKind::getData());
+ TOCBaseSection =
+ Ctx->getXCOFFSection("TOC", XCOFF::StorageMappingClass::XMC_TC0,
+ XCOFF::XTY_SD, SectionKind::getData());
// The TOC-base always has 0 size, but 4 byte alignment.
TOCBaseSection->setAlignment(Align(4));
+ LSDASection = Ctx->getXCOFFSection(".gcc_except_table",
+ XCOFF::StorageMappingClass::XMC_RO,
+ XCOFF::XTY_SD, SectionKind::getReadOnly());
+
+ CompactUnwindSection =
+ Ctx->getXCOFFSection(".eh_info_table", XCOFF::StorageMappingClass::XMC_RW,
+ XCOFF::XTY_SD, SectionKind::getData());
+
// DWARF sections for XCOFF are not csects. They are special STYP_DWARF
// sections, and the individual DWARF sections are distinguished by their
// section subtype.
@@ -906,6 +958,9 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
Env = IsWasm;
initWasmMCObjectFileInfo(TT);
break;
+ case Triple::GOFF:
+ report_fatal_error("Cannot initialize MC for GOFF object file format");
+ break;
case Triple::XCOFF:
Env = IsXCOFF;
initXCOFFMCObjectFileInfo(TT);
@@ -922,9 +977,12 @@ MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name,
case Triple::ELF:
return Ctx->getELFSection(Name, ELF::SHT_PROGBITS, ELF::SHF_GROUP, 0,
utostr(Hash));
+ case Triple::Wasm:
+ return Ctx->getWasmSection(Name, SectionKind::getMetadata(), utostr(Hash),
+ MCContext::GenericSectionID);
case Triple::MachO:
case Triple::COFF:
- case Triple::Wasm:
+ case Triple::GOFF:
case Triple::XCOFF:
case Triple::UnknownObjectFormat:
report_fatal_error("Cannot get DWARF comdat section for this object file "
@@ -948,6 +1006,64 @@ MCObjectFileInfo::getStackSizesSection(const MCSection &TextSec) const {
}
return Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, Flags, 0,
- GroupName, MCSection::NonUniqueID,
+ GroupName, ElfSec.getUniqueID(),
cast<MCSymbolELF>(TextSec.getBeginSymbol()));
}
+
+MCSection *
+MCObjectFileInfo::getBBAddrMapSection(const MCSection &TextSec) const {
+ if (Env != IsELF)
+ return nullptr;
+
+ const MCSectionELF &ElfSec = static_cast<const MCSectionELF &>(TextSec);
+ unsigned Flags = ELF::SHF_LINK_ORDER;
+ StringRef GroupName;
+ if (const MCSymbol *Group = ElfSec.getGroup()) {
+ GroupName = Group->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+
+ // Use the text section's begin symbol and unique ID to create a separate
+ // .llvm_bb_addr_map section associated with every unique text section.
+ return Ctx->getELFSection(".llvm_bb_addr_map", ELF::SHT_LLVM_BB_ADDR_MAP,
+ Flags, 0, GroupName, ElfSec.getUniqueID(),
+ cast<MCSymbolELF>(TextSec.getBeginSymbol()));
+}
+
+MCSection *
+MCObjectFileInfo::getPseudoProbeSection(const MCSection *TextSec) const {
+ if (Env == IsELF) {
+ const auto *ElfSec = static_cast<const MCSectionELF *>(TextSec);
+ // Create a separate section for probes that come with a comdat function.
+ if (const MCSymbol *Group = ElfSec->getGroup()) {
+ auto *S = static_cast<MCSectionELF *>(PseudoProbeSection);
+ auto Flags = S->getFlags() | ELF::SHF_GROUP;
+ return Ctx->getELFSection(S->getName(), S->getType(), Flags,
+ S->getEntrySize(), Group->getName());
+ }
+ }
+ return PseudoProbeSection;
+}
+
+MCSection *
+MCObjectFileInfo::getPseudoProbeDescSection(StringRef FuncName) const {
+ if (Env == IsELF) {
+ // Create a separate comdat group for each function's descriptor in order
+ // for the linker to deduplicate. The duplicates, which must come from
+ // different translation units, can arise from:
+ // 1. Inline functions defined in header files;
+ // 2. ThinLTO imported functions;
+ // 3. Weak-linkage definitions.
+ // Use a concatenation of the section name and the function name as the
+ // group name so that descriptor-only groups won't be folded with groups of
+ // code.
+ if (TT.supportsCOMDAT() && !FuncName.empty()) {
+ auto *S = static_cast<MCSectionELF *>(PseudoProbeDescSection);
+ auto Flags = S->getFlags() | ELF::SHF_GROUP;
+ return Ctx->getELFSection(S->getName(), S->getType(), Flags,
+ S->getEntrySize(),
+ S->getName() + "_" + FuncName);
+ }
+ }
+ return PseudoProbeDescSection;
+}
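In assembly terms, the comdat grouping chosen by getPseudoProbeSection and getPseudoProbeDescSection corresponds roughly to section directives of the following shape (function and group names are illustrative, not taken from the patch):

    .section .pseudo_probe,"G",@progbits,foo_comdat_group,comdat
    .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat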
diff --git a/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp
index e39c4a03bc1e..1c23d31f8744 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp
@@ -248,7 +248,7 @@ void MCObjectStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
}
MCSymbol *MCObjectStreamer::emitCFILabel() {
- MCSymbol *Label = getContext().createTempSymbol("cfi", true);
+ MCSymbol *Label = getContext().createTempSymbol("cfi");
emitLabel(Label);
return Label;
}
@@ -665,6 +665,68 @@ void MCObjectStreamer::emitGPRel64Value(const MCExpr *Value) {
DF->getContents().resize(DF->getContents().size() + 8, 0);
}
+static Optional<std::pair<bool, std::string>>
+getOffsetAndDataFragment(const MCSymbol &Symbol, uint32_t &RelocOffset,
+ MCDataFragment *&DF) {
+ if (Symbol.isVariable()) {
+ const MCExpr *SymbolExpr = Symbol.getVariableValue();
+ MCValue OffsetVal;
+ if (!SymbolExpr->evaluateAsRelocatable(OffsetVal, nullptr, nullptr))
+ return std::make_pair(false,
+ std::string("symbol in .reloc offset is not "
+ "relocatable"));
+ if (OffsetVal.isAbsolute()) {
+ RelocOffset = OffsetVal.getConstant();
+ MCFragment *Fragment = Symbol.getFragment();
+ // FIXME Support symbols with no DF. For example:
+ // .reloc .data, ENUM_VALUE, <some expr>
+ if (!Fragment || Fragment->getKind() != MCFragment::FT_Data)
+ return std::make_pair(false,
+ std::string("symbol in offset has no data "
+ "fragment"));
+ DF = cast<MCDataFragment>(Fragment);
+ return None;
+ }
+
+ if (OffsetVal.getSymB())
+ return std::make_pair(false,
+ std::string(".reloc symbol offset is not "
+ "representable"));
+
+ const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*OffsetVal.getSymA());
+ if (!SRE.getSymbol().isDefined())
+ return std::make_pair(false,
+ std::string("symbol used in the .reloc offset is "
+ "not defined"));
+
+ if (SRE.getSymbol().isVariable())
+ return std::make_pair(false,
+ std::string("symbol used in the .reloc offset is "
+ "variable"));
+
+ MCFragment *Fragment = SRE.getSymbol().getFragment();
+ // FIXME Support symbols with no DF. For example:
+ // .reloc .data, ENUM_VALUE, <some expr>
+ if (!Fragment || Fragment->getKind() != MCFragment::FT_Data)
+ return std::make_pair(false,
+ std::string("symbol in offset has no data "
+ "fragment"));
+ RelocOffset = SRE.getSymbol().getOffset() + OffsetVal.getConstant();
+ DF = cast<MCDataFragment>(Fragment);
+ } else {
+ RelocOffset = Symbol.getOffset();
+ MCFragment *Fragment = Symbol.getFragment();
+ // FIXME Support symbols with no DF. For example:
+ // .reloc .data, ENUM_VALUE, <some expr>
+ if (!Fragment || Fragment->getKind() != MCFragment::FT_Data)
+ return std::make_pair(false,
+ std::string("symbol in offset has no data "
+ "fragment"));
+ DF = cast<MCDataFragment>(Fragment);
+ }
+ return None;
+}
+
Optional<std::pair<bool, std::string>>
MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name,
const MCExpr *Expr, SMLoc Loc,
@@ -698,10 +760,17 @@ MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name,
std::string(".reloc offset is not representable"));
const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*OffsetVal.getSymA());
- if (SRE.getSymbol().isDefined()) {
- // FIXME SRE.getSymbol() may not be relative to DF.
+ const MCSymbol &Symbol = SRE.getSymbol();
+ if (Symbol.isDefined()) {
+ uint32_t SymbolOffset = 0;
+ Optional<std::pair<bool, std::string>> Error;
+ Error = getOffsetAndDataFragment(Symbol, SymbolOffset, DF);
+
+ if (Error != None)
+ return Error;
+
DF->getFixups().push_back(
- MCFixup::create(SRE.getSymbol().getOffset() + OffsetVal.getConstant(),
+ MCFixup::create(SymbolOffset + OffsetVal.getConstant(),
Expr, Kind, Loc));
return None;
}
@@ -750,6 +819,16 @@ void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size,
insert(new MCFillFragment(Expr, Size, NumValues, Loc));
}
+void MCObjectStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLength,
+ SMLoc Loc) {
+ // Emit an NOP fragment.
+ MCDataFragment *DF = getOrCreateDataFragment();
+ flushPendingLabels(DF, DF->getContents().size());
+
+ assert(getCurrentSectionOnly() && "need a section");
+ insert(new MCNopsFragment(NumBytes, ControlledNopLength, Loc));
+}
+
void MCObjectStreamer::emitFileDirective(StringRef Filename) {
getAssembler().addFileName(Filename);
}
@@ -773,6 +852,9 @@ void MCObjectStreamer::finishImpl() {
// Dump out the dwarf file & directory tables and line tables.
MCDwarfLineTable::Emit(this, getAssembler().getDWARFLinetableParams());
+ // Emit pseudo probes for the current module.
+ MCPseudoProbeTable::emit(this);
+
// Update any remaining pending labels with empty data fragments.
flushPendingLabels();
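With getOffsetAndDataFragment in place, the offset operand of .reloc can be a defined symbol, or a symbol plus a constant, instead of only the current location. A sketch (relocation and symbol names are illustrative):

    .data
    start:
        .long 0
    here:
        .long 0
    .reloc here, R_X86_64_NONE, extern_sym        # offset named by a defined symbol
    .reloc start + 4, R_X86_64_NONE, extern_sym   # symbol plus constant also resolves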
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp
index 5a571c7c0c0e..1fa22ab000f0 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SaveAndRestore.h"
#include <cassert>
@@ -63,6 +64,12 @@ int AsmLexer::getNextChar() {
return (unsigned char)*CurPtr++;
}
+int AsmLexer::peekNextChar() {
+ if (CurPtr == CurBuf.end())
+ return EOF;
+ return (unsigned char)*CurPtr;
+}
+
/// The leading integral digit sequence and dot should have already been
/// consumed, some or all of the fractional digit sequence *can* have been
/// consumed.
@@ -271,13 +278,34 @@ static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
return DefaultRadix;
}
-static AsmToken intToken(StringRef Ref, APInt &Value)
-{
+static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
+ while (hexDigitValue(*CurPtr) < DefaultRadix) {
+ ++CurPtr;
+ }
+ return CurPtr;
+}
+
+static AsmToken intToken(StringRef Ref, APInt &Value) {
if (Value.isIntN(64))
return AsmToken(AsmToken::Integer, Ref, Value);
return AsmToken(AsmToken::BigNum, Ref, Value);
}
+static std::string radixName(unsigned Radix) {
+ switch (Radix) {
+ case 2:
+ return "binary";
+ case 8:
+ return "octal";
+ case 10:
+ return "decimal";
+ case 16:
+ return "hexadecimal";
+ default:
+ return "base-" + std::to_string(Radix);
+ }
+}
+
/// LexDigit: First character is [0-9].
/// Local Label: [0-9][:]
/// Forward/Backward Label: [0-9][fb]
@@ -286,16 +314,51 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
/// Decimal integer: [1-9][0-9]*
AsmToken AsmLexer::LexDigit() {
- // MASM-flavor binary integer: [01]+[bB]
+ // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY])
+ // MASM-flavor octal integer: [0-7]+[oOqQ]
+ // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT])
// MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
if (LexMasmIntegers && isdigit(CurPtr[-1])) {
- const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
- CurPtr - 1 : nullptr;
+ const char *FirstNonBinary =
+ (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
+ const char *FirstNonDecimal =
+ (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
const char *OldCurPtr = CurPtr;
while (isHexDigit(*CurPtr)) {
- if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
- FirstNonBinary = CurPtr;
+ switch (*CurPtr) {
+ default:
+ if (!FirstNonDecimal) {
+ FirstNonDecimal = CurPtr;
+ }
+ LLVM_FALLTHROUGH;
+ case '9':
+ case '8':
+ case '7':
+ case '6':
+ case '5':
+ case '4':
+ case '3':
+ case '2':
+ if (!FirstNonBinary) {
+ FirstNonBinary = CurPtr;
+ }
+ break;
+ case '1':
+ case '0':
+ break;
+ }
+ ++CurPtr;
+ }
+ if (*CurPtr == '.') {
+ // MASM float literals (other than hex floats) always contain a ".", and
+ // are always written in decimal.
+ ++CurPtr;
+ return LexFloatLiteral();
+ }
+
+ if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) {
++CurPtr;
+ return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
}
unsigned Radix = 0;
@@ -303,28 +366,61 @@ AsmToken AsmLexer::LexDigit() {
// hexadecimal number
++CurPtr;
Radix = 16;
+ } else if (*CurPtr == 't' || *CurPtr == 'T') {
+ // decimal number
+ ++CurPtr;
+ Radix = 10;
+ } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
+ *CurPtr == 'Q') {
+ // octal number
+ ++CurPtr;
+ Radix = 8;
+ } else if (*CurPtr == 'y' || *CurPtr == 'Y') {
+ // binary number
+ ++CurPtr;
+ Radix = 2;
+ } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
+ DefaultRadix < 14 &&
+ (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
+ Radix = 10;
} else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
- (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
+ DefaultRadix < 12 &&
+ (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
Radix = 2;
+ }
- if (Radix == 2 || Radix == 16) {
+ if (Radix) {
StringRef Result(TokStart, CurPtr - TokStart);
APInt Value(128, 0, true);
if (Result.drop_back().getAsInteger(Radix, Value))
- return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
- "invalid hexdecimal number");
+ return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
// MSVC accepts and ignores type suffices on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
return intToken(Result, Value);
- }
+ }
- // octal/decimal integers, or floating point numbers, fall through
+ // default-radix integers, or floating point numbers, fall through
CurPtr = OldCurPtr;
}
+ // MASM default-radix integers: [0-9a-fA-F]+
+ // (All other integer literals have a radix specifier.)
+ if (LexMasmIntegers && UseMasmDefaultRadix) {
+ CurPtr = findLastDigit(CurPtr, 16);
+ StringRef Result(TokStart, CurPtr - TokStart);
+
+ APInt Value(128, 0, true);
+ if (Result.getAsInteger(DefaultRadix, Value)) {
+ return ReturnError(TokStart,
+ "invalid " + radixName(DefaultRadix) + " number");
+ }
+
+ return intToken(Result, Value);
+ }
+
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
@@ -339,13 +435,9 @@ AsmToken AsmLexer::LexDigit() {
StringRef Result(TokStart, CurPtr - TokStart);
APInt Value(128, 0, true);
- if (Result.getAsInteger(Radix, Value))
- return ReturnError(TokStart, !isHex ? "invalid decimal number" :
- "invalid hexdecimal number");
-
- // Consume the [hH].
- if (LexMasmIntegers && Radix == 16)
- ++CurPtr;
+ if (Result.getAsInteger(Radix, Value)) {
+ return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
+ }
// The darwin/x86 (and x86-64) assembler accepts and ignores type
// suffices on integer literals.
@@ -416,11 +508,9 @@ AsmToken AsmLexer::LexDigit() {
// Either octal or hexadecimal.
APInt Value(128, 0, true);
unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
- bool isHex = Radix == 16;
StringRef Result(TokStart, CurPtr - TokStart);
if (Result.getAsInteger(Radix, Value))
- return ReturnError(TokStart, !isHex ? "invalid octal number" :
- "invalid hexdecimal number");
+ return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
// Consume the [hH].
if (Radix == 16)
@@ -437,6 +527,24 @@ AsmToken AsmLexer::LexDigit() {
AsmToken AsmLexer::LexSingleQuote() {
int CurChar = getNextChar();
+ if (LexMasmStrings) {
+ while (CurChar != EOF) {
+ if (CurChar != '\'') {
+ CurChar = getNextChar();
+ } else if (peekNextChar() == '\'') {
+ // In MASM single-quote strings, doubled single-quotes mean an escaped
+ // single quote, so should be lexed in.
+ getNextChar();
+ CurChar = getNextChar();
+ } else {
+ break;
+ }
+ }
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated string constant");
+ return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
+ }
+
if (CurChar == '\\')
CurChar = getNextChar();
@@ -471,6 +579,24 @@ AsmToken AsmLexer::LexSingleQuote() {
/// LexQuote: String: "..."
AsmToken AsmLexer::LexQuote() {
int CurChar = getNextChar();
+ if (LexMasmStrings) {
+ while (CurChar != EOF) {
+ if (CurChar != '"') {
+ CurChar = getNextChar();
+ } else if (peekNextChar() == '"') {
+ // In MASM double-quoted strings, doubled double-quotes mean an escaped
+ // double quote, so should be lexed in.
+ getNextChar();
+ CurChar = getNextChar();
+ } else {
+ break;
+ }
+ }
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated string constant");
+ return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
+ }
+
// TODO: does gas allow multiline string constants?
while (CurChar != '"') {
if (CurChar == '\\') {
@@ -589,7 +715,7 @@ AsmToken AsmLexer::LexToken() {
if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {
IsAtStartOfLine = true;
IsAtStartOfStatement = true;
- return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
+ return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
}
IsAtStartOfLine = false;
bool OldIsAtStartOfStatement = IsAtStartOfStatement;
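Taken together, the lexer changes accept the MASM literal and string forms sketched below (values are illustrative; the radix suffixes follow the comments in the hunks above):

    mov eax, 11001010y     ; binary      [01]+[yY] (or [bB] while the default radix < 12)
    mov eax, 777o          ; octal       [0-7]+[oOqQ]
    mov eax, 100t          ; decimal     [0-9]+[tT] (or [dD] while the default radix < 14)
    mov eax, 0BEEFh        ; hexadecimal [0-9][0-9a-fA-F]*[hH]
    db 'it''s here', 0     ; doubled single quotes escape a quote
    db "say ""hi""", 0     ; doubled double quotes likewise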
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp
index c05f26cbdda5..c5ff241ead74 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -126,6 +126,7 @@ private:
SourceMgr::DiagHandlerTy SavedDiagHandler;
void *SavedDiagContext;
std::unique_ptr<MCAsmParserExtension> PlatformParser;
+ SMLoc StartTokLoc;
/// This is the current buffer index we're lexing from as managed by the
/// SourceMgr object.
@@ -244,7 +245,8 @@ public:
bool parseExpression(const MCExpr *&Res);
bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
- bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
+ bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
+ AsmTypeInfo *TypeInfo) override;
bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
SMLoc &EndLoc) override;
@@ -513,6 +515,7 @@ private:
DK_PRINT,
DK_ADDRSIG,
DK_ADDRSIG_SYM,
+ DK_PSEUDO_PROBE,
DK_END
};
@@ -676,6 +679,9 @@ private:
// .print <double-quotes-string>
bool parseDirectivePrint(SMLoc DirectiveLoc);
+ // .pseudoprobe
+ bool parseDirectivePseudoProbe();
+
// Directives to support address-significance tables.
bool parseDirectiveAddrsig();
bool parseDirectiveAddrsigSym();
@@ -708,6 +714,8 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
// Set our own handler which calls the saved handler.
SrcMgr.setDiagHandler(DiagHandler, this);
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
+ // Make MCStreamer aware of the StartTokLoc for locations in diagnostics.
+ Out.setStartTokLocPtr(&StartTokLoc);
// Initialize the platform / file format parser.
switch (Ctx.getObjectFileInfo()->getObjectFileType()) {
@@ -741,6 +749,8 @@ AsmParser::~AsmParser() {
assert((HadError || ActiveMacros.empty()) &&
"Unexpected active macro instantiation!");
+ // Remove MCStreamer's reference to the parser SMLoc.
+ Out.setStartTokLocPtr(nullptr);
// Restore the saved diagnostics handler and context for use during
// finalization.
SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
@@ -988,7 +998,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Finalize the output stream if there are no errors and if the client wants
// us to.
if (!HadError && !NoFinalize)
- Out.Finish();
+ Out.Finish(Lexer.getLoc());
return HadError || getContext().hadError();
}
@@ -1068,7 +1078,8 @@ bool AsmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
/// primaryexpr ::= number
/// primaryexpr ::= '.'
/// primaryexpr ::= ~,+,- primaryexpr
-bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
+ AsmTypeInfo *TypeInfo) {
SMLoc FirstTokenLoc = getLexer().getLoc();
AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
switch (FirstTokenKind) {
@@ -1079,7 +1090,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return true;
case AsmToken::Exclaim:
Lex(); // Eat the operator.
- if (parsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc, TypeInfo))
return true;
Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
return false;
@@ -1238,19 +1249,19 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return parseBracketExpr(Res, EndLoc);
case AsmToken::Minus:
Lex(); // Eat the operator.
- if (parsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc, TypeInfo))
return true;
Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
return false;
case AsmToken::Plus:
Lex(); // Eat the operator.
- if (parsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc, TypeInfo))
return true;
Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
return false;
case AsmToken::Tilde:
Lex(); // Eat the operator.
- if (parsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc, TypeInfo))
return true;
Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
return false;
@@ -1497,8 +1508,6 @@ static unsigned getDarwinBinOpPrecedence(AsmToken::TokenKind K,
return 1;
// Low Precedence: |, &, ^
- //
- // FIXME: gas seems to support '!' as an infix operator?
case AsmToken::Pipe:
Kind = MCBinaryExpr::Or;
return 2;
@@ -1559,7 +1568,8 @@ static unsigned getDarwinBinOpPrecedence(AsmToken::TokenKind K,
}
}
-static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
+static unsigned getGNUBinOpPrecedence(const MCAsmInfo &MAI,
+ AsmToken::TokenKind K,
MCBinaryExpr::Opcode &Kind,
bool ShouldUseLogicalShr) {
switch (K) {
@@ -1603,12 +1613,18 @@ static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
Kind = MCBinaryExpr::Sub;
return 4;
- // High Intermediate Precedence: |, &, ^
+ // High Intermediate Precedence: |, !, &, ^
//
- // FIXME: gas seems to support '!' as an infix operator?
case AsmToken::Pipe:
Kind = MCBinaryExpr::Or;
return 5;
+ case AsmToken::Exclaim:
+ // Hack to support ARM compatible aliases (implied 'sp' operand in 'srs*'
+ // instructions like 'srsda #31!') and not parse ! as an infix operator.
+ if (MAI.getCommentString() == "@")
+ return 0;
+ Kind = MCBinaryExpr::OrNot;
+ return 5;
case AsmToken::Caret:
Kind = MCBinaryExpr::Xor;
return 5;
@@ -1639,7 +1655,7 @@ unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K,
MCBinaryExpr::Opcode &Kind) {
bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
return IsDarwin ? getDarwinBinOpPrecedence(K, Kind, ShouldUseLogicalShr)
- : getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr);
+ : getGNUBinOpPrecedence(MAI, K, Kind, ShouldUseLogicalShr);
}
/// Parse all binary operators with precedence >= 'Precedence'.
@@ -1698,6 +1714,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
SMLoc IDLoc = ID.getLoc();
StringRef IDVal;
int64_t LocalLabelVal = -1;
+ StartTokLoc = ID.getLoc();
if (Lexer.is(AsmToken::HashDirective))
return parseCppHashLineFilenameComment(IDLoc,
!isInsideMacroInstantiation());
@@ -2189,6 +2206,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveAddrsig();
case DK_ADDRSIG_SYM:
return parseDirectiveAddrsigSym();
+ case DK_PSEUDO_PROBE:
+ return parseDirectivePseudoProbe();
}
return Error(IDLoc, "unknown directive");
@@ -2989,13 +3008,20 @@ bool AsmParser::parseAngleBracketString(std::string &Data) {
}
/// parseDirectiveAscii:
-/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
+/// ::= .ascii [ "string"+ ( , "string"+ )* ]
+/// ::= ( .asciz | .string ) [ "string" ( , "string" )* ]
bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
auto parseOp = [&]() -> bool {
std::string Data;
- if (checkForValidSection() || parseEscapedString(Data))
+ if (checkForValidSection())
return true;
- getStreamer().emitBytes(Data);
+ // Only support spaces as separators for the .ascii directive for now. See
+ // the discussion at https://reviews.llvm.org/D91460 for more details.
+ do {
+ if (parseEscapedString(Data))
+ return true;
+ getStreamer().emitBytes(Data);
+ } while (!ZeroTerminated && getTok().is(AsmToken::String));
if (ZeroTerminated)
getStreamer().emitBytes(StringRef("\0", 1));
return false;
@@ -3323,6 +3349,8 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
Alignment = 1;
if (!isPowerOf2_64(Alignment))
ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2");
+ if (!isUInt<32>(Alignment))
+ ReturnVal |= Error(AlignmentLoc, "alignment must be smaller than 2**32");
}
// Diagnose non-sensical max bytes to align.
@@ -4064,6 +4092,9 @@ bool AsmParser::parseDirectiveCFISections() {
Debug = true;
}
+ if (parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.cfi_sections' directive");
+
getStreamer().emitCFISections(EH, Debug);
return false;
}
@@ -4091,6 +4122,8 @@ bool AsmParser::parseDirectiveCFIStartProc() {
/// parseDirectiveCFIEndProc
/// ::= .cfi_endproc
bool AsmParser::parseDirectiveCFIEndProc() {
+ if (parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.cfi_endproc' directive");
getStreamer().emitCFIEndProc();
return false;
}
@@ -5502,6 +5535,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".print"] = DK_PRINT;
DirectiveKindMap[".addrsig"] = DK_ADDRSIG;
DirectiveKindMap[".addrsig_sym"] = DK_ADDRSIG_SYM;
+ DirectiveKindMap[".pseudoprobe"] = DK_PSEUDO_PROBE;
}
MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
@@ -5757,6 +5791,69 @@ bool AsmParser::parseDirectiveAddrsigSym() {
return false;
}
+bool AsmParser::parseDirectivePseudoProbe() {
+ int64_t Guid;
+ int64_t Index;
+ int64_t Type;
+ int64_t Attr;
+
+ if (getLexer().is(AsmToken::Integer)) {
+ if (parseIntToken(Guid, "unexpected token in '.pseudoprobe' directive"))
+ return true;
+ }
+
+ if (getLexer().is(AsmToken::Integer)) {
+ if (parseIntToken(Index, "unexpected token in '.pseudoprobe' directive"))
+ return true;
+ }
+
+ if (getLexer().is(AsmToken::Integer)) {
+ if (parseIntToken(Type, "unexpected token in '.pseudoprobe' directive"))
+ return true;
+ }
+
+ if (getLexer().is(AsmToken::Integer)) {
+ if (parseIntToken(Attr, "unexpected token in '.pseudoprobe' directive"))
+ return true;
+ }
+
+ // Parse inline stack like @ GUID:11:12 @ GUID:1:11 @ GUID:3:21
+ MCPseudoProbeInlineStack InlineStack;
+
+ while (getLexer().is(AsmToken::At)) {
+ // eat @
+ Lex();
+
+ int64_t CallerGuid = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ if (parseIntToken(CallerGuid,
+ "unexpected token in '.pseudoprobe' directive"))
+ return true;
+ }
+
+ // eat colon
+ if (getLexer().is(AsmToken::Colon))
+ Lex();
+
+ int64_t CallerProbeId = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ if (parseIntToken(CallerProbeId,
+ "unexpected token in '.pseudoprobe' directive"))
+ return true;
+ }
+
+ InlineSite Site(CallerGuid, CallerProbeId);
+ InlineStack.push_back(Site);
+ }
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.pseudoprobe' directive"))
+ return true;
+
+ getStreamer().emitPseudoProbe(Guid, Index, Type, Attr, InlineStack);
+ return false;
+}
+
// We are comparing pointers, but the pointers are relative to a single string.
// Thus, this should always be deterministic.
static int rewritesSort(const AsmRewrite *AsmRewriteA,
@@ -5878,7 +5975,7 @@ bool AsmParser::parseMSInlineAsm(
// Consider implicit defs to be clobbers. Think of cpuid and push.
ArrayRef<MCPhysReg> ImpDefs(Desc.getImplicitDefs(),
Desc.getNumImplicitDefs());
- ClobberRegs.insert(ClobberRegs.end(), ImpDefs.begin(), ImpDefs.end());
+ llvm::append_range(ClobberRegs, ImpDefs);
}
// Set the number of Outputs and Inputs.
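The new .pseudoprobe directive takes a GUID, a probe index, a type, and an attribute, optionally followed by an inline stack of @ callerGUID:probeID entries; the reworked .ascii accepts several space-separated strings per operand. Illustrative lines (all numbers made up):

    .pseudoprobe 6699318081062747564 1 0 0
    .pseudoprobe 6699318081062747564 3 0 0 @ 1234:7 @ 5678:2
    .ascii "abc" "def", "ghi"    # strings separated by spaces concatenate per operand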
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index 2104fb83b309..3ac6a883417e 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -77,6 +77,8 @@ class COFFAsmParser : public MCAsmParserExtension {
".seh_proc");
addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProc>(
".seh_endproc");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndFuncletOrFunc>(
+ ".seh_endfunclet");
addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartChained>(
".seh_startchained");
addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndChained>(
@@ -131,6 +133,7 @@ class COFFAsmParser : public MCAsmParserExtension {
// Win64 EH directives.
bool ParseSEHDirectiveStartProc(StringRef, SMLoc);
bool ParseSEHDirectiveEndProc(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndFuncletOrFunc(StringRef, SMLoc);
bool ParseSEHDirectiveStartChained(StringRef, SMLoc);
bool ParseSEHDirectiveEndChained(StringRef, SMLoc);
bool ParseSEHDirectiveHandler(StringRef, SMLoc);
@@ -139,7 +142,6 @@ class COFFAsmParser : public MCAsmParserExtension {
bool ParseSEHDirectiveEndProlog(StringRef, SMLoc);
bool ParseAtUnwindOrAtExcept(bool &unwind, bool &except);
- bool ParseSEHRegisterNumber(unsigned &RegNo);
bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc);
public:
@@ -629,6 +631,12 @@ bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc Loc) {
return false;
}
+bool COFFAsmParser::ParseSEHDirectiveEndFuncletOrFunc(StringRef, SMLoc Loc) {
+ Lex();
+ getStreamer().EmitWinCFIFuncletOrFuncEnd(Loc);
+ return false;
+}
+
bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc Loc) {
Lex();
getStreamer().EmitWinCFIStartChained(Loc);
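For reference, the newly registered .seh_endfunclet sits between .seh_proc and .seh_endproc in compiler-generated output; a minimal, hedged placement sketch:

    .seh_proc my_handler
        .seh_endprologue
        ret
        .seh_endfunclet
    .seh_endproc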
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index b7c48e92961b..95128cf7d184 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -21,6 +20,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolCOFF.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
@@ -53,6 +53,11 @@ class COFFMasmParser : public MCAsmParserExtension {
bool ParseDirectiveSegmentEnd(StringRef, SMLoc);
bool ParseDirectiveIncludelib(StringRef, SMLoc);
+ bool ParseDirectiveAlias(StringRef, SMLoc);
+
+ bool ParseSEHDirectiveAllocStack(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndProlog(StringRef, SMLoc);
+
bool IgnoreDirective(StringRef, SMLoc) {
while (!getLexer().is(AsmToken::EndOfStatement)) {
Lex();
@@ -65,13 +70,10 @@ class COFFMasmParser : public MCAsmParserExtension {
MCAsmParserExtension::Initialize(Parser);
// x64 directives
- // .allocstack
- // .endprolog
- // .pushframe
- // .pushreg
- // .savereg
- // .savexmm128
- // .setframe
+ addDirectiveHandler<&COFFMasmParser::ParseSEHDirectiveAllocStack>(
+ ".allocstack");
+ addDirectiveHandler<&COFFMasmParser::ParseSEHDirectiveEndProlog>(
+ ".endprolog");
// Code label directives
// label
@@ -92,16 +94,11 @@ class COFFMasmParser : public MCAsmParserExtension {
// Data allocation directives
// align
- // byte/sbyte
- // dword/sdword
// even
- // fword
- // qword
- // real4
- // real8
- // real10
+ // mmword
// tbyte
- // word/sword
+ // xmmword
+ // ymmword
// Listing control directives
addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".cref");
@@ -120,27 +117,18 @@ class COFFMasmParser : public MCAsmParserExtension {
addDirectiveHandler<&COFFMasmParser::IgnoreDirective>("title");
// Macro directives
- // endm
- // exitm
// goto
- // local
- // macro
- // purge
// Miscellaneous directives
- // alias
+ addDirectiveHandler<&COFFMasmParser::ParseDirectiveAlias>("alias");
// assume
// .fpo
addDirectiveHandler<&COFFMasmParser::ParseDirectiveIncludelib>(
"includelib");
- // mmword
// option
// popcontext
// pushcontext
- // .radix
// .safeseh
- // xmmword
- // ymmword
// Procedure directives
addDirectiveHandler<&COFFMasmParser::ParseDirectiveEndProc>("endp");
@@ -148,7 +136,7 @@ class COFFMasmParser : public MCAsmParserExtension {
addDirectiveHandler<&COFFMasmParser::ParseDirectiveProc>("proc");
// proto
- // Processor directives
+ // Processor directives; all ignored
addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".386");
addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".386P");
addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".387");
@@ -162,13 +150,6 @@ class COFFMasmParser : public MCAsmParserExtension {
addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".mmx");
addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".xmm");
- // Repeat blocks directives
- // for
- // forc
- // goto
- // repeat
- // while
-
// Scope directives
// comm
// externdef
@@ -202,11 +183,8 @@ class COFFMasmParser : public MCAsmParserExtension {
// substr (equivalent to <name> TEXTEQU @SubStr(<params>))
// Structure and record directives
- // ends
// record
- // struct
// typedef
- // union
}
bool ParseSectionDirectiveCode(StringRef, SMLoc) {
@@ -234,6 +212,7 @@ class COFFMasmParser : public MCAsmParserExtension {
}
StringRef CurrentProcedure;
+ bool CurrentProcedureFramed;
public:
COFFMasmParser() = default;
@@ -353,16 +332,23 @@ bool COFFMasmParser::ParseDirectiveProc(StringRef Directive, SMLoc Loc) {
nextLoc = getTok().getLoc();
}
}
- MCSymbol *Sym = getContext().getOrCreateSymbol(Label);
-
- // Define symbol as simple function
- getStreamer().BeginCOFFSymbolDef(Sym);
- getStreamer().EmitCOFFSymbolStorageClass(2);
- getStreamer().EmitCOFFSymbolType(0x20);
- getStreamer().EndCOFFSymbolDef();
-
+ MCSymbolCOFF *Sym = cast<MCSymbolCOFF>(getContext().getOrCreateSymbol(Label));
+
+ // Define symbol as simple external function
+ Sym->setExternal(true);
+ Sym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT);
+
+ bool Framed = false;
+ if (getLexer().is(AsmToken::Identifier) &&
+ getTok().getString().equals_lower("frame")) {
+ Lex();
+ Framed = true;
+ getStreamer().EmitWinCFIStartProc(Sym, Loc);
+ }
getStreamer().emitLabel(Sym, Loc);
+
CurrentProcedure = Label;
+ CurrentProcedureFramed = Framed;
return false;
}
bool COFFMasmParser::ParseDirectiveEndProc(StringRef Directive, SMLoc Loc) {
@@ -376,6 +362,49 @@ bool COFFMasmParser::ParseDirectiveEndProc(StringRef Directive, SMLoc Loc) {
else if (CurrentProcedure != Label)
return Error(LabelLoc, "endp does not match current procedure '" +
CurrentProcedure + "'");
+
+ if (CurrentProcedureFramed) {
+ getStreamer().EmitWinCFIEndProc(Loc);
+ }
+ CurrentProcedure = "";
+ CurrentProcedureFramed = false;
+ return false;
+}
+
+bool COFFMasmParser::ParseDirectiveAlias(StringRef Directive, SMLoc Loc) {
+ std::string AliasName, ActualName;
+ if (getTok().isNot(AsmToken::Less) ||
+ getParser().parseAngleBracketString(AliasName))
+ return Error(getTok().getLoc(), "expected <aliasName>");
+ if (getParser().parseToken(AsmToken::Equal))
+ return addErrorSuffix(" in " + Directive + " directive");
+ if (getTok().isNot(AsmToken::Less) ||
+ getParser().parseAngleBracketString(ActualName))
+ return Error(getTok().getLoc(), "expected <actualName>");
+
+ MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName);
+ MCSymbol *Actual = getContext().getOrCreateSymbol(ActualName);
+
+ getStreamer().emitWeakReference(Alias, Actual);
+
+ return false;
+}
+
+bool COFFMasmParser::ParseSEHDirectiveAllocStack(StringRef Directive,
+ SMLoc Loc) {
+ int64_t Size;
+ SMLoc SizeLoc = getTok().getLoc();
+ if (getParser().parseAbsoluteExpression(Size))
+ return Error(SizeLoc, "expected integer size");
+ if (Size % 8 != 0)
+ return Error(SizeLoc, "stack size must be a multiple of 8");
+ getStreamer().EmitWinCFIAllocStack(static_cast<unsigned>(Size), Loc);
+ return false;
+}
+
+bool COFFMasmParser::ParseSEHDirectiveEndProlog(StringRef Directive,
+ SMLoc Loc) {
+ getStreamer().EmitWinCFIEndProlog(Loc);
return false;
}
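The MASM additions above cover the alias directive, framed procedures, and the two x64 prologue directives; a sketch of source they now handle (names illustrative):

    alias <OldName> = <NewName>    ; emits a weak reference from OldName to NewName

    MyFunc proc frame              ; "frame" opens Win64 unwind info
        sub rsp, 40
        .allocstack 40             ; size must be a multiple of 8
        .endprolog
        add rsp, 40
        ret
    MyFunc endp                    ; closes the unwind info for framed procedures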
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index b670355a392b..926483451259 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -1152,6 +1152,7 @@ static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
case MachO::PLATFORM_IOSSIMULATOR: /* silence warning */ break;
case MachO::PLATFORM_TVOSSIMULATOR: /* silence warning */ break;
case MachO::PLATFORM_WATCHOSSIMULATOR: /* silence warning */ break;
+ case MachO::PLATFORM_DRIVERKIT: /* silence warning */ break;
}
llvm_unreachable("Invalid mach-o platform type");
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index fb8215ef2281..65ac1d6b5ba0 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -450,8 +450,14 @@ bool ELFAsmParser::parseLinkedToSym(MCSymbolELF *&LinkedToSym) {
Lex();
StringRef Name;
SMLoc StartLoc = L.getLoc();
- if (getParser().parseIdentifier(Name))
+ if (getParser().parseIdentifier(Name)) {
+ if (getParser().getTok().getString() == "0") {
+ getParser().Lex();
+ LinkedToSym = nullptr;
+ return false;
+ }
return TokError("invalid linked-to symbol");
+ }
LinkedToSym = dyn_cast_or_null<MCSymbolELF>(getContext().lookupSymbol(Name));
if (!LinkedToSym || !LinkedToSym->isInSection())
return Error(StartLoc, "linked-to symbol is not in a section: " + Name);
@@ -620,6 +626,8 @@ EndStmt:
Type = ELF::SHT_LLVM_DEPENDENT_LIBRARIES;
else if (TypeName == "llvm_sympart")
Type = ELF::SHT_LLVM_SYMPART;
+ else if (TypeName == "llvm_bb_addr_map")
+ Type = ELF::SHT_LLVM_BB_ADDR_MAP;
else if (TypeName.getAsInteger(0, Type))
return TokError("unknown section type");
}
@@ -654,7 +662,9 @@ EndStmt:
Error(loc, "changed section entsize for " + SectionName +
", expected: " + Twine(Section->getEntrySize()));
- if (getContext().getGenDwarfForAssembly()) {
+ if (getContext().getGenDwarfForAssembly() &&
+ (Section->getFlags() & ELF::SHF_ALLOC) &&
+ (Section->getFlags() & ELF::SHF_EXECINSTR)) {
bool InsertResult = getContext().addGenDwarfSection(Section);
if (InsertResult) {
if (getContext().getDwarfVersion() <= 2)
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp
index e2aaeaae03b0..4957ee7a0323 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -14,12 +14,14 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -50,6 +52,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -105,6 +108,9 @@ struct ParseStatementInfo {
/// Was there an error parsing the inline assembly?
bool ParseError = false;
+ /// The value associated with a macro exit.
+ Optional<std::string> ExitValue;
+
SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
ParseStatementInfo() = delete;
@@ -122,12 +128,14 @@ struct FieldInfo;
struct StructInfo {
StringRef Name;
bool IsUnion = false;
- size_t Alignment = 0;
- size_t Size = 0;
+ unsigned Alignment = 0;
+ unsigned Size = 0;
+ unsigned AlignmentSize = 0;
std::vector<FieldInfo> Fields;
StringMap<size_t> FieldsByName;
- FieldInfo &addField(StringRef FieldName, FieldType FT);
+ FieldInfo &addField(StringRef FieldName, FieldType FT,
+ unsigned FieldAlignmentSize);
StructInfo() = default;
@@ -317,30 +325,32 @@ struct FieldInfo {
size_t Offset = 0;
// Total size of the field (= LengthOf * Type).
- size_t SizeOf = 0;
+ unsigned SizeOf = 0;
// Number of elements in the field (1 if scalar, >1 if an array).
- size_t LengthOf = 0;
+ unsigned LengthOf = 0;
// Size of a single entry in this field, in bytes ("type" in MASM standards).
- size_t Type = 0;
+ unsigned Type = 0;
FieldInitializer Contents;
FieldInfo(FieldType FT) : Contents(FT) {}
};
-FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT) {
+FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
+ unsigned FieldAlignmentSize) {
if (!FieldName.empty())
- FieldsByName[FieldName] = Fields.size();
+ FieldsByName[FieldName.lower()] = Fields.size();
Fields.emplace_back(FT);
FieldInfo &Field = Fields.back();
if (IsUnion) {
Field.Offset = 0;
} else {
- Size = llvm::alignTo(Size, Alignment);
+ Size = llvm::alignTo(Size, std::min(Alignment, FieldAlignmentSize));
Field.Offset = Size;
}
+ AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
return Field;
}
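
The addField change above aligns each new field to the smaller of the struct's packing alignment and the field's own alignment size, keeps union members at offset 0, and grows the struct's recorded alignment to the largest field seen. A self-contained sketch of that layout rule in plain C++; alignTo here is a local helper standing in for llvm::alignTo, and the struct is a stand-in for StructInfo:

    #include <algorithm>

    // Local helper; mirrors llvm::alignTo for nonzero alignments.
    static unsigned alignTo(unsigned Value, unsigned Align) {
      return Align ? (Value + Align - 1) / Align * Align : Value;
    }

    struct LayoutState {
      unsigned Alignment = 1;      // packing alignment declared for the struct
      unsigned Size = 0;           // running size as fields are appended
      unsigned AlignmentSize = 0;  // largest field alignment seen so far
      bool IsUnion = false;
    };

    // Returns the offset assigned to the new field.
    unsigned addFieldOffset(LayoutState &S, unsigned FieldAlignmentSize) {
      unsigned Offset = 0;
      if (!S.IsUnion) {
        S.Size = alignTo(S.Size, std::min(S.Alignment, FieldAlignmentSize));
        Offset = S.Size;
      }
      S.AlignmentSize = std::max(S.AlignmentSize, FieldAlignmentSize);
      return Offset;
    }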
@@ -361,6 +371,7 @@ private:
/// This is the current buffer index we're lexing from as managed by the
/// SourceMgr object.
unsigned CurBuffer;
+ std::vector<bool> EndStatementAtEOFStack;
AsmCond TheCondState;
std::vector<AsmCond> TheCondStack;
@@ -386,8 +397,8 @@ private:
/// Maps struct tags to struct definitions.
StringMap<StructInfo> Structs;
- /// Maps data location names to user-defined types.
- StringMap<const StructInfo *> KnownType;
+ /// Maps data location names to types.
+ StringMap<AsmTypeInfo> KnownType;
/// Stack of active macro instantiations.
std::vector<MacroInstantiation*> ActiveMacros;
@@ -427,12 +438,12 @@ private:
/// Did we already inform the user about inconsistent MD5 usage?
bool ReportedInconsistentMD5 = false;
- // Is alt macro mode enabled.
- bool AltMacroMode = false;
-
// Current <...> expression depth.
unsigned AngleBracketDepth = 0U;
+ // Number of locals defined.
+ uint16_t LocalCounter = 0;
+
public:
MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI, unsigned CB);
@@ -490,10 +501,13 @@ public:
bool isParsingMasm() const override { return true; }
- bool lookUpField(StringRef Name, StringRef &Type,
- unsigned &Offset) const override;
- bool lookUpField(StringRef Base, StringRef Member, StringRef &Type,
- unsigned &Offset) const override;
+ bool defineMacro(StringRef Name, StringRef Value) override;
+
+ bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
+ bool lookUpField(StringRef Base, StringRef Member,
+ AsmFieldInfo &Info) const override;
+
+ bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
@@ -505,7 +519,8 @@ public:
bool parseExpression(const MCExpr *&Res);
bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
- bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
+ bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
+ AsmTypeInfo *TypeInfo) override;
bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
SMLoc &EndLoc) override;
@@ -530,12 +545,10 @@ private:
bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
bool parseCppHashLineFilenameComment(SMLoc L);
- void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body,
- ArrayRef<MCAsmMacroParameter> Parameters);
bool expandMacro(raw_svector_ostream &OS, StringRef Body,
ArrayRef<MCAsmMacroParameter> Parameters,
- ArrayRef<MCAsmMacroArgument> A, bool EnableAtPseudoVariable,
- SMLoc L);
+ ArrayRef<MCAsmMacroArgument> A,
+ const std::vector<std::string> &Locals, SMLoc L);
/// Are we inside a macro instantiation?
bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
@@ -544,18 +557,33 @@ private:
///
/// \param M The macro.
/// \param NameLoc Instantiation location.
- bool handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc);
+ bool handleMacroEntry(
+ const MCAsmMacro *M, SMLoc NameLoc,
+ AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
+
+ /// Handle invocation of macro function.
+ ///
+ /// \param M The macro.
+ /// \param NameLoc Invocation location.
+ bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
/// Handle exit from macro instantiation.
void handleMacroExit();
/// Extract AsmTokens for a macro argument.
- bool parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg);
+ bool
+ parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
+ AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
/// Parse all macro arguments for a given macro.
- bool parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A);
+ bool
+ parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
+ AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
void printMacroInstantiations();
+
+ bool expandStatement(SMLoc Loc);
+
void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
SMRange Range = None) const {
ArrayRef<SMRange> Ranges(Range);
@@ -564,7 +592,7 @@ private:
static void DiagHandler(const SMDiagnostic &Diag, void *Context);
bool lookUpField(const StructInfo &Structure, StringRef Member,
- StringRef &Type, unsigned &Offset) const;
+ AsmFieldInfo &Info) const;
/// Should we emit DWARF describing this assembler source? (Returns false if
/// the source has .file directives, which means we don't want to generate
@@ -580,11 +608,19 @@ private:
///
/// \param InBuffer If not 0, should be the known buffer id that contains the
/// location.
- void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0);
-
- /// Parse up to the end of statement and a return the contents from the
- /// current token until the end of the statement; the current token on exit
- /// will be either the EndOfStatement or EOF.
+ void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
+ bool EndStatementAtEOF = true);
+
+ /// Parse up to a token of kind \p EndTok and return the contents from the
+ /// current token up to (but not including) this token; the current token on
+ /// exit will be either this kind or EOF. Reads through instantiated macro
+ /// functions and text macros.
+ SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
+ std::string parseStringTo(AsmToken::TokenKind EndTok);
+
+ /// Parse up to the end of statement and return the contents from the current
+ /// token until the end of the statement; the current token on exit will be
+ /// either the EndOfStatement or EOF.
StringRef parseStringToEndOfStatement() override;
bool parseTextItem(std::string &Data);
@@ -622,10 +658,12 @@ private:
DK_SQWORD,
DK_DB,
DK_DD,
+ DK_DF,
DK_DQ,
DK_DW,
DK_REAL4,
DK_REAL8,
+ DK_REAL10,
DK_ALIGN,
DK_ORG,
DK_ENDR,
@@ -634,9 +672,10 @@ private:
DK_COMM,
DK_COMMENT,
DK_INCLUDE,
- DK_REPT,
- DK_IRP,
- DK_IRPC,
+ DK_REPEAT,
+ DK_WHILE,
+ DK_FOR,
+ DK_FORC,
DK_IF,
DK_IFE,
DK_IFB,
@@ -697,12 +736,10 @@ private:
DK_CFI_REGISTER,
DK_CFI_WINDOW_SAVE,
DK_CFI_B_KEY_FRAME,
- DK_ALTMACRO,
- DK_NOALTMACRO,
DK_MACRO,
DK_EXITM,
DK_ENDM,
- DK_PURGEM,
+ DK_PURGE,
DK_ERR,
DK_ERRB,
DK_ERRNB,
@@ -718,13 +755,21 @@ private:
DK_STRUCT,
DK_UNION,
DK_ENDS,
- DK_END
+ DK_END,
+ DK_PUSHFRAME,
+ DK_PUSHREG,
+ DK_SAVEREG,
+ DK_SAVEXMM128,
+ DK_SETFRAME,
+ DK_RADIX,
};
/// Maps directive name --> DirectiveKind enum, for directives parsed by this
/// class.
StringMap<DirectiveKind> DirectiveKindMap;
+ bool isMacroLikeDirective();
+
// Codeview def_range type parsing.
enum CVDefRangeType {
CVDR_DEFRANGE = 0, // Placeholder
@@ -738,8 +783,6 @@ private:
/// def_range types parsed by this class.
StringMap<CVDefRangeType> CVDefRangeTypeMap;
- bool parseInitValue(unsigned Size);
-
// ".ascii", ".asciz", ".string"
bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
@@ -751,22 +794,24 @@ private:
bool parseScalarInstList(
unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
- bool emitIntegralValues(unsigned Size);
+ bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
bool addIntegralField(StringRef Name, unsigned Size);
bool parseDirectiveValue(StringRef IDVal, unsigned Size);
- bool parseDirectiveNamedValue(StringRef IDVal, unsigned Size, StringRef Name,
- SMLoc NameLoc);
-
- // "real4", "real8"
- bool emitRealValues(const fltSemantics &Semantics);
- bool addRealField(StringRef Name, const fltSemantics &Semantics);
- bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics);
+ bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
+ StringRef Name, SMLoc NameLoc);
+
+ // "real4", "real8", "real10"
+ bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
+ bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
+ bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
+ size_t Size);
bool parseRealInstList(
const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
- bool parseDirectiveNamedRealValue(StringRef IDVal,
+ bool parseDirectiveNamedRealValue(StringRef TypeName,
const fltSemantics &Semantics,
- StringRef Name, SMLoc NameLoc);
+ unsigned Size, StringRef Name,
+ SMLoc NameLoc);
bool parseOptionalAngleBracketOpen();
bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
@@ -794,8 +839,6 @@ private:
bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
- bool emitStructValue(const StructInfo &Structure);
-
bool emitFieldInitializer(const FieldInfo &Field,
const FieldInitializer &Initializer);
bool emitFieldInitializer(const FieldInfo &Field,
@@ -812,7 +855,7 @@ private:
const StructInitializer &Initializer);
// User-defined types (structs, unions):
- bool emitStructValues(const StructInfo &Structure);
+ bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
bool addStructField(StringRef Name, const StructInfo &Structure);
bool parseDirectiveStructValue(const StructInfo &Structure,
StringRef Directive, SMLoc DirLoc);
@@ -872,11 +915,10 @@ private:
// macro directives
bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
- bool parseDirectiveExitMacro(StringRef Directive);
+ bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
+ std::string &Value);
bool parseDirectiveEndMacro(StringRef Directive);
- bool parseDirectiveMacro(SMLoc DirectiveLoc);
- // alternate macro mode directives
- bool parseDirectiveAltmacro(StringRef Directive);
+ bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
StringRef Name, SMLoc NameLoc);
@@ -923,10 +965,12 @@ private:
MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
raw_svector_ostream &OS);
- bool parseDirectiveRept(SMLoc DirectiveLoc, StringRef Directive);
- bool parseDirectiveIrp(SMLoc DirectiveLoc); // ".irp"
- bool parseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc"
- bool parseDirectiveEndr(SMLoc DirectiveLoc); // ".endr"
+ void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
+ SMLoc ExitLoc, raw_svector_ostream &OS);
+ bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
+ bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
+ bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
+ bool parseDirectiveWhile(SMLoc DirectiveLoc);
// "_emit" or "__emit"
bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
@@ -951,6 +995,9 @@ private:
// ".erre" or ".errnz", depending on ExpectZero.
bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
+ // ".radix"
+ bool parseDirectiveRadix(SMLoc DirectiveLoc);
+
// "echo"
bool parseDirectiveEcho();
@@ -979,6 +1026,7 @@ MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
// Set our own handler which calls the saved handler.
SrcMgr.setDiagHandler(DiagHandler, this);
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
+ EndStatementAtEOFStack.push_back(true);
// Initialize the platform / file format parser.
switch (Ctx.getObjectFileInfo()->getObjectFileType()) {
@@ -1048,13 +1096,15 @@ bool MasmParser::enterIncludeFile(const std::string &Filename) {
CurBuffer = NewBuf;
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
+ EndStatementAtEOFStack.push_back(true);
return false;
}
-void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer) {
+void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
+ bool EndStatementAtEOF) {
CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
- Loc.getPointer());
+ Loc.getPointer(), EndStatementAtEOF);
}
const AsmToken &MasmParser::Lex() {
@@ -1072,8 +1122,11 @@ const AsmToken &MasmParser::Lex() {
const AsmToken *tok = &Lexer.Lex();
while (tok->is(AsmToken::Identifier)) {
- auto it = Variables.find(tok->getIdentifier());
+ auto it = Variables.find(tok->getIdentifier().lower());
+ const llvm::MCAsmMacro *M =
+ getContext().lookupMacro(tok->getIdentifier().lower());
if (it != Variables.end() && it->second.IsText) {
+      // This is a text macro; expand it in place.

std::unique_ptr<MemoryBuffer> Instantiation =
MemoryBuffer::getMemBufferCopy(it->second.TextValue,
"<instantiation>");
@@ -1083,7 +1136,17 @@ const AsmToken &MasmParser::Lex() {
getTok().getEndLoc());
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
/*EndStatementAtEOF=*/false);
+ EndStatementAtEOFStack.push_back(false);
+ tok = &Lexer.Lex();
+ } else if (M && M->IsFunction && Lexer.peekTok().is(AsmToken::LParen)) {
+ // This is a macro function invocation; expand it in place.
+ const AsmToken MacroTok = *tok;
tok = &Lexer.Lex();
+ if (handleMacroInvocation(M, MacroTok.getLoc())) {
+ Lexer.UnLex(AsmToken(AsmToken::Error, MacroTok.getIdentifier()));
+ tok = &Lexer.Lex();
+ }
+ continue;
} else {
break;
}
@@ -1096,14 +1159,25 @@ const AsmToken &MasmParser::Lex() {
tok = &Lexer.Lex();
}
+ // Recognize and bypass line continuations.
+ while (tok->is(AsmToken::BackSlash) &&
+ Lexer.peekTok().is(AsmToken::EndOfStatement)) {
+ // Eat both the backslash and the end of statement.
+ Lexer.Lex();
+ tok = &Lexer.Lex();
+ }
+
if (tok->is(AsmToken::Eof)) {
// If this is the end of an included file, pop the parent file off the
// include stack.
SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
if (ParentIncludeLoc != SMLoc()) {
- jumpToLoc(ParentIncludeLoc);
+ EndStatementAtEOFStack.pop_back();
+ jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
return Lex();
}
+ EndStatementAtEOFStack.pop_back();
+ assert(EndStatementAtEOFStack.empty());
}
return *tok;
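
The continuation handling added to Lex() above swallows a backslash that immediately precedes an end-of-statement, so the next line joins the current statement. A simplified, text-level sketch of the same idea (the real code works on tokens, not raw characters):

    #include <string>

    // Join line continuations: a backslash at the end of a line glues the
    // following line onto the current statement.
    std::string joinContinuations(const std::string &Src) {
      std::string Out;
      for (std::size_t I = 0; I < Src.size(); ++I) {
        if (Src[I] == '\\' && I + 1 < Src.size() && Src[I + 1] == '\n') {
          ++I;  // drop both the backslash and the newline
          continue;
        }
        Out.push_back(Src[I]);
      }
      return Out;
    }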
@@ -1162,7 +1236,12 @@ bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
}
// While we have input, parse each statement.
- while (Lexer.isNot(AsmToken::Eof)) {
+ while (Lexer.isNot(AsmToken::Eof) ||
+ SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
+ // Skip through the EOF at the end of an inclusion.
+ if (Lexer.is(AsmToken::Eof))
+ Lex();
+
ParseStatementInfo Info(&AsmStrRewrites);
bool Parsed = parseStatement(Info, nullptr);
@@ -1240,7 +1319,7 @@ bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Finalize the output stream if there are no errors and if the client wants
// us to.
if (!HadError && !NoFinalize)
- Out.Finish();
+ Out.Finish(Lexer.getLoc());
return HadError || getContext().hadError();
}
@@ -1256,14 +1335,58 @@ bool MasmParser::checkForValidSection() {
/// Throw away the rest of the line for testing purposes.
void MasmParser::eatToEndOfStatement() {
- while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
+ while (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (Lexer.is(AsmToken::Eof)) {
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc == SMLoc()) {
+ break;
+ }
+
+ EndStatementAtEOFStack.pop_back();
+ jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
+ }
+
Lexer.Lex();
+ }
// Eat EOL.
if (Lexer.is(AsmToken::EndOfStatement))
Lexer.Lex();
}
+SmallVector<StringRef, 1>
+MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
+ SmallVector<StringRef, 1> Refs;
+ const char *Start = getTok().getLoc().getPointer();
+ while (Lexer.isNot(EndTok)) {
+ if (Lexer.is(AsmToken::Eof)) {
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc == SMLoc()) {
+ break;
+ }
+ Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
+
+ EndStatementAtEOFStack.pop_back();
+ jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
+ Lexer.Lex();
+ Start = getTok().getLoc().getPointer();
+ } else {
+ Lexer.Lex();
+ }
+ }
+ Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
+ return Refs;
+}
+
+std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
+ SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
+ std::string Str;
+ for (StringRef S : Refs) {
+ Str.append(S.str());
+ }
+ return Str;
+}
+
StringRef MasmParser::parseStringToEndOfStatement() {
const char *Start = getTok().getLoc().getPointer();
@@ -1308,8 +1431,11 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
/// primaryexpr ::= symbol
/// primaryexpr ::= number
/// primaryexpr ::= '.'
-/// primaryexpr ::= ~,+,- primaryexpr
-bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+/// primaryexpr ::= ~,+,-,'not' primaryexpr
+/// primaryexpr ::= string
+/// (a string is interpreted as a 64-bit number in big-endian base-256)
+bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
+ AsmTypeInfo *TypeInfo) {
SMLoc FirstTokenLoc = getLexer().getLoc();
AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
switch (FirstTokenKind) {
@@ -1320,13 +1446,12 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return true;
case AsmToken::Exclaim:
Lex(); // Eat the operator.
- if (parsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc, nullptr))
return true;
Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
return false;
case AsmToken::Dollar:
case AsmToken::At:
- case AsmToken::String:
case AsmToken::Identifier: {
StringRef Identifier;
if (parseIdentifier(Identifier)) {
@@ -1346,6 +1471,13 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return Error(FirstTokenLoc, "invalid token in expression");
}
}
+ // Parse named bitwise negation.
+ if (Identifier.equals_lower("not")) {
+ if (parsePrimaryExpr(Res, EndLoc, nullptr))
+ return true;
+ Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
+ return false;
+ }
// Parse symbol variant.
std::pair<StringRef, StringRef> Split;
if (!MAI.useParensForSymbolVariant()) {
@@ -1396,24 +1528,19 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
// Find the field offset if used.
- StringRef Type;
- unsigned Offset = 0;
+ AsmFieldInfo Info;
Split = SymbolName.split('.');
- if (!Split.second.empty()) {
+ if (Split.second.empty()) {
+ } else {
SymbolName = Split.first;
- if (Structs.count(SymbolName.lower()) &&
- !lookUpField(SymbolName, Split.second, Type, Offset)) {
- // This is actually a reference to a field offset.
- Res = MCConstantExpr::create(Offset, getContext());
- return false;
- }
-
- auto TypeIt = KnownType.find(SymbolName);
- if (TypeIt == KnownType.end() ||
- lookUpField(*TypeIt->second, Split.second, Type, Offset)) {
+ if (lookUpField(SymbolName, Split.second, Info)) {
std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
StringRef Base = BaseMember.first, Member = BaseMember.second;
- lookUpField(Base, Member, Type, Offset);
+ lookUpField(Base, Member, Info);
+ } else if (Structs.count(SymbolName.lower())) {
+ // This is actually a reference to a field offset.
+ Res = MCConstantExpr::create(Info.Offset, getContext());
+ return false;
}
}
@@ -1439,13 +1566,23 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// Otherwise create a symbol ref.
const MCExpr *SymRef =
MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
- if (Offset) {
- Res = MCBinaryExpr::create(MCBinaryExpr::Add, SymRef,
- MCConstantExpr::create(Offset, getContext()),
- getContext());
+ if (Info.Offset) {
+ Res = MCBinaryExpr::create(
+ MCBinaryExpr::Add, SymRef,
+ MCConstantExpr::create(Info.Offset, getContext()), getContext());
} else {
Res = SymRef;
}
+ if (TypeInfo) {
+ if (Info.Type.Name.empty()) {
+ auto TypeIt = KnownType.find(Identifier.lower());
+ if (TypeIt != KnownType.end()) {
+ Info.Type = TypeIt->second;
+ }
+ }
+
+ *TypeInfo = Info.Type;
+ }
return false;
}
case AsmToken::BigNum:
@@ -1481,6 +1618,20 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
return false;
}
+ case AsmToken::String: {
+ // MASM strings (used as constants) are interpreted as big-endian base-256.
+ SMLoc ValueLoc = getTok().getLoc();
+ std::string Value;
+ if (parseEscapedString(Value))
+ return true;
+ if (Value.size() > 8)
+ return Error(ValueLoc, "literal value out of range");
+ uint64_t IntValue = 0;
+ for (const unsigned char CharVal : Value)
+ IntValue = (IntValue << 8) | CharVal;
+ Res = MCConstantExpr::create(IntValue, getContext());
+ return false;
+ }
case AsmToken::Real: {
APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
@@ -1509,19 +1660,19 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return parseBracketExpr(Res, EndLoc);
case AsmToken::Minus:
Lex(); // Eat the operator.
- if (parsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc, nullptr))
return true;
Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
return false;
case AsmToken::Plus:
Lex(); // Eat the operator.
- if (parsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc, nullptr))
return true;
Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
return false;
case AsmToken::Tilde:
Lex(); // Eat the operator.
- if (parsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc, nullptr))
return true;
Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
return false;
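
The new AsmToken::String case above treats a character literal used in an expression as a 64-bit constant: at most 8 characters, packed big-endian with one byte per character. A minimal sketch of that packing, standard library only; packCharLiteral is an illustrative name, not part of the parser:

    #include <cstdint>
    #include <optional>
    #include <string>

    // Pack a MASM character literal (at most 8 bytes) into a 64-bit value,
    // most significant byte first, e.g. "ab" -> 0x6162.
    std::optional<uint64_t> packCharLiteral(const std::string &Value) {
      if (Value.size() > 8)
        return std::nullopt;  // "literal value out of range"
      uint64_t IntValue = 0;
      for (unsigned char C : Value)
        IntValue = (IntValue << 8) | C;
      return IntValue;
    }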
@@ -1576,11 +1727,6 @@ bool MasmParser::parseExpression(const MCExpr *&Res) {
/// If the function returns a 'true' value,
/// the End argument will be filled with the last location pointed to the '>'
/// character.
-
-/// There is a gap between the AltMacro's documentation and the single quote
-/// implementation. GCC does not fully support this feature and so we will not
-/// support it.
-/// TODO: Adding single quote as a string.
static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
assert((StrLoc.getPointer() != nullptr) &&
"Argument to the function cannot be a NULL value");
@@ -1599,12 +1745,12 @@ static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
}
/// creating a string without the escape characters '!'.
-static std::string angleBracketString(StringRef AltMacroStr) {
+static std::string angleBracketString(StringRef BracketContents) {
std::string Res;
- for (size_t Pos = 0; Pos < AltMacroStr.size(); Pos++) {
- if (AltMacroStr[Pos] == '!')
+ for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
+ if (BracketContents[Pos] == '!')
Pos++;
- Res += AltMacroStr[Pos];
+ Res += BracketContents[Pos];
}
return Res;
}
@@ -1722,8 +1868,6 @@ static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
return 4;
// High Intermediate Precedence: |, &, ^
- //
- // FIXME: gas seems to support '!' as an infix operator?
case AsmToken::Pipe:
Kind = MCBinaryExpr::Or;
return 5;
@@ -1768,8 +1912,22 @@ bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
SMLoc &EndLoc) {
SMLoc StartLoc = Lexer.getLoc();
while (true) {
+ AsmToken::TokenKind TokKind = Lexer.getKind();
+ if (Lexer.getKind() == AsmToken::Identifier) {
+ TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
+ .CaseLower("and", AsmToken::Amp)
+ .CaseLower("not", AsmToken::Exclaim)
+ .CaseLower("or", AsmToken::Pipe)
+ .CaseLower("eq", AsmToken::EqualEqual)
+ .CaseLower("ne", AsmToken::ExclaimEqual)
+ .CaseLower("lt", AsmToken::Less)
+ .CaseLower("le", AsmToken::LessEqual)
+ .CaseLower("gt", AsmToken::Greater)
+ .CaseLower("ge", AsmToken::GreaterEqual)
+ .Default(TokKind);
+ }
MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
- unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
+ unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
// If the next token is lower precedence than we are allowed to eat, return
// successfully with what we ate already.
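
The StringSwitch added above lets MASM's word operators participate in binary-expression parsing by mapping them, case-insensitively, onto the symbolic tokens the precedence table already knows. A rough equivalence table as a sketch (the returned strings are only for illustration; the parser maps to token kinds, not text):

    #include <cctype>
    #include <string>

    // and->&, or->|, eq->==, ne->!=, lt-><, le-><=, gt->>, ge->>=
    std::string mapWordOperator(std::string Op) {
      for (char &C : Op)
        C = static_cast<char>(std::tolower(static_cast<unsigned char>(C)));
      if (Op == "and") return "&";
      if (Op == "or")  return "|";
      if (Op == "eq")  return "==";
      if (Op == "ne")  return "!=";
      if (Op == "lt")  return "<";
      if (Op == "le")  return "<=";
      if (Op == "gt")  return ">";
      if (Op == "ge")  return ">=";
      return Op;  // not a word operator
    }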
@@ -1796,6 +1954,7 @@ bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
}
/// ParseStatement:
+/// ::= % statement
/// ::= EndOfStatement
/// ::= Label* Directive ...Operands... EndOfStatement
/// ::= Label* Identifier OperandList* EndOfStatement
@@ -1813,6 +1972,15 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
Lex();
return false;
}
+
+ // If preceded by an expansion operator, first expand all text macros and
+ // macro functions.
+ if (getTok().is(AsmToken::Percent)) {
+ SMLoc ExpansionLoc = getTok().getLoc();
+ if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
+ return true;
+ }
+
// Statements always start with an identifier, unless we're dealing with a
// processor directive (.386, .686, etc.) that lexes as a real.
AsmToken ID = getTok();
@@ -1864,6 +2032,11 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
Lex(); // always eat a token
if (!IDVal.startswith("."))
return Error(IDLoc, "unexpected token at start of statement");
+ } else if (Lexer.is(AsmToken::Identifier) &&
+ getTok().getString().equals_lower("echo")) {
+ // Intercept echo early to avoid lexical substitution in its message, and
+ // delegate all handling to the appropriate function.
+ return parseDirectiveEcho();
} else if (parseIdentifier(IDVal)) {
if (!TheCondState.Ignore) {
Lex(); // always eat a token
@@ -1983,7 +2156,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
// does not understand Labels. This may cause us to see a Hash
// here instead of a preprocessor line comment.
if (getTok().is(AsmToken::Hash)) {
- StringRef CommentStr = parseStringToEndOfStatement();
+ std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
Lexer.Lex();
Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
}
@@ -2016,7 +2189,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
}
// If macros are enabled, check to see if this is a macro instantiation.
- if (const MCAsmMacro *M = getContext().lookupMacro(IDVal)) {
+ if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
return handleMacroEntry(M, IDLoc);
}
@@ -2091,15 +2264,18 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
case DK_DD:
return parseDirectiveValue(IDVal, 4);
case DK_FWORD:
+ case DK_DF:
return parseDirectiveValue(IDVal, 6);
case DK_QWORD:
case DK_SQWORD:
case DK_DQ:
return parseDirectiveValue(IDVal, 8);
case DK_REAL4:
- return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle());
+ return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
case DK_REAL8:
- return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble());
+ return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
+ case DK_REAL10:
+ return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
case DK_STRUCT:
case DK_UNION:
return parseDirectiveNestedStruct(IDVal, DirKind);
@@ -2120,14 +2296,14 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveComment(IDLoc);
case DK_INCLUDE:
return parseDirectiveInclude();
- case DK_REPT:
- return parseDirectiveRept(IDLoc, IDVal);
- case DK_IRP:
- return parseDirectiveIrp(IDLoc);
- case DK_IRPC:
- return parseDirectiveIrpc(IDLoc);
- case DK_ENDR:
- return parseDirectiveEndr(IDLoc);
+ case DK_REPEAT:
+ return parseDirectiveRepeat(IDLoc, IDVal);
+ case DK_WHILE:
+ return parseDirectiveWhile(IDLoc);
+ case DK_FOR:
+ return parseDirectiveFor(IDLoc, IDVal);
+ case DK_FORC:
+ return parseDirectiveForc(IDLoc, IDVal);
case DK_FILE:
return parseDirectiveFile(IDLoc);
case DK_LINE:
@@ -2202,16 +2378,13 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveCFIRegister(IDLoc);
case DK_CFI_WINDOW_SAVE:
return parseDirectiveCFIWindowSave();
- case DK_MACRO:
- return parseDirectiveMacro(IDLoc);
- case DK_ALTMACRO:
- case DK_NOALTMACRO:
- return parseDirectiveAltmacro(IDVal);
case DK_EXITM:
- return parseDirectiveExitMacro(IDVal);
+ Info.ExitValue = "";
+ return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
case DK_ENDM:
+ Info.ExitValue = "";
return parseDirectiveEndMacro(IDVal);
- case DK_PURGEM:
+ case DK_PURGE:
return parseDirectivePurgeMacro(IDLoc);
case DK_END:
return parseDirectiveEnd(IDLoc);
@@ -2241,8 +2414,8 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveErrorIfe(IDLoc, true);
case DK_ERRNZ:
return parseDirectiveErrorIfe(IDLoc, false);
- case DK_ECHO:
- return parseDirectiveEcho();
+ case DK_RADIX:
+ return parseDirectiveRadix(IDLoc);
}
return Error(IDLoc, "unknown directive");
@@ -2302,32 +2475,41 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
Lex();
return parseDirectiveEquate(nextVal, IDVal, DirKind);
case DK_BYTE:
+ case DK_SBYTE:
case DK_DB:
Lex();
return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
case DK_WORD:
+ case DK_SWORD:
case DK_DW:
Lex();
return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
case DK_DWORD:
+ case DK_SDWORD:
case DK_DD:
Lex();
return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
case DK_FWORD:
+ case DK_DF:
Lex();
return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
case DK_QWORD:
+ case DK_SQWORD:
case DK_DQ:
Lex();
return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
case DK_REAL4:
Lex();
- return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), IDVal,
- IDLoc);
+ return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
+ IDVal, IDLoc);
case DK_REAL8:
Lex();
- return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal,
- IDLoc);
+ return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
+ IDVal, IDLoc);
+ case DK_REAL10:
+ Lex();
+ return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
+ 10, IDVal, IDLoc);
case DK_STRUCT:
case DK_UNION:
Lex();
@@ -2335,6 +2517,9 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
case DK_ENDS:
Lex();
return parseDirectiveEnds(IDVal, IDLoc);
+ case DK_MACRO:
+ Lex();
+ return parseDirectiveMacro(IDVal, IDLoc);
}
// Finally, we check if this is allocating a variable with user-defined type.
@@ -2528,45 +2713,68 @@ void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
NewDiag.print(nullptr, OS);
}
-// FIXME: This is mostly duplicated from the function in AsmLexer.cpp. The
-// difference being that that function accepts '@' as part of identifiers and
-// we can't do that. AsmLexer.cpp should probably be changed to handle
-// '@' as a special case when needed.
-static bool isIdentifierChar(char c) {
- return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
- c == '.';
+// This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
+// not accept '.'.
+static bool isMacroParameterChar(char C) {
+ return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
}
bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
ArrayRef<MCAsmMacroParameter> Parameters,
ArrayRef<MCAsmMacroArgument> A,
- bool EnableAtPseudoVariable, SMLoc L) {
+ const std::vector<std::string> &Locals, SMLoc L) {
unsigned NParameters = Parameters.size();
- bool HasVararg = NParameters ? Parameters.back().Vararg : false;
- if ((!IsDarwin || NParameters != 0) && NParameters != A.size())
+ if (NParameters != A.size())
return Error(L, "Wrong number of arguments");
-
- // A macro without parameters is handled differently on Darwin:
- // gas accepts no arguments and does no substitutions
+ StringMap<std::string> LocalSymbols;
+ std::string Name;
+ Name.reserve(6);
+ for (StringRef Local : Locals) {
+ raw_string_ostream LocalName(Name);
+ LocalName << "??"
+ << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
+ LocalSymbols.insert({Local, LocalName.str()});
+ Name.clear();
+ }
+
+ Optional<char> CurrentQuote;
while (!Body.empty()) {
// Scan for the next substitution.
std::size_t End = Body.size(), Pos = 0;
+ std::size_t IdentifierPos = End;
for (; Pos != End; ++Pos) {
- // Check for a substitution or escape.
- if (IsDarwin && !NParameters) {
- // This macro has no parameters, look for $0, $1, etc.
- if (Body[Pos] != '$' || Pos + 1 == End)
- continue;
-
- char Next = Body[Pos + 1];
- if (Next == '$' || Next == 'n' ||
- isdigit(static_cast<unsigned char>(Next)))
+ // Find the next possible macro parameter, including preceding a '&'
+ // inside quotes.
+ if (Body[Pos] == '&')
+ break;
+ if (isMacroParameterChar(Body[Pos])) {
+ if (!CurrentQuote.hasValue())
break;
+ if (IdentifierPos == End)
+ IdentifierPos = Pos;
} else {
- // This macro has parameters, look for \foo, \bar, etc.
- if (Body[Pos] == '\\' && Pos + 1 != End)
- break;
+ IdentifierPos = End;
}
+
+ // Track quotation status
+ if (!CurrentQuote.hasValue()) {
+ if (Body[Pos] == '\'' || Body[Pos] == '"')
+ CurrentQuote = Body[Pos];
+ } else if (Body[Pos] == CurrentQuote) {
+ if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
+ // Escaped quote, and quotes aren't identifier chars; skip
+ ++Pos;
+ continue;
+ } else {
+ CurrentQuote.reset();
+ }
+ }
+ }
+ if (IdentifierPos != End) {
+ // We've recognized an identifier before an apostrophe inside quotes;
+ // check once to see if we can expand it.
+ Pos = IdentifierPos;
+ IdentifierPos = End;
}
// Add the prefix.
@@ -2576,90 +2784,51 @@ bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
if (Pos == End)
break;
- if (IsDarwin && !NParameters) {
- switch (Body[Pos + 1]) {
- // $$ => $
- case '$':
- OS << '$';
- break;
-
- // $n => number of arguments
- case 'n':
- OS << A.size();
- break;
+ unsigned I = Pos;
+ bool InitialAmpersand = (Body[I] == '&');
+ if (InitialAmpersand) {
+ ++I;
+ ++Pos;
+ }
+ while (I < End && isMacroParameterChar(Body[I]))
+ ++I;
- // $[0-9] => argument
- default: {
- // Missing arguments are ignored.
- unsigned Index = Body[Pos + 1] - '0';
- if (Index >= A.size())
- break;
+ const char *Begin = Body.data() + Pos;
+ StringRef Argument(Begin, I - Pos);
+ unsigned Index = 0;
- // Otherwise substitute with the token values, with spaces eliminated.
- for (const AsmToken &Token : A[Index])
- OS << Token.getString();
+ for (; Index < NParameters; ++Index)
+ if (Parameters[Index].Name == Argument)
break;
- }
- }
- Pos += 2;
- } else {
- unsigned I = Pos + 1;
- // Check for the \@ pseudo-variable.
- if (EnableAtPseudoVariable && Body[I] == '@' && I + 1 != End)
- ++I;
+ if (Index == NParameters) {
+ if (InitialAmpersand)
+ OS << '&';
+ auto it = LocalSymbols.find(Argument.lower());
+ if (it != LocalSymbols.end())
+ OS << it->second;
else
- while (isIdentifierChar(Body[I]) && I + 1 != End)
- ++I;
-
- const char *Begin = Body.data() + Pos + 1;
- StringRef Argument(Begin, I - (Pos + 1));
- unsigned Index = 0;
+ OS << Argument;
+ Pos = I;
+ } else {
+ for (const AsmToken &Token : A[Index]) {
+ // In MASM, you can write '%expr'.
+ // The prefix '%' evaluates the expression 'expr'
+ // and uses the result as a string (e.g. replace %(1+2) with the
+ // string "3").
+ // Here, we identify the integer token which is the result of the
+ // absolute expression evaluation and replace it with its string
+ // representation.
+ if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
+ // Emit an integer value to the buffer.
+ OS << Token.getIntVal();
+ else
+ OS << Token.getString();
+ }
- if (Argument == "@") {
- OS << NumOfMacroInstantiations;
- Pos += 2;
- } else {
- for (; Index < NParameters; ++Index)
- if (Parameters[Index].Name == Argument)
- break;
-
- if (Index == NParameters) {
- if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')')
- Pos += 3;
- else {
- OS << '\\' << Argument;
- Pos = I;
- }
- } else {
- bool VarargParameter = HasVararg && Index == (NParameters - 1);
- for (const AsmToken &Token : A[Index])
- // For altmacro mode, you can write '%expr'.
- // The prefix '%' evaluates the expression 'expr'
- // and uses the result as a string (e.g. replace %(1+2) with the
- // string "3").
- // Here, we identify the integer token which is the result of the
- // absolute expression evaluation and replace it with its string
- // representation.
- if (AltMacroMode && Token.getString().front() == '%' &&
- Token.is(AsmToken::Integer))
- // Emit an integer value to the buffer.
- OS << Token.getIntVal();
- // Only Token that was validated as a string and begins with '<'
- // is considered altMacroString!!!
- else if (AltMacroMode && Token.getString().front() == '<' &&
- Token.is(AsmToken::String)) {
- OS << angleBracketString(Token.getStringContents());
- }
- // We expect no quotes around the string's contents when
- // parsing for varargs.
- else if (Token.isNot(AsmToken::String) || VarargParameter)
- OS << Token.getString();
- else
- OS << Token.getStringContents();
-
- Pos += 1 + Argument.size();
- }
+ Pos += Argument.size();
+ if (Pos < End && Body[Pos] == '&') {
+ ++Pos;
}
}
// Update the scan point.
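
Part of the expandMacro rewrite above gives each LOCAL symbol a hidden unique name of the form ??NNNN, four uppercase hex digits drawn from a 16-bit counter (format_hex_no_prefix in the hunk). A small sketch of just that renaming step; the map and counter below are local stand-ins for the parser's LocalSymbols and LocalCounter:

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    // Assign MASM-style hidden names to LOCAL symbols: ??0000, ??0001, ...
    std::map<std::string, std::string>
    assignLocalNames(const std::vector<std::string> &Locals, uint16_t &Counter) {
      std::map<std::string, std::string> Renamed;
      for (const std::string &Local : Locals) {
        char Buf[8];
        std::snprintf(Buf, sizeof(Buf), "??%04X",
                      static_cast<unsigned>(Counter++));
        Renamed[Local] = Buf;
      }
      return Renamed;
    }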
@@ -2717,16 +2886,30 @@ private:
} // end anonymous namespace
-bool MasmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
-
- if (Vararg) {
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
- StringRef Str = parseStringToEndOfStatement();
- MA.emplace_back(AsmToken::String, Str);
+bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
+ MCAsmMacroArgument &MA,
+ AsmToken::TokenKind EndTok) {
+ if (MP && MP->Vararg) {
+ if (Lexer.isNot(EndTok)) {
+ SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
+ for (StringRef S : Str) {
+ MA.emplace_back(AsmToken::String, S);
+ }
}
return false;
}
+ SMLoc StrLoc = Lexer.getLoc(), EndLoc;
+ if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
+ const char *StrChar = StrLoc.getPointer() + 1;
+ const char *EndChar = EndLoc.getPointer() - 1;
+ jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
+ /// Eat from '<' to '>'.
+ Lex();
+ MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
+ return false;
+ }
+
unsigned ParenLevel = 0;
  // Darwin doesn't use spaces to delimit arguments.
@@ -2737,29 +2920,28 @@ bool MasmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
while (true) {
SpaceEaten = false;
if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
- return TokError("unexpected token in macro instantiation");
+ return TokError("unexpected token");
if (ParenLevel == 0) {
-
if (Lexer.is(AsmToken::Comma))
break;
if (Lexer.is(AsmToken::Space)) {
SpaceEaten = true;
- Lexer.Lex(); // Eat spaces.
+ Lex(); // Eat spaces.
}
      // Spaces can delimit parameters, but could also be part of an expression.
// If the token after a space is an operator, add the token and the next
// one into this argument
if (!IsDarwin) {
- if (isOperator(Lexer.getKind())) {
+ if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
MA.push_back(getTok());
- Lexer.Lex();
+ Lex();
// Whitespace after an operator can be ignored.
if (Lexer.is(AsmToken::Space))
- Lexer.Lex();
+ Lex();
continue;
}
@@ -2770,7 +2952,7 @@ bool MasmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
// handleMacroEntry relies on not advancing the lexer here
// to be able to fill in the remaining default parameter values
- if (Lexer.is(AsmToken::EndOfStatement))
+ if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
break;
// Adjust the current parentheses level.
@@ -2781,17 +2963,27 @@ bool MasmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
// Append the token to the current argument list.
MA.push_back(getTok());
- Lexer.Lex();
+ Lex();
}
if (ParenLevel != 0)
- return TokError("unbalanced parentheses in macro argument");
+ return TokError("unbalanced parentheses in argument");
+
+ if (MA.empty() && MP) {
+ if (MP->Required) {
+ return TokError("missing value for required parameter '" + MP->Name +
+ "'");
+ } else {
+ MA = MP->Value;
+ }
+ }
return false;
}
// Parse the macro instantiation arguments.
bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
- MCAsmMacroArguments &A) {
+ MCAsmMacroArguments &A,
+ AsmToken::TokenKind EndTok) {
const unsigned NParameters = M ? M->Parameters.size() : 0;
bool NamedParametersFound = false;
SmallVector<SMLoc, 4> FALocs;
@@ -2802,7 +2994,6 @@ bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
// Parse two kinds of macro invocations:
// - macros defined without any parameters accept an arbitrary number of them
// - macros defined with parameters accept at most that many of them
- bool HasVararg = NParameters ? M->Parameters.back().Vararg : false;
for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
++Parameter) {
SMLoc IDLoc = Lexer.getLoc();
@@ -2819,14 +3010,31 @@ bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
NamedParametersFound = true;
}
- bool Vararg = HasVararg && Parameter == (NParameters - 1);
if (NamedParametersFound && FA.Name.empty())
return Error(IDLoc, "cannot mix positional and keyword arguments");
+ unsigned PI = Parameter;
+ if (!FA.Name.empty()) {
+ assert(M && "expected macro to be defined");
+ unsigned FAI = 0;
+ for (FAI = 0; FAI < NParameters; ++FAI)
+ if (M->Parameters[FAI].Name == FA.Name)
+ break;
+
+ if (FAI >= NParameters) {
+ return Error(IDLoc, "parameter named '" + FA.Name +
+ "' does not exist for macro '" + M->Name + "'");
+ }
+ PI = FAI;
+ }
+ const MCAsmMacroParameter *MP = nullptr;
+ if (M && PI < NParameters)
+ MP = &M->Parameters[PI];
+
SMLoc StrLoc = Lexer.getLoc();
SMLoc EndLoc;
- if (AltMacroMode && Lexer.is(AsmToken::Percent)) {
+ if (Lexer.is(AsmToken::Percent)) {
const MCExpr *AbsoluteExp;
int64_t Value;
/// Eat '%'.
@@ -2841,32 +3049,11 @@ bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
AsmToken newToken(AsmToken::Integer,
StringRef(StrChar, EndChar - StrChar), Value);
FA.Value.push_back(newToken);
- } else if (AltMacroMode && Lexer.is(AsmToken::Less) &&
- isAngleBracketString(StrLoc, EndLoc)) {
- const char *StrChar = StrLoc.getPointer();
- const char *EndChar = EndLoc.getPointer();
- jumpToLoc(EndLoc, CurBuffer);
- /// Eat from '<' to '>'.
- Lex();
- AsmToken newToken(AsmToken::String,
- StringRef(StrChar, EndChar - StrChar));
- FA.Value.push_back(newToken);
- } else if(parseMacroArgument(FA.Value, Vararg))
- return true;
-
- unsigned PI = Parameter;
- if (!FA.Name.empty()) {
- unsigned FAI = 0;
- for (FAI = 0; FAI < NParameters; ++FAI)
- if (M->Parameters[FAI].Name == FA.Name)
- break;
-
- if (FAI >= NParameters) {
- assert(M && "expected macro to be defined");
- return Error(IDLoc, "parameter named '" + FA.Name +
- "' does not exist for macro '" + M->Name + "'");
- }
- PI = FAI;
+ } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
+ if (M)
+ return addErrorSuffix(" in '" + M->Name + "' macro");
+ else
+ return true;
}
if (!FA.Value.empty()) {
@@ -2883,14 +3070,15 @@ bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
// At the end of the statement, fill in remaining arguments that have
// default values. If there aren't any, then the next argument is
// required but missing
- if (Lexer.is(AsmToken::EndOfStatement)) {
+ if (Lexer.is(EndTok)) {
bool Failure = false;
for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
if (A[FAI].empty()) {
if (M->Parameters[FAI].Required) {
Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
"missing value for required parameter "
- "'" + M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
+ "'" +
+ M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
Failure = true;
}
@@ -2908,7 +3096,8 @@ bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
return TokError("too many positional arguments");
}
-bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
+bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
+ AsmToken::TokenKind ArgumentEndTok) {
// Arbitrarily limit macro nesting depth (default matches 'as'). We can
// eliminate this, although we should protect against infinite loops.
unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
@@ -2922,7 +3111,7 @@ bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
}
MCAsmMacroArguments A;
- if (parseMacroArguments(M, A))
+ if (parseMacroArguments(M, A, ArgumentEndTok))
return true;
// Macro instantiation is lexical, unfortunately. We construct a new buffer
@@ -2931,12 +3120,12 @@ bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
StringRef Body = M->Body;
raw_svector_ostream OS(Buf);
- if (expandMacro(OS, Body, M->Parameters, A, true, getTok().getLoc()))
+ if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
return true;
- // We include the .endmacro in the buffer as our cue to exit the macro
+ // We include the endm in the buffer as our cue to exit the macro
// instantiation.
- OS << ".endmacro\n";
+ OS << "endm\n";
std::unique_ptr<MemoryBuffer> Instantiation =
MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
@@ -2952,14 +3141,17 @@ bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
// Jump to the macro instantiation and prime the lexer.
CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
+ EndStatementAtEOFStack.push_back(true);
Lex();
return false;
}
void MasmParser::handleMacroExit() {
- // Jump to the EndOfStatement we should return to, and consume it.
- jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer);
+ // Jump to the token we should return to, and consume it.
+ EndStatementAtEOFStack.pop_back();
+ jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
+ EndStatementAtEOFStack.back());
Lex();
// Pop the instantiation entry.
@@ -2967,6 +3159,63 @@ void MasmParser::handleMacroExit() {
ActiveMacros.pop_back();
}
+bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
+ if (!M->IsFunction)
+ return Error(NameLoc, "cannot invoke macro procedure as function");
+
+ if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
+ "' requires arguments in parentheses") ||
+ handleMacroEntry(M, NameLoc, AsmToken::RParen))
+ return true;
+
+ // Parse all statements in the macro, retrieving the exit value when it ends.
+ std::string ExitValue;
+ SmallVector<AsmRewrite, 4> AsmStrRewrites;
+ while (Lexer.isNot(AsmToken::Eof)) {
+ ParseStatementInfo Info(&AsmStrRewrites);
+ bool Parsed = parseStatement(Info, nullptr);
+
+ if (!Parsed && Info.ExitValue.hasValue()) {
+ ExitValue = std::move(*Info.ExitValue);
+ break;
+ }
+
+ // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
+ // for printing ErrMsg via Lex() only if no (presumably better) parser error
+ // exists.
+ if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
+ Lex();
+ }
+
+ // parseStatement returned true so may need to emit an error.
+ printPendingErrors();
+
+ // Skipping to the next line if needed.
+ if (Parsed && !getLexer().isAtStartOfStatement())
+ eatToEndOfStatement();
+ }
+
+ // Consume the right-parenthesis on the other side of the arguments.
+ if (parseToken(AsmToken::RParen, "invoking macro function '" + M->Name +
+ "' requires arguments in parentheses"))
+ return true;
+
+ // Exit values may require lexing, unfortunately. We construct a new buffer to
+ // hold the exit value.
+ std::unique_ptr<MemoryBuffer> MacroValue =
+ MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
+
+ // Jump from this location to the instantiated exit value, and prime the
+ // lexer.
+ CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
+ /*EndStatementAtEOF=*/false);
+ EndStatementAtEOFStack.push_back(false);
+ Lex();
+
+ return false;
+}
+
/// parseIdentifier:
/// ::= identifier
/// ::= string
@@ -3035,7 +3284,7 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
// Accept a text-list, not just one text-item.
auto parseItem = [&]() -> bool {
if (parseTextItem(Value))
- return true;
+ return TokError("expected text item");
Var.TextValue += Value;
return false;
};
@@ -3053,6 +3302,11 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
SMLoc EndLoc, StartLoc = Lexer.getLoc();
if (parseExpression(Expr, EndLoc))
return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
+ Sym->setRedefinable(Var.Redefinable);
+ Sym->setVariableValue(Expr);
+ Sym->setExternal(false);
+
if (Expr->evaluateAsAbsolute(Var.NumericValue,
getStreamer().getAssemblerPtr()))
return false;
@@ -3069,70 +3323,19 @@ bool MasmParser::parseEscapedString(std::string &Data) {
return true;
Data = "";
+ char Quote = getTok().getString().front();
StringRef Str = getTok().getStringContents();
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- if (Str[i] != '\\') {
- Data += Str[i];
- continue;
- }
-
- // Recognize escaped characters. Note that this escape semantics currently
- // loosely follows Darwin 'as'.
- ++i;
- if (i == e)
- return TokError("unexpected backslash at end of string");
-
- // Recognize hex sequences similarly to GNU 'as'.
- if (Str[i] == 'x' || Str[i] == 'X') {
- size_t length = Str.size();
- if (i + 1 >= length || !isHexDigit(Str[i + 1]))
- return TokError("invalid hexadecimal escape sequence");
-
- // Consume hex characters. GNU 'as' reads all hexadecimal characters and
- // then truncates to the lower 16 bits. Seems reasonable.
- unsigned Value = 0;
- while (i + 1 < length && isHexDigit(Str[i + 1]))
- Value = Value * 16 + hexDigitValue(Str[++i]);
-
- Data += (unsigned char)(Value & 0xFF);
- continue;
- }
-
- // Recognize octal sequences.
- if ((unsigned)(Str[i] - '0') <= 7) {
- // Consume up to three octal characters.
- unsigned Value = Str[i] - '0';
-
- if (i + 1 != e && ((unsigned)(Str[i + 1] - '0')) <= 7) {
+ Data.reserve(Str.size());
+ for (size_t i = 0, e = Str.size(); i != e; ++i) {
+ Data.push_back(Str[i]);
+ if (Str[i] == Quote) {
+ // MASM treats doubled delimiting quotes as an escaped delimiting quote.
+ // If we're escaping the string's trailing delimiter, we're definitely
+ // missing a quotation mark.
+ if (i + 1 == Str.size())
+ return Error(getTok().getLoc(), "missing quotation mark in string");
+ if (Str[i + 1] == Quote)
++i;
- Value = Value * 8 + (Str[i] - '0');
-
- if (i + 1 != e && ((unsigned)(Str[i + 1] - '0')) <= 7) {
- ++i;
- Value = Value * 8 + (Str[i] - '0');
- }
- }
-
- if (Value > 255)
- return TokError("invalid octal escape sequence (out of range)");
-
- Data += (unsigned char)Value;
- continue;
- }
-
- // Otherwise recognize individual escapes.
- switch (Str[i]) {
- default:
- // Just reject invalid escape sequences for now.
- return TokError("invalid escape sequence (unrecognized character)");
-
- case 'b': Data += '\b'; break;
- case 'f': Data += '\f'; break;
- case 'n': Data += '\n'; break;
- case 'r': Data += '\r'; break;
- case 't': Data += '\t'; break;
- case '"': Data += '"'; break;
- case '\\': Data += '\\'; break;
}
}
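
The rewritten parseEscapedString above replaces GNU-style backslash escapes with MASM's rule: inside a string, the delimiter character is written twice to stand for itself, and a lone delimiter at the very end means the closing quote is missing. A standalone sketch of that rule; undoubleQuotes is an illustrative helper, not parser API:

    #include <optional>
    #include <string>

    // Undouble the delimiter inside a MASM string literal's contents.
    // Quote is the delimiter ('"' or '\''); returns nullopt when a lone
    // delimiter appears at the end ("missing quotation mark in string").
    std::optional<std::string> undoubleQuotes(const std::string &Contents,
                                              char Quote) {
      std::string Data;
      Data.reserve(Contents.size());
      for (std::size_t I = 0, E = Contents.size(); I != E; ++I) {
        Data.push_back(Contents[I]);
        if (Contents[I] == Quote) {
          if (I + 1 == E)
            return std::nullopt;
          if (Contents[I + 1] == Quote)
            ++I;  // the second quote is the escape; emit only one
        }
      }
      return Data;
    }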
@@ -3145,7 +3348,7 @@ bool MasmParser::parseAngleBracketString(std::string &Data) {
if (isAngleBracketString(StartLoc, EndLoc)) {
const char *StartChar = StartLoc.getPointer() + 1;
const char *EndChar = EndLoc.getPointer() - 1;
- jumpToLoc(EndLoc, CurBuffer);
+ jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
// Eat from '<' to '>'.
Lex();
@@ -3157,8 +3360,42 @@ bool MasmParser::parseAngleBracketString(std::string &Data) {
/// textItem ::= textLiteral | textMacroID | % constExpr
bool MasmParser::parseTextItem(std::string &Data) {
- // TODO(epastor): Support textMacroID and % expansion of expressions.
- return parseAngleBracketString(Data);
+ switch (getTok().getKind()) {
+ default:
+ return true;
+ case AsmToken::Percent: {
+ int64_t Res;
+ if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
+ return true;
+ Data = std::to_string(Res);
+ return false;
+ }
+ case AsmToken::Less:
+ case AsmToken::LessEqual:
+ case AsmToken::LessLess:
+ case AsmToken::LessGreater:
+ return parseAngleBracketString(Data);
+ case AsmToken::Identifier: {
+ StringRef ID;
+ if (parseIdentifier(ID))
+ return true;
+ Data = ID.str();
+
+ auto it = Variables.find(ID);
+ if (it == Variables.end())
+ return true;
+
+ while (it != Variables.end()) {
+ const Variable &Var = it->second;
+ if (!Var.IsText)
+ return true;
+ Data = Var.TextValue;
+ it = Variables.find(Data);
+ }
+ return false;
+ }
+ }
+ llvm_unreachable("unhandled token kind");
}
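// --- Editorial aside (not part of the upstream patch) ---
// Illustrative sketch of the identifier case above: a name used as a text
// item is resolved through text-macro definitions until the value is no
// longer the name of another text macro. Standalone code, illustrative names.
#include <iostream>
#include <map>
#include <string>

int main() {
  // greeting TEXTEQU <message>
  // message  TEXTEQU <Hello, world>
  std::map<std::string, std::string> TextMacros = {
      {"greeting", "message"}, {"message", "Hello, world"}};

  std::string Data = "greeting";
  auto It = TextMacros.find(Data);
  while (It != TextMacros.end()) {
    Data = It->second;          // follow one link of the chain
    It = TextMacros.find(Data); // keep going while the value names a macro
  }
  std::cout << Data << "\n"; // prints "Hello, world"
}
// --- End editorial aside ---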
/// parseDirectiveAscii:
@@ -3202,30 +3439,20 @@ bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
bool MasmParser::parseScalarInitializer(unsigned Size,
SmallVectorImpl<const MCExpr *> &Values,
unsigned StringPadLength) {
- if (getTok().is(AsmToken::String)) {
- StringRef Value = getTok().getStringContents();
- if (Size == 1) {
- // Treat each character as an initializer.
- for (const char CharVal : Value)
- Values.push_back(MCConstantExpr::create(CharVal, getContext()));
-
- // Pad the string with spaces to the specified length.
- for (size_t i = Value.size(); i < StringPadLength; ++i)
- Values.push_back(MCConstantExpr::create(' ', getContext()));
- } else {
- // Treat the string as an initial value in big-endian representation.
- if (Value.size() > Size)
- return Error(getTok().getLoc(), "out of range literal value");
-
- uint64_t IntValue = 0;
- for (const unsigned char CharVal : Value.bytes())
- IntValue = (IntValue << 8) | CharVal;
- Values.push_back(MCConstantExpr::create(IntValue, getContext()));
- }
- Lex();
+ if (Size == 1 && getTok().is(AsmToken::String)) {
+ std::string Value;
+ if (parseEscapedString(Value))
+ return true;
+ // Treat each character as an initializer.
+ for (const unsigned char CharVal : Value)
+ Values.push_back(MCConstantExpr::create(CharVal, getContext()));
+
+ // Pad the string with spaces to the specified length.
+ for (size_t i = Value.size(); i < StringPadLength; ++i)
+ Values.push_back(MCConstantExpr::create(' ', getContext()));
} else {
const MCExpr *Value;
- if (checkForValidSection() || parseExpression(Value))
+ if (parseExpression(Value))
return true;
if (getTok().is(AsmToken::Identifier) &&
getTok().getString().equals_lower("dup")) {
@@ -3271,7 +3498,7 @@ bool MasmParser::parseScalarInstList(unsigned Size,
return false;
}
-bool MasmParser::emitIntegralValues(unsigned Size) {
+bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
SmallVector<const MCExpr *, 1> Values;
if (checkForValidSection() || parseScalarInstList(Size, Values))
return true;
@@ -3279,13 +3506,15 @@ bool MasmParser::emitIntegralValues(unsigned Size) {
for (auto Value : Values) {
emitIntValue(Value, Size);
}
+ if (Count)
+ *Count = Values.size();
return false;
}
// Add a field to the current structure.
bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
StructInfo &Struct = StructInProgress.back();
- FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL);
+ FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
IntFieldInfo &IntInfo = Field.Contents.IntInfo;
Field.Type = Size;
@@ -3318,16 +3547,24 @@ bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
/// parseDirectiveNamedValue
/// ::= name (byte | word | ... ) [ expression (, expression)* ]
-bool MasmParser::parseDirectiveNamedValue(StringRef IDVal, unsigned Size,
+bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
StringRef Name, SMLoc NameLoc) {
if (StructInProgress.empty()) {
// Initialize named data value.
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().emitLabel(Sym);
- if (emitIntegralValues(Size))
- return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+ unsigned Count;
+ if (emitIntegralValues(Size, &Count))
+ return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
+
+ AsmTypeInfo Type;
+ Type.Name = TypeName;
+ Type.Size = Size * Count;
+ Type.ElementSize = Size;
+ Type.Length = Count;
+ KnownType[Name.lower()] = Type;
} else if (addIntegralField(Name, Size)) {
- return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+ return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
}
return false;
@@ -3356,10 +3593,13 @@ bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
// We don't truly support arithmetic on floating point expressions, so we
// have to manually parse unary prefixes.
bool IsNeg = false;
+ SMLoc SignLoc;
if (getLexer().is(AsmToken::Minus)) {
+ SignLoc = getLexer().getLoc();
Lexer.Lex();
IsNeg = true;
} else if (getLexer().is(AsmToken::Plus)) {
+ SignLoc = getLexer().getLoc();
Lexer.Lex();
}
@@ -3381,6 +3621,20 @@ bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
Value = APFloat::getZero(Semantics);
else
return TokError("invalid floating point literal");
+ } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
+ // MASM hexadecimal floating-point literal; no APFloat conversion needed.
+ // To match ML64.exe, ignore the initial sign.
+ unsigned SizeInBits = Value.getSizeInBits(Semantics);
+ if (SizeInBits != (IDVal.size() << 2))
+ return TokError("invalid floating point literal");
+
+ // Consume the numeric token.
+ Lex();
+
+ Res = APInt(SizeInBits, IDVal, 16);
+ if (SignLoc.isValid())
+ return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
+ return false;
} else if (errorToBool(
Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
.takeError())) {
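// --- Editorial aside (not part of the upstream patch) ---
// What the new 'r'/'R' branch above accepts: a MASM hex float literal is the
// raw bit pattern of the value, so the hex digit count must match the operand
// size exactly and no floating-point conversion takes place. For example, the
// REAL4 literal 3F800000r carries the bit pattern of 1.0f. Standalone,
// illustrative code (not LLVM's APInt path):
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

int main() {
  std::string Literal = "3F800000"; // "3F800000r" with the trailing 'r' dropped
  if (Literal.size() * 4 != 32)     // 8 hex digits <=> 32 bits for REAL4
    return 1;
  uint32_t Bits = static_cast<uint32_t>(std::stoul(Literal, nullptr, 16));
  float Value;
  std::memcpy(&Value, &Bits, sizeof(Value)); // reinterpret the raw pattern
  std::cout << Value << "\n";                // prints 1
}
// --- End editorial aside ---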
@@ -3444,28 +3698,33 @@ bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
}
// Initialize real data values.
-bool MasmParser::emitRealValues(const fltSemantics &Semantics) {
+bool MasmParser::emitRealValues(const fltSemantics &Semantics,
+ unsigned *Count) {
+ if (checkForValidSection())
+ return true;
+
SmallVector<APInt, 1> ValuesAsInt;
if (parseRealInstList(Semantics, ValuesAsInt))
return true;
for (const APInt &AsInt : ValuesAsInt) {
- getStreamer().emitIntValue(AsInt.getLimitedValue(),
- AsInt.getBitWidth() / 8);
+ getStreamer().emitIntValue(AsInt);
}
+ if (Count)
+ *Count = ValuesAsInt.size();
return false;
}
// Add a real field to the current struct.
-bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) {
+bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
+ size_t Size) {
StructInfo &Struct = StructInProgress.back();
- FieldInfo &Field = Struct.addField(Name, FT_REAL);
+ FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
RealFieldInfo &RealInfo = Field.Contents.RealInfo;
Field.SizeOf = 0;
- if (checkForValidSection() ||
- parseRealInstList(Semantics, RealInfo.AsIntValues))
+ if (parseRealInstList(Semantics, RealInfo.AsIntValues))
return true;
Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
@@ -3479,38 +3738,42 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) {
}
/// parseDirectiveRealValue
-/// ::= (real4 | real8) [ expression (, expression)* ]
+/// ::= (real4 | real8 | real10) [ expression (, expression)* ]
bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
- const fltSemantics &Semantics) {
- if (checkForValidSection())
- return true;
-
+ const fltSemantics &Semantics,
+ size_t Size) {
if (StructInProgress.empty()) {
// Initialize data value.
if (emitRealValues(Semantics))
return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
- } else if (addRealField("", Semantics)) {
+ } else if (addRealField("", Semantics, Size)) {
return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
}
return false;
}
/// parseDirectiveNamedRealValue
-/// ::= name (real4 | real8) [ expression (, expression)* ]
-bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal,
+/// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
+bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
const fltSemantics &Semantics,
- StringRef Name, SMLoc NameLoc) {
- if (checkForValidSection())
- return true;
-
+ unsigned Size, StringRef Name,
+ SMLoc NameLoc) {
if (StructInProgress.empty()) {
// Initialize named data value.
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().emitLabel(Sym);
- if (emitRealValues(Semantics))
- return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
- } else if (addRealField(Name, Semantics)) {
- return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+ unsigned Count;
+ if (emitRealValues(Semantics, &Count))
+ return addErrorSuffix(" in '" + TypeName + "' directive");
+
+ AsmTypeInfo Type;
+ Type.Name = TypeName;
+ Type.Size = Size * Count;
+ Type.ElementSize = Size;
+ Type.Length = Count;
+ KnownType[Name.lower()] = Type;
+ } else if (addRealField(Name, Semantics, Size)) {
+ return addErrorSuffix(" in '" + TypeName + "' directive");
}
return false;
}
@@ -3584,8 +3847,20 @@ bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
const RealFieldInfo &Contents,
FieldInitializer &Initializer) {
- const fltSemantics &Semantics =
- (Field.Type == 4) ? APFloat::IEEEsingle() : APFloat::IEEEdouble();
+ const fltSemantics *Semantics;
+ switch (Field.Type) {
+ case 4:
+ Semantics = &APFloat::IEEEsingle();
+ break;
+ case 8:
+ Semantics = &APFloat::IEEEdouble();
+ break;
+ case 10:
+ Semantics = &APFloat::x87DoubleExtended();
+ break;
+ default:
+ llvm_unreachable("unknown real field type");
+ }
SMLoc Loc = getTok().getLoc();
@@ -3593,20 +3868,20 @@ bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
if (parseOptionalToken(AsmToken::LCurly)) {
if (Field.LengthOf == 1)
return Error(Loc, "Cannot initialize scalar field with array value");
- if (parseRealInstList(Semantics, AsIntValues, AsmToken::RCurly) ||
+ if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
parseToken(AsmToken::RCurly))
return true;
} else if (parseOptionalAngleBracketOpen()) {
if (Field.LengthOf == 1)
return Error(Loc, "Cannot initialize scalar field with array value");
- if (parseRealInstList(Semantics, AsIntValues, AsmToken::Greater) ||
+ if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
parseAngleBracketClose())
return true;
} else if (Field.LengthOf > 1) {
return Error(Loc, "Cannot initialize array field with scalar value");
} else {
AsIntValues.emplace_back();
- if (parseRealValue(Semantics, AsIntValues.back()))
+ if (parseRealValue(*Semantics, AsIntValues.back()))
return true;
}
@@ -3769,8 +4044,7 @@ bool MasmParser::parseStructInstList(
return true;
for (int i = 0; i < Repetitions; ++i)
- Initializers.insert(Initializers.end(), DuplicatedValues.begin(),
- DuplicatedValues.end());
+ llvm::append_range(Initializers, DuplicatedValues);
} else {
Initializers.emplace_back();
if (parseStructInitializer(Structure, Initializers.back()))
@@ -3830,20 +4104,6 @@ bool MasmParser::emitFieldValue(const FieldInfo &Field) {
llvm_unreachable("Unhandled FieldType enum");
}
-bool MasmParser::emitStructValue(const StructInfo &Structure) {
- size_t Offset = 0;
- for (const auto &Field : Structure.Fields) {
- getStreamer().emitZeros(Field.Offset - Offset);
- if (emitFieldValue(Field))
- return true;
- Offset = Field.Offset + Field.SizeOf;
- }
- // Add final padding.
- if (Offset != Structure.Size)
- getStreamer().emitZeros(Structure.Size - Offset);
- return false;
-}
-
bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
const IntFieldInfo &Contents,
const IntFieldInfo &Initializer) {
@@ -3937,7 +4197,8 @@ bool MasmParser::emitStructInitializer(const StructInfo &Structure,
}
// Set data values from initializers.
-bool MasmParser::emitStructValues(const StructInfo &Structure) {
+bool MasmParser::emitStructValues(const StructInfo &Structure,
+ unsigned *Count) {
std::vector<StructInitializer> Initializers;
if (parseStructInstList(Structure, Initializers))
return true;
@@ -3947,13 +4208,16 @@ bool MasmParser::emitStructValues(const StructInfo &Structure) {
return true;
}
+ if (Count)
+ *Count = Initializers.size();
return false;
}
// Declare a field in the current struct.
bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
StructInfo &OwningStruct = StructInProgress.back();
- FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT);
+ FieldInfo &Field =
+ OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
StructFieldInfo &StructInfo = Field.Contents.StructInfo;
StructInfo.Structure = Structure;
@@ -3996,9 +4260,15 @@ bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
// Initialize named data value.
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().emitLabel(Sym);
- KnownType[Name] = &Structure;
- if (emitStructValues(Structure))
+ unsigned Count;
+ if (emitStructValues(Structure, &Count))
return true;
+ AsmTypeInfo Type;
+ Type.Name = Structure.Name;
+ Type.Size = Structure.Size * Count;
+ Type.ElementSize = Structure.Size;
+ Type.Length = Count;
+ KnownType[Name.lower()] = Type;
} else if (addStructField(Name, Structure)) {
return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
}
@@ -4067,6 +4337,9 @@ bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
if (parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
+ // Reserve space to ensure Alignment doesn't get invalidated when
+ // StructInProgress grows.
+ StructInProgress.reserve(StructInProgress.size() + 1);
StructInProgress.emplace_back(Name, DirKind == DK_UNION,
StructInProgress.back().Alignment);
return false;
@@ -4081,8 +4354,10 @@ bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
StructInProgress.back().Name + "'");
StructInfo Structure = StructInProgress.pop_back_val();
- // Pad to make the structure's size divisible by its alignment.
- Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
+ // Pad to make the structure's size divisible by the smaller of its alignment
+ // and the size of its largest field.
+ Structure.Size = llvm::alignTo(
+ Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
Structs[Name.lower()] = Structure;
if (parseToken(AsmToken::EndOfStatement))
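// --- Editorial aside (not part of the upstream patch) ---
// A worked example of the ENDS padding rule above, using a local stand-in for
// llvm::alignTo. If the structure's declared alignment is 8 but its largest
// field is 4 bytes (AlignmentSize == 4), the size is rounded up to a multiple
// of 4, not 8. Names and values here are illustrative only.
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint64_t alignToLocal(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  uint64_t Size = 9, Alignment = 8, AlignmentSize = 4;
  uint64_t Padded = alignToLocal(Size, std::min(Alignment, AlignmentSize));
  assert(Padded == 12); // 9 rounded up to the next multiple of 4
}
// --- End editorial aside ---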
@@ -4127,7 +4402,8 @@ bool MasmParser::parseDirectiveNestedEnds() {
else
ParentStruct.Size += Structure.Size;
} else {
- FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT);
+ FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
+ Structure.AlignmentSize);
StructFieldInfo &StructInfo = Field.Contents.StructInfo;
Field.Type = Structure.Size;
Field.LengthOf = 1;
@@ -5199,76 +5475,60 @@ bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
return false;
}
-/// parseDirectiveAltmacro
-/// ::= .altmacro
-/// ::= .noaltmacro
-bool MasmParser::parseDirectiveAltmacro(StringRef Directive) {
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '" + Directive + "' directive");
- AltMacroMode = (Directive == ".altmacro");
- return false;
-}
-
/// parseDirectiveMacro
-/// ::= .macro name[,] [parameters]
-bool MasmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
- StringRef Name;
- if (parseIdentifier(Name))
- return TokError("expected identifier in '.macro' directive");
-
- if (getLexer().is(AsmToken::Comma))
- Lex();
-
+/// ::= name macro [parameters]
+/// ["LOCAL" identifiers]
+/// parameters ::= parameter [, parameter]*
+/// parameter ::= name ":" qualifier
+/// qualifier ::= "req" | "vararg" | "=" macro_argument
+bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
MCAsmMacroParameters Parameters;
while (getLexer().isNot(AsmToken::EndOfStatement)) {
-
if (!Parameters.empty() && Parameters.back().Vararg)
return Error(Lexer.getLoc(),
"Vararg parameter '" + Parameters.back().Name +
- "' should be last one in the list of parameters.");
+ "' should be last in the list of parameters");
MCAsmMacroParameter Parameter;
if (parseIdentifier(Parameter.Name))
- return TokError("expected identifier in '.macro' directive");
+ return TokError("expected identifier in 'macro' directive");
// Emit an error if two (or more) named parameters share the same name.
for (const MCAsmMacroParameter& CurrParam : Parameters)
- if (CurrParam.Name.equals(Parameter.Name))
+ if (CurrParam.Name.equals_lower(Parameter.Name))
return TokError("macro '" + Name + "' has multiple parameters"
" named '" + Parameter.Name + "'");
if (Lexer.is(AsmToken::Colon)) {
Lex(); // consume ':'
- SMLoc QualLoc;
- StringRef Qualifier;
-
- QualLoc = Lexer.getLoc();
- if (parseIdentifier(Qualifier))
- return Error(QualLoc, "missing parameter qualifier for "
- "'" + Parameter.Name + "' in macro '" + Name + "'");
-
- if (Qualifier == "req")
- Parameter.Required = true;
- else if (Qualifier == "vararg")
- Parameter.Vararg = true;
- else
- return Error(QualLoc, Qualifier + " is not a valid parameter qualifier "
- "for '" + Parameter.Name + "' in macro '" + Name + "'");
- }
-
- if (getLexer().is(AsmToken::Equal)) {
- Lex();
-
- SMLoc ParamLoc;
-
- ParamLoc = Lexer.getLoc();
- if (parseMacroArgument(Parameter.Value, /*Vararg=*/false ))
- return true;
+ if (parseOptionalToken(AsmToken::Equal)) {
+ // Default value
+ SMLoc ParamLoc;
- if (Parameter.Required)
- Warning(ParamLoc, "pointless default value for required parameter "
- "'" + Parameter.Name + "' in macro '" + Name + "'");
+ ParamLoc = Lexer.getLoc();
+ if (parseMacroArgument(nullptr, Parameter.Value))
+ return true;
+ } else {
+ SMLoc QualLoc;
+ StringRef Qualifier;
+
+ QualLoc = Lexer.getLoc();
+ if (parseIdentifier(Qualifier))
+ return Error(QualLoc, "missing parameter qualifier for "
+ "'" +
+ Parameter.Name + "' in macro '" + Name +
+ "'");
+
+ if (Qualifier.equals_lower("req"))
+ Parameter.Required = true;
+ else if (Qualifier.equals_lower("vararg"))
+ Parameter.Vararg = true;
+ else
+ return Error(QualLoc,
+ Qualifier + " is not a valid parameter qualifier for '" +
+ Parameter.Name + "' in macro '" + Name + "'");
+ }
}
Parameters.push_back(std::move(Parameter));
@@ -5280,9 +5540,28 @@ bool MasmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
// Eat just the end of statement.
Lexer.Lex();
+ std::vector<std::string> Locals;
+ if (getTok().is(AsmToken::Identifier) &&
+ getTok().getIdentifier().equals_lower("local")) {
+ Lex(); // Eat the LOCAL directive.
+
+ StringRef ID;
+ while (true) {
+ if (parseIdentifier(ID))
+ return true;
+ Locals.push_back(ID.lower());
+
+ // If we see a comma, continue (and allow line continuation).
+ if (!parseOptionalToken(AsmToken::Comma))
+ break;
+ parseOptionalToken(AsmToken::EndOfStatement);
+ }
+ }
+
// Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
AsmToken EndToken, StartToken = getTok();
unsigned MacroDepth = 0;
+ bool IsMacroFunction = false;
// Lex the macro definition.
while (true) {
// Ignore Lexing errors in macros.
@@ -5292,12 +5571,12 @@ bool MasmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
// Check whether we have reached the end of the file.
if (getLexer().is(AsmToken::Eof))
- return Error(DirectiveLoc, "no matching '.endmacro' in definition");
+ return Error(NameLoc, "no matching 'endm' in definition");
- // Otherwise, check whether we have reach the .endmacro.
+ // Otherwise, check whether we have reached the 'endm'... and determine if
+ // this is a macro function.
if (getLexer().is(AsmToken::Identifier)) {
- if (getTok().getIdentifier() == ".endm" ||
- getTok().getIdentifier() == ".endmacro") {
+ if (getTok().getIdentifier().equals_lower("endm")) {
if (MacroDepth == 0) { // Outermost macro.
EndToken = getTok();
Lexer.Lex();
@@ -5309,9 +5588,14 @@ bool MasmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
// Otherwise we just found the end of an inner macro.
--MacroDepth;
}
- } else if (getTok().getIdentifier() == ".macro") {
- // We allow nested macros. Those aren't instantiated until the outermost
- // macro is expanded so just ignore them for now.
+ } else if (getTok().getIdentifier().equals_lower("exitm")) {
+ if (MacroDepth == 0 &&
+ getLexer().peekTok().isNot(AsmToken::EndOfStatement)) {
+ IsMacroFunction = true;
+ }
+ } else if (isMacroLikeDirective()) {
+ // We allow nested macros. Those aren't instantiated until the
+ // outermost macro is expanded so just ignore them for now.
++MacroDepth;
}
}
@@ -5320,129 +5604,31 @@ bool MasmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
eatToEndOfStatement();
}
- if (getContext().lookupMacro(Name)) {
- return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
+ if (getContext().lookupMacro(Name.lower())) {
+ return Error(NameLoc, "macro '" + Name + "' is already defined");
}
const char *BodyStart = StartToken.getLoc().getPointer();
const char *BodyEnd = EndToken.getLoc().getPointer();
StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
- checkForBadMacro(DirectiveLoc, Name, Body, Parameters);
- MCAsmMacro Macro(Name, Body, std::move(Parameters));
+ MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
+ IsMacroFunction);
DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
Macro.dump());
getContext().defineMacro(Name, std::move(Macro));
return false;
}
-/// checkForBadMacro
-///
-/// With the support added for named parameters there may be code out there that
-/// is transitioning from positional parameters. In versions of gas that did
-/// not support named parameters they would be ignored on the macro definition.
-/// But to support both styles of parameters this is not possible so if a macro
-/// definition has named parameters but does not use them and has what appears
-/// to be positional parameters, strings like $1, $2, ... and $n, then issue a
-/// warning that the positional parameter found in body which have no effect.
-/// Hoping the developer will either remove the named parameters from the macro
-/// definition so the positional parameters get used if that was what was
-/// intended or change the macro to use the named parameters. It is possible
-/// this warning will trigger when the none of the named parameters are used
-/// and the strings like $1 are infact to simply to be passed trough unchanged.
-void MasmParser::checkForBadMacro(SMLoc DirectiveLoc, StringRef Name,
- StringRef Body,
- ArrayRef<MCAsmMacroParameter> Parameters) {
- // If this macro is not defined with named parameters the warning we are
- // checking for here doesn't apply.
- unsigned NParameters = Parameters.size();
- if (NParameters == 0)
- return;
-
- bool NamedParametersFound = false;
- bool PositionalParametersFound = false;
-
- // Look at the body of the macro for use of both the named parameters and what
- // are likely to be positional parameters. This is what expandMacro() is
- // doing when it finds the parameters in the body.
- while (!Body.empty()) {
- // Scan for the next possible parameter.
- std::size_t End = Body.size(), Pos = 0;
- for (; Pos != End; ++Pos) {
- // Check for a substitution or escape.
- // This macro is defined with parameters, look for \foo, \bar, etc.
- if (Body[Pos] == '\\' && Pos + 1 != End)
- break;
-
- // This macro should have parameters, but look for $0, $1, ..., $n too.
- if (Body[Pos] != '$' || Pos + 1 == End)
- continue;
- char Next = Body[Pos + 1];
- if (Next == '$' || Next == 'n' ||
- isdigit(static_cast<unsigned char>(Next)))
- break;
- }
-
- // Check if we reached the end.
- if (Pos == End)
- break;
-
- if (Body[Pos] == '$') {
- switch (Body[Pos + 1]) {
- // $$ => $
- case '$':
- break;
-
- // $n => number of arguments
- case 'n':
- PositionalParametersFound = true;
- break;
-
- // $[0-9] => argument
- default: {
- PositionalParametersFound = true;
- break;
- }
- }
- Pos += 2;
- } else {
- unsigned I = Pos + 1;
- while (isIdentifierChar(Body[I]) && I + 1 != End)
- ++I;
-
- const char *Begin = Body.data() + Pos + 1;
- StringRef Argument(Begin, I - (Pos + 1));
- unsigned Index = 0;
- for (; Index < NParameters; ++Index)
- if (Parameters[Index].Name == Argument)
- break;
-
- if (Index == NParameters) {
- if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')')
- Pos += 3;
- else {
- Pos = I;
- }
- } else {
- NamedParametersFound = true;
- Pos += 1 + Argument.size();
- }
- }
- // Update the scan point.
- Body = Body.substr(Pos);
- }
-
- if (!NamedParametersFound && PositionalParametersFound)
- Warning(DirectiveLoc, "macro defined with named parameters which are not "
- "used in macro body, possible positional parameter "
- "found in body which will have no effect");
-}
-
/// parseDirectiveExitMacro
-/// ::= .exitm
-bool MasmParser::parseDirectiveExitMacro(StringRef Directive) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '" + Directive + "' directive"))
- return true;
+/// ::= "exitm" [textitem]
+bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
+ StringRef Directive,
+ std::string &Value) {
+ SMLoc EndLoc = getTok().getLoc();
+ if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
+ return Error(EndLoc,
+ "unable to parse text item in '" + Directive + "' directive");
+ eatToEndOfStatement();
if (!isInsideMacroInstantiation())
return TokError("unexpected '" + Directive + "' in file, "
@@ -5459,8 +5645,7 @@ bool MasmParser::parseDirectiveExitMacro(StringRef Directive) {
}
/// parseDirectiveEndMacro
-/// ::= .endm
-/// ::= .endmacro
+/// ::= endm
bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '" + Directive + "' directive");
@@ -5479,23 +5664,27 @@ bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
}
/// parseDirectivePurgeMacro
-/// ::= .purgem
+/// ::= purge identifier ( , identifier )*
bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
StringRef Name;
- SMLoc Loc;
- if (parseTokenLoc(Loc) ||
- check(parseIdentifier(Name), Loc,
- "expected identifier in '.purgem' directive") ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.purgem' directive"))
- return true;
+ while (true) {
+ SMLoc NameLoc;
+ if (parseTokenLoc(NameLoc) ||
+ check(parseIdentifier(Name), NameLoc,
+ "expected identifier in 'purge' directive"))
+ return true;
- if (!getContext().lookupMacro(Name))
- return Error(DirectiveLoc, "macro '" + Name + "' is not defined");
+ DEBUG_WITH_TYPE("asm-macros", dbgs()
+ << "Un-defining macro: " << Name << "\n");
+ if (!getContext().lookupMacro(Name.lower()))
+ return Error(NameLoc, "macro '" + Name + "' is not defined");
+ getContext().undefineMacro(Name.lower());
+
+ if (!parseOptionalToken(AsmToken::Comma))
+ break;
+ parseOptionalToken(AsmToken::EndOfStatement);
+ }
- getContext().undefineMacro(Name);
- DEBUG_WITH_TYPE("asm-macros", dbgs()
- << "Un-defining macro: " << Name << "\n");
return false;
}
@@ -5603,16 +5792,17 @@ bool MasmParser::parseDirectiveComm(bool IsLocal) {
/// [[text]]
/// [[text]] delimiter [[text]]
bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
- StringRef FirstLine = parseStringToEndOfStatement();
+ std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
- StringRef Delimiter = FirstLine.take_front(DelimiterEnd);
+ StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
if (Delimiter.empty())
return Error(DirectiveLoc, "no delimiter in 'comment' directive");
do {
if (getTok().is(AsmToken::Eof))
return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
Lex(); // eat end of statement
- } while (!parseStringToEndOfStatement().contains(Delimiter));
+ } while (
+ !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
return parseToken(AsmToken::EndOfStatement,
"unexpected token in 'comment' directive");
}
@@ -5626,7 +5816,7 @@ bool MasmParser::parseDirectiveInclude() {
SMLoc IncludeLoc = getTok().getLoc();
if (!parseAngleBracketString(Filename))
- Filename = parseStringToEndOfStatement().str();
+ Filename = parseStringTo(AsmToken::EndOfStatement);
if (check(!Filename.empty(), "missing filename in 'include' directive") ||
check(getTok().isNot(AsmToken::EndOfStatement),
"unexpected token in 'include' directive") ||
@@ -5671,7 +5861,7 @@ bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
}
/// parseDirectiveIfb
-/// ::= .ifb string
+/// ::= .ifb textitem
bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
@@ -5681,7 +5871,7 @@ bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
} else {
std::string Str;
if (parseTextItem(Str))
- return TokError("expected string parameter for 'ifb' directive");
+ return TokError("expected text item parameter for 'ifb' directive");
if (parseToken(AsmToken::EndOfStatement,
"unexpected token in 'ifb' directive"))
@@ -5695,14 +5885,15 @@ bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
}
/// parseDirectiveIfidn
-/// ::= ifidn string1, string2
-bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual, bool CaseInsensitive) {
+/// ::= ifidn textitem, textitem
+bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
+ bool CaseInsensitive) {
std::string String1, String2;
if (parseTextItem(String1)) {
if (ExpectEqual)
- return TokError("expected string parameter for 'ifidn' directive");
- return TokError("expected string parameter for 'ifdif' directive");
+ return TokError("expected text item parameter for 'ifidn' directive");
+ return TokError("expected text item parameter for 'ifdif' directive");
}
if (Lexer.isNot(AsmToken::Comma)) {
@@ -5715,8 +5906,8 @@ bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual, bool
if (parseTextItem(String2)) {
if (ExpectEqual)
- return TokError("expected string parameter for 'ifidn' directive");
- return TokError("expected string parameter for 'ifdif' directive");
+ return TokError("expected text item parameter for 'ifidn' directive");
+ return TokError("expected text item parameter for 'ifdif' directive");
}
TheCondStack.push_back(TheCondState);
@@ -5810,7 +6001,7 @@ bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
}
/// parseDirectiveElseIfb
-/// ::= elseifb expression
+/// ::= elseifb textitem
bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
if (TheCondState.TheCond != AsmCond::IfCond &&
TheCondState.TheCond != AsmCond::ElseIfCond)
@@ -5826,8 +6017,11 @@ bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
eatToEndOfStatement();
} else {
std::string Str;
- if (parseTextItem(Str))
- return TokError("expected string parameter for 'elseifb' directive");
+ if (parseTextItem(Str)) {
+ if (ExpectBlank)
+ return TokError("expected text item parameter for 'elseifb' directive");
+ return TokError("expected text item parameter for 'elseifnb' directive");
+ }
if (parseToken(AsmToken::EndOfStatement,
"unexpected token in 'elseifb' directive"))
@@ -5887,7 +6081,7 @@ bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
}
/// parseDirectiveElseIfidn
-/// ::= elseifidn string1, string2
+/// ::= elseifidn textitem, textitem
bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
bool CaseInsensitive) {
if (TheCondState.TheCond != AsmCond::IfCond &&
@@ -5907,8 +6101,9 @@ bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
if (parseTextItem(String1)) {
if (ExpectEqual)
- return TokError("expected string parameter for 'elseifidn' directive");
- return TokError("expected string parameter for 'elseifdif' directive");
+ return TokError(
+ "expected text item parameter for 'elseifidn' directive");
+ return TokError("expected text item parameter for 'elseifdif' directive");
}
if (Lexer.isNot(AsmToken::Comma)) {
@@ -5922,8 +6117,9 @@ bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
if (parseTextItem(String2)) {
if (ExpectEqual)
- return TokError("expected string parameter for 'elseifidn' directive");
- return TokError("expected string parameter for 'elseifdif' directive");
+ return TokError(
+ "expected text item parameter for 'elseifidn' directive");
+ return TokError("expected text item parameter for 'elseifdif' directive");
}
if (CaseInsensitive)
@@ -5983,9 +6179,9 @@ bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
}
}
- StringRef Message = ".err directive invoked in source file";
+ std::string Message = ".err directive invoked in source file";
if (Lexer.isNot(AsmToken::EndOfStatement))
- Message = parseStringToEndOfStatement();
+ Message = parseStringTo(AsmToken::EndOfStatement);
Lex();
return Error(DirectiveLoc, Message);
@@ -6005,11 +6201,11 @@ bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
if (parseTextItem(Text))
return Error(getTok().getLoc(), "missing text item in '.errb' directive");
- StringRef Message = ".errb directive invoked in source file";
+ std::string Message = ".errb directive invoked in source file";
if (Lexer.isNot(AsmToken::EndOfStatement)) {
if (parseToken(AsmToken::Comma))
return addErrorSuffix(" in '.errb' directive");
- Message = parseStringToEndOfStatement();
+ Message = parseStringTo(AsmToken::EndOfStatement);
}
Lex();
@@ -6047,11 +6243,11 @@ bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
}
}
- StringRef Message = ".errdef directive invoked in source file";
+ std::string Message = ".errdef directive invoked in source file";
if (Lexer.isNot(AsmToken::EndOfStatement)) {
if (parseToken(AsmToken::Comma))
return addErrorSuffix(" in '.errdef' directive");
- Message = parseStringToEndOfStatement();
+ Message = parseStringTo(AsmToken::EndOfStatement);
}
Lex();
@@ -6061,7 +6257,7 @@ bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
}
/// parseDirectiveErrorIfidn
-/// ::= .erridn textitem1, textitem2[, message]
+/// ::= .erridn textitem, textitem[, message]
bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
bool CaseInsensitive) {
if (!TheCondStack.empty()) {
@@ -6094,7 +6290,7 @@ bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
return TokError("expected string parameter for '.errdif' directive");
}
- StringRef Message;
+ std::string Message;
if (ExpectEqual)
Message = ".erridn directive invoked in source file";
else
@@ -6102,7 +6298,7 @@ bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
if (Lexer.isNot(AsmToken::EndOfStatement)) {
if (parseToken(AsmToken::Comma))
return addErrorSuffix(" in '.erridn' directive");
- Message = parseStringToEndOfStatement();
+ Message = parseStringTo(AsmToken::EndOfStatement);
}
Lex();
@@ -6134,11 +6330,11 @@ bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
if (parseAbsoluteExpression(ExprValue))
return addErrorSuffix(" in '.erre' directive");
- StringRef Message = ".erre directive invoked in source file";
+ std::string Message = ".erre directive invoked in source file";
if (Lexer.isNot(AsmToken::EndOfStatement)) {
if (parseToken(AsmToken::Comma))
return addErrorSuffix(" in '.erre' directive");
- Message = parseStringToEndOfStatement();
+ Message = parseStringTo(AsmToken::EndOfStatement);
}
Lex();
@@ -6183,6 +6379,7 @@ void MasmParser::initializeDirectiveKindMap() {
DirectiveKindMap["sqword"] = DK_SQWORD;
DirectiveKindMap["real4"] = DK_REAL4;
DirectiveKindMap["real8"] = DK_REAL8;
+ DirectiveKindMap["real10"] = DK_REAL10;
DirectiveKindMap["align"] = DK_ALIGN;
// DirectiveKindMap[".org"] = DK_ORG;
DirectiveKindMap["extern"] = DK_EXTERN;
@@ -6190,11 +6387,13 @@ void MasmParser::initializeDirectiveKindMap() {
// DirectiveKindMap[".comm"] = DK_COMM;
DirectiveKindMap["comment"] = DK_COMMENT;
DirectiveKindMap["include"] = DK_INCLUDE;
- // DirectiveKindMap[".rept"] = DK_REPT;
- // DirectiveKindMap[".rep"] = DK_REPT;
- // DirectiveKindMap[".irp"] = DK_IRP;
- // DirectiveKindMap[".irpc"] = DK_IRPC;
- // DirectiveKindMap[".endr"] = DK_ENDR;
+ DirectiveKindMap["repeat"] = DK_REPEAT;
+ DirectiveKindMap["rept"] = DK_REPEAT;
+ DirectiveKindMap["while"] = DK_WHILE;
+ DirectiveKindMap["for"] = DK_FOR;
+ DirectiveKindMap["irp"] = DK_FOR;
+ DirectiveKindMap["forc"] = DK_FORC;
+ DirectiveKindMap["irpc"] = DK_FORC;
DirectiveKindMap["if"] = DK_IF;
DirectiveKindMap["ife"] = DK_IFE;
DirectiveKindMap["ifb"] = DK_IFB;
@@ -6251,10 +6450,10 @@ void MasmParser::initializeDirectiveKindMap() {
// DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
// DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
// DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
- // DirectiveKindMap[".macro"] = DK_MACRO;
- // DirectiveKindMap[".exitm"] = DK_EXITM;
- // DirectiveKindMap[".endm"] = DK_ENDM;
- // DirectiveKindMap[".purgem"] = DK_PURGEM;
+ DirectiveKindMap["macro"] = DK_MACRO;
+ DirectiveKindMap["exitm"] = DK_EXITM;
+ DirectiveKindMap["endm"] = DK_ENDM;
+ DirectiveKindMap["purge"] = DK_PURGE;
DirectiveKindMap[".err"] = DK_ERR;
DirectiveKindMap[".errb"] = DK_ERRB;
DirectiveKindMap[".errnb"] = DK_ERRNB;
@@ -6266,10 +6465,15 @@ void MasmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".erridni"] = DK_ERRIDNI;
DirectiveKindMap[".erre"] = DK_ERRE;
DirectiveKindMap[".errnz"] = DK_ERRNZ;
- // DirectiveKindMap[".altmacro"] = DK_ALTMACRO;
- // DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO;
+ DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
+ DirectiveKindMap[".pushreg"] = DK_PUSHREG;
+ DirectiveKindMap[".savereg"] = DK_SAVEREG;
+ DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
+ DirectiveKindMap[".setframe"] = DK_SETFRAME;
+ DirectiveKindMap[".radix"] = DK_RADIX;
DirectiveKindMap["db"] = DK_DB;
DirectiveKindMap["dd"] = DK_DD;
+ DirectiveKindMap["df"] = DK_DF;
DirectiveKindMap["dq"] = DK_DQ;
DirectiveKindMap["dw"] = DK_DW;
DirectiveKindMap["echo"] = DK_ECHO;
@@ -6279,6 +6483,24 @@ void MasmParser::initializeDirectiveKindMap() {
DirectiveKindMap["ends"] = DK_ENDS;
}
+bool MasmParser::isMacroLikeDirective() {
+ if (getLexer().is(AsmToken::Identifier)) {
+ bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
+ .CasesLower("repeat", "rept", true)
+ .CaseLower("while", true)
+ .CasesLower("for", "irp", true)
+ .CasesLower("forc", "irpc", true)
+ .Default(false);
+ if (IsMacroLike)
+ return true;
+ }
+ if (getLexer().peekTok().is(AsmToken::Identifier) &&
+ getLexer().peekTok().getIdentifier().equals_lower("macro"))
+ return true;
+
+ return false;
+}
+
MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
AsmToken EndToken, StartToken = getTok();
@@ -6286,26 +6508,21 @@ MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
while (true) {
// Check whether we have reached the end of the file.
if (getLexer().is(AsmToken::Eof)) {
- printError(DirectiveLoc, "no matching '.endr' in definition");
+ printError(DirectiveLoc, "no matching 'endm' in definition");
return nullptr;
}
- if (Lexer.is(AsmToken::Identifier) &&
- (getTok().getIdentifier() == ".rep" ||
- getTok().getIdentifier() == ".rept" ||
- getTok().getIdentifier() == ".irp" ||
- getTok().getIdentifier() == ".irpc")) {
+ if (isMacroLikeDirective())
++NestLevel;
- }
- // Otherwise, check whether we have reached the .endr.
- if (Lexer.is(AsmToken::Identifier) && getTok().getIdentifier() == ".endr") {
+ // Otherwise, check whether we have reached the endm.
+ if (Lexer.is(AsmToken::Identifier) &&
+ getTok().getIdentifier().equals_lower("endm")) {
if (NestLevel == 0) {
EndToken = getTok();
Lex();
if (Lexer.isNot(AsmToken::EndOfStatement)) {
- printError(getTok().getLoc(),
- "unexpected token in '.endr' directive");
+ printError(getTok().getLoc(), "unexpected token in 'endm' directive");
return nullptr;
}
break;
@@ -6326,28 +6543,73 @@ MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
return &MacroLikeBodies.back();
}
+bool MasmParser::expandStatement(SMLoc Loc) {
+ std::string Body = parseStringTo(AsmToken::EndOfStatement);
+ SMLoc EndLoc = getTok().getLoc();
+
+ MCAsmMacroParameters Parameters;
+ MCAsmMacroArguments Arguments;
+ for (const auto &V : Variables) {
+ const Variable &Var = V.getValue();
+ if (Var.IsText) {
+ Parameters.emplace_back();
+ Arguments.emplace_back();
+ MCAsmMacroParameter &P = Parameters.back();
+ MCAsmMacroArgument &A = Arguments.back();
+ P.Name = Var.Name;
+ P.Required = true;
+ A.push_back(AsmToken(AsmToken::String, Var.TextValue));
+ }
+ }
+ MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
+ MCAsmMacro M = MacroLikeBodies.back();
+
+ // Expand the statement in a new buffer.
+ SmallString<80> Buf;
+ raw_svector_ostream OS(Buf);
+ if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
+ return true;
+ std::unique_ptr<MemoryBuffer> Expansion =
+ MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
+
+ // Jump to the expanded statement and prime the lexer.
+ CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
+ EndStatementAtEOFStack.push_back(false);
+ Lex();
+ return false;
+}
+
+void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
+ raw_svector_ostream &OS) {
+ instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
+}
void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
+ SMLoc ExitLoc,
raw_svector_ostream &OS) {
- OS << ".endr\n";
+ OS << "endm\n";
std::unique_ptr<MemoryBuffer> Instantiation =
MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
// Create the macro instantiation object and add to the current macro
// instantiation stack.
- MacroInstantiation *MI = new MacroInstantiation{
- DirectiveLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
+ MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
+ ExitLoc, TheCondStack.size()};
ActiveMacros.push_back(MI);
// Jump to the macro instantiation and prime the lexer.
CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
+ EndStatementAtEOFStack.push_back(true);
Lex();
}
-/// parseDirectiveRept
-/// ::= .rep | .rept count
-bool MasmParser::parseDirectiveRept(SMLoc DirectiveLoc, StringRef Dir) {
+/// parseDirectiveRepeat
+/// ::= ("repeat" | "rept") count
+/// body
+/// endm
+bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
const MCExpr *CountExpr;
SMLoc CountLoc = getTok().getLoc();
if (parseExpression(CountExpr))
@@ -6363,7 +6625,7 @@ bool MasmParser::parseDirectiveRept(SMLoc DirectiveLoc, StringRef Dir) {
"unexpected token in '" + Dir + "' directive"))
return true;
- // Lex the rept definition.
+ // Lex the repeat definition.
MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
@@ -6373,8 +6635,7 @@ bool MasmParser::parseDirectiveRept(SMLoc DirectiveLoc, StringRef Dir) {
SmallString<256> Buf;
raw_svector_ostream OS(Buf);
while (Count--) {
- // Note that the AtPseudoVariable is disabled for instantiations of .rep(t).
- if (expandMacro(OS, M->Body, None, None, false, getTok().getLoc()))
+ if (expandMacro(OS, M->Body, None, None, M->Locals, getTok().getLoc()))
return true;
}
instantiateMacroLikeBody(M, DirectiveLoc, OS);
@@ -6382,19 +6643,104 @@ bool MasmParser::parseDirectiveRept(SMLoc DirectiveLoc, StringRef Dir) {
return false;
}
-/// parseDirectiveIrp
-/// ::= .irp symbol,values
-bool MasmParser::parseDirectiveIrp(SMLoc DirectiveLoc) {
+/// parseDirectiveWhile
+/// ::= "while" expression
+/// body
+/// endm
+bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
+ const MCExpr *CondExpr;
+ SMLoc CondLoc = getTok().getLoc();
+ if (parseExpression(CondExpr))
+ return true;
+
+ // Lex the repeat definition.
+ MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
+ if (!M)
+ return true;
+
+ // Macro instantiation is lexical, unfortunately. We construct a new buffer
+ // to hold the macro body with substitutions.
+ SmallString<256> Buf;
+ raw_svector_ostream OS(Buf);
+ int64_t Condition;
+ if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
+ return Error(CondLoc, "expected absolute expression in 'while' directive");
+ if (Condition) {
+ // Instantiate the macro, then resume at this directive to recheck the
+ // condition.
+ if (expandMacro(OS, M->Body, None, None, M->Locals, getTok().getLoc()))
+ return true;
+ instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
+ }
+
+ return false;
+}
+
+/// parseDirectiveFor
+/// ::= ("for" | "irp") symbol [":" qualifier], <values>
+/// body
+/// endm
+bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
MCAsmMacroParameter Parameter;
MCAsmMacroArguments A;
if (check(parseIdentifier(Parameter.Name),
- "expected identifier in '.irp' directive") ||
- parseToken(AsmToken::Comma, "expected comma in '.irp' directive") ||
- parseMacroArguments(nullptr, A) ||
+ "expected identifier in '" + Dir + "' directive"))
+ return true;
+
+ // Parse optional qualifier (default value, or "req")
+ if (parseOptionalToken(AsmToken::Colon)) {
+ if (parseOptionalToken(AsmToken::Equal)) {
+ // Default value
+ SMLoc ParamLoc;
+
+ ParamLoc = Lexer.getLoc();
+ if (parseMacroArgument(nullptr, Parameter.Value))
+ return true;
+ } else {
+ SMLoc QualLoc;
+ StringRef Qualifier;
+
+ QualLoc = Lexer.getLoc();
+ if (parseIdentifier(Qualifier))
+ return Error(QualLoc, "missing parameter qualifier for "
+ "'" +
+ Parameter.Name + "' in '" + Dir +
+ "' directive");
+
+ if (Qualifier.equals_lower("req"))
+ Parameter.Required = true;
+ else
+ return Error(QualLoc,
+ Qualifier + " is not a valid parameter qualifier for '" +
+ Parameter.Name + "' in '" + Dir + "' directive");
+ }
+ }
+
+ if (parseToken(AsmToken::Comma,
+ "expected comma in '" + Dir + "' directive") ||
+ parseToken(AsmToken::Less,
+ "values in '" + Dir +
+ "' directive must be enclosed in angle brackets"))
+ return true;
+
+ while (true) {
+ A.emplace_back();
+ if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
+ return addErrorSuffix(" in arguments for '" + Dir + "' directive");
+
+ // If we see a comma, continue, and allow line continuation.
+ if (!parseOptionalToken(AsmToken::Comma))
+ break;
+ parseOptionalToken(AsmToken::EndOfStatement);
+ }
+
+ if (parseToken(AsmToken::Greater,
+ "values in '" + Dir +
+ "' directive must be enclosed in angle brackets") ||
parseToken(AsmToken::EndOfStatement, "expected End of Statement"))
return true;
- // Lex the irp definition.
+ // Lex the for definition.
MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
@@ -6405,9 +6751,7 @@ bool MasmParser::parseDirectiveIrp(SMLoc DirectiveLoc) {
raw_svector_ostream OS(Buf);
for (const MCAsmMacroArgument &Arg : A) {
- // Note that the AtPseudoVariable is enabled for instantiations of .irp.
- // This is undocumented, but GAS seems to support it.
- if (expandMacro(OS, M->Body, Parameter, Arg, true, getTok().getLoc()))
+ if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
return true;
}
@@ -6416,22 +6760,33 @@ bool MasmParser::parseDirectiveIrp(SMLoc DirectiveLoc) {
return false;
}
-/// parseDirectiveIrpc
-/// ::= .irpc symbol,values
-bool MasmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) {
+/// parseDirectiveForc
+/// ::= ("forc" | "irpc") symbol, <string>
+/// body
+/// endm
+bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
MCAsmMacroParameter Parameter;
- MCAsmMacroArguments A;
+ std::string Argument;
if (check(parseIdentifier(Parameter.Name),
- "expected identifier in '.irpc' directive") ||
- parseToken(AsmToken::Comma, "expected comma in '.irpc' directive") ||
- parseMacroArguments(nullptr, A))
+ "expected identifier in '" + Directive + "' directive") ||
+ parseToken(AsmToken::Comma,
+ "expected comma in '" + Directive + "' directive"))
return true;
-
- if (A.size() != 1 || A.front().size() != 1)
- return TokError("unexpected token in '.irpc' directive");
-
- // Eat the end of statement.
+ if (parseAngleBracketString(Argument)) {
+ // Match ml64.exe; treat all characters to end of statement as a string,
+ // ignoring comment markers, then discard anything following a space (using
+ // the C locale).
+ Argument = parseStringTo(AsmToken::EndOfStatement);
+ if (getTok().is(AsmToken::EndOfStatement))
+ Argument += getTok().getString();
+ size_t End = 0;
+ for (; End < Argument.size(); ++End) {
+ if (isSpace(Argument[End]))
+ break;
+ }
+ Argument.resize(End);
+ }
if (parseToken(AsmToken::EndOfStatement, "expected end of statement"))
return true;
@@ -6445,14 +6800,12 @@ bool MasmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) {
SmallString<256> Buf;
raw_svector_ostream OS(Buf);
- StringRef Values = A.front().front().getString();
+ StringRef Values(Argument);
for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
MCAsmMacroArgument Arg;
Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1));
- // Note that the AtPseudoVariable is enabled for instantiations of .irpc.
- // This is undocumented, but GAS seems to support it.
- if (expandMacro(OS, M->Body, Parameter, Arg, true, getTok().getLoc()))
+ if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
return true;
}
@@ -6461,18 +6814,6 @@ bool MasmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) {
return false;
}
-bool MasmParser::parseDirectiveEndr(SMLoc DirectiveLoc) {
- if (ActiveMacros.empty())
- return TokError("unmatched '.endr' directive");
-
- // The only .repl that should get here are the ones created by
- // instantiateMacroLikeBody.
- assert(getLexer().is(AsmToken::EndOfStatement));
-
- handleMacroExit();
- return false;
-}
-
bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
size_t Len) {
const MCExpr *Value;
@@ -6506,10 +6847,37 @@ bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
return false;
}
+bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
+ const SMLoc Loc = getLexer().getLoc();
+ std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
+ StringRef RadixString = StringRef(RadixStringRaw).trim();
+ unsigned Radix;
+ if (RadixString.getAsInteger(10, Radix)) {
+ return Error(Loc,
+ "radix must be a decimal number in the range 2 to 16; was " +
+ RadixString);
+ }
+ if (Radix < 2 || Radix > 16)
+ return Error(Loc, "radix must be in the range 2 to 16; was " +
+ std::to_string(Radix));
+ getLexer().setMasmDefaultRadix(Radix);
+ return false;
+}
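// --- Editorial aside (not part of the upstream patch) ---
// A standalone sketch of the .radix validation above: the argument is read as
// a decimal number regardless of the current default radix and must lie in
// the range 2 to 16. Illustrative names only.
#include <cctype>
#include <optional>
#include <string>

static std::optional<unsigned> parseRadix(std::string Arg) {
  // Trim surrounding whitespace, as the directive handler does.
  while (!Arg.empty() && std::isspace(static_cast<unsigned char>(Arg.front())))
    Arg.erase(Arg.begin());
  while (!Arg.empty() && std::isspace(static_cast<unsigned char>(Arg.back())))
    Arg.pop_back();
  if (Arg.empty() || Arg.size() > 3 ||
      Arg.find_first_not_of("0123456789") != std::string::npos)
    return std::nullopt; // not a decimal number
  unsigned Radix = static_cast<unsigned>(std::stoul(Arg));
  if (Radix < 2 || Radix > 16)
    return std::nullopt; // out of range
  return Radix;
}
// parseRadix(" 16 ") yields 16; parseRadix("0x10") and parseRadix("20") fail.
// --- End editorial aside ---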
+
+/// parseDirectiveEcho
+/// ::= "echo" message
bool MasmParser::parseDirectiveEcho() {
- StringRef Message = parseStringToEndOfStatement();
- Lex(); // eat end of statement
- llvm::outs() << Message << '\n';
+ // We're called before the directive is parsed, to avoid triggering lexical
+ // substitutions in the message. Assert that the next token is the directive,
+ // then eat it without using the Parser's Lex method.
+ assert(getTok().is(AsmToken::Identifier) &&
+ getTok().getString().equals_lower("echo"));
+ Lexer.Lex();
+
+ std::string Message = parseStringTo(AsmToken::EndOfStatement);
+ llvm::outs() << Message;
+ if (!StringRef(Message).endswith("\n"))
+ llvm::outs() << '\n';
return false;
}
@@ -6536,37 +6904,52 @@ static int rewritesSort(const AsmRewrite *AsmRewriteA,
llvm_unreachable("Unstable rewrite sort.");
}
-bool MasmParser::lookUpField(StringRef Name, StringRef &Type,
- unsigned &Offset) const {
+bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
+ Variable &Var = Variables[Name.lower()];
+ if (Var.Name.empty()) {
+ Var.Name = Name;
+ } else if (!Var.Redefinable) {
+ return TokError("invalid variable redefinition");
+ }
+ Var.Redefinable = true;
+ Var.IsText = true;
+ Var.TextValue = Value.str();
+ return false;
+}
+
+bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
const StringRef Base = BaseMember.first, Member = BaseMember.second;
- return lookUpField(Base, Member, Type, Offset);
+ return lookUpField(Base, Member, Info);
}
-bool MasmParser::lookUpField(StringRef Base, StringRef Member, StringRef &Type,
- unsigned &Offset) const {
+bool MasmParser::lookUpField(StringRef Base, StringRef Member,
+ AsmFieldInfo &Info) const {
if (Base.empty())
return true;
- unsigned BaseOffset = 0;
- if (Base.contains('.') && !lookUpField(Base, Type, BaseOffset))
- Base = Type;
-
- auto TypeIt = KnownType.find(Base);
- if (TypeIt != KnownType.end())
- return lookUpField(*TypeIt->second, Member, Type, Offset);
+ AsmFieldInfo BaseInfo;
+ if (Base.contains('.') && !lookUpField(Base, BaseInfo))
+ Base = BaseInfo.Type.Name;
auto StructIt = Structs.find(Base.lower());
+ auto TypeIt = KnownType.find(Base.lower());
+ if (TypeIt != KnownType.end()) {
+ StructIt = Structs.find(TypeIt->second.Name.lower());
+ }
if (StructIt != Structs.end())
- return lookUpField(StructIt->second, Member, Type, Offset);
+ return lookUpField(StructIt->second, Member, Info);
return true;
}
bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
- StringRef &Type, unsigned &Offset) const {
+ AsmFieldInfo &Info) const {
if (Member.empty()) {
- Type = Structure.Name;
+ Info.Type.Name = Structure.Name;
+ Info.Type.Size = Structure.Size;
+ Info.Type.ElementSize = Structure.Size;
+ Info.Type.Length = 1;
return false;
}
@@ -6575,7 +6958,7 @@ bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
auto StructIt = Structs.find(FieldName.lower());
if (StructIt != Structs.end())
- return lookUpField(StructIt->second, FieldMember, Type, Offset);
+ return lookUpField(StructIt->second, FieldMember, Info);
auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
if (FieldIt == Structure.FieldsByName.end())
@@ -6583,9 +6966,14 @@ bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
const FieldInfo &Field = Structure.Fields[FieldIt->second];
if (FieldMember.empty()) {
- Offset += Field.Offset;
+ Info.Offset += Field.Offset;
+ Info.Type.Size = Field.SizeOf;
+ Info.Type.ElementSize = Field.Type;
+ Info.Type.Length = Field.LengthOf;
if (Field.Contents.FT == FT_STRUCT)
- Type = Field.Contents.StructInfo.Structure.Name;
+ Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
+ else
+ Info.Type.Name = "";
return false;
}
@@ -6593,14 +6981,45 @@ bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
return true;
const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
- bool Result = lookUpField(StructInfo.Structure, FieldMember, Type, Offset);
- if (Result)
+ if (lookUpField(StructInfo.Structure, FieldMember, Info))
return true;
- Offset += Field.Offset;
+ Info.Offset += Field.Offset;
return false;
}
+bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
+ unsigned Size = StringSwitch<unsigned>(Name)
+ .CasesLower("byte", "db", "sbyte", 1)
+ .CasesLower("word", "dw", "sword", 2)
+ .CasesLower("dword", "dd", "sdword", 4)
+ .CasesLower("fword", "df", 6)
+ .CasesLower("qword", "dq", "sqword", 8)
+ .CaseLower("real4", 4)
+ .CaseLower("real8", 8)
+ .CaseLower("real10", 10)
+ .Default(0);
+ if (Size) {
+ Info.Name = Name;
+ Info.ElementSize = Size;
+ Info.Length = 1;
+ Info.Size = Size;
+ return false;
+ }
+
+ auto StructIt = Structs.find(Name.lower());
+ if (StructIt != Structs.end()) {
+ const StructInfo &Structure = StructIt->second;
+ Info.Name = Name;
+ Info.ElementSize = Structure.Size;
+ Info.Length = 1;
+ Info.Size = Structure.Size;
+ return false;
+ }
+
+ return true;
+}
+
bool MasmParser::parseMSInlineAsm(
void *AsmLoc, std::string &AsmString, unsigned &NumOutputs,
unsigned &NumInputs, SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
@@ -6699,7 +7118,7 @@ bool MasmParser::parseMSInlineAsm(
// Consider implicit defs to be clobbers. Think of cpuid and push.
ArrayRef<MCPhysReg> ImpDefs(Desc.getImplicitDefs(),
Desc.getNumImplicitDefs());
- ClobberRegs.insert(ClobberRegs.end(), ImpDefs.begin(), ImpDefs.end());
+ llvm::append_range(ClobberRegs, ImpDefs);
}
// Set the number of Outputs and Inputs.
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 05f23e143341..0c255ef02d2a 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -90,15 +90,40 @@ public:
return false;
}
- bool parseSectionFlags(StringRef FlagStr, bool &Passive) {
- SmallVector<StringRef, 2> Flags;
- // If there are no flags, keep Flags empty
- FlagStr.split(Flags, ",", -1, false);
- for (auto &Flag : Flags) {
- if (Flag == "passive")
+ bool parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) {
+ for (char C : FlagStr) {
+ switch (C) {
+ case 'p':
Passive = true;
- else
- return error("Expected section flags, instead got: ", Lexer->getTok());
+ break;
+ case 'G':
+ Group = true;
+ break;
+ default:
+ return Parser->Error(getTok().getLoc(),
+                             StringRef("Unexpected section flag: ") + FlagStr);
+ }
+ }
+ return false;
+ }
+
+ bool parseGroup(StringRef &GroupName) {
+ if (Lexer->isNot(AsmToken::Comma))
+ return TokError("expected group name");
+ Lex();
+ if (Lexer->is(AsmToken::Integer)) {
+ GroupName = getTok().getString();
+ Lex();
+ } else if (Parser->parseIdentifier(GroupName)) {
+ return TokError("invalid group name");
+ }
+ if (Lexer->is(AsmToken::Comma)) {
+ Lex();
+ StringRef Linkage;
+ if (Parser->parseIdentifier(Linkage))
+ return TokError("invalid linkage");
+ if (Linkage != "comdat")
+ return TokError("Linkage must be 'comdat'");
}
return false;
}
@@ -116,6 +141,8 @@ public:
auto Kind = StringSwitch<Optional<SectionKind>>(Name)
.StartsWith(".data", SectionKind::getData())
+ .StartsWith(".tdata", SectionKind::getThreadData())
+ .StartsWith(".tbss", SectionKind::getThreadBSS())
.StartsWith(".rodata", SectionKind::getReadOnly())
.StartsWith(".text", SectionKind::getText())
.StartsWith(".custom_section", SectionKind::getMetadata())
@@ -128,27 +155,34 @@ public:
if (!Kind.hasValue())
return Parser->Error(Lexer->getLoc(), "unknown section kind: " + Name);
- MCSectionWasm *Section = getContext().getWasmSection(Name, Kind.getValue());
// Update section flags if present in this .section directive
bool Passive = false;
- if (parseSectionFlags(getTok().getStringContents(), Passive))
+ bool Group = false;
+ if (parseSectionFlags(getTok().getStringContents(), Passive, Group))
return true;
- if (Passive) {
- if (!Section->isWasmData())
- return Parser->Error(getTok().getLoc(),
- "Only data sections can be passive");
- Section->setPassive();
- }
-
Lex();
- if (expect(AsmToken::Comma, ",") || expect(AsmToken::At, "@") ||
- expect(AsmToken::EndOfStatement, "eol"))
+ if (expect(AsmToken::Comma, ",") || expect(AsmToken::At, "@"))
+ return true;
+
+ StringRef GroupName;
+ if (Group && parseGroup(GroupName))
+ return true;
+
+ if (expect(AsmToken::EndOfStatement, "eol"))
return true;
- auto WS = getContext().getWasmSection(Name, Kind.getValue());
+ // TODO: Parse UniqueID
+ MCSectionWasm *WS = getContext().getWasmSection(
+ Name, Kind.getValue(), GroupName, MCContext::GenericSectionID);
+ if (Passive) {
+ if (!WS->isWasmData())
+ return Parser->Error(getTok().getLoc(),
+ "Only data sections can be passive");
+ WS->setPassive();
+ }
getStreamer().SwitchSection(WS);
return false;
}
@@ -187,9 +221,13 @@ public:
Lexer->is(AsmToken::Identifier)))
return error("Expected label,@type declaration, got: ", Lexer->getTok());
auto TypeName = Lexer->getTok().getString();
- if (TypeName == "function")
+ if (TypeName == "function") {
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
- else if (TypeName == "global")
+ auto *Current =
+ cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
+ if (Current->getGroup())
+ WasmSym->setComdat(true);
+ } else if (TypeName == "global")
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
else if (TypeName == "object")
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
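For context on the reworked .section handling above: flags are now single characters, with 'p' marking a passive data segment and 'G' marking membership in a comdat group, optionally followed by a group name and the literal linkage word comdat. The snippet below is a standalone sketch of that flag scan, using plain std::string instead of the MC lexer; function and variable names are illustrative, not the parser's actual interface.

#include <iostream>
#include <string>

// Standalone sketch of the single-character flag scan in parseSectionFlags:
// 'p' marks a passive data segment, 'G' marks membership in a comdat group.
// Returning true signals an error, matching the parser's convention.
static bool parseWasmSectionFlags(const std::string &FlagStr, bool &Passive,
                                  bool &Group, std::string &Error) {
  for (char C : FlagStr) {
    switch (C) {
    case 'p':
      Passive = true;
      break;
    case 'G':
      Group = true;
      break;
    default:
      Error = std::string("unexpected section flag: ") + C;
      return true;
    }
  }
  return false;
}

int main() {
  // Roughly the flag string seen for a directive along the lines of
  //   .section .text.foo,"G",@,mygroup,comdat
  bool Passive = false, Group = false;
  std::string Error;
  if (parseWasmSectionFlags("G", Passive, Group, Error))
    std::cerr << Error << "\n";
  else
    std::cout << "passive=" << Passive << " group=" << Group << "\n";
}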
diff --git a/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp b/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp
new file mode 100644
index 000000000000..731831d3bce3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp
@@ -0,0 +1,213 @@
+//===- lib/MC/MCPseudoProbe.cpp - Pseudo probe encoding support ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCPseudoProbe.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+
+#define DEBUG_TYPE "mcpseudoprobe"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+int MCPseudoProbeTable::DdgPrintIndent = 0;
+#endif
+
+static const MCExpr *buildSymbolDiff(MCObjectStreamer *MCOS, const MCSymbol *A,
+ const MCSymbol *B) {
+ MCContext &Context = MCOS->getContext();
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *ARef = MCSymbolRefExpr::create(A, Variant, Context);
+ const MCExpr *BRef = MCSymbolRefExpr::create(B, Variant, Context);
+ const MCExpr *AddrDelta =
+ MCBinaryExpr::create(MCBinaryExpr::Sub, ARef, BRef, Context);
+ return AddrDelta;
+}
+
+void MCPseudoProbe::emit(MCObjectStreamer *MCOS,
+ const MCPseudoProbe *LastProbe) const {
+ // Emit Index
+ MCOS->emitULEB128IntValue(Index);
+ // Emit Type and the flag:
+ // Type (bit 0 to 3), with bit 4 to 6 for attributes.
+ // Flag (bit 7, 0 - code address, 1 - address delta). This indicates whether
+ // the following field is a symbolic code address or an address delta.
+ assert(Type <= 0xF && "Probe type too big to encode, exceeding 15");
+ assert(Attributes <= 0x7 &&
+ "Probe attributes too big to encode, exceeding 7");
+ uint8_t PackedType = Type | (Attributes << 4);
+ uint8_t Flag = LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0;
+ MCOS->emitInt8(Flag | PackedType);
+
+ if (LastProbe) {
+ // Emit the delta between the address label and LastProbe.
+ const MCExpr *AddrDelta =
+ buildSymbolDiff(MCOS, Label, LastProbe->getLabel());
+ int64_t Delta;
+ if (AddrDelta->evaluateAsAbsolute(Delta, MCOS->getAssemblerPtr())) {
+ MCOS->emitSLEB128IntValue(Delta);
+ } else {
+ MCOS->insert(new MCPseudoProbeAddrFragment(AddrDelta));
+ }
+ } else {
+ // Emit label as a symbolic code address.
+ MCOS->emitSymbolValue(
+ Label, MCOS->getContext().getAsmInfo()->getCodePointerSize());
+ }
+
+ LLVM_DEBUG({
+ dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
+ dbgs() << "Probe: " << Index << "\n";
+ });
+}
+
+MCPseudoProbeInlineTree::~MCPseudoProbeInlineTree() {
+ for (auto &Inlinee : Inlinees)
+ delete Inlinee.second;
+}
+
+MCPseudoProbeInlineTree *
+MCPseudoProbeInlineTree::getOrAddNode(InlineSite Site) {
+ auto Iter = Inlinees.find(Site);
+ if (Iter == Inlinees.end()) {
+ auto *Node = new MCPseudoProbeInlineTree(std::get<0>(Site));
+ Inlinees[Site] = Node;
+ return Node;
+ } else {
+ return Iter->second;
+ }
+}
+
+void MCPseudoProbeInlineTree::addPseudoProbe(
+ const MCPseudoProbe &Probe, const MCPseudoProbeInlineStack &InlineStack) {
+ // The function should only be called on the root.
+ assert(isRoot() && "Should only be called on the root");
+
+ // When it comes here, the input looks like:
+ // Probe: GUID of C, ...
+ // InlineStack: [88, A], [66, B]
+ // which means, Function A inlines function B at call site with a probe id of
+ // 88, and B inlines C at probe 66. The trie expects a tree path like {[0,
+ // A], [88, B], [66, C]} to locate the tree node where the probe should be
+ // added. Note that the edge [0, A] means A is the top-level function we are
+ // emitting probes for.
+
+ // Make a [0, A] edge.
+ // An empty inline stack means the function that the probe originates from
+ // is a top-level function.
+ InlineSite Top;
+ if (InlineStack.empty()) {
+ Top = InlineSite(Probe.getGuid(), 0);
+ } else {
+ Top = InlineSite(std::get<0>(InlineStack.front()), 0);
+ }
+
+ auto *Cur = getOrAddNode(Top);
+
+ // Make interior edges by walking the inline stack. Once it's done, Cur should
+ // point to the node that the probe originates from.
+ if (!InlineStack.empty()) {
+ auto Iter = InlineStack.begin();
+ auto Index = std::get<1>(*Iter);
+ Iter++;
+ for (; Iter != InlineStack.end(); Iter++) {
+ // Make an edge by using the previous probe id and current GUID.
+ Cur = Cur->getOrAddNode(InlineSite(std::get<0>(*Iter), Index));
+ Index = std::get<1>(*Iter);
+ }
+ Cur = Cur->getOrAddNode(InlineSite(Probe.getGuid(), Index));
+ }
+
+ Cur->Probes.push_back(Probe);
+}
+
+void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS,
+ const MCPseudoProbe *&LastProbe) {
+ LLVM_DEBUG({
+ dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
+ dbgs() << "Group [\n";
+ MCPseudoProbeTable::DdgPrintIndent += 2;
+ });
+ // Emit probes grouped by GUID.
+ if (Guid != 0) {
+ LLVM_DEBUG({
+ dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
+ dbgs() << "GUID: " << Guid << "\n";
+ });
+ // Emit Guid
+ MCOS->emitInt64(Guid);
+ // Emit number of probes in this node
+ MCOS->emitULEB128IntValue(Probes.size());
+ // Emit number of direct inlinees
+ MCOS->emitULEB128IntValue(Inlinees.size());
+ // Emit probes in this group
+ for (const auto &Probe : Probes) {
+ Probe.emit(MCOS, LastProbe);
+ LastProbe = &Probe;
+ }
+ } else {
+ assert(Probes.empty() && "Root should not have probes");
+ }
+
+ // Emit descendent
+ for (const auto &Inlinee : Inlinees) {
+ if (Guid) {
+ // Emit probe index
+ MCOS->emitULEB128IntValue(std::get<1>(Inlinee.first));
+ LLVM_DEBUG({
+ dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
+ dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n";
+ });
+ }
+ // Emit the group
+ Inlinee.second->emit(MCOS, LastProbe);
+ }
+
+ LLVM_DEBUG({
+ MCPseudoProbeTable::DdgPrintIndent -= 2;
+ dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
+ dbgs() << "]\n";
+ });
+}
+
+void MCPseudoProbeSection::emit(MCObjectStreamer *MCOS) {
+ MCContext &Ctx = MCOS->getContext();
+
+ for (auto &ProbeSec : MCProbeDivisions) {
+ const MCPseudoProbe *LastProbe = nullptr;
+ if (auto *S =
+ Ctx.getObjectFileInfo()->getPseudoProbeSection(ProbeSec.first)) {
+ // Switch to the .pseudoprobe section or a comdat group.
+ MCOS->SwitchSection(S);
+ // Emit probes grouped by GUID.
+ ProbeSec.second.emit(MCOS, LastProbe);
+ }
+ }
+}
+
+//
+// This emits the pseudo probe tables.
+//
+void MCPseudoProbeTable::emit(MCObjectStreamer *MCOS) {
+ MCContext &Ctx = MCOS->getContext();
+ auto &ProbeTable = Ctx.getMCPseudoProbeTable();
+
+ // Bail out early so we don't switch to the pseudo_probe section needlessly
+ // and in doing so create an unnecessary (if empty) section.
+ auto &ProbeSections = ProbeTable.getProbeSections();
+ if (ProbeSections.empty())
+ return;
+
+ LLVM_DEBUG(MCPseudoProbeTable::DdgPrintIndent = 0);
+
+ // Put out the probe.
+ ProbeSections.emit(MCOS);
+}
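A note on the probe encoding implemented in MCPseudoProbe::emit above: each probe starts with a single packed byte, with the type in bits 0-3, the attributes in bits 4-6, and bit 7 selecting between a symbolic code address and an address delta. The sketch below reproduces just that packing outside of MC; the helper name and the sample values are illustrative only.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Standalone sketch of the one-byte probe header: the probe type sits in
// bits 0-3, the attributes in bits 4-6, and bit 7 says whether the following
// field is an address delta (1) or a symbolic code address (0).
static uint8_t packProbeHeader(uint8_t Type, uint8_t Attributes,
                               bool IsAddressDelta) {
  assert(Type <= 0xF && "probe type must fit in 4 bits");
  assert(Attributes <= 0x7 && "attributes must fit in 3 bits");
  uint8_t Packed = uint8_t(Type | (Attributes << 4));
  uint8_t Flag = IsAddressDelta ? uint8_t(1u << 7) : uint8_t(0);
  return uint8_t(Flag | Packed);
}

int main() {
  // A probe of type 1 with no attributes, emitted as a delta from the
  // previous probe, packs to 0x81.
  std::printf("0x%02X\n", unsigned(packProbeHeader(1, 0, true)));
}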
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSchedule.cpp b/contrib/llvm-project/llvm/lib/MC/MCSchedule.cpp
index 1fc5ec5e975f..db08e2044113 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSchedule.cpp
@@ -74,7 +74,7 @@ int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
unsigned CPUID = getProcessorID();
while (SCDesc->isVariant()) {
- SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, CPUID);
+ SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, &MCII, CPUID);
SCDesc = getSchedClassDesc(SchedClass);
}
@@ -120,7 +120,7 @@ MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
unsigned CPUID = getProcessorID();
while (SCDesc->isVariant()) {
- SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, CPUID);
+ SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, &MCII, CPUID);
SCDesc = getSchedClassDesc(SchedClass);
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSection.cpp b/contrib/llvm-project/llvm/lib/MC/MCSection.cpp
index ba256102080a..7997b237a7eb 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSection.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSection.cpp
@@ -28,7 +28,7 @@ MCSection::MCSection(SectionVariant V, StringRef Name, SectionKind K,
MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
if (!End)
- End = Ctx.createTempSymbol("sec_end", true);
+ End = Ctx.createTempSymbol("sec_end");
return End;
}
@@ -82,6 +82,7 @@ MCSection::getSubsectionInsertionPoint(unsigned Subsection) {
SubsectionFragmentMap.insert(MI, std::make_pair(Subsection, F));
getFragmentList().insert(IP, F);
F->setParent(this);
+ F->setSubsectionNumber(Subsection);
}
return IP;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm-project/llvm/lib/MC/MCSectionELF.cpp
index 77c259c27a04..27694cb14419 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSectionELF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSectionELF.cpp
@@ -156,6 +156,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << "llvm_dependent_libraries";
else if (Type == ELF::SHT_LLVM_SYMPART)
OS << "llvm_sympart";
+ else if (Type == ELF::SHT_LLVM_BB_ADDR_MAP)
+ OS << "llvm_bb_addr_map";
else
report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) +
" for section " + getName());
@@ -172,9 +174,11 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
}
if (Flags & ELF::SHF_LINK_ORDER) {
- assert(LinkedToSym);
OS << ",";
- printName(OS, LinkedToSym->getName());
+ if (LinkedToSym)
+ printName(OS, LinkedToSym->getName());
+ else
+ OS << '0';
}
if (isUnique())
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm-project/llvm/lib/MC/MCSectionMachO.cpp
index 21a63ce83330..794d2c52d7b1 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSectionMachO.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSectionMachO.cpp
@@ -215,11 +215,11 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
return "";
// Figure out which section type it is.
- auto TypeDescriptor = std::find_if(
- std::begin(SectionTypeDescriptors), std::end(SectionTypeDescriptors),
- [&](decltype(*SectionTypeDescriptors) &Descriptor) {
- return SectionType == Descriptor.AssemblerName;
- });
+ auto TypeDescriptor =
+ llvm::find_if(SectionTypeDescriptors,
+ [&](decltype(*SectionTypeDescriptors) &Descriptor) {
+ return SectionType == Descriptor.AssemblerName;
+ });
// If we didn't find the section type, reject it.
if (TypeDescriptor == std::end(SectionTypeDescriptors))
@@ -243,11 +243,11 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
Attrs.split(SectionAttrs, '+', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
for (StringRef &SectionAttr : SectionAttrs) {
- auto AttrDescriptorI = std::find_if(
- std::begin(SectionAttrDescriptors), std::end(SectionAttrDescriptors),
- [&](decltype(*SectionAttrDescriptors) &Descriptor) {
- return SectionAttr.trim() == Descriptor.AssemblerName;
- });
+ auto AttrDescriptorI =
+ llvm::find_if(SectionAttrDescriptors,
+ [&](decltype(*SectionAttrDescriptors) &Descriptor) {
+ return SectionAttr.trim() == Descriptor.AssemblerName;
+ });
if (AttrDescriptorI == std::end(SectionAttrDescriptors))
return "mach-o section specifier has invalid attribute";
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSectionWasm.cpp b/contrib/llvm-project/llvm/lib/MC/MCSectionWasm.cpp
index 27ed51802a2e..81dc4329be6a 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSectionWasm.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSectionWasm.cpp
@@ -64,7 +64,9 @@ void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << ",\"";
if (IsPassive)
- OS << "passive";
+ OS << "p";
+ if (Group)
+ OS << "G";
OS << '"';
@@ -78,6 +80,12 @@ void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
// TODO: Print section type.
+ if (Group) {
+ OS << ",";
+ printName(OS, Group->getName());
+ OS << ",comdat";
+ }
+
if (isUnique())
OS << ",unique," << UniqueID;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp b/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp
index 1fa495239f74..17b7b60a04ab 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp
@@ -45,6 +45,7 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
printCsectDirective(OS);
break;
case XCOFF::XMC_TC:
+ case XCOFF::XMC_TE:
break;
case XCOFF::XMC_TC0:
OS << "\t.toc\n";
diff --git a/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp
index 6d3a933c96a3..4b5ae3cc202d 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
@@ -90,7 +91,7 @@ void MCTargetStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
MCStreamer::MCStreamer(MCContext &Ctx)
: Context(Ctx), CurrentWinFrameInfo(nullptr),
- UseAssemblerInfoForParsing(false) {
+ CurrentProcWinFrameInfoStartIndex(0), UseAssemblerInfoForParsing(false) {
SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
@@ -138,6 +139,21 @@ void MCStreamer::emitIntValue(uint64_t Value, unsigned Size) {
unsigned Index = IsLittleEndian ? 0 : 8 - Size;
emitBytes(StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size));
}
+void MCStreamer::emitIntValue(APInt Value) {
+ if (Value.getNumWords() == 1) {
+ emitIntValue(Value.getLimitedValue(), Value.getBitWidth() / 8);
+ return;
+ }
+
+ const bool IsLittleEndianTarget = Context.getAsmInfo()->isLittleEndian();
+ const bool ShouldSwap = sys::IsLittleEndianHost != IsLittleEndianTarget;
+ const APInt Swapped = ShouldSwap ? Value.byteSwap() : Value;
+ const unsigned Size = Value.getBitWidth() / 8;
+ SmallString<10> Tmp;
+ Tmp.resize(Size);
+ StoreIntToMemory(Swapped, reinterpret_cast<uint8_t *>(Tmp.data()), Size);
+ emitBytes(Tmp.str());
+}
/// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the
/// client having to pass in a MCExpr for constant integers.
@@ -202,6 +218,9 @@ void MCStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) {
emitFill(*MCConstantExpr::create(NumBytes, getContext()), FillValue);
}
+void llvm::MCStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLen,
+ llvm::SMLoc) {}
+
/// The implementation in this class just redirects to emitFill.
void MCStreamer::emitZeros(uint64_t NumBytes) { emitFill(NumBytes, 0); }
@@ -255,9 +274,9 @@ bool MCStreamer::hasUnfinishedDwarfFrameInfo() {
MCDwarfFrameInfo *MCStreamer::getCurrentDwarfFrameInfo() {
if (!hasUnfinishedDwarfFrameInfo()) {
- getContext().reportError(SMLoc(), "this directive must appear between "
- ".cfi_startproc and .cfi_endproc "
- "directives");
+ getContext().reportError(getStartTokLoc(),
+ "this directive must appear between "
+ ".cfi_startproc and .cfi_endproc directives");
return nullptr;
}
return &DwarfFrameInfos.back();
@@ -674,6 +693,7 @@ void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) {
MCSymbol *StartProc = emitCFILabel();
+ CurrentProcWinFrameInfoStartIndex = WinFrameInfos.size();
WinFrameInfos.emplace_back(
std::make_unique<WinEH::FrameInfo>(Symbol, StartProc));
CurrentWinFrameInfo = WinFrameInfos.back().get();
@@ -689,6 +709,13 @@ void MCStreamer::EmitWinCFIEndProc(SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
CurFrame->End = Label;
+ if (!CurFrame->FuncletOrFuncEnd)
+ CurFrame->FuncletOrFuncEnd = CurFrame->End;
+
+ for (size_t I = CurrentProcWinFrameInfoStartIndex, E = WinFrameInfos.size();
+ I != E; ++I)
+ EmitWindowsUnwindTables(WinFrameInfos[I].get());
+ SwitchSection(CurFrame->TextSection);
}
void MCStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
@@ -947,10 +974,13 @@ void MCStreamer::emitRawText(const Twine &T) {
void MCStreamer::EmitWindowsUnwindTables() {
}
-void MCStreamer::Finish() {
+void MCStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
+}
+
+void MCStreamer::Finish(SMLoc EndLoc) {
if ((!DwarfFrameInfos.empty() && !DwarfFrameInfos.back().End) ||
(!WinFrameInfos.empty() && !WinFrameInfos.back()->End)) {
- getContext().reportError(SMLoc(), "Unfinished frame!");
+ getContext().reportError(EndLoc, "Unfinished frame!");
return;
}
@@ -1013,6 +1043,25 @@ void MCStreamer::emitInstruction(const MCInst &Inst, const MCSubtargetInfo &) {
visitUsedExpr(*Inst.getOperand(i).getExpr());
}
+void MCStreamer::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
+ uint64_t Attr,
+ const MCPseudoProbeInlineStack &InlineStack) {
+ auto &Context = getContext();
+
+ // Create a symbol in the current section for use in the probe.
+ MCSymbol *ProbeSym = Context.createTempSymbol();
+
+ // Set the value of the symbol to use for the MCPseudoProbe.
+ emitLabel(ProbeSym);
+
+ // Create a (local) probe entry with the symbol.
+ MCPseudoProbe Probe(ProbeSym, Guid, Index, Type, Attr);
+
+ // Add the probe entry to this section's entries.
+ Context.getMCPseudoProbeTable().getProbeSections().addPseudoProbe(
+ getCurrentSectionOnly(), Probe, InlineStack);
+}
+
void MCStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Size) {
// Get the Hi-Lo expression.
@@ -1027,7 +1076,7 @@ void MCStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
}
// Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = Context.createTempSymbol("set", true);
+ MCSymbol *SetLabel = Context.createTempSymbol("set");
emitAssignment(SetLabel, Diff);
emitSymbolValue(SetLabel, Size);
}
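One more aside, on the new MCStreamer::emitIntValue(APInt) overload above: multi-word values are byte-swapped only when the host and target endianness differ, and the resulting bytes are then emitted verbatim. The sketch below models that decision with a plain byte vector standing in for APInt and StoreIntToMemory; names and values are illustrative.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Standalone sketch of the endianness handling: the bytes are reversed only
// when host and target endianness disagree, then written out as-is.
static std::vector<uint8_t> encodeWideInt(std::vector<uint8_t> HostOrderBytes,
                                          bool HostIsLittleEndian,
                                          bool TargetIsLittleEndian) {
  if (HostIsLittleEndian != TargetIsLittleEndian)
    std::reverse(HostOrderBytes.begin(), HostOrderBytes.end());
  return HostOrderBytes;
}

int main() {
  // A 128-bit value on a little-endian host targeting a big-endian object
  // file has its 16 bytes reversed before emission.
  std::vector<uint8_t> V(16);
  for (int I = 0; I < 16; ++I)
    V[I] = uint8_t(I);
  std::vector<uint8_t> Out = encodeWideInt(V, /*HostIsLittleEndian=*/true,
                                           /*TargetIsLittleEndian=*/false);
  std::printf("first byte emitted: 0x%02X\n", unsigned(Out.front())); // 0x0F
}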
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm-project/llvm/lib/MC/MCSubtargetInfo.cpp
index 1c187d616e4e..55ada91857be 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSubtargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -147,7 +147,7 @@ static void cpuHelp(ArrayRef<SubtargetSubTypeKV> CPUTable) {
PrintOnce = true;
}
-static FeatureBitset getFeatures(StringRef CPU, StringRef FS,
+static FeatureBitset getFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS,
ArrayRef<SubtargetSubTypeKV> ProcDesc,
ArrayRef<SubtargetFeatureKV> ProcFeatures) {
SubtargetFeatures Features(FS);
@@ -178,6 +178,19 @@ static FeatureBitset getFeatures(StringRef CPU, StringRef FS,
}
}
+ if (!TuneCPU.empty()) {
+ const SubtargetSubTypeKV *CPUEntry = Find(TuneCPU, ProcDesc);
+
+ // If there is a match
+ if (CPUEntry) {
+ // Set the features implied by this CPU feature, if any.
+ SetImpliedBits(Bits, CPUEntry->TuneImplies.getAsBitset(), ProcFeatures);
+ } else if (TuneCPU != CPU) {
+ errs() << "'" << TuneCPU << "' is not a recognized processor for this "
+ << "target (ignoring processor)\n";
+ }
+ }
+
// Iterate through each feature
for (const std::string &Feature : Features.getFeatures()) {
// Check for help
@@ -192,30 +205,33 @@ static FeatureBitset getFeatures(StringRef CPU, StringRef FS,
return Bits;
}
-void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) {
- FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures);
- if (!CPU.empty())
- CPUSchedModel = &getSchedModelForCPU(CPU);
+void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU,
+ StringRef FS) {
+ FeatureBits = getFeatures(CPU, TuneCPU, FS, ProcDesc, ProcFeatures);
+ if (!TuneCPU.empty())
+ CPUSchedModel = &getSchedModelForCPU(TuneCPU);
else
CPUSchedModel = &MCSchedModel::GetDefaultSchedModel();
}
-void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef FS) {
- FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures);
+void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef TuneCPU,
+ StringRef FS) {
+ FeatureBits = getFeatures(CPU, TuneCPU, FS, ProcDesc, ProcFeatures);
}
-MCSubtargetInfo::MCSubtargetInfo(const Triple &TT, StringRef C, StringRef FS,
- ArrayRef<SubtargetFeatureKV> PF,
+MCSubtargetInfo::MCSubtargetInfo(const Triple &TT, StringRef C, StringRef TC,
+ StringRef FS, ArrayRef<SubtargetFeatureKV> PF,
ArrayRef<SubtargetSubTypeKV> PD,
const MCWriteProcResEntry *WPR,
const MCWriteLatencyEntry *WL,
const MCReadAdvanceEntry *RA,
const InstrStage *IS, const unsigned *OC,
const unsigned *FP)
- : TargetTriple(TT), CPU(std::string(C)), ProcFeatures(PF), ProcDesc(PD),
- WriteProcResTable(WPR), WriteLatencyTable(WL), ReadAdvanceTable(RA),
- Stages(IS), OperandCycles(OC), ForwardingPaths(FP) {
- InitMCProcessorInfo(CPU, FS);
+ : TargetTriple(TT), CPU(std::string(C)), TuneCPU(std::string(TC)),
+ ProcFeatures(PF), ProcDesc(PD), WriteProcResTable(WPR),
+ WriteLatencyTable(WL), ReadAdvanceTable(RA), Stages(IS),
+ OperandCycles(OC), ForwardingPaths(FP) {
+ InitMCProcessorInfo(CPU, TuneCPU, FS);
}
FeatureBitset MCSubtargetInfo::ToggleFeature(uint64_t FB) {
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSymbolXCOFF.cpp b/contrib/llvm-project/llvm/lib/MC/MCSymbolXCOFF.cpp
index 536153e5518b..b9dd2908b40b 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSymbolXCOFF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSymbolXCOFF.cpp
@@ -13,8 +13,7 @@ using namespace llvm;
MCSectionXCOFF *MCSymbolXCOFF::getRepresentedCsect() const {
assert(RepresentedCsect &&
"Trying to get csect representation of this symbol but none was set.");
- assert((!getName().equals(getUnqualifiedName()) ||
- RepresentedCsect->getCSectType() == XCOFF::XTY_ER) &&
+ assert(!getName().equals(getUnqualifiedName()) &&
"Symbol does not represent a csect; MCSectionXCOFF that represents "
"the symbol should not be (but is) set.");
assert(getSymbolTableName().equals(RepresentedCsect->getSymbolTableName()) &&
@@ -26,10 +25,9 @@ MCSectionXCOFF *MCSymbolXCOFF::getRepresentedCsect() const {
void MCSymbolXCOFF::setRepresentedCsect(MCSectionXCOFF *C) {
assert(C && "Assigned csect should not be null.");
assert((!RepresentedCsect || RepresentedCsect == C) &&
- "Trying to set a csect that doesn't match the one that"
- "this symbol is already mapped to.");
- assert((!getName().equals(getUnqualifiedName()) ||
- C->getCSectType() == XCOFF::XTY_ER) &&
+ "Trying to set a csect that doesn't match the one that this symbol is "
+ "already mapped to.");
+ assert(!getName().equals(getUnqualifiedName()) &&
"Symbol does not represent a csect; can only set a MCSectionXCOFF "
"representation for a csect.");
assert(getSymbolTableName().equals(C->getSymbolTableName()) &&
diff --git a/contrib/llvm-project/llvm/lib/MC/MCWasmStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCWasmStreamer.cpp
index bf8b142b355a..e3d2439cef81 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCWasmStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCWasmStreamer.cpp
@@ -148,18 +148,6 @@ void MCWasmStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
llvm_unreachable("Local common symbols are not yet implemented for Wasm");
}
-void MCWasmStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
- SMLoc Loc) {
- MCObjectStreamer::emitValueImpl(Value, Size, Loc);
-}
-
-void MCWasmStreamer::emitValueToAlignment(unsigned ByteAlignment, int64_t Value,
- unsigned ValueSize,
- unsigned MaxBytesToEmit) {
- MCObjectStreamer::emitValueToAlignment(ByteAlignment, Value, ValueSize,
- MaxBytesToEmit);
-}
-
void MCWasmStreamer::emitIdent(StringRef IdentString) {
// TODO(sbc): Add the ident section once we support mergable strings
// sections in the object format
diff --git a/contrib/llvm-project/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm-project/llvm/lib/MC/MCWin64EH.cpp
index ac288ca08c93..de1b0fd3c742 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCWin64EH.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCWin64EH.cpp
@@ -238,8 +238,9 @@ void llvm::Win64EH::UnwindEmitter::Emit(MCStreamer &Streamer) const {
}
}
-void llvm::Win64EH::UnwindEmitter::EmitUnwindInfo(
- MCStreamer &Streamer, WinEH::FrameInfo *info) const {
+void llvm::Win64EH::UnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer,
+ WinEH::FrameInfo *info,
+ bool HandlerData) const {
// Switch sections (the static function above is meant to be called from
// here and from Emit().
MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection);
@@ -264,8 +265,7 @@ static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
return value;
}
-static uint32_t
-ARM64CountOfUnwindCodes(const std::vector<WinEH::Instruction> &Insns) {
+static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
uint32_t Count = 0;
for (const auto &I : Insns) {
switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
@@ -280,6 +280,9 @@ ARM64CountOfUnwindCodes(const std::vector<WinEH::Instruction> &Insns) {
case Win64EH::UOP_AllocLarge:
Count += 4;
break;
+ case Win64EH::UOP_SaveR19R20X:
+ Count += 1;
+ break;
case Win64EH::UOP_SaveFPLRX:
Count += 1;
break;
@@ -298,6 +301,9 @@ ARM64CountOfUnwindCodes(const std::vector<WinEH::Instruction> &Insns) {
case Win64EH::UOP_SaveRegX:
Count += 2;
break;
+ case Win64EH::UOP_SaveLRPair:
+ Count += 2;
+ break;
case Win64EH::UOP_SaveFReg:
Count += 2;
break;
@@ -322,6 +328,21 @@ ARM64CountOfUnwindCodes(const std::vector<WinEH::Instruction> &Insns) {
case Win64EH::UOP_End:
Count += 1;
break;
+ case Win64EH::UOP_SaveNext:
+ Count += 1;
+ break;
+ case Win64EH::UOP_TrapFrame:
+ Count += 1;
+ break;
+ case Win64EH::UOP_PushMachFrame:
+ Count += 1;
+ break;
+ case Win64EH::UOP_Context:
+ Count += 1;
+ break;
+ case Win64EH::UOP_ClearUnwoundToCall:
+ Count += 1;
+ break;
}
}
return Count;
@@ -375,6 +396,11 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
b = 0xE3;
streamer.emitInt8(b);
break;
+ case Win64EH::UOP_SaveR19R20X:
+ b = 0x20;
+ b |= (inst.Offset >> 3) & 0x1F;
+ streamer.emitInt8(b);
+ break;
case Win64EH::UOP_SaveFPLRX:
b = 0x80;
b |= ((inst.Offset - 1) >> 3) & 0x3F;
@@ -417,6 +443,16 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
b = ((reg & 0x3) << 6) | ((inst.Offset >> 3) - 1);
streamer.emitInt8(b);
break;
+ case Win64EH::UOP_SaveLRPair:
+ assert(inst.Register >= 19 && "Saved reg must be >= 19");
+ reg = inst.Register - 19;
+ assert((reg % 2) == 0 && "Saved reg must be 19+2*X");
+ reg /= 2;
+ b = 0xD6 | ((reg & 0x7) >> 2);
+ streamer.emitInt8(b);
+ b = ((reg & 0x3) << 6) | (inst.Offset >> 3);
+ streamer.emitInt8(b);
+ break;
case Win64EH::UOP_SaveFReg:
assert(inst.Register >= 8 && "Saved dreg must be >= 8");
reg = inst.Register - 8;
@@ -453,6 +489,26 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
b = 0xE4;
streamer.emitInt8(b);
break;
+ case Win64EH::UOP_SaveNext:
+ b = 0xE6;
+ streamer.emitInt8(b);
+ break;
+ case Win64EH::UOP_TrapFrame:
+ b = 0xE8;
+ streamer.emitInt8(b);
+ break;
+ case Win64EH::UOP_PushMachFrame:
+ b = 0xE9;
+ streamer.emitInt8(b);
+ break;
+ case Win64EH::UOP_Context:
+ b = 0xEA;
+ streamer.emitInt8(b);
+ break;
+ case Win64EH::UOP_ClearUnwoundToCall:
+ b = 0xEC;
+ streamer.emitInt8(b);
+ break;
}
}
@@ -488,12 +544,366 @@ FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
return nullptr;
}
+static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
+ bool Reverse) {
+ unsigned PrevOffset = -1;
+ unsigned PrevRegister = -1;
+
+ auto VisitInstruction = [&](WinEH::Instruction &Inst) {
+ // Convert 2-byte opcodes into equivalent 1-byte ones.
+ if (Inst.Operation == Win64EH::UOP_SaveRegP && Inst.Register == 29) {
+ Inst.Operation = Win64EH::UOP_SaveFPLR;
+ Inst.Register = -1;
+ } else if (Inst.Operation == Win64EH::UOP_SaveRegPX &&
+ Inst.Register == 29) {
+ Inst.Operation = Win64EH::UOP_SaveFPLRX;
+ Inst.Register = -1;
+ } else if (Inst.Operation == Win64EH::UOP_SaveRegPX &&
+ Inst.Register == 19 && Inst.Offset <= 248) {
+ Inst.Operation = Win64EH::UOP_SaveR19R20X;
+ Inst.Register = -1;
+ } else if (Inst.Operation == Win64EH::UOP_AddFP && Inst.Offset == 0) {
+ Inst.Operation = Win64EH::UOP_SetFP;
+ } else if (Inst.Operation == Win64EH::UOP_SaveRegP &&
+ Inst.Register == PrevRegister + 2 &&
+ Inst.Offset == PrevOffset + 16) {
+ Inst.Operation = Win64EH::UOP_SaveNext;
+ Inst.Register = -1;
+ Inst.Offset = 0;
+ // Intentionally not creating UOP_SaveNext for float register pairs,
+ // as current versions of Windows (up to at least 20.04) is buggy
+ // regarding SaveNext for float pairs.
+ }
+ // Update info about the previous instruction, for detecting if
+ // the next one can be made a UOP_SaveNext
+ if (Inst.Operation == Win64EH::UOP_SaveR19R20X) {
+ PrevOffset = 0;
+ PrevRegister = 19;
+ } else if (Inst.Operation == Win64EH::UOP_SaveRegPX) {
+ PrevOffset = 0;
+ PrevRegister = Inst.Register;
+ } else if (Inst.Operation == Win64EH::UOP_SaveRegP) {
+ PrevOffset = Inst.Offset;
+ PrevRegister = Inst.Register;
+ } else if (Inst.Operation == Win64EH::UOP_SaveNext) {
+ PrevRegister += 2;
+ PrevOffset += 16;
+ } else {
+ PrevRegister = -1;
+ PrevOffset = -1;
+ }
+ };
+
+ // Iterate over instructions in a forward order (for prologues),
+ // backwards for epilogues (i.e. always reverse compared to how the
+ // opcodes are stored).
+ if (Reverse) {
+ for (auto It = Instructions.rbegin(); It != Instructions.rend(); It++)
+ VisitInstruction(*It);
+ } else {
+ for (WinEH::Instruction &Inst : Instructions)
+ VisitInstruction(Inst);
+ }
+}
+
+static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info,
+ int PrologCodeBytes) {
+ // Can only pack if there's one single epilog
+ if (info->EpilogMap.size() != 1)
+ return -1;
+
+ const std::vector<WinEH::Instruction> &Epilog =
+ info->EpilogMap.begin()->second;
+
+ // Can pack if the epilog is a subset of the prolog but not vice versa
+ if (Epilog.size() > info->Instructions.size())
+ return -1;
+
+ // Check that the epilog actually is a perfect match for the end (backwards)
+ // of the prolog.
+ for (int I = Epilog.size() - 1; I >= 0; I--) {
+ if (info->Instructions[I] != Epilog[Epilog.size() - 1 - I])
+ return -1;
+ }
+
+ // Check that the epilog actually is at the very end of the function,
+ // otherwise it can't be packed.
+ uint32_t DistanceFromEnd = (uint32_t)GetAbsDifference(
+ streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first);
+ if (DistanceFromEnd / 4 != Epilog.size())
+ return -1;
+
+ int Offset = Epilog.size() == info->Instructions.size()
+ ? 0
+ : ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction>(
+ &info->Instructions[Epilog.size()],
+ info->Instructions.size() - Epilog.size()));
+
+ // Check that the offset and prolog size fits in the first word; it's
+ // unclear whether the epilog count in the extension word can be taken
+ // as packed epilog offset.
+ if (Offset > 31 || PrologCodeBytes > 124)
+ return -1;
+
+ info->EpilogMap.clear();
+ return Offset;
+}
+
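The epilog-sharing test in checkPackedEpilog above has a compact core: the lone epilog is accepted only if its opcodes, read back to front, match the leading entries of the prolog's stored opcode list, and only if the epilog sits at the very end of the function. Below is a standalone sketch of just the mirror comparison, with opcodes modeled as plain integers rather than WinEH::Instruction; the helper name is hypothetical.

#include <cstddef>
#include <cstdio>
#include <vector>

// Sketch of the mirror test only. Returns -1 when the epilog cannot reuse
// the prolog's codes. The real checkPackedEpilog additionally verifies the
// epilog's position and returns an offset measured in unwind-code bytes,
// not in instructions.
static int packedEpilogOffset(const std::vector<int> &Prolog,
                              const std::vector<int> &Epilog) {
  if (Epilog.size() > Prolog.size())
    return -1;
  for (std::size_t I = 0; I < Epilog.size(); ++I)
    if (Prolog[I] != Epilog[Epilog.size() - 1 - I])
      return -1;
  return static_cast<int>(Prolog.size() - Epilog.size());
}

int main() {
  // Opcodes are arbitrary integer markers here.
  std::printf("%d\n", packedEpilogOffset({1, 2, 3}, {3, 2, 1})); // 0
  std::printf("%d\n", packedEpilogOffset({1, 2, 3}, {9}));       // -1
}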
+static bool tryPackedUnwind(WinEH::FrameInfo *info, uint32_t FuncLength,
+ int PackedEpilogOffset) {
+ if (PackedEpilogOffset == 0) {
+ // Fully symmetric prolog and epilog, should be ok for packed format.
+ // For CR=3, the corresponding synthesized epilog actually lacks the
+ // SetFP opcode, but unwinding should work just fine despite that
+ // (if at the SetFP opcode, the unwinder considers it as part of the
+ // function body and just unwinds the full prolog instead).
+ } else if (PackedEpilogOffset == 1) {
+ // One single case of differences between prolog and epilog is allowed:
+ // The epilog can lack a single SetFP that is the last opcode in the
+ // prolog, for the CR=3 case.
+ if (info->Instructions.back().Operation != Win64EH::UOP_SetFP)
+ return false;
+ } else {
+ // Too much difference between prolog and epilog.
+ return false;
+ }
+ unsigned RegI = 0, RegF = 0;
+ int Predecrement = 0;
+ enum {
+ Start,
+ Start2,
+ IntRegs,
+ FloatRegs,
+ InputArgs,
+ StackAdjust,
+ FrameRecord,
+ End
+ } Location = Start;
+ bool StandaloneLR = false, FPLRPair = false;
+ int StackOffset = 0;
+ int Nops = 0;
+ // Iterate over the prolog and check that all opcodes exactly match
+ // the canonical order and form. A more lax check could verify that
+ // all saved registers are in the expected locations, but not enforce
+ // the order - that would work fine when unwinding from within
+ // functions, but not be exactly right if unwinding happens within
+ // prologs/epilogs.
+ for (const WinEH::Instruction &Inst : info->Instructions) {
+ switch (Inst.Operation) {
+ case Win64EH::UOP_End:
+ if (Location != Start)
+ return false;
+ Location = Start2;
+ break;
+ case Win64EH::UOP_SaveR19R20X:
+ if (Location != Start2)
+ return false;
+ Predecrement = Inst.Offset;
+ RegI = 2;
+ Location = IntRegs;
+ break;
+ case Win64EH::UOP_SaveRegX:
+ if (Location != Start2)
+ return false;
+ Predecrement = Inst.Offset;
+ if (Inst.Register == 19)
+ RegI += 1;
+ else if (Inst.Register == 30)
+ StandaloneLR = true;
+ else
+ return false;
+ // Odd register; can't be any further int registers.
+ Location = FloatRegs;
+ break;
+ case Win64EH::UOP_SaveRegPX:
+ // Can't have this in a canonical prologue. Either this has been
+ // canonicalized into SaveR19R20X or SaveFPLRX, or it's an unsupported
+ // register pair.
+ // It can't be canonicalized into SaveR19R20X if the offset is
+ // larger than 248 bytes, but even with the maximum case with
+ // RegI=10/RegF=8/CR=1/H=1, we end up with SavSZ = 216, which should
+ // fit into SaveR19R20X.
+ // The unwinding opcodes can't describe the otherwise seemingly valid
+ // case for RegI=1 CR=1, that would start with a
+ // "stp x19, lr, [sp, #-...]!" as that fits neither SaveRegPX nor
+ // SaveLRPair.
+ return false;
+ case Win64EH::UOP_SaveRegP:
+ if (Location != IntRegs || Inst.Offset != 8 * RegI ||
+ Inst.Register != 19 + RegI)
+ return false;
+ RegI += 2;
+ break;
+ case Win64EH::UOP_SaveReg:
+ if (Location != IntRegs || Inst.Offset != 8 * RegI)
+ return false;
+ if (Inst.Register == 19 + RegI)
+ RegI += 1;
+ else if (Inst.Register == 30)
+ StandaloneLR = true;
+ else
+ return false;
+ // Odd register; can't be any further int registers.
+ Location = FloatRegs;
+ break;
+ case Win64EH::UOP_SaveLRPair:
+ if (Location != IntRegs || Inst.Offset != 8 * RegI ||
+ Inst.Register != 19 + RegI)
+ return false;
+ RegI += 1;
+ StandaloneLR = true;
+ Location = FloatRegs;
+ break;
+ case Win64EH::UOP_SaveFRegX:
+ // Packed unwind can't handle prologs that only save one single
+ // float register.
+ return false;
+ case Win64EH::UOP_SaveFReg:
+ if (Location != FloatRegs || RegF == 0 || Inst.Register != 8 + RegF ||
+ Inst.Offset != 8 * (RegI + (StandaloneLR ? 1 : 0) + RegF))
+ return false;
+ RegF += 1;
+ Location = InputArgs;
+ break;
+ case Win64EH::UOP_SaveFRegPX:
+ if (Location != Start2 || Inst.Register != 8)
+ return false;
+ Predecrement = Inst.Offset;
+ RegF = 2;
+ Location = FloatRegs;
+ break;
+ case Win64EH::UOP_SaveFRegP:
+ if ((Location != IntRegs && Location != FloatRegs) ||
+ Inst.Register != 8 + RegF ||
+ Inst.Offset != 8 * (RegI + (StandaloneLR ? 1 : 0) + RegF))
+ return false;
+ RegF += 2;
+ Location = FloatRegs;
+ break;
+ case Win64EH::UOP_SaveNext:
+ if (Location == IntRegs)
+ RegI += 2;
+ else if (Location == FloatRegs)
+ RegF += 2;
+ else
+ return false;
+ break;
+ case Win64EH::UOP_Nop:
+ if (Location != IntRegs && Location != FloatRegs && Location != InputArgs)
+ return false;
+ Location = InputArgs;
+ Nops++;
+ break;
+ case Win64EH::UOP_AllocSmall:
+ case Win64EH::UOP_AllocMedium:
+ if (Location != Start2 && Location != IntRegs && Location != FloatRegs &&
+ Location != InputArgs && Location != StackAdjust)
+ return false;
+ // Can have either a single decrement, or a pair of decrements with
+ // 4080 and another decrement.
+ if (StackOffset == 0)
+ StackOffset = Inst.Offset;
+ else if (StackOffset != 4080)
+ return false;
+ else
+ StackOffset += Inst.Offset;
+ Location = StackAdjust;
+ break;
+ case Win64EH::UOP_SaveFPLRX:
+ // Not allowing FPLRX after StackAdjust; if a StackAdjust is used, it
+ // should be followed by a FPLR instead.
+ if (Location != Start2 && Location != IntRegs && Location != FloatRegs &&
+ Location != InputArgs)
+ return false;
+ StackOffset = Inst.Offset;
+ Location = FrameRecord;
+ FPLRPair = true;
+ break;
+ case Win64EH::UOP_SaveFPLR:
+ // This can only follow after a StackAdjust
+ if (Location != StackAdjust || Inst.Offset != 0)
+ return false;
+ Location = FrameRecord;
+ FPLRPair = true;
+ break;
+ case Win64EH::UOP_SetFP:
+ if (Location != FrameRecord)
+ return false;
+ Location = End;
+ break;
+ }
+ }
+ if (RegI > 10 || RegF > 8)
+ return false;
+ if (StandaloneLR && FPLRPair)
+ return false;
+ if (FPLRPair && Location != End)
+ return false;
+ if (Nops != 0 && Nops != 4)
+ return false;
+ int H = Nops == 4;
+ int IntSZ = 8 * RegI;
+ if (StandaloneLR)
+ IntSZ += 8;
+ int FpSZ = 8 * RegF; // RegF not yet decremented
+ int SavSZ = (IntSZ + FpSZ + 8 * 8 * H + 0xF) & ~0xF;
+ if (Predecrement != SavSZ)
+ return false;
+ if (FPLRPair && StackOffset < 16)
+ return false;
+ if (StackOffset % 16)
+ return false;
+ uint32_t FrameSize = (StackOffset + SavSZ) / 16;
+ if (FrameSize > 0x1FF)
+ return false;
+ assert(RegF != 1 && "One single float reg not allowed");
+ if (RegF > 0)
+ RegF--; // Convert from actual number of registers, to value stored
+ assert(FuncLength <= 0x7FF && "FuncLength should have been checked earlier");
+ int Flag = 0x01; // Function segments not supported yet
+ int CR = FPLRPair ? 3 : StandaloneLR ? 1 : 0;
+ info->PackedInfo |= Flag << 0;
+ info->PackedInfo |= (FuncLength & 0x7FF) << 2;
+ info->PackedInfo |= (RegF & 0x7) << 13;
+ info->PackedInfo |= (RegI & 0xF) << 16;
+ info->PackedInfo |= (H & 0x1) << 20;
+ info->PackedInfo |= (CR & 0x3) << 21;
+ info->PackedInfo |= (FrameSize & 0x1FF) << 23;
+ return true;
+}
+
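To make the bit layout assembled at the end of tryPackedUnwind above easier to follow, here is a standalone sketch of the packed .pdata word (the ARM64 packed unwind data layout documented by Microsoft, linked below): the flag occupies bits 0-1, the function length bits 2-12, RegF bits 13-15, RegI bits 16-19, H bit 20, CR bits 21-22 and the frame size bits 23-31. The helper name and the example values are illustrative only.

#include <cstdint>
#include <cstdio>

// Standalone sketch of the packed unwind word. FuncLength is in units of
// 4 bytes, FrameSize in units of 16 bytes, matching the fields set on
// info->PackedInfo above.
static uint32_t packUnwindInfo(uint32_t FuncLength, uint32_t RegF,
                               uint32_t RegI, uint32_t H, uint32_t CR,
                               uint32_t FrameSize) {
  uint32_t Packed = 0;
  Packed |= 0x01u << 0;                 // Flag = 1: packed unwind data
  Packed |= (FuncLength & 0x7FFu) << 2;
  Packed |= (RegF & 0x7u) << 13;
  Packed |= (RegI & 0xFu) << 16;
  Packed |= (H & 0x1u) << 20;
  Packed |= (CR & 0x3u) << 21;
  Packed |= (FrameSize & 0x1FFu) << 23;
  return Packed;
}

int main() {
  // A 64-byte function saving x19/x20 plus an fp/lr frame record (CR = 3)
  // with 48 bytes of total stack: FuncLength = 64/4, FrameSize = 48/16.
  std::printf("0x%08X\n", unsigned(packUnwindInfo(16, 0, 2, 0, 3, 3)));
}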
// Populate the .xdata section. The format of .xdata on ARM64 is documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
-static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
+static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
+ bool TryPacked = true) {
// If this UNWIND_INFO already has a symbol, it's already been emitted.
if (info->Symbol)
return;
+ // If there's no unwind info here (not even a terminating UOP_End), the
+ // unwind info is considered bogus and skipped. If this was done in
+ // response to an explicit .seh_handlerdata, the associated trailing
+ // handler data is left orphaned in the xdata section.
+ if (info->empty()) {
+ info->EmitAttempted = true;
+ return;
+ }
+ if (info->EmitAttempted) {
+ // If we tried to emit unwind info before (due to an explicit
+ // .seh_handlerdata directive), but skipped it (because there was no
+ // valid information to emit at the time), and it later got valid unwind
+ // opcodes, we can't emit it here, because the trailing handler data
+ // was already emitted elsewhere in the xdata section.
+ streamer.getContext().reportError(
+ SMLoc(), "Earlier .seh_handlerdata for " + info->Function->getName() +
+ " skipped due to no unwind info at the time "
+ "(.seh_handlerdata too early?), but the function later "
+ "did get unwind info that can't be emitted");
+ return;
+ }
+
+ simplifyOpcodes(info->Instructions, false);
+ for (auto &I : info->EpilogMap)
+ simplifyOpcodes(I.second, true);
MCContext &context = streamer.getContext();
MCSymbol *Label = context.createTempSymbol();
@@ -504,9 +914,7 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
int64_t RawFuncLength;
if (!info->FuncletOrFuncEnd) {
- // FIXME: This is very wrong; we emit SEH data which covers zero bytes
- // of code. But otherwise test/MC/AArch64/seh.s crashes.
- RawFuncLength = 0;
+ report_fatal_error("FuncletOrFuncEnd not set");
} else {
// FIXME: GetAbsDifference tries to compute the length of the function
// immediately, before the whole file is emitted, but in general
@@ -543,6 +951,22 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
uint32_t TotalCodeBytes = PrologCodeBytes;
+ int PackedEpilogOffset = checkPackedEpilog(streamer, info, PrologCodeBytes);
+
+ if (PackedEpilogOffset >= 0 && !info->HandlesExceptions &&
+ FuncLength <= 0x7ff && TryPacked) {
+ // Matching prolog/epilog and no exception handlers; check if the
+ // prolog matches the patterns that can be described by the packed
+ // format.
+
+ // info->Symbol was already set even if we didn't actually write any
+ // unwind info there. Keep using that as indicator that this unwind
+ // info has been generated already.
+
+ if (tryPackedUnwind(info, FuncLength, PackedEpilogOffset))
+ return;
+ }
+
// Process epilogs.
MapVector<MCSymbol *, uint32_t> EpilogInfo;
// Epilogs processed so far.
@@ -575,15 +999,17 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
uint32_t CodeWordsMod = TotalCodeBytes % 4;
if (CodeWordsMod)
CodeWords++;
- uint32_t EpilogCount = info->EpilogMap.size();
+ uint32_t EpilogCount =
+ PackedEpilogOffset >= 0 ? PackedEpilogOffset : info->EpilogMap.size();
bool ExtensionWord = EpilogCount > 31 || TotalCodeBytes > 124;
if (!ExtensionWord) {
row1 |= (EpilogCount & 0x1F) << 22;
row1 |= (CodeWords & 0x1F) << 27;
}
- // E is always 0 right now, TODO: packed epilog setup
if (info->HandlesExceptions) // X
row1 |= 1 << 20;
+ if (PackedEpilogOffset >= 0) // E
+ row1 |= 1 << 21;
row1 |= FuncLength & 0x3FFFF;
streamer.emitInt32(row1);
@@ -648,33 +1074,56 @@ static void ARM64EmitRuntimeFunction(MCStreamer &streamer,
streamer.emitValueToAlignment(4);
EmitSymbolRefWithOfs(streamer, info->Function, info->Begin);
- streamer.emitValue(MCSymbolRefExpr::create(info->Symbol,
- MCSymbolRefExpr::VK_COFF_IMGREL32,
- context),
- 4);
+ if (info->PackedInfo)
+ streamer.emitInt32(info->PackedInfo);
+ else
+ streamer.emitValue(
+ MCSymbolRefExpr::create(info->Symbol, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ context),
+ 4);
}
void llvm::Win64EH::ARM64UnwindEmitter::Emit(MCStreamer &Streamer) const {
// Emit the unwind info structs first.
for (const auto &CFI : Streamer.getWinFrameInfos()) {
+ WinEH::FrameInfo *Info = CFI.get();
+ if (Info->empty())
+ continue;
MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection);
Streamer.SwitchSection(XData);
- ARM64EmitUnwindInfo(Streamer, CFI.get());
+ ARM64EmitUnwindInfo(Streamer, Info);
}
// Now emit RUNTIME_FUNCTION entries.
for (const auto &CFI : Streamer.getWinFrameInfos()) {
+ WinEH::FrameInfo *Info = CFI.get();
+ // ARM64EmitUnwindInfo above clears the info struct, so we can't check
+ // empty here. But if a Symbol is set, we should create the corresponding
+ // pdata entry.
+ if (!Info->Symbol)
+ continue;
MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection);
Streamer.SwitchSection(PData);
- ARM64EmitRuntimeFunction(Streamer, CFI.get());
+ ARM64EmitRuntimeFunction(Streamer, Info);
}
}
-void llvm::Win64EH::ARM64UnwindEmitter::EmitUnwindInfo(
- MCStreamer &Streamer, WinEH::FrameInfo *info) const {
+void llvm::Win64EH::ARM64UnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer,
+ WinEH::FrameInfo *info,
+ bool HandlerData) const {
+ // Called if there's an .seh_handlerdata directive before the end of the
+ // function. This forces writing the xdata record already here - and
+ // in this case, the function isn't actually ended already, but the xdata
+ // record needs to know the function length. In these cases, if the funclet
+ // end hasn't been marked yet, the xdata function length won't cover the
+ // whole function, only up to this point.
+ if (!info->FuncletOrFuncEnd) {
+ Streamer.SwitchSection(info->TextSection);
+ info->FuncletOrFuncEnd = Streamer.emitCFILabel();
+ }
// Switch sections (the static function above is meant to be called from
// here and from Emit().
MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection);
Streamer.SwitchSection(XData);
- ARM64EmitUnwindInfo(Streamer, info);
+ ARM64EmitUnwindInfo(Streamer, info, /* TryPacked = */ !HandlerData);
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCWinCOFFStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCWinCOFFStreamer.cpp
index d8fde4004d44..97cceac74ac2 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -308,6 +308,16 @@ void MCWinCOFFStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
PopSection();
}
+void MCWinCOFFStreamer::emitWeakReference(MCSymbol *AliasS,
+ const MCSymbol *Symbol) {
+ auto *Alias = cast<MCSymbolCOFF>(AliasS);
+ emitSymbolAttribute(Alias, MCSA_Weak);
+
+ getAssembler().registerSymbol(*Symbol);
+ Alias->setVariableValue(MCSymbolRefExpr::create(
+ Symbol, MCSymbolRefExpr::VK_WEAKREF, getContext()));
+}
+
void MCWinCOFFStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment,
SMLoc Loc) {
@@ -340,10 +350,8 @@ void MCWinCOFFStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE) {
const MCSymbol *S = &SRE->getSymbol();
bool Created;
getAssembler().registerSymbol(*S, &Created);
- if (Created) {
- cast<MCSymbolCOFF>(S)->setIsWeakExternal();
+ if (Created)
cast<MCSymbolCOFF>(S)->setExternal(true);
- }
}
void MCWinCOFFStreamer::finalizeCGProfile() {
diff --git a/contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp b/contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp
index c9c88ec58432..973c59057cb5 100644
--- a/contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp
@@ -33,7 +33,12 @@ void StringTableBuilder::initSize() {
case DWARF:
Size = 0;
break;
+ case MachOLinked:
+ case MachO64Linked:
+ Size = 2;
+ break;
case MachO:
+ case MachO64:
case ELF:
// Start the table with a NUL byte.
Size = 1;
@@ -161,8 +166,16 @@ void StringTableBuilder::finalizeStringTable(bool Optimize) {
}
}
- if (K == MachO)
+ if (K == MachO || K == MachOLinked)
Size = alignTo(Size, 4); // Pad to multiple of 4.
+ if (K == MachO64 || K == MachO64Linked)
+ Size = alignTo(Size, 8); // Pad to multiple of 8.
+
+ // According to ld64 the string table of a final linked Mach-O binary starts
+ // with " ", i.e. the first byte is ' ' and the second byte is zero. In
+ // 'initSize()' we reserved the first two bytes for holding this string.
+ if (K == MachOLinked || K == MachO64Linked)
+ StringIndexMap[CachedHashStringRef(" ")] = 0;
// The first byte in an ELF string table must be null, according to the ELF
// specification. In 'initSize()' we reserved the first byte to hold null for
diff --git a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
index f51d908c53e1..930413e83438 100644
--- a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/BinaryFormat/WasmTraits.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
@@ -39,8 +40,8 @@ using namespace llvm;
namespace {
-// Went we ceate the indirect function table we start at 1, so that there is
-// and emtpy slot at 0 and therefore calling a null function pointer will trap.
+// When we create the indirect function table we start at 1, so that there is
+// an empty slot at 0 and therefore calling a null function pointer will trap.
static const uint32_t InitialTableOffset = 1;
// For patching purposes, we need to remember where each section starts, both
@@ -56,50 +57,6 @@ struct SectionBookkeeping {
uint32_t Index;
};
-// The signature of a wasm function or event, in a struct capable of being used
-// as a DenseMap key.
-// TODO: Consider using wasm::WasmSignature directly instead.
-struct WasmSignature {
- // Support empty and tombstone instances, needed by DenseMap.
- enum { Plain, Empty, Tombstone } State = Plain;
-
- // The return types of the function.
- SmallVector<wasm::ValType, 1> Returns;
-
- // The parameter types of the function.
- SmallVector<wasm::ValType, 4> Params;
-
- bool operator==(const WasmSignature &Other) const {
- return State == Other.State && Returns == Other.Returns &&
- Params == Other.Params;
- }
-};
-
-// Traits for using WasmSignature in a DenseMap.
-struct WasmSignatureDenseMapInfo {
- static WasmSignature getEmptyKey() {
- WasmSignature Sig;
- Sig.State = WasmSignature::Empty;
- return Sig;
- }
- static WasmSignature getTombstoneKey() {
- WasmSignature Sig;
- Sig.State = WasmSignature::Tombstone;
- return Sig;
- }
- static unsigned getHashValue(const WasmSignature &Sig) {
- uintptr_t Value = Sig.State;
- for (wasm::ValType Ret : Sig.Returns)
- Value += DenseMapInfo<uint32_t>::getHashValue(uint32_t(Ret));
- for (wasm::ValType Param : Sig.Params)
- Value += DenseMapInfo<uint32_t>::getHashValue(uint32_t(Param));
- return Value;
- }
- static bool isEqual(const WasmSignature &LHS, const WasmSignature &RHS) {
- return LHS == RHS;
- }
-};
-
// A wasm data segment. A wasm binary contains only a single data section
// but that can contain many segments, each with their own virtual location
// in memory. Each MCSection data created by llvm is modeled as its own
@@ -216,8 +173,12 @@ static void patchI64(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
}
+bool isDwoSection(const MCSection &Sec) {
+ return Sec.getName().endswith(".dwo");
+}
+
class WasmObjectWriter : public MCObjectWriter {
- support::endian::Writer W;
+ support::endian::Writer *W;
/// The target specific Wasm writer instance.
std::unique_ptr<MCWasmObjectTargetWriter> TargetObjectWriter;
@@ -233,8 +194,8 @@ class WasmObjectWriter : public MCObjectWriter {
// Maps function symbols to the table element index space. Used
// for TABLE_INDEX relocation types (i.e. address taken functions).
DenseMap<const MCSymbolWasm *, uint32_t> TableIndices;
- // Maps function/global symbols to the function/global/event/section index
- // space.
+ // Maps function/global/table symbols to the
+ // function/global/table/event/section index space.
DenseMap<const MCSymbolWasm *, uint32_t> WasmIndices;
DenseMap<const MCSymbolWasm *, uint32_t> GOTIndices;
// Maps data symbols to the Wasm segment and offset/size with the segment.
@@ -252,15 +213,25 @@ class WasmObjectWriter : public MCObjectWriter {
// Map from section to defining function symbol.
DenseMap<const MCSection *, const MCSymbol *> SectionFunctions;
- DenseMap<WasmSignature, uint32_t, WasmSignatureDenseMapInfo> SignatureIndices;
- SmallVector<WasmSignature, 4> Signatures;
+ DenseMap<wasm::WasmSignature, uint32_t> SignatureIndices;
+ SmallVector<wasm::WasmSignature, 4> Signatures;
SmallVector<WasmDataSegment, 4> DataSegments;
unsigned NumFunctionImports = 0;
unsigned NumGlobalImports = 0;
+ unsigned NumTableImports = 0;
unsigned NumEventImports = 0;
uint32_t SectionCount = 0;
- // TargetObjectWriter wrappers.
+ enum class DwoMode {
+ AllSections,
+ NonDwoOnly,
+ DwoOnly,
+ };
+ bool IsSplitDwarf = false;
+ raw_pwrite_stream *OS = nullptr;
+ raw_pwrite_stream *DwoOS = nullptr;
+
+ // TargetObjectWriter wrappers.
bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
bool isEmscripten() const { return TargetObjectWriter->isEmscripten(); }
@@ -270,8 +241,13 @@ class WasmObjectWriter : public MCObjectWriter {
public:
WasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW,
- raw_pwrite_stream &OS)
- : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {}
+ raw_pwrite_stream &OS_)
+ : TargetObjectWriter(std::move(MOTW)), OS(&OS_) {}
+
+ WasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS_, raw_pwrite_stream &DwoOS_)
+ : TargetObjectWriter(std::move(MOTW)), IsSplitDwarf(true), OS(&OS_),
+ DwoOS(&DwoOS_) {}
private:
void reset() override {
@@ -292,6 +268,7 @@ private:
SectionFunctions.clear();
NumFunctionImports = 0;
NumGlobalImports = 0;
+ NumTableImports = 0;
MCObjectWriter::reset();
}
@@ -303,29 +280,33 @@ private:
void executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
-
+ void prepareImports(SmallVectorImpl<wasm::WasmImport> &Imports,
+ MCAssembler &Asm, const MCAsmLayout &Layout);
uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+ uint64_t writeOneObject(MCAssembler &Asm, const MCAsmLayout &Layout,
+ DwoMode Mode);
+
void writeString(const StringRef Str) {
- encodeULEB128(Str.size(), W.OS);
- W.OS << Str;
+ encodeULEB128(Str.size(), W->OS);
+ W->OS << Str;
}
void writeI32(int32_t val) {
char Buffer[4];
support::endian::write32le(Buffer, val);
- W.OS.write(Buffer, sizeof(Buffer));
+ W->OS.write(Buffer, sizeof(Buffer));
}
void writeI64(int64_t val) {
char Buffer[8];
support::endian::write64le(Buffer, val);
- W.OS.write(Buffer, sizeof(Buffer));
+ W->OS.write(Buffer, sizeof(Buffer));
}
- void writeValueType(wasm::ValType Ty) { W.OS << static_cast<char>(Ty); }
+ void writeValueType(wasm::ValType Ty) { W->OS << static_cast<char>(Ty); }
- void writeTypeSection(ArrayRef<WasmSignature> Signatures);
+ void writeTypeSection(ArrayRef<wasm::WasmSignature> Signatures);
void writeImportSection(ArrayRef<wasm::WasmImport> Imports, uint64_t DataSize,
uint32_t NumElements);
void writeFunctionSection(ArrayRef<WasmFunction> Functions);
@@ -337,6 +318,7 @@ private:
uint32_t writeDataSection(const MCAsmLayout &Layout);
void writeEventSection(ArrayRef<wasm::WasmEventType> Events);
void writeGlobalSection(ArrayRef<wasm::WasmGlobal> Globals);
+ void writeTableSection(ArrayRef<wasm::WasmTable> Tables);
void writeRelocSection(uint32_t SectionIndex, StringRef Name,
std::vector<WasmRelocationEntry> &Relocations);
void writeLinkingMetaDataSection(
@@ -346,9 +328,6 @@ private:
void writeCustomSection(WasmCustomSection &CustomSection,
const MCAssembler &Asm, const MCAsmLayout &Layout);
void writeCustomRelocSections();
- void
- updateCustomSectionRelocations(const SmallVector<WasmFunction, 4> &Functions,
- const MCAsmLayout &Layout);
uint64_t getProvisionalValue(const WasmRelocationEntry &RelEntry,
const MCAsmLayout &Layout);
@@ -368,17 +347,17 @@ private:
void WasmObjectWriter::startSection(SectionBookkeeping &Section,
unsigned SectionId) {
LLVM_DEBUG(dbgs() << "startSection " << SectionId << "\n");
- W.OS << char(SectionId);
+ W->OS << char(SectionId);
- Section.SizeOffset = W.OS.tell();
+ Section.SizeOffset = W->OS.tell();
// The section size. We don't know the size yet, so reserve enough space
// for any 32-bit value; we'll patch it later.
- encodeULEB128(0, W.OS, 5);
+ encodeULEB128(0, W->OS, 5);
// The position where the section starts, for measuring its size.
- Section.ContentsOffset = W.OS.tell();
- Section.PayloadOffset = W.OS.tell();
+ Section.ContentsOffset = W->OS.tell();
+ Section.PayloadOffset = W->OS.tell();
Section.Index = SectionCount++;
}
@@ -388,19 +367,19 @@ void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
startSection(Section, wasm::WASM_SEC_CUSTOM);
// The position where the section header ends, for measuring its size.
- Section.PayloadOffset = W.OS.tell();
+ Section.PayloadOffset = W->OS.tell();
// Custom sections in wasm also have a string identifier.
writeString(Name);
// The position where the custom section starts.
- Section.ContentsOffset = W.OS.tell();
+ Section.ContentsOffset = W->OS.tell();
}
// Now that the section is complete and we know how big it is, patch up the
// section size field at the start of the section.
void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
- uint64_t Size = W.OS.tell();
+ uint64_t Size = W->OS.tell();
// /dev/null doesn't support seek/tell and can report offset of 0.
// Simply skip this patching in that case.
if (!Size)
@@ -414,18 +393,25 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
// Write the final section size to the payload_len field, which follows
// the section id byte.
- writePatchableLEB<5>(static_cast<raw_pwrite_stream &>(W.OS), Size,
+ writePatchableLEB<5>(static_cast<raw_pwrite_stream &>(W->OS), Size,
Section.SizeOffset);
}
// Emit the Wasm header.
void WasmObjectWriter::writeHeader(const MCAssembler &Asm) {
- W.OS.write(wasm::WasmMagic, sizeof(wasm::WasmMagic));
- W.write<uint32_t>(wasm::WasmVersion);
+ W->OS.write(wasm::WasmMagic, sizeof(wasm::WasmMagic));
+ W->write<uint32_t>(wasm::WasmVersion);
}
void WasmObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
+ // As a stopgap measure until call_indirect instructions start explicitly
+ // referencing the indirect function table via TABLE_NUMBER relocs, ensure
+ // that the indirect function table import makes it to the output if anything
+ // in the compilation unit has caused it to be present.
+ if (auto *Sym = Asm.getContext().lookupSymbol("__indirect_function_table"))
+ Asm.registerSymbol(*Sym);
+
// Build a map of sections to the function that defines them, for use
// in recordRelocation.
for (const MCSymbol &S : Asm.symbols()) {
@@ -494,6 +480,7 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
  // Currently only supported for metadata sections.
// See: test/MC/WebAssembly/blockaddress.ll
if (Type == wasm::R_WASM_FUNCTION_OFFSET_I32 ||
+ Type == wasm::R_WASM_FUNCTION_OFFSET_I64 ||
Type == wasm::R_WASM_SECTION_OFFSET_I32) {
if (!FixupSection.getKind().isMetadata())
report_fatal_error("relocations for function or section offsets are "
@@ -512,6 +499,29 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
SymA = cast<MCSymbolWasm>(SectionSymbol);
}
+ if (Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB ||
+ Type == wasm::R_WASM_TABLE_INDEX_SLEB ||
+ Type == wasm::R_WASM_TABLE_INDEX_SLEB64 ||
+ Type == wasm::R_WASM_TABLE_INDEX_I32 ||
+ Type == wasm::R_WASM_TABLE_INDEX_I64) {
+ // TABLE_INDEX relocs implicitly use the default indirect function table.
+ auto TableName = "__indirect_function_table";
+ MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(TableName));
+ if (Sym) {
+ if (!Sym->isFunctionTable())
+ Ctx.reportError(
+ Fixup.getLoc(),
+ "symbol '__indirect_function_table' is not a function table");
+ } else {
+ Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(TableName));
+ Sym->setFunctionTable();
+ // The default function table is synthesized by the linker.
+ Sym->setUndefined();
+ }
+ Sym->setUsedInReloc();
+ Asm.registerSymbol(*Sym);
+ }
+
// Relocation other than R_WASM_TYPE_INDEX_LEB are required to be
// against a named symbol.
if (Type != wasm::R_WASM_TYPE_INDEX_LEB) {
@@ -556,7 +566,9 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry,
switch (RelEntry.Type) {
case wasm::R_WASM_TABLE_INDEX_REL_SLEB:
case wasm::R_WASM_TABLE_INDEX_SLEB:
- case wasm::R_WASM_TABLE_INDEX_I32: {
+ case wasm::R_WASM_TABLE_INDEX_SLEB64:
+ case wasm::R_WASM_TABLE_INDEX_I32:
+ case wasm::R_WASM_TABLE_INDEX_I64: {
// Provisional value is table address of the resolved symbol itself
const MCSymbolWasm *Base =
cast<MCSymbolWasm>(Layout.getBaseSymbol(*RelEntry.Symbol));
@@ -573,10 +585,12 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry,
case wasm::R_WASM_GLOBAL_INDEX_LEB:
case wasm::R_WASM_GLOBAL_INDEX_I32:
case wasm::R_WASM_EVENT_INDEX_LEB:
+ case wasm::R_WASM_TABLE_NUMBER_LEB:
// Provisional value is function/global/event Wasm index
assert(WasmIndices.count(RelEntry.Symbol) > 0 && "symbol not found in wasm index space");
return WasmIndices[RelEntry.Symbol];
case wasm::R_WASM_FUNCTION_OFFSET_I32:
+ case wasm::R_WASM_FUNCTION_OFFSET_I64:
case wasm::R_WASM_SECTION_OFFSET_I32: {
const auto &Section =
static_cast<const MCSectionWasm &>(RelEntry.Symbol->getSection());
@@ -589,17 +603,19 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry,
case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64:
case wasm::R_WASM_MEMORY_ADDR_I32:
- case wasm::R_WASM_MEMORY_ADDR_I64: {
- // Provisional value is address of the global
+ case wasm::R_WASM_MEMORY_ADDR_I64:
+ case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB: {
+ // Provisional value is address of the global plus the offset
const MCSymbolWasm *Base =
cast<MCSymbolWasm>(Layout.getBaseSymbol(*RelEntry.Symbol));
// For undefined symbols, use zero
if (!Base->isDefined())
return 0;
- const wasm::WasmDataReference &Ref = DataLocations[Base];
- const WasmDataSegment &Segment = DataSegments[Ref.Segment];
+ const wasm::WasmDataReference &BaseRef = DataLocations[Base],
+ &SymRef = DataLocations[RelEntry.Symbol];
+ const WasmDataSegment &Segment = DataSegments[BaseRef.Segment];
// Ignore overflow. LLVM allows address arithmetic to silently wrap.
- return Segment.Offset + Ref.Offset + RelEntry.Addend;
+ return Segment.Offset + BaseRef.Offset + SymRef.Offset + RelEntry.Addend;
}
default:
llvm_unreachable("invalid relocation type");
@@ -633,11 +649,11 @@ static void addData(SmallVectorImpl<char> &DataBytes,
Fill->getValue());
} else if (auto *LEB = dyn_cast<MCLEBFragment>(&Frag)) {
const SmallVectorImpl<char> &Contents = LEB->getContents();
- DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end());
+ llvm::append_range(DataBytes, Contents);
} else {
const auto &DataFrag = cast<MCDataFragment>(Frag);
const SmallVectorImpl<char> &Contents = DataFrag.getContents();
- DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end());
+ llvm::append_range(DataBytes, Contents);
}
}
@@ -661,7 +677,7 @@ WasmObjectWriter::getRelocationIndexValue(const WasmRelocationEntry &RelEntry) {
void WasmObjectWriter::applyRelocations(
ArrayRef<WasmRelocationEntry> Relocations, uint64_t ContentsOffset,
const MCAsmLayout &Layout) {
- auto &Stream = static_cast<raw_pwrite_stream &>(W.OS);
+ auto &Stream = static_cast<raw_pwrite_stream &>(W->OS);
for (const WasmRelocationEntry &RelEntry : Relocations) {
uint64_t Offset = ContentsOffset +
RelEntry.FixupSection->getSectionOffset() +
@@ -676,6 +692,7 @@ void WasmObjectWriter::applyRelocations(
case wasm::R_WASM_GLOBAL_INDEX_LEB:
case wasm::R_WASM_MEMORY_ADDR_LEB:
case wasm::R_WASM_EVENT_INDEX_LEB:
+ case wasm::R_WASM_TABLE_NUMBER_LEB:
writePatchableLEB<5>(Stream, Value, Offset);
break;
case wasm::R_WASM_MEMORY_ADDR_LEB64:
@@ -688,15 +705,19 @@ void WasmObjectWriter::applyRelocations(
case wasm::R_WASM_GLOBAL_INDEX_I32:
patchI32(Stream, Value, Offset);
break;
+ case wasm::R_WASM_TABLE_INDEX_I64:
case wasm::R_WASM_MEMORY_ADDR_I64:
+ case wasm::R_WASM_FUNCTION_OFFSET_I64:
patchI64(Stream, Value, Offset);
break;
case wasm::R_WASM_TABLE_INDEX_SLEB:
case wasm::R_WASM_TABLE_INDEX_REL_SLEB:
case wasm::R_WASM_MEMORY_ADDR_SLEB:
case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
+ case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB:
writePatchableSLEB<5>(Stream, Value, Offset);
break;
+ case wasm::R_WASM_TABLE_INDEX_SLEB64:
case wasm::R_WASM_MEMORY_ADDR_SLEB64:
case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64:
writePatchableSLEB<10>(Stream, Value, Offset);
@@ -707,21 +728,22 @@ void WasmObjectWriter::applyRelocations(
}
}
-void WasmObjectWriter::writeTypeSection(ArrayRef<WasmSignature> Signatures) {
+void WasmObjectWriter::writeTypeSection(
+ ArrayRef<wasm::WasmSignature> Signatures) {
if (Signatures.empty())
return;
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_TYPE);
- encodeULEB128(Signatures.size(), W.OS);
+ encodeULEB128(Signatures.size(), W->OS);
- for (const WasmSignature &Sig : Signatures) {
- W.OS << char(wasm::WASM_TYPE_FUNC);
- encodeULEB128(Sig.Params.size(), W.OS);
+ for (const wasm::WasmSignature &Sig : Signatures) {
+ W->OS << char(wasm::WASM_TYPE_FUNC);
+ encodeULEB128(Sig.Params.size(), W->OS);
for (wasm::ValType Ty : Sig.Params)
writeValueType(Ty);
- encodeULEB128(Sig.Returns.size(), W.OS);
+ encodeULEB128(Sig.Returns.size(), W->OS);
for (wasm::ValType Ty : Sig.Returns)
writeValueType(Ty);
}
@@ -740,32 +762,32 @@ void WasmObjectWriter::writeImportSection(ArrayRef<wasm::WasmImport> Imports,
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_IMPORT);
- encodeULEB128(Imports.size(), W.OS);
+ encodeULEB128(Imports.size(), W->OS);
for (const wasm::WasmImport &Import : Imports) {
writeString(Import.Module);
writeString(Import.Field);
- W.OS << char(Import.Kind);
+ W->OS << char(Import.Kind);
switch (Import.Kind) {
case wasm::WASM_EXTERNAL_FUNCTION:
- encodeULEB128(Import.SigIndex, W.OS);
+ encodeULEB128(Import.SigIndex, W->OS);
break;
case wasm::WASM_EXTERNAL_GLOBAL:
- W.OS << char(Import.Global.Type);
- W.OS << char(Import.Global.Mutable ? 1 : 0);
+ W->OS << char(Import.Global.Type);
+ W->OS << char(Import.Global.Mutable ? 1 : 0);
break;
case wasm::WASM_EXTERNAL_MEMORY:
- encodeULEB128(Import.Memory.Flags, W.OS);
- encodeULEB128(NumPages, W.OS); // initial
+ encodeULEB128(Import.Memory.Flags, W->OS);
+ encodeULEB128(NumPages, W->OS); // initial
break;
case wasm::WASM_EXTERNAL_TABLE:
- W.OS << char(Import.Table.ElemType);
- encodeULEB128(0, W.OS); // flags
- encodeULEB128(NumElements, W.OS); // initial
+ W->OS << char(Import.Table.ElemType);
+ encodeULEB128(0, W->OS); // flags
+ encodeULEB128(NumElements, W->OS); // initial
break;
case wasm::WASM_EXTERNAL_EVENT:
- encodeULEB128(Import.Event.Attribute, W.OS);
- encodeULEB128(Import.Event.SigIndex, W.OS);
+ encodeULEB128(Import.Event.Attribute, W->OS);
+ encodeULEB128(Import.Event.SigIndex, W->OS);
break;
default:
llvm_unreachable("unsupported import kind");
@@ -782,9 +804,9 @@ void WasmObjectWriter::writeFunctionSection(ArrayRef<WasmFunction> Functions) {
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_FUNCTION);
- encodeULEB128(Functions.size(), W.OS);
+ encodeULEB128(Functions.size(), W->OS);
for (const WasmFunction &Func : Functions)
- encodeULEB128(Func.SigIndex, W.OS);
+ encodeULEB128(Func.SigIndex, W->OS);
endSection(Section);
}
@@ -796,10 +818,10 @@ void WasmObjectWriter::writeEventSection(ArrayRef<wasm::WasmEventType> Events) {
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_EVENT);
- encodeULEB128(Events.size(), W.OS);
+ encodeULEB128(Events.size(), W->OS);
for (const wasm::WasmEventType &Event : Events) {
- encodeULEB128(Event.Attribute, W.OS);
- encodeULEB128(Event.SigIndex, W.OS);
+ encodeULEB128(Event.Attribute, W->OS);
+ encodeULEB128(Event.SigIndex, W->OS);
}
endSection(Section);
@@ -812,17 +834,17 @@ void WasmObjectWriter::writeGlobalSection(ArrayRef<wasm::WasmGlobal> Globals) {
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_GLOBAL);
- encodeULEB128(Globals.size(), W.OS);
+ encodeULEB128(Globals.size(), W->OS);
for (const wasm::WasmGlobal &Global : Globals) {
- encodeULEB128(Global.Type.Type, W.OS);
- W.OS << char(Global.Type.Mutable);
- W.OS << char(Global.InitExpr.Opcode);
+ encodeULEB128(Global.Type.Type, W->OS);
+ W->OS << char(Global.Type.Mutable);
+ W->OS << char(Global.InitExpr.Opcode);
switch (Global.Type.Type) {
case wasm::WASM_TYPE_I32:
- encodeSLEB128(0, W.OS);
+ encodeSLEB128(0, W->OS);
break;
case wasm::WASM_TYPE_I64:
- encodeSLEB128(0, W.OS);
+ encodeSLEB128(0, W->OS);
break;
case wasm::WASM_TYPE_F32:
writeI32(0);
@@ -836,12 +858,30 @@ void WasmObjectWriter::writeGlobalSection(ArrayRef<wasm::WasmGlobal> Globals) {
default:
llvm_unreachable("unexpected type");
}
- W.OS << char(wasm::WASM_OPCODE_END);
+ W->OS << char(wasm::WASM_OPCODE_END);
}
endSection(Section);
}
+void WasmObjectWriter::writeTableSection(ArrayRef<wasm::WasmTable> Tables) {
+ if (Tables.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_TABLE);
+
+ encodeULEB128(Tables.size(), W->OS);
+ for (const wasm::WasmTable &Table : Tables) {
+ encodeULEB128(Table.Type.ElemType, W->OS);
+ encodeULEB128(Table.Type.Limits.Flags, W->OS);
+ encodeULEB128(Table.Type.Limits.Initial, W->OS);
+ if (Table.Type.Limits.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX)
+ encodeULEB128(Table.Type.Limits.Maximum, W->OS);
+ }
+ endSection(Section);
+}
+
void WasmObjectWriter::writeExportSection(ArrayRef<wasm::WasmExport> Exports) {
if (Exports.empty())
return;
@@ -849,11 +889,11 @@ void WasmObjectWriter::writeExportSection(ArrayRef<wasm::WasmExport> Exports) {
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_EXPORT);
- encodeULEB128(Exports.size(), W.OS);
+ encodeULEB128(Exports.size(), W->OS);
for (const wasm::WasmExport &Export : Exports) {
writeString(Export.Name);
- W.OS << char(Export.Kind);
- encodeULEB128(Export.Index, W.OS);
+ W->OS << char(Export.Kind);
+ encodeULEB128(Export.Index, W->OS);
}
endSection(Section);
@@ -866,17 +906,17 @@ void WasmObjectWriter::writeElemSection(ArrayRef<uint32_t> TableElems) {
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_ELEM);
- encodeULEB128(1, W.OS); // number of "segments"
- encodeULEB128(0, W.OS); // the table index
+ encodeULEB128(1, W->OS); // number of "segments"
+ encodeULEB128(0, W->OS); // the table index
// init expr for starting offset
- W.OS << char(wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(InitialTableOffset, W.OS);
- W.OS << char(wasm::WASM_OPCODE_END);
+ W->OS << char(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(InitialTableOffset, W->OS);
+ W->OS << char(wasm::WASM_OPCODE_END);
- encodeULEB128(TableElems.size(), W.OS);
+ encodeULEB128(TableElems.size(), W->OS);
for (uint32_t Elem : TableElems)
- encodeULEB128(Elem, W.OS);
+ encodeULEB128(Elem, W->OS);
endSection(Section);
}
@@ -887,7 +927,7 @@ void WasmObjectWriter::writeDataCountSection() {
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_DATACOUNT);
- encodeULEB128(DataSegments.size(), W.OS);
+ encodeULEB128(DataSegments.size(), W->OS);
endSection(Section);
}
@@ -900,7 +940,7 @@ uint32_t WasmObjectWriter::writeCodeSection(const MCAssembler &Asm,
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_CODE);
- encodeULEB128(Functions.size(), W.OS);
+ encodeULEB128(Functions.size(), W->OS);
for (const WasmFunction &Func : Functions) {
auto &FuncSection = static_cast<MCSectionWasm &>(Func.Sym->getSection());
@@ -909,9 +949,9 @@ uint32_t WasmObjectWriter::writeCodeSection(const MCAssembler &Asm,
if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout))
report_fatal_error(".size expression must be evaluatable");
- encodeULEB128(Size, W.OS);
- FuncSection.setSectionOffset(W.OS.tell() - Section.ContentsOffset);
- Asm.writeSectionData(W.OS, &FuncSection, Layout);
+ encodeULEB128(Size, W->OS);
+ FuncSection.setSectionOffset(W->OS.tell() - Section.ContentsOffset);
+ Asm.writeSectionData(W->OS, &FuncSection, Layout);
}
// Apply fixups.
@@ -928,22 +968,21 @@ uint32_t WasmObjectWriter::writeDataSection(const MCAsmLayout &Layout) {
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_DATA);
- encodeULEB128(DataSegments.size(), W.OS); // count
+ encodeULEB128(DataSegments.size(), W->OS); // count
for (const WasmDataSegment &Segment : DataSegments) {
- encodeULEB128(Segment.InitFlags, W.OS); // flags
- if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX)
- encodeULEB128(0, W.OS); // memory index
- if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) {
- W.OS << char(Segment.Offset > std::numeric_limits<int32_t>().max()
- ? wasm::WASM_OPCODE_I64_CONST
- : wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(Segment.Offset, W.OS); // offset
- W.OS << char(wasm::WASM_OPCODE_END);
+ encodeULEB128(Segment.InitFlags, W->OS); // flags
+ if (Segment.InitFlags & wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX)
+ encodeULEB128(0, W->OS); // memory index
+ if ((Segment.InitFlags & wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
+ W->OS << char(Segment.Offset > INT32_MAX ? wasm::WASM_OPCODE_I64_CONST
+ : wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(Segment.Offset, W->OS); // offset
+ W->OS << char(wasm::WASM_OPCODE_END);
}
- encodeULEB128(Segment.Data.size(), W.OS); // size
- Segment.Section->setSectionOffset(W.OS.tell() - Section.ContentsOffset);
- W.OS << Segment.Data; // data
+ encodeULEB128(Segment.Data.size(), W->OS); // size
+ Segment.Section->setSectionOffset(W->OS.tell() - Section.ContentsOffset);
+ W->OS << Segment.Data; // data
}
// Apply fixups.
@@ -976,18 +1015,18 @@ void WasmObjectWriter::writeRelocSection(
SectionBookkeeping Section;
startCustomSection(Section, std::string("reloc.") + Name.str());
- encodeULEB128(SectionIndex, W.OS);
- encodeULEB128(Relocs.size(), W.OS);
+ encodeULEB128(SectionIndex, W->OS);
+ encodeULEB128(Relocs.size(), W->OS);
for (const WasmRelocationEntry &RelEntry : Relocs) {
uint64_t Offset =
RelEntry.Offset + RelEntry.FixupSection->getSectionOffset();
uint32_t Index = getRelocationIndexValue(RelEntry);
- W.OS << char(RelEntry.Type);
- encodeULEB128(Offset, W.OS);
- encodeULEB128(Index, W.OS);
+ W->OS << char(RelEntry.Type);
+ encodeULEB128(Offset, W->OS);
+ encodeULEB128(Index, W->OS);
if (RelEntry.hasAddend())
- encodeSLEB128(RelEntry.Addend, W.OS);
+ encodeSLEB128(RelEntry.Addend, W->OS);
}
endSection(Section);
@@ -1006,20 +1045,21 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
const std::map<StringRef, std::vector<WasmComdatEntry>> &Comdats) {
SectionBookkeeping Section;
startCustomSection(Section, "linking");
- encodeULEB128(wasm::WasmMetadataVersion, W.OS);
+ encodeULEB128(wasm::WasmMetadataVersion, W->OS);
SectionBookkeeping SubSection;
if (SymbolInfos.size() != 0) {
startSection(SubSection, wasm::WASM_SYMBOL_TABLE);
- encodeULEB128(SymbolInfos.size(), W.OS);
+ encodeULEB128(SymbolInfos.size(), W->OS);
for (const wasm::WasmSymbolInfo &Sym : SymbolInfos) {
- encodeULEB128(Sym.Kind, W.OS);
- encodeULEB128(Sym.Flags, W.OS);
+ encodeULEB128(Sym.Kind, W->OS);
+ encodeULEB128(Sym.Flags, W->OS);
switch (Sym.Kind) {
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
case wasm::WASM_SYMBOL_TYPE_EVENT:
- encodeULEB128(Sym.ElementIndex, W.OS);
+ case wasm::WASM_SYMBOL_TYPE_TABLE:
+ encodeULEB128(Sym.ElementIndex, W->OS);
if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 ||
(Sym.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
writeString(Sym.Name);
@@ -1027,15 +1067,15 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
case wasm::WASM_SYMBOL_TYPE_DATA:
writeString(Sym.Name);
if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) {
- encodeULEB128(Sym.DataRef.Segment, W.OS);
- encodeULEB128(Sym.DataRef.Offset, W.OS);
- encodeULEB128(Sym.DataRef.Size, W.OS);
+ encodeULEB128(Sym.DataRef.Segment, W->OS);
+ encodeULEB128(Sym.DataRef.Offset, W->OS);
+ encodeULEB128(Sym.DataRef.Size, W->OS);
}
break;
case wasm::WASM_SYMBOL_TYPE_SECTION: {
const uint32_t SectionIndex =
CustomSections[Sym.ElementIndex].OutputIndex;
- encodeULEB128(SectionIndex, W.OS);
+ encodeULEB128(SectionIndex, W->OS);
break;
}
default:
@@ -1047,35 +1087,35 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
if (DataSegments.size()) {
startSection(SubSection, wasm::WASM_SEGMENT_INFO);
- encodeULEB128(DataSegments.size(), W.OS);
+ encodeULEB128(DataSegments.size(), W->OS);
for (const WasmDataSegment &Segment : DataSegments) {
writeString(Segment.Name);
- encodeULEB128(Segment.Alignment, W.OS);
- encodeULEB128(Segment.LinkerFlags, W.OS);
+ encodeULEB128(Segment.Alignment, W->OS);
+ encodeULEB128(Segment.LinkerFlags, W->OS);
}
endSection(SubSection);
}
if (!InitFuncs.empty()) {
startSection(SubSection, wasm::WASM_INIT_FUNCS);
- encodeULEB128(InitFuncs.size(), W.OS);
+ encodeULEB128(InitFuncs.size(), W->OS);
for (auto &StartFunc : InitFuncs) {
- encodeULEB128(StartFunc.first, W.OS); // priority
- encodeULEB128(StartFunc.second, W.OS); // function index
+ encodeULEB128(StartFunc.first, W->OS); // priority
+ encodeULEB128(StartFunc.second, W->OS); // function index
}
endSection(SubSection);
}
if (Comdats.size()) {
startSection(SubSection, wasm::WASM_COMDAT_INFO);
- encodeULEB128(Comdats.size(), W.OS);
+ encodeULEB128(Comdats.size(), W->OS);
for (const auto &C : Comdats) {
writeString(C.first);
- encodeULEB128(0, W.OS); // flags for future use
- encodeULEB128(C.second.size(), W.OS);
+ encodeULEB128(0, W->OS); // flags for future use
+ encodeULEB128(C.second.size(), W->OS);
for (const WasmComdatEntry &Entry : C.second) {
- encodeULEB128(Entry.Kind, W.OS);
- encodeULEB128(Entry.Index, W.OS);
+ encodeULEB128(Entry.Kind, W->OS);
+ encodeULEB128(Entry.Index, W->OS);
}
}
endSection(SubSection);
@@ -1091,8 +1131,8 @@ void WasmObjectWriter::writeCustomSection(WasmCustomSection &CustomSection,
auto *Sec = CustomSection.Section;
startCustomSection(Section, CustomSection.Name);
- Sec->setSectionOffset(W.OS.tell() - Section.ContentsOffset);
- Asm.writeSectionData(W.OS, Sec, Layout);
+ Sec->setSectionOffset(W->OS.tell() - Section.ContentsOffset);
+ Asm.writeSectionData(W->OS, Sec, Layout);
CustomSection.OutputContentsOffset = Section.ContentsOffset;
CustomSection.OutputIndex = Section.Index;
@@ -1119,7 +1159,7 @@ uint32_t WasmObjectWriter::getEventType(const MCSymbolWasm &Symbol) {
void WasmObjectWriter::registerFunctionType(const MCSymbolWasm &Symbol) {
assert(Symbol.isFunction());
- WasmSignature S;
+ wasm::WasmSignature S;
if (auto *Sig = Symbol.getSignature()) {
S.Returns = Sig->Returns;
@@ -1141,7 +1181,7 @@ void WasmObjectWriter::registerEventType(const MCSymbolWasm &Symbol) {
// TODO Currently we don't generate imported exceptions, but if we do, we
  // should have a way of inferring types of imported exceptions.
- WasmSignature S;
+ wasm::WasmSignature S;
if (auto *Sig = Symbol.getSignature()) {
S.Returns = Sig->Returns;
S.Params = Sig->Params;
@@ -1173,24 +1213,9 @@ static bool isInSymtab(const MCSymbolWasm &Sym) {
return true;
}
-uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- uint64_t StartOffset = W.OS.tell();
-
- LLVM_DEBUG(dbgs() << "WasmObjectWriter::writeObject\n");
-
- // Collect information from the available symbols.
- SmallVector<WasmFunction, 4> Functions;
- SmallVector<uint32_t, 4> TableElems;
- SmallVector<wasm::WasmImport, 4> Imports;
- SmallVector<wasm::WasmExport, 4> Exports;
- SmallVector<wasm::WasmEventType, 1> Events;
- SmallVector<wasm::WasmGlobal, 1> Globals;
- SmallVector<wasm::WasmSymbolInfo, 4> SymbolInfos;
- SmallVector<std::pair<uint16_t, uint32_t>, 2> InitFuncs;
- std::map<StringRef, std::vector<WasmComdatEntry>> Comdats;
- uint64_t DataSize = 0;
-
+void WasmObjectWriter::prepareImports(
+ SmallVectorImpl<wasm::WasmImport> &Imports, MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
// For now, always emit the memory import, since loads and stores are not
// valid without it. In the future, we could perhaps be more clever and omit
// it if there are no loads or stores.
@@ -1202,16 +1227,6 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
: wasm::WASM_LIMITS_FLAG_NONE;
Imports.push_back(MemImport);
- // For now, always emit the table section, since indirect calls are not
- // valid without it. In the future, we could perhaps be more clever and omit
- // it if there are no indirect calls.
- wasm::WasmImport TableImport;
- TableImport.Module = "env";
- TableImport.Field = "__indirect_function_table";
- TableImport.Kind = wasm::WASM_EXTERNAL_TABLE;
- TableImport.Table.ElemType = wasm::WASM_TYPE_FUNCREF;
- Imports.push_back(TableImport);
-
// Populate SignatureIndices, and Imports and WasmIndices for undefined
// symbols. This must be done before populating WasmIndices for defined
// symbols.
@@ -1221,8 +1236,11 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// Register types for all functions, including those with private linkage
// (because wasm always needs a type signature).
if (WS.isFunction()) {
- const MCSymbolWasm *Base = cast<MCSymbolWasm>(Layout.getBaseSymbol(S));
- registerFunctionType(*Base);
+ const auto *BS = Layout.getBaseSymbol(S);
+ if (!BS)
+ report_fatal_error(Twine(S.getName()) +
+ ": absolute addressing not supported!");
+ registerFunctionType(*cast<MCSymbolWasm>(BS));
}
if (WS.isEvent())
@@ -1267,6 +1285,23 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
Imports.push_back(Import);
assert(WasmIndices.count(&WS) == 0);
WasmIndices[&WS] = NumEventImports++;
+ } else if (WS.isTable()) {
+ if (WS.isWeak())
+ report_fatal_error("undefined table symbol cannot be weak");
+
+ wasm::WasmImport Import;
+ Import.Module = WS.getImportModule();
+ Import.Field = WS.getImportName();
+ Import.Kind = wasm::WASM_EXTERNAL_TABLE;
+ wasm::ValType ElemType = WS.getTableType();
+ Import.Table.ElemType = uint8_t(ElemType);
+ // FIXME: Extend table type to include limits? For now we don't specify
+ // a min or max which does not place any restrictions on the size of the
+ // imported table.
+ Import.Table.Limits = {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
+ Imports.push_back(Import);
+ assert(WasmIndices.count(&WS) == 0);
+ WasmIndices[&WS] = NumTableImports++;
}
}
}
@@ -1288,6 +1323,47 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
GOTIndices[&WS] = NumGlobalImports++;
}
}
+}
+
+uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ support::endian::Writer MainWriter(*OS, support::little);
+ W = &MainWriter;
+ if (IsSplitDwarf) {
+ uint64_t TotalSize = writeOneObject(Asm, Layout, DwoMode::NonDwoOnly);
+ assert(DwoOS);
+ support::endian::Writer DwoWriter(*DwoOS, support::little);
+ W = &DwoWriter;
+ return TotalSize + writeOneObject(Asm, Layout, DwoMode::DwoOnly);
+ } else {
+ return writeOneObject(Asm, Layout, DwoMode::AllSections);
+ }
+}
+
+uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ DwoMode Mode) {
+ uint64_t StartOffset = W->OS.tell();
+ SectionCount = 0;
+ CustomSections.clear();
+
+ LLVM_DEBUG(dbgs() << "WasmObjectWriter::writeObject\n");
+
+ // Collect information from the available symbols.
+ SmallVector<WasmFunction, 4> Functions;
+ SmallVector<uint32_t, 4> TableElems;
+ SmallVector<wasm::WasmImport, 4> Imports;
+ SmallVector<wasm::WasmExport, 4> Exports;
+ SmallVector<wasm::WasmEventType, 1> Events;
+ SmallVector<wasm::WasmGlobal, 1> Globals;
+ SmallVector<wasm::WasmTable, 1> Tables;
+ SmallVector<wasm::WasmSymbolInfo, 4> SymbolInfos;
+ SmallVector<std::pair<uint16_t, uint32_t>, 2> InitFuncs;
+ std::map<StringRef, std::vector<WasmComdatEntry>> Comdats;
+ uint64_t DataSize = 0;
+ if (Mode != DwoMode::DwoOnly) {
+ prepareImports(Imports, Asm, Layout);
+ }
// Populate DataSegments and CustomSections, which must be done before
// populating DataLocations.
@@ -1295,6 +1371,14 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
auto &Section = static_cast<MCSectionWasm &>(Sec);
StringRef SectionName = Section.getName();
+ if (Mode == DwoMode::NonDwoOnly && isDwoSection(Sec))
+ continue;
+ if (Mode == DwoMode::DwoOnly && !isDwoSection(Sec))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Processing Section " << SectionName << " group "
+ << Section.getGroup() << "\n";);
+
// .init_array sections are handled specially elsewhere.
if (SectionName.startswith(".init_array"))
continue;
@@ -1309,8 +1393,9 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
DataSegments.emplace_back();
WasmDataSegment &Segment = DataSegments.back();
Segment.Name = SectionName;
- Segment.InitFlags =
- Section.getPassive() ? (uint32_t)wasm::WASM_SEGMENT_IS_PASSIVE : 0;
+ Segment.InitFlags = Section.getPassive()
+ ? (uint32_t)wasm::WASM_DATA_SEGMENT_IS_PASSIVE
+ : 0;
Segment.Offset = DataSize;
Segment.Section = &Section;
addData(Segment.Data, Section);
@@ -1335,10 +1420,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
MCSymbol *Begin = Sec.getBeginSymbol();
if (Begin) {
+ assert(WasmIndices.count(cast<MCSymbolWasm>(Begin)) == 0);
WasmIndices[cast<MCSymbolWasm>(Begin)] = CustomSections.size();
- if (SectionName != Begin->getName())
- report_fatal_error("section name and begin symbol should match: " +
- Twine(SectionName));
}
// Separate out the producers and target features sections
@@ -1352,200 +1435,232 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
continue;
}
+ // Custom sections can also belong to COMDAT groups. In this case the
+ // descriptor's "index" field is the section index (in the final object
+ // file), but that is not known until after layout, so it must be fixed up
+ // later.
+ if (const MCSymbolWasm *C = Section.getGroup()) {
+ Comdats[C->getName()].emplace_back(
+ WasmComdatEntry{wasm::WASM_COMDAT_SECTION,
+ static_cast<uint32_t>(CustomSections.size())});
+ }
+
CustomSections.emplace_back(Name, &Section);
}
}
- // Populate WasmIndices and DataLocations for defined symbols.
- for (const MCSymbol &S : Asm.symbols()) {
- // Ignore unnamed temporary symbols, which aren't ever exported, imported,
- // or used in relocations.
- if (S.isTemporary() && S.getName().empty())
- continue;
+ if (Mode != DwoMode::DwoOnly) {
+ // Populate WasmIndices and DataLocations for defined symbols.
+ for (const MCSymbol &S : Asm.symbols()) {
+ // Ignore unnamed temporary symbols, which aren't ever exported, imported,
+ // or used in relocations.
+ if (S.isTemporary() && S.getName().empty())
+ continue;
- const auto &WS = static_cast<const MCSymbolWasm &>(S);
- LLVM_DEBUG(
- dbgs() << "MCSymbol: " << toString(WS.getType()) << " '" << S << "'"
- << " isDefined=" << S.isDefined() << " isExternal="
- << S.isExternal() << " isTemporary=" << S.isTemporary()
- << " isWeak=" << WS.isWeak() << " isHidden=" << WS.isHidden()
- << " isVariable=" << WS.isVariable() << "\n");
-
- if (WS.isVariable())
- continue;
- if (WS.isComdat() && !WS.isDefined())
- continue;
+ const auto &WS = static_cast<const MCSymbolWasm &>(S);
+ LLVM_DEBUG(
+ dbgs() << "MCSymbol: " << toString(WS.getType()) << " '" << S << "'"
+ << " isDefined=" << S.isDefined() << " isExternal="
+ << S.isExternal() << " isTemporary=" << S.isTemporary()
+ << " isWeak=" << WS.isWeak() << " isHidden=" << WS.isHidden()
+ << " isVariable=" << WS.isVariable() << "\n");
- if (WS.isFunction()) {
- unsigned Index;
- if (WS.isDefined()) {
- if (WS.getOffset() != 0)
- report_fatal_error(
- "function sections must contain one function each");
-
- if (WS.getSize() == nullptr)
- report_fatal_error(
- "function symbols must have a size set with .size");
-
- // A definition. Write out the function body.
- Index = NumFunctionImports + Functions.size();
- WasmFunction Func;
- Func.SigIndex = getFunctionType(WS);
- Func.Sym = &WS;
- WasmIndices[&WS] = Index;
- Functions.push_back(Func);
-
- auto &Section = static_cast<MCSectionWasm &>(WS.getSection());
- if (const MCSymbolWasm *C = Section.getGroup()) {
- Comdats[C->getName()].emplace_back(
- WasmComdatEntry{wasm::WASM_COMDAT_FUNCTION, Index});
+ if (WS.isVariable())
+ continue;
+ if (WS.isComdat() && !WS.isDefined())
+ continue;
+
+ if (WS.isFunction()) {
+ unsigned Index;
+ if (WS.isDefined()) {
+ if (WS.getOffset() != 0)
+ report_fatal_error(
+ "function sections must contain one function each");
+
+ if (WS.getSize() == nullptr)
+ report_fatal_error(
+ "function symbols must have a size set with .size");
+
+ // A definition. Write out the function body.
+ Index = NumFunctionImports + Functions.size();
+ WasmFunction Func;
+ Func.SigIndex = getFunctionType(WS);
+ Func.Sym = &WS;
+ assert(WasmIndices.count(&WS) == 0);
+ WasmIndices[&WS] = Index;
+ Functions.push_back(Func);
+
+ auto &Section = static_cast<MCSectionWasm &>(WS.getSection());
+ if (const MCSymbolWasm *C = Section.getGroup()) {
+ Comdats[C->getName()].emplace_back(
+ WasmComdatEntry{wasm::WASM_COMDAT_FUNCTION, Index});
+ }
+
+ if (WS.hasExportName()) {
+ wasm::WasmExport Export;
+ Export.Name = WS.getExportName();
+ Export.Kind = wasm::WASM_EXTERNAL_FUNCTION;
+ Export.Index = Index;
+ Exports.push_back(Export);
+ }
+ } else {
+ // An import; the index was assigned above.
+ Index = WasmIndices.find(&WS)->second;
}
- if (WS.hasExportName()) {
- wasm::WasmExport Export;
- Export.Name = WS.getExportName();
- Export.Kind = wasm::WASM_EXTERNAL_FUNCTION;
- Export.Index = Index;
- Exports.push_back(Export);
+ LLVM_DEBUG(dbgs() << " -> function index: " << Index << "\n");
+
+ } else if (WS.isData()) {
+ if (!isInSymtab(WS))
+ continue;
+
+ if (!WS.isDefined()) {
+ LLVM_DEBUG(dbgs() << " -> segment index: -1"
+ << "\n");
+ continue;
}
- } else {
- // An import; the index was assigned above.
- Index = WasmIndices.find(&WS)->second;
- }
- LLVM_DEBUG(dbgs() << " -> function index: " << Index << "\n");
+ if (!WS.getSize())
+ report_fatal_error("data symbols must have a size set with .size: " +
+ WS.getName());
- } else if (WS.isData()) {
- if (!isInSymtab(WS))
- continue;
+ int64_t Size = 0;
+ if (!WS.getSize()->evaluateAsAbsolute(Size, Layout))
+ report_fatal_error(".size expression must be evaluatable");
- if (!WS.isDefined()) {
- LLVM_DEBUG(dbgs() << " -> segment index: -1"
- << "\n");
- continue;
- }
+ auto &DataSection = static_cast<MCSectionWasm &>(WS.getSection());
+ if (!DataSection.isWasmData())
+ report_fatal_error("data symbols must live in a data section: " +
+ WS.getName());
- if (!WS.getSize())
- report_fatal_error("data symbols must have a size set with .size: " +
- WS.getName());
-
- int64_t Size = 0;
- if (!WS.getSize()->evaluateAsAbsolute(Size, Layout))
- report_fatal_error(".size expression must be evaluatable");
-
- auto &DataSection = static_cast<MCSectionWasm &>(WS.getSection());
- if (!DataSection.isWasmData())
- report_fatal_error("data symbols must live in a data section: " +
- WS.getName());
-
- // For each data symbol, export it in the symtab as a reference to the
- // corresponding Wasm data segment.
- wasm::WasmDataReference Ref = wasm::WasmDataReference{
- DataSection.getSegmentIndex(), Layout.getSymbolOffset(WS),
- static_cast<uint64_t>(Size)};
- DataLocations[&WS] = Ref;
- LLVM_DEBUG(dbgs() << " -> segment index: " << Ref.Segment << "\n");
-
- } else if (WS.isGlobal()) {
- // A "true" Wasm global (currently just __stack_pointer)
- if (WS.isDefined()) {
- assert(WasmIndices.count(&WS) == 0);
- wasm::WasmGlobal Global;
- Global.Type = WS.getGlobalType();
- Global.Index = NumGlobalImports + Globals.size();
- switch (Global.Type.Type) {
- case wasm::WASM_TYPE_I32:
- Global.InitExpr.Opcode = wasm::WASM_OPCODE_I32_CONST;
- break;
- case wasm::WASM_TYPE_I64:
- Global.InitExpr.Opcode = wasm::WASM_OPCODE_I64_CONST;
- break;
- case wasm::WASM_TYPE_F32:
- Global.InitExpr.Opcode = wasm::WASM_OPCODE_F32_CONST;
- break;
- case wasm::WASM_TYPE_F64:
- Global.InitExpr.Opcode = wasm::WASM_OPCODE_F64_CONST;
- break;
- case wasm::WASM_TYPE_EXTERNREF:
- Global.InitExpr.Opcode = wasm::WASM_OPCODE_REF_NULL;
- break;
- default:
- llvm_unreachable("unexpected type");
+ // For each data symbol, export it in the symtab as a reference to the
+ // corresponding Wasm data segment.
+ wasm::WasmDataReference Ref = wasm::WasmDataReference{
+ DataSection.getSegmentIndex(), Layout.getSymbolOffset(WS),
+ static_cast<uint64_t>(Size)};
+ assert(DataLocations.count(&WS) == 0);
+ DataLocations[&WS] = Ref;
+ LLVM_DEBUG(dbgs() << " -> segment index: " << Ref.Segment << "\n");
+
+ } else if (WS.isGlobal()) {
+ // A "true" Wasm global (currently just __stack_pointer)
+ if (WS.isDefined()) {
+ wasm::WasmGlobal Global;
+ Global.Type = WS.getGlobalType();
+ Global.Index = NumGlobalImports + Globals.size();
+ switch (Global.Type.Type) {
+ case wasm::WASM_TYPE_I32:
+ Global.InitExpr.Opcode = wasm::WASM_OPCODE_I32_CONST;
+ break;
+ case wasm::WASM_TYPE_I64:
+ Global.InitExpr.Opcode = wasm::WASM_OPCODE_I64_CONST;
+ break;
+ case wasm::WASM_TYPE_F32:
+ Global.InitExpr.Opcode = wasm::WASM_OPCODE_F32_CONST;
+ break;
+ case wasm::WASM_TYPE_F64:
+ Global.InitExpr.Opcode = wasm::WASM_OPCODE_F64_CONST;
+ break;
+ case wasm::WASM_TYPE_EXTERNREF:
+ Global.InitExpr.Opcode = wasm::WASM_OPCODE_REF_NULL;
+ break;
+ default:
+ llvm_unreachable("unexpected type");
+ }
+ assert(WasmIndices.count(&WS) == 0);
+ WasmIndices[&WS] = Global.Index;
+ Globals.push_back(Global);
+ } else {
+ // An import; the index was assigned above
+ LLVM_DEBUG(dbgs() << " -> global index: "
+ << WasmIndices.find(&WS)->second << "\n");
}
- WasmIndices[&WS] = Global.Index;
- Globals.push_back(Global);
- } else {
- // An import; the index was assigned above
- LLVM_DEBUG(dbgs() << " -> global index: "
+ } else if (WS.isTable()) {
+ if (WS.isDefined()) {
+ wasm::WasmTable Table;
+ Table.Index = NumTableImports + Tables.size();
+ Table.Type.ElemType = static_cast<uint8_t>(WS.getTableType());
+ // FIXME: Work on custom limits is ongoing
+ Table.Type.Limits = {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
+ assert(WasmIndices.count(&WS) == 0);
+ WasmIndices[&WS] = Table.Index;
+ Tables.push_back(Table);
+ }
+ LLVM_DEBUG(dbgs() << " -> table index: "
<< WasmIndices.find(&WS)->second << "\n");
- }
- } else if (WS.isEvent()) {
- // C++ exception symbol (__cpp_exception)
- unsigned Index;
- if (WS.isDefined()) {
- assert(WasmIndices.count(&WS) == 0);
- Index = NumEventImports + Events.size();
- wasm::WasmEventType Event;
- Event.SigIndex = getEventType(WS);
- Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
- WasmIndices[&WS] = Index;
- Events.push_back(Event);
+ } else if (WS.isEvent()) {
+ // C++ exception symbol (__cpp_exception)
+ unsigned Index;
+ if (WS.isDefined()) {
+ Index = NumEventImports + Events.size();
+ wasm::WasmEventType Event;
+ Event.SigIndex = getEventType(WS);
+ Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
+ assert(WasmIndices.count(&WS) == 0);
+ WasmIndices[&WS] = Index;
+ Events.push_back(Event);
+ } else {
+ // An import; the index was assigned above.
+ assert(WasmIndices.count(&WS) > 0);
+ }
+ LLVM_DEBUG(dbgs() << " -> event index: "
+ << WasmIndices.find(&WS)->second << "\n");
+
} else {
- // An import; the index was assigned above.
- assert(WasmIndices.count(&WS) > 0);
+ assert(WS.isSection());
}
- LLVM_DEBUG(dbgs() << " -> event index: " << WasmIndices.find(&WS)->second
- << "\n");
-
- } else {
- assert(WS.isSection());
}
- }
- // Populate WasmIndices and DataLocations for aliased symbols. We need to
- // process these in a separate pass because we need to have processed the
- // target of the alias before the alias itself and the symbols are not
- // necessarily ordered in this way.
- for (const MCSymbol &S : Asm.symbols()) {
- if (!S.isVariable())
- continue;
+ // Populate WasmIndices and DataLocations for aliased symbols. We need to
+ // process these in a separate pass because we need to have processed the
+ // target of the alias before the alias itself and the symbols are not
+ // necessarily ordered in this way.
+ for (const MCSymbol &S : Asm.symbols()) {
+ if (!S.isVariable())
+ continue;
- assert(S.isDefined());
+ assert(S.isDefined());
- const MCSymbolWasm *Base = cast<MCSymbolWasm>(Layout.getBaseSymbol(S));
+ const auto *BS = Layout.getBaseSymbol(S);
+ if (!BS)
+ report_fatal_error(Twine(S.getName()) +
+ ": absolute addressing not supported!");
+ const MCSymbolWasm *Base = cast<MCSymbolWasm>(BS);
- // Find the target symbol of this weak alias and export that index
- const auto &WS = static_cast<const MCSymbolWasm &>(S);
- LLVM_DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *Base << "'\n");
-
- if (Base->isFunction()) {
- assert(WasmIndices.count(Base) > 0);
- uint32_t WasmIndex = WasmIndices.find(Base)->second;
- assert(WasmIndices.count(&WS) == 0);
- WasmIndices[&WS] = WasmIndex;
- LLVM_DEBUG(dbgs() << " -> index:" << WasmIndex << "\n");
- } else if (Base->isData()) {
- auto &DataSection = static_cast<MCSectionWasm &>(WS.getSection());
- uint64_t Offset = Layout.getSymbolOffset(S);
- int64_t Size = 0;
- // For data symbol alias we use the size of the base symbol as the
- // size of the alias. When an offset from the base is involved this
- // can result in a offset + size goes past the end of the data section
- // which out object format doesn't support. So we must clamp it.
- if (!Base->getSize()->evaluateAsAbsolute(Size, Layout))
- report_fatal_error(".size expression must be evaluatable");
- const WasmDataSegment &Segment =
- DataSegments[DataSection.getSegmentIndex()];
- Size =
- std::min(static_cast<uint64_t>(Size), Segment.Data.size() - Offset);
- wasm::WasmDataReference Ref = wasm::WasmDataReference{
- DataSection.getSegmentIndex(),
- static_cast<uint32_t>(Layout.getSymbolOffset(S)),
- static_cast<uint32_t>(Size)};
- DataLocations[&WS] = Ref;
- LLVM_DEBUG(dbgs() << " -> index:" << Ref.Segment << "\n");
- } else {
- report_fatal_error("don't yet support global/event aliases");
+ // Find the target symbol of this weak alias and export that index
+ const auto &WS = static_cast<const MCSymbolWasm &>(S);
+ LLVM_DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *Base
+ << "'\n");
+
+ if (Base->isFunction()) {
+ assert(WasmIndices.count(Base) > 0);
+ uint32_t WasmIndex = WasmIndices.find(Base)->second;
+ assert(WasmIndices.count(&WS) == 0);
+ WasmIndices[&WS] = WasmIndex;
+ LLVM_DEBUG(dbgs() << " -> index:" << WasmIndex << "\n");
+ } else if (Base->isData()) {
+ auto &DataSection = static_cast<MCSectionWasm &>(WS.getSection());
+ uint64_t Offset = Layout.getSymbolOffset(S);
+ int64_t Size = 0;
+ // For a data symbol alias we use the size of the base symbol as the
+ // size of the alias. When an offset from the base is involved this
+ // can result in an offset + size that goes past the end of the data
+ // section, which our object format doesn't support. So we must clamp it.
+ if (!Base->getSize()->evaluateAsAbsolute(Size, Layout))
+ report_fatal_error(".size expression must be evaluatable");
+ const WasmDataSegment &Segment =
+ DataSegments[DataSection.getSegmentIndex()];
+ Size =
+ std::min(static_cast<uint64_t>(Size), Segment.Data.size() - Offset);
+ wasm::WasmDataReference Ref = wasm::WasmDataReference{
+ DataSection.getSegmentIndex(),
+ static_cast<uint32_t>(Layout.getSymbolOffset(S)),
+ static_cast<uint32_t>(Size)};
+ DataLocations[&WS] = Ref;
+ LLVM_DEBUG(dbgs() << " -> index:" << Ref.Segment << "\n");
+ } else {
+ report_fatal_error("don't yet support global/event aliases");
+ }
}
}
@@ -1556,6 +1671,10 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
WS.setIndex(InvalidIndex);
continue;
}
+ if (WS.isTable() && WS.getName() == "__indirect_function_table") {
+ // For the moment, don't emit table symbols -- wasm-ld can't handle them.
+ continue;
+ }
LLVM_DEBUG(dbgs() << "adding to symtab: " << WS << "\n");
uint32_t Flags = 0;
@@ -1599,7 +1718,9 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// purely to make the object file's provisional values readable, and is
// ignored by the linker, which re-calculates the relocations itself.
if (Rel.Type != wasm::R_WASM_TABLE_INDEX_I32 &&
+ Rel.Type != wasm::R_WASM_TABLE_INDEX_I64 &&
Rel.Type != wasm::R_WASM_TABLE_INDEX_SLEB &&
+ Rel.Type != wasm::R_WASM_TABLE_INDEX_SLEB64 &&
Rel.Type != wasm::R_WASM_TABLE_INDEX_REL_SLEB)
return;
assert(Rel.Symbol->isFunction());
@@ -1689,23 +1810,41 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// Write out the Wasm header.
writeHeader(Asm);
- writeTypeSection(Signatures);
- writeImportSection(Imports, DataSize, TableElems.size());
- writeFunctionSection(Functions);
- // Skip the "table" section; we import the table instead.
- // Skip the "memory" section; we import the memory instead.
- writeEventSection(Events);
- writeGlobalSection(Globals);
- writeExportSection(Exports);
- writeElemSection(TableElems);
- writeDataCountSection();
- uint32_t CodeSectionIndex = writeCodeSection(Asm, Layout, Functions);
- uint32_t DataSectionIndex = writeDataSection(Layout);
+ uint32_t CodeSectionIndex, DataSectionIndex;
+ if (Mode != DwoMode::DwoOnly) {
+ writeTypeSection(Signatures);
+ writeImportSection(Imports, DataSize, TableElems.size());
+ writeFunctionSection(Functions);
+ writeTableSection(Tables);
+ // Skip the "memory" section; we import the memory instead.
+ writeEventSection(Events);
+ writeGlobalSection(Globals);
+ writeExportSection(Exports);
+ writeElemSection(TableElems);
+ writeDataCountSection();
+
+ CodeSectionIndex = writeCodeSection(Asm, Layout, Functions);
+ DataSectionIndex = writeDataSection(Layout);
+ }
+
+ // The Sections in the COMDAT list have placeholder indices (their index among
+ // custom sections, rather than among all sections). Fix them up here.
+ for (auto &Group : Comdats) {
+ for (auto &Entry : Group.second) {
+ if (Entry.Kind == wasm::WASM_COMDAT_SECTION) {
+ Entry.Index += SectionCount;
+ }
+ }
+ }
for (auto &CustomSection : CustomSections)
writeCustomSection(CustomSection, Asm, Layout);
- writeLinkingMetaDataSection(SymbolInfos, InitFuncs, Comdats);
- writeRelocSection(CodeSectionIndex, "CODE", CodeRelocations);
- writeRelocSection(DataSectionIndex, "DATA", DataRelocations);
+
+ if (Mode != DwoMode::DwoOnly) {
+ writeLinkingMetaDataSection(SymbolInfos, InitFuncs, Comdats);
+
+ writeRelocSection(CodeSectionIndex, "CODE", CodeRelocations);
+ writeRelocSection(DataSectionIndex, "DATA", DataRelocations);
+ }
writeCustomRelocSections();
if (ProducersSection)
writeCustomSection(*ProducersSection, Asm, Layout);
@@ -1713,7 +1852,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
writeCustomSection(*TargetFeaturesSection, Asm, Layout);
// TODO: Translate the .comment section to the output.
- return W.OS.tell() - StartOffset;
+ return W->OS.tell() - StartOffset;
}
std::unique_ptr<MCObjectWriter>
@@ -1721,3 +1860,10 @@ llvm::createWasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW,
raw_pwrite_stream &OS) {
return std::make_unique<WasmObjectWriter>(std::move(MOTW), OS);
}
+
+std::unique_ptr<MCObjectWriter>
+llvm::createWasmDwoObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS,
+ raw_pwrite_stream &DwoOS) {
+ return std::make_unique<WasmObjectWriter>(std::move(MOTW), OS, DwoOS);
+}
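The startSection/endSection pair in the Wasm writer above reserves a fixed-width LEB128 size field and back-patches it once the section's end offset is known. Below is a minimal standalone sketch of that encoding idea; the helper name encodePaddedULEB128 and the byte values are invented for illustration and are not LLVM's writePatchableLEB implementation.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Encode Value as ULEB128 padded with continuation bytes to exactly 5 bytes,
// so the field can later be overwritten in place with any 32-bit value.
static void encodePaddedULEB128(uint32_t Value, std::vector<uint8_t> &Out) {
  for (int I = 0; I < 5; ++I) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (I != 4)
      Byte |= 0x80; // continuation bit keeps the field at a fixed width
    Out.push_back(Byte);
  }
}

int main() {
  std::vector<uint8_t> Section;
  Section.push_back(0x01);                 // hypothetical section id byte
  size_t SizeOffset = Section.size();      // remember where the size field is
  encodePaddedULEB128(0, Section);         // placeholder size, patched below
  size_t ContentsOffset = Section.size();
  Section.insert(Section.end(), {0xde, 0xad, 0xbe, 0xef}); // fake payload
  // Patch the real payload size into the reserved 5-byte field.
  std::vector<uint8_t> Patch;
  encodePaddedULEB128(uint32_t(Section.size() - ContentsOffset), Patch);
  std::copy(Patch.begin(), Patch.end(), Section.begin() + SizeOffset);
  for (uint8_t B : Section)
    std::printf("%02x ", B);
  std::printf("\n");
  return 0;
}

Because the padding bytes carry continuation bits, the patched field still decodes as an ordinary ULEB128 value, which is why the size can be fixed up after the payload has been streamed out.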
diff --git a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 8e7bf1eb0169..901d2c06e716 100644
--- a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -353,9 +353,10 @@ COFFSymbol *WinCOFFObjectWriter::getLinkedSymbol(const MCSymbol &Symbol) {
return nullptr;
const MCSymbol &Aliasee = SymRef->getSymbol();
- if (!Aliasee.isUndefined())
+ if (Aliasee.isUndefined() || Aliasee.isExternal())
+ return GetOrCreateCOFFSymbol(&Aliasee);
+ else
return nullptr;
- return GetOrCreateCOFFSymbol(&Aliasee);
}
/// This function takes a symbol data object from the assembler
diff --git a/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp
index 0dabdc9777d6..031eceaadf06 100644
--- a/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -138,12 +138,13 @@ struct Section {
Group->clear();
}
- Section(const char *N, XCOFF::SectionTypeFlags Flags, bool IsVirtual,
+ Section(StringRef N, XCOFF::SectionTypeFlags Flags, bool IsVirtual,
CsectGroups Groups)
- : Address(0), Size(0), FileOffsetToData(0), FileOffsetToRelocations(0),
- RelocationCount(0), Flags(Flags), Index(UninitializedIndex),
- IsVirtual(IsVirtual), Groups(Groups) {
- strncpy(Name, N, XCOFF::NameSize);
+ : Name(), Address(0), Size(0), FileOffsetToData(0),
+ FileOffsetToRelocations(0), RelocationCount(0), Flags(Flags),
+ Index(UninitializedIndex), IsVirtual(IsVirtual), Groups(Groups) {
+ assert(N.size() <= XCOFF::NameSize && "section name too long");
+ memcpy(Name, N.data(), N.size());
}
};
@@ -304,6 +305,7 @@ CsectGroup &XCOFFObjectWriter::getCsectGroup(const MCSectionXCOFF *MCSec) {
"in this CsectGroup.");
return TOCCsects;
case XCOFF::XMC_TC:
+ case XCOFF::XMC_TE:
assert(XCOFF::XTY_SD == MCSec->getCSectType() &&
"Only an initialized csect can contain TC entry.");
assert(!TOCCsects.empty() &&
@@ -427,9 +429,18 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
// The FixedValue should be symbol's virtual address in this object file
// plus any constant value that we might get.
FixedValue = getVirtualAddress(SymA, SymASec) + Target.getConstant();
- else if (Type == XCOFF::RelocationType::R_TOC)
- // The FixedValue should be the TC entry offset from TOC-base.
- FixedValue = SectionMap[SymASec]->Address - TOCCsects.front().Address;
+ else if (Type == XCOFF::RelocationType::R_TOC ||
+ Type == XCOFF::RelocationType::R_TOCL) {
+ // The FixedValue should be the TOC entry offset from the TOC-base plus any
+ // constant offset value.
+ const int64_t TOCEntryOffset = SectionMap[SymASec]->Address -
+ TOCCsects.front().Address +
+ Target.getConstant();
+ if (Type == XCOFF::RelocationType::R_TOC && !isInt<16>(TOCEntryOffset))
+ report_fatal_error("TOCEntryOffset overflows in small code model mode");
+
+ FixedValue = TOCEntryOffset;
+ }
assert(
(TargetObjectWriter->is64Bit() ||
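The R_TOC case in the XCOFF hunk above rejects TOC-entry offsets that do not fit the 16-bit displacement available under the small code model. A minimal sketch of that kind of range check follows, assuming a plain helper (fitsInSignedBits is an invented name, not LLVM's isInt<N>), with Bits expected to be between 1 and 63.

#include <cstdint>
#include <cstdio>

// Returns true if Value fits in a signed integer of Bits bits,
// i.e. -2^(Bits-1) <= Value <= 2^(Bits-1) - 1.
static bool fitsInSignedBits(int64_t Value, unsigned Bits) {
  const int64_t Min = -(int64_t(1) << (Bits - 1));
  const int64_t Max = (int64_t(1) << (Bits - 1)) - 1;
  return Value >= Min && Value <= Max;
}

int main() {
  // A TOC-relative displacement must fit in 16 bits for the small code model.
  std::printf("%d\n", fitsInSignedBits(32767, 16));  // prints 1: representable
  std::printf("%d\n", fitsInSignedBits(40000, 16));  // prints 0: would overflow
  return 0;
}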
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
index 7ea5506f11d6..11a24a6889f1 100644
--- a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -196,15 +196,9 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// Update zero registers.
MCPhysReg ZeroRegisterID =
WS.clearsSuperRegisters() ? RegID : WS.getRegisterID();
- if (IsWriteZero) {
- ZeroRegisters.setBit(ZeroRegisterID);
- for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
- ZeroRegisters.setBit(*I);
- } else {
- ZeroRegisters.clearBit(ZeroRegisterID);
- for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
- ZeroRegisters.clearBit(*I);
- }
+ ZeroRegisters.setBitVal(ZeroRegisterID, IsWriteZero);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.setBitVal(*I, IsWriteZero);
  // If this move has been eliminated, then the call to tryEliminateMove
// should have already updated all the register mappings.
@@ -233,10 +227,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
RegisterMappings[*I].second.AliasRegID = 0U;
}
- if (IsWriteZero)
- ZeroRegisters.setBit(*I);
- else
- ZeroRegisters.clearBit(*I);
+ ZeroRegisters.setBitVal(*I, IsWriteZero);
}
}
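The RegisterFile hunk above folds paired setBit/clearBit branches into a single value-carrying setter. The sketch below shows that pattern on a plain word-sized bitset; SmallBitset64 and setBitVal are invented names for illustration and do not reflect LLVM's APInt/BitVector API. Indices are assumed to be below 64.

#include <cstdint>
#include <cstdio>

// Tiny fixed-size bitset: setBitVal sets or clears bit Idx based on Val,
// replacing the usual "if (Val) set(); else clear();" branch at each call site.
struct SmallBitset64 {
  uint64_t Bits = 0;
  void setBitVal(unsigned Idx, bool Val) {
    Bits = (Bits & ~(uint64_t(1) << Idx)) | (uint64_t(Val) << Idx);
  }
  bool test(unsigned Idx) const { return (Bits >> Idx) & 1; }
};

int main() {
  SmallBitset64 ZeroRegisters;
  ZeroRegisters.setBitVal(3, true);   // mark register 3 as zero
  ZeroRegisters.setBitVal(3, false);  // clear it again through the same call
  std::printf("%d\n", ZeroRegisters.test(3)); // prints 0
  return 0;
}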
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
index 8730336c6669..31ea751f1c44 100644
--- a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -241,7 +241,7 @@ void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
}
uint64_t Scheduler::analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts) {
- Insts.insert(Insts.end(), ReadySet.begin(), ReadySet.end());
+ llvm::append_range(Insts, ReadySet);
return BusyResourceUnits;
}
diff --git a/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
index 24e2a9d2f0ce..2bad13601718 100644
--- a/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
@@ -259,8 +259,9 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
// the opcode descriptor (MCInstrDesc).
// 2. Uses start at index #(MCDesc.getNumDefs()).
 // 3. There can only be a single optional register definition, and it is
- // always the last operand of the sequence (excluding extra operands
- // contributed by variadic opcodes).
+ // either the last operand of the sequence (excluding extra operands
+ // contributed by variadic opcodes) or one of the explicit register
+ // definitions. The latter occurs for some Thumb1 instructions.
//
// These assumptions work quite well for most out-of-order in-tree targets
// like x86. This is mainly because the vast majority of instructions is
@@ -308,12 +309,18 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
// The first NumExplicitDefs register operands are expected to be register
// definitions.
unsigned CurrentDef = 0;
+ unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
unsigned i = 0;
for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
const MCOperand &Op = MCI.getOperand(i);
if (!Op.isReg())
continue;
+ if (MCDesc.OpInfo[CurrentDef].isOptionalDef()) {
+ OptionalDefIdx = CurrentDef++;
+ continue;
+ }
+
WriteDescriptor &Write = ID.Writes[CurrentDef];
Write.OpIndex = i;
if (CurrentDef < NumWriteLatencyEntries) {
@@ -369,7 +376,7 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
if (MCDesc.hasOptionalDef()) {
WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
- Write.OpIndex = MCDesc.getNumOperands() - 1;
+ Write.OpIndex = OptionalDefIdx;
// Assign a default latency for this write.
Write.Latency = ID.MaxLatency;
Write.SClassOrWriteResourceID = 0;
@@ -518,7 +525,8 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
if (IsVariant) {
unsigned CPUID = SM.getProcessorID();
while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
- SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
+ SchedClassID =
+ STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
if (!SchedClassID) {
return make_error<InstructionError<MCInst>>(
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
index a0cdfb89c553..93e368123066 100644
--- a/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
@@ -30,8 +30,7 @@ Error InstructionTables::execute(InstRef &IR) {
if (!Resource.second.size())
continue;
unsigned Cycles = Resource.second.size();
- unsigned Index = std::distance(
- Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first));
+ unsigned Index = std::distance(Masks.begin(), find(Masks, Resource.first));
const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index);
unsigned NumUnits = ProcResource.NumUnits;
if (!ProcResource.SubUnitsIdxBegin) {
diff --git a/contrib/llvm-project/llvm/lib/Object/Archive.cpp b/contrib/llvm-project/llvm/lib/Object/Archive.cpp
index c18dd11a72cc..11c9de4455dd 100644
--- a/contrib/llvm-project/llvm/lib/Object/Archive.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/Archive.cpp
@@ -38,8 +38,8 @@ using namespace llvm;
using namespace object;
using namespace llvm::support::endian;
-static const char *const Magic = "!<arch>\n";
-static const char *const ThinMagic = "!<thin>\n";
+const char Magic[] = "!<arch>\n";
+const char ThinMagic[] = "!<thin>\n";
void Archive::anchor() {}
diff --git a/contrib/llvm-project/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm-project/llvm/lib/Object/ArchiveWriter.cpp
index 6f92c547164b..ce997464caa7 100644
--- a/contrib/llvm-project/llvm/lib/Object/ArchiveWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ArchiveWriter.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
@@ -285,22 +286,12 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind,
print<uint32_t>(Out, Kind, Val);
}
-static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
- bool Deterministic, ArrayRef<MemberData> Members,
- StringRef StringTable) {
- // We don't write a symbol table on an archive with no members -- except on
- // Darwin, where the linker will abort unless the archive has a symbol table.
- if (StringTable.empty() && !isDarwin(Kind))
- return;
-
- unsigned NumSyms = 0;
- for (const MemberData &M : Members)
- NumSyms += M.Symbols.size();
-
- unsigned Size = 0;
- unsigned OffsetSize = is64BitKind(Kind) ? sizeof(uint64_t) : sizeof(uint32_t);
-
- Size += OffsetSize; // Number of entries
+static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
+ uint64_t NumSyms, uint64_t OffsetSize,
+ StringRef StringTable,
+ uint32_t *Padding = nullptr) {
+ assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize");
+ uint64_t Size = OffsetSize; // Number of entries
if (isBSDLike(Kind))
Size += NumSyms * OffsetSize * 2; // Table
else
@@ -312,10 +303,15 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly.
// We do this for all bsd formats because it simplifies aligning members.
- const Align Alignment(isBSDLike(Kind) ? 8 : 2);
- unsigned Pad = offsetToAlignment(Size, Alignment);
+ uint32_t Pad = offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2));
Size += Pad;
+ if (Padding)
+ *Padding = Pad;
+ return Size;
+}
+static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind,
+ bool Deterministic, uint64_t Size) {
if (isBSDLike(Kind)) {
const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF";
printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0,
@@ -324,6 +320,24 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
const char *Name = is64BitKind(Kind) ? "/SYM64" : "";
printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size);
}
+}
+
+static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
+ bool Deterministic, ArrayRef<MemberData> Members,
+ StringRef StringTable) {
+ // We don't write a symbol table on an archive with no members -- except on
+ // Darwin, where the linker will abort unless the archive has a symbol table.
+ if (StringTable.empty() && !isDarwin(Kind))
+ return;
+
+ unsigned NumSyms = 0;
+ for (const MemberData &M : Members)
+ NumSyms += M.Symbols.size();
+
+ uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4;
+ uint32_t Pad;
+ uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad);
+ writeSymbolTableHeader(Out, Kind, Deterministic, Size);
uint64_t Pos = Out.tell() + Size;
@@ -358,22 +372,21 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
// reference to it, thus SymbolicFile should be destroyed first.
LLVMContext Context;
std::unique_ptr<object::SymbolicFile> Obj;
- if (identify_magic(Buf.getBuffer()) == file_magic::bitcode) {
+
+ const file_magic Type = identify_magic(Buf.getBuffer());
+ // Treat unsupported file types as having no symbols.
+ if (!object::SymbolicFile::isSymbolicFile(Type, &Context))
+ return Ret;
+ if (Type == file_magic::bitcode) {
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
Buf, file_magic::bitcode, &Context);
- if (!ObjOrErr) {
- // FIXME: check only for "not an object file" errors.
- consumeError(ObjOrErr.takeError());
- return Ret;
- }
+ if (!ObjOrErr)
+ return ObjOrErr.takeError();
Obj = std::move(*ObjOrErr);
} else {
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
- if (!ObjOrErr) {
- // FIXME: check only for "not an object file" errors.
- consumeError(ObjOrErr.takeError());
- return Ret;
- }
+ if (!ObjOrErr)
+ return ObjOrErr.takeError();
Obj = std::move(*ObjOrErr);
}
@@ -392,7 +405,7 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
object::Archive::Kind Kind, bool Thin, bool Deterministic,
- ArrayRef<NewArchiveMember> NewMembers) {
+ bool NeedSymbols, ArrayRef<NewArchiveMember> NewMembers) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
// This ignores the symbol table, but we only need the value mod 8 and the
@@ -493,13 +506,17 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
ModTime, Size);
Out.flush();
- Expected<std::vector<unsigned>> Symbols =
- getSymbols(Buf, SymNames, HasObject);
- if (auto E = Symbols.takeError())
- return std::move(E);
+ std::vector<unsigned> Symbols;
+ if (NeedSymbols) {
+ Expected<std::vector<unsigned>> SymbolsOrErr =
+ getSymbols(Buf, SymNames, HasObject);
+ if (auto E = SymbolsOrErr.takeError())
+ return std::move(E);
+ Symbols = std::move(*SymbolsOrErr);
+ }
Pos += Header.size() + Data.size() + Padding.size();
- Ret.push_back({std::move(*Symbols), std::move(Header), Data, Padding});
+ Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding});
}
// If there are no symbols, emit an empty symbol table, to satisfy Solaris
// tools, older versions of which expect a symbol table in a non-empty
@@ -552,10 +569,10 @@ Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
return std::string(Relative.str());
}
-Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
- bool WriteSymtab, object::Archive::Kind Kind,
- bool Deterministic, bool Thin,
- std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
+static Error writeArchiveToStream(raw_ostream &Out,
+ ArrayRef<NewArchiveMember> NewMembers,
+ bool WriteSymtab, object::Archive::Kind Kind,
+ bool Deterministic, bool Thin) {
assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
SmallString<0> SymNamesBuf;
@@ -563,8 +580,9 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
SmallString<0> StringTableBuf;
raw_svector_ostream StringTable(StringTableBuf);
- Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
- StringTable, SymNames, Kind, Thin, Deterministic, NewMembers);
+ Expected<std::vector<MemberData>> DataOrErr =
+ computeMemberData(StringTable, SymNames, Kind, Thin, Deterministic,
+ WriteSymtab, NewMembers);
if (Error E = DataOrErr.takeError())
return E;
std::vector<MemberData> &Data = *DataOrErr;
@@ -574,17 +592,28 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
// We would like to detect if we need to switch to a 64-bit symbol table.
if (WriteSymtab) {
- uint64_t MaxOffset = 0;
+ uint64_t MaxOffset = 8; // For the file signature.
uint64_t LastOffset = MaxOffset;
+ uint64_t NumSyms = 0;
for (const auto &M : Data) {
// Record the start of the member's offset
LastOffset = MaxOffset;
// Account for the size of each part associated with the member.
MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size();
- // We assume 32-bit symbols to see if 32-bit symbols are possible or not.
- MaxOffset += M.Symbols.size() * 4;
+ NumSyms += M.Symbols.size();
}
+ // We assume 32-bit offsets to see if 32-bit symbols are possible or not.
+ uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf);
+ auto computeSymbolTableHeaderSize =
+ [=] {
+ SmallString<0> TmpBuf;
+ raw_svector_ostream Tmp(TmpBuf);
+ writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize);
+ return TmpBuf.size();
+ };
+ LastOffset += computeSymbolTableHeaderSize() + SymtabSize;
+
// The SYM64 format is used when an archive's member offsets are larger than
// 32-bits can hold. The need for this shift in format is detected by
// writeArchive. To test this we need to generate a file with a member that
@@ -592,15 +621,15 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
// speed the test up we use this environment variable to pretend like the
// cutoff happens before 32-bits and instead happens at some much smaller
// value.
+ uint64_t Sym64Threshold = 1ULL << 32;
const char *Sym64Env = std::getenv("SYM64_THRESHOLD");
- int Sym64Threshold = 32;
if (Sym64Env)
StringRef(Sym64Env).getAsInteger(10, Sym64Threshold);
// If LastOffset isn't going to fit in a 32-bit variable, we need to switch
// to 64-bit. Note that the file can be larger than 4GB as long as the last
// member starts before the 4GB offset.
- if (LastOffset >= (1ULL << Sym64Threshold)) {
+ if (LastOffset >= Sym64Threshold) {
if (Kind == object::Archive::K_DARWIN)
Kind = object::Archive::K_DARWIN64;
else
@@ -608,12 +637,6 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
}
}
- Expected<sys::fs::TempFile> Temp =
- sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a");
- if (!Temp)
- return Temp.takeError();
-
- raw_fd_ostream Out(Temp->FD, false);
if (Thin)
Out << "!<thin>\n";
else
@@ -626,6 +649,25 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
Out << M.Header << M.Data << M.Padding;
Out.flush();
+ return Error::success();
+}
+
+Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
+ bool WriteSymtab, object::Archive::Kind Kind,
+ bool Deterministic, bool Thin,
+ std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
+ Expected<sys::fs::TempFile> Temp =
+ sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a");
+ if (!Temp)
+ return Temp.takeError();
+ raw_fd_ostream Out(Temp->FD, false);
+
+ if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind,
+ Deterministic, Thin)) {
+ if (Error DiscardError = Temp->discard())
+ return joinErrors(std::move(E), std::move(DiscardError));
+ return E;
+ }
// At this point, we no longer need whatever backing memory
// was used to generate the NewMembers. On Windows, this buffer
@@ -642,4 +684,19 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
return Temp->keep(ArcName);
}
+Expected<std::unique_ptr<MemoryBuffer>>
+writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab,
+ object::Archive::Kind Kind, bool Deterministic,
+ bool Thin) {
+ SmallVector<char, 0> ArchiveBufferVector;
+ raw_svector_ostream ArchiveStream(ArchiveBufferVector);
+
+ if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab,
+ Kind, Deterministic, Thin))
+ return std::move(E);
+
+ return std::make_unique<SmallVectorMemoryBuffer>(
+ std::move(ArchiveBufferVector));
+}
+
} // namespace llvm
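
The ArchiveWriter changes above split symbol-table emission into computeSymbolTableSize and writeSymbolTableHeader, route the file-based writeArchive through a new writeArchiveToStream helper, and expose writeArchiveToBuffer for producing an archive entirely in memory. A minimal sketch of the buffer-based entry point follows; it assumes NewArchiveMember::getFile from ArchiveWriter.h (not part of this diff) and is illustrative only, not code from the change itself.

// Illustrative sketch: build a GNU archive in memory via the new
// writeArchiveToBuffer entry point.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>
#include <vector>

static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
makeArchiveInMemory(llvm::ArrayRef<llvm::StringRef> Files) {
  std::vector<llvm::NewArchiveMember> Members;
  for (llvm::StringRef F : Files) {
    // Assumed helper from ArchiveWriter.h (not shown in this diff).
    auto MemberOrErr = llvm::NewArchiveMember::getFile(F, /*Deterministic=*/true);
    if (!MemberOrErr)
      return MemberOrErr.takeError();
    Members.push_back(std::move(*MemberOrErr));
  }
  // Same flags as writeArchive, but the result stays in memory instead of
  // going through a temporary file.
  return llvm::writeArchiveToBuffer(Members, /*WriteSymtab=*/true,
                                    llvm::object::Archive::K_GNU,
                                    /*Deterministic=*/true, /*Thin=*/false);
}
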
diff --git a/contrib/llvm-project/llvm/lib/Object/Binary.cpp b/contrib/llvm-project/llvm/lib/Object/Binary.cpp
index 944d2bc1bca7..e741cbba2882 100644
--- a/contrib/llvm-project/llvm/lib/Object/Binary.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/Binary.cpp
@@ -44,7 +44,8 @@ StringRef Binary::getFileName() const { return Data.getBufferIdentifier(); }
MemoryBufferRef Binary::getMemoryBufferRef() const { return Data; }
Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
- LLVMContext *Context) {
+ LLVMContext *Context,
+ bool InitContent) {
file_magic Type = identify_magic(Buffer.getBuffer());
switch (Type) {
@@ -73,7 +74,7 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case file_magic::xcoff_object_32:
case file_magic::xcoff_object_64:
case file_magic::wasm_object:
- return ObjectFile::createSymbolicFile(Buffer, Type, Context);
+ return ObjectFile::createSymbolicFile(Buffer, Type, Context, InitContent);
case file_magic::macho_universal_binary:
return MachOUniversalBinary::create(Buffer);
case file_magic::windows_resource:
@@ -93,7 +94,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
llvm_unreachable("Unexpected Binary File Type");
}
-Expected<OwningBinary<Binary>> object::createBinary(StringRef Path) {
+Expected<OwningBinary<Binary>>
+object::createBinary(StringRef Path, LLVMContext *Context, bool InitContent) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
/*RequiresNullTerminator=*/false);
@@ -102,7 +104,7 @@ Expected<OwningBinary<Binary>> object::createBinary(StringRef Path) {
std::unique_ptr<MemoryBuffer> &Buffer = FileOrErr.get();
Expected<std::unique_ptr<Binary>> BinOrErr =
- createBinary(Buffer->getMemBufferRef());
+ createBinary(Buffer->getMemBufferRef(), Context, InitContent);
if (!BinOrErr)
return BinOrErr.takeError();
std::unique_ptr<Binary> &Bin = BinOrErr.get();
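
createBinary now threads an optional LLVMContext and a new InitContent flag through both overloads, so callers that only need headers or file identification can defer content initialization. A hedged sketch of the extended path-based overload; the default values of the new parameters live in Binary.h and are not shown here.

// Illustrative sketch: open a file with the extended
// createBinary(Path, Context, InitContent) overload.
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/Binary.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

static llvm::Error identifyFile(llvm::StringRef Path) {
  llvm::LLVMContext Ctx;
  auto BinOrErr =
      llvm::object::createBinary(Path, &Ctx, /*InitContent=*/false);
  if (!BinOrErr)
    return BinOrErr.takeError();
  llvm::object::Binary &Bin = *BinOrErr->getBinary();
  // Keep to cheap queries: with InitContent=false the object's sections
  // may not have been materialized yet.
  llvm::outs() << Bin.getFileName() << (Bin.isObject() ? " is" : " is not")
               << " an object file\n";
  return llvm::Error::success();
}
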
diff --git a/contrib/llvm-project/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/COFFObjectFile.cpp
index c26d7721b3fe..6e9a8eb35dcf 100644
--- a/contrib/llvm-project/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/COFFObjectFile.cpp
@@ -28,8 +28,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include <algorithm>
#include <cassert>
+#include <cinttypes>
#include <cstddef>
-#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
@@ -57,7 +57,7 @@ static bool checkSize(MemoryBufferRef M, std::error_code &EC, uint64_t Size) {
template <typename T>
static Error getObject(const T *&Obj, MemoryBufferRef M, const void *Ptr,
const uint64_t Size = sizeof(T)) {
- uintptr_t Addr = uintptr_t(Ptr);
+ uintptr_t Addr = reinterpret_cast<uintptr_t>(Ptr);
if (Error E = Binary::checkOffset(M, Addr, Size))
return E;
Obj = reinterpret_cast<const T *>(Addr);
@@ -103,10 +103,11 @@ const coff_symbol_type *COFFObjectFile::toSymb(DataRefImpl Ref) const {
const coff_symbol_type *Addr =
reinterpret_cast<const coff_symbol_type *>(Ref.p);
- assert(!checkOffset(Data, uintptr_t(Addr), sizeof(*Addr)));
+ assert(!checkOffset(Data, reinterpret_cast<uintptr_t>(Addr), sizeof(*Addr)));
#ifndef NDEBUG
// Verify that the symbol points to a valid entry in the symbol table.
- uintptr_t Offset = uintptr_t(Addr) - uintptr_t(base());
+ uintptr_t Offset =
+ reinterpret_cast<uintptr_t>(Addr) - reinterpret_cast<uintptr_t>(base());
assert((Offset - getPointerToSymbolTable()) % sizeof(coff_symbol_type) == 0 &&
"Symbol did not point to the beginning of a symbol");
@@ -123,7 +124,8 @@ const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const {
if (Addr < SectionTable || Addr >= (SectionTable + getNumberOfSections()))
report_fatal_error("Section was outside of section table.");
- uintptr_t Offset = uintptr_t(Addr) - uintptr_t(SectionTable);
+ uintptr_t Offset = reinterpret_cast<uintptr_t>(Addr) -
+ reinterpret_cast<uintptr_t>(SectionTable);
assert(Offset % sizeof(coff_section) == 0 &&
"Section did not point to the beginning of a section");
#endif
@@ -332,7 +334,7 @@ bool COFFObjectFile::isDebugSection(StringRef SectionName) const {
unsigned COFFObjectFile::getSectionID(SectionRef Sec) const {
uintptr_t Offset =
- uintptr_t(Sec.getRawDataRefImpl().p) - uintptr_t(SectionTable);
+ Sec.getRawDataRefImpl().p - reinterpret_cast<uintptr_t>(SectionTable);
assert((Offset % sizeof(coff_section)) == 0);
return (Offset / sizeof(coff_section)) + 1;
}
@@ -376,7 +378,7 @@ getFirstReloc(const coff_section *Sec, MemoryBufferRef M, const uint8_t *Base) {
// relocations.
begin++;
}
- if (auto E = Binary::checkOffset(M, uintptr_t(begin),
+ if (auto E = Binary::checkOffset(M, reinterpret_cast<uintptr_t>(begin),
sizeof(coff_relocation) * NumRelocs)) {
consumeError(std::move(E));
return nullptr;
@@ -467,7 +469,8 @@ Error COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const {
uint32_t SectionEnd = Section->VirtualAddress + Section->VirtualSize;
if (SectionStart <= Addr && Addr < SectionEnd) {
uint32_t Offset = Addr - SectionStart;
- Res = uintptr_t(base()) + Section->PointerToRawData + Offset;
+ Res = reinterpret_cast<uintptr_t>(base()) + Section->PointerToRawData +
+ Offset;
return Error::success();
}
}
@@ -484,8 +487,8 @@ Error COFFObjectFile::getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size,
uint32_t OffsetIntoSection = RVA - SectionStart;
if (SectionStart <= RVA && OffsetIntoSection < Section->VirtualSize &&
Size <= Section->VirtualSize - OffsetIntoSection) {
- uintptr_t Begin =
- uintptr_t(base()) + Section->PointerToRawData + OffsetIntoSection;
+ uintptr_t Begin = reinterpret_cast<uintptr_t>(base()) +
+ Section->PointerToRawData + OffsetIntoSection;
Contents =
ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Begin), Size);
return Error::success();
@@ -649,6 +652,38 @@ Error COFFObjectFile::initDebugDirectoryPtr() {
return Error::success();
}
+Error COFFObjectFile::initTLSDirectoryPtr() {
+ // Get the RVA of the TLS directory. Do nothing if it does not exist.
+ const data_directory *DataEntry = getDataDirectory(COFF::TLS_TABLE);
+ if (!DataEntry)
+ return Error::success();
+
+ // Do nothing if the RVA is NULL.
+ if (DataEntry->RelativeVirtualAddress == 0)
+ return Error::success();
+
+ uint64_t DirSize =
+ is64() ? sizeof(coff_tls_directory64) : sizeof(coff_tls_directory32);
+
+ // Check that the size is correct.
+ if (DataEntry->Size != DirSize)
+ return createStringError(
+ object_error::parse_failed,
+ "TLS Directory size (%u) is not the expected size (%" PRIu64 ").",
+ static_cast<uint32_t>(DataEntry->Size), DirSize);
+
+ uintptr_t IntPtr = 0;
+ if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr))
+ return E;
+
+ if (is64())
+ TLSDirectory64 = reinterpret_cast<const coff_tls_directory64 *>(IntPtr);
+ else
+ TLSDirectory32 = reinterpret_cast<const coff_tls_directory32 *>(IntPtr);
+
+ return Error::success();
+}
+
Error COFFObjectFile::initLoadConfigPtr() {
// Get the RVA of the debug directory. Do nothing if it does not exist.
const data_directory *DataEntry = getDataDirectory(COFF::LOAD_CONFIG_TABLE);
@@ -682,7 +717,8 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object)
ImportDirectory(nullptr), DelayImportDirectory(nullptr),
NumberOfDelayImportDirectory(0), ExportDirectory(nullptr),
BaseRelocHeader(nullptr), BaseRelocEnd(nullptr),
- DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr) {}
+ DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr),
+ TLSDirectory32(nullptr), TLSDirectory64(nullptr) {}
Error COFFObjectFile::initialize() {
// Check that we at least have enough room for a header.
@@ -809,10 +845,14 @@ Error COFFObjectFile::initialize() {
if (Error E = initBaseRelocPtr())
return E;
- // Initialize the pointer to the export table.
+ // Initialize the pointer to the debug directory.
if (Error E = initDebugDirectoryPtr())
return E;
+ // Initialize the pointer to the TLS directory.
+ if (Error E = initTLSDirectoryPtr())
+ return E;
+
if (Error E = initLoadConfigPtr())
return E;
@@ -1090,7 +1130,8 @@ Error COFFObjectFile::getSectionContents(const coff_section *Sec,
// The only thing that we need to verify is that the contents is contained
// within the file bounds. We don't need to make sure it doesn't cover other
// data, as there's nothing that says that is not allowed.
- uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData;
+ uintptr_t ConStart =
+ reinterpret_cast<uintptr_t>(base()) + Sec->PointerToRawData;
uint32_t SectionSize = getSectionSize(Sec);
if (Error E = checkOffset(Data, ConStart, SectionSize))
return E;
@@ -1750,10 +1791,9 @@ Error ResourceSectionRef::load(const COFFObjectFile *O, const SectionRef &S) {
Relocs.reserve(OrigRelocs.size());
for (const coff_relocation &R : OrigRelocs)
Relocs.push_back(&R);
- std::sort(Relocs.begin(), Relocs.end(),
- [](const coff_relocation *A, const coff_relocation *B) {
- return A->VirtualAddress < B->VirtualAddress;
- });
+ llvm::sort(Relocs, [](const coff_relocation *A, const coff_relocation *B) {
+ return A->VirtualAddress < B->VirtualAddress;
+ });
return Error::success();
}
diff --git a/contrib/llvm-project/llvm/lib/Object/ELF.cpp b/contrib/llvm-project/llvm/lib/Object/ELF.cpp
index 2515695095a1..264f115ddbb5 100644
--- a/contrib/llvm-project/llvm/lib/Object/ELF.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ELF.cpp
@@ -152,6 +152,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
break;
}
break;
+ case ELF::EM_CSKY:
+ switch (Type) {
+#include "llvm/BinaryFormat/ELFRelocs/CSKY.def"
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -194,6 +201,8 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) {
case ELF::EM_SPARC32PLUS:
case ELF::EM_SPARCV9:
return ELF::R_SPARC_RELATIVE;
+ case ELF::EM_CSKY:
+ return ELF::R_CKCORE_RELATIVE;
case ELF::EM_AMDGPU:
break;
case ELF::EM_BPF:
@@ -267,6 +276,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_SYMPART);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_EHDR);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_PHDR);
+ STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef);
@@ -278,7 +288,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
}
template <class ELFT>
-Expected<std::vector<typename ELFT::Rela>>
+std::vector<typename ELFT::Rel>
ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const {
// This function decodes the contents of an SHT_RELR packed relocation
// section.
@@ -310,11 +320,10 @@ ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const {
// even means address, odd means bitmap.
// 2. Just a simple list of addresses is a valid encoding.
- Elf_Rela Rela;
- Rela.r_info = 0;
- Rela.r_addend = 0;
- Rela.setType(getRelativeRelocationType(), false);
- std::vector<Elf_Rela> Relocs;
+ Elf_Rel Rel;
+ Rel.r_info = 0;
+ Rel.setType(getRelativeRelocationType(), false);
+ std::vector<Elf_Rel> Relocs;
// Word type: uint32_t for Elf32, and uint64_t for Elf64.
typedef typename ELFT::uint Word;
@@ -331,8 +340,8 @@ ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const {
Word Entry = R;
if ((Entry&1) == 0) {
// Even entry: encodes the offset for next relocation.
- Rela.r_offset = Entry;
- Relocs.push_back(Rela);
+ Rel.r_offset = Entry;
+ Relocs.push_back(Rel);
// Set base offset for subsequent bitmap entries.
Base = Entry + WordSize;
continue;
@@ -343,8 +352,8 @@ ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const {
while (Entry != 0) {
Entry >>= 1;
if ((Entry&1) != 0) {
- Rela.r_offset = Offset;
- Relocs.push_back(Rela);
+ Rel.r_offset = Offset;
+ Relocs.push_back(Rel);
}
Offset += WordSize;
}
@@ -358,7 +367,7 @@ ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const {
template <class ELFT>
Expected<std::vector<typename ELFT::Rela>>
-ELFFile<ELFT>::android_relas(const Elf_Shdr *Sec) const {
+ELFFile<ELFT>::android_relas(const Elf_Shdr &Sec) const {
// This function reads relocations in Android's packed relocation format,
// which is based on SLEB128 and delta encoding.
Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec);
@@ -503,7 +512,7 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
template <class ELFT>
std::string ELFFile<ELFT>::getDynamicTagAsString(uint64_t Type) const {
- return getDynamicTagAsString(getHeader()->e_machine, Type);
+ return getDynamicTagAsString(getHeader().e_machine, Type);
}
template <class ELFT>
@@ -533,7 +542,7 @@ Expected<typename ELFT::DynRange> ELFFile<ELFT>::dynamicEntries() const {
for (const Elf_Shdr &Sec : *SectionsOrError) {
if (Sec.sh_type == ELF::SHT_DYNAMIC) {
Expected<ArrayRef<Elf_Dyn>> DynOrError =
- getSectionContentsAsArray<Elf_Dyn>(&Sec);
+ getSectionContentsAsArray<Elf_Dyn>(Sec);
if (!DynOrError)
return DynOrError.takeError();
Dyn = *DynOrError;
@@ -557,7 +566,8 @@ Expected<typename ELFT::DynRange> ELFFile<ELFT>::dynamicEntries() const {
}
template <class ELFT>
-Expected<const uint8_t *> ELFFile<ELFT>::toMappedAddr(uint64_t VAddr) const {
+Expected<const uint8_t *>
+ELFFile<ELFT>::toMappedAddr(uint64_t VAddr, WarningHandler WarnHandler) const {
auto ProgramHeadersOrError = program_headers();
if (!ProgramHeadersOrError)
return ProgramHeadersOrError.takeError();
@@ -568,11 +578,21 @@ Expected<const uint8_t *> ELFFile<ELFT>::toMappedAddr(uint64_t VAddr) const {
if (Phdr.p_type == ELF::PT_LOAD)
LoadSegments.push_back(const_cast<Elf_Phdr *>(&Phdr));
- const Elf_Phdr *const *I =
- std::upper_bound(LoadSegments.begin(), LoadSegments.end(), VAddr,
- [](uint64_t VAddr, const Elf_Phdr_Impl<ELFT> *Phdr) {
- return VAddr < Phdr->p_vaddr;
- });
+ auto SortPred = [](const Elf_Phdr_Impl<ELFT> *A,
+ const Elf_Phdr_Impl<ELFT> *B) {
+ return A->p_vaddr < B->p_vaddr;
+ };
+ if (!llvm::is_sorted(LoadSegments, SortPred)) {
+ if (Error E =
+ WarnHandler("loadable segments are unsorted by virtual address"))
+ return std::move(E);
+ llvm::stable_sort(LoadSegments, SortPred);
+ }
+
+ const Elf_Phdr *const *I = llvm::upper_bound(
+ LoadSegments, VAddr, [](uint64_t VAddr, const Elf_Phdr_Impl<ELFT> *Phdr) {
+ return VAddr < Phdr->p_vaddr;
+ });
if (I == LoadSegments.begin())
return createError("virtual address is not in any segment: 0x" +
diff --git a/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
index c919d25855d2..91871a6255dc 100644
--- a/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
@@ -61,35 +61,36 @@ ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source)
template <class ELFT>
static Expected<std::unique_ptr<ELFObjectFile<ELFT>>>
-createPtr(MemoryBufferRef Object) {
- auto Ret = ELFObjectFile<ELFT>::create(Object);
+createPtr(MemoryBufferRef Object, bool InitContent) {
+ auto Ret = ELFObjectFile<ELFT>::create(Object, InitContent);
if (Error E = Ret.takeError())
return std::move(E);
return std::make_unique<ELFObjectFile<ELFT>>(std::move(*Ret));
}
Expected<std::unique_ptr<ObjectFile>>
-ObjectFile::createELFObjectFile(MemoryBufferRef Obj) {
+ObjectFile::createELFObjectFile(MemoryBufferRef Obj, bool InitContent) {
std::pair<unsigned char, unsigned char> Ident =
getElfArchType(Obj.getBuffer());
std::size_t MaxAlignment =
- 1ULL << countTrailingZeros(uintptr_t(Obj.getBufferStart()));
+ 1ULL << countTrailingZeros(
+ reinterpret_cast<uintptr_t>(Obj.getBufferStart()));
if (MaxAlignment < 2)
return createError("Insufficient alignment");
if (Ident.first == ELF::ELFCLASS32) {
if (Ident.second == ELF::ELFDATA2LSB)
- return createPtr<ELF32LE>(Obj);
+ return createPtr<ELF32LE>(Obj, InitContent);
else if (Ident.second == ELF::ELFDATA2MSB)
- return createPtr<ELF32BE>(Obj);
+ return createPtr<ELF32BE>(Obj, InitContent);
else
return createError("Invalid ELF data");
} else if (Ident.first == ELF::ELFCLASS64) {
if (Ident.second == ELF::ELFDATA2LSB)
- return createPtr<ELF64LE>(Obj);
+ return createPtr<ELF64LE>(Obj, InitContent);
else if (Ident.second == ELF::ELFDATA2MSB)
- return createPtr<ELF64BE>(Obj);
+ return createPtr<ELF64BE>(Obj, InitContent);
else
return createError("Invalid ELF data");
}
@@ -355,6 +356,130 @@ SubtargetFeatures ELFObjectFileBase::getFeatures() const {
}
}
+Optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
+ switch (getEMachine()) {
+ case ELF::EM_AMDGPU:
+ return getAMDGPUCPUName();
+ default:
+ return None;
+ }
+}
+
+StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
+ assert(getEMachine() == ELF::EM_AMDGPU);
+ unsigned CPU = getPlatformFlags() & ELF::EF_AMDGPU_MACH;
+
+ switch (CPU) {
+ // Radeon HD 2000/3000 Series (R600).
+ case ELF::EF_AMDGPU_MACH_R600_R600:
+ return "r600";
+ case ELF::EF_AMDGPU_MACH_R600_R630:
+ return "r630";
+ case ELF::EF_AMDGPU_MACH_R600_RS880:
+ return "rs880";
+ case ELF::EF_AMDGPU_MACH_R600_RV670:
+ return "rv670";
+
+ // Radeon HD 4000 Series (R700).
+ case ELF::EF_AMDGPU_MACH_R600_RV710:
+ return "rv710";
+ case ELF::EF_AMDGPU_MACH_R600_RV730:
+ return "rv730";
+ case ELF::EF_AMDGPU_MACH_R600_RV770:
+ return "rv770";
+
+ // Radeon HD 5000 Series (Evergreen).
+ case ELF::EF_AMDGPU_MACH_R600_CEDAR:
+ return "cedar";
+ case ELF::EF_AMDGPU_MACH_R600_CYPRESS:
+ return "cypress";
+ case ELF::EF_AMDGPU_MACH_R600_JUNIPER:
+ return "juniper";
+ case ELF::EF_AMDGPU_MACH_R600_REDWOOD:
+ return "redwood";
+ case ELF::EF_AMDGPU_MACH_R600_SUMO:
+ return "sumo";
+
+ // Radeon HD 6000 Series (Northern Islands).
+ case ELF::EF_AMDGPU_MACH_R600_BARTS:
+ return "barts";
+ case ELF::EF_AMDGPU_MACH_R600_CAICOS:
+ return "caicos";
+ case ELF::EF_AMDGPU_MACH_R600_CAYMAN:
+ return "cayman";
+ case ELF::EF_AMDGPU_MACH_R600_TURKS:
+ return "turks";
+
+ // AMDGCN GFX6.
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600:
+ return "gfx600";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601:
+ return "gfx601";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602:
+ return "gfx602";
+
+ // AMDGCN GFX7.
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700:
+ return "gfx700";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701:
+ return "gfx701";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702:
+ return "gfx702";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703:
+ return "gfx703";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704:
+ return "gfx704";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705:
+ return "gfx705";
+
+ // AMDGCN GFX8.
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801:
+ return "gfx801";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802:
+ return "gfx802";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803:
+ return "gfx803";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805:
+ return "gfx805";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810:
+ return "gfx810";
+
+ // AMDGCN GFX9.
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900:
+ return "gfx900";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902:
+ return "gfx902";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904:
+ return "gfx904";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906:
+ return "gfx906";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908:
+ return "gfx908";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909:
+ return "gfx909";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C:
+ return "gfx90c";
+
+ // AMDGCN GFX10.
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
+ return "gfx1010";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011:
+ return "gfx1011";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012:
+ return "gfx1012";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030:
+ return "gfx1030";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031:
+ return "gfx1031";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032:
+ return "gfx1032";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033:
+ return "gfx1033";
+ default:
+ llvm_unreachable("Unknown EF_AMDGPU_MACH value");
+ }
+}
+
// FIXME Encode from a tablegen description or target parser.
void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
if (TheTriple.getSubArch() != Triple::NoSubArch)
@@ -440,7 +565,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
TheTriple.setArchName(Triple);
}
-std::vector<std::pair<DataRefImpl, uint64_t>>
+std::vector<std::pair<Optional<DataRefImpl>, uint64_t>>
ELFObjectFileBase::getPltAddresses() const {
std::string Err;
const auto Triple = makeTriple();
@@ -498,14 +623,18 @@ ELFObjectFileBase::getPltAddresses() const {
GotToPlt.insert(std::make_pair(Entry.second, Entry.first));
// Find the relocations in the dynamic relocation table that point to
// locations in the GOT for which we know the corresponding PLT entry.
- std::vector<std::pair<DataRefImpl, uint64_t>> Result;
+ std::vector<std::pair<Optional<DataRefImpl>, uint64_t>> Result;
for (const auto &Relocation : RelaPlt->relocations()) {
if (Relocation.getType() != JumpSlotReloc)
continue;
auto PltEntryIter = GotToPlt.find(Relocation.getOffset());
- if (PltEntryIter != GotToPlt.end())
- Result.push_back(std::make_pair(
- Relocation.getSymbol()->getRawDataRefImpl(), PltEntryIter->second));
+ if (PltEntryIter != GotToPlt.end()) {
+ symbol_iterator Sym = Relocation.getSymbol();
+ if (Sym == symbol_end())
+ Result.emplace_back(None, PltEntryIter->second);
+ else
+ Result.emplace_back(Sym->getRawDataRefImpl(), PltEntryIter->second);
+ }
}
return Result;
}
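
getPltAddresses now returns pairs of Optional<DataRefImpl> and address, so a JUMP_SLOT relocation without an associated symbol is reported as None rather than dropped or paired with a bogus symbol; tryGetCPUName similarly surfaces the AMDGPU mach flag as a CPU name. A hedged sketch of consuming the new PLT map, assuming Obj is an already-created ELFObjectFileBase.

// Illustrative sketch: walk the PLT map returned by the updated
// ELFObjectFileBase::getPltAddresses; some entries may lack a symbol.
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

void dumpPlt(const llvm::object::ELFObjectFileBase &Obj) {
  for (const auto &Entry : Obj.getPltAddresses()) {
    if (!Entry.first) {
      // New in this change: a PLT slot whose relocation has no symbol.
      llvm::outs() << "PLT entry at " << llvm::format_hex(Entry.second, 10)
                   << " has no symbol\n";
      continue;
    }
    llvm::object::SymbolRef Sym(*Entry.first, &Obj);
    if (auto NameOrErr = Sym.getName())
      llvm::outs() << *NameOrErr << " -> "
                   << llvm::format_hex(Entry.second, 10) << "\n";
    else
      llvm::consumeError(NameOrErr.takeError());
  }
}
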
diff --git a/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp
index 4d85e6f40ec4..302255926289 100644
--- a/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp
@@ -1596,6 +1596,9 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
if ((Err = checkTwoLevelHintsCommand(*this, Load, I,
&TwoLevelHintsLoadCmd, Elements)))
return;
+ } else if (Load.C.cmd == MachO::LC_IDENT) {
+ // Note: LC_IDENT is ignored.
+ continue;
} else if (isLoadCommandObsolete(Load.C.cmd)) {
Err = malformedError("load command " + Twine(I) + " for cmd value of: " +
Twine(Load.C.cmd) + " is obsolete and not "
@@ -2032,7 +2035,9 @@ bool MachOObjectFile::isSectionBSS(DataRefImpl Sec) const {
bool MachOObjectFile::isDebugSection(StringRef SectionName) const {
return SectionName.startswith("__debug") ||
- SectionName.startswith("__zdebug") || SectionName == "__gdb_index";
+ SectionName.startswith("__zdebug") ||
+ SectionName.startswith("__apple") || SectionName == "__gdb_index" ||
+ SectionName == "__swift_ast";
}
unsigned MachOObjectFile::getSectionID(SectionRef Sec) const {
@@ -2689,6 +2694,12 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
if (ArchFlag)
*ArchFlag = "arm64";
return Triple("arm64-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM64E:
+ if (McpuDefault)
+ *McpuDefault = "apple-a12";
+ if (ArchFlag)
+ *ArchFlag = "arm64e";
+ return Triple("arm64e-apple-darwin");
default:
return Triple();
}
@@ -2732,17 +2743,32 @@ Triple MachOObjectFile::getHostArch() {
bool MachOObjectFile::isValidArch(StringRef ArchFlag) {
auto validArchs = getValidArchs();
- return llvm::find(validArchs, ArchFlag) != validArchs.end();
+ return llvm::is_contained(validArchs, ArchFlag);
}
ArrayRef<StringRef> MachOObjectFile::getValidArchs() {
- static const std::array<StringRef, 17> validArchs = {{
- "i386", "x86_64", "x86_64h", "armv4t", "arm", "armv5e",
- "armv6", "armv6m", "armv7", "armv7em", "armv7k", "armv7m",
- "armv7s", "arm64", "arm64_32", "ppc", "ppc64",
+ static const std::array<StringRef, 18> ValidArchs = {{
+ "i386",
+ "x86_64",
+ "x86_64h",
+ "armv4t",
+ "arm",
+ "armv5e",
+ "armv6",
+ "armv6m",
+ "armv7",
+ "armv7em",
+ "armv7k",
+ "armv7m",
+ "armv7s",
+ "arm64",
+ "arm64e",
+ "arm64_32",
+ "ppc",
+ "ppc64",
}};
- return validArchs;
+ return ValidArchs;
}
Triple::ArchType MachOObjectFile::getArch() const {
diff --git a/contrib/llvm-project/llvm/lib/Object/MachOUniversal.cpp b/contrib/llvm-project/llvm/lib/Object/MachOUniversal.cpp
index a178ecde949e..f3ce005e6ef9 100644
--- a/contrib/llvm-project/llvm/lib/Object/MachOUniversal.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/MachOUniversal.cpp
@@ -12,6 +12,7 @@
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/Archive.h"
+#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
@@ -80,6 +81,25 @@ MachOUniversalBinary::ObjectForArch::getAsObjectFile() const {
return ObjectFile::createMachOObjectFile(ObjBuffer, cputype, Index);
}
+Expected<std::unique_ptr<IRObjectFile>>
+MachOUniversalBinary::ObjectForArch::getAsIRObject(LLVMContext &Ctx) const {
+ if (!Parent)
+ report_fatal_error("MachOUniversalBinary::ObjectForArch::getAsIRObject() "
+ "called when Parent is a nullptr");
+
+ StringRef ParentData = Parent->getData();
+ StringRef ObjectData;
+ if (Parent->getMagic() == MachO::FAT_MAGIC) {
+ ObjectData = ParentData.substr(Header.offset, Header.size);
+ } else { // Parent->getMagic() == MachO::FAT_MAGIC_64
+ ObjectData = ParentData.substr(Header64.offset, Header64.size);
+ }
+ StringRef ObjectName = Parent->getFileName();
+ MemoryBufferRef ObjBuffer(ObjectData, ObjectName);
+
+ return IRObjectFile::create(ObjBuffer, Ctx);
+}
+
Expected<std::unique_ptr<Archive>>
MachOUniversalBinary::ObjectForArch::getAsArchive() const {
if (!Parent)
@@ -234,6 +254,15 @@ MachOUniversalBinary::getMachOObjectForArch(StringRef ArchName) const {
return O->getAsObjectFile();
}
+Expected<std::unique_ptr<IRObjectFile>>
+MachOUniversalBinary::getIRObjectForArch(StringRef ArchName,
+ LLVMContext &Ctx) const {
+ Expected<ObjectForArch> O = getObjectForArch(ArchName);
+ if (!O)
+ return O.takeError();
+ return O->getAsIRObject(Ctx);
+}
+
Expected<std::unique_ptr<Archive>>
MachOUniversalBinary::getArchiveForArch(StringRef ArchName) const {
Expected<ObjectForArch> O = getObjectForArch(ArchName);
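
getAsIRObject and its getIRObjectForArch wrapper let a caller pull a bitcode slice out of a universal binary as an IRObjectFile, mirroring the existing getMachOObjectForArch and getArchiveForArch accessors. A hedged usage sketch, assuming UB is an already-parsed MachOUniversalBinary.

// Illustrative sketch: extract the x86_64 slice of a fat file as LLVM IR
// using the getIRObjectForArch accessor introduced in this diff.
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Support/raw_ostream.h"

llvm::Error dumpIRSlice(const llvm::object::MachOUniversalBinary &UB) {
  llvm::LLVMContext Ctx;
  auto IROrErr = UB.getIRObjectForArch("x86_64", Ctx);
  if (!IROrErr)
    return IROrErr.takeError();
  llvm::object::IRObjectFile &IR = **IROrErr;
  llvm::outs() << "bitcode slice target triple: " << IR.getTargetTriple()
               << "\n";
  return llvm::Error::success();
}
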
diff --git a/contrib/llvm-project/llvm/lib/Object/MachOUniversalWriter.cpp b/contrib/llvm-project/llvm/lib/Object/MachOUniversalWriter.cpp
new file mode 100644
index 000000000000..4bb467e56a6f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Object/MachOUniversalWriter.cpp
@@ -0,0 +1,337 @@
+//===- MachOUniversalWriter.cpp - MachO universal binary writer---*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the Slice class and writeUniversalBinary function for writing a MachO
+// universal binary file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/MachOUniversalWriter.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
+
+using namespace llvm;
+using namespace object;
+
+// For compatibility with cctools lipo, a file's alignment is calculated as the
+// minimum alignment of all segments. For object files, the file's alignment is
+// the maximum alignment of its sections.
+static uint32_t calculateFileAlignment(const MachOObjectFile &O) {
+ uint32_t P2CurrentAlignment;
+ uint32_t P2MinAlignment = MachOUniversalBinary::MaxSectionAlignment;
+ const bool Is64Bit = O.is64Bit();
+
+ for (const auto &LC : O.load_commands()) {
+ if (LC.C.cmd != (Is64Bit ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT))
+ continue;
+ if (O.getHeader().filetype == MachO::MH_OBJECT) {
+ unsigned NumberOfSections =
+ (Is64Bit ? O.getSegment64LoadCommand(LC).nsects
+ : O.getSegmentLoadCommand(LC).nsects);
+ P2CurrentAlignment = NumberOfSections ? 2 : P2MinAlignment;
+ for (unsigned SI = 0; SI < NumberOfSections; ++SI) {
+ P2CurrentAlignment = std::max(P2CurrentAlignment,
+ (Is64Bit ? O.getSection64(LC, SI).align
+ : O.getSection(LC, SI).align));
+ }
+ } else {
+ P2CurrentAlignment =
+ countTrailingZeros(Is64Bit ? O.getSegment64LoadCommand(LC).vmaddr
+ : O.getSegmentLoadCommand(LC).vmaddr);
+ }
+ P2MinAlignment = std::min(P2MinAlignment, P2CurrentAlignment);
+ }
+ // return a value >= 4 byte aligned, and less than MachO MaxSectionAlignment
+ return std::max(
+ static_cast<uint32_t>(2),
+ std::min(P2MinAlignment, static_cast<uint32_t>(
+ MachOUniversalBinary::MaxSectionAlignment)));
+}
+
+static uint32_t calculateAlignment(const MachOObjectFile &ObjectFile) {
+ switch (ObjectFile.getHeader().cputype) {
+ case MachO::CPU_TYPE_I386:
+ case MachO::CPU_TYPE_X86_64:
+ case MachO::CPU_TYPE_POWERPC:
+ case MachO::CPU_TYPE_POWERPC64:
+ return 12; // log2 value of page size(4k) for x86 and PPC
+ case MachO::CPU_TYPE_ARM:
+ case MachO::CPU_TYPE_ARM64:
+ case MachO::CPU_TYPE_ARM64_32:
+ return 14; // log2 value of page size(16k) for Darwin ARM
+ default:
+ return calculateFileAlignment(ObjectFile);
+ }
+}
+
+Slice::Slice(const Archive &A, uint32_t CPUType, uint32_t CPUSubType,
+ std::string ArchName, uint32_t Align)
+ : B(&A), CPUType(CPUType), CPUSubType(CPUSubType),
+ ArchName(std::move(ArchName)), P2Alignment(Align) {}
+
+Slice::Slice(const MachOObjectFile &O, uint32_t Align)
+ : B(&O), CPUType(O.getHeader().cputype),
+ CPUSubType(O.getHeader().cpusubtype),
+ ArchName(std::string(O.getArchTriple().getArchName())),
+ P2Alignment(Align) {}
+
+Slice::Slice(const IRObjectFile &IRO, uint32_t CPUType, uint32_t CPUSubType,
+ std::string ArchName, uint32_t Align)
+ : B(&IRO), CPUType(CPUType), CPUSubType(CPUSubType),
+ ArchName(std::move(ArchName)), P2Alignment(Align) {}
+
+Slice::Slice(const MachOObjectFile &O) : Slice(O, calculateAlignment(O)) {}
+
+using MachoCPUTy = std::pair<unsigned, unsigned>;
+
+static Expected<MachoCPUTy> getMachoCPUFromTriple(Triple TT) {
+ auto CPU = std::make_pair(MachO::getCPUType(TT), MachO::getCPUSubType(TT));
+ if (!CPU.first) {
+ return CPU.first.takeError();
+ }
+ if (!CPU.second) {
+ return CPU.second.takeError();
+ }
+ return std::make_pair(*CPU.first, *CPU.second);
+}
+
+static Expected<MachoCPUTy> getMachoCPUFromTriple(StringRef TT) {
+ return getMachoCPUFromTriple(Triple{TT});
+}
+
+Expected<Slice> Slice::create(const Archive &A, LLVMContext *LLVMCtx) {
+ Error Err = Error::success();
+ std::unique_ptr<MachOObjectFile> MFO = nullptr;
+ std::unique_ptr<IRObjectFile> IRFO = nullptr;
+ for (const Archive::Child &Child : A.children(Err)) {
+ Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary(LLVMCtx);
+ if (!ChildOrErr)
+ return createFileError(A.getFileName(), ChildOrErr.takeError());
+ Binary *Bin = ChildOrErr.get().get();
+ if (Bin->isMachOUniversalBinary())
+ return createStringError(std::errc::invalid_argument,
+ ("archive member " + Bin->getFileName() +
+ " is a fat file (not allowed in an archive)")
+ .str()
+ .c_str());
+ if (Bin->isMachO()) {
+ MachOObjectFile *O = cast<MachOObjectFile>(Bin);
+ if (IRFO) {
+ return createStringError(
+ std::errc::invalid_argument,
+ "archive member %s is a MachO, while previous archive member "
+ "%s was an IR LLVM object",
+ O->getFileName().str().c_str(), IRFO->getFileName().str().c_str());
+ }
+ if (MFO &&
+ std::tie(MFO->getHeader().cputype, MFO->getHeader().cpusubtype) !=
+ std::tie(O->getHeader().cputype, O->getHeader().cpusubtype)) {
+ return createStringError(
+ std::errc::invalid_argument,
+ ("archive member " + O->getFileName() + " cputype (" +
+ Twine(O->getHeader().cputype) + ") and cpusubtype(" +
+ Twine(O->getHeader().cpusubtype) +
+ ") does not match previous archive members cputype (" +
+ Twine(MFO->getHeader().cputype) + ") and cpusubtype(" +
+ Twine(MFO->getHeader().cpusubtype) +
+ ") (all members must match) " + MFO->getFileName())
+ .str()
+ .c_str());
+ }
+ if (!MFO) {
+ ChildOrErr.get().release();
+ MFO.reset(O);
+ }
+ } else if (Bin->isIR()) {
+ IRObjectFile *O = cast<IRObjectFile>(Bin);
+ if (MFO) {
+ return createStringError(std::errc::invalid_argument,
+ "archive member '%s' is an LLVM IR object, "
+ "while previous archive member "
+ "'%s' was a MachO",
+ O->getFileName().str().c_str(),
+ MFO->getFileName().str().c_str());
+ }
+ if (IRFO) {
+ Expected<MachoCPUTy> CPUO = getMachoCPUFromTriple(O->getTargetTriple());
+ Expected<MachoCPUTy> CPUFO =
+ getMachoCPUFromTriple(IRFO->getTargetTriple());
+ if (!CPUO)
+ return CPUO.takeError();
+ if (!CPUFO)
+ return CPUFO.takeError();
+ if (*CPUO != *CPUFO) {
+ return createStringError(
+ std::errc::invalid_argument,
+ ("archive member " + O->getFileName() + " cputype (" +
+ Twine(CPUO->first) + ") and cpusubtype(" + Twine(CPUO->second) +
+ ") does not match previous archive members cputype (" +
+ Twine(CPUFO->first) + ") and cpusubtype(" +
+ Twine(CPUFO->second) + ") (all members must match) " +
+ IRFO->getFileName())
+ .str()
+ .c_str());
+ }
+ } else {
+ ChildOrErr.get().release();
+ IRFO.reset(O);
+ }
+ } else
+ return createStringError(std::errc::invalid_argument,
+ ("archive member " + Bin->getFileName() +
+ " is neither a MachO file or an LLVM IR file "
+ "(not allowed in an archive)")
+ .str()
+ .c_str());
+ }
+ if (Err)
+ return createFileError(A.getFileName(), std::move(Err));
+ if (!MFO && !IRFO)
+ return createStringError(
+ std::errc::invalid_argument,
+ ("empty archive with no architecture specification: " +
+ A.getFileName() + " (can't determine architecture for it)")
+ .str()
+ .c_str());
+
+ if (MFO) {
+ Slice ArchiveSlice(*(MFO.get()), MFO->is64Bit() ? 3 : 2);
+ ArchiveSlice.B = &A;
+ return ArchiveSlice;
+ }
+
+ // For IR objects
+ Expected<Slice> ArchiveSliceOrErr = Slice::create(*IRFO, 0);
+ if (!ArchiveSliceOrErr)
+ return createFileError(A.getFileName(), ArchiveSliceOrErr.takeError());
+ auto &ArchiveSlice = ArchiveSliceOrErr.get();
+ ArchiveSlice.B = &A;
+ return std::move(ArchiveSlice);
+}
+
+Expected<Slice> Slice::create(const IRObjectFile &IRO, uint32_t Align) {
+ Expected<MachoCPUTy> CPUOrErr = getMachoCPUFromTriple(IRO.getTargetTriple());
+ if (!CPUOrErr)
+ return CPUOrErr.takeError();
+ unsigned CPUType, CPUSubType;
+ std::tie(CPUType, CPUSubType) = CPUOrErr.get();
+ // We don't directly use the architecture name of the target triple T, as,
+ // for instance, thumb is treated as ARM by the MachOUniversal object.
+ std::string ArchName(
+ MachOObjectFile::getArchTriple(CPUType, CPUSubType).getArchName());
+ return Slice{IRO, CPUType, CPUSubType, std::move(ArchName), Align};
+}
+
+static Expected<SmallVector<MachO::fat_arch, 2>>
+buildFatArchList(ArrayRef<Slice> Slices) {
+ SmallVector<MachO::fat_arch, 2> FatArchList;
+ uint64_t Offset =
+ sizeof(MachO::fat_header) + Slices.size() * sizeof(MachO::fat_arch);
+
+ for (const auto &S : Slices) {
+ Offset = alignTo(Offset, 1ull << S.getP2Alignment());
+ if (Offset > UINT32_MAX)
+ return createStringError(
+ std::errc::invalid_argument,
+ ("fat file too large to be created because the offset "
+ "field in struct fat_arch is only 32-bits and the offset " +
+ Twine(Offset) + " for " + S.getBinary()->getFileName() +
+ " for architecture " + S.getArchString() + "exceeds that.")
+ .str()
+ .c_str());
+
+ MachO::fat_arch FatArch;
+ FatArch.cputype = S.getCPUType();
+ FatArch.cpusubtype = S.getCPUSubType();
+ FatArch.offset = Offset;
+ FatArch.size = S.getBinary()->getMemoryBufferRef().getBufferSize();
+ FatArch.align = S.getP2Alignment();
+ Offset += FatArch.size;
+ FatArchList.push_back(FatArch);
+ }
+ return FatArchList;
+}
+
+static Error writeUniversalBinaryToStream(ArrayRef<Slice> Slices,
+ raw_ostream &Out) {
+ MachO::fat_header FatHeader;
+ FatHeader.magic = MachO::FAT_MAGIC;
+ FatHeader.nfat_arch = Slices.size();
+
+ Expected<SmallVector<MachO::fat_arch, 2>> FatArchListOrErr =
+ buildFatArchList(Slices);
+ if (!FatArchListOrErr)
+ return FatArchListOrErr.takeError();
+ SmallVector<MachO::fat_arch, 2> FatArchList = *FatArchListOrErr;
+
+ if (sys::IsLittleEndianHost)
+ MachO::swapStruct(FatHeader);
+ Out.write(reinterpret_cast<const char *>(&FatHeader),
+ sizeof(MachO::fat_header));
+
+ if (sys::IsLittleEndianHost)
+ for (MachO::fat_arch &FA : FatArchList)
+ MachO::swapStruct(FA);
+ Out.write(reinterpret_cast<const char *>(FatArchList.data()),
+ sizeof(MachO::fat_arch) * FatArchList.size());
+
+ if (sys::IsLittleEndianHost)
+ for (MachO::fat_arch &FA : FatArchList)
+ MachO::swapStruct(FA);
+
+ size_t Offset =
+ sizeof(MachO::fat_header) + sizeof(MachO::fat_arch) * FatArchList.size();
+ for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) {
+ MemoryBufferRef BufferRef = Slices[Index].getBinary()->getMemoryBufferRef();
+ assert((Offset <= FatArchList[Index].offset) && "Incorrect slice offset");
+ Out.write_zeros(FatArchList[Index].offset - Offset);
+ Out.write(BufferRef.getBufferStart(), BufferRef.getBufferSize());
+ Offset = FatArchList[Index].offset + BufferRef.getBufferSize();
+ }
+
+ Out.flush();
+ return Error::success();
+}
+
+Error object::writeUniversalBinary(ArrayRef<Slice> Slices,
+ StringRef OutputFileName) {
+ const bool IsExecutable = any_of(Slices, [](Slice S) {
+ return sys::fs::can_execute(S.getBinary()->getFileName());
+ });
+ unsigned Mode = sys::fs::all_read | sys::fs::all_write;
+ if (IsExecutable)
+ Mode |= sys::fs::all_exe;
+ Expected<sys::fs::TempFile> Temp = sys::fs::TempFile::create(
+ OutputFileName + ".temp-universal-%%%%%%", Mode);
+ if (!Temp)
+ return Temp.takeError();
+ raw_fd_ostream Out(Temp->FD, false);
+ if (Error E = writeUniversalBinaryToStream(Slices, Out)) {
+ if (Error DiscardError = Temp->discard())
+ return joinErrors(std::move(E), std::move(DiscardError));
+ return E;
+ }
+ return Temp->keep(OutputFileName);
+}
+
+Expected<std::unique_ptr<MemoryBuffer>>
+object::writeUniversalBinaryToBuffer(ArrayRef<Slice> Slices) {
+ SmallVector<char, 0> Buffer;
+ raw_svector_ostream Out(Buffer);
+
+ if (Error E = writeUniversalBinaryToStream(Slices, Out))
+ return std::move(E);
+
+ return std::make_unique<SmallVectorMemoryBuffer>(std::move(Buffer));
+}
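
MachOUniversalWriter.cpp is new: it computes a per-slice power-of-two alignment, lays out the fat_arch table, and writes the fat header plus slices either to a temporary file (writeUniversalBinary) or into memory (writeUniversalBinaryToBuffer). A minimal sketch of combining two thin MachO objects with the file-based entry point; it is illustrative only and assumes the inputs were already parsed elsewhere.

// Illustrative sketch: stitch two thin MachO objects into one universal
// (fat) output with the writer added above.
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversalWriter.h"
#include "llvm/Support/Error.h"
#include <vector>

llvm::Error makeFat(const llvm::object::MachOObjectFile &A,
                    const llvm::object::MachOObjectFile &B,
                    llvm::StringRef OutPath) {
  std::vector<llvm::object::Slice> Slices;
  // The single-argument Slice constructor derives the per-arch alignment
  // via calculateAlignment above.
  Slices.emplace_back(A);
  Slices.emplace_back(B);
  return llvm::object::writeUniversalBinary(Slices, OutPath);
}
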
diff --git a/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp
index 61b36ea0f448..cf09a66d9c7d 100644
--- a/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp
@@ -132,7 +132,8 @@ Triple ObjectFile::makeTriple() const {
}
Expected<std::unique_ptr<ObjectFile>>
-ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) {
+ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
+ bool InitContent) {
StringRef Data = Object.getBuffer();
if (Type == file_magic::unknown)
Type = identify_magic(Data);
@@ -154,7 +155,7 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) {
case file_magic::elf_executable:
case file_magic::elf_shared_object:
case file_magic::elf_core:
- return createELFObjectFile(Object);
+ return createELFObjectFile(Object, InitContent);
case file_magic::macho_object:
case file_magic::macho_executable:
case file_magic::macho_fixed_virtual_memory_shared_lib:
diff --git a/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp b/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp
index ad7a50d13bb7..204577af7239 100644
--- a/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp
@@ -39,20 +39,21 @@ static bool supportsX86_64(uint64_t Type) {
}
}
-static uint64_t resolveX86_64(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveX86_64(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t Addend) {
+ switch (Type) {
case ELF::R_X86_64_NONE:
- return A;
+ return LocData;
case ELF::R_X86_64_64:
case ELF::R_X86_64_DTPOFF32:
case ELF::R_X86_64_DTPOFF64:
- return S + getELFAddend(R);
+ return S + Addend;
case ELF::R_X86_64_PC32:
case ELF::R_X86_64_PC64:
- return S + getELFAddend(R) - R.getOffset();
+ return S + Addend - Offset;
case ELF::R_X86_64_32:
case ELF::R_X86_64_32S:
- return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ return (S + Addend) & 0xFFFFFFFF;
default:
llvm_unreachable("Invalid relocation type");
}
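
All resolvers in this file move from a RelocationRef parameter to the flat (Type, Offset, S, LocData, Addend) signature, making the two flavors of addend explicit: RELA-style targets such as x86_64 use the Addend carried in the relocation record, while REL-style targets such as i386 (resolveX86 further below) fold in LocData, the value already stored at the fixup location. A tiny self-contained sketch of that distinction for the two PC-relative 32-bit cases.

// Illustrative sketch only: the two "addend" inputs in the new resolver
// signature, shown for 32-bit PC-relative relocations.
#include <cstdint>

// RELA style (mirrors resolveX86_64, R_X86_64_PC32): S + A - P, where the
// addend A comes from the relocation record; the caller truncates to 32 bits.
uint64_t pc32WithExplicitAddend(uint64_t S, uint64_t Offset, int64_t Addend) {
  return S + Addend - Offset;
}

// REL style (mirrors resolveX86, R_386_PC32): the implicit addend is the
// value already stored at the fixup location (LocData).
uint64_t pc32WithImplicitAddend(uint64_t S, uint64_t Offset, uint64_t LocData) {
  return S - Offset + LocData;
}
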
@@ -70,16 +71,17 @@ static bool supportsAArch64(uint64_t Type) {
}
}
-static uint64_t resolveAArch64(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveAArch64(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ switch (Type) {
case ELF::R_AARCH64_ABS32:
- return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ return (S + Addend) & 0xFFFFFFFF;
case ELF::R_AARCH64_ABS64:
- return S + getELFAddend(R);
+ return S + Addend;
case ELF::R_AARCH64_PREL32:
- return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF;
+ return (S + Addend - Offset) & 0xFFFFFFFF;
case ELF::R_AARCH64_PREL64:
- return S + getELFAddend(R) - R.getOffset();
+ return S + Addend - Offset;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -95,12 +97,13 @@ static bool supportsBPF(uint64_t Type) {
}
}
-static uint64_t resolveBPF(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveBPF(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t /*Addend*/) {
+ switch (Type) {
case ELF::R_BPF_64_32:
- return (S + A) & 0xFFFFFFFF;
+ return (S + LocData) & 0xFFFFFFFF;
case ELF::R_BPF_64_64:
- return S + A;
+ return S + LocData;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -118,16 +121,17 @@ static bool supportsMips64(uint64_t Type) {
}
}
-static uint64_t resolveMips64(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveMips64(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ switch (Type) {
case ELF::R_MIPS_32:
- return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ return (S + Addend) & 0xFFFFFFFF;
case ELF::R_MIPS_64:
- return S + getELFAddend(R);
+ return S + Addend;
case ELF::R_MIPS_TLS_DTPREL64:
- return S + getELFAddend(R) - 0x8000;
+ return S + Addend - 0x8000;
case ELF::R_MIPS_PC32:
- return S + getELFAddend(R) - R.getOffset();
+ return S + Addend - Offset;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -143,12 +147,13 @@ static bool supportsMSP430(uint64_t Type) {
}
}
-static uint64_t resolveMSP430(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveMSP430(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ switch (Type) {
case ELF::R_MSP430_32:
- return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ return (S + Addend) & 0xFFFFFFFF;
case ELF::R_MSP430_16_BYTE:
- return (S + getELFAddend(R)) & 0xFFFF;
+ return (S + Addend) & 0xFFFF;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -166,16 +171,17 @@ static bool supportsPPC64(uint64_t Type) {
}
}
-static uint64_t resolvePPC64(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolvePPC64(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ switch (Type) {
case ELF::R_PPC64_ADDR32:
- return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ return (S + Addend) & 0xFFFFFFFF;
case ELF::R_PPC64_ADDR64:
- return S + getELFAddend(R);
+ return S + Addend;
case ELF::R_PPC64_REL32:
- return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF;
+ return (S + Addend - Offset) & 0xFFFFFFFF;
case ELF::R_PPC64_REL64:
- return S + getELFAddend(R) - R.getOffset();
+ return S + Addend - Offset;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -191,12 +197,13 @@ static bool supportsSystemZ(uint64_t Type) {
}
}
-static uint64_t resolveSystemZ(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveSystemZ(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ switch (Type) {
case ELF::R_390_32:
- return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ return (S + Addend) & 0xFFFFFFFF;
case ELF::R_390_64:
- return S + getELFAddend(R);
+ return S + Addend;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -214,13 +221,14 @@ static bool supportsSparc64(uint64_t Type) {
}
}
-static uint64_t resolveSparc64(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveSparc64(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ switch (Type) {
case ELF::R_SPARC_32:
case ELF::R_SPARC_64:
case ELF::R_SPARC_UA32:
case ELF::R_SPARC_UA64:
- return S + getELFAddend(R);
+ return S + Addend;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -236,11 +244,12 @@ static bool supportsAmdgpu(uint64_t Type) {
}
}
-static uint64_t resolveAmdgpu(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveAmdgpu(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ switch (Type) {
case ELF::R_AMDGPU_ABS32:
case ELF::R_AMDGPU_ABS64:
- return S + getELFAddend(R);
+ return S + Addend;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -257,14 +266,15 @@ static bool supportsX86(uint64_t Type) {
}
}
-static uint64_t resolveX86(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveX86(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t /*Addend*/) {
+ switch (Type) {
case ELF::R_386_NONE:
- return A;
+ return LocData;
case ELF::R_386_32:
- return S + A;
+ return S + LocData;
case ELF::R_386_PC32:
- return S - R.getOffset() + A;
+ return S - Offset + LocData;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -280,23 +290,35 @@ static bool supportsPPC32(uint64_t Type) {
}
}
-static uint64_t resolvePPC32(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolvePPC32(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ switch (Type) {
case ELF::R_PPC_ADDR32:
- return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ return (S + Addend) & 0xFFFFFFFF;
case ELF::R_PPC_REL32:
- return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF;
+ return (S + Addend - Offset) & 0xFFFFFFFF;
}
llvm_unreachable("Invalid relocation type");
}
static bool supportsARM(uint64_t Type) {
- return Type == ELF::R_ARM_ABS32;
+ switch (Type) {
+ case ELF::R_ARM_ABS32:
+ case ELF::R_ARM_REL32:
+ return true;
+ default:
+ return false;
+ }
}
-static uint64_t resolveARM(RelocationRef R, uint64_t S, uint64_t A) {
- if (R.getType() == ELF::R_ARM_ABS32)
- return (S + A) & 0xFFFFFFFF;
+static uint64_t resolveARM(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t /*Addend*/) {
+ switch (Type) {
+ case ELF::R_ARM_ABS32:
+ return (S + LocData) & 0xFFFFFFFF;
+ case ELF::R_ARM_REL32:
+ return (S + LocData - Offset) & 0xFFFFFFFF;
+ }
llvm_unreachable("Invalid relocation type");
}
@@ -310,12 +332,13 @@ static bool supportsAVR(uint64_t Type) {
}
}
-static uint64_t resolveAVR(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveAVR(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ switch (Type) {
case ELF::R_AVR_16:
- return (S + getELFAddend(R)) & 0xFFFF;
+ return (S + Addend) & 0xFFFF;
case ELF::R_AVR_32:
- return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ return (S + Addend) & 0xFFFFFFFF;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -325,9 +348,10 @@ static bool supportsLanai(uint64_t Type) {
return Type == ELF::R_LANAI_32;
}
-static uint64_t resolveLanai(RelocationRef R, uint64_t S, uint64_t A) {
- if (R.getType() == ELF::R_LANAI_32)
- return (S + getELFAddend(R)) & 0xFFFFFFFF;
+static uint64_t resolveLanai(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ if (Type == ELF::R_LANAI_32)
+ return (S + Addend) & 0xFFFFFFFF;
llvm_unreachable("Invalid relocation type");
}
@@ -341,13 +365,13 @@ static bool supportsMips32(uint64_t Type) {
}
}
-static uint64_t resolveMips32(RelocationRef R, uint64_t S, uint64_t A) {
+static uint64_t resolveMips32(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t /*Addend*/) {
// FIXME: Take in account implicit addends to get correct results.
- uint32_t Rel = R.getType();
- if (Rel == ELF::R_MIPS_32)
- return (S + A) & 0xFFFFFFFF;
- if (Rel == ELF::R_MIPS_TLS_DTPREL32)
- return (S + A) & 0xFFFFFFFF;
+ if (Type == ELF::R_MIPS_32)
+ return (S + LocData) & 0xFFFFFFFF;
+ if (Type == ELF::R_MIPS_TLS_DTPREL32)
+ return (S + LocData) & 0xFFFFFFFF;
llvm_unreachable("Invalid relocation type");
}
@@ -361,20 +385,21 @@ static bool supportsSparc32(uint64_t Type) {
}
}
-static uint64_t resolveSparc32(RelocationRef R, uint64_t S, uint64_t A) {
- uint32_t Rel = R.getType();
- if (Rel == ELF::R_SPARC_32 || Rel == ELF::R_SPARC_UA32)
- return S + getELFAddend(R);
- return A;
+static uint64_t resolveSparc32(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t Addend) {
+ if (Type == ELF::R_SPARC_32 || Type == ELF::R_SPARC_UA32)
+ return S + Addend;
+ return LocData;
}
static bool supportsHexagon(uint64_t Type) {
return Type == ELF::R_HEX_32;
}
-static uint64_t resolveHexagon(RelocationRef R, uint64_t S, uint64_t A) {
- if (R.getType() == ELF::R_HEX_32)
- return S + getELFAddend(R);
+static uint64_t resolveHexagon(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t /*LocData*/, int64_t Addend) {
+ if (Type == ELF::R_HEX_32)
+ return S + Addend;
llvm_unreachable("Invalid relocation type");
}
@@ -400,15 +425,17 @@ static bool supportsRISCV(uint64_t Type) {
}
}
-static uint64_t resolveRISCV(RelocationRef R, uint64_t S, uint64_t A) {
- int64_t RA = getELFAddend(R);
- switch (R.getType()) {
+static uint64_t resolveRISCV(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t Addend) {
+ int64_t RA = Addend;
+ uint64_t A = LocData;
+ switch (Type) {
case ELF::R_RISCV_NONE:
- return A;
+ return LocData;
case ELF::R_RISCV_32:
return (S + RA) & 0xFFFFFFFF;
case ELF::R_RISCV_32_PCREL:
- return (S + RA - R.getOffset()) & 0xFFFFFFFF;
+ return (S + RA - Offset) & 0xFFFFFFFF;
case ELF::R_RISCV_64:
return S + RA;
case ELF::R_RISCV_SET6:
@@ -446,11 +473,12 @@ static bool supportsCOFFX86(uint64_t Type) {
}
}
-static uint64_t resolveCOFFX86(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveCOFFX86(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t /*Addend*/) {
+ switch (Type) {
case COFF::IMAGE_REL_I386_SECREL:
case COFF::IMAGE_REL_I386_DIR32:
- return (S + A) & 0xFFFFFFFF;
+ return (S + LocData) & 0xFFFFFFFF;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -466,12 +494,13 @@ static bool supportsCOFFX86_64(uint64_t Type) {
}
}
-static uint64_t resolveCOFFX86_64(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveCOFFX86_64(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t /*Addend*/) {
+ switch (Type) {
case COFF::IMAGE_REL_AMD64_SECREL:
- return (S + A) & 0xFFFFFFFF;
+ return (S + LocData) & 0xFFFFFFFF;
case COFF::IMAGE_REL_AMD64_ADDR64:
- return S + A;
+ return S + LocData;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -487,11 +516,12 @@ static bool supportsCOFFARM(uint64_t Type) {
}
}
-static uint64_t resolveCOFFARM(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveCOFFARM(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t /*Addend*/) {
+ switch (Type) {
case COFF::IMAGE_REL_ARM_SECREL:
case COFF::IMAGE_REL_ARM_ADDR32:
- return (S + A) & 0xFFFFFFFF;
+ return (S + LocData) & 0xFFFFFFFF;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -507,12 +537,13 @@ static bool supportsCOFFARM64(uint64_t Type) {
}
}
-static uint64_t resolveCOFFARM64(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveCOFFARM64(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t /*Addend*/) {
+ switch (Type) {
case COFF::IMAGE_REL_ARM64_SECREL:
- return (S + A) & 0xFFFFFFFF;
+ return (S + LocData) & 0xFFFFFFFF;
case COFF::IMAGE_REL_ARM64_ADDR64:
- return S + A;
+ return S + LocData;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -522,8 +553,9 @@ static bool supportsMachOX86_64(uint64_t Type) {
return Type == MachO::X86_64_RELOC_UNSIGNED;
}
-static uint64_t resolveMachOX86_64(RelocationRef R, uint64_t S, uint64_t A) {
- if (R.getType() == MachO::X86_64_RELOC_UNSIGNED)
+static uint64_t resolveMachOX86_64(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t /*Addend*/) {
+ if (Type == MachO::X86_64_RELOC_UNSIGNED)
return S;
llvm_unreachable("Invalid relocation type");
}
@@ -542,6 +574,7 @@ static bool supportsWasm32(uint64_t Type) {
case wasm::R_WASM_SECTION_OFFSET_I32:
case wasm::R_WASM_EVENT_INDEX_LEB:
case wasm::R_WASM_GLOBAL_INDEX_I32:
+ case wasm::R_WASM_TABLE_NUMBER_LEB:
return true;
default:
return false;
@@ -553,14 +586,18 @@ static bool supportsWasm64(uint64_t Type) {
case wasm::R_WASM_MEMORY_ADDR_LEB64:
case wasm::R_WASM_MEMORY_ADDR_SLEB64:
case wasm::R_WASM_MEMORY_ADDR_I64:
+ case wasm::R_WASM_TABLE_INDEX_SLEB64:
+ case wasm::R_WASM_TABLE_INDEX_I64:
+ case wasm::R_WASM_FUNCTION_OFFSET_I64:
return true;
default:
return supportsWasm32(Type);
}
}
-static uint64_t resolveWasm32(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveWasm32(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t /*Addend*/) {
+ switch (Type) {
case wasm::R_WASM_FUNCTION_INDEX_LEB:
case wasm::R_WASM_TABLE_INDEX_SLEB:
case wasm::R_WASM_TABLE_INDEX_I32:
@@ -573,26 +610,31 @@ static uint64_t resolveWasm32(RelocationRef R, uint64_t S, uint64_t A) {
case wasm::R_WASM_SECTION_OFFSET_I32:
case wasm::R_WASM_EVENT_INDEX_LEB:
case wasm::R_WASM_GLOBAL_INDEX_I32:
+ case wasm::R_WASM_TABLE_NUMBER_LEB:
// For wasm section, its offset at 0 -- ignoring Value
- return A;
+ return LocData;
default:
llvm_unreachable("Invalid relocation type");
}
}
-static uint64_t resolveWasm64(RelocationRef R, uint64_t S, uint64_t A) {
- switch (R.getType()) {
+static uint64_t resolveWasm64(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t Addend) {
+ switch (Type) {
case wasm::R_WASM_MEMORY_ADDR_LEB64:
case wasm::R_WASM_MEMORY_ADDR_SLEB64:
case wasm::R_WASM_MEMORY_ADDR_I64:
+ case wasm::R_WASM_TABLE_INDEX_SLEB64:
+ case wasm::R_WASM_TABLE_INDEX_I64:
+ case wasm::R_WASM_FUNCTION_OFFSET_I64:
// For wasm section, its offset at 0 -- ignoring Value
- return A;
+ return LocData;
default:
- return resolveWasm32(R, S, A);
+ return resolveWasm32(Type, Offset, S, LocData, Addend);
}
}
-std::pair<bool (*)(uint64_t), RelocationResolver>
+std::pair<SupportsRelocation, RelocationResolver>
getRelocationResolver(const ObjectFile &Obj) {
if (Obj.isCOFF()) {
switch (Obj.getArch()) {
@@ -645,6 +687,7 @@ getRelocationResolver(const ObjectFile &Obj) {
switch (Obj.getArch()) {
case Triple::x86:
return {supportsX86, resolveX86};
+ case Triple::ppcle:
case Triple::ppc:
return {supportsPPC32, resolvePPC32};
case Triple::arm:
@@ -683,5 +726,38 @@ getRelocationResolver(const ObjectFile &Obj) {
llvm_unreachable("Invalid object file");
}
+uint64_t resolveRelocation(RelocationResolver Resolver, const RelocationRef &R,
+ uint64_t S, uint64_t LocData) {
+ if (const ObjectFile *Obj = R.getObject()) {
+ int64_t Addend = 0;
+ if (Obj->isELF()) {
+ auto GetRelSectionType = [&]() -> unsigned {
+ if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj))
+ return Elf32LEObj->getRelSection(R.getRawDataRefImpl())->sh_type;
+ if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj))
+ return Elf64LEObj->getRelSection(R.getRawDataRefImpl())->sh_type;
+ if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj))
+ return Elf32BEObj->getRelSection(R.getRawDataRefImpl())->sh_type;
+ auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj);
+ return Elf64BEObj->getRelSection(R.getRawDataRefImpl())->sh_type;
+ };
+
+ if (GetRelSectionType() == ELF::SHT_RELA)
+ Addend = getELFAddend(R);
+ }
+
+ return Resolver(R.getType(), R.getOffset(), S, LocData, Addend);
+ }
+
+ // Sometimes the caller might want to use its own specific implementation of
+ // the resolver function. E.g. this is used by LLD when it resolves debug
+ // relocations and assumes that all of them have the same computation (S + A).
+ // The relocation R has no owner object in this case and we don't need to
+ // provide Type and Offset fields. It is also assumed the DataRefImpl.p
+ // contains the addend, provided by the caller.
+ return Resolver(/*Type=*/0, /*Offset=*/0, S, LocData,
+ R.getRawDataRefImpl().p);
+}
+
} // namespace object
} // namespace llvm
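Editorial note (not part of the patch): the hunks above change the per-arch resolvers from taking a RelocationRef to taking raw (Type, Offset, S, LocData, Addend) values, with the new resolveRelocation() helper doing the unpacking. A minimal sketch of how a consumer might drive the reworked API follows; `SymVal` and `LocData` are hypothetical inputs the caller is assumed to have computed already.

#include "llvm/Object/RelocationResolver.h"
using namespace llvm;
using namespace llvm::object;

// Apply a relocation to the value read from the relocated location.
static uint64_t applyReloc(const ObjectFile &Obj, const RelocationRef &R,
                           uint64_t SymVal, uint64_t LocData) {
  std::pair<SupportsRelocation, RelocationResolver> P =
      getRelocationResolver(Obj);
  if (!P.first || !P.first(R.getType()))
    return LocData; // Relocation type not handled; keep the raw bytes.
  // resolveRelocation() reads the ELF addend only for SHT_RELA sections and
  // forwards Type, Offset, LocData and Addend to the per-arch resolver.
  return resolveRelocation(P.second, R, SymVal, LocData);
}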
diff --git a/contrib/llvm-project/llvm/lib/Object/SymbolSize.cpp b/contrib/llvm-project/llvm/lib/Object/SymbolSize.cpp
index 84eed4d169d3..97baabec084b 100644
--- a/contrib/llvm-project/llvm/lib/Object/SymbolSize.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/SymbolSize.cpp
@@ -48,7 +48,7 @@ llvm::object::computeSymbolSizes(const ObjectFile &O) {
if (const auto *E = dyn_cast<ELFObjectFileBase>(&O)) {
auto Syms = E->symbols();
- if (Syms.begin() == Syms.end())
+ if (Syms.empty())
Syms = E->getDynamicSymbolIterators();
for (ELFSymbolRef Sym : Syms)
Ret.push_back({Sym, Sym.getSize()});
diff --git a/contrib/llvm-project/llvm/lib/Object/SymbolicFile.cpp b/contrib/llvm-project/llvm/lib/Object/SymbolicFile.cpp
index 3db4ad9ed14b..34a2c5e1c125 100644
--- a/contrib/llvm-project/llvm/lib/Object/SymbolicFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/SymbolicFile.cpp
@@ -36,25 +36,19 @@ SymbolicFile::~SymbolicFile() = default;
Expected<std::unique_ptr<SymbolicFile>>
SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type,
- LLVMContext *Context) {
+ LLVMContext *Context, bool InitContent) {
StringRef Data = Object.getBuffer();
if (Type == file_magic::unknown)
Type = identify_magic(Data);
+ if (!isSymbolicFile(Type, Context))
+ return errorCodeToError(object_error::invalid_file_type);
+
switch (Type) {
case file_magic::bitcode:
- if (Context)
- return IRObjectFile::create(Object, *Context);
- LLVM_FALLTHROUGH;
- case file_magic::unknown:
- case file_magic::archive:
- case file_magic::coff_cl_gl_object:
- case file_magic::macho_universal_binary:
- case file_magic::windows_resource:
- case file_magic::pdb:
- case file_magic::minidump:
- case file_magic::tapi_file:
- return errorCodeToError(object_error::invalid_file_type);
+ // Context is guaranteed to be non-null here, because bitcode magic only
+ // indicates a symbolic file when Context is non-null.
+ return IRObjectFile::create(Object, *Context);
case file_magic::elf:
case file_magic::elf_executable:
case file_magic::elf_shared_object:
@@ -73,14 +67,14 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type,
case file_magic::xcoff_object_32:
case file_magic::xcoff_object_64:
case file_magic::wasm_object:
- return ObjectFile::createObjectFile(Object, Type);
+ return ObjectFile::createObjectFile(Object, Type, InitContent);
case file_magic::coff_import_library:
return std::unique_ptr<SymbolicFile>(new COFFImportFile(Object));
case file_magic::elf_relocatable:
case file_magic::macho_object:
case file_magic::coff_object: {
Expected<std::unique_ptr<ObjectFile>> Obj =
- ObjectFile::createObjectFile(Object, Type);
+ ObjectFile::createObjectFile(Object, Type, InitContent);
if (!Obj || !Context)
return std::move(Obj);
@@ -95,6 +89,39 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type,
MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()),
*Context);
}
+ default:
+ llvm_unreachable("Unexpected Binary File Type");
+ }
+}
+
+bool SymbolicFile::isSymbolicFile(file_magic Type, const LLVMContext *Context) {
+ switch (Type) {
+ case file_magic::bitcode:
+ return Context != nullptr;
+ case file_magic::elf:
+ case file_magic::elf_executable:
+ case file_magic::elf_shared_object:
+ case file_magic::elf_core:
+ case file_magic::macho_executable:
+ case file_magic::macho_fixed_virtual_memory_shared_lib:
+ case file_magic::macho_core:
+ case file_magic::macho_preload_executable:
+ case file_magic::macho_dynamically_linked_shared_lib:
+ case file_magic::macho_dynamic_linker:
+ case file_magic::macho_bundle:
+ case file_magic::macho_dynamically_linked_shared_lib_stub:
+ case file_magic::macho_dsym_companion:
+ case file_magic::macho_kext_bundle:
+ case file_magic::pecoff_executable:
+ case file_magic::xcoff_object_32:
+ case file_magic::xcoff_object_64:
+ case file_magic::wasm_object:
+ case file_magic::coff_import_library:
+ case file_magic::elf_relocatable:
+ case file_magic::macho_object:
+ case file_magic::coff_object:
+ return true;
+ default:
+ return false;
}
- llvm_unreachable("Unexpected Binary File Type");
}
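Hedged illustration (not part of the patch): the refactored factory can now be paired with the new isSymbolicFile() predicate; createSymbolicFile() performs the same check itself before the switch, so the up-front call below is optional. The explicit /*InitContent=*/false is illustrative; the parameter's default lives in the header, which this hunk does not show.

#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/SymbolicFile.h"
using namespace llvm;
using namespace llvm::object;

static Expected<std::unique_ptr<SymbolicFile>>
openSymbolicFile(MemoryBufferRef Buf, LLVMContext *Ctx) {
  file_magic Magic = identify_magic(Buf.getBuffer());
  // Cheap pre-filter: bitcode only counts as symbolic when a context is given.
  if (!SymbolicFile::isSymbolicFile(Magic, Ctx))
    return errorCodeToError(object_error::invalid_file_type);
  return SymbolicFile::createSymbolicFile(Buf, Magic, Ctx,
                                          /*InitContent=*/false);
}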
diff --git a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
index bb2e81d64047..40f468881edd 100644
--- a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
@@ -187,19 +187,19 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr,
case wasm::WASM_OPCODE_REF_NULL: {
wasm::ValType Ty = static_cast<wasm::ValType>(readULEB128(Ctx));
if (Ty != wasm::ValType::EXTERNREF) {
- return make_error<GenericBinaryError>("Invalid type for ref.null",
+ return make_error<GenericBinaryError>("invalid type for ref.null",
object_error::parse_failed);
}
break;
}
default:
- return make_error<GenericBinaryError>("Invalid opcode in init_expr",
+ return make_error<GenericBinaryError>("invalid opcode in init_expr",
object_error::parse_failed);
}
uint8_t EndOpcode = readOpcode(Ctx);
if (EndOpcode != wasm::WASM_OPCODE_END) {
- return make_error<GenericBinaryError>("Invalid init_expr",
+ return make_error<GenericBinaryError>("invalid init_expr",
object_error::parse_failed);
}
return Error::success();
@@ -214,11 +214,11 @@ static wasm::WasmLimits readLimits(WasmObjectFile::ReadContext &Ctx) {
return Result;
}
-static wasm::WasmTable readTable(WasmObjectFile::ReadContext &Ctx) {
- wasm::WasmTable Table;
- Table.ElemType = readUint8(Ctx);
- Table.Limits = readLimits(Ctx);
- return Table;
+static wasm::WasmTableType readTableType(WasmObjectFile::ReadContext &Ctx) {
+ wasm::WasmTableType TableType;
+ TableType.ElemType = readUint8(Ctx);
+ TableType.Limits = readLimits(Ctx);
+ return TableType;
}
static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx,
@@ -228,10 +228,10 @@ static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx,
LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n");
uint32_t Size = readVaruint32(Ctx);
if (Size == 0)
- return make_error<StringError>("Zero length section",
+ return make_error<StringError>("zero length section",
object_error::parse_failed);
if (Ctx.Ptr + Size > Ctx.End)
- return make_error<StringError>("Section too large",
+ return make_error<StringError>("section too large",
object_error::parse_failed);
if (Section.Type == wasm::WASM_SEC_CUSTOM) {
WasmObjectFile::ReadContext SectionCtx;
@@ -247,7 +247,7 @@ static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx,
}
if (!Checker.isValidSectionOrder(Section.Type, Section.Name)) {
- return make_error<StringError>("Out of order section type: " +
+ return make_error<StringError>("out of order section type: " +
llvm::to_string(Section.Type),
object_error::parse_failed);
}
@@ -262,8 +262,8 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
ErrorAsOutParameter ErrAsOutParam(&Err);
Header.Magic = getData().substr(0, 4);
if (Header.Magic != StringRef("\0asm", 4)) {
- Err =
- make_error<StringError>("Bad magic number", object_error::parse_failed);
+ Err = make_error<StringError>("invalid magic number",
+ object_error::parse_failed);
return;
}
@@ -273,14 +273,15 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
Ctx.End = Ctx.Start + getData().size();
if (Ctx.Ptr + 4 > Ctx.End) {
- Err = make_error<StringError>("Missing version number",
+ Err = make_error<StringError>("missing version number",
object_error::parse_failed);
return;
}
Header.Version = readUint32(Ctx);
if (Header.Version != wasm::WasmVersion) {
- Err = make_error<StringError>("Bad version number",
+ Err = make_error<StringError>("invalid version number: " +
+ Twine(Header.Version),
object_error::parse_failed);
return;
}
@@ -333,7 +334,7 @@ Error WasmObjectFile::parseSection(WasmSection &Sec) {
return parseDataCountSection(Ctx);
default:
return make_error<GenericBinaryError>(
- "Invalid section type: " + Twine(Sec.Type), object_error::parse_failed);
+ "invalid section type: " + Twine(Sec.Type), object_error::parse_failed);
}
}
@@ -355,9 +356,11 @@ Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) {
}
Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
- llvm::DenseSet<uint64_t> Seen;
+ llvm::DenseSet<uint64_t> SeenFunctions;
+ llvm::DenseSet<uint64_t> SeenGlobals;
+ llvm::DenseSet<uint64_t> SeenSegments;
if (FunctionTypes.size() && !SeenCodeSection) {
- return make_error<GenericBinaryError>("Names must come after code section",
+ return make_error<GenericBinaryError>("names must come after code section",
object_error::parse_failed);
}
@@ -366,20 +369,42 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
uint32_t Size = readVaruint32(Ctx);
const uint8_t *SubSectionEnd = Ctx.Ptr + Size;
switch (Type) {
- case wasm::WASM_NAMES_FUNCTION: {
+ case wasm::WASM_NAMES_FUNCTION:
+ case wasm::WASM_NAMES_GLOBAL:
+ case wasm::WASM_NAMES_DATA_SEGMENT: {
uint32_t Count = readVaruint32(Ctx);
while (Count--) {
uint32_t Index = readVaruint32(Ctx);
- if (!Seen.insert(Index).second)
- return make_error<GenericBinaryError>("Function named more than once",
- object_error::parse_failed);
StringRef Name = readString(Ctx);
- if (!isValidFunctionIndex(Index) || Name.empty())
- return make_error<GenericBinaryError>("Invalid name entry",
- object_error::parse_failed);
- DebugNames.push_back(wasm::WasmFunctionName{Index, Name});
- if (isDefinedFunctionIndex(Index))
- getDefinedFunction(Index).DebugName = Name;
+ wasm::NameType nameType = wasm::NameType::FUNCTION;
+ if (Type == wasm::WASM_NAMES_FUNCTION) {
+ if (!SeenFunctions.insert(Index).second)
+ return make_error<GenericBinaryError>(
+ "function named more than once", object_error::parse_failed);
+ if (!isValidFunctionIndex(Index) || Name.empty())
+ return make_error<GenericBinaryError>("invalid name entry",
+ object_error::parse_failed);
+
+ if (isDefinedFunctionIndex(Index))
+ getDefinedFunction(Index).DebugName = Name;
+ } else if (Type == wasm::WASM_NAMES_GLOBAL) {
+ nameType = wasm::NameType::GLOBAL;
+ if (!SeenGlobals.insert(Index).second)
+ return make_error<GenericBinaryError>("global named more than once",
+ object_error::parse_failed);
+ if (!isValidGlobalIndex(Index) || Name.empty())
+ return make_error<GenericBinaryError>("invalid name entry",
+ object_error::parse_failed);
+ } else {
+ nameType = wasm::NameType::DATA_SEGMENT;
+ if (!SeenSegments.insert(Index).second)
+ return make_error<GenericBinaryError>(
+ "segment named more than once", object_error::parse_failed);
+ if (Index > DataSegments.size())
+ return make_error<GenericBinaryError>("invalid named data segment",
+ object_error::parse_failed);
+ }
+ DebugNames.push_back(wasm::WasmDebugName{nameType, Index, Name});
}
break;
}
@@ -391,11 +416,11 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
}
if (Ctx.Ptr != SubSectionEnd)
return make_error<GenericBinaryError>(
- "Name sub-section ended prematurely", object_error::parse_failed);
+ "name sub-section ended prematurely", object_error::parse_failed);
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Name section ended prematurely",
+ return make_error<GenericBinaryError>("name section ended prematurely",
object_error::parse_failed);
return Error::success();
}
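A minimal consumer sketch (not part of the patch): with the hunk above, the name section records function, global and data-segment names uniformly as wasm::WasmDebugName entries. The debugNames() accessor and the member spellings (Type, Index, Name) are taken from the aggregate initialization above and the existing WasmObjectFile interface, and should be double-checked against the header.

#include "llvm/Support/raw_ostream.h"

static void printDebugNames(const llvm::object::WasmObjectFile &Obj) {
  for (const llvm::wasm::WasmDebugName &N : Obj.debugNames()) {
    const char *Kind = N.Type == llvm::wasm::NameType::FUNCTION ? "func"
                       : N.Type == llvm::wasm::NameType::GLOBAL ? "global"
                                                                : "data";
    llvm::outs() << Kind << " #" << N.Index << " = " << N.Name << "\n";
  }
}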
@@ -404,14 +429,14 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
HasLinkingSection = true;
if (FunctionTypes.size() && !SeenCodeSection) {
return make_error<GenericBinaryError>(
- "Linking data must come after code section",
+ "linking data must come after code section",
object_error::parse_failed);
}
LinkingData.Version = readVaruint32(Ctx);
if (LinkingData.Version != wasm::WasmMetadataVersion) {
return make_error<GenericBinaryError>(
- "Unexpected metadata version: " + Twine(LinkingData.Version) +
+ "unexpected metadata version: " + Twine(LinkingData.Version) +
" (Expected: " + Twine(wasm::WasmMetadataVersion) + ")",
object_error::parse_failed);
}
@@ -432,7 +457,7 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
case wasm::WASM_SEGMENT_INFO: {
uint32_t Count = readVaruint32(Ctx);
if (Count > DataSegments.size())
- return make_error<GenericBinaryError>("Too many segment names",
+ return make_error<GenericBinaryError>("too many segment names",
object_error::parse_failed);
for (uint32_t I = 0; I < Count; I++) {
DataSegments[I].Data.Name = readString(Ctx);
@@ -449,7 +474,7 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
Init.Priority = readVaruint32(Ctx);
Init.Symbol = readVaruint32(Ctx);
if (!isValidFunctionSymbol(Init.Symbol))
- return make_error<GenericBinaryError>("Invalid function symbol: " +
+ return make_error<GenericBinaryError>("invalid function symbol: " +
Twine(Init.Symbol),
object_error::parse_failed);
LinkingData.InitFunctions.emplace_back(Init);
@@ -466,10 +491,10 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
}
if (Ctx.Ptr != Ctx.End)
return make_error<GenericBinaryError>(
- "Linking sub-section ended prematurely", object_error::parse_failed);
+ "linking sub-section ended prematurely", object_error::parse_failed);
}
if (Ctx.Ptr != OrigEnd)
- return make_error<GenericBinaryError>("Linking section ended prematurely",
+ return make_error<GenericBinaryError>("linking section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -483,9 +508,11 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
std::vector<wasm::WasmImport *> ImportedGlobals;
std::vector<wasm::WasmImport *> ImportedFunctions;
std::vector<wasm::WasmImport *> ImportedEvents;
+ std::vector<wasm::WasmImport *> ImportedTables;
ImportedGlobals.reserve(Imports.size());
ImportedFunctions.reserve(Imports.size());
ImportedEvents.reserve(Imports.size());
+ ImportedTables.reserve(Imports.size());
for (auto &I : Imports) {
if (I.Kind == wasm::WASM_EXTERNAL_FUNCTION)
ImportedFunctions.emplace_back(&I);
@@ -493,12 +520,15 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
ImportedGlobals.emplace_back(&I);
else if (I.Kind == wasm::WASM_EXTERNAL_EVENT)
ImportedEvents.emplace_back(&I);
+ else if (I.Kind == wasm::WASM_EXTERNAL_TABLE)
+ ImportedTables.emplace_back(&I);
}
while (Count--) {
wasm::WasmSymbolInfo Info;
const wasm::WasmSignature *Signature = nullptr;
const wasm::WasmGlobalType *GlobalType = nullptr;
+ const wasm::WasmTableType *TableType = nullptr;
const wasm::WasmEventType *EventType = nullptr;
Info.Kind = readUint8(Ctx);
@@ -560,7 +590,38 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
Info.Name = Import.Field;
}
GlobalType = &Import.Global;
- Info.ImportName = Import.Field;
+ if (!Import.Module.empty()) {
+ Info.ImportModule = Import.Module;
+ }
+ }
+ break;
+
+ case wasm::WASM_SYMBOL_TYPE_TABLE:
+ Info.ElementIndex = readVaruint32(Ctx);
+ if (!isValidTableIndex(Info.ElementIndex) ||
+ IsDefined != isDefinedTableIndex(Info.ElementIndex))
+ return make_error<GenericBinaryError>("invalid table symbol index",
+ object_error::parse_failed);
+ if (!IsDefined && (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) ==
+ wasm::WASM_SYMBOL_BINDING_WEAK)
+ return make_error<GenericBinaryError>("undefined weak table symbol",
+ object_error::parse_failed);
+ if (IsDefined) {
+ Info.Name = readString(Ctx);
+ unsigned TableIndex = Info.ElementIndex - NumImportedTables;
+ wasm::WasmTable &Table = Tables[TableIndex];
+ TableType = &Table.Type;
+ if (Table.SymbolName.empty())
+ Table.SymbolName = Info.Name;
+ } else {
+ wasm::WasmImport &Import = *ImportedTables[Info.ElementIndex];
+ if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) {
+ Info.Name = readString(Ctx);
+ Info.ImportName = Import.Field;
+ } else {
+ Info.Name = Import.Field;
+ }
+ TableType = &Import.Table;
if (!Import.Module.empty()) {
Info.ImportModule = Import.Module;
}
@@ -587,7 +648,7 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
if ((Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) !=
wasm::WASM_SYMBOL_BINDING_LOCAL)
return make_error<GenericBinaryError>(
- "Section symbols must have local binding",
+ "section symbols must have local binding",
object_error::parse_failed);
Info.ElementIndex = readVaruint32(Ctx);
// Use somewhat unique section name as symbol name.
@@ -633,19 +694,20 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
}
default:
- return make_error<GenericBinaryError>("Invalid symbol type",
+ return make_error<GenericBinaryError>("invalid symbol type: " +
+ Twine(unsigned(Info.Kind)),
object_error::parse_failed);
}
if ((Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) !=
wasm::WASM_SYMBOL_BINDING_LOCAL &&
!SymbolNames.insert(Info.Name).second)
- return make_error<GenericBinaryError>("Duplicate symbol name " +
+ return make_error<GenericBinaryError>("duplicate symbol name " +
Twine(Info.Name),
object_error::parse_failed);
LinkingData.SymbolTable.emplace_back(Info);
- Symbols.emplace_back(LinkingData.SymbolTable.back(), GlobalType, EventType,
- Signature);
+ Symbols.emplace_back(LinkingData.SymbolTable.back(), GlobalType, TableType,
+ EventType, Signature);
LLVM_DEBUG(dbgs() << "Adding symbol: " << Symbols.back() << "\n");
}
@@ -658,13 +720,13 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
for (unsigned ComdatIndex = 0; ComdatIndex < ComdatCount; ++ComdatIndex) {
StringRef Name = readString(Ctx);
if (Name.empty() || !ComdatSet.insert(Name).second)
- return make_error<GenericBinaryError>("Bad/duplicate COMDAT name " +
+ return make_error<GenericBinaryError>("bad/duplicate COMDAT name " +
Twine(Name),
object_error::parse_failed);
LinkingData.Comdats.emplace_back(Name);
uint32_t Flags = readVaruint32(Ctx);
if (Flags != 0)
- return make_error<GenericBinaryError>("Unsupported COMDAT flags",
+ return make_error<GenericBinaryError>("unsupported COMDAT flags",
object_error::parse_failed);
uint32_t EntryCount = readVaruint32(Ctx);
@@ -673,14 +735,14 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
unsigned Index = readVaruint32(Ctx);
switch (Kind) {
default:
- return make_error<GenericBinaryError>("Invalid COMDAT entry type",
+ return make_error<GenericBinaryError>("invalid COMDAT entry type",
object_error::parse_failed);
case wasm::WASM_COMDAT_DATA:
if (Index >= DataSegments.size())
return make_error<GenericBinaryError>(
"COMDAT data index out of range", object_error::parse_failed);
if (DataSegments[Index].Data.Comdat != UINT32_MAX)
- return make_error<GenericBinaryError>("Data segment in two COMDATs",
+ return make_error<GenericBinaryError>("data segment in two COMDATs",
object_error::parse_failed);
DataSegments[Index].Data.Comdat = ComdatIndex;
break;
@@ -689,10 +751,19 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
return make_error<GenericBinaryError>(
"COMDAT function index out of range", object_error::parse_failed);
if (getDefinedFunction(Index).Comdat != UINT32_MAX)
- return make_error<GenericBinaryError>("Function in two COMDATs",
+ return make_error<GenericBinaryError>("function in two COMDATs",
object_error::parse_failed);
getDefinedFunction(Index).Comdat = ComdatIndex;
break;
+ case wasm::WASM_COMDAT_SECTION:
+ if (Index >= Sections.size())
+ return make_error<GenericBinaryError>(
+ "COMDAT section index out of range", object_error::parse_failed);
+ if (Sections[Index].Type != wasm::WASM_SEC_CUSTOM)
+ return make_error<GenericBinaryError>(
+ "non-custom section in a COMDAT", object_error::parse_failed);
+ Sections[Index].Comdat = ComdatIndex;
+ break;
}
}
}
@@ -706,7 +777,7 @@ Error WasmObjectFile::parseProducersSection(ReadContext &Ctx) {
StringRef FieldName = readString(Ctx);
if (!FieldsSeen.insert(FieldName).second)
return make_error<GenericBinaryError>(
- "Producers section does not have unique fields",
+ "producers section does not have unique fields",
object_error::parse_failed);
std::vector<std::pair<std::string, std::string>> *ProducerVec = nullptr;
if (FieldName == "language") {
@@ -717,7 +788,7 @@ Error WasmObjectFile::parseProducersSection(ReadContext &Ctx) {
ProducerVec = &ProducerInfo.SDKs;
} else {
return make_error<GenericBinaryError>(
- "Producers section field is not named one of language, processed-by, "
+ "producers section field is not named one of language, processed-by, "
"or sdk",
object_error::parse_failed);
}
@@ -728,14 +799,14 @@ Error WasmObjectFile::parseProducersSection(ReadContext &Ctx) {
StringRef Version = readString(Ctx);
if (!ProducersSeen.insert(Name).second) {
return make_error<GenericBinaryError>(
- "Producers section contains repeated producer",
+ "producers section contains repeated producer",
object_error::parse_failed);
}
ProducerVec->emplace_back(std::string(Name), std::string(Version));
}
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Producers section ended prematurely",
+ return make_error<GenericBinaryError>("producers section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -752,20 +823,20 @@ Error WasmObjectFile::parseTargetFeaturesSection(ReadContext &Ctx) {
case wasm::WASM_FEATURE_PREFIX_DISALLOWED:
break;
default:
- return make_error<GenericBinaryError>("Unknown feature policy prefix",
+ return make_error<GenericBinaryError>("unknown feature policy prefix",
object_error::parse_failed);
}
Feature.Name = std::string(readString(Ctx));
if (!FeaturesSeen.insert(Feature.Name).second)
return make_error<GenericBinaryError>(
- "Target features section contains repeated feature \"" +
+ "target features section contains repeated feature \"" +
Feature.Name + "\"",
object_error::parse_failed);
TargetFeatures.push_back(Feature);
}
if (Ctx.Ptr != Ctx.End)
return make_error<GenericBinaryError>(
- "Target features section ended prematurely",
+ "target features section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -773,7 +844,7 @@ Error WasmObjectFile::parseTargetFeaturesSection(ReadContext &Ctx) {
Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
uint32_t SectionIndex = readVaruint32(Ctx);
if (SectionIndex >= Sections.size())
- return make_error<GenericBinaryError>("Invalid section index",
+ return make_error<GenericBinaryError>("invalid section index",
object_error::parse_failed);
WasmSection &Section = Sections[SectionIndex];
uint32_t RelocCount = readVaruint32(Ctx);
@@ -781,25 +852,33 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
uint32_t PreviousOffset = 0;
while (RelocCount--) {
wasm::WasmRelocation Reloc = {};
- Reloc.Type = readVaruint32(Ctx);
+ uint32_t type = readVaruint32(Ctx);
+ Reloc.Type = type;
Reloc.Offset = readVaruint32(Ctx);
if (Reloc.Offset < PreviousOffset)
- return make_error<GenericBinaryError>("Relocations not in offset order",
+ return make_error<GenericBinaryError>("relocations not in offset order",
object_error::parse_failed);
PreviousOffset = Reloc.Offset;
Reloc.Index = readVaruint32(Ctx);
- switch (Reloc.Type) {
+ switch (type) {
case wasm::R_WASM_FUNCTION_INDEX_LEB:
case wasm::R_WASM_TABLE_INDEX_SLEB:
+ case wasm::R_WASM_TABLE_INDEX_SLEB64:
case wasm::R_WASM_TABLE_INDEX_I32:
+ case wasm::R_WASM_TABLE_INDEX_I64:
case wasm::R_WASM_TABLE_INDEX_REL_SLEB:
if (!isValidFunctionSymbol(Reloc.Index))
- return make_error<GenericBinaryError>("Bad relocation function index",
+ return make_error<GenericBinaryError>(
+ "invalid relocation function index", object_error::parse_failed);
+ break;
+ case wasm::R_WASM_TABLE_NUMBER_LEB:
+ if (!isValidTableSymbol(Reloc.Index))
+ return make_error<GenericBinaryError>("invalid relocation table index",
object_error::parse_failed);
break;
case wasm::R_WASM_TYPE_INDEX_LEB:
if (Reloc.Index >= Signatures.size())
- return make_error<GenericBinaryError>("Bad relocation type index",
+ return make_error<GenericBinaryError>("invalid relocation type index",
object_error::parse_failed);
break;
case wasm::R_WASM_GLOBAL_INDEX_LEB:
@@ -808,25 +887,26 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
if (!isValidGlobalSymbol(Reloc.Index) &&
!isValidDataSymbol(Reloc.Index) &&
!isValidFunctionSymbol(Reloc.Index))
- return make_error<GenericBinaryError>("Bad relocation global index",
+ return make_error<GenericBinaryError>("invalid relocation global index",
object_error::parse_failed);
break;
case wasm::R_WASM_GLOBAL_INDEX_I32:
if (!isValidGlobalSymbol(Reloc.Index))
- return make_error<GenericBinaryError>("Bad relocation global index",
+ return make_error<GenericBinaryError>("invalid relocation global index",
object_error::parse_failed);
break;
case wasm::R_WASM_EVENT_INDEX_LEB:
if (!isValidEventSymbol(Reloc.Index))
- return make_error<GenericBinaryError>("Bad relocation event index",
+ return make_error<GenericBinaryError>("invalid relocation event index",
object_error::parse_failed);
break;
case wasm::R_WASM_MEMORY_ADDR_LEB:
case wasm::R_WASM_MEMORY_ADDR_SLEB:
case wasm::R_WASM_MEMORY_ADDR_I32:
case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
+ case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB:
if (!isValidDataSymbol(Reloc.Index))
- return make_error<GenericBinaryError>("Bad relocation data index",
+ return make_error<GenericBinaryError>("invalid relocation data index",
object_error::parse_failed);
Reloc.Addend = readVarint32(Ctx);
break;
@@ -835,25 +915,31 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
case wasm::R_WASM_MEMORY_ADDR_I64:
case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64:
if (!isValidDataSymbol(Reloc.Index))
- return make_error<GenericBinaryError>("Bad relocation data index",
+ return make_error<GenericBinaryError>("invalid relocation data index",
object_error::parse_failed);
Reloc.Addend = readVarint64(Ctx);
break;
case wasm::R_WASM_FUNCTION_OFFSET_I32:
if (!isValidFunctionSymbol(Reloc.Index))
- return make_error<GenericBinaryError>("Bad relocation function index",
- object_error::parse_failed);
+ return make_error<GenericBinaryError>(
+ "invalid relocation function index", object_error::parse_failed);
Reloc.Addend = readVarint32(Ctx);
break;
+ case wasm::R_WASM_FUNCTION_OFFSET_I64:
+ if (!isValidFunctionSymbol(Reloc.Index))
+ return make_error<GenericBinaryError>(
+ "invalid relocation function index", object_error::parse_failed);
+ Reloc.Addend = readVarint64(Ctx);
+ break;
case wasm::R_WASM_SECTION_OFFSET_I32:
if (!isValidSectionSymbol(Reloc.Index))
- return make_error<GenericBinaryError>("Bad relocation section index",
- object_error::parse_failed);
+ return make_error<GenericBinaryError>(
+ "invalid relocation section index", object_error::parse_failed);
Reloc.Addend = readVarint32(Ctx);
break;
default:
- return make_error<GenericBinaryError>("Bad relocation type: " +
- Twine(Reloc.Type),
+ return make_error<GenericBinaryError>("invalid relocation type: " +
+ Twine(type),
object_error::parse_failed);
}
@@ -871,16 +957,18 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I32 ||
Reloc.Type == wasm::R_WASM_GLOBAL_INDEX_I32)
Size = 4;
- if (Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I64)
+ if (Reloc.Type == wasm::R_WASM_TABLE_INDEX_I64 ||
+ Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I64 ||
+ Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I64)
Size = 8;
if (Reloc.Offset + Size > EndOffset)
- return make_error<GenericBinaryError>("Bad relocation offset",
+ return make_error<GenericBinaryError>("invalid relocation offset",
object_error::parse_failed);
Section.Relocations.push_back(Reloc);
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Reloc section ended prematurely",
+ return make_error<GenericBinaryError>("reloc section ended prematurely",
object_error::parse_failed);
return Error::success();
}
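For reference, an editorial sketch (not part of the patch) of the wasm::WasmRelocation fields the loop above populates. Only the MEMORY_ADDR_*, FUNCTION_OFFSET_* and SECTION_OFFSET_* families read an explicit addend, so Addend stays zero-initialized for the remaining types.

#include "llvm/Support/raw_ostream.h"

static void dumpWasmReloc(const llvm::wasm::WasmRelocation &Reloc) {
  llvm::outs() << "type=" << Reloc.Type << " offset=" << Reloc.Offset
               << " index=" << Reloc.Index << " addend=" << Reloc.Addend
               << "\n";
}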
@@ -915,7 +1003,7 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
wasm::WasmSignature Sig;
uint8_t Form = readUint8(Ctx);
if (Form != wasm::WASM_TYPE_FUNC) {
- return make_error<GenericBinaryError>("Invalid signature type",
+ return make_error<GenericBinaryError>("invalid signature type",
object_error::parse_failed);
}
uint32_t ParamCount = readVaruint32(Ctx);
@@ -932,7 +1020,7 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
Signatures.push_back(std::move(Sig));
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Type section ended prematurely",
+ return make_error<GenericBinaryError>("type section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -957,26 +1045,32 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
break;
case wasm::WASM_EXTERNAL_MEMORY:
Im.Memory = readLimits(Ctx);
+ if (Im.Memory.Flags & wasm::WASM_LIMITS_FLAG_IS_64)
+ HasMemory64 = true;
break;
- case wasm::WASM_EXTERNAL_TABLE:
- Im.Table = readTable(Ctx);
- if (Im.Table.ElemType != wasm::WASM_TYPE_FUNCREF)
- return make_error<GenericBinaryError>("Invalid table element type",
+ case wasm::WASM_EXTERNAL_TABLE: {
+ Im.Table = readTableType(Ctx);
+ NumImportedTables++;
+ auto ElemType = Im.Table.ElemType;
+ if (ElemType != wasm::WASM_TYPE_FUNCREF &&
+ ElemType != wasm::WASM_TYPE_EXTERNREF)
+ return make_error<GenericBinaryError>("invalid table element type",
object_error::parse_failed);
break;
+ }
case wasm::WASM_EXTERNAL_EVENT:
NumImportedEvents++;
Im.Event.Attribute = readVarint32(Ctx);
Im.Event.SigIndex = readVarint32(Ctx);
break;
default:
- return make_error<GenericBinaryError>("Unexpected import kind",
+ return make_error<GenericBinaryError>("unexpected import kind",
object_error::parse_failed);
}
Imports.push_back(Im);
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Import section ended prematurely",
+ return make_error<GenericBinaryError>("import section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -989,28 +1083,34 @@ Error WasmObjectFile::parseFunctionSection(ReadContext &Ctx) {
while (Count--) {
uint32_t Type = readVaruint32(Ctx);
if (Type >= NumTypes)
- return make_error<GenericBinaryError>("Invalid function type",
+ return make_error<GenericBinaryError>("invalid function type",
object_error::parse_failed);
FunctionTypes.push_back(Type);
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Function section ended prematurely",
+ return make_error<GenericBinaryError>("function section ended prematurely",
object_error::parse_failed);
return Error::success();
}
Error WasmObjectFile::parseTableSection(ReadContext &Ctx) {
+ TableSection = Sections.size();
uint32_t Count = readVaruint32(Ctx);
Tables.reserve(Count);
while (Count--) {
- Tables.push_back(readTable(Ctx));
- if (Tables.back().ElemType != wasm::WASM_TYPE_FUNCREF) {
- return make_error<GenericBinaryError>("Invalid table element type",
+ wasm::WasmTable T;
+ T.Type = readTableType(Ctx);
+ T.Index = NumImportedTables + Tables.size();
+ Tables.push_back(T);
+ auto ElemType = Tables.back().Type.ElemType;
+ if (ElemType != wasm::WASM_TYPE_FUNCREF &&
+ ElemType != wasm::WASM_TYPE_EXTERNREF) {
+ return make_error<GenericBinaryError>("invalid table element type",
object_error::parse_failed);
}
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Table section ended prematurely",
+ return make_error<GenericBinaryError>("table section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -1019,10 +1119,13 @@ Error WasmObjectFile::parseMemorySection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
Memories.reserve(Count);
while (Count--) {
- Memories.push_back(readLimits(Ctx));
+ auto Limits = readLimits(Ctx);
+ if (Limits.Flags & wasm::WASM_LIMITS_FLAG_IS_64)
+ HasMemory64 = true;
+ Memories.push_back(Limits);
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Memory section ended prematurely",
+ return make_error<GenericBinaryError>("memory section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -1040,7 +1143,7 @@ Error WasmObjectFile::parseEventSection(ReadContext &Ctx) {
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Event section ended prematurely",
+ return make_error<GenericBinaryError>("event section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -1059,7 +1162,7 @@ Error WasmObjectFile::parseGlobalSection(ReadContext &Ctx) {
Globals.push_back(Global);
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Global section ended prematurely",
+ return make_error<GenericBinaryError>("global section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -1076,31 +1179,31 @@ Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
case wasm::WASM_EXTERNAL_FUNCTION:
if (!isDefinedFunctionIndex(Ex.Index))
- return make_error<GenericBinaryError>("Invalid function export",
+ return make_error<GenericBinaryError>("invalid function export",
object_error::parse_failed);
getDefinedFunction(Ex.Index).ExportName = Ex.Name;
break;
case wasm::WASM_EXTERNAL_GLOBAL:
if (!isValidGlobalIndex(Ex.Index))
- return make_error<GenericBinaryError>("Invalid global export",
+ return make_error<GenericBinaryError>("invalid global export",
object_error::parse_failed);
break;
case wasm::WASM_EXTERNAL_EVENT:
if (!isValidEventIndex(Ex.Index))
- return make_error<GenericBinaryError>("Invalid event export",
+ return make_error<GenericBinaryError>("invalid event export",
object_error::parse_failed);
break;
case wasm::WASM_EXTERNAL_MEMORY:
case wasm::WASM_EXTERNAL_TABLE:
break;
default:
- return make_error<GenericBinaryError>("Unexpected export kind",
+ return make_error<GenericBinaryError>("unexpected export kind",
object_error::parse_failed);
}
Exports.push_back(Ex);
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Export section ended prematurely",
+ return make_error<GenericBinaryError>("export section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -1117,10 +1220,18 @@ bool WasmObjectFile::isValidGlobalIndex(uint32_t Index) const {
return Index < NumImportedGlobals + Globals.size();
}
+bool WasmObjectFile::isValidTableIndex(uint32_t Index) const {
+ return Index < NumImportedTables + Tables.size();
+}
+
bool WasmObjectFile::isDefinedGlobalIndex(uint32_t Index) const {
return Index >= NumImportedGlobals && isValidGlobalIndex(Index);
}
+bool WasmObjectFile::isDefinedTableIndex(uint32_t Index) const {
+ return Index >= NumImportedTables && isValidTableIndex(Index);
+}
+
bool WasmObjectFile::isValidEventIndex(uint32_t Index) const {
return Index < NumImportedEvents + Events.size();
}
@@ -1133,6 +1244,10 @@ bool WasmObjectFile::isValidFunctionSymbol(uint32_t Index) const {
return Index < Symbols.size() && Symbols[Index].isTypeFunction();
}
+bool WasmObjectFile::isValidTableSymbol(uint32_t Index) const {
+ return Index < Symbols.size() && Symbols[Index].isTypeTable();
+}
+
bool WasmObjectFile::isValidGlobalSymbol(uint32_t Index) const {
return Index < Symbols.size() && Symbols[Index].isTypeGlobal();
}
@@ -1173,7 +1288,7 @@ wasm::WasmEvent &WasmObjectFile::getDefinedEvent(uint32_t Index) {
Error WasmObjectFile::parseStartSection(ReadContext &Ctx) {
StartFunction = readVaruint32(Ctx);
if (!isValidFunctionIndex(StartFunction))
- return make_error<GenericBinaryError>("Invalid start function",
+ return make_error<GenericBinaryError>("invalid start function",
object_error::parse_failed);
return Error::success();
}
@@ -1183,7 +1298,7 @@ Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) {
CodeSection = Sections.size();
uint32_t FunctionCount = readVaruint32(Ctx);
if (FunctionCount != FunctionTypes.size()) {
- return make_error<GenericBinaryError>("Invalid function count",
+ return make_error<GenericBinaryError>("invalid function count",
object_error::parse_failed);
}
@@ -1215,7 +1330,7 @@ Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) {
assert(Ctx.Ptr == FunctionEnd);
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Code section ended prematurely",
+ return make_error<GenericBinaryError>("code section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -1227,7 +1342,7 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
wasm::WasmElemSegment Segment;
Segment.TableIndex = readVaruint32(Ctx);
if (Segment.TableIndex != 0) {
- return make_error<GenericBinaryError>("Invalid TableIndex",
+ return make_error<GenericBinaryError>("invalid TableIndex",
object_error::parse_failed);
}
if (Error Err = readInitExpr(Segment.Offset, Ctx))
@@ -1239,7 +1354,7 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
ElemSegments.push_back(Segment);
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Elem section ended prematurely",
+ return make_error<GenericBinaryError>("elem section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -1249,14 +1364,16 @@ Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
if (DataCount && Count != DataCount.getValue())
return make_error<GenericBinaryError>(
- "Number of data segments does not match DataCount section");
+ "number of data segments does not match DataCount section");
DataSegments.reserve(Count);
while (Count--) {
WasmSegment Segment;
Segment.Data.InitFlags = readVaruint32(Ctx);
- Segment.Data.MemoryIndex = (Segment.Data.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX)
- ? readVaruint32(Ctx) : 0;
- if ((Segment.Data.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) {
+ Segment.Data.MemoryIndex =
+ (Segment.Data.InitFlags & wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX)
+ ? readVaruint32(Ctx)
+ : 0;
+ if ((Segment.Data.InitFlags & wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
if (Error Err = readInitExpr(Segment.Data.Offset, Ctx))
return Err;
} else {
@@ -1265,7 +1382,7 @@ Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
}
uint32_t Size = readVaruint32(Ctx);
if (Size > (size_t)(Ctx.End - Ctx.Ptr))
- return make_error<GenericBinaryError>("Invalid segment size",
+ return make_error<GenericBinaryError>("invalid segment size",
object_error::parse_failed);
Segment.Data.Content = ArrayRef<uint8_t>(Ctx.Ptr, Size);
// The rest of these Data fields are set later, when reading in the linking
@@ -1278,7 +1395,7 @@ Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
DataSegments.push_back(Segment);
}
if (Ctx.Ptr != Ctx.End)
- return make_error<GenericBinaryError>("Data section ended prematurely",
+ return make_error<GenericBinaryError>("data section ended prematurely",
object_error::parse_failed);
return Error::success();
}
@@ -1352,6 +1469,7 @@ uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
case wasm::WASM_SYMBOL_TYPE_EVENT:
+ case wasm::WASM_SYMBOL_TYPE_TABLE:
return Sym.Info.ElementIndex;
case wasm::WASM_SYMBOL_TYPE_DATA: {
// The value of a data symbol is the segment offset, plus the symbol
@@ -1401,9 +1519,11 @@ WasmObjectFile::getSymbolType(DataRefImpl Symb) const {
return SymbolRef::ST_Debug;
case wasm::WASM_SYMBOL_TYPE_EVENT:
return SymbolRef::ST_Other;
+ case wasm::WASM_SYMBOL_TYPE_TABLE:
+ return SymbolRef::ST_Other;
}
- llvm_unreachable("Unknown WasmSymbol::SymbolType");
+ llvm_unreachable("unknown WasmSymbol::SymbolType");
return SymbolRef::ST_Other;
}
@@ -1435,8 +1555,10 @@ uint32_t WasmObjectFile::getSymbolSectionIdImpl(const WasmSymbol &Sym) const {
return Sym.Info.ElementIndex;
case wasm::WASM_SYMBOL_TYPE_EVENT:
return EventSection;
+ case wasm::WASM_SYMBOL_TYPE_TABLE:
+ return TableSection;
default:
- llvm_unreachable("Unknown WasmSymbol::SymbolType");
+ llvm_unreachable("unknown WasmSymbol::SymbolType");
}
}
@@ -1576,11 +1698,15 @@ section_iterator WasmObjectFile::section_end() const {
return section_iterator(SectionRef(Ref, this));
}
-uint8_t WasmObjectFile::getBytesInAddress() const { return 4; }
+uint8_t WasmObjectFile::getBytesInAddress() const {
+ return HasMemory64 ? 8 : 4;
+}
StringRef WasmObjectFile::getFileFormatName() const { return "WASM"; }
-Triple::ArchType WasmObjectFile::getArch() const { return Triple::wasm32; }
+Triple::ArchType WasmObjectFile::getArch() const {
+ return HasMemory64 ? Triple::wasm64 : Triple::wasm32;
+}
SubtargetFeatures WasmObjectFile::getFeatures() const {
return SubtargetFeatures();
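A brief sketch (not part of the patch) of how the new HasMemory64 flag surfaces to generic clients: both accessors above key off the WASM_LIMITS_FLAG_IS_64 flag seen while parsing the memory and import sections.

static bool looksLikeWasm64(const llvm::object::WasmObjectFile &Obj) {
  // wasm64 modules now report an 8-byte address size and the wasm64 triple.
  return Obj.getBytesInAddress() == 8 &&
         Obj.getArch() == llvm::Triple::wasm64;
}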
diff --git a/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp
index 533361666cf2..a16a458168d4 100644
--- a/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp
@@ -12,10 +12,14 @@
#include "llvm/Object/XCOFFObjectFile.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/DataExtractor.h"
#include <cstddef>
#include <cstring>
namespace llvm {
+
+using namespace XCOFF;
+
namespace object {
static const uint8_t FunctionSym = 0x20;
@@ -27,7 +31,7 @@ static const uint16_t NoRelMask = 0x0001;
template <typename T>
static Expected<const T *> getObject(MemoryBufferRef M, const void *Ptr,
const uint64_t Size = sizeof(T)) {
- uintptr_t Addr = uintptr_t(Ptr);
+ uintptr_t Addr = reinterpret_cast<uintptr_t>(Ptr);
if (Error E = Binary::checkOffset(M, Addr, Size))
return std::move(E);
return reinterpret_cast<const T *>(Addr);
@@ -279,7 +283,7 @@ XCOFFObjectFile::getSectionContents(DataRefImpl Sec) const {
const uint8_t * ContentStart = base() + OffsetToRaw;
uint64_t SectionSize = getSectionSize(Sec);
- if (checkOffset(Data, uintptr_t(ContentStart), SectionSize))
+ if (checkOffset(Data, reinterpret_cast<uintptr_t>(ContentStart), SectionSize))
return make_error<BinaryError>();
return makeArrayRef(ContentStart,SectionSize);
@@ -651,7 +655,8 @@ XCOFFObjectFile::relocations(const XCOFFSectionHeader32 &Sec) const {
uint32_t NumRelocEntries = NumRelocEntriesOrErr.get();
- assert(sizeof(XCOFFRelocation32) == XCOFF::RelocationSerializationSize32);
+ static_assert(
+ sizeof(XCOFFRelocation32) == XCOFF::RelocationSerializationSize32, "");
auto RelocationOrErr =
getObject<XCOFFRelocation32>(Data, reinterpret_cast<void *>(RelocAddr),
NumRelocEntries * sizeof(XCOFFRelocation32));
@@ -834,5 +839,297 @@ bool XCOFFSymbolRef::isFunction() const {
template struct XCOFFSectionHeader<XCOFFSectionHeader32>;
template struct XCOFFSectionHeader<XCOFFSectionHeader64>;
+bool doesXCOFFTracebackTableBegin(ArrayRef<uint8_t> Bytes) {
+ if (Bytes.size() < 4)
+ return false;
+
+ return support::endian::read32be(Bytes.data()) == 0;
+}
+
+TBVectorExt::TBVectorExt(StringRef TBvectorStrRef) {
+ const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(TBvectorStrRef.data());
+ Data = support::endian::read16be(Ptr);
+ VecParmsInfo = support::endian::read32be(Ptr + 2);
+}
+
+#define GETVALUEWITHMASK(X) (Data & (TracebackTable::X))
+#define GETVALUEWITHMASKSHIFT(X, S) \
+ ((Data & (TracebackTable::X)) >> (TracebackTable::S))
+uint8_t TBVectorExt::getNumberOfVRSaved() const {
+ return GETVALUEWITHMASKSHIFT(NumberOfVRSavedMask, NumberOfVRSavedShift);
+}
+
+bool TBVectorExt::isVRSavedOnStack() const {
+ return GETVALUEWITHMASK(IsVRSavedOnStackMask);
+}
+
+bool TBVectorExt::hasVarArgs() const {
+ return GETVALUEWITHMASK(HasVarArgsMask);
+}
+uint8_t TBVectorExt::getNumberOfVectorParms() const {
+ return GETVALUEWITHMASKSHIFT(NumberOfVectorParmsMask,
+ NumberOfVectorParmsShift);
+}
+
+bool TBVectorExt::hasVMXInstruction() const {
+ return GETVALUEWITHMASK(HasVMXInstructionMask);
+}
+#undef GETVALUEWITHMASK
+#undef GETVALUEWITHMASKSHIFT
+
+SmallString<32> TBVectorExt::getVectorParmsInfoString() const {
+ SmallString<32> ParmsType;
+ uint32_t Value = VecParmsInfo;
+ for (uint8_t I = 0; I < getNumberOfVectorParms(); ++I) {
+ if (I != 0)
+ ParmsType += ", ";
+ switch (Value & TracebackTable::ParmTypeMask) {
+ case TracebackTable::ParmTypeIsVectorCharBit:
+ ParmsType += "vc";
+ break;
+
+ case TracebackTable::ParmTypeIsVectorShortBit:
+ ParmsType += "vs";
+ break;
+
+ case TracebackTable::ParmTypeIsVectorIntBit:
+ ParmsType += "vi";
+ break;
+
+ case TracebackTable::ParmTypeIsVectorFloatBit:
+ ParmsType += "vf";
+ break;
+ }
+ Value <<= 2;
+ }
+ return ParmsType;
+}
+
+static SmallString<32> parseParmsTypeWithVecInfo(uint32_t Value,
+ unsigned int ParmsNum) {
+ SmallString<32> ParmsType;
+ unsigned I = 0;
+ bool Begin = false;
+ while (I < ParmsNum || Value) {
+ if (Begin)
+ ParmsType += ", ";
+ else
+ Begin = true;
+
+ switch (Value & TracebackTable::ParmTypeMask) {
+ case TracebackTable::ParmTypeIsFixedBits:
+ ParmsType += "i";
+ ++I;
+ break;
+ case TracebackTable::ParmTypeIsVectorBits:
+ ParmsType += "v";
+ break;
+ case TracebackTable::ParmTypeIsFloatingBits:
+ ParmsType += "f";
+ ++I;
+ break;
+ case TracebackTable::ParmTypeIsDoubleBits:
+ ParmsType += "d";
+ ++I;
+ break;
+ default:
+ assert(false && "Unrecognized bits in ParmsType.");
+ }
+ Value <<= 2;
+ }
+ assert(I == ParmsNum &&
+ "The total parameters number of fixed-point or floating-point "
+ "parameters not equal to the number in the parameter type!");
+ return ParmsType;
+}
+
+Expected<XCOFFTracebackTable> XCOFFTracebackTable::create(const uint8_t *Ptr,
+ uint64_t &Size) {
+ Error Err = Error::success();
+ XCOFFTracebackTable TBT(Ptr, Size, Err);
+ if (Err)
+ return std::move(Err);
+ return TBT;
+}
+
+XCOFFTracebackTable::XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size,
+ Error &Err)
+ : TBPtr(Ptr) {
+ ErrorAsOutParameter EAO(&Err);
+ DataExtractor DE(ArrayRef<uint8_t>(Ptr, Size), /*IsLittleEndian=*/false,
+ /*AddressSize=*/0);
+ DataExtractor::Cursor Cur(/*Offset=*/0);
+
+ // Skip 8 bytes of mandatory fields.
+ DE.getU64(Cur);
+
+ // Begin to parse optional fields.
+ if (Cur) {
+ unsigned ParmNum = getNumberOfFixedParms() + getNumberOfFPParms();
+
+ // As long as there are no "fixed-point" or floating-point parameters, this
+ // field remains not present even when hasVectorInfo gives true and
+ // indicates the presence of vector parameters.
+ if (ParmNum > 0) {
+ uint32_t ParamsTypeValue = DE.getU32(Cur);
+ if (Cur)
+ ParmsType = hasVectorInfo()
+ ? parseParmsTypeWithVecInfo(ParamsTypeValue, ParmNum)
+ : parseParmsType(ParamsTypeValue, ParmNum);
+ }
+ }
+
+ if (Cur && hasTraceBackTableOffset())
+ TraceBackTableOffset = DE.getU32(Cur);
+
+ if (Cur && isInterruptHandler())
+ HandlerMask = DE.getU32(Cur);
+
+ if (Cur && hasControlledStorage()) {
+ NumOfCtlAnchors = DE.getU32(Cur);
+ if (Cur && NumOfCtlAnchors) {
+ SmallVector<uint32_t, 8> Disp;
+ Disp.reserve(NumOfCtlAnchors.getValue());
+ for (uint32_t I = 0; I < NumOfCtlAnchors && Cur; ++I)
+ Disp.push_back(DE.getU32(Cur));
+ if (Cur)
+ ControlledStorageInfoDisp = std::move(Disp);
+ }
+ }
+
+ if (Cur && isFuncNamePresent()) {
+ uint16_t FunctionNameLen = DE.getU16(Cur);
+ if (Cur)
+ FunctionName = DE.getBytes(Cur, FunctionNameLen);
+ }
+
+ if (Cur && isAllocaUsed())
+ AllocaRegister = DE.getU8(Cur);
+
+ if (Cur && hasVectorInfo()) {
+ StringRef VectorExtRef = DE.getBytes(Cur, 6);
+ if (Cur)
+ VecExt = TBVectorExt(VectorExtRef);
+ }
+
+ if (Cur && hasExtensionTable())
+ ExtensionTable = DE.getU8(Cur);
+
+ if (!Cur)
+ Err = Cur.takeError();
+ Size = Cur.tell();
+}
+
+#define GETBITWITHMASK(P, X) \
+ (support::endian::read32be(TBPtr + (P)) & (TracebackTable::X))
+#define GETBITWITHMASKSHIFT(P, X, S) \
+ ((support::endian::read32be(TBPtr + (P)) & (TracebackTable::X)) >> \
+ (TracebackTable::S))
+
+uint8_t XCOFFTracebackTable::getVersion() const {
+ return GETBITWITHMASKSHIFT(0, VersionMask, VersionShift);
+}
+
+uint8_t XCOFFTracebackTable::getLanguageID() const {
+ return GETBITWITHMASKSHIFT(0, LanguageIdMask, LanguageIdShift);
+}
+
+bool XCOFFTracebackTable::isGlobalLinkage() const {
+ return GETBITWITHMASK(0, IsGlobaLinkageMask);
+}
+
+bool XCOFFTracebackTable::isOutOfLineEpilogOrPrologue() const {
+ return GETBITWITHMASK(0, IsOutOfLineEpilogOrPrologueMask);
+}
+
+bool XCOFFTracebackTable::hasTraceBackTableOffset() const {
+ return GETBITWITHMASK(0, HasTraceBackTableOffsetMask);
+}
+
+bool XCOFFTracebackTable::isInternalProcedure() const {
+ return GETBITWITHMASK(0, IsInternalProcedureMask);
+}
+
+bool XCOFFTracebackTable::hasControlledStorage() const {
+ return GETBITWITHMASK(0, HasControlledStorageMask);
+}
+
+bool XCOFFTracebackTable::isTOCless() const {
+ return GETBITWITHMASK(0, IsTOClessMask);
+}
+
+bool XCOFFTracebackTable::isFloatingPointPresent() const {
+ return GETBITWITHMASK(0, IsFloatingPointPresentMask);
+}
+
+bool XCOFFTracebackTable::isFloatingPointOperationLogOrAbortEnabled() const {
+ return GETBITWITHMASK(0, IsFloatingPointOperationLogOrAbortEnabledMask);
+}
+
+bool XCOFFTracebackTable::isInterruptHandler() const {
+ return GETBITWITHMASK(0, IsInterruptHandlerMask);
+}
+
+bool XCOFFTracebackTable::isFuncNamePresent() const {
+ return GETBITWITHMASK(0, IsFunctionNamePresentMask);
+}
+
+bool XCOFFTracebackTable::isAllocaUsed() const {
+ return GETBITWITHMASK(0, IsAllocaUsedMask);
+}
+
+uint8_t XCOFFTracebackTable::getOnConditionDirective() const {
+ return GETBITWITHMASKSHIFT(0, OnConditionDirectiveMask,
+ OnConditionDirectiveShift);
+}
+
+bool XCOFFTracebackTable::isCRSaved() const {
+ return GETBITWITHMASK(0, IsCRSavedMask);
+}
+
+bool XCOFFTracebackTable::isLRSaved() const {
+ return GETBITWITHMASK(0, IsLRSavedMask);
+}
+
+bool XCOFFTracebackTable::isBackChainStored() const {
+ return GETBITWITHMASK(4, IsBackChainStoredMask);
+}
+
+bool XCOFFTracebackTable::isFixup() const {
+ return GETBITWITHMASK(4, IsFixupMask);
+}
+
+uint8_t XCOFFTracebackTable::getNumOfFPRsSaved() const {
+ return GETBITWITHMASKSHIFT(4, FPRSavedMask, FPRSavedShift);
+}
+
+bool XCOFFTracebackTable::hasExtensionTable() const {
+ return GETBITWITHMASK(4, HasExtensionTableMask);
+}
+
+bool XCOFFTracebackTable::hasVectorInfo() const {
+ return GETBITWITHMASK(4, HasVectorInfoMask);
+}
+
+uint8_t XCOFFTracebackTable::getNumOfGPRsSaved() const {
+ return GETBITWITHMASKSHIFT(4, GPRSavedMask, GPRSavedShift);
+}
+
+uint8_t XCOFFTracebackTable::getNumberOfFixedParms() const {
+ return GETBITWITHMASKSHIFT(4, NumberOfFixedParmsMask,
+ NumberOfFixedParmsShift);
+}
+
+uint8_t XCOFFTracebackTable::getNumberOfFPParms() const {
+ return GETBITWITHMASKSHIFT(4, NumberOfFloatingPointParmsMask,
+ NumberOfFloatingPointParmsShift);
+}
+
+bool XCOFFTracebackTable::hasParmsOnStack() const {
+ return GETBITWITHMASK(4, HasParmsOnStackMask);
+}
+
+#undef GETBITWITHMASK
+#undef GETBITWITHMASKSHIFT
} // namespace object
} // namespace llvm
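All of the accessors above follow one pattern: read a 32-bit big-endian word at a fixed offset into the table, mask out a field, and shift it down to bit 0. A self-contained sketch of that pattern, with a made-up mask and shift:

#include <cstdint>

// Equivalent of support::endian::read32be: assemble a big-endian 32-bit word.
static uint32_t read32be(const uint8_t *Buf) {
  return (uint32_t(Buf[0]) << 24) | (uint32_t(Buf[1]) << 16) |
         (uint32_t(Buf[2]) << 8) | uint32_t(Buf[3]);
}

// Hypothetical field occupying the top byte of the first word of the table.
static constexpr uint32_t ExampleMask = 0xFF000000;
static constexpr uint32_t ExampleShift = 24;

uint8_t getExampleField(const uint8_t *TBPtr) {
  return (read32be(TBPtr) & ExampleMask) >> ExampleShift;
}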
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ArchiveEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ArchiveEmitter.cpp
new file mode 100644
index 000000000000..a0cf8fe360da
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ArchiveEmitter.cpp
@@ -0,0 +1,51 @@
+//===- ArchiveEmitter.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/ArchiveYAML.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace ArchYAML;
+
+namespace llvm {
+namespace yaml {
+
+bool yaml2archive(ArchYAML::Archive &Doc, raw_ostream &Out, ErrorHandler EH) {
+ Out.write(Doc.Magic.data(), Doc.Magic.size());
+
+ if (Doc.Content) {
+ Doc.Content->writeAsBinary(Out);
+ return true;
+ }
+
+ if (!Doc.Members)
+ return true;
+
+ auto WriteField = [&](StringRef Field, uint8_t Size) {
+ Out.write(Field.data(), Field.size());
+ for (size_t I = Field.size(); I != Size; ++I)
+ Out.write(' ');
+ };
+
+ for (const Archive::Child &C : *Doc.Members) {
+ for (auto &P : C.Fields)
+ WriteField(P.second.Value, P.second.MaxLength);
+
+ if (C.Content)
+ C.Content->writeAsBinary(Out);
+ if (C.PaddingByte)
+ Out.write(*C.PaddingByte);
+ }
+
+ return true;
+}
+
+} // namespace yaml
+} // namespace llvm
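The WriteField lambda above space-pads every member header field to a fixed width, mirroring how the classic ar format lays out its member headers (conventionally Name:16, LastModified:12, UID:6, GID:6, AccessMode:8, Size:10, Terminator:2 bytes; here the widths come from MaxLength in the YAML model). A standalone sketch of the same padding rule:

#include <string>

// Pad Field with trailing spaces up to Width characters; overlong values are
// rejected earlier by the YAML validate() hook, so the truncation below never
// fires in practice.
std::string padField(const std::string &Field, size_t Width) {
  std::string Out = Field.substr(0, Width);
  Out.append(Width - Out.size(), ' ');
  return Out;
}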
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ArchiveYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ArchiveYAML.cpp
new file mode 100644
index 000000000000..d2ea1eaf5210
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ArchiveYAML.cpp
@@ -0,0 +1,58 @@
+//===- ArchiveYAML.cpp - Archive YAMLIO implementation ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of archives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/ArchiveYAML.h"
+
+namespace llvm {
+
+namespace yaml {
+
+void MappingTraits<ArchYAML::Archive>::mapping(IO &IO, ArchYAML::Archive &A) {
+ assert(!IO.getContext() && "The IO context is initialized already");
+ IO.setContext(&A);
+ IO.mapTag("!Arch", true);
+ IO.mapOptional("Magic", A.Magic, "!<arch>\n");
+ IO.mapOptional("Members", A.Members);
+ IO.mapOptional("Content", A.Content);
+ IO.setContext(nullptr);
+}
+
+std::string MappingTraits<ArchYAML::Archive>::validate(IO &,
+ ArchYAML::Archive &A) {
+ if (A.Members && A.Content)
+ return "\"Content\" and \"Members\" cannot be used together";
+ return "";
+}
+
+void MappingTraits<ArchYAML::Archive::Child>::mapping(
+ IO &IO, ArchYAML::Archive::Child &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ for (auto &P : E.Fields)
+ IO.mapOptional(P.first.data(), P.second.Value, P.second.DefaultValue);
+ IO.mapOptional("Content", E.Content);
+ IO.mapOptional("PaddingByte", E.PaddingByte);
+}
+
+std::string
+MappingTraits<ArchYAML::Archive::Child>::validate(IO &,
+ ArchYAML::Archive::Child &C) {
+ for (auto &P : C.Fields)
+ if (P.second.Value.size() > P.second.MaxLength)
+ return ("the maximum length of \"" + P.first + "\" field is " +
+ Twine(P.second.MaxLength))
+ .str();
+ return "";
+}
+
+} // end namespace yaml
+
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
index 734e1be4b2d5..06ce93affd38 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/Object/COFF.h"
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 95409fdc3300..6b6a1176628b 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -147,7 +147,30 @@ void ScalarEnumerationTraits<CPUType>::enumeration(IO &io, CPUType &Cpu) {
}
void ScalarEnumerationTraits<RegisterId>::enumeration(IO &io, RegisterId &Reg) {
- auto RegNames = getRegisterNames(CPUType::X64);
+ const auto *Header = static_cast<COFF::header *>(io.getContext());
+ assert(Header && "The IO context is not initialized");
+
+ Optional<CPUType> CpuType;
+ ArrayRef<EnumEntry<uint16_t>> RegNames;
+
+ switch (Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ CpuType = CPUType::Pentium3;
+ break;
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ CpuType = CPUType::X64;
+ break;
+ case COFF::IMAGE_FILE_MACHINE_ARMNT:
+ CpuType = CPUType::ARMNT;
+ break;
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ CpuType = CPUType::ARM64;
+ break;
+ }
+
+ if (CpuType)
+ RegNames = getRegisterNames(*CpuType);
+
for (const auto &E : RegNames) {
io.enumCase(Reg, E.Name.str().c_str(), static_cast<RegisterId>(E.Value));
}
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFEmitter.cpp
index ed3732ba29f6..eec733c7d7f9 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFEmitter.cpp
@@ -12,9 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/DWARFEmitter.h"
-#include "DWARFVisitor.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/ObjectYAML/DWARFYAML.h"
#include "llvm/Support/Errc.h"
@@ -62,18 +63,10 @@ static Error writeVariableSizedInteger(uint64_t Integer, size_t Size,
}
static void ZeroFillBytes(raw_ostream &OS, size_t Size) {
- std::vector<uint8_t> FillData;
- FillData.insert(FillData.begin(), Size, 0);
+ std::vector<uint8_t> FillData(Size, 0);
OS.write(reinterpret_cast<char *>(FillData.data()), Size);
}
-static void writeInitialLength(const DWARFYAML::InitialLength &Length,
- raw_ostream &OS, bool IsLittleEndian) {
- writeInteger((uint32_t)Length.TotalLength, OS, IsLittleEndian);
- if (Length.isDWARF64())
- writeInteger((uint64_t)Length.TotalLength64, OS, IsLittleEndian);
-}
-
static void writeInitialLength(const dwarf::DwarfFormat Format,
const uint64_t Length, raw_ostream &OS,
bool IsLittleEndian) {
@@ -85,8 +78,14 @@ static void writeInitialLength(const dwarf::DwarfFormat Format,
writeVariableSizedInteger(Length, IsDWARF64 ? 8 : 4, OS, IsLittleEndian));
}
+static void writeDWARFOffset(uint64_t Offset, dwarf::DwarfFormat Format,
+ raw_ostream &OS, bool IsLittleEndian) {
+ cantFail(writeVariableSizedInteger(Offset, Format == dwarf::DWARF64 ? 8 : 4,
+ OS, IsLittleEndian));
+}
+
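writeInitialLength() and writeDWARFOffset() above implement the standard DWARF32/DWARF64 convention: a DWARF32 initial length is a single 4-byte value, a DWARF64 initial length is the escape value 0xffffffff followed by an 8-byte length, and section offsets are 4 or 8 bytes respectively. A minimal little-endian sketch of that layout:

#include <cstdint>
#include <vector>

static void appendLE(std::vector<uint8_t> &Out, uint64_t V, unsigned Bytes) {
  for (unsigned I = 0; I < Bytes; ++I)
    Out.push_back(uint8_t(V >> (8 * I)));
}

// DWARF32: 4-byte length. DWARF64: 0xffffffff escape, then an 8-byte length.
void appendInitialLength(std::vector<uint8_t> &Out, uint64_t Length,
                         bool IsDWARF64) {
  if (IsDWARF64) {
    appendLE(Out, 0xffffffffU, 4);
    appendLE(Out, Length, 8);
  } else {
    appendLE(Out, Length, 4);
  }
}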
Error DWARFYAML::emitDebugStr(raw_ostream &OS, const DWARFYAML::Data &DI) {
- for (auto Str : DI.DebugStrings) {
+ for (StringRef Str : *DI.DebugStrings) {
OS.write(Str.data(), Str.size());
OS.write('\0');
}
@@ -94,14 +93,23 @@ Error DWARFYAML::emitDebugStr(raw_ostream &OS, const DWARFYAML::Data &DI) {
return Error::success();
}
-Error DWARFYAML::emitDebugAbbrev(raw_ostream &OS, const DWARFYAML::Data &DI) {
+StringRef DWARFYAML::Data::getAbbrevTableContentByIndex(uint64_t Index) const {
+ assert(Index < DebugAbbrev.size() &&
+ "Index should be less than the size of DebugAbbrev array");
+ auto It = AbbrevTableContents.find(Index);
+ if (It != AbbrevTableContents.cend())
+ return It->second;
+
+ std::string AbbrevTableBuffer;
+ raw_string_ostream OS(AbbrevTableBuffer);
+
uint64_t AbbrevCode = 0;
- for (auto AbbrevDecl : DI.AbbrevDecls) {
+ for (const DWARFYAML::Abbrev &AbbrevDecl : DebugAbbrev[Index].Table) {
AbbrevCode = AbbrevDecl.Code ? (uint64_t)*AbbrevDecl.Code : AbbrevCode + 1;
encodeULEB128(AbbrevCode, OS);
encodeULEB128(AbbrevDecl.Tag, OS);
OS.write(AbbrevDecl.Children);
- for (auto Attr : AbbrevDecl.Attributes) {
+ for (const auto &Attr : AbbrevDecl.Attributes) {
encodeULEB128(Attr.Attribute, OS);
encodeULEB128(Attr.Form, OS);
if (Attr.Form == dwarf::DW_FORM_implicit_const)
@@ -111,39 +119,68 @@ Error DWARFYAML::emitDebugAbbrev(raw_ostream &OS, const DWARFYAML::Data &DI) {
encodeULEB128(0, OS);
}
- // The abbreviations for a given compilation unit end with an entry consisting
- // of a 0 byte for the abbreviation code.
+ // The abbreviations for a given compilation unit end with an entry
+ // consisting of a 0 byte for the abbreviation code.
OS.write_zeros(1);
+ AbbrevTableContents.insert({Index, AbbrevTableBuffer});
+
+ return AbbrevTableContents[Index];
+}
+
+Error DWARFYAML::emitDebugAbbrev(raw_ostream &OS, const DWARFYAML::Data &DI) {
+ for (uint64_t I = 0; I < DI.DebugAbbrev.size(); ++I) {
+ StringRef AbbrevTableContent = DI.getAbbrevTableContentByIndex(I);
+ OS.write(AbbrevTableContent.data(), AbbrevTableContent.size());
+ }
+
return Error::success();
}
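The abbreviation stream above is built almost entirely out of ULEB128 values (abbrev codes, tags, attribute/form pairs, and the zero terminators). For reference, a minimal sketch of the rule encodeULEB128 implements: emit seven payload bits per byte, least-significant group first, with the high bit set on every byte except the last; for example, 624485 encodes as 0xE5 0x8E 0x26.

#include <cstdint>
#include <vector>

void appendULEB128(std::vector<uint8_t> &Out, uint64_t Value) {
  do {
    uint8_t Byte = Value & 0x7f; // low seven bits of the remaining value
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // continuation bit: more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}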
Error DWARFYAML::emitDebugAranges(raw_ostream &OS, const DWARFYAML::Data &DI) {
- for (auto Range : DI.ARanges) {
- auto HeaderStart = OS.tell();
- writeInitialLength(Range.Format, Range.Length, OS, DI.IsLittleEndian);
- writeInteger((uint16_t)Range.Version, OS, DI.IsLittleEndian);
- if (Range.Format == dwarf::DWARF64)
- writeInteger((uint64_t)Range.CuOffset, OS, DI.IsLittleEndian);
+ assert(DI.DebugAranges && "unexpected emitDebugAranges() call");
+ for (const auto &Range : *DI.DebugAranges) {
+ uint8_t AddrSize;
+ if (Range.AddrSize)
+ AddrSize = *Range.AddrSize;
else
- writeInteger((uint32_t)Range.CuOffset, OS, DI.IsLittleEndian);
- writeInteger((uint8_t)Range.AddrSize, OS, DI.IsLittleEndian);
- writeInteger((uint8_t)Range.SegSize, OS, DI.IsLittleEndian);
+ AddrSize = DI.Is64BitAddrSize ? 8 : 4;
+
+ uint64_t Length = 4; // sizeof(version) 2 + sizeof(address_size) 1 +
+ // sizeof(segment_selector_size) 1
+ Length +=
+ Range.Format == dwarf::DWARF64 ? 8 : 4; // sizeof(debug_info_offset)
- auto HeaderSize = OS.tell() - HeaderStart;
- auto FirstDescriptor = alignTo(HeaderSize, Range.AddrSize * 2);
- ZeroFillBytes(OS, FirstDescriptor - HeaderSize);
+ const uint64_t HeaderLength =
+ Length + (Range.Format == dwarf::DWARF64
+ ? 12
+ : 4); // sizeof(unit_header) = 12 (DWARF64) or 4 (DWARF32)
+ const uint64_t PaddedHeaderLength = alignTo(HeaderLength, AddrSize * 2);
+
+ if (Range.Length) {
+ Length = *Range.Length;
+ } else {
+ Length += PaddedHeaderLength - HeaderLength;
+ Length += AddrSize * 2 * (Range.Descriptors.size() + 1);
+ }
+
+ writeInitialLength(Range.Format, Length, OS, DI.IsLittleEndian);
+ writeInteger((uint16_t)Range.Version, OS, DI.IsLittleEndian);
+ writeDWARFOffset(Range.CuOffset, Range.Format, OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)AddrSize, OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)Range.SegSize, OS, DI.IsLittleEndian);
+ ZeroFillBytes(OS, PaddedHeaderLength - HeaderLength);
- for (auto Descriptor : Range.Descriptors) {
- if (Error Err = writeVariableSizedInteger(
- Descriptor.Address, Range.AddrSize, OS, DI.IsLittleEndian))
+ for (const auto &Descriptor : Range.Descriptors) {
+ if (Error Err = writeVariableSizedInteger(Descriptor.Address, AddrSize,
+ OS, DI.IsLittleEndian))
return createStringError(errc::not_supported,
"unable to write debug_aranges address: %s",
toString(std::move(Err)).c_str());
- cantFail(writeVariableSizedInteger(Descriptor.Length, Range.AddrSize, OS,
+ cantFail(writeVariableSizedInteger(Descriptor.Length, AddrSize, OS,
DI.IsLittleEndian));
}
- ZeroFillBytes(OS, Range.AddrSize * 2);
+ ZeroFillBytes(OS, AddrSize * 2);
}
return Error::success();
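To make the length bookkeeping above concrete, consider a DWARF32 table with 8-byte addresses, one descriptor, and no overrides in the YAML: the fixed fields contribute 2 (version) + 1 (address_size) + 1 (segment_selector_size) + 4 (debug_info_offset) = 8 bytes, so HeaderLength = 8 + 4 (initial length field) = 12; the first descriptor must start on a 2 * AddrSize = 16-byte boundary, so 4 padding bytes are inserted and Length becomes 8 + 4 + 16 * (1 + 1) = 44; the whole table then occupies 4 + 44 = 48 bytes, the final 16 of which are the terminating zero pair.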
@@ -152,7 +189,7 @@ Error DWARFYAML::emitDebugAranges(raw_ostream &OS, const DWARFYAML::Data &DI) {
Error DWARFYAML::emitDebugRanges(raw_ostream &OS, const DWARFYAML::Data &DI) {
const size_t RangesOffset = OS.tell();
uint64_t EntryIndex = 0;
- for (auto DebugRanges : DI.DebugRanges) {
+ for (const auto &DebugRanges : *DI.DebugRanges) {
const size_t CurrOffset = OS.tell() - RangesOffset;
if (DebugRanges.Offset && (uint64_t)*DebugRanges.Offset < CurrOffset)
return createStringError(errc::invalid_argument,
@@ -169,7 +206,7 @@ Error DWARFYAML::emitDebugRanges(raw_ostream &OS, const DWARFYAML::Data &DI) {
AddrSize = *DebugRanges.AddrSize;
else
AddrSize = DI.Is64BitAddrSize ? 8 : 4;
- for (auto Entry : DebugRanges.Entries) {
+ for (const auto &Entry : DebugRanges.Entries) {
if (Error Err = writeVariableSizedInteger(Entry.LowOffset, AddrSize, OS,
DI.IsLittleEndian))
return createStringError(
@@ -186,97 +223,252 @@ Error DWARFYAML::emitDebugRanges(raw_ostream &OS, const DWARFYAML::Data &DI) {
return Error::success();
}
-Error DWARFYAML::emitPubSection(raw_ostream &OS,
- const DWARFYAML::PubSection &Sect,
- bool IsLittleEndian, bool IsGNUPubSec) {
- writeInitialLength(Sect.Length, OS, IsLittleEndian);
+static Error emitPubSection(raw_ostream &OS, const DWARFYAML::PubSection &Sect,
+ bool IsLittleEndian, bool IsGNUPubSec = false) {
+ writeInitialLength(Sect.Format, Sect.Length, OS, IsLittleEndian);
writeInteger((uint16_t)Sect.Version, OS, IsLittleEndian);
writeInteger((uint32_t)Sect.UnitOffset, OS, IsLittleEndian);
writeInteger((uint32_t)Sect.UnitSize, OS, IsLittleEndian);
- for (auto Entry : Sect.Entries) {
+ for (const auto &Entry : Sect.Entries) {
writeInteger((uint32_t)Entry.DieOffset, OS, IsLittleEndian);
if (IsGNUPubSec)
writeInteger((uint8_t)Entry.Descriptor, OS, IsLittleEndian);
OS.write(Entry.Name.data(), Entry.Name.size());
OS.write('\0');
}
-
return Error::success();
}
-namespace {
-/// An extension of the DWARFYAML::ConstVisitor which writes compile
-/// units and DIEs to a stream.
-class DumpVisitor : public DWARFYAML::ConstVisitor {
- raw_ostream &OS;
-
-protected:
- void onStartCompileUnit(const DWARFYAML::Unit &CU) override {
- writeInitialLength(CU.Format, CU.Length, OS, DebugInfo.IsLittleEndian);
- writeInteger((uint16_t)CU.Version, OS, DebugInfo.IsLittleEndian);
- if (CU.Version >= 5) {
- writeInteger((uint8_t)CU.Type, OS, DebugInfo.IsLittleEndian);
- writeInteger((uint8_t)CU.AddrSize, OS, DebugInfo.IsLittleEndian);
- cantFail(writeVariableSizedInteger(CU.AbbrOffset,
- CU.Format == dwarf::DWARF64 ? 8 : 4,
- OS, DebugInfo.IsLittleEndian));
- } else {
- cantFail(writeVariableSizedInteger(CU.AbbrOffset,
- CU.Format == dwarf::DWARF64 ? 8 : 4,
- OS, DebugInfo.IsLittleEndian));
- writeInteger((uint8_t)CU.AddrSize, OS, DebugInfo.IsLittleEndian);
- }
- }
+Error DWARFYAML::emitDebugPubnames(raw_ostream &OS, const Data &DI) {
+ assert(DI.PubNames && "unexpected emitDebugPubnames() call");
+ return emitPubSection(OS, *DI.PubNames, DI.IsLittleEndian);
+}
- void onStartDIE(const DWARFYAML::Unit &CU,
- const DWARFYAML::Entry &DIE) override {
- encodeULEB128(DIE.AbbrCode, OS);
- }
+Error DWARFYAML::emitDebugPubtypes(raw_ostream &OS, const Data &DI) {
+ assert(DI.PubTypes && "unexpected emitDebugPubtypes() call");
+ return emitPubSection(OS, *DI.PubTypes, DI.IsLittleEndian);
+}
- void onValue(const uint8_t U) override {
- writeInteger(U, OS, DebugInfo.IsLittleEndian);
- }
+Error DWARFYAML::emitDebugGNUPubnames(raw_ostream &OS, const Data &DI) {
+ assert(DI.GNUPubNames && "unexpected emitDebugGNUPubnames() call");
+ return emitPubSection(OS, *DI.GNUPubNames, DI.IsLittleEndian,
+ /*IsGNUPubSec=*/true);
+}
- void onValue(const uint16_t U) override {
- writeInteger(U, OS, DebugInfo.IsLittleEndian);
- }
+Error DWARFYAML::emitDebugGNUPubtypes(raw_ostream &OS, const Data &DI) {
+ assert(DI.GNUPubTypes && "unexpected emitDebugGNUPubtypes() call");
+ return emitPubSection(OS, *DI.GNUPubTypes, DI.IsLittleEndian,
+ /*IsGNUPubSec=*/true);
+}
- void onValue(const uint32_t U) override {
- writeInteger(U, OS, DebugInfo.IsLittleEndian);
+static Expected<uint64_t> writeDIE(const DWARFYAML::Data &DI, uint64_t CUIndex,
+ uint64_t AbbrevTableID,
+ const dwarf::FormParams &Params,
+ const DWARFYAML::Entry &Entry,
+ raw_ostream &OS, bool IsLittleEndian) {
+ uint64_t EntryBegin = OS.tell();
+ encodeULEB128(Entry.AbbrCode, OS);
+ uint32_t AbbrCode = Entry.AbbrCode;
+ if (AbbrCode == 0 || Entry.Values.empty())
+ return OS.tell() - EntryBegin;
+
+ Expected<DWARFYAML::Data::AbbrevTableInfo> AbbrevTableInfoOrErr =
+ DI.getAbbrevTableInfoByID(AbbrevTableID);
+ if (!AbbrevTableInfoOrErr)
+ return createStringError(errc::invalid_argument,
+ toString(AbbrevTableInfoOrErr.takeError()) +
+ " for compilation unit with index " +
+ utostr(CUIndex));
+
+ ArrayRef<DWARFYAML::Abbrev> AbbrevDecls(
+ DI.DebugAbbrev[AbbrevTableInfoOrErr->Index].Table);
+
+ if (AbbrCode > AbbrevDecls.size())
+ return createStringError(
+ errc::invalid_argument,
+ "abbrev code must be less than or equal to the number of "
+ "entries in abbreviation table");
+ const DWARFYAML::Abbrev &Abbrev = AbbrevDecls[AbbrCode - 1];
+ auto FormVal = Entry.Values.begin();
+ auto AbbrForm = Abbrev.Attributes.begin();
+ for (; FormVal != Entry.Values.end() && AbbrForm != Abbrev.Attributes.end();
+ ++FormVal, ++AbbrForm) {
+ dwarf::Form Form = AbbrForm->Form;
+ bool Indirect;
+ do {
+ Indirect = false;
+ switch (Form) {
+ case dwarf::DW_FORM_addr:
+ // TODO: Test this error.
+ if (Error Err = writeVariableSizedInteger(
+ FormVal->Value, Params.AddrSize, OS, IsLittleEndian))
+ return std::move(Err);
+ break;
+ case dwarf::DW_FORM_ref_addr:
+ // TODO: Test this error.
+ if (Error Err = writeVariableSizedInteger(FormVal->Value,
+ Params.getRefAddrByteSize(),
+ OS, IsLittleEndian))
+ return std::move(Err);
+ break;
+ case dwarf::DW_FORM_exprloc:
+ case dwarf::DW_FORM_block:
+ encodeULEB128(FormVal->BlockData.size(), OS);
+ OS.write((const char *)FormVal->BlockData.data(),
+ FormVal->BlockData.size());
+ break;
+ case dwarf::DW_FORM_block1: {
+ writeInteger((uint8_t)FormVal->BlockData.size(), OS, IsLittleEndian);
+ OS.write((const char *)FormVal->BlockData.data(),
+ FormVal->BlockData.size());
+ break;
+ }
+ case dwarf::DW_FORM_block2: {
+ writeInteger((uint16_t)FormVal->BlockData.size(), OS, IsLittleEndian);
+ OS.write((const char *)FormVal->BlockData.data(),
+ FormVal->BlockData.size());
+ break;
+ }
+ case dwarf::DW_FORM_block4: {
+ writeInteger((uint32_t)FormVal->BlockData.size(), OS, IsLittleEndian);
+ OS.write((const char *)FormVal->BlockData.data(),
+ FormVal->BlockData.size());
+ break;
+ }
+ case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_addrx:
+ case dwarf::DW_FORM_rnglistx:
+ case dwarf::DW_FORM_loclistx:
+ case dwarf::DW_FORM_udata:
+ case dwarf::DW_FORM_ref_udata:
+ case dwarf::DW_FORM_GNU_addr_index:
+ case dwarf::DW_FORM_GNU_str_index:
+ encodeULEB128(FormVal->Value, OS);
+ break;
+ case dwarf::DW_FORM_data1:
+ case dwarf::DW_FORM_ref1:
+ case dwarf::DW_FORM_flag:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_addrx1:
+ writeInteger((uint8_t)FormVal->Value, OS, IsLittleEndian);
+ break;
+ case dwarf::DW_FORM_data2:
+ case dwarf::DW_FORM_ref2:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_addrx2:
+ writeInteger((uint16_t)FormVal->Value, OS, IsLittleEndian);
+ break;
+ case dwarf::DW_FORM_data4:
+ case dwarf::DW_FORM_ref4:
+ case dwarf::DW_FORM_ref_sup4:
+ case dwarf::DW_FORM_strx4:
+ case dwarf::DW_FORM_addrx4:
+ writeInteger((uint32_t)FormVal->Value, OS, IsLittleEndian);
+ break;
+ case dwarf::DW_FORM_data8:
+ case dwarf::DW_FORM_ref8:
+ case dwarf::DW_FORM_ref_sup8:
+ case dwarf::DW_FORM_ref_sig8:
+ writeInteger((uint64_t)FormVal->Value, OS, IsLittleEndian);
+ break;
+ case dwarf::DW_FORM_sdata:
+ encodeSLEB128(FormVal->Value, OS);
+ break;
+ case dwarf::DW_FORM_string:
+ OS.write(FormVal->CStr.data(), FormVal->CStr.size());
+ OS.write('\0');
+ break;
+ case dwarf::DW_FORM_indirect:
+ encodeULEB128(FormVal->Value, OS);
+ Indirect = true;
+ Form = static_cast<dwarf::Form>((uint64_t)FormVal->Value);
+ ++FormVal;
+ break;
+ case dwarf::DW_FORM_strp:
+ case dwarf::DW_FORM_sec_offset:
+ case dwarf::DW_FORM_GNU_ref_alt:
+ case dwarf::DW_FORM_GNU_strp_alt:
+ case dwarf::DW_FORM_line_strp:
+ case dwarf::DW_FORM_strp_sup:
+ cantFail(writeVariableSizedInteger(FormVal->Value,
+ Params.getDwarfOffsetByteSize(), OS,
+ IsLittleEndian));
+ break;
+ default:
+ break;
+ }
+ } while (Indirect);
}
- void onValue(const uint64_t U, const bool LEB = false) override {
- if (LEB)
- encodeULEB128(U, OS);
- else
- writeInteger(U, OS, DebugInfo.IsLittleEndian);
- }
+ return OS.tell() - EntryBegin;
+}
- void onValue(const int64_t S, const bool LEB = false) override {
- if (LEB)
- encodeSLEB128(S, OS);
+Error DWARFYAML::emitDebugInfo(raw_ostream &OS, const DWARFYAML::Data &DI) {
+ for (uint64_t I = 0; I < DI.CompileUnits.size(); ++I) {
+ const DWARFYAML::Unit &Unit = DI.CompileUnits[I];
+ uint8_t AddrSize;
+ if (Unit.AddrSize)
+ AddrSize = *Unit.AddrSize;
else
- writeInteger(S, OS, DebugInfo.IsLittleEndian);
- }
+ AddrSize = DI.Is64BitAddrSize ? 8 : 4;
+ dwarf::FormParams Params = {Unit.Version, AddrSize, Unit.Format};
+ uint64_t Length = 3; // sizeof(version) + sizeof(address_size)
+ Length += Unit.Version >= 5 ? 1 : 0; // sizeof(unit_type)
+ Length += Params.getDwarfOffsetByteSize(); // sizeof(debug_abbrev_offset)
+
+ // Since the length of the current compilation unit is not yet known, we
+ // first write the content of the compilation unit to a buffer to compute
+ // the length and then serialize the buffer content to the actual output
+ // stream.
+ std::string EntryBuffer;
+ raw_string_ostream EntryBufferOS(EntryBuffer);
+
+ uint64_t AbbrevTableID = Unit.AbbrevTableID.getValueOr(I);
+ for (const DWARFYAML::Entry &Entry : Unit.Entries) {
+ if (Expected<uint64_t> EntryLength =
+ writeDIE(DI, I, AbbrevTableID, Params, Entry, EntryBufferOS,
+ DI.IsLittleEndian))
+ Length += *EntryLength;
+ else
+ return EntryLength.takeError();
+ }
- void onValue(const StringRef String) override {
- OS.write(String.data(), String.size());
- OS.write('\0');
- }
+ // If the length is specified in the YAML description, we use it instead of
+ // the actual length.
+ if (Unit.Length)
+ Length = *Unit.Length;
- void onValue(const MemoryBufferRef MBR) override {
- OS.write(MBR.getBufferStart(), MBR.getBufferSize());
- }
+ writeInitialLength(Unit.Format, Length, OS, DI.IsLittleEndian);
+ writeInteger((uint16_t)Unit.Version, OS, DI.IsLittleEndian);
+
+ uint64_t AbbrevTableOffset = 0;
+ if (Unit.AbbrOffset) {
+ AbbrevTableOffset = *Unit.AbbrOffset;
+ } else {
+ if (Expected<DWARFYAML::Data::AbbrevTableInfo> AbbrevTableInfoOrErr =
+ DI.getAbbrevTableInfoByID(AbbrevTableID)) {
+ AbbrevTableOffset = AbbrevTableInfoOrErr->Offset;
+ } else {
+ // The current compilation unit may have no DIEs, in which case no
+ // associated abbrev table can be found. Consume the error and assign 0
+ // to the debug_abbrev_offset in such circumstances.
+ consumeError(AbbrevTableInfoOrErr.takeError());
+ }
+ }
-public:
- DumpVisitor(const DWARFYAML::Data &DI, raw_ostream &Out)
- : DWARFYAML::ConstVisitor(DI), OS(Out) {}
-};
-} // namespace
+ if (Unit.Version >= 5) {
+ writeInteger((uint8_t)Unit.Type, OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)AddrSize, OS, DI.IsLittleEndian);
+ writeDWARFOffset(AbbrevTableOffset, Unit.Format, OS, DI.IsLittleEndian);
+ } else {
+ writeDWARFOffset(AbbrevTableOffset, Unit.Format, OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)AddrSize, OS, DI.IsLittleEndian);
+ }
-Error DWARFYAML::emitDebugInfo(raw_ostream &OS, const DWARFYAML::Data &DI) {
- DumpVisitor Visitor(DI, OS);
- return Visitor.traverseDebugInfo();
+ OS.write(EntryBuffer.data(), EntryBuffer.size());
+ }
+
+ return Error::success();
}
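The version check above reflects the field reordering between pre-v5 and v5 unit headers defined by the DWARF standard; schematically (offsets are 4 bytes in DWARF32 and 8 in DWARF64):

  DWARF v2-v4: unit_length, version(2), debug_abbrev_offset(4/8), address_size(1)
  DWARF v5:    unit_length, version(2), unit_type(1), address_size(1), debug_abbrev_offset(4/8)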
static void emitFileEntry(raw_ostream &OS, const DWARFYAML::File &File) {
@@ -287,96 +479,156 @@ static void emitFileEntry(raw_ostream &OS, const DWARFYAML::File &File) {
encodeULEB128(File.Length, OS);
}
+static void writeExtendedOpcode(const DWARFYAML::LineTableOpcode &Op,
+ uint8_t AddrSize, bool IsLittleEndian,
+ raw_ostream &OS) {
+ // The first byte of an extended opcode is a zero byte. The next bytes are a
+ // ULEB128 integer giving the number of bytes in the instruction itself
+ // (this does not include the first zero byte or the size field). We
+ // serialize the instruction into OpBuffer and then write the size of the
+ // buffer followed by the buffer itself to the real output stream.
+ std::string OpBuffer;
+ raw_string_ostream OpBufferOS(OpBuffer);
+ writeInteger((uint8_t)Op.SubOpcode, OpBufferOS, IsLittleEndian);
+ switch (Op.SubOpcode) {
+ case dwarf::DW_LNE_set_address:
+ cantFail(writeVariableSizedInteger(Op.Data, AddrSize, OpBufferOS,
+ IsLittleEndian));
+ break;
+ case dwarf::DW_LNE_define_file:
+ emitFileEntry(OpBufferOS, Op.FileEntry);
+ break;
+ case dwarf::DW_LNE_set_discriminator:
+ encodeULEB128(Op.Data, OpBufferOS);
+ break;
+ case dwarf::DW_LNE_end_sequence:
+ break;
+ default:
+ for (auto OpByte : Op.UnknownOpcodeData)
+ writeInteger((uint8_t)OpByte, OpBufferOS, IsLittleEndian);
+ }
+ uint64_t ExtLen = Op.ExtLen.getValueOr(OpBuffer.size());
+ encodeULEB128(ExtLen, OS);
+ OS.write(OpBuffer.data(), OpBuffer.size());
+}
+
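As a concrete instance of the framing described above: DW_LNE_end_sequence takes no operands, so it is emitted as the three bytes 0x00 (extended-opcode marker), 0x01 (ULEB128 length of the instruction body), 0x01 (the DW_LNE_end_sequence sub-opcode); DW_LNE_set_address with an 8-byte address becomes 0x00, 0x09, 0x02, followed by the eight address bytes.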
+static void writeLineTableOpcode(const DWARFYAML::LineTableOpcode &Op,
+ uint8_t OpcodeBase, uint8_t AddrSize,
+ raw_ostream &OS, bool IsLittleEndian) {
+ writeInteger((uint8_t)Op.Opcode, OS, IsLittleEndian);
+ if (Op.Opcode == 0) {
+ writeExtendedOpcode(Op, AddrSize, IsLittleEndian, OS);
+ } else if (Op.Opcode < OpcodeBase) {
+ switch (Op.Opcode) {
+ case dwarf::DW_LNS_copy:
+ case dwarf::DW_LNS_negate_stmt:
+ case dwarf::DW_LNS_set_basic_block:
+ case dwarf::DW_LNS_const_add_pc:
+ case dwarf::DW_LNS_set_prologue_end:
+ case dwarf::DW_LNS_set_epilogue_begin:
+ break;
+
+ case dwarf::DW_LNS_advance_pc:
+ case dwarf::DW_LNS_set_file:
+ case dwarf::DW_LNS_set_column:
+ case dwarf::DW_LNS_set_isa:
+ encodeULEB128(Op.Data, OS);
+ break;
+
+ case dwarf::DW_LNS_advance_line:
+ encodeSLEB128(Op.SData, OS);
+ break;
+
+ case dwarf::DW_LNS_fixed_advance_pc:
+ writeInteger((uint16_t)Op.Data, OS, IsLittleEndian);
+ break;
+
+ default:
+ for (auto OpData : Op.StandardOpcodeData) {
+ encodeULEB128(OpData, OS);
+ }
+ }
+ }
+}
+
+static std::vector<uint8_t>
+getStandardOpcodeLengths(uint16_t Version, Optional<uint8_t> OpcodeBase) {
+ // If the opcode_base field isn't specified, we return the default
+ // standard_opcode_lengths array for the given version.
+ std::vector<uint8_t> StandardOpcodeLengths{0, 1, 1, 1, 1, 0,
+ 0, 0, 1, 0, 0, 1};
+ if (Version == 2) {
+ // DWARF v2 uses the same first 9 standard opcodes as v3-5.
+ StandardOpcodeLengths.resize(9);
+ } else if (OpcodeBase) {
+ StandardOpcodeLengths.resize(*OpcodeBase > 0 ? *OpcodeBase - 1 : 0, 0);
+ }
+ return StandardOpcodeLengths;
+}
+
Error DWARFYAML::emitDebugLine(raw_ostream &OS, const DWARFYAML::Data &DI) {
- for (const auto &LineTable : DI.DebugLines) {
- writeInitialLength(LineTable.Format, LineTable.Length, OS,
- DI.IsLittleEndian);
- uint64_t SizeOfPrologueLength = LineTable.Format == dwarf::DWARF64 ? 8 : 4;
- writeInteger((uint16_t)LineTable.Version, OS, DI.IsLittleEndian);
- cantFail(writeVariableSizedInteger(
- LineTable.PrologueLength, SizeOfPrologueLength, OS, DI.IsLittleEndian));
- writeInteger((uint8_t)LineTable.MinInstLength, OS, DI.IsLittleEndian);
+ for (const DWARFYAML::LineTable &LineTable : DI.DebugLines) {
+ // Buffer holds the bytes following the header_length (or prologue_length in
+ // DWARFv2) field to the end of the line number program itself.
+ std::string Buffer;
+ raw_string_ostream BufferOS(Buffer);
+
+ writeInteger(LineTable.MinInstLength, BufferOS, DI.IsLittleEndian);
+ // TODO: Add support for emitting DWARFv5 line table.
if (LineTable.Version >= 4)
- writeInteger((uint8_t)LineTable.MaxOpsPerInst, OS, DI.IsLittleEndian);
- writeInteger((uint8_t)LineTable.DefaultIsStmt, OS, DI.IsLittleEndian);
- writeInteger((uint8_t)LineTable.LineBase, OS, DI.IsLittleEndian);
- writeInteger((uint8_t)LineTable.LineRange, OS, DI.IsLittleEndian);
- writeInteger((uint8_t)LineTable.OpcodeBase, OS, DI.IsLittleEndian);
-
- for (auto OpcodeLength : LineTable.StandardOpcodeLengths)
- writeInteger((uint8_t)OpcodeLength, OS, DI.IsLittleEndian);
-
- for (auto IncludeDir : LineTable.IncludeDirs) {
- OS.write(IncludeDir.data(), IncludeDir.size());
- OS.write('\0');
+ writeInteger(LineTable.MaxOpsPerInst, BufferOS, DI.IsLittleEndian);
+ writeInteger(LineTable.DefaultIsStmt, BufferOS, DI.IsLittleEndian);
+ writeInteger(LineTable.LineBase, BufferOS, DI.IsLittleEndian);
+ writeInteger(LineTable.LineRange, BufferOS, DI.IsLittleEndian);
+
+ std::vector<uint8_t> StandardOpcodeLengths =
+ LineTable.StandardOpcodeLengths.getValueOr(
+ getStandardOpcodeLengths(LineTable.Version, LineTable.OpcodeBase));
+ uint8_t OpcodeBase = LineTable.OpcodeBase
+ ? *LineTable.OpcodeBase
+ : StandardOpcodeLengths.size() + 1;
+ writeInteger(OpcodeBase, BufferOS, DI.IsLittleEndian);
+ for (uint8_t OpcodeLength : StandardOpcodeLengths)
+ writeInteger(OpcodeLength, BufferOS, DI.IsLittleEndian);
+
+ for (StringRef IncludeDir : LineTable.IncludeDirs) {
+ BufferOS.write(IncludeDir.data(), IncludeDir.size());
+ BufferOS.write('\0');
}
- OS.write('\0');
+ BufferOS.write('\0');
- for (auto File : LineTable.Files)
- emitFileEntry(OS, File);
- OS.write('\0');
+ for (const DWARFYAML::File &File : LineTable.Files)
+ emitFileEntry(BufferOS, File);
+ BufferOS.write('\0');
- for (auto Op : LineTable.Opcodes) {
- writeInteger((uint8_t)Op.Opcode, OS, DI.IsLittleEndian);
- if (Op.Opcode == 0) {
- encodeULEB128(Op.ExtLen, OS);
- writeInteger((uint8_t)Op.SubOpcode, OS, DI.IsLittleEndian);
- switch (Op.SubOpcode) {
- case dwarf::DW_LNE_set_address:
- case dwarf::DW_LNE_set_discriminator:
- // TODO: Test this error.
- if (Error Err = writeVariableSizedInteger(
- Op.Data, DI.CompileUnits[0].AddrSize, OS, DI.IsLittleEndian))
- return Err;
- break;
- case dwarf::DW_LNE_define_file:
- emitFileEntry(OS, Op.FileEntry);
- break;
- case dwarf::DW_LNE_end_sequence:
- break;
- default:
- for (auto OpByte : Op.UnknownOpcodeData)
- writeInteger((uint8_t)OpByte, OS, DI.IsLittleEndian);
- }
- } else if (Op.Opcode < LineTable.OpcodeBase) {
- switch (Op.Opcode) {
- case dwarf::DW_LNS_copy:
- case dwarf::DW_LNS_negate_stmt:
- case dwarf::DW_LNS_set_basic_block:
- case dwarf::DW_LNS_const_add_pc:
- case dwarf::DW_LNS_set_prologue_end:
- case dwarf::DW_LNS_set_epilogue_begin:
- break;
-
- case dwarf::DW_LNS_advance_pc:
- case dwarf::DW_LNS_set_file:
- case dwarf::DW_LNS_set_column:
- case dwarf::DW_LNS_set_isa:
- encodeULEB128(Op.Data, OS);
- break;
-
- case dwarf::DW_LNS_advance_line:
- encodeSLEB128(Op.SData, OS);
- break;
-
- case dwarf::DW_LNS_fixed_advance_pc:
- writeInteger((uint16_t)Op.Data, OS, DI.IsLittleEndian);
- break;
-
- default:
- for (auto OpData : Op.StandardOpcodeData) {
- encodeULEB128(OpData, OS);
- }
- }
- }
+ uint64_t HeaderLength =
+ LineTable.PrologueLength ? *LineTable.PrologueLength : Buffer.size();
+
+ for (const DWARFYAML::LineTableOpcode &Op : LineTable.Opcodes)
+ writeLineTableOpcode(Op, OpcodeBase, DI.Is64BitAddrSize ? 8 : 4, BufferOS,
+ DI.IsLittleEndian);
+
+ uint64_t Length;
+ if (LineTable.Length) {
+ Length = *LineTable.Length;
+ } else {
+ Length = 2; // sizeof(version)
+ Length +=
+ (LineTable.Format == dwarf::DWARF64 ? 8 : 4); // sizeof(header_length)
+ Length += Buffer.size();
}
+
+ writeInitialLength(LineTable.Format, Length, OS, DI.IsLittleEndian);
+ writeInteger(LineTable.Version, OS, DI.IsLittleEndian);
+ writeDWARFOffset(HeaderLength, LineTable.Format, OS, DI.IsLittleEndian);
+ OS.write(Buffer.data(), Buffer.size());
}
return Error::success();
}
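Note the ordering in the loop above: HeaderLength is captured from Buffer.size() before the opcodes are appended, so it covers only the fields and tables that follow the header_length field, exactly as header_length (prologue_length in DWARFv2) requires. As a worked example, a DWARF32 v4 table whose post-header_length header occupies 30 bytes and whose opcodes occupy 25 bytes is emitted with header_length = 30 and unit_length = 2 (version) + 4 (header_length field) + 30 + 25 = 61.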
Error DWARFYAML::emitDebugAddr(raw_ostream &OS, const Data &DI) {
- for (const AddrTableEntry &TableEntry : DI.DebugAddr) {
+ for (const AddrTableEntry &TableEntry : *DI.DebugAddr) {
uint8_t AddrSize;
if (TableEntry.AddrSize)
AddrSize = *TableEntry.AddrSize;
@@ -397,7 +649,7 @@ Error DWARFYAML::emitDebugAddr(raw_ostream &OS, const Data &DI) {
writeInteger((uint8_t)TableEntry.SegSelectorSize, OS, DI.IsLittleEndian);
for (const SegAddrPair &Pair : TableEntry.SegAddrPairs) {
- if (TableEntry.SegSelectorSize != 0)
+ if (TableEntry.SegSelectorSize != yaml::Hex8{0})
if (Error Err = writeVariableSizedInteger(Pair.Segment,
TableEntry.SegSelectorSize,
OS, DI.IsLittleEndian))
@@ -416,68 +668,389 @@ Error DWARFYAML::emitDebugAddr(raw_ostream &OS, const Data &DI) {
return Error::success();
}
-using EmitFuncType = Error (*)(raw_ostream &, const DWARFYAML::Data &);
+Error DWARFYAML::emitDebugStrOffsets(raw_ostream &OS, const Data &DI) {
+ assert(DI.DebugStrOffsets && "unexpected emitDebugStrOffsets() call");
+ for (const DWARFYAML::StringOffsetsTable &Table : *DI.DebugStrOffsets) {
+ uint64_t Length;
+ if (Table.Length)
+ Length = *Table.Length;
+ else
+ // sizeof(version) + sizeof(padding) = 4
+ Length =
+ 4 + Table.Offsets.size() * (Table.Format == dwarf::DWARF64 ? 8 : 4);
-static Error
-emitDebugSectionImpl(const DWARFYAML::Data &DI, EmitFuncType EmitFunc,
- StringRef Sec,
- StringMap<std::unique_ptr<MemoryBuffer>> &OutputBuffers) {
- std::string Data;
- raw_string_ostream DebugInfoStream(Data);
- if (Error Err = EmitFunc(DebugInfoStream, DI))
- return Err;
- DebugInfoStream.flush();
- if (!Data.empty())
- OutputBuffers[Sec] = MemoryBuffer::getMemBufferCopy(Data);
+ writeInitialLength(Table.Format, Length, OS, DI.IsLittleEndian);
+ writeInteger((uint16_t)Table.Version, OS, DI.IsLittleEndian);
+ writeInteger((uint16_t)Table.Padding, OS, DI.IsLittleEndian);
+
+ for (uint64_t Offset : Table.Offsets)
+ writeDWARFOffset(Offset, Table.Format, OS, DI.IsLittleEndian);
+ }
+
+ return Error::success();
+}
+
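For example, a DWARF32 string offsets contribution with three offsets and no explicit Length is emitted with length = 2 (version) + 2 (padding) + 3 * 4 = 16, followed by the version (5 for .debug_str_offsets), the 2-byte padding field, and the three 4-byte offsets, for a total contribution of 4 + 16 = 20 bytes.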
+static Error checkOperandCount(StringRef EncodingString,
+ ArrayRef<yaml::Hex64> Values,
+ uint64_t ExpectedOperands) {
+ if (Values.size() != ExpectedOperands)
+ return createStringError(
+ errc::invalid_argument,
+ "invalid number (%zu) of operands for the operator: %s, %" PRIu64
+ " expected",
+ Values.size(), EncodingString.str().c_str(), ExpectedOperands);
return Error::success();
}
-namespace {
-class DIEFixupVisitor : public DWARFYAML::Visitor {
- uint64_t Length;
+static Error writeListEntryAddress(StringRef EncodingName, raw_ostream &OS,
+ uint64_t Addr, uint8_t AddrSize,
+ bool IsLittleEndian) {
+ if (Error Err = writeVariableSizedInteger(Addr, AddrSize, OS, IsLittleEndian))
+ return createStringError(errc::invalid_argument,
+ "unable to write address for the operator %s: %s",
+ EncodingName.str().c_str(),
+ toString(std::move(Err)).c_str());
-public:
- DIEFixupVisitor(DWARFYAML::Data &DI) : DWARFYAML::Visitor(DI){};
+ return Error::success();
+}
+
+static Expected<uint64_t>
+writeDWARFExpression(raw_ostream &OS,
+ const DWARFYAML::DWARFOperation &Operation,
+ uint8_t AddrSize, bool IsLittleEndian) {
+ auto CheckOperands = [&](uint64_t ExpectedOperands) -> Error {
+ return checkOperandCount(dwarf::OperationEncodingString(Operation.Operator),
+ Operation.Values, ExpectedOperands);
+ };
-protected:
- void onStartCompileUnit(DWARFYAML::Unit &CU) override {
- // Size of the unit header, excluding the length field itself.
- Length = CU.Version >= 5 ? 8 : 7;
+ uint64_t ExpressionBegin = OS.tell();
+ writeInteger((uint8_t)Operation.Operator, OS, IsLittleEndian);
+ switch (Operation.Operator) {
+ case dwarf::DW_OP_consts:
+ if (Error Err = CheckOperands(1))
+ return std::move(Err);
+ encodeSLEB128(Operation.Values[0], OS);
+ break;
+ case dwarf::DW_OP_stack_value:
+ if (Error Err = CheckOperands(0))
+ return std::move(Err);
+ break;
+ default:
+ StringRef EncodingStr = dwarf::OperationEncodingString(Operation.Operator);
+ return createStringError(errc::not_supported,
+ "DWARF expression: " +
+ (EncodingStr.empty()
+ ? "0x" + utohexstr(Operation.Operator)
+ : EncodingStr) +
+ " is not supported");
}
+ return OS.tell() - ExpressionBegin;
+}
+
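As a concrete instance of the two operators handled above, the location description "DW_OP_consts -2, DW_OP_stack_value" serializes to the three bytes 0x11 (DW_OP_consts), 0x7E (SLEB128 encoding of -2), 0x9F (DW_OP_stack_value).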
+static Expected<uint64_t> writeListEntry(raw_ostream &OS,
+ const DWARFYAML::RnglistEntry &Entry,
+ uint8_t AddrSize,
+ bool IsLittleEndian) {
+ uint64_t BeginOffset = OS.tell();
+ writeInteger((uint8_t)Entry.Operator, OS, IsLittleEndian);
+
+ StringRef EncodingName = dwarf::RangeListEncodingString(Entry.Operator);
+
+ auto CheckOperands = [&](uint64_t ExpectedOperands) -> Error {
+ return checkOperandCount(EncodingName, Entry.Values, ExpectedOperands);
+ };
- void onEndCompileUnit(DWARFYAML::Unit &CU) override { CU.Length = Length; }
+ auto WriteAddress = [&](uint64_t Addr) -> Error {
+ return writeListEntryAddress(EncodingName, OS, Addr, AddrSize,
+ IsLittleEndian);
+ };
- void onStartDIE(DWARFYAML::Unit &CU, DWARFYAML::Entry &DIE) override {
- Length += getULEB128Size(DIE.AbbrCode);
+ switch (Entry.Operator) {
+ case dwarf::DW_RLE_end_of_list:
+ if (Error Err = CheckOperands(0))
+ return std::move(Err);
+ break;
+ case dwarf::DW_RLE_base_addressx:
+ if (Error Err = CheckOperands(1))
+ return std::move(Err);
+ encodeULEB128(Entry.Values[0], OS);
+ break;
+ case dwarf::DW_RLE_startx_endx:
+ case dwarf::DW_RLE_startx_length:
+ case dwarf::DW_RLE_offset_pair:
+ if (Error Err = CheckOperands(2))
+ return std::move(Err);
+ encodeULEB128(Entry.Values[0], OS);
+ encodeULEB128(Entry.Values[1], OS);
+ break;
+ case dwarf::DW_RLE_base_address:
+ if (Error Err = CheckOperands(1))
+ return std::move(Err);
+ if (Error Err = WriteAddress(Entry.Values[0]))
+ return std::move(Err);
+ break;
+ case dwarf::DW_RLE_start_end:
+ if (Error Err = CheckOperands(2))
+ return std::move(Err);
+ if (Error Err = WriteAddress(Entry.Values[0]))
+ return std::move(Err);
+ cantFail(WriteAddress(Entry.Values[1]));
+ break;
+ case dwarf::DW_RLE_start_length:
+ if (Error Err = CheckOperands(2))
+ return std::move(Err);
+ if (Error Err = WriteAddress(Entry.Values[0]))
+ return std::move(Err);
+ encodeULEB128(Entry.Values[1], OS);
+ break;
}
- void onValue(const uint8_t U) override { Length += 1; }
- void onValue(const uint16_t U) override { Length += 2; }
- void onValue(const uint32_t U) override { Length += 4; }
- void onValue(const uint64_t U, const bool LEB = false) override {
- if (LEB)
- Length += getULEB128Size(U);
+ return OS.tell() - BeginOffset;
+}
+
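For example, a DW_RLE_offset_pair entry with operands 0x10 and 0x20 is emitted as the three bytes 0x04 0x10 0x20 (the operator followed by two ULEB128 operands), while a list is terminated by the single DW_RLE_end_of_list byte 0x00.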
+static Expected<uint64_t> writeListEntry(raw_ostream &OS,
+ const DWARFYAML::LoclistEntry &Entry,
+ uint8_t AddrSize,
+ bool IsLittleEndian) {
+ uint64_t BeginOffset = OS.tell();
+ writeInteger((uint8_t)Entry.Operator, OS, IsLittleEndian);
+
+ StringRef EncodingName = dwarf::LocListEncodingString(Entry.Operator);
+
+ auto CheckOperands = [&](uint64_t ExpectedOperands) -> Error {
+ return checkOperandCount(EncodingName, Entry.Values, ExpectedOperands);
+ };
+
+ auto WriteAddress = [&](uint64_t Addr) -> Error {
+ return writeListEntryAddress(EncodingName, OS, Addr, AddrSize,
+ IsLittleEndian);
+ };
+
+ auto WriteDWARFOperations = [&]() -> Error {
+ std::string OpBuffer;
+ raw_string_ostream OpBufferOS(OpBuffer);
+ uint64_t DescriptionsLength = 0;
+
+ for (const DWARFYAML::DWARFOperation &Op : Entry.Descriptions) {
+ if (Expected<uint64_t> OpSize =
+ writeDWARFExpression(OpBufferOS, Op, AddrSize, IsLittleEndian))
+ DescriptionsLength += *OpSize;
+ else
+ return OpSize.takeError();
+ }
+
+ if (Entry.DescriptionsLength)
+ DescriptionsLength = *Entry.DescriptionsLength;
else
- Length += 8;
+ DescriptionsLength = OpBuffer.size();
+
+ encodeULEB128(DescriptionsLength, OS);
+ OS.write(OpBuffer.data(), OpBuffer.size());
+
+ return Error::success();
+ };
+
+ switch (Entry.Operator) {
+ case dwarf::DW_LLE_end_of_list:
+ if (Error Err = CheckOperands(0))
+ return std::move(Err);
+ break;
+ case dwarf::DW_LLE_base_addressx:
+ if (Error Err = CheckOperands(1))
+ return std::move(Err);
+ encodeULEB128(Entry.Values[0], OS);
+ break;
+ case dwarf::DW_LLE_startx_endx:
+ case dwarf::DW_LLE_startx_length:
+ case dwarf::DW_LLE_offset_pair:
+ if (Error Err = CheckOperands(2))
+ return std::move(Err);
+ encodeULEB128(Entry.Values[0], OS);
+ encodeULEB128(Entry.Values[1], OS);
+ if (Error Err = WriteDWARFOperations())
+ return std::move(Err);
+ break;
+ case dwarf::DW_LLE_default_location:
+ if (Error Err = CheckOperands(0))
+ return std::move(Err);
+ if (Error Err = WriteDWARFOperations())
+ return std::move(Err);
+ break;
+ case dwarf::DW_LLE_base_address:
+ if (Error Err = CheckOperands(1))
+ return std::move(Err);
+ if (Error Err = WriteAddress(Entry.Values[0]))
+ return std::move(Err);
+ break;
+ case dwarf::DW_LLE_start_end:
+ if (Error Err = CheckOperands(2))
+ return std::move(Err);
+ if (Error Err = WriteAddress(Entry.Values[0]))
+ return std::move(Err);
+ cantFail(WriteAddress(Entry.Values[1]));
+ if (Error Err = WriteDWARFOperations())
+ return std::move(Err);
+ break;
+ case dwarf::DW_LLE_start_length:
+ if (Error Err = CheckOperands(2))
+ return std::move(Err);
+ if (Error Err = WriteAddress(Entry.Values[0]))
+ return std::move(Err);
+ encodeULEB128(Entry.Values[1], OS);
+ if (Error Err = WriteDWARFOperations())
+ return std::move(Err);
+ break;
}
- void onValue(const int64_t S, const bool LEB = false) override {
- if (LEB)
- Length += getSLEB128Size(S);
+
+ return OS.tell() - BeginOffset;
+}
+
+template <typename EntryType>
+static Error writeDWARFLists(raw_ostream &OS,
+ ArrayRef<DWARFYAML::ListTable<EntryType>> Tables,
+ bool IsLittleEndian, bool Is64BitAddrSize) {
+ for (const DWARFYAML::ListTable<EntryType> &Table : Tables) {
+ // sizeof(version) + sizeof(address_size) + sizeof(segment_selector_size) +
+ // sizeof(offset_entry_count) = 8
+ uint64_t Length = 8;
+
+ uint8_t AddrSize;
+ if (Table.AddrSize)
+ AddrSize = *Table.AddrSize;
else
- Length += 8;
- }
- void onValue(const StringRef String) override { Length += String.size() + 1; }
+ AddrSize = Is64BitAddrSize ? 8 : 4;
+
+ // Since the length of the current range/location list table is not yet
+ // known, we first write the content of the range/location lists to a
+ // buffer to compute the length and then serialize the buffer content to
+ // the actual output stream.
+ std::string ListBuffer;
+ raw_string_ostream ListBufferOS(ListBuffer);
+
+ // Offsets holds the offset of each range/location list. The i-th element
+ // is the offset from the beginning of the first range/location list to
+ // the beginning of the i-th range/location list.
+ std::vector<uint64_t> Offsets;
+
+ for (const DWARFYAML::ListEntries<EntryType> &List : Table.Lists) {
+ Offsets.push_back(ListBufferOS.tell());
+ if (List.Content) {
+ List.Content->writeAsBinary(ListBufferOS, UINT64_MAX);
+ Length += List.Content->binary_size();
+ } else if (List.Entries) {
+ for (const EntryType &Entry : *List.Entries) {
+ Expected<uint64_t> EntrySize =
+ writeListEntry(ListBufferOS, Entry, AddrSize, IsLittleEndian);
+ if (!EntrySize)
+ return EntrySize.takeError();
+ Length += *EntrySize;
+ }
+ }
+ }
- void onValue(const MemoryBufferRef MBR) override {
- Length += MBR.getBufferSize();
+ // If the offset_entry_count field isn't specified, yaml2obj will infer it
+ // from the 'Offsets' field in the YAML description. If the 'Offsets' field
+ // isn't specified either, yaml2obj will infer it from the auto-generated
+ // offsets.
+ uint32_t OffsetEntryCount;
+ if (Table.OffsetEntryCount)
+ OffsetEntryCount = *Table.OffsetEntryCount;
+ else
+ OffsetEntryCount = Table.Offsets ? Table.Offsets->size() : Offsets.size();
+ uint64_t OffsetsSize =
+ OffsetEntryCount * (Table.Format == dwarf::DWARF64 ? 8 : 4);
+ Length += OffsetsSize;
+
+ // If the length is specified in the YAML description, we use it instead of
+ // the actual length.
+ if (Table.Length)
+ Length = *Table.Length;
+
+ writeInitialLength(Table.Format, Length, OS, IsLittleEndian);
+ writeInteger((uint16_t)Table.Version, OS, IsLittleEndian);
+ writeInteger((uint8_t)AddrSize, OS, IsLittleEndian);
+ writeInteger((uint8_t)Table.SegSelectorSize, OS, IsLittleEndian);
+ writeInteger((uint32_t)OffsetEntryCount, OS, IsLittleEndian);
+
+ auto EmitOffsets = [&](ArrayRef<uint64_t> Offsets, uint64_t OffsetsSize) {
+ for (uint64_t Offset : Offsets)
+ writeDWARFOffset(OffsetsSize + Offset, Table.Format, OS,
+ IsLittleEndian);
+ };
+
+ if (Table.Offsets)
+ EmitOffsets(ArrayRef<uint64_t>((const uint64_t *)Table.Offsets->data(),
+ Table.Offsets->size()),
+ 0);
+ else if (OffsetEntryCount != 0)
+ EmitOffsets(Offsets, OffsetsSize);
+
+ OS.write(ListBuffer.data(), ListBuffer.size());
}
-};
-} // namespace
+
+ return Error::success();
+}
+
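To make the length computation above concrete: a DWARF32 table with two lists, auto-generated offsets, and no overrides starts from the 8 fixed bytes (version 2 + address_size 1 + segment_selector_size 1 + offset_entry_count 4), adds 2 * 4 = 8 bytes for the offset array, and then the serialized list entries; as with every DWARF initial length, the 4-byte unit_length field itself is not counted.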
+Error DWARFYAML::emitDebugRnglists(raw_ostream &OS, const Data &DI) {
+ assert(DI.DebugRnglists && "unexpected emitDebugRnglists() call");
+ return writeDWARFLists<DWARFYAML::RnglistEntry>(
+ OS, *DI.DebugRnglists, DI.IsLittleEndian, DI.Is64BitAddrSize);
+}
+
+Error DWARFYAML::emitDebugLoclists(raw_ostream &OS, const Data &DI) {
+ assert(DI.DebugLoclists && "unexpected emitDebugLoclists() call");
+ return writeDWARFLists<DWARFYAML::LoclistEntry>(
+ OS, *DI.DebugLoclists, DI.IsLittleEndian, DI.Is64BitAddrSize);
+}
+
+std::function<Error(raw_ostream &, const DWARFYAML::Data &)>
+DWARFYAML::getDWARFEmitterByName(StringRef SecName) {
+ auto EmitFunc =
+ StringSwitch<
+ std::function<Error(raw_ostream &, const DWARFYAML::Data &)>>(SecName)
+ .Case("debug_abbrev", DWARFYAML::emitDebugAbbrev)
+ .Case("debug_addr", DWARFYAML::emitDebugAddr)
+ .Case("debug_aranges", DWARFYAML::emitDebugAranges)
+ .Case("debug_gnu_pubnames", DWARFYAML::emitDebugGNUPubnames)
+ .Case("debug_gnu_pubtypes", DWARFYAML::emitDebugGNUPubtypes)
+ .Case("debug_info", DWARFYAML::emitDebugInfo)
+ .Case("debug_line", DWARFYAML::emitDebugLine)
+ .Case("debug_loclists", DWARFYAML::emitDebugLoclists)
+ .Case("debug_pubnames", DWARFYAML::emitDebugPubnames)
+ .Case("debug_pubtypes", DWARFYAML::emitDebugPubtypes)
+ .Case("debug_ranges", DWARFYAML::emitDebugRanges)
+ .Case("debug_rnglists", DWARFYAML::emitDebugRnglists)
+ .Case("debug_str", DWARFYAML::emitDebugStr)
+ .Case("debug_str_offsets", DWARFYAML::emitDebugStrOffsets)
+ .Default([&](raw_ostream &, const DWARFYAML::Data &) {
+ return createStringError(errc::not_supported,
+ SecName + " is not supported");
+ });
+
+ return EmitFunc;
+}
+
+static Error
+emitDebugSectionImpl(const DWARFYAML::Data &DI, StringRef Sec,
+ StringMap<std::unique_ptr<MemoryBuffer>> &OutputBuffers) {
+ std::string Data;
+ raw_string_ostream DebugInfoStream(Data);
+
+ auto EmitFunc = DWARFYAML::getDWARFEmitterByName(Sec);
+
+ if (Error Err = EmitFunc(DebugInfoStream, DI))
+ return Err;
+ DebugInfoStream.flush();
+ if (!Data.empty())
+ OutputBuffers[Sec] = MemoryBuffer::getMemBufferCopy(Data);
+
+ return Error::success();
+}
Expected<StringMap<std::unique_ptr<MemoryBuffer>>>
-DWARFYAML::emitDebugSections(StringRef YAMLString, bool ApplyFixups,
- bool IsLittleEndian) {
+DWARFYAML::emitDebugSections(StringRef YAMLString, bool IsLittleEndian,
+ bool Is64BitAddrSize) {
auto CollectDiagnostic = [](const SMDiagnostic &Diag, void *DiagContext) {
*static_cast<SMDiagnostic *>(DiagContext) = Diag;
};
@@ -488,34 +1061,18 @@ DWARFYAML::emitDebugSections(StringRef YAMLString, bool ApplyFixups,
DWARFYAML::Data DI;
DI.IsLittleEndian = IsLittleEndian;
+ DI.Is64BitAddrSize = Is64BitAddrSize;
+
YIn >> DI;
if (YIn.error())
return createStringError(YIn.error(), GeneratedDiag.getMessage());
- if (ApplyFixups) {
- DIEFixupVisitor DIFixer(DI);
- if (Error Err = DIFixer.traverseDebugInfo())
- return std::move(Err);
- }
-
StringMap<std::unique_ptr<MemoryBuffer>> DebugSections;
- Error Err = emitDebugSectionImpl(DI, &DWARFYAML::emitDebugInfo, "debug_info",
- DebugSections);
- Err = joinErrors(std::move(Err),
- emitDebugSectionImpl(DI, &DWARFYAML::emitDebugLine,
- "debug_line", DebugSections));
- Err = joinErrors(std::move(Err),
- emitDebugSectionImpl(DI, &DWARFYAML::emitDebugStr,
- "debug_str", DebugSections));
- Err = joinErrors(std::move(Err),
- emitDebugSectionImpl(DI, &DWARFYAML::emitDebugAbbrev,
- "debug_abbrev", DebugSections));
- Err = joinErrors(std::move(Err),
- emitDebugSectionImpl(DI, &DWARFYAML::emitDebugAranges,
- "debug_aranges", DebugSections));
- Err = joinErrors(std::move(Err),
- emitDebugSectionImpl(DI, &DWARFYAML::emitDebugRanges,
- "debug_ranges", DebugSections));
+ Error Err = Error::success();
+
+ for (StringRef SecName : DI.getNonEmptySectionNames())
+ Err = joinErrors(std::move(Err),
+ emitDebugSectionImpl(DI, SecName, DebugSections));
if (Err)
return std::move(Err);
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFVisitor.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFVisitor.cpp
deleted file mode 100644
index a2dd37b5fe32..000000000000
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFVisitor.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-//===--- DWARFVisitor.cpp ---------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-
-#include "DWARFVisitor.h"
-#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/ObjectYAML/DWARFYAML.h"
-#include "llvm/Support/Errc.h"
-#include "llvm/Support/Error.h"
-
-using namespace llvm;
-
-template <typename T>
-void DWARFYAML::VisitorImpl<T>::onVariableSizeValue(uint64_t U, unsigned Size) {
- switch (Size) {
- case 8:
- onValue((uint64_t)U);
- break;
- case 4:
- onValue((uint32_t)U);
- break;
- case 2:
- onValue((uint16_t)U);
- break;
- case 1:
- onValue((uint8_t)U);
- break;
- default:
- llvm_unreachable("Invalid integer write size.");
- }
-}
-
-static unsigned getOffsetSize(const DWARFYAML::Unit &Unit) {
- return Unit.Format == dwarf::DWARF64 ? 8 : 4;
-}
-
-static unsigned getRefSize(const DWARFYAML::Unit &Unit) {
- if (Unit.Version == 2)
- return Unit.AddrSize;
- return getOffsetSize(Unit);
-}
-
-template <typename T> Error DWARFYAML::VisitorImpl<T>::traverseDebugInfo() {
- for (auto &Unit : DebugInfo.CompileUnits) {
- onStartCompileUnit(Unit);
- if (Unit.Entries.empty())
- continue;
-
- for (auto &Entry : Unit.Entries) {
- onStartDIE(Unit, Entry);
- uint32_t AbbrCode = Entry.AbbrCode;
- if (AbbrCode == 0 || Entry.Values.empty())
- continue;
-
- if (AbbrCode > DebugInfo.AbbrevDecls.size())
- return createStringError(
- errc::invalid_argument,
- "abbrev code must be less than or equal to the number of "
- "entries in abbreviation table");
- const DWARFYAML::Abbrev &Abbrev = DebugInfo.AbbrevDecls[AbbrCode - 1];
- auto FormVal = Entry.Values.begin();
- auto AbbrForm = Abbrev.Attributes.begin();
- for (;
- FormVal != Entry.Values.end() && AbbrForm != Abbrev.Attributes.end();
- ++FormVal, ++AbbrForm) {
- onForm(*AbbrForm, *FormVal);
- dwarf::Form Form = AbbrForm->Form;
- bool Indirect;
- do {
- Indirect = false;
- switch (Form) {
- case dwarf::DW_FORM_addr:
- onVariableSizeValue(FormVal->Value, Unit.AddrSize);
- break;
- case dwarf::DW_FORM_ref_addr:
- onVariableSizeValue(FormVal->Value, getRefSize(Unit));
- break;
- case dwarf::DW_FORM_exprloc:
- case dwarf::DW_FORM_block:
- onValue((uint64_t)FormVal->BlockData.size(), true);
- onValue(
- MemoryBufferRef(StringRef((const char *)&FormVal->BlockData[0],
- FormVal->BlockData.size()),
- ""));
- break;
- case dwarf::DW_FORM_block1: {
- auto writeSize = FormVal->BlockData.size();
- onValue((uint8_t)writeSize);
- onValue(
- MemoryBufferRef(StringRef((const char *)&FormVal->BlockData[0],
- FormVal->BlockData.size()),
- ""));
- break;
- }
- case dwarf::DW_FORM_block2: {
- auto writeSize = FormVal->BlockData.size();
- onValue((uint16_t)writeSize);
- onValue(
- MemoryBufferRef(StringRef((const char *)&FormVal->BlockData[0],
- FormVal->BlockData.size()),
- ""));
- break;
- }
- case dwarf::DW_FORM_block4: {
- auto writeSize = FormVal->BlockData.size();
- onValue((uint32_t)writeSize);
- onValue(
- MemoryBufferRef(StringRef((const char *)&FormVal->BlockData[0],
- FormVal->BlockData.size()),
- ""));
- break;
- }
- case dwarf::DW_FORM_strx:
- case dwarf::DW_FORM_addrx:
- case dwarf::DW_FORM_rnglistx:
- case dwarf::DW_FORM_loclistx:
- onValue((uint64_t)FormVal->Value, /*LEB=*/true);
- break;
- case dwarf::DW_FORM_data1:
- case dwarf::DW_FORM_ref1:
- case dwarf::DW_FORM_flag:
- case dwarf::DW_FORM_strx1:
- case dwarf::DW_FORM_addrx1:
- onValue((uint8_t)FormVal->Value);
- break;
- case dwarf::DW_FORM_data2:
- case dwarf::DW_FORM_ref2:
- case dwarf::DW_FORM_strx2:
- case dwarf::DW_FORM_addrx2:
- onValue((uint16_t)FormVal->Value);
- break;
- case dwarf::DW_FORM_data4:
- case dwarf::DW_FORM_ref4:
- case dwarf::DW_FORM_ref_sup4:
- case dwarf::DW_FORM_strx4:
- case dwarf::DW_FORM_addrx4:
- onValue((uint32_t)FormVal->Value);
- break;
- case dwarf::DW_FORM_data8:
- case dwarf::DW_FORM_ref8:
- case dwarf::DW_FORM_ref_sup8:
- onValue((uint64_t)FormVal->Value);
- break;
- case dwarf::DW_FORM_sdata:
- onValue((int64_t)FormVal->Value, true);
- break;
- case dwarf::DW_FORM_udata:
- case dwarf::DW_FORM_ref_udata:
- onValue((uint64_t)FormVal->Value, true);
- break;
- case dwarf::DW_FORM_string:
- onValue(FormVal->CStr);
- break;
- case dwarf::DW_FORM_indirect:
- onValue((uint64_t)FormVal->Value, true);
- Indirect = true;
- Form = static_cast<dwarf::Form>((uint64_t)FormVal->Value);
- ++FormVal;
- break;
- case dwarf::DW_FORM_strp:
- case dwarf::DW_FORM_sec_offset:
- case dwarf::DW_FORM_GNU_ref_alt:
- case dwarf::DW_FORM_GNU_strp_alt:
- case dwarf::DW_FORM_line_strp:
- case dwarf::DW_FORM_strp_sup:
- onVariableSizeValue(FormVal->Value, getOffsetSize(Unit));
- break;
- case dwarf::DW_FORM_ref_sig8:
- onValue((uint64_t)FormVal->Value);
- break;
- case dwarf::DW_FORM_GNU_addr_index:
- case dwarf::DW_FORM_GNU_str_index:
- onValue((uint64_t)FormVal->Value, true);
- break;
- default:
- break;
- }
- } while (Indirect);
- }
- onEndDIE(Unit, Entry);
- }
- onEndCompileUnit(Unit);
- }
-
- return Error::success();
-}
-
-// Explicitly instantiate the two template expansions.
-template class DWARFYAML::VisitorImpl<DWARFYAML::Data>;
-template class DWARFYAML::VisitorImpl<const DWARFYAML::Data>;
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFVisitor.h b/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFVisitor.h
deleted file mode 100644
index 3b2c4303c7f7..000000000000
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFVisitor.h
+++ /dev/null
@@ -1,97 +0,0 @@
-//===--- DWARFVisitor.h -----------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_OBJECTYAML_DWARFVISITOR_H
-#define LLVM_OBJECTYAML_DWARFVISITOR_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/Support/MemoryBuffer.h"
-
-namespace llvm {
-class Error;
-
-namespace DWARFYAML {
-
-struct Data;
-struct Unit;
-struct Entry;
-struct FormValue;
-struct AttributeAbbrev;
-
-/// A class to visit DWARFYAML Compile Units and DIEs in preorder.
-///
-/// Extensions of this class can either maintain const or non-const references
-/// to the DWARFYAML::Data object.
-template <typename T> class VisitorImpl {
-protected:
- T &DebugInfo;
-
- /// Visitor Functions
- /// @{
- virtual void onStartCompileUnit(Unit &CU) {}
- virtual void onEndCompileUnit(Unit &CU) {}
- virtual void onStartDIE(Unit &CU, Entry &DIE) {}
- virtual void onEndDIE(Unit &CU, Entry &DIE) {}
- virtual void onForm(AttributeAbbrev &AttAbbrev, FormValue &Value) {}
- /// @}
-
- /// Const Visitor Functions
- /// @{
- virtual void onStartCompileUnit(const Unit &CU) {}
- virtual void onEndCompileUnit(const Unit &CU) {}
- virtual void onStartDIE(const Unit &CU, const Entry &DIE) {}
- virtual void onEndDIE(const Unit &CU, const Entry &DIE) {}
- virtual void onForm(const AttributeAbbrev &AttAbbrev,
- const FormValue &Value) {}
- /// @}
-
- /// Value visitors
- /// @{
- virtual void onValue(const uint8_t U) {}
- virtual void onValue(const uint16_t U) {}
- virtual void onValue(const uint32_t U) {}
- virtual void onValue(const uint64_t U, const bool LEB = false) {}
- virtual void onValue(const int64_t S, const bool LEB = false) {}
- virtual void onValue(const StringRef String) {}
- virtual void onValue(const MemoryBufferRef MBR) {}
- /// @}
-
-public:
- VisitorImpl(T &DI) : DebugInfo(DI) {}
-
- virtual ~VisitorImpl() {}
-
- Error traverseDebugInfo();
-
-private:
- void onVariableSizeValue(uint64_t U, unsigned Size);
-};
-
-// Making the visitor instantiations extern and explicit in the cpp file. This
-// prevents them from being instantiated in every compile unit that uses the
-// visitors.
-extern template class VisitorImpl<DWARFYAML::Data>;
-extern template class VisitorImpl<const DWARFYAML::Data>;
-
-class Visitor : public VisitorImpl<Data> {
-public:
- Visitor(Data &DI) : VisitorImpl<Data>(DI) {}
-};
-
-class ConstVisitor : public VisitorImpl<const Data> {
-public:
- ConstVisitor(const Data &DI) : VisitorImpl<const Data>(DI) {}
-};
-
-} // namespace DWARFYAML
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFYAML.cpp
index bedf31dc8179..2591bf4d5af4 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFYAML.cpp
@@ -13,28 +13,28 @@
#include "llvm/ObjectYAML/DWARFYAML.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
namespace llvm {
bool DWARFYAML::Data::isEmpty() const {
- return DebugStrings.empty() && AbbrevDecls.empty() && ARanges.empty() &&
- DebugRanges.empty() && !PubNames && !PubTypes && !GNUPubNames &&
- !GNUPubTypes && CompileUnits.empty() && DebugLines.empty();
+ return getNonEmptySectionNames().empty();
}
-SetVector<StringRef> DWARFYAML::Data::getUsedSectionNames() const {
+SetVector<StringRef> DWARFYAML::Data::getNonEmptySectionNames() const {
SetVector<StringRef> SecNames;
- if (!DebugStrings.empty())
+ if (DebugStrings)
SecNames.insert("debug_str");
- if (!ARanges.empty())
+ if (DebugAranges)
SecNames.insert("debug_aranges");
- if (!DebugRanges.empty())
+ if (DebugRanges)
SecNames.insert("debug_ranges");
if (!DebugLines.empty())
SecNames.insert("debug_line");
- if (!DebugAddr.empty())
+ if (DebugAddr)
SecNames.insert("debug_addr");
- if (!AbbrevDecls.empty())
+ if (!DebugAbbrev.empty())
SecNames.insert("debug_abbrev");
if (!CompileUnits.empty())
SecNames.insert("debug_info");
@@ -46,9 +46,46 @@ SetVector<StringRef> DWARFYAML::Data::getUsedSectionNames() const {
SecNames.insert("debug_gnu_pubnames");
if (GNUPubTypes)
SecNames.insert("debug_gnu_pubtypes");
+ if (DebugStrOffsets)
+ SecNames.insert("debug_str_offsets");
+ if (DebugRnglists)
+ SecNames.insert("debug_rnglists");
+ if (DebugLoclists)
+ SecNames.insert("debug_loclists");
return SecNames;
}
+Expected<DWARFYAML::Data::AbbrevTableInfo>
+DWARFYAML::Data::getAbbrevTableInfoByID(uint64_t ID) const {
+ if (AbbrevTableInfoMap.empty()) {
+ uint64_t AbbrevTableOffset = 0;
+ for (auto &AbbrevTable : enumerate(DebugAbbrev)) {
+ // If the abbrev table's ID isn't specified, we use the index as its ID.
+ uint64_t AbbrevTableID =
+ AbbrevTable.value().ID.getValueOr(AbbrevTable.index());
+ auto It = AbbrevTableInfoMap.insert(
+ {AbbrevTableID, AbbrevTableInfo{/*Index=*/AbbrevTable.index(),
+ /*Offset=*/AbbrevTableOffset}});
+ if (!It.second)
+ return createStringError(
+ errc::invalid_argument,
+ "the ID (%" PRIu64 ") of abbrev table with index %zu has been used "
+ "by abbrev table with index %" PRIu64,
+ AbbrevTableID, AbbrevTable.index(), It.first->second.Index);
+
+ AbbrevTableOffset +=
+ getAbbrevTableContentByIndex(AbbrevTable.index()).size();
+ }
+ }
+
+ auto It = AbbrevTableInfoMap.find(ID);
+ if (It == AbbrevTableInfoMap.end())
+ return createStringError(errc::invalid_argument,
+ "cannot find abbrev table whose ID is %" PRIu64,
+ ID);
+ return It->second;
+}
+
namespace yaml {
void MappingTraits<DWARFYAML::Data>::mapping(IO &IO, DWARFYAML::Data &DWARF) {
@@ -56,11 +93,9 @@ void MappingTraits<DWARFYAML::Data>::mapping(IO &IO, DWARFYAML::Data &DWARF) {
DWARFYAML::DWARFContext DWARFCtx;
IO.setContext(&DWARFCtx);
IO.mapOptional("debug_str", DWARF.DebugStrings);
- IO.mapOptional("debug_abbrev", DWARF.AbbrevDecls);
- if (!DWARF.ARanges.empty() || !IO.outputting())
- IO.mapOptional("debug_aranges", DWARF.ARanges);
- if (!DWARF.DebugRanges.empty() || !IO.outputting())
- IO.mapOptional("debug_ranges", DWARF.DebugRanges);
+ IO.mapOptional("debug_abbrev", DWARF.DebugAbbrev);
+ IO.mapOptional("debug_aranges", DWARF.DebugAranges);
+ IO.mapOptional("debug_ranges", DWARF.DebugRanges);
IO.mapOptional("debug_pubnames", DWARF.PubNames);
IO.mapOptional("debug_pubtypes", DWARF.PubTypes);
DWARFCtx.IsGNUPubSec = true;
@@ -69,15 +104,24 @@ void MappingTraits<DWARFYAML::Data>::mapping(IO &IO, DWARFYAML::Data &DWARF) {
IO.mapOptional("debug_info", DWARF.CompileUnits);
IO.mapOptional("debug_line", DWARF.DebugLines);
IO.mapOptional("debug_addr", DWARF.DebugAddr);
+ IO.mapOptional("debug_str_offsets", DWARF.DebugStrOffsets);
+ IO.mapOptional("debug_rnglists", DWARF.DebugRnglists);
+ IO.mapOptional("debug_loclists", DWARF.DebugLoclists);
IO.setContext(OldContext);
}
+void MappingTraits<DWARFYAML::AbbrevTable>::mapping(
+ IO &IO, DWARFYAML::AbbrevTable &AbbrevTable) {
+ IO.mapOptional("ID", AbbrevTable.ID);
+ IO.mapOptional("Table", AbbrevTable.Table);
+}
+
void MappingTraits<DWARFYAML::Abbrev>::mapping(IO &IO,
DWARFYAML::Abbrev &Abbrev) {
IO.mapOptional("Code", Abbrev.Code);
IO.mapRequired("Tag", Abbrev.Tag);
IO.mapRequired("Children", Abbrev.Children);
- IO.mapRequired("Attributes", Abbrev.Attributes);
+ IO.mapOptional("Attributes", Abbrev.Attributes);
}
void MappingTraits<DWARFYAML::AttributeAbbrev>::mapping(
@@ -97,12 +141,12 @@ void MappingTraits<DWARFYAML::ARangeDescriptor>::mapping(
void MappingTraits<DWARFYAML::ARange>::mapping(IO &IO,
DWARFYAML::ARange &ARange) {
IO.mapOptional("Format", ARange.Format, dwarf::DWARF32);
- IO.mapRequired("Length", ARange.Length);
+ IO.mapOptional("Length", ARange.Length);
IO.mapRequired("Version", ARange.Version);
IO.mapRequired("CuOffset", ARange.CuOffset);
- IO.mapRequired("AddrSize", ARange.AddrSize);
- IO.mapRequired("SegSize", ARange.SegSize);
- IO.mapRequired("Descriptors", ARange.Descriptors);
+ IO.mapOptional("AddressSize", ARange.AddrSize);
+ IO.mapOptional("SegmentSelectorSize", ARange.SegSize, 0);
+ IO.mapOptional("Descriptors", ARange.Descriptors);
}
void MappingTraits<DWARFYAML::RangeEntry>::mapping(
@@ -128,6 +172,7 @@ void MappingTraits<DWARFYAML::PubEntry>::mapping(IO &IO,
void MappingTraits<DWARFYAML::PubSection>::mapping(
IO &IO, DWARFYAML::PubSection &Section) {
+ IO.mapOptional("Format", Section.Format, dwarf::DWARF32);
IO.mapRequired("Length", Section.Length);
IO.mapRequired("Version", Section.Version);
IO.mapRequired("UnitOffset", Section.UnitOffset);
@@ -137,18 +182,19 @@ void MappingTraits<DWARFYAML::PubSection>::mapping(
void MappingTraits<DWARFYAML::Unit>::mapping(IO &IO, DWARFYAML::Unit &Unit) {
IO.mapOptional("Format", Unit.Format, dwarf::DWARF32);
- IO.mapRequired("Length", Unit.Length);
+ IO.mapOptional("Length", Unit.Length);
IO.mapRequired("Version", Unit.Version);
if (Unit.Version >= 5)
IO.mapRequired("UnitType", Unit.Type);
- IO.mapRequired("AbbrOffset", Unit.AbbrOffset);
- IO.mapRequired("AddrSize", Unit.AddrSize);
+ IO.mapOptional("AbbrevTableID", Unit.AbbrevTableID);
+ IO.mapOptional("AbbrOffset", Unit.AbbrOffset);
+ IO.mapOptional("AddrSize", Unit.AddrSize);
IO.mapOptional("Entries", Unit.Entries);
}
void MappingTraits<DWARFYAML::Entry>::mapping(IO &IO, DWARFYAML::Entry &Entry) {
IO.mapRequired("AbbrCode", Entry.AbbrCode);
- IO.mapRequired("Values", Entry.Values);
+ IO.mapOptional("Values", Entry.Values);
}
void MappingTraits<DWARFYAML::FormValue>::mapping(
@@ -171,7 +217,7 @@ void MappingTraits<DWARFYAML::LineTableOpcode>::mapping(
IO &IO, DWARFYAML::LineTableOpcode &LineTableOpcode) {
IO.mapRequired("Opcode", LineTableOpcode.Opcode);
if (LineTableOpcode.Opcode == dwarf::DW_LNS_extended_op) {
- IO.mapRequired("ExtLen", LineTableOpcode.ExtLen);
+ IO.mapOptional("ExtLen", LineTableOpcode.ExtLen);
IO.mapRequired("SubOpcode", LineTableOpcode.SubOpcode);
}
@@ -189,20 +235,20 @@ void MappingTraits<DWARFYAML::LineTableOpcode>::mapping(
void MappingTraits<DWARFYAML::LineTable>::mapping(
IO &IO, DWARFYAML::LineTable &LineTable) {
IO.mapOptional("Format", LineTable.Format, dwarf::DWARF32);
- IO.mapRequired("Length", LineTable.Length);
+ IO.mapOptional("Length", LineTable.Length);
IO.mapRequired("Version", LineTable.Version);
- IO.mapRequired("PrologueLength", LineTable.PrologueLength);
+ IO.mapOptional("PrologueLength", LineTable.PrologueLength);
IO.mapRequired("MinInstLength", LineTable.MinInstLength);
if(LineTable.Version >= 4)
IO.mapRequired("MaxOpsPerInst", LineTable.MaxOpsPerInst);
IO.mapRequired("DefaultIsStmt", LineTable.DefaultIsStmt);
IO.mapRequired("LineBase", LineTable.LineBase);
IO.mapRequired("LineRange", LineTable.LineRange);
- IO.mapRequired("OpcodeBase", LineTable.OpcodeBase);
- IO.mapRequired("StandardOpcodeLengths", LineTable.StandardOpcodeLengths);
- IO.mapRequired("IncludeDirs", LineTable.IncludeDirs);
- IO.mapRequired("Files", LineTable.Files);
- IO.mapRequired("Opcodes", LineTable.Opcodes);
+ IO.mapOptional("OpcodeBase", LineTable.OpcodeBase);
+ IO.mapOptional("StandardOpcodeLengths", LineTable.StandardOpcodeLengths);
+ IO.mapOptional("IncludeDirs", LineTable.IncludeDirs);
+ IO.mapOptional("Files", LineTable.Files);
+ IO.mapOptional("Opcodes", LineTable.Opcodes);
}
void MappingTraits<DWARFYAML::SegAddrPair>::mapping(
@@ -221,11 +267,61 @@ void MappingTraits<DWARFYAML::AddrTableEntry>::mapping(
IO.mapOptional("Entries", AddrTable.SegAddrPairs);
}
-void MappingTraits<DWARFYAML::InitialLength>::mapping(
- IO &IO, DWARFYAML::InitialLength &InitialLength) {
- IO.mapRequired("TotalLength", InitialLength.TotalLength);
- if (InitialLength.isDWARF64())
- IO.mapRequired("TotalLength64", InitialLength.TotalLength64);
+void MappingTraits<DWARFYAML::StringOffsetsTable>::mapping(
+ IO &IO, DWARFYAML::StringOffsetsTable &StrOffsetsTable) {
+ IO.mapOptional("Format", StrOffsetsTable.Format, dwarf::DWARF32);
+ IO.mapOptional("Length", StrOffsetsTable.Length);
+ IO.mapOptional("Version", StrOffsetsTable.Version, 5);
+ IO.mapOptional("Padding", StrOffsetsTable.Padding, 0);
+ IO.mapOptional("Offsets", StrOffsetsTable.Offsets);
+}
+
+void MappingTraits<DWARFYAML::DWARFOperation>::mapping(
+ IO &IO, DWARFYAML::DWARFOperation &DWARFOperation) {
+ IO.mapRequired("Operator", DWARFOperation.Operator);
+ IO.mapOptional("Values", DWARFOperation.Values);
+}
+
+void MappingTraits<DWARFYAML::RnglistEntry>::mapping(
+ IO &IO, DWARFYAML::RnglistEntry &RnglistEntry) {
+ IO.mapRequired("Operator", RnglistEntry.Operator);
+ IO.mapOptional("Values", RnglistEntry.Values);
+}
+
+void MappingTraits<DWARFYAML::LoclistEntry>::mapping(
+ IO &IO, DWARFYAML::LoclistEntry &LoclistEntry) {
+ IO.mapRequired("Operator", LoclistEntry.Operator);
+ IO.mapOptional("Values", LoclistEntry.Values);
+ IO.mapOptional("DescriptionsLength", LoclistEntry.DescriptionsLength);
+ IO.mapOptional("Descriptions", LoclistEntry.Descriptions);
+}
+
+template <typename EntryType>
+void MappingTraits<DWARFYAML::ListEntries<EntryType>>::mapping(
+ IO &IO, DWARFYAML::ListEntries<EntryType> &ListEntries) {
+ IO.mapOptional("Entries", ListEntries.Entries);
+ IO.mapOptional("Content", ListEntries.Content);
+}
+
+template <typename EntryType>
+std::string MappingTraits<DWARFYAML::ListEntries<EntryType>>::validate(
+ IO &IO, DWARFYAML::ListEntries<EntryType> &ListEntries) {
+ if (ListEntries.Entries && ListEntries.Content)
+ return "Entries and Content can't be used together";
+ return "";
+}
+
+template <typename EntryType>
+void MappingTraits<DWARFYAML::ListTable<EntryType>>::mapping(
+ IO &IO, DWARFYAML::ListTable<EntryType> &ListTable) {
+ IO.mapOptional("Format", ListTable.Format, dwarf::DWARF32);
+ IO.mapOptional("Length", ListTable.Length);
+ IO.mapOptional("Version", ListTable.Version, 5);
+ IO.mapOptional("AddressSize", ListTable.AddrSize);
+ IO.mapOptional("SegmentSelectorSize", ListTable.SegSelectorSize, 0);
+ IO.mapOptional("OffsetEntryCount", ListTable.OffsetEntryCount);
+ IO.mapOptional("Offsets", ListTable.Offsets);
+ IO.mapOptional("Lists", ListTable.Lists);
}
} // end namespace yaml
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp
index f9f2f128e2e8..e477a1b2b8f2 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -124,6 +124,11 @@ public:
if (checkLimit(sizeof(T)))
support::endian::write<T>(OS, Val, E);
}
+
+ void updateDataAt(uint64_t Pos, void *Data, size_t Size) {
+ assert(Pos >= InitialOffset && Pos + Size <= getOffset());
+ memcpy(&Buf[Pos - InitialOffset], Data, Size);
+ }
};
// Used to keep track of section and symbol names, so that in the YAML file
@@ -168,15 +173,7 @@ struct Fragment {
/// TODO: This class still has a ways to go before it is truly a "single
/// point of truth".
template <class ELFT> class ELFState {
- typedef typename ELFT::Ehdr Elf_Ehdr;
- typedef typename ELFT::Phdr Elf_Phdr;
- typedef typename ELFT::Shdr Elf_Shdr;
- typedef typename ELFT::Sym Elf_Sym;
- typedef typename ELFT::Rel Elf_Rel;
- typedef typename ELFT::Rela Elf_Rela;
- typedef typename ELFT::Relr Elf_Relr;
- typedef typename ELFT::Dyn Elf_Dyn;
- typedef typename ELFT::uint uintX_t;
+ LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
enum class SymtabType { Static, Dynamic };
@@ -232,7 +229,7 @@ template <class ELFT> class ELFState {
ArrayRef<typename ELFT::Shdr> SHeaders);
void finalizeStrings();
- void writeELFHeader(raw_ostream &OS, uint64_t SHOff);
+ void writeELFHeader(raw_ostream &OS);
void writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::NoBitsSection &Section,
ContiguousBlobAccumulator &CBA);
@@ -245,7 +242,8 @@ template <class ELFT> class ELFState {
void writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::RelrSection &Section,
ContiguousBlobAccumulator &CBA);
- void writeSectionContent(Elf_Shdr &SHeader, const ELFYAML::Group &Group,
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::GroupSection &Group,
ContiguousBlobAccumulator &CBA);
void writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::SymtabShndxSection &Shndx,
@@ -260,6 +258,9 @@ template <class ELFT> class ELFState {
const ELFYAML::VerdefSection &Section,
ContiguousBlobAccumulator &CBA);
void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::ARMIndexTableSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::MipsABIFlags &Section,
ContiguousBlobAccumulator &CBA);
void writeSectionContent(Elf_Shdr &SHeader,
@@ -269,6 +270,9 @@ template <class ELFT> class ELFState {
const ELFYAML::StackSizesSection &Section,
ContiguousBlobAccumulator &CBA);
void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::BBAddrMapSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::HashSection &Section,
ContiguousBlobAccumulator &CBA);
void writeSectionContent(Elf_Shdr &SHeader,
@@ -331,12 +335,22 @@ ELFState<ELFT>::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH)
std::make_unique<ELFYAML::Section>(
ELFYAML::Chunk::ChunkKind::RawContent, /*IsImplicit=*/true));
- // We add a technical suffix for each unnamed section/fill. It does not affect
- // the output, but allows us to map them by name in the code and report better
- // error messages.
StringSet<> DocSections;
+ ELFYAML::SectionHeaderTable *SecHdrTable = nullptr;
for (size_t I = 0; I < Doc.Chunks.size(); ++I) {
const std::unique_ptr<ELFYAML::Chunk> &C = Doc.Chunks[I];
+
+ // We might have an explicit section header table declaration.
+ if (auto S = dyn_cast<ELFYAML::SectionHeaderTable>(C.get())) {
+ if (SecHdrTable)
+ reportError("multiple section header tables are not allowed");
+ SecHdrTable = S;
+ continue;
+ }
+
+ // We add a technical suffix for each unnamed section/fill. It does not
+ // affect the output, but allows us to map them by name in the code and
+ // report better error messages.
if (C->Name.empty()) {
std::string NewName = ELFYAML::appendUniqueSuffix(
/*Name=*/"", "index " + Twine(I));
@@ -355,11 +369,13 @@ ELFState<ELFT>::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH)
if (Doc.Symbols)
ImplicitSections.push_back(".symtab");
if (Doc.DWARF)
- for (StringRef DebugSecName : Doc.DWARF->getUsedSectionNames()) {
+ for (StringRef DebugSecName : Doc.DWARF->getNonEmptySectionNames()) {
std::string SecName = ("." + DebugSecName).str();
ImplicitSections.push_back(StringRef(SecName).copy(StringAlloc));
}
- ImplicitSections.insert(ImplicitSections.end(), {".strtab", ".shstrtab"});
+ ImplicitSections.insert(ImplicitSections.end(), {".strtab"});
+ if (!SecHdrTable || !SecHdrTable->NoHeaders.getValueOr(false))
+ ImplicitSections.insert(ImplicitSections.end(), {".shstrtab"});
// Insert placeholders for implicit sections that are not
// defined explicitly in YAML.
@@ -367,15 +383,37 @@ ELFState<ELFT>::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH)
if (DocSections.count(SecName))
continue;
- std::unique_ptr<ELFYAML::Chunk> Sec = std::make_unique<ELFYAML::Section>(
+ std::unique_ptr<ELFYAML::Section> Sec = std::make_unique<ELFYAML::Section>(
ELFYAML::Chunk::ChunkKind::RawContent, true /*IsImplicit*/);
Sec->Name = SecName;
- Doc.Chunks.push_back(std::move(Sec));
+
+ if (SecName == ".dynsym")
+ Sec->Type = ELF::SHT_DYNSYM;
+ else if (SecName == ".symtab")
+ Sec->Type = ELF::SHT_SYMTAB;
+ else
+ Sec->Type = ELF::SHT_STRTAB;
+
+ // When the section header table is explicitly defined at the end of the
+ // sections list, it is reasonable to assume that the user wants to reorder
+ // section headers, but still wants to place the section header table after
+ // all sections, like it normally happens. In this case we want to insert
+ // other implicit sections right before the section header table.
+ if (Doc.Chunks.back().get() == SecHdrTable)
+ Doc.Chunks.insert(Doc.Chunks.end() - 1, std::move(Sec));
+ else
+ Doc.Chunks.push_back(std::move(Sec));
}
+
+ // Insert the section header table implicitly at the end, when it is not
+ // explicitly defined.
+ if (!SecHdrTable)
+ Doc.Chunks.push_back(
+ std::make_unique<ELFYAML::SectionHeaderTable>(/*IsImplicit=*/true));
}
template <class ELFT>
-void ELFState<ELFT>::writeELFHeader(raw_ostream &OS, uint64_t SHOff) {
+void ELFState<ELFT>::writeELFHeader(raw_ostream &OS) {
using namespace llvm::ELF;
Elf_Ehdr Header;
@@ -390,7 +428,12 @@ void ELFState<ELFT>::writeELFHeader(raw_ostream &OS, uint64_t SHOff) {
Header.e_ident[EI_OSABI] = Doc.Header.OSABI;
Header.e_ident[EI_ABIVERSION] = Doc.Header.ABIVersion;
Header.e_type = Doc.Header.Type;
- Header.e_machine = Doc.Header.Machine;
+
+ if (Doc.Header.Machine)
+ Header.e_machine = *Doc.Header.Machine;
+ else
+ Header.e_machine = EM_NONE;
+
Header.e_version = EV_CURRENT;
Header.e_entry = Doc.Header.Entry;
Header.e_flags = Doc.Header.Flags;
@@ -420,34 +463,27 @@ void ELFState<ELFT>::writeELFHeader(raw_ostream &OS, uint64_t SHOff) {
Header.e_shentsize = Doc.Header.EShEntSize ? (uint16_t)*Doc.Header.EShEntSize
: sizeof(Elf_Shdr);
- const bool NoShdrs =
- Doc.SectionHeaders && Doc.SectionHeaders->NoHeaders.getValueOr(false);
+ const ELFYAML::SectionHeaderTable &SectionHeaders =
+ Doc.getSectionHeaderTable();
if (Doc.Header.EShOff)
Header.e_shoff = *Doc.Header.EShOff;
- else if (NoShdrs)
- Header.e_shoff = 0;
+ else if (SectionHeaders.Offset)
+ Header.e_shoff = *SectionHeaders.Offset;
else
- Header.e_shoff = SHOff;
+ Header.e_shoff = 0;
if (Doc.Header.EShNum)
Header.e_shnum = *Doc.Header.EShNum;
- else if (!Doc.SectionHeaders)
- Header.e_shnum = Doc.getSections().size();
- else if (NoShdrs)
- Header.e_shnum = 0;
else
- Header.e_shnum =
- (Doc.SectionHeaders->Sections ? Doc.SectionHeaders->Sections->size()
- : 0) +
- /*Null section*/ 1;
+ Header.e_shnum = SectionHeaders.getNumHeaders(Doc.getSections().size());
if (Doc.Header.EShStrNdx)
Header.e_shstrndx = *Doc.Header.EShStrNdx;
- else if (NoShdrs || ExcludedSectionHeaders.count(".shstrtab"))
- Header.e_shstrndx = 0;
- else
+ else if (SectionHeaders.Offset && !ExcludedSectionHeaders.count(".shstrtab"))
Header.e_shstrndx = SN2I.get(".shstrtab");
+ else
+ Header.e_shstrndx = 0;
OS.write((const char *)&Header, sizeof(Header));
}
@@ -455,12 +491,16 @@ void ELFState<ELFT>::writeELFHeader(raw_ostream &OS, uint64_t SHOff) {
template <class ELFT>
void ELFState<ELFT>::initProgramHeaders(std::vector<Elf_Phdr> &PHeaders) {
DenseMap<StringRef, ELFYAML::Fill *> NameToFill;
- for (const std::unique_ptr<ELFYAML::Chunk> &D : Doc.Chunks)
- if (auto S = dyn_cast<ELFYAML::Fill>(D.get()))
+ DenseMap<StringRef, size_t> NameToIndex;
+ for (size_t I = 0, E = Doc.Chunks.size(); I != E; ++I) {
+ if (auto S = dyn_cast<ELFYAML::Fill>(Doc.Chunks[I].get()))
NameToFill[S->Name] = S;
+ NameToIndex[Doc.Chunks[I]->Name] = I + 1;
+ }
std::vector<ELFYAML::Section *> Sections = Doc.getSections();
- for (ELFYAML::ProgramHeader &YamlPhdr : Doc.ProgramHeaders) {
+ for (size_t I = 0, E = Doc.ProgramHeaders.size(); I != E; ++I) {
+ ELFYAML::ProgramHeader &YamlPhdr = Doc.ProgramHeaders[I];
Elf_Phdr Phdr;
zero(Phdr);
Phdr.p_type = YamlPhdr.Type;
@@ -469,22 +509,30 @@ void ELFState<ELFT>::initProgramHeaders(std::vector<Elf_Phdr> &PHeaders) {
Phdr.p_paddr = YamlPhdr.PAddr;
PHeaders.push_back(Phdr);
- // Map Sections list to corresponding chunks.
- for (const ELFYAML::SectionName &SecName : YamlPhdr.Sections) {
- if (ELFYAML::Fill *Fill = NameToFill.lookup(SecName.Section)) {
- YamlPhdr.Chunks.push_back(Fill);
- continue;
- }
+ if (!YamlPhdr.FirstSec && !YamlPhdr.LastSec)
+ continue;
- unsigned Index;
- if (SN2I.lookup(SecName.Section, Index)) {
- YamlPhdr.Chunks.push_back(Sections[Index]);
- continue;
- }
+ // Get the index of the section, or 0 in the case when the section doesn't
+ // exist.
+ size_t First = NameToIndex[*YamlPhdr.FirstSec];
+ if (!First)
+ reportError("unknown section or fill referenced: '" + *YamlPhdr.FirstSec +
+ "' by the 'FirstSec' key of the program header with index " +
+ Twine(I));
+ size_t Last = NameToIndex[*YamlPhdr.LastSec];
+ if (!Last)
+ reportError("unknown section or fill referenced: '" + *YamlPhdr.LastSec +
+ "' by the 'LastSec' key of the program header with index " +
+ Twine(I));
+ if (!First || !Last)
+ continue;
- reportError("unknown section or fill referenced: '" + SecName.Section +
- "' by program header");
- }
+ if (First > Last)
+ reportError("program header with index " + Twine(I) +
+ ": the section index of " + *YamlPhdr.FirstSec +
+ " is greater than the index of " + *YamlPhdr.LastSec);
+
+ for (size_t I = First; I <= Last; ++I)
+ YamlPhdr.Chunks.push_back(Doc.Chunks[I - 1].get());
}
}
@@ -504,14 +552,16 @@ unsigned ELFState<ELFT>::toSectionIndex(StringRef S, StringRef LocSec,
return 0;
}
- if (!Doc.SectionHeaders || (Doc.SectionHeaders->NoHeaders &&
- !Doc.SectionHeaders->NoHeaders.getValue()))
+ const ELFYAML::SectionHeaderTable &SectionHeaders =
+ Doc.getSectionHeaderTable();
+ if (SectionHeaders.IsImplicit ||
+ (SectionHeaders.NoHeaders && !SectionHeaders.NoHeaders.getValue()))
return Index;
- assert(!Doc.SectionHeaders->NoHeaders.getValueOr(false) ||
- !Doc.SectionHeaders->Sections);
+ assert(!SectionHeaders.NoHeaders.getValueOr(false) ||
+ !SectionHeaders.Sections);
size_t FirstExcluded =
- Doc.SectionHeaders->Sections ? Doc.SectionHeaders->Sections->size() : 0;
+ SectionHeaders.Sections ? SectionHeaders.Sections->size() : 0;
if (Index >= FirstExcluded) {
if (LocSym.empty())
reportError("unable to link '" + LocSec + "' to excluded section '" + S +
@@ -542,6 +592,8 @@ template <class ELFT>
static void overrideFields(ELFYAML::Section *From, typename ELFT::Shdr &To) {
if (!From)
return;
+ if (From->ShAddrAlign)
+ To.sh_addralign = *From->ShAddrAlign;
if (From->ShFlags)
To.sh_flags = *From->ShFlags;
if (From->ShName)
@@ -550,6 +602,8 @@ static void overrideFields(ELFYAML::Section *From, typename ELFT::Shdr &To) {
To.sh_offset = *From->ShOffset;
if (From->ShSize)
To.sh_size = *From->ShSize;
+ if (From->ShType)
+ To.sh_type = *From->ShType;
}
template <class ELFT>
@@ -619,6 +673,45 @@ uint64_t ELFState<ELFT>::getSectionNameOffset(StringRef Name) {
return DotShStrtab.getOffset(Name);
}
+static uint64_t writeContent(ContiguousBlobAccumulator &CBA,
+ const Optional<yaml::BinaryRef> &Content,
+ const Optional<llvm::yaml::Hex64> &Size) {
+ size_t ContentSize = 0;
+ if (Content) {
+ CBA.writeAsBinary(*Content);
+ ContentSize = Content->binary_size();
+ }
+
+ if (!Size)
+ return ContentSize;
+
+ CBA.writeZeros(*Size - ContentSize);
+ return *Size;
+}
+
+static StringRef getDefaultLinkSec(unsigned SecType) {
+ switch (SecType) {
+ case ELF::SHT_REL:
+ case ELF::SHT_RELA:
+ case ELF::SHT_GROUP:
+ case ELF::SHT_LLVM_CALL_GRAPH_PROFILE:
+ case ELF::SHT_LLVM_ADDRSIG:
+ return ".symtab";
+ case ELF::SHT_GNU_versym:
+ case ELF::SHT_HASH:
+ case ELF::SHT_GNU_HASH:
+ return ".dynsym";
+ case ELF::SHT_DYNSYM:
+ case ELF::SHT_GNU_verdef:
+ case ELF::SHT_GNU_verneed:
+ return ".dynstr";
+ case ELF::SHT_SYMTAB:
+ return ".strtab";
+ default:
+ return "";
+ }
+}
+
template <class ELFT>
void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
ContiguousBlobAccumulator &CBA) {
@@ -634,16 +727,51 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
continue;
}
+ if (ELFYAML::SectionHeaderTable *S =
+ dyn_cast<ELFYAML::SectionHeaderTable>(D.get())) {
+ if (S->NoHeaders.getValueOr(false))
+ continue;
+
+ if (!S->Offset)
+ S->Offset = alignToOffset(CBA, sizeof(typename ELFT::uint),
+ /*Offset=*/None);
+ else
+ S->Offset = alignToOffset(CBA, /*Align=*/1, S->Offset);
+
+ uint64_t Size = S->getNumHeaders(SHeaders.size()) * sizeof(Elf_Shdr);
+ // The full section header information might be not available here, so
+ // fill the space with zeroes as a placeholder.
+ CBA.writeZeros(Size);
+ LocationCounter += Size;
+ continue;
+ }
+
ELFYAML::Section *Sec = cast<ELFYAML::Section>(D.get());
- bool IsFirstUndefSection = D == Doc.Chunks.front();
+ bool IsFirstUndefSection = Sec == Doc.getSections().front();
if (IsFirstUndefSection && Sec->IsImplicit)
continue;
+ Elf_Shdr &SHeader = SHeaders[SN2I.get(Sec->Name)];
+ if (Sec->Link) {
+ SHeader.sh_link = toSectionIndex(*Sec->Link, Sec->Name);
+ } else {
+ StringRef LinkSec = getDefaultLinkSec(Sec->Type);
+ unsigned Link = 0;
+ if (!LinkSec.empty() && !ExcludedSectionHeaders.count(LinkSec) &&
+ SN2I.lookup(LinkSec, Link))
+ SHeader.sh_link = Link;
+ }
+
+ if (Sec->EntSize)
+ SHeader.sh_entsize = *Sec->EntSize;
+ else
+ SHeader.sh_entsize = ELFYAML::getDefaultShEntSize<ELFT>(
+ Doc.Header.Machine.getValueOr(ELF::EM_NONE), Sec->Type, Sec->Name);
+
// We have a few sections like string or symbol tables that are usually
// added implicitly to the end. However, if they are explicitly specified
// in the YAML, we need to write them here. This ensures the file offset
// remains correct.
- Elf_Shdr &SHeader = SHeaders[SN2I.get(Sec->Name)];
if (initImplicitHeader(CBA, SHeader, Sec->Name,
Sec->IsImplicit ? nullptr : Sec))
continue;
@@ -665,9 +793,6 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
assignSectionAddress(SHeader, Sec);
- if (!Sec->Link.empty())
- SHeader.sh_link = toSectionIndex(Sec->Link, Sec->Name);
-
if (IsFirstUndefSection) {
if (auto RawSec = dyn_cast<ELFYAML::RawContentSection>(Sec)) {
// We do not write any content for special SHN_UNDEF section.
@@ -676,9 +801,16 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
if (RawSec->Info)
SHeader.sh_info = *RawSec->Info;
}
- if (Sec->EntSize)
- SHeader.sh_entsize = *Sec->EntSize;
- } else if (auto S = dyn_cast<ELFYAML::RawContentSection>(Sec)) {
+
+ LocationCounter += SHeader.sh_size;
+ overrideFields<ELFT>(Sec, SHeader);
+ continue;
+ }
+
+ if (!isa<ELFYAML::NoBitsSection>(Sec) && (Sec->Content || Sec->Size))
+ SHeader.sh_size = writeContent(CBA, Sec->Content, Sec->Size);
+
+ if (auto S = dyn_cast<ELFYAML::RawContentSection>(Sec)) {
writeSectionContent(SHeader, *S, CBA);
} else if (auto S = dyn_cast<ELFYAML::SymtabShndxSection>(Sec)) {
writeSectionContent(SHeader, *S, CBA);
@@ -686,7 +818,9 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
writeSectionContent(SHeader, *S, CBA);
} else if (auto S = dyn_cast<ELFYAML::RelrSection>(Sec)) {
writeSectionContent(SHeader, *S, CBA);
- } else if (auto S = dyn_cast<ELFYAML::Group>(Sec)) {
+ } else if (auto S = dyn_cast<ELFYAML::GroupSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::ARMIndexTableSection>(Sec)) {
writeSectionContent(SHeader, *S, CBA);
} else if (auto S = dyn_cast<ELFYAML::MipsABIFlags>(Sec)) {
writeSectionContent(SHeader, *S, CBA);
@@ -716,6 +850,8 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
writeSectionContent(SHeader, *S, CBA);
} else if (auto S = dyn_cast<ELFYAML::CallGraphProfileSection>(Sec)) {
writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::BBAddrMapSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
} else {
llvm_unreachable("Unknown section type");
}
@@ -755,22 +891,6 @@ static size_t findFirstNonGlobal(ArrayRef<ELFYAML::Symbol> Symbols) {
return Symbols.size();
}
-static uint64_t writeContent(ContiguousBlobAccumulator &CBA,
- const Optional<yaml::BinaryRef> &Content,
- const Optional<llvm::yaml::Hex64> &Size) {
- size_t ContentSize = 0;
- if (Content) {
- CBA.writeAsBinary(*Content);
- ContentSize = Content->binary_size();
- }
-
- if (!Size)
- return ContentSize;
-
- CBA.writeZeros(*Size - ContentSize);
- return *Size;
-}
-
template <class ELFT>
std::vector<typename ELFT::Sym>
ELFState<ELFT>::toELFSymbols(ArrayRef<ELFYAML::Symbol> Symbols,
@@ -791,14 +911,14 @@ ELFState<ELFT>::toELFSymbols(ArrayRef<ELFYAML::Symbol> Symbols,
Symbol.st_name = Strtab.getOffset(ELFYAML::dropUniqueSuffix(Sym.Name));
Symbol.setBindingAndType(Sym.Binding, Sym.Type);
- if (!Sym.Section.empty())
- Symbol.st_shndx = toSectionIndex(Sym.Section, "", Sym.Name);
+ if (Sym.Section)
+ Symbol.st_shndx = toSectionIndex(*Sym.Section, "", Sym.Name);
else if (Sym.Index)
Symbol.st_shndx = *Sym.Index;
- Symbol.st_value = Sym.Value;
+ Symbol.st_value = Sym.Value.getValueOr(yaml::Hex64(0));
Symbol.st_other = Sym.Other ? *Sym.Other : 0;
- Symbol.st_size = Sym.Size;
+ Symbol.st_size = Sym.Size.getValueOr(yaml::Hex64(0));
}
return Ret;
@@ -834,7 +954,6 @@ void ELFState<ELFT>::initSymtabSectionHeader(Elf_Shdr &SHeader,
}
}
- zero(SHeader);
SHeader.sh_name = getSectionNameOffset(IsStatic ? ".symtab" : ".dynsym");
if (YAMLSec)
@@ -842,26 +961,6 @@ void ELFState<ELFT>::initSymtabSectionHeader(Elf_Shdr &SHeader,
else
SHeader.sh_type = IsStatic ? ELF::SHT_SYMTAB : ELF::SHT_DYNSYM;
- if (RawSec && !RawSec->Link.empty()) {
- // If the Link field is explicitly defined in the document,
- // we should use it.
- SHeader.sh_link = toSectionIndex(RawSec->Link, RawSec->Name);
- } else {
- // When we describe the .dynsym section in the document explicitly, it is
- // allowed to omit the "DynamicSymbols" tag. In this case .dynstr is not
- // added implicitly and we should be able to leave the Link zeroed if
- // .dynstr is not defined.
- unsigned Link = 0;
- if (IsStatic) {
- if (!ExcludedSectionHeaders.count(".strtab"))
- Link = SN2I.get(".strtab");
- } else {
- if (!ExcludedSectionHeaders.count(".dynstr"))
- SN2I.lookup(".dynstr", Link);
- }
- SHeader.sh_link = Link;
- }
-
if (YAMLSec && YAMLSec->Flags)
SHeader.sh_flags = *YAMLSec->Flags;
else if (!IsStatic)
@@ -871,14 +970,12 @@ void ELFState<ELFT>::initSymtabSectionHeader(Elf_Shdr &SHeader,
// then we should set the fields requested.
SHeader.sh_info = (RawSec && RawSec->Info) ? (unsigned)(*RawSec->Info)
: findFirstNonGlobal(Symbols) + 1;
- SHeader.sh_entsize = (YAMLSec && YAMLSec->EntSize)
- ? (uint64_t)(*YAMLSec->EntSize)
- : sizeof(Elf_Sym);
SHeader.sh_addralign = YAMLSec ? (uint64_t)YAMLSec->AddressAlign : 8;
assignSectionAddress(SHeader, YAMLSec);
- SHeader.sh_offset = alignToOffset(CBA, SHeader.sh_addralign, /*Offset=*/None);
+ SHeader.sh_offset =
+ alignToOffset(CBA, SHeader.sh_addralign, RawSec ? RawSec->Offset : None);
if (RawSec && (RawSec->Content || RawSec->Size)) {
assert(Symbols.empty());
@@ -897,7 +994,6 @@ void ELFState<ELFT>::initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name,
StringTableBuilder &STB,
ContiguousBlobAccumulator &CBA,
ELFYAML::Section *YAMLSec) {
- zero(SHeader);
SHeader.sh_name = getSectionNameOffset(Name);
SHeader.sh_type = YAMLSec ? YAMLSec->Type : ELF::SHT_STRTAB;
SHeader.sh_addralign = YAMLSec ? (uint64_t)YAMLSec->AddressAlign : 1;
@@ -905,7 +1001,8 @@ void ELFState<ELFT>::initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name,
ELFYAML::RawContentSection *RawSec =
dyn_cast_or_null<ELFYAML::RawContentSection>(YAMLSec);
- SHeader.sh_offset = alignToOffset(CBA, SHeader.sh_addralign, /*Offset=*/None);
+ SHeader.sh_offset = alignToOffset(CBA, SHeader.sh_addralign,
+ YAMLSec ? YAMLSec->Offset : None);
if (RawSec && (RawSec->Content || RawSec->Size)) {
SHeader.sh_size = writeContent(CBA, RawSec->Content, RawSec->Size);
@@ -915,9 +1012,6 @@ void ELFState<ELFT>::initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name,
SHeader.sh_size = STB.getSize();
}
- if (YAMLSec && YAMLSec->EntSize)
- SHeader.sh_entsize = *YAMLSec->EntSize;
-
if (RawSec && RawSec->Info)
SHeader.sh_info = *RawSec->Info;
@@ -930,7 +1024,7 @@ void ELFState<ELFT>::initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name,
}
static bool shouldEmitDWARF(DWARFYAML::Data &DWARF, StringRef Name) {
- SetVector<StringRef> DebugSecNames = DWARF.getUsedSectionNames();
+ SetVector<StringRef> DebugSecNames = DWARF.getNonEmptySectionNames();
return Name.consume_front(".") && DebugSecNames.count(Name);
}
@@ -946,37 +1040,9 @@ Expected<uint64_t> emitDWARF(typename ELFT::Shdr &SHeader, StringRef Name,
return 0;
uint64_t BeginOffset = CBA.tell();
- Error Err = Error::success();
- cantFail(std::move(Err));
-
- if (Name == ".debug_str")
- Err = DWARFYAML::emitDebugStr(*OS, DWARF);
- else if (Name == ".debug_aranges")
- Err = DWARFYAML::emitDebugAranges(*OS, DWARF);
- else if (Name == ".debug_ranges")
- Err = DWARFYAML::emitDebugRanges(*OS, DWARF);
- else if (Name == ".debug_line")
- Err = DWARFYAML::emitDebugLine(*OS, DWARF);
- else if (Name == ".debug_addr")
- Err = DWARFYAML::emitDebugAddr(*OS, DWARF);
- else if (Name == ".debug_abbrev")
- Err = DWARFYAML::emitDebugAbbrev(*OS, DWARF);
- else if (Name == ".debug_info")
- Err = DWARFYAML::emitDebugInfo(*OS, DWARF);
- else if (Name == ".debug_pubnames")
- Err = DWARFYAML::emitPubSection(*OS, *DWARF.PubNames, DWARF.IsLittleEndian);
- else if (Name == ".debug_pubtypes")
- Err = DWARFYAML::emitPubSection(*OS, *DWARF.PubTypes, DWARF.IsLittleEndian);
- else if (Name == ".debug_gnu_pubnames")
- Err = DWARFYAML::emitPubSection(*OS, *DWARF.GNUPubNames,
- DWARF.IsLittleEndian, /*IsGNUStyle=*/true);
- else if (Name == ".debug_gnu_pubtypes")
- Err = DWARFYAML::emitPubSection(*OS, *DWARF.GNUPubTypes,
- DWARF.IsLittleEndian, /*IsGNUStyle=*/true);
- else
- llvm_unreachable("unexpected emitDWARF() call");
- if (Err)
+ auto EmitFunc = DWARFYAML::getDWARFEmitterByName(Name.substr(1));
+ if (Error Err = EmitFunc(*OS, DWARF))
return std::move(Err);
return CBA.tell() - BeginOffset;
@@ -986,7 +1052,6 @@ template <class ELFT>
void ELFState<ELFT>::initDWARFSectionHeader(Elf_Shdr &SHeader, StringRef Name,
ContiguousBlobAccumulator &CBA,
ELFYAML::Section *YAMLSec) {
- zero(SHeader);
SHeader.sh_name = getSectionNameOffset(ELFYAML::dropUniqueSuffix(Name));
SHeader.sh_type = YAMLSec ? YAMLSec->Type : ELF::SHT_PROGBITS;
SHeader.sh_addralign = YAMLSec ? (uint64_t)YAMLSec->AddressAlign : 1;
@@ -1013,11 +1078,6 @@ void ELFState<ELFT>::initDWARFSectionHeader(Elf_Shdr &SHeader, StringRef Name,
llvm_unreachable("debug sections can only be initialized via the 'DWARF' "
"entry or a RawContentSection");
- if (YAMLSec && YAMLSec->EntSize)
- SHeader.sh_entsize = *YAMLSec->EntSize;
- else if (Name == ".debug_str")
- SHeader.sh_entsize = 1;
-
if (RawSec && RawSec->Info)
SHeader.sh_info = *RawSec->Info;
@@ -1026,9 +1086,6 @@ void ELFState<ELFT>::initDWARFSectionHeader(Elf_Shdr &SHeader, StringRef Name,
else if (Name == ".debug_str")
SHeader.sh_flags = ELF::SHF_MERGE | ELF::SHF_STRINGS;
- if (YAMLSec && !YAMLSec->Link.empty())
- SHeader.sh_link = toSectionIndex(YAMLSec->Link, Name);
-
assignSectionAddress(SHeader, YAMLSec);
}
@@ -1120,8 +1177,8 @@ void ELFState<ELFT>::setProgramHeaderLayout(std::vector<Elf_Phdr> &PHeaders,
}
}
-static bool shouldAllocateFileSpace(ArrayRef<ELFYAML::ProgramHeader> Phdrs,
- const ELFYAML::NoBitsSection &S) {
+bool llvm::ELFYAML::shouldAllocateFileSpace(
+ ArrayRef<ELFYAML::ProgramHeader> Phdrs, const ELFYAML::NoBitsSection &S) {
for (const ELFYAML::ProgramHeader &PH : Phdrs) {
auto It = llvm::find_if(
PH.Chunks, [&](ELFYAML::Chunk *C) { return C->Name == S.Name; });
@@ -1138,34 +1195,30 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::NoBitsSection &S,
ContiguousBlobAccumulator &CBA) {
- // SHT_NOBITS sections do not have any content to write.
- SHeader.sh_entsize = 0;
- SHeader.sh_size = S.Size;
+ if (!S.Size)
+ return;
+
+ SHeader.sh_size = *S.Size;
// When a nobits section is followed by a non-nobits section or fill
// in the same segment, we allocate the file space for it. This behavior
// matches linkers.
if (shouldAllocateFileSpace(Doc.ProgramHeaders, S))
- CBA.writeZeros(S.Size);
+ CBA.writeZeros(*S.Size);
}
template <class ELFT>
void ELFState<ELFT>::writeSectionContent(
Elf_Shdr &SHeader, const ELFYAML::RawContentSection &Section,
ContiguousBlobAccumulator &CBA) {
- SHeader.sh_size = writeContent(CBA, Section.Content, Section.Size);
-
- if (Section.EntSize)
- SHeader.sh_entsize = *Section.EntSize;
-
if (Section.Info)
SHeader.sh_info = *Section.Info;
}
-static bool isMips64EL(const ELFYAML::Object &Doc) {
- return Doc.Header.Machine == ELFYAML::ELF_EM(llvm::ELF::EM_MIPS) &&
- Doc.Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64) &&
- Doc.Header.Data == ELFYAML::ELF_ELFDATA(ELF::ELFDATA2LSB);
+static bool isMips64EL(const ELFYAML::Object &Obj) {
+ return Obj.getMachine() == llvm::ELF::EM_MIPS &&
+ Obj.Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64) &&
+ Obj.Header.Data == ELFYAML::ELF_ELFDATA(ELF::ELFDATA2LSB);
}
template <class ELFT>
@@ -1176,27 +1229,17 @@ void ELFState<ELFT>::writeSectionContent(
Section.Type == llvm::ELF::SHT_RELA) &&
"Section type is not SHT_REL nor SHT_RELA");
- bool IsRela = Section.Type == llvm::ELF::SHT_RELA;
- if (Section.EntSize)
- SHeader.sh_entsize = *Section.EntSize;
- else
- SHeader.sh_entsize = IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
- SHeader.sh_size = (IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel)) *
- Section.Relocations.size();
-
- // For relocation section set link to .symtab by default.
- unsigned Link = 0;
- if (Section.Link.empty() && !ExcludedSectionHeaders.count(".symtab") &&
- SN2I.lookup(".symtab", Link))
- SHeader.sh_link = Link;
-
if (!Section.RelocatableSec.empty())
SHeader.sh_info = toSectionIndex(Section.RelocatableSec, Section.Name);
- for (const auto &Rel : Section.Relocations) {
- unsigned SymIdx = Rel.Symbol ? toSymbolIndex(*Rel.Symbol, Section.Name,
- Section.Link == ".dynsym")
- : 0;
+ if (!Section.Relocations)
+ return;
+
+ const bool IsRela = Section.Type == llvm::ELF::SHT_RELA;
+ for (const ELFYAML::Relocation &Rel : *Section.Relocations) {
+ const bool IsDynamic = Section.Link && (*Section.Link == ".dynsym");
+ unsigned SymIdx =
+ Rel.Symbol ? toSymbolIndex(*Rel.Symbol, Section.Name, IsDynamic) : 0;
if (IsRela) {
Elf_Rela REntry;
zero(REntry);
@@ -1212,20 +1255,15 @@ void ELFState<ELFT>::writeSectionContent(
CBA.write((const char *)&REntry, sizeof(REntry));
}
}
+
+ SHeader.sh_size = (IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel)) *
+ Section.Relocations->size();
}
template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::RelrSection &Section,
ContiguousBlobAccumulator &CBA) {
- SHeader.sh_entsize =
- Section.EntSize ? uint64_t(*Section.EntSize) : sizeof(Elf_Relr);
-
- if (Section.Content) {
- SHeader.sh_size = writeContent(CBA, Section.Content, None);
- return;
- }
-
if (!Section.Entries)
return;
@@ -1243,33 +1281,34 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(
Elf_Shdr &SHeader, const ELFYAML::SymtabShndxSection &Shndx,
ContiguousBlobAccumulator &CBA) {
- for (uint32_t E : Shndx.Entries)
- CBA.write<uint32_t>(E, ELFT::TargetEndianness);
+ if (Shndx.Content || Shndx.Size) {
+ SHeader.sh_size = writeContent(CBA, Shndx.Content, Shndx.Size);
+ return;
+ }
- SHeader.sh_entsize = Shndx.EntSize ? (uint64_t)*Shndx.EntSize : 4;
- SHeader.sh_size = Shndx.Entries.size() * SHeader.sh_entsize;
+ if (!Shndx.Entries)
+ return;
+
+ for (uint32_t E : *Shndx.Entries)
+ CBA.write<uint32_t>(E, ELFT::TargetEndianness);
+ SHeader.sh_size = Shndx.Entries->size() * SHeader.sh_entsize;
}
template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
- const ELFYAML::Group &Section,
+ const ELFYAML::GroupSection &Section,
ContiguousBlobAccumulator &CBA) {
assert(Section.Type == llvm::ELF::SHT_GROUP &&
"Section type is not SHT_GROUP");
- unsigned Link = 0;
- if (Section.Link.empty() && !ExcludedSectionHeaders.count(".symtab") &&
- SN2I.lookup(".symtab", Link))
- SHeader.sh_link = Link;
-
- SHeader.sh_entsize = 4;
- SHeader.sh_size = SHeader.sh_entsize * Section.Members.size();
-
if (Section.Signature)
SHeader.sh_info =
toSymbolIndex(*Section.Signature, Section.Name, /*IsDynamic=*/false);
- for (const ELFYAML::SectionOrType &Member : Section.Members) {
+ if (!Section.Members)
+ return;
+
+ for (const ELFYAML::SectionOrType &Member : *Section.Members) {
unsigned int SectionIndex = 0;
if (Member.sectionNameOrType == "GRP_COMDAT")
SectionIndex = llvm::ELF::GRP_COMDAT;
@@ -1277,27 +1316,30 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
SectionIndex = toSectionIndex(Member.sectionNameOrType, Section.Name);
CBA.write<uint32_t>(SectionIndex, ELFT::TargetEndianness);
}
+ SHeader.sh_size = SHeader.sh_entsize * Section.Members->size();
}
template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::SymverSection &Section,
ContiguousBlobAccumulator &CBA) {
- for (uint16_t Version : Section.Entries)
- CBA.write<uint16_t>(Version, ELFT::TargetEndianness);
+ if (!Section.Entries)
+ return;
- SHeader.sh_entsize = Section.EntSize ? (uint64_t)*Section.EntSize : 2;
- SHeader.sh_size = Section.Entries.size() * SHeader.sh_entsize;
+ for (uint16_t Version : *Section.Entries)
+ CBA.write<uint16_t>(Version, ELFT::TargetEndianness);
+ SHeader.sh_size = Section.Entries->size() * SHeader.sh_entsize;
}
template <class ELFT>
void ELFState<ELFT>::writeSectionContent(
Elf_Shdr &SHeader, const ELFYAML::StackSizesSection &Section,
ContiguousBlobAccumulator &CBA) {
- if (Section.Content || Section.Size) {
- SHeader.sh_size = writeContent(CBA, Section.Content, Section.Size);
+ if (!Section.Entries)
+ return;
+
+ if (!Section.Entries)
return;
- }
for (const ELFYAML::StackSizeEntry &E : *Section.Entries) {
CBA.write<uintX_t>(E.Address, ELFT::TargetEndianness);
@@ -1307,13 +1349,31 @@ void ELFState<ELFT>::writeSectionContent(
template <class ELFT>
void ELFState<ELFT>::writeSectionContent(
- Elf_Shdr &SHeader, const ELFYAML::LinkerOptionsSection &Section,
+ Elf_Shdr &SHeader, const ELFYAML::BBAddrMapSection &Section,
ContiguousBlobAccumulator &CBA) {
- if (Section.Content) {
- SHeader.sh_size = writeContent(CBA, Section.Content, None);
+ if (!Section.Entries)
return;
+
+ for (const ELFYAML::BBAddrMapEntry &E : *Section.Entries) {
+ // Write the address of the function.
+ CBA.write<uintX_t>(E.Address, ELFT::TargetEndianness);
+ // Write number of BBEntries (number of basic blocks in the function).
+ size_t NumBlocks = E.BBEntries ? E.BBEntries->size() : 0;
+ SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(NumBlocks);
+ if (!NumBlocks)
+ continue;
+ // Write all BBEntries.
+ for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *E.BBEntries)
+ SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset) +
+ CBA.writeULEB128(BBE.Size) +
+ CBA.writeULEB128(BBE.Metadata);
}
+}
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(
+ Elf_Shdr &SHeader, const ELFYAML::LinkerOptionsSection &Section,
+ ContiguousBlobAccumulator &CBA) {
if (!Section.Options)
return;
@@ -1330,11 +1390,6 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(
Elf_Shdr &SHeader, const ELFYAML::DependentLibrariesSection &Section,
ContiguousBlobAccumulator &CBA) {
- if (Section.Content) {
- SHeader.sh_size = writeContent(CBA, Section.Content, None);
- return;
- }
-
if (!Section.Libs)
return;
@@ -1373,21 +1428,6 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(
Elf_Shdr &SHeader, const ELFYAML::CallGraphProfileSection &Section,
ContiguousBlobAccumulator &CBA) {
- if (Section.EntSize)
- SHeader.sh_entsize = *Section.EntSize;
- else
- SHeader.sh_entsize = 16;
-
- unsigned Link = 0;
- if (Section.Link.empty() && !ExcludedSectionHeaders.count(".symtab") &&
- SN2I.lookup(".symtab", Link))
- SHeader.sh_link = Link;
-
- if (Section.Content) {
- SHeader.sh_size = writeContent(CBA, Section.Content, None);
- return;
- }
-
if (!Section.Entries)
return;
@@ -1406,15 +1446,11 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::HashSection &Section,
ContiguousBlobAccumulator &CBA) {
- unsigned Link = 0;
- if (Section.Link.empty() && !ExcludedSectionHeaders.count(".dynsym") &&
- SN2I.lookup(".dynsym", Link))
- SHeader.sh_link = Link;
+ if (!Section.Bucket)
+ return;
- if (Section.Content || Section.Size) {
- SHeader.sh_size = writeContent(CBA, Section.Content, Section.Size);
+ if (!Section.Bucket)
return;
- }
CBA.write<uint32_t>(
Section.NBucket.getValueOr(llvm::yaml::Hex64(Section.Bucket->size())),
@@ -1435,15 +1471,11 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::VerdefSection &Section,
ContiguousBlobAccumulator &CBA) {
- typedef typename ELFT::Verdef Elf_Verdef;
- typedef typename ELFT::Verdaux Elf_Verdaux;
-
- SHeader.sh_info = Section.Info;
- if (Section.Content) {
- SHeader.sh_size = writeContent(CBA, Section.Content, None);
- return;
- }
+ if (Section.Info)
+ SHeader.sh_info = *Section.Info;
+ else if (Section.Entries)
+ SHeader.sh_info = Section.Entries->size();
if (!Section.Entries)
return;
@@ -1453,10 +1485,10 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::VerdefEntry &E = (*Section.Entries)[I];
Elf_Verdef VerDef;
- VerDef.vd_version = E.Version;
- VerDef.vd_flags = E.Flags;
- VerDef.vd_ndx = E.VersionNdx;
- VerDef.vd_hash = E.Hash;
+ VerDef.vd_version = E.Version.getValueOr(1);
+ VerDef.vd_flags = E.Flags.getValueOr(0);
+ VerDef.vd_ndx = E.VersionNdx.getValueOr(0);
+ VerDef.vd_hash = E.Hash.getValueOr(0);
VerDef.vd_aux = sizeof(Elf_Verdef);
VerDef.vd_cnt = E.VerNames.size();
if (I == Section.Entries->size() - 1)
@@ -1485,15 +1517,10 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::VerneedSection &Section,
ContiguousBlobAccumulator &CBA) {
- typedef typename ELFT::Verneed Elf_Verneed;
- typedef typename ELFT::Vernaux Elf_Vernaux;
-
- SHeader.sh_info = Section.Info;
-
- if (Section.Content) {
- SHeader.sh_size = writeContent(CBA, Section.Content, None);
- return;
- }
+ if (Section.Info)
+ SHeader.sh_info = *Section.Info;
+ else if (Section.VerneedV)
+ SHeader.sh_info = Section.VerneedV->size();
if (!Section.VerneedV)
return;
@@ -1535,6 +1562,20 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
}
template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(
+ Elf_Shdr &SHeader, const ELFYAML::ARMIndexTableSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ if (!Section.Entries)
+ return;
+
+ for (const ELFYAML::ARMIndexTableEntry &E : *Section.Entries) {
+ CBA.write<uint32_t>(E.Offset, ELFT::TargetEndianness);
+ CBA.write<uint32_t>(E.Value, ELFT::TargetEndianness);
+ }
+ SHeader.sh_size = Section.Entries->size() * 8;
+}
+
+template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::MipsABIFlags &Section,
ContiguousBlobAccumulator &CBA) {
@@ -1543,7 +1584,6 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
object::Elf_Mips_ABIFlags<ELFT> Flags;
zero(Flags);
- SHeader.sh_entsize = sizeof(Flags);
SHeader.sh_size = SHeader.sh_entsize;
Flags.version = Section.Version;
@@ -1567,41 +1607,25 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
assert(Section.Type == llvm::ELF::SHT_DYNAMIC &&
"Section type is not SHT_DYNAMIC");
- if (!Section.Entries.empty() && Section.Content)
- reportError("cannot specify both raw content and explicit entries "
- "for dynamic section '" +
- Section.Name + "'");
-
- if (Section.Content)
- SHeader.sh_size = Section.Content->binary_size();
- else
- SHeader.sh_size = 2 * sizeof(uintX_t) * Section.Entries.size();
- if (Section.EntSize)
- SHeader.sh_entsize = *Section.EntSize;
- else
- SHeader.sh_entsize = sizeof(Elf_Dyn);
+ if (!Section.Entries)
+ return;
- for (const ELFYAML::DynamicEntry &DE : Section.Entries) {
+ for (const ELFYAML::DynamicEntry &DE : *Section.Entries) {
CBA.write<uintX_t>(DE.Tag, ELFT::TargetEndianness);
CBA.write<uintX_t>(DE.Val, ELFT::TargetEndianness);
}
- if (Section.Content)
- CBA.writeAsBinary(*Section.Content);
+ SHeader.sh_size = 2 * sizeof(uintX_t) * Section.Entries->size();
}
template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::AddrsigSection &Section,
ContiguousBlobAccumulator &CBA) {
- unsigned Link = 0;
- if (Section.Link.empty() && !ExcludedSectionHeaders.count(".symtab") &&
- SN2I.lookup(".symtab", Link))
- SHeader.sh_link = Link;
+ if (!Section.Symbols)
+ return;
- if (Section.Content || Section.Size) {
- SHeader.sh_size = writeContent(CBA, Section.Content, Section.Size);
+ if (!Section.Symbols)
return;
- }
for (StringRef Sym : *Section.Symbols)
SHeader.sh_size +=
@@ -1612,12 +1636,10 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::NoteSection &Section,
ContiguousBlobAccumulator &CBA) {
- uint64_t Offset = CBA.tell();
- if (Section.Content || Section.Size) {
- SHeader.sh_size = writeContent(CBA, Section.Content, Section.Size);
+ if (!Section.Notes)
return;
- }
+ uint64_t Offset = CBA.tell();
for (const ELFYAML::NoteEntry &NE : *Section.Notes) {
// Write name size.
if (NE.Name.empty())
@@ -1655,15 +1677,11 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::GnuHashSection &Section,
ContiguousBlobAccumulator &CBA) {
- unsigned Link = 0;
- if (Section.Link.empty() && !ExcludedSectionHeaders.count(".dynsym") &&
- SN2I.lookup(".dynsym", Link))
- SHeader.sh_link = Link;
+ if (!Section.HashBuckets)
+ return;
- if (Section.Content) {
- SHeader.sh_size = writeContent(CBA, Section.Content, None);
+ if (!Section.Header)
return;
- }
// We write the header first, starting with the hash buckets count. Normally
// it is the number of entries in HashBuckets, but the "NBuckets" property can
@@ -1724,7 +1742,9 @@ void ELFState<ELFT>::writeFill(ELFYAML::Fill &Fill,
template <class ELFT>
DenseMap<StringRef, size_t> ELFState<ELFT>::buildSectionHeaderReorderMap() {
- if (!Doc.SectionHeaders || Doc.SectionHeaders->NoHeaders)
+ const ELFYAML::SectionHeaderTable &SectionHeaders =
+ Doc.getSectionHeaderTable();
+ if (SectionHeaders.IsImplicit || SectionHeaders.NoHeaders)
return DenseMap<StringRef, size_t>();
DenseMap<StringRef, size_t> Ret;
@@ -1738,12 +1758,12 @@ DenseMap<StringRef, size_t> ELFState<ELFT>::buildSectionHeaderReorderMap() {
Seen.insert(Hdr.Name);
};
- if (Doc.SectionHeaders->Sections)
- for (const ELFYAML::SectionHeader &Hdr : *Doc.SectionHeaders->Sections)
+ if (SectionHeaders.Sections)
+ for (const ELFYAML::SectionHeader &Hdr : *SectionHeaders.Sections)
AddSection(Hdr);
- if (Doc.SectionHeaders->Excluded)
- for (const ELFYAML::SectionHeader &Hdr : *Doc.SectionHeaders->Excluded)
+ if (SectionHeaders.Excluded)
+ for (const ELFYAML::SectionHeader &Hdr : *SectionHeaders.Excluded)
AddSection(Hdr);
for (const ELFYAML::Section *S : Doc.getSections()) {
@@ -1772,17 +1792,17 @@ template <class ELFT> void ELFState<ELFT>::buildSectionIndex() {
// Build excluded section headers map.
std::vector<ELFYAML::Section *> Sections = Doc.getSections();
- if (Doc.SectionHeaders) {
- if (Doc.SectionHeaders->Excluded)
- for (const ELFYAML::SectionHeader &Hdr : *Doc.SectionHeaders->Excluded)
- if (!ExcludedSectionHeaders.insert(Hdr.Name).second)
- llvm_unreachable("buildSectionIndex() failed");
-
- if (Doc.SectionHeaders->NoHeaders.getValueOr(false))
- for (const ELFYAML::Section *S : Sections)
- if (!ExcludedSectionHeaders.insert(S->Name).second)
- llvm_unreachable("buildSectionIndex() failed");
- }
+ const ELFYAML::SectionHeaderTable &SectionHeaders =
+ Doc.getSectionHeaderTable();
+ if (SectionHeaders.Excluded)
+ for (const ELFYAML::SectionHeader &Hdr : *SectionHeaders.Excluded)
+ if (!ExcludedSectionHeaders.insert(Hdr.Name).second)
+ llvm_unreachable("buildSectionIndex() failed");
+
+ if (SectionHeaders.NoHeaders.getValueOr(false))
+ for (const ELFYAML::Section *S : Sections)
+ if (!ExcludedSectionHeaders.insert(S->Name).second)
+ llvm_unreachable("buildSectionIndex() failed");
size_t SecNdx = -1;
for (const ELFYAML::Section *S : Sections) {
@@ -1885,11 +1905,7 @@ bool ELFState<ELFT>::writeELF(raw_ostream &OS, ELFYAML::Object &Doc,
// Now we can decide segment offsets.
State.setProgramHeaderLayout(PHeaders, SHeaders);
- // Align the start of the section header table, which is written after all
- // section data.
- uint64_t SHOff =
- State.alignToOffset(CBA, sizeof(typename ELFT::uint), /*Offset=*/None);
- bool ReachedLimit = SHOff + arrayDataSize(makeArrayRef(SHeaders)) > MaxSize;
+ bool ReachedLimit = CBA.getOffset() > MaxSize;
if (Error E = CBA.takeLimitError()) {
// We report a custom error message instead below.
consumeError(std::move(E));
@@ -1904,10 +1920,15 @@ bool ELFState<ELFT>::writeELF(raw_ostream &OS, ELFYAML::Object &Doc,
if (State.HasError)
return false;
- State.writeELFHeader(OS, SHOff);
+ State.writeELFHeader(OS);
writeArrayData(OS, makeArrayRef(PHeaders));
+
+ const ELFYAML::SectionHeaderTable &SHT = Doc.getSectionHeaderTable();
+ if (!SHT.NoHeaders.getValueOr(false))
+ CBA.updateDataAt(*SHT.Offset, SHeaders.data(),
+ SHT.getNumHeaders(SHeaders.size()) * sizeof(Elf_Shdr));
+
CBA.writeBlobToStream(OS);
- writeArrayData(OS, makeArrayRef(SHeaders));
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
index 2353b34f188b..05d30577812b 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/ARMEHABI.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MipsABIFlags.h"
@@ -26,6 +27,16 @@ namespace llvm {
ELFYAML::Chunk::~Chunk() = default;
+namespace ELFYAML {
+unsigned Object::getMachine() const {
+ if (Header.Machine)
+ return *Header.Machine;
+ return llvm::ELF::EM_NONE;
+}
+
+constexpr StringRef SectionHeaderTable::TypeStr;
+} // namespace ELFYAML
+
namespace yaml {
void ScalarEnumerationTraits<ELFYAML::ELF_ET>::enumeration(
@@ -222,6 +233,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration(
ECase(EM_LANAI);
ECase(EM_BPF);
ECase(EM_VE);
+ ECase(EM_CSKY);
#undef ECase
IO.enumFallback<Hex16>(Value);
}
@@ -285,7 +297,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
assert(Object && "The IO context is not initialized");
#define BCase(X) IO.bitSetCase(Value, #X, ELF::X)
#define BCaseMask(X, M) IO.maskedBitSetCase(Value, #X, ELF::X, ELF::M)
- switch (Object->Header.Machine) {
+ switch (Object->getMachine()) {
case ELF::EM_ARM:
BCase(EF_ARM_SOFT_FLOAT);
BCase(EF_ARM_VFP_FLOAT);
@@ -411,14 +423,17 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_R600_TURKS, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX600, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX601, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX602, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX700, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX701, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX702, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX703, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX704, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX705, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX802, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX803, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX805, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX810, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX902, EF_AMDGPU_MACH);
@@ -426,17 +441,19 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX908, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1030, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1031, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1032, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1033, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
BCase(EF_AMDGPU_SRAM_ECC);
break;
- case ELF::EM_X86_64:
- break;
default:
- llvm_unreachable("Unsupported architecture");
+ break;
}
#undef BCase
#undef BCaseMask
@@ -477,12 +494,13 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_LLVM_SYMPART);
ECase(SHT_LLVM_PART_EHDR);
ECase(SHT_LLVM_PART_PHDR);
+ ECase(SHT_LLVM_BB_ADDR_MAP);
ECase(SHT_GNU_ATTRIBUTES);
ECase(SHT_GNU_HASH);
ECase(SHT_GNU_verdef);
ECase(SHT_GNU_verneed);
ECase(SHT_GNU_versym);
- switch (Object->Header.Machine) {
+ switch (Object->getMachine()) {
case ELF::EM_ARM:
ECase(SHT_ARM_EXIDX);
ECase(SHT_ARM_PREEMPTMAP);
@@ -537,7 +555,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO,
BCase(SHF_GROUP);
BCase(SHF_TLS);
BCase(SHF_COMPRESSED);
- switch (Object->Header.Machine) {
+ switch (Object->getMachine()) {
case ELF::EM_ARM:
BCase(SHF_ARM_PURECODE);
break;
@@ -629,7 +647,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
assert(Object && "The IO context is not initialized");
#define ELF_RELOC(X, Y) IO.enumCase(Value, #X, ELF::X);
- switch (Object->Header.Machine) {
+ switch (Object->getMachine()) {
case ELF::EM_X86_64:
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
break;
@@ -667,6 +685,9 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
case ELF::EM_VE:
#include "llvm/BinaryFormat/ELFRelocs/VE.def"
break;
+ case ELF::EM_CSKY:
+#include "llvm/BinaryFormat/ELFRelocs/CSKY.def"
+ break;
case ELF::EM_PPC64:
#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def"
break;
@@ -694,7 +715,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
#define STRINGIFY(X) (#X)
#define DYNAMIC_TAG(X, Y) IO.enumCase(Value, STRINGIFY(DT_##X), ELF::DT_##X);
- switch (Object->Header.Machine) {
+ switch (Object->getMachine()) {
case ELF::EM_AARCH64:
#undef AARCH64_DYNAMIC_TAG
#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
@@ -805,6 +826,7 @@ void ScalarEnumerationTraits<ELFYAML::MIPS_ISA>::enumeration(
IO.enumCase(Value, "MIPS5", 5);
IO.enumCase(Value, "MIPS32", 32);
IO.enumCase(Value, "MIPS64", 64);
+ IO.enumFallback<Hex32>(Value);
}
void ScalarBitSetTraits<ELFYAML::MIPS_AFL_ASE>::bitset(
@@ -823,6 +845,8 @@ void ScalarBitSetTraits<ELFYAML::MIPS_AFL_ASE>::bitset(
BCase(MIPS16);
BCase(MICROMIPS);
BCase(XPA);
+ BCase(CRC);
+ BCase(GINV);
#undef BCase
}
@@ -838,23 +862,6 @@ void MappingTraits<ELFYAML::SectionHeader>::mapping(
IO.mapRequired("Name", SHdr.Name);
}
-void MappingTraits<ELFYAML::SectionHeaderTable>::mapping(
- IO &IO, ELFYAML::SectionHeaderTable &SectionHeader) {
- IO.mapOptional("Sections", SectionHeader.Sections);
- IO.mapOptional("Excluded", SectionHeader.Excluded);
- IO.mapOptional("NoHeaders", SectionHeader.NoHeaders);
-}
-
-StringRef MappingTraits<ELFYAML::SectionHeaderTable>::validate(
- IO &IO, ELFYAML::SectionHeaderTable &SecHdrTable) {
- if (SecHdrTable.NoHeaders && (SecHdrTable.Sections || SecHdrTable.Excluded))
- return "NoHeaders can't be used together with Sections/Excluded";
- if (!SecHdrTable.NoHeaders && !SecHdrTable.Sections && !SecHdrTable.Excluded)
- return "SectionHeaderTable can't be empty. Use 'NoHeaders' key to drop the "
- "section header table";
- return StringRef();
-}
-
void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
ELFYAML::FileHeader &FileHdr) {
IO.mapRequired("Class", FileHdr.Class);
@@ -862,7 +869,7 @@ void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
IO.mapOptional("OSABI", FileHdr.OSABI, ELFYAML::ELF_ELFOSABI(0));
IO.mapOptional("ABIVersion", FileHdr.ABIVersion, Hex8(0));
IO.mapRequired("Type", FileHdr.Type);
- IO.mapRequired("Machine", FileHdr.Machine);
+ IO.mapOptional("Machine", FileHdr.Machine);
IO.mapOptional("Flags", FileHdr.Flags, ELFYAML::ELF_EF(0));
IO.mapOptional("Entry", FileHdr.Entry, Hex64(0));
@@ -882,7 +889,8 @@ void MappingTraits<ELFYAML::ProgramHeader>::mapping(
IO &IO, ELFYAML::ProgramHeader &Phdr) {
IO.mapRequired("Type", Phdr.Type);
IO.mapOptional("Flags", Phdr.Flags, ELFYAML::ELF_PF(0));
- IO.mapOptional("Sections", Phdr.Sections);
+ IO.mapOptional("FirstSec", Phdr.FirstSec);
+ IO.mapOptional("LastSec", Phdr.LastSec);
IO.mapOptional("VAddr", Phdr.VAddr, Hex64(0));
IO.mapOptional("PAddr", Phdr.PAddr, Phdr.VAddr);
IO.mapOptional("Align", Phdr.Align);
@@ -891,6 +899,15 @@ void MappingTraits<ELFYAML::ProgramHeader>::mapping(
IO.mapOptional("Offset", Phdr.Offset);
}
+std::string MappingTraits<ELFYAML::ProgramHeader>::validate(
+ IO &IO, ELFYAML::ProgramHeader &FileHdr) {
+ if (!FileHdr.FirstSec && FileHdr.LastSec)
+ return "the \"LastSec\" key can't be used without the \"FirstSec\" key";
+ if (FileHdr.FirstSec && !FileHdr.LastSec)
+ return "the \"FirstSec\" key can't be used without the \"LastSec\" key";
+ return "";
+}
+
LLVM_YAML_STRONG_TYPEDEF(StringRef, StOtherPiece)
template <> struct ScalarTraits<StOtherPiece> {
@@ -935,7 +952,7 @@ struct NormalizedOther {
std::vector<StOtherPiece> Ret;
const auto *Object = static_cast<ELFYAML::Object *>(YamlIO.getContext());
for (std::pair<StringRef, uint8_t> &P :
- getFlags(Object->Header.Machine).takeVector()) {
+ getFlags(Object->getMachine()).takeVector()) {
uint8_t FlagValue = P.second;
if ((*Original & FlagValue) != FlagValue)
continue;
@@ -954,7 +971,7 @@ struct NormalizedOther {
uint8_t toValue(StringRef Name) {
const auto *Object = static_cast<ELFYAML::Object *>(YamlIO.getContext());
- MapVector<StringRef, uint8_t> Flags = getFlags(Object->Header.Machine);
+ MapVector<StringRef, uint8_t> Flags = getFlags(Object->getMachine());
auto It = Flags.find(Name);
if (It != Flags.end())
@@ -1008,6 +1025,9 @@ struct NormalizedOther {
Map["STO_MIPS_PLT"] = ELF::STO_MIPS_PLT;
Map["STO_MIPS_OPTIONAL"] = ELF::STO_MIPS_OPTIONAL;
}
+
+ if (EMachine == ELF::EM_AARCH64)
+ Map["STO_AARCH64_VARIANT_PCS"] = ELF::STO_AARCH64_VARIANT_PCS;
return Map;
}
@@ -1054,11 +1074,11 @@ void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
IO.mapOptional("Name", Symbol.Name, StringRef());
IO.mapOptional("StName", Symbol.StName);
IO.mapOptional("Type", Symbol.Type, ELFYAML::ELF_STT(0));
- IO.mapOptional("Section", Symbol.Section, StringRef());
+ IO.mapOptional("Section", Symbol.Section);
IO.mapOptional("Index", Symbol.Index);
IO.mapOptional("Binding", Symbol.Binding, ELFYAML::ELF_STB(0));
- IO.mapOptional("Value", Symbol.Value, Hex64(0));
- IO.mapOptional("Size", Symbol.Size, Hex64(0));
+ IO.mapOptional("Value", Symbol.Value);
+ IO.mapOptional("Size", Symbol.Size);
// Symbol's Other field is a bit special. It is usually a field that
// represents st_other and holds the symbol visibility. However, on some
@@ -1070,11 +1090,11 @@ void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
IO.mapOptional("Other", Keys->Other);
}
-StringRef MappingTraits<ELFYAML::Symbol>::validate(IO &IO,
- ELFYAML::Symbol &Symbol) {
- if (Symbol.Index && Symbol.Section.data())
+std::string MappingTraits<ELFYAML::Symbol>::validate(IO &IO,
+ ELFYAML::Symbol &Symbol) {
+ if (Symbol.Index && Symbol.Section)
return "Index and Section cannot both be specified for Symbol";
- return StringRef();
+ return "";
}
static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
@@ -1082,32 +1102,35 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
IO.mapRequired("Type", Section.Type);
IO.mapOptional("Flags", Section.Flags);
IO.mapOptional("Address", Section.Address);
- IO.mapOptional("Link", Section.Link, StringRef());
+ IO.mapOptional("Link", Section.Link);
IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0));
IO.mapOptional("EntSize", Section.EntSize);
IO.mapOptional("Offset", Section.Offset);
+ IO.mapOptional("Content", Section.Content);
+ IO.mapOptional("Size", Section.Size);
+
// obj2yaml does not dump these fields. They are expected to be empty when we
// are producing YAML, because yaml2obj sets appropriate values for them
// automatically when they are not explicitly defined.
assert(!IO.outputting() ||
- (!Section.ShOffset.hasValue() && !Section.ShSize.hasValue() &&
- !Section.ShName.hasValue() && !Section.ShFlags.hasValue()));
+ (!Section.ShOffset && !Section.ShSize && !Section.ShName &&
+ !Section.ShFlags && !Section.ShType && !Section.ShAddrAlign));
+ IO.mapOptional("ShAddrAlign", Section.ShAddrAlign);
IO.mapOptional("ShName", Section.ShName);
IO.mapOptional("ShOffset", Section.ShOffset);
IO.mapOptional("ShSize", Section.ShSize);
IO.mapOptional("ShFlags", Section.ShFlags);
+ IO.mapOptional("ShType", Section.ShType);
}
static void sectionMapping(IO &IO, ELFYAML::DynamicSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Entries", Section.Entries);
- IO.mapOptional("Content", Section.Content);
}
static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapOptional("Content", Section.Content);
// We also support reading a content as array of bytes using the ContentArray
// key. obj2yaml never prints this field.
@@ -1119,23 +1142,24 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
Section.Content = yaml::BinaryRef(*Section.ContentBuf);
}
- IO.mapOptional("Size", Section.Size);
IO.mapOptional("Info", Section.Info);
}
-static void sectionMapping(IO &IO, ELFYAML::StackSizesSection &Section) {
+static void sectionMapping(IO &IO, ELFYAML::BBAddrMapSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Content", Section.Content);
- IO.mapOptional("Size", Section.Size);
+ IO.mapOptional("Entries", Section.Entries);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::StackSizesSection &Section) {
+ commonSectionMapping(IO, Section);
IO.mapOptional("Entries", Section.Entries);
}
static void sectionMapping(IO &IO, ELFYAML::HashSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapOptional("Content", Section.Content);
IO.mapOptional("Bucket", Section.Bucket);
IO.mapOptional("Chain", Section.Chain);
- IO.mapOptional("Size", Section.Size);
// obj2yaml does not dump these fields. They can be used to override nchain
// and nbucket values for creating broken sections.
@@ -1147,15 +1171,12 @@ static void sectionMapping(IO &IO, ELFYAML::HashSection &Section) {
static void sectionMapping(IO &IO, ELFYAML::NoteSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapOptional("Content", Section.Content);
- IO.mapOptional("Size", Section.Size);
IO.mapOptional("Notes", Section.Notes);
}
static void sectionMapping(IO &IO, ELFYAML::GnuHashSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapOptional("Content", Section.Content);
IO.mapOptional("Header", Section.Header);
IO.mapOptional("BloomFilter", Section.BloomFilter);
IO.mapOptional("HashBuckets", Section.HashBuckets);
@@ -1163,26 +1184,23 @@ static void sectionMapping(IO &IO, ELFYAML::GnuHashSection &Section) {
}
static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapOptional("Size", Section.Size, Hex64(0));
}
static void sectionMapping(IO &IO, ELFYAML::VerdefSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapRequired("Info", Section.Info);
+ IO.mapOptional("Info", Section.Info);
IO.mapOptional("Entries", Section.Entries);
- IO.mapOptional("Content", Section.Content);
}
static void sectionMapping(IO &IO, ELFYAML::SymverSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapRequired("Entries", Section.Entries);
+ IO.mapOptional("Entries", Section.Entries);
}
static void sectionMapping(IO &IO, ELFYAML::VerneedSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapRequired("Info", Section.Info);
+ IO.mapOptional("Info", Section.Info);
IO.mapOptional("Dependencies", Section.VerneedV);
- IO.mapOptional("Content", Section.Content);
}
static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
@@ -1194,24 +1212,21 @@ static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
static void sectionMapping(IO &IO, ELFYAML::RelrSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Entries", Section.Entries);
- IO.mapOptional("Content", Section.Content);
}
-static void groupSectionMapping(IO &IO, ELFYAML::Group &Group) {
+static void groupSectionMapping(IO &IO, ELFYAML::GroupSection &Group) {
commonSectionMapping(IO, Group);
IO.mapOptional("Info", Group.Signature);
- IO.mapRequired("Members", Group.Members);
+ IO.mapOptional("Members", Group.Members);
}
static void sectionMapping(IO &IO, ELFYAML::SymtabShndxSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapRequired("Entries", Section.Entries);
+ IO.mapOptional("Entries", Section.Entries);
}
static void sectionMapping(IO &IO, ELFYAML::AddrsigSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapOptional("Content", Section.Content);
- IO.mapOptional("Size", Section.Size);
IO.mapOptional("Symbols", Section.Symbols);
}
@@ -1222,23 +1237,28 @@ static void fillMapping(IO &IO, ELFYAML::Fill &Fill) {
IO.mapRequired("Size", Fill.Size);
}
+static void sectionHeaderTableMapping(IO &IO,
+ ELFYAML::SectionHeaderTable &SHT) {
+ IO.mapOptional("Offset", SHT.Offset);
+ IO.mapOptional("Sections", SHT.Sections);
+ IO.mapOptional("Excluded", SHT.Excluded);
+ IO.mapOptional("NoHeaders", SHT.NoHeaders);
+}
+
static void sectionMapping(IO &IO, ELFYAML::LinkerOptionsSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Options", Section.Options);
- IO.mapOptional("Content", Section.Content);
}
static void sectionMapping(IO &IO,
ELFYAML::DependentLibrariesSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Libraries", Section.Libs);
- IO.mapOptional("Content", Section.Content);
}
static void sectionMapping(IO &IO, ELFYAML::CallGraphProfileSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Entries", Section.Entries);
- IO.mapOptional("Content", Section.Content);
}
void MappingTraits<ELFYAML::SectionOrType>::mapping(
@@ -1246,9 +1266,9 @@ void MappingTraits<ELFYAML::SectionOrType>::mapping(
IO.mapRequired("SectionOrType", sectionOrType.sectionNameOrType);
}
-void MappingTraits<ELFYAML::SectionName>::mapping(
- IO &IO, ELFYAML::SectionName &sectionName) {
- IO.mapRequired("Section", sectionName.Section);
+static void sectionMapping(IO &IO, ELFYAML::ARMIndexTableSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Entries", Section.Entries);
}
static void sectionMapping(IO &IO, ELFYAML::MipsABIFlags &Section) {
@@ -1271,24 +1291,69 @@ static void sectionMapping(IO &IO, ELFYAML::MipsABIFlags &Section) {
IO.mapOptional("Flags2", Section.Flags2, Hex32(0));
}
+static StringRef getStringValue(IO &IO, const char *Key) {
+ StringRef Val;
+ IO.mapRequired(Key, Val);
+ return Val;
+}
+
+static void setStringValue(IO &IO, const char *Key, StringRef Val) {
+ IO.mapRequired(Key, Val);
+}
+
+static bool isInteger(StringRef Val) {
+ APInt Tmp;
+ return !Val.getAsInteger(0, Tmp);
+}
+
void MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::mapping(
IO &IO, std::unique_ptr<ELFYAML::Chunk> &Section) {
ELFYAML::ELF_SHT Type;
+ StringRef TypeStr;
if (IO.outputting()) {
- Type = cast<ELFYAML::Section>(Section.get())->Type;
+ if (auto *S = dyn_cast<ELFYAML::Section>(Section.get()))
+ Type = S->Type;
+ else if (auto *SHT = dyn_cast<ELFYAML::SectionHeaderTable>(Section.get()))
+ TypeStr = SHT->TypeStr;
} else {
// When the Type string does not have a "SHT_" prefix, we know it is not a
- // description of a regular ELF output section. Currently, we have one
- // special type named "Fill". See comments for Fill.
- StringRef StrType;
- IO.mapRequired("Type", StrType);
- if (StrType == "Fill") {
- Section.reset(new ELFYAML::Fill());
- fillMapping(IO, *cast<ELFYAML::Fill>(Section.get()));
- return;
- }
+ // description of a regular ELF output section.
+ TypeStr = getStringValue(IO, "Type");
+ if (TypeStr.startswith("SHT_") || isInteger(TypeStr))
+ IO.mapRequired("Type", Type);
+ }
+
+ if (TypeStr == "Fill") {
+ assert(!IO.outputting()); // We don't dump fills currently.
+ Section.reset(new ELFYAML::Fill());
+ fillMapping(IO, *cast<ELFYAML::Fill>(Section.get()));
+ return;
+ }
+
+ if (TypeStr == ELFYAML::SectionHeaderTable::TypeStr) {
+ if (IO.outputting())
+ setStringValue(IO, "Type", TypeStr);
+ else
+ Section.reset(new ELFYAML::SectionHeaderTable(/*IsImplicit=*/false));
+
+ sectionHeaderTableMapping(
+ IO, *cast<ELFYAML::SectionHeaderTable>(Section.get()));
+ return;
+ }
- IO.mapRequired("Type", Type);
+ const auto &Obj = *static_cast<ELFYAML::Object *>(IO.getContext());
+ if (Obj.getMachine() == ELF::EM_MIPS && Type == ELF::SHT_MIPS_ABIFLAGS) {
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::MipsABIFlags());
+ sectionMapping(IO, *cast<ELFYAML::MipsABIFlags>(Section.get()));
+ return;
+ }
+
+ if (Obj.getMachine() == ELF::EM_ARM && Type == ELF::SHT_ARM_EXIDX) {
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::ARMIndexTableSection());
+ sectionMapping(IO, *cast<ELFYAML::ARMIndexTableSection>(Section.get()));
+ return;
}
switch (Type) {
@@ -1310,8 +1375,8 @@ void MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::mapping(
break;
case ELF::SHT_GROUP:
if (!IO.outputting())
- Section.reset(new ELFYAML::Group());
- groupSectionMapping(IO, *cast<ELFYAML::Group>(Section.get()));
+ Section.reset(new ELFYAML::GroupSection());
+ groupSectionMapping(IO, *cast<ELFYAML::GroupSection>(Section.get()));
break;
case ELF::SHT_NOBITS:
if (!IO.outputting())
@@ -1333,11 +1398,6 @@ void MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::mapping(
Section.reset(new ELFYAML::GnuHashSection());
sectionMapping(IO, *cast<ELFYAML::GnuHashSection>(Section.get()));
break;
- case ELF::SHT_MIPS_ABIFLAGS:
- if (!IO.outputting())
- Section.reset(new ELFYAML::MipsABIFlags());
- sectionMapping(IO, *cast<ELFYAML::MipsABIFlags>(Section.get()));
- break;
case ELF::SHT_GNU_verdef:
if (!IO.outputting())
Section.reset(new ELFYAML::VerdefSection());
@@ -1379,6 +1439,11 @@ void MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::mapping(
Section.reset(new ELFYAML::CallGraphProfileSection());
sectionMapping(IO, *cast<ELFYAML::CallGraphProfileSection>(Section.get()));
break;
+ case ELF::SHT_LLVM_BB_ADDR_MAP:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::BBAddrMapSection());
+ sectionMapping(IO, *cast<ELFYAML::BBAddrMapSection>(Section.get()));
+ break;
default:
if (!IO.outputting()) {
StringRef Name;
@@ -1398,170 +1463,77 @@ void MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::mapping(
}
}
-StringRef MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::validate(
+std::string MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::validate(
IO &io, std::unique_ptr<ELFYAML::Chunk> &C) {
- if (const auto *RawSection = dyn_cast<ELFYAML::RawContentSection>(C.get())) {
- if (RawSection->Size && RawSection->Content &&
- (uint64_t)(*RawSection->Size) < RawSection->Content->binary_size())
- return "Section size must be greater than or equal to the content size";
- if (RawSection->Flags && RawSection->ShFlags)
- return "ShFlags and Flags cannot be used together";
- return {};
- }
-
- if (const auto *SS = dyn_cast<ELFYAML::StackSizesSection>(C.get())) {
- if (!SS->Entries && !SS->Content && !SS->Size)
- return ".stack_sizes: one of Content, Entries and Size must be specified";
-
- if (SS->Size && SS->Content &&
- (uint64_t)(*SS->Size) < SS->Content->binary_size())
- return ".stack_sizes: Size must be greater than or equal to the content "
- "size";
-
- // We accept Content, Size or both together when there are no Entries.
- if (!SS->Entries)
- return {};
-
- if (SS->Size)
- return ".stack_sizes: Size and Entries cannot be used together";
- if (SS->Content)
- return ".stack_sizes: Content and Entries cannot be used together";
- return {};
- }
-
- if (const auto *HS = dyn_cast<ELFYAML::HashSection>(C.get())) {
- if (!HS->Content && !HS->Bucket && !HS->Chain && !HS->Size)
- return "one of \"Content\", \"Size\", \"Bucket\" or \"Chain\" must be "
- "specified";
-
- if (HS->Content || HS->Size) {
- if (HS->Size && HS->Content &&
- (uint64_t)*HS->Size < HS->Content->binary_size())
- return "\"Size\" must be greater than or equal to the content "
- "size";
-
- if (HS->Bucket)
- return "\"Bucket\" cannot be used with \"Content\" or \"Size\"";
- if (HS->Chain)
- return "\"Chain\" cannot be used with \"Content\" or \"Size\"";
- return {};
- }
-
- if ((HS->Bucket && !HS->Chain) || (!HS->Bucket && HS->Chain))
- return "\"Bucket\" and \"Chain\" must be used together";
- return {};
- }
-
- if (const auto *Sec = dyn_cast<ELFYAML::AddrsigSection>(C.get())) {
- if (!Sec->Symbols && !Sec->Content && !Sec->Size)
- return "one of \"Content\", \"Size\" or \"Symbols\" must be specified";
-
- if (Sec->Content || Sec->Size) {
- if (Sec->Size && Sec->Content &&
- (uint64_t)*Sec->Size < Sec->Content->binary_size())
- return "\"Size\" must be greater than or equal to the content "
- "size";
-
- if (Sec->Symbols)
- return "\"Symbols\" cannot be used with \"Content\" or \"Size\"";
- return {};
- }
-
- if (!Sec->Symbols)
- return {};
- return {};
+ if (const auto *F = dyn_cast<ELFYAML::Fill>(C.get())) {
+ if (F->Pattern && F->Pattern->binary_size() != 0 && !F->Size)
+ return "\"Size\" can't be 0 when \"Pattern\" is not empty";
+ return "";
}
- if (const auto *NS = dyn_cast<ELFYAML::NoteSection>(C.get())) {
- if (!NS->Content && !NS->Size && !NS->Notes)
- return "one of \"Content\", \"Size\" or \"Notes\" must be "
- "specified";
-
- if (!NS->Content && !NS->Size)
- return {};
-
- if (NS->Size && NS->Content &&
- (uint64_t)*NS->Size < NS->Content->binary_size())
- return "\"Size\" must be greater than or equal to the content "
- "size";
-
- if (NS->Notes)
- return "\"Notes\" cannot be used with \"Content\" or \"Size\"";
- return {};
+ if (const auto *SHT = dyn_cast<ELFYAML::SectionHeaderTable>(C.get())) {
+ if (SHT->NoHeaders && (SHT->Sections || SHT->Excluded || SHT->Offset))
+ return "NoHeaders can't be used together with Offset/Sections/Excluded";
+ if (!SHT->NoHeaders && !SHT->Sections && !SHT->Excluded)
+ return "SectionHeaderTable can't be empty. Use 'NoHeaders' key to drop "
+ "the section header table";
+ return "";
}
- if (const auto *Sec = dyn_cast<ELFYAML::GnuHashSection>(C.get())) {
- if (!Sec->Content && !Sec->Header && !Sec->BloomFilter &&
- !Sec->HashBuckets && !Sec->HashValues)
- return "either \"Content\" or \"Header\", \"BloomFilter\", "
- "\"HashBuckets\" and \"HashBuckets\" must be specified";
-
- if (Sec->Header || Sec->BloomFilter || Sec->HashBuckets ||
- Sec->HashValues) {
- if (!Sec->Header || !Sec->BloomFilter || !Sec->HashBuckets ||
- !Sec->HashValues)
- return "\"Header\", \"BloomFilter\", "
- "\"HashBuckets\" and \"HashValues\" must be used together";
- if (Sec->Content)
- return "\"Header\", \"BloomFilter\", "
- "\"HashBuckets\" and \"HashValues\" can't be used together with "
- "\"Content\"";
- return {};
+ const ELFYAML::Section &Sec = *cast<ELFYAML::Section>(C.get());
+ if (Sec.Size && Sec.Content &&
+ (uint64_t)(*Sec.Size) < Sec.Content->binary_size())
+ return "Section size must be greater than or equal to the content size";
+
+ auto BuildErrPrefix = [](ArrayRef<std::pair<StringRef, bool>> EntV) {
+ std::string Msg;
+ for (size_t I = 0, E = EntV.size(); I != E; ++I) {
+ StringRef Name = EntV[I].first;
+ if (I == 0) {
+ Msg = "\"" + Name.str() + "\"";
+ continue;
+ }
+ if (I != EntV.size() - 1)
+ Msg += ", \"" + Name.str() + "\"";
+ else
+ Msg += " and \"" + Name.str() + "\"";
}
+ return Msg;
+ };
- // Only Content is specified.
- return {};
- }
-
- if (const auto *Sec = dyn_cast<ELFYAML::LinkerOptionsSection>(C.get())) {
- if (Sec->Options && Sec->Content)
- return "\"Options\" and \"Content\" can't be used together";
- return {};
- }
+ std::vector<std::pair<StringRef, bool>> Entries = Sec.getEntries();
+ const size_t NumUsedEntries = llvm::count_if(
+ Entries, [](const std::pair<StringRef, bool> &P) { return P.second; });
- if (const auto *Sec = dyn_cast<ELFYAML::DependentLibrariesSection>(C.get())) {
- if (Sec->Libs && Sec->Content)
- return "SHT_LLVM_DEPENDENT_LIBRARIES: \"Libraries\" and \"Content\" "
- "can't "
- "be used together";
- return {};
- }
+ if ((Sec.Size || Sec.Content) && NumUsedEntries > 0)
+ return BuildErrPrefix(Entries) +
+ " cannot be used with \"Content\" or \"Size\"";
- if (const auto *F = dyn_cast<ELFYAML::Fill>(C.get())) {
- if (!F->Pattern)
- return {};
- if (F->Pattern->binary_size() != 0 && !F->Size)
- return "\"Size\" can't be 0 when \"Pattern\" is not empty";
- return {};
- }
+ if (NumUsedEntries > 0 && Entries.size() != NumUsedEntries)
+ return BuildErrPrefix(Entries) + " must be used together";
- if (const auto *VD = dyn_cast<ELFYAML::VerdefSection>(C.get())) {
- if (VD->Entries && VD->Content)
- return "SHT_GNU_verdef: \"Entries\" and \"Content\" can't be used "
- "together";
- return {};
- }
-
- if (const auto *VD = dyn_cast<ELFYAML::VerneedSection>(C.get())) {
- if (VD->VerneedV && VD->Content)
- return "SHT_GNU_verneed: \"Dependencies\" and \"Content\" can't be used "
- "together";
- return {};
+ if (const auto *RawSection = dyn_cast<ELFYAML::RawContentSection>(C.get())) {
+ if (RawSection->Flags && RawSection->ShFlags)
+ return "ShFlags and Flags cannot be used together";
+ return "";
}
- if (const auto *RS = dyn_cast<ELFYAML::RelrSection>(C.get())) {
- if (RS->Entries && RS->Content)
- return "\"Entries\" and \"Content\" can't be used together";
- return {};
+ if (const auto *NB = dyn_cast<ELFYAML::NoBitsSection>(C.get())) {
+ if (NB->Content)
+ return "SHT_NOBITS section cannot have \"Content\"";
+ return "";
}
- if (const auto *CGP = dyn_cast<ELFYAML::CallGraphProfileSection>(C.get())) {
- if (CGP->Entries && CGP->Content)
- return "\"Entries\" and \"Content\" can't be used together";
- return {};
+ if (const auto *MF = dyn_cast<ELFYAML::MipsABIFlags>(C.get())) {
+ if (MF->Content)
+ return "\"Content\" key is not implemented for SHT_MIPS_ABIFLAGS "
+ "sections";
+ if (MF->Size)
+ return "\"Size\" key is not implemented for SHT_MIPS_ABIFLAGS sections";
+ return "";
}
- return {};
+ return "";
}
namespace {
@@ -1596,6 +1568,21 @@ void MappingTraits<ELFYAML::StackSizeEntry>::mapping(
IO.mapRequired("Size", E.Size);
}
+void MappingTraits<ELFYAML::BBAddrMapEntry>::mapping(
+ IO &IO, ELFYAML::BBAddrMapEntry &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapOptional("Address", E.Address, Hex64(0));
+ IO.mapOptional("BBEntries", E.BBEntries);
+}
+
+void MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry>::mapping(
+ IO &IO, ELFYAML::BBAddrMapEntry::BBEntry &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapRequired("AddressOffset", E.AddressOffset);
+ IO.mapRequired("Size", E.Size);
+ IO.mapRequired("Metadata", E.Metadata);
+}
+
void MappingTraits<ELFYAML::GnuHashHeader>::mapping(IO &IO,
ELFYAML::GnuHashHeader &E) {
assert(IO.getContext() && "The IO context is not initialized");
@@ -1625,10 +1612,10 @@ void MappingTraits<ELFYAML::VerdefEntry>::mapping(IO &IO,
ELFYAML::VerdefEntry &E) {
assert(IO.getContext() && "The IO context is not initialized");
- IO.mapRequired("Version", E.Version);
- IO.mapRequired("Flags", E.Flags);
- IO.mapRequired("VersionNdx", E.VersionNdx);
- IO.mapRequired("Hash", E.Hash);
+ IO.mapOptional("Version", E.Version);
+ IO.mapOptional("Flags", E.Flags);
+ IO.mapOptional("VersionNdx", E.VersionNdx);
+ IO.mapOptional("Hash", E.Hash);
IO.mapRequired("Names", E.VerNames);
}
@@ -1659,7 +1646,7 @@ void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
IO.mapOptional("Offset", Rel.Offset, (Hex64)0);
IO.mapOptional("Symbol", Rel.Symbol);
- if (Object->Header.Machine == ELFYAML::ELF_EM(ELF::EM_MIPS) &&
+ if (Object->getMachine() == ELFYAML::ELF_EM(ELF::EM_MIPS) &&
Object->Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64)) {
MappingNormalization<NormalizedMips64RelType, ELFYAML::ELF_REL> Key(
IO, Rel.Type);
@@ -1673,12 +1660,25 @@ void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
IO.mapOptional("Addend", Rel.Addend, (ELFYAML::YAMLIntUInt)0);
}
+void MappingTraits<ELFYAML::ARMIndexTableEntry>::mapping(
+ IO &IO, ELFYAML::ARMIndexTableEntry &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapRequired("Offset", E.Offset);
+
+ StringRef CantUnwind = "EXIDX_CANTUNWIND";
+ if (IO.outputting() && (uint32_t)E.Value == ARM::EHABI::EXIDX_CANTUNWIND)
+ IO.mapRequired("Value", CantUnwind);
+ else if (!IO.outputting() && getStringValue(IO, "Value") == CantUnwind)
+ E.Value = ARM::EHABI::EXIDX_CANTUNWIND;
+ else
+ IO.mapRequired("Value", E.Value);
+}
+
void MappingTraits<ELFYAML::Object>::mapping(IO &IO, ELFYAML::Object &Object) {
assert(!IO.getContext() && "The IO context is initialized already");
IO.setContext(&Object);
IO.mapTag("!ELF", true);
IO.mapRequired("FileHeader", Object.Header);
- IO.mapOptional("SectionHeaderTable", Object.SectionHeaders);
IO.mapOptional("ProgramHeaders", Object.ProgramHeaders);
IO.mapOptional("Sections", Object.Chunks);
IO.mapOptional("Symbols", Object.Symbols);
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
index 680264484704..dec9c9f6960b 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -29,7 +29,7 @@ namespace {
class MachOWriter {
public:
- MachOWriter(MachOYAML::Object &Obj) : Obj(Obj), is64Bit(true), fileStart(0) {
+ MachOWriter(MachOYAML::Object &Obj) : Obj(Obj), fileStart(0) {
is64Bit = Obj.Header.magic == MachO::MH_MAGIC_64 ||
Obj.Header.magic == MachO::MH_CIGAM_64;
memset(reinterpret_cast<void *>(&Header), 0, sizeof(MachO::mach_header_64));
@@ -199,14 +199,12 @@ size_t writeLoadCommandData<MachO::build_version_command>(
}
void ZeroFillBytes(raw_ostream &OS, size_t Size) {
- std::vector<uint8_t> FillData;
- FillData.insert(FillData.begin(), Size, 0);
+ std::vector<uint8_t> FillData(Size, 0);
OS.write(reinterpret_cast<char *>(FillData.data()), Size);
}
void Fill(raw_ostream &OS, size_t Size, uint32_t Data) {
- std::vector<uint32_t> FillData;
- FillData.insert(FillData.begin(), (Size / 4) + 1, Data);
+ std::vector<uint32_t> FillData((Size / 4) + 1, Data);
OS.write(reinterpret_cast<char *>(FillData.data()), Size);
}
@@ -285,34 +283,20 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
return createStringError(
errc::invalid_argument,
"wrote too much data somewhere, section offsets don't line up");
- if (0 == strncmp(&Sec.segname[0], "__DWARF", 16)) {
- Error Err = Error::success();
- cantFail(std::move(Err));
-
- if (0 == strncmp(&Sec.sectname[0], "__debug_str", 16))
- Err = DWARFYAML::emitDebugStr(OS, Obj.DWARF);
- else if (0 == strncmp(&Sec.sectname[0], "__debug_abbrev", 16))
- Err = DWARFYAML::emitDebugAbbrev(OS, Obj.DWARF);
- else if (0 == strncmp(&Sec.sectname[0], "__debug_aranges", 16))
- Err = DWARFYAML::emitDebugAranges(OS, Obj.DWARF);
- else if (0 == strncmp(&Sec.sectname[0], "__debug_ranges", 16))
- Err = DWARFYAML::emitDebugRanges(OS, Obj.DWARF);
- else if (0 == strncmp(&Sec.sectname[0], "__debug_pubnames", 16)) {
- if (Obj.DWARF.PubNames)
- Err = DWARFYAML::emitPubSection(OS, *Obj.DWARF.PubNames,
- Obj.IsLittleEndian);
- } else if (0 == strncmp(&Sec.sectname[0], "__debug_pubtypes", 16)) {
- if (Obj.DWARF.PubTypes)
- Err = DWARFYAML::emitPubSection(OS, *Obj.DWARF.PubTypes,
- Obj.IsLittleEndian);
- } else if (0 == strncmp(&Sec.sectname[0], "__debug_info", 16))
- Err = DWARFYAML::emitDebugInfo(OS, Obj.DWARF);
- else if (0 == strncmp(&Sec.sectname[0], "__debug_line", 16))
- Err = DWARFYAML::emitDebugLine(OS, Obj.DWARF);
-
- if (Err)
- return Err;
+ StringRef SectName(Sec.sectname,
+ strnlen(Sec.sectname, sizeof(Sec.sectname)));
+ // If the section's content is specified in the 'DWARF' entry, we will
+ // emit it regardless of the section's segname.
+ if (Obj.DWARF.getNonEmptySectionNames().count(SectName.substr(2))) {
+ if (Sec.content)
+ return createStringError(errc::invalid_argument,
+ "cannot specify section '" + SectName +
+ "' contents in the 'DWARF' entry and "
+ "the 'content' at the same time");
+ auto EmitFunc = DWARFYAML::getDWARFEmitterByName(SectName.substr(2));
+ if (Error Err = EmitFunc(OS, Obj.DWARF))
+ return Err;
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp
index 86aad0233767..5a27d37cb726 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -305,12 +305,12 @@ void MappingTraits<MachOYAML::Section>::mapping(IO &IO,
IO.mapOptional("relocations", Section.relocations);
}
-StringRef
+std::string
MappingTraits<MachOYAML::Section>::validate(IO &IO,
MachOYAML::Section &Section) {
if (Section.content && Section.size < Section.content->binary_size())
return "Section size must be greater than or equal to the content size";
- return {};
+ return "";
}
void MappingTraits<MachO::build_tool_version>::mapping(
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/MinidumpYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/MinidumpYAML.cpp
index 77ea42c41a4c..e1a80b98e449 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/MinidumpYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/MinidumpYAML.cpp
@@ -292,7 +292,7 @@ static void streamMapping(yaml::IO &IO, RawContentStream &Stream) {
IO.mapOptional("Size", Stream.Size, Stream.Content.binary_size());
}
-static StringRef streamValidate(RawContentStream &Stream) {
+static std::string streamValidate(RawContentStream &Stream) {
if (Stream.Size.value < Stream.Content.binary_size())
return "Stream size must be greater or equal to the content size";
return "";
@@ -434,7 +434,7 @@ void yaml::MappingTraits<std::unique_ptr<Stream>>::mapping(
}
}
-StringRef yaml::MappingTraits<std::unique_ptr<Stream>>::validate(
+std::string yaml::MappingTraits<std::unique_ptr<Stream>>::validate(
yaml::IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S) {
switch (S->Kind) {
case MinidumpYAML::Stream::StreamKind::RawContent:
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ObjectYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ObjectYAML.cpp
index 7f636f4eabac..4564b537c9a1 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/ObjectYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ObjectYAML.cpp
@@ -33,7 +33,14 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
*ObjectFile.FatMachO);
} else {
Input &In = (Input &)IO;
- if (IO.mapTag("!ELF")) {
+ if (IO.mapTag("!Arch")) {
+ ObjectFile.Arch.reset(new ArchYAML::Archive());
+ MappingTraits<ArchYAML::Archive>::mapping(IO, *ObjectFile.Arch);
+ std::string Err =
+ MappingTraits<ArchYAML::Archive>::validate(IO, *ObjectFile.Arch);
+ if (!Err.empty())
+ IO.setError(Err);
+ } else if (IO.mapTag("!ELF")) {
ObjectFile.Elf.reset(new ELFYAML::Object());
MappingTraits<ELFYAML::Object>::mapping(IO, *ObjectFile.Elf);
} else if (IO.mapTag("!COFF")) {
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/WasmEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/WasmEmitter.cpp
index cbb062d87ae6..c09d7f1bc95a 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/WasmEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/WasmEmitter.cpp
@@ -60,6 +60,7 @@ private:
WasmYAML::Object &Obj;
uint32_t NumImportedFunctions = 0;
uint32_t NumImportedGlobals = 0;
+ uint32_t NumImportedTables = 0;
uint32_t NumImportedEvents = 0;
bool HasError = false;
@@ -187,6 +188,7 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
switch (Info.Kind) {
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
+ case wasm::WASM_SYMBOL_TYPE_TABLE:
case wasm::WASM_SYMBOL_TYPE_EVENT:
encodeULEB128(Info.ElementIndex, SubSection.getStream());
if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 ||
@@ -268,6 +270,32 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
SubSection.done();
}
+ if (Section.GlobalNames.size()) {
+ writeUint8(OS, wasm::WASM_NAMES_GLOBAL);
+
+ SubSectionWriter SubSection(OS);
+
+ encodeULEB128(Section.GlobalNames.size(), SubSection.getStream());
+ for (const WasmYAML::NameEntry &NameEntry : Section.GlobalNames) {
+ encodeULEB128(NameEntry.Index, SubSection.getStream());
+ writeStringRef(NameEntry.Name, SubSection.getStream());
+ }
+
+ SubSection.done();
+ }
+ if (Section.DataSegmentNames.size()) {
+ writeUint8(OS, wasm::WASM_NAMES_DATA_SEGMENT);
+
+ SubSectionWriter SubSection(OS);
+
+ encodeULEB128(Section.DataSegmentNames.size(), SubSection.getStream());
+ for (const WasmYAML::NameEntry &NameEntry : Section.DataSegmentNames) {
+ encodeULEB128(NameEntry.Index, SubSection.getStream());
+ writeStringRef(NameEntry.Name, SubSection.getStream());
+ }
+
+ SubSection.done();
+ }
}
void WasmWriter::writeSectionContent(raw_ostream &OS,
@@ -360,7 +388,7 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
case wasm::WASM_EXTERNAL_EVENT:
writeUint32(OS, Import.EventImport.Attribute);
writeUint32(OS, Import.EventImport.SigIndex);
- NumImportedGlobals++;
+ NumImportedEvents++;
break;
case wasm::WASM_EXTERNAL_MEMORY:
writeLimits(Import.Memory, OS);
@@ -368,6 +396,7 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
case wasm::WASM_EXTERNAL_TABLE:
writeUint8(OS, Import.TableImport.ElemType);
writeLimits(Import.TableImport.TableLimits, OS);
+ NumImportedTables++;
break;
default:
reportError("unknown import type: " +Twine(Import.Kind));
@@ -401,7 +430,13 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
void WasmWriter::writeSectionContent(raw_ostream &OS,
WasmYAML::TableSection &Section) {
encodeULEB128(Section.Tables.size(), OS);
+ uint32_t ExpectedIndex = NumImportedTables;
for (auto &Table : Section.Tables) {
+ if (Table.Index != ExpectedIndex) {
+ reportError("unexpected table index: " + Twine(Table.Index));
+ return;
+ }
+ ++ExpectedIndex;
writeUint8(OS, Table.ElemType);
writeLimits(Table.TableLimits, OS);
}
@@ -491,9 +526,9 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
encodeULEB128(Section.Segments.size(), OS);
for (auto &Segment : Section.Segments) {
encodeULEB128(Segment.InitFlags, OS);
- if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX)
+ if (Segment.InitFlags & wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX)
encodeULEB128(Segment.MemoryIndex, OS);
- if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0)
+ if ((Segment.InitFlags & wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0)
writeInitExpr(OS, Segment.Offset);
encodeULEB128(Segment.Content.binary_size(), OS);
Segment.Content.writeAsBinary(OS);
@@ -538,6 +573,7 @@ void WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec,
case wasm::R_WASM_MEMORY_ADDR_I32:
case wasm::R_WASM_MEMORY_ADDR_I64:
case wasm::R_WASM_FUNCTION_OFFSET_I32:
+ case wasm::R_WASM_FUNCTION_OFFSET_I64:
case wasm::R_WASM_SECTION_OFFSET_I32:
encodeULEB128(Reloc.Addend, OS);
}
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/WasmYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/WasmYAML.cpp
index d1aa1181a344..b4d2d113fb5a 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -61,6 +61,8 @@ static void sectionMapping(IO &IO, WasmYAML::NameSection &Section) {
commonSectionMapping(IO, Section);
IO.mapRequired("Name", Section.Name);
IO.mapOptional("FunctionNames", Section.FunctionNames);
+ IO.mapOptional("GlobalNames", Section.GlobalNames);
+ IO.mapOptional("DataSegmentNames", Section.DataSegmentNames);
}
static void sectionMapping(IO &IO, WasmYAML::LinkingSection &Section) {
@@ -300,6 +302,7 @@ void MappingTraits<WasmYAML::Signature>::mapping(
}
void MappingTraits<WasmYAML::Table>::mapping(IO &IO, WasmYAML::Table &Table) {
+ IO.mapRequired("Index", Table.Index);
IO.mapRequired("ElemType", Table.ElemType);
IO.mapRequired("Limits", Table.TableLimits);
}
@@ -445,12 +448,12 @@ void MappingTraits<WasmYAML::DataSegment>::mapping(
IO &IO, WasmYAML::DataSegment &Segment) {
IO.mapOptional("SectionOffset", Segment.SectionOffset);
IO.mapRequired("InitFlags", Segment.InitFlags);
- if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX) {
+ if (Segment.InitFlags & wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) {
IO.mapRequired("MemoryIndex", Segment.MemoryIndex);
} else {
Segment.MemoryIndex = 0;
}
- if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) {
+ if ((Segment.InitFlags & wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
IO.mapRequired("Offset", Segment.Offset);
} else {
Segment.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST;
@@ -470,6 +473,7 @@ void ScalarEnumerationTraits<WasmYAML::ComdatKind>::enumeration(
#define ECase(X) IO.enumCase(Kind, #X, wasm::WASM_COMDAT_##X);
ECase(FUNCTION);
ECase(DATA);
+ ECase(SECTION);
#undef ECase
}
@@ -496,6 +500,8 @@ void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
IO.mapRequired("Function", Info.ElementIndex);
} else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_GLOBAL) {
IO.mapRequired("Global", Info.ElementIndex);
+ } else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_TABLE) {
+ IO.mapRequired("Table", Info.ElementIndex);
} else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_EVENT) {
IO.mapRequired("Event", Info.ElementIndex);
} else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_DATA) {
@@ -551,6 +557,7 @@ void ScalarEnumerationTraits<WasmYAML::SymbolKind>::enumeration(
ECase(FUNCTION);
ECase(DATA);
ECase(GLOBAL);
+ ECase(TABLE);
ECase(SECTION);
ECase(EVENT);
#undef ECase
@@ -565,7 +572,6 @@ void ScalarEnumerationTraits<WasmYAML::ValueType>::enumeration(
ECase(F64);
ECase(V128);
ECase(FUNCREF);
- ECase(EXNREF);
ECase(EXTERNREF);
ECase(FUNC);
#undef ECase
@@ -599,6 +605,7 @@ void ScalarEnumerationTraits<WasmYAML::TableType>::enumeration(
IO &IO, WasmYAML::TableType &Type) {
#define ECase(X) IO.enumCase(Type, #X, wasm::WASM_TYPE_##X);
ECase(FUNCREF);
+ ECase(EXTERNREF);
#undef ECase
}
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/yaml2obj.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/yaml2obj.cpp
index a04345f1294a..ef2ab83dcd24 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/yaml2obj.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/yaml2obj.cpp
@@ -32,6 +32,8 @@ bool convertYAML(yaml::Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler,
return false;
}
+ if (Doc.Arch)
+ return yaml2archive(*Doc.Arch, Out, ErrHandler);
if (Doc.Elf)
return yaml2elf(*Doc.Elf, Out, ErrHandler, MaxSize);
if (Doc.Coff)
diff --git a/contrib/llvm-project/llvm/lib/Option/OptTable.cpp b/contrib/llvm-project/llvm/lib/Option/OptTable.cpp
index 926eb8e0437f..c78c2cee1edf 100644
--- a/contrib/llvm-project/llvm/lib/Option/OptTable.cpp
+++ b/contrib/llvm-project/llvm/lib/Option/OptTable.cpp
@@ -6,14 +6,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Option/OptTable.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
-#include "llvm/Option/Option.h"
#include "llvm/Option/OptSpecifier.h"
-#include "llvm/Option/OptTable.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h" // for expandResponseFiles
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -195,10 +196,13 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str,
// Returns true if one of the Prefixes + In.Name matches Option
static bool optionMatches(const OptTable::Info &In, StringRef Option) {
- if (In.Prefixes)
+ if (In.Prefixes) {
+ StringRef InName(In.Name);
for (size_t I = 0; In.Prefixes[I]; I++)
- if (Option == std::string(In.Prefixes[I]) + In.Name)
- return true;
+ if (Option.endswith(InName))
+ if (Option.slice(0, Option.size() - InName.size()) == In.Prefixes[I])
+ return true;
+ }
return false;
}
@@ -226,7 +230,7 @@ OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const {
}
std::vector<std::string>
-OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const {
+OptTable::findByPrefix(StringRef Cur, unsigned int DisableFlags) const {
std::vector<std::string> Ret;
for (size_t I = FirstSearchableIndex, E = OptionInfos.size(); I < E; I++) {
const Info &In = OptionInfos[I];
@@ -239,7 +243,7 @@ OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const {
std::string S = std::string(In.Prefixes[I]) + std::string(In.Name) + "\t";
if (In.HelpText)
S += In.HelpText;
- if (StringRef(S).startswith(Cur) && S.compare(std::string(Cur) + "\t"))
+ if (StringRef(S).startswith(Cur) && S != std::string(Cur) + "\t")
Ret.push_back(S);
}
}
@@ -330,6 +334,60 @@ bool OptTable::addValues(const char *Option, const char *Values) {
return false;
}
+// Parse a single argument, return the new argument, and update Index. If
+// GroupedShortOptions is true, -a matches "-abc" and the argument in Args will
+// be updated to "-bc". This overload does not support
+// FlagsToInclude/FlagsToExclude or case insensitive options.
+Arg *OptTable::parseOneArgGrouped(InputArgList &Args, unsigned &Index) const {
+ // Anything that doesn't start with PrefixesUnion is an input, as is '-'
+ // itself.
+ const char *CStr = Args.getArgString(Index);
+ StringRef Str(CStr);
+ if (isInput(PrefixesUnion, Str))
+ return new Arg(getOption(TheInputOptionID), Str, Index++, CStr);
+
+ const Info *End = OptionInfos.data() + OptionInfos.size();
+ StringRef Name = Str.ltrim(PrefixChars);
+ const Info *Start = std::lower_bound(
+ OptionInfos.data() + FirstSearchableIndex, End, Name.data());
+ const Info *Fallback = nullptr;
+ unsigned Prev = Index;
+
+ // Search for the option which matches Str.
+ for (; Start != End; ++Start) {
+ unsigned ArgSize = matchOption(Start, Str, IgnoreCase);
+ if (!ArgSize)
+ continue;
+
+ Option Opt(Start, this);
+ if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
+ false, Index))
+ return A;
+
+ // If Opt is a Flag of length 2 (e.g. "-a"), we know it is a prefix of
+ // the current argument (e.g. "-abc"). Match it as a fallback if no
+ // longer-named option (e.g. "-ab") exists.
+ if (ArgSize == 2 && Opt.getKind() == Option::FlagClass)
+ Fallback = Start;
+
+ // Otherwise, see if the argument is missing.
+ if (Prev != Index)
+ return nullptr;
+ }
+ if (Fallback) {
+ Option Opt(Fallback, this);
+ if (Arg *A = Opt.accept(Args, Str.substr(0, 2), true, Index)) {
+ if (Str.size() == 2)
+ ++Index;
+ else
+ Args.replaceArgString(Index, Twine('-') + Str.substr(2));
+ return A;
+ }
+ }
+
+ return new Arg(getOption(TheUnknownOptionID), Str, Index++, CStr);
+}
+
Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
unsigned FlagsToInclude,
unsigned FlagsToExclude) const {
@@ -373,7 +431,8 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
continue;
// See if this option matches.
- if (Arg *A = Opt.accept(Args, Index, ArgSize))
+ if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
+ false, Index))
return A;
// Otherwise, see if this argument was missing values.
@@ -414,8 +473,11 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
}
unsigned Prev = Index;
- Arg *A = ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude);
- assert(Index > Prev && "Parser failed to consume argument.");
+ Arg *A = GroupedShortOptions
+ ? parseOneArgGrouped(Args, Index)
+ : ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude);
+ assert((Index > Prev || GroupedShortOptions) &&
+ "Parser failed to consume argument.");
// Check for missing argument error.
if (!A) {
@@ -432,6 +494,33 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
return Args;
}
+InputArgList OptTable::parseArgs(int Argc, char *const *Argv,
+ OptSpecifier Unknown, StringSaver &Saver,
+ function_ref<void(StringRef)> ErrorFn) const {
+ SmallVector<const char *, 0> NewArgv;
+ // The environment variable specifies initial options which can be overridden
+ // by command line options.
+ cl::expandResponseFiles(Argc, Argv, EnvVar, Saver, NewArgv);
+
+ unsigned MAI, MAC;
+ opt::InputArgList Args = ParseArgs(makeArrayRef(NewArgv), MAI, MAC);
+ if (MAC)
+ ErrorFn((Twine(Args.getArgString(MAI)) + ": missing argument").str());
+
+ // For each unknown option, call ErrorFn with a formatted error message. The
+ // message includes a suggested alternative option spelling if available.
+ std::string Nearest;
+ for (const opt::Arg *A : Args.filtered(Unknown)) {
+ std::string Spelling = A->getAsString(Args);
+ if (findNearest(Spelling, Nearest) > 1)
+ ErrorFn("unknown argument '" + A->getAsString(Args) + "'");
+ else
+ ErrorFn("unknown argument '" + A->getAsString(Args) +
+ "', did you mean '" + Nearest + "'?");
+ }
+ return Args;
+}
+
static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
const Option O = Opts.getOption(Id);
std::string Name = O.getPrefixedName();
diff --git a/contrib/llvm-project/llvm/lib/Option/Option.cpp b/contrib/llvm-project/llvm/lib/Option/Option.cpp
index 9abc9fdce4c7..68d074b2702e 100644
--- a/contrib/llvm-project/llvm/lib/Option/Option.cpp
+++ b/contrib/llvm-project/llvm/lib/Option/Option.cpp
@@ -106,9 +106,9 @@ bool Option::matches(OptSpecifier Opt) const {
return false;
}
-Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index,
- unsigned ArgSize) const {
- StringRef Spelling = StringRef(Args.getArgString(Index), ArgSize);
+Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
+ unsigned &Index) const {
+ size_t ArgSize = Spelling.size();
switch (getKind()) {
case FlagClass: {
if (ArgSize != strlen(Args.getArgString(Index)))
@@ -230,10 +230,11 @@ Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index,
}
}
-Arg *Option::accept(const ArgList &Args,
- unsigned &Index,
- unsigned ArgSize) const {
- std::unique_ptr<Arg> A(acceptInternal(Args, Index, ArgSize));
+Arg *Option::accept(const ArgList &Args, StringRef CurArg,
+ bool GroupedShortOption, unsigned &Index) const {
+ std::unique_ptr<Arg> A(GroupedShortOption && getKind() == FlagClass
+ ? new Arg(*this, CurArg, Index)
+ : acceptInternal(Args, CurArg, Index));
if (!A)
return nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
index 4db7bebcb77c..6c1a7c75d30a 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
@@ -16,8 +16,8 @@
#include "llvm/Passes/PassBuilder.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasAnalysisEvaluator.h"
+#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -28,23 +28,32 @@
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/DDG.h"
+#include "llvm/Analysis/DDGPrinter.h"
+#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/InlineAdvisor.h"
-#include "llvm/Analysis/InlineFeaturesAnalysis.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
+#include "llvm/Analysis/InstCount.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
+#include "llvm/Analysis/MemDerefPrinter.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ModuleDebugInfoPrinter.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PhiValues.h"
#include "llvm/Analysis/PostDominators.h"
@@ -61,10 +70,12 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/IR/SafepointIRVerifier.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Regex.h"
#include "llvm/Target/TargetMachine.h"
@@ -73,9 +84,12 @@
#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "llvm/Transforms/Coroutines/CoroElide.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"
+#include "llvm/Transforms/HelloNew/HelloWorld.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/Annotation2Metadata.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/Transforms/IPO/BlockExtractor.h"
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
#include "llvm/Transforms/IPO/ConstantMerge.h"
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
@@ -88,16 +102,20 @@
#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/Transforms/IPO/GlobalSplit.h"
#include "llvm/Transforms/IPO/HotColdSplitting.h"
+#include "llvm/Transforms/IPO/IROutliner.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/Transforms/IPO/SampleProfile.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
+#include "llvm/Transforms/IPO/StripSymbols.h"
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
@@ -106,20 +124,25 @@
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
#include "llvm/Transforms/Instrumentation/CGProfile.h"
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
+#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
+#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
#include "llvm/Transforms/Scalar/BDCE.h"
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
#include "llvm/Transforms/Scalar/ConstantHoisting.h"
+#include "llvm/Transforms/Scalar/ConstraintElimination.h"
#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
@@ -131,6 +154,7 @@
#include "llvm/Transforms/Scalar/IVUsersPrinter.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar/LICM.h"
@@ -138,18 +162,22 @@
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Scalar/LoopDistribute.h"
+#include "llvm/Transforms/Scalar/LoopFlatten.h"
#include "llvm/Transforms/Scalar/LoopFuse.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
+#include "llvm/Transforms/Scalar/LoopInterchange.h"
#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopPredication.h"
+#include "llvm/Transforms/Scalar/LoopReroll.h"
#include "llvm/Transforms/Scalar/LoopRotation.h"
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
+#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
@@ -164,15 +192,20 @@
#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
#include "llvm/Transforms/Scalar/Reassociate.h"
+#include "llvm/Transforms/Scalar/Reg2Mem.h"
#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"
+#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
+#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
+#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
@@ -181,14 +214,24 @@
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
+#include "llvm/Transforms/Utils/InstructionNamer.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/LowerInvoke.h"
+#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
+#include "llvm/Transforms/Utils/MetaRenamer.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
+#include "llvm/Transforms/Utils/StripGCRelocates.h"
+#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Transforms/Utils/UnifyLoopExits.h"
+#include "llvm/Transforms/Utils/UniqueInternalLinkageNames.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
@@ -196,26 +239,7 @@
using namespace llvm;
-static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations",
- cl::ReallyHidden, cl::init(4));
-static cl::opt<bool>
- RunPartialInlining("enable-npm-partial-inlining", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
- cl::desc("Run Partial inlinining pass"));
-
-static cl::opt<int> PreInlineThreshold(
- "npm-preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
- cl::desc("Control the amount of inlining in pre-instrumentation inliner "
- "(default = 75)"));
-
-static cl::opt<bool>
- RunNewGVN("enable-npm-newgvn", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
- cl::desc("Run NewGVN instead of GVN"));
-
-static cl::opt<bool> EnableGVNHoist(
- "enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
- cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
+extern cl::opt<unsigned> MaxDevirtIterations;
static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
"enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
@@ -227,14 +251,6 @@ static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
clEnumValN(InliningAdvisorMode::Release, "release",
"Use release mode (AOT-compiled model).")));
-static cl::opt<bool> EnableGVNSink(
- "enable-npm-gvn-sink", cl::init(false), cl::Hidden,
- cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
-
-static cl::opt<bool> EnableUnrollAndJam(
- "enable-npm-unroll-and-jam", cl::init(false), cl::Hidden,
- cl::desc("Enable the Unroll and Jam pass for the new PM (default = off)"));
-
static cl::opt<bool> EnableSyntheticCounts(
"enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
cl::desc("Run synthetic function entry count generation "
@@ -243,18 +259,21 @@ static cl::opt<bool> EnableSyntheticCounts(
static const Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
-// This option is used in simplifying testing SampleFDO optimizations for
-// profile loading.
-static cl::opt<bool>
- EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
- cl::desc("Enable control height reduction optimization (CHR)"));
-
/// Flag to enable inline deferral during PGO.
static cl::opt<bool>
EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
cl::Hidden,
cl::desc("Enable inline deferral during PGO"));
+static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Enable memory profiler"));
+
+static cl::opt<bool> PerformMandatoryInliningsFirst(
+ "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Perform mandatory inlinings module-wide, before performing "
+ "inlining."));
+
PipelineTuningOptions::PipelineTuningOptions() {
LoopInterleaving = true;
LoopVectorization = true;
@@ -265,16 +284,33 @@ PipelineTuningOptions::PipelineTuningOptions() {
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
CallGraphProfile = true;
+ MergeFunctions = false;
+ UniqueLinkageNames = false;
}
+extern cl::opt<bool> ExtraVectorizerPasses;
+extern cl::opt<bool> EnableConstraintElimination;
+extern cl::opt<bool> EnableGVNHoist;
+extern cl::opt<bool> EnableGVNSink;
extern cl::opt<bool> EnableHotColdSplit;
+extern cl::opt<bool> EnableIROutliner;
extern cl::opt<bool> EnableOrderFileInstrumentation;
+extern cl::opt<bool> EnableCHR;
+extern cl::opt<bool> EnableUnrollAndJam;
+extern cl::opt<bool> EnableLoopFlatten;
+extern cl::opt<bool> RunNewGVN;
+extern cl::opt<bool> RunPartialInlining;
extern cl::opt<bool> FlattenedProfileUsed;
extern cl::opt<AttributorRunOption> AttributorRun;
extern cl::opt<bool> EnableKnowledgeRetention;
+extern cl::opt<bool> EnableMatrix;
+
+extern cl::opt<bool> DisablePreInliner;
+extern cl::opt<int> PreInlineThreshold;
+
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {
/*SpeedLevel*/ 0,
/*SizeLevel*/ 0};
@@ -388,8 +424,45 @@ AnalysisKey NoOpCGSCCAnalysis::Key;
AnalysisKey NoOpFunctionAnalysis::Key;
AnalysisKey NoOpLoopAnalysis::Key;
+/// Whether or not we should populate a PassInstrumentationCallbacks's class to
+/// pass name map.
+///
+/// This is for optimization purposes so we don't populate it if we never use
+/// it. This should be updated if new pass instrumentation wants to use the map.
+/// We currently only use this for --print-before/after.
+bool shouldPopulateClassToPassNames() {
+ return !printBeforePasses().empty() || !printAfterPasses().empty();
+}
+
} // namespace
+PassBuilder::PassBuilder(bool DebugLogging, TargetMachine *TM,
+ PipelineTuningOptions PTO, Optional<PGOOptions> PGOOpt,
+ PassInstrumentationCallbacks *PIC)
+ : DebugLogging(DebugLogging), TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) {
+ if (TM)
+ TM->registerPassBuilderCallbacks(*this, DebugLogging);
+ if (PIC && shouldPopulateClassToPassNames()) {
+#define MODULE_PASS(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define FUNCTION_PASS(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define LOOP_PASS(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define CGSCC_PASS(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#include "PassRegistry.def"
+ }
+}
+
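For reference, the new constructor is an instance of the X-macro idiom: each *_PASS / *_ANALYSIS macro is temporarily redefined to emit an addClassToPassName() call, and PassRegistry.def is re-included so every registry entry expands into one class-name-to-pipeline-name mapping, gated on shouldPopulateClassToPassNames() so the work only happens when --print-before/--print-after need it. A minimal standalone sketch of the same idiom, with a made-up inline registry and pass names (LLVM keeps the real list in the separate PassRegistry.def file):

    #include <iostream>
    #include <map>
    #include <string>

    // Stand-in for PassRegistry.def: one entry per pass. LLVM keeps this list
    // in a .def file and re-#includes it under different macro definitions.
    #define PASS_REGISTRY(X)            \
      X("simplifycfg", SimplifyCFGPass) \
      X("instcombine", InstCombinePass)

    int main() {
      std::map<std::string, std::string> ClassToPassName;

    // Expansion: each registry entry records class name -> pipeline name,
    // mirroring PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME).
    #define ADD_MAPPING(NAME, CLASS) ClassToPassName[#CLASS] = NAME;
      PASS_REGISTRY(ADD_MAPPING)
    #undef ADD_MAPPING

      for (const auto &Entry : ClassToPassName)
        std::cout << Entry.first << " -> " << Entry.second << "\n";
      return 0;
    }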
void PassBuilder::invokePeepholeEPCallbacks(
FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
for (auto &C : PeepholeEPCallbacks)
@@ -432,9 +505,17 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
C(LAM);
}
+// Helper to add AnnotationRemarksPass.
+static void addAnnotationRemarksPass(ModulePassManager &MPM) {
+ FunctionPassManager FPM;
+ FPM.addPass(AnnotationRemarksPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+}
+
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
-FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline(
- OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) {
+FunctionPassManager
+PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
FunctionPassManager FPM(DebugLogging);
@@ -481,8 +562,9 @@ FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline(
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(SimpleLoopUnswitchPass());
- LPM2.addPass(IndVarSimplifyPass());
+
LPM2.addPass(LoopIdiomRecognizePass());
+ LPM2.addPass(IndVarSimplifyPass());
for (auto &C : LateLoopOptimizationsEPCallbacks)
C(LPM2, Level);
@@ -493,7 +575,7 @@ FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline(
// inaccurate. The normal unroller doesn't pay attention to forced full unroll
// attributes so we need to make sure and allow the full unroll pass to pay
// attention to it.
- if (Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
PGOOpt->Action != PGOOptions::SampleUse)
LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
@@ -507,13 +589,17 @@ FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline(
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM1), EnableMSSALoopDependency, DebugLogging));
+ std::move(LPM1), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
+ DebugLogging));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
+ if (EnableLoopFlatten)
+ FPM.addPass(LoopFlattenPass());
// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
FPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging));
+ std::move(LPM2), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false,
+ DebugLogging));
// Delete small array after loop unroll.
FPM.addPass(SROA());
@@ -555,14 +641,13 @@ FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline(
FunctionPassManager
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
- ThinLTOPhase Phase,
- bool DebugLogging) {
+ ThinOrFullLTOPhase Phase) {
assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
// The O1 pipeline has a separate pipeline creation function to simplify
// construction readability.
if (Level.getSpeedupLevel() == 1)
- return buildO1FunctionSimplificationPipeline(Level, Phase, DebugLogging);
+ return buildO1FunctionSimplificationPipeline(Level, Phase);
FunctionPassManager FPM(DebugLogging);
@@ -585,6 +670,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(SimplifyCFGPass());
}
+ if (EnableConstraintElimination)
+ FPM.addPass(ConstraintEliminationPass());
+
// Speculative execution if the target has divergent branches; otherwise nop.
FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
@@ -633,13 +721,14 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM1.addPass(LoopInstSimplifyPass());
LPM1.addPass(LoopSimplifyCFGPass());
- // Rotate Loop - disable header duplication at -Oz
+ // Disable header duplication in loop rotation at -Oz.
LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
- LPM1.addPass(SimpleLoopUnswitchPass());
- LPM2.addPass(IndVarSimplifyPass());
+ LPM1.addPass(
+ SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
LPM2.addPass(LoopIdiomRecognizePass());
+ LPM2.addPass(IndVarSimplifyPass());
for (auto &C : LateLoopOptimizationsEPCallbacks)
C(LPM2, Level);
@@ -650,7 +739,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// inaccurate. The normal unroller doesn't pay attention to forced full unroll
// attributes so we need to make sure and allow the full unroll pass to pay
// attention to it.
- if (Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
PGOOpt->Action != PGOOptions::SampleUse)
LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
@@ -664,14 +753,18 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM1), EnableMSSALoopDependency, DebugLogging));
+ std::move(LPM1), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
+ DebugLogging));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
- // The loop passes in LPM2 (IndVarSimplifyPass, LoopIdiomRecognizePass,
+ if (EnableLoopFlatten)
+ FPM.addPass(LoopFlattenPass());
+ // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
// LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
FPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging));
+ std::move(LPM2), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false,
+ DebugLogging));
// Delete small array after loop unroll.
FPM.addPass(SROA());
@@ -705,10 +798,16 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// redo DCE, etc.
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
+
+ // Finally, do an expensive DCE pass to catch all the dead code exposed by
+ // the simplifications and basic cleanup after all the simplifications.
+ // TODO: Investigate if this is too expensive.
+ FPM.addPass(ADCEPass());
+
FPM.addPass(DSEPass());
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, DebugLogging));
+ EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true, DebugLogging));
if (PTO.Coroutines)
FPM.addPass(CoroElidePass());
@@ -716,10 +815,6 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
- // Finally, do an expensive DCE pass to catch all the dead code exposed by
- // the simplifications and basic cleanup after all the simplifications.
- // TODO: Investigate if this is too expensive.
- FPM.addPass(ADCEPass());
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
@@ -732,26 +827,27 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
return FPM;
}
-void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
+void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
+ MPM.addPass(CanonicalizeAliasesPass());
+ MPM.addPass(NameAnonGlobalPass());
+}
+
+void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
PassBuilder::OptimizationLevel Level,
bool RunProfileGen, bool IsCS,
std::string ProfileFile,
std::string ProfileRemappingFile) {
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
- // Generally running simplification passes and the inliner with an high
- // threshold results in smaller executables, but there may be cases where
- // the size grows, so let's be conservative here and skip this simplification
- // at -Os/Oz. We will not do this inline for context sensistive PGO (when
- // IsCS is true).
- if (!Level.isOptimizingForSize() && !IsCS) {
+ if (!IsCS && !DisablePreInliner) {
InlineParams IP;
IP.DefaultThreshold = PreInlineThreshold;
- // FIXME: The hint threshold has the same value used by the regular inliner.
- // This should probably be lowered after performance testing.
+ // FIXME: The hint threshold has the same value used by the regular inliner
+ // when not optimizing for size. This should probably be lowered after
+ // performance testing.
// FIXME: this comment is cargo culted from the old pass manager, revisit).
- IP.HintThreshold = 325;
+ IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
ModuleInlinerWrapperPass MIWP(IP, DebugLogging);
CGSCCPassManager &CGPipeline = MIWP.getPM();
@@ -785,8 +881,10 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
MPM.addPass(PGOInstrumentationGen(IsCS));
FunctionPassManager FPM;
+ // Disable header duplication in loop rotation at -Oz.
FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopRotatePass(), EnableMSSALoopDependency, DebugLogging));
+ LoopRotatePass(Level != OptimizationLevel::Oz), EnableMSSALoopDependency,
+ /*UseBlockFrequencyInfo=*/false, DebugLogging));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
// Add the profile lowering pass.
@@ -800,8 +898,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
}
void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
- bool DebugLogging, bool RunProfileGen,
- bool IsCS, std::string ProfileFile,
+ bool RunProfileGen, bool IsCS,
+ std::string ProfileFile,
std::string ProfileRemappingFile) {
if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
@@ -830,18 +928,19 @@ getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
}
ModuleInlinerWrapperPass
-PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase,
- bool DebugLogging) {
+PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
InlineParams IP = getInlineParamsFromOptLevel(Level);
- if (Phase == PassBuilder::ThinLTOPhase::PreLink && PGOOpt &&
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
PGOOpt->Action == PGOOptions::SampleUse)
IP.HotCallSiteThreshold = 0;
if (PGOOpt)
IP.EnableDeferral = EnablePGOInlineDeferral;
- ModuleInlinerWrapperPass MIWP(IP, DebugLogging, UseInlineAdvisor,
- MaxDevirtIterations);
+ ModuleInlinerWrapperPass MIWP(IP, DebugLogging,
+ PerformMandatoryInliningsFirst,
+ UseInlineAdvisor, MaxDevirtIterations);
// Require the GlobalsAA analysis for the module so we can query it within
// the CGSCC pipeline.
@@ -866,7 +965,7 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase,
MainCGPipeline.addPass(AttributorCGSCCPass());
if (PTO.Coroutines)
- MainCGPipeline.addPass(CoroSplitPass());
+ MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
// Now deduce any function attributes based in the current code.
MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
@@ -881,21 +980,33 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase,
if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
MainCGPipeline.addPass(OpenMPOptPass());
+ for (auto &C : CGSCCOptimizerLateEPCallbacks)
+ C(MainCGPipeline, Level);
+
// Lastly, add the core function simplification pipeline nested inside the
// CGSCC walk.
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
-
- for (auto &C : CGSCCOptimizerLateEPCallbacks)
- C(MainCGPipeline, Level);
+ buildFunctionSimplificationPipeline(Level, Phase)));
return MIWP;
}
-ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
- OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) {
+ModulePassManager
+PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
ModulePassManager MPM(DebugLogging);
+ // Add UniqueInternalLinkageNames Pass which renames internal linkage
+ // symbols with unique names.
+ if (PTO.UniqueLinkageNames)
+ MPM.addPass(UniqueInternalLinkageNamesPass());
+
+ // Place pseudo probe instrumentation as the first pass of the pipeline to
+ // minimize the impact of optimization changes.
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
+ Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
+ MPM.addPass(SampleProfileProbePass(TM));
+
bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
// In ThinLTO mode, when flattened profile is used, all the available
@@ -903,7 +1014,7 @@ ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
// no need to load the profile again in PostLink.
bool LoadSampleProfile =
HasSampleProfile &&
- !(FlattenedProfileUsed && Phase == ThinLTOPhase::PostLink);
+ !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
// During the ThinLTO backend phase we perform early indirect call promotion
// here, before globalopt. Otherwise imported available_externally functions
@@ -919,7 +1030,7 @@ ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
// command line. E.g. for flattened profiles where we will not be reloading
// the sample profile in the ThinLTO backend, we ideally shouldn't have to
// provide the sample profile file.
- if (Phase == ThinLTOPhase::PostLink && !LoadSampleProfile)
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
// Do basic inference of function attributes from known properties of system
@@ -952,20 +1063,19 @@ ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
// Annotate sample profile right after early FPM to ensure freshness of
// the debug info.
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile,
- Phase == ThinLTOPhase::PreLink));
+ PGOOpt->ProfileRemappingFile, Phase));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
// Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
// for the profile annotation to be accurate in the ThinLTO backend.
- if (Phase != ThinLTOPhase::PreLink)
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
// We perform early indirect call promotion here, before globalopt.
// This is important for the ThinLTO backend phase because otherwise
// imported available_externally functions look unreferenced and are
// removed.
- MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink,
- true /* SamplePGO */));
+ MPM.addPass(PGOIndirectCallPromotion(
+ Phase == ThinOrFullLTOPhase::ThinLTOPostLink, true /* SamplePGO */));
}
if (AttributorRun & AttributorRunOption::MODULE)
@@ -974,9 +1084,12 @@ ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
// Lower type metadata and the type.test intrinsic in the ThinLTO
// post link pipeline after ICP. This is to enable usage of the type
// tests in ICP sequences.
- if (Phase == ThinLTOPhase::PostLink)
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+ for (auto &C : PipelineEarlySimplificationEPCallbacks)
+ C(MPM, Level);
+
// Interprocedural constant propagation now that basic cleanup has occurred
// and prior to optimizing globals.
// FIXME: This position in the pipeline hasn't been carefully considered in
@@ -1011,16 +1124,16 @@ ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
// Add all the requested passes for instrumentation PGO, if requested.
- if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
+ if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
(PGOOpt->Action == PGOOptions::IRInstr ||
PGOOpt->Action == PGOOptions::IRUse)) {
- addPGOInstrPasses(MPM, DebugLogging, Level,
+ addPGOInstrPasses(MPM, Level,
/* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
/* IsCS */ false, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
- if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
+ if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
PGOOpt->CSAction == PGOOptions::CSIRInstr)
MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
@@ -1028,12 +1141,19 @@ ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
if (EnableSyntheticCounts && !PGOOpt)
MPM.addPass(SyntheticCountsPropagation());
- MPM.addPass(buildInlinerPipeline(Level, Phase, DebugLogging));
+ MPM.addPass(buildInlinerPipeline(Level, Phase));
+
+ if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+ MPM.addPass(ModuleMemProfilerPass());
+ }
+
return MPM;
}
-ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
- OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) {
+ModulePassManager
+PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
+ bool LTOPreLink) {
ModulePassManager MPM(DebugLogging);
// Optimize globals now that the module is fully simplified.
@@ -1071,11 +1191,11 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// instrumentation is after all the inlines are done.
if (!LTOPreLink && PGOOpt) {
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
- addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true,
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
/* IsCS */ true, PGOOpt->CSProfileGenFile,
PGOOpt->ProfileRemappingFile);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
- addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false,
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
/* IsCS */ true, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
}
@@ -1093,6 +1213,11 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
OptimizePM.addPass(Float2IntPass());
OptimizePM.addPass(LowerConstantIntrinsicsPass());
+ if (EnableMatrix) {
+ OptimizePM.addPass(LowerMatrixIntrinsicsPass());
+ OptimizePM.addPass(EarlyCSEPass());
+ }
+
// FIXME: We need to run some loop optimizations to re-rotate loops after
// simplify-cfg and others undo their rotation.
@@ -1104,8 +1229,11 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
C(OptimizePM, Level);
// First rotate loops that may have been un-rotated by prior passes.
+ // Disable header duplication at -Oz.
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
- LoopRotatePass(), EnableMSSALoopDependency, DebugLogging));
+ LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink),
+ EnableMSSALoopDependency,
+ /*UseBlockFrequencyInfo=*/false, DebugLogging));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is
@@ -1128,6 +1256,28 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// Cleanup after the loop optimization passes.
OptimizePM.addPass(InstCombinePass());
+ if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+ // At higher optimization levels, try to clean up any runtime overlap and
+ // alignment checks inserted by the vectorizer. We want to track correlated
+ // runtime checks for two inner loops in the same outer loop, fold any
+ // common computations, hoist loop-invariant aspects out of any outer loop,
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
+ // dead (or speculatable) control flows or more combining opportunities.
+ OptimizePM.addPass(EarlyCSEPass());
+ OptimizePM.addPass(CorrelatedValuePropagationPass());
+ OptimizePM.addPass(InstCombinePass());
+ LoopPassManager LPM(DebugLogging);
+ LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM.addPass(
+ SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
+ OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+ std::move(LPM), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
+ DebugLogging));
+ OptimizePM.addPass(SimplifyCFGPass());
+ OptimizePM.addPass(InstCombinePass());
+ }
+
// Now that we've formed fast to execute loop structures, we do further
// optimizations. These are run afterward as they might block doing complex
// analyses and transforms such as what are needed for loop vectorization.
@@ -1137,15 +1287,22 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// convert to more optimized IR using more aggressive simplify CFG options.
// The extra sinking transform can create larger basic blocks, so do this
// before SLP vectorization.
- OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
- forwardSwitchCondToPhi(true).
- convertSwitchToLookupTable(true).
- needCanonicalLoops(false).
- sinkCommonInsts(true)));
+ // FIXME: study whether hoisting and/or sinking of common instructions should
+ // be delayed until after SLP vectorizer.
+ OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
// Optimize parallel scalar instruction chains into SIMD instructions.
- if (PTO.SLPVectorization)
+ if (PTO.SLPVectorization) {
OptimizePM.addPass(SLPVectorizerPass());
+ if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+ OptimizePM.addPass(EarlyCSEPass());
+ }
+ }
// Enhance/cleanup vector code.
OptimizePM.addPass(VectorCombinePass());
@@ -1169,7 +1326,7 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, DebugLogging));
+ EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true, DebugLogging));
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
@@ -1181,6 +1338,17 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
if (EnableHotColdSplit && !LTOPreLink)
MPM.addPass(HotColdSplittingPass());
+ // Search the code for similar regions of code. If enough similar regions can
+ // be found where extracting the regions into their own function will decrease
+ // the size of the program, we extract the regions and deduplicate the
+ // structurally similar regions.
+ if (EnableIROutliner)
+ MPM.addPass(IROutlinerPass());
+
+ // Merge functions if requested.
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
// canonicalization pass that enables other optimizations. As a result,
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
@@ -1228,55 +1396,70 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
- bool DebugLogging, bool LTOPreLink) {
+ bool LTOPreLink) {
assert(Level != OptimizationLevel::O0 &&
"Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging);
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
// Apply module pipeline start EP callback.
for (auto &C : PipelineStartEPCallbacks)
- C(MPM);
+ C(MPM, Level);
- if (PGOOpt && PGOOpt->SamplePGOSupport)
+ if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
// Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None,
- DebugLogging));
+ MPM.addPass(buildModuleSimplificationPipeline(
+ Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
+ : ThinOrFullLTOPhase::None));
// Now add the optimization pipeline.
- MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, LTOPreLink));
+ MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
+
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+ MPM.addPass(PseudoProbeUpdatePass());
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ if (LTOPreLink)
+ addRequiredLTOPreLinkPasses(MPM);
return MPM;
}
ModulePassManager
-PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
- bool DebugLogging) {
+PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
assert(Level != OptimizationLevel::O0 &&
"Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging);
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
- if (PGOOpt && PGOOpt->SamplePGOSupport)
+ if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
// Apply module pipeline start EP callback.
for (auto &C : PipelineStartEPCallbacks)
- C(MPM);
+ C(MPM, Level);
// If we are planning to perform ThinLTO later, we don't bloat the code with
// unrolling/vectorization/... now. Just simplify the module as much as we
// can.
- MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PreLink,
- DebugLogging));
+ MPM.addPass(buildModuleSimplificationPipeline(
+ Level, ThinOrFullLTOPhase::ThinLTOPreLink));
// Run partial inlining pass to partially inline functions that have
// large bodies.
@@ -1297,14 +1480,24 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
if (PTO.Coroutines)
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+ MPM.addPass(PseudoProbeUpdatePass());
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ addRequiredLTOPreLinkPasses(MPM);
+
return MPM;
}
ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
- OptimizationLevel Level, bool DebugLogging,
- const ModuleSummaryIndex *ImportSummary) {
+ OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
ModulePassManager MPM(DebugLogging);
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
if (ImportSummary) {
// These passes import type identifier resolutions for whole-program
// devirtualization and CFI. They must run early because other passes may
@@ -1332,30 +1525,35 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
MPM.addPass(ForceFunctionAttrsPass());
// Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PostLink,
- DebugLogging));
+ MPM.addPass(buildModuleSimplificationPipeline(
+ Level, ThinOrFullLTOPhase::ThinLTOPostLink));
// Now add the optimization pipeline.
- MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));
+ MPM.addPass(buildModuleOptimizationPipeline(Level));
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
return MPM;
}
ModulePassManager
-PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
- bool DebugLogging) {
+PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
assert(Level != OptimizationLevel::O0 &&
"Must request optimizations for the default pipeline!");
// FIXME: We should use a customized pre-link pipeline!
- return buildPerModuleDefaultPipeline(Level, DebugLogging,
+ return buildPerModuleDefaultPipeline(Level,
/* LTOPreLink */ true);
}
ModulePassManager
-PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
+PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
ModuleSummaryIndex *ExportSummary) {
ModulePassManager MPM(DebugLogging);
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
if (Level == OptimizationLevel::O0) {
// The WPD and LowerTypeTest passes need to run at -O0 to lower type
// metadata and intrinsics.
@@ -1364,6 +1562,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// Run a second time to clean up any type tests left behind by WPD for use
// in ICP.
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
return MPM;
}
@@ -1371,7 +1573,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// Load sample profile before running the LTO optimization pipeline.
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile,
- false /* ThinLTOPhase::PreLink */));
+ ThinOrFullLTOPhase::FullLTOPostLink));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
@@ -1434,6 +1636,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// in ICP (which is performed earlier than this in the regular LTO
// pipeline).
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
return MPM;
}
@@ -1482,17 +1688,17 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
- FPM.addPass(JumpThreadingPass());
+ FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
// Do a post inline PGO instrumentation and use pass. This is a context
// sensitive PGO pass.
if (PGOOpt) {
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
- addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true,
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
/* IsCS */ true, PGOOpt->CSProfileGenFile,
PGOOpt->ProfileRemappingFile);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
- addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false,
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
/* IsCS */ true, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
}
@@ -1532,7 +1738,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// are sorted out.
MainFPM.addPass(InstCombinePass());
- MainFPM.addPass(SimplifyCFGPass());
+ MainFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
MainFPM.addPass(SCCPPass());
MainFPM.addPass(InstCombinePass());
MainFPM.addPass(BDCEPass());
@@ -1549,7 +1755,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
MainFPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(MainFPM, Level);
- MainFPM.addPass(JumpThreadingPass());
+ MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
// Create a function that performs CFI checks for cross-DSO calls with
@@ -1572,7 +1778,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// Add late LTO optimization passes.
// Delete basic blocks, which optimization passes may have killed.
- MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
// Drop bodies of available eternally objects to improve GlobalDCE.
MPM.addPass(EliminateAvailableExternallyPass());
@@ -1580,7 +1787,112 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// Now that we have optimized the program, discard unreachable functions.
MPM.addPass(GlobalDCEPass());
- // FIXME: Maybe enable MergeFuncs conditionally after it's ported.
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ return MPM;
+}
+
+ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
+ bool LTOPreLink) {
+ assert(Level == OptimizationLevel::O0 &&
+ "buildO0DefaultPipeline should only be used with O0");
+
+ ModulePassManager MPM(DebugLogging);
+
+ // Add UniqueInternalLinkageNames Pass which renames internal linkage
+ // symbols with unique names.
+ if (PTO.UniqueLinkageNames)
+ MPM.addPass(UniqueInternalLinkageNamesPass());
+
+ if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
+ PGOOpt->Action == PGOOptions::IRUse))
+ addPGOInstrPassesForO0(
+ MPM,
+ /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
+ /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
+
+ for (auto &C : PipelineStartEPCallbacks)
+ C(MPM, Level);
+ for (auto &C : PipelineEarlySimplificationEPCallbacks)
+ C(MPM, Level);
+
+ // Build a minimal pipeline based on the semantics required by LLVM,
+ // which is just that always inlining occurs. Further, disable generating
+ // lifetime intrinsics to avoid enabling further optimizations during
+ // code generation.
+ // However, we need to insert lifetime intrinsics to avoid invalid access
+ // caused by multithreaded coroutines.
+ MPM.addPass(AlwaysInlinerPass(
+ /*InsertLifetimeIntrinsics=*/PTO.Coroutines));
+
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
+ if (EnableMatrix)
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
+
+ if (!CGSCCOptimizerLateEPCallbacks.empty()) {
+ CGSCCPassManager CGPM(DebugLogging);
+ for (auto &C : CGSCCOptimizerLateEPCallbacks)
+ C(CGPM, Level);
+ if (!CGPM.isEmpty())
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+ }
+ if (!LateLoopOptimizationsEPCallbacks.empty()) {
+ LoopPassManager LPM(DebugLogging);
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
+ C(LPM, Level);
+ if (!LPM.isEmpty()) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ createFunctionToLoopPassAdaptor(std::move(LPM))));
+ }
+ }
+ if (!LoopOptimizerEndEPCallbacks.empty()) {
+ LoopPassManager LPM(DebugLogging);
+ for (auto &C : LoopOptimizerEndEPCallbacks)
+ C(LPM, Level);
+ if (!LPM.isEmpty()) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ createFunctionToLoopPassAdaptor(std::move(LPM))));
+ }
+ }
+ if (!ScalarOptimizerLateEPCallbacks.empty()) {
+ FunctionPassManager FPM(DebugLogging);
+ for (auto &C : ScalarOptimizerLateEPCallbacks)
+ C(FPM, Level);
+ if (!FPM.isEmpty())
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ }
+ if (!VectorizerStartEPCallbacks.empty()) {
+ FunctionPassManager FPM(DebugLogging);
+ for (auto &C : VectorizerStartEPCallbacks)
+ C(FPM, Level);
+ if (!FPM.isEmpty())
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ }
+
+ if (PTO.Coroutines) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
+
+ CGSCCPassManager CGPM(DebugLogging);
+ CGPM.addPass(CoroSplitPass());
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor(CoroElidePass()));
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+ }
+
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(MPM, Level);
+
+ if (LTOPreLink)
+ addRequiredLTOPreLinkPasses(MPM);
+
return MPM;
}
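Further down, the pipeline parser routes -passes='default<O0>' (and its *-pre-link variants, but not thinlto/lto proper) through this new buildO0DefaultPipeline() entry point instead of open-coding the O0 logic. A hedged driver sketch, assuming the LLVM 12 headers and an already-parsed Module; runO0Pipeline and its setup are illustrative only, not part of the patch:

    #include "llvm/Analysis/CGSCCPassManager.h"
    #include "llvm/Analysis/LoopAnalysisManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Passes/PassBuilder.h"

    using namespace llvm;

    static void runO0Pipeline(Module &M) {
      LoopAnalysisManager LAM;
      FunctionAnalysisManager FAM;
      CGSCCAnalysisManager CGAM;
      ModuleAnalysisManager MAM;

      PassBuilder PB; // defaults: no TargetMachine, no PGO, no callbacks
      PB.registerModuleAnalyses(MAM);
      PB.registerCGSCCAnalyses(CGAM);
      PB.registerFunctionAnalyses(FAM);
      PB.registerLoopAnalyses(LAM);
      PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

      // Roughly what the parser does for a 'default<O0>' pipeline string.
      ModulePassManager MPM =
          PB.buildO0DefaultPipeline(PassBuilder::OptimizationLevel::O0,
                                    /*LTOPreLink=*/false);
      MPM.run(M, MAM);
    }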
@@ -1606,6 +1918,10 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
// results from `GlobalsAA` through a readonly proxy.
AA.registerModuleAnalysis<GlobalsAA>();
+ // Add target-specific alias analyses.
+ if (TM)
+ TM->registerDefaultAliasAnalyses(AA);
+
return AA;
}
@@ -1622,7 +1938,7 @@ static Optional<int> parseDevirtPassName(StringRef Name) {
if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
return None;
int Count;
- if (Name.getAsInteger(0, Count) || Count <= 0)
+ if (Name.getAsInteger(0, Count) || Count < 0)
return None;
return Count;
}
@@ -1763,6 +2079,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
Result.convertSwitchToLookupTable(Enable);
} else if (ParamName == "keep-loops") {
Result.needCanonicalLoops(Enable);
+ } else if (ParamName == "hoist-common-insts") {
+ Result.hoistCommonInsts(Enable);
} else if (ParamName == "sink-common-insts") {
Result.sinkCommonInsts(Enable);
} else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
@@ -1854,6 +2172,8 @@ Expected<GVNOptions> parseGVNOptions(StringRef Params) {
Result.setPRE(Enable);
} else if (ParamName == "load-pre") {
Result.setLoadPRE(Enable);
+ } else if (ParamName == "split-backedge-load-pre") {
+ Result.setLoadPRESplitBackedge(Enable);
} else if (ParamName == "memdep") {
Result.setMemDep(Enable);
} else {
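The two option-parser hunks above surface the new hoist-common-insts (simplifycfg) and split-backedge-load-pre (gvn) toggles through the textual pipeline syntax. A hedged fragment, assuming a PassBuilder PB wired up as in the earlier O0 sketch and the usual parameter spelling (';'-separated, negatable with a "no-" prefix):

    // Parameterized passes in pipeline text; whether these exact strings are
    // accepted depends on the FUNCTION_PASS_WITH_PARAMS registrations.
    ModulePassManager MPM;
    cantFail(PB.parsePassPipeline(
        MPM, "function(simplifycfg<hoist-common-insts>,"
             "gvn<split-backedge-load-pre;no-memdep>)"));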
@@ -2075,8 +2395,7 @@ PassBuilder::parsePipelineText(StringRef Text) {
}
Error PassBuilder::parseModulePass(ModulePassManager &MPM,
- const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging) {
+ const PipelineElement &E) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -2084,32 +2403,28 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
if (!InnerPipeline.empty()) {
if (Name == "module") {
ModulePassManager NestedMPM(DebugLogging);
- if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline))
return Err;
MPM.addPass(std::move(NestedMPM));
return Error::success();
}
if (Name == "cgscc") {
CGSCCPassManager CGPM(DebugLogging);
- if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
+ if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline))
return Err;
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
return Error::success();
}
if (Name == "function") {
FunctionPassManager FPM(DebugLogging);
- if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
ModulePassManager NestedMPM(DebugLogging);
- if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline))
return Err;
MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM)));
return Error::success();
@@ -2143,31 +2458,10 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
.Case("O3", OptimizationLevel::O3)
.Case("Os", OptimizationLevel::Os)
.Case("Oz", OptimizationLevel::Oz);
- if (L == OptimizationLevel::O0) {
- // Add instrumentation PGO passes -- at O0 we can still do PGO.
- if (PGOOpt && Matches[1] != "thinlto" &&
- (PGOOpt->Action == PGOOptions::IRInstr ||
- PGOOpt->Action == PGOOptions::IRUse))
- addPGOInstrPassesForO0(
- MPM, DebugLogging,
- /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
- /* IsCS */ false, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
-
- // For IR that makes use of coroutines intrinsics, coroutine passes must
- // be run, even at -O0.
- if (PTO.Coroutines) {
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
-
- CGSCCPassManager CGPM(DebugLogging);
- CGPM.addPass(CoroSplitPass());
- CGPM.addPass(createCGSCCToFunctionPassAdaptor(CoroElidePass()));
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
-
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
- }
-
- // Do nothing else at all!
+ if (L == OptimizationLevel::O0 && Matches[1] != "thinlto" &&
+ Matches[1] != "lto") {
+ MPM.addPass(buildO0DefaultPipeline(L, Matches[1] == "thinlto-pre-link" ||
+ Matches[1] == "lto-pre-link"));
return Error::success();
}
@@ -2180,16 +2474,16 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
if (Matches[1] == "default") {
- MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
+ MPM.addPass(buildPerModuleDefaultPipeline(L));
} else if (Matches[1] == "thinlto-pre-link") {
- MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L, DebugLogging));
+ MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L));
} else if (Matches[1] == "thinlto") {
- MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging, nullptr));
+ MPM.addPass(buildThinLTODefaultPipeline(L, nullptr));
} else if (Matches[1] == "lto-pre-link") {
- MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging));
+ MPM.addPass(buildLTOPreLinkDefaultPipeline(L));
} else {
assert(Matches[1] == "lto" && "Not one of the matched options!");
- MPM.addPass(buildLTODefaultPipeline(L, DebugLogging, nullptr));
+ MPM.addPass(buildLTODefaultPipeline(L, nullptr));
}
return Error::success();
}
@@ -2212,6 +2506,41 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
+#define CGSCC_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \
+ return Error::success(); \
+ }
+#define FUNCTION_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS)); \
+ return Error::success(); \
+ }
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \
+ return Error::success(); \
+ }
+#define LOOP_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ MPM.addPass( \
+ createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
+ CREATE_PASS, false, false, DebugLogging))); \
+ return Error::success(); \
+ }
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ MPM.addPass( \
+ createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
+ CREATE_PASS(Params.get()), false, false, DebugLogging))); \
+ return Error::success(); \
+ }
#include "PassRegistry.def"
for (auto &C : ModulePipelineParsingCallbacks)
@@ -2223,8 +2552,7 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
}
Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
- const PipelineElement &E, bool VerifyEachPass,
- bool DebugLogging) {
+ const PipelineElement &E) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -2232,8 +2560,7 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
if (!InnerPipeline.empty()) {
if (Name == "cgscc") {
CGSCCPassManager NestedCGPM(DebugLogging);
- if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
CGPM.addPass(std::move(NestedCGPM));
@@ -2241,8 +2568,7 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
}
if (Name == "function") {
FunctionPassManager FPM(DebugLogging);
- if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
@@ -2250,16 +2576,14 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
}
if (auto Count = parseRepeatPassName(Name)) {
CGSCCPassManager NestedCGPM(DebugLogging);
- if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline))
return Err;
CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM)));
return Error::success();
}
if (auto MaxRepetitions = parseDevirtPassName(Name)) {
CGSCCPassManager NestedCGPM(DebugLogging);
- if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline))
return Err;
CGPM.addPass(
createDevirtSCCRepeatedPass(std::move(NestedCGPM), *MaxRepetitions));
@@ -2295,6 +2619,36 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
+#define FUNCTION_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS)); \
+ return Error::success(); \
+ }
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \
+ return Error::success(); \
+ }
+#define LOOP_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ CGPM.addPass( \
+ createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
+ CREATE_PASS, false, false, DebugLogging))); \
+ return Error::success(); \
+ }
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ CGPM.addPass( \
+ createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
+ CREATE_PASS(Params.get()), false, false, DebugLogging))); \
+ return Error::success(); \
+ }
#include "PassRegistry.def"
for (auto &C : CGSCCPipelineParsingCallbacks)
@@ -2306,8 +2660,7 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
}
Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
- const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging) {
+ const PipelineElement &E) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -2315,8 +2668,7 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
if (!InnerPipeline.empty()) {
if (Name == "function") {
FunctionPassManager NestedFPM(DebugLogging);
- if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
FPM.addPass(std::move(NestedFPM));
@@ -2324,19 +2676,19 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
}
if (Name == "loop" || Name == "loop-mssa") {
LoopPassManager LPM(DebugLogging);
- if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
+ if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
bool UseMemorySSA = (Name == "loop-mssa");
+ bool UseBFI = llvm::any_of(
+ InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; });
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
- DebugLogging));
+ UseBFI, DebugLogging));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
FunctionPassManager NestedFPM(DebugLogging);
- if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline))
return Err;
FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM)));
return Error::success();
@@ -2378,6 +2730,25 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
+// FIXME: UseMemorySSA is set to false. Maybe we could do things like:
+// bool UseMemorySSA = !("canon-freeze" || "loop-predication" ||
+// "guard-widening");
+// The risk is that it may become obsolete if we're not careful.
+#define LOOP_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false, \
+ DebugLogging)); \
+ return Error::success(); \
+ }
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), \
+ false, false, DebugLogging)); \
+ return Error::success(); \
+ }
#include "PassRegistry.def"
for (auto &C : FunctionPipelineParsingCallbacks)
@@ -2388,8 +2759,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
inconvertibleErrorCode());
}
-Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging) {
+Error PassBuilder::parseLoopPass(LoopPassManager &LPM,
+ const PipelineElement &E) {
StringRef Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -2397,8 +2768,7 @@ Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
if (!InnerPipeline.empty()) {
if (Name == "loop") {
LoopPassManager NestedLPM(DebugLogging);
- if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
LPM.addPass(std::move(NestedLPM));
@@ -2406,8 +2776,7 @@ Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
}
if (auto Count = parseRepeatPassName(Name)) {
LoopPassManager NestedLPM(DebugLogging);
- if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline,
- VerifyEachPass, DebugLogging))
+ if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline))
return Err;
LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM)));
return Error::success();
@@ -2481,39 +2850,28 @@ bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) {
}
Error PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+ ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
- if (auto Err = parseLoopPass(LPM, Element, VerifyEachPass, DebugLogging))
+ if (auto Err = parseLoopPass(LPM, Element))
return Err;
- // FIXME: No verifier support for Loop passes!
}
return Error::success();
}
-Error PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseFunctionPassPipeline(
+ FunctionPassManager &FPM, ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
- if (auto Err =
- parseFunctionPass(FPM, Element, VerifyEachPass, DebugLogging))
+ if (auto Err = parseFunctionPass(FPM, Element))
return Err;
- if (VerifyEachPass)
- FPM.addPass(VerifierPass());
}
return Error::success();
}
Error PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+ ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
- if (auto Err = parseCGSCCPass(CGPM, Element, VerifyEachPass, DebugLogging))
+ if (auto Err = parseCGSCCPass(CGPM, Element))
return Err;
- // FIXME: No verifier support for CGSCC passes!
}
return Error::success();
}
@@ -2532,14 +2890,10 @@ void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
}
Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+ ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
- if (auto Err = parseModulePass(MPM, Element, VerifyEachPass, DebugLogging))
+ if (auto Err = parseModulePass(MPM, Element))
return Err;
- if (VerifyEachPass)
- MPM.addPass(VerifierPass());
}
return Error::success();
}
@@ -2548,8 +2902,7 @@ Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
// FIXME: Should this routine accept a TargetMachine or require the caller to
// pre-populate the analysis managers with target-specific stuff?
Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
- StringRef PipelineText,
- bool VerifyEachPass, bool DebugLogging) {
+ StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
@@ -2570,7 +2923,7 @@ Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}};
} else {
for (auto &C : TopLevelPipelineParsingCallbacks)
- if (C(MPM, *Pipeline, VerifyEachPass, DebugLogging))
+ if (C(MPM, *Pipeline, DebugLogging))
return Error::success();
// Unknown pass or pipeline name!
@@ -2583,16 +2936,14 @@ Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
}
}
- if (auto Err =
- parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging))
+ if (auto Err = parseModulePassPipeline(MPM, *Pipeline))
return Err;
return Error::success();
}
// Primary pass pipeline description parsing routine for a \c CGSCCPassManager
Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
- StringRef PipelineText,
- bool VerifyEachPass, bool DebugLogging) {
+ StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
@@ -2607,8 +2958,7 @@ Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
.str(),
inconvertibleErrorCode());
- if (auto Err =
- parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging))
+ if (auto Err = parseCGSCCPassPipeline(CGPM, *Pipeline))
return Err;
return Error::success();
}
@@ -2616,8 +2966,7 @@ Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
// Primary pass pipeline description parsing routine for a \c
// FunctionPassManager
Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM,
- StringRef PipelineText,
- bool VerifyEachPass, bool DebugLogging) {
+ StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
@@ -2632,24 +2981,21 @@ Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM,
.str(),
inconvertibleErrorCode());
- if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass,
- DebugLogging))
+ if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline))
return Err;
return Error::success();
}
// Primary pass pipeline description parsing routine for a \c LoopPassManager
Error PassBuilder::parsePassPipeline(LoopPassManager &CGPM,
- StringRef PipelineText,
- bool VerifyEachPass, bool DebugLogging) {
+ StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
formatv("invalid pipeline '{0}'", PipelineText).str(),
inconvertibleErrorCode());
- if (auto Err =
- parseLoopPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging))
+ if (auto Err = parseLoopPassPipeline(CGPM, *Pipeline))
return Err;
return Error::success();
@@ -2676,6 +3022,9 @@ Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
}
bool PassBuilder::isAAPassName(StringRef PassName) {
+#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
+ if (PassName == NAME) \
+ return true;
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
@@ -2693,9 +3042,21 @@ bool PassBuilder::isAnalysisPassName(StringRef PassName) {
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
-#define CGSSC_ANALYSIS(NAME, CREATE_PASS) \
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+ if (PassName == NAME) \
+ return true;
+#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
+ if (PassName == NAME) \
+ return true;
+#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#include "PassRegistry.def"
return false;
}
+
+void PassBuilder::registerParseTopLevelPipelineCallback(
+ const std::function<bool(ModulePassManager &, ArrayRef<PipelineElement>,
+ bool DebugLogging)> &C) {
+ TopLevelPipelineParsingCallbacks.push_back(C);
+}
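
The hunks above remove the VerifyEachPass and DebugLogging parameters from parseModulePass() and the parse*PassPipeline() overloads; per-pass verification now comes from the instrumentation layer (see the StandardInstrumentations hunks later in this commit) rather than from VerifierPass insertions during parsing. A minimal caller-side sketch, assuming a tool that already owns a PassBuilder and a ModulePassManager; the helper name and pipeline string are invented for illustration and are not part of this change:

    #include "llvm/Passes/PassBuilder.h"
    using namespace llvm;

    // Before this change a caller wrote:
    //   PB.parsePassPipeline(MPM, Text, /*VerifyEachPass=*/true, DebugLogging);
    // After it, only the pipeline text remains:
    Error buildFromText(PassBuilder &PB, ModulePassManager &MPM,
                        StringRef PipelineText) {
      // e.g. PipelineText == "function(instcombine,loop(licm,loop-rotate))"
      if (Error Err = PB.parsePassPipeline(MPM, PipelineText))
        return Err; // unknown pass or pipeline names surface here
      return Error::success();
    }
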
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
index dfdfc3d05976..877cb9ed13b3 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
+++ b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
@@ -28,6 +28,7 @@ MODULE_ANALYSIS("verify", VerifierAnalysis())
MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis())
MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis())
+MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis())
#ifndef MODULE_ALIAS_ANALYSIS
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -42,6 +43,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
#endif
MODULE_PASS("always-inline", AlwaysInlinerPass())
MODULE_PASS("attributor", AttributorPass())
+MODULE_PASS("annotation2metadata", Annotation2MetadataPass())
MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
MODULE_PASS("cg-profile", CGProfilePass())
@@ -49,6 +51,7 @@ MODULE_PASS("constmerge", ConstantMergePass())
MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
+MODULE_PASS("extract-blocks", BlockExtractorPass())
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globaldce", GlobalDCEPass())
@@ -59,16 +62,25 @@ MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false))
MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
+MODULE_PASS("inliner-wrapper-no-mandatory-first", ModuleInlinerWrapperPass(
+ getInlineParams(),
+ DebugLogging,
+ false))
MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
MODULE_PASS("instrorderfile", InstrOrderFilePass())
MODULE_PASS("instrprof", InstrProfiling())
MODULE_PASS("internalize", InternalizePass())
MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("ipsccp", IPSCCPPass())
-MODULE_PASS("lowertypetests", LowerTypeTestsPass(nullptr, nullptr))
+MODULE_PASS("iroutliner", IROutlinerPass())
+MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs()))
+MODULE_PASS("loop-extract", LoopExtractorPass())
+MODULE_PASS("lowertypetests", LowerTypeTestsPass())
+MODULE_PASS("metarenamer", MetaRenamerPass())
MODULE_PASS("mergefunc", MergeFunctionsPass())
MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
+MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
MODULE_PASS("partial-inliner", PartialInlinerPass())
MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen())
@@ -78,23 +90,36 @@ MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs()))
MODULE_PASS("print", PrintModulePass(dbgs()))
MODULE_PASS("print-lcg", LazyCallGraphPrinterPass(dbgs()))
MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs()))
+MODULE_PASS("print-must-be-executed-contexts", MustBeExecutedContextPrinterPass(dbgs()))
MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs()))
+MODULE_PASS("print<module-debuginfo>", ModuleDebugInfoPrinterPass(dbgs()))
MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC())
MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
-MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
+MODULE_PASS("rpo-function-attrs", ReversePostOrderFunctionAttrsPass())
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
MODULE_PASS("scc-oz-module-inliner",
- buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging))
+ buildInlinerPipeline(OptimizationLevel::Oz, ThinOrFullLTOPhase::None))
+MODULE_PASS("loop-extract-single", LoopExtractorPass(1))
+MODULE_PASS("strip", StripSymbolsPass())
+MODULE_PASS("strip-dead-debug-info", StripDeadDebugInfoPass())
+MODULE_PASS("pseudo-probe", SampleProfileProbePass(TM))
MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
+MODULE_PASS("strip-debug-declare", StripDebugDeclarePass())
+MODULE_PASS("strip-nondebug", StripNonDebugSymbolsPass())
+MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass())
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
-MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
+MODULE_PASS("unique-internal-linkage-names", UniqueInternalLinkageNamesPass())
MODULE_PASS("verify", VerifierPass())
+MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
+MODULE_PASS("dfsan", DataFlowSanitizerPass())
MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
MODULE_PASS("msan-module", MemorySanitizerPass({}))
MODULE_PASS("tsan-module", ThreadSanitizerPass())
MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
+MODULE_PASS("memprof-module", ModuleMemProfilerPass())
MODULE_PASS("poison-checking", PoisonCheckingPass())
+MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
#undef MODULE_PASS
#ifndef CGSCC_ANALYSIS
@@ -129,10 +154,10 @@ FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis())
FUNCTION_ANALYSIS("demanded-bits", DemandedBitsAnalysis())
FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis())
+FUNCTION_ANALYSIS("func-properties", FunctionPropertiesAnalysis())
FUNCTION_ANALYSIS("loops", LoopAnalysis())
FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis())
FUNCTION_ANALYSIS("da", DependenceAnalysis())
-FUNCTION_ANALYSIS("inliner-features", InlineFeaturesAnalysis())
FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis())
FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis())
FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis())
@@ -155,9 +180,10 @@ FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
FUNCTION_ALIAS_ANALYSIS("basic-aa", BasicAA())
FUNCTION_ALIAS_ANALYSIS("cfl-anders-aa", CFLAndersAA())
FUNCTION_ALIAS_ANALYSIS("cfl-steens-aa", CFLSteensAA())
+FUNCTION_ALIAS_ANALYSIS("objc-arc-aa", objcarc::ObjCARCAA())
FUNCTION_ALIAS_ANALYSIS("scev-aa", SCEVAA())
FUNCTION_ALIAS_ANALYSIS("scoped-noalias-aa", ScopedNoAliasAA())
-FUNCTION_ALIAS_ANALYSIS("type-based-aa", TypeBasedAA())
+FUNCTION_ALIAS_ANALYSIS("tbaa", TypeBasedAA())
#undef FUNCTION_ALIAS_ANALYSIS
#undef FUNCTION_ANALYSIS
@@ -171,11 +197,13 @@ FUNCTION_PASS("aggressive-instcombine", AggressiveInstCombinePass())
FUNCTION_PASS("assume-builder", AssumeBuilderPass())
FUNCTION_PASS("assume-simplify", AssumeSimplifyPass())
FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass())
+FUNCTION_PASS("annotation-remarks", AnnotationRemarksPass())
FUNCTION_PASS("bdce", BDCEPass())
FUNCTION_PASS("bounds-checking", BoundsCheckingPass())
FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
FUNCTION_PASS("consthoist", ConstantHoistingPass())
+FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass())
FUNCTION_PASS("chr", ControlHeightReductionPass())
FUNCTION_PASS("coro-early", CoroEarlyPass())
FUNCTION_PASS("coro-elide", CoroElidePass())
@@ -189,32 +217,43 @@ FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true))
FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/false))
+FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/true))
FUNCTION_PASS("gvn-hoist", GVNHoistPass())
+FUNCTION_PASS("gvn-sink", GVNSinkPass())
+FUNCTION_PASS("helloworld", HelloWorldPass())
+FUNCTION_PASS("infer-address-spaces", InferAddressSpacesPass())
FUNCTION_PASS("instcombine", InstCombinePass())
+FUNCTION_PASS("instcount", InstCountPass())
FUNCTION_PASS("instsimplify", InstSimplifyPass())
FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
FUNCTION_PASS("irce", IRCEPass())
FUNCTION_PASS("float2int", Float2IntPass())
FUNCTION_PASS("no-op-function", NoOpFunctionPass())
FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
+FUNCTION_PASS("lint", LintPass())
FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings())
+FUNCTION_PASS("instnamer", InstructionNamerPass())
FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
FUNCTION_PASS("lower-matrix-intrinsics", LowerMatrixIntrinsicsPass())
+FUNCTION_PASS("lower-matrix-intrinsics-minimal", LowerMatrixIntrinsicsPass(true))
FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
FUNCTION_PASS("loop-sink", LoopSinkPass())
FUNCTION_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
+FUNCTION_PASS("loop-flatten", LoopFlattenPass())
FUNCTION_PASS("lowerinvoke", LowerInvokePass())
+FUNCTION_PASS("lowerswitch", LowerSwitchPass())
FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
+FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
FUNCTION_PASS("newgvn", NewGVNPass())
FUNCTION_PASS("jump-threading", JumpThreadingPass())
@@ -224,6 +263,10 @@ FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
FUNCTION_PASS("loop-fusion", LoopFusePass())
FUNCTION_PASS("loop-distribute", LoopDistributePass())
+FUNCTION_PASS("loop-versioning", LoopVersioningPass())
+FUNCTION_PASS("objc-arc", ObjCARCOptPass())
+FUNCTION_PASS("objc-arc-contract", ObjCARCContractPass())
+FUNCTION_PASS("objc-arc-expand", ObjCARCExpandPass())
FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())
FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
@@ -232,26 +275,42 @@ FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
+FUNCTION_PASS("print<delinearization>", DelinearizationPrinterPass(dbgs()))
FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs()))
FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs()))
+FUNCTION_PASS("print<func-properties>", FunctionPropertiesPrinterPass(dbgs()))
FUNCTION_PASS("print<inline-cost>", InlineCostAnnotationPrinterPass(dbgs()))
+FUNCTION_PASS("print<inliner-size-estimator>",
+ InlineSizeEstimatorAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs()))
FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs()))
FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs()))
FUNCTION_PASS("print<stack-safety-local>", StackSafetyPrinterPass(dbgs()))
+// TODO: rename to print<foo> after NPM switch
+FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(dbgs()))
FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(dbgs()))
+FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(dbgs()))
+FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(dbgs()))
FUNCTION_PASS("reassociate", ReassociatePass())
+FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass())
+FUNCTION_PASS("reg2mem", RegToMemPass())
+FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass())
FUNCTION_PASS("scalarizer", ScalarizerPass())
+FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass())
FUNCTION_PASS("sccp", SCCPPass())
FUNCTION_PASS("simplifycfg", SimplifyCFGPass())
FUNCTION_PASS("sink", SinkingPass())
FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
+FUNCTION_PASS("slsr", StraightLineStrengthReducePass())
FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass())
FUNCTION_PASS("sroa", SROA())
+FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
+FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
+FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass())
FUNCTION_PASS("vector-combine", VectorCombinePass())
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
@@ -268,6 +327,7 @@ FUNCTION_PASS("kasan", AddressSanitizerPass(true, false, false))
FUNCTION_PASS("msan", MemorySanitizerPass({}))
FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true}))
FUNCTION_PASS("tsan", ThreadSanitizerPass())
+FUNCTION_PASS("memprof", MemProfilerPass())
#undef FUNCTION_PASS
#ifndef FUNCTION_PASS_WITH_PARAMS
@@ -316,7 +376,7 @@ FUNCTION_PASS_WITH_PARAMS("print<stack-lifetime>",
LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
LOOP_ANALYSIS("access-info", LoopAccessAnalysis())
LOOP_ANALYSIS("ddg", DDGAnalysis())
-LOOP_ANALYSIS("ivusers", IVUsersAnalysis())
+LOOP_ANALYSIS("iv-users", IVUsersAnalysis())
LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#undef LOOP_ANALYSIS
@@ -324,26 +384,30 @@ LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#define LOOP_PASS(NAME, CREATE_PASS)
#endif
LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
+LOOP_PASS("dot-ddg", DDGDotPrinterPass())
LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
LOOP_PASS("licm", LICMPass())
LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
-LOOP_PASS("rotate", LoopRotatePass())
+LOOP_PASS("loop-interchange", LoopInterchangePass())
+LOOP_PASS("loop-rotate", LoopRotatePass())
LOOP_PASS("no-op-loop", NoOpLoopPass())
LOOP_PASS("print", PrintLoopPass(dbgs()))
LOOP_PASS("loop-deletion", LoopDeletionPass())
-LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass())
+LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass())
LOOP_PASS("loop-unroll-full", LoopFullUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs()))
-LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
+LOOP_PASS("print<iv-users>", IVUsersPrinterPass(dbgs()))
LOOP_PASS("print<loopnest>", LoopNestPrinterPass(dbgs()))
LOOP_PASS("print<loop-cache-cost>", LoopCachePrinterPass(dbgs()))
LOOP_PASS("loop-predication", LoopPredicationPass())
LOOP_PASS("guard-widening", GuardWideningPass())
LOOP_PASS("simple-loop-unswitch", SimpleLoopUnswitchPass())
+LOOP_PASS("loop-reroll", LoopRerollPass())
+LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass())
#undef LOOP_PASS
#ifndef LOOP_PASS_WITH_PARAMS
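
The PassRegistry.def table above is consumed as an X-macro header: PassBuilder.cpp defines MODULE_PASS, FUNCTION_PASS, LOOP_PASS, and friends locally, then includes the file, so each table row expands into a name check plus the appropriate adaptor. That is why the hunks in this file only add macro guards and table rows. Written out by hand (illustrative expansion, not literal preprocessor output), the "instcombine" row becomes, inside parseModulePass() with the FUNCTION_PASS definition shown earlier:

    if (Name == "instcombine") {
      MPM.addPass(createModuleToFunctionPassAdaptor(InstCombinePass()));
      return Error::success();
    }
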
diff --git a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
index 1e1a6b98a65a..6795aed7b04e 100644
--- a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -13,32 +13,98 @@
//===----------------------------------------------------------------------===//
#include "llvm/Passes/StandardInstrumentations.h"
+#include "llvm/ADT/Any.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
+#include <unordered_set>
+#include <vector>
using namespace llvm;
+cl::opt<bool> PreservedCFGCheckerInstrumentation::VerifyPreservedCFG(
+ "verify-cfg-preserved", cl::Hidden,
+#ifdef NDEBUG
+ cl::init(false));
+#else
+ cl::init(true));
+#endif
+
+// FIXME: Change `-debug-pass-manager` from boolean to enum type. Similar to
+// `-debug-pass` in legacy PM.
+static cl::opt<bool>
+ DebugPMVerbose("debug-pass-manager-verbose", cl::Hidden, cl::init(false),
+ cl::desc("Print all pass management debugging information. "
+ "`-debug-pass-manager` must also be specified"));
+
+// An option that prints out the IR after passes, similar to
+// -print-after-all except that it only prints the IR after passes that
+// change the IR. Those passes that do not make changes to the IR are
+// reported as not making any changes. In addition, the initial IR is
+// also reported. Other hidden options affect the output from this
+// option. -filter-passes will limit the output to the named passes
+// that actually change the IR and other passes are reported as filtered out.
+// The specified passes will either be reported as making no changes (with
+// no IR reported) or the changed IR will be reported. Also, the
+// -filter-print-funcs and -print-module-scope options will do similar
+// filtering based on function name, reporting changed IRs as functions (or
+// modules if -print-module-scope is specified) for a particular function
+// or indicating that the IR has been filtered out. The extra options
+// can be combined, allowing only changed IRs for certain passes on certain
+// functions to be reported in different formats, with the rest being
+// reported as filtered out. The -print-before-changed option will print
+// the IR as it was before each pass that changed it. The optional
+// value of quiet will only report when the IR changes, suppressing
+// all other messages, including the initial IR.
+enum ChangePrinter { NoChangePrinter, PrintChangedVerbose, PrintChangedQuiet };
+static cl::opt<ChangePrinter> PrintChanged(
+ "print-changed", cl::desc("Print changed IRs"), cl::Hidden,
+ cl::ValueOptional, cl::init(NoChangePrinter),
+ cl::values(clEnumValN(PrintChangedQuiet, "quiet", "Run in quiet mode"),
+ // Sentinel value for unspecified option.
+ clEnumValN(PrintChangedVerbose, "", "")));
+
+// An option that supports the -print-changed option. See
+// the description for -print-changed for an explanation of the use
+// of this option. Note that this option has no effect without -print-changed.
+static cl::list<std::string>
+ PrintPassesList("filter-passes", cl::value_desc("pass names"),
+ cl::desc("Only consider IR changes for passes whose names "
+ "match for the print-changed option"),
+ cl::CommaSeparated, cl::Hidden);
+// An option that supports the -print-changed option. See
+// the description for -print-changed for an explanation of the use
+// of this option. Note that this option has no effect without -print-changed.
+static cl::opt<bool>
+ PrintChangedBefore("print-before-changed",
+ cl::desc("Print before passes that change them"),
+ cl::init(false), cl::Hidden);
+
namespace {
/// Extracting Module out of \p IR unit. Also fills a textual description
/// of \p IR for use in header when printing.
-Optional<std::pair<const Module *, std::string>> unwrapModule(Any IR) {
+Optional<std::pair<const Module *, std::string>>
+unwrapModule(Any IR, bool Force = false) {
if (any_isa<const Module *>(IR))
return std::make_pair(any_cast<const Module *>(IR), std::string());
if (any_isa<const Function *>(IR)) {
const Function *F = any_cast<const Function *>(IR);
- if (!llvm::isFunctionInPrintList(F->getName()))
+ if (!Force && !isFunctionInPrintList(F->getName()))
return None;
+
const Module *M = F->getParent();
return std::make_pair(M, formatv(" (function: {0})", F->getName()).str());
}
@@ -47,18 +113,19 @@ Optional<std::pair<const Module *, std::string>> unwrapModule(Any IR) {
const LazyCallGraph::SCC *C = any_cast<const LazyCallGraph::SCC *>(IR);
for (const LazyCallGraph::Node &N : *C) {
const Function &F = N.getFunction();
- if (!F.isDeclaration() && isFunctionInPrintList(F.getName())) {
+ if (Force || (!F.isDeclaration() && isFunctionInPrintList(F.getName()))) {
const Module *M = F.getParent();
return std::make_pair(M, formatv(" (scc: {0})", C->getName()).str());
}
}
+ assert(!Force && "Expected to have made a pair when forced.");
return None;
}
if (any_isa<const Loop *>(IR)) {
const Loop *L = any_cast<const Loop *>(IR);
const Function *F = L->getHeader()->getParent();
- if (!isFunctionInPrintList(F->getName()))
+ if (!Force && !isFunctionInPrintList(F->getName()))
return None;
const Module *M = F->getParent();
std::string LoopName;
@@ -70,65 +137,92 @@ Optional<std::pair<const Module *, std::string>> unwrapModule(Any IR) {
llvm_unreachable("Unknown IR unit");
}
-void printIR(const Function *F, StringRef Banner,
- StringRef Extra = StringRef()) {
- if (!llvm::isFunctionInPrintList(F->getName()))
+void printIR(raw_ostream &OS, const Function *F, StringRef Banner,
+ StringRef Extra = StringRef(), bool Brief = false) {
+ if (Brief) {
+ OS << F->getName() << '\n';
+ return;
+ }
+
+ if (!isFunctionInPrintList(F->getName()))
return;
- dbgs() << Banner << Extra << "\n" << static_cast<const Value &>(*F);
+ OS << Banner << Extra << "\n" << static_cast<const Value &>(*F);
}
-void printIR(const Module *M, StringRef Banner, StringRef Extra = StringRef()) {
- if (llvm::isFunctionInPrintList("*") || llvm::forcePrintModuleIR()) {
- dbgs() << Banner << Extra << "\n";
- M->print(dbgs(), nullptr, false);
+void printIR(raw_ostream &OS, const Module *M, StringRef Banner,
+ StringRef Extra = StringRef(), bool Brief = false,
+ bool ShouldPreserveUseListOrder = false) {
+ if (Brief) {
+ OS << M->getName() << '\n';
+ return;
+ }
+
+ if (isFunctionInPrintList("*") || forcePrintModuleIR()) {
+ OS << Banner << Extra << "\n";
+ M->print(OS, nullptr, ShouldPreserveUseListOrder);
} else {
for (const auto &F : M->functions()) {
- printIR(&F, Banner, Extra);
+ printIR(OS, &F, Banner, Extra);
}
}
}
-void printIR(const LazyCallGraph::SCC *C, StringRef Banner,
- StringRef Extra = StringRef()) {
+void printIR(raw_ostream &OS, const LazyCallGraph::SCC *C, StringRef Banner,
+ StringRef Extra = StringRef(), bool Brief = false) {
+ if (Brief) {
+ OS << *C << '\n';
+ return;
+ }
+
bool BannerPrinted = false;
for (const LazyCallGraph::Node &N : *C) {
const Function &F = N.getFunction();
- if (!F.isDeclaration() && llvm::isFunctionInPrintList(F.getName())) {
+ if (!F.isDeclaration() && isFunctionInPrintList(F.getName())) {
if (!BannerPrinted) {
- dbgs() << Banner << Extra << "\n";
+ OS << Banner << Extra << "\n";
BannerPrinted = true;
}
- F.print(dbgs());
+ F.print(OS);
}
}
}
-void printIR(const Loop *L, StringRef Banner) {
+
+void printIR(raw_ostream &OS, const Loop *L, StringRef Banner,
+ bool Brief = false) {
+ if (Brief) {
+ OS << *L;
+ return;
+ }
+
const Function *F = L->getHeader()->getParent();
- if (!llvm::isFunctionInPrintList(F->getName()))
+ if (!isFunctionInPrintList(F->getName()))
return;
- llvm::printLoop(const_cast<Loop &>(*L), dbgs(), std::string(Banner));
+ printLoop(const_cast<Loop &>(*L), OS, std::string(Banner));
}
/// Generic IR-printing helper that unpacks a pointer to IRUnit wrapped into
/// llvm::Any and does actual print job.
-void unwrapAndPrint(Any IR, StringRef Banner, bool ForceModule = false) {
+void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner,
+ bool ForceModule = false, bool Brief = false,
+ bool ShouldPreserveUseListOrder = false) {
if (ForceModule) {
if (auto UnwrappedModule = unwrapModule(IR))
- printIR(UnwrappedModule->first, Banner, UnwrappedModule->second);
+ printIR(OS, UnwrappedModule->first, Banner, UnwrappedModule->second,
+ Brief, ShouldPreserveUseListOrder);
return;
}
if (any_isa<const Module *>(IR)) {
const Module *M = any_cast<const Module *>(IR);
assert(M && "module should be valid for printing");
- printIR(M, Banner);
+ printIR(OS, M, Banner, "", Brief, ShouldPreserveUseListOrder);
return;
}
if (any_isa<const Function *>(IR)) {
const Function *F = any_cast<const Function *>(IR);
assert(F && "function should be valid for printing");
- printIR(F, Banner);
+ printIR(OS, F, Banner, "", Brief);
return;
}
@@ -136,21 +230,240 @@ void unwrapAndPrint(Any IR, StringRef Banner, bool ForceModule = false) {
const LazyCallGraph::SCC *C = any_cast<const LazyCallGraph::SCC *>(IR);
assert(C && "scc should be valid for printing");
std::string Extra = std::string(formatv(" (scc: {0})", C->getName()));
- printIR(C, Banner, Extra);
+ printIR(OS, C, Banner, Extra, Brief);
return;
}
if (any_isa<const Loop *>(IR)) {
const Loop *L = any_cast<const Loop *>(IR);
assert(L && "Loop should be valid for printing");
- printIR(L, Banner);
+ printIR(OS, L, Banner, Brief);
return;
}
llvm_unreachable("Unknown wrapped IR type");
}
+// Return true when this is a pass for which changes should be ignored
+bool isIgnored(StringRef PassID) {
+ return isSpecialPass(PassID,
+ {"PassManager", "PassAdaptor", "AnalysisManagerProxy"});
+}
+
} // namespace
+template <typename IRUnitT>
+ChangeReporter<IRUnitT>::~ChangeReporter<IRUnitT>() {
+ assert(BeforeStack.empty() && "Problem with Change Printer stack.");
+}
+
+template <typename IRUnitT>
+bool ChangeReporter<IRUnitT>::isInterestingFunction(const Function &F) {
+ return isFunctionInPrintList(F.getName());
+}
+
+template <typename IRUnitT>
+bool ChangeReporter<IRUnitT>::isInterestingPass(StringRef PassID) {
+ if (isIgnored(PassID))
+ return false;
+
+ static std::unordered_set<std::string> PrintPassNames(PrintPassesList.begin(),
+ PrintPassesList.end());
+ return PrintPassNames.empty() || PrintPassNames.count(PassID.str());
+}
+
+// Return true when this is a pass on IR for which printing
+// of changes is desired.
+template <typename IRUnitT>
+bool ChangeReporter<IRUnitT>::isInteresting(Any IR, StringRef PassID) {
+ if (!isInterestingPass(PassID))
+ return false;
+ if (any_isa<const Function *>(IR))
+ return isInterestingFunction(*any_cast<const Function *>(IR));
+ return true;
+}
+
+template <typename IRUnitT>
+void ChangeReporter<IRUnitT>::saveIRBeforePass(Any IR, StringRef PassID) {
+ // Always need to place something on the stack because invalidated passes
+ // are not given the IR so it cannot be determined whether the pass was for
+ // something that was filtered out.
+ BeforeStack.emplace_back();
+
+ if (!isInteresting(IR, PassID))
+ return;
+ // Is this the initial IR?
+ if (InitialIR) {
+ InitialIR = false;
+ if (VerboseMode)
+ handleInitialIR(IR);
+ }
+
+ // Save the IR representation on the stack.
+ IRUnitT &Data = BeforeStack.back();
+ generateIRRepresentation(IR, PassID, Data);
+}
+
+template <typename IRUnitT>
+void ChangeReporter<IRUnitT>::handleIRAfterPass(Any IR, StringRef PassID) {
+ assert(!BeforeStack.empty() && "Unexpected empty stack encountered.");
+ std::string Name;
+
+ // unwrapModule has inconsistent handling of names for function IRs.
+ if (any_isa<const Function *>(IR)) {
+ const Function *F = any_cast<const Function *>(IR);
+ Name = formatv(" (function: {0})", F->getName()).str();
+ } else {
+ if (auto UM = unwrapModule(IR))
+ Name = UM->second;
+ }
+ if (Name == "")
+ Name = " (module)";
+
+ if (isIgnored(PassID)) {
+ if (VerboseMode)
+ handleIgnored(PassID, Name);
+ } else if (!isInteresting(IR, PassID)) {
+ if (VerboseMode)
+ handleFiltered(PassID, Name);
+ } else {
+ // Get the before rep from the stack
+ IRUnitT &Before = BeforeStack.back();
+ // Create the after rep
+ IRUnitT After;
+ generateIRRepresentation(IR, PassID, After);
+
+ // Was there a change in IR?
+ if (same(Before, After)) {
+ if (VerboseMode)
+ omitAfter(PassID, Name);
+ } else
+ handleAfter(PassID, Name, Before, After, IR);
+ }
+ BeforeStack.pop_back();
+}
+
+template <typename IRUnitT>
+void ChangeReporter<IRUnitT>::handleInvalidatedPass(StringRef PassID) {
+ assert(!BeforeStack.empty() && "Unexpected empty stack encountered.");
+
+ // Always flag it as invalidated as we cannot determine when
+ // a pass for a filtered function is invalidated since we do not
+ // get the IR in the call. Also, the output is just alternate
+ // forms of the banner anyway.
+ if (VerboseMode)
+ handleInvalidated(PassID);
+ BeforeStack.pop_back();
+}
+
+template <typename IRUnitT>
+void ChangeReporter<IRUnitT>::registerRequiredCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ PIC.registerBeforeNonSkippedPassCallback(
+ [this](StringRef P, Any IR) { saveIRBeforePass(IR, P); });
+
+ PIC.registerAfterPassCallback(
+ [this](StringRef P, Any IR, const PreservedAnalyses &) {
+ handleIRAfterPass(IR, P);
+ });
+ PIC.registerAfterPassInvalidatedCallback(
+ [this](StringRef P, const PreservedAnalyses &) {
+ handleInvalidatedPass(P);
+ });
+}
+
+template <typename IRUnitT>
+TextChangeReporter<IRUnitT>::TextChangeReporter(bool Verbose)
+ : ChangeReporter<IRUnitT>(Verbose), Out(dbgs()) {}
+
+template <typename IRUnitT>
+void TextChangeReporter<IRUnitT>::handleInitialIR(Any IR) {
+ // Always print the module.
+ // Unwrap and print directly to avoid filtering problems in general routines.
+ auto UnwrappedModule = unwrapModule(IR, /*Force=*/true);
+ assert(UnwrappedModule && "Expected module to be unwrapped when forced.");
+ Out << "*** IR Dump At Start: ***" << UnwrappedModule->second << "\n";
+ UnwrappedModule->first->print(Out, nullptr,
+ /*ShouldPreserveUseListOrder=*/true);
+}
+
+template <typename IRUnitT>
+void TextChangeReporter<IRUnitT>::omitAfter(StringRef PassID,
+ std::string &Name) {
+ Out << formatv("*** IR Dump After {0}{1} omitted because no change ***\n",
+ PassID, Name);
+}
+
+template <typename IRUnitT>
+void TextChangeReporter<IRUnitT>::handleInvalidated(StringRef PassID) {
+ Out << formatv("*** IR Pass {0} invalidated ***\n", PassID);
+}
+
+template <typename IRUnitT>
+void TextChangeReporter<IRUnitT>::handleFiltered(StringRef PassID,
+ std::string &Name) {
+ SmallString<20> Banner =
+ formatv("*** IR Dump After {0}{1} filtered out ***\n", PassID, Name);
+ Out << Banner;
+}
+
+template <typename IRUnitT>
+void TextChangeReporter<IRUnitT>::handleIgnored(StringRef PassID,
+ std::string &Name) {
+ Out << formatv("*** IR Pass {0}{1} ignored ***\n", PassID, Name);
+}
+
+IRChangedPrinter::~IRChangedPrinter() {}
+
+void IRChangedPrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+ if (PrintChanged != NoChangePrinter)
+ TextChangeReporter<std::string>::registerRequiredCallbacks(PIC);
+}
+
+void IRChangedPrinter::generateIRRepresentation(Any IR, StringRef PassID,
+ std::string &Output) {
+ raw_string_ostream OS(Output);
+ // use the after banner for all cases so it will match
+ SmallString<20> Banner = formatv("*** IR Dump After {0} ***", PassID);
+ unwrapAndPrint(OS, IR, Banner, forcePrintModuleIR(),
+ /*Brief=*/false, /*ShouldPreserveUseListOrder=*/true);
+
+ OS.str();
+}
+
+void IRChangedPrinter::handleAfter(StringRef PassID, std::string &Name,
+ const std::string &Before,
+ const std::string &After, Any) {
+ assert(After.find("*** IR Dump") == 0 && "Unexpected banner format.");
+ StringRef AfterRef = After;
+ StringRef Banner =
+ AfterRef.take_until([](char C) -> bool { return C == '\n'; });
+
+ // Report the IR before the changes when requested.
+ if (PrintChangedBefore) {
+ Out << "*** IR Dump Before" << Banner.substr(17);
+ // LazyCallGraph::SCC already has "(scc:..." in banner so only add
+ // in the name if it isn't already there.
+ if (Name.substr(0, 6) != " (scc:" && !forcePrintModuleIR())
+ Out << Name;
+
+ StringRef BeforeRef = Before;
+ Out << BeforeRef.substr(Banner.size());
+ }
+
+ Out << Banner;
+
+ // LazyCallGraph::SCC already has "(scc:..." in banner so only add
+ // in the name if it isn't already there.
+ if (Name.substr(0, 6) != " (scc:" && !forcePrintModuleIR())
+ Out << Name;
+
+ Out << After.substr(Banner.size());
+}
+
+bool IRChangedPrinter::same(const std::string &S1, const std::string &S2) {
+ return S1 == S2;
+}
+
PrintIRInstrumentation::~PrintIRInstrumentation() {
assert(ModuleDescStack.empty() && "ModuleDescStack is not empty at exit");
}
@@ -172,44 +485,44 @@ PrintIRInstrumentation::popModuleDesc(StringRef PassID) {
return ModuleDesc;
}
-bool PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
- if (PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"))
- return true;
+void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
+ if (isIgnored(PassID))
+ return;
// Saving Module for AfterPassInvalidated operations.
// Note: here we rely on a fact that we do not change modules while
// traversing the pipeline, so the latest captured module is good
// for all print operations that has not happen yet.
- if (StoreModuleDesc && llvm::shouldPrintAfterPass(PassID))
+ if (StoreModuleDesc && shouldPrintAfterPass(PassID))
pushModuleDesc(PassID, IR);
- if (!llvm::shouldPrintBeforePass(PassID))
- return true;
+ if (!shouldPrintBeforePass(PassID))
+ return;
SmallString<20> Banner = formatv("*** IR Dump Before {0} ***", PassID);
- unwrapAndPrint(IR, Banner, llvm::forcePrintModuleIR());
- return true;
+ unwrapAndPrint(dbgs(), IR, Banner, forcePrintModuleIR());
}
void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
- if (PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"))
+ if (isIgnored(PassID))
return;
- if (!llvm::shouldPrintAfterPass(PassID))
+ if (!shouldPrintAfterPass(PassID))
return;
if (StoreModuleDesc)
popModuleDesc(PassID);
SmallString<20> Banner = formatv("*** IR Dump After {0} ***", PassID);
- unwrapAndPrint(IR, Banner, llvm::forcePrintModuleIR());
+ unwrapAndPrint(dbgs(), IR, Banner, forcePrintModuleIR());
}
void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
- if (!StoreModuleDesc || !llvm::shouldPrintAfterPass(PassID))
+ StringRef PassName = PIC->getPassNameForClassName(PassID);
+ if (!StoreModuleDesc || !shouldPrintAfterPass(PassName))
return;
- if (PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"))
+ if (isIgnored(PassID))
return;
const Module *M;
@@ -223,28 +536,360 @@ void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
SmallString<20> Banner =
formatv("*** IR Dump After {0} *** invalidated: ", PassID);
- printIR(M, Banner, Extra);
+ printIR(dbgs(), M, Banner, Extra);
+}
+
+bool PrintIRInstrumentation::shouldPrintBeforePass(StringRef PassID) {
+ if (shouldPrintBeforeAll())
+ return true;
+
+ StringRef PassName = PIC->getPassNameForClassName(PassID);
+ for (const auto &P : printBeforePasses()) {
+ if (PassName == P)
+ return true;
+ }
+ return false;
+}
+
+bool PrintIRInstrumentation::shouldPrintAfterPass(StringRef PassID) {
+ if (shouldPrintAfterAll())
+ return true;
+
+ StringRef PassName = PIC->getPassNameForClassName(PassID);
+ for (const auto &P : printAfterPasses()) {
+ if (PassName == P)
+ return true;
+ }
+ return false;
}
void PrintIRInstrumentation::registerCallbacks(
PassInstrumentationCallbacks &PIC) {
+ this->PIC = &PIC;
+
// BeforePass callback is not just for printing, it also saves a Module
// for later use in AfterPassInvalidated.
- StoreModuleDesc = llvm::forcePrintModuleIR() && llvm::shouldPrintAfterPass();
- if (llvm::shouldPrintBeforePass() || StoreModuleDesc)
- PIC.registerBeforePassCallback(
- [this](StringRef P, Any IR) { return this->printBeforePass(P, IR); });
+ StoreModuleDesc = forcePrintModuleIR() && shouldPrintAfterSomePass();
+ if (shouldPrintBeforeSomePass() || StoreModuleDesc)
+ PIC.registerBeforeNonSkippedPassCallback(
+ [this](StringRef P, Any IR) { this->printBeforePass(P, IR); });
- if (llvm::shouldPrintAfterPass()) {
+ if (shouldPrintAfterSomePass()) {
PIC.registerAfterPassCallback(
- [this](StringRef P, Any IR) { this->printAfterPass(P, IR); });
+ [this](StringRef P, Any IR, const PreservedAnalyses &) {
+ this->printAfterPass(P, IR);
+ });
PIC.registerAfterPassInvalidatedCallback(
- [this](StringRef P) { this->printAfterPassInvalidated(P); });
+ [this](StringRef P, const PreservedAnalyses &) {
+ this->printAfterPassInvalidated(P);
+ });
}
}
+void OptNoneInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ PIC.registerShouldRunOptionalPassCallback(
+ [this](StringRef P, Any IR) { return this->shouldRun(P, IR); });
+}
+
+bool OptNoneInstrumentation::shouldRun(StringRef PassID, Any IR) {
+ const Function *F = nullptr;
+ if (any_isa<const Function *>(IR)) {
+ F = any_cast<const Function *>(IR);
+ } else if (any_isa<const Loop *>(IR)) {
+ F = any_cast<const Loop *>(IR)->getHeader()->getParent();
+ }
+ bool ShouldRun = !(F && F->hasOptNone());
+ if (!ShouldRun && DebugLogging) {
+ errs() << "Skipping pass " << PassID << " on " << F->getName()
+ << " due to optnone attribute\n";
+ }
+ return ShouldRun;
+}
+
+static std::string getBisectDescription(Any IR) {
+ if (any_isa<const Module *>(IR)) {
+ const Module *M = any_cast<const Module *>(IR);
+ assert(M && "module should be valid for printing");
+ return "module (" + M->getName().str() + ")";
+ }
+
+ if (any_isa<const Function *>(IR)) {
+ const Function *F = any_cast<const Function *>(IR);
+ assert(F && "function should be valid for printing");
+ return "function (" + F->getName().str() + ")";
+ }
+
+ if (any_isa<const LazyCallGraph::SCC *>(IR)) {
+ const LazyCallGraph::SCC *C = any_cast<const LazyCallGraph::SCC *>(IR);
+ assert(C && "scc should be valid for printing");
+ return "SCC " + C->getName();
+ }
+
+ if (any_isa<const Loop *>(IR)) {
+ return "loop";
+ }
+
+ llvm_unreachable("Unknown wrapped IR type");
+}
+
+void OptBisectInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ if (!OptBisector->isEnabled())
+ return;
+ PIC.registerShouldRunOptionalPassCallback([](StringRef PassID, Any IR) {
+ return isIgnored(PassID) ||
+ OptBisector->checkPass(PassID, getBisectDescription(IR));
+ });
+}
+
+void PrintPassInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ if (!DebugLogging)
+ return;
+
+ std::vector<StringRef> SpecialPasses = {"PassManager"};
+ if (!DebugPMVerbose)
+ SpecialPasses.emplace_back("PassAdaptor");
+
+ PIC.registerBeforeSkippedPassCallback(
+ [SpecialPasses](StringRef PassID, Any IR) {
+ assert(!isSpecialPass(PassID, SpecialPasses) &&
+ "Unexpectedly skipping special pass");
+
+ dbgs() << "Skipping pass: " << PassID << " on ";
+ unwrapAndPrint(dbgs(), IR, "", false, true);
+ });
+
+ PIC.registerBeforeNonSkippedPassCallback(
+ [SpecialPasses](StringRef PassID, Any IR) {
+ if (isSpecialPass(PassID, SpecialPasses))
+ return;
+
+ dbgs() << "Running pass: " << PassID << " on ";
+ unwrapAndPrint(dbgs(), IR, "", false, true);
+ });
+
+ PIC.registerBeforeAnalysisCallback([](StringRef PassID, Any IR) {
+ dbgs() << "Running analysis: " << PassID << " on ";
+ unwrapAndPrint(dbgs(), IR, "", false, true);
+ });
+}
+
+PreservedCFGCheckerInstrumentation::CFG::CFG(const Function *F,
+ bool TrackBBLifetime) {
+ if (TrackBBLifetime)
+ BBGuards = DenseMap<intptr_t, BBGuard>(F->size());
+ for (const auto &BB : *F) {
+ if (BBGuards)
+ BBGuards->try_emplace(intptr_t(&BB), &BB);
+ for (auto *Succ : successors(&BB)) {
+ Graph[&BB][Succ]++;
+ if (BBGuards)
+ BBGuards->try_emplace(intptr_t(Succ), Succ);
+ }
+ }
+}
+
+static void printBBName(raw_ostream &out, const BasicBlock *BB) {
+ if (BB->hasName()) {
+ out << BB->getName() << "<" << BB << ">";
+ return;
+ }
+
+ if (!BB->getParent()) {
+ out << "unnamed_removed<" << BB << ">";
+ return;
+ }
+
+ if (BB == &BB->getParent()->getEntryBlock()) {
+ out << "entry"
+ << "<" << BB << ">";
+ return;
+ }
+
+ unsigned FuncOrderBlockNum = 0;
+ for (auto &FuncBB : *BB->getParent()) {
+ if (&FuncBB == BB)
+ break;
+ FuncOrderBlockNum++;
+ }
+ out << "unnamed_" << FuncOrderBlockNum << "<" << BB << ">";
+}
+
+void PreservedCFGCheckerInstrumentation::CFG::printDiff(raw_ostream &out,
+ const CFG &Before,
+ const CFG &After) {
+ assert(!After.isPoisoned());
+
+ // Print function name.
+ const CFG *FuncGraph = nullptr;
+ if (!After.Graph.empty())
+ FuncGraph = &After;
+ else if (!Before.isPoisoned() && !Before.Graph.empty())
+ FuncGraph = &Before;
+
+ if (FuncGraph)
+ out << "In function @"
+ << FuncGraph->Graph.begin()->first->getParent()->getName() << "\n";
+
+ if (Before.isPoisoned()) {
+ out << "Some blocks were deleted\n";
+ return;
+ }
+
+ // Find and print graph differences.
+ if (Before.Graph.size() != After.Graph.size())
+ out << "Different number of non-leaf basic blocks: before="
+ << Before.Graph.size() << ", after=" << After.Graph.size() << "\n";
+
+ for (auto &BB : Before.Graph) {
+ auto BA = After.Graph.find(BB.first);
+ if (BA == After.Graph.end()) {
+ out << "Non-leaf block ";
+ printBBName(out, BB.first);
+ out << " is removed (" << BB.second.size() << " successors)\n";
+ }
+ }
+
+ for (auto &BA : After.Graph) {
+ auto BB = Before.Graph.find(BA.first);
+ if (BB == Before.Graph.end()) {
+ out << "Non-leaf block ";
+ printBBName(out, BA.first);
+ out << " is added (" << BA.second.size() << " successors)\n";
+ continue;
+ }
+
+ if (BB->second == BA.second)
+ continue;
+
+ out << "Different successors of block ";
+ printBBName(out, BA.first);
+ out << " (unordered):\n";
+ out << "- before (" << BB->second.size() << "): ";
+ for (auto &SuccB : BB->second) {
+ printBBName(out, SuccB.first);
+ if (SuccB.second != 1)
+ out << "(" << SuccB.second << "), ";
+ else
+ out << ", ";
+ }
+ out << "\n";
+ out << "- after (" << BA.second.size() << "): ";
+ for (auto &SuccA : BA.second) {
+ printBBName(out, SuccA.first);
+ if (SuccA.second != 1)
+ out << "(" << SuccA.second << "), ";
+ else
+ out << ", ";
+ }
+ out << "\n";
+ }
+}
+
+void PreservedCFGCheckerInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ if (!VerifyPreservedCFG)
+ return;
+
+ PIC.registerBeforeNonSkippedPassCallback([this](StringRef P, Any IR) {
+ if (any_isa<const Function *>(IR))
+ GraphStackBefore.emplace_back(P, CFG(any_cast<const Function *>(IR)));
+ else
+ GraphStackBefore.emplace_back(P, None);
+ });
+
+ PIC.registerAfterPassInvalidatedCallback(
+ [this](StringRef P, const PreservedAnalyses &PassPA) {
+ auto Before = GraphStackBefore.pop_back_val();
+ assert(Before.first == P &&
+ "Before and After callbacks must correspond");
+ (void)Before;
+ });
+
+ PIC.registerAfterPassCallback([this](StringRef P, Any IR,
+ const PreservedAnalyses &PassPA) {
+ auto Before = GraphStackBefore.pop_back_val();
+ assert(Before.first == P && "Before and After callbacks must correspond");
+ auto &GraphBefore = Before.second;
+
+ if (!PassPA.allAnalysesInSetPreserved<CFGAnalyses>())
+ return;
+
+ if (any_isa<const Function *>(IR)) {
+ assert(GraphBefore && "Must be built in BeforePassCallback");
+ CFG GraphAfter(any_cast<const Function *>(IR), false /* NeedsGuard */);
+ if (GraphAfter == *GraphBefore)
+ return;
+
+ dbgs() << "Error: " << P
+ << " reported it preserved CFG, but changes detected:\n";
+ CFG::printDiff(dbgs(), *GraphBefore, GraphAfter);
+ report_fatal_error(Twine("Preserved CFG changed by ", P));
+ }
+ });
+}
+
+void VerifyInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ PIC.registerAfterPassCallback(
+ [this](StringRef P, Any IR, const PreservedAnalyses &PassPA) {
+ if (isIgnored(P) || P == "VerifierPass")
+ return;
+ if (any_isa<const Function *>(IR) || any_isa<const Loop *>(IR)) {
+ const Function *F;
+ if (any_isa<const Loop *>(IR))
+ F = any_cast<const Loop *>(IR)->getHeader()->getParent();
+ else
+ F = any_cast<const Function *>(IR);
+ if (DebugLogging)
+ dbgs() << "Verifying function " << F->getName() << "\n";
+
+ if (verifyFunction(*F))
+ report_fatal_error("Broken function found, compilation aborted!");
+ } else if (any_isa<const Module *>(IR) ||
+ any_isa<const LazyCallGraph::SCC *>(IR)) {
+ const Module *M;
+ if (any_isa<const LazyCallGraph::SCC *>(IR))
+ M = any_cast<const LazyCallGraph::SCC *>(IR)
+ ->begin()
+ ->getFunction()
+ .getParent();
+ else
+ M = any_cast<const Module *>(IR);
+ if (DebugLogging)
+ dbgs() << "Verifying module " << M->getName() << "\n";
+
+ if (verifyModule(*M))
+ report_fatal_error("Broken module found, compilation aborted!");
+ }
+ });
+}
+
+StandardInstrumentations::StandardInstrumentations(bool DebugLogging,
+ bool VerifyEach)
+ : PrintPass(DebugLogging), OptNone(DebugLogging),
+ PrintChangedIR(PrintChanged != PrintChangedQuiet), Verify(DebugLogging),
+ VerifyEach(VerifyEach) {}
+
void StandardInstrumentations::registerCallbacks(
PassInstrumentationCallbacks &PIC) {
PrintIR.registerCallbacks(PIC);
+ PrintPass.registerCallbacks(PIC);
TimePasses.registerCallbacks(PIC);
+ OptNone.registerCallbacks(PIC);
+ OptBisect.registerCallbacks(PIC);
+ PreservedCFGChecker.registerCallbacks(PIC);
+ PrintChangedIR.registerCallbacks(PIC);
+ PseudoProbeVerification.registerCallbacks(PIC);
+ if (VerifyEach)
+ Verify.registerCallbacks(PIC);
}
+
+namespace llvm {
+
+template class ChangeReporter<std::string>;
+template class TextChangeReporter<std::string>;
+
+} // namespace llvm
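Stepping outside the patch for a moment: the PreservedCFGCheckerInstrumentation added above follows a simple push/pop discipline. The before-pass callback pushes a CFG snapshot onto a stack, and both after-pass callbacks pop it, comparing snapshots only when the pass claims CFGAnalyses were preserved. Below is a minimal standalone sketch of that pairing discipline; the names and the integer "fingerprint" are illustrative only and are not the LLVM pass-instrumentation API.

#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for a CFG snapshot; the real checker records block
// names with weighted successor lists.
using Snapshot = std::pair<std::string /*Pass*/, int /*CFGFingerprint*/>;

struct PreservedCFGSketch {
  std::vector<Snapshot> Stack;

  void beforePass(const std::string &Pass, int Fingerprint) {
    Stack.emplace_back(Pass, Fingerprint); // snapshot taken before the pass runs
  }

  // ClaimsPreserved mirrors PassPA.allAnalysesInSetPreserved<CFGAnalyses>().
  // Returns false when the pass claimed to preserve the CFG but changed it.
  bool afterPass(const std::string &Pass, bool ClaimsPreserved, int Fingerprint) {
    Snapshot Before = Stack.back();
    Stack.pop_back();
    assert(Before.first == Pass && "Before and After callbacks must correspond");
    return !ClaimsPreserved || Before.second == Fingerprint;
  }
};

int main() {
  PreservedCFGSketch Checker;
  Checker.beforePass("SomePass", 42);
  return Checker.afterPass("SomePass", /*ClaimsPreserved=*/true, 42) ? 0 : 1;
}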
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index 70f00d333db1..cdbcde50d33a 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -249,7 +249,12 @@ Error CoverageMapping::loadFunctionRecord(
consumeError(std::move(E));
return Error::success();
}
- Function.pushRegion(Region, *ExecutionCount);
+ Expected<int64_t> AltExecutionCount = Ctx.evaluate(Region.FalseCount);
+ if (auto E = AltExecutionCount.takeError()) {
+ consumeError(std::move(E));
+ return Error::success();
+ }
+ Function.pushRegion(Region, *ExecutionCount, *AltExecutionCount);
}
// Don't create records for (filenames, function) pairs we've already seen.
@@ -485,9 +490,15 @@ class SegmentBuilder {
if (CurStartLoc == CR.value().endLoc()) {
// Avoid making zero-length regions active. If it's the last region,
// emit a skipped segment. Otherwise use its predecessor's count.
- const bool Skipped = (CR.index() + 1) == Regions.size();
+ const bool Skipped =
+ (CR.index() + 1) == Regions.size() ||
+ CR.value().Kind == CounterMappingRegion::SkippedRegion;
startSegment(ActiveRegions.empty() ? CR.value() : *ActiveRegions.back(),
CurStartLoc, !GapRegion, Skipped);
+ // If it is a skipped segment, create a segment with the last pushed
+ // region's count at CurStartLoc.
+ if (Skipped && !ActiveRegions.empty())
+ startSegment(*ActiveRegions.back(), CurStartLoc, false);
continue;
}
if (CR.index() + 1 == Regions.size() ||
@@ -587,6 +598,8 @@ public:
const auto &L = Segments[I - 1];
const auto &R = Segments[I];
if (!(L.Line < R.Line) && !(L.Line == R.Line && L.Col < R.Col)) {
+ if (L.Line == R.Line && L.Col == R.Col && !L.HasCount)
+ continue;
LLVM_DEBUG(dbgs() << " ! Segment " << L.Line << ":" << L.Col
<< " followed by " << R.Line << ":" << R.Col << "\n");
assert(false && "Coverage segments not unique or sorted");
@@ -603,8 +616,7 @@ public:
std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const {
std::vector<StringRef> Filenames;
for (const auto &Function : getCoveredFunctions())
- Filenames.insert(Filenames.end(), Function.Filenames.begin(),
- Function.Filenames.end());
+ llvm::append_range(Filenames, Function.Filenames);
llvm::sort(Filenames);
auto Last = std::unique(Filenames.begin(), Filenames.end());
Filenames.erase(Last, Filenames.end());
@@ -664,6 +676,10 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const {
if (MainFileID && isExpansion(CR, *MainFileID))
FileCoverage.Expansions.emplace_back(CR, Function);
}
+ // Capture branch regions specific to the function (excluding expansions).
+ for (const auto &CR : Function.CountedBranchRegions)
+ if (FileIDs.test(CR.FileID) && (CR.FileID == CR.ExpandedFileID))
+ FileCoverage.BranchRegions.push_back(CR);
}
LLVM_DEBUG(dbgs() << "Emitting segments for file: " << Filename << "\n");
@@ -711,6 +727,10 @@ CoverageMapping::getCoverageForFunction(const FunctionRecord &Function) const {
if (isExpansion(CR, *MainFileID))
FunctionCoverage.Expansions.emplace_back(CR, Function);
}
+ // Capture branch regions specific to the function (excluding expansions).
+ for (const auto &CR : Function.CountedBranchRegions)
+ if (CR.FileID == *MainFileID)
+ FunctionCoverage.BranchRegions.push_back(CR);
LLVM_DEBUG(dbgs() << "Emitting segments for function: " << Function.Name
<< "\n");
@@ -730,6 +750,10 @@ CoverageData CoverageMapping::getCoverageForExpansion(
if (isExpansion(CR, Expansion.FileID))
ExpansionCoverage.Expansions.emplace_back(CR, Expansion.Function);
}
+ for (const auto &CR : Expansion.Function.CountedBranchRegions)
+ // Capture branch regions that only pertain to the corresponding expansion.
+ if (CR.FileID == Expansion.FileID)
+ ExpansionCoverage.BranchRegions.push_back(CR);
LLVM_DEBUG(dbgs() << "Emitting segments for expansion of file "
<< Expansion.FileID << "\n");
@@ -807,6 +831,8 @@ static std::string getCoverageMapErrString(coveragemap_error Err) {
return "Malformed coverage data";
case coveragemap_error::decompression_failed:
return "Failed to decompress coverage data (zlib)";
+ case coveragemap_error::invalid_or_missing_arch_specifier:
+ return "`-arch` specifier is invalid or missing for universal binary";
}
llvm_unreachable("A value of coveragemap_error has no message.");
}
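A note on the loadFunctionRecord change at the top of this file: it now evaluates a second expression per region (Region.FalseCount) and hands both results to pushRegion, so a branch region carries an execution count for each outcome. As a purely illustrative consumer of such a pair, with made-up counts and a made-up statistic:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical evaluated counters for one branch region.
  uint64_t TrueCount = 7;   // times the condition evaluated to true
  uint64_t FalseCount = 0;  // times the condition evaluated to false
  unsigned CoveredOutcomes = (TrueCount != 0) + (FalseCount != 0);
  std::printf("branch outcomes covered: %u of 2\n", CoveredOutcomes);
  return 0;
}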
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index b75738bc360c..1acdcb4bebb9 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -213,7 +213,7 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray(
return Err;
unsigned LineStart = 0;
for (size_t I = 0; I < NumRegions; ++I) {
- Counter C;
+ Counter C, C2;
CounterMappingRegion::RegionKind Kind = CounterMappingRegion::CodeRegion;
// Read the combined counter + region kind.
@@ -223,6 +223,18 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray(
return Err;
unsigned Tag = EncodedCounterAndRegion & Counter::EncodingTagMask;
uint64_t ExpandedFileID = 0;
+
+ // If Tag does not represent a ZeroCounter, then it is understood to refer
+ // to a counter or counter expression with region kind assumed to be
+ // "CodeRegion". In that case, EncodedCounterAndRegion actually encodes the
+ // referenced counter or counter expression (and nothing else).
+ //
+ // If Tag represents a ZeroCounter and EncodingExpansionRegionBit is set,
+ // then EncodedCounterAndRegion is interpreted to represent an
+ // ExpansionRegion. In all other cases, EncodedCounterAndRegion is
+ // interpreted to refer to a specific region kind, after which additional
+ // fields may be read (e.g. BranchRegions have two encoded counters that
+ // follow an encoded region kind value).
if (Tag != Counter::Zero) {
if (auto Err = decodeCounter(EncodedCounterAndRegion, C))
return Err;
@@ -243,6 +255,14 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray(
case CounterMappingRegion::SkippedRegion:
Kind = CounterMappingRegion::SkippedRegion;
break;
+ case CounterMappingRegion::BranchRegion:
+ // For a Branch Region, read two successive counters.
+ Kind = CounterMappingRegion::BranchRegion;
+ if (auto Err = readCounter(C))
+ return Err;
+ if (auto Err = readCounter(C2))
+ return Err;
+ break;
default:
return make_error<CoverageMapError>(coveragemap_error::malformed);
}
@@ -294,7 +314,7 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray(
dbgs() << "\n";
});
- auto CMR = CounterMappingRegion(C, InferredFileID, ExpandedFileID,
+ auto CMR = CounterMappingRegion(C, C2, InferredFileID, ExpandedFileID,
LineStart, ColumnStart,
LineStart + NumLines, ColumnEnd, Kind);
if (CMR.startLoc() > CMR.endLoc())
@@ -600,7 +620,7 @@ public:
CovBuf += FilenamesSize;
FilenameRange FileRange(FilenamesBegin, Filenames.size() - FilenamesBegin);
- if (Version == CovMapVersion::Version4) {
+ if (Version >= CovMapVersion::Version4) {
// Map a hash of the filenames region to the filename range associated
// with this coverage header.
int64_t FilenamesRef =
@@ -628,7 +648,7 @@ public:
// This is a no-op in Version4 (coverage mappings are not affixed to the
// coverage header).
const char *MappingBuf = CovBuf;
- if (Version == CovMapVersion::Version4 && CoverageSize != 0)
+ if (Version >= CovMapVersion::Version4 && CoverageSize != 0)
return make_error<CoverageMapError>(coveragemap_error::malformed);
CovBuf += CoverageSize;
const char *MappingEnd = CovBuf;
@@ -682,7 +702,7 @@ public:
if (FileRange && !FileRange->isInvalid()) {
StringRef Mapping =
CFR->template getCoverageMapping<Endian>(OutOfLineMappingBuf);
- if (Version == CovMapVersion::Version4 &&
+ if (Version >= CovMapVersion::Version4 &&
Mapping.data() + Mapping.size() > FuncRecBufEnd)
return make_error<CoverageMapError>(coveragemap_error::malformed);
if (Error Err = insertFunctionRecordIfNeeded(CFR, Mapping, *FileRange))
@@ -711,6 +731,7 @@ Expected<std::unique_ptr<CovMapFuncRecordReader>> CovMapFuncRecordReader::get(
case CovMapVersion::Version2:
case CovMapVersion::Version3:
case CovMapVersion::Version4:
+ case CovMapVersion::Version5:
// Decompress the name data.
if (Error E = P.create(P.getNameData()))
return std::move(E);
@@ -723,6 +744,9 @@ Expected<std::unique_ptr<CovMapFuncRecordReader>> CovMapFuncRecordReader::get(
else if (Version == CovMapVersion::Version4)
return std::make_unique<VersionedCovMapFuncRecordReader<
CovMapVersion::Version4, IntPtrT, Endian>>(P, R, F);
+ else if (Version == CovMapVersion::Version5)
+ return std::make_unique<VersionedCovMapFuncRecordReader<
+ CovMapVersion::Version5, IntPtrT, Endian>>(P, R, F);
}
llvm_unreachable("Unsupported version");
}
@@ -766,7 +790,7 @@ static Error readCoverageMappingData(
}
// In Version4, function records are not affixed to coverage headers. Read
// the records from their dedicated section.
- if (Version == CovMapVersion::Version4)
+ if (Version >= CovMapVersion::Version4)
return Reader->readFunctionRecords(FuncRecBuf, FuncRecBufEnd, None, nullptr,
nullptr);
return Error::success();
@@ -950,6 +974,19 @@ loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch) {
BytesInAddress, Endian);
}
+/// Determine whether \p Arch is invalid or empty, given \p Bin.
+static bool isArchSpecifierInvalidOrMissing(Binary *Bin, StringRef Arch) {
+ // If we have a universal binary and Arch doesn't identify any of its slices,
+ // it's user error.
+ if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin)) {
+ for (auto &ObjForArch : Universal->objects())
+ if (Arch == ObjForArch.getArchFlagName())
+ return false;
+ return true;
+ }
+ return false;
+}
+
Expected<std::vector<std::unique_ptr<BinaryCoverageReader>>>
BinaryCoverageReader::create(
MemoryBufferRef ObjectBuffer, StringRef Arch,
@@ -970,6 +1007,10 @@ BinaryCoverageReader::create(
return BinOrErr.takeError();
std::unique_ptr<Binary> Bin = std::move(BinOrErr.get());
+ if (isArchSpecifierInvalidOrMissing(Bin.get(), Arch))
+ return make_error<CoverageMapError>(
+ coveragemap_error::invalid_or_missing_arch_specifier);
+
// MachO universal binaries which contain archives need to be treated as
// archives, not as regular binaries.
if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
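For a BranchRegion the reader above consumes the encoded region-kind word and then two further counters (the C and C2 pair), all stored as unsigned LEB128, the same variable-length encoding the writer in the next file emits via encodeULEB128. As a reminder of that wire format, here is a minimal self-contained decoder; it is a sketch for illustration, not the helper from llvm/Support/LEB128.h.

#include <cstdint>
#include <cstdio>

// Decode one unsigned LEB128 value starting at *P and advance P past it.
static uint64_t decodeULEB(const uint8_t *&P) {
  uint64_t Result = 0;
  unsigned Shift = 0;
  uint8_t Byte;
  do {
    Byte = *P++;
    Result |= uint64_t(Byte & 0x7f) << Shift; // 7 payload bits per byte
    Shift += 7;
  } while (Byte & 0x80);                      // high bit set: more bytes follow
  return Result;
}

int main() {
  const uint8_t Buf[] = {0xe5, 0x8e, 0x26};   // 624485 in ULEB128
  const uint8_t *P = Buf;
  std::printf("%llu\n", (unsigned long long)decodeULEB(P)); // prints 624485
  return 0;
}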
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index 8d3c429c4484..65b83d1f4197 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -80,10 +80,14 @@ public:
ArrayRef<CounterMappingRegion> MappingRegions)
: Expressions(Expressions) {
AdjustedExpressionIDs.resize(Expressions.size(), 0);
- for (const auto &I : MappingRegions)
+ for (const auto &I : MappingRegions) {
mark(I.Count);
- for (const auto &I : MappingRegions)
+ mark(I.FalseCount);
+ }
+ for (const auto &I : MappingRegions) {
gatherUsed(I.Count);
+ gatherUsed(I.FalseCount);
+ }
}
void mark(Counter C) {
@@ -201,6 +205,7 @@ void CoverageMappingWriter::write(raw_ostream &OS) {
PrevLineStart = 0;
}
Counter Count = Minimizer.adjust(I->Count);
+ Counter FalseCount = Minimizer.adjust(I->FalseCount);
switch (I->Kind) {
case CounterMappingRegion::CodeRegion:
case CounterMappingRegion::GapRegion:
@@ -226,6 +231,13 @@ void CoverageMappingWriter::write(raw_ostream &OS) {
<< Counter::EncodingCounterTagAndExpansionRegionTagBits,
OS);
break;
+ case CounterMappingRegion::BranchRegion:
+ encodeULEB128(unsigned(I->Kind)
+ << Counter::EncodingCounterTagAndExpansionRegionTagBits,
+ OS);
+ writeCounter(MinExpressions, Count, OS);
+ writeCounter(MinExpressions, FalseCount, OS);
+ break;
}
assert(I->LineStart >= PrevLineStart);
encodeULEB128(I->LineStart - PrevLineStart, OS);
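The write side mirrors the reader: the new BranchRegion case emits the region-kind word and then both counters through writeCounter, and every value goes out LEB128-encoded through encodeULEB128. For symmetry with the decoder sketch above, a minimal standalone encoder (again an illustration, not the LLVM helper):

#include <cstdint>
#include <cstdio>
#include <vector>

// Encode V as unsigned LEB128, least-significant 7 bits first.
static void encodeULEB(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V != 0)
      Byte |= 0x80;          // more bytes follow
    Out.push_back(Byte);
  } while (V != 0);
}

int main() {
  std::vector<uint8_t> Out;
  encodeULEB(624485, Out);   // expect the bytes e5 8e 26
  for (uint8_t B : Out)
    std::printf("%02x ", B);
  std::printf("\n");
  return 0;
}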
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp b/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp
index 71ea44a1a722..3332a898603b 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp
@@ -14,14 +14,16 @@
#include "llvm/ProfileData/GCOV.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/MD5.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <system_error>
+#include <unordered_map>
using namespace llvm;
@@ -39,6 +41,59 @@ enum : uint32_t {
GCOV_TAG_PROGRAM_SUMMARY = 0xa3000000,
};
+namespace {
+struct Summary {
+ Summary(StringRef Name) : Name(Name) {}
+
+ StringRef Name;
+ uint64_t lines = 0;
+ uint64_t linesExec = 0;
+ uint64_t branches = 0;
+ uint64_t branchesExec = 0;
+ uint64_t branchesTaken = 0;
+};
+
+struct LineInfo {
+ SmallVector<const GCOVBlock *, 1> blocks;
+ uint64_t count = 0;
+ bool exists = false;
+};
+
+struct SourceInfo {
+ StringRef filename;
+ SmallString<0> displayName;
+ std::vector<std::vector<const GCOVFunction *>> startLineToFunctions;
+ std::vector<LineInfo> lines;
+ bool ignored = false;
+ SourceInfo(StringRef filename) : filename(filename) {}
+};
+
+class Context {
+public:
+ Context(const GCOV::Options &Options) : options(Options) {}
+ void print(StringRef filename, StringRef gcno, StringRef gcda,
+ GCOVFile &file);
+
+private:
+ std::string getCoveragePath(StringRef filename, StringRef mainFilename) const;
+ void printFunctionDetails(const GCOVFunction &f, raw_ostream &os) const;
+ void printBranchInfo(const GCOVBlock &Block, uint32_t &edgeIdx,
+ raw_ostream &OS) const;
+ void printSummary(const Summary &summary, raw_ostream &os) const;
+
+ void collectFunction(GCOVFunction &f, Summary &summary);
+ void collectSourceLine(SourceInfo &si, Summary *summary, LineInfo &line,
+ size_t lineNum) const;
+ void collectSource(SourceInfo &si, Summary &summary) const;
+ void annotateSource(SourceInfo &si, const GCOVFile &file, StringRef gcno,
+ StringRef gcda, raw_ostream &os) const;
+ void printSourceToIntermediate(const SourceInfo &si, raw_ostream &os) const;
+
+ const GCOV::Options &options;
+ std::vector<SourceInfo> sources;
+};
+} // namespace
+
//===----------------------------------------------------------------------===//
// GCOVFile implementation.
@@ -56,13 +111,13 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) {
buf.getWord(); // hasUnexecutedBlocks
uint32_t tag, length;
- GCOVFunction *fn;
+ GCOVFunction *fn = nullptr;
while ((tag = buf.getWord())) {
if (!buf.readInt(length))
return false;
if (tag == GCOV_TAG_FUNCTION) {
- Functions.push_back(std::make_unique<GCOVFunction>(*this));
- fn = Functions.back().get();
+ functions.push_back(std::make_unique<GCOVFunction>(*this));
+ fn = functions.back().get();
fn->ident = buf.getWord();
fn->linenoChecksum = buf.getWord();
if (Version >= GCOV::V407)
@@ -90,41 +145,40 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) {
if (Version < GCOV::V800) {
for (uint32_t i = 0; i != length; ++i) {
buf.getWord(); // Ignored block flags
- fn->Blocks.push_back(std::make_unique<GCOVBlock>(*fn, i));
+ fn->blocks.push_back(std::make_unique<GCOVBlock>(i));
}
} else {
uint32_t num = buf.getWord();
for (uint32_t i = 0; i != num; ++i)
- fn->Blocks.push_back(std::make_unique<GCOVBlock>(*fn, i));
+ fn->blocks.push_back(std::make_unique<GCOVBlock>(i));
}
} else if (tag == GCOV_TAG_ARCS && fn) {
uint32_t srcNo = buf.getWord();
- if (srcNo >= fn->Blocks.size()) {
+ if (srcNo >= fn->blocks.size()) {
errs() << "unexpected block number: " << srcNo << " (in "
- << fn->Blocks.size() << ")\n";
+ << fn->blocks.size() << ")\n";
return false;
}
- GCOVBlock *src = fn->Blocks[srcNo].get();
+ GCOVBlock *src = fn->blocks[srcNo].get();
for (uint32_t i = 0, e = (length - 1) / 2; i != e; ++i) {
uint32_t dstNo = buf.getWord(), flags = buf.getWord();
- GCOVBlock *dst = fn->Blocks[dstNo].get();
- auto arc =
- std::make_unique<GCOVArc>(*src, *dst, flags & GCOV_ARC_FALLTHROUGH);
+ GCOVBlock *dst = fn->blocks[dstNo].get();
+ auto arc = std::make_unique<GCOVArc>(*src, *dst, flags);
src->addDstEdge(arc.get());
dst->addSrcEdge(arc.get());
- if (flags & GCOV_ARC_ON_TREE)
+ if (arc->onTree())
fn->treeArcs.push_back(std::move(arc));
else
fn->arcs.push_back(std::move(arc));
}
} else if (tag == GCOV_TAG_LINES && fn) {
uint32_t srcNo = buf.getWord();
- if (srcNo >= fn->Blocks.size()) {
+ if (srcNo >= fn->blocks.size()) {
errs() << "unexpected block number: " << srcNo << " (in "
- << fn->Blocks.size() << ")\n";
+ << fn->blocks.size() << ")\n";
return false;
}
- GCOVBlock &Block = *fn->Blocks[srcNo];
+ GCOVBlock &Block = *fn->blocks[srcNo];
for (;;) {
uint32_t line = buf.getWord();
if (line)
@@ -219,12 +273,24 @@ bool GCOVFile::readGCDA(GCOVBuffer &buf) {
return false;
}
for (std::unique_ptr<GCOVArc> &arc : fn->arcs) {
- if (!buf.readInt64(arc->Count))
+ if (!buf.readInt64(arc->count))
return false;
- // FIXME Fix counters
- arc->src.Counter += arc->Count;
- if (arc->dst.succ.empty())
- arc->dst.Counter += arc->Count;
+ arc->src.count += arc->count;
+ }
+
+ if (fn->blocks.size() >= 2) {
+ GCOVBlock &src = *fn->blocks[0];
+ GCOVBlock &sink =
+ Version < GCOV::V408 ? *fn->blocks.back() : *fn->blocks[1];
+ auto arc = std::make_unique<GCOVArc>(sink, src, GCOV_ARC_ON_TREE);
+ sink.addDstEdge(arc.get());
+ src.addSrcEdge(arc.get());
+ fn->treeArcs.push_back(std::move(arc));
+
+ for (GCOVBlock &block : fn->blocksRange())
+ fn->propagateCounts(block, nullptr);
+ for (size_t i = fn->treeArcs.size() - 1; i; --i)
+ fn->treeArcs[i - 1]->src.count += fn->treeArcs[i - 1]->count;
}
}
pos += 4 * length;
@@ -246,41 +312,71 @@ void GCOVFile::print(raw_ostream &OS) const {
LLVM_DUMP_METHOD void GCOVFile::dump() const { print(dbgs()); }
#endif
-/// collectLineCounts - Collect line counts. This must be used after
-/// reading .gcno and .gcda files.
-void GCOVFile::collectLineCounts(FileInfo &fi) {
- assert(fi.sources.empty());
- for (StringRef filename : filenames)
- fi.sources.emplace_back(filename);
- for (GCOVFunction &f : *this) {
- f.collectLineCounts(fi);
- fi.sources[f.srcIdx].functions.push_back(&f);
- }
- fi.setRunCount(RunCount);
- fi.setProgramCount(ProgramCount);
-}
+bool GCOVArc::onTree() const { return flags & GCOV_ARC_ON_TREE; }
//===----------------------------------------------------------------------===//
// GCOVFunction implementation.
+StringRef GCOVFunction::getName(bool demangle) const {
+ if (!demangle)
+ return Name;
+ if (demangled.empty()) {
+ do {
+ if (Name.startswith("_Z")) {
+ int status = 0;
+ // Name is guaranteed to be NUL-terminated.
+ char *res = itaniumDemangle(Name.data(), nullptr, nullptr, &status);
+ if (status == 0) {
+ demangled = res;
+ free(res);
+ break;
+ }
+ }
+ demangled = Name;
+ } while (0);
+ }
+ return demangled;
+}
StringRef GCOVFunction::getFilename() const { return file.filenames[srcIdx]; }
/// getEntryCount - Get the number of times the function was called by
/// retrieving the entry block's count.
uint64_t GCOVFunction::getEntryCount() const {
- return Blocks.front()->getCount();
+ return blocks.front()->getCount();
}
-/// getExitCount - Get the number of times the function returned by retrieving
-/// the exit block's count.
-uint64_t GCOVFunction::getExitCount() const {
- return Blocks.back()->getCount();
+GCOVBlock &GCOVFunction::getExitBlock() const {
+ return file.getVersion() < GCOV::V408 ? *blocks.back() : *blocks[1];
+}
+
+// For each basic block, the sum of incoming edge counts equals the sum of
+// outgoing edge counts by Kirchhoff's circuit law. If the unmeasured arcs form a
+// spanning tree, the count for each unmeasured arc (GCOV_ARC_ON_TREE) can be
+// uniquely identified.
+uint64_t GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) {
+ // If GCOV_ARC_ON_TREE edges do form a tree, visited is not needed; otherwise
+ // this prevents infinite recursion.
+ if (!visited.insert(&v).second)
+ return 0;
+
+ uint64_t excess = 0;
+ for (GCOVArc *e : v.srcs())
+ if (e != pred)
+ excess += e->onTree() ? propagateCounts(e->src, e) : e->count;
+ for (GCOVArc *e : v.dsts())
+ if (e != pred)
+ excess -= e->onTree() ? propagateCounts(e->dst, e) : e->count;
+ if (int64_t(excess) < 0)
+ excess = -excess;
+ if (pred)
+ pred->count = excess;
+ return excess;
}
void GCOVFunction::print(raw_ostream &OS) const {
OS << "===== " << Name << " (" << ident << ") @ " << getFilename() << ":"
<< startLine << "\n";
- for (const auto &Block : Blocks)
+ for (const auto &Block : blocks)
Block->print(OS);
}
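propagateCounts above recovers the count of every unmeasured (GCOV_ARC_ON_TREE) arc purely from flow conservation: at each block the incoming counts must balance the outgoing counts. A self-contained toy example of that balance, independent of the GCOV classes and with invented numbers:

#include <cstdint>
#include <cstdio>

int main() {
  // Block B has two measured incoming arcs (3 and 4) and two outgoing arcs,
  // one of which is measured at 5. Flow conservation forces the unmeasured
  // outgoing arc to carry 3 + 4 - 5 = 2.
  uint64_t In[] = {3, 4};
  uint64_t MeasuredOut = 5;
  uint64_t Sum = 0;
  for (uint64_t C : In)
    Sum += C;
  uint64_t UnmeasuredOut = Sum - MeasuredOut;
  std::printf("unmeasured arc count = %llu\n", (unsigned long long)UnmeasuredOut);
  return 0;
}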
@@ -291,44 +387,30 @@ LLVM_DUMP_METHOD void GCOVFunction::dump() const { print(dbgs()); }
/// collectLineCounts - Collect line counts. This must be used after
/// reading .gcno and .gcda files.
-void GCOVFunction::collectLineCounts(FileInfo &FI) {
- // If the line number is zero, this is a function that doesn't actually appear
- // in the source file, so there isn't anything we can do with it.
- if (startLine == 0)
- return;
-
- for (const auto &Block : Blocks)
- Block->collectLineCounts(FI);
- FI.addFunctionLine(getFilename(), startLine, this);
-}
//===----------------------------------------------------------------------===//
// GCOVBlock implementation.
-/// collectLineCounts - Collect line counts. This must be used after
-/// reading .gcno and .gcda files.
-void GCOVBlock::collectLineCounts(FileInfo &FI) {
- for (uint32_t N : Lines)
- FI.addBlockLine(Parent.getFilename(), N, this);
-}
-
void GCOVBlock::print(raw_ostream &OS) const {
- OS << "Block : " << Number << " Counter : " << Counter << "\n";
+ OS << "Block : " << number << " Counter : " << count << "\n";
if (!pred.empty()) {
OS << "\tSource Edges : ";
for (const GCOVArc *Edge : pred)
- OS << Edge->src.Number << " (" << Edge->Count << "), ";
+ OS << Edge->src.number << " (" << Edge->count << "), ";
OS << "\n";
}
if (!succ.empty()) {
OS << "\tDestination Edges : ";
- for (const GCOVArc *Edge : succ)
- OS << Edge->dst.Number << " (" << Edge->Count << "), ";
+ for (const GCOVArc *Edge : succ) {
+ if (Edge->flags & GCOV_ARC_ON_TREE)
+ OS << '*';
+ OS << Edge->dst.number << " (" << Edge->count << "), ";
+ }
OS << "\n";
}
- if (!Lines.empty()) {
+ if (!lines.empty()) {
OS << "\tLines : ";
- for (uint32_t N : Lines)
+ for (uint32_t N : lines)
OS << (N) << ",";
OS << "\n";
}
@@ -339,139 +421,96 @@ void GCOVBlock::print(raw_ostream &OS) const {
LLVM_DUMP_METHOD void GCOVBlock::dump() const { print(dbgs()); }
#endif
-//===----------------------------------------------------------------------===//
-// Cycles detection
-//
-// The algorithm in GCC is based on the algorithm by Hawick & James:
-// "Enumerating Circuits and Loops in Graphs with Self-Arcs and Multiple-Arcs"
-// http://complexity.massey.ac.nz/cstn/013/cstn-013.pdf.
-
-/// Get the count for the detected cycle.
-uint64_t GCOVBlock::getCycleCount(const Edges &Path) {
- uint64_t CycleCount = std::numeric_limits<uint64_t>::max();
- for (auto E : Path) {
- CycleCount = std::min(E->CyclesCount, CycleCount);
- }
- for (auto E : Path) {
- E->CyclesCount -= CycleCount;
- }
- return CycleCount;
-}
-
-/// Unblock a vertex previously marked as blocked.
-void GCOVBlock::unblock(const GCOVBlock *U, BlockVector &Blocked,
- BlockVectorLists &BlockLists) {
- auto it = find(Blocked, U);
- if (it == Blocked.end()) {
- return;
- }
-
- const size_t index = it - Blocked.begin();
- Blocked.erase(it);
-
- const BlockVector ToUnblock(BlockLists[index]);
- BlockLists.erase(BlockLists.begin() + index);
- for (auto GB : ToUnblock) {
- GCOVBlock::unblock(GB, Blocked, BlockLists);
- }
-}
-
-bool GCOVBlock::lookForCircuit(const GCOVBlock *V, const GCOVBlock *Start,
- Edges &Path, BlockVector &Blocked,
- BlockVectorLists &BlockLists,
- const BlockVector &Blocks, uint64_t &Count) {
- Blocked.push_back(V);
- BlockLists.emplace_back(BlockVector());
- bool FoundCircuit = false;
-
- for (auto E : V->dsts()) {
- const GCOVBlock *W = &E->dst;
- if (W < Start || find(Blocks, W) == Blocks.end()) {
+uint64_t
+GCOVBlock::augmentOneCycle(GCOVBlock *src,
+ std::vector<std::pair<GCOVBlock *, size_t>> &stack) {
+ GCOVBlock *u;
+ size_t i;
+ stack.clear();
+ stack.emplace_back(src, 0);
+ src->incoming = (GCOVArc *)1; // Mark u available for cycle detection
+ for (;;) {
+ std::tie(u, i) = stack.back();
+ if (i == u->succ.size()) {
+ u->traversable = false;
+ stack.pop_back();
+ if (stack.empty())
+ break;
continue;
}
-
- Path.push_back(E);
-
- if (W == Start) {
- // We've a cycle.
- Count += GCOVBlock::getCycleCount(Path);
- FoundCircuit = true;
- } else if (find(Blocked, W) == Blocked.end() && // W is not blocked.
- GCOVBlock::lookForCircuit(W, Start, Path, Blocked, BlockLists,
- Blocks, Count)) {
- FoundCircuit = true;
+ ++stack.back().second;
+ GCOVArc *succ = u->succ[i];
+ // Ignore saturated arcs (cycleCount has been reduced to 0) and visited
+ // blocks. Ignore self arcs to guard against bad input (a well-formed .gcno
+ // has no self arcs).
+ if (succ->cycleCount == 0 || !succ->dst.traversable || &succ->dst == u)
+ continue;
+ if (succ->dst.incoming == nullptr) {
+ succ->dst.incoming = succ;
+ stack.emplace_back(&succ->dst, 0);
+ continue;
}
-
- Path.pop_back();
- }
-
- if (FoundCircuit) {
- GCOVBlock::unblock(V, Blocked, BlockLists);
- } else {
- for (auto E : V->dsts()) {
- const GCOVBlock *W = &E->dst;
- if (W < Start || find(Blocks, W) == Blocks.end()) {
- continue;
- }
- const size_t index = find(Blocked, W) - Blocked.begin();
- BlockVector &List = BlockLists[index];
- if (find(List, V) == List.end()) {
- List.push_back(V);
- }
+ uint64_t minCount = succ->cycleCount;
+ for (GCOVBlock *v = u;;) {
+ minCount = std::min(minCount, v->incoming->cycleCount);
+ v = &v->incoming->src;
+ if (v == &succ->dst)
+ break;
}
+ succ->cycleCount -= minCount;
+ for (GCOVBlock *v = u;;) {
+ v->incoming->cycleCount -= minCount;
+ v = &v->incoming->src;
+ if (v == &succ->dst)
+ break;
+ }
+ return minCount;
}
-
- return FoundCircuit;
-}
-
-/// Get the count for the list of blocks which lie on the same line.
-void GCOVBlock::getCyclesCount(const BlockVector &Blocks, uint64_t &Count) {
- for (auto Block : Blocks) {
- Edges Path;
- BlockVector Blocked;
- BlockVectorLists BlockLists;
-
- GCOVBlock::lookForCircuit(Block, Block, Path, Blocked, BlockLists, Blocks,
- Count);
- }
+ return 0;
}
-/// Get the count for the list of blocks which lie on the same line.
-uint64_t GCOVBlock::getLineCount(const BlockVector &Blocks) {
- uint64_t Count = 0;
-
- for (auto Block : Blocks) {
- if (Block->getNumSrcEdges() == 0) {
- // The block has no predecessors and a non-null counter
- // (can be the case with entry block in functions).
- Count += Block->getCount();
- } else {
- // Add counts from predecessors that are not on the same line.
- for (auto E : Block->srcs()) {
- const GCOVBlock *W = &E->src;
- if (find(Blocks, W) == Blocks.end()) {
- Count += E->Count;
- }
- }
+// Get the total execution count of loops among blocks on the same line.
+// Assuming a reducible flow graph, the count is the sum of back edge counts.
+// Identifying loops is complex, so we simply find cycles and perform cycle
+// cancelling iteratively.
+uint64_t GCOVBlock::getCyclesCount(const BlockVector &blocks) {
+ std::vector<std::pair<GCOVBlock *, size_t>> stack;
+ uint64_t count = 0, d;
+ for (;;) {
+ // Make blocks on the line traversable and try finding a cycle.
+ for (auto b : blocks) {
+ const_cast<GCOVBlock *>(b)->traversable = true;
+ const_cast<GCOVBlock *>(b)->incoming = nullptr;
}
- for (auto E : Block->dsts()) {
- E->CyclesCount = E->Count;
+ d = 0;
+ for (auto block : blocks) {
+ auto *b = const_cast<GCOVBlock *>(block);
+ if (b->traversable && (d = augmentOneCycle(b, stack)) > 0)
+ break;
}
+ if (d == 0)
+ break;
+ count += d;
}
-
- GCOVBlock::getCyclesCount(Blocks, Count);
-
- return Count;
+ // If there is no more loop, all traversable bits should have been cleared.
+ // This property is needed by subsequent calls.
+ for (auto b : blocks) {
+ assert(!b->traversable);
+ (void)b;
+ }
+ return count;
}
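getCyclesCount above repeatedly finds a residual cycle among the blocks of one line, subtracts the cycle's bottleneck count from every arc on it, and adds that bottleneck to the line's total. A standalone toy run of the same idea on a two-block cycle, with invented counts:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  // Residual counts of a two-block cycle A->B and B->A.
  uint64_t AB = 10, BA = 7, Total = 0;
  while (std::min(AB, BA) > 0) {   // a residual cycle remains
    uint64_t D = std::min(AB, BA); // bottleneck of the cycle
    AB -= D;
    BA -= D;
    Total += D;                    // cancel the cycle and record its count
  }
  std::printf("cycles count = %llu\n", (unsigned long long)Total); // prints 7
  return 0;
}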
//===----------------------------------------------------------------------===//
// FileInfo implementation.
-// Safe integer division, returns 0 if numerator is 0.
-static uint32_t safeDiv(uint64_t Numerator, uint64_t Divisor) {
- if (!Numerator)
+// Format dividend/divisor as a percentage. Return 1 if the result is greater
+// than 0% and less than 1%.
+static uint32_t formatPercentage(uint64_t dividend, uint64_t divisor) {
+ if (!dividend || !divisor)
return 0;
- return Numerator / Divisor;
+ dividend *= 100;
+ return dividend < divisor ? 1 : dividend / divisor;
}
// This custom division function mimics gcov's branch outputs:
@@ -522,8 +561,11 @@ class LineConsumer {
public:
LineConsumer() = default;
LineConsumer(StringRef Filename) {
+ // Open source files without requiring a NUL terminator. Concurrent
+ // modification of the file may invalidate the NUL terminator assumption.
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
- MemoryBuffer::getFileOrSTDIN(Filename);
+ MemoryBuffer::getFileOrSTDIN(Filename, -1,
+ /*RequiresNullTerminator=*/false);
if (std::error_code EC = BufferOrErr.getError()) {
errs() << Filename << ": " << EC.message() << "\n";
Remaining = "";
@@ -579,23 +621,23 @@ static std::string mangleCoveragePath(StringRef Filename, bool PreservePaths) {
return std::string(Result.str());
}
-std::string FileInfo::getCoveragePath(StringRef Filename,
- StringRef MainFilename) {
- if (Options.NoOutput)
+std::string Context::getCoveragePath(StringRef filename,
+ StringRef mainFilename) const {
+ if (options.NoOutput)
// This is probably a bug in gcov, but when -n is specified, paths aren't
// mangled at all, and the -l and -p options are ignored. Here, we do the
// same.
- return std::string(Filename);
+ return std::string(filename);
std::string CoveragePath;
- if (Options.LongFileNames && !Filename.equals(MainFilename))
+ if (options.LongFileNames && !filename.equals(mainFilename))
CoveragePath =
- mangleCoveragePath(MainFilename, Options.PreservePaths) + "##";
- CoveragePath += mangleCoveragePath(Filename, Options.PreservePaths);
- if (Options.HashFilenames) {
+ mangleCoveragePath(mainFilename, options.PreservePaths) + "##";
+ CoveragePath += mangleCoveragePath(filename, options.PreservePaths);
+ if (options.HashFilenames) {
MD5 Hasher;
MD5::MD5Result Result;
- Hasher.update(Filename.str());
+ Hasher.update(filename.str());
Hasher.final(Result);
CoveragePath += "##" + std::string(Result.digest());
}
@@ -603,292 +645,302 @@ std::string FileInfo::getCoveragePath(StringRef Filename,
return CoveragePath;
}
-std::unique_ptr<raw_ostream>
-FileInfo::openCoveragePath(StringRef CoveragePath) {
- std::error_code EC;
- auto OS =
- std::make_unique<raw_fd_ostream>(CoveragePath, EC, sys::fs::OF_Text);
- if (EC) {
- errs() << EC.message() << "\n";
- return std::make_unique<raw_null_ostream>();
+void Context::collectFunction(GCOVFunction &f, Summary &summary) {
+ SourceInfo &si = sources[f.srcIdx];
+ if (f.startLine >= si.startLineToFunctions.size())
+ si.startLineToFunctions.resize(f.startLine + 1);
+ si.startLineToFunctions[f.startLine].push_back(&f);
+ for (const GCOVBlock &b : f.blocksRange()) {
+ if (b.lines.empty())
+ continue;
+ uint32_t maxLineNum = *std::max_element(b.lines.begin(), b.lines.end());
+ if (maxLineNum >= si.lines.size())
+ si.lines.resize(maxLineNum + 1);
+ for (uint32_t lineNum : b.lines) {
+ LineInfo &line = si.lines[lineNum];
+ if (!line.exists)
+ ++summary.lines;
+ if (line.count == 0 && b.count)
+ ++summary.linesExec;
+ line.exists = true;
+ line.count += b.count;
+ line.blocks.push_back(&b);
+ }
}
- return std::move(OS);
}
-/// print - Print source files with collected line count information.
-void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename,
- StringRef GCNOFile, StringRef GCDAFile, GCOVFile &file) {
- SmallVector<StringRef, 4> Filenames;
- for (const auto &LI : LineInfo)
- Filenames.push_back(LI.first());
- llvm::sort(Filenames);
-
- for (StringRef Filename : Filenames) {
- auto AllLines =
- Options.Intermediate ? LineConsumer() : LineConsumer(Filename);
- std::string CoveragePath = getCoveragePath(Filename, MainFilename);
- std::unique_ptr<raw_ostream> CovStream;
- if (Options.NoOutput || Options.Intermediate)
- CovStream = std::make_unique<raw_null_ostream>();
- else if (!Options.UseStdout)
- CovStream = openCoveragePath(CoveragePath);
- raw_ostream &CovOS =
- !Options.NoOutput && Options.UseStdout ? llvm::outs() : *CovStream;
-
- CovOS << " -: 0:Source:" << Filename << "\n";
- CovOS << " -: 0:Graph:" << GCNOFile << "\n";
- CovOS << " -: 0:Data:" << GCDAFile << "\n";
- CovOS << " -: 0:Runs:" << RunCount << "\n";
- if (file.getVersion() < GCOV::V900)
- CovOS << " -: 0:Programs:" << ProgramCount << "\n";
-
- const LineData &Line = LineInfo[Filename];
- GCOVCoverage FileCoverage(Filename);
- for (uint32_t LineIndex = 0; LineIndex < Line.LastLine || !AllLines.empty();
- ++LineIndex) {
- if (Options.BranchInfo) {
- FunctionLines::const_iterator FuncsIt = Line.Functions.find(LineIndex);
- if (FuncsIt != Line.Functions.end())
- printFunctionSummary(CovOS, FuncsIt->second);
- }
+void Context::collectSourceLine(SourceInfo &si, Summary *summary,
+ LineInfo &line, size_t lineNum) const {
+ uint64_t count = 0;
+ for (const GCOVBlock *b : line.blocks) {
+ if (b->number == 0) {
+ // For nonstandard control flows, arcs into the exit block may be counted
+ // more than once (fork) or not counted at all (abnormal exit), and thus
+ // the (exit,entry) counter may be inaccurate. Count the entry block with
+ // the outgoing arcs.
+ for (const GCOVArc *arc : b->succ)
+ count += arc->count;
+ } else {
+ // Add counts from predecessors that are not on the same line.
+ for (const GCOVArc *arc : b->pred)
+ if (!llvm::is_contained(line.blocks, &arc->src))
+ count += arc->count;
+ }
+ for (GCOVArc *arc : b->succ)
+ arc->cycleCount = arc->count;
+ }
- BlockLines::const_iterator BlocksIt = Line.Blocks.find(LineIndex);
- if (BlocksIt == Line.Blocks.end()) {
- // No basic blocks are on this line. Not an executable line of code.
- CovOS << " -:";
- AllLines.printNext(CovOS, LineIndex + 1);
- } else {
- const BlockVector &Blocks = BlocksIt->second;
-
- // Add up the block counts to form line counts.
- DenseMap<const GCOVFunction *, bool> LineExecs;
- for (const GCOVBlock *Block : Blocks) {
- if (Options.FuncCoverage) {
- // This is a slightly convoluted way to most accurately gather line
- // statistics for functions. Basically what is happening is that we
- // don't want to count a single line with multiple blocks more than
- // once. However, we also don't simply want to give the total line
- // count to every function that starts on the line. Thus, what is
- // happening here are two things:
- // 1) Ensure that the number of logical lines is only incremented
- // once per function.
- // 2) If there are multiple blocks on the same line, ensure that the
- // number of lines executed is incremented as long as at least
- // one of the blocks are executed.
- const GCOVFunction *Function = &Block->getParent();
- if (FuncCoverages.find(Function) == FuncCoverages.end()) {
- std::pair<const GCOVFunction *, GCOVCoverage> KeyValue(
- Function, GCOVCoverage(Function->getName()));
- FuncCoverages.insert(KeyValue);
- }
- GCOVCoverage &FuncCoverage = FuncCoverages.find(Function)->second;
-
- if (LineExecs.find(Function) == LineExecs.end()) {
- if (Block->getCount()) {
- ++FuncCoverage.LinesExec;
- LineExecs[Function] = true;
- } else {
- LineExecs[Function] = false;
- }
- ++FuncCoverage.LogicalLines;
- } else if (!LineExecs[Function] && Block->getCount()) {
- ++FuncCoverage.LinesExec;
- LineExecs[Function] = true;
- }
- }
- }
+ count += GCOVBlock::getCyclesCount(line.blocks);
+ line.count = count;
+ if (line.exists) {
+ ++summary->lines;
+ if (line.count != 0)
+ ++summary->linesExec;
+ }
- const uint64_t LineCount = GCOVBlock::getLineCount(Blocks);
- if (LineCount == 0)
- CovOS << " #####:";
- else {
- CovOS << format("%9" PRIu64 ":", LineCount);
- ++FileCoverage.LinesExec;
- }
- ++FileCoverage.LogicalLines;
-
- AllLines.printNext(CovOS, LineIndex + 1);
-
- uint32_t BlockNo = 0;
- uint32_t EdgeNo = 0;
- for (const GCOVBlock *Block : Blocks) {
- // Only print block and branch information at the end of the block.
- if (Block->getLastLine() != LineIndex + 1)
- continue;
- if (Options.AllBlocks)
- printBlockInfo(CovOS, *Block, LineIndex, BlockNo);
- if (Options.BranchInfo) {
- size_t NumEdges = Block->getNumDstEdges();
- if (NumEdges > 1)
- printBranchInfo(CovOS, *Block, FileCoverage, EdgeNo);
- else if (Options.UncondBranch && NumEdges == 1)
- printUncondBranchInfo(CovOS, EdgeNo, Block->succ[0]->Count);
- }
- }
+ if (options.BranchInfo)
+ for (const GCOVBlock *b : line.blocks) {
+ if (b->getLastLine() != lineNum)
+ continue;
+ int branches = 0, execBranches = 0, takenBranches = 0;
+ for (const GCOVArc *arc : b->succ) {
+ ++branches;
+ if (count != 0)
+ ++execBranches;
+ if (arc->count != 0)
+ ++takenBranches;
+ }
+ if (branches > 1) {
+ summary->branches += branches;
+ summary->branchesExec += execBranches;
+ summary->branchesTaken += takenBranches;
}
}
- SourceInfo &source = sources[file.filenameToIdx.find(Filename)->second];
- source.name = CoveragePath;
- source.coverage = FileCoverage;
+}
+
+void Context::collectSource(SourceInfo &si, Summary &summary) const {
+ size_t lineNum = 0;
+ for (LineInfo &line : si.lines) {
+ collectSourceLine(si, &summary, line, lineNum);
+ ++lineNum;
}
+}
- if (Options.Intermediate && !Options.NoOutput) {
- // gcov 7.* unexpectedly create multiple .gcov files, which was fixed in 8.0
- // (PR GCC/82702). We create just one file.
- std::string outputPath(sys::path::filename(MainFilename));
- std::error_code ec;
- raw_fd_ostream os(outputPath + ".gcov", ec, sys::fs::OF_Text);
- if (ec) {
- errs() << ec.message() << "\n";
- return;
+void Context::annotateSource(SourceInfo &si, const GCOVFile &file,
+ StringRef gcno, StringRef gcda,
+ raw_ostream &os) const {
+ auto source =
+ options.Intermediate ? LineConsumer() : LineConsumer(si.filename);
+
+ os << " -: 0:Source:" << si.displayName << '\n';
+ os << " -: 0:Graph:" << gcno << '\n';
+ os << " -: 0:Data:" << gcda << '\n';
+ os << " -: 0:Runs:" << file.RunCount << '\n';
+ if (file.Version < GCOV::V900)
+ os << " -: 0:Programs:" << file.ProgramCount << '\n';
+
+ for (size_t lineNum = 1; !source.empty(); ++lineNum) {
+ if (lineNum >= si.lines.size()) {
+ os << " -:";
+ source.printNext(os, lineNum);
+ continue;
}
- for (const SourceInfo &source : sources) {
- os << "file:" << source.filename << '\n';
- for (const GCOVFunction *f : source.functions)
- os << "function:" << f->startLine << ',' << f->getEntryCount() << ','
- << f->Name << '\n';
- const LineData &line = LineInfo[source.filename];
- for (uint32_t lineNum = 0; lineNum != line.LastLine; ++lineNum) {
- BlockLines::const_iterator BlocksIt = line.Blocks.find(lineNum);
- if (BlocksIt == line.Blocks.end())
- continue;
- const BlockVector &blocks = BlocksIt->second;
- // GCC 8 (r254259) added third third field for Ada:
- // lcount:<line>,<count>,<has_unexecuted_blocks>
- // We don't need the third field.
- os << "lcount:" << (lineNum + 1) << ','
- << GCOVBlock::getLineCount(blocks) << '\n';
-
- if (!Options.BranchInfo)
- continue;
- for (const GCOVBlock *block : blocks) {
- if (block->getLastLine() != lineNum + 1 ||
- block->getNumDstEdges() < 2)
- continue;
- for (const GCOVArc *arc : block->dsts()) {
- const char *type = block->getCount()
- ? arc->Count ? "taken" : "nottaken"
- : "notexec";
- os << "branch:" << (lineNum + 1) << ',' << type << '\n';
- }
+ const LineInfo &line = si.lines[lineNum];
+ if (options.BranchInfo && lineNum < si.startLineToFunctions.size())
+ for (const auto *f : si.startLineToFunctions[lineNum])
+ printFunctionDetails(*f, os);
+ if (!line.exists)
+ os << " -:";
+ else if (line.count == 0)
+ os << " #####:";
+ else
+ os << format("%9" PRIu64 ":", line.count);
+ source.printNext(os, lineNum);
+
+ uint32_t blockIdx = 0, edgeIdx = 0;
+ for (const GCOVBlock *b : line.blocks) {
+ if (b->getLastLine() != lineNum)
+ continue;
+ if (options.AllBlocks) {
+ if (b->getCount() == 0)
+ os << " $$$$$:";
+ else
+ os << format("%9" PRIu64 ":", b->count);
+ os << format("%5u-block %2u\n", lineNum, blockIdx++);
+ }
+ if (options.BranchInfo) {
+ size_t NumEdges = b->succ.size();
+ if (NumEdges > 1)
+ printBranchInfo(*b, edgeIdx, os);
+ else if (options.UncondBranch && NumEdges == 1) {
+ uint64_t count = b->succ[0]->count;
+ os << format("unconditional %2u ", edgeIdx++)
+ << formatBranchInfo(options, count, count) << '\n';
}
}
}
}
+}
+
+void Context::printSourceToIntermediate(const SourceInfo &si,
+ raw_ostream &os) const {
+ os << "file:" << si.filename << '\n';
+ for (const auto &fs : si.startLineToFunctions)
+ for (const GCOVFunction *f : fs)
+ os << "function:" << f->startLine << ',' << f->getEntryCount() << ','
+ << f->getName(options.Demangle) << '\n';
+ for (size_t lineNum = 1, size = si.lines.size(); lineNum < size; ++lineNum) {
+ const LineInfo &line = si.lines[lineNum];
+ if (line.blocks.empty())
+ continue;
+ // GCC 8 (r254259) added a third field for Ada:
+ // lcount:<line>,<count>,<has_unexecuted_blocks>
+ // We don't need the third field.
+ os << "lcount:" << lineNum << ',' << line.count << '\n';
- if (!Options.UseStdout) {
- // FIXME: There is no way to detect calls given current instrumentation.
- if (Options.FuncCoverage)
- printFuncCoverage(InfoOS);
- printFileCoverage(InfoOS);
+ if (!options.BranchInfo)
+ continue;
+ for (const GCOVBlock *b : line.blocks) {
+ if (b->succ.size() < 2 || b->getLastLine() != lineNum)
+ continue;
+ for (const GCOVArc *arc : b->succ) {
+ const char *type =
+ b->getCount() ? arc->count ? "taken" : "nottaken" : "notexec";
+ os << "branch:" << lineNum << ',' << type << '\n';
+ }
+ }
}
}
-/// printFunctionSummary - Print function and block summary.
-void FileInfo::printFunctionSummary(raw_ostream &OS,
- const FunctionVector &Funcs) const {
- for (const GCOVFunction *Func : Funcs) {
- uint64_t EntryCount = Func->getEntryCount();
- uint32_t BlocksExec = 0;
- for (const GCOVBlock &Block : Func->blocks())
- if (Block.getNumDstEdges() && Block.getCount())
- ++BlocksExec;
-
- OS << "function " << Func->getName() << " called " << EntryCount
- << " returned " << safeDiv(Func->getExitCount() * 100, EntryCount)
- << "% blocks executed "
- << safeDiv(BlocksExec * 100, Func->getNumBlocks() - 1) << "%\n";
+void Context::print(StringRef filename, StringRef gcno, StringRef gcda,
+ GCOVFile &file) {
+ for (StringRef filename : file.filenames) {
+ sources.emplace_back(filename);
+ SourceInfo &si = sources.back();
+ si.displayName = si.filename;
+ if (!options.SourcePrefix.empty() &&
+ sys::path::replace_path_prefix(si.displayName, options.SourcePrefix,
+ "") &&
+ !si.displayName.empty()) {
+ // TODO replace_path_prefix may strip the prefix even if the remaining
+ // part does not start with a separator.
+ if (sys::path::is_separator(si.displayName[0]))
+ si.displayName.erase(si.displayName.begin());
+ else
+ si.displayName = si.filename;
+ }
+ if (options.RelativeOnly && sys::path::is_absolute(si.displayName))
+ si.ignored = true;
}
-}
-/// printBlockInfo - Output counts for each block.
-void FileInfo::printBlockInfo(raw_ostream &OS, const GCOVBlock &Block,
- uint32_t LineIndex, uint32_t &BlockNo) const {
- if (Block.getCount() == 0)
- OS << " $$$$$:";
- else
- OS << format("%9" PRIu64 ":", Block.getCount());
- OS << format("%5u-block %2u\n", LineIndex + 1, BlockNo++);
-}
+ raw_ostream &os = llvm::outs();
+ for (GCOVFunction &f : make_pointee_range(file.functions)) {
+ Summary summary(f.getName(options.Demangle));
+ collectFunction(f, summary);
+ if (options.FuncCoverage && !options.UseStdout) {
+ os << "Function '" << summary.Name << "'\n";
+ printSummary(summary, os);
+ os << '\n';
+ }
+ }
-/// printBranchInfo - Print conditional branch probabilities.
-void FileInfo::printBranchInfo(raw_ostream &OS, const GCOVBlock &Block,
- GCOVCoverage &Coverage, uint32_t &EdgeNo) {
- SmallVector<uint64_t, 16> BranchCounts;
- uint64_t TotalCounts = 0;
- for (const GCOVArc *Edge : Block.dsts()) {
- BranchCounts.push_back(Edge->Count);
- TotalCounts += Edge->Count;
- if (Block.getCount())
- ++Coverage.BranchesExec;
- if (Edge->Count)
- ++Coverage.BranchesTaken;
- ++Coverage.Branches;
-
- if (Options.FuncCoverage) {
- const GCOVFunction *Function = &Block.getParent();
- GCOVCoverage &FuncCoverage = FuncCoverages.find(Function)->second;
- if (Block.getCount())
- ++FuncCoverage.BranchesExec;
- if (Edge->Count)
- ++FuncCoverage.BranchesTaken;
- ++FuncCoverage.Branches;
+ for (SourceInfo &si : sources) {
+ if (si.ignored)
+ continue;
+ Summary summary(si.displayName);
+ collectSource(si, summary);
+
+ // Print file summary unless -t is specified.
+ std::string gcovName = getCoveragePath(si.filename, filename);
+ if (!options.UseStdout) {
+ os << "File '" << summary.Name << "'\n";
+ printSummary(summary, os);
+ if (!options.NoOutput && !options.Intermediate)
+ os << "Creating '" << gcovName << "'\n";
+ os << '\n';
+ }
+
+ if (options.NoOutput || options.Intermediate)
+ continue;
+ Optional<raw_fd_ostream> os;
+ if (!options.UseStdout) {
+ std::error_code ec;
+ os.emplace(gcovName, ec, sys::fs::OF_Text);
+ if (ec) {
+ errs() << ec.message() << '\n';
+ continue;
+ }
+ }
+ annotateSource(si, file, gcno, gcda,
+ options.UseStdout ? llvm::outs() : *os);
+ }
+
+ if (options.Intermediate && !options.NoOutput) {
+ // gcov 7.* unexpectedly creates multiple .gcov files, which was fixed in 8.0
+ // (PR GCC/82702). We create just one file.
+ std::string outputPath(sys::path::filename(filename));
+ std::error_code ec;
+ raw_fd_ostream os(outputPath + ".gcov", ec, sys::fs::OF_Text);
+ if (ec) {
+ errs() << ec.message() << '\n';
+ return;
}
+
+ for (const SourceInfo &si : sources)
+ printSourceToIntermediate(si, os);
}
+}
- for (uint64_t N : BranchCounts)
- OS << format("branch %2u ", EdgeNo++)
- << formatBranchInfo(Options, N, TotalCounts) << "\n";
+void Context::printFunctionDetails(const GCOVFunction &f,
+ raw_ostream &os) const {
+ const uint64_t entryCount = f.getEntryCount();
+ uint32_t blocksExec = 0;
+ const GCOVBlock &exitBlock = f.getExitBlock();
+ uint64_t exitCount = 0;
+ for (const GCOVArc *arc : exitBlock.pred)
+ exitCount += arc->count;
+ for (const GCOVBlock &b : f.blocksRange())
+ if (b.number != 0 && &b != &exitBlock && b.getCount())
+ ++blocksExec;
+
+ os << "function " << f.getName(options.Demangle) << " called " << entryCount
+ << " returned " << formatPercentage(exitCount, entryCount)
+ << "% blocks executed "
+ << formatPercentage(blocksExec, f.blocks.size() - 2) << "%\n";
}
-/// printUncondBranchInfo - Print unconditional branch probabilities.
-void FileInfo::printUncondBranchInfo(raw_ostream &OS, uint32_t &EdgeNo,
- uint64_t Count) const {
- OS << format("unconditional %2u ", EdgeNo++)
- << formatBranchInfo(Options, Count, Count) << "\n";
+/// printBranchInfo - Print conditional branch probabilities.
+void Context::printBranchInfo(const GCOVBlock &Block, uint32_t &edgeIdx,
+ raw_ostream &os) const {
+ uint64_t total = 0;
+ for (const GCOVArc *arc : Block.dsts())
+ total += arc->count;
+ for (const GCOVArc *arc : Block.dsts())
+ os << format("branch %2u ", edgeIdx++)
+ << formatBranchInfo(options, arc->count, total) << '\n';
}
-// printCoverage - Print generic coverage info used by both printFuncCoverage
-// and printFileCoverage.
-void FileInfo::printCoverage(raw_ostream &OS,
- const GCOVCoverage &Coverage) const {
- OS << format("Lines executed:%.2f%% of %u\n",
- double(Coverage.LinesExec) * 100 / Coverage.LogicalLines,
- Coverage.LogicalLines);
- if (Options.BranchInfo) {
- if (Coverage.Branches) {
- OS << format("Branches executed:%.2f%% of %u\n",
- double(Coverage.BranchesExec) * 100 / Coverage.Branches,
- Coverage.Branches);
- OS << format("Taken at least once:%.2f%% of %u\n",
- double(Coverage.BranchesTaken) * 100 / Coverage.Branches,
- Coverage.Branches);
+void Context::printSummary(const Summary &summary, raw_ostream &os) const {
+ os << format("Lines executed:%.2f%% of %" PRIu64 "\n",
+ double(summary.linesExec) * 100 / summary.lines, summary.lines);
+ if (options.BranchInfo) {
+ if (summary.branches == 0) {
+ os << "No branches\n";
} else {
- OS << "No branches\n";
+ os << format("Branches executed:%.2f%% of %" PRIu64 "\n",
+ double(summary.branchesExec) * 100 / summary.branches,
+ summary.branches);
+ os << format("Taken at least once:%.2f%% of %" PRIu64 "\n",
+ double(summary.branchesTaken) * 100 / summary.branches,
+ summary.branches);
}
- OS << "No calls\n"; // to be consistent with gcov
- }
-}
-
-// printFuncCoverage - Print per-function coverage info.
-void FileInfo::printFuncCoverage(raw_ostream &OS) const {
- for (const auto &FC : FuncCoverages) {
- const GCOVCoverage &Coverage = FC.second;
- OS << "Function '" << Coverage.Name << "'\n";
- printCoverage(OS, Coverage);
- OS << "\n";
+ os << "No calls\n";
}
}
-// printFileCoverage - Print per-file coverage info.
-void FileInfo::printFileCoverage(raw_ostream &OS) const {
- for (const SourceInfo &source : sources) {
- const GCOVCoverage &Coverage = source.coverage;
- OS << "File '" << Coverage.Name << "'\n";
- printCoverage(OS, Coverage);
- if (!Options.NoOutput && !Options.Intermediate)
- OS << "Creating '" << source.name << "'\n";
- OS << "\n";
- }
+void llvm::gcovOneInput(const GCOV::Options &options, StringRef filename,
+ StringRef gcno, StringRef gcda, GCOVFile &file) {
+ Context fi(options);
+ fi.print(filename, gcno, gcda, file);
}
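One more note on this file: formatPercentage, defined near the FileInfo section above, reports 1 for any ratio that is nonzero but below one percent, so a function that executed at all is never printed as "returned 0%". A standalone check of that rounding rule, using the same arithmetic rather than the GCOV code itself:

#include <cstdint>
#include <cstdio>

// Same rule as formatPercentage above: zero stays 0, anything nonzero but
// under 1% reports as 1, everything else is an integer percentage.
static uint32_t pct(uint64_t Dividend, uint64_t Divisor) {
  if (!Dividend || !Divisor)
    return 0;
  Dividend *= 100;
  return Dividend < Divisor ? 1 : uint32_t(Dividend / Divisor);
}

int main() {
  std::printf("%u %u %u\n", pct(0, 1000), pct(1, 1000), pct(500, 1000));
  // prints: 0 1 50
  return 0;
}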
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
index b9d8ae9ba60d..4a0cee67089d 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
@@ -625,11 +625,11 @@ void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
}
}
-void InstrProfValueSiteRecord::scale(uint64_t Weight,
+void InstrProfValueSiteRecord::scale(uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn) {
for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) {
bool Overflowed;
- I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed);
+ I->Count = SaturatingMultiply(I->Count, N, &Overflowed) / D;
if (Overflowed)
Warn(instrprof_error::counter_overflow);
}
@@ -678,22 +678,23 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight,
}
void InstrProfRecord::scaleValueProfData(
- uint32_t ValueKind, uint64_t Weight,
+ uint32_t ValueKind, uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn) {
for (auto &R : getValueSitesForKind(ValueKind))
- R.scale(Weight, Warn);
+ R.scale(N, D, Warn);
}
-void InstrProfRecord::scale(uint64_t Weight,
+void InstrProfRecord::scale(uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn) {
+ assert(D != 0 && "D cannot be 0");
for (auto &Count : this->Counts) {
bool Overflowed;
- Count = SaturatingMultiply(Count, Weight, &Overflowed);
+ Count = SaturatingMultiply(Count, N, &Overflowed) / D;
if (Overflowed)
Warn(instrprof_error::counter_overflow);
}
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
- scaleValueProfData(Kind, Weight, Warn);
+ scaleValueProfData(Kind, N, D, Warn);
}
// Map indirect call target name hash to name string.
@@ -1111,35 +1112,17 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
return true;
}
-// Parse the value profile options.
-void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart,
- int64_t &RangeLast) {
- static const int64_t DefaultMemOPSizeRangeStart = 0;
- static const int64_t DefaultMemOPSizeRangeLast = 8;
- RangeStart = DefaultMemOPSizeRangeStart;
- RangeLast = DefaultMemOPSizeRangeLast;
-
- if (!MemOPSizeRange.empty()) {
- auto Pos = MemOPSizeRange.find(':');
- if (Pos != std::string::npos) {
- if (Pos > 0)
- MemOPSizeRange.substr(0, Pos).getAsInteger(10, RangeStart);
- if (Pos < MemOPSizeRange.size() - 1)
- MemOPSizeRange.substr(Pos + 1).getAsInteger(10, RangeLast);
- } else
- MemOPSizeRange.getAsInteger(10, RangeLast);
- }
- assert(RangeLast >= RangeStart);
-}
-
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
// aware this is an ir_level profile so it can set the version flag.
-void createIRLevelProfileFlagVar(Module &M, bool IsCS) {
+void createIRLevelProfileFlagVar(Module &M, bool IsCS,
+ bool InstrEntryBBEnabled) {
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
Type *IntTy64 = Type::getInt64Ty(M.getContext());
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
if (IsCS)
ProfileVersion |= VARIANT_MASK_CSIR_PROF;
+ if (InstrEntryBBEnabled)
+ ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
auto IRLevelVersionVariable = new GlobalVariable(
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
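The scale interface above now takes a numerator/denominator pair, so counts can be scaled by a rational weight; the old whole-number weighting becomes scale(Weight, 1). A standalone sketch of the saturate-then-divide arithmetic, not the LLVM SaturatingMultiply helper itself:

#include <cstdint>
#include <cstdio>
#include <limits>

static uint64_t scaleCount(uint64_t Count, uint64_t N, uint64_t D,
                           bool &Overflowed) {
  const uint64_t Max = std::numeric_limits<uint64_t>::max();
  Overflowed = N != 0 && Count > Max / N;  // would Count * N overflow?
  uint64_t Product = Overflowed ? Max : Count * N;
  return Product / D;                      // D must be non-zero, as asserted above
}

int main() {
  bool Ovf = false;
  // scale(3, 1) triples the count; scale(1, 2) halves it.
  std::printf("%llu\n", (unsigned long long)scaleCount(100, 3, 1, Ovf)); // 300
  std::printf("%llu\n", (unsigned long long)scaleCount(100, 1, 2, Ovf)); // 50
  return 0;
}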
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
index 16a69cb5457b..9581e5b486a6 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -154,23 +154,29 @@ bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
Error TextInstrProfReader::readHeader() {
Symtab.reset(new InstrProfSymtab());
bool IsIRInstr = false;
- if (!Line->startswith(":")) {
- IsIRLevelProfile = false;
- return success();
+ bool IsEntryFirst = false;
+ bool IsCS = false;
+
+ while (Line->startswith(":")) {
+ StringRef Str = Line->substr(1);
+ if (Str.equals_lower("ir"))
+ IsIRInstr = true;
+ else if (Str.equals_lower("fe"))
+ IsIRInstr = false;
+ else if (Str.equals_lower("csir")) {
+ IsIRInstr = true;
+ IsCS = true;
+ } else if (Str.equals_lower("entry_first"))
+ IsEntryFirst = true;
+ else if (Str.equals_lower("not_entry_first"))
+ IsEntryFirst = false;
+ else
+ return error(instrprof_error::bad_header);
+ ++Line;
}
- StringRef Str = (Line)->substr(1);
- if (Str.equals_lower("ir"))
- IsIRInstr = true;
- else if (Str.equals_lower("fe"))
- IsIRInstr = false;
- else if (Str.equals_lower("csir")) {
- IsIRInstr = true;
- HasCSIRLevelProfile = true;
- } else
- return error(instrprof_error::bad_header);
-
- ++Line;
IsIRLevelProfile = IsIRInstr;
+ InstrEntryBBEnabled = IsEntryFirst;
+ HasCSIRLevelProfile = IsCS;
return success();
}
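Taken together with the InstrProfWriter change below, which prints ':entry_first' after the ':ir' or ':csir' flag, the header of a text-format IR profile with entry-block-first instrumentation starts with a run of ':' directives that the loop above simply consumes one by one. An illustrative header, with the comment lines matching what writeText emits:

# IR level Instrumentation Flag
:ir
# Always instrument the function entry block
:entry_first

The per-function records follow after these directive lines.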
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp
index ccb270e0b719..d07668322354 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -165,8 +165,9 @@ public:
} // end namespace llvm
-InstrProfWriter::InstrProfWriter(bool Sparse)
- : Sparse(Sparse), InfoObj(new InstrProfRecordWriterTrait()) {}
+InstrProfWriter::InstrProfWriter(bool Sparse, bool InstrEntryBBEnabled)
+ : Sparse(Sparse), InstrEntryBBEnabled(InstrEntryBBEnabled),
+ InfoObj(new InstrProfRecordWriterTrait()) {}
InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
@@ -240,7 +241,7 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
// We've never seen a function with this name and hash, add it.
Dest = std::move(I);
if (Weight > 1)
- Dest.scale(Weight, MapWarn);
+ Dest.scale(Weight, 1, MapWarn);
} else {
// We're updating a function we've seen before.
Dest.merge(I, Weight, MapWarn);
@@ -308,6 +309,9 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
Header.Version |= VARIANT_MASK_IR_PROF;
Header.Version |= VARIANT_MASK_CSIR_PROF;
}
+ if (InstrEntryBBEnabled)
+ Header.Version |= VARIANT_MASK_INSTR_ENTRY;
+
Header.Unused = 0;
Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
Header.HashOffset = 0;
@@ -441,6 +445,8 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
OS << "# IR level Instrumentation Flag\n:ir\n";
else if (ProfileKind == PF_IRLevelWithCS)
OS << "# CSIR level Instrumentation Flag\n:csir\n";
+ if (InstrEntryBBEnabled)
+ OS << "# Always instrument the function entry block\n:entry_first\n";
InstrProfSymtab Symtab;
using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>;
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index 5d3a07640942..0e03aa50173d 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -18,9 +18,14 @@
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+cl::opt<bool> UseContextLessSummary(
+ "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
+ cl::desc("Merge context profiles before calculating thresholds."));
+
// A set of cutoff values. Each value, when divided by ProfileSummary::Scale
// (which is 1000000) is a desired percentile of total counts.
static const uint32_t DefaultCutoffsData[] = {
@@ -111,6 +116,35 @@ std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
MaxFunctionCount, NumCounts, NumFunctions);
}
+std::unique_ptr<ProfileSummary>
+SampleProfileSummaryBuilder::computeSummaryForProfiles(
+ const StringMap<sampleprof::FunctionSamples> &Profiles) {
+ assert(NumFunctions == 0 &&
+ "This can only be called on an empty summary builder");
+ StringMap<sampleprof::FunctionSamples> ContextLessProfiles;
+ const StringMap<sampleprof::FunctionSamples> *ProfilesToUse = &Profiles;
+ // For CSSPGO, a context-sensitive profile effectively splits a function profile
+ // into many copies, each representing the CFG profile of a particular calling
+ // context. That makes the count distribution look flatter, as we now have
+ // more function profiles, each with lower counts, which in turn leads to lower
+ // hot thresholds. To compensate for that, by default we merge context
+ // profiles before computing the profile summary.
+ if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
+ !UseContextLessSummary.getNumOccurrences())) {
+ for (const auto &I : Profiles) {
+ ContextLessProfiles[I.second.getName()].merge(I.second);
+ }
+ ProfilesToUse = &ContextLessProfiles;
+ }
+
+ for (const auto &I : *ProfilesToUse) {
+ const sampleprof::FunctionSamples &Profile = I.second;
+ addRecord(Profile);
+ }
+
+ return getSummary();
+}
+
std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
computeDetailedSummary();
return std::make_unique<ProfileSummary>(
@@ -119,13 +153,22 @@ std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
}
void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) {
- addCount(Count);
NumFunctions++;
+
+ // Skip invalid count.
+ if (Count == (uint64_t)-1)
+ return;
+
+ addCount(Count);
if (Count > MaxFunctionCount)
MaxFunctionCount = Count;
}
void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) {
+ // Skip invalid count.
+ if (Count == (uint64_t)-1)
+ return;
+
addCount(Count);
if (Count > MaxInternalBlockCount)
MaxInternalBlockCount = Count;
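The comment in computeSummaryForProfiles above notes that CSSPGO splits a function profile into many per-context copies, flattening the count distribution, so the contexts are merged back before thresholds are computed. A rough standalone illustration of that pre-merge step, using plain standard-library containers rather than LLVM's StringMap:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main() {
  // Per-context sample totals, keyed by "context @ function" (illustrative).
  std::map<std::string, uint64_t> ContextProfiles = {
      {"main:3 @ foo", 900}, {"bar:7 @ foo", 100}, {"main:9 @ baz", 50}};

  // Merge every context copy into a single per-function total before
  // computing summary thresholds, mirroring ContextLessProfiles above.
  std::map<std::string, uint64_t> ContextLess;
  for (const auto &KV : ContextProfiles) {
    std::string Func = KV.first.substr(KV.first.find('@') + 2);
    ContextLess[Func] += KV.second;
  }

  for (const auto &KV : ContextLess)
    std::cout << KV.first << " -> " << KV.second << " samples\n";
}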
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp b/contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp
index e5d0fdba5fc4..d6acc00e1a6f 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp
@@ -14,6 +14,8 @@
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
@@ -30,6 +32,8 @@ using namespace sampleprof;
namespace llvm {
namespace sampleprof {
SampleProfileFormat FunctionSamples::Format;
+bool FunctionSamples::ProfileIsProbeBased = false;
+bool FunctionSamples::ProfileIsCS = false;
bool FunctionSamples::UseMD5;
} // namespace sampleprof
} // namespace llvm
@@ -75,6 +79,8 @@ class SampleProfErrorCategoryType : public std::error_category {
return "Uncompress failure";
case sampleprof_error::zlib_unavailable:
return "Zlib is unavailable";
+ case sampleprof_error::hash_mismatch:
+ return "Function hash mismatch";
}
llvm_unreachable("A value of sampleprof_error has no message.");
}
@@ -127,6 +133,9 @@ raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
/// Print the samples collected for a function on stream \p OS.
void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
+ if (getFunctionHash())
+ OS << "CFG checksum " << getFunctionHash() << "\n";
+
OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
<< " sampled lines\n";
@@ -174,8 +183,22 @@ unsigned FunctionSamples::getOffset(const DILocation *DIL) {
0xffff;
}
-const FunctionSamples *
-FunctionSamples::findFunctionSamples(const DILocation *DIL) const {
+LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL) {
+ if (FunctionSamples::ProfileIsProbeBased)
+ // In a pseudo-probe based profile, a callsite is simply represented by the
+ // ID of the probe associated with the call instruction. The probe ID is
+ // encoded in the Discriminator field of the call instruction's debug
+ // metadata.
+ return LineLocation(PseudoProbeDwarfDiscriminator::extractProbeIndex(
+ DIL->getDiscriminator()),
+ 0);
+ else
+ return LineLocation(FunctionSamples::getOffset(DIL),
+ DIL->getBaseDiscriminator());
+}
+
+const FunctionSamples *FunctionSamples::findFunctionSamples(
+ const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper) const {
assert(DIL);
SmallVector<std::pair<LineLocation, StringRef>, 10> S;
@@ -190,11 +213,59 @@ FunctionSamples::findFunctionSamples(const DILocation *DIL) const {
return this;
const FunctionSamples *FS = this;
for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
- FS = FS->findFunctionSamplesAt(S[i].first, S[i].second);
+ FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper);
}
return FS;
}
+void FunctionSamples::findAllNames(DenseSet<StringRef> &NameSet) const {
+ NameSet.insert(Name);
+ for (const auto &BS : BodySamples)
+ for (const auto &TS : BS.second.getCallTargets())
+ NameSet.insert(TS.getKey());
+
+ for (const auto &CS : CallsiteSamples) {
+ for (const auto &NameFS : CS.second) {
+ NameSet.insert(NameFS.first);
+ NameFS.second.findAllNames(NameSet);
+ }
+ }
+}
+
+const FunctionSamples *FunctionSamples::findFunctionSamplesAt(
+ const LineLocation &Loc, StringRef CalleeName,
+ SampleProfileReaderItaniumRemapper *Remapper) const {
+ std::string CalleeGUID;
+ CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID);
+
+ auto iter = CallsiteSamples.find(Loc);
+ if (iter == CallsiteSamples.end())
+ return nullptr;
+ auto FS = iter->second.find(CalleeName);
+ if (FS != iter->second.end())
+ return &FS->second;
+ if (Remapper) {
+ if (auto NameInProfile = Remapper->lookUpNameInProfile(CalleeName)) {
+ auto FS = iter->second.find(*NameInProfile);
+ if (FS != iter->second.end())
+ return &FS->second;
+ }
+ }
+ // If we cannot find an exact match for the callee name, return the FS with
+ // the max total count. Only do this when CalleeName is not provided,
+ // i.e., only for indirect calls.
+ if (!CalleeName.empty())
+ return nullptr;
+ uint64_t MaxTotalSamples = 0;
+ const FunctionSamples *R = nullptr;
+ for (const auto &NameFS : iter->second)
+ if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
+ MaxTotalSamples = NameFS.second.getTotalSamples();
+ R = &NameFS.second;
+ }
+ return R;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); }
#endif
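findFunctionSamplesAt above falls back, when the callee name is empty (an indirect call) and neither the exact nor the remapped name matches, to the callee record with the largest total sample count. A small standalone sketch of that fallback rule; the container and helper names are invented for illustration:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Return the samples for CalleeName at a call site, or, when CalleeName is
// empty (indirect call), the callee with the maximum total samples.
static const std::pair<const std::string, uint64_t> *
lookupCallee(const std::map<std::string, uint64_t> &Callees,
             const std::string &CalleeName) {
  auto It = Callees.find(CalleeName);
  if (It != Callees.end())
    return &*It;
  if (!CalleeName.empty())
    return nullptr; // named callee with no record: give up
  const std::pair<const std::string, uint64_t> *Best = nullptr;
  for (const auto &KV : Callees)
    if (!Best || KV.second >= Best->second)
      Best = &KV;
  return Best;
}

int main() {
  std::map<std::string, uint64_t> Callees = {{"foo", 120}, {"bar", 480}};
  if (const auto *R = lookupCallee(Callees, ""))
    std::cout << "indirect target guess: " << R->first << "\n"; // bar
}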
@@ -216,8 +287,7 @@ std::error_code ProfileSymbolList::read(const uint8_t *Data,
std::error_code ProfileSymbolList::write(raw_ostream &OS) {
// Sort the symbols before output. If compression is enabled,
// sorting makes it much more effective.
- std::vector<StringRef> SortedList;
- SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end());
+ std::vector<StringRef> SortedList(Syms.begin(), Syms.end());
llvm::sort(SortedList);
std::string OutputString;
@@ -232,8 +302,7 @@ std::error_code ProfileSymbolList::write(raw_ostream &OS) {
void ProfileSymbolList::dump(raw_ostream &OS) const {
OS << "======== Dump profile symbol list ========\n";
- std::vector<StringRef> SortedList;
- SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end());
+ std::vector<StringRef> SortedList(Syms.begin(), Syms.end());
llvm::sort(SortedList);
for (auto &Sym : SortedList)
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
index 03f1ac190b91..38cbca844c87 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -83,26 +83,52 @@ static bool ParseHead(const StringRef &Input, StringRef &FName,
/// Returns true if line offset \p L is legal (only has 16 bits).
static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; }
+/// Parse \p Input that contains metadata.
+/// Possible metadata:
+/// - CFG Checksum information:
+/// !CFGChecksum: 12345
+/// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
+static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash) {
+ if (!Input.startswith("!CFGChecksum:"))
+ return false;
+
+ StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
+ return !CFGInfo.getAsInteger(10, FunctionHash);
+}
+
+enum class LineType {
+ CallSiteProfile,
+ BodyProfile,
+ Metadata,
+};
+
/// Parse \p Input as line sample.
///
/// \param Input input line.
-/// \param IsCallsite true if the line represents an inlined callsite.
+/// \param LineTy Type of this line.
/// \param Depth the depth of the inline stack.
/// \param NumSamples total samples of the line/inlined callsite.
/// \param LineOffset line offset to the start of the function.
/// \param Discriminator discriminator of the line.
/// \param TargetCountMap map from indirect call target to count.
+/// \param FunctionHash the function's CFG hash, used by pseudo probe.
///
/// returns true if parsing is successful.
-static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
+static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
uint64_t &NumSamples, uint32_t &LineOffset,
uint32_t &Discriminator, StringRef &CalleeName,
- DenseMap<StringRef, uint64_t> &TargetCountMap) {
+ DenseMap<StringRef, uint64_t> &TargetCountMap,
+ uint64_t &FunctionHash) {
for (Depth = 0; Input[Depth] == ' '; Depth++)
;
if (Depth == 0)
return false;
+ if (Depth == 1 && Input[Depth] == '!') {
+ LineTy = LineType::Metadata;
+ return parseMetadata(Input.substr(Depth), FunctionHash);
+ }
+
size_t n1 = Input.find(':');
StringRef Loc = Input.substr(Depth, n1 - Depth);
size_t n2 = Loc.find('.');
@@ -118,8 +144,8 @@ static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
}
StringRef Rest = Input.substr(n1 + 2);
- if (Rest[0] >= '0' && Rest[0] <= '9') {
- IsCallsite = false;
+ if (isDigit(Rest[0])) {
+ LineTy = LineType::BodyProfile;
size_t n3 = Rest.find(' ');
if (n3 == StringRef::npos) {
if (Rest.getAsInteger(10, NumSamples))
@@ -176,7 +202,7 @@ static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
n3 = n4;
}
} else {
- IsCallsite = true;
+ LineTy = LineType::CallSiteProfile;
size_t n3 = Rest.find_last_of(':');
CalleeName = Rest.substr(0, n3);
if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
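parseMetadata above recognizes a "!CFGChecksum: <decimal>" line and stores the value as the function's CFG hash. A standalone sketch of the same parse without LLVM's StringRef helpers (names here are illustrative):

#include <cstdint>
#include <iostream>
#include <string>

// Parse "!CFGChecksum: 12345" (leading spaces already consumed) into Hash.
static bool parseCFGChecksum(const std::string &Line, uint64_t &Hash) {
  const std::string Prefix = "!CFGChecksum:";
  if (Line.compare(0, Prefix.size(), Prefix) != 0)
    return false;
  std::string Rest = Line.substr(Prefix.size());
  // Trim surrounding whitespace.
  size_t B = Rest.find_first_not_of(" \t");
  size_t E = Rest.find_last_not_of(" \t");
  if (B == std::string::npos)
    return false;
  Rest = Rest.substr(B, E - B + 1);
  try {
    size_t Used = 0;
    Hash = std::stoull(Rest, &Used, 10);
    return Used == Rest.size(); // reject trailing junk
  } catch (...) {
    return false;
  }
}

int main() {
  uint64_t Hash = 0;
  if (parseCFGChecksum("!CFGChecksum: 563022570642068", Hash))
    std::cout << "CFG checksum = " << Hash << "\n";
}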
@@ -196,6 +222,11 @@ std::error_code SampleProfileReaderText::readImpl() {
sampleprof_error Result = sampleprof_error::success;
InlineCallStack InlineStack;
+ uint32_t ProbeProfileCount = 0;
+
+ // SeenMetadata tracks whether we have processed metadata for the current
+ // top-level function profile.
+ bool SeenMetadata = false;
for (; !LineIt.is_at_eof(); ++LineIt) {
if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
@@ -220,9 +251,14 @@ std::error_code SampleProfileReaderText::readImpl() {
"Expected 'mangled_name:NUM:NUM', found " + *LineIt);
return sampleprof_error::malformed;
}
- Profiles[FName] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[FName];
- FProfile.setName(FName);
+ SeenMetadata = false;
+ SampleContext FContext(FName);
+ if (FContext.hasContext())
+ ++CSProfileCount;
+ Profiles[FContext] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[FContext];
+ FProfile.setName(FContext.getNameWithoutContext());
+ FProfile.setContext(FContext);
MergeResult(Result, FProfile.addTotalSamples(NumSamples));
MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
InlineStack.clear();
@@ -231,25 +267,35 @@ std::error_code SampleProfileReaderText::readImpl() {
uint64_t NumSamples;
StringRef FName;
DenseMap<StringRef, uint64_t> TargetCountMap;
- bool IsCallsite;
uint32_t Depth, LineOffset, Discriminator;
- if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset,
- Discriminator, FName, TargetCountMap)) {
+ LineType LineTy;
+ uint64_t FunctionHash;
+ if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
+ Discriminator, FName, TargetCountMap, FunctionHash)) {
reportError(LineIt.line_number(),
"Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
*LineIt);
return sampleprof_error::malformed;
}
- if (IsCallsite) {
- while (InlineStack.size() > Depth) {
- InlineStack.pop_back();
- }
+ if (SeenMetadata && LineTy != LineType::Metadata) {
+ // Metadata must be put at the end of a function profile.
+ reportError(LineIt.line_number(),
+ "Found non-metadata after metadata: " + *LineIt);
+ return sampleprof_error::malformed;
+ }
+ while (InlineStack.size() > Depth) {
+ InlineStack.pop_back();
+ }
+ switch (LineTy) {
+ case LineType::CallSiteProfile: {
FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
LineLocation(LineOffset, Discriminator))[std::string(FName)];
FSamples.setName(FName);
MergeResult(Result, FSamples.addTotalSamples(NumSamples));
InlineStack.push_back(&FSamples);
- } else {
+ break;
+ }
+ case LineType::BodyProfile: {
while (InlineStack.size() > Depth) {
InlineStack.pop_back();
}
@@ -261,9 +307,28 @@ std::error_code SampleProfileReaderText::readImpl() {
}
MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
NumSamples));
+ break;
+ }
+ case LineType::Metadata: {
+ FunctionSamples &FProfile = *InlineStack.back();
+ FProfile.setFunctionHash(FunctionHash);
+ ++ProbeProfileCount;
+ SeenMetadata = true;
+ break;
+ }
}
}
}
+
+ assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
+ "Cannot have both context-sensitive and regular profile");
+ ProfileIsCS = (CSProfileCount > 0);
+ assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
+ "Cannot have both probe-based profiles and regular profiles");
+ ProfileIsProbeBased = (ProbeProfileCount > 0);
+ FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
+ FunctionSamples::ProfileIsCS = ProfileIsCS;
+
if (Result == sampleprof_error::success)
computeSummary();
@@ -354,6 +419,34 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
return NameTable[*Idx];
}
+ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
+ if (!FixedLengthMD5)
+ return SampleProfileReaderBinary::readStringFromTable();
+
+ // read NameTable index.
+ auto Idx = readStringIndex(NameTable);
+ if (std::error_code EC = Idx.getError())
+ return EC;
+
+ // Check whether the name to be accessed has been accessed before;
+ // if not, read it from memory directly.
+ StringRef &SR = NameTable[*Idx];
+ if (SR.empty()) {
+ const uint8_t *SavedData = Data;
+ Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
+ auto FID = readUnencodedNumber<uint64_t>();
+ if (std::error_code EC = FID.getError())
+ return EC;
+ // Save the string converted from uint64_t in MD5StringBuf. All the
+ // references to the name are StringRefs referring to the string
+ // in MD5StringBuf.
+ MD5StringBuf->push_back(std::to_string(*FID));
+ SR = MD5StringBuf->back();
+ Data = SavedData;
+ }
+ return SR;
+}
+
ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
auto Idx = readStringIndex(NameTable);
if (std::error_code EC = Idx.getError())
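In the FixedLengthMD5 path above, the name table section is just an array of raw 8-byte MD5 values, and readStringFromTable resolves index I lazily by reading the little-endian uint64_t at MD5NameMemStart + I * sizeof(uint64_t) and caching its decimal string. A hedged sketch of that lazy lookup over a plain byte buffer; the helper names are invented:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Decode entry Idx of a fixed-length (8 bytes each) MD5 name table on first
// use, caching the decimal string in Cache[Idx].
static const std::string &readMD5Name(const std::vector<uint8_t> &Section,
                                      size_t Idx,
                                      std::vector<std::string> &Cache) {
  if (Cache[Idx].empty()) {
    uint64_t V = 0;
    // For brevity this assumes a little-endian host; the real reader decodes
    // explicitly little-endian data.
    std::memcpy(&V, Section.data() + Idx * sizeof(uint64_t), sizeof(uint64_t));
    Cache[Idx] = std::to_string(V);
  }
  return Cache[Idx];
}

int main() {
  std::vector<uint8_t> Section(16, 0);
  Section[0] = 0x2a; // entry 0 == 42
  Section[8] = 0x07; // entry 1 == 7
  std::vector<std::string> Cache(2);
  std::cout << readMD5Name(Section, 0, Cache) << ", "
            << readMD5Name(Section, 1, Cache) << "\n"; // 42, 7
}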
@@ -450,12 +543,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
if (std::error_code EC = FName.getError())
return EC;
- Profiles[*FName] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[*FName];
- FProfile.setName(*FName);
-
+ SampleContext FContext(*FName);
+ Profiles[FContext] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[FContext];
+ FProfile.setName(FContext.getNameWithoutContext());
+ FProfile.setContext(FContext);
FProfile.addHeadSamples(*NumHeadSamples);
+ if (FContext.hasContext())
+ CSProfileCount++;
+
if (std::error_code EC = readProfile(FProfile))
return EC;
return sampleprof_error::success;
@@ -470,7 +567,7 @@ std::error_code SampleProfileReaderBinary::readImpl() {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderExtBinary::readOneSection(
+std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
Data = Start;
End = Start + Size;
@@ -481,37 +578,56 @@ std::error_code SampleProfileReaderExtBinary::readOneSection(
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
Summary->setPartialProfile(true);
break;
- case SecNameTable:
- if (std::error_code EC = readNameTableSec(
- hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)))
+ case SecNameTable: {
+ FixedLengthMD5 =
+ hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
+ bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
+ assert((!FixedLengthMD5 || UseMD5) &&
+ "If FixedLengthMD5 is true, UseMD5 has to be true");
+ if (std::error_code EC = readNameTableSec(UseMD5))
return EC;
break;
+ }
case SecLBRProfile:
if (std::error_code EC = readFuncProfiles())
return EC;
break;
- case SecProfileSymbolList:
- if (std::error_code EC = readProfileSymbolList())
- return EC;
- break;
case SecFuncOffsetTable:
if (std::error_code EC = readFuncOffsetTable())
return EC;
break;
+ case SecFuncMetadata:
+ ProfileIsProbeBased =
+ hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
+ FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
+ if (std::error_code EC = readFuncMetadata())
+ return EC;
+ break;
+ case SecProfileSymbolList:
+ if (std::error_code EC = readProfileSymbolList())
+ return EC;
+ break;
default:
+ if (std::error_code EC = readCustomSection(Entry))
+ return EC;
break;
}
return sampleprof_error::success;
}
-void SampleProfileReaderExtBinary::collectFuncsFrom(const Module &M) {
+void SampleProfileReaderExtBinaryBase::collectFuncsFrom(const Module &M) {
UseAllFuncs = false;
FuncsToUse.clear();
for (auto &F : M)
FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
}
-std::error_code SampleProfileReaderExtBinary::readFuncOffsetTable() {
+std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
+ // If there is more than one FuncOffsetTable, the profile read associated
+ // with the previous FuncOffsetTable has to be done before the next
+ // FuncOffsetTable is read.
+ FuncOffsetTable.clear();
+
auto Size = readNumber<uint64_t>();
if (std::error_code EC = Size.getError())
return EC;
@@ -531,7 +647,7 @@ std::error_code SampleProfileReaderExtBinary::readFuncOffsetTable() {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderExtBinary::readFuncProfiles() {
+std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
const uint8_t *Start = Data;
if (UseAllFuncs) {
while (Data < End) {
@@ -539,44 +655,48 @@ std::error_code SampleProfileReaderExtBinary::readFuncProfiles() {
return EC;
}
assert(Data == End && "More data is read than expected");
- return sampleprof_error::success;
- }
-
- if (Remapper) {
- for (auto Name : FuncsToUse) {
- Remapper->insert(Name);
+ } else {
+ if (Remapper) {
+ for (auto Name : FuncsToUse) {
+ Remapper->insert(Name);
+ }
}
- }
- if (useMD5()) {
- for (auto Name : FuncsToUse) {
- auto GUID = std::to_string(MD5Hash(Name));
- auto iter = FuncOffsetTable.find(StringRef(GUID));
- if (iter == FuncOffsetTable.end())
- continue;
- const uint8_t *FuncProfileAddr = Start + iter->second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- } else {
- for (auto NameOffset : FuncOffsetTable) {
- auto FuncName = NameOffset.first;
- if (!FuncsToUse.count(FuncName) &&
- (!Remapper || !Remapper->exist(FuncName)))
- continue;
- const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
+ if (useMD5()) {
+ for (auto Name : FuncsToUse) {
+ auto GUID = std::to_string(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(StringRef(GUID));
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *FuncProfileAddr = Start + iter->second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
+ } else {
+ for (auto NameOffset : FuncOffsetTable) {
+ SampleContext FContext(NameOffset.first);
+ auto FuncName = FContext.getNameWithoutContext();
+ if (!FuncsToUse.count(FuncName) &&
+ (!Remapper || !Remapper->exist(FuncName)))
+ continue;
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
}
+ Data = End;
}
- Data = End;
+ assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
+ "Cannot have both context-sensitive and regular profile");
+ ProfileIsCS = (CSProfileCount > 0);
+ FunctionSamples::ProfileIsCS = ProfileIsCS;
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderExtBinary::readProfileSymbolList() {
+std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
if (!ProfSymList)
ProfSymList = std::make_unique<ProfileSymbolList>();
@@ -625,6 +745,10 @@ std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
if (!Entry.Size)
continue;
+ // Skip sections without context when SkipFlatProf is true.
+ if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
+ continue;
+
const uint8_t *SecStart = BufStart + Entry.Offset;
uint64_t SecSize = Entry.Size;
@@ -709,7 +833,7 @@ std::error_code SampleProfileReaderBinary::readNameTable() {
auto Size = readNumber<uint32_t>();
if (std::error_code EC = Size.getError())
return EC;
- NameTable.reserve(*Size);
+ NameTable.reserve(*Size + NameTable.size());
for (uint32_t I = 0; I < *Size; ++I) {
auto Name(readString());
if (std::error_code EC = Name.getError())
@@ -720,13 +844,24 @@ std::error_code SampleProfileReaderBinary::readNameTable() {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderExtBinary::readMD5NameTable() {
+std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
auto Size = readNumber<uint64_t>();
if (std::error_code EC = Size.getError())
return EC;
- NameTable.reserve(*Size);
MD5StringBuf = std::make_unique<std::vector<std::string>>();
MD5StringBuf->reserve(*Size);
+ if (FixedLengthMD5) {
+ // Preallocate and initialize NameTable so we can check whether a name
+ // index has been read before by checking whether the element in the
+ // NameTable is empty; meanwhile, readStringIndex can do the boundary
+ // check using the size of NameTable.
+ NameTable.resize(*Size + NameTable.size());
+
+ MD5NameMemStart = Data;
+ Data = Data + (*Size) * sizeof(uint64_t);
+ return sampleprof_error::success;
+ }
+ NameTable.reserve(*Size);
for (uint32_t I = 0; I < *Size; ++I) {
auto FID = readNumber<uint64_t>();
if (std::error_code EC = FID.getError())
@@ -739,12 +874,35 @@ std::error_code SampleProfileReaderExtBinary::readMD5NameTable() {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderExtBinary::readNameTableSec(bool IsMD5) {
+std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
if (IsMD5)
return readMD5NameTable();
return SampleProfileReaderBinary::readNameTable();
}
+std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
+ if (!ProfileIsProbeBased)
+ return sampleprof_error::success;
+ while (Data < End) {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+
+ auto Checksum = readNumber<uint64_t>();
+ if (std::error_code EC = Checksum.getError())
+ return EC;
+
+ SampleContext FContext(*FName);
+ // No need to load metadata for profiles that are not loaded in the current
+ // module.
+ if (Profiles.count(FContext))
+ Profiles[FContext].setFunctionHash(*Checksum);
+ }
+
+ assert(Data == End && "More data is read than expected");
+ return sampleprof_error::success;
+}
+
std::error_code SampleProfileReaderCompactBinary::readNameTable() {
auto Size = readNumber<uint64_t>();
if (std::error_code EC = Size.getError())
@@ -759,7 +917,8 @@ std::error_code SampleProfileReaderCompactBinary::readNameTable() {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTableEntry() {
+std::error_code
+SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
SecHdrTableEntry Entry;
auto Type = readUnencodedNumber<uint64_t>();
if (std::error_code EC = Type.getError())
@@ -781,6 +940,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTableEntry() {
return EC;
Entry.Size = *Size;
+ Entry.LayoutIndex = Idx;
SecHdrTable.push_back(std::move(Entry));
return sampleprof_error::success;
}
@@ -791,7 +951,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
return EC;
for (uint32_t i = 0; i < (*EntryNum); i++)
- if (std::error_code EC = readSecHdrTableEntry())
+ if (std::error_code EC = readSecHdrTableEntry(i))
return EC;
return sampleprof_error::success;
@@ -813,11 +973,12 @@ std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
}
uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
+ uint64_t Size = 0;
for (auto &Entry : SecHdrTable) {
if (Entry.Type == Type)
- return Entry.Size;
+ Size += Entry.Size;
}
- return 0;
+ return Size;
}
uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
@@ -840,9 +1001,14 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
else
Flags.append("{");
+ if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
+ Flags.append("flat,");
+
switch (Entry.Type) {
case SecNameTable:
- if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
+ if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
+ Flags.append("fixlenmd5,");
+ else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
Flags.append("md5,");
break;
case SecProfSummary:
@@ -867,7 +1033,7 @@ bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
<< ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
<< "\n";
;
- TotalSecsSize += getSectionSize(Entry.Type);
+ TotalSecsSize += Entry.Size;
}
uint64_t HeaderSize = SecHdrTable.front().Offset;
assert(HeaderSize + TotalSecsSize == getFileSize() &&
@@ -1201,7 +1367,7 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
InlineCallStack NewStack;
NewStack.push_back(FProfile);
- NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
+ llvm::append_range(NewStack, InlineStack);
if (Update) {
// Walk up the inline stack, adding the samples on this line to
// the total sample count of the callers in the chain.
@@ -1249,7 +1415,7 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
return sampleprof_error::truncated;
InlineCallStack NewStack;
NewStack.push_back(FProfile);
- NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
+ llvm::append_range(NewStack, InlineStack);
if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
return EC;
}
@@ -1290,19 +1456,25 @@ void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
return;
}
+ // CSSPGO-TODO: Remapper is not yet supported.
+ // We will need to remap the entire context string.
assert(Remappings && "should be initialized while creating remapper");
- for (auto &Sample : Reader.getProfiles())
- if (auto Key = Remappings->insert(Sample.first()))
- SampleMap.insert({Key, &Sample.second});
+ for (auto &Sample : Reader.getProfiles()) {
+ DenseSet<StringRef> NamesInSample;
+ Sample.second.findAllNames(NamesInSample);
+ for (auto &Name : NamesInSample)
+ if (auto Key = Remappings->insert(Name))
+ NameMap.insert({Key, Name});
+ }
RemappingApplied = true;
}
-FunctionSamples *
-SampleProfileReaderItaniumRemapper::getSamplesFor(StringRef Fname) {
+Optional<StringRef>
+SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
if (auto Key = Remappings->lookup(Fname))
- return SampleMap.lookup(Key);
- return nullptr;
+ return NameMap.lookup(Key);
+ return None;
}
/// Prepare a memory buffer for the contents of \p Filename.
@@ -1438,9 +1610,5 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
// profile. Binary format has the profile summary in its header.
void SampleProfileReader::computeSummary() {
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
- for (const auto &I : Profiles) {
- const FunctionSamples &Profile = I.second;
- Builder.addRecord(Profile);
- }
- Summary = Builder.getSummary();
+ Summary = Builder.computeSummaryForProfiles(Profiles);
}
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp
index 48d3faa6cd2f..8017f2a82804 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -19,6 +19,7 @@
#include "llvm/ProfileData/SampleProfWriter.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Compression.h"
@@ -73,19 +74,16 @@ SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
return sampleprof_error::success;
}
-SecHdrTableEntry &
-SampleProfileWriterExtBinaryBase::getEntryInLayout(SecType Type) {
- auto SecIt = std::find_if(
- SectionHdrLayout.begin(), SectionHdrLayout.end(),
- [=](const auto &Entry) -> bool { return Entry.Type == Type; });
- return *SecIt;
-}
-
/// Return the current position and prepare to use it as the start
-/// position of a section.
-uint64_t SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type) {
+/// position of a section given the section type \p Type and its position
+/// \p LayoutIdx in SectionHdrLayout.
+uint64_t
+SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type,
+ uint32_t LayoutIdx) {
uint64_t SectionStart = OutputStream->tell();
- auto &Entry = getEntryInLayout(Type);
+ assert(LayoutIdx < SectionHdrLayout.size() && "LayoutIdx out of range");
+ const auto &Entry = SectionHdrLayout[LayoutIdx];
+ assert(Entry.Type == Type && "Unexpected section type");
// Use LocalBuf as a temporary output for writing data.
if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
LocalBufStream.swap(OutputStream);
@@ -112,18 +110,21 @@ std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() {
return sampleprof_error::success;
}
-/// Add a new section into section header table.
-std::error_code
-SampleProfileWriterExtBinaryBase::addNewSection(SecType Type,
- uint64_t SectionStart) {
- auto Entry = getEntryInLayout(Type);
+/// Add a new section into section header table given the section type
+/// \p Type, its position \p LayoutIdx in SectionHdrLayout and the
+/// location \p SectionStart where the section should be written to.
+std::error_code SampleProfileWriterExtBinaryBase::addNewSection(
+ SecType Type, uint32_t LayoutIdx, uint64_t SectionStart) {
+ assert(LayoutIdx < SectionHdrLayout.size() && "LayoutIdx out of range");
+ const auto &Entry = SectionHdrLayout[LayoutIdx];
+ assert(Entry.Type == Type && "Unexpected section type");
if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) {
LocalBufStream.swap(OutputStream);
if (std::error_code EC = compressAndOutput())
return EC;
}
SecHdrTable.push_back({Type, Entry.Flags, SectionStart - FileStart,
- OutputStream->tell() - SectionStart});
+ OutputStream->tell() - SectionStart, LayoutIdx});
return sampleprof_error::success;
}
@@ -144,15 +145,15 @@ std::error_code SampleProfileWriterExtBinaryBase::write(
}
std::error_code
-SampleProfileWriterExtBinary::writeSample(const FunctionSamples &S) {
+SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) {
uint64_t Offset = OutputStream->tell();
- StringRef Name = S.getName();
+ StringRef Name = S.getNameWithContext(true);
FuncOffsetTable[Name] = Offset - SecLBRProfileStart;
encodeULEB128(S.getHeadSamples(), *OutputStream);
return writeBody(S);
}
-std::error_code SampleProfileWriterExtBinary::writeFuncOffsetTable() {
+std::error_code SampleProfileWriterExtBinaryBase::writeFuncOffsetTable() {
auto &OS = *OutputStream;
// Write out the table size.
@@ -163,10 +164,23 @@ std::error_code SampleProfileWriterExtBinary::writeFuncOffsetTable() {
writeNameIdx(entry.first);
encodeULEB128(entry.second, OS);
}
+ FuncOffsetTable.clear();
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterExtBinary::writeNameTable() {
+std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
+ const StringMap<FunctionSamples> &Profiles) {
+ if (!FunctionSamples::ProfileIsProbeBased)
+ return sampleprof_error::success;
+ auto &OS = *OutputStream;
+ for (const auto &Entry : Profiles) {
+ writeNameIdx(Entry.first());
+ encodeULEB128(Entry.second.getFunctionHash(), OS);
+ }
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
if (!UseMD5)
return SampleProfileWriterBinary::writeNameTable();
@@ -174,59 +188,159 @@ std::error_code SampleProfileWriterExtBinary::writeNameTable() {
std::set<StringRef> V;
stablizeNameTable(V);
- // Write out the name table.
+ // Write out the MD5 name table. We write unencoded MD5 values so the reader
+ // can retrieve a name using its name index without having to read the
+ // whole name table.
encodeULEB128(NameTable.size(), OS);
- for (auto N : V) {
- encodeULEB128(MD5Hash(N), OS);
- }
+ support::endian::Writer Writer(OS, support::little);
+ for (auto N : V)
+ Writer.write(MD5Hash(N));
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterExtBinary::writeSections(
+std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
const StringMap<FunctionSamples> &ProfileMap) {
- uint64_t SectionStart = markSectionStart(SecProfSummary);
- computeSummary(ProfileMap);
- if (auto EC = writeSummary())
- return EC;
- if (std::error_code EC = addNewSection(SecProfSummary, SectionStart))
- return EC;
-
- // Generate the name table for all the functions referenced in the profile.
- SectionStart = markSectionStart(SecNameTable);
for (const auto &I : ProfileMap) {
addName(I.first());
addNames(I.second);
}
- writeNameTable();
- if (std::error_code EC = addNewSection(SecNameTable, SectionStart))
- return EC;
-
- SectionStart = markSectionStart(SecLBRProfile);
- SecLBRProfileStart = OutputStream->tell();
- if (std::error_code EC = writeFuncProfiles(ProfileMap))
- return EC;
- if (std::error_code EC = addNewSection(SecLBRProfile, SectionStart))
+ if (auto EC = writeNameTable())
return EC;
+ return sampleprof_error::success;
+}
- if (ProfSymList && ProfSymList->toCompress())
- setToCompressSection(SecProfileSymbolList);
-
- SectionStart = markSectionStart(SecProfileSymbolList);
+std::error_code
+SampleProfileWriterExtBinaryBase::writeProfileSymbolListSection() {
if (ProfSymList && ProfSymList->size() > 0)
if (std::error_code EC = ProfSymList->write(*OutputStream))
return EC;
- if (std::error_code EC = addNewSection(SecProfileSymbolList, SectionStart))
+
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
+ SecType Type, uint32_t LayoutIdx,
+ const StringMap<FunctionSamples> &ProfileMap) {
+ // The setting of SecFlagCompress should happen before markSectionStart.
+ if (Type == SecProfileSymbolList && ProfSymList && ProfSymList->toCompress())
+ setToCompressSection(SecProfileSymbolList);
+ if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased)
+ addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased);
+
+ uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
+ switch (Type) {
+ case SecProfSummary:
+ computeSummary(ProfileMap);
+ if (auto EC = writeSummary())
+ return EC;
+ break;
+ case SecNameTable:
+ if (auto EC = writeNameTableSection(ProfileMap))
+ return EC;
+ break;
+ case SecLBRProfile:
+ SecLBRProfileStart = OutputStream->tell();
+ if (std::error_code EC = writeFuncProfiles(ProfileMap))
+ return EC;
+ break;
+ case SecFuncOffsetTable:
+ if (auto EC = writeFuncOffsetTable())
+ return EC;
+ break;
+ case SecFuncMetadata:
+ if (std::error_code EC = writeFuncMetadata(ProfileMap))
+ return EC;
+ break;
+ case SecProfileSymbolList:
+ if (auto EC = writeProfileSymbolListSection())
+ return EC;
+ break;
+ default:
+ if (auto EC = writeCustomSection(Type))
+ return EC;
+ break;
+ }
+ if (std::error_code EC = addNewSection(Type, LayoutIdx, SectionStart))
return EC;
+ return sampleprof_error::success;
+}
- SectionStart = markSectionStart(SecFuncOffsetTable);
- if (std::error_code EC = writeFuncOffsetTable())
+std::error_code SampleProfileWriterExtBinary::writeDefaultLayout(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ // The const indices passed to writeOneSection below specify the
+ // positions of the sections in SectionHdrLayout. Look at
+ // initSectionHdrLayout to find out where each section is located in
+ // SectionHdrLayout.
+ if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecLBRProfile, 3, ProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecProfileSymbolList, 4, ProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ProfileMap))
return EC;
- if (std::error_code EC = addNewSection(SecFuncOffsetTable, SectionStart))
+ if (auto EC = writeOneSection(SecFuncMetadata, 5, ProfileMap))
+ return EC;
+ return sampleprof_error::success;
+}
+
+static void
+splitProfileMapToTwo(const StringMap<FunctionSamples> &ProfileMap,
+ StringMap<FunctionSamples> &ContextProfileMap,
+ StringMap<FunctionSamples> &NoContextProfileMap) {
+ for (const auto &I : ProfileMap) {
+ if (I.second.getCallsiteSamples().size())
+ ContextProfileMap.insert({I.first(), I.second});
+ else
+ NoContextProfileMap.insert({I.first(), I.second});
+ }
+}
+
+std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ StringMap<FunctionSamples> ContextProfileMap, NoContextProfileMap;
+ splitProfileMapToTwo(ProfileMap, ContextProfileMap, NoContextProfileMap);
+
+ if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecLBRProfile, 3, ContextProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ContextProfileMap))
+ return EC;
+ // Mark the section as having no context. Note that the section flag needs
+ // to be set before writing the section.
+ addSectionFlag(5, SecCommonFlags::SecFlagFlat);
+ if (auto EC = writeOneSection(SecLBRProfile, 5, NoContextProfileMap))
+ return EC;
+ // Mark the section as having no context. Note that the section flag needs
+ // to be set before writing the section.
+ addSectionFlag(4, SecCommonFlags::SecFlagFlat);
+ if (auto EC = writeOneSection(SecFuncOffsetTable, 4, NoContextProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecProfileSymbolList, 6, ProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecFuncMetadata, 7, ProfileMap))
return EC;
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterExtBinary::writeSections(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ std::error_code EC;
+ if (SecLayout == DefaultLayout)
+ EC = writeDefaultLayout(ProfileMap);
+ else if (SecLayout == CtxSplitLayout)
+ EC = writeCtxSplitLayout(ProfileMap);
+ else
+ llvm_unreachable("Unsupported layout");
+ return EC;
+}
+
std::error_code SampleProfileWriterCompactBinary::write(
const StringMap<FunctionSamples> &ProfileMap) {
if (std::error_code EC = SampleProfileWriter::write(ProfileMap))
@@ -246,7 +360,7 @@ std::error_code SampleProfileWriterCompactBinary::write(
/// it needs to be parsed by the SampleProfileReaderText class.
std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
auto &OS = *OutputStream;
- OS << S.getName() << ":" << S.getTotalSamples();
+ OS << S.getNameWithContext(true) << ":" << S.getTotalSamples();
if (Indent == 0)
OS << ":" << S.getHeadSamples();
OS << "\n";
@@ -285,6 +399,13 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
}
Indent -= 1;
+ if (Indent == 0) {
+ if (FunctionSamples::ProfileIsProbeBased) {
+ OS.indent(Indent + 1);
+ OS << "!CFGChecksum: " << S.getFunctionHash() << "\n";
+ }
+ }
+
return sampleprof_error::success;
}
@@ -435,24 +556,31 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
return sampleprof_error::ostream_seek_unsupported;
support::endian::Writer Writer(*OutputStream, support::little);
- DenseMap<uint32_t, uint32_t> IndexMap;
- for (uint32_t i = 0; i < SecHdrTable.size(); i++) {
- IndexMap.insert({static_cast<uint32_t>(SecHdrTable[i].Type), i});
+ assert(SecHdrTable.size() == SectionHdrLayout.size() &&
+ "SecHdrTable entries doesn't match SectionHdrLayout");
+ SmallVector<uint32_t, 16> IndexMap(SecHdrTable.size(), -1);
+ for (uint32_t TableIdx = 0; TableIdx < SecHdrTable.size(); TableIdx++) {
+ IndexMap[SecHdrTable[TableIdx].LayoutIndex] = TableIdx;
}
// Write the section header table in the order specified in
- // SectionHdrLayout. That is the sections order Reader will see.
- // Note that the sections order in which Reader expects to read
- // may be different from the order in which Writer is able to
- // write, so we need to adjust the order in SecHdrTable to be
- // consistent with SectionHdrLayout when we write SecHdrTable
- // to the memory.
- for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) {
- uint32_t idx = IndexMap[static_cast<uint32_t>(SectionHdrLayout[i].Type)];
- Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Type));
- Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Flags));
- Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Offset));
- Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Size));
+ // SectionHdrLayout. SectionHdrLayout specifies the section
+ // order in which the profile reader expects to read, so the section
+ // header table should be written in the order given by SectionHdrLayout.
+ // Note that the section order in SecHdrTable may be different
+ // from the order in SectionHdrLayout, for example, SecFuncOffsetTable
+ // needs to be computed after SecLBRProfile (the order in SecHdrTable),
+ // but it needs to be read before SecLBRProfile (the order in
+ // SectionHdrLayout). So we use IndexMap above to switch the order.
+ for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size();
+ LayoutIdx++) {
+ assert(IndexMap[LayoutIdx] < SecHdrTable.size() &&
+ "Incorrect LayoutIdx in SecHdrTable");
+ auto Entry = SecHdrTable[IndexMap[LayoutIdx]];
+ Writer.write(static_cast<uint64_t>(Entry.Type));
+ Writer.write(static_cast<uint64_t>(Entry.Flags));
+ Writer.write(static_cast<uint64_t>(Entry.Offset));
+ Writer.write(static_cast<uint64_t>(Entry.Size));
}
// Reset OutputStream.
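The rewritten writeSecHdrTable records each entry's LayoutIndex so that sections written in one order (SecHdrTable) can be emitted in the order the reader expects (SectionHdrLayout). A small standalone sketch of that reordering by layout index, with illustrative types:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct Entry {
  std::string Name;
  uint32_t LayoutIndex; // position this entry must occupy in the header table
};

int main() {
  // Written order: the LBR profile first, then the offset table computed from it.
  std::vector<Entry> SecHdrTable = {{"SecLBRProfile", 1},
                                    {"SecFuncOffsetTable", 0}};

  // IndexMap[LayoutIdx] -> index into SecHdrTable, as in the diff above.
  std::vector<uint32_t> IndexMap(SecHdrTable.size(), ~0u);
  for (uint32_t TableIdx = 0; TableIdx < SecHdrTable.size(); ++TableIdx)
    IndexMap[SecHdrTable[TableIdx].LayoutIndex] = TableIdx;

  // Emit in layout order, which is what the reader will see.
  for (uint32_t LayoutIdx = 0; LayoutIdx < IndexMap.size(); ++LayoutIdx)
    std::cout << SecHdrTable[IndexMap[LayoutIdx]].Name << "\n";
}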
@@ -504,7 +632,7 @@ std::error_code SampleProfileWriterBinary::writeSummary() {
std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
auto &OS = *OutputStream;
- if (std::error_code EC = writeNameIdx(S.getName()))
+ if (std::error_code EC = writeNameIdx(S.getNameWithContext(true)))
return EC;
encodeULEB128(S.getTotalSamples(), OS);
@@ -621,9 +749,5 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
void SampleProfileWriter::computeSummary(
const StringMap<FunctionSamples> &ProfileMap) {
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
- for (const auto &I : ProfileMap) {
- const FunctionSamples &Profile = I.second;
- Builder.addRecord(Profile);
- }
- Summary = Builder.getSummary();
+ Summary = Builder.computeSummaryForProfiles(ProfileMap);
}
diff --git a/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.cpp b/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.cpp
index 25fbea7d31c2..3d586a247962 100644
--- a/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.cpp
@@ -13,7 +13,6 @@
#include "llvm/Remarks/BitstreamRemarkParser.h"
#include "BitstreamRemarkParser.h"
-#include "llvm/Remarks/BitstreamRemarkContainer.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
diff --git a/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.h b/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.h
index 749219fc5155..0e40e5d66e00 100644
--- a/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.h
+++ b/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.h
@@ -14,13 +14,13 @@
#define LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/Remarks/BitstreamRemarkContainer.h"
#include "llvm/Remarks/BitstreamRemarkParser.h"
+#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkParser.h"
-#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
#include <memory>
-#include <string>
namespace llvm {
namespace remarks {
diff --git a/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp b/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp
index a6de44605675..503a7bd49d15 100644
--- a/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp
@@ -35,11 +35,11 @@ unsigned AArch64::getDefaultFPU(StringRef CPU, AArch64::ArchKind AK) {
.Default(ARM::FK_INVALID);
}
-unsigned AArch64::getDefaultExtensions(StringRef CPU, AArch64::ArchKind AK) {
+uint64_t AArch64::getDefaultExtensions(StringRef CPU, AArch64::ArchKind AK) {
if (CPU == "generic")
return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions;
- return StringSwitch<unsigned>(CPU)
+ return StringSwitch<uint64_t>(CPU)
#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
.Case(NAME, AArch64ARCHNames[static_cast<unsigned>(ArchKind::ID)] \
.ArchBaseExtensions | \
@@ -59,7 +59,7 @@ AArch64::ArchKind AArch64::getCPUArchKind(StringRef CPU) {
.Default(ArchKind::INVALID);
}
-bool AArch64::getExtensionFeatures(unsigned Extensions,
+bool AArch64::getExtensionFeatures(uint64_t Extensions,
std::vector<StringRef> &Features) {
if (Extensions == AArch64::AEK_INVALID)
return false;
@@ -100,6 +100,12 @@ bool AArch64::getExtensionFeatures(unsigned Extensions,
Features.push_back("+sve2-bitperm");
if (Extensions & AEK_RCPC)
Features.push_back("+rcpc");
+ if (Extensions & AEK_BRBE)
+ Features.push_back("+brbe");
+ if (Extensions & AEK_PAUTH)
+ Features.push_back("+pauth");
+ if (Extensions & AEK_FLAGM)
+ Features.push_back("+flagm");
return true;
}
@@ -118,6 +124,10 @@ bool AArch64::getArchFeatures(AArch64::ArchKind AK,
Features.push_back("+v8.5a");
if (AK == AArch64::ArchKind::ARMV8_6A)
Features.push_back("+v8.6a");
+ if (AK == AArch64::ArchKind::ARMV8_7A)
+ Features.push_back("+v8.7a");
+ if(AK == AArch64::ArchKind::ARMV8R)
+ Features.push_back("+v8r");
return AK != ArchKind::INVALID;
}
diff --git a/contrib/llvm-project/llvm/lib/Support/AMDGPUMetadata.cpp b/contrib/llvm-project/llvm/lib/Support/AMDGPUMetadata.cpp
index bfa1fe86cd3e..3d6325134d75 100644
--- a/contrib/llvm-project/llvm/lib/Support/AMDGPUMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/AMDGPUMetadata.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/YAMLTraits.h"
@@ -211,7 +210,7 @@ struct MappingTraits<HSAMD::Metadata> {
namespace AMDGPU {
namespace HSAMD {
-std::error_code fromString(std::string String, Metadata &HSAMetadata) {
+std::error_code fromString(StringRef String, Metadata &HSAMetadata) {
yaml::Input YamlInput(String);
YamlInput >> HSAMetadata;
return YamlInput.error();
diff --git a/contrib/llvm-project/llvm/lib/Support/APFixedPoint.cpp b/contrib/llvm-project/llvm/lib/Support/APFixedPoint.cpp
new file mode 100644
index 000000000000..9764dd51f572
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Support/APFixedPoint.cpp
@@ -0,0 +1,574 @@
+//===- APFixedPoint.cpp - Fixed point constant handling ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines the implementation for the fixed point number interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFixedPoint.h"
+#include "llvm/ADT/APFloat.h"
+
+namespace llvm {
+
+APFixedPoint APFixedPoint::convert(const FixedPointSemantics &DstSema,
+ bool *Overflow) const {
+ APSInt NewVal = Val;
+ unsigned DstWidth = DstSema.getWidth();
+ unsigned DstScale = DstSema.getScale();
+ bool Upscaling = DstScale > getScale();
+ if (Overflow)
+ *Overflow = false;
+
+ if (Upscaling) {
+ NewVal = NewVal.extend(NewVal.getBitWidth() + DstScale - getScale());
+ NewVal <<= (DstScale - getScale());
+ } else {
+ NewVal >>= (getScale() - DstScale);
+ }
+
+ auto Mask = APInt::getBitsSetFrom(
+ NewVal.getBitWidth(),
+ std::min(DstScale + DstSema.getIntegralBits(), NewVal.getBitWidth()));
+ APInt Masked(NewVal & Mask);
+
+ // Change in the bits above the sign
+ if (!(Masked == Mask || Masked == 0)) {
+ // Found overflow in the bits above the sign
+ if (DstSema.isSaturated())
+ NewVal = NewVal.isNegative() ? Mask : ~Mask;
+ else if (Overflow)
+ *Overflow = true;
+ }
+
+ // If the dst semantics are unsigned, but our value is signed and negative, we
+ // clamp to zero.
+ if (!DstSema.isSigned() && NewVal.isSigned() && NewVal.isNegative()) {
+ // Found negative overflow for unsigned result
+ if (DstSema.isSaturated())
+ NewVal = 0;
+ else if (Overflow)
+ *Overflow = true;
+ }
+
+ NewVal = NewVal.extOrTrunc(DstWidth);
+ NewVal.setIsSigned(DstSema.isSigned());
+ return APFixedPoint(NewVal, DstSema);
+}
+
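convert above rescales the raw integer to the destination scale and then inspects the bits above the destination's integral range to saturate or flag overflow. A simplified signed 64-bit illustration of just the rescaling step (not APFixedPoint's API; overflow handling is omitted):

#include <cstdint>
#include <iostream>

// Rescale a fixed-point value from SrcScale to DstScale fractional bits.
// For simplicity this sketch assumes non-negative values and no overflow.
static int64_t rescale(int64_t V, unsigned SrcScale, unsigned DstScale) {
  if (DstScale > SrcScale)
    return V << (DstScale - SrcScale); // upscaling: shift fraction left
  return V >> (SrcScale - DstScale);   // downscaling: drop low fraction bits
}

int main() {
  int64_t Q16 = 163840;              // 2.5 in Q16 (2.5 * 65536)
  int64_t Q8 = rescale(Q16, 16, 8);  // 2.5 * 256 = 640
  int64_t Back = rescale(Q8, 8, 16); // 163840 again (no fraction lost here)
  std::cout << Q8 << " " << Back << "\n";
}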
+int APFixedPoint::compare(const APFixedPoint &Other) const {
+ APSInt ThisVal = getValue();
+ APSInt OtherVal = Other.getValue();
+ bool ThisSigned = Val.isSigned();
+ bool OtherSigned = OtherVal.isSigned();
+ unsigned OtherScale = Other.getScale();
+ unsigned OtherWidth = OtherVal.getBitWidth();
+
+ unsigned CommonWidth = std::max(Val.getBitWidth(), OtherWidth);
+
+ // Prevent overflow in the event the widths are the same but the scales differ
+ CommonWidth += getScale() >= OtherScale ? getScale() - OtherScale
+ : OtherScale - getScale();
+
+ ThisVal = ThisVal.extOrTrunc(CommonWidth);
+ OtherVal = OtherVal.extOrTrunc(CommonWidth);
+
+ unsigned CommonScale = std::max(getScale(), OtherScale);
+ ThisVal = ThisVal.shl(CommonScale - getScale());
+ OtherVal = OtherVal.shl(CommonScale - OtherScale);
+
+ if (ThisSigned && OtherSigned) {
+ if (ThisVal.sgt(OtherVal))
+ return 1;
+ else if (ThisVal.slt(OtherVal))
+ return -1;
+ } else if (!ThisSigned && !OtherSigned) {
+ if (ThisVal.ugt(OtherVal))
+ return 1;
+ else if (ThisVal.ult(OtherVal))
+ return -1;
+ } else if (ThisSigned && !OtherSigned) {
+ if (ThisVal.isSignBitSet())
+ return -1;
+ else if (ThisVal.ugt(OtherVal))
+ return 1;
+ else if (ThisVal.ult(OtherVal))
+ return -1;
+ } else {
+ // !ThisSigned && OtherSigned
+ if (OtherVal.isSignBitSet())
+ return 1;
+ else if (ThisVal.ugt(OtherVal))
+ return 1;
+ else if (ThisVal.ult(OtherVal))
+ return -1;
+ }
+
+ return 0;
+}
+
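compare above widens both operands to a common width and scale, then resolves mixed signedness explicitly: a negative signed operand is always smaller than any unsigned operand. The same rule with plain integers, as a compact sketch:

#include <cstdint>
#include <iostream>

// Compare a signed and an unsigned 64-bit value: returns -1, 0 or 1.
static int compareMixed(int64_t S, uint64_t U) {
  if (S < 0)
    return -1; // any negative signed value is smaller than any unsigned value
  uint64_t SU = static_cast<uint64_t>(S);
  if (SU < U)
    return -1;
  if (SU > U)
    return 1;
  return 0;
}

int main() {
  std::cout << compareMixed(-1, 0) << " "  // -1
            << compareMixed(5, 5) << " "   // 0
            << compareMixed(7, 3) << "\n"; // 1
}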
+APFixedPoint APFixedPoint::getMax(const FixedPointSemantics &Sema) {
+ bool IsUnsigned = !Sema.isSigned();
+ auto Val = APSInt::getMaxValue(Sema.getWidth(), IsUnsigned);
+ if (IsUnsigned && Sema.hasUnsignedPadding())
+ Val = Val.lshr(1);
+ return APFixedPoint(Val, Sema);
+}
+
+APFixedPoint APFixedPoint::getMin(const FixedPointSemantics &Sema) {
+ auto Val = APSInt::getMinValue(Sema.getWidth(), !Sema.isSigned());
+ return APFixedPoint(Val, Sema);
+}
+
+bool FixedPointSemantics::fitsInFloatSemantics(
+ const fltSemantics &FloatSema) const {
+ // A fixed point semantic fits in a floating point semantic if the maximum
+ // and minimum values as integers of the fixed point semantic can fit in the
+ // floating point semantic.
+
+ // If these values do not fit, then a floating point rescaling of the true
+ // maximum/minimum value will not fit either, so the floating point semantic
+ // cannot be used to perform such a rescaling.
+
+ APSInt MaxInt = APFixedPoint::getMax(*this).getValue();
+ APFloat F(FloatSema);
+ APFloat::opStatus Status = F.convertFromAPInt(MaxInt, MaxInt.isSigned(),
+ APFloat::rmNearestTiesToAway);
+ if ((Status & APFloat::opOverflow) || !isSigned())
+ return !(Status & APFloat::opOverflow);
+
+ APSInt MinInt = APFixedPoint::getMin(*this).getValue();
+ Status = F.convertFromAPInt(MinInt, MinInt.isSigned(),
+ APFloat::rmNearestTiesToAway);
+ return !(Status & APFloat::opOverflow);
+}
+
+FixedPointSemantics FixedPointSemantics::getCommonSemantics(
+ const FixedPointSemantics &Other) const {
+ unsigned CommonScale = std::max(getScale(), Other.getScale());
+ unsigned CommonWidth =
+ std::max(getIntegralBits(), Other.getIntegralBits()) + CommonScale;
+
+ bool ResultIsSigned = isSigned() || Other.isSigned();
+ bool ResultIsSaturated = isSaturated() || Other.isSaturated();
+ bool ResultHasUnsignedPadding = false;
+ if (!ResultIsSigned) {
+ // Both are unsigned.
+ ResultHasUnsignedPadding = hasUnsignedPadding() &&
+ Other.hasUnsignedPadding() && !ResultIsSaturated;
+ }
+
+ // If the result is signed, add an extra bit for the sign. Otherwise, if it is
+ // unsigned and has unsigned padding, we only need to add the extra padding
+ // bit back if we are not saturating.
+ if (ResultIsSigned || ResultHasUnsignedPadding)
+ CommonWidth++;
+
+ return FixedPointSemantics(CommonWidth, CommonScale, ResultIsSigned,
+ ResultIsSaturated, ResultHasUnsignedPadding);
+}
+
+APFixedPoint APFixedPoint::add(const APFixedPoint &Other,
+ bool *Overflow) const {
+ auto CommonFXSema = Sema.getCommonSemantics(Other.getSemantics());
+ APFixedPoint ConvertedThis = convert(CommonFXSema);
+ APFixedPoint ConvertedOther = Other.convert(CommonFXSema);
+ APSInt ThisVal = ConvertedThis.getValue();
+ APSInt OtherVal = ConvertedOther.getValue();
+ bool Overflowed = false;
+
+ APSInt Result;
+ if (CommonFXSema.isSaturated()) {
+ Result = CommonFXSema.isSigned() ? ThisVal.sadd_sat(OtherVal)
+ : ThisVal.uadd_sat(OtherVal);
+ } else {
+ Result = ThisVal.isSigned() ? ThisVal.sadd_ov(OtherVal, Overflowed)
+ : ThisVal.uadd_ov(OtherVal, Overflowed);
+ }
+
+ if (Overflow)
+ *Overflow = Overflowed;
+
+ return APFixedPoint(Result, CommonFXSema);
+}
+
+APFixedPoint APFixedPoint::sub(const APFixedPoint &Other,
+ bool *Overflow) const {
+ auto CommonFXSema = Sema.getCommonSemantics(Other.getSemantics());
+ APFixedPoint ConvertedThis = convert(CommonFXSema);
+ APFixedPoint ConvertedOther = Other.convert(CommonFXSema);
+ APSInt ThisVal = ConvertedThis.getValue();
+ APSInt OtherVal = ConvertedOther.getValue();
+ bool Overflowed = false;
+
+ APSInt Result;
+ if (CommonFXSema.isSaturated()) {
+ Result = CommonFXSema.isSigned() ? ThisVal.ssub_sat(OtherVal)
+ : ThisVal.usub_sat(OtherVal);
+ } else {
+ Result = ThisVal.isSigned() ? ThisVal.ssub_ov(OtherVal, Overflowed)
+ : ThisVal.usub_ov(OtherVal, Overflowed);
+ }
+
+ if (Overflow)
+ *Overflow = Overflowed;
+
+ return APFixedPoint(Result, CommonFXSema);
+}
+
+APFixedPoint APFixedPoint::mul(const APFixedPoint &Other,
+ bool *Overflow) const {
+ auto CommonFXSema = Sema.getCommonSemantics(Other.getSemantics());
+ APFixedPoint ConvertedThis = convert(CommonFXSema);
+ APFixedPoint ConvertedOther = Other.convert(CommonFXSema);
+ APSInt ThisVal = ConvertedThis.getValue();
+ APSInt OtherVal = ConvertedOther.getValue();
+ bool Overflowed = false;
+
+ // Widen the LHS and RHS so we can perform a full multiplication.
+ unsigned Wide = CommonFXSema.getWidth() * 2;
+ if (CommonFXSema.isSigned()) {
+ ThisVal = ThisVal.sextOrSelf(Wide);
+ OtherVal = OtherVal.sextOrSelf(Wide);
+ } else {
+ ThisVal = ThisVal.zextOrSelf(Wide);
+ OtherVal = OtherVal.zextOrSelf(Wide);
+ }
+
+ // Perform the full multiplication and downscale to get the same scale.
+ //
+ // Note that the right shifts here perform an implicit downwards rounding.
+ // This rounding could discard bits that would technically place the result
+ // outside the representable range. We interpret the spec as allowing us to
+ // perform the rounding step first, avoiding the overflow case that would
+ // arise.
+ APSInt Result;
+ if (CommonFXSema.isSigned())
+ Result = ThisVal.smul_ov(OtherVal, Overflowed)
+ .ashr(CommonFXSema.getScale());
+ else
+ Result = ThisVal.umul_ov(OtherVal, Overflowed)
+ .lshr(CommonFXSema.getScale());
+ assert(!Overflowed && "Full multiplication cannot overflow!");
+ Result.setIsSigned(CommonFXSema.isSigned());
+
+ // If our result lies outside of the representable range of the common
+ // semantics, we either have overflow or saturation.
+ APSInt Max = APFixedPoint::getMax(CommonFXSema).getValue()
+ .extOrTrunc(Wide);
+ APSInt Min = APFixedPoint::getMin(CommonFXSema).getValue()
+ .extOrTrunc(Wide);
+ if (CommonFXSema.isSaturated()) {
+ if (Result < Min)
+ Result = Min;
+ else if (Result > Max)
+ Result = Max;
+ } else
+ Overflowed = Result < Min || Result > Max;
+
+ if (Overflow)
+ *Overflow = Overflowed;
+
+ return APFixedPoint(Result.sextOrTrunc(CommonFXSema.getWidth()),
+ CommonFXSema);
+}
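
The key idea in mul is that two W-bit operands are widened to 2*W bits so the raw product cannot overflow (hence the assert), and a product of two values with scale S carries scale 2*S, so a single right shift by S restores the common scale. A sketch on a hypothetical signed Q16.16 format with plain integers instead of APSInt; the Min/Max range check the code above performs before narrowing is omitted here:

#include <cstdint>
#include <cstdio>

// Multiply two signed Q16.16 values: widen to 64 bits, multiply (scale becomes
// 32), then arithmetic-shift right by 16 to return to scale 16. The shift
// rounds toward negative infinity, as the comment above notes.
static int32_t mulQ16(int32_t A, int32_t B) {
  int64_t Wide = int64_t(A) * int64_t(B);
  return static_cast<int32_t>(Wide >> 16); // may wrap; the real code range-checks
}

int main() {
  int32_t Half = 1 << 15;  // 0.5 in Q16.16
  int32_t Three = 3 << 16; // 3.0 in Q16.16
  std::printf("0.5 * 3.0 = %f\n", mulQ16(Half, Three) / 65536.0); // 1.500000
}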
+
+APFixedPoint APFixedPoint::div(const APFixedPoint &Other,
+ bool *Overflow) const {
+ auto CommonFXSema = Sema.getCommonSemantics(Other.getSemantics());
+ APFixedPoint ConvertedThis = convert(CommonFXSema);
+ APFixedPoint ConvertedOther = Other.convert(CommonFXSema);
+ APSInt ThisVal = ConvertedThis.getValue();
+ APSInt OtherVal = ConvertedOther.getValue();
+ bool Overflowed = false;
+
+ // Widen the LHS and RHS so we can perform a full division.
+ unsigned Wide = CommonFXSema.getWidth() * 2;
+ if (CommonFXSema.isSigned()) {
+ ThisVal = ThisVal.sextOrSelf(Wide);
+ OtherVal = OtherVal.sextOrSelf(Wide);
+ } else {
+ ThisVal = ThisVal.zextOrSelf(Wide);
+ OtherVal = OtherVal.zextOrSelf(Wide);
+ }
+
+ // Upscale to compensate for the loss of precision from division, and
+ // perform the full division.
+ ThisVal = ThisVal.shl(CommonFXSema.getScale());
+ APSInt Result;
+ if (CommonFXSema.isSigned()) {
+ APInt Rem;
+ APInt::sdivrem(ThisVal, OtherVal, Result, Rem);
+ // If the quotient is negative and the remainder is nonzero, round
+ // towards negative infinity by subtracting epsilon from the result.
+ if (ThisVal.isNegative() != OtherVal.isNegative() && !Rem.isNullValue())
+ Result = Result - 1;
+ } else
+ Result = ThisVal.udiv(OtherVal);
+ Result.setIsSigned(CommonFXSema.isSigned());
+
+ // If our result lies outside of the representable range of the common
+ // semantics, we either have overflow or saturation.
+ APSInt Max = APFixedPoint::getMax(CommonFXSema).getValue()
+ .extOrTrunc(Wide);
+ APSInt Min = APFixedPoint::getMin(CommonFXSema).getValue()
+ .extOrTrunc(Wide);
+ if (CommonFXSema.isSaturated()) {
+ if (Result < Min)
+ Result = Min;
+ else if (Result > Max)
+ Result = Max;
+ } else
+ Overflowed = Result < Min || Result > Max;
+
+ if (Overflow)
+ *Overflow = Overflowed;
+
+ return APFixedPoint(Result.sextOrTrunc(CommonFXSema.getWidth()),
+ CommonFXSema);
+}
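
Division works in the opposite direction: the dividend is pre-shifted left by the scale so the quotient comes out at the common scale, and for signed operands the truncated quotient is adjusted by one so rounding goes toward negative infinity rather than toward zero. A sketch on the same hypothetical Q16.16 format (again without the Min/Max range check that follows in the code above):

#include <cstdint>
#include <cstdio>

// Divide two signed Q16.16 values: widen, multiply the dividend by 2^16 to
// compensate for the precision lost in division, divide, and floor the result.
static int32_t divQ16(int32_t A, int32_t B) {
  int64_t Num = int64_t(A) * 65536; // i.e. shifted left by the scale
  int64_t Quot = Num / int64_t(B);  // C++ division truncates toward zero
  int64_t Rem = Num % int64_t(B);
  if ((Num < 0) != (int64_t(B) < 0) && Rem != 0)
    --Quot; // negative, non-integral quotient: round down instead of toward 0
  return static_cast<int32_t>(Quot);
}

int main() {
  std::printf("-1.0 / 2.0 = %f\n", divQ16(-(1 << 16), 2 << 16) / 65536.0);
  std::printf("1.0 / -3.0 = %f\n", divQ16(1 << 16, -(3 << 16)) / 65536.0);
  // Prints -0.500000 and roughly -0.333344: the second result is floored,
  // not truncated toward zero.
}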
+
+APFixedPoint APFixedPoint::shl(unsigned Amt, bool *Overflow) const {
+ APSInt ThisVal = Val;
+ bool Overflowed = false;
+
+ // Widen the LHS.
+ unsigned Wide = Sema.getWidth() * 2;
+ if (Sema.isSigned())
+ ThisVal = ThisVal.sextOrSelf(Wide);
+ else
+ ThisVal = ThisVal.zextOrSelf(Wide);
+
+ // Clamp the shift amount at the original width, and perform the shift.
+ Amt = std::min(Amt, ThisVal.getBitWidth());
+ APSInt Result = ThisVal << Amt;
+ Result.setIsSigned(Sema.isSigned());
+
+ // If our result lies outside of the representable range of the
+ // semantics, we either have overflow or saturation.
+ APSInt Max = APFixedPoint::getMax(Sema).getValue().extOrTrunc(Wide);
+ APSInt Min = APFixedPoint::getMin(Sema).getValue().extOrTrunc(Wide);
+ if (Sema.isSaturated()) {
+ if (Result < Min)
+ Result = Min;
+ else if (Result > Max)
+ Result = Max;
+ } else
+ Overflowed = Result < Min || Result > Max;
+
+ if (Overflow)
+ *Overflow = Overflowed;
+
+ return APFixedPoint(Result.sextOrTrunc(Sema.getWidth()), Sema);
+}
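
shl follows the same widen/operate/range-check pattern, except that the range check uses the value's own semantics rather than a common one. A very small sketch of the clamp-or-flag decision on a hypothetical 8-bit signed value:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Left-shift a signed 8-bit value in a wider type, then either clamp to the
// 8-bit range (saturating) or report overflow (non-saturating), echoing the
// Min/Max handling above.
static int8_t shlSat(int8_t V, unsigned Amt, bool Saturate, bool &Overflowed) {
  int32_t Wide = int32_t(V) * (1 << std::min(Amt, 16u)); // widened shift
  bool OutOfRange = Wide < INT8_MIN || Wide > INT8_MAX;
  Overflowed = !Saturate && OutOfRange; // saturation never reports overflow
  if (Saturate && OutOfRange)
    return Wide < INT8_MIN ? INT8_MIN : INT8_MAX;
  return static_cast<int8_t>(Wide); // wraps in the non-saturating case
}

int main() {
  bool Ov = false;
  std::printf("sat:  %d\n", shlSat(96, 2, true, Ov)); // 384 clamps to 127
  int8_t Wrapped = shlSat(96, 2, false, Ov);
  std::printf("wrap: %d (overflow=%d)\n", Wrapped, Ov); // wraps, overflow=1
}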
+
+void APFixedPoint::toString(SmallVectorImpl<char> &Str) const {
+ APSInt Val = getValue();
+ unsigned Scale = getScale();
+
+ if (Val.isSigned() && Val.isNegative() && Val != -Val) {
+ Val = -Val;
+ Str.push_back('-');
+ }
+
+ APSInt IntPart = Val >> Scale;
+
+ // Add 4 bits to hold the value after multiplying by 10 (the radix).
+ unsigned Width = Val.getBitWidth() + 4;
+ APInt FractPart = Val.zextOrTrunc(Scale).zext(Width);
+ APInt FractPartMask = APInt::getAllOnesValue(Scale).zext(Width);
+ APInt RadixInt = APInt(Width, 10);
+
+ IntPart.toString(Str, /*Radix=*/10);
+ Str.push_back('.');
+ do {
+ (FractPart * RadixInt)
+ .lshr(Scale)
+ .toString(Str, /*Radix=*/10, Val.isSigned());
+ FractPart = (FractPart * RadixInt) & FractPartMask;
+ } while (FractPart != 0);
+}
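
The fractional loop above is ordinary long division in base 10: multiply the bits below the binary point by 10, the part that crosses the point is the next decimal digit, and the masked remainder feeds the next iteration. The extra 4 bits of width reserved above are exactly the headroom needed for that multiply-by-10. A standalone sketch on a plain 64-bit value (assuming the scale is small enough that the product fits in 64 bits):

#include <cstdint>
#include <cstdio>
#include <string>

// Emit the decimal digits of the fractional part of a fixed-point value with
// the given scale, the same way as the do/while loop above.
static std::string fractionToDecimal(uint64_t FractBits, unsigned Scale) {
  const uint64_t Mask = (Scale >= 64) ? ~0ULL : ((1ULL << Scale) - 1);
  uint64_t F = FractBits & Mask;
  std::string Out;
  do {
    uint64_t Times10 = F * 10;             // needs 4 bits of headroom
    Out += char('0' + (Times10 >> Scale)); // digit above the binary point
    F = Times10 & Mask;                    // remainder below the binary point
  } while (F != 0);
  return Out;
}

int main() {
  // 0.375 in a hypothetical scale-16 format: 0.375 * 2^16 == 24576.
  std::printf("0.%s\n", fractionToDecimal(24576, 16).c_str()); // prints 0.375
}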
+
+APFixedPoint APFixedPoint::negate(bool *Overflow) const {
+ if (!isSaturated()) {
+ if (Overflow)
+ *Overflow =
+ (!isSigned() && Val != 0) || (isSigned() && Val.isMinSignedValue());
+ return APFixedPoint(-Val, Sema);
+ }
+
+ // We never overflow for saturation
+ if (Overflow)
+ *Overflow = false;
+
+ if (isSigned())
+ return Val.isMinSignedValue() ? getMax(Sema) : APFixedPoint(-Val, Sema);
+ else
+ return APFixedPoint(Sema);
+}
+
+APSInt APFixedPoint::convertToInt(unsigned DstWidth, bool DstSign,
+ bool *Overflow) const {
+ APSInt Result = getIntPart();
+ unsigned SrcWidth = getWidth();
+
+ APSInt DstMin = APSInt::getMinValue(DstWidth, !DstSign);
+ APSInt DstMax = APSInt::getMaxValue(DstWidth, !DstSign);
+
+ if (SrcWidth < DstWidth) {
+ Result = Result.extend(DstWidth);
+ } else if (SrcWidth > DstWidth) {
+ DstMin = DstMin.extend(SrcWidth);
+ DstMax = DstMax.extend(SrcWidth);
+ }
+
+ if (Overflow) {
+ if (Result.isSigned() && !DstSign) {
+ *Overflow = Result.isNegative() || Result.ugt(DstMax);
+ } else if (Result.isUnsigned() && DstSign) {
+ *Overflow = Result.ugt(DstMax);
+ } else {
+ *Overflow = Result < DstMin || Result > DstMax;
+ }
+ }
+
+ Result.setIsSigned(DstSign);
+ return Result.extOrTrunc(DstWidth);
+}
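
The subtle part above is the pair of mixed-signedness cases: when source and destination disagree on signedness, an unsigned comparison is used so that a negative value converted to an unsigned destination, and an unsigned value above the signed maximum, are both flagged. A sketch of the same decision table for a hypothetical 16-bit destination, with plain 64-bit integers instead of APSInt:

#include <cstdint>
#include <cstdio>

// Report whether a source value overflows a 16-bit destination of the given
// signedness. SrcSigned says how V should be interpreted; when SrcSigned is
// false the caller passes a non-negative value.
static bool overflowsInt16(int64_t V, bool SrcSigned, bool DstSigned) {
  const int64_t DstMin = DstSigned ? -32768 : 0;
  const int64_t DstMax = DstSigned ? 32767 : 65535;
  if (SrcSigned && !DstSigned)
    return V < 0 || V > DstMax;            // negatives never fit unsigned
  if (!SrcSigned && DstSigned)
    return uint64_t(V) > uint64_t(DstMax); // compare as unsigned
  return V < DstMin || V > DstMax;         // same signedness: plain range check
}

int main() {
  std::printf("%d %d %d\n",
              overflowsInt16(-1, true, false),     // 1: negative into unsigned
              overflowsInt16(40000, false, true),  // 1: above INT16_MAX
              overflowsInt16(-40000, true, true)); // 1: below INT16_MIN
}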
+
+const fltSemantics *APFixedPoint::promoteFloatSemantics(const fltSemantics *S) {
+ if (S == &APFloat::BFloat())
+ return &APFloat::IEEEdouble();
+ else if (S == &APFloat::IEEEhalf())
+ return &APFloat::IEEEsingle();
+ else if (S == &APFloat::IEEEsingle())
+ return &APFloat::IEEEdouble();
+ else if (S == &APFloat::IEEEdouble())
+ return &APFloat::IEEEquad();
+ llvm_unreachable("Could not promote float type!");
+}
+
+APFloat APFixedPoint::convertToFloat(const fltSemantics &FloatSema) const {
+ // For some operations, rounding mode has an effect on the result, while
+ // other operations are lossless and should never result in rounding.
+ // To signify which operations these are, we define two rounding modes here.
+ APFloat::roundingMode RM = APFloat::rmNearestTiesToEven;
+ APFloat::roundingMode LosslessRM = APFloat::rmTowardZero;
+
+ // Make sure that we are operating in a type that works with this fixed-point
+ // semantic.
+ const fltSemantics *OpSema = &FloatSema;
+ while (!Sema.fitsInFloatSemantics(*OpSema))
+ OpSema = promoteFloatSemantics(OpSema);
+
+ // Convert the fixed point value bits as an integer. If the floating point
+ // value does not have the required precision, we will round according to the
+ // given mode.
+ APFloat Flt(*OpSema);
+ APFloat::opStatus S = Flt.convertFromAPInt(Val, Sema.isSigned(), RM);
+
+ // If we cared about checking for precision loss, we could look at this
+ // status.
+ (void)S;
+
+ // Scale down the integer value in the float to match the correct scaling
+ // factor.
+ APFloat ScaleFactor(std::pow(2, -(int)Sema.getScale()));
+ bool Ignored;
+ ScaleFactor.convert(*OpSema, LosslessRM, &Ignored);
+ Flt.multiply(ScaleFactor, LosslessRM);
+
+ if (OpSema != &FloatSema)
+ Flt.convert(FloatSema, RM, &Ignored);
+
+ return Flt;
+}
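
Conceptually this conversion is "interpret the raw bits as an integer, then multiply by 2^-scale"; the promotion loop only exists to make the intermediate float type wide enough for the fixed-point value. A sketch of the arithmetic for a hypothetical signed Q16.16 value converted to double, where no promotion is needed because double's 53-bit significand holds any 32-bit value exactly and scaling by a power of two is lossless:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Convert a signed Q16.16 value to double: raw bits as an integer, times 2^-16.
static double q16ToDouble(int32_t V) {
  return std::ldexp(static_cast<double>(V), -16);
}

int main() {
  int32_t V = (3 << 16) + (1 << 14);    // 3.25 in Q16.16
  std::printf("%f\n", q16ToDouble(V));  // prints 3.250000
}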
+
+APFixedPoint APFixedPoint::getFromIntValue(const APSInt &Value,
+ const FixedPointSemantics &DstFXSema,
+ bool *Overflow) {
+ FixedPointSemantics IntFXSema = FixedPointSemantics::GetIntegerSemantics(
+ Value.getBitWidth(), Value.isSigned());
+ return APFixedPoint(Value, IntFXSema).convert(DstFXSema, Overflow);
+}
+
+APFixedPoint
+APFixedPoint::getFromFloatValue(const APFloat &Value,
+ const FixedPointSemantics &DstFXSema,
+ bool *Overflow) {
+ // For some operations, rounding mode has an effect on the result, while
+ // other operations are lossless and should never result in rounding.
+ // To signify which operations these are, we define two rounding modes here,
+ // even though they are the same mode.
+ APFloat::roundingMode RM = APFloat::rmTowardZero;
+ APFloat::roundingMode LosslessRM = APFloat::rmTowardZero;
+
+ const fltSemantics &FloatSema = Value.getSemantics();
+
+ if (Value.isNaN()) {
+ // Handle NaN immediately.
+ if (Overflow)
+ *Overflow = true;
+ return APFixedPoint(DstFXSema);
+ }
+
+ // Make sure that we are operating in a type that works with this fixed-point
+ // semantic.
+ const fltSemantics *OpSema = &FloatSema;
+ while (!DstFXSema.fitsInFloatSemantics(*OpSema))
+ OpSema = promoteFloatSemantics(OpSema);
+
+ APFloat Val = Value;
+
+ bool Ignored;
+ if (&FloatSema != OpSema)
+ Val.convert(*OpSema, LosslessRM, &Ignored);
+
+ // Scale up the float so that the 'fractional' part of the mantissa ends up in
+ // the integer range instead. Rounding mode is irrelevant here.
+ // It is fine if this overflows to infinity even for saturating types,
+ // since we will use floating point comparisons to check for saturation.
+ APFloat ScaleFactor(std::pow(2, DstFXSema.getScale()));
+ ScaleFactor.convert(*OpSema, LosslessRM, &Ignored);
+ Val.multiply(ScaleFactor, LosslessRM);
+
+ // Convert to the integral representation of the value. This rounding mode
+ // is significant.
+ APSInt Res(DstFXSema.getWidth(), !DstFXSema.isSigned());
+ Val.convertToInteger(Res, RM, &Ignored);
+
+ // Round the integral value and scale back. This makes the
+ // overflow calculations below work properly. If we do not round here,
+ // we risk checking for overflow with a value that is outside the
+ // representable range of the fixed-point semantic even though no overflow
+ // would occur had we rounded first.
+ ScaleFactor = APFloat(std::pow(2, -(int)DstFXSema.getScale()));
+ ScaleFactor.convert(*OpSema, LosslessRM, &Ignored);
+ Val.roundToIntegral(RM);
+ Val.multiply(ScaleFactor, LosslessRM);
+
+ // Check for overflow/saturation by checking if the floating point value
+ // is outside the range representable by the fixed-point value.
+ APFloat FloatMax = getMax(DstFXSema).convertToFloat(*OpSema);
+ APFloat FloatMin = getMin(DstFXSema).convertToFloat(*OpSema);
+ bool Overflowed = false;
+ if (DstFXSema.isSaturated()) {
+ if (Val > FloatMax)
+ Res = getMax(DstFXSema).getValue();
+ else if (Val < FloatMin)
+ Res = getMin(DstFXSema).getValue();
+ } else
+ Overflowed = Val > FloatMax || Val < FloatMin;
+
+ if (Overflow)
+ *Overflow = Overflowed;
+
+ return APFixedPoint(Res, DstFXSema);
+}
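
The reverse direction scales the float up by 2^scale, truncates toward zero to an integer, and then compares against the representable range to decide between clamping (saturated) and flagging overflow. A double-based sketch for a saturating signed Q16.16 destination; NaN handling, the float-type promotion, and the exact APFloat comparisons against getMax/getMin are omitted:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Convert a double to saturating signed Q16.16: scale by 2^16, truncate toward
// zero, and clamp to the representable raw range.
static int32_t doubleToQ16Sat(double V) {
  double Scaled = std::trunc(std::ldexp(V, 16)); // V * 2^16, rounded toward 0
  if (Scaled > 2147483647.0)
    return INT32_MAX; // saturate at the maximum representable value
  if (Scaled < -2147483648.0)
    return INT32_MIN; // saturate at the minimum representable value
  return static_cast<int32_t>(Scaled);
}

int main() {
  std::printf("%d\n", doubleToQ16Sat(1.5));    // 98304 == 1.5 * 2^16
  std::printf("%d\n", doubleToQ16Sat(1.0e9));  // 2147483647 (saturated)
  std::printf("%d\n", doubleToQ16Sat(-1.0e9)); // -2147483648 (saturated)
}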
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
index 362595d8f8b1..5dea98ee3993 100644
--- a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
@@ -755,6 +755,7 @@ void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
category = fcNaN;
sign = Negative;
+ exponent = exponentNaN();
integerPart *significand = significandParts();
unsigned numParts = partCount();
@@ -841,7 +842,7 @@ bool IEEEFloat::isSignificandAllOnes() const {
// Test if the significand excluding the integral bit is all ones. This allows
// us to test for binade boundaries.
const integerPart *Parts = significandParts();
- const unsigned PartCount = partCount();
+ const unsigned PartCount = partCountForBits(semantics->precision);
for (unsigned i = 0; i < PartCount - 1; i++)
if (~Parts[i])
return false;
@@ -849,8 +850,8 @@ bool IEEEFloat::isSignificandAllOnes() const {
// Set the unused high bits to all ones when we compare.
const unsigned NumHighBits =
PartCount*integerPartWidth - semantics->precision + 1;
- assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
- "fill than integerPartWidth");
+ assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
+ "Can not have more high bits to fill than integerPartWidth");
const integerPart HighBitFill =
~integerPart(0) << (integerPartWidth - NumHighBits);
if (~(Parts[PartCount - 1] | HighBitFill))
@@ -863,15 +864,16 @@ bool IEEEFloat::isSignificandAllZeros() const {
// Test if the significand excluding the integral bit is all zeros. This
// allows us to test for binade boundaries.
const integerPart *Parts = significandParts();
- const unsigned PartCount = partCount();
+ const unsigned PartCount = partCountForBits(semantics->precision);
for (unsigned i = 0; i < PartCount - 1; i++)
if (Parts[i])
return false;
+ // Compute how many bits are used in the final word.
const unsigned NumHighBits =
PartCount*integerPartWidth - semantics->precision + 1;
- assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
+ assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
"clear than integerPartWidth");
const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
@@ -925,8 +927,7 @@ IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
initialize(&ourSemantics);
- category = fcZero;
- sign = false;
+ makeZero(false);
}
// Delegate to the previous constructor, because later copy constructor may
@@ -2242,26 +2243,15 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
if (!X86SpecialNan && semantics == &semX87DoubleExtended)
APInt::tcSetBit(significandParts(), semantics->precision - 1);
- // If we are truncating NaN, it is possible that we shifted out all of the
- // set bits in a signalling NaN payload. But NaN must remain NaN, so some
- // bit in the significand must be set (otherwise it is Inf).
- // This can only happen with sNaN. Set the 1st bit after the quiet bit,
- // so that we still have an sNaN.
- // FIXME: Set quiet and return opInvalidOp (on convert of any sNaN).
- // But this requires fixing LLVM to parse 32-bit hex FP or ignoring
- // conversions while parsing IR.
- if (APInt::tcIsZero(significandParts(), newPartCount)) {
- assert(shift < 0 && "Should not lose NaN payload on extend");
- assert(semantics->precision >= 3 && "Unexpectedly narrow significand");
- assert(*losesInfo && "Missing payload should have set lost info");
- APInt::tcSetBit(significandParts(), semantics->precision - 3);
+ // Convert of sNaN creates qNaN and raises an exception (invalid op).
+ // This also guarantees that a sNaN does not become Inf on a truncation
+ // that loses all payload bits.
+ if (isSignaling()) {
+ makeQuiet();
+ fs = opInvalidOp;
+ } else {
+ fs = opOK;
}
-
- // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
- // does not give you back the same bits. This is dubious, and we
- // don't currently do it. You're really supposed to get
- // an invalid operation signal at runtime, but nobody does that.
- fs = opOK;
} else {
*losesInfo = false;
fs = opOK;
@@ -3394,15 +3384,13 @@ void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
sign = static_cast<unsigned int>(i2>>15);
if (myexponent == 0 && mysignificand == 0) {
- // exponent, significand meaningless
- category = fcZero;
+ makeZero(sign);
} else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
- // exponent, significand meaningless
- category = fcInfinity;
+ makeInf(sign);
} else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
(myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
- // exponent meaningless
category = fcNaN;
+ exponent = exponentNaN();
significandParts()[0] = mysignificand;
significandParts()[1] = 0;
} else {
@@ -3453,16 +3441,14 @@ void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
sign = static_cast<unsigned int>(i2>>63);
if (myexponent==0 &&
(mysignificand==0 && mysignificand2==0)) {
- // exponent, significand meaningless
- category = fcZero;
+ makeZero(sign);
} else if (myexponent==0x7fff &&
(mysignificand==0 && mysignificand2==0)) {
- // exponent, significand meaningless
- category = fcInfinity;
+ makeInf(sign);
} else if (myexponent==0x7fff &&
(mysignificand!=0 || mysignificand2 !=0)) {
- // exponent meaningless
category = fcNaN;
+ exponent = exponentNaN();
significandParts()[0] = mysignificand;
significandParts()[1] = mysignificand2;
} else {
@@ -3488,14 +3474,12 @@ void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
sign = static_cast<unsigned int>(i>>63);
if (myexponent==0 && mysignificand==0) {
- // exponent, significand meaningless
- category = fcZero;
+ makeZero(sign);
} else if (myexponent==0x7ff && mysignificand==0) {
- // exponent, significand meaningless
- category = fcInfinity;
+ makeInf(sign);
} else if (myexponent==0x7ff && mysignificand!=0) {
- // exponent meaningless
category = fcNaN;
+ exponent = exponentNaN();
*significandParts() = mysignificand;
} else {
category = fcNormal;
@@ -3519,14 +3503,12 @@ void IEEEFloat::initFromFloatAPInt(const APInt &api) {
sign = i >> 31;
if (myexponent==0 && mysignificand==0) {
- // exponent, significand meaningless
- category = fcZero;
+ makeZero(sign);
} else if (myexponent==0xff && mysignificand==0) {
- // exponent, significand meaningless
- category = fcInfinity;
+ makeInf(sign);
} else if (myexponent==0xff && mysignificand!=0) {
- // sign, exponent, significand meaningless
category = fcNaN;
+ exponent = exponentNaN();
*significandParts() = mysignificand;
} else {
category = fcNormal;
@@ -3550,14 +3532,12 @@ void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
sign = i >> 15;
if (myexponent == 0 && mysignificand == 0) {
- // exponent, significand meaningless
- category = fcZero;
+ makeZero(sign);
} else if (myexponent == 0xff && mysignificand == 0) {
- // exponent, significand meaningless
- category = fcInfinity;
+ makeInf(sign);
} else if (myexponent == 0xff && mysignificand != 0) {
- // sign, exponent, significand meaningless
category = fcNaN;
+ exponent = exponentNaN();
*significandParts() = mysignificand;
} else {
category = fcNormal;
@@ -3581,14 +3561,12 @@ void IEEEFloat::initFromHalfAPInt(const APInt &api) {
sign = i >> 15;
if (myexponent==0 && mysignificand==0) {
- // exponent, significand meaningless
- category = fcZero;
+ makeZero(sign);
} else if (myexponent==0x1f && mysignificand==0) {
- // exponent, significand meaningless
- category = fcInfinity;
+ makeInf(sign);
} else if (myexponent==0x1f && mysignificand!=0) {
- // sign, exponent, significand meaningless
category = fcNaN;
+ exponent = exponentNaN();
*significandParts() = mysignificand;
} else {
category = fcNormal;
@@ -4146,17 +4124,29 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
return result;
}
+APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
+ return semantics->maxExponent + 1;
+}
+
+APFloatBase::ExponentType IEEEFloat::exponentInf() const {
+ return semantics->maxExponent + 1;
+}
+
+APFloatBase::ExponentType IEEEFloat::exponentZero() const {
+ return semantics->minExponent - 1;
+}
+
void IEEEFloat::makeInf(bool Negative) {
category = fcInfinity;
sign = Negative;
- exponent = semantics->maxExponent + 1;
+ exponent = exponentInf();
APInt::tcSet(significandParts(), 0, partCount());
}
void IEEEFloat::makeZero(bool Negative) {
category = fcZero;
sign = Negative;
- exponent = semantics->minExponent-1;
+ exponent = exponentZero();
APInt::tcSet(significandParts(), 0, partCount());
}
@@ -4884,6 +4874,6 @@ APFloat::opStatus APFloat::convertToInteger(APSInt &result,
return status;
}
-} // End llvm namespace
+} // namespace llvm
#undef APFLOAT_DISPATCH_ON_SEMANTICS
diff --git a/contrib/llvm-project/llvm/lib/Support/APInt.cpp b/contrib/llvm-project/llvm/lib/Support/APInt.cpp
index 9a6f93feaa29..12ceb2df112e 100644
--- a/contrib/llvm-project/llvm/lib/Support/APInt.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/APInt.cpp
@@ -338,8 +338,7 @@ void APInt::flipAllBitsSlowCase() {
/// Toggles a given bit to its opposite value.
void APInt::flipBit(unsigned bitPosition) {
assert(bitPosition < BitWidth && "Out of the bit-width range!");
- if ((*this)[bitPosition]) clearBit(bitPosition);
- else setBit(bitPosition);
+ setBitVal(bitPosition, !(*this)[bitPosition]);
}
void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
@@ -393,12 +392,8 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
// General case - set/clear individual bits in dst based on src.
// TODO - there is scope for optimization here, but at the moment this code
// path is barely used so prefer readability over performance.
- for (unsigned i = 0; i != subBitWidth; ++i) {
- if (subBits[i])
- setBit(bitPosition + i);
- else
- clearBit(bitPosition + i);
- }
+ for (unsigned i = 0; i != subBitWidth; ++i)
+ setBitVal(bitPosition + i, subBits[i]);
}
void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
@@ -966,6 +961,12 @@ APInt APInt::sextOrTrunc(unsigned width) const {
return *this;
}
+APInt APInt::truncOrSelf(unsigned width) const {
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
+
APInt APInt::zextOrSelf(unsigned width) const {
if (BitWidth < width)
return zext(width);
diff --git a/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp b/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp
index 17ad38d22614..459691923af8 100644
--- a/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp
@@ -113,7 +113,7 @@ Error ARMAttributeParser::ARM_ISA_use(AttrType tag) {
}
Error ARMAttributeParser::THUMB_ISA_use(AttrType tag) {
- static const char *strings[] = {"Not Permitted", "Thumb-1", "Thumb-2"};
+ static const char *strings[] = {"Not Permitted", "Thumb-1", "Thumb-2", "Permitted"};
return parseStringAttribute("THUMB_ISA_use", tag, makeArrayRef(strings));
}
diff --git a/contrib/llvm-project/llvm/lib/Support/ARMTargetParser.cpp b/contrib/llvm-project/llvm/lib/Support/ARMTargetParser.cpp
index 56a91f7dc787..eb425cbb1d25 100644
--- a/contrib/llvm-project/llvm/lib/Support/ARMTargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ARMTargetParser.cpp
@@ -76,6 +76,7 @@ unsigned ARM::parseArchVersion(StringRef Arch) {
case ArchKind::ARMV8_4A:
case ArchKind::ARMV8_5A:
case ArchKind::ARMV8_6A:
+ case ArchKind::ARMV8_7A:
case ArchKind::ARMV8R:
case ArchKind::ARMV8MBaseline:
case ArchKind::ARMV8MMainline:
@@ -111,6 +112,7 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
case ArchKind::ARMV8_4A:
case ArchKind::ARMV8_5A:
case ArchKind::ARMV8_6A:
+ case ArchKind::ARMV8_7A:
return ProfileKind::A;
case ArchKind::ARMV2:
case ArchKind::ARMV2A:
@@ -154,6 +156,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v8.4a", "v8.4-a")
.Case("v8.5a", "v8.5-a")
.Case("v8.6a", "v8.6-a")
+ .Case("v8.7a", "v8.7-a")
.Case("v8r", "v8-r")
.Case("v8m.base", "v8-m.base")
.Case("v8m.main", "v8-m.main")
@@ -255,7 +258,7 @@ ARM::ISAKind ARM::parseArchISA(StringRef Arch) {
unsigned ARM::parseFPU(StringRef FPU) {
StringRef Syn = getFPUSynonym(FPU);
- for (const auto F : FPUNames) {
+ for (const auto &F : FPUNames) {
if (Syn == F.getName())
return F.ID;
}
@@ -280,6 +283,8 @@ StringRef ARM::getCanonicalArchName(StringRef Arch) {
// Begins with "arm" / "thumb", move past it.
if (A.startswith("arm64_32"))
offset = 8;
+ else if (A.startswith("arm64e"))
+ offset = 6;
else if (A.startswith("arm64"))
offset = 5;
else if (A.startswith("aarch64_32"))
@@ -409,7 +414,7 @@ bool ARM::getExtensionFeatures(uint64_t Extensions,
if (Extensions == AEK_INVALID)
return false;
- for (const auto AE : ARCHExtNames) {
+ for (const auto &AE : ARCHExtNames) {
if ((Extensions & AE.ID) == AE.ID && AE.Feature)
Features.push_back(AE.Feature);
else if (AE.NegFeature)
@@ -436,7 +441,7 @@ unsigned ARM::getArchAttr(ARM::ArchKind AK) {
}
StringRef ARM::getArchExtName(uint64_t ArchExtKind) {
- for (const auto AE : ARCHExtNames) {
+ for (const auto &AE : ARCHExtNames) {
if (ArchExtKind == AE.ID)
return AE.getName();
}
@@ -453,7 +458,7 @@ static bool stripNegationPrefix(StringRef &Name) {
StringRef ARM::getArchExtFeature(StringRef ArchExt) {
bool Negated = stripNegationPrefix(ArchExt);
- for (const auto AE : ARCHExtNames) {
+ for (const auto &AE : ARCHExtNames) {
if (AE.Feature && ArchExt == AE.getName())
return StringRef(Negated ? AE.NegFeature : AE.Feature);
}
@@ -490,9 +495,10 @@ static unsigned findDoublePrecisionFPU(unsigned InputFPUKind) {
return ARM::FK_INVALID;
}
-bool ARM::appendArchExtFeatures(
- StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
- std::vector<StringRef> &Features) {
+bool ARM::appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK,
+ StringRef ArchExt,
+ std::vector<StringRef> &Features,
+ unsigned &ArgFPUID) {
size_t StartingNumFeatures = Features.size();
const bool Negated = stripNegationPrefix(ArchExt);
@@ -501,7 +507,7 @@ bool ARM::appendArchExtFeatures(
if (ID == AEK_INVALID)
return false;
- for (const auto AE : ARCHExtNames) {
+ for (const auto &AE : ARCHExtNames) {
if (Negated) {
if ((AE.ID & ID) == ID && AE.NegFeature)
Features.push_back(AE.NegFeature);
@@ -527,13 +533,14 @@ bool ARM::appendArchExtFeatures(
} else {
FPUKind = getDefaultFPU(CPU, AK);
}
+ ArgFPUID = FPUKind;
return ARM::getFPUFeatures(FPUKind, Features);
}
return StartingNumFeatures != Features.size();
}
StringRef ARM::getHWDivName(uint64_t HWDivKind) {
- for (const auto D : HWDivNames) {
+ for (const auto &D : HWDivNames) {
if (HWDivKind == D.ID)
return D.getName();
}
@@ -546,7 +553,7 @@ StringRef ARM::getDefaultCPU(StringRef Arch) {
return StringRef();
// Look for multiple AKs to find the default for pair AK+Name.
- for (const auto CPU : CPUNames) {
+ for (const auto &CPU : CPUNames) {
if (CPU.ArchID == AK && CPU.Default)
return CPU.getName();
}
@@ -557,7 +564,7 @@ StringRef ARM::getDefaultCPU(StringRef Arch) {
uint64_t ARM::parseHWDiv(StringRef HWDiv) {
StringRef Syn = getHWDivSynonym(HWDiv);
- for (const auto D : HWDivNames) {
+ for (const auto &D : HWDivNames) {
if (Syn == D.getName())
return D.ID;
}
@@ -565,7 +572,7 @@ uint64_t ARM::parseHWDiv(StringRef HWDiv) {
}
uint64_t ARM::parseArchExt(StringRef ArchExt) {
- for (const auto A : ARCHExtNames) {
+ for (const auto &A : ARCHExtNames) {
if (ArchExt == A.getName())
return A.ID;
}
@@ -573,7 +580,7 @@ uint64_t ARM::parseArchExt(StringRef ArchExt) {
}
ARM::ArchKind ARM::parseCPUArch(StringRef CPU) {
- for (const auto C : CPUNames) {
+ for (const auto &C : CPUNames) {
if (CPU == C.getName())
return C.ArchID;
}
diff --git a/contrib/llvm-project/llvm/lib/Support/CRC.cpp b/contrib/llvm-project/llvm/lib/Support/CRC.cpp
index 7ff09debe3b7..2bc668beed32 100644
--- a/contrib/llvm-project/llvm/lib/Support/CRC.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CRC.cpp
@@ -25,7 +25,7 @@
using namespace llvm;
-#if LLVM_ENABLE_ZLIB == 0 || !HAVE_ZLIB_H
+#if !LLVM_ENABLE_ZLIB
static const uint32_t CRCTable[256] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
diff --git a/contrib/llvm-project/llvm/lib/Support/CachePruning.cpp b/contrib/llvm-project/llvm/lib/Support/CachePruning.cpp
index 7663644db558..5c5759ffbaca 100644
--- a/contrib/llvm-project/llvm/lib/Support/CachePruning.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CachePruning.cpp
@@ -211,11 +211,12 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
// Walk all of the files within this directory.
for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd;
File != FileEnd && !EC; File.increment(EC)) {
- // Ignore any files not beginning with the string "llvmcache-". This
+ // Ignore filenames not beginning with "llvmcache-" or "Thin-". This
// includes the timestamp file as well as any files created by the user.
// This acts as a safeguard against data loss if the user specifies the
// wrong directory as their cache directory.
- if (!sys::path::filename(File->path()).startswith("llvmcache-"))
+ StringRef filename = sys::path::filename(File->path());
+ if (!filename.startswith("llvmcache-") && !filename.startswith("Thin-"))
continue;
// Look at this file. If we can't stat it, there's nothing interesting
diff --git a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
index 12ef0d511b14..123a23a5242c 100644
--- a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
@@ -464,7 +464,7 @@ void Option::addCategory(OptionCategory &C) {
// must be explicitly added if you want multiple categories that include it.
if (&C != &GeneralCategory && Categories[0] == &GeneralCategory)
Categories[0] = &C;
- else if (find(Categories, &C) == Categories.end())
+ else if (!is_contained(Categories, &C))
Categories.push_back(&C);
}
@@ -531,11 +531,7 @@ Option *CommandLineParser::LookupOption(SubCommand &Sub, StringRef &Arg,
// If we have an equals sign, remember the value.
if (EqualPos == StringRef::npos) {
// Look up the option.
- auto I = Sub.OptionsMap.find(Arg);
- if (I == Sub.OptionsMap.end())
- return nullptr;
-
- return I != Sub.OptionsMap.end() ? I->second : nullptr;
+ return Sub.OptionsMap.lookup(Arg);
}
// If the argument before the = is a valid option name and the option allows
@@ -832,7 +828,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// Consume runs of whitespace.
if (Token.empty()) {
while (I != E && isWhitespace(Src[I])) {
- // Mark the end of lines in response files
+ // Mark the end of lines in response files.
if (MarkEOLs && Src[I] == '\n')
NewArgv.push_back(nullptr);
++I;
@@ -869,6 +865,9 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
if (isWhitespace(C)) {
if (!Token.empty())
NewArgv.push_back(Saver.save(StringRef(Token)).data());
+ // Mark the end of lines in response files.
+ if (MarkEOLs && C == '\n')
+ NewArgv.push_back(nullptr);
Token.clear();
continue;
}
@@ -880,9 +879,6 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// Append the last token after hitting EOF with no whitespace.
if (!Token.empty())
NewArgv.push_back(Saver.save(StringRef(Token)).data());
- // Mark the end of response files
- if (MarkEOLs)
- NewArgv.push_back(nullptr);
}
/// Backslashes are interpreted in a rather complicated way in the Windows-style
@@ -956,11 +952,11 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
++I;
StringRef NormalChars = Src.slice(Start, I);
if (I >= E || isWhitespaceOrNull(Src[I])) {
- if (I < E && Src[I] == '\n')
- MarkEOL();
// No special characters: slice out the substring and start the next
// token. Copy the string if the caller asks us to.
AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars);
+ if (I < E && Src[I] == '\n')
+ MarkEOL();
} else if (Src[I] == '\"') {
Token += NormalChars;
State = QUOTED;
@@ -1208,7 +1204,7 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
};
// Check for recursive response files.
- if (std::any_of(FileStack.begin() + 1, FileStack.end(), IsEquivalent)) {
+ if (any_of(drop_begin(FileStack), IsEquivalent)) {
// This file is recursive, so we leave it in the argument stream and
// move on.
AllExpanded = false;
@@ -1251,6 +1247,22 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
return AllExpanded;
}
+bool cl::expandResponseFiles(int Argc, const char *const *Argv,
+ const char *EnvVar, StringSaver &Saver,
+ SmallVectorImpl<const char *> &NewArgv) {
+ auto Tokenize = Triple(sys::getProcessTriple()).isOSWindows()
+ ? cl::TokenizeWindowsCommandLine
+ : cl::TokenizeGNUCommandLine;
+ // The environment variable specifies initial options.
+ if (EnvVar)
+ if (llvm::Optional<std::string> EnvValue = sys::Process::GetEnv(EnvVar))
+ Tokenize(*EnvValue, Saver, NewArgv, /*MarkEOLs=*/false);
+
+ // Command line options can override the environment variable.
+ NewArgv.append(Argv + 1, Argv + Argc);
+ return ExpandResponseFiles(Saver, Tokenize, NewArgv);
+}
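
The ordering in this new helper is what makes it useful: options taken from the environment variable are tokenized first and the real command-line arguments are appended afterwards, so later command-line options can override whatever the environment set. A minimal sketch of that merge with a hypothetical variable name, using whitespace splitting instead of the cl:: tokenizers (which additionally handle quoting and response files):

#include <cstdio>
#include <cstdlib>
#include <sstream>
#include <string>
#include <vector>

// Merge options from an environment variable with argv, environment first so
// that command-line options win when a driver processes them left to right.
static std::vector<std::string> mergeArgs(int Argc, const char *const *Argv,
                                          const char *EnvVar) {
  std::vector<std::string> Args;
  if (const char *Val = std::getenv(EnvVar)) {
    std::istringstream SS(Val);
    for (std::string Tok; SS >> Tok;)
      Args.push_back(Tok);
  }
  for (int I = 1; I < Argc; ++I) // skip argv[0], as the helper above does
    Args.emplace_back(Argv[I]);
  return Args;
}

int main(int argc, char **argv) {
  // MYTOOL_OPTIONS is a made-up variable name for illustration only.
  for (const std::string &A : mergeArgs(argc, argv, "MYTOOL_OPTIONS"))
    std::printf("%s\n", A.c_str());
}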
+
bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver,
SmallVectorImpl<const char *> &Argv) {
SmallString<128> AbsPath;
@@ -1271,36 +1283,6 @@ bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver,
/*MarkEOLs*/ false, /*RelativeNames*/ true);
}
-/// ParseEnvironmentOptions - An alternative entry point to the
-/// CommandLine library, which allows you to read the program's name
-/// from the caller (as PROGNAME) and its command-line arguments from
-/// an environment variable (whose name is given in ENVVAR).
-///
-void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
- const char *Overview) {
- // Check args.
- assert(progName && "Program name not specified");
- assert(envVar && "Environment variable name missing");
-
- // Get the environment variable they want us to parse options out of.
- llvm::Optional<std::string> envValue = sys::Process::GetEnv(StringRef(envVar));
- if (!envValue)
- return;
-
- // Get program's "name", which we wouldn't know without the caller
- // telling us.
- SmallVector<const char *, 20> newArgv;
- BumpPtrAllocator A;
- StringSaver Saver(A);
- newArgv.push_back(Saver.save(progName).data());
-
- // Parse the value of the environment variable into a "command line"
- // and hand it off to ParseCommandLineOptions().
- TokenizeGNUCommandLine(*envValue, Saver, newArgv);
- int newArgc = static_cast<int>(newArgv.size());
- ParseCommandLineOptions(newArgc, &newArgv[0], StringRef(Overview));
-}
-
bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
StringRef Overview, raw_ostream *Errs,
const char *EnvVar,
@@ -1744,6 +1726,19 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent,
}
}
+void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent,
+ size_t FirstLineIndentedBy) {
+ const StringRef ValHelpPrefix = " ";
+ assert(BaseIndent >= FirstLineIndentedBy);
+ std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
+ outs().indent(BaseIndent - FirstLineIndentedBy)
+ << ArgHelpPrefix << ValHelpPrefix << Split.first << "\n";
+ while (!Split.second.empty()) {
+ Split = Split.second.split('\n');
+ outs().indent(BaseIndent + ValHelpPrefix.size()) << Split.first << "\n";
+ }
+}
+
// Print out the option for the alias.
void alias::printOptionInfo(size_t GlobalWidth) const {
outs() << PrintArg(ArgStr);
@@ -1989,17 +1984,17 @@ void generic_parser_base::printOptionInfo(const Option &O,
StringRef Description = getDescription(i);
if (!shouldPrintOption(OptionName, Description, O))
continue;
- assert(GlobalWidth >= OptionName.size() + OptionPrefixesSize);
- size_t NumSpaces = GlobalWidth - OptionName.size() - OptionPrefixesSize;
+ size_t FirstLineIndent = OptionName.size() + OptionPrefixesSize;
outs() << OptionPrefix << OptionName;
if (OptionName.empty()) {
outs() << EmptyOption;
- assert(NumSpaces >= EmptyOption.size());
- NumSpaces -= EmptyOption.size();
+ assert(FirstLineIndent >= EmptyOption.size());
+ FirstLineIndent += EmptyOption.size();
}
if (!Description.empty())
- outs().indent(NumSpaces) << ArgHelpPrefix << " " << Description;
- outs() << '\n';
+ Option::printEnumValHelpStr(Description, GlobalWidth, FirstLineIndent);
+ else
+ outs() << '\n';
}
} else {
if (!O.HelpStr.empty())
@@ -2604,7 +2599,7 @@ void cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *> Categories,
SubCommand &Sub) {
for (auto &I : Sub.OptionsMap) {
for (auto &Cat : I.second->Categories) {
- if (find(Categories, Cat) == Categories.end() && Cat != &GenericCategory)
+ if (!is_contained(Categories, Cat) && Cat != &GenericCategory)
I.second->setHiddenFlag(cl::ReallyHidden);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Support/Compression.cpp b/contrib/llvm-project/llvm/lib/Support/Compression.cpp
index 27d92f0e0aec..b8c77cf69b95 100644
--- a/contrib/llvm-project/llvm/lib/Support/Compression.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Compression.cpp
@@ -17,13 +17,13 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H
+#if LLVM_ENABLE_ZLIB
#include <zlib.h>
#endif
using namespace llvm;
-#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ
+#if LLVM_ENABLE_ZLIB
static Error createError(StringRef Err) {
return make_error<StringError>(Err, inconvertibleErrorCode());
}
diff --git a/contrib/llvm-project/llvm/lib/Support/ConvertUTFWrapper.cpp b/contrib/llvm-project/llvm/lib/Support/ConvertUTFWrapper.cpp
index 6ec567882ea6..d8d46712a593 100644
--- a/contrib/llvm-project/llvm/lib/Support/ConvertUTFWrapper.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ConvertUTFWrapper.cpp
@@ -97,6 +97,8 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
const UTF16 *Src = reinterpret_cast<const UTF16 *>(SrcBytes.begin());
const UTF16 *SrcEnd = reinterpret_cast<const UTF16 *>(SrcBytes.end());
+ assert((uintptr_t)Src % sizeof(UTF16) == 0);
+
// Byteswap if necessary.
std::vector<UTF16> ByteSwapped;
if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
diff --git a/contrib/llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp
index ec7d7d641dce..3d3ca7f567c7 100644
--- a/contrib/llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -9,14 +9,12 @@
#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/ThreadLocal.h"
#include <mutex>
#include <setjmp.h>
-#if LLVM_ON_UNIX
-#include <sysexits.h> // EX_IOERR
-#endif
using namespace llvm;
@@ -97,6 +95,13 @@ static void uninstallExceptionOrSignalHandlers();
CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
+CrashRecoveryContext::CrashRecoveryContext() {
+ // On Windows, if abort() was previously triggered (and caught by a previous
+ // CrashRecoveryContext) the Windows CRT removes our installed signal handler,
+ // so we need to install it again.
+ sys::DisableSystemDialogsOnCrash();
+}
+
CrashRecoveryContext::~CrashRecoveryContext() {
// Reclaim registered resources.
CrashRecoveryContextCleanup *i = head;
@@ -370,9 +375,10 @@ static void CrashRecoverySignalHandler(int Signal) {
sigaddset(&SigMask, Signal);
sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);
- // As per convention, -2 indicates a crash or timeout as opposed to failure to
- // execute (see llvm/include/llvm/Support/Program.h)
- int RetCode = -2;
+ // Return the same error code as if the program crashed, as mentioned in the
+ // section "Exit Status for Commands":
+ // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
+ int RetCode = 128 + Signal;
// Don't consider a broken pipe as a crash (see clang/lib/Driver/Driver.cpp)
if (Signal == SIGPIPE)
@@ -436,6 +442,27 @@ void CrashRecoveryContext::HandleExit(int RetCode) {
llvm_unreachable("Most likely setjmp wasn't called!");
}
+bool CrashRecoveryContext::throwIfCrash(int RetCode) {
+#if defined(_WIN32)
+ // On Windows, the high bits are reserved for kernel return codes. Values
+ // starting with 0x80000000 are reserved for "warnings"; values of 0xC0000000
+ // and up are for "errors". In practice, both are interpreted as a
+ // non-continuable signal.
+ unsigned Code = ((unsigned)RetCode & 0xF0000000) >> 28;
+ if (Code != 0xC && Code != 8)
+ return false;
+ ::RaiseException(RetCode, 0, 0, NULL);
+#else
+ // On Unix, signals are represented by return codes of 128 or higher.
+ // Exit code 128 is a reserved value and should not be raised as a signal.
+ if (RetCode <= 128)
+ return false;
+ llvm::sys::unregisterHandlers();
+ raise(RetCode - 128);
+#endif
+ return true;
+}
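
The mapping used by the signal handler and decoded again here follows the POSIX shell convention: a process killed by signal N reports exit status 128 + N, and 128 itself is reserved, so anything above 128 can safely be turned back into a signal. A tiny sketch of the round trip on the Unix side (Windows instead inspects NTSTATUS-style codes, as the #ifdef above shows):

#include <csignal>
#include <cstdio>

// Encode a fatal signal as an exit status the way the handler above does, and
// decode it back the way throwIfCrash does on Unix.
static int exitCodeForSignal(int Signal) { return 128 + Signal; }

static int signalFromExitCode(int RetCode) {
  return RetCode > 128 ? RetCode - 128 : 0; // 128 itself is not a signal
}

int main() {
  int Code = exitCodeForSignal(SIGSEGV);
  std::printf("SIGSEGV -> exit %d -> signal %d\n", Code,
              signalFromExitCode(Code)); // e.g. 139 -> 11 on most platforms
}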
+
// FIXME: Portability.
static void setThreadBackgroundPriority() {
#ifdef __APPLE__
diff --git a/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp b/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp
index 8c579f395282..7bb231c79239 100644
--- a/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp
@@ -118,7 +118,7 @@ void DebugCounter::push_back(const std::string &Val) {
void DebugCounter::print(raw_ostream &OS) const {
SmallVector<StringRef, 16> CounterNames(RegisteredCounters.begin(),
RegisteredCounters.end());
- sort(CounterNames.begin(), CounterNames.end());
+ sort(CounterNames);
auto &Us = instance();
OS << "Counters and values:\n";
diff --git a/contrib/llvm-project/llvm/lib/Support/DynamicLibrary.cpp b/contrib/llvm-project/llvm/lib/Support/DynamicLibrary.cpp
index d23716016fb2..bdf74623670b 100644
--- a/contrib/llvm-project/llvm/lib/Support/DynamicLibrary.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/DynamicLibrary.cpp
@@ -39,9 +39,7 @@ public:
HandleSet() : Process(nullptr) {}
~HandleSet();
- HandleList::iterator Find(void *Handle) {
- return std::find(Handles.begin(), Handles.end(), Handle);
- }
+ HandleList::iterator Find(void *Handle) { return find(Handles, Handle); }
bool Contains(void *Handle) {
return Handle == Process || Find(Handle) != Handles.end();
diff --git a/contrib/llvm-project/llvm/lib/Support/ELFAttributeParser.cpp b/contrib/llvm-project/llvm/lib/Support/ELFAttributeParser.cpp
index df955cdf5d30..2a30794bc1e9 100644
--- a/contrib/llvm-project/llvm/lib/Support/ELFAttributeParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ELFAttributeParser.cpp
@@ -200,7 +200,7 @@ Error ELFAttributeParser::parse(ArrayRef<uint8_t> section,
// Unrecognized format-version.
uint8_t formatVersion = de.getU8(cursor);
- if (formatVersion != 'A')
+ if (formatVersion != ELFAttrs::Format_Version)
return createStringError(errc::invalid_argument,
"unrecognized format-version: 0x" +
utohexstr(formatVersion));
diff --git a/contrib/llvm-project/llvm/lib/Support/Error.cpp b/contrib/llvm-project/llvm/lib/Support/Error.cpp
index 315a11e967d1..e7ab4387dfd1 100644
--- a/contrib/llvm-project/llvm/lib/Support/Error.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Error.cpp
@@ -168,3 +168,7 @@ void LLVMDisposeErrorMessage(char *ErrMsg) { delete[] ErrMsg; }
LLVMErrorTypeId LLVMGetStringErrorTypeId() {
return reinterpret_cast<void *>(&StringError::ID);
}
+
+LLVMErrorRef LLVMCreateStringError(const char *ErrMsg) {
+ return wrap(make_error<StringError>(ErrMsg, inconvertibleErrorCode()));
+}
diff --git a/contrib/llvm-project/llvm/lib/Support/ErrorHandling.cpp b/contrib/llvm-project/llvm/lib/Support/ErrorHandling.cpp
index f70a6921a41a..ce6344284f06 100644
--- a/contrib/llvm-project/llvm/lib/Support/ErrorHandling.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ErrorHandling.cpp
@@ -168,9 +168,11 @@ void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) {
#else
// Don't call the normal error handler. It may allocate memory. Directly write
// an OOM to stderr and abort.
- char OOMMessage[] = "LLVM ERROR: out of memory\n";
- ssize_t written = ::write(2, OOMMessage, strlen(OOMMessage));
- (void)written;
+ const char *OOMMessage = "LLVM ERROR: out of memory\n";
+ const char *Newline = "\n";
+ (void)!::write(2, OOMMessage, strlen(OOMMessage));
+ (void)!::write(2, Reason, strlen(Reason));
+ (void)!::write(2, Newline, strlen(Newline));
abort();
#endif
}
@@ -192,7 +194,8 @@ static void out_of_memory_new_handler() {
void llvm::install_out_of_memory_new_handler() {
std::new_handler old = std::set_new_handler(out_of_memory_new_handler);
(void)old;
- assert(old == nullptr && "new-handler already installed");
+ assert((old == nullptr || old == out_of_memory_new_handler) &&
+ "new-handler already installed");
}
#endif
diff --git a/contrib/llvm-project/llvm/lib/Support/FileCollector.cpp b/contrib/llvm-project/llvm/lib/Support/FileCollector.cpp
index 59755556a5a3..99482075f675 100644
--- a/contrib/llvm-project/llvm/lib/Support/FileCollector.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/FileCollector.cpp
@@ -15,6 +15,22 @@
using namespace llvm;
+FileCollectorBase::FileCollectorBase() = default;
+FileCollectorBase::~FileCollectorBase() = default;
+
+void FileCollectorBase::addFile(const Twine &File) {
+ std::lock_guard<std::mutex> lock(Mutex);
+ std::string FileStr = File.str();
+ if (markAsSeen(FileStr))
+ addFileImpl(FileStr);
+}
+
+void FileCollectorBase::addDirectory(const Twine &Dir) {
+ assert(sys::fs::is_directory(Dir));
+ std::error_code EC;
+ addDirectoryImpl(Dir, vfs::getRealFileSystem(), EC);
+}
+
static bool isCaseSensitivePath(StringRef Path) {
SmallString<256> TmpDest = Path, UpperDest, RealDest;
@@ -37,74 +53,82 @@ FileCollector::FileCollector(std::string Root, std::string OverlayRoot)
: Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) {
}
-bool FileCollector::getRealPath(StringRef SrcPath,
- SmallVectorImpl<char> &Result) {
+void FileCollector::PathCanonicalizer::updateWithRealPath(
+ SmallVectorImpl<char> &Path) {
+ StringRef SrcPath(Path.begin(), Path.size());
+ StringRef Filename = sys::path::filename(SrcPath);
+ StringRef Directory = sys::path::parent_path(SrcPath);
+
+ // Use real_path to fix any symbolic link component present in the directory
+ // part of the path, caching the search because computing the real path is
+ // expensive.
SmallString<256> RealPath;
- StringRef FileName = sys::path::filename(SrcPath);
- std::string Directory = sys::path::parent_path(SrcPath).str();
- auto DirWithSymlink = SymlinkMap.find(Directory);
-
- // Use real_path to fix any symbolic link component present in a path.
- // Computing the real path is expensive, cache the search through the parent
- // path Directory.
- if (DirWithSymlink == SymlinkMap.end()) {
- auto EC = sys::fs::real_path(Directory, RealPath);
- if (EC)
- return false;
- SymlinkMap[Directory] = std::string(RealPath.str());
+ auto DirWithSymlink = CachedDirs.find(Directory);
+ if (DirWithSymlink == CachedDirs.end()) {
+ // FIXME: Should this be a call to FileSystem::getRealpath(), in some
+ // cases? What if there is nothing on disk?
+ if (sys::fs::real_path(Directory, RealPath))
+ return;
+ CachedDirs[Directory] = std::string(RealPath.str());
} else {
RealPath = DirWithSymlink->second;
}
- sys::path::append(RealPath, FileName);
- Result.swap(RealPath);
- return true;
-}
-
-void FileCollector::addFile(const Twine &File) {
- std::lock_guard<std::mutex> lock(Mutex);
- std::string FileStr = File.str();
- if (markAsSeen(FileStr))
- addFileImpl(FileStr);
-}
+ // Finish recreating the path by appending the original filename, since we
+ // don't need to resolve symlinks in the filename.
+ //
+ // FIXME: If we can cope with this, maybe we can cope without calling
+ // getRealPath() at all when there's no ".." component.
+ sys::path::append(RealPath, Filename);
-void FileCollector::addDirectory(const Twine &Dir) {
- assert(sys::fs::is_directory(Dir));
- std::error_code EC;
- addDirectoryImpl(Dir, vfs::getRealFileSystem(), EC);
+ // Swap to create the output.
+ Path.swap(RealPath);
}
-void FileCollector::addFileImpl(StringRef SrcPath) {
+/// Make Path absolute.
+static void makeAbsolute(SmallVectorImpl<char> &Path) {
// We need an absolute src path to append to the root.
- SmallString<256> AbsoluteSrc = SrcPath;
- sys::fs::make_absolute(AbsoluteSrc);
+ sys::fs::make_absolute(Path);
// Canonicalize src to a native path to avoid mixed separator styles.
- sys::path::native(AbsoluteSrc);
+ sys::path::native(Path);
// Remove redundant leading "./" pieces and consecutive separators.
- AbsoluteSrc = sys::path::remove_leading_dotslash(AbsoluteSrc);
+ Path.erase(Path.begin(), sys::path::remove_leading_dotslash(
+ StringRef(Path.begin(), Path.size()))
+ .begin());
+}
- // Canonicalize the source path by removing "..", "." components.
- SmallString<256> VirtualPath = AbsoluteSrc;
- sys::path::remove_dots(VirtualPath, /*remove_dot_dot=*/true);
+FileCollector::PathCanonicalizer::PathStorage
+FileCollector::PathCanonicalizer::canonicalize(StringRef SrcPath) {
+ PathStorage Paths;
+ Paths.VirtualPath = SrcPath;
+ makeAbsolute(Paths.VirtualPath);
// If a ".." component is present after a symlink component, remove_dots may
// lead to the wrong real destination path. Let the source be canonicalized
// like that but make sure we always use the real path for the destination.
- SmallString<256> CopyFrom;
- if (!getRealPath(AbsoluteSrc, CopyFrom))
- CopyFrom = VirtualPath;
+ Paths.CopyFrom = Paths.VirtualPath;
+ updateWithRealPath(Paths.CopyFrom);
+
+ // Canonicalize the virtual path by removing "..", "." components.
+ sys::path::remove_dots(Paths.VirtualPath, /*remove_dot_dot=*/true);
+
+ return Paths;
+}
+
+void FileCollector::addFileImpl(StringRef SrcPath) {
+ PathCanonicalizer::PathStorage Paths = Canonicalizer.canonicalize(SrcPath);
SmallString<256> DstPath = StringRef(Root);
- sys::path::append(DstPath, sys::path::relative_path(CopyFrom));
+ sys::path::append(DstPath, sys::path::relative_path(Paths.CopyFrom));
// Always map a canonical src path to its real path into the YAML, by doing
// this we map different virtual src paths to the same entry in the VFS
// overlay, which is a way to emulate symlink inside the VFS; this is also
// needed for correctness, not doing that can lead to module redefinition
// errors.
- addFileToMapping(VirtualPath, DstPath);
+ addFileToMapping(Paths.VirtualPath, DstPath);
}
llvm::vfs::directory_iterator
@@ -158,14 +182,6 @@ std::error_code FileCollector::copyFiles(bool StopOnError) {
std::lock_guard<std::mutex> lock(Mutex);
for (auto &entry : VFSWriter.getMappings()) {
- // Create directory tree.
- if (std::error_code EC =
- sys::fs::create_directories(sys::path::parent_path(entry.RPath),
- /*IgnoreExisting=*/true)) {
- if (StopOnError)
- return EC;
- }
-
// Get the status of the original file/directory.
sys::fs::file_status Stat;
if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) {
@@ -174,6 +190,18 @@ std::error_code FileCollector::copyFiles(bool StopOnError) {
continue;
}
+ // Continue if the file doesn't exist.
+ if (Stat.type() == sys::fs::file_type::file_not_found)
+ continue;
+
+ // Create directory tree.
+ if (std::error_code EC =
+ sys::fs::create_directories(sys::path::parent_path(entry.RPath),
+ /*IgnoreExisting=*/true)) {
+ if (StopOnError)
+ return EC;
+ }
+
if (Stat.type() == sys::fs::file_type::directory_file) {
// Construct a directory when it's just a directory entry.
if (std::error_code EC =
diff --git a/contrib/llvm-project/llvm/lib/Support/FormatVariadic.cpp b/contrib/llvm-project/llvm/lib/Support/FormatVariadic.cpp
index 632e879e540d..f6d48bcd50e8 100644
--- a/contrib/llvm-project/llvm/lib/Support/FormatVariadic.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/FormatVariadic.cpp
@@ -91,27 +91,26 @@ formatv_object_base::parseReplacementItem(StringRef Spec) {
std::pair<ReplacementItem, StringRef>
formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
- std::size_t From = 0;
- while (From < Fmt.size() && From != StringRef::npos) {
- std::size_t BO = Fmt.find_first_of('{', From);
+ while (!Fmt.empty()) {
// Everything up until the first brace is a literal.
- if (BO != 0)
+ if (Fmt.front() != '{') {
+ std::size_t BO = Fmt.find_first_of('{');
return std::make_pair(ReplacementItem{Fmt.substr(0, BO)}, Fmt.substr(BO));
+ }
- StringRef Braces =
- Fmt.drop_front(BO).take_while([](char C) { return C == '{'; });
+ StringRef Braces = Fmt.take_while([](char C) { return C == '{'; });
// If there is more than one brace, then some of them are escaped. Treat
// these as replacements.
if (Braces.size() > 1) {
size_t NumEscapedBraces = Braces.size() / 2;
- StringRef Middle = Fmt.substr(BO, NumEscapedBraces);
- StringRef Right = Fmt.drop_front(BO + NumEscapedBraces * 2);
+ StringRef Middle = Fmt.take_front(NumEscapedBraces);
+ StringRef Right = Fmt.drop_front(NumEscapedBraces * 2);
return std::make_pair(ReplacementItem{Middle}, Right);
}
// An unterminated open brace is undefined. We treat the rest of the string
// as a literal replacement, but we assert to indicate that this is
// undefined and that we consider it an error.
- std::size_t BC = Fmt.find_first_of('}', BO);
+ std::size_t BC = Fmt.find_first_of('}');
if (BC == StringRef::npos) {
assert(
false &&
@@ -122,12 +121,12 @@ formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
// Even if there is a closing brace, if there is another open brace before
// this closing brace, treat this portion as literal, and try again with the
// next one.
- std::size_t BO2 = Fmt.find_first_of('{', BO + 1);
+ std::size_t BO2 = Fmt.find_first_of('{', 1);
if (BO2 < BC)
return std::make_pair(ReplacementItem{Fmt.substr(0, BO2)},
Fmt.substr(BO2));
- StringRef Spec = Fmt.slice(BO + 1, BC);
+ StringRef Spec = Fmt.slice(1, BC);
StringRef Right = Fmt.substr(BC + 1);
auto RI = parseReplacementItem(Spec);
@@ -136,7 +135,7 @@ formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
// If there was an error parsing the replacement item, treat it as an
// invalid replacement spec, and just continue.
- From = BC + 1;
+ Fmt = Fmt.drop_front(BC + 1);
}
return std::make_pair(ReplacementItem{Fmt}, StringRef());
}
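
The reworked loop slices the format string itself instead of carrying a separate index: each iteration peels off either the literal run before the next '{', half of a run of escaped braces, or the spec inside one "{...}" replacement. A loose standalone sketch of that splitting step on std::string_view, omitting the nested-brace and error-recovery cases handled above:

#include <algorithm>
#include <cstdio>
#include <string_view>

// One piece split off the front of a formatv-style string.
struct Piece {
  bool IsReplacement;
  std::string_view Text;
  std::string_view Rest;
};

static Piece splitOnce(std::string_view Fmt) {
  if (Fmt.empty() || Fmt.front() != '{') {
    size_t BO = Fmt.find('{'); // literal run up to the next open brace
    if (BO == std::string_view::npos)
      return {false, Fmt, {}};
    return {false, Fmt.substr(0, BO), Fmt.substr(BO)};
  }
  size_t Run = std::min(Fmt.find_first_not_of('{'), Fmt.size());
  if (Run > 1) // "{{" escapes a literal '{': emit half of the braces
    return {false, Fmt.substr(0, Run / 2), Fmt.substr(Run / 2 * 2)};
  size_t BC = Fmt.find('}');
  if (BC == std::string_view::npos)
    return {false, Fmt, {}}; // unterminated brace: treat the rest as literal
  return {true, Fmt.substr(1, BC - 1), Fmt.substr(BC + 1)};
}

int main() {
  std::string_view Fmt = "x = {0}, y = {1} {{not a spec}}";
  while (!Fmt.empty()) {
    Piece P = splitOnce(Fmt);
    std::printf("%s [%.*s]\n", P.IsReplacement ? "spec   " : "literal",
                int(P.Text.size()), P.Text.data());
    Fmt = P.Rest;
  }
}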
diff --git a/contrib/llvm-project/llvm/lib/Support/Host.cpp b/contrib/llvm-project/llvm/lib/Support/Host.cpp
index 36cecf9b2a16..09146c47ff2c 100644
--- a/contrib/llvm-project/llvm/lib/Support/Host.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Host.cpp
@@ -161,11 +161,14 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
// Look for the CPU implementer line.
StringRef Implementer;
StringRef Hardware;
+ StringRef Part;
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
if (Lines[I].startswith("CPU implementer"))
Implementer = Lines[I].substr(15).ltrim("\t :");
if (Lines[I].startswith("Hardware"))
Hardware = Lines[I].substr(8).ltrim("\t :");
+ if (Lines[I].startswith("CPU part"))
+ Part = Lines[I].substr(8).ltrim("\t :");
}
if (Implementer == "0x41") { // ARM Ltd.
@@ -175,110 +178,89 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
return "cortex-a53";
- // Look for the CPU part line.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("CPU part"))
- // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
- // values correspond to the "Part number" in the CP15/c0 register. The
- // contents are specified in the various processor manuals.
- // This corresponds to the Main ID Register in Technical Reference Manuals.
- // and is used in programs like sys-utils
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x926", "arm926ej-s")
- .Case("0xb02", "mpcore")
- .Case("0xb36", "arm1136j-s")
- .Case("0xb56", "arm1156t2-s")
- .Case("0xb76", "arm1176jz-s")
- .Case("0xc08", "cortex-a8")
- .Case("0xc09", "cortex-a9")
- .Case("0xc0f", "cortex-a15")
- .Case("0xc20", "cortex-m0")
- .Case("0xc23", "cortex-m3")
- .Case("0xc24", "cortex-m4")
- .Case("0xd22", "cortex-m55")
- .Case("0xd02", "cortex-a34")
- .Case("0xd04", "cortex-a35")
- .Case("0xd03", "cortex-a53")
- .Case("0xd07", "cortex-a57")
- .Case("0xd08", "cortex-a72")
- .Case("0xd09", "cortex-a73")
- .Case("0xd0a", "cortex-a75")
- .Case("0xd0b", "cortex-a76")
- .Case("0xd0d", "cortex-a77")
- .Case("0xd41", "cortex-a78")
- .Case("0xd44", "cortex-x1")
- .Case("0xd0c", "neoverse-n1")
- .Default("generic");
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ // This corresponds to the Main ID Register in Technical Reference Manuals
+ // and is used in programs like sys-utils.
+ return StringSwitch<const char *>(Part)
+ .Case("0x926", "arm926ej-s")
+ .Case("0xb02", "mpcore")
+ .Case("0xb36", "arm1136j-s")
+ .Case("0xb56", "arm1156t2-s")
+ .Case("0xb76", "arm1176jz-s")
+ .Case("0xc08", "cortex-a8")
+ .Case("0xc09", "cortex-a9")
+ .Case("0xc0f", "cortex-a15")
+ .Case("0xc20", "cortex-m0")
+ .Case("0xc23", "cortex-m3")
+ .Case("0xc24", "cortex-m4")
+ .Case("0xd22", "cortex-m55")
+ .Case("0xd02", "cortex-a34")
+ .Case("0xd04", "cortex-a35")
+ .Case("0xd03", "cortex-a53")
+ .Case("0xd07", "cortex-a57")
+ .Case("0xd08", "cortex-a72")
+ .Case("0xd09", "cortex-a73")
+ .Case("0xd0a", "cortex-a75")
+ .Case("0xd0b", "cortex-a76")
+ .Case("0xd0d", "cortex-a77")
+ .Case("0xd41", "cortex-a78")
+ .Case("0xd44", "cortex-x1")
+ .Case("0xd0c", "neoverse-n1")
+ .Case("0xd49", "neoverse-n2")
+ .Default("generic");
}
if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("CPU part")) {
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x516", "thunderx2t99")
- .Case("0x0516", "thunderx2t99")
- .Case("0xaf", "thunderx2t99")
- .Case("0x0af", "thunderx2t99")
- .Case("0xa1", "thunderxt88")
- .Case("0x0a1", "thunderxt88")
- .Default("generic");
- }
- }
+ return StringSwitch<const char *>(Part)
+ .Case("0x516", "thunderx2t99")
+ .Case("0x0516", "thunderx2t99")
+ .Case("0xaf", "thunderx2t99")
+ .Case("0x0af", "thunderx2t99")
+ .Case("0xa1", "thunderxt88")
+ .Case("0x0a1", "thunderxt88")
+ .Default("generic");
}
if (Implementer == "0x46") { // Fujitsu Ltd.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("CPU part")) {
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x001", "a64fx")
- .Default("generic");
- }
- }
+ return StringSwitch<const char *>(Part)
+ .Case("0x001", "a64fx")
+ .Default("generic");
}
if (Implementer == "0x4e") { // NVIDIA Corporation
- for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("CPU part")) {
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x004", "carmel")
- .Default("generic");
- }
- }
+ return StringSwitch<const char *>(Part)
+ .Case("0x004", "carmel")
+ .Default("generic");
}
if (Implementer == "0x48") // HiSilicon Technologies, Inc.
- // Look for the CPU part line.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("CPU part"))
- // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
- // values correspond to the "Part number" in the CP15/c0 register. The
- // contents are specified in the various processor manuals.
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0xd01", "tsv110")
- .Default("generic");
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ return StringSwitch<const char *>(Part)
+ .Case("0xd01", "tsv110")
+ .Default("generic");
if (Implementer == "0x51") // Qualcomm Technologies, Inc.
- // Look for the CPU part line.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("CPU part"))
- // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
- // values correspond to the "Part number" in the CP15/c0 register. The
- // contents are specified in the various processor manuals.
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x06f", "krait") // APQ8064
- .Case("0x201", "kryo")
- .Case("0x205", "kryo")
- .Case("0x211", "kryo")
- .Case("0x800", "cortex-a73")
- .Case("0x801", "cortex-a73")
- .Case("0x802", "cortex-a73")
- .Case("0x803", "cortex-a73")
- .Case("0x804", "cortex-a73")
- .Case("0x805", "cortex-a73")
- .Case("0xc00", "falkor")
- .Case("0xc01", "saphira")
- .Default("generic");
-
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ return StringSwitch<const char *>(Part)
+ .Case("0x06f", "krait") // APQ8064
+ .Case("0x201", "kryo")
+ .Case("0x205", "kryo")
+ .Case("0x211", "kryo")
+ .Case("0x800", "cortex-a73") // Kryo 2xx Gold
+ .Case("0x801", "cortex-a73") // Kryo 2xx Silver
+ .Case("0x802", "cortex-a75") // Kryo 3xx Gold
+ .Case("0x803", "cortex-a75") // Kryo 3xx Silver
+ .Case("0x804", "cortex-a76") // Kryo 4xx Gold
+ .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
+ .Case("0xc00", "falkor")
+ .Case("0xc01", "saphira")
+ .Default("generic");
if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
// The Exynos chips have a convoluted ID scheme that doesn't seem to follow
// any predictive pattern across variants and parts.
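The rework above parses /proc/cpuinfo once into Implementer and Part and then performs a single StringSwitch per implementer, instead of re-scanning the lines for every vendor. A minimal standalone sketch of that pattern, assuming only LLVM's ADT headers; the helper name lookupARMCPU and the two-entry part table are illustrative, not the full table from Host.cpp:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    #include <cstdio>

    // Scan the cpuinfo text once, remember the interesting fields, then map
    // the part number with a single StringSwitch (abbreviated table).
    static const char *lookupARMCPU(llvm::StringRef CpuinfoContent) {
      llvm::SmallVector<llvm::StringRef, 32> Lines;
      CpuinfoContent.split(Lines, '\n');
      llvm::StringRef Implementer, Part;
      for (llvm::StringRef Line : Lines) {
        if (Line.startswith("CPU implementer"))
          Implementer = Line.substr(15).ltrim("\t :");
        if (Line.startswith("CPU part"))
          Part = Line.substr(8).ltrim("\t :");
      }
      if (Implementer == "0x41") // ARM Ltd.
        return llvm::StringSwitch<const char *>(Part)
            .Case("0xd03", "cortex-a53")
            .Case("0xd0c", "neoverse-n1")
            .Default("generic");
      return "generic";
    }

    int main() {
      std::printf("%s\n", lookupARMCPU("CPU implementer\t: 0x41\n"
                                       "CPU part\t: 0xd0c\n")); // neoverse-n1
    }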
@@ -323,7 +305,7 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
SmallVector<StringRef, 32> CPUFeatures;
for (unsigned I = 0, E = Lines.size(); I != E; ++I)
if (Lines[I].startswith("features")) {
- size_t Pos = Lines[I].find(":");
+ size_t Pos = Lines[I].find(':');
if (Pos != StringRef::npos) {
Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
break;
@@ -435,11 +417,6 @@ StringRef sys::detail::getHostCPUNameForBPF() {
#if defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64__) || defined(_M_X64)
-enum VendorSignatures {
- SIG_INTEL = 0x756e6547 /* Genu */,
- SIG_AMD = 0x68747541 /* Auth */
-};
-
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
@@ -513,6 +490,42 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
#endif
}
+namespace llvm {
+namespace sys {
+namespace detail {
+namespace x86 {
+
+VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ if (MaxLeaf == nullptr)
+ MaxLeaf = &EAX;
+ else
+ *MaxLeaf = 0;
+
+ if (!isCpuIdSupported())
+ return VendorSignatures::UNKNOWN;
+
+ if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
+ return VendorSignatures::UNKNOWN;
+
+ // "Genu ineI ntel"
+ if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
+ return VendorSignatures::GENUINE_INTEL;
+
+ // "Auth enti cAMD"
+ if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
+ return VendorSignatures::AUTHENTIC_AMD;
+
+ return VendorSignatures::UNKNOWN;
+}
+
+} // namespace x86
+} // namespace detail
+} // namespace sys
+} // namespace llvm
+
+using namespace llvm::sys::detail::x86;
+
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
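getVendorSignature() above identifies the vendor from the raw CPUID leaf-0 registers; the magic constants are simply the ASCII bytes of "GenuineIntel" and "AuthenticAMD" spread across EBX, EDX and ECX. A small self-contained check of that encoding (no CPUID instruction needed; packLE is an illustrative helper and assumes a little-endian host):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Pack four ASCII characters into a little-endian 32-bit register image,
    // the same layout CPUID leaves them in on x86.
    static std::uint32_t packLE(const char *Four) {
      std::uint32_t V;
      std::memcpy(&V, Four, 4);
      return V;
    }

    int main() {
      // "GenuineIntel" is reported as EBX="Genu", EDX="ineI", ECX="ntel".
      std::printf("EBX=0x%08x EDX=0x%08x ECX=0x%08x\n",
                  packLE("Genu"), packLE("ineI"), packLE("ntel"));
      // Expected: 0x756e6547, 0x49656e69, 0x6c65746e -- the constants
      // compared against in getVendorSignature() above.
    }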
@@ -730,6 +743,13 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
break;
+ // Sapphire Rapids:
+ case 0x8f:
+ CPU = "sapphirerapids";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
+ break;
+
case 0x1c: // Most 45 nm Intel Atom processors
case 0x26: // 45 nm Atom Lincroft
case 0x27: // 32 nm Atom Medfield
@@ -956,6 +976,14 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break; // 00h-0Fh: Zen1
}
break;
+ case 25:
+ CPU = "znver3";
+ *Type = X86::AMDFAM19H;
+ if (Model <= 0x0f) {
+ *Subtype = X86::AMDFAM19H_ZNVER3;
+ break; // 00h-0Fh: Zen3
+ }
+ break;
default:
break; // Unknown AMD CPU.
}
@@ -1095,14 +1123,12 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
}
StringRef sys::getHostCPUName() {
- unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
- unsigned MaxLeaf, Vendor;
-
- if (!isCpuIdSupported())
+ unsigned MaxLeaf = 0;
+ const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
+ if (Vendor == VendorSignatures::UNKNOWN)
return "generic";
- if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1)
- return "generic";
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
unsigned Family = 0, Model = 0;
@@ -1117,10 +1143,10 @@ StringRef sys::getHostCPUName() {
StringRef CPU;
- if (Vendor == SIG_INTEL) {
+ if (Vendor == VendorSignatures::GENUINE_INTEL) {
CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
&Subtype);
- } else if (Vendor == SIG_AMD) {
+ } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
&Subtype);
}
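The Family and Model values consumed by getIntelProcessorTypeAndSubtype() and getAMDProcessorTypeAndSubtype() come from the CPUID leaf-1 EAX signature fetched a few lines above. The decoding itself is outside this hunk; the conventional extraction, including the extended family/model fields, looks roughly like this hedged sketch (a reconstruction, not copied from Host.cpp):

    #include <cstdio>

    // Conventional decoding of the x86 family/model from the CPUID leaf-1
    // EAX signature (extended fields only apply to families 0x6 and 0xf).
    static void decodeSignature(unsigned EAX, unsigned &Family, unsigned &Model) {
      Family = (EAX >> 8) & 0xf; // bits 8-11
      Model = (EAX >> 4) & 0xf;  // bits 4-7
      if (Family == 0x6 || Family == 0xf) {
        if (Family == 0xf)
          Family += (EAX >> 20) & 0xff;    // extended family, bits 20-27
        Model += ((EAX >> 16) & 0xf) << 4; // extended model, bits 16-19
      }
    }

    int main() {
      unsigned Family = 0, Model = 0;
      decodeSignature(0x000806F0, Family, Model); // hypothetical signature
      std::printf("family=%u model=0x%x\n", Family, Model);
      // family=6 model=0x8f -- the Sapphire Rapids case added above.
    }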
@@ -1222,6 +1248,19 @@ StringRef sys::getHostCPUName() {
}
#else
StringRef sys::getHostCPUName() { return "generic"; }
+namespace llvm {
+namespace sys {
+namespace detail {
+namespace x86 {
+
+VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
+ return VendorSignatures::UNKNOWN;
+}
+
+} // namespace x86
+} // namespace detail
+} // namespace sys
+} // namespace llvm
#endif
#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
@@ -1272,6 +1311,27 @@ int computeHostNumPhysicalCores() {
}
return CPU_COUNT(&Enabled);
}
+#elif defined(__linux__) && defined(__powerpc__)
+int computeHostNumPhysicalCores() {
+ cpu_set_t Affinity;
+ if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
+ return CPU_COUNT(&Affinity);
+
+ // The call to sched_getaffinity() may have failed because the Affinity
+ // mask is too small for the number of CPU's on the system (i.e. the
+ // system has more than 1024 CPUs). Allocate a mask large enough for
+ // twice as many CPUs.
+ cpu_set_t *DynAffinity;
+ DynAffinity = CPU_ALLOC(2048);
+ if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) {
+ int NumCPUs = CPU_COUNT(DynAffinity);
+ CPU_FREE(DynAffinity);
+ return NumCPUs;
+ }
+ return -1;
+}
+#elif defined(__linux__) && defined(__s390x__)
+int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); }
#elif defined(__APPLE__) && defined(__x86_64__)
#include <sys/param.h>
#include <sys/sysctl.h>
@@ -1291,6 +1351,28 @@ int computeHostNumPhysicalCores() {
}
return count;
}
+#elif defined(__MVS__)
+int computeHostNumPhysicalCores() {
+ enum {
+ // Byte offset of the pointer to the Communications Vector Table (CVT) in
+ // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
+ // will be zero-extended to uintptr_t.
+ FLCCVT = 16,
+ // Byte offset of the pointer to the Common System Data Area (CSD) in the
+ // CVT. The table entry is a 31-bit pointer and will be zero-extended to
+ // uintptr_t.
+ CVTCSD = 660,
+ // Byte offset to the number of live CPs in the LPAR, stored as a signed
+ // 32-bit value in the table.
+ CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
+ };
+ char *PSA = 0;
+ char *CVT = reinterpret_cast<char *>(
+ static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
+ char *CSD = reinterpret_cast<char *>(
+ static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
+ return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
+}
#elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
// Defined in llvm/lib/Support/Windows/Threading.inc
int computeHostNumPhysicalCores();
@@ -1420,11 +1502,13 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1);
+ Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker
Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1);
Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1);
Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1);
Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1);
+ Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1);
Features["avx512vp2intersect"] =
HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1);
@@ -1445,7 +1529,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
bool HasLeaf7Subleaf1 =
MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
+ Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
@@ -1460,6 +1546,10 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
+ bool HasLeaf19 =
+ MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
+ Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
+
return true;
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
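Each feature flag added above ("kl", "uintr", "avxvnni", "hreset", "widekl") is a single bit in some CPUID leaf/subleaf. A standalone probe for two of them, using the compiler-provided <cpuid.h> helpers rather than LLVM's wrappers; the bit positions are taken from the hunk above, everything else is illustrative:

    #include <cpuid.h>
    #include <cstdio>

    int main() {
      unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
      if (!__get_cpuid_count(7, 0, &EAX, &EBX, &ECX, &EDX)) {
        std::puts("CPUID leaf 7 not supported");
        return 0;
      }
      // Same bit tests as above: ECX bit 23 is Key Locker ("kl"),
      // EDX bit 5 is user interrupts ("uintr").
      std::printf("kl=%u uintr=%u\n", (ECX >> 23) & 1, (EDX >> 5) & 1);
      return 0;
    }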
diff --git a/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp b/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp
index 5c56b773ea69..152de6ebae0a 100644
--- a/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp
@@ -22,10 +22,17 @@ using namespace llvm;
using namespace llvm::sys;
InitLLVM::InitLLVM(int &Argc, const char **&Argv,
- bool InstallPipeSignalExitHandler)
- : StackPrinter(Argc, Argv) {
+ bool InstallPipeSignalExitHandler) {
if (InstallPipeSignalExitHandler)
+ // The pipe signal handler must be installed before any other handlers are
+ // registered. This is because the Unix \ref RegisterHandlers function does
+ // not perform a sigaction() for SIGPIPE unless a one-shot handler is
+ // present, to allow long-lived processes (like lldb) to fully opt-out of
+ // llvm's SIGPIPE handling and ignore the signal safely.
sys::SetOneShotPipeSignalFunction(sys::DefaultOneShotPipeSignalHandler);
+ // Initialize the stack printer after installing the one-shot pipe signal
+ // handler, so we can perform a sigaction() for SIGPIPE on Unix if requested.
+ StackPrinter.emplace(Argc, Argv);
sys::PrintStackTraceOnErrorSignal(Argv[0]);
install_out_of_memory_new_handler();
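The reordering above only affects construction order inside InitLLVM; tool entry points keep the usual one-liner. For reference, a minimal driver relying on the now-documented sequence (SIGPIPE handler first, then the stack printer, then the out-of-memory handler):

    #include "llvm/Support/InitLLVM.h"
    #include "llvm/Support/raw_ostream.h"

    int main(int argc, const char **argv) {
      // Installs the one-shot SIGPIPE handler, then the pretty-stack-trace
      // printer, then the out-of-memory handler, in the order fixed above.
      llvm::InitLLVM X(argc, argv);
      llvm::outs() << "tool running with " << argc << " argument(s)\n";
      return 0;
    }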
diff --git a/contrib/llvm-project/llvm/lib/Support/InstructionCost.cpp b/contrib/llvm-project/llvm/lib/Support/InstructionCost.cpp
new file mode 100644
index 000000000000..c485ce9107af
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Support/InstructionCost.cpp
@@ -0,0 +1,24 @@
+//===- InstructionCost.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file includes the function definitions for the InstructionCost class
+/// that is used when calculating the cost of an instruction, or a group of
+/// instructions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/InstructionCost.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void InstructionCost::print(raw_ostream &OS) const {
+ if (isValid())
+ OS << Value;
+ else
+ OS << "Invalid";
+}
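InstructionCost wraps an integer cost plus a validity state, and print() is what the streaming operator ends up calling. A short usage sketch; getInvalid(), operator+ and the raw_ostream operator<< are assumed to be provided by the accompanying llvm/Support/InstructionCost.h header rather than shown in this diff:

    #include "llvm/Support/InstructionCost.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::InstructionCost Cheap(1);
      llvm::InstructionCost Unknown = llvm::InstructionCost::getInvalid();
      llvm::InstructionCost Total = Cheap + Unknown; // invalid state propagates
      llvm::outs() << "cheap=" << Cheap << " total=" << Total << "\n";
      // Expected to print "cheap=1 total=Invalid" via InstructionCost::print().
      return 0;
    }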
diff --git a/contrib/llvm-project/llvm/lib/Support/JSON.cpp b/contrib/llvm-project/llvm/lib/Support/JSON.cpp
index 16b1d11efd08..dbfd673553f4 100644
--- a/contrib/llvm-project/llvm/lib/Support/JSON.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/JSON.cpp
@@ -7,8 +7,11 @@
//===---------------------------------------------------------------------===//
#include "llvm/Support/JSON.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
#include <cctype>
namespace llvm {
@@ -106,7 +109,7 @@ void Value::copyFrom(const Value &M) {
case T_Boolean:
case T_Double:
case T_Integer:
- memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer));
+ memcpy(&Union, &M.Union, sizeof(Union));
break;
case T_StringRef:
create<StringRef>(M.as<StringRef>());
@@ -130,7 +133,7 @@ void Value::moveFrom(const Value &&M) {
case T_Boolean:
case T_Double:
case T_Integer:
- memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer));
+ memcpy(&Union, &M.Union, sizeof(Union));
break;
case T_StringRef:
create<StringRef>(M.as<StringRef>());
@@ -198,6 +201,160 @@ bool operator==(const Value &L, const Value &R) {
llvm_unreachable("Unknown value kind");
}
+void Path::report(llvm::StringLiteral Msg) {
+ // Walk up to the root context, and count the number of segments.
+ unsigned Count = 0;
+ const Path *P;
+ for (P = this; P->Parent != nullptr; P = P->Parent)
+ ++Count;
+ Path::Root *R = P->Seg.root();
+ // Fill in the error message and copy the path (in reverse order).
+ R->ErrorMessage = Msg;
+ R->ErrorPath.resize(Count);
+ auto It = R->ErrorPath.begin();
+ for (P = this; P->Parent != nullptr; P = P->Parent)
+ *It++ = P->Seg;
+}
+
+Error Path::Root::getError() const {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
+ if (ErrorPath.empty()) {
+ if (!Name.empty())
+ OS << " when parsing " << Name;
+ } else {
+ OS << " at " << (Name.empty() ? "(root)" : Name);
+ for (const Path::Segment &S : llvm::reverse(ErrorPath)) {
+ if (S.isField())
+ OS << '.' << S.field();
+ else
+ OS << '[' << S.index() << ']';
+ }
+ }
+ return createStringError(llvm::inconvertibleErrorCode(), OS.str());
+}
+
+namespace {
+
+std::vector<const Object::value_type *> sortedElements(const Object &O) {
+ std::vector<const Object::value_type *> Elements;
+ for (const auto &E : O)
+ Elements.push_back(&E);
+ llvm::sort(Elements,
+ [](const Object::value_type *L, const Object::value_type *R) {
+ return L->first < R->first;
+ });
+ return Elements;
+}
+
+// Prints a one-line version of a value that isn't our main focus.
+// We interleave writes to OS and JOS, exploiting the lack of extra buffering.
+// This is OK as we own the implementation.
+void abbreviate(const Value &V, OStream &JOS) {
+ switch (V.kind()) {
+ case Value::Array:
+ JOS.rawValue(V.getAsArray()->empty() ? "[]" : "[ ... ]");
+ break;
+ case Value::Object:
+ JOS.rawValue(V.getAsObject()->empty() ? "{}" : "{ ... }");
+ break;
+ case Value::String: {
+ llvm::StringRef S = *V.getAsString();
+ if (S.size() < 40) {
+ JOS.value(V);
+ } else {
+ std::string Truncated = fixUTF8(S.take_front(37));
+ Truncated.append("...");
+ JOS.value(Truncated);
+ }
+ break;
+ }
+ default:
+ JOS.value(V);
+ }
+}
+
+// Prints a semi-expanded version of a value that is our main focus.
+// Array/Object entries are printed, but not recursively as they may be huge.
+void abbreviateChildren(const Value &V, OStream &JOS) {
+ switch (V.kind()) {
+ case Value::Array:
+ JOS.array([&] {
+ for (const auto &I : *V.getAsArray())
+ abbreviate(I, JOS);
+ });
+ break;
+ case Value::Object:
+ JOS.object([&] {
+ for (const auto *KV : sortedElements(*V.getAsObject())) {
+ JOS.attributeBegin(KV->first);
+ abbreviate(KV->second, JOS);
+ JOS.attributeEnd();
+ }
+ });
+ break;
+ default:
+ JOS.value(V);
+ }
+}
+
+} // namespace
+
+void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const {
+ OStream JOS(OS, /*IndentSize=*/2);
+ // PrintValue recurses down the path, printing the ancestors of our target.
+ // Siblings of nodes along the path are printed with abbreviate(), and the
+ // target itself is printed with the somewhat richer abbreviateChildren().
+ // 'Recurse' is the lambda itself, to allow recursive calls.
+ auto PrintValue = [&](const Value &V, ArrayRef<Segment> Path, auto &Recurse) {
+ // Print the target node itself, with the error as a comment.
+ // Also used if we can't follow our path, e.g. it names a field that
+ // *should* exist but doesn't.
+ auto HighlightCurrent = [&] {
+ std::string Comment = "error: ";
+ Comment.append(ErrorMessage.data(), ErrorMessage.size());
+ JOS.comment(Comment);
+ abbreviateChildren(V, JOS);
+ };
+ if (Path.empty()) // We reached our target.
+ return HighlightCurrent();
+ const Segment &S = Path.back(); // Path is in reverse order.
+ if (S.isField()) {
+ // Current node is an object, path names a field.
+ llvm::StringRef FieldName = S.field();
+ const Object *O = V.getAsObject();
+ if (!O || !O->get(FieldName))
+ return HighlightCurrent();
+ JOS.object([&] {
+ for (const auto *KV : sortedElements(*O)) {
+ JOS.attributeBegin(KV->first);
+ if (FieldName.equals(KV->first))
+ Recurse(KV->second, Path.drop_back(), Recurse);
+ else
+ abbreviate(KV->second, JOS);
+ JOS.attributeEnd();
+ }
+ });
+ } else {
+ // Current node is an array, path names an element.
+ const Array *A = V.getAsArray();
+ if (!A || S.index() >= A->size())
+ return HighlightCurrent();
+ JOS.array([&] {
+ unsigned Current = 0;
+ for (const auto &V : *A) {
+ if (Current++ == S.index())
+ Recurse(V, Path.drop_back(), Recurse);
+ else
+ abbreviate(V, JOS);
+ }
+ });
+ }
+ };
+ PrintValue(R, ErrorPath, PrintValue);
+}
+
namespace {
// Simple recursive-descent JSON parser.
class Parser {
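Path and Path::Root above let JSON deserialization code record where a mapping failed and render both a textual error and an abbreviated context dump. A hedged sketch of a caller; only report(), getError() and printErrorContext() appear in the hunk above, while the Root/Path constructors and llvm::toString() are assumed from llvm/Support/JSON.h and llvm/Support/Error.h:

    #include "llvm/Support/Error.h"
    #include "llvm/Support/JSON.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::json::Value Doc = llvm::json::Object{
          {"name", "demo"}, {"threads", "four"}}; // should be a number
      llvm::json::Path::Root Root("config");
      llvm::json::Path P(Root);
      // A real fromJSON() implementation would navigate to the offending
      // field first; here the error is reported against the root.
      P.report("expected integer value for 'threads'");
      llvm::errs() << llvm::toString(Root.getError()) << "\n";
      Root.printErrorContext(Doc, llvm::errs()); // abbreviated JSON + comment
      llvm::errs() << "\n";
      return 0;
    }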
@@ -518,17 +675,6 @@ Expected<Value> parse(StringRef JSON) {
}
char ParseError::ID = 0;
-static std::vector<const Object::value_type *> sortedElements(const Object &O) {
- std::vector<const Object::value_type *> Elements;
- for (const auto &E : O)
- Elements.push_back(&E);
- llvm::sort(Elements,
- [](const Object::value_type *L, const Object::value_type *R) {
- return L->first < R->first;
- });
- return Elements;
-}
-
bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
// Fast-path for ASCII, which is valid UTF-8.
if (LLVM_LIKELY(isASCII(S)))
@@ -633,9 +779,40 @@ void llvm::json::OStream::valueBegin() {
}
if (Stack.back().Ctx == Array)
newline();
+ flushComment();
Stack.back().HasValue = true;
}
+void OStream::comment(llvm::StringRef Comment) {
+ assert(PendingComment.empty() && "Only one comment per value!");
+ PendingComment = Comment;
+}
+
+void OStream::flushComment() {
+ if (PendingComment.empty())
+ return;
+ OS << (IndentSize ? "/* " : "/*");
+ // Be sure not to accidentally emit "*/". Transform to "* /".
+ while (!PendingComment.empty()) {
+ auto Pos = PendingComment.find("*/");
+ if (Pos == StringRef::npos) {
+ OS << PendingComment;
+ PendingComment = "";
+ } else {
+ OS << PendingComment.take_front(Pos) << "* /";
+ PendingComment = PendingComment.drop_front(Pos + 2);
+ }
+ }
+ OS << (IndentSize ? " */" : "*/");
+ // Comments are on their own line unless attached to an attribute value.
+ if (Stack.size() > 1 && Stack.back().Ctx == Singleton) {
+ if (IndentSize)
+ OS << ' ';
+ } else {
+ newline();
+ }
+}
+
void llvm::json::OStream::newline() {
if (IndentSize) {
OS.write('\n');
@@ -657,6 +834,7 @@ void llvm::json::OStream::arrayEnd() {
if (Stack.back().HasValue)
newline();
OS << ']';
+ assert(PendingComment.empty());
Stack.pop_back();
assert(!Stack.empty());
}
@@ -675,6 +853,7 @@ void llvm::json::OStream::objectEnd() {
if (Stack.back().HasValue)
newline();
OS << '}';
+ assert(PendingComment.empty());
Stack.pop_back();
assert(!Stack.empty());
}
@@ -684,6 +863,7 @@ void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
if (Stack.back().HasValue)
OS << ',';
newline();
+ flushComment();
Stack.back().HasValue = true;
Stack.emplace_back();
Stack.back().Ctx = Singleton;
@@ -701,10 +881,23 @@ void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
void llvm::json::OStream::attributeEnd() {
assert(Stack.back().Ctx == Singleton);
assert(Stack.back().HasValue && "Attribute must have a value");
+ assert(PendingComment.empty());
Stack.pop_back();
assert(Stack.back().Ctx == Object);
}
+raw_ostream &llvm::json::OStream::rawValueBegin() {
+ valueBegin();
+ Stack.emplace_back();
+ Stack.back().Ctx = RawValue;
+ return OS;
+}
+
+void llvm::json::OStream::rawValueEnd() {
+ assert(Stack.back().Ctx == RawValue);
+ Stack.pop_back();
+}
+
} // namespace json
} // namespace llvm
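flushComment() above has one subtle job besides indentation: the pending comment must never contain the closing delimiter, so every embedded "*/" is rewritten as "* /". The same transformation as a standalone function over std::string (a pure reimplementation for illustration, not the OStream member itself):

    #include <cstdio>
    #include <string>

    // Make a comment body safe to wrap in /* ... */: every embedded "*/"
    // becomes "* /", exactly the rewrite flushComment() performs.
    static std::string escapeBlockComment(std::string Body) {
      std::string Out;
      std::size_t Pos;
      while ((Pos = Body.find("*/")) != std::string::npos) {
        Out += Body.substr(0, Pos);
        Out += "* /";
        Body.erase(0, Pos + 2);
      }
      Out += Body;
      return Out;
    }

    int main() {
      std::printf("%s\n", escapeBlockComment("careful: */ inside").c_str());
      // Prints: careful: * / inside
    }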
diff --git a/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp b/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
index 1ff66d504cbe..3623a54ae476 100644
--- a/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
@@ -83,6 +83,403 @@ KnownBits KnownBits::computeForAddSub(bool Add, bool NSW,
return KnownOut;
}
+KnownBits KnownBits::sextInReg(unsigned SrcBitWidth) const {
+ unsigned BitWidth = getBitWidth();
+ assert(0 < SrcBitWidth && SrcBitWidth <= BitWidth &&
+ "Illegal sext-in-register");
+
+ if (SrcBitWidth == BitWidth)
+ return *this;
+
+ unsigned ExtBits = BitWidth - SrcBitWidth;
+ KnownBits Result;
+ Result.One = One << ExtBits;
+ Result.Zero = Zero << ExtBits;
+ Result.One.ashrInPlace(ExtBits);
+ Result.Zero.ashrInPlace(ExtBits);
+ return Result;
+}
+
+KnownBits KnownBits::makeGE(const APInt &Val) const {
+ // Count the number of leading bit positions where our underlying value is
+ // known to be less than or equal to Val.
+ unsigned N = (Zero | Val).countLeadingOnes();
+
+ // For each of those bit positions, if Val has a 1 in that bit then our
+ // underlying value must also have a 1.
+ APInt MaskedVal(Val);
+ MaskedVal.clearLowBits(getBitWidth() - N);
+ return KnownBits(Zero, One | MaskedVal);
+}
+
+KnownBits KnownBits::umax(const KnownBits &LHS, const KnownBits &RHS) {
+ // If we can prove that LHS >= RHS then use LHS as the result. Likewise for
+ // RHS. Ideally our caller would already have spotted these cases and
+ // optimized away the umax operation, but we handle them here for
+ // completeness.
+ if (LHS.getMinValue().uge(RHS.getMaxValue()))
+ return LHS;
+ if (RHS.getMinValue().uge(LHS.getMaxValue()))
+ return RHS;
+
+ // If the result of the umax is LHS then it must be greater than or equal to
+ // the minimum possible value of RHS. Likewise for RHS. Any known bits that
+ // are common to these two values are also known in the result.
+ KnownBits L = LHS.makeGE(RHS.getMinValue());
+ KnownBits R = RHS.makeGE(LHS.getMinValue());
+ return KnownBits::commonBits(L, R);
+}
+
+KnownBits KnownBits::umin(const KnownBits &LHS, const KnownBits &RHS) {
+ // Flip the range of values: [0, 0xFFFFFFFF] <-> [0xFFFFFFFF, 0]
+ auto Flip = [](const KnownBits &Val) { return KnownBits(Val.One, Val.Zero); };
+ return Flip(umax(Flip(LHS), Flip(RHS)));
+}
+
+KnownBits KnownBits::smax(const KnownBits &LHS, const KnownBits &RHS) {
+ // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0, 0xFFFFFFFF]
+ auto Flip = [](const KnownBits &Val) {
+ unsigned SignBitPosition = Val.getBitWidth() - 1;
+ APInt Zero = Val.Zero;
+ APInt One = Val.One;
+ Zero.setBitVal(SignBitPosition, Val.One[SignBitPosition]);
+ One.setBitVal(SignBitPosition, Val.Zero[SignBitPosition]);
+ return KnownBits(Zero, One);
+ };
+ return Flip(umax(Flip(LHS), Flip(RHS)));
+}
+
+KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) {
+ // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0xFFFFFFFF, 0]
+ auto Flip = [](const KnownBits &Val) {
+ unsigned SignBitPosition = Val.getBitWidth() - 1;
+ APInt Zero = Val.One;
+ APInt One = Val.Zero;
+ Zero.setBitVal(SignBitPosition, Val.Zero[SignBitPosition]);
+ One.setBitVal(SignBitPosition, Val.One[SignBitPosition]);
+ return KnownBits(Zero, One);
+ };
+ return Flip(umax(Flip(LHS), Flip(RHS)));
+}
+
+KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS) {
+ unsigned BitWidth = LHS.getBitWidth();
+ KnownBits Known(BitWidth);
+
+ // If the shift amount is a valid constant then transform LHS directly.
+ if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) {
+ unsigned Shift = RHS.getConstant().getZExtValue();
+ Known = LHS;
+ Known.Zero <<= Shift;
+ Known.One <<= Shift;
+ // Low bits are known zero.
+ Known.Zero.setLowBits(Shift);
+ return Known;
+ }
+
+ // No matter the shift amount, the trailing zeros will stay zero.
+ unsigned MinTrailingZeros = LHS.countMinTrailingZeros();
+
+ // Minimum shift amount low bits are known zero.
+ if (RHS.getMinValue().ult(BitWidth)) {
+ MinTrailingZeros += RHS.getMinValue().getZExtValue();
+ MinTrailingZeros = std::min(MinTrailingZeros, BitWidth);
+ }
+
+ Known.Zero.setLowBits(MinTrailingZeros);
+ return Known;
+}
+
+KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS) {
+ unsigned BitWidth = LHS.getBitWidth();
+ KnownBits Known(BitWidth);
+
+ if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) {
+ unsigned Shift = RHS.getConstant().getZExtValue();
+ Known = LHS;
+ Known.Zero.lshrInPlace(Shift);
+ Known.One.lshrInPlace(Shift);
+ // High bits are known zero.
+ Known.Zero.setHighBits(Shift);
+ return Known;
+ }
+
+ // No matter the shift amount, the leading zeros will stay zero.
+ unsigned MinLeadingZeros = LHS.countMinLeadingZeros();
+
+ // Minimum shift amount high bits are known zero.
+ if (RHS.getMinValue().ult(BitWidth)) {
+ MinLeadingZeros += RHS.getMinValue().getZExtValue();
+ MinLeadingZeros = std::min(MinLeadingZeros, BitWidth);
+ }
+
+ Known.Zero.setHighBits(MinLeadingZeros);
+ return Known;
+}
+
+KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS) {
+ unsigned BitWidth = LHS.getBitWidth();
+ KnownBits Known(BitWidth);
+
+ if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) {
+ unsigned Shift = RHS.getConstant().getZExtValue();
+ Known = LHS;
+ Known.Zero.ashrInPlace(Shift);
+ Known.One.ashrInPlace(Shift);
+ return Known;
+ }
+
+ // No matter the shift amount, the leading sign bits will stay.
+ unsigned MinLeadingZeros = LHS.countMinLeadingZeros();
+ unsigned MinLeadingOnes = LHS.countMinLeadingOnes();
+
+ // Minimum shift amount high bits are known sign bits.
+ if (RHS.getMinValue().ult(BitWidth)) {
+ if (MinLeadingZeros) {
+ MinLeadingZeros += RHS.getMinValue().getZExtValue();
+ MinLeadingZeros = std::min(MinLeadingZeros, BitWidth);
+ }
+ if (MinLeadingOnes) {
+ MinLeadingOnes += RHS.getMinValue().getZExtValue();
+ MinLeadingOnes = std::min(MinLeadingOnes, BitWidth);
+ }
+ }
+
+ Known.Zero.setHighBits(MinLeadingZeros);
+ Known.One.setHighBits(MinLeadingOnes);
+ return Known;
+}
+
+Optional<bool> KnownBits::eq(const KnownBits &LHS, const KnownBits &RHS) {
+ if (LHS.isConstant() && RHS.isConstant())
+ return Optional<bool>(LHS.getConstant() == RHS.getConstant());
+ if (LHS.One.intersects(RHS.Zero) || RHS.One.intersects(LHS.Zero))
+ return Optional<bool>(false);
+ return None;
+}
+
+Optional<bool> KnownBits::ne(const KnownBits &LHS, const KnownBits &RHS) {
+ if (Optional<bool> KnownEQ = eq(LHS, RHS))
+ return Optional<bool>(!KnownEQ.getValue());
+ return None;
+}
+
+Optional<bool> KnownBits::ugt(const KnownBits &LHS, const KnownBits &RHS) {
+ // LHS >u RHS -> false if umax(LHS) <= umax(RHS)
+ if (LHS.getMaxValue().ule(RHS.getMinValue()))
+ return Optional<bool>(false);
+ // LHS >u RHS -> true if umin(LHS) > umax(RHS)
+ if (LHS.getMinValue().ugt(RHS.getMaxValue()))
+ return Optional<bool>(true);
+ return None;
+}
+
+Optional<bool> KnownBits::uge(const KnownBits &LHS, const KnownBits &RHS) {
+ if (Optional<bool> IsUGT = ugt(RHS, LHS))
+ return Optional<bool>(!IsUGT.getValue());
+ return None;
+}
+
+Optional<bool> KnownBits::ult(const KnownBits &LHS, const KnownBits &RHS) {
+ return ugt(RHS, LHS);
+}
+
+Optional<bool> KnownBits::ule(const KnownBits &LHS, const KnownBits &RHS) {
+ return uge(RHS, LHS);
+}
+
+Optional<bool> KnownBits::sgt(const KnownBits &LHS, const KnownBits &RHS) {
+ // LHS >s RHS -> false if smax(LHS) <= smax(RHS)
+ if (LHS.getSignedMaxValue().sle(RHS.getSignedMinValue()))
+ return Optional<bool>(false);
+ // LHS >s RHS -> true if smin(LHS) > smax(RHS)
+ if (LHS.getSignedMinValue().sgt(RHS.getSignedMaxValue()))
+ return Optional<bool>(true);
+ return None;
+}
+
+Optional<bool> KnownBits::sge(const KnownBits &LHS, const KnownBits &RHS) {
+ if (Optional<bool> KnownSGT = sgt(RHS, LHS))
+ return Optional<bool>(!KnownSGT.getValue());
+ return None;
+}
+
+Optional<bool> KnownBits::slt(const KnownBits &LHS, const KnownBits &RHS) {
+ return sgt(RHS, LHS);
+}
+
+Optional<bool> KnownBits::sle(const KnownBits &LHS, const KnownBits &RHS) {
+ return sge(RHS, LHS);
+}
+
+KnownBits KnownBits::abs(bool IntMinIsPoison) const {
+ // If the source's MSB is zero then we know the rest of the bits already.
+ if (isNonNegative())
+ return *this;
+
+ // Absolute value preserves trailing zero count.
+ KnownBits KnownAbs(getBitWidth());
+ KnownAbs.Zero.setLowBits(countMinTrailingZeros());
+
+ // We only know that the absolute value's MSB will be zero if INT_MIN is
+ // poison, or there is a set bit that isn't the sign bit (otherwise it could
+ // be INT_MIN).
+ if (IntMinIsPoison || (!One.isNullValue() && !One.isMinSignedValue()))
+ KnownAbs.Zero.setSignBit();
+
+ // FIXME: Handle known negative input?
+ // FIXME: Calculate the negated Known bits and combine them?
+ return KnownAbs;
+}
+
+KnownBits KnownBits::computeForMul(const KnownBits &LHS, const KnownBits &RHS) {
+ unsigned BitWidth = LHS.getBitWidth();
+
+ assert(!LHS.hasConflict() && !RHS.hasConflict());
+ // Compute a conservative estimate for high known-0 bits.
+ unsigned LeadZ =
+ std::max(LHS.countMinLeadingZeros() + RHS.countMinLeadingZeros(),
+ BitWidth) -
+ BitWidth;
+ LeadZ = std::min(LeadZ, BitWidth);
+
+ // The result of the bottom bits of an integer multiply can be
+ // inferred by looking at the bottom bits of both operands and
+ // multiplying them together.
+ // We can infer at least the minimum number of known trailing bits
+ // of both operands. Depending on number of trailing zeros, we can
+ // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming
+ // a and b are divisible by m and n respectively.
+ // We then calculate how many of those bits are inferrable and set
+ // the output. For example, the i8 mul:
+ // a = XXXX1100 (12)
+ // b = XXXX1110 (14)
+ // We know the bottom 3 bits are zero since the first can be divided by
+ // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4).
+ // Applying the multiplication to the trimmed arguments gets:
+ // XX11 (3)
+ // X111 (7)
+ // -------
+ // XX11
+ // XX11
+ // XX11
+ // XX11
+ // -------
+ // XXXXX01
+ // Which allows us to infer the 2 LSBs. Since we're multiplying the result
+ // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits.
+ // The proof for this can be described as:
+ // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) &&
+ // (C7 == (1 << (umin(countTrailingZeros(C1), C5) +
+ // umin(countTrailingZeros(C2), C6) +
+ // umin(C5 - umin(countTrailingZeros(C1), C5),
+ // C6 - umin(countTrailingZeros(C2), C6)))) - 1)
+ // %aa = shl i8 %a, C5
+ // %bb = shl i8 %b, C6
+ // %aaa = or i8 %aa, C1
+ // %bbb = or i8 %bb, C2
+ // %mul = mul i8 %aaa, %bbb
+ // %mask = and i8 %mul, C7
+ // =>
+ // %mask = i8 ((C1*C2)&C7)
+ // Where C5, C6 describe the known bits of %a, %b
+ // C1, C2 describe the known bottom bits of %a, %b.
+ // C7 describes the mask of the known bits of the result.
+ const APInt &Bottom0 = LHS.One;
+ const APInt &Bottom1 = RHS.One;
+
+ // How many times we'd be able to divide each argument by 2 (shr by 1).
+ // This gives us the number of trailing zeros on the multiplication result.
+ unsigned TrailBitsKnown0 = (LHS.Zero | LHS.One).countTrailingOnes();
+ unsigned TrailBitsKnown1 = (RHS.Zero | RHS.One).countTrailingOnes();
+ unsigned TrailZero0 = LHS.countMinTrailingZeros();
+ unsigned TrailZero1 = RHS.countMinTrailingZeros();
+ unsigned TrailZ = TrailZero0 + TrailZero1;
+
+ // Figure out the fewest known-bits operand.
+ unsigned SmallestOperand =
+ std::min(TrailBitsKnown0 - TrailZero0, TrailBitsKnown1 - TrailZero1);
+ unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth);
+
+ APInt BottomKnown =
+ Bottom0.getLoBits(TrailBitsKnown0) * Bottom1.getLoBits(TrailBitsKnown1);
+
+ KnownBits Res(BitWidth);
+ Res.Zero.setHighBits(LeadZ);
+ Res.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown);
+ Res.One = BottomKnown.getLoBits(ResultBitsKnown);
+ return Res;
+}
+
+KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS) {
+ unsigned BitWidth = LHS.getBitWidth();
+ assert(!LHS.hasConflict() && !RHS.hasConflict());
+ KnownBits Known(BitWidth);
+
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ unsigned LeadZ = LHS.countMinLeadingZeros();
+ unsigned RHSMaxLeadingZeros = RHS.countMaxLeadingZeros();
+
+ if (RHSMaxLeadingZeros != BitWidth)
+ LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
+
+ Known.Zero.setHighBits(LeadZ);
+ return Known;
+}
+
+KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) {
+ unsigned BitWidth = LHS.getBitWidth();
+ assert(!LHS.hasConflict() && !RHS.hasConflict());
+ KnownBits Known(BitWidth);
+
+ if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) {
+ // The upper bits are all zero, the lower ones are unchanged.
+ APInt LowBits = RHS.getConstant() - 1;
+ Known.Zero = LHS.Zero | ~LowBits;
+ Known.One = LHS.One & LowBits;
+ return Known;
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ uint32_t Leaders =
+ std::max(LHS.countMinLeadingZeros(), RHS.countMinLeadingZeros());
+ Known.Zero.setHighBits(Leaders);
+ return Known;
+}
+
+KnownBits KnownBits::srem(const KnownBits &LHS, const KnownBits &RHS) {
+ unsigned BitWidth = LHS.getBitWidth();
+ assert(!LHS.hasConflict() && !RHS.hasConflict());
+ KnownBits Known(BitWidth);
+
+ if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) {
+ // The low bits of the first operand are unchanged by the srem.
+ APInt LowBits = RHS.getConstant() - 1;
+ Known.Zero = LHS.Zero & LowBits;
+ Known.One = LHS.One & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (LHS.isNonNegative() || LowBits.isSubsetOf(LHS.Zero))
+ Known.Zero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (LHS.isNegative() && LowBits.intersects(LHS.One))
+ Known.One |= ~LowBits;
+ return Known;
+ }
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero. If it's known zero, our sign bit is also zero.
+ if (LHS.isNonNegative())
+ Known.makeNonNegative();
+ return Known;
+}
+
KnownBits &KnownBits::operator&=(const KnownBits &RHS) {
// Result bit is 0 if either operand bit is 0.
Zero |= RHS.Zero;
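A quick numeric illustration of the new min/max helpers: with 4-bit operands where LHS is known to be 11?? and RHS is known to be 00??, the unsigned maximum can only be LHS, and umax() proves exactly that. A sketch using the public KnownBits(Zero, One) constructor; the concrete bit patterns are illustrative:

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/KnownBits.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      // LHS: top two bits known one, low two unknown  -> value in [12, 15].
      llvm::KnownBits LHS(/*Zero=*/llvm::APInt(4, 0x0),
                          /*One=*/llvm::APInt(4, 0xC));
      // RHS: top two bits known zero, low two unknown -> value in [0, 3].
      llvm::KnownBits RHS(/*Zero=*/llvm::APInt(4, 0xC),
                          /*One=*/llvm::APInt(4, 0x0));
      llvm::KnownBits Max = llvm::KnownBits::umax(LHS, RHS);
      llvm::outs() << "umax in [" << Max.getMinValue().getZExtValue() << ", "
                   << Max.getMaxValue().getZExtValue() << "]\n"; // [12, 15]
      return 0;
    }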
diff --git a/contrib/llvm-project/llvm/lib/Support/LineIterator.cpp b/contrib/llvm-project/llvm/lib/Support/LineIterator.cpp
index 164436a2c48e..7bdf1271ac25 100644
--- a/contrib/llvm-project/llvm/lib/Support/LineIterator.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/LineIterator.cpp
@@ -33,7 +33,11 @@ static bool skipIfAtLineEnd(const char *&P) {
line_iterator::line_iterator(const MemoryBuffer &Buffer, bool SkipBlanks,
char CommentMarker)
- : Buffer(Buffer.getBufferSize() ? &Buffer : nullptr),
+ : line_iterator(Buffer.getMemBufferRef(), SkipBlanks, CommentMarker) {}
+
+line_iterator::line_iterator(const MemoryBufferRef &Buffer, bool SkipBlanks,
+ char CommentMarker)
+ : Buffer(Buffer.getBufferSize() ? Optional<MemoryBufferRef>(Buffer) : None),
CommentMarker(CommentMarker), SkipBlanks(SkipBlanks), LineNumber(1),
CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : nullptr,
0) {
@@ -78,7 +82,7 @@ void line_iterator::advance() {
if (*Pos == '\0') {
// We've hit the end of the buffer, reset ourselves to the end state.
- Buffer = nullptr;
+ Buffer = None;
CurrentLine = StringRef();
return;
}
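The new overload lets callers iterate the lines of a MemoryBufferRef without owning a MemoryBuffer. Typical usage with a throwaway in-memory buffer; apart from the MemoryBufferRef constructor added above, these are standard MemoryBuffer/line_iterator APIs:

    #include "llvm/Support/LineIterator.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      std::unique_ptr<llvm::MemoryBuffer> Buf =
          llvm::MemoryBuffer::getMemBuffer("alpha\n# comment\nbeta\n", "demo");
      // Construct directly from a MemoryBufferRef; '#' lines are skipped.
      for (llvm::line_iterator I(Buf->getMemBufferRef(), /*SkipBlanks=*/true,
                                 /*CommentMarker=*/'#'),
           E;
           I != E; ++I)
        llvm::outs() << I.line_number() << ": " << *I << "\n";
      return 0;
    }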
diff --git a/contrib/llvm-project/llvm/lib/Support/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/Support/LowLevelType.cpp
index fe77cb3db413..63559d5ac3ee 100644
--- a/contrib/llvm-project/llvm/lib/Support/LowLevelType.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/LowLevelType.cpp
@@ -23,7 +23,7 @@ LLT::LLT(MVT VT) {
} else if (VT.isValid()) {
// Aggregates are no different from real scalars as far as GlobalISel is
// concerned.
- assert(VT.getSizeInBits() != 0 && "invalid zero-sized type");
+ assert(VT.getSizeInBits().isNonZero() && "invalid zero-sized type");
init(/*IsPointer=*/false, /*IsVector=*/false, /*NumElements=*/0,
VT.getSizeInBits(), /*AddressSpace=*/0);
} else {
diff --git a/contrib/llvm-project/llvm/lib/Support/MemoryBufferRef.cpp b/contrib/llvm-project/llvm/lib/Support/MemoryBufferRef.cpp
new file mode 100644
index 000000000000..a93853c0acb1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Support/MemoryBufferRef.cpp
@@ -0,0 +1,19 @@
+//===- MemoryBufferRef.cpp - Memory Buffer Reference ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MemoryBufferRef interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+MemoryBufferRef::MemoryBufferRef(const MemoryBuffer &Buffer)
+ : Buffer(Buffer.getBuffer()), Identifier(Buffer.getBufferIdentifier()) {}
diff --git a/contrib/llvm-project/llvm/lib/Support/Path.cpp b/contrib/llvm-project/llvm/lib/Support/Path.cpp
index 37b3086fddf5..ef223ae5ac1d 100644
--- a/contrib/llvm-project/llvm/lib/Support/Path.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Path.cpp
@@ -354,10 +354,9 @@ StringRef root_path(StringRef path, Style style) {
if ((++pos != e) && is_separator((*pos)[0], style)) {
// {C:/,//net/}, so get the first two components.
return path.substr(0, b->size() + pos->size());
- } else {
- // just {C:,//net}, return the first component.
- return *b;
}
+ // just {C:,//net}, return the first component.
+ return *b;
}
// POSIX style root directory.
@@ -467,8 +466,7 @@ StringRef parent_path(StringRef path, Style style) {
size_t end_pos = parent_path_end(path, style);
if (end_pos == StringRef::npos)
return StringRef();
- else
- return path.substr(0, end_pos);
+ return path.substr(0, end_pos);
}
void remove_filename(SmallVectorImpl<char> &path, Style style) {
@@ -581,12 +579,10 @@ StringRef stem(StringRef path, Style style) {
size_t pos = fname.find_last_of('.');
if (pos == StringRef::npos)
return fname;
- else
- if ((fname.size() == 1 && fname == ".") ||
- (fname.size() == 2 && fname == ".."))
- return fname;
- else
- return fname.substr(0, pos);
+ if ((fname.size() == 1 && fname == ".") ||
+ (fname.size() == 2 && fname == ".."))
+ return fname;
+ return fname.substr(0, pos);
}
StringRef extension(StringRef path, Style style) {
@@ -594,12 +590,10 @@ StringRef extension(StringRef path, Style style) {
size_t pos = fname.find_last_of('.');
if (pos == StringRef::npos)
return StringRef();
- else
- if ((fname.size() == 1 && fname == ".") ||
- (fname.size() == 2 && fname == ".."))
- return StringRef();
- else
- return fname.substr(pos);
+ if ((fname.size() == 1 && fname == ".") ||
+ (fname.size() == 2 && fname == ".."))
+ return StringRef();
+ return fname.substr(pos);
}
bool is_separator(char value, Style style) {
@@ -683,6 +677,24 @@ bool is_absolute(const Twine &path, Style style) {
return rootDir && rootName;
}
+bool is_absolute_gnu(const Twine &path, Style style) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ // Handle '/' which is absolute for both Windows and POSIX systems.
+ // Handle '\\' on Windows.
+ if (!p.empty() && is_separator(p.front(), style))
+ return true;
+
+ if (real_style(style) == Style::windows) {
+ // Handle drive letter pattern (a character followed by ':') on Windows.
+ if (p.size() >= 2 && (p[0] && p[1] == ':'))
+ return true;
+ }
+
+ return false;
+}
+
bool is_relative(const Twine &path, Style style) {
return !is_absolute(path, style);
}
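is_absolute_gnu() differs from is_absolute() by treating a bare leading separator as absolute even for the Windows style, and a drive letter without a trailing separator ("C:") as absolute as well. A few spot checks against the logic in the hunk above, passing the Style explicitly so no default arguments are assumed:

    #include "llvm/Support/Path.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      using namespace llvm::sys::path;
      llvm::outs() << (is_absolute_gnu("/etc/rc.conf", Style::posix) ? "yes"
                                                                     : "no")
                   << "\n"; // yes: leading separator
      llvm::outs() << (is_absolute_gnu("C:", Style::windows) ? "yes" : "no")
                   << "\n"; // yes: drive letter pattern
      llvm::outs() << (is_absolute_gnu("C:", Style::posix) ? "yes" : "no")
                   << "\n"; // no: drive letters only count for Windows style
      return 0;
    }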
@@ -1281,7 +1293,7 @@ Expected<TempFile> TempFile::create(const Twine &Model, unsigned Mode) {
#endif
return std::move(Ret);
}
-}
+} // namespace fs
-} // end namsspace sys
-} // end namespace llvm
+} // namespace sys
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm-project/llvm/lib/Support/PrettyStackTrace.cpp
index 9072f9d2d2ee..5d3d95b5e7cb 100644
--- a/contrib/llvm-project/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/PrettyStackTrace.cpp
@@ -25,6 +25,7 @@
#include <cassert>
#include <cstdarg>
#include <cstdio>
+#include <cstring>
#include <tuple>
#ifdef HAVE_CRASHREPORTERCLIENT_H
@@ -253,8 +254,16 @@ void PrettyStackTraceFormat::print(raw_ostream &OS) const { OS << Str << "\n"; }
void PrettyStackTraceProgram::print(raw_ostream &OS) const {
OS << "Program arguments: ";
// Print the argument list.
- for (unsigned i = 0, e = ArgC; i != e; ++i)
- OS << ArgV[i] << ' ';
+ for (int I = 0; I < ArgC; ++I) {
+ const bool HaveSpace = ::strchr(ArgV[I], ' ');
+ if (I)
+ OS << ' ';
+ if (HaveSpace)
+ OS << '"';
+ OS.write_escaped(ArgV[I]);
+ if (HaveSpace)
+ OS << '"';
+ }
OS << '\n';
}
diff --git a/contrib/llvm-project/llvm/lib/Support/Process.cpp b/contrib/llvm-project/llvm/lib/Support/Process.cpp
index 9e6e233b26ac..e0814444876c 100644
--- a/contrib/llvm-project/llvm/lib/Support/Process.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Process.cpp
@@ -20,6 +20,8 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
+#include <stdlib.h> // for _Exit
+
using namespace llvm;
using namespace sys;
@@ -28,21 +30,22 @@ using namespace sys;
//=== independent code.
//===----------------------------------------------------------------------===//
-Optional<std::string> Process::FindInEnvPath(StringRef EnvName,
- StringRef FileName) {
- return FindInEnvPath(EnvName, FileName, {});
+Optional<std::string>
+Process::FindInEnvPath(StringRef EnvName, StringRef FileName, char Separator) {
+ return FindInEnvPath(EnvName, FileName, {}, Separator);
}
Optional<std::string> Process::FindInEnvPath(StringRef EnvName,
StringRef FileName,
- ArrayRef<std::string> IgnoreList) {
+ ArrayRef<std::string> IgnoreList,
+ char Separator) {
assert(!path::is_absolute(FileName));
Optional<std::string> FoundPath;
Optional<std::string> OptPath = Process::GetEnv(EnvName);
if (!OptPath.hasValue())
return FoundPath;
- const char EnvPathSeparatorStr[] = {EnvPathSeparator, '\0'};
+ const char EnvPathSeparatorStr[] = {Separator, '\0'};
SmallVector<StringRef, 8> Dirs;
SplitString(OptPath.getValue(), Dirs, EnvPathSeparatorStr);
@@ -90,10 +93,14 @@ static bool coreFilesPrevented = !LLVM_ENABLE_CRASH_DUMPS;
bool Process::AreCoreFilesPrevented() { return coreFilesPrevented; }
LLVM_ATTRIBUTE_NORETURN
-void Process::Exit(int RetCode) {
+void Process::Exit(int RetCode, bool NoCleanup) {
if (CrashRecoveryContext *CRC = CrashRecoveryContext::GetCurrent())
CRC->HandleExit(RetCode);
- ::exit(RetCode);
+
+ if (NoCleanup)
+ _Exit(RetCode);
+ else
+ ::exit(RetCode);
}
// Include the platform-specific parts of this class.
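Process::Exit() now takes a NoCleanup flag: when set, it leaves through _Exit() after notifying the crash-recovery context, so atexit handlers and static destructors are skipped. A sketch of the call this enables (the message text is illustrative):

    #include "llvm/Support/Process.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::errs() << "fatal condition; exiting without running cleanup\n";
      // Skips atexit handlers and static destructors, via _Exit() as above.
      llvm::sys::Process::Exit(1, /*NoCleanup=*/true);
    }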
diff --git a/contrib/llvm-project/llvm/lib/Support/Program.cpp b/contrib/llvm-project/llvm/lib/Support/Program.cpp
index 5294f65bd5a5..c7a59642b27e 100644
--- a/contrib/llvm-project/llvm/lib/Support/Program.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Program.cpp
@@ -26,17 +26,20 @@ using namespace sys;
static bool Execute(ProcessInfo &PI, StringRef Program,
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
ArrayRef<Optional<StringRef>> Redirects,
- unsigned MemoryLimit, std::string *ErrMsg);
+ unsigned MemoryLimit, std::string *ErrMsg,
+ BitVector *AffinityMask);
int sys::ExecuteAndWait(StringRef Program, ArrayRef<StringRef> Args,
Optional<ArrayRef<StringRef>> Env,
ArrayRef<Optional<StringRef>> Redirects,
unsigned SecondsToWait, unsigned MemoryLimit,
std::string *ErrMsg, bool *ExecutionFailed,
- Optional<ProcessStatistics> *ProcStat) {
+ Optional<ProcessStatistics> *ProcStat,
+ BitVector *AffinityMask) {
assert(Redirects.empty() || Redirects.size() == 3);
ProcessInfo PI;
- if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg)) {
+ if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg,
+ AffinityMask)) {
if (ExecutionFailed)
*ExecutionFailed = false;
ProcessInfo Result =
@@ -55,12 +58,13 @@ ProcessInfo sys::ExecuteNoWait(StringRef Program, ArrayRef<StringRef> Args,
Optional<ArrayRef<StringRef>> Env,
ArrayRef<Optional<StringRef>> Redirects,
unsigned MemoryLimit, std::string *ErrMsg,
- bool *ExecutionFailed) {
+ bool *ExecutionFailed, BitVector *AffinityMask) {
assert(Redirects.empty() || Redirects.size() == 3);
ProcessInfo PI;
if (ExecutionFailed)
*ExecutionFailed = false;
- if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg))
+ if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg,
+ AffinityMask))
if (ExecutionFailed)
*ExecutionFailed = true;
diff --git a/contrib/llvm-project/llvm/lib/Support/SHA1.cpp b/contrib/llvm-project/llvm/lib/Support/SHA1.cpp
index 417b13fea05a..5dce44af9ecd 100644
--- a/contrib/llvm-project/llvm/lib/Support/SHA1.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/SHA1.cpp
@@ -225,7 +225,7 @@ void SHA1::update(ArrayRef<uint8_t> Data) {
// Fast buffer filling for large inputs.
while (Data.size() >= BLOCK_LENGTH) {
assert(InternalState.BufferOffset == 0);
- assert(BLOCK_LENGTH % 4 == 0);
+ static_assert(BLOCK_LENGTH % 4 == 0, "");
constexpr size_t BLOCK_LENGTH_32 = BLOCK_LENGTH / 4;
for (size_t I = 0; I < BLOCK_LENGTH_32; ++I)
InternalState.Buffer.L[I] = support::endian::read32be(&Data[I * 4]);
diff --git a/contrib/llvm-project/llvm/lib/Support/Signals.cpp b/contrib/llvm-project/llvm/lib/Support/Signals.cpp
index 2cfdf2d42a4a..29be4df95954 100644
--- a/contrib/llvm-project/llvm/lib/Support/Signals.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Signals.cpp
@@ -44,6 +44,9 @@ static cl::opt<bool, true>
cl::desc("Disable symbolizing crash backtraces."),
cl::location(DisableSymbolicationFlag), cl::Hidden);
+constexpr char DisableSymbolizationEnv[] = "LLVM_DISABLE_SYMBOLIZATION";
+constexpr char LLVMSymbolizerPathEnv[] = "LLVM_SYMBOLIZER_PATH";
+
// Callbacks to run in signal handler must be lock-free because a signal handler
// could be running as we add new callbacks. We don't add unbounded numbers of
// callbacks, an array is therefore sufficient.
@@ -105,7 +108,7 @@ static FormattedNumber format_ptr(void *PC) {
LLVM_ATTRIBUTE_USED
static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
int Depth, llvm::raw_ostream &OS) {
- if (DisableSymbolicationFlag)
+ if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv))
return false;
// Don't recursively invoke the llvm-symbolizer binary.
@@ -117,7 +120,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
// Use llvm-symbolizer tool to symbolize the stack traces. First look for it
// alongside our binary, then in $PATH.
ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
- if (!Argv0.empty()) {
+ if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
+ LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
+ } else if (!Argv0.empty()) {
StringRef Parent = llvm::sys::path::parent_path(Argv0);
if (!Parent.empty())
LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer", Parent);
diff --git a/contrib/llvm-project/llvm/lib/Support/Signposts.cpp b/contrib/llvm-project/llvm/lib/Support/Signposts.cpp
index aa159e1da2ae..9353e9b294d1 100644
--- a/contrib/llvm-project/llvm/lib/Support/Signposts.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Signposts.cpp
@@ -13,6 +13,7 @@
#include "llvm/Config/config.h"
#if LLVM_SUPPORT_XCODE_SIGNPOSTS
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Mutex.h"
#include <os/signpost.h>
#endif // if LLVM_SUPPORT_XCODE_SIGNPOSTS
@@ -33,21 +34,22 @@ void LogDeleter(os_log_t *X) {
namespace llvm {
class SignpostEmitterImpl {
- using LogPtrTy =
- std::unique_ptr<os_log_t, std::function<void(os_log_t *)>>;
+ using LogPtrTy = std::unique_ptr<os_log_t, std::function<void(os_log_t *)>>;
using LogTy = LogPtrTy::element_type;
LogPtrTy SignpostLog;
- DenseMap<const Timer *, os_signpost_id_t> Signposts;
+ DenseMap<const void *, os_signpost_id_t> Signposts;
+ sys::SmartMutex<true> Mutex;
LogTy &getLogger() const { return *SignpostLog; }
- os_signpost_id_t getSignpostForTimer(const Timer *T) {
- const auto &I = Signposts.find(T);
+ os_signpost_id_t getSignpostForObject(const void *O) {
+ sys::SmartScopedLock<true> Lock(Mutex);
+ const auto &I = Signposts.find(O);
if (I != Signposts.end())
return I->second;
const auto &Inserted = Signposts.insert(
- std::make_pair(T, os_signpost_id_make_with_pointer(getLogger(), T)));
+ std::make_pair(O, os_signpost_id_make_with_pointer(getLogger(), O)));
return Inserted.first->second;
}
@@ -56,20 +58,19 @@ public:
bool isEnabled() const { return os_signpost_enabled(*SignpostLog); }
- void startTimerInterval(Timer *T) {
+ void startInterval(const void *O, llvm::StringRef Name) {
if (isEnabled()) {
- // Both strings used here are required to be constant literal strings
- os_signpost_interval_begin(getLogger(), getSignpostForTimer(T),
- "Pass Timers", "Begin %s",
- T->getName().c_str());
+ // Both strings used here are required to be constant literal strings.
+ os_signpost_interval_begin(getLogger(), getSignpostForObject(O),
+ "LLVM Timers", "Begin %s", Name.data());
}
}
- void endTimerInterval(Timer *T) {
+ void endInterval(const void *O, llvm::StringRef Name) {
if (isEnabled()) {
- // Both strings used here are required to be constant literal strings
- os_signpost_interval_end(getLogger(), getSignpostForTimer(T),
- "Pass Timers", "End %s", T->getName().c_str());
+ // Both strings used here are required to be constant literal strings.
+ os_signpost_interval_end(getLogger(), getSignpostForObject(O),
+ "LLVM Timers", "End %s", Name.data());
}
}
};
@@ -85,7 +86,7 @@ public:
SignpostEmitter::SignpostEmitter() {
#if HAVE_ANY_SIGNPOST_IMPL
Impl = new SignpostEmitterImpl();
-#else // if HAVE_ANY_SIGNPOST_IMPL
+#else // if HAVE_ANY_SIGNPOST_IMPL
Impl = nullptr;
#endif // if !HAVE_ANY_SIGNPOST_IMPL
}
@@ -104,18 +105,18 @@ bool SignpostEmitter::isEnabled() const {
#endif // if !HAVE_ANY_SIGNPOST_IMPL
}
-void SignpostEmitter::startTimerInterval(Timer *T) {
+void SignpostEmitter::startInterval(const void *O, StringRef Name) {
#if HAVE_ANY_SIGNPOST_IMPL
if (Impl == nullptr)
return;
- return Impl->startTimerInterval(T);
+ return Impl->startInterval(O, Name);
#endif // if !HAVE_ANY_SIGNPOST_IMPL
}
-void SignpostEmitter::endTimerInterval(Timer *T) {
+void SignpostEmitter::endInterval(const void *O, StringRef Name) {
#if HAVE_ANY_SIGNPOST_IMPL
if (Impl == nullptr)
return;
- Impl->endTimerInterval(T);
+ Impl->endInterval(O, Name);
#endif // if !HAVE_ANY_SIGNPOST_IMPL
}
diff --git a/contrib/llvm-project/llvm/lib/Support/SmallVector.cpp b/contrib/llvm-project/llvm/lib/Support/SmallVector.cpp
index 6d5fe7165f63..0005f7840912 100644
--- a/contrib/llvm-project/llvm/lib/Support/SmallVector.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/SmallVector.cpp
@@ -12,6 +12,9 @@
#include "llvm/ADT/SmallVector.h"
#include <cstdint>
+#ifdef LLVM_ENABLE_EXCEPTIONS
+#include <stdexcept>
+#endif
using namespace llvm;
// Check that no bytes are wasted and everything is well-aligned.
@@ -42,27 +45,72 @@ static_assert(sizeof(SmallVector<char, 0>) ==
sizeof(void *) * 2 + sizeof(void *),
"1 byte elements have word-sized type for size and capacity");
+/// Report that MinSize doesn't fit into this vector's size type. Throws
+/// std::length_error or calls report_fatal_error.
+LLVM_ATTRIBUTE_NORETURN
+static void report_size_overflow(size_t MinSize, size_t MaxSize);
+static void report_size_overflow(size_t MinSize, size_t MaxSize) {
+ std::string Reason = "SmallVector unable to grow. Requested capacity (" +
+ std::to_string(MinSize) +
+ ") is larger than maximum value for size type (" +
+ std::to_string(MaxSize) + ")";
+#ifdef LLVM_ENABLE_EXCEPTIONS
+ throw std::length_error(Reason);
+#else
+ report_fatal_error(Reason);
+#endif
+}
+
+/// Report that this vector is already at maximum capacity. Throws
+/// std::length_error or calls report_fatal_error.
+LLVM_ATTRIBUTE_NORETURN static void report_at_maximum_capacity(size_t MaxSize);
+static void report_at_maximum_capacity(size_t MaxSize) {
+ std::string Reason =
+ "SmallVector capacity unable to grow. Already at maximum size " +
+ std::to_string(MaxSize);
+#ifdef LLVM_ENABLE_EXCEPTIONS
+ throw std::length_error(Reason);
+#else
+ report_fatal_error(Reason);
+#endif
+}
+
// Note: Moving this function into the header may cause performance regression.
template <class Size_T>
-void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinCapacity,
- size_t TSize) {
+static size_t getNewCapacity(size_t MinSize, size_t TSize, size_t OldCapacity) {
+ constexpr size_t MaxSize = std::numeric_limits<Size_T>::max();
+
// Ensure we can fit the new capacity.
// This is only going to be applicable when the capacity is 32 bit.
- if (MinCapacity > SizeTypeMax())
- report_bad_alloc_error("SmallVector capacity overflow during allocation");
+ if (MinSize > MaxSize)
+ report_size_overflow(MinSize, MaxSize);
// Ensure we can meet the guarantee of space for at least one more element.
// The above check alone will not catch the case where grow is called with a
- // default MinCapacity of 0, but the current capacity cannot be increased.
+ // default MinSize of 0, but the current capacity cannot be increased.
// This is only going to be applicable when the capacity is 32 bit.
- if (capacity() == SizeTypeMax())
- report_bad_alloc_error("SmallVector capacity unable to grow");
+ if (OldCapacity == MaxSize)
+ report_at_maximum_capacity(MaxSize);
// In theory 2*capacity can overflow if the capacity is 64 bit, but the
// original capacity would never be large enough for this to be a problem.
- size_t NewCapacity = 2 * capacity() + 1; // Always grow.
- NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax());
+ size_t NewCapacity = 2 * OldCapacity + 1; // Always grow.
+ return std::min(std::max(NewCapacity, MinSize), MaxSize);
+}
+// Note: Moving this function into the header may cause performance regression.
+template <class Size_T>
+void *SmallVectorBase<Size_T>::mallocForGrow(size_t MinSize, size_t TSize,
+ size_t &NewCapacity) {
+ NewCapacity = getNewCapacity<Size_T>(MinSize, TSize, this->capacity());
+ return llvm::safe_malloc(NewCapacity * TSize);
+}
+
+// Note: Moving this function into the header may cause performance regression.
+template <class Size_T>
+void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinSize,
+ size_t TSize) {
+ size_t NewCapacity = getNewCapacity<Size_T>(MinSize, TSize, this->capacity());
void *NewElts;
if (BeginX == FirstEl) {
NewElts = safe_malloc(NewCapacity * TSize);
@@ -81,7 +129,7 @@ void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinCapacity,
template class llvm::SmallVectorBase<uint32_t>;
// Disable the uint64_t instantiation for 32-bit builds.
-// Both uint32_t and uint64_t instantations are needed for 64-bit builds.
+// Both uint32_t and uint64_t instantiations are needed for 64-bit builds.
// This instantiation will never be used in 32-bit builds, and will cause
// warnings when sizeof(Size_T) > sizeof(size_t).
#if SIZE_MAX > UINT32_MAX
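
The growth path is now split in two: getNewCapacity() computes the overflow-checked target and mallocForGrow()/grow_pod() perform the allocation. A standalone sketch of the doubling-and-clamping policy follows; the names are illustrative rather than LLVM's, and the real code falls back to report_fatal_error when exceptions are disabled:

#include <algorithm>
#include <cstddef>
#include <limits>
#include <stdexcept>
#include <string>

// Double the old capacity, but never return less than MinSize or more than
// the maximum value representable in the vector's size type.
template <class SizeT>
size_t computeNewCapacity(size_t MinSize, size_t OldCapacity) {
  constexpr size_t MaxSize = std::numeric_limits<SizeT>::max();
  if (MinSize > MaxSize)      // requested size does not fit the size type
    throw std::length_error("requested capacity " + std::to_string(MinSize) +
                            " exceeds size-type maximum " +
                            std::to_string(MaxSize));
  if (OldCapacity == MaxSize) // cannot guarantee room for one more element
    throw std::length_error("vector is already at maximum capacity");
  size_t NewCapacity = 2 * OldCapacity + 1; // always grow
  return std::min(std::max(NewCapacity, MinSize), MaxSize);
}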
diff --git a/contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp
index 9cc69732a964..89b7dc939dfc 100644
--- a/contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp
@@ -180,7 +180,7 @@ std::pair<unsigned, unsigned>
SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
if (!BufferID)
BufferID = FindBufferContainingLoc(Loc);
- assert(BufferID && "Invalid Location!");
+ assert(BufferID && "Invalid location!");
auto &SB = getBufferInfo(BufferID);
const char *Ptr = Loc.getPointer();
@@ -193,6 +193,30 @@ SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs);
}
+// FIXME: Note that the formatting of source locations is spread between
+// multiple functions, some in SourceMgr and some in SMDiagnostic. A better
+// solution would be a general-purpose source location formatter
+// in one of those two classes, or possibly in SMLoc.
+
+/// Get a string with the source location formatted in the standard
+/// style, but without the line offset. If \p IncludePath is true, the path
+/// is included. If false, only the file name and extension are included.
+std::string SourceMgr::getFormattedLocationNoOffset(SMLoc Loc,
+ bool IncludePath) const {
+ auto BufferID = FindBufferContainingLoc(Loc);
+ assert(BufferID && "Invalid location!");
+ auto FileSpec = getBufferInfo(BufferID).Buffer->getBufferIdentifier();
+
+ if (IncludePath) {
+ return FileSpec.str() + ":" + std::to_string(FindLineNumber(Loc, BufferID));
+ } else {
+ auto I = FileSpec.find_last_of("/\\");
+ I = (I == FileSpec.size()) ? 0 : (I + 1);
+ return FileSpec.substr(I).str() + ":" +
+ std::to_string(FindLineNumber(Loc, BufferID));
+ }
+}
+
/// Given a line and column number in a mapped buffer, turn it into an SMLoc.
/// This will return a null SMLoc if the line/column location is invalid.
SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo,
@@ -243,7 +267,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
std::pair<unsigned, unsigned> LineAndCol;
StringRef BufferID = "<unknown>";
- std::string LineStr;
+ StringRef LineStr;
if (Loc.isValid()) {
unsigned CurBuf = FindBufferContainingLoc(Loc);
@@ -264,7 +288,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
const char *BufEnd = CurMB->getBufferEnd();
while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r')
++LineEnd;
- LineStr = std::string(LineStart, LineEnd);
+ LineStr = StringRef(LineStart, LineEnd - LineStart);
// Convert any ranges to column ranges that only intersect the line of the
// location.
@@ -346,8 +370,8 @@ SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line,
ArrayRef<std::pair<unsigned, unsigned>> Ranges,
ArrayRef<SMFixIt> Hints)
: SM(&sm), Loc(L), Filename(std::string(FN)), LineNo(Line), ColumnNo(Col),
- Kind(Kind), Message(std::string(Msg)), LineContents(std::string(LineStr)),
- Ranges(Ranges.vec()), FixIts(Hints.begin(), Hints.end()) {
+ Kind(Kind), Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
+ FixIts(Hints.begin(), Hints.end()) {
llvm::sort(FixIts);
}
@@ -362,13 +386,12 @@ static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
size_t PrevHintEndCol = 0;
- for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end(); I != E;
- ++I) {
+ for (const llvm::SMFixIt &Fixit : FixIts) {
// If the fixit contains a newline or tab, ignore it.
- if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
+ if (Fixit.getText().find_first_of("\n\r\t") != StringRef::npos)
continue;
- SMRange R = I->getRange();
+ SMRange R = Fixit.getRange();
// If the line doesn't contain any part of the range, then ignore it.
if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
@@ -397,16 +420,15 @@ static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
// FIXME: This assertion is intended to catch unintended use of multibyte
// characters in fixits. If we decide to do this, we'll have to track
// separate byte widths for the source and fixit lines.
- assert((size_t)sys::locale::columnWidth(I->getText()) ==
- I->getText().size());
+ assert((size_t)sys::locale::columnWidth(Fixit.getText()) ==
+ Fixit.getText().size());
// This relies on one byte per column in our fixit hints.
- unsigned LastColumnModified = HintCol + I->getText().size();
+ unsigned LastColumnModified = HintCol + Fixit.getText().size();
if (LastColumnModified > FixItLine.size())
FixItLine.resize(LastColumnModified, ' ');
- std::copy(I->getText().begin(), I->getText().end(),
- FixItLine.begin() + HintCol);
+ llvm::copy(Fixit.getText(), FixItLine.begin() + HintCol);
PrevHintEndCol = LastColumnModified;
@@ -500,7 +522,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors,
// map like Clang's TextDiagnostic. For now, we'll just handle tabs by
// expanding them later, and bail out rather than show incorrect ranges and
// misaligned fixits for any other odd characters.
- if (find_if(LineContents, isNonASCII) != LineContents.end()) {
+ if (any_of(LineContents, isNonASCII)) {
printSourceLine(OS, LineContents);
return;
}
@@ -510,11 +532,9 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors,
std::string CaretLine(NumColumns + 1, ' ');
// Expand any ranges.
- for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
- std::pair<unsigned, unsigned> R = Ranges[r];
+ for (const std::pair<unsigned, unsigned> &R : Ranges)
std::fill(&CaretLine[R.first],
&CaretLine[std::min((size_t)R.second, CaretLine.size())], '~');
- }
// Add any fix-its.
// FIXME: Find the beginning of the line properly for multibyte characters.
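
The new getFormattedLocationNoOffset() helper complements GetMessage() when only a "file:line" string is wanted. A hedged usage sketch, assuming the SourceMgr and SMLoc come from elsewhere in the caller:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

// Print a short "file.td:42: note" style line without building a full
// SMDiagnostic.
void printShortNote(const llvm::SourceMgr &SM, llvm::SMLoc Loc,
                    llvm::StringRef Note) {
  llvm::errs() << SM.getFormattedLocationNoOffset(Loc, /*IncludePath=*/false)
               << ": " << Note << "\n";
}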
diff --git a/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp b/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp
index 031384ebaa91..3ccdc55b305d 100644
--- a/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp
@@ -30,7 +30,7 @@ struct GPUInfo {
unsigned Features;
};
-constexpr GPUInfo R600GPUs[26] = {
+constexpr GPUInfo R600GPUs[] = {
// Name Canonical Kind Features
// Name
{{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
@@ -63,16 +63,17 @@ constexpr GPUInfo R600GPUs[26] = {
// This table should be sorted by the value of GPUKind
// Don't bother listing the implicitly true features
-constexpr GPUInfo AMDGCNGPUs[38] = {
+constexpr GPUInfo AMDGCNGPUs[] = {
// Name Canonical Kind Features
// Name
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
{{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
{{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
- {{"hainan"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
- {{"oland"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
{{"pitcairn"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
{{"verde"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"gfx602"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
+ {{"hainan"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
+ {{"oland"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
{{"gfx700"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
{{"kaveri"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
{{"gfx701"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
@@ -83,36 +84,43 @@ constexpr GPUInfo AMDGCNGPUs[38] = {
{{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
{{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
{{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
- {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
- {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
- {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
- {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
- {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
- {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
- {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
- {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32},
- {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32},
- {{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx705"}, {"gfx705"}, GK_GFX705, FEATURE_NONE},
+ {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx805"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"tongapro"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
+ {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
+ {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1032"}, {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1033"}, {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
};
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
- auto I = std::lower_bound(Table.begin(), Table.end(), Search,
- [](const GPUInfo &A, const GPUInfo &B) {
- return A.Kind < B.Kind;
- });
+ auto I =
+ llvm::lower_bound(Table, Search, [](const GPUInfo &A, const GPUInfo &B) {
+ return A.Kind < B.Kind;
+ });
if (I == Table.end())
return nullptr;
@@ -187,14 +195,17 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
switch (AK) {
case GK_GFX600: return {6, 0, 0};
case GK_GFX601: return {6, 0, 1};
+ case GK_GFX602: return {6, 0, 2};
case GK_GFX700: return {7, 0, 0};
case GK_GFX701: return {7, 0, 1};
case GK_GFX702: return {7, 0, 2};
case GK_GFX703: return {7, 0, 3};
case GK_GFX704: return {7, 0, 4};
+ case GK_GFX705: return {7, 0, 5};
case GK_GFX801: return {8, 0, 1};
case GK_GFX802: return {8, 0, 2};
case GK_GFX803: return {8, 0, 3};
+ case GK_GFX805: return {8, 0, 5};
case GK_GFX810: return {8, 1, 0};
case GK_GFX900: return {9, 0, 0};
case GK_GFX902: return {9, 0, 2};
@@ -202,14 +213,27 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX906: return {9, 0, 6};
case GK_GFX908: return {9, 0, 8};
case GK_GFX909: return {9, 0, 9};
+ case GK_GFX90C: return {9, 0, 12};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
case GK_GFX1030: return {10, 3, 0};
+ case GK_GFX1031: return {10, 3, 1};
+ case GK_GFX1032: return {10, 3, 2};
+ case GK_GFX1033: return {10, 3, 3};
default: return {0, 0, 0};
}
}
+StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
+ assert(T.isAMDGPU());
+ auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(Arch) : parseArchR600(Arch);
+ if (ProcKind == GK_NONE)
+ return StringRef();
+
+ return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
+}
+
namespace llvm {
namespace RISCV {
@@ -233,6 +257,12 @@ bool checkCPUKind(CPUKind Kind, bool IsRV64) {
return RISCVCPUInfo[static_cast<unsigned>(Kind)].is64Bit() == IsRV64;
}
+bool checkTuneCPUKind(CPUKind Kind, bool IsRV64) {
+ if (Kind == CK_INVALID)
+ return false;
+ return RISCVCPUInfo[static_cast<unsigned>(Kind)].is64Bit() == IsRV64;
+}
+
CPUKind parseCPUKind(StringRef CPU) {
return llvm::StringSwitch<CPUKind>(CPU)
#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) .Case(NAME, CK_##ENUM)
@@ -240,6 +270,22 @@ CPUKind parseCPUKind(StringRef CPU) {
.Default(CK_INVALID);
}
+StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsRV64) {
+ return llvm::StringSwitch<StringRef>(TuneCPU)
+#define PROC_ALIAS(NAME, RV32, RV64) .Case(NAME, IsRV64 ? StringRef(RV64) : StringRef(RV32))
+#include "llvm/Support/RISCVTargetParser.def"
+ .Default(TuneCPU);
+}
+
+CPUKind parseTuneCPUKind(StringRef TuneCPU, bool IsRV64) {
+ TuneCPU = resolveTuneCPUAlias(TuneCPU, IsRV64);
+
+ return llvm::StringSwitch<CPUKind>(TuneCPU)
+#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) .Case(NAME, CK_##ENUM)
+#include "llvm/Support/RISCVTargetParser.def"
+ .Default(CK_INVALID);
+}
+
StringRef getMArchFromMcpu(StringRef CPU) {
CPUKind Kind = parseCPUKind(CPU);
return RISCVCPUInfo[static_cast<unsigned>(Kind)].DefaultMarch;
@@ -252,6 +298,15 @@ void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64) {
}
}
+void fillValidTuneCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64) {
+ for (const auto &C : RISCVCPUInfo) {
+ if (C.Kind != CK_INVALID && IsRV64 == C.is64Bit())
+ Values.emplace_back(C.Name);
+ }
+#define PROC_ALIAS(NAME, RV32, RV64) Values.emplace_back(StringRef(NAME));
+#include "llvm/Support/RISCVTargetParser.def"
+}
+
// Get all features except standard extension feature
bool getCPUFeaturesExceptStdExt(CPUKind Kind,
std::vector<StringRef> &Features) {
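
The RISC-V -mtune handling resolves an alias to a concrete 32- or 64-bit CPU name before parsing it like any other -mcpu value. A minimal sketch of that resolve-then-parse pattern with llvm::StringSwitch; the alias entry here is illustrative, the real ones live in RISCVTargetParser.def:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Map a tune-CPU alias to a width-specific name, falling back to the input
// unchanged so it can be parsed as a regular CPU afterwards.
llvm::StringRef resolveAlias(llvm::StringRef TuneCPU, bool IsRV64) {
  return llvm::StringSwitch<llvm::StringRef>(TuneCPU)
      .Case("generic", IsRV64 ? "generic-rv64" : "generic-rv32") // illustrative
      .Default(TuneCPU);
}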
diff --git a/contrib/llvm-project/llvm/lib/Support/Timer.cpp b/contrib/llvm-project/llvm/lib/Support/Timer.cpp
index c97538cb560a..f5a512f9a22d 100644
--- a/contrib/llvm-project/llvm/lib/Support/Timer.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Timer.cpp
@@ -53,6 +53,11 @@ namespace {
InfoOutputFilename("info-output-file", cl::value_desc("filename"),
cl::desc("File to append -stats and -timer output to"),
cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
+
+ static cl::opt<bool>
+ SortTimers("sort-timers", cl::desc("In the report, sort the timers in each group "
+ "in wall clock time order"),
+ cl::init(true), cl::Hidden);
}
std::unique_ptr<raw_fd_ostream> llvm::CreateInfoOutputFile() {
@@ -138,7 +143,7 @@ TimeRecord TimeRecord::getCurrentTime(bool Start) {
void Timer::startTimer() {
assert(!Running && "Cannot start a running timer");
Running = Triggered = true;
- Signposts->startTimerInterval(this);
+ Signposts->startInterval(this, getName());
StartTime = TimeRecord::getCurrentTime(true);
}
@@ -147,7 +152,7 @@ void Timer::stopTimer() {
Running = false;
Time += TimeRecord::getCurrentTime(false);
Time -= StartTime;
- Signposts->endTimerInterval(this);
+ Signposts->endInterval(this, getName());
}
void Timer::clear() {
@@ -301,8 +306,9 @@ void TimerGroup::addTimer(Timer &T) {
}
void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
- // Sort the timers in descending order by amount of time taken.
- llvm::sort(TimersToPrint);
+ // Perhaps sort the timers in descending order by amount of time taken.
+ if (SortTimers)
+ llvm::sort(TimersToPrint);
TimeRecord Total;
for (const PrintRecord &Record : TimersToPrint)
diff --git a/contrib/llvm-project/llvm/lib/Support/TrigramIndex.cpp b/contrib/llvm-project/llvm/lib/Support/TrigramIndex.cpp
index 88375e6e7863..4370adc9c3e0 100644
--- a/contrib/llvm-project/llvm/lib/Support/TrigramIndex.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/TrigramIndex.cpp
@@ -15,12 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/TrigramIndex.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-
#include <set>
-#include <string>
-#include <unordered_map>
using namespace llvm;
@@ -30,7 +25,7 @@ static bool isAdvancedMetachar(unsigned Char) {
return strchr(RegexAdvancedMetachars, Char) != nullptr;
}
-void TrigramIndex::insert(std::string Regex) {
+void TrigramIndex::insert(const std::string &Regex) {
if (Defeated) return;
std::set<unsigned> Was;
unsigned Cnt = 0;
diff --git a/contrib/llvm-project/llvm/lib/Support/Triple.cpp b/contrib/llvm-project/llvm/lib/Support/Triple.cpp
index fec1985ccaca..4f483c965282 100644
--- a/contrib/llvm-project/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Triple.cpp
@@ -36,6 +36,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case avr: return "avr";
case bpfeb: return "bpfeb";
case bpfel: return "bpfel";
+ case csky: return "csky";
case hexagon: return "hexagon";
case hsail64: return "hsail64";
case hsail: return "hsail";
@@ -53,6 +54,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case ppc64: return "powerpc64";
case ppc64le: return "powerpc64le";
case ppc: return "powerpc";
+ case ppcle: return "powerpcle";
case r600: return "r600";
case renderscript32: return "renderscript32";
case renderscript64: return "renderscript64";
@@ -100,7 +102,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case ppc64:
case ppc64le:
- case ppc: return "ppc";
+ case ppc:
+ case ppcle: return "ppc";
case mips:
case mipsel:
@@ -151,6 +154,7 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case riscv64: return "riscv";
case ve: return "ve";
+ case csky: return "csky";
}
}
@@ -160,8 +164,6 @@ StringRef Triple::getVendorTypeName(VendorType Kind) {
case AMD: return "amd";
case Apple: return "apple";
- case BGP: return "bgp";
- case BGQ: return "bgq";
case CSR: return "csr";
case Freescale: return "fsl";
case IBM: return "ibm";
@@ -187,7 +189,6 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case AMDHSA: return "amdhsa";
case AMDPAL: return "amdpal";
case Ananas: return "ananas";
- case CNK: return "cnk";
case CUDA: return "cuda";
case CloudABI: return "cloudabi";
case Contiki: return "contiki";
@@ -218,6 +219,7 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case WASI: return "wasi";
case WatchOS: return "watchos";
case Win32: return "windows";
+ case ZOS: return "zos";
}
llvm_unreachable("Invalid OSType");
@@ -238,6 +240,7 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case GNUEABI: return "gnueabi";
case GNUEABIHF: return "gnueabihf";
case GNUX32: return "gnux32";
+ case GNUILP32: return "gnu_ilp32";
case Itanium: return "itanium";
case MSVC: return "msvc";
case MacABI: return "macabi";
@@ -286,6 +289,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("ppc64", ppc64)
.Case("ppc32", ppc)
.Case("ppc", ppc)
+ .Case("ppc32le", ppcle)
+ .Case("ppcle", ppcle)
.Case("ppc64le", ppc64le)
.Case("r600", r600)
.Case("amdgcn", amdgcn)
@@ -321,6 +326,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("renderscript32", renderscript32)
.Case("renderscript64", renderscript64)
.Case("ve", ve)
+ .Case("csky", csky)
.Default(UnknownArch);
}
@@ -396,6 +402,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Cases("i786", "i886", "i986", Triple::x86)
.Cases("amd64", "x86_64", "x86_64h", Triple::x86_64)
.Cases("powerpc", "powerpcspe", "ppc", "ppc32", Triple::ppc)
+ .Cases("powerpcle", "ppcle", "ppc32le", Triple::ppcle)
.Cases("powerpc64", "ppu", "ppc64", Triple::ppc64)
.Cases("powerpc64le", "ppc64le", Triple::ppc64le)
.Case("xscale", Triple::arm)
@@ -406,6 +413,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("arc", Triple::arc)
.Case("arm64", Triple::aarch64)
.Case("arm64_32", Triple::aarch64_32)
+ .Case("arm64e", Triple::aarch64)
.Case("arm", Triple::arm)
.Case("armeb", Triple::armeb)
.Case("thumb", Triple::thumb)
@@ -450,6 +458,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("ve", Triple::ve)
.Case("wasm32", Triple::wasm32)
.Case("wasm64", Triple::wasm64)
+ .Case("csky", Triple::csky)
.Default(Triple::UnknownArch);
// Some architectures require special parsing logic just to compute the
@@ -470,8 +479,6 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
.Case("apple", Triple::Apple)
.Case("pc", Triple::PC)
.Case("scei", Triple::SCEI)
- .Case("bgp", Triple::BGP)
- .Case("bgq", Triple::BGQ)
.Case("fsl", Triple::Freescale)
.Case("ibm", Triple::IBM)
.Case("img", Triple::ImaginationTechnologies)
@@ -504,11 +511,11 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("solaris", Triple::Solaris)
.StartsWith("win32", Triple::Win32)
.StartsWith("windows", Triple::Win32)
+ .StartsWith("zos", Triple::ZOS)
.StartsWith("haiku", Triple::Haiku)
.StartsWith("minix", Triple::Minix)
.StartsWith("rtems", Triple::RTEMS)
.StartsWith("nacl", Triple::NaCl)
- .StartsWith("cnk", Triple::CNK)
.StartsWith("aix", Triple::AIX)
.StartsWith("cuda", Triple::CUDA)
.StartsWith("nvcl", Triple::NVCL)
@@ -529,26 +536,27 @@ static Triple::OSType parseOS(StringRef OSName) {
static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
- .StartsWith("eabihf", Triple::EABIHF)
- .StartsWith("eabi", Triple::EABI)
- .StartsWith("gnuabin32", Triple::GNUABIN32)
- .StartsWith("gnuabi64", Triple::GNUABI64)
- .StartsWith("gnueabihf", Triple::GNUEABIHF)
- .StartsWith("gnueabi", Triple::GNUEABI)
- .StartsWith("gnux32", Triple::GNUX32)
- .StartsWith("code16", Triple::CODE16)
- .StartsWith("gnu", Triple::GNU)
- .StartsWith("android", Triple::Android)
- .StartsWith("musleabihf", Triple::MuslEABIHF)
- .StartsWith("musleabi", Triple::MuslEABI)
- .StartsWith("musl", Triple::Musl)
- .StartsWith("msvc", Triple::MSVC)
- .StartsWith("itanium", Triple::Itanium)
- .StartsWith("cygnus", Triple::Cygnus)
- .StartsWith("coreclr", Triple::CoreCLR)
- .StartsWith("simulator", Triple::Simulator)
- .StartsWith("macabi", Triple::MacABI)
- .Default(Triple::UnknownEnvironment);
+ .StartsWith("eabihf", Triple::EABIHF)
+ .StartsWith("eabi", Triple::EABI)
+ .StartsWith("gnuabin32", Triple::GNUABIN32)
+ .StartsWith("gnuabi64", Triple::GNUABI64)
+ .StartsWith("gnueabihf", Triple::GNUEABIHF)
+ .StartsWith("gnueabi", Triple::GNUEABI)
+ .StartsWith("gnux32", Triple::GNUX32)
+ .StartsWith("gnu_ilp32", Triple::GNUILP32)
+ .StartsWith("code16", Triple::CODE16)
+ .StartsWith("gnu", Triple::GNU)
+ .StartsWith("android", Triple::Android)
+ .StartsWith("musleabihf", Triple::MuslEABIHF)
+ .StartsWith("musleabi", Triple::MuslEABI)
+ .StartsWith("musl", Triple::Musl)
+ .StartsWith("msvc", Triple::MSVC)
+ .StartsWith("itanium", Triple::Itanium)
+ .StartsWith("cygnus", Triple::Cygnus)
+ .StartsWith("coreclr", Triple::CoreCLR)
+ .StartsWith("simulator", Triple::Simulator)
+ .StartsWith("macabi", Triple::MacABI)
+ .Default(Triple::UnknownEnvironment);
}
static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
@@ -558,6 +566,7 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
.EndsWith("xcoff", Triple::XCOFF)
.EndsWith("coff", Triple::COFF)
.EndsWith("elf", Triple::ELF)
+ .EndsWith("goff", Triple::GOFF)
.EndsWith("macho", Triple::MachO)
.EndsWith("wasm", Triple::Wasm)
.Default(Triple::UnknownObjectFormat);
@@ -571,6 +580,9 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
if (SubArchName == "powerpcspe")
return Triple::PPCSubArch_spe;
+ if (SubArchName == "arm64e")
+ return Triple::AArch64SubArch_arm64e;
+
StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);
// For now, this is the small part. Early return.
@@ -631,6 +643,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
return Triple::ARMSubArch_v8_5a;
case ARM::ArchKind::ARMV8_6A:
return Triple::ARMSubArch_v8_6a;
+ case ARM::ArchKind::ARMV8_7A:
+ return Triple::ARMSubArch_v8_7a;
case ARM::ArchKind::ARMV8R:
return Triple::ARMSubArch_v8r;
case ARM::ArchKind::ARMV8MBaseline:
@@ -649,6 +663,7 @@ static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
case Triple::UnknownObjectFormat: return "";
case Triple::COFF: return "coff";
case Triple::ELF: return "elf";
+ case Triple::GOFF: return "goff";
case Triple::MachO: return "macho";
case Triple::Wasm: return "wasm";
case Triple::XCOFF: return "xcoff";
@@ -680,6 +695,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::avr:
case Triple::bpfeb:
case Triple::bpfel:
+ case Triple::csky:
case Triple::hexagon:
case Triple::hsail64:
case Triple::hsail:
@@ -695,6 +711,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::nvptx64:
case Triple::nvptx:
case Triple::ppc64le:
+ case Triple::ppcle:
case Triple::r600:
case Triple::renderscript32:
case Triple::renderscript64:
@@ -706,7 +723,6 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::sparcv9:
case Triple::spir64:
case Triple::spir:
- case Triple::systemz:
case Triple::tce:
case Triple::tcele:
case Triple::thumbeb:
@@ -720,6 +736,11 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
return Triple::XCOFF;
return Triple::ELF;
+ case Triple::systemz:
+ if (T.isOSzOS())
+ return Triple::GOFF;
+ return Triple::ELF;
+
case Triple::wasm32:
case Triple::wasm64:
return Triple::Wasm;
@@ -1017,7 +1038,7 @@ StringRef Triple::getOSAndEnvironmentName() const {
}
static unsigned EatNumber(StringRef &Str) {
- assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number");
+ assert(!Str.empty() && isDigit(Str[0]) && "Not a number");
unsigned Result = 0;
do {
@@ -1026,7 +1047,7 @@ static unsigned EatNumber(StringRef &Str) {
// Eat the digit.
Str = Str.substr(1);
- } while (!Str.empty() && Str[0] >= '0' && Str[0] <= '9');
+ } while (!Str.empty() && isDigit(Str[0]));
return Result;
}
@@ -1249,6 +1270,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::arc:
case llvm::Triple::arm:
case llvm::Triple::armeb:
+ case llvm::Triple::csky:
case llvm::Triple::hexagon:
case llvm::Triple::hsail:
case llvm::Triple::kalimba:
@@ -1258,6 +1280,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::mipsel:
case llvm::Triple::nvptx:
case llvm::Triple::ppc:
+ case llvm::Triple::ppcle:
case llvm::Triple::r600:
case llvm::Triple::renderscript32:
case llvm::Triple::riscv32:
@@ -1321,7 +1344,6 @@ Triple Triple::get32BitArchVariant() const {
case Triple::bpfeb:
case Triple::bpfel:
case Triple::msp430:
- case Triple::ppc64le:
case Triple::systemz:
case Triple::ve:
T.setArch(UnknownArch);
@@ -1332,6 +1354,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::arc:
case Triple::arm:
case Triple::armeb:
+ case Triple::csky:
case Triple::hexagon:
case Triple::hsail:
case Triple::kalimba:
@@ -1341,6 +1364,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::mipsel:
case Triple::nvptx:
case Triple::ppc:
+ case Triple::ppcle:
case Triple::r600:
case Triple::renderscript32:
case Triple::riscv32:
@@ -1367,6 +1391,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::mips64el: T.setArch(Triple::mipsel); break;
case Triple::nvptx64: T.setArch(Triple::nvptx); break;
case Triple::ppc64: T.setArch(Triple::ppc); break;
+ case Triple::ppc64le: T.setArch(Triple::ppcle); break;
case Triple::renderscript64: T.setArch(Triple::renderscript32); break;
case Triple::riscv64: T.setArch(Triple::riscv32); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
@@ -1383,6 +1408,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::UnknownArch:
case Triple::arc:
case Triple::avr:
+ case Triple::csky:
case Triple::hexagon:
case Triple::kalimba:
case Triple::lanai:
@@ -1430,6 +1456,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::mipsel: T.setArch(Triple::mips64el); break;
case Triple::nvptx: T.setArch(Triple::nvptx64); break;
case Triple::ppc: T.setArch(Triple::ppc64); break;
+ case Triple::ppcle: T.setArch(Triple::ppc64le); break;
case Triple::renderscript32: T.setArch(Triple::renderscript64); break;
case Triple::riscv32: T.setArch(Triple::riscv64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;
@@ -1476,6 +1503,7 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::x86_64:
case Triple::xcore:
case Triple::ve:
+ case Triple::csky:
// ARM is intentionally unsupported here, changing the architecture would
// drop any arch suffixes.
@@ -1488,6 +1516,7 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::bpfel: T.setArch(Triple::bpfeb); break;
case Triple::mips64el:T.setArch(Triple::mips64); break;
case Triple::mipsel: T.setArch(Triple::mips); break;
+ case Triple::ppcle: T.setArch(Triple::ppc); break;
case Triple::ppc64le: T.setArch(Triple::ppc64); break;
case Triple::sparcel: T.setArch(Triple::sparc); break;
case Triple::tcele: T.setArch(Triple::tce); break;
@@ -1505,7 +1534,6 @@ Triple Triple::getLittleEndianArchVariant() const {
switch (getArch()) {
case Triple::UnknownArch:
case Triple::lanai:
- case Triple::ppc:
case Triple::sparcv9:
case Triple::systemz:
@@ -1520,6 +1548,7 @@ Triple Triple::getLittleEndianArchVariant() const {
case Triple::bpfeb: T.setArch(Triple::bpfel); break;
case Triple::mips64: T.setArch(Triple::mips64el); break;
case Triple::mips: T.setArch(Triple::mipsel); break;
+ case Triple::ppc: T.setArch(Triple::ppcle); break;
case Triple::ppc64: T.setArch(Triple::ppc64le); break;
case Triple::sparc: T.setArch(Triple::sparcel); break;
case Triple::tce: T.setArch(Triple::tcele); break;
@@ -1539,6 +1568,7 @@ bool Triple::isLittleEndian() const {
case Triple::arm:
case Triple::avr:
case Triple::bpfel:
+ case Triple::csky:
case Triple::hexagon:
case Triple::hsail64:
case Triple::hsail:
@@ -1550,6 +1580,7 @@ bool Triple::isLittleEndian() const {
case Triple::msp430:
case Triple::nvptx64:
case Triple::nvptx:
+ case Triple::ppcle:
case Triple::ppc64le:
case Triple::r600:
case Triple::renderscript32:
@@ -1636,6 +1667,9 @@ VersionTuple Triple::getMinimumSupportedOSVersion() const {
// ARM64 simulators are supported for iOS 14+.
if (isMacCatalystEnvironment() || isSimulatorEnvironment())
return VersionTuple(14, 0, 0);
+ // ARM64e slice is supported starting from iOS 14.
+ if (isArm64e())
+ return VersionTuple(14, 0, 0);
break;
case Triple::TvOS:
// ARM64 simulators are supported for tvOS 14+.
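
With powerpcle added, Triple now round-trips the little-endian 32-bit PowerPC variant through the endian and width helpers. A hedged sanity-check sketch based on the mappings added above:

#include "llvm/ADT/Triple.h"
#include <cassert>

// Exercise the new ppcle mappings: its big-endian variant is ppc and its
// 64-bit variant is ppc64le, per the switch cases in this change.
void checkPpcleMappings() {
  llvm::Triple T("powerpcle-unknown-linux-gnu");
  assert(T.getArch() == llvm::Triple::ppcle);
  assert(T.getBigEndianArchVariant().getArch() == llvm::Triple::ppc);
  assert(T.get64BitArchVariant().getArch() == llvm::Triple::ppc64le);
  (void)T;
}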
diff --git a/contrib/llvm-project/llvm/lib/Support/Unicode.cpp b/contrib/llvm-project/llvm/lib/Support/Unicode.cpp
index 4d195069682b..bb6e75555b4c 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unicode.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Unicode.cpp
@@ -339,11 +339,22 @@ static inline int charWidth(int UCS)
return 1;
}
+static bool isprintableascii(char c) { return c > 31 && c < 127; }
+
int columnWidthUTF8(StringRef Text) {
unsigned ColumnWidth = 0;
unsigned Length;
for (size_t i = 0, e = Text.size(); i < e; i += Length) {
Length = getNumBytesForUTF8(Text[i]);
+
+ // fast path for ASCII characters
+ if (Length == 1) {
+ if (!isprintableascii(Text[i]))
+ return ErrorNonPrintableCharacter;
+ ColumnWidth += 1;
+ continue;
+ }
+
if (Length <= 0 || i + Length > Text.size())
return ErrorInvalidUTF8;
UTF32 buf[1];
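
The fast path above skips the UTF-8 decoder for single-byte units that are printable ASCII, which is the common case for diagnostics. A standalone sketch of the same predicate applied to a whole buffer; the function name is illustrative:

#include <cstddef>

// Printable ASCII is 0x20..0x7E; each such byte is exactly one column wide.
static bool isPrintableAscii(char C) { return C > 31 && C < 127; }

// Return the width if the text is pure printable ASCII, or -1 to tell the
// caller to fall back to full UTF-8 width computation.
static int asciiOnlyColumnWidth(const char *Text, size_t Len) {
  for (size_t I = 0; I != Len; ++I)
    if (!isPrintableAscii(Text[I]))
      return -1;
  return static_cast<int>(Len);
}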
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc
index d91b269cc6d3..77f3f54bd881 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc
@@ -33,6 +33,7 @@
#include <dirent.h>
#include <pwd.h>
+#include <sys/file.h>
#ifdef __APPLE__
#include <mach-o/dyld.h>
@@ -146,6 +147,9 @@ test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
static char *
getprogpath(char ret[PATH_MAX], const char *bin)
{
+ if (bin == nullptr)
+ return nullptr;
+
/* First approach: absolute path. */
if (bin[0] == '/') {
if (test_dir(ret, "/", bin) == 0)
@@ -681,8 +685,6 @@ void expand_tilde(const Twine &path, SmallVectorImpl<char> &dest) {
path.toVector(dest);
expandTildeExpr(dest);
-
- return;
}
static file_type typeForMode(mode_t Mode) {
@@ -791,6 +793,16 @@ std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
if (::futimes(FD, Times))
return std::error_code(errno, std::generic_category());
return std::error_code();
+#elif defined(__MVS__)
+ attrib_t Attr;
+ memset(&Attr, 0, sizeof(Attr));
+ Attr.att_atimechg = 1;
+ Attr.att_atime = sys::toTimeT(AccessTime);
+ Attr.att_mtimechg = 1;
+ Attr.att_mtime = sys::toTimeT(ModificationTime);
+ if (::__fchattr(FD, &Attr, sizeof(Attr)) != 0)
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
#else
#warning Missing futimes() and futimens()
return make_error_code(errc::function_not_supported);
@@ -1055,8 +1067,13 @@ file_t getStdoutHandle() { return 1; }
file_t getStderrHandle() { return 2; }
Expected<size_t> readNativeFile(file_t FD, MutableArrayRef<char> Buf) {
+#if defined(__APPLE__)
+ size_t Size = std::min<size_t>(Buf.size(), INT32_MAX);
+#else
+ size_t Size = Buf.size();
+#endif
ssize_t NumRead =
- sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size());
+ sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Size);
if (ssize_t(NumRead) == -1)
return errorCodeToError(std::error_code(errno, std::generic_category()));
return NumRead;
@@ -1064,20 +1081,69 @@ Expected<size_t> readNativeFile(file_t FD, MutableArrayRef<char> Buf) {
Expected<size_t> readNativeFileSlice(file_t FD, MutableArrayRef<char> Buf,
uint64_t Offset) {
+#if defined(__APPLE__)
+ size_t Size = std::min<size_t>(Buf.size(), INT32_MAX);
+#else
+ size_t Size = Buf.size();
+#endif
#ifdef HAVE_PREAD
ssize_t NumRead =
- sys::RetryAfterSignal(-1, ::pread, FD, Buf.data(), Buf.size(), Offset);
+ sys::RetryAfterSignal(-1, ::pread, FD, Buf.data(), Size, Offset);
#else
if (lseek(FD, Offset, SEEK_SET) == -1)
return errorCodeToError(std::error_code(errno, std::generic_category()));
ssize_t NumRead =
- sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size());
+ sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Size);
#endif
if (NumRead == -1)
return errorCodeToError(std::error_code(errno, std::generic_category()));
return NumRead;
}
+std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout) {
+ auto Start = std::chrono::steady_clock::now();
+ auto End = Start + Timeout;
+ do {
+ struct flock Lock;
+ memset(&Lock, 0, sizeof(Lock));
+ Lock.l_type = F_WRLCK;
+ Lock.l_whence = SEEK_SET;
+ Lock.l_start = 0;
+ Lock.l_len = 0;
+ if (::fcntl(FD, F_SETLK, &Lock) != -1)
+ return std::error_code();
+ int Error = errno;
+ if (Error != EACCES && Error != EAGAIN)
+ return std::error_code(Error, std::generic_category());
+ usleep(1000);
+ } while (std::chrono::steady_clock::now() < End);
+ return make_error_code(errc::no_lock_available);
+}
+
+std::error_code lockFile(int FD) {
+ struct flock Lock;
+ memset(&Lock, 0, sizeof(Lock));
+ Lock.l_type = F_WRLCK;
+ Lock.l_whence = SEEK_SET;
+ Lock.l_start = 0;
+ Lock.l_len = 0;
+ if (::fcntl(FD, F_SETLKW, &Lock) != -1)
+ return std::error_code();
+ int Error = errno;
+ return std::error_code(Error, std::generic_category());
+}
+
+std::error_code unlockFile(int FD) {
+ struct flock Lock;
+ Lock.l_type = F_UNLCK;
+ Lock.l_whence = SEEK_SET;
+ Lock.l_start = 0;
+ Lock.l_len = 0;
+ if (::fcntl(FD, F_SETLK, &Lock) != -1)
+ return std::error_code();
+ return std::error_code(errno, std::generic_category());
+}
+
std::error_code closeFile(file_t &F) {
file_t TmpF = F;
F = kInvalidFile;
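
The new lockFile/tryLockFile/unlockFile helpers give callers advisory whole-file locks built on fcntl here and on LockFileEx in the Windows counterpart below. A hedged usage sketch, assuming the declarations live in llvm/Support/FileSystem.h and FD is an already-open descriptor:

#include "llvm/Support/FileSystem.h"
#include <chrono>
#include <system_error>

// Try to take an exclusive lock for up to one second, do some work, unlock.
bool withFileLock(int FD) {
  using namespace std::chrono_literals;
  if (std::error_code EC = llvm::sys::fs::tryLockFile(FD, 1000ms))
    return false; // another process holds the lock or locking failed
  // ... critical section: read or update the locked file ...
  (void)llvm::sys::fs::unlockFile(FD);
  return true;
}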
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Process.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Process.inc
index 24f16b51af7b..7425d084da27 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Process.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Process.inc
@@ -313,7 +313,7 @@ unsigned Process::StandardErrColumns() {
return getColumns();
}
-#ifdef HAVE_TERMINFO
+#ifdef LLVM_ENABLE_TERMINFO
// We manually declare these extern functions because finding the correct
// headers from various terminfo, curses, or other sources is harder than
// writing their specs down.
@@ -323,12 +323,12 @@ extern "C" int del_curterm(struct term *termp);
extern "C" int tigetnum(char *capname);
#endif
-#ifdef HAVE_TERMINFO
+#ifdef LLVM_ENABLE_TERMINFO
static ManagedStatic<std::mutex> TermColorMutex;
#endif
static bool terminalHasColors(int fd) {
-#ifdef HAVE_TERMINFO
+#ifdef LLVM_ENABLE_TERMINFO
// First, acquire a global lock because these C routines are thread hostile.
std::lock_guard<std::mutex> G(*TermColorMutex);
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Program.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Program.inc
index 8f41fc015163..fb56fa4b0d1d 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Program.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Program.inc
@@ -174,7 +174,8 @@ toNullTerminatedCStringArray(ArrayRef<StringRef> Strings, StringSaver &Saver) {
static bool Execute(ProcessInfo &PI, StringRef Program,
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
ArrayRef<Optional<StringRef>> Redirects,
- unsigned MemoryLimit, std::string *ErrMsg) {
+ unsigned MemoryLimit, std::string *ErrMsg,
+ BitVector *AffinityMask) {
if (!llvm::sys::fs::exists(Program)) {
if (ErrMsg)
*ErrMsg = std::string("Executable \"") + Program.str() +
@@ -182,6 +183,9 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
return false;
}
+ assert(!AffinityMask && "Starting a process with an affinity mask is "
+ "currently not supported on Unix!");
+
BumpPtrAllocator Allocator;
StringSaver Saver(Allocator);
std::vector<const char *> ArgVector, EnvVector;
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc
index f68374d29f02..3d7b5d2fe5aa 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc
@@ -36,6 +36,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Format.h"
@@ -46,7 +47,6 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
-#include <sysexits.h>
#ifdef HAVE_BACKTRACE
# include BACKTRACE_HEADER // For backtrace().
#endif
@@ -331,7 +331,7 @@ static void RegisterHandlers() { // Not signal-safe.
registerHandler(S, SignalKind::IsInfo);
}
-static void UnregisterHandlers() {
+void sys::unregisterHandlers() {
// Restore all of the signal handlers to how they were before we showed up.
for (unsigned i = 0, e = NumRegisteredSignals.load(); i != e; ++i) {
sigaction(RegisteredSignalInfo[i].SigNo,
@@ -367,7 +367,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
// crashes when we return and the signal reissues. This also ensures that if
// we crash in our signal handler that the program will terminate immediately
// instead of recursing in the signal handler.
- UnregisterHandlers();
+ sys::unregisterHandlers();
// Unmask all potentially blocked kill signals.
sigset_t SigMask;
@@ -382,14 +382,15 @@ static RETSIGTYPE SignalHandler(int Sig) {
OneShotPipeSignalFunction.exchange(nullptr))
return OldOneShotPipeFunction();
- if (std::find(std::begin(IntSigs), std::end(IntSigs), Sig)
- != std::end(IntSigs)) {
+ bool IsIntSig = llvm::is_contained(IntSigs, Sig);
+ if (IsIntSig)
if (auto OldInterruptFunction = InterruptFunction.exchange(nullptr))
return OldInterruptFunction();
- raise(Sig); // Execute the default handler.
+ if (Sig == SIGPIPE || IsIntSig) {
+ raise(Sig); // Execute the default handler.
return;
- }
+ }
}
// Otherwise if it is a fault (like SEGV) run any handler.
@@ -554,7 +555,7 @@ static int unwindBacktrace(void **StackTrace, int MaxEntries) {
//
// On glibc systems we have the 'backtrace' function, which works nicely, but
// doesn't demangle symbols.
-void llvm::sys::PrintStackTrace(raw_ostream &OS) {
+void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
#if ENABLE_BACKTRACES
static void *StackTrace[256];
int depth = 0;
@@ -571,9 +572,15 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
#endif
if (!depth)
return;
-
- if (printSymbolizedStackTrace(Argv0, StackTrace, depth, OS))
+ // If "Depth" is not provided by the caller, use the return value of
+ // backtrace() for printing a symbolized stack trace.
+ if (!Depth)
+ Depth = depth;
+ if (printSymbolizedStackTrace(Argv0, StackTrace, Depth, OS))
return;
+ OS << "Stack dump without symbol names (ensure you have llvm-symbolizer in "
+ "your PATH or set the environment var `LLVM_SYMBOLIZER_PATH` to point "
+ "to it):\n";
#if HAVE_DLFCN_H && HAVE_DLADDR
int width = 0;
for (int i = 0; i < depth; ++i) {
@@ -615,7 +622,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
OS << '\n';
}
#elif defined(HAVE_BACKTRACE)
- backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
+ backtrace_symbols_fd(StackTrace, Depth, STDERR_FILENO);
#endif
#endif
}
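
PrintStackTrace now takes an optional Depth that caps how many frames are symbolized and printed; 0 keeps the old behaviour of printing everything backtrace() returned. A hedged usage sketch:

#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"

// Print at most the top eight frames of the current stack to stderr.
void dumpTopFrames() {
  llvm::sys::PrintStackTrace(llvm::errs(), /*Depth=*/8);
}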
diff --git a/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp b/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp
index 5b757c9ea80d..05332b00bd1f 100644
--- a/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp
@@ -792,14 +792,12 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
}
bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
- llvm::MemoryBuffer *Buffer,
+ const llvm::MemoryBufferRef &Buffer,
Optional<uint32_t> User,
Optional<uint32_t> Group,
Optional<llvm::sys::fs::file_type> Type,
Optional<llvm::sys::fs::perms> Perms) {
- return addFile(P, ModificationTime,
- llvm::MemoryBuffer::getMemBuffer(
- Buffer->getBuffer(), Buffer->getBufferIdentifier()),
+ return addFile(P, ModificationTime, llvm::MemoryBuffer::getMemBuffer(Buffer),
std::move(User), std::move(Group), std::move(Type),
std::move(Perms));
}
@@ -1018,7 +1016,6 @@ RedirectingFileSystem::RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> FS)
if (auto ExternalWorkingDirectory =
ExternalFS->getCurrentWorkingDirectory()) {
WorkingDirectory = *ExternalWorkingDirectory;
- ExternalFSValidWD = true;
}
}
@@ -1077,12 +1074,6 @@ RedirectingFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
if (!exists(Path))
return errc::no_such_file_or_directory;
- // Always change the external FS but ignore its result.
- if (ExternalFS) {
- auto EC = ExternalFS->setCurrentWorkingDirectory(Path);
- ExternalFSValidWD = !static_cast<bool>(EC);
- }
-
SmallString<128> AbsolutePath;
Path.toVector(AbsolutePath);
if (std::error_code EC = makeAbsolute(AbsolutePath))
@@ -1091,8 +1082,14 @@ RedirectingFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
return {};
}
-std::error_code RedirectingFileSystem::isLocal(const Twine &Path,
+std::error_code RedirectingFileSystem::isLocal(const Twine &Path_,
bool &Result) {
+ SmallString<256> Path;
+ Path_.toVector(Path);
+
+ if (std::error_code EC = makeCanonical(Path))
+ return {};
+
return ExternalFS->isLocal(Path, Result);
}
@@ -1127,14 +1124,21 @@ std::error_code RedirectingFileSystem::makeAbsolute(SmallVectorImpl<char> &Path)
directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
std::error_code &EC) {
- ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Dir);
+ SmallString<256> Path;
+ Dir.toVector(Path);
+
+ EC = makeCanonical(Path);
+ if (EC)
+ return {};
+
+ ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Path);
if (!E) {
EC = E.getError();
if (shouldUseExternalFS() && EC == errc::no_such_file_or_directory)
- return ExternalFS->dir_begin(Dir, EC);
+ return ExternalFS->dir_begin(Path, EC);
return {};
}
- ErrorOr<Status> S = status(Dir, *E);
+ ErrorOr<Status> S = status(Path, *E);
if (!S) {
EC = S.getError();
return {};
@@ -1147,7 +1151,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
auto *D = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(*E);
return directory_iterator(std::make_shared<VFSFromYamlDirIterImpl>(
- Dir, D->contents_begin(), D->contents_end(),
+ Path, D->contents_begin(), D->contents_end(),
/*IterateExternalFS=*/shouldUseExternalFS(), *ExternalFS, EC));
}
@@ -1159,6 +1163,17 @@ StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const {
return ExternalContentsPrefixDir;
}
+void RedirectingFileSystem::setFallthrough(bool Fallthrough) {
+ IsFallthrough = Fallthrough;
+}
+
+std::vector<StringRef> RedirectingFileSystem::getRoots() const {
+ std::vector<StringRef> R;
+ for (const auto &Root : Roots)
+ R.push_back(Root->getName());
+ return R;
+}
+
void RedirectingFileSystem::dump(raw_ostream &OS) const {
for (const auto &Root : Roots)
dumpEntry(OS, Root.get());
@@ -1263,7 +1278,8 @@ class llvm::vfs::RedirectingFileSystemParser {
return true;
}
- RedirectingFileSystem::Entry *
+public:
+ static RedirectingFileSystem::Entry *
lookupOrCreateEntry(RedirectingFileSystem *FS, StringRef Name,
RedirectingFileSystem::Entry *ParentEntry = nullptr) {
if (!ParentEntry) { // Look for a existent root
@@ -1305,6 +1321,7 @@ class llvm::vfs::RedirectingFileSystemParser {
return DE->getLastContent();
}
+private:
void uniqueOverlayTree(RedirectingFileSystem *FS,
RedirectingFileSystem::Entry *SrcE,
RedirectingFileSystem::Entry *NewParentE = nullptr) {
@@ -1630,7 +1647,7 @@ public:
}
};
-RedirectingFileSystem *
+std::unique_ptr<RedirectingFileSystem>
RedirectingFileSystem::create(std::unique_ptr<MemoryBuffer> Buffer,
SourceMgr::DiagHandlerTy DiagHandler,
StringRef YAMLFilePath, void *DiagContext,
@@ -1670,25 +1687,80 @@ RedirectingFileSystem::create(std::unique_ptr<MemoryBuffer> Buffer,
if (!P.parse(Root, FS.get()))
return nullptr;
- return FS.release();
+ return FS;
}
-ErrorOr<RedirectingFileSystem::Entry *>
-RedirectingFileSystem::lookupPath(const Twine &Path_) const {
- SmallString<256> Path;
- Path_.toVector(Path);
+std::unique_ptr<RedirectingFileSystem> RedirectingFileSystem::create(
+ ArrayRef<std::pair<std::string, std::string>> RemappedFiles,
+ bool UseExternalNames, FileSystem &ExternalFS) {
+ std::unique_ptr<RedirectingFileSystem> FS(
+ new RedirectingFileSystem(&ExternalFS));
+ FS->UseExternalNames = UseExternalNames;
+
+ StringMap<RedirectingFileSystem::Entry *> Entries;
+
+ for (auto &Mapping : llvm::reverse(RemappedFiles)) {
+ SmallString<128> From = StringRef(Mapping.first);
+ SmallString<128> To = StringRef(Mapping.second);
+ {
+ auto EC = ExternalFS.makeAbsolute(From);
+ (void)EC;
+ assert(!EC && "Could not make absolute path");
+ }
- // Handle relative paths
+ // Check if we've already mapped this file. The first one we see (in the
+ // reverse iteration) wins.
+ RedirectingFileSystem::Entry *&ToEntry = Entries[From];
+ if (ToEntry)
+ continue;
+
+ // Add parent directories.
+ RedirectingFileSystem::Entry *Parent = nullptr;
+ StringRef FromDirectory = llvm::sys::path::parent_path(From);
+ for (auto I = llvm::sys::path::begin(FromDirectory),
+ E = llvm::sys::path::end(FromDirectory);
+ I != E; ++I) {
+ Parent = RedirectingFileSystemParser::lookupOrCreateEntry(FS.get(), *I,
+ Parent);
+ }
+ assert(Parent && "File without a directory?");
+ {
+ auto EC = ExternalFS.makeAbsolute(To);
+ (void)EC;
+ assert(!EC && "Could not make absolute path");
+ }
+
+ // Add the file.
+ auto NewFile =
+ std::make_unique<RedirectingFileSystem::RedirectingFileEntry>(
+ llvm::sys::path::filename(From), To,
+ UseExternalNames
+ ? RedirectingFileSystem::RedirectingFileEntry::NK_External
+ : RedirectingFileSystem::RedirectingFileEntry::NK_Virtual);
+ ToEntry = NewFile.get();
+ cast<RedirectingFileSystem::RedirectingDirectoryEntry>(Parent)->addContent(
+ std::move(NewFile));
+ }
+
+ return FS;
+}
+
+std::error_code
+RedirectingFileSystem::makeCanonical(SmallVectorImpl<char> &Path) const {
if (std::error_code EC = makeAbsolute(Path))
return EC;
- // Canonicalize path by removing ".", "..", "./", components. This is
- // a VFS request, do not bother about symlinks in the path components
- // but canonicalize in order to perform the correct entry search.
- Path = canonicalize(Path);
- if (Path.empty())
+ llvm::SmallString<256> CanonicalPath =
+ canonicalize(StringRef(Path.data(), Path.size()));
+ if (CanonicalPath.empty())
return make_error_code(llvm::errc::invalid_argument);
+ Path.assign(CanonicalPath.begin(), CanonicalPath.end());
+ return {};
+}
+
+ErrorOr<RedirectingFileSystem::Entry *>
+RedirectingFileSystem::lookupPath(StringRef Path) const {
sys::path::const_iterator Start = sys::path::begin(Path);
sys::path::const_iterator End = sys::path::end(Path);
for (const auto &Root : Roots) {
@@ -1763,7 +1835,13 @@ ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path,
}
}
-ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path) {
+ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path_) {
+ SmallString<256> Path;
+ Path_.toVector(Path);
+
+ if (std::error_code EC = makeCanonical(Path))
+ return EC;
+
ErrorOr<RedirectingFileSystem::Entry *> Result = lookupPath(Path);
if (!Result) {
if (shouldUseExternalFS() &&
@@ -1801,7 +1879,13 @@ public:
} // namespace
ErrorOr<std::unique_ptr<File>>
-RedirectingFileSystem::openFileForRead(const Twine &Path) {
+RedirectingFileSystem::openFileForRead(const Twine &Path_) {
+ SmallString<256> Path;
+ Path_.toVector(Path);
+
+ if (std::error_code EC = makeCanonical(Path))
+ return EC;
+
ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Path);
if (!E) {
if (shouldUseExternalFS() &&
@@ -1831,8 +1915,14 @@ RedirectingFileSystem::openFileForRead(const Twine &Path) {
}
std::error_code
-RedirectingFileSystem::getRealPath(const Twine &Path,
+RedirectingFileSystem::getRealPath(const Twine &Path_,
SmallVectorImpl<char> &Output) const {
+ SmallString<256> Path;
+ Path_.toVector(Path);
+
+ if (std::error_code EC = makeCanonical(Path))
+ return EC;
+
ErrorOr<RedirectingFileSystem::Entry *> Result = lookupPath(Path);
if (!Result) {
if (shouldUseExternalFS() &&
@@ -1852,7 +1942,7 @@ RedirectingFileSystem::getRealPath(const Twine &Path,
: llvm::errc::invalid_argument;
}
-IntrusiveRefCntPtr<FileSystem>
+std::unique_ptr<FileSystem>
vfs::getVFSFromYAML(std::unique_ptr<MemoryBuffer> Buffer,
SourceMgr::DiagHandlerTy DiagHandler,
StringRef YAMLFilePath, void *DiagContext,
@@ -1893,7 +1983,7 @@ void vfs::collectVFSFromYAML(std::unique_ptr<MemoryBuffer> Buffer,
SmallVectorImpl<YAMLVFSEntry> &CollectedEntries,
void *DiagContext,
IntrusiveRefCntPtr<FileSystem> ExternalFS) {
- RedirectingFileSystem *VFS = RedirectingFileSystem::create(
+ std::unique_ptr<RedirectingFileSystem> VFS = RedirectingFileSystem::create(
std::move(Buffer), DiagHandler, YAMLFilePath, DiagContext,
std::move(ExternalFS));
ErrorOr<RedirectingFileSystem::Entry *> RootE = VFS->lookupPath("/");
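
Besides the YAML-based factory, RedirectingFileSystem gains a create() overload that builds an overlay directly from (virtual, real) path pairs. A hedged usage sketch with illustrative paths:

#include "llvm/Support/VirtualFileSystem.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Map one virtual header onto a generated file on the real file system.
std::unique_ptr<llvm::vfs::RedirectingFileSystem> makeOverlay() {
  std::vector<std::pair<std::string, std::string>> Remaps = {
      {"/virtual/include/config.h", "/real/build/generated/config.h"}};
  return llvm::vfs::RedirectingFileSystem::create(
      Remaps, /*UseExternalNames=*/true, *llvm::vfs::getRealFileSystem());
}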
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
index a4ffc0ec4313..adcbd1b5f8f3 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
@@ -402,8 +402,22 @@ std::error_code is_local(int FD, bool &Result) {
}
static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
- // First, check if the file is on a network (non-local) drive. If so, don't
- // set DeleteFile to true, since it prevents opening the file for writes.
+ // Clear the FILE_DISPOSITION_INFO flag first, before checking if it's a
+ // network file. On Windows 7 the function realPathFromHandle() below fails
+ // if the FILE_DISPOSITION_INFO flag was already set to 'DeleteFile = true' by
+ // a prior call.
+ FILE_DISPOSITION_INFO Disposition;
+ Disposition.DeleteFile = false;
+ if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
+ sizeof(Disposition)))
+ return mapWindowsError(::GetLastError());
+ if (!Delete)
+ return std::error_code();
+
+ // Check if the file is on a network (non-local) drive. If so, don't
+ // continue when DeleteFile is true, since it prevents opening the file for
+ // writes. Note -- this will leak temporary files on disk, but only when the
+ // target file is on a network drive.
SmallVector<wchar_t, 128> FinalPath;
if (std::error_code EC = realPathFromHandle(Handle, FinalPath))
return EC;
@@ -415,9 +429,9 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
if (!IsLocal)
return std::error_code();
- // The file is on a local drive, set the DeleteFile to true.
- FILE_DISPOSITION_INFO Disposition;
- Disposition.DeleteFile = Delete;
+ // The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's
+ // flag.
+ Disposition.DeleteFile = true;
if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
sizeof(Disposition)))
return mapWindowsError(::GetLastError());
@@ -1273,6 +1287,43 @@ Expected<size_t> readNativeFileSlice(file_t FileHandle,
return readNativeFileImpl(FileHandle, Buf, &Overlapped);
}
+std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout) {
+ DWORD Flags = LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY;
+ OVERLAPPED OV = {};
+ file_t File = convertFDToNativeFile(FD);
+ auto Start = std::chrono::steady_clock::now();
+ auto End = Start + Timeout;
+ do {
+ if (::LockFileEx(File, Flags, 0, MAXDWORD, MAXDWORD, &OV))
+ return std::error_code();
+ DWORD Error = ::GetLastError();
+ if (Error == ERROR_LOCK_VIOLATION) {
+ ::Sleep(1);
+ continue;
+ }
+ return mapWindowsError(Error);
+ } while (std::chrono::steady_clock::now() < End);
+ return mapWindowsError(ERROR_LOCK_VIOLATION);
+}
+
+std::error_code lockFile(int FD) {
+ DWORD Flags = LOCKFILE_EXCLUSIVE_LOCK;
+ OVERLAPPED OV = {};
+ file_t File = convertFDToNativeFile(FD);
+ if (::LockFileEx(File, Flags, 0, MAXDWORD, MAXDWORD, &OV))
+ return std::error_code();
+ DWORD Error = ::GetLastError();
+ return mapWindowsError(Error);
+}
+
+std::error_code unlockFile(int FD) {
+ OVERLAPPED OV = {};
+ file_t File = convertFDToNativeFile(FD);
+ if (::UnlockFileEx(File, 0, MAXDWORD, MAXDWORD, &OV))
+ return std::error_code();
+ return mapWindowsError(::GetLastError());
+}
+
std::error_code closeFile(file_t &F) {
file_t TmpF = F;
F = kInvalidFile;
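
The Path.inc hunk above adds advisory file locking on Windows (tryLockFile, lockFile, unlockFile). The following is a minimal sketch, not part of the patch, of how a caller might use these llvm::sys::fs helpers; the one-second timeout and the descriptor handling are illustrative assumptions.

  #include "llvm/Support/FileSystem.h"
  #include <chrono>
  #include <system_error>

  // Append through an already-open descriptor while holding an exclusive lock.
  static std::error_code appendUnderLock(int FD) {
    using namespace llvm::sys;
    // Wait up to one second for the lock; on Windows, ERROR_LOCK_VIOLATION is
    // retried internally, as shown in the hunk above.
    if (std::error_code EC = fs::tryLockFile(FD, std::chrono::milliseconds(1000)))
      return EC;                  // timed out or failed outright
    // ... write through FD while the lock is held ...
    return fs::unlockFile(FD);    // release before returning
  }
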
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc
index 8064d4e17b29..910d2395d277 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc
@@ -228,7 +228,8 @@ static std::error_code GetExecutableName(SmallVectorImpl<char> &Filename) {
if (EC)
return EC;
- StringRef Base = sys::path::filename(Filename.data());
+ // Make a copy of the filename since assign makes the StringRef invalid.
+ std::string Base = sys::path::filename(Filename.data()).str();
Filename.assign(Base.begin(), Base.end());
return std::error_code();
}
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Program.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Program.inc
index 9fe05d24ec2e..f1d612cf3c98 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Program.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Program.inc
@@ -171,7 +171,8 @@ static HANDLE RedirectIO(Optional<StringRef> Path, int fd,
static bool Execute(ProcessInfo &PI, StringRef Program,
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
ArrayRef<Optional<StringRef>> Redirects,
- unsigned MemoryLimit, std::string *ErrMsg) {
+ unsigned MemoryLimit, std::string *ErrMsg,
+ BitVector *AffinityMask) {
if (!sys::fs::can_execute(Program)) {
if (ErrMsg)
*ErrMsg = "program not executable";
@@ -189,7 +190,13 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
// Windows wants a command line, not an array of args, to pass to the new
// process. We have to concatenate them all, while quoting the args that
// have embedded spaces (or are empty).
- std::string Command = flattenWindowsCommandLine(Args);
+ auto Result = flattenWindowsCommandLine(Args);
+ if (std::error_code ec = Result.getError()) {
+ SetLastError(ec.value());
+ MakeErrMsg(ErrMsg, std::string("Unable to convert command-line to UTF-16"));
+ return false;
+ }
+ std::wstring Command = *Result;
// The pointer to the environment block for the new process.
std::vector<wchar_t> EnvBlock;
@@ -206,7 +213,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
return false;
}
- EnvBlock.insert(EnvBlock.end(), EnvString.begin(), EnvString.end());
+ llvm::append_range(EnvBlock, EnvString);
EnvBlock.push_back(0);
}
EnvBlock.push_back(0);
@@ -271,18 +278,15 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
return false;
}
- SmallVector<wchar_t, MAX_PATH> CommandUtf16;
- if (std::error_code ec = windows::UTF8ToUTF16(Command, CommandUtf16)) {
- SetLastError(ec.value());
- MakeErrMsg(ErrMsg,
- std::string("Unable to convert command-line to UTF-16"));
- return false;
- }
+ unsigned CreateFlags = CREATE_UNICODE_ENVIRONMENT;
+ if (AffinityMask)
+ CreateFlags |= CREATE_SUSPENDED;
- BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0,
- TRUE, CREATE_UNICODE_ENVIRONMENT,
- EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si,
- &pi);
+ std::vector<wchar_t> CommandUtf16(Command.size() + 1, 0);
+ std::copy(Command.begin(), Command.end(), CommandUtf16.begin());
+ BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0, TRUE,
+ CreateFlags, EnvBlock.empty() ? 0 : EnvBlock.data(),
+ 0, &si, &pi);
DWORD err = GetLastError();
// Regardless of whether the process got created or not, we are done with
@@ -330,6 +334,13 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
}
}
+ // Set the affinity mask
+ if (AffinityMask) {
+ ::SetProcessAffinityMask(pi.hProcess,
+ (DWORD_PTR)AffinityMask->getData().front());
+ ::ResumeThread(pi.hThread);
+ }
+
return true;
}
@@ -376,7 +387,7 @@ static std::string quoteSingleArg(StringRef Arg) {
}
namespace llvm {
-std::string sys::flattenWindowsCommandLine(ArrayRef<StringRef> Args) {
+ErrorOr<std::wstring> sys::flattenWindowsCommandLine(ArrayRef<StringRef> Args) {
std::string Command;
for (StringRef Arg : Args) {
if (argNeedsQuotes(Arg))
@@ -387,7 +398,11 @@ std::string sys::flattenWindowsCommandLine(ArrayRef<StringRef> Args) {
Command.push_back(' ');
}
- return Command;
+ SmallVector<wchar_t, MAX_PATH> CommandUtf16;
+ if (std::error_code ec = windows::UTF8ToUTF16(Command, CommandUtf16))
+ return ec;
+
+ return std::wstring(CommandUtf16.begin(), CommandUtf16.end());
}
ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
@@ -532,12 +547,16 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program,
ArrayRef<StringRef> Args) {
- // The documented max length of the command line passed to CreateProcess.
- static const size_t MaxCommandStringLength = 32768;
+ // The documentation on CreateProcessW states that the size of the argument
+ // lpCommandLine must not be greater than 32767 characters, including the
+ // Unicode terminating null character. We use a smaller value to reduce the
+ // risk of getting an invalid command line due to unaccounted factors.
+ static const size_t MaxCommandStringLength = 32000;
SmallVector<StringRef, 8> FullArgs;
FullArgs.push_back(Program);
FullArgs.append(Args.begin(), Args.end());
- std::string Result = flattenWindowsCommandLine(FullArgs);
- return (Result.size() + 1) <= MaxCommandStringLength;
+ auto Result = flattenWindowsCommandLine(FullArgs);
+ assert(!Result.getError());
+ return (Result->size() + 1) <= MaxCommandStringLength;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc
index 0c3681fa9654..3758582b35f7 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc
@@ -552,7 +552,8 @@ static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) {
StackFrame, C);
}
-void llvm::sys::PrintStackTrace(raw_ostream &OS) {
+void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
+ // FIXME: Handle the "Depth" parameter to print the stack trace up to the specified depth
LocalPrintStackTrace(OS, nullptr);
}
@@ -868,3 +869,5 @@ static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
#pragma GCC diagnostic warning "-Wformat"
#pragma GCC diagnostic warning "-Wformat-extra-args"
#endif
+
+void sys::unregisterHandlers() {}
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc
index 296e87b77695..6448bb478d0c 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc
@@ -195,14 +195,27 @@ static ArrayRef<ProcessorGroup> getProcessorGroups() {
if (!IterateProcInfo(RelationProcessorCore, HandleProc))
return std::vector<ProcessorGroup>();
- // If there's an affinity mask set on one of the CPUs, then assume the user
- // wants to constrain the current process to only a single CPU.
- for (auto &G : Groups) {
- if (G.UsableThreads != G.AllThreads) {
- ProcessorGroup NewG{G};
+ // If there's an affinity mask set, assume the user wants to constrain the
+ // current process to only a single CPU group. On Windows, it is not
+ // possible for affinity masks to cross CPU group boundaries.
+ DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
+ if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
+ &SystemAffinityMask) &&
+ ProcessAffinityMask != SystemAffinityMask) {
+ // We don't expect more than 4 CPU groups on Windows (256 processors).
+ USHORT GroupCount = 4;
+ USHORT GroupArray[4]{};
+ if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
+ GroupArray)) {
+ assert(GroupCount == 1 &&
+ "On startup, a program is expected to be assigned only to "
+ "one processor group!");
+ unsigned CurrentGroupID = GroupArray[0];
+ ProcessorGroup NewG{Groups[CurrentGroupID]};
+ NewG.Affinity = ProcessAffinityMask;
+ NewG.UsableThreads = countPopulation(ProcessAffinityMask);
Groups.clear();
Groups.push_back(NewG);
- break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Support/X86TargetParser.cpp b/contrib/llvm-project/llvm/lib/Support/X86TargetParser.cpp
index 4c2d4efbfca8..d738511465b4 100644
--- a/contrib/llvm-project/llvm/lib/Support/X86TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/X86TargetParser.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/X86TargetParser.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
using namespace llvm;
@@ -117,147 +116,160 @@ struct FeatureInfo {
} // end anonymous namespace
#define X86_FEATURE(ENUM, STRING) \
- static constexpr FeatureBitset Feature##ENUM = {X86::FEATURE_##ENUM};
+ constexpr FeatureBitset Feature##ENUM = {X86::FEATURE_##ENUM};
#include "llvm/Support/X86TargetParser.def"
// Pentium with MMX.
-static constexpr FeatureBitset FeaturesPentiumMMX =
+constexpr FeatureBitset FeaturesPentiumMMX =
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX;
// Pentium 2 and 3.
-static constexpr FeatureBitset FeaturesPentium2 =
+constexpr FeatureBitset FeaturesPentium2 =
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeatureFXSR;
-static constexpr FeatureBitset FeaturesPentium3 = FeaturesPentium2 | FeatureSSE;
+constexpr FeatureBitset FeaturesPentium3 = FeaturesPentium2 | FeatureSSE;
// Pentium 4 CPUs
-static constexpr FeatureBitset FeaturesPentium4 =
- FeaturesPentium3 | FeatureSSE2;
-static constexpr FeatureBitset FeaturesPrescott =
- FeaturesPentium4 | FeatureSSE3;
-static constexpr FeatureBitset FeaturesNocona =
+constexpr FeatureBitset FeaturesPentium4 = FeaturesPentium3 | FeatureSSE2;
+constexpr FeatureBitset FeaturesPrescott = FeaturesPentium4 | FeatureSSE3;
+constexpr FeatureBitset FeaturesNocona =
FeaturesPrescott | Feature64BIT | FeatureCMPXCHG16B;
// Basic 64-bit capable CPU.
-static constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
+constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
+constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
+ FeaturePOPCNT | FeatureSSE4_2 |
+ FeatureCMPXCHG16B;
+constexpr FeatureBitset FeaturesX86_64_V3 =
+ FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
+ FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
+constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 |
+ FeatureAVX512BW | FeatureAVX512CD |
+ FeatureAVX512DQ | FeatureAVX512VL;
// Intel Core CPUs
-static constexpr FeatureBitset FeaturesCore2 =
+constexpr FeatureBitset FeaturesCore2 =
FeaturesNocona | FeatureSAHF | FeatureSSSE3;
-static constexpr FeatureBitset FeaturesPenryn = FeaturesCore2 | FeatureSSE4_1;
-static constexpr FeatureBitset FeaturesNehalem =
+constexpr FeatureBitset FeaturesPenryn = FeaturesCore2 | FeatureSSE4_1;
+constexpr FeatureBitset FeaturesNehalem =
FeaturesPenryn | FeaturePOPCNT | FeatureSSE4_2;
-static constexpr FeatureBitset FeaturesWestmere =
- FeaturesNehalem | FeaturePCLMUL;
-static constexpr FeatureBitset FeaturesSandyBridge =
+constexpr FeatureBitset FeaturesWestmere = FeaturesNehalem | FeaturePCLMUL;
+constexpr FeatureBitset FeaturesSandyBridge =
FeaturesWestmere | FeatureAVX | FeatureXSAVE | FeatureXSAVEOPT;
-static constexpr FeatureBitset FeaturesIvyBridge =
+constexpr FeatureBitset FeaturesIvyBridge =
FeaturesSandyBridge | FeatureF16C | FeatureFSGSBASE | FeatureRDRND;
-static constexpr FeatureBitset FeaturesHaswell =
+constexpr FeatureBitset FeaturesHaswell =
FeaturesIvyBridge | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureFMA |
FeatureINVPCID | FeatureLZCNT | FeatureMOVBE;
-static constexpr FeatureBitset FeaturesBroadwell =
+constexpr FeatureBitset FeaturesBroadwell =
FeaturesHaswell | FeatureADX | FeaturePRFCHW | FeatureRDSEED;
// Intel Knights Landing and Knights Mill
// Knights Landing has feature parity with Broadwell.
-static constexpr FeatureBitset FeaturesKNL =
+constexpr FeatureBitset FeaturesKNL =
FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureAVX512CD |
FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1;
-static constexpr FeatureBitset FeaturesKNM =
- FeaturesKNL | FeatureAVX512VPOPCNTDQ;
+constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ;
// Intel Skylake processors.
-static constexpr FeatureBitset FeaturesSkylakeClient =
+constexpr FeatureBitset FeaturesSkylakeClient =
FeaturesBroadwell | FeatureAES | FeatureCLFLUSHOPT | FeatureXSAVEC |
FeatureXSAVES | FeatureSGX;
// SkylakeServer inherits all SkylakeClient features except SGX.
// FIXME: That doesn't match gcc.
-static constexpr FeatureBitset FeaturesSkylakeServer =
+constexpr FeatureBitset FeaturesSkylakeServer =
(FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureAVX512CD |
FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureCLWB |
FeaturePKU;
-static constexpr FeatureBitset FeaturesCascadeLake =
+constexpr FeatureBitset FeaturesCascadeLake =
FeaturesSkylakeServer | FeatureAVX512VNNI;
-static constexpr FeatureBitset FeaturesCooperLake =
+constexpr FeatureBitset FeaturesCooperLake =
FeaturesCascadeLake | FeatureAVX512BF16;
// Intel 10nm processors.
-static constexpr FeatureBitset FeaturesCannonlake =
+constexpr FeatureBitset FeaturesCannonlake =
FeaturesSkylakeClient | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ |
FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI |
FeaturePKU | FeatureSHA;
-static constexpr FeatureBitset FeaturesICLClient =
+constexpr FeatureBitset FeaturesICLClient =
FeaturesCannonlake | FeatureAVX512BITALG | FeatureAVX512VBMI2 |
FeatureAVX512VNNI | FeatureAVX512VPOPCNTDQ | FeatureCLWB | FeatureGFNI |
FeatureRDPID | FeatureVAES | FeatureVPCLMULQDQ;
-static constexpr FeatureBitset FeaturesICLServer =
+constexpr FeatureBitset FeaturesICLServer =
FeaturesICLClient | FeaturePCONFIG | FeatureWBNOINVD;
-static constexpr FeatureBitset FeaturesTigerlake =
+constexpr FeatureBitset FeaturesTigerlake =
FeaturesICLClient | FeatureAVX512VP2INTERSECT | FeatureMOVDIR64B |
- FeatureMOVDIRI | FeatureSHSTK;
+ FeatureMOVDIRI | FeatureSHSTK | FeatureKL | FeatureWIDEKL;
+constexpr FeatureBitset FeaturesSapphireRapids =
+ FeaturesICLServer | FeatureAMX_TILE | FeatureAMX_INT8 | FeatureAMX_BF16 |
+ FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE |
+ FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
+ FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR |
+ FeatureWAITPKG | FeatureAVXVNNI;
+constexpr FeatureBitset FeaturesAlderlake =
+ FeaturesSkylakeClient | FeatureCLDEMOTE | FeatureHRESET | FeaturePTWRITE |
+ FeatureSERIALIZE | FeatureWAITPKG | FeatureAVXVNNI;
// Intel Atom processors.
// Bonnell has feature parity with Core2 and adds MOVBE.
-static constexpr FeatureBitset FeaturesBonnell = FeaturesCore2 | FeatureMOVBE;
+constexpr FeatureBitset FeaturesBonnell = FeaturesCore2 | FeatureMOVBE;
// Silvermont has parity with Westmere and Bonnell plus PRFCHW and RDRND.
-static constexpr FeatureBitset FeaturesSilvermont =
+constexpr FeatureBitset FeaturesSilvermont =
FeaturesBonnell | FeaturesWestmere | FeaturePRFCHW | FeatureRDRND;
-static constexpr FeatureBitset FeaturesGoldmont =
+constexpr FeatureBitset FeaturesGoldmont =
FeaturesSilvermont | FeatureAES | FeatureCLFLUSHOPT | FeatureFSGSBASE |
FeatureRDSEED | FeatureSHA | FeatureXSAVE | FeatureXSAVEC |
FeatureXSAVEOPT | FeatureXSAVES;
-static constexpr FeatureBitset FeaturesGoldmontPlus =
+constexpr FeatureBitset FeaturesGoldmontPlus =
FeaturesGoldmont | FeaturePTWRITE | FeatureRDPID | FeatureSGX;
-static constexpr FeatureBitset FeaturesTremont =
+constexpr FeatureBitset FeaturesTremont =
FeaturesGoldmontPlus | FeatureCLWB | FeatureGFNI;
// Geode Processor.
-static constexpr FeatureBitset FeaturesGeode =
+constexpr FeatureBitset FeaturesGeode =
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | Feature3DNOW | Feature3DNOWA;
// K6 processor.
-static constexpr FeatureBitset FeaturesK6 =
- FeatureX87 | FeatureCMPXCHG8B | FeatureMMX;
+constexpr FeatureBitset FeaturesK6 = FeatureX87 | FeatureCMPXCHG8B | FeatureMMX;
// K7 and K8 architecture processors.
-static constexpr FeatureBitset FeaturesAthlon =
+constexpr FeatureBitset FeaturesAthlon =
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | Feature3DNOW | Feature3DNOWA;
-static constexpr FeatureBitset FeaturesAthlonXP =
+constexpr FeatureBitset FeaturesAthlonXP =
FeaturesAthlon | FeatureFXSR | FeatureSSE;
-static constexpr FeatureBitset FeaturesK8 =
+constexpr FeatureBitset FeaturesK8 =
FeaturesAthlonXP | FeatureSSE2 | Feature64BIT;
-static constexpr FeatureBitset FeaturesK8SSE3 = FeaturesK8 | FeatureSSE3;
-static constexpr FeatureBitset FeaturesAMDFAM10 =
+constexpr FeatureBitset FeaturesK8SSE3 = FeaturesK8 | FeatureSSE3;
+constexpr FeatureBitset FeaturesAMDFAM10 =
FeaturesK8SSE3 | FeatureCMPXCHG16B | FeatureLZCNT | FeaturePOPCNT |
FeaturePRFCHW | FeatureSAHF | FeatureSSE4_A;
// Bobcat architecture processors.
-static constexpr FeatureBitset FeaturesBTVER1 =
+constexpr FeatureBitset FeaturesBTVER1 =
FeatureX87 | FeatureCMPXCHG8B | FeatureCMPXCHG16B | Feature64BIT |
FeatureFXSR | FeatureLZCNT | FeatureMMX | FeaturePOPCNT | FeaturePRFCHW |
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_A |
FeatureSAHF;
-static constexpr FeatureBitset FeaturesBTVER2 =
+constexpr FeatureBitset FeaturesBTVER2 =
FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureF16C |
FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
// AMD Bulldozer architecture processors.
-static constexpr FeatureBitset FeaturesBDVER1 =
+constexpr FeatureBitset FeaturesBDVER1 =
FeatureX87 | FeatureAES | FeatureAVX | FeatureCMPXCHG8B |
FeatureCMPXCHG16B | Feature64BIT | FeatureFMA4 | FeatureFXSR | FeatureLWP |
FeatureLZCNT | FeatureMMX | FeaturePCLMUL | FeaturePOPCNT | FeaturePRFCHW |
FeatureSAHF | FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 |
FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A | FeatureXOP | FeatureXSAVE;
-static constexpr FeatureBitset FeaturesBDVER2 =
+constexpr FeatureBitset FeaturesBDVER2 =
FeaturesBDVER1 | FeatureBMI | FeatureFMA | FeatureF16C | FeatureTBM;
-static constexpr FeatureBitset FeaturesBDVER3 =
+constexpr FeatureBitset FeaturesBDVER3 =
FeaturesBDVER2 | FeatureFSGSBASE | FeatureXSAVEOPT;
-static constexpr FeatureBitset FeaturesBDVER4 =
- FeaturesBDVER3 | FeatureAVX2 | FeatureBMI2 | FeatureMOVBE | FeatureMWAITX |
- FeatureRDRND;
+constexpr FeatureBitset FeaturesBDVER4 = FeaturesBDVER3 | FeatureAVX2 |
+ FeatureBMI2 | FeatureMOVBE |
+ FeatureMWAITX | FeatureRDRND;
// AMD Zen architecture processors.
-static constexpr FeatureBitset FeaturesZNVER1 =
+constexpr FeatureBitset FeaturesZNVER1 =
FeatureX87 | FeatureADX | FeatureAES | FeatureAVX | FeatureAVX2 |
FeatureBMI | FeatureBMI2 | FeatureCLFLUSHOPT | FeatureCLZERO |
FeatureCMPXCHG8B | FeatureCMPXCHG16B | Feature64BIT | FeatureF16C |
@@ -267,10 +279,13 @@ static constexpr FeatureBitset FeaturesZNVER1 =
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 |
FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC |
FeatureXSAVEOPT | FeatureXSAVES;
-static constexpr FeatureBitset FeaturesZNVER2 =
+constexpr FeatureBitset FeaturesZNVER2 =
FeaturesZNVER1 | FeatureCLWB | FeatureRDPID | FeatureWBNOINVD;
+static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 |
+ FeatureINVPCID | FeaturePKU |
+ FeatureVAES | FeatureVPCLMULQDQ;
-static constexpr ProcInfo Processors[] = {
+constexpr ProcInfo Processors[] = {
// Empty processor. Include X87 and CMPXCHG8 for backwards compatibility.
{ {""}, CK_None, ~0U, FeatureX87 | FeatureCMPXCHG8B },
// i386-generation processors.
@@ -342,6 +357,10 @@ static constexpr ProcInfo Processors[] = {
{ {"icelake-server"}, CK_IcelakeServer, FEATURE_AVX512VBMI2, FeaturesICLServer },
// Tigerlake microarchitecture based processors.
{ {"tigerlake"}, CK_Tigerlake, FEATURE_AVX512VP2INTERSECT, FeaturesTigerlake },
+ // Sapphire Rapids microarchitecture based processors.
+ { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512VP2INTERSECT, FeaturesSapphireRapids },
+ // Alderlake microarchitecture based processors.
+ { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake },
// Knights Landing processor.
{ {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL },
// Knights Mill processor.
@@ -379,12 +398,18 @@ static constexpr ProcInfo Processors[] = {
// Zen architecture processors.
{ {"znver1"}, CK_ZNVER1, FEATURE_AVX2, FeaturesZNVER1 },
{ {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 },
+ { {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3 },
// Generic 64-bit processor.
{ {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 },
+ { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2 },
+ { {"x86-64-v3"}, CK_x86_64_v3, ~0U, FeaturesX86_64_V3 },
+ { {"x86-64-v4"}, CK_x86_64_v4, ~0U, FeaturesX86_64_V4 },
// Geode processors.
{ {"geode"}, CK_Geode, ~0U, FeaturesGeode },
};
+constexpr const char *NoTuneList[] = {"x86-64-v2", "x86-64-v3", "x86-64-v4"};
+
X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) {
for (const auto &P : Processors)
if (P.Name == CPU && (P.Features[FEATURE_64BIT] || !Only64Bit))
@@ -393,6 +418,12 @@ X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) {
return CK_None;
}
+X86::CPUKind llvm::X86::parseTuneCPU(StringRef CPU, bool Only64Bit) {
+ if (llvm::is_contained(NoTuneList, CPU))
+ return CK_None;
+ return parseArchX86(CPU, Only64Bit);
+}
+
void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit) {
for (const auto &P : Processors)
@@ -400,6 +431,14 @@ void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
Values.emplace_back(P.Name);
}
+void llvm::X86::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
+ bool Only64Bit) {
+ for (const ProcInfo &P : Processors)
+ if (!P.Name.empty() && (P.Features[FEATURE_64BIT] || !Only64Bit) &&
+ !llvm::is_contained(NoTuneList, P.Name))
+ Values.emplace_back(P.Name);
+}
+
ProcessorFeatures llvm::X86::getKeyFeature(X86::CPUKind Kind) {
// FIXME: Can we avoid a linear search here? The table might be sorted by
// CPUKind so we could binary search?
@@ -414,136 +453,135 @@ ProcessorFeatures llvm::X86::getKeyFeature(X86::CPUKind Kind) {
}
// Features with no dependencies.
-static constexpr FeatureBitset ImpliedFeatures64BIT = {};
-static constexpr FeatureBitset ImpliedFeaturesADX = {};
-static constexpr FeatureBitset ImpliedFeaturesBMI = {};
-static constexpr FeatureBitset ImpliedFeaturesBMI2 = {};
-static constexpr FeatureBitset ImpliedFeaturesCLDEMOTE = {};
-static constexpr FeatureBitset ImpliedFeaturesCLFLUSHOPT = {};
-static constexpr FeatureBitset ImpliedFeaturesCLWB = {};
-static constexpr FeatureBitset ImpliedFeaturesCLZERO = {};
-static constexpr FeatureBitset ImpliedFeaturesCMOV = {};
-static constexpr FeatureBitset ImpliedFeaturesCMPXCHG16B = {};
-static constexpr FeatureBitset ImpliedFeaturesCMPXCHG8B = {};
-static constexpr FeatureBitset ImpliedFeaturesENQCMD = {};
-static constexpr FeatureBitset ImpliedFeaturesFSGSBASE = {};
-static constexpr FeatureBitset ImpliedFeaturesFXSR = {};
-static constexpr FeatureBitset ImpliedFeaturesINVPCID = {};
-static constexpr FeatureBitset ImpliedFeaturesLWP = {};
-static constexpr FeatureBitset ImpliedFeaturesLZCNT = {};
-static constexpr FeatureBitset ImpliedFeaturesMWAITX = {};
-static constexpr FeatureBitset ImpliedFeaturesMOVBE = {};
-static constexpr FeatureBitset ImpliedFeaturesMOVDIR64B = {};
-static constexpr FeatureBitset ImpliedFeaturesMOVDIRI = {};
-static constexpr FeatureBitset ImpliedFeaturesPCONFIG = {};
-static constexpr FeatureBitset ImpliedFeaturesPOPCNT = {};
-static constexpr FeatureBitset ImpliedFeaturesPKU = {};
-static constexpr FeatureBitset ImpliedFeaturesPREFETCHWT1 = {};
-static constexpr FeatureBitset ImpliedFeaturesPRFCHW = {};
-static constexpr FeatureBitset ImpliedFeaturesPTWRITE = {};
-static constexpr FeatureBitset ImpliedFeaturesRDPID = {};
-static constexpr FeatureBitset ImpliedFeaturesRDRND = {};
-static constexpr FeatureBitset ImpliedFeaturesRDSEED = {};
-static constexpr FeatureBitset ImpliedFeaturesRTM = {};
-static constexpr FeatureBitset ImpliedFeaturesSAHF = {};
-static constexpr FeatureBitset ImpliedFeaturesSERIALIZE = {};
-static constexpr FeatureBitset ImpliedFeaturesSGX = {};
-static constexpr FeatureBitset ImpliedFeaturesSHSTK = {};
-static constexpr FeatureBitset ImpliedFeaturesTBM = {};
-static constexpr FeatureBitset ImpliedFeaturesTSXLDTRK = {};
-static constexpr FeatureBitset ImpliedFeaturesWAITPKG = {};
-static constexpr FeatureBitset ImpliedFeaturesWBNOINVD = {};
-static constexpr FeatureBitset ImpliedFeaturesVZEROUPPER = {};
-static constexpr FeatureBitset ImpliedFeaturesX87 = {};
-static constexpr FeatureBitset ImpliedFeaturesXSAVE = {};
+constexpr FeatureBitset ImpliedFeatures64BIT = {};
+constexpr FeatureBitset ImpliedFeaturesADX = {};
+constexpr FeatureBitset ImpliedFeaturesBMI = {};
+constexpr FeatureBitset ImpliedFeaturesBMI2 = {};
+constexpr FeatureBitset ImpliedFeaturesCLDEMOTE = {};
+constexpr FeatureBitset ImpliedFeaturesCLFLUSHOPT = {};
+constexpr FeatureBitset ImpliedFeaturesCLWB = {};
+constexpr FeatureBitset ImpliedFeaturesCLZERO = {};
+constexpr FeatureBitset ImpliedFeaturesCMOV = {};
+constexpr FeatureBitset ImpliedFeaturesCMPXCHG16B = {};
+constexpr FeatureBitset ImpliedFeaturesCMPXCHG8B = {};
+constexpr FeatureBitset ImpliedFeaturesENQCMD = {};
+constexpr FeatureBitset ImpliedFeaturesFSGSBASE = {};
+constexpr FeatureBitset ImpliedFeaturesFXSR = {};
+constexpr FeatureBitset ImpliedFeaturesINVPCID = {};
+constexpr FeatureBitset ImpliedFeaturesLWP = {};
+constexpr FeatureBitset ImpliedFeaturesLZCNT = {};
+constexpr FeatureBitset ImpliedFeaturesMWAITX = {};
+constexpr FeatureBitset ImpliedFeaturesMOVBE = {};
+constexpr FeatureBitset ImpliedFeaturesMOVDIR64B = {};
+constexpr FeatureBitset ImpliedFeaturesMOVDIRI = {};
+constexpr FeatureBitset ImpliedFeaturesPCONFIG = {};
+constexpr FeatureBitset ImpliedFeaturesPOPCNT = {};
+constexpr FeatureBitset ImpliedFeaturesPKU = {};
+constexpr FeatureBitset ImpliedFeaturesPREFETCHWT1 = {};
+constexpr FeatureBitset ImpliedFeaturesPRFCHW = {};
+constexpr FeatureBitset ImpliedFeaturesPTWRITE = {};
+constexpr FeatureBitset ImpliedFeaturesRDPID = {};
+constexpr FeatureBitset ImpliedFeaturesRDRND = {};
+constexpr FeatureBitset ImpliedFeaturesRDSEED = {};
+constexpr FeatureBitset ImpliedFeaturesRTM = {};
+constexpr FeatureBitset ImpliedFeaturesSAHF = {};
+constexpr FeatureBitset ImpliedFeaturesSERIALIZE = {};
+constexpr FeatureBitset ImpliedFeaturesSGX = {};
+constexpr FeatureBitset ImpliedFeaturesSHSTK = {};
+constexpr FeatureBitset ImpliedFeaturesTBM = {};
+constexpr FeatureBitset ImpliedFeaturesTSXLDTRK = {};
+constexpr FeatureBitset ImpliedFeaturesUINTR = {};
+constexpr FeatureBitset ImpliedFeaturesWAITPKG = {};
+constexpr FeatureBitset ImpliedFeaturesWBNOINVD = {};
+constexpr FeatureBitset ImpliedFeaturesVZEROUPPER = {};
+constexpr FeatureBitset ImpliedFeaturesX87 = {};
+constexpr FeatureBitset ImpliedFeaturesXSAVE = {};
// Not really CPU features, but need to be in the table because clang uses
// target features to communicate them to the backend.
-static constexpr FeatureBitset ImpliedFeaturesRETPOLINE_EXTERNAL_THUNK = {};
-static constexpr FeatureBitset ImpliedFeaturesRETPOLINE_INDIRECT_BRANCHES = {};
-static constexpr FeatureBitset ImpliedFeaturesRETPOLINE_INDIRECT_CALLS = {};
-static constexpr FeatureBitset ImpliedFeaturesLVI_CFI = {};
-static constexpr FeatureBitset ImpliedFeaturesLVI_LOAD_HARDENING = {};
+constexpr FeatureBitset ImpliedFeaturesRETPOLINE_EXTERNAL_THUNK = {};
+constexpr FeatureBitset ImpliedFeaturesRETPOLINE_INDIRECT_BRANCHES = {};
+constexpr FeatureBitset ImpliedFeaturesRETPOLINE_INDIRECT_CALLS = {};
+constexpr FeatureBitset ImpliedFeaturesLVI_CFI = {};
+constexpr FeatureBitset ImpliedFeaturesLVI_LOAD_HARDENING = {};
// XSAVE features are dependent on basic XSAVE.
-static constexpr FeatureBitset ImpliedFeaturesXSAVEC = FeatureXSAVE;
-static constexpr FeatureBitset ImpliedFeaturesXSAVEOPT = FeatureXSAVE;
-static constexpr FeatureBitset ImpliedFeaturesXSAVES = FeatureXSAVE;
+constexpr FeatureBitset ImpliedFeaturesXSAVEC = FeatureXSAVE;
+constexpr FeatureBitset ImpliedFeaturesXSAVEOPT = FeatureXSAVE;
+constexpr FeatureBitset ImpliedFeaturesXSAVES = FeatureXSAVE;
// MMX->3DNOW->3DNOWA chain.
-static constexpr FeatureBitset ImpliedFeaturesMMX = {};
-static constexpr FeatureBitset ImpliedFeatures3DNOW = FeatureMMX;
-static constexpr FeatureBitset ImpliedFeatures3DNOWA = Feature3DNOW;
+constexpr FeatureBitset ImpliedFeaturesMMX = {};
+constexpr FeatureBitset ImpliedFeatures3DNOW = FeatureMMX;
+constexpr FeatureBitset ImpliedFeatures3DNOWA = Feature3DNOW;
// SSE/AVX/AVX512F chain.
-static constexpr FeatureBitset ImpliedFeaturesSSE = {};
-static constexpr FeatureBitset ImpliedFeaturesSSE2 = FeatureSSE;
-static constexpr FeatureBitset ImpliedFeaturesSSE3 = FeatureSSE2;
-static constexpr FeatureBitset ImpliedFeaturesSSSE3 = FeatureSSE3;
-static constexpr FeatureBitset ImpliedFeaturesSSE4_1 = FeatureSSSE3;
-static constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1;
-static constexpr FeatureBitset ImpliedFeaturesAVX = FeatureSSE4_2;
-static constexpr FeatureBitset ImpliedFeaturesAVX2 = FeatureAVX;
-static constexpr FeatureBitset ImpliedFeaturesAVX512F =
+constexpr FeatureBitset ImpliedFeaturesSSE = {};
+constexpr FeatureBitset ImpliedFeaturesSSE2 = FeatureSSE;
+constexpr FeatureBitset ImpliedFeaturesSSE3 = FeatureSSE2;
+constexpr FeatureBitset ImpliedFeaturesSSSE3 = FeatureSSE3;
+constexpr FeatureBitset ImpliedFeaturesSSE4_1 = FeatureSSSE3;
+constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1;
+constexpr FeatureBitset ImpliedFeaturesAVX = FeatureSSE4_2;
+constexpr FeatureBitset ImpliedFeaturesAVX2 = FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesAVX512F =
FeatureAVX2 | FeatureF16C | FeatureFMA;
// Vector extensions that build on SSE or AVX.
-static constexpr FeatureBitset ImpliedFeaturesAES = FeatureSSE2;
-static constexpr FeatureBitset ImpliedFeaturesF16C = FeatureAVX;
-static constexpr FeatureBitset ImpliedFeaturesFMA = FeatureAVX;
-static constexpr FeatureBitset ImpliedFeaturesGFNI = FeatureSSE2;
-static constexpr FeatureBitset ImpliedFeaturesPCLMUL = FeatureSSE2;
-static constexpr FeatureBitset ImpliedFeaturesSHA = FeatureSSE2;
-static constexpr FeatureBitset ImpliedFeaturesVAES = FeatureAES | FeatureAVX;
-static constexpr FeatureBitset ImpliedFeaturesVPCLMULQDQ =
- FeatureAVX | FeaturePCLMUL;
+constexpr FeatureBitset ImpliedFeaturesAES = FeatureSSE2;
+constexpr FeatureBitset ImpliedFeaturesF16C = FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesFMA = FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesGFNI = FeatureSSE2;
+constexpr FeatureBitset ImpliedFeaturesPCLMUL = FeatureSSE2;
+constexpr FeatureBitset ImpliedFeaturesSHA = FeatureSSE2;
+constexpr FeatureBitset ImpliedFeaturesVAES = FeatureAES | FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesVPCLMULQDQ = FeatureAVX | FeaturePCLMUL;
// AVX512 features.
-static constexpr FeatureBitset ImpliedFeaturesAVX512CD = FeatureAVX512F;
-static constexpr FeatureBitset ImpliedFeaturesAVX512BW = FeatureAVX512F;
-static constexpr FeatureBitset ImpliedFeaturesAVX512DQ = FeatureAVX512F;
-static constexpr FeatureBitset ImpliedFeaturesAVX512ER = FeatureAVX512F;
-static constexpr FeatureBitset ImpliedFeaturesAVX512PF = FeatureAVX512F;
-static constexpr FeatureBitset ImpliedFeaturesAVX512VL = FeatureAVX512F;
-
-static constexpr FeatureBitset ImpliedFeaturesAVX512BF16 = FeatureAVX512BW;
-static constexpr FeatureBitset ImpliedFeaturesAVX512BITALG = FeatureAVX512BW;
-static constexpr FeatureBitset ImpliedFeaturesAVX512IFMA = FeatureAVX512F;
-static constexpr FeatureBitset ImpliedFeaturesAVX512VNNI = FeatureAVX512F;
-static constexpr FeatureBitset ImpliedFeaturesAVX512VPOPCNTDQ = FeatureAVX512F;
-static constexpr FeatureBitset ImpliedFeaturesAVX512VBMI = FeatureAVX512BW;
-static constexpr FeatureBitset ImpliedFeaturesAVX512VBMI2 = FeatureAVX512BW;
-static constexpr FeatureBitset ImpliedFeaturesAVX512VP2INTERSECT =
- FeatureAVX512F;
+constexpr FeatureBitset ImpliedFeaturesAVX512CD = FeatureAVX512F;
+constexpr FeatureBitset ImpliedFeaturesAVX512BW = FeatureAVX512F;
+constexpr FeatureBitset ImpliedFeaturesAVX512DQ = FeatureAVX512F;
+constexpr FeatureBitset ImpliedFeaturesAVX512ER = FeatureAVX512F;
+constexpr FeatureBitset ImpliedFeaturesAVX512PF = FeatureAVX512F;
+constexpr FeatureBitset ImpliedFeaturesAVX512VL = FeatureAVX512F;
+
+constexpr FeatureBitset ImpliedFeaturesAVX512BF16 = FeatureAVX512BW;
+constexpr FeatureBitset ImpliedFeaturesAVX512BITALG = FeatureAVX512BW;
+constexpr FeatureBitset ImpliedFeaturesAVX512IFMA = FeatureAVX512F;
+constexpr FeatureBitset ImpliedFeaturesAVX512VNNI = FeatureAVX512F;
+constexpr FeatureBitset ImpliedFeaturesAVX512VPOPCNTDQ = FeatureAVX512F;
+constexpr FeatureBitset ImpliedFeaturesAVX512VBMI = FeatureAVX512BW;
+constexpr FeatureBitset ImpliedFeaturesAVX512VBMI2 = FeatureAVX512BW;
+constexpr FeatureBitset ImpliedFeaturesAVX512VP2INTERSECT = FeatureAVX512F;
// FIXME: These two aren't really implemented and just exist in the feature
// list for __builtin_cpu_supports. So omit their dependencies.
-static constexpr FeatureBitset ImpliedFeaturesAVX5124FMAPS = {};
-static constexpr FeatureBitset ImpliedFeaturesAVX5124VNNIW = {};
+constexpr FeatureBitset ImpliedFeaturesAVX5124FMAPS = {};
+constexpr FeatureBitset ImpliedFeaturesAVX5124VNNIW = {};
// SSE4_A->FMA4->XOP chain.
-static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSE3;
-static constexpr FeatureBitset ImpliedFeaturesFMA4 = FeatureAVX | FeatureSSE4_A;
-static constexpr FeatureBitset ImpliedFeaturesXOP = FeatureFMA4;
+constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSE3;
+constexpr FeatureBitset ImpliedFeaturesFMA4 = FeatureAVX | FeatureSSE4_A;
+constexpr FeatureBitset ImpliedFeaturesXOP = FeatureFMA4;
// AMX Features
-static constexpr FeatureBitset ImpliedFeaturesAMX_TILE = {};
-static constexpr FeatureBitset ImpliedFeaturesAMX_BF16 = FeatureAMX_TILE;
-static constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
+constexpr FeatureBitset ImpliedFeaturesAMX_TILE = {};
+constexpr FeatureBitset ImpliedFeaturesAMX_BF16 = FeatureAMX_TILE;
+constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
+constexpr FeatureBitset ImpliedFeaturesHRESET = {};
+
+// Key Locker Features
+constexpr FeatureBitset ImpliedFeaturesKL = FeatureSSE2;
+constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL;
-static constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = {
+// AVXVNNI Features
+constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2;
+
+constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = {
#define X86_FEATURE(ENUM, STR) {{STR}, ImpliedFeatures##ENUM},
#include "llvm/Support/X86TargetParser.def"
};
-// Convert the set bits in FeatureBitset to a list of strings.
-static void getFeatureBitsAsStrings(const FeatureBitset &Bits,
- SmallVectorImpl<StringRef> &Features) {
- for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
- if (Bits[i] && !FeatureInfos[i].Name.empty())
- Features.push_back(FeatureInfos[i].Name);
-}
-
void llvm::X86::getFeaturesForCPU(StringRef CPU,
SmallVectorImpl<StringRef> &EnabledFeatures) {
auto I = llvm::find_if(Processors,
@@ -557,7 +595,9 @@ void llvm::X86::getFeaturesForCPU(StringRef CPU,
Bits &= ~Feature64BIT;
// Add the string version of all set bits.
- getFeatureBitsAsStrings(Bits, EnabledFeatures);
+ for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
+ if (Bits[i] && !FeatureInfos[i].Name.empty())
+ EnabledFeatures.push_back(FeatureInfos[i].Name);
}
// For each feature that is (transitively) implied by this feature, set it.
@@ -591,9 +631,9 @@ static void getImpliedDisabledFeatures(FeatureBitset &Bits, unsigned Value) {
} while (Prev != Bits);
}
-void llvm::X86::getImpliedFeatures(
+void llvm::X86::updateImpliedFeatures(
StringRef Feature, bool Enabled,
- SmallVectorImpl<StringRef> &ImpliedFeatures) {
+ StringMap<bool> &Features) {
auto I = llvm::find_if(
FeatureInfos, [&](const FeatureInfo &FI) { return FI.Name == Feature; });
if (I == std::end(FeatureInfos)) {
@@ -609,6 +649,8 @@ void llvm::X86::getImpliedFeatures(
getImpliedDisabledFeatures(ImpliedBits,
std::distance(std::begin(FeatureInfos), I));
- // Convert all the found bits into strings.
- getFeatureBitsAsStrings(ImpliedBits, ImpliedFeatures);
+ // Update the map entry for all implied features.
+ for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
+ if (ImpliedBits[i] && !FeatureInfos[i].Name.empty())
+ Features[FeatureInfos[i].Name] = Enabled;
}
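
The X86TargetParser changes above replace getImpliedFeatures() with updateImpliedFeatures() over a StringMap and add parseTuneCPU(), which rejects the new x86-64-v2/-v3/-v4 psABI levels for tuning. A rough sketch of the reshaped entry points follows; the feature and CPU names used here are only examples.

  #include "llvm/ADT/StringMap.h"
  #include "llvm/Support/X86TargetParser.h"

  void collectX86Features() {
    llvm::StringMap<bool> Features;
    // Enabling avx512bw also flips its transitive requirements
    // (avx512f, avx2, ...) to true in the same map.
    llvm::X86::updateImpliedFeatures("avx512bw", /*Enabled=*/true, Features);

    // The psABI levels are accepted for -march but filtered out of -mtune.
    bool IsTunable =
        llvm::X86::parseTuneCPU("x86-64-v3", /*Only64Bit=*/true) !=
        llvm::X86::CK_None;       // false: listed in NoTuneList above
    (void)IsTunable;
  }
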
diff --git a/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp
index ca8ffdc47afa..f68ba0d065c1 100644
--- a/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp
@@ -200,13 +200,12 @@ static UTF8Decoded decodeUTF8(StringRef Range) {
StringRef::iterator End = Range.end();
// 1 byte: [0x00, 0x7f]
// Bit pattern: 0xxxxxxx
- if ((*Position & 0x80) == 0) {
- return std::make_pair(*Position, 1);
+ if (Position < End && (*Position & 0x80) == 0) {
+ return std::make_pair(*Position, 1);
}
// 2 bytes: [0x80, 0x7ff]
// Bit pattern: 110xxxxx 10xxxxxx
- if (Position + 1 != End &&
- ((*Position & 0xE0) == 0xC0) &&
+ if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) &&
((*(Position + 1) & 0xC0) == 0x80)) {
uint32_t codepoint = ((*Position & 0x1F) << 6) |
(*(Position + 1) & 0x3F);
@@ -215,8 +214,7 @@ static UTF8Decoded decodeUTF8(StringRef Range) {
}
// 3 bytes: [0x8000, 0xffff]
// Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
- if (Position + 2 != End &&
- ((*Position & 0xF0) == 0xE0) &&
+ if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) &&
((*(Position + 1) & 0xC0) == 0x80) &&
((*(Position + 2) & 0xC0) == 0x80)) {
uint32_t codepoint = ((*Position & 0x0F) << 12) |
@@ -230,8 +228,7 @@ static UTF8Decoded decodeUTF8(StringRef Range) {
}
// 4 bytes: [0x10000, 0x10FFFF]
// Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- if (Position + 3 != End &&
- ((*Position & 0xF8) == 0xF0) &&
+ if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) &&
((*(Position + 1) & 0xC0) == 0x80) &&
((*(Position + 2) & 0xC0) == 0x80) &&
((*(Position + 3) & 0xC0) == 0x80)) {
@@ -718,7 +715,7 @@ std::string yaml::escape(StringRef Input, bool EscapePrintable) {
// Found invalid char.
SmallString<4> Val;
encodeUTF8(0xFFFD, Val);
- EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
+ llvm::append_range(EscapedInput, Val);
// FIXME: Error reporting.
return EscapedInput;
}
@@ -749,6 +746,92 @@ std::string yaml::escape(StringRef Input, bool EscapePrintable) {
return EscapedInput;
}
+llvm::Optional<bool> yaml::parseBool(StringRef S) {
+ switch (S.size()) {
+ case 1:
+ switch (S.front()) {
+ case 'y':
+ case 'Y':
+ return true;
+ case 'n':
+ case 'N':
+ return false;
+ default:
+ return None;
+ }
+ case 2:
+ switch (S.front()) {
+ case 'O':
+ if (S[1] == 'N') // ON
+ return true;
+ LLVM_FALLTHROUGH;
+ case 'o':
+ if (S[1] == 'n') //[Oo]n
+ return true;
+ return None;
+ case 'N':
+ if (S[1] == 'O') // NO
+ return false;
+ LLVM_FALLTHROUGH;
+ case 'n':
+ if (S[1] == 'o') //[Nn]o
+ return false;
+ return None;
+ default:
+ return None;
+ }
+ case 3:
+ switch (S.front()) {
+ case 'O':
+ if (S.drop_front() == "FF") // OFF
+ return false;
+ LLVM_FALLTHROUGH;
+ case 'o':
+ if (S.drop_front() == "ff") //[Oo]ff
+ return false;
+ return None;
+ case 'Y':
+ if (S.drop_front() == "ES") // YES
+ return true;
+ LLVM_FALLTHROUGH;
+ case 'y':
+ if (S.drop_front() == "es") //[Yy]es
+ return true;
+ return None;
+ default:
+ return None;
+ }
+ case 4:
+ switch (S.front()) {
+ case 'T':
+ if (S.drop_front() == "RUE") // TRUE
+ return true;
+ LLVM_FALLTHROUGH;
+ case 't':
+ if (S.drop_front() == "rue") //[Tt]rue
+ return true;
+ return None;
+ default:
+ return None;
+ }
+ case 5:
+ switch (S.front()) {
+ case 'F':
+ if (S.drop_front() == "ALSE") // FALSE
+ return false;
+ LLVM_FALLTHROUGH;
+ case 'f':
+ if (S.drop_front() == "alse") //[Ff]alse
+ return false;
+ return None;
+ default:
+ return None;
+ }
+ default:
+ return None;
+ }
+}
+
Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
std::error_code *EC)
: SM(sm), ShowColors(ShowColors), EC(EC) {
@@ -773,7 +856,7 @@ void Scanner::init(MemoryBufferRef Buffer) {
IsSimpleKeyAllowed = true;
Failed = false;
std::unique_ptr<MemoryBuffer> InputBufferOwner =
- MemoryBuffer::getMemBuffer(Buffer);
+ MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false);
SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
}
@@ -898,17 +981,9 @@ void Scanner::advanceWhile(SkipWhileFunc Func) {
Current = Final;
}
-static bool is_ns_hex_digit(const char C) {
- return (C >= '0' && C <= '9')
- || (C >= 'a' && C <= 'z')
- || (C >= 'A' && C <= 'Z');
-}
+static bool is_ns_hex_digit(const char C) { return isAlnum(C); }
-static bool is_ns_word_char(const char C) {
- return C == '-'
- || (C >= 'a' && C <= 'z')
- || (C >= 'A' && C <= 'Z');
-}
+static bool is_ns_word_char(const char C) { return C == '-' || isAlpha(C); }
void Scanner::scan_ns_uri_char() {
while (true) {
@@ -1036,7 +1111,7 @@ bool Scanner::rollIndent( int ToColumn
}
void Scanner::skipComment() {
- if (*Current != '#')
+ if (Current == End || *Current != '#')
return;
while (true) {
// This may skip more than one byte, thus Column is only incremented
@@ -1051,7 +1126,7 @@ void Scanner::skipComment() {
void Scanner::scanToNextToken() {
while (true) {
- while (*Current == ' ' || *Current == '\t') {
+ while (Current != End && (*Current == ' ' || *Current == '\t')) {
skip(1);
}
@@ -1286,7 +1361,7 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
&& wasEscaped(Start + 1, Current));
} else {
skip(1);
- while (true) {
+ while (Current != End) {
// Skip a ' followed by another '.
if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
skip(2);
@@ -1334,13 +1409,14 @@ bool Scanner::scanPlainScalar() {
unsigned LeadingBlanks = 0;
assert(Indent >= -1 && "Indent must be >= -1 !");
unsigned indent = static_cast<unsigned>(Indent + 1);
- while (true) {
+ while (Current != End) {
if (*Current == '#')
break;
- while (!isBlankOrBreak(Current)) {
- if ( FlowLevel && *Current == ':'
- && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
+ while (Current != End && !isBlankOrBreak(Current)) {
+ if (FlowLevel && *Current == ':' &&
+ (Current + 1 == End ||
+ !(isBlankOrBreak(Current + 1) || *(Current + 1) == ','))) {
setError("Found unexpected ':' while scanning a plain scalar", Current);
return false;
}
@@ -1410,7 +1486,7 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
StringRef::iterator Start = Current;
unsigned ColStart = Column;
skip(1);
- while(true) {
+ while (Current != End) {
if ( *Current == '[' || *Current == ']'
|| *Current == '{' || *Current == '}'
|| *Current == ','
@@ -1423,7 +1499,7 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
++Column;
}
- if (Start == Current) {
+ if (Start + 1 == Current) {
setError("Got empty alias or anchor", Start);
return false;
}
@@ -1775,12 +1851,13 @@ Stream::~Stream() = default;
bool Stream::failed() { return scanner->failed(); }
-void Stream::printError(Node *N, const Twine &Msg) {
- SMRange Range = N ? N->getSourceRange() : SMRange();
- scanner->printError( Range.Start
- , SourceMgr::DK_Error
- , Msg
- , Range);
+void Stream::printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind) {
+ printError(N ? N->getSourceRange() : SMRange(), Msg, Kind);
+}
+
+void Stream::printError(const SMRange &Range, const Twine &Msg,
+ SourceMgr::DiagKind Kind) {
+ scanner->printError(Range.Start, Kind, Msg, Range);
}
document_iterator Stream::begin() {
@@ -1899,11 +1976,11 @@ StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
Storage.reserve(UnquotedValue.size());
for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
StringRef Valid(UnquotedValue.begin(), i);
- Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+ llvm::append_range(Storage, Valid);
Storage.push_back('\'');
UnquotedValue = UnquotedValue.substr(i + 2);
}
- Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+ llvm::append_range(Storage, UnquotedValue);
return StringRef(Storage.begin(), Storage.size());
}
return UnquotedValue;
@@ -1922,7 +1999,7 @@ StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
// Insert all previous chars into Storage.
StringRef Valid(UnquotedValue.begin(), i);
- Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+ llvm::append_range(Storage, Valid);
// Chop off inserted chars.
UnquotedValue = UnquotedValue.substr(i);
@@ -2054,7 +2131,7 @@ StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
UnquotedValue = UnquotedValue.substr(1);
}
}
- Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+ llvm::append_range(Storage, UnquotedValue);
return StringRef(Storage.begin(), Storage.size());
}
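
Among the YAMLParser changes above is the new yaml::parseBool() helper, which recognizes the YAML 1.1 boolean spellings (y/n, yes/no, on/off, true/false in their cased variants). A small sketch of its behaviour; the sample strings are illustrative only.

  #include "llvm/ADT/Optional.h"
  #include "llvm/Support/YAMLParser.h"

  bool demoParseBool() {
    llvm::Optional<bool> A = llvm::yaml::parseBool("Yes");   // true
    llvm::Optional<bool> B = llvm::yaml::parseBool("off");   // false
    llvm::Optional<bool> C = llvm::yaml::parseBool("maybe"); // None
    return A.getValueOr(false) && !B.getValueOr(true) && !C.hasValue();
  }
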
diff --git a/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp
index 9ac7c65e19f7..aa6163a76161 100644
--- a/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp
@@ -48,6 +48,10 @@ void IO::setContext(void *Context) {
Ctxt = Context;
}
+void IO::setAllowUnknownKeys(bool Allow) {
+ llvm_unreachable("Only supported for Input");
+}
+
//===----------------------------------------------------------------------===//
// Input
//===----------------------------------------------------------------------===//
@@ -171,7 +175,7 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
return false;
}
MN->ValidKeys.push_back(Key);
- HNode *Value = MN->Mapping[Key].get();
+ HNode *Value = MN->Mapping[Key].first.get();
if (!Value) {
if (Required)
setError(CurrentNode, Twine("missing required key '") + Key + "'");
@@ -197,8 +201,12 @@ void Input::endMapping() {
return;
for (const auto &NN : MN->Mapping) {
if (!is_contained(MN->ValidKeys, NN.first())) {
- setError(NN.second.get(), Twine("unknown key '") + NN.first() + "'");
- break;
+ const SMRange &ReportLoc = NN.second.second;
+ if (!AllowUnknownKeys) {
+ setError(ReportLoc, Twine("unknown key '") + NN.first() + "'");
+ break;
+ } else
+ reportWarning(ReportLoc, Twine("unknown key '") + NN.first() + "'");
}
}
}
@@ -370,6 +378,24 @@ void Input::setError(Node *node, const Twine &message) {
EC = make_error_code(errc::invalid_argument);
}
+void Input::setError(const SMRange &range, const Twine &message) {
+ Strm->printError(range, message);
+ EC = make_error_code(errc::invalid_argument);
+}
+
+void Input::reportWarning(HNode *hnode, const Twine &message) {
+ assert(hnode && "HNode must not be NULL");
+ Strm->printError(hnode->_node, message, SourceMgr::DK_Warning);
+}
+
+void Input::reportWarning(Node *node, const Twine &message) {
+ Strm->printError(node, message, SourceMgr::DK_Warning);
+}
+
+void Input::reportWarning(const SMRange &range, const Twine &message) {
+ Strm->printError(range, message, SourceMgr::DK_Warning);
+}
+
std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
SmallString<128> StringStorage;
if (ScalarNode *SN = dyn_cast<ScalarNode>(N)) {
@@ -413,7 +439,8 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
auto ValueHNode = createHNodes(Value);
if (EC)
break;
- mapHNode->Mapping[KeyStr] = std::move(ValueHNode);
+ mapHNode->Mapping[KeyStr] =
+ std::make_pair(std::move(ValueHNode), KeyNode->getSourceRange());
}
return std::move(mapHNode);
} else if (isa<NullNode>(N)) {
@@ -428,6 +455,8 @@ void Input::setError(const Twine &Message) {
setError(CurrentNode, Message);
}
+void Input::setAllowUnknownKeys(bool Allow) { AllowUnknownKeys = Allow; }
+
bool Input::canElideEmptySequence() {
return false;
}
@@ -563,7 +592,7 @@ void Output::endSequence() {
// If we did not emit anything, we should explicitly emit an empty sequence
if (StateStack.back() == inSeqFirstElement) {
Padding = PaddingBeforeContainer;
- newLineCheck();
+ newLineCheck(/*EmptySequence=*/true);
output("[]");
Padding = "\n";
}
@@ -769,7 +798,7 @@ void Output::outputNewLine() {
// if seq in middle, use "- " if firstKey, else use " "
//
-void Output::newLineCheck() {
+void Output::newLineCheck(bool EmptySequence) {
if (Padding != "\n") {
output(Padding);
Padding = {};
@@ -778,7 +807,7 @@ void Output::newLineCheck() {
outputNewLine();
Padding = {};
- if (StateStack.size() == 0)
+ if (StateStack.size() == 0 || EmptySequence)
return;
unsigned Indent = StateStack.size() - 1;
@@ -802,7 +831,6 @@ void Output::newLineCheck() {
if (OutputDash) {
output("- ");
}
-
}
void Output::paddedKey(StringRef key) {
@@ -856,11 +884,8 @@ void ScalarTraits<bool>::output(const bool &Val, void *, raw_ostream &Out) {
}
StringRef ScalarTraits<bool>::input(StringRef Scalar, void *, bool &Val) {
- if (Scalar.equals("true")) {
- Val = true;
- return StringRef();
- } else if (Scalar.equals("false")) {
- Val = false;
+ if (llvm::Optional<bool> Parsed = parseBool(Scalar)) {
+ Val = *Parsed;
return StringRef();
}
return "invalid boolean";
@@ -1031,8 +1056,7 @@ StringRef ScalarTraits<float>::input(StringRef Scalar, void *, float &Val) {
}
void ScalarTraits<Hex8>::output(const Hex8 &Val, void *, raw_ostream &Out) {
- uint8_t Num = Val;
- Out << format("0x%02X", Num);
+ Out << format("0x%" PRIX8, (uint8_t)Val);
}
StringRef ScalarTraits<Hex8>::input(StringRef Scalar, void *, Hex8 &Val) {
@@ -1046,8 +1070,7 @@ StringRef ScalarTraits<Hex8>::input(StringRef Scalar, void *, Hex8 &Val) {
}
void ScalarTraits<Hex16>::output(const Hex16 &Val, void *, raw_ostream &Out) {
- uint16_t Num = Val;
- Out << format("0x%04X", Num);
+ Out << format("0x%" PRIX16, (uint16_t)Val);
}
StringRef ScalarTraits<Hex16>::input(StringRef Scalar, void *, Hex16 &Val) {
@@ -1061,8 +1084,7 @@ StringRef ScalarTraits<Hex16>::input(StringRef Scalar, void *, Hex16 &Val) {
}
void ScalarTraits<Hex32>::output(const Hex32 &Val, void *, raw_ostream &Out) {
- uint32_t Num = Val;
- Out << format("0x%08X", Num);
+ Out << format("0x%" PRIX32, (uint32_t)Val);
}
StringRef ScalarTraits<Hex32>::input(StringRef Scalar, void *, Hex32 &Val) {
@@ -1076,8 +1098,7 @@ StringRef ScalarTraits<Hex32>::input(StringRef Scalar, void *, Hex32 &Val) {
}
void ScalarTraits<Hex64>::output(const Hex64 &Val, void *, raw_ostream &Out) {
- uint64_t Num = Val;
- Out << format("0x%016llX", Num);
+ Out << format("0x%" PRIX64, (uint64_t)Val);
}
StringRef ScalarTraits<Hex64>::input(StringRef Scalar, void *, Hex64 &Val) {
@@ -1087,3 +1108,15 @@ StringRef ScalarTraits<Hex64>::input(StringRef Scalar, void *, Hex64 &Val) {
Val = Num;
return StringRef();
}
+
+void ScalarTraits<VersionTuple>::output(const VersionTuple &Val, void *,
+ llvm::raw_ostream &Out) {
+ Out << Val.getAsString();
+}
+
+StringRef ScalarTraits<VersionTuple>::input(StringRef Scalar, void *,
+ VersionTuple &Val) {
+ if (Val.tryParse(Scalar))
+ return "invalid version format";
+ return StringRef();
+}
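
The YAMLTraits changes above let yaml::Input downgrade unknown mapping keys from hard errors to warnings via setAllowUnknownKeys(). A hedged sketch of that knob; the Config struct, its mapping traits, and the "jobs" key are made up for illustration.

  #include "llvm/Support/YAMLTraits.h"

  struct Config { int Jobs = 1; };

  namespace llvm {
  namespace yaml {
  template <> struct MappingTraits<Config> {
    static void mapping(IO &YamlIO, Config &C) {
      YamlIO.mapOptional("jobs", C.Jobs);
    }
  };
  } // namespace yaml
  } // namespace llvm

  bool parseConfig(llvm::StringRef Text, Config &C) {
    llvm::yaml::Input YIn(Text);
    // New in this patch: unknown mapping keys are reported as warnings
    // instead of aborting the parse with an error.
    YIn.setAllowUnknownKeys(true);
    YIn >> C;
    return !YIn.error();
  }
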
diff --git a/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
index f2d78d773239..8f10d136bc38 100644
--- a/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
@@ -12,7 +12,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
@@ -30,7 +29,6 @@
#include <cstdio>
#include <iterator>
#include <sys/stat.h>
-#include <system_error>
// <fcntl.h> may provide O_BINARY.
#if defined(HAVE_FCNTL_H)
@@ -620,8 +618,9 @@ raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
/// FD is the file descriptor that this writes to. If ShouldClose is true, this
/// closes the file when the stream is destroyed.
-raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
- : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose) {
+raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered,
+ OStreamKind K)
+ : raw_pwrite_stream(unbuffered, K), FD(fd), ShouldClose(shouldClose) {
if (FD < 0 ) {
ShouldClose = false;
return;
@@ -858,7 +857,24 @@ bool raw_fd_ostream::is_displayed() const {
}
bool raw_fd_ostream::has_colors() const {
- return sys::Process::FileDescriptorHasColors(FD);
+ if (!HasColors)
+ HasColors = sys::Process::FileDescriptorHasColors(FD);
+ return *HasColors;
+}
+
+Expected<sys::fs::FileLocker> raw_fd_ostream::lock() {
+ std::error_code EC = sys::fs::lockFile(FD);
+ if (!EC)
+ return sys::fs::FileLocker(FD);
+ return errorCodeToError(EC);
+}
+
+Expected<sys::fs::FileLocker>
+raw_fd_ostream::tryLockFor(std::chrono::milliseconds Timeout) {
+ std::error_code EC = sys::fs::tryLockFile(FD, Timeout);
+ if (!EC)
+ return sys::fs::FileLocker(FD);
+ return errorCodeToError(EC);
}
void raw_fd_ostream::anchor() {}
@@ -888,6 +904,37 @@ raw_ostream &llvm::nulls() {
}
//===----------------------------------------------------------------------===//
+// File Streams
+//===----------------------------------------------------------------------===//
+
+raw_fd_stream::raw_fd_stream(StringRef Filename, std::error_code &EC)
+ : raw_fd_ostream(getFD(Filename, EC, sys::fs::CD_CreateAlways,
+ sys::fs::FA_Write | sys::fs::FA_Read,
+ sys::fs::OF_None),
+ true, false, OStreamKind::OK_FDStream) {
+ if (EC)
+ return;
+
+ // Do not support non-seekable files.
+ if (!supportsSeeking())
+ EC = std::make_error_code(std::errc::invalid_argument);
+}
+
+ssize_t raw_fd_stream::read(char *Ptr, size_t Size) {
+ assert(get_fd() >= 0 && "File already closed.");
+ ssize_t Ret = ::read(get_fd(), (void *)Ptr, Size);
+ if (Ret >= 0)
+ inc_pos(Ret);
+ else
+ error_detected(std::error_code(errno, std::generic_category()));
+ return Ret;
+}
+
+bool raw_fd_stream::classof(const raw_ostream *OS) {
+ return OS->get_kind() == OStreamKind::OK_FDStream;
+}
+
+//===----------------------------------------------------------------------===//
// raw_string_ostream
//===----------------------------------------------------------------------===//
@@ -940,3 +987,5 @@ void raw_null_ostream::pwrite_impl(const char *Ptr, size_t Size,
void raw_pwrite_stream::anchor() {}
void buffer_ostream::anchor() {}
+
+void buffer_unique_ostream::anchor() {}
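
The raw_ostream changes above surface the locking primitives through raw_fd_ostream (lock()/tryLockFor()) and add a read-capable raw_fd_stream. Below is a brief sketch of the RAII-style FileLocker usage; the "out.log" file name and the 500 ms timeout are assumptions for illustration.

  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"
  #include <chrono>

  void writeLogExclusively() {
    std::error_code EC;
    llvm::raw_fd_ostream OS("out.log", EC);   // hypothetical log file
    if (EC)
      return;
    // FileLocker releases the lock when it goes out of scope.
    if (auto Lock = OS.tryLockFor(std::chrono::milliseconds(500))) {
      OS << "exclusive append\n";
    } else {
      llvm::consumeError(Lock.takeError());   // lock not acquired in time
    }
  }
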
diff --git a/contrib/llvm-project/llvm/lib/TableGen/DetailedRecordsBackend.cpp b/contrib/llvm-project/llvm/lib/TableGen/DetailedRecordsBackend.cpp
new file mode 100644
index 000000000000..2c3c3358b347
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/TableGen/DetailedRecordsBackend.cpp
@@ -0,0 +1,203 @@
+//===- DetailedRecordsBackend.cpp - Detailed Records Report -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This Tablegen backend prints a report that includes all the global
+// variables, classes, and records in complete detail. It includes more
+// detail than the default TableGen printer backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <algorithm>
+#include <set>
+#include <string>
+#include <vector>
+
+#define DEBUG_TYPE "detailed-records-backend"
+
+#define NL "\n"
+
+using namespace llvm;
+
+namespace {
+
+class DetailedRecordsEmitter {
+private:
+ RecordKeeper &Records;
+
+public:
+ DetailedRecordsEmitter(RecordKeeper &RK) : Records(RK) {}
+
+ void run(raw_ostream &OS);
+ void printReportHeading(raw_ostream &OS);
+ void printVariables(raw_ostream &OS);
+ void printClasses(raw_ostream &OS);
+ void printRecords(raw_ostream &OS);
+ void printSectionHeading(std::string Title, int Count, raw_ostream &OS);
+ void printDefms(Record *Rec, raw_ostream &OS);
+ void printTemplateArgs(Record *Rec, raw_ostream &OS);
+ void printSuperclasses(Record *Rec, raw_ostream &OS);
+ void printFields(Record *Rec, raw_ostream &OS);
+}; // emitter class
+
+} // anonymous namespace
+
+// Print the report.
+void DetailedRecordsEmitter::run(raw_ostream &OS) {
+ printReportHeading(OS);
+ printVariables(OS);
+ printClasses(OS);
+ printRecords(OS);
+}
+
+// Print the report heading, including the source file name.
+void DetailedRecordsEmitter::printReportHeading(raw_ostream &OS) {
+ OS << formatv("DETAILED RECORDS for file {0}\n", Records.getInputFilename());
+}
+
+// Print the global variables.
+void DetailedRecordsEmitter::printVariables(raw_ostream &OS) {
+ const auto GlobalList = Records.getGlobals();
+ printSectionHeading("Global Variables", GlobalList.size(), OS);
+
+ OS << NL;
+ for (const auto &Var : GlobalList) {
+ OS << Var.first << " = " << Var.second->getAsString() << NL;
+ }
+}
+
+// Print the classes, including the template arguments, superclasses,
+// and fields.
+void DetailedRecordsEmitter::printClasses(raw_ostream &OS) {
+ const auto &ClassList = Records.getClasses();
+ printSectionHeading("Classes", ClassList.size(), OS);
+
+ for (const auto &ClassPair : ClassList) {
+ auto *const Class = ClassPair.second.get();
+ OS << formatv("\n{0} |{1}|\n", Class->getNameInitAsString(),
+ SrcMgr.getFormattedLocationNoOffset(Class->getLoc().front()));
+ printTemplateArgs(Class, OS);
+ printSuperclasses(Class, OS);
+ printFields(Class, OS);
+ }
+}
+
+// Print the records, including the defm sequences, superclasses,
+// and fields.
+void DetailedRecordsEmitter::printRecords(raw_ostream &OS) {
+ const auto &RecordList = Records.getDefs();
+ printSectionHeading("Records", RecordList.size(), OS);
+
+ for (const auto &RecPair : RecordList) {
+ auto *const Rec = RecPair.second.get();
+ OS << formatv("\n{0} |{1}|\n", Rec->getNameInitAsString(),
+ SrcMgr.getFormattedLocationNoOffset(Rec->getLoc().front()));
+ printDefms(Rec, OS);
+ printSuperclasses(Rec, OS);
+ printFields(Rec, OS);
+ }
+}
+
+// Print a section heading with the name of the section and
+// the item count.
+void DetailedRecordsEmitter::printSectionHeading(std::string Title, int Count,
+ raw_ostream &OS) {
+ OS << formatv("\n{0} {1} ({2}) {0}\n", "--------------------", Title, Count);
+}
+
+// Print the record's defm source locations, if any. Note that they
+// are stored in the reverse order of their invocation.
+void DetailedRecordsEmitter::printDefms(Record *Rec, raw_ostream &OS) {
+ const auto &LocList = Rec->getLoc();
+ if (LocList.size() < 2)
+ return;
+
+ OS << " Defm sequence:";
+ for (unsigned I = LocList.size() - 1; I >= 1; --I) {
+ OS << formatv(" |{0}|", SrcMgr.getFormattedLocationNoOffset(LocList[I]));
+ }
+ OS << NL;
+}
+
+// Print the template arguments of a class.
+void DetailedRecordsEmitter::printTemplateArgs(Record *Rec,
+ raw_ostream &OS) {
+ ArrayRef<Init *> Args = Rec->getTemplateArgs();
+ if (Args.empty()) {
+ OS << " Template args: (none)\n";
+ return;
+ }
+
+ OS << " Template args:\n";
+ for (const Init *ArgName : Args) {
+ const RecordVal *Value = Rec->getValue(ArgName);
+ assert(Value && "Template argument value not found.");
+ OS << " ";
+ Value->print(OS, false);
+ OS << formatv(" |{0}|", SrcMgr.getFormattedLocationNoOffset(Value->getLoc()));
+ OS << NL;
+ }
+}
+
+// Print the superclasses of a class or record. Indirect superclasses
+// are enclosed in parentheses.
+void DetailedRecordsEmitter::printSuperclasses(Record *Rec, raw_ostream &OS) {
+ ArrayRef<std::pair<Record *, SMRange>> Superclasses = Rec->getSuperClasses();
+ if (Superclasses.empty()) {
+ OS << " Superclasses: (none)\n";
+ return;
+ }
+
+ OS << " Superclasses:";
+ for (const auto &SuperclassPair : Superclasses) {
+ auto *ClassRec = SuperclassPair.first;
+ if (Rec->hasDirectSuperClass(ClassRec))
+ OS << formatv(" {0}", ClassRec->getNameInitAsString());
+ else
+ OS << formatv(" ({0})", ClassRec->getNameInitAsString());
+ }
+ OS << NL;
+}
+
+// Print the fields of a class or record, including their source locations.
+void DetailedRecordsEmitter::printFields(Record *Rec, raw_ostream &OS) {
+ const auto &ValueList = Rec->getValues();
+ if (ValueList.empty()) {
+ OS << " Fields: (none)\n";
+ return;
+ }
+
+ OS << " Fields:\n";
+ for (const RecordVal &Value : ValueList)
+ if (!Rec->isTemplateArg(Value.getNameInit())) {
+ OS << " ";
+ Value.print(OS, false);
+ OS << formatv(" |{0}|\n",
+ SrcMgr.getFormattedLocationNoOffset(Value.getLoc()));
+ }
+}
+
+namespace llvm {
+
+// This function is called by TableGen after parsing the files.
+
+void EmitDetailedRecords(RecordKeeper &RK, raw_ostream &OS) {
+ // Instantiate the emitter class and invoke run().
+ DetailedRecordsEmitter(RK).run(OS);
+}
+
+} // namespace llvm
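For context, a backend such as the new DetailedRecordsEmitter is reached through the usual TableGenMain callback. The driver below is a simplified sketch, not part of the diff; the forward declaration stands in for whatever header llvm-tblgen actually uses to expose EmitDetailedRecords.

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TableGen/Main.h"
    #include "llvm/TableGen/Record.h"

    using namespace llvm;

    // Defined in DetailedRecordsBackend.cpp above; re-declared here only to
    // keep the sketch self-contained.
    namespace llvm {
    void EmitDetailedRecords(RecordKeeper &RK, raw_ostream &OS);
    }

    // TableGenMainFn convention: return true on error, false on success.
    static bool runBackend(raw_ostream &OS, RecordKeeper &Records) {
      EmitDetailedRecords(Records, OS);
      return false;
    }

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      return TableGenMain(argv[0], &runBackend);
    }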
diff --git a/contrib/llvm-project/llvm/lib/TableGen/Error.cpp b/contrib/llvm-project/llvm/lib/TableGen/Error.cpp
index 54b063cb4f8d..eed4de67942a 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/Error.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/Error.cpp
@@ -11,11 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/TableGen/Error.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/WithColor.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include <cstdlib>
namespace llvm {
@@ -39,12 +40,54 @@ static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind,
"instantiated from multiclass");
}
-void PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; }
+// Functions to print notes.
+
+void PrintNote(const Twine &Msg) {
+ WithColor::note() << Msg << "\n";
+}
void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg);
}
+// Functions to print fatal notes.
+
+void PrintFatalNote(const Twine &Msg) {
+ PrintNote(Msg);
+ // The following call runs the file cleanup handlers.
+ sys::RunInterruptHandlers();
+ std::exit(1);
+}
+
+void PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
+ PrintNote(NoteLoc, Msg);
+ // The following call runs the file cleanup handlers.
+ sys::RunInterruptHandlers();
+ std::exit(1);
+}
+
+// This method takes a Record and uses the source location
+// stored in it.
+void PrintFatalNote(const Record *Rec, const Twine &Msg) {
+ PrintNote(Rec->getLoc(), Msg);
+ // The following call runs the file cleanup handlers.
+ sys::RunInterruptHandlers();
+ std::exit(1);
+}
+
+// This method takes a RecordVal and uses the source location
+// stored in it.
+void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) {
+ PrintNote(RecVal->getLoc(), Msg);
+ // The following call runs the file cleanup handlers.
+ sys::RunInterruptHandlers();
+ std::exit(1);
+}
+
+// Functions to print warnings.
+
+void PrintWarning(const Twine &Msg) { WithColor::warning() << Msg << "\n"; }
+
void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) {
PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
}
@@ -53,7 +96,9 @@ void PrintWarning(const char *Loc, const Twine &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Warning, Msg);
}
-void PrintWarning(const Twine &Msg) { WithColor::warning() << Msg << "\n"; }
+// Functions to print errors.
+
+void PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; }
void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
@@ -63,7 +108,19 @@ void PrintError(const char *Loc, const Twine &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
}
-void PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; }
+// This method takes a Record and uses the source location
+// stored in it.
+void PrintError(const Record *Rec, const Twine &Msg) {
+ PrintMessage(Rec->getLoc(), SourceMgr::DK_Error, Msg);
+}
+
+// This method takes a RecordVal and uses the source location
+// stored in it.
+void PrintError(const RecordVal *RecVal, const Twine &Msg) {
+ PrintMessage(RecVal->getLoc(), SourceMgr::DK_Error, Msg);
+}
+
+// Functions to print fatal errors.
void PrintFatalError(const Twine &Msg) {
PrintError(Msg);
@@ -79,4 +136,22 @@ void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
std::exit(1);
}
+// This method takes a Record and uses the source location
+// stored in it.
+void PrintFatalError(const Record *Rec, const Twine &Msg) {
+ PrintError(Rec->getLoc(), Msg);
+ // The following call runs the file cleanup handlers.
+ sys::RunInterruptHandlers();
+ std::exit(1);
+}
+
+// This method takes a RecordVal and uses the source location
+// stored in it.
+void PrintFatalError(const RecordVal *RecVal, const Twine &Msg) {
+ PrintError(RecVal->getLoc(), Msg);
+ // The following call runs the file cleanup handlers.
+ sys::RunInterruptHandlers();
+ std::exit(1);
+}
+
} // end namespace llvm
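The new PrintError/PrintFatalError/PrintFatalNote overloads let callers pass a Record or RecordVal instead of extracting the SMLoc themselves. A hedged sketch of how a backend might use them; the "Opcode" field name is invented for illustration.

    #include "llvm/TableGen/Error.h"
    #include "llvm/TableGen/Record.h"

    using namespace llvm;

    static void checkInstruction(const Record *Inst) {
      const RecordVal *Field = Inst->getValue("Opcode");
      if (!Field)
        PrintFatalError(Inst, "missing 'Opcode' field"); // uses Inst->getLoc()
      else if (isa<UnsetInit>(Field->getValue()))
        PrintError(Field, "'Opcode' is uninitialized");  // uses Field->getLoc()
    }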
diff --git a/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp b/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp
index 196644cda667..8ddfd9f04524 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp
@@ -29,7 +29,6 @@ private:
RecordKeeper &Records;
json::Value translateInit(const Init &I);
- json::Array listSuperclasses(const Record &R);
public:
JSONEmitter(RecordKeeper &R);
@@ -59,8 +58,6 @@ json::Value JSONEmitter::translateInit(const Init &I) {
return Int->getValue();
} else if (auto *Str = dyn_cast<StringInit>(&I)) {
return Str->getValue();
- } else if (auto *Code = dyn_cast<CodeInit>(&I)) {
- return Code->getValue();
} else if (auto *List = dyn_cast<ListInit>(&I)) {
json::Array array;
for (auto val : *List)
@@ -147,7 +144,7 @@ void JSONEmitter::run(raw_ostream &OS) {
for (const RecordVal &RV : Def.getValues()) {
if (!Def.isTemplateArg(RV.getNameInit())) {
auto Name = RV.getNameInitAsString();
- if (RV.getPrefix())
+ if (RV.isNonconcreteOK())
fields.push_back(Name);
obj[Name] = translateInit(*RV.getValue());
}
diff --git a/contrib/llvm-project/llvm/lib/TableGen/Main.cpp b/contrib/llvm-project/llvm/lib/TableGen/Main.cpp
index 77f1b61cf930..0ace5363dd05 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/Main.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/Main.cpp
@@ -52,6 +52,9 @@ MacroNames("D", cl::desc("Name of the macro to be defined"),
static cl::opt<bool>
WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
+static cl::opt<bool>
+TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
+
static int reportError(const char *ProgName, Twine Msg) {
errs() << ProgName << ": " << Msg;
errs().flush();
@@ -83,13 +86,20 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) {
int llvm::TableGenMain(const char *argv0, TableGenMainFn *MainFn) {
RecordKeeper Records;
+ if (TimePhases)
+ Records.startPhaseTiming();
+
// Parse the input file.
+
+ Records.startTimer("Parse, build records");
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(InputFilename);
if (std::error_code EC = FileOrErr.getError())
return reportError(argv0, "Could not open input file '" + InputFilename +
"': " + EC.message() + "\n");
+ Records.saveInputFilename(InputFilename);
+
// Tell SrcMgr about this buffer, which is what TGParser will pick up.
SrcMgr.AddNewSourceBuffer(std::move(*FileOrErr), SMLoc());
@@ -101,11 +111,15 @@ int llvm::TableGenMain(const char *argv0, TableGenMainFn *MainFn) {
if (Parser.ParseFile())
return 1;
+ Records.stopTimer();
// Write output to memory.
+ Records.startBackendTimer("Backend overall");
std::string OutString;
raw_string_ostream Out(OutString);
- if (MainFn(Out, Records))
+ unsigned status = MainFn(Out, Records);
+ Records.stopBackendTimer();
+ if (status)
return 1;
// Always write the depfile, even if the main output hasn't changed.
@@ -117,26 +131,31 @@ int llvm::TableGenMain(const char *argv0, TableGenMainFn *MainFn) {
return Ret;
}
+ Records.startTimer("Write output");
+ bool WriteFile = true;
if (WriteIfChanged) {
// Only updates the real output file if there are any differences.
// This prevents recompilation of all the files depending on it if there
// aren't any.
if (auto ExistingOrErr = MemoryBuffer::getFile(OutputFilename))
if (std::move(ExistingOrErr.get())->getBuffer() == Out.str())
- return 0;
+ WriteFile = false;
}
-
- std::error_code EC;
- ToolOutputFile OutFile(OutputFilename, EC, sys::fs::OF_None);
- if (EC)
- return reportError(argv0, "error opening " + OutputFilename + ":" +
- EC.message() + "\n");
- OutFile.os() << Out.str();
+ if (WriteFile) {
+ std::error_code EC;
+ ToolOutputFile OutFile(OutputFilename, EC, sys::fs::OF_None);
+ if (EC)
+ return reportError(argv0, "error opening " + OutputFilename + ": " +
+ EC.message() + "\n");
+ OutFile.os() << Out.str();
+ if (ErrorsPrinted == 0)
+ OutFile.keep();
+ }
+
+ Records.stopTimer();
+ Records.stopPhaseTiming();
if (ErrorsPrinted > 0)
return reportError(argv0, Twine(ErrorsPrinted) + " errors.\n");
-
- // Declare success.
- OutFile.keep();
return 0;
}
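With the new -time-phases plumbing, a backend can carve out its own timing regions through RecordKeeper::startTimer; starting a timer stops whichever one was running, so no explicit stop is needed between regions. A small sketch (the phase names are arbitrary, not from the diff):

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TableGen/Record.h"

    using namespace llvm;

    static bool emitSomething(raw_ostream &OS, RecordKeeper &Records) {
      Records.startTimer("Analyze records"); // no-op unless -time-phases is given
      // ... analysis work ...

      Records.startTimer("Emit output");     // implicitly stops the timer above
      OS << "// generated\n";
      return false;                          // false == success
    }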
diff --git a/contrib/llvm-project/llvm/lib/TableGen/Record.cpp b/contrib/llvm-project/llvm/lib/TableGen/Record.cpp
index d3db004196b8..b6c6b8f031d3 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/Record.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/Record.cpp
@@ -43,15 +43,11 @@ using namespace llvm;
static BumpPtrAllocator Allocator;
-STATISTIC(CodeInitsConstructed,
- "The total number of unique CodeInits constructed");
-
//===----------------------------------------------------------------------===//
// Type implementations
//===----------------------------------------------------------------------===//
BitRecTy BitRecTy::Shared;
-CodeRecTy CodeRecTy::Shared;
IntRecTy IntRecTy::Shared;
StringRecTy StringRecTy::Shared;
DagRecTy DagRecTy::Shared;
@@ -113,27 +109,22 @@ bool IntRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
return kind==BitRecTyKind || kind==BitsRecTyKind || kind==IntRecTyKind;
}
-bool CodeRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
- RecTyKind Kind = RHS->getRecTyKind();
- return Kind == CodeRecTyKind || Kind == StringRecTyKind;
-}
-
std::string StringRecTy::getAsString() const {
return "string";
}
bool StringRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
RecTyKind Kind = RHS->getRecTyKind();
- return Kind == StringRecTyKind || Kind == CodeRecTyKind;
+ return Kind == StringRecTyKind;
}
std::string ListRecTy::getAsString() const {
- return "list<" + Ty->getAsString() + ">";
+ return "list<" + ElementTy->getAsString() + ">";
}
bool ListRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
if (const auto *ListTy = dyn_cast<ListRecTy>(RHS))
- return Ty->typeIsConvertibleTo(ListTy->getElementType());
+ return ElementTy->typeIsConvertibleTo(ListTy->getElementType());
return false;
}
@@ -241,13 +232,10 @@ bool RecordRecTy::typeIsA(const RecTy *RHS) const {
static RecordRecTy *resolveRecordTypes(RecordRecTy *T1, RecordRecTy *T2) {
SmallVector<Record *, 4> CommonSuperClasses;
- SmallVector<Record *, 4> Stack;
-
- Stack.insert(Stack.end(), T1->classes_begin(), T1->classes_end());
+ SmallVector<Record *, 4> Stack(T1->classes_begin(), T1->classes_end());
while (!Stack.empty()) {
- Record *R = Stack.back();
- Stack.pop_back();
+ Record *R = Stack.pop_back_val();
if (T2->isSubClassOf(R)) {
CommonSuperClasses.push_back(R);
@@ -514,45 +502,26 @@ IntInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
return BitsInit::get(NewBits);
}
-CodeInit *CodeInit::get(StringRef V, const SMLoc &Loc) {
- static StringSet<BumpPtrAllocator &> ThePool(Allocator);
-
- CodeInitsConstructed++;
-
- // Unlike StringMap, StringSet doesn't accept empty keys.
- if (V.empty())
- return new (Allocator) CodeInit("", Loc);
-
- // Location tracking prevents us from de-duping CodeInits as we're never
- // called with the same string and same location twice. However, we can at
- // least de-dupe the strings for a modest saving.
- auto &Entry = *ThePool.insert(V).first;
- return new(Allocator) CodeInit(Entry.getKey(), Loc);
-}
-
-StringInit *StringInit::get(StringRef V) {
- static StringMap<StringInit*, BumpPtrAllocator &> ThePool(Allocator);
+StringInit *StringInit::get(StringRef V, StringFormat Fmt) {
+ static StringMap<StringInit*, BumpPtrAllocator &> StringPool(Allocator);
+ static StringMap<StringInit*, BumpPtrAllocator &> CodePool(Allocator);
- auto &Entry = *ThePool.insert(std::make_pair(V, nullptr)).first;
- if (!Entry.second)
- Entry.second = new(Allocator) StringInit(Entry.getKey());
- return Entry.second;
+ if (Fmt == SF_String) {
+ auto &Entry = *StringPool.insert(std::make_pair(V, nullptr)).first;
+ if (!Entry.second)
+ Entry.second = new (Allocator) StringInit(Entry.getKey(), Fmt);
+ return Entry.second;
+ } else {
+ auto &Entry = *CodePool.insert(std::make_pair(V, nullptr)).first;
+ if (!Entry.second)
+ Entry.second = new (Allocator) StringInit(Entry.getKey(), Fmt);
+ return Entry.second;
+ }
}
Init *StringInit::convertInitializerTo(RecTy *Ty) const {
if (isa<StringRecTy>(Ty))
return const_cast<StringInit *>(this);
- if (isa<CodeRecTy>(Ty))
- return CodeInit::get(getValue(), SMLoc());
-
- return nullptr;
-}
-
-Init *CodeInit::convertInitializerTo(RecTy *Ty) const {
- if (isa<CodeRecTy>(Ty))
- return const_cast<CodeInit *>(this);
- if (isa<StringRecTy>(Ty))
- return StringInit::get(getValue());
return nullptr;
}
@@ -719,8 +688,10 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
if (DefInit *LHSd = dyn_cast<DefInit>(LHS))
return StringInit::get(LHSd->getAsString());
- if (IntInit *LHSi = dyn_cast<IntInit>(LHS))
+ if (IntInit *LHSi =
+ dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get())))
return StringInit::get(LHSi->getAsString());
+
} else if (isa<RecordRecTy>(getType())) {
if (StringInit *Name = dyn_cast<StringInit>(LHS)) {
if (!CurRec && !IsFinal)
@@ -761,6 +732,12 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
return NewInit;
break;
+ case NOT:
+ if (IntInit *LHSi =
+ dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get())))
+ return IntInit::get(LHSi->getValue() ? 0 : 1);
+ break;
+
case HEAD:
if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
assert(!LHSl->empty() && "Empty list in head");
@@ -780,16 +757,22 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
case SIZE:
if (ListInit *LHSl = dyn_cast<ListInit>(LHS))
return IntInit::get(LHSl->size());
+ if (DagInit *LHSd = dyn_cast<DagInit>(LHS))
+ return IntInit::get(LHSd->arg_size());
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
+ return IntInit::get(LHSs->getValue().size());
break;
case EMPTY:
if (ListInit *LHSl = dyn_cast<ListInit>(LHS))
return IntInit::get(LHSl->empty());
+ if (DagInit *LHSd = dyn_cast<DagInit>(LHS))
+ return IntInit::get(LHSd->arg_empty());
if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
return IntInit::get(LHSs->getValue().empty());
break;
- case GETOP:
+ case GETDAGOP:
if (DagInit *Dag = dyn_cast<DagInit>(LHS)) {
DefInit *DI = DefInit::get(Dag->getOperatorAsDef({}));
if (!DI->getType()->typeIsA(getType())) {
@@ -820,11 +803,12 @@ std::string UnOpInit::getAsString() const {
std::string Result;
switch (getOpcode()) {
case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break;
+ case NOT: Result = "!not"; break;
case HEAD: Result = "!head"; break;
case TAIL: Result = "!tail"; break;
case SIZE: Result = "!size"; break;
case EMPTY: Result = "!empty"; break;
- case GETOP: Result = "!getop"; break;
+ case GETDAGOP: Result = "!getdagop"; break;
}
return Result + "(" + LHS->getAsString() + ")";
}
@@ -862,7 +846,53 @@ static StringInit *ConcatStringInits(const StringInit *I0,
const StringInit *I1) {
SmallString<80> Concat(I0->getValue());
Concat.append(I1->getValue());
- return StringInit::get(Concat);
+ return StringInit::get(Concat,
+ StringInit::determineFormat(I0->getFormat(),
+ I1->getFormat()));
+}
+
+static StringInit *interleaveStringList(const ListInit *List,
+ const StringInit *Delim) {
+ if (List->size() == 0)
+ return StringInit::get("");
+ StringInit *Element = dyn_cast<StringInit>(List->getElement(0));
+ if (!Element)
+ return nullptr;
+ SmallString<80> Result(Element->getValue());
+ StringInit::StringFormat Fmt = StringInit::SF_String;
+
+ for (unsigned I = 1, E = List->size(); I < E; ++I) {
+ Result.append(Delim->getValue());
+ StringInit *Element = dyn_cast<StringInit>(List->getElement(I));
+ if (!Element)
+ return nullptr;
+ Result.append(Element->getValue());
+ Fmt = StringInit::determineFormat(Fmt, Element->getFormat());
+ }
+ return StringInit::get(Result, Fmt);
+}
+
+static StringInit *interleaveIntList(const ListInit *List,
+ const StringInit *Delim) {
+ if (List->size() == 0)
+ return StringInit::get("");
+ IntInit *Element =
+ dyn_cast_or_null<IntInit>(List->getElement(0)
+ ->convertInitializerTo(IntRecTy::get()));
+ if (!Element)
+ return nullptr;
+ SmallString<80> Result(Element->getAsString());
+
+ for (unsigned I = 1, E = List->size(); I < E; ++I) {
+ Result.append(Delim->getValue());
+ IntInit *Element =
+ dyn_cast_or_null<IntInit>(List->getElement(I)
+ ->convertInitializerTo(IntRecTy::get()));
+ if (!Element)
+ return nullptr;
+ Result.append(Element->getAsString());
+ }
+ return StringInit::get(Result);
}
Init *BinOpInit::getStrConcat(Init *I0, Init *I1) {
@@ -876,8 +906,8 @@ Init *BinOpInit::getStrConcat(Init *I0, Init *I1) {
static ListInit *ConcatListInits(const ListInit *LHS,
const ListInit *RHS) {
SmallVector<Init *, 8> Args;
- Args.insert(Args.end(), LHS->begin(), LHS->end());
- Args.insert(Args.end(), RHS->begin(), RHS->end());
+ llvm::append_range(Args, *LHS);
+ llvm::append_range(Args, *RHS);
return ListInit::get(Args, LHS->getElementType());
}
@@ -891,10 +921,6 @@ Init *BinOpInit::getListConcat(TypedInit *LHS, Init *RHS) {
return BinOpInit::get(BinOpInit::LISTCONCAT, LHS, RHS, LHS->getType());
}
-Init *BinOpInit::getListSplat(TypedInit *LHS, Init *RHS) {
- return BinOpInit::get(BinOpInit::LISTSPLAT, LHS, RHS, LHS->getType());
-}
-
Init *BinOpInit::Fold(Record *CurRec) const {
switch (getOpcode()) {
case CONCAT: {
@@ -934,8 +960,8 @@ Init *BinOpInit::Fold(Record *CurRec) const {
ListInit *RHSs = dyn_cast<ListInit>(RHS);
if (LHSs && RHSs) {
SmallVector<Init *, 8> Args;
- Args.insert(Args.end(), LHSs->begin(), LHSs->end());
- Args.insert(Args.end(), RHSs->begin(), RHSs->end());
+ llvm::append_range(Args, *LHSs);
+ llvm::append_range(Args, *RHSs);
return ListInit::get(Args, LHSs->getElementType());
}
break;
@@ -956,47 +982,76 @@ Init *BinOpInit::Fold(Record *CurRec) const {
return ConcatStringInits(LHSs, RHSs);
break;
}
+ case INTERLEAVE: {
+ ListInit *List = dyn_cast<ListInit>(LHS);
+ StringInit *Delim = dyn_cast<StringInit>(RHS);
+ if (List && Delim) {
+ StringInit *Result;
+ if (isa<StringRecTy>(List->getElementType()))
+ Result = interleaveStringList(List, Delim);
+ else
+ Result = interleaveIntList(List, Delim);
+ if (Result)
+ return Result;
+ }
+ break;
+ }
case EQ:
case NE:
case LE:
case LT:
case GE:
case GT: {
- // try to fold eq comparison for 'bit' and 'int', otherwise fallback
- // to string objects.
- IntInit *L =
+ // First see if we have two bit, bits, or int.
+ IntInit *LHSi =
dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get()));
- IntInit *R =
+ IntInit *RHSi =
dyn_cast_or_null<IntInit>(RHS->convertInitializerTo(IntRecTy::get()));
- if (L && R) {
+ if (LHSi && RHSi) {
bool Result;
switch (getOpcode()) {
- case EQ: Result = L->getValue() == R->getValue(); break;
- case NE: Result = L->getValue() != R->getValue(); break;
- case LE: Result = L->getValue() <= R->getValue(); break;
- case LT: Result = L->getValue() < R->getValue(); break;
- case GE: Result = L->getValue() >= R->getValue(); break;
- case GT: Result = L->getValue() > R->getValue(); break;
+ case EQ: Result = LHSi->getValue() == RHSi->getValue(); break;
+ case NE: Result = LHSi->getValue() != RHSi->getValue(); break;
+ case LE: Result = LHSi->getValue() <= RHSi->getValue(); break;
+ case LT: Result = LHSi->getValue() < RHSi->getValue(); break;
+ case GE: Result = LHSi->getValue() >= RHSi->getValue(); break;
+ case GT: Result = LHSi->getValue() > RHSi->getValue(); break;
default: llvm_unreachable("unhandled comparison");
}
return BitInit::get(Result);
}
- if (getOpcode() == EQ || getOpcode() == NE) {
- StringInit *LHSs = dyn_cast<StringInit>(LHS);
- StringInit *RHSs = dyn_cast<StringInit>(RHS);
+ // Next try strings.
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
- // Make sure we've resolved
- if (LHSs && RHSs) {
- bool Equal = LHSs->getValue() == RHSs->getValue();
- return BitInit::get(getOpcode() == EQ ? Equal : !Equal);
+ if (LHSs && RHSs) {
+ bool Result;
+ switch (getOpcode()) {
+ case EQ: Result = LHSs->getValue() == RHSs->getValue(); break;
+ case NE: Result = LHSs->getValue() != RHSs->getValue(); break;
+ case LE: Result = LHSs->getValue() <= RHSs->getValue(); break;
+ case LT: Result = LHSs->getValue() < RHSs->getValue(); break;
+ case GE: Result = LHSs->getValue() >= RHSs->getValue(); break;
+ case GT: Result = LHSs->getValue() > RHSs->getValue(); break;
+ default: llvm_unreachable("unhandled comparison");
}
+ return BitInit::get(Result);
+ }
+
+ // Finally, !eq and !ne can be used with records.
+ if (getOpcode() == EQ || getOpcode() == NE) {
+ DefInit *LHSd = dyn_cast<DefInit>(LHS);
+ DefInit *RHSd = dyn_cast<DefInit>(RHS);
+ if (LHSd && RHSd)
+ return BitInit::get((getOpcode() == EQ) ? LHSd == RHSd
+ : LHSd != RHSd);
}
break;
}
- case SETOP: {
+ case SETDAGOP: {
DagInit *Dag = dyn_cast<DagInit>(LHS);
DefInit *Op = dyn_cast<DefInit>(RHS);
if (Dag && Op) {
@@ -1011,9 +1066,11 @@ Init *BinOpInit::Fold(Record *CurRec) const {
break;
}
case ADD:
+ case SUB:
case MUL:
case AND:
case OR:
+ case XOR:
case SHL:
case SRA:
case SRL: {
@@ -1026,10 +1083,12 @@ Init *BinOpInit::Fold(Record *CurRec) const {
int64_t Result;
switch (getOpcode()) {
default: llvm_unreachable("Bad opcode!");
- case ADD: Result = LHSv + RHSv; break;
- case MUL: Result = LHSv * RHSv; break;
- case AND: Result = LHSv & RHSv; break;
- case OR: Result = LHSv | RHSv; break;
+ case ADD: Result = LHSv + RHSv; break;
+ case SUB: Result = LHSv - RHSv; break;
+ case MUL: Result = LHSv * RHSv; break;
+ case AND: Result = LHSv & RHSv; break;
+ case OR: Result = LHSv | RHSv; break;
+ case XOR: Result = LHSv ^ RHSv; break;
case SHL: Result = (uint64_t)LHSv << (uint64_t)RHSv; break;
case SRA: Result = LHSv >> RHSv; break;
case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break;
@@ -1057,9 +1116,11 @@ std::string BinOpInit::getAsString() const {
switch (getOpcode()) {
case CONCAT: Result = "!con"; break;
case ADD: Result = "!add"; break;
+ case SUB: Result = "!sub"; break;
case MUL: Result = "!mul"; break;
case AND: Result = "!and"; break;
case OR: Result = "!or"; break;
+ case XOR: Result = "!xor"; break;
case SHL: Result = "!shl"; break;
case SRA: Result = "!sra"; break;
case SRL: Result = "!srl"; break;
@@ -1072,7 +1133,8 @@ std::string BinOpInit::getAsString() const {
case LISTCONCAT: Result = "!listconcat"; break;
case LISTSPLAT: Result = "!listsplat"; break;
case STRCONCAT: Result = "!strconcat"; break;
- case SETOP: Result = "!setop"; break;
+ case INTERLEAVE: Result = "!interleave"; break;
+ case SETDAGOP: Result = "!setdagop"; break;
}
return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")";
}
@@ -1107,7 +1169,7 @@ void TernOpInit::Profile(FoldingSetNodeID &ID) const {
ProfileTernOpInit(ID, getOpcode(), getLHS(), getMHS(), getRHS(), getType());
}
-static Init *ForeachApply(Init *LHS, Init *MHSe, Init *RHS, Record *CurRec) {
+static Init *ItemApply(Init *LHS, Init *MHSe, Init *RHS, Record *CurRec) {
MapResolver R(CurRec);
R.set(LHS, MHSe);
return RHS->resolveReferences(R);
@@ -1116,7 +1178,7 @@ static Init *ForeachApply(Init *LHS, Init *MHSe, Init *RHS, Record *CurRec) {
static Init *ForeachDagApply(Init *LHS, DagInit *MHSd, Init *RHS,
Record *CurRec) {
bool Change = false;
- Init *Val = ForeachApply(LHS, MHSd->getOperator(), RHS, CurRec);
+ Init *Val = ItemApply(LHS, MHSd->getOperator(), RHS, CurRec);
if (Val != MHSd->getOperator())
Change = true;
@@ -1129,7 +1191,7 @@ static Init *ForeachDagApply(Init *LHS, DagInit *MHSd, Init *RHS,
if (DagInit *Argd = dyn_cast<DagInit>(Arg))
NewArg = ForeachDagApply(LHS, Argd, RHS, CurRec);
else
- NewArg = ForeachApply(LHS, Arg, RHS, CurRec);
+ NewArg = ItemApply(LHS, Arg, RHS, CurRec);
NewArgs.push_back(std::make_pair(NewArg, ArgName));
if (Arg != NewArg)
@@ -1151,7 +1213,7 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
SmallVector<Init *, 8> NewList(MHSl->begin(), MHSl->end());
for (Init *&Item : NewList) {
- Init *NewItem = ForeachApply(LHS, Item, RHS, CurRec);
+ Init *NewItem = ItemApply(LHS, Item, RHS, CurRec);
if (NewItem != Item)
Item = NewItem;
}
@@ -1161,6 +1223,31 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
return nullptr;
}
+// Evaluates RHS for all elements of MHS, using LHS as a temp variable.
+// Creates a new list with the elements that evaluated to true.
+static Init *FilterHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
+ Record *CurRec) {
+ if (ListInit *MHSl = dyn_cast<ListInit>(MHS)) {
+ SmallVector<Init *, 8> NewList;
+
+ for (Init *Item : MHSl->getValues()) {
+ Init *Include = ItemApply(LHS, Item, RHS, CurRec);
+ if (!Include)
+ return nullptr;
+ if (IntInit *IncludeInt = dyn_cast_or_null<IntInit>(
+ Include->convertInitializerTo(IntRecTy::get()))) {
+ if (IncludeInt->getValue())
+ NewList.push_back(Item);
+ } else {
+ return nullptr;
+ }
+ }
+ return ListInit::get(NewList, cast<ListRecTy>(Type)->getElementType());
+ }
+
+ return nullptr;
+}
+
Init *TernOpInit::Fold(Record *CurRec) const {
switch (getOpcode()) {
case SUBST: {
@@ -1213,6 +1300,12 @@ Init *TernOpInit::Fold(Record *CurRec) const {
break;
}
+ case FILTER: {
+ if (Init *Result = FilterHelper(LHS, MHS, RHS, getType(), CurRec))
+ return Result;
+ break;
+ }
+
case IF: {
if (IntInit *LHSi = dyn_cast_or_null<IntInit>(
LHS->convertInitializerTo(IntRecTy::get()))) {
@@ -1246,6 +1339,27 @@ Init *TernOpInit::Fold(Record *CurRec) const {
}
break;
}
+
+ case SUBSTR: {
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ IntInit *MHSi = dyn_cast<IntInit>(MHS);
+ IntInit *RHSi = dyn_cast<IntInit>(RHS);
+ if (LHSs && MHSi && RHSi) {
+ int64_t StringSize = LHSs->getValue().size();
+ int64_t Start = MHSi->getValue();
+ int64_t Length = RHSi->getValue();
+ if (Start < 0 || Start > StringSize)
+ PrintError(CurRec->getLoc(),
+ Twine("!substr start position is out of range 0...") +
+ std::to_string(StringSize) + ": " +
+ std::to_string(Start));
+ if (Length < 0)
+ PrintError(CurRec->getLoc(), "!substr length must be nonnegative");
+ return StringInit::get(LHSs->getValue().substr(Start, Length),
+ LHSs->getFormat());
+ }
+ break;
+ }
}
return const_cast<TernOpInit *>(this);
@@ -1267,7 +1381,7 @@ Init *TernOpInit::resolveReferences(Resolver &R) const {
Init *mhs = MHS->resolveReferences(R);
Init *rhs;
- if (getOpcode() == FOREACH) {
+ if (getOpcode() == FOREACH || getOpcode() == FILTER) {
ShadowResolver SR(R);
SR.addShadow(lhs);
rhs = RHS->resolveReferences(SR);
@@ -1285,10 +1399,12 @@ std::string TernOpInit::getAsString() const {
std::string Result;
bool UnquotedLHS = false;
switch (getOpcode()) {
- case SUBST: Result = "!subst"; break;
+ case DAG: Result = "!dag"; break;
+ case FILTER: Result = "!filter"; UnquotedLHS = true; break;
case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
case IF: Result = "!if"; break;
- case DAG: Result = "!dag"; break;
+ case SUBST: Result = "!subst"; break;
+ case SUBSTR: Result = "!substr"; break;
}
return (Result + "(" +
(UnquotedLHS ? LHS->getAsUnquotedString() : LHS->getAsString()) +
@@ -2026,8 +2142,16 @@ std::string DagInit::getAsString() const {
// Other implementations
//===----------------------------------------------------------------------===//
-RecordVal::RecordVal(Init *N, RecTy *T, bool P)
- : Name(N), TyAndPrefix(T, P) {
+RecordVal::RecordVal(Init *N, RecTy *T, FieldKind K)
+ : Name(N), TyAndKind(T, K) {
+ setValue(UnsetInit::get());
+ assert(Value && "Cannot create unset value for current type!");
+}
+
+// This constructor accepts the same arguments as the above, but also
+// a source location.
+RecordVal::RecordVal(Init *N, SMLoc Loc, RecTy *T, FieldKind K)
+ : Name(N), Loc(Loc), TyAndKind(T, K) {
setValue(UnsetInit::get());
assert(Value && "Cannot create unset value for current type!");
}
@@ -2036,6 +2160,21 @@ StringRef RecordVal::getName() const {
return cast<StringInit>(getNameInit())->getValue();
}
+std::string RecordVal::getPrintType() const {
+ if (getType() == StringRecTy::get()) {
+ if (auto *StrInit = dyn_cast<StringInit>(Value)) {
+ if (StrInit->hasCodeFormat())
+ return "code";
+ else
+ return "string";
+ } else {
+ return "string";
+ }
+ } else {
+ return TyAndKind.getPointer()->getAsString();
+ }
+}
+
bool RecordVal::setValue(Init *V) {
if (V) {
Value = V->getCastTo(getType());
@@ -2046,8 +2185,33 @@ bool RecordVal::setValue(Init *V) {
if (!isa<BitsInit>(Value)) {
SmallVector<Init *, 64> Bits;
Bits.reserve(BTy->getNumBits());
- for (unsigned i = 0, e = BTy->getNumBits(); i < e; ++i)
- Bits.push_back(Value->getBit(i));
+ for (unsigned I = 0, E = BTy->getNumBits(); I < E; ++I)
+ Bits.push_back(Value->getBit(I));
+ Value = BitsInit::get(Bits);
+ }
+ }
+ }
+ return Value == nullptr;
+ }
+ Value = nullptr;
+ return false;
+}
+
+// This version of setValue takes a source location and resets the
+// location in the RecordVal.
+bool RecordVal::setValue(Init *V, SMLoc NewLoc) {
+ Loc = NewLoc;
+ if (V) {
+ Value = V->getCastTo(getType());
+ if (Value) {
+ assert(!isa<TypedInit>(Value) ||
+ cast<TypedInit>(Value)->getType()->typeIsA(getType()));
+ if (BitsRecTy *BTy = dyn_cast<BitsRecTy>(getType())) {
+ if (!isa<BitsInit>(Value)) {
+ SmallVector<Init *, 64> Bits;
+ Bits.reserve(BTy->getNumBits());
+ for (unsigned I = 0, E = BTy->getNumBits(); I < E; ++I)
+ Bits.push_back(Value->getBit(I));
Value = BitsInit::get(Bits);
}
}
@@ -2058,13 +2222,14 @@ bool RecordVal::setValue(Init *V) {
return false;
}
+#include "llvm/TableGen/Record.h"
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RecordVal::dump() const { errs() << *this; }
#endif
void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
- if (getPrefix()) OS << "field ";
- OS << *getType() << " " << getNameInitAsString();
+ if (isNonconcreteOK()) OS << "field ";
+ OS << getPrintType() << " " << getNameInitAsString();
if (getValue())
OS << " = " << *getValue();
@@ -2089,9 +2254,9 @@ RecordRecTy *Record::getType() {
}
DefInit *Record::getDefInit() {
- if (!TheInit)
- TheInit = new(Allocator) DefInit(this);
- return TheInit;
+ if (!CorrespondingDefInit)
+ CorrespondingDefInit = new (Allocator) DefInit(this);
+ return CorrespondingDefInit;
}
void Record::setName(Init *NewName) {
@@ -2110,11 +2275,28 @@ void Record::setName(Init *NewName) {
// this. See TGParser::ParseDef and TGParser::ParseDefm.
}
+// NOTE for the next two functions:
+// Superclasses are in post-order, so the final one is a direct
+// superclass. All of its transitive superclasses immediately precede it,
+// so we can step through the direct superclasses in reverse order.
+
+bool Record::hasDirectSuperClass(const Record *Superclass) const {
+ ArrayRef<std::pair<Record *, SMRange>> SCs = getSuperClasses();
+
+ for (int I = SCs.size() - 1; I >= 0; --I) {
+ const Record *SC = SCs[I].first;
+ if (SC == Superclass)
+ return true;
+ I -= SC->getSuperClasses().size();
+ }
+
+ return false;
+}
+
void Record::getDirectSuperClasses(SmallVectorImpl<Record *> &Classes) const {
ArrayRef<std::pair<Record *, SMRange>> SCs = getSuperClasses();
+
while (!SCs.empty()) {
- // Superclasses are in reverse preorder, so 'back' is a direct superclass,
- // and its transitive superclasses are directly preceding it.
Record *SC = SCs.back().first;
SCs = SCs.drop_back(1 + SC->getSuperClasses().size());
Classes.push_back(SC);
@@ -2187,15 +2369,23 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
OS << "\n";
for (const RecordVal &Val : R.getValues())
- if (Val.getPrefix() && !R.isTemplateArg(Val.getNameInit()))
+ if (Val.isNonconcreteOK() && !R.isTemplateArg(Val.getNameInit()))
OS << Val;
for (const RecordVal &Val : R.getValues())
- if (!Val.getPrefix() && !R.isTemplateArg(Val.getNameInit()))
+ if (!Val.isNonconcreteOK() && !R.isTemplateArg(Val.getNameInit()))
OS << Val;
return OS << "}\n";
}
+SMLoc Record::getFieldLoc(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (!R)
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName + "'!\n");
+ return R->getLoc();
+}
+
Init *Record::getValueInit(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (!R || !R->getValue())
@@ -2205,18 +2395,27 @@ Init *Record::getValueInit(StringRef FieldName) const {
}
StringRef Record::getValueAsString(StringRef FieldName) const {
- const RecordVal *R = getValue(FieldName);
- if (!R || !R->getValue())
+ llvm::Optional<StringRef> S = getValueAsOptionalString(FieldName);
+ if (!S.hasValue())
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
+ return S.getValue();
+}
+
+llvm::Optional<StringRef>
+Record::getValueAsOptionalString(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (!R || !R->getValue())
+ return llvm::Optional<StringRef>();
+ if (isa<UnsetInit>(R->getValue()))
+ return llvm::Optional<StringRef>();
if (StringInit *SI = dyn_cast<StringInit>(R->getValue()))
return SI->getValue();
- if (CodeInit *CI = dyn_cast<CodeInit>(R->getValue()))
- return CI->getValue();
- PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
- FieldName + "' does not have a string initializer!");
+ PrintFatalError(getLoc(),
+ "Record `" + getName() + "', ` field `" + FieldName +
+ "' exists but does not have a string initializer!");
}
BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
@@ -2227,8 +2426,8 @@ BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
if (BitsInit *BI = dyn_cast<BitsInit>(R->getValue()))
return BI;
- PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
- FieldName + "' does not have a BitsInit initializer!");
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName +
+ "' exists but does not have a bits value");
}
ListInit *Record::getValueAsListInit(StringRef FieldName) const {
@@ -2239,8 +2438,8 @@ ListInit *Record::getValueAsListInit(StringRef FieldName) const {
if (ListInit *LI = dyn_cast<ListInit>(R->getValue()))
return LI;
- PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
- FieldName + "' does not have a list initializer!");
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName +
+ "' exists but does not have a list value");
}
std::vector<Record*>
@@ -2267,7 +2466,7 @@ int64_t Record::getValueAsInt(StringRef FieldName) const {
return II->getValue();
PrintFatalError(getLoc(), Twine("Record `") + getName() + "', field `" +
FieldName +
- "' does not have an int initializer: " +
+ "' exists but does not have an int value: " +
R->getValue()->getAsString());
}
@@ -2281,7 +2480,7 @@ Record::getValueAsListOfInts(StringRef FieldName) const {
else
PrintFatalError(getLoc(),
Twine("Record `") + getName() + "', field `" + FieldName +
- "' does not have a list of ints initializer: " +
+ "' exists but does not have a list of ints value: " +
I->getAsString());
}
return Ints;
@@ -2294,12 +2493,10 @@ Record::getValueAsListOfStrings(StringRef FieldName) const {
for (Init *I : List->getValues()) {
if (StringInit *SI = dyn_cast<StringInit>(I))
Strings.push_back(SI->getValue());
- else if (CodeInit *CI = dyn_cast<CodeInit>(I))
- Strings.push_back(CI->getValue());
else
PrintFatalError(getLoc(),
Twine("Record `") + getName() + "', field `" + FieldName +
- "' does not have a list of strings initializer: " +
+ "' exists but does not have a list of strings value: " +
I->getAsString());
}
return Strings;
@@ -2394,16 +2591,78 @@ Init *RecordKeeper::getNewAnonymousName() {
return StringInit::get("anonymous_" + utostr(AnonCounter++));
}
-std::vector<Record *>
-RecordKeeper::getAllDerivedDefinitions(StringRef ClassName) const {
- Record *Class = getClass(ClassName);
- if (!Class)
- PrintFatalError("ERROR: Couldn't find the `" + ClassName + "' class!\n");
+// These functions implement the phase timing facility. Starting a timer
+// when one is already running stops the running one.
- std::vector<Record*> Defs;
- for (const auto &D : getDefs())
- if (D.second->isSubClassOf(Class))
- Defs.push_back(D.second.get());
+void RecordKeeper::startTimer(StringRef Name) {
+ if (TimingGroup) {
+ if (LastTimer && LastTimer->isRunning()) {
+ LastTimer->stopTimer();
+ if (BackendTimer) {
+ LastTimer->clear();
+ BackendTimer = false;
+ }
+ }
+
+ LastTimer = new Timer("", Name, *TimingGroup);
+ LastTimer->startTimer();
+ }
+}
+
+void RecordKeeper::stopTimer() {
+ if (TimingGroup) {
+ assert(LastTimer && "No phase timer was started");
+ LastTimer->stopTimer();
+ }
+}
+
+void RecordKeeper::startBackendTimer(StringRef Name) {
+ if (TimingGroup) {
+ startTimer(Name);
+ BackendTimer = true;
+ }
+}
+
+void RecordKeeper::stopBackendTimer() {
+ if (TimingGroup) {
+ if (BackendTimer) {
+ stopTimer();
+ BackendTimer = false;
+ }
+ }
+}
+
+// We cache the record vectors for single classes. Many backends request
+// the same vectors multiple times.
+std::vector<Record *> RecordKeeper::getAllDerivedDefinitions(
+ StringRef ClassName) const {
+
+ auto Pair = ClassRecordsMap.try_emplace(ClassName);
+ if (Pair.second)
+ Pair.first->second = getAllDerivedDefinitions(makeArrayRef(ClassName));
+
+ return Pair.first->second;
+}
+
+std::vector<Record *> RecordKeeper::getAllDerivedDefinitions(
+ ArrayRef<StringRef> ClassNames) const {
+ SmallVector<Record *, 2> ClassRecs;
+ std::vector<Record *> Defs;
+
+ assert(ClassNames.size() > 0 && "At least one class must be passed.");
+ for (const auto &ClassName : ClassNames) {
+ Record *Class = getClass(ClassName);
+ if (!Class)
+ PrintFatalError("The class '" + ClassName + "' is not defined\n");
+ ClassRecs.push_back(Class);
+ }
+
+ for (const auto &OneDef : getDefs()) {
+ if (all_of(ClassRecs, [&OneDef](const Record *Class) {
+ return OneDef.second->isSubClassOf(Class);
+ }))
+ Defs.push_back(OneDef.second.get());
+ }
return Defs;
}
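Two of the RecordKeeper/Record additions above are easy to miss: getAllDerivedDefinitions now also accepts several class names (returning defs derived from all of them, with the single-class form cached), and getValueAsOptionalString is a non-fatal variant of getValueAsString. A sketch under assumed names — "Instruction", "Deprecated", and "SummaryText" are placeholders, not defined by this diff.

    #include "llvm/ADT/Optional.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TableGen/Record.h"

    using namespace llvm;

    static void listDeprecatedInstructions(RecordKeeper &Records) {
      // Defs that derive from *both* classes (new ArrayRef overload).
      std::vector<Record *> Defs =
          Records.getAllDerivedDefinitions({"Instruction", "Deprecated"});

      for (Record *R : Defs) {
        // Unlike getValueAsString, this does not abort when the field is
        // missing or left unset.
        if (Optional<StringRef> Summary =
                R->getValueAsOptionalString("SummaryText"))
          outs() << R->getName() << ": " << *Summary << "\n";
      }
    }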
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
index 9e6cc947925d..94c79102c7cd 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
@@ -150,7 +150,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
case EOF:
// Lex next token, if we just left an include file.
// Note that leaving an include file means that the next
- // symbol is located at the end of 'include "..."'
+ // symbol is located at the end of the 'include "..."'
// construct, so LexToken() is called with default
// false parameter.
if (processEOF())
@@ -161,7 +161,6 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
case ':': return tgtok::colon;
case ';': return tgtok::semi;
- case '.': return tgtok::period;
case ',': return tgtok::comma;
case '<': return tgtok::less;
case '>': return tgtok::greater;
@@ -181,6 +180,19 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
return tgtok::paste;
+ // The period is a separate case so we can recognize the "..."
+ // range punctuator.
+ case '.':
+ if (peekNextChar(0) == '.') {
+ ++CurPtr; // Eat second dot.
+ if (peekNextChar(0) == '.') {
+ ++CurPtr; // Eat third dot.
+ return tgtok::dotdotdot;
+ }
+ return ReturnError(TokStart, "Invalid '..' punctuation");
+ }
+ return tgtok::dot;
+
case '\r':
PrintFatalError("getNextChar() must never return '\r'");
return tgtok::Error;
@@ -326,14 +338,9 @@ tgtok::TokKind TGLexer::LexIdentifier() {
while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
++CurPtr;
- // Check to see if this identifier is a keyword.
+ // Check to see if this identifier is a reserved keyword.
StringRef Str(IdentStart, CurPtr-IdentStart);
- if (Str == "include") {
- if (LexInclude()) return tgtok::Error;
- return Lex();
- }
-
tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
.Case("int", tgtok::Int)
.Case("bit", tgtok::Bit)
@@ -344,6 +351,8 @@ tgtok::TokKind TGLexer::LexIdentifier() {
.Case("dag", tgtok::Dag)
.Case("class", tgtok::Class)
.Case("def", tgtok::Def)
+ .Case("true", tgtok::TrueVal)
+ .Case("false", tgtok::FalseVal)
.Case("foreach", tgtok::Foreach)
.Case("defm", tgtok::Defm)
.Case("defset", tgtok::Defset)
@@ -352,13 +361,25 @@ tgtok::TokKind TGLexer::LexIdentifier() {
.Case("let", tgtok::Let)
.Case("in", tgtok::In)
.Case("defvar", tgtok::Defvar)
+ .Case("include", tgtok::Include)
.Case("if", tgtok::If)
.Case("then", tgtok::Then)
.Case("else", tgtok::ElseKW)
+ .Case("assert", tgtok::Assert)
.Default(tgtok::Id);
- if (Kind == tgtok::Id)
- CurStrVal.assign(Str.begin(), Str.end());
+ // A couple of tokens require special processing.
+ switch (Kind) {
+ case tgtok::Include:
+ if (LexInclude()) return tgtok::Error;
+ return Lex();
+ case tgtok::Id:
+ CurStrVal.assign(Str.begin(), Str.end());
+ break;
+ default:
+ break;
+ }
+
return Kind;
}
@@ -520,7 +541,7 @@ tgtok::TokKind TGLexer::LexBracket() {
}
}
- return ReturnError(CodeStart-2, "Unterminated Code Block");
+ return ReturnError(CodeStart - 2, "Unterminated code block");
}
/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
@@ -550,9 +571,12 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("con", tgtok::XConcat)
.Case("dag", tgtok::XDag)
.Case("add", tgtok::XADD)
+ .Case("sub", tgtok::XSUB)
.Case("mul", tgtok::XMUL)
+ .Case("not", tgtok::XNOT)
.Case("and", tgtok::XAND)
.Case("or", tgtok::XOR)
+ .Case("xor", tgtok::XXOR)
.Case("shl", tgtok::XSHL)
.Case("sra", tgtok::XSRA)
.Case("srl", tgtok::XSRL)
@@ -561,11 +585,14 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("subst", tgtok::XSubst)
.Case("foldl", tgtok::XFoldl)
.Case("foreach", tgtok::XForEach)
+ .Case("filter", tgtok::XFilter)
.Case("listconcat", tgtok::XListConcat)
.Case("listsplat", tgtok::XListSplat)
.Case("strconcat", tgtok::XStrConcat)
- .Case("setop", tgtok::XSetOp)
- .Case("getop", tgtok::XGetOp)
+ .Case("interleave", tgtok::XInterleave)
+ .Case("substr", tgtok::XSubstr)
+ .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
+ .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
.Default(tgtok::Error);
return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h
index 5b3b0a44e3ef..2f322f705e0d 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h
@@ -40,19 +40,25 @@ namespace tgtok {
l_paren, r_paren, // ( )
less, greater, // < >
colon, semi, // : ;
- comma, period, // , .
+ comma, dot, // , .
equal, question, // = ?
paste, // #
+ dotdotdot, // ...
- // Keywords. ('ElseKW' is named to distinguish it from the existing 'Else'
- // that means the preprocessor #else.)
- Bit, Bits, Class, Code, Dag, Def, Foreach, Defm, Field, In, Int, Let, List,
- MultiClass, String, Defset, Defvar, If, Then, ElseKW,
+ // Reserved keywords. ('ElseKW' is named to distinguish it from the
+ // existing 'Else' that means the preprocessor #else.)
+ Assert, Bit, Bits, Class, Code, Dag, Def, Defm, Defset, Defvar, ElseKW,
+ FalseKW, Field, Foreach, If, In, Include, Int, Let, List, MultiClass,
+ String, Then, TrueKW,
- // !keywords.
- XConcat, XADD, XMUL, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XListSplat,
- XStrConcat, XCast, XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty,
- XIf, XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetOp, XGetOp,
+ // Bang operators.
+ XConcat, XADD, XSUB, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL,
+ XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XCast,
+ XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf,
+ XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
+
+ // Boolean literals.
+ TrueVal, FalseVal,
// Integer value.
IntVal,
@@ -80,8 +86,8 @@ class TGLexer {
// Information about the current token.
const char *TokStart = nullptr;
tgtok::TokKind CurCode = tgtok::TokKind::Eof;
- std::string CurStrVal; // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
- int64_t CurIntVal = 0; // This is valid for INTVAL.
+ std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
+ int64_t CurIntVal = 0; // This is valid for IntVal.
/// CurBuffer - This is the current buffer index we're lexing from as managed
/// by the SourceMgr object.
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp
index 47f471ae2c4b..24949f0b2b4d 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp
@@ -25,6 +25,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <limits>
using namespace llvm;
@@ -94,7 +95,7 @@ static void checkConcrete(Record &R) {
// done merely because existing targets have legitimate cases of
// non-concrete variables in helper defs. Ideally, we'd introduce a
// 'maybe' or 'optional' modifier instead of this.
- if (RV.getPrefix())
+ if (RV.isNonconcreteOK())
continue;
if (Init *V = RV.getValue()) {
@@ -209,16 +210,16 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
V = BitsInit::get(NewBits);
}
- if (RV->setValue(V)) {
+ if (RV->setValue(V, Loc)) {
std::string InitType;
if (BitsInit *BI = dyn_cast<BitsInit>(V))
InitType = (Twine("' of type bit initializer with length ") +
Twine(BI->getNumBits())).str();
else if (TypedInit *TI = dyn_cast<TypedInit>(V))
InitType = (Twine("' of type '") + TI->getType()->getAsString()).str();
- return Error(Loc, "Value '" + ValName->getAsUnquotedString() +
+ return Error(Loc, "Field '" + ValName->getAsUnquotedString() +
"' of type '" + RV->getType()->getAsString() +
- "' is incompatible with initializer '" +
+ "' is incompatible with value '" +
V->getAsString() + InitType + "'");
}
return false;
@@ -451,6 +452,8 @@ bool TGParser::addDefOne(std::unique_ptr<Record> Rec) {
Rec->resolveReferences();
checkConcrete(*Rec);
+ CheckRecordAsserts(*Rec);
+
if (!isa<StringInit>(Rec->getNameInit())) {
PrintError(Rec->getLoc(), Twine("record name '") +
Rec->getNameInit()->getAsString() +
@@ -481,11 +484,12 @@ bool TGParser::addDefOne(std::unique_ptr<Record> Rec) {
// Parser Code
//===----------------------------------------------------------------------===//
-/// isObjectStart - Return true if this is a valid first token for an Object.
+/// isObjectStart - Return true if this is a valid first token for a statement.
static bool isObjectStart(tgtok::TokKind K) {
- return K == tgtok::Class || K == tgtok::Def || K == tgtok::Defm ||
- K == tgtok::Let || K == tgtok::MultiClass || K == tgtok::Foreach ||
- K == tgtok::Defset || K == tgtok::Defvar || K == tgtok::If;
+ return K == tgtok::Assert || K == tgtok::Class || K == tgtok::Def ||
+ K == tgtok::Defm || K == tgtok::Defset || K == tgtok::Defvar ||
+ K == tgtok::Foreach || K == tgtok::If || K == tgtok::Let ||
+ K == tgtok::MultiClass;
}
bool TGParser::consume(tgtok::TokKind K) {
@@ -671,8 +675,10 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
/// ParseRangePiece - Parse a bit/value range.
/// RangePiece ::= INTVAL
+/// RangePiece ::= INTVAL '...' INTVAL
/// RangePiece ::= INTVAL '-' INTVAL
-/// RangePiece ::= INTVAL INTVAL
+/// RangePiece ::= INTVAL INTVAL
+// The last two forms are deprecated.
bool TGParser::ParseRangePiece(SmallVectorImpl<unsigned> &Ranges,
TypedInit *FirstItem) {
Init *CurVal = FirstItem;
@@ -693,6 +699,8 @@ bool TGParser::ParseRangePiece(SmallVectorImpl<unsigned> &Ranges,
default:
Ranges.push_back(Start);
return false;
+
+ case tgtok::dotdotdot:
case tgtok::minus: {
Lex.Lex(); // eat
@@ -795,8 +803,8 @@ bool TGParser::ParseOptionalBitList(SmallVectorImpl<unsigned> &Ranges) {
RecTy *TGParser::ParseType() {
switch (Lex.getCode()) {
default: TokError("Unknown token when expecting a type"); return nullptr;
- case tgtok::String: Lex.Lex(); return StringRecTy::get();
- case tgtok::Code: Lex.Lex(); return CodeRecTy::get();
+ case tgtok::String:
+ case tgtok::Code: Lex.Lex(); return StringRecTy::get();
case tgtok::Bit: Lex.Lex(); return BitRecTy::get();
case tgtok::Int: Lex.Lex(); return IntRecTy::get();
case tgtok::Dag: Lex.Lex(); return DagRecTy::get();
@@ -809,12 +817,12 @@ RecTy *TGParser::ParseType() {
TokError("expected '<' after bits type");
return nullptr;
}
- if (Lex.Lex() != tgtok::IntVal) { // Eat '<'
+ if (Lex.Lex() != tgtok::IntVal) { // Eat '<'
TokError("expected integer in bits<n> type");
return nullptr;
}
uint64_t Val = Lex.getCurIntVal();
- if (Lex.Lex() != tgtok::greater) { // Eat count.
+ if (Lex.Lex() != tgtok::greater) { // Eat count.
TokError("expected '>' at end of bits<n> type");
return nullptr;
}
@@ -839,8 +847,7 @@ RecTy *TGParser::ParseType() {
}
}
-/// ParseIDValue - This is just like ParseIDValue above, but it assumes the ID
-/// has already been read.
+/// ParseIDValue
Init *TGParser::ParseIDValue(Record *CurRec, StringInit *Name, SMLoc NameLoc,
IDParseMode Mode) {
if (CurRec) {
@@ -902,14 +909,15 @@ Init *TGParser::ParseIDValue(Record *CurRec, StringInit *Name, SMLoc NameLoc,
Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
switch (Lex.getCode()) {
default:
- TokError("unknown operation");
+ TokError("unknown bang operator");
return nullptr;
+ case tgtok::XNOT:
case tgtok::XHead:
case tgtok::XTail:
case tgtok::XSize:
case tgtok::XEmpty:
case tgtok::XCast:
- case tgtok::XGetOp: { // Value ::= !unop '(' Value ')'
+ case tgtok::XGetDagOp: { // Value ::= !unop '(' Value ')'
UnOpInit::UnaryOp Code;
RecTy *Type = nullptr;
@@ -927,6 +935,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
}
break;
+ case tgtok::XNOT:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::NOT;
+ Type = IntRecTy::get();
+ break;
case tgtok::XHead:
Lex.Lex(); // eat the operation
Code = UnOpInit::HEAD;
@@ -945,12 +958,12 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
Code = UnOpInit::EMPTY;
Type = IntRecTy::get();
break;
- case tgtok::XGetOp:
+ case tgtok::XGetDagOp:
Lex.Lex(); // eat the operation
if (Lex.getCode() == tgtok::less) {
// Parse an optional type suffix, so that you can say
- // !getop<BaseClass>(someDag) as a shorthand for
- // !cast<BaseClass>(!getop(someDag)).
+ // !getdagop<BaseClass>(someDag) as a shorthand for
+ // !cast<BaseClass>(!getdagop(someDag)).
Type = ParseOperatorType();
if (!Type) {
@@ -959,13 +972,13 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
}
if (!isa<RecordRecTy>(Type)) {
- TokError("type for !getop must be a record type");
+ TokError("type for !getdagop must be a record type");
// but keep parsing, to consume the operand
}
} else {
Type = RecordRecTy::get({});
}
- Code = UnOpInit::GETOP;
+ Code = UnOpInit::GETDAGOP;
break;
}
if (!consume(tgtok::l_paren)) {
@@ -976,56 +989,58 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
Init *LHS = ParseValue(CurRec);
if (!LHS) return nullptr;
- if (Code == UnOpInit::HEAD ||
- Code == UnOpInit::TAIL ||
- Code == UnOpInit::EMPTY) {
+ if (Code == UnOpInit::EMPTY || Code == UnOpInit::SIZE) {
ListInit *LHSl = dyn_cast<ListInit>(LHS);
StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ DagInit *LHSd = dyn_cast<DagInit>(LHS);
TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
- if (!LHSl && !LHSs && !LHSt) {
- TokError("expected list or string type argument in unary operator");
+ if (!LHSl && !LHSs && !LHSd && !LHSt) {
+ TokError("expected string, list, or dag type argument in unary operator");
return nullptr;
}
if (LHSt) {
ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
StringRecTy *SType = dyn_cast<StringRecTy>(LHSt->getType());
- if (!LType && !SType) {
- TokError("expected list or string type argument in unary operator");
+ DagRecTy *DType = dyn_cast<DagRecTy>(LHSt->getType());
+ if (!LType && !SType && !DType) {
+ TokError("expected string, list, or dag type argument in unary operator");
return nullptr;
}
}
+ }
- if (Code == UnOpInit::HEAD || Code == UnOpInit::TAIL ||
- Code == UnOpInit::SIZE) {
- if (!LHSl && !LHSt) {
+ if (Code == UnOpInit::HEAD || Code == UnOpInit::TAIL) {
+ ListInit *LHSl = dyn_cast<ListInit>(LHS);
+ TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
+ if (!LHSl && !LHSt) {
+ TokError("expected list type argument in unary operator");
+ return nullptr;
+ }
+ if (LHSt) {
+ ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
+ if (!LType) {
TokError("expected list type argument in unary operator");
return nullptr;
}
}
- if (Code == UnOpInit::HEAD || Code == UnOpInit::TAIL) {
- if (LHSl && LHSl->empty()) {
- TokError("empty list argument in unary operator");
+ if (LHSl && LHSl->empty()) {
+ TokError("empty list argument in unary operator");
+ return nullptr;
+ }
+ if (LHSl) {
+ Init *Item = LHSl->getElement(0);
+ TypedInit *Itemt = dyn_cast<TypedInit>(Item);
+ if (!Itemt) {
+ TokError("untyped list element in unary operator");
return nullptr;
}
- if (LHSl) {
- Init *Item = LHSl->getElement(0);
- TypedInit *Itemt = dyn_cast<TypedInit>(Item);
- if (!Itemt) {
- TokError("untyped list element in unary operator");
- return nullptr;
- }
- Type = (Code == UnOpInit::HEAD) ? Itemt->getType()
- : ListRecTy::get(Itemt->getType());
- } else {
- assert(LHSt && "expected list type argument in unary operator");
- ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
- if (!LType) {
- TokError("expected list type argument in unary operator");
- return nullptr;
- }
- Type = (Code == UnOpInit::HEAD) ? LType->getElementType() : LType;
- }
+ Type = (Code == UnOpInit::HEAD) ? Itemt->getType()
+ : ListRecTy::get(Itemt->getType());
+ } else {
+ assert(LHSt && "expected list type argument in unary operator");
+ ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
+ Type = (Code == UnOpInit::HEAD) ? LType->getElementType() : LType;
}
}
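A minimal TableGen sketch of what the extended unary operators now accept; the names op, DagInfo, and DI are hypothetical, and this assumes a tblgen built from this change:

def op;
class DagInfo<dag d> {
  int args     = !size(d);          // !size now also counts the arguments of a dag
  bit has_args = !not(!empty(d));   // !not is the new logical negation; yields 1 here
}
def DI : DagInfo<(op 1, 2, 3)>;     // args = 3, has_args = 1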
@@ -1063,9 +1078,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XConcat:
case tgtok::XADD:
+ case tgtok::XSUB:
case tgtok::XMUL:
case tgtok::XAND:
case tgtok::XOR:
+ case tgtok::XXOR:
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
@@ -1078,7 +1095,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XListConcat:
case tgtok::XListSplat:
case tgtok::XStrConcat:
- case tgtok::XSetOp: { // Value ::= !binop '(' Value ',' Value ')'
+ case tgtok::XInterleave:
+ case tgtok::XSetDagOp: { // Value ::= !binop '(' Value ',' Value ')'
tgtok::TokKind OpTok = Lex.getCode();
SMLoc OpLoc = Lex.getLoc();
Lex.Lex(); // eat the operation
@@ -1088,9 +1106,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
default: llvm_unreachable("Unhandled code!");
case tgtok::XConcat: Code = BinOpInit::CONCAT; break;
case tgtok::XADD: Code = BinOpInit::ADD; break;
+ case tgtok::XSUB: Code = BinOpInit::SUB; break;
case tgtok::XMUL: Code = BinOpInit::MUL; break;
case tgtok::XAND: Code = BinOpInit::AND; break;
case tgtok::XOR: Code = BinOpInit::OR; break;
+ case tgtok::XXOR: Code = BinOpInit::XOR; break;
case tgtok::XSRA: Code = BinOpInit::SRA; break;
case tgtok::XSRL: Code = BinOpInit::SRL; break;
case tgtok::XSHL: Code = BinOpInit::SHL; break;
@@ -1101,9 +1121,10 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XGe: Code = BinOpInit::GE; break;
case tgtok::XGt: Code = BinOpInit::GT; break;
case tgtok::XListConcat: Code = BinOpInit::LISTCONCAT; break;
- case tgtok::XListSplat: Code = BinOpInit::LISTSPLAT; break;
- case tgtok::XStrConcat: Code = BinOpInit::STRCONCAT; break;
- case tgtok::XSetOp: Code = BinOpInit::SETOP; break;
+ case tgtok::XListSplat: Code = BinOpInit::LISTSPLAT; break;
+ case tgtok::XStrConcat: Code = BinOpInit::STRCONCAT; break;
+ case tgtok::XInterleave: Code = BinOpInit::INTERLEAVE; break;
+ case tgtok::XSetDagOp: Code = BinOpInit::SETDAGOP; break;
}
RecTy *Type = nullptr;
@@ -1112,31 +1133,30 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
default:
llvm_unreachable("Unhandled code!");
case tgtok::XConcat:
- case tgtok::XSetOp:
+ case tgtok::XSetDagOp:
Type = DagRecTy::get();
ArgType = DagRecTy::get();
break;
case tgtok::XAND:
case tgtok::XOR:
+ case tgtok::XXOR:
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
case tgtok::XADD:
+ case tgtok::XSUB:
case tgtok::XMUL:
Type = IntRecTy::get();
ArgType = IntRecTy::get();
break;
case tgtok::XEq:
case tgtok::XNe:
- Type = BitRecTy::get();
- // ArgType for Eq / Ne is not known at this point
- break;
case tgtok::XLe:
case tgtok::XLt:
case tgtok::XGe:
case tgtok::XGt:
Type = BitRecTy::get();
- ArgType = IntRecTy::get();
+ // ArgType for the comparison operators is not yet known.
break;
case tgtok::XListConcat:
// We don't know the list type until we parse the first argument
@@ -1149,6 +1169,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
Type = StringRecTy::get();
ArgType = StringRecTy::get();
break;
+ case tgtok::XInterleave:
+ Type = StringRecTy::get();
+ // The first argument type is not yet known.
}
if (Type && ItemType && !Type->typeIsConvertibleTo(ItemType)) {
@@ -1165,6 +1188,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
SmallVector<Init*, 2> InitList;
+ // Note that this loop consumes an arbitrary number of arguments.
+ // The actual count is checked later.
for (;;) {
SMLoc InitLoc = Lex.getLoc();
InitList.push_back(ParseValue(CurRec, ArgType));
@@ -1177,7 +1202,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return nullptr;
}
RecTy *ListType = InitListBack->getType();
+
if (!ArgType) {
+ // Argument type must be determined from the argument itself.
ArgType = ListType;
switch (Code) {
@@ -1218,15 +1245,54 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case BinOpInit::EQ:
case BinOpInit::NE:
if (!ArgType->typeIsConvertibleTo(IntRecTy::get()) &&
+ !ArgType->typeIsConvertibleTo(StringRecTy::get()) &&
+ !ArgType->typeIsConvertibleTo(RecordRecTy::get({}))) {
+ Error(InitLoc, Twine("expected bit, bits, int, string, or record; "
+ "got value of type '") + ArgType->getAsString() +
+ "'");
+ return nullptr;
+ }
+ break;
+ case BinOpInit::LE:
+ case BinOpInit::LT:
+ case BinOpInit::GE:
+ case BinOpInit::GT:
+ if (!ArgType->typeIsConvertibleTo(IntRecTy::get()) &&
!ArgType->typeIsConvertibleTo(StringRecTy::get())) {
- Error(InitLoc, Twine("expected int, bits, or string; got value of "
- "type '") + ArgType->getAsString() + "'");
+ Error(InitLoc, Twine("expected bit, bits, int, or string; "
+ "got value of type '") + ArgType->getAsString() +
+ "'");
return nullptr;
}
break;
+ case BinOpInit::INTERLEAVE:
+ switch (InitList.size()) {
+ case 1: // First argument must be a list of strings or integers.
+ if (ArgType != StringRecTy::get()->getListTy() &&
+ !ArgType->typeIsConvertibleTo(IntRecTy::get()->getListTy())) {
+ Error(InitLoc, Twine("expected list of string, int, bits, or bit; "
+ "got value of type '") +
+ ArgType->getAsString() + "'");
+ return nullptr;
+ }
+ break;
+ case 2: // Second argument must be a string.
+ if (!isa<StringRecTy>(ArgType)) {
+ Error(InitLoc, Twine("expected second argument to be a string, "
+ "got value of type '") +
+ ArgType->getAsString() + "'");
+ return nullptr;
+ }
+ break;
+ default: ;
+ }
+ ArgType = nullptr; // Broken invariant: types not identical.
+ break;
default: llvm_unreachable("other ops have fixed argument types");
}
+
} else {
+ // Desired argument type is known and already stored in ArgType.
RecTy *Resolved = resolveTypes(ArgType, ListType);
if (!Resolved) {
Error(InitLoc, Twine("expected value of type '") +
@@ -1234,8 +1300,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
ListType->getAsString() + "'");
return nullptr;
}
- if (Code != BinOpInit::ADD && Code != BinOpInit::AND &&
- Code != BinOpInit::OR && Code != BinOpInit::SRA &&
+ if (Code != BinOpInit::ADD && Code != BinOpInit::SUB &&
+ Code != BinOpInit::AND && Code != BinOpInit::OR &&
+ Code != BinOpInit::XOR && Code != BinOpInit::SRA &&
Code != BinOpInit::SRL && Code != BinOpInit::SHL &&
Code != BinOpInit::MUL)
ArgType = Resolved;
@@ -1244,7 +1311,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
// Deal with BinOps whose arguments have different types, by
// rewriting ArgType in between them.
switch (Code) {
- case BinOpInit::SETOP:
+ case BinOpInit::SETDAGOP:
// After parsing the first dag argument, switch to expecting
// a record, with no restriction on its superclasses.
ArgType = RecordRecTy::get({});
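A usage sketch for the renamed dag-operator bang operators; Op, OpA, OpB, Retarget, and R are hypothetical records:

class Op;
def OpA : Op;
def OpB : Op;
class Retarget<dag d> {
  dag rewritten = !setdagop(d, OpB);   // same arguments, operator replaced by OpB
  Op  original  = !getdagop<Op>(d);    // shorthand for !cast<Op>(!getdagop(d))
}
def R : Retarget<(OpA 1, 2)>;          // rewritten = (OpB 1, 2), original = OpA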
@@ -1274,7 +1341,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
if (Code == BinOpInit::STRCONCAT || Code == BinOpInit::LISTCONCAT ||
Code == BinOpInit::CONCAT || Code == BinOpInit::ADD ||
Code == BinOpInit::AND || Code == BinOpInit::OR ||
- Code == BinOpInit::MUL) {
+ Code == BinOpInit::XOR || Code == BinOpInit::MUL) {
while (InitList.size() > 2) {
Init *RHS = InitList.pop_back_val();
RHS = (BinOpInit::get(Code, InitList.back(), RHS, Type))->Fold(CurRec);
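A short sketch of the newly parsed binary operators !sub, !xor, and !interleave; the class Mangle and def M are hypothetical:

class Mangle<list<string> parts, int total, int used> {
  string name = !interleave(parts, "_");   // "neon_fp16_sve"
  int    free = !sub(total, used);         // 27
  int    mask = !xor(0b1100, 0b1010);      // 6
}
def M : Mangle<["neon", "fp16", "sve"], 32, 5>;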
@@ -1290,118 +1357,14 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return nullptr;
}
- case tgtok::XForEach: { // Value ::= !foreach '(' Id ',' Value ',' Value ')'
- SMLoc OpLoc = Lex.getLoc();
- Lex.Lex(); // eat the operation
- if (Lex.getCode() != tgtok::l_paren) {
- TokError("expected '(' after !foreach");
- return nullptr;
- }
-
- if (Lex.Lex() != tgtok::Id) { // eat the '('
- TokError("first argument of !foreach must be an identifier");
- return nullptr;
- }
-
- Init *LHS = StringInit::get(Lex.getCurStrVal());
- Lex.Lex();
-
- if (CurRec && CurRec->getValue(LHS)) {
- TokError((Twine("iteration variable '") + LHS->getAsString() +
- "' already defined")
- .str());
- return nullptr;
- }
-
- if (!consume(tgtok::comma)) { // eat the id
- TokError("expected ',' in ternary operator");
- return nullptr;
- }
-
- Init *MHS = ParseValue(CurRec);
- if (!MHS)
- return nullptr;
-
- if (!consume(tgtok::comma)) {
- TokError("expected ',' in ternary operator");
- return nullptr;
- }
-
- TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
- if (!MHSt) {
- TokError("could not get type of !foreach input");
- return nullptr;
- }
-
- RecTy *InEltType = nullptr;
- RecTy *OutEltType = nullptr;
- bool IsDAG = false;
-
- if (ListRecTy *InListTy = dyn_cast<ListRecTy>(MHSt->getType())) {
- InEltType = InListTy->getElementType();
- if (ItemType) {
- if (ListRecTy *OutListTy = dyn_cast<ListRecTy>(ItemType)) {
- OutEltType = OutListTy->getElementType();
- } else {
- Error(OpLoc,
- "expected value of type '" + Twine(ItemType->getAsString()) +
- "', but got !foreach of list type");
- return nullptr;
- }
- }
- } else if (DagRecTy *InDagTy = dyn_cast<DagRecTy>(MHSt->getType())) {
- InEltType = InDagTy;
- if (ItemType && !isa<DagRecTy>(ItemType)) {
- Error(OpLoc,
- "expected value of type '" + Twine(ItemType->getAsString()) +
- "', but got !foreach of dag type");
- return nullptr;
- }
- IsDAG = true;
- } else {
- TokError("!foreach must have list or dag input");
- return nullptr;
- }
-
- // We need to create a temporary record to provide a scope for the iteration
- // variable while parsing top-level foreach's.
- std::unique_ptr<Record> ParseRecTmp;
- Record *ParseRec = CurRec;
- if (!ParseRec) {
- ParseRecTmp = std::make_unique<Record>(".parse", ArrayRef<SMLoc>{}, Records);
- ParseRec = ParseRecTmp.get();
- }
-
- ParseRec->addValue(RecordVal(LHS, InEltType, false));
- Init *RHS = ParseValue(ParseRec, OutEltType);
- ParseRec->removeValue(LHS);
- if (!RHS)
- return nullptr;
-
- if (!consume(tgtok::r_paren)) {
- TokError("expected ')' in binary operator");
- return nullptr;
- }
-
- RecTy *OutType;
- if (IsDAG) {
- OutType = InEltType;
- } else {
- TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
- if (!RHSt) {
- TokError("could not get type of !foreach result");
- return nullptr;
- }
- OutType = RHSt->getType()->getListTy();
- }
-
- return (TernOpInit::get(TernOpInit::FOREACH, LHS, MHS, RHS, OutType))
- ->Fold(CurRec);
+ case tgtok::XForEach:
+ case tgtok::XFilter: {
+ return ParseOperationForEachFilter(CurRec, ItemType);
}
case tgtok::XDag:
case tgtok::XIf:
- case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
+ case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
TernOpInit::TernaryOp Code;
RecTy *Type = nullptr;
@@ -1536,11 +1499,14 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
}
+ case tgtok::XSubstr:
+ return ParseOperationSubstr(CurRec, ItemType);
+
case tgtok::XCond:
return ParseOperationCond(CurRec, ItemType);
case tgtok::XFoldl: {
- // Value ::= !foldl '(' Id ',' Id ',' Value ',' Value ',' Value ')'
+ // Value ::= !foldl '(' Value ',' Value ',' Id ',' Id ',' Expr ')'
Lex.Lex(); // eat the operation
if (!consume(tgtok::l_paren)) {
TokError("expected '(' after !foldl");
@@ -1623,8 +1589,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
}
Lex.Lex(); // eat the ','
- // We need to create a temporary record to provide a scope for the iteration
- // variable while parsing top-level foreach's.
+ // We need to create a temporary record to provide a scope for the
+ // two variables.
std::unique_ptr<Record> ParseRecTmp;
Record *ParseRec = CurRec;
if (!ParseRec) {
@@ -1632,8 +1598,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
ParseRec = ParseRecTmp.get();
}
- ParseRec->addValue(RecordVal(A, Start->getType(), false));
- ParseRec->addValue(RecordVal(B, ListType->getElementType(), false));
+ ParseRec->addValue(RecordVal(A, Start->getType(), RecordVal::FK_Normal));
+ ParseRec->addValue(RecordVal(B, ListType->getElementType(),
+ RecordVal::FK_Normal));
Init *ExprUntyped = ParseValue(ParseRec);
ParseRec->removeValue(A);
ParseRec->removeValue(B);
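The corrected grammar comment above puts the accumulator seed and the list first; a hypothetical Sum class shows the argument order:

class Sum<list<int> xs> {
  int total = !foldl(0, xs, acc, x, !add(acc, x));   // left fold: ((((0+1)+2)+3)+4)
}
def S : Sum<[1, 2, 3, 4]>;   // total = 10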
@@ -1677,6 +1644,9 @@ RecTy *TGParser::ParseOperatorType() {
return nullptr;
}
+ if (Lex.getCode() == tgtok::Code)
+ TokError("the 'code' type is not allowed in bang operators; use 'string'");
+
Type = ParseType();
if (!Type) {
@@ -1692,6 +1662,221 @@ RecTy *TGParser::ParseOperatorType() {
return Type;
}
+/// Parse the !substr operation. Return null on error.
+///
+/// Substr ::= !substr(string, start-int [, length-int]) => string
+Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) {
+ TernOpInit::TernaryOp Code = TernOpInit::SUBSTR;
+ RecTy *Type = StringRecTy::get();
+
+ Lex.Lex(); // eat the operation
+
+ if (!consume(tgtok::l_paren)) {
+ TokError("expected '(' after !substr operator");
+ return nullptr;
+ }
+
+ Init *LHS = ParseValue(CurRec);
+ if (!LHS)
+ return nullptr;
+
+ if (!consume(tgtok::comma)) {
+ TokError("expected ',' in !substr operator");
+ return nullptr;
+ }
+
+ SMLoc MHSLoc = Lex.getLoc();
+ Init *MHS = ParseValue(CurRec);
+ if (!MHS)
+ return nullptr;
+
+ SMLoc RHSLoc = Lex.getLoc();
+ Init *RHS;
+ if (consume(tgtok::comma)) {
+ RHSLoc = Lex.getLoc();
+ RHS = ParseValue(CurRec);
+ if (!RHS)
+ return nullptr;
+ } else {
+ RHS = IntInit::get(std::numeric_limits<int64_t>::max());
+ }
+
+ if (!consume(tgtok::r_paren)) {
+ TokError("expected ')' in !substr operator");
+ return nullptr;
+ }
+
+ if (ItemType && !Type->typeIsConvertibleTo(ItemType)) {
+ Error(RHSLoc, Twine("expected value of type '") +
+ ItemType->getAsString() + "', got '" +
+ Type->getAsString() + "'");
+ }
+
+ TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
+ if (!LHSt && !isa<UnsetInit>(LHS)) {
+ TokError("could not determine type of the string in !substr");
+ return nullptr;
+ }
+ if (LHSt && !isa<StringRecTy>(LHSt->getType())) {
+ TokError(Twine("expected string, got type '") +
+ LHSt->getType()->getAsString() + "'");
+ return nullptr;
+ }
+
+ TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
+ if (!MHSt && !isa<UnsetInit>(MHS)) {
+ TokError("could not determine type of the start position in !substr");
+ return nullptr;
+ }
+ if (MHSt && !isa<IntRecTy>(MHSt->getType())) {
+ Error(MHSLoc, Twine("expected int, got type '") +
+ MHSt->getType()->getAsString() + "'");
+ return nullptr;
+ }
+
+ if (RHS) {
+ TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
+ if (!RHSt && !isa<UnsetInit>(RHS)) {
+ TokError("could not determine type of the length in !substr");
+ return nullptr;
+ }
+ if (RHSt && !isa<IntRecTy>(RHSt->getType())) {
+ TokError(Twine("expected int, got type '") +
+ RHSt->getType()->getAsString() + "'");
+ return nullptr;
+ }
+ }
+
+ return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
+}
+
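A sketch of !substr as parsed above, using a hypothetical Split class; omitting the third operand makes the length default to the end of the string:

class Split<string s> {
  string head = !substr(s, 0, 3);   // "arm"
  string rest = !substr(s, 3);      // "v8.7a"
}
def V : Split<"armv8.7a">;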
+/// Parse the !foreach and !filter operations. Return null on error.
+///
+/// ForEach ::= !foreach(ID, list-or-dag, expr) => list<expr type>
+/// Filter  ::= !filter(ID, list, predicate) => list<list element type>
+Init *TGParser::ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType) {
+ SMLoc OpLoc = Lex.getLoc();
+ tgtok::TokKind Operation = Lex.getCode();
+ Lex.Lex(); // eat the operation
+ if (Lex.getCode() != tgtok::l_paren) {
+ TokError("expected '(' after !foreach/!filter");
+ return nullptr;
+ }
+
+ if (Lex.Lex() != tgtok::Id) { // eat the '('
+ TokError("first argument of !foreach/!filter must be an identifier");
+ return nullptr;
+ }
+
+ Init *LHS = StringInit::get(Lex.getCurStrVal());
+ Lex.Lex(); // eat the ID.
+
+ if (CurRec && CurRec->getValue(LHS)) {
+ TokError((Twine("iteration variable '") + LHS->getAsString() +
+ "' is already defined")
+ .str());
+ return nullptr;
+ }
+
+ if (!consume(tgtok::comma)) {
+ TokError("expected ',' in !foreach/!filter");
+ return nullptr;
+ }
+
+ Init *MHS = ParseValue(CurRec);
+ if (!MHS)
+ return nullptr;
+
+ if (!consume(tgtok::comma)) {
+ TokError("expected ',' in !foreach/!filter");
+ return nullptr;
+ }
+
+ TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
+ if (!MHSt) {
+ TokError("could not get type of !foreach/!filter list or dag");
+ return nullptr;
+ }
+
+ RecTy *InEltType = nullptr;
+ RecTy *ExprEltType = nullptr;
+ bool IsDAG = false;
+
+ if (ListRecTy *InListTy = dyn_cast<ListRecTy>(MHSt->getType())) {
+ InEltType = InListTy->getElementType();
+ if (ItemType) {
+ if (ListRecTy *OutListTy = dyn_cast<ListRecTy>(ItemType)) {
+ ExprEltType = (Operation == tgtok::XForEach)
+ ? OutListTy->getElementType()
+ : IntRecTy::get();
+ } else {
+ Error(OpLoc,
+ "expected value of type '" +
+ Twine(ItemType->getAsString()) +
+ "', but got list type");
+ return nullptr;
+ }
+ }
+ } else if (DagRecTy *InDagTy = dyn_cast<DagRecTy>(MHSt->getType())) {
+ if (Operation == tgtok::XFilter) {
+ TokError("!filter must have a list argument");
+ return nullptr;
+ }
+ InEltType = InDagTy;
+ if (ItemType && !isa<DagRecTy>(ItemType)) {
+ Error(OpLoc,
+ "expected value of type '" + Twine(ItemType->getAsString()) +
+ "', but got dag type");
+ return nullptr;
+ }
+ IsDAG = true;
+ } else {
+ if (Operation == tgtok::XForEach)
+ TokError("!foreach must have a list or dag argument");
+ else
+ TokError("!filter must have a list argument");
+ return nullptr;
+ }
+
+ // We need to create a temporary record to provide a scope for the
+ // iteration variable.
+ std::unique_ptr<Record> ParseRecTmp;
+ Record *ParseRec = CurRec;
+ if (!ParseRec) {
+ ParseRecTmp =
+ std::make_unique<Record>(".parse", ArrayRef<SMLoc>{}, Records);
+ ParseRec = ParseRecTmp.get();
+ }
+
+ ParseRec->addValue(RecordVal(LHS, InEltType, RecordVal::FK_Normal));
+ Init *RHS = ParseValue(ParseRec, ExprEltType);
+ ParseRec->removeValue(LHS);
+ if (!RHS)
+ return nullptr;
+
+ if (!consume(tgtok::r_paren)) {
+ TokError("expected ')' in !foreach/!filter");
+ return nullptr;
+ }
+
+ RecTy *OutType = InEltType;
+ if (Operation == tgtok::XForEach && !IsDAG) {
+ TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
+ if (!RHSt) {
+ TokError("could not get type of !foreach result expression");
+ return nullptr;
+ }
+ OutType = RHSt->getType()->getListTy();
+ } else if (Operation == tgtok::XFilter) {
+ OutType = InEltType->getListTy();
+ }
+
+ return (TernOpInit::get((Operation == tgtok::XForEach) ? TernOpInit::FOREACH
+ : TernOpInit::FILTER,
+ LHS, MHS, RHS, OutType))
+ ->Fold(CurRec);
+}
+
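A sketch of !foreach and the new !filter with a hypothetical Numbers class; !filter keeps the elements whose predicate evaluates to non-zero:

class Numbers<list<int> xs> {
  list<int> doubled = !foreach(x, xs, !mul(x, 2));          // [2, 4, 6, 8]
  list<int> evens   = !filter(x, xs, !eq(!and(x, 1), 0));   // [2, 4]
}
def N : Numbers<[1, 2, 3, 4]>;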
Init *TGParser::ParseOperationCond(Record *CurRec, RecTy *ItemType) {
Lex.Lex(); // eat the operation 'cond'
@@ -1783,6 +1968,7 @@ Init *TGParser::ParseOperationCond(Record *CurRec, RecTy *ItemType) {
/// SimpleValue ::= '(' IDValue DagArgList ')'
/// SimpleValue ::= CONCATTOK '(' Value ',' Value ')'
/// SimpleValue ::= ADDTOK '(' Value ',' Value ')'
+/// SimpleValue ::= SUBTOK '(' Value ',' Value ')'
/// SimpleValue ::= SHLTOK '(' Value ',' Value ')'
/// SimpleValue ::= SRATOK '(' Value ',' Value ')'
/// SimpleValue ::= SRLTOK '(' Value ',' Value ')'
@@ -1795,8 +1981,20 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
IDParseMode Mode) {
Init *R = nullptr;
switch (Lex.getCode()) {
- default: TokError("Unknown token when parsing a value"); break;
- case tgtok::IntVal: R = IntInit::get(Lex.getCurIntVal()); Lex.Lex(); break;
+ default: TokError("Unknown or reserved token when parsing a value"); break;
+
+ case tgtok::TrueVal:
+ R = IntInit::get(1);
+ Lex.Lex();
+ break;
+ case tgtok::FalseVal:
+ R = IntInit::get(0);
+ Lex.Lex();
+ break;
+ case tgtok::IntVal:
+ R = IntInit::get(Lex.getCurIntVal());
+ Lex.Lex();
+ break;
case tgtok::BinaryIntVal: {
auto BinaryVal = Lex.getCurBinaryIntVal();
SmallVector<Init*, 16> Bits(BinaryVal.second);
@@ -1820,7 +2018,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
break;
}
case tgtok::CodeFragment:
- R = CodeInit::get(Lex.getCurStrVal(), Lex.getLoc());
+ R = StringInit::get(Lex.getCurStrVal(), StringInit::SF_Code);
Lex.Lex();
break;
case tgtok::question:
@@ -1951,7 +2149,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
if (ItemType) {
ListRecTy *ListType = dyn_cast<ListRecTy>(ItemType);
if (!ListType) {
- TokError(Twine("Type mismatch for list, expected list type, got ") +
+ TokError(Twine("Encountered a list when expecting a ") +
ItemType->getAsString());
return nullptr;
}
@@ -2035,7 +2233,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::l_paren: { // Value ::= '(' IDValue DagArgList ')'
Lex.Lex(); // eat the '('
if (Lex.getCode() != tgtok::Id && Lex.getCode() != tgtok::XCast &&
- Lex.getCode() != tgtok::question && Lex.getCode() != tgtok::XGetOp) {
+ Lex.getCode() != tgtok::question && Lex.getCode() != tgtok::XGetDagOp) {
TokError("expected identifier in dag init");
return nullptr;
}
@@ -2073,14 +2271,17 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XSize:
case tgtok::XEmpty:
case tgtok::XCast:
- case tgtok::XGetOp: // Value ::= !unop '(' Value ')'
+ case tgtok::XGetDagOp: // Value ::= !unop '(' Value ')'
case tgtok::XIsA:
case tgtok::XConcat:
case tgtok::XDag:
case tgtok::XADD:
+ case tgtok::XSUB:
case tgtok::XMUL:
+ case tgtok::XNOT:
case tgtok::XAND:
case tgtok::XOR:
+ case tgtok::XXOR:
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
@@ -2093,12 +2294,15 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XListConcat:
case tgtok::XListSplat:
case tgtok::XStrConcat:
- case tgtok::XSetOp: // Value ::= !binop '(' Value ',' Value ')'
+ case tgtok::XInterleave:
+ case tgtok::XSetDagOp: // Value ::= !binop '(' Value ',' Value ')'
case tgtok::XIf:
case tgtok::XCond:
case tgtok::XFoldl:
case tgtok::XForEach:
- case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
+ case tgtok::XFilter:
+ case tgtok::XSubst:
+ case tgtok::XSubstr: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
return ParseOperation(CurRec, ItemType);
}
}
@@ -2106,7 +2310,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
return R;
}
-/// ParseValue - Parse a tblgen value. This returns null on error.
+/// ParseValue - Parse a TableGen value. This returns null on error.
///
/// Value ::= SimpleValue ValueSuffix*
/// ValueSuffix ::= '{' BitList '}'
@@ -2167,8 +2371,8 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
}
break;
}
- case tgtok::period: {
- if (Lex.Lex() != tgtok::Id) { // eat the .
+ case tgtok::dot: {
+ if (Lex.Lex() != tgtok::Id) { // eat the .
TokError("expected field identifier after '.'");
return nullptr;
}
@@ -2195,6 +2399,8 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
if (isa<ListRecTy>(LHS->getType())) {
Lex.Lex(); // Eat the '#'.
+ assert(Mode == ParseValueMode && "encountered paste of lists in name");
+
switch (Lex.getCode()) {
case tgtok::colon:
case tgtok::semi:
@@ -2202,8 +2408,11 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
Result = LHS; // trailing paste, ignore.
break;
default:
- Init *RHSResult = ParseValue(CurRec, ItemType, ParseNameMode);
+ Init *RHSResult = ParseValue(CurRec, ItemType, ParseValueMode);
+ if (!RHSResult)
+ return nullptr;
Result = BinOpInit::getListConcat(LHS, RHSResult);
+ break;
}
break;
}
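The list branch of the paste operator handled above concatenates list values; a hypothetical Cat class as illustration:

class Cat<list<int> a, list<int> b> {
  list<int> both = a # b;   // '#' between lists behaves like !listconcat
}
def C : Cat<[1, 2], [3, 4]>;   // both = [1, 2, 3, 4]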
@@ -2239,6 +2448,8 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
default:
Init *RHSResult = ParseValue(CurRec, nullptr, ParseNameMode);
+ if (!RHSResult)
+ return nullptr;
RHS = dyn_cast<TypedInit>(RHSResult);
if (!RHS) {
Error(PasteLoc, "RHS of paste is not typed!");
@@ -2410,8 +2621,10 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
"::");
}
- // Add the value.
- if (AddValue(CurRec, IdLoc, RecordVal(DeclName, Type, HasField)))
+ // Add the field to the record.
+ if (AddValue(CurRec, IdLoc, RecordVal(DeclName, IdLoc, Type,
+ HasField ? RecordVal::FK_NonconcreteOK
+ : RecordVal::FK_Normal)))
return nullptr;
// If a value is present, parse it.
@@ -2552,12 +2765,16 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
return false;
}
-/// ParseBodyItem - Parse a single item at within the body of a def or class.
+/// ParseBodyItem - Parse a single item within the body of a def or class.
///
/// BodyItem ::= Declaration ';'
/// BodyItem ::= LET ID OptionalBitList '=' Value ';'
/// BodyItem ::= Defvar
+/// BodyItem ::= Assert
bool TGParser::ParseBodyItem(Record *CurRec) {
+ if (Lex.getCode() == tgtok::Assert)
+ return ParseAssert(nullptr, CurRec);
+
if (Lex.getCode() == tgtok::Defvar)
return ParseDefvar();
@@ -2619,7 +2836,7 @@ bool TGParser::ParseBody(Record *CurRec) {
return false;
if (!consume(tgtok::l_brace))
- return TokError("Expected ';' or '{' to start body");
+ return TokError("Expected '{' to start body or ';' for declaration only");
// An object body introduces a new scope for local variables.
TGLocalVarScope *BodyScope = PushLocalScope();
@@ -2632,6 +2849,14 @@ bool TGParser::ParseBody(Record *CurRec) {
// Eat the '}'.
Lex.Lex();
+
+ // If we have a semicolon, print a gentle error.
+ SMLoc SemiLoc = Lex.getLoc();
+ if (consume(tgtok::semi)) {
+ PrintError(SemiLoc, "A class or def body should not end with a semicolon");
+ PrintNote("Semicolon ignored; remove to eliminate this error");
+ }
+
return false;
}
@@ -2963,6 +3188,41 @@ bool TGParser::ParseIfBody(MultiClass *CurMultiClass, StringRef Kind) {
return false;
}
+/// ParseAssert - Parse an assert statement.
+///
+/// Assert ::= ASSERT condition , message ;
+bool TGParser::ParseAssert(MultiClass *CurMultiClass, Record *CurRec) {
+ assert(Lex.getCode() == tgtok::Assert && "Unknown tok");
+ Lex.Lex(); // Eat the 'assert' token.
+
+ SMLoc ConditionLoc = Lex.getLoc();
+ Init *Condition = ParseValue(CurRec);
+ if (!Condition)
+ return true;
+
+ if (!consume(tgtok::comma)) {
+ TokError("expected ',' in assert statement");
+ return true;
+ }
+
+ Init *Message = ParseValue(CurRec);
+ if (!Message)
+ return true;
+
+ if (!consume(tgtok::semi))
+ return TokError("expected ';'");
+
+ if (CurMultiClass) {
+ assert(false && "assert in multiclass not yet supported");
+ } else if (CurRec) {
+ CurRec->addAssertion(ConditionLoc, Condition, Message);
+ } else { // at top level
+ CheckAssert(ConditionLoc, Condition, Message);
+ }
+
+ return false;
+}
+
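A sketch of the assert statement accepted by this parser; Sized and S8 are hypothetical, and per the code above assert is rejected inside a multiclass for now:

// Checked as soon as the top-level statement is parsed.
assert !eq(!size([1, 2, 3]), 3), "expected exactly three elements";

class Sized<int n> {
  assert !ge(n, 0), "size must be non-negative";   // stored and checked when the record is resolved
  int size = n;
}
def S8 : Sized<8>;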
/// ParseClass - Parse a tblgen class definition.
///
/// ClassInst ::= CLASS ID TemplateArgList? ObjectBody
@@ -3162,14 +3422,17 @@ bool TGParser::ParseMultiClass() {
while (Lex.getCode() != tgtok::r_brace) {
switch (Lex.getCode()) {
default:
- return TokError("expected 'let', 'def', 'defm', 'defvar', 'foreach' "
- "or 'if' in multiclass body");
- case tgtok::Let:
+ return TokError("expected 'assert', 'def', 'defm', 'defvar', "
+ "'foreach', 'if', or 'let' in multiclass body");
+ case tgtok::Assert:
+ return TokError("an assert statement in a multiclass is not yet supported");
+
case tgtok::Def:
case tgtok::Defm:
case tgtok::Defvar:
case tgtok::Foreach:
case tgtok::If:
+ case tgtok::Let:
if (ParseObject(CurMultiClass))
return true;
break;
@@ -3177,6 +3440,13 @@ bool TGParser::ParseMultiClass() {
}
Lex.Lex(); // eat the '}'.
+ // If we have a semicolon, print a gentle error.
+ SMLoc SemiLoc = Lex.getLoc();
+ if (consume(tgtok::semi)) {
+ PrintError(SemiLoc, "A multiclass body should not end with a semicolon");
+ PrintNote("Semicolon ignored; remove to eliminate this error");
+ }
+
PopLocalScope(MulticlassScope);
}
@@ -3320,22 +3590,23 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
/// Object ::= LETCommand Object
/// Object ::= Defset
/// Object ::= Defvar
+/// Object ::= Assert
bool TGParser::ParseObject(MultiClass *MC) {
switch (Lex.getCode()) {
default:
- return TokError("Expected class, def, defm, defset, multiclass, let, "
- "foreach or if");
- case tgtok::Let: return ParseTopLevelLet(MC);
- case tgtok::Def: return ParseDef(MC);
- case tgtok::Foreach: return ParseForeach(MC);
- case tgtok::If: return ParseIf(MC);
- case tgtok::Defm: return ParseDefm(MC);
+ return TokError(
+ "Expected assert, class, def, defm, defset, foreach, if, or let");
+ case tgtok::Assert: return ParseAssert(MC, nullptr);
+ case tgtok::Def: return ParseDef(MC);
+ case tgtok::Defm: return ParseDefm(MC);
+ case tgtok::Defvar: return ParseDefvar();
+ case tgtok::Foreach: return ParseForeach(MC);
+ case tgtok::If: return ParseIf(MC);
+ case tgtok::Let: return ParseTopLevelLet(MC);
case tgtok::Defset:
if (MC)
return TokError("defset is not allowed inside multiclass");
return ParseDefset();
- case tgtok::Defvar:
- return ParseDefvar();
case tgtok::Class:
if (MC)
return TokError("class is not allowed inside multiclass");
@@ -3367,7 +3638,38 @@ bool TGParser::ParseFile() {
if (Lex.getCode() == tgtok::Eof)
return false;
- return TokError("Unexpected input at top level");
+ return TokError("Unexpected token at top level");
+}
+
+// Check an assertion: Obtain the condition value and be sure it is true.
+// If not, print a nonfatal error along with the message.
+void TGParser::CheckAssert(SMLoc Loc, Init *Condition, Init *Message) {
+ auto *CondValue = dyn_cast_or_null<IntInit>(
+ Condition->convertInitializerTo(IntRecTy::get()));
+ if (CondValue) {
+ if (!CondValue->getValue()) {
+ PrintError(Loc, "assertion failed");
+ if (auto *MessageInit = dyn_cast<StringInit>(Message))
+ PrintNote(MessageInit->getValue());
+ else
+ PrintNote("(assert message is not a string)");
+ }
+ } else {
+ PrintError(Loc, "assert condition must be of type bit, bits, or int");
+ }
+}
+
+// Check all record assertions: For each one, resolve the condition
+// and message, then call CheckAssert().
+void TGParser::CheckRecordAsserts(Record &Rec) {
+ RecordResolver R(Rec);
+ R.setFinal(true);
+
+ for (auto Assertion : Rec.getAssertions()) {
+ Init *Condition = std::get<1>(Assertion)->resolveReferences(R);
+ Init *Message = std::get<2>(Assertion)->resolveReferences(R);
+ CheckAssert(std::get<0>(Assertion), Condition, Message);
+ }
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGParser.h b/contrib/llvm-project/llvm/lib/TableGen/TGParser.h
index 07a4003219f5..578a56c9d01c 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGParser.h
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGParser.h
@@ -222,6 +222,7 @@ private: // Parser methods.
bool ParseForeach(MultiClass *CurMultiClass);
bool ParseIf(MultiClass *CurMultiClass);
bool ParseIfBody(MultiClass *CurMultiClass, StringRef Kind);
+ bool ParseAssert(MultiClass *CurMultiClass, Record *CurRec);
bool ParseTopLevelLet(MultiClass *CurMultiClass);
void ParseLetList(SmallVectorImpl<LetRecord> &Result);
@@ -254,6 +255,8 @@ private: // Parser methods.
TypedInit *FirstItem = nullptr);
RecTy *ParseType();
Init *ParseOperation(Record *CurRec, RecTy *ItemType);
+ Init *ParseOperationSubstr(Record *CurRec, RecTy *ItemType);
+ Init *ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType);
Init *ParseOperationCond(Record *CurRec, RecTy *ItemType);
RecTy *ParseOperatorType();
Init *ParseObjectName(MultiClass *CurMultiClass);
@@ -261,6 +264,8 @@ private: // Parser methods.
MultiClass *ParseMultiClassID();
bool ApplyLetStack(Record *CurRec);
bool ApplyLetStack(RecordsEntry &Entry);
+ void CheckAssert(SMLoc Loc, Init *Condition, Init *Message);
+ void CheckRecordAsserts(Record &Rec);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TableGenBackendSkeleton.cpp b/contrib/llvm-project/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
new file mode 100644
index 000000000000..4ce88e003e65
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
@@ -0,0 +1,64 @@
+//===- SkeletonEmitter.cpp - Skeleton TableGen backend -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This TableGen backend emits ...
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <algorithm>
+#include <set>
+#include <string>
+#include <vector>
+
+#define DEBUG_TYPE "skeleton-emitter"
+
+using namespace llvm;
+
+namespace {
+
+// Any helper data structures can be defined here. Some backends use
+// structs to collect information from the records.
+
+class SkeletonEmitter {
+private:
+ RecordKeeper &Records;
+
+public:
+ SkeletonEmitter(RecordKeeper &RK) : Records(RK) {}
+
+ void run(raw_ostream &OS);
+}; // emitter class
+
+} // anonymous namespace
+
+void SkeletonEmitter::run(raw_ostream &OS) {
+ emitSourceFileHeader("Skeleton data structures", OS);
+
+ (void)Records; // To suppress unused variable warning; remove on use.
+}
+
+namespace llvm {
+
+// The only thing that should be in the llvm namespace is the
+// emitter entry point function.
+
+void EmitSkeleton(RecordKeeper &RK, raw_ostream &OS) {
+ // Instantiate the emitter class and invoke run().
+ SkeletonEmitter(RK).run(OS);
+}
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h
index fd35b530e3ce..d2170a99e0a2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h
@@ -58,8 +58,10 @@ ModulePass *createSVEIntrinsicOptsPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
-FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone);
-FunctionPass *createAArch64PostLegalizeCombiner(bool IsOptNone);
+FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone);
+FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone);
+FunctionPass *createAArch64PostLegalizerLowering();
+FunctionPass *createAArch64PostSelectOptimize();
FunctionPass *createAArch64StackTaggingPass(bool IsOptNone);
FunctionPass *createAArch64StackTaggingPreRAPass();
@@ -80,6 +82,8 @@ void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
+void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);
+void initializeAArch64PostSelectOptimizePass(PassRegistry &);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64StorePairSuppressPass(PassRegistry&);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
index 534af9686af0..762855207d2b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
@@ -61,6 +61,9 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
"Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
+def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
+ "Enable out of line atomics to support LSE instructions">;
+
def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
"Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
@@ -72,9 +75,11 @@ def FeatureLOR : SubtargetFeature<
"lor", "HasLOR", "true",
"Enables ARM v8.1 Limited Ordering Regions extension">;
-def FeatureVH : SubtargetFeature<
- "vh", "HasVH", "true",
- "Enables ARM v8.1 Virtual Host extension">;
+def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2",
+ "true", "Enable RW operand CONTEXTIDR_EL2" >;
+
+def FeatureVH : SubtargetFeature<"vh", "HasVH", "true",
+ "Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >;
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable ARMv8 PMUv3 Performance Monitors extension">;
@@ -213,6 +218,10 @@ def FeatureArithmeticCbzFusion : SubtargetFeature<
"arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
"CPU fuses arithmetic + cbz/cbnz operations">;
+def FeatureCmpBccFusion : SubtargetFeature<
+ "cmp-bcc-fusion", "HasCmpBccFusion", "true",
+ "CPU fuses cmp+bcc operations">;
+
def FeatureFuseAddress : SubtargetFeature<
"fuse-address", "HasFuseAddress", "true",
"CPU fuses address generation and memory operations">;
@@ -256,8 +265,8 @@ def FeatureDotProd : SubtargetFeature<
"dotprod", "HasDotProd", "true",
"Enable dot product support">;
-def FeaturePA : SubtargetFeature<
- "pa", "HasPA", "true",
+def FeaturePAuth : SubtargetFeature<
+ "pauth", "HasPAuth", "true",
"Enable v8.3-A Pointer Authentication extension">;
def FeatureJS : SubtargetFeature<
@@ -278,11 +287,6 @@ def FeatureNV : SubtargetFeature<
"nv", "HasNV", "true",
"Enable v8.4-A Nested Virtualization Enchancement">;
-def FeatureRASv8_4 : SubtargetFeature<
- "rasv8_4", "HasRASv8_4", "true",
- "Enable v8.4-A Reliability, Availability and Serviceability extension",
- [FeatureRAS]>;
-
def FeatureMPAM : SubtargetFeature<
"mpam", "HasMPAM", "true",
"Enable v8.4-A Memory system Partitioning and Monitoring extension">;
@@ -316,8 +320,8 @@ def FeatureTLB_RMI : SubtargetFeature<
"tlb-rmi", "HasTLB_RMI", "true",
"Enable v8.4-A TLB Range and Maintenance Instructions">;
-def FeatureFMI : SubtargetFeature<
- "fmi", "HasFMI", "true",
+def FeatureFlagM : SubtargetFeature<
+ "flagm", "HasFlagM", "true",
"Enable v8.4-A Flag Manipulation Instructions">;
// 8.4 RCPC enhancements: LDAPR & STLR instructions with Immediate Offset
@@ -400,6 +404,24 @@ def FeatureMatMulFP32 : SubtargetFeature<"f32mm", "HasMatMulFP32",
def FeatureMatMulFP64 : SubtargetFeature<"f64mm", "HasMatMulFP64",
"true", "Enable Matrix Multiply FP64 Extension", [FeatureSVE]>;
+def FeatureXS : SubtargetFeature<"xs", "HasXS",
+ "true", "Enable Armv8.7-A limited-TLB-maintenance instruction">;
+
+def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT",
+ "true", "Enable Armv8.7-A WFET and WFIT instruction">;
+
+def FeatureHCX : SubtargetFeature<
+ "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register">;
+
+def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64",
+ "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">;
+
+def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE",
+ "true", "Enable Branch Record Buffer Extension">;
+
+def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF",
+ "true", "Enable extra register in the Statistical Profiling Extension">;
+
def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps",
"true", "Enable fine grained virtualization traps extension">;
@@ -420,14 +442,14 @@ def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>;
def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
- "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePA,
+ "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth,
FeatureJS, FeatureCCIDX, FeatureComplxNum]>;
def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
"Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd,
- FeatureNV, FeatureRASv8_4, FeatureMPAM, FeatureDIT,
+ FeatureNV, FeatureMPAM, FeatureDIT,
FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeaturePMU, FeatureTLB_RMI,
- FeatureFMI, FeatureRCPC_IMMO]>;
+ FeatureFlagM, FeatureRCPC_IMMO]>;
def HasV8_5aOps : SubtargetFeature<
"v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
@@ -437,10 +459,29 @@ def HasV8_5aOps : SubtargetFeature<
def HasV8_6aOps : SubtargetFeature<
"v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions",
-
[HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps,
FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>;
+def HasV8_7aOps : SubtargetFeature<
+ "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions",
+ [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>;
+
+def HasV8_0rOps : SubtargetFeature<
+ "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",
+ [//v8.1
+ FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2,
+ //v8.2
+ FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4,
+ FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV,
+ //v8.3
+ FeatureComplxNum, FeatureCCIDX, FeatureJS,
+ FeaturePAuth, FeatureRCPC,
+ //v8.4
+ FeatureDotProd, FeatureFP16FML, FeatureTRACEV8_4,
+ FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO,
+ //v8.5
+ FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -502,10 +543,11 @@ def SVEUnsupported : AArch64Unsupported {
}
def PAUnsupported : AArch64Unsupported {
- let F = [HasPA];
+ let F = [HasPAuth];
}
include "AArch64SchedA53.td"
+include "AArch64SchedA55.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
include "AArch64SchedFalkor.td"
@@ -515,7 +557,9 @@ include "AArch64SchedExynosM4.td"
include "AArch64SchedExynosM5.td"
include "AArch64SchedThunderX.td"
include "AArch64SchedThunderX2T99.td"
+include "AArch64SchedA64FX.td"
include "AArch64SchedThunderX3T110.td"
+include "AArch64SchedTSV110.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors", [
@@ -575,6 +619,9 @@ def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
FeatureDotProd,
FeatureFPARMv8,
FeatureFullFP16,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseLiterals,
FeatureNEON,
FeatureRAS,
FeatureRCPC,
@@ -587,6 +634,7 @@ def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
FeatureCrypto,
FeatureFPARMv8,
FeatureFuseAES,
+ FeatureFuseLiterals,
FeatureNEON,
FeaturePerfMon
]>;
@@ -618,6 +666,7 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
"Cortex-A76 ARM processors", [
HasV8_2aOps,
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON,
FeatureRCPC,
FeatureCrypto,
@@ -629,7 +678,9 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
"Cortex-A77 ARM processors", [
HasV8_2aOps,
+ FeatureCmpBccFusion,
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON, FeatureRCPC,
FeatureCrypto,
FeatureFullFP16,
@@ -640,6 +691,7 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily",
"CortexA78",
"Cortex-A78 ARM processors", [
HasV8_2aOps,
+ FeatureCmpBccFusion,
FeatureCrypto,
FeatureFPARMv8,
FeatureFuseAES,
@@ -652,9 +704,39 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily",
FeatureSSBS,
FeatureDotProd]>;
+def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily",
+ "CortexA78C",
+ "Cortex-A78C ARM processors", [
+ HasV8_2aOps,
+ FeatureCmpBccFusion,
+ FeatureCrypto,
+ FeatureDotProd,
+ FeatureFlagM,
+ FeatureFP16FML,
+ FeatureFPARMv8,
+ FeatureFullFP16,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePAuth,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureRCPC,
+ FeatureSPE,
+ FeatureSSBS]>;
+
+def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
+ "CortexR82",
+ "Cortex-R82 ARM Processors", [
+ FeaturePostRAScheduler,
+ // TODO: crypto and FuseAES
+ // All other features are implied by v8_0r ops:
+ HasV8_0rOps,
+ ]>;
+
def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", [
HasV8_2aOps,
+ FeatureCmpBccFusion,
FeatureCrypto,
FeatureFPARMv8,
FeatureFuseAES,
@@ -676,7 +758,10 @@ def ProcA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
FeatureFullFP16,
FeatureSVE,
FeaturePostRAScheduler,
- FeatureComplxNum
+ FeatureComplxNum,
+ FeatureAggressiveFMA,
+ FeatureArithmeticBccFusion,
+ FeaturePredictableSelectIsExpensive
]>;
def ProcCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
@@ -783,6 +868,38 @@ def ProcAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
HasV8_4aOps
]>;
+def ProcAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
+ "Apple A14", [
+ FeatureAggressiveFMA,
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureAltFPCmp,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureCrypto,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFPARMv8,
+ FeatureFRInt3264,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseCryptoEOR,
+ FeatureFuseLiterals,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeatureSpecRestrict,
+ FeatureSSBS,
+ FeatureSB,
+ FeaturePredRes,
+ FeatureCacheDeepPersist,
+ FeatureZCRegMove,
+ FeatureZCZeroing,
+ FeatureFullFP16,
+ FeatureFP16FML,
+ FeatureSHA3,
+ HasV8_4aOps
+ ]>;
+
def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
[FeatureCRC,
@@ -876,6 +993,38 @@ def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily",
FeatureSSBS,
]>;
+def ProcNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily",
+ "NeoverseN2",
+ "Neoverse N2 ARM processors", [
+ HasV8_5aOps,
+ FeatureBF16,
+ FeatureETE,
+ FeatureMatMulInt8,
+ FeatureMTE,
+ FeatureSVE2,
+ FeatureSVE2BitPerm,
+ FeatureTRBE]>;
+
+def ProcNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily",
+ "NeoverseV1",
+ "Neoverse V1 ARM processors", [
+ HasV8_4aOps,
+ FeatureBF16,
+ FeatureCacheDeepPersist,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureFP16FML,
+ FeatureFullFP16,
+ FeatureFuseAES,
+ FeatureMatMulInt8,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureRandGen,
+ FeatureSPE,
+ FeatureSSBS,
+ FeatureSVE]>;
+
def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
"Qualcomm Saphira processors", [
FeatureCrypto,
@@ -916,7 +1065,7 @@ def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureLSE,
- FeaturePA,
+ FeaturePAuth,
FeatureUseAA,
FeatureBalanceFPOps,
FeaturePerfMon,
@@ -998,7 +1147,7 @@ def : ProcessorModel<"generic", NoSchedModel, [
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a34", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
-def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>;
+def : ProcessorModel<"cortex-a55", CortexA55Model, [ProcA55]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cortex-a65", CortexA53Model, [ProcA65]>;
def : ProcessorModel<"cortex-a65ae", CortexA53Model, [ProcA65]>;
@@ -1009,9 +1158,13 @@ def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"cortex-a77", CortexA57Model, [ProcA77]>;
def : ProcessorModel<"cortex-a78", CortexA57Model, [ProcA78]>;
+def : ProcessorModel<"cortex-a78c", CortexA57Model, [ProcA78C]>;
+def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>;
def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>;
def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
+def : ProcessorModel<"neoverse-n2", CortexA57Model, [ProcNeoverseN2]>;
+def : ProcessorModel<"neoverse-v1", CortexA57Model, [ProcNeoverseV1]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>;
@@ -1027,8 +1180,7 @@ def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
// Marvell ThunderX3T110 Processors.
def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>;
-// FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57.
-def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>;
+def : ProcessorModel<"tsv110", TSV110Model, [ProcTSV110]>;
// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
def : ProcessorModel<"cyclone", CycloneModel, [ProcAppleA7]>;
@@ -1041,6 +1193,7 @@ def : ProcessorModel<"apple-a10", CycloneModel, [ProcAppleA10]>;
def : ProcessorModel<"apple-a11", CycloneModel, [ProcAppleA11]>;
def : ProcessorModel<"apple-a12", CycloneModel, [ProcAppleA12]>;
def : ProcessorModel<"apple-a13", CycloneModel, [ProcAppleA13]>;
+def : ProcessorModel<"apple-a14", CycloneModel, [ProcAppleA14]>;
// watch CPUs.
def : ProcessorModel<"apple-s4", CycloneModel, [ProcAppleA12]>;
@@ -1050,8 +1203,7 @@ def : ProcessorModel<"apple-s5", CycloneModel, [ProcAppleA12]>;
def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA13]>;
// Fujitsu A64FX
-// FIXME: Scheduling model is not implemented yet.
-def : ProcessorModel<"a64fx", NoSchedModel, [ProcA64FX]>;
+def : ProcessorModel<"a64fx", A64FXModel, [ProcA64FX]>;
// Nvidia Carmel
def : ProcessorModel<"carmel", NoSchedModel, [ProcCarmel]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 981b366c14b1..c996d2df8c38 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -123,7 +123,7 @@ static bool isFPR64(unsigned Reg, unsigned SubReg,
}
// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64
-// copy instruction. Return zero_reg if the instruction is not a copy.
+// copy instruction. Return nullptr if the instruction is not a copy.
static MachineOperand *getSrcFromCopy(MachineInstr *MI,
const MachineRegisterInfo *MRI,
unsigned &SubReg) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 7ec7ffe309f7..a0c5498ee620 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -32,6 +32,7 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -54,6 +55,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -69,12 +71,13 @@ namespace {
class AArch64AsmPrinter : public AsmPrinter {
AArch64MCInstLower MCInstLowering;
StackMaps SM;
+ FaultMaps FM;
const AArch64Subtarget *STI;
public:
AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
- SM(*this) {}
+ SM(*this), FM(*this) {}
StringRef getPassName() const override { return "AArch64 Assembly Printer"; }
@@ -86,17 +89,18 @@ public:
void emitStartOfAsmFile(Module &M) override;
void emitJumpTableInfo() override;
- void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
- const MachineBasicBlock *MBB, unsigned JTI);
void emitFunctionEntryLabel() override;
- void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
+ void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
+ void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+ void LowerFAULTING_OP(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
@@ -191,58 +195,24 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
return;
// Assemble feature flags that may require creation of a note section.
- unsigned Flags = ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI |
- ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
-
- if (any_of(M, [](const Function &F) {
- return !F.isDeclaration() &&
- !F.hasFnAttribute("branch-target-enforcement");
- })) {
- Flags &= ~ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
- }
+ unsigned Flags = 0;
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("branch-target-enforcement")))
+ if (BTE->getZExtValue())
+ Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
- if ((Flags & ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI) == 0 &&
- any_of(M, [](const Function &F) {
- return F.hasFnAttribute("branch-target-enforcement");
- })) {
- errs() << "warning: some functions compiled with BTI and some compiled "
- "without BTI\n"
- << "warning: not setting BTI in feature flags\n";
- }
-
- if (any_of(M, [](const Function &F) {
- if (F.isDeclaration())
- return false;
- Attribute A = F.getFnAttribute("sign-return-address");
- return !A.isStringAttribute() || A.getValueAsString() == "none";
- })) {
- Flags &= ~ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
- }
+ if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("sign-return-address")))
+ if (Sign->getZExtValue())
+ Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
if (Flags == 0)
return;
// Emit a .note.gnu.property section with the flags.
- MCSection *Cur = OutStreamer->getCurrentSectionOnly();
- MCSection *Nt = MMI->getContext().getELFSection(
- ".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC);
- OutStreamer->SwitchSection(Nt);
-
- // Emit the note header.
- emitAlignment(Align(8));
- OutStreamer->emitInt32(4); // data size for "GNU\0"
- OutStreamer->emitInt32(4 * 4); // Elf_Prop size
- OutStreamer->emitInt32(ELF::NT_GNU_PROPERTY_TYPE_0);
- OutStreamer->emitBytes(StringRef("GNU", 4)); // note name
-
- // Emit the PAC/BTI properties.
- OutStreamer->emitInt32(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND);
- OutStreamer->emitInt32(4); // data size
- OutStreamer->emitInt32(Flags); // data
- OutStreamer->emitInt32(0); // pad
-
- OutStreamer->endSection(Nt);
- OutStreamer->SwitchSection(Cur);
+ if (auto *TS = static_cast<AArch64TargetStreamer *>(
+ OutStreamer->getTargetStreamer()))
+ TS->emitNoteSection(Flags);
}
void AArch64AsmPrinter::emitFunctionHeaderComment() {
@@ -333,7 +303,7 @@ void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" +
utostr(AccessInfo);
if (IsShort)
- SymName += "_short";
+ SymName += "_short_v2";
Sym = OutContext.getOrCreateSymbol(SymName);
}
@@ -350,6 +320,7 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
assert(TT.isOSBinFormatELF());
std::unique_ptr<MCSubtargetInfo> STI(
TM.getTarget().createMCSubtargetInfo(TT.str(), "", ""));
+ assert(STI && "Unable to create subtarget info");
MCSymbol *HwasanTagMismatchV1Sym =
OutContext.getOrCreateSymbol("__hwasan_tag_mismatch");
@@ -369,6 +340,15 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
IsShort ? HwasanTagMismatchV2Ref : HwasanTagMismatchV1Ref;
MCSymbol *Sym = P.second;
+ bool HasMatchAllTag =
+ (AccessInfo >> HWASanAccessInfo::HasMatchAllShift) & 1;
+ uint8_t MatchAllTag =
+ (AccessInfo >> HWASanAccessInfo::MatchAllShift) & 0xff;
+ unsigned Size =
+ 1 << ((AccessInfo >> HWASanAccessInfo::AccessSizeShift) & 0xf);
+ bool CompileKernel =
+ (AccessInfo >> HWASanAccessInfo::CompileKernelShift) & 1;
+
OutStreamer->SwitchSection(OutContext.getELFSection(
".text.hot", ELF::SHT_PROGBITS,
ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
@@ -379,19 +359,20 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
OutStreamer->emitSymbolAttribute(Sym, MCSA_Hidden);
OutStreamer->emitLabel(Sym);
- OutStreamer->emitInstruction(MCInstBuilder(AArch64::UBFMXri)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::SBFMXri)
.addReg(AArch64::X16)
.addReg(Reg)
.addImm(4)
.addImm(55),
*STI);
- OutStreamer->emitInstruction(MCInstBuilder(AArch64::LDRBBroX)
- .addReg(AArch64::W16)
- .addReg(AArch64::X9)
- .addReg(AArch64::X16)
- .addImm(0)
- .addImm(0),
- *STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::LDRBBroX)
+ .addReg(AArch64::W16)
+ .addReg(IsShort ? AArch64::X20 : AArch64::X9)
+ .addReg(AArch64::X16)
+ .addImm(0)
+ .addImm(0),
+ *STI);
OutStreamer->emitInstruction(
MCInstBuilder(AArch64::SUBSXrs)
.addReg(AArch64::XZR)
@@ -412,6 +393,26 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
MCInstBuilder(AArch64::RET).addReg(AArch64::LR), *STI);
OutStreamer->emitLabel(HandleMismatchOrPartialSym);
+ if (HasMatchAllTag) {
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::UBFMXri)
+ .addReg(AArch64::X16)
+ .addReg(Reg)
+ .addImm(56)
+ .addImm(63),
+ *STI);
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSXri)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X16)
+ .addImm(MatchAllTag)
+ .addImm(0),
+ *STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::Bcc)
+ .addImm(AArch64CC::EQ)
+ .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)),
+ *STI);
+ }
+
if (IsShort) {
OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSWri)
.addReg(AArch64::WZR)
@@ -432,7 +433,6 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
.addReg(Reg)
.addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)),
*STI);
- unsigned Size = 1 << (AccessInfo & 0xf);
if (Size != 1)
OutStreamer->emitInstruction(MCInstBuilder(AArch64::ADDXri)
.addReg(AArch64::X17)
@@ -500,32 +500,41 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
.addReg(Reg)
.addImm(0),
*STI);
- OutStreamer->emitInstruction(MCInstBuilder(AArch64::MOVZXi)
- .addReg(AArch64::X1)
- .addImm(AccessInfo)
- .addImm(0),
- *STI);
-
- // Intentionally load the GOT entry and branch to it, rather than possibly
- // late binding the function, which may clobber the registers before we have
- // a chance to save them.
OutStreamer->emitInstruction(
- MCInstBuilder(AArch64::ADRP)
- .addReg(AArch64::X16)
- .addExpr(AArch64MCExpr::create(
- HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_PAGE,
- OutContext)),
+ MCInstBuilder(AArch64::MOVZXi)
+ .addReg(AArch64::X1)
+ .addImm(AccessInfo & HWASanAccessInfo::RuntimeMask)
+ .addImm(0),
*STI);
- OutStreamer->emitInstruction(
- MCInstBuilder(AArch64::LDRXui)
- .addReg(AArch64::X16)
- .addReg(AArch64::X16)
- .addExpr(AArch64MCExpr::create(
- HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_LO12,
- OutContext)),
- *STI);
- OutStreamer->emitInstruction(
- MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI);
+
+ if (CompileKernel) {
+ // The Linux kernel's dynamic loader doesn't support GOT relative
+ // relocations, but it doesn't support late binding either, so just call
+ // the function directly.
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::B).addExpr(HwasanTagMismatchRef), *STI);
+ } else {
+ // Intentionally load the GOT entry and branch to it, rather than possibly
+ // late binding the function, which may clobber the registers before we
+ // have a chance to save them.
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::ADRP)
+ .addReg(AArch64::X16)
+ .addExpr(AArch64MCExpr::create(
+ HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_PAGE,
+ OutContext)),
+ *STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::LDRXui)
+ .addReg(AArch64::X16)
+ .addReg(AArch64::X16)
+ .addExpr(AArch64MCExpr::create(
+ HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_LO12,
+ OutContext)),
+ *STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI);
+ }
}
}
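EmitHwasanMemaccessSymbols above unpacks the match-all tag, the access size and the compile-kernel bit from the packed AccessInfo immediate, and only the runtime-relevant bits (AccessInfo & RuntimeMask) are materialized into x1 before the tail call. The following self-contained sketch shows one way such a packed word round-trips; the shift positions are assumptions for illustration, the authoritative layout is the HWASanAccessInfo enum, which is not shown in this diff.

#include <cassert>
#include <cstdint>

// Assumed field layout; not the real HWASanAccessInfo constants.
constexpr unsigned AccessSizeShift = 0;    // 4 bits: log2(access size)
constexpr unsigned CompileKernelShift = 5; // 1 bit
constexpr unsigned MatchAllShift = 8;      // 8 bits: the match-all tag value
constexpr unsigned HasMatchAllShift = 16;  // 1 bit

uint64_t packAccessInfo(unsigned Log2Size, bool CompileKernel, bool HasMatchAll,
                        uint8_t MatchAllTag) {
  assert(Log2Size < 16 && "log2 of the access size must fit in 4 bits");
  return (uint64_t(Log2Size) << AccessSizeShift) |
         (uint64_t(CompileKernel) << CompileKernelShift) |
         (uint64_t(MatchAllTag) << MatchAllShift) |
         (uint64_t(HasMatchAll) << HasMatchAllShift);
}

// Mirrors the unpacking done before the check routine is emitted.
void unpackAccessInfo(uint64_t AccessInfo, bool &HasMatchAllTag,
                      uint8_t &MatchAllTag, unsigned &Size, bool &CompileKernel) {
  HasMatchAllTag = (AccessInfo >> HasMatchAllShift) & 1;
  MatchAllTag = (AccessInfo >> MatchAllShift) & 0xff;
  Size = 1u << ((AccessInfo >> AccessSizeShift) & 0xf);
  CompileKernel = (AccessInfo >> CompileKernelShift) & 1;
}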
@@ -541,7 +550,11 @@ void AArch64AsmPrinter::emitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
+
+ // Emit stack and fault map information.
emitStackMaps(SM);
+ FM.serializeToFaultMapSection();
+
}
void AArch64AsmPrinter::EmitLOHs() {
@@ -634,7 +647,8 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
const TargetRegisterInfo *RI = STI->getRegisterInfo();
Register Reg = MO.getReg();
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
- assert(RI->regsOverlap(RegToPrint, Reg));
+ if (!RI->regsOverlap(RegToPrint, Reg))
+ return true;
O << AArch64InstPrinter::getRegisterName(RegToPrint, AltName);
return false;
}
@@ -795,33 +809,25 @@ void AArch64AsmPrinter::emitJumpTableInfo() {
emitAlignment(Align(Size));
OutStreamer->emitLabel(GetJTISymbol(JTI));
- for (auto *JTBB : JTBBs)
- emitJumpTableEntry(MJTI, JTBB, JTI);
- }
-}
+ const MCSymbol *BaseSym = AArch64FI->getJumpTableEntryPCRelSymbol(JTI);
+ const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
-void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
- const MachineBasicBlock *MBB,
- unsigned JTI) {
- const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
- auto AFI = MF->getInfo<AArch64FunctionInfo>();
- unsigned Size = AFI->getJumpTableEntrySize(JTI);
+ for (auto *JTBB : JTBBs) {
+ const MCExpr *Value =
+ MCSymbolRefExpr::create(JTBB->getSymbol(), OutContext);
- if (Size == 4) {
- // .word LBB - LJTI
- const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
- const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext);
- Value = MCBinaryExpr::createSub(Value, Base, OutContext);
- } else {
- // .byte (LBB - LBB) >> 2 (or .hword)
- const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI);
- const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
- Value = MCBinaryExpr::createSub(Value, Base, OutContext);
- Value = MCBinaryExpr::createLShr(
- Value, MCConstantExpr::create(2, OutContext), OutContext);
- }
+ // Each entry is:
+ // .byte/.hword (LBB - Lbase)>>2
+ // or plain:
+ // .word LBB - Lbase
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ if (Size != 4)
+ Value = MCBinaryExpr::createLShr(
+ Value, MCConstantExpr::create(2, OutContext), OutContext);
- OutStreamer->emitValue(Value, Size);
+ OutStreamer->emitValue(Value, Size);
+ }
+ }
}
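Every jump-table entry emitted above is now relative to a single base symbol per table, and compressed 1- or 2-byte entries additionally store the distance divided by the 4-byte instruction size. A self-contained sketch of that encoding arithmetic, using plain integer addresses in place of MCExpr; the names are illustrative.

#include <cassert>
#include <cstdint>

// Encode one jump-table entry the way emitJumpTableInfo does:
//   Size == 4:        entry = LBB - Lbase
//   Size == 1 or 2:   entry = (LBB - Lbase) >> 2
int64_t encodeJumpTableEntry(uint64_t BlockAddr, uint64_t BaseAddr, unsigned Size) {
  int64_t Value = static_cast<int64_t>(BlockAddr - BaseAddr);
  if (Size != 4) {
    assert(Value >= 0 && Value % 4 == 0 &&
           "compressed entries are non-negative multiples of 4");
    Value >>= 2;
  }
  return Value;
}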
void AArch64AsmPrinter::emitFunctionEntryLabel() {
@@ -845,9 +851,9 @@ void AArch64AsmPrinter::emitFunctionEntryLabel() {
///
/// adr xDest, .LBB0_0
/// ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh).
-/// add xDest, xDest, xScratch, lsl #2
-void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
- const llvm::MachineInstr &MI) {
+/// add xDest, xDest, xScratch (with "lsl #2" for smaller entries)
+void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
+ const llvm::MachineInstr &MI) {
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg = MI.getOperand(1).getReg();
Register ScratchRegW =
@@ -855,33 +861,50 @@ void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
Register TableReg = MI.getOperand(2).getReg();
Register EntryReg = MI.getOperand(3).getReg();
int JTIdx = MI.getOperand(4).getIndex();
- bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
+ int Size = AArch64FI->getJumpTableEntrySize(JTIdx);
// This has to be first because the compression pass based its reachability
// calculations on the start of the JumpTableDest instruction.
auto Label =
MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
+
+ // If we don't already have a symbol to use as the base, use the ADR
+ // instruction itself.
+ if (!Label) {
+ Label = MF->getContext().createTempSymbol();
+ AArch64FI->setJumpTableEntryInfo(JTIdx, Size, Label);
+ OutStreamer.emitLabel(Label);
+ }
+
+ auto LabelExpr = MCSymbolRefExpr::create(Label, MF->getContext());
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
.addReg(DestReg)
- .addExpr(MCSymbolRefExpr::create(
- Label, MF->getContext())));
+ .addExpr(LabelExpr));
// Load the number of instruction-steps to offset from the label.
- unsigned LdrOpcode = IsByteEntry ? AArch64::LDRBBroX : AArch64::LDRHHroX;
+ unsigned LdrOpcode;
+ switch (Size) {
+ case 1: LdrOpcode = AArch64::LDRBBroX; break;
+ case 2: LdrOpcode = AArch64::LDRHHroX; break;
+ case 4: LdrOpcode = AArch64::LDRSWroX; break;
+ default:
+ llvm_unreachable("Unknown jump table size");
+ }
+
EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
- .addReg(ScratchRegW)
+ .addReg(Size == 4 ? ScratchReg : ScratchRegW)
.addReg(TableReg)
.addReg(EntryReg)
.addImm(0)
- .addImm(IsByteEntry ? 0 : 1));
+ .addImm(Size == 1 ? 0 : 1));
- // Multiply the steps by 4 and add to the already materialized base label
- // address.
+ // Add to the already materialized base label address, multiplying by 4 if
+ // compressed.
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs)
.addReg(DestReg)
.addReg(DestReg)
.addReg(ScratchReg)
- .addImm(2));
+ .addImm(Size == 4 ? 0 : 2));
}
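The counterpart of the encoding above: LowerJumpTableDest now selects the load width, the index scaling and the final shift purely from the recorded entry size (1, 2 or 4). Below is a hedged software model of the address computation performed by the emitted adr/ldr/add sequence; it is only an illustration of the logic, not the emitted code.

#include <cstdint>
#include <cstring>

// Given the table contents, an entry index, the address the base label
// resolves to, and the entry size, compute the branch destination.
uint64_t resolveJumpTableDest(const void *Table, uint64_t EntryIndex,
                              uint64_t BaseLabelAddr, unsigned EntrySize) {
  const uint8_t *Bytes = static_cast<const uint8_t *>(Table);
  int64_t Entry;
  switch (EntrySize) {
  case 1: // ldrb: zero-extended byte, scaled by 4 in the final add
    Entry = Bytes[EntryIndex];
    break;
  case 2: { // ldrh: zero-extended halfword, scaled by 4 in the final add
    uint16_t V;
    std::memcpy(&V, Bytes + EntryIndex * 2, sizeof(V));
    Entry = V;
    break;
  }
  case 4: { // ldrsw: sign-extended word, used unscaled
    int32_t V;
    std::memcpy(&V, Bytes + EntryIndex * 4, sizeof(V));
    Entry = V;
    break;
  }
  default:
    return 0; // unknown entry size
  }
  return BaseLabelAddr + (EntrySize == 4 ? Entry : Entry << 2);
}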
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
@@ -959,6 +982,83 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
}
+void AArch64AsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ StatepointOpers SOpers(&MI);
+ if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
+ assert(PatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+ for (unsigned i = 0; i < PatchBytes; i += 4)
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
+ } else {
+ // Lower call target and choose correct opcode
+ const MachineOperand &CallTarget = SOpers.getCallTarget();
+ MCOperand CallTargetMCOp;
+ unsigned CallOpcode;
+ switch (CallTarget.getType()) {
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCInstLowering.lowerOperand(CallTarget, CallTargetMCOp);
+ CallOpcode = AArch64::BL;
+ break;
+ case MachineOperand::MO_Immediate:
+ CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
+ CallOpcode = AArch64::BL;
+ break;
+ case MachineOperand::MO_Register:
+ CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
+ CallOpcode = AArch64::BLR;
+ break;
+ default:
+ llvm_unreachable("Unsupported operand type in statepoint call target");
+ break;
+ }
+
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(CallOpcode).addOperand(CallTargetMCOp));
+ }
+
+ auto &Ctx = OutStreamer.getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer.emitLabel(MILabel);
+ SM.recordStatepoint(*MILabel, MI);
+}
+
+void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) {
+ // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
+ // <opcode>, <operands>
+
+ Register DefRegister = FaultingMI.getOperand(0).getReg();
+ FaultMaps::FaultKind FK =
+ static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
+ MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
+ unsigned Opcode = FaultingMI.getOperand(3).getImm();
+ unsigned OperandsBeginIdx = 4;
+
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *FaultingLabel = Ctx.createTempSymbol();
+ OutStreamer->emitLabel(FaultingLabel);
+
+ assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
+ FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
+
+ MCInst MI;
+ MI.setOpcode(Opcode);
+
+ if (DefRegister != (Register)0)
+ MI.addOperand(MCOperand::createReg(DefRegister));
+
+ for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
+ E = FaultingMI.operands_end();
+ I != E; ++I) {
+ MCOperand Dest;
+ lowerOperand(*I, Dest);
+ MI.addOperand(Dest);
+ }
+
+ OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
+ OutStreamer->emitInstruction(MI, getSubtargetInfo());
+}
+
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
Register DestReg = MI.getOperand(0).getReg();
if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
@@ -1172,17 +1272,28 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, Adrp);
MCInst Ldr;
- Ldr.setOpcode(AArch64::LDRXui);
- Ldr.addOperand(MCOperand::createReg(AArch64::X1));
+ if (STI->isTargetILP32()) {
+ Ldr.setOpcode(AArch64::LDRWui);
+ Ldr.addOperand(MCOperand::createReg(AArch64::W1));
+ } else {
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::createReg(AArch64::X1));
+ }
Ldr.addOperand(MCOperand::createReg(AArch64::X0));
Ldr.addOperand(SymTLSDescLo12);
Ldr.addOperand(MCOperand::createImm(0));
EmitToStreamer(*OutStreamer, Ldr);
MCInst Add;
- Add.setOpcode(AArch64::ADDXri);
- Add.addOperand(MCOperand::createReg(AArch64::X0));
- Add.addOperand(MCOperand::createReg(AArch64::X0));
+ if (STI->isTargetILP32()) {
+ Add.setOpcode(AArch64::ADDWri);
+ Add.addOperand(MCOperand::createReg(AArch64::W0));
+ Add.addOperand(MCOperand::createReg(AArch64::W0));
+ } else {
+ Add.setOpcode(AArch64::ADDXri);
+ Add.addOperand(MCOperand::createReg(AArch64::X0));
+ Add.addOperand(MCOperand::createReg(AArch64::X0));
+ }
Add.addOperand(SymTLSDescLo12);
Add.addOperand(MCOperand::createImm(AArch64_AM::getShiftValue(0)));
EmitToStreamer(*OutStreamer, Add);
@@ -1202,30 +1313,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
- case AArch64::JumpTableDest32: {
- // We want:
- // ldrsw xScratch, [xTable, xEntry, lsl #2]
- // add xDest, xTable, xScratch
- unsigned DestReg = MI->getOperand(0).getReg(),
- ScratchReg = MI->getOperand(1).getReg(),
- TableReg = MI->getOperand(2).getReg(),
- EntryReg = MI->getOperand(3).getReg();
- EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
- .addReg(ScratchReg)
- .addReg(TableReg)
- .addReg(EntryReg)
- .addImm(0)
- .addImm(1));
- EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
- .addReg(DestReg)
- .addReg(TableReg)
- .addReg(ScratchReg)
- .addImm(0));
- return;
- }
+ case AArch64::JumpTableDest32:
case AArch64::JumpTableDest16:
case AArch64::JumpTableDest8:
- LowerJumpTableDestSmall(*OutStreamer, *MI);
+ LowerJumpTableDest(*OutStreamer, *MI);
return;
case AArch64::FMOVH0:
@@ -1240,6 +1331,12 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
case TargetOpcode::PATCHPOINT:
return LowerPATCHPOINT(*OutStreamer, SM, *MI);
+ case TargetOpcode::STATEPOINT:
+ return LowerSTATEPOINT(*OutStreamer, SM, *MI);
+
+ case TargetOpcode::FAULTING_OP:
+ return LowerFAULTING_OP(*MI);
+
case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
LowerPATCHABLE_FUNCTION_ENTER(*MI);
return;
@@ -1284,6 +1381,14 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
case AArch64::SEH_SaveRegP:
+ if (MI->getOperand(1).getImm() == 30 && MI->getOperand(0).getImm() >= 19 &&
+ MI->getOperand(0).getImm() <= 28) {
+ assert((MI->getOperand(0).getImm() - 19) % 2 == 0 &&
+ "Register paired with LR must be odd");
+ TS->EmitARM64WinCFISaveLRPair(MI->getOperand(0).getImm(),
+ MI->getOperand(2).getImm());
+ return;
+ }
assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
"Non-consecutive registers not allowed for save_regp");
TS->EmitARM64WinCFISaveRegP(MI->getOperand(0).getImm(),
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
index 1956014b738d..d3b5166585c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -16,6 +16,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -57,13 +58,13 @@ FunctionPass *llvm::createAArch64BranchTargetsPass() {
}
bool AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) {
- const Function &F = MF.getFunction();
- if (!F.hasFnAttribute("branch-target-enforcement"))
+ if (!MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
return false;
LLVM_DEBUG(
dbgs() << "********** AArch64 Branch Targets **********\n"
<< "********** Function: " << MF.getName() << '\n');
+ const Function &F = MF.getFunction();
// LLVM does not consider basic blocks which are the targets of jump tables
// to be address-taken (the address can't escape anywhere else), but they are
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
index 9ae2b465e247..c51dd48cab34 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -42,6 +42,51 @@ static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
CCState &State, Align SlotAlign) {
+ if (LocVT.isScalableVector()) {
+ const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+ State.getMachineFunction().getSubtarget());
+ const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
+
+ // We are about to reinvoke the CCAssignFn auto-generated handler. If we
+ // don't unset these flags we will get stuck in an infinite loop forever
+ // invoking the custom handler.
+ ArgFlags.setInConsecutiveRegs(false);
+ ArgFlags.setInConsecutiveRegsLast(false);
+
+ // The calling convention for passing SVE tuples states that in the event
+ // we cannot allocate enough registers for the tuple we should still leave
+ // any remaining registers unallocated. However, when we call the
+ // CCAssignFn again we want it to behave as if all remaining registers are
+ // allocated. This will force the code to pass the tuple indirectly in
+ // accordance with the PCS.
+ bool RegsAllocated[8];
+ for (int I = 0; I < 8; I++) {
+ RegsAllocated[I] = State.isAllocated(ZRegList[I]);
+ State.AllocateReg(ZRegList[I]);
+ }
+
+ auto &It = PendingMembers[0];
+ CCAssignFn *AssignFn =
+ TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
+ if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
+ ArgFlags, State))
+ llvm_unreachable("Call operand has unhandled type");
+
+ // Return the flags to how they were before.
+ ArgFlags.setInConsecutiveRegs(true);
+ ArgFlags.setInConsecutiveRegsLast(true);
+
+ // Return the register state back to how it was before, leaving any
+ // unallocated registers available for other smaller types.
+ for (int I = 0; I < 8; I++)
+ if (!RegsAllocated[I])
+ State.DeallocateReg(ZRegList[I]);
+
+ // All pending members have now been allocated
+ PendingMembers.clear();
+ return true;
+ }
+
unsigned Size = LocVT.getSizeInBits() / 8;
const Align StackAlign =
State.getMachineFunction().getDataLayout().getStackAlignment();
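The new scalable-vector path above temporarily marks all eight Z argument registers as allocated, re-invokes the generated assignment function so the SVE tuple is forced to be passed indirectly, and afterwards releases any registers it did not actually consume. A minimal stand-alone sketch of that save/allocate-all/restore pattern, with a plain callback standing in for the CCAssignFn; all names here are illustrative.

#include <array>
#include <functional>

struct FakeCCState {
  std::array<bool, 8> ZRegAllocated{}; // models State.isAllocated(ZRegList[I])
  bool isAllocated(int I) const { return ZRegAllocated[I]; }
  void allocate(int I) { ZRegAllocated[I] = true; }
  void deallocate(int I) { ZRegAllocated[I] = false; }
};

// Force the "all registers taken" view while AssignFn runs, then restore the
// registers that were not allocated beforehand.
bool assignSVETupleIndirectly(FakeCCState &State,
                              const std::function<bool(FakeCCState &)> &AssignFn) {
  std::array<bool, 8> WasAllocated;
  for (int I = 0; I < 8; ++I) {
    WasAllocated[I] = State.isAllocated(I);
    State.allocate(I);
  }

  bool Failed = AssignFn(State); // with no Z registers free, this must go to the stack

  for (int I = 0; I < 8; ++I)
    if (!WasAllocated[I])
      State.deallocate(I);
  return !Failed;
}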
@@ -146,13 +191,11 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
- if (LocVT.isScalableVector())
- report_fatal_error(
- "Passing consecutive scalable vector registers unsupported");
-
- // Mark all regs in the class as unavailable
- for (auto Reg : RegList)
- State.AllocateReg(Reg);
+ if (!LocVT.isScalableVector()) {
+ // Mark all regs in the class as unavailable
+ for (auto Reg : RegList)
+ State.AllocateReg(Reg);
+ }
const Align SlotAlign = Subtarget.isTargetDarwin() ? Align(1) : Align(8);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
index aa41cae289e8..b1e714653f46 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -19,7 +19,6 @@ def fconstant_to_constant : GICombineRule<
def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
"AArch64GenPreLegalizerCombinerHelper", [all_combines,
- elide_br_by_inverting_cond,
fconstant_to_constant]> {
let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
let StateClass = "AArch64PreLegalizerCombinerHelperState";
@@ -76,9 +75,68 @@ def ext: GICombineRule <
// instruction.
def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;
+def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
+def vashr_vlshr_imm : GICombineRule<
+ (defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo),
+ (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+ [{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
+>;
+
+def form_duplane_matchdata :
+ GIDefMatchData<"std::pair<unsigned, int>">;
+def form_duplane : GICombineRule <
+ (defs root:$root, form_duplane_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+def adjust_icmp_imm_matchdata :
+ GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
+def adjust_icmp_imm : GICombineRule <
+ (defs root:$root, adjust_icmp_imm_matchdata:$matchinfo),
+ (match (wip_match_opcode G_ICMP):$root,
+ [{ return matchAdjustICmpImmAndPred(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }])
+>;
+
+def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;
+
+def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
+def extractvecelt_pairwise_add : GICombineRule<
+ (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
+ (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
+ [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+def mul_const_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder&, Register)>">;
+def mul_const : GICombineRule<
+ (defs root:$root, mul_const_matchdata:$matchinfo),
+ (match (wip_match_opcode G_MUL):$root,
+ [{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+// Post-legalization combines which should happen at all optimization levels.
+// (E.g. ones that facilitate matching for the selector) For example, matching
+// pseudos.
+def AArch64PostLegalizerLoweringHelper
+ : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
+ [shuffle_vector_pseudos, vashr_vlshr_imm,
+ icmp_lowering, form_duplane]> {
+ let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
+}
+
+// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombinerHelper
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
- [erase_undef_store, combines_for_extload,
- sext_already_extended, shuffle_vector_pseudos]> {
+ [copy_prop, erase_undef_store, combines_for_extload,
+ sext_trunc_sextload,
+ hoist_logic_op_with_same_opcode_hands,
+ redundant_and, xor_of_and_with_same_reg,
+ extractvecelt_pairwise_add, redundant_or,
+ mul_const]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
index 57dc8a4061f1..2328a8b4deb8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -37,8 +37,13 @@ class AArch64CompressJumpTables : public MachineFunctionPass {
MachineFunction *MF;
SmallVector<int, 8> BlockInfo;
- int computeBlockSize(MachineBasicBlock &MBB);
- void scanFunction();
+ /// Returns the size in bytes of the block \p MBB, or None if we
+ /// couldn't get a safe upper bound.
+ Optional<int> computeBlockSize(MachineBasicBlock &MBB);
+
+ /// Gather information about the function, returns false if we can't perform
+ /// this optimization for some reason.
+ bool scanFunction();
bool compressJumpTable(MachineInstr &MI, int Offset);
@@ -59,19 +64,27 @@ public:
}
};
char AArch64CompressJumpTables::ID = 0;
-}
+} // namespace
INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE,
"AArch64 compress jump tables pass", false, false)
-int AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+Optional<int>
+AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
int Size = 0;
- for (const MachineInstr &MI : MBB)
+ for (const MachineInstr &MI : MBB) {
+ // Inline asm may contain some directives like .byte which we don't
+ // currently have the ability to parse accurately. To be safe, just avoid
+ // computing a size and bail out.
+ if (MI.getOpcode() == AArch64::INLINEASM ||
+ MI.getOpcode() == AArch64::INLINEASM_BR)
+ return None;
Size += TII->getInstSizeInBytes(MI);
+ }
return Size;
}
-void AArch64CompressJumpTables::scanFunction() {
+bool AArch64CompressJumpTables::scanFunction() {
BlockInfo.clear();
BlockInfo.resize(MF->getNumBlockIDs());
@@ -84,8 +97,12 @@ void AArch64CompressJumpTables::scanFunction() {
else
AlignedOffset = alignTo(Offset, Alignment);
BlockInfo[MBB.getNumber()] = AlignedOffset;
- Offset = AlignedOffset + computeBlockSize(MBB);
+ auto BlockSize = computeBlockSize(MBB);
+ if (!BlockSize)
+ return false;
+ Offset = AlignedOffset + *BlockSize;
}
+ return true;
}
bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
@@ -104,7 +121,7 @@ bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
int MaxOffset = std::numeric_limits<int>::min(),
MinOffset = std::numeric_limits<int>::max();
MachineBasicBlock *MinBlock = nullptr;
- for (auto Block : JT.MBBs) {
+ for (auto *Block : JT.MBBs) {
int BlockOffset = BlockInfo[Block->getNumber()];
assert(BlockOffset % 4 == 0 && "misaligned basic block");
@@ -124,13 +141,14 @@ bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
}
int Span = MaxOffset - MinOffset;
- auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ auto *AFI = MF->getInfo<AArch64FunctionInfo>();
if (isUInt<8>(Span / 4)) {
AFI->setJumpTableEntryInfo(JTIdx, 1, MinBlock->getSymbol());
MI.setDesc(TII->get(AArch64::JumpTableDest8));
++NumJT8;
return true;
- } else if (isUInt<16>(Span / 4)) {
+ }
+ if (isUInt<16>(Span / 4)) {
AFI->setJumpTableEntryInfo(JTIdx, 2, MinBlock->getSymbol());
MI.setDesc(TII->get(AArch64::JumpTableDest16));
++NumJT16;
@@ -151,7 +169,8 @@ bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
if (ST.force32BitJumpTables() && !MF->getFunction().hasMinSize())
return false;
- scanFunction();
+ if (!scanFunction())
+ return false;
for (MachineBasicBlock &MBB : *MF) {
int Offset = BlockInfo[MBB.getNumber()];
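compressJumpTable chooses the entry width from the span between the lowest and highest destination offsets, measured in 4-byte instruction units. A self-contained sketch of that decision with the isUInt checks written out; the JumpTableDest8/16 opcodes are only referenced in comments.

#include <cstdint>

// True if V fits in N unsigned bits (same meaning as llvm::isUInt<N>).
template <unsigned N> bool fitsUnsigned(uint64_t V) { return V < (uint64_t{1} << N); }

// Returns the jump-table entry size in bytes for a given span (MaxOffset -
// MinOffset, in bytes, always a multiple of 4): 1 -> JumpTableDest8,
// 2 -> JumpTableDest16, 4 -> uncompressed 32-bit entries.
unsigned jumpTableEntrySize(uint64_t SpanBytes) {
  uint64_t SpanInsts = SpanBytes / 4;
  if (fitsUnsigned<8>(SpanInsts))
    return 1;
  if (fitsUnsigned<16>(SpanInsts))
    return 2;
  return 4;
}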
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 9e65ad2e18f9..e57650ae60b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -83,6 +83,8 @@ private:
bool expandSVESpillFill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opc,
unsigned N);
+ bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
};
} // end anonymous namespace
@@ -627,6 +629,46 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
return true;
}
+bool AArch64ExpandPseudo::expandCALL_RVMARKER(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+ // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
+ x29` marker. Mark the sequence as a bundle, to avoid passes moving other code
+ // in between.
+ MachineInstr &MI = *MBBI;
+
+ MachineInstr *OriginalCall;
+ MachineOperand &CallTarget = MI.getOperand(0);
+ assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
+ "invalid operand for regular call");
+ unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
+ OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
+ OriginalCall->addOperand(CallTarget);
+
+ unsigned RegMaskStartIdx = 1;
+ // Skip register arguments. Those are added during ISel, but are not
+ // needed for the concrete branch.
+ while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
+ assert(MI.getOperand(RegMaskStartIdx).isReg() &&
+ "should only skip register operands");
+ RegMaskStartIdx++;
+ }
+ for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
+ OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));
+
+ auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
+ .addReg(AArch64::FP, RegState::Define)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::FP)
+ .addImm(0)
+ .getInstr();
+ if (MI.shouldUpdateCallSiteInfo())
+ MBB.getParent()->moveCallSiteInfo(&MI, Marker);
+ MI.eraseFromParent();
+ finalizeBundle(MBB, OriginalCall->getIterator(),
+ std::next(Marker->getIterator()));
+ return true;
+}
+
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -1014,6 +1056,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
case AArch64::LDR_ZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
+ case AArch64::BLR_RVMARKER:
+ return expandCALL_RVMARKER(MBB, MBBI);
}
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index 538863ebe95a..209f9f7255a5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -54,7 +54,7 @@
using namespace llvm;
-#define DEBUG_TYPE "falkor-hwpf-fix"
+#define DEBUG_TYPE "aarch64-falkor-hwpf-fix"
STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
STATISTIC(NumCollisionsAvoided,
@@ -146,7 +146,7 @@ bool FalkorMarkStridedAccesses::run() {
bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) {
// Only mark strided loads in the inner-most loop
- if (!L.empty())
+ if (!L.isInnermost())
return false;
bool MadeChange = false;
@@ -224,10 +224,10 @@ struct LoadInfo {
char FalkorHWPFFix::ID = 0;
-INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "falkor-hwpf-fix-late",
+INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
"Falkor HW Prefetch Fix Late Phase", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(FalkorHWPFFix, "falkor-hwpf-fix-late",
+INITIALIZE_PASS_END(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
"Falkor HW Prefetch Fix Late Phase", false, false)
static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) {
@@ -830,7 +830,7 @@ bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
for (MachineLoop *I : LI)
for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
// Only process inner-loops
- if (L->empty())
+ if (L->isInnermost())
runOnLoop(**L, Fn);
return Modified;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 0f63f4ca62e5..9801036653f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3409,8 +3409,7 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
const Value *RHS = II->getArgOperand(1);
// Canonicalize immediate to the RHS.
- if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
- isCommutativeIntrinsic(II))
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
std::swap(LHS, RHS);
// Simplify multiplies.
@@ -3652,14 +3651,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
- case Intrinsic::debugtrap: {
- if (Subtarget->isTargetWindows()) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
- .addImm(0xF000);
- return true;
- }
- break;
- }
+ case Intrinsic::debugtrap:
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
+ .addImm(0xF000);
+ return true;
case Intrinsic::sqrt: {
Type *RetTy = II->getCalledFunction()->getReturnType();
@@ -3701,8 +3696,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
const Value *LHS = II->getArgOperand(0);
const Value *RHS = II->getArgOperand(1);
// Canonicalize immediate to the RHS.
- if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
- isCommutativeIntrinsic(II))
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
std::swap(LHS, RHS);
// Simplify multiplies.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c6cc6e9e8471..65ee5016042c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -116,7 +116,6 @@
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
-#include "AArch64StackOffset.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -176,6 +175,10 @@ static cl::opt<bool> StackTaggingMergeSetTag(
cl::desc("merge settag instruction in function epilog"), cl::init(true),
cl::Hidden);
+static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
+ cl::desc("sort stack allocations"),
+ cl::init(true), cl::Hidden);
+
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
/// Returns the argument pop size.
@@ -246,7 +249,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
TargetStackID::Value
AArch64FrameLowering::getStackIDForScalableVectors() const {
- return TargetStackID::SVEVector;
+ return TargetStackID::ScalableVector;
}
/// Returns the size of the fixed object area (allocated next to sp on entry)
@@ -270,7 +273,7 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
/// Returns the size of the entire SVE stackframe (calleesaves + spills).
static StackOffset getSVEStackSize(const MachineFunction &MF) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- return {(int64_t)AFI->getStackSizeSVE(), MVT::nxv1i8};
+ return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
}
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
@@ -362,44 +365,19 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
// Most call frames will be allocated at the start of a function so
// this is OK, but it is a limitation that needs dealing with.
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
- emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {Amount, MVT::i8},
- TII);
+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(Amount), TII);
}
} else if (CalleePopAmount != 0) {
// If the calling convention demands that the callee pops arguments from the
// stack, we want to add it back if we have a reserved call frame.
assert(CalleePopAmount < 0xffffff && "call frame too large");
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
- {-(int64_t)CalleePopAmount, MVT::i8}, TII);
+ StackOffset::getFixed(-(int64_t)CalleePopAmount), TII);
}
return MBB.erase(I);
}
-static bool ShouldSignReturnAddress(MachineFunction &MF) {
- // The function should be signed in the following situations:
- // - sign-return-address=all
- // - sign-return-address=non-leaf and the functions spills the LR
-
- const Function &F = MF.getFunction();
- if (!F.hasFnAttribute("sign-return-address"))
- return false;
-
- StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
- if (Scope.equals("none"))
- return false;
-
- if (Scope.equals("all"))
- return true;
-
- assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");
-
- for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo())
- if (Info.getReg() == AArch64::LR)
- return true;
-
- return false;
-}
-
// Convenience function to create a DWARF expression for
// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr,
@@ -435,7 +413,8 @@ static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr,
MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP(
const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const {
int64_t NumBytes, NumVGScaledBytes;
- OffsetFromSP.getForDwarfOffset(NumBytes, NumVGScaledBytes);
+ AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes,
+ NumVGScaledBytes);
std::string CommentBuffer = "sp";
llvm::raw_string_ostream Comment(CommentBuffer);
@@ -462,7 +441,8 @@ MCCFIInstruction AArch64FrameLowering::createCfaOffset(
const TargetRegisterInfo &TRI, unsigned Reg,
const StackOffset &OffsetFromDefCFA) const {
int64_t NumBytes, NumVGScaledBytes;
- OffsetFromDefCFA.getForDwarfOffset(NumBytes, NumVGScaledBytes);
+ AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
+ OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
@@ -516,14 +496,14 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
continue;
StackOffset Offset;
- if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::SVEVector) {
+ if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- Offset = StackOffset(MFI.getObjectOffset(Info.getFrameIdx()), MVT::nxv1i8) -
- StackOffset(AFI->getCalleeSavedStackSize(MFI), MVT::i8);
+ Offset =
+ StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+ StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
} else {
- Offset = {MFI.getObjectOffset(Info.getFrameIdx()) -
- getOffsetOfLocalArea(),
- MVT::i8};
+ Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) -
+ getOffsetOfLocalArea());
}
unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
@@ -604,6 +584,12 @@ static bool windowsRequiresStackProbe(MachineFunction &MF,
!F.hasFnAttribute("no-stack-arg-probe");
}
+static bool needsWinCFI(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ F.needsUnwindTableEntry();
+}
+
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
MachineFunction &MF, uint64_t StackBumpBytes) const {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -614,6 +600,18 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
if (AFI->getLocalStackSize() == 0)
return false;
+ // For WinCFI, if optimizing for size, prefer to not combine the stack bump
+ // (to force a stp with predecrement) to match the packed unwind format,
+ // provided that there actually are any callee saved registers to merge the
+ // decrement with.
+ // This is potentially marginally slower, but allows using the packed
+ // unwind format for functions that both have a local area and callee saved
+ // registers. Using the packed unwind format notably reduces the size of
+ // the unwind info.
+ if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
+ MF.getFunction().hasOptSize())
+ return false;
+
// 512 is the maximum immediate for stp/ldp that will be used for
// callee-save save/restores
if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
@@ -1007,27 +1005,6 @@ static void adaptForLdStOpt(MachineBasicBlock &MBB,
//
}
-static bool ShouldSignWithAKey(MachineFunction &MF) {
- const Function &F = MF.getFunction();
- if (!F.hasFnAttribute("sign-return-address-key"))
- return true;
-
- const StringRef Key =
- F.getFnAttribute("sign-return-address-key").getValueAsString();
- assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
- return Key.equals_lower("a_key");
-}
-
-static bool needsWinCFI(const MachineFunction &MF) {
- const Function &F = MF.getFunction();
- return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
- F.needsUnwindTableEntry();
-}
-
-static bool isTargetDarwin(const MachineFunction &MF) {
- return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
-}
-
static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
@@ -1074,15 +1051,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;
- if (ShouldSignReturnAddress(MF)) {
- if (ShouldSignWithAKey(MF))
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
- .setMIFlag(MachineInstr::FrameSetup);
- else {
+ const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
+ if (MFnI.shouldSignReturnAddress()) {
+ if (MFnI.shouldSignWithBKey()) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
.setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
+ .setMIFlag(MachineInstr::FrameSetup);
}
unsigned CFIIndex =
@@ -1097,9 +1075,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
- // Set tagged base pointer to the bottom of the stack frame.
+ // Set tagged base pointer to the requested stack slot.
// Ideally it should match SP value after prologue.
- AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+ Optional<int> TBPI = AFI->getTaggedBasePointerIndex();
+ if (TBPI)
+ AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
+ else
+ AFI->setTaggedBasePointerOffset(MFI.getStackSize());
const StackOffset &SVEStackSize = getSVEStackSize(MF);
@@ -1126,8 +1108,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
- {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
- false, NeedsWinCFI, &HasWinCFI);
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (!NeedsWinCFI && needsFrameMoves) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
@@ -1160,8 +1142,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
- {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false,
- NeedsWinCFI, &HasWinCFI);
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
NumBytes = 0;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
@@ -1185,7 +1167,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// For funclets the FP belongs to the containing function.
if (!IsFunclet && HasFP) {
// Only set up FP if we actually need to.
- int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
+ int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
@@ -1195,8 +1177,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
- {FPOffset, MVT::i8}, TII, MachineInstr::FrameSetup, false,
- NeedsWinCFI, &HasWinCFI);
+ StackOffset::getFixed(FPOffset), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
@@ -1306,7 +1288,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
CalleeSavesEnd = MBBI;
- AllocateBefore = {CalleeSavedSize, MVT::nxv1i8};
+ AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
AllocateAfter = SVEStackSize - AllocateBefore;
}
@@ -1338,8 +1320,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,
- {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
- false, NeedsWinCFI, &HasWinCFI);
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (NeedsRealignment) {
const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
@@ -1409,11 +1391,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
if (needsFrameMoves) {
- const DataLayout &TD = MF.getDataLayout();
- const int StackGrowth = isTargetDarwin(MF)
- ? (2 * -TD.getPointerSize(0))
- : -AFI->getCalleeSavedStackSize();
- Register FramePtr = RegInfo->getFrameRegister(MF);
// An example of the prologue:
//
// .globl __foo
@@ -1481,10 +1458,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// .cfi_offset w28, -32
if (HasFP) {
+ const int OffsetToFirstCalleeSaveFromFP =
+ AFI->getCalleeSaveBaseToFrameRecordOffset() -
+ AFI->getCalleeSavedStackSize();
+ Register FramePtr = RegInfo->getFrameRegister(MF);
+
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - StackGrowth));
+ MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -1494,7 +1476,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
StackOffset TotalSize =
- SVEStackSize + StackOffset((int64_t)MFI.getStackSize(), MVT::i8);
+ SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
} else {
// Encode the stack size of the leaf function.
@@ -1514,7 +1496,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
static void InsertReturnAddressAuth(MachineFunction &MF,
MachineBasicBlock &MBB) {
- if (!ShouldSignReturnAddress(MF))
+ const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
+ if (!MFI.shouldSignReturnAddress())
return;
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -1528,16 +1511,16 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
// this instruction can safely be used for any v8a architecture.
// From v8.3a onwards there are optimised authenticate LR and return
// instructions, namely RETA{A,B}, that can be used instead.
- if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() &&
+ if (Subtarget.hasPAuth() && MBBI != MBB.end() &&
MBBI->getOpcode() == AArch64::RET_ReallyLR) {
BuildMI(MBB, MBBI, DL,
- TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB))
+ TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA))
.copyImplicitOps(*MBBI);
MBB.erase(MBBI);
} else {
BuildMI(
MBB, MBBI, DL,
- TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP))
+ TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
.setMIFlag(MachineInstr::FrameDestroy);
}
}
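Prologue and epilogue now consult AArch64FunctionInfo for whether and how to sign the return address: PACIBSP (after EMITBKEY) or PACIASP on entry, and on exit either the fused RETAB/RETAA when the subtarget has PAuth and the terminator is a plain return, or AUTIBSP/AUTIASP otherwise. The small helper below maps those decisions to mnemonics; it sketches only the selection logic, not the MI emission.

#include <string>

// Pick the epilogue authentication sequence the same way
// InsertReturnAddressAuth does; returned strings are just illustrative.
std::string selectReturnAuth(bool ShouldSignReturnAddress, bool UseBKey,
                             bool HasPAuth, bool TerminatorIsPlainRet) {
  if (!ShouldSignReturnAddress)
    return "ret";                       // nothing to authenticate
  if (HasPAuth && TerminatorIsPlainRet)
    return UseBKey ? "retab" : "retaa"; // fused authenticate-and-return
  return UseBKey ? "autibsp; ret" : "autiasp; ret"; // hint-space fallback
}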
@@ -1562,10 +1545,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
bool NeedsWinCFI = needsWinCFI(MF);
bool HasWinCFI = false;
bool IsFunclet = false;
- auto WinCFI = make_scope_exit([&]() {
- if (!MF.hasWinCFI())
- MF.setHasWinCFI(HasWinCFI);
- });
+ auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); });
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
@@ -1665,7 +1645,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
NeedsWinCFI, &HasWinCFI);
}
- if (NeedsWinCFI) {
+ if (MF.hasWinCFI()) {
+ // If the prologue didn't contain any SEH opcodes and didn't set the
+ // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the
+ // EpilogStart - to avoid generating CFI for functions that don't need it.
+ // (And as we didn't generate any prologue at all, it would be asymmetrical
+ // to the epilogue.) By the end of the function, we assert that
+ // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption.
HasWinCFI = true;
BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
.setMIFlag(MachineInstr::FrameDestroy);
@@ -1677,9 +1663,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
- {NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
- if (NeedsWinCFI && HasWinCFI)
+ StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
+ TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI);
+ if (HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
@@ -1702,7 +1689,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
assert(IsSVECalleeSave(RestoreBegin) &&
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
- StackOffset CalleeSavedSizeAsOffset = {CalleeSavedSize, MVT::nxv1i8};
+ StackOffset CalleeSavedSizeAsOffset =
+ StackOffset::getScalable(CalleeSavedSize);
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
DeallocateAfter = CalleeSavedSizeAsOffset;
}
@@ -1715,14 +1703,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// be reloaded. The code below will deallocate the stack space
// by moving FP -> SP.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
- {-CalleeSavedSize, MVT::nxv1i8}, TII,
+ StackOffset::getScalable(-CalleeSavedSize), TII,
MachineInstr::FrameDestroy);
} else {
if (AFI->getSVECalleeSavedStackSize()) {
// Deallocate the non-SVE locals first before we can deallocate (and
// restore callee saves) from the SVE area.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy);
+ StackOffset::getFixed(NumBytes), TII,
+ MachineInstr::FrameDestroy);
NumBytes = 0;
}
@@ -1755,11 +1744,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- {StackRestoreBytes, MVT::i8}, TII,
+ StackOffset::getFixed(StackRestoreBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
if (Done) {
- if (NeedsWinCFI) {
- HasWinCFI = true;
+ if (HasWinCFI) {
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
@@ -1775,15 +1763,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
- int64_t OffsetToFrameRecord =
- isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
- {OffsetToFrameRecord, MVT::i8},
- TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ emitFrameOffset(
+ MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
+ StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
+ TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI);
+ StackOffset::getFixed(NumBytes), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
@@ -1804,62 +1791,63 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
- {(int64_t)AfterCSRPopSize, MVT::i8}, TII,
+ StackOffset::getFixed((int64_t)AfterCSRPopSize), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
}
- if (NeedsWinCFI && HasWinCFI)
+ if (HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
-
- MF.setHasWinCFI(HasWinCFI);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
-int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- Register &FrameReg) const {
+StackOffset
+AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
return resolveFrameIndexReference(
- MF, FI, FrameReg,
- /*PreferFP=*/
- MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
- /*ForSimm=*/false)
- .getBytes();
+ MF, FI, FrameReg,
+ /*PreferFP=*/
+ MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
+ /*ForSimm=*/false);
}
-int AArch64FrameLowering::getNonLocalFrameIndexReference(
- const MachineFunction &MF, int FI) const {
- return getSEHFrameIndexOffset(MF, FI);
+StackOffset
+AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
+ int FI) const {
+ return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
}
-static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset) {
+static StackOffset getFPOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) {
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
-
unsigned FixedObject =
getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
- unsigned FPAdjust = isTargetDarwin(MF)
- ? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo());
- return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
+ int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
+ int64_t FPAdjust =
+ CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
+ return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
}
-static StackOffset getStackOffset(const MachineFunction &MF, int64_t ObjectOffset) {
+static StackOffset getStackOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) {
const auto &MFI = MF.getFrameInfo();
- return {ObjectOffset + (int64_t)MFI.getStackSize(), MVT::i8};
+ return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
}
+ // TODO: This function currently does not work for scalable vectors.
int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
int FI) const {
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
- ? getFPOffset(MF, ObjectOffset).getBytes()
- : getStackOffset(MF, ObjectOffset).getBytes();
+ ? getFPOffset(MF, ObjectOffset).getFixed()
+ : getStackOffset(MF, ObjectOffset).getFixed();
}
StackOffset AArch64FrameLowering::resolveFrameIndexReference(
@@ -1868,7 +1856,7 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
- bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector;
+ bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector;
return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
PreferFP, ForSimm);
}
@@ -1882,8 +1870,8 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- int64_t FPOffset = getFPOffset(MF, ObjectOffset).getBytes();
- int64_t Offset = getStackOffset(MF, ObjectOffset).getBytes();
+ int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
+ int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
@@ -1958,16 +1946,16 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
"non-argument/CSR objects cannot be accessed through the frame pointer");
if (isSVE) {
- int64_t OffsetToSVEArea =
- MFI.getStackSize() - AFI->getCalleeSavedStackSize();
- StackOffset FPOffset = {ObjectOffset, MVT::nxv1i8};
- StackOffset SPOffset = SVEStackSize +
- StackOffset(ObjectOffset, MVT::nxv1i8) +
- StackOffset(OffsetToSVEArea, MVT::i8);
+ StackOffset FPOffset =
+ StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
+ StackOffset SPOffset =
+ SVEStackSize +
+ StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
+ ObjectOffset);
// Always use the FP for SVE spills if available and beneficial.
if (hasFP(MF) &&
- (SPOffset.getBytes() ||
- FPOffset.getScalableBytes() < SPOffset.getScalableBytes() ||
+ (SPOffset.getFixed() ||
+ FPOffset.getScalable() < SPOffset.getScalable() ||
RegInfo->needsStackRealignment(MF))) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
@@ -1986,7 +1974,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
- return StackOffset(FPOffset, MVT::i8) + ScalableOffset;
+ return StackOffset::getFixed(FPOffset) + ScalableOffset;
}
// Use the base pointer if we have one.
@@ -2003,7 +1991,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
Offset -= AFI->getLocalStackSize();
}
- return StackOffset(Offset, MVT::i8) + ScalableOffset;
+ return StackOffset::getFixed(Offset) + ScalableOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
@@ -2025,21 +2013,28 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
- bool NeedsWinCFI) {
+ bool NeedsWinCFI, bool IsFirst) {
// If we are generating register pairs for a Windows function that requires
// EH support, then pair consecutive registers only. There are no unwind
// opcodes for saves/restores of non-consecutive register pairs.
- // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x.
+ // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x,
+ // save_lrpair.
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
- // TODO: LR can be paired with any register. We don't support this yet in
- // the MCLayer. We need to add support for the save_lrpair unwind code.
if (Reg2 == AArch64::FP)
return true;
if (!NeedsWinCFI)
return false;
if (Reg2 == Reg1 + 1)
return false;
+ // If pairing a GPR with LR, the pair can be described by the save_lrpair
+ // opcode. If this is the first register pair, it would end up with a
+ // pre-decrement, but there's no save_lrpair_x opcode, so we can only do this
+ // if LR is paired with something other than the first register.
+ // The save_lrpair opcode requires the first register to be an odd one.
+ if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
+ (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
+ return false;
return true;
}
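The comment above spells out when the Windows save_lrpair unwind opcode applies: LR may be the second register of a pair only when the first register is one of the odd-numbered GPRs x19–x27 and the pair is not the first (pre-decrementing) one. Below is a rough standalone sketch of just that predicate, with plain integers standing in for the register enum; it is an illustrative simplification, not the LLVM API.

// Standalone model of the save_lrpair check added above: LR may be the second
// register of a pair when the first is an odd-numbered GPR in x19..x27 and the
// pair is not the first (pre-decrementing) one. Register numbers are plain
// ints here purely for illustration.
#include <iostream>

bool canUseSaveLrPair(int Reg1, bool IsFirstPair) {
  // Odd registers x19, x21, ..., x27 correspond to (Reg1 - 19) % 2 == 0.
  return Reg1 >= 19 && Reg1 <= 27 && (Reg1 - 19) % 2 == 0 && !IsFirstPair;
}

int main() {
  std::cout << canUseSaveLrPair(21, /*IsFirstPair=*/false) << "\n"; // 1: x21, lr
  std::cout << canUseSaveLrPair(20, /*IsFirstPair=*/false) << "\n"; // 0: x20 is even
  std::cout << canUseSaveLrPair(19, /*IsFirstPair=*/true) << "\n";  // 0: no save_lrpair_x
}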
@@ -2048,9 +2043,10 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
/// LR and FP need to be allocated together when the frame needs to save
/// the frame-record. This means any other register pairing with LR is invalid.
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
- bool UsesWinAAPCS, bool NeedsWinCFI, bool NeedsFrameRecord) {
+ bool UsesWinAAPCS, bool NeedsWinCFI,
+ bool NeedsFrameRecord, bool IsFirst) {
if (UsesWinAAPCS)
- return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI);
+ return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst);
// If we need to store the frame record, don't pair any register
// with LR other than FP.
@@ -2114,14 +2110,22 @@ static void computeCalleeSaveRegisterPairs(
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int ByteOffset = AFI->getCalleeSavedStackSize();
+ int StackFillDir = -1;
+ int RegInc = 1;
+ unsigned FirstReg = 0;
+ if (NeedsWinCFI) {
+ // For WinCFI, fill the stack from the bottom up.
+ ByteOffset = 0;
+ StackFillDir = 1;
+ // As the CSI array is reversed to match PrologEpilogInserter, iterate
+ // backwards to pair up registers starting from the lower-numbered ones.
+ RegInc = -1;
+ FirstReg = Count - 1;
+ }
int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
- // On Linux, we will have either one or zero non-paired register. On Windows
- // with CFI, we can have multiple unpaired registers in order to utilize the
- // available unwind codes. This flag assures that the alignment fixup is done
- // only once, as intended.
- bool FixupDone = false;
- for (unsigned i = 0; i < Count; ++i) {
+ // When iterating backwards, the loop condition relies on unsigned wraparound.
+ for (unsigned i = FirstReg; i < Count; i += RegInc) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
@@ -2139,18 +2143,20 @@ static void computeCalleeSaveRegisterPairs(
llvm_unreachable("Unsupported register class.");
// Add the next reg to the pair if it is in the same register class.
- if (i + 1 < Count) {
- unsigned NextReg = CSI[i + 1].getReg();
+ if (unsigned(i + RegInc) < Count) {
+ unsigned NextReg = CSI[i + RegInc].getReg();
+ bool IsFirst = i == FirstReg;
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
- !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, NeedsWinCFI,
- NeedsFrameRecord))
+ !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
+ NeedsWinCFI, NeedsFrameRecord, IsFirst))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR64:
if (AArch64::FPR64RegClass.contains(NextReg) &&
- !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
+ IsFirst))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR128:
@@ -2179,7 +2185,7 @@ static void computeCalleeSaveRegisterPairs(
// The order of the registers in the list is controlled by
// getCalleeSavedRegs(), so they will always be in-order, as well.
assert((!RPI.isPaired() ||
- (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
+ (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
"Out of order callee saved regs!");
assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
@@ -2201,39 +2207,72 @@ static void computeCalleeSaveRegisterPairs(
"Callee-save registers not saved as adjacent register pair!");
RPI.FrameIdx = CSI[i].getFrameIdx();
+ if (NeedsWinCFI &&
+ RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
+ RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
int Scale = RPI.getScale();
+
+ int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
+ assert(OffsetPre % Scale == 0);
+
if (RPI.isScalable())
- ScalableByteOffset -= Scale;
+ ScalableByteOffset += StackFillDir * Scale;
else
- ByteOffset -= RPI.isPaired() ? 2 * Scale : Scale;
+ ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
assert(!(RPI.isScalable() && RPI.isPaired()) &&
"Paired spill/fill instructions don't exist for SVE vectors");
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
- if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
+ if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI &&
!RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
!RPI.isPaired()) {
- FixupDone = true;
- ByteOffset -= 8;
+ ByteOffset += 8 * StackFillDir;
assert(ByteOffset % 16 == 0);
assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
+ // A stack frame with a gap looks like this, bottom up:
+ // d9, d8. x21, gap, x20, x19.
+ // Set extra alignment on the x21 object (the only unpaired register)
+ // to create the gap above it.
MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
}
- int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
- assert(Offset % Scale == 0);
+ int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
+ assert(OffsetPost % Scale == 0);
+ // If filling top down (default), we want the offset after incrementing it.
+ // If filling bottom up (WinCFI), we need the original offset.
+ int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
RPI.Offset = Offset / Scale;
assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
+ // Save the offset to frame record so that the FP register can point to the
+ // innermost frame record (spilled FP and LR registers).
+ if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
+ RPI.Reg2 == AArch64::FP) ||
+ (IsWindows && RPI.Reg1 == AArch64::FP &&
+ RPI.Reg2 == AArch64::LR)))
+ AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
+
RegPairs.push_back(RPI);
if (RPI.isPaired())
- ++i;
+ i += RegInc;
+ }
+ if (NeedsWinCFI) {
+ // If we need an alignment gap in the stack, align the topmost stack
+ // object. A stack frame with a gap looks like this, bottom up:
+ // x19, d8. d9, gap.
+ // Set extra alignment on the topmost stack object (the first element in
+ // CSI, which goes top down), to create the gap above it.
+ if (AFI->hasCalleeSaveStackFreeSpace())
+ MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
+ // We iterated bottom up over the registers; flip RegPairs back to top
+ // down order.
+ std::reverse(RegPairs.begin(), RegPairs.end());
}
}
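For WinCFI the loop above now fills the callee-save area from the bottom up: ByteOffset starts at zero and grows, the already reversed CSI list is walked backwards, the pre-increment offset is recorded, and RegPairs is flipped back to top-down order at the end. The following compact standalone sketch of that direction change uses hard-coded 8-byte GPR spills and string names purely for illustration; it is not the LLVM data structure.

// Tiny illustration of the WinCFI direction change above: instead of starting
// at the callee-save size and walking down, offsets start at 0 and grow, the
// registers are visited in reverse, and the resulting pairs are flipped back
// at the end. Slot sizes are hard-coded 8-byte GPR spills for brevity.
#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<const char *> CSI = {"x19", "x20", "x21", "x22"}; // top-down order
  struct Pair { const char *Reg; int Offset; };
  std::vector<Pair> RegPairs;

  int ByteOffset = 0;                       // WinCFI: fill from the bottom up
  for (int i = (int)CSI.size() - 1; i >= 0; --i) {
    int OffsetPre = ByteOffset;             // offset before the increment
    ByteOffset += 8;
    RegPairs.push_back({CSI[i], OffsetPre});
  }
  std::reverse(RegPairs.begin(), RegPairs.end()); // back to top-down order

  for (const Pair &P : RegPairs)
    std::cout << P.Reg << " @ sp+" << P.Offset << "\n"; // x19 @ sp+24 ... x22 @ sp+0
}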
@@ -2373,7 +2412,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Update the StackIDs of the SVE stack slots.
MachineFrameInfo &MFI = MF.getFrameInfo();
if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
- MFI.setStackID(RPI.FrameIdx, TargetStackID::SVEVector);
+ MFI.setStackID(RPI.FrameIdx, TargetStackID::ScalableVector);
}
return true;
@@ -2665,6 +2704,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
}
+bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ bool NeedsWinCFI = needsWinCFI(MF);
+ // To match the canonical windows frame layout, reverse the list of
+ // callee saved registers to get them laid out by PrologEpilogInserter
+ // in the right order. (PrologEpilogInserter allocates stack objects top
+ // down. Windows canonical prologs store higher numbered registers at
+ // the top, thus have the CSI array start from the highest registers.)
+ if (NeedsWinCFI)
+ std::reverse(CSI.begin(), CSI.end());
+ // Let the generic code do the rest of the setup.
+ return false;
+}
+
bool AArch64FrameLowering::enableStackSlotScavenging(
const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -2707,7 +2761,7 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
#ifndef NDEBUG
// First process all fixed stack objects.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
- assert(MFI.getStackID(I) != TargetStackID::SVEVector &&
+ assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
"SVE vectors should never be passed on the stack by value, only by "
"reference.");
#endif
@@ -2737,7 +2791,7 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
SmallVector<int, 8> ObjectsToAllocate;
for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
unsigned StackID = MFI.getStackID(I);
- if (StackID != TargetStackID::SVEVector)
+ if (StackID != TargetStackID::ScalableVector)
continue;
if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
continue;
@@ -2891,12 +2945,12 @@ void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
const int64_t kMaxOffset = 255 * 16;
Register BaseReg = FrameReg;
- int64_t BaseRegOffsetBytes = FrameRegOffset.getBytes();
+ int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
if (BaseRegOffsetBytes < kMinOffset ||
BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
- {BaseRegOffsetBytes, MVT::i8}, TII);
+ StackOffset::getFixed(BaseRegOffsetBytes), TII);
BaseReg = ScratchReg;
BaseRegOffsetBytes = 0;
}
@@ -2953,7 +3007,7 @@ void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
LoopI->setFlags(FrameRegUpdateFlags);
int64_t ExtraBaseRegUpdate =
- FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getBytes() - Size) : 0;
+ FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
if (LoopSize < Size) {
assert(FrameRegUpdate);
assert(Size - LoopSize == 16);
@@ -3057,7 +3111,7 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
// realistically happens in function epilogue. Also, STGloop is expanded
// before that pass.
if (InsertI != MBB->end() &&
- canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getBytes() + Size,
+ canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
&TotalOffset)) {
UpdateInstr = &*InsertI++;
LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
@@ -3220,7 +3274,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
/// before the update. This is easily retrieved as it is exactly the offset
/// that is set in processFunctionBeforeFrameFinalized.
-int AArch64FrameLowering::getFrameIndexReferencePreferSP(
+StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
const MachineFunction &MF, int FI, Register &FrameReg,
bool IgnoreSPUpdates) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -3228,7 +3282,7 @@ int AArch64FrameLowering::getFrameIndexReferencePreferSP(
LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
<< MFI.getObjectOffset(FI) << "\n");
FrameReg = AArch64::SP;
- return MFI.getObjectOffset(FI);
+ return StackOffset::getFixed(MFI.getObjectOffset(FI));
}
return getFrameIndexReference(MF, FI, FrameReg);
@@ -3252,3 +3306,162 @@ unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
getStackAlign());
}
+
+namespace {
+struct FrameObject {
+ bool IsValid = false;
+ // Index of the object in MFI.
+ int ObjectIndex = 0;
+ // Group ID this object belongs to.
+ int GroupIndex = -1;
+ // This object should be placed first (closest to SP).
+ bool ObjectFirst = false;
+ // This object's group (which always contains the object with
+ // ObjectFirst==true) should be placed first.
+ bool GroupFirst = false;
+};
+
+class GroupBuilder {
+ SmallVector<int, 8> CurrentMembers;
+ int NextGroupIndex = 0;
+ std::vector<FrameObject> &Objects;
+
+public:
+ GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
+ void AddMember(int Index) { CurrentMembers.push_back(Index); }
+ void EndCurrentGroup() {
+ if (CurrentMembers.size() > 1) {
+ // Create a new group with the current member list. This might remove them
+ // from their pre-existing groups. That's OK, dealing with overlapping
+ // groups is too hard and unlikely to make a difference.
+ LLVM_DEBUG(dbgs() << "group:");
+ for (int Index : CurrentMembers) {
+ Objects[Index].GroupIndex = NextGroupIndex;
+ LLVM_DEBUG(dbgs() << " " << Index);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+ NextGroupIndex++;
+ }
+ CurrentMembers.clear();
+ }
+};
+
+bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
+ // Objects at a lower index are closer to FP; objects at a higher index are
+ // closer to SP.
+ //
+ // For consistency in our comparison, all invalid objects are placed
+ // at the end. This also allows us to stop walking when we hit the
+ // first invalid item after it's all sorted.
+ //
+ // The "first" object goes first (closest to SP), followed by the members of
+ // the "first" group.
+ //
+ // The rest are sorted by the group index to keep the groups together.
+ // Higher numbered groups are more likely to be around longer (i.e. untagged
+ // in the function epilogue and not at some earlier point). Place them closer
+ // to SP.
+ //
+ // If all else equal, sort by the object index to keep the objects in the
+ // original order.
+ return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex,
+ A.ObjectIndex) <
+ std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex,
+ B.ObjectIndex);
+}
+} // namespace
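FrameObjectCompare orders frame objects purely by lexicographic comparison of the tuples shown: invalid objects sink to the end, and valid ones sort by the ObjectFirst/GroupFirst flags, then the group index, then the original object index. Below is a small standalone demonstration of that tuple ordering; the Obj struct and the sample values are invented for illustration and are not part of the patch.

// Standalone demo of the comparator's tuple ordering: invalid objects sink to
// the end; the rest sort by the flags, then group index, then original index.
// Field names mirror FrameObject for readability.
#include <algorithm>
#include <iostream>
#include <tuple>
#include <vector>

struct Obj {
  bool IsValid = true;
  int ObjectIndex = 0;
  int GroupIndex = -1;
  bool ObjectFirst = false;
  bool GroupFirst = false;
};

int main() {
  std::vector<Obj> V = {{true, 0, 1}, {true, 1, 0}, {false, 2}, {true, 3, 0, true, true}};
  std::stable_sort(V.begin(), V.end(), [](const Obj &A, const Obj &B) {
    return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex,
                           A.ObjectIndex) <
           std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex,
                           B.ObjectIndex);
  });
  for (const Obj &O : V)
    std::cout << O.ObjectIndex << " ";  // prints: 1 0 3 2
  std::cout << "\n";
}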
+
+void AArch64FrameLowering::orderFrameObjects(
+ const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
+ if (!OrderFrameObjects || ObjectsToAllocate.empty())
+ return;
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
+ for (auto &Obj : ObjectsToAllocate) {
+ FrameObjects[Obj].IsValid = true;
+ FrameObjects[Obj].ObjectIndex = Obj;
+ }
+
+ // Identify stack slots that are tagged at the same time.
+ GroupBuilder GB(FrameObjects);
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (MI.isDebugInstr())
+ continue;
+ int OpIndex;
+ switch (MI.getOpcode()) {
+ case AArch64::STGloop:
+ case AArch64::STZGloop:
+ OpIndex = 3;
+ break;
+ case AArch64::STGOffset:
+ case AArch64::STZGOffset:
+ case AArch64::ST2GOffset:
+ case AArch64::STZ2GOffset:
+ OpIndex = 1;
+ break;
+ default:
+ OpIndex = -1;
+ }
+
+ int TaggedFI = -1;
+ if (OpIndex >= 0) {
+ const MachineOperand &MO = MI.getOperand(OpIndex);
+ if (MO.isFI()) {
+ int FI = MO.getIndex();
+ if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
+ FrameObjects[FI].IsValid)
+ TaggedFI = FI;
+ }
+ }
+
+ // If this is a stack tagging instruction for a slot that is not part of a
+ // group yet, either start a new group or add it to the current one.
+ if (TaggedFI >= 0)
+ GB.AddMember(TaggedFI);
+ else
+ GB.EndCurrentGroup();
+ }
+ // Groups should never span multiple basic blocks.
+ GB.EndCurrentGroup();
+ }
+
+ // If the function's tagged base pointer is pinned to a stack slot, we want to
+ // put that slot first when possible. This will likely place it at SP + 0,
+ // and save one instruction when generating the base pointer because IRG does
+ // not allow an immediate offset.
+ const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ Optional<int> TBPI = AFI.getTaggedBasePointerIndex();
+ if (TBPI) {
+ FrameObjects[*TBPI].ObjectFirst = true;
+ FrameObjects[*TBPI].GroupFirst = true;
+ int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
+ if (FirstGroupIndex >= 0)
+ for (FrameObject &Object : FrameObjects)
+ if (Object.GroupIndex == FirstGroupIndex)
+ Object.GroupFirst = true;
+ }
+
+ llvm::stable_sort(FrameObjects, FrameObjectCompare);
+
+ int i = 0;
+ for (auto &Obj : FrameObjects) {
+ // All invalid items are sorted at the end, so it's safe to stop.
+ if (!Obj.IsValid)
+ break;
+ ObjectsToAllocate[i++] = Obj.ObjectIndex;
+ }
+
+ LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj
+ : FrameObjects) {
+ if (!Obj.IsValid)
+ break;
+ dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
+ if (Obj.ObjectFirst)
+ dbgs() << ", first";
+ if (Obj.GroupFirst)
+ dbgs() << ", group-first";
+ dbgs() << "\n";
+ });
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 1ca8c3e9e2bf..80079a9d9836 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
-#include "AArch64StackOffset.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
@@ -41,8 +41,8 @@ public:
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg, bool PreferFP,
bool ForSimm) const;
@@ -67,6 +67,11 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
@@ -89,11 +94,12 @@ public:
unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
- int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
- Register &FrameReg,
- bool IgnoreSPUpdates) const override;
- int getNonLocalFrameIndexReference(const MachineFunction &MF,
- int FI) const override;
+ StackOffset
+ getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
+ Register &FrameReg,
+ bool IgnoreSPUpdates) const override;
+ StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF,
+ int FI) const override;
int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const;
bool isSupportedStackID(TargetStackID::Value ID) const override {
@@ -101,7 +107,7 @@ public:
default:
return false;
case TargetStackID::Default:
- case TargetStackID::SVEVector:
+ case TargetStackID::ScalableVector:
case TargetStackID::NoAlloc:
return true;
}
@@ -110,9 +116,13 @@ public:
bool isStackIdSafeForLocalArea(unsigned StackId) const override {
// We don't support putting SVE objects into the pre-allocated local
// frame block at the moment.
- return StackId != TargetStackID::SVEVector;
+ return StackId != TargetStackID::ScalableVector;
}
+ void
+ orderFrameObjects(const MachineFunction &MF,
+ SmallVectorImpl<int> &ObjectsToAllocate) const override;
+
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
uint64_t StackBumpBytes) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 7799ebfbd68e..94b5d7718d0c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
@@ -190,9 +191,14 @@ public:
return SelectSVELogicalImm(N, VT, Imm);
}
- template <unsigned Low, unsigned High>
- bool SelectSVEShiftImm64(SDValue N, SDValue &Imm) {
- return SelectSVEShiftImm64(N, Low, High, Imm);
+ template <MVT::SimpleValueType VT>
+ bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
+ return SelectSVEArithImm(N, VT, Imm);
+ }
+
+ template <unsigned Low, unsigned High, bool AllowSaturation = false>
+ bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
+ return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
}
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
@@ -323,10 +329,10 @@ private:
bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
- bool SelectSVEShiftImm64(SDValue N, uint64_t Low, uint64_t High,
- SDValue &Imm);
+ bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
+ bool AllowSaturation, SDValue &Imm);
- bool SelectSVEArithImm(SDValue N, SDValue &Imm);
+ bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
SDValue &Offset);
};
@@ -1371,9 +1377,12 @@ void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
- // Transfer memoperands.
- MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
+ // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
+ // because it's too simple to have needed special treatment during lowering.
+ if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
+ MachineMemOperand *MemOp = MemIntr->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
+ }
CurDAG->RemoveDeadNode(N);
}
@@ -3127,13 +3136,28 @@ bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
return false;
}
-bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, SDValue &Imm) {
+bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
- uint64_t ImmVal = CNode->getSExtValue();
- SDLoc DL(N);
- ImmVal = ImmVal & 0xFF;
+ uint64_t ImmVal = CNode->getZExtValue();
+
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ ImmVal &= 0xFF;
+ break;
+ case MVT::i16:
+ ImmVal &= 0xFFFF;
+ break;
+ case MVT::i32:
+ ImmVal &= 0xFFFFFFFF;
+ break;
+ case MVT::i64:
+ break;
+ default:
+ llvm_unreachable("Unexpected type");
+ }
+
if (ImmVal < 256) {
- Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
return true;
}
}
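The updated SelectSVEArithImm takes the constant zero-extended, masks it to the element width given by the template VT, and only succeeds when the truncated value fits in 8 bits. A standalone sketch of that check follows, with the element width passed as a bit count instead of an MVT; this is an illustrative simplification, not the LLVM function.

// Standalone sketch of the immediate check above: the constant is masked to
// the element width and accepted only if the truncated value fits in 8 bits.
#include <cstdint>
#include <iostream>
#include <optional>

std::optional<uint64_t> selectArithImm(uint64_t ImmVal, unsigned EltBits) {
  if (EltBits < 64)
    ImmVal &= (uint64_t(1) << EltBits) - 1;  // keep only the element's bits
  if (ImmVal < 256)
    return ImmVal;
  return std::nullopt;  // not encodable as an 8-bit arithmetic immediate
}

int main() {
  std::cout << *selectArithImm(0xFFFFFF01, 8) << "\n";          // 1: only the low byte survives
  std::cout << selectArithImm(0x1234, 16).has_value() << "\n";  // 0: 0x1234 >= 256
}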
@@ -3177,19 +3201,30 @@ bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
return false;
}
-// This method is only needed to "cast" i64s into i32s when the value
-// is a valid shift which has been splatted into a vector with i64 elements.
-// Every other type is fine in tablegen.
-bool AArch64DAGToDAGISel::SelectSVEShiftImm64(SDValue N, uint64_t Low,
- uint64_t High, SDValue &Imm) {
+// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
+// Rather than attempt to normalise everything we can sometimes saturate the
+// shift amount during selection. This function also allows for consistent
+// isel patterns by ensuring the resulting "Imm" node is of the i32 type
+// required by the instructions.
+bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
+ uint64_t High, bool AllowSaturation,
+ SDValue &Imm) {
if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
uint64_t ImmVal = CN->getZExtValue();
- SDLoc DL(N);
- if (ImmVal >= Low && ImmVal <= High) {
- Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
- return true;
+ // Reject shift amounts that are too small.
+ if (ImmVal < Low)
+ return false;
+
+ // Reject or saturate shift amounts that are too big.
+ if (ImmVal > High) {
+ if (!AllowSaturation)
+ return false;
+ ImmVal = High;
}
+
+ Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
+ return true;
}
return false;
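SelectSVEShiftImm generalises the old 64-bit-only helper: shift amounts below Low are rejected, and amounts above High are rejected unless saturation is allowed, in which case they are clamped to High before being emitted as an i32 constant. Here is a standalone model of the reject-or-saturate decision, with plain integers in place of SDValues; illustrative only.

// Standalone sketch of the new shift-immediate handling: amounts below Low are
// rejected, amounts above High are either rejected or clamped to High when
// saturation is allowed.
#include <cstdint>
#include <iostream>
#include <optional>

std::optional<uint64_t> selectShiftImm(uint64_t ImmVal, uint64_t Low,
                                       uint64_t High, bool AllowSaturation) {
  if (ImmVal < Low)
    return std::nullopt;        // too small: never valid
  if (ImmVal > High) {
    if (!AllowSaturation)
      return std::nullopt;      // too large and saturation not permitted
    ImmVal = High;              // clamp to the widest legal shift
  }
  return ImmVal;
}

int main() {
  std::cout << *selectShiftImm(70, 1, 64, /*AllowSaturation=*/true) << "\n"; // 64
  std::cout << selectShiftImm(70, 1, 64, false).has_value() << "\n";         // 0
}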
@@ -3798,6 +3833,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
}
break;
+ case Intrinsic::aarch64_ld64b:
+ SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
+ return;
}
} break;
case ISD::INTRINSIC_WO_CHAIN: {
@@ -4816,7 +4854,8 @@ static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
return EVT();
ElementCount EC = PredVT.getVectorElementCount();
- EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min);
+ EVT ScalarVT =
+ EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
return MemVT;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 48ca9039b1bd..c522ee76626d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27,7 +27,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -113,9 +112,76 @@ EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
"optimization"),
cl::init(true));
+// Temporary option added for the purpose of testing functionality added
+// to DAGCombiner.cpp in D92230. It is expected that this can be removed
+// in the future, once both implementations are based on MGATHER rather
+// than the GLD1 nodes added for the SVE gather load intrinsics.
+static cl::opt<bool>
+EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
+ cl::desc("Combine extends of AArch64 masked "
+ "gather intrinsics"),
+ cl::init(true));
+
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
+static inline EVT getPackedSVEVectorVT(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("unexpected element type for vector");
+ case MVT::i8:
+ return MVT::nxv16i8;
+ case MVT::i16:
+ return MVT::nxv8i16;
+ case MVT::i32:
+ return MVT::nxv4i32;
+ case MVT::i64:
+ return MVT::nxv2i64;
+ case MVT::f16:
+ return MVT::nxv8f16;
+ case MVT::f32:
+ return MVT::nxv4f32;
+ case MVT::f64:
+ return MVT::nxv2f64;
+ case MVT::bf16:
+ return MVT::nxv8bf16;
+ }
+}
+
+// NOTE: Currently there's only a need to return integer vector types. If this
+// changes then just add an extra "type" parameter.
+static inline EVT getPackedSVEVectorVT(ElementCount EC) {
+ switch (EC.getKnownMinValue()) {
+ default:
+ llvm_unreachable("unexpected element count for vector");
+ case 16:
+ return MVT::nxv16i8;
+ case 8:
+ return MVT::nxv8i16;
+ case 4:
+ return MVT::nxv4i32;
+ case 2:
+ return MVT::nxv2i64;
+ }
+}
+
+static inline EVT getPromotedVTForPredicate(EVT VT) {
+ assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
+ "Expected scalable predicate vector type!");
+ switch (VT.getVectorMinNumElements()) {
+ default:
+ llvm_unreachable("unexpected element count for vector");
+ case 2:
+ return MVT::nxv2i64;
+ case 4:
+ return MVT::nxv4i32;
+ case 8:
+ return MVT::nxv8i16;
+ case 16:
+ return MVT::nxv16i8;
+ }
+}
+
/// Returns true if VT's elements occupy the lowest bit positions of its
/// associated register class without any intervening space.
///
@@ -128,6 +194,42 @@ static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
}
+// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
+// predicate and end with a passthru value matching the result type.
+static bool isMergePassthruOpcode(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
+ case AArch64ISD::BSWAP_MERGE_PASSTHRU:
+ case AArch64ISD::CTLZ_MERGE_PASSTHRU:
+ case AArch64ISD::CTPOP_MERGE_PASSTHRU:
+ case AArch64ISD::DUP_MERGE_PASSTHRU:
+ case AArch64ISD::ABS_MERGE_PASSTHRU:
+ case AArch64ISD::NEG_MERGE_PASSTHRU:
+ case AArch64ISD::FNEG_MERGE_PASSTHRU:
+ case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
+ case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
+ case AArch64ISD::FCEIL_MERGE_PASSTHRU:
+ case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
+ case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
+ case AArch64ISD::FRINT_MERGE_PASSTHRU:
+ case AArch64ISD::FROUND_MERGE_PASSTHRU:
+ case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
+ case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
+ case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
+ case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
+ case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
+ case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
+ case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
+ case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
+ case AArch64ISD::FSQRT_MERGE_PASSTHRU:
+ case AArch64ISD::FRECPX_MERGE_PASSTHRU:
+ case AArch64ISD::FABS_MERGE_PASSTHRU:
+ return true;
+ }
+}
+
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -161,7 +263,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
- addDRTypeForNEON(MVT::v4bf16);
+ if (Subtarget->hasBF16())
+ addDRTypeForNEON(MVT::v4bf16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
@@ -170,7 +273,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
- addQRTypeForNEON(MVT::v8bf16);
+ if (Subtarget->hasBF16())
+ addQRTypeForNEON(MVT::v8bf16);
}
if (Subtarget->hasSVE()) {
@@ -199,7 +303,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
}
- if (useSVEForFixedLengthVectors()) {
+ if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
@@ -230,7 +334,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
MVT::nxv2f64 }) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
+ setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
+ setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
@@ -296,12 +402,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
- setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
- setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FDIV, MVT::f128, LibCall);
setOperationAction(ISD::FMA, MVT::f128, Expand);
- setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
@@ -309,7 +415,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
- setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
@@ -345,8 +451,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
@@ -401,6 +509,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
+ setOperationAction(ISD::ABS, MVT::i64, Custom);
+
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -588,6 +699,57 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ // Generate outline atomics library calls only if LSE was not specified for
+ // the subtarget.
+ if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
+#define LCALLNAMES(A, B, N) \
+ setLibcallName(A##N##_RELAX, #B #N "_relax"); \
+ setLibcallName(A##N##_ACQ, #B #N "_acq"); \
+ setLibcallName(A##N##_REL, #B #N "_rel"); \
+ setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
+#define LCALLNAME4(A, B) \
+ LCALLNAMES(A, B, 1) \
+ LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
+#define LCALLNAME5(A, B) \
+ LCALLNAMES(A, B, 1) \
+ LCALLNAMES(A, B, 2) \
+ LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
+ LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
+ LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
+ LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
+ LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
+ LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
+ LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
+#undef LCALLNAMES
+#undef LCALLNAME4
+#undef LCALLNAME5
+ }
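The LCALLNAME* macros above register one libcall name per outlined atomic operation, access size, and memory-order suffix; for example the 4-byte acquire CAS becomes __aarch64_cas4_acq and the 8-byte release fetch-add becomes __aarch64_ldadd8_rel. The snippet below simply prints the names those expansions produce; it is illustrative only and does not touch RTLIB or the real setLibcallName API.

// Worked expansion of the LCALLNAME* macros: each outlined atomic helper gets
// one entry per access size and per memory-order suffix.
#include <iostream>
#include <string>
#include <vector>

int main() {
  const std::vector<std::string> Ops = {"__aarch64_cas",   "__aarch64_swp",
                                        "__aarch64_ldadd", "__aarch64_ldset",
                                        "__aarch64_ldclr", "__aarch64_ldeor"};
  const std::vector<std::string> Suffixes = {"_relax", "_acq", "_rel", "_acq_rel"};
  for (const std::string &Op : Ops) {
    std::vector<int> Sizes = {1, 2, 4, 8};
    if (Op == "__aarch64_cas")
      Sizes.push_back(16); // CAS additionally has a 16-byte variant (LCALLNAME5)
    for (int N : Sizes)
      for (const std::string &S : Suffixes)
        std::cout << Op << N << S << "\n"; // e.g. __aarch64_cas4_acq
  }
}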
+
// 128-bit loads and stores can be done without expanding
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
@@ -677,8 +839,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Trap.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
- if (Subtarget->isTargetWindows())
- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+ setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// We combine OR nodes for bitfield operations.
setTargetDAGCombine(ISD::OR);
@@ -688,6 +850,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
@@ -704,11 +867,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+ setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::STORE);
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine(ISD::MGATHER);
+ setTargetDAGCombine(ISD::MSCATTER);
+
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
@@ -717,6 +884,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::VECREDUCE_ADD);
setTargetDAGCombine(ISD::GlobalAddress);
@@ -836,28 +1005,34 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // Saturates
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
- // Vector reductions
- setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
-
- // Saturates
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
-
- setOperationAction(ISD::TRUNCATE, VT, Custom);
}
+
+ // Vector reductions
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+ if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
+ }
+ }
+ for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
+ MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
+ setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
@@ -918,46 +1093,112 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
// splat of 0 or undef) once vector selects supported in SVE codegen. See
// D68877 for more details.
- for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
- if (isTypeLegal(VT)) {
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::SDIV, VT, Custom);
- setOperationAction(ISD::UDIV, VT, Custom);
- setOperationAction(ISD::SMIN, VT, Custom);
- setOperationAction(ISD::UMIN, VT, Custom);
- setOperationAction(ISD::SMAX, VT, Custom);
- setOperationAction(ISD::UMAX, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- if (VT.getScalarType() == MVT::i1) {
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
- }
- }
+ for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::BSWAP, VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ setOperationAction(ISD::CTTZ, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ setOperationAction(ISD::SMIN, VT, Custom);
+ setOperationAction(ISD::UMIN, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, Custom);
+ setOperationAction(ISD::UMAX, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::ABS, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
}
- for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32})
+ // Illegal unpacked integer vector types.
+ for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ }
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
-
- for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
- if (isTypeLegal(VT)) {
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::FMA, VT, Custom);
+ for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+
+ // There are no legal MVT::nxv16f## based types.
+ if (VT != MVT::nxv16i1) {
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
}
}
+ for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
+ MVT::nxv4f32, MVT::nxv2f64}) {
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::FDIV, VT, Custom);
+ setOperationAction(ISD::FMA, VT, Custom);
+ setOperationAction(ISD::FMAXNUM, VT, Custom);
+ setOperationAction(ISD::FMINNUM, VT, Custom);
+ setOperationAction(ISD::FMUL, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FSUB, VT, Custom);
+ setOperationAction(ISD::FCEIL, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Custom);
+ setOperationAction(ISD::FNEARBYINT, VT, Custom);
+ setOperationAction(ISD::FRINT, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FROUNDEVEN, VT, Custom);
+ setOperationAction(ISD::FTRUNC, VT, Custom);
+ setOperationAction(ISD::FSQRT, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FP_EXTEND, VT, Custom);
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ }
+
+ for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
+ }
+
+ setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
+
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
- if (useSVEForFixedLengthVectors()) {
+ if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
@@ -975,6 +1216,61 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, VT, Custom);
for (auto VT : {MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::FP_ROUND, VT, Expand);
+
+ // These operations are not supported on NEON but SVE can do them.
+ setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v1i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
+ setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
+ setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
+ setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
+ setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
+ setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
+ setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
+ setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
+ setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
+ setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
+ setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
+ setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
+
+ // Int operations with no NEON support.
+ for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
+ MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::CTTZ, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ }
+
+ // FP operations with no NEON support.
+ for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
+ MVT::v1f64, MVT::v2f64})
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+
+ // Use SVE for vectors with more than 2 elements.
+ for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
}
}
@@ -1046,6 +1342,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
if (VT.isFloatingPoint() &&
+ VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
@@ -1071,11 +1368,64 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Lower fixed length vector operations to scalable equivalents.
+ setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
+ setOperationAction(ISD::AND, VT, Custom);
+ setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::BSWAP, VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::FCEIL, VT, Custom);
+ setOperationAction(ISD::FDIV, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Custom);
+ setOperationAction(ISD::FMA, VT, Custom);
+ setOperationAction(ISD::FMAXNUM, VT, Custom);
+ setOperationAction(ISD::FMINNUM, VT, Custom);
+ setOperationAction(ISD::FMUL, VT, Custom);
+ setOperationAction(ISD::FNEARBYINT, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FRINT, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FSQRT, VT, Custom);
+ setOperationAction(ISD::FSUB, VT, Custom);
+ setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
+ setOperationAction(ISD::OR, VT, Custom);
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, Custom);
+ setOperationAction(ISD::SMIN, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ setOperationAction(ISD::UMAX, VT, Custom);
+ setOperationAction(ISD::UMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
+ setOperationAction(ISD::XOR, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
}
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
@@ -1247,8 +1597,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
- Known.Zero &= Known2.Zero;
- Known.One &= Known2.One;
+ Known = KnownBits::commonBits(Known, Known2);
break;
}
case AArch64ISD::LOADgot:
@@ -1388,15 +1737,38 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ADD_PRED)
+ MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::SDIV_PRED)
+ MAKE_CASE(AArch64ISD::SHL_PRED)
+ MAKE_CASE(AArch64ISD::SMAX_PRED)
+ MAKE_CASE(AArch64ISD::SMIN_PRED)
+ MAKE_CASE(AArch64ISD::SRA_PRED)
+ MAKE_CASE(AArch64ISD::SRL_PRED)
+ MAKE_CASE(AArch64ISD::SUB_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)
- MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1)
- MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1)
- MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1)
- MAKE_CASE(AArch64ISD::UMAX_MERGE_OP1)
- MAKE_CASE(AArch64ISD::SHL_MERGE_OP1)
- MAKE_CASE(AArch64ISD::SRL_MERGE_OP1)
- MAKE_CASE(AArch64ISD::SRA_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::UMAX_PRED)
+ MAKE_CASE(AArch64ISD::UMIN_PRED)
+ MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ADC)
MAKE_CASE(AArch64ISD::SBC)
@@ -1465,10 +1837,14 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::SRHADD)
MAKE_CASE(AArch64ISD::URHADD)
+ MAKE_CASE(AArch64ISD::SHADD)
+ MAKE_CASE(AArch64ISD::UHADD)
MAKE_CASE(AArch64ISD::SMINV)
MAKE_CASE(AArch64ISD::UMINV)
MAKE_CASE(AArch64ISD::SMAXV)
MAKE_CASE(AArch64ISD::UMAXV)
+ MAKE_CASE(AArch64ISD::SADDV_PRED)
+ MAKE_CASE(AArch64ISD::UADDV_PRED)
MAKE_CASE(AArch64ISD::SMAXV_PRED)
MAKE_CASE(AArch64ISD::UMAXV_PRED)
MAKE_CASE(AArch64ISD::SMINV_PRED)
@@ -1486,12 +1862,16 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
MAKE_CASE(AArch64ISD::FADDV_PRED)
+ MAKE_CASE(AArch64ISD::FDIV_PRED)
MAKE_CASE(AArch64ISD::FMA_PRED)
MAKE_CASE(AArch64ISD::FMAXV_PRED)
+ MAKE_CASE(AArch64ISD::FMAXNM_PRED)
MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
MAKE_CASE(AArch64ISD::FMINV_PRED)
+ MAKE_CASE(AArch64ISD::FMINNM_PRED)
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
- MAKE_CASE(AArch64ISD::NOT)
+ MAKE_CASE(AArch64ISD::FMUL_PRED)
+ MAKE_CASE(AArch64ISD::FSUB_PRED)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
MAKE_CASE(AArch64ISD::CBNZ)
@@ -1603,8 +1983,15 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::LDP)
MAKE_CASE(AArch64ISD::STP)
MAKE_CASE(AArch64ISD::STNP)
+ MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
+ MAKE_CASE(AArch64ISD::UABD)
+ MAKE_CASE(AArch64ISD::SABD)
+ MAKE_CASE(AArch64ISD::CALL_RVMARKER)
}
#undef MAKE_CASE
return nullptr;
@@ -1692,6 +2079,7 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
+ case TargetOpcode::STATEPOINT:
return emitPatchPoint(MI, BB);
case AArch64::CATCHRET:
@@ -2517,21 +2905,10 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
return std::make_pair(Value, Overflow);
}
-SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
- RTLIB::Libcall Call) const {
- bool IsStrict = Op->isStrictFPOpcode();
- unsigned Offset = IsStrict ? 1 : 0;
- SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
- SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
- MakeLibCallOptions CallOptions;
- SDValue Result;
- SDLoc dl(Op);
- std::tie(Result, Chain) = makeLibCall(DAG, Call, Op.getValueType(), Ops,
- CallOptions, dl, Chain);
- return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
-}
+SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
+ if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+ return LowerToScalableOp(Op, DAG);
-static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
SDValue Sel = Op.getOperand(0);
SDValue Other = Op.getOperand(1);
SDLoc dl(Sel);
@@ -2706,16 +3083,18 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
- assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
-
- RTLIB::Libcall LC;
- LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+ if (Op.getValueType().isScalableVector())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
- return LowerF128Call(Op, DAG, LC);
+ assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SelectionDAG &DAG) const {
+ if (Op.getValueType().isScalableVector())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
+
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
@@ -2729,19 +3108,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
return Op;
}
- RTLIB::Libcall LC;
- LC = RTLIB::getFPROUND(SrcVT, Op.getValueType());
-
- // FP_ROUND node has a second operand indicating whether it is known to be
- // precise. That doesn't take part in the LibCall so we can't directly use
- // LowerF128Call.
- MakeLibCallOptions CallOptions;
- SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
- SDValue Result;
- SDLoc dl(Op);
- std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
- CallOptions, dl, Chain);
- return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
@@ -2751,6 +3118,14 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
// in the cost tables.
EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();
+
+ if (VT.isScalableVector()) {
+ unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
+ ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
+ : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
+ return LowerToPredicatedOp(Op, DAG, Opcode);
+ }
+
unsigned NumElts = InVT.getVectorNumElements();
// f16 conversions are promoted to f32 when full fp16 is not supported.
@@ -2763,7 +3138,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
}
- if (VT.getSizeInBits() < InVT.getSizeInBits()) {
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ uint64_t InVTSize = InVT.getFixedSizeInBits();
+ if (VTSize < InVTSize) {
SDLoc dl(Op);
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
@@ -2771,7 +3148,7 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
}
- if (VT.getSizeInBits() > InVT.getSizeInBits()) {
+ if (VTSize > InVTSize) {
SDLoc dl(Op);
MVT ExtVT =
MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
@@ -2806,17 +3183,11 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
return Op;
}
- RTLIB::Libcall LC;
- if (Op.getOpcode() == ISD::FP_TO_SINT ||
- Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
- LC = RTLIB::getFPTOSINT(SrcVal.getValueType(), Op.getValueType());
- else
- LC = RTLIB::getFPTOUINT(SrcVal.getValueType(), Op.getValueType());
-
- return LowerF128Call(Op, DAG, LC);
+ return SDValue();
}
-static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
@@ -2824,21 +3195,38 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
SDValue In = Op.getOperand(0);
EVT InVT = In.getValueType();
+ unsigned Opc = Op.getOpcode();
+ bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
+
+ if (VT.isScalableVector()) {
+ if (InVT.getVectorElementType() == MVT::i1) {
+ // We can't directly extend an SVE predicate; extend it first.
+ unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ EVT CastVT = getPromotedVTForPredicate(InVT);
+ In = DAG.getNode(CastOpc, dl, CastVT, In);
+ return DAG.getNode(Opc, dl, VT, In);
+ }
- if (VT.getSizeInBits() < InVT.getSizeInBits()) {
+ unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
+ : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
+ return LowerToPredicatedOp(Op, DAG, Opcode);
+ }
+
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ uint64_t InVTSize = InVT.getFixedSizeInBits();
+ if (VTSize < InVTSize) {
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
- In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
+ In = DAG.getNode(Opc, dl, CastVT, In);
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
- if (VT.getSizeInBits() > InVT.getSizeInBits()) {
- unsigned CastOpc =
- Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ if (VTSize > InVTSize) {
+ unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = VT.changeVectorElementTypeToInteger();
In = DAG.getNode(CastOpc, dl, CastVT, In);
- return DAG.getNode(Op.getOpcode(), dl, VT, In);
+ return DAG.getNode(Opc, dl, VT, In);
}
return Op;
@@ -2871,15 +3259,7 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
// fp128.
if (Op.getValueType() != MVT::f128)
return Op;
-
- RTLIB::Libcall LC;
- if (Op.getOpcode() == ISD::SINT_TO_FP ||
- Op.getOpcode() == ISD::STRICT_SINT_TO_FP)
- LC = RTLIB::getSINTTOFP(SrcVal.getValueType(), Op.getValueType());
- else
- LC = RTLIB::getUINTTOFP(SrcVal.getValueType(), Op.getValueType());
-
- return LowerF128Call(Op, DAG, LC);
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
@@ -2993,7 +3373,8 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
}
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ if (N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
@@ -3018,11 +3399,13 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, true);
}
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, false);
}
@@ -3071,10 +3454,17 @@ SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
return DAG.getMergeValues({AND, Chain}, dl);
}
-static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // If SVE is available then i64 vector multiplications can also be made legal.
+ bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
+
+ if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
+
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
- EVT VT = Op.getValueType();
assert(VT.is128BitVector() && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
@@ -3233,11 +3623,77 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_ptrue:
return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
Op.getOperand(1));
+ case Intrinsic::aarch64_sve_clz:
+ return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_cnt: {
+ SDValue Data = Op.getOperand(3);
+ // CTPOP only supports integer operands.
+ if (Data.getValueType().isFloatingPoint())
+ Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
+ return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Data, Op.getOperand(1));
+ }
case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);
case Intrinsic::aarch64_sve_convert_from_svbool:
return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
Op.getOperand(1));
+ case Intrinsic::aarch64_sve_fneg:
+ return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintp:
+ return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintm:
+ return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frinti:
+ return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintx:
+ return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frinta:
+ return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintn:
+ return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintz:
+ return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_ucvtf:
+ return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_scvtf:
+ return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_fcvtzu:
+ return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_fcvtzs:
+ return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_fsqrt:
+ return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frecpx:
+ return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_fabs:
+ return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_abs:
+ return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_neg:
+ return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_convert_to_svbool: {
EVT OutVT = Op.getValueType();
EVT InVT = Op.getOperand(1).getValueType();
@@ -3263,6 +3719,49 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
Op.getOperand(1), Scalar);
}
+ case Intrinsic::aarch64_sve_rbit:
+ return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_revb:
+ return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_sxtb:
+ return DAG.getNode(
+ AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_sxth:
+ return DAG.getNode(
+ AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_sxtw:
+ return DAG.getNode(
+ AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_uxtb:
+ return DAG.getNode(
+ AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_uxth:
+ return DAG.getNode(
+ AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_uxtw:
+ return DAG.getNode(
+ AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
+ Op.getOperand(1));
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
@@ -3302,19 +3801,291 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::aarch64_neon_srhadd:
- case Intrinsic::aarch64_neon_urhadd: {
- bool IsSignedAdd = IntNo == Intrinsic::aarch64_neon_srhadd;
- unsigned Opcode = IsSignedAdd ? AArch64ISD::SRHADD : AArch64ISD::URHADD;
+ case Intrinsic::aarch64_neon_urhadd:
+ case Intrinsic::aarch64_neon_shadd:
+ case Intrinsic::aarch64_neon_uhadd: {
+ bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
+ IntNo == Intrinsic::aarch64_neon_shadd);
+ bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
+ IntNo == Intrinsic::aarch64_neon_urhadd);
+ unsigned Opcode =
+ IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
+ : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
+
+ case Intrinsic::aarch64_neon_uabd: {
+ return DAG.getNode(AArch64ISD::UABD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::aarch64_neon_sabd: {
+ return DAG.getNode(AArch64ISD::SABD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
}
}
+bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
+ if (VT.getVectorElementType() == MVT::i32 &&
+ VT.getVectorElementCount().getKnownMinValue() >= 4)
+ return true;
+
+ return false;
+}
+
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector();
}
+unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
+ std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
+ AArch64ISD::GLD1_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
+ AArch64ISD::GLD1_UXTW_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
+ AArch64ISD::GLD1_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
+ AArch64ISD::GLD1_SXTW_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
+ AArch64ISD::GLD1_SCALED_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
+ AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
+ AArch64ISD::GLD1_SCALED_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
+ AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
+ };
+ auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
+ return AddrModes.find(Key)->second;
+}
+
+unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
+ std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
+ AArch64ISD::SST1_PRED},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
+ AArch64ISD::SST1_UXTW_PRED},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
+ AArch64ISD::SST1_PRED},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
+ AArch64ISD::SST1_SXTW_PRED},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
+ AArch64ISD::SST1_SCALED_PRED},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
+ AArch64ISD::SST1_UXTW_SCALED_PRED},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
+ AArch64ISD::SST1_SCALED_PRED},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
+ AArch64ISD::SST1_SXTW_SCALED_PRED},
+ };
+ auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
+ return AddrModes.find(Key)->second;
+}
+
+unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("unimplemented opcode");
+ return Opcode;
+ case AArch64ISD::GLD1_MERGE_ZERO:
+ return AArch64ISD::GLD1S_MERGE_ZERO;
+ case AArch64ISD::GLD1_IMM_MERGE_ZERO:
+ return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
+ case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
+ return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
+ case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
+ return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
+ case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
+ return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
+ case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
+ return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
+ case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
+ return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
+ }
+}
+
+bool getGatherScatterIndexIsExtended(SDValue Index) {
+ unsigned Opcode = Index.getOpcode();
+ if (Opcode == ISD::SIGN_EXTEND_INREG)
+ return true;
+
+ if (Opcode == ISD::AND) {
+ SDValue Splat = Index.getOperand(1);
+ if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
+ return false;
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
+ if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+// If the base pointer of a masked gather or scatter is null, we
+// may be able to swap BasePtr & Index and use the vector + register
+// or vector + immediate addressing mode, e.g.
+// VECTOR + REGISTER:
+// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
+// -> getelementptr %offset, <vscale x N x T> %indices
+// VECTOR + IMMEDIATE:
+// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
+// -> getelementptr #x, <vscale x N x T> %indices
+void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
+ unsigned &Opcode, bool IsGather,
+ SelectionDAG &DAG) {
+ if (!isNullConstant(BasePtr))
+ return;
+
+ ConstantSDNode *Offset = nullptr;
+ if (Index.getOpcode() == ISD::ADD)
+ if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
+ if (isa<ConstantSDNode>(SplatVal))
+ Offset = cast<ConstantSDNode>(SplatVal);
+ else {
+ BasePtr = SplatVal;
+ Index = Index->getOperand(0);
+ return;
+ }
+ }
+
+ unsigned NewOp =
+ IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
+
+ if (!Offset) {
+ std::swap(BasePtr, Index);
+ Opcode = NewOp;
+ return;
+ }
+
+ uint64_t OffsetVal = Offset->getZExtValue();
+ unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
+ auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
+
+ if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
+ // Index is out of range for the immediate addressing mode
+ BasePtr = ConstOffset;
+ Index = Index->getOperand(0);
+ return;
+ }
+
+ // Immediate is in range
+ Opcode = NewOp;
+ BasePtr = Index->getOperand(0);
+ Index = ConstOffset;
+}
+
+SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
+ assert(MGT && "Can only custom lower gather load nodes");
+
+ SDValue Index = MGT->getIndex();
+ SDValue Chain = MGT->getChain();
+ SDValue PassThru = MGT->getPassThru();
+ SDValue Mask = MGT->getMask();
+ SDValue BasePtr = MGT->getBasePtr();
+ ISD::LoadExtType ExtTy = MGT->getExtensionType();
+
+ ISD::MemIndexType IndexType = MGT->getIndexType();
+ bool IsScaled =
+ IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
+ bool IsSigned =
+ IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
+ bool IdxNeedsExtend =
+ getGatherScatterIndexIsExtended(Index) ||
+ Index.getSimpleValueType().getVectorElementType() == MVT::i32;
+ bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
+
+ EVT VT = PassThru.getSimpleValueType();
+ EVT MemVT = MGT->getMemoryVT();
+ SDValue InputVT = DAG.getValueType(MemVT);
+
+ if (VT.getVectorElementType() == MVT::bf16 &&
+ !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
+ return SDValue();
+
+ // Handle FP data by using an integer gather and casting the result.
+ if (VT.isFloatingPoint()) {
+ EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
+ PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
+ InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
+ }
+
+ SDVTList VTs = DAG.getVTList(PassThru.getSimpleValueType(), MVT::Other);
+
+ if (getGatherScatterIndexIsExtended(Index))
+ Index = Index.getOperand(0);
+
+ unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
+ selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+ /*isGather=*/true, DAG);
+
+ if (ResNeedsSignExtend)
+ Opcode = getSignExtendedGatherOpcode(Opcode);
+
+ SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru};
+ SDValue Gather = DAG.getNode(Opcode, DL, VTs, Ops);
+
+ if (VT.isFloatingPoint()) {
+ SDValue Cast = getSVESafeBitCast(VT, Gather, DAG);
+ return DAG.getMergeValues({Cast, Gather}, DL);
+ }
+
+ return Gather;
+}
+
+SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
+ assert(MSC && "Can only custom lower scatter store nodes");
+
+ SDValue Index = MSC->getIndex();
+ SDValue Chain = MSC->getChain();
+ SDValue StoreVal = MSC->getValue();
+ SDValue Mask = MSC->getMask();
+ SDValue BasePtr = MSC->getBasePtr();
+
+ ISD::MemIndexType IndexType = MSC->getIndexType();
+ bool IsScaled =
+ IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
+ bool IsSigned =
+ IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
+ bool NeedsExtend =
+ getGatherScatterIndexIsExtended(Index) ||
+ Index.getSimpleValueType().getVectorElementType() == MVT::i32;
+
+ EVT VT = StoreVal.getSimpleValueType();
+ SDVTList VTs = DAG.getVTList(MVT::Other);
+ EVT MemVT = MSC->getMemoryVT();
+ SDValue InputVT = DAG.getValueType(MemVT);
+
+ if (VT.getVectorElementType() == MVT::bf16 &&
+ !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
+ return SDValue();
+
+ // Handle FP data by casting the data so an integer scatter can be used.
+ if (VT.isFloatingPoint()) {
+ EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
+ StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
+ InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
+ }
+
+ if (getGatherScatterIndexIsExtended(Index))
+ Index = Index.getOperand(0);
+
+ unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
+ selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+ /*isGather=*/false, DAG);
+
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
+ return DAG.getNode(Opcode, DL, VTs, Ops);
+}
+
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
EVT VT, EVT MemVT,
@@ -3380,8 +4151,9 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
// 256 bit non-temporal stores can be lowered to STNP. Do this as part of
// the custom lowering, as there are no un-paired non-temporal stores and
// legalization will break up 256 bit inputs.
+ ElementCount EC = MemVT.getVectorElementCount();
if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
- MemVT.getVectorElementCount().Min % 2u == 0 &&
+ EC.isKnownEven() &&
((MemVT.getScalarSizeInBits() == 8u ||
MemVT.getScalarSizeInBits() == 16u ||
MemVT.getScalarSizeInBits() == 32u ||
@@ -3390,11 +4162,11 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
- SDValue Hi = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, Dl,
- MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
- StoreNode->getValue(),
- DAG.getConstant(MemVT.getVectorElementCount().Min / 2, Dl, MVT::i64));
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
+ MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
+ StoreNode->getValue(),
+ DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
@@ -3419,6 +4191,25 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
return SDValue();
}
+// Generate SUBS and CSEL for integer abs.
+SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ if (VT.isVector())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
+
+ SDLoc DL(Op);
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Op.getOperand(0));
+ // Generate SUBS & CSEL.
+ SDValue Cmp =
+ DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
+ DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
+ Cmp.getValue(1));
+}
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -3471,17 +4262,35 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::UMULO:
return LowerXALUO(Op, DAG);
case ISD::FADD:
- if (useSVEForFixedLengthVectorVT(Op.getValueType()))
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
- return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
case ISD::FSUB:
- return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
case ISD::FMUL:
- return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:
- return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
+ case ISD::FNEG:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
+ case ISD::FCEIL:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
+ case ISD::FFLOOR:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
+ case ISD::FNEARBYINT:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
+ case ISD::FRINT:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
+ case ISD::FROUND:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
+ case ISD::FROUNDEVEN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
+ case ISD::FTRUNC:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
+ case ISD::FSQRT:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
+ case ISD::FABS:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);
@@ -3495,6 +4304,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR:
return LowerADDROFRETURNADDR(Op, DAG);
+ case ISD::CONCAT_VECTORS:
+ return LowerCONCAT_VECTORS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
@@ -3510,17 +4321,20 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);
case ISD::UDIV:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED);
+ return LowerDIV(Op, DAG);
case ISD::SMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
+ /*OverrideNEON=*/true);
case ISD::UMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
+ /*OverrideNEON=*/true);
case ISD::SMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
+ /*OverrideNEON=*/true);
case ISD::UMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
+ /*OverrideNEON=*/true);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
@@ -3560,11 +4374,21 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::STORE:
return LowerSTORE(Op, DAG);
+ case ISD::MGATHER:
+ return LowerMGATHER(Op, DAG);
+ case ISD::MSCATTER:
+ return LowerMSCATTER(Op, DAG);
+ case ISD::VECREDUCE_SEQ_FADD:
+ return LowerVECREDUCE_SEQ_FADD(Op, DAG);
case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
return LowerVECREDUCE(Op, DAG);
@@ -3576,6 +4400,21 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VSCALE:
return LowerVSCALE(Op, DAG);
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: {
+ // Only custom lower when ExtraVT has a legal byte based element type.
+ EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
+ (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
+ return SDValue();
+
+ return LowerToPredicatedOp(Op, DAG,
+ AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
+ }
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
case ISD::LOAD:
@@ -3583,31 +4422,49 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
llvm_unreachable("Unexpected request to lower ISD::LOAD");
case ISD::ADD:
- if (useSVEForFixedLengthVectorVT(Op.getValueType()))
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
- llvm_unreachable("Unexpected request to lower ISD::ADD");
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
+ case ISD::AND:
+ return LowerToScalableOp(Op, DAG);
+ case ISD::SUB:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
+ case ISD::FMAXNUM:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
+ case ISD::FMINNUM:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
+ case ISD::VSELECT:
+ return LowerFixedLengthVectorSelectToSVE(Op, DAG);
+ case ISD::ABS:
+ return LowerABS(Op, DAG);
+ case ISD::BITREVERSE:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
+ /*OverrideNEON=*/true);
+ case ISD::BSWAP:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
+ case ISD::CTLZ:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
+ /*OverrideNEON=*/true);
+ case ISD::CTTZ:
+ return LowerCTTZ(Op, DAG);
}
}
-bool AArch64TargetLowering::useSVEForFixedLengthVectors() const {
- // Prefer NEON unless larger SVE registers are available.
- return Subtarget->hasSVE() && Subtarget->getMinSVEVectorSizeInBits() >= 256;
+bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
+ return !Subtarget->useSVEForFixedLengthVectors();
}
-bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(EVT VT) const {
- if (!useSVEForFixedLengthVectors())
+bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
+ EVT VT, bool OverrideNEON) const {
+ if (!Subtarget->useSVEForFixedLengthVectors())
return false;
if (!VT.isFixedLengthVector())
return false;
- // Fixed length predicates should be promoted to i8.
- // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
- if (VT.getVectorElementType() == MVT::i1)
- return false;
-
// Don't use SVE for vectors we cannot scalarize if required.
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ // Fixed length predicates should be promoted to i8.
+ // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
+ case MVT::i1:
default:
return false;
case MVT::i8:
@@ -3620,12 +4477,16 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(EVT VT) const {
break;
}
+ // All SVE implementations support NEON sized vectors.
+ if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
+ return true;
+
// Ensure NEON MVTs only belong to a single register class.
- if (VT.getSizeInBits() <= 128)
+ if (VT.getFixedSizeInBits() <= 128)
return false;
// Don't use SVE for types that don't fit.
- if (VT.getSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
+ if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
return false;
// TODO: Perhaps an artificial restriction, but worth having whilst getting
@@ -3724,10 +4585,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
- assert(ArgLocs.size() == Ins.size());
SmallVector<SDValue, 16> ArgValues;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
+ unsigned ExtraArgLocs = 0;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
@@ -3855,16 +4716,44 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
if (VA.getLocInfo() == CCValAssign::Indirect) {
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
- // If value is passed via pointer - do a load.
- ArgValue =
- DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo());
- }
- if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
- ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
- ArgValue, DAG.getValueType(MVT::i32));
- InVals.push_back(ArgValue);
+ uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
+ unsigned NumParts = 1;
+ if (Ins[i].Flags.isInConsecutiveRegs()) {
+ assert(!Ins[i].Flags.isInConsecutiveRegsLast());
+ while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
+ ++NumParts;
+ }
+
+ MVT PartLoad = VA.getValVT();
+ SDValue Ptr = ArgValue;
+
+ // Ensure we generate all loads for each tuple part, whilst updating the
+ // pointer after each load correctly using vscale.
+ while (NumParts > 0) {
+ ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
+ InVals.push_back(ArgValue);
+ NumParts--;
+ if (NumParts > 0) {
+ SDValue BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ BytesIncrement, Flags);
+ ExtraArgLocs++;
+ i++;
+ }
+ }
+ } else {
+ if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
+ ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
+ ArgValue, DAG.getValueType(MVT::i32));
+ InVals.push_back(ArgValue);
+ }
}
+ assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
// varargs
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
@@ -4039,9 +4928,7 @@ SDValue AArch64TargetLowering::LowerCallResult(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
- CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
- ? RetCC_AArch64_WebKit_JS
- : RetCC_AArch64_AAPCS;
+ CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
@@ -4464,8 +5351,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
+ unsigned ExtraArgLocs = 0;
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -4507,18 +5395,49 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
+
+ uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
+ uint64_t PartSize = StoreSize;
+ unsigned NumParts = 1;
+ if (Outs[i].Flags.isInConsecutiveRegs()) {
+ assert(!Outs[i].Flags.isInConsecutiveRegsLast());
+ while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
+ ++NumParts;
+ StoreSize *= NumParts;
+ }
+
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
- int FI = MFI.CreateStackObject(
- VA.getValVT().getStoreSize().getKnownMinSize(), Alignment, false);
- MFI.setStackID(FI, TargetStackID::SVEVector);
+ int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
+ MFI.setStackID(FI, TargetStackID::ScalableVector);
- SDValue SpillSlot = DAG.getFrameIndex(
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+ SDValue Ptr = DAG.getFrameIndex(
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
- Chain = DAG.getStore(
- Chain, DL, Arg, SpillSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ SDValue SpillSlot = Ptr;
+
+ // Ensure we generate all stores for each tuple part, whilst updating the
+ // pointer after each store correctly using vscale.
+ while (NumParts) {
+ Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
+ NumParts--;
+ if (NumParts > 0) {
+ SDValue BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
+ MPI = MachinePointerInfo(MPI.getAddrSpace());
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ BytesIncrement, Flags);
+ ExtraArgLocs++;
+ i++;
+ }
+ }
+
Arg = SpillSlot;
break;
}
@@ -4538,20 +5457,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// take care of putting the two halves in the right place but we have to
// combine them.
SDValue &Bits =
- std::find_if(RegsToPass.begin(), RegsToPass.end(),
- [=](const std::pair<unsigned, SDValue> &Elt) {
- return Elt.first == VA.getLocReg();
- })
+ llvm::find_if(RegsToPass,
+ [=](const std::pair<unsigned, SDValue> &Elt) {
+ return Elt.first == VA.getLocReg();
+ })
->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
// Call site info is used for function's parameter entry value
// tracking. For now we track only simple cases when parameter
// is transferred through whole register.
- CSInfo.erase(std::remove_if(CSInfo.begin(), CSInfo.end(),
- [&VA](MachineFunction::ArgRegPair ArgReg) {
- return ArgReg.Reg == VA.getLocReg();
- }),
- CSInfo.end());
+ llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
+ return ArgReg.Reg == VA.getLocReg();
+ });
} else {
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
@@ -4570,7 +5487,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
uint32_t BEAlign = 0;
unsigned OpSize;
if (VA.getLocInfo() == CCValAssign::Indirect)
- OpSize = VA.getLocVT().getSizeInBits();
+ OpSize = VA.getLocVT().getFixedSizeInBits();
else
OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
: VA.getValVT().getSizeInBits();
@@ -4730,8 +5647,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
return Ret;
}
+ unsigned CallOpc = AArch64ISD::CALL;
+ // Calls marked with "rv_marker" are special. They should be expanded to the
+ // call, directly followed by a special marker sequence. Use the CALL_RVMARKER
+ // to do that.
+ if (CLI.CB && CLI.CB->hasRetAttr("rv_marker")) {
+ assert(!IsTailCall && "tail calls cannot be marked with rv_marker");
+ CallOpc = AArch64ISD::CALL_RVMARKER;
+ }
+
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
+ Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
@@ -4755,9 +5681,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
bool AArch64TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
- CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
- ? RetCC_AArch64_WebKit_JS
- : RetCC_AArch64_AAPCS;
+ CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
@@ -4772,9 +5696,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
auto &MF = DAG.getMachineFunction();
auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
- CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
- ? RetCC_AArch64_WebKit_JS
- : RetCC_AArch64_AAPCS;
+ CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
@@ -4819,11 +5741,9 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (RegsUsed.count(VA.getLocReg())) {
SDValue &Bits =
- std::find_if(RetVals.begin(), RetVals.end(),
- [=](const std::pair<unsigned, SDValue> &Elt) {
- return Elt.first == VA.getLocReg();
- })
- ->second;
+ llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
+ return Elt.first == VA.getLocReg();
+ })->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
} else {
RetVals.emplace_back(VA.getLocReg(), Arg);
@@ -5043,7 +5963,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
SDValue FuncTLVGet = DAG.getLoad(
PtrMemVT, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- /* Alignment = */ PtrMemVT.getSizeInBits() / 8,
+ Align(PtrMemVT.getSizeInBits() / 8),
MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
Chain = FuncTLVGet.getValue(1);
@@ -5358,6 +6278,22 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
llvm_unreachable("Unexpected platform trying to use TLS");
}
+// Looks through \param Val to determine the bit that can be used to
+// check the sign of the value. It returns the unextended value and
+// the sign bit position.
+std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
+ if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
+ return {Val.getOperand(0),
+ cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
+ 1};
+
+ if (Val.getOpcode() == ISD::SIGN_EXTEND)
+ return {Val.getOperand(0),
+ Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
+
+ return {Val, Val.getValueSizeInBits() - 1};
+}
+
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
@@ -5452,9 +6388,10 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
- uint64_t Mask = LHS.getValueSizeInBits() - 1;
+ uint64_t SignBitPos;
+ std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
- DAG.getConstant(Mask, dl, MVT::i64), Dest);
+ DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
@@ -5462,9 +6399,10 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
- uint64_t Mask = LHS.getValueSizeInBits() - 1;
+ uint64_t SignBitPos;
+ std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
- DAG.getConstant(Mask, dl, MVT::i64), Dest);
+ DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
SDValue CCVal;
@@ -5611,6 +6549,9 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
+ if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
+
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
"Unexpected type for custom ctpop lowering");
@@ -5634,6 +6575,16 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
return Val;
}
+SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isScalableVector() ||
+ useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
+
+ SDLoc DL(Op);
+ SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
+ return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
+}
+
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
@@ -5791,7 +6742,8 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// instead of a CSEL in that case.
if (TrueVal == ~FalseVal) {
Opcode = AArch64ISD::CSINV;
- } else if (TrueVal == -FalseVal) {
+ } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
+ TrueVal == -FalseVal) {
Opcode = AArch64ISD::CSNEG;
} else if (TVal.getValueType() == MVT::i32) {
// If our operands are only 32-bit wide, make sure we use 32-bit
@@ -5991,6 +6943,9 @@ SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
SDValue Entry = Op.getOperand(2);
int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
+ auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
+ AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
+
SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
@@ -6057,11 +7012,13 @@ SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
}
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
// The layout of the va_list struct is specified in the AArch64 Procedure Call
// Standard, section B.3.
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
+ auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
@@ -6071,56 +7028,64 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
+ unsigned Offset = 0;
SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
+ Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
- MachinePointerInfo(SV), /* Alignment = */ 8));
+ MachinePointerInfo(SV), Align(PtrSize)));
- // void *__gr_top at offset 8
+ // void *__gr_top at offset 8 (4 on ILP32)
+ Offset += PtrSize;
int GPRSize = FuncInfo->getVarArgsGPRSize();
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
- GRTopAddr =
- DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
+ GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Offset, DL, PtrVT));
GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
DAG.getConstant(GPRSize, DL, PtrVT));
+ GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
- MachinePointerInfo(SV, 8),
- /* Alignment = */ 8));
+ MachinePointerInfo(SV, Offset),
+ Align(PtrSize)));
}
- // void *__vr_top at offset 16
+ // void *__vr_top at offset 16 (8 on ILP32)
+ Offset += PtrSize;
int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
- DAG.getConstant(16, DL, PtrVT));
+ DAG.getConstant(Offset, DL, PtrVT));
VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
DAG.getConstant(FPRSize, DL, PtrVT));
+ VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
- MachinePointerInfo(SV, 16),
- /* Alignment = */ 8));
- }
-
- // int __gr_offs at offset 24
- SDValue GROffsAddr =
- DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
- MemOps.push_back(DAG.getStore(
- Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
- MachinePointerInfo(SV, 24), /* Alignment = */ 4));
-
- // int __vr_offs at offset 28
- SDValue VROffsAddr =
- DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
- MemOps.push_back(DAG.getStore(
- Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
- MachinePointerInfo(SV, 28), /* Alignment = */ 4));
+ MachinePointerInfo(SV, Offset),
+ Align(PtrSize)));
+ }
+
+ // int __gr_offs at offset 24 (12 on ILP32)
+ Offset += PtrSize;
+ SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Offset, DL, PtrVT));
+ MemOps.push_back(
+ DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
+ GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
+
+ // int __vr_offs at offset 28 (16 on ILP32)
+ Offset += 4;
+ SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Offset, DL, PtrVT));
+ MemOps.push_back(
+ DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
+ VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
@@ -6143,8 +7108,10 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
// pointer.
SDLoc DL(Op);
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
- unsigned VaListSize = (Subtarget->isTargetDarwin() ||
- Subtarget->isTargetWindows()) ? PtrSize : 32;
+ unsigned VaListSize =
+ (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+ ? PtrSize
+ : Subtarget->isTargetILP32() ? 20 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
@@ -6297,17 +7264,34 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue ReturnAddress;
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
- return DAG.getLoad(VT, DL, DAG.getEntryNode(),
- DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
- MachinePointerInfo());
+ ReturnAddress = DAG.getLoad(
+ VT, DL, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
+ } else {
+ // Return LR, which contains the return address. Mark it an implicit
+ // live-in.
+ unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
+ ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
+ }
+
+ // The XPACLRI instruction assembles to a hint-space instruction before
+ // Armv8.3-A, so it can safely be used on any pre-Armv8.3-A architecture.
+ // On Armv8.3-A and onwards, XPACI is available, so use that instead.
+ SDNode *St;
+ if (Subtarget->hasPAuth()) {
+ St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
+ } else {
+ // XPACLRI operates on LR, so we must move the operand accordingly.
+ SDValue Chain =
+ DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
+ St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
}
-
- // Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
- return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
+ return SDValue(St, 0);
}
/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
@@ -6488,6 +7472,22 @@ static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
return SDValue();
}
+SDValue
+AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
+ const DenormalMode &Mode) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+}
+
+SDValue
+AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op;
+}
+
SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps,
@@ -6511,17 +7511,8 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
- if (!Reciprocal) {
- EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
- VT);
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
- SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
-
+ if (!Reciprocal)
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
- // Correct the result if the operand is 0.0.
- Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
- VT, Eq, Operand, Estimate);
- }
ExtraSteps = 0;
return Estimate;
@@ -6697,23 +7688,30 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
- if (VT.getSizeInBits() == 64)
+ if (VT.isScalableVector())
+ return std::make_pair(0U, nullptr);
+ if (VT.getFixedSizeInBits() == 64)
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
- case 'w':
+ case 'w': {
if (!Subtarget->hasFPARMv8())
break;
- if (VT.isScalableVector())
- return std::make_pair(0U, &AArch64::ZPRRegClass);
- if (VT.getSizeInBits() == 16)
+ if (VT.isScalableVector()) {
+ if (VT.getVectorElementType() != MVT::i1)
+ return std::make_pair(0U, &AArch64::ZPRRegClass);
+ return std::make_pair(0U, nullptr);
+ }
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ if (VTSize == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
- if (VT.getSizeInBits() == 32)
+ if (VTSize == 32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
- if (VT.getSizeInBits() == 64)
+ if (VTSize == 64)
return std::make_pair(0U, &AArch64::FPR64RegClass);
- if (VT.getSizeInBits() == 128)
+ if (VTSize == 128)
return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
+ }
// The instructions that this constraint is designed for can
// only take 128-bit registers so just use that regclass.
case 'x':
@@ -6734,10 +7732,11 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
} else {
PredicateConstraint PC = parsePredicateConstraint(Constraint);
if (PC != PredicateConstraint::Invalid) {
- assert(VT.isScalableVector());
+ if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
+ return std::make_pair(0U, nullptr);
bool restricted = (PC == PredicateConstraint::Upl);
return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
- : std::make_pair(0U, &AArch64::PPRRegClass);
+ : std::make_pair(0U, &AArch64::PPRRegClass);
}
}
if (StringRef("{cc}").equals_lower(Constraint))
@@ -6976,6 +7975,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
SDLoc dl(Op);
EVT VT = Op.getValueType();
+ assert(!VT.isScalableVector() &&
+ "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
unsigned NumElts = VT.getVectorNumElements();
struct ShuffleSourceInfo {
@@ -7046,8 +8047,9 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
}
}
unsigned ResMultiplier =
- VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
- NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
// If the source vector is too wide or too narrow, we may nevertheless be able
@@ -7056,17 +8058,18 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
- if (SrcVT.getSizeInBits() == VT.getSizeInBits())
+ uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
+ if (SrcVTSize == VTSize)
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
- unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+ unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
- if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
- assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
+ if (SrcVTSize < VTSize) {
+ assert(2 * SrcVTSize == VTSize);
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
Src.ShuffleVec =
@@ -7075,7 +8078,11 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
continue;
}
- assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
+ if (SrcVTSize != 2 * VTSize) {
+ LLVM_DEBUG(
+ dbgs() << "Reshuffle failed: result vector too small to extract\n");
+ return SDValue();
+ }
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
LLVM_DEBUG(
@@ -7104,6 +8111,13 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
+ if (!SrcVT.is64BitVector()) {
+ LLVM_DEBUG(
+ dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
+ "for SVE vectors.");
+ return SDValue();
+ }
+
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
VEXTSrc2,
DAG.getConstant(Imm, dl, MVT::i32));
@@ -7120,7 +8134,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
- Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ Src.WindowScale =
+ SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
@@ -7144,8 +8159,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
- int BitsDefined =
- std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits());
+ int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
+ VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
@@ -7209,6 +8224,81 @@ static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
return true;
}
+/// Check if a vector shuffle corresponds to a DUP instruction with a larger
+/// element width than the vector lane type. If that is the case, the function
+/// returns true and writes the value of the DUP instruction lane operand into
+/// DupLaneOp.
+static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
+ unsigned &DupLaneOp) {
+ assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+ "Only possible block sizes for wide DUP are: 16, 32, 64");
+
+ if (BlockSize <= VT.getScalarSizeInBits())
+ return false;
+ if (BlockSize % VT.getScalarSizeInBits() != 0)
+ return false;
+ if (VT.getSizeInBits() % BlockSize != 0)
+ return false;
+
+ size_t SingleVecNumElements = VT.getVectorNumElements();
+ size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
+ size_t NumBlocks = VT.getSizeInBits() / BlockSize;
+
+ // We are looking for masks like
+ // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
+ // might be replaced by 'undefined'. BlockElts will eventually contain the
+ // lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
+ // for the above examples).
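+ // Concretely, with VT = v8i16 and BlockSize = 32 (NumEltsPerBlock = 2),
+ // the mask [2, 3, 2, 3, 2, 3, 2, 3] yields BlockElts = [2, 3] and
+ // DupLaneOp = 1, i.e. a DUP of 32-bit lane 1.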
+ SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
+ for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
+ for (size_t I = 0; I < NumEltsPerBlock; I++) {
+ int Elt = M[BlockIndex * NumEltsPerBlock + I];
+ if (Elt < 0)
+ continue;
+ // For now we don't support shuffles that use the second operand
+ if ((unsigned)Elt >= SingleVecNumElements)
+ return false;
+ if (BlockElts[I] < 0)
+ BlockElts[I] = Elt;
+ else if (BlockElts[I] != Elt)
+ return false;
+ }
+
+ // We found a candidate block (possibly with some undefs). It must be a
+ // sequence of consecutive integers starting with a value divisible by
+ // NumEltsPerBlock with some values possibly replaced by undef-s.
+
+ // Find first non-undef element
+ auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
+ assert(FirstRealEltIter != BlockElts.end() &&
+ "Shuffle with all-undefs must have been caught by previous cases, "
+ "e.g. isSplat()");
+ if (FirstRealEltIter == BlockElts.end()) {
+ DupLaneOp = 0;
+ return true;
+ }
+
+ // Index of FirstRealElt in BlockElts
+ size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
+
+ if ((unsigned)*FirstRealEltIter < FirstRealIndex)
+ return false;
+ // BlockElts[0] must have the following value if it isn't undef:
+ size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
+
+ // Check the first element
+ if (Elt0 % NumEltsPerBlock != 0)
+ return false;
+ // Check that the sequence indeed consists of consecutive integers (modulo
+ // undefs)
+ for (size_t I = 0; I < NumEltsPerBlock; I++)
+ if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
+ return false;
+
+ DupLaneOp = Elt0 / NumEltsPerBlock;
+ return true;
+}
+
// check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are different.
static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
@@ -7642,6 +8732,60 @@ static unsigned getDUPLANEOp(EVT EltType) {
llvm_unreachable("Invalid vector element type?");
}
+static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
+ unsigned Opcode, SelectionDAG &DAG) {
+ // Try to eliminate a bitcasted extract subvector before a DUPLANE.
+ auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
+ // Match: dup (bitcast (extract_subv X, C)), LaneC
+ if (BitCast.getOpcode() != ISD::BITCAST ||
+ BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+
+ // The extract index must align in the destination type. That may not
+ // happen if the bitcast is from narrow to wide type.
+ SDValue Extract = BitCast.getOperand(0);
+ unsigned ExtIdx = Extract.getConstantOperandVal(1);
+ unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
+ unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
+ unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
+ if (ExtIdxInBits % CastedEltBitWidth != 0)
+ return false;
+
+ // Update the lane value by offsetting with the scaled extract index.
+ LaneC += ExtIdxInBits / CastedEltBitWidth;
+
+ // Determine the casted vector type of the wide vector input.
+ // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
+ // Examples:
+ // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
+ // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
+ unsigned SrcVecNumElts =
+ Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
+ CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
+ SrcVecNumElts);
+ return true;
+ };
+ MVT CastVT;
+ if (getScaledOffsetDup(V, Lane, CastVT)) {
+ V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
+ } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ // The lane is incremented by the index of the extract.
+ // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
+ Lane += V.getConstantOperandVal(1);
+ V = V.getOperand(0);
+ } else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
+ // The lane is decremented if we are splatting from the 2nd operand.
+ // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
+ unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
+ Lane -= Idx * VT.getVectorNumElements() / 2;
+ V = WidenVector(V.getOperand(Idx), DAG);
+ } else if (VT.getSizeInBits() == 64) {
+ // Widen the operand to 128-bit register with undef.
+ V = WidenVector(V, DAG);
+ }
+ return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
+}
+
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -7675,57 +8819,26 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Otherwise, duplicate from the lane of the input vector.
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
-
- // Try to eliminate a bitcasted extract subvector before a DUPLANE.
- auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
- // Match: dup (bitcast (extract_subv X, C)), LaneC
- if (BitCast.getOpcode() != ISD::BITCAST ||
- BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return false;
-
- // The extract index must align in the destination type. That may not
- // happen if the bitcast is from narrow to wide type.
- SDValue Extract = BitCast.getOperand(0);
- unsigned ExtIdx = Extract.getConstantOperandVal(1);
- unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
- unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
- unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
- if (ExtIdxInBits % CastedEltBitWidth != 0)
- return false;
-
- // Update the lane value by offsetting with the scaled extract index.
- LaneC += ExtIdxInBits / CastedEltBitWidth;
-
- // Determine the casted vector type of the wide vector input.
- // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
- // Examples:
- // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
- // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
- unsigned SrcVecNumElts =
- Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
- CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
- SrcVecNumElts);
- return true;
- };
- MVT CastVT;
- if (getScaledOffsetDup(V1, Lane, CastVT)) {
- V1 = DAG.getBitcast(CastVT, V1.getOperand(0).getOperand(0));
- } else if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- // The lane is incremented by the index of the extract.
- // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
- Lane += V1.getConstantOperandVal(1);
- V1 = V1.getOperand(0);
- } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
- // The lane is decremented if we are splatting from the 2nd operand.
- // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
- unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
- Lane -= Idx * VT.getVectorNumElements() / 2;
- V1 = WidenVector(V1.getOperand(Idx), DAG);
- } else if (VT.getSizeInBits() == 64) {
- // Widen the operand to 128-bit register with undef.
- V1 = WidenVector(V1, DAG);
- }
- return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
+ return constructDup(V1, Lane, dl, VT, Opcode, DAG);
+ }
+
+ // Check if the mask matches a DUP for a wider element
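+ // e.g. a v16i8 mask of <8,9,10,11,12,13,14,15,8,9,10,11,12,13,14,15> is a
+ // DUPLANE64 of lane 1 once V1 is reinterpreted as v2i64.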
+ for (unsigned LaneSize : {64U, 32U, 16U}) {
+ unsigned Lane = 0;
+ if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
+ unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
+ : LaneSize == 32 ? AArch64ISD::DUPLANE32
+ : AArch64ISD::DUPLANE16;
+ // Cast V1 to an integer vector with required lane size
+ MVT NewEltTy = MVT::getIntegerVT(LaneSize);
+ unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
+ MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
+ V1 = DAG.getBitcast(NewVecTy, V1);
+ // Construct the DUP instruction
+ V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
+ // Cast back to the original type
+ return DAG.getBitcast(VT, V1);
+ }
}
if (isREVMask(ShuffleMask, VT, 64))
@@ -7796,7 +8909,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
EVT ScalarVT = VT.getVectorElementType();
- if (ScalarVT.getSizeInBits() < 32 && ScalarVT.isInteger())
+ if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
ScalarVT = MVT::i32;
return DAG.getNode(
@@ -7835,9 +8948,11 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT ElemVT = VT.getScalarType();
-
SDValue SplatVal = Op.getOperand(0);
+ if (useSVEForFixedLengthVectorVT(VT))
+ return LowerToScalableOp(Op, DAG);
+
// Extend input splat value where needed to fit into a GPR (32b or 64b only)
// FPRs don't have this restriction.
switch (ElemVT.getSimpleVT().SimpleTy) {
@@ -8267,6 +9382,9 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
+ if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+ return LowerToScalableOp(Op, DAG);
+
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
return Res;
@@ -8425,14 +9543,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
bool isConstant = true;
bool AllLanesExtractElt = true;
unsigned NumConstantLanes = 0;
+ unsigned NumDifferentLanes = 0;
+ unsigned NumUndefLanes = 0;
SDValue Value;
SDValue ConstantValue;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
AllLanesExtractElt = false;
- if (V.isUndef())
+ if (V.isUndef()) {
+ ++NumUndefLanes;
continue;
+ }
if (i > 0)
isOnlyLowElement = false;
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
@@ -8448,8 +9570,10 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (!Value.getNode())
Value = V;
- else if (V != Value)
+ else if (V != Value) {
usesOnlyOneValue = false;
+ ++NumDifferentLanes;
+ }
}
if (!Value.getNode()) {
@@ -8575,11 +9699,20 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
}
+ // If we need to insert a small number of different non-constant elements and
+ // the vector width is sufficiently large, prefer using DUP with the common
+ // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
+ // skip the constant lane handling below.
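+ // For example, a v8i16 where seven lanes hold the same non-constant value X
+ // and one lane differs becomes a DUP of X followed by a single
+ // INSERT_VECTOR_ELT, instead of lowering every lane individually.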
+ bool PreferDUPAndInsert =
+ !isConstant && NumDifferentLanes >= 1 &&
+ NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
+ NumDifferentLanes >= NumConstantLanes;
+
// If there was only one constant value used and for more than one lane,
// start by splatting that value, then replace the non-constant lanes. This
// is better than the default, which will perform a separate initialization
// for each lane.
- if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
+ if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
// Firstly, try to materialize the splat constant.
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
Val = ConstantBuildVector(Vec, DAG);
@@ -8615,6 +9748,22 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return shuffle;
}
+ if (PreferDUPAndInsert) {
+ // First, build a vector with the common element splatted into every lane.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0; I < NumElts; ++I)
+ Ops.push_back(Value);
+ SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
+ // Next, insert the elements that do not match the common value.
+ for (unsigned I = 0; I < NumElts; ++I)
+ if (Op.getOperand(I) != Value)
+ NewVector =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
+ Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
+
+ return NewVector;
+ }
+
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
@@ -8663,6 +9812,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return SDValue();
}
+SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getValueType().isScalableVector() &&
+ isTypeLegal(Op.getValueType()) &&
+ "Expected legal scalable vector type!");
+
+ if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
+ return Op;
+
+ return SDValue();
+}
+
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
@@ -8758,7 +9919,8 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
- if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64)
+ if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
+ InVT.getSizeInBits() == 128)
return Op;
return SDValue();
@@ -8772,9 +9934,34 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
EVT InVT = Op.getOperand(1).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- // We don't have any patterns for scalable vector yet.
- if (InVT.isScalableVector() || !useSVEForFixedLengthVectorVT(InVT))
+ if (InVT.isScalableVector()) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ if (!isTypeLegal(VT) || !VT.isInteger())
+ return SDValue();
+
+ SDValue Vec0 = Op.getOperand(0);
+ SDValue Vec1 = Op.getOperand(1);
+
+ // Ensure the subvector is half the size of the main vector.
+ if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
+ return SDValue();
+
+ // Extend elements of smaller vector...
+ EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
+ SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
+
+ if (Idx == 0) {
+ SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
+ return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
+ } else if (Idx == InVT.getVectorMinNumElements()) {
+ SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
+ return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
+ }
+
return SDValue();
+ }
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
@@ -8783,6 +9970,42 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
return SDValue();
}
+SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
+ return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
+
+ assert(VT.isScalableVector() && "Expected a scalable vector.");
+
+ bool Signed = Op.getOpcode() == ISD::SDIV;
+ unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
+
+ if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
+ return LowerToPredicatedOp(Op, DAG, PredOpcode);
+
+ // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
+ // operations, and truncate the result.
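+ // For example, an nxv16i8 division is unpacked into two nxv8i16 halves;
+ // each half re-enters this lowering and is widened again to nxv4i32 for the
+ // actual [SU]DIV_PRED, with UZP1 nodes re-packing the results.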
+ EVT WidenedVT;
+ if (VT == MVT::nxv16i8)
+ WidenedVT = MVT::nxv8i16;
+ else if (VT == MVT::nxv8i16)
+ WidenedVT = MVT::nxv4i32;
+ else
+ llvm_unreachable("Unexpected Custom DIV operation");
+
+ SDLoc dl(Op);
+ unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
+ unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
+ SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
+ SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
+ SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
+ SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
+ SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
+ SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
+ return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
+}
+
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
if (useSVEForFixedLengthVectorVT(VT))
@@ -8867,14 +10090,6 @@ static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}
-// Attempt to form urhadd(OpA, OpB) from
-// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1)).
-// The original form of this expression is
-// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and before this function
-// is called the srl will have been lowered to AArch64ISD::VLSHR and the
-// ((OpA + OpB + 1) >> 1) expression will have been changed to (OpB - (~OpA)).
-// This pass can also recognize a variant of this pattern that uses sign
-// extension instead of zero extension and form a srhadd(OpA, OpB) from it.
SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -8890,66 +10105,12 @@ SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
}
if (!VT.isVector() || VT.isScalableVector())
- return Op;
+ return SDValue();
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
- // Since we are looking for a right shift by a constant value of 1 and we are
- // operating on types at least 16 bits in length (sign/zero extended OpA and
- // OpB, which are at least 8 bits), it follows that the truncate will always
- // discard the shifted-in bit and therefore the right shift will be logical
- // regardless of the signedness of OpA and OpB.
- SDValue Shift = Op.getOperand(0);
- if (Shift.getOpcode() != AArch64ISD::VLSHR)
- return Op;
-
- // Is the right shift using an immediate value of 1?
- uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
- if (ShiftAmount != 1)
- return Op;
-
- SDValue Sub = Shift->getOperand(0);
- if (Sub.getOpcode() != ISD::SUB)
- return Op;
-
- SDValue Xor = Sub.getOperand(1);
- if (Xor.getOpcode() != ISD::XOR)
- return Op;
-
- SDValue ExtendOpA = Xor.getOperand(0);
- SDValue ExtendOpB = Sub.getOperand(0);
- unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
- unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
- if (!(ExtendOpAOpc == ExtendOpBOpc &&
- (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
- return Op;
-
- // Is the result of the right shift being truncated to the same value type as
- // the original operands, OpA and OpB?
- SDValue OpA = ExtendOpA.getOperand(0);
- SDValue OpB = ExtendOpB.getOperand(0);
- EVT OpAVT = OpA.getValueType();
- assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
- if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
- return Op;
-
- // Is the XOR using a constant amount of all ones in the right hand side?
- uint64_t C;
- if (!isAllConstantBuildVector(Xor.getOperand(1), C))
- return Op;
-
- unsigned ElemSizeInBits = VT.getScalarSizeInBits();
- APInt CAsAPInt(ElemSizeInBits, C);
- if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
- return Op;
-
- SDLoc DL(Op);
- bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
- unsigned RHADDOpc = IsSignExtend ? AArch64ISD::SRHADD : AArch64ISD::URHADD;
- SDValue ResultURHADD = DAG.getNode(RHADDOpc, DL, VT, OpA, OpB);
-
- return ResultURHADD;
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
@@ -8967,8 +10128,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
- if (VT.isScalableVector())
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_MERGE_OP1);
+ if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
@@ -8979,9 +10140,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
- if (VT.isScalableVector()) {
- unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_MERGE_OP1
- : AArch64ISD::SRL_MERGE_OP1;
+ if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
+ unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
+ : AArch64ISD::SRL_PRED;
return LowerToPredicatedOp(Op, DAG, Opc);
}
@@ -9033,7 +10194,7 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
else
Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
- return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq);
+ return DAG.getNOT(dl, Fcmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
@@ -9072,7 +10233,7 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
else
Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
- return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq);
+ return DAG.getNOT(dl, Cmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
@@ -9113,6 +10274,9 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
}
+ if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
+ return LowerFixedLengthVectorSetccToSVE(Op, DAG);
+
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -9185,6 +10349,51 @@ static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+
+ // Try to lower fixed length reductions to SVE.
+ EVT SrcVT = Src.getValueType();
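+ // OverrideNEON covers the reductions that are routed through SVE even for
+ // fixed length vectors: AND/OR/XOR, FADD, and any non-ADD reduction with
+ // 64-bit elements.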
+ bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
+ Op.getOpcode() == ISD::VECREDUCE_OR ||
+ Op.getOpcode() == ISD::VECREDUCE_XOR ||
+ Op.getOpcode() == ISD::VECREDUCE_FADD ||
+ (Op.getOpcode() != ISD::VECREDUCE_ADD &&
+ SrcVT.getVectorElementType() == MVT::i64);
+ if (SrcVT.isScalableVector() ||
+ useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
+
+ if (SrcVT.getVectorElementType() == MVT::i1)
+ return LowerPredReductionToSVE(Op, DAG);
+
+ switch (Op.getOpcode()) {
+ case ISD::VECREDUCE_ADD:
+ return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
+ case ISD::VECREDUCE_AND:
+ return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
+ case ISD::VECREDUCE_OR:
+ return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
+ case ISD::VECREDUCE_SMAX:
+ return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
+ case ISD::VECREDUCE_SMIN:
+ return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
+ case ISD::VECREDUCE_UMAX:
+ return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
+ case ISD::VECREDUCE_UMIN:
+ return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
+ case ISD::VECREDUCE_XOR:
+ return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
+ case ISD::VECREDUCE_FADD:
+ return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
+ case ISD::VECREDUCE_FMAX:
+ return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
+ case ISD::VECREDUCE_FMIN:
+ return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
+ default:
+ llvm_unreachable("Unhandled fixed length reduction");
+ }
+ }
+
+ // Lower NEON reductions.
SDLoc dl(Op);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
@@ -9198,18 +10407,16 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
case ISD::VECREDUCE_UMIN:
return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
case ISD::VECREDUCE_FMAX: {
- assert(Op->getFlags().hasNoNaNs() && "fmax vector reduction needs NoNaN flag");
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
- Op.getOperand(0));
+ Src);
}
case ISD::VECREDUCE_FMIN: {
- assert(Op->getFlags().hasNoNaNs() && "fmin vector reduction needs NoNaN flag");
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
- Op.getOperand(0));
+ Src);
}
default:
llvm_unreachable("Unhandled reduction");
@@ -9219,7 +10426,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
- if (!Subtarget.hasLSE())
+ if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-add instruction, but not a load-sub.
@@ -9236,7 +10443,7 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
- if (!Subtarget.hasLSE())
+ if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-clear instruction, but not a load-and.
@@ -9337,16 +10544,17 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
template <unsigned NumVecs>
-static bool setInfoSVEStN(AArch64TargetLowering::IntrinsicInfo &Info,
- const CallInst &CI) {
+static bool
+setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
+ AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
Info.opc = ISD::INTRINSIC_VOID;
// Retrieve EC from first vector argument.
- const EVT VT = EVT::getEVT(CI.getArgOperand(0)->getType());
+ const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
ElementCount EC = VT.getVectorElementCount();
#ifndef NDEBUG
// Check the assumption that all input vectors are the same type.
for (unsigned I = 0; I < NumVecs; ++I)
- assert(VT == EVT::getEVT(CI.getArgOperand(I)->getType()) &&
+ assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
"Invalid type.");
#endif
// memVT is `NumVecs * VT`.
@@ -9369,11 +10577,11 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_sve_st2:
- return setInfoSVEStN<2>(Info, I);
+ return setInfoSVEStN<2>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st3:
- return setInfoSVEStN<3>(Info, I);
+ return setInfoSVEStN<3>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st4:
- return setInfoSVEStN<4>(Info, I);
+ return setInfoSVEStN<4>(*this, DL, Info, I);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
@@ -9529,15 +10737,15 @@ bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
- unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
- unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
+ uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
return NumBits1 > NumBits2;
}
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
- unsigned NumBits1 = VT1.getSizeInBits();
- unsigned NumBits2 = VT2.getSizeInBits();
+ uint64_t NumBits1 = VT1.getFixedSizeInBits();
+ uint64_t NumBits2 = VT2.getFixedSizeInBits();
return NumBits1 > NumBits2;
}
@@ -9779,6 +10987,43 @@ bool AArch64TargetLowering::shouldSinkOperands(
return true;
}
+ case Instruction::Mul: {
+ bool IsProfitable = false;
+ for (auto &Op : I->operands()) {
+ // Make sure we are not already sinking this operand
+ if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
+ continue;
+
+ ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
+ if (!Shuffle || !Shuffle->isZeroEltSplat())
+ continue;
+
+ Value *ShuffleOperand = Shuffle->getOperand(0);
+ InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
+ if (!Insert)
+ continue;
+
+ Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
+ if (!OperandInstr)
+ continue;
+
+ ConstantInt *ElementConstant =
+ dyn_cast<ConstantInt>(Insert->getOperand(2));
+ // Check that the insertelement is inserting into element 0
+ if (!ElementConstant || ElementConstant->getZExtValue() != 0)
+ continue;
+
+ unsigned Opcode = OperandInstr->getOpcode();
+ if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
+ continue;
+
+ Ops.push_back(&Shuffle->getOperandUse(0));
+ Ops.push_back(&Op);
+ IsProfitable = true;
+ }
+
+ return IsProfitable;
+ }
default:
return false;
}
@@ -10114,11 +11359,12 @@ SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
{Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
- assert(VT.getVectorElementCount().Min % N == 0 &&
+ assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
"invalid tuple vector type!");
- EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- VT.getVectorElementCount() / N);
+ EVT SplitVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorElementCount().divideCoefficientBy(N));
assert(isTypeLegal(SplitVT));
SmallVector<EVT, 5> VTs(N, SplitVT);
@@ -10409,32 +11655,77 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
}
-// Generate SUBS and CSEL for integer abs.
-static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
+// VECREDUCE_ADD( EXTEND(v16i8_type) ) to
+// VECREDUCE_ADD( DOTv16i8(v16i8_type) )
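+// e.g. vecreduce_add(zext <16 x i8> %a to <16 x i32>) becomes
+// vecreduce_add(udot(<4 x i32> zeroinitializer, <16 x i8> splat(1), %a)),
+// and similarly with sdot for the sign-extended form.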
+static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *ST) {
+ SDValue Op0 = N->getOperand(0);
+ if (!ST->hasDotProd() || N->getValueType(0) != MVT::i32)
+ return SDValue();
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDLoc DL(N);
+ if (Op0.getValueType().getVectorElementType() != MVT::i32)
+ return SDValue();
- // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
- // and change it to SUB and CSEL.
- if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
- N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
- N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
- if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
- if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- N0.getOperand(0));
- // Generate SUBS & CSEL.
- SDValue Cmp =
- DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
- N0.getOperand(0), DAG.getConstant(0, DL, VT));
- return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
- DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
- SDValue(Cmp.getNode(), 1));
- }
- return SDValue();
+ unsigned ExtOpcode = Op0.getOpcode();
+ if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
+ return SDValue();
+
+ EVT Op0VT = Op0.getOperand(0).getValueType();
+ if (Op0VT != MVT::v16i8)
+ return SDValue();
+
+ SDLoc DL(Op0);
+ SDValue Ones = DAG.getConstant(1, DL, Op0VT);
+ SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32);
+ auto DotIntrinsic = (ExtOpcode == ISD::ZERO_EXTEND)
+ ? Intrinsic::aarch64_neon_udot
+ : Intrinsic::aarch64_neon_sdot;
+ SDValue Dot = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Zeros.getValueType(),
+ DAG.getConstant(DotIntrinsic, DL, MVT::i32), Zeros,
+ Ones, Op0.getOperand(0));
+ return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
+}
+
+// Given an ABS node, detect the following pattern:
+// (ABS (SUB (EXTEND a), (EXTEND b)))
+// and generate a UABD/SABD instruction from it.
+static SDValue performABSCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SDValue AbsOp1 = N->getOperand(0);
+ SDValue Op0, Op1;
+
+ if (AbsOp1.getOpcode() != ISD::SUB)
+ return SDValue();
+
+ Op0 = AbsOp1.getOperand(0);
+ Op1 = AbsOp1.getOperand(1);
+
+ unsigned Opc0 = Op0.getOpcode();
+ // Check if the operands of the sub are (zero|sign)-extended.
+ if (Opc0 != Op1.getOpcode() ||
+ (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
+ return SDValue();
+
+ EVT VectorT1 = Op0.getOperand(0).getValueType();
+ EVT VectorT2 = Op1.getOperand(0).getValueType();
+ // Check if vectors are of same type and valid size.
+ uint64_t Size = VectorT1.getFixedSizeInBits();
+ if (VectorT1 != VectorT2 || (Size != 64 && Size != 128))
+ return SDValue();
+
+ // Check if vector element types are valid.
+ EVT VT1 = VectorT1.getVectorElementType();
+ if (VT1 != MVT::i8 && VT1 != MVT::i16 && VT1 != MVT::i32)
+ return SDValue();
+
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.getOperand(0);
+ unsigned ABDOpcode =
+ (Opc0 == ISD::SIGN_EXTEND) ? AArch64ISD::SABD : AArch64ISD::UABD;
+ SDValue ABD =
+ DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
}
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
@@ -10443,10 +11734,7 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
- return Cmp;
-
- return performIntegerAbsCombine(N, DAG);
+ return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
}
SDValue
@@ -10505,9 +11793,157 @@ static bool IsSVECntIntrinsic(SDValue S) {
return false;
}
+/// Calculates what the pre-extend type is, based on the extension
+/// operation node provided by \p Extend.
+///
+/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
+/// pre-extend type is pulled directly from the operand, while other extend
+/// operations need a bit more inspection to get this information.
+///
+/// \param Extend The SDNode from the DAG that represents the extend operation
+/// \param DAG The SelectionDAG hosting the \p Extend node
+///
+/// \returns The type representing the \p Extend source type, or \p MVT::Other
+/// if no valid type can be determined
+static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
+ switch (Extend.getOpcode()) {
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return Extend.getOperand(0).getValueType();
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ case ISD::SIGN_EXTEND_INREG: {
+ VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
+ if (!TypeNode)
+ return MVT::Other;
+ return TypeNode->getVT();
+ }
+ case ISD::AND: {
+ ConstantSDNode *Constant =
+ dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
+ if (!Constant)
+ return MVT::Other;
+
+ uint32_t Mask = Constant->getZExtValue();
+
+ if (Mask == UCHAR_MAX)
+ return MVT::i8;
+ else if (Mask == USHRT_MAX)
+ return MVT::i16;
+ else if (Mask == UINT_MAX)
+ return MVT::i32;
+
+ return MVT::Other;
+ }
+ default:
+ return MVT::Other;
+ }
+
+ llvm_unreachable("Code path unhandled in calculatePreExtendType!");
+}
+
+/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
+/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
+static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
+ SelectionDAG &DAG) {
+
+ ShuffleVectorSDNode *ShuffleNode =
+ dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
+ if (!ShuffleNode)
+ return SDValue();
+
+ // Ensure the shuffle is a splat of lane 0 before continuing
+ if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
+ return SDValue();
+
+ SDValue InsertVectorElt = VectorShuffle.getOperand(0);
+
+ if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ return SDValue();
+
+ SDValue InsertLane = InsertVectorElt.getOperand(2);
+ ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
+ // Ensures the insert is inserting into lane 0
+ if (!Constant || Constant->getZExtValue() != 0)
+ return SDValue();
+
+ SDValue Extend = InsertVectorElt.getOperand(1);
+ unsigned ExtendOpcode = Extend.getOpcode();
+
+ bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
+ ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
+ ExtendOpcode == ISD::AssertSext;
+ if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
+ ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
+ return SDValue();
+
+ EVT TargetType = VectorShuffle.getValueType();
+ EVT PreExtendType = calculatePreExtendType(Extend, DAG);
+
+ if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
+ TargetType != MVT::v2i64) ||
+ (PreExtendType == MVT::Other))
+ return SDValue();
+
+ // Restrict valid pre-extend data type
+ if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
+ PreExtendType != MVT::i32)
+ return SDValue();
+
+ EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
+
+ if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
+ return SDValue();
+
+ if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
+ return SDValue();
+
+ SDLoc DL(VectorShuffle);
+
+ SDValue InsertVectorNode = DAG.getNode(
+ InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
+ DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
+ DAG.getConstant(0, DL, MVT::i64));
+
+ std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
+
+ SDValue VectorShuffleNode =
+ DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
+ DAG.getUNDEF(PreExtendVT), ShuffleMask);
+
+ SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ DL, TargetType, VectorShuffleNode);
+
+ return ExtendNode;
+}
+
+/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
+/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
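+/// e.g. mul (v4i32 dup (sext i16 %x)), %y becomes
+/// mul (sext (v4i16 dup %x) to v4i32), %y, which lets later pattern matching
+/// (e.g. of smull/umull) see through the duplicate.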
+static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
+ // If the value type isn't a vector, none of the operands are going to be dups
+ if (!Mul->getValueType(0).isVector())
+ return SDValue();
+
+ SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
+ SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
+
+ // Neither operand has been changed; don't make any further changes.
+ if (!Op0 && !Op1)
+ return SDValue();
+
+ SDLoc DL(Mul);
+ return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
+ Op0 ? Op0 : Mul->getOperand(0),
+ Op1 ? Op1 : Mul->getOperand(1));
+}
+
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
+
+ if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
+ return Ext;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -11042,6 +12478,9 @@ static SDValue performSVEAndCombine(SDNode *N,
return DAG.getNode(Opc, DL, N->getValueType(0), And);
}
+ if (!EnableCombineMGatherIntrinsics)
+ return SDValue();
+
SDValue Mask = N->getOperand(1);
if (!Src.hasOneUse())
@@ -11095,6 +12534,11 @@ static SDValue performANDCombine(SDNode *N,
if (VT.isScalableVector())
return performSVEAndCombine(N, DCI);
+ // The combining code below works only for NEON vectors. In particular, it
+ // does not work for SVE when dealing with vectors wider than 128 bits.
+ if (!(VT.is64BitVector() || VT.is128BitVector()))
+ return SDValue();
+
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
if (!BVN)
@@ -11155,6 +12599,143 @@ static SDValue performSRLCombine(SDNode *N,
return SDValue();
}
+// Attempt to form urhadd(OpA, OpB) from
+// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
+// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
+// The original form of the first expression is
+// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
+// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
+// Before this function is called the srl will have been lowered to
+// AArch64ISD::VLSHR.
+// This pass can also recognize signed variants of the patterns that use sign
+// extension instead of zero extension and form a srhadd(OpA, OpB) or a
+// shadd(OpA, OpB) from them.
+static SDValue
+performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // Since we are looking for a right shift by a constant value of 1 and we are
+ // operating on types at least 16 bits in length (sign/zero extended OpA and
+ // OpB, which are at least 8 bits), it follows that the truncate will always
+ // discard the shifted-in bit and therefore the right shift will be logical
+ // regardless of the signedness of OpA and OpB.
+ SDValue Shift = N->getOperand(0);
+ if (Shift.getOpcode() != AArch64ISD::VLSHR)
+ return SDValue();
+
+ // Is the right shift using an immediate value of 1?
+ uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
+ if (ShiftAmount != 1)
+ return SDValue();
+
+ SDValue ExtendOpA, ExtendOpB;
+ SDValue ShiftOp0 = Shift.getOperand(0);
+ unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
+ if (ShiftOp0Opc == ISD::SUB) {
+
+ SDValue Xor = ShiftOp0.getOperand(1);
+ if (Xor.getOpcode() != ISD::XOR)
+ return SDValue();
+
+ // Is the XOR using a constant amount of all ones in the right hand side?
+ uint64_t C;
+ if (!isAllConstantBuildVector(Xor.getOperand(1), C))
+ return SDValue();
+
+ unsigned ElemSizeInBits = VT.getScalarSizeInBits();
+ APInt CAsAPInt(ElemSizeInBits, C);
+ if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
+ return SDValue();
+
+ ExtendOpA = Xor.getOperand(0);
+ ExtendOpB = ShiftOp0.getOperand(0);
+ } else if (ShiftOp0Opc == ISD::ADD) {
+ ExtendOpA = ShiftOp0.getOperand(0);
+ ExtendOpB = ShiftOp0.getOperand(1);
+ } else
+ return SDValue();
+
+ unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
+ unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
+ if (!(ExtendOpAOpc == ExtendOpBOpc &&
+ (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
+ return SDValue();
+
+ // Is the result of the right shift being truncated to the same value type as
+ // the original operands, OpA and OpB?
+ SDValue OpA = ExtendOpA.getOperand(0);
+ SDValue OpB = ExtendOpB.getOperand(0);
+ EVT OpAVT = OpA.getValueType();
+ assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
+ if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
+ return SDValue();
+
+ SDLoc DL(N);
+ bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
+ bool IsRHADD = ShiftOp0Opc == ISD::SUB;
+ unsigned HADDOpc = IsSignExtend
+ ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
+ : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
+ SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
+
+ return ResultHADD;
+}
+
+static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
+ switch (Opcode) {
+ case ISD::FADD:
+ return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
+ case ISD::ADD:
+ return VT == MVT::i64;
+ default:
+ return false;
+ }
+}
+
+static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
+
+ EVT VT = N->getValueType(0);
+ const bool FullFP16 =
+ static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+
+ // Rewrite for pairwise fadd pattern
+ // (f32 (extract_vector_elt
+ // (fadd (vXf32 Other)
+ // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
+ // ->
+ // (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
+ // (extract_vector_elt (vXf32 Other) 1))
+ if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
+ hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
+ SDLoc DL(N0);
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+
+ ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
+ SDValue Other = N00;
+
+ // And handle the commutative case.
+ if (!Shuffle) {
+ Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
+ Other = N01;
+ }
+
+ if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
+ Other == Shuffle->getOperand(0)) {
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
+ DAG.getConstant(0, DL, MVT::i64)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
+ DAG.getConstant(1, DL, MVT::i64)));
+ }
+ }
+
+ return SDValue();
+}
+
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -11200,9 +12781,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- // Optimise concat_vectors of two [us]rhadds that use extracted subvectors
- // from the same original vectors. Combine these into a single [us]rhadd that
- // operates on the two original vectors. Example:
+ // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
+ // subvectors from the same original vectors. Combine these into a single
+ // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
// (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
// extract_subvector (v16i8 OpB,
// <0>))),
@@ -11212,7 +12793,8 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// ->
// (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
- (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD)) {
+ (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
+ N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
SDValue N10 = N1->getOperand(0);
@@ -11517,6 +13099,43 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
+// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
+static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ // Only scalar integer and vector types.
+ if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
+ return SDValue();
+
+ auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
+ if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
+ return SDValue();
+
+ SDValue Op1 = LHS->getOperand(0);
+ SDValue Op2 = RHS->getOperand(0);
+ EVT OpVT1 = Op1.getValueType();
+ EVT OpVT2 = Op2.getValueType();
+ if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
+ Op2.getOpcode() != AArch64ISD::UADDV ||
+ OpVT1.getVectorElementType() != VT)
+ return SDValue();
+
+ SDValue Val1 = Op1.getOperand(0);
+ SDValue Val2 = Op2.getOperand(0);
+ EVT ValVT = Val1->getValueType(0);
+ SDLoc DL(N);
+ SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
+ DAG.getConstant(0, DL, MVT::i64));
+}
+
// The basic add/sub long vector instructions have variants with "2" on the end
// which act on the high-half of their inputs. They are normally matched by
// patterns like:
@@ -11570,6 +13189,16 @@ static SDValue performAddSubLongCombine(SDNode *N,
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}
+static SDValue performAddSubCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ // Try to change sum of two reductions.
+ if (SDValue Val = performUADDVCombine(N, DAG))
+ return Val;
+
+ return performAddSubLongCombine(N, DCI, DAG);
+}
+
// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.
//
@@ -11583,8 +13212,8 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
+ SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
+ SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
@@ -11602,6 +13231,9 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
return SDValue();
}
+ if (IID == Intrinsic::not_intrinsic)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
+
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
N->getOperand(0), LHS, RHS);
}
@@ -11700,34 +13332,6 @@ static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
DAG.getConstant(0, dl, MVT::i64));
}
-static SDValue LowerSVEIntReduction(SDNode *N, unsigned Opc,
- SelectionDAG &DAG) {
- SDLoc dl(N);
- LLVMContext &Ctx = *DAG.getContext();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
- EVT VT = N->getValueType(0);
- SDValue Pred = N->getOperand(1);
- SDValue Data = N->getOperand(2);
- EVT DataVT = Data.getValueType();
-
- if (DataVT.getVectorElementType().isScalarInteger() &&
- (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)) {
- if (!TLI.isTypeLegal(DataVT))
- return SDValue();
-
- EVT OutputVT = EVT::getVectorVT(Ctx, VT,
- AArch64::NeonBitsPerVector / VT.getSizeInBits());
- SDValue Reduce = DAG.getNode(Opc, dl, OutputVT, Pred, Data);
- SDValue Zero = DAG.getConstant(0, dl, MVT::i64);
- SDValue Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Reduce, Zero);
-
- return Result;
- }
-
- return SDValue();
-}
-
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op1 = N->getOperand(1);
@@ -11770,7 +13374,8 @@ static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
- EVT ByteVT = EVT::getVectorVT(Ctx, MVT::i8, { ByteSize, true });
+ EVT ByteVT =
+ EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
// Convert everything to the domain of EXT (i.e bytes).
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
@@ -11870,6 +13475,25 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
return DAG.getZExtOrTrunc(Res, DL, VT);
}
+static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
+ SelectionDAG &DAG) {
+ SDLoc DL(N);
+
+ SDValue Pred = N->getOperand(1);
+ SDValue VecToReduce = N->getOperand(2);
+
+ // NOTE: The integer reduction's result type is not always linked to the
+ // operand's element type so we construct it from the intrinsic's result type.
+ EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
+ SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
+
+ // SVE reductions set the whole vector register with the first element
+ // containing the reduction result, which we'll now extract.
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
+ Zero);
+}
+
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
@@ -11910,6 +13534,25 @@ static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
Zero);
}
+// If a merged operation has no inactive lanes we can relax it to a predicated
+// or unpredicated operation, which potentially allows better isel (perhaps
+// using immediate forms) or relaxing register reuse requirements.
+static SDValue convertMergedOpToPredOp(SDNode *N, unsigned PredOpc,
+ SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
+ assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
+ SDValue Pg = N->getOperand(1);
+
+ // ISD way to specify an all active predicate.
+ if ((Pg.getOpcode() == AArch64ISD::PTRUE) &&
+ (Pg.getConstantOperandVal(0) == AArch64SVEPredPattern::all))
+ return DAG.getNode(PredOpc, SDLoc(N), N->getValueType(0), Pg,
+ N->getOperand(2), N->getOperand(3));
+
+ // FUTURE: SplatVector(true)
+ return SDValue();
+}
+
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -11964,20 +13607,28 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_crc32h:
case Intrinsic::aarch64_crc32ch:
return tryCombineCRC32(0xffff, N, DAG);
+ case Intrinsic::aarch64_sve_saddv:
+ // There is no i64 version of SADDV because the sign is irrelevant.
+ if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
+ return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
+ else
+ return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
+ case Intrinsic::aarch64_sve_uaddv:
+ return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
case Intrinsic::aarch64_sve_smaxv:
- return LowerSVEIntReduction(N, AArch64ISD::SMAXV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_umaxv:
- return LowerSVEIntReduction(N, AArch64ISD::UMAXV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_sminv:
- return LowerSVEIntReduction(N, AArch64ISD::SMINV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
case Intrinsic::aarch64_sve_uminv:
- return LowerSVEIntReduction(N, AArch64ISD::UMINV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
case Intrinsic::aarch64_sve_orv:
- return LowerSVEIntReduction(N, AArch64ISD::ORV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
case Intrinsic::aarch64_sve_eorv:
- return LowerSVEIntReduction(N, AArch64ISD::EORV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
case Intrinsic::aarch64_sve_andv:
- return LowerSVEIntReduction(N, AArch64ISD::ANDV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
case Intrinsic::aarch64_sve_index:
return LowerSVEIntrinsicIndex(N, DAG);
case Intrinsic::aarch64_sve_dup:
@@ -11988,26 +13639,19 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);
case Intrinsic::aarch64_sve_smin:
- return DAG.getNode(AArch64ISD::SMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
case Intrinsic::aarch64_sve_umin:
- return DAG.getNode(AArch64ISD::UMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
case Intrinsic::aarch64_sve_smax:
- return DAG.getNode(AArch64ISD::SMAX_MERGE_OP1, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
case Intrinsic::aarch64_sve_umax:
- return DAG.getNode(AArch64ISD::UMAX_MERGE_OP1, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
case Intrinsic::aarch64_sve_lsl:
- return DAG.getNode(AArch64ISD::SHL_MERGE_OP1, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
case Intrinsic::aarch64_sve_lsr:
- return DAG.getNode(AArch64ISD::SRL_MERGE_OP1, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
case Intrinsic::aarch64_sve_asr:
- return DAG.getNode(AArch64ISD::SRA_MERGE_OP1, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
@@ -12100,18 +13744,15 @@ static SDValue performExtendCombine(SDNode *N,
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ (N->getOperand(0).getOpcode() == AArch64ISD::UABD ||
+ N->getOperand(0).getOpcode() == AArch64ISD::SABD)) {
SDNode *ABDNode = N->getOperand(0).getNode();
- unsigned IID = getIntrinsicID(ABDNode);
- if (IID == Intrinsic::aarch64_neon_sabd ||
- IID == Intrinsic::aarch64_neon_uabd) {
- SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
- if (!NewABD.getNode())
- return SDValue();
+ SDValue NewABD =
+ tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
+ if (!NewABD.getNode())
+ return SDValue();
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
- NewABD);
- }
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
// This is effectively a custom type legalization for AArch64.
@@ -12594,6 +14235,31 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
S->getMemOperand()->getFlags());
}
+static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT ResVT = N->getValueType(0);
+
+ // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
+ if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
+ if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
+ SDValue X = Op0.getOperand(0).getOperand(0);
+ return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
+ }
+ }
+
+ // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
+ if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
+ if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
+ SDValue Z = Op1.getOperand(0).getOperand(1);
+ return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
+ }
+ }
+
+ return SDValue();
+}
+
/// Target-specific DAG combine function for post-increment LD1 (lane) and
/// post-increment LD1R.
static SDValue performPostLD1Combine(SDNode *N,
@@ -12732,6 +14398,54 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
}
+static SDValue performMaskedGatherScatterCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N);
+ assert(MGS && "Can only combine gather load or scatter store nodes");
+
+ SDLoc DL(MGS);
+ SDValue Chain = MGS->getChain();
+ SDValue Scale = MGS->getScale();
+ SDValue Index = MGS->getIndex();
+ SDValue Mask = MGS->getMask();
+ SDValue BasePtr = MGS->getBasePtr();
+ ISD::MemIndexType IndexType = MGS->getIndexType();
+
+ EVT IdxVT = Index.getValueType();
+
+ if (DCI.isBeforeLegalize()) {
+ // SVE gather/scatter requires indices of i32/i64. Promote anything smaller
+ // prior to legalisation so the result can be split if required.
+ if ((IdxVT.getVectorElementType() == MVT::i8) ||
+ (IdxVT.getVectorElementType() == MVT::i16)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(MVT::i32);
+ if (MGS->isIndexSigned())
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ else
+ Index = DAG.getNode(ISD::ZERO_EXTEND, DL, NewIdxVT, Index);
+
+ if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
+ SDValue PassThru = MGT->getPassThru();
+ SDValue Ops[] = { Chain, PassThru, Mask, BasePtr, Index, Scale };
+ return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
+ PassThru.getValueType(), DL, Ops,
+ MGT->getMemOperand(),
+ MGT->getIndexType(), MGT->getExtensionType());
+ } else {
+ auto *MSC = cast<MaskedScatterSDNode>(MGS);
+ SDValue Data = MSC->getValue();
+ SDValue Ops[] = { Chain, Data, Mask, BasePtr, Index, Scale };
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+ MSC->getMemoryVT(), DL, Ops,
+ MSC->getMemOperand(), IndexType,
+ MSC->isTruncatingStore());
+ }
+ }
+ }
+
+ return SDValue();
+}
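
The promotion above has to honour the index's signedness: zero-extending a negative i8 index would address a different element than sign-extending it. A one-lane scalar model of that distinction (illustrative only):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // One lane of a gather: result = data[base + index], with an i8 index that
  // must be widened to i32 before the address computation.
  std::vector<int32_t> data(512, 0);
  data[99] = 7;

  int32_t base = 100;
  int8_t narrow = -1;                              // index before promotion
  int32_t sign_ext = static_cast<int32_t>(narrow); // SIGN_EXTEND -> -1
  int32_t zero_ext = static_cast<uint8_t>(narrow); // ZERO_EXTEND -> 255

  assert(data[base + sign_ext] == 7); // the intended element
  assert(data[base + zero_ext] == 0); // a different element entirely
  return 0;
}
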
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
@@ -13703,9 +15417,6 @@ static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
static SDValue
performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
SDLoc DL(N);
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
@@ -13732,9 +15443,7 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
"Sign extending from an invalid type");
- EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
- VT.getVectorElementType(),
- VT.getVectorElementCount() * 2);
+ EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
ExtOp, DAG.getValueType(ExtVT));
@@ -13742,6 +15451,12 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
}
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (!EnableCombineMGatherIntrinsics)
+ return SDValue();
+
// SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
// for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
unsigned NewOpc;
@@ -13881,9 +15596,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;
+ case ISD::ABS:
+ return performABSCombine(N, DAG, DCI, Subtarget);
case ISD::ADD:
case ISD::SUB:
- return performAddSubLongCombine(N, DCI, DAG);
+ return performAddSubCombine(N, DCI, DAG);
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
@@ -13910,6 +15627,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performExtendCombine(N, DCI, DAG);
case ISD::SIGN_EXTEND_INREG:
return performSignExtendInRegCombine(N, DCI, DAG);
+ case ISD::TRUNCATE:
+ return performVectorTruncateCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::SELECT:
@@ -13922,6 +15641,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
+ case ISD::MGATHER:
+ case ISD::MSCATTER:
+ return performMaskedGatherScatterCombine(N, DCI, DAG);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
@@ -13933,8 +15655,14 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performPostLD1Combine(N, DCI, false);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
+ case AArch64ISD::UZP1:
+ return performUzpCombine(N, DAG);
case ISD::INSERT_VECTOR_ELT:
return performPostLD1Combine(N, DCI, true);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return performExtractVectorEltCombine(N, DAG);
+ case ISD::VECREDUCE_ADD:
+ return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -14083,10 +15811,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
EVT ResVT = N->getValueType(0);
- uint64_t NumLanes = ResVT.getVectorElementCount().Min;
+ uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
+ SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
SDValue Val =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1,
- DAG.getConstant(IdxConst * NumLanes, DL, MVT::i32));
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
return DAG.getMergeValues({Val, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_set: {
@@ -14097,10 +15825,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
SDValue Vec = N->getOperand(4);
EVT TupleVT = Tuple.getValueType();
- uint64_t TupleLanes = TupleVT.getVectorElementCount().Min;
+ uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
- uint64_t NumLanes = Vec.getValueType().getVectorElementCount().Min;
+ uint64_t NumLanes =
+ Vec.getValueType().getVectorElementCount().getKnownMinValue();
if ((TupleLanes % NumLanes) != 0)
report_fatal_error("invalid tuple vector!");
@@ -14112,9 +15841,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
if (I == IdxConst)
Opnds.push_back(Vec);
else {
- Opnds.push_back(
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Vec.getValueType(), Tuple,
- DAG.getConstant(I * NumLanes, DL, MVT::i32)));
+ SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
+ Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ Vec.getValueType(), Tuple, ExtIdx));
}
}
SDValue Concat =
@@ -14336,7 +16065,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
ElementCount ResEC = VT.getVectorElementCount();
- if (InVT.getVectorElementCount().Min != (ResEC.Min * 2))
+ if (InVT.getVectorElementCount() != (ResEC * 2))
return;
auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
@@ -14344,7 +16073,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
return;
unsigned Index = CIndex->getZExtValue();
- if ((Index != 0) && (Index != ResEC.Min))
+ if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
return;
unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
@@ -14379,7 +16108,7 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
assert(N->getValueType(0) == MVT::i128 &&
"AtomicCmpSwap on types less than 128 should be legal");
- if (Subtarget->hasLSE()) {
+ if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
// LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
// so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
SDValue Ops[] = {
@@ -14460,7 +16189,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
return;
case ISD::CTPOP:
- Results.push_back(LowerCTPOP(SDValue(N, 0), DAG));
+ if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
+ Results.push_back(Result);
return;
case AArch64ISD::SADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
@@ -14605,17 +16335,44 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
- // Nand not supported in LSE.
- if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
- // Leave 128 bits to LLSC.
- return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
+
+ // Nand is not supported in LSE.
+ // Leave 128 bits to LLSC or CmpXChg.
+ if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
+ if (Subtarget->hasLSE())
+ return AtomicExpansionKind::None;
+ if (Subtarget->outlineAtomics()) {
+ // [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
+ // Don't outline them unless
+ // (1) high level <atomic> support approved:
+ // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
+ // (2) low level libgcc and compiler-rt support implemented by:
+ // min/max outline atomics helpers
+ if (AI->getOperation() != AtomicRMWInst::Min &&
+ AI->getOperation() != AtomicRMWInst::Max &&
+ AI->getOperation() != AtomicRMWInst::UMin &&
+ AI->getOperation() != AtomicRMWInst::UMax) {
+ return AtomicExpansionKind::None;
+ }
+ }
+ }
+
+ // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+ // implement atomicrmw without spilling. If the target address is also on the
+ // stack and close enough to the spill slot, this can lead to a situation
+ // where the monitor always gets cleared and the atomic operation can never
+ // succeed. So at -O0 lower this operation to a CAS loop.
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return AtomicExpansionKind::CmpXChg;
+
+ return AtomicExpansionKind::LLSC;
}
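
The -O0 rationale above is that an LL/SC expansion can livelock when spills between the exclusive load and store keep clearing the monitor, so the operation is expanded to a compare-and-swap loop instead. Roughly the shape of that expansion, sketched with std::atomic rather than the IR the backend actually emits:

#include <atomic>
#include <cassert>
#include <cstdint>

// CAS-loop form of an atomic fetch-add: retry the compare-exchange until our
// update lands on top of an unchanged value.
static uint64_t fetch_add_cas(std::atomic<uint64_t> &obj, uint64_t val) {
  uint64_t expected = obj.load(std::memory_order_relaxed);
  while (!obj.compare_exchange_weak(expected, expected + val,
                                    std::memory_order_seq_cst,
                                    std::memory_order_relaxed)) {
    // On failure 'expected' now holds the current value; loop and retry.
  }
  return expected; // like atomicrmw, yield the value held before the update
}

int main() {
  std::atomic<uint64_t> counter{41};
  assert(fetch_add_cas(counter, 1) == 41 && counter.load() == 42);
  return 0;
}
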
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// If subtarget has LSE, leave cmpxchg intact for codegen.
- if (Subtarget->hasLSE())
+ if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
@@ -15126,6 +16883,92 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
Store->isTruncatingStore());
}
+SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ bool Signed = Op.getOpcode() == ISD::SDIV;
+ unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
+
+ // Scalable vector i32/i64 DIV is supported.
+ if (EltVT == MVT::i32 || EltVT == MVT::i64)
+ return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
+
+ // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+ EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+ EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
+ EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
+
+ // Convert the operands to scalable vectors.
+ SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
+ SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
+
+ // Extend the scalable operands.
+ unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
+ unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
+ SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
+ SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
+ SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
+ SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
+
+ // Convert back to fixed vectors so the DIV can be further lowered.
+ Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
+ Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
+ Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
+ Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
+ SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
+ Op0Lo, Op1Lo);
+ SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
+ Op0Hi, Op1Hi);
+
+ // Convert again to scalable vectors to truncate.
+ ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
+ ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
+ SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
+ ResultLo, ResultHi);
+
+ return convertFromScalableVector(DAG, VT, ScalableResult);
+}
+
+SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
+
+ SDLoc DL(Op);
+ SDValue Val = Op.getOperand(0);
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
+ Val = convertToScalableVector(DAG, ContainerVT, Val);
+
+ bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
+ unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
+
+ // Repeatedly unpack Val until the result is of the desired element type.
+ switch (ContainerVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("unimplemented container type");
+ case MVT::nxv16i8:
+ Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
+ if (VT.getVectorElementType() == MVT::i16)
+ break;
+ LLVM_FALLTHROUGH;
+ case MVT::nxv8i16:
+ Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
+ if (VT.getVectorElementType() == MVT::i32)
+ break;
+ LLVM_FALLTHROUGH;
+ case MVT::nxv4i32:
+ Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
+ assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
+ break;
+ }
+
+ return convertFromScalableVector(DAG, VT, Val);
+}
+
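The switch above widens one element-size step at a time until the destination element type is reached. A fixed-size model of the unsigned path, with UUNPKLO approximated as "zero-extend the low half into lanes twice as wide" (names here are illustrative):

#include <cassert>
#include <cstdint>
#include <vector>

// Model of UUNPKLO on fixed-size data: zero-extend the low half of the input
// into lanes twice as wide, halving the lane count at each step.
template <typename Narrow, typename Wide>
std::vector<Wide> unpklo(const std::vector<Narrow> &v) {
  std::vector<Wide> out(v.size() / 2);
  for (size_t i = 0; i < out.size(); ++i)
    out[i] = static_cast<Wide>(v[i]);
  return out;
}

int main() {
  std::vector<uint8_t> v8{1, 2, 3, 4, 5, 6, 7, 8,
                          9, 10, 11, 12, 13, 14, 15, 16};
  auto v16 = unpklo<uint8_t, uint16_t>(v8);   // 16 x i8  -> 8 x i16
  auto v32 = unpklo<uint16_t, uint32_t>(v16); // 8 x i16  -> 4 x i32
  auto v64 = unpklo<uint32_t, uint64_t>(v32); // 4 x i32  -> 2 x i64
  assert(v64.size() == 2 && v64[0] == 1 && v64[1] == 2);
  return 0;
}
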
SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -15162,17 +17005,21 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
return convertFromScalableVector(DAG, VT, Val);
}
+// Convert vector operation 'Op' to an equivalent predicated operation whereby
+// the original operation's type is used to construct a suitable predicate.
+// NOTE: The results for inactive lanes are undefined.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SelectionDAG &DAG,
- unsigned NewOp) const {
+ unsigned NewOp,
+ bool OverrideNEON) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
auto Pg = getPredicateForVector(DAG, DL, VT);
- if (useSVEForFixedLengthVectorVT(VT)) {
+ if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
- // Create list of operands by convereting existing ones to scalable types.
+ // Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
if (isa<CondCodeSDNode>(V)) {
@@ -15180,11 +17027,21 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
continue;
}
- assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
+ if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
+ EVT VTArg = VTNode->getVT().getVectorElementType();
+ EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
+ Operands.push_back(DAG.getValueType(NewVTArg));
+ continue;
+ }
+
+ assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
"Only fixed length vectors are supported!");
Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
+ if (isMergePassthruOpcode(NewOp))
+ Operands.push_back(DAG.getUNDEF(ContainerVT));
+
auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
@@ -15193,10 +17050,228 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
- assert((isa<CondCodeSDNode>(V) || V.getValueType().isScalableVector()) &&
+ assert((!V.getValueType().isVector() ||
+ V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");
Operands.push_back(V);
}
+ if (isMergePassthruOpcode(NewOp))
+ Operands.push_back(DAG.getUNDEF(VT));
+
return DAG.getNode(NewOp, DL, VT, Operands);
}
+
+// If a fixed length vector operation has no side effects when applied to
+// undefined elements, we can safely use scalable vectors to perform the same
+// operation without needing to worry about predication.
+SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(useSVEForFixedLengthVectorVT(VT) &&
+ "Only expected to lower fixed length vector operation!");
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+
+ // Create list of operands by converting existing ones to scalable types.
+ SmallVector<SDValue, 4> Ops;
+ for (const SDValue &V : Op->op_values()) {
+ assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
+
+ // Pass through non-vector operands.
+ if (!V.getValueType().isVector()) {
+ Ops.push_back(V);
+ continue;
+ }
+
+ // "cast" fixed length vector to a scalable vector.
+ assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
+ "Only fixed length vectors are supported!");
+ Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
+ }
+
+ auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
+ return convertFromScalableVector(DAG, VT, ScalableRes);
+}
+
+SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
+ SelectionDAG &DAG) const {
+ SDLoc DL(ScalarOp);
+ SDValue AccOp = ScalarOp.getOperand(0);
+ SDValue VecOp = ScalarOp.getOperand(1);
+ EVT SrcVT = VecOp.getValueType();
+ EVT ResVT = SrcVT.getVectorElementType();
+
+ EVT ContainerVT = SrcVT;
+ if (SrcVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
+ VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
+ }
+
+ SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+
+ // Convert operands to Scalable.
+ AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), AccOp, Zero);
+
+ // Perform reduction.
+ SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
+ Pg, AccOp, VecOp);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
+}
+
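VECREDUCE_SEQ_FADD is the strictly ordered form of the floating-point add reduction: it starts from the scalar accumulator and folds the lanes in order, which is why it maps onto the ordered FADDA reduction rather than a tree of adds. A small example of why that ordering is observable with floats:

#include <cassert>
#include <cstdio>

int main() {
  float acc = 1.0e8f;
  float lanes[4] = {1.0f, 1.0f, 1.0f, -1.0e8f};

  // Ordered: ((((acc + 1) + 1) + 1) + -1e8). Each +1 is absorbed by rounding.
  float ordered = acc;
  for (float l : lanes)
    ordered += l;

  // Reassociated: cancel the large terms first, then add the small ones.
  float reassociated = (lanes[3] + acc) + (lanes[0] + lanes[1] + lanes[2]);

  std::printf("ordered=%g reassociated=%g\n", ordered, reassociated); // 0 vs 3
  assert(ordered != reassociated);
  return 0;
}
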
+SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
+ SelectionDAG &DAG) const {
+ SDLoc DL(ReduceOp);
+ SDValue Op = ReduceOp.getOperand(0);
+ EVT OpVT = Op.getValueType();
+ EVT VT = ReduceOp.getValueType();
+
+ if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
+
+ switch (ReduceOp.getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::VECREDUCE_OR:
+ return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
+ case ISD::VECREDUCE_AND: {
+ Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
+ return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
+ }
+ case ISD::VECREDUCE_XOR: {
+ SDValue ID =
+ DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
+ SDValue Cntp =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
+ return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
+ }
+ }
+
+ return SDValue();
+}
+
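The i1 reductions above are lowered to predicate operations: OR becomes a PTEST for any active lane, AND becomes a PTEST that no lane of the inverted vector is active (hence the XOR with the all-true Pg), and XOR becomes the parity of CNTP. A boolean model of those equivalences (plain C++, illustrative only):

#include <cassert>
#include <vector>

int main() {
  // Pg built by getPredicateForVector is all active, so every lane counts.
  std::vector<bool> x{true, true, false, true};

  bool or_reduce = false, and_reduce = true, xor_reduce = false;
  int popcount = 0;
  for (bool lane : x) {
    or_reduce |= lane;
    and_reduce &= lane;
    xor_reduce ^= lane;
    popcount += lane;
  }

  // VECREDUCE_OR  -> PTEST ANY_ACTIVE: some lane is set.
  bool any_active = false;
  for (bool lane : x)
    any_active |= lane;
  assert(or_reduce == any_active);

  // VECREDUCE_AND -> XOR with the all-true Pg (invert every lane), then PTEST
  // NONE_ACTIVE: the AND is true iff no inverted lane is set.
  bool any_inverted_set = false;
  for (bool lane : x)
    any_inverted_set |= (lane ^ true); // the XOR(Op, Pg) step
  assert(and_reduce == !any_inverted_set);

  // VECREDUCE_XOR -> CNTP & 1: the parity of the set lanes.
  assert(xor_reduce == ((popcount & 1) != 0));
  return 0;
}
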
+SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
+ SDValue ScalarOp,
+ SelectionDAG &DAG) const {
+ SDLoc DL(ScalarOp);
+ SDValue VecOp = ScalarOp.getOperand(0);
+ EVT SrcVT = VecOp.getValueType();
+
+ if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
+ VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
+ }
+
+ // UADDV always returns an i64 result.
+ EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
+ SrcVT.getVectorElementType();
+ EVT RdxVT = SrcVT;
+ if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
+ RdxVT = getPackedSVEVectorVT(ResVT);
+
+ SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
+ SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
+ Rdx, DAG.getConstant(0, DL, MVT::i64));
+
+ // The VEC_REDUCE nodes expect an element size result.
+ if (ResVT != ScalarOp.getValueType())
+ Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
+
+ return Res;
+}
+
+SDValue
+AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ EVT InVT = Op.getOperand(1).getValueType();
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+ SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
+ SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
+
+ // Convert the mask to a predicate (NOTE: We don't need to worry about
+ // inactive lanes since VSELECT is safe when given undefined elements).
+ EVT MaskVT = Op.getOperand(0).getValueType();
+ EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
+ auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
+ Mask = DAG.getNode(ISD::TRUNCATE, DL,
+ MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
+
+ auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
+ Mask, Op1, Op2);
+
+ return convertFromScalableVector(DAG, VT, ScalableRes);
+}
+
+SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT InVT = Op.getOperand(0).getValueType();
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+
+ assert(useSVEForFixedLengthVectorVT(InVT) &&
+ "Only expected to lower fixed length vector operation!");
+ assert(Op.getValueType() == InVT.changeTypeToInteger() &&
+ "Expected integer result of the same bit length as the inputs!");
+
+ // Expand floating point vector comparisons.
+ if (InVT.isFloatingPoint())
+ return SDValue();
+
+ auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
+ auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
+ auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
+
+ EVT CmpVT = Pg.getValueType();
+ auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
+ {Pg, Op1, Op2, Op.getOperand(2)});
+
+ EVT PromoteVT = ContainerVT.changeTypeToInteger();
+ auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
+ return convertFromScalableVector(DAG, Op.getValueType(), Promote);
+}
+
+SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT InVT = Op.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ (void)TLI;
+
+ assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
+ InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
+ "Only expect to cast between legal scalable vector types!");
+ assert((VT.getVectorElementType() == MVT::i1) ==
+ (InVT.getVectorElementType() == MVT::i1) &&
+ "Cannot cast between data and predicate scalable vector types!");
+
+ if (InVT == VT)
+ return Op;
+
+ if (VT.getVectorElementType() == MVT::i1)
+ return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
+
+ EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
+ EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
+ assert((VT == PackedVT || InVT == PackedInVT) &&
+ "Cannot cast between unpacked scalable vector types!");
+
+ // Pack input if required.
+ if (InVT != PackedInVT)
+ Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
+
+ Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
+
+ // Unpack result if required.
+ if (VT != PackedVT)
+ Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
+
+ return Op;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 4fe77481706b..9550197159e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -72,19 +72,50 @@ enum NodeType : unsigned {
ADC,
SBC, // adc, sbc instructions
- // Arithmetic instructions
+ // Predicated instructions where inactive lanes produce undefined results.
ADD_PRED,
FADD_PRED,
+ FDIV_PRED,
+ FMA_PRED,
+ FMAXNM_PRED,
+ FMINNM_PRED,
+ FMUL_PRED,
+ FSUB_PRED,
+ MUL_PRED,
SDIV_PRED,
+ SHL_PRED,
+ SMAX_PRED,
+ SMIN_PRED,
+ SRA_PRED,
+ SRL_PRED,
+ SUB_PRED,
UDIV_PRED,
- FMA_PRED,
- SMIN_MERGE_OP1,
- UMIN_MERGE_OP1,
- SMAX_MERGE_OP1,
- UMAX_MERGE_OP1,
- SHL_MERGE_OP1,
- SRL_MERGE_OP1,
- SRA_MERGE_OP1,
+ UMAX_PRED,
+ UMIN_PRED,
+
+ // Predicated instructions with the result of inactive lanes provided by the
+ // last operand.
+ FABS_MERGE_PASSTHRU,
+ FCEIL_MERGE_PASSTHRU,
+ FFLOOR_MERGE_PASSTHRU,
+ FNEARBYINT_MERGE_PASSTHRU,
+ FNEG_MERGE_PASSTHRU,
+ FRECPX_MERGE_PASSTHRU,
+ FRINT_MERGE_PASSTHRU,
+ FROUND_MERGE_PASSTHRU,
+ FROUNDEVEN_MERGE_PASSTHRU,
+ FSQRT_MERGE_PASSTHRU,
+ FTRUNC_MERGE_PASSTHRU,
+ FP_ROUND_MERGE_PASSTHRU,
+ FP_EXTEND_MERGE_PASSTHRU,
+ UINT_TO_FP_MERGE_PASSTHRU,
+ SINT_TO_FP_MERGE_PASSTHRU,
+ FCVTZU_MERGE_PASSTHRU,
+ FCVTZS_MERGE_PASSTHRU,
+ SIGN_EXTEND_INREG_MERGE_PASSTHRU,
+ ZERO_EXTEND_INREG_MERGE_PASSTHRU,
+ ABS_MERGE_PASSTHRU,
+ NEG_MERGE_PASSTHRU,
SETCC_MERGE_ZERO,
@@ -188,10 +219,18 @@ enum NodeType : unsigned {
SADDV,
UADDV,
+ // Vector halving addition
+ SHADD,
+ UHADD,
+
// Vector rounding halving addition
SRHADD,
URHADD,
+ // Absolute difference
+ UABD,
+ SABD,
+
// Vector across-lanes min/max
// Only the lower result lane is defined.
SMINV,
@@ -199,6 +238,8 @@ enum NodeType : unsigned {
SMAXV,
UMAXV,
+ SADDV_PRED,
+ UADDV_PRED,
SMAXV_PRED,
UMAXV_PRED,
SMINV_PRED,
@@ -207,9 +248,6 @@ enum NodeType : unsigned {
EORV_PRED,
ANDV_PRED,
- // Vector bitwise negation
- NOT,
-
// Vector bitwise insertion
BIT,
@@ -269,9 +307,14 @@ enum NodeType : unsigned {
PTEST,
PTRUE,
+ BITREVERSE_MERGE_PASSTHRU,
+ BSWAP_MERGE_PASSTHRU,
+ CTLZ_MERGE_PASSTHRU,
+ CTPOP_MERGE_PASSTHRU,
DUP_MERGE_PASSTHRU,
INDEX_VECTOR,
+ // Cast between vectors of the same element type that differ in length.
REINTERPRET_CAST,
LD1_MERGE_ZERO,
@@ -381,7 +424,11 @@ enum NodeType : unsigned {
LDP,
STP,
- STNP
+ STNP,
+
+ // Pseudo for an OBJC call that gets emitted together with a special `mov
+ // x29, x29` marker instruction.
+ CALL_RVMARKER
};
} // end namespace AArch64ISD
@@ -391,12 +438,14 @@ namespace {
// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
-// up to 64 bits.
+// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
+// 32 bits, they're probably just qualifying a CopyFromReg.
// FIXME: X86 also checks for CMOV here. Do we need something similar?
static inline bool isDef32(const SDNode &N) {
unsigned Opc = N.getOpcode();
return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
- Opc != ISD::CopyFromReg;
+ Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
+ Opc != ISD::AssertZext;
}
} // end anonymous namespace
@@ -455,12 +504,6 @@ public:
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
- }
-
/// This method returns a target specific FastISel object, or null if the
/// target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -741,9 +784,7 @@ public:
/// illegal as the original, thus leading to an infinite legalisation loop.
/// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
/// vector types this override can be removed.
- bool mergeStoresAfterLegalization(EVT VT) const override {
- return !useSVEForFixedLengthVectors();
- }
+ bool mergeStoresAfterLegalization(EVT VT) const override;
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
@@ -774,6 +815,10 @@ private:
SDValue ThisVal) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
@@ -858,24 +903,28 @@ private:
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
- unsigned NewOp) const;
+ SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
+ bool OverrideNEON = false) const;
+ SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
- RTLIB::Libcall Call) const;
+ SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
@@ -890,7 +939,17 @@ private:
SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
+ SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
+ SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
+ SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
+ SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
+ SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
+ SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
SelectionDAG &DAG) const;
@@ -902,6 +961,10 @@ private:
bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const override;
+ SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
+ const DenormalMode &Mode) const override;
+ SDValue getSqrtResultForDenormInput(SDValue Operand,
+ SelectionDAG &DAG) const override;
unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
@@ -933,6 +996,7 @@ private:
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
+ bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
@@ -959,8 +1023,21 @@ private:
bool shouldLocalize(const MachineInstr &MI,
const TargetTransformInfo *TTI) const override;
- bool useSVEForFixedLengthVectors() const;
- bool useSVEForFixedLengthVectorVT(EVT VT) const;
+ // Normally SVE is only used for byte size vectors that do not fit within a
+ // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
+ // used for 64bit and 128bit vectors as well.
+ bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
+
+ // With the exception of data-predicate transitions, no instructions are
+ // required to cast between legal scalable vector types. However:
+ // 1. Packed and unpacked types have different bit lengths, meaning BITCAST
+ // is not universally useable.
+ // 2. Most unpacked integer types are not legal and thus integer extends
+ // cannot be used to convert between unpacked and packed types.
+ // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
+ // to transition between unpacked and packed types of the same element type,
+ // with BITCAST used otherwise.
+ SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
};
namespace AArch64 {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4f4ba692c2db..cf08f56e5b08 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -60,10 +60,14 @@ class AArch64Inst<Format f, string cstr> : Instruction {
bits<2> Form = F.Value;
// Defaults
+ bit isWhile = 0;
+ bit isPTestLike = 0;
FalseLanesEnum FalseLanes = FalseLanesNone;
DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
ElementSizeEnum ElementSize = ElementSizeNone;
+ let TSFlags{10} = isPTestLike;
+ let TSFlags{9} = isWhile;
let TSFlags{8-7} = FalseLanes.Value;
let TSFlags{6-3} = DestructiveInstType.Value;
let TSFlags{2-0} = ElementSize.Value;
@@ -263,6 +267,7 @@ def adrplabel : Operand<i64> {
let EncoderMethod = "getAdrLabelOpValue";
let PrintMethod = "printAdrpLabel";
let ParserMatchClass = AdrpOperand;
+ let OperandType = "OPERAND_PCREL";
}
def AdrOperand : AsmOperandClass {
@@ -325,7 +330,7 @@ def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> {
}
def SImm8Operand : SImmOperand<8>;
-def simm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -128 && Imm < 127; }]> {
+def simm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -128 && Imm < 128; }]> {
let ParserMatchClass = SImm8Operand;
let DecoderMethod = "DecodeSImm<8>";
}
@@ -914,6 +919,13 @@ def imm0_1 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_1Operand;
}
+// timm0_1 - as above, but use TargetConstant (TImmLeaf)
+def timm0_1 : Operand<i64>, TImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = Imm0_1Operand;
+}
+
// imm0_15 predicate - True if the immediate is in the range [0,15]
def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) < 16;
@@ -1289,8 +1301,9 @@ class SimpleSystemI<bit L, dag iops, string asm, string operands,
}
// System instructions which have an Rt register.
-class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
- : BaseSystemI<L, oops, iops, asm, operands>,
+class RtSystemI<bit L, dag oops, dag iops, string asm, string operands,
+ list<dag> pattern = []>
+ : BaseSystemI<L, oops, iops, asm, operands, pattern>,
Sched<[WriteSys]> {
bits<5> Rt;
let Inst{4-0} = Rt;
@@ -1318,6 +1331,16 @@ class TMSystemI<bits<4> CRm, string asm, list<dag> pattern>
let Inst{4-0} = Rt;
}
+// System instructions that pass a register argument
+// This class assumes the register is for input rather than output.
+class RegInputSystemI<bits<4> CRm, bits<3> Op2, string asm,
+ list<dag> pattern = []>
+ : RtSystemI<0, (outs), (ins GPR64:$Rt), asm, "\t$Rt", pattern> {
+ let Inst{20-12} = 0b000110001;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = Op2;
+}
+
// System instructions for transactional memory - no operand
class TMSystemINoOperand<bits<4> CRm, string asm, list<dag> pattern>
: TMBaseSystemI<0b0, CRm, 0b011, (outs), (ins), asm, "", pattern> {
@@ -1358,6 +1381,14 @@ def barrier_op : Operand<i32> {
let PrintMethod = "printBarrierOption";
let ParserMatchClass = BarrierAsmOperand;
}
+def BarriernXSAsmOperand : AsmOperandClass {
+ let Name = "BarriernXS";
+ let ParserMethod = "tryParseBarriernXSOperand";
+}
+def barrier_nxs_op : Operand<i32> {
+ let PrintMethod = "printBarriernXSOption";
+ let ParserMatchClass = BarriernXSAsmOperand;
+}
class CRmSystemI<Operand crmtype, bits<3> opc, string asm,
list<dag> pattern = []>
: SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm", pattern>,
@@ -1439,6 +1470,7 @@ class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
bits<16> systemreg;
let Inst{20-5} = systemreg;
+ let DecoderNamespace = "Fallback";
}
// FIXME: Some of these def NZCV, others don't. Best way to model that?
@@ -1448,6 +1480,7 @@ class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt),
"msr", "\t$systemreg, $Rt"> {
bits<16> systemreg;
let Inst{20-5} = systemreg;
+ let DecoderNamespace = "Fallback";
}
def SystemPStateFieldWithImm0_15Operand : AsmOperandClass {
@@ -1937,11 +1970,21 @@ class SignAuthTwoOperand<bits<4> opc, string asm,
let Inst{4-0} = Rd;
}
+class ClearAuth<bits<1> data, string asm>
+ : I<(outs GPR64:$Rd), (ins GPR64:$Rn), asm, "\t$Rd", "$Rd = $Rn", []>, Sched<[]> {
+ bits<5> Rd;
+ let Inst{31-11} = 0b110110101100000101000;
+ let Inst{10} = data;
+ let Inst{9-5} = 0b11111;
+ let Inst{4-0} = Rd;
+}
+
// Base class for the Armv8.4-A 8 and 16-bit flag manipulation instructions
class BaseFlagManipulation<bit sf, bit sz, dag iops, string asm, string ops>
: I<(outs), iops, asm, ops, "", []>,
Sched<[WriteI, ReadI, ReadI]> {
let Uses = [NZCV];
+ let Defs = [NZCV];
bits<5> Rn;
let Inst{31} = sf;
let Inst{30-15} = 0b0111010000000000;
@@ -3929,7 +3972,7 @@ class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
(outs GPR64sp:$wback, regtype:$Rt),
(ins GPR64sp:$Rn, simm9:$offset), asm,
"$Rn = $wback,@earlyclobber $wback", []>,
- Sched<[WriteLD, WriteAdr]>;
+ Sched<[WriteAdr, WriteLD]>;
let mayStore = 1, mayLoad = 0 in
class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
@@ -3975,7 +4018,7 @@ class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
(outs GPR64sp:$wback, regtype:$Rt),
(ins GPR64sp:$Rn, simm9:$offset),
asm, "$Rn = $wback,@earlyclobber $wback", []>,
- Sched<[WriteLD, WriteAdr]>;
+ Sched<[WriteAdr, WriteLD]>;
let mayStore = 1, mayLoad = 0 in
class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
@@ -4072,7 +4115,7 @@ class LoadPairPreIdx<bits<2> opc, bit V, RegisterOperand regtype,
: BaseLoadStorePairPreIdx<opc, V, 1,
(outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2),
(ins GPR64sp:$Rn, indextype:$offset), asm>,
- Sched<[WriteLD, WriteLDHi, WriteAdr]>;
+ Sched<[WriteAdr, WriteLD, WriteLDHi]>;
let mayStore = 1, mayLoad = 0 in
class StorePairPreIdx<bits<2> opc, bit V, RegisterOperand regtype,
@@ -4113,7 +4156,7 @@ class LoadPairPostIdx<bits<2> opc, bit V, RegisterOperand regtype,
: BaseLoadStorePairPostIdx<opc, V, 1,
(outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2),
(ins GPR64sp:$Rn, idxtype:$offset), asm>,
- Sched<[WriteLD, WriteLDHi, WriteAdr]>;
+ Sched<[WriteAdr, WriteLD, WriteLDHi]>;
let mayStore = 1, mayLoad = 0 in
class StorePairPostIdx<bits<2> opc, bit V, RegisterOperand regtype,
@@ -7831,9 +7874,9 @@ class BaseSIMDThreeSameVectorBFDot<bit Q, bit U, string asm, string kind1,
multiclass SIMDThreeSameVectorBFDot<bit U, string asm> {
def v4bf16 : BaseSIMDThreeSameVectorBFDot<0, U, asm, ".2s", ".4h", V64,
- v2f32, v8i8>;
+ v2f32, v4bf16>;
def v8bf16 : BaseSIMDThreeSameVectorBFDot<1, U, asm, ".4s", ".8h", V128,
- v4f32, v16i8>;
+ v4f32, v8bf16>;
}
class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm,
@@ -7851,7 +7894,7 @@ class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm,
(InputType RegType:$Rn),
(InputType (bitconvert (AccumType
(AArch64duplane32 (v4f32 V128:$Rm),
- VectorIndexH:$idx)))))))]> {
+ VectorIndexS:$idx)))))))]> {
bits<2> idx;
let Inst{21} = idx{0}; // L
@@ -7861,16 +7904,16 @@ class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm,
multiclass SIMDThreeSameVectorBF16DotI<bit U, string asm> {
def v4bf16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h",
- ".2h", V64, v2f32, v8i8>;
+ ".2h", V64, v2f32, v4bf16>;
def v8bf16 : BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h",
- ".2h", V128, v4f32, v16i8>;
+ ".2h", V128, v4f32, v8bf16>;
}
class SIMDBF16MLAL<bit Q, string asm, SDPatternOperator OpNode>
: BaseSIMDThreeSameVectorTied<Q, 0b1, 0b110, 0b11111, V128, asm, ".4s",
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
- (v16i8 V128:$Rn),
- (v16i8 V128:$Rm)))]> {
+ (v8bf16 V128:$Rn),
+ (v8bf16 V128:$Rm)))]> {
let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}");
}
@@ -7880,10 +7923,10 @@ class SIMDBF16MLALIndex<bit Q, string asm, SDPatternOperator OpNode>
"{\t$Rd.4s, $Rn.8h, $Rm.h$idx}", "$Rd = $dst",
[(set (v4f32 V128:$dst),
(v4f32 (OpNode (v4f32 V128:$Rd),
- (v16i8 V128:$Rn),
- (v16i8 (bitconvert (v8bf16
+ (v8bf16 V128:$Rn),
+ (v8bf16
(AArch64duplane16 (v8bf16 V128_lo:$Rm),
- VectorIndexH:$idx)))))))]>,
+ VectorIndexH:$idx)))))]>,
Sched<[WriteV]> {
bits<5> Rd;
bits<5> Rn;
@@ -7907,8 +7950,8 @@ class SIMDThreeSameVectorBF16MatrixMul<string asm>
V128, asm, ".4s",
[(set (v4f32 V128:$dst),
(int_aarch64_neon_bfmmla (v4f32 V128:$Rd),
- (v16i8 V128:$Rn),
- (v16i8 V128:$Rm)))]> {
+ (v8bf16 V128:$Rn),
+ (v8bf16 V128:$Rm)))]> {
let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h",
", $Rm", ".8h", "}");
}
@@ -10586,14 +10629,14 @@ multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
(v4f16 V64:$Rn),
(v4f16 V64:$Rm),
- (rottype i32:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v8f16 : BaseSIMDThreeSameVectorComplex<1, U, 0b01, opcode, V128, rottype,
asm, ".8h",
[(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
(v8f16 V128:$Rn),
(v8f16 V128:$Rm),
- (rottype i32:$rot)))]>;
+ (i32 rottype:$rot)))]>;
}
let Predicates = [HasComplxNum, HasNEON] in {
@@ -10602,21 +10645,21 @@ multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
(v2f32 V64:$Rn),
(v2f32 V64:$Rm),
- (rottype i32:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v4f32 : BaseSIMDThreeSameVectorComplex<1, U, 0b10, opcode, V128, rottype,
asm, ".4s",
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
(v4f32 V128:$Rn),
(v4f32 V128:$Rm),
- (rottype i32:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v2f64 : BaseSIMDThreeSameVectorComplex<1, U, 0b11, opcode, V128, rottype,
asm, ".2d",
[(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
(v2f64 V128:$Rn),
(v2f64 V128:$Rm),
- (rottype i32:$rot)))]>;
+ (i32 rottype:$rot)))]>;
}
}
@@ -10658,14 +10701,14 @@ multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
(v4f16 V64:$Rn),
(v4f16 V64:$Rm),
- (rottype i32:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v8f16 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b01, opcode, V128,
rottype, asm, ".8h",
[(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
(v8f16 V128:$Rn),
(v8f16 V128:$Rm),
- (rottype i32:$rot)))]>;
+ (i32 rottype:$rot)))]>;
}
let Predicates = [HasComplxNum, HasNEON] in {
@@ -10674,21 +10717,21 @@ multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
(v2f32 V64:$Rn),
(v2f32 V64:$Rm),
- (rottype i32:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v4f32 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b10, opcode, V128,
rottype, asm, ".4s",
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
(v4f32 V128:$Rn),
(v4f32 V128:$Rm),
- (rottype i32:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v2f64 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b11, opcode, V128,
rottype, asm, ".2d",
[(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
(v2f64 V128:$Rn),
(v2f64 V128:$Rm),
- (rottype i32:$rot)))]>;
+ (i32 rottype:$rot)))]>;
}
}
@@ -11216,6 +11259,35 @@ multiclass STOPregister<string asm, string instr> {
!cast<Instruction>(instr # "X")>;
}
+class LoadStore64B_base<bits<3> opc, string asm_inst, string asm_ops,
+ dag iops, dag oops, list<dag> pat>
+ : I<oops, iops, asm_inst, asm_ops, "", pat>,
+ Sched<[]> /* FIXME: fill in scheduling details once known */ {
+ bits<5> Rt;
+ bits<5> Rn;
+ let Inst{31-21} = 0b11111000001;
+ let Inst{15} = 1;
+ let Inst{14-12} = opc;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let Predicates = [HasV8_7a];
+}
+
+class LoadStore64B<bits<3> opc, string asm_inst, dag iops, dag oops,
+ list<dag> pat = []>
+ : LoadStore64B_base<opc, asm_inst, "\t$Rt, [$Rn]", iops, oops, pat> {
+ let Inst{20-16} = 0b11111;
+}
+
+class Store64BV<bits<3> opc, string asm_inst, list<dag> pat = []>
+ : LoadStore64B_base<opc, asm_inst, "\t$Rs, $Rt, [$Rn]",
+ (ins GPR64x8:$Rt, GPR64sp:$Rn), (outs GPR64:$Rs), pat> {
+ bits<5> Rs;
+ let Inst{20-16} = Rs;
+}
+
//----------------------------------------------------------------------------
// Allow the size specifier tokens to be upper case, not just lower.
def : TokenAlias<".4B", ".4b">; // Add dot product
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index a0e7c782f68c..25656fac1d2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -88,6 +88,29 @@ def G_DUP: AArch64GenericInstruction {
let InOperandList = (ins type1:$lane);
let hasSideEffects = 0;
}
+
+// Represents a lane duplicate operation.
+def G_DUPLANE8 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src, type1:$lane);
+ let hasSideEffects = 0;
+}
+def G_DUPLANE16 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src, type1:$lane);
+ let hasSideEffects = 0;
+}
+def G_DUPLANE32 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src, type1:$lane);
+ let hasSideEffects = 0;
+}
+def G_DUPLANE64 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src, type1:$lane);
+ let hasSideEffects = 0;
+}
+
// Represents a trn1 instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks.
def G_TRN1 : AArch64GenericInstruction {
@@ -111,6 +134,28 @@ def G_EXT: AArch64GenericInstruction {
let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
}
+// Represents a vector G_ASHR with an immediate.
+def G_VASHR : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
+}
+
+// Represents a vector G_LSHR with an immediate.
+def G_VLSHR : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
+}
+
+// Represents an integer to FP conversion on the FPR bank.
+def G_SITOF : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+}
+def G_UITOF : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+}
+
def : GINodeEquiv<G_REV16, AArch64rev16>;
def : GINodeEquiv<G_REV32, AArch64rev32>;
def : GINodeEquiv<G_REV64, AArch64rev64>;
@@ -119,6 +164,21 @@ def : GINodeEquiv<G_UZP2, AArch64uzp2>;
def : GINodeEquiv<G_ZIP1, AArch64zip1>;
def : GINodeEquiv<G_ZIP2, AArch64zip2>;
def : GINodeEquiv<G_DUP, AArch64dup>;
+def : GINodeEquiv<G_DUPLANE8, AArch64duplane8>;
+def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>;
+def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>;
+def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>;
def : GINodeEquiv<G_TRN1, AArch64trn1>;
def : GINodeEquiv<G_TRN2, AArch64trn2>;
def : GINodeEquiv<G_EXT, AArch64ext>;
+def : GINodeEquiv<G_VASHR, AArch64vashr>;
+def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
+def : GINodeEquiv<G_SITOF, AArch64sitof>;
+def : GINodeEquiv<G_UITOF, AArch64uitof>;
+
+def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+
+// These are patterns that we only use for GlobalISel via the importer.
+def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
+ (vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
+ (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 08f80c9aa361..6b38e216a854 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -107,6 +107,13 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
break;
+ case TargetOpcode::STATEPOINT:
+ NumBytes = StatepointOpers(&MI).getNumPatchBytes();
+ assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+ // No patch bytes means a normal call inst is emitted
+ if (NumBytes == 0)
+ NumBytes = 4;
+ break;
case AArch64::TLSDESC_CALLSEQ:
// This gets lowered to an instruction sequence which takes 16 bytes
NumBytes = 16;
@@ -287,6 +294,31 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
}
+ // If we're allowed to modify and the block ends in an unconditional branch
+ // which could simply fallthrough, remove the branch. (Note: This case only
+ // matters when we can't understand the whole sequence, otherwise it's also
+ // handled by BranchFolding.cpp.)
+ if (AllowModify && isUncondBranchOpcode(LastOpc) &&
+ MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
+ assert(!isUncondBranchOpcode(LastOpc) &&
+ "unreachable unconditional branches removed above");
+
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ parseCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ } else {
+ SecondLastInst = &*I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
return true;
@@ -321,6 +353,56 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
}
+bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
+ MachineBranchPredicate &MBP,
+ bool AllowModify) const {
+ // For the moment, handle only a block which ends with a cb(n)zx followed by
+ // a fallthrough. Why this? Because it is a common form.
+ // TODO: Should we handle b.cc?
+
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return true;
+
+ // Skip over SpeculationBarrierEndBB terminators
+ if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
+ I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
+ --I;
+ }
+
+ if (!isUnpredicatedTerminator(*I))
+ return true;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+ if (!isCondBranchOpcode(LastOpc))
+ return true;
+
+ switch (LastOpc) {
+ default:
+ return true;
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ break;
+ };
+
+ MBP.TrueDest = LastInst->getOperand(1).getMBB();
+ assert(MBP.TrueDest && "expected!");
+ MBP.FalseDest = MBB.getNextNode();
+
+ MBP.ConditionDef = nullptr;
+ MBP.SingleUseCondition = false;
+
+ MBP.LHS = LastInst->getOperand(0);
+ MBP.RHS = MachineOperand::CreateImm(0);
+ MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
+ : MachineBranchPredicate::PRED_EQ;
+ return false;
+}
+
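A hedged usage sketch of the new analyzeBranchPredicate hook; the caller and the block shape are assumptions, not taken from this patch. It shows what the fields are filled with for a block ending in a cbnz followed by a fall-through.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

// Illustrative caller only; in-tree users call this through TargetInstrInfo.
void queryBranchPredicate(const llvm::TargetInstrInfo &TII,
                          llvm::MachineBasicBlock &MBB) {
  llvm::TargetInstrInfo::MachineBranchPredicate MBP;
  if (!TII.analyzeBranchPredicate(MBB, MBP, /*AllowModify=*/false)) {
    // A false return means the block matched the cb(n)z{w,x} + fallthrough
    // form handled above. For "cbnz x0, %bb.then" the fields are:
    //   MBP.LHS       = the register operand of the cbnz (x0)
    //   MBP.RHS       = immediate 0
    //   MBP.Predicate = PRED_NE (PRED_EQ for CBZW/CBZX)
    //   MBP.TrueDest  = %bb.then, MBP.FalseDest = the layout fall-through block
  }
}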
bool AArch64InstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond[0].getImm() != -1) {
@@ -1037,6 +1119,13 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
switch (MI.getOpcode()) {
default:
break;
+ case AArch64::PTEST_PP:
+ SrcReg = MI.getOperand(0).getReg();
+ SrcReg2 = MI.getOperand(1).getReg();
+ // Not sure about the mask and value for now...
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
case AArch64::SUBSWrr:
case AArch64::SUBSWrs:
case AArch64::SUBSWrx:
@@ -1192,10 +1281,9 @@ static bool areCFlagsAccessedBetweenInstrs(
return true;
// From must be above To.
- assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
- [From](MachineInstr &MI) {
- return MI.getIterator() == From;
- }) != To->getParent()->rend());
+ assert(std::any_of(
+ ++To.getReverse(), To->getParent()->rend(),
+ [From](MachineInstr &MI) { return MI.getIterator() == From; }));
// We iterate backward starting at \p To until we hit \p From.
for (const MachineInstr &Instr :
@@ -1208,6 +1296,127 @@ static bool areCFlagsAccessedBetweenInstrs(
return false;
}
+/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
+/// operation which could set the flags in an identical manner
+bool AArch64InstrInfo::optimizePTestInstr(
+ MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
+ const MachineRegisterInfo *MRI) const {
+ auto *Mask = MRI->getUniqueVRegDef(MaskReg);
+ auto *Pred = MRI->getUniqueVRegDef(PredReg);
+ auto NewOp = Pred->getOpcode();
+ bool OpChanged = false;
+
+ unsigned MaskOpcode = Mask->getOpcode();
+ unsigned PredOpcode = Pred->getOpcode();
+ bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
+ bool PredIsWhileLike = isWhileOpcode(PredOpcode);
+
+ if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
+ // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
+ // deactivate any lanes OTHER_INST might set.
+ uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
+ uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
+
+ // Must be an all active predicate of matching element size.
+ if ((PredElementSize != MaskElementSize) ||
+ (Mask->getOperand(1).getImm() != 31))
+ return false;
+
+ // Fallthrough to simply remove the PTEST.
+ } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
+ // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
+ // instruction that sets the flags as PTEST would.
+
+ // Fallthrough to simply remove the PTEST.
+ } else if (PredIsPTestLike) {
+ // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
+ // instructions use the same predicate.
+ auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PTestLikeMask)
+ return false;
+
+ // Fallthrough to simply remove the PTEST.
+ } else {
+ switch (Pred->getOpcode()) {
+ case AArch64::BRKB_PPzP:
+ case AArch64::BRKPB_PPzPP: {
+ // Op 0 is chain, 1 is the mask, 2 the previous predicate to
+ // propagate, 3 the new predicate.
+
+ // Check to see if our mask is the same as the brkpb's. If
+ // not the resulting flag bits may be different and we
+ // can't remove the ptest.
+ auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PredMask)
+ return false;
+
+ // Switch to the new opcode
+ NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
+ : AArch64::BRKPBS_PPzPP;
+ OpChanged = true;
+ break;
+ }
+ case AArch64::BRKN_PPzP: {
+ auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PredMask)
+ return false;
+
+ NewOp = AArch64::BRKNS_PPzP;
+ OpChanged = true;
+ break;
+ }
+ default:
+ // Bail out if we don't recognize the input
+ return false;
+ }
+ }
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ // If the predicate is in a different block (possibly because it's been
+ // hoisted out), then assume the flags are set in between statements.
+ if (Pred->getParent() != PTest->getParent())
+ return false;
+
+ // If another instruction between the propagation and test sets the
+ // flags, don't remove the ptest.
+ MachineBasicBlock::iterator I = Pred, E = PTest;
+ ++I; // Skip past the predicate op itself.
+ for (; I != E; ++I) {
+ const MachineInstr &Inst = *I;
+
+ // TODO: If the ptest flags are unused, we could still remove it.
+ if (Inst.modifiesRegister(AArch64::NZCV, TRI))
+ return false;
+ }
+
+ // If we pass all the checks, it's safe to remove the PTEST and use the flags
+ // as they are prior to PTEST. Sometimes this requires the tested PTEST
+ // operand to be replaced with an equivalent instruction that also sets the
+ // flags.
+ Pred->setDesc(get(NewOp));
+ PTest->eraseFromParent();
+ if (OpChanged) {
+ bool succeeded = UpdateOperandRegClass(*Pred);
+ (void)succeeded;
+ assert(succeeded && "Operands have incompatible register classes!");
+ Pred->addRegisterDefined(AArch64::NZCV, TRI);
+ }
+
+ // Ensure that the flags def is live.
+ if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
+ unsigned i = 0, e = Pred->getNumOperands();
+ for (; i != e; ++i) {
+ MachineOperand &MO = Pred->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
+ MO.setIsDead(false);
+ break;
+ }
+ }
+ }
+ return true;
+}
+
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be truly compare
/// instruction
@@ -1246,6 +1455,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
return true;
}
+ if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
+ return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
+
// Continue only if we have a "ri" where immediate is zero.
// FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
// function.
@@ -2062,6 +2274,24 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
return true;
}
+Optional<ExtAddrMode>
+AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
+ const TargetRegisterInfo *TRI) const {
+ const MachineOperand *Base; // Filled with the base operand of MI.
+ int64_t Offset; // Filled with the offset of MI.
+ bool OffsetIsScalable;
+ if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
+ return None;
+
+ if (!Base->isReg())
+ return None;
+ ExtAddrMode AM;
+ AM.BaseReg = Base->getReg();
+ AM.Displacement = Offset;
+ AM.ScaledReg = 0;
+ return AM;
+}
+
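A minimal caller sketch for the new getAddrModeFromMemoryOp override, assuming a simple register-plus-immediate load such as "ldr x0, [x1, #16]"; the helper function is illustrative only.

#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

// Illustrative only: queries the hook through the TargetInstrInfo interface.
void queryAddrMode(const llvm::TargetInstrInfo &TII,
                   const llvm::MachineInstr &MemI,
                   const llvm::TargetRegisterInfo *TRI) {
  if (llvm::Optional<llvm::ExtAddrMode> AM =
          TII.getAddrModeFromMemoryOp(MemI, TRI)) {
    // For "ldr x0, [x1, #16]": AM->BaseReg is x1, AM->Displacement is 16,
    // and AM->ScaledReg is 0 (no scaled index register), matching the
    // implementation above.
  }
}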
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable, unsigned &Width,
@@ -3060,7 +3290,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_PXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 4:
@@ -3104,7 +3334,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 24:
@@ -3126,7 +3356,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 48:
@@ -3137,7 +3367,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 64:
@@ -3148,7 +3378,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
}
@@ -3214,7 +3444,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_PXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 4:
@@ -3258,7 +3488,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 24:
@@ -3280,7 +3510,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 48:
@@ -3291,7 +3521,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 64:
@@ -3302,7 +3532,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZZZXI;
- StackID = TargetStackID::SVEVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
}
@@ -3329,6 +3559,47 @@ bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
});
}
+void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
+ const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
+ // The smallest scalable element supported by scaled SVE addressing
+ // modes is the predicate, which is 2 scalable bytes in size. So the scalable
+ // byte offset must always be a multiple of 2.
+ assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+ // VGSized offsets are divided by '2', because the VG register is the
+ // number of 64-bit granules as opposed to 128-bit vector chunks,
+ // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
+ // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
+ // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
+ ByteSized = Offset.getFixed();
+ VGSized = Offset.getScalable() / 2;
+}
+
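To make the VG arithmetic above concrete, a small self-contained sketch that mirrors decomposeStackOffsetForDwarfOffsets for an offset of 48 fixed bytes plus two SVE vectors (32 scalable bytes); the function name and the inline arithmetic are illustrative only.

#include <cassert>
#include <cstdint>

// Mirrors the split performed above; equivalent to calling the new static
// helper with StackOffset::get(48, 32).
void exampleDwarfDecompose() {
  int64_t Fixed = 48, Scalable = 32;
  assert(Scalable % 2 == 0 && "Invalid frame offset");
  int64_t ByteSized = Fixed;      // 48
  int64_t VGSized = Scalable / 2; // 16
  // DWARF then expresses the location as 48 + 16 * VG bytes, i.e.
  // 48 + 32 * n bytes on a machine whose vectors hold n 128-bit chunks.
  (void)ByteSized;
  (void)VGSized;
}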
+/// Returns the offset in parts to which this frame offset can be
+/// decomposed for the purpose of describing a frame offset.
+/// For non-scalable offsets this is simply its byte size.
+void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
+ const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
+ int64_t &NumDataVectors) {
+ // The smallest scalable element supported by scaled SVE addressing
+ // modes is the predicate, which is 2 scalable bytes in size. So the scalable
+ // byte offset must always be a multiple of 2.
+ assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+ NumBytes = Offset.getFixed();
+ NumDataVectors = 0;
+ NumPredicateVectors = Offset.getScalable() / 2;
+ // This method is used to get the offsets to adjust the frame offset.
+ // If the function requires ADDPL to be used and needs more than two ADDPL
+ // instructions, part of the offset is folded into NumDataVectors so that it
+ // uses ADDVL for part of it, reducing the number of ADDPL instructions.
+ if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
+ NumPredicateVectors > 62) {
+ NumDataVectors = NumPredicateVectors / 8;
+ NumPredicateVectors -= NumDataVectors * 8;
+ }
+}
+
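Likewise, a self-contained sketch that mirrors decomposeStackOffsetForFrameOffsets for 130 scalable bytes, showing how part of the offset is folded into ADDVL units; the function and the concrete numbers are illustrative only.

#include <cstdint>

// Mirrors the folding logic above for StackOffset::get(0, 130).
void exampleFrameDecompose() {
  int64_t Scalable = 130;
  int64_t NumBytes = 0;                       // fixed part
  int64_t NumDataVectors = 0;
  int64_t NumPredicateVectors = Scalable / 2; // 65 ADDPL units
  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
      NumPredicateVectors > 62) {             // 65 > 62, so fold into ADDVL
    NumDataVectors = NumPredicateVectors / 8;      // 8
    NumPredicateVectors -= NumDataVectors * 8;     // 1
  }
  // Result: emitFrameOffset can use one ADDVL #8 plus one ADDPL #1
  // instead of a longer chain of ADDPL instructions.
  (void)NumBytes;
  (void)NumDataVectors;
  (void)NumPredicateVectors;
}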
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
@@ -3438,12 +3709,13 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineInstr::MIFlag Flag, bool SetNZCV,
bool NeedsWinCFI, bool *HasWinCFI) {
int64_t Bytes, NumPredicateVectors, NumDataVectors;
- Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
+ AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
+ Offset, Bytes, NumPredicateVectors, NumDataVectors);
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
- assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
- "SP increment/decrement not 16-byte aligned");
+ assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
+ "SP increment/decrement not 8-byte aligned");
unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
if (Bytes < 0) {
Bytes = -Bytes;
@@ -3698,7 +3970,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
// Construct the complete offset.
bool IsMulVL = ScaleValue.isScalable();
unsigned Scale = ScaleValue.getKnownMinSize();
- int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();
+ int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
const MachineOperand &ImmOpnd =
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
@@ -3740,11 +4012,9 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
*OutUnscaledOp = *UnscaledOp;
if (IsMulVL)
- SOffset = StackOffset(Offset, MVT::nxv1i8) +
- StackOffset(SOffset.getBytes(), MVT::i8);
+ SOffset = StackOffset::get(SOffset.getFixed(), Offset);
else
- SOffset = StackOffset(Offset, MVT::i8) +
- StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
+ SOffset = StackOffset::get(Offset, SOffset.getScalable());
return AArch64FrameOffsetCanUpdate |
(SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
@@ -3756,7 +4026,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned ImmIdx = FrameRegIdx + 1;
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
- Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8);
+ Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
MI.getOperand(0).getReg(), FrameReg, Offset, TII,
MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
@@ -3861,7 +4131,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
return false;
}
-// FP Opcodes that can be combined with a FMUL
+// FP Opcodes that can be combined with a FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
default:
@@ -3883,8 +4153,12 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
- return (Options.UnsafeFPMath ||
- Options.AllowFPOpFusion == FPOpFusion::Fast);
+ // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
+ // the target options or if FADD/FSUB has the contract fast-math flag.
+ return Options.UnsafeFPMath ||
+ Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Inst.getFlag(MachineInstr::FmContract);
+ return true;
}
return false;
}
@@ -4364,8 +4638,8 @@ bool AArch64InstrInfo::isThroughputPattern(
/// pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
// Integer patterns
if (getMaddPatterns(Root, Patterns))
return true;
@@ -4373,7 +4647,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getFMAPatterns(Root, Patterns))
return true;
- return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
+ DoRegPressureReduce);
}
enum class FMAInstKind { Default, Indexed, Accumulator };
@@ -4596,7 +4871,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- MachineInstr *MUL;
+ MachineInstr *MUL = nullptr;
const TargetRegisterClass *RC;
unsigned Opc;
switch (Pattern) {
@@ -5417,6 +5692,9 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
+ // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
+ // CodeGen/AArch64/urem-seteq-nonzero.ll.
+ // assert(MUL && "MUL was never set");
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
}
@@ -5756,84 +6034,20 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
static bool
outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
- const Function &Fa = a.getMF()->getFunction();
- const Function &Fb = b.getMF()->getFunction();
-
- // If none of the functions have the "sign-return-address" attribute their
- // signing behaviour is equal
- if (!Fa.hasFnAttribute("sign-return-address") &&
- !Fb.hasFnAttribute("sign-return-address")) {
- return true;
- }
+ const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
+ const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
- // If both functions have the "sign-return-address" attribute their signing
- // behaviour is equal, if the values of the attributes are equal
- if (Fa.hasFnAttribute("sign-return-address") &&
- Fb.hasFnAttribute("sign-return-address")) {
- StringRef ScopeA =
- Fa.getFnAttribute("sign-return-address").getValueAsString();
- StringRef ScopeB =
- Fb.getFnAttribute("sign-return-address").getValueAsString();
- return ScopeA.equals(ScopeB);
- }
-
- // If function B doesn't have the "sign-return-address" attribute but A does,
- // the functions' signing behaviour is equal if A's value for
- // "sign-return-address" is "none" and vice versa.
- if (Fa.hasFnAttribute("sign-return-address")) {
- StringRef ScopeA =
- Fa.getFnAttribute("sign-return-address").getValueAsString();
- return ScopeA.equals("none");
- }
-
- if (Fb.hasFnAttribute("sign-return-address")) {
- StringRef ScopeB =
- Fb.getFnAttribute("sign-return-address").getValueAsString();
- return ScopeB.equals("none");
- }
-
- llvm_unreachable("Unkown combination of sign-return-address attributes");
+ return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
+ MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
}
static bool
outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
- const Function &Fa = a.getMF()->getFunction();
- const Function &Fb = b.getMF()->getFunction();
-
- // If none of the functions have the "sign-return-address-key" attribute
- // their keys are equal
- if (!Fa.hasFnAttribute("sign-return-address-key") &&
- !Fb.hasFnAttribute("sign-return-address-key")) {
- return true;
- }
-
- // If both functions have the "sign-return-address-key" attribute their
- // keys are equal if the values of "sign-return-address-key" are equal
- if (Fa.hasFnAttribute("sign-return-address-key") &&
- Fb.hasFnAttribute("sign-return-address-key")) {
- StringRef KeyA =
- Fa.getFnAttribute("sign-return-address-key").getValueAsString();
- StringRef KeyB =
- Fb.getFnAttribute("sign-return-address-key").getValueAsString();
- return KeyA.equals(KeyB);
- }
-
- // If B doesn't have the "sign-return-address-key" attribute, both keys are
- // equal, if function a has the default key (a_key)
- if (Fa.hasFnAttribute("sign-return-address-key")) {
- StringRef KeyA =
- Fa.getFnAttribute("sign-return-address-key").getValueAsString();
- return KeyA.equals_lower("a_key");
- }
-
- if (Fb.hasFnAttribute("sign-return-address-key")) {
- StringRef KeyB =
- Fb.getFnAttribute("sign-return-address-key").getValueAsString();
- return KeyB.equals_lower("a_key");
- }
+ const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
+ const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
- llvm_unreachable("Unkown combination of sign-return-address-key attributes");
+ return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
}
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
@@ -5889,9 +6103,10 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
// necessary. However, at this point we don't know if the outlined function
// will have a RET instruction so we assume the worst.
- const Function &FCF = FirstCand.getMF()->getFunction();
const TargetRegisterInfo &TRI = getRegisterInfo();
- if (FCF.hasFnAttribute("sign-return-address")) {
+ if (FirstCand.getMF()
+ ->getInfo<AArch64FunctionInfo>()
+ ->shouldSignReturnAddress(true)) {
// One PAC and one AUT instructions
NumBytesToCreateFrame += 8;
@@ -5948,10 +6163,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
return false;
};
// Remove candidates with illegal stack modifying instructions
- RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- hasIllegalSPModification),
- RepeatedSequenceLocs.end());
+ llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
@@ -5994,10 +6206,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// Erase every candidate that violates the restrictions above. (It could be
// true that we have viable candidates, so it's not worth bailing out in
// the case that, say, 1 out of 20 candidates violate the restructions.)
- RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- CantGuaranteeValueAcrossCall),
- RepeatedSequenceLocs.end());
+ llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
@@ -6020,7 +6229,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
NumBytesToCreateFrame += 4;
bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
- return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
+ return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
});
// We check to see if CFI Instructions are present, and if they are
@@ -6189,6 +6398,60 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
FrameID = MachineOutlinerNoLRSave;
} else {
SetCandidateCallInfo(MachineOutlinerDefault, 12);
+
+ // Bugzilla ID: 46767
+ // TODO: Check if fixing up the stack more than once is safe so we can
+ // outline these.
+ //
+ // An outline resulting in a caller that requires stack fixups at the
+ // callsite to a callee that also requires stack fixups can happen when
+ // there are no available registers at the candidate callsite for a
+ // candidate that itself also has calls.
+ //
+ // In other words if function_containing_sequence in the following pseudo
+ // assembly requires that we save LR at the point of the call, but there
+ // are no available registers: in this case we save using SP and as a
+ // result the SP offset requires stack fixups by multiples of 16.
+ //
+ // function_containing_sequence:
+ // ...
+ // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
+ // call OUTLINED_FUNCTION_N
+ // restore LR from SP
+ // ...
+ //
+ // OUTLINED_FUNCTION_N:
+ // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
+ // ...
+ // bl foo
+ // restore LR from SP
+ // ret
+ //
+ // Because the code to handle more than one stack fixup does not
+ // currently have the proper checks for legality, these cases will assert
+ // in the AArch64 MachineOutliner. This is because the code to do this
+ // needs more hardening, testing, better checks that generated code is
+ // legal, etc and because it is only verified to handle a single pass of
+ // stack fixup.
+ //
+ // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
+ // these cases until they are known to be handled. Bugzilla 46767 is
+ // referenced in comments at the assert site.
+ //
+ // To avoid asserting (or generating non-legal code on noassert builds)
+ // we remove all candidates which would need more than one stack fixup by
+ // pruning the cases where the candidate has calls while also having no
+ // available LR and having no available general purpose registers to copy
+ // LR to (ie one extra stack save/restore).
+ //
+ if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+ erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
+ return (std::any_of(
+ C.front(), std::next(C.back()),
+ [](const MachineInstr &MI) { return MI.isCall(); })) &&
+ (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C));
+ });
+ }
}
// If we dropped all of the candidates, bail out here.
@@ -6557,7 +6820,7 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
// If v8.3a features are available we can replace a RET instruction by
// RETAA or RETAB and omit the AUT instructions
- if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() &&
+ if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
MBBAUT->getOpcode() == AArch64::RET) {
BuildMI(MBB, MBBAUT, DL,
TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
@@ -6609,9 +6872,12 @@ void AArch64InstrInfo::buildOutlinedFrame(
return MI.isCall() && !MI.isReturn();
};
- if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
+ if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
// Fix up the instructions in the range, since we're going to modify the
// stack.
+
+ // Bugzilla ID: 46767
+ // TODO: Check if fixing up twice is safe so we can outline these.
assert(OF.FrameConstructionID != MachineOutlinerDefault &&
"Can only fix up stack references once");
fixupPostOutline(MBB);
@@ -6668,27 +6934,11 @@ void AArch64InstrInfo::buildOutlinedFrame(
// If a bunch of candidates reach this point they must agree on their return
// address signing. It is therefore enough to just consider the signing
// behaviour of one of them
- const Function &CF = OF.Candidates.front().getMF()->getFunction();
- bool ShouldSignReturnAddr = false;
- if (CF.hasFnAttribute("sign-return-address")) {
- StringRef Scope =
- CF.getFnAttribute("sign-return-address").getValueAsString();
- if (Scope.equals("all"))
- ShouldSignReturnAddr = true;
- else if (Scope.equals("non-leaf") && !IsLeafFunction)
- ShouldSignReturnAddr = true;
- }
+ const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>();
+ bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction);
// a_key is the default
- bool ShouldSignReturnAddrWithAKey = true;
- if (CF.hasFnAttribute("sign-return-address-key")) {
- const StringRef Key =
- CF.getFnAttribute("sign-return-address-key").getValueAsString();
- // Key can either be a_key or b_key
- assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) &&
- "Return address signing key must be either a_key or b_key");
- ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key");
- }
+ bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey();
// If this is a tail call outlined function, then there's already a return.
if (OF.FrameConstructionID == MachineOutlinerTailCall ||
@@ -6847,10 +7097,9 @@ Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
!MI.getOperand(2).isImm())
return None;
- Offset = MI.getOperand(2).getImm() * Sign;
int Shift = MI.getOperand(3).getImm();
assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
- Offset = Offset << Shift;
+ Offset = Sign * (MI.getOperand(2).getImm() << Shift);
}
}
return RegImmPair{MI.getOperand(1).getReg(), Offset};
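The reordering in the hunk above appears intended to apply the shift before the sign; a small sketch of the arithmetic for an instruction like "sub x0, x1, #3, lsl #12". This interpretation and the helper are assumptions, not stated in the patch.

#include <cstdint>

// Illustrative only: Sign = -1 for SUBXri, immediate 3, shift 12.
int64_t exampleAddImmOffset() {
  int64_t Sign = -1, Imm = 3, Shift = 12;
  int64_t Offset = Sign * (Imm << Shift); // -12288
  // The previous order, (Imm * Sign) << Shift, left-shifted a negative value,
  // which is undefined behaviour before C++20; shifting first avoids that.
  return Offset;
}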
@@ -6926,6 +7175,14 @@ uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::ElementSizeMask;
}
+bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
+}
+
+bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
+}
+
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
return AArch64::BLRNoIP;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 298c04d81708..7434987e0617 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -15,7 +15,6 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
-#include "AArch64StackOffset.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -113,6 +112,10 @@ public:
/// Hint that pairing the given load or store is unprofitable.
static void suppressLdStPair(MachineInstr &MI);
+ Optional<ExtAddrMode>
+ getAddrModeFromMemoryOp(const MachineInstr &MemI,
+ const TargetRegisterInfo *TRI) const override;
+
bool getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -188,6 +191,9 @@ public:
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const override;
+ bool analyzeBranchPredicate(MachineBasicBlock &MBB,
+ MachineBranchPredicate &MBP,
+ bool AllowModify) const override;
unsigned removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved = nullptr) const override;
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
@@ -229,9 +235,10 @@ public:
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in ``Root``. All potential patterns are
/// listed in the ``Patterns`` array.
- bool getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const override;
+ bool
+ getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const override;
/// Return true when Inst is associative and commutative so that it can be
/// reassociated.
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
@@ -273,6 +280,12 @@ public:
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
/// Returns the vector element size (B, H, S or D) of an SVE opcode.
uint64_t getElementSizeForOpcode(unsigned Opc) const;
+ /// Returns true if the opcode is for an SVE instruction that sets the
+ /// condition codes as if its results had been fed to a PTEST instruction
+ /// along with the same general predicate.
+ bool isPTestLikeOpcode(unsigned Opc) const;
+ /// Returns true if the opcode is for an SVE WHILE## instruction.
+ bool isWhileOpcode(unsigned Opc) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
static bool isFalkorShiftExtFast(const MachineInstr &MI);
@@ -286,6 +299,13 @@ public:
Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
Register Reg) const override;
+ static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
+ int64_t &NumBytes,
+ int64_t &NumPredicateVectors,
+ int64_t &NumDataVectors);
+ static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
+ int64_t &ByteSized,
+ int64_t &VGSized);
#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"
@@ -314,6 +334,12 @@ private:
/// Returns an unused general-purpose register which can be used for
/// constructing an outlined call if one exists. Returns 0 otherwise.
unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+
+ /// Remove a ptest of a predicate-generating operation that already sets, or
+ /// can be made to set, the condition codes in an identical manner
+ bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
+ unsigned PredReg,
+ const MachineRegisterInfo *MRI) const;
};
/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
@@ -397,6 +423,18 @@ static inline bool isIndirectBranchOpcode(int Opc) {
return false;
}
+static inline bool isPTrueOpcode(unsigned Opc) {
+ switch (Opc) {
+ case AArch64::PTRUE_B:
+ case AArch64::PTRUE_H:
+ case AArch64::PTRUE_S:
+ case AArch64::PTRUE_D:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// Return opcode to be used for indirect calls.
unsigned getBLRCallOpcode(const MachineFunction &MF);
@@ -404,6 +442,7 @@ unsigned getBLRCallOpcode(const MachineFunction &MF);
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bit
#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits
+#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits
// }
namespace AArch64 {
@@ -436,9 +475,14 @@ enum FalseLaneType {
FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};
+// NOTE: This is a bit field.
+static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1);
+static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);
+
#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
+#undef TSFLAG_INSTR_FLAGS
int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f4a5f639e497..171d3dbaa814 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -25,14 +25,16 @@ def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
+def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
+ AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
def HasVH : Predicate<"Subtarget->hasVH()">,
AssemblerPredicate<(all_of FeatureVH), "vh">;
def HasLOR : Predicate<"Subtarget->hasLOR()">,
AssemblerPredicate<(all_of FeatureLOR), "lor">;
-def HasPA : Predicate<"Subtarget->hasPA()">,
- AssemblerPredicate<(all_of FeaturePA), "pa">;
+def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
+ AssemblerPredicate<(all_of FeaturePAuth), "pauth">;
def HasJS : Predicate<"Subtarget->hasJS()">,
AssemblerPredicate<(all_of FeatureJS), "jsconv">;
@@ -46,9 +48,6 @@ def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
def HasNV : Predicate<"Subtarget->hasNV()">,
AssemblerPredicate<(all_of FeatureNV), "nv">;
-def HasRASv8_4 : Predicate<"Subtarget->hasRASv8_4()">,
- AssemblerPredicate<(all_of FeatureRASv8_4), "rasv8_4">;
-
def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
AssemblerPredicate<(all_of FeatureMPAM), "mpam">;
@@ -70,8 +69,8 @@ def HasPMU : Predicate<"Subtarget->hasPMU()">,
def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">;
-def HasFMI : Predicate<"Subtarget->hasFMI()">,
- AssemblerPredicate<(all_of FeatureFMI), "fmi">;
+def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
+ AssemblerPredicate<(all_of FeatureFlagM), "flagm">;
def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
AssemblerPredicate<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
@@ -152,6 +151,16 @@ def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
AssemblerPredicate<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
AssemblerPredicate<(all_of FeatureMatMulFP64), "f64mm">;
+def HasXS : Predicate<"Subtarget->hasXS()">,
+ AssemblerPredicate<(all_of FeatureXS), "xs">;
+def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
+ AssemblerPredicate<(all_of FeatureWFxT), "wfxt">;
+def HasLS64 : Predicate<"Subtarget->hasLS64()">,
+ AssemblerPredicate<(all_of FeatureLS64), "ls64">;
+def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
+ AssemblerPredicate<(all_of FeatureBRBE), "brbe">;
+def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
+ AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@@ -402,6 +411,12 @@ def AArch64call : SDNode<"AArch64ISD::CALL",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+
+def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
+ SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
[SDNPHasChain]>;
def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
@@ -484,7 +499,6 @@ def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
-def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
@@ -504,7 +518,7 @@ def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
- (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
+ (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
@@ -556,6 +570,18 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64srhadd : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>;
def AArch64urhadd : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>;
+def AArch64shadd : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>;
+def AArch64uhadd : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>;
+
+def AArch64uabd_n : SDNode<"AArch64ISD::UABD", SDT_AArch64binvec>;
+def AArch64sabd_n : SDNode<"AArch64ISD::SABD", SDT_AArch64binvec>;
+
+def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
+ [(AArch64uabd_n node:$lhs, node:$rhs),
+ (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
+def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
+ [(AArch64sabd_n node:$lhs, node:$rhs),
+ (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -591,8 +617,8 @@ let RecomputePerFunction = 1 in {
// Avoid generating STRQro if it is slow, unless we're optimizing for code size.
def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
- def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
- def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
+ def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
+ def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
@@ -690,7 +716,8 @@ def : Pat<(AArch64LOADgot tconstpool:$addr),
// 32-bit jump table destination is actually only 2 instructions since we can
// use the table itself as a PC-relative base. But optimization occurs after
// branch relaxation so be pessimistic.
-let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in {
+let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
+ isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
(ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
Sched<[]>;
@@ -774,8 +801,34 @@ def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
let Inst{12} = 0;
let Predicates = [HasTRACEV8_4];
}
+
+def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
+ let CRm{1-0} = 0b11;
+ let Inst{9-8} = 0b10;
+ let Predicates = [HasXS];
+}
+
+let Predicates = [HasWFxT] in {
+def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
+def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
+}
+
+// Branch Record Buffer two-word mnemonic instructions
+class BRBEI<bits<3> op2, string keyword>
+ : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
+ let Inst{31-8} = 0b110101010000100101110010;
+ let Inst{7-5} = op2;
+ let Predicates = [HasBRBE];
+}
+def BRB_IALL: BRBEI<0b100, "\tiall">;
+def BRB_INJ: BRBEI<0b101, "\tinj">;
+
}
+// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
+def : TokenAlias<"INJ", "inj">;
+def : TokenAlias<"IALL", "iall">;
+
// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", int_aarch64_neon_sdot>;
@@ -796,6 +849,23 @@ def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN : SIMD_BFCVTN;
def BFCVTN2 : SIMD_BFCVTN2;
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
+
+// Vector-scalar BFDOT:
+// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
+// register (the instruction uses a single 32-bit lane from it), so the pattern
+// is a bit tricky.
+def : Pat<(v2f32 (int_aarch64_neon_bfdot
+ (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
+ (v4bf16 (bitconvert
+ (v2i32 (AArch64duplane32
+ (v4i32 (bitconvert
+ (v8bf16 (insert_subvector undef,
+ (v4bf16 V64:$Rm),
+ (i64 0))))),
+ VectorIndexS:$idx)))))),
+ (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
+ VectorIndexS:$idx)>;
}
// ARMv8.6A AArch64 matrix multiplication
@@ -895,6 +965,7 @@ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
(FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
}
+
let Predicates = [HasComplxNum, HasNEON] in {
def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
(FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
@@ -908,6 +979,47 @@ let Predicates = [HasComplxNum, HasNEON] in {
}
}
+multiclass FCMLA_PATS<ValueType ty, RegisterClass Reg> {
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+ (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+ (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+ (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+ (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
+}
+
+multiclass FCMLA_LANE_PATS<ValueType ty, RegisterClass Reg, dag RHSDup> {
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
+ (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
+ (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
+ (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
+ (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
+}
+
+
+let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
+ defm : FCMLA_PATS<v4f16, V64>;
+ defm : FCMLA_PATS<v8f16, V128>;
+
+ defm : FCMLA_LANE_PATS<v4f16, V64,
+ (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
+ defm : FCMLA_LANE_PATS<v8f16, V128,
+ (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
+}
+let Predicates = [HasComplxNum, HasNEON] in {
+ defm : FCMLA_PATS<v2f32, V64>;
+ defm : FCMLA_PATS<v4f32, V128>;
+ defm : FCMLA_PATS<v2f64, V128>;
+
+ defm : FCMLA_LANE_PATS<v4f32, V128,
+ (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
+}
+
// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space and so can be used for
// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
@@ -961,7 +1073,7 @@ def : InstAlias<"autib1716", (AUTIB1716), 0>;
def : InstAlias<"xpaclri", (XPACLRI), 0>;
// These pointer authentication instructions require armv8.3a
-let Predicates = [HasPA] in {
+let Predicates = [HasPAuth] in {
// When PA is enabled, a better mnemonic should be emitted.
def : InstAlias<"paciaz", (PACIAZ), 1>;
@@ -992,8 +1104,8 @@ let Predicates = [HasPA] in {
defm PAC : SignAuth<0b000, 0b010, "pac">;
defm AUT : SignAuth<0b001, 0b011, "aut">;
- def XPACI : SignAuthZero<0b100, 0b00, "xpaci">;
- def XPACD : SignAuthZero<0b100, 0b01, "xpacd">;
+ def XPACI : ClearAuth<0, "xpaci">;
+ def XPACD : ClearAuth<1, "xpacd">;
def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>;
// Combined Instructions
@@ -1028,7 +1140,7 @@ let Predicates = [HasPA] in {
}
// v8.3a floating point conversion for javascript
-let Predicates = [HasJS, HasFPARMv8] in
+let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
"fjcvtzs",
[(set GPR32:$Rd,
@@ -1037,7 +1149,7 @@ def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
} // HasJS, HasFPARMv8
// v8.4 Flag manipulation instructions
-let Predicates = [HasFMI] in {
+let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
let Inst{20-5} = 0b0000001000000000;
}
@@ -1045,7 +1157,7 @@ def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
"{\t$Rn, $imm, $mask}">;
-} // HasFMI
+} // HasFlagM
// v8.5 flag manipulation instructions
let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
@@ -1094,9 +1206,12 @@ def HWASAN_CHECK_MEMACCESS : Pseudo<
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
[(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
Sched<[]>;
+}
+
+let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
- [(int_hwasan_check_memaccess_shortgranules X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
+ [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
Sched<[]>;
}
@@ -1443,8 +1558,16 @@ def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
+def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
+ (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
+ (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
(SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
+ (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
+ (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
(UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
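For context, these are the kinds of C++ expressions the new SMADDL/UMADDL patterns above are meant to select into a single widening multiply-add against XZR; the functions are illustrative and actual selection depends on the surrounding IR.

#include <cstdint>

// Both operands carry only 32 bits of signed data: sext_inreg * sext_inreg.
int64_t widening_smul(int64_t a, int64_t b) {
  return (int64_t)(int32_t)a * (int64_t)(int32_t)b; // -> smaddl ..., xzr
}

// (a & 0xFFFFFFFF) * zext(b): matches the new UMADDLrrr pattern.
uint64_t widening_umul(uint64_t a, uint32_t b) {
  return (a & 0xFFFFFFFFu) * (uint64_t)b;           // -> umaddl ..., xzr
}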
@@ -2031,6 +2154,8 @@ let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
Sched<[WriteBrReg]>,
PseudoInstExpansion<(BLR GPR64:$Rn)>;
+ def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
+ Sched<[WriteBrReg]>;
} // isCall
def : Pat<(AArch64call GPR64:$Rn),
@@ -2040,6 +2165,10 @@ def : Pat<(AArch64call GPR64noip:$Rn),
(BLRNoIP GPR64noip:$Rn)>,
Requires<[SLSBLRMitigation]>;
+def : Pat<(AArch64call_rvmarker GPR64:$Rn),
+ (BLR_RVMARKER GPR64:$Rn)>,
+ Requires<[NoSLSBLRMitigation]>;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch
@@ -3701,18 +3830,6 @@ def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
(FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-// And here "(-a) + b*(-c)"
-
-let Predicates = [HasNEON, HasFullFP16] in
-def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))),
- (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
-
-def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
- (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-
-def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
- (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-
//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//
@@ -3783,7 +3900,7 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
// Floating point immediate move.
//===----------------------------------------------------------------------===//
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}
@@ -3792,7 +3909,7 @@ defm FMOV : FPMoveImmediate<"fmov">;
//===----------------------------------------------------------------------===//
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- int_aarch64_neon_uabd>;
+ AArch64uabd>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
(zext (v8i8 V64:$opB))))),
@@ -3920,19 +4037,11 @@ def : Pat<(AArch64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>;
def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;
-def : Pat<(AArch64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(AArch64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(AArch64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(AArch64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-
def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
@@ -4038,17 +4147,6 @@ defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
-// The following def pats catch the case where the LHS of an FMA is negated.
-// The TriOpFrag above catches the case where the middle operand is negated.
-def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
- (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;
-
-def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
- (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;
-
-def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
- (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
-
defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
@@ -4062,9 +4160,9 @@ defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
-defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
-defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
+ TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
+defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
@@ -4081,9 +4179,9 @@ defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
-defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
-defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
+ TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
+defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
@@ -4481,6 +4579,10 @@ def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
(FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
(FCVTPUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
+ (FCVTZSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
+ (FCVTZUv1i64 FPR64:$Rn)>;
def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
(FRECPEv1f16 FPR16:$Rn)>;
@@ -4652,9 +4754,9 @@ defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
- int_aarch64_neon_sabd>;
+ AArch64sabd>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
- int_aarch64_neon_sabd>;
+ AArch64sabd>;
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
@@ -4675,20 +4777,58 @@ defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
- int_aarch64_neon_uabd>;
+ AArch64uabd>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
- BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
+ BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
- BinOpFrag<(add node:$LHS, (zext node:$RHS))>>;
+ BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
- BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>;
+ BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
- BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
+ BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
+
+// Additional patterns for [SU]ML[AS]L
+multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v4i16 (opnode
+ V64:$Ra,
+ (v4i16 (extract_subvector
+ (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
+ (i64 0))))),
+ (EXTRACT_SUBREG (v8i16 (INST8B
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
+ V64:$Rn, V64:$Rm)), dsub)>;
+ def : Pat<(v2i32 (opnode
+ V64:$Ra,
+ (v2i32 (extract_subvector
+ (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
+ (i64 0))))),
+ (EXTRACT_SUBREG (v4i32 (INST4H
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
+ V64:$Rn, V64:$Rm)), dsub)>;
+ def : Pat<(v1i64 (opnode
+ V64:$Ra,
+ (v1i64 (extract_subvector
+ (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
+ (i64 0))))),
+ (EXTRACT_SUBREG (v2i64 (INST2S
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
+ V64:$Rn, V64:$Rm)), dsub)>;
+}
+
+defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_umull,
+ UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
+defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_smull,
+ SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
+defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_umull,
+ UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
+defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_smull,
+ SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
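As a hedged sketch (not part of the patch), the multiclass above matches accumulation of only the low half of a widening vector multiply; for the v4i16/UMLAL case the scalar-equivalent C++ is roughly:

  #include <cstdint>

  // acc[0..3] += low half of the widening v8i8 x v8i8 multiply.
  void umlal_low_half(uint16_t acc[4], const uint8_t a[8], const uint8_t b[8]) {
    for (int i = 0; i < 4; ++i)
      acc[i] = (uint16_t)(acc[i] + (uint16_t)a[i] * (uint16_t)b[i]);
  }

The patterns widen the whole operation to the 128-bit UMLAL/SMLAL form and then extract the low D register, avoiding a separate extract of the multiply result.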
// Additional patterns for SMULL and UMULL
multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
@@ -4901,6 +5041,26 @@ defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
+
+let Predicates = [HasFullFP16] in {
+def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
+ (FADDPv2i16p
+ (EXTRACT_SUBREG
+ (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))),
+ dsub))>;
+def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
+ (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>;
+}
+def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
+ (FADDPv2i32p
+ (EXTRACT_SUBREG
+ (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))),
+ dsub))>;
+def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
+ (FADDPv2i32p V64:$Rn)>;
+def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
+ (FADDPv2i64p V128:$Rn)>;
+
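As an informal illustration (not part of the patch), the vecreduce_fadd patterns above lower a floating-point add reduction to a tree of pairwise FADDP operations; for v4f32 the evaluation order is roughly:

  // Pairwise reduction tree used by the FADDP-based lowering for v4f32.
  float vecreduce_fadd_v4f32(const float v[4]) {
    float lo = v[0] + v[1];  // FADDP, lane 0 of the pairwise result
    float hi = v[2] + v[3];  // FADDP, lane 1 of the pairwise result
    return lo + hi;          // scalar FADDP of the remaining pair
  }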
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
@@ -5152,6 +5312,16 @@ def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
(i64 0)),
dsub)>;
+def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0),
+ (i64 VectorIndexH:$imm)),
+ (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
+def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0),
+ (i64 VectorIndexS:$imm)),
+ (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
+def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0),
+ (i64 VectorIndexD:$imm)),
+ (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>;
+
def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
(f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
(INSvi16lane
@@ -6663,7 +6833,17 @@ def : Pat<(i32 (trunc GPR64sp:$src)),
// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
-def : Pat<(debugtrap), (BRK 0xF000)>, Requires<[IsWindows]>;
+def : Pat<(debugtrap), (BRK 0xF000)>;
+
+def ubsan_trap_xform : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
+}]>;
+
+def ubsan_trap_imm : TImmLeaf<i32, [{
+ return isUInt<8>(Imm);
+}], ubsan_trap_xform>;
+
+def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
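As a small worked example (not part of the patch), the SDNodeXForm above folds the 8-bit ubsantrap kind into the BRK immediate by OR-ing in 'U' (0x55) shifted into the upper byte; the helper below is hypothetical and only mirrors that arithmetic:

  #include <cassert>
  #include <cstdint>

  // e.g. kind 0x01 -> BRK immediate 0x5501.
  uint32_t ubsanTrapBrkImm(uint32_t kind) {
    assert(kind <= 0xFF && "ubsan_trap_imm requires an 8-bit kind");
    return kind | ('U' << 8);
  }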
// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high
@@ -7459,6 +7639,9 @@ def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
(vector_extract (v4f32 FPR128:$Rn), (i64 1))),
(f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
+def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
+ (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
+ (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
@@ -7661,6 +7844,23 @@ let AddedComplexity = 10 in {
// FIXME: add SVE dot-product patterns.
}
+let Predicates = [HasLS64] in {
+ def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
+ (outs GPR64x8:$Rt)>;
+ def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
+ (outs)>;
+ def ST64BV: Store64BV<0b011, "st64bv">;
+ def ST64BV0: Store64BV<0b010, "st64bv0">;
+
+ class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
+ : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
+ (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;
+
+ def : ST64BPattern<int_aarch64_st64b, ST64B>;
+ def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
+ def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
+}
+
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index d975b8bd04fe..ad180cb2935e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1186,8 +1186,10 @@ bool AArch64LoadStoreOpt::findMatchingStore(
// store instruction writes and the stored value is not modified, we can
// promote the load. Since we do not handle stores with pre-/post-index,
// it's unnecessary to check if BaseReg is modified by the store itself.
+ // Also, we can't handle stores without an immediate offset operand,
+ // since the offset operand might instead be the address of a global variable.
if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
- BaseReg == getLdStBaseOp(MI).getReg() &&
+ BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&
isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
StoreI = MBBI;
@@ -1550,16 +1552,27 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
continue;
}
}
- // If the destination register of the loads is the same register, bail
- // and keep looking. A load-pair instruction with both destination
- // registers the same is UNPREDICTABLE and will result in an exception.
- if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
+ // If the destination register of one load is the same register or a
+ // sub/super register of the other load, bail and keep looking. A
+ // load-pair instruction with both destination registers the same is
+ // UNPREDICTABLE and will result in an exception.
+ if (MayLoad &&
+ TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
MemInsns.push_back(&MI);
continue;
}
+ // If the BaseReg has been modified, then we cannot do the optimization.
+ // For example, in the following pattern
+ // ldr x1 [x2]
+ // ldr x2 [x3]
+ // ldr x4 [x2, #8],
+ // the first and third ldr cannot be converted to ldp x1, x4, [x2]
+ if (!ModifiedRegUnits.available(BaseReg))
+ return E;
+
// If the Rt of the second instruction was not modified or used between
// the two instructions and none of the instructions between the second
// and first alias with the second, we can combine the second into the
@@ -1750,6 +1763,11 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
return false;
}
+static bool needsWinCFI(const MachineFunction *MF) {
+ return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ MF->getFunction().needsUnwindTableEntry();
+}
+
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
@@ -1790,14 +1808,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// the memory access (I) and the increment (MBBI) can access the memory
// region defined by [SP, MBBI].
const bool BaseRegSP = BaseReg == AArch64::SP;
- if (BaseRegSP) {
+ if (BaseRegSP && needsWinCFI(I->getMF())) {
// FIXME: For now, we always block the optimization over SP on Windows
// targets, as it would require adjusting the unwind/debug info; getting
// the unwind info wrong can actually cause a miscompile.
- const MCAsmInfo *MAI = I->getMF()->getTarget().getMCAsmInfo();
- if (MAI->usesWindowsCFI() &&
- I->getMF()->getFunction().needsUnwindTableEntry())
- return E;
+ return E;
}
for (unsigned Count = 0; MBBI != E && Count < Limit;
@@ -1853,6 +1868,14 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
}
}
+ const bool BaseRegSP = BaseReg == AArch64::SP;
+ if (BaseRegSP && needsWinCFI(I->getMF())) {
+ // FIXME: For now, we always block the optimization over SP on Windows
+ // targets, as it would require adjusting the unwind/debug info; getting
+ // the unwind info wrong can actually cause a miscompile.
+ return E;
+ }
+
// Track which register units have been modified and used between the first
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index afd5ae6bcbf2..10e191ff44cf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -203,6 +203,12 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO,
RefFlags |= AArch64MCExpr::VK_SABS;
} else {
RefFlags |= AArch64MCExpr::VK_ABS;
+
+ if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
+ RefFlags |= AArch64MCExpr::VK_PAGE;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
+ AArch64II::MO_PAGEOFF)
+ RefFlags |= AArch64MCExpr::VK_PAGEOFF | AArch64MCExpr::VK_NC;
}
if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3)
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index a37e38072554..41343ba9700c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -14,6 +14,9 @@
//===----------------------------------------------------------------------===//
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64InstrInfo.h"
+#include <llvm/IR/Metadata.h>
+#include <llvm/IR/Module.h>
using namespace llvm;
@@ -30,3 +33,82 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
if (YamlMFI.HasRedZone.hasValue())
HasRedZone = YamlMFI.HasRedZone;
}
+
+static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
+ // The function should be signed in the following situations:
+ // - sign-return-address=all
+ // - sign-return-address=non-leaf and the function spills the LR
+ if (!F.hasFnAttribute("sign-return-address")) {
+ const Module &M = *F.getParent();
+ if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("sign-return-address"))) {
+ if (Sign->getZExtValue()) {
+ if (const auto *All = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("sign-return-address-all")))
+ return {true, All->getZExtValue()};
+ return {true, false};
+ }
+ }
+ return {false, false};
+ }
+
+ StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
+ if (Scope.equals("none"))
+ return {false, false};
+
+ if (Scope.equals("all"))
+ return {true, true};
+
+ assert(Scope.equals("non-leaf"));
+ return {true, false};
+}
+
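The helper above reduces to a small decision table; the standalone sketch below (hypothetical, not in the patch) summarises the handling when the "sign-return-address" function attribute is present:

  #include <string>
  #include <utility>

  // {SignReturnAddress, SignReturnAddressAll} for a given attribute value.
  std::pair<bool, bool> signReturnAddressForScope(const std::string &scope) {
    if (scope == "none")
      return {false, false};  // never sign
    if (scope == "all")
      return {true, true};    // sign every function
    return {true, false};     // "non-leaf": sign only when LR is spilled
  }

When the attribute is absent, the code above falls back to the module-level "sign-return-address" and "sign-return-address-all" flags.
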
+static bool ShouldSignWithBKey(const Function &F) {
+ if (!F.hasFnAttribute("sign-return-address-key")) {
+ if (const auto *BKey = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("sign-return-address-with-bkey")))
+ return BKey->getZExtValue();
+ return false;
+ }
+
+ const StringRef Key =
+ F.getFnAttribute("sign-return-address-key").getValueAsString();
+ assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
+ return Key.equals_lower("b_key");
+}
+
+AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF) : MF(MF) {
+ // If we already know that the function doesn't have a redzone, set
+ // HasRedZone here.
+ if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
+ HasRedZone = false;
+
+ const Function &F = MF.getFunction();
+ std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(F);
+ SignWithBKey = ShouldSignWithBKey(F);
+
+ if (!F.hasFnAttribute("branch-target-enforcement")) {
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("branch-target-enforcement")))
+ BranchTargetEnforcement = BTE->getZExtValue();
+ return;
+ }
+
+ const StringRef BTIEnable = F.getFnAttribute("branch-target-enforcement").getValueAsString();
+ assert(BTIEnable.equals_lower("true") || BTIEnable.equals_lower("false"));
+ BranchTargetEnforcement = BTIEnable.equals_lower("true");
+}
+
+bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const {
+ if (!SignReturnAddress)
+ return false;
+ if (SignReturnAddressAll)
+ return true;
+ return SpillsLR;
+}
+
+bool AArch64FunctionInfo::shouldSignReturnAddress() const {
+ return shouldSignReturnAddress(llvm::any_of(
+ MF.getFrameInfo().getCalleeSavedInfo(),
+ [](const auto &Info) { return Info.getReg() == AArch64::LR; }));
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 84aa53f2bece..f60e2b6c316e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include <cassert>
@@ -36,6 +35,9 @@ class MachineInstr;
/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private AArch64-specific information for each MachineFunction.
class AArch64FunctionInfo final : public MachineFunctionInfo {
+ /// Backreference to the machine function.
+ MachineFunction &MF;
+
/// Number of bytes of arguments this function has on the stack. If the callee
/// is expected to restore the argument stack this should be a multiple of 16,
/// all usable during a tail call.
@@ -126,26 +128,40 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// that must be forwarded to every musttail call.
SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
- // Offset from SP-at-entry to the tagged base pointer.
- // Tagged base pointer is set up to point to the first (lowest address) tagged
- // stack slot.
- unsigned TaggedBasePointerOffset = 0;
+ /// FrameIndex for the tagged base pointer.
+ Optional<int> TaggedBasePointerIndex;
+
+ /// Offset from SP-at-entry to the tagged base pointer.
+ /// Tagged base pointer is set up to point to the first (lowest address)
+ /// tagged stack slot.
+ unsigned TaggedBasePointerOffset;
/// OutliningStyle denotes, if a function was outlined, how it was outlined,
/// e.g. Tail Call, Thunk, or Function if none apply.
Optional<std::string> OutliningStyle;
-public:
- AArch64FunctionInfo() = default;
+ // Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus
+ // CalleeSavedStackSize) to the address of the frame record.
+ int CalleeSaveBaseToFrameRecordOffset = 0;
- explicit AArch64FunctionInfo(MachineFunction &MF) {
- (void)MF;
+ /// SignReturnAddress is true if PAC-RET is enabled for the function with
+ /// defaults being sign non-leaf functions only, with the B key.
+ bool SignReturnAddress = false;
+
+ /// SignReturnAddressAll modifies the default PAC-RET mode to signing leaf
+ /// functions as well.
+ bool SignReturnAddressAll = false;
+
+ /// SignWithBKey modifies the default PAC-RET mode to signing with the B key.
+ bool SignWithBKey = false;
+
+ /// BranchTargetEnforcement enables placing BTI instructions at potential
+ /// indirect branch destinations.
+ bool BranchTargetEnforcement = false;
+
+public:
+ explicit AArch64FunctionInfo(MachineFunction &MF);
- // If we already know that the function doesn't have a redzone, set
- // HasRedZone here.
- if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
- HasRedZone = false;
- }
void initializeBaseYamlFields(const yaml::AArch64FunctionInfo &YamlMFI);
unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
@@ -281,15 +297,14 @@ public:
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
unsigned getJumpTableEntrySize(int Idx) const {
- auto It = JumpTableEntryInfo.find(Idx);
- if (It != JumpTableEntryInfo.end())
- return It->second.first;
- return 4;
+ return JumpTableEntryInfo[Idx].first;
}
MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const {
- return JumpTableEntryInfo.find(Idx)->second.second;
+ return JumpTableEntryInfo[Idx].second;
}
void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) {
+ if ((unsigned)Idx >= JumpTableEntryInfo.size())
+ JumpTableEntryInfo.resize(Idx+1);
JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym);
}
@@ -331,6 +346,11 @@ public:
return ForwardedMustTailRegParms;
}
+ Optional<int> getTaggedBasePointerIndex() const {
+ return TaggedBasePointerIndex;
+ }
+ void setTaggedBasePointerIndex(int Index) { TaggedBasePointerIndex = Index; }
+
unsigned getTaggedBasePointerOffset() const {
return TaggedBasePointerOffset;
}
@@ -338,12 +358,26 @@ public:
TaggedBasePointerOffset = Offset;
}
+ int getCalleeSaveBaseToFrameRecordOffset() const {
+ return CalleeSaveBaseToFrameRecordOffset;
+ }
+ void setCalleeSaveBaseToFrameRecordOffset(int Offset) {
+ CalleeSaveBaseToFrameRecordOffset = Offset;
+ }
+
+ bool shouldSignReturnAddress() const;
+ bool shouldSignReturnAddress(bool SpillsLR) const;
+
+ bool shouldSignWithBKey() const { return SignWithBKey; }
+
+ bool branchTargetEnforcement() const { return BranchTargetEnforcement; }
+
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
- DenseMap<int, std::pair<unsigned, MCSymbol *>> JumpTableEntryInfo;
+ SmallVector<std::pair<unsigned, MCSymbol *>, 2> JumpTableEntryInfo;
};
namespace yaml {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index 9a2103579a6a..f3b8ef16d6f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -21,7 +21,7 @@ namespace {
/// CMN, CMP, TST followed by Bcc
static bool isArithmeticBccPair(const MachineInstr *FirstMI,
- const MachineInstr &SecondMI) {
+ const MachineInstr &SecondMI, bool CmpOnly) {
if (SecondMI.getOpcode() != AArch64::Bcc)
return false;
@@ -29,6 +29,13 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI,
if (FirstMI == nullptr)
return true;
+ // If we're in CmpOnly mode, we only fuse arithmetic instructions that
+ // discard their result.
+ if (CmpOnly && !(FirstMI->getOperand(0).getReg() == AArch64::XZR ||
+ FirstMI->getOperand(0).getReg() == AArch64::WZR)) {
+ return false;
+ }
+
switch (FirstMI->getOpcode()) {
case AArch64::ADDSWri:
case AArch64::ADDSWrr:
@@ -380,8 +387,11 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
// All checking functions assume that the 1st instr is a wildcard if it is
// unspecified.
- if (ST.hasArithmeticBccFusion() && isArithmeticBccPair(FirstMI, SecondMI))
- return true;
+ if (ST.hasCmpBccFusion() || ST.hasArithmeticBccFusion()) {
+ bool CmpOnly = !ST.hasArithmeticBccFusion();
+ if (isArithmeticBccPair(FirstMI, SecondMI, CmpOnly))
+ return true;
+ }
if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 0d75ab7ac8a9..019220e3a527 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -408,6 +408,11 @@ bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) {
O.getReg() != CmpReg;
}))
continue;
+
+ // Don't remove a move immediate that implicitly defines the upper
+ // bits as different.
+ if (TRI->isSuperRegister(DefReg, KnownReg.Reg) && KnownReg.Imm < 0)
+ continue;
}
if (IsCopy)
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 3e9c8c7b6df2..f90856d14b2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -15,7 +15,6 @@
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
-#include "AArch64StackOffset.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
@@ -25,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
@@ -240,6 +240,14 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
}
+const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask(
+ const MachineFunction &MF) const {
+ if (MF.getSubtarget<AArch64Subtarget>().isTargetLinux())
+ return CSR_AArch64_AAPCS_RegMask;
+
+ return nullptr;
+}
+
const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
if (TT.isOSDarwin())
return CSR_Darwin_AArch64_TLS_RegMask;
@@ -326,16 +334,16 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
}
bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
- return std::any_of(std::begin(*AArch64::GPR64argRegClass.MC),
- std::end(*AArch64::GPR64argRegClass.MC),
- [this, &MF](MCPhysReg r){return isReservedReg(MF, r);});
+ return llvm::any_of(*AArch64::GPR64argRegClass.MC, [this, &MF](MCPhysReg r) {
+ return isReservedReg(MF, r);
+ });
}
void AArch64RegisterInfo::emitReservedArgRegCallError(
const MachineFunction &MF) const {
const Function &F = MF.getFunction();
- F.getContext().diagnose(DiagnosticInfoUnsupported{F, "AArch64 doesn't support"
- " function calls if any of the argument registers is reserved."});
+ F.getContext().diagnose(DiagnosticInfoUnsupported{F, ("AArch64 doesn't support"
+ " function calls if any of the argument registers is reserved.")});
}
bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
@@ -517,16 +525,16 @@ bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
Register BaseReg,
int64_t Offset) const {
assert(MI && "Unable to get the legal offset for nil instruction.");
- StackOffset SaveOffset(Offset, MVT::i8);
+ StackOffset SaveOffset = StackOffset::getFixed(Offset);
return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal;
}
/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
/// at the beginning of the basic block.
-void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
- Register BaseReg,
- int FrameIdx,
- int64_t Offset) const {
+Register
+AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ int FrameIdx,
+ int64_t Offset) const {
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
if (Ins != MBB->end())
@@ -536,6 +544,7 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
@@ -543,19 +552,21 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx)
.addImm(Offset)
.addImm(Shifter);
+
+ return BaseReg;
}
void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
// ARM doesn't need the general 64-bit offsets
- StackOffset Off(Offset, MVT::i8);
+ StackOffset Off = StackOffset::getFixed(Offset);
unsigned i = 0;
-
while (!MI.getOperand(i).isFI()) {
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
+
const MachineFunction *MF = MI.getParent()->getParent();
const AArch64InstrInfo *TII =
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
@@ -585,6 +596,33 @@ createScratchRegisterForInstruction(MachineInstr &MI,
}
}
+void AArch64RegisterInfo::getOffsetOpcodes(
+ const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const {
+ // The smallest scalable element supported by scaled SVE addressing
+ // modes are predicates, which are 2 scalable bytes in size. So the scalable
+ // byte offset must always be a multiple of 2.
+ assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+ // Add fixed-sized offset using existing DIExpression interface.
+ DIExpression::appendOffset(Ops, Offset.getFixed());
+
+ unsigned VG = getDwarfRegNum(AArch64::VG, true);
+ int64_t VGSized = Offset.getScalable() / 2;
+ if (VGSized > 0) {
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(VGSized);
+ Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
+ Ops.push_back(dwarf::DW_OP_mul);
+ Ops.push_back(dwarf::DW_OP_plus);
+ } else if (VGSized < 0) {
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(-VGSized);
+ Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
+ Ops.push_back(dwarf::DW_OP_mul);
+ Ops.push_back(dwarf::DW_OP_minus);
+ }
+}
+
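The DWARF expression built above can be mirrored outside the register-info class; the following sketch (hypothetical helper, raw opcode values taken from the DWARF specification) shows how the scalable part of a StackOffset becomes a VG-scaled term:

  #include <cstdint>
  #include <vector>

  // Append "(scalableBytes / 2) * VG" as a +/- term of a DWARF expression.
  void appendScalableTerm(std::vector<uint64_t> &ops, int64_t scalableBytes,
                          uint64_t vgDwarfReg) {
    int64_t vgScaled = scalableBytes / 2;  // predicates are 2 scalable bytes
    if (vgScaled == 0)
      return;
    ops.push_back(0x10);                              // DW_OP_constu
    ops.push_back(vgScaled > 0 ? vgScaled : -vgScaled);
    ops.insert(ops.end(), {0x92, vgDwarfReg, 0});     // DW_OP_bregx VG, 0
    ops.push_back(0x1e);                              // DW_OP_mul
    ops.push_back(vgScaled > 0 ? 0x22 : 0x1c);        // DW_OP_plus / DW_OP_minus
  }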
void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
@@ -597,29 +635,31 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const AArch64InstrInfo *TII =
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const AArch64FrameLowering *TFI = getFrameLowering(MF);
-
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
bool Tagged =
MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED;
Register FrameReg;
- // Special handling of dbg_value, stackmap and patchpoint instructions.
- if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
- MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ // Special handling of dbg_value, stackmap, patchpoint and statepoint instructions.
+ if (MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) {
StackOffset Offset =
TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
/*PreferFP=*/true,
/*ForSimm=*/false);
- Offset += StackOffset(MI.getOperand(FIOperandNum + 1).getImm(), MVT::i8);
+ Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getBytes());
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
return;
}
if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
MachineOperand &FI = MI.getOperand(FIOperandNum);
- int Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
- FI.ChangeToImmediate(Offset);
+ StackOffset Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
+ assert(!Offset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ FI.ChangeToImmediate(Offset.getFixed());
return;
}
@@ -628,12 +668,11 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// TAGPstack must use the virtual frame register in its 3rd operand.
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
FrameReg = MI.getOperand(3).getReg();
- Offset = {MFI.getObjectOffset(FrameIndex) +
- AFI->getTaggedBasePointerOffset(),
- MVT::i8};
+ Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
+ AFI->getTaggedBasePointerOffset());
} else if (Tagged) {
- StackOffset SPOffset = {
- MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), MVT::i8};
+ StackOffset SPOffset = StackOffset::getFixed(
+ MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize());
if (MFI.hasVarSizedObjects() ||
isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) !=
(AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) {
@@ -654,8 +693,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
FrameReg = AArch64::SP;
- Offset = {MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(),
- MVT::i8};
+ Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
+ (int64_t)MFI.getStackSize());
} else {
Offset = TFI->resolveFrameIndexReference(
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
@@ -726,3 +765,19 @@ unsigned AArch64RegisterInfo::getLocalAddressRegister(
return getBaseRegister();
return getFrameRegister(MF);
}
+
+/// SrcRC and DstRC will be morphed into NewRC if this returns true
+bool AArch64RegisterInfo::shouldCoalesce(
+ MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
+ const TargetRegisterClass *DstRC, unsigned DstSubReg,
+ const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
+ if (MI->isCopy() &&
+ ((DstRC->getID() == AArch64::GPR64RegClassID) ||
+ (DstRC->getID() == AArch64::GPR64commonRegClassID)) &&
+ MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg())
+ // Do not coalesce in the case of a 32-bit subregister copy that
+ // implements a 32-to-64-bit zero extension and therefore relies on
+ // the upper 32 bits being zeroed.
+ return false;
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 7b20f181e76d..0c871ac089a7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -72,6 +72,10 @@ public:
// Funclets on ARM64 Windows don't preserve any registers.
const uint32_t *getNoPreservedMask() const override;
+ // Unwinders may not preserve all Neon and SVE registers.
+ const uint32_t *
+ getCustomEHPadPreservedMask(const MachineFunction &MF) const override;
+
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i64 first argument if the calling convention
/// is one that can (partially) model this attribute with a preserved mask
@@ -103,9 +107,8 @@ public:
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB, Register BaseReg,
- int FrameIdx,
- int64_t Offset) const override;
+ Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
+ int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
@@ -125,6 +128,15 @@ public:
unsigned getLocalAddressRegister(const MachineFunction &MF) const;
bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const;
+
+ /// SrcRC and DstRC will be morphed into NewRC if this returns true
+ bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
+ unsigned SubReg, const TargetRegisterClass *DstRC,
+ unsigned DstSubReg, const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const override;
+
+ void getOffsetOpcodes(const StackOffset &Offset,
+ SmallVectorImpl<uint64_t> &Ops) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 54b351fda053..28d1988b8a5f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -711,6 +711,32 @@ def XSeqPairClassOperand :
//===----- END: v8.1a atomic CASP register operands -----------------------===//
+//===----------------------------------------------------------------------===//
+// Armv8.7a accelerator extension register operands: 8 consecutive GPRs
+// starting with an even one
+
+let Namespace = "AArch64" in {
+ foreach i = 0-7 in
+ def "x8sub_"#i : SubRegIndex<64, !mul(64, i)>;
+}
+
+def Tuples8X : RegisterTuples<
+ !foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)),
+ !foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>;
+
+def GPR64x8Class : RegisterClass<"AArch64", [i64], 64, (trunc Tuples8X, 12)>;
+def GPR64x8AsmOp : AsmOperandClass {
+ let Name = "GPR64x8";
+ let ParserMethod = "tryParseGPR64x8";
+ let RenderMethod = "addRegOperands";
+}
+def GPR64x8 : RegisterOperand<GPR64x8Class, "printGPR64x8"> {
+ let ParserMatchClass = GPR64x8AsmOp;
+ let PrintMethod = "printGPR64x8";
+}
+
+//===----- END: v8.7a accelerator extension register operands -------------===//
+
// SVE predicate registers
def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>;
def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index fc31e701d3af..03b32967a212 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -221,8 +221,9 @@ shouldReplaceInst(MachineFunction *MF, const MCInstrDesc *InstDesc,
// if so, return it.
std::string Subtarget = std::string(SchedModel.getSubtargetInfo()->getCPU());
auto InstID = std::make_pair(InstDesc->getOpcode(), Subtarget);
- if (SIMDInstrTable.find(InstID) != SIMDInstrTable.end())
- return SIMDInstrTable[InstID];
+ auto It = SIMDInstrTable.find(InstID);
+ if (It != SIMDInstrTable.end())
+ return It->second;
unsigned SCIdx = InstDesc->getSchedClass();
const MCSchedClassDesc *SCDesc =
@@ -290,8 +291,9 @@ bool AArch64SIMDInstrOpt::shouldExitEarly(MachineFunction *MF, Subpass SP) {
case Interleave:
std::string Subtarget =
std::string(SchedModel.getSubtargetInfo()->getCPU());
- if (InterlEarlyExit.find(Subtarget) != InterlEarlyExit.end())
- return InterlEarlyExit[Subtarget];
+ auto It = InterlEarlyExit.find(Subtarget);
+ if (It != InterlEarlyExit.end())
+ return It->second;
for (auto &I : IRT) {
OriginalMCID = &TII->get(I.OrigOpc);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4f29f2f18185..e09b8401c0e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -152,6 +152,8 @@ def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>;
def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>;
def AArch64fminv_p : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>;
def AArch64fminnmv_p : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>;
+def AArch64saddv_p : SDNode<"AArch64ISD::SADDV_PRED", SDT_AArch64Reduce>;
+def AArch64uaddv_p : SDNode<"AArch64ISD::UADDV_PRED", SDT_AArch64Reduce>;
def AArch64smaxv_p : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
def AArch64umaxv_p : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
@@ -164,29 +166,83 @@ def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;
def SDT_AArch64Arith : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
- SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>
+ SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>
]>;
def SDT_AArch64FMA : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,
- SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
+ SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
]>;
// Predicated operations with the result of inactive lanes being unspecified.
def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
+def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>;
def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
+def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;
+def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>;
+def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>;
+def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
+def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
+def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>;
+def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>;
+def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>;
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
+def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
+def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
+def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
+def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
+def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
-// Merging op1 into the inactive lanes.
-def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64lsl_m1 : SDNode<"AArch64ISD::SHL_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64lsr_m1 : SDNode<"AArch64ISD::SRL_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64asr_m1 : SDNode<"AArch64ISD::SRA_MERGE_OP1", SDT_AArch64Arith>;
+def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4>
+]>;
+
+// Predicated operations with the result of inactive lanes provided by the last operand.
+def AArch64clz_mt : SDNode<"AArch64ISD::CTLZ_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64cnt_mt : SDNode<"AArch64ISD::CTPOP_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64fabs_mt : SDNode<"AArch64ISD::FABS_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64abs_mt : SDNode<"AArch64ISD::ABS_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64neg_mt : SDNode<"AArch64ISD::NEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
+def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
+def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frecpx_mt : SDNode<"AArch64ISD::FRECPX_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>;
+
+// These are like the above, but we don't yet need ISD nodes for them. They allow
+// a single pattern to match both the intrinsic and ISD operand layouts.
+def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cls node:$pt, node:$pg, node:$op)]>;
+def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>;
+def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>;
+
+def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
+ SDTCVecEltisVT<1,i1>
+]>;
+
+def SDT_AArch64FCVTR : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVec<4>,
+ SDTCVecEltisVT<1,i1>
+]>;
+
+def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVTR>;
+def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>;
+def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>;
+def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>;
+def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>;
+def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>;
def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
@@ -207,6 +263,24 @@ def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
+def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs),
+ [(setoge node:$lhs, node:$rhs),
+ (setge node:$lhs, node:$rhs)]>;
+def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs),
+ [(setogt node:$lhs, node:$rhs),
+ (setgt node:$lhs, node:$rhs)]>;
+def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs),
+ [(setoeq node:$lhs, node:$rhs),
+ (seteq node:$lhs, node:$rhs)]>;
+def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs),
+ [(setone node:$lhs, node:$rhs),
+ (setne node:$lhs, node:$rhs)]>;
+def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
+ (AArch64mul_p node:$pred, node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
+
let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
@@ -231,6 +305,7 @@ let Predicates = [HasSVE] in {
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
defm ADD_ZPZZ : sve_int_bin_pred_bhsd<AArch64add_p>;
+ defm SUB_ZPZZ : sve_int_bin_pred_bhsd<AArch64sub_p>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
@@ -253,12 +328,12 @@ let Predicates = [HasSVE] in {
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>;
defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb>;
- defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla>;
- defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls>;
+ defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla, add, AArch64mul_p_oneuse>;
+ defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls, sub, AArch64mul_p_oneuse>;
// SVE predicated integer reductions.
- defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>;
- defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", int_aarch64_sve_uaddv, int_aarch64_sve_saddv>;
+ defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>;
+ defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", AArch64uaddv_p>;
defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_p>;
defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_p>;
defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_p>;
@@ -271,25 +346,17 @@ let Predicates = [HasSVE] in {
defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
- defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_m1>;
- defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_m1>;
- defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_m1>;
- defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_m1>;
-
- defm MUL_ZI : sve_int_arith_imm2<"mul", mul>;
- defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
- defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", int_aarch64_sve_smulh>;
- defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", int_aarch64_sve_umulh>;
-
- // Add unpredicated alternative for the mul instruction.
- def : Pat<(mul nxv16i8:$Op1, nxv16i8:$Op2),
- (MUL_ZPmZ_B (PTRUE_B 31), $Op1, $Op2)>;
- def : Pat<(mul nxv8i16:$Op1, nxv8i16:$Op2),
- (MUL_ZPmZ_H (PTRUE_H 31), $Op1, $Op2)>;
- def : Pat<(mul nxv4i32:$Op1, nxv4i32:$Op2),
- (MUL_ZPmZ_S (PTRUE_S 31), $Op1, $Op2)>;
- def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2),
- (MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>;
+ defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>;
+ defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>;
+ defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>;
+ defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>;
+
+ defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>;
+ defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", int_aarch64_sve_mul, DestructiveBinaryComm>;
+ defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, DestructiveBinaryComm>;
+ defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>;
+
+ defm MUL_ZPZZ : sve_int_bin_pred_bhsd<AArch64mul_p>;
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">;
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">;
@@ -305,34 +372,34 @@ let Predicates = [HasSVE] in {
defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
- defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", int_aarch64_sve_sxtb>;
- defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", int_aarch64_sve_uxtb>;
- defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", int_aarch64_sve_sxth>;
- defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", int_aarch64_sve_uxth>;
- defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", int_aarch64_sve_sxtw>;
- defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", int_aarch64_sve_uxtw>;
- defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", int_aarch64_sve_abs>;
- defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", int_aarch64_sve_neg>;
-
- defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", int_aarch64_sve_cls>;
- defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", int_aarch64_sve_clz>;
- defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", int_aarch64_sve_cnt>;
-
- let Predicates = [HasSVE, HasBF16] in {
- def : SVE_3_Op_Pat<nxv8i16, int_aarch64_sve_cnt, nxv8i16, nxv8i1, nxv8bf16, !cast<Instruction>(CNT_ZPmZ_H)>;
- }
-
- defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", int_aarch64_sve_cnot>;
- defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", int_aarch64_sve_not>;
- defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>;
- defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>;
-
- defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", AArch64smax_m1>;
- defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", AArch64umax_m1>;
- defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", AArch64smin_m1>;
- defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", AArch64umin_m1>;
- defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>;
- defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>;
+ defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", AArch64sxt_mt>;
+ defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", AArch64uxt_mt>;
+ defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", AArch64sxt_mt>;
+ defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", AArch64uxt_mt>;
+ defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", AArch64sxt_mt>;
+ defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", AArch64uxt_mt>;
+ defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", AArch64abs_mt>;
+ defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", AArch64neg_mt>;
+
+ defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", AArch64cls_mt>;
+ defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", AArch64clz_mt>;
+ defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", AArch64cnt_mt>;
+ defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", AArch64cnot_mt>;
+ defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", AArch64not_mt>;
+ defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>;
+ defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>;
+
+ defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>;
+ defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>;
+ defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>;
+ defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>;
+ defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>;
+ defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>;
+
+ defm SMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64smax_p>;
+ defm UMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64umax_p>;
+ defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>;
+ defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>;
defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>;
defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>;
@@ -361,6 +428,11 @@ let Predicates = [HasSVE] in {
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">;
defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>;
+ defm FSUB_ZPZZ : sve_fp_bin_pred_hfd<AArch64fsub_p>;
+ defm FMUL_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmul_p>;
+ defm FMAXNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmaxnm_p>;
+ defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fminnm_p>;
+ defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
@@ -377,10 +449,10 @@ let Predicates = [HasSVE] in {
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
}
- defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
- defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
- defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul>;
- defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
+ defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>;
+ defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>;
+ defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>;
+ defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
@@ -404,8 +476,14 @@ let Predicates = [HasSVE] in {
// regalloc.
def : Pat<(nxv8f16 (AArch64fma_p nxv8i1:$P, nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3)),
(FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv4f16 (AArch64fma_p nxv4i1:$P, nxv4f16:$Op1, nxv4f16:$Op2, nxv4f16:$Op3)),
+ (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv2f16 (AArch64fma_p nxv2i1:$P, nxv2f16:$Op1, nxv2f16:$Op2, nxv2f16:$Op3)),
+ (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv4f32 (AArch64fma_p nxv4i1:$P, nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3)),
(FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv2f32 (AArch64fma_p nxv2i1:$P, nxv2f32:$Op1, nxv2f32:$Op2, nxv2f32:$Op3)),
+ (FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv2f64 (AArch64fma_p nxv2i1:$P, nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3)),
(FMLA_ZPmZZ_D $P, $Op3, $Op1, $Op2)>;
@@ -425,15 +503,6 @@ let Predicates = [HasSVE] in {
defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_p>;
defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_p>;
- // Use more efficient NEON instructions to extract elements within the NEON
- // part (first 128bits) of an SVE register.
- def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
- (f16 (EXTRACT_SUBREG (v8f16 (EXTRACT_SUBREG ZPR:$Zs, zsub)), hsub))>;
- def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
- (f32 (EXTRACT_SUBREG (v4f32 (EXTRACT_SUBREG ZPR:$Zs, zsub)), ssub))>;
- def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
- (f64 (EXTRACT_SUBREG (v2f64 (EXTRACT_SUBREG ZPR:$Zs, zsub)), dsub))>;
-
// Splat immediate (unpredicated)
defm DUP_ZI : sve_int_dup_imm<"dup">;
defm FDUP_ZI : sve_int_dup_fpimm<"fdup">;
@@ -452,11 +521,6 @@ let Predicates = [HasSVE] in {
defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_mt>;
defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>;
- let Predicates = [HasSVE, HasBF16] in {
- def : Pat<(nxv8bf16 (AArch64dup_mt nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)),
- (CPY_ZPmV_H $passthru, $pg, $splat)>;
- }
-
// Duplicate FP scalar into all vector elements
def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
@@ -470,10 +534,8 @@ let Predicates = [HasSVE] in {
(DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))),
(DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
- let Predicates = [HasSVE, HasBF16] in {
- def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
- (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
- }
+ def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
+ (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
// Duplicate +0.0 into all vector elements
def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
@@ -482,9 +544,7 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
- let Predicates = [HasSVE, HasBF16] in {
- def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
- }
+ def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
// Duplicate Int immediate into all vector elements
def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
@@ -513,36 +573,23 @@ let Predicates = [HasSVE] in {
}
// Select elements from either vector (predicated)
- defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
+ defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
defm SPLICE_ZPZ : sve_int_perm_splice<"splice", int_aarch64_sve_splice>;
- let Predicates = [HasSVE, HasBF16] in {
- def : SVE_3_Op_Pat<nxv8bf16, vselect, nxv8i1, nxv8bf16, nxv8bf16, SEL_ZPZZ_H>;
- def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_splice, nxv8i1, nxv8bf16, nxv8bf16, SPLICE_ZPZ_H>;
- }
-
defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>;
defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;
- let Predicates = [HasSVE, HasBF16] in {
- def : SVE_2_Op_Pat<nxv8bf16, AArch64insr, nxv8bf16, bf16, INSR_ZV_H>;
- }
-
- defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>;
- defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>;
+ defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", AArch64rbit_mt>;
+ defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", AArch64revb_mt>;
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>;
defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>;
defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>;
- let Predicates = [HasSVE, HasBF16] in {
- def : SVE_1_Op_Pat<nxv8bf16, AArch64rev, nxv8bf16, REV_ZZ_H>;
- }
-
defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>;
defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo", AArch64uunpklo>;
@@ -599,23 +646,11 @@ let Predicates = [HasSVE] in {
defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>;
defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>;
- let Predicates = [HasSVE, HasBF16] in {
- def : SVE_3_Op_Pat<bf16, AArch64clasta_n, nxv8i1, bf16, nxv8bf16, CLASTA_VPZ_H>;
- def : SVE_3_Op_Pat<bf16, AArch64clastb_n, nxv8i1, bf16, nxv8bf16, CLASTB_VPZ_H>;
- def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clasta, nxv8i1, nxv8bf16, nxv8bf16, CLASTA_ZPZ_H>;
- def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clastb, nxv8i1, nxv8bf16, nxv8bf16, CLASTB_ZPZ_H>;
- }
-
defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>;
defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>;
defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>;
defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>;
- let Predicates = [HasSVE, HasBF16] in {
- def : SVE_2_Op_Pat<bf16, AArch64lasta, nxv8i1, nxv8bf16, LASTA_VPZ_H>;
- def : SVE_2_Op_Pat<bf16, AArch64lastb, nxv8i1, nxv8bf16, LASTB_VPZ_H>;
- }
-
// continuous load with reg+immediate
defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>;
defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>;
@@ -1000,7 +1035,7 @@ let Predicates = [HasSVE] in {
def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>;
def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>;
-multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
+ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
// reg + imm
let AddedComplexity = 2 in {
def _reg_imm : Pat<(prefetch (PredTy PPR_3b:$gp), (am_sve_indexed_s6 GPR64sp:$base, simm6s1:$offset), (i32 sve_prfop:$prfop)),
@@ -1082,10 +1117,6 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>;
- let Predicates = [HasSVE, HasBF16] in {
- def : SVE_2_Op_Pat<nxv8bf16, AArch64tbl, nxv8bf16, nxv8i16, TBL_ZZZ_H>;
- }
-
defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>;
defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2", AArch64zip2>;
defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1", AArch64uzp1>;
@@ -1093,15 +1124,6 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1", AArch64trn1>;
defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2", AArch64trn2>;
- let Predicates = [HasSVE, HasBF16] in {
- def : SVE_2_Op_Pat<nxv8bf16, AArch64zip1, nxv8bf16, nxv8bf16, ZIP1_ZZZ_H>;
- def : SVE_2_Op_Pat<nxv8bf16, AArch64zip2, nxv8bf16, nxv8bf16, ZIP2_ZZZ_H>;
- def : SVE_2_Op_Pat<nxv8bf16, AArch64uzp1, nxv8bf16, nxv8bf16, UZP1_ZZZ_H>;
- def : SVE_2_Op_Pat<nxv8bf16, AArch64uzp2, nxv8bf16, nxv8bf16, UZP2_ZZZ_H>;
- def : SVE_2_Op_Pat<nxv8bf16, AArch64trn1, nxv8bf16, nxv8bf16, TRN1_ZZZ_H>;
- def : SVE_2_Op_Pat<nxv8bf16, AArch64trn2, nxv8bf16, nxv8bf16, TRN2_ZZZ_H>;
- }
-
defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1>;
defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2>;
defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1>;
@@ -1123,6 +1145,29 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
(ZIP2_PPP_B PPR:$Ps, (PFALSE))>;
+ // Extract subvectors from FP SVE vectors
+ def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_D ZPR:$Zs)>;
+ def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 2))),
+ (UUNPKHI_ZZ_D ZPR:$Zs)>;
+ def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_S ZPR:$Zs)>;
+ def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
+ (UUNPKHI_ZZ_S ZPR:$Zs)>;
+ def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_D ZPR:$Zs)>;
+ def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 2))),
+ (UUNPKHI_ZZ_D ZPR:$Zs)>;
+
+ def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_D ZPR:$Zs)>;
+ def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 2))),
+ (UUNPKHI_ZZ_D ZPR:$Zs)>;
+ def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_S ZPR:$Zs)>;
+ def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
+ (UUNPKHI_ZZ_S ZPR:$Zs)>;
+
// Concatenate two predicates.
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
(UZP1_PPP_S $p1, $p2)>;
@@ -1131,6 +1176,18 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)),
(UZP1_PPP_B $p1, $p2)>;
+ // Concatenate two floating point vectors.
+ def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)),
+ (UZP1_ZZZ_S $v1, $v2)>;
+ def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)),
+ (UZP1_ZZZ_H $v1, $v2)>;
+ def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)),
+ (UZP1_ZZZ_S $v1, $v2)>;
+ def : Pat<(nxv4bf16 (concat_vectors nxv2bf16:$v1, nxv2bf16:$v2)),
+ (UZP1_ZZZ_S $v1, $v2)>;
+ def : Pat<(nxv8bf16 (concat_vectors nxv4bf16:$v1, nxv4bf16:$v2)),
+ (UZP1_ZZZ_H $v1, $v2)>;
+
defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
@@ -1160,10 +1217,10 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
- defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge>;
- defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt>;
- defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq>;
- defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone>;
+ defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>;
+ defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>;
+ defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>;
+ defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
@@ -1288,82 +1345,145 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm INDEX_II : sve_int_index_ii<"index", index_vector>;
// Unpredicated shifts
- defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_m1>;
- defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_m1>;
- defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_m1>;
+ defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>;
+ defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_p>;
+ defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_p>;
defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">;
defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">;
defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">;
// Predicated shifts
- defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr", "ASR_ZPZI">;
- defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr", "LSR_ZPZI">;
- defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
- defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
+ defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>;
+ defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>;
+ defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>;
+ defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
+
+ defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+ defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+ defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
- defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64asr_m1>;
- defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64lsr_m1>;
- defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64lsl_m1>;
+ defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
+ defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
+ defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
}
- defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_m1, "ASRR_ZPmZ">;
- defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_m1, "LSRR_ZPmZ">;
- defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", AArch64lsl_m1, "LSLR_ZPmZ">;
+ defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">;
+ defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">;
+ defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">;
defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>;
defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>;
defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", /*isReverseInstr*/ 1>;
+ defm ASR_ZPZZ : sve_int_bin_pred_bhsd<AArch64asr_p>;
+ defm LSR_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsr_p>;
+ defm LSL_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsl_p>;
+
defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
- defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv4i1, nxv4f32, ElementSizeS>;
- defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv4i1, nxv8f16, ElementSizeS>;
- defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
- defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
- defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
- defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
- defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
- defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, int_aarch64_sve_fcvtzs, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
- defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
- defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
- defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv2i1, nxv8f16, ElementSizeD>;
- defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, nxv2f64, nxv2i1, nxv4f32, ElementSizeD>;
- defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
- defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
- defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>;
- defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>;
- defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>;
- defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>;
- defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>;
- defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>;
- defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
- defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
- defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
- defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
- defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
- defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
- defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
- defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
- defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
-
- defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;
- defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>;
- defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;
- defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;
- defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;
- defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>;
- defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>;
- defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
- defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;
+ defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zdr<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, AArch64fcvtr_mt, nxv4f16, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, AArch64fcvte_mt, nxv4f32, nxv4i1, nxv4f16, ElementSizeS>;
+ defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
+ defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
+ defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
+ defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
+ defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
+ defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
+ defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zdr<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, AArch64fcvtr_mt, nxv2f16, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>;
+ defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>;
+ defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+ defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+ defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
+ defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>;
+ defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
+ defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>;
+ defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
+ defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
+ defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
+ defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
+ defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
+ defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
+ defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
+ defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
+
+ def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
+ (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ // FP_ROUND has an additional 'precise' flag which indicates the type of rounding.
+ // This is ignored by the pattern below where it is matched by (i64 timm0_1)
+ def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
+ (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ // Signed integer -> floating-point
+ def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))),
+ (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg),
+ (sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))),
+ (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))),
+ (SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))),
+ (SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))),
+ (SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ // Unsigned integer -> floating-point
+ def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ (and (nxv2i64 ZPR:$Zs),
+ (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))),
+ (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ (and (nxv2i64 ZPR:$Zs),
+ (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))),
+ (UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg),
+ (and (nxv4i32 ZPR:$Zs),
+ (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))),
+ (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ (and (nxv2i64 ZPR:$Zs),
+ (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))),
+ (UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ (and (nxv2i64 ZPR:$Zs),
+ (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))),
+ (UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>;
+ defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>;
+ defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", AArch64frintm_mt>;
+ defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", AArch64frintz_mt>;
+ defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", AArch64frinta_mt>;
+ defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", AArch64frintx_mt>;
+ defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>;
+ defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>;
+ defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
let Predicates = [HasBF16, HasSVE] in {
defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
@@ -1528,6 +1648,9 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
}
+ def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
+ (ADDVL_XXI GPR64:$op, $imm)>;
+
// FIXME: BigEndian requires an additional REV instruction to satisfy the
// constraint that none of the bits change when stored to memory as one
// type, and reloaded as another type.
@@ -1581,15 +1704,6 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
- }
-
- let Predicates = [IsLE, HasBF16, HasSVE] in {
- def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
- def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
- def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
- }
-
- let Predicates = [IsLE, HasSVE, HasBF16] in {
def : Pat<(nxv8bf16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
def : Pat<(nxv8bf16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
@@ -1607,6 +1721,7 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
}
+ // These allow casting from/to unpacked predicate types.
def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
@@ -1621,6 +1736,18 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ // These allow casting from/to unpacked floating-point types.
+ def : Pat<(nxv2f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv8f16 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv4f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv8f16 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv2f32 (reinterpret_cast (nxv4f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv4f32 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv2bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv8bf16 (reinterpret_cast (nxv2bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv4bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv8bf16 (reinterpret_cast (nxv4bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+
def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)),
(AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>;
def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)),
@@ -1673,10 +1800,7 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm : pred_load<nxv8i16, nxv8i1, asext_masked_load_i8, LD1SB_H, LD1SB_H_IMM, am_sve_regreg_lsl0>;
defm : pred_load<nxv8i16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
defm : pred_load<nxv8f16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
-
- let Predicates = [HasBF16, HasSVE] in {
- defm : pred_load<nxv8bf16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
- }
+ defm : pred_load<nxv8bf16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
// 16-element contiguous loads
defm : pred_load<nxv16i8, nxv16i1, nonext_masked_load, LD1B, LD1B_IMM, am_sve_regreg_lsl0>;
@@ -1714,13 +1838,10 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
// 8-element contiguous stores
- defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H, ST1B_H_IMM, am_sve_regreg_lsl0>;
- defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
-
- let Predicates = [HasBF16, HasSVE] in {
- defm : pred_store<nxv8bf16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
- }
+ defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H, ST1B_H_IMM, am_sve_regreg_lsl0>;
+ defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv8bf16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
// 16-element contiguous stores
defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B, ST1B_IMM, am_sve_regreg_lsl0>;
@@ -1882,10 +2003,7 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm : ld1<LD1SB_H, LD1SB_H_IMM, nxv8i16, AArch64ld1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
defm : ld1<LD1H, LD1H_IMM, nxv8i16, AArch64ld1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
defm : ld1<LD1H, LD1H_IMM, nxv8f16, AArch64ld1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;
-
- let Predicates = [HasBF16, HasSVE] in {
- defm : ld1<LD1H, LD1H_IMM, nxv8bf16, AArch64ld1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
- }
+ defm : ld1<LD1H, LD1H_IMM, nxv8bf16, AArch64ld1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
// 16-element contiguous loads
defm : ld1<LD1B, LD1B_IMM, nxv16i8, AArch64ld1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
@@ -1925,10 +2043,7 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm : ldnf1<LDNF1SB_H_IMM, nxv8i16, AArch64ldnf1s_z, nxv8i1, nxv8i8>;
defm : ldnf1<LDNF1H_IMM, nxv8i16, AArch64ldnf1_z, nxv8i1, nxv8i16>;
defm : ldnf1<LDNF1H_IMM, nxv8f16, AArch64ldnf1_z, nxv8i1, nxv8f16>;
-
- let Predicates = [HasBF16, HasSVE] in {
- defm : ldnf1<LDNF1H_IMM, nxv8bf16, AArch64ldnf1_z, nxv8i1, nxv8bf16>;
- }
+ defm : ldnf1<LDNF1H_IMM, nxv8bf16, AArch64ldnf1_z, nxv8i1, nxv8bf16>;
// 16-element contiguous non-faulting loads
defm : ldnf1<LDNF1B_IMM, nxv16i8, AArch64ldnf1_z, nxv16i1, nxv16i8>;
@@ -1969,10 +2084,7 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm : ldff1<LDFF1SB_H, nxv8i16, AArch64ldff1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
defm : ldff1<LDFF1H, nxv8i16, AArch64ldff1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
defm : ldff1<LDFF1H, nxv8f16, AArch64ldff1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;
-
- let Predicates = [HasBF16, HasSVE] in {
- defm : ldff1<LDFF1H, nxv8bf16, AArch64ldff1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
- }
+ defm : ldff1<LDFF1H, nxv8bf16, AArch64ldff1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
// 16-element contiguous first faulting loads
defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
@@ -2023,6 +2135,19 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(nxv2i64 (vector_insert (nxv2i64 (undef)), (i64 FPR64:$src), 0)),
(INSERT_SUBREG (nxv2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+ def : Pat<(nxv8f16 (vector_insert (nxv8f16 (undef)), (f16 FPR16:$src), 0)),
+ (INSERT_SUBREG (nxv8f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+ def : Pat<(nxv4f16 (vector_insert (nxv4f16 (undef)), (f16 FPR16:$src), 0)),
+ (INSERT_SUBREG (nxv4f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+ def : Pat<(nxv2f16 (vector_insert (nxv2f16 (undef)), (f16 FPR16:$src), 0)),
+ (INSERT_SUBREG (nxv2f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+ def : Pat<(nxv4f32 (vector_insert (nxv4f32 (undef)), (f32 FPR32:$src), 0)),
+ (INSERT_SUBREG (nxv4f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+ def : Pat<(nxv2f32 (vector_insert (nxv2f32 (undef)), (f32 FPR32:$src), 0)),
+ (INSERT_SUBREG (nxv2f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+ def : Pat<(nxv2f64 (vector_insert (nxv2f64 (undef)), (f64 FPR64:$src), 0)),
+ (INSERT_SUBREG (nxv2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+
// Insert scalar into vector[0]
def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), (i32 GPR32:$src), 0)),
(CPY_ZPmR_B ZPR:$vec, (PTRUE_B 1), GPR32:$src)>;
@@ -2086,6 +2211,28 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
(DUP_ZR_D $index)),
$src)>;
+ // Extract element from vector with scalar index
+ def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+
// Extract element from vector with immediate index
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)),
(EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>;
@@ -2097,34 +2244,54 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
(EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
+ def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
+ def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
+ def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
+ def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>;
def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
- // Extract element from vector with scalar index
- def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index),
- ZPR:$vec)>;
- def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
- ZPR:$vec)>;
- def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
- ZPR:$vec)>;
- def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
- ZPR:$vec)>;
+ // Extract element from vector with immediate index that's within the bottom 128-bits.
+ let AddedComplexity = 1 in {
+ def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)),
+ (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
+ def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)),
+ (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
+ def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
+ (i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
+ def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
+ (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
+ }
- def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
- ZPR:$vec)>;
- def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
- ZPR:$vec)>;
- def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
- ZPR:$vec)>;
+ // Extract first element from vector.
+ let AddedComplexity = 2 in {
+ def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)),
+ (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv8i16 ZPR:$Zs), (i64 0)),
+ (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv4i32 ZPR:$Zs), (i64 0)),
+ (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv2i64 ZPR:$Zs), (i64 0)),
+ (i64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
+ def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
+ (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv4f16 ZPR:$Zs), (i64 0)),
+ (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)),
+ (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
+ (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)),
+ (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
+ (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
+ }
}
let Predicates = [HasSVE, HasMatMulInt8] in {
@@ -2158,15 +2325,6 @@ let Predicates = [HasSVE, HasMatMulFP64] in {
defm TRN2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2", int_aarch64_sve_trn2q>;
}
-let Predicates = [HasSVE, HasMatMulFP64, HasBF16] in {
- def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_zip1q, nxv8bf16, nxv8bf16, ZIP1_ZZZ_Q>;
- def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_zip2q, nxv8bf16, nxv8bf16, ZIP2_ZZZ_Q>;
- def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_uzp1q, nxv8bf16, nxv8bf16, UZP1_ZZZ_Q>;
- def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_uzp2q, nxv8bf16, nxv8bf16, UZP2_ZZZ_Q>;
- def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_trn1q, nxv8bf16, nxv8bf16, TRN1_ZZZ_Q>;
- def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_trn2q, nxv8bf16, nxv8bf16, TRN2_ZZZ_Q>;
-}
-
let Predicates = [HasSVE2] in {
// SVE2 integer multiply-add (indexed)
defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>;
@@ -2192,10 +2350,10 @@ let Predicates = [HasSVE2] in {
defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>;
// SVE2 integer multiply vectors (unpredicated)
- defm MUL_ZZZ : sve2_int_mul<0b000, "mul", mul>;
+ defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>;
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag>;
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag>;
- defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
+ defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
// Add patterns for unpredicated version of smulh and umulh.
def : Pat<(nxv16i8 (int_aarch64_sve_smulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)),
@@ -2214,6 +2372,7 @@ let Predicates = [HasSVE2] in {
(UMULH_ZZZ_S $Op1, $Op2)>;
def : Pat<(nxv2i64 (int_aarch64_sve_umulh (nxv2i1 (AArch64ptrue 31)), nxv2i64:$Op1, nxv2i64:$Op2)),
(UMULH_ZZZ_D $Op1, $Op2)>;
+
// SVE2 complex integer dot product (indexed)
defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot", int_aarch64_sve_cdot_lane>;
@@ -2335,11 +2494,11 @@ let Predicates = [HasSVE2] in {
}
// SVE2 predicated shifts
- defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">;
- defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">;
- defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>;
- defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
- defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
+ defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">;
+ defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">;
+ defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>;
+ defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
+ defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
// SVE2 integer add/subtract long
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
@@ -2546,13 +2705,6 @@ let Predicates = [HasSVE2] in {
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>;
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>;
- let Predicates = [HasSVE, HasBF16] in {
- def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_tbx, nxv8bf16, nxv8bf16, nxv8i16, TBX_ZZZ_H>;
- def : Pat<(nxv8bf16 (int_aarch64_sve_tbl2 nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)),
- (nxv8bf16 (TBL_ZZZZ_H (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0, nxv8bf16:$Op2, zsub1),
- nxv8i16:$Op3))>;
- }
-
// SVE2 integer compare scalar count and limit
defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege>;
defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA55.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA55.td
new file mode 100644
index 000000000000..50911fd22bc9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -0,0 +1,339 @@
+//==- AArch64SchedA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM Cortex-A55 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the per-operand machine model.
+// This works with MachineScheduler. See MCSchedModel.h for details.
+
+// Cortex-A55 machine model for scheduling and other instruction cost heuristics.
+def CortexA55Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // The Cortex-A55 is an in-order processor
+ let IssueWidth = 2; // It dual-issues under most circumstances
+ let LoadLatency = 4; // Cycles for loads to access the cache. The
+ // optimisation guide shows that most loads have
+ // a latency of 3, but some have a latency of 4
+ // or 5. Setting it to 4 looked to be a good trade-off.
+ let MispredictPenalty = 8; // A branch direction mispredict.
+ let PostRAScheduler = 1; // Enable PostRA scheduler pass.
+ let CompleteModel = 0; // Covers instructions applicable to Cortex-A55.
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+
+ // FIXME: Remove when all errors have been fixed.
+ let FullInstRWOverlapCheck = 0;
+}
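
A SchedMachineModel only takes effect once a CPU definition in the target's top-level TableGen file points at it. A minimal sketch of that hookup, assuming a hypothetical placeholder feature list (the real cortex-a55 entry lives in AArch64.td and is not part of this hunk):

// Sketch only: the feature list below is a placeholder, not the real one.
def : ProcessorModel<"cortex-a55", CortexA55Model, [HasV8_2aOps, FeatureNEON]>;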
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Modeling each pipeline as a ProcResource with BufferSize = 0 since the
+// Cortex-A55 is in-order.
+
+def CortexA55UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def CortexA55UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bit wide
+def CortexA55UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined
+def CortexA55UnitLd : ProcResource<1> { let BufferSize = 0; } // Load pipe
+def CortexA55UnitSt : ProcResource<1> { let BufferSize = 0; } // Store pipe
+def CortexA55UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
+
+// The FP DIV/SQRT instructions execute totally differently from the FP ALU
+// instructions, which can mostly be dual-issued; that's why for now we model
+// them with 2 resources.
+def CortexA55UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
+def CortexA55UnitFPMAC : ProcResource<2> { let BufferSize = 0; } // FP MAC
+def CortexA55UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types
+
+let SchedModel = CortexA55Model in {
+
+// These latencies are modeled without taking into account forwarding paths
+// (the software optimisation guide lists latencies taking into account
+// typical forwarding paths).
+def : WriteRes<WriteImm, [CortexA55UnitALU]> { let Latency = 3; } // MOVN, MOVZ
+def : WriteRes<WriteI, [CortexA55UnitALU]> { let Latency = 3; } // ALU
+def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Shifted-Reg
+def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Extended-Reg
+def : WriteRes<WriteExtr, [CortexA55UnitALU]> { let Latency = 3; } // EXTR from a reg pair
+def : WriteRes<WriteIS, [CortexA55UnitALU]> { let Latency = 3; } // Shift/Scale
+
+// MAC
+def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; } // 32-bit Multiply
+def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; } // 64-bit Multiply
+
+// Div
+def : WriteRes<WriteID32, [CortexA55UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8];
+}
+def : WriteRes<WriteID64, [CortexA55UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8];
+}
+
+// Load
+def : WriteRes<WriteLD, [CortexA55UnitLd]> { let Latency = 3; }
+def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; }
+def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; }
+
+// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVLD
+// below, we choose the median of 3, which makes the latency 6.
+// An extra cycle is needed to get the swizzling right.
+def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; }
+def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7;
+ let ResourceCycles = [4]; }
+def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8;
+ let ResourceCycles = [5]; }
+def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9;
+ let ResourceCycles = [6]; }
+def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10;
+ let ResourceCycles = [7]; }
+def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
+ let ResourceCycles = [8]; }
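
The CortexA55WriteVLD1-8 writes above are inert until they are bound to concrete instructions, which the rest of the file does with InstRW overrides. A minimal sketch of that idiom, with arbitrarily chosen load instructions rather than the actual mappings from this patch:

// Sketch only: instruction choices are illustrative, not taken from the patch.
def : InstRW<[CortexA55WriteVLD2], (instrs LD1Twov8b)>;
def : InstRW<[CortexA55WriteVLD3], (instregex "^LD3Threev(8b|4h|2s)$")>;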
+
+// Pre/Post Indexing - Performed as part of address generation
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Store
+def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 4; }
+def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 4; }
+def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 4; }
+def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }
+
+// Vector Store - Similar to vector loads, vector stores can take 1-3 cycles to issue.
+def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5;
+ let ResourceCycles = [2];}
+def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; }
+def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
+ let ResourceCycles = [4]; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
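+// Unsupported = 1 declares that this model does not describe instructions
+// mapped to WriteAtomic (the atomic/exclusive operations).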
+
+// Branch
+def : WriteRes<WriteBr, [CortexA55UnitB]>;
+def : WriteRes<WriteBrReg, [CortexA55UnitB]>;
+def : WriteRes<WriteSys, [CortexA55UnitB]>;
+def : WriteRes<WriteBarrier, [CortexA55UnitB]>;
+def : WriteRes<WriteHint, [CortexA55UnitB]>;
+
+// FP ALU
+// As the WriteF result is produced in F5 and can mostly be forwarded to the
+// consumer at F1, the effective latency is set to 4.
+def : WriteRes<WriteF, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
+def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
+def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
+def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; }
+
+// FP ALU specific new schedwrite definitions
+def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
+def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
+def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
+
+// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
+def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }
+def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
+ let ResourceCycles = [29]; }
+def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; }
+def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
+ let ResourceCycles = [5]; }
+def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13;
+ let ResourceCycles = [10]; }
+def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
+ let ResourceCycles = [19]; }
+def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
+ let ResourceCycles = [5]; }
+def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
+ let ResourceCycles = [19]; }
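+
+// Reading the pairs above: Latency is when the result becomes available and
+// ResourceCycles is how long the single non-pipelined divide/sqrt unit stays
+// busy. For example, back-to-back double-precision FDIVs are modeled to start
+// no more often than every 19 cycles, each result being ready after 22 cycles.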
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadExtrHi, 1>;
+def : ReadAdvance<ReadAdrBase, 1>;
+
+// ALU - ALU input operands are generally needed in EX1. An operand produced
+// in, say, EX2 can be forwarded to a consuming ALU operation in EX1, thereby
+// allowing back-to-back ALU operations such as add. If an operand requires
+// a shift, however, it is needed in the ISS stage instead.
+def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+// Shifted operand
+def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def CortexA55ReadISReg : SchedReadVariant<[
+ SchedVar<RegShiftedPred, [CortexA55ReadShifted]>,
+ SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
+def : SchedAlias<ReadISReg, CortexA55ReadISReg>;
+
+def CortexA55ReadIEReg : SchedReadVariant<[
+ SchedVar<RegExtendedPred, [CortexA55ReadShifted]>,
+ SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
+def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>;
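+
+// Worked example of the variants above, using the usual ReadAdvance
+// arithmetic (the advance is subtracted from the producer's latency): an ADD
+// whose second operand is actually shifted resolves ReadISReg to
+// CortexA55ReadShifted, so a value from a 3-cycle ALU write is usable after
+// 3 - 1 = 2 cycles; in the unshifted case CortexA55ReadNotShifted applies and
+// the same value is usable after 3 - 2 = 1 cycle, matching the ISS-vs-EX1
+// distinction described in the ALU comment above.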
+
+// MUL
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
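+
+// For instance, the accumulator input of a multiply-accumulate (ReadIMA,
+// advance 2) against the 4-cycle WriteIM32/WriteIM64 above gives an effective
+// 4 - 2 = 2 cycle accumulator-to-accumulator latency, so a chain of dependent
+// MADDs is modeled to issue one every 2 cycles.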
+
+// Div
+def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRWs.
+
+//---
+// Miscellaneous
+//---
+def : InstRW<[CortexA55WriteVLD2,CortexA55WriteVLD1], (instregex "LDP.*")>;
+def : InstRW<[WriteI], (instrs COPY)>;
+//---
+// Vector Loads - 64 bits per cycle
+//---
+// 1-element structures
+def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; // single element
+def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate
+def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// 2-element structures
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+
+// 3-element structures
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+// 4-element structures
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
+def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+//---
+// Vector Stores
+//---
+def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
+
+def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+//---
+// Floating Point Conversions, MAC, DIV, SQRT
+//---
+def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
+
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>;
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>;
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>;
+
+def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
+def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>;
+def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>;
+def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>;
+def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
+def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td
index 7c40da05c305..aa5bec8088e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -93,7 +93,7 @@ def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
-def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
+def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
def : SchedAlias<WriteV, A57Write_3cyc_1V>;
def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
@@ -350,12 +350,16 @@ def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")
// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
+// Cortex A57 Software Optimization Guide Sec 3.14
+// Advance for absolute diff accum, pairwise add and accumulate, shift accumulate
+def A57ReadIVA3 : SchedReadAdvance<3, [A57Write_4cyc_1X_NonMul_Forward, A57Write_5cyc_2X_NonMul_Forward]>;
+
// ASIMD absolute diff accum, D-form
-def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
// ASIMD absolute diff accum, Q-form
-def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+def : InstRW<[A57Write_5cyc_2X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
// ASIMD absolute diff accum long
-def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABAL")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABAL")>;
// ASIMD arith, reduce, 4H/4S
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
@@ -372,32 +376,41 @@ def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>
def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
// ASIMD multiply, D-form
-def : InstRW<[A57Write_5cyc_1W], (instregex "^(P?MUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+// MUL
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^MUL(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+// PMUL, SQDMULH, SQRDMULH
+def : InstRW<[A57Write_5cyc_1W], (instregex "^(PMUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+
// ASIMD multiply, Q-form
-def : InstRW<[A57Write_6cyc_2W], (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+// MUL
+def : InstRW<[A57Write_6cyc_2W_Mul_Forward], (instregex "^MUL(v16i8|v8i16|v4i32)(_indexed)?$")>;
+// PMUL, SQDMULH, SQRDMULH
+def : InstRW<[A57Write_6cyc_2W], (instregex "^(PMUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// Cortex A57 Software Optimization Guide Sec 3.14
+def A57ReadIVMA4 : SchedReadAdvance<4 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
+def A57ReadIVMA3 : SchedReadAdvance<3 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
// ASIMD multiply accumulate, D-form
-def : InstRW<[A57Write_5cyc_1W], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
// ASIMD multiply accumulate, Q-form
-def : InstRW<[A57Write_6cyc_2W], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[A57Write_6cyc_2W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
// ASIMD multiply accumulate long
// ASIMD multiply accumulate saturating long
-def A57WriteIVMA : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
-def A57ReadIVMA4 : SchedReadAdvance<4, [A57WriteIVMA]>;
-def : InstRW<[A57WriteIVMA, A57ReadIVMA4], (instregex "^(S|U|SQD)ML[AS]L")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^(S|U)ML[AS]L")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA3], (instregex "^SQDML[AS]L")>;
// ASIMD multiply long
-def : InstRW<[A57Write_5cyc_1W], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^(S|U)MULL")>;
+def : InstRW<[A57Write_5cyc_1W], (instregex "^SQDMULL")>;
def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>;
def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>;
// ASIMD pairwise add and accumulate
// ASIMD shift accumulate
-def A57WriteIVA : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
-def A57ReadIVA3 : SchedReadAdvance<3, [A57WriteIVA]>;
-def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^[SU]ADALP")>;
-def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ADALP")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
// ASIMD shift by immed, complex
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>;
@@ -474,17 +487,22 @@ def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i6
def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>;
// ASIMD FP multiply, D-form, FZ
-def : InstRW<[A57Write_5cyc_1V], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+def : InstRW<[A57Write_5cyc_1V_FP_Forward], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
// ASIMD FP multiply, Q-form, FZ
-def : InstRW<[A57Write_5cyc_2V], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
+def : InstRW<[A57Write_5cyc_2V_FP_Forward], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP multiply accumulate, D-form, FZ
// ASIMD FP multiply accumulate, Q-form, FZ
def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10; }
-def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ]>;
+
+// Cortex A57 Software Optimization Guide Sec 3.15
+// Advances from FP mul and mul-accum to mul-accum
+def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
+def A57ReadFPVMA6 : SchedReadAdvance<6, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
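+
+// With these advances the accumulator input of an FMLA consuming a previous
+// D-form FMLA result is modeled at 9 - 5 = 4 cycles (Q-form: 10 - 6 = 4),
+// and a result coming from the 5-cycle *_FP_Forward multiply writes is
+// treated as available almost immediately, reflecting the mul-to-mul-accum
+// forwarding path noted above.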
+
def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
-def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA5], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
+def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA6], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP round, D-form
def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>;
@@ -547,8 +565,9 @@ def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v
def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>;
+// Cortex A57 Software Optimization Guide Sec 3.10
def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
-def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA]>;
+def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA, WriteFMul]>;
def A57ReadFPM : SchedReadAdvance<0>;
def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
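+// The read list above is positional: the two A57ReadFPM entries (advance 0)
+// cover the multiplicands of FMADD/FMSUB, while A57ReadFPMA5 covers the
+// addend, so only the accumulator operand benefits from the 5-cycle
+// forwarding advance (now also granted against WriteFMul).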
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td
index 987ed3c4ebfb..a4c090d439db 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -13,6 +13,10 @@
// Prefix: A57Write
// Latency: #cyc
// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
+// Postfix (optional): (XYZ)_Forward
+//
+// The postfix is added to differentiate SchedWriteRes definitions that are
+// referenced by subsequent SchedReadAdvances.
//
// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
@@ -25,7 +29,9 @@
def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; }
def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def A57Write_5cyc_1V_FP_Forward : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57Write_5cyc_1W_Mul_Forward : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
let ResourceCycles = [17]; }
@@ -45,6 +51,7 @@ def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; }
def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57Write_4cyc_1X_NonMul_Forward : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
@@ -93,6 +100,10 @@ def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
let Latency = 6;
let NumMicroOps = 2;
}
+def A57Write_6cyc_2W_Mul_Forward : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
A57UnitL]> {
let Latency = 5;
@@ -102,10 +113,18 @@ def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
let Latency = 5;
let NumMicroOps = 2;
}
+def A57Write_5cyc_2V_FP_Forward : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
let Latency = 5;
let NumMicroOps = 2;
}
+def A57Write_5cyc_2X_NonMul_Forward : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
A57UnitV]> {
let Latency = 10;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
new file mode 100644
index 000000000000..b6741d418ef0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -0,0 +1,3890 @@
+//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the Fujitsu A64FX processors.
+//
+//===----------------------------------------------------------------------===//
+
+def A64FXModel : SchedMachineModel {
+ let IssueWidth = 6; // 6 micro-ops dispatched at a time.
+ let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
+ let LoadLatency = 5; // Optimistic load latency.
+ let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
+ // Determined via a mix of micro-arch details and experimentation.
+ let LoopMicroOpBufferSize = 128;
+ let PostRAScheduler = 1; // Using PostRA sched.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures =
+ [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth];
+
+ let FullInstRWOverlapCheck = 0;
+}
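+
+// CompleteModel = 1 makes TableGen check that every instruction not covered
+// by UnsupportedFeatures has scheduling data in this model, while
+// FullInstRWOverlapCheck = 0 relaxes the usual rejection of overlapping
+// InstRW definitions, presumably because several instructions below are
+// matched by more than one InstRW entry.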
+
+let SchedModel = A64FXModel in {
+
+// Define the issue ports.
+
+// A64FXIP*
+
+// Port 0
+def A64FXIPFLA : ProcResource<1>;
+
+// Port 1
+def A64FXIPPR : ProcResource<1>;
+
+// Port 2
+def A64FXIPEXA : ProcResource<1>;
+
+// Port 3
+def A64FXIPFLB : ProcResource<1>;
+
+// Port 4
+def A64FXIPEXB : ProcResource<1>;
+
+// Port 5
+def A64FXIPEAGA : ProcResource<1>;
+
+// Port 6
+def A64FXIPEAGB : ProcResource<1>;
+
+// Port 7
+def A64FXIPBR : ProcResource<1>;
+
+// Define groups for the functional units on each issue port. Each group
+// created will be used by a WriteRes later on.
+
+def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
+
+def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
+
+def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
+
+def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
+
+def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
+
+def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
+
+def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
+
+def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;
+
+def A64FXGI03 : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
+
+def A64FXGI01 : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
+
+def A64FXGI02 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA]>;
+
+def A64FXGI12 : ProcResGroup<[A64FXIPEXA, A64FXIPPR]>;
+
+def A64FXGI15 : ProcResGroup<[A64FXIPEAGA, A64FXIPPR]>;
+
+def A64FXGI05 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA]>;
+
+def A64FXGI24 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
+
+def A64FXGI124 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPPR]>;
+
+def A64FXGI056 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
+
+def A64FXGI0256 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA, A64FXIPEAGA, A64FXIPEAGB]>;
+
+def A64FXGI56 : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
+
+def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB]>;
+
+def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
+ A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]> {
+ let BufferSize = 60;
+}
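+
+// A write that names a ProcResGroup may issue to any member of that group:
+// for example, a micro-op scheduled on A64FXGI03 can go to either FLA
+// (port 0) or FLB (port 3), and A64FXGI2456 covers all four EX/EAG ports.
+// A64FXAny spans every port and carries the shared 60-entry buffer.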
+
+def A64FXWrite_6Cyc : SchedWriteRes<[]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 2;
+}
+
+def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 4;
+}
+
+def A64FXWrite_5Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 9;
+}
+
+def A64FXWrite_13Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 13;
+}
+
+def A64FXWrite_37Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 37;
+}
+
+def A64FXWrite_98Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 98;
+}
+
+def A64FXWrite_134Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 134;
+}
+
+def A64FXWrite_154Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 154;
+}
+
+def A64FXWrite_4Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
+ let Latency = 4;
+}
+
+def A64FXWrite_6Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_8Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_12Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
+ let Latency = 12;
+}
+
+def A64FXWrite_10Cyc_GI02 : SchedWriteRes<[A64FXGI02]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_17Cyc_GI02 : SchedWriteRes<[A64FXGI02]> {
+ let Latency = 17;
+}
+
+def A64FXWrite_21Cyc_GI02 : SchedWriteRes<[A64FXGI02]> {
+ let Latency = 21;
+}
+
+def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> {
+ let Latency = 3;
+}
+
+def A64FXWrite_6Cyc_NGI1 : SchedWriteRes<[A64FXGI1]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_4Cyc_GI12 : SchedWriteRes<[A64FXGI12]> {
+ let Latency = 4;
+}
+
+def A64FXWrite_3Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
+ let Latency = 3;
+}
+
+def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_6Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> {
+ let Latency = 4;
+}
+
+def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_6Cyc_GI15 : SchedWriteRes<[A64FXGI15]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_3Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 3;
+}
+
+def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 4;
+}
+
+def A64FXWrite_6Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 9;
+}
+
+def A64FXWrite_10Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_12Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 12;
+}
+
+def A64FXWrite_14Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_15Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 15;
+}
+
+def A64FXWrite_15Cyc_NGI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_18Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 18;
+}
+
+def A64FXWrite_45Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 45;
+}
+
+def A64FXWrite_60Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 60;
+}
+
+def A64FXWrite_75Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 75;
+}
+
+def A64FXWrite_6Cyc_GI05 : SchedWriteRes<[A64FXGI05]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
+ let Latency = 12;
+}
+
+def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
+ let Latency = 20;
+}
+
+def A64FXWrite_5Cyc_GI5 : SchedWriteRes<[A64FXGI5]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> {
+ let Latency = 11;
+}
+
+def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> {
+ let Latency = 2;
+}
+
+def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_6Cyc_GI124: SchedWriteRes<[A64FXGI124]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_8Cyc_GI124 : SchedWriteRes<[A64FXGI124]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_6Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_1Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_5Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_8Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+}
+
+def A64FXWrite_44Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 44;
+}
+
+def A64FXWrite_10Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_15Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
+ let Latency = 15;
+}
+
+def A64FXWrite_19Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
+ let Latency = 19;
+}
+
+def A64FXWrite_25Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
+ let Latency = 25;
+}
+
+def A64FXWrite_14Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_19Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> {
+ let Latency = 19;
+}
+
+def A64FXWrite_29Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> {
+ let Latency = 29;
+}
+
+def A64FXWrite_LDNP: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LDP01: SchedWriteRes<[A64FXGI2456]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LDR01: SchedWriteRes<[A64FXGI2456]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD102: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD103: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD104: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LD105: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LD106: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_LD107: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_LD108: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD109: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD110: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LD111: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LD112: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_LD113: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_LD114: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+def A64FXWrite_LD115: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 5;
+}
+
+def A64FXWrite_LD1I0: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD1I1: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LD2I0: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_LD2I1: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+def A64FXWrite_LD3I0: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+}
+
+def A64FXWrite_LD3I1: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+}
+
+def A64FXWrite_LD4I0: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+def A64FXWrite_LD4I1: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 9;
+}
+
+def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_FMOV_GV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_FMOV_VG : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 25;
+}
+
+def A64FXWrite_ADDLV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 12;
+}
+
+def A64FXWrite_MULLE : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_MULLV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_MADDL : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_ABA : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_ABAL : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 12;
+ let NumMicroOps = 6;
+}
+
+def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+ let NumMicroOps = 6;
+}
+
+def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 9;
+}
+
+def A64FXWrite_PMUL : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 15;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+ let NumMicroOps = 7;
+}
+
+def A64FXWrite_FMAXVVS : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_BIF : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 9;
+}
+
+def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 12;
+}
+
+def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 25;
+}
+
+def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 5;
+}
+
+def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 7;
+}
+
+def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 9;
+}
+
+def A64FXWrite_PREF0: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_PREF1: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_SWP: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_STUR: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_STNP: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_STP01: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_ST10: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_ST11: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_ST12: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_ST13: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_ST14: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_ST15: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_ST16: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_ST17: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_ST1W_6: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_ST2W_7: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 7;
+}
+
+def A64FXWrite_ST3W_8: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_ST4W_9: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 9;
+}
+
+def A64FXWrite_ST1W_15: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 15;
+}
+
+def A64FXWrite_ST1W_19: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 19;
+}
+
+def A64FXWrite_CAS: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 7;
+}
+
+// Define commonly used read types.
+
+// No forwarding is provided for these types.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+//===----------------------------------------------------------------------===//
+// 3. Instruction Tables.
+
+//---
+// 3.1 Branch Instructions
+//---
+
+// Branch, immed
+// Branch and link, immed
+// Compare and branch
+def : WriteRes<WriteBr, [A64FXGI7]> {
+ let Latency = 1;
+}
+
+// Branch, register
+// Branch and link, register != LR
+// Branch and link, register = LR
+def : WriteRes<WriteBrReg, [A64FXGI7]> {
+ let Latency = 1;
+}
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteAtomic, []> {
+ let Latency = 4;
+}
+
+//---
+// Branch
+//---
+def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>;
+def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>;
+def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
+def : InstRW<[A64FXWrite_1Cyc_GI7],
+ (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
+
+//---
+// 3.2 Arithmetic and Logical Instructions
+// 3.3 Move and Shift Instructions
+//---
+
+// ALU, basic
+// Conditional compare
+// Conditional select
+// Address generation
+def : WriteRes<WriteI, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+def : InstRW<[WriteI],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// ALU, extend and/or shift
+def : WriteRes<WriteISReg, [A64FXGI2456]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+}
+
+def : InstRW<[WriteISReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+def : WriteRes<WriteIEReg, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+def : InstRW<[WriteIEReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+// Move immed
+def : WriteRes<WriteImm, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+def : InstRW<[A64FXWrite_1Cyc_GI2456],
+ (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
+
+def : InstRW<[A64FXWrite_2Cyc_GI24],
+ (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;
+
+// Variable shift
+def : WriteRes<WriteIS, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+//---
+// 3.4 Divide and Multiply Instructions
+//---
+
+// Divide, W-form
+def : WriteRes<WriteID32, [A64FXGI4]> {
+ let Latency = 39;
+ let ResourceCycles = [39];
+}
+
+// Divide, X-form
+def : WriteRes<WriteID64, [A64FXGI4]> {
+ let Latency = 23;
+ let ResourceCycles = [23];
+}
+
+// Multiply accumulate, W-form
+def : WriteRes<WriteIM32, [A64FXGI2456]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+}
+
+// Multiply accumulate, X-form
+def : WriteRes<WriteIM64, [A64FXGI2456]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+}
+
+def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>;
+def : InstRW<[A64FXWrite_MADDL],
+ (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+
+def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
+def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
+
+// Bitfield extract, two reg
+def : WriteRes<WriteExtr, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+// Multiply high
+def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;
+
+// Miscellaneous Data-Processing Instructions
+// Bitfield extract
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move - basic
+def : InstRW<[A64FXWrite_1Cyc_GI24],
+ (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;
+
+// Bitfield move, insert
+def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;
+
+// Count leading
+def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$",
+ "^CLZ(W|X)r$")>;
+
+// Reverse bits
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;
+
+// Cryptography Extensions
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>;
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>;
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>;
+def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>;
+def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>;
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>;
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>;
+def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>;
+
+// CRC Instructions
+def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>;
+def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>;
+def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>;
+
+def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>;
+def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>;
+def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>;
+
+// Reverse bits/bytes
+// NOTE: Handled by WriteI.
+
+//---
+// 3.6 Load Instructions
+// 3.10 FP Load Instructions
+//---
+
+// Load register, literal
+// Load register, unscaled immed
+// Load register, immed unprivileged
+// Load register, unsigned immed
+def : WriteRes<WriteLD, [A64FXGI56]> {
+ let Latency = 4;
+ let ResourceCycles = [3];
+}
+
+// Load register, immed post-index
+// NOTE: Handled by WriteLD, WriteI.
+// Load register, immed pre-index
+// NOTE: Handled by WriteLD, WriteAdr.
+def : WriteRes<WriteAdr, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+// Load pair, immed offset, normal
+// Load pair, immed offset, signed words, base != SP
+// Load pair, immed offset signed words, base = SP
+// LDP only breaks into *one* LS micro-op. Thus
+// the resources are handled by WriteLD.
+def : WriteRes<WriteLDHi, []> {
+ let Latency = 5;
+}
+
+// Load register offset, basic
+// Load register, register offset, scale by 4/8
+// Load register, register offset, scale by 2
+// Load register offset, extend
+// Load register, register offset, extend, scale by 4/8
+// Load register, register offset, extend, scale by 2
+def A64FXWriteLDIdx : SchedWriteVariant<[
+ SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
+ SchedVar<NoSchedPred, [A64FXWrite_1Cyc_GI56]>]>;
+def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;
+
+def A64FXReadAdrBase : SchedReadVariant<[
+ SchedVar<ScaledIdxPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
+def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;
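+
+// Both variants above currently resolve to the same behaviour: scaled and
+// unscaled register-offset loads use the 1-cycle GI56 write, and the base
+// register is read with ReadDefault (no advance) either way; the variant
+// structure follows the pattern used by other AArch64 models and leaves room
+// to split the two cases later.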
+
+// Load pair, immed pre-index, normal
+// Load pair, immed pre-index, signed words
+// Load pair, immed post-index, normal
+// Load pair, immed post-index, signed words
+// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
+
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPDi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPQi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPSi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPWi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPXi)>;
+
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPDi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPQi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPWi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPXi)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRBui)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRDui)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRHui)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRQui)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRSui)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRDl)>;
+def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRQl)>;
+def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRWl)>;
+def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRXl)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRBi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRHi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRWi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRXi)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBWi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBXi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHWi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHXi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSWi)>;
+
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPXpre)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpost)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpost)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpost)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpost)>;
+
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPXpre)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroW)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRDroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRHroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRHHroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRQroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSHWroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSHXroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRWroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRXroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRBroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRDroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRHroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRHHroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRQroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSHWroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSHXroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRWroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRXroX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBBi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURDi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHHi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURQi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURXi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBWi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBXi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHWi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHXi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSWi)>;
+
+//---
+// Prefetch
+//---
+def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
+def : InstRW<[A64FXWrite_PREF1], (instrs PRFUMi)>;
+def : InstRW<[A64FXWrite_PREF1], (instrs PRFMui)>;
+def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroW)>;
+def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroX)>;
+
+//---
+// 3.7 Store Instructions
+// 3.11 FP Store Instructions
+//---
+
+// Store register, unscaled immed
+// Store register, immed unprivileged
+// Store register, unsigned immed
+def : WriteRes<WriteST, [A64FXGI56]> {
+ let Latency = 1;
+}
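+// Note: a WriteRes entry such as the one above sets the default latency and
+// issue resources for every instruction whose scheduling class resolves to
+// that SchedWrite; the InstRW entries in this file override individual
+// opcodes where the A64FX behaviour differs from that default.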
+
+// Store register, immed post-index
+// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase
+
+// Store register, immed pre-index
+// NOTE: Handled by WriteAdr, WriteST
+
+// Store register, register offset, basic
+// Store register, register offset, scaled by 4/8
+// Store register, register offset, scaled by 2
+// Store register, register offset, extend
+// Store register, register offset, extend, scale by 4/8
+// Store register, register offset, extend, scale by 1
+def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]> {
+ let Latency = 1;
+}
+
+// Store pair, immed offset, W-form
+// Store pair, immed offset, X-form
+def : WriteRes<WriteSTP, [A64FXGI56]> {
+ let Latency = 1;
+}
+
+// Store pair, immed post-index, W-form
+// Store pair, immed post-index, X-form
+// Store pair, immed pre-index, W-form
+// Store pair, immed pre-index, X-form
+// NOTE: Handled by WriteAdr, WriteSTP.
+
+def : InstRW<[A64FXWrite_STUR], (instrs STURBi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURBBi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURDi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURHi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURHHi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURQi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURSi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURWi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURXi)>;
+
+def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRBi)>;
+def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRHi)>;
+def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRWi)>;
+def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRXi)>;
+
+def : InstRW<[A64FXWrite_STNP], (instrs STNPDi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STNPQi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STNPXi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STNPWi)>;
+
+def : InstRW<[A64FXWrite_STNP], (instrs STPDi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STPQi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STPXi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STPWi)>;
+
+def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>;
+
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+
+//---
+// 3.8 FP Data Processing Instructions
+//---
+
+// FP absolute value
+// FP min/max
+// FP negate
+def : WriteRes<WriteF, [A64FXGI03]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+
+// FP arithmetic
+
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>;
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>;
+
+// FP compare
+def : WriteRes<WriteFCmp, [A64FXGI03]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+
+// FP Div, Sqrt
+def : WriteRes<WriteFDiv, [A64FXGI0]> {
+ let Latency = 43;
+}
+
+def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 38;
+}
+
+def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 29;
+}
+
+def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 43;
+}
+
+def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 29;
+}
+
+def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 43;
+}
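+// The SchedWriteRes defs above model the FP divide and square-root latencies
+// on the GI0 pipe: 29 cycles for the single-precision forms, 43 cycles for
+// double precision, and 38 cycles for the 128-bit F32 vector divide. They are
+// bound to the concrete FDIV/FSQRT opcodes below via InstRW. A further
+// per-opcode override would follow the same pattern; as a sketch only (the
+// write name here is hypothetical and not part of this model):
+//
+//   def A64FXWrite_HypotheticalDiv : SchedWriteRes<[A64FXGI0]> {
+//     let Latency = 30;
+//   }
+//   def : InstRW<[A64FXWrite_HypotheticalDiv], (instrs FDIVHrr)>;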
+
+// FP divide, S-form
+// FP square root, S-form
+def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
+def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>;
+def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>;
+
+// FP divide, D-form
+// FP square root, D-form
+def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
+def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>;
+def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>;
+
+// FP multiply
+// FP multiply accumulate
+def : WriteRes<WriteFMul, [A64FXGI03]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+def A64FXXWriteFMul : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+def A64FXXWriteFMulAcc : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+def : InstRW<[A64FXXWriteFMul], (instregex "^FMUL", "^FNMUL")>;
+def : InstRW<[A64FXXWriteFMulAcc],
+ (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>;
+
+// FP round to integral
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
+
+// FP select
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;
+
+//---
+// 3.9 FP Miscellaneous Instructions
+//---
+
+// FP convert, from vec to vec reg
+// FP convert, from gen to vec reg
+// FP convert, from vec to gen reg
+def : WriteRes<WriteFCvt, [A64FXGI03]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+// FP move, immed
+// FP move, register
+def : WriteRes<WriteFImm, [A64FXGI0]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+
+// FP transfer, from gen to vec reg
+// FP transfer, from vec to gen reg
+def : WriteRes<WriteFCopy, [A64FXGI0]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+
+def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
+def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
+
+//---
+// 3.12 ASIMD Integer Instructions
+//---
+
+// ASIMD absolute diff, D-form
+// ASIMD absolute diff, Q-form
+// ASIMD absolute diff accum, D-form
+// ASIMD absolute diff accum, Q-form
+// ASIMD absolute diff accum long
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD compare
+// ASIMD logical (AND, BIC, EOR)
+// ASIMD max/min, basic
+// ASIMD max/min, reduce, 4H/4S
+// ASIMD max/min, reduce, 8B/8H
+// ASIMD max/min, reduce, 16B
+// ASIMD multiply, D-form
+// ASIMD multiply, Q-form
+// ASIMD multiply accumulate long
+// ASIMD multiply accumulate saturating long
+// ASIMD multiply long
+// ASIMD pairwise add and accumulate
+// ASIMD shift accumulate
+// ASIMD shift by immed, basic
+// ASIMD shift by immed and insert, basic, D-form
+// ASIMD shift by immed and insert, basic, Q-form
+// ASIMD shift by immed, complex
+// ASIMD shift by register, basic, D-form
+// ASIMD shift by register, basic, Q-form
+// ASIMD shift by register, complex, D-form
+// ASIMD shift by register, complex, Q-form
+def : WriteRes<WriteV, [A64FXGI03]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
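+// WriteV above is the 4-cycle default for the ASIMD categories listed in the
+// comment block; the InstRW entries that follow pick out the operations
+// (reductions, multiplies, shifts, ...) that need different writes or
+// latencies.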
+
+// ASIMD arith, reduce, 4H/4S
+// ASIMD arith, reduce, 8B/8H
+// ASIMD arith, reduce, 16B
+
+// ASIMD logical (MVN (alias for NOT), ORN, ORR)
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
+
+// ASIMD arith, reduce
+def : InstRW<[A64FXWrite_ADDLV],
+ (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
+
+// ASIMD polynomial (8x8) multiply long
+def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[A64FXWrite_MULLV],
+ (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD absolute diff accum, D-form
+def : InstRW<[A64FXWrite_ABA],
+ (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+// ASIMD absolute diff accum, Q-form
+def : InstRW<[A64FXWrite_ABA],
+ (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+// ASIMD absolute diff accum long
+def : InstRW<[A64FXWrite_ABAL],
+ (instregex "^[SU]ABAL")>;
+// ASIMD arith, reduce, 8B/4H/2S
+def : InstRW<[A64FXWrite_ADDLV1],
+ (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8H/4S
+def : InstRW<[A64FXWrite_ADDLV1],
+ (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B
+def : InstRW<[A64FXWrite_ADDLV1],
+ (instregex "^[SU]?ADDL?Vv16i8v$")>;
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[A64FXWrite_MINMAXV],
+ (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[A64FXWrite_MINMAXV],
+ (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B
+def : InstRW<[A64FXWrite_MINMAXV],
+ (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+// ASIMD multiply, D-form
+def : InstRW<[A64FXWrite_PMUL],
+ (instregex "^(P?MUL|SQR?DMUL)" #
+ "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
+ "(_indexed)?$")>;
+
+// ASIMD multiply, Q-form
+def : InstRW<[A64FXWrite_PMUL],
+ (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// ASIMD multiply, Q-form
+def : InstRW<[A64FXWrite_SQRDMULH],
+ (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// ASIMD multiply accumulate, D-form
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD shift accumulate
+def : InstRW<[A64FXWrite_SRSRAV],
+ (instregex "SRSRAv", "URSRAv")>;
+def : InstRW<[A64FXWrite_SSRAV],
+ (instregex "SSRAv", "USRAv")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[A64FXWrite_RSHRN],
+ (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
+def : InstRW<[A64FXWrite_SHRN],
+ (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;
+
+def : InstRW<[A64FXWrite_6Cyc_GI3],
+ (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[A64FXWrite_6Cyc_GI3],
+ (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// ASIMD shift by register, complex, D-form
+def : InstRW<[A64FXWrite_6Cyc_GI3],
+ (instregex "^[SU][QR]{1,2}SHL" #
+ "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[A64FXWrite_6Cyc_GI3],
+ (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD Arithmetic
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
+def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
+ "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[A64FXWrite_ADDP],
+ (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
+ "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
+def : InstRW<[A64FXWrite_4Cyc_GI0],
+ (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
+def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
+def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
+def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;
+def : InstRW<[A64FXWrite_MINMAXV],
+ (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;
+def : InstRW<[A64FXWrite_ABA],
+ (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SUQADDv", "^USQADDv")>;
+def : InstRW<[A64FXWrite_SHRN],
+ (instregex "^ADDHNv", "^SUBHNv")>;
+def : InstRW<[A64FXWrite_RSHRN],
+ (instregex "^RADDHNv", "^RSUBHNv")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
+ "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB",
+ "^URHADD", "^USQADD")>;
+
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^CMEQv", "^CMGEv", "^CMGTv",
+ "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;
+def : InstRW<[A64FXWrite_MINMAXV],
+ (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
+def : InstRW<[A64FXWrite_ADDP],
+ (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^SABDv", "^UABDv")>;
+def : InstRW<[A64FXWrite_TBX1],
+ (instregex "^SABDLv", "^UABDLv")>;
+
+//---
+// 3.13 ASIMD Floating-point Instructions
+//---
+
+// ASIMD FP absolute value
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;
+
+// ASIMD FP arith, normal, D-form
+// ASIMD FP arith, normal, Q-form
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+
+// ASIMD FP arith, pairwise, D-form
+// ASIMD FP arith, pairwise, Q-form
+def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;
+
+// ASIMD FP compare, D-form
+// ASIMD FP compare, Q-form
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv",
+ "^FCMGTv", "^FCMLEv",
+ "^FCMLTv")>;
+// ASIMD FP round, D-form
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^FRINT[AIMNPXZ](v2f32)")>;
+// ASIMD FP round, Q-form
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
+
+// ASIMD FP convert, long
+// ASIMD FP convert, narrow
+// ASIMD FP convert, other, D-form
+// ASIMD FP convert, other, Q-form
+
+// ASIMD FP convert, long and narrow
+def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
+// ASIMD FP convert, other, D-form
+def : InstRW<[A64FXWrite_FCVTXNV],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD FP convert, other, Q-form
+def : InstRW<[A64FXWrite_FCVTXNV],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;
+def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>;
+
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;
+def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
+def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>;
+
+// ASIMD FP max/min, normal, D-form
+// ASIMD FP max/min, normal, Q-form
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv",
+ "^FMINv", "^FMINNMv")>;
+
+// ASIMD FP max/min, pairwise, D-form
+// ASIMD FP max/min, pairwise, Q-form
+def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv",
+ "^FMINPv", "^FMINNMPv")>;
+
+// ASIMD FP max/min, reduce
+def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv",
+ "^FMINVv", "^FMINNMVv")>;
+
+// ASIMD FP multiply, D-form, FZ
+// ASIMD FP multiply, D-form, no FZ
+// ASIMD FP multiply, Q-form, FZ
+// ASIMD FP multiply, Q-form, no FZ
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[A64FXWrite_FMULXE],
+ (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+def : InstRW<[A64FXWrite_FMULXE],
+ (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP multiply accumulate, D-form, FZ
+// ASIMD FP multiply accumulate, D-form, no FZ
+// ASIMD FP multiply accumulate, Q-form, FZ
+// ASIMD FP multiply accumulate, Q-form, no FZ
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
+def : InstRW<[A64FXWrite_FMULXE],
+ (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[A64FXWrite_FMULXE],
+ (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP negate
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;
+
+//--
+// 3.14 ASIMD Miscellaneous Instructions
+//--
+
+// ASIMD bit reverse
+def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;
+
+// ASIMD bitwise insert, D-form
+// ASIMD bitwise insert, Q-form
+def : InstRW<[A64FXWrite_BIF],
+ (instregex "^BIFv", "^BITv", "^BSLv")>;
+
+// ASIMD count, D-form
+// ASIMD count, Q-form
+def : InstRW<[A64FXWrite_4Cyc_GI0],
+ (instregex "^CLSv", "^CLZv", "^CNTv")>;
+
+// ASIMD duplicate, gen reg
+// ASIMD duplicate, element
+def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^CPY")>;
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;
+
+// ASIMD extract
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;
+
+// ASIMD extract narrow
+def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[A64FXWrite_6Cyc_GI3],
+ (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
+
+// ASIMD insert, element to element
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
+
+// ASIMD move, integer immed
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;
+
+// ASIMD move, FP immed
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;
+
+// ASIMD table lookup, D-form
+def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
+def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
+def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
+def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
+def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
+def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
+def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
+def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;
+
+// ASIMD table lookup, Q-form
+def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
+def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
+def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
+def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
+def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
+def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
+def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
+def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;
+
+// ASIMD transpose
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1", "^TRN2")>;
+
+// ASIMD unzip/zip
+def : InstRW<[A64FXWrite_6Cyc_GI0],
+ (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
+
+// ASIMD reciprocal estimate, D-form
+// ASIMD reciprocal estimate, Q-form
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
+ "^FRSQRTEv", "^URSQRTEv")>;
+
+// ASIMD reciprocal step, D-form, FZ
+// ASIMD reciprocal step, D-form, no FZ
+// ASIMD reciprocal step, Q-form, FZ
+// ASIMD reciprocal step, Q-form, no FZ
+def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;
+
+// ASIMD reverse
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^REV16v", "^REV32v", "^REV64v")>;
+
+// ASIMD table lookup, D-form
+// ASIMD table lookup, Q-form
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
+
+// ASIMD transfer gen reg to element
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
+
+// ASIMD transpose
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v",
+ "^UZP1v", "^UZP2v")>;
+
+// ASIMD unzip/zip
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>;
+
+//--
+// 3.15 ASIMD Load Instructions
+//--
+
+// ASIMD load, 1 element, multiple, 1 reg, D-form
+// ASIMD load, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[A64FXWrite_8Cyc_GI56],
+ (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
+def : InstRW<[A64FXWrite_11Cyc_GI56],
+ (instregex "^LD1Onev(16b|8h|4s)$")>;
+def : InstRW<[A64FXWrite_LD108, WriteAdr],
+ (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
+def : InstRW<[A64FXWrite_LD109, WriteAdr],
+ (instregex "^LD1Onev(16b|8h|4s)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form
+// ASIMD load, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[A64FXWrite_LD102],
+ (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
+def : InstRW<[A64FXWrite_LD103],
+ (instregex "^LD1Twov(16b|8h|4s)$")>;
+def : InstRW<[A64FXWrite_LD110, WriteAdr],
+ (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
+def : InstRW<[A64FXWrite_LD111, WriteAdr],
+ (instregex "^LD1Twov(16b|8h|4s)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form
+// ASIMD load, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[A64FXWrite_LD104],
+ (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
+def : InstRW<[A64FXWrite_LD105],
+ (instregex "^LD1Threev(16b|8h|4s)$")>;
+def : InstRW<[A64FXWrite_LD112, WriteAdr],
+ (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
+def : InstRW<[A64FXWrite_LD113, WriteAdr],
+ (instregex "^LD1Threev(16b|8h|4s)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form
+// ASIMD load, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[A64FXWrite_LD106],
+ (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
+def : InstRW<[A64FXWrite_LD107],
+ (instregex "^LD1Fourv(16b|8h|4s)$")>;
+def : InstRW<[A64FXWrite_LD114, WriteAdr],
+ (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
+def : InstRW<[A64FXWrite_LD115, WriteAdr],
+ (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;
+
+// ASIMD load, 1 element, one lane, B/H/S
+// ASIMD load, 1 element, one lane, D
+def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
+ (instregex "^LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S
+// ASIMD load, 1 element, all lanes, D-form, D
+// ASIMD load, 1 element, all lanes, Q-form
+def : InstRW<[A64FXWrite_8Cyc_GI03],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD108, WriteAdr],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S
+// ASIMD load, 2 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_LD103],
+ (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD111, WriteAdr],
+ (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H
+// ASIMD load, 2 element, one lane, S
+// ASIMD load, 2 element, one lane, D
+def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
+ (instregex "^LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S
+// ASIMD load, 2 element, all lanes, D-form, D
+// ASIMD load, 2 element, all lanes, Q-form
+def : InstRW<[A64FXWrite_LD102],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD110, WriteAdr],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_LD105],
+ (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD113, WriteAdr],
+ (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane, B/H
+// ASIMD load, 3 element, one lane, S
+// ASIMD load, 3 element, one lane, D
+def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
+ (instregex "^LD3i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S
+// ASIMD load, 3 element, all lanes, D-form, D
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S
+// ASIMD load, 3 element, all lanes, Q-form, D
+def : InstRW<[A64FXWrite_LD104],
+ (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD112, WriteAdr],
+ (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_LD107],
+ (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD115, WriteAdr],
+ (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H
+// ASIMD load, 4 element, one lane, S
+// ASIMD load, 4 element, one lane, D
+def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
+ (instregex "^LD4i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S
+// ASIMD load, 4 element, all lanes, D-form, D
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S
+// ASIMD load, 4 element, all lanes, Q-form, D
+def : InstRW<[A64FXWrite_LD106],
+ (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD114, WriteAdr],
+ (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+//--
+// 3.16 ASIMD Store Instructions
+//--
+
+// ASIMD store, 1 element, multiple, 1 reg, D-form
+// ASIMD store, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[A64FXWrite_ST10],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST14, WriteAdr],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[A64FXWrite_ST11],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST15, WriteAdr],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[A64FXWrite_ST12],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST16, WriteAdr],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[A64FXWrite_ST13],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST17, WriteAdr],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S
+// ASIMD store, 1 element, one lane, D
+def : InstRW<[A64FXWrite_ST10],
+ (instregex "^ST1i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_ST14, WriteAdr],
+ (instregex "^ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_ST11],
+ (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST15, WriteAdr],
+ (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S
+// ASIMD store, 2 element, one lane, D
+def : InstRW<[A64FXWrite_ST11],
+ (instregex "^ST2i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_ST15, WriteAdr],
+ (instregex "^ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_ST12],
+ (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST16, WriteAdr],
+ (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H
+// ASIMD store, 3 element, one lane, S
+// ASIMD store, 3 element, one lane, D
+def : InstRW<[A64FXWrite_ST12], (instregex "^ST3i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_ST16, WriteAdr],
+ (instregex "^ST3i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_ST13],
+ (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST17, WriteAdr],
+ (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H
+// ASIMD store, 4 element, one lane, S
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[A64FXWrite_ST13], (instregex "^ST4i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_ST17, WriteAdr],
+ (instregex "^ST4i(8|16|32|64)_POST$")>;
+
+// V8.1a Atomics (LSE)
+def : InstRW<[A64FXWrite_CAS, WriteAtomic],
+ (instrs CASB, CASH, CASW, CASX)>;
+
+def : InstRW<[A64FXWrite_CAS, WriteAtomic],
+ (instrs CASAB, CASAH, CASAW, CASAX)>;
+
+def : InstRW<[A64FXWrite_CAS, WriteAtomic],
+ (instrs CASLB, CASLH, CASLW, CASLX)>;
+
+def : InstRW<[A64FXWrite_CAS, WriteAtomic],
+ (instrs CASALB, CASALH, CASALW, CASALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDADDB, LDADDH, LDADDW, LDADDX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDEORB, LDEORH, LDEORW, LDEORX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSETB, LDSETH, LDSETW, LDSETX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX,
+ LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX,
+ LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX,
+ LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX,
+ LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX,
+ LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX,
+ LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX,
+ LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX,
+ LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX,
+ LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX,
+ LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX,
+ LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX,
+ LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
+
+def : InstRW<[A64FXWrite_SWP, WriteAtomic],
+ (instrs SWPB, SWPH, SWPW, SWPX)>;
+
+def : InstRW<[A64FXWrite_SWP, WriteAtomic],
+ (instrs SWPAB, SWPAH, SWPAW, SWPAX)>;
+
+def : InstRW<[A64FXWrite_SWP, WriteAtomic],
+ (instrs SWPLB, SWPLH, SWPLW, SWPLX)>;
+
+def : InstRW<[A64FXWrite_SWP, WriteAtomic],
+ (instrs SWPALB, SWPALH, SWPALW, SWPALX)>;
+
+def : InstRW<[A64FXWrite_STUR, WriteAtomic],
+ (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
+
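+// The entries below cover the SVE instruction set. Each bracketed comment
+// quotes the assembly syntax of the instructions handled by the InstRW def
+// that follows; entries commented out with "//@@@" are left unmapped and keep
+// their default SchedWrite classes. For example (hypothetical mapping, not
+// part of this change), entry [72] could be wired up as:
+//
+//   def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPmR_B, CPY_ZPmR_D,
+//                                               CPY_ZPmR_H, CPY_ZPmR_S)>;
+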
+// [ 1] "abs $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ABS_ZPmZ_B, ABS_ZPmZ_D, ABS_ZPmZ_H, ABS_ZPmZ_S)>;
+
+// [ 2] "add $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZZZ_B, ADD_ZZZ_D, ADD_ZZZ_H, ADD_ZZZ_S)>;
+
+// [ 3] "add $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZPmZ_B, ADD_ZPmZ_D, ADD_ZPmZ_H, ADD_ZPmZ_S)>;
+
+// [ 4] "add $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZI_B, ADD_ZI_D, ADD_ZI_H, ADD_ZI_S)>;
+
+// [ 5] "addpl $Rd, $Rn, $imm6";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDPL_XXI)>;
+
+// [ 6] "addvl $Rd, $Rn, $imm6";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDVL_XXI)>;
+
+// [ 7] "adr $Zd, [$Zn, $Zm]";
+def : InstRW<[A64FXWrite_5Cyc_GI0], (instrs ADR_LSL_ZZZ_D_0, ADR_LSL_ZZZ_D_1, ADR_LSL_ZZZ_D_2, ADR_LSL_ZZZ_D_3, ADR_LSL_ZZZ_S_0, ADR_LSL_ZZZ_S_1, ADR_LSL_ZZZ_S_2, ADR_LSL_ZZZ_S_3, ADR_SXTW_ZZZ_D_0, ADR_SXTW_ZZZ_D_1, ADR_SXTW_ZZZ_D_2, ADR_SXTW_ZZZ_D_3, ADR_UXTW_ZZZ_D_0, ADR_UXTW_ZZZ_D_1, ADR_UXTW_ZZZ_D_2, ADR_UXTW_ZZZ_D_3)>;
+
+// [ 8] "and $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs AND_PPzPP)>;
+
+// [ 9] "and $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZZZ)>;
+
+// [10] "and $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZPmZ_B, AND_ZPmZ_D, AND_ZPmZ_H, AND_ZPmZ_S)>;
+
+// [11] "and $Zdn, $_Zdn, $imms13";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZI)>;
+
+// [12] "ands $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ANDS_PPzPP)>;
+
+// [13] "andv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ANDV_VPZ_B, ANDV_VPZ_D, ANDV_VPZ_H, ANDV_VPZ_S)>;
+
+// [14] "asr $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZZZ_B, ASR_WIDE_ZZZ_H, ASR_WIDE_ZZZ_S)>;
+
+// [15] "asr $Zd, $Zn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZZI_B, ASR_ZZI_D, ASR_ZZI_H, ASR_ZZI_S)>;
+
+// [16] "asr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZPmZ_B, ASR_WIDE_ZPmZ_H, ASR_WIDE_ZPmZ_S, ASR_ZPmZ_B, ASR_ZPmZ_D, ASR_ZPmZ_H, ASR_ZPmZ_S)>;
+
+// [17] "asr $Zdn, $Pg/m, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZPmI_B, ASR_ZPmI_D, ASR_ZPmI_H, ASR_ZPmI_S)>;
+
+// [18] "asrd $Zdn, $Pg/m, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRD_ZPmI_B, ASRD_ZPmI_D, ASRD_ZPmI_H, ASRD_ZPmI_S)>;
+
+// [19] "asrr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRR_ZPmZ_B, ASRR_ZPmZ_D, ASRR_ZPmZ_H, ASRR_ZPmZ_S)>;
+
+// [20] "bic $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BIC_PPzPP)>;
+
+// [21] "bic $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZZZ)>;
+
+// [22] "bic $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZPmZ_B, BIC_ZPmZ_D, BIC_ZPmZ_H, BIC_ZPmZ_S)>;
+
+// [23] "bics $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BICS_PPzPP)>;
+
+// [24] "brka $Pd, $Pg/m, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPmP)>;
+
+// [25] "brka $Pd, $Pg/z, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPzP)>;
+
+// [26] "brkas $Pd, $Pg/z, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKAS_PPzP)>;
+
+// [27] "brkb $Pd, $Pg/m, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPmP)>;
+
+// [28] "brkb $Pd, $Pg/z, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPzP)>;
+
+// [29] "brkbs $Pd, $Pg/z, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKBS_PPzP)>;
+
+// [30] "brkn $Pdm, $Pg/z, $Pn, $_Pdm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKN_PPzP)>;
+
+// [31] "brkns $Pdm, $Pg/z, $Pn, $_Pdm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKNS_PPzP)>;
+
+// [32] "brkpa $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPA_PPzPP)>;
+
+// [33] "brkpas $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPAS_PPzPP)>;
+
+// [34] "brkpb $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPB_PPzPP)>;
+
+// [35] "brkpbs $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPBS_PPzPP)>;
+
+// [36] "clasta $Rdn, $Pg, $_Rdn, $Zm";
+def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTA_RPZ_B, CLASTA_RPZ_D, CLASTA_RPZ_H, CLASTA_RPZ_S)>;
+
+// [37] "clasta $Vdn, $Pg, $_Vdn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_VPZ_B, CLASTA_VPZ_D, CLASTA_VPZ_H, CLASTA_VPZ_S)>;
+
+// [38] "clasta $Zdn, $Pg, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_ZPZ_B, CLASTA_ZPZ_D, CLASTA_ZPZ_H, CLASTA_ZPZ_S)>;
+
+// [39] "clastb $Rdn, $Pg, $_Rdn, $Zm";
+def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTB_RPZ_B, CLASTB_RPZ_D, CLASTB_RPZ_H, CLASTB_RPZ_S)>;
+
+// [40] "clastb $Vdn, $Pg, $_Vdn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_VPZ_B, CLASTB_VPZ_D, CLASTB_VPZ_H, CLASTB_VPZ_S)>;
+
+// [41] "clastb $Zdn, $Pg, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_ZPZ_B, CLASTB_ZPZ_D, CLASTB_ZPZ_H, CLASTB_ZPZ_S)>;
+
+// [42] "cls $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLS_ZPmZ_B, CLS_ZPmZ_D, CLS_ZPmZ_H, CLS_ZPmZ_S)>;
+
+// [43] "clz $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLZ_ZPmZ_B, CLZ_ZPmZ_D, CLZ_ZPmZ_H, CLZ_ZPmZ_S)>;
+
+// [44] "cmpeq $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZZ_B, CMPEQ_PPzZZ_D, CMPEQ_PPzZZ_H, CMPEQ_PPzZZ_S, CMPEQ_WIDE_PPzZZ_B, CMPEQ_WIDE_PPzZZ_H, CMPEQ_WIDE_PPzZZ_S)>;
+
+// [45] "cmpeq $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZI_B, CMPEQ_PPzZI_D, CMPEQ_PPzZI_H, CMPEQ_PPzZI_S)>;
+
+// [46] "cmpge $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZZ_B, CMPGE_PPzZZ_D, CMPGE_PPzZZ_H, CMPGE_PPzZZ_S, CMPGE_WIDE_PPzZZ_B, CMPGE_WIDE_PPzZZ_H, CMPGE_WIDE_PPzZZ_S)>;
+
+// [47] "cmpge $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZI_B, CMPGE_PPzZI_D, CMPGE_PPzZI_H, CMPGE_PPzZI_S)>;
+
+// [48] "cmpgt $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZZ_B, CMPGT_PPzZZ_D, CMPGT_PPzZZ_H, CMPGT_PPzZZ_S, CMPGT_WIDE_PPzZZ_B, CMPGT_WIDE_PPzZZ_H, CMPGT_WIDE_PPzZZ_S)>;
+
+// [49] "cmpgt $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZI_B, CMPGT_PPzZI_D, CMPGT_PPzZI_H, CMPGT_PPzZI_S)>;
+
+// [50] "cmphi $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZZ_B, CMPHI_PPzZZ_D, CMPHI_PPzZZ_H, CMPHI_PPzZZ_S, CMPHI_WIDE_PPzZZ_B, CMPHI_WIDE_PPzZZ_H, CMPHI_WIDE_PPzZZ_S)>;
+
+// [51] "cmphi $Pd, $Pg/z, $Zn, $imm7";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZI_B, CMPHI_PPzZI_D, CMPHI_PPzZI_H, CMPHI_PPzZI_S)>;
+
+// [52] "cmphs $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZZ_B, CMPHS_PPzZZ_D, CMPHS_PPzZZ_H, CMPHS_PPzZZ_S, CMPHS_WIDE_PPzZZ_B, CMPHS_WIDE_PPzZZ_H, CMPHS_WIDE_PPzZZ_S)>;
+
+// [53] "cmphs $Pd, $Pg/z, $Zn, $imm7";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZI_B, CMPHS_PPzZI_D, CMPHS_PPzZI_H, CMPHS_PPzZI_S)>;
+
+// [54] "cmple $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_WIDE_PPzZZ_B, CMPLE_WIDE_PPzZZ_H, CMPLE_WIDE_PPzZZ_S)>;
+
+// [55] "cmple $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_PPzZI_B, CMPLE_PPzZI_D, CMPLE_PPzZI_H, CMPLE_PPzZI_S)>;
+
+// [56] "cmplo $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_WIDE_PPzZZ_B, CMPLO_WIDE_PPzZZ_H, CMPLO_WIDE_PPzZZ_S)>;
+
+// [57] "cmplo $Pd, $Pg/z, $Zn, $imm7";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_PPzZI_B, CMPLO_PPzZI_D, CMPLO_PPzZI_H, CMPLO_PPzZI_S)>;
+
+// [58] "cmpls $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_WIDE_PPzZZ_B, CMPLS_WIDE_PPzZZ_H, CMPLS_WIDE_PPzZZ_S)>;
+
+// [59] "cmpls $Pd, $Pg/z, $Zn, $imm7";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_PPzZI_B, CMPLS_PPzZI_D, CMPLS_PPzZI_H, CMPLS_PPzZI_S)>;
+
+// [60] "cmplt $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_WIDE_PPzZZ_B, CMPLT_WIDE_PPzZZ_H, CMPLT_WIDE_PPzZZ_S)>;
+
+// [61] "cmplt $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_PPzZI_B, CMPLT_PPzZI_D, CMPLT_PPzZI_H, CMPLT_PPzZI_S)>;
+
+// [62] "cmpne $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZZ_B, CMPNE_PPzZZ_D, CMPNE_PPzZZ_H, CMPNE_PPzZZ_S, CMPNE_WIDE_PPzZZ_B, CMPNE_WIDE_PPzZZ_H, CMPNE_WIDE_PPzZZ_S)>;
+
+// [63] "cmpne $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZI_B, CMPNE_PPzZI_D, CMPNE_PPzZI_H, CMPNE_PPzZI_S)>;
+
+// [64] "cnot $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs CNOT_ZPmZ_B, CNOT_ZPmZ_D, CNOT_ZPmZ_H, CNOT_ZPmZ_S)>;
+
+// [65] "cnt $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI3], (instrs CNT_ZPmZ_B, CNT_ZPmZ_D, CNT_ZPmZ_H, CNT_ZPmZ_S)>;
+
+// [66] "cntb $Rd, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTB_XPiI)>;
+
+// [67] "cntd $Rd, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTD_XPiI)>;
+
+// [68] "cnth $Rd, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTH_XPiI)>;
+
+// [69] "cntp $Rd, $Pg, $Pn";
+def : InstRW<[A64FXWrite_6Cyc_GI01], (instrs CNTP_XPP_B, CNTP_XPP_D, CNTP_XPP_H, CNTP_XPP_S)>;
+
+// [70] "cntw $Rd, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTW_XPiI)>;
+
+// [71] "compact $Zd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs COMPACT_ZPZ_D, COMPACT_ZPZ_S)>;
+
+// [72] "cpy $Zd, $Pg/m, $Rn";
+//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmR_B, CPY_ZPmR_D, CPY_ZPmR_H, CPY_ZPmR_S)>;
+
+// [73] "cpy $Zd, $Pg/m, $Vn";
+//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmV_B, CPY_ZPmV_D, CPY_ZPmV_H, CPY_ZPmV_S)>;
+
+// [74] "cpy $Zd, $Pg/m, $imm";
+//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmI_B, CPY_ZPmI_D, CPY_ZPmI_H, CPY_ZPmI_S)>;
+
+// [75] "cpy $Zd, $Pg/z, $imm";
+//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPzI_B, CPY_ZPzI_D, CPY_ZPzI_H, CPY_ZPzI_S)>;
+
+// [76] "ctermeq $Rn, $Rm";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMEQ_WW, CTERMEQ_XX)>;
+
+// [77] "ctermne $Rn, $Rm";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMNE_WW, CTERMNE_XX)>;
+
+// [78] "decb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECB_XPiI)>;
+
+// [79] "decd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECD_XPiI)>;
+
+// [80] "decd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECD_ZPiI)>;
+
+// [81] "dech $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECH_XPiI)>;
+
+// [82] "dech $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECH_ZPiI)>;
+
+// [83] "decp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs DECP_XP_B, DECP_XP_D, DECP_XP_H, DECP_XP_S)>;
+
+// [84] "decp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs DECP_ZP_D, DECP_ZP_H, DECP_ZP_S)>;
+
+// [85] "decw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECW_XPiI)>;
+
+// [86] "decw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECW_ZPiI)>;
+
+// [87] "dup $Zd, $Rn";
+def : InstRW<[A64FXWrite_8Cyc_GI01], (instrs DUP_ZR_B, DUP_ZR_D, DUP_ZR_H, DUP_ZR_S)>;
+
+// [88] "dup $Zd, $Zn$idx";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs DUP_ZZI_B, DUP_ZZI_D, DUP_ZZI_H, DUP_ZZI_Q, DUP_ZZI_S)>;
+
+// [89] "dup $Zd, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUP_ZI_B, DUP_ZI_D, DUP_ZI_H, DUP_ZI_S)>;
+
+// [90] "dupm $Zd, $imms";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUPM_ZI)>;
+
+// [91] "eor $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EOR_PPzPP)>;
+
+// [92] "eor $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZZZ)>;
+
+// [93] "eor $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZPmZ_B, EOR_ZPmZ_D, EOR_ZPmZ_H, EOR_ZPmZ_S)>;
+
+// [94] "eor $Zdn, $_Zdn, $imms13";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs EOR_ZI)>;
+
+// [95] "eors $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EORS_PPzPP)>;
+
+// [96] "eorv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs EORV_VPZ_B, EORV_VPZ_D, EORV_VPZ_H, EORV_VPZ_S)>;
+
+// [97] "ext $Zdn, $_Zdn, $Zm, $imm8";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs EXT_ZZI)>;
+
+// [99] "fabd $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FABD_ZPmZ_D, FABD_ZPmZ_H, FABD_ZPmZ_S)>;
+
+// [100] "fabs $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FABS_ZPmZ_D, FABS_ZPmZ_H, FABS_ZPmZ_S)>;
+
+// [101] "facge $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGE_PPzZZ_D, FACGE_PPzZZ_H, FACGE_PPzZZ_S)>;
+
+// [102] "facgt $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGT_PPzZZ_D, FACGT_PPzZZ_H, FACGT_PPzZZ_S)>;
+
+// [103] "fadd $Zd, $Zn, $Zm"; def is line 1638
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZZZ_D, FADD_ZZZ_H, FADD_ZZZ_S)>;
+
+// [104] "fadd $Zdn, $Pg/m, $_Zdn, $Zm"; def is line 1638
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmZ_D, FADD_ZPmZ_H, FADD_ZPmZ_S)>;
+
+// [105] "fadd $Zdn, $Pg/m, $_Zdn, $i1"; def is line 1638
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmI_D, FADD_ZPmI_H, FADD_ZPmI_S)>;
+
+// [106] "fadda $Vdn, $Pg, $_Vdn, $Zm";
+def : InstRW<[A64FXWrite_18Cyc_GI03], (instrs FADDA_VPZ_D, FADDA_VPZ_H, FADDA_VPZ_S)>;
+
+// [107] "faddv $Vd, $Pg, $Zn";
+// H : 4 / 6 / ([1,2]9 / [1]6) x 4 / [1,2]9 = 75 cycle
+// S : 4 / 6 / ([1,2]9 / [1]6) x 3 / [1,2]9 = 60 cycle
+// D : 4 / 6 / ([1,2]9 / [1]6) x 2 / [1,2]9 = 45 cycle
+def : InstRW<[A64FXWrite_75Cyc_GI03], (instrs FADDV_VPZ_H)>;
+def : InstRW<[A64FXWrite_60Cyc_GI03], (instrs FADDV_VPZ_S)>;
+def : InstRW<[A64FXWrite_45Cyc_GI03], (instrs FADDV_VPZ_D)>;
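+// The three writes above give FADDV the reduction latencies derived in the
+// comment: 75 cycles for half precision, 60 for single precision and 45 for
+// double precision.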
+
+// [108] "fcadd $Zdn, $Pg/m, $_Zdn, $Zm, $imm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCADD_ZPmZ_D, FCADD_ZPmZ_H, FCADD_ZPmZ_S)>;
+
+// [109] "fcmeq $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZ0_D, FCMEQ_PPzZ0_H, FCMEQ_PPzZ0_S)>;
+
+// [110] "fcmeq $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZZ_D, FCMEQ_PPzZZ_H, FCMEQ_PPzZZ_S)>;
+
+// [111] "fcmge $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZ0_D, FCMGE_PPzZ0_H, FCMGE_PPzZ0_S)>;
+
+// [112] "fcmge $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZZ_D, FCMGE_PPzZZ_H, FCMGE_PPzZZ_S)>;
+
+// [113] "fcmgt $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZ0_D, FCMGT_PPzZ0_H, FCMGT_PPzZ0_S)>;
+
+// [114] "fcmgt $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZZ_D, FCMGT_PPzZZ_H, FCMGT_PPzZZ_S)>;
+
+// [115] "fcmla $Zda, $Pg/m, $Zn, $Zm, $imm";
+def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZPmZZ_D, FCMLA_ZPmZZ_H, FCMLA_ZPmZZ_S)>;
+
+// [116] "fcmla $Zda, $Zn, $Zm$iop, $imm";
+def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZZZI_H, FCMLA_ZZZI_S)>;
+
+// [117] "fcmle $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLE_PPzZ0_D, FCMLE_PPzZ0_H, FCMLE_PPzZ0_S)>;
+
+// [118] "fcmlt $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLT_PPzZ0_D, FCMLT_PPzZ0_H, FCMLT_PPzZ0_S)>;
+
+// [119] "fcmne $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZ0_D, FCMNE_PPzZ0_H, FCMNE_PPzZ0_S)>;
+
+// [120] "fcmne $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZZ_D, FCMNE_PPzZZ_H, FCMNE_PPzZZ_S)>;
+
+// [121] "fcmuo $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMUO_PPzZZ_D, FCMUO_PPzZZ_H, FCMUO_PPzZZ_S)>;
+
+// [122] "fcpy $Zd, $Pg/m, $imm8";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCPY_ZPmI_D, FCPY_ZPmI_H, FCPY_ZPmI_S)>;
+
+// [123] "fcvt $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVT_ZPmZ_DtoH, FCVT_ZPmZ_DtoS, FCVT_ZPmZ_HtoD, FCVT_ZPmZ_HtoS, FCVT_ZPmZ_StoD, FCVT_ZPmZ_StoH)>;
+
+// [124] "fcvtzs $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZS_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoS, FCVTZS_ZPmZ_HtoD, FCVTZS_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoS, FCVTZS_ZPmZ_StoD, FCVTZS_ZPmZ_StoS)>;
+
+// [125] "fcvtzu $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZU_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoS, FCVTZU_ZPmZ_HtoD, FCVTZU_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoS, FCVTZU_ZPmZ_StoD, FCVTZU_ZPmZ_StoS)>;
+
+// [126] "fdiv $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIV_ZPmZ_D)>;
+def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIV_ZPmZ_H)>;
+def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIV_ZPmZ_S)>;
+
+// [127] "fdivr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIVR_ZPmZ_D)>;
+def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIVR_ZPmZ_H)>;
+def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIVR_ZPmZ_S)>;
+
+// [128] "fdup $Zd, $imm8";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FDUP_ZI_D, FDUP_ZI_H, FDUP_ZI_S)>;
+
+// [129] "fexpa $Zd, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FEXPA_ZZ_D, FEXPA_ZZ_H, FEXPA_ZZ_S)>;
+
+// [130] "fmad $Zdn, $Pg/m, $Zm, $Za";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMAD_ZPmZZ_D, FMAD_ZPmZZ_H, FMAD_ZPmZZ_S)>;
+
+// [131] "fmax $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAX_ZPmZ_D, FMAX_ZPmZ_H, FMAX_ZPmZ_S)>;
+
+// [132] "fmax $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAX_ZPmI_D, FMAX_ZPmI_H, FMAX_ZPmI_S)>;
+
+// [133] "fmaxnm $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAXNM_ZPmZ_D, FMAXNM_ZPmZ_H, FMAXNM_ZPmZ_S)>;
+
+// [134] "fmaxnm $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAXNM_ZPmI_D, FMAXNM_ZPmI_H, FMAXNM_ZPmI_S)>;
+
+// [135] "fmaxnmv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXNMV_VPZ_D, FMAXNMV_VPZ_H, FMAXNMV_VPZ_S)>;
+
+// [136] "fmaxv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXV_VPZ_D, FMAXV_VPZ_H, FMAXV_VPZ_S)>;
+
+// [137] "fmin $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMIN_ZPmZ_D, FMIN_ZPmZ_H, FMIN_ZPmZ_S)>;
+
+// [138] "fmin $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMIN_ZPmI_D, FMIN_ZPmI_H, FMIN_ZPmI_S)>;
+
+// [139] "fminnm $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMINNM_ZPmZ_D, FMINNM_ZPmZ_H, FMINNM_ZPmZ_S)>;
+
+// [140] "fminnm $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMINNM_ZPmI_D, FMINNM_ZPmI_H, FMINNM_ZPmI_S)>;
+
+// [141] "fminnmv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINNMV_VPZ_D, FMINNMV_VPZ_H, FMINNMV_VPZ_S)>;
+
+// [142] "fminv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINV_VPZ_D, FMINV_VPZ_H, FMINV_VPZ_S)>;
+
+// [143] "fmla $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZPmZZ_D, FMLA_ZPmZZ_H, FMLA_ZPmZZ_S)>;
+
+// [144] "fmla $Zda, $Zn, $Zm$iop";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZZZI_D, FMLA_ZZZI_H, FMLA_ZZZI_S)>;
+
+// [145] "fmls $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZPmZZ_D, FMLS_ZPmZZ_H, FMLS_ZPmZZ_S)>;
+
+// [146] "fmls $Zda, $Zn, $Zm$iop";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZZZI_D, FMLS_ZZZI_H, FMLS_ZZZI_S)>;
+
+// [147] "fmsb $Zdn, $Pg/m, $Zm, $Za";
+
+// [148] "fmul $Zd, $Zn, $Zm";
+
+// [149] "fmul $Zd, $Zn, $Zm$iop";
+
+// [150] "fmul $Zdn, $Pg/m, $_Zdn, $Zm";
+
+// [151] "fmul $Zdn, $Pg/m, $_Zdn, $i1";
+
+// [152] "fmulx $Zdn, $Pg/m, $_Zdn, $Zm";
+
+// [153] "fneg $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FNEG_ZPmZ_D, FNEG_ZPmZ_H, FNEG_ZPmZ_S)>;
+
+// [154] "fnmad $Zdn, $Pg/m, $Zm, $Za";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMAD_ZPmZZ_D, FNMAD_ZPmZZ_H, FNMAD_ZPmZZ_S)>;
+
+// [155] "fnmla $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLA_ZPmZZ_D, FNMLA_ZPmZZ_H, FNMLA_ZPmZZ_S)>;
+
+// [156] "fnmls $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLS_ZPmZZ_D, FNMLS_ZPmZZ_H, FNMLS_ZPmZZ_S)>;
+
+// [157] "fnmsb $Zdn, $Pg/m, $Zm, $Za";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMSB_ZPmZZ_D, FNMSB_ZPmZZ_H, FNMSB_ZPmZZ_S)>;
+
+// [158] "frecpe $Zd, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPE_ZZ_D, FRECPE_ZZ_H, FRECPE_ZZ_S)>;
+
+// [159] "frecps $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRECPS_ZZZ_D, FRECPS_ZZZ_H, FRECPS_ZZZ_S)>;
+
+// [160] "frecpx $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPX_ZPmZ_D, FRECPX_ZPmZ_H, FRECPX_ZPmZ_S)>;
+
+// [161] "frinta $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTA_ZPmZ_D, FRINTA_ZPmZ_H, FRINTA_ZPmZ_S)>;
+
+// [162] "frinti $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTI_ZPmZ_D, FRINTI_ZPmZ_H, FRINTI_ZPmZ_S)>;
+
+// [163] "frintm $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTM_ZPmZ_D, FRINTM_ZPmZ_H, FRINTM_ZPmZ_S)>;
+
+// [164] "frintn $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTN_ZPmZ_D, FRINTN_ZPmZ_H, FRINTN_ZPmZ_S)>;
+
+// [165] "frintp $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTP_ZPmZ_D, FRINTP_ZPmZ_H, FRINTP_ZPmZ_S)>;
+
+// [166] "frintx $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTX_ZPmZ_D, FRINTX_ZPmZ_H, FRINTX_ZPmZ_S)>;
+
+// [167] "frintz $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTZ_ZPmZ_D, FRINTZ_ZPmZ_H, FRINTZ_ZPmZ_S)>;
+
+// [168] "frsqrte $Zd, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRSQRTE_ZZ_D, FRSQRTE_ZZ_H, FRSQRTE_ZZ_S)>;
+
+// [169] "frsqrts $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRSQRTS_ZZZ_D, FRSQRTS_ZZZ_H, FRSQRTS_ZZZ_S)>;
+
+// [170] "fscale $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSCALE_ZPmZ_D, FSCALE_ZPmZ_H, FSCALE_ZPmZ_S)>;
+
+// [171] "fsqrt $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FSQRT_ZPmZ_D)>;
+def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FSQRT_ZPmZ_H)>;
+def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FSQRT_ZPmZ_S)>;
+
+// [172] "fsub $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZZZ_D, FSUB_ZZZ_H, FSUB_ZZZ_S)>;
+
+// [173] "fsub $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZPmZ_D, FSUB_ZPmZ_H, FSUB_ZPmZ_S)>;
+
+// [174] "fsub $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUB_ZPmI_D, FSUB_ZPmI_H, FSUB_ZPmI_S)>;
+
+// [175] "fsubr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUBR_ZPmZ_D, FSUBR_ZPmZ_H, FSUBR_ZPmZ_S)>;
+
+// [176] "fsubr $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUBR_ZPmI_D, FSUBR_ZPmI_H, FSUBR_ZPmI_S)>;
+
+// [177] "ftmad $Zdn, $_Zdn, $Zm, $imm3";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTMAD_ZZI_D, FTMAD_ZZI_H, FTMAD_ZZI_S)>;
+
+// [178] "ftsmul $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTSMUL_ZZZ_D, FTSMUL_ZZZ_H, FTSMUL_ZZZ_S)>;
+
+// [180] "incb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCB_XPiI)>;
+
+// [181] "incd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCD_XPiI)>;
+
+// [182] "incd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCD_ZPiI)>;
+
+// [183] "inch $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCH_XPiI)>;
+
+// [184] "inch $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCH_ZPiI)>;
+
+// [185] "incp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs INCP_XP_B, INCP_XP_D, INCP_XP_H, INCP_XP_S)>;
+
+// [186] "incp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs INCP_ZP_D, INCP_ZP_H, INCP_ZP_S)>;
+
+// [187] "incw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCW_XPiI)>;
+
+// [188] "incw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCW_ZPiI)>;
+
+// [189] "index $Zd, $Rn, $Rm";
+def : InstRW<[A64FXWrite_17Cyc_GI02], (instrs INDEX_RR_B, INDEX_RR_D, INDEX_RR_H, INDEX_RR_S)>;
+
+// [190] "index $Zd, $Rn, $imm5";
+def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_RI_B, INDEX_RI_D, INDEX_RI_H, INDEX_RI_S)>;
+
+// [191] "index $Zd, $imm5, $Rm";
+def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_IR_B, INDEX_IR_D, INDEX_IR_H, INDEX_IR_S)>;
+
+// [192] "index $Zd, $imm5, $imm5b";
+def : InstRW<[A64FXWrite_13Cyc_GI0], (instrs INDEX_II_B, INDEX_II_D, INDEX_II_H, INDEX_II_S)>;
+
+// [193] "insr $Zdn, $Rm";
+def : InstRW<[A64FXWrite_10Cyc_GI02], (instrs INSR_ZR_B, INSR_ZR_D, INSR_ZR_H, INSR_ZR_S)>;
+
+// [194] "insr $Zdn, $Vm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs INSR_ZV_B, INSR_ZV_D, INSR_ZV_H, INSR_ZV_S)>;
+
+// [195] "lasta $Rd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTA_RPZ_B, LASTA_RPZ_D, LASTA_RPZ_H, LASTA_RPZ_S)>;
+
+// [196] "lasta $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTA_VPZ_B, LASTA_VPZ_D, LASTA_VPZ_H, LASTA_VPZ_S)>;
+
+// [197] "lastb $Rd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTB_RPZ_B, LASTB_RPZ_D, LASTB_RPZ_H, LASTB_RPZ_S)>;
+
+// [198] "lastb $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTB_VPZ_B, LASTB_VPZ_D, LASTB_VPZ_H, LASTB_VPZ_S)>;
+
+// [199] "ld1b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B, LD1B_D, LD1B_H, LD1B_S)>;
+
+// [200] "ld1b $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1B_D_REAL, GLD1B_D_SXTW_REAL, GLD1B_D_UXTW_REAL, GLD1B_S_SXTW_REAL, GLD1B_S_UXTW_REAL)>;
+
+// [201] "ld1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B_D_IMM_REAL, LD1B_H_IMM_REAL, LD1B_IMM_REAL, LD1B_S_IMM_REAL)>;
+
+// [202] "ld1b $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1B_D_IMM_REAL, GLD1B_S_IMM_REAL)>;
+
+// [203] "ld1d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D)>;
+
+// [204] "ld1d $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1D_REAL, GLD1D_SCALED_REAL, GLD1D_SXTW_REAL, GLD1D_SXTW_SCALED_REAL, GLD1D_UXTW_REAL, GLD1D_UXTW_SCALED_REAL)>;
+
+// [205] "ld1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D_IMM_REAL)>;
+
+// [206] "ld1d $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1D_IMM_REAL)>;
+
+// [207] "ld1h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H, LD1H_D, LD1H_S)>;
+
+// [208] "ld1h $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1H_D_REAL, GLD1H_D_SCALED_REAL, GLD1H_D_SXTW_REAL, GLD1H_D_SXTW_SCALED_REAL, GLD1H_D_UXTW_REAL, GLD1H_D_UXTW_SCALED_REAL, GLD1H_S_SXTW_REAL, GLD1H_S_SXTW_SCALED_REAL, GLD1H_S_UXTW_REAL, GLD1H_S_UXTW_SCALED_REAL)>;
+
+// [209] "ld1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H_D_IMM_REAL, LD1H_IMM_REAL, LD1H_S_IMM_REAL)>;
+
+// [210] "ld1h $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1H_D_IMM_REAL, GLD1H_S_IMM_REAL)>;
+
+// [211] "ld1rb $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RB_D_IMM, LD1RB_H_IMM, LD1RB_IMM, LD1RB_S_IMM)>;
+
+// [212] "ld1rd $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RD_IMM)>;
+
+// [213] "ld1rh $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RH_D_IMM, LD1RH_IMM, LD1RH_S_IMM)>;
+
+// [214] "ld1rqb $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B)>;
+
+// [215] "ld1rqb $Zt, $Pg/z, [$Rn, $imm4]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B_IMM)>;
+
+// [216] "ld1rqd $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D)>;
+
+// [217] "ld1rqd $Zt, $Pg/z, [$Rn, $imm4]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D_IMM)>;
+
+// [218] "ld1rqh $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H)>;
+
+// [219] "ld1rqh $Zt, $Pg/z, [$Rn, $imm4]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H_IMM)>;
+
+// [220] "ld1rqw $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W)>;
+
+// [221] "ld1rqw $Zt, $Pg/z, [$Rn, $imm4]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W_IMM)>;
+
+// [222] "ld1rsb $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSB_D_IMM, LD1RSB_H_IMM, LD1RSB_S_IMM)>;
+
+// [223] "ld1rsh $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSH_D_IMM, LD1RSH_S_IMM)>;
+
+// [224] "ld1rsw $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSW_IMM)>;
+
+// [225] "ld1rw $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RW_D_IMM, LD1RW_IMM)>;
+
+// [226] "ld1sb $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D, LD1SB_H, LD1SB_S)>;
+
+// [227] "ld1sb $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SB_D_REAL, GLD1SB_D_SXTW_REAL, GLD1SB_D_UXTW_REAL, GLD1SB_S_SXTW_REAL, GLD1SB_S_UXTW_REAL)>;
+
+// [228] "ld1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D_IMM_REAL, LD1SB_H_IMM_REAL, LD1SB_S_IMM_REAL)>;
+
+// [229] "ld1sb $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SB_D_IMM_REAL, GLD1SB_S_IMM_REAL)>;
+
+// [230] "ld1sh $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D, LD1SH_S)>;
+
+// [231] "ld1sh $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SH_D_REAL, GLD1SH_D_SCALED_REAL, GLD1SH_D_SXTW_REAL, GLD1SH_D_SXTW_SCALED_REAL, GLD1SH_D_UXTW_REAL, GLD1SH_D_UXTW_SCALED_REAL, GLD1SH_S_SXTW_REAL, GLD1SH_S_SXTW_SCALED_REAL, GLD1SH_S_UXTW_REAL, GLD1SH_S_UXTW_SCALED_REAL)>;
+
+// [232] "ld1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D_IMM_REAL, LD1SH_S_IMM_REAL)>;
+
+// [233] "ld1sh $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SH_D_IMM_REAL, GLD1SH_S_IMM_REAL)>;
+
+// [234] "ld1sw $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D)>;
+
+// [235] "ld1sw $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SW_D_REAL, GLD1SW_D_SCALED_REAL, GLD1SW_D_SXTW_REAL, GLD1SW_D_SXTW_SCALED_REAL, GLD1SW_D_UXTW_REAL, GLD1SW_D_UXTW_SCALED_REAL)>;
+
+// [236] "ld1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D_IMM_REAL)>;
+
+// [237] "ld1sw $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SW_D_IMM_REAL)>;
+
+// [238] "ld1w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W, LD1W_D)>;
+
+// [239] "ld1w $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1W_D_REAL, GLD1W_D_SCALED_REAL, GLD1W_D_SXTW_REAL, GLD1W_D_SXTW_SCALED_REAL, GLD1W_D_UXTW_REAL, GLD1W_D_UXTW_SCALED_REAL, GLD1W_SXTW_REAL, GLD1W_SXTW_SCALED_REAL, GLD1W_UXTW_REAL, GLD1W_UXTW_SCALED_REAL)>;
+
+// [240] "ld1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W_D_IMM_REAL, LD1W_IMM_REAL)>;
+
+// [241] "ld1w $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1W_D_IMM_REAL, GLD1W_IMM_REAL)>;
+
+// [242] "ld2b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B)>;
+
+// [243] "ld2b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B_IMM)>;
+
+// [244] "ld2d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D)>;
+
+// [245] "ld2d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D_IMM)>;
+
+// [246] "ld2h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H)>;
+
+// [247] "ld2h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H_IMM)>;
+
+// [248] "ld2w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W)>;
+
+// [249] "ld2w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W_IMM)>;
+
+// [250] "ld3b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B)>;
+
+// [251] "ld3b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B_IMM)>;
+
+// [252] "ld3d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D)>;
+
+// [253] "ld3d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D_IMM)>;
+
+// [254] "ld3h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H)>;
+
+// [255] "ld3h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H_IMM)>;
+
+// [256] "ld3w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W)>;
+
+// [257] "ld3w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W_IMM)>;
+
+// [258] "ld4b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B)>;
+
+// [259] "ld4b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B_IMM)>;
+
+// [260] "ld4d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D)>;
+
+// [261] "ld4d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D_IMM)>;
+
+// [262] "ld4h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H)>;
+
+// [263] "ld4h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H_IMM)>;
+
+// [264] "ld4w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W)>;
+
+// [265] "ld4w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W_IMM)>;
+
+// [266] "ldff1b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1B_D_REAL, LDFF1B_H_REAL, LDFF1B_REAL, LDFF1B_S_REAL)>;
+
+// [267] "ldff1b $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1B_D_REAL, GLDFF1B_D_SXTW_REAL, GLDFF1B_D_UXTW_REAL, GLDFF1B_S_SXTW_REAL, GLDFF1B_S_UXTW_REAL)>;
+
+// [268] "ldff1b $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1B_D_IMM_REAL, GLDFF1B_S_IMM_REAL)>;
+
+// [269] "ldff1d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1D_REAL)>;
+
+// [270] "ldff1d $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1D_REAL, GLDFF1D_SCALED_REAL, GLDFF1D_SXTW_REAL, GLDFF1D_SXTW_SCALED_REAL, GLDFF1D_UXTW_REAL, GLDFF1D_UXTW_SCALED_REAL)>;
+
+// [271] "ldff1d $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1D_IMM_REAL)>;
+
+// [272] "ldff1h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1H_D_REAL, LDFF1H_REAL, LDFF1H_S_REAL)>;
+
+// [273] "ldff1h $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1H_D_REAL, GLDFF1H_D_SCALED_REAL, GLDFF1H_D_SXTW_REAL, GLDFF1H_D_SXTW_SCALED_REAL, GLDFF1H_D_UXTW_REAL, GLDFF1H_D_UXTW_SCALED_REAL, GLDFF1H_S_SXTW_REAL, GLDFF1H_S_SXTW_SCALED_REAL, GLDFF1H_S_UXTW_REAL, GLDFF1H_S_UXTW_SCALED_REAL)>;
+
+// [274] "ldff1h $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1H_D_IMM_REAL, GLDFF1H_S_IMM_REAL)>;
+
+// [275] "ldff1sb $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SB_D_REAL, LDFF1SB_H_REAL, LDFF1SB_S_REAL)>;
+
+// [276] "ldff1sb $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SB_D_REAL, GLDFF1SB_D_SXTW_REAL, GLDFF1SB_D_UXTW_REAL, GLDFF1SB_S_SXTW_REAL, GLDFF1SB_S_UXTW_REAL)>;
+
+// [277] "ldff1sb $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SB_D_IMM_REAL, GLDFF1SB_S_IMM_REAL)>;
+
+// [278] "ldff1sh $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SH_D_REAL, LDFF1SH_S_REAL)>;
+
+// [279] "ldff1sh $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SH_D_REAL, GLDFF1SH_D_SCALED_REAL, GLDFF1SH_D_SXTW_REAL, GLDFF1SH_D_SXTW_SCALED_REAL, GLDFF1SH_D_UXTW_REAL, GLDFF1SH_D_UXTW_SCALED_REAL, GLDFF1SH_S_SXTW_REAL, GLDFF1SH_S_SXTW_SCALED_REAL, GLDFF1SH_S_UXTW_REAL, GLDFF1SH_S_UXTW_SCALED_REAL)>;
+
+// [280] "ldff1sh $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SH_D_IMM_REAL, GLDFF1SH_S_IMM_REAL)>;
+
+// [281] "ldff1sw $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SW_D_REAL)>;
+
+// [282] "ldff1sw $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SW_D_REAL, GLDFF1SW_D_SCALED_REAL, GLDFF1SW_D_SXTW_REAL, GLDFF1SW_D_SXTW_SCALED_REAL, GLDFF1SW_D_UXTW_REAL, GLDFF1SW_D_UXTW_SCALED_REAL)>;
+
+// [283] "ldff1sw $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SW_D_IMM_REAL)>;
+
+// [284] "ldff1w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1W_D_REAL, LDFF1W_REAL)>;
+
+// [285] "ldff1w $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1W_D_REAL, GLDFF1W_D_SCALED_REAL, GLDFF1W_D_SXTW_REAL, GLDFF1W_D_SXTW_SCALED_REAL, GLDFF1W_D_UXTW_REAL, GLDFF1W_D_UXTW_SCALED_REAL, GLDFF1W_SXTW_REAL, GLDFF1W_SXTW_SCALED_REAL, GLDFF1W_UXTW_REAL, GLDFF1W_UXTW_SCALED_REAL)>;
+
+// [286] "ldff1w $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1W_D_IMM_REAL, GLDFF1W_IMM_REAL)>;
+
+// [287] "ldnf1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1B_D_IMM_REAL, LDNF1B_H_IMM_REAL, LDNF1B_IMM_REAL, LDNF1B_S_IMM_REAL)>;
+
+// [288] "ldnf1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1D_IMM_REAL)>;
+
+// [289] "ldnf1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1H_D_IMM_REAL, LDNF1H_IMM_REAL, LDNF1H_S_IMM_REAL)>;
+
+// [290] "ldnf1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SB_D_IMM_REAL, LDNF1SB_H_IMM_REAL, LDNF1SB_S_IMM_REAL)>;
+
+// [291] "ldnf1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SH_D_IMM_REAL, LDNF1SH_S_IMM_REAL)>;
+
+// [292] "ldnf1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SW_D_IMM_REAL)>;
+
+// [293] "ldnf1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1W_D_IMM_REAL, LDNF1W_IMM_REAL)>;
+
+// [294] "ldnt1b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRR)>;
+
+// [295] "ldnt1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRI)>;
+
+// [296] "ldnt1d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRR)>;
+
+// [297] "ldnt1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRI)>;
+
+// [298] "ldnt1h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRR)>;
+
+// [299] "ldnt1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRI)>;
+
+// [300] "ldnt1w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRR)>;
+
+// [301] "ldnt1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRI)>;
+
+// [302] "ldr $Pt, [$Rn, $imm9, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_PXI)>;
+
+// [303] "ldr $Zt, [$Rn, $imm9, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_ZXI)>;
+
+// [304] "lsl $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZZZ_B, LSL_WIDE_ZZZ_H, LSL_WIDE_ZZZ_S)>;
+
+// [305] "lsl $Zd, $Zn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZZI_B, LSL_ZZI_D, LSL_ZZI_H, LSL_ZZI_S)>;
+
+// [306] "lsl $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZPmZ_B, LSL_WIDE_ZPmZ_H, LSL_WIDE_ZPmZ_S, LSL_ZPmZ_B, LSL_ZPmZ_D, LSL_ZPmZ_H, LSL_ZPmZ_S)>;
+
+// [307] "lsl $Zdn, $Pg/m, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZPmI_B, LSL_ZPmI_D, LSL_ZPmI_H, LSL_ZPmI_S)>;
+
+// [308] "lslr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSLR_ZPmZ_B, LSLR_ZPmZ_D, LSLR_ZPmZ_H, LSLR_ZPmZ_S)>;
+
+// [309] "lsr $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZZZ_B, LSR_WIDE_ZZZ_H, LSR_WIDE_ZZZ_S)>;
+
+// [310] "lsr $Zd, $Zn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZZI_B, LSR_ZZI_D, LSR_ZZI_H, LSR_ZZI_S)>;
+
+// [311] "lsr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZPmZ_B, LSR_WIDE_ZPmZ_H, LSR_WIDE_ZPmZ_S, LSR_ZPmZ_B, LSR_ZPmZ_D, LSR_ZPmZ_H, LSR_ZPmZ_S)>;
+
+// [312] "lsr $Zdn, $Pg/m, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZPmI_B, LSR_ZPmI_D, LSR_ZPmI_H, LSR_ZPmI_S)>;
+
+// [313] "lsrr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSRR_ZPmZ_B, LSRR_ZPmZ_D, LSRR_ZPmZ_H, LSRR_ZPmZ_S)>;
+
+// [314] "mad $Zdn, $Pg/m, $Zm, $Za";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MAD_ZPmZZ_B, MAD_ZPmZZ_D, MAD_ZPmZZ_H, MAD_ZPmZZ_S)>;
+
+// [315] "mla $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLA_ZPmZZ_B, MLA_ZPmZZ_D, MLA_ZPmZZ_H, MLA_ZPmZZ_S)>;
+
+// [316] "mls $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLS_ZPmZZ_B, MLS_ZPmZZ_D, MLS_ZPmZZ_H, MLS_ZPmZZ_S)>;
+
+// [317] "movprfx $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPmZ_B, MOVPRFX_ZPmZ_D, MOVPRFX_ZPmZ_H, MOVPRFX_ZPmZ_S)>;
+
+// [318] "movprfx $Zd, $Pg/z, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPzZ_B, MOVPRFX_ZPzZ_D, MOVPRFX_ZPzZ_H, MOVPRFX_ZPzZ_S)>;
+
+// [319] "movprfx $Zd, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZZ)>;
+
+// [320] "msb $Zdn, $Pg/m, $Zm, $Za";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MSB_ZPmZZ_B, MSB_ZPmZZ_D, MSB_ZPmZZ_H, MSB_ZPmZZ_S)>;
+
+// [321] "mul $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MUL_ZPmZ_B, MUL_ZPmZ_D, MUL_ZPmZ_H, MUL_ZPmZ_S)>;
+
+// [322] "mul $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs MUL_ZI_B, MUL_ZI_D, MUL_ZI_H, MUL_ZI_S)>;
+
+// [323] "nand $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NAND_PPzPP)>;
+
+// [324] "nands $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NANDS_PPzPP)>;
+
+// [325] "neg $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NEG_ZPmZ_B, NEG_ZPmZ_D, NEG_ZPmZ_H, NEG_ZPmZ_S)>;
+
+// [326] "nor $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NOR_PPzPP)>;
+
+// [327] "nors $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NORS_PPzPP)>;
+
+// [328] "not $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NOT_ZPmZ_B, NOT_ZPmZ_D, NOT_ZPmZ_H, NOT_ZPmZ_S)>;
+
+// [329] "orn $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORN_PPzPP)>;
+
+// [330] "orns $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORNS_PPzPP)>;
+
+// [331] "orr $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORR_PPzPP)>;
+
+// [332] "orr $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZZZ)>;
+
+// [333] "orr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZPmZ_B, ORR_ZPmZ_D, ORR_ZPmZ_H, ORR_ZPmZ_S)>;
+
+// [334] "orr $Zdn, $_Zdn, $imms13";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs ORR_ZI)>;
+
+// [335] "orrs $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORRS_PPzPP)>;
+
+// [336] "orv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ORV_VPZ_B, ORV_VPZ_D, ORV_VPZ_H, ORV_VPZ_S)>;
+
+// [337] "pfalse $Pd";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PFALSE)>;
+
+// [338] "pnext $Pdn, $Pg, $_Pdn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PNEXT_B, PNEXT_D, PNEXT_H, PNEXT_S)>;
+
+// [339] "prfb $prfop, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRR)>;
+
+// [340] "prfb $prfop, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFB_D_SCALED, PRFB_D_SXTW_SCALED, PRFB_D_UXTW_SCALED, PRFB_S_SXTW_SCALED, PRFB_S_UXTW_SCALED)>;
+
+// [341] "prfb $prfop, $Pg, [$Rn, $imm6, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRI)>;
+
+// [342] "prfb $prfop, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFB_D_PZI, PRFB_S_PZI)>;
+
+// [343] "prfd $prfop, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRR)>;
+
+// [344] "prfd $prfop, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFD_D_SCALED, PRFD_D_SXTW_SCALED, PRFD_D_UXTW_SCALED, PRFD_S_SXTW_SCALED, PRFD_S_UXTW_SCALED)>;
+
+// [345] "prfd $prfop, $Pg, [$Rn, $imm6, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRI)>;
+
+// [346] "prfd $prfop, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFD_D_PZI, PRFD_S_PZI)>;
+
+// [347] "prfh $prfop, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRR)>;
+
+// [348] "prfh $prfop, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFH_D_SCALED, PRFH_D_SXTW_SCALED, PRFH_D_UXTW_SCALED, PRFH_S_SXTW_SCALED, PRFH_S_UXTW_SCALED)>;
+
+// [349] "prfh $prfop, $Pg, [$Rn, $imm6, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRI)>;
+
+// [350] "prfh $prfop, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFH_D_PZI, PRFH_S_PZI)>;
+
+// [351] "prfw $prfop, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFS_PRR)>;
+
+// [352] "prfw $prfop, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFW_D_SCALED, PRFW_D_SXTW_SCALED, PRFW_D_UXTW_SCALED, PRFW_S_SXTW_SCALED, PRFW_S_UXTW_SCALED)>;
+
+// [353] "prfw $prfop, $Pg, [$Rn, $imm6, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFW_PRI)>;
+
+// [354] "prfw $prfop, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFW_D_PZI, PRFW_S_PZI)>;
+
+// [355] "ptest $Pg, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTEST_PP)>;
+
+// [356] "ptrue $Pd, $pattern";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUE_B, PTRUE_D, PTRUE_H, PTRUE_S)>;
+
+// [357] "ptrues $Pd, $pattern";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUES_B, PTRUES_D, PTRUES_H, PTRUES_S)>;
+
+// [358] "punpkhi $Pd, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKHI_PP)>;
+
+// [359] "punpklo $Pd, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKLO_PP)>;
+
+// [360] "rbit $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBIT_ZPmZ_B, RBIT_ZPmZ_D, RBIT_ZPmZ_H, RBIT_ZPmZ_S)>;
+
+// [361] "rdffr $Pd";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_P)>;
+
+// [362] "rdffr $Pd, $Pg/z";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_PPz)>;
+
+// [363] "rdffrs $Pd, $Pg/z";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFRS_PPz)>;
+
+// [364] "rdvl $Rd, $imm6";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs RDVLI_XI)>;
+
+// [365] "rev $Pd, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs REV_PP_B, REV_PP_D, REV_PP_H, REV_PP_S)>;
+
+// [366] "rev $Zd, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs REV_ZZ_B, REV_ZZ_D, REV_ZZ_H, REV_ZZ_S)>;
+
+// [367] "revb $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVB_ZPmZ_D, REVB_ZPmZ_H, REVB_ZPmZ_S)>;
+
+// [368] "revh $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVH_ZPmZ_D, REVH_ZPmZ_S)>;
+
+// [369] "revw $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVW_ZPmZ_D)>;
+
+// [370] "sabd $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SABD_ZPmZ_B, SABD_ZPmZ_D, SABD_ZPmZ_H, SABD_ZPmZ_S)>;
+
+// [371] "saddv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs SADDV_VPZ_B, SADDV_VPZ_H, SADDV_VPZ_S)>;
+
+// [372] "scvtf $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SCVTF_ZPmZ_DtoD, SCVTF_ZPmZ_DtoH, SCVTF_ZPmZ_DtoS, SCVTF_ZPmZ_HtoH, SCVTF_ZPmZ_StoD, SCVTF_ZPmZ_StoH, SCVTF_ZPmZ_StoS)>;
+
+// [373] "sdiv $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIV_ZPmZ_D, SDIV_ZPmZ_S)>;
+
+// [374] "sdivr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIVR_ZPmZ_D, SDIVR_ZPmZ_S)>;
+
+// [375] "sdot $Zda, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SDOT_ZZZ_D, SDOT_ZZZ_S)>;
+
+// [376] "sdot $Zda, $Zn, $Zm$iop";
+def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs SDOT_ZZZI_D, SDOT_ZZZI_S)>;
+
+// [377] "sel $Pd, $Pg, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs SEL_PPPP)>;
+
+// [378] "sel $Zd, $Pg, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SEL_ZPZZ_B, SEL_ZPZZ_D, SEL_ZPZZ_H, SEL_ZPZZ_S)>;
+
+// [379] "setffr";
+def : InstRW<[A64FXWrite_6Cyc], (instrs SETFFR)>;
+
+// [380] "smax $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMAX_ZPmZ_B, SMAX_ZPmZ_D, SMAX_ZPmZ_H, SMAX_ZPmZ_S)>;
+
+// [381] "smax $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMAX_ZI_B, SMAX_ZI_D, SMAX_ZI_H, SMAX_ZI_S)>;
+
+// [382] "smaxv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMAXV_VPZ_B, SMAXV_VPZ_D, SMAXV_VPZ_H, SMAXV_VPZ_S)>;
+
+// [383] "smin $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMIN_ZPmZ_B, SMIN_ZPmZ_D, SMIN_ZPmZ_H, SMIN_ZPmZ_S)>;
+
+// [384] "smin $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMIN_ZI_B, SMIN_ZI_D, SMIN_ZI_H, SMIN_ZI_S)>;
+
+// [385] "sminv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMINV_VPZ_B, SMINV_VPZ_D, SMINV_VPZ_H, SMINV_VPZ_S)>;
+
+// [386] "smulh $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SMULH_ZPmZ_B, SMULH_ZPmZ_D, SMULH_ZPmZ_H, SMULH_ZPmZ_S)>;
+
+// [387] "splice $Zdn, $Pg, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SPLICE_ZPZ_B, SPLICE_ZPZ_D, SPLICE_ZPZ_H, SPLICE_ZPZ_S)>;
+
+// [388] "sqadd $Zd, $Zn, $Zm";
+
+// [389] "sqadd $Zdn, $_Zdn, $imm";
+
+// [390] "sqdecb $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiWdI)>;
+
+// [391] "sqdecb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiI)>;
+
+// [392] "sqdecd $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiWdI)>;
+
+// [393] "sqdecd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiI)>;
+
+// [394] "sqdecd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECD_ZPiI)>;
+
+// [395] "sqdech $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiWdI)>;
+
+// [396] "sqdech $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiI)>;
+
+// [397] "sqdech $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECH_ZPiI)>;
+
+// [398] "sqdecp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XP_B, SQDECP_XP_D, SQDECP_XP_H, SQDECP_XP_S)>;
+
+// [399] "sqdecp $Rdn, $Pg, $_Rdn";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XPWd_B, SQDECP_XPWd_D, SQDECP_XPWd_H, SQDECP_XPWd_S)>;
+
+// [400] "sqdecp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQDECP_ZP_D, SQDECP_ZP_H, SQDECP_ZP_S)>;
+
+// [401] "sqdecw $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiWdI)>;
+
+// [402] "sqdecw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiI)>;
+
+// [403] "sqdecw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECW_ZPiI)>;
+
+// [404] "sqincb $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiWdI)>;
+
+// [405] "sqincb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiI)>;
+
+// [406] "sqincd $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiWdI)>;
+
+// [407] "sqincd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiI)>;
+
+// [408] "sqincd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCD_ZPiI)>;
+
+// [409] "sqinch $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiWdI)>;
+
+// [410] "sqinch $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiI)>;
+
+// [411] "sqinch $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCH_ZPiI)>;
+
+// [412] "sqincp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XP_B, SQINCP_XP_D, SQINCP_XP_H, SQINCP_XP_S)>;
+
+// [413] "sqincp $Rdn, $Pg, $_Rdn";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XPWd_B, SQINCP_XPWd_D, SQINCP_XPWd_H, SQINCP_XPWd_S)>;
+
+// [414] "sqincp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQINCP_ZP_D, SQINCP_ZP_H, SQINCP_ZP_S)>;
+
+// [415] "sqincw $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiWdI)>;
+
+// [416] "sqincw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiI)>;
+
+// [417] "sqincw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCW_ZPiI)>;
+
+// [418] "sqsub $Zd, $Zn, $Zm";
+
+// [419] "sqsub $Zdn, $_Zdn, $imm";
+
+// [420] "st1b $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B, ST1B_D, ST1B_H, ST1B_S)>;
+
+// [421] "st1b $Zt, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D_REAL, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>;
+
+// [422] "st1b $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B_D_IMM, ST1B_H_IMM, ST1B_IMM, ST1B_S_IMM)>;
+
+// [423] "st1b $Zt, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1B_D_IMM, SST1B_S_IMM)>;
+
+// [424] "st1d $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D)>;
+
+// [425] "st1d $Zt, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D_REAL, SST1D_SCALED_SCALED_REAL, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>;
+
+// [426] "st1d $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D_IMM)>;
+
+// [427] "st1d $Zt, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1D_IMM)>;
+
+// [428] "st1h $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H, ST1H_D, ST1H_S)>;
+
+// [429] "st1h $Zt, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D_REAL, SST1H_D_SCALED_SCALED_REAL, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>;
+
+// [430] "st1h $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H_D_IMM, ST1H_IMM, ST1H_S_IMM)>;
+
+// [431] "st1h $Zt, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1H_D_IMM, SST1H_S_IMM)>;
+
+// [432] "st1w $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W, ST1W_D)>;
+
+// [433] "st1w $Zt, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D_REAL, SST1W_D_SCALED_SCALED_REAL, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>;
+
+// [434] "st1w $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W_D_IMM, ST1W_IMM)>;
+
+// [435] "st1w $Zt, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1W_D_IMM, SST1W_IMM)>;
+
+// [436] "st2b $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B)>;
+
+// [437] "st2b $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B_IMM)>;
+
+// [438] "st2d $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D)>;
+
+// [439] "st2d $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D_IMM)>;
+
+// [440] "st2h $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H)>;
+
+// [441] "st2h $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H_IMM)>;
+
+// [442] "st2w $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W)>;
+
+// [443] "st2w $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W_IMM)>;
+
+// [444] "st3b $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B)>;
+
+// [445] "st3b $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B_IMM)>;
+
+// [446] "st3d $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D)>;
+
+// [447] "st3d $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D_IMM)>;
+
+// [448] "st3h $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H)>;
+
+// [449] "st3h $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H_IMM)>;
+
+// [450] "st3w $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W)>;
+
+// [451] "st3w $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W_IMM)>;
+
+// [452] "st4b $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B)>;
+
+// [453] "st4b $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B_IMM)>;
+
+// [454] "st4d $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D)>;
+
+// [455] "st4d $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D_IMM)>;
+
+// [456] "st4h $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H)>;
+
+// [457] "st4h $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H_IMM)>;
+
+// [458] "st4w $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W)>;
+
+// [459] "st4w $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W_IMM)>;
+
+// [460] "stnt1b $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRR)>;
+
+// [461] "stnt1b $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRI)>;
+
+// [462] "stnt1d $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRR)>;
+
+// [463] "stnt1d $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRI)>;
+
+// [464] "stnt1h $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRR)>;
+
+// [465] "stnt1h $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRI)>;
+
+// [466] "stnt1w $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRR)>;
+
+// [467] "stnt1w $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRI)>;
+
+// [468] "str $Pt, [$Rn, $imm9, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI15], (instrs STR_PXI)>;
+
+// [469] "str $Zt, [$Rn, $imm9, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI05], (instrs STR_ZXI)>;
+
+// [470] "sub $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZZZ_B, SUB_ZZZ_D, SUB_ZZZ_H, SUB_ZZZ_S)>;
+
+// [471] "sub $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZPmZ_B, SUB_ZPmZ_D, SUB_ZPmZ_H, SUB_ZPmZ_S)>;
+
+// [472] "sub $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZI_B, SUB_ZI_D, SUB_ZI_H, SUB_ZI_S)>;
+
+// [473] "subr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUBR_ZPmZ_B, SUBR_ZPmZ_D, SUBR_ZPmZ_H, SUBR_ZPmZ_S)>;
+
+// [474] "subr $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SUBR_ZI_B, SUBR_ZI_D, SUBR_ZI_H, SUBR_ZI_S)>;
+
+// [475] "sunpkhi $Zd, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKHI_ZZ_D, SUNPKHI_ZZ_H, SUNPKHI_ZZ_S)>;
+
+// [476] "sunpklo $Zd, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKLO_ZZ_D, SUNPKLO_ZZ_H, SUNPKLO_ZZ_S)>;
+
+// [477] "sxtb $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTB_ZPmZ_D, SXTB_ZPmZ_H, SXTB_ZPmZ_S)>;
+
+// [478] "sxth $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTH_ZPmZ_D, SXTH_ZPmZ_S)>;
+
+// [479] "sxtw $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTW_ZPmZ_D)>;
+
+// [480] "tbl $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs TBL_ZZZ_B, TBL_ZZZ_D, TBL_ZZZ_H, TBL_ZZZ_S)>;
+
+// [481] "trn1 $Pd, $Pn, $Pm";
+
+// [482] "trn1 $Zd, $Zn, $Zm";
+
+// [483] "trn2 $Pd, $Pn, $Pm";
+
+// [484] "trn2 $Zd, $Zn, $Zm";
+
+// [486] "uabd $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UABD_ZPmZ_B, UABD_ZPmZ_D, UABD_ZPmZ_H, UABD_ZPmZ_S)>;
+
+// [487] "uaddv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs UADDV_VPZ_B, UADDV_VPZ_D, UADDV_VPZ_H, UADDV_VPZ_S)>;
+
+// [488] "ucvtf $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UCVTF_ZPmZ_DtoD, UCVTF_ZPmZ_DtoH, UCVTF_ZPmZ_DtoS, UCVTF_ZPmZ_HtoH, UCVTF_ZPmZ_StoD, UCVTF_ZPmZ_StoH, UCVTF_ZPmZ_StoS)>;
+
+// [489] "udiv $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIV_ZPmZ_D, UDIV_ZPmZ_S)>;
+
+// [490] "udivr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIVR_ZPmZ_D, UDIVR_ZPmZ_S)>;
+
+// [491] "udot $Zda, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UDOT_ZZZ_D, UDOT_ZZZ_S)>;
+
+// [492] "udot $Zda, $Zn, $Zm$iop";
+def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs UDOT_ZZZI_D, UDOT_ZZZI_S)>;
+
+// [493] "umax $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMAX_ZPmZ_B, UMAX_ZPmZ_D, UMAX_ZPmZ_H, UMAX_ZPmZ_S)>;
+
+// [494] "umax $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMAX_ZI_B, UMAX_ZI_D, UMAX_ZI_H, UMAX_ZI_S)>;
+
+// [495] "umaxv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMAXV_VPZ_B, UMAXV_VPZ_D, UMAXV_VPZ_H, UMAXV_VPZ_S)>;
+
+// [496] "umin $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMIN_ZPmZ_B, UMIN_ZPmZ_D, UMIN_ZPmZ_H, UMIN_ZPmZ_S)>;
+
+// [497] "umin $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMIN_ZI_B, UMIN_ZI_D, UMIN_ZI_H, UMIN_ZI_S)>;
+
+// [498] "uminv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMINV_VPZ_B, UMINV_VPZ_D, UMINV_VPZ_H, UMINV_VPZ_S)>;
+
+// [499] "umulh $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UMULH_ZPmZ_B, UMULH_ZPmZ_D, UMULH_ZPmZ_H, UMULH_ZPmZ_S)>;
+
+// [500] "uqadd $Zd, $Zn, $Zm";
+
+// [501] "uqadd $Zdn, $_Zdn, $imm";
+
+// [502] "uqdecb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECB_WPiI, UQDECB_XPiI)>;
+
+// [503] "uqdecd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECD_WPiI, UQDECD_XPiI)>;
+
+// [504] "uqdecd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECD_ZPiI)>;
+
+// [505] "uqdech $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECH_WPiI, UQDECH_XPiI)>;
+
+// [506] "uqdech $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECH_ZPiI)>;
+
+// [507] "uqdecp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQDECP_WP_B, UQDECP_WP_D, UQDECP_WP_H, UQDECP_WP_S, UQDECP_XP_B, UQDECP_XP_D, UQDECP_XP_H, UQDECP_XP_S)>;
+
+// [508] "uqdecp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQDECP_ZP_D, UQDECP_ZP_H, UQDECP_ZP_S)>;
+
+// [509] "uqdecw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECW_WPiI, UQDECW_XPiI)>;
+
+// [510] "uqdecw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECW_ZPiI)>;
+
+// [511] "uqincb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCB_WPiI, UQINCB_XPiI)>;
+
+// [512] "uqincd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCD_WPiI, UQINCD_XPiI)>;
+
+// [513] "uqincd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCD_ZPiI)>;
+
+// [514] "uqinch $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCH_WPiI, UQINCH_XPiI)>;
+
+// [515] "uqinch $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCH_ZPiI)>;
+
+// [516] "uqincp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQINCP_WP_B, UQINCP_WP_D, UQINCP_WP_H, UQINCP_WP_S, UQINCP_XP_B, UQINCP_XP_D, UQINCP_XP_H, UQINCP_XP_S)>;
+
+// [517] "uqincp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQINCP_ZP_D, UQINCP_ZP_H, UQINCP_ZP_S)>;
+
+// [518] "uqincw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCW_WPiI, UQINCW_XPiI)>;
+
+// [519] "uqincw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCW_ZPiI)>;
+
+// [520] "uqsub $Zd, $Zn, $Zm";
+//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZZZ_B, UQSUB_ZZZ_D, UQSUB_ZZZ_H, UQSUB_ZZZ_S)>;
+
+// [521] "uqsub $Zdn, $_Zdn, $imm";
+//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZI_B, UQSUB_ZI_D, UQSUB_ZI_H, UQSUB_ZI_S)>;
+
+// [522] "uunpkhi $Zd, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKHI_ZZ_D, UUNPKHI_ZZ_H, UUNPKHI_ZZ_S)>;
+
+// [523] "uunpklo $Zd, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKLO_ZZ_D, UUNPKLO_ZZ_H, UUNPKLO_ZZ_S)>;
+
+// [524] "uxtb $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTB_ZPmZ_D, UXTB_ZPmZ_H, UXTB_ZPmZ_S)>;
+
+// [525] "uxth $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTH_ZPmZ_D, UXTH_ZPmZ_S)>;
+
+// [526] "uxtw $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTW_ZPmZ_D)>;
+
+// [527] "uzp1 $Pd, $Pn, $Pm";
+
+// [528] "uzp1 $Zd, $Zn, $Zm";
+
+// [529] "uzp2 $Pd, $Pn, $Pm";
+
+// [530] "uzp2 $Zd, $Zn, $Zm";
+
+// [531] "whilele $Pd, $Rn, $Rm";
+def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELE_PWW_B, WHILELE_PWW_D, WHILELE_PWW_H, WHILELE_PWW_S, WHILELE_PXX_B, WHILELE_PXX_D, WHILELE_PXX_H, WHILELE_PXX_S)>;
+
+// [532] "whilelo $Pd, $Rn, $Rm";
+def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELO_PWW_B, WHILELO_PWW_D, WHILELO_PWW_H, WHILELO_PWW_S, WHILELO_PXX_B, WHILELO_PXX_D, WHILELO_PXX_H, WHILELO_PXX_S)>;
+
+// [533] "whilels $Pd, $Rn, $Rm";
+def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELS_PWW_B, WHILELS_PWW_D, WHILELS_PWW_H, WHILELS_PWW_S, WHILELS_PXX_B, WHILELS_PXX_D, WHILELS_PXX_H, WHILELS_PXX_S)>;
+
+// [534] "whilelt $Pd, $Rn, $Rm";
+def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELT_PWW_B, WHILELT_PWW_D, WHILELT_PWW_H, WHILELT_PWW_S, WHILELT_PXX_B, WHILELT_PXX_D, WHILELT_PXX_H, WHILELT_PXX_S)>;
+
+// [535] "wrffr $Pn";
+def : InstRW<[A64FXWrite_6Cyc_NGI1], (instrs WRFFR)>;
+
+// [536] "zip1 $Pd, $Pn, $Pm";
+
+// [537] "zip1 $Zd, $Zn, $Zm";
+
+// [538] "zip2 $Pd, $Pn, $Pm";
+
+// [539] "zip2 $Zd, $Zn, $Zm";
+
+} // SchedModel = A64FXModel
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
new file mode 100644
index 000000000000..438371c1b6a8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -0,0 +1,745 @@
+//==- AArch64SchedTSV110.td - Huawei TSV110 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Huawei TSV110 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
+
+// Huawei TSV110 scheduling machine model.
+def TSV110Model : SchedMachineModel {
+ let IssueWidth = 4; // 4 micro-ops dispatched per cycle.
+ let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+ let LoopMicroOpBufferSize = 16;
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
+}
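+
+// For context, a SchedMachineModel such as TSV110Model is wired up to a CPU
+// elsewhere (in AArch64.td); the hookup looks roughly like the sketch below,
+// with the subtarget feature list elided:
+//   def : ProcessorModel<"tsv110", TSV110Model, [/* features */]>;
+// The MachineScheduler then uses this model when -mcpu=tsv110 is selected.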
+
+// Define each kind of processor resource and number available on the TSV110,
+// which has 8 execution pipelines, each with its own issue queue; micro-ops
+// wait there for their operands and then issue out of order to their pipeline.
+let SchedModel = TSV110Model in {
+ def TSV110UnitALU : ProcResource<1>; // Int ALU
+ def TSV110UnitAB : ProcResource<2>; // Int ALU/BRU
+ def TSV110UnitMDU : ProcResource<1>; // Multi-Cycle
+ def TSV110UnitFSU1 : ProcResource<1>; // FP/ASIMD
+ def TSV110UnitFSU2 : ProcResource<1>; // FP/ASIMD
+ def TSV110UnitLdSt : ProcResource<2>; // Load/Store
+
+ def TSV110UnitF : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2]>;
+ def TSV110UnitALUAB : ProcResGroup<[TSV110UnitALU, TSV110UnitAB]>;
+ def TSV110UnitFLdSt : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2, TSV110UnitLdSt]>;
+}
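+
+// A brief sketch of how the groups above behave (this follows the generic
+// MCSchedule semantics rather than anything TSV110-specific): a write type
+// that names the group TSV110UnitF, e.g.
+//   def ExampleWr_2cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 2; }
+// may execute its micro-op on either FSU1 or FSU2, whereas naming
+// TSV110UnitFSU1 directly pins the micro-op to that one pipe.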
+
+let SchedModel = TSV110Model in {
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// TSV110
+
+// Integer ALU
+def : WriteRes<WriteImm, [TSV110UnitALUAB]> { let Latency = 1; }
+def : WriteRes<WriteI, [TSV110UnitALUAB]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [TSV110UnitMDU]> { let Latency = 2; }
+def : WriteRes<WriteIEReg, [TSV110UnitMDU]> { let Latency = 2; }
+def : WriteRes<WriteExtr, [TSV110UnitALUAB]> { let Latency = 1; }
+def : WriteRes<WriteIS, [TSV110UnitALUAB]> { let Latency = 1; }
+
+// Integer Mul/MAC/Div
+def : WriteRes<WriteID32, [TSV110UnitMDU]> { let Latency = 12;
+ let ResourceCycles = [12]; }
+def : WriteRes<WriteID64, [TSV110UnitMDU]> { let Latency = 20;
+ let ResourceCycles = [20]; }
+def : WriteRes<WriteIM32, [TSV110UnitMDU]> { let Latency = 3; }
+def : WriteRes<WriteIM64, [TSV110UnitMDU]> { let Latency = 4; }
+
+// Load
+def : WriteRes<WriteLD, [TSV110UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteLDIdx, [TSV110UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+// Pre/Post Indexing
+def : WriteRes<WriteAdr, [TSV110UnitALUAB]> { let Latency = 1; }
+
+// Store
+def : WriteRes<WriteST, [TSV110UnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [TSV110UnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTIdx, [TSV110UnitLdSt]> { let Latency = 1; }
+
+// FP
+def : WriteRes<WriteF, [TSV110UnitF]> { let Latency = 2; }
+def : WriteRes<WriteFCmp, [TSV110UnitF]> { let Latency = 3; }
+def : WriteRes<WriteFCvt, [TSV110UnitF]> { let Latency = 3; }
+def : WriteRes<WriteFCopy, [TSV110UnitF]> { let Latency = 2; }
+def : WriteRes<WriteFImm, [TSV110UnitF]> { let Latency = 2; }
+def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; }
+
+// FP Div, Sqrt
+def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; }
+
+def : WriteRes<WriteV, [TSV110UnitF]> { let Latency = 4; }
+def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; }
+def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [TSV110UnitAB]> { let Latency = 1; }
+def : WriteRes<WriteBrReg, [TSV110UnitAB]> { let Latency = 1; }
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Forwarding logic is modeled only for multiply and accumulate.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
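+
+// To illustrate the one non-zero ReadAdvance above (a reading of the generic
+// scheduler semantics, not a TSV110-specific claim): advancing ReadIMA by 2
+// against WriteIM32/WriteIM64 means the accumulator operand of a
+// multiply-accumulate can be read two cycles late, so a MUL -> MADD chain
+// through the accumulator sees an effective latency of 3 - 2 = 1 cycle for
+// 32-bit forms (4 - 2 = 2 for 64-bit) instead of the full multiply latency.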
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// Detailed Refinements
+//===----------------------------------------------------------------------===//
+
+// Contains all of the TSV110 specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming convention is to use a prefix, one field
+// for latency, and one or more micro-op count/type designators.
+// Prefix: TSV110Wr
+// Latency: #cyc
+// MicroOp Count/Types: #(ALU|AB|MDU|FSU1|FSU2|LdSt|ALUAB|F|FLdSt)
+//
+// e.g. TSV110Wr_6cyc_1ALU_6MDU_4LdSt means the total latency is 6 and there are
+//      11 micro-ops to be issued: one down the ALU pipe, six down the MDU pipes
+//      and four down the LdSt pipes.
+//
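+// As a worked reading of the convention (illustrative only; the real mappings
+// follow later in this file): TSV110Wr_5cyc_1FSU1 means "result available
+// after 5 cycles, one micro-op occupying the FSU1 pipe", and such a write
+// type is attached to instructions with InstRW, for example
+//   def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^FMUL(S|D)rr$")>;
+// where the opcode regex here is only an illustration.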
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+def TSV110Wr_1cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 1; }
+def TSV110Wr_1cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 1; }
+def TSV110Wr_1cyc_1ALUAB : SchedWriteRes<[TSV110UnitALUAB]> { let Latency = 1; }
+def TSV110Wr_1cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 1; }
+
+def TSV110Wr_2cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 2; }
+def TSV110Wr_2cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 2; }
+def TSV110Wr_2cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 2; }
+def TSV110Wr_2cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 2; }
+def TSV110Wr_2cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 2; }
+def TSV110Wr_2cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 2; }
+
+def TSV110Wr_3cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 3; }
+def TSV110Wr_3cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 3; }
+def TSV110Wr_3cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 3; }
+
+def TSV110Wr_4cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 4; }
+def TSV110Wr_4cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 4; }
+def TSV110Wr_4cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 4; }
+def TSV110Wr_4cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 4; }
+
+def TSV110Wr_5cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 5; }
+def TSV110Wr_5cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 5; }
+def TSV110Wr_5cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 5; }
+def TSV110Wr_5cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 5; }
+
+def TSV110Wr_6cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 6; }
+
+def TSV110Wr_7cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 7; }
+
+def TSV110Wr_8cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 8; }
+
+def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 11; }
+
+def TSV110Wr_12cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 12; }
+
+def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 17; }
+
+def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 18; }
+
+def TSV110Wr_20cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 20; }
+
+def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 24; }
+
+def TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 31; }
+
+def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 36; }
+
+def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 38; }
+
+def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 64; }
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+def TSV110Wr_1cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitALUAB]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_2cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitALUAB]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_2cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitLdSt]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_2cyc_2F : SchedWriteRes<[TSV110UnitF,
+ TSV110UnitF]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_2cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
+ TSV110UnitFSU2]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_4cyc_2F : SchedWriteRes<[TSV110UnitF,
+ TSV110UnitF]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_4cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
+ TSV110UnitFSU2]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_4cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitALUAB]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_5cyc_1ALU_1F : SchedWriteRes<[TSV110UnitALU,
+ TSV110UnitF]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_6cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitLdSt]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_6cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitALUAB]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_7cyc_1F_1LdSt : SchedWriteRes<[TSV110UnitF,
+ TSV110UnitLdSt]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_8cyc_2FSU1 : SchedWriteRes<[TSV110UnitFSU1,
+ TSV110UnitFSU1]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+
+def TSV110Wr_8cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
+ TSV110UnitFSU2]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 3 micro-op types
+
+def TSV110Wr_6cyc_3F : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
+ TSV110UnitF]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def TSV110Wr_6cyc_3LdSt : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt,
+ TSV110UnitLdSt]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def TSV110Wr_7cyc_2F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
+ TSV110UnitLdSt]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 4 micro-op types
+
+def TSV110Wr_8cyc_4F : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
+ TSV110UnitF, TSV110UnitF]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def TSV110Wr_8cyc_3F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
+ TSV110UnitF, TSV110UnitLdSt]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 5 micro-op types
+
+def TSV110Wr_8cyc_3F_2LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF,
+ TSV110UnitLdSt, TSV110UnitLdSt]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 8 micro-op types
+
+def TSV110Wr_10cyc_4F_4LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
+ TSV110UnitF, TSV110UnitF,
+ TSV110UnitLdSt, TSV110UnitLdSt,
+ TSV110UnitLdSt, TSV110UnitLdSt]> {
+ let Latency = 10;
+ let NumMicroOps = 8;
+}
+
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1AB], (instrs B)>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BL)>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BLR)>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ))$")>;
+
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AES[DE]")>;
+def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AESI?MC")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA1SU1")>;
+def : InstRW<[TSV110Wr_2cyc_2F], (instregex "^SHA1(H|SU0)")>;
+def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA1[CMP]")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA256SU0")>;
+def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^SHA256SU1")>;
+def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA256(H|H2)")>;
+def TSV110ReadCRC: SchedReadAdvance<1, [TSV110Wr_2cyc_1MDU]>;
+def : InstRW<[TSV110Wr_2cyc_1MDU, TSV110ReadCRC], (instregex "^CRC32.*$")>;
+
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(BIC|EON|ORN)[WX]rr")>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(BIC)S[WX]rr")>;
+
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(ADD|AND|EOR|ORR|SUB)[WX]r(r|i)")>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(ADD|AND|EOR|ORR|SUB)S[WX]r(r|i)")>;
+
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(ADC|SBC|BIC)[WX]r$")>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(ADC|SBC)S[WX]r$")>;
+
+def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)S[WX]rs$")>;
+def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(ADD|SUB)[WX]r(s|x|x64)$")>;
+def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(ADD|SUB)S[WX]r(s|x|x64)$")>;
+
+def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
+
+
+// Move and Shift Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instrs ADR, ADRP)>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^MOV[NZK][WX]i")>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(LSLV|LSRV|ASRV|RORV)(W|X)r")>;
+
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_12cyc_1MDU], (instregex "^(S|U)DIVWr$")>;
+def : InstRW<[TSV110Wr_20cyc_1MDU], (instregex "^(S|U)DIVXr$")>;
+
+def TSV110ReadMAW : SchedReadAdvance<2, [TSV110Wr_3cyc_1MDU]>;
+def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instrs MADDWrrr, MSUBWrrr)>;
+def TSV110ReadMAQ : SchedReadAdvance<3, [TSV110Wr_4cyc_1MDU]>;
+def : InstRW<[TSV110Wr_4cyc_1MDU, TSV110ReadMAQ], (instrs MADDXrrr, MSUBXrrr)>;
+def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+def : InstRW<[TSV110Wr_4cyc_1MDU], (instregex "^(S|U)MULHrr$")>;
+
+
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^EXTR(W|X)rri$")>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(S|U)?BFM(W|X)ri$")>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CLS|CLZ|RBIT|REV(16|32)?)(W|X)r$")>;
+
+
+// Load Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(W|X)l$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)ui$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTR(B|H|W|X)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDUR(BB|HH|W|X)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDP(W|X)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl)>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFUMi)>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMui$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMro(W|X)$")>;
+
+
+// Store Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STN?P(W|X)i$")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR(BB|HH|W|X)i$")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STTR(B|H|W|X)i$")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ui$")>;
+
+def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
+
+
+// FP Data Processing Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "F(ABS|NEG)(D|S)r")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCCMP(E)?(S|D)rr$")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCSEL(S|D)rrr$")>;
+
+def : InstRW<[TSV110Wr_11cyc_1FSU1], (instrs FDIVSrr)>;
+def : InstRW<[TSV110Wr_18cyc_1FSU1], (instrs FDIVDrr)>;
+def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTSr)>;
+def : InstRW<[TSV110Wr_31cyc_1FSU2], (instrs FSQRTDr)>;
+
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN).+rr")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^FN?M(ADD|SUB)Hrrr")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FN?M(ADD|SUB)Srrr")>;
+def : InstRW<[TSV110Wr_7cyc_1F], (instregex "^FN?M(ADD|SUB)Drrr")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Hrr")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|SUB)Srr")>;
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Drr")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(N)?MULHrr$")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULSrr$")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULDrr$")>;
+
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT.+r")>;
+
+
+// FP Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_5cyc_1ALU_1F], (instregex "^[SU]CVTF[SU][WX][SD]ri")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT[HSD][HSD]r")>;
+
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^FMOV(DX|WS|XD|SW|DXHigh|XDHigh)r$")>;
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOV[SD][ir]$")>;
+
+
+// FP Load Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[DSQ]l")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDUR[BDHSQ]i")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ]ui")>;
+def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDN?P[DQS]i")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>;
+
+
+// FP Store Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR[BHSDQ]i")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](post|pre)")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR[BHSDQ]ui")>;
+def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>;
+def : InstRW<[TSV110Wr_2cyc_2LdSt], (instregex "^STN?P[SDQ]i")>;
+def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr], (instregex "^STP[SDQ](post|pre)")>;
+
+
+// ASIMD Integer Instructions
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v8i8, v4i16, v2i32
+// Q form - v16i8, v8i16, v4i32
+// D form - v1i8, v1i16, v1i32, v1i64
+// Q form - v16i8, v8i16, v4i32, v2i64
+// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
+// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
+
+// ASIMD simple arithmetic
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(ABS|ADD(P)?|NEG|SUB)v")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](ADD(L|LP|W)|SUB(L|W))v")>;
+
+// ASIMD complex arithmetic
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]H(ADD|SUB)v")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^R?(ADD|SUB)HN2?v")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]Q(ADD|SUB)v")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^(SU|US)QADDv")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]RHADDv")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABAL?v")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABDL?v")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ADALPv")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^((SQ)(ABS|NEG))v")>;
+
+// ASIMD compare
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT|TST)v")>;
+
+// ASIMD max/min
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)P?v")>;
+
+// ASIMD logical
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(AND|BIC|BIF|BIT|BSL|EOR|MVN|NOT|ORN|ORR)v")>;
+
+// ASIMD multiply accumulate, D-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[TSV110Wr_8cyc_2FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v16i8|v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD shift
+// ASIMD shift accumulate
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(S|SR|U|UR)SRA")>;
+// ASIMD shift by immed, basic
+def : InstRW<[TSV110Wr_4cyc_1FSU1],
+ (instregex "SHLv","SLIv","SRIv","SHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
+// ASIMD shift by immed, complex
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^SQSHLU")>;
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// ASIMD shift by register, complex, D-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD reduction
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?Vv16i8v$")>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+
+
+// Vector - Floating Point
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v2f32
+// Q form - v4f32, v2f64
+// D form - 32, 64
+// D form - v1i32, v1i64
+// D form - v2i32
+// Q form - v4i32, v2i64
+
+// ASIMD FP sign manipulation
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FABSv")>;
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FNEGv")>;
+
+// ASIMD FP compare
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v")>;
+
+// ASIMD FP convert
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FCVT[AMNPZ][SU]v")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT(L)v")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FCVT(N|XN)v")>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[TSV110Wr_11cyc_1FSU1], (instregex "FDIVv2f32")>;
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[TSV110Wr_24cyc_1FSU1], (instregex "FDIVv4f32")>;
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[TSV110Wr_38cyc_1FSU1], (instregex "FDIVv2f64")>;
+
+// ASIMD FP SQRT
+def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTv2f32)>;
+def : InstRW<[TSV110Wr_36cyc_1FSU2], (instrs FSQRTv4f32)>;
+def : InstRW<[TSV110Wr_64cyc_1FSU2], (instrs FSQRTv2f64)>;
+
+// ASIMD FP max,min
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?v")>;
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?Pv")>;
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(MAX|MIN)(NM)?Vv")>;
+
+// ASIMD FP add
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|ADDP|SUB)v")>;
+
+// ASIMD FP multiply
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FMULX?v")>;
+
+
+// ASIMD Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(CLS|CLZ|CNT)v")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(DUP|INS)v.+lane")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^REV(16|32|64)v")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(UZP|ZIP)[12]v")>;
+
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^EXTv")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^XTNv")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^RBITv")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^(INS|DUP)v.+gpr")>;
+
+def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^[SU]MOVv")>;
+
+// ASIMD table lookup, D-form
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v8i8Four")>;
+// ASIMD table lookup, Q-form
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v16i8Four")>;
+
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOVv")>;
+
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT[AIMNPXZ]v")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[SU]CVTFv")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>;
+
+
+// ASIMD Load Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+
+def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+
+// ASIMD Store Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+} // SchedModel = TSV110Model
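
As a quick check on the naming convention documented at the top of the scheduling file above, the small standalone C++ sketch below (illustrative only, not part of LLVM or of this patch) splits a write-resource name such as TSV110Wr_6cyc_1ALU_6MDU_4LdSt into its latency and per-pipe micro-op counts, which is exactly the "6 cycles, 11 micro-ops over 1 ALU + 6 MDU + 4 LdSt issues" reading the comment describes.

// Decode TSV110Wr_<lat>cyc_<n><unit>... names: first field is the latency in
// cycles, each following field is a micro-op count plus the pipe it issues to.
#include <cstdio>
#include <string>
#include <vector>

struct PipeUse { int Count; std::string Unit; };

struct DecodedWrite {
  int Latency = 0;
  std::vector<PipeUse> Pipes;
  int totalMicroOps() const {
    int N = 0;
    for (const PipeUse &P : Pipes) N += P.Count;
    return N;
  }
};

// Split the name on '_' (skipping the "TSV110Wr" prefix) and interpret fields.
static DecodedWrite decode(const std::string &Name) {
  DecodedWrite D;
  size_t Pos = Name.find('_');
  while (Pos != std::string::npos) {
    size_t Next = Name.find('_', Pos + 1);
    std::string Field = Name.substr(
        Pos + 1, Next == std::string::npos ? std::string::npos : Next - Pos - 1);
    size_t DigitEnd = 0;
    int Num = std::stoi(Field, &DigitEnd);       // leading count or latency
    std::string Suffix = Field.substr(DigitEnd); // "cyc" or a pipe name
    if (Suffix == "cyc")
      D.Latency = Num;
    else
      D.Pipes.push_back({Num, Suffix});
    Pos = Next;
  }
  return D;
}

int main() {
  DecodedWrite D = decode("TSV110Wr_6cyc_1ALU_6MDU_4LdSt");
  std::printf("latency = %d, total micro-ops = %d\n", D.Latency, D.totalMicroOps());
  for (const PipeUse &P : D.Pipes)
    std::printf("  %d x %s\n", P.Count, P.Unit.c_str());
  return 0;
}
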
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 8f814d185e85..a5bc3668ed54 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -82,7 +82,8 @@ static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
unsigned OffsetScaled = 0;
while (OffsetScaled < ObjSizeScaled) {
if (ObjSizeScaled - OffsetScaled >= 2) {
- SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
+ SDValue AddrNode =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl);
SDValue St = DAG.getMemIntrinsicNode(
OpCode2, dl, DAG.getVTList(MVT::Other),
{Chain, TagSrc, AddrNode},
@@ -94,7 +95,8 @@ static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
}
if (ObjSizeScaled - OffsetScaled > 0) {
- SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
+ SDValue AddrNode =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl);
SDValue St = DAG.getMemIntrinsicNode(
OpCode1, dl, DAG.getVTList(MVT::Other),
{Chain, TagSrc, AddrNode},
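
Both hunks above make the same change: the 16-byte granule offset is now passed as TypeSize::Fixed(...) rather than a bare integer. The sketch below uses a simplified, hypothetical SimpleTypeSize stand-in (LLVM's real TypeSize has a richer API; the emitStoreAt helper is invented for the example) to show what the explicit Fixed/Scalable distinction buys: a scalable, vscale-multiplied quantity can no longer be confused with a plain byte count at a call site.

// Hypothetical stand-in for the TypeSize idea: the quantity carries a
// "scalable" flag, so callers must state whether an offset is a fixed byte
// count or a multiple of the runtime vector length (vscale).
#include <cassert>
#include <cstdint>
#include <cstdio>

class SimpleTypeSize {
  uint64_t MinValue;
  bool IsScalable;
  SimpleTypeSize(uint64_t MinValue, bool IsScalable)
      : MinValue(MinValue), IsScalable(IsScalable) {}

public:
  static SimpleTypeSize Fixed(uint64_t V) { return {V, false}; }
  static SimpleTypeSize Scalable(uint64_t V) { return {V, true}; }
  bool isScalable() const { return IsScalable; }
  uint64_t getKnownMinValue() const { return MinValue; }
  // Only a non-scalable size has a single well-defined byte value.
  uint64_t getFixedValue() const {
    assert(!IsScalable && "no single byte value for a scalable quantity");
    return MinValue;
  }
};

// A caller in the style of the hunks above: the offset of the i-th 16-byte tag
// granule is always a fixed quantity, and that is now explicit at the call site.
static void emitStoreAt(SimpleTypeSize Offset) {
  if (Offset.isScalable())
    std::printf("offset = %llu x vscale bytes\n",
                (unsigned long long)Offset.getKnownMinValue());
  else
    std::printf("offset = %llu bytes\n",
                (unsigned long long)Offset.getFixedValue());
}

int main() {
  unsigned OffsetScaled = 3;
  emitStoreAt(SimpleTypeSize::Fixed(OffsetScaled * 16)); // like the patched call
  return 0;
}
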
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
deleted file mode 100644
index 24751a81797d..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
+++ /dev/null
@@ -1,151 +0,0 @@
-//==--AArch64StackOffset.h ---------------------------------------*- C++ -*-==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the StackOffset class, which is used to
-// describe scalable and non-scalable offsets during frame lowering.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H
-#define LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H
-
-#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/TypeSize.h"
-#include <cassert>
-
-namespace llvm {
-
-/// StackOffset is a wrapper around scalable and non-scalable offsets and is
-/// used in several functions such as 'isAArch64FrameOffsetLegal' and
-/// 'emitFrameOffset()'. StackOffsets are described by MVTs, e.g.
-//
-/// StackOffset(1, MVT::nxv16i8)
-//
-/// would describe an offset as being the size of a single SVE vector.
-///
-/// The class also implements simple arithmetic (addition/subtraction) on these
-/// offsets, e.g.
-//
-/// StackOffset(1, MVT::nxv16i8) + StackOffset(1, MVT::i64)
-//
-/// describes an offset that spans the combined storage required for an SVE
-/// vector and a 64bit GPR.
-class StackOffset {
- int64_t Bytes;
- int64_t ScalableBytes;
-
- explicit operator int() const;
-
-public:
- using Part = std::pair<int64_t, MVT>;
-
- StackOffset() : Bytes(0), ScalableBytes(0) {}
-
- StackOffset(int64_t Offset, MVT::SimpleValueType T) : StackOffset() {
- assert(MVT(T).isByteSized() && "Offset type is not a multiple of bytes");
- *this += Part(Offset, T);
- }
-
- StackOffset(const StackOffset &Other)
- : Bytes(Other.Bytes), ScalableBytes(Other.ScalableBytes) {}
-
- StackOffset &operator=(const StackOffset &) = default;
-
- StackOffset &operator+=(const StackOffset::Part &Other) {
- const TypeSize Size = Other.second.getSizeInBits();
- if (Size.isScalable())
- ScalableBytes += Other.first * ((int64_t)Size.getKnownMinSize() / 8);
- else
- Bytes += Other.first * ((int64_t)Size.getFixedSize() / 8);
- return *this;
- }
-
- StackOffset &operator+=(const StackOffset &Other) {
- Bytes += Other.Bytes;
- ScalableBytes += Other.ScalableBytes;
- return *this;
- }
-
- StackOffset operator+(const StackOffset &Other) const {
- StackOffset Res(*this);
- Res += Other;
- return Res;
- }
-
- StackOffset &operator-=(const StackOffset &Other) {
- Bytes -= Other.Bytes;
- ScalableBytes -= Other.ScalableBytes;
- return *this;
- }
-
- StackOffset operator-(const StackOffset &Other) const {
- StackOffset Res(*this);
- Res -= Other;
- return Res;
- }
-
- StackOffset operator-() const {
- StackOffset Res = {};
- const StackOffset Other(*this);
- Res -= Other;
- return Res;
- }
-
- /// Returns the scalable part of the offset in bytes.
- int64_t getScalableBytes() const { return ScalableBytes; }
-
- /// Returns the non-scalable part of the offset in bytes.
- int64_t getBytes() const { return Bytes; }
-
- /// Returns the offset in parts to which this frame offset can be
- /// decomposed for the purpose of describing a frame offset.
- /// For non-scalable offsets this is simply its byte size.
- void getForFrameOffset(int64_t &NumBytes, int64_t &NumPredicateVectors,
- int64_t &NumDataVectors) const {
- assert(isValid() && "Invalid frame offset");
-
- NumBytes = Bytes;
- NumDataVectors = 0;
- NumPredicateVectors = ScalableBytes / 2;
- // This method is used to get the offsets to adjust the frame offset.
- // If the function requires ADDPL to be used and needs more than two ADDPL
- // instructions, part of the offset is folded into NumDataVectors so that it
- // uses ADDVL for part of it, reducing the number of ADDPL instructions.
- if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
- NumPredicateVectors > 62) {
- NumDataVectors = NumPredicateVectors / 8;
- NumPredicateVectors -= NumDataVectors * 8;
- }
- }
-
- void getForDwarfOffset(int64_t &ByteSized, int64_t &VGSized) const {
- assert(isValid() && "Invalid frame offset");
-
- // VGSized offsets are divided by '2', because the VG register is the
- // the number of 64bit granules as opposed to 128bit vector chunks,
- // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
- // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
- // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
- ByteSized = Bytes;
- VGSized = ScalableBytes / 2;
- }
-
- /// Returns whether the offset is known zero.
- explicit operator bool() const { return Bytes || ScalableBytes; }
-
- bool isValid() const {
- // The smallest scalable element supported by scaled SVE addressing
- // modes are predicates, which are 2 scalable bytes in size. So the scalable
- // byte offset must always be a multiple of 2.
- return ScalableBytes % 2 == 0;
- }
-};
-
-} // end namespace llvm
-
-#endif
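
The file removed above documents how a mixed fixed/scalable stack offset was decomposed for frame lowering. The standalone sketch below restates just the getForFrameOffset() folding rule from the deleted code in plain C++ (the struct and function names are illustrative, not the API this patch migrates to), so the ADDPL/ADDVL trade-off is easy to trace.

// Restatement of the folding rule in the removed getForFrameOffset(): scalable
// bytes are first counted in predicate-vector (PL) units of 2 scalable bytes,
// and when that count is a multiple of 8 or would need more than two ADDPL
// instructions (outside [-64, 62]), whole data vectors (VL units, 8 PL each)
// are split off so ADDVL can cover them instead.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct FrameOffsetParts {
  int64_t NumBytes;            // non-scalable part
  int64_t NumPredicateVectors; // multiples of one SVE predicate (PL)
  int64_t NumDataVectors;      // multiples of one SVE vector (VL)
};

static FrameOffsetParts decompose(int64_t Bytes, int64_t ScalableBytes) {
  assert(ScalableBytes % 2 == 0 && "scalable part must be whole predicates");
  FrameOffsetParts P{Bytes, ScalableBytes / 2, 0};
  if (P.NumPredicateVectors % 8 == 0 || P.NumPredicateVectors < -64 ||
      P.NumPredicateVectors > 62) {
    P.NumDataVectors = P.NumPredicateVectors / 8;
    P.NumPredicateVectors -= P.NumDataVectors * 8;
  }
  return P;
}

int main() {
  // 16 bytes of GPR spill area plus 10 scalable vectors (10 * 16 scalable bytes):
  FrameOffsetParts P = decompose(16, 10 * 16);
  std::printf("bytes=%lld, VL=%lld, PL=%lld\n", (long long)P.NumBytes,
              (long long)P.NumDataVectors, (long long)P.NumPredicateVectors);
  // Prints: bytes=16, VL=10, PL=0  (80 PL folds entirely into 10 VL)
  return 0;
}
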
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index 61f27cbc3b29..ab49e0c3f937 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -59,7 +59,7 @@
using namespace llvm;
-#define DEBUG_TYPE "stack-tagging"
+#define DEBUG_TYPE "aarch64-stack-tagging"
static cl::opt<bool> ClMergeInit(
"stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
@@ -73,6 +73,10 @@ static cl::opt<bool>
static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
cl::init(40), cl::Hidden);
+static cl::opt<unsigned>
+ ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
+ cl::Hidden);
+
static const Align kTagGranuleSize = Align(16);
namespace {
@@ -103,9 +107,10 @@ public:
SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}
bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
- auto I = std::lower_bound(
- Ranges.begin(), Ranges.end(), Start,
- [](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; });
+ auto I =
+ llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) {
+ return LHS.End <= RHS;
+ });
if (I != Ranges.end() && End > I->Start) {
// Overlap - bail.
return false;
@@ -434,7 +439,8 @@ void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
bool LittleEndian =
Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
// Current implementation of initializer merging assumes little endianness.
- if (MergeInit && !F->hasOptNone() && LittleEndian) {
+ if (MergeInit && !F->hasOptNone() && LittleEndian &&
+ Size < ClMergeInitSizeLimit) {
LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
<< ", size = " << Size << "\n");
InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
@@ -544,7 +550,6 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
SmallVector<Instruction *, 8> RetVec;
- DenseMap<Value *, AllocaInst *> AllocaForValue;
SmallVector<Instruction *, 4> UnrecognizedLifetimes;
for (auto &BB : *F) {
@@ -566,8 +571,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
auto *II = dyn_cast<IntrinsicInst>(I);
if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end)) {
- AllocaInst *AI =
- llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue);
+ AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
if (!AI) {
UnrecognizedLifetimes.push_back(I);
continue;
@@ -655,7 +659,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
IntrinsicInst *Start = Info.LifetimeStart[0];
IntrinsicInst *End = Info.LifetimeEnd[0];
uint64_t Size =
- dyn_cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
+ cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
Size = alignTo(Size, kTagGranuleSize);
tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
// We need to ensure that if we tag some object, we certainly untag it
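
The first hunk in this file only swaps a std::lower_bound call for the equivalent llvm::lower_bound range helper. For readers unfamiliar with the pattern, the sketch below reproduces the same keep-sorted, reject-on-overlap insertion using only the standard library; the Range/addRange names mirror the snippet above, everything else is illustrative.

// Sorted, non-overlapping [Start, End) ranges, in the spirit of addRange():
// find the first existing range whose End is greater than the new Start,
// refuse the insertion if that range overlaps the new one, otherwise insert.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Range {
  uint64_t Start;
  uint64_t End; // exclusive
};

static bool addRange(std::vector<Range> &Ranges, uint64_t Start, uint64_t End) {
  auto I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
                            [](const Range &LHS, uint64_t RHS) {
                              return LHS.End <= RHS;
                            });
  if (I != Ranges.end() && End > I->Start)
    return false; // overlap - bail, exactly as the pass does
  Ranges.insert(I, Range{Start, End});
  return true;
}

int main() {
  std::vector<Range> Ranges;
  std::printf("%d\n", addRange(Ranges, 0, 16));  // 1: first granule
  std::printf("%d\n", addRange(Ranges, 32, 48)); // 1: disjoint
  std::printf("%d\n", addRange(Ranges, 8, 24));  // 0: overlaps [0, 16)
  return 0;
}
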
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
index 73bd434ef123..41096a961330 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -13,7 +13,6 @@
#include "AArch64InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -50,6 +49,12 @@ cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
"apply unchecked-ld-st when the target is definitely within range"),
clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
+static cl::opt<bool>
+ ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
+ cl::ZeroOrMore,
+ cl::desc("Apply first slot optimization for stack tagging "
+ "(eliminate ADDG Rt, Rn, 0, 0)."));
+
namespace {
class AArch64StackTaggingPreRA : public MachineFunctionPass {
@@ -71,6 +76,7 @@ public:
bool mayUseUncheckedLoadStore();
void uncheckUsesOf(unsigned TaggedReg, int FI);
void uncheckLoadsAndStores();
+ Optional<int> findFirstSlotCandidate();
bool runOnMachineFunction(MachineFunction &Func) override;
StringRef getPassName() const override {
@@ -197,6 +203,141 @@ void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
}
}
+struct SlotWithTag {
+ int FI;
+ int Tag;
+ SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
+ explicit SlotWithTag(const MachineInstr &MI)
+ : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
+ bool operator==(const SlotWithTag &Other) const {
+ return FI == Other.FI && Tag == Other.Tag;
+ }
+};
+
+namespace llvm {
+template <> struct DenseMapInfo<SlotWithTag> {
+ static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
+ static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
+ static unsigned getHashValue(const SlotWithTag &V) {
+ return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
+ DenseMapInfo<int>::getHashValue(V.Tag));
+ }
+ static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
+ return A == B;
+ }
+};
+} // namespace llvm
+
+static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
+ return MFI->getUseLocalStackAllocationBlock() &&
+ MFI->isObjectPreAllocated(FI);
+}
+
+// Pin one of the tagged slots to offset 0 from the tagged base pointer.
+// This would make its address available in a virtual register (IRG's def), as
+// opposed to requiring an ADDG instruction to materialize. This effectively
+// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
+// live almost everywhere anyway), and therefore needs to happen before
+// regalloc.
+Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
+ // Find the best (FI, Tag) pair to pin to offset 0.
+ // Looking at the possible uses of a tagged address, the advantage of pinning
+ // is:
+ // - COPY to physical register.
+ // Does not matter, this would trade a MOV instruction for an ADDG.
+ // - ST*G matter, but those mostly appear near the function prologue where all
+ // the tagged addresses need to be materialized anyway; also, counting ST*G
+ // uses would overweight large allocas that require more than one ST*G
+ // instruction.
+ // - Load/Store instructions in the address operand do not require a tagged
+ // pointer, so they also do not benefit. These operands have already been
+ // eliminated (see uncheckLoadsAndStores) so all remaining load/store
+ // instructions count.
+ // - Any other instruction may benefit from being pinned to offset 0.
+ LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
+ if (!ClFirstSlot)
+ return None;
+
+ DenseMap<SlotWithTag, int> RetagScore;
+ SlotWithTag MaxScoreST{-1, -1};
+ int MaxScore = -1;
+ for (auto *I : ReTags) {
+ SlotWithTag ST{*I};
+ if (isSlotPreAllocated(MFI, ST.FI))
+ continue;
+
+ Register RetagReg = I->getOperand(0).getReg();
+ if (!Register::isVirtualRegister(RetagReg))
+ continue;
+
+ int Score = 0;
+ SmallVector<Register, 8> WorkList;
+ WorkList.push_back(RetagReg);
+
+ while (!WorkList.empty()) {
+ Register UseReg = WorkList.back();
+ WorkList.pop_back();
+ for (auto &UseI : MRI->use_instructions(UseReg)) {
+ unsigned Opcode = UseI.getOpcode();
+ if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
+ Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
+ Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
+ Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
+ Opcode == AArch64::STZGloop_wback)
+ continue;
+ if (UseI.isCopy()) {
+ Register DstReg = UseI.getOperand(0).getReg();
+ if (Register::isVirtualRegister(DstReg))
+ WorkList.push_back(DstReg);
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
+ << Register::virtReg2Index(UseReg) << " in " << UseI
+ << "\n");
+ Score++;
+ }
+ }
+
+ int TotalScore = RetagScore[ST] += Score;
+ if (TotalScore > MaxScore ||
+ (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
+ MaxScore = TotalScore;
+ MaxScoreST = ST;
+ }
+ }
+
+ if (MaxScoreST.FI < 0)
+ return None;
+
+ // If FI's tag is already 0, we are done.
+ if (MaxScoreST.Tag == 0)
+ return MaxScoreST.FI;
+
+ // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
+ SlotWithTag SwapST{-1, -1};
+ for (auto *I : ReTags) {
+ SlotWithTag ST{*I};
+ if (ST.Tag == 0) {
+ SwapST = ST;
+ break;
+ }
+ }
+
+ // Swap tags between the victim and the highest scoring pair.
+ // If SwapST is still (-1, -1), that's fine, too - we'll simply assign tag 0 to
+ // the highest score slot without changing anything else.
+ for (auto *&I : ReTags) {
+ SlotWithTag ST{*I};
+ MachineOperand &TagOp = I->getOperand(4);
+ if (ST == MaxScoreST) {
+ TagOp.setImm(0);
+ } else if (ST == SwapST) {
+ TagOp.setImm(MaxScoreST.Tag);
+ }
+ }
+ return MaxScoreST.FI;
+}
+
bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
MRI = &MF->getRegInfo();
@@ -225,11 +366,35 @@ bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
}
}
+ // Take over from SSP. It does nothing for tagged slots, and should not really
+ // have been enabled in the first place.
+ for (int FI : TaggedSlots)
+ MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);
+
if (ReTags.empty())
return false;
if (mayUseUncheckedLoadStore())
uncheckLoadsAndStores();
+ // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
+ // If the base tagged pointer is set up to the address of this slot,
+ // the ADDG instruction can be eliminated.
+ Optional<int> BaseSlot = findFirstSlotCandidate();
+ if (BaseSlot)
+ AFI->setTaggedBasePointerIndex(*BaseSlot);
+
+ for (auto *I : ReTags) {
+ int FI = I->getOperand(1).getIndex();
+ int Tag = I->getOperand(4).getImm();
+ Register Base = I->getOperand(3).getReg();
+ if (Tag == 0 && FI == BaseSlot) {
+ BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
+ I->getOperand(0).getReg())
+ .addReg(Base);
+ I->eraseFromParent();
+ }
+ }
+
return true;
}
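
The new findFirstSlotCandidate() above is mostly machine-IR bookkeeping around a simple idea. The sketch below restates that idea in standalone C++ (TaggedSlot, the hand-fed scores, and pinBestSlotToTagZero are illustrative, not part of the pass): score each (frame index, tag) pair, pin the best-scoring one to tag 0, and hand its old tag to whichever slot previously owned tag 0 so tags stay unique.

// Selection step only: pick the highest-scoring slot (ties broken toward the
// larger frame index, as in the pass), give it tag 0, and swap its old tag
// onto the previous owner of tag 0 if one exists.
#include <cstdio>
#include <vector>

struct TaggedSlot {
  int FI;
  int Tag;
  int Score; // number of uses that benefit from a pre-materialized address
};

static int pinBestSlotToTagZero(std::vector<TaggedSlot> &Slots) {
  int MaxScore = -1, MaxIdx = -1;
  for (int i = 0, e = (int)Slots.size(); i != e; ++i)
    if (MaxIdx < 0 || Slots[i].Score > MaxScore ||
        (Slots[i].Score == MaxScore && Slots[i].FI > Slots[MaxIdx].FI)) {
      MaxScore = Slots[i].Score;
      MaxIdx = i;
    }
  if (MaxIdx < 0)
    return -1;
  if (Slots[MaxIdx].Tag == 0)
    return Slots[MaxIdx].FI; // already at tag 0, nothing to do

  // Find the current owner of tag 0, if any, and give it the winner's old tag.
  for (TaggedSlot &S : Slots)
    if (S.Tag == 0) {
      S.Tag = Slots[MaxIdx].Tag;
      break;
    }
  Slots[MaxIdx].Tag = 0;
  return Slots[MaxIdx].FI;
}

int main() {
  std::vector<TaggedSlot> Slots = {{0, 0, 2}, {1, 3, 7}, {2, 5, 1}};
  int BaseFI = pinBestSlotToTagZero(Slots);
  std::printf("base slot: FI %d\n", BaseFI); // FI 1, which now carries tag 0
  for (const TaggedSlot &S : Slots)
    std::printf("  FI %d -> tag %d\n", S.FI, S.Tag);
  return 0;
}
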
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 029535cb98b5..71b2bb196486 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -67,7 +67,7 @@ AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
if (CPUString.empty())
CPUString = "generic";
- ParseSubtargetFeatures(CPUString, FS);
+ ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS);
initializeProperties();
return *this;
@@ -103,19 +103,26 @@ void AArch64Subtarget::initializeProperties() {
case CortexA76:
case CortexA77:
case CortexA78:
+ case CortexA78C:
+ case CortexR82:
case CortexX1:
PrefFunctionLogAlignment = 4;
break;
case A64FX:
CacheLineSize = 256;
- PrefFunctionLogAlignment = 5;
- PrefLoopLogAlignment = 5;
+ PrefFunctionLogAlignment = 3;
+ PrefLoopLogAlignment = 2;
+ MaxInterleaveFactor = 4;
+ PrefetchDistance = 128;
+ MinPrefetchStride = 1024;
+ MaxPrefetchIterationsAhead = 4;
break;
case AppleA7:
case AppleA10:
case AppleA11:
case AppleA12:
case AppleA13:
+ case AppleA14:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
@@ -150,6 +157,8 @@ void AArch64Subtarget::initializeProperties() {
PrefFunctionLogAlignment = 3;
break;
case NeoverseN1:
+ case NeoverseN2:
+ case NeoverseV1:
PrefFunctionLogAlignment = 4;
break;
case Saphira:
@@ -200,7 +209,7 @@ void AArch64Subtarget::initializeProperties() {
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
- : AArch64GenSubtargetInfo(TT, CPU, FS),
+ : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian),
@@ -366,3 +375,8 @@ unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const {
return (SVEVectorBitsMin / 128) * 128;
return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
}
+
+bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
+ // Prefer NEON unless larger SVE registers are available.
+ return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
index b111f0016948..8fe2f125982f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -45,6 +45,7 @@ public:
AppleA11,
AppleA12,
AppleA13,
+ AppleA14,
Carmel,
CortexA35,
CortexA53,
@@ -57,20 +58,24 @@ public:
CortexA76,
CortexA77,
CortexA78,
+ CortexA78C,
+ CortexR82,
CortexX1,
ExynosM3,
Falkor,
Kryo,
NeoverseE1,
NeoverseN1,
+ NeoverseN2,
+ NeoverseV1,
Saphira,
ThunderX2T99,
ThunderX,
ThunderXT81,
ThunderXT83,
ThunderXT88,
- TSV110,
- ThunderX3T110
+ ThunderX3T110,
+ TSV110
};
protected:
@@ -83,6 +88,10 @@ protected:
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
+ bool HasV8_7aOps = false;
+
+ bool HasV8_0rOps = false;
+ bool HasCONTEXTIDREL2 = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
@@ -118,14 +127,13 @@ protected:
bool HasAES = false;
// ARMv8.3 extensions
- bool HasPA = false;
+ bool HasPAuth = false;
bool HasJS = false;
bool HasCCIDX = false;
bool HasComplxNum = false;
// ARMv8.4 extensions
bool HasNV = false;
- bool HasRASv8_4 = false;
bool HasMPAM = false;
bool HasDIT = false;
bool HasTRACEV8_4 = false;
@@ -133,7 +141,7 @@ protected:
bool HasSEL2 = false;
bool HasPMU = false;
bool HasTLB_RMI = false;
- bool HasFMI = false;
+ bool HasFlagM = false;
bool HasRCPC_IMMO = false;
bool HasLSLFast = false;
@@ -162,6 +170,12 @@ protected:
bool HasFineGrainedTraps = false;
bool HasEnhancedCounterVirtualization = false;
+ // Armv8.7-A Extensions
+ bool HasXS = false;
+ bool HasWFxT = false;
+ bool HasHCX = false;
+ bool HasLS64 = false;
+
// Arm SVE2 extensions
bool HasSVE2 = false;
bool HasSVE2AES = false;
@@ -172,6 +186,9 @@ protected:
// Future architecture extensions.
bool HasETE = false;
bool HasTRBE = false;
+ bool HasBRBE = false;
+ bool HasPAUTH = false;
+ bool HasSPE_EEF = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
@@ -191,6 +208,7 @@ protected:
// Enable 64-bit vectorization in SLP.
unsigned MinVectorRegisterBitWidth = 64;
+ bool OutlineAtomics = false;
bool UseAA = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
@@ -203,6 +221,7 @@ protected:
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
+ bool HasCmpBccFusion = false;
bool HasFuseAddress = false;
bool HasFuseAES = false;
bool HasFuseArithmeticLogic = false;
@@ -306,6 +325,7 @@ public:
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
+ bool hasV8_0rOps() const { return HasV8_0rOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
@@ -343,6 +363,7 @@ public:
bool hasSHA3() const { return HasSHA3; }
bool hasSHA2() const { return HasSHA2; }
bool hasAES() const { return HasAES; }
+ bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
@@ -357,6 +378,7 @@ public:
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
+ bool hasCmpBccFusion() const { return HasCmpBccFusion; }
bool hasFuseAddress() const { return HasFuseAddress; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
@@ -432,6 +454,7 @@ public:
bool hasRandGen() const { return HasRandGen; }
bool hasMTE() const { return HasMTE; }
bool hasTME() const { return HasTME; }
+ bool hasPAUTH() const { return HasPAUTH; }
// Arm SVE2 extensions
bool hasSVE2AES() const { return HasSVE2AES; }
bool hasSVE2SM4() const { return HasSVE2SM4; }
@@ -461,10 +484,15 @@ public:
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
- bool isTargetILP32() const { return TargetTriple.isArch32Bit(); }
+ bool isTargetILP32() const {
+ return TargetTriple.isArch32Bit() ||
+ TargetTriple.getEnvironment() == Triple::GNUILP32;
+ }
bool useAA() const override { return UseAA; }
+ bool outlineAtomics() const { return OutlineAtomics; }
+
bool hasVH() const { return HasVH; }
bool hasPAN() const { return HasPAN; }
bool hasLOR() const { return HasLOR; }
@@ -473,22 +501,25 @@ public:
bool hasPAN_RWV() const { return HasPAN_RWV; }
bool hasCCPP() const { return HasCCPP; }
- bool hasPA() const { return HasPA; }
+ bool hasPAuth() const { return HasPAuth; }
bool hasJS() const { return HasJS; }
bool hasCCIDX() const { return HasCCIDX; }
bool hasComplxNum() const { return HasComplxNum; }
bool hasNV() const { return HasNV; }
- bool hasRASv8_4() const { return HasRASv8_4; }
bool hasMPAM() const { return HasMPAM; }
bool hasDIT() const { return HasDIT; }
bool hasTRACEV8_4() const { return HasTRACEV8_4; }
bool hasAM() const { return HasAM; }
bool hasAMVS() const { return HasAMVS; }
+ bool hasXS() const { return HasXS; }
+ bool hasWFxT() const { return HasWFxT; }
+ bool hasHCX() const { return HasHCX; }
+ bool hasLS64() const { return HasLS64; }
bool hasSEL2() const { return HasSEL2; }
bool hasPMU() const { return HasPMU; }
bool hasTLB_RMI() const { return HasTLB_RMI; }
- bool hasFMI() const { return HasFMI; }
+ bool hasFlagM() const { return HasFlagM; }
bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
bool addrSinkUsingGEPs() const override {
@@ -511,7 +542,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
@@ -550,6 +581,7 @@ public:
// implied by the architecture.
unsigned getMaxSVEVectorSizeInBits() const;
unsigned getMinSVEVectorSizeInBits() const;
+ bool useSVEForFixedLengthVectors() const;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index ceceabc6ff4e..01ac52bd875a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -32,6 +32,11 @@ def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
AssemblerPredicate<(all_of FeaturePAN_RWV),
"ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
+def HasCONTEXTIDREL2
+ : Predicate<"Subtarget->hasCONTEXTIDREL2()">,
+ AssemblerPredicate<(all_of FeatureCONTEXTIDREL2),
+ "Target contains CONTEXTIDR_EL2 RW operand">;
+
//===----------------------------------------------------------------------===//
// AT (address translate) instruction options.
//===----------------------------------------------------------------------===//
@@ -93,6 +98,21 @@ def : DB<"ld", 0xd>;
def : DB<"st", 0xe>;
def : DB<"sy", 0xf>;
+class DBnXS<string name, bits<4> encoding, bits<5> immValue> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding", "ImmValue"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<4> Encoding = encoding;
+ bits<5> ImmValue = immValue;
+ code Requires = [{ {AArch64::FeatureXS} }];
+}
+
+def : DBnXS<"oshnxs", 0x3, 0x10>;
+def : DBnXS<"nshnxs", 0x7, 0x14>;
+def : DBnXS<"ishnxs", 0xb, 0x18>;
+def : DBnXS<"synxs", 0xf, 0x1c>;
+
//===----------------------------------------------------------------------===//
// DC (data cache maintenance) instruction options.
//===----------------------------------------------------------------------===//
@@ -384,11 +404,8 @@ def : BTI<"jc", 0b11>;
// TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===//
-class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg = 1> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2, bit needsreg> {
string Name = name;
bits<14> Encoding;
let Encoding{13-11} = op1;
@@ -396,95 +413,122 @@ class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
let Encoding{6-3} = crm;
let Encoding{2-0} = op2;
bit NeedsReg = needsreg;
- code Requires = [{ {} }];
+ list<string> Requires = [];
+ list<string> ExtraRequires = [];
+ code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }];
}
-def : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
-def : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
-def : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
-def : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
-def : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
-def : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
-def : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
-def : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
-def : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
-def : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
-def : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
-def : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
-def : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
-def : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
-def : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
-def : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
-def : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
-def : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
-def : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
-def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
+def TLBITable : GenericTable {
+ let FilterClass = "TLBIEntry";
+ let CppTypeName = "TLBI";
+ let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+}
+
+def lookupTLBIByName : SearchIndex {
+ let Table = TLBITable;
+ let Key = ["Name"];
+}
+
+def lookupTLBIByEncoding : SearchIndex {
+ let Table = TLBITable;
+ let Key = ["Encoding"];
+}
+
+multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2, bit needsreg = 1> {
+ def : TLBIEntry<name, op1, crn, crm, op2, needsreg>;
+ def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> {
+ let Encoding{7} = 1;
+ let ExtraRequires = ["AArch64::FeatureXS"];
+ }
+}
+
+defm : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
+defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
+defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
+defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
+defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
+defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
+defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
+defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
+defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
+defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
+defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
+defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
+defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
+defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
+defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
+defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
+defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
+defm : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
+defm : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
+defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
+defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
+defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
+defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
+defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
+defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
+defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
-let Requires = [{ {AArch64::FeatureTLB_RMI} }] in {
+let Requires = ["AArch64::FeatureTLB_RMI"] in {
// Armv8.4-A Outer Sharable TLB Maintenance instructions:
// op1 CRn CRm op2
-def : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
-def : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
-def : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>;
-def : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>;
-def : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>;
-def : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>;
-def : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>;
-def : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>;
-def : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>;
-def : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>;
-def : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
-def : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>;
-def : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>;
-def : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>;
-def : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>;
-def : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
+defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>;
+defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>;
+defm : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>;
+defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>;
+defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>;
+defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>;
+defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>;
+defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>;
+defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
+defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>;
+defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>;
+defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>;
+defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>;
// Armv8.4-A TLB Range Maintenance instructions:
// op1 CRn CRm op2
-def : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>;
-def : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>;
-def : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>;
-def : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>;
-def : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>;
-def : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>;
-def : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>;
-def : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>;
-def : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>;
-def : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>;
-def : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>;
-def : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>;
-def : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>;
-def : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>;
-def : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>;
-def : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>;
-def : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>;
-def : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>;
-def : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>;
-def : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>;
-def : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>;
-def : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>;
-def : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>;
-def : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>;
-def : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>;
-def : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>;
-def : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
-def : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
-def : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
-def : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
+defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>;
+defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>;
+defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>;
+defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>;
+defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>;
+defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>;
+defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>;
+defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>;
+defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>;
+defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>;
+defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>;
+defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>;
+defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>;
+defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>;
+defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>;
+defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>;
+defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>;
+defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>;
+defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>;
+defm : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>;
+defm : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>;
+defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>;
+defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>;
+defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>;
+defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>;
+defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>;
+defm : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
+defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
+defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
+defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
} //FeatureTLB_RMI
// Armv8.5-A Prediction Restriction by Context instruction options:
@@ -599,6 +643,7 @@ def : ROSysReg<"ID_AA64AFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b100>;
def : ROSysReg<"ID_AA64AFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b101>;
def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>;
def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>;
+def : ROSysReg<"ID_AA64ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b010>;
def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>;
def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>;
def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>;
@@ -814,6 +859,9 @@ def : RWSysReg<"ACTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b001>;
def : RWSysReg<"ACTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b001>;
def : RWSysReg<"ACTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b001>;
def : RWSysReg<"HCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b000>;
+def : RWSysReg<"HCRX_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b010> {
+ let Requires = [{ {AArch64::FeatureHCX} }];
+}
def : RWSysReg<"SCR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b000>;
def : RWSysReg<"MDCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b001>;
def : RWSysReg<"SDER32_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b001>;
@@ -1220,7 +1268,6 @@ def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>;
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::FeatureVH} }] in {
def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>;
-def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>;
def : RWSysReg<"CNTHV_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b010>;
def : RWSysReg<"CNTHV_CTL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b001>;
@@ -1246,6 +1293,9 @@ def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>;
def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>;
def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>;
def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>;
+let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in {
+ def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
+}
}
// v8.2a registers
// Op0 Op1 CRn CRm Op2
@@ -1286,7 +1336,7 @@ def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>;
// v8.3a "Pointer authentication extension" registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::FeaturePA} }] in {
+let Requires = [{ {AArch64::FeaturePAuth} }] in {
def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
@@ -1328,13 +1378,11 @@ def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
// v8.4a RAS registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::FeatureRASv8_4} }] in {
def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>;
def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>;
def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>;
-} // FeatureRASv8_4
// v8.4a MPAM registers
// Op0 Op1 CRn CRm Op2
@@ -1522,6 +1570,33 @@ def : RWSysReg<"CNTPCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b101>;
def : RWSysReg<"CNTVCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b110>;
}
+// v8.7a LD64B/ST64B Accelerator Extension system register
+let Requires = [{ {AArch64::FeatureLS64} }] in
+def : RWSysReg<"ACCDATA_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b101>;
+
+// Branch Record Buffer system registers
+let Requires = [{ {AArch64::FeatureBRBE} }] in {
+def : RWSysReg<"BRBCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b000>;
+def : RWSysReg<"BRBCR_EL12", 0b10, 0b101, 0b1001, 0b0000, 0b000>;
+def : RWSysReg<"BRBCR_EL2", 0b10, 0b100, 0b1001, 0b0000, 0b000>;
+def : RWSysReg<"BRBFCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b001>;
+def : ROSysReg<"BRBIDR0_EL1", 0b10, 0b001, 0b1001, 0b0010, 0b000>;
+def : RWSysReg<"BRBINFINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b000>;
+def : RWSysReg<"BRBSRCINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b001>;
+def : RWSysReg<"BRBTGTINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b010>;
+def : RWSysReg<"BRBTS_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b010>;
+foreach n = 0-31 in {
+ defvar nb = !cast<bits<5>>(n);
+ def : ROSysReg<"BRBINF"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b00}>;
+ def : ROSysReg<"BRBSRC"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b01}>;
+ def : ROSysReg<"BRBTGT"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b10}>;
+}
+}
+
+// Statistical Profiling Extension system register
+let Requires = [{ {AArch64::FeatureSPE_EEF} }] in
+def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>;
+
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::ProcAppleA7} }] in
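[Editor's note on the TLBI changes above: the flat "def : TLBI<...>" records become a TLBITable GenericTable with by-name and by-encoding SearchIndexes, and a TLBI multiclass that emits each operation twice, once plain and once with an "nXS" suffix (Encoding bit 7 set, gated on AArch64::FeatureXS). The C++ sketch below only illustrates what such a keyed table with two lookups looks like conceptually; it is not the TableGen-generated code, and the packed encoding values are placeholders.]

// Standalone illustration of a name/encoding keyed table with two lookup
// helpers, mirroring the TLBITable + SearchIndex pair above. Not generated
// code; the packed encoding values are placeholders.
#include <array>
#include <cstdint>
#include <optional>
#include <string_view>

struct TLBIEntryView {
  std::string_view Name;
  uint16_t Encoding; // op1:CRn:CRm:op2 packed; bit 7 marks the nXS form
  bool NeedsReg;
};

constexpr std::array<TLBIEntryView, 2> TLBIRows{{
    {"VAE1IS", 0x0019, true},    // placeholder packed value
    {"VAE1ISnXS", 0x0099, true}, // same fields with bit 7 set
}};

std::optional<TLBIEntryView> lookupTLBIByNameSketch(std::string_view N) {
  for (const auto &E : TLBIRows)
    if (E.Name == N)
      return E;
  return std::nullopt;
}

std::optional<TLBIEntryView> lookupTLBIByEncodingSketch(uint16_t Enc) {
  for (const auto &E : TLBIRows)
    if (E.Encoding == Enc)
      return E;
  return std::nullopt;
}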
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index a63b9a97ada5..bec1758a931b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -148,10 +148,10 @@ static cl::opt<int> EnableGlobalISelAtO(
cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
cl::init(0));
-static cl::opt<bool> EnableSVEIntrinsicOpts(
- "aarch64-sve-intrinsic-opts", cl::Hidden,
- cl::desc("Enable SVE intrinsic opts"),
- cl::init(true));
+static cl::opt<bool>
+ EnableSVEIntrinsicOpts("aarch64-enable-sve-intrinsic-opts", cl::Hidden,
+ cl::desc("Enable SVE intrinsic opts"),
+ cl::init(true));
static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
cl::init(true), cl::Hidden);
@@ -184,6 +184,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PostLegalizerCombinerPass(*PR);
+ initializeAArch64PostLegalizerLoweringPass(*PR);
+ initializeAArch64PostSelectOptimizePass(*PR);
initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);
@@ -213,8 +215,6 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static std::string computeDataLayout(const Triple &TT,
const MCTargetOptions &Options,
bool LittleEndian) {
- if (Options.getABIName() == "ilp32")
- return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128";
if (TT.isOSBinFormatMachO()) {
if (TT.getArch() == Triple::aarch64_32)
return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128";
@@ -222,9 +222,16 @@ static std::string computeDataLayout(const Triple &TT,
}
if (TT.isOSBinFormatCOFF())
return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
- if (LittleEndian)
- return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
- return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
+ std::string Endian = LittleEndian ? "e" : "E";
+ std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : "";
+ return Endian + "-m:e" + Ptr32 +
+ "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
+}
+
+static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) {
+ if (CPU.empty() && TT.isArm64e())
+ return "apple-a12";
+ return CPU;
}
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
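[Editor's note: the rewritten computeDataLayout above derives the ELF data layout string from an endianness prefix plus an optional "-p:32:32" pointer component when the triple environment is GNUILP32, instead of keying off the "ilp32" ABI name. A minimal sketch of that string assembly for the ELF path only, using the same components as the patch; the helper name is ours.]

// Minimal sketch of the ELF-path string assembly in computeDataLayout above.
#include <string>

std::string elfDataLayout(bool LittleEndian, bool IsGNUILP32) {
  std::string Endian = LittleEndian ? "e" : "E";
  std::string Ptr32 = IsGNUILP32 ? "-p:32:32" : "";
  return Endian + "-m:e" + Ptr32 +
         "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
}
// elfDataLayout(true, false) -> "e-m:e-i8:8:32-...-S128" (LP64 little-endian)
// elfDataLayout(true, true)  -> "e-m:e-p:32:32-i8:8:32-...-S128" (GNU ILP32)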
@@ -274,7 +281,8 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
bool LittleEndian)
: LLVMTargetMachine(T,
computeDataLayout(TT, Options.MCOptions, LittleEndian),
- TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM),
+ TT, computeDefaultCPU(TT, CPU), FS, Options,
+ getEffectiveRelocModel(TT, RM),
getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
initAsmInfo();
@@ -309,6 +317,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
// MachO/CodeModel::Large, which GlobalISel does not support.
if (getOptLevel() <= EnableGlobalISelAtO &&
TT.getArch() != Triple::aarch64_32 &&
+ TT.getEnvironment() != Triple::GNUILP32 &&
!(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) {
setGlobalISel(true);
setGlobalISelAbort(GlobalISelAbortMode::Disable);
@@ -331,12 +340,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
auto &I = SubtargetMap[CPU + FS];
if (!I) {
@@ -453,7 +460,12 @@ void AArch64PassConfig::addIRPasses() {
// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
- addPass(createCFGSimplificationPass(1, true, true, false, true));
+ addPass(createCFGSimplificationPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
// Run LoopDataPrefetch
//
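[Editor's note: the addIRPasses change above swaps five positional booleans for a SimplifyCFGOptions object configured through chained setters, so each flag is named at the call site. A standalone sketch of that fluent-builder idiom; this struct is ours, not the LLVM class.]

// Standalone illustration of the fluent-builder idiom used for
// SimplifyCFGOptions above; not the LLVM class itself.
struct CFGSimplifyOpts {
  bool ForwardSwitchCond = false;
  bool SwitchToLookupTable = false;
  bool NeedCanonicalLoops = true;
  bool HoistCommonInsts = false;
  bool SinkCommonInsts = false;

  CFGSimplifyOpts &forwardSwitchCondToPhi(bool B) { ForwardSwitchCond = B; return *this; }
  CFGSimplifyOpts &convertSwitchToLookupTable(bool B) { SwitchToLookupTable = B; return *this; }
  CFGSimplifyOpts &needCanonicalLoops(bool B) { NeedCanonicalLoops = B; return *this; }
  CFGSimplifyOpts &hoistCommonInsts(bool B) { HoistCommonInsts = B; return *this; }
  CFGSimplifyOpts &sinkCommonInsts(bool B) { SinkCommonInsts = B; return *this; }
};

// Usage mirrors the call in addIRPasses:
//   auto Opts = CFGSimplifyOpts()
//                   .forwardSwitchCondToPhi(true)
//                   .convertSwitchToLookupTable(true)
//                   .needCanonicalLoops(false)
//                   .hoistCommonInsts(true)
//                   .sinkCommonInsts(true);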
@@ -541,13 +553,13 @@ bool AArch64PassConfig::addInstSelector() {
}
bool AArch64PassConfig::addIRTranslator() {
- addPass(new IRTranslator());
+ addPass(new IRTranslator(getOptLevel()));
return false;
}
void AArch64PassConfig::addPreLegalizeMachineIR() {
bool IsOptNone = getOptLevel() == CodeGenOpt::None;
- addPass(createAArch64PreLegalizeCombiner(IsOptNone));
+ addPass(createAArch64PreLegalizerCombiner(IsOptNone));
}
bool AArch64PassConfig::addLegalizeMachineIR() {
@@ -556,11 +568,10 @@ bool AArch64PassConfig::addLegalizeMachineIR() {
}
void AArch64PassConfig::addPreRegBankSelect() {
- // For now we don't add this to the pipeline for -O0. We could do in future
- // if we split the combines into separate O0/opt groupings.
bool IsOptNone = getOptLevel() == CodeGenOpt::None;
if (!IsOptNone)
- addPass(createAArch64PostLegalizeCombiner(IsOptNone));
+ addPass(createAArch64PostLegalizerCombiner(IsOptNone));
+ addPass(createAArch64PostLegalizerLowering());
}
bool AArch64PassConfig::addRegBankSelect() {
@@ -574,6 +585,8 @@ void AArch64PassConfig::addPreGlobalInstructionSelect() {
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64PostSelectOptimize());
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 7738a4229391..25e626134317 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -57,6 +57,12 @@ public:
SMDiagnostic &Error,
SMRange &SourceRange) const override;
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
+
private:
bool isLittle;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index cf6de797727b..7fda6b8fb602 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64ExpandImm.h"
#include "AArch64TargetTransformInfo.h"
+#include "AArch64ExpandImm.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -16,9 +16,11 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64tti"
@@ -84,7 +86,8 @@ int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -192,6 +195,10 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
return TTI::TCC_Free;
break;
+ case Intrinsic::experimental_gc_statepoint:
+ if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
}
return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
@@ -205,14 +212,43 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
+unsigned
+AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ auto *RetTy = ICA.getReturnType();
+ switch (ICA.getID()) {
+ case Intrinsic::umin:
+ case Intrinsic::umax: {
+ auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ // umin(x,y) -> sub(x,usubsat(x,y))
+ // umax(x,y) -> add(x,usubsat(y,x))
+ if (LT.second == MVT::v2i64)
+ return LT.first * 2;
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::smin:
+ case Intrinsic::smax: {
+ static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
+ MVT::v8i16, MVT::v2i32, MVT::v4i32};
+ auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
+ return LT.first;
+ break;
+ }
+ default:
+ break;
+ }
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+}
+
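[Editor's note: getIntrinsicInstrCost above prices v2i64 umin/umax as two operations because they can be expanded through unsigned saturating subtraction, as the inline comment notes. The identities are easy to check on scalars; the small self-contained check below uses our own usubsat helper, not LLVM code.]

// Scalar check of the identities used above:
//   umin(x,y) = x - usubsat(x,y)
//   umax(x,y) = x + usubsat(y,x)
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint64_t usubsat(uint64_t A, uint64_t B) { return A > B ? A - B : 0; }

int main() {
  for (uint64_t X : {0ull, 1ull, 7ull, ~0ull})
    for (uint64_t Y : {0ull, 3ull, 7ull, ~0ull}) {
      assert(X - usubsat(X, Y) == std::min(X, Y));
      assert(X + usubsat(Y, X) == std::max(X, Y));
    }
  return 0;
}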
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
ArrayRef<const Value *> Args) {
// A helper that returns a vector type from the given type. The number of
// elements in type Ty determine the vector width.
auto toVectorTy = [&](Type *ArgTy) {
- return FixedVectorType::get(ArgTy->getScalarType(),
- cast<FixedVectorType>(DstTy)->getNumElements());
+ return VectorType::get(ArgTy->getScalarType(),
+ cast<VectorType>(DstTy)->getElementCount());
};
// Exit early if DstTy is not a vector type whose elements are at least
@@ -261,8 +297,8 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
return false;
// Get the total number of vector elements in the legalized types.
- unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
- unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
+ unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements();
+ unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
// Return true if the legalized types have the same number of vector elements
// and the destination element type size is twice that of the source type.
@@ -270,6 +306,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
}
int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -306,7 +343,8 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I));
+ return AdjustCost(
+ BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
static const TypeConversionCostTblEntry
ConversionTbl[] = {
@@ -410,7 +448,8 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
- return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I));
+ return AdjustCost(
+ BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}
int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
@@ -442,12 +481,14 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
// we may get the extension for free. If not, get the default cost for the
// extend.
if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
- return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
+ return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
+ CostKind);
// The destination type should be larger than the element type. If not, get
// the default cost for the extend.
- if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
- return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
+ if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
+ return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
+ CostKind);
switch (Opcode) {
default:
@@ -466,7 +507,8 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
}
// If we are unable to perform the extend for free, get the default cost.
- return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
+ return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
+ CostKind);
}
unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
@@ -602,8 +644,20 @@ int AArch64TTIImpl::getArithmeticInstrCost(
}
return Cost;
- case ISD::ADD:
case ISD::MUL:
+ if (LT.second != MVT::v2i64)
+ return (Cost + 1) * LT.first;
+ // Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive
+ // as elements are extracted from the vectors and the muls scalarized.
+ // As getScalarizationOverhead is a bit too pessimistic, we estimate the
+ // cost for a i64 vector directly here, which is:
+ // - four i64 extracts,
+ // - two i64 inserts, and
+ // - two muls.
+ // So, for a v2i64 with LT.First = 1 the cost is 8, and for a v4i64 with
+ // LT.first = 2 the cost is 16.
+ return LT.first * 8;
+ case ISD::ADD:
case ISD::XOR:
case ISD::OR:
case ISD::AND:
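[Editor's note: the MUL case above models a legalized v2i64 multiply as four extracts, two inserts and two scalar multiplies, i.e. a cost of 8 per legalized part. A one-function sketch of that arithmetic; the names are ours.]

// Cost arithmetic from the comment above: 4 extracts + 2 inserts + 2 muls = 8
// per legalized v2i64 part, so LT.first = 1 gives 8 and LT.first = 2 gives 16.
unsigned mulV2I64CostSketch(unsigned NumLegalizedParts /* LT.first */) {
  const unsigned Extracts = 4, Inserts = 2, ScalarMuls = 2;
  return NumLegalizedParts * (Extracts + Inserts + ScalarMuls);
}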
@@ -642,19 +696,40 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
}
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy,
+ Type *CondTy, CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+ I);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower some vector selects well that are wider than the register
// width.
- if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
const int AmortizationCost = 20;
+
+ // If VecPred is not set, check if we can get a predicate from the context
+ // instruction, if its type matches the requested ValTy.
+ if (VecPred == CmpInst::BAD_ICMP_PREDICATE && I && I->getType() == ValTy) {
+ CmpInst::Predicate CurrentPred;
+ if (match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()), m_Value(),
+ m_Value())))
+ VecPred = CurrentPred;
+ }
+ // Check if we have a compare/select chain that can be lowered using CMxx &
+ // BFI pair.
+ if (CmpInst::isIntPredicate(VecPred)) {
+ static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
+ MVT::v8i16, MVT::v2i32, MVT::v4i32,
+ MVT::v2i64};
+ auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
+ return LT.first;
+ }
+
static const TypeConversionCostTblEntry
VectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
@@ -674,7 +749,9 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return Entry->Cost;
}
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ // The base case handles scalable vectors fine for now, since it treats the
+ // cost as 1 * legalization cost.
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
AArch64TTIImpl::TTI::MemCmpExpansionOptions
@@ -695,6 +772,30 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return Options;
}
+unsigned AArch64TTIImpl::getGatherScatterOpCost(
+ unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
+
+ if (!isa<ScalableVectorType>(DataTy))
+ return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment, CostKind, I);
+ auto *VT = cast<VectorType>(DataTy);
+ auto LT = TLI->getTypeLegalizationCost(DL, DataTy);
+ ElementCount LegalVF = LT.second.getVectorElementCount();
+ Optional<unsigned> MaxNumVScale = getMaxVScale();
+ assert(MaxNumVScale && "Expected valid max vscale value");
+
+ unsigned MemOpCost =
+ getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
+ unsigned MaxNumElementsPerGather =
+ MaxNumVScale.getValue() * LegalVF.getKnownMinValue();
+ return LT.first * MaxNumElementsPerGather * MemOpCost;
+}
+
+bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
+ return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
+}
+
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
@@ -722,7 +823,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
return LT.first * 2 * AmortizationCost;
}
- if (Ty->isVectorTy() &&
+ if (useNeonVector(Ty) &&
cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) {
unsigned ProfitableNumElements;
if (Opcode == Instruction::Store)
@@ -997,11 +1098,70 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
return false;
}
+int AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwise, bool IsUnsigned,
+ TTI::TargetCostKind CostKind) {
+ if (!isa<ScalableVectorType>(Ty))
+ return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
+ CostKind);
+ assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
+ "Both vector needs to be scalable");
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ int LegalizationCost = 0;
+ if (LT.first > 1) {
+ Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
+ unsigned CmpOpcode =
+ Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp;
+ LegalizationCost =
+ getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind) +
+ getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ LegalizationCost *= LT.first - 1;
+ }
+
+ return LegalizationCost + /*Cost of horizontal reduction*/ 2;
+}
+
+int AArch64TTIImpl::getArithmeticReductionCostSVE(
+ unsigned Opcode, VectorType *ValTy, bool IsPairwise,
+ TTI::TargetCostKind CostKind) {
+ assert(!IsPairwise && "Cannot be pair wise to continue");
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ int LegalizationCost = 0;
+ if (LT.first > 1) {
+ Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
+ LegalizationCost = getArithmeticInstrCost(Opcode, LegalVTy, CostKind);
+ LegalizationCost *= LT.first - 1;
+ }
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+ // Add the final reduction cost for the legal horizontal reduction
+ switch (ISD) {
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::FADD:
+ return LegalizationCost + 2;
+ default:
+ // TODO: Return an invalid cost once InstructionCost is in use, for the
+ // cases not supported by SVE.
+ return 16;
+ }
+}
+
int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
VectorType *ValTy,
bool IsPairwiseForm,
TTI::TargetCostKind CostKind) {
+ if (isa<ScalableVectorType>(ValTy))
+ return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm,
+ CostKind);
if (IsPairwiseForm)
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
CostKind);
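[Editor's note: for scalable vectors, getGatherScatterOpCost above charges LT.first * (max vscale * minimum element count) * per-element memory cost, with the maximum vscale taken from SVEMaxBitsPerVector / SVEBitsPerBlock (see the header change below). A worked sketch, assuming those constants are 2048 and 128; that assumption is ours and is not stated in this diff.]

// Worked sketch of the scalable gather/scatter cost above. MaxVScale = 16
// assumes SVEMaxBitsPerVector = 2048 and SVEBitsPerBlock = 128.
unsigned gatherCostSketch(unsigned LTFirst, unsigned LegalVFKnownMin,
                          unsigned MemOpCost, unsigned MaxVScale = 2048 / 128) {
  unsigned MaxElemsPerGather = MaxVScale * LegalVFKnownMin;
  return LTFirst * MaxElemsPerGather * MemOpCost;
}
// e.g. a <vscale x 4 x i32> gather with LT.first = 1 and MemOpCost = 1
// is priced at 16 * 4 = 64.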
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 1f029689a60e..7c9360ada92e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -74,7 +74,8 @@ public:
int getIntImmCost(int64_t Val);
int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind);
+ Type *Ty, TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TTI::TargetCostKind CostKind);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
@@ -96,6 +97,9 @@ public:
return 31;
}
+ unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
+
unsigned getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasSVE())
@@ -111,10 +115,21 @@ public:
return ST->getMinVectorRegisterBitWidth();
}
+ Optional<unsigned> getMaxVScale() const {
+ if (ST->hasSVE())
+ return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
+ return BaseT::getMaxVScale();
+ }
+
unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::TargetCostKind CostKind,
+ TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
@@ -124,6 +139,14 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwise, bool IsUnsigned,
+ TTI::TargetCostKind CostKind);
+
+ int getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind);
+
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
@@ -137,11 +160,13 @@ public:
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
+ bool useNeonVector(const Type *Ty) const;
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
@@ -166,6 +191,9 @@ public:
return false;
Type *Ty = cast<ScalableVectorType>(DataType)->getElementType();
+ if (Ty->isPointerTy())
+ return true;
+
if (Ty->isBFloatTy() || Ty->isHalfTy() ||
Ty->isFloatTy() || Ty->isDoubleTy())
return true;
@@ -213,28 +241,14 @@ public:
shouldConsiderAddressTypePromotion(const Instruction &I,
bool &AllowPromotionWithoutCommonHeader);
- bool shouldExpandReduction(const IntrinsicInst *II) const {
- switch (II->getIntrinsicID()) {
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- // We don't have legalization support for ordered FP reductions.
- return !II->getFastMathFlags().allowReassoc();
-
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
- // Lowering asserts that there are no NaNs.
- return !II->getFastMathFlags().noNaNs();
-
- default:
- // Don't expand anything else, let legalization deal with it.
- return false;
- }
- }
+ bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
unsigned getGISelRematGlobalCost() const {
return 2;
}
+ bool supportsScalableVectors() const { return ST->hasSVE(); }
+
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index e72ae0e62cb7..96c50ff3f8d0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
@@ -158,8 +159,13 @@ private:
bool parseSymbolicImmVal(const MCExpr *&ImmVal);
bool parseNeonVectorList(OperandVector &Operands);
bool parseOptionalMulOperand(OperandVector &Operands);
+ bool parseKeywordOperand(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, bool isCondCode,
bool invertCondCode);
+ bool parseImmExpr(int64_t &Out);
+ bool parseComma();
+ bool parseRegisterInRange(unsigned &Out, unsigned Base, unsigned First,
+ unsigned Last);
bool showMatchError(SMLoc Loc, unsigned ErrCode, uint64_t ErrorInfo,
OperandVector &Operands);
@@ -181,6 +187,31 @@ private:
bool parseDirectiveVariantPCS(SMLoc L);
+ bool parseDirectiveSEHAllocStack(SMLoc L);
+ bool parseDirectiveSEHPrologEnd(SMLoc L);
+ bool parseDirectiveSEHSaveR19R20X(SMLoc L);
+ bool parseDirectiveSEHSaveFPLR(SMLoc L);
+ bool parseDirectiveSEHSaveFPLRX(SMLoc L);
+ bool parseDirectiveSEHSaveReg(SMLoc L);
+ bool parseDirectiveSEHSaveRegX(SMLoc L);
+ bool parseDirectiveSEHSaveRegP(SMLoc L);
+ bool parseDirectiveSEHSaveRegPX(SMLoc L);
+ bool parseDirectiveSEHSaveLRPair(SMLoc L);
+ bool parseDirectiveSEHSaveFReg(SMLoc L);
+ bool parseDirectiveSEHSaveFRegX(SMLoc L);
+ bool parseDirectiveSEHSaveFRegP(SMLoc L);
+ bool parseDirectiveSEHSaveFRegPX(SMLoc L);
+ bool parseDirectiveSEHSetFP(SMLoc L);
+ bool parseDirectiveSEHAddFP(SMLoc L);
+ bool parseDirectiveSEHNop(SMLoc L);
+ bool parseDirectiveSEHSaveNext(SMLoc L);
+ bool parseDirectiveSEHEpilogStart(SMLoc L);
+ bool parseDirectiveSEHEpilogEnd(SMLoc L);
+ bool parseDirectiveSEHTrapFrame(SMLoc L);
+ bool parseDirectiveSEHMachineFrame(SMLoc L);
+ bool parseDirectiveSEHContext(SMLoc L);
+ bool parseDirectiveSEHClearUnwoundToCall(SMLoc L);
+
bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -200,6 +231,7 @@ private:
RegKind MatchKind);
OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
+ OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands);
OperandMatchResultTy tryParseMRSSystemRegister(OperandVector &Operands);
OperandMatchResultTy tryParseSysReg(OperandVector &Operands);
OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands);
@@ -226,6 +258,7 @@ private:
OperandMatchResultTy tryParseVectorList(OperandVector &Operands,
bool ExpectMatch = false);
OperandMatchResultTy tryParseSVEPattern(OperandVector &Operands);
+ OperandMatchResultTy tryParseGPR64x8(OperandVector &Operands);
public:
enum AArch64MatchResultTy {
@@ -238,7 +271,7 @@ public:
AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII) {
- IsILP32 = Options.getABIName() == "ilp32";
+ IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32;
MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
if (S.getTargetStreamer() == nullptr)
@@ -371,6 +404,7 @@ private:
const char *Data;
unsigned Length;
unsigned Val; // Not the enum since not all values have names.
+ bool HasnXSModifier;
};
struct SysRegOp {
@@ -540,6 +574,11 @@ public:
return StringRef(Barrier.Data, Barrier.Length);
}
+ bool getBarriernXSModifier() const {
+ assert(Kind == k_Barrier && "Invalid access!");
+ return Barrier.HasnXSModifier;
+ }
+
unsigned getReg() const override {
assert(Kind == k_Register && "Invalid access!");
return Reg.RegNum;
@@ -711,7 +750,8 @@ public:
ELFRefKind == AArch64MCExpr::VK_GOTTPREL_LO12_NC ||
ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12 ||
ELFRefKind == AArch64MCExpr::VK_SECREL_LO12 ||
- ELFRefKind == AArch64MCExpr::VK_SECREL_HI12) {
+ ELFRefKind == AArch64MCExpr::VK_SECREL_HI12 ||
+ ELFRefKind == AArch64MCExpr::VK_GOT_PAGE_LO15) {
// Note that we don't range-check the addend. It's adjusted modulo page
// size when converted, so there is no "out of range" condition when using
// @pageoff.
@@ -857,7 +897,8 @@ public:
if (!isShiftedImm() && (!isImm() || !isa<MCConstantExpr>(getImm())))
return DiagnosticPredicateTy::NoMatch;
- bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value;
+ bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value ||
+ std::is_same<int8_t, T>::value;
if (auto ShiftedImm = getShiftedVal<8>())
if (!(IsByte && ShiftedImm->second) &&
AArch64_AM::isSVECpyImm<T>(uint64_t(ShiftedImm->first)
@@ -874,7 +915,8 @@ public:
if (!isShiftedImm() && (!isImm() || !isa<MCConstantExpr>(getImm())))
return DiagnosticPredicateTy::NoMatch;
- bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value;
+ bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value ||
+ std::is_same<int8_t, T>::value;
if (auto ShiftedImm = getShiftedVal<8>())
if (!(IsByte && ShiftedImm->second) &&
AArch64_AM::isSVEAddSubImm<T>(ShiftedImm->first
@@ -999,7 +1041,12 @@ public:
AArch64_AM::getFP64Imm(getFPImm().bitcastToAPInt()) != -1;
}
- bool isBarrier() const { return Kind == k_Barrier; }
+ bool isBarrier() const {
+ return Kind == k_Barrier && !getBarriernXSModifier();
+ }
+ bool isBarriernXS() const {
+ return Kind == k_Barrier && getBarriernXSModifier();
+ }
bool isSysReg() const { return Kind == k_SysReg; }
bool isMRSSystemRegister() const {
@@ -1126,6 +1173,12 @@ public:
AArch64MCRegisterClasses[AArch64::GPR32RegClassID].contains(Reg.RegNum);
}
+ bool isGPR64x8() const {
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
+ AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID].contains(
+ Reg.RegNum);
+ }
+
bool isWSeqPair() const {
return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains(
@@ -1689,6 +1742,11 @@ public:
Inst.addOperand(MCOperand::createImm(getBarrier()));
}
+ void addBarriernXSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getBarrier()));
+ }
+
void addMRSSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
@@ -1924,11 +1982,13 @@ public:
static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val,
StringRef Str,
SMLoc S,
- MCContext &Ctx) {
+ MCContext &Ctx,
+ bool HasnXSModifier) {
auto Op = std::make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
Op->Barrier.Data = Str.data();
Op->Barrier.Length = Str.size();
+ Op->Barrier.HasnXSModifier = HasnXSModifier;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -2073,14 +2133,14 @@ void AArch64Operand::print(raw_ostream &OS) const {
case k_PSBHint:
OS << getPSBHintName();
break;
+ case k_BTIHint:
+ OS << getBTIHintName();
+ break;
case k_Register:
OS << "<register " << getReg() << ">";
if (!getShiftExtendAmount() && !hasShiftExtendAmount())
break;
LLVM_FALLTHROUGH;
- case k_BTIHint:
- OS << getBTIHintName();
- break;
case k_ShiftExtend:
OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
<< getShiftExtendAmount();
@@ -2510,6 +2570,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE &&
ELFRefKind != AArch64MCExpr::VK_ABS_PAGE_NC &&
ELFRefKind != AArch64MCExpr::VK_GOT_PAGE &&
+ ELFRefKind != AArch64MCExpr::VK_GOT_PAGE_LO15 &&
ELFRefKind != AArch64MCExpr::VK_GOTTPREL_PAGE &&
ELFRefKind != AArch64MCExpr::VK_TLSDESC_PAGE) {
// The operand must be an @page or @gotpage qualified symbolref.
@@ -2843,6 +2904,7 @@ static const struct Extension {
{"predres", {AArch64::FeaturePredRes}},
{"ccdp", {AArch64::FeatureCacheDeepPersist}},
{"mte", {AArch64::FeatureMTE}},
+ {"memtag", {AArch64::FeatureMTE}},
{"tlb-rmi", {AArch64::FeatureTLB_RMI}},
{"pan-rwv", {AArch64::FeaturePAN_RWV}},
{"ccpp", {AArch64::FeatureCCPP}},
@@ -2853,6 +2915,10 @@ static const struct Extension {
{"sve2-sm4", {AArch64::FeatureSVE2SM4}},
{"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
{"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}},
+ {"ls64", {AArch64::FeatureLS64}},
+ {"xs", {AArch64::FeatureXS}},
+ {"pauth", {AArch64::FeaturePAuth}},
+ {"flagm", {AArch64::FeatureFlagM}},
// FIXME: Unsupported extensions
{"pan", {}},
{"lor", {}},
@@ -2873,15 +2939,16 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv8.5a";
else if (FBS[AArch64::HasV8_6aOps])
Str += "ARMv8.6a";
+ else if (FBS[AArch64::HasV8_7aOps])
+ Str += "ARMv8.7a";
else {
- auto ext = std::find_if(std::begin(ExtensionMap),
- std::end(ExtensionMap),
- [&](const Extension& e)
+ SmallVector<std::string, 2> ExtMatches;
+ for (const auto& Ext : ExtensionMap) {
// Use & in case multiple features are enabled
- { return (FBS & e.Features) != FeatureBitset(); }
- );
-
- Str += ext != std::end(ExtensionMap) ? ext->Name : "(unknown)";
+ if ((FBS & Ext.Features) != FeatureBitset())
+ ExtMatches.push_back(Ext.Name);
+ }
+ Str += !ExtMatches.empty() ? llvm::join(ExtMatches, ", ") : "(unknown)";
}
}
@@ -2926,7 +2993,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
if (!IC)
return TokError("invalid operand for IC instruction");
else if (!IC->haveFeatures(getSTI().getFeatureBits())) {
- std::string Str("IC " + std::string(IC->Name) + " requires ");
+ std::string Str("IC " + std::string(IC->Name) + " requires: ");
setRequiredFeatureString(IC->getRequiredFeatures(), Str);
return TokError(Str.c_str());
}
@@ -2936,7 +3003,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
if (!DC)
return TokError("invalid operand for DC instruction");
else if (!DC->haveFeatures(getSTI().getFeatureBits())) {
- std::string Str("DC " + std::string(DC->Name) + " requires ");
+ std::string Str("DC " + std::string(DC->Name) + " requires: ");
setRequiredFeatureString(DC->getRequiredFeatures(), Str);
return TokError(Str.c_str());
}
@@ -2946,7 +3013,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
if (!AT)
return TokError("invalid operand for AT instruction");
else if (!AT->haveFeatures(getSTI().getFeatureBits())) {
- std::string Str("AT " + std::string(AT->Name) + " requires ");
+ std::string Str("AT " + std::string(AT->Name) + " requires: ");
setRequiredFeatureString(AT->getRequiredFeatures(), Str);
return TokError(Str.c_str());
}
@@ -2956,7 +3023,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
if (!TLBI)
return TokError("invalid operand for TLBI instruction");
else if (!TLBI->haveFeatures(getSTI().getFeatureBits())) {
- std::string Str("TLBI " + std::string(TLBI->Name) + " requires ");
+ std::string Str("TLBI " + std::string(TLBI->Name) + " requires: ");
setRequiredFeatureString(TLBI->getRequiredFeatures(), Str);
return TokError(Str.c_str());
}
@@ -2967,7 +3034,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
return TokError("invalid operand for prediction restriction instruction");
else if (!PRCTX->haveFeatures(getSTI().getFeatureBits())) {
std::string Str(
- Mnemonic.upper() + std::string(PRCTX->Name) + " requires ");
+ Mnemonic.upper() + std::string(PRCTX->Name) + " requires: ");
setRequiredFeatureString(PRCTX->getRequiredFeatures(), Str);
return TokError(Str.c_str());
}
@@ -3011,11 +3078,11 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
if (Mnemonic == "tsb" && Tok.isNot(AsmToken::Identifier)) {
TokError("'csync' operand expected");
return MatchOperand_ParseFail;
- // Can be either a #imm style literal or an option name
} else if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) {
// Immediate operand.
const MCExpr *ImmVal;
SMLoc ExprLoc = getLoc();
+ AsmToken IntTok = Tok;
if (getParser().parseExpression(ImmVal))
return MatchOperand_ParseFail;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
@@ -3023,13 +3090,22 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
Error(ExprLoc, "immediate value expected for barrier operand");
return MatchOperand_ParseFail;
}
- if (MCE->getValue() < 0 || MCE->getValue() > 15) {
+ int64_t Value = MCE->getValue();
+ if (Mnemonic == "dsb" && Value > 15) {
+ // This case is a no match here, but it might be matched by the nXS
+ // variant. The optional '#' is deliberately not unlexed, since it is not
+ // needed to characterize an integer immediate.
+ Parser.getLexer().UnLex(IntTok);
+ return MatchOperand_NoMatch;
+ }
+ if (Value < 0 || Value > 15) {
Error(ExprLoc, "barrier operand out of range");
return MatchOperand_ParseFail;
}
- auto DB = AArch64DB::lookupDBByEncoding(MCE->getValue());
- Operands.push_back(AArch64Operand::CreateBarrier(
- MCE->getValue(), DB ? DB->Name : "", ExprLoc, getContext()));
+ auto DB = AArch64DB::lookupDBByEncoding(Value);
+ Operands.push_back(AArch64Operand::CreateBarrier(Value, DB ? DB->Name : "",
+ ExprLoc, getContext(),
+ false /*hasnXSModifier*/));
return MatchOperand_Success;
}
@@ -3038,9 +3114,10 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- auto TSB = AArch64TSB::lookupTSBByName(Tok.getString());
+ StringRef Operand = Tok.getString();
+ auto TSB = AArch64TSB::lookupTSBByName(Operand);
+ auto DB = AArch64DB::lookupDBByName(Operand);
// The only valid named option for ISB is 'sy'
- auto DB = AArch64DB::lookupDBByName(Tok.getString());
if (Mnemonic == "isb" && (!DB || DB->Encoding != AArch64DB::sy)) {
TokError("'sy' or #imm operand expected");
return MatchOperand_ParseFail;
@@ -3049,12 +3126,73 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
TokError("'csync' operand expected");
return MatchOperand_ParseFail;
} else if (!DB && !TSB) {
+ if (Mnemonic == "dsb") {
+ // This case is a no match here, but it might be matched by the nXS
+ // variant.
+ return MatchOperand_NoMatch;
+ }
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
}
Operands.push_back(AArch64Operand::CreateBarrier(
- DB ? DB->Encoding : TSB->Encoding, Tok.getString(), getLoc(), getContext()));
+ DB ? DB->Encoding : TSB->Encoding, Tok.getString(), getLoc(),
+ getContext(), false /*hasnXSModifier*/));
+ Parser.Lex(); // Consume the option
+
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy
+AArch64AsmParser::tryParseBarriernXSOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+
+ assert(Mnemonic == "dsb" && "Instruction does not accept nXS operands");
+ if (Mnemonic != "dsb")
+ return MatchOperand_ParseFail;
+
+ if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) {
+ // Immediate operand.
+ const MCExpr *ImmVal;
+ SMLoc ExprLoc = getLoc();
+ if (getParser().parseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ Error(ExprLoc, "immediate value expected for barrier operand");
+ return MatchOperand_ParseFail;
+ }
+ int64_t Value = MCE->getValue();
+ // v8.7-A DSB in the nXS variant accepts only the following immediate
+ // values: 16, 20, 24, 28.
+ if (Value != 16 && Value != 20 && Value != 24 && Value != 28) {
+ Error(ExprLoc, "barrier operand out of range");
+ return MatchOperand_ParseFail;
+ }
+ auto DB = AArch64DBnXS::lookupDBnXSByImmValue(Value);
+ Operands.push_back(AArch64Operand::CreateBarrier(DB->Encoding, DB->Name,
+ ExprLoc, getContext(),
+ true /*hasnXSModifier*/));
+ return MatchOperand_Success;
+ }
+
+ if (Tok.isNot(AsmToken::Identifier)) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ StringRef Operand = Tok.getString();
+ auto DB = AArch64DBnXS::lookupDBnXSByName(Operand);
+
+ if (!DB) {
+ TokError("invalid barrier option name");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(
+ AArch64Operand::CreateBarrier(DB->Encoding, Tok.getString(), getLoc(),
+ getContext(), true /*hasnXSModifier*/));
Parser.Lex(); // Consume the option
return MatchOperand_Success;
@@ -3300,6 +3438,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
.Case("tprel_lo12_nc", AArch64MCExpr::VK_TPREL_LO12_NC)
.Case("tlsdesc_lo12", AArch64MCExpr::VK_TLSDESC_LO12)
.Case("got", AArch64MCExpr::VK_GOT_PAGE)
+ .Case("gotpage_lo15", AArch64MCExpr::VK_GOT_PAGE_LO15)
.Case("got_lo12", AArch64MCExpr::VK_GOT_LO12)
.Case("gottprel", AArch64MCExpr::VK_GOTTPREL_PAGE)
.Case("gottprel_lo12", AArch64MCExpr::VK_GOTTPREL_LO12_NC)
@@ -3568,6 +3707,17 @@ bool AArch64AsmParser::parseOptionalMulOperand(OperandVector &Operands) {
return Error(getLoc(), "expected 'vl' or '#<imm>'");
}
+bool AArch64AsmParser::parseKeywordOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ auto Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+ Operands.push_back(AArch64Operand::CreateToken(Tok.getString(), false,
+ Tok.getLoc(), getContext()));
+ Parser.Lex();
+ return false;
+}
+
/// parseOperand - Parse a arm instruction operand. For now this parses the
/// operand regardless of the mnemonic.
bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
@@ -3632,6 +3782,11 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
if (GotShift != MatchOperand_NoMatch)
return GotShift;
+ // If this is a two-word mnemonic, parse its special keyword
+ // operand as an identifier.
+ if (Mnemonic == "brb")
+ return parseKeywordOperand(Operands);
+
// This was not a register so parse other operands that start with an
// identifier (like labels) as expressions and create them as immediates.
const MCExpr *IdVal;
@@ -3740,6 +3895,66 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
}
}
+bool AArch64AsmParser::parseImmExpr(int64_t &Out) {
+ const MCExpr *Expr = nullptr;
+ SMLoc L = getLoc();
+ if (check(getParser().parseExpression(Expr), L, "expected expression"))
+ return true;
+ const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
+ if (check(!Value, L, "expected constant expression"))
+ return true;
+ Out = Value->getValue();
+ return false;
+}
+
+bool AArch64AsmParser::parseComma() {
+ if (check(getParser().getTok().isNot(AsmToken::Comma), getLoc(),
+ "expected comma"))
+ return true;
+ // Eat the comma
+ getParser().Lex();
+ return false;
+}
+
+bool AArch64AsmParser::parseRegisterInRange(unsigned &Out, unsigned Base,
+ unsigned First, unsigned Last) {
+ unsigned Reg;
+ SMLoc Start, End;
+ if (check(ParseRegister(Reg, Start, End), getLoc(), "expected register"))
+ return true;
+
+ // Special handling for FP and LR; they aren't linearly after x28 in
+ // the registers enum.
+ unsigned RangeEnd = Last;
+ if (Base == AArch64::X0) {
+ if (Last == AArch64::FP) {
+ RangeEnd = AArch64::X28;
+ if (Reg == AArch64::FP) {
+ Out = 29;
+ return false;
+ }
+ }
+ if (Last == AArch64::LR) {
+ RangeEnd = AArch64::X28;
+ if (Reg == AArch64::FP) {
+ Out = 29;
+ return false;
+ } else if (Reg == AArch64::LR) {
+ Out = 30;
+ return false;
+ }
+ }
+ }
+
+ if (check(Reg < First || Reg > RangeEnd, Start,
+ Twine("expected register in range ") +
+ AArch64InstPrinter::getRegisterName(First) + " to " +
+ AArch64InstPrinter::getRegisterName(Last)))
+ return true;
+ Out = Reg - Base;
+ return false;
+}
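[Editor's note: parseRegisterInRange above numbers X0..X28 as 0..28 and maps FP and LR, which do not follow X28 in the register enum, to 29 and 30 for the SEH directives. A standalone sketch of that mapping; the enum values are placeholders, and only the resulting 0..30 numbering mirrors the parser.]

// Standalone sketch of the FP/LR special-casing above. The enum values here
// are placeholders; only the resulting 0..30 numbering mirrors the parser.
#include <optional>

enum class XReg { X0 = 0, X28 = 28, FP = 100, LR = 101 }; // FP/LR not contiguous

std::optional<unsigned> sehRegNumberSketch(XReg R) {
  if (R == XReg::FP) return 29;
  if (R == XReg::LR) return 30;
  unsigned N = static_cast<unsigned>(R);
  if (N <= 28) return N; // X0..X28 map to 0..28
  return std::nullopt;   // outside the accepted range
}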
+
bool AArch64AsmParser::regsEqual(const MCParsedAsmOperand &Op1,
const MCParsedAsmOperand &Op2) const {
auto &AOp1 = static_cast<const AArch64Operand&>(Op1);
@@ -5058,6 +5273,7 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
const MCObjectFileInfo::Environment Format =
getContext().getObjectFileInfo()->getObjectFileType();
bool IsMachO = Format == MCObjectFileInfo::IsMachO;
+ bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
auto IDVal = DirectiveID.getIdentifier().lower();
SMLoc Loc = DirectiveID.getLoc();
@@ -5086,6 +5302,57 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveLOH(IDVal, Loc);
else
return true;
+ } else if (IsCOFF) {
+ if (IDVal == ".seh_stackalloc")
+ parseDirectiveSEHAllocStack(Loc);
+ else if (IDVal == ".seh_endprologue")
+ parseDirectiveSEHPrologEnd(Loc);
+ else if (IDVal == ".seh_save_r19r20_x")
+ parseDirectiveSEHSaveR19R20X(Loc);
+ else if (IDVal == ".seh_save_fplr")
+ parseDirectiveSEHSaveFPLR(Loc);
+ else if (IDVal == ".seh_save_fplr_x")
+ parseDirectiveSEHSaveFPLRX(Loc);
+ else if (IDVal == ".seh_save_reg")
+ parseDirectiveSEHSaveReg(Loc);
+ else if (IDVal == ".seh_save_reg_x")
+ parseDirectiveSEHSaveRegX(Loc);
+ else if (IDVal == ".seh_save_regp")
+ parseDirectiveSEHSaveRegP(Loc);
+ else if (IDVal == ".seh_save_regp_x")
+ parseDirectiveSEHSaveRegPX(Loc);
+ else if (IDVal == ".seh_save_lrpair")
+ parseDirectiveSEHSaveLRPair(Loc);
+ else if (IDVal == ".seh_save_freg")
+ parseDirectiveSEHSaveFReg(Loc);
+ else if (IDVal == ".seh_save_freg_x")
+ parseDirectiveSEHSaveFRegX(Loc);
+ else if (IDVal == ".seh_save_fregp")
+ parseDirectiveSEHSaveFRegP(Loc);
+ else if (IDVal == ".seh_save_fregp_x")
+ parseDirectiveSEHSaveFRegPX(Loc);
+ else if (IDVal == ".seh_set_fp")
+ parseDirectiveSEHSetFP(Loc);
+ else if (IDVal == ".seh_add_fp")
+ parseDirectiveSEHAddFP(Loc);
+ else if (IDVal == ".seh_nop")
+ parseDirectiveSEHNop(Loc);
+ else if (IDVal == ".seh_save_next")
+ parseDirectiveSEHSaveNext(Loc);
+ else if (IDVal == ".seh_startepilogue")
+ parseDirectiveSEHEpilogStart(Loc);
+ else if (IDVal == ".seh_endepilogue")
+ parseDirectiveSEHEpilogEnd(Loc);
+ else if (IDVal == ".seh_trap_frame")
+ parseDirectiveSEHTrapFrame(Loc);
+ else if (IDVal == ".seh_pushframe")
+ parseDirectiveSEHMachineFrame(Loc);
+ else if (IDVal == ".seh_context")
+ parseDirectiveSEHContext(Loc);
+ else if (IDVal == ".seh_clear_unwound_to_call")
+ parseDirectiveSEHClearUnwoundToCall(Loc);
+ else
+ return true;
} else
return true;
return false;
@@ -5093,12 +5360,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
SmallVector<StringRef, 4> &RequestedExtensions) {
- const bool NoCrypto =
- (std::find(RequestedExtensions.begin(), RequestedExtensions.end(),
- "nocrypto") != std::end(RequestedExtensions));
- const bool Crypto =
- (std::find(RequestedExtensions.begin(), RequestedExtensions.end(),
- "crypto") != std::end(RequestedExtensions));
+ const bool NoCrypto = llvm::is_contained(RequestedExtensions, "nocrypto");
+ const bool Crypto = llvm::is_contained(RequestedExtensions, "crypto");
if (!NoCrypto && Crypto) {
switch (ArchKind) {
@@ -5114,6 +5377,8 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
+ case AArch64::ArchKind::ARMV8_7A:
+ case AArch64::ArchKind::ARMV8R:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
RequestedExtensions.push_back("sha2");
@@ -5134,6 +5399,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
+ case AArch64::ArchKind::ARMV8_7A:
RequestedExtensions.push_back("nosm4");
RequestedExtensions.push_back("nosha3");
RequestedExtensions.push_back("nosha2");
@@ -5167,7 +5433,8 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
MCSubtargetInfo &STI = copySTI();
std::vector<std::string> ArchFeatures(AArch64Features.begin(), AArch64Features.end());
- STI.setDefaultFeatures("generic", join(ArchFeatures.begin(), ArchFeatures.end(), ","));
+ STI.setDefaultFeatures("generic", /*TuneCPU*/ "generic",
+ join(ArchFeatures.begin(), ArchFeatures.end(), ","));
SmallVector<StringRef, 4> RequestedExtensions;
if (!ExtensionString.empty())
@@ -5269,7 +5536,7 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
}
MCSubtargetInfo &STI = copySTI();
- STI.setDefaultFeatures(CPU, "");
+ STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, "");
CurLoc = incrementLoc(CurLoc, CPU.size());
ExpandCryptoAEK(llvm::AArch64::getCPUArchKind(CPU), RequestedExtensions);
@@ -5537,6 +5804,238 @@ bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) {
return false;
}
+/// parseDirectiveSEHAllocStack
+/// ::= .seh_stackalloc
+bool AArch64AsmParser::parseDirectiveSEHAllocStack(SMLoc L) {
+ int64_t Size;
+ if (parseImmExpr(Size))
+ return true;
+ getTargetStreamer().EmitARM64WinCFIAllocStack(Size);
+ return false;
+}
+
+/// parseDirectiveSEHPrologEnd
+/// ::= .seh_endprologue
+bool AArch64AsmParser::parseDirectiveSEHPrologEnd(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIPrologEnd();
+ return false;
+}
+
+/// parseDirectiveSEHSaveR19R20X
+/// ::= .seh_save_r19r20_x
+bool AArch64AsmParser::parseDirectiveSEHSaveR19R20X(SMLoc L) {
+ int64_t Offset;
+ if (parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveR19R20X(Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFPLR
+/// ::= .seh_save_fplr
+bool AArch64AsmParser::parseDirectiveSEHSaveFPLR(SMLoc L) {
+ int64_t Offset;
+ if (parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFPLR(Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFPLRX
+/// ::= .seh_save_fplr_x
+bool AArch64AsmParser::parseDirectiveSEHSaveFPLRX(SMLoc L) {
+ int64_t Offset;
+ if (parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFPLRX(Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveReg
+/// ::= .seh_save_reg
+bool AArch64AsmParser::parseDirectiveSEHSaveReg(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveReg(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveRegX
+/// ::= .seh_save_reg_x
+bool AArch64AsmParser::parseDirectiveSEHSaveRegX(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveRegX(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveRegP
+/// ::= .seh_save_regp
+bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveRegP(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveRegPX
+/// ::= .seh_save_regp_x
+bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveRegPX(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveLRPair
+/// ::= .seh_save_lrpair
+bool AArch64AsmParser::parseDirectiveSEHSaveLRPair(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ L = getLoc();
+ if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ if (check(((Reg - 19) % 2 != 0), L,
+ "expected register with even offset from x19"))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveLRPair(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFReg
+/// ::= .seh_save_freg
+bool AArch64AsmParser::parseDirectiveSEHSaveFReg(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFReg(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFRegX
+/// ::= .seh_save_freg_x
+bool AArch64AsmParser::parseDirectiveSEHSaveFRegX(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFRegX(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFRegP
+/// ::= .seh_save_fregp
+bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFRegP(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFRegPX
+/// ::= .seh_save_fregp_x
+bool AArch64AsmParser::parseDirectiveSEHSaveFRegPX(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFRegPX(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSetFP
+/// ::= .seh_set_fp
+bool AArch64AsmParser::parseDirectiveSEHSetFP(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFISetFP();
+ return false;
+}
+
+/// parseDirectiveSEHAddFP
+/// ::= .seh_add_fp
+bool AArch64AsmParser::parseDirectiveSEHAddFP(SMLoc L) {
+ int64_t Size;
+ if (parseImmExpr(Size))
+ return true;
+ getTargetStreamer().EmitARM64WinCFIAddFP(Size);
+ return false;
+}
+
+/// parseDirectiveSEHNop
+/// ::= .seh_nop
+bool AArch64AsmParser::parseDirectiveSEHNop(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFINop();
+ return false;
+}
+
+/// parseDirectiveSEHSaveNext
+/// ::= .seh_save_next
+bool AArch64AsmParser::parseDirectiveSEHSaveNext(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFISaveNext();
+ return false;
+}
+
+/// parseDirectiveSEHEpilogStart
+/// ::= .seh_startepilogue
+bool AArch64AsmParser::parseDirectiveSEHEpilogStart(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIEpilogStart();
+ return false;
+}
+
+/// parseDirectiveSEHEpilogEnd
+/// ::= .seh_endepilogue
+bool AArch64AsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIEpilogEnd();
+ return false;
+}
+
+/// parseDirectiveSEHTrapFrame
+/// ::= .seh_trap_frame
+bool AArch64AsmParser::parseDirectiveSEHTrapFrame(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFITrapFrame();
+ return false;
+}
+
+/// parseDirectiveSEHMachineFrame
+/// ::= .seh_pushframe
+bool AArch64AsmParser::parseDirectiveSEHMachineFrame(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIMachineFrame();
+ return false;
+}
+
+/// parseDirectiveSEHContext
+/// ::= .seh_context
+bool AArch64AsmParser::parseDirectiveSEHContext(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIContext();
+ return false;
+}
+
+/// parseDirectiveSEHClearUnwoundToCall
+/// ::= .seh_clear_unwound_to_call
+bool AArch64AsmParser::parseDirectiveSEHClearUnwoundToCall(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIClearUnwoundToCall();
+ return false;
+}
+
bool
AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
@@ -5824,3 +6323,26 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
return MatchOperand_Success;
}
+
+OperandMatchResultTy
+AArch64AsmParser::tryParseGPR64x8(OperandVector &Operands) {
+ SMLoc SS = getLoc();
+
+ unsigned XReg;
+ if (tryParseScalarRegister(XReg) != MatchOperand_Success)
+ return MatchOperand_NoMatch;
+
+ MCContext &ctx = getContext();
+ const MCRegisterInfo *RI = ctx.getRegisterInfo();
+ int X8Reg = RI->getMatchingSuperReg(
+ XReg, AArch64::x8sub_0,
+ &AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID]);
+ if (!X8Reg) {
+ Error(SS, "expected an even-numbered x-register in the range [x0,x22]");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(
+ AArch64Operand::CreateReg(X8Reg, RegKind::Scalar, SS, getLoc(), ctx));
+ return MatchOperand_Success;
+}
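For readers unfamiliar with the llvm::is_contained helper used in the ExpandCryptoAEK hunk above, here is a minimal standalone sketch of the same idiom. It is not part of the patch; it only assumes llvm/ADT/STLExtras.h from an LLVM source tree, and the function name requestsCrypto is made up for illustration.

#include "llvm/ADT/STLExtras.h" // provides llvm::is_contained
#include <string>
#include <vector>

// Equivalent to the std::find(...) != std::end(...) pattern the patch removes.
static bool requestsCrypto(const std::vector<std::string> &Exts) {
  return llvm::is_contained(Exts, "crypto") &&
         !llvm::is_contained(Exts, "nocrypto");
}

The patch applies the same replacement directly to the RequestedExtensions SmallVector; the behaviour is unchanged, only the call site is shorter.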
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 1ff4abb34054..dca76f8457fe 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -62,6 +62,10 @@ static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
@@ -267,8 +271,16 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint32_t Insn =
(Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
- // Calling the auto-generated decoder function.
- return decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
+ const uint8_t *Tables[] = {DecoderTable32, DecoderTableFallback32};
+
+ for (auto Table : Tables) {
+ DecodeStatus Result =
+ decodeInstruction(Table, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail)
+ return Result;
+ }
+
+ return MCDisassembler::Fail;
}
static MCSymbolizer *
@@ -449,6 +461,35 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
+static const unsigned GPR64x8DecoderTable[] = {
+ AArch64::X0_X1_X2_X3_X4_X5_X6_X7,
+ AArch64::X2_X3_X4_X5_X6_X7_X8_X9,
+ AArch64::X4_X5_X6_X7_X8_X9_X10_X11,
+ AArch64::X6_X7_X8_X9_X10_X11_X12_X13,
+ AArch64::X8_X9_X10_X11_X12_X13_X14_X15,
+ AArch64::X10_X11_X12_X13_X14_X15_X16_X17,
+ AArch64::X12_X13_X14_X15_X16_X17_X18_X19,
+ AArch64::X14_X15_X16_X17_X18_X19_X20_X21,
+ AArch64::X16_X17_X18_X19_X20_X21_X22_X23,
+ AArch64::X18_X19_X20_X21_X22_X23_X24_X25,
+ AArch64::X20_X21_X22_X23_X24_X25_X26_X27,
+ AArch64::X22_X23_X24_X25_X26_X27_X28_FP,
+};
+
+static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 22)
+ return Fail;
+ if (RegNo & 1)
+ return Fail;
+
+ unsigned Register = GPR64x8DecoderTable[RegNo >> 1];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return Success;
+}
+
static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
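The new DecodeGPR64x8ClassRegisterClass callback above accepts only even register numbers up to x22 and maps each one to a consecutive-x8-tuple super-register. A small standalone sketch of that index math follows; it is not part of the patch, and the tuple names are reduced to plain strings purely for illustration.

#include <cassert>

// Mirrors GPR64x8DecoderTable: entry i covers registers x(2*i) .. x(2*i+7).
static const char *const Tuples[12] = {
    "x0..x7",   "x2..x9",   "x4..x11",  "x6..x13",
    "x8..x15",  "x10..x17", "x12..x19", "x14..x21",
    "x16..x23", "x18..x25", "x20..x27", "x22..x29(fp)"};

// Returns nullptr for encodings the decoder rejects (odd or greater than 22).
static const char *decodeX8Tuple(unsigned RegNo) {
  if (RegNo > 22 || (RegNo & 1))
    return nullptr;
  return Tuples[RegNo >> 1];
}

int main() {
  assert(decodeX8Tuple(0) == Tuples[0]);
  assert(decodeX8Tuple(22) == Tuples[11]);
  assert(decodeX8Tuple(23) == nullptr); // odd register number
  assert(decodeX8Tuple(24) == nullptr); // out of range
}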
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 4832ae8f415f..0f8b1d6584b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -52,10 +52,10 @@ AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
: CallLowering(&TLI) {}
namespace {
-struct IncomingArgHandler : public CallLowering::ValueHandler {
+struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
- : ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -101,9 +101,7 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
/// How the physical register gets marked varies between formal
/// parameters (it's a basic-block live-in), and a call instruction
/// (it's an implicit-def of the BL).
- virtual void markPhysRegUsed(unsigned PhysReg) = 0;
-
- bool isIncomingArgumentHandler() const override { return true; }
+ virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
uint64_t StackUsed;
};
@@ -113,7 +111,7 @@ struct FormalArgHandler : public IncomingArgHandler {
CCAssignFn *AssignFn)
: IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
- void markPhysRegUsed(unsigned PhysReg) override {
+ void markPhysRegUsed(MCRegister PhysReg) override {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
@@ -124,24 +122,22 @@ struct CallReturnHandler : public IncomingArgHandler {
MachineInstrBuilder MIB, CCAssignFn *AssignFn)
: IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
- void markPhysRegUsed(unsigned PhysReg) override {
+ void markPhysRegUsed(MCRegister PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
}
MachineInstrBuilder MIB;
};
-struct OutgoingArgHandler : public CallLowering::ValueHandler {
+struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn,
CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
int FPDiff = 0)
- : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
+ : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
StackSize(0), SPReg(0) {}
- bool isIncomingArgumentHandler() const override { return false; }
-
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
MachineFunction &MF = MIRBuilder.getMF();
@@ -191,6 +187,8 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
if (!Arg.IsFixed)
MaxSize = 0;
+ assert(Arg.Regs.size() == 1);
+
Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
? extendRegister(Arg.Regs[0], VA, MaxSize)
: Arg.Regs[0];
@@ -276,6 +274,7 @@ void AArch64CallLowering::splitToValueTypes(
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val,
ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI,
Register SwiftErrorVReg) const {
auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
@@ -421,7 +420,7 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
// Conservatively forward X8, since it might be used for an aggregate
// return.
if (!CCInfo.isAllocated(AArch64::X8)) {
- unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
+ Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
}
@@ -442,7 +441,7 @@ bool AArch64CallLowering::fallBackToDAGISel(const Function &F) const {
bool AArch64CallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const {
+ ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
MachineFunction &MF = MIRBuilder.getMF();
MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -624,64 +623,25 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
MachineRegisterInfo &MRI = MF.getRegInfo();
- for (unsigned i = 0; i < OutLocs.size(); ++i) {
- auto &ArgLoc = OutLocs[i];
- // If it's not a register, it's fine.
- if (!ArgLoc.isRegLoc()) {
- if (Info.IsVarArg) {
- // Be conservative and disallow variadic memory operands to match SDAG's
- // behaviour.
- // FIXME: If the caller's calling convention is C, then we can
- // potentially use its argument area. However, for cases like fastcc,
- // we can't do anything.
- LLVM_DEBUG(
- dbgs()
- << "... Cannot tail call vararg function with stack arguments\n");
- return false;
- }
- continue;
- }
-
- Register Reg = ArgLoc.getLocReg();
-
- // Only look at callee-saved registers.
- if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
- continue;
-
- LLVM_DEBUG(
- dbgs()
- << "... Call has an argument passed in a callee-saved register.\n");
-
- // Check if it was copied from.
- ArgInfo &OutInfo = OutArgs[i];
-
- if (OutInfo.Regs.size() > 1) {
- LLVM_DEBUG(
- dbgs() << "... Cannot handle arguments in multiple registers.\n");
- return false;
- }
+ if (Info.IsVarArg) {
+ // Be conservative and disallow variadic memory operands to match SDAG's
+ // behaviour.
+ // FIXME: If the caller's calling convention is C, then we can
+ // potentially use its argument area. However, for cases like fastcc,
+ // we can't do anything.
+ for (unsigned i = 0; i < OutLocs.size(); ++i) {
+ auto &ArgLoc = OutLocs[i];
+ if (ArgLoc.isRegLoc())
+ continue;
- // Check if we copy the register, walking through copies from virtual
- // registers. Note that getDefIgnoringCopies does not ignore copies from
- // physical registers.
- MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
- if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
LLVM_DEBUG(
dbgs()
- << "... Parameter was not copied into a VReg, cannot tail call.\n");
- return false;
- }
-
- // Got a copy. Verify that it's the same as the register we want.
- Register CopyRHS = RegDef->getOperand(1).getReg();
- if (CopyRHS != Reg) {
- LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
- "VReg, cannot tail call.\n");
+ << "... Cannot tail call vararg function with stack arguments\n");
return false;
}
}
- return true;
+ return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
}
bool AArch64CallLowering::isEligibleForTailCallOptimization(
@@ -796,7 +756,7 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
// When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
// x16 or x17.
- if (CallerF.getFunction().hasFnAttribute("branch-target-enforcement"))
+ if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
return AArch64::TCRETURNriBTI;
return AArch64::TCRETURNri;
@@ -816,7 +776,7 @@ bool AArch64CallLowering::lowerTailCall(
// TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
// register class. Until we can do that, we should fall back here.
- if (F.hasFnAttribute("branch-target-enforcement")) {
+ if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
LLVM_DEBUG(
dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
return false;
@@ -934,10 +894,9 @@ bool AArch64CallLowering::lowerTailCall(
// If Callee is a reg, since it is used by a target specific instruction,
// it must have a register class matching the constraint of that instruction.
if (Info.Callee.isReg())
- MIB->getOperand(0).setReg(constrainOperandRegClass(
- MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
- *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
- 0));
+ constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
+ *MF.getSubtarget().getRegBankInfo(), *MIB,
+ MIB->getDesc(), Info.Callee, 0);
MF.getFrameInfo().setHasTailCall();
Info.LoweredTailCall = true;
@@ -1019,10 +978,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// instruction, it must have a register class matching the
// constraint of that instruction.
if (Info.Callee.isReg())
- MIB->getOperand(0).setReg(constrainOperandRegClass(
- MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
- *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
- 0));
+ constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
+ *MF.getSubtarget().getRegBankInfo(), *MIB,
+ MIB->getDesc(), Info.Callee, 0);
// Finally we can copy the returned value back into its virtual-register. In
// symmetry with the arguments, the physical register must be an
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
index 640a86253059..1f45c9ebc048 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
@@ -34,13 +34,14 @@ public:
AArch64CallLowering(const AArch64TargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs,
+ ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
Register SwiftErrorVReg) const override;
bool fallBackToDAGISel(const Function &F) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
new file mode 100644
index 000000000000..bed1136c7a67
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
@@ -0,0 +1,29 @@
+//===- AArch64GlobalISelUtils.h ----------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file APIs for AArch64-specific helper functions used in the GlobalISel
+/// pipeline.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
+#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
+
+#include <cstdint>
+
+namespace llvm {
+namespace AArch64GISelUtils {
+
+/// \returns true if \p C is a legal immediate operand for an arithmetic
+/// instruction.
+constexpr bool isLegalArithImmed(const uint64_t C) {
+ return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
+}
+
+} // namespace AArch64GISelUtils
+} // namespace llvm
+
+#endif
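The isLegalArithImmed helper introduced in the new header encodes the AArch64 rule that an arithmetic immediate is a 12-bit value, optionally shifted left by 12 bits. A few worked constants as a standalone sketch; this is not part of the patch, and the function body is copied verbatim from the header above.

#include <cstdint>

// Copied from AArch64GlobalISelUtils.h above.
constexpr bool isLegalArithImmed(const uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

static_assert(isLegalArithImmed(0xABC), "plain 12-bit immediate");
static_assert(isLegalArithImmed(0x123000), "12-bit immediate, LSL #12");
static_assert(!isLegalArithImmed(0x1234), "needs 13 significant bits");
static_assert(!isLegalArithImmed(0x1000000), "shifted value exceeds 12 bits");

int main() { return 0; }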
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 7733fe7f7b24..fc5ef02e8457 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -18,6 +18,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
@@ -33,14 +34,18 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "aarch64-isel"
using namespace llvm;
+using namespace MIPatternMatch;
namespace {
@@ -98,15 +103,23 @@ private:
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
- bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
- int64_t CmpConstant,
- const CmpInst::Predicate &Pred,
+ ///@{
+ /// Helper functions for selectCompareBranch.
+ bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
+ MachineIRBuilder &MIB) const;
+ bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
+ MachineIRBuilder &MIB) const;
+ bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
+ MachineIRBuilder &MIB) const;
+ bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
+ ///@}
+
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
- bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
// Helper to generate an equivalent of scalar_to_vector into a new register,
@@ -147,6 +160,7 @@ private:
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
@@ -159,20 +173,72 @@ private:
MachineIRBuilder &MIRBuilder) const;
// Emit an integer compare between LHS and RHS, which checks for Predicate.
- //
- // This returns the produced compare instruction, and the predicate which
- // was ultimately used in the compare. The predicate may differ from what
- // is passed in \p Predicate due to optimization.
- std::pair<MachineInstr *, CmpInst::Predicate>
- emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
- MachineOperand &Predicate,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
+ MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
+ MachineOperand &Predicate,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Emit a floating point comparison between \p LHS and \p RHS.
+ /// \p Pred if given is the intended predicate to use.
+ MachineInstr *emitFPCompare(Register LHS, Register RHS,
+ MachineIRBuilder &MIRBuilder,
+ Optional<CmpInst::Predicate> = None) const;
+
+ MachineInstr *emitInstr(unsigned Opcode,
+ std::initializer_list<llvm::DstOp> DstOps,
+ std::initializer_list<llvm::SrcOp> SrcOps,
+ MachineIRBuilder &MIRBuilder,
+ const ComplexRendererFns &RenderFns = None) const;
+ /// Helper function to emit an add or sub instruction.
+ ///
+ /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
+ /// in a specific order.
+ ///
+ /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
+ ///
+ /// \code
+ /// const std::array<std::array<unsigned, 2>, 4> Table {
+ /// {{AArch64::ADDXri, AArch64::ADDWri},
+ /// {AArch64::ADDXrs, AArch64::ADDWrs},
+ /// {AArch64::ADDXrr, AArch64::ADDWrr},
+ /// {AArch64::SUBXri, AArch64::SUBWri},
+ /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
+ /// \endcode
+ ///
+ /// Each row in the table corresponds to a different addressing mode. Each
+ /// column corresponds to a different register size.
+ ///
+ /// \attention Rows must be structured as follows:
+ /// - Row 0: The ri opcode variants
+ /// - Row 1: The rs opcode variants
+ /// - Row 2: The rr opcode variants
+ /// - Row 3: The ri opcode variants for negative immediates
+ /// - Row 4: The rx opcode variants
+ ///
+ /// \attention Columns must be structured as follows:
+ /// - Column 0: The 64-bit opcode variants
+ /// - Column 1: The 32-bit opcode variants
+ ///
+ /// \p Dst is the destination register of the binop to emit.
+ /// \p LHS is the left-hand operand of the binop to emit.
+ /// \p RHS is the right-hand operand of the binop to emit.
+ MachineInstr *emitAddSub(
+ const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
+ Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
+ MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitTST(const Register &LHS, const Register &RHS,
+ MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
+ AArch64CC::CondCode CC,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
@@ -184,9 +250,24 @@ private:
MachineInstr *emitFMovForFConstant(MachineInstr &MI,
MachineRegisterInfo &MRI) const;
- /// Emit a CSet for a compare.
+ /// Emit a CSet for an integer compare.
+ ///
+ /// \p DefReg is expected to be a 32-bit scalar register.
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder) const;
+ /// Emit a CSet for a FP compare.
+ ///
+ /// \p Dst is expected to be a 32-bit scalar register.
+ MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Emit the overflow op for \p Opcode.
+ ///
+ /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
+ /// G_USUBO, etc.
+ std::pair<MachineInstr *, AArch64CC::CondCode>
+ emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
/// \p IsNegative is true if the test should be "not zero".
@@ -195,6 +276,11 @@ private:
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
+ /// Emit a CB(N)Z instruction which branches to \p DestMBB.
+ MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const;
+
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
// We use these manually instead of using the importer since it doesn't
// support SDNodeXForm.
@@ -316,13 +402,6 @@ private:
MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
- MachineOperand &RHS,
- CmpInst::Predicate &Predicate,
- MachineIRBuilder &MIB) const;
- MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
- MachineOperand &RHS,
- MachineIRBuilder &MIB) const;
/// Return true if \p MI is a load or store of \p NumBytes bytes.
bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
@@ -498,7 +577,7 @@ static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
if (!ValAndVReg)
return None;
- Immed = ValAndVReg->Value;
+ Immed = ValAndVReg->Value.getSExtValue();
} else
return None;
return Immed;
@@ -786,6 +865,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
#ifndef NDEBUG
ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
assert(ValidCopy && "Invalid copy.");
+ (void)KnownValid;
#endif
return ValidCopy;
};
@@ -932,44 +1012,173 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
return GenericOpc;
}
-static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
- const RegisterBankInfo &RBI) {
- const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
- bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
- AArch64::GPRRegBankID);
- LLT Ty = MRI.getType(I.getOperand(0).getReg());
- if (Ty == LLT::scalar(32))
- return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
- else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
- return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
- return 0;
-}
+MachineInstr *
+AArch64InstructionSelector::emitSelect(Register Dst, Register True,
+ Register False, AArch64CC::CondCode CC,
+ MachineIRBuilder &MIB) const {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
+ RBI.getRegBank(True, MRI, TRI)->getID() &&
+ "Expected both select operands to have the same regbank?");
+ LLT Ty = MRI.getType(True);
+ if (Ty.isVector())
+ return nullptr;
+ const unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) &&
+ "Expected 32 bit or 64 bit select only?");
+ const bool Is32Bit = Size == 32;
+ if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
+ unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
+ auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
+ constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
+ return &*FCSel;
+ }
+
+ // By default, we'll try and emit a CSEL.
+ unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
+ bool Optimized = false;
+ auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
+ &Optimized](Register &Reg, Register &OtherReg,
+ bool Invert) {
+ if (Optimized)
+ return false;
-/// Helper function to select the opcode for a G_FCMP.
-static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
- // If this is a compare against +0.0, then we don't have to explicitly
- // materialize a constant.
- const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
- bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
- unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
- if (OpSize != 32 && OpSize != 64)
- return 0;
- unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
- {AArch64::FCMPSri, AArch64::FCMPDri}};
- return CmpOpcTbl[ShouldUseImm][OpSize == 64];
-}
+ // Attempt to fold:
+ //
+ // %sub = G_SUB 0, %x
+ // %select = G_SELECT cc, %reg, %sub
+ //
+ // Into:
+ // %select = CSNEG %reg, %x, cc
+ Register MatchReg;
+ if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
+ Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
+
+ // Attempt to fold:
+ //
+ // %xor = G_XOR %x, -1
+ // %select = G_SELECT cc, %reg, %xor
+ //
+ // Into:
+ // %select = CSINV %reg, %x, cc
+ if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
+
+ // Attempt to fold:
+ //
+ // %add = G_ADD %x, 1
+ // %select = G_SELECT cc, %reg, %add
+ //
+ // Into:
+ // %select = CSINC %reg, %x, cc
+ if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
-/// Returns true if \p P is an unsigned integer comparison predicate.
-static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
- switch (P) {
- default:
return false;
- case CmpInst::ICMP_UGT:
- case CmpInst::ICMP_UGE:
- case CmpInst::ICMP_ULT:
- case CmpInst::ICMP_ULE:
- return true;
- }
+ };
+
+ // Helper lambda which tries to use CSINC/CSINV for the instruction when its
+ // true/false values are constants.
+ // FIXME: All of these patterns already exist in tablegen. We should be
+ // able to import these.
+ auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
+ &Optimized]() {
+ if (Optimized)
+ return false;
+ auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
+ auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
+ if (!TrueCst && !FalseCst)
+ return false;
+
+ Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
+ if (TrueCst && FalseCst) {
+ int64_t T = TrueCst->Value.getSExtValue();
+ int64_t F = FalseCst->Value.getSExtValue();
+
+ if (T == 0 && F == 1) {
+ // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ True = ZReg;
+ False = ZReg;
+ return true;
+ }
+
+ if (T == 0 && F == -1) {
+ // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ True = ZReg;
+ False = ZReg;
+ return true;
+ }
+ }
+
+ if (TrueCst) {
+ int64_t T = TrueCst->Value.getSExtValue();
+ if (T == 1) {
+ // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ True = False;
+ False = ZReg;
+ CC = AArch64CC::getInvertedCondCode(CC);
+ return true;
+ }
+
+ if (T == -1) {
+ // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ True = False;
+ False = ZReg;
+ CC = AArch64CC::getInvertedCondCode(CC);
+ return true;
+ }
+ }
+
+ if (FalseCst) {
+ int64_t F = FalseCst->Value.getSExtValue();
+ if (F == 1) {
+ // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ False = ZReg;
+ return true;
+ }
+
+ if (F == -1) {
+ // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ False = ZReg;
+ return true;
+ }
+ }
+ return false;
+ };
+
+ Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
+ Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
+ Optimized |= TryOptSelectCst();
+ auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
+ constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
+ return &*SelectInst;
}
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
@@ -1099,7 +1308,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
}
if (VRegAndVal)
- C = VRegAndVal->Value;
+ C = VRegAndVal->Value.getSExtValue();
break;
}
case TargetOpcode::G_ASHR:
@@ -1109,7 +1318,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
auto VRegAndVal =
getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (VRegAndVal)
- C = VRegAndVal->Value;
+ C = VRegAndVal->Value.getSExtValue();
break;
}
}
@@ -1211,8 +1420,9 @@ MachineInstr *AArch64InstructionSelector::emitTestBit(
}
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
- MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
- MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
+ MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
+ MachineIRBuilder &MIB) const {
+ assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
// Given something like this:
//
// %x = ...Something...
@@ -1230,65 +1440,96 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
//
// TBNZ %x %bb.3
//
- if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
- return false;
-
- // Need to be comparing against 0 to fold.
- if (CmpConstant != 0)
- return false;
-
- MachineRegisterInfo &MRI = *MIB.getMRI();
-
- // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
- // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
- // so folding would be redundant.
- if (Pred != CmpInst::Predicate::ICMP_EQ &&
- Pred != CmpInst::Predicate::ICMP_NE)
- return false;
// Check if the AND has a constant on its RHS which we can use as a mask.
// If it's a power of 2, then it's the same as checking a specific bit.
// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
- auto MaybeBit =
- getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
- if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
+ auto MaybeBit = getConstantVRegValWithLookThrough(
+ AndInst.getOperand(2).getReg(), *MIB.getMRI());
+ if (!MaybeBit)
+ return false;
+
+ int32_t Bit = MaybeBit->Value.exactLogBase2();
+ if (Bit < 0)
return false;
- uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
- Register TestReg = AndInst->getOperand(1).getReg();
- bool Invert = Pred == CmpInst::Predicate::ICMP_NE;
+ Register TestReg = AndInst.getOperand(1).getReg();
// Emit a TB(N)Z.
emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
return true;
}
-bool AArch64InstructionSelector::selectCompareBranch(
- MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
+ bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const {
+ assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
+ AArch64::GPRRegBankID &&
+ "Expected GPRs only?");
+ auto Ty = MRI.getType(CompareReg);
+ unsigned Width = Ty.getSizeInBits();
+ assert(!Ty.isVector() && "Expected scalar only?");
+ assert(Width <= 64 && "Expected width to be at most 64?");
+ static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
+ {AArch64::CBNZW, AArch64::CBNZX}};
+ unsigned Opc = OpcTable[IsNegative][Width == 64];
+ auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
+ constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+ return &*BranchMI;
+}
- const Register CondReg = I.getOperand(0).getReg();
+bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
+ MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
+ assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
+ // totally clean. Some of them require two branches to implement.
+ auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
+ emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
+ Pred);
+ AArch64CC::CondCode CC1, CC2;
+ changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- MachineInstr *CCMI = MRI.getVRegDef(CondReg);
- if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
- CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
- if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
+ if (CC2 != AArch64CC::AL)
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
+ MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
+ assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
+ //
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ if (!ProduceNonFlagSettingCondBr)
return false;
- Register LHS = CCMI->getOperand(2).getReg();
- Register RHS = CCMI->getOperand(3).getReg();
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ auto Pred =
+ static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
+ Register LHS = ICmp.getOperand(2).getReg();
+ Register RHS = ICmp.getOperand(3).getReg();
+
+ // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- MachineIRBuilder MIB(I);
- CmpInst::Predicate Pred =
- (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
- MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
+ MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
// When we can emit a TB(N)Z, prefer that.
//
// Handle non-commutative condition codes first.
// Note that we don't want to do this when we have a G_AND because it can
// become a tst. The tst will make the test bit in the TB(N)Z redundant.
- if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
- int64_t C = VRegAndVal->Value;
+ if (VRegAndVal && !AndInst) {
+ int64_t C = VRegAndVal->Value.getSExtValue();
// When we have a greater-than comparison, we can just test if the msb is
// zero.
@@ -1309,54 +1550,97 @@ bool AArch64InstructionSelector::selectCompareBranch(
}
}
- if (!VRegAndVal) {
- std::swap(RHS, LHS);
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- LHSMI = getDefIgnoringCopies(LHS, MRI);
+ // Attempt to handle commutative condition codes. Right now, that's only
+ // eq/ne.
+ if (ICmpInst::isEquality(Pred)) {
+ if (!VRegAndVal) {
+ std::swap(RHS, LHS);
+ VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
+ }
+
+ if (VRegAndVal && VRegAndVal->Value == 0) {
+ // If there's a G_AND feeding into this branch, try to fold it away by
+ // emitting a TB(N)Z instead.
+ //
+ // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
+ // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
+ // would be redundant.
+ if (AndInst &&
+ tryOptAndIntoCompareBranch(
+ *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
+ I.eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, try to emit a CB(N)Z instead.
+ auto LHSTy = MRI.getType(LHS);
+ if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
+ emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
+ I.eraseFromParent();
+ return true;
+ }
+ }
}
- if (!VRegAndVal || VRegAndVal->Value != 0) {
- // If we can't select a CBZ then emit a cmp + Bcc.
- MachineInstr *Cmp;
- std::tie(Cmp, Pred) = emitIntegerCompare(
- CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB);
- if (!Cmp)
- return false;
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
- I.eraseFromParent();
+ return false;
+}
+
+bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
+ MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
+ assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
return true;
+
+ // Couldn't optimize. Emit a compare + a Bcc.
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ auto PredOp = ICmp.getOperand(1);
+ emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
+ static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectCompareBranch(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ Register CondReg = I.getOperand(0).getReg();
+ MachineInstr *CCMI = MRI.getVRegDef(CondReg);
+ if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
+ CondReg = CCMI->getOperand(1).getReg();
+ CCMI = MRI.getVRegDef(CondReg);
}
- // Try to emit a TB(N)Z for an eq or ne condition.
- if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
- MIB)) {
+ // Try to select the G_BRCOND using whatever is feeding the condition if
+ // possible.
+ MachineIRBuilder MIB(I);
+ unsigned CCMIOpc = CCMI->getOpcode();
+ if (CCMIOpc == TargetOpcode::G_FCMP)
+ return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
+ if (CCMIOpc == TargetOpcode::G_ICMP)
+ return selectCompareBranchFedByICmp(I, *CCMI, MIB);
+
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ if (ProduceNonFlagSettingCondBr) {
+ emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
+ I.getOperand(1).getMBB(), MIB);
I.eraseFromParent();
return true;
}
- const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
- if (RB.getID() != AArch64::GPRRegBankID)
- return false;
- if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
- return false;
-
- const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
- unsigned CBOpc = 0;
- if (CmpWidth <= 32)
- CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
- else if (CmpWidth == 64)
- CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
- else
- return false;
-
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
- .addUse(LHS)
- .addMBB(DestMBB)
- .constrainAllUses(TII, TRI, RBI);
-
+ // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
+ auto TstMI =
+ MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ auto Bcc = MIB.buildInstr(AArch64::Bcc)
+ .addImm(AArch64CC::EQ)
+ .addMBB(I.getOperand(1).getMBB());
I.eraseFromParent();
- return true;
+ return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
}
/// Returns the element immediate value of a vector shift operand if found.
@@ -1377,8 +1661,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
return None;
if (Idx == 1)
- ImmVal = VRegAndVal->Value;
- if (ImmVal != VRegAndVal->Value)
+ ImmVal = VRegAndVal->Value.getSExtValue();
+ if (ImmVal != VRegAndVal->Value.getSExtValue())
return None;
}
@@ -1441,6 +1725,14 @@ bool AArch64InstructionSelector::selectVectorSHL(
Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
} else if (Ty == LLT::vector(2, 32)) {
Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
+ } else if (Ty == LLT::vector(4, 16)) {
+ Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
+ } else if (Ty == LLT::vector(8, 16)) {
+ Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
+ } else if (Ty == LLT::vector(16, 8)) {
+ Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
+ } else if (Ty == LLT::vector(8, 8)) {
+ Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
return false;
@@ -1457,9 +1749,10 @@ bool AArch64InstructionSelector::selectVectorSHL(
return true;
}
-bool AArch64InstructionSelector::selectVectorASHR(
+bool AArch64InstructionSelector::selectVectorAshrLshr(
MachineInstr &I, MachineRegisterInfo &MRI) const {
- assert(I.getOpcode() == TargetOpcode::G_ASHR);
+ assert(I.getOpcode() == TargetOpcode::G_ASHR ||
+ I.getOpcode() == TargetOpcode::G_LSHR);
Register DstReg = I.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
Register Src1Reg = I.getOperand(1).getReg();
@@ -1468,25 +1761,40 @@ bool AArch64InstructionSelector::selectVectorASHR(
if (!Ty.isVector())
return false;
+ bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
+
+ // We expect the immediate case to be lowered in the PostLegalCombiner to
+ // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
+
// There is not a shift right register instruction, but the shift left
// register instruction takes a signed value, where negative numbers specify a
// right shift.
unsigned Opc = 0;
unsigned NegOpc = 0;
- const TargetRegisterClass *RC = nullptr;
+ const TargetRegisterClass *RC =
+ getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
if (Ty == LLT::vector(2, 64)) {
- Opc = AArch64::SSHLv2i64;
+ Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
NegOpc = AArch64::NEGv2i64;
- RC = &AArch64::FPR128RegClass;
} else if (Ty == LLT::vector(4, 32)) {
- Opc = AArch64::SSHLv4i32;
+ Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
NegOpc = AArch64::NEGv4i32;
- RC = &AArch64::FPR128RegClass;
} else if (Ty == LLT::vector(2, 32)) {
- Opc = AArch64::SSHLv2i32;
+ Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
NegOpc = AArch64::NEGv2i32;
- RC = &AArch64::FPR64RegClass;
+ } else if (Ty == LLT::vector(4, 16)) {
+ Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
+ NegOpc = AArch64::NEGv4i16;
+ } else if (Ty == LLT::vector(8, 16)) {
+ Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
+ NegOpc = AArch64::NEGv8i16;
+ } else if (Ty == LLT::vector(16, 8)) {
+ Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
+ NegOpc = AArch64::NEGv16i8;
+ } else if (Ty == LLT::vector(8, 8)) {
+ Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
+ NegOpc = AArch64::NEGv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
return false;
@@ -1569,7 +1877,6 @@ void AArch64InstructionSelector::materializeLargeCMVal(
AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
- return;
}
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
@@ -1624,6 +1931,40 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
MRI.setType(DstReg, LLT::scalar(64));
return true;
}
+ case AArch64::G_DUP: {
+ // Convert the type from p0 to s64 to help selection.
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ if (!DstTy.getElementType().isPointer())
+ return false;
+ MachineIRBuilder MIB(I);
+ auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
+ MRI.setType(I.getOperand(0).getReg(),
+ DstTy.changeElementType(LLT::scalar(64)));
+ MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+ I.getOperand(1).setReg(NewSrc.getReg(0));
+ return true;
+ }
+ case TargetOpcode::G_UITOFP:
+ case TargetOpcode::G_SITOFP: {
+ // If both source and destination regbanks are FPR, then convert the opcode
+ // to G_SITOF so that the importer can select it to an fpr variant.
+ // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
+ // copy.
+ Register SrcReg = I.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+ return false;
+
+ if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
+ if (I.getOpcode() == TargetOpcode::G_SITOFP)
+ I.setDesc(TII.get(AArch64::G_SITOF));
+ else
+ I.setDesc(TII.get(AArch64::G_UITOF));
+ return true;
+ }
+ return false;
+ }
default:
return false;
}
@@ -1664,6 +2005,14 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
return false;
}
+
+ // Also take the opportunity here to try to do some optimization.
+ // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
+ Register NegatedReg;
+ if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
+ return true;
+ I.getOperand(2).setReg(NegatedReg);
+ I.setDesc(TII.get(TargetOpcode::G_SUB));
return true;
}
@@ -1753,6 +2102,17 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
+ case TargetOpcode::G_BR: {
+ // If the branch jumps to the fallthrough block, don't bother emitting it.
+ // Only do this for -O0 for a good code size improvement, because when
+ // optimizations are enabled we want to leave this choice to
+ // MachineBlockPlacement.
+ bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
+ if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
case TargetOpcode::G_CONSTANT: {
@@ -1872,48 +2232,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
MachineIRBuilder MIB(I);
switch (Opcode) {
- case TargetOpcode::G_BRCOND: {
- if (Ty.getSizeInBits() > 32) {
- // We shouldn't need this on AArch64, but it would be implemented as an
- // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
- // bit being tested is < 32.
- LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
- << ", expected at most 32-bits");
- return false;
- }
-
- const Register CondReg = I.getOperand(0).getReg();
- MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
-
- // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
- // instructions will not be produced, as they are conditional branch
- // instructions that do not set flags.
- if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
- return true;
-
- if (ProduceNonFlagSettingCondBr) {
- auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
- .addUse(CondReg)
- .addImm(/*bit offset=*/0)
- .addMBB(DestMBB);
-
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
- } else {
- auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
- .addDef(AArch64::WZR)
- .addUse(CondReg)
- .addImm(1);
- constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
- auto Bcc =
- BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
- .addImm(AArch64CC::EQ)
- .addMBB(DestMBB);
-
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
- }
- }
+ case TargetOpcode::G_BRCOND:
+ return selectCompareBranch(I, MF, MRI);
case TargetOpcode::G_BRINDIRECT: {
I.setDesc(TII.get(AArch64::BR));
@@ -1993,6 +2313,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
+ const LLT s128 = LLT::scalar(128);
const LLT p0 = LLT::pointer(0, 64);
const Register DefReg = I.getOperand(0).getReg();
@@ -2002,10 +2323,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// FIXME: Redundant check, but even less readable when factored out.
if (isFP) {
- if (Ty != s32 && Ty != s64) {
+ if (Ty != s32 && Ty != s64 && Ty != s128) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
<< " constant, expected: " << s32 << " or " << s64
- << '\n');
+ << " or " << s128 << '\n');
return false;
}
@@ -2018,7 +2339,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// The case when we have 0.0 is covered by tablegen. Reject it here so we
// can be sure tablegen works correctly and isn't rescued by this code.
- if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
+ // 0.0 is not covered by tablegen for FP128, so handle that case here.
+ if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
return false;
} else {
// s32 and s64 are covered by tablegen.
@@ -2045,15 +2368,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Either emit a FMOV, or emit a copy to emit a normal mov.
const TargetRegisterClass &GPRRC =
DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
- const TargetRegisterClass &FPRRC =
- DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
+ const TargetRegisterClass &FPRRC =
+ DefSize == 32 ? AArch64::FPR32RegClass
+ : (DefSize == 64 ? AArch64::FPR64RegClass
+ : AArch64::FPR128RegClass);
// Can we use a FMOV instruction to represent the immediate?
if (emitFMovForFConstant(I, MRI))
return true;
// For 64b values, emit a constant pool load instead.
- if (DefSize == 64) {
+ if (DefSize == 64 || DefSize == 128) {
auto *FPImm = I.getOperand(1).getFPImm();
MachineIRBuilder MIB(I);
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
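
For the G_FCONSTANT handling above: emitFMovForFConstant is tried first because a subset of FP values fits FMOV's 8-bit modified immediate; everything else in the 64/128-bit cases goes through a constant-pool load. A hedged sketch of the immediate test, based on the Arm modified-immediate form (-1)^s x (1 + m/16) x 2^e with m in [0,15] and e in [-3,4] (simplified double-based check; the backend works on the APFloat bits):

#include <cassert>
#include <cmath>

// True if V is exactly representable as an FMOV (immediate) operand.
static bool isFMovImm(double V) {
  if (V == 0.0 || !std::isfinite(V))
    return false;
  double A = std::fabs(V);
  for (int E = -3; E <= 4; ++E)
    for (int M = 0; M <= 15; ++M)
      if (A == std::ldexp(1.0 + M / 16.0, E))
        return true;
  return false;
}

int main() {
  assert(isFMovImm(1.0));      // fmov d0, #1.0
  assert(isFMovImm(-0.5));     // fmov d0, #-0.5
  assert(isFMovImm(31.0));     // largest encodable magnitude
  assert(!isFMovImm(0.1));     // not representable; constant-pool load
  return 0;
}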
@@ -2246,21 +2571,22 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
auto &MemOp = **I.memoperands_begin();
+ uint64_t MemSizeInBytes = MemOp.getSize();
if (MemOp.isAtomic()) {
// For now we just support s8 acquire loads to be able to compile stack
// protector code.
if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
- MemOp.getSize() == 1) {
+ MemSizeInBytes == 1) {
I.setDesc(TII.get(AArch64::LDARB));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
return false;
}
- unsigned MemSizeInBits = MemOp.getSize() * 8;
+ unsigned MemSizeInBits = MemSizeInBytes * 8;
- const Register PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
+ const Register PtrReg = I.getOperand(1).getReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
// Sanity-check the pointer register.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
@@ -2272,68 +2598,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const Register ValReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
- const unsigned NewOpc =
- selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
- if (NewOpc == I.getOpcode())
- return false;
-
- I.setDesc(TII.get(NewOpc));
-
- uint64_t Offset = 0;
- auto *PtrMI = MRI.getVRegDef(PtrReg);
-
- // Try to fold a GEP into our unsigned immediate addressing mode.
- if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
- if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
- int64_t Imm = *COff;
- const unsigned Size = MemSizeInBits / 8;
- const unsigned Scale = Log2_32(Size);
- if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
- Register Ptr2Reg = PtrMI->getOperand(1).getReg();
- I.getOperand(1).setReg(Ptr2Reg);
- PtrMI = MRI.getVRegDef(Ptr2Reg);
- Offset = Imm / Size;
- }
+ // Helper lambda for partially selecting I. Either returns the original
+ // instruction with an updated opcode, or a new instruction.
+ auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
+ bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+ const unsigned NewOpc =
+ selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
+ if (NewOpc == I.getOpcode())
+ return nullptr;
+ // Check if we can fold anything into the addressing mode.
+ auto AddrModeFns =
+ selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
+ if (!AddrModeFns) {
+ // Can't fold anything. Use the original instruction.
+ I.setDesc(TII.get(NewOpc));
+ I.addOperand(MachineOperand::CreateImm(0));
+ return &I;
}
- }
- // If we haven't folded anything into our addressing mode yet, try to fold
- // a frame index into the base+offset.
- if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
- I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
+ // Folded something. Create a new instruction and return it.
+ auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
+ IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+ NewInst.cloneMemRefs(I);
+ for (auto &Fn : *AddrModeFns)
+ Fn(NewInst);
+ I.eraseFromParent();
+ return &*NewInst;
+ };
- I.addOperand(MachineOperand::CreateImm(Offset));
+ MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
+ if (!LoadStore)
+ return false;
// If we're storing a 0, use WZR/XZR.
- if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
- if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
- if (I.getOpcode() == AArch64::STRWui)
- I.getOperand(0).setReg(AArch64::WZR);
- else if (I.getOpcode() == AArch64::STRXui)
- I.getOperand(0).setReg(AArch64::XZR);
+ if (Opcode == TargetOpcode::G_STORE) {
+ auto CVal = getConstantVRegValWithLookThrough(
+ LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
+ /*HandleFConstants = */ false);
+ if (CVal && CVal->Value == 0) {
+ switch (LoadStore->getOpcode()) {
+ case AArch64::STRWui:
+ case AArch64::STRHHui:
+ case AArch64::STRBBui:
+ LoadStore->getOperand(0).setReg(AArch64::WZR);
+ break;
+ case AArch64::STRXui:
+ LoadStore->getOperand(0).setReg(AArch64::XZR);
+ break;
+ }
}
}
if (IsZExtLoad) {
- // The zextload from a smaller type to i32 should be handled by the importer.
- if (MRI.getType(ValReg).getSizeInBits() != 64)
+ // The zextload from a smaller type to i32 should be handled by the
+ // importer.
+ if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
return false;
// If we have a ZEXTLOAD then change the load's type to be a narrower reg
- //and zero_extend with SUBREG_TO_REG.
+ // and zero_extend with SUBREG_TO_REG.
Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- Register DstReg = I.getOperand(0).getReg();
- I.getOperand(0).setReg(LdReg);
+ Register DstReg = LoadStore->getOperand(0).getReg();
+ LoadStore->getOperand(0).setReg(LdReg);
- MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+ MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
.addImm(0)
.addUse(LdReg)
.addImm(AArch64::sub_32);
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
MRI);
}
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
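
As a side note on the addressing-mode fold performed by SelectLoadStoreAddressingMode above: the unsigned-immediate form requires the byte offset to be a non-negative multiple of the access size that, once scaled, fits in 12 bits. A standalone model of that check (names are illustrative, not LLVM API):

#include <cassert>
#include <cstdint>

// Returns true if Imm can be folded into AArch64's [base, #uimm12 * size]
// load/store addressing mode for an access of SizeInBytes (a power of two).
static bool fitsScaledUImm12(int64_t Imm, unsigned SizeInBytes) {
  unsigned Scale = 0;
  while ((1u << Scale) < SizeInBytes)
    ++Scale;                                  // Scale = log2(SizeInBytes)
  return Imm >= 0 && (Imm % SizeInBytes) == 0 &&
         Imm < (int64_t(0x1000) << Scale);
}

int main() {
  assert(fitsScaledUImm12(32, 8));        // ldr x0, [x1, #32]
  assert(!fitsScaledUImm12(33, 8));       // not a multiple of the access size
  assert(!fitsScaledUImm12(-8, 8));       // negative offsets need another form
  assert(!fitsScaledUImm12(0x8000, 4));   // 0x8000/4 does not fit in 12 bits
  return 0;
}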
case TargetOpcode::G_SMULH:
@@ -2364,22 +2700,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
-
+ case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
if (MRI.getType(I.getOperand(0).getReg()).isVector())
- return selectVectorASHR(I, MRI);
+ return selectVectorAshrLshr(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_SHL:
if (Opcode == TargetOpcode::G_SHL &&
MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorSHL(I, MRI);
LLVM_FALLTHROUGH;
- case TargetOpcode::G_OR:
- case TargetOpcode::G_LSHR: {
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_OR: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
@@ -2408,37 +2743,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return true;
}
- case TargetOpcode::G_UADDO: {
- // TODO: Support other types.
- unsigned OpSize = Ty.getSizeInBits();
- if (OpSize != 32 && OpSize != 64) {
- LLVM_DEBUG(
- dbgs()
- << "G_UADDO currently only supported for 32 and 64 b types.\n");
- return false;
- }
-
- // TODO: Support vectors.
- if (Ty.isVector()) {
- LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
- return false;
- }
-
- // Add and set the set condition flag.
- unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_USUBO: {
+ // Emit the operation and get the correct condition code.
MachineIRBuilder MIRBuilder(I);
- auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)},
- {I.getOperand(2), I.getOperand(3)});
- constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
+ auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
+ I.getOperand(2), I.getOperand(3), MIRBuilder);
// Now, put the overflow result in the register given by the first operand
- // to the G_UADDO. CSINC increments the result when the predicate is false,
- // so to get the increment when it's true, we need to use the inverse. In
- // this case, we want to increment when carry is set.
+ // to the overflow op. CSINC increments the result when the predicate is
+ // false, so to get the increment when it's true, we need to use the
+ // inverse. In this case, we want to increment when carry is set.
+ Register ZReg = AArch64::WZR;
auto CsetMI = MIRBuilder
.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
- {Register(AArch64::WZR), Register(AArch64::WZR)})
- .addImm(getInvertedCondCode(AArch64CC::HS));
+ {ZReg, ZReg})
+ .addImm(getInvertedCondCode(OpAndCC.second));
constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
I.eraseFromParent();
return true;
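
To make the overflow-op lowering above a little more concrete: each G_*ADDO/G_*SUBO becomes a flag-setting ADDS/SUBS, and the boolean overflow result is recovered with a CSINC whose condition is inverted, since CSINC increments when its predicate is false. A small C++ model of the flags each opcode checks; it mirrors the mapping in emitOverflowOp further down and is illustrative, not the selector itself:

#include <cassert>
#include <cstdint>
#include <limits>

// Unsigned add overflow corresponds to the carry flag (condition HS),
// unsigned subtract overflow (borrow) to carry clear (LO), and signed
// add/sub overflow to the V flag (VS).
static bool uaddOverflows(uint32_t A, uint32_t B) { return A + B < A; }
static bool usubOverflows(uint32_t A, uint32_t B) { return A < B; }
static bool saddOverflows(int32_t A, int32_t B) {
  int64_t R = int64_t(A) + int64_t(B);
  return R < std::numeric_limits<int32_t>::min() ||
         R > std::numeric_limits<int32_t>::max();
}

int main() {
  assert(uaddOverflows(0xffffffffu, 1u));
  assert(!uaddOverflows(1u, 2u));
  assert(usubOverflows(1u, 2u));
  assert(saddOverflows(0x7fffffff, 1));
  return 0;
}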
@@ -2446,7 +2768,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
- Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
+ Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
// TODO: Implement arbitrary cases
if (!MaskVal || !isShiftedMask_64(*MaskVal))
return false;
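
The G_PTRMASK path above only handles masks that are a single contiguous run of ones (a "shifted mask"), since those can be expressed with a bitfield instruction. A standalone sketch of that property check, roughly equivalent to LLVM's isShiftedMask_64:

#include <cassert>
#include <cstdint>

// True if V is a non-empty, contiguous run of set bits, e.g. 0x0ff0 or
// 0xffffffffffff0000, and false for 0 or for masks with holes like 0b1011.
static bool isShiftedMask64(uint64_t V) {
  if (V == 0)
    return false;
  while ((V & 1) == 0)
    V >>= 1;                  // strip trailing zeros
  return (V & (V + 1)) == 0;  // remaining value must be 2^n - 1
}

int main() {
  assert(isShiftedMask64(0x0ff0));
  assert(isShiftedMask64(~0ULL));
  assert(!isShiftedMask64(0));
  assert(!isShiftedMask64(0b1011));
  return 0;
}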
@@ -2737,22 +3059,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (tryOptSelect(I))
return true;
- Register CSelOpc = selectSelectOpc(I, MRI, RBI);
- MachineInstr &TstMI =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
- .addDef(AArch64::WZR)
- .addUse(CondReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
-
- MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
- .addDef(I.getOperand(0).getReg())
- .addUse(TReg)
- .addUse(FReg)
- .addImm(AArch64CC::NE);
-
- constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
- constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
-
+ // Make sure to use an unused vreg instead of wzr, so that the peephole
+ // optimizations will be able to optimize these.
+ MachineIRBuilder MIB(I);
+ Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
+ return false;
I.eraseFromParent();
return true;
}
@@ -2767,76 +3082,22 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
MachineIRBuilder MIRBuilder(I);
- MachineInstr *Cmp;
- CmpInst::Predicate Pred;
- std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3),
- I.getOperand(1), MIRBuilder);
- if (!Cmp)
- return false;
+ auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+ emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
+ MIRBuilder);
emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_FCMP: {
- if (Ty != LLT::scalar(32)) {
- LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
- << ", expected: " << LLT::scalar(32) << '\n');
- return false;
- }
-
- unsigned CmpOpc = selectFCMPOpc(I, MRI);
- if (!CmpOpc)
+ MachineIRBuilder MIRBuilder(I);
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+ if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
+ MIRBuilder, Pred) ||
+ !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
return false;
-
- // FIXME: regbank
-
- AArch64CC::CondCode CC1, CC2;
- changeFCMPPredToAArch64CC(
- (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
-
- // Partially build the compare. Decide if we need to add a use for the
- // third operand based off whether or not we're comparing against 0.0.
- auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
- .addUse(I.getOperand(2).getReg());
-
- // If we don't have an immediate compare, then we need to add a use of the
- // register which wasn't used for the immediate.
- // Note that the immediate will always be the last operand.
- if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
- CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
-
- const Register DefReg = I.getOperand(0).getReg();
- Register Def1Reg = DefReg;
- if (CC2 != AArch64CC::AL)
- Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-
- MachineInstr &CSetMI =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
- .addDef(Def1Reg)
- .addUse(AArch64::WZR)
- .addUse(AArch64::WZR)
- .addImm(getInvertedCondCode(CC1));
-
- if (CC2 != AArch64CC::AL) {
- Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- MachineInstr &CSet2MI =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
- .addDef(Def2Reg)
- .addUse(AArch64::WZR)
- .addUse(AArch64::WZR)
- .addImm(getInvertedCondCode(CC2));
- MachineInstr &OrMI =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
- .addDef(DefReg)
- .addUse(Def1Reg)
- .addUse(Def2Reg);
- constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
- constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
- }
- constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
-
I.eraseFromParent();
return true;
}
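
The reason emitCSetForFCmp (defined further down) sometimes needs two CSINCs plus an ORR is that a few IEEE predicates have no single AArch64 condition code after an FCMP. As a hedged illustration, "ordered and not equal" (ONE) is usually realized as "less than OR greater than", and "unordered or equal" (UEQ) as "equal OR unordered"; the sketch below models combining the two condition results in plain C++ and is not the LLVM mapping table itself:

#include <cassert>
#include <cmath>

// After fcmp a, b the NZCV flags let a single condition test "less than"
// (MI), "greater than" (GT), "equal" (EQ) or "unordered" (VS). Predicates
// like ONE/UEQ need the OR of two such tests.
static bool fcmpONE(double A, double B) {
  bool Ordered = !std::isnan(A) && !std::isnan(B);
  return (Ordered && A < B) || (Ordered && A > B);   // MI || GT
}
static bool fcmpUEQ(double A, double B) {
  bool Unordered = std::isnan(A) || std::isnan(B);
  return A == B || Unordered;                        // EQ || VS
}

int main() {
  assert(fcmpONE(1.0, 2.0) && !fcmpONE(1.0, 1.0));
  assert(!fcmpONE(NAN, 1.0));
  assert(fcmpUEQ(1.0, 1.0) && fcmpUEQ(NAN, 1.0) && !fcmpUEQ(1.0, 2.0));
  return 0;
}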
@@ -2875,6 +3136,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
+ case AArch64::G_DUP: {
+ // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
+ // imported patterns. Do it manually here. Avoiding generating s16 gpr is
+ // difficult because at RBS we may end up pessimizing the fpr case if we
+ // decided to add an anyextend to fix this. Manual selection is the most
+ // robust solution for now.
+ Register SrcReg = I.getOperand(1).getReg();
+ if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
+ return false; // We expect the fpr regbank case to be imported.
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.getSizeInBits() == 16)
+ I.setDesc(TII.get(AArch64::DUPv8i16gpr));
+ else if (SrcTy.getSizeInBits() == 8)
+ I.setDesc(TII.get(AArch64::DUPv16i8gpr));
+ else
+ return false;
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
case TargetOpcode::G_INTRINSIC_TRUNC:
return selectIntrinsicTrunc(I, MRI);
case TargetOpcode::G_INTRINSIC_ROUND:
@@ -2895,8 +3174,49 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
return selectJumpTable(I, MRI);
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_ADD:
+ return selectReduction(I, MRI);
+ }
+
+ return false;
+}
+
+bool AArch64InstructionSelector::selectReduction(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ Register VecReg = I.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
+ unsigned Opc = 0;
+ if (VecTy == LLT::vector(16, 8))
+ Opc = AArch64::ADDVv16i8v;
+ else if (VecTy == LLT::vector(8, 16))
+ Opc = AArch64::ADDVv8i16v;
+ else if (VecTy == LLT::vector(4, 32))
+ Opc = AArch64::ADDVv4i32v;
+ else if (VecTy == LLT::vector(2, 64))
+ Opc = AArch64::ADDPv2i64p;
+ else {
+ LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
+ return false;
+ }
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+ if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
+ unsigned Opc = 0;
+ if (VecTy == LLT::vector(2, 32))
+ Opc = AArch64::FADDPv2i32p;
+ else if (VecTy == LLT::vector(2, 64))
+ Opc = AArch64::FADDPv2i64p;
+ else {
+ LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
+ return false;
+ }
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
return false;
}
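
For the reductions handled by selectReduction above, ADDV folds 8-, 16- and 32-bit lanes across the whole vector, while a 2 x 64-bit vector only needs a single pairwise add (ADDP) of its two lanes. A small C++ model of what the selected instructions compute (illustrative only):

#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

// What an across-vector add computes for narrow lanes, and what the pairwise
// add computes for a 2 x 64-bit vector; wrapping arithmetic on fixed-width
// lanes, as in the actual instructions.
static uint32_t addvU32x4(const std::vector<uint32_t> &Lanes) {
  assert(Lanes.size() == 4);
  return std::accumulate(Lanes.begin(), Lanes.end(), uint32_t(0));
}
static uint64_t addpU64x2(uint64_t Lane0, uint64_t Lane1) {
  return Lane0 + Lane1;
}

int main() {
  assert(addvU32x4({1, 2, 3, 4}) == 10);
  assert(addpU64x2(5, 7) == 12);
  return 0;
}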
@@ -2910,6 +3230,8 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
+
+ MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
{TargetReg, ScratchReg}, {JTAddr, Index})
.addJumpTableIndex(JTI);
@@ -2946,17 +3268,20 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
const GlobalValue &GV = *I.getOperand(1).getGlobal();
MachineIRBuilder MIB(I);
- MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
- .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
+ auto LoadGOT =
+ MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
+ .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
- {Register(AArch64::X0)})
+ {LoadGOT.getReg(0)})
.addImm(0);
+ MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
+ .addUse(AArch64::X0, RegState::Implicit)
.addDef(AArch64::X0, RegState::Implicit)
.addRegMask(TRI.getTLSCallPreservedMask());
@@ -3442,7 +3767,7 @@ bool AArch64InstructionSelector::selectExtractElt(
(void)WideTy;
assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
"source register size too small!");
- assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
+ assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
// Need the lane index to determine the correct copy opcode.
MachineOperand &LaneIdxOp = I.getOperand(2);
@@ -3457,7 +3782,7 @@ bool AArch64InstructionSelector::selectExtractElt(
auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
if (!VRegAndVal)
return false;
- unsigned LaneIdx = VRegAndVal->Value;
+ unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
MachineIRBuilder MIRBuilder(I);
@@ -3680,7 +4005,10 @@ static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
unsigned Opc, SubregIdx;
if (RB.getID() == AArch64::GPRRegBankID) {
- if (EltSize == 32) {
+ if (EltSize == 16) {
+ Opc = AArch64::INSvi16gpr;
+ SubregIdx = AArch64::ssub;
+ } else if (EltSize == 32) {
Opc = AArch64::INSvi32gpr;
SubregIdx = AArch64::ssub;
} else if (EltSize == 64) {
@@ -3709,135 +4037,223 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
return std::make_pair(Opc, SubregIdx);
}
+MachineInstr *AArch64InstructionSelector::emitInstr(
+ unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
+ std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
+ const ComplexRendererFns &RenderFns) const {
+ assert(Opcode && "Expected an opcode?");
+ assert(!isPreISelGenericOpcode(Opcode) &&
+ "Function should only be used to produce selected instructions!");
+ auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
+ if (RenderFns)
+ for (auto &Fn : *RenderFns)
+ Fn(MI);
+ constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
+ return &*MI;
+}
+
+MachineInstr *AArch64InstructionSelector::emitAddSub(
+ const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
+ Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+ auto Ty = MRI.getType(LHS.getReg());
+ assert(!Ty.isVector() && "Expected a scalar or pointer?");
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
+ bool Is32Bit = Size == 32;
+
+ // INSTRri form with positive arithmetic immediate.
+ if (auto Fns = selectArithImmed(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRri form with negative arithmetic immediate.
+ if (auto Fns = selectNegArithImmed(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRrx form.
+ if (auto Fns = selectArithExtendedRegister(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRrs form.
+ if (auto Fns = selectShiftedRegister(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+ return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
+ MIRBuilder);
+}
+
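
The five-row opcode table that emitAddSub consumes (filled in just below for ADD, ADDS and SUBS) corresponds to the operand forms tried in order: a positive arithmetic immediate, a negated immediate with the opposite opcode, an extended register, a shifted register, and finally plain register-register. A hedged standalone sketch of that preference order, with a simplified "imm12, optionally lsl #12" immediate test (illustrative only, not the LLVM renderer functions):

#include <cassert>
#include <cstdint>
#include <optional>

enum class Form { Imm, NegImm, ExtReg, ShiftReg, RegReg };

// Simplified AArch64 arithmetic-immediate test: a 12-bit value, optionally
// shifted left by 12.
static bool isArithImm(uint64_t V) {
  return (V & ~0xfffULL) == 0 || (V & ~(0xfffULL << 12)) == 0;
}

static Form chooseForm(std::optional<int64_t> RHSImm, bool RHSHasExtend,
                       bool RHSHasShift) {
  if (RHSImm && *RHSImm >= 0 && isArithImm(uint64_t(*RHSImm)))
    return Form::Imm;                       // e.g. add x0, x1, #16
  if (RHSImm && *RHSImm < 0 && isArithImm(0 - uint64_t(*RHSImm)))
    return Form::NegImm;                    // e.g. sub x0, x1, #16
  if (RHSHasExtend)
    return Form::ExtReg;                    // e.g. add x0, x1, w2, sxtw
  if (RHSHasShift)
    return Form::ShiftReg;                  // e.g. add x0, x1, x2, lsl #3
  return Form::RegReg;                      // e.g. add x0, x1, x2
}

int main() {
  assert(chooseForm(16, false, false) == Form::Imm);
  assert(chooseForm(-16, false, false) == Form::NegImm);
  assert(chooseForm(std::nullopt, false, true) == Form::ShiftReg);
  return 0;
}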
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
- MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
- {AArch64::ADDWrr, AArch64::ADDWri}};
- bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
- auto ImmFns = selectArithImmed(RHS);
- unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
- auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS});
-
- // If we matched a valid constant immediate, add those operands.
- if (ImmFns) {
- for (auto &RenderFn : *ImmFns)
- RenderFn(AddMI);
- } else {
- AddMI.addUse(RHS.getReg());
- }
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::ADDXri, AArch64::ADDWri},
+ {AArch64::ADDXrs, AArch64::ADDWrs},
+ {AArch64::ADDXrr, AArch64::ADDWrr},
+ {AArch64::SUBXri, AArch64::SUBWri},
+ {AArch64::ADDXrx, AArch64::ADDWrx}}};
+ return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::ADDSXri, AArch64::ADDSWri},
+ {AArch64::ADDSXrs, AArch64::ADDSWrs},
+ {AArch64::ADDSXrr, AArch64::ADDSWrr},
+ {AArch64::SUBSXri, AArch64::SUBSWri},
+ {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
+ return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
+}
- constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
- return &*AddMI;
+MachineInstr *
+AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::SUBSXri, AArch64::SUBSWri},
+ {AArch64::SUBSXrs, AArch64::SUBSWrs},
+ {AArch64::SUBSXrr, AArch64::SUBSWrr},
+ {AArch64::ADDSXri, AArch64::ADDSWri},
+ {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
+ return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
- {AArch64::ADDSWrr, AArch64::ADDSWri}};
bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
- auto ImmFns = selectArithImmed(RHS);
- unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
- Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
-
- auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
-
- // If we matched a valid constant immediate, add those operands.
- if (ImmFns) {
- for (auto &RenderFn : *ImmFns)
- RenderFn(CmpMI);
- } else {
- CmpMI.addUse(RHS.getReg());
- }
-
- constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return &*CmpMI;
+ auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+ return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}
MachineInstr *
-AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
+AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- unsigned RegSize = MRI.getType(LHS).getSizeInBits();
+ LLT Ty = MRI.getType(LHS.getReg());
+ unsigned RegSize = Ty.getSizeInBits();
bool Is32Bit = (RegSize == 32);
- static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
- {AArch64::ANDSWrr, AArch64::ANDSWri}};
- Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
-
- // We might be able to fold in an immediate into the TST. We need to make sure
- // it's a logical immediate though, since ANDS requires that.
- auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
- bool IsImmForm = ValAndVReg.hasValue() &&
- AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
- unsigned Opc = OpcTable[Is32Bit][IsImmForm];
- auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
-
- if (IsImmForm)
- TstMI.addImm(
- AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
- else
- TstMI.addUse(RHS);
+ const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
+ {AArch64::ANDSXrs, AArch64::ANDSWrs},
+ {AArch64::ANDSXrr, AArch64::ANDSWrr}};
+ // ANDS needs a logical immediate for its immediate form. Check if we can
+ // fold one in.
+ if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
+ int64_t Imm = ValAndVReg->Value.getSExtValue();
+
+ if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
+ auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
+ TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ return &*TstMI;
+ }
+ }
- constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- return &*TstMI;
+ if (auto Fns = selectLogicalShiftedRegister(RHS))
+ return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
+ return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
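
emitTST above prefers the ANDS immediate form, which only accepts AArch64 "logical immediates": bit patterns made of a rotated run of ones replicated across the register. The sketch below uses a deliberately reduced check (a single contiguous run of ones, no rotation or replication) just to illustrate why arbitrary constants such as 0x12345 cannot be folded; the real test is AArch64_AM::isLogicalImmediate.

#include <cassert>
#include <cstdint>

// Reduced model: accept a single contiguous run of ones that is neither
// all-zeros nor all-ones. The real encoder also accepts rotated and
// replicated patterns (e.g. 0x00ff00ff00ff00ff), so this under-approximates.
static bool isSimpleLogicalImm(uint64_t V, unsigned RegSize) {
  uint64_t Mask = RegSize == 64 ? ~0ULL : ((1ULL << RegSize) - 1);
  V &= Mask;
  if (V == 0 || V == Mask)
    return false;                 // ANDS cannot encode all-zeros/all-ones
  while ((V & 1) == 0)
    V >>= 1;
  return (V & (V + 1)) == 0;      // contiguous run of ones
}

int main() {
  assert(isSimpleLogicalImm(0xff00, 32));   // tst w0, #0xff00
  assert(!isSimpleLogicalImm(0x12345, 32)); // falls back to a register AND
  assert(!isSimpleLogicalImm(0, 32));
  return 0;
}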
-std::pair<MachineInstr *, CmpInst::Predicate>
-AArch64InstructionSelector::emitIntegerCompare(
+MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
assert(Predicate.isPredicate() && "Expected predicate?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ LLT CmpTy = MRI.getType(LHS.getReg());
+ assert(!CmpTy.isVector() && "Expected scalar or pointer");
+ unsigned Size = CmpTy.getSizeInBits();
+ (void)Size;
+ assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
+ // Fold the compare into a cmn or tst if possible.
+ if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
+ return FoldCmp;
+ auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
+ return emitSUBS(Dst, LHS, RHS, MIRBuilder);
+}
- CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
-
- // Fold the compare if possible.
- MachineInstr *FoldCmp =
- tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
- if (FoldCmp)
- return {FoldCmp, P};
+MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
+ Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+#ifndef NDEBUG
+ LLT Ty = MRI.getType(Dst);
+ assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
+ "Expected a 32-bit scalar register?");
+#endif
+ const Register ZeroReg = AArch64::WZR;
+ auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
+ auto CSet =
+ MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
+ .addImm(getInvertedCondCode(CC));
+ constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
+ return &*CSet;
+ };
- // Can't fold into a CMN. Just emit a normal compare.
- unsigned CmpOpc = 0;
- Register ZReg;
+ AArch64CC::CondCode CC1, CC2;
+ changeFCMPPredToAArch64CC(Pred, CC1, CC2);
+ if (CC2 == AArch64CC::AL)
+ return EmitCSet(Dst, CC1);
+
+ const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
+ Register Def1Reg = MRI.createVirtualRegister(RC);
+ Register Def2Reg = MRI.createVirtualRegister(RC);
+ EmitCSet(Def1Reg, CC1);
+ EmitCSet(Def2Reg, CC2);
+ auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
+ constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
+ return &*OrMI;
+}
- LLT CmpTy = MRI.getType(LHS.getReg());
- assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
- "Expected scalar or pointer");
- if (CmpTy == LLT::scalar(32)) {
- CmpOpc = AArch64::SUBSWrr;
- ZReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
- CmpOpc = AArch64::SUBSXrr;
- ZReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- } else {
- return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE};
- }
+MachineInstr *
+AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
+ MachineIRBuilder &MIRBuilder,
+ Optional<CmpInst::Predicate> Pred) const {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ LLT Ty = MRI.getType(LHS);
+ if (Ty.isVector())
+ return nullptr;
+ unsigned OpSize = Ty.getSizeInBits();
+ if (OpSize != 32 && OpSize != 64)
+ return nullptr;
- // Try to match immediate forms.
- MachineInstr *ImmedCmp =
- tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
- if (ImmedCmp)
- return {ImmedCmp, P};
+ // If this is a compare against +0.0, then we don't have
+ // to explicitly materialize a constant.
+ const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
+ bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
- // If we don't have an immediate, we may have a shift which can be folded
- // into the compare.
- MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
- if (ShiftedCmp)
- return {ShiftedCmp, P};
+ auto IsEqualityPred = [](CmpInst::Predicate P) {
+ return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
+ P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
+ };
+ if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
+ // Try commutating the operands.
+ const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
+ if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
+ ShouldUseImm = true;
+ std::swap(LHS, RHS);
+ }
+ }
+ unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
+ {AArch64::FCMPSri, AArch64::FCMPDri}};
+ unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
- auto CmpMI =
- MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
- // Make sure that we can constrain the compare that we emitted.
+ // Partially build the compare. Decide if we need to add a use for the
+ // third operand based off whether or not we're comparing against 0.0.
+ auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
+ if (!ShouldUseImm)
+ CmpMI.addUse(RHS);
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return {&*CmpMI, P};
+ return &*CmpMI;
}
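
One detail of emitFPCompare worth spelling out: FCMP has a form that compares against +0.0 without a second register operand (FCMPSri/FCMPDri), and for the plain (un)ordered equality predicates the compare is symmetric, so a +0.0 on the left-hand side can be swapped to the right to use it. A hedged sketch of that decision in plain C++, not the selector:

#include <cassert>

struct FCmpOperands { bool LHSIsPosZero, RHSIsPosZero; };

// Decide whether the zero-immediate FCMP form can be used, optionally
// commuting the operands when the predicate is a symmetric equality test
// and the zero is on the LHS.
static bool canUseZeroForm(FCmpOperands Ops, bool IsEqualityPred,
                           bool *SwappedOut) {
  *SwappedOut = false;
  if (Ops.RHSIsPosZero)
    return true;                       // fcmp s0, #0.0
  if (IsEqualityPred && Ops.LHSIsPosZero) {
    *SwappedOut = true;                // commute, then fcmp s0, #0.0
    return true;
  }
  return false;                        // materialize the constant instead
}

int main() {
  bool Swapped = false;
  assert(canUseZeroForm({false, true}, false, &Swapped) && !Swapped);
  assert(canUseZeroForm({true, false}, true, &Swapped) && Swapped);
  assert(!canUseZeroForm({true, false}, false, &Swapped));
  return 0;
}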
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
@@ -3947,11 +4363,28 @@ AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
return &*I;
}
+std::pair<MachineInstr *, AArch64CC::CondCode>
+AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
+ MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_SADDO:
+ return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_UADDO:
+ return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
+ case TargetOpcode::G_SSUBO:
+ return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_USUBO:
+ return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
+ }
+}
+
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
MachineIRBuilder MIB(I);
MachineRegisterInfo &MRI = *MIB.getMRI();
- const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
-
// We want to recognize this pattern:
//
// $z = G_FCMP pred, $x, $y
@@ -4008,27 +4441,17 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
AArch64CC::CondCode CondCode;
if (CondOpc == TargetOpcode::G_ICMP) {
- MachineInstr *Cmp;
- CmpInst::Predicate Pred;
-
- std::tie(Cmp, Pred) =
- emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
- CondDef->getOperand(1), MIB);
-
- if (!Cmp) {
- LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
- return false;
- }
-
- // Have to collect the CondCode after emitIntegerCompare, since it can
- // update the predicate.
+ auto Pred =
+ static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
CondCode = changeICMPPredToAArch64CC(Pred);
+ emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
+ CondDef->getOperand(1), MIB);
} else {
// Get the condition code for the select.
+ auto Pred =
+ static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
AArch64CC::CondCode CondCode2;
- changeFCMPPredToAArch64CC(
- (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
- CondCode2);
+ changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
// changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
// instructions to emit the comparison.
@@ -4037,25 +4460,16 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
if (CondCode2 != AArch64CC::AL)
return false;
- // Make sure we'll be able to select the compare.
- unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
- if (!CmpOpc)
+ if (!emitFPCompare(CondDef->getOperand(2).getReg(),
+ CondDef->getOperand(3).getReg(), MIB)) {
+ LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
return false;
-
- // Emit a new compare.
- auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
- if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
- Cmp.addUse(CondDef->getOperand(3).getReg());
- constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+ }
}
// Emit the select.
- unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
- auto CSel =
- MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
- {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
- .addImm(CondCode);
- constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
+ emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
+ I.getOperand(3).getReg(), CondCode, MIB);
I.eraseFromParent();
return true;
}
@@ -4138,162 +4552,20 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
// Produce this if the compare is signed:
//
// tst x, y
- if (!isUnsignedICMPPred(P) && LHSDef &&
+ if (!CmpInst::isUnsigned(P) && LHSDef &&
LHSDef->getOpcode() == TargetOpcode::G_AND) {
// Make sure that the RHS is 0.
auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
if (!ValAndVReg || ValAndVReg->Value != 0)
return nullptr;
- return emitTST(LHSDef->getOperand(1).getReg(),
- LHSDef->getOperand(2).getReg(), MIRBuilder);
+ return emitTST(LHSDef->getOperand(1), LHSDef->getOperand(2), MIRBuilder);
}
return nullptr;
}
-MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
- MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
- MachineIRBuilder &MIB) const {
- // Attempt to select the immediate form of an integer compare.
- MachineRegisterInfo &MRI = *MIB.getMRI();
- auto Ty = MRI.getType(LHS.getReg());
- assert(!Ty.isVector() && "Expected scalar or pointer only?");
- unsigned Size = Ty.getSizeInBits();
- assert((Size == 32 || Size == 64) &&
- "Expected 32 bit or 64 bit compare only?");
-
- // Check if this is a case we can already handle.
- InstructionSelector::ComplexRendererFns ImmFns;
- ImmFns = selectArithImmed(RHS);
-
- if (!ImmFns) {
- // We didn't get a rendering function, but we may still have a constant.
- auto MaybeImmed = getImmedFromMO(RHS);
- if (!MaybeImmed)
- return nullptr;
-
- // We have a constant, but it doesn't fit. Try adjusting it by one and
- // updating the predicate if possible.
- uint64_t C = *MaybeImmed;
- CmpInst::Predicate NewP;
- switch (P) {
- default:
- return nullptr;
- case CmpInst::ICMP_SLT:
- case CmpInst::ICMP_SGE:
- // Check for
- //
- // x slt c => x sle c - 1
- // x sge c => x sgt c - 1
- //
- // When c is not the smallest possible negative number.
- if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
- (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
- return nullptr;
- NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
- C -= 1;
- break;
- case CmpInst::ICMP_ULT:
- case CmpInst::ICMP_UGE:
- // Check for
- //
- // x ult c => x ule c - 1
- // x uge c => x ugt c - 1
- //
- // When c is not zero.
- if (C == 0)
- return nullptr;
- NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
- C -= 1;
- break;
- case CmpInst::ICMP_SLE:
- case CmpInst::ICMP_SGT:
- // Check for
- //
- // x sle c => x slt c + 1
- // x sgt c => s sge c + 1
- //
- // When c is not the largest possible signed integer.
- if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
- (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
- return nullptr;
- NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
- C += 1;
- break;
- case CmpInst::ICMP_ULE:
- case CmpInst::ICMP_UGT:
- // Check for
- //
- // x ule c => x ult c + 1
- // x ugt c => s uge c + 1
- //
- // When c is not the largest possible unsigned integer.
- if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
- (Size == 64 && C == UINT64_MAX))
- return nullptr;
- NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
- C += 1;
- break;
- }
-
- // Check if the new constant is valid.
- if (Size == 32)
- C = static_cast<uint32_t>(C);
- ImmFns = select12BitValueWithLeftShift(C);
- if (!ImmFns)
- return nullptr;
- P = NewP;
- }
-
- // At this point, we know we can select an immediate form. Go ahead and do
- // that.
- Register ZReg;
- unsigned Opc;
- if (Size == 32) {
- ZReg = AArch64::WZR;
- Opc = AArch64::SUBSWri;
- } else {
- ZReg = AArch64::XZR;
- Opc = AArch64::SUBSXri;
- }
-
- auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
- for (auto &RenderFn : *ImmFns)
- RenderFn(CmpMI);
- constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return &*CmpMI;
-}
-
-MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
- MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const {
- // We are looking for the following pattern:
- //
- // shift = G_SHL/ASHR/LHSR y, c
- // ...
- // cmp = G_ICMP pred, something, shift
- //
- // Since we will select the G_ICMP to a SUBS, we can potentially fold the
- // shift into the subtract.
- static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs};
- static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR};
- auto ImmFns = selectShiftedRegister(RHS);
- if (!ImmFns)
- return nullptr;
- MachineRegisterInfo &MRI = *MIB.getMRI();
- auto Ty = MRI.getType(LHS.getReg());
- assert(!Ty.isVector() && "Expected scalar or pointer only?");
- unsigned Size = Ty.getSizeInBits();
- bool Idx = (Size == 64);
- Register ZReg = ZRegTable[Idx];
- unsigned Opc = OpcTable[Idx];
- auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
- for (auto &RenderFn : *ImmFns)
- RenderFn(CmpMI);
- constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return &*CmpMI;
-}
-
bool AArch64InstructionSelector::selectShuffleVector(
MachineInstr &I, MachineRegisterInfo &MRI) const {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
@@ -4436,7 +4708,7 @@ bool AArch64InstructionSelector::selectInsertElt(
auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
if (!VRegAndVal)
return false;
- unsigned LaneIdx = VRegAndVal->Value;
+ unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
// Perform the lane insert.
Register SrcReg = I.getOperand(1).getReg();
@@ -4493,8 +4765,9 @@ bool AArch64InstructionSelector::selectInsertElt(
bool AArch64InstructionSelector::tryOptConstantBuildVec(
MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
- assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!");
- if (DstTy.getSizeInBits() < 32)
+ unsigned DstSize = DstTy.getSizeInBits();
+ assert(DstSize <= 128 && "Unexpected build_vec type!");
+ if (DstSize < 32)
return false;
// Check if we're building a constant vector, in which case we want to
// generate a constant pool load instead of a vector insert sequence.
@@ -4515,6 +4788,24 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
}
Constant *CV = ConstantVector::get(Csts);
MachineIRBuilder MIB(I);
+ if (CV->isNullValue()) {
+ // Until the importer can support immAllZerosV in pattern leaf nodes,
+ // select a zero move manually here.
+ Register DstReg = I.getOperand(0).getReg();
+ if (DstSize == 128) {
+ auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ } else if (DstSize == 64) {
+ auto Mov =
+ MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
+ .addImm(0);
+ MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ .addReg(Mov.getReg(0), 0, AArch64::dsub);
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
+ }
+ }
auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
if (!CPLoad) {
LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
@@ -4634,10 +4925,12 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
break;
case Intrinsic::debugtrap:
- if (!STI.isTargetWindows())
- return false;
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
break;
+ case Intrinsic::ubsantrap:
+ MIRBuilder.buildInstr(AArch64::BRK, {}, {})
+ .addImm(I.getOperand(1).getImm() | ('U' << 8));
+ break;
}
I.eraseFromParent();
@@ -4703,22 +4996,22 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
- if (MFReturnAddr) {
- MIRBuilder.buildCopy({DstReg}, MFReturnAddr);
- I.eraseFromParent();
- return true;
+ if (!MFReturnAddr) {
+ // Insert the copy from LR/X30 into the entry block, before it can be
+ // clobbered by anything.
+ MFI.setReturnAddressIsTaken(true);
+ MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
+ AArch64::GPR64RegClass);
}
- MFI.setReturnAddressIsTaken(true);
- MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass);
- // Insert the copy from LR/X30 into the entry block, before it can be
- // clobbered by anything.
- MachineBasicBlock &EntryBlock = *MF.begin();
- if (!EntryBlock.isLiveIn(AArch64::LR))
- EntryBlock.addLiveIn(AArch64::LR);
- MachineIRBuilder EntryBuilder(MF);
- EntryBuilder.setInstr(*EntryBlock.begin());
- EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
- MFReturnAddr = DstReg;
+
+ if (STI.hasPAuth()) {
+ MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
+ } else {
+ MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
+ MIRBuilder.buildInstr(AArch64::XPACLRI);
+ MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ }
+
I.eraseFromParent();
return true;
}
@@ -4738,7 +5031,16 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
MIRBuilder.buildCopy({DstReg}, {FrameAddr});
else {
MFI.setReturnAddressIsTaken(true);
- MIRBuilder.buildInstr(AArch64::LDRXui, {DstReg}, {FrameAddr}).addImm(1);
+
+ if (STI.hasPAuth()) {
+ Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
+ MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
+ } else {
+ MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
+ MIRBuilder.buildInstr(AArch64::XPACLRI);
+ MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ }
}
I.eraseFromParent();
@@ -4946,7 +5248,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// The value must fit into 3 bits, and must be positive. Make sure that is
// true.
- int64_t ImmVal = ValAndVReg->Value;
+ int64_t ImmVal = ValAndVReg->Value.getSExtValue();
// Since we're going to pull this into a shift, the constant value must be
// a power of 2. If we got a multiply, then we need to check this.
@@ -5086,12 +5388,60 @@ InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const {
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
-
- // If we have a constant offset, then we probably don't want to match a
- // register offset.
- if (isBaseWithConstantOffset(Root, MRI))
+ if (!Root.isReg())
+ return None;
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd)
return None;
+ // Check for immediates which cannot be encoded in the [base + imm]
+ // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
+ // end up with code like:
+ //
+ // mov x0, wide
+ // add x1 base, x0
+ // ldr x2, [x1, x0]
+ //
+ // In this situation, we can use the [base, xreg] addressing mode to save an
+ // add/sub:
+ //
+ // mov x0, wide
+ // ldr x2, [base, x0]
+ auto ValAndVReg =
+ getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
+ if (ValAndVReg) {
+ unsigned Scale = Log2_32(SizeInBytes);
+ int64_t ImmOff = ValAndVReg->Value.getSExtValue();
+
+ // Skip immediates that can be selected in the load/store addressing
+ // mode.
+ if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
+ ImmOff < (0x1000 << Scale))
+ return None;
+
+ // Helper lambda to decide whether or not it is preferable to emit an add.
+ auto isPreferredADD = [](int64_t ImmOff) {
+ // Constants in [0x0, 0xfff] can be encoded in an add.
+ if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+ return true;
+
+ // Can it be encoded in an add lsl #12?
+ if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
+ return false;
+
+ // It can be encoded in an add lsl #12, but we may not want to. If it is
+ // possible to select this as a single movz, then prefer that. A single
+ // movz is faster than an add with a shift.
+ return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+ (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+ };
+
+ // If the immediate can be encoded in a single add/sub, then bail out.
+ if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+ return None;
+ }
+
// Try to fold shifts into the addressing mode.
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
if (AddrModeFns)
@@ -5521,7 +5871,8 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
- Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
+ Optional<int64_t> CstVal =
+ getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4ffde2a7e3c4..5a6c904e3f5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -14,6 +14,7 @@
#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -22,6 +23,8 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/MathExtras.h"
+#include <initializer_list>
#define DEBUG_TYPE "aarch64-legalinfo"
@@ -53,6 +56,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT v2s64 = LLT::vector(2, 64);
const LLT v2p0 = LLT::vector(2, p0);
+ std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
+ v16s8, v8s16, v4s32,
+ v2s64, v2p0,
+ /* End 128bit types */
+ /* Begin 64bit types */
+ v8s8, v4s16, v2s32};
+
const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
// FIXME: support subtargets which have neon/fp-armv8 disabled.
@@ -61,25 +71,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return;
}
+ // Some instructions only support s16 if the subtarget has full 16-bit FP
+ // support.
+ const bool HasFP16 = ST.hasFullFP16();
+ const LLT &MinFPScalar = HasFP16 ? s16 : s32;
+
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
- .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
- .clampScalar(0, s1, s64)
- .widenScalarToNextPow2(0, 8)
- .fewerElementsIf(
- [=](const LegalityQuery &Query) {
- return Query.Types[0].isVector() &&
- (Query.Types[0].getElementType() != s64 ||
- Query.Types[0].getNumElements() != 2);
- },
- [=](const LegalityQuery &Query) {
- LLT EltTy = Query.Types[0].getElementType();
- if (EltTy == s64)
- return std::make_pair(0, LLT::vector(2, 64));
- return std::make_pair(0, EltTy);
- });
-
- getActionDefinitionsBuilder(G_PHI)
- .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
+ .legalFor({p0, s1, s8, s16, s32, s64})
+ .legalFor(PackedVectorAllTypeList)
+ .clampScalar(0, s1, s64)
+ .widenScalarToNextPow2(0, 8)
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].isVector() &&
+ (Query.Types[0].getElementType() != s64 ||
+ Query.Types[0].getNumElements() != 2);
+ },
+ [=](const LegalityQuery &Query) {
+ LLT EltTy = Query.Types[0].getElementType();
+ if (EltTy == s64)
+ return std::make_pair(0, LLT::vector(2, 64));
+ return std::make_pair(0, EltTy);
+ });
+
+ getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64})
+ .legalFor(PackedVectorAllTypeList)
.clampScalar(0, s16, s64)
.widenScalarToNextPow2(0);
@@ -89,26 +105,38 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
- .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
+ .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
+ .scalarizeIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
+ },
+ 0)
+ .legalFor({v2s64})
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);
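
The scalarizeIf clause above singles out G_MUL on v2s64 because NEON has no 64x64-bit vector multiply, so the legalizer breaks it into one scalar multiply per lane. A plain C++ model of the lowered form (illustrative only):

#include <array>
#include <cassert>
#include <cstdint>

// v2s64 multiply after scalarization: one MUL per 64-bit lane.
static std::array<uint64_t, 2> mulV2i64(std::array<uint64_t, 2> A,
                                        std::array<uint64_t, 2> B) {
  return {A[0] * B[0], A[1] * B[1]};
}

int main() {
  auto R = mulV2i64({3, 5}, {7, 11});
  assert(R[0] == 21 && R[1] == 55);
  return 0;
}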
- getActionDefinitionsBuilder(G_SHL)
+ getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
.customIf([=](const LegalityQuery &Query) {
const auto &SrcTy = Query.Types[0];
const auto &AmtTy = Query.Types[1];
return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
AmtTy.getSizeInBits() == 32;
})
- .legalFor({{s32, s32},
- {s64, s64},
- {s32, s64},
- {v2s32, v2s32},
- {v4s32, v4s32},
- {v2s64, v2s64}})
+ .legalFor({
+ {s32, s32},
+ {s32, s64},
+ {s64, s64},
+ {v8s8, v8s8},
+ {v16s8, v16s8},
+ {v4s16, v4s16},
+ {v8s16, v8s16},
+ {v2s32, v2s32},
+ {v4s32, v4s32},
+ {v2s64, v2s64},
+ })
.clampScalar(1, s32, s64)
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
@@ -130,43 +158,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0)
.scalarize(0);
- getActionDefinitionsBuilder({G_LSHR, G_ASHR})
- .customIf([=](const LegalityQuery &Query) {
- const auto &SrcTy = Query.Types[0];
- const auto &AmtTy = Query.Types[1];
- return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
- AmtTy.getSizeInBits() == 32;
- })
- .legalFor({{s32, s32},
- {s32, s64},
- {s64, s64},
- {v2s32, v2s32},
- {v4s32, v4s32},
- {v2s64, v2s64}})
- .clampScalar(1, s32, s64)
- .clampScalar(0, s32, s64)
- .minScalarSameAs(1, 0);
-
getActionDefinitionsBuilder({G_SREM, G_UREM})
.lowerFor({s1, s8, s16, s32, s64});
- getActionDefinitionsBuilder({G_SMULO, G_UMULO})
- .lowerFor({{s64, s1}});
+ getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}});
getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
- getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
+ getActionDefinitionsBuilder(
+ {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
.legalFor({{s32, s1}, {s64, s1}})
.minScalar(0, s32);
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
- .legalFor({s32, s64, v2s64, v4s32, v2s32});
+ .legalFor({s32, s64, v2s64, v4s32, v2s32})
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64);
getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
- G_FNEARBYINT})
+ G_FNEARBYINT, G_INTRINSIC_LRINT})
// If we don't have full FP16 support, then scalarize the elements of
// vectors containing fp16 types.
.fewerElementsIf(
@@ -272,8 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v4s32, p0, 128, 8},
{v2s64, p0, 128, 8}})
// These extends are also legal
- .legalForTypesWithMemDesc({{s32, p0, 8, 8},
- {s32, p0, 16, 8}})
+ .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}})
.clampScalar(0, s8, s64)
.lowerIfMemSizeNotPow2()
// Lower any any-extending loads left into G_ANYEXT and G_LOAD
@@ -295,6 +307,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{p0, p0, 64, 8},
{s128, p0, 128, 8},
{v16s8, p0, 128, 8},
+ {v8s8, p0, 64, 8},
{v4s16, p0, 64, 8},
{v8s16, p0, 128, 8},
{v2s32, p0, 64, 8},
@@ -312,14 +325,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
- .legalFor({p0, s8, s16, s32, s64})
+ .legalFor({p0, s8, s16, s32, s64})
.clampScalar(0, s8, s64)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder(G_FCONSTANT)
- .legalFor({s32, s64})
- .clampScalar(0, s32, s64);
+ .legalIf([=](const LegalityQuery &Query) {
+ const auto &Ty = Query.Types[0];
+ if (HasFP16 && Ty == s16)
+ return true;
+ return Ty == s32 || Ty == s64 || Ty == s128;
+ })
+ .clampScalar(0, MinFPScalar, s128);
- getActionDefinitionsBuilder(G_ICMP)
+ getActionDefinitionsBuilder({G_ICMP, G_FCMP})
.legalFor({{s32, s32},
{s32, s64},
{s32, p0},
@@ -347,13 +365,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
s64)
- .widenScalarOrEltToNextPow2(1);
-
- getActionDefinitionsBuilder(G_FCMP)
- .legalFor({{s32, s32}, {s32, s64}})
- .clampScalar(0, s32, s32)
- .clampScalar(1, s32, s64)
- .widenScalarToNextPow2(1);
+ .widenScalarOrEltToNextPow2(1)
+ .clampNumElements(0, v2s32, v4s32);
// Extensions
auto ExtLegalFunc = [=](const LegalityQuery &Query) {
@@ -361,7 +374,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
if (DstSize == 128 && !Query.Types[0].isVector())
return false; // Extending to a scalar s128 needs narrowing.
-
+
// Make sure that we have something that will fit in a register, and
// make sure it's a power of 2.
if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
@@ -386,17 +399,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalIf(ExtLegalFunc)
.clampScalar(0, s64, s64); // Just for s128, others are handled above.
- getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
+ getActionDefinitionsBuilder(G_TRUNC)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+ 0, s8)
+ .customIf([=](const LegalityQuery &Query) {
+ LLT DstTy = Query.Types[0];
+ LLT SrcTy = Query.Types[1];
+ return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
+ })
+ .alwaysLegal();
- getActionDefinitionsBuilder(G_SEXT_INREG)
- .legalFor({s32, s64})
- .lower();
+ getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower();
// FP conversions
- getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
- {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
- getActionDefinitionsBuilder(G_FPEXT).legalFor(
- {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});
+ getActionDefinitionsBuilder(G_FPTRUNC)
+ .legalFor(
+ {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
+ .clampMaxNumElements(0, s32, 2);
+ getActionDefinitionsBuilder(G_FPEXT)
+ .legalFor(
+ {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
+ .clampMaxNumElements(0, s64, 2);
// Conversions
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
@@ -409,7 +433,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
.legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
.clampScalar(1, s32, s64)
- .widenScalarToNextPow2(1)
+ .minScalarSameAs(1, 0)
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0);
@@ -417,14 +441,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
- // Select
- // FIXME: We can probably do a bit better than just scalarizing vector
- // selects.
getActionDefinitionsBuilder(G_SELECT)
.legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
- .scalarize(0);
+ .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
+ .lowerIf(isVector(0));
// Pointer-handling
getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
@@ -554,8 +576,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
})
// Any vectors left are the wrong size. Scalarize them.
- .scalarize(0)
- .scalarize(1);
+ .scalarize(0)
+ .scalarize(1);
}
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
@@ -567,18 +589,40 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalIf([=](const LegalityQuery &Query) {
const LLT &VecTy = Query.Types[1];
return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
- VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
- });
+ VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
+ VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0;
+ })
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ // We want to promote <M x s1> to <M x s64> if that wouldn't
+ // cause the total vec size to be > 128b.
+ return Query.Types[1].getNumElements() <= 2;
+ },
+ 0, s64)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[1].getNumElements() <= 4;
+ },
+ 0, s32)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[1].getNumElements() <= 8;
+ },
+ 0, s16)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[1].getNumElements() <= 16;
+ },
+ 0, s8)
+ .minScalarOrElt(0, s8); // Worst case, we need at least s8.
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &VecTy = Query.Types[0];
- // TODO: Support s8 and s16
- return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
- });
+ .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));
getActionDefinitionsBuilder(G_BUILD_VECTOR)
- .legalFor({{v4s16, s16},
+ .legalFor({{v8s8, s8},
+ {v16s8, s8},
+ {v4s16, s16},
{v8s16, s16},
{v2s32, s32},
{v4s32, s32},
@@ -594,8 +638,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
})
.minScalarSameAs(1, 0);
- getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
- {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
+ getActionDefinitionsBuilder(G_CTLZ)
+ .legalForCartesianProduct(
+ {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
.scalarize(1);
getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
@@ -606,7 +651,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// to be the same size as the dest.
if (DstTy != SrcTy)
return false;
- for (auto &Ty : {v2s32, v4s32, v2s64}) {
+ for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) {
if (DstTy == Ty)
return true;
}
@@ -623,8 +668,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
- getActionDefinitionsBuilder(G_JUMP_TABLE)
- .legalFor({{p0}, {s64}});
+ getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
return Query.Types[0] == p0 && Query.Types[1] == s64;
@@ -632,6 +676,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
+
+ getActionDefinitionsBuilder(G_ABS).lowerIf(
+ [=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); });
+
+ getActionDefinitionsBuilder(G_VECREDUCE_FADD)
+ // We only have FADDP to do reduction-like operations. Lower the rest.
+ .legalFor({{s32, v2s32}, {s64, v2s64}})
+ .lower();
+
+ getActionDefinitionsBuilder(G_VECREDUCE_ADD)
+ .legalFor({{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s64, v2s64}})
+ .lower();
+
computeTables();
verify(*ST.getInstrInfo());
}
@@ -656,15 +714,63 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
case TargetOpcode::G_GLOBAL_VALUE:
return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
+ case TargetOpcode::G_TRUNC:
+ return legalizeVectorTrunc(MI, Helper);
}
llvm_unreachable("expected switch to return");
}
-bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
- GISelChangeObserver &Observer) const {
+static void extractParts(Register Reg, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
+ SmallVectorImpl<Register> &VRegs) {
+ for (int I = 0; I < NumParts; ++I)
+ VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+}
+
+bool AArch64LegalizerInfo::legalizeVectorTrunc(
+ MachineInstr &MI, LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ // Similar to how operand splitting is done in SelectionDAG, we can handle
+ // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
+ // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
+ // %lo16(<4 x s16>) = G_TRUNC %inlo
+ // %hi16(<4 x s16>) = G_TRUNC %inhi
+ // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
+ // %res(<8 x s8>) = G_TRUNC %in16
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+ assert(isPowerOf2_32(DstTy.getSizeInBits()) &&
+ isPowerOf2_32(SrcTy.getSizeInBits()));
+
+ // Split input type.
+ LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2);
+ // First, split the source into two smaller vectors.
+ SmallVector<Register, 2> SplitSrcs;
+ extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
+
+ // Truncate the splits into intermediate narrower elements.
+ LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
+ for (unsigned I = 0; I < SplitSrcs.size(); ++I)
+ SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
+
+ auto Concat = MIRBuilder.buildConcatVectors(
+ DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
+
+ Helper.Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Concat.getReg(0));
+ Helper.Observer.changedInstr(MI);
+ return true;
+}
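
A standalone sketch of the two-stage narrowing performed by legalizeVectorTrunc, modeled on plain arrays rather than MIR. The helper name truncV8S32ToV8S8 and the element widths are illustrative only, mirroring the v8s32 -> v8s8 case from the comment above:

#include <array>
#include <cassert>
#include <cstdint>

// Model of %res(v8s8) = G_TRUNC %in(v8s32): split the input in half,
// truncate each half to 16 bits, concatenate, then truncate again to 8 bits.
static std::array<uint8_t, 8> truncV8S32ToV8S8(const std::array<uint32_t, 8> &In) {
  std::array<uint16_t, 8> Cat16; // result of the G_CONCAT_VECTORS
  for (int I = 0; I < 4; ++I) {
    Cat16[I] = static_cast<uint16_t>(In[I]);         // G_TRUNC %inlo
    Cat16[I + 4] = static_cast<uint16_t>(In[I + 4]); // G_TRUNC %inhi
  }
  std::array<uint8_t, 8> Res; // the final, now-legal G_TRUNC
  for (int I = 0; I < 8; ++I)
    Res[I] = static_cast<uint8_t>(Cat16[I]);
  return Res;
}

int main() {
  std::array<uint32_t, 8> In{};
  for (int I = 0; I < 8; ++I)
    In[I] = 0x11111100u * (I + 1) + I;
  const std::array<uint8_t, 8> Res = truncV8S32ToV8S8(In);
  for (int I = 0; I < 8; ++I)
    assert(Res[I] == static_cast<uint8_t>(In[I])); // matches a direct truncate
  return 0;
}
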
+
+bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
// We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
// G_ADD_LOW instructions.
@@ -686,6 +792,27 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(MachineInstr &MI,
// Set the regclass on the dest reg too.
MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
+ // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
+ // by creating a MOVK that sets bits 48-63 of the register to (global address
+ // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
+ // prevent an incorrect tag being generated during relocation when the
+ // global appears before the code section. Without the offset, a global at
+ // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
+ // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
+ // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
+ // instead of `0xf`.
+ // This assumes that we're in the small code model so we can assume a binary
+ // size of <= 4GB, which makes the untagged PC relative offset positive. The
+ // binary must also be loaded into address range [0, 2^48). Both of these
+ // properties need to be ensured at runtime when using tagged addresses.
+ if (OpFlags & AArch64II::MO_TAGGED) {
+ ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
+ .addGlobalAddress(GV, 0x100000000,
+ AArch64II::MO_PREL | AArch64II::MO_G3)
+ .addImm(48);
+ MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
+ }
+
MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
.addGlobalAddress(GV, 0,
OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
@@ -693,21 +820,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(MachineInstr &MI,
return true;
}
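
The tag-preservation arithmetic described in the MO_TAGGED comment above can be checked directly. A small illustrative program (not part of LLVM) using the example addresses from that comment; it inspects the 16-bit field (bits 48-63) that the emitted MOVK writes:

#include <cassert>
#include <cstdint>

// Bits 48-63 of a tagged pointer, i.e. the field the MOVKXi above fills in.
static uint64_t topSixteen(uint64_t V) { return V >> 48; }

int main() {
  const uint64_t Global = 0x0f00000000001000ULL; // global at 0x1000, tag nibble 0xf
  const uint64_t PC = 0x2000ULL;                 // code referencing the global

  // The plain PC-relative difference borrows out of the tag field: 0x0eff, not 0x0f00.
  assert(topSixteen(Global - PC) == 0x0eff);

  // Adding 0x100000000 first prevents the borrow, as long as the untagged
  // offset is positive and below 4 GiB (the small-code-model assumption).
  assert(topSixteen(Global + 0x100000000ULL - PC) == 0x0f00);
  return 0;
}
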
-bool AArch64LegalizerInfo::legalizeIntrinsic(
- LegalizerHelper &Helper, MachineInstr &MI) const {
- MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
- switch (MI.getIntrinsicID()) {
- case Intrinsic::memcpy:
- case Intrinsic::memset:
- case Intrinsic::memmove:
- if (createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI) ==
- LegalizerHelper::UnableToLegalize)
- return false;
- MI.eraseFromParent();
- return true;
- default:
- break;
- }
+bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
return true;
}
@@ -724,11 +838,13 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
if (!VRegAndVal)
return true;
// Check the shift amount is in range for an immediate form.
- int64_t Amount = VRegAndVal->Value;
+ int64_t Amount = VRegAndVal->Value.getSExtValue();
if (Amount > 31)
return true; // This will have to remain a register variant.
auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
+ Observer.changingInstr(MI);
MI.getOperand(2).setReg(ExtCst.getReg(0));
+ Observer.changedInstr(MI);
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 1cb24559c1ab..8217e37c8512 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
namespace llvm {
@@ -45,6 +46,7 @@ private:
bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
+ bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index baa8515baf3e..fdd04cb77fad 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -1,17 +1,22 @@
- //=== lib/CodeGen/GlobalISel/AArch64PostLegalizerCombiner.cpp -------------===//
+//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This performs post-legalization combines on generic MachineInstrs.
-//
-// Any combine that this pass performs must preserve instruction legality.
-// Combines unconcerned with legality should be handled by the
-// PreLegalizerCombiner instead.
-//
+///
+/// \file
+/// Post-legalization combines on generic MachineInstrs.
+///
+/// The combines here must preserve instruction legality.
+///
+/// Lowering combines (e.g. pseudo matching) should be handled by
+/// AArch64PostLegalizerLowering.
+///
+/// Combines which don't rely on instruction legality should go in the
+/// AArch64PreLegalizerCombiner.
+///
//===----------------------------------------------------------------------===//
#include "AArch64TargetMachine.h"
@@ -19,373 +24,215 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
-#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "aarch64-postlegalizer-combiner"
using namespace llvm;
-using namespace MIPatternMatch;
-
-/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
-///
-/// Used for matching target-supported shuffles before codegen.
-struct ShuffleVectorPseudo {
- unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
- Register Dst; ///< Destination register.
- SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
- ShuffleVectorPseudo(unsigned Opc, Register Dst,
- std::initializer_list<SrcOp> SrcOps)
- : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
- ShuffleVectorPseudo() {}
-};
-
-/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat.
-/// If \p MI is not a splat, returns None.
-static Optional<int> getSplatIndex(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
- "Only G_SHUFFLE_VECTOR can have a splat index!");
- ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; });
-
- // If all elements are undefined, this shuffle can be considered a splat.
- // Return 0 for better potential for callers to simplify.
- if (FirstDefinedIdx == Mask.end())
- return 0;
-
- // Make sure all remaining elements are either undef or the same
- // as the first non-undef value.
- int SplatValue = *FirstDefinedIdx;
- if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()),
- [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; }))
- return None;
-
- return SplatValue;
-}
-
-/// Check if a vector shuffle corresponds to a REV instruction with the
-/// specified blocksize.
-static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
- unsigned BlockSize) {
- assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
- "Only possible block sizes for REV are: 16, 32, 64");
- assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
-
- unsigned BlockElts = M[0] + 1;
- // If the first shuffle index is UNDEF, be optimistic.
- if (M[0] < 0)
- BlockElts = BlockSize / EltSize;
-
- if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
+/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
+/// Rewrite for pairwise fadd pattern
+/// (s32 (g_extract_vector_elt
+/// (g_fadd (vXs32 Other)
+/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
+/// ->
+/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
+/// (g_extract_vector_elt (vXs32 Other) 1))
+bool matchExtractVecEltPairwiseAdd(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::tuple<unsigned, LLT, Register> &MatchInfo) {
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto Cst = getConstantVRegValWithLookThrough(Src2, MRI);
+ if (!Cst || Cst->Value != 0)
return false;
+ // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
- for (unsigned i = 0; i < NumElts; ++i) {
- // Ignore undef indices.
- if (M[i] < 0)
- continue;
- if (static_cast<unsigned>(M[i]) !=
- (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
- return false;
- }
-
- return true;
-}
-
-/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
-/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
-static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
- unsigned &WhichResult) {
- if (NumElts % 2 != 0)
+ // Now check for an fadd operation. TODO: expand this for integer add?
+ auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
+ if (!FAddMI)
return false;
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i < NumElts; i += 2) {
- if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
- (M[i + 1] >= 0 &&
- static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
- return false;
- }
- return true;
-}
-
-/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
-/// sources of the shuffle are different.
-static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
- unsigned NumElts) {
- // Look for the first non-undef element.
- auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
- if (FirstRealElt == M.end())
- return None;
-
- // Use APInt to handle overflow when calculating expected element.
- unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
- APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
-
- // The following shuffle indices must be the successive elements after the
- // first real element.
- if (any_of(
- make_range(std::next(FirstRealElt), M.end()),
- [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
- return None;
-
- // The index of an EXT is the first element if it is not UNDEF.
- // Watch out for the beginning UNDEFs. The EXT index should be the expected
- // value of the first element. E.g.
- // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
- // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
- // ExpectedElt is the last mask index plus 1.
- uint64_t Imm = ExpectedElt.getZExtValue();
- bool ReverseExt = false;
-
- // There are two difference cases requiring to reverse input vectors.
- // For example, for vector <4 x i32> we have the following cases,
- // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
- // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
- // For both cases, we finally use mask <5, 6, 7, 0>, which requires
- // to reverse two input vectors.
- if (Imm < NumElts)
- ReverseExt = true;
- else
- Imm -= NumElts;
- return std::make_pair(ReverseExt, Imm);
-}
-
-/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
-/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
-static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
- unsigned &WhichResult) {
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i != NumElts; ++i) {
- // Skip undef indices.
- if (M[i] < 0)
- continue;
- if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
- return false;
- }
- return true;
-}
-/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
-/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
-static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
- unsigned &WhichResult) {
- if (NumElts % 2 != 0)
+ // If we add support for integer add, we must restrict these types to just s64.
+ unsigned DstSize = DstTy.getSizeInBits();
+ if (DstSize != 16 && DstSize != 32 && DstSize != 64)
return false;
- // 0 means use ZIP1, 1 means use ZIP2.
- WhichResult = (M[0] == 0 ? 0 : 1);
- unsigned Idx = WhichResult * NumElts / 2;
- for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
- (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
- return false;
- Idx += 1;
+ Register Src1Op1 = FAddMI->getOperand(1).getReg();
+ Register Src1Op2 = FAddMI->getOperand(2).getReg();
+ MachineInstr *Shuffle =
+ getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
+ MachineInstr *Other = MRI.getVRegDef(Src1Op1);
+ if (!Shuffle) {
+ Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
+ Other = MRI.getVRegDef(Src1Op2);
}
- return true;
-}
-
-/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
-/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
-static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT Ty = MRI.getType(Dst);
- unsigned EltSize = Ty.getScalarSizeInBits();
-
- // Element size for a rev cannot be 64.
- if (EltSize == 64)
- return false;
- unsigned NumElts = Ty.getNumElements();
-
- // Try to produce G_REV64
- if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
- MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
+ // We're looking for a shuffle that moves the second element to index 0.
+ if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
+ Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
+ std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
+ std::get<1>(MatchInfo) = DstTy;
+ std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
return true;
}
-
- // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
- // This should be identical to above, but with a constant 32 and constant
- // 16.
return false;
}
-/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
-/// a G_TRN1 or G_TRN2 instruction.
-static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- unsigned WhichResult;
- ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
- Register Dst = MI.getOperand(0).getReg();
- unsigned NumElts = MRI.getType(Dst).getNumElements();
- if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
- return false;
- unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
- Register V1 = MI.getOperand(1).getReg();
- Register V2 = MI.getOperand(2).getReg();
- MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+bool applyExtractVecEltPairwiseAdd(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+ std::tuple<unsigned, LLT, Register> &MatchInfo) {
+ unsigned Opc = std::get<0>(MatchInfo);
+ assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
+ // We want to generate two extracts of elements 0 and 1, and add them.
+ LLT Ty = std::get<1>(MatchInfo);
+ Register Src = std::get<2>(MatchInfo);
+ LLT s64 = LLT::scalar(64);
+ B.setInstrAndDebugLoc(MI);
+ auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
+ auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
+ B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
+ MI.eraseFromParent();
return true;
}
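
For reference, a scalar model (outside MIR) of the rewrite that matchExtractVecEltPairwiseAdd / applyExtractVecEltPairwiseAdd perform; the function name and the v4s32-sized array are illustrative assumptions:

#include <array>
#include <cassert>

// Lane 0 of (fadd v, shuffle(v, undef, <1, ...>)) is v[0] + v[1], which is
// what the combine emits as two extracts plus one G_FADD (the FADDP shape).
static float extractLane0OfPairwiseAdd(const std::array<float, 4> &V) {
  const std::array<float, 4> Shuf{V[1], V[2], V[3], V[0]}; // mask <1, X, X, X>
  std::array<float, 4> Sum;
  for (int I = 0; I < 4; ++I)
    Sum[I] = V[I] + Shuf[I];  // the vector G_FADD
  return Sum[0];              // the G_EXTRACT_VECTOR_ELT at index 0
}

int main() {
  const std::array<float, 4> V{1.5f, 2.25f, -3.0f, 4.0f};
  assert(extractLane0OfPairwiseAdd(V) == V[0] + V[1]); // the rewritten form
  return 0;
}
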
-/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
-/// a G_UZP1 or G_UZP2 instruction.
-///
-/// \param [in] MI - The shuffle vector instruction.
-/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
-static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- unsigned WhichResult;
- ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
- Register Dst = MI.getOperand(0).getReg();
- unsigned NumElts = MRI.getType(Dst).getNumElements();
- if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
- return false;
- unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
- Register V1 = MI.getOperand(1).getReg();
- Register V2 = MI.getOperand(2).getReg();
- MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
- return true;
+static bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
+ // TODO: check if extended build vector as well.
+ unsigned Opc = MRI.getVRegDef(R)->getOpcode();
+ return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
}
-static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- unsigned WhichResult;
- ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
- Register Dst = MI.getOperand(0).getReg();
- unsigned NumElts = MRI.getType(Dst).getNumElements();
- if (!isZipMask(ShuffleMask, NumElts, WhichResult))
- return false;
- unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
- Register V1 = MI.getOperand(1).getReg();
- Register V2 = MI.getOperand(2).getReg();
- MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
- return true;
+static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
+ // TODO: check if extended build vector as well.
+ return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
}
-/// Helper function for matchDup.
-static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
- MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- if (Lane != 0)
- return false;
-
- // Try to match a vector splat operation into a dup instruction.
- // We're looking for this pattern:
- //
- // %scalar:gpr(s64) = COPY $x0
- // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
- // %cst0:gpr(s32) = G_CONSTANT i32 0
- // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
- // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
- // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
- //
- // ...into:
- // %splat = G_DUP %scalar
-
- // Begin matching the insert.
- auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
- MI.getOperand(1).getReg(), MRI);
- if (!InsMI)
- return false;
- // Match the undef vector operand.
- if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
- MRI))
- return false;
-
- // Match the index constant 0.
- int64_t Index = 0;
- if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
- return false;
-
- MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
- {InsMI->getOperand(2).getReg()});
- return true;
-}
+bool matchAArch64MulConstCombine(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL);
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ const LLT Ty = MRI.getType(LHS);
-/// Helper function for matchDup.
-static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
- MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(Lane >= 0 && "Expected positive lane?");
- // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
- // lane's definition directly.
- auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
- MI.getOperand(1).getReg(), MRI);
- if (!BuildVecMI)
+ // The below optimizations require a constant RHS.
+ auto Const = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (!Const)
return false;
- Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
- MatchInfo =
- ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
- return true;
-}
-static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- auto MaybeLane = getSplatIndex(MI);
- if (!MaybeLane)
- return false;
- int Lane = *MaybeLane;
- // If this is undef splat, generate it via "just" vdup, if possible.
- if (Lane < 0)
- Lane = 0;
- if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
- return true;
- if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
- return true;
- return false;
-}
+ const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits());
+ // The following code is ported from AArch64ISelLowering.
+ // Multiplication of a power of two plus/minus one can be done more
+ // cheaply as a shift+add/sub. For now, this is true unilaterally. If
+ // future CPUs have a cheaper MADD instruction, this may need to be
+ // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
+ // 64-bit is 5 cycles, so this is always a win.
+ // More aggressively, some multiplications N0 * C can be lowered to
+ // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
+ // e.g. 6=3*2=(2+1)*2.
+ // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
+ // which equals (1+2)*16-(1+2).
+ // TrailingZeroes is used to test if the mul can be lowered to
+ // shift+add+shift.
+ unsigned TrailingZeroes = ConstValue.countTrailingZeros();
+ if (TrailingZeroes) {
+ // Conservatively do not lower to shift+add+shift if the mul might be
+ // folded into smul or umul.
+ if (MRI.hasOneNonDBGUse(LHS) &&
+ (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
+ return false;
+ // Conservatively do not lower to shift+add+shift if the mul might be
+ // folded into madd or msub.
+ if (MRI.hasOneNonDBGUse(Dst)) {
+ MachineInstr &UseMI = *MRI.use_instr_begin(Dst);
+ if (UseMI.getOpcode() == TargetOpcode::G_ADD ||
+ UseMI.getOpcode() == TargetOpcode::G_SUB)
+ return false;
+ }
+ }
+ // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
+ // and shift+add+shift.
+ APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
+
+ unsigned ShiftAmt, AddSubOpc;
+ // Is the shifted value the LHS operand of the add/sub?
+ bool ShiftValUseIsLHS = true;
+ // Do we need to negate the result?
+ bool NegateResult = false;
+
+ if (ConstValue.isNonNegative()) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
+ APInt SCVMinus1 = ShiftedConstValue - 1;
+ APInt CVPlus1 = ConstValue + 1;
+ if (SCVMinus1.isPowerOf2()) {
+ ShiftAmt = SCVMinus1.logBase2();
+ AddSubOpc = TargetOpcode::G_ADD;
+ } else if (CVPlus1.isPowerOf2()) {
+ ShiftAmt = CVPlus1.logBase2();
+ AddSubOpc = TargetOpcode::G_SUB;
+ } else
+ return false;
+ } else {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ APInt CVNegPlus1 = -ConstValue + 1;
+ APInt CVNegMinus1 = -ConstValue - 1;
+ if (CVNegPlus1.isPowerOf2()) {
+ ShiftAmt = CVNegPlus1.logBase2();
+ AddSubOpc = TargetOpcode::G_SUB;
+ ShiftValUseIsLHS = false;
+ } else if (CVNegMinus1.isPowerOf2()) {
+ ShiftAmt = CVNegMinus1.logBase2();
+ AddSubOpc = TargetOpcode::G_ADD;
+ NegateResult = true;
+ } else
+ return false;
+ }
-static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- Register Dst = MI.getOperand(0).getReg();
- auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
- MRI.getType(Dst).getNumElements());
- if (!ExtInfo)
+ if (NegateResult && TrailingZeroes)
return false;
- bool ReverseExt;
- uint64_t Imm;
- std::tie(ReverseExt, Imm) = *ExtInfo;
- Register V1 = MI.getOperand(1).getReg();
- Register V2 = MI.getOperand(2).getReg();
- if (ReverseExt)
- std::swap(V1, V2);
- uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
- Imm *= ExtFactor;
- MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
- return true;
-}
-/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
-/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
-static bool applyShuffleVectorPseudo(MachineInstr &MI,
- ShuffleVectorPseudo &MatchInfo) {
- MachineIRBuilder MIRBuilder(MI);
- MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
- MI.eraseFromParent();
+ ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
+ auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
+ auto ShiftedVal = B.buildShl(Ty, LHS, Shift);
+
+ Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
+ Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
+ auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
+ assert(!(NegateResult && TrailingZeroes) &&
+ "NegateResult and TrailingZeroes cannot both be true for now.");
+ // Negate the result.
+ if (NegateResult) {
+ B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
+ return;
+ }
+ // Shift the result.
+ if (TrailingZeroes) {
+ B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));
+ return;
+ }
+ B.buildCopy(DstReg, Res.getReg(0));
+ };
return true;
}
-/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
-/// Special-cased because the constant operand must be emitted as a G_CONSTANT
-/// for the imported tablegen patterns to work.
-static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
- MachineIRBuilder MIRBuilder(MI);
- // Tablegen patterns expect an i32 G_CONSTANT as the final op.
- auto Cst =
- MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
- MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
- {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+bool applyAArch64MulConstCombine(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+ std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
+ B.setInstrAndDebugLoc(MI);
+ ApplyFn(B, MI.getOperand(0).getReg());
MI.eraseFromParent();
return true;
}
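
The decompositions listed in the comments of matchAArch64MulConstCombine are plain integer identities and can be spot-checked in isolation; a small illustrative check (not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  const int64_t X = 123;
  // (mul x, 2^N + 1) => (add (shl x, N), x), e.g. C = 9
  assert(X * 9 == (X << 3) + X);
  // (mul x, 2^N - 1) => (sub (shl x, N), x), e.g. C = 7
  assert(X * 7 == (X << 3) - X);
  // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M), e.g. C = 6 = 3 * 2
  assert(X * 6 == (((X << 1) + X) << 1));
  // (mul x, -(2^N - 1)) => (sub x, (shl x, N)), e.g. C = -7
  assert(X * -7 == X - (X << 3));
  // (mul x, -(2^N + 1)) => -(add (shl x, N), x), e.g. C = -9
  assert(X * -9 == -((X << 3) + X));
  return 0;
}
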
@@ -501,7 +348,7 @@ INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE,
false)
namespace llvm {
-FunctionPass *createAArch64PostLegalizeCombiner(bool IsOptNone) {
+FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
return new AArch64PostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
new file mode 100644
index 000000000000..a06ff4b5417a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -0,0 +1,704 @@
+//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Post-legalization lowering for instructions.
+///
+/// This is used to offload pattern matching from the selector.
+///
+/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
+/// a G_ZIP, G_UZP, etc.
+///
+/// General optimization combines should be handled by either the
+/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AArch64TargetMachine.h"
+#include "AArch64GlobalISelUtils.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+using namespace AArch64GISelUtils;
+
+/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
+///
+/// Used for matching target-supported shuffles before codegen.
+struct ShuffleVectorPseudo {
+ unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
+ Register Dst; ///< Destination register.
+ SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
+ ShuffleVectorPseudo(unsigned Opc, Register Dst,
+ std::initializer_list<SrcOp> SrcOps)
+ : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
+ ShuffleVectorPseudo() {}
+};
+
+/// Check if a vector shuffle corresponds to a REV instruction with the
+/// specified blocksize.
+static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
+ unsigned BlockSize) {
+ assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+ "Only possible block sizes for REV are: 16, 32, 64");
+ assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
+
+ unsigned BlockElts = M[0] + 1;
+
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSize;
+
+ if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ // Ignore undef indices.
+ if (M[i] < 0)
+ continue;
+ if (static_cast<unsigned>(M[i]) !=
+ (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
+/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
+/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
+static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
+ unsigned &WhichResult) {
+ if (NumElts % 2 != 0)
+ return false;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
+ (M[i + 1] >= 0 &&
+ static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
+/// sources of the shuffle are different.
+static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
+ unsigned NumElts) {
+ // Look for the first non-undef element.
+ auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
+ if (FirstRealElt == M.end())
+ return None;
+
+ // Use APInt to handle overflow when calculating expected element.
+ unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
+ APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
+
+ // The following shuffle indices must be the successive elements after the
+ // first real element.
+ if (any_of(
+ make_range(std::next(FirstRealElt), M.end()),
+ [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
+ return None;
+
+ // The index of an EXT is the first element if it is not UNDEF.
+ // Watch out for the beginning UNDEFs. The EXT index should be the expected
+ // value of the first element. E.g.
+ // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
+ // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
+ // ExpectedElt is the last mask index plus 1.
+ uint64_t Imm = ExpectedElt.getZExtValue();
+ bool ReverseExt = false;
+
+ // There are two different cases that require reversing the input vectors.
+ // For example, for vector <4 x i32> we have the following cases:
+ // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
+ // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
+ // For both cases, we finally use mask <5, 6, 7, 0>, which requires
+ // reversing the two input vectors.
+ if (Imm < NumElts)
+ ReverseExt = true;
+ else
+ Imm -= NumElts;
+ return std::make_pair(ReverseExt, Imm);
+}
+
+/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
+/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
+static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
+ unsigned &WhichResult) {
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // Skip undef indices.
+ if (M[i] < 0)
+ continue;
+ if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
+ return false;
+ }
+ return true;
+}
+
+/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
+/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
+static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
+ unsigned &WhichResult) {
+ if (NumElts % 2 != 0)
+ return false;
+
+ // 0 means use ZIP1, 1 means use ZIP2.
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
+ (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
+ return true;
+}
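
The masks accepted by isREVMask / isTRNMask / isUZPMask / isZipMask are easiest to see on concrete data. A small illustrative program (not LLVM code) for the <4 x s32> case, where V1 supplies shuffle lanes 0-3 and V2 lanes 4-7:

#include <array>
#include <cassert>

int main() {
  const std::array<int, 4> V1{10, 11, 12, 13}, V2{20, 21, 22, 23};
  std::array<int, 8> Cat;
  for (int I = 0; I < 4; ++I) {
    Cat[I] = V1[I];
    Cat[I + 4] = V2[I];
  }
  auto Shuffle = [&](const std::array<int, 4> &Mask) {
    std::array<int, 4> Out;
    for (int I = 0; I < 4; ++I)
      Out[I] = Cat[Mask[I]];
    return Out;
  };
  // REV64: reverse the 32-bit elements inside each 64-bit block of V1.
  assert((Shuffle({1, 0, 3, 2}) == std::array<int, 4>{11, 10, 13, 12}));
  // TRN1: interleave the even lanes of V1 and V2 (WhichResult == 0).
  assert((Shuffle({0, 4, 2, 6}) == std::array<int, 4>{10, 20, 12, 22}));
  // UZP1: keep the even lanes of the concatenation.
  assert((Shuffle({0, 2, 4, 6}) == std::array<int, 4>{10, 12, 20, 22}));
  // ZIP1: interleave the low halves of V1 and V2.
  assert((Shuffle({0, 4, 1, 5}) == std::array<int, 4>{10, 20, 11, 21}));
  return 0;
}
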
+
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
+/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
+static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(Dst);
+ unsigned EltSize = Ty.getScalarSizeInBits();
+
+ // Element size for a rev cannot be 64.
+ if (EltSize == 64)
+ return false;
+
+ unsigned NumElts = Ty.getNumElements();
+
+ // Try to produce G_REV64
+ if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
+ return true;
+ }
+
+ // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
+ // This should be identical to above, but with a constant 32 and constant
+ // 16.
+ return false;
+}
+
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
+/// a G_TRN1 or G_TRN2 instruction.
+static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ unsigned WhichResult;
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned NumElts = MRI.getType(Dst).getNumElements();
+ if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
+ return false;
+ unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+ return true;
+}
+
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
+/// a G_UZP1 or G_UZP2 instruction.
+///
+/// \param [in] MI - The shuffle vector instruction.
+/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
+static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ unsigned WhichResult;
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned NumElts = MRI.getType(Dst).getNumElements();
+ if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
+ return false;
+ unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+ return true;
+}
+
+static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ unsigned WhichResult;
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned NumElts = MRI.getType(Dst).getNumElements();
+ if (!isZipMask(ShuffleMask, NumElts, WhichResult))
+ return false;
+ unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+ return true;
+}
+
+/// Helper function for matchDup.
+static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ if (Lane != 0)
+ return false;
+
+ // Try to match a vector splat operation into a dup instruction.
+ // We're looking for this pattern:
+ //
+ // %scalar:gpr(s64) = COPY $x0
+ // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
+ // %cst0:gpr(s32) = G_CONSTANT i32 0
+ // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
+ // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
+ // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
+ //
+ // ...into:
+ // %splat = G_DUP %scalar
+
+ // Begin matching the insert.
+ auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
+ MI.getOperand(1).getReg(), MRI);
+ if (!InsMI)
+ return false;
+ // Match the undef vector operand.
+ if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
+ MRI))
+ return false;
+
+ // Match the index constant 0.
+ if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
+ return false;
+
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
+ {InsMI->getOperand(2).getReg()});
+ return true;
+}
+
+/// Helper function for matchDup.
+static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(Lane >= 0 && "Expected positive lane?");
+ // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
+ // lane's definition directly.
+ auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
+ MI.getOperand(1).getReg(), MRI);
+ if (!BuildVecMI)
+ return false;
+ Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
+ MatchInfo =
+ ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
+ return true;
+}
+
+static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ auto MaybeLane = getSplatIndex(MI);
+ if (!MaybeLane)
+ return false;
+ int Lane = *MaybeLane;
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane < 0)
+ Lane = 0;
+ if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
+ return true;
+ if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
+ return true;
+ return false;
+}
+
+static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ Register Dst = MI.getOperand(0).getReg();
+ auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
+ MRI.getType(Dst).getNumElements());
+ if (!ExtInfo)
+ return false;
+ bool ReverseExt;
+ uint64_t Imm;
+ std::tie(ReverseExt, Imm) = *ExtInfo;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ if (ReverseExt)
+ std::swap(V1, V2);
+ uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
+ Imm *= ExtFactor;
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
+ return true;
+}
+
+/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
+/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
+static bool applyShuffleVectorPseudo(MachineInstr &MI,
+ ShuffleVectorPseudo &MatchInfo) {
+ MachineIRBuilder MIRBuilder(MI);
+ MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
+/// Special-cased because the constant operand must be emitted as a G_CONSTANT
+/// for the imported tablegen patterns to work.
+static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
+ MachineIRBuilder MIRBuilder(MI);
+ // Tablegen patterns expect an i32 G_CONSTANT as the final op.
+ auto Cst =
+ MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
+ MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
+ {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+ MI.eraseFromParent();
+ return true;
+}
+
+/// isVShiftRImm - Check if this is a valid vector for the immediate
+/// operand of a vector shift right operation. The value must be in the range:
+/// 1 <= Value <= ElementBits for a right shift.
+static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
+ int64_t &Cnt) {
+ assert(Ty.isVector() && "vector shift count is not a vector type");
+ MachineInstr *MI = MRI.getVRegDef(Reg);
+ auto Cst = getBuildVectorConstantSplat(*MI, MRI);
+ if (!Cst)
+ return false;
+ Cnt = *Cst;
+ int64_t ElementBits = Ty.getScalarSizeInBits();
+ return Cnt >= 1 && Cnt <= ElementBits;
+}
+
+/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
+static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
+ int64_t &Imm) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
+ MI.getOpcode() == TargetOpcode::G_LSHR);
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ if (!Ty.isVector())
+ return false;
+ return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
+}
+
+static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
+ int64_t &Imm) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
+ unsigned NewOpc =
+ Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
+ MachineIRBuilder MIB(MI);
+ auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
+ MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Determine if it is possible to modify the \p RHS and predicate \p P of a
+/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
+///
+/// \returns A pair containing the updated immediate and predicate which may
+/// be used to optimize the instruction.
+///
+/// \note This assumes that the comparison has been legalized.
+Optional<std::pair<uint64_t, CmpInst::Predicate>>
+tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
+ const MachineRegisterInfo &MRI) {
+ const auto &Ty = MRI.getType(RHS);
+ if (Ty.isVector())
+ return None;
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
+
+ // If the RHS is not a constant, or the RHS is already a valid arithmetic
+ // immediate, then there is nothing to change.
+ auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (!ValAndVReg)
+ return None;
+ uint64_t C = ValAndVReg->Value.getZExtValue();
+ if (isLegalArithImmed(C))
+ return None;
+
+ // We have a non-arithmetic immediate. Check if adjusting the immediate and
+ // adjusting the predicate will result in a legal arithmetic immediate.
+ switch (P) {
+ default:
+ return None;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SGE:
+ // Check for
+ //
+ // x slt c => x sle c - 1
+ // x sge c => x sgt c - 1
+ //
+ // When c is not the smallest possible negative number.
+ if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
+ (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
+ return None;
+ P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
+ C -= 1;
+ break;
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_UGE:
+ // Check for
+ //
+ // x ult c => x ule c - 1
+ // x uge c => x ugt c - 1
+ //
+ // When c is not zero.
+ if (C == 0)
+ return None;
+ P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+ C -= 1;
+ break;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::ICMP_SGT:
+ // Check for
+ //
+ // x sle c => x slt c + 1
+ // x sgt c => x sge c + 1
+ //
+ // When c is not the largest possible signed integer.
+ if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
+ (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
+ return None;
+ P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
+ C += 1;
+ break;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_UGT:
+ // Check for
+ //
+ // x ule c => x ult c + 1
+ // x ugt c => x uge c + 1
+ //
+ // When c is not the largest possible unsigned integer.
+ if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
+ (Size == 64 && C == UINT64_MAX))
+ return None;
+ P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
+ C += 1;
+ break;
+ }
+
+ // Check if the new constant is valid, and return the updated constant and
+ // predicate if it is.
+ if (Size == 32)
+ C = static_cast<uint32_t>(C);
+ if (!isLegalArithImmed(C))
+ return None;
+ return {{C, P}};
+}
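
The predicate/immediate rewrites enumerated in tryAdjustICmpImmAndPred are ordinary integer identities; a minimal illustrative check (signed and unsigned, skipping the boundary constants the code explicitly rejects):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t SC = 4;
  for (int32_t X : {-5, 0, 3, 4, 5, 100}) {
    assert((X < SC) == (X <= SC - 1));  // x slt c  ->  x sle c-1
    assert((X >= SC) == (X > SC - 1));  // x sge c  ->  x sgt c-1
    assert((X <= SC) == (X < SC + 1));  // x sle c  ->  x slt c+1
    assert((X > SC) == (X >= SC + 1));  // x sgt c  ->  x sge c+1
  }
  const uint32_t UC = 4;
  for (uint32_t X : {0u, 3u, 4u, 5u, 100u}) {
    assert((X < UC) == (X <= UC - 1));  // x ult c  ->  x ule c-1 (c != 0)
    assert((X >= UC) == (X > UC - 1));  // x uge c  ->  x ugt c-1
    assert((X <= UC) == (X < UC + 1));  // x ule c  ->  x ult c+1 (c != UINT_MAX)
    assert((X > UC) == (X >= UC + 1));  // x ugt c  ->  x uge c+1
  }
  return 0;
}
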
+
+/// Determine whether or not it is possible to update the RHS and predicate of
+/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
+/// immediate.
+///
+/// \p MI - The G_ICMP instruction
+/// \p MatchInfo - The new RHS immediate and predicate on success
+///
+/// See tryAdjustICmpImmAndPred for valid transformations.
+bool matchAdjustICmpImmAndPred(
+ MachineInstr &MI, const MachineRegisterInfo &MRI,
+ std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ Register RHS = MI.getOperand(3).getReg();
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
+ MatchInfo = *MaybeNewImmAndPred;
+ return true;
+ }
+ return false;
+}
+
+bool applyAdjustICmpImmAndPred(
+ MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
+ MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
+ MIB.setInstrAndDebugLoc(MI);
+ MachineOperand &RHS = MI.getOperand(3);
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
+ MatchInfo.first);
+ Observer.changingInstr(MI);
+ RHS.setReg(Cst->getOperand(0).getReg());
+ MI.getOperand(1).setPredicate(MatchInfo.second);
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::pair<unsigned, int> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ Register Src1Reg = MI.getOperand(1).getReg();
+ const LLT SrcTy = MRI.getType(Src1Reg);
+ const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto LaneIdx = getSplatIndex(MI);
+ if (!LaneIdx)
+ return false;
+
+ // The lane idx should be within the first source vector.
+ if (*LaneIdx >= SrcTy.getNumElements())
+ return false;
+
+ if (DstTy != SrcTy)
+ return false;
+
+ LLT ScalarTy = SrcTy.getElementType();
+ unsigned ScalarSize = ScalarTy.getSizeInBits();
+
+ unsigned Opc = 0;
+ switch (SrcTy.getNumElements()) {
+ case 2:
+ if (ScalarSize == 64)
+ Opc = AArch64::G_DUPLANE64;
+ break;
+ case 4:
+ if (ScalarSize == 32)
+ Opc = AArch64::G_DUPLANE32;
+ break;
+ case 8:
+ if (ScalarSize == 16)
+ Opc = AArch64::G_DUPLANE16;
+ break;
+ case 16:
+ if (ScalarSize == 8)
+ Opc = AArch64::G_DUPLANE8;
+ break;
+ default:
+ break;
+ }
+ if (!Opc)
+ return false;
+
+ MatchInfo.first = Opc;
+ MatchInfo.second = *LaneIdx;
+ return true;
+}
+
+bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ B.setInstrAndDebugLoc(MI);
+ auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
+ B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()},
+ {MI.getOperand(1).getReg(), Lane});
+ MI.eraseFromParent();
+ return true;
+}
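For reference, a compact restatement of the (element count, element size) shapes accepted by matchDupLane; the strings are illustrative names, not the generated opcode enum. A v4s32 shuffle whose mask splats lane 1 thus becomes G_DUPLANE32 with a constant lane index of 1.

  #include <string>

  // Names of the DUPLANE flavours used for the full-width splats handled
  // above; returns an empty string for shapes the combine does not touch.
  static std::string dupLaneFor(unsigned NumElts, unsigned ScalarBits) {
    if (NumElts == 2 && ScalarBits == 64) return "G_DUPLANE64";
    if (NumElts == 4 && ScalarBits == 32) return "G_DUPLANE32";
    if (NumElts == 8 && ScalarBits == 16) return "G_DUPLANE16";
    if (NumElts == 16 && ScalarBits == 8) return "G_DUPLANE8";
    return {};
  }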
+
+#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
+#include "AArch64GenPostLegalizeGILowering.inc"
+#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
+
+namespace {
+#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
+#include "AArch64GenPostLegalizeGILowering.inc"
+#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
+
+class AArch64PostLegalizerLoweringInfo : public CombinerInfo {
+public:
+ AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg;
+
+ AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize)
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize,
+ MinSize) {
+ if (!GeneratedRuleCfg.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+ }
+
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ CombinerHelper Helper(Observer, B);
+ AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg);
+ return Generated.tryCombineAll(Observer, MI, B, Helper);
+}
+
+#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
+#include "AArch64GenPostLegalizeGILowering.inc"
+#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
+
+class AArch64PostLegalizerLowering : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AArch64PostLegalizerLowering();
+
+ StringRef getPassName() const override {
+ return "AArch64PostLegalizerLowering";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+} // end anonymous namespace
+
+void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
+ : MachineFunctionPass(ID) {
+ initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
+}
+
+bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ assert(MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized) &&
+ "Expected a legalized function?");
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ const Function &F = MF.getFunction();
+ AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize());
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char AArch64PostLegalizerLowering::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
+ "Lower AArch64 MachineInstrs after legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
+ "Lower AArch64 MachineInstrs after legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createAArch64PostLegalizerLowering() {
+ return new AArch64PostLegalizerLowering();
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
new file mode 100644
index 000000000000..2f882ecb1fd4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -0,0 +1,187 @@
+//=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does post-instruction-selection optimizations in the GlobalISel
+// pipeline, before the rest of codegen runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-post-select-optimize"
+
+using namespace llvm;
+
+namespace {
+class AArch64PostSelectOptimize : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AArch64PostSelectOptimize();
+
+ StringRef getPassName() const override {
+ return "AArch64 Post Select Optimizer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ bool optimizeNZCVDefs(MachineBasicBlock &MBB);
+};
+} // end anonymous namespace
+
+void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64PostSelectOptimize::AArch64PostSelectOptimize()
+ : MachineFunctionPass(ID) {
+ initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
+}
+
+unsigned getNonFlagSettingVariant(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return 0;
+ case AArch64::SUBSXrr:
+ return AArch64::SUBXrr;
+ case AArch64::SUBSWrr:
+ return AArch64::SUBWrr;
+ case AArch64::SUBSXrs:
+ return AArch64::SUBXrs;
+ case AArch64::SUBSXri:
+ return AArch64::SUBXri;
+ case AArch64::SUBSWri:
+ return AArch64::SUBWri;
+ }
+}
+
+bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
+ // Consider the following code:
+ // FCMPSrr %0, %1, implicit-def $nzcv
+ // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
+ // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
+ // FCMPSrr %0, %1, implicit-def $nzcv
+ // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
+ // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
+ // when we have a single IR fcmp being used by two selects. During selection,
+ // to ensure that there can be no clobbering of nzcv between the fcmp and the
+ // csel, we have to generate an fcmp immediately before each csel is
+ // selected.
+ // However, often we can essentially CSE these together later in MachineCSE.
+ // This doesn't work though if there are unrelated flag-setting instructions
+ // in between the two FCMPs. In this case, the SUBS defines NZCV,
+ // but that def has no users and is simply overwritten by the second FCMP.
+ //
+ // Our solution here is to try to convert flag-setting operations between
+ // an interval of identical FCMPs, so that CSE will be able to eliminate one.
+ bool Changed = false;
+ const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+
+ // The first step is to find the first and last FCMPs. If we have found
+ // at least two, then set the limit of the bottom-up walk to the first FCMP
+ // found since we're only interested in dealing with instructions between
+ // them.
+ MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
+ for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
+ if (MI.getOpcode() == AArch64::FCMPSrr ||
+ MI.getOpcode() == AArch64::FCMPDrr) {
+ if (!FirstCmp)
+ FirstCmp = &MI;
+ else
+ LastCmp = &MI;
+ }
+ }
+
+ // In addition to converting flag-setting ops inside fcmp ranges into
+ // non-flag-setting ops, we also walk the whole basic block to find nzcv
+ // implicit-defs that are dead and mark them as such. Peephole optimizations
+ // need this information later.
+
+ LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
+ LRU.addLiveOuts(MBB);
+ bool NZCVDead = LRU.available(AArch64::NZCV);
+ bool InsideCmpRange = false;
+ for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
+ LRU.stepBackward(II);
+
+ if (LastCmp) { // There's a range present in this block.
+ // If we're inside an fcmp range, look for the beginning instruction.
+ if (InsideCmpRange && &II == FirstCmp)
+ InsideCmpRange = false;
+ else if (&II == LastCmp)
+ InsideCmpRange = true;
+ }
+
+ // Did this instruction define NZCV?
+ bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
+ if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
+ // If we have a def and NZCV is dead, then we may convert this op.
+ unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
+ int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
+ if (DeadNZCVIdx != -1) {
+ // If we're inside an fcmp range, then convert flag setting ops.
+ if (InsideCmpRange && NewOpc) {
+ LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
+ "op in fcmp range: "
+ << II);
+ II.setDesc(TII->get(NewOpc));
+ II.RemoveOperand(DeadNZCVIdx);
+ Changed |= true;
+ } else {
+ // Otherwise, we just set the nzcv imp-def operand to be dead, so the
+ // peephole optimizations can optimize them further.
+ II.getOperand(DeadNZCVIdx).setIsDead();
+ }
+ }
+ }
+
+ NZCVDead = NZCVDeadAtCurrInstr;
+ }
+ return Changed;
+}
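A condensed view of the per-instruction choice made on the bottom-up walk once NZCV is known to be dead at the def; the names below are illustrative, not part of the pass.

  enum class NZCVDefAction { ConvertToNonFlagSetting, MarkImplicitDefDead };

  // Inside an FCMP range, prefer dropping the flag def entirely (when a
  // non-flag-setting variant exists) so MachineCSE can merge the two FCMPs;
  // otherwise just record that the def is dead for later peepholes.
  static NZCVDefAction chooseAction(bool InsideCmpRange,
                                    bool HasNonFlagVariant) {
    if (InsideCmpRange && HasNonFlagVariant)
      return NZCVDefAction::ConvertToNonFlagSetting;
    return NZCVDefAction::MarkImplicitDefDead;
  }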
+
+bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ assert(MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected) &&
+ "Expected a selected MF");
+
+ bool Changed = false;
+ for (auto &BB : MF)
+ Changed |= optimizeNZCVDefs(BB);
+ return true;
+}
+
+char AArch64PostSelectOptimize::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
+ "Optimize AArch64 selected instructions",
+ false, false)
+INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
+ "Optimize AArch64 selected instructions", false,
+ false)
+
+namespace llvm {
+FunctionPass *createAArch64PostSelectOptimize() {
+ return new AArch64PostSelectOptimize();
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 9a1f200d5222..5f9b64e274b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -96,24 +96,6 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
CombinerHelper Helper(Observer, B, KB, MDT);
AArch64GenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);
- switch (MI.getOpcode()) {
- case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
- switch (MI.getIntrinsicID()) {
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset: {
- // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
- // heuristics decide.
- unsigned MaxLen = EnableOpt ? 0 : 32;
- // Try to inline memcpy type calls if optimizations are enabled.
- return (!EnableMinSize) ? Helper.tryCombineMemCpyFamily(MI, MaxLen)
- : false;
- }
- default:
- break;
- }
- }
-
if (Generated.tryCombineAll(Observer, MI, B))
return true;
@@ -122,6 +104,15 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return Helper.tryCombineShuffleVector(MI);
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET: {
+ // If we're at -O0, set a maxlen of 32 to inline; otherwise let the other
+ // heuristics decide.
+ unsigned MaxLen = EnableOpt ? 0 : 32;
+ // Try to inline memcpy type calls if optimizations are enabled.
+ return !EnableMinSize ? Helper.tryCombineMemCpyFamily(MI, MaxLen) : false;
+ }
}
return false;
@@ -197,7 +188,7 @@ INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
namespace llvm {
-FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone) {
+FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) {
return new AArch64PreLegalizerCombiner(IsOptNone);
}
} // end namespace llvm
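The relocated memcpy-family handling keeps the same gate as before; a minimal sketch of that decision, with EnableOpt and EnableMinSize standing in for the combiner-info flags.

  // Returns true if inlining should be attempted at all, and sets MaxLen:
  // 32 at -O0, 0 (no cap from this pass; generic heuristics decide) when
  // optimizing. Nothing is inlined when minsize is requested.
  static bool shouldTryInlineMemOp(bool EnableOpt, bool EnableMinSize,
                                   unsigned &MaxLen) {
    if (EnableMinSize)
      return false;
    MaxLen = EnableOpt ? 0 : 32;
    return true;
  }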
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 93213f5977e5..c76c43389b37 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -13,6 +13,7 @@
#include "AArch64RegisterBankInfo.h"
#include "AArch64InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
@@ -465,9 +466,10 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
getValueMapping(RBIdx, Size), NumOperands);
}
-bool AArch64RegisterBankInfo::hasFPConstraints(
- const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const {
+bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
unsigned Op = MI.getOpcode();
// Do we have an explicit floating point instruction?
@@ -479,14 +481,30 @@ bool AArch64RegisterBankInfo::hasFPConstraints(
if (Op != TargetOpcode::COPY && !MI.isPHI())
return false;
- // MI is copy-like. Return true if it outputs an FPR.
- return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
- &AArch64::FPRRegBank;
+ // Check if we already know the register bank.
+ auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
+ if (RB == &AArch64::FPRRegBank)
+ return true;
+ if (RB == &AArch64::GPRRegBank)
+ return false;
+
+ // We don't know anything.
+ //
+ // If we have a phi, we may be able to infer that it will be assigned a FPR
+ // based off of its inputs.
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+ return false;
+
+ return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
+ return Op.isReg() &&
+ onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
+ });
}
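The PHI handling added to hasFPConstraints is a bounded recursive search; a toy model of the same shape, with a hypothetical Node type standing in for vreg definitions and the depth cap mirroring MaxFPRSearchDepth from the header.

  #include <vector>

  struct Node {
    bool KnownFPR = false;            // register bank already assigned: FPR
    bool KnownGPR = false;            // register bank already assigned: GPR
    bool IsPhi = false;
    std::vector<const Node *> Inputs; // definitions feeding a PHI
  };

  static bool inferFPR(const Node &N, unsigned Depth = 0,
                       unsigned MaxDepth = 2) {
    if (N.KnownFPR)
      return true;
    if (N.KnownGPR || !N.IsPhi || Depth > MaxDepth)
      return false;
    for (const Node *In : N.Inputs)
      if (In && inferFPR(*In, Depth + 1, MaxDepth))
        return true;
    return false;
  }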
bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const {
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
@@ -495,12 +513,13 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
default:
break;
}
- return hasFPConstraints(MI, MRI, TRI);
+ return hasFPConstraints(MI, MRI, TRI, Depth);
}
-bool AArch64RegisterBankInfo::onlyDefinesFP(
- const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const {
+bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
switch (MI.getOpcode()) {
case AArch64::G_DUP:
case TargetOpcode::G_SITOFP:
@@ -511,7 +530,7 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(
default:
break;
}
- return hasFPConstraints(MI, MRI, TRI);
+ return hasFPConstraints(MI, MRI, TRI, Depth);
}
const RegisterBankInfo::InstructionMapping &
@@ -661,11 +680,18 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_UITOFP:
+ case TargetOpcode::G_UITOFP: {
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
break;
- OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
+ // Integer-to-FP conversions don't necessarily go from the GPR bank to the
+ // FPR bank. They can also be done entirely within FPRs.
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ else
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
break;
+ }
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
@@ -703,7 +729,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// assume this was a floating point load in the IR.
// If it was not, we would have had a bitcast before
// reaching that instruction.
- if (onlyUsesFP(UseMI, MRI, TRI)) {
+ // Int->FP conversion operations are also captured in onlyDefinesFP().
+ if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
@@ -826,7 +853,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
- case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_BUILD_VECTOR: {
// If the first source operand belongs to a FPR register bank, then make
// sure that we preserve that.
if (OpRegBankIdx[1] != PMI_FirstGPR)
@@ -837,10 +864,17 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Get the instruction that defined the source operand reg, and check if
// it's a floating point operation. Or, if it's a type like s16 which
- // doesn't have a exact size gpr register class.
+ // doesn't have an exact size gpr register class. The exception is if the
+ // build_vector has all constant operands, which may be better to leave as
+ // gpr without copies, so it can be matched in imported patterns.
MachineInstr *DefMI = MRI.getVRegDef(VReg);
unsigned DefOpc = DefMI->getOpcode();
const LLT SrcTy = MRI.getType(VReg);
+ if (all_of(MI.operands(), [&](const MachineOperand &Op) {
+ return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
+ TargetOpcode::G_CONSTANT;
+ }))
+ break;
if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
SrcTy.getSizeInBits() < 32) {
// Have a floating point op.
@@ -851,6 +885,30 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_FMUL:
+ case TargetOpcode::G_VECREDUCE_FMAX:
+ case TargetOpcode::G_VECREDUCE_FMIN:
+ case TargetOpcode::G_VECREDUCE_ADD:
+ case TargetOpcode::G_VECREDUCE_MUL:
+ case TargetOpcode::G_VECREDUCE_AND:
+ case TargetOpcode::G_VECREDUCE_OR:
+ case TargetOpcode::G_VECREDUCE_XOR:
+ case TargetOpcode::G_VECREDUCE_SMAX:
+ case TargetOpcode::G_VECREDUCE_SMIN:
+ case TargetOpcode::G_VECREDUCE_UMAX:
+ case TargetOpcode::G_VECREDUCE_UMIN:
+ // Reductions produce a scalar value from a vector; the scalar should be on
+ // the FPR bank.
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ break;
+ case TargetOpcode::G_VECREDUCE_SEQ_FADD:
+ case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
+ // These reductions also take a scalar accumulator input.
+ // Assign them FPR for now.
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
+ break;
+ }
// Finally construct the computed mapping.
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index e956fca1aa10..019017bc3ec4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -114,17 +114,20 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
const InstructionMapping &
getSameKindOfOperandsMapping(const MachineInstr &MI) const;
- /// Returns true if the output of \p MI must be stored on a FPR register.
+ /// Maximum recursion depth for hasFPConstraints.
+ const unsigned MaxFPRSearchDepth = 2;
+
+ /// \returns true if \p MI only uses and defines FPRs.
bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const;
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
- /// Returns true if the source registers of \p MI must all be FPRs.
+ /// \returns true if \p MI only uses FPRs.
bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const;
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
- /// Returns true if the destination register of \p MI must be a FPR.
+ /// \returns true if \p MI only defines FPRs.
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const;
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
public:
AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/select-saddo.mir b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/select-saddo.mir
new file mode 100644
index 000000000000..6f05bd7ac838
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/select-saddo.mir
@@ -0,0 +1,158 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
+
+...
+---
+name: saddo_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $w1, $x2
+
+ ; CHECK-LABEL: name: saddo_s32
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK: %reg0:gpr32 = COPY $w0
+ ; CHECK: %reg1:gpr32 = COPY $w1
+ ; CHECK: %saddo:gpr32 = ADDSWrr %reg0, %reg1, implicit-def $nzcv
+ ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $w0 = COPY %saddo
+ ; CHECK: RET_ReallyLR implicit $w0
+ %reg0:gpr(s32) = COPY $w0
+ %reg1:gpr(s32) = COPY $w1
+ %saddo:gpr(s32), %4:gpr(s1) = G_SADDO %reg0, %reg1
+ $w0 = COPY %saddo(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: saddo_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: saddo_s64
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %reg0:gpr64 = COPY $x0
+ ; CHECK: %reg1:gpr64 = COPY $x1
+ ; CHECK: %saddo:gpr64 = ADDSXrr %reg0, %reg1, implicit-def $nzcv
+ ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $x0 = COPY %saddo
+ ; CHECK: RET_ReallyLR implicit $x0
+ %reg0:gpr(s64) = COPY $x0
+ %reg1:gpr(s64) = COPY $x1
+ %saddo:gpr(s64), %4:gpr(s1) = G_SADDO %reg0, %reg1
+ $x0 = COPY %saddo(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: saddo_s32_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $w1, $x2
+ ; Check that we get ADDSWri when we can fold in a constant.
+ ;
+ ; CHECK-LABEL: name: saddo_s32_imm
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK: %copy:gpr32sp = COPY $w0
+ ; CHECK: %saddo:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
+ ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $w0 = COPY %saddo
+ ; CHECK: RET_ReallyLR implicit $w0
+ %copy:gpr(s32) = COPY $w0
+ %constant:gpr(s32) = G_CONSTANT i32 16
+ %saddo:gpr(s32), %overflow:gpr(s1) = G_SADDO %copy, %constant
+ $w0 = COPY %saddo(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: saddo_s32_shifted
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $w1, $x2
+ ; Check that we get ADDSWrs when we can fold in a shift.
+ ;
+ ; CHECK-LABEL: name: saddo_s32_shifted
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK: %reg0:gpr32 = COPY $w0
+ ; CHECK: %reg1:gpr32 = COPY $w1
+ ; CHECK: %add:gpr32 = ADDSWrs %reg0, %reg1, 16, implicit-def $nzcv
+ ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $w0 = COPY %add
+ ; CHECK: RET_ReallyLR implicit $w0
+ %reg0:gpr(s32) = COPY $w0
+ %reg1:gpr(s32) = COPY $w1
+ %constant:gpr(s32) = G_CONSTANT i32 16
+ %shift:gpr(s32) = G_SHL %reg1(s32), %constant(s32)
+ %add:gpr(s32), %overflow:gpr(s1) = G_SADDO %reg0, %shift
+ $w0 = COPY %add(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: saddo_s32_neg_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $w1, $x2
+ ; Check that we get SUBSWri when we can fold in a negative constant.
+ ;
+ ; CHECK-LABEL: name: saddo_s32_neg_imm
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK: %copy:gpr32sp = COPY $w0
+ ; CHECK: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
+ ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $w0 = COPY %add
+ ; CHECK: RET_ReallyLR implicit $w0
+ %copy:gpr(s32) = COPY $w0
+ %constant:gpr(s32) = G_CONSTANT i32 -16
+ %add:gpr(s32), %overflow:gpr(s1) = G_SADDO %copy, %constant
+ $w0 = COPY %add(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: saddo_arith_extended
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $x0
+ ; Check that we get ADDSXrx.
+ ; CHECK-LABEL: name: saddo_arith_extended
+ ; CHECK: liveins: $w0, $x0
+ ; CHECK: %reg0:gpr64sp = COPY $x0
+ ; CHECK: %reg1:gpr32 = COPY $w0
+ ; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
+ ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $x0 = COPY %add
+ ; CHECK: RET_ReallyLR implicit $x0
+ %reg0:gpr(s64) = COPY $x0
+ %reg1:gpr(s32) = COPY $w0
+ %ext:gpr(s64) = G_ZEXT %reg1(s32)
+ %cst:gpr(s64) = G_CONSTANT i64 2
+ %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+ %add:gpr(s64), %flags:gpr(s1) = G_SADDO %reg0, %shift
+ $x0 = COPY %add(s64)
+ RET_ReallyLR implicit $x0
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/select-ssubo.mir b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/select-ssubo.mir
new file mode 100644
index 000000000000..f6b1794645f7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/select-ssubo.mir
@@ -0,0 +1,158 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
+
+...
+---
+name: ssubo_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $w1, $x2
+
+ ; CHECK-LABEL: name: ssubo_s32
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK: %reg0:gpr32 = COPY $w0
+ ; CHECK: %reg1:gpr32 = COPY $w1
+ ; CHECK: %ssubo:gpr32 = SUBSWrr %reg0, %reg1, implicit-def $nzcv
+ ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $w0 = COPY %ssubo
+ ; CHECK: RET_ReallyLR implicit $w0
+ %reg0:gpr(s32) = COPY $w0
+ %reg1:gpr(s32) = COPY $w1
+ %ssubo:gpr(s32), %4:gpr(s1) = G_SSUBO %reg0, %reg1
+ $w0 = COPY %ssubo(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: ssubo_s64
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %reg0:gpr64 = COPY $x0
+ ; CHECK: %reg1:gpr64 = COPY $x1
+ ; CHECK: %ssubo:gpr64 = SUBSXrr %reg0, %reg1, implicit-def $nzcv
+ ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $x0 = COPY %ssubo
+ ; CHECK: RET_ReallyLR implicit $x0
+ %reg0:gpr(s64) = COPY $x0
+ %reg1:gpr(s64) = COPY $x1
+ %ssubo:gpr(s64), %4:gpr(s1) = G_SSUBO %reg0, %reg1
+ $x0 = COPY %ssubo(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: ssubo_s32_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $w1, $x2
+ ; Check that we get SUBSWri when we can fold in a constant.
+ ;
+ ; CHECK-LABEL: name: ssubo_s32_imm
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK: %copy:gpr32sp = COPY $w0
+ ; CHECK: %ssubo:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
+ ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $w0 = COPY %ssubo
+ ; CHECK: RET_ReallyLR implicit $w0
+ %copy:gpr(s32) = COPY $w0
+ %constant:gpr(s32) = G_CONSTANT i32 16
+ %ssubo:gpr(s32), %overflow:gpr(s1) = G_SSUBO %copy, %constant
+ $w0 = COPY %ssubo(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_s32_shifted
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $w1, $x2
+ ; Check that we get SUBSWrs when we can fold in a shift.
+ ;
+ ; CHECK-LABEL: name: ssubo_s32_shifted
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK: %reg0:gpr32 = COPY $w0
+ ; CHECK: %reg1:gpr32 = COPY $w1
+ ; CHECK: %sub:gpr32 = SUBSWrs %reg0, %reg1, 16, implicit-def $nzcv
+ ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $w0 = COPY %sub
+ ; CHECK: RET_ReallyLR implicit $w0
+ %reg0:gpr(s32) = COPY $w0
+ %reg1:gpr(s32) = COPY $w1
+ %constant:gpr(s32) = G_CONSTANT i32 16
+ %shift:gpr(s32) = G_SHL %reg1(s32), %constant(s32)
+ %sub:gpr(s32), %overflow:gpr(s1) = G_SSUBO %reg0, %shift
+ $w0 = COPY %sub(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_s32_neg_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $w1, $x2
+ ; Check that we get ADDSWri when we can fold in a negative constant.
+ ;
+ ; CHECK-LABEL: name: ssubo_s32_neg_imm
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK: %copy:gpr32sp = COPY $w0
+ ; CHECK: %sub:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
+ ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $w0 = COPY %sub
+ ; CHECK: RET_ReallyLR implicit $w0
+ %copy:gpr(s32) = COPY $w0
+ %constant:gpr(s32) = G_CONSTANT i32 -16
+ %sub:gpr(s32), %overflow:gpr(s1) = G_SSUBO %copy, %constant
+ $w0 = COPY %sub(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_arith_extended
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $x0
+ ; Check that we get SUBSXrx.
+ ; CHECK-LABEL: name: ssubo_arith_extended
+ ; CHECK: liveins: $w0, $x0
+ ; CHECK: %reg0:gpr64sp = COPY $x0
+ ; CHECK: %reg1:gpr32 = COPY $w0
+ ; CHECK: %sub:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv
+ ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK: $x0 = COPY %sub
+ ; CHECK: RET_ReallyLR implicit $x0
+ %reg0:gpr(s64) = COPY $x0
+ %reg1:gpr(s32) = COPY $w0
+ %ext:gpr(s64) = G_ZEXT %reg1(s32)
+ %cst:gpr(s64) = G_CONSTANT i64 2
+ %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+ %sub:gpr(s64), %flags:gpr(s1) = G_SSUBO %reg0, %shift
+ $x0 = COPY %sub(s64)
+ RET_ReallyLR implicit $x0
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 9814f7625853..2cbe8315bc7e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -763,7 +763,8 @@ static inline bool isSVECpyImm(int64_t Imm) {
bool IsImm8 = int8_t(Imm) == Imm;
bool IsImm16 = int16_t(Imm & ~0xff) == Imm;
- if (std::is_same<int8_t, std::make_signed_t<T>>::value)
+ if (std::is_same<int8_t, std::make_signed_t<T>>::value ||
+ std::is_same<int8_t, T>::value)
return IsImm8 || uint8_t(Imm) == Imm;
if (std::is_same<int16_t, std::make_signed_t<T>>::value)
@@ -775,7 +776,8 @@ static inline bool isSVECpyImm(int64_t Imm) {
/// Returns true if Imm is valid for ADD/SUB.
template <typename T>
static inline bool isSVEAddSubImm(int64_t Imm) {
- bool IsInt8t = std::is_same<int8_t, std::make_signed_t<T>>::value;
+ bool IsInt8t = std::is_same<int8_t, std::make_signed_t<T>>::value ||
+ std::is_same<int8_t, T>::value;
return uint8_t(Imm) == Imm || (!IsInt8t && uint16_t(Imm & ~0xff) == Imm);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 9f7dfdf62482..75a9f2f5c80e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -88,8 +88,6 @@ public:
uint64_t Value, bool IsResolved,
const MCSubtargetInfo *STI) const override;
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
@@ -156,19 +154,6 @@ static unsigned AdrImmBits(unsigned Value) {
return (hi19 << 5) | (lo2 << 29);
}
-static bool valueFitsIntoFixupKind(unsigned Kind, uint64_t Value) {
- unsigned NumBits;
- switch(Kind) {
- case FK_Data_1: NumBits = 8; break;
- case FK_Data_2: NumBits = 16; break;
- case FK_Data_4: NumBits = 32; break;
- case FK_Data_8: NumBits = 64; break;
- default: return true;
- }
- return isUIntN(NumBits, Value) ||
- isIntN(NumBits, static_cast<int64_t>(Value));
-}
-
static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
uint64_t Value, MCContext &Ctx,
const Triple &TheTriple, bool IsResolved) {
@@ -343,9 +328,6 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
- if (!valueFitsIntoFixupKind(Fixup.getTargetKind(), Value))
- Ctx.reportError(Fixup.getLoc(), "fixup value too large for data type!");
- LLVM_FALLTHROUGH;
case FK_SecRel_2:
case FK_SecRel_4:
return Value;
@@ -463,11 +445,6 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
}
}
-bool AArch64AsmBackend::mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const {
- return false;
-}
-
bool AArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
const MCRelaxableFragment *DF,
@@ -781,7 +758,7 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
assert(TheTriple.isOSBinFormatELF() && "Invalid target");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- bool IsILP32 = Options.getABIName() == "ilp32";
+ bool IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32;
return new ELFAArch64AsmBackend(T, TheTriple, OSABI, /*IsLittleEndian=*/true,
IsILP32);
}
@@ -794,7 +771,7 @@ MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
assert(TheTriple.isOSBinFormatELF() &&
"Big endian is only supported for ELF targets!");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- bool IsILP32 = Options.getABIName() == "ilp32";
+ bool IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32;
return new ELFAArch64AsmBackend(T, TheTriple, OSABI, /*IsLittleEndian=*/false,
IsILP32);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index e5637dcab941..fcf67bd2f740 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -43,7 +43,7 @@ protected:
} // end anonymous namespace
AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32)
- : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
+ : MCELFObjectTargetWriter(/*Is64Bit*/ !IsILP32, OSABI, ELF::EM_AARCH64,
/*HasRelocationAddend*/ true),
IsILP32(IsILP32) {}
@@ -322,7 +322,11 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
return R_CLS(LDST64_ABS_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) {
+ AArch64MCExpr::VariantKind AddressLoc =
+ AArch64MCExpr::getAddressFrag(RefKind);
if (!IsILP32) {
+ if (AddressLoc == AArch64MCExpr::VK_LO15)
+ return ELF::R_AARCH64_LD64_GOTPAGE_LO15;
return ELF::R_AARCH64_LD64_GOT_LO12_NC;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 6dfda8217628..ec97e1c8b76a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -51,6 +51,61 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer {
OS << "\t.variant_pcs " << Symbol->getName() << "\n";
}
+ void EmitARM64WinCFIAllocStack(unsigned Size) override {
+ OS << "\t.seh_stackalloc " << Size << "\n";
+ }
+ void EmitARM64WinCFISaveR19R20X(int Offset) override {
+ OS << "\t.seh_save_r19r20_x " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFPLR(int Offset) override {
+ OS << "\t.seh_save_fplr " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFPLRX(int Offset) override {
+ OS << "\t.seh_save_fplr_x " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_reg x" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_reg_x x" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_regp x" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_regp_x x" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_lrpair x" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_freg d" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_freg_x d" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_fregp d" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_fregp_x d" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISetFP() override { OS << "\t.seh_set_fp\n"; }
+ void EmitARM64WinCFIAddFP(unsigned Size) override {
+ OS << "\t.seh_add_fp " << Size << "\n";
+ }
+ void EmitARM64WinCFINop() override { OS << "\t.seh_nop\n"; }
+ void EmitARM64WinCFISaveNext() override { OS << "\t.seh_save_next\n"; }
+ void EmitARM64WinCFIPrologEnd() override { OS << "\t.seh_endprologue\n"; }
+ void EmitARM64WinCFIEpilogStart() override { OS << "\t.seh_startepilogue\n"; }
+ void EmitARM64WinCFIEpilogEnd() override { OS << "\t.seh_endepilogue\n"; }
+ void EmitARM64WinCFITrapFrame() override { OS << "\t.seh_trap_frame\n"; }
+ void EmitARM64WinCFIMachineFrame() override { OS << "\t.seh_pushframe\n"; }
+ void EmitARM64WinCFIContext() override { OS << "\t.seh_context\n"; }
+ void EmitARM64WinCFIClearUnwoundToCall() override {
+ OS << "\t.seh_clear_unwound_to_call\n";
+ }
+
public:
AArch64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 38474d31460d..340120d2b9e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -849,7 +849,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
}
break;
}
- } else if (CnVal == 8) {
+ } else if (CnVal == 8 || CnVal == 9) {
// TLBI aliases
const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding);
if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits()))
@@ -1377,7 +1377,8 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
}
}
-void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
+void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, uint64_t Address,
+ unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1385,7 +1386,11 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
// If the label has already been resolved to an immediate offset (say, when
// we're running the disassembler), just print the immediate.
if (Op.isImm()) {
- O << "#" << formatImm(Op.getImm() * (1 << 12));
+ const int64_t Offset = Op.getImm() * 4096;
+ if (PrintBranchImmAsAddress)
+ O << formatHex((Address & -4096) + Offset);
+ else
+ O << "#" << Offset;
return;
}
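The address form printed here is the standard ADRP target computation; a standalone sketch of the arithmetic.

  #include <cstdint>

  // ADRP materializes the 4 KiB page of the instruction's own address plus a
  // page-scaled immediate; this mirrors the (Address & -4096) + Offset above.
  static uint64_t adrpTarget(uint64_t InstAddress, int64_t PageImm) {
    return (InstAddress & ~uint64_t(0xfff)) + PageImm * 4096;
  }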
@@ -1416,6 +1421,22 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
O << "#" << Val;
}
+void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Val = MI->getOperand(OpNo).getImm();
+ assert(MI->getOpcode() == AArch64::DSBnXS);
+
+ StringRef Name;
+ auto DB = AArch64DBnXS::lookupDBnXSByEncoding(Val);
+ Name = DB ? DB->Name : "";
+
+ if (!Name.empty())
+ O << Name;
+ else
+ O << "#" << Val;
+}
+
void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1623,3 +1644,10 @@ void AArch64InstPrinter::printGPR64as32(const MCInst *MI, unsigned OpNum,
unsigned Reg = MI->getOperand(OpNum).getReg();
O << getRegisterName(getWRegFromXReg(Reg));
}
+
+void AArch64InstPrinter::printGPR64x8(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ O << getRegisterName(MRI.getSubReg(Reg, AArch64::x8sub_0));
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 6da5f0e81c80..4be885e667d8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -30,6 +30,7 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
virtual void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
virtual bool printAliasInstr(const MCInst *MI, uint64_t Address,
@@ -155,10 +156,12 @@ protected:
void printVectorIndex(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printAdrpLabel(const MCInst *MI, unsigned OpNum,
+ void printAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printBarrierOption(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBarriernXSOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printMSRSystemRegister(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printMRSSystemRegister(const MCInst *MI, unsigned OpNum,
@@ -187,6 +190,8 @@ protected:
const MCSubtargetInfo &STI, raw_ostream &O);
void printGPR64as32(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printGPR64x8(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <int Width>
void printZPRasFPR(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -203,6 +208,7 @@ public:
void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
const MCSubtargetInfo &STI, raw_ostream &O) override;
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O) override;
bool printAliasInstr(const MCInst *MI, uint64_t Address,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 9a63e26dec19..68c721cb0d72 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -73,7 +73,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
// targeting ELF.
AssemblerDialect = AsmWriterVariant == Default ? Generic : AsmWriterVariant;
- CodePointerSize = 8;
+ CodePointerSize = T.getEnvironment() == Triple::GNUILP32 ? 4 : 8;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
@@ -111,7 +111,7 @@ AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() {
SupportsDebugInformation = true;
CodePointerSize = 8;
- CommentString = ";";
+ CommentString = "//";
ExceptionsType = ExceptionHandling::WinEH;
WinEHEncodingType = WinEH::EncodingType::Itanium;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 548e399e05a3..844bd6bbada9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -70,6 +70,7 @@ StringRef AArch64MCExpr::getVariantKindName() const {
case VK_ABS_PAGE_NC: return ":pg_hi21_nc:";
case VK_GOT: return ":got:";
case VK_GOT_PAGE: return ":got:";
+ case VK_GOT_PAGE_LO15: return ":gotpage_lo15:";
case VK_GOT_LO12: return ":got_lo12:";
case VK_GOTTPREL: return ":gottprel:";
case VK_GOTTPREL_PAGE: return ":gottprel:";
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index a82ff2e91426..d3e834a140b2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -46,6 +46,7 @@ public:
VK_G1 = 0x050,
VK_G2 = 0x060,
VK_G3 = 0x070,
+ VK_LO15 = 0x080,
VK_AddressFragBits = 0x0f0,
// Whether the final relocation is a checked one (where a linker should
@@ -82,6 +83,7 @@ public:
VK_PREL_G0_NC = VK_PREL | VK_G0 | VK_NC,
VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC,
VK_GOT_PAGE = VK_GOT | VK_PAGE,
+ VK_GOT_PAGE_LO15 = VK_GOT | VK_LO15 | VK_NC,
VK_DTPREL_G2 = VK_DTPREL | VK_G2,
VK_DTPREL_G1 = VK_DTPREL | VK_G1,
VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 209bff3a2311..3c2df1621e11 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -50,10 +50,14 @@ static MCInstrInfo *createAArch64MCInstrInfo() {
static MCSubtargetInfo *
createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- if (CPU.empty())
+ if (CPU.empty()) {
CPU = "generic";
- return createAArch64MCSubtargetInfoImpl(TT, CPU, FS);
+ if (TT.isArm64e())
+ CPU = "apple-a12";
+ }
+
+ return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index b0f414bd27ed..012661edbbfd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -373,7 +373,11 @@ void AArch64MachObjectWriter::recordRelocation(
Type == MachO::ARM64_RELOC_PAGE21 ||
Type == MachO::ARM64_RELOC_PAGEOFF12) &&
Value) {
- assert((Value & 0xff000000) == 0 && "Added relocation out of range!");
+ if (!isInt<24>(Value)) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "addend too big for relocation");
+ return;
+ }
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 48ed68f49263..f32a8f15b8a5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -11,12 +11,23 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetStreamer.h"
+#include "AArch64MCAsmInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool> MarkBTIProperty(
+ "aarch64-mark-bti-property", cl::Hidden,
+ cl::desc("Add .note.gnu.property with BTI to assembly files"),
+ cl::init(false));
+
//
// AArch64TargetStreamer Implemenation
//
@@ -37,8 +48,50 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
ConstantPools->emitForCurrentSection(Streamer);
}
-// finish() - write out any non-empty assembler constant pools.
-void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+// finish() - write out any non-empty assembler constant pools and
+// write out the .note.gnu.property section if needed.
+void AArch64TargetStreamer::finish() {
+ ConstantPools->emitAll(Streamer);
+
+ if (MarkBTIProperty)
+ emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
+}
+
+void AArch64TargetStreamer::emitNoteSection(unsigned Flags) {
+ if (Flags == 0)
+ return;
+
+ MCStreamer &OutStreamer = getStreamer();
+ MCContext &Context = OutStreamer.getContext();
+ // Emit a .note.gnu.property section with the flags.
+ MCSectionELF *Nt = Context.getELFSection(".note.gnu.property", ELF::SHT_NOTE,
+ ELF::SHF_ALLOC);
+ if (Nt->isRegistered()) {
+ SMLoc Loc;
+ Context.reportWarning(
+ Loc,
+ "The .note.gnu.property is not emitted because it is already present.");
+ return;
+ }
+ MCSection *Cur = OutStreamer.getCurrentSectionOnly();
+ OutStreamer.SwitchSection(Nt);
+
+ // Emit the note header.
+ OutStreamer.emitValueToAlignment(Align(8).value());
+ OutStreamer.emitIntValue(4, 4); // data size for "GNU\0"
+ OutStreamer.emitIntValue(4 * 4, 4); // Elf_Prop size
+ OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4);
+ OutStreamer.emitBytes(StringRef("GNU", 4)); // note name
+
+ // Emit the PAC/BTI properties.
+ OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4);
+ OutStreamer.emitIntValue(4, 4); // data size
+ OutStreamer.emitIntValue(Flags, 4); // data
+ OutStreamer.emitIntValue(0, 4); // pad
+
+ OutStreamer.endSection(Nt);
+ OutStreamer.SwitchSection(Cur);
+}
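The values emitted above form a single 8-byte-aligned ELF note; a rough sketch of its layout as a struct, with field names chosen for illustration only.

  #include <cstdint>

  // One .note.gnu.property note carrying a single AArch64 feature word.
  struct GnuPropertyNote {
    uint32_t NameSz = 4;   // strlen("GNU") + 1
    uint32_t DescSz = 16;  // one Elf_Prop: pr_type, pr_datasz, data, padding
    uint32_t Type;         // NT_GNU_PROPERTY_TYPE_0
    char     Name[4] = {'G', 'N', 'U', '\0'};
    uint32_t PrType;       // GNU_PROPERTY_AARCH64_FEATURE_1_AND
    uint32_t PrDataSz = 4;
    uint32_t PrData;       // e.g. GNU_PROPERTY_AARCH64_FEATURE_1_BTI
    uint32_t PrPad = 0;    // pad the descriptor to an 8-byte boundary
  };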
void AArch64TargetStreamer::emitInst(uint32_t Inst) {
char Buffer[4];
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 1af978a806d1..73dc1e5d4d2a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -33,6 +33,9 @@ public:
/// Emit contents of constant pool for the current section.
void emitCurrentConstantPool();
+ /// Callback used to implement the .note.gnu.property section.
+ void emitNoteSection(unsigned Flags);
+
/// Callback used to implement the .inst directive.
virtual void emitInst(uint32_t Inst);
@@ -40,12 +43,14 @@ public:
virtual void emitDirectiveVariantPCS(MCSymbol *Symbol) {};
virtual void EmitARM64WinCFIAllocStack(unsigned Size) {}
+ virtual void EmitARM64WinCFISaveR19R20X(int Offset) {}
virtual void EmitARM64WinCFISaveFPLR(int Offset) {}
virtual void EmitARM64WinCFISaveFPLRX(int Offset) {}
virtual void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) {}
@@ -53,9 +58,14 @@ public:
virtual void EmitARM64WinCFISetFP() {}
virtual void EmitARM64WinCFIAddFP(unsigned Size) {}
virtual void EmitARM64WinCFINop() {}
+ virtual void EmitARM64WinCFISaveNext() {}
virtual void EmitARM64WinCFIPrologEnd() {}
virtual void EmitARM64WinCFIEpilogStart() {}
virtual void EmitARM64WinCFIEpilogEnd() {}
+ virtual void EmitARM64WinCFITrapFrame() {}
+ virtual void EmitARM64WinCFIMachineFrame() {}
+ virtual void EmitARM64WinCFIContext() {}
+ virtual void EmitARM64WinCFIClearUnwoundToCall() {}
private:
std::unique_ptr<AssemblerConstantPools> ConstantPools;
@@ -86,12 +96,14 @@ public:
// The unwind codes on ARM64 Windows are documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
void EmitARM64WinCFIAllocStack(unsigned Size) override;
+ void EmitARM64WinCFISaveR19R20X(int Offset) override;
void EmitARM64WinCFISaveFPLR(int Offset) override;
void EmitARM64WinCFISaveFPLRX(int Offset) override;
void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override;
@@ -99,9 +111,15 @@ public:
void EmitARM64WinCFISetFP() override;
void EmitARM64WinCFIAddFP(unsigned Size) override;
void EmitARM64WinCFINop() override;
+ void EmitARM64WinCFISaveNext() override;
void EmitARM64WinCFIPrologEnd() override;
void EmitARM64WinCFIEpilogStart() override;
void EmitARM64WinCFIEpilogEnd() override;
+ void EmitARM64WinCFITrapFrame() override;
+ void EmitARM64WinCFIMachineFrame() override;
+ void EmitARM64WinCFIContext() override;
+ void EmitARM64WinCFIClearUnwoundToCall() override;
+
private:
void EmitARM64WinUnwindCode(unsigned UnwindCode, int Reg, int Offset);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index 03fbab5142a2..1c50706a26f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -28,6 +28,7 @@ public:
void EmitWinEHHandlerData(SMLoc Loc) override;
void EmitWindowsUnwindTables() override;
+ void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override;
void finishImpl() override;
};
@@ -36,7 +37,12 @@ void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
// We have to emit the unwind info now, because this directive
// actually switches to the .xdata section!
- EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo());
+ EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo(),
+ /* HandlerData = */ true);
+}
+
+void AArch64WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
+ EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false);
}
void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() {
@@ -85,6 +91,10 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAllocStack(unsigned Size) {
EmitARM64WinUnwindCode(Op, -1, Size);
}
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveR19R20X(int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveR19R20X, -1, Offset);
+}
+
void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLR(int Offset) {
EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLR, -1, Offset);
}
@@ -115,6 +125,11 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegPX(unsigned Reg,
EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegPX, Reg, Offset);
}
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveLRPair(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveLRPair, Reg, Offset);
+}
+
void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFReg(unsigned Reg,
int Offset) {
assert(Offset >= 0 && Offset <= 504 &&
@@ -150,6 +165,10 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFINop() {
EmitARM64WinUnwindCode(Win64EH::UOP_Nop, -1, 0);
}
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveNext() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveNext, -1, 0);
+}
+
// The functions below handle opcodes that can end up in either a prolog or
// an epilog, but not both.
void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIPrologEnd() {
@@ -188,6 +207,22 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogEnd() {
CurrentEpilog = nullptr;
}
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFITrapFrame() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_TrapFrame, -1, 0);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIMachineFrame() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_PushMachFrame, -1, 0);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIContext() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_Context, -1, 0);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIClearUnwoundToCall() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_ClearUnwoundToCall, -1, 0);
+}
+
MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
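
The four zero-operand hooks defined in the hunks above (EmitARM64WinCFITrapFrame, EmitARM64WinCFIMachineFrame, EmitARM64WinCFIContext, EmitARM64WinCFIClearUnwoundToCall) each record a single Win64EH opcode describing an exception frame rather than an ordinary register save, per the Microsoft ARM64 exception-handling documentation cited in the header above. A minimal, hypothetical sketch of driving them follows; the helper name and include path are assumptions, and the opcode comments restate the mappings visible in the hunks above.

// Hypothetical sketch, not part of the patch: record exception-frame codes.
#include "AArch64TargetStreamer.h" // path assumed, as in the earlier sketch

static void emitExampleTrapFrameUnwind(llvm::AArch64TargetStreamer &TS) {
  TS.EmitARM64WinCFIMachineFrame();       // Win64EH::UOP_PushMachFrame
  TS.EmitARM64WinCFIContext();            // Win64EH::UOP_Context
  TS.EmitARM64WinCFITrapFrame();          // Win64EH::UOP_TrapFrame
  TS.EmitARM64WinCFIClearUnwoundToCall(); // Win64EH::UOP_ClearUnwoundToCall
}
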
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
index e86f2a6ebde4..4eecf72862a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -206,10 +206,20 @@ def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>",
def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;
-def SVEArithUImmPat : ComplexPattern<i32, 1, "SelectSVEArithImm", []>;
+def SVEArithUImm8Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>;
+def SVEArithUImm16Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i16>", []>;
+def SVEArithUImm32Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i32>", []>;
+def SVEArithUImm64Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i64>", []>;
def SVEArithSImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;
-def SVEShiftImm64 : ComplexPattern<i32, 1, "SelectSVEShiftImm64<0, 64>", []>;
+def SVEShiftImmL8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 7>", []>;
+def SVEShiftImmL16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 15>", []>;
+def SVEShiftImmL32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 31>", []>;
+def SVEShiftImmL64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 63>", []>;
+def SVEShiftImmR8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 8, true>", []>;
+def SVEShiftImmR16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 16, true>", []>;
+def SVEShiftImmR32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 32, true>", []>;
+def SVEShiftImmR64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 64, true>", []>;
class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
let Name = "SVEExactFPImmOperand" # Suffix;
@@ -270,6 +280,8 @@ class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Inst{3-0} = Pd;
let Defs = !if(!eq (opc{0}, 1), [NZCV], []);
+ let ElementSize = pprty.ElementSize;
+ let isReMaterializable = 1;
}
multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
@@ -305,6 +317,18 @@ class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1)),
(inst $Op1)>;
+class SVE_1_Op_Passthru_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst>
+: Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)),
+ (inst $Op3, $Op1, $Op2)>;
+
+// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
+// type of rounding. This is matched by timm0_1 in pattern below and ignored.
+class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst>
+: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)),
+ (inst $Op3, $Op1, $Op2)>;
+
class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
ValueType it, ComplexPattern cpx, Instruction inst>
: Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))),
@@ -315,16 +339,6 @@ class SVE_1_Op_Imm_OptLsl_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty
: Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))),
(inst $Op1, i32:$imm, i32:$shift)>;
-class SVE_1_Op_Imm_Arith_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
- ValueType it, ComplexPattern cpx, Instruction inst>
- : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
- (inst $Op1, i32:$imm)>;
-
-class SVE_1_Op_Imm_Shift_Pred_Pat<ValueType vt, ValueType pt, SDPatternOperator op,
- ZPRRegOp zprty, Operand ImmTy, Instruction inst>
- : Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (ImmTy:$imm))))),
- (inst $Op1, ImmTy:$imm)>;
-
class SVE_1_Op_Imm_Arith_Pred_Pat<ValueType vt, ValueType pt, SDPatternOperator op,
ZPRRegOp zprty, ValueType it, ComplexPattern cpx, Instruction inst>
: Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
@@ -340,10 +354,11 @@ class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
(inst $Op1, $Op2)>;
-class SVE_2_Op_Pat_Reduce_To_Neon<ValueType vtd, SDPatternOperator op, ValueType vt1,
- ValueType vt2, Instruction inst, SubRegIndex sub>
-: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
- (INSERT_SUBREG (vtd (IMPLICIT_DEF)), (inst $Op1, $Op2), sub)>;
+class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op,
+ ValueType pt, ValueType vt1, ValueType vt2,
+ Instruction inst>
+: Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)),
+ (inst $Op1, $Op2)>;
class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst>
@@ -403,6 +418,23 @@ class SVE_2_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
(inst (ptrue 31), $Op1, $Op2)>;
+class SVE_InReg_Extend<ValueType vt, SDPatternOperator op, ValueType pt,
+ ValueType inreg_vt, Instruction inst>
+: Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)),
+ (inst $PassThru, $Pg, $Src)>;
+
+class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op,
+ ValueType pt, ValueType it,
+ ComplexPattern cast, Instruction inst>
+: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
+ (inst $Pg, $Rn, i32:$imm)>;
+
+class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op,
+ ValueType pt, ValueType it,
+ ComplexPattern cast, Instruction inst>
+: Pat<(vt (op (pt (AArch64ptrue 31)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
+ (inst $Rn, i32:$imm)>;
+
//
// Pseudo -> Instruction mappings
//
@@ -479,6 +511,8 @@ class sve_int_pfalse<bits<6> opc, string asm>
let Inst{9} = opc{0};
let Inst{8-4} = 0b00000;
let Inst{3-0} = Pd;
+
+ let isReMaterializable = 1;
}
class sve_int_ptest<bits<6> opc, string asm>
@@ -499,6 +533,7 @@ class sve_int_ptest<bits<6> opc, string asm>
let Inst{4-0} = 0b00000;
let Defs = [NZCV];
+ let isCompare = 1;
}
class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
@@ -979,8 +1014,8 @@ multiclass sve_int_perm_dup_i<string asm> {
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
}
-class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm,
- ZPRRegOp zprty, RegisterOperand VecList>
+class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty,
+ RegisterOperand VecList>
: I<(outs zprty:$Zd), (ins VecList:$Zn, zprty:$Zm),
asm, "\t$Zd, $Zn, $Zm",
"",
@@ -1022,6 +1057,8 @@ multiclass sve_int_perm_tbl<string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>;
}
multiclass sve2_int_perm_tbl<string asm, SDPatternOperator op> {
@@ -1064,6 +1101,11 @@ multiclass sve2_int_perm_tbl<string asm, SDPatternOperator op> {
(nxv2f64 (!cast<Instruction>(NAME # _D) (REG_SEQUENCE ZPR2, nxv2f64:$Op1, zsub0,
nxv2f64:$Op2, zsub1),
nxv2i64:$Op3))>;
+
+ def : Pat<(nxv8bf16 (op nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)),
+ (nxv8bf16 (!cast<Instruction>(NAME # _H) (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0,
+ nxv8bf16:$Op2, zsub1),
+ nxv8i16:$Op3))>;
}
class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
@@ -1099,6 +1141,8 @@ multiclass sve2_int_perm_tbx<string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>;
}
class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
@@ -1129,6 +1173,8 @@ multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> {
def : SVE_1_Op_Pat<nxv8f16, op, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Pat<nxv4f32, op, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Pat<nxv2f64, op, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_1_Op_Pat<nxv8bf16, op, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
@@ -1241,6 +1287,8 @@ multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, f16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, f64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, bf16, !cast<Instruction>(NAME # _H)>;
}
//===----------------------------------------------------------------------===//
@@ -1327,6 +1375,8 @@ multiclass sve_int_sel_vvv<string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _D)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+
def : InstAlias<"mov $Zd, $Pg/m, $Zn",
(!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>;
def : InstAlias<"mov $Zd, $Pg/m, $Zn",
@@ -1389,7 +1439,6 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
!cast<Instruction>(NAME), PTRUE_D>;
}
-
//===----------------------------------------------------------------------===//
// SVE Logical Mask Immediate Group
//===----------------------------------------------------------------------===//
@@ -1642,7 +1691,6 @@ multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
(!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, imm32_0_7:$imm)>;
}
-
//===----------------------------------------------------------------------===//
// SVE Floating Point Arithmetic - Unpredicated Group
//===----------------------------------------------------------------------===//
@@ -1665,7 +1713,8 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
+ SDPatternOperator predicated_op = null_frag> {
def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
@@ -1674,6 +1723,9 @@ multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pred_All_Active<nxv8f16, predicated_op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pred_All_Active<nxv4f32, predicated_op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pred_All_Active<nxv2f64, predicated_op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> {
@@ -2065,7 +2117,8 @@ class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _H : sve2_fp_pairwise_pred<0b01, opc, asm, ZPR16>;
def _S : sve2_fp_pairwise_pred<0b10, opc, asm, ZPR32>;
def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>;
@@ -2217,7 +2270,11 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm,
def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4f16, op, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f16, op, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv2f32, op, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _D)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
//===----------------------------------------------------------------------===//
@@ -2225,7 +2282,7 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm,
//===----------------------------------------------------------------------===//
class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
- RegisterOperand o_zprtype, ElementSizeEnum size>
+ RegisterOperand o_zprtype, ElementSizeEnum Sz>
: I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn),
asm, "\t$Zd, $Pg/m, $Zn",
"",
@@ -2244,17 +2301,51 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
- let ElementSize = size;
+ let ElementSize = Sz;
}
multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
RegisterOperand i_zprtype,
RegisterOperand o_zprtype,
- SDPatternOperator op, ValueType vt1,
+ SDPatternOperator int_op,
+ SDPatternOperator ir_op, ValueType vt1,
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
- def : SVE_3_Op_Pat<vt1, op, vt1, vt2, vt3, !cast<Instruction>(NAME)>;
+ // convert vt1 to a packed type for the intrinsic patterns
+ defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
+ !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
+ !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32,
+ 1 : vt1);
+
+ // convert vt3 to a packed type for the intrinsic patterns
+ defvar packedvt3 = !cond(!eq(!cast<string>(vt3), "nxv2f16"): nxv8f16,
+ !eq(!cast<string>(vt3), "nxv4f16"): nxv8f16,
+ !eq(!cast<string>(vt3), "nxv2f32"): nxv4f32,
+ 1 : vt3);
+
+ def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
+
+ def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
+}
+
+multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
+ RegisterOperand i_zprtype,
+ RegisterOperand o_zprtype,
+ SDPatternOperator int_op,
+ SDPatternOperator ir_op, ValueType vt1,
+ ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
+ def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
+
+ // convert vt1 to a packed type for the intrinsic patterns
+ defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
+ !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
+ !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32,
+ 1 : vt1);
+
+ def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;
+
+ def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
}
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
@@ -2262,9 +2353,12 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
- def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
@@ -2372,11 +2466,19 @@ multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm, string Ps,
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>;
- def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>;
- def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>;
- def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>;
+multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, string Ps,
+ SDPatternOperator op,
+ DestructiveInstTypeEnum flags> {
+ let DestructiveInstType = flags in {
+ def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>,
+ SVEPseudo2Instr<Ps # _B, 1>;
+ def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>;
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>;
+ }
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -2384,11 +2486,19 @@ multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, SDPatternOperator op
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>;
- def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>;
- def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>;
- def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>;
+multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm, string Ps,
+ SDPatternOperator op,
+ DestructiveInstTypeEnum flags> {
+ let DestructiveInstType = flags in {
+ def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>,
+ SVEPseudo2Instr<Ps # _B, 1>;
+ def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>;
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>;
+ }
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -2478,7 +2588,8 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op,
+ SDPatternOperator outerop, SDPatternOperator mulop> {
def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>;
def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>;
def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>;
@@ -2488,6 +2599,15 @@ multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op>
def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : Pat<(outerop nxv16i8:$Op1, (mulop nxv16i1:$pred, nxv16i8:$Op2, nxv16i8:$Op3)),
+ (!cast<Instruction>(NAME # _B) $pred, $Op1, $Op2, $Op3)>;
+ def : Pat<(outerop nxv8i16:$Op1, (mulop nxv8i1:$pred, nxv8i16:$Op2, nxv8i16:$Op3)),
+ (!cast<Instruction>(NAME # _H) $pred, $Op1, $Op2, $Op3)>;
+ def : Pat<(outerop nxv4i32:$Op1, (mulop nxv4i1:$pred, nxv4i32:$Op2, nxv4i32:$Op3)),
+ (!cast<Instruction>(NAME # _S) $pred, $Op1, $Op2, $Op3)>;
+ def : Pat<(outerop nxv2i64:$Op1, (mulop nxv2i1:$pred, nxv2i64:$Op2, nxv2i64:$Op3)),
+ (!cast<Instruction>(NAME # _D) $pred, $Op1, $Op2, $Op3)>;
}
//===----------------------------------------------------------------------===//
@@ -2591,7 +2711,8 @@ multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm,
// SVE2 Integer Multiply-Add Long - Indexed Group
//===----------------------------------------------------------------------===//
-multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm, SDPatternOperator op> {
+multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm,
+ SDPatternOperator op> {
def _S : sve2_int_mla_by_indexed_elem<0b10, { opc{3}, 0b0, opc{2-1}, ?, opc{0} },
asm, ZPR32, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
@@ -2841,7 +2962,8 @@ class sve2_int_mul<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op,
+ SDPatternOperator op_pred = null_frag> {
def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
def _H : sve2_int_mul<0b01, opc, asm, ZPR16>;
def _S : sve2_int_mul<0b10, opc, asm, ZPR32>;
@@ -2851,6 +2973,11 @@ multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pred_All_Active<nxv16i8, op_pred, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pred_All_Active<nxv8i16, op_pred, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pred_All_Active<nxv4i32, op_pred, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pred_All_Active<nxv2i64, op_pred, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_mul_single<bits<3> opc, string asm, SDPatternOperator op> {
@@ -3404,7 +3531,8 @@ multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm,
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm, SDPatternOperator op> {
+multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm,
ZPR32, ZPR32>;
def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm,
@@ -3448,7 +3576,7 @@ multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
- vecshiftR32> {
+ tvecshiftR32> {
let Inst{20-19} = imm{4-3};
}
def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
@@ -3488,7 +3616,7 @@ multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
- vecshiftR32> {
+ tvecshiftR32> {
let Inst{20-19} = imm{4-3};
}
def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
@@ -3649,10 +3777,10 @@ multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
@@ -3661,9 +3789,9 @@ multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_InReg_Extend<nxv8i16, op, nxv8i1, nxv8i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>;
+ def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
@@ -3671,15 +3799,15 @@ multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
SDPatternOperator op> {
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
@@ -3689,25 +3817,23 @@ multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
- def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm,
- SDPatternOperator op> {
+multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator op> {
def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
- def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -3876,10 +4002,10 @@ multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperato
def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>;
def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImmPat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImm8Pat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImm16Pat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImm32Pat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImm64Pat, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {
@@ -3888,10 +4014,10 @@ multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {
def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>;
def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv16i8, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv8i16, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv4i32, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv2i64, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -4004,6 +4130,7 @@ multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
+
def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
@@ -4162,6 +4289,8 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
let Inst{3-0} = Pd;
let Defs = [NZCV];
+ let ElementSize = pprty.ElementSize;
+ let isPTestLike = 1;
}
multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
@@ -4234,6 +4363,7 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let isPTestLike = 1;
}
multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
@@ -4293,6 +4423,8 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
let Inst{3-0} = Pd;
let Defs = [NZCV];
+ let ElementSize = pprty.ElementSize;
+ let isPTestLike = 1;
}
multiclass sve_int_ucmp_vi<bits<2> opc, string asm, CondCode cc,
@@ -4337,8 +4469,7 @@ class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
}
class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
- RegisterClass gprty, PPRRegOp pprty,
- ValueType vt, SDPatternOperator op>
+ RegisterClass gprty, PPRRegOp pprty>
: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm),
asm, "\t$Pd, $Rn, $Rm",
"", []>, Sched<[]> {
@@ -4356,30 +4487,32 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
let Inst{3-0} = Pd;
let Defs = [NZCV];
+ let ElementSize = pprty.ElementSize;
+ let isWhile = 1;
}
multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8, nxv16i1, op>;
- def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16, nxv8i1, op>;
- def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32, nxv4i1, op>;
- def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64, nxv2i1, op>;
+ def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>;
def : SVE_2_Op_Pat<nxv16i1, op, i32, i32, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<nxv8i1, op, i32, i32, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv4i1, op, i32, i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<nxv2i1, op, i32, i32, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv8i1, op, i32, i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, op, i32, i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, op, i32, i32, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8, nxv16i1, op>;
- def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16, nxv8i1, op>;
- def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32, nxv4i1, op>;
- def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64, nxv2i1, op>;
+ def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
def : SVE_2_Op_Pat<nxv16i1, op, i64, i64, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<nxv8i1, op, i64, i64, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv4i1, op, i64, i64, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<nxv2i1, op, i64, i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv8i1, op, i64, i64, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, op, i64, i64, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, op, i64, i64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
@@ -4400,6 +4533,8 @@ class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
let Inst{3-0} = Pd;
let Defs = [NZCV];
+ let ElementSize = pprty.ElementSize;
+ let isWhile = 1;
}
multiclass sve2_int_while_rr<bits<1> rw, string asm, string op> {
@@ -4412,7 +4547,6 @@ multiclass sve2_int_while_rr<bits<1> rw, string asm, string op> {
def : SVE_2_Op_Pat<nxv8i1, !cast<SDPatternOperator>(op # _h), i64, i64, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4i1, !cast<SDPatternOperator>(op # _s), i64, i64, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2i1, !cast<SDPatternOperator>(op # _d), i64, i64, !cast<Instruction>(NAME # _D)>;
-
}
//===----------------------------------------------------------------------===//
@@ -4443,12 +4577,14 @@ multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32asZPR>;
def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64asZPR>;
+ def : SVE_2_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
-
//===----------------------------------------------------------------------===//
// SVE Floating Point Accumulating Reduction Group
//===----------------------------------------------------------------------===//
@@ -4480,7 +4616,10 @@ multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {
def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32asZPR>;
def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64asZPR>;
+ def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
@@ -4701,10 +4840,11 @@ multiclass sve_int_index_rr<string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv4i32, op, i32, i32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2i64, op, i64, i64, !cast<Instruction>(NAME # _D)>;
}
-//
+
//===----------------------------------------------------------------------===//
// SVE Bitwise Shift - Predicated Group
//===----------------------------------------------------------------------===//
+
class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
@@ -4729,38 +4869,19 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string psName=""> {
- def _B : SVEPseudo2Instr<psName # _B, 1>,
+multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
+ SDPatternOperator op = null_frag> {
+ def _B : SVEPseudo2Instr<Ps # _B, 1>,
sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
- def _H : SVEPseudo2Instr<psName # _H, 1>,
- sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
- let Inst{8} = imm{3};
- }
- def _S : SVEPseudo2Instr<psName # _S, 1>,
- sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
- let Inst{9-8} = imm{4-3};
- }
- def _D : SVEPseudo2Instr<psName # _D, 1>,
- sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
- let Inst{22} = imm{5};
- let Inst{9-8} = imm{4-3};
- }
-}
-
-multiclass sve2_int_bin_pred_shift_imm_left<bits<4> opc, string asm,
- string psName,
- SDPatternOperator op> {
-
- def _B : SVEPseudo2Instr<psName # _B, 1>, sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
- def _H : SVEPseudo2Instr<psName # _H, 1>,
+ def _H : SVEPseudo2Instr<Ps # _H, 1>,
sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{8} = imm{3};
}
- def _S : SVEPseudo2Instr<psName # _S, 1>,
+ def _S : SVEPseudo2Instr<Ps # _S, 1>,
sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
let Inst{9-8} = imm{4-3};
}
- def _D : SVEPseudo2Instr<psName # _D, 1>,
+ def _D : SVEPseudo2Instr<Ps # _D, 1>,
sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
@@ -4772,6 +4893,16 @@ multiclass sve2_int_bin_pred_shift_imm_left<bits<4> opc, string asm,
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
}
+// As above but shift amount takes the form of a "vector immediate".
+multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm,
+ string Ps, SDPatternOperator op>
+: sve_int_bin_pred_shift_imm_left<opc, asm, Ps, null_frag> {
+ def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>;
+}
+
multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
@@ -4808,6 +4939,16 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
}
+// As above but shift amount takes the form of a "vector immediate".
+multiclass sve_int_bin_pred_shift_imm_right_dup<bits<4> opc, string asm,
+ string Ps, SDPatternOperator op>
+: sve_int_bin_pred_shift_imm_right<opc, asm, Ps, null_frag> {
+ def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>;
+}
+
multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = null_frag> {
def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftR8, FalseLanesZero>;
def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesZero>;
@@ -4948,10 +5089,10 @@ multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, vecshiftL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, vecshiftL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEShiftImm64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm,
@@ -4968,11 +5109,12 @@ multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, vecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, vecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEShiftImm64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>;
}
+
//===----------------------------------------------------------------------===//
// SVE Memory - Store Group
//===----------------------------------------------------------------------===//
@@ -5481,8 +5623,7 @@ class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
PPRRegOp pprty>
: I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm),
asm, "\t$Pd, $Pn, $Pm",
- "",
- []>, Sched<[]> {
+ "", []>, Sched<[]> {
bits<4> Pd;
bits<4> Pm;
bits<4> Pn;
@@ -5548,7 +5689,7 @@ class sve_int_rdffr_pred<bit s, string asm>
let Inst{4} = 0;
let Inst{3-0} = Pd;
- let Defs = !if(!eq (s, 1), [NZCV], []);
+ let Defs = !if(s, [NZCV], []);
let Uses = [FFR];
}
@@ -5675,9 +5816,11 @@ multiclass sve_int_perm_clast_vz<bit ab, string asm, SDPatternOperator op> {
def _S : sve_int_perm_clast_vz<0b10, ab, asm, ZPR32, FPR32>;
def _D : sve_int_perm_clast_vz<0b11, ab, asm, ZPR64, FPR64>;
- def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_3_Op_Pat<bf16, op, nxv8i1, bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
@@ -5717,6 +5860,8 @@ multiclass sve_int_perm_clast_zz<bit ab, string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm,
@@ -5779,6 +5924,8 @@ multiclass sve_int_perm_last_v<bit ab, string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pat<bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
@@ -5815,6 +5962,8 @@ multiclass sve_int_perm_splice<string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
@@ -5870,26 +6019,20 @@ multiclass sve_int_perm_rev_rbit<string asm, SDPatternOperator op> {
def _S : sve_int_perm_rev<0b10, 0b11, asm, ZPR32>;
def _D : sve_int_perm_rev<0b11, 0b11, asm, ZPR64>;
- def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_perm_rev_revb<string asm,
- SDPatternOperator int_op,
- SDPatternOperator ir_op> {
+multiclass sve_int_perm_rev_revb<string asm, SDPatternOperator op> {
def _H : sve_int_perm_rev<0b01, 0b00, asm, ZPR16>;
def _S : sve_int_perm_rev<0b10, 0b00, asm, ZPR32>;
def _D : sve_int_perm_rev<0b11, 0b00, asm, ZPR64>;
- def : SVE_3_Op_Pat<nxv8i16, int_op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4i32, int_op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2i64, int_op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_1_Op_AllActive_Pat<nxv8i16, ir_op, nxv8i16, !cast<Instruction>(NAME # _H), PTRUE_H>;
- def : SVE_1_Op_AllActive_Pat<nxv4i32, ir_op, nxv4i32, !cast<Instruction>(NAME # _S), PTRUE_S>;
- def : SVE_1_Op_AllActive_Pat<nxv2i64, ir_op, nxv2i64, !cast<Instruction>(NAME # _D), PTRUE_D>;
+ def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_perm_rev_revh<string asm, SDPatternOperator op> {
@@ -5988,7 +6131,6 @@ multiclass sve_int_perm_cpy_v<string asm, SDPatternOperator op> {
def : InstAlias<"mov $Zd, $Pg/m, $Vn",
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, FPR64:$Vn), 1>;
-
def : Pat<(nxv8f16 (op nxv8i1:$pg, f16:$splat, nxv8f16:$passthru)),
(!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>;
def : Pat<(nxv2f32 (op nxv2i1:$pg, f32:$splat, nxv2f32:$passthru)),
@@ -5997,6 +6139,9 @@ multiclass sve_int_perm_cpy_v<string asm, SDPatternOperator op> {
(!cast<Instruction>(NAME # _S) $passthru, $pg, $splat)>;
def : Pat<(nxv2f64 (op nxv2i1:$pg, f64:$splat, nxv2f64:$passthru)),
(!cast<Instruction>(NAME # _D) $passthru, $pg, $splat)>;
+
+ def : Pat<(nxv8bf16 (op nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)),
+ (!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>;
}
class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
@@ -6025,7 +6170,6 @@ multiclass sve_int_perm_compact<string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
-
//===----------------------------------------------------------------------===//
// SVE Memory - Contiguous Load Group
//===----------------------------------------------------------------------===//
@@ -6050,8 +6194,8 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
let Inst{4-0} = Zt;
let mayLoad = 1;
- let Uses = !if(!eq(nf, 1), [FFR], []);
- let Defs = !if(!eq(nf, 1), [FFR], []);
+ let Uses = !if(nf, [FFR], []);
+ let Defs = !if(nf, [FFR], []);
}
multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
@@ -6253,8 +6397,8 @@ class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm,
let Inst{4-0} = Zt;
let mayLoad = 1;
- let Uses = !if(!eq(ff, 1), [FFR], []);
- let Defs = !if(!eq(ff, 1), [FFR], []);
+ let Uses = !if(ff, [FFR], []);
+ let Defs = !if(ff, [FFR], []);
}
multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty,
@@ -6937,7 +7081,6 @@ multiclass sve_mem_64b_prfm_sv_lsl_scaled<bits<2> msz, string asm,
}
-
class sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty>
: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5),
asm, "\t$prfop, $Pg, [$Zn, $imm5]",
@@ -7021,7 +7164,6 @@ multiclass sve_int_bin_cons_misc_0_a_64_lsl<bits<2> opc, string asm> {
def _3 : sve_int_bin_cons_misc_0_a<opc, 0b11, asm, ZPR64, ZPR64ExtLSL64>;
}
-
//===----------------------------------------------------------------------===//
// SVE Integer Misc - Unpredicated Group
//===----------------------------------------------------------------------===//
@@ -7085,8 +7227,8 @@ multiclass sve_int_bin_cons_misc_0_c_fexpa<string asm, SDPatternOperator op> {
//===----------------------------------------------------------------------===//
class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
- ZPRRegOp zprty, RegisterClass regtype>
-: I<(outs regtype:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
+ ZPRRegOp zprty, FPRasZPROperand dstOpType>
+: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
asm, "\t$Vd, $Pg, $Zn",
"",
[]>, Sched<[]> {
@@ -7104,51 +7246,54 @@ class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
let Inst{4-0} = Vd;
}
-multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
- def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
- def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
+multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm,
+ SDPatternOperator op> {
+ def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>;
+ def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>;
+ def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>;
- def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
}
-multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm, SDPatternOperator op, SDPatternOperator opSaddv> {
- def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
- def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
- def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
- def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64>;
+multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm,
+ SDPatternOperator op> {
+ def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>;
+ def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>;
+ def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>;
+ def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64asZPR>;
- def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
- def : SVE_2_Op_Pat<i64, opSaddv, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_reduce_1<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8>;
- def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16>;
- def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32>;
- def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64>;
+multiclass sve_int_reduce_1<bits<3> opc, string asm,
+ SDPatternOperator op> {
+ def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8asZPR>;
+ def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16asZPR>;
+ def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32asZPR>;
+ def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64asZPR>;
- def : SVE_2_Op_Pat_Reduce_To_Neon<v16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B), bsub>;
- def : SVE_2_Op_Pat_Reduce_To_Neon<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H), hsub>;
- def : SVE_2_Op_Pat_Reduce_To_Neon<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S), ssub>;
- def : SVE_2_Op_Pat_Reduce_To_Neon<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D), dsub>;
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_reduce_2<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8>;
- def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16>;
- def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>;
- def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
+multiclass sve_int_reduce_2<bits<3> opc, string asm,
+ SDPatternOperator op> {
+ def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8asZPR>;
+ def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16asZPR>;
+ def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32asZPR>;
+ def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64asZPR>;
- def : SVE_2_Op_Pat_Reduce_To_Neon<v16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B), bsub>;
- def : SVE_2_Op_Pat_Reduce_To_Neon<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H), hsub>;
- def : SVE_2_Op_Pat_Reduce_To_Neon<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S), ssub>;
- def : SVE_2_Op_Pat_Reduce_To_Neon<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D), dsub>;
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
@@ -7253,7 +7398,7 @@ class sve_int_brkn<bit S, string asm>
let Inst{3-0} = Pdm;
let Constraints = "$Pdm = $_Pdm";
- let Defs = !if(!eq (S, 0b1), [NZCV], []);
+ let Defs = !if(S, [NZCV], []);
}
multiclass sve_int_brkn<bits<1> opc, string asm, SDPatternOperator op> {
@@ -7755,8 +7900,8 @@ multiclass sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand listty,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
(!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
- def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), (AddrCP GPR64sp:$base, gprty:$offset))),
- (!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, gprty:$offset)>;
+ def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), (AddrCP GPR64sp:$base, gprty:$offset))),
+ (!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, gprty:$offset)>;
}
//===----------------------------------------------------------------------===//
@@ -7790,6 +7935,7 @@ multiclass sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm, SDPatter
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME)>;
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
}
/// Addressing modes
@@ -7808,7 +7954,10 @@ multiclass sve_fp_bin_pred_hfd<SDPatternOperator op> {
def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
@@ -7833,3 +7982,19 @@ multiclass sve_int_bin_pred_sd<SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
+
+// Predicated pseudo integer two operand instructions. Second operand is an
+// immediate specified by imm_[bhsd].
+multiclass sve_int_shift_pred_bhsd<SDPatternOperator op,
+ ComplexPattern imm_b, ComplexPattern imm_h,
+ ComplexPattern imm_s, ComplexPattern imm_d> {
+ def _UNDEF_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesUndef>;
+ def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesUndef>;
+ def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesUndef>;
+ def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesUndef>;
+
+ def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Instruction>(NAME # _UNDEF_B)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Instruction>(NAME # _UNDEF_H)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Instruction>(NAME # _UNDEF_S)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Instruction>(NAME # _UNDEF_D)>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 0245dd1d611a..9911f33371c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -37,7 +37,7 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-#define DEBUG_TYPE "sve-intrinsic-opts"
+#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"
namespace llvm {
void initializeSVEIntrinsicOptsPass(PassRegistry &);
@@ -177,22 +177,50 @@ bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
if (isa<PHINode>(I->getArgOperand(0)))
return processPhiNode(I);
- // If we have a reinterpret intrinsic I of type A which is converting from
- // another reinterpret Y of type B, and the source type of Y is A, then we can
- // elide away both reinterprets if there are no other users of Y.
- auto *Y = isReinterpretToSVBool(I->getArgOperand(0));
- if (!Y)
- return false;
+ SmallVector<Instruction *, 32> CandidatesForRemoval;
+ Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr;
+
+ const auto *IVTy = cast<VectorType>(I->getType());
+
+ // Walk the chain of conversions.
+ while (Cursor) {
+ // If the type of the cursor has fewer lanes than the final result, zeroing
+ // must take place, which breaks the equivalence chain.
+ const auto *CursorVTy = cast<VectorType>(Cursor->getType());
+ if (CursorVTy->getElementCount().getKnownMinValue() <
+ IVTy->getElementCount().getKnownMinValue())
+ break;
+
+ // If the cursor has the same type as I, it is a viable replacement.
+ if (Cursor->getType() == IVTy)
+ EarliestReplacement = Cursor;
+
+ auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
- Value *SourceVal = Y->getArgOperand(0);
- if (I->getType() != SourceVal->getType())
+ // If this is not an SVE conversion intrinsic, this is the end of the chain.
+ if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
+ Intrinsic::aarch64_sve_convert_to_svbool ||
+ IntrinsicCursor->getIntrinsicID() ==
+ Intrinsic::aarch64_sve_convert_from_svbool))
+ break;
+
+ CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
+ Cursor = IntrinsicCursor->getOperand(0);
+ }
+
+ // If no viable replacement in the conversion chain was found, there is
+ // nothing to do.
+ if (!EarliestReplacement)
return false;
- I->replaceAllUsesWith(SourceVal);
+ I->replaceAllUsesWith(EarliestReplacement);
I->eraseFromParent();
- if (Y->use_empty())
- Y->eraseFromParent();
+ while (!CandidatesForRemoval.empty()) {
+ Instruction *Candidate = CandidatesForRemoval.pop_back_val();
+ if (Candidate->use_empty())
+ Candidate->eraseFromParent();
+ }
return true;
}
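
The rewritten optimizeConvertFromSVBool above walks backwards through a chain of convert_to/from_svbool calls and keeps the earliest value that already has the type of the final result, then drops the now-dead intermediate conversions. A minimal standalone sketch of that walk, with a made-up Node type standing in for the intrinsic chain (the lane-count check and the dead-code cleanup are omitted for brevity):

// Sketch only: collapse a round-trip conversion chain by keeping the earliest
// value whose type matches the final result. Node and earliestWithType are
// hypothetical names, not LLVM API.
#include <cassert>
#include <string>

struct Node {
  std::string Type;  // e.g. "nxv4i1", "nxv16i1"
  Node *Operand;     // source of the conversion, null at the chain head
};

// Walk from the final conversion toward the chain head and return the
// earliest node whose type equals WantedType, or null if none exists.
static Node *earliestWithType(Node *Final, const std::string &WantedType) {
  Node *Earliest = nullptr;
  for (Node *Cursor = Final->Operand; Cursor; Cursor = Cursor->Operand)
    if (Cursor->Type == WantedType)
      Earliest = Cursor; // keep walking: an earlier match is even better
  return Earliest;
}

int main() {
  // nxv4i1 -> nxv16i1 -> nxv4i1 : the outer pair of conversions is redundant.
  Node A{"nxv4i1", nullptr};
  Node B{"nxv16i1", &A};
  Node C{"nxv4i1", &B};
  assert(earliestWithType(&C, C.Type) == &A);
  return 0;
}
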
@@ -248,10 +276,8 @@ bool SVEIntrinsicOpts::runOnModule(Module &M) {
case Intrinsic::aarch64_sve_ptest_any:
case Intrinsic::aarch64_sve_ptest_first:
case Intrinsic::aarch64_sve_ptest_last:
- for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
- auto *Inst = dyn_cast<Instruction>(*I++);
- Functions.insert(Inst->getFunction());
- }
+ for (User *U : F.users())
+ Functions.insert(cast<Instruction>(U)->getFunction());
break;
default:
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index c27fc7a112ec..ac59d73fd9fd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -26,6 +26,13 @@ namespace llvm {
namespace llvm {
+ namespace AArch64DBnXS {
+#define GET_DBNXS_IMPL
+#include "AArch64GenSystemOperands.inc"
+ }
+}
+
+namespace llvm {
namespace AArch64DB {
#define GET_DB_IMPL
#include "AArch64GenSystemOperands.inc"
@@ -158,7 +165,7 @@ std::string AArch64SysReg::genericRegisterString(uint32_t Bits) {
namespace llvm {
namespace AArch64TLBI {
-#define GET_TLBI_IMPL
+#define GET_TLBITable_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 4e289fbe2325..1b13c94389cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -338,6 +338,14 @@ struct SysAliasReg : SysAlias {
: SysAlias(N, E, F), NeedsReg(R) {}
};
+struct SysAliasImm : SysAlias {
+ uint16_t ImmValue;
+ constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I)
+ : SysAlias(N, E), ImmValue(I) {}
+ constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I, FeatureBitset F)
+ : SysAlias(N, E, F), ImmValue(I) {}
+};
+
namespace AArch64AT{
struct AT : SysAlias {
using SysAlias::SysAlias;
@@ -354,6 +362,14 @@ namespace AArch64DB {
#include "AArch64GenSystemOperands.inc"
}
+namespace AArch64DBnXS {
+ struct DBnXS : SysAliasImm {
+ using SysAliasImm::SysAliasImm;
+ };
+ #define GET_DBNXS_DECL
+ #include "AArch64GenSystemOperands.inc"
+}
+
namespace AArch64DC {
struct DC : SysAlias {
using SysAlias::SysAlias;
@@ -552,7 +568,7 @@ namespace AArch64TLBI {
struct TLBI : SysAliasReg {
using SysAliasReg::SysAliasReg;
};
- #define GET_TLBI_DECL
+ #define GET_TLBITable_DECL
#include "AArch64GenSystemOperands.inc"
}
@@ -606,7 +622,7 @@ namespace AArch64II {
MO_HI12 = 7,
/// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the ".refptrp.FOO" symbol. This is used for
+ /// reference is actually to the ".refptr.FOO" symbol. This is used for
/// stub symbols on windows.
MO_COFFSTUB = 0x8,
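
SysAliasImm above simply extends SysAlias with one immediate field while staying a literal type, so the TableGen-emitted operand tables can remain constexpr. A self-contained analogue under those assumptions (the type names, mnemonics, encodings and immediates below are illustrative, not the real DBnXS table):

// Sketch: a derived alias-table entry carrying one extra immediate, usable in
// a constexpr table like those in AArch64GenSystemOperands.inc.
#include <cassert>
#include <cstdint>

struct Alias {
  const char *Name;
  uint16_t Encoding;
  constexpr Alias(const char *N, uint16_t E) : Name(N), Encoding(E) {}
};

struct AliasImm : Alias {
  uint16_t ImmValue;
  constexpr AliasImm(const char *N, uint16_t E, uint16_t I)
      : Alias(N, E), ImmValue(I) {}
};

// Placeholder table entries; values chosen only for the example.
constexpr AliasImm DBnXSLike[] = {
    {"oshnxs", 0x3, 16},
    {"nshnxs", 0x7, 16},
};

int main() {
  static_assert(DBnXSLike[0].ImmValue == 16, "immediate stored with the alias");
  assert(DBnXSLike[1].Encoding == 0x7);
  return 0;
}
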
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
index 88c79665be60..677c49331cd5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -10,8 +10,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
-#include "llvm/IR/IntrinsicsR600.h" // TODO: Sink this.
-#include "llvm/IR/IntrinsicsAMDGPU.h" // TODO: Sink this.
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/CodeGen.h"
namespace llvm {
@@ -52,7 +51,6 @@ FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
-FunctionPass *createSIFixupVectorISelPass();
FunctionPass *createSIAddIMGInitPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass();
@@ -69,12 +67,25 @@ FunctionPass *createSIPostRABundlerPass();
FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
+FunctionPass *createAMDGPULateCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
FunctionPass *createAMDGPURewriteOutArgumentsPass();
FunctionPass *createSIModeRegisterPass();
+struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
+ AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ TargetMachine &TM;
+};
+
+struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
@@ -106,12 +117,35 @@ ModulePass *createAMDGPULowerKernelAttributesPass();
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
extern char &AMDGPULowerKernelAttributesID;
+struct AMDGPULowerKernelAttributesPass
+ : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
extern char &AMDGPUPropagateAttributesEarlyID;
+struct AMDGPUPropagateAttributesEarlyPass
+ : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> {
+ AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ TargetMachine &TM;
+};
+
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
extern char &AMDGPUPropagateAttributesLateID;
+struct AMDGPUPropagateAttributesLatePass
+ : PassInfoMixin<AMDGPUPropagateAttributesLatePass> {
+ AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ TargetMachine &TM;
+};
+
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;
@@ -148,9 +182,6 @@ extern char &SIFixSGPRCopiesID;
void initializeSIFixVGPRCopiesPass(PassRegistry &);
extern char &SIFixVGPRCopiesID;
-void initializeSIFixupVectorISelPass(PassRegistry &);
-extern char &SIFixupVectorISelID;
-
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
@@ -202,11 +233,37 @@ FunctionPass *createAMDGPUPromoteAllocaToVector();
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
extern char &AMDGPUPromoteAllocaToVectorID;
+struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
+ AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ TargetMachine &TM;
+};
+
+struct AMDGPUPromoteAllocaToVectorPass
+ : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
+ AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ TargetMachine &TM;
+};
+
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(
TargetMachine *TM = nullptr,
CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
+
+struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
+ AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ bool GlobalOpt;
+};
+
ModulePass *createR600OpenCLImageTypeLoweringPass();
FunctionPass *createAMDGPUAnnotateUniformValues();
@@ -214,10 +271,19 @@ ModulePass *createAMDGPUPrintfRuntimeBinding();
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
extern char &AMDGPUPrintfRuntimeBindingID;
+struct AMDGPUPrintfRuntimeBindingPass
+ : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
ModulePass* createAMDGPUUnifyMetadataPass();
void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
extern char &AMDGPUUnifyMetadataID;
+struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
extern char &SIOptimizeExecMaskingPreRAID;
@@ -227,6 +293,9 @@ extern char &AMDGPUAnnotateUniformValuesPassID;
void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
extern char &AMDGPUCodeGenPrepareID;
+void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
+extern char &AMDGPULateCodeGenPrepareID;
+
void initializeSIAnnotateControlFlowPass(PassRegistry&);
extern char &SIAnnotateControlFlowPassID;
@@ -258,9 +327,6 @@ void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
-Pass *createAMDGPUFunctionInliningPass();
-void initializeAMDGPUInlinerPass(PassRegistry&);
-
ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
@@ -281,8 +347,6 @@ enum TargetIndex {
};
}
-} // End namespace llvm
-
/// OpenCL uses address spaces to differentiate between
/// various memory regions on the hardware. On the CPU
/// all of the address spaces point to the same memory,
@@ -339,4 +403,17 @@ namespace AMDGPUAS {
};
}
+namespace AMDGPU {
+
+// FIXME: Missing constant_32bit
+inline bool isFlatGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
+}
+}
+
+} // End namespace llvm
+
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
index e32f0fcc4771..c352c0097c5c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -90,7 +90,7 @@ def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
"UnalignedBufferAccess",
"true",
- "Support unaligned global loads and stores"
+ "Hardware supports unaligned global loads and stores"
>;
def FeatureTrapHandler: SubtargetFeature<"trap-handler",
@@ -105,6 +105,12 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
"Support unaligned scratch loads and stores"
>;
+def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access",
+ "UnalignedDSAccess",
+ "true",
+ "Hardware supports unaligned local and region loads and stores"
+>;
+
def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
"HasApertureRegs",
"true",
@@ -123,10 +129,10 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
>;
-def FeatureDoesNotSupportXNACK : SubtargetFeature<"no-xnack-support",
- "DoesNotSupportXNACK",
+def FeatureSupportsXNACK : SubtargetFeature<"xnack-support",
+ "SupportsXNACK",
"true",
- "Hardware does not support XNACK"
+ "Hardware supports XNACK"
>;
// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
@@ -157,7 +163,7 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
"LDSMisalignedBug",
"true",
- "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
+ "Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode"
>;
def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug",
@@ -220,6 +226,18 @@ def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug",
"Branch offset of 3f hardware bug"
>;
+def FeatureImageStoreD16Bug : SubtargetFeature<"image-store-d16-bug",
+ "HasImageStoreD16Bug",
+ "true",
+ "Image Store D16 hardware bug"
+>;
+
+def FeatureImageGather4D16Bug : SubtargetFeature<"image-gather4-d16-bug",
+ "HasImageGather4D16Bug",
+ "true",
+ "Image Gather4 D16 hardware bug"
+>;
+
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
"ldsbankcount"#Value,
"LDSBankCount",
@@ -473,16 +491,16 @@ def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts",
[FeatureFlatGlobalInsts]
>;
-def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
- "DoesNotSupportSRAMECC",
+def FeatureSupportsSRAMECC : SubtargetFeature<"sramecc-support",
+ "SupportsSRAMECC",
"true",
- "Hardware does not support SRAM ECC"
+ "Hardware supports SRAMECC"
>;
-def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
+def FeatureSRAMECC : SubtargetFeature<"sramecc",
"EnableSRAMECC",
"true",
- "Enable SRAM ECC"
+ "Enable SRAMECC"
>;
def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx",
@@ -626,19 +644,21 @@ def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
"Hardware automatically inserts waitcnt before barrier"
>;
-def FeatureCodeObjectV3 : SubtargetFeature <
- "code-object-v3",
- "CodeObjectV3",
- "true",
- "Generate code object version 3"
->;
-
def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
"HasTrigReducedRange",
"true",
"Requires use of fract on arguments to trig instructions"
>;
+// Alignment enforcement is controlled by a configuration register:
+// SH_MEM_CONFIG.alignment_mode
+def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
+ "UnalignedAccessMode",
+ "true",
+ "Enable unaligned global, local and region loads and stores if the hardware"
+ " supports it"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -655,8 +675,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
- FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
- FeatureDoesNotSupportXNACK]
+ FeatureTrigReducedRange]
>;
def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
@@ -665,7 +684,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureWavefrontSize64, FeatureFlatAddressSpace,
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
- FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC]
+ FeatureDsSrc2Insts, FeatureUnalignedBufferAccess]
>;
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
@@ -678,8 +697,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
- FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32
- ]
+ FeatureDsSrc2Insts, FeatureFastDenormalF32, FeatureUnalignedBufferAccess]
>;
def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
@@ -695,8 +713,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts,
- FeatureFastDenormalF32
- ]
+ FeatureFastDenormalF32, FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
+ FeatureSupportsXNACK]
>;
def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
@@ -712,8 +730,9 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
FeatureVOP3Literal, FeatureDPP8,
- FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
- FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16
+ FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
+ FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16,
+ FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
]
>;
@@ -724,102 +743,92 @@ class FeatureSet<list<SubtargetFeature> Features_> {
def FeatureISAVersion6_0_0 : FeatureSet<[FeatureSouthernIslands,
FeatureFastFMAF32,
HalfRate64Ops,
- FeatureLDSBankCount32,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureLDSBankCount32]>;
def FeatureISAVersion6_0_1 : FeatureSet<
[FeatureSouthernIslands,
- FeatureLDSBankCount32,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion6_0_2 : FeatureSet<
+ [FeatureSouthernIslands,
+ FeatureLDSBankCount32]>;
def FeatureISAVersion7_0_0 : FeatureSet<
[FeatureSeaIslands,
- FeatureLDSBankCount32,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureLDSBankCount32]>;
def FeatureISAVersion7_0_1 : FeatureSet<
[FeatureSeaIslands,
HalfRate64Ops,
FeatureLDSBankCount32,
- FeatureFastFMAF32,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureFastFMAF32]>;
def FeatureISAVersion7_0_2 : FeatureSet<
[FeatureSeaIslands,
FeatureLDSBankCount16,
- FeatureFastFMAF32,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureFastFMAF32]>;
def FeatureISAVersion7_0_3 : FeatureSet<
[FeatureSeaIslands,
- FeatureLDSBankCount16,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureLDSBankCount16]>;
def FeatureISAVersion7_0_4 : FeatureSet<
[FeatureSeaIslands,
- FeatureLDSBankCount32,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion7_0_5 : FeatureSet<
+ [FeatureSeaIslands,
+ FeatureLDSBankCount16]>;
def FeatureISAVersion8_0_1 : FeatureSet<
[FeatureVolcanicIslands,
FeatureFastFMAF32,
HalfRate64Ops,
FeatureLDSBankCount32,
- FeatureXNACK,
- FeatureUnpackedD16VMem,
- FeatureCodeObjectV3]>;
+ FeatureSupportsXNACK,
+ FeatureUnpackedD16VMem]>;
def FeatureISAVersion8_0_2 : FeatureSet<
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
FeatureSGPRInitBug,
- FeatureUnpackedD16VMem,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureUnpackedD16VMem]>;
def FeatureISAVersion8_0_3 : FeatureSet<
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
- FeatureUnpackedD16VMem,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureUnpackedD16VMem]>;
+
+def FeatureISAVersion8_0_5 : FeatureSet<
+ [FeatureVolcanicIslands,
+ FeatureLDSBankCount32,
+ FeatureSGPRInitBug,
+ FeatureUnpackedD16VMem]>;
def FeatureISAVersion8_1_0 : FeatureSet<
[FeatureVolcanicIslands,
FeatureLDSBankCount16,
- FeatureXNACK,
- FeatureCodeObjectV3]>;
+ FeatureSupportsXNACK,
+ FeatureImageStoreD16Bug,
+ FeatureImageGather4D16Bug]>;
def FeatureISAVersion9_0_0 : FeatureSet<
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
- FeatureCodeObjectV3,
- FeatureDoesNotSupportXNACK,
- FeatureDoesNotSupportSRAMECC]>;
+ FeatureImageGather4D16Bug]>;
def FeatureISAVersion9_0_2 : FeatureSet<
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
- FeatureXNACK,
- FeatureDoesNotSupportSRAMECC,
- FeatureCodeObjectV3]>;
+ FeatureImageGather4D16Bug]>;
def FeatureISAVersion9_0_4 : FeatureSet<
[FeatureGFX9,
FeatureLDSBankCount32,
FeatureFmaMixInsts,
- FeatureDoesNotSupportXNACK,
- FeatureDoesNotSupportSRAMECC,
- FeatureCodeObjectV3]>;
+ FeatureImageGather4D16Bug]>;
def FeatureISAVersion9_0_6 : FeatureSet<
[FeatureGFX9,
@@ -829,8 +838,8 @@ def FeatureISAVersion9_0_6 : FeatureSet<
FeatureDLInsts,
FeatureDot1Insts,
FeatureDot2Insts,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureSupportsSRAMECC,
+ FeatureImageGather4D16Bug]>;
def FeatureISAVersion9_0_8 : FeatureSet<
[FeatureGFX9,
@@ -847,16 +856,22 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeatureMAIInsts,
FeaturePkFmacF16Inst,
FeatureAtomicFaddInsts,
- FeatureSRAMECC,
+ FeatureSupportsSRAMECC,
FeatureMFMAInlineLiteralBug,
- FeatureCodeObjectV3]>;
+ FeatureImageGather4D16Bug]>;
def FeatureISAVersion9_0_9 : FeatureSet<
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
+ FeatureImageGather4D16Bug]>;
+
+def FeatureISAVersion9_0_C : FeatureSet<
+ [FeatureGFX9,
+ FeatureMadMixInsts,
+ FeatureLDSBankCount32,
FeatureXNACK,
- FeatureCodeObjectV3]>;
+ FeatureImageGather4D16Bug]>;
// TODO: Organize more features into groups.
def FeatureGroup {
@@ -889,8 +904,7 @@ def FeatureISAVersion10_1_0 : FeatureSet<
FeatureMadMacF32Insts,
FeatureDsSrc2Insts,
FeatureLdsMisalignedBug,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3])>;
+ FeatureSupportsXNACK])>;
def FeatureISAVersion10_1_1 : FeatureSet<
!listconcat(FeatureGroup.GFX10_1_Bugs,
@@ -910,8 +924,8 @@ def FeatureISAVersion10_1_1 : FeatureSet<
FeatureSMemTimeInst,
FeatureMadMacF32Insts,
FeatureDsSrc2Insts,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3])>;
+ FeatureLdsMisalignedBug,
+ FeatureSupportsXNACK])>;
def FeatureISAVersion10_1_2 : FeatureSet<
!listconcat(FeatureGroup.GFX10_1_Bugs,
@@ -932,8 +946,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
FeatureMadMacF32Insts,
FeatureDsSrc2Insts,
FeatureLdsMisalignedBug,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3])>;
+ FeatureSupportsXNACK])>;
def FeatureISAVersion10_3_0 : FeatureSet<
[FeatureGFX10,
@@ -946,9 +959,7 @@ def FeatureISAVersion10_3_0 : FeatureSet<
FeatureDot5Insts,
FeatureDot6Insts,
FeatureNSAEncoding,
- FeatureWavefrontSize32,
- FeatureDoesNotSupportXNACK,
- FeatureCodeObjectV3]>;
+ FeatureWavefrontSize32]>;
//===----------------------------------------------------------------------===//
@@ -1095,6 +1106,11 @@ def isGFX10Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
AssemblerPredicate<(all_of FeatureGFX10Insts)>;
+def isGFX10Before1030 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 &&"
+ "!Subtarget->hasGFX10_3Insts()">,
+ AssemblerPredicate<(all_of FeatureGFX10Insts,(not FeatureGFX10_3Insts))>;
+
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
@@ -1107,6 +1123,9 @@ def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts(
def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
AssemblerPredicate<(all_of FeatureGFX9Insts)>;
+def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">,
+ AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
+
def HasGFX10_BEncoding : Predicate<"Subtarget->hasGFX10_BEncoding()">,
AssemblerPredicate<(all_of FeatureGFX10_BEncoding)>;
@@ -1225,6 +1244,9 @@ def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">,
AssemblerPredicate<(all_of FeatureMAIInsts)>;
+def HasSMemRealTime : Predicate<"Subtarget->hasSMemRealTime()">,
+ AssemblerPredicate<(all_of FeatureSMemRealTime)>;
+
def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">,
AssemblerPredicate<(all_of FeatureSMemTimeInst)>;
@@ -1236,12 +1258,12 @@ def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">,
AssemblerPredicate<(all_of FeatureMadMacF32Insts)>;
+def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">,
+ AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
+
def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">,
AssemblerPredicate<(all_of FeatureAtomicFaddInsts)>;
-def HasNoMadMacF32Insts : Predicate<"!Subtarget->hasMadMacF32Insts()">,
- AssemblerPredicate<(all_of (not FeatureMadMacF32Insts))>;
-
def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">,
AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;
@@ -1251,6 +1273,13 @@ def HasOffset3fBug : Predicate<"!Subtarget->hasOffset3fBug()">,
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
+def EnableFlatScratch : Predicate<"Subtarget->enableFlatScratch()">;
+
+def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">;
+
+def HasUnalignedAccessMode : Predicate<"Subtarget->hasUnalignedAccessMode()">,
+ AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>;
+
// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index bb2aba044974..0ed89e9ca8d6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -10,27 +10,15 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUAliasAnalysis.h"
-#include "AMDGPU.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cassert>
+#include "llvm/IR/Instructions.h"
using namespace llvm;
#define DEBUG_TYPE "amdgpu-aa"
+AnalysisKey AMDGPUAA::Key;
+
// Register this pass...
char AMDGPUAAWrapperPass::ID = 0;
char AMDGPUExternalAAWrapper::ID = 0;
@@ -85,6 +73,44 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
if (Result == NoAlias)
return Result;
+ // In general, FLAT (generic) pointers could be aliased to LOCAL or PRIVATE
+ // pointers. However, as LOCAL or PRIVATE pointers point to local objects, in
+ // certain cases, it's still viable to check whether a FLAT pointer won't
+ // alias to a LOCAL or PRIVATE pointer.
+ MemoryLocation A = LocA;
+ MemoryLocation B = LocB;
+ // Canonicalize the location order to simplify the following alias check.
+ if (asA != AMDGPUAS::FLAT_ADDRESS) {
+ std::swap(asA, asB);
+ std::swap(A, B);
+ }
+ if (asA == AMDGPUAS::FLAT_ADDRESS &&
+ (asB == AMDGPUAS::LOCAL_ADDRESS || asB == AMDGPUAS::PRIVATE_ADDRESS)) {
+ const auto *ObjA =
+ getUnderlyingObject(A.Ptr->stripPointerCastsAndInvariantGroups());
+ if (const LoadInst *LI = dyn_cast<LoadInst>(ObjA)) {
+ // If a generic pointer is loaded from the constant address space, it
+ // could only be a GLOBAL or CONSTANT one as that address space is solely
+ // prepared on the host side, where only GLOBAL or CONSTANT variables are
+ // visible. Note that this even holds for regular functions.
+ if (LI->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+ return NoAlias;
+ } else if (const Argument *Arg = dyn_cast<Argument>(ObjA)) {
+ const Function *F = Arg->getParent();
+ switch (F->getCallingConv()) {
+ case CallingConv::AMDGPU_KERNEL:
+ // In the kernel function, kernel arguments won't alias to (local)
+ // variables in shared or private address space.
+ return NoAlias;
+ default:
+ // TODO: In the regular function, if that local variable in the
+ // location B is not captured, that argument pointer won't alias to it
+ // as well.
+ break;
+ }
+ }
+ }
+
// Forward the query to the next alias analysis.
return AAResultBase::alias(LocA, LocB, AAQI);
}
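
The new address-space logic above canonicalizes the location pair so the FLAT pointer comes first, then rules out aliasing against LOCAL or PRIVATE memory when the flat pointer provably originates from constant memory or from a kernel argument. A hedged standalone sketch of that shape, where an invented FromConstantAS flag stands in for the underlying-object inspection the real pass performs:

// Sketch: canonicalize the pair, then reject FLAT-vs-LOCAL/PRIVATE aliasing
// when the flat pointer is known to come from the constant address space.
#include <cassert>
#include <utility>

enum AddrSpace { FLAT, GLOBAL, CONSTANT, LOCAL, PRIVATE };

struct Loc {
  AddrSpace AS;
  bool FromConstantAS; // hypothetical stand-in for getUnderlyingObject checks
};

static bool mayAlias(Loc A, Loc B) {
  // Canonicalize so that a FLAT location, if any, sits in A.
  if (A.AS != FLAT)
    std::swap(A, B);
  // A flat pointer that provably points at GLOBAL/CONSTANT memory cannot
  // alias LOCAL or PRIVATE memory.
  if (A.AS == FLAT && (B.AS == LOCAL || B.AS == PRIVATE) && A.FromConstantAS)
    return false;
  return true; // conservatively assume aliasing otherwise
}

int main() {
  assert(!mayAlias({FLAT, true}, {LOCAL, false}));
  assert(mayAlias({FLAT, false}, {PRIVATE, false}));
  return 0;
}
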
@@ -96,7 +122,7 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return true;
- const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
+ const Value *Base = getUnderlyingObject(Loc.Ptr);
AS = Base->getType()->getPointerAddressSpace();
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index fd8889ea5c0d..44de40d4aa7f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -13,13 +13,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
#include "AMDGPU.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include <algorithm>
-#include <memory>
namespace llvm {
@@ -34,15 +28,17 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
const DataLayout &DL;
public:
- explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
- DL(DL) {}
+ explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {}
AMDGPUAAResult(AMDGPUAAResult &&Arg)
: AAResultBase(std::move(Arg)), DL(Arg.DL) {}
/// Handle invalidation events from the new pass manager.
///
/// By definition, this result is stateless and so remains valid.
- bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ return false;
+ }
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
AAQueryInfo &AAQI);
@@ -54,14 +50,13 @@ public:
class AMDGPUAA : public AnalysisInfoMixin<AMDGPUAA> {
friend AnalysisInfoMixin<AMDGPUAA>;
- static char PassID;
+ static AnalysisKey Key;
public:
using Result = AMDGPUAAResult;
AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
- return AMDGPUAAResult(F.getParent()->getDataLayout(),
- Triple(F.getParent()->getTargetTriple()));
+ return AMDGPUAAResult(F.getParent()->getDataLayout());
}
};
@@ -80,8 +75,7 @@ public:
const AMDGPUAAResult &getResult() const { return *Result; }
bool doInitialization(Module &M) override {
- Result.reset(new AMDGPUAAResult(M.getDataLayout(),
- Triple(M.getTargetTriple())));
+ Result.reset(new AMDGPUAAResult(M.getDataLayout()));
return false;
}
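
The header change swaps `static char PassID` for a `static AnalysisKey Key`, which is how AnalysisInfoMixin gives each new-pass-manager analysis a unique identity: the address of a per-class static object. A small self-contained sketch of that mechanism (the *Like types below are hypothetical, not LLVM's):

// Sketch: a per-class static object whose address serves as a unique,
// link-time-constant analysis ID.
#include <cassert>

struct AnalysisKeyLike {};

template <typename DerivedT> struct InfoMixinLike {
  static AnalysisKeyLike Key;
  static void *id() { return &Key; } // unique per DerivedT instantiation
};
template <typename DerivedT> AnalysisKeyLike InfoMixinLike<DerivedT>::Key;

struct AliasAnalysisA : InfoMixinLike<AliasAnalysisA> {};
struct AliasAnalysisB : InfoMixinLike<AliasAnalysisB> {};

int main() {
  assert(AliasAnalysisA::id() == AliasAnalysisA::id()); // stable identity
  assert(AliasAnalysisA::id() != AliasAnalysisB::id()); // distinct analyses
  return 0;
}
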
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 22947544ac07..51af25050950 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -15,9 +15,9 @@
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -32,8 +32,6 @@ static cl::opt<bool> StressCalls(
class AMDGPUAlwaysInline : public ModulePass {
bool GlobalOpt;
- void recursivelyVisitUsers(GlobalValue &GV,
- SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);
public:
static char ID;
@@ -53,16 +51,13 @@ INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
char AMDGPUAlwaysInline::ID = 0;
-void AMDGPUAlwaysInline::recursivelyVisitUsers(
- GlobalValue &GV,
- SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
- SmallVector<User *, 16> Stack;
+static void
+recursivelyVisitUsers(GlobalValue &GV,
+ SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
+ SmallVector<User *, 16> Stack(GV.users());
SmallPtrSet<const Value *, 8> Visited;
- for (User *U : GV.users())
- Stack.push_back(U);
-
while (!Stack.empty()) {
User *U = Stack.pop_back_val();
if (!Visited.insert(U).second)
@@ -86,12 +81,11 @@ void AMDGPUAlwaysInline::recursivelyVisitUsers(
continue;
}
- for (User *UU : U->users())
- Stack.push_back(UU);
+ append_range(Stack, U->users());
}
}
-bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
std::vector<GlobalAlias*> AliasesToRemove;
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
@@ -157,7 +151,16 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}
+bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+ return alwaysInlineImpl(M, GlobalOpt);
+}
+
ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
return new AMDGPUAlwaysInline(GlobalOpt);
}
+PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ alwaysInlineImpl(M, GlobalOpt);
+ return PreservedAnalyses::all();
+}
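
Two patterns show up in this file: the module walk is shared between the legacy pass and the new-PM pass via a static impl function, and recursivelyVisitUsers now seeds its worklist directly from GV.users() and grows it with append_range. A standalone sketch of that visit-each-user-once traversal over an invented Node graph (not the LLVM User hierarchy):

// Sketch: worklist traversal of transitive users with a visited set.
#include <cassert>
#include <set>
#include <vector>

struct Node {
  std::vector<Node *> Users;
};

static int countTransitiveUsers(const Node &Root) {
  std::vector<Node *> Stack(Root.Users.begin(), Root.Users.end()); // seed
  std::set<Node *> Visited;
  int Count = 0;
  while (!Stack.empty()) {
    Node *N = Stack.back();
    Stack.pop_back();
    if (!Visited.insert(N).second)
      continue; // already visited
    ++Count;
    // append_range equivalent: queue all users of N for later processing.
    Stack.insert(Stack.end(), N->Users.begin(), N->Users.end());
  }
  return Count;
}

int main() {
  Node A, B, C, Root;
  Root.Users = {&A, &B};
  A.Users = {&C};
  B.Users = {&C}; // C is reachable twice but counted once
  assert(countTransitiveUsers(Root) == 3);
  return 0;
}
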
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 625074569cfa..a4e72f787230 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -12,27 +12,12 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
+#include "GCNSubtarget.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-annotate-kernel-features"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 45f515c5115e..c2a4d67ea98e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -18,11 +18,8 @@
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "amdgpu-annotate-uniform"
@@ -108,9 +105,11 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
for (auto &BB : Checklist) {
BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
BasicBlock::iterator(Load) : BB->end();
- auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
- StartIt, BB, Load);
- if (Q.isClobber() || Q.isUnknown())
+ auto Q = MDR->getPointerDependencyFrom(
+ MemoryLocation::getBeforeOrAfter(Ptr), true, StartIt, BB, Load);
+ if (Q.isClobber() || Q.isUnknown() ||
+ // Store defines the load and thus clobbers it.
+ (Q.isDef() && Q.getInst()->mayWriteToMemory()))
return true;
}
return false;
@@ -140,10 +139,11 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
}
bool NotClobbered = false;
+ bool GlobalLoad = isGlobalLoad(I);
if (PtrI)
- NotClobbered = !isClobberedInFunction(&I);
+ NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
- if (isGlobalLoad(I) && !isClobberedInFunction(&I)) {
+ if (GlobalLoad && !isClobberedInFunction(&I)) {
NotClobbered = true;
// Lookup for the existing GEP
if (noClobberClones.count(Ptr)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index d078fc147a36..fb273a1650ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
+#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 576e6cfe929e..139ac3bab14c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -9,14 +9,13 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Pass.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
namespace llvm {
class Function;
+class LLT;
class raw_ostream;
class TargetRegisterClass;
class TargetRegisterInfo;
@@ -27,7 +26,7 @@ private:
friend class AMDGPUArgumentUsageInfo;
union {
- Register Reg;
+ MCRegister Reg;
unsigned StackOffset;
};
@@ -69,7 +68,7 @@ public:
return !IsStack;
}
- Register getRegister() const {
+ MCRegister getRegister() const {
assert(!IsStack);
return Reg;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index eef8fe2fc3b7..c655e5ec87b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,37 +17,27 @@
#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPUHSAMetadataStreamer.h"
+#include "AMDKernelCodeT.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
-#include "R600Defines.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/AMDGPUMetadata.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
using namespace llvm::AMDGPU;
-using namespace llvm::AMDGPU::HSAMD;
// We need to tell the runtime some amount ahead of time if we don't know the
// true stack size. Assume a smaller number if this is only due to dynamic /
@@ -108,10 +98,13 @@ extern "C" void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter() {
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {
- if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
- HSAMetadataStream.reset(new MetadataStreamerV3());
- else
- HSAMetadataStream.reset(new MetadataStreamerV2());
+ if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
+ if (isHsaAbiVersion2(getGlobalSTI())) {
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
+ } else {
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
+ }
+ }
}
StringRef AMDGPUAsmPrinter::getPassName() const {
@@ -129,7 +122,7 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
- if (IsaInfo::hasCodeObjectV3(getGlobalSTI())) {
+ if (isHsaAbiVersion3(getGlobalSTI())) {
std::string ExpectedTarget;
raw_string_ostream ExpectedTargetOS(ExpectedTarget);
IsaInfo::streamIsaVersion(getGlobalSTI(), ExpectedTargetOS);
@@ -147,7 +140,7 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
getTargetStreamer()->getPALMetadata()->readFromIR(M);
- if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
+ if (isHsaAbiVersion3(getGlobalSTI()))
return;
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
@@ -165,7 +158,8 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
if (!getTargetStreamer())
return;
- if (!IsaInfo::hasCodeObjectV3(getGlobalSTI())) {
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
+ isHsaAbiVersion2(getGlobalSTI())) {
// Emit ISA Version (NT_AMD_AMDGPU_ISA).
std::string ISAVersionString;
raw_string_ostream ISAVersionStream(ISAVersionString);
@@ -203,7 +197,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
const Function &F = MF->getFunction();
- if (!STM.hasCodeObjectV3() && STM.isAmdHsaOrMesa(F) &&
+ if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
amd_kernel_code_t KernelCode;
@@ -220,8 +214,8 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
if (!MFI.isEntryFunction())
return;
- if (!IsaInfo::hasCodeObjectV3(getGlobalSTI()) ||
- TM.getTargetTriple().getOS() != Triple::AMDHSA)
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
+ isHsaAbiVersion2(getGlobalSTI()))
return;
auto &Streamer = getTargetStreamer()->getStreamer();
@@ -256,8 +250,8 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
}
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
- if (IsaInfo::hasCodeObjectV3(getGlobalSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA) {
+ if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
+ isHsaAbiVersion3(getGlobalSTI())) {
AsmPrinter::emitFunctionEntryLabel();
return;
}
@@ -334,7 +328,7 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) {
// causing stale data in caches. Arguably this should be done by the linker,
// which is why this isn't done for Mesa.
const MCSubtargetInfo &STI = *getGlobalSTI();
- if (AMDGPU::isGFX10(STI) &&
+ if (AMDGPU::isGFX10Plus(STI) &&
(STI.getTargetTriple().getOS() == Triple::AMDHSA ||
STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
@@ -410,12 +404,12 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
assert(isUInt<32>(PI.ScratchSize));
- assert(isUInt<32>(PI.ComputePGMRSrc1));
+ assert(isUInt<32>(PI.getComputePGMRSrc1()));
assert(isUInt<32>(PI.ComputePGMRSrc2));
KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
- KernelDescriptor.compute_pgm_rsrc1 = PI.ComputePGMRSrc1;
+ KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
@@ -442,7 +436,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->SwitchSection(ConfigSection);
}
- if (MFI->isEntryFunction()) {
+ if (MFI->isModuleEntryFunction()) {
getSIProgramInfo(CurrentProgramInfo, MF);
} else {
auto I = CallGraphResourceInfo.insert(
@@ -452,9 +446,12 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Info = analyzeResourceUsage(MF);
}
- if (STM.isAmdPalOS())
- EmitPALMetadata(MF, CurrentProgramInfo);
- else if (!STM.isAmdHsaOS()) {
+ if (STM.isAmdPalOS()) {
+ if (MFI->isEntryFunction())
+ EmitPALMetadata(MF, CurrentProgramInfo);
+ else if (MFI->isModuleEntryFunction())
+ emitPALFunctionMetadata(MF);
+ } else if (!STM.isAmdHsaOS()) {
EmitProgramInfoSI(MF, CurrentProgramInfo);
}
@@ -532,6 +529,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
" WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
OutStreamer->emitRawComment(
+ " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
+ Twine(G_00B84C_SCRATCH_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
+ OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
OutStreamer->emitRawComment(
@@ -741,7 +741,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
llvm_unreachable("src_pops_exiting_wave_id should not be used");
case AMDGPU::NoRegister:
- assert(MI.isDebugInstr());
+ assert(MI.isDebugInstr() && "Instruction uses invalid noreg register");
continue;
case AMDGPU::VCC:
@@ -915,7 +915,22 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
= TII->getNamedOperand(MI, AMDGPU::OpName::callee);
const Function *Callee = getCalleeFunction(*CalleeOp);
- if (!Callee || Callee->isDeclaration()) {
+ DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
+ CallGraphResourceInfo.end();
+ bool IsExternal = !Callee || Callee->isDeclaration();
+ if (!IsExternal)
+ I = CallGraphResourceInfo.find(Callee);
+
+ if (IsExternal || I == CallGraphResourceInfo.end()) {
+ // Avoid crashing on undefined behavior with an illegal call to a
+ // kernel. If a callsite's calling convention doesn't match the
+ // function's, it's undefined behavior. If the callsite calling
+ // convention does match, that would have errored earlier.
+ // FIXME: The verifier shouldn't allow this.
+ if (!IsExternal &&
+ AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
+ report_fatal_error("invalid call to entry function");
+
// If this is a call to an external function, we can't do much. Make
// conservative guesses.
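
The restructured callee handling above first decides whether the callee is external, then consults CallGraphResourceInfo, and falls back to conservative guesses when either step fails. A rough sketch of that lookup-or-default shape, with an invented ResourceInfo type and placeholder numbers rather than the real register limits:

// Sketch: unknown or external callees get conservative defaults, known
// callees contribute their cached resource usage.
#include <cassert>
#include <map>
#include <string>

struct ResourceInfo { int NumSGPR; int NumVGPR; };

static ResourceInfo
resolveCallee(const std::map<std::string, ResourceInfo> &Cache,
              const std::string *Callee) {
  const bool IsExternal = Callee == nullptr;
  auto I = IsExternal ? Cache.end() : Cache.find(*Callee);
  if (IsExternal || I == Cache.end())
    return {102, 256}; // placeholder conservative guess for an unknown body
  return I->second;
}

int main() {
  std::map<std::string, ResourceInfo> Cache{{"helper", {24, 40}}};
  std::string Known = "helper";
  ResourceInfo R = resolveCallee(Cache, &Known);
  assert(R.NumSGPR == 24 && R.NumVGPR == 40);
  assert(resolveCallee(Cache, nullptr).NumVGPR == 256); // external callee
  return 0;
}
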
@@ -936,19 +951,6 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
// We force CodeGen to run in SCC order, so the callee's register
// usage etc. should be the cumulative usage of all callees.
- auto I = CallGraphResourceInfo.find(Callee);
- if (I == CallGraphResourceInfo.end()) {
- // Avoid crashing on undefined behavior with an illegal call to a
- // kernel. If a callsite's calling convention doesn't match the
- // function's, it's undefined behavior. If the callsite calling
- // convention does match, that would have errored earlier.
- // FIXME: The verifier shouldn't allow this.
- if (AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
- report_fatal_error("invalid call to entry function");
-
- llvm_unreachable("callee should have been handled before caller");
- }
-
MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
@@ -989,7 +991,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.FlatUsed = Info.UsesFlatScratch;
ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
- if (!isUInt<32>(ProgInfo.ScratchSize)) {
+ const uint64_t MaxScratchPerWorkitem =
+ GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize();
+ if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
ProgInfo.ScratchSize, DS_Error);
MF.getFunction().getContext().diagnose(DiagStackSize);
@@ -1023,18 +1027,26 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// Account for extra SGPRs and VGPRs reserved for debugger use.
ProgInfo.NumSGPR += ExtraSGPRs;
+ const Function &F = MF.getFunction();
+
// Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
// dispatch registers are function args.
unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
- for (auto &Arg : MF.getFunction().args()) {
- unsigned NumRegs = (Arg.getType()->getPrimitiveSizeInBits() + 31) / 32;
- if (Arg.hasAttribute(Attribute::InReg))
- WaveDispatchNumSGPR += NumRegs;
- else
- WaveDispatchNumVGPR += NumRegs;
+
+ if (isShader(F.getCallingConv())) {
+ // FIXME: We should be using the number of registers determined during
+ // calling convention lowering to legalize the types.
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (auto &Arg : F.args()) {
+ unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
+ if (Arg.hasAttribute(Attribute::InReg))
+ WaveDispatchNumSGPR += NumRegs;
+ else
+ WaveDispatchNumVGPR += NumRegs;
+ }
+ ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
+ ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
}
- ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
- ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
// Adjust number of registers used to meet default/requested minimum/maximum
// number of waves per execution unit request.
@@ -1129,18 +1141,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.MemOrdered = 1;
}
- ProgInfo.ComputePGMRSrc1 =
- S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
- S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
- S_00B848_PRIORITY(ProgInfo.Priority) |
- S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
- S_00B848_PRIV(ProgInfo.Priv) |
- S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
- S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
- S_00B848_IEEE_MODE(ProgInfo.IEEEMode) |
- S_00B848_WGP_MODE(ProgInfo.WgpMode) |
- S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
-
// 0 = X, 1 = XY, 2 = XYZ
unsigned TIDIGCompCnt = 0;
if (MFI->hasWorkItemIDZ())
@@ -1189,7 +1189,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
OutStreamer->emitInt32(R_00B848_COMPUTE_PGM_RSRC1);
- OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc1);
+ OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1());
OutStreamer->emitInt32(R_00B84C_COMPUTE_PGM_RSRC2);
OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
@@ -1238,12 +1238,10 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
MD->setEntryPoint(CC, MF.getFunction().getName());
MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
- if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
- MD->setRsrc1(CC, CurrentProgramInfo.ComputePGMRSrc1);
+ MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
+ if (AMDGPU::isCompute(CC)) {
MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
} else {
- MD->setRsrc1(CC, S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
- S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks));
if (CurrentProgramInfo.ScratchBlocks > 0)
MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
}
@@ -1260,6 +1258,16 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
MD->setWave32(MF.getFunction().getCallingConv());
}
+void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
+ auto *MD = getTargetStreamer()->getPALMetadata();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ MD->setFunctionScratchSize(MF, MFI.getStackSize());
+ // Set compute registers
+ MD->setRsrc1(CallingConv::AMDGPU_CS,
+ CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
+ MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
+}
+
// This is supposed to be log2(Size)
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
switch (Size) {
@@ -1287,7 +1295,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
Out.compute_pgm_resource_registers =
- CurrentProgramInfo.ComputePGMRSrc1 |
+ CurrentProgramInfo.getComputePGMRSrc1() |
(CurrentProgramInfo.ComputePGMRSrc2 << 32);
Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
@@ -1296,7 +1304,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
AMD_HSA_BITS_SET(Out.code_properties,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
- getElementByteSizeValue(STM.getMaxPrivateElementSize()));
+ getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
if (MFI->hasPrivateSegmentBuffer()) {
Out.code_properties |=
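The hunks above replace the flat 32-bit scratch-size check with a per-workitem bound derived from the per-wave limit (GCNSubtarget::MaxWaveScratchSize divided by the wavefront size). A standalone sketch of that arithmetic, using made-up limit values rather than the real subtarget constants:

#include <cstdint>
#include <cstdio>

// Hypothetical numbers for illustration only; the real limit comes from
// GCNSubtarget::MaxWaveScratchSize and STM.getWavefrontSize().
static bool scratchSizeFits(uint64_t ScratchPerWorkitem,
                            uint64_t MaxWaveScratchSize,
                            unsigned WavefrontSize) {
  const uint64_t MaxScratchPerWorkitem = MaxWaveScratchSize / WavefrontSize;
  return ScratchPerWorkitem <= MaxScratchPerWorkitem; // else: DS_Error diagnostic
}

int main() {
  // With an assumed 8 MiB per-wave budget and wave64, each workitem may use
  // at most 128 KiB of scratch before the asm printer diagnoses an error.
  std::printf("%d\n", scratchSizeFits(128u * 1024, 8ull << 20, 64)); // 1
  std::printf("%d\n", scratchSizeFits(192u * 1024, 8ull << 20, 64)); // 0
}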
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 54e8338ab4b0..9e1e26d65d8c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -14,19 +14,10 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
-#include "AMDGPU.h"
-#include "AMDKernelCodeT.h"
-#include "AMDGPUHSAMetadataStreamer.h"
#include "SIProgramInfo.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Support/AMDHSAKernelDescriptor.h"
-#include <cstddef>
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <string>
-#include <vector>
+
+struct amd_kernel_code_t;
namespace llvm {
@@ -36,6 +27,16 @@ class MCCodeEmitter;
class MCOperand;
class GCNSubtarget;
+namespace AMDGPU {
+namespace HSAMD {
+class MetadataStreamer;
+}
+} // namespace AMDGPU
+
+namespace amdhsa {
+struct kernel_descriptor_t;
+}
+
class AMDGPUAsmPrinter final : public AsmPrinter {
private:
// Track resource usage for callee functions.
@@ -78,6 +79,7 @@ private:
const SIProgramInfo &KernelInfo);
void EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);
+ void emitPALFunctionMetadata(const MachineFunction &MF);
void emitCommonFunctionComments(uint32_t NumVGPR,
Optional<uint32_t> NumAGPR,
uint32_t TotalNumVGPR,
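The header hunk above trims AMDGPUAsmPrinter.h down to forward declarations (struct amd_kernel_code_t, amdhsa::kernel_descriptor_t, AMDGPU::HSAMD::MetadataStreamer) instead of pulling in the full headers. A self-contained illustration of the idiom, with hypothetical names and a dummy field; only the part of the file standing in for the .cpp needs the complete type:

// "Header" part: a reference to an incomplete type is fine in a declaration.
struct amd_kernel_code_t; // opaque here

class ExamplePrinter {
public:
  void getAmdKernelCode(amd_kernel_code_t &Out) const;
};

// "Implementation" part: here the full definition is visible (in the real
// tree this is AMDGPUAsmPrinter.cpp including AMDKernelCodeT.h).
struct amd_kernel_code_t {
  unsigned version_major = 0; // hypothetical field for the example
};

void ExamplePrinter::getAmdKernelCode(amd_kernel_code_t &Out) const {
  Out.version_major = 1;
}

int main() {
  amd_kernel_code_t Code;
  ExamplePrinter().getAmdKernelCode(Code);
  return Code.version_major == 1 ? 0 : 1;
}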
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index c9d25d4250d5..aae2a54c198b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -14,13 +14,14 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
+#include "GCNSubtarget.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#define DEBUG_TYPE "amdgpu-atomic-optimizer"
@@ -404,6 +405,11 @@ static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
}
}
+static Value *buildMul(IRBuilder<> &B, Value *LHS, Value *RHS) {
+ const ConstantInt *CI = dyn_cast<ConstantInt>(LHS);
+ return (CI && CI->isOne()) ? RHS : B.CreateMul(LHS, RHS);
+}
+
void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
AtomicRMWInst::BinOp Op,
unsigned ValIdx,
@@ -523,7 +529,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// old value times the number of active lanes.
Value *const Ctpop = B.CreateIntCast(
B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
- NewV = B.CreateMul(V, Ctpop);
+ NewV = buildMul(B, V, Ctpop);
break;
}
@@ -543,7 +549,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// old value times the parity of the number of active lanes.
Value *const Ctpop = B.CreateIntCast(
B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
- NewV = B.CreateMul(V, B.CreateAnd(Ctpop, 1));
+ NewV = buildMul(B, V, B.CreateAnd(Ctpop, 1));
break;
}
}
@@ -622,7 +628,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
llvm_unreachable("Unhandled atomic op");
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
- LaneOffset = B.CreateMul(V, Mbcnt);
+ LaneOffset = buildMul(B, V, Mbcnt);
break;
case AtomicRMWInst::And:
case AtomicRMWInst::Or:
@@ -633,7 +639,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
LaneOffset = B.CreateSelect(Cond, Identity, V);
break;
case AtomicRMWInst::Xor:
- LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1));
+ LaneOffset = buildMul(B, V, B.CreateAnd(Mbcnt, 1));
break;
}
}
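The new buildMul helper above skips multiplications by a literal 1 that the atomic optimizer otherwise emits when scaling the per-lane value by ctpop/mbcnt results. A standalone model (plain integers, not the IR the pass builds) of the wave-level atomic-add rewrite those hunks are part of:

#include <bit>
#include <cstdint>
#include <cstdio>

// One atomic add of V * popcount(exec) replaces one atomic per active lane;
// each lane then recovers its own pre-op value as Old + V * (active lanes
// with a lower index), which is what the mbcnt-based LaneOffset computes.
// When V is the literal 1, buildMul drops the multiply entirely.
static uint64_t waveAtomicAdd(uint64_t &Mem, uint64_t V, uint64_t Exec,
                              unsigned Lane, uint64_t &LaneOld) {
  const uint64_t Old = Mem;
  Mem += V * std::popcount(Exec);                     // single atomic op
  const uint64_t Below = Exec & ((1ull << Lane) - 1); // lanes before this one
  LaneOld = Old + V * std::popcount(Below);           // per-lane old value
  return Old;
}

int main() {
  uint64_t Mem = 100, LaneOld = 0;
  // Lanes 0, 1 and 3 active, each adding 5; lane 3 observes 100 + 2*5 = 110.
  waveAtomicAdd(Mem, 5, 0b1011, 3, LaneOld);
  std::printf("%llu %llu\n", (unsigned long long)Mem,
              (unsigned long long)LaneOld); // 115 110
}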
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 949dcea3aa18..852a05b3c181 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -14,31 +14,45 @@
#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
-#include "AMDGPUISelLowering.h"
-#include "AMDGPUSubtarget.h"
+#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
-#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+
+#define DEBUG_TYPE "amdgpu-call-lowering"
using namespace llvm;
namespace {
-struct OutgoingValueHandler : public CallLowering::ValueHandler {
- OutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
- MachineInstrBuilder MIB, CCAssignFn *AssignFn)
- : ValueHandler(B, MRI, AssignFn), MIB(MIB) {}
+struct AMDGPUValueHandler : public CallLowering::ValueHandler {
+ AMDGPUValueHandler(bool IsIncoming, MachineIRBuilder &B,
+ MachineRegisterInfo &MRI, CCAssignFn *AssignFn)
+ : ValueHandler(IsIncoming, B, MRI, AssignFn) {}
- MachineInstrBuilder MIB;
+ /// Wrapper around extendRegister to ensure we extend to a full 32-bit
+ /// register.
+ Register extendRegisterMin32(Register ValVReg, CCValAssign &VA) {
+ if (VA.getLocVT().getSizeInBits() < 32) {
+ // 16-bit types are reported as legal for 32-bit registers. We need to
+ // extend and do a 32-bit copy to avoid the verifier complaining about it.
+ return MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
+ }
+
+ return extendRegister(ValVReg, VA);
+ }
+};
+
+struct AMDGPUOutgoingValueHandler : public AMDGPUValueHandler {
+ AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : AMDGPUValueHandler(false, B, MRI, AssignFn), MIB(MIB) {}
- bool isIncomingArgumentHandler() const override { return false; }
+ MachineInstrBuilder MIB;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -52,13 +66,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
- Register ExtReg;
- if (VA.getLocVT().getSizeInBits() < 32) {
- // 16-bit types are reported as legal for 32-bit registers. We need to
- // extend and do a 32-bit copy to avoid the verifier complaining about it.
- ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
- } else
- ExtReg = extendRegister(ValVReg, VA);
+ Register ExtReg = extendRegisterMin32(ValVReg, VA);
// If this is a scalar return, insert a readfirstlane just in case the value
// ends up in a VGPR.
@@ -85,12 +93,12 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
}
};
-struct IncomingArgHandler : public CallLowering::ValueHandler {
+struct AMDGPUIncomingArgHandler : public AMDGPUValueHandler {
uint64_t StackUsed = 0;
- IncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
- CCAssignFn *AssignFn)
- : ValueHandler(B, MRI, AssignFn) {}
+ AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : AMDGPUValueHandler(true, B, MRI, AssignFn) {}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -148,21 +156,107 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
/// parameters (it's a basic-block live-in), and a call instruction
/// (it's an implicit-def of the BL).
virtual void markPhysRegUsed(unsigned PhysReg) = 0;
-
- // FIXME: What is the point of this being a callback?
- bool isIncomingArgumentHandler() const override { return true; }
};
-struct FormalArgHandler : public IncomingArgHandler {
+struct FormalArgHandler : public AMDGPUIncomingArgHandler {
FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
- : IncomingArgHandler(B, MRI, AssignFn) {}
+ : AMDGPUIncomingArgHandler(B, MRI, AssignFn) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
};
+struct CallReturnHandler : public AMDGPUIncomingArgHandler {
+ CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : AMDGPUIncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIB.addDef(PhysReg, RegState::Implicit);
+ }
+
+ MachineInstrBuilder MIB;
+};
+
+struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {
+ MachineInstrBuilder MIB;
+ CCAssignFn *AssignFnVarArg;
+
+ /// For tail calls, the byte offset of the call's argument area from the
+ /// callee's. Unused elsewhere.
+ int FPDiff;
+
+ // Cache the SP register vreg if we need it more than once in this call site.
+ Register SPReg;
+
+ bool IsTailCall;
+
+ AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
+ CCAssignFn *AssignFn, CCAssignFn *AssignFnVarArg,
+ bool IsTailCall = false, int FPDiff = 0)
+ : AMDGPUValueHandler(false, MIRBuilder, MRI, AssignFn), MIB(MIB),
+ AssignFnVarArg(AssignFnVarArg), FPDiff(FPDiff), IsTailCall(IsTailCall) {
+ }
+
+ Register getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
+ const LLT S32 = LLT::scalar(32);
+
+ if (IsTailCall) {
+ llvm_unreachable("implement me");
+ }
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (!SPReg)
+ SPReg = MIRBuilder.buildCopy(PtrTy, MFI->getStackPtrOffsetReg()).getReg(0);
+
+ auto OffsetReg = MIRBuilder.buildConstant(S32, Offset);
+
+ auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
+ MPO = MachinePointerInfo::getStack(MF, Offset);
+ return AddrReg.getReg(0);
+ }
+
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign &VA) override {
+ MIB.addUse(PhysReg, RegState::Implicit);
+ Register ExtReg = extendRegisterMin32(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
+ }
+
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ MachineFunction &MF = MIRBuilder.getMF();
+ uint64_t LocMemOffset = VA.getLocMemOffset();
+ const auto &ST = MF.getSubtarget<GCNSubtarget>();
+
+ auto MMO = MF.getMachineMemOperand(
+ MPO, MachineMemOperand::MOStore, Size,
+ commonAlignment(ST.getStackAlignment(), LocMemOffset));
+ MIRBuilder.buildStore(ValVReg, Addr, *MMO);
+ }
+
+ void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr,
+ uint64_t MemSize, MachinePointerInfo &MPO,
+ CCValAssign &VA) override {
+ Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
+ ? extendRegister(Arg.Regs[0], VA)
+ : Arg.Regs[0];
+
+ // If we extended the value type we might need to adjust the MMO's
+ // Size. This happens if ComputeValueVTs widened a small type value to a
+ // legal register type (e.g. s8->s16)
+ const LLT RegTy = MRI.getType(ValVReg);
+ MemSize = std::min(MemSize, (uint64_t)RegTy.getSizeInBytes());
+ assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA);
+ }
+};
}
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
@@ -183,48 +277,64 @@ static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
}
}
-void AMDGPUCallLowering::splitToValueTypes(
- MachineIRBuilder &B,
- const ArgInfo &OrigArg, unsigned OrigArgIdx,
- SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL, CallingConv::ID CallConv,
- SplitArgTy PerformArgSplit) const {
+// FIXME: This should move to generic code.
+void AMDGPUCallLowering::splitToValueTypes(MachineIRBuilder &B,
+ const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL,
+ CallingConv::ID CallConv) const {
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
- if (OrigArg.Ty->isVoidTy())
- return;
-
SmallVector<EVT, 4> SplitVTs;
ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
assert(OrigArg.Regs.size() == SplitVTs.size());
- int SplitIdx = 0;
- for (EVT VT : SplitVTs) {
- Register Reg = OrigArg.Regs[SplitIdx];
- Type *Ty = VT.getTypeForEVT(Ctx);
- LLT LLTy = getLLTForType(*Ty, DL);
+ if (SplitVTs.size() == 0)
+ return;
- if (OrigArgIdx == AttributeList::ReturnIndex && VT.isScalarInteger()) {
- unsigned ExtendOp = TargetOpcode::G_ANYEXT;
- if (OrigArg.Flags[0].isSExt()) {
- assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
- ExtendOp = TargetOpcode::G_SEXT;
- } else if (OrigArg.Flags[0].isZExt()) {
- assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
- ExtendOp = TargetOpcode::G_ZEXT;
- }
+ if (SplitVTs.size() == 1) {
+ // No splitting to do, but we want to replace the original type (e.g. [1 x
+ // double] -> double).
+ SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
+ OrigArg.Flags[0], OrigArg.IsFixed);
+ return;
+ }
- EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
- extOpcodeToISDExtOpcode(ExtendOp));
- if (ExtVT != VT) {
- VT = ExtVT;
- Ty = ExtVT.getTypeForEVT(Ctx);
- LLTy = getLLTForType(*Ty, DL);
- Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
- }
- }
+ // Create one ArgInfo for each virtual register in the original ArgInfo.
+ assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ OrigArg.Ty, CallConv, false);
+ for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
+ Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
+ SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
+ OrigArg.IsFixed);
+ if (NeedsRegBlock)
+ SplitArgs.back().Flags[0].setInConsecutiveRegs();
+ }
+
+ SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
+}
+
+void AMDGPUCallLowering::processSplitArgs(
+ MachineIRBuilder &B, const ArgInfo &OrigArg,
+ const SmallVectorImpl<ArgInfo> &SplitArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs, const DataLayout &DL,
+ CallingConv::ID CallConv, bool IsOutgoing,
+ SplitArgTy PerformArgSplit) const {
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+
+ // FIXME: This is mostly nasty pre-processing before handleAssignments. Most
+ // of this should be performed by handleAssignments.
+
+ for (int SplitIdx = 0, e = SplitArg.size(); SplitIdx != e; ++SplitIdx) {
+ const ArgInfo &CurSplitArg = SplitArg[SplitIdx];
+ Register Reg = OrigArg.Regs[SplitIdx];
+ EVT VT = EVT::getEVT(CurSplitArg.Ty);
+ LLT LLTy = getLLTForType(*CurSplitArg.Ty, DL);
unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
@@ -232,9 +342,8 @@ void AMDGPUCallLowering::splitToValueTypes(
if (NumParts == 1) {
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
- SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);
-
- ++SplitIdx;
+ SplitArgs.emplace_back(Reg, CurSplitArg.Ty, OrigArg.Flags,
+ OrigArg.IsFixed);
continue;
}
@@ -252,21 +361,9 @@ void AMDGPUCallLowering::splitToValueTypes(
}
PerformArgSplit(SplitRegs, Reg, LLTy, PartLLT, SplitIdx);
-
- ++SplitIdx;
}
}
-// Get the appropriate type to make \p OrigTy \p Factor times bigger.
-static LLT getMultipleType(LLT OrigTy, int Factor) {
- if (OrigTy.isVector()) {
- return LLT::vector(OrigTy.getNumElements() * Factor,
- OrigTy.getElementType());
- }
-
- return LLT::scalar(OrigTy.getSizeInBits() * Factor);
-}
-
// TODO: Move to generic code
static void unpackRegsToOrigType(MachineIRBuilder &B,
ArrayRef<Register> DstRegs,
@@ -276,34 +373,67 @@ static void unpackRegsToOrigType(MachineIRBuilder &B,
LLT PartTy) {
assert(DstRegs.size() > 1 && "Nothing to unpack");
- const unsigned SrcSize = SrcTy.getSizeInBits();
const unsigned PartSize = PartTy.getSizeInBits();
if (SrcTy.isVector() && !PartTy.isVector() &&
PartSize > SrcTy.getElementType().getSizeInBits()) {
// Vector was scalarized, and the elements extended.
- auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(),
- SrcReg);
+ auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
for (int i = 0, e = DstRegs.size(); i != e; ++i)
B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
return;
}
- if (SrcSize % PartSize == 0) {
+ LLT GCDTy = getGCDType(SrcTy, PartTy);
+ if (GCDTy == PartTy) {
+ // If this is already evenly divisible, we can create a simple unmerge.
B.buildUnmerge(DstRegs, SrcReg);
return;
}
- const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;
+ MachineRegisterInfo &MRI = *B.getMRI();
+ LLT DstTy = MRI.getType(DstRegs[0]);
+ LLT LCMTy = getLCMType(SrcTy, PartTy);
+
+ const unsigned LCMSize = LCMTy.getSizeInBits();
+ const unsigned DstSize = DstTy.getSizeInBits();
+ const unsigned SrcSize = SrcTy.getSizeInBits();
+
+ Register UnmergeSrc = SrcReg;
+ if (LCMSize != SrcSize) {
+ // Widen to the common type.
+ Register Undef = B.buildUndef(SrcTy).getReg(0);
+ SmallVector<Register, 8> MergeParts(1, SrcReg);
+ for (unsigned Size = SrcSize; Size != LCMSize; Size += SrcSize)
+ MergeParts.push_back(Undef);
- LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
- auto ImpDef = B.buildUndef(BigTy);
+ UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);
+ }
- auto Big = B.buildInsert(BigTy, ImpDef.getReg(0), SrcReg, 0).getReg(0);
+ // Unmerge to the original registers and pad with dead defs.
+ SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
+ for (unsigned Size = DstSize * DstRegs.size(); Size != LCMSize;
+ Size += DstSize) {
+ UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
+ }
- int64_t Offset = 0;
- for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
- B.buildExtract(DstRegs[i], Big, Offset);
+ B.buildUnmerge(UnmergeResults, UnmergeSrc);
+}
+
+bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
+ CallingConv::ID CallConv,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ bool IsVarArg) const {
+ // For shaders: vector types should be explicitly handled by the CC.
+ if (AMDGPU::isEntryFunctionCC(CallConv))
+ return true;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
+ MF.getFunction().getContext());
+
+ return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
}
/// Lower the return value for the already existing \p Ret. This assumes that
@@ -318,31 +448,77 @@ bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
const auto &F = MF.getFunction();
const DataLayout &DL = MF.getDataLayout();
MachineRegisterInfo *MRI = B.getMRI();
+ LLVMContext &Ctx = F.getContext();
CallingConv::ID CC = F.getCallingConv();
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
- ArgInfo OrigRetInfo(VRegs, Val->getType());
- setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
- SmallVector<ArgInfo, 4> SplitRetInfos;
+ SmallVector<EVT, 8> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
+
+ // We pre-process the return value decomposed into EVTs.
+ SmallVector<ArgInfo, 8> PreSplitRetInfos;
+
+ // Further processing is applied to split the arguments from PreSplitRetInfos
+ // into 32-bit pieces in SplitRetInfos before passing off to
+ // handleAssignments.
+ SmallVector<ArgInfo, 8> SplitRetInfos;
- splitToValueTypes(
- B, OrigRetInfo, AttributeList::ReturnIndex, SplitRetInfos, DL, CC,
- [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
- int VTSplitIdx) {
- unpackRegsToOrigType(B, Regs, SrcReg,
- SplitRetInfos[VTSplitIdx],
- LLTy, PartLLT);
- });
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ EVT VT = SplitEVTs[i];
+ Register Reg = VRegs[i];
+ ArgInfo RetInfo(Reg, VT.getTypeForEVT(Ctx));
+ setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+
+ if (VT.isScalarInteger()) {
+ unsigned ExtendOp = TargetOpcode::G_ANYEXT;
+ if (RetInfo.Flags[0].isSExt()) {
+ assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
+ ExtendOp = TargetOpcode::G_SEXT;
+ } else if (RetInfo.Flags[0].isZExt()) {
+ assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
+ ExtendOp = TargetOpcode::G_ZEXT;
+ }
+
+ EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
+ extOpcodeToISDExtOpcode(ExtendOp));
+ if (ExtVT != VT) {
+ RetInfo.Ty = ExtVT.getTypeForEVT(Ctx);
+ LLT ExtTy = getLLTForType(*RetInfo.Ty, DL);
+ Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
+ }
+ }
+
+ if (Reg != RetInfo.Regs[0]) {
+ RetInfo.Regs[0] = Reg;
+ // Reset the arg flags after modifying Reg.
+ setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+ }
+
+ splitToValueTypes(B, RetInfo, PreSplitRetInfos, DL, CC);
+
+ // FIXME: This splitting should mostly be done by handleAssignments
+ processSplitArgs(B, RetInfo,
+ PreSplitRetInfos, SplitRetInfos, DL, CC, true,
+ [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy,
+ LLT PartLLT, int VTSplitIdx) {
+ unpackRegsToOrigType(B, Regs, SrcReg,
+ PreSplitRetInfos[VTSplitIdx], LLTy,
+ PartLLT);
+ });
+ PreSplitRetInfos.clear();
+ }
CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
- OutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
+ AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
return handleAssignments(B, SplitRetInfos, RetHandler);
}
-bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
- const Value *Val,
- ArrayRef<Register> VRegs) const {
+bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
+ ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const {
MachineFunction &MF = B.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -353,8 +529,8 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
const bool IsShader = AMDGPU::isShader(CC);
- const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
- AMDGPU::isKernel(CC);
+ const bool IsWaveEnd =
+ (IsShader && MFI->returnsVoid()) || AMDGPU::isKernel(CC);
if (IsWaveEnd) {
B.buildInstr(AMDGPU::S_ENDPGM)
.addImm(0);
@@ -373,7 +549,9 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
Ret.addUse(ReturnAddrVReg);
}
- if (!lowerReturnVal(B, Val, VRegs, Ret))
+ if (!FLI.CanLowerReturn)
+ insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
+ else if (!lowerReturnVal(B, Val, VRegs, Ret))
return false;
if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
@@ -389,24 +567,19 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
return true;
}
-Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B,
- Type *ParamTy,
- uint64_t Offset) const {
-
+void AMDGPUCallLowering::lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
+ Type *ParamTy,
+ uint64_t Offset) const {
MachineFunction &MF = B.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- const Function &F = MF.getFunction();
- const DataLayout &DL = F.getParent()->getDataLayout();
- PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
- LLT PtrType = getLLTForType(*PtrTy, DL);
Register KernArgSegmentPtr =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);
- return B.buildPtrAdd(PtrType, KernArgSegmentVReg, OffsetReg).getReg(0);
+ B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
}
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, Type *ParamTy,
@@ -417,7 +590,10 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, Type *ParamTy,
const DataLayout &DL = F.getParent()->getDataLayout();
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
- Register PtrReg = lowerParameterPtr(B, ParamTy, Offset);
+
+ LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+ Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
+ lowerParameterPtr(PtrReg, B, ParamTy, Offset);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo,
@@ -504,12 +680,15 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
// TODO: Align down to dword alignment and extract bits for extending loads.
for (auto &Arg : F.args()) {
- Type *ArgTy = Arg.getType();
+ const bool IsByRef = Arg.hasByRefAttr();
+ Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
if (AllocSize == 0)
continue;
- Align ABIAlign = DL.getABITypeAlign(ArgTy);
+ MaybeAlign ABIAlign = IsByRef ? Arg.getParamAlign() : None;
+ if (!ABIAlign)
+ ABIAlign = DL.getABITypeAlign(ArgTy);
uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
@@ -519,16 +698,34 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
continue;
}
- ArrayRef<Register> OrigArgRegs = VRegs[i];
- Register ArgReg =
- OrigArgRegs.size() == 1
- ? OrigArgRegs[0]
- : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
-
Align Alignment = commonAlignment(KernArgBaseAlign, ArgOffset);
- lowerParameter(B, ArgTy, ArgOffset, Alignment, ArgReg);
- if (OrigArgRegs.size() > 1)
- unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
+
+ if (IsByRef) {
+ unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();
+
+ assert(VRegs[i].size() == 1 &&
+ "expected only one register for byval pointers");
+ if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
+ lowerParameterPtr(VRegs[i][0], B, ArgTy, ArgOffset);
+ } else {
+ const LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+ Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
+ lowerParameterPtr(PtrReg, B, ArgTy, ArgOffset);
+
+ B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
+ }
+ } else {
+ ArrayRef<Register> OrigArgRegs = VRegs[i];
+ Register ArgReg =
+ OrigArgRegs.size() == 1
+ ? OrigArgRegs[0]
+ : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
+
+ lowerParameter(B, ArgTy, ArgOffset, Alignment, ArgReg);
+ if (OrigArgRegs.size() > 1)
+ unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
+ }
+
++i;
}
@@ -649,8 +846,8 @@ static void packSplitRegsToOrigType(MachineIRBuilder &B,
}
bool AMDGPUCallLowering::lowerFormalArguments(
- MachineIRBuilder &B, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const {
+ MachineIRBuilder &B, const Function &F, ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
CallingConv::ID CC = F.getCallingConv();
// The infrastructure for normal calling convention lowering is essentially
@@ -659,7 +856,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
if (CC == CallingConv::AMDGPU_KERNEL)
return lowerFormalArgumentsKernel(B, F, VRegs);
- const bool IsShader = AMDGPU::isShader(CC);
+ const bool IsGraphics = AMDGPU::isGraphics(CC);
const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);
MachineFunction &MF = B.getMF();
@@ -688,11 +885,16 @@ bool AMDGPUCallLowering::lowerFormalArguments(
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
-
+ SmallVector<ArgInfo, 8> SplitArg;
SmallVector<ArgInfo, 32> SplitArgs;
unsigned Idx = 0;
unsigned PSInputNum = 0;
+ // Insert the hidden sret parameter if the return value won't fit in the
+ // return registers.
+ if (!FLI.CanLowerReturn)
+ insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
+
for (auto &Arg : F.args()) {
if (DL.getTypeStoreSize(Arg.getType()) == 0)
continue;
@@ -700,7 +902,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
const bool InReg = Arg.hasAttribute(Attribute::InReg);
// SGPR arguments to functions not implemented.
- if (!IsShader && InReg)
+ if (!IsGraphics && InReg)
return false;
if (Arg.hasAttribute(Attribute::SwiftSelf) ||
@@ -733,16 +935,18 @@ bool AMDGPUCallLowering::lowerFormalArguments(
const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
setArgFlags(OrigArg, OrigArgIdx, DL, F);
- splitToValueTypes(
- B, OrigArg, OrigArgIdx, SplitArgs, DL, CC,
- // FIXME: We should probably be passing multiple registers to
- // handleAssignments to do this
- [&](ArrayRef<Register> Regs, Register DstReg,
- LLT LLTy, LLT PartLLT, int VTSplitIdx) {
- assert(DstReg == VRegs[Idx][VTSplitIdx]);
- packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
- LLTy, PartLLT);
- });
+ SplitArg.clear();
+ splitToValueTypes(B, OrigArg, SplitArg, DL, CC);
+
+ processSplitArgs(B, OrigArg, SplitArg, SplitArgs, DL, CC, false,
+ // FIXME: We should probably be passing multiple registers
+ // to handleAssignments to do this
+ [&](ArrayRef<Register> Regs, Register DstReg, LLT LLTy,
+ LLT PartLLT, int VTSplitIdx) {
+ assert(DstReg == VRegs[Idx][VTSplitIdx]);
+ packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
+ LLTy, PartLLT);
+ });
++Idx;
}
@@ -811,9 +1015,10 @@ bool AMDGPUCallLowering::lowerFormalArguments(
// Start adding system SGPRs.
if (IsEntryFunc) {
- TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
+ TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);
} else {
- CCInfo.AllocateReg(Info->getScratchRSrcReg());
+ if (!Subtarget.enableFlatScratch())
+ CCInfo.AllocateReg(Info->getScratchRSrcReg());
TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
}
@@ -822,3 +1027,368 @@ bool AMDGPUCallLowering::lowerFormalArguments(
return true;
}
+
+bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
+ CCState &CCInfo,
+ SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,
+ CallLoweringInfo &Info) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+
+ const AMDGPUFunctionArgInfo *CalleeArgInfo
+ = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const AMDGPUFunctionArgInfo &CallerArgInfo = MFI->getArgInfo();
+
+
+ // TODO: Unify with private memory register handling. This is complicated by
+ // the fact that at least in kernels, the input argument is not necessarily
+ // in the same location as the input.
+ AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
+ AMDGPUFunctionArgInfo::DISPATCH_PTR,
+ AMDGPUFunctionArgInfo::QUEUE_PTR,
+ AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
+ AMDGPUFunctionArgInfo::DISPATCH_ID,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
+ };
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const AMDGPULegalizerInfo *LI
+ = static_cast<const AMDGPULegalizerInfo*>(ST.getLegalizerInfo());
+
+ for (auto InputID : InputRegs) {
+ const ArgDescriptor *OutgoingArg;
+ const TargetRegisterClass *ArgRC;
+ LLT ArgTy;
+
+ std::tie(OutgoingArg, ArgRC, ArgTy) =
+ CalleeArgInfo->getPreloadedValue(InputID);
+ if (!OutgoingArg)
+ continue;
+
+ const ArgDescriptor *IncomingArg;
+ const TargetRegisterClass *IncomingArgRC;
+ std::tie(IncomingArg, IncomingArgRC, ArgTy) =
+ CallerArgInfo.getPreloadedValue(InputID);
+ assert(IncomingArgRC == ArgRC);
+
+ Register InputReg = MRI.createGenericVirtualRegister(ArgTy);
+
+ if (IncomingArg) {
+ LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
+ } else {
+ assert(InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
+ LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
+ }
+
+ if (OutgoingArg->isRegister()) {
+ ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+ if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
+ report_fatal_error("failed to allocate implicit input argument");
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
+ return false;
+ }
+ }
+
+ // Pack workitem IDs into a single register or pass it as is if already
+ // packed.
+ const ArgDescriptor *OutgoingArg;
+ const TargetRegisterClass *ArgRC;
+ LLT ArgTy;
+
+ std::tie(OutgoingArg, ArgRC, ArgTy) =
+ CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+ if (!OutgoingArg)
+ std::tie(OutgoingArg, ArgRC, ArgTy) =
+ CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+ if (!OutgoingArg)
+ std::tie(OutgoingArg, ArgRC, ArgTy) =
+ CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
+ if (!OutgoingArg)
+ return false;
+
+ auto WorkitemIDX =
+ CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+ auto WorkitemIDY =
+ CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+ auto WorkitemIDZ =
+ CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
+
+ const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
+ const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
+ const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
+ const LLT S32 = LLT::scalar(32);
+
+ // If incoming ids are not packed we need to pack them.
+ // FIXME: Should consider known workgroup size to eliminate known 0 cases.
+ Register InputReg;
+ if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX) {
+ InputReg = MRI.createGenericVirtualRegister(S32);
+ LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
+ std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
+ }
+
+ if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+ Register Y = MRI.createGenericVirtualRegister(S32);
+ LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
+ std::get<2>(WorkitemIDY));
+
+ Y = MIRBuilder.buildShl(S32, Y, MIRBuilder.buildConstant(S32, 10)).getReg(0);
+ InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
+ }
+
+ if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+ Register Z = MRI.createGenericVirtualRegister(S32);
+ LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
+ std::get<2>(WorkitemIDZ));
+
+ Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0);
+ InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
+ }
+
+ if (!InputReg) {
+ InputReg = MRI.createGenericVirtualRegister(S32);
+
+ // Workitem ids are already packed; any of the present incoming arguments
+ // will carry all required fields.
+ ArgDescriptor IncomingArg = ArgDescriptor::createArg(
+ IncomingArgX ? *IncomingArgX :
+ IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
+ LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
+ &AMDGPU::VGPR_32RegClass, S32);
+ }
+
+ if (OutgoingArg->isRegister()) {
+ ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+ if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
+ report_fatal_error("failed to allocate implicit input argument");
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
+ return false;
+ }
+
+ return true;
+}
+
+/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
+/// CC.
+static std::pair<CCAssignFn *, CCAssignFn *>
+getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
+ return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
+}
+
+static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
+ bool IsTailCall) {
+ return AMDGPU::SI_CALL;
+}
+
+// Add operands to call instruction to track the callee.
+static bool addCallTargetOperands(MachineInstrBuilder &CallInst,
+ MachineIRBuilder &MIRBuilder,
+ AMDGPUCallLowering::CallLoweringInfo &Info) {
+ if (Info.Callee.isReg()) {
+ CallInst.addReg(Info.Callee.getReg());
+ CallInst.addImm(0);
+ } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
+ // The call lowering lightly assumed we can directly encode a call target in
+ // the instruction, which is not the case. Materialize the address here.
+ const GlobalValue *GV = Info.Callee.getGlobal();
+ auto Ptr = MIRBuilder.buildGlobalValue(
+ LLT::pointer(GV->getAddressSpace(), 64), GV);
+ CallInst.addReg(Ptr.getReg(0));
+ CallInst.add(Info.Callee);
+ } else
+ return false;
+
+ return true;
+}
+
+bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const {
+ if (Info.IsVarArg) {
+ LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
+ return false;
+ }
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ const Function &F = MF.getFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ CallingConv::ID CallConv = F.getCallingConv();
+
+ if (!AMDGPUTargetMachine::EnableFixedFunctionABI &&
+ CallConv != CallingConv::AMDGPU_Gfx) {
+ LLVM_DEBUG(dbgs() << "Variable function ABI not implemented\n");
+ return false;
+ }
+
+ if (AMDGPU::isShader(CallConv)) {
+ LLVM_DEBUG(dbgs() << "Unhandled call from graphics shader\n");
+ return false;
+ }
+
+ SmallVector<ArgInfo, 8> OutArgs;
+
+ SmallVector<ArgInfo, 8> SplitArg;
+ for (auto &OrigArg : Info.OrigArgs) {
+ splitToValueTypes(MIRBuilder, OrigArg, SplitArg, DL, Info.CallConv);
+
+ processSplitArgs(
+ MIRBuilder, OrigArg, SplitArg, OutArgs, DL, Info.CallConv, true,
+ // FIXME: We should probably be passing multiple registers to
+ // handleAssignments to do this
+ [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
+ int VTSplitIdx) {
+ unpackRegsToOrigType(MIRBuilder, Regs, SrcReg, OrigArg, LLTy, PartLLT);
+ });
+
+ SplitArg.clear();
+ }
+
+ // If we can lower as a tail call, do that instead.
+ bool CanTailCallOpt = false;
+
+ // We must emit a tail call if we have musttail.
+ if (Info.IsMustTailCall && !CanTailCallOpt) {
+ LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
+ return false;
+ }
+
+ // Find out which ABI gets to decide where things go.
+ CCAssignFn *AssignFnFixed;
+ CCAssignFn *AssignFnVarArg;
+ std::tie(AssignFnFixed, AssignFnVarArg) =
+ getAssignFnsForCC(Info.CallConv, TLI);
+
+ MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
+ .addImm(0)
+ .addImm(0);
+
+ // Create a temporarily-floating call instruction so we can add the implicit
+ // uses of arg registers.
+ unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
+
+ auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
+ MIB.addDef(TRI->getReturnAddressReg(MF));
+
+ if (!addCallTargetOperands(MIB, MIRBuilder, Info))
+ return false;
+
+ // Tell the call which registers are clobbered.
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
+ MIB.addRegMask(Mask);
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
+
+ // We could pass MIB and directly add the implicit uses to the call
+ // now. However, as an aesthetic choice, place implicit argument operands
+ // after the ordinary user argument registers.
+ SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
+
+ if (AMDGPUTargetMachine::EnableFixedFunctionABI) {
+ // With a fixed ABI, allocate fixed registers before user arguments.
+ if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
+ return false;
+ }
+
+ // Do the actual argument marshalling.
+ SmallVector<Register, 8> PhysRegs;
+ AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
+ AssignFnVarArg, false);
+ if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, OutArgs, Handler))
+ return false;
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (!ST.enableFlatScratch()) {
+ // Insert copies for the SRD. In the HSA case, this should be an identity
+ // copy.
+ auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::vector(4, 32),
+ MFI->getScratchRSrcReg());
+ MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
+ MIB.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit);
+ }
+
+ for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
+ MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second);
+ MIB.addReg(ArgReg.first, RegState::Implicit);
+ }
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // If Callee is a reg, since it is used by a target specific
+ // instruction, it must have a register class matching the
+ // constraint of that instruction.
+
+ // FIXME: We should define regbankselectable call instructions to handle
+ // divergent call targets.
+ if (MIB->getOperand(1).isReg()) {
+ MIB->getOperand(1).setReg(constrainOperandRegClass(
+ MF, *TRI, MRI, *ST.getInstrInfo(),
+ *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
+ 1));
+ }
+
+ auto OrigInsertPt = MIRBuilder.getInsertPt();
+
+ // Now we can add the actual call instruction to the correct position.
+ MIRBuilder.insertInstr(MIB);
+
+ // Insert this now to give us an anchor point for managing the insert point.
+ MachineInstrBuilder CallSeqEnd =
+ MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN);
+
+ SmallVector<ArgInfo, 8> InArgs;
+ if (!Info.CanLowerReturn) {
+ insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
+ Info.DemoteRegister, Info.DemoteStackIndex);
+ } else if (!Info.OrigRet.Ty->isVoidTy()) {
+ SmallVector<ArgInfo, 8> PreSplitRetInfos;
+
+ splitToValueTypes(
+ MIRBuilder, Info.OrigRet, PreSplitRetInfos/*InArgs*/, DL, Info.CallConv);
+
+ processSplitArgs(MIRBuilder, Info.OrigRet,
+ PreSplitRetInfos, InArgs/*SplitRetInfos*/, DL, Info.CallConv, false,
+ [&](ArrayRef<Register> Regs, Register DstReg,
+ LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+ assert(DstReg == Info.OrigRet.Regs[VTSplitIdx]);
+ packSplitRegsToOrigType(MIRBuilder, Info.OrigRet.Regs[VTSplitIdx],
+ Regs, LLTy, PartLLT);
+ });
+ }
+
+ // Make sure the raw argument copies are inserted before the marshalling to
+ // the original types.
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), CallSeqEnd);
+
+ // Finally we can copy the returned value back into its virtual-register. In
+ // symmetry with the arguments, the physical register must be an
+ // implicit-define of the call instruction.
+ if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
+ CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
+ Info.IsVarArg);
+ CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
+ if (!handleAssignments(MIRBuilder, InArgs, Handler))
+ return false;
+ }
+
+ uint64_t CalleePopBytes = NumBytes;
+ CallSeqEnd.addImm(0)
+ .addImm(CalleePopBytes);
+
+ // Restore the insert point to after the call sequence.
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), OrigInsertPt);
+ return true;
+}
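passSpecialInputs above packs the X, Y and Z workitem IDs into one 32-bit implicit argument using shifts of 10 and 20 bits. A standalone sketch of the resulting bit layout (the real code emits G_SHL/G_OR through MachineIRBuilder, and only for the components the callee actually uses):

#include <cassert>
#include <cstdint>
#include <cstdio>

// X | Y << 10 | Z << 20, matching the shift amounts in the hunk above.
static uint32_t packWorkitemIDs(uint32_t X, uint32_t Y, uint32_t Z) {
  assert(X < 1024 && Y < 1024 && Z < 1024 && "each ID is a 10-bit field");
  return X | (Y << 10) | (Z << 20);
}

int main() {
  std::printf("0x%08x\n", packWorkitemIDs(3, 2, 1)); // 0x00100803
}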
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 446619d1502e..1312388e4a38 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -14,7 +14,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
-#include "AMDGPU.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
namespace llvm {
@@ -22,9 +21,9 @@ namespace llvm {
class AMDGPUTargetLowering;
class MachineInstrBuilder;
-class AMDGPUCallLowering: public CallLowering {
- Register lowerParameterPtr(MachineIRBuilder &B, Type *ParamTy,
- uint64_t Offset) const;
+class AMDGPUCallLowering final : public CallLowering {
+ void lowerParameterPtr(Register DstReg, MachineIRBuilder &B, Type *ParamTy,
+ uint64_t Offset) const;
void lowerParameter(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset,
Align Alignment, Register DstReg) const;
@@ -32,13 +31,20 @@ class AMDGPUCallLowering: public CallLowering {
/// A function of this type is used to perform value split action.
using SplitArgTy = std::function<void(ArrayRef<Register>, Register, LLT, LLT, int)>;
- void splitToValueTypes(MachineIRBuilder &B,
- const ArgInfo &OrigArgInfo,
- unsigned OrigArgIdx,
+ void splitToValueTypes(MachineIRBuilder &B, const ArgInfo &OrigArgInfo,
SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL,
- CallingConv::ID CallConv,
- SplitArgTy SplitArg) const;
+ const DataLayout &DL, CallingConv::ID CallConv) const;
+
+ void processSplitArgs(MachineIRBuilder &B, const ArgInfo &OrigArgInfo,
+ const SmallVectorImpl<ArgInfo> &SplitArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, CallingConv::ID CallConv,
+ bool IsOutgoing,
+ SplitArgTy PerformArgSplit) const;
+
+ bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ bool IsVarArg) const override;
bool lowerReturnVal(MachineIRBuilder &B, const Value *Val,
ArrayRef<Register> VRegs, MachineInstrBuilder &Ret) const;
@@ -47,13 +53,24 @@ public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &B, const Value *Val,
- ArrayRef<Register> VRegs) const override;
+ ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const;
bool lowerFormalArguments(MachineIRBuilder &B, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
+
+ bool passSpecialInputs(MachineIRBuilder &MIRBuilder,
+ CCState &CCInfo,
+ SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,
+ CallLoweringInfo &Info) const;
+
+ bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const override;
+
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 7c83b6dcb44b..250c42776297 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -16,7 +16,75 @@ class CCIfExtend<CCAction A>
: CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
// Calling convention for SI
-def CC_SI : CallingConv<[
+def CC_SI_Gfx : CallingConv<[
+ // 0-3 are reserved for the stack buffer descriptor
+ // 30-31 are reserved for the return address
+ // 32 is reserved for the stack pointer
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
+ SGPR4, SGPR5, SGPR6, SGPR7,
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
+ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
+ SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29,
+ ]>>>,
+
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
+ ]>>>,
+
+ CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
+ CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
+ CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>,
+ CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
+ CCIfType<[v5i32, v5f32], CCAssignToStack<20, 4>>,
+ CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
+ CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
+]>;
+
+def RetCC_SI_Gfx : CallingConv<[
+ // 0-3 are reserved for the stack buffer descriptor
+ // 32 is reserved for the stack pointer
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
+ SGPR4, SGPR5, SGPR6, SGPR7,
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
+ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
+ SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
+ SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
+ SGPR40, SGPR41, SGPR42, SGPR43
+ ]>>>,
+
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
+ VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
+ VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
+ VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
+ VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
+ VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
+ VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
+ VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
+ VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
+ VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
+ VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
+ VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
+ VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
+ VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
+ ]>>>,
+
+ CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
+ CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
+ CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>,
+ CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
+ CCIfType<[v5i32, v5f32], CCAssignToStack<20, 4>>,
+ CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
+ CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
+]>;
+
+def CC_SI_SHADER : CallingConv<[
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
@@ -125,11 +193,13 @@ def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
(add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_32_105)
>;
+def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>;
+
// Calling convention for leaf functions
def CC_AMDGPU_Func : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
CCIfType<[i1], CCPromoteToType<i32>>,
- CCIfType<[i1, i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
+ CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
@@ -159,7 +229,7 @@ def CC_AMDGPU : CallingConv<[
CCIf<"static_cast<const GCNSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() >= "
"AMDGPUSubtarget::SOUTHERN_ISLANDS",
- CCDelegateTo<CC_SI>>,
+ CCDelegateTo<CC_SI_SHADER>>,
CCIf<"static_cast<const GCNSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() >= "
"AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index a79549301740..2556996df97f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -13,40 +13,19 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-#include "llvm/ADT/FloatingPointMode.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
-#include <cassert>
-#include <iterator>
#define DEBUG_TYPE "amdgpu-codegenprepare"
@@ -60,6 +39,12 @@ static cl::opt<bool> WidenLoads(
cl::ReallyHidden,
cl::init(false));
+static cl::opt<bool> Widen16BitOps(
+ "amdgpu-codegenprepare-widen-16-bit-ops",
+ cl::desc("Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"),
+ cl::ReallyHidden,
+ cl::init(true));
+
static cl::opt<bool> UseMul24Intrin(
"amdgpu-codegenprepare-mul24",
cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
@@ -269,6 +254,9 @@ bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
}
bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
+ if (!Widen16BitOps)
+ return false;
+
const IntegerType *IntTy = dyn_cast<IntegerType>(T);
if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
return true;
@@ -751,6 +739,11 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Type *Ty = FDiv.getType()->getScalarType();
+ // The f64 rcp/rsq approximations are pretty inaccurate. We can do an
+ // expansion around them in codegen.
+ if (Ty->isDoubleTy())
+ return false;
+
// No intrinsic for fdiv16 if target does not support f16.
if (Ty->isHalfTy() && !ST->has16BitInsts())
return false;
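
The new Widen16BitOps flag above only gates the width test that needsPromotionToI32 already performs: scalar integers wider than 1 bit and no wider than 16 bits get widened to 32-bit operations, everything else is left alone. As a rough standalone sketch of just that check (not part of the patch; the real function also handles vector types and further cases omitted from this hunk):

#include <cstdio>

// Minimal model of the scalar branch of needsPromotionToI32 shown above,
// with the new Widen16BitOps switch modeled as a plain bool.
static bool needsPromotionToI32(unsigned BitWidth, bool Widen16BitOps) {
  if (!Widen16BitOps)
    return false;
  // i1 stays as-is; i2..i16 are widened to 32-bit operations.
  return BitWidth > 1 && BitWidth <= 16;
}

int main() {
  for (unsigned W : {1u, 8u, 16u, 32u})
    std::printf("i%u -> %s\n", W,
                needsPromotionToI32(W, /*Widen16BitOps=*/true) ? "widen" : "keep");
  return 0;
}
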
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index faaf9168d0dd..a8399176bb4a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -11,22 +11,22 @@ include "llvm/Target/GlobalISel/Combine.td"
// TODO: This really belongs after legalization after scalarization.
// TODO: GICombineRules should accept subtarget predicates
-def fmin_fmax_legacy_matchdata : GIDefMatchData<"FMinFMaxLegacyInfo">;
+def fmin_fmax_legacy_matchdata : GIDefMatchData<"AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo">;
def fcmp_select_to_fmin_fmax_legacy : GICombineRule<
(defs root:$select, fmin_fmax_legacy_matchdata:$matchinfo),
(match (wip_match_opcode G_SELECT):$select,
- [{ return matchFMinFMaxLegacy(*${select}, MRI, *MF, ${matchinfo}); }]),
- (apply [{ applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>;
+ [{ return PostLegalizerHelper.matchFMinFMaxLegacy(*${select}, ${matchinfo}); }]),
+ (apply [{ PostLegalizerHelper.applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>;
def uchar_to_float : GICombineRule<
(defs root:$itofp),
(match (wip_match_opcode G_UITOFP, G_SITOFP):$itofp,
- [{ return matchUCharToFloat(*${itofp}, MRI, *MF, Helper); }]),
- (apply [{ applyUCharToFloat(*${itofp}); }])>;
+ [{ return PostLegalizerHelper.matchUCharToFloat(*${itofp}); }]),
+ (apply [{ PostLegalizerHelper.applyUCharToFloat(*${itofp}); }])>;
-def cvt_f32_ubyteN_matchdata : GIDefMatchData<"CvtF32UByteMatchInfo">;
+def cvt_f32_ubyteN_matchdata : GIDefMatchData<"AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo">;
def cvt_f32_ubyteN : GICombineRule<
(defs root:$cvt_f32_ubyteN, cvt_f32_ubyteN_matchdata:$matchinfo),
@@ -34,33 +34,25 @@ def cvt_f32_ubyteN : GICombineRule<
G_AMDGPU_CVT_F32_UBYTE1,
G_AMDGPU_CVT_F32_UBYTE2,
G_AMDGPU_CVT_F32_UBYTE3):$cvt_f32_ubyteN,
- [{ return matchCvtF32UByteN(*${cvt_f32_ubyteN}, MRI, *MF, ${matchinfo}); }]),
- (apply [{ applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;
+ [{ return PostLegalizerHelper.matchCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }]),
+ (apply [{ PostLegalizerHelper.applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;
// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
- "AMDGPUGenPreLegalizerCombinerHelper", [all_combines,
- elide_br_by_inverting_cond]> {
+ "AMDGPUGenPreLegalizerCombinerHelper", [all_combines]> {
let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
}
-
-// FIXME: combines_for_extload can introduce illegal extloads which
-// aren't re-legalized.
-// FIXME: Is there a way to remove a single item from all_combines?
-def all_combines_minus_extload : GICombineGroup<[trivial_combines,
- ptr_add_immed_chain, combine_indexed_load_store, undef_combines,
- identity_combines]
->;
-
def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
"AMDGPUGenPostLegalizerCombinerHelper",
- [all_combines_minus_extload, gfx6gfx7_combines,
+ [all_combines, gfx6gfx7_combines,
uchar_to_float, cvt_f32_ubyteN]> {
let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
+ let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
+ let AdditionalArguments = [];
}
def AMDGPURegBankCombinerHelper : GICombinerHelper<
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
index 25c82ed61fc2..bed0707f3aa7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUExportClustering.h"
-#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
using namespace llvm;
@@ -27,15 +27,13 @@ public:
};
static bool isExport(const SUnit &SU) {
- const MachineInstr *MI = SU.getInstr();
- return MI->getOpcode() == AMDGPU::EXP ||
- MI->getOpcode() == AMDGPU::EXP_DONE;
+ return SIInstrInfo::isEXP(*SU.getInstr());
}
static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
const MachineInstr *MI = SU->getInstr();
- int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
- return Imm >= 12 && Imm <= 15;
+ unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
+ return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
}
static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h
index 58491d0671e4..041d6deef243 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include <memory>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 3f12addbcc79..bba03736d01a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,6 +51,11 @@ def gi_vop3opselmods :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
GIComplexPatternEquiv<VOP3OpSelMods>;
+// FIXME: Why do we have both VOP3OpSel and VOP3OpSelMods?
+def gi_vop3opsel :
+ GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
+ GIComplexPatternEquiv<VOP3OpSel>;
+
def gi_smrd_imm :
GIComplexOperandMatcher<s64, "selectSmrdImm">,
GIComplexPatternEquiv<SMRDImm>;
@@ -63,19 +68,15 @@ def gi_smrd_sgpr :
GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
GIComplexPatternEquiv<SMRDSgpr>;
-// FIXME: Why are the atomic versions separated?
def gi_flat_offset :
GIComplexOperandMatcher<s64, "selectFlatOffset">,
GIComplexPatternEquiv<FLATOffset>;
def gi_flat_offset_signed :
GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
GIComplexPatternEquiv<FLATOffsetSigned>;
-def gi_flat_atomic :
- GIComplexOperandMatcher<s64, "selectFlatOffset">,
- GIComplexPatternEquiv<FLATAtomic>;
-def gi_flat_signed_atomic :
- GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
- GIComplexPatternEquiv<FLATSignedAtomic>;
+def gi_global_saddr :
+ GIComplexOperandMatcher<s64, "selectGlobalSAddr">,
+ GIComplexPatternEquiv<GlobalSAddr>;
def gi_mubuf_scratch_offset :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
@@ -84,6 +85,14 @@ def gi_mubuf_scratch_offen :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
GIComplexPatternEquiv<MUBUFScratchOffen>;
+def gi_flat_scratch_offset :
+ GIComplexOperandMatcher<s32, "selectFlatOffsetSigned">,
+ GIComplexPatternEquiv<ScratchOffset>;
+
+def gi_flat_scratch_saddr :
+ GIComplexOperandMatcher<s32, "selectScratchSAddr">,
+ GIComplexPatternEquiv<ScratchSAddr>;
+
def gi_ds_1addr_1offset :
GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
GIComplexPatternEquiv<DS1Addr1Offset>;
@@ -92,6 +101,10 @@ def gi_ds_64bit_4byte_aligned :
GIComplexOperandMatcher<s64, "selectDS64Bit4ByteAligned">,
GIComplexPatternEquiv<DS64Bit4ByteAligned>;
+def gi_ds_128bit_8byte_aligned :
+ GIComplexOperandMatcher<s64, "selectDS128Bit8ByteAligned">,
+ GIComplexPatternEquiv<DS128Bit8ByteAligned>;
+
def gi_mubuf_addr64 :
GIComplexOperandMatcher<s64, "selectMUBUFAddr64">,
GIComplexPatternEquiv<MUBUFAddr64>;
@@ -133,6 +146,9 @@ def : GINodeEquiv<G_LOAD, AMDGPUatomic_ld_glue> {
bit CheckMMOIsAtomic = 1;
}
+def : GINodeEquiv<G_STORE, AMDGPUatomic_st_glue> {
+ bit CheckMMOIsAtomic = 1;
+}
def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap_glue>;
@@ -181,6 +197,11 @@ def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, SIatomic_inc>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, SIatomic_dec>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, atomic_inc_glue>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, atomic_dec_glue>;
+def : GINodeEquiv<G_AMDGPU_ATOMIC_FMIN, SIatomic_fmin>;
+def : GINodeEquiv<G_AMDGPU_ATOMIC_FMAX, SIatomic_fmax>;
+def : GINodeEquiv<G_AMDGPU_ATOMIC_FMIN, atomic_load_fmin_glue>;
+def : GINodeEquiv<G_AMDGPU_ATOMIC_FMAX, atomic_load_fmax_glue>;
+
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_SWAP, SIbuffer_atomic_swap>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_ADD, SIbuffer_atomic_add>;
@@ -194,6 +215,7 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_OR, SIbuffer_atomic_or>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FADD, SIbuffer_atomic_fadd>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
@@ -312,3 +334,6 @@ def gi_extract_dlc : GICustomOperandRenderer<"renderExtractDLC">,
def gi_extract_swz : GICustomOperandRenderer<"renderExtractSWZ">,
GISDNodeXFormEquiv<extract_swz>;
+
+def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameIndex">,
+ GISDNodeXFormEquiv<frameindex_to_targetframeindex>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 600b351f9ea1..bfeee37feb4b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -37,8 +37,9 @@ enum PartialMappingIdx {
PM_AGPR32 = 31,
PM_AGPR64 = 32,
PM_AGPR128 = 33,
- PM_AGPR512 = 34,
- PM_AGPR1024 = 35
+ PM_AGPR256 = 34,
+ PM_AGPR512 = 35,
+ PM_AGPR1024 = 36
};
const RegisterBankInfo::PartialMapping PartMappings[] {
@@ -69,6 +70,7 @@ const RegisterBankInfo::PartialMapping PartMappings[] {
{0, 32, AGPRRegBank}, // AGPR begin
{0, 64, AGPRRegBank},
{0, 128, AGPRRegBank},
+ {0, 256, AGPRRegBank},
{0, 512, AGPRRegBank},
{0, 1024, AGPRRegBank}
};
@@ -115,9 +117,9 @@ const RegisterBankInfo::ValueMapping ValMappings[] {
{&PartMappings[20], 1}, // 32
{&PartMappings[21], 1}, // 64
{&PartMappings[22], 1}, // 128
- {nullptr, 0},
- {&PartMappings[23], 1}, // 512
- {&PartMappings[24], 1} // 1024
+ {&PartMappings[23], 1}, // 256
+ {&PartMappings[24], 1}, // 512
+ {&PartMappings[25], 1} // 1024
};
const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 989937a597fb..b3bafc5b2720 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -13,11 +13,11 @@
using namespace llvm;
using namespace MIPatternMatch;
-std::tuple<Register, unsigned, MachineInstr *>
+std::pair<Register, unsigned>
AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
if (!Def)
- return std::make_tuple(Reg, 0, nullptr);
+ return std::make_pair(Reg, 0);
if (Def->getOpcode() == TargetOpcode::G_CONSTANT) {
unsigned Offset;
@@ -27,21 +27,21 @@ AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
else
Offset = Op.getCImm()->getZExtValue();
- return std::make_tuple(Register(), Offset, Def);
+ return std::make_pair(Register(), Offset);
}
int64_t Offset;
if (Def->getOpcode() == TargetOpcode::G_ADD) {
// TODO: Handle G_OR used for add case
if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset)))
- return std::make_tuple(Def->getOperand(1).getReg(), Offset, Def);
+ return std::make_pair(Def->getOperand(1).getReg(), Offset);
// FIXME: matcher should ignore copies
if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))))
- return std::make_tuple(Def->getOperand(1).getReg(), Offset, Def);
+ return std::make_pair(Def->getOperand(1).getReg(), Offset);
}
- return std::make_tuple(Reg, 0, Def);
+ return std::make_pair(Reg, 0);
}
bool AMDGPU::isLegalVOP3PShuffleMask(ArrayRef<int> Mask) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
index 766750758efc..404e0fcd1166 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -9,53 +9,21 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
-#include "AMDGPUInstrInfo.h"
#include "llvm/CodeGen/Register.h"
-#include <tuple>
+#include <utility>
namespace llvm {
-class MachineInstr;
class MachineRegisterInfo;
namespace AMDGPU {
-/// Returns Base register, constant offset, and offset def point.
-std::tuple<Register, unsigned, MachineInstr *>
+/// Returns base register and constant offset.
+std::pair<Register, unsigned>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg);
bool isLegalVOP3PShuffleMask(ArrayRef<int> Mask);
-/// Return number of address arguments, and the number of gradients for an image
-/// intrinsic.
-inline std::pair<int, int>
-getImageNumVAddr(const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
- const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode) {
- const AMDGPU::MIMGDimInfo *DimInfo
- = AMDGPU::getMIMGDimInfo(ImageDimIntr->Dim);
-
- int NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
- int NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
- int NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
- int NumVAddr = BaseOpcode->NumExtraArgs + NumGradients + NumCoords + NumLCM;
- return {NumVAddr, NumGradients};
-}
-
-/// Return index of dmask in an gMIR image intrinsic
-inline int getDMaskIdx(const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode,
- int NumDefs) {
- assert(!BaseOpcode->Atomic);
- return NumDefs + 1 + (BaseOpcode->Store ? 1 : 0);
-}
-
-/// Return first address operand index in a gMIR image intrinsic.
-inline int getImageVAddrIdxBegin(const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode,
- int NumDefs) {
- if (BaseOpcode->Atomic)
- return NumDefs + 1 + (BaseOpcode->AtomicX2 ? 2 : 1);
- return getDMaskIdx(BaseOpcode, NumDefs) + 1;
-}
-
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index c6f6a3b84e36..39f9092ce77c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -14,15 +14,27 @@
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIMachineFunctionInfo.h"
#include "SIProgramInfo.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
+ const DataLayout &DL) {
+ Type *Ty = Arg.getType();
+ MaybeAlign ArgAlign;
+ if (Arg.hasByRefAttr()) {
+ Ty = Arg.getParamByRefType();
+ ArgAlign = Arg.getParamAlign();
+ }
+
+ if (!ArgAlign)
+ ArgAlign = DL.getABITypeAlign(Ty);
+
+ return std::make_pair(Ty, *ArgAlign);
+}
namespace llvm {
@@ -47,7 +59,7 @@ void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata Parser Test: ";
HSAMD::Metadata FromHSAMetadataString;
- if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) {
+ if (fromString(HSAMetadataString, FromHSAMetadataString)) {
errs() << "FAIL\n";
return;
}
@@ -311,23 +323,28 @@ void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
if (Node && ArgNo < Node->getNumOperands())
TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
- Type *Ty = Arg.getType();
const DataLayout &DL = Func->getParent()->getDataLayout();
MaybeAlign PointeeAlign;
- if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+ if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ // FIXME: Should report this for all address spaces
PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
PtrTy->getElementType());
}
}
- emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
- PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
+ Type *ArgTy;
+ Align ArgAlign;
+ std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
+
+ emitKernelArg(DL, ArgTy, ArgAlign,
+ getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
+ TypeName, BaseTypeName, AccQual, TypeQual);
}
void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
- ValueKind ValueKind,
+ Align Alignment, ValueKind ValueKind,
MaybeAlign PointeeAlign, StringRef Name,
StringRef TypeName,
StringRef BaseTypeName,
@@ -338,7 +355,7 @@ void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
Arg.mName = std::string(Name);
Arg.mTypeName = std::string(TypeName);
Arg.mSize = DL.getTypeAllocSize(Ty);
- Arg.mAlign = DL.getABITypeAlign(Ty).value();
+ Arg.mAlign = Alignment.value();
Arg.mValueKind = ValueKind;
Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
@@ -374,11 +391,11 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
auto Int64Ty = Type::getInt64Ty(Func.getContext());
if (HiddenArgNumBytes >= 8)
- emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
+ emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
if (HiddenArgNumBytes >= 16)
- emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
+ emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
if (HiddenArgNumBytes >= 24)
- emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
+ emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
AMDGPUAS::GLOBAL_ADDRESS);
@@ -387,31 +404,31 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
// "none" argument.
if (HiddenArgNumBytes >= 32) {
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+ emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
// The printf runtime binding pass should have ensured that hostcall and
// printf are not used in the same module.
assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
+ emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
} else
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+ emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
}
// Emit "default queue" and "completion action" arguments if enqueue kernel is
// used, otherwise emit dummy "none" arguments.
if (HiddenArgNumBytes >= 48) {
if (Func.hasFnAttribute("calls-enqueue-kernel")) {
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
+ emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
+ emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
} else {
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+ emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
+ emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
}
}
// Emit the pointer argument for multi-grid object.
if (HiddenArgNumBytes >= 56)
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
+ emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
}
bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
@@ -699,10 +716,12 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
if (Node && ArgNo < Node->getNumOperands())
TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
- Type *Ty = Arg.getType();
const DataLayout &DL = Func->getParent()->getDataLayout();
MaybeAlign PointeeAlign;
+ Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
+
+ // FIXME: Need to distinguish in memory alignment from pointer alignment.
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
@@ -710,19 +729,21 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
}
}
- emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
- getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
- Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
- TypeQual);
+ // There's no distinction between byval aggregates and raw aggregates.
+ Type *ArgTy;
+ Align ArgAlign;
+ std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
+
+ emitKernelArg(DL, ArgTy, ArgAlign,
+ getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
+ PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
}
-void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
- StringRef ValueKind, unsigned &Offset,
- msgpack::ArrayDocNode Args,
- MaybeAlign PointeeAlign, StringRef Name,
- StringRef TypeName,
- StringRef BaseTypeName,
- StringRef AccQual, StringRef TypeQual) {
+void MetadataStreamerV3::emitKernelArg(
+ const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
+ unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
+ StringRef Name, StringRef TypeName, StringRef BaseTypeName,
+ StringRef AccQual, StringRef TypeQual) {
auto Arg = Args.getDocument()->getMapNode();
if (!Name.empty())
@@ -730,7 +751,6 @@ void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
if (!TypeName.empty())
Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
auto Size = DL.getTypeAllocSize(Ty);
- Align Alignment = DL.getABITypeAlign(Ty);
Arg[".size"] = Arg.getDocument()->getNode(Size);
Offset = alignTo(Offset, Alignment);
Arg[".offset"] = Arg.getDocument()->getNode(Offset);
@@ -777,11 +797,14 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
auto Int64Ty = Type::getInt64Ty(Func.getContext());
if (HiddenArgNumBytes >= 8)
- emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
+ Args);
if (HiddenArgNumBytes >= 16)
- emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
+ Args);
if (HiddenArgNumBytes >= 24)
- emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
+ Args);
auto Int8PtrTy =
Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
@@ -790,31 +813,36 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
// "none" argument.
if (HiddenArgNumBytes >= 32) {
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
- emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
+ Args);
else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
// The printf runtime binding pass should have ensured that hostcall and
// printf are not used in the same module.
assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
- emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
+ Args);
} else
- emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
}
// Emit "default queue" and "completion action" arguments if enqueue kernel is
// used, otherwise emit dummy "none" arguments.
if (HiddenArgNumBytes >= 48) {
if (Func.hasFnAttribute("calls-enqueue-kernel")) {
- emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
- emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
+ Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
+ Args);
} else {
- emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
- emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
}
}
// Emit the pointer argument for multi-grid object.
if (HiddenArgNumBytes >= 56)
- emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
+ Args);
}
msgpack::MapDocNode
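
The effect of threading an explicit Align through emitKernelArg in this file is that the ".offset" packing uses the caller-supplied alignment (Align(8) for the hidden i64 and pointer arguments, or the byref parameter alignment) rather than an ABI alignment recomputed from the type. A tiny standalone illustration of that packing, with a hand-picked example layout and a local stand-in for llvm::alignTo (nothing here is taken from the patch):

#include <cstdint>
#include <cstdio>

// Local stand-in for llvm::alignTo: round Offset up to a multiple of Alignment.
static uint64_t alignTo(uint64_t Offset, uint64_t Alignment) {
  return (Offset + Alignment - 1) / Alignment * Alignment;
}

int main() {
  // Assumed layout: one explicit 4-byte i32 argument, then a hidden 8-byte
  // argument emitted with Align(8) as in emitHiddenKernelArgs above.
  uint64_t Offset = 0;
  Offset = alignTo(Offset, 4);
  std::printf("i32 arg at offset %llu\n", (unsigned long long)Offset); // 0
  Offset += 4;
  Offset = alignTo(Offset, 8); // 4 bytes of padding inserted here
  std::printf("hidden i64 arg at offset %llu\n", (unsigned long long)Offset); // 8
  Offset += 8;
  return 0;
}
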
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 9534fffd228d..1c6db14b85cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -15,9 +15,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
-#include "AMDGPU.h"
-#include "AMDKernelCodeT.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/Alignment.h"
@@ -87,11 +84,12 @@ private:
void emitKernelArg(const Argument &Arg, unsigned &Offset,
msgpack::ArrayDocNode Args);
- void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind,
- unsigned &Offset, msgpack::ArrayDocNode Args,
- MaybeAlign PointeeAlign = None, StringRef Name = "",
- StringRef TypeName = "", StringRef BaseTypeName = "",
- StringRef AccQual = "", StringRef TypeQual = "");
+ void emitKernelArg(const DataLayout &DL, Type *Ty, Align Alignment,
+ StringRef ValueKind, unsigned &Offset,
+ msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign = None,
+ StringRef Name = "", StringRef TypeName = "",
+ StringRef BaseTypeName = "", StringRef AccQual = "",
+ StringRef TypeQual = "");
void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
msgpack::ArrayDocNode Args);
@@ -156,10 +154,11 @@ private:
void emitKernelArg(const Argument &Arg);
- void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind,
- MaybeAlign PointeeAlign = None, StringRef Name = "",
- StringRef TypeName = "", StringRef BaseTypeName = "",
- StringRef AccQual = "", StringRef TypeQual = "");
+ void emitKernelArg(const DataLayout &DL, Type *Ty, Align Alignment,
+ ValueKind ValueKind, MaybeAlign PointeeAlign = None,
+ StringRef Name = "", StringRef TypeName = "",
+ StringRef BaseTypeName = "", StringRef AccQual = "",
+ StringRef TypeQual = "");
void emitHiddenKernelArgs(const Function &Func);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index aaf448346b53..340f4ac6f57a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -12,48 +12,21 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUArgumentUsageInfo.h"
-#include "AMDGPUISelLowering.h" // For AMDGPUISD
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUPerfHintAnalysis.h"
-#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
-#include "SIISelLowering.h"
-#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
+
#ifdef EXPENSIVE_CHECKS
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#endif
-#include "llvm/IR/Instruction.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/MathExtras.h"
-#include <cassert>
-#include <cstdint>
-#include <new>
-#include <vector>
#define DEBUG_TYPE "isel"
@@ -191,6 +164,9 @@ private:
bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
+ bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
+ SDValue &RHS) const;
+
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
@@ -200,11 +176,16 @@ private:
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool isDSOffsetLegal(SDValue Base, unsigned Offset,
- unsigned OffsetBits) const;
+ bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
+ bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
+ unsigned Size) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
+ bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1, unsigned Size) const;
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
@@ -233,11 +214,11 @@ private:
template <bool IsSigned>
bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, SDValue &SLC) const;
- bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, SDValue &SLC) const;
- bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, SDValue &SLC) const;
+ SDValue &Offset) const;
+ bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &VOffset, SDValue &Offset) const;
+ bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &Offset) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
@@ -252,11 +233,15 @@ private:
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
+ bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
+ bool AllowAbs = true) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
@@ -519,8 +504,8 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
return true;
// TODO: Move into isKnownNeverNaN
- if (N->getFlags().isDefined())
- return N->getFlags().hasNoNaNs();
+ if (N->getFlags().hasNoNaNs())
+ return true;
return CurDAG->isKnownNeverNaN(N);
}
@@ -557,8 +542,8 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
unsigned OpNo) const {
if (!N->isMachineOpcode()) {
if (N->getOpcode() == ISD::CopyToReg) {
- unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
- if (Register::isVirtualRegister(Reg)) {
+ Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
return MRI.getRegClass(Reg);
}
@@ -716,8 +701,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
(Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
Opc == ISD::ATOMIC_LOAD_FADD ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FMAX ||
- Opc == AMDGPUISD::ATOMIC_LOAD_CSUB)) {
+ Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
N = glueCopyToM0LDSInit(N);
SelectCode(N);
return;
@@ -920,6 +904,53 @@ bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
Term->getMetadata("structurizecfg.uniform");
}
+static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
+ SDValue &N0, SDValue &N1) {
+ if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
+ Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+    // As we split 64-bit `or` earlier, it's a complicated pattern to match, i.e.
+ // (i64 (bitcast (v2i32 (build_vector
+ // (or (extract_vector_elt V, 0), OFFSET),
+ // (extract_vector_elt V, 1)))))
+ SDValue Lo = Addr.getOperand(0).getOperand(0);
+ if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
+ SDValue BaseLo = Lo.getOperand(0);
+ SDValue BaseHi = Addr.getOperand(0).getOperand(1);
+ // Check that split base (Lo and Hi) are extracted from the same one.
+ if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
+ // Lo is statically extracted from index 0.
+ isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
+ BaseLo.getConstantOperandVal(1) == 0 &&
+          // Hi is statically extracted from index 1.
+ isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
+ BaseHi.getConstantOperandVal(1) == 1) {
+ N0 = BaseLo.getOperand(0).getOperand(0);
+ N1 = Lo.getOperand(1);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
+ SDValue &RHS) const {
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ LHS = Addr.getOperand(0);
+ RHS = Addr.getOperand(1);
+ return true;
+ }
+
+ if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
+ assert(LHS && RHS && isa<ConstantSDNode>(RHS));
+ return true;
+ }
+
+ return false;
+}
+
StringRef AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
@@ -994,7 +1025,7 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
static const unsigned OpcMap[2][2][2] = {
{{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
- {AMDGPU::V_SUB_I32_e32, AMDGPU::V_ADD_I32_e32}},
+ {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
{{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
{AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
@@ -1073,7 +1104,7 @@ void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
}
if (IsVALU) {
- unsigned Opc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+ unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
CurDAG->SelectNodeTo(
N, Opc, N->getVTList(),
@@ -1099,7 +1130,7 @@ void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
Ops[8] = N->getOperand(0);
Ops[9] = N->getOperand(4);
- CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
+ CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops);
}
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
@@ -1124,9 +1155,14 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
assert(VT == MVT::f32 || VT == MVT::f64);
unsigned Opc
- = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
+ = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
+ // omod
+ SDValue Ops[8];
+ SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
+ SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
@@ -1135,7 +1171,7 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
SDLoc SL(N);
bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
- unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
+ unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
@@ -1143,13 +1179,11 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
-bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
- unsigned OffsetBits) const {
- if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
- (OffsetBits == 8 && !isUInt<8>(Offset)))
+bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
+ if (!isUInt<16>(Offset))
return false;
- if (Subtarget->hasUsableDSOffset() ||
+ if (!Base || Subtarget->hasUsableDSOffset() ||
Subtarget->unsafeDSOffsetFoldingEnabled())
return true;
@@ -1165,7 +1199,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
+ if (isDSOffsetLegal(N0, C1->getSExtValue())) {
// (add n0, c0)
Base = N0;
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
@@ -1175,7 +1209,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
// sub C, x -> add (sub 0, x), C
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
int64_t ByteOffset = C->getSExtValue();
- if (isUInt<16>(ByteOffset)) {
+ if (isDSOffsetLegal(SDValue(), ByteOffset)) {
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
// XXX - This is kind of hacky. Create a dummy sub node so we can check
@@ -1184,13 +1218,13 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
Zero, Addr.getOperand(1));
- if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
+ if (isDSOffsetLegal(Sub, ByteOffset)) {
SmallVector<SDValue, 3> Opnds;
Opnds.push_back(Zero);
Opnds.push_back(Addr.getOperand(1));
// FIXME: Select to VOP3 version for with-carry.
- unsigned SubOp = AMDGPU::V_SUB_I32_e32;
+ unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
if (Subtarget->hasAddNoCarry()) {
SubOp = AMDGPU::V_SUB_U32_e64;
Opnds.push_back(
@@ -1214,7 +1248,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
SDLoc DL(Addr);
- if (isUInt<16>(CAddr->getZExtValue())) {
+ if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, Zero);
@@ -1230,75 +1264,104 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
return true;
}
+bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
+ unsigned Offset1,
+ unsigned Size) const {
+ if (Offset0 % Size != 0 || Offset1 % Size != 0)
+ return false;
+ if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
+ return false;
+
+ if (!Base || Subtarget->hasUsableDSOffset() ||
+ Subtarget->unsafeDSOffsetFoldingEnabled())
+ return true;
+
+  // On Southern Islands, instructions with a negative base value and an offset
+  // don't seem to work.
+ return CurDAG->SignBitIsZero(Base);
+}
+
// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
SDValue &Offset0,
SDValue &Offset1) const {
+ return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
+}
+
+bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
+ SDValue &Offset0,
+ SDValue &Offset1) const {
+ return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
+}
+
+bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
+ SDValue &Offset0, SDValue &Offset1,
+ unsigned Size) const {
SDLoc DL(Addr);
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- unsigned DWordOffset0 = C1->getZExtValue() / 4;
- unsigned DWordOffset1 = DWordOffset0 + 1;
+ unsigned OffsetValue0 = C1->getZExtValue();
+ unsigned OffsetValue1 = OffsetValue0 + Size;
+
// (add n0, c0)
- if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
+ if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
Base = N0;
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
return true;
}
} else if (Addr.getOpcode() == ISD::SUB) {
// sub C, x -> add (sub 0, x), C
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
- unsigned DWordOffset0 = C->getZExtValue() / 4;
- unsigned DWordOffset1 = DWordOffset0 + 1;
+ if (const ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
+ unsigned OffsetValue0 = C->getZExtValue();
+ unsigned OffsetValue1 = OffsetValue0 + Size;
- if (isUInt<8>(DWordOffset0)) {
+ if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
SDLoc DL(Addr);
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
// XXX - This is kind of hacky. Create a dummy sub node so we can check
// the known bits in isDSOffsetLegal. We need to emit the selected node
// here, so this is thrown away.
- SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
- Zero, Addr.getOperand(1));
+ SDValue Sub =
+ CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
- if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
+ if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
SmallVector<SDValue, 3> Opnds;
Opnds.push_back(Zero);
Opnds.push_back(Addr.getOperand(1));
- unsigned SubOp = AMDGPU::V_SUB_I32_e32;
+ unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
if (Subtarget->hasAddNoCarry()) {
SubOp = AMDGPU::V_SUB_U32_e64;
Opnds.push_back(
CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
}
- MachineSDNode *MachineSub
- = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
+ MachineSDNode *MachineSub = CurDAG->getMachineNode(
+ SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
Base = SDValue(MachineSub, 0);
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
return true;
}
}
}
} else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
- unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
- unsigned DWordOffset1 = DWordOffset0 + 1;
- assert(4 * DWordOffset0 == CAddr->getZExtValue());
+ unsigned OffsetValue0 = CAddr->getZExtValue();
+ unsigned OffsetValue1 = OffsetValue0 + Size;
- if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
+ if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
- MachineSDNode *MovZero
- = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- DL, MVT::i32, Zero);
+ MachineSDNode *MovZero =
+ CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
Base = SDValue(MovZero, 0);
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
return true;
}
}
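
isDSOffset2Legal above encodes each of the two offsets as an 8-bit count of Size-byte elements, so the new 128-bit, 8-byte-aligned DS path reaches byte offsets that are multiples of 8 up to 255 * 8 = 2040, while the existing 4-byte path stays limited to multiples of 4 up to 1020. A standalone restatement of only that immediate arithmetic (the base-register / SignBitIsZero checks are deliberately ignored, and the helper name is made up for this sketch):

#include <cstdio>

// Immediate-only part of the isDSOffset2Legal check from the hunk above:
// both offsets must be multiples of Size and fit in an 8-bit element count.
static bool isDSOffset2LegalImm(unsigned Offset0, unsigned Offset1,
                                unsigned Size) {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  return Offset0 / Size <= 255 && Offset1 / Size <= 255;
}

int main() {
  std::printf("%d\n", isDSOffset2LegalImm(1016, 1020, 4)); // 1: 4-byte pair, max 1020
  std::printf("%d\n", isDSOffset2LegalImm(2032, 2040, 8)); // 1: 8-byte pair, max 2040
  std::printf("%d\n", isDSOffset2LegalImm(2032, 2044, 8)); // 0: 2044 is not a multiple of 8
  std::printf("%d\n", isDSOffset2LegalImm(1020, 1024, 4)); // 0: 1024 / 4 = 256 > 255
  return 0;
}
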
@@ -1454,22 +1517,16 @@ static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
SDLoc DL(N);
- const MachineFunction &MF = CurDAG->getMachineFunction();
- const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-
- if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
- SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
- FI->getValueType(0));
- // If we can resolve this to a frame index access, this will be relative to
- // either the stack or frame pointer SGPR.
- return std::make_pair(
- TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
- }
+ auto *FI = dyn_cast<FrameIndexSDNode>(N);
+ SDValue TFI =
+ FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
- // If we don't know this private access is a local stack object, it needs to
- // be relative to the entry point's scratch wave offset.
- return std::make_pair(N, CurDAG->getTargetConstant(0, DL, MVT::i32));
+ // We rebase the base address into an absolute stack address and hence
+ // use constant 0 for soffset. This value must be retained until
+ // frame elimination and eliminateFrameIndex will choose the appropriate
+ // frame register if need be.
+ return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
@@ -1628,155 +1685,245 @@ static MemSDNode* findMemSDNode(SDNode *N) {
llvm_unreachable("cannot find MemSDNode in the pattern!");
}
-static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
- SDValue &N0, SDValue &N1) {
- if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
- Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
- // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
- // (i64 (bitcast (v2i32 (build_vector
- // (or (extract_vector_elt V, 0), OFFSET),
- // (extract_vector_elt V, 1)))))
- SDValue Lo = Addr.getOperand(0).getOperand(0);
- if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
- SDValue BaseLo = Lo.getOperand(0);
- SDValue BaseHi = Addr.getOperand(0).getOperand(1);
- // Check that split base (Lo and Hi) are extracted from the same one.
- if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
- // Lo is statically extracted from index 0.
- isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
- BaseLo.getConstantOperandVal(1) == 0 &&
- // Hi is statically extracted from index 0.
- isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
- BaseHi.getConstantOperandVal(1) == 1) {
- N0 = BaseLo.getOperand(0).getOperand(0);
- N1 = Lo.getOperand(1);
- return true;
- }
- }
- }
- return false;
-}
-
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
SDValue Addr,
SDValue &VAddr,
- SDValue &Offset,
- SDValue &SLC) const {
+ SDValue &Offset) const {
int64_t OffsetVal = 0;
+ unsigned AS = findMemSDNode(N)->getAddressSpace();
+
if (Subtarget->hasFlatInstOffsets() &&
(!Subtarget->hasFlatSegmentOffsetBug() ||
- findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)) {
+ AS != AMDGPUAS::FLAT_ADDRESS)) {
SDValue N0, N1;
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- N0 = Addr.getOperand(0);
- N1 = Addr.getOperand(1);
- } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
- assert(N0 && N1 && isa<ConstantSDNode>(N1));
- }
- if (N0 && N1) {
+ if (isBaseWithConstantOffset64(Addr, N0, N1)) {
uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
const SIInstrInfo *TII = Subtarget->getInstrInfo();
- unsigned AS = findMemSDNode(N)->getAddressSpace();
if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
Addr = N0;
OffsetVal = COffsetVal;
} else {
// If the offset doesn't fit, put the low bits into the offset field and
// add the rest.
+ //
+ // For a FLAT instruction the hardware decides whether to access
+ // global/scratch/shared memory based on the high bits of vaddr,
+ // ignoring the offset field, so we have to ensure that when we add
+ // remainder to vaddr it still points into the same underlying object.
+ // The easiest way to do that is to make sure that we split the offset
+ // into two pieces that are both >= 0 or both <= 0.
SDLoc DL(N);
- uint64_t ImmField;
- const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
- if (IsSigned) {
- ImmField = SignExtend64(COffsetVal, NumBits);
-
- // Don't use a negative offset field if the base offset is positive.
- // Since the scheduler currently relies on the offset field, doing so
- // could result in strange scheduling decisions.
-
- // TODO: Should we not do this in the opposite direction as well?
- if (static_cast<int64_t>(COffsetVal) > 0) {
- if (static_cast<int64_t>(ImmField) < 0) {
- const uint64_t OffsetMask =
- maskTrailingOnes<uint64_t>(NumBits - 1);
- ImmField = COffsetVal & OffsetMask;
- }
- }
- } else {
- // TODO: Should we do this for a negative offset?
- const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
- ImmField = COffsetVal & OffsetMask;
- }
+ uint64_t RemainderOffset;
- uint64_t RemainderOffset = COffsetVal - ImmField;
+ std::tie(OffsetVal, RemainderOffset)
+ = TII->splitFlatOffset(COffsetVal, AS, IsSigned);
- assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
- assert(RemainderOffset + ImmField == COffsetVal);
-
- OffsetVal = ImmField;
+ SDValue AddOffsetLo =
+ getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
+ SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
- // TODO: Should this try to use a scalar add pseudo if the base address
- // is uniform and saddr is usable?
- SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
- SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+ if (Addr.getValueType().getSizeInBits() == 32) {
+ SmallVector<SDValue, 3> Opnds;
+ Opnds.push_back(N0);
+ Opnds.push_back(AddOffsetLo);
+ unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
+ if (Subtarget->hasAddNoCarry()) {
+ AddOp = AMDGPU::V_ADD_U32_e64;
+ Opnds.push_back(Clamp);
+ }
+ Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
+ } else {
+ // TODO: Should this try to use a scalar add pseudo if the base address
+ // is uniform and saddr is usable?
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
- SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- MVT::i32, N0, Sub0);
- SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- MVT::i32, N0, Sub1);
+ SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, N0, Sub0);
+ SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, N0, Sub1);
- SDValue AddOffsetLo =
- getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
- SDValue AddOffsetHi =
- getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
+ SDValue AddOffsetHi =
+ getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
- SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
- SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
- SDNode *Add =
- CurDAG->getMachineNode(AMDGPU::V_ADD_I32_e64, DL, VTs,
- {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
+ SDNode *Add =
+ CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
+ {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
- SDNode *Addc = CurDAG->getMachineNode(
- AMDGPU::V_ADDC_U32_e64, DL, VTs,
- {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
+ SDNode *Addc = CurDAG->getMachineNode(
+ AMDGPU::V_ADDC_U32_e64, DL, VTs,
+ {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
- SDValue RegSequenceArgs[] = {
- CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
- SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
+ SDValue RegSequenceArgs[] = {
+ CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
+ SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
- Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::i64, RegSequenceArgs),
- 0);
+ Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::i64, RegSequenceArgs),
+ 0);
+ }
}
}
}
VAddr = Addr;
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
- SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
return true;
}
-bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
- SDValue Addr,
- SDValue &VAddr,
- SDValue &Offset,
- SDValue &SLC) const {
- return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
+// If this matches zero_extend i32:x, return x
+static SDValue matchZExtFromI32(SDValue Op) {
+ if (Op.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+
+ SDValue ExtSrc = Op.getOperand(0);
+ return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
+}
+
+// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
+bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
+ SDValue Addr,
+ SDValue &SAddr,
+ SDValue &VOffset,
+ SDValue &Offset) const {
+ int64_t ImmOffset = 0;
+
+ // Match the immediate offset first, which canonically is moved as low as
+ // possible.
+
+ SDValue LHS, RHS;
+ if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
+ int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+
+ if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true)) {
+ Addr = LHS;
+ ImmOffset = COffsetVal;
+ } else if (!LHS->isDivergent() && COffsetVal > 0) {
+ SDLoc SL(N);
+ // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset) +
+ // (large_offset & MaxOffset);
+ int64_t SplitImmOffset, RemainderOffset;
+ std::tie(SplitImmOffset, RemainderOffset)
+ = TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true);
+
+ if (isUInt<32>(RemainderOffset)) {
+ SDNode *VMov = CurDAG->getMachineNode(
+ AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
+ CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
+ VOffset = SDValue(VMov, 0);
+ SAddr = LHS;
+ Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+ return true;
+ }
+ }
+ }
+
+ // Match the variable offset.
+ if (Addr.getOpcode() != ISD::ADD) {
+ if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
+ isa<ConstantSDNode>(Addr))
+ return false;
+
+ // It's cheaper to materialize a single 32-bit zero for vaddr than the two
+ // moves required to copy a 64-bit SGPR to VGPR.
+ SAddr = Addr;
+ SDNode *VMov = CurDAG->getMachineNode(
+ AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
+ CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
+ VOffset = SDValue(VMov, 0);
+ Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+ return true;
+ }
+
+ LHS = Addr.getOperand(0);
+ RHS = Addr.getOperand(1);
+
+ if (!LHS->isDivergent()) {
+ // add (i64 sgpr), (zero_extend (i32 vgpr))
+ if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
+ SAddr = LHS;
+ VOffset = ZextRHS;
+ }
+ }
+
+ if (!SAddr && !RHS->isDivergent()) {
+ // add (zero_extend (i32 vgpr)), (i64 sgpr)
+ if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
+ SAddr = RHS;
+ VOffset = ZextLHS;
+ }
+ }
+
+ if (!SAddr)
+ return false;
+
+ Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+ return true;
}
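As a rough sketch of the split SelectGlobalSAddr performs when the constant offset is too large for the immediate field (the helper name and the power-of-two mask are illustrative assumptions, not taken from the change; the real split comes from SIInstrInfo::splitFlatOffset):

#include <cstdint>
#include <utility>

// Hypothetical helper: split a byte offset into the part that stays in the
// instruction's immediate field and the remainder that is materialized into a
// VGPR via v_mov_b32, assuming MaxImmMask is a contiguous low-bit mask.
static std::pair<int64_t, int64_t> splitGlobalOffsetSketch(int64_t Offset,
                                                           int64_t MaxImmMask) {
  int64_t ImmPart = Offset & MaxImmMask;   // stays in the offset field
  int64_t VGPRPart = Offset & ~MaxImmMask; // becomes the v_mov_b32 operand
  return {ImmPart, VGPRPart};
}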
-bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
- SDValue Addr,
- SDValue &VAddr,
- SDValue &Offset,
- SDValue &SLC) const {
- return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
+// Match (32-bit SGPR base) + sext(imm offset)
+bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *N,
+ SDValue Addr,
+ SDValue &SAddr,
+ SDValue &Offset) const {
+ if (Addr->isDivergent())
+ return false;
+
+ SAddr = Addr;
+ int64_t COffsetVal = 0;
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ SAddr = Addr.getOperand(0);
+ }
+
+ if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
+ SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
+ } else if (SAddr.getOpcode() == ISD::ADD &&
+ isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
+ // Materialize this into a scalar move for scalar address to avoid
+ // readfirstlane.
+ auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
+ FI->getValueType(0));
+ SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, SDLoc(SAddr),
+ MVT::i32, TFI, SAddr.getOperand(1)),
+ 0);
+ }
+
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+
+ if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+ int64_t RemainderOffset = COffsetVal;
+ int64_t ImmField = 0;
+ const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(*Subtarget, true);
+ // Use signed division by a power of two to truncate towards 0.
+ int64_t D = 1LL << (NumBits - 1);
+ RemainderOffset = (COffsetVal / D) * D;
+ ImmField = COffsetVal - RemainderOffset;
+
+ assert(TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS, true));
+ assert(RemainderOffset + ImmField == COffsetVal);
+
+ COffsetVal = ImmField;
+
+ SDLoc DL(N);
+ SDValue AddOffset =
+ getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
+ SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32,
+ SAddr, AddOffset), 0);
+ }
+
+ Offset = CurDAG->getTargetConstant(COffsetVal, SDLoc(), MVT::i16);
+
+ return true;
}
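The remainder/immediate arithmetic above can be checked with a small standalone sketch; NumBits and the example values are assumptions for illustration, not taken from the change:

#include <cassert>
#include <cstdint>

// Split a signed scratch offset into a legal immediate plus a remainder,
// using truncating signed division by a power of two, as in the code above.
static void splitScratchOffsetSketch(int64_t COffsetVal, unsigned NumBits) {
  int64_t D = int64_t(1) << (NumBits - 1);
  int64_t RemainderOffset = (COffsetVal / D) * D; // truncates toward zero
  int64_t ImmField = COffsetVal - RemainderOffset;
  assert(RemainderOffset + ImmField == COffsetVal);
  // e.g. COffsetVal = -5000, NumBits = 13 -> D = 4096,
  // RemainderOffset = -4096, ImmField = -904 (fits a signed 13-bit field).
}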
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
@@ -2223,11 +2370,12 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
SDValue CmpVal = Mem->getOperand(2);
+ SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
// XXX - Do we care about glue operands?
SDValue Ops[] = {
- CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
+ CmpVal, VAddr, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
};
CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
@@ -2241,8 +2389,9 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
SDValue CmpVal = Mem->getOperand(2);
+ SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
SDValue Ops[] = {
- CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
+ CmpVal, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
};
CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
@@ -2284,7 +2433,7 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
SDValue PtrOffset = Ptr.getOperand(1);
const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
- if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
+ if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
N = glueCopyToM0(N, PtrBase);
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
}
@@ -2379,15 +2528,11 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
SDValue Chain = N->getOperand(0);
SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
- // TODO: Can this just be removed from the instruction?
- SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
-
const unsigned Opc = gwsIntrinToOpcode(IntrID);
SmallVector<SDValue, 5> Ops;
if (HasVSrc)
Ops.push_back(N->getOperand(2));
Ops.push_back(OffsetField);
- Ops.push_back(GDS);
Ops.push_back(Chain);
SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
@@ -2511,7 +2656,8 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
}
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
- unsigned &Mods) const {
+ unsigned &Mods,
+ bool AllowAbs) const {
Mods = 0;
Src = In;
@@ -2520,7 +2666,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
Src = Src.getOperand(0);
}
- if (Src.getOpcode() == ISD::FABS) {
+ if (AllowAbs && Src.getOpcode() == ISD::FABS) {
Mods |= SISrcMods::ABS;
Src = Src.getOperand(0);
}
@@ -2539,6 +2685,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
return false;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ unsigned Mods;
+ if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -2563,6 +2720,16 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
+ SDValue &SrcMods, SDValue &Clamp,
+ SDValue &Omod) const {
+ SDLoc DL(In);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
+
+ return SelectVOP3BMods(In, Src, SrcMods);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
SDValue &Clamp, SDValue &Omod) const {
Src = In;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 940ec6f31c69..0b4b4776ad39 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -14,25 +14,17 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
-#include "AMDGPUCallLowering.h"
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDGPUTargetMachine.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "SIInstrInfo.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUMachineFunction.h"
+#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+
using namespace llvm;
#include "AMDGPUGenCallingConv.inc"
@@ -320,6 +312,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
+ setOperationAction(ISD::FREM, MVT::f16, Custom);
setOperationAction(ISD::FREM, MVT::f32, Custom);
setOperationAction(ISD::FREM, MVT::f64, Custom);
@@ -396,6 +389,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTL, MVT::i64, Expand);
setOperationAction(ISD::ROTR, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i64, Expand);
setOperationAction(ISD::MULHS, MVT::i64, Expand);
@@ -569,6 +565,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
}
+bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
+ if (getTargetMachine().Options.NoSignedZerosFPMath)
+ return true;
+
+ const auto Flags = Op.getNode()->getFlags();
+ if (Flags.hasNoSignedZeros())
+ return true;
+
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Target Information
//===----------------------------------------------------------------------===//
@@ -598,6 +605,7 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMED3:
+ // TODO: handle llvm.amdgcn.fma.legacy
return true;
default:
return false;
@@ -781,34 +789,27 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
return true;
}
-bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
+bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
switch (N->getOpcode()) {
- default:
- return false;
- case ISD::EntryToken:
- case ISD::TokenFactor:
+ case ISD::EntryToken:
+ case ISD::TokenFactor:
+ return true;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntrID) {
+ case Intrinsic::amdgcn_readfirstlane:
+ case Intrinsic::amdgcn_readlane:
return true;
- case ISD::INTRINSIC_WO_CHAIN:
- {
- unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntrID) {
- default:
- return false;
- case Intrinsic::amdgcn_readfirstlane:
- case Intrinsic::amdgcn_readlane:
- return true;
- }
}
- break;
- case ISD::LOAD:
- {
- if (cast<LoadSDNode>(N)->getMemOperand()->getAddrSpace() ==
- AMDGPUAS::CONSTANT_ADDRESS_32BIT)
- return true;
- return false;
- }
- break;
+ return false;
}
+ case ISD::LOAD:
+ if (cast<LoadSDNode>(N)->getMemOperand()->getAddrSpace() ==
+ AMDGPUAS::CONSTANT_ADDRESS_32BIT)
+ return true;
+ return false;
+ }
+ return false;
}
SDValue AMDGPUTargetLowering::getNegatedExpression(
@@ -944,6 +945,8 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::Fast:
case CallingConv::Cold:
return CC_AMDGPU_Func;
+ case CallingConv::AMDGPU_Gfx:
+ return CC_SI_Gfx;
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
default:
@@ -965,6 +968,8 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_LS:
return RetCC_SI_Shader;
+ case CallingConv::AMDGPU_Gfx:
+ return RetCC_SI_Gfx;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
@@ -1017,10 +1022,14 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
unsigned InIndex = 0;
for (const Argument &Arg : Fn.args()) {
+ const bool IsByRef = Arg.hasByRefAttr();
Type *BaseArgTy = Arg.getType();
- Align Alignment = DL.getABITypeAlign(BaseArgTy);
- MaxAlign = std::max(Alignment, MaxAlign);
- unsigned AllocSize = DL.getTypeAllocSize(BaseArgTy);
+ Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;
+ MaybeAlign Alignment = IsByRef ? Arg.getParamAlign() : None;
+ if (!Alignment)
+ Alignment = DL.getABITypeAlign(MemArgTy);
+ MaxAlign = max(Alignment, MaxAlign);
+ uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy);
uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize;
@@ -1224,7 +1233,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
switch (Op.getOpcode()) {
default:
Op->print(errs(), &DAG);
- llvm_unreachable("Custom lowering code for this"
+ llvm_unreachable("Custom lowering code for this "
"instruction is not implemented yet!");
break;
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
@@ -1295,7 +1304,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
- if (!MFI->isEntryFunction()) {
+ if (!MFI->isModuleEntryFunction()) {
SDLoc DL(Op);
const Function &Fn = DAG.getMachineFunction().getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
@@ -1539,7 +1548,7 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
Load->getChain(), BasePtr, SrcValue, LoMemVT,
BaseAlign, Load->getMemOperand()->getFlags());
- SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, Size);
+ SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Size));
SDValue HiLoad =
DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(),
HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
@@ -1564,17 +1573,25 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
return DAG.getMergeValues(Ops, SL);
}
-// Widen a vector load from vec3 to vec4.
-SDValue AMDGPUTargetLowering::WidenVectorLoad(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op,
+ SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT VT = Op.getValueType();
- assert(VT.getVectorNumElements() == 3);
SDValue BasePtr = Load->getBasePtr();
EVT MemVT = Load->getMemoryVT();
SDLoc SL(Op);
const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
unsigned BaseAlign = Load->getAlignment();
+ unsigned NumElements = MemVT.getVectorNumElements();
+
+ // Widen from vec3 to vec4 when the load is at least 8-byte aligned
+ // or 16-byte fully dereferenceable. Otherwise, split the vector load.
+ if (NumElements != 3 ||
+ (BaseAlign < 8 &&
+ !SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout())))
+ return SplitVectorLoad(Op, DAG);
+
+ assert(NumElements == 3);
EVT WideVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
@@ -2075,20 +2092,19 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
return DAG.getMergeValues(Res, DL);
}
-// (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
+// (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x)
SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
EVT VT = Op.getValueType();
+ auto Flags = Op->getFlags();
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
- // TODO: Should this propagate fast-math-flags?
-
- SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
- SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
-
- return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
+ SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags);
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags);
+ SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags);
+ // TODO: For f32 use FMAD instead if !hasFastFMA32?
+ return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags);
}
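Expressed with the C math library, the new expansion amounts to the following sketch (fremSketch is a hypothetical name, not from the change):

#include <cmath>

// x - trunc(x/y) * y computed as a single fused multiply-add, mirroring
// (fma (fneg (ftrunc (fdiv x, y))), y, x).
static double fremSketch(double X, double Y) {
  return std::fma(-std::trunc(X / Y), Y, X);
}
// e.g. fremSketch(7.5, 2.0) == 1.5, matching fmod(7.5, 2.0).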
SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
@@ -2698,14 +2714,12 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
// TODO: Factor out code common with LowerFP_TO_UINT.
EVT SrcVT = Src.getValueType();
- if (Subtarget->has16BitInsts() && SrcVT == MVT::f16) {
+ if (SrcVT == MVT::f16 ||
+ (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) {
SDLoc DL(Op);
- SDValue FPExtend = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
- SDValue FpToInt32 =
- DAG.getNode(Op.getOpcode(), DL, MVT::i64, FPExtend);
-
- return FpToInt32;
+ SDValue FpToInt32 = DAG.getNode(Op.getOpcode(), DL, MVT::i32, Src);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, FpToInt32);
}
if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
@@ -2721,14 +2735,12 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
// TODO: Factor out code common with LowerFP_TO_SINT.
EVT SrcVT = Src.getValueType();
- if (Subtarget->has16BitInsts() && SrcVT == MVT::f16) {
+ if (SrcVT == MVT::f16 ||
+ (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) {
SDLoc DL(Op);
- SDValue FPExtend = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
- SDValue FpToInt32 =
- DAG.getNode(Op.getOpcode(), DL, MVT::i64, FPExtend);
-
- return FpToInt32;
+ SDValue FpToUInt32 = DAG.getNode(Op.getOpcode(), DL, MVT::i32, Src);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, FpToUInt32);
}
if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
@@ -3204,7 +3216,7 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
SDValue Elt0 = Vec.getOperand(0);
EVT EltVT = Elt0.getValueType();
- if (VT.getSizeInBits() <= EltVT.getSizeInBits()) {
+ if (VT.getFixedSizeInBits() <= EltVT.getFixedSizeInBits()) {
if (EltVT.isFloatingPoint()) {
Elt0 = DAG.getNode(ISD::BITCAST, SL,
EltVT.changeTypeToInteger(), Elt0);
@@ -3287,17 +3299,13 @@ static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1);
}
- // Because we want to eliminate extension instructions before the
- // operation, we need to create a single user here (i.e. not the separate
- // mul_lo + mul_hi) so that SimplifyDemandedBits will deal with it.
-
- unsigned MulOpc = Signed ? AMDGPUISD::MUL_LOHI_I24 : AMDGPUISD::MUL_LOHI_U24;
+ unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
+ unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;
- SDValue Mul = DAG.getNode(MulOpc, SL,
- DAG.getVTList(MVT::i32, MVT::i32), N0, N1);
+ SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);
+ SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);
- return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64,
- Mul.getValue(0), Mul.getValue(1));
+ return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, MulLo, MulHi);
}
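What the BUILD_PAIR of the two mul24 halves produces can be pictured with this sketch, assuming both operands already fit in 24 bits (the helper name is illustrative):

#include <cstdint>

static uint64_t mul24PairSketch(uint32_t A, uint32_t B) {
  uint64_t Full = uint64_t(A) * uint64_t(B);
  uint32_t MulLo = uint32_t(Full);        // what MUL_U24 produces
  uint32_t MulHi = uint32_t(Full >> 32);  // what MULHI_U24 produces
  return (uint64_t(MulHi) << 32) | MulLo; // BUILD_PAIR(MulLo, MulHi) as an i64
}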
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
@@ -3395,29 +3403,6 @@ SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
return DAG.getZExtOrTrunc(Mulhi, DL, VT);
}
-SDValue AMDGPUTargetLowering::performMulLoHi24Combine(
- SDNode *N, DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
-
- // Simplify demanded bits before splitting into multiple users.
- if (SDValue V = simplifyI24(N, DCI))
- return V;
-
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- bool Signed = (N->getOpcode() == AMDGPUISD::MUL_LOHI_I24);
-
- unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
- unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;
-
- SDLoc SL(N);
-
- SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);
- SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);
- return DAG.getMergeValues({ MulLo, MulHi }, SL);
-}
-
static bool isNegativeOne(SDValue Val) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
return C->isAllOnesValue();
@@ -3730,6 +3715,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
}
case ISD::FMA:
case ISD::FMAD: {
+ // TODO: handle llvm.amdgcn.fma.legacy
if (!mayIgnoreSignedZero(N0))
return SDValue();
@@ -3795,8 +3781,15 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
if (Res.getOpcode() != AMDGPUISD::FMED3)
return SDValue(); // Op got folded away.
- if (!N0.hasOneUse())
- DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
+
+ if (!N0.hasOneUse()) {
+ SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res);
+ DAG.ReplaceAllUsesWith(N0, Neg);
+
+ for (SDNode *U : Neg->uses())
+ DCI.AddToWorklist(U);
+ }
+
return Res;
}
case ISD::FP_EXTEND:
@@ -3933,7 +3926,7 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
}
}
- if (DestVT.getSizeInBits() != 64 && !DestVT.isVector())
+ if (DestVT.getSizeInBits() != 64 || !DestVT.isVector())
break;
// Fold bitcasts of constants.
@@ -3942,14 +3935,12 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
// TODO: Generalize and move to DAGCombiner
SDValue Src = N->getOperand(0);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
- if (Src.getValueType() == MVT::i64) {
- SDLoc SL(N);
- uint64_t CVal = C->getZExtValue();
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
- DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
- DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
- return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
- }
+ SDLoc SL(N);
+ uint64_t CVal = C->getZExtValue();
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
+ DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
}
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
@@ -3999,9 +3990,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return V;
return SDValue();
}
- case AMDGPUISD::MUL_LOHI_I24:
- case AMDGPUISD::MUL_LOHI_U24:
- return performMulLoHi24Combine(N, DCI);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case ISD::FNEG:
@@ -4159,9 +4147,9 @@ SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset);
SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32);
- return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, 4,
+ return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, Align(4),
MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
+ MachineMemOperand::MOInvariant);
}
SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
@@ -4173,7 +4161,7 @@ SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
- SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, 4,
+ SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4),
MachineMemOperand::MODereferenceable);
return Store;
}
@@ -4285,8 +4273,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MULHI_U24)
NODE_NAME_CASE(MULHI_I24)
- NODE_NAME_CASE(MUL_LOHI_U24)
- NODE_NAME_CASE(MUL_LOHI_I24)
NODE_NAME_CASE(MAD_U24)
NODE_NAME_CASE(MAD_I24)
NODE_NAME_CASE(MAD_I64_I32)
@@ -4336,7 +4322,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ATOMIC_DEC)
NODE_NAME_CASE(ATOMIC_LOAD_FMIN)
NODE_NAME_CASE(ATOMIC_LOAD_FMAX)
- NODE_NAME_CASE(ATOMIC_LOAD_CSUB)
NODE_NAME_CASE(BUFFER_LOAD)
NODE_NAME_CASE(BUFFER_LOAD_UBYTE)
NODE_NAME_CASE(BUFFER_LOAD_USHORT)
@@ -4365,8 +4350,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
NODE_NAME_CASE(BUFFER_ATOMIC_CSUB)
NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
- NODE_NAME_CASE(BUFFER_ATOMIC_PK_FADD)
- NODE_NAME_CASE(ATOMIC_PK_FADD)
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}
@@ -4718,6 +4701,12 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
case Intrinsic::amdgcn_fdot2:
// TODO: Refine on operand
return SNaN;
+ case Intrinsic::amdgcn_fma_legacy:
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(3), SNaN, Depth + 1);
default:
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 85f23c81db17..ce3618f83130 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -15,10 +15,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
-#include "AMDGPU.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -90,7 +88,6 @@ protected:
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performMulLoHi24Combine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -125,8 +122,9 @@ protected:
/// Split a vector load into 2 loads of half the vector.
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
- /// Widen a vector load from vec3 to vec4.
- SDValue WidenVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+ /// Widen a suitably aligned v3 load. For all other cases, split the input
+ /// vector load.
+ SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
/// Split a vector store into 2 stores of half the vector.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -145,16 +143,7 @@ protected:
public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
- bool mayIgnoreSignedZero(SDValue Op) const {
- if (getTargetMachine().Options.NoSignedZerosFPMath)
- return true;
-
- const auto Flags = Op.getNode()->getFlags();
- if (Flags.isDefined())
- return Flags.hasNoSignedZeros();
-
- return false;
- }
+ bool mayIgnoreSignedZero(SDValue Op) const;
static inline SDValue stripBitcast(SDValue Val) {
return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
@@ -440,8 +429,6 @@ enum NodeType : unsigned {
MAD_I24,
MAD_U64_U32,
MAD_I64_I32,
- MUL_LOHI_I24,
- MUL_LOHI_U24,
PERM,
TEXTURE_FETCH,
R600_EXPORT,
@@ -508,7 +495,6 @@ enum NodeType : unsigned {
ATOMIC_DEC,
ATOMIC_LOAD_FMIN,
ATOMIC_LOAD_FMAX,
- ATOMIC_LOAD_CSUB,
BUFFER_LOAD,
BUFFER_LOAD_UBYTE,
BUFFER_LOAD_USHORT,
@@ -537,8 +523,6 @@ enum NodeType : unsigned {
BUFFER_ATOMIC_CMPSWAP,
BUFFER_ATOMIC_CSUB,
BUFFER_ATOMIC_FADD,
- BUFFER_ATOMIC_PK_FADD,
- ATOMIC_PK_FADD,
LAST_AMDGPU_ISD_NUMBER
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
deleted file mode 100644
index 3b5d91133a2f..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ /dev/null
@@ -1,226 +0,0 @@
-//===- AMDGPUInline.cpp - Code to perform simple function inlining --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This is AMDGPU specific replacement of the standard inliner.
-/// The main purpose is to account for the fact that calls not only expensive
-/// on the AMDGPU, but much more expensive if a private memory pointer is
-/// passed to a function as an argument. In this situation, we are unable to
-/// eliminate private memory in the caller unless inlined and end up with slow
-/// and expensive scratch access. Thus, we boost the inline threshold for such
-/// functions here.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/Inliner.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "inline"
-
-static cl::opt<int>
-ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000),
- cl::desc("Cost of alloca argument"));
-
-// If the amount of scratch memory to eliminate exceeds our ability to allocate
-// it into registers we gain nothing by aggressively inlining functions for that
-// heuristic.
-static cl::opt<unsigned>
-ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
- cl::desc("Maximum alloca size to use for inline cost"));
-
-// Inliner constraint to achieve reasonable compilation time
-static cl::opt<size_t>
-MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100),
- cl::desc("Maximum BB number allowed in a function after inlining"
- " (compile time constraint)"));
-
-namespace {
-
-class AMDGPUInliner : public LegacyInlinerBase {
-
-public:
- AMDGPUInliner() : LegacyInlinerBase(ID) {
- initializeAMDGPUInlinerPass(*PassRegistry::getPassRegistry());
- Params = getInlineParams();
- }
-
- static char ID; // Pass identification, replacement for typeid
-
- unsigned getInlineThreshold(CallBase &CB) const;
-
- InlineCost getInlineCost(CallBase &CB) override;
-
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
-private:
- TargetTransformInfoWrapperPass *TTIWP;
-
- InlineParams Params;
-};
-
-} // end anonymous namespace
-
-char AMDGPUInliner::ID = 0;
-INITIALIZE_PASS_BEGIN(AMDGPUInliner, "amdgpu-inline",
- "AMDGPU Function Integration/Inlining", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(AMDGPUInliner, "amdgpu-inline",
- "AMDGPU Function Integration/Inlining", false, false)
-
-Pass *llvm::createAMDGPUFunctionInliningPass() { return new AMDGPUInliner(); }
-
-bool AMDGPUInliner::runOnSCC(CallGraphSCC &SCC) {
- TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
- return LegacyInlinerBase::runOnSCC(SCC);
-}
-
-void AMDGPUInliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetTransformInfoWrapperPass>();
- LegacyInlinerBase::getAnalysisUsage(AU);
-}
-
-unsigned AMDGPUInliner::getInlineThreshold(CallBase &CB) const {
- int Thres = Params.DefaultThreshold;
-
- Function *Caller = CB.getCaller();
- // Listen to the inlinehint attribute when it would increase the threshold
- // and the caller does not need to minimize its size.
- Function *Callee = CB.getCalledFunction();
- bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttribute(Attribute::InlineHint);
- if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres
- && !Caller->hasFnAttribute(Attribute::MinSize))
- Thres = Params.HintThreshold.getValue() *
- TTIWP->getTTI(*Callee).getInliningThresholdMultiplier();
-
- const DataLayout &DL = Caller->getParent()->getDataLayout();
- if (!Callee)
- return (unsigned)Thres;
-
- // If we have a pointer to private array passed into a function
- // it will not be optimized out, leaving scratch usage.
- // Increase the inline threshold to allow inliniting in this case.
- uint64_t AllocaSize = 0;
- SmallPtrSet<const AllocaInst *, 8> AIVisited;
- for (Value *PtrArg : CB.args()) {
- PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());
- if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS &&
- Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS))
- continue;
-
- PtrArg = GetUnderlyingObject(PtrArg, DL);
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
- if (!AI->isStaticAlloca() || !AIVisited.insert(AI).second)
- continue;
- AllocaSize += DL.getTypeAllocSize(AI->getAllocatedType());
- // If the amount of stack memory is excessive we will not be able
- // to get rid of the scratch anyway, bail out.
- if (AllocaSize > ArgAllocaCutoff) {
- AllocaSize = 0;
- break;
- }
- }
- }
- if (AllocaSize)
- Thres += ArgAllocaCost;
-
- return (unsigned)Thres;
-}
-
-// Check if call is just a wrapper around another call.
-// In this case we only have call and ret instructions.
-static bool isWrapperOnlyCall(CallBase &CB) {
- Function *Callee = CB.getCalledFunction();
- if (!Callee || Callee->size() != 1)
- return false;
- const BasicBlock &BB = Callee->getEntryBlock();
- if (const Instruction *I = BB.getFirstNonPHI()) {
- if (!isa<CallInst>(I)) {
- return false;
- }
- if (isa<ReturnInst>(*std::next(I->getIterator()))) {
- LLVM_DEBUG(dbgs() << " Wrapper only call detected: "
- << Callee->getName() << '\n');
- return true;
- }
- }
- return false;
-}
-
-InlineCost AMDGPUInliner::getInlineCost(CallBase &CB) {
- Function *Callee = CB.getCalledFunction();
- Function *Caller = CB.getCaller();
-
- if (!Callee || Callee->isDeclaration())
- return llvm::InlineCost::getNever("undefined callee");
-
- if (CB.isNoInline())
- return llvm::InlineCost::getNever("noinline");
-
- TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
- if (!TTI.areInlineCompatible(Caller, Callee))
- return llvm::InlineCost::getNever("incompatible");
-
- if (CB.hasFnAttr(Attribute::AlwaysInline)) {
- auto IsViable = isInlineViable(*Callee);
- if (IsViable.isSuccess())
- return llvm::InlineCost::getAlways("alwaysinline viable");
- return llvm::InlineCost::getNever(IsViable.getFailureReason());
- }
-
- if (isWrapperOnlyCall(CB))
- return llvm::InlineCost::getAlways("wrapper-only call");
-
- InlineParams LocalParams = Params;
- LocalParams.DefaultThreshold = (int)getInlineThreshold(CB);
- bool RemarksEnabled = false;
- const auto &BBs = Caller->getBasicBlockList();
- if (!BBs.empty()) {
- auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBs.front());
- if (DI.isEnabled())
- RemarksEnabled = true;
- }
-
- OptimizationRemarkEmitter ORE(Caller);
- auto GetAssumptionCache = [this](Function &F) -> AssumptionCache & {
- return ACT->getAssumptionCache(F);
- };
-
- auto IC = llvm::getInlineCost(CB, Callee, LocalParams, TTI,
- GetAssumptionCache, GetTLI, nullptr, PSI,
- RemarksEnabled ? &ORE : nullptr);
-
- if (IC && !IC.isAlways() && !Callee->hasFnAttribute(Attribute::InlineHint)) {
- // Single BB does not increase total BB amount, thus subtract 1
- size_t Size = Caller->size() + Callee->size() - 1;
- if (MaxBB && Size > MaxBB)
- return llvm::InlineCost::getNever("max number of bb exceeded");
- }
- return IC;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
new file mode 100644
index 000000000000..06aa0055e4bb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -0,0 +1,1075 @@
+//===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===//

+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements the AMDGPU-specific folds of llvm.amdgcn.* intrinsics
+// for InstCombine. It uses the target's detailed information to simplify or
+// constant-fold these calls, while letting the target-independent InstCombine
+// transformations handle everything else.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUTargetTransformInfo.h"
+#include "GCNSubtarget.h"
+#include "R600Subtarget.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "AMDGPUtti"
+
+namespace {
+
+struct AMDGPUImageDMaskIntrinsic {
+ unsigned Intr;
+};
+
+#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
+#include "InstCombineTables.inc"
+
+} // end anonymous namespace
+
+// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
+//
+// A single NaN input is folded to minnum, so we rely on that folding for
+// handling NaNs.
+static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
+ const APFloat &Src2) {
+ APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
+
+ APFloat::cmpResult Cmp0 = Max3.compare(Src0);
+ assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
+ if (Cmp0 == APFloat::cmpEqual)
+ return maxnum(Src1, Src2);
+
+ APFloat::cmpResult Cmp1 = Max3.compare(Src1);
+ assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
+ if (Cmp1 == APFloat::cmpEqual)
+ return maxnum(Src0, Src2);
+
+ return maxnum(Src0, Src1);
+}
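For finite inputs this fold is the familiar median-of-three identity; a minimal sketch, with the NaN handling deliberately left to the min/max folds in instCombineIntrinsic below:

#include <algorithm>

static float med3Sketch(float A, float B, float C) {
  return std::max(std::min(A, B), std::min(std::max(A, B), C));
}
// e.g. med3Sketch(3.0f, 1.0f, 2.0f) == 2.0f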
+
+// Check if a value can be converted to a 16-bit value without losing
+// precision.
+static bool canSafelyConvertTo16Bit(Value &V) {
+ Type *VTy = V.getType();
+ if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
+ // The value is already 16-bit, so we don't want to convert to 16-bit again!
+ return false;
+ }
+ if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
+ // We need to check that if we cast the index down to a half, we do not lose
+ // precision.
+ APFloat FloatValue(ConstFloat->getValueAPF());
+ bool LosesInfo = true;
+ FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
+ return !LosesInfo;
+ }
+ Value *CastSrc;
+ if (match(&V, m_FPExt(PatternMatch::m_Value(CastSrc))) ||
+ match(&V, m_SExt(PatternMatch::m_Value(CastSrc))) ||
+ match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)))) {
+ Type *CastSrcTy = CastSrc->getType();
+ if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
+ return true;
+ }
+
+ return false;
+}
+
+// Convert a value to 16-bit.
+static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
+ Type *VTy = V.getType();
+ if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
+ return cast<Instruction>(&V)->getOperand(0);
+ if (VTy->isIntegerTy())
+ return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
+ if (VTy->isFloatingPointTy())
+ return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
+
+ llvm_unreachable("Should never be called!");
+}
+
+static Optional<Instruction *>
+simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
+ const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
+ IntrinsicInst &II, InstCombiner &IC) {
+ if (!ST->hasA16() && !ST->hasG16())
+ return None;
+
+ bool FloatCoord = false;
+ // true means derivatives can be converted to 16 bit, coordinates not
+ bool OnlyDerivatives = false;
+
+ for (unsigned OperandIndex = ImageDimIntr->GradientStart;
+ OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
+ Value *Coord = II.getOperand(OperandIndex);
+ // If the values are not derived from 16-bit values, we cannot optimize.
+ if (!canSafelyConvertTo16Bit(*Coord)) {
+ if (OperandIndex < ImageDimIntr->CoordStart ||
+ ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
+ return None;
+ }
+ // All gradients can be converted, so convert only them
+ OnlyDerivatives = true;
+ break;
+ }
+
+ assert(OperandIndex == ImageDimIntr->GradientStart ||
+ FloatCoord == Coord->getType()->isFloatingPointTy());
+ FloatCoord = Coord->getType()->isFloatingPointTy();
+ }
+
+ if (OnlyDerivatives) {
+ if (!ST->hasG16())
+ return None;
+ } else {
+ if (!ST->hasA16())
+ OnlyDerivatives = true; // Only supports G16
+ }
+
+ Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
+ : Type::getInt16Ty(II.getContext());
+
+ SmallVector<Type *, 4> ArgTys;
+ if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
+ return None;
+
+ ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
+ if (!OnlyDerivatives)
+ ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
+ Function *I =
+ Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);
+
+ SmallVector<Value *, 8> Args(II.arg_operands());
+
+ unsigned EndIndex =
+ OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
+ for (unsigned OperandIndex = ImageDimIntr->GradientStart;
+ OperandIndex < EndIndex; OperandIndex++) {
+ Args[OperandIndex] =
+ convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
+ }
+
+ CallInst *NewCall = IC.Builder.CreateCall(I, Args);
+ NewCall->takeName(&II);
+ NewCall->copyMetadata(II);
+ if (isa<FPMathOperator>(NewCall))
+ NewCall->copyFastMathFlags(&II);
+ return IC.replaceInstUsesWith(II, NewCall);
+}
+
+bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
+ InstCombiner &IC) const {
+ // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+ // infinity, gives +0.0. If we can prove we don't have one of the special
+ // cases then we can use a normal multiply instead.
+ // TODO: Create and use isKnownFiniteNonZero instead of just matching
+ // constants here.
+ if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
+ match(Op1, PatternMatch::m_FiniteNonZero())) {
+ // One operand is not zero or infinity or NaN.
+ return true;
+ }
+ auto *TLI = &IC.getTargetLibraryInfo();
+ if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
+ isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
+ // Neither operand is infinity or NaN.
+ return true;
+ }
+ return false;
+}
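A minimal sketch of the legacy-multiply semantics described in the comment (the helper name is illustrative, not an LLVM API):

// Any +/-0.0 operand forces a +0.0 result, even against infinity or NaN; when
// neither operand can be zero, infinity, or NaN, this behaves like an ordinary
// IEEE multiply, which is what the fold relies on.
static float mulLegacySketch(float A, float B) {
  if (A == 0.0f || B == 0.0f)
    return 0.0f;
  return A * B;
}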
+
+Optional<Instruction *>
+GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+ Intrinsic::ID IID = II.getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::amdgcn_rcp: {
+ Value *Src = II.getArgOperand(0);
+
+ // TODO: Move to ConstantFolding/InstSimplify?
+ if (isa<UndefValue>(Src)) {
+ Type *Ty = II.getType();
+ auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
+ return IC.replaceInstUsesWith(II, QNaN);
+ }
+
+ if (II.isStrictFP())
+ break;
+
+ if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
+ const APFloat &ArgVal = C->getValueAPF();
+ APFloat Val(ArgVal.getSemantics(), 1);
+ Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
+
+ // This is more precise than the instruction may give.
+ //
+ // TODO: The instruction always flushes denormal results (except for f16),
+ // should this also?
+ return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_rsq: {
+ Value *Src = II.getArgOperand(0);
+
+ // TODO: Move to ConstantFolding/InstSimplify?
+ if (isa<UndefValue>(Src)) {
+ Type *Ty = II.getType();
+ auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
+ return IC.replaceInstUsesWith(II, QNaN);
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_frexp_mant:
+ case Intrinsic::amdgcn_frexp_exp: {
+ Value *Src = II.getArgOperand(0);
+ if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
+ int Exp;
+ APFloat Significand =
+ frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
+
+ if (IID == Intrinsic::amdgcn_frexp_mant) {
+ return IC.replaceInstUsesWith(
+ II, ConstantFP::get(II.getContext(), Significand));
+ }
+
+ // Match instruction special case behavior.
+ if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
+ Exp = 0;
+
+ return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
+ }
+
+ if (isa<UndefValue>(Src)) {
+ return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_class: {
+ enum {
+ S_NAN = 1 << 0, // Signaling NaN
+ Q_NAN = 1 << 1, // Quiet NaN
+ N_INFINITY = 1 << 2, // Negative infinity
+ N_NORMAL = 1 << 3, // Negative normal
+ N_SUBNORMAL = 1 << 4, // Negative subnormal
+ N_ZERO = 1 << 5, // Negative zero
+ P_ZERO = 1 << 6, // Positive zero
+ P_SUBNORMAL = 1 << 7, // Positive subnormal
+ P_NORMAL = 1 << 8, // Positive normal
+ P_INFINITY = 1 << 9 // Positive infinity
+ };
+
+ const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
+ N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL |
+ P_NORMAL | P_INFINITY;
+
+ Value *Src0 = II.getArgOperand(0);
+ Value *Src1 = II.getArgOperand(1);
+ const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
+ if (!CMask) {
+ if (isa<UndefValue>(Src0)) {
+ return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+ }
+
+ if (isa<UndefValue>(Src1)) {
+ return IC.replaceInstUsesWith(II,
+ ConstantInt::get(II.getType(), false));
+ }
+ break;
+ }
+
+ uint32_t Mask = CMask->getZExtValue();
+
+ // If all tests are made, it doesn't matter what the value is.
+ if ((Mask & FullMask) == FullMask) {
+ return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
+ }
+
+ if ((Mask & FullMask) == 0) {
+ return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
+ }
+
+ if (Mask == (S_NAN | Q_NAN)) {
+ // Equivalent of isnan. Replace with standard fcmp.
+ Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
+ FCmp->takeName(&II);
+ return IC.replaceInstUsesWith(II, FCmp);
+ }
+
+ if (Mask == (N_ZERO | P_ZERO)) {
+ // Equivalent of == 0.
+ Value *FCmp =
+ IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));
+
+ FCmp->takeName(&II);
+ return IC.replaceInstUsesWith(II, FCmp);
+ }
+
+ // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
+ if (((Mask & S_NAN) || (Mask & Q_NAN)) &&
+ isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
+ return IC.replaceOperand(
+ II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN)));
+ }
+
+ const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
+ if (!CVal) {
+ if (isa<UndefValue>(Src0)) {
+ return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+ }
+
+ // Clamp mask to used bits
+ if ((Mask & FullMask) != Mask) {
+ CallInst *NewCall = IC.Builder.CreateCall(
+ II.getCalledFunction(),
+ {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)});
+
+ NewCall->takeName(&II);
+ return IC.replaceInstUsesWith(II, NewCall);
+ }
+
+ break;
+ }
+
+ const APFloat &Val = CVal->getValueAPF();
+
+ bool Result =
+ ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
+ ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
+ ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
+ ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
+ ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
+ ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
+ ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
+ ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
+ ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
+ ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
+
+ return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
+ }
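The two specific mask folds above correspond to ordinary C++ comparisons; a minimal sketch:

static bool classNaNSketch(float X) { return X != X; }     // S_NAN | Q_NAN -> fcmp uno x, x
static bool classZeroSketch(float X) { return X == 0.0f; } // N_ZERO | P_ZERO -> fcmp oeq x, 0.0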
+ case Intrinsic::amdgcn_cvt_pkrtz: {
+ Value *Src0 = II.getArgOperand(0);
+ Value *Src1 = II.getArgOperand(1);
+ if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
+ if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
+ const fltSemantics &HalfSem =
+ II.getType()->getScalarType()->getFltSemantics();
+ bool LosesInfo;
+ APFloat Val0 = C0->getValueAPF();
+ APFloat Val1 = C1->getValueAPF();
+ Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
+ Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
+
+ Constant *Folded =
+ ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
+ ConstantFP::get(II.getContext(), Val1)});
+ return IC.replaceInstUsesWith(II, Folded);
+ }
+ }
+
+ if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
+ return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_cvt_pknorm_i16:
+ case Intrinsic::amdgcn_cvt_pknorm_u16:
+ case Intrinsic::amdgcn_cvt_pk_i16:
+ case Intrinsic::amdgcn_cvt_pk_u16: {
+ Value *Src0 = II.getArgOperand(0);
+ Value *Src1 = II.getArgOperand(1);
+
+ if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
+ return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_ubfe:
+ case Intrinsic::amdgcn_sbfe: {
+ // Decompose simple cases into standard shifts.
+ Value *Src = II.getArgOperand(0);
+ if (isa<UndefValue>(Src)) {
+ return IC.replaceInstUsesWith(II, Src);
+ }
+
+ unsigned Width;
+ Type *Ty = II.getType();
+ unsigned IntSize = Ty->getIntegerBitWidth();
+
+ ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
+ if (CWidth) {
+ Width = CWidth->getZExtValue();
+ if ((Width & (IntSize - 1)) == 0) {
+ return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
+ }
+
+ // Hardware ignores high bits, so remove those.
+ if (Width >= IntSize) {
+ return IC.replaceOperand(
+ II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
+ }
+ }
+
+ unsigned Offset;
+ ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
+ if (COffset) {
+ Offset = COffset->getZExtValue();
+ if (Offset >= IntSize) {
+ return IC.replaceOperand(
+ II, 1,
+ ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
+ }
+ }
+
+ bool Signed = IID == Intrinsic::amdgcn_sbfe;
+
+ if (!CWidth || !COffset)
+ break;
+
+ // The case of Width == 0 is handled above, which makes this transformation
+ // safe. If Width == 0, then the ashr and lshr instructions become poison
+ // values since the shift amount would be equal to the bit size.
+ assert(Width != 0);
+
+ // TODO: This allows folding to undef when the hardware has specific
+ // behavior?
+ if (Offset + Width < IntSize) {
+ Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
+ Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
+ : IC.Builder.CreateLShr(Shl, IntSize - Width);
+ RightShift->takeName(&II);
+ return IC.replaceInstUsesWith(II, RightShift);
+ }
+
+ Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
+ : IC.Builder.CreateLShr(Src, Offset);
+
+ RightShift->takeName(&II);
+ return IC.replaceInstUsesWith(II, RightShift);
+ }
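The shift decomposition for the unsigned case can be summarized in a short sketch (the helper name and example are illustrative, assuming a 32-bit source and Offset + Width < 32; the signed form uses an arithmetic right shift instead):

#include <cstdint>

static uint32_t ubfeSketch(uint32_t Src, unsigned Offset, unsigned Width) {
  uint32_t Shl = Src << (32 - Offset - Width); // discard bits above the field
  return Shl >> (32 - Width);                  // move the field down to bit 0
}
// e.g. ubfeSketch(0xABCD1234u, 8, 8) == 0x12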
+ case Intrinsic::amdgcn_exp:
+ case Intrinsic::amdgcn_exp_compr: {
+ ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
+ unsigned EnBits = En->getZExtValue();
+ if (EnBits == 0xf)
+ break; // All inputs enabled.
+
+ bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
+ bool Changed = false;
+ for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
+ if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
+ (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
+ Value *Src = II.getArgOperand(I + 2);
+ if (!isa<UndefValue>(Src)) {
+ IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
+ Changed = true;
+ }
+ }
+ }
+
+ if (Changed) {
+ return &II;
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_fmed3: {
+ // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
+ // for the shader.
+
+ Value *Src0 = II.getArgOperand(0);
+ Value *Src1 = II.getArgOperand(1);
+ Value *Src2 = II.getArgOperand(2);
+
+ // Checking for NaN before canonicalization provides better fidelity when
+ // mapping other operations onto fmed3 since the order of operands is
+ // unchanged.
+ CallInst *NewCall = nullptr;
+ if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
+ NewCall = IC.Builder.CreateMinNum(Src1, Src2);
+ } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
+ NewCall = IC.Builder.CreateMinNum(Src0, Src2);
+ } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
+ NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
+ }
+
+ if (NewCall) {
+ NewCall->copyFastMathFlags(&II);
+ NewCall->takeName(&II);
+ return IC.replaceInstUsesWith(II, NewCall);
+ }
+
+ bool Swap = false;
+ // Canonicalize constants to RHS operands.
+ //
+ // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
+ if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+ std::swap(Src0, Src1);
+ Swap = true;
+ }
+
+ if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
+ std::swap(Src1, Src2);
+ Swap = true;
+ }
+
+ if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+ std::swap(Src0, Src1);
+ Swap = true;
+ }
+
+ if (Swap) {
+ II.setArgOperand(0, Src0);
+ II.setArgOperand(1, Src1);
+ II.setArgOperand(2, Src2);
+ return &II;
+ }
+
+ if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
+ if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
+ if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
+ APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
+ C2->getValueAPF());
+ return IC.replaceInstUsesWith(
+ II, ConstantFP::get(IC.Builder.getContext(), Result));
+ }
+ }
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_icmp:
+ case Intrinsic::amdgcn_fcmp: {
+ const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
+ // Guard against invalid arguments.
+ int64_t CCVal = CC->getZExtValue();
+ bool IsInteger = IID == Intrinsic::amdgcn_icmp;
+ if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
+ CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
+ (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
+ CCVal > CmpInst::LAST_FCMP_PREDICATE)))
+ break;
+
+ Value *Src0 = II.getArgOperand(0);
+ Value *Src1 = II.getArgOperand(1);
+
+ if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
+ if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
+ Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
+ if (CCmp->isNullValue()) {
+ return IC.replaceInstUsesWith(
+ II, ConstantExpr::getSExt(CCmp, II.getType()));
+ }
+
+ // The result of V_ICMP/V_FCMP assembly instructions (which this
+ // intrinsic exposes) is one bit per thread, masked with the EXEC
+ // register (which contains the bitmask of live threads). So a
+ // comparison that always returns true is the same as a read of the
+ // EXEC register.
+ Function *NewF = Intrinsic::getDeclaration(
+ II.getModule(), Intrinsic::read_register, II.getType());
+ Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
+ MDNode *MD = MDNode::get(II.getContext(), MDArgs);
+ Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
+ CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
+ NewCall->addAttribute(AttributeList::FunctionIndex,
+ Attribute::Convergent);
+ NewCall->takeName(&II);
+ return IC.replaceInstUsesWith(II, NewCall);
+ }
+
+ // Canonicalize constants to RHS.
+ CmpInst::Predicate SwapPred =
+ CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
+ II.setArgOperand(0, Src1);
+ II.setArgOperand(1, Src0);
+ II.setArgOperand(
+ 2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
+ return &II;
+ }
+
+ if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
+ break;
+
+ // Canonicalize compare eq with true value to compare != 0
+ // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
+ // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
+ // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
+ // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
+ Value *ExtSrc;
+ if (CCVal == CmpInst::ICMP_EQ &&
+ ((match(Src1, PatternMatch::m_One()) &&
+ match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
+ (match(Src1, PatternMatch::m_AllOnes()) &&
+ match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
+ ExtSrc->getType()->isIntegerTy(1)) {
+ IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
+ IC.replaceOperand(II, 2,
+ ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
+ return &II;
+ }
+
+ CmpInst::Predicate SrcPred;
+ Value *SrcLHS;
+ Value *SrcRHS;
+
+ // Fold compare eq/ne with 0 from a compare result as the predicate to the
+ // intrinsic. The typical use is a wave vote function in the library, which
+ // will be fed from a user code condition compared with 0. Fold in the
+ // redundant compare.
+
+ // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
+ // -> llvm.amdgcn.[if]cmp(a, b, pred)
+ //
+ // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
+ // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
+ if (match(Src1, PatternMatch::m_Zero()) &&
+ match(Src0, PatternMatch::m_ZExtOrSExt(
+ m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
+ PatternMatch::m_Value(SrcRHS))))) {
+ if (CCVal == CmpInst::ICMP_EQ)
+ SrcPred = CmpInst::getInversePredicate(SrcPred);
+
+ Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
+ ? Intrinsic::amdgcn_fcmp
+ : Intrinsic::amdgcn_icmp;
+
+ Type *Ty = SrcLHS->getType();
+ if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
+ // Promote to next legal integer type.
+ unsigned Width = CmpType->getBitWidth();
+ unsigned NewWidth = Width;
+
+ // Don't do anything for i1 comparisons.
+ if (Width == 1)
+ break;
+
+ if (Width <= 16)
+ NewWidth = 16;
+ else if (Width <= 32)
+ NewWidth = 32;
+ else if (Width <= 64)
+ NewWidth = 64;
+ else if (Width > 64)
+ break; // Can't handle this.
+
+ if (Width != NewWidth) {
+ IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
+ if (CmpInst::isSigned(SrcPred)) {
+ SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
+ SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
+ } else {
+ SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
+ SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
+ }
+ }
+ } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
+ break;
+
+ Function *NewF = Intrinsic::getDeclaration(
+ II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
+ Value *Args[] = {SrcLHS, SrcRHS,
+ ConstantInt::get(CC->getType(), SrcPred)};
+ CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
+ NewCall->takeName(&II);
+ return IC.replaceInstUsesWith(II, NewCall);
+ }
+
+ break;
+ }
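A minimal standalone sketch of the width bucketing used by the fold above, separate from the patch itself: integer compares narrower than the widths the icmp/fcmp intrinsic accepts are promoted to 16, 32 or 64 bits, while i1 and wider-than-64-bit compares are left alone. The helper name promotedCmpWidth is made up for illustration.

#include <cstdio>
#include <initializer_list>

// Mirrors the Width -> NewWidth selection in the llvm.amdgcn.icmp/fcmp fold.
static unsigned promotedCmpWidth(unsigned Width) {
  if (Width == 1 || Width > 64)
    return Width; // the fold bails out on these widths
  if (Width <= 16)
    return 16;
  if (Width <= 32)
    return 32;
  return 64;
}

int main() {
  for (unsigned W : {1u, 8u, 24u, 48u, 128u})
    std::printf("i%u -> i%u\n", W, promotedCmpWidth(W));
  return 0;
}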
+ case Intrinsic::amdgcn_ballot: {
+ if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
+ if (Src->isZero()) {
+ // amdgcn.ballot(i1 0) is zero.
+ return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
+ }
+
+ if (Src->isOne()) {
+ // amdgcn.ballot(i1 1) is exec.
+ const char *RegName = "exec";
+ if (II.getType()->isIntegerTy(32))
+ RegName = "exec_lo";
+ else if (!II.getType()->isIntegerTy(64))
+ break;
+
+ Function *NewF = Intrinsic::getDeclaration(
+ II.getModule(), Intrinsic::read_register, II.getType());
+ Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
+ MDNode *MD = MDNode::get(II.getContext(), MDArgs);
+ Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
+ CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
+ NewCall->addAttribute(AttributeList::FunctionIndex,
+ Attribute::Convergent);
+ NewCall->takeName(&II);
+ return IC.replaceInstUsesWith(II, NewCall);
+ }
+ }
+ break;
+ }
+ case Intrinsic::amdgcn_wqm_vote: {
+ // wqm_vote is identity when the argument is constant.
+ if (!isa<Constant>(II.getArgOperand(0)))
+ break;
+
+ return IC.replaceInstUsesWith(II, II.getArgOperand(0));
+ }
+ case Intrinsic::amdgcn_kill: {
+ const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
+ if (!C || !C->getZExtValue())
+ break;
+
+ // amdgcn.kill(i1 1) is a no-op
+ return IC.eraseInstFromFunction(II);
+ }
+ case Intrinsic::amdgcn_update_dpp: {
+ Value *Old = II.getArgOperand(0);
+
+ auto *BC = cast<ConstantInt>(II.getArgOperand(5));
+ auto *RM = cast<ConstantInt>(II.getArgOperand(3));
+ auto *BM = cast<ConstantInt>(II.getArgOperand(4));
+ if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
+ BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
+ break;
+
+ // If bound_ctrl = 1 and row mask = bank mask = 0xf, we can omit the old value.
+ return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
+ }
+ case Intrinsic::amdgcn_permlane16:
+ case Intrinsic::amdgcn_permlanex16: {
+ // Discard vdst_in if it's not going to be read.
+ Value *VDstIn = II.getArgOperand(0);
+ if (isa<UndefValue>(VDstIn))
+ break;
+
+ ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
+ ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
+ if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
+ break;
+
+ return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
+ }
+ case Intrinsic::amdgcn_readfirstlane:
+ case Intrinsic::amdgcn_readlane: {
+ // A constant value is trivially uniform.
+ if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
+ return IC.replaceInstUsesWith(II, C);
+ }
+
+ // The rest of these may not be safe if the exec mask might differ between
+ // the def and the use.
+ Value *Src = II.getArgOperand(0);
+ Instruction *SrcInst = dyn_cast<Instruction>(Src);
+ if (SrcInst && SrcInst->getParent() != II.getParent())
+ break;
+
+ // readfirstlane (readfirstlane x) -> readfirstlane x
+ // readlane (readfirstlane x), y -> readfirstlane x
+ if (match(Src,
+ PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
+ return IC.replaceInstUsesWith(II, Src);
+ }
+
+ if (IID == Intrinsic::amdgcn_readfirstlane) {
+ // readfirstlane (readlane x, y) -> readlane x, y
+ if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
+ return IC.replaceInstUsesWith(II, Src);
+ }
+ } else {
+ // readlane (readlane x, y), y -> readlane x, y
+ if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
+ PatternMatch::m_Value(),
+ PatternMatch::m_Specific(II.getArgOperand(1))))) {
+ return IC.replaceInstUsesWith(II, Src);
+ }
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_ldexp: {
+ // FIXME: This doesn't introduce new instructions and belongs in
+ // InstructionSimplify.
+ Type *Ty = II.getType();
+ Value *Op0 = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+
+ // Folding undef to qnan is safe regardless of the FP mode.
+ if (isa<UndefValue>(Op0)) {
+ auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
+ return IC.replaceInstUsesWith(II, QNaN);
+ }
+
+ const APFloat *C = nullptr;
+ match(Op0, PatternMatch::m_APFloat(C));
+
+ // FIXME: Should flush denorms depending on FP mode, but that's ignored
+ // everywhere else.
+ //
+ // These cases should be safe, even with strictfp.
+ // ldexp(0.0, x) -> 0.0
+ // ldexp(-0.0, x) -> -0.0
+ // ldexp(inf, x) -> inf
+ // ldexp(-inf, x) -> -inf
+ if (C && (C->isZero() || C->isInfinity())) {
+ return IC.replaceInstUsesWith(II, Op0);
+ }
+
+ // With strictfp, be more careful about possibly needing to flush denormals
+ // or not, and snan behavior depends on ieee_mode.
+ if (II.isStrictFP())
+ break;
+
+ if (C && C->isNaN()) {
+ // FIXME: We just need to make the nan quiet here, but that's unavailable
+ // on APFloat, only IEEEfloat
+ auto *Quieted =
+ ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
+ return IC.replaceInstUsesWith(II, Quieted);
+ }
+
+ // ldexp(x, 0) -> x
+ // ldexp(x, undef) -> x
+ if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
+ return IC.replaceInstUsesWith(II, Op0);
+ }
+
+ break;
+ }
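The zero, infinity and zero-exponent identities the ldexp fold above relies on also hold for the ordinary C library ldexp, so they are easy to sanity-check with a standalone program (not part of the patch):

#include <cmath>
#include <cstdio>

int main() {
  // ldexp(+/-0.0, x) keeps the signed zero, ldexp(x, 0) returns x unchanged,
  // and scaling an infinity leaves it an infinity of the same sign.
  std::printf("%g %g %g\n", std::ldexp(0.0, 42), std::ldexp(-0.0, 7),
              std::ldexp(1.5, 0));
  std::printf("%g\n", std::ldexp(HUGE_VAL, -3)); // prints inf
  return 0;
}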
+ case Intrinsic::amdgcn_fmul_legacy: {
+ Value *Op0 = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+
+ // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+ // infinity, gives +0.0.
+ // TODO: Move to InstSimplify?
+ if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
+ match(Op1, PatternMatch::m_AnyZeroFP()))
+ return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
+
+ // If we can prove we don't have one of the special cases then we can use a
+ // normal fmul instruction instead.
+ if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
+ auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
+ FMul->takeName(&II);
+ return IC.replaceInstUsesWith(II, FMul);
+ }
+ break;
+ }
+ case Intrinsic::amdgcn_fma_legacy: {
+ Value *Op0 = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+ Value *Op2 = II.getArgOperand(2);
+
+ // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+ // infinity, gives +0.0.
+ // TODO: Move to InstSimplify?
+ if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
+ match(Op1, PatternMatch::m_AnyZeroFP())) {
+ // It's tempting to just return Op2 here, but that would give the wrong
+ // result if Op2 was -0.0.
+ auto *Zero = ConstantFP::getNullValue(II.getType());
+ auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
+ FAdd->takeName(&II);
+ return IC.replaceInstUsesWith(II, FAdd);
+ }
+
+ // If we can prove we don't have one of the special cases then we can use a
+ // normal fma instead.
+ if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
+ II.setCalledOperand(Intrinsic::getDeclaration(
+ II.getModule(), Intrinsic::fma, II.getType()));
+ return &II;
+ }
+ break;
+ }
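As a reading aid for the two legacy folds above, here is a tiny standalone model (names and values made up, not part of the patch) of the zero behaviour described in the comments: +/-0.0 multiplied by anything, including NaN and infinity, yields +0.0. For fma_legacy the third operand is then added to that +0.0, which is why the code emits an fadd of zero and Op2 instead of returning Op2 directly (Op2 could be -0.0).

#include <cmath>
#include <cstdio>

// Model of the legacy multiply semantics described in the comments above.
static double fmulLegacy(double A, double B) {
  if (A == 0.0 || B == 0.0) // compares equal for both +0.0 and -0.0
    return +0.0;
  return A * B;
}

int main() {
  std::printf("%g %g %g\n", fmulLegacy(0.0, NAN), fmulLegacy(-0.0, INFINITY),
              fmulLegacy(2.0, 3.0)); // prints: 0 0 6
  return 0;
}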
+ default: {
+ if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+ AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
+ return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
+ }
+ }
+ }
+ return None;
+}
+
+/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
+///
+/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
+/// struct returns.
+static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
+ IntrinsicInst &II,
+ APInt DemandedElts,
+ int DMaskIdx = -1) {
+
+ auto *IIVTy = cast<FixedVectorType>(II.getType());
+ unsigned VWidth = IIVTy->getNumElements();
+ if (VWidth == 1)
+ return nullptr;
+
+ IRBuilderBase::InsertPointGuard Guard(IC.Builder);
+ IC.Builder.SetInsertPoint(&II);
+
+ // Assume the arguments are unchanged and later override them, if needed.
+ SmallVector<Value *, 16> Args(II.args());
+
+ if (DMaskIdx < 0) {
+ // Buffer case.
+
+ const unsigned ActiveBits = DemandedElts.getActiveBits();
+ const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
+
+ // Start assuming the prefix of elements is demanded, but possibly clear
+ // some other bits if there are trailing zeros (unused components at front)
+ // and update offset.
+ DemandedElts = (1 << ActiveBits) - 1;
+
+ if (UnusedComponentsAtFront > 0) {
+ static const unsigned InvalidOffsetIdx = 0xf;
+
+ unsigned OffsetIdx;
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::amdgcn_raw_buffer_load:
+ OffsetIdx = 1;
+ break;
+ case Intrinsic::amdgcn_s_buffer_load:
+ // If the resulting type is vec3, there is no point in trimming the
+ // load with an updated offset, as the vec3 would most likely be widened to
+ // vec4 anyway during lowering.
+ if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
+ OffsetIdx = InvalidOffsetIdx;
+ else
+ OffsetIdx = 1;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_load:
+ OffsetIdx = 2;
+ break;
+ default:
+ // TODO: handle tbuffer* intrinsics.
+ OffsetIdx = InvalidOffsetIdx;
+ break;
+ }
+
+ if (OffsetIdx != InvalidOffsetIdx) {
+ // Clear demanded bits and update the offset.
+ DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
+ auto *Offset = II.getArgOperand(OffsetIdx);
+ unsigned SingleComponentSizeInBits =
+ IC.getDataLayout().getTypeSizeInBits(II.getType()->getScalarType());
+ unsigned OffsetAdd =
+ UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
+ auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
+ Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
+ }
+ }
+ } else {
+ // Image case.
+
+ ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
+ unsigned DMaskVal = DMask->getZExtValue() & 0xf;
+
+ // Mask off values that are undefined because the dmask doesn't cover them
+ DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
+
+ unsigned NewDMaskVal = 0;
+ unsigned OrigLoadIdx = 0;
+ for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
+ const unsigned Bit = 1 << SrcIdx;
+ if (!!(DMaskVal & Bit)) {
+ if (!!DemandedElts[OrigLoadIdx])
+ NewDMaskVal |= Bit;
+ OrigLoadIdx++;
+ }
+ }
+
+ if (DMaskVal != NewDMaskVal)
+ Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
+ }
+
+ unsigned NewNumElts = DemandedElts.countPopulation();
+ if (!NewNumElts)
+ return UndefValue::get(II.getType());
+
+ if (NewNumElts >= VWidth && DemandedElts.isMask()) {
+ if (DMaskIdx >= 0)
+ II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
+ return nullptr;
+ }
+
+ // Validate function argument and return types, extracting overloaded types
+ // along the way.
+ SmallVector<Type *, 6> OverloadTys;
+ if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
+ return nullptr;
+
+ Module *M = II.getParent()->getParent()->getParent();
+ Type *EltTy = IIVTy->getElementType();
+ Type *NewTy =
+ (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
+
+ OverloadTys[0] = NewTy;
+ Function *NewIntrin =
+ Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);
+
+ CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
+ NewCall->takeName(&II);
+ NewCall->copyMetadata(II);
+
+ if (NewNumElts == 1) {
+ return IC.Builder.CreateInsertElement(UndefValue::get(II.getType()),
+ NewCall,
+ DemandedElts.countTrailingZeros());
+ }
+
+ SmallVector<int, 8> EltMask;
+ unsigned NewLoadIdx = 0;
+ for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
+ if (!!DemandedElts[OrigLoadIdx])
+ EltMask.push_back(NewLoadIdx++);
+ else
+ EltMask.push_back(NewNumElts);
+ }
+
+ Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
+
+ return Shuffle;
+}
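For the image branch of the function above, the interaction between the dmask and the demanded load results is easier to see with concrete numbers. A standalone sketch of the narrowing loop, using plain unsigned arithmetic instead of APInt (example values made up):

#include <cstdio>

int main() {
  unsigned DMaskVal = 0xb;     // 0b1011: components 0, 1 and 3 are loaded
  unsigned DemandedElts = 0x5; // 0b101: only results 0 and 2 of the load are used
  unsigned NewDMaskVal = 0, OrigLoadIdx = 0;
  for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
    unsigned Bit = 1u << SrcIdx;
    if (DMaskVal & Bit) {
      if (DemandedElts & (1u << OrigLoadIdx))
        NewDMaskVal |= Bit;
      ++OrigLoadIdx;
    }
  }
  std::printf("dmask 0x%x -> 0x%x\n", DMaskVal, NewDMaskVal); // 0xb -> 0x9
  return 0;
}

With the narrowed dmask the rewritten intrinsic returns only two components, and the shuffle emitted at the end of the function maps them back to their original element positions.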
+
+Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const {
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::amdgcn_buffer_load:
+ case Intrinsic::amdgcn_buffer_load_format:
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_raw_tbuffer_load:
+ case Intrinsic::amdgcn_s_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load_format:
+ case Intrinsic::amdgcn_struct_tbuffer_load:
+ case Intrinsic::amdgcn_tbuffer_load:
+ return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
+ default: {
+ if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
+ return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
+ }
+ break;
+ }
+ }
+ return None;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 6c13bc8599db..f2d62956e25b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Value.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 61b78acad3f4..8e7a6a7029c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -15,9 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
-#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
namespace llvm {
@@ -25,6 +23,7 @@ class GCNSubtarget;
class MachineFunction;
class MachineInstr;
class MachineInstrBuilder;
+class MachineMemOperand;
class AMDGPUInstrInfo {
public:
@@ -52,6 +51,28 @@ struct ImageDimIntrinsicInfo {
unsigned Intr;
unsigned BaseOpcode;
MIMGDim Dim;
+
+ uint8_t NumGradients;
+ uint8_t NumDmask;
+ uint8_t NumData;
+ uint8_t NumVAddrs;
+ uint8_t NumArgs;
+
+ uint8_t DMaskIndex;
+ uint8_t VAddrStart;
+ uint8_t GradientStart;
+ uint8_t CoordStart;
+ uint8_t LodIndex;
+ uint8_t MipIndex;
+ uint8_t VAddrEnd;
+ uint8_t RsrcIndex;
+ uint8_t SampIndex;
+ uint8_t UnormIndex;
+ uint8_t TexFailCtrlIndex;
+ uint8_t CachePolicyIndex;
+
+ uint8_t GradientTyArg;
+ uint8_t CoordTyArg;
};
const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 2025c0fa5d21..bd577a6fb8c5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -12,27 +12,17 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUInstructionSelector.h"
-#include "AMDGPUInstrInfo.h"
+#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
+#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
-#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/IR/DiagnosticInfo.h"
#define DEBUG_TYPE "amdgpu-isel"
@@ -72,13 +62,15 @@ const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB,
CodeGenCoverage &CoverageInfo) {
MRI = &MF.getRegInfo();
+ Subtarget = &MF.getSubtarget<GCNSubtarget>();
InstructionSelector::setupMF(MF, KB, CoverageInfo);
}
bool AMDGPUInstructionSelector::isVCC(Register Reg,
const MachineRegisterInfo &MRI) const {
- if (Register::isPhysicalRegister(Reg))
- return Reg == TRI.getVCC();
+ // The verifier is oblivious to s1 being a valid value for wavesize registers.
+ if (Reg.isPhysical())
+ return false;
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
const TargetRegisterClass *RC =
@@ -170,24 +162,11 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
return false;
- // Don't constrain the source register to a class so the def instruction
- // handles it (unless it's undef).
- //
- // FIXME: This is a hack. When selecting the def, we neeed to know
- // specifically know that the result is VCCRegBank, and not just an SGPR
- // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
- if (Src.isUndef()) {
- const TargetRegisterClass *SrcRC =
- TRI.getConstrainedRegClassForOperand(Src, *MRI);
- if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
- return false;
- }
-
return true;
}
for (const MachineOperand &MO : I.operands()) {
- if (Register::isPhysicalRegister(MO.getReg()))
+ if (MO.getReg().isPhysical())
continue;
const TargetRegisterClass *RC =
@@ -286,50 +265,24 @@ static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
}
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
- MachineOperand &Dst = I.getOperand(0);
- MachineOperand &Src0 = I.getOperand(1);
- MachineOperand &Src1 = I.getOperand(2);
- Register DstReg = Dst.getReg();
+ Register DstReg = I.getOperand(0).getReg();
unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
- if (DstRB->getID() == AMDGPU::VCCRegBankID) {
- const TargetRegisterClass *RC = TRI.getBoolRC();
- unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
- RC == &AMDGPU::SReg_64RegClass);
- I.setDesc(TII.get(InstOpc));
- // Dead implicit-def of scc
- I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
- true, // isImp
- false, // isKill
- true)); // isDead
-
- // FIXME: Hack to avoid turning the register bank into a register class.
- // The selector for G_ICMP relies on seeing the register bank for the result
- // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
- // be ambiguous whether it's a scalar or vector bool.
- if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg()))
- MRI->setRegClass(Src0.getReg(), RC);
- if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg()))
- MRI->setRegClass(Src1.getReg(), RC);
-
- return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
- }
-
- // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
- // the result?
- if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
- unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
- I.setDesc(TII.get(InstOpc));
- // Dead implicit-def of scc
- I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
- true, // isImp
- false, // isKill
- true)); // isDead
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
+ if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
+ DstRB->getID() != AMDGPU::VCCRegBankID)
+ return false;
- return false;
+ bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
+ STI.isWave64());
+ I.setDesc(TII.get(getLogicalBitOpcode(I.getOpcode(), Is64)));
+
+ // Dead implicit-def of scc
+ I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
+ true, // isImp
+ false, // isKill
+ true)); // isDead
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
@@ -365,7 +318,7 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;
+ const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
MachineInstr *Add
@@ -403,7 +356,7 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
} else {
const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
Register CarryReg = MRI->createVirtualRegister(CarryRC);
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
.addDef(CarryReg)
.add(Lo1)
.add(Lo2)
@@ -446,10 +399,8 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
I.getOpcode() == AMDGPU::G_USUBE;
if (isVCC(Dst1Reg, *MRI)) {
- // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
- // carry out despite the _i32 name. These were renamed in VI to _U32.
- // FIXME: We should probably rename the opcodes here.
- unsigned NoCarryOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+ unsigned NoCarryOpc =
+ IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
@@ -597,8 +548,6 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
return false;
- const unsigned SrcFlags = getUndefRegState(Src.isUndef());
-
// Note we could have mixed SGPR and VGPR destination banks for an SGPR
// source, and this relies on the fact that the same subregister indices are
// used for both.
@@ -606,7 +555,12 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
for (int I = 0, E = NumDst; I != E; ++I) {
MachineOperand &Dst = MI.getOperand(I);
BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
- .addReg(SrcReg, SrcFlags, SubRegs[I]);
+ .addReg(SrcReg, 0, SubRegs[I]);
+
+ // Make sure the subregister index is valid for the source register.
+ SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
+ if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
+ return false;
const TargetRegisterClass *DstRC =
TRI.getConstrainedRegClassForOperand(Dst, *MRI);
@@ -618,11 +572,6 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
return true;
}
-static bool isZero(Register Reg, const MachineRegisterInfo &MRI) {
- int64_t Val;
- return mi_match(Reg, MRI, m_ICst(Val)) && Val == 0;
-}
-
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
MachineInstr &MI) const {
if (selectImpl(MI, *CoverageInfo))
@@ -647,6 +596,24 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock *BB = MI.getParent();
+ auto ConstSrc1 =
+ getConstantVRegValWithLookThrough(Src1, *MRI, true, true, true);
+ if (ConstSrc1) {
+ auto ConstSrc0 =
+ getConstantVRegValWithLookThrough(Src0, *MRI, true, true, true);
+ if (ConstSrc0) {
+ const int64_t K0 = ConstSrc0->Value.getSExtValue();
+ const int64_t K1 = ConstSrc1->Value.getSExtValue();
+ uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
+ uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
+
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
+ .addImm(Lo16 | (Hi16 << 16));
+ MI.eraseFromParent();
+ return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
+ }
+ }
+
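The constant case added above folds the whole G_BUILD_VECTOR_TRUNC into a single S_MOV_B32 by packing the two truncated 16-bit values into one 32-bit immediate. A standalone illustration of the packing, with made-up constants (not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
  int64_t K0 = 0x12345; // low element; only bits 15:0 survive the truncation
  int64_t K1 = -1;      // high element
  uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
  uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
  std::printf("packed immediate: 0x%08x\n",
              static_cast<unsigned>(Lo16 | (Hi16 << 16))); // 0xffff2345
  return 0;
}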
// TODO: This should probably be a combine somewhere
// (build_vector_trunc $src0, undef -> copy $src0
MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
@@ -659,7 +626,6 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
Register ShiftSrc0;
Register ShiftSrc1;
- int64_t ShiftAmt;
// With multiple uses of the shift, this will duplicate the shift and
// increase register pressure.
@@ -671,14 +637,11 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
// (build_vector_trunc $src0, $src1)
// => (S_PACK_LL_B32_B16 $src0, $src1)
- // FIXME: This is an inconvenient way to check a specific value
bool Shift0 = mi_match(
- Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_ICst(ShiftAmt)))) &&
- ShiftAmt == 16;
+ Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_SpecificICst(16))));
bool Shift1 = mi_match(
- Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_ICst(ShiftAmt)))) &&
- ShiftAmt == 16;
+ Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_SpecificICst(16))));
unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
if (Shift0 && Shift1) {
@@ -688,7 +651,7 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
} else if (Shift1) {
Opc = AMDGPU::S_PACK_LH_B32_B16;
MI.getOperand(2).setReg(ShiftSrc1);
- } else if (Shift0 && isZero(Src1, *MRI)) {
+ } else if (Shift0 && ConstSrc1 && ConstSrc1->Value == 0) {
// build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
.addReg(ShiftSrc0)
@@ -738,6 +701,10 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
if (Offset % 32 != 0 || InsSize % 32 != 0)
return false;
+ // Currently not handled by getSubRegFromChannel.
+ if (InsSize > 128)
+ return false;
+
unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
if (SubReg == AMDGPU::NoSubRegister)
return false;
@@ -821,6 +788,63 @@ bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
return true;
}
+// Writelane is special in that it can use SGPR and M0 (which would normally
+// count as using the constant bus twice - but in this case it is allowed since
+// the lane selector doesn't count as a use of the constant bus). However, it is
+// still required to abide by the 1 SGPR rule. Fix this up if we might have
+// multiple SGPRs.
+bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
+ // With a constant bus limit of at least 2, there's no issue.
+ if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
+ return selectImpl(MI, *CoverageInfo);
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ Register VDst = MI.getOperand(0).getReg();
+ Register Val = MI.getOperand(2).getReg();
+ Register LaneSelect = MI.getOperand(3).getReg();
+ Register VDstIn = MI.getOperand(4).getReg();
+
+ auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
+
+ Optional<ValueAndVReg> ConstSelect =
+ getConstantVRegValWithLookThrough(LaneSelect, *MRI, true, true);
+ if (ConstSelect) {
+ // The selector has to be an inline immediate, so we can use whatever for
+ // the other operands.
+ MIB.addReg(Val);
+ MIB.addImm(ConstSelect->Value.getSExtValue() &
+ maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2()));
+ } else {
+ Optional<ValueAndVReg> ConstVal =
+ getConstantVRegValWithLookThrough(Val, *MRI, true, true);
+
+ // If the value written is an inline immediate, we can get away without a
+ // copy to m0.
+ if (ConstVal && AMDGPU::isInlinableLiteral32(ConstVal->Value.getSExtValue(),
+ STI.hasInv2PiInlineImm())) {
+ MIB.addImm(ConstVal->Value.getSExtValue());
+ MIB.addReg(LaneSelect);
+ } else {
+ MIB.addReg(Val);
+
+ // If the lane selector was originally in a VGPR and copied with
+ // readfirstlane, there's a hazard to read the same SGPR from the
+ // VALU. Constrain to a different SGPR to help avoid needing a nop later.
+ RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);
+
+ BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(LaneSelect);
+ MIB.addReg(AMDGPU::M0);
+ }
+ }
+
+ MIB.addReg(VDstIn);
+
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
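One detail of selectWritelane worth spelling out: a constant lane selector is wrapped to the wavefront size by keeping only its low log2(wavefront size) bits, which is what the maskTrailingOnes expression above does. A standalone sketch of that masking, assuming wave64 (values made up):

#include <cstdint>
#include <cstdio>

int main() {
  unsigned WavefrontSizeLog2 = 6;                  // wave64
  uint64_t Mask = (1ull << WavefrontSizeLog2) - 1; // low 6 bits, i.e. 0x3f
  int64_t LaneSelect = 70;                         // selector larger than the wave
  std::printf("lane %lld -> %lld\n", (long long)LaneSelect,
              (long long)(LaneSelect & Mask)); // prints: lane 70 -> 6
  return 0;
}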
+
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
@@ -830,12 +854,14 @@ bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
LLT Ty = MRI->getType(Dst0);
unsigned Opc;
if (Ty == LLT::scalar(32))
- Opc = AMDGPU::V_DIV_SCALE_F32;
+ Opc = AMDGPU::V_DIV_SCALE_F32_e64;
else if (Ty == LLT::scalar(64))
- Opc = AMDGPU::V_DIV_SCALE_F64;
+ Opc = AMDGPU::V_DIV_SCALE_F64_e64;
else
return false;
+ // TODO: Match source modifiers.
+
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
@@ -847,9 +873,14 @@ bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)
.addDef(Dst1)
- .addUse(Src0)
- .addUse(Denom)
- .addUse(Numer);
+ .addImm(0) // $src0_modifiers
+ .addUse(Src0) // $src0
+ .addImm(0) // $src1_modifiers
+ .addUse(Denom) // $src1
+ .addImm(0) // $src2_modifiers
+ .addUse(Numer) // $src2
+ .addImm(0) // $clamp
+ .addImm(0); // $omod
MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
@@ -887,12 +918,20 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
case Intrinsic::amdgcn_wwm:
return constrainCopyLikeIntrin(I, AMDGPU::WWM);
+ case Intrinsic::amdgcn_writelane:
+ return selectWritelane(I);
case Intrinsic::amdgcn_div_scale:
return selectDivScale(I);
case Intrinsic::amdgcn_icmp:
return selectIntrinsicIcmp(I);
case Intrinsic::amdgcn_ballot:
return selectBallot(I);
+ case Intrinsic::amdgcn_reloc_constant:
+ return selectRelocConstant(I);
+ case Intrinsic::amdgcn_groupstaticsize:
+ return selectGroupStaticSize(I);
+ case Intrinsic::returnaddress:
+ return selectReturnAddress(I);
default:
return selectImpl(I, *CoverageInfo);
}
@@ -1055,7 +1094,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI, true);
if (Arg.hasValue()) {
- const int64_t Value = Arg.getValue().Value;
+ const int64_t Value = Arg.getValue().Value.getSExtValue();
if (Value == 0) {
unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0);
@@ -1073,6 +1112,96 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
return true;
}
+bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
+ Register DstReg = I.getOperand(0).getReg();
+ const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
+ const TargetRegisterClass *DstRC =
+ TRI.getRegClassForSizeOnBank(32, *DstBank, *MRI);
+ if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
+ return false;
+
+ const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
+
+ Module *M = MF->getFunction().getParent();
+ const MDNode *Metadata = I.getOperand(2).getMetadata();
+ auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
+ auto RelocSymbol = cast<GlobalVariable>(
+ M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));
+
+ MachineBasicBlock *BB = I.getParent();
+ BuildMI(*BB, &I, I.getDebugLoc(),
+ TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
+ .addGlobalAddress(RelocSymbol, 0, SIInstrInfo::MO_ABS32_LO);
+
+ I.eraseFromParent();
+ return true;
+}
+
+bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
+ Triple::OSType OS = MF->getTarget().getTargetTriple().getOS();
+
+ Register DstReg = I.getOperand(0).getReg();
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
+ AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+
+ MachineBasicBlock *MBB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+
+ auto MIB = BuildMI(*MBB, &I, DL, TII.get(Mov), DstReg);
+
+ if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) {
+ const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MIB.addImm(MFI->getLDSSize());
+ } else {
+ Module *M = MF->getFunction().getParent();
+ const GlobalValue *GV
+ = Intrinsic::getDeclaration(M, Intrinsic::amdgcn_groupstaticsize);
+ MIB.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO);
+ }
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
+bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
+ MachineBasicBlock *MBB = I.getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+
+ MachineOperand &Dst = I.getOperand(0);
+ Register DstReg = Dst.getReg();
+ unsigned Depth = I.getOperand(2).getImm();
+
+ const TargetRegisterClass *RC
+ = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
+ if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||
+ !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
+ return false;
+
+ // Check for kernel and shader functions
+ if (Depth != 0 ||
+ MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
+ BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
+ .addImm(0);
+ I.eraseFromParent();
+ return true;
+ }
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ // There is a call to @llvm.returnaddress in this function
+ MFI.setReturnAddressIsTaken(true);
+
+ // Get the return address reg and mark it as an implicit live-in
+ Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
+ Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg,
+ AMDGPU::SReg_64RegClass);
+ BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
+ .addReg(LiveIn);
+ I.eraseFromParent();
+ return true;
+}
+
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
// FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
@@ -1088,28 +1217,6 @@ bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
return true;
}
-static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
- switch (MF.getFunction().getCallingConv()) {
- case CallingConv::AMDGPU_PS:
- return 1;
- case CallingConv::AMDGPU_VS:
- return 2;
- case CallingConv::AMDGPU_GS:
- return 3;
- case CallingConv::AMDGPU_HS:
- case CallingConv::AMDGPU_LS:
- case CallingConv::AMDGPU_ES:
- report_fatal_error("ds_ordered_count unsupported for this calling conv");
- case CallingConv::AMDGPU_CS:
- case CallingConv::AMDGPU_KERNEL:
- case CallingConv::C:
- case CallingConv::Fast:
- default:
- // Assume other calling conventions are various compute callable functions
- return 0;
- }
-}
-
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
MachineInstr &MI, Intrinsic::ID IntrID) const {
MachineBasicBlock *MBB = MI.getParent();
@@ -1141,7 +1248,7 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
report_fatal_error("ds_ordered_count: bad index operand");
unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
- unsigned ShaderType = getDSShaderTypeValue(*MF);
+ unsigned ShaderType = SIInstrInfo::getDSShaderTypeValue(*MF);
unsigned Offset0 = OrderedCountIndex << 2;
unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
@@ -1235,8 +1342,8 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addImm(0);
} else {
- std::tie(BaseOffset, ImmOffset, OffsetDef)
- = AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset);
+ std::tie(BaseOffset, ImmOffset) =
+ AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset);
if (Readfirstlane) {
// We have the constant offset now, so put the readfirstlane back on the
@@ -1274,7 +1381,6 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
}
MIB.addImm(ImmOffset)
- .addImm(-1) // $gds
.cloneMemRefs(MI);
MI.eraseFromParent();
@@ -1291,7 +1397,7 @@ bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
// TODO: Should this try to look through readfirstlane like GWS?
- if (!isDSOffsetLegal(PtrBase, Offset, 16)) {
+ if (!isDSOffsetLegal(PtrBase, Offset)) {
PtrBase = MI.getOperand(2).getReg();
Offset = 0;
}
@@ -1302,12 +1408,29 @@ bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
.addReg(PtrBase);
- BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg())
+ if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))
+ return false;
+
+ auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg())
.addImm(Offset)
.addImm(IsGDS ? -1 : 0)
.cloneMemRefs(MI);
MI.eraseFromParent();
- return true;
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
+bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
+ if (TM.getOptLevel() > CodeGenOpt::None) {
+ unsigned WGSize = STI.getFlatWorkGroupSizes(MF->getFunction()).second;
+ if (WGSize <= STI.getWavefrontSize()) {
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER));
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+ return selectImpl(MI, *CoverageInfo);
}
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
@@ -1355,36 +1478,29 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;
- const bool IsGFX10 = STI.getGeneration() >= AMDGPUSubtarget::GFX10;
+ const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
- const int VAddrIdx = getImageVAddrIdxBegin(BaseOpcode,
- MI.getNumExplicitDefs());
- int NumVAddr, NumGradients;
- std::tie(NumVAddr, NumGradients) = getImageNumVAddr(Intr, BaseOpcode);
+ const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
Register VDataIn, VDataOut;
LLT VDataTy;
int NumVDataDwords = -1;
bool IsD16 = false;
- // XXX - Can we just get the second to last argument for ctrl?
- unsigned CtrlIdx; // Index of texfailctrl argument
bool Unorm;
- if (!BaseOpcode->Sampler) {
+ if (!BaseOpcode->Sampler)
Unorm = true;
- CtrlIdx = VAddrIdx + NumVAddr + 1;
- } else {
- Unorm = MI.getOperand(VAddrIdx + NumVAddr + 2).getImm() != 0;
- CtrlIdx = VAddrIdx + NumVAddr + 3;
- }
+ else
+ Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
bool TFE;
bool LWE;
bool IsTexFail = false;
- if (!parseTexFail(MI.getOperand(CtrlIdx).getImm(), TFE, LWE, IsTexFail))
+ if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),
+ TFE, LWE, IsTexFail))
return false;
- const int Flags = MI.getOperand(CtrlIdx + 2).getImm();
+ const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
const bool IsA16 = (Flags & 1) != 0;
const bool IsG16 = (Flags & 2) != 0;
@@ -1415,11 +1531,19 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
NumVDataDwords = Is64Bit ? 2 : 1;
}
} else {
- const int DMaskIdx = 2; // Input/output + intrinsic ID.
-
- DMask = MI.getOperand(DMaskIdx).getImm();
+ DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
+ // One memoperand is mandatory, except for getresinfo.
+ // FIXME: Check this in verifier.
+ if (!MI.memoperands_empty()) {
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+
+ // Infer d16 from the memory size, as the register type will be mangled by
+ // unpacked subtargets, or by TFE.
+ IsD16 = ((8 * MMO->getSize()) / DMaskLanes) < 32;
+ }
+
if (BaseOpcode->Store) {
VDataIn = MI.getOperand(1).getReg();
VDataTy = MRI->getType(VDataIn);
@@ -1429,18 +1553,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
VDataTy = MRI->getType(VDataOut);
NumVDataDwords = DMaskLanes;
- // One memoperand is mandatory, except for getresinfo.
- // FIXME: Check this in verifier.
- if (!MI.memoperands_empty()) {
- const MachineMemOperand *MMO = *MI.memoperands_begin();
-
- // Infer d16 from the memory size, as the register type will be mangled by
- // unpacked subtargets, or by TFE.
- IsD16 = ((8 * MMO->getSize()) / DMaskLanes) < 32;
-
- if (IsD16 && !STI.hasUnpackedD16VMem())
- NumVDataDwords = (DMaskLanes + 1) / 2;
- }
+ if (IsD16 && !STI.hasUnpackedD16VMem())
+ NumVDataDwords = (DMaskLanes + 1) / 2;
}
}
@@ -1448,7 +1562,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
if (LZMappingInfo) {
// The legalizer replaced the register with an immediate 0 if we need to
// change the opcode.
- const MachineOperand &Lod = MI.getOperand(VAddrIdx + NumVAddr - 1);
+ const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->LodIndex);
if (Lod.isImm()) {
assert(Lod.getImm() == 0);
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
@@ -1457,7 +1571,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
// Optimize _mip away, when 'lod' is zero
if (MIPMappingInfo) {
- const MachineOperand &Lod = MI.getOperand(VAddrIdx + NumVAddr - 1);
+ const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->MipIndex);
if (Lod.isImm()) {
assert(Lod.getImm() == 0);
IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
@@ -1480,20 +1594,22 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
bool DLC = false;
if (BaseOpcode->Atomic) {
GLC = true; // TODO no-return optimization
- if (!parseCachePolicy(MI.getOperand(CtrlIdx + 1).getImm(), nullptr, &SLC,
- IsGFX10 ? &DLC : nullptr))
+ if (!parseCachePolicy(
+ MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), nullptr,
+ &SLC, IsGFX10Plus ? &DLC : nullptr))
return false;
} else {
- if (!parseCachePolicy(MI.getOperand(CtrlIdx + 1).getImm(), &GLC, &SLC,
- IsGFX10 ? &DLC : nullptr))
+ if (!parseCachePolicy(
+ MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), &GLC,
+ &SLC, IsGFX10Plus ? &DLC : nullptr))
return false;
}
int NumVAddrRegs = 0;
int NumVAddrDwords = 0;
- for (int I = 0; I < NumVAddr; ++I) {
+ for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
// Skip the $noregs and 0s inserted during legalization.
- MachineOperand &AddrOp = MI.getOperand(VAddrIdx + I);
+ MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
if (!AddrOp.isReg())
continue; // XXX - Break?
@@ -1518,7 +1634,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
++NumVDataDwords;
int Opcode = -1;
- if (IsGFX10) {
+ if (IsGFX10Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx10NSA
: AMDGPU::MIMGEncGfx10Default,
@@ -1556,36 +1672,36 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
if (VDataIn)
MIB.addReg(VDataIn); // vdata input
- for (int i = 0; i != NumVAddrRegs; ++i) {
- MachineOperand &SrcOp = MI.getOperand(VAddrIdx + i);
+ for (int I = 0; I != NumVAddrRegs; ++I) {
+ MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
if (SrcOp.isReg()) {
assert(SrcOp.getReg() != 0);
MIB.addReg(SrcOp.getReg());
}
}
- MIB.addReg(MI.getOperand(VAddrIdx + NumVAddr).getReg()); // rsrc
+ MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
if (BaseOpcode->Sampler)
- MIB.addReg(MI.getOperand(VAddrIdx + NumVAddr + 1).getReg()); // sampler
+ MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
MIB.addImm(DMask); // dmask
- if (IsGFX10)
+ if (IsGFX10Plus)
MIB.addImm(DimInfo->Encoding);
MIB.addImm(Unorm);
- if (IsGFX10)
+ if (IsGFX10Plus)
MIB.addImm(DLC);
MIB.addImm(GLC);
MIB.addImm(SLC);
MIB.addImm(IsA16 && // a16 or r128
STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
- if (IsGFX10)
+ if (IsGFX10Plus)
MIB.addImm(IsA16 ? -1 : 0);
MIB.addImm(TFE); // tfe
MIB.addImm(LWE); // lwe
- if (!IsGFX10)
+ if (!IsGFX10Plus)
MIB.addImm(DimInfo->DA ? -1 : 0);
if (BaseOpcode->HasD16)
MIB.addImm(IsD16 ? -1 : 0);
@@ -1614,6 +1730,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectDSAppendConsume(I, true);
case Intrinsic::amdgcn_ds_consume:
return selectDSAppendConsume(I, false);
+ case Intrinsic::amdgcn_s_barrier:
+ return selectSBarrier(I);
+ case Intrinsic::amdgcn_global_atomic_fadd:
+ return selectGlobalAtomicFaddIntrinsic(I);
default: {
return selectImpl(I, *CoverageInfo);
}
@@ -1670,11 +1790,6 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
return Ret;
}
-bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
- initM0(I);
- return selectImpl(I, *CoverageInfo);
-}
-
static int sizeToSubRegIndex(unsigned Size) {
switch (Size) {
case 32:
@@ -1853,12 +1968,33 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
if (!DstTy.isScalar())
return false;
- if (I.getOpcode() == AMDGPU::G_ANYEXT)
- return selectCOPY(I);
-
// Artifact casts should never use vcc.
const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
+ // FIXME: This should probably be illegal and split earlier.
+ if (I.getOpcode() == AMDGPU::G_ANYEXT) {
+ if (DstSize <= 32)
+ return selectCOPY(I);
+
+ const TargetRegisterClass *SrcRC =
+ TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank, *MRI);
+ const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
+ const TargetRegisterClass *DstRC =
+ TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
+
+ Register UndefReg = MRI->createVirtualRegister(SrcRC);
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+ .addReg(SrcReg)
+ .addImm(AMDGPU::sub0)
+ .addReg(UndefReg)
+ .addImm(AMDGPU::sub1);
+ I.eraseFromParent();
+
+ return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
+ RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
+ }
+
if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
// 64-bit should have been split up in RegBankSelect
@@ -1873,7 +2009,7 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
}
- const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
+ const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
MachineInstr *ExtI =
BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
.addReg(SrcReg)
@@ -1944,33 +2080,36 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineOperand &ImmOp = I.getOperand(1);
+ Register DstReg = I.getOperand(0).getReg();
+ unsigned Size = MRI->getType(DstReg).getSizeInBits();
// The AMDGPU backend only supports Imm operands and not CImm or FPImm.
if (ImmOp.isFPImm()) {
const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
ImmOp.ChangeToImmediate(Imm.getZExtValue());
} else if (ImmOp.isCImm()) {
- ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
+ ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue());
+ } else {
+ llvm_unreachable("Not supported by g_constants");
}
- Register DstReg = I.getOperand(0).getReg();
- unsigned Size;
- bool IsSgpr;
- const RegisterBank *RB = MRI->getRegBankOrNull(I.getOperand(0).getReg());
- if (RB) {
- IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
- Size = MRI->getType(DstReg).getSizeInBits();
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;
+
+ unsigned Opcode;
+ if (DstRB->getID() == AMDGPU::VCCRegBankID) {
+ Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
} else {
- const TargetRegisterClass *RC = TRI.getRegClassForReg(*MRI, DstReg);
- IsSgpr = TRI.isSGPRClass(RC);
- Size = TRI.getRegSizeInBits(*RC);
- }
+ Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
- if (Size != 32 && Size != 64)
- return false;
+ // We should never produce s1 values on banks other than VCC. If the user of
+ // this already constrained the register, we may incorrectly think it's VCC
+ // if it wasn't originally.
+ if (Size == 1)
+ return false;
+ }
- unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
- if (Size == 32) {
+ if (Size != 64) {
I.setDesc(TII.get(Opcode));
I.addImplicitDefUseOperands(*MF);
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -2148,6 +2287,10 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
+bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
+ return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
+}
+
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
if (!MI.hasOneMemOperand())
return false;
@@ -2179,19 +2322,20 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
}
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
-
const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
unsigned AS = PtrTy.getAddressSpace();
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
STI.ldsRequiresM0Init()) {
+ MachineBasicBlock *BB = I.getParent();
+
// If DS instructions require M0 initialization, insert it before selecting.
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addImm(-1);
}
}
-bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
+ MachineInstr &I) const {
initM0(I);
return selectImpl(I, *CoverageInfo);
}
@@ -2242,6 +2386,7 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG(
MIB.addImm(0);
MIB.addImm(Offset);
+ MIB.addImm(1); // glc
MIB.addImm(0); // slc
MIB.cloneMemRefs(MI);
@@ -2276,8 +2421,7 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
CondPhysReg = AMDGPU::SCC;
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
- // FIXME: Hack for isSCC tests
- ConstrainRC = &AMDGPU::SGPR_32RegClass;
+ ConstrainRC = &AMDGPU::SReg_32RegClass;
} else {
// FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
// We sort of know that a VCC producer based on the register bank, that ands
@@ -2301,7 +2445,7 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
return true;
}
-bool AMDGPUInstructionSelector::selectG_FRAME_INDEX_GLOBAL_VALUE(
+bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
MachineInstr &I) const {
Register DstReg = I.getOperand(0).getReg();
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
@@ -2422,10 +2566,8 @@ computeIndirectRegIndex(MachineRegisterInfo &MRI,
unsigned EltSize) {
Register IdxBaseReg;
int Offset;
- MachineInstr *Unused;
- std::tie(IdxBaseReg, Offset, Unused)
- = AMDGPU::getBaseWithConstantOffset(MRI, IdxReg);
+ std::tie(IdxBaseReg, Offset) = AMDGPU::getBaseWithConstantOffset(MRI, IdxReg);
if (IdxBaseReg == AMDGPU::NoRegister) {
// This will happen if the index is a known constant. This should ordinarily
// be legalized out, but handle it as a register just in case.
@@ -2501,20 +2643,18 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
.addReg(IdxReg);
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
- .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, 0, SubReg)
.addReg(SrcReg, RegState::Implicit);
MI.eraseFromParent();
return true;
}
- BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_ON))
- .addReg(IdxReg)
- .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
- BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), DstReg)
- .addReg(SrcReg, RegState::Undef, SubReg)
- .addReg(SrcReg, RegState::Implicit)
- .addReg(AMDGPU::M0, RegState::Implicit);
- BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF));
+ const MCInstrDesc &GPRIDXDesc =
+ TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);
+ BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)
+ .addReg(SrcReg)
+ .addReg(IdxReg)
+ .addImm(SubReg);
MI.eraseFromParent();
return true;
@@ -2568,25 +2708,27 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
MachineBasicBlock *BB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
- if (IndexMode) {
- BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_ON))
- .addReg(IdxReg)
- .addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
- } else {
+ if (!IndexMode) {
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
.addReg(IdxReg);
- }
- const MCInstrDesc &RegWriteOp
- = TII.getIndirectRegWritePseudo(VecSize, ValSize,
- VecRB->getID() == AMDGPU::SGPRRegBankID);
- BuildMI(*BB, MI, DL, RegWriteOp, DstReg)
- .addReg(VecReg)
- .addReg(ValReg)
- .addImm(SubReg);
+ const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
+ VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
+ BuildMI(*BB, MI, DL, RegWriteOp, DstReg)
+ .addReg(VecReg)
+ .addReg(ValReg)
+ .addImm(SubReg);
+ MI.eraseFromParent();
+ return true;
+ }
- if (IndexMode)
- BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF));
+ const MCInstrDesc &GPRIDXDesc =
+ TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
+ BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)
+ .addReg(VecReg)
+ .addReg(ValReg)
+ .addReg(IdxReg)
+ .addImm(SubReg);
MI.eraseFromParent();
return true;
@@ -2731,7 +2873,7 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
}
} else if (Mask[0] == 1 && Mask[1] == 0) {
if (IsVALU) {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_ALIGNBIT_B32), DstReg)
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_ALIGNBIT_B32_e64), DstReg)
.addReg(SrcVec)
.addReg(SrcVec)
.addImm(16);
@@ -2751,6 +2893,130 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
return true;
}
+bool AMDGPUInstructionSelector::selectAMDGPU_BUFFER_ATOMIC_FADD(
+ MachineInstr &MI) const {
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ if (!MRI->use_nodbg_empty(MI.getOperand(0).getReg())) {
+ Function &F = MBB->getParent()->getFunction();
+ DiagnosticInfoUnsupported
+ NoFpRet(F, "return versions of fp atomics not supported",
+ MI.getDebugLoc(), DS_Error);
+ F.getContext().diagnose(NoFpRet);
+ return false;
+ }
+
+ // FIXME: This is only needed because tablegen requires number of dst operands
+ // in match and replace pattern to be the same. Otherwise patterns can be
+ // exported from SDag path.
+ MachineOperand &VDataIn = MI.getOperand(1);
+ MachineOperand &VIndex = MI.getOperand(3);
+ MachineOperand &VOffset = MI.getOperand(4);
+ MachineOperand &SOffset = MI.getOperand(5);
+ int16_t Offset = MI.getOperand(6).getImm();
+
+ bool HasVOffset = !isOperandImmEqual(VOffset, 0, *MRI);
+ bool HasVIndex = !isOperandImmEqual(VIndex, 0, *MRI);
+
+ unsigned Opcode;
+ if (HasVOffset) {
+ Opcode = HasVIndex ? AMDGPU::BUFFER_ATOMIC_ADD_F32_BOTHEN
+ : AMDGPU::BUFFER_ATOMIC_ADD_F32_OFFEN;
+ } else {
+ Opcode = HasVIndex ? AMDGPU::BUFFER_ATOMIC_ADD_F32_IDXEN
+ : AMDGPU::BUFFER_ATOMIC_ADD_F32_OFFSET;
+ }
+
+ if (MRI->getType(VDataIn.getReg()).isVector()) {
+ switch (Opcode) {
+ case AMDGPU::BUFFER_ATOMIC_ADD_F32_BOTHEN:
+ Opcode = AMDGPU::BUFFER_ATOMIC_PK_ADD_F16_BOTHEN;
+ break;
+ case AMDGPU::BUFFER_ATOMIC_ADD_F32_OFFEN:
+ Opcode = AMDGPU::BUFFER_ATOMIC_PK_ADD_F16_OFFEN;
+ break;
+ case AMDGPU::BUFFER_ATOMIC_ADD_F32_IDXEN:
+ Opcode = AMDGPU::BUFFER_ATOMIC_PK_ADD_F16_IDXEN;
+ break;
+ case AMDGPU::BUFFER_ATOMIC_ADD_F32_OFFSET:
+ Opcode = AMDGPU::BUFFER_ATOMIC_PK_ADD_F16_OFFSET;
+ break;
+ }
+ }
+
+ auto I = BuildMI(*MBB, MI, DL, TII.get(Opcode));
+ I.add(VDataIn);
+
+ if (Opcode == AMDGPU::BUFFER_ATOMIC_ADD_F32_BOTHEN ||
+ Opcode == AMDGPU::BUFFER_ATOMIC_PK_ADD_F16_BOTHEN) {
+ Register IdxReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ BuildMI(*MBB, &*I, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
+ .addReg(VIndex.getReg())
+ .addImm(AMDGPU::sub0)
+ .addReg(VOffset.getReg())
+ .addImm(AMDGPU::sub1);
+
+ I.addReg(IdxReg);
+ } else if (HasVIndex) {
+ I.add(VIndex);
+ } else if (HasVOffset) {
+ I.add(VOffset);
+ }
+
+ I.add(MI.getOperand(2)); // rsrc
+ I.add(SOffset);
+ I.addImm(Offset);
+ renderExtractSLC(I, MI, 7);
+ I.cloneMemRefs(MI);
+
+ MI.eraseFromParent();
+
+ return true;
+}
+
+bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
+ MachineInstr &MI) const{
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ if (!MRI->use_nodbg_empty(MI.getOperand(0).getReg())) {
+ Function &F = MBB->getParent()->getFunction();
+ DiagnosticInfoUnsupported
+ NoFpRet(F, "return versions of fp atomics not supported",
+ MI.getDebugLoc(), DS_Error);
+ F.getContext().diagnose(NoFpRet);
+ return false;
+ }
+
+  // FIXME: This is only needed because tablegen requires the number of dst
+  // operands in match and replace patterns to be the same. Otherwise the
+  // patterns could be exported from the SDag path.
+ auto Addr = selectFlatOffsetImpl<true>(MI.getOperand(2));
+
+ Register Data = MI.getOperand(3).getReg();
+ const unsigned Opc = MRI->getType(Data).isVector() ?
+ AMDGPU::GLOBAL_ATOMIC_PK_ADD_F16 : AMDGPU::GLOBAL_ATOMIC_ADD_F32;
+ auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
+ .addReg(Addr.first)
+ .addReg(Data)
+ .addImm(Addr.second)
+ .addImm(0) // SLC
+ .cloneMemRefs(MI);
+
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
+bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
+ MI.setDesc(TII.get(MI.getOperand(1).getImm()));
+ MI.RemoveOperand(1);
+ MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
+ return true;
+}
+
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);
@@ -2807,6 +3073,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_PTR_ADD(I);
case TargetOpcode::G_IMPLICIT_DEF:
return selectG_IMPLICIT_DEF(I);
+ case TargetOpcode::G_FREEZE:
+ return selectCOPY(I);
case TargetOpcode::G_INSERT:
return selectG_INSERT(I);
case TargetOpcode::G_INTRINSIC:
@@ -2818,6 +3086,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return true;
return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE:
case TargetOpcode::G_ATOMIC_CMPXCHG:
case TargetOpcode::G_ATOMICRMW_XCHG:
case TargetOpcode::G_ATOMICRMW_ADD:
@@ -2830,13 +3099,15 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ATOMICRMW_UMIN:
case TargetOpcode::G_ATOMICRMW_UMAX:
case TargetOpcode::G_ATOMICRMW_FADD:
- return selectG_LOAD_ATOMICRMW(I);
+ case AMDGPU::G_AMDGPU_ATOMIC_INC:
+ case AMDGPU::G_AMDGPU_ATOMIC_DEC:
+ case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
+ case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
+ return selectG_LOAD_STORE_ATOMICRMW(I);
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
return selectG_AMDGPU_ATOMIC_CMPXCHG(I);
case TargetOpcode::G_SELECT:
return selectG_SELECT(I);
- case TargetOpcode::G_STORE:
- return selectG_STORE(I);
case TargetOpcode::G_TRUNC:
return selectG_TRUNC(I);
case TargetOpcode::G_SEXT:
@@ -2848,9 +3119,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_SZA_EXT(I);
case TargetOpcode::G_BRCOND:
return selectG_BRCOND(I);
- case TargetOpcode::G_FRAME_INDEX:
case TargetOpcode::G_GLOBAL_VALUE:
- return selectG_FRAME_INDEX_GLOBAL_VALUE(I);
+ return selectG_GLOBAL_VALUE(I);
case TargetOpcode::G_PTRMASK:
return selectG_PTRMASK(I);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
@@ -2859,10 +3129,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_INSERT_VECTOR_ELT(I);
case TargetOpcode::G_SHUFFLE_VECTOR:
return selectG_SHUFFLE_VECTOR(I);
- case AMDGPU::G_AMDGPU_ATOMIC_INC:
- case AMDGPU::G_AMDGPU_ATOMIC_DEC:
- initM0(I);
- return selectImpl(I, *CoverageInfo);
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: {
const AMDGPU::ImageDimIntrinsicInfo *Intr
@@ -2870,6 +3136,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
assert(Intr && "not an image intrinsic with image pseudo");
return selectImageIntrinsic(I, Intr);
}
+ case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
+ return selectBVHIntrinsic(I);
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
+ return selectAMDGPU_BUFFER_ATOMIC_FADD(I);
default:
return selectImpl(I, *CoverageInfo);
}
@@ -2885,7 +3155,8 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
}
std::pair<Register, unsigned>
-AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root) const {
+AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
+ bool AllowAbs) const {
Register Src = Root.getReg();
Register OrigSrc = Src;
unsigned Mods = 0;
@@ -2897,7 +3168,7 @@ AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root) const {
MI = getDefIgnoringCopies(Src, *MRI);
}
- if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
+ if (AllowAbs && MI && MI->getOpcode() == AMDGPU::G_FABS) {
Src = MI->getOperand(1).getReg();
Mods |= SISrcMods::ABS;
}
@@ -2944,6 +3215,20 @@ AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(Root); },
@@ -2965,6 +3250,18 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
Register Reg = Root.getReg();
const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI);
@@ -3019,7 +3316,7 @@ AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
Register Src;
unsigned Mods;
std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
- if (!TM.Options.NoNaNsFPMath && !isKnownNeverNaN(Src, *MRI))
+ if (!isKnownNeverNaN(Src, *MRI))
return None;
return {{
@@ -3112,49 +3409,234 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
}
template <bool Signed>
-InstructionSelector::ComplexRendererFns
+std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
MachineInstr *MI = Root.getParent();
- InstructionSelector::ComplexRendererFns Default = {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
- }};
+ auto Default = std::make_pair(Root.getReg(), 0);
if (!STI.hasFlatInstOffsets())
return Default;
- const MachineInstr *OpDef = MRI->getVRegDef(Root.getReg());
- if (!OpDef || OpDef->getOpcode() != AMDGPU::G_PTR_ADD)
- return Default;
-
- Optional<int64_t> Offset =
- getConstantVRegVal(OpDef->getOperand(2).getReg(), *MRI);
- if (!Offset.hasValue())
+ Register PtrBase;
+ int64_t ConstOffset;
+ std::tie(PtrBase, ConstOffset) =
+ getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
+ if (ConstOffset == 0)
return Default;
unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
- if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
+ if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, Signed))
return Default;
- Register BasePtr = OpDef->getOperand(1).getReg();
+ return std::make_pair(PtrBase, ConstOffset);
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
+ auto PtrWithOffset = selectFlatOffsetImpl<false>(Root);
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
}};
}
InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
- return selectFlatOffsetImpl<false>(Root);
+AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
+ auto PtrWithOffset = selectFlatOffsetImpl<true>(Root);
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
+ }};
}
+/// Match a zero extend from a 32-bit value to 64-bits.
+static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
+ Register ZExtSrc;
+ if (mi_match(Reg, MRI, m_GZExt(m_Reg(ZExtSrc))))
+ return MRI.getType(ZExtSrc) == LLT::scalar(32) ? ZExtSrc : Register();
+
+ // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0)
+ const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+ if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
+    return Register();
+
+ if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) {
+ return Def->getOperand(1).getReg();
+ }
+
+ return Register();
+}
+
+// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
- return selectFlatOffsetImpl<true>(Root);
+AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
+ Register Addr = Root.getReg();
+ Register PtrBase;
+ int64_t ConstOffset;
+ int64_t ImmOffset = 0;
+
+ // Match the immediate offset first, which canonically is moved as low as
+ // possible.
+ std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
+
+ if (ConstOffset != 0) {
+ if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, true)) {
+ Addr = PtrBase;
+ ImmOffset = ConstOffset;
+ } else if (ConstOffset > 0) {
+ auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);
+ if (!PtrBaseDef)
+ return None;
+
+ if (isSGPR(PtrBaseDef->Reg)) {
+ // Offset is too large.
+ //
+ // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset)
+ // + (large_offset & MaxOffset);
+ int64_t SplitImmOffset, RemainderOffset;
+ std::tie(SplitImmOffset, RemainderOffset)
+ = TII.splitFlatOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, true);
+
+ if (isUInt<32>(RemainderOffset)) {
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ Register HighBits
+ = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
+ HighBits)
+ .addImm(RemainderOffset);
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); }, // saddr
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(HighBits); }, // voffset
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
+ }};
+ }
+ }
+ }
+ }
+
+ auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
+ if (!AddrDef)
+ return None;
+
+ // Match the variable offset.
+ if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD) {
+ // FIXME: We should probably have folded COPY (G_IMPLICIT_DEF) earlier, and
+ // drop this.
+ if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
+ AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT)
+ return None;
+
+ // It's cheaper to materialize a single 32-bit zero for vaddr than the two
+ // moves required to copy a 64-bit SGPR to VGPR.
+ const Register SAddr = AddrDef->Reg;
+ if (!isSGPR(SAddr))
+ return None;
+
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
+ VOffset)
+ .addImm(0);
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); }, // saddr
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); }, // voffset
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+ }};
+ }
+
+ // Look through the SGPR->VGPR copy.
+ Register SAddr =
+ getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);
+ if (!SAddr || !isSGPR(SAddr))
+ return None;
+
+ Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
+
+ // It's possible voffset is an SGPR here, but the copy to VGPR will be
+ // inserted later.
+ Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset);
+ if (!VOffset)
+ return None;
+
+ return {{[=](MachineInstrBuilder &MIB) { // saddr
+ MIB.addReg(SAddr);
+ },
+ [=](MachineInstrBuilder &MIB) { // voffset
+ MIB.addReg(VOffset);
+ },
+ [=](MachineInstrBuilder &MIB) { // offset
+ MIB.addImm(ImmOffset);
+ }}};
+}
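When the constant offset is too large for the FLAT immediate field, selectGlobalSAddr above splits it into an immediate part plus a remainder that is materialized into a VGPR with V_MOV_B32, so the address becomes saddr + voffset + imm. A small sketch of that split, assuming a hypothetical NumBits-wide signed offset field (an assumption for illustration; the real width is subtarget-dependent and comes from TII.splitFlatOffset / isLegalFLATOffset):

    // Sketch of the saddr + large_offset split used above:
    //   saddr + C  ->  saddr + (voffset = remainder) + imm
    // assuming a signed immediate field of NumBits bits.
    #include <cstdint>
    #include <cstdio>
    #include <utility>

    static std::pair<int64_t, int64_t> splitOffsetSketch(int64_t C, unsigned NumBits) {
      int64_t Imm = C % (1LL << (NumBits - 1));  // part that still fits the field
      int64_t Remainder = C - Imm;               // goes into a VGPR via V_MOV_B32
      return {Imm, Remainder};
    }

    int main() {
      auto [Imm, Rem] = splitOffsetSketch(0x12345, 13);
      std::printf("imm=%lld rem=%lld sum=0x%llx\n",
                  (long long)Imm, (long long)Rem, (unsigned long long)(Imm + Rem));
      return 0;
    }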
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
+ Register Addr = Root.getReg();
+ Register PtrBase;
+ int64_t ConstOffset;
+ int64_t ImmOffset = 0;
+
+ // Match the immediate offset first, which canonically is moved as low as
+ // possible.
+ std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
+
+ if (ConstOffset != 0 &&
+ TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+ Addr = PtrBase;
+ ImmOffset = ConstOffset;
+ }
+
+ auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
+ if (!AddrDef)
+ return None;
+
+ if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
+ int FI = AddrDef->MI->getOperand(1).getIndex();
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+ }};
+ }
+
+ Register SAddr = AddrDef->Reg;
+
+ if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
+ Register LHS = AddrDef->MI->getOperand(1).getReg();
+ Register RHS = AddrDef->MI->getOperand(2).getReg();
+ auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
+ auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI);
+
+ if (LHSDef && RHSDef &&
+ LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
+ isSGPR(RHSDef->Reg)) {
+ int FI = LHSDef->MI->getOperand(1).getIndex();
+ MachineInstr &I = *Root.getParent();
+ MachineBasicBlock *BB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+ SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), SAddr)
+ .addFrameIndex(FI)
+ .addReg(RHSDef->Reg);
+ }
+ }
+
+ if (!isSGPR(SAddr))
+ return None;
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); }, // saddr
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+ }};
}
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -3187,13 +3669,9 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
MIB.addReg(HighBits);
},
[=](MachineInstrBuilder &MIB) { // soffset
- const MachineMemOperand *MMO = *MI->memoperands_begin();
- const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
-
- if (isStackPtrRelative(PtrInfo))
- MIB.addReg(Info->getStackPtrOffsetReg());
- else
- MIB.addImm(0);
+ // Use constant zero for soffset and rely on eliminateFrameIndex
+ // to choose the appropriate frame register if need be.
+ MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(Offset & 4095);
@@ -3240,15 +3718,9 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
MIB.addReg(VAddr);
},
[=](MachineInstrBuilder &MIB) { // soffset
- // If we don't know this private access is a local stack object, it
- // needs to be relative to the entry point's scratch wave offset.
- // TODO: Should split large offsets that don't fit like above.
- // TODO: Don't use scratch wave offset just because the offset
- // didn't fit.
- if (!Info->isEntryFunction() && FI.hasValue())
- MIB.addReg(Info->getStackPtrOffsetReg());
- else
- MIB.addImm(0);
+ // Use constant zero for soffset and rely on eliminateFrameIndex
+ // to choose the appropriate frame register if need be.
+ MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(Offset);
@@ -3256,10 +3728,24 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
}
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
- int64_t Offset,
- unsigned OffsetBits) const {
- if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
- (OffsetBits == 8 && !isUInt<8>(Offset)))
+ int64_t Offset) const {
+ if (!isUInt<16>(Offset))
+ return false;
+
+ if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
+ return true;
+
+  // On Southern Islands, instructions with a negative base value and an offset
+  // don't seem to work.
+ return KnownBits->signBitIsZero(Base);
+}
+
+bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
+ int64_t Offset1,
+ unsigned Size) const {
+ if (Offset0 % Size != 0 || Offset1 % Size != 0)
+ return false;
+ if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
return false;
if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
@@ -3314,7 +3800,7 @@ AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const
getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
if (Offset) {
- if (isDSOffsetLegal(PtrBase, Offset, 16)) {
+ if (isDSOffsetLegal(PtrBase, Offset)) {
// (add n0, c0)
return std::make_pair(PtrBase, Offset);
}
@@ -3343,9 +3829,20 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
+ return selectDSReadWrite2(Root, 4);
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
+ return selectDSReadWrite2(Root, 8);
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
+ unsigned Size) const {
Register Reg;
unsigned Offset;
- std::tie(Reg, Offset) = selectDS64Bit4ByteAlignedImpl(Root);
+ std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
@@ -3354,7 +3851,8 @@ AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const
}
std::pair<Register, unsigned>
-AMDGPUInstructionSelector::selectDS64Bit4ByteAlignedImpl(MachineOperand &Root) const {
+AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
+ unsigned Size) const {
const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
if (!RootDef)
return std::make_pair(Root.getReg(), 0);
@@ -3367,11 +3865,11 @@ AMDGPUInstructionSelector::selectDS64Bit4ByteAlignedImpl(MachineOperand &Root) c
getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
if (Offset) {
- int64_t DWordOffset0 = Offset / 4;
- int64_t DWordOffset1 = DWordOffset0 + 1;
- if (isDSOffsetLegal(PtrBase, DWordOffset1, 8)) {
+ int64_t OffsetValue0 = Offset;
+ int64_t OffsetValue1 = Offset + Size;
+ if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
// (add n0, c0)
- return std::make_pair(PtrBase, DWordOffset0);
+ return std::make_pair(PtrBase, OffsetValue0 / Size);
}
} else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
// TODO
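The two offsets selected above are encoded by the hardware in 8-bit fields, in units of the element size, which is why isDSOffset2Legal checks divisibility by Size and the selector returns OffsetValue0 / Size. A standalone sketch of that legality check with a worked example (illustrative only, not part of the diff):

    // Sketch of the ds_read2/ds_write2 offset legality used above: each byte
    // offset must be a multiple of the element Size and its scaled value must
    // fit in 8 bits.
    #include <cassert>
    #include <cstdint>

    static bool isDSOffset2LegalSketch(int64_t Offset0, int64_t Offset1, unsigned Size) {
      if (Offset0 % Size != 0 || Offset1 % Size != 0)
        return false;
      return Offset0 >= 0 && Offset1 >= 0 &&
             Offset0 / Size <= 255 && Offset1 / Size <= 255;
    }

    int main() {
      // Size = 8 (the 128-bit, 8-byte-aligned case): byte offsets 32 and 40
      // encode as 4 and 5, so the pair (PtrBase, 32 / 8) is selected.
      assert(isDSOffset2LegalSketch(32, 40, 8));
      // 2052 % 8 == 4, so this pair is rejected.
      assert(!isDSOffset2LegalSketch(2052, 2060, 8));
      return 0;
    }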
@@ -3391,7 +3889,7 @@ AMDGPUInstructionSelector::selectDS64Bit4ByteAlignedImpl(MachineOperand &Root) c
std::pair<Register, int64_t>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
Register Root, const MachineRegisterInfo &MRI) const {
- MachineInstr *RootI = MRI.getVRegDef(Root);
+ MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
return {Root, 0};
@@ -3400,7 +3898,7 @@ AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
= getConstantVRegValWithLookThrough(RHS.getReg(), MRI, true);
if (!MaybeOffset)
return {Root, 0};
- return {RootI->getOperand(1).getReg(), MaybeOffset->Value};
+ return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};
}
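The switch to MaybeOffset->Value.getSExtValue() matters because the constant is now carried as an APInt; a negative offset narrower than 64 bits must be sign-extended, not zero-extended, before it is folded into the 64-bit pointer arithmetic. A tiny plain-C++ illustration of the difference (a sketch, independent of the LLVM APInt API):

    // Why sign extension is the right way to read a narrow negative constant
    // offset: zero extension would turn -8 into a huge positive offset.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t RawBits = 0xFFFFFFF8u;                 // 32-bit encoding of -8
      int64_t ZExt = static_cast<int64_t>(RawBits);   // 4294967288 (wrong)
      int64_t SExt = static_cast<int32_t>(RawBits);   // -8 (what we want)
      std::printf("zext=%lld sext=%lld\n", (long long)ZExt, (long long)SExt);
      return 0;
    }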
static void addZeroImm(MachineInstrBuilder &MIB) {
@@ -3582,6 +4080,11 @@ bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
MachineOperand &Root, Register &RSrcReg, Register &SOffset,
int64_t &Offset) const {
+
+ // FIXME: Pattern should not reach here.
+ if (STI.useFlatForGlobal())
+ return false;
+
MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
if (shouldUseAddr64(AddrData))
return false;
@@ -3723,7 +4226,7 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
static Optional<uint64_t> getConstantZext32Val(Register Reg,
const MachineRegisterInfo &MRI) {
// getConstantVRegVal sexts any values, so see if that matters.
- Optional<int64_t> OffsetVal = getConstantVRegVal(Reg, MRI);
+ Optional<int64_t> OffsetVal = getConstantVRegSExtVal(Reg, MRI);
if (!OffsetVal || !isInt<32>(*OffsetVal))
return None;
return Lo_32(*OffsetVal);
@@ -3833,6 +4336,12 @@ void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
MIB.addImm((MI.getOperand(OpIdx).getImm() >> 3) & 1);
}
+void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ MIB.addFrameIndex((MI.getOperand(1).getIndex()));
+}
+
bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const {
return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 1fe80958917d..d70f18098cd7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -13,13 +13,11 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
-#include "AMDGPU.h"
-#include "AMDGPUArgumentUsageInfo.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
namespace {
#define GET_GLOBALISEL_PREDICATE_BITSET
@@ -37,6 +35,7 @@ struct ImageDimIntrinsicInfo;
class AMDGPUInstrInfo;
class AMDGPURegisterBankInfo;
+class AMDGPUTargetMachine;
class GCNSubtarget;
class MachineInstr;
class MachineIRBuilder;
@@ -47,9 +46,10 @@ class SIInstrInfo;
class SIMachineFunctionInfo;
class SIRegisterInfo;
-class AMDGPUInstructionSelector : public InstructionSelector {
+class AMDGPUInstructionSelector final : public InstructionSelector {
private:
MachineRegisterInfo *MRI;
+ const GCNSubtarget *Subtarget;
public:
AMDGPUInstructionSelector(const GCNSubtarget &STI,
@@ -71,6 +71,8 @@ private:
GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
};
+ bool isSGPR(Register Reg) const;
+
bool isInstrUniform(const MachineInstr &MI) const;
bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const;
@@ -105,15 +107,20 @@ private:
bool selectG_INSERT(MachineInstr &I) const;
bool selectInterpP1F16(MachineInstr &MI) const;
+ bool selectWritelane(MachineInstr &MI) const;
bool selectDivScale(MachineInstr &MI) const;
bool selectIntrinsicIcmp(MachineInstr &MI) const;
bool selectBallot(MachineInstr &I) const;
+ bool selectRelocConstant(MachineInstr &I) const;
+ bool selectGroupStaticSize(MachineInstr &I) const;
+ bool selectReturnAddress(MachineInstr &I) const;
bool selectG_INTRINSIC(MachineInstr &I) const;
bool selectEndCfIntrinsic(MachineInstr &MI) const;
bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
+ bool selectSBarrier(MachineInstr &MI) const;
bool selectImageIntrinsic(MachineInstr &MI,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
@@ -126,19 +133,21 @@ private:
bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
void initM0(MachineInstr &I) const;
- bool selectG_LOAD_ATOMICRMW(MachineInstr &I) const;
+ bool selectG_LOAD_STORE_ATOMICRMW(MachineInstr &I) const;
bool selectG_AMDGPU_ATOMIC_CMPXCHG(MachineInstr &I) const;
- bool selectG_STORE(MachineInstr &I) const;
bool selectG_SELECT(MachineInstr &I) const;
bool selectG_BRCOND(MachineInstr &I) const;
- bool selectG_FRAME_INDEX_GLOBAL_VALUE(MachineInstr &I) const;
+ bool selectG_GLOBAL_VALUE(MachineInstr &I) const;
bool selectG_PTRMASK(MachineInstr &I) const;
bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
bool selectG_SHUFFLE_VECTOR(MachineInstr &I) const;
+ bool selectAMDGPU_BUFFER_ATOMIC_FADD(MachineInstr &I) const;
+ bool selectGlobalAtomicFaddIntrinsic(MachineInstr &I) const;
+ bool selectBVHIntrinsic(MachineInstr &I) const;
- std::pair<Register, unsigned>
- selectVOP3ModsImpl(MachineOperand &Root) const;
+ std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
+ bool AllowAbs = true) const;
InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand &Root) const;
@@ -149,9 +158,13 @@ private:
InstructionSelector::ComplexRendererFns
selectVOP3Mods0(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectVOP3BMods0(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
selectVOP3OMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectVOP3BMods(MachineOperand &Root) const;
ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const;
@@ -175,32 +188,45 @@ private:
selectSmrdSgpr(MachineOperand &Root) const;
template <bool Signed>
- InstructionSelector::ComplexRendererFns
+ std::pair<Register, int>
selectFlatOffsetImpl(MachineOperand &Root) const;
+
InstructionSelector::ComplexRendererFns
selectFlatOffset(MachineOperand &Root) const;
-
InstructionSelector::ComplexRendererFns
selectFlatOffsetSigned(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectGlobalSAddr(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectScratchSAddr(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
selectMUBUFScratchOffen(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectMUBUFScratchOffset(MachineOperand &Root) const;
- bool isDSOffsetLegal(Register Base, int64_t Offset,
- unsigned OffsetBits) const;
+ bool isDSOffsetLegal(Register Base, int64_t Offset) const;
+ bool isDSOffset2Legal(Register Base, int64_t Offset0, int64_t Offset1,
+ unsigned Size) const;
std::pair<Register, unsigned>
selectDS1Addr1OffsetImpl(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectDS1Addr1Offset(MachineOperand &Root) const;
- std::pair<Register, unsigned>
- selectDS64Bit4ByteAlignedImpl(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectDS64Bit4ByteAligned(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectDS128Bit8ByteAligned(MachineOperand &Root) const;
+
+ std::pair<Register, unsigned> selectDSReadWrite2Impl(MachineOperand &Root,
+ unsigned size) const;
+ InstructionSelector::ComplexRendererFns
+ selectDSReadWrite2(MachineOperand &Root, unsigned size) const;
+
std::pair<Register, int64_t>
getPtrBaseWithConstantOffset(Register Root,
const MachineRegisterInfo &MRI) const;
@@ -284,6 +310,8 @@ private:
int OpIdx) const;
void renderExtractSWZ(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+ void renderFrameIndex(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
bool isInlineImmediate16(int64_t Imm) const;
bool isInlineImmediate32(int64_t Imm) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 5cb7ac320d2f..8ef9c99e8b35 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -83,9 +83,8 @@ def FalsePredicate : Predicate<"false">;
// Add a predicate to the list if it does not already exist, to deduplicate it.
class PredConcat<list<Predicate> lst, Predicate pred> {
list<Predicate> ret =
- !foldl([pred], lst, acc, cur,
- !listconcat(acc, !if(!eq(!cast<string>(cur),!cast<string>(pred)),
- [], [cur])));
+ !listconcat([pred], !filter(item, lst,
+ !ne(!cast<string>(item), !cast<string>(pred))));
}
class PredicateControl {
@@ -483,19 +482,20 @@ defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
+let MemoryVT = v2f16 in
+defm atomic_load_fadd_v2f16 : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;
-
-def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> {
+def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
+ Aligned<8> {
let IsLoad = 1;
let IsNonExtLoad = 1;
- let MinAlignment = 8;
}
-def load_align16_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> {
+def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
+ Aligned<16> {
let IsLoad = 1;
let IsNonExtLoad = 1;
- let MinAlignment = 16;
}
def store_align8_local: PatFrag<(ops node:$val, node:$ptr),
@@ -596,149 +596,6 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
(vt rc:$addr)
>;
-// BFI_INT patterns
-
-multiclass BFIPatterns <Instruction BFI_INT,
- Instruction LoadImm32,
- RegisterClass RC64> {
- // Definition from ISA doc:
- // (y & x) | (z & ~x)
- def : AMDGPUPat <
- (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
- (BFI_INT $x, $y, $z)
- >;
-
- // 64-bit version
- def : AMDGPUPat <
- (or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
- (REG_SEQUENCE RC64,
- (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub0)),
- (i32 (EXTRACT_SUBREG RC64:$y, sub0)),
- (i32 (EXTRACT_SUBREG RC64:$z, sub0))), sub0,
- (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub1)),
- (i32 (EXTRACT_SUBREG RC64:$y, sub1)),
- (i32 (EXTRACT_SUBREG RC64:$z, sub1))), sub1)
- >;
-
- // SHA-256 Ch function
- // z ^ (x & (y ^ z))
- def : AMDGPUPat <
- (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
- (BFI_INT $x, $y, $z)
- >;
-
- // 64-bit version
- def : AMDGPUPat <
- (xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
- (REG_SEQUENCE RC64,
- (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub0)),
- (i32 (EXTRACT_SUBREG RC64:$y, sub0)),
- (i32 (EXTRACT_SUBREG RC64:$z, sub0))), sub0,
- (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub1)),
- (i32 (EXTRACT_SUBREG RC64:$y, sub1)),
- (i32 (EXTRACT_SUBREG RC64:$z, sub1))), sub1)
- >;
-
- def : AMDGPUPat <
- (fcopysign f32:$src0, f32:$src1),
- (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
- >;
-
- def : AMDGPUPat <
- (f32 (fcopysign f32:$src0, f64:$src1)),
- (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
- (i32 (EXTRACT_SUBREG RC64:$src1, sub1)))
- >;
-
- def : AMDGPUPat <
- (f64 (fcopysign f64:$src0, f64:$src1)),
- (REG_SEQUENCE RC64,
- (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
- (BFI_INT (LoadImm32 (i32 0x7fffffff)),
- (i32 (EXTRACT_SUBREG RC64:$src0, sub1)),
- (i32 (EXTRACT_SUBREG RC64:$src1, sub1))), sub1)
- >;
-
- def : AMDGPUPat <
- (f64 (fcopysign f64:$src0, f32:$src1)),
- (REG_SEQUENCE RC64,
- (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
- (BFI_INT (LoadImm32 (i32 0x7fffffff)),
- (i32 (EXTRACT_SUBREG RC64:$src0, sub1)),
- $src1), sub1)
- >;
-}
-
-// SHA-256 Ma patterns
-
-// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
-multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass RC64> {
- def : AMDGPUPat <
- (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
- (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
- >;
-
- def : AMDGPUPat <
- (or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
- (REG_SEQUENCE RC64,
- (BFI_INT (XOR (i32 (EXTRACT_SUBREG RC64:$x, sub0)),
- (i32 (EXTRACT_SUBREG RC64:$y, sub0))),
- (i32 (EXTRACT_SUBREG RC64:$z, sub0)),
- (i32 (EXTRACT_SUBREG RC64:$y, sub0))), sub0,
- (BFI_INT (XOR (i32 (EXTRACT_SUBREG RC64:$x, sub1)),
- (i32 (EXTRACT_SUBREG RC64:$y, sub1))),
- (i32 (EXTRACT_SUBREG RC64:$z, sub1)),
- (i32 (EXTRACT_SUBREG RC64:$y, sub1))), sub1)
- >;
-}
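The BFI and SHA-256 pattern multiclasses removed here all rest on the bitfield-insert identities quoted in their comments. A quick standalone check of those identities (an illustrative sketch in C++, unrelated to the TableGen above):

    // Verify the identities behind the removed patterns:
    //   BFI(x, y, z)         = (y & x) | (z & ~x)          (ISA definition)
    //   SHA-256 Ch:  z ^ (x & (y ^ z))        == BFI(x, y, z)
    //   SHA-256 Ma:  (x & z) | (y & (x | z))  == BFI(x ^ y, z, y)
    #include <cassert>
    #include <cstdint>

    static uint32_t bfi(uint32_t x, uint32_t y, uint32_t z) {
      return (y & x) | (z & ~x);
    }

    int main() {
      for (uint32_t x : {0u, 0xFFFFFFFFu, 0x0F0F0F0Fu, 0x12345678u})
        for (uint32_t y : {0u, 0xDEADBEEFu, 0xAAAAAAAAu})
          for (uint32_t z : {0u, 0x55555555u, 0xCAFEBABEu}) {
            assert((z ^ (x & (y ^ z))) == bfi(x, y, z));
            assert(((x & z) | (y & (x | z))) == bfi(x ^ y, z, y));
          }
      return 0;
    }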
-
-// Bitfield extract patterns
-
-def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
- return isMask_32(Imm);
-}]>;
-
-def IMMPopCount : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
- MVT::i32);
-}]>;
-
-multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
- def : AMDGPUPat <
- (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
- (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
- >;
-
- // x & ((1 << y) - 1)
- def : AMDGPUPat <
- (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
- (UBFE $src, (MOV (i32 0)), $width)
- >;
-
- // x & ~(-1 << y)
- def : AMDGPUPat <
- (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
- (UBFE $src, (MOV (i32 0)), $width)
- >;
-
- // x & (-1 >> (bitwidth - y))
- def : AMDGPUPat <
- (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
- (UBFE $src, (MOV (i32 0)), $width)
- >;
-
- // x << (bitwidth - y) >> (bitwidth - y)
- def : AMDGPUPat <
- (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
- (UBFE $src, (MOV (i32 0)), $width)
- >;
-
- def : AMDGPUPat <
- (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
- (SBFE $src, (MOV (i32 0)), $width)
- >;
-}
-
// fshr pattern
class FSHRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
(fshr i32:$src0, i32:$src1, i32:$src2),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
new file mode 100644
index 000000000000..8aea33cf289d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -0,0 +1,195 @@
+//===-- AMDGPULateCodeGenPrepare.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass does misc. AMDGPU optimizations on IR *just* before instruction
+/// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#define DEBUG_TYPE "amdgpu-late-codegenprepare"
+
+using namespace llvm;
+
+// Scalar load widening needs to run after the load-store-vectorizer, as that
+// pass doesn't handle overlapping cases. In addition, this pass enhances the
+// widening to handle cases where scalar sub-dword loads are naturally aligned
+// but not dword aligned.
+static cl::opt<bool>
+ WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
+ cl::desc("Widen sub-dword constant address space loads in "
+ "AMDGPULateCodeGenPrepare"),
+ cl::ReallyHidden, cl::init(true));
+
+namespace {
+
+class AMDGPULateCodeGenPrepare
+ : public FunctionPass,
+ public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
+ Module *Mod = nullptr;
+ const DataLayout *DL = nullptr;
+
+ AssumptionCache *AC = nullptr;
+ LegacyDivergenceAnalysis *DA = nullptr;
+
+public:
+ static char ID;
+
+ AMDGPULateCodeGenPrepare() : FunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "AMDGPU IR late optimizations";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+ bool visitInstruction(Instruction &) { return false; }
+
+ // Check if the specified value is at least DWORD aligned.
+ bool isDWORDAligned(const Value *V) const {
+ KnownBits Known = computeKnownBits(V, *DL, 0, AC);
+ return Known.countMinTrailingZeros() >= 2;
+ }
+
+ bool canWidenScalarExtLoad(LoadInst &LI) const;
+ bool visitLoadInst(LoadInst &LI);
+};
+
+} // end anonymous namespace
+
+bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) {
+ Mod = &M;
+ DL = &Mod->getDataLayout();
+ return false;
+}
+
+bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
+
+ bool Changed = false;
+ for (auto &BB : F)
+ for (auto BI = BB.begin(), BE = BB.end(); BI != BE; /*EMPTY*/) {
+ Instruction *I = &*BI++;
+ Changed |= visit(*I);
+ }
+
+ return Changed;
+}
+
+bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
+ unsigned AS = LI.getPointerAddressSpace();
+ // Skip non-constant address space.
+ if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
+ AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
+ return false;
+ // Skip non-simple loads.
+ if (!LI.isSimple())
+ return false;
+ auto *Ty = LI.getType();
+ // Skip aggregate types.
+ if (Ty->isAggregateType())
+ return false;
+ unsigned TySize = DL->getTypeStoreSize(Ty);
+ // Only handle sub-DWORD loads.
+ if (TySize >= 4)
+ return false;
+ // That load must be at least naturally aligned.
+ if (LI.getAlign() < DL->getABITypeAlign(Ty))
+ return false;
+ // It should be uniform, i.e. a scalar load.
+ return DA->isUniform(&LI);
+}
+
+bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
+ if (!WidenLoads)
+ return false;
+
+  // Skip if the load is already at least DWORD aligned, as that case is
+  // handled in SDAG.
+ if (LI.getAlign() >= 4)
+ return false;
+
+ if (!canWidenScalarExtLoad(LI))
+ return false;
+
+ int64_t Offset = 0;
+ auto *Base =
+ GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, *DL);
+ // If that base is not DWORD aligned, it's not safe to perform the following
+ // transforms.
+ if (!isDWORDAligned(Base))
+ return false;
+
+ int64_t Adjust = Offset & 0x3;
+ if (Adjust == 0) {
+ // With a zero adjust, the original alignment could be promoted with a
+ // better one.
+ LI.setAlignment(Align(4));
+ return true;
+ }
+
+ IRBuilder<> IRB(&LI);
+ IRB.SetCurrentDebugLocation(LI.getDebugLoc());
+
+ unsigned AS = LI.getPointerAddressSpace();
+ unsigned LdBits = DL->getTypeStoreSize(LI.getType()) * 8;
+ auto IntNTy = Type::getIntNTy(LI.getContext(), LdBits);
+
+ PointerType *Int32PtrTy = Type::getInt32PtrTy(LI.getContext(), AS);
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(LI.getContext(), AS);
+ auto *NewPtr = IRB.CreateBitCast(
+ IRB.CreateConstGEP1_64(IRB.CreateBitCast(Base, Int8PtrTy),
+ Offset - Adjust),
+ Int32PtrTy);
+ LoadInst *NewLd = IRB.CreateAlignedLoad(NewPtr, Align(4));
+ NewLd->copyMetadata(LI);
+ NewLd->setMetadata(LLVMContext::MD_range, nullptr);
+
+ unsigned ShAmt = Adjust * 8;
+ auto *NewVal = IRB.CreateBitCast(
+ IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt), IntNTy), LI.getType());
+ LI.replaceAllUsesWith(NewVal);
+ RecursivelyDeleteTriviallyDeadInstructions(&LI);
+
+ return true;
+}
+
+INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
+ "AMDGPU IR late optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_END(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
+ "AMDGPU IR late optimizations", false, false)
+
+char AMDGPULateCodeGenPrepare::ID = 0;
+
+FunctionPass *llvm::createAMDGPULateCodeGenPreparePass() {
+ return new AMDGPULateCodeGenPrepare();
+}
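The visitLoadInst transform above replaces a misaligned sub-dword scalar load with a DWORD-aligned 32-bit load followed by a logical shift right and a truncate. A minimal sketch of the byte arithmetic it relies on, written against an ordinary little-endian byte buffer instead of IR (the buffer and helper are hypothetical):

    // Sketch of the widening arithmetic used by AMDGPULateCodeGenPrepare above:
    // load 4 bytes from the dword-aligned address below the access, then shift
    // the wanted sub-dword value into place. Little-endian layout assumed.
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint16_t loadU16Widened(const uint8_t *Base, int64_t Offset) {
      int64_t Adjust = Offset & 0x3;                        // byte within the dword
      uint32_t Dword;
      std::memcpy(&Dword, Base + (Offset - Adjust), 4);     // aligned 32-bit load
      return static_cast<uint16_t>(Dword >> (Adjust * 8));  // lshr + trunc
    }

    int main() {
      alignas(4) uint8_t Buf[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
      uint16_t Direct;
      std::memcpy(&Direct, Buf + 2, 2);    // the original sub-dword load
      assert(loadU16Widened(Buf, 2) == Direct);
      return 0;
    }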
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 2976794b49c3..9f359c232981 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -15,18 +15,15 @@
#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
+#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/DerivedTypes.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#define DEBUG_TYPE "amdgpu-legalinfo"
@@ -60,16 +57,30 @@ static LLT getPow2ScalarType(LLT Ty) {
return LLT::scalar(Pow2Bits);
}
+/// \returns true if this is an odd-sized vector which should be widened by
+/// adding an additional element. This is mostly to handle <3 x s16> ->
+/// <4 x s16>. This excludes s1 vectors, which should always be scalarized.
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT Ty = Query.Types[TypeIdx];
- return Ty.isVector() &&
- Ty.getNumElements() % 2 != 0 &&
- Ty.getElementType().getSizeInBits() < 32 &&
+ if (!Ty.isVector())
+ return false;
+
+ const LLT EltTy = Ty.getElementType();
+ const unsigned EltSize = EltTy.getSizeInBits();
+ return Ty.getNumElements() % 2 != 0 &&
+ EltSize > 1 && EltSize < 32 &&
Ty.getSizeInBits() % 32 != 0;
};
}
+static LegalityPredicate sizeIsMultipleOf32(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ return Ty.getSizeInBits() % 32 == 0;
+ };
+}
+
static LegalityPredicate isWideVec16(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT Ty = Query.Types[TypeIdx];
@@ -115,20 +126,32 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
};
}
+static LLT getBitcastRegisterType(const LLT Ty) {
+ const unsigned Size = Ty.getSizeInBits();
+
+ LLT CoercedTy;
+ if (Size <= 32) {
+ // <2 x s8> -> s16
+ // <4 x s8> -> s32
+ return LLT::scalar(Size);
+ }
+
+ return LLT::scalarOrVector(Size / 32, 32);
+}
+
static LegalizeMutation bitcastToRegisterType(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT Ty = Query.Types[TypeIdx];
- unsigned Size = Ty.getSizeInBits();
-
- LLT CoercedTy;
- if (Size <= 32) {
- // <2 x s8> -> s16
- // <4 x s8> -> s32
- CoercedTy = LLT::scalar(Size);
- } else
- CoercedTy = LLT::scalarOrVector(Size / 32, 32);
+ return std::make_pair(TypeIdx, getBitcastRegisterType(Ty));
+ };
+}
- return std::make_pair(TypeIdx, CoercedTy);
+static LegalizeMutation bitcastToVectorElement32(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ unsigned Size = Ty.getSizeInBits();
+ assert(Size % 32 == 0);
+ return std::make_pair(TypeIdx, LLT::scalarOrVector(Size / 32, 32));
};
}
@@ -213,7 +236,7 @@ static unsigned maxSizeForAddrSpace(const GCNSubtarget &ST, unsigned AS,
switch (AS) {
case AMDGPUAS::PRIVATE_ADDRESS:
// FIXME: Private element size.
- return 32;
+ return ST.enableFlatScratch() ? 128 : 32;
case AMDGPUAS::LOCAL_ADDRESS:
return ST.useDS128() ? 128 : 64;
case AMDGPUAS::GLOBAL_ADDRESS:
@@ -243,7 +266,7 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
unsigned RegSize = Ty.getSizeInBits();
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
- unsigned Align = Query.MMODescrs[0].AlignInBits;
+ unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
unsigned AS = Query.Types[1].getAddressSpace();
// All of these need to be custom lowered to cast the pointer operand.
@@ -286,9 +309,10 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
assert(RegSize >= MemSize);
- if (Align < MemSize) {
+ if (AlignBits < MemSize) {
const SITargetLowering *TLI = ST.getTargetLowering();
- if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8))
+ if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS,
+ Align(AlignBits / 8)))
return false;
}
@@ -308,7 +332,12 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
return false;
if (!Ty.isVector())
return true;
- unsigned EltSize = Ty.getElementType().getSizeInBits();
+
+ LLT EltTy = Ty.getElementType();
+ if (EltTy.isPointer())
+ return true;
+
+ unsigned EltSize = EltTy.getSizeInBits();
return EltSize != 32 && EltSize != 64;
}
@@ -319,6 +348,66 @@ static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query,
!loadStoreBitcastWorkaround(Ty);
}
+/// Return true if a load or store of the type should be lowered with a bitcast
+/// to a different type.
+static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
+ const unsigned MemSizeInBits) {
+ const unsigned Size = Ty.getSizeInBits();
+ if (Size != MemSizeInBits)
+ return Size <= 32 && Ty.isVector();
+
+ if (loadStoreBitcastWorkaround(Ty) && isRegisterType(Ty))
+ return true;
+ return Ty.isVector() && (Size <= 32 || isRegisterSize(Size)) &&
+ !isRegisterVectorElementType(Ty.getElementType());
+}
+
+/// Return true if we should legalize a load by widening an odd-sized memory
+/// access up to the alignment. Note this is the case where the memory access
+/// itself changes, not the size of the result register.
+static bool shouldWidenLoad(const GCNSubtarget &ST, unsigned SizeInBits,
+ unsigned AlignInBits, unsigned AddrSpace,
+ unsigned Opcode) {
+ // We don't want to widen cases that are naturally legal.
+ if (isPowerOf2_32(SizeInBits))
+ return false;
+
+ // If we have 96-bit memory operations, we shouldn't touch them. Note we may
+ // end up widening these for a scalar load during RegBankSelect, since there
+ // aren't 96-bit scalar loads.
+ if (SizeInBits == 96 && ST.hasDwordx3LoadStores())
+ return false;
+
+ if (SizeInBits >= maxSizeForAddrSpace(ST, AddrSpace, Opcode))
+ return false;
+
+ // A load is known dereferenceable up to the alignment, so it's legal to widen
+ // to it.
+ //
+ // TODO: Could check dereferenceable for less aligned cases.
+ unsigned RoundedSize = NextPowerOf2(SizeInBits);
+ if (AlignInBits < RoundedSize)
+ return false;
+
+ // Do not widen if it would introduce a slow unaligned load.
+ const SITargetLowering *TLI = ST.getTargetLowering();
+ bool Fast = false;
+ return TLI->allowsMisalignedMemoryAccessesImpl(
+ RoundedSize, AddrSpace, Align(AlignInBits / 8),
+ MachineMemOperand::MOLoad, &Fast) &&
+ Fast;
+}
+
+static bool shouldWidenLoad(const GCNSubtarget &ST, const LegalityQuery &Query,
+ unsigned Opcode) {
+ if (Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic)
+ return false;
+
+ return shouldWidenLoad(ST, Query.MMODescrs[0].SizeInBits,
+ Query.MMODescrs[0].AlignInBits,
+ Query.Types[1].getAddressSpace(), Opcode);
+}
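The key idea in shouldWidenLoad above is that a load is dereferenceable up to its alignment, so an odd-sized access may be rounded up to the next power of two whenever the alignment already covers that many bits. A simplified sketch of that check, omitting the subtarget-specific parts (the dwordx3 exception and the fast-unaligned-access query):

    // Simplified sketch of the widening check above: widen an odd-sized load to
    // the next power of two only if the alignment already guarantees that many
    // bits are dereferenceable and the address-space limit is not exceeded.
    #include <cassert>
    #include <cstdint>

    static uint64_t nextPow2(uint64_t V) {          // analogue of NextPowerOf2
      uint64_t P = 1;
      while (P < V)
        P <<= 1;
      return P;
    }

    static bool shouldWidenSketch(unsigned SizeInBits, unsigned AlignInBits,
                                  unsigned MaxSizeInBits) {
      if ((SizeInBits & (SizeInBits - 1)) == 0)     // already a power of two
        return false;
      if (SizeInBits >= MaxSizeInBits)
        return false;
      return AlignInBits >= nextPow2(SizeInBits);
    }

    int main() {
      // A 96-bit load aligned to 128 bits can be widened to 128 bits...
      assert(shouldWidenSketch(96, 128, 512));
      // ...but not if it is only 32-bit aligned.
      assert(!shouldWidenSketch(96, 32, 512));
      return 0;
    }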
+
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const GCNTargetMachine &TM)
: ST(ST_) {
@@ -329,6 +418,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
};
const LLT S1 = LLT::scalar(1);
+ const LLT S8 = LLT::scalar(8);
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
@@ -337,6 +427,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT S512 = LLT::scalar(512);
const LLT MaxScalar = LLT::scalar(MaxRegisterSize);
+ const LLT V2S8 = LLT::vector(2, 8);
const LLT V2S16 = LLT::vector(2, 16);
const LLT V4S16 = LLT::vector(4, 16);
@@ -410,48 +501,103 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
// elements for v3s16
getActionDefinitionsBuilder(G_PHI)
- .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
+ .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256})
.legalFor(AllS32Vectors)
.legalFor(AllS64Vectors)
.legalFor(AddrSpaces64)
.legalFor(AddrSpaces32)
- .clampScalar(0, S32, S256)
+ .legalIf(isPointer(0))
+ .clampScalar(0, S16, S256)
.widenScalarToNextPow2(0, 32)
.clampMaxNumElements(0, S32, 16)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
- .legalIf(isPointer(0));
+ .scalarize(0);
- if (ST.hasVOP3PInsts()) {
+ if (ST.hasVOP3PInsts() && ST.hasAddNoCarry() && ST.hasIntClamp()) {
+ // Full set of gfx9 features.
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16, V2S16})
.clampScalar(0, S16, S32)
.clampMaxNumElements(0, S16, 2)
.scalarize(0)
.widenScalarToNextPow2(0, 32);
+
+ getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT})
+ .legalFor({S32, S16, V2S16}) // Clamp modifier
+ .minScalarOrElt(0, S16)
+ .clampMaxNumElements(0, S16, 2)
+ .scalarize(0)
+ .widenScalarToNextPow2(0, 32)
+ .lower();
} else if (ST.has16BitInsts()) {
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16})
.clampScalar(0, S16, S32)
.scalarize(0)
- .widenScalarToNextPow2(0, 32);
+ .widenScalarToNextPow2(0, 32); // FIXME: min should be 16
+
+ // Technically the saturating operations require clamp bit support, but this
+ // was introduced at the same time as 16-bit operations.
+ getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
+ .legalFor({S32, S16}) // Clamp modifier
+ .minScalar(0, S16)
+ .scalarize(0)
+ .widenScalarToNextPow2(0, 16)
+ .lower();
+
+ // We're just lowering this, but it helps get a better result to try to
+ // coerce to the desired type first.
+ getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT})
+ .minScalar(0, S16)
+ .scalarize(0)
+ .lower();
} else {
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32})
.clampScalar(0, S32, S32)
.scalarize(0);
+
+ if (ST.hasIntClamp()) {
+ getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
+ .legalFor({S32}) // Clamp modifier.
+ .scalarize(0)
+ .minScalarOrElt(0, S32)
+ .lower();
+ } else {
+ // Clamp bit support was added in VI, along with 16-bit operations.
+ getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
+ .minScalar(0, S32)
+ .scalarize(0)
+ .lower();
+ }
+
+ // FIXME: DAG expansion gets better results. The widening uses the smaller
+ // range values and goes for the min/max lowering directly.
+ getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT})
+ .minScalar(0, S32)
+ .scalarize(0)
+ .lower();
}
- // FIXME: Not really legal. Placeholder for custom lowering.
getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_SREM, G_UREM})
.customFor({S32, S64})
.clampScalar(0, S32, S64)
.widenScalarToNextPow2(0, 32)
.scalarize(0);
- getActionDefinitionsBuilder({G_UMULH, G_SMULH})
- .legalFor({S32})
- .clampScalar(0, S32, S32)
- .scalarize(0);
+ auto &Mulh = getActionDefinitionsBuilder({G_UMULH, G_SMULH})
+ .legalFor({S32})
+ .maxScalarOrElt(0, S32);
+
+ if (ST.hasVOP3PInsts()) {
+ Mulh
+ .clampMaxNumElements(0, S8, 2)
+ .lowerFor({V2S8});
+ }
+
+ Mulh
+ .scalarize(0)
+ .lower();
// Report legal for any types we can handle anywhere. For the cases only legal
// on the SALU, RegBankSelect will be able to re-legalize.
@@ -479,9 +625,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_CONSTANT)
.legalFor({S1, S32, S64, S16, GlobalPtr,
LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
+ .legalIf(isPointer(0))
.clampScalar(0, S32, S64)
- .widenScalarToNextPow2(0)
- .legalIf(isPointer(0));
+ .widenScalarToNextPow2(0);
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({S32, S64, S16})
@@ -505,8 +651,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalFor({{PrivatePtr, S32}});
getActionDefinitionsBuilder(G_GLOBAL_VALUE)
- .unsupportedFor({PrivatePtr})
- .custom();
+ .customIf(typeIsNot(0, PrivatePtr));
+
setAction({G_BLOCK_ADDR, CodePtr}, Legal);
auto &FPOpActions = getActionDefinitionsBuilder(
@@ -599,7 +745,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_FPEXT)
.legalFor({{S64, S32}, {S32, S16}})
- .lowerFor({{S64, S16}}) // FIXME: Implement
+ .narrowScalarFor({{S64, S16}}, changeTo(0, S32))
.scalarize(0);
getActionDefinitionsBuilder(G_FSUB)
@@ -621,6 +767,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
FMad.scalarize(0)
.lower();
+ auto &FRem = getActionDefinitionsBuilder(G_FREM);
+ if (ST.has16BitInsts()) {
+ FRem.customFor({S16, S32, S64});
+ } else {
+ FRem.minScalar(0, S32)
+ .customFor({S32, S64});
+ }
+ FRem.scalarize(0);
+
// TODO: Do we need to clamp maximum bitwidth?
getActionDefinitionsBuilder(G_TRUNC)
.legalIf(isScalar(0))
@@ -648,12 +803,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.has16BitInsts())
IToFP.legalFor({{S16, S16}});
IToFP.clampScalar(1, S32, S64)
+ .minScalar(0, S32)
.scalarize(0)
.widenScalarToNextPow2(1);
auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
.legalFor({{S32, S32}, {S32, S64}, {S32, S16}})
- .customFor({{S64, S64}});
+ .customFor({{S64, S64}})
+ .narrowScalarFor({{S64, S16}}, changeTo(0, S32));
if (ST.has16BitInsts())
FPToI.legalFor({{S16, S16}});
else
@@ -663,7 +820,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.lower();
- getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
+ // Lower roundeven into G_FRINT
+ getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
.scalarize(0)
.lower();
@@ -685,16 +843,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
}
- // FIXME: Clamp offset operand.
getActionDefinitionsBuilder(G_PTR_ADD)
- .legalIf(isPointer(0))
- .scalarize(0);
+ .legalIf(all(isPointer(0), sameSize(0, 1)))
+ .scalarize(0)
+ .scalarSameSizeAs(1, 0);
getActionDefinitionsBuilder(G_PTRMASK)
- .legalIf(typeInSet(1, {S64, S32}))
- .minScalar(1, S32)
- .maxScalarIf(sizeIs(0, 32), 1, S32)
- .maxScalarIf(sizeIs(0, 64), 1, S64)
+ .legalIf(all(sameSize(0, 1), typeInSet(1, {S64, S32})))
+ .scalarSameSizeAs(1, 0)
.scalarize(0);
auto &CmpBuilder =
@@ -746,6 +902,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
ExpOps.clampScalar(0, MinScalarFPTy, S32)
.scalarize(0);
+ getActionDefinitionsBuilder(G_FPOWI)
+ .clampScalar(0, MinScalarFPTy, S32)
+ .lower();
+
// The 64-bit versions produce 32-bit results, but only on the SALU.
getActionDefinitionsBuilder(G_CTPOP)
.legalFor({{S32, S32}, {S32, S64}})
@@ -870,10 +1030,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// Split vector extloads.
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
- unsigned Align = Query.MMODescrs[0].AlignInBits;
+ unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
if (MemSize < DstTy.getSizeInBits())
- MemSize = std::max(MemSize, Align);
+ MemSize = std::max(MemSize, AlignBits);
if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
return true;
@@ -895,35 +1055,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return true;
}
- if (Align < MemSize) {
+ if (AlignBits < MemSize) {
const SITargetLowering *TLI = ST.getTargetLowering();
- return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8);
+ return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS,
+ Align(AlignBits / 8));
}
return false;
};
- const auto shouldWidenLoadResult = [=](const LegalityQuery &Query,
- unsigned Opc) -> bool {
- unsigned Size = Query.Types[0].getSizeInBits();
- if (isPowerOf2_32(Size))
- return false;
-
- if (Size == 96 && ST.hasDwordx3LoadStores())
- return false;
-
- unsigned AddrSpace = Query.Types[1].getAddressSpace();
- if (Size >= maxSizeForAddrSpace(ST, AddrSpace, Opc))
- return false;
-
- unsigned Align = Query.MMODescrs[0].AlignInBits;
- unsigned RoundedSize = NextPowerOf2(Size);
- return (Align >= RoundedSize);
- };
-
- unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
- unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
- unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
+ unsigned GlobalAlign32 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 32;
+ unsigned GlobalAlign16 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 16;
+ unsigned GlobalAlign8 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 8;
// TODO: Refine based on subtargets which support unaligned access or 128-bit
// LDS
@@ -981,31 +1124,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// 16-bit vector parts.
Actions.bitcastIf(
[=](const LegalityQuery &Query) -> bool {
- const LLT Ty = Query.Types[0];
- const unsigned Size = Ty.getSizeInBits();
-
- if (Size != Query.MMODescrs[0].SizeInBits)
- return Size <= 32 && Ty.isVector();
-
- if (loadStoreBitcastWorkaround(Ty) && isRegisterType(Ty))
- return true;
- return Ty.isVector() && (Size <= 32 || isRegisterSize(Size)) &&
- !isRegisterVectorElementType(Ty.getElementType());
+ return shouldBitcastLoadStoreType(ST, Query.Types[0],
+ Query.MMODescrs[0].SizeInBits);
}, bitcastToRegisterType(0));
+ if (!IsStore) {
+ // Widen suitably aligned loads by loading extra bytes. The standard
+ // legalization actions can't properly express widening memory operands.
+ Actions.customIf([=](const LegalityQuery &Query) -> bool {
+ return shouldWidenLoad(ST, Query, G_LOAD);
+ });
+ }
+
+ // FIXME: load/store narrowing should be moved to lower action
Actions
- .customIf(typeIs(1, Constant32Ptr))
- // Widen suitably aligned loads by loading extra elements.
- .moreElementsIf([=](const LegalityQuery &Query) {
- const LLT Ty = Query.Types[0];
- return Op == G_LOAD && Ty.isVector() &&
- shouldWidenLoadResult(Query, Op);
- }, moreElementsToNextPow2(0))
- .widenScalarIf([=](const LegalityQuery &Query) {
- const LLT Ty = Query.Types[0];
- return Op == G_LOAD && !Ty.isVector() &&
- shouldWidenLoadResult(Query, Op);
- }, widenScalarOrEltToNextPow2(0))
.narrowScalarIf(
[=](const LegalityQuery &Query) -> bool {
return !Query.Types[0].isVector() &&
@@ -1111,15 +1243,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// May need relegalization for the scalars.
return std::make_pair(0, EltTy);
})
- .minScalar(0, S32);
+ .lowerIfMemSizeNotPow2()
+ .minScalar(0, S32);
if (IsStore)
Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32));
- // TODO: Need a bitcast lower option?
Actions
.widenScalarToNextPow2(0)
- .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0));
+ .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
+ .lower();
}
auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
@@ -1147,14 +1280,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
G_ATOMICRMW_UMIN})
.legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
- {S64, GlobalPtr}, {S64, LocalPtr}});
+ {S64, GlobalPtr}, {S64, LocalPtr},
+ {S32, RegionPtr}, {S64, RegionPtr}});
if (ST.hasFlatAddressSpace()) {
Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
}
if (ST.hasLDSFPAtomics()) {
getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
- .legalFor({{S32, LocalPtr}});
+ .legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
}
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
@@ -1207,6 +1341,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Shifts.clampScalar(1, S32, S32);
Shifts.clampScalar(0, S16, S64);
Shifts.widenScalarToNextPow2(0, 16);
+
+ getActionDefinitionsBuilder({G_SSHLSAT, G_USHLSAT})
+ .minScalar(0, S16)
+ .scalarize(0)
+ .lower();
} else {
// Make sure we legalize the shift amount type first, as the general
// expansion for the shifted type will produce much worse code if it hasn't
@@ -1214,6 +1353,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Shifts.clampScalar(1, S32, S32);
Shifts.clampScalar(0, S32, S64);
Shifts.widenScalarToNextPow2(0, 32);
+
+ getActionDefinitionsBuilder({G_SSHLSAT, G_USHLSAT})
+ .minScalar(0, S32)
+ .scalarize(0)
+ .lower();
}
Shifts.scalarize(0);
@@ -1227,15 +1371,38 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT EltTy = Query.Types[EltTypeIdx];
const LLT VecTy = Query.Types[VecTypeIdx];
const LLT IdxTy = Query.Types[IdxTypeIdx];
- return (EltTy.getSizeInBits() == 16 ||
- EltTy.getSizeInBits() % 32 == 0) &&
- VecTy.getSizeInBits() % 32 == 0 &&
- VecTy.getSizeInBits() <= MaxRegisterSize &&
- IdxTy.getSizeInBits() == 32;
+ const unsigned EltSize = EltTy.getSizeInBits();
+ return (EltSize == 32 || EltSize == 64) &&
+ VecTy.getSizeInBits() % 32 == 0 &&
+ VecTy.getSizeInBits() <= MaxRegisterSize &&
+ IdxTy.getSizeInBits() == 32;
+ })
+ .bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltNarrowerThan(VecTypeIdx, 32)),
+ bitcastToVectorElement32(VecTypeIdx))
+ //.bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1))
+ .bitcastIf(
+ all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltWiderThan(VecTypeIdx, 64)),
+ [=](const LegalityQuery &Query) {
+ // For > 64-bit element types, try to turn this into a 64-bit
+ // element vector since we may be able to do better indexing
+ // if this is scalar. If not, fall back to 32.
+ const LLT EltTy = Query.Types[EltTypeIdx];
+ const LLT VecTy = Query.Types[VecTypeIdx];
+ const unsigned DstEltSize = EltTy.getSizeInBits();
+ const unsigned VecSize = VecTy.getSizeInBits();
+
+ const unsigned TargetEltSize = DstEltSize % 64 == 0 ? 64 : 32;
+ return std::make_pair(
+ VecTypeIdx, LLT::vector(VecSize / TargetEltSize, TargetEltSize));
})
.clampScalar(EltTypeIdx, S32, S64)
.clampScalar(VecTypeIdx, S32, S64)
- .clampScalar(IdxTypeIdx, S32, S32);
+ .clampScalar(IdxTypeIdx, S32, S32)
+ .clampMaxNumElements(VecTypeIdx, S32, 32)
+ // TODO: Clamp elements for 64-bit vectors?
+ // It should only be necessary with variable indexes.
+ // As a last resort, lower to the stack
+ .lower();
}
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
@@ -1306,7 +1473,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// FIXME: Clamp maximum size
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
- .legalIf(isRegisterType(0));
+ .legalIf(all(isRegisterType(0), isRegisterType(1)))
+ .clampMaxNumElements(0, S32, 32)
+ .clampMaxNumElements(1, S16, 2) // TODO: Make 4?
+ .clampMaxNumElements(0, S16, 64);
// TODO: Don't fully scalarize v2s16 pieces? Or combine out those
// pre-legalize.
@@ -1335,6 +1505,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
};
auto &Builder = getActionDefinitionsBuilder(Op)
+ .legalIf(all(isRegisterType(0), isRegisterType(1)))
.lowerFor({{S16, V2S16}})
.lowerIf([=](const LegalityQuery &Query) {
const LLT BigTy = Query.Types[BigTyIdx];
@@ -1390,19 +1561,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}
return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
})
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &BigTy = Query.Types[BigTyIdx];
- const LLT &LitTy = Query.Types[LitTyIdx];
-
- if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
- return false;
- if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
- return false;
-
- return BigTy.getSizeInBits() % 16 == 0 &&
- LitTy.getSizeInBits() % 16 == 0 &&
- BigTy.getSizeInBits() <= MaxRegisterSize;
- })
// Any vectors left are the wrong size. Scalarize them.
.scalarize(0)
.scalarize(1);
@@ -1427,12 +1585,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
SextInReg.lowerFor({{S32}, {S64}});
}
- // FIXME: Placeholder rule. Really depends on whether the clamp modifier is
- // available, and is selectively legal for s16, s32, v2s16.
- getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT, G_UADDSAT, G_USUBSAT})
- .scalarize(0)
- .clampScalar(0, S16, S32);
-
SextInReg
.scalarize(0)
.clampScalar(0, S32, S64)
@@ -1446,11 +1598,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_READCYCLECOUNTER)
.legalFor({S64});
+ getActionDefinitionsBuilder(G_FENCE)
+ .alwaysLegal();
+
getActionDefinitionsBuilder({
// TODO: Verify V_BFI_B32 is generated from expanded bit ops
G_FCOPYSIGN,
G_ATOMIC_CMPXCHG_WITH_SUCCESS,
+ G_ATOMICRMW_NAND,
+ G_ATOMICRMW_FSUB,
G_READ_REGISTER,
G_WRITE_REGISTER,
@@ -1474,7 +1631,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &B = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *B.getMRI();
- GISelChangeObserver &Observer = Helper.Observer;
switch (MI.getOpcode()) {
case TargetOpcode::G_ADDRSPACE_CAST:
@@ -1483,6 +1639,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeFrint(MI, MRI, B);
case TargetOpcode::G_FCEIL:
return legalizeFceil(MI, MRI, B);
+ case TargetOpcode::G_FREM:
+ return legalizeFrem(MI, MRI, B);
case TargetOpcode::G_INTRINSIC_TRUNC:
return legalizeIntrinsicTrunc(MI, MRI, B);
case TargetOpcode::G_SITOFP:
@@ -1510,7 +1668,7 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_GLOBAL_VALUE:
return legalizeGlobalValue(MI, MRI, B);
case TargetOpcode::G_LOAD:
- return legalizeLoad(MI, MRI, B, Observer);
+ return legalizeLoad(Helper, MI);
case TargetOpcode::G_FMAD:
return legalizeFMad(MI, MRI, B);
case TargetOpcode::G_FDIV:
@@ -1580,8 +1738,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
Register QueuePtr = MRI.createGenericVirtualRegister(
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- if (!loadInputValue(QueuePtr, B, &MFI->getArgInfo().QueuePtr))
+ if (!loadInputValue(QueuePtr, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
return Register();
// Offset into amd_queue_t for group_segment_aperture_base_hi /
@@ -1623,8 +1780,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
const AMDGPUTargetMachine &TM
= static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
+ if (TM.isNoopAddrSpaceCast(SrcAS, DestAS)) {
MI.setDesc(B.getTII().get(TargetOpcode::G_BITCAST));
return true;
}
@@ -1721,6 +1877,7 @@ bool AMDGPULegalizerInfo::legalizeFrint(
auto Cond = B.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
B.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
+ MI.eraseFromParent();
return true;
}
@@ -1752,7 +1909,24 @@ bool AMDGPULegalizerInfo::legalizeFceil(
return true;
}
-static MachineInstrBuilder extractF64Exponent(unsigned Hi,
+bool AMDGPULegalizerInfo::legalizeFrem(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0Reg = MI.getOperand(1).getReg();
+ Register Src1Reg = MI.getOperand(2).getReg();
+ auto Flags = MI.getFlags();
+ LLT Ty = MRI.getType(DstReg);
+
+ auto Div = B.buildFDiv(Ty, Src0Reg, Src1Reg, Flags);
+ auto Trunc = B.buildIntrinsicTrunc(Ty, Div, Flags);
+ auto Neg = B.buildFNeg(Ty, Trunc, Flags);
+ B.buildFMA(DstReg, Neg, Src1Reg, Src0Reg, Flags);
+ MI.eraseFromParent();
+ return true;
+}
+
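For readers following the patch, the new legalizeFrem hook expands G_FREM entirely in terms of already-legal operations: x - trunc(x / y) * y, with the subtraction folded into a single fused multiply-add. A minimal standalone C++ sketch of the same arithmetic, not part of the patch and purely illustrative:

#include <cmath>
#include <cstdio>

// Mirrors the G_FREM expansion above: fma(-trunc(x / y), y, x).
// Note: may differ from std::fmod in extreme cases (e.g. very large x / y),
// since trunc of a huge quotient is inexact.
static double fremLowered(double X, double Y) {
  double Div = X / Y;
  double Trunc = std::trunc(Div);
  return std::fma(-Trunc, Y, X);
}

int main() {
  std::printf("%f %f\n", fremLowered(5.3, 2.0), std::fmod(5.3, 2.0));
  return 0;
}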
+static MachineInstrBuilder extractF64Exponent(Register Hi,
MachineIRBuilder &B) {
const unsigned FractBits = 52;
const unsigned ExpBits = 11;
@@ -1762,6 +1936,7 @@ static MachineInstrBuilder extractF64Exponent(unsigned Hi,
auto Const1 = B.buildConstant(S32, ExpBits);
auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
+ .addUse(Hi)
.addUse(Const0.getReg(0))
.addUse(Const1.getReg(0));
@@ -1809,6 +1984,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
+ MI.eraseFromParent();
return true;
}
@@ -1907,10 +2083,11 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
// FIXME: Artifact combiner probably should have replaced the truncated
// constant before this, so we shouldn't need
// getConstantVRegValWithLookThrough.
- Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
- MI.getOperand(2).getReg(), MRI);
- if (!IdxVal) // Dynamic case will be selected to register indexing.
+ Optional<ValueAndVReg> MaybeIdxVal =
+ getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeIdxVal) // Dynamic case will be selected to register indexing.
return true;
+ const int64_t IdxVal = MaybeIdxVal->Value.getSExtValue();
Register Dst = MI.getOperand(0).getReg();
Register Vec = MI.getOperand(1).getReg();
@@ -1919,8 +2096,8 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
LLT EltTy = VecTy.getElementType();
assert(EltTy == MRI.getType(Dst));
- if (IdxVal->Value < VecTy.getNumElements())
- B.buildExtract(Dst, Vec, IdxVal->Value * EltTy.getSizeInBits());
+ if (IdxVal < VecTy.getNumElements())
+ B.buildExtract(Dst, Vec, IdxVal * EltTy.getSizeInBits());
else
B.buildUndef(Dst);
@@ -1938,11 +2115,12 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
// FIXME: Artifact combiner probably should have replaced the truncated
// constant before this, so we shouldn't need
// getConstantVRegValWithLookThrough.
- Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
- MI.getOperand(3).getReg(), MRI);
- if (!IdxVal) // Dynamic case will be selected to register indexing.
+ Optional<ValueAndVReg> MaybeIdxVal =
+ getConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
+ if (!MaybeIdxVal) // Dynamic case will be selected to register indexing.
return true;
+ int64_t IdxVal = MaybeIdxVal->Value.getSExtValue();
Register Dst = MI.getOperand(0).getReg();
Register Vec = MI.getOperand(1).getReg();
Register Ins = MI.getOperand(2).getReg();
@@ -1951,8 +2129,8 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
LLT EltTy = VecTy.getElementType();
assert(EltTy == MRI.getType(Ins));
- if (IdxVal->Value < VecTy.getNumElements())
- B.buildInsert(Dst, Vec, Ins, IdxVal->Value * EltTy.getSizeInBits());
+ if (IdxVal < VecTy.getNumElements())
+ B.buildInsert(Dst, Vec, Ins, IdxVal * EltTy.getSizeInBits());
else
B.buildUndef(Dst);
@@ -2043,7 +2221,9 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
// variable, but since the encoding of $symbol starts 4 bytes after the start
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
// small. This requires us to add 4 to the global variable offset in order to
- // compute the correct address.
+ // compute the correct address. Similarly for the s_addc_u32 instruction, the
+ // encoding of $symbol starts 12 bytes after the start of the s_add_u32
+ // instruction.
LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
@@ -2057,7 +2237,7 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
if (GAFlags == SIInstrInfo::MO_NONE)
MIB.addImm(0);
else
- MIB.addGlobalAddress(GV, Offset + 4, GAFlags + 1);
+ MIB.addGlobalAddress(GV, Offset + 12, GAFlags + 1);
B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
@@ -2078,7 +2258,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
- if (!MFI->isEntryFunction()) {
+ if (!MFI->isModuleEntryFunction()) {
const Function &Fn = MF.getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
Fn, "local memory global used by non-kernel function", MI.getDebugLoc(),
@@ -2104,6 +2284,25 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
return true; // Leave in place;
}
+ if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) {
+ Type *Ty = GV->getValueType();
+ // HIP uses an unsized array `extern __shared__ T s[]` or a similar
+ // zero-sized type in other languages to declare dynamic shared memory
+ // whose size is not known at compile time. It is allocated by the runtime
+ // and placed directly after the statically allocated ones. All such
+ // declarations share the same offset.
+ if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) {
+ // Adjust alignment for that dynamic shared memory array.
+ MFI->setDynLDSAlign(B.getDataLayout(), *cast<GlobalVariable>(GV));
+ LLT S32 = LLT::scalar(32);
+ auto Sz =
+ B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
+ B.buildIntToPtr(DstReg, Sz);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
B.buildConstant(
DstReg,
MFI->allocateLDSGlobal(B.getDataLayout(), *cast<GlobalVariable>(GV)));
@@ -2154,15 +2353,90 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
return true;
}
-bool AMDGPULegalizerInfo::legalizeLoad(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B, GISelChangeObserver &Observer) const {
- LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
- auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg());
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(Cast.getReg(0));
- Observer.changedInstr(MI);
- return true;
+static LLT widenToNextPowerOf2(LLT Ty) {
+ if (Ty.isVector())
+ return Ty.changeNumElements(PowerOf2Ceil(Ty.getNumElements()));
+ return LLT::scalar(PowerOf2Ceil(Ty.getSizeInBits()));
+}
+
+bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *B.getMRI();
+ GISelChangeObserver &Observer = Helper.Observer;
+
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AddrSpace = PtrTy.getAddressSpace();
+
+ if (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+ auto Cast = B.buildAddrSpaceCast(ConstPtr, PtrReg);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Cast.getReg(0));
+ Observer.changedInstr(MI);
+ return true;
+ }
+
+ Register ValReg = MI.getOperand(0).getReg();
+ LLT ValTy = MRI.getType(ValReg);
+
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ const unsigned ValSize = ValTy.getSizeInBits();
+ const unsigned MemSize = 8 * MMO->getSize();
+ const Align MemAlign = MMO->getAlign();
+ const unsigned AlignInBits = 8 * MemAlign.value();
+
+ // Widen non-power-of-2 loads to the alignment if needed
+ if (shouldWidenLoad(ST, MemSize, AlignInBits, AddrSpace, MI.getOpcode())) {
+ const unsigned WideMemSize = PowerOf2Ceil(MemSize);
+
+ // This was already the correct extending load result type, so just adjust
+ // the memory type.
+ if (WideMemSize == ValSize) {
+ MachineFunction &MF = B.getMF();
+
+ MachineMemOperand *WideMMO =
+ MF.getMachineMemOperand(MMO, 0, WideMemSize / 8);
+ Observer.changingInstr(MI);
+ MI.setMemRefs(MF, {WideMMO});
+ Observer.changedInstr(MI);
+ return true;
+ }
+
+ // Don't bother handling an edge case that should probably never be produced.
+ if (ValSize > WideMemSize)
+ return false;
+
+ LLT WideTy = widenToNextPowerOf2(ValTy);
+
+ Register WideLoad;
+ if (!WideTy.isVector()) {
+ WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0);
+ B.buildTrunc(ValReg, WideLoad).getReg(0);
+ } else {
+ // Extract the subvector.
+
+ if (isRegisterType(ValTy)) {
+ // If this is a case where G_EXTRACT is legal, use it.
+ // (e.g. <3 x s32> -> <4 x s32>)
+ WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0);
+ B.buildExtract(ValReg, WideLoad, 0);
+ } else {
+ // For cases where the widened type isn't a nice register value, unmerge
+ // from a widened register (e.g. <3 x s16> -> <4 x s16>)
+ B.setInsertPt(B.getMBB(), ++B.getInsertPt());
+ WideLoad = Helper.widenWithUnmerge(WideTy, ValReg);
+ B.setInsertPt(B.getMBB(), MI.getIterator());
+ B.buildLoadFromOffset(WideLoad, PtrReg, *MMO, 0);
+ }
+ }
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
}
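The replacement legalizeLoad above widens loads whose memory size is not a power of two when the alignment allows reading the extra bytes: the memory size is rounded up with PowerOf2Ceil, and then either only the memory operand grows (when the widened size already equals the register size) or the load itself is widened and the result narrowed back with a trunc, extract, or unmerge. A small standalone C++ sketch of that size calculation, with powerOf2Ceil re-implemented here only for illustration:

#include <cstdint>
#include <cstdio>

// Round a bit-width up to the next power of two (what PowerOf2Ceil does).
static uint64_t powerOf2Ceil(uint64_t V) {
  uint64_t P = 1;
  while (P < V)
    P <<= 1;
  return P;
}

int main() {
  // e.g. a <3 x s32> load: 96 memory bits become a 128-bit access, after
  // which the low 96 bits are extracted (the G_EXTRACT path in the patch).
  const unsigned MemSize = 96, ValSize = 96;
  const unsigned WideMemSize = powerOf2Ceil(MemSize);
  std::printf("mem %u -> wide %u: %s\n", MemSize, WideMemSize,
              WideMemSize == ValSize ? "just grow the memory operand"
                                     : "widen the load, then narrow the result");
  return 0;
}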
bool AMDGPULegalizerInfo::legalizeFMad(
@@ -2194,8 +2468,7 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
Register CmpVal = MI.getOperand(2).getReg();
Register NewVal = MI.getOperand(3).getReg();
- assert(SITargetLowering::isFlatGlobalAddrSpace(
- MRI.getType(PtrReg).getAddressSpace()) &&
+ assert(AMDGPU::isFlatGlobalAddrSpace(MRI.getType(PtrReg).getAddressSpace()) &&
"this should not have been custom lowered");
LLT ValTy = MRI.getType(CmpVal);
@@ -2364,23 +2637,42 @@ bool AMDGPULegalizerInfo::legalizeBuildVector(
return true;
}
+// Check that this is a G_XOR x, -1
+static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {
+ if (MI.getOpcode() != TargetOpcode::G_XOR)
+ return false;
+ auto ConstVal = getConstantVRegSExtVal(MI.getOperand(2).getReg(), MRI);
+ return ConstVal && *ConstVal == -1;
+}
+
// Return the use branch instruction, otherwise null if the usage is invalid.
-static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineInstr *&Br,
- MachineBasicBlock *&UncondBrTarget) {
+static MachineInstr *
+verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineInstr *&Br,
+ MachineBasicBlock *&UncondBrTarget, bool &Negated) {
Register CondDef = MI.getOperand(0).getReg();
if (!MRI.hasOneNonDBGUse(CondDef))
return nullptr;
MachineBasicBlock *Parent = MI.getParent();
- MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
- if (UseMI.getParent() != Parent ||
- UseMI.getOpcode() != AMDGPU::G_BRCOND)
+ MachineInstr *UseMI = &*MRI.use_instr_nodbg_begin(CondDef);
+
+ if (isNot(MRI, *UseMI)) {
+ Register NegatedCond = UseMI->getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(NegatedCond))
+ return nullptr;
+
+ // We're deleting the def of this value, so we need to remove it.
+ UseMI->eraseFromParent();
+
+ UseMI = &*MRI.use_instr_nodbg_begin(NegatedCond);
+ Negated = true;
+ }
+
+ if (UseMI->getParent() != Parent || UseMI->getOpcode() != AMDGPU::G_BRCOND)
return nullptr;
// Make sure the cond br is followed by a G_BR, or is the last instruction.
- MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
+ MachineBasicBlock::iterator Next = std::next(UseMI->getIterator());
if (Next == Parent->end()) {
MachineFunction::iterator NextMBB = std::next(Parent->getIterator());
if (NextMBB == Parent->getParent()->end()) // Illegal intrinsic use.
@@ -2393,84 +2685,19 @@ static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
UncondBrTarget = Br->getOperand(0).getMBB();
}
- return &UseMI;
-}
-
-Register AMDGPULegalizerInfo::insertLiveInCopy(MachineIRBuilder &B,
- MachineRegisterInfo &MRI,
- Register LiveIn,
- Register PhyReg) const {
- assert(PhyReg.isPhysical() && "Physical register expected");
-
- // Insert the live-in copy, if required, by defining destination virtual
- // register.
- // FIXME: It seems EmitLiveInCopies isn't called anywhere?
- if (!MRI.getVRegDef(LiveIn)) {
- // FIXME: Should have scoped insert pt
- MachineBasicBlock &OrigInsBB = B.getMBB();
- auto OrigInsPt = B.getInsertPt();
-
- MachineBasicBlock &EntryMBB = B.getMF().front();
- EntryMBB.addLiveIn(PhyReg);
- B.setInsertPt(EntryMBB, EntryMBB.begin());
- B.buildCopy(LiveIn, PhyReg);
-
- B.setInsertPt(OrigInsBB, OrigInsPt);
- }
-
- return LiveIn;
-}
-
-Register AMDGPULegalizerInfo::getLiveInRegister(MachineIRBuilder &B,
- MachineRegisterInfo &MRI,
- Register PhyReg, LLT Ty,
- bool InsertLiveInCopy) const {
- assert(PhyReg.isPhysical() && "Physical register expected");
-
- // Get or create virtual live-in regester
- Register LiveIn = MRI.getLiveInVirtReg(PhyReg);
- if (!LiveIn) {
- LiveIn = MRI.createGenericVirtualRegister(Ty);
- MRI.addLiveIn(PhyReg, LiveIn);
- }
-
- // When the actual true copy required is from virtual register to physical
- // register (to be inserted later), live-in copy insertion from physical
- // to register virtual register is not required
- if (!InsertLiveInCopy)
- return LiveIn;
-
- return insertLiveInCopy(B, MRI, LiveIn, PhyReg);
-}
-
-const ArgDescriptor *AMDGPULegalizerInfo::getArgDescriptor(
- MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
- const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
- const ArgDescriptor *Arg;
- const TargetRegisterClass *RC;
- LLT ArgTy;
- std::tie(Arg, RC, ArgTy) = MFI->getPreloadedValue(ArgType);
- if (!Arg) {
- LLVM_DEBUG(dbgs() << "Required arg register missing\n");
- return nullptr;
- }
- return Arg;
+ return UseMI;
}
bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
- const ArgDescriptor *Arg) const {
- if (!Arg->isRegister() || !Arg->getRegister().isValid())
- return false; // TODO: Handle these
-
- Register SrcReg = Arg->getRegister();
- assert(SrcReg.isPhysical() && "Physical register expected");
+ const ArgDescriptor *Arg,
+ const TargetRegisterClass *ArgRC,
+ LLT ArgTy) const {
+ MCRegister SrcReg = Arg->getRegister();
+ assert(Register::isPhysicalRegister(SrcReg) && "Physical register expected");
assert(DstReg.isVirtual() && "Virtual register expected");
- MachineRegisterInfo &MRI = *B.getMRI();
-
- LLT Ty = MRI.getType(DstReg);
- Register LiveIn = getLiveInRegister(B, MRI, SrcReg, Ty);
-
+ Register LiveIn = getFunctionLiveInPhysReg(B.getMF(), B.getTII(), SrcReg, *ArgRC,
+ ArgTy);
if (Arg->isMasked()) {
// TODO: Should we try to emit this once in the entry block?
const LLT S32 = LLT::scalar(32);
@@ -2492,15 +2719,24 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
return true;
}
-bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
- MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+bool AMDGPULegalizerInfo::loadInputValue(
+ Register DstReg, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ const ArgDescriptor *Arg;
+ const TargetRegisterClass *ArgRC;
+ LLT ArgTy;
+ std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
- const ArgDescriptor *Arg = getArgDescriptor(B, ArgType);
- if (!Arg)
- return false;
+ if (!Arg->isRegister() || !Arg->getRegister().isValid())
+ return false; // TODO: Handle these
+ return loadInputValue(DstReg, B, Arg, ArgRC, ArgTy);
+}
- if (!loadInputValue(MI.getOperand(0).getReg(), B, Arg))
+bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+ AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
+ if (!loadInputValue(MI.getOperand(0).getReg(), B, ArgType))
return false;
MI.eraseFromParent();
@@ -2516,9 +2752,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
LLT S32 = LLT::scalar(32);
LLT S64 = LLT::scalar(64);
- if (legalizeFastUnsafeFDIV(MI, MRI, B))
- return true;
-
if (DstTy == S16)
return legalizeFDIV16(MI, MRI, B);
if (DstTy == S32)
@@ -2813,22 +3046,14 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
Register Res = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
-
uint16_t Flags = MI.getFlags();
-
LLT ResTy = MRI.getType(Res);
- LLT S32 = LLT::scalar(32);
- LLT S64 = LLT::scalar(64);
const MachineFunction &MF = B.getMF();
- bool Unsafe =
- MF.getTarget().Options.UnsafeFPMath || MI.getFlag(MachineInstr::FmArcp);
-
- if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64)
- return false;
+ bool AllowInaccurateRcp = MF.getTarget().Options.UnsafeFPMath ||
+ MI.getFlag(MachineInstr::FmAfn);
- if (!Unsafe && ResTy == S32 &&
- MF.getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals())
+ if (!AllowInaccurateRcp)
return false;
if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) {
@@ -2855,22 +3080,58 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
}
// x / y -> x * (1.0 / y)
- if (Unsafe) {
- auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
- .addUse(RHS)
- .setMIFlags(Flags);
- B.buildFMul(Res, LHS, RCP, Flags);
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
+ .addUse(RHS)
+ .setMIFlags(Flags);
+ B.buildFMul(Res, LHS, RCP, Flags);
- MI.eraseFromParent();
- return true;
- }
+ MI.eraseFromParent();
+ return true;
+}
- return false;
+bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV64(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ Register Res = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ Register Y = MI.getOperand(2).getReg();
+ uint16_t Flags = MI.getFlags();
+ LLT ResTy = MRI.getType(Res);
+
+ const MachineFunction &MF = B.getMF();
+ bool AllowInaccurateRcp = MF.getTarget().Options.UnsafeFPMath ||
+ MI.getFlag(MachineInstr::FmAfn);
+
+ if (!AllowInaccurateRcp)
+ return false;
+
+ auto NegY = B.buildFNeg(ResTy, Y);
+ auto One = B.buildFConstant(ResTy, 1.0);
+
+ auto R = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
+ .addUse(Y)
+ .setMIFlags(Flags);
+
+ auto Tmp0 = B.buildFMA(ResTy, NegY, R, One);
+ R = B.buildFMA(ResTy, Tmp0, R, R);
+
+ auto Tmp1 = B.buildFMA(ResTy, NegY, R, One);
+ R = B.buildFMA(ResTy, Tmp1, R, R);
+
+ auto Ret = B.buildFMul(ResTy, X, R);
+ auto Tmp2 = B.buildFMA(ResTy, NegY, Ret, X);
+
+ B.buildFMA(Res, Tmp2, R, Ret);
+ MI.eraseFromParent();
+ return true;
}
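The new legalizeFastUnsafeFDIV64 refines a hardware reciprocal estimate of y with two Newton-Raphson iterations (r' = r + r * (1 - y * r)) and then corrects the product x * r with one final fused multiply-add. Below is a standalone C++ sketch of that exact FMA sequence; the v_rcp_f64 estimate is stood in for by a single-precision 1/y, which is an assumption made only for this illustration:

#include <cmath>
#include <cstdio>

// Follows the FMA chain emitted above. The coarse initial reciprocal models
// the hardware rcp estimate; two NR steps square the relative error each time.
static double fastFDiv64(double X, double Y) {
  double R = 1.0f / static_cast<float>(Y);      // coarse reciprocal estimate
  double NegY = -Y;
  R = std::fma(std::fma(NegY, R, 1.0), R, R);   // Newton-Raphson step 1
  R = std::fma(std::fma(NegY, R, 1.0), R, R);   // Newton-Raphson step 2
  double Ret = X * R;
  double Tmp2 = std::fma(NegY, Ret, X);         // residual X - Y * Ret
  return std::fma(Tmp2, R, Ret);                // final correction
}

int main() {
  std::printf("%.17g vs %.17g\n", fastFDiv64(1.0, 3.0), 1.0 / 3.0);
  return 0;
}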
bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
+ if (legalizeFastUnsafeFDIV(MI, MRI, B))
+ return true;
+
Register Res = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
@@ -2933,6 +3194,9 @@ static void toggleSPDenormMode(bool Enable,
bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
+ if (legalizeFastUnsafeFDIV(MI, MRI, B))
+ return true;
+
Register Res = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
@@ -2999,6 +3263,9 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
+ if (legalizeFastUnsafeFDIV64(MI, MRI, B))
+ return true;
+
Register Res = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
@@ -3109,35 +3376,118 @@ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
return true;
}
-bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
+// Expand llvm.amdgcn.rsq.clamp on targets that don't support the instruction.
+// FIXME: Why do we handle this one but not other removed instructions?
+//
+// Reciprocal square root. The clamp prevents infinite results, clamping
+// infinities to max_float. D.f = 1.0 / sqrt(S0.f), result clamped to
+// +-max_float.
+bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return true;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(2).getReg();
+ auto Flags = MI.getFlags();
+
+ LLT Ty = MRI.getType(Dst);
+
+ const fltSemantics *FltSemantics;
+ if (Ty == LLT::scalar(32))
+ FltSemantics = &APFloat::IEEEsingle();
+ else if (Ty == LLT::scalar(64))
+ FltSemantics = &APFloat::IEEEdouble();
+ else
+ return false;
+
+ auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {Ty}, false)
+ .addUse(Src)
+ .setMIFlags(Flags);
+
+ // We don't need to concern ourselves with the snan handling difference, since
+ // the rsq has already quieted (or not); use the variant which will directly select.
const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
- if (!MFI->isEntryFunction()) {
- return legalizePreloadedArgIntrin(MI, MRI, B,
- AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
+ const bool UseIEEE = MFI->getMode().IEEE;
+
+ auto MaxFlt = B.buildFConstant(Ty, APFloat::getLargest(*FltSemantics));
+ auto ClampMax = UseIEEE ? B.buildFMinNumIEEE(Ty, Rsq, MaxFlt, Flags) :
+ B.buildFMinNum(Ty, Rsq, MaxFlt, Flags);
+
+ auto MinFlt = B.buildFConstant(Ty, APFloat::getLargest(*FltSemantics, true));
+
+ if (UseIEEE)
+ B.buildFMaxNumIEEE(Dst, ClampMax, MinFlt, Flags);
+ else
+ B.buildFMaxNum(Dst, ClampMax, MinFlt, Flags);
+ MI.eraseFromParent();
+ return true;
+}
+
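legalizeRsqClampIntrinsic above expands llvm.amdgcn.rsq.clamp as a plain rsq whose result is clamped to the +-max_float range, picking the IEEE or non-IEEE min/max forms based on the function's mode. A scalar C++ sketch of the f32 math, using 1/sqrt and plain fmin/fmax as stand-ins (an assumption for illustration only; the real lowering emits the amdgcn_rsq intrinsic and possibly the *_ieee node forms):

#include <cfloat>
#include <cmath>
#include <cstdio>

// Scalar model of the f32 expansion above: 1/sqrt(x) clamped to +-FLT_MAX,
// so an infinite rsq result (x == 0) collapses to the largest finite float.
static float rsqClamp(float X) {
  float Rsq = 1.0f / std::sqrt(X);
  float ClampMax = std::fmin(Rsq, FLT_MAX);
  return std::fmax(ClampMax, -FLT_MAX);
}

int main() {
  std::printf("%g %g\n", rsqClamp(4.0f), rsqClamp(0.0f)); // 0.5, ~3.4e38
  return 0;
}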
+static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) {
+ switch (IID) {
+ case Intrinsic::amdgcn_ds_fadd:
+ return AMDGPU::G_ATOMICRMW_FADD;
+ case Intrinsic::amdgcn_ds_fmin:
+ return AMDGPU::G_AMDGPU_ATOMIC_FMIN;
+ case Intrinsic::amdgcn_ds_fmax:
+ return AMDGPU::G_AMDGPU_ATOMIC_FMAX;
+ default:
+ llvm_unreachable("not a DS FP intrinsic");
}
+}
+bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI,
+ Intrinsic::ID IID) const {
+ GISelChangeObserver &Observer = Helper.Observer;
+ Observer.changingInstr(MI);
+
+ MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID)));
+
+ // The remaining operands were used to set fields in the MemOperand on
+ // construction.
+ for (int I = 6; I > 3; --I)
+ MI.RemoveOperand(I);
+
+ MI.RemoveOperand(1); // Remove the intrinsic ID.
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
uint64_t Offset =
ST.getTargetLowering()->getImplicitParameterOffset(
B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
- Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());
- const ArgDescriptor *Arg;
- const TargetRegisterClass *RC;
- LLT ArgTy;
- std::tie(Arg, RC, ArgTy) =
- MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
- if (!Arg)
- return false;
-
Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
- if (!loadInputValue(KernargPtrReg, B, Arg))
+ if (!loadInputValue(KernargPtrReg, B,
+ AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
return false;
+ // FIXME: This should be nuw
B.buildPtrAdd(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ if (!MFI->isEntryFunction()) {
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!getImplicitArgPtr(DstReg, MRI, B))
+ return false;
+
MI.eraseFromParent();
return true;
}
@@ -3147,7 +3497,9 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
MachineIRBuilder &B,
unsigned AddrSpace) const {
Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B);
- auto Hi32 = B.buildExtract(LLT::scalar(32), MI.getOperand(2).getReg(), 32);
+ auto Unmerge = B.buildUnmerge(LLT::scalar(32), MI.getOperand(2).getReg());
+ Register Hi32 = Unmerge.getReg(1);
+
B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg);
MI.eraseFromParent();
return true;
@@ -3165,11 +3517,10 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
const unsigned MaxImm = 4095;
Register BaseReg;
unsigned TotalConstOffset;
- MachineInstr *OffsetDef;
const LLT S32 = LLT::scalar(32);
- std::tie(BaseReg, TotalConstOffset, OffsetDef)
- = AMDGPU::getBaseWithConstantOffset(*B.getMRI(), OrigOffset);
+ std::tie(BaseReg, TotalConstOffset) =
+ AMDGPU::getBaseWithConstantOffset(*B.getMRI(), OrigOffset);
unsigned ImmOffset = TotalConstOffset;
@@ -3205,24 +3556,58 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
/// Handle register layout difference for f16 images for some subtargets.
Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
MachineRegisterInfo &MRI,
- Register Reg) const {
- if (!ST.hasUnpackedD16VMem())
- return Reg;
-
+ Register Reg,
+ bool ImageStore) const {
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
LLT StoreVT = MRI.getType(Reg);
assert(StoreVT.isVector() && StoreVT.getElementType() == S16);
- auto Unmerge = B.buildUnmerge(S16, Reg);
+ if (ST.hasUnpackedD16VMem()) {
+ auto Unmerge = B.buildUnmerge(S16, Reg);
- SmallVector<Register, 4> WideRegs;
- for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
- WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0));
+ SmallVector<Register, 4> WideRegs;
+ for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
+ WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0));
- int NumElts = StoreVT.getNumElements();
+ int NumElts = StoreVT.getNumElements();
- return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0);
+ return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0);
+ }
+
+ if (ImageStore && ST.hasImageStoreD16Bug()) {
+ if (StoreVT.getNumElements() == 2) {
+ SmallVector<Register, 4> PackedRegs;
+ Reg = B.buildBitcast(S32, Reg).getReg(0);
+ PackedRegs.push_back(Reg);
+ PackedRegs.resize(2, B.buildUndef(S32).getReg(0));
+ return B.buildBuildVector(LLT::vector(2, S32), PackedRegs).getReg(0);
+ }
+
+ if (StoreVT.getNumElements() == 3) {
+ SmallVector<Register, 4> PackedRegs;
+ auto Unmerge = B.buildUnmerge(S16, Reg);
+ for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
+ PackedRegs.push_back(Unmerge.getReg(I));
+ PackedRegs.resize(6, B.buildUndef(S16).getReg(0));
+ Reg = B.buildBuildVector(LLT::vector(6, S16), PackedRegs).getReg(0);
+ return B.buildBitcast(LLT::vector(3, S32), Reg).getReg(0);
+ }
+
+ if (StoreVT.getNumElements() == 4) {
+ SmallVector<Register, 4> PackedRegs;
+ Reg = B.buildBitcast(LLT::vector(2, S32), Reg).getReg(0);
+ auto Unmerge = B.buildUnmerge(S32, Reg);
+ for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
+ PackedRegs.push_back(Unmerge.getReg(I));
+ PackedRegs.resize(4, B.buildUndef(S32).getReg(0));
+ return B.buildBuildVector(LLT::vector(4, S32), PackedRegs).getReg(0);
+ }
+
+ llvm_unreachable("invalid data type");
+ }
+
+ return Reg;
}
Register AMDGPULegalizerInfo::fixStoreSourceType(
@@ -3513,6 +3898,9 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP;
+ case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
+ case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD;
default:
llvm_unreachable("unhandled atomic opcode");
}
@@ -3523,12 +3911,20 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
Intrinsic::ID IID) const {
const bool IsCmpSwap = IID == Intrinsic::amdgcn_raw_buffer_atomic_cmpswap ||
IID == Intrinsic::amdgcn_struct_buffer_atomic_cmpswap;
+ const bool HasReturn = MI.getNumExplicitDefs() != 0;
- Register Dst = MI.getOperand(0).getReg();
- Register VData = MI.getOperand(2).getReg();
+ Register Dst;
- Register CmpVal;
int OpOffset = 0;
+ if (HasReturn) {
+ // A few FP atomics do not support return values.
+ Dst = MI.getOperand(0).getReg();
+ } else {
+ OpOffset = -1;
+ }
+
+ Register VData = MI.getOperand(2 + OpOffset).getReg();
+ Register CmpVal;
if (IsCmpSwap) {
CmpVal = MI.getOperand(3 + OpOffset).getReg();
@@ -3536,7 +3932,7 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
}
Register RSrc = MI.getOperand(3 + OpOffset).getReg();
- const unsigned NumVIndexOps = IsCmpSwap ? 9 : 8;
+ const unsigned NumVIndexOps = (IsCmpSwap ? 8 : 7) + HasReturn;
// The struct intrinsic variants add one additional operand over raw.
const bool HasVIndex = MI.getNumOperands() == NumVIndexOps;
@@ -3561,9 +3957,12 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
if (!VIndex)
VIndex = B.buildConstant(LLT::scalar(32), 0).getReg(0);
- auto MIB = B.buildInstr(getBufferAtomicPseudo(IID))
- .addDef(Dst)
- .addUse(VData); // vdata
+ auto MIB = B.buildInstr(getBufferAtomicPseudo(IID));
+
+ if (HasReturn)
+ MIB.addDef(Dst);
+
+ MIB.addUse(VData); // vdata
if (IsCmpSwap)
MIB.addReg(CmpVal);
@@ -3583,38 +3982,41 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
/// Turn a set of s16 typed registers in \p A16AddrRegs into a dword sized
/// vector with s16 typed elements.
-static void packImageA16AddressToDwords(MachineIRBuilder &B, MachineInstr &MI,
- SmallVectorImpl<Register> &PackedAddrs,
- int AddrIdx, int DimIdx, int EndIdx,
- int NumGradients) {
+static void packImageA16AddressToDwords(
+ MachineIRBuilder &B, MachineInstr &MI,
+ SmallVectorImpl<Register> &PackedAddrs, unsigned ArgOffset,
+ const AMDGPU::ImageDimIntrinsicInfo *Intr, unsigned EndIdx) {
const LLT S16 = LLT::scalar(16);
const LLT V2S16 = LLT::vector(2, 16);
- for (int I = AddrIdx; I < EndIdx; ++I) {
- MachineOperand &SrcOp = MI.getOperand(I);
+ for (unsigned I = Intr->VAddrStart; I < EndIdx; I++) {
+ MachineOperand &SrcOp = MI.getOperand(ArgOffset + I);
if (!SrcOp.isReg())
continue; // _L to _LZ may have eliminated this.
Register AddrReg = SrcOp.getReg();
- if (I < DimIdx) {
+ if (I < Intr->GradientStart) {
AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
PackedAddrs.push_back(AddrReg);
} else {
// Dz/dh, dz/dv and the last odd coord are packed with undef. Also, in 1D,
// derivatives dx/dh and dx/dv are packed with undef.
if (((I + 1) >= EndIdx) ||
- ((NumGradients / 2) % 2 == 1 &&
- (I == DimIdx + (NumGradients / 2) - 1 ||
- I == DimIdx + NumGradients - 1)) ||
+ ((Intr->NumGradients / 2) % 2 == 1 &&
+ (I == static_cast<unsigned>(Intr->GradientStart +
+ (Intr->NumGradients / 2) - 1) ||
+ I == static_cast<unsigned>(Intr->GradientStart +
+ Intr->NumGradients - 1))) ||
// Check for _L to _LZ optimization
- !MI.getOperand(I + 1).isReg()) {
+ !MI.getOperand(ArgOffset + I + 1).isReg()) {
PackedAddrs.push_back(
B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)})
.getReg(0));
} else {
PackedAddrs.push_back(
- B.buildBuildVector(V2S16, {AddrReg, MI.getOperand(I + 1).getReg()})
+ B.buildBuildVector(
+ V2S16, {AddrReg, MI.getOperand(ArgOffset + I + 1).getReg()})
.getReg(0));
++I;
}
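packImageA16AddressToDwords, reworked above to take its offsets from the ImageDimIntrinsicInfo table, still packs 16-bit address components two per 32-bit dword and pads an odd leftover lane with undef; the extra NumGradients checks keep the two gradient halves from sharing a dword. A standalone C++ sketch of the basic pairwise packing only (the gradient-boundary rule is not modeled, and zero stands in for undef):

#include <cstdint>
#include <cstdio>
#include <vector>

// Packs 16-bit image address components two per dword, padding an odd
// leftover lane (undef in the MIR; zero here purely for illustration).
static std::vector<uint32_t> packA16(const std::vector<uint16_t> &Coords) {
  std::vector<uint32_t> Packed;
  for (size_t I = 0; I < Coords.size(); I += 2) {
    uint32_t Lo = Coords[I];
    uint32_t Hi = (I + 1 < Coords.size()) ? Coords[I + 1] : 0;
    Packed.push_back(Lo | (Hi << 16));
  }
  return Packed;
}

int main() {
  auto Packed = packA16({1, 2, 3, 4, 5}); // 5 coords -> 3 dwords
  for (uint32_t D : Packed)
    std::printf("0x%08x\n", D);
  return 0;
}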
@@ -3673,43 +4075,37 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
/// the intrinsic's arguments. In cases like a16 addresses, this requires padding
/// now unnecessary arguments with $noreg.
bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
- MachineInstr &MI, MachineIRBuilder &B,
- GISelChangeObserver &Observer,
- const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const {
+ MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer,
+ const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
- const int NumDefs = MI.getNumExplicitDefs();
+ const unsigned NumDefs = MI.getNumExplicitDefs();
+ const unsigned ArgOffset = NumDefs + 1;
bool IsTFE = NumDefs == 2;
// We are only processing the operands of d16 image operations on subtargets
// that use the unpacked register layout, or need to repack the TFE result.
// TODO: Do we need to guard against already legalized intrinsics?
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
- AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
+ AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
MachineRegisterInfo *MRI = B.getMRI();
const LLT S32 = LLT::scalar(32);
const LLT S16 = LLT::scalar(16);
const LLT V2S16 = LLT::vector(2, 16);
- // Index of first address argument
- const int AddrIdx = getImageVAddrIdxBegin(BaseOpcode, NumDefs);
-
- int NumVAddrs, NumGradients;
- std::tie(NumVAddrs, NumGradients) = getImageNumVAddr(ImageDimIntr, BaseOpcode);
- const int DMaskIdx = BaseOpcode->Atomic ? -1 :
- getDMaskIdx(BaseOpcode, NumDefs);
unsigned DMask = 0;
// Check for 16 bit addresses and pack if true.
- int DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
- LLT GradTy = MRI->getType(MI.getOperand(DimIdx).getReg());
- LLT AddrTy = MRI->getType(MI.getOperand(DimIdx + NumGradients).getReg());
+ LLT GradTy =
+ MRI->getType(MI.getOperand(ArgOffset + Intr->GradientStart).getReg());
+ LLT AddrTy =
+ MRI->getType(MI.getOperand(ArgOffset + Intr->CoordStart).getReg());
const bool IsG16 = GradTy == S16;
const bool IsA16 = AddrTy == S16;
int DMaskLanes = 0;
if (!BaseOpcode->Atomic) {
- DMask = MI.getOperand(DMaskIdx).getImm();
+ DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
if (BaseOpcode->Gather4) {
DMaskLanes = 4;
} else if (DMask != 0) {
@@ -3736,7 +4132,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
if (IsTFE && DMask == 0) {
DMask = 0x1;
DMaskLanes = 1;
- MI.getOperand(DMaskIdx).setImm(DMask);
+ MI.getOperand(ArgOffset + Intr->DMaskIndex).setImm(DMask);
}
if (BaseOpcode->Atomic) {
@@ -3757,41 +4153,41 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
}
}
- int CorrectedNumVAddrs = NumVAddrs;
+ unsigned CorrectedNumVAddrs = Intr->NumVAddrs;
// Optimize _L to _LZ when _L is zero
if (const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
- AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
+ AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode)) {
const ConstantFP *ConstantLod;
- const int LodIdx = AddrIdx + NumVAddrs - 1;
- if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_GFCst(ConstantLod))) {
+ if (mi_match(MI.getOperand(ArgOffset + Intr->LodIndex).getReg(), *MRI,
+ m_GFCst(ConstantLod))) {
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
// Set new opcode to _lz variant of _l, and change the intrinsic ID.
- ImageDimIntr = AMDGPU::getImageDimInstrinsicByBaseOpcode(
- LZMappingInfo->LZ, ImageDimIntr->Dim);
+ const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+ AMDGPU::getImageDimInstrinsicByBaseOpcode(LZMappingInfo->LZ,
+ Intr->Dim);
// The starting indexes should remain in the same place.
- --NumVAddrs;
--CorrectedNumVAddrs;
- MI.getOperand(MI.getNumExplicitDefs()).setIntrinsicID(
- static_cast<Intrinsic::ID>(ImageDimIntr->Intr));
- MI.RemoveOperand(LodIdx);
+ MI.getOperand(MI.getNumExplicitDefs())
+ .setIntrinsicID(static_cast<Intrinsic::ID>(NewImageDimIntr->Intr));
+ MI.RemoveOperand(ArgOffset + Intr->LodIndex);
+ Intr = NewImageDimIntr;
}
}
}
// Optimize _mip away, when 'lod' is zero
- if (AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
+ if (AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode)) {
int64_t ConstantLod;
- const int LodIdx = AddrIdx + NumVAddrs - 1;
-
- if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_ICst(ConstantLod))) {
+ if (mi_match(MI.getOperand(ArgOffset + Intr->MipIndex).getReg(), *MRI,
+ m_ICst(ConstantLod))) {
if (ConstantLod == 0) {
// TODO: Change intrinsic opcode and remove the operand instead of replacing
// it with 0, as the _L to _LZ handling is done above.
- MI.getOperand(LodIdx).ChangeToImmediate(0);
+ MI.getOperand(ArgOffset + Intr->MipIndex).ChangeToImmediate(0);
--CorrectedNumVAddrs;
}
}
@@ -3806,18 +4202,17 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
} else if (!ST.hasG16())
return false;
- if (NumVAddrs > 1) {
+ if (Intr->NumVAddrs > 1) {
SmallVector<Register, 4> PackedRegs;
// Don't compress addresses for G16
- const int PackEndIdx =
- IsA16 ? (AddrIdx + NumVAddrs) : (DimIdx + NumGradients);
- packImageA16AddressToDwords(B, MI, PackedRegs, AddrIdx, DimIdx,
- PackEndIdx, NumGradients);
+ const int PackEndIdx = IsA16 ? Intr->VAddrEnd : Intr->CoordStart;
+ packImageA16AddressToDwords(B, MI, PackedRegs, ArgOffset, Intr,
+ PackEndIdx);
if (!IsA16) {
// Add uncompressed address
- for (int I = DimIdx + NumGradients; I != AddrIdx + NumVAddrs; ++I) {
- int AddrReg = MI.getOperand(I).getReg();
+ for (unsigned I = Intr->CoordStart; I < Intr->VAddrEnd; I++) {
+ int AddrReg = MI.getOperand(ArgOffset + I).getReg();
assert(B.getMRI()->getType(AddrReg) == LLT::scalar(32));
PackedRegs.push_back(AddrReg);
}
@@ -3833,9 +4228,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
PackedRegs.resize(1);
}
- const int NumPacked = PackedRegs.size();
- for (int I = 0; I != NumVAddrs; ++I) {
- MachineOperand &SrcOp = MI.getOperand(AddrIdx + I);
+ const unsigned NumPacked = PackedRegs.size();
+ for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
+ MachineOperand &SrcOp = MI.getOperand(ArgOffset + I);
if (!SrcOp.isReg()) {
assert(SrcOp.isImm() && SrcOp.getImm() == 0);
continue;
@@ -3843,8 +4238,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
assert(SrcOp.getReg() != AMDGPU::NoRegister);
- if (I < NumPacked)
- SrcOp.setReg(PackedRegs[I]);
+ if (I - Intr->VAddrStart < NumPacked)
+ SrcOp.setReg(PackedRegs[I - Intr->VAddrStart]);
else
SrcOp.setReg(AMDGPU::NoRegister);
}
@@ -3863,8 +4258,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
// allocation when possible.
const bool UseNSA = CorrectedNumVAddrs >= 3 && ST.hasNSAEncoding();
- if (!UseNSA && NumVAddrs > 1)
- convertImageAddrToPacked(B, MI, AddrIdx, NumVAddrs);
+ if (!UseNSA && Intr->NumVAddrs > 1)
+ convertImageAddrToPacked(B, MI, ArgOffset + Intr->VAddrStart,
+ Intr->NumVAddrs);
}
int Flags = 0;
@@ -3881,7 +4277,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
if (!Ty.isVector() || Ty.getElementType() != S16)
return true;
- Register RepackedReg = handleD16VData(B, *MRI, VData);
+ Register RepackedReg = handleD16VData(B, *MRI, VData, true);
if (RepackedReg != VData) {
MI.getOperand(1).setReg(RepackedReg);
}
@@ -4053,8 +4449,10 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
}
bool AMDGPULegalizerInfo::legalizeSBufferLoad(
- MachineInstr &MI, MachineIRBuilder &B,
- GISelChangeObserver &Observer) const {
+ LegalizerHelper &Helper, MachineInstr &MI) const {
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ GISelChangeObserver &Observer = Helper.Observer;
+
Register Dst = MI.getOperand(0).getReg();
LLT Ty = B.getMRI()->getType(Dst);
unsigned Size = Ty.getSizeInBits();
@@ -4062,6 +4460,13 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(
Observer.changingInstr(MI);
+ if (shouldBitcastLoadStoreType(ST, Ty, Size)) {
+ Ty = getBitcastRegisterType(Ty);
+ Helper.bitcastDst(MI, Ty, 0);
+ Dst = MI.getOperand(0).getReg();
+ B.setInsertPt(B.getMBB(), MI);
+ }
+
// FIXME: We don't really need this intermediate instruction. The intrinsic
// should be fixed to have a memory operand. Since it's readnone, we're not
// allowed to add one.
@@ -4083,8 +4488,6 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(
// always be legal. We may need to restore this to a 96-bit result if it turns
// out this needs to be converted to a vector load during RegBankSelect.
if (!isPowerOf2_32(Size)) {
- LegalizerHelper Helper(MF, *this, Observer, B);
-
if (Ty.isVector())
Helper.moreElementsVectorDst(MI, getPow2VectorType(Ty), 0);
else
@@ -4095,6 +4498,7 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(
return true;
}
+// TODO: Move to selection
bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -4105,17 +4509,14 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
} else {
// Pass queue pointer to trap handler as input, and insert trap instruction
// Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
- const ArgDescriptor *Arg =
- getArgDescriptor(B, AMDGPUFunctionArgInfo::QUEUE_PTR);
- if (!Arg)
- return false;
MachineRegisterInfo &MRI = *B.getMRI();
- Register SGPR01(AMDGPU::SGPR0_SGPR1);
- Register LiveIn = getLiveInRegister(
- B, MRI, SGPR01, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64),
- /*InsertLiveInCopy=*/false);
- if (!loadInputValue(LiveIn, B, Arg))
+
+ Register LiveIn =
+ MRI.createGenericVirtualRegister(LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+ if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
return false;
+
+ Register SGPR01(AMDGPU::SGPR0_SGPR1);
B.buildCopy(SGPR01, LiveIn);
B.buildInstr(AMDGPU::S_TRAP)
.addImm(GCNSubtarget::TrapIDLLVMTrap)
@@ -4146,6 +4547,78 @@ bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic(
return true;
}
+bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register NodePtr = MI.getOperand(2).getReg();
+ Register RayExtent = MI.getOperand(3).getReg();
+ Register RayOrigin = MI.getOperand(4).getReg();
+ Register RayDir = MI.getOperand(5).getReg();
+ Register RayInvDir = MI.getOperand(6).getReg();
+ Register TDescr = MI.getOperand(7).getReg();
+
+ bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
+ bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
+ unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa
+ : AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa
+ : Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa
+ : AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa;
+
+ SmallVector<Register, 12> Ops;
+ if (Is64) {
+ auto Unmerge = B.buildUnmerge({S32, S32}, NodePtr);
+ Ops.push_back(Unmerge.getReg(0));
+ Ops.push_back(Unmerge.getReg(1));
+ } else {
+ Ops.push_back(NodePtr);
+ }
+ Ops.push_back(RayExtent);
+
+ auto packLanes = [&Ops, &S32, &B] (Register Src) {
+ auto Unmerge = B.buildUnmerge({S32, S32, S32, S32}, Src);
+ Ops.push_back(Unmerge.getReg(0));
+ Ops.push_back(Unmerge.getReg(1));
+ Ops.push_back(Unmerge.getReg(2));
+ };
+
+ packLanes(RayOrigin);
+ if (IsA16) {
+ auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16, S16}, RayDir);
+ auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16, S16}, RayInvDir);
+ Register R1 = MRI.createGenericVirtualRegister(S32);
+ Register R2 = MRI.createGenericVirtualRegister(S32);
+ Register R3 = MRI.createGenericVirtualRegister(S32);
+ B.buildMerge(R1, {UnmergeRayDir.getReg(0), UnmergeRayDir.getReg(1)});
+ B.buildMerge(R2, {UnmergeRayDir.getReg(2), UnmergeRayInvDir.getReg(0)});
+ B.buildMerge(R3, {UnmergeRayInvDir.getReg(1), UnmergeRayInvDir.getReg(2)});
+ Ops.push_back(R1);
+ Ops.push_back(R2);
+ Ops.push_back(R3);
+ } else {
+ packLanes(RayDir);
+ packLanes(RayInvDir);
+ }
+
+ auto MIB = B.buildInstr(AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY)
+ .addDef(DstReg)
+ .addImm(Opcode);
+
+ for (Register R : Ops) {
+ MIB.addUse(R);
+ }
+
+ MIB.addUse(TDescr)
+ .addImm(IsA16 ? 1 : 0)
+ .cloneMemRefs(MI);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &B = Helper.MIRBuilder;
@@ -4158,7 +4631,9 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_else: {
MachineInstr *Br = nullptr;
MachineBasicBlock *UncondBrTarget = nullptr;
- if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
+ bool Negated = false;
+ if (MachineInstr *BrCond =
+ verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget, Negated)) {
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
@@ -4166,6 +4641,10 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
Register Use = MI.getOperand(3).getReg();
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
+
+ if (Negated)
+ std::swap(CondBrTarget, UncondBrTarget);
+
B.setInsertPt(B.getMBB(), BrCond->getIterator());
if (IntrID == Intrinsic::amdgcn_if) {
B.buildInstr(AMDGPU::SI_IF)
@@ -4174,10 +4653,9 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
.addMBB(UncondBrTarget);
} else {
B.buildInstr(AMDGPU::SI_ELSE)
- .addDef(Def)
- .addUse(Use)
- .addMBB(UncondBrTarget)
- .addImm(0);
+ .addDef(Def)
+ .addUse(Use)
+ .addMBB(UncondBrTarget);
}
if (Br) {
@@ -4201,13 +4679,18 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_loop: {
MachineInstr *Br = nullptr;
MachineBasicBlock *UncondBrTarget = nullptr;
- if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
+ bool Negated = false;
+ if (MachineInstr *BrCond =
+ verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget, Negated)) {
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
Register Reg = MI.getOperand(2).getReg();
+ if (Negated)
+ std::swap(CondBrTarget, UncondBrTarget);
+
B.setInsertPt(B.getMBB(), BrCond->getIterator());
B.buildInstr(AMDGPU::SI_LOOP)
.addUse(Reg)
@@ -4280,7 +4763,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return true;
}
case Intrinsic::amdgcn_s_buffer_load:
- return legalizeSBufferLoad(MI, B, Helper.Observer);
+ return legalizeSBufferLoad(Helper, MI);
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store:
return legalizeBufferStore(MI, MRI, B, false, false);
@@ -4323,6 +4806,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_struct_buffer_atomic_inc:
case Intrinsic::amdgcn_raw_buffer_atomic_dec:
case Intrinsic::amdgcn_struct_buffer_atomic_dec:
+ case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
+ case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
return legalizeBufferAtomic(MI, B, IntrID);
@@ -4334,6 +4819,14 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeTrapIntrinsic(MI, MRI, B);
case Intrinsic::debugtrap:
return legalizeDebugTrapIntrinsic(MI, MRI, B);
+ case Intrinsic::amdgcn_rsq_clamp:
+ return legalizeRsqClampIntrinsic(MI, MRI, B);
+ case Intrinsic::amdgcn_ds_fadd:
+ case Intrinsic::amdgcn_ds_fmin:
+ case Intrinsic::amdgcn_ds_fmax:
+ return legalizeDSAtomicFPIntrinsic(Helper, MI, IntrID);
+ case Intrinsic::amdgcn_image_bvh_intersect_ray:
+ return legalizeBVHIntrinsic(MI, B);
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index ce32bbf76b34..87e8b2128a25 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -23,9 +23,13 @@ namespace llvm {
class GCNTargetMachine;
class LLVMContext;
class GCNSubtarget;
+class MachineIRBuilder;
+namespace AMDGPU {
+struct ImageDimIntrinsicInfo;
+}
/// This class provides the information for the target register banks.
-class AMDGPULegalizerInfo : public LegalizerInfo {
+class AMDGPULegalizerInfo final : public LegalizerInfo {
const GCNSubtarget &ST;
public:
@@ -44,6 +48,8 @@ public:
MachineIRBuilder &B) const;
bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeFrem(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -67,9 +73,7 @@ public:
bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
- bool legalizeLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
- GISelChangeObserver &Observer) const;
+ bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
@@ -86,16 +90,11 @@ public:
bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
- Register getLiveInRegister(MachineIRBuilder &B, MachineRegisterInfo &MRI,
- Register PhyReg, LLT Ty,
- bool InsertLiveInCopy = true) const;
- Register insertLiveInCopy(MachineIRBuilder &B, MachineRegisterInfo &MRI,
- Register LiveIn, Register PhyReg) const;
- const ArgDescriptor *
- getArgDescriptor(MachineIRBuilder &B,
- AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
- const ArgDescriptor *Arg) const;
+ const ArgDescriptor *Arg,
+ const TargetRegisterClass *ArgRC, LLT ArgTy) const;
+ bool loadInputValue(Register DstReg, MachineIRBuilder &B,
+ AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
bool legalizePreloadedArgIntrin(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
@@ -130,9 +129,20 @@ public:
MachineIRBuilder &B) const;
bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeFastUnsafeFDIV64(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeRsqClampIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
+ bool legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI, Intrinsic::ID IID) const;
+
+ bool getImplicitArgPtr(Register DstReg, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -142,7 +152,7 @@ public:
splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
- Register Reg) const;
+ Register Reg, bool ImageStore = false) const;
bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool IsFormat) const;
bool legalizeRawBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -154,19 +164,19 @@ public:
MachineIRBuilder &B, bool IsTyped,
bool IsFormat) const;
bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B, bool IsTyped,
- bool IsFormat) const;
+ MachineIRBuilder &B, bool IsFormat,
+ bool IsTyped) const;
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
Intrinsic::ID IID) const;
+ bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
+
bool legalizeImageIntrinsic(
MachineInstr &MI, MachineIRBuilder &B,
GISelChangeObserver &Observer,
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const;
- bool legalizeSBufferLoad(
- MachineInstr &MI, MachineIRBuilder &B,
- GISelChangeObserver &Observer) const;
+ bool legalizeSBufferLoad(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B,
bool IsInc) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 4a14259f1bdb..6b7f57252b7a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -13,27 +13,12 @@
#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
+#include "GCNSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include <cmath>
-#include <vector>
#define DEBUG_TYPE "amdgpu-simplifylib"
@@ -495,8 +480,7 @@ bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
}
bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
- return AllNative ||
- std::find(UseNative.begin(), UseNative.end(), F) != UseNative.end();
+ return AllNative || llvm::is_contained(UseNative, F);
}
void AMDGPULibCalls::initNativeFuncs() {
@@ -1289,6 +1273,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
BasicBlock * const CBB = CI->getParent();
int const MaxScan = 30;
+ bool Changed = false;
{ // fold in load value.
LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
@@ -1296,6 +1281,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
BasicBlock::iterator BBI = LI->getIterator();
Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
if (AvailableVal) {
+ Changed = true;
CArgVal->replaceAllUsesWith(AvailableVal);
if (CArgVal->getNumUses() == 0)
LI->eraseFromParent();
@@ -1331,7 +1317,8 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
if (UI) break;
}
- if (!UI) return false;
+ if (!UI)
+ return Changed;
// Merge the sin and cos.
@@ -1340,7 +1327,8 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
FunctionCallee Fsincos = getFunction(M, nf);
- if (!Fsincos) return false;
+ if (!Fsincos)
+ return Changed;
BasicBlock::iterator ItOld = B.GetInsertPoint();
AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
@@ -1747,6 +1735,40 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
return Changed;
}
+PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ AMDGPULibCalls Simplifier(&TM);
+ Simplifier.initNativeFuncs();
+
+ bool Changed = false;
+ auto AA = &AM.getResult<AAManager>(F);
+
+ LLVM_DEBUG(dbgs() << "AMDIC: process function ";
+ F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
+
+ for (auto &BB : F) {
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
+ // Ignore non-calls.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ ++I;
+ // Ignore intrinsics that do not become real instructions.
+ if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
+ continue;
+
+ // Ignore indirect calls.
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
+ dbgs().flush());
+ if (Simplifier.fold(CI, AA))
+ Changed = true;
+ }
+ }
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
if (skipFunction(F) || UseNative.empty())
return false;
@@ -1769,3 +1791,32 @@ bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
}
return Changed;
}
+
+PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (UseNative.empty())
+ return PreservedAnalyses::all();
+
+ AMDGPULibCalls Simplifier;
+ Simplifier.initNativeFuncs();
+
+ bool Changed = false;
+ for (auto &BB : F) {
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
+ // Ignore non-calls.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ ++I;
+ if (!CI)
+ continue;
+
+ // Ignore indirect calls.
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0)
+ continue;
+
+ if (Simplifier.useNative(CI))
+ Changed = true;
+ }
+ }
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
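
The two run() overloads added above share one iteration idiom: the basic-block iterator is advanced before fold()/useNative() is invoked, because folding may erase the current CallInst. A minimal standalone sketch of that idiom (illustrative only, not part of this commit; foldCallsInFunction and FoldFn are hypothetical names):

#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// FoldFn is any callback that may erase the CallInst it is given.
template <typename FoldFn>
static bool foldCallsInFunction(Function &F, FoldFn Fold) {
  bool Changed = false;
  for (BasicBlock &BB : F) {
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
      CallInst *CI = dyn_cast<CallInst>(&*I);
      ++I; // advance first: Fold may erase *CI below
      if (!CI || !CI->getCalledFunction())
        continue; // skip non-calls and indirect calls
      if (Fold(CI))
        Changed = true;
    }
  }
  return Changed;
}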
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index 2b5143ba7506..646087cdb7db 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -12,17 +12,14 @@
#include "AMDGPULibFunc.h"
#include "AMDGPU.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/raw_ostream.h"
-#include <string>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index 54c15e4e4d39..714e74faaf13 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -8,12 +8,16 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#define DEBUG_TYPE "amdgpu-lower-intrinsics"
@@ -131,7 +135,9 @@ bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
if (!CI)
continue;
- Changed |= AMDGPUSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
+ Function *Caller = CI->getParent()->getParent();
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *Caller);
+ Changed |= ST.makeLIDRangeMetadata(CI);
}
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 62ab5bb55a16..8fb4f93fd4b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -12,30 +12,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDGPUTargetMachine.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/CodeGen/Passes.h"
+#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"
using namespace llvm;
@@ -108,10 +89,14 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
uint64_t ExplicitArgOffset = 0;
for (Argument &Arg : F.args()) {
- Type *ArgTy = Arg.getType();
- Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
- unsigned Size = DL.getTypeSizeInBits(ArgTy);
- unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
+ const bool IsByRef = Arg.hasByRefAttr();
+ Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
+ MaybeAlign ABITypeAlign = IsByRef ? Arg.getParamAlign() : None;
+ if (!ABITypeAlign)
+ ABITypeAlign = DL.getABITypeAlign(ArgTy);
+
+ uint64_t Size = DL.getTypeSizeInBits(ArgTy);
+ uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
@@ -119,6 +104,19 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
if (Arg.use_empty())
continue;
+ // If this is byval, the loads are already explicit in the function. We just
+ // need to rewrite the pointer values.
+ if (IsByRef) {
+ Value *ArgOffsetPtr = Builder.CreateConstInBoundsGEP1_64(
+ Builder.getInt8Ty(), KernArgSegment, EltOffset,
+ Arg.getName() + ".byval.kernarg.offset");
+
+ Value *CastOffsetPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ ArgOffsetPtr, Arg.getType());
+ Arg.replaceAllUsesWith(CastOffsetPtr);
+ continue;
+ }
+
if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
// FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
// modes on SI to know the high bits are 0 so pointer adds don't wrap. We
@@ -224,8 +222,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
Arg.getName() + ".load");
Arg.replaceAllUsesWith(NewVal);
} else if (IsV3) {
- Value *Shuf = Builder.CreateShuffleVector(Load, UndefValue::get(V4Ty),
- ArrayRef<int>{0, 1, 2},
+ Value *Shuf = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 2},
Arg.getName() + ".load");
Arg.replaceAllUsesWith(Shuf);
} else {
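
The widened offset arithmetic in the hunk above (uint64_t Size/AllocSize, byref-aware alignment) follows a simple layout rule: each explicit kernel argument is placed at the running offset rounded up to its alignment, and the running offset then advances by the argument's allocation size. A small self-contained sketch, with hypothetical names (ArgLayout, layoutKernArgs) and a plain alignTo helper standing in for LLVM's:

#include <cstdint>
#include <vector>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

struct ArgLayout {
  uint64_t AllocSize; // DL.getTypeAllocSize(ArgTy)
  uint64_t Align;     // param align for byref args, else ABI type align
};

// Mirrors the EltOffset/ExplicitArgOffset bookkeeping in the loop above.
// BaseOffset stands in for the subtarget's explicit kernel-argument offset.
static std::vector<uint64_t> layoutKernArgs(const std::vector<ArgLayout> &Args,
                                            uint64_t BaseOffset) {
  std::vector<uint64_t> Offsets;
  uint64_t ExplicitArgOffset = 0;
  for (const ArgLayout &A : Args) {
    uint64_t EltOffset = alignTo(ExplicitArgOffset, A.Align) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, A.Align) + A.AllocSize;
    Offsets.push_back(EltOffset);
  }
  return Offsets;
}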
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
index 00e12f808783..9ab6a5246ce5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -13,13 +13,14 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
@@ -41,16 +42,11 @@ enum DispatchPackedOffsets {
};
class AMDGPULowerKernelAttributes : public ModulePass {
- Module *Mod = nullptr;
-
public:
static char ID;
AMDGPULowerKernelAttributes() : ModulePass(ID) {}
- bool processUse(CallInst *CI);
-
- bool doInitialization(Module &M) override;
bool runOnModule(Module &M) override;
StringRef getPassName() const override {
@@ -64,12 +60,7 @@ public:
} // end anonymous namespace
-bool AMDGPULowerKernelAttributes::doInitialization(Module &M) {
- Mod = &M;
- return false;
-}
-
-bool AMDGPULowerKernelAttributes::processUse(CallInst *CI) {
+static bool processUse(CallInst *CI) {
Function *F = CI->getParent()->getParent();
auto MD = F->getMetadata("reqd_work_group_size");
@@ -89,7 +80,7 @@ bool AMDGPULowerKernelAttributes::processUse(CallInst *CI) {
Value *GridSizeY = nullptr;
Value *GridSizeZ = nullptr;
- const DataLayout &DL = Mod->getDataLayout();
+ const DataLayout &DL = F->getParent()->getDataLayout();
// We expect to see several GEP users, casted to the appropriate type and
// loaded.
@@ -239,7 +230,7 @@ bool AMDGPULowerKernelAttributes::runOnModule(Module &M) {
StringRef DispatchPtrName
= Intrinsic::getName(Intrinsic::amdgcn_dispatch_ptr);
- Function *DispatchPtr = Mod->getFunction(DispatchPtrName);
+ Function *DispatchPtr = M.getFunction(DispatchPtrName);
if (!DispatchPtr) // Dispatch ptr not used.
return false;
@@ -267,3 +258,22 @@ char AMDGPULowerKernelAttributes::ID = 0;
ModulePass *llvm::createAMDGPULowerKernelAttributesPass() {
return new AMDGPULowerKernelAttributes();
}
+
+PreservedAnalyses
+AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) {
+ StringRef DispatchPtrName =
+ Intrinsic::getName(Intrinsic::amdgcn_dispatch_ptr);
+
+ Function *DispatchPtr = F.getParent()->getFunction(DispatchPtrName);
+ if (!DispatchPtr) // Dispatch ptr not used.
+ return PreservedAnalyses::all();
+
+ for (Instruction &I : instructions(F)) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ if (CI->getCalledFunction() == DispatchPtr)
+ processUse(CI);
+ }
+ }
+
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 99d229c9b74e..a8cba3f5cc5c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -13,12 +13,10 @@
//
#include "AMDGPUAsmPrinter.h"
-#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600AsmPrinter.h"
-#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
@@ -323,7 +321,10 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
// The isPseudo check really shouldn't be here, but unfortunately there are
// some negative lit tests that depend on being able to continue through
// here even when pseudo instructions haven't been lowered.
- if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU())) {
+ //
+ // We also overestimate branch sizes with the offset bug.
+ if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU()) &&
+ (!STI.hasOffset3fBug() || !MI->isBranch())) {
SmallVector<MCFixup, 4> Fixups;
SmallVector<char, 16> CodeBytes;
raw_svector_ostream CodeStream(CodeBytes);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
new file mode 100644
index 000000000000..c3441f81a78e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
@@ -0,0 +1,38 @@
+//===- AMDGPUMIRFormatter.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Implementation of AMDGPU overrides of MIRFormatter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMIRFormatter.h"
+#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+bool AMDGPUMIRFormatter::parseCustomPseudoSourceValue(
+ StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS,
+ const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const SIInstrInfo &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+ if (Src == "BufferResource") {
+ PSV = MFI->getBufferPSV(TII);
+ return false;
+ }
+ if (Src == "ImageResource") {
+ PSV = MFI->getImagePSV(TII);
+ return false;
+ }
+ if (Src == "GWSResource") {
+ PSV = MFI->getGWSPSV(TII);
+ return false;
+ }
+ llvm_unreachable("unknown MIR custom pseudo source value");
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
new file mode 100644
index 000000000000..a61f1f7b8182
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
@@ -0,0 +1,47 @@
+//===-- llvm/Target/AMDGPU/AMDGPUMIRFormatter.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// AMDGPU specific overrides of MIRFormatter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPUMIRFORMATTER_H
+#define LLVM_LIB_TARGET_AMDGPUMIRFORMATTER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/MIRFormatter.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+
+namespace llvm {
+
+class MachineFunction;
+class MachineInstr;
+struct PerFunctionMIParsingState;
+struct SlotMapping;
+
+class AMDGPUMIRFormatter final : public MIRFormatter {
+public:
+ AMDGPUMIRFormatter() {}
+ virtual ~AMDGPUMIRFormatter() = default;
+
+ /// Implement target specific parsing of target custom pseudo source value.
+ virtual bool
+ parseCustomPseudoSourceValue(StringRef Src, MachineFunction &MF,
+ PerFunctionMIParsingState &PFS,
+ const PseudoSourceValue *&PSV,
+ ErrorCallbackType ErrorCallback) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index f61af5a27943..b6a69b2819ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -11,36 +11,17 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
+#include "GCNSubtarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegionInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <tuple>
-#include <utility>
using namespace llvm;
@@ -342,11 +323,11 @@ protected:
LinearizedRegion *Parent;
RegionMRT *RMRT;
- void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
+ void storeLiveOutReg(MachineBasicBlock *MBB, Register Reg,
MachineInstr *DefInstr, const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
- void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
+ void storeLiveOutRegRegion(RegionMRT *Region, Register Reg,
MachineInstr *DefInstr,
const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI,
@@ -397,7 +378,7 @@ public:
void replaceLiveOut(unsigned OldReg, unsigned NewReg);
- void replaceRegister(unsigned Register, unsigned NewRegister,
+ void replaceRegister(unsigned Register, class Register NewRegister,
MachineRegisterInfo *MRI, bool ReplaceInside,
bool ReplaceOutside, bool IncludeLoopPHIs);
@@ -690,12 +671,12 @@ RegionMRT *MRT::buildMRT(MachineFunction &MF,
return Result;
}
-void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
+void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, Register Reg,
MachineInstr *DefInstr,
const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI,
PHILinearize &PHIInfo) {
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
<< "\n");
// If this is a source register to a PHI we are chaining, it
@@ -730,12 +711,12 @@ void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
}
}
-void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
+void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, Register Reg,
MachineInstr *DefInstr,
const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI,
PHILinearize &PHIInfo) {
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
<< "\n");
for (auto &UI : MRI->use_operands(Reg)) {
@@ -907,7 +888,8 @@ void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) {
}
}
-void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
+void LinearizedRegion::replaceRegister(unsigned Register,
+ class Register NewRegister,
MachineRegisterInfo *MRI,
bool ReplaceInside, bool ReplaceOutside,
bool IncludeLoopPHI) {
@@ -950,7 +932,7 @@ void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
(IncludeLoopPHI && IsLoopPHI);
if (ShouldReplace) {
- if (Register::isPhysicalRegister(NewRegister)) {
+ if (NewRegister.isPhysical()) {
LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
<< printReg(NewRegister, MRI->getTargetRegisterInfo())
<< "\n");
@@ -1002,11 +984,11 @@ void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) {
}
bool LinearizedRegion::contains(MachineBasicBlock *MBB) {
- return MBBs.count(MBB) == 1;
+ return MBBs.contains(MBB);
}
bool LinearizedRegion::isLiveOut(unsigned Reg) {
- return LiveOuts.count(Reg) == 1;
+ return LiveOuts.contains(Reg);
}
bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) {
@@ -1025,7 +1007,7 @@ void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
for (auto &RI : II.uses()) {
if (RI.isReg()) {
Register Reg = RI.getReg();
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
if (hasNoDef(Reg, MRI))
continue;
if (!MRI->hasOneDef(Reg)) {
@@ -1168,7 +1150,7 @@ private:
void createEntryPHIs(LinearizedRegion *CurrentRegion);
void resolvePHIInfos(MachineBasicBlock *FunctionEntry);
- void replaceRegisterWith(unsigned Register, unsigned NewRegister);
+ void replaceRegisterWith(unsigned Register, class Register NewRegister);
MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB,
MachineBasicBlock *CodeBB,
@@ -1872,7 +1854,7 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
? SinglePred->findDebugLoc(SinglePred->getFirstTerminator())
: DebugLoc();
- unsigned Reg =
+ Register Reg =
TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg,
SelectBB->getNumber() /* CodeBBStart->getNumber() */);
if (&(*(IfBB->getParent()->begin())) == IfBB) {
@@ -2224,8 +2206,8 @@ void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegi
PHIInfo.clear();
}
-void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register,
- unsigned NewRegister) {
+void AMDGPUMachineCFGStructurizer::replaceRegisterWith(
+ unsigned Register, class Register NewRegister) {
assert(Register != NewRegister && "Cannot replace a reg with itself");
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
@@ -2233,7 +2215,7 @@ void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register,
I != E;) {
MachineOperand &O = *I;
++I;
- if (Register::isPhysicalRegister(NewRegister)) {
+ if (NewRegister.isPhysical()) {
LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
<< printReg(NewRegister, MRI->getTargetRegisterInfo())
<< "\n");
@@ -2334,7 +2316,7 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
TII->removeBranch(*RegionExit);
// We need to create a backedge if there is a loop
- unsigned Reg = TII->insertNE(
+ Register Reg = TII->insertNE(
RegionExit, RegionExit->instr_end(), DL,
CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
CurrentRegion->getRegionMRT()->getEntry()->getNumber());
@@ -2393,7 +2375,7 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
TII->removeBranch(*RegionExit);
// We need to create a backedge if there is a loop
- unsigned Reg =
+ Register Reg =
TII->insertNE(RegionExit, RegionExit->instr_end(), DL,
CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
CurrentRegion->getRegionMRT()->getEntry()->getNumber());
@@ -2592,7 +2574,7 @@ static void removeOldExitPreds(RegionMRT *Region) {
static bool mbbHasBackEdge(MachineBasicBlock *MBB,
SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
- if (MBBs.count(*SI) != 0) {
+ if (MBBs.contains(*SI)) {
return true;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 64acd6efe028..717145b7af53 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -7,17 +7,20 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMachineFunction.h"
-#include "AMDGPUSubtarget.h"
#include "AMDGPUPerfHintAnalysis.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
- MachineFunctionInfo(),
- Mode(MF.getFunction()),
- IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
- NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
+AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
+ : MachineFunctionInfo(), Mode(MF.getFunction()),
+ IsEntryFunction(
+ AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
+ IsModuleEntryFunction(
+ AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
+ NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
@@ -49,10 +52,27 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
/// TODO: We should sort these to minimize wasted space due to alignment
/// padding. Currently the padding is decided by the first encountered use
/// during lowering.
- unsigned Offset = LDSSize = alignTo(LDSSize, Alignment);
+ unsigned Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
Entry.first->second = Offset;
- LDSSize += DL.getTypeAllocSize(GV.getValueType());
+ StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
+
+ // Update the LDS size considering the padding to align the dynamic shared
+ // memory.
+ LDSSize = alignTo(StaticLDSSize, DynLDSAlign);
return Offset;
}
+
+void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
+ const GlobalVariable &GV) {
+ assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
+
+ Align Alignment =
+ DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
+ if (Alignment <= DynLDSAlign)
+ return;
+
+ LDSSize = alignTo(StaticLDSSize, Alignment);
+ DynLDSAlign = Alignment;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index c504dd76bc65..07cac776082d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -9,9 +9,9 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "Utils/AMDGPUBaseInfo.h"
namespace llvm {
@@ -29,13 +29,27 @@ protected:
/// Number of bytes in the LDS that are being used.
unsigned LDSSize = 0;
+ /// Number of bytes in the LDS allocated statically. This field is only used
+ /// in the instruction selector and not part of the machine function info.
+ unsigned StaticLDSSize = 0;
+
+ /// Align for dynamic shared memory if any. Dynamic shared memory is
+ /// allocated directly after the static one, i.e., LDSSize. Need to pad
+ /// LDSSize to ensure that dynamic one is aligned accordingly.
+ /// The maximal alignment is updated during IR translation or lowering
+ /// stages.
+ Align DynLDSAlign;
+
// State of MODE register, assumed FP mode.
AMDGPU::SIModeRegisterDefaults Mode;
- // Kernels + shaders. i.e. functions called by the driver and not called
+ // Kernels + shaders. i.e. functions called by the hardware and not called
// by other functions.
bool IsEntryFunction = false;
+ // Entry points called by other functions instead of directly by the hardware.
+ bool IsModuleEntryFunction = false;
+
bool NoSignedZerosFPMath = false;
// Function may be memory bound.
@@ -65,6 +79,8 @@ public:
return IsEntryFunction;
}
+ bool isModuleEntryFunction() const { return IsModuleEntryFunction; }
+
bool hasNoSignedZerosFPMath() const {
return NoSignedZerosFPMath;
}
@@ -78,6 +94,10 @@ public:
}
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV);
+
+ Align getDynLDSAlign() const { return DynLDSAlign; }
+
+ void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV);
};
}
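
The StaticLDSSize/DynLDSAlign split introduced above keeps two numbers: the bytes actually occupied by static LDS globals, and the reported LDSSize, which is the static size padded so that dynamic LDS (allocated directly after the static portion) starts at the strictest alignment seen so far. A minimal sketch of that bookkeeping, assuming a plain integer alignTo (the names and interface here are illustrative, not the class's real API):

#include <cstdint>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

struct LDSInfo {
  uint64_t StaticLDSSize = 0; // bytes used by static LDS globals
  uint64_t LDSSize = 0;       // static size padded for dynamic LDS
  uint64_t DynLDSAlign = 1;   // strictest alignment of any dynamic LDS use

  // Corresponds to allocateLDSGlobal: place a static global at its required
  // alignment, then re-pad LDSSize so the dynamic region stays aligned.
  uint64_t allocateStatic(uint64_t Size, uint64_t Align) {
    uint64_t Offset = StaticLDSSize = alignTo(StaticLDSSize, Align);
    StaticLDSSize += Size;
    LDSSize = alignTo(StaticLDSSize, DynLDSAlign);
    return Offset;
  }

  // Corresponds to setDynLDSAlign: only a stricter alignment changes anything.
  void noteDynamicAlign(uint64_t Align) {
    if (Align <= DynLDSAlign)
      return;
    LDSSize = alignTo(StaticLDSSize, Align);
    DynLDSAlign = Align;
  }
};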
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
index 4d9f08b3af01..6646cce8186b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMachineModuleInfo.h"
-#include "llvm/IR/Module.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
index 2b0b8b42acfe..1b513c456307 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
@@ -15,11 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEMODULEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEMODULEINFO_H
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/IR/LLVMContext.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
index b05855d1afc6..c15c94ee17f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -12,10 +12,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMacroFusion.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-
+#include "SIInstrInfo.h"
#include "llvm/CodeGen/MacroFusion.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h
index da4b3cf8bc24..82c6d75bb060 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include <memory>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 4f9ffa11bc73..d27eb68ca74b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -34,16 +34,11 @@
#include "AMDGPU.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/User.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "amdgpu-lower-enqueued-block"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
index 8b69f51c1a0d..756bc948b1dd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H
+namespace llvm {
namespace AMDGPU {
namespace ElfNote {
@@ -41,7 +42,7 @@ enum NoteType{
NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
};
-}
-}
-
+} // End namespace ElfNote
+} // End namespace AMDGPU
+} // End namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUNOTETYPE_H
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 93079738ef99..2f6220e425cc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -22,11 +22,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/ValueMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index 9599e09fbd96..99dbf5080741 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -17,7 +17,6 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/ValueMap.h"
-#include "llvm/Pass.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 098b0e993886..09e2c762abdb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -11,35 +11,65 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/Debug.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
using namespace llvm;
using namespace MIPatternMatch;
-struct FMinFMaxLegacyInfo {
- Register LHS;
- Register RHS;
- Register True;
- Register False;
- CmpInst::Predicate Pred;
+class AMDGPUPostLegalizerCombinerHelper {
+protected:
+ MachineIRBuilder &B;
+ MachineFunction &MF;
+ MachineRegisterInfo &MRI;
+ CombinerHelper &Helper;
+
+public:
+ AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
+ : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
+
+ struct FMinFMaxLegacyInfo {
+ Register LHS;
+ Register RHS;
+ Register True;
+ Register False;
+ CmpInst::Predicate Pred;
+ };
+
+ // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
+ bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
+ void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
+ const FMinFMaxLegacyInfo &Info);
+
+ bool matchUCharToFloat(MachineInstr &MI);
+ void applyUCharToFloat(MachineInstr &MI);
+
+ // FIXME: Should be able to have 2 separate matchdatas rather than custom
+ // struct boilerplate.
+ struct CvtF32UByteMatchInfo {
+ Register CvtVal;
+ unsigned ShiftOffset;
+ };
+
+ bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
+ void applyCvtF32UByteN(MachineInstr &MI,
+ const CvtF32UByteMatchInfo &MatchInfo);
};
-// TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
-static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
+bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
+ MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
// FIXME: Combines should have subtarget predicates, and we shouldn't need
// this here.
if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
@@ -77,12 +107,11 @@ static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
}
}
-static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
- const FMinFMaxLegacyInfo &Info) {
-
- auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) {
- MachineIRBuilder MIB(MI);
- MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
+void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
+ MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
+ B.setInstrAndDebugLoc(MI);
+ auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
+ B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
};
switch (Info.Pred) {
@@ -127,8 +156,7 @@ static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
MI.eraseFromParent();
}
-static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineFunction &MF, CombinerHelper &Helper) {
+bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
// TODO: We could try to match extracting the higher bytes, which would be
@@ -147,15 +175,15 @@ static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
return false;
}
-static void applyUCharToFloat(MachineInstr &MI) {
- MachineIRBuilder B(MI);
+void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
+ B.setInstrAndDebugLoc(MI);
const LLT S32 = LLT::scalar(32);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
- LLT Ty = B.getMRI()->getType(DstReg);
- LLT SrcTy = B.getMRI()->getType(SrcReg);
+ LLT Ty = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy != S32)
SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
@@ -171,16 +199,8 @@ static void applyUCharToFloat(MachineInstr &MI) {
MI.eraseFromParent();
}
-// FIXME: Should be able to have 2 separate matchdatas rather than custom struct
-// boilerplate.
-struct CvtF32UByteMatchInfo {
- Register CvtVal;
- unsigned ShiftOffset;
-};
-
-static bool matchCvtF32UByteN(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineFunction &MF,
- CvtF32UByteMatchInfo &MatchInfo) {
+bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
+ MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
Register SrcReg = MI.getOperand(1).getReg();
// Look through G_ZEXT.
@@ -207,14 +227,14 @@ static bool matchCvtF32UByteN(MachineInstr &MI, MachineRegisterInfo &MRI,
return false;
}
-static void applyCvtF32UByteN(MachineInstr &MI,
- const CvtF32UByteMatchInfo &MatchInfo) {
- MachineIRBuilder B(MI);
+void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
+ MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
+ B.setInstrAndDebugLoc(MI);
unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
const LLT S32 = LLT::scalar(32);
Register CvtSrc = MatchInfo.CvtVal;
- LLT SrcTy = B.getMRI()->getType(MatchInfo.CvtVal);
+ LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
if (SrcTy != S32) {
assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
@@ -225,6 +245,18 @@ static void applyCvtF32UByteN(MachineInstr &MI,
MI.eraseFromParent();
}
+class AMDGPUPostLegalizerCombinerHelperState {
+protected:
+ CombinerHelper &Helper;
+ AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
+
+public:
+ AMDGPUPostLegalizerCombinerHelperState(
+ CombinerHelper &Helper,
+ AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
+ : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
+};
+
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
@@ -234,7 +266,7 @@ namespace {
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
-class AMDGPUPostLegalizerCombinerInfo : public CombinerInfo {
+class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
@@ -258,10 +290,12 @@ public:
bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B, KB, MDT);
- AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);
+ CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
+ AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
+ AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
+ PostLegalizerHelper);
- if (Generated.tryCombineAll(Observer, MI, B, Helper))
+ if (Generated.tryCombineAll(Observer, MI, B))
return true;
switch (MI.getOpcode()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index 800ad2039f0e..e4b628bf6b23 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -11,17 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/Debug.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
@@ -37,7 +35,7 @@ namespace {
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
-class AMDGPUPreLegalizerCombinerInfo : public CombinerInfo {
+class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 524a34be876f..c8bd9b96b44f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -19,33 +19,21 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
using namespace llvm;
#define DEBUG_TYPE "printfToRuntime"
#define DWORD_ALIGN 4
namespace {
-class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final
- : public ModulePass {
+class AMDGPUPrintfRuntimeBinding final : public ModulePass {
public:
static char ID;
@@ -54,25 +42,36 @@ public:
private:
bool runOnModule(Module &M) override;
- void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers,
- StringRef fmt, size_t num_ops) const;
-
- bool shouldPrintAsStr(char Specifier, Type *OpType) const;
- bool
- lowerPrintfForGpu(Module &M,
- function_ref<const TargetLibraryInfo &(Function &)> GetTLI);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
}
+};
- Value *simplify(Instruction *I, const TargetLibraryInfo *TLI) {
+class AMDGPUPrintfRuntimeBindingImpl {
+public:
+ AMDGPUPrintfRuntimeBindingImpl(
+ function_ref<const DominatorTree &(Function &)> GetDT,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI)
+ : GetDT(GetDT), GetTLI(GetTLI) {}
+ bool run(Module &M);
+
+private:
+ void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers,
+ StringRef fmt, size_t num_ops) const;
+
+ bool shouldPrintAsStr(char Specifier, Type *OpType) const;
+ bool lowerPrintfForGpu(Module &M);
+
+ Value *simplify(Instruction *I, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
return SimplifyInstruction(I, {*TD, TLI, DT});
}
const DataLayout *TD;
- const DominatorTree *DT;
+ function_ref<const DominatorTree &(Function &)> GetDT;
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
SmallVector<CallInst *, 32> Printfs;
};
} // namespace
@@ -95,12 +94,11 @@ ModulePass *createAMDGPUPrintfRuntimeBinding() {
}
} // namespace llvm
-AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding()
- : ModulePass(ID), TD(nullptr), DT(nullptr) {
+AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() : ModulePass(ID) {
initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry());
}
-void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers(
+void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt,
size_t NumOps) const {
// not all format characters are collected.
@@ -132,8 +130,8 @@ void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers(
}
}
-bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier,
- Type *OpType) const {
+bool AMDGPUPrintfRuntimeBindingImpl::shouldPrintAsStr(char Specifier,
+ Type *OpType) const {
if (Specifier != 's')
return false;
const PointerType *PT = dyn_cast<PointerType>(OpType);
@@ -146,8 +144,7 @@ bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier,
return ElemIType->getBitWidth() == 8;
}
-bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
- Module &M, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
+bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
LLVMContext &Ctx = M.getContext();
IRBuilder<> Builder(Ctx);
Type *I32Ty = Type::getInt32Ty(Ctx);
@@ -172,7 +169,8 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
}
if (auto I = dyn_cast<Instruction>(Op)) {
- Value *Op_simplified = simplify(I, &GetTLI(*I->getFunction()));
+ Value *Op_simplified =
+ simplify(I, &GetTLI(*I->getFunction()), &GetDT(*I->getFunction()));
if (Op_simplified)
Op = Op_simplified;
}
@@ -184,8 +182,8 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
StringRef Str("unknown");
if (GVar && GVar->hasInitializer()) {
- auto Init = GVar->getInitializer();
- if (auto CA = dyn_cast<ConstantDataArray>(Init)) {
+ auto *Init = GVar->getInitializer();
+ if (auto *CA = dyn_cast<ConstantDataArray>(Init)) {
if (CA->isString())
Str = CA->getAsCString();
} else if (isa<ConstantAggregateZero>(Init)) {
@@ -248,16 +246,15 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
}
}
if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
- if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
- GlobalVariable *GV =
- dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
+ if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
+ auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
if (GV && GV->hasInitializer()) {
Constant *Init = GV->getInitializer();
- ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init);
- if (Init->isZeroValue() || CA->isString()) {
- size_t SizeStr = Init->isZeroValue()
- ? 1
- : (strlen(CA->getAsCString().data()) + 1);
+ bool IsZeroValue = Init->isZeroValue();
+ auto *CA = dyn_cast<ConstantDataArray>(Init);
+ if (IsZeroValue || (CA && CA->isString())) {
+ size_t SizeStr =
+ IsZeroValue ? 1 : (strlen(CA->getAsCString().data()) + 1);
size_t Rem = SizeStr % DWORD_ALIGN;
size_t NSizeStr = 0;
LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr
@@ -379,9 +376,8 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10));
ZeroIdxList.push_back(zeroInt);
- GetElementPtrInst *BufferIdx =
- dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create(
- nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch));
+ GetElementPtrInst *BufferIdx = GetElementPtrInst::Create(
+ nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch);
Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS);
Value *id_gep_cast =
@@ -395,8 +391,8 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id
// the following GEP is the buffer pointer
- BufferIdx = cast<GetElementPtrInst>(GetElementPtrInst::Create(
- nullptr, pcall, FourthIdxList, "PrintBuffGep", Brnch));
+ BufferIdx = GetElementPtrInst::Create(nullptr, pcall, FourthIdxList,
+ "PrintBuffGep", Brnch);
Type *Int32Ty = Type::getInt32Ty(Ctx);
Type *Int64Ty = Type::getInt64Ty(Ctx);
@@ -409,17 +405,15 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) {
Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty;
if (OpConvSpecifiers[ArgCount - 1] == 'f') {
- ConstantFP *fpCons = dyn_cast<ConstantFP>(Arg);
- if (fpCons) {
- APFloat Val(fpCons->getValueAPF());
+ if (auto *FpCons = dyn_cast<ConstantFP>(Arg)) {
+ APFloat Val(FpCons->getValueAPF());
bool Lost = false;
Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
&Lost);
Arg = ConstantFP::get(Ctx, Val);
IType = Int32Ty;
- } else {
- FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg);
- if (FpExt && FpExt->getType()->isDoubleTy() &&
+ } else if (auto *FpExt = dyn_cast<FPExtInst>(Arg)) {
+ if (FpExt->getType()->isDoubleTy() &&
FpExt->getOperand(0)->getType()->isFloatTy()) {
Arg = FpExt->getOperand(0);
IType = Int32Ty;
@@ -431,14 +425,14 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
} else if (ArgType->getTypeID() == Type::PointerTyID) {
if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
const char *S = NonLiteralStr;
- if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
- GlobalVariable *GV =
- dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
+ if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
+ auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
if (GV && GV->hasInitializer()) {
Constant *Init = GV->getInitializer();
- ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init);
- if (Init->isZeroValue() || CA->isString()) {
- S = Init->isZeroValue() ? "" : CA->getAsCString().data();
+ bool IsZeroValue = Init->isZeroValue();
+ auto *CA = dyn_cast<ConstantDataArray>(Init);
+ if (IsZeroValue || (CA && CA->isString())) {
+ S = IsZeroValue ? "" : CA->getAsCString().data();
}
}
}
@@ -491,27 +485,27 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
switch (EleSize) {
default:
EleCount = TotalSize / 64;
- IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext()));
+ IType = Type::getInt64Ty(ArgType->getContext());
break;
case 8:
if (EleCount >= 8) {
EleCount = TotalSize / 64;
- IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext()));
+ IType = Type::getInt64Ty(ArgType->getContext());
} else if (EleCount >= 3) {
EleCount = 1;
- IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext()));
+ IType = Type::getInt32Ty(ArgType->getContext());
} else {
EleCount = 1;
- IType = dyn_cast<Type>(Type::getInt16Ty(ArgType->getContext()));
+ IType = Type::getInt16Ty(ArgType->getContext());
}
break;
case 16:
if (EleCount >= 3) {
EleCount = TotalSize / 64;
- IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext()));
+ IType = Type::getInt64Ty(ArgType->getContext());
} else {
EleCount = 1;
- IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext()));
+ IType = Type::getInt32Ty(ArgType->getContext());
}
break;
}
@@ -539,8 +533,8 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
(void)StBuff;
if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands())
break;
- BufferIdx = dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create(
- nullptr, BufferIdx, BuffOffset, "PrintBuffNextPtr", Brnch));
+ BufferIdx = GetElementPtrInst::Create(nullptr, BufferIdx, BuffOffset,
+ "PrintBuffNextPtr", Brnch);
LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"
<< *BufferIdx << '\n');
}
@@ -556,7 +550,7 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
return true;
}
-bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
+bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
Triple TT(M.getTargetTriple());
if (TT.getArch() == Triple::r600)
return false;
@@ -585,11 +579,31 @@ bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
}
TD = &M.getDataLayout();
- auto DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
+ return lowerPrintfForGpu(M);
+}
+
+bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
+ auto GetDT = [this](Function &F) -> DominatorTree & {
+ return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ };
auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
};
- return lowerPrintfForGpu(M, GetTLI);
+ return AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
+}
+
+PreservedAnalyses
+AMDGPUPrintfRuntimeBindingPass::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetDT = [&FAM](Function &F) -> DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ bool Changed = AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
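The hunks above restructure AMDGPUPrintfRuntimeBinding so that the legacy runOnModule wrapper and the new pass manager's run() both delegate to a shared Impl object that receives its analyses as callbacks. The following is a minimal sketch of that dual-wrapper pattern, not part of the patch; the names LoweringImpl, LoweringLegacyPass and LoweringPass are hypothetical stand-ins.

#include <functional>

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
// Shared implementation: analyses arrive as callbacks, so the same code can be
// driven by either pass manager.
class LoweringImpl {
  std::function<DominatorTree &(Function &)> GetDT;
  std::function<TargetLibraryInfo &(Function &)> GetTLI;

public:
  LoweringImpl(std::function<DominatorTree &(Function &)> GetDT,
               std::function<TargetLibraryInfo &(Function &)> GetTLI)
      : GetDT(GetDT), GetTLI(GetTLI) {}

  bool run(Module &M) {
    bool Changed = false;
    for (Function &F : M) {
      if (F.isDeclaration())
        continue;
      // The real lowering would consult these per-function analyses here.
      (void)GetDT(F);
      (void)GetTLI(F);
    }
    return Changed;
  }
};

// Legacy pass-manager wrapper: pulls analyses from the wrapper passes.
struct LoweringLegacyPass : ModulePass {
  static char ID;
  LoweringLegacyPass() : ModulePass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool runOnModule(Module &M) override {
    auto GetDT = [this](Function &F) -> DominatorTree & {
      return getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
    };
    auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
      return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    };
    return LoweringImpl(GetDT, GetTLI).run(M);
  }
};
char LoweringLegacyPass::ID = 0;
} // end anonymous namespace

// New pass-manager wrapper: function analyses come through the module proxy.
struct LoweringPass : PassInfoMixin<LoweringPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
    auto GetDT = [&FAM](Function &F) -> DominatorTree & {
      return FAM.getResult<DominatorTreeAnalysis>(F);
    };
    auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
      return FAM.getResult<TargetLibraryAnalysis>(F);
    };
    bool Changed = LoweringImpl(GetDT, GetTLI).run(M);
    return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
  }
};

Passing analyses as callbacks keeps the Impl free of pass-manager types, which is what allows the same lowering code to back both wrappers in the patch.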
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 727f71b35049..2a6ea838efc0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -12,53 +12,15 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/Twine.h"
+#include "GCNSubtarget.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <map>
-#include <tuple>
-#include <utility>
-#include <vector>
#define DEBUG_TYPE "amdgpu-promote-alloca"
@@ -83,8 +45,26 @@ static cl::opt<unsigned> PromoteAllocaToVectorLimit(
// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
+public:
+ static char ID;
+
+ AMDGPUPromoteAlloca() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
+
+ bool handleAlloca(AllocaInst &I, bool SufficientLDS);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+class AMDGPUPromoteAllocaImpl {
private:
- const TargetMachine *TM;
+ const TargetMachine &TM;
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
@@ -116,28 +96,14 @@ private:
/// Check whether we have enough local memory for promotion.
bool hasSufficientLocalMem(const Function &F);
-public:
- static char ID;
-
- AMDGPUPromoteAlloca() : FunctionPass(ID) {}
-
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
-
- StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
-
bool handleAlloca(AllocaInst &I, bool SufficientLDS);
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- FunctionPass::getAnalysisUsage(AU);
- }
+public:
+ AMDGPUPromoteAllocaImpl(TargetMachine &TM) : TM(TM) {}
+ bool run(Function &F);
};
class AMDGPUPromoteAllocaToVector : public FunctionPass {
-private:
- unsigned MaxVGPRs;
-
public:
static char ID;
@@ -149,8 +115,6 @@ public:
return "AMDGPU Promote Alloca to vector";
}
- bool handleAlloca(AllocaInst &I);
-
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
@@ -171,32 +135,41 @@ INITIALIZE_PASS(AMDGPUPromoteAllocaToVector, DEBUG_TYPE "-to-vector",
char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;
char &llvm::AMDGPUPromoteAllocaToVectorID = AMDGPUPromoteAllocaToVector::ID;
-bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
- Mod = &M;
- DL = &Mod->getDataLayout();
+bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ return AMDGPUPromoteAllocaImpl(TPC->getTM<TargetMachine>()).run(F);
+ }
return false;
}
-bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
+PreservedAnalyses AMDGPUPromoteAllocaPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = AMDGPUPromoteAllocaImpl(TM).run(F);
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}
- if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
- TM = &TPC->getTM<TargetMachine>();
- else
- return false;
+bool AMDGPUPromoteAllocaImpl::run(Function &F) {
+ Mod = F.getParent();
+ DL = &Mod->getDataLayout();
- const Triple &TT = TM->getTargetTriple();
+ const Triple &TT = TM.getTargetTriple();
IsAMDGCN = TT.getArch() == Triple::amdgcn;
IsAMDHSA = TT.getOS() == Triple::AMDHSA;
- const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
if (!ST.isPromoteAllocaEnabled())
return false;
if (IsAMDGCN) {
- const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
} else {
MaxVGPRs = 128;
@@ -221,9 +194,9 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
}
std::pair<Value *, Value *>
-AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
+AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
const Function &F = *Builder.GetInsertBlock()->getParent();
- const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
if (!IsAMDHSA) {
Function *LocalSizeYFn
@@ -308,9 +281,10 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
return std::make_pair(Y, LoadZU);
}
-Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
+Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
+ unsigned N) {
const AMDGPUSubtarget &ST =
- AMDGPUSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
+ AMDGPUSubtarget::get(TM, *Builder.GetInsertBlock()->getParent());
Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
switch (N) {
@@ -592,11 +566,9 @@ static bool isCallPromotable(CallInst *CI) {
}
}
-bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
- Value *Val,
- Instruction *Inst,
- int OpIdx0,
- int OpIdx1) const {
+bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
+ Value *BaseAlloca, Value *Val, Instruction *Inst, int OpIdx0,
+ int OpIdx1) const {
// Figure out which operand is the one we might not be promoting.
Value *OtherOp = Inst->getOperand(OpIdx0);
if (Val == OtherOp)
@@ -605,7 +577,7 @@ bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
if (isa<ConstantPointerNull>(OtherOp))
return true;
- Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
+ Value *OtherObj = getUnderlyingObject(OtherOp);
if (!isa<AllocaInst>(OtherObj))
return false;
@@ -624,10 +596,8 @@ bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
return true;
}
-bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
- Value *BaseAlloca,
- Value *Val,
- std::vector<Value*> &WorkList) const {
+bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
+ Value *BaseAlloca, Value *Val, std::vector<Value *> &WorkList) const {
for (User *User : Val->users()) {
if (is_contained(WorkList, User))
@@ -727,10 +697,10 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
return true;
}
-bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
+bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
FunctionType *FTy = F.getFunctionType();
- const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
// If the function has any arguments in the local address space, then it's
// possible these arguments require the entire local memory space, so
@@ -749,35 +719,79 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
if (LocalMemLimit == 0)
return false;
- const DataLayout &DL = Mod->getDataLayout();
+ SmallVector<const Constant *, 16> Stack;
+ SmallPtrSet<const Constant *, 8> VisitedConstants;
+ SmallPtrSet<const GlobalVariable *, 8> UsedLDS;
+
+ auto visitUsers = [&](const GlobalVariable *GV, const Constant *Val) -> bool {
+ for (const User *U : Val->users()) {
+ if (const Instruction *Use = dyn_cast<Instruction>(U)) {
+ if (Use->getParent()->getParent() == &F)
+ return true;
+ } else {
+ const Constant *C = cast<Constant>(U);
+ if (VisitedConstants.insert(C).second)
+ Stack.push_back(C);
+ }
+ }
+
+ return false;
+ };
- // Check how much local memory is being used by global objects
- CurrentLocalMemUsage = 0;
for (GlobalVariable &GV : Mod->globals()) {
if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
continue;
- for (const User *U : GV.users()) {
- const Instruction *Use = dyn_cast<Instruction>(U);
- if (!Use)
- continue;
+ if (visitUsers(&GV, &GV)) {
+ UsedLDS.insert(&GV);
+ Stack.clear();
+ continue;
+ }
- if (Use->getParent()->getParent() == &F) {
- Align Alignment =
- DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
-
- // FIXME: Try to account for padding here. The padding is currently
- // determined from the inverse order of uses in the function. I'm not
- // sure if the use list order is in any way connected to this, so the
- // total reported size is likely incorrect.
- uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
- CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Alignment);
- CurrentLocalMemUsage += AllocSize;
+ // For any ConstantExpr uses, we need to recursively search the users until
+ // we see a function.
+ while (!Stack.empty()) {
+ const Constant *C = Stack.pop_back_val();
+ if (visitUsers(&GV, C)) {
+ UsedLDS.insert(&GV);
+ Stack.clear();
break;
}
}
}
+ const DataLayout &DL = Mod->getDataLayout();
+ SmallVector<std::pair<uint64_t, Align>, 16> AllocatedSizes;
+ AllocatedSizes.reserve(UsedLDS.size());
+
+ for (const GlobalVariable *GV : UsedLDS) {
+ Align Alignment =
+ DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
+ uint64_t AllocSize = DL.getTypeAllocSize(GV->getValueType());
+ AllocatedSizes.emplace_back(AllocSize, Alignment);
+ }
+
+ // Sort to try to estimate the worst case alignment padding
+ //
+ // FIXME: We should really do something to fix the addresses to a more optimal
+ // value instead
+ llvm::sort(AllocatedSizes, [](std::pair<uint64_t, Align> LHS,
+ std::pair<uint64_t, Align> RHS) {
+ return LHS.second < RHS.second;
+ });
+
+ // Check how much local memory is being used by global objects
+ CurrentLocalMemUsage = 0;
+
+ // FIXME: Try to account for padding here. The real padding and address is
+ // currently determined from the inverse order of uses in the function when
+ // legalizing, which could also potentially change. We try to estimate the
+ // worst case here, but we probably should fix the addresses earlier.
+ for (auto Alloc : AllocatedSizes) {
+ CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Alloc.second);
+ CurrentLocalMemUsage += Alloc.first;
+ }
+
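For the padding estimate introduced above, a small worked example may help: the (size, alignment) pairs are visited in order of increasing alignment, and the running usage is aligned up before each allocation is added. The standalone helper and the three LDS sizes below are made up for illustration; the real code uses llvm::alignTo and the sorted AllocatedSizes vector.

#include <cassert>
#include <cstdint>

// Same rounding as llvm::alignTo for power-of-two-free illustration.
static uint64_t alignUp(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) / Align * Align;
}

int main() {
  // (size, alignment) pairs already sorted by increasing alignment,
  // standing in for three hypothetical LDS globals.
  struct { uint64_t Size, Align; } Allocs[] = {{6, 2}, {4, 4}, {32, 16}};
  uint64_t Usage = 0;
  for (auto A : Allocs) {
    Usage = alignUp(Usage, A.Align); // pad up to the required alignment
    Usage += A.Size;
  }
  assert(Usage == 48); // 6 -> pad to 8, +4 = 12 -> pad to 16, +32 = 48
  return 0;
}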
unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
F);
@@ -819,7 +833,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
}
// FIXME: Should try to pick the most likely to be profitable allocas first.
-bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
+bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
// Array allocations are probably not worth handling, since an allocation of
// the array type is the canonical form.
if (!I.isStaticAlloca() || I.isArrayAllocation())
@@ -860,7 +874,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
if (!SufficientLDS)
return false;
- const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, ContainingFunction);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, ContainingFunction);
unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
Align Alignment =
@@ -1039,22 +1053,29 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
return true;
}
-bool AMDGPUPromoteAllocaToVector::runOnFunction(Function &F) {
- if (skipFunction(F) || DisablePromoteAllocaToVector)
+bool handlePromoteAllocaToVector(AllocaInst &I, unsigned MaxVGPRs) {
+ // Array allocations are probably not worth handling, since an allocation of
+ // the array type is the canonical form.
+ if (!I.isStaticAlloca() || I.isArrayAllocation())
return false;
- const TargetMachine *TM;
- if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
- TM = &TPC->getTM<TargetMachine>();
- else
+ LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
+
+ Module *Mod = I.getParent()->getParent()->getParent();
+ return tryPromoteAllocaToVector(&I, Mod->getDataLayout(), MaxVGPRs);
+}
+
+bool promoteAllocasToVector(Function &F, TargetMachine &TM) {
+ if (DisablePromoteAllocaToVector)
return false;
- const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
if (!ST.isPromoteAllocaEnabled())
return false;
- if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
- const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
+ unsigned MaxVGPRs;
+ if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
} else {
MaxVGPRs = 128;
@@ -1070,23 +1091,31 @@ bool AMDGPUPromoteAllocaToVector::runOnFunction(Function &F) {
}
for (AllocaInst *AI : Allocas) {
- if (handleAlloca(*AI))
+ if (handlePromoteAllocaToVector(*AI, MaxVGPRs))
Changed = true;
}
return Changed;
}
-bool AMDGPUPromoteAllocaToVector::handleAlloca(AllocaInst &I) {
- // Array allocations are probably not worth handling, since an allocation of
- // the array type is the canonical form.
- if (!I.isStaticAlloca() || I.isArrayAllocation())
+bool AMDGPUPromoteAllocaToVector::runOnFunction(Function &F) {
+ if (skipFunction(F))
return false;
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ return promoteAllocasToVector(F, TPC->getTM<TargetMachine>());
+ }
+ return false;
+}
- LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
-
- Module *Mod = I.getParent()->getParent()->getParent();
- return tryPromoteAllocaToVector(&I, Mod->getDataLayout(), MaxVGPRs);
+PreservedAnalyses
+AMDGPUPromoteAllocaToVectorPass::run(Function &F, FunctionAnalysisManager &AM) {
+ bool Changed = promoteAllocasToVector(F, TM);
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
}
FunctionPass *llvm::createAMDGPUPromoteAlloca() {
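Both new-PM entry points added in this file report their results the same way: everything is preserved when nothing changed, otherwise only CFG-level analyses survive because the pass rewrites instructions but leaves the block structure alone. A minimal sketch of that idiom follows, with a hypothetical pass name and a made-up rewriteWithoutTouchingCFG helper.

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

struct PromoteLikePass : PassInfoMixin<PromoteLikePass> {
  // Placeholder for a transform that never splits or removes basic blocks.
  static bool rewriteWithoutTouchingCFG(Function &) { return false; }

  PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
    bool Changed = rewriteWithoutTouchingCFG(F);
    if (!Changed)
      return PreservedAnalyses::all();
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>(); // invalidate value analyses, keep CFG ones
    return PA;
  }
};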
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
index 982aae374884..cd71c7a16c73 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -27,16 +27,14 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include <string>
#define DEBUG_TYPE "amdgpu-propagate-attributes"
@@ -56,8 +54,10 @@ static constexpr const FeatureBitset TargetFeatures = {
};
// Attributes to propagate.
+// TODO: Support conservative min/max merging instead of cloning.
static constexpr const char* AttributeNames[] = {
- "amdgpu-waves-per-eu"
+ "amdgpu-waves-per-eu",
+ "amdgpu-flat-work-group-size"
};
static constexpr unsigned NumAttr =
@@ -371,15 +371,28 @@ AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
}
bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
- if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ if (!TM) {
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ TM = &TPC->getTM<TargetMachine>();
+ }
+
+ if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
return false;
return AMDGPUPropagateAttributes(TM, false).process(F);
}
bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
- if (!TM)
- return false;
+ if (!TM) {
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ TM = &TPC->getTM<TargetMachine>();
+ }
return AMDGPUPropagateAttributes(TM, true).process(M);
}
@@ -393,3 +406,21 @@ ModulePass
*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
return new AMDGPUPropagateAttributesLate(TM);
}
+
+PreservedAnalyses
+AMDGPUPropagateAttributesEarlyPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ return PreservedAnalyses::all();
+
+ return AMDGPUPropagateAttributes(&TM, false).process(F)
+ ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
+
+PreservedAnalyses
+AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
+ return AMDGPUPropagateAttributes(&TM, true).process(M)
+ ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
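The changes above let the legacy Early/Late passes recover the TargetMachine lazily from TargetPassConfig when they were constructed without one, instead of silently doing nothing. A hedged sketch of that fallback, using a hypothetical pass and a made-up doWork helper:

#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

struct TMUserLegacyPass : FunctionPass {
  static char ID;
  const TargetMachine *TM;

  explicit TMUserLegacyPass(const TargetMachine *TM = nullptr)
      : FunctionPass(ID), TM(TM) {}

  bool runOnFunction(Function &F) override {
    if (!TM) {
      // Constructed by the default registry: recover the TM from the
      // TargetPassConfig if codegen set one up, otherwise bail out.
      auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
      if (!TPC)
        return false;
      TM = &TPC->getTM<TargetMachine>();
    }
    return doWork(F, *TM);
  }

  // Placeholder for the actual attribute propagation.
  static bool doWork(Function &, const TargetMachine &) { return false; }
};
char TMUserLegacyPass::ID = 0;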
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 71d82679b3ff..d644c0319286 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -11,19 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
+#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/Debug.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-regbank-combiner"
using namespace llvm;
@@ -39,7 +37,7 @@ namespace {
#include "AMDGPUGenRegBankGICombiner.inc"
#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
-class AMDGPURegBankCombinerInfo : public CombinerInfo {
+class AMDGPURegBankCombinerInfo final : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index dfaf97bfb08e..502356d4f9a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -70,21 +70,17 @@
#include "AMDGPURegisterBankInfo.h"
+#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#define GET_TARGET_REGBANK_IMPL
#include "AMDGPUGenRegisterBank.inc"
@@ -187,7 +183,12 @@ public:
}
void changingInstr(MachineInstr &MI) override {}
- void changedInstr(MachineInstr &MI) override {}
+ void changedInstr(MachineInstr &MI) override {
+ // FIXME: In principle we should probably add the instruction to NewInsts,
+ // but the way the LegalizerHelper uses the observer, we will always see the
+ // registers we need to set the regbank on also referenced in a new
+ // instruction.
+ }
};
}
@@ -750,6 +751,9 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
for (MachineInstr &MI : Range) {
for (MachineOperand &Def : MI.defs()) {
+ if (MRI.use_nodbg_empty(Def.getReg()))
+ continue;
+
LLT ResTy = MRI.getType(Def.getReg());
const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
ResultRegs.push_back(Def.getReg());
@@ -847,7 +851,18 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
continue;
}
- LLT OpTy = MRI.getType(Op.getReg());
+ Register OpReg = Op.getReg();
+ LLT OpTy = MRI.getType(OpReg);
+
+ const RegisterBank *OpBank = getRegBank(OpReg, MRI, *TRI);
+ if (OpBank != &AMDGPU::VGPRRegBank) {
+ // Insert copy from AGPR to VGPR before the loop.
+ B.setMBB(MBB);
+ OpReg = B.buildCopy(OpTy, OpReg).getReg(0);
+ MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
+ B.setInstr(*I);
+ }
+
unsigned OpSize = OpTy.getSizeInBits();
// Can only do a readlane of 32-bit pieces.
@@ -857,11 +872,11 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
= MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
MRI.setType(CurrentLaneOpReg, OpTy);
- constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
+ constrainGenericRegister(OpReg, AMDGPU::VGPR_32RegClass, MRI);
// Read the next variant <- also loop target.
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
CurrentLaneOpReg)
- .addReg(Op.getReg());
+ .addReg(OpReg);
Register NewCondReg = MRI.createVirtualRegister(WaveRC);
bool First = CondReg == AMDGPU::NoRegister;
@@ -872,7 +887,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
.addDef(NewCondReg)
.addReg(CurrentLaneOpReg)
- .addReg(Op.getReg());
+ .addReg(OpReg);
Op.setReg(CurrentLaneOpReg);
if (!First) {
@@ -904,7 +919,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
// Insert the unmerge before the loop.
B.setMBB(MBB);
- auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
+ auto Unmerge = B.buildUnmerge(UnmergeTy, OpReg);
B.setInstr(*I);
unsigned NumPieces = Unmerge->getNumOperands() - 1;
@@ -1039,7 +1054,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
// Return any unique registers used by \p MI at \p OpIndices that need to be
// handled in a waterfall loop. Returns these registers in \p
-// SGPROperandRegs. Returns true if there are any operansd to handle and a
+// SGPROperandRegs. Returns true if there are any operands to handle and a
// waterfall loop is necessary.
bool AMDGPURegisterBankInfo::collectWaterfallOperands(
SmallSet<Register, 4> &SGPROperandRegs, MachineInstr &MI,
@@ -1048,7 +1063,7 @@ bool AMDGPURegisterBankInfo::collectWaterfallOperands(
assert(MI.getOperand(Op).isUse());
Register Reg = MI.getOperand(Op).getReg();
const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
- if (OpBank->getID() == AMDGPU::VGPRRegBankID)
+ if (OpBank->getID() != AMDGPU::SGPRRegBankID)
SGPROperandRegs.insert(Reg);
}
@@ -1083,16 +1098,24 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const {
Register Reg = MI.getOperand(OpIdx).getReg();
const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
- if (Bank != &AMDGPU::VGPRRegBank)
+ if (Bank == &AMDGPU::SGPRRegBank)
return;
+ LLT Ty = MRI.getType(Reg);
MachineIRBuilder B(MI);
+
+ if (Bank != &AMDGPU::VGPRRegBank) {
+ // We need to copy from AGPR to VGPR
+ Reg = B.buildCopy(Ty, Reg).getReg(0);
+ MRI.setRegBank(Reg, AMDGPU::VGPRRegBank);
+ }
+
Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
B.buildInstr(AMDGPU::V_READFIRSTLANE_B32)
.addDef(SGPR)
.addReg(Reg);
- MRI.setType(SGPR, MRI.getType(Reg));
+ MRI.setType(SGPR, Ty);
const TargetRegisterClass *Constrained =
constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI);
@@ -1149,10 +1172,8 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
// 96-bit loads are only available for vector loads. We need to split this
// into a 64-bit part, and 32 (unless we can widen to a 128-bit load).
- MachineIRBuilder B(MI);
ApplyRegBankMapping O(*this, MRI, &AMDGPU::SGPRRegBank);
- GISelObserverWrapper Observer(&O);
- B.setChangeObserver(Observer);
+ MachineIRBuilder B(MI, O);
if (MMO->getAlign() < Align(16)) {
LLT Part64, Part32;
@@ -1191,13 +1212,10 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
MRI.setType(BasePtrReg, PtrTy);
- MachineIRBuilder B(MI);
-
unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
- ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
- GISelObserverWrapper Observer(&O);
- B.setChangeObserver(Observer);
+ ApplyRegBankMapping Observer(*this, MRI, &AMDGPU::VGPRRegBank);
+ MachineIRBuilder B(MI, Observer);
LegalizerHelper Helper(B.getMF(), Observer, B);
if (LoadTy.isVector()) {
@@ -1241,10 +1259,7 @@ bool AMDGPURegisterBankInfo::applyMappingDynStackAlloc(
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register SPReg = Info->getStackPtrOffsetReg();
ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::SGPRRegBank);
- GISelObserverWrapper Observer(&ApplyBank);
-
- MachineIRBuilder B(MI);
- B.setChangeObserver(Observer);
+ MachineIRBuilder B(MI, ApplyBank);
auto WaveSize = B.buildConstant(LLT::scalar(32), ST.getWavefrontSizeLog2());
auto ScaledSize = B.buildShl(IntPtrTy, AllocSize, WaveSize);
@@ -1309,7 +1324,7 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
const LLT S32 = LLT::scalar(32);
MachineRegisterInfo *MRI = B.getMRI();
- if (Optional<int64_t> Imm = getConstantVRegVal(CombinedOffset, *MRI)) {
+ if (Optional<int64_t> Imm = getConstantVRegSExtVal(CombinedOffset, *MRI)) {
uint32_t SOffset, ImmOffset;
if (AMDGPU::splitMUBUFOffset(*Imm, SOffset, ImmOffset, &RBI.Subtarget,
Alignment)) {
@@ -1325,10 +1340,9 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
Register Base;
unsigned Offset;
- MachineInstr *Unused;
- std::tie(Base, Offset, Unused)
- = AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
+ std::tie(Base, Offset) =
+ AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
uint32_t SOffset, ImmOffset;
if (Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
@@ -1535,9 +1549,7 @@ bool AMDGPURegisterBankInfo::applyMappingBFEIntrinsic(
// The scalar form packs the offset and width in a single operand.
ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::SGPRRegBank);
- GISelObserverWrapper Observer(&ApplyBank);
- MachineIRBuilder B(MI);
- B.setChangeObserver(Observer);
+ MachineIRBuilder B(MI, ApplyBank);
// Ensure the high bits are clear to insert the offset.
auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
@@ -1922,7 +1934,7 @@ bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
const RegisterBank &IdxBank =
*OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
- bool IsDivergentIdx = IdxBank == AMDGPU::VGPRRegBank;
+ bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
LLT VecTy = MRI.getType(VecReg);
unsigned EltSize = VecTy.getScalarSizeInBits();
@@ -2004,7 +2016,7 @@ bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
const RegisterBank &IdxBank =
*OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
- bool IsDivergentIdx = IdxBank == AMDGPU::VGPRRegBank;
+ bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
LLT VecTy = MRI.getType(VecReg);
unsigned EltSize = VecTy.getScalarSizeInBits();
@@ -2129,9 +2141,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// Promote SGPR/VGPR booleans to s32
MachineFunction *MF = MI.getParent()->getParent();
ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
- GISelObserverWrapper Observer(&ApplyBank);
- MachineIRBuilder B(MI);
- LegalizerHelper Helper(*MF, Observer, B);
+ MachineIRBuilder B(MI, ApplyBank);
+ LegalizerHelper Helper(*MF, ApplyBank, B);
if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
llvm_unreachable("widen scalar should have succeeded");
@@ -2274,9 +2285,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MachineFunction *MF = MI.getParent()->getParent();
ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
- GISelObserverWrapper Observer(&ApplyBank);
- MachineIRBuilder B(MI);
- LegalizerHelper Helper(*MF, Observer, B);
+ MachineIRBuilder B(MI, ApplyBank);
+ LegalizerHelper Helper(*MF, ApplyBank, B);
if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
LegalizerHelper::Legalized)
@@ -2319,15 +2329,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
setRegsToType(MRI, DefRegs, HalfTy);
- B.buildInstr(Opc)
- .addDef(DefRegs[0])
- .addUse(Src0Regs[0])
- .addUse(Src1Regs[0]);
-
- B.buildInstr(Opc)
- .addDef(DefRegs[1])
- .addUse(Src0Regs[1])
- .addUse(Src1Regs[1]);
+ B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]});
+ B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]});
MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
MI.eraseFromParent();
@@ -2355,13 +2358,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
const LLT S32 = LLT::scalar(32);
MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
- MachineIRBuilder B(MI);
ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
- GISelObserverWrapper Observer(&ApplySALU);
+ MachineIRBuilder B(MI, ApplySALU);
if (DstTy.isVector()) {
- B.setChangeObserver(Observer);
-
Register WideSrc0Lo, WideSrc0Hi;
Register WideSrc1Lo, WideSrc1Hi;
@@ -2374,7 +2374,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
MI.eraseFromParent();
} else {
- LegalizerHelper Helper(*MF, Observer, B);
+ LegalizerHelper Helper(*MF, ApplySALU, B);
if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
llvm_unreachable("widen scalar should have succeeded");
@@ -2411,8 +2411,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (Ty == V2S16) {
ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
- GISelObserverWrapper Observer(&ApplySALU);
- B.setChangeObserver(Observer);
+ B.setChangeObserver(ApplySALU);
// Need to widen to s32, and expand as cmp + select, and avoid producing
// illegal vector extends or unmerges that would need further
@@ -2444,8 +2443,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MI.eraseFromParent();
} else if (Ty == S16) {
ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
- GISelObserverWrapper Observer(&ApplySALU);
- LegalizerHelper Helper(*MF, Observer, B);
+ B.setChangeObserver(ApplySALU);
+ LegalizerHelper Helper(*MF, ApplySALU, B);
// Need to widen to s32, and expand as cmp + select.
if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
@@ -2499,9 +2498,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_CTPOP:
case AMDGPU::G_CTLZ_ZERO_UNDEF:
case AMDGPU::G_CTTZ_ZERO_UNDEF: {
- MachineIRBuilder B(MI);
- MachineFunction &MF = B.getMF();
-
const RegisterBank *DstBank =
OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
if (DstBank == &AMDGPU::SGPRRegBank)
@@ -2514,8 +2510,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
break;
ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
- GISelObserverWrapper Observer(&ApplyVALU);
- LegalizerHelper Helper(MF, Observer, B);
+ MachineIRBuilder B(MI, ApplyVALU);
+
+ MachineFunction &MF = B.getMF();
+ LegalizerHelper Helper(MF, ApplyVALU, B);
if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized)
llvm_unreachable("narrowScalar should have succeeded");
@@ -2693,8 +2691,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
Register BaseIdxReg;
unsigned ConstOffset;
- MachineInstr *OffsetDef;
- std::tie(BaseIdxReg, ConstOffset, OffsetDef) =
+ std::tie(BaseIdxReg, ConstOffset) =
AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(2).getReg());
// See if the index is an add of a constant which will be foldable by moving
@@ -2825,9 +2822,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
Register BaseIdxReg;
unsigned ConstOffset;
- MachineInstr *OffsetDef;
- std::tie(BaseIdxReg, ConstOffset, OffsetDef) =
- AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(3).getReg());
+ std::tie(BaseIdxReg, ConstOffset) =
+ AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(3).getReg());
// See if the index is an add of a constant which will be foldable by moving
// the base register of the index later if this is going to be executed in a
@@ -2957,6 +2953,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
executeInWaterfallLoop(MI, MRI, {2, 5});
return;
}
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: {
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, {2, 5});
+ return;
+ }
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(MI, MRI, {3, 6});
@@ -2989,7 +2990,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 3); // Index
return;
}
- case Intrinsic::amdgcn_ballot:
case Intrinsic::amdgcn_interp_p1:
case Intrinsic::amdgcn_interp_p2:
case Intrinsic::amdgcn_interp_mov:
@@ -3017,6 +3017,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case Intrinsic::amdgcn_ubfe:
applyMappingBFEIntrinsic(OpdMapper, false);
return;
+ case Intrinsic::amdgcn_ballot:
+ // Use default handling and insert copy to vcc source.
+ break;
}
break;
}
@@ -3031,6 +3034,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
return;
}
+ case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
+ unsigned N = MI.getNumExplicitOperands() - 2;
+ executeInWaterfallLoop(MI, MRI, { N });
+ return;
+ }
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
auto IntrID = MI.getIntrinsicID();
switch (IntrID) {
@@ -3106,6 +3114,59 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return applyDefaultMapping(OpdMapper);
}
+// vgpr, sgpr -> vgpr
+// vgpr, agpr -> vgpr
+// agpr, agpr -> agpr
+// agpr, sgpr -> vgpr
+static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
+ if (RB0 == AMDGPU::InvalidRegBankID)
+ return RB1;
+ if (RB1 == AMDGPU::InvalidRegBankID)
+ return RB0;
+
+ if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
+ return AMDGPU::SGPRRegBankID;
+
+ if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
+ return AMDGPU::AGPRRegBankID;
+
+ return AMDGPU::VGPRRegBankID;
+}
+
+static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1) {
+ if (RB0 == AMDGPU::InvalidRegBankID)
+ return RB1;
+ if (RB1 == AMDGPU::InvalidRegBankID)
+ return RB0;
+
+ // vcc, vcc -> vcc
+ // vcc, sgpr -> vcc
+ // vcc, vgpr -> vcc
+ if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
+ return AMDGPU::VCCRegBankID;
+
+ // vcc, vgpr -> vgpr
+ return regBankUnion(RB0, RB1);
+}
+
+unsigned AMDGPURegisterBankInfo::getMappingType(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI) const {
+ unsigned RegBank = AMDGPU::InvalidRegBankID;
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
+ Register Reg = MI.getOperand(i).getReg();
+ if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
+ RegBank = regBankUnion(RegBank, Bank->getID());
+ if (RegBank == AMDGPU::VGPRRegBankID)
+ break;
+ }
+ }
+
+ return RegBank;
+}
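The new regBankUnion/regBankBoolUnion helpers and getMappingType above merge the banks of all register operands of an instruction, now handling AGPR and an explicit InvalidRegBankID sentinel instead of -1. The self-contained example below restates those union rules with a throwaway enum purely for illustration; the real code uses the generated AMDGPU::*RegBankID constants.

#include <cassert>

enum BankID { Invalid, SGPR, VGPR, AGPR, VCC };

static BankID bankUnion(BankID A, BankID B) {
  if (A == Invalid) return B;
  if (B == Invalid) return A;
  if (A == SGPR && B == SGPR) return SGPR; // sgpr, sgpr -> sgpr
  if (A == AGPR && B == AGPR) return AGPR; // agpr, agpr -> agpr
  return VGPR;                             // any other mix falls back to vgpr
}

int main() {
  assert(bankUnion(VGPR, SGPR) == VGPR);    // vgpr, sgpr -> vgpr
  assert(bankUnion(AGPR, SGPR) == VGPR);    // agpr, sgpr -> vgpr
  assert(bankUnion(AGPR, AGPR) == AGPR);    // agpr, agpr -> agpr
  assert(bankUnion(Invalid, SGPR) == SGPR); // an unset side keeps the other bank
  return 0;
}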
+
bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -3214,7 +3275,7 @@ AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
if (MustBeSGPR) {
// If this must be an SGPR, so we must report whatever it is as legal.
- unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ unsigned NewBank = getRegBankID(OpReg, MRI, AMDGPU::SGPRRegBankID);
OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);
} else {
// Some operands must be VGPR, and these are easy to copy to.
@@ -3232,7 +3293,7 @@ AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI,
LLT PtrTy = MRI.getType(PtrReg);
unsigned Size = PtrTy.getSizeInBits();
if (Subtarget.useFlatForGlobal() ||
- !SITargetLowering::isFlatGlobalAddrSpace(PtrTy.getAddressSpace()))
+ !AMDGPU::isFlatGlobalAddrSpace(PtrTy.getAddressSpace()))
return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
// If we're using MUBUF instructions for global memory, an SGPR base register
@@ -3258,8 +3319,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
- if (PtrBank == &AMDGPU::SGPRRegBank &&
- SITargetLowering::isFlatGlobalAddrSpace(AS)) {
+ if (PtrBank == &AMDGPU::SGPRRegBank && AMDGPU::isFlatGlobalAddrSpace(AS)) {
if (isScalarLoadLegal(MI)) {
// We have a uniform instruction so we want to use an SMRD load
ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
@@ -3292,41 +3352,18 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
unsigned
AMDGPURegisterBankInfo::getRegBankID(Register Reg,
const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
unsigned Default) const {
- const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
+ const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
return Bank ? Bank->getID() : Default;
}
-
-static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
- return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ?
- AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-}
-
-static int regBankBoolUnion(int RB0, int RB1) {
- if (RB0 == -1)
- return RB1;
- if (RB1 == -1)
- return RB0;
-
- // vcc, vcc -> vcc
- // vcc, sgpr -> vcc
- // vcc, vgpr -> vcc
- if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
- return AMDGPU::VCCRegBankID;
-
- // vcc, vgpr -> vgpr
- return regBankUnion(RB0, RB1);
-}
-
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
// Lie and claim anything is legal, even though this needs to be an SGPR
// applyMapping will have to deal with it as a waterfall loop.
- unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID);
+ unsigned Bank = getRegBankID(Reg, MRI, AMDGPU::SGPRRegBankID);
unsigned Size = getSizeInBits(Reg, MRI, TRI);
return AMDGPU::getValueMapping(Bank, Size);
}
@@ -3361,7 +3398,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (MI.isCopy()) {
+ if (MI.isCopy() || MI.getOpcode() == AMDGPU::G_FREEZE) {
// The default logic bothers to analyze impossible alternative mappings. We
// want the most straightforward mapping, so just directly handle this.
const RegisterBank *DstBank = getRegBank(MI.getOperand(0).getReg(), MRI,
@@ -3377,9 +3414,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getInvalidInstructionMapping();
const ValueMapping &ValMap = getValueMapping(0, Size, *DstBank);
+ unsigned OpdsMappingSize = MI.isCopy() ? 1 : 2;
+ SmallVector<const ValueMapping *, 1> OpdsMapping(OpdsMappingSize);
+ OpdsMapping[0] = &ValMap;
+ if (MI.getOpcode() == AMDGPU::G_FREEZE)
+ OpdsMapping[1] = &ValMap;
+
return getInstructionMapping(
1, /*Cost*/ 1,
- /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
+ /*OperandsMapping*/ getOperandsMapping(OpdsMapping), OpdsMappingSize);
}
if (MI.isRegSequence()) {
@@ -3388,7 +3431,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned BankID = AMDGPU::SGPRRegBankID;
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
- auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI, *TRI);
+ auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI);
// It doesn't make sense to use vcc or scc banks here, so just ignore
// them.
if (OpBank != AMDGPU::SGPRRegBankID) {
@@ -3409,8 +3452,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
//
// TODO: There are additional exec masking dependencies to analyze.
if (MI.getOpcode() == TargetOpcode::G_PHI) {
- // TODO: Generate proper invalid bank enum.
- int ResultBank = -1;
+ unsigned ResultBank = AMDGPU::InvalidRegBankID;
Register DstReg = MI.getOperand(0).getReg();
// Sometimes the result may have already been assigned a bank.
@@ -3432,7 +3474,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
ResultBank = regBankBoolUnion(ResultBank, OpBank);
}
- assert(ResultBank != -1);
+ assert(ResultBank != AMDGPU::InvalidRegBankID);
unsigned Size = MRI.getType(DstReg).getSizeInBits();
@@ -3461,9 +3503,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const RegisterBank *DstBank
= getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);
- unsigned TargetBankID = -1;
- unsigned BankLHS = -1;
- unsigned BankRHS = -1;
+ unsigned TargetBankID = AMDGPU::InvalidRegBankID;
+ unsigned BankLHS = AMDGPU::InvalidRegBankID;
+ unsigned BankRHS = AMDGPU::InvalidRegBankID;
if (DstBank) {
TargetBankID = DstBank->getID();
if (DstBank == &AMDGPU::VCCRegBank) {
@@ -3471,15 +3513,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
BankLHS = AMDGPU::VCCRegBankID;
BankRHS = AMDGPU::VCCRegBankID;
} else {
- BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI,
AMDGPU::SGPRRegBankID);
- BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI,
AMDGPU::SGPRRegBankID);
}
} else {
- BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI,
AMDGPU::VCCRegBankID);
- BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI,
AMDGPU::VCCRegBankID);
// Both inputs should be true booleans to produce a boolean result.
@@ -3507,10 +3549,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
} else {
OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
- unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
+ unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI /*, DefaultBankID*/);
OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
- unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
+ unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI /*, DefaultBankID*/);
OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
}
@@ -3542,6 +3584,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getDefaultMappingSOP(MI);
LLVM_FALLTHROUGH;
+ case AMDGPU::G_SADDSAT: // FIXME: Could lower sat ops for SALU
+ case AMDGPU::G_SSUBSAT:
+ case AMDGPU::G_UADDSAT:
+ case AMDGPU::G_USUBSAT:
case AMDGPU::G_FADD:
case AMDGPU::G_FSUB:
case AMDGPU::G_FPTOSI:
@@ -3606,13 +3652,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_DYN_STACKALLOC: {
// Result is always uniform, and a wave reduction is needed for the source.
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
- unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
break;
}
case AMDGPU::G_INSERT: {
- unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
- AMDGPU::VGPRRegBankID;
+ unsigned BankID = getMappingType(MRI, MI);
unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
@@ -3623,7 +3668,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_EXTRACT: {
- unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
@@ -3637,8 +3682,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
if (DstTy == LLT::vector(2, 16)) {
unsigned DstSize = DstTy.getSizeInBits();
unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
- unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
+ unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI);
unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
@@ -3651,8 +3696,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case AMDGPU::G_MERGE_VALUES:
case AMDGPU::G_CONCAT_VECTORS: {
- unsigned Bank = isSALUMapping(MI) ?
- AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
+ unsigned Bank = getMappingType(MRI, MI);
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
@@ -3669,7 +3713,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FABS:
case AMDGPU::G_FNEG: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
break;
}
@@ -3677,7 +3721,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_CTTZ_ZERO_UNDEF:
case AMDGPU::G_CTPOP: {
unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
// This should really be getValueMappingSGPR64Only, but allowing the generic
@@ -3689,7 +3733,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_TRUNC: {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
- unsigned Bank = getRegBankID(Src, MRI, *TRI);
+ unsigned Bank = getRegBankID(Src, MRI);
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
@@ -3726,7 +3770,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case AMDGPU::G_FCMP: {
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
- unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI);
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
@@ -3751,10 +3795,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// See if the result register has already been constrained to vcc, which may
// happen due to control flow intrinsic lowering.
- unsigned DstBank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
+ unsigned DstBank = getRegBankID(MI.getOperand(0).getReg(), MRI,
AMDGPU::SGPRRegBankID);
- unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
- unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+ unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI);
+ unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI);
bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
Op2Bank == AMDGPU::SGPRRegBankID &&
@@ -3777,11 +3821,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case AMDGPU::G_EXTRACT_VECTOR_ELT: {
// VGPR index can be used for waterfall when indexing a SGPR vector.
- unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
- unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI);
unsigned OutputBankID = regBankUnion(SrcBankID, IdxBank);
OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
@@ -3798,9 +3842,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
- unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(),
- MRI, *TRI);
- unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+ unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(), MRI);
+ unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI);
OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
@@ -3820,8 +3863,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_UNMERGE_VALUES: {
- unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
- AMDGPU::VGPRRegBankID;
+ unsigned Bank = getMappingType(MRI, MI);
// Op1 and Dst should use the same register bank.
// FIXME: Shouldn't this be the default? Why do we need to handle this?
@@ -3876,7 +3918,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
- case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: {
// vdata_out
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
@@ -3958,6 +4001,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_rsq_clamp:
case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fma_legacy:
case Intrinsic::amdgcn_ldexp:
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_frexp_exp:
@@ -4011,10 +4055,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_wwm:
case Intrinsic::amdgcn_wqm:
case Intrinsic::amdgcn_softwqm:
+ case Intrinsic::amdgcn_set_inactive:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_kernarg_segment_ptr:
case Intrinsic::amdgcn_s_getpc:
- case Intrinsic::amdgcn_groupstaticsize: {
+ case Intrinsic::amdgcn_groupstaticsize:
+ case Intrinsic::amdgcn_reloc_constant:
+ case Intrinsic::returnaddress: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
@@ -4065,7 +4112,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// This must be an SGPR, but accept a VGPR.
Register IdxReg = MI.getOperand(3).getReg();
unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
- unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ unsigned IdxBank = getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);
OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
LLVM_FALLTHROUGH;
}
@@ -4080,10 +4127,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
Register SrcReg = MI.getOperand(2).getReg();
unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
- unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ unsigned SrcBank = getRegBankID(SrcReg, MRI, AMDGPU::SGPRRegBankID);
Register IdxReg = MI.getOperand(3).getReg();
unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
- unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ unsigned IdxBank = getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
// These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be inserted
@@ -4149,7 +4196,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_interp_p2_f16: {
const int M0Idx = MI.getNumOperands() - 1;
Register M0Reg = MI.getOperand(M0Idx).getReg();
- unsigned M0Bank = getRegBankID(M0Reg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
@@ -4182,6 +4229,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
assert(RSrcIntrin->IsImage);
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
}
+ case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
+ unsigned N = MI.getNumExplicitOperands() - 2;
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
+ OpdsMapping[N] = getSGPROpMapping(MI.getOperand(N).getReg(), MRI, *TRI);
+ for (unsigned I = 2; I < N; ++I)
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ break;
+ }
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
auto IntrID = MI.getIntrinsicID();
switch (IntrID) {
@@ -4193,15 +4248,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
- case Intrinsic::amdgcn_ds_fadd:
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax:
+ case Intrinsic::amdgcn_global_atomic_fadd:
+ case Intrinsic::amdgcn_global_atomic_csub:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
- unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
AMDGPU::SGPRRegBankID);
OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
@@ -4228,14 +4282,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// This must be an SGPR, but accept a VGPR.
- unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
AMDGPU::SGPRRegBankID);
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
case Intrinsic::amdgcn_s_setreg: {
// This must be an SGPR, but accept a VGPR.
- unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
AMDGPU::SGPRRegBankID);
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
@@ -4304,7 +4358,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
// This must be an SGPR, but accept a VGPR.
- unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
AMDGPU::SGPRRegBankID);
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
@@ -4313,7 +4367,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_ds_gws_sema_p:
case Intrinsic::amdgcn_ds_gws_sema_release_all: {
// This must be an SGPR, but accept a VGPR.
- unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI,
AMDGPU::SGPRRegBankID);
OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
break;
@@ -4325,16 +4379,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case AMDGPU::G_SELECT: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
AMDGPU::SGPRRegBankID);
- unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI,
+ unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI,
AMDGPU::SGPRRegBankID);
bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
Op3Bank == AMDGPU::SGPRRegBankID;
unsigned CondBankDefault = SGPRSrcs ?
AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
- unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI,
CondBankDefault);
if (CondBank == AMDGPU::SGPRRegBankID)
CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
@@ -4380,7 +4434,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_ATOMICRMW_FADD:
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
case AMDGPU::G_AMDGPU_ATOMIC_INC:
- case AMDGPU::G_AMDGPU_ATOMIC_DEC: {
+ case AMDGPU::G_AMDGPU_ATOMIC_DEC:
+ case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
+ case AMDGPU::G_AMDGPU_ATOMIC_FMAX: {
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
@@ -4394,7 +4450,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_BRCOND: {
- unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
+ unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI,
AMDGPU::SGPRRegBankID);
assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
if (Bank != AMDGPU::SGPRRegBankID)
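The register-bank hunks above all drop the TargetRegisterInfo argument from getRegBankID, matching the slimmer declaration in AMDGPURegisterBankInfo.h further down. The standalone C++ sketch below only mirrors the lookup-with-default shape those call sites rely on; the Register alias, the map, and the bank constants are stand-ins invented for illustration, not LLVM's types.

#include <cstdint>
#include <iostream>
#include <unordered_map>

// Invented stand-ins; the real code queries MachineRegisterInfo instead.
using Register = uint32_t;
constexpr unsigned SGPRRegBankID = 0;
constexpr unsigned VGPRRegBankID = 1;

// Same shape as the simplified helper: return the bank already assigned to
// Reg, or the caller-supplied default when nothing is recorded yet.
unsigned getRegBankID(Register Reg,
                      const std::unordered_map<Register, unsigned> &Assigned,
                      unsigned Default = VGPRRegBankID) {
  auto It = Assigned.find(Reg);
  return It == Assigned.end() ? Default : It->second;
}

int main() {
  std::unordered_map<Register, unsigned> Assigned{{7, SGPRRegBankID}};
  std::cout << getRegBankID(7, Assigned) << '\n';                // 0 (assigned)
  std::cout << getRegBankID(8, Assigned, SGPRRegBankID) << '\n'; // 0 (default)
  return 0;
}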
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 8f38ec4eeb3a..1c1441729e30 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -20,7 +20,6 @@
#define GET_REGBANK_DECLARATIONS
#include "AMDGPUGenRegisterBank.inc"
-#undef GET_REGBANK_DECLARATIONS
namespace llvm {
@@ -39,7 +38,8 @@ protected:
#define GET_TARGET_REGBANK_CLASS
#include "AMDGPUGenRegisterBank.inc"
};
-class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
+
+class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo {
public:
const GCNSubtarget &Subtarget;
const SIRegisterInfo *TRI;
@@ -105,7 +105,6 @@ public:
getInstrMappingForLoad(const MachineInstr &MI) const;
unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
unsigned Default = AMDGPU::VGPRRegBankID) const;
// Return a value mapping for an operand that is required to be an SGPR.
@@ -150,6 +149,9 @@ public:
getInstrAlternativeMappingsIntrinsicWSideEffects(
const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
+ unsigned getMappingType(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI) const;
+
bool isSALUMapping(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 9f6ebd00cd97..6c70b53b23c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
def SGPRRegBank : RegisterBank<"SGPR",
- [SReg_LO16, SReg_32, SReg_64, SReg_128, SReg_160, SReg_192, SReg_256, SReg_512, SReg_1024]
+ [SReg_LO16, SReg_32, SReg_64, SReg_96, SReg_128, SReg_160, SReg_192, SReg_256, SReg_512, SReg_1024]
>;
def VGPRRegBank : RegisterBank<"VGPR",
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 9c3d96de6d68..e2aafa25142e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -43,35 +43,16 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <utility>
#define DEBUG_TYPE "amdgpu-rewrite-out-arguments"
@@ -303,8 +284,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
for (ReturnInst *RI : Returns) {
BasicBlock *BB = RI->getParent();
- MemDepResult Q = MDA->getPointerDependencyFrom(MemoryLocation(OutArg),
- true, BB->end(), BB, RI);
+ MemDepResult Q = MDA->getPointerDependencyFrom(
+ MemoryLocation::getBeforeOrAfter(OutArg), true, BB->end(), BB, RI);
StoreInst *SI = nullptr;
if (Q.isDef())
SI = dyn_cast<StoreInst>(Q.getInst());
@@ -325,9 +306,10 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
Value *ReplVal = Store.second->getValueOperand();
auto &ValVec = Replacements[Store.first];
- if (llvm::find_if(ValVec,
- [OutArg](const std::pair<Argument *, Value *> &Entry) {
- return Entry.first == OutArg;}) != ValVec.end()) {
+ if (llvm::any_of(ValVec,
+ [OutArg](const std::pair<Argument *, Value *> &Entry) {
+ return Entry.first == OutArg;
+ })) {
LLVM_DEBUG(dbgs()
<< "Saw multiple out arg stores" << *OutArg << '\n');
// It is possible to see stores to the same argument multiple times,
@@ -408,8 +390,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (DL->getTypeSizeInBits(EffectiveEltTy) !=
DL->getTypeSizeInBits(Val->getType())) {
assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
- Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()),
- ArrayRef<int>{0, 1, 2});
+ Val = B.CreateShuffleVector(Val, ArrayRef<int>{0, 1, 2});
}
Val = B.CreateBitCast(Val, EffectiveEltTy);
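One note on the hunk above: replacing the find_if-versus-end comparison with llvm::any_of states the membership question directly. A minimal, self-contained sketch of the same idiom using std::any_of follows; the int pairs stand in for the (Argument *, Value *) entries and are assumptions made only for this example.

#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  // Stand-in for the per-argument replacement list kept by the pass.
  using Entry = std::pair<int, int>;
  std::vector<Entry> ValVec{{1, 10}, {2, 20}};
  int OutArg = 2;

  // Same intent as find_if(...) != ValVec.end(), phrased as a predicate.
  bool AlreadySeen =
      std::any_of(ValVec.begin(), ValVec.end(),
                  [OutArg](const Entry &E) { return E.first == OutArg; });
  std::cout << (AlreadySeen ? "seen before" : "first store") << '\n';
  return 0;
}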
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index bc68310b2f5c..fd65727f04d4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -225,6 +225,7 @@ def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_or>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_inc>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_dec>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
@@ -238,6 +239,7 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_or>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_inc>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_dec>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_buffer_atomic_csub>;
def : SourceOfDivergence<int_amdgcn_ps_live>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 213788ae0f67..f1a7d7463676 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -13,18 +13,21 @@
#include "AMDGPUSubtarget.h"
#include "AMDGPU.h"
-#include "AMDGPUTargetMachine.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
+#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -50,6 +53,15 @@ static cl::opt<bool> EnableVGPRIndexMode(
cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
cl::init(false));
+static cl::opt<bool> EnableFlatScratch(
+ "amdgpu-enable-flat-scratch",
+ cl::desc("Use flat scratch instructions"),
+ cl::init(false));
+
+static cl::opt<bool> UseAA("amdgpu-use-aa-in-codegen",
+ cl::desc("Enable the use of AA during codegen."),
+ cl::init(true));
+
GCNSubtarget::~GCNSubtarget() = default;
R600Subtarget &
@@ -57,7 +69,7 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
SmallString<256> FullFS("+promote-alloca,");
FullFS += FS;
- ParseSubtargetFeatures(GPU, FullFS);
+ ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);
HasMulU24 = getGeneration() >= EVERGREEN;
HasMulI24 = hasCaymanISA();
@@ -77,11 +89,11 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
// Similarly we want enable-prt-strict-null to be on by default and not to
// unset everything else if it is disabled
- // Assuming ECC is enabled is the conservative default.
- SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
+ SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,");
- if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
- FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
+ // Turn on features that the HSA ABI requires. Also turn on FlatForGlobal by default.
+ if (isAmdHsaOS())
+ FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
@@ -97,17 +109,38 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += FS;
- ParseSubtargetFeatures(GPU, FullFS);
+ ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);
+
+ // Implement the "generic" processors, which act as the default when no
+ // generation features are enabled (e.g. for -mcpu=''). HSA OS defaults to
+ // the first amdgcn target that supports flat addressing. Other OSes default
+ // to the first amdgcn target.
+ if (Gen == AMDGPUSubtarget::INVALID) {
+ Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS
+ : AMDGPUSubtarget::SOUTHERN_ISLANDS;
+ }
// We don't support FP64 for EG/NI atm.
assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
- // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
- // on VI and newer hardware to avoid assertion failures due to missing ADDR64
- // variants of MUBUF instructions.
- if (!hasAddr64() && !FS.contains("flat-for-global")) {
+ // Targets must support 64-bit offsets for MUBUF instructions and/or flat
+ // operations; otherwise they cannot access a 64-bit global address space.
+ assert(hasAddr64() || hasFlat());
+ // Unless +-flat-for-global is specified, turn on FlatForGlobal for targets
+ // that do not support ADDR64 variants of MUBUF instructions. Such targets
+ // cannot use a 64-bit offset with a MUBUF instruction to access the global
+ // address space.
+ if (!hasAddr64() && !FS.contains("flat-for-global") && !FlatForGlobal) {
+ ToggleFeature(AMDGPU::FeatureFlatForGlobal);
FlatForGlobal = true;
}
+ // Unless +-flat-for-global is specified, use MUBUF instructions for global
+ // address space access if flat operations are not available.
+ if (!hasFlat() && !FS.contains("flat-for-global") && FlatForGlobal) {
+ ToggleFeature(AMDGPU::FeatureFlatForGlobal);
+ FlatForGlobal = false;
+ }
// Set defaults if needed.
if (MaxPrivateElementSize == 0)
@@ -131,20 +164,12 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
- // Disable XNACK on targets where it is not enabled by default unless it is
- // explicitly requested.
- if (!FS.contains("+xnack") && DoesNotSupportXNACK && EnableXNACK) {
- ToggleFeature(AMDGPU::FeatureXNACK);
- EnableXNACK = false;
- }
+ TargetID.setTargetIDFromFeaturesString(FS);
- // ECC is on by default, but turn it off if the hardware doesn't support it
- // anyway. This matters for the gfx9 targets with d16 loads, but don't support
- // ECC.
- if (DoesNotSupportSRAMECC && EnableSRAMECC) {
- ToggleFeature(AMDGPU::FeatureSRAMECC);
- EnableSRAMECC = false;
- }
+ LLVM_DEBUG(dbgs() << "xnack setting for subtarget: "
+ << TargetID.getXnackSetting() << '\n');
+ LLVM_DEBUG(dbgs() << "sramecc setting for subtarget: "
+ << TargetID.getSramEccSetting() << '\n');
return *this;
}
@@ -170,10 +195,11 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const GCNTargetMachine &TM) :
- AMDGPUGenSubtargetInfo(TT, GPU, FS),
+ AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS),
AMDGPUSubtarget(TT),
TargetTriple(TT),
- Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS),
+ TargetID(*this),
+ Gen(INVALID),
InstrItins(getInstrItineraryForCPU(GPU)),
LDSBankCount(0),
MaxPrivateElementSize(0),
@@ -184,13 +210,12 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FlatForGlobal(false),
AutoWaitcntBeforeBarrier(false),
- CodeObjectV3(false),
UnalignedScratchAccess(false),
- UnalignedBufferAccess(false),
+ UnalignedAccessMode(false),
HasApertureRegs(false),
+ SupportsXNACK(false),
EnableXNACK(false),
- DoesNotSupportXNACK(false),
EnableCuMode(false),
TrapHandler(false),
@@ -239,8 +264,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasMAIInsts(false),
HasPkFmacF16Inst(false),
HasAtomicFaddInsts(false),
+ SupportsSRAMECC(false),
EnableSRAMECC(false),
- DoesNotSupportSRAMECC(false),
HasNoSdstCMPX(false),
HasVscnt(false),
HasGetWaveIdInst(false),
@@ -257,6 +282,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasUnpackedD16VMem(false),
LDSMisalignedBug(false),
HasMFMAInlineLiteralBug(false),
+ UnalignedBufferAccess(false),
+ UnalignedDSAccess(false),
ScalarizeGlobal(false),
@@ -269,6 +296,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasNSAtoVMEMBug(false),
HasOffset3fBug(false),
HasFlatSegmentOffsetBug(false),
+ HasImageStoreD16Bug(false),
+ HasImageGather4D16Bug(false),
FeatureDisable(false),
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
@@ -283,20 +312,24 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}
+bool GCNSubtarget::enableFlatScratch() const {
+ return EnableFlatScratch && hasFlatScratchInsts();
+}
+
unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
if (getGeneration() < GFX10)
return 1;
switch (Opcode) {
- case AMDGPU::V_LSHLREV_B64:
+ case AMDGPU::V_LSHLREV_B64_e64:
case AMDGPU::V_LSHLREV_B64_gfx10:
- case AMDGPU::V_LSHL_B64:
- case AMDGPU::V_LSHRREV_B64:
+ case AMDGPU::V_LSHL_B64_e64:
+ case AMDGPU::V_LSHRREV_B64_e64:
case AMDGPU::V_LSHRREV_B64_gfx10:
- case AMDGPU::V_LSHR_B64:
- case AMDGPU::V_ASHRREV_I64:
+ case AMDGPU::V_LSHR_B64_e64:
+ case AMDGPU::V_ASHRREV_I64_e64:
case AMDGPU::V_ASHRREV_I64_gfx10:
- case AMDGPU::V_ASHR_I64:
+ case AMDGPU::V_ASHR_I64_e64:
return 1;
}
@@ -436,6 +469,25 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
return Requested;
}
+static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim) {
+ auto Node = Kernel.getMetadata("reqd_work_group_size");
+ if (Node && Node->getNumOperands() == 3)
+ return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
+ return std::numeric_limits<unsigned>::max();
+}
+
+bool AMDGPUSubtarget::isMesaKernel(const Function &F) const {
+ return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
+}
+
+unsigned AMDGPUSubtarget::getMaxWorkitemID(const Function &Kernel,
+ unsigned Dimension) const {
+ unsigned ReqdSize = getReqdWorkGroupSize(Kernel, Dimension);
+ if (ReqdSize != std::numeric_limits<unsigned>::max())
+ return ReqdSize - 1;
+ return getFlatWorkGroupSizes(Kernel).second - 1;
+}
+
bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
Function *Kernel = I->getParent()->getParent();
unsigned MinSize = 0;
@@ -472,11 +524,11 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
default:
break;
}
+
if (Dim <= 3) {
- if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
- if (Node->getNumOperands() == 3)
- MinSize = MaxSize = mdconst::extract<ConstantInt>(
- Node->getOperand(Dim))->getZExtValue();
+ unsigned ReqdSize = getReqdWorkGroupSize(*Kernel, Dim);
+ if (ReqdSize != std::numeric_limits<unsigned>::max())
+ MinSize = MaxSize = ReqdSize;
}
}
}
@@ -498,6 +550,12 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
return true;
}
+unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
+ if (isMesaKernel(F))
+ return 16;
+ return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
+}
+
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
Align &MaxAlign) const {
assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
@@ -508,12 +566,15 @@ uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
MaxAlign = Align(1);
for (const Argument &Arg : F.args()) {
- Type *ArgTy = Arg.getType();
+ const bool IsByRef = Arg.hasByRefAttr();
+ Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
+ MaybeAlign Alignment = IsByRef ? Arg.getParamAlign() : None;
+ if (!Alignment)
+ Alignment = DL.getABITypeAlign(ArgTy);
- const Align Alignment = DL.getABITypeAlign(ArgTy);
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
- MaxAlign = std::max(MaxAlign, Alignment);
+ MaxAlign = max(MaxAlign, Alignment);
}
return ExplicitArgBytes;
@@ -536,9 +597,14 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
return alignTo(TotalSize, 4);
}
+AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour() const {
+ return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32
+ : AMDGPUDwarfFlavour::Wave64;
+}
+
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
- R600GenSubtargetInfo(TT, GPU, FS),
+ R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/GPU, FS),
AMDGPUSubtarget(TT),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
@@ -571,13 +637,15 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
}
bool GCNSubtarget::hasMadF16() const {
- return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
+ return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16_e64) != -1;
}
bool GCNSubtarget::useVGPRIndexMode() const {
return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode());
}
+bool GCNSubtarget::useAA() const { return UseAA; }
+
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return getMaxWavesPerEU();
@@ -787,7 +855,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
for (unsigned I = 0; I < Succs.size(); ++I) {
for (const SDep &SI : Succs[I]->Succs) {
const SUnit *SU = SI.getSUnit();
- if (SU != Succs[I] && llvm::find(Succs, SU) == Succs.end())
+ if (SU != Succs[I] && !llvm::is_contained(Succs, SU))
Succs.push_back(SU);
}
}
@@ -795,7 +863,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
SmallPtrSet<const SUnit*, 32> Visited;
while (!Preds.empty()) {
const SUnit *SU = Preds.pop_back_val();
- if (llvm::find(Succs, SU) != Succs.end())
+ if (llvm::is_contained(Succs, SU))
return false;
Visited.insert(SU);
for (const SDep &SI : SU->Preds)
@@ -859,8 +927,8 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
for (SUnit &SU : DAG->SUnits) {
MachineInstr &MAI = *SU.getInstr();
if (!TII->isMAI(MAI) ||
- MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32 ||
- MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32)
+ MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
+ MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64)
continue;
unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1;
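Among the AMDGPUSubtarget.cpp changes above, getReqdWorkGroupSize and getMaxWorkitemID centralize the reqd_work_group_size handling: the metadata lookup returns UINT_MAX as a "not specified" sentinel, and the maximum workitem ID falls back to the flat work-group size bound. The sketch below reproduces only that control flow; the pointer-to-array argument fakes the metadata node and the flat work-group bound is passed in by hand, both assumptions made for the example.

#include <array>
#include <iostream>
#include <limits>

// Fake metadata: a kernel either carries a 3-element reqd_work_group_size
// tuple (non-null pointer) or it does not (nullptr).
static unsigned getReqdWorkGroupSize(const std::array<unsigned, 3> *Reqd,
                                     unsigned Dim) {
  if (Reqd)
    return (*Reqd)[Dim];
  return std::numeric_limits<unsigned>::max(); // sentinel: not specified
}

static unsigned getMaxWorkitemID(const std::array<unsigned, 3> *Reqd,
                                 unsigned Dim, unsigned FlatWorkGroupSizeMax) {
  unsigned ReqdSize = getReqdWorkGroupSize(Reqd, Dim);
  if (ReqdSize != std::numeric_limits<unsigned>::max())
    return ReqdSize - 1;            // exact bound from the metadata
  return FlatWorkGroupSizeMax - 1;  // conservative fallback
}

int main() {
  std::array<unsigned, 3> Sizes{64, 1, 1};
  std::cout << getMaxWorkitemID(&Sizes, 0, 1024) << '\n';  // 63
  std::cout << getMaxWorkitemID(nullptr, 0, 1024) << '\n'; // 1023
  return 0;
}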
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index c833bfbcf936..ba3a8acae551 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
+//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,58 +7,38 @@
//==-----------------------------------------------------------------------===//
//
/// \file
-/// AMDGPU specific subclass of TargetSubtarget.
+/// Base class for AMDGPU specific classes of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
-#include "AMDGPU.h"
-#include "AMDGPUCallLowering.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "R600FrameLowering.h"
-#include "R600ISelLowering.h"
-#include "R600InstrInfo.h"
-#include "SIFrameLowering.h"
-#include "SIISelLowering.h"
-#include "SIInstrInfo.h"
-#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Support/MathExtras.h"
-#include <cassert>
-#include <cstdint>
-#include <memory>
-#include <utility>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "AMDGPUGenSubtargetInfo.inc"
-#define GET_SUBTARGETINFO_HEADER
-#include "R600GenSubtargetInfo.inc"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Support/Alignment.h"
namespace llvm {
-class StringRef;
+enum AMDGPUDwarfFlavour : unsigned;
+class Function;
+class Instruction;
+class MachineFunction;
+class TargetMachine;
class AMDGPUSubtarget {
public:
enum Generation {
- R600 = 0,
- R700 = 1,
- EVERGREEN = 2,
- NORTHERN_ISLANDS = 3,
- SOUTHERN_ISLANDS = 4,
- SEA_ISLANDS = 5,
- VOLCANIC_ISLANDS = 6,
- GFX9 = 7,
- GFX10 = 8
+ INVALID = 0,
+ R600 = 1,
+ R700 = 2,
+ EVERGREEN = 3,
+ NORTHERN_ISLANDS = 4,
+ SOUTHERN_ISLANDS = 5,
+ SEA_ISLANDS = 6,
+ VOLCANIC_ISLANDS = 7,
+ GFX9 = 8,
+ GFX10 = 9
};
private:
@@ -78,7 +58,7 @@ protected:
bool EnablePromoteAlloca;
bool HasTrigReducedRange;
unsigned MaxWavesPerEU;
- int LocalMemorySize;
+ unsigned LocalMemorySize;
char WavefrontSizeLog2;
public:
@@ -134,9 +114,7 @@ public:
return TargetTriple.getOS() == Triple::Mesa3D;
}
- bool isMesaKernel(const Function &F) const {
- return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
- }
+ bool isMesaKernel(const Function &F) const;
bool isAmdHsaOrMesa(const Function &F) const {
return isAmdHsaOS() || isMesaKernel(F);
@@ -202,7 +180,7 @@ public:
return WavefrontSizeLog2;
}
- int getLocalMemorySize() const {
+ unsigned getLocalMemorySize() const {
return LocalMemorySize;
}
@@ -239,1150 +217,26 @@ public:
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
- /// Creates value range metadata on an workitemid.* inrinsic call or load.
+ /// Return the maximum workitem ID value in the function for the given
+ /// dimension (0, 1, or 2).
+ unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
+
+ /// Creates value range metadata on a workitemid.* intrinsic call or load.
bool makeLIDRangeMetadata(Instruction *I) const;
/// \returns Number of bytes of arguments that are passed to a shader or
/// kernel in addition to the explicit ones declared for the function.
- unsigned getImplicitArgNumBytes(const Function &F) const {
- if (isMesaKernel(F))
- return 16;
- return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
- }
+ unsigned getImplicitArgNumBytes(const Function &F) const;
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
/// \returns Corresponsing DWARF register number mapping flavour for the
/// \p WavefrontSize.
- AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const {
- return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32
- : AMDGPUDwarfFlavour::Wave64;
- }
+ AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
virtual ~AMDGPUSubtarget() {}
};
-class GCNSubtarget : public AMDGPUGenSubtargetInfo,
- public AMDGPUSubtarget {
-
- using AMDGPUSubtarget::getMaxWavesPerEU;
-
-public:
- enum TrapHandlerAbi {
- TrapHandlerAbiNone = 0,
- TrapHandlerAbiHsa = 1
- };
-
- enum TrapID {
- TrapIDHardwareReserved = 0,
- TrapIDHSADebugTrap = 1,
- TrapIDLLVMTrap = 2,
- TrapIDLLVMDebugTrap = 3,
- TrapIDDebugBreakpoint = 7,
- TrapIDDebugReserved8 = 8,
- TrapIDDebugReservedFE = 0xfe,
- TrapIDDebugReservedFF = 0xff
- };
-
- enum TrapRegValues {
- LLVMTrapHandlerRegValue = 1
- };
-
-private:
- /// GlobalISel related APIs.
- std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
- std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
- std::unique_ptr<InstructionSelector> InstSelector;
- std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
-
-protected:
- // Basic subtarget description.
- Triple TargetTriple;
- unsigned Gen;
- InstrItineraryData InstrItins;
- int LDSBankCount;
- unsigned MaxPrivateElementSize;
-
- // Possibly statically set by tablegen, but may want to be overridden.
- bool FastFMAF32;
- bool FastDenormalF32;
- bool HalfRate64Ops;
-
- // Dynamially set bits that enable features.
- bool FlatForGlobal;
- bool AutoWaitcntBeforeBarrier;
- bool CodeObjectV3;
- bool UnalignedScratchAccess;
- bool UnalignedBufferAccess;
- bool HasApertureRegs;
- bool EnableXNACK;
- bool DoesNotSupportXNACK;
- bool EnableCuMode;
- bool TrapHandler;
-
- // Used as options.
- bool EnableLoadStoreOpt;
- bool EnableUnsafeDSOffsetFolding;
- bool EnableSIScheduler;
- bool EnableDS128;
- bool EnablePRTStrictNull;
- bool DumpCode;
-
- // Subtarget statically properties set by tablegen
- bool FP64;
- bool FMA;
- bool MIMG_R128;
- bool IsGCN;
- bool GCN3Encoding;
- bool CIInsts;
- bool GFX8Insts;
- bool GFX9Insts;
- bool GFX10Insts;
- bool GFX10_3Insts;
- bool GFX7GFX8GFX9Insts;
- bool SGPRInitBug;
- bool HasSMemRealTime;
- bool HasIntClamp;
- bool HasFmaMixInsts;
- bool HasMovrel;
- bool HasVGPRIndexMode;
- bool HasScalarStores;
- bool HasScalarAtomics;
- bool HasSDWAOmod;
- bool HasSDWAScalar;
- bool HasSDWASdst;
- bool HasSDWAMac;
- bool HasSDWAOutModsVOPC;
- bool HasDPP;
- bool HasDPP8;
- bool HasR128A16;
- bool HasGFX10A16;
- bool HasG16;
- bool HasNSAEncoding;
- bool GFX10_BEncoding;
- bool HasDLInsts;
- bool HasDot1Insts;
- bool HasDot2Insts;
- bool HasDot3Insts;
- bool HasDot4Insts;
- bool HasDot5Insts;
- bool HasDot6Insts;
- bool HasMAIInsts;
- bool HasPkFmacF16Inst;
- bool HasAtomicFaddInsts;
- bool EnableSRAMECC;
- bool DoesNotSupportSRAMECC;
- bool HasNoSdstCMPX;
- bool HasVscnt;
- bool HasGetWaveIdInst;
- bool HasSMemTimeInst;
- bool HasRegisterBanking;
- bool HasVOP3Literal;
- bool HasNoDataDepHazard;
- bool FlatAddressSpace;
- bool FlatInstOffsets;
- bool FlatGlobalInsts;
- bool FlatScratchInsts;
- bool ScalarFlatScratchInsts;
- bool AddNoCarryInsts;
- bool HasUnpackedD16VMem;
- bool R600ALUInst;
- bool CaymanISA;
- bool CFALUBug;
- bool LDSMisalignedBug;
- bool HasMFMAInlineLiteralBug;
- bool HasVertexCache;
- short TexVTXClauseSize;
- bool ScalarizeGlobal;
-
- bool HasVcmpxPermlaneHazard;
- bool HasVMEMtoScalarWriteHazard;
- bool HasSMEMtoVectorWriteHazard;
- bool HasInstFwdPrefetchBug;
- bool HasVcmpxExecWARHazard;
- bool HasLdsBranchVmemWARHazard;
- bool HasNSAtoVMEMBug;
- bool HasOffset3fBug;
- bool HasFlatSegmentOffsetBug;
-
- // Dummy feature to use for assembler in tablegen.
- bool FeatureDisable;
-
- SelectionDAGTargetInfo TSInfo;
-private:
- SIInstrInfo InstrInfo;
- SITargetLowering TLInfo;
- SIFrameLowering FrameLowering;
-
- // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
- static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
-
-public:
- GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const GCNTargetMachine &TM);
- ~GCNSubtarget() override;
-
- GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
- StringRef GPU, StringRef FS);
-
- const SIInstrInfo *getInstrInfo() const override {
- return &InstrInfo;
- }
-
- const SIFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
-
- const SITargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
-
- const SIRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
- }
-
- const CallLowering *getCallLowering() const override {
- return CallLoweringInfo.get();
- }
-
- const InlineAsmLowering *getInlineAsmLowering() const override {
- return InlineAsmLoweringInfo.get();
- }
-
- InstructionSelector *getInstructionSelector() const override {
- return InstSelector.get();
- }
-
- const LegalizerInfo *getLegalizerInfo() const override {
- return Legalizer.get();
- }
-
- const RegisterBankInfo *getRegBankInfo() const override {
- return RegBankInfo.get();
- }
-
- // Nothing implemented, just prevent crashes on use.
- const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
-
- const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
- }
-
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- Generation getGeneration() const {
- return (Generation)Gen;
- }
-
- /// Return the number of high bits known to be zero fror a frame index.
- unsigned getKnownHighZeroBitsForFrameIndex() const {
- return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
- }
-
- int getLDSBankCount() const {
- return LDSBankCount;
- }
-
- unsigned getMaxPrivateElementSize() const {
- return MaxPrivateElementSize;
- }
-
- unsigned getConstantBusLimit(unsigned Opcode) const;
-
- bool hasIntClamp() const {
- return HasIntClamp;
- }
-
- bool hasFP64() const {
- return FP64;
- }
-
- bool hasMIMG_R128() const {
- return MIMG_R128;
- }
-
- bool hasHWFP64() const {
- return FP64;
- }
-
- bool hasFastFMAF32() const {
- return FastFMAF32;
- }
-
- bool hasHalfRate64Ops() const {
- return HalfRate64Ops;
- }
-
- bool hasAddr64() const {
- return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
- }
-
- // Return true if the target only has the reverse operand versions of VALU
- // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
- bool hasOnlyRevVALUShifts() const {
- return getGeneration() >= VOLCANIC_ISLANDS;
- }
-
- bool hasFractBug() const {
- return getGeneration() == SOUTHERN_ISLANDS;
- }
-
- bool hasBFE() const {
- return true;
- }
-
- bool hasBFI() const {
- return true;
- }
-
- bool hasBFM() const {
- return hasBFE();
- }
-
- bool hasBCNT(unsigned Size) const {
- return true;
- }
-
- bool hasFFBL() const {
- return true;
- }
-
- bool hasFFBH() const {
- return true;
- }
-
- bool hasMed3_16() const {
- return getGeneration() >= AMDGPUSubtarget::GFX9;
- }
-
- bool hasMin3Max3_16() const {
- return getGeneration() >= AMDGPUSubtarget::GFX9;
- }
-
- bool hasFmaMixInsts() const {
- return HasFmaMixInsts;
- }
-
- bool hasCARRY() const {
- return true;
- }
-
- bool hasFMA() const {
- return FMA;
- }
-
- bool hasSwap() const {
- return GFX9Insts;
- }
-
- bool hasScalarPackInsts() const {
- return GFX9Insts;
- }
-
- bool hasScalarMulHiInsts() const {
- return GFX9Insts;
- }
-
- TrapHandlerAbi getTrapHandlerAbi() const {
- return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
- }
-
- /// True if the offset field of DS instructions works as expected. On SI, the
- /// offset uses a 16-bit adder and does not always wrap properly.
- bool hasUsableDSOffset() const {
- return getGeneration() >= SEA_ISLANDS;
- }
-
- bool unsafeDSOffsetFoldingEnabled() const {
- return EnableUnsafeDSOffsetFolding;
- }
-
- /// Condition output from div_scale is usable.
- bool hasUsableDivScaleConditionOutput() const {
- return getGeneration() != SOUTHERN_ISLANDS;
- }
-
- /// Extra wait hazard is needed in some cases before
- /// s_cbranch_vccnz/s_cbranch_vccz.
- bool hasReadVCCZBug() const {
- return getGeneration() <= SEA_ISLANDS;
- }
-
- /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
- bool partialVCCWritesUpdateVCCZ() const {
- return getGeneration() >= GFX10;
- }
-
- /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
- /// was written by a VALU instruction.
- bool hasSMRDReadVALUDefHazard() const {
- return getGeneration() == SOUTHERN_ISLANDS;
- }
-
- /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
- /// SGPR was written by a VALU Instruction.
- bool hasVMEMReadSGPRVALUDefHazard() const {
- return getGeneration() >= VOLCANIC_ISLANDS;
- }
-
- bool hasRFEHazards() const {
- return getGeneration() >= VOLCANIC_ISLANDS;
- }
-
- /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
- unsigned getSetRegWaitStates() const {
- return getGeneration() <= SEA_ISLANDS ? 1 : 2;
- }
-
- bool dumpCode() const {
- return DumpCode;
- }
-
- /// Return the amount of LDS that can be used that will not restrict the
- /// occupancy lower than WaveCount.
- unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
- const Function &) const;
-
- bool supportsMinMaxDenormModes() const {
- return getGeneration() >= AMDGPUSubtarget::GFX9;
- }
-
- /// \returns If target supports S_DENORM_MODE.
- bool hasDenormModeInst() const {
- return getGeneration() >= AMDGPUSubtarget::GFX10;
- }
-
- bool useFlatForGlobal() const {
- return FlatForGlobal;
- }
-
- /// \returns If target supports ds_read/write_b128 and user enables generation
- /// of ds_read/write_b128.
- bool useDS128() const {
- return CIInsts && EnableDS128;
- }
-
- /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
- bool haveRoundOpsF64() const {
- return CIInsts;
- }
-
- /// \returns If MUBUF instructions always perform range checking, even for
- /// buffer resources used for private memory access.
- bool privateMemoryResourceIsRangeChecked() const {
- return getGeneration() < AMDGPUSubtarget::GFX9;
- }
-
- /// \returns If target requires PRT Struct NULL support (zero result registers
- /// for sparse texture support).
- bool usePRTStrictNull() const {
- return EnablePRTStrictNull;
- }
-
- bool hasAutoWaitcntBeforeBarrier() const {
- return AutoWaitcntBeforeBarrier;
- }
-
- bool hasCodeObjectV3() const {
- // FIXME: Need to add code object v3 support for mesa and pal.
- return isAmdHsaOS() ? CodeObjectV3 : false;
- }
-
- bool hasUnalignedBufferAccess() const {
- return UnalignedBufferAccess;
- }
-
- bool hasUnalignedScratchAccess() const {
- return UnalignedScratchAccess;
- }
-
- bool hasApertureRegs() const {
- return HasApertureRegs;
- }
-
- bool isTrapHandlerEnabled() const {
- return TrapHandler;
- }
-
- bool isXNACKEnabled() const {
- return EnableXNACK;
- }
-
- bool isCuModeEnabled() const {
- return EnableCuMode;
- }
-
- bool hasFlatAddressSpace() const {
- return FlatAddressSpace;
- }
-
- bool hasFlatScrRegister() const {
- return hasFlatAddressSpace();
- }
-
- bool hasFlatInstOffsets() const {
- return FlatInstOffsets;
- }
-
- bool hasFlatGlobalInsts() const {
- return FlatGlobalInsts;
- }
-
- bool hasFlatScratchInsts() const {
- return FlatScratchInsts;
- }
-
- bool hasScalarFlatScratchInsts() const {
- return ScalarFlatScratchInsts;
- }
-
- bool hasGlobalAddTidInsts() const {
- return GFX10_BEncoding;
- }
-
- bool hasAtomicCSub() const {
- return GFX10_BEncoding;
- }
-
- bool hasMultiDwordFlatScratchAddressing() const {
- return getGeneration() >= GFX9;
- }
-
- bool hasFlatSegmentOffsetBug() const {
- return HasFlatSegmentOffsetBug;
- }
-
- bool hasFlatLgkmVMemCountInOrder() const {
- return getGeneration() > GFX9;
- }
-
- bool hasD16LoadStore() const {
- return getGeneration() >= GFX9;
- }
-
- bool d16PreservesUnusedBits() const {
- return hasD16LoadStore() && !isSRAMECCEnabled();
- }
-
- bool hasD16Images() const {
- return getGeneration() >= VOLCANIC_ISLANDS;
- }
-
- /// Return if most LDS instructions have an m0 use that require m0 to be
- /// iniitalized.
- bool ldsRequiresM0Init() const {
- return getGeneration() < GFX9;
- }
-
- // True if the hardware rewinds and replays GWS operations if a wave is
- // preempted.
- //
- // If this is false, a GWS operation requires testing if a nack set the
- // MEM_VIOL bit, and repeating if so.
- bool hasGWSAutoReplay() const {
- return getGeneration() >= GFX9;
- }
-
- /// \returns if target has ds_gws_sema_release_all instruction.
- bool hasGWSSemaReleaseAll() const {
- return CIInsts;
- }
-
- bool hasAddNoCarry() const {
- return AddNoCarryInsts;
- }
-
- bool hasUnpackedD16VMem() const {
- return HasUnpackedD16VMem;
- }
-
- // Covers VS/PS/CS graphics shaders
- bool isMesaGfxShader(const Function &F) const {
- return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
- }
-
- bool hasMad64_32() const {
- return getGeneration() >= SEA_ISLANDS;
- }
-
- bool hasSDWAOmod() const {
- return HasSDWAOmod;
- }
-
- bool hasSDWAScalar() const {
- return HasSDWAScalar;
- }
-
- bool hasSDWASdst() const {
- return HasSDWASdst;
- }
-
- bool hasSDWAMac() const {
- return HasSDWAMac;
- }
-
- bool hasSDWAOutModsVOPC() const {
- return HasSDWAOutModsVOPC;
- }
-
- bool hasDLInsts() const {
- return HasDLInsts;
- }
-
- bool hasDot1Insts() const {
- return HasDot1Insts;
- }
-
- bool hasDot2Insts() const {
- return HasDot2Insts;
- }
-
- bool hasDot3Insts() const {
- return HasDot3Insts;
- }
-
- bool hasDot4Insts() const {
- return HasDot4Insts;
- }
-
- bool hasDot5Insts() const {
- return HasDot5Insts;
- }
-
- bool hasDot6Insts() const {
- return HasDot6Insts;
- }
-
- bool hasMAIInsts() const {
- return HasMAIInsts;
- }
-
- bool hasPkFmacF16Inst() const {
- return HasPkFmacF16Inst;
- }
-
- bool hasAtomicFaddInsts() const {
- return HasAtomicFaddInsts;
- }
-
- bool isSRAMECCEnabled() const {
- return EnableSRAMECC;
- }
-
- bool hasNoSdstCMPX() const {
- return HasNoSdstCMPX;
- }
-
- bool hasVscnt() const {
- return HasVscnt;
- }
-
- bool hasGetWaveIdInst() const {
- return HasGetWaveIdInst;
- }
-
- bool hasSMemTimeInst() const {
- return HasSMemTimeInst;
- }
-
- bool hasRegisterBanking() const {
- return HasRegisterBanking;
- }
-
- bool hasVOP3Literal() const {
- return HasVOP3Literal;
- }
-
- bool hasNoDataDepHazard() const {
- return HasNoDataDepHazard;
- }
-
- bool vmemWriteNeedsExpWaitcnt() const {
- return getGeneration() < SEA_ISLANDS;
- }
-
- // Scratch is allocated in 256 dword per wave blocks for the entire
- // wavefront. When viewed from the perspecive of an arbitrary workitem, this
- // is 4-byte aligned.
- //
- // Only 4-byte alignment is really needed to access anything. Transformations
- // on the pointer value itself may rely on the alignment / known low bits of
- // the pointer. Set this to something above the minimum to avoid needing
- // dynamic realignment in common cases.
- Align getStackAlignment() const { return Align(16); }
-
- bool enableMachineScheduler() const override {
- return true;
- }
-
- bool enableSubRegLiveness() const override {
- return true;
- }
-
- void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
- bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
-
- // static wrappers
- static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
-
- // XXX - Why is this here if it isn't in the default pass set?
- bool enableEarlyIfConversion() const override {
- return true;
- }
-
- void overrideSchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const override;
-
- unsigned getMaxNumUserSGPRs() const {
- return 16;
- }
-
- bool hasSMemRealTime() const {
- return HasSMemRealTime;
- }
-
- bool hasMovrel() const {
- return HasMovrel;
- }
-
- bool hasVGPRIndexMode() const {
- return HasVGPRIndexMode;
- }
-
- bool useVGPRIndexMode() const;
-
- bool hasScalarCompareEq64() const {
- return getGeneration() >= VOLCANIC_ISLANDS;
- }
-
- bool hasScalarStores() const {
- return HasScalarStores;
- }
-
- bool hasScalarAtomics() const {
- return HasScalarAtomics;
- }
-
- bool hasLDSFPAtomics() const {
- return GFX8Insts;
- }
-
- bool hasDPP() const {
- return HasDPP;
- }
-
- bool hasDPPBroadcasts() const {
- return HasDPP && getGeneration() < GFX10;
- }
-
- bool hasDPPWavefrontShifts() const {
- return HasDPP && getGeneration() < GFX10;
- }
-
- bool hasDPP8() const {
- return HasDPP8;
- }
-
- bool hasR128A16() const {
- return HasR128A16;
- }
-
- bool hasGFX10A16() const {
- return HasGFX10A16;
- }
-
- bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
-
- bool hasG16() const { return HasG16; }
-
- bool hasOffset3fBug() const {
- return HasOffset3fBug;
- }
-
- bool hasNSAEncoding() const {
- return HasNSAEncoding;
- }
-
- bool hasGFX10_BEncoding() const {
- return GFX10_BEncoding;
- }
-
- bool hasGFX10_3Insts() const {
- return GFX10_3Insts;
- }
-
- bool hasMadF16() const;
-
- bool enableSIScheduler() const {
- return EnableSIScheduler;
- }
-
- bool loadStoreOptEnabled() const {
- return EnableLoadStoreOpt;
- }
-
- bool hasSGPRInitBug() const {
- return SGPRInitBug;
- }
-
- bool hasMFMAInlineLiteralBug() const {
- return HasMFMAInlineLiteralBug;
- }
-
- bool has12DWordStoreHazard() const {
- return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
- }
-
- // \returns true if the subtarget supports DWORDX3 load/store instructions.
- bool hasDwordx3LoadStores() const {
- return CIInsts;
- }
-
- bool hasSMovFedHazard() const {
- return getGeneration() == AMDGPUSubtarget::GFX9;
- }
-
- bool hasReadM0MovRelInterpHazard() const {
- return getGeneration() == AMDGPUSubtarget::GFX9;
- }
-
- bool hasReadM0SendMsgHazard() const {
- return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
- getGeneration() <= AMDGPUSubtarget::GFX9;
- }
-
- bool hasVcmpxPermlaneHazard() const {
- return HasVcmpxPermlaneHazard;
- }
-
- bool hasVMEMtoScalarWriteHazard() const {
- return HasVMEMtoScalarWriteHazard;
- }
-
- bool hasSMEMtoVectorWriteHazard() const {
- return HasSMEMtoVectorWriteHazard;
- }
-
- bool hasLDSMisalignedBug() const {
- return LDSMisalignedBug && !EnableCuMode;
- }
-
- bool hasInstFwdPrefetchBug() const {
- return HasInstFwdPrefetchBug;
- }
-
- bool hasVcmpxExecWARHazard() const {
- return HasVcmpxExecWARHazard;
- }
-
- bool hasLdsBranchVmemWARHazard() const {
- return HasLdsBranchVmemWARHazard;
- }
-
- bool hasNSAtoVMEMBug() const {
- return HasNSAtoVMEMBug;
- }
-
- bool hasHardClauses() const { return getGeneration() >= GFX10; }
-
- /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
- /// SGPRs
- unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
-
- /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
- /// VGPRs
- unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
-
- /// Return occupancy for the given function. Used LDS and a number of
- /// registers if provided.
- /// Note, occupancy can be affected by the scratch allocation as well, but
- /// we do not have enough information to compute it.
- unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
- unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
-
- /// \returns true if the flat_scratch register should be initialized with the
- /// pointer to the wave's scratch memory rather than a size and offset.
- bool flatScratchIsPointer() const {
- return getGeneration() >= AMDGPUSubtarget::GFX9;
- }
-
- /// \returns true if the machine has merged shaders in which s0-s7 are
- /// reserved by the hardware and user SGPRs start at s8
- bool hasMergedShaders() const {
- return getGeneration() >= GFX9;
- }
-
- /// \returns SGPR allocation granularity supported by the subtarget.
- unsigned getSGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
- }
-
- /// \returns SGPR encoding granularity supported by the subtarget.
- unsigned getSGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
- }
-
- /// \returns Total number of SGPRs supported by the subtarget.
- unsigned getTotalNumSGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
- }
-
- /// \returns Addressable number of SGPRs supported by the subtarget.
- unsigned getAddressableNumSGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
- }
-
- /// \returns Minimum number of SGPRs that meets the given number of waves per
- /// execution unit requirement supported by the subtarget.
- unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
- }
-
- /// \returns Maximum number of SGPRs that meets the given number of waves per
- /// execution unit requirement supported by the subtarget.
- unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
- return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
- }
-
- /// \returns Reserved number of SGPRs for given function \p MF.
- unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
-
- /// \returns Maximum number of SGPRs that meets number of waves per execution
- /// unit requirement for function \p MF, or number of SGPRs explicitly
- /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
- ///
- /// \returns Value that meets number of waves per execution unit requirement
- /// if explicitly requested value cannot be converted to integer, violates
- /// subtarget's specifications, or does not meet number of waves per execution
- /// unit requirement.
- unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
-
- /// \returns VGPR allocation granularity supported by the subtarget.
- unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
- }
-
- /// \returns VGPR encoding granularity supported by the subtarget.
- unsigned getVGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
- }
-
- /// \returns Total number of VGPRs supported by the subtarget.
- unsigned getTotalNumVGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
- }
-
- /// \returns Addressable number of VGPRs supported by the subtarget.
- unsigned getAddressableNumVGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
- }
-
- /// \returns Minimum number of VGPRs that meets given number of waves per
- /// execution unit requirement supported by the subtarget.
- unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
- }
-
- /// \returns Maximum number of VGPRs that meets given number of waves per
- /// execution unit requirement supported by the subtarget.
- unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
- }
-
- /// \returns Maximum number of VGPRs that meets number of waves per execution
- /// unit requirement for function \p MF, or number of VGPRs explicitly
- /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
- ///
- /// \returns Value that meets number of waves per execution unit requirement
- /// if explicitly requested value cannot be converted to integer, violates
- /// subtarget's specifications, or does not meet number of waves per execution
- /// unit requirement.
- unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
-
- void getPostRAMutations(
- std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
- const override;
-
- bool isWave32() const {
- return getWavefrontSize() == 32;
- }
-
- const TargetRegisterClass *getBoolRC() const {
- return getRegisterInfo()->getBoolRC();
- }
-
- /// \returns Maximum number of work groups per compute unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
- }
-
- /// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const override {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
- }
-
- /// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const override {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
- }
-
- /// \returns Number of waves per execution unit required to support the given
- /// \p FlatWorkGroupSize.
- unsigned
- getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
- }
-
- /// \returns Minimum number of waves per execution unit supported by the
- /// subtarget.
- unsigned getMinWavesPerEU() const override {
- return AMDGPU::IsaInfo::getMinWavesPerEU(this);
- }
-
- void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
- SDep &Dep) const override;
-};
-
-class R600Subtarget final : public R600GenSubtargetInfo,
- public AMDGPUSubtarget {
-private:
- R600InstrInfo InstrInfo;
- R600FrameLowering FrameLowering;
- bool FMA;
- bool CaymanISA;
- bool CFALUBug;
- bool HasVertexCache;
- bool R600ALUInst;
- bool FP64;
- short TexVTXClauseSize;
- Generation Gen;
- R600TargetLowering TLInfo;
- InstrItineraryData InstrItins;
- SelectionDAGTargetInfo TSInfo;
-
-public:
- R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
- const TargetMachine &TM);
-
- const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
-
- const R600FrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
-
- const R600TargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
-
- const R600RegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
- }
-
- const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
- }
-
- // Nothing implemented, just prevent crashes on use.
- const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
-
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- Generation getGeneration() const {
- return Gen;
- }
-
- Align getStackAlignment() const { return Align(4); }
-
- R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
- StringRef GPU, StringRef FS);
-
- bool hasBFE() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasBFI() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasBCNT(unsigned Size) const {
- if (Size == 32)
- return (getGeneration() >= EVERGREEN);
-
- return false;
- }
-
- bool hasBORROW() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasCARRY() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasCaymanISA() const {
- return CaymanISA;
- }
-
- bool hasFFBL() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasFFBH() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasFMA() const { return FMA; }
-
- bool hasCFAluBug() const { return CFALUBug; }
-
- bool hasVertexCache() const { return HasVertexCache; }
-
- short getTexVTXClauseSize() const { return TexVTXClauseSize; }
-
- bool enableMachineScheduler() const override {
- return true;
- }
-
- bool enableSubRegLiveness() const override {
- return true;
- }
-
- /// \returns Maximum number of work groups per compute unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
- }
-
- /// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const override {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
- }
-
- /// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const override {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
- }
-
- /// \returns Number of waves per execution unit required to support the given
- /// \p FlatWorkGroupSize.
- unsigned
- getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
- }
-
- /// \returns Minimum number of waves per execution unit supported by the
- /// subtarget.
- unsigned getMinWavesPerEU() const override {
- return AMDGPU::IsaInfo::getMinWavesPerEU(this);
- }
-};
-
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b4b10835837c..ce7c82e2a88a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -15,45 +15,40 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
-#include "AMDGPUCallLowering.h"
#include "AMDGPUExportClustering.h"
-#include "AMDGPUInstructionSelector.h"
-#include "AMDGPULegalizerInfo.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600MachineScheduler.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/GlobalDCE.h"
+#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Vectorize.h"
-#include <memory>
using namespace llvm;
@@ -216,7 +211,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSILowerSGPRSpillsPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
- initializeSIFixupVectorISelPass(*PR);
initializeSIFoldOperandsPass(*PR);
initializeSIPeepholeSDWAPass(*PR);
initializeSIShrinkInstructionsPass(*PR);
@@ -237,6 +231,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUPromoteAllocaToVectorPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
+ initializeAMDGPULateCodeGenPreparePass(*PR);
initializeAMDGPUPropagateAttributesEarlyPass(*PR);
initializeAMDGPUPropagateAttributesLatePass(*PR);
initializeAMDGPURewriteOutArgumentsPass(*PR);
@@ -260,7 +255,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUExternalAAWrapperPass(*PR);
initializeAMDGPUUseNativeCallsPass(*PR);
initializeAMDGPUSimplifyLibCallsPass(*PR);
- initializeAMDGPUInlinerPass(*PR);
initializeAMDGPUPrintfRuntimeBindingPass(*PR);
initializeGCNRegBankReassignPass(*PR);
initializeGCNNSAReassignPass(*PR);
@@ -284,7 +278,6 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
return DAG;
@@ -295,7 +288,6 @@ createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
auto DAG = new GCNIterativeScheduler(C,
GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
@@ -309,7 +301,6 @@ createIterativeILPMachineScheduler(MachineSchedContext *C) {
auto DAG = new GCNIterativeScheduler(C,
GCNIterativeScheduler::SCHEDULE_ILP);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
return DAG;
}
@@ -345,15 +336,15 @@ GCNILPSchedRegistry("gcn-ilp",
static StringRef computeDataLayout(const Triple &TT) {
if (TT.getArch() == Triple::r600) {
// 32-bit pointers.
- return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
- "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
+ return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
}
// 32-bit private, local, and region pointers. 64-bit global, constant and
// flat, non-integral buffer fat pointers.
- return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
+ return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
- "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
"-ni:7";
}
@@ -402,16 +393,14 @@ AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
Attribute GPUAttr = F.getFnAttribute("target-cpu");
- return GPUAttr.hasAttribute(Attribute::None) ?
- getTargetCPU() : GPUAttr.getValueAsString();
+ return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
}
StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
Attribute FSAttr = F.getFnAttribute("target-features");
- return FSAttr.hasAttribute(Attribute::None) ?
- getTargetFeatureString() :
- FSAttr.getValueAsString();
+ return FSAttr.isValid() ? FSAttr.getValueAsString()
+ : getTargetFeatureString();
}
/// Predicate for Internalize pass.
@@ -433,7 +422,7 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
if (EnableFunctionCalls) {
delete Builder.Inliner;
- Builder.Inliner = createAMDGPUFunctionInliningPass();
+ Builder.Inliner = createFunctionInliningPass();
}
Builder.addExtension(
@@ -487,6 +476,133 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
});
}
+void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
+ AAM.registerFunctionAnalysis<AMDGPUAA>();
+}
+
+void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
+ bool DebugPassManager) {
+ PB.registerPipelineParsingCallback(
+ [this](StringRef PassName, ModulePassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+ if (PassName == "amdgpu-propagate-attributes-late") {
+ PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
+ return true;
+ }
+ if (PassName == "amdgpu-unify-metadata") {
+ PM.addPass(AMDGPUUnifyMetadataPass());
+ return true;
+ }
+ if (PassName == "amdgpu-printf-runtime-binding") {
+ PM.addPass(AMDGPUPrintfRuntimeBindingPass());
+ return true;
+ }
+ if (PassName == "amdgpu-always-inline") {
+ PM.addPass(AMDGPUAlwaysInlinePass());
+ return true;
+ }
+ return false;
+ });
+ PB.registerPipelineParsingCallback(
+ [this](StringRef PassName, FunctionPassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+ if (PassName == "amdgpu-simplifylib") {
+ PM.addPass(AMDGPUSimplifyLibCallsPass(*this));
+ return true;
+ }
+ if (PassName == "amdgpu-usenative") {
+ PM.addPass(AMDGPUUseNativeCallsPass());
+ return true;
+ }
+ if (PassName == "amdgpu-promote-alloca") {
+ PM.addPass(AMDGPUPromoteAllocaPass(*this));
+ return true;
+ }
+ if (PassName == "amdgpu-promote-alloca-to-vector") {
+ PM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
+ return true;
+ }
+ if (PassName == "amdgpu-lower-kernel-attributes") {
+ PM.addPass(AMDGPULowerKernelAttributesPass());
+ return true;
+ }
+ if (PassName == "amdgpu-propagate-attributes-early") {
+ PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
+ return true;
+ }
+
+ return false;
+ });
+
+ PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &FAM) {
+ FAM.registerPass([&] { return AMDGPUAA(); });
+ });
+
+ PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
+ if (AAName == "amdgpu-aa") {
+ AAM.registerFunctionAnalysis<AMDGPUAA>();
+ return true;
+ }
+ return false;
+ });
+
+ PB.registerPipelineStartEPCallback([this, DebugPassManager](
+ ModulePassManager &PM,
+ PassBuilder::OptimizationLevel Level) {
+ FunctionPassManager FPM(DebugPassManager);
+ FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
+ FPM.addPass(AMDGPUUseNativeCallsPass());
+ if (EnableLibCallSimplify && Level != PassBuilder::OptimizationLevel::O0)
+ FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
+ PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ });
+
+ PB.registerPipelineEarlySimplificationEPCallback(
+ [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
+ if (Level == PassBuilder::OptimizationLevel::O0)
+ return;
+
+ PM.addPass(AMDGPUUnifyMetadataPass());
+ PM.addPass(AMDGPUPrintfRuntimeBindingPass());
+
+ if (InternalizeSymbols) {
+ PM.addPass(InternalizePass(mustPreserveGV));
+ }
+ PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
+ if (InternalizeSymbols) {
+ PM.addPass(GlobalDCEPass());
+ }
+ if (EarlyInlineAll && !EnableFunctionCalls)
+ PM.addPass(AMDGPUAlwaysInlinePass());
+ });
+
+ PB.registerCGSCCOptimizerLateEPCallback(
+ [this, DebugPassManager](CGSCCPassManager &PM,
+ PassBuilder::OptimizationLevel Level) {
+ if (Level == PassBuilder::OptimizationLevel::O0)
+ return;
+
+ FunctionPassManager FPM(DebugPassManager);
+
+ // Add infer address spaces pass to the opt pipeline after inlining
+ // but before SROA to increase SROA opportunities.
+ FPM.addPass(InferAddressSpacesPass());
+
+ // This should run after inlining to have any chance of doing
+ // anything, and before other cleanup optimizations.
+ FPM.addPass(AMDGPULowerKernelAttributesPass());
+
+ if (Level != PassBuilder::OptimizationLevel::O0) {
+ // Promote alloca to vector before SROA and loop unroll. If we
+ // manage to eliminate allocas before unroll we may choose to unroll
+ // less.
+ FPM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
+ }
+
+ PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+ });
+}
+
//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//
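Editor's note on the hunk above: it wires the AMDGPU passes into the new pass manager by registering pipeline-parsing callbacks for the textual pass names and by registering AMDGPUAA both as a default alias analysis and as a parseable AA name. The sketch below is not part of the patch; it is a minimal illustration of how a standalone tool built against LLVM 12 might let these callbacks resolve one of the newly registered names. The function name `runAMDGPUCleanup` is invented, and it assumes `TM` is an already-constructed AMDGPU `TargetMachine` and `M` a module compiled for it.

```cpp
// Minimal sketch (not from the patch): drive the new-PM callbacks registered
// by AMDGPUTargetMachine from a standalone tool.
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

static Error runAMDGPUCleanup(Module &M, TargetMachine &TM) {
  PassBuilder PB;
  // Let the target add its pipeline-parsing, AA and analysis callbacks
  // (the hook added in the hunk above).
  TM.registerPassBuilderCallbacks(PB, /*DebugPassManager=*/false);

  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // "amdgpu-unify-metadata" is only known through the module-level
  // pipeline-parsing callback registered above.
  ModulePassManager MPM;
  if (Error Err = PB.parsePassPipeline(MPM, "amdgpu-unify-metadata"))
    return Err;
  MPM.run(M, MAM);
  return Error::success();
}
```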
@@ -526,6 +642,39 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl(
return I.get();
}
+int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
+ return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)
+ ? -1
+ : 0;
+}
+
+bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
+ AMDGPU::isFlatGlobalAddrSpace(DestAS);
+}
+
+unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
+ const auto *LD = dyn_cast<LoadInst>(V);
+ if (!LD)
+ return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+
+ // The loaded value must be a generic (flat) pointer.
+ assert(V->getType()->isPointerTy() &&
+ V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
+
+ const auto *Ptr = LD->getPointerOperand();
+ if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+ return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+ // A generic pointer loaded from constant memory can be assumed to be a
+ // global pointer, since constant memory is only populated on the host side
+ // and, as implied by the offload programming model, only global pointers
+ // can be referenced on the host side.
+ return AMDGPUAS::GLOBAL_ADDRESS;
+}
+
TargetTransformInfo
R600TargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(R600TTIImpl(this, F));
@@ -593,7 +742,6 @@ public:
createMachineScheduler(MachineSchedContext *C) const override {
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
@@ -866,6 +1014,7 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
+ addPass(createAMDGPULateCodeGenPreparePass());
if (EnableAtomicOptimizations) {
addPass(createAMDGPUAtomicOptimizerPass());
}
@@ -930,19 +1079,12 @@ bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
addPass(&SIFixSGPRCopiesID);
addPass(createSILowerI1CopiesPass());
- // TODO: We have to add FinalizeISel
- // to expand V_ADD/SUB_U64_PSEUDO before SIFixupVectorISel
- // that expects V_ADD/SUB -> A_ADDC/SUBB pairs expanded.
- // Will be removed as soon as SIFixupVectorISel is changed
- // to work with V_ADD/SUB_U64_PSEUDO instead.
- addPass(&FinalizeISelID);
- addPass(createSIFixupVectorISelPass());
addPass(createSIAddIMGInitPass());
return false;
}
bool GCNPassConfig::addIRTranslator() {
- addPass(new IRTranslator());
+ addPass(new IRTranslator(getOptLevel()));
return false;
}
@@ -969,6 +1111,10 @@ bool GCNPassConfig::addRegBankSelect() {
bool GCNPassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
+ // TODO: Fix instruction selection to do the right thing for image
+ // instructions with tfe or lwe in the first place, instead of running a
+ // separate pass to fix them up?
+ addPass(createSIAddIMGInitPass());
return false;
}
@@ -976,7 +1122,6 @@ void GCNPassConfig::addPreRegAlloc() {
if (LateCFGStructurize) {
addPass(createAMDGPUMachineCFGStructurizerPass());
}
- addPass(createSIWholeQuadModePass());
}
void GCNPassConfig::addFastRegAlloc() {
@@ -988,13 +1133,18 @@ void GCNPassConfig::addFastRegAlloc() {
// SI_ELSE will introduce a copy of the tied operand source after the else.
insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
- // This must be run just after RegisterCoalescing.
- insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
+ insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
+ insertPass(&TwoAddressInstructionPassID, &SIPreAllocateWWMRegsID);
TargetPassConfig::addFastRegAlloc();
}
void GCNPassConfig::addOptimizedRegAlloc() {
+ // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
+ // instructions that cause scheduling barriers.
+ insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
+ insertPass(&MachineSchedulerID, &SIPreAllocateWWMRegsID);
+
if (OptExecMaskPreRA)
insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
@@ -1004,9 +1154,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
// SI_ELSE will introduce a copy of the tied operand source after the else.
insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
- // This must be run just after RegisterCoalescing.
- insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
-
if (EnableDCEInRA)
insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);
@@ -1041,6 +1188,12 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createSIShrinkInstructionsPass());
addPass(createSIModeRegisterPass());
+ if (getOptLevel() > CodeGenOpt::None)
+ addPass(&SIInsertHardClausesID);
+
+ addPass(&SIRemoveShortExecBranchesID);
+ addPass(&SIInsertSkipsPassID);
+ addPass(&SIPreEmitPeepholeID);
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able to handle all hazards correctly. This is because if there
// are multiple scheduling regions in a basic block, the regions are scheduled
@@ -1049,16 +1202,7 @@ void GCNPassConfig::addPreEmitPass() {
//
// Here we add a stand-alone hazard recognizer pass which can handle all
// cases.
- //
- // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
- // be better for it to emit S_NOP <N> when possible.
addPass(&PostRAHazardRecognizerID);
- if (getOptLevel() > CodeGenOpt::None)
- addPass(&SIInsertHardClausesID);
-
- addPass(&SIRemoveShortExecBranchesID);
- addPass(&SIInsertSkipsPassID);
- addPass(&SIPreEmitPeepholeID);
addPass(&BranchRelaxationPassID);
}
@@ -1087,6 +1231,12 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
MFI->initializeBaseYamlFields(YamlMFI);
+ if (MFI->Occupancy == 0) {
+ // Fixup the subtarget dependent default value.
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
+ }
+
auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
Register TempReg;
if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index e223fecc8819..95aefa23c24c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -14,14 +14,9 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
-#include "AMDGPUSubtarget.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Support/CodeGen.h"
+#include "GCNSubtarget.h"
+#include "R600Subtarget.h"
#include "llvm/Target/TargetMachine.h"
-#include <memory>
namespace llvm {
@@ -56,12 +51,16 @@ public:
void adjustPassManager(PassManagerBuilder &) override;
+ void registerPassBuilderCallbacks(PassBuilder &PB,
+ bool DebugPassManager) override;
+ void registerDefaultAliasAnalyses(AAManager &) override;
+
/// Get the integer value of a null pointer in the given address space.
- static int64_t getNullPointerValue(unsigned AddrSpace) {
- return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
- AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
- AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
- }
+ static int64_t getNullPointerValue(unsigned AddrSpace);
+
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+
+ unsigned getAssumedAddrSpace(const Value *V) const override;
};
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index 6569980d2c75..f854c8c16e5a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -7,13 +7,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetObjectFile.h"
-#include "AMDGPU.h"
-#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSectionELF.h"
-
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 542a5f006c0f..7b8a79640bb2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -15,40 +15,12 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetTransformInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/STLExtras.h"
+#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include <algorithm>
-#include <cassert>
-#include <limits>
-#include <utility>
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
@@ -82,7 +54,25 @@ static cl::opt<bool> UseLegacyDA(
static cl::opt<unsigned> UnrollMaxBlockToAnalyze(
"amdgpu-unroll-max-block-to-analyze",
cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"),
- cl::init(20), cl::Hidden);
+ cl::init(32), cl::Hidden);
+
+static cl::opt<unsigned> ArgAllocaCost("amdgpu-inline-arg-alloca-cost",
+ cl::Hidden, cl::init(4000),
+ cl::desc("Cost of alloca argument"));
+
+// If the amount of scratch memory to eliminate exceeds our ability to allocate
+// it into registers, we gain nothing by aggressively inlining functions under
+// this heuristic.
+static cl::opt<unsigned>
+ ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden,
+ cl::init(256),
+ cl::desc("Maximum alloca size to use for inline cost"));
+
+// Inliner constraint to achieve reasonable compilation time.
+static cl::opt<size_t> InlineMaxBB(
+ "amdgpu-inline-max-bb", cl::Hidden, cl::init(1100),
+ cl::desc("Maximum number of BBs allowed in a function after inlining"
+ " (compile time constraint)"));
static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
unsigned Depth = 0) {
@@ -103,6 +93,12 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
return false;
}
+AMDGPUTTIImpl::AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ TargetTriple(TM->getTargetTriple()),
+ ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
+ TLI(ST->getTargetLowering()) {}
+
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
const Function &F = *L->getHeader()->getParent();
@@ -116,6 +112,26 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
const unsigned MaxAlloca = (256 - 16) * 4;
unsigned ThresholdPrivate = UnrollThresholdPrivate;
unsigned ThresholdLocal = UnrollThresholdLocal;
+
+ // If this loop has the amdgpu.loop.unroll.threshold metadata, we will use
+ // the provided threshold value as the default for Threshold.
+ if (MDNode *LoopUnrollThreshold =
+ findOptionMDForLoop(L, "amdgpu.loop.unroll.threshold")) {
+ if (LoopUnrollThreshold->getNumOperands() == 2) {
+ ConstantInt *MetaThresholdValue = mdconst::extract_or_null<ConstantInt>(
+ LoopUnrollThreshold->getOperand(1));
+ if (MetaThresholdValue) {
+ // We will also use the supplied value for PartialThreshold for now.
+ // We may introduce additional metadata if it becomes necessary in the
+ // future.
+ UP.Threshold = MetaThresholdValue->getSExtValue();
+ UP.PartialThreshold = UP.Threshold;
+ ThresholdPrivate = std::min(ThresholdPrivate, UP.Threshold);
+ ThresholdLocal = std::min(ThresholdLocal, UP.Threshold);
+ }
+ }
+ }
+
unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
for (const BasicBlock *BB : L->getBlocks()) {
const DataLayout &DL = BB->getModule()->getDataLayout();
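Editor's note on the block above: getUnrollingPreferences now honours an `amdgpu.loop.unroll.threshold` hint attached to a loop's `!llvm.loop` metadata, expressed as a node with two operands (the option string and an i32 threshold). The C++ sketch below is not from the patch; it only illustrates, using the usual self-referential loop-ID pattern, how a front end or earlier pass might attach such a hint to a loop latch. The function name and parameters are invented for the example.

```cpp
// Illustrative sketch: attach !{!"amdgpu.loop.unroll.threshold", i32 N} to a
// loop by rebuilding its self-referential !llvm.loop ID on the latch branch.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"

using namespace llvm;

static void addAMDGPUUnrollThreshold(BasicBlock *Latch, uint32_t Threshold) {
  LLVMContext &Ctx = Latch->getContext();

  // Option node: {!"amdgpu.loop.unroll.threshold", i32 Threshold}.
  Metadata *OptOps[] = {
      MDString::get(Ctx, "amdgpu.loop.unroll.threshold"),
      ConstantAsMetadata::get(
          ConstantInt::get(Type::getInt32Ty(Ctx), Threshold))};
  MDNode *Opt = MDNode::get(Ctx, OptOps);

  // Loop ID: operand 0 is a self-reference, the rest are option nodes.
  Metadata *LoopIDOps[] = {nullptr, Opt};
  MDNode *LoopID = MDNode::getDistinct(Ctx, LoopIDOps);
  LoopID->replaceOperandWith(0, LoopID);

  Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
}
```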
@@ -169,7 +185,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
const Value *Ptr = GEP->getPointerOperand();
const AllocaInst *Alloca =
- dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
+ dyn_cast<AllocaInst>(getUnderlyingObject(Ptr));
if (!Alloca || !Alloca->isStaticAlloca())
continue;
Type *Ty = Alloca->getAllocatedType();
@@ -231,7 +247,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// If we got a GEP in a small BB from an inner loop, then increase the max trip
// count to analyze for a better cost estimation in unroll
- if (L->empty() && BB->size() < UnrollMaxBlockToAnalyze)
+ if (L->isInnermost() && BB->size() < UnrollMaxBlockToAnalyze)
UP.MaxIterationsCountToAnalyze = 32;
}
}
@@ -240,6 +256,41 @@ void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
+
+const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = {
+ // Codegen control options which don't matter.
+ AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler,
+ AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal,
+ AMDGPU::FeaturePromoteAlloca, AMDGPU::FeatureUnalignedScratchAccess,
+ AMDGPU::FeatureUnalignedAccessMode,
+
+ AMDGPU::FeatureAutoWaitcntBeforeBarrier,
+
+ // Property of the kernel/environment which can't actually differ.
+ AMDGPU::FeatureSGPRInitBug, AMDGPU::FeatureXNACK,
+ AMDGPU::FeatureTrapHandler,
+
+ // The default assumption needs to be that ECC is enabled, but no directly
+ // exposed operations depend on it, so it can be safely inlined.
+ AMDGPU::FeatureSRAMECC,
+
+ // Perf-tuning features
+ AMDGPU::FeatureFastFMAF32, AMDGPU::HalfRate64Ops};
+
+GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
+ TLI(ST->getTargetLowering()), CommonTTI(TM, F),
+ IsGraphics(AMDGPU::isGraphics(F.getCallingConv())),
+ MaxVGPRs(ST->getMaxNumVGPRs(
+ std::max(ST->getWavesPerEU(F).first,
+ ST->getWavesPerEUForWorkGroup(
+ ST->getFlatWorkGroupSizes(F).second)))) {
+ AMDGPU::SIModeRegisterDefaults Mode(F);
+ HasFP32Denormals = Mode.allFP32Denormals();
+ HasFP64FP16Denormals = Mode.allFP64FP16Denormals();
+}
+
unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
// The concept of vector registers doesn't really exist. Some packed vector
// operations operate on the normal 32-bit registers.
@@ -267,6 +318,12 @@ unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
return 32;
}
+unsigned GCNTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
+ if (Opcode == Instruction::Load || Opcode == Instruction::Store)
+ return 32 * 4 / ElemWidth;
+ return (ElemWidth == 16 && ST->has16BitInsts()) ? 2 : 1;
+}
+
unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {
@@ -451,9 +508,50 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// FIXME: We're having to query the throughput cost so that the basic
// implementation tries to generate legalize and scalarization costs. Maybe
// we could hoist the scalarization code here?
- return BaseT::getArithmeticInstrCost(Opcode, Ty, TTI::TCK_RecipThroughput,
- Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo);
+ if (CostKind != TTI::TCK_CodeSize)
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, TTI::TCK_RecipThroughput,
+ Opd1Info, Opd2Info, Opd1PropInfo,
+ Opd2PropInfo, Args, CxtI);
+ // Scalarization
+
+ // Check if any of the operands are vector operands.
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+
+ bool IsFloat = Ty->isFPOrFPVectorTy();
+ // Assume that floating point arithmetic operations cost twice as much as
+ // integer operations.
+ unsigned OpCost = (IsFloat ? 2 : 1);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+ // TODO: Once we have extract/insert subvector cost we need to use them.
+ return LT.first * OpCost;
+ }
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered, then assume that the code is twice
+ // as expensive.
+ return LT.first * 2 * OpCost;
+ }
+
+ // Else, assume that we need to scalarize this op.
+ // TODO: If one of the types get legalized by splitting, handle this
+ // similarly to what getCastInstrCost() does.
+ if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+ unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
+ unsigned Cost = getArithmeticInstrCost(
+ Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo, Args, CxtI);
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ return getScalarizationOverhead(VTy, Args) + Num * Cost;
+ }
+
+ // We don't know anything about this scalar instruction.
+ return OpCost;
}
// Legalize the type.
@@ -472,7 +570,7 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
case ISD::SRL:
case ISD::SRA:
if (SLT == MVT::i64)
- return get64BitInstrCost() * LT.first * NElts;
+ return get64BitInstrCost(CostKind) * LT.first * NElts;
if (ST->has16BitInsts() && SLT == MVT::i16)
NElts = (NElts + 1) / 2;
@@ -494,7 +592,7 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return LT.first * NElts * getFullRateInstrCost();
case ISD::MUL: {
- const int QuarterRateCost = getQuarterRateInstrCost();
+ const int QuarterRateCost = getQuarterRateInstrCost(CostKind);
if (SLT == MVT::i64) {
const int FullRateCost = getFullRateInstrCost();
return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
@@ -506,11 +604,32 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// i32
return QuarterRateCost * NElts * LT.first;
}
+ case ISD::FMUL:
+ // Check for a possible fusion of {fadd|fsub}(a, fmul(b,c)) and return zero
+ // cost for the fmul(b,c), assuming the fadd|fsub will be charged the
+ // estimated cost of the whole fused operation.
+ if (CxtI && CxtI->hasOneUse())
+ if (const auto *FAdd = dyn_cast<BinaryOperator>(*CxtI->user_begin())) {
+ const int OPC = TLI->InstructionOpcodeToISD(FAdd->getOpcode());
+ if (OPC == ISD::FADD || OPC == ISD::FSUB) {
+ if (ST->hasMadMacF32Insts() && SLT == MVT::f32 && !HasFP32Denormals)
+ return TargetTransformInfo::TCC_Free;
+ if (ST->has16BitInsts() && SLT == MVT::f16 && !HasFP64FP16Denormals)
+ return TargetTransformInfo::TCC_Free;
+
+ // Estimate all types may be fused with contract/unsafe flags
+ const TargetOptions &Options = TLI->getTargetMachine().Options;
+ if (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath ||
+ (FAdd->hasAllowContract() && CxtI->hasAllowContract()))
+ return TargetTransformInfo::TCC_Free;
+ }
+ }
+ LLVM_FALLTHROUGH;
case ISD::FADD:
case ISD::FSUB:
- case ISD::FMUL:
if (SLT == MVT::f64)
- return LT.first * NElts * get64BitInstrCost();
+ return LT.first * NElts * get64BitInstrCost(CostKind);
if (ST->has16BitInsts() && SLT == MVT::f16)
NElts = (NElts + 1) / 2;
@@ -523,7 +642,9 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// FIXME: frem should be handled separately. The fdiv in it is most of it,
// but the current lowering is also not entirely correct.
if (SLT == MVT::f64) {
- int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
+ int Cost = 7 * get64BitInstrCost(CostKind) +
+ getQuarterRateInstrCost(CostKind) +
+ 3 * getHalfRateInstrCost(CostKind);
// Add cost of workaround.
if (!ST->hasUsableDivScaleConditionOutput())
Cost += 3 * getFullRateInstrCost();
@@ -535,7 +656,7 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// TODO: This is more complicated, unsafe flags etc.
if ((SLT == MVT::f32 && !HasFP32Denormals) ||
(SLT == MVT::f16 && ST->has16BitInsts())) {
- return LT.first * getQuarterRateInstrCost() * NElts;
+ return LT.first * getQuarterRateInstrCost(CostKind) * NElts;
}
}
@@ -545,12 +666,15 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// f32 fmul
// v_cvt_f16_f32
// f16 div_fixup
- int Cost = 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost();
+ int Cost =
+ 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost(CostKind);
return LT.first * Cost * NElts;
}
if (SLT == MVT::f32 || SLT == MVT::f16) {
- int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
+ // 4 more v_cvt_* instructions are needed when f16 instructions are not supported
+ int Cost = (SLT == MVT::f16 ? 14 : 10) * getFullRateInstrCost() +
+ 1 * getQuarterRateInstrCost(CostKind);
if (!HasFP32Denormals) {
// FP mode switches.
@@ -568,18 +692,21 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
break;
}
- return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
- Opd2Info,
- Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
-// Return true if there's a potential benefit from using v2f16 instructions for
-// an intrinsic, even if it requires nontrivial legalization.
+// Return true if there's a potential benefit from using v2f16/v2i16
+// instructions for an intrinsic, even if it requires nontrivial legalization.
static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::fma: // TODO: fmuladd
// There's a small benefit to using vector ops in the legalized code.
case Intrinsic::round:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
return true;
default:
return false;
@@ -597,7 +724,48 @@ int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
Type *RetTy = ICA.getReturnType();
EVT OrigTy = TLI->getValueType(DL, RetTy);
if (!OrigTy.isSimple()) {
- return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+ if (CostKind != TTI::TCK_CodeSize)
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+
+ // TODO: Combine these two logic paths.
+ if (ICA.isTypeBasedOnly())
+ return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
+
+ Type *RetTy = ICA.getReturnType();
+ unsigned VF = ICA.getVectorFactor().getFixedValue();
+ unsigned RetVF =
+ (RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getNumElements()
+ : 1);
+ assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
+ const IntrinsicInst *I = ICA.getInst();
+ const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
+ FastMathFlags FMF = ICA.getFlags();
+ // Assume that we need to scalarize this intrinsic.
+ SmallVector<Type *, 4> Types;
+ for (const Value *Op : Args) {
+ Type *OpTy = Op->getType();
+ assert(VF == 1 || !OpTy->isVectorTy());
+ Types.push_back(VF == 1 ? OpTy : FixedVectorType::get(OpTy, VF));
+ }
+
+ if (VF > 1 && !RetTy->isVoidTy())
+ RetTy = FixedVectorType::get(RetTy, VF);
+
+ // Compute the scalarization overhead based on Args for a vector
+ // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
+ // CostModel will pass a vector RetTy and VF is 1.
+ unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
+ if (RetVF > 1 || VF > 1) {
+ ScalarizationCost = 0;
+ if (!RetTy->isVoidTy())
+ ScalarizationCost +=
+ getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
+ ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
+ }
+
+ IntrinsicCostAttributes Attrs(ICA.getID(), RetTy, Types, FMF,
+ ScalarizationCost, I);
+ return getIntrinsicInstrCost(Attrs, CostKind);
}
// Legalize the type.
@@ -609,16 +777,16 @@ int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
if (SLT == MVT::f64)
- return LT.first * NElts * get64BitInstrCost();
+ return LT.first * NElts * get64BitInstrCost(CostKind);
if (ST->has16BitInsts() && SLT == MVT::f16)
NElts = (NElts + 1) / 2;
// TODO: Get more refined intrinsic costs?
- unsigned InstRate = getQuarterRateInstrCost();
+ unsigned InstRate = getQuarterRateInstrCost(CostKind);
if (ICA.getID() == Intrinsic::fma) {
- InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost()
- : getQuarterRateInstrCost();
+ InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost(CostKind)
+ : getQuarterRateInstrCost(CostKind);
}
return LT.first * NElts * InstRate;
@@ -669,7 +837,7 @@ int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
CostKind);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
- return LT.first * getHalfRateInstrCost();
+ return LT.first * getHalfRateInstrCost(CostKind);
}
int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
@@ -697,32 +865,6 @@ int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
}
}
-static bool isArgPassedInSGPR(const Argument *A) {
- const Function *F = A->getParent();
-
- // Arguments to compute shaders are never a source of divergence.
- CallingConv::ID CC = F->getCallingConv();
- switch (CC) {
- case CallingConv::AMDGPU_KERNEL:
- case CallingConv::SPIR_KERNEL:
- return true;
- case CallingConv::AMDGPU_VS:
- case CallingConv::AMDGPU_LS:
- case CallingConv::AMDGPU_HS:
- case CallingConv::AMDGPU_ES:
- case CallingConv::AMDGPU_GS:
- case CallingConv::AMDGPU_PS:
- case CallingConv::AMDGPU_CS:
- // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
- // Everything else is in VGPRs.
- return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
- F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
- default:
- // TODO: Should calls support inreg for SGPR inputs?
- return false;
- }
-}
-
/// Analyze if the results of inline asm are divergent. If \p Indices is empty,
/// this is analyzing the collective result of all output registers. Otherwise,
/// this is only querying a specific result index if this returns multiple
@@ -779,7 +921,7 @@ bool GCNTTIImpl::useGPUDivergenceAnalysis() const {
/// different across workitems in a wavefront.
bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
if (const Argument *A = dyn_cast<Argument>(V))
- return !isArgPassedInSGPR(A);
+ return !AMDGPU::isArgPassedInSGPR(A);
// Loads from the private and flat address spaces are divergent, because
// threads can execute the load instruction with the same inputs and get
@@ -921,7 +1063,10 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Type *MaskTy = MaskOp->getType();
bool DoTruncate = false;
- if (!getTLI()->isNoopAddrSpaceCast(OldAS, NewAS)) {
+
+ const GCNTargetMachine &TM =
+ static_cast<const GCNTargetMachine &>(getTLI()->getTargetMachine());
+ if (!TM.isNoopAddrSpaceCast(OldAS, NewAS)) {
// All valid 64-bit to 32-bit casts work by chopping off the high
// bits. Any masking only clearing the low bits will also apply in the new
// address space.
@@ -993,7 +1138,47 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
// no way to support merge for backend defined attributes.
AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
- return CallerMode.isInlineCompatible(CalleeMode);
+ if (!CallerMode.isInlineCompatible(CalleeMode))
+ return false;
+
+ // Hack to make compile times reasonable.
+ if (InlineMaxBB && !Callee->hasFnAttribute(Attribute::InlineHint)) {
+ // Single BB does not increase total BB amount, thus subtract 1.
+ size_t BBSize = Caller->size() + Callee->size() - 1;
+ return BBSize <= InlineMaxBB;
+ }
+
+ return true;
+}
+
+unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
+ // If we have a pointer to private array passed into a function
+ // it will not be optimized out, leaving scratch usage.
+ // Increase the inline threshold to allow inlining in this case.
+ uint64_t AllocaSize = 0;
+ SmallPtrSet<const AllocaInst *, 8> AIVisited;
+ for (Value *PtrArg : CB->args()) {
+ PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());
+ if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS &&
+ Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS))
+ continue;
+
+ PtrArg = getUnderlyingObject(PtrArg);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
+ if (!AI->isStaticAlloca() || !AIVisited.insert(AI).second)
+ continue;
+ AllocaSize += DL.getTypeAllocSize(AI->getAllocatedType());
+ // If the amount of stack memory is excessive, we will not be able
+ // to get rid of the scratch anyway; bail out.
+ if (AllocaSize > ArgAllocaCutoff) {
+ AllocaSize = 0;
+ break;
+ }
+ }
+ }
+ if (AllocaSize)
+ return ArgAllocaCost;
+ return 0;
}
void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
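Editor's note on the adjustInliningThreshold hunk above: it boosts the inline threshold when a call receives a pointer into a caller's private (scratch) alloca, capped by amdgpu-inline-arg-alloca-cutoff. The plain C++ fragment below is only an illustration of the code shape this heuristic targets (no GPU address-space qualifiers are shown); all names are invented for the example.

```cpp
// Illustration only: 'Scratch' lives in private (scratch) memory. Unless
// 'accumulate' is inlined, the pointer escapes into the call and the alloca
// cannot be broken up by SROA, so the scratch usage remains.
static float accumulate(const float *P, int N) {
  float Sum = 0.0f;
  for (int I = 0; I < N; ++I)
    Sum += P[I];
  return Sum;
}

float kernelBody(int Id) {
  float Scratch[64];                  // becomes a private alloca
  for (int I = 0; I < 64; ++I)
    Scratch[I] = static_cast<float>(Id + I);
  return accumulate(Scratch, 64);     // pointer-to-alloca argument
}
```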
@@ -1006,6 +1191,16 @@ void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
CommonTTI.getPeelingPreferences(L, SE, PP);
}
+int GCNTTIImpl::get64BitInstrCost(TTI::TargetCostKind CostKind) const {
+ return ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)
+ : getQuarterRateInstrCost(CostKind);
+}
+
+R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
+ TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
+
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 3364a9bcaccb..b29c94180fb8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -19,22 +19,18 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
-#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/IR/Function.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/MathExtras.h"
-#include <cassert>
namespace llvm {
class AMDGPUTargetLowering;
+class GCNSubtarget;
+class InstCombiner;
class Loop;
+class R600Subtarget;
class ScalarEvolution;
+class SITargetLowering;
class Type;
class Value;
@@ -46,18 +42,14 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
Triple TargetTriple;
- const GCNSubtarget *ST;
+ const TargetSubtargetInfo *ST;
const TargetLoweringBase *TLI;
const TargetSubtargetInfo *getST() const { return ST; }
const TargetLoweringBase *getTLI() const { return TLI; }
public:
- explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()),
- TargetTriple(TM->getTargetTriple()),
- ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
- TLI(ST->getTargetLowering()) {}
+ explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
@@ -75,73 +67,41 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
const GCNSubtarget *ST;
const SITargetLowering *TLI;
AMDGPUTTIImpl CommonTTI;
- bool IsGraphicsShader;
+ bool IsGraphics;
bool HasFP32Denormals;
+ bool HasFP64FP16Denormals;
unsigned MaxVGPRs;
- const FeatureBitset InlineFeatureIgnoreList = {
- // Codegen control options which don't matter.
- AMDGPU::FeatureEnableLoadStoreOpt,
- AMDGPU::FeatureEnableSIScheduler,
- AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
- AMDGPU::FeatureFlatForGlobal,
- AMDGPU::FeaturePromoteAlloca,
- AMDGPU::FeatureUnalignedBufferAccess,
- AMDGPU::FeatureUnalignedScratchAccess,
-
- AMDGPU::FeatureAutoWaitcntBeforeBarrier,
-
- // Property of the kernel/environment which can't actually differ.
- AMDGPU::FeatureSGPRInitBug,
- AMDGPU::FeatureXNACK,
- AMDGPU::FeatureTrapHandler,
- AMDGPU::FeatureCodeObjectV3,
-
- // The default assumption needs to be ecc is enabled, but no directly
- // exposed operations depend on it, so it can be safely inlined.
- AMDGPU::FeatureSRAMECC,
-
- // Perf-tuning features
- AMDGPU::FeatureFastFMAF32,
- AMDGPU::HalfRate64Ops
- };
+ static const FeatureBitset InlineFeatureIgnoreList;
const GCNSubtarget *getST() const { return ST; }
- const AMDGPUTargetLowering *getTLI() const { return TLI; }
+ const SITargetLowering *getTLI() const { return TLI; }
static inline int getFullRateInstrCost() {
return TargetTransformInfo::TCC_Basic;
}
- static inline int getHalfRateInstrCost() {
- return 2 * TargetTransformInfo::TCC_Basic;
+ static inline int getHalfRateInstrCost(
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
+ return CostKind == TTI::TCK_CodeSize ? 2
+ : 2 * TargetTransformInfo::TCC_Basic;
}
// TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
// should be 2 or 4.
- static inline int getQuarterRateInstrCost() {
- return 3 * TargetTransformInfo::TCC_Basic;
+ static inline int getQuarterRateInstrCost(
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
+ return CostKind == TTI::TCK_CodeSize ? 2
+ : 4 * TargetTransformInfo::TCC_Basic;
}
- // On some parts, normal fp64 operations are half rate, and others
- // quarter. This also applies to some integer operations.
- inline int get64BitInstrCost() const {
- return ST->hasHalfRate64Ops() ?
- getHalfRateInstrCost() : getQuarterRateInstrCost();
- }
+ // On some parts, normal fp64 operations are half rate, and others
+ // quarter. This also applies to some integer operations.
+ int get64BitInstrCost(
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
public:
- explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()),
- ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
- TLI(ST->getTargetLowering()),
- CommonTTI(TM, F),
- IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
- HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()),
- MaxVGPRs(ST->getMaxNumVGPRs(
- std::max(ST->getWavesPerEU(F).first,
- ST->getWavesPerEUForWorkGroup(
- ST->getFlatWorkGroupSizes(F).second)))) {}
+ explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
bool hasBranchDivergence() { return true; }
bool useGPUDivergenceAnalysis() const;
@@ -162,6 +122,7 @@ public:
unsigned getNumberOfRegisters(unsigned RCID) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getMinVectorRegisterBitWidth() const;
+ unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const;
@@ -213,7 +174,7 @@ public:
unsigned getFlatAddressSpace() const {
// Don't bother running InferAddressSpaces pass on graphics shaders which
// don't use flat addressing.
- if (IsGraphicsShader)
+ if (IsGraphics)
return -1;
return AMDGPUAS::FLAT_ADDRESS;
}
@@ -223,6 +184,16 @@ public:
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const;
+ bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
+ InstCombiner &IC) const;
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const;
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const;
+
unsigned getVectorSplitCost() { return 0; }
unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
@@ -232,6 +203,7 @@ public:
const Function *Callee) const;
unsigned getInliningThresholdMultiplier() { return 11; }
+ unsigned adjustInliningThreshold(const CallBase *CB) const;
int getInlinerVectorBonusPercent() { return 0; }
@@ -259,11 +231,7 @@ class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
AMDGPUTTIImpl CommonTTI;
public:
- explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()),
- ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
- TLI(ST->getTargetLowering()),
- CommonTTI(TM, F) {}
+ explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
const R600Subtarget *getST() const { return ST; }
const AMDGPUTargetLowering *getTLI() const { return TLI; }
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 418296684d76..84d72e1b579f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -20,21 +20,25 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "SIDefines.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -70,17 +74,25 @@ char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
- // TODO: Preserve dominator tree.
+ if (RequireAndPreserveDomTree)
+ AU.addRequired<DominatorTreeWrapperPass>();
+
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<LegacyDivergenceAnalysis>();
+ if (RequireAndPreserveDomTree) {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ // FIXME: preserve PostDominatorTreeWrapperPass
+ }
+
// No divergent values are changed, only blocks and branch edges.
AU.addPreserved<LegacyDivergenceAnalysis>();
@@ -133,7 +145,7 @@ static void removeDoneExport(Function &F) {
}
}
-static BasicBlock *unifyReturnBlockSet(Function &F,
+static BasicBlock *unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU,
ArrayRef<BasicBlock *> ReturningBlocks,
bool InsertExport,
const TargetTransformInfo &TTI,
@@ -153,7 +165,7 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
Value *Undef = UndefValue::get(B.getFloatTy());
B.CreateIntrinsic(Intrinsic::amdgcn_exp, { B.getFloatTy() },
{
- B.getInt32(9), // target, SQ_EXP_NULL
+ B.getInt32(AMDGPU::Exp::ET_NULL),
B.getInt32(0), // enabled channels
Undef, Undef, Undef, Undef, // values
B.getTrue(), // done
@@ -174,6 +186,8 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
// Loop over all of the blocks, replacing the return instruction with an
// unconditional branch.
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(ReturningBlocks.size());
for (BasicBlock *BB : ReturningBlocks) {
// Add an incoming element to the PHI node for every return instruction that
// is merging into this new block...
@@ -183,17 +197,27 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
// Remove and delete the return inst.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(NewRetBlock, BB);
+ Updates.push_back({DominatorTree::Insert, BB, NewRetBlock});
}
+ if (RequireAndPreserveDomTree)
+ DTU.applyUpdates(Updates);
+ Updates.clear();
+
for (BasicBlock *BB : ReturningBlocks) {
// Cleanup possible branch to unconditional branch to the return.
- simplifyCFG(BB, TTI, {2});
+ simplifyCFG(BB, TTI, RequireAndPreserveDomTree ? &DTU : nullptr,
+ SimplifyCFGOptions().bonusInstThreshold(2));
}
return NewRetBlock;
}
bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
+ DominatorTree *DT = nullptr;
+ if (RequireAndPreserveDomTree)
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
// If there's only one exit, we don't need to do anything, unless this is a
@@ -216,6 +240,8 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
bool InsertExport = false;
bool Changed = false;
+ std::vector<DominatorTree::UpdateType> Updates;
+
for (BasicBlock *BB : PDT.roots()) {
if (isa<ReturnInst>(BB->getTerminator())) {
if (!isUniformlyReached(DA, *BB))
@@ -272,14 +298,28 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
BI->eraseFromParent(); // Delete the unconditional branch.
// Add a new conditional branch with a dummy edge to the return block.
BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
+ Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
} else { // Conditional branch.
+ SmallVector<BasicBlock *, 2> Successors(succ_begin(BB), succ_end(BB));
+
// Create a new transition block to hold the conditional branch.
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
+ Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
+
+ // 'Successors' become successors of TransitionBB instead of BB,
+ // and TransitionBB becomes a single successor of BB.
+ Updates.push_back({DominatorTree::Insert, BB, TransitionBB});
+ for (BasicBlock *Successor : Successors) {
+ Updates.push_back({DominatorTree::Insert, TransitionBB, Successor});
+ Updates.push_back({DominatorTree::Delete, BB, Successor});
+ }
+
// Create a branch that will always branch to the transition block and
// references DummyReturnBB.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
+ Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
}
Changed = true;
}
@@ -295,10 +335,12 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
"UnifiedUnreachableBlock", &F);
new UnreachableInst(F.getContext(), UnreachableBlock);
+ Updates.reserve(Updates.size() + UnreachableBlocks.size());
for (BasicBlock *BB : UnreachableBlocks) {
// Remove and delete the unreachable inst.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(UnreachableBlock, BB);
+ Updates.push_back({DominatorTree::Insert, BB, UnreachableBlock});
}
Changed = true;
}
@@ -328,6 +370,12 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
}
}
+ // FIXME: add PDT here once simplifycfg is ready.
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ if (RequireAndPreserveDomTree)
+ DTU.applyUpdates(Updates);
+ Updates.clear();
+
// Now handle return blocks.
if (ReturningBlocks.empty())
return Changed; // No blocks return
@@ -345,11 +393,10 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
// uniformly reached block with the "done" bit cleared.
auto BlocksToUnify = std::move(ReturningBlocks);
if (InsertExport) {
- BlocksToUnify.insert(BlocksToUnify.end(), UniformlyReachedRetBlocks.begin(),
- UniformlyReachedRetBlocks.end());
+ llvm::append_range(BlocksToUnify, UniformlyReachedRetBlocks);
}
- unifyReturnBlockSet(F, BlocksToUnify, InsertExport, TTI,
+ unifyReturnBlockSet(F, DTU, BlocksToUnify, InsertExport, TTI,
"UnifiedReturnBlock");
return true;
}
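Editor's note on the changes to AMDGPUUnifyDivergentExitNodes.cpp above: they thread a DomTreeUpdater through the exit-unification logic so the dominator tree can be preserved incrementally instead of recomputed. The sketch below is not part of the patch; it is a simplified reference for the same record-then-apply update pattern, assuming the caller already has a valid DominatorTree. The function and variable names are invented.

```cpp
// Minimal sketch of the DomTreeUpdater pattern used above: rewrite a set of
// returning blocks to branch to one new block, recording each new CFG edge
// and applying the updates in a batch.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include <vector>

using namespace llvm;

static BasicBlock *redirectToCommonBlock(Function &F, DominatorTree &DT,
                                         ArrayRef<BasicBlock *> ReturningBlocks) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);

  BasicBlock *Common = BasicBlock::Create(F.getContext(), "CommonBlock", &F);
  new UnreachableInst(F.getContext(), Common);

  std::vector<DominatorTree::UpdateType> Updates;
  Updates.reserve(ReturningBlocks.size());
  for (BasicBlock *BB : ReturningBlocks) {
    // Returns have no successors, so only edge insertions need recording.
    BB->getTerminator()->eraseFromParent();
    BranchInst::Create(Common, BB);
    Updates.push_back({DominatorTree::Insert, BB, Common});
  }

  // Applying the batch keeps DT consistent with the rewritten CFG.
  DTU.applyUpdates(Updates);
  return Common;
}
```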
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
index 281ae6d646e9..240b6c2ff462 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
@@ -12,14 +12,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
-#include <algorithm>
-#include <cassert>
using namespace llvm;
@@ -45,6 +41,7 @@ namespace {
private:
bool runOnModule(Module &M) override;
+ };
/// Unify version metadata.
/// \return true if changes are made.
@@ -96,7 +93,7 @@ namespace {
SmallVector<Metadata *, 4> All;
for (auto MD : NamedMD->operands())
for (const auto &Op : MD->operands())
- if (std::find(All.begin(), All.end(), Op.get()) == All.end())
+ if (!llvm::is_contained(All, Op.get()))
All.push_back(Op.get());
NamedMD->eraseFromParent();
@@ -106,41 +103,42 @@ namespace {
return true;
}
-};
-} // end anonymous namespace
+ bool unifyMetadataImpl(Module &M) {
+ const char *Vers[] = {kOCLMD::SpirVer, kOCLMD::OCLVer};
+ const char *Exts[] = {kOCLMD::UsedExt, kOCLMD::UsedOptCoreFeat,
+ kOCLMD::CompilerOptions, kOCLMD::LLVMIdent};
-char AMDGPUUnifyMetadata::ID = 0;
+ bool Changed = false;
-char &llvm::AMDGPUUnifyMetadataID = AMDGPUUnifyMetadata::ID;
+ for (auto &I : Vers)
+ Changed |= unifyVersionMD(M, I, true);
-INITIALIZE_PASS(AMDGPUUnifyMetadata, "amdgpu-unify-metadata",
- "Unify multiple OpenCL metadata due to linking",
- false, false)
+ for (auto &I : Exts)
+ Changed |= unifyExtensionMD(M, I);
-ModulePass* llvm::createAMDGPUUnifyMetadataPass() {
- return new AMDGPUUnifyMetadata();
-}
+ return Changed;
+ }
-bool AMDGPUUnifyMetadata::runOnModule(Module &M) {
- const char* Vers[] = {
- kOCLMD::SpirVer,
- kOCLMD::OCLVer
- };
- const char* Exts[] = {
- kOCLMD::UsedExt,
- kOCLMD::UsedOptCoreFeat,
- kOCLMD::CompilerOptions,
- kOCLMD::LLVMIdent
- };
+ } // end anonymous namespace
- bool Changed = false;
+ char AMDGPUUnifyMetadata::ID = 0;
- for (auto &I : Vers)
- Changed |= unifyVersionMD(M, I, true);
+ char &llvm::AMDGPUUnifyMetadataID = AMDGPUUnifyMetadata::ID;
- for (auto &I : Exts)
- Changed |= unifyExtensionMD(M, I);
+ INITIALIZE_PASS(AMDGPUUnifyMetadata, "amdgpu-unify-metadata",
+ "Unify multiple OpenCL metadata due to linking", false, false)
- return Changed;
-}
+ ModulePass *llvm::createAMDGPUUnifyMetadataPass() {
+ return new AMDGPUUnifyMetadata();
+ }
+
+ bool AMDGPUUnifyMetadata::runOnModule(Module &M) {
+ return unifyMetadataImpl(M);
+ }
+
+ PreservedAnalyses AMDGPUUnifyMetadataPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ return unifyMetadataImpl(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+ }
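
The restructuring of AMDGPUUnifyMetadata.cpp is the common pattern for making a legacy pass usable from the new pass manager: the work moves into a standalone implementation function, the legacy runOnModule wrapper and the new AMDGPUUnifyMetadataPass::run both call it, and the new-PM entry point translates the boolean "changed" result into PreservedAnalyses. A rough sketch of the pattern, with invented names (UnifyExampleMetadataPass, unifyImpl):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Shared implementation: does the actual metadata rewriting and reports
// whether anything changed. Body elided; invented name.
static bool unifyImpl(Module &) { return false; }

// New pass manager wrapper, same shape as AMDGPUUnifyMetadataPass::run above.
struct UnifyExampleMetadataPass : PassInfoMixin<UnifyExampleMetadataPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
    return unifyImpl(M) ? PreservedAnalyses::none() : PreservedAnalyses::all();
  }
};

The llvm::is_contained change in the same file is a pure readability cleanup; it is equivalent to the std::find(...) != end() test it replaces.
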
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 1f28688a7296..b9a8c6bd005d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -7,42 +7,17 @@
//==-----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "R600Subtarget.h"
#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstddef>
-#include <deque>
-#include <iterator>
-#include <map>
-#include <utility>
-#include <vector>
using namespace llvm;
@@ -467,7 +442,7 @@ MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
const DebugLoc &DL) {
MachineInstr *MI =
MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
- if (MBB->begin() != MBB->end())
+ if (!MBB->empty())
MBB->insert(MBB->begin(), MI);
else
MBB->push_back(MI);
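
The AMDILCFGStructurizer hunk is a readability change only: MBB->begin() != MBB->end() and !MBB->empty() test the same condition, and either branch ends up placing MI at the front of the block. A small stand-alone sketch of that insertion logic (prependInstr is an invented helper):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// Put MI at the front of MBB whether or not the block already has
// instructions; both branches leave MI as the first instruction.
static void prependInstr(llvm::MachineBasicBlock *MBB, llvm::MachineInstr *MI) {
  if (!MBB->empty())
    MBB->insert(MBB->begin(), MI); // before the current first instruction
  else
    MBB->push_back(MI);            // empty block: MI becomes the only instruction
}
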
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
index 3e658a144c1f..654153ea5151 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -11,12 +11,8 @@
#ifndef AMDKERNELCODET_H
#define AMDKERNELCODET_H
-#include "llvm/MC/SubtargetFeature.h"
-
-#include <cstddef>
#include <cstdint>
-#include "llvm/Support/Debug.h"
//---------------------------------------------------------------------------//
// AMD Kernel Code, and its dependencies //
//---------------------------------------------------------------------------//
@@ -527,7 +523,7 @@ typedef struct hsa_ext_control_directives_s {
/// the kernarg segment is constant for the duration of the kernel execution.
///
-typedef struct amd_kernel_code_s {
+struct amd_kernel_code_t {
uint32_t amd_kernel_code_version_major;
uint32_t amd_kernel_code_version_minor;
uint16_t amd_machine_kind;
@@ -650,6 +646,6 @@ typedef struct amd_kernel_code_s {
uint8_t reserved3[12];
uint64_t runtime_loader_kernel_symbol;
uint64_t control_directives[16];
-} amd_kernel_code_t;
+};
#endif // AMDKERNELCODET_H
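
Dropping the typedef around amd_kernel_code_t works because this header is compiled as C++, where the struct tag already names the type. An illustrative comparison with invented names:

#include <cstdint>

// Before (C idiom): the typedef gives the struct a usable type name.
typedef struct hypothetical_code_s {
  uint32_t amd_kernel_code_version_major;
} hypothetical_code_v1_t;

// After (C++): the tag itself is the type name, so the typedef layer goes away.
struct hypothetical_code_v2_t {
  uint32_t amd_kernel_code_version_major;
};
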
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 013b7a0cf25d..af4a47935e3f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
@@ -17,49 +16,23 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <iterator>
-#include <map>
-#include <memory>
-#include <string>
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -188,6 +161,12 @@ public:
ImmTyEndpgm,
};
+ enum ImmKindTy {
+ ImmKindTyNone,
+ ImmKindTyLiteral,
+ ImmKindTyConst,
+ };
+
private:
struct TokOp {
const char *Data;
@@ -198,6 +177,7 @@ private:
int64_t Val;
ImmTy Type;
bool IsFPImm;
+ mutable ImmKindTy Kind;
Modifiers Mods;
};
@@ -233,6 +213,29 @@ public:
return Kind == Immediate;
}
+ void setImmKindNone() const {
+ assert(isImm());
+ Imm.Kind = ImmKindTyNone;
+ }
+
+ void setImmKindLiteral() const {
+ assert(isImm());
+ Imm.Kind = ImmKindTyLiteral;
+ }
+
+ void setImmKindConst() const {
+ assert(isImm());
+ Imm.Kind = ImmKindTyConst;
+ }
+
+ bool IsImmKindLiteral() const {
+ return isImm() && Imm.Kind == ImmKindTyLiteral;
+ }
+
+ bool isImmKindConst() const {
+ return isImm() && Imm.Kind == ImmKindTyConst;
+ }
+
bool isInlinableImm(MVT type) const;
bool isLiteralImm(MVT type) const;
@@ -335,11 +338,14 @@ public:
bool isLDS() const { return isImmTy(ImmTyLDS); }
bool isDLC() const { return isImmTy(ImmTyDLC); }
bool isGLC() const { return isImmTy(ImmTyGLC); }
+ // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
+ // value of the GLC operand.
+ bool isGLC_1() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isSWZ() const { return isImmTy(ImmTySWZ); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isD16() const { return isImmTy(ImmTyD16); }
- bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
+ bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
@@ -689,6 +695,11 @@ public:
return Imm.Val;
}
+ void setImm(int64_t Val) {
+ assert(isImm());
+ Imm.Val = Val;
+ }
+
ImmTy getImmTy() const {
assert(isImm());
return Imm.Type;
@@ -903,6 +914,7 @@ public:
auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
Op->Imm.Val = Val;
Op->Imm.IsFPImm = IsFPImm;
+ Op->Imm.Kind = ImmKindTyNone;
Op->Imm.Type = Type;
Op->Imm.Mods = Modifiers();
Op->StartLoc = Loc;
@@ -1065,7 +1077,7 @@ private:
std::string &CollectString);
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
- RegisterKind RegKind, unsigned Reg1);
+ RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
unsigned &RegNum, unsigned &RegWidth,
bool RestoreOnFailure = false);
@@ -1083,7 +1095,8 @@ private:
bool ParseRegRange(unsigned& Num, unsigned& Width);
unsigned getRegularReg(RegisterKind RegKind,
unsigned RegNum,
- unsigned RegWidth);
+ unsigned RegWidth,
+ SMLoc Loc);
bool isRegister();
bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
@@ -1127,7 +1140,7 @@ public:
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
- if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
@@ -1144,7 +1157,7 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
- if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
@@ -1184,10 +1197,16 @@ public:
return AMDGPU::isGFX9(getSTI());
}
+ bool isGFX9Plus() const {
+ return AMDGPU::isGFX9Plus(getSTI());
+ }
+
bool isGFX10() const {
return AMDGPU::isGFX10(getSTI());
}
+ bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
+
bool isGFX10_BEncoding() const {
return AMDGPU::isGFX10_BEncoding(getSTI());
}
@@ -1204,9 +1223,7 @@ public:
return !isVI() && !isGFX9();
}
- bool hasSGPR104_SGPR105() const {
- return isGFX10();
- }
+ bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
bool hasIntClamp() const {
return getFeatureBits()[AMDGPU::FeatureIntClamp];
@@ -1240,6 +1257,7 @@ public:
bool isForcedDPP() const { return ForcedDPP; }
bool isForcedSDWA() const { return ForcedSDWA; }
ArrayRef<unsigned> getMatchedVariants() const;
+ StringRef getMatchedVariantName() const;
std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
@@ -1279,7 +1297,8 @@ public:
parseNamedBit(const char *Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
- StringRef &Value);
+ StringRef &Value,
+ SMLoc &StringLoc);
bool isModifier();
bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
@@ -1295,7 +1314,15 @@ public:
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
- OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
+ OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
+ OperandMatchResultTy parseUfmt(int64_t &Format);
+ OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
+ OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
+ OperandMatchResultTy parseFORMAT(OperandVector &Operands);
+ OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
+ OperandMatchResultTy parseNumericFormat(int64_t &Format);
+ bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
+ bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
@@ -1308,6 +1335,7 @@ public:
private:
struct OperandInfoTy {
+ SMLoc Loc;
int64_t Id;
bool IsSymbolic = false;
bool IsDefined = false;
@@ -1318,30 +1346,35 @@ private:
bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
bool validateSendMsg(const OperandInfoTy &Msg,
const OperandInfoTy &Op,
- const OperandInfoTy &Stream,
- const SMLoc Loc);
+ const OperandInfoTy &Stream);
- bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
+ bool parseHwregBody(OperandInfoTy &HwReg,
+ OperandInfoTy &Offset,
+ OperandInfoTy &Width);
bool validateHwreg(const OperandInfoTy &HwReg,
- const int64_t Offset,
- const int64_t Width,
- const SMLoc Loc);
+ const OperandInfoTy &Offset,
+ const OperandInfoTy &Width);
- void errorExpTgt();
- OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
+ SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
+ const OperandVector &Operands) const;
+ SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
+ SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
+ SMLoc getLitLoc(const OperandVector &Operands) const;
+ SMLoc getConstLoc(const OperandVector &Operands) const;
+
bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSOPLiteral(const MCInst &Inst) const;
- bool validateConstantBusLimitations(const MCInst &Inst);
- bool validateEarlyClobberLimitations(const MCInst &Inst);
+ bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
+ bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
bool validateIntClampSupported(const MCInst &Inst);
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
- bool validateMovrels(const MCInst &Inst);
+ bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
bool validateMIMGDataSize(const MCInst &Inst);
bool validateMIMGAddrSize(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
@@ -1349,13 +1382,23 @@ private:
bool validateLdsDirect(const MCInst &Inst);
bool validateOpSel(const MCInst &Inst);
bool validateVccOperand(unsigned Reg) const;
- bool validateVOP3Literal(const MCInst &Inst) const;
- bool validateMAIAccWrite(const MCInst &Inst);
+ bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
+ bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
+ bool validateDivScale(const MCInst &Inst);
+ bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
+ const SMLoc &IDLoc);
unsigned getConstantBusLimit(unsigned Opcode) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
+ bool isSupportedMnemo(StringRef Mnemo,
+ const FeatureBitset &FBS);
+ bool isSupportedMnemo(StringRef Mnemo,
+ const FeatureBitset &FBS,
+ ArrayRef<unsigned> Variants);
+ bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
+
bool isId(const StringRef Id) const;
bool isId(const AsmToken &Token, const StringRef Id) const;
bool isToken(const AsmToken::TokenKind Kind) const;
@@ -1364,9 +1407,11 @@ private:
bool trySkipToken(const AsmToken::TokenKind Kind);
bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
+ bool parseId(StringRef &Val, const StringRef ErrMsg = "");
+
void peekTokens(MutableArrayRef<AsmToken> Tokens);
AsmToken::TokenKind getTokenKind() const;
- bool parseExpr(int64_t &Imm);
+ bool parseExpr(int64_t &Imm, StringRef Expected = "");
bool parseExpr(OperandVector &Operands);
StringRef getTokenStr() const;
AsmToken peekToken();
@@ -1385,6 +1430,11 @@ public:
OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
OperandMatchResultTy parseBoolReg(OperandVector &Operands);
+ bool parseSwizzleOperand(int64_t &Op,
+ const unsigned MinVal,
+ const unsigned MaxVal,
+ const StringRef ErrMsg,
+ SMLoc &Loc);
bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
const unsigned MinVal,
const unsigned MaxVal,
@@ -1409,6 +1459,7 @@ public:
AMDGPUOperand::Ptr defaultDLC() const;
AMDGPUOperand::Ptr defaultGLC() const;
+ AMDGPUOperand::Ptr defaultGLC_1() const;
AMDGPUOperand::Ptr defaultSLC() const;
AMDGPUOperand::Ptr defaultSMRDOffset8() const;
@@ -1429,10 +1480,14 @@ public:
void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
bool IsAtomic = false);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
+ void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
OperandMatchResultTy parseDim(OperandVector &Operands);
OperandMatchResultTy parseDPP8(OperandVector &Operands);
OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
+ bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
+ int64_t parseDPPCtrlSel(StringRef Ctrl);
+ int64_t parseDPPCtrlPerm();
AMDGPUOperand::Ptr defaultRowMask() const;
AMDGPUOperand::Ptr defaultBankMask() const;
AMDGPUOperand::Ptr defaultBoundCtrl() const;
@@ -1673,7 +1728,7 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
if (AsmParser->isVI())
return isVReg32();
- else if (AsmParser->isGFX9() || AsmParser->isGFX10())
+ else if (AsmParser->isGFX9Plus())
return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
else
return false;
@@ -1726,6 +1781,7 @@ void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers
} else {
assert(!isImmTy(ImmTyNone) || !hasModifiers());
Inst.addOperand(MCOperand::createImm(Imm.Val));
+ setImmKindNone();
}
}
@@ -1753,6 +1809,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
+ setImmKindConst();
return;
}
@@ -1766,6 +1823,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
}
Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
+ setImmKindLiteral();
return;
}
@@ -1802,6 +1860,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
Inst.addOperand(MCOperand::createImm(ImmVal));
+ setImmKindLiteral();
return;
}
default:
@@ -1826,10 +1885,12 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
+ setImmKindConst();
return;
}
Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
+ setImmKindLiteral();
return;
case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -1838,10 +1899,12 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
+ setImmKindConst();
return;
}
Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
+ setImmKindLiteral();
return;
case AMDGPU::OPERAND_REG_IMM_INT16:
@@ -1854,10 +1917,12 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
+ setImmKindConst();
return;
}
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
+ setImmKindLiteral();
return;
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@@ -1879,6 +1944,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
APInt Literal(64, Imm.Val);
+ setImmKindNone();
if (!Imm.IsFPImm) {
// We got int literal token.
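
The new ImmKindTy field records how an immediate operand was ultimately encoded (inline constant, literal, or neither). It is mutable so the const addImmOperands/addLiteralImmOperand helpers can update it, and later validators (literal counting, constant-bus checks) can query it through IsImmKindLiteral/isImmKindConst. A stripped-down sketch of that bookkeeping, with invented names:

// Invented, simplified version of the bookkeeping; the real class is
// AMDGPUOperand above.
struct ExampleImmOperand {
  enum Kind { None, Literal, Const };

  long long Val = 0;
  mutable Kind K = None; // written from const helpers, like Imm.Kind above

  void setKindLiteral() const { K = Literal; } // emitted as a 32-bit literal
  void setKindConst() const { K = Const; }     // folded to an inline constant
  bool isKindLiteral() const { return K == Literal; }
};
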
@@ -2051,7 +2117,8 @@ OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
}
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
- RegisterKind RegKind, unsigned Reg1) {
+ RegisterKind RegKind, unsigned Reg1,
+ SMLoc Loc) {
switch (RegKind) {
case IS_SPECIAL:
if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
@@ -2084,12 +2151,14 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
RegWidth = 2;
return true;
}
+ Error(Loc, "register does not fit in the list");
return false;
case IS_VGPR:
case IS_SGPR:
case IS_AGPR:
case IS_TTMP:
if (Reg1 != Reg + RegWidth) {
+ Error(Loc, "registers in a list must have consecutive indices");
return false;
}
RegWidth++;
@@ -2172,7 +2241,8 @@ AMDGPUAsmParser::isRegister()
unsigned
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
unsigned RegNum,
- unsigned RegWidth) {
+ unsigned RegWidth,
+ SMLoc Loc) {
assert(isRegularReg(RegKind));
@@ -2183,18 +2253,24 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
AlignSize = std::min(RegWidth, 4u);
}
- if (RegNum % AlignSize != 0)
+ if (RegNum % AlignSize != 0) {
+ Error(Loc, "invalid register alignment");
return AMDGPU::NoRegister;
+ }
unsigned RegIdx = RegNum / AlignSize;
int RCID = getRegClass(RegKind, RegWidth);
- if (RCID == -1)
+ if (RCID == -1) {
+ Error(Loc, "invalid or unsupported register size");
return AMDGPU::NoRegister;
+ }
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
const MCRegisterClass RC = TRI->getRegClass(RCID);
- if (RegIdx >= RC.getNumRegs())
+ if (RegIdx >= RC.getNumRegs()) {
+ Error(Loc, "register index is out of range");
return AMDGPU::NoRegister;
+ }
return RC.getRegister(RegIdx);
}
@@ -2202,24 +2278,40 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
int64_t RegLo, RegHi;
- if (!trySkipToken(AsmToken::LBrac))
+ if (!skipToken(AsmToken::LBrac, "missing register index"))
return false;
+ SMLoc FirstIdxLoc = getLoc();
+ SMLoc SecondIdxLoc;
+
if (!parseExpr(RegLo))
return false;
if (trySkipToken(AsmToken::Colon)) {
+ SecondIdxLoc = getLoc();
if (!parseExpr(RegHi))
return false;
} else {
RegHi = RegLo;
}
- if (!trySkipToken(AsmToken::RBrac))
+ if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
return false;
- if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
+ if (!isUInt<32>(RegLo)) {
+ Error(FirstIdxLoc, "invalid register index");
return false;
+ }
+
+ if (!isUInt<32>(RegHi)) {
+ Error(SecondIdxLoc, "invalid register index");
+ return false;
+ }
+
+ if (RegLo > RegHi) {
+ Error(FirstIdxLoc, "first register index should not exceed second index");
+ return false;
+ }
Num = static_cast<unsigned>(RegLo);
Width = (RegHi - RegLo) + 1;
@@ -2246,10 +2338,14 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
SmallVectorImpl<AsmToken> &Tokens) {
assert(isToken(AsmToken::Identifier));
StringRef RegName = getTokenStr();
+ auto Loc = getLoc();
const RegInfo *RI = getRegularRegInfo(RegName);
- if (!RI)
+ if (!RI) {
+ Error(Loc, "invalid register name");
return AMDGPU::NoRegister;
+ }
+
Tokens.push_back(getToken());
lex(); // skip register name
@@ -2257,8 +2353,10 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
StringRef RegSuffix = RegName.substr(RI->Name.size());
if (!RegSuffix.empty()) {
// Single 32-bit register: vXX.
- if (!getRegNum(RegSuffix, RegNum))
+ if (!getRegNum(RegSuffix, RegNum)) {
+ Error(Loc, "invalid register index");
return AMDGPU::NoRegister;
+ }
RegWidth = 1;
} else {
// Range of registers: v[XX:YY]. ":YY" is optional.
@@ -2266,44 +2364,59 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
return AMDGPU::NoRegister;
}
- return getRegularReg(RegKind, RegNum, RegWidth);
+ return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
unsigned &RegWidth,
SmallVectorImpl<AsmToken> &Tokens) {
unsigned Reg = AMDGPU::NoRegister;
+ auto ListLoc = getLoc();
- if (!trySkipToken(AsmToken::LBrac))
+ if (!skipToken(AsmToken::LBrac,
+ "expected a register or a list of registers")) {
return AMDGPU::NoRegister;
+ }
// List of consecutive registers, e.g.: [s0,s1,s2,s3]
+ auto Loc = getLoc();
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
return AMDGPU::NoRegister;
- if (RegWidth != 1)
+ if (RegWidth != 1) {
+ Error(Loc, "expected a single 32-bit register");
return AMDGPU::NoRegister;
+ }
for (; trySkipToken(AsmToken::Comma); ) {
RegisterKind NextRegKind;
unsigned NextReg, NextRegNum, NextRegWidth;
+ Loc = getLoc();
- if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
- Tokens))
+ if (!ParseAMDGPURegister(NextRegKind, NextReg,
+ NextRegNum, NextRegWidth,
+ Tokens)) {
return AMDGPU::NoRegister;
- if (NextRegWidth != 1)
+ }
+ if (NextRegWidth != 1) {
+ Error(Loc, "expected a single 32-bit register");
return AMDGPU::NoRegister;
- if (NextRegKind != RegKind)
+ }
+ if (NextRegKind != RegKind) {
+ Error(Loc, "registers in a list must be of the same kind");
return AMDGPU::NoRegister;
- if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
+ }
+ if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
return AMDGPU::NoRegister;
}
- if (!trySkipToken(AsmToken::RBrac))
+ if (!skipToken(AsmToken::RBrac,
+ "expected a comma or a closing square bracket")) {
return AMDGPU::NoRegister;
+ }
if (isRegularReg(RegKind))
- Reg = getRegularReg(RegKind, RegNum, RegWidth);
+ Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
return Reg;
}
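
The register-parsing rewrite threads SMLoc values through every helper so each failure is reported with Error() at the offending token, instead of silently returning NoRegister and producing the old generic "not a valid operand" later. A tiny stand-alone sketch of the pattern (reportError is an invented stand-in for MCAsmParser::Error):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Invented stand-in for MCAsmParser::Error; in the assembler the SMLoc lets
// the diagnostic point at the exact token.
static bool reportError(SMLoc, const Twine &Msg) {
  errs() << "error: " << Msg << "\n";
  return true;
}

// Parse the numeric suffix of a register name ("12" from "v12"), reporting a
// failure at the token location rather than at the start of the statement.
static unsigned parseIndexedReg(StringRef Suffix, SMLoc Loc) {
  unsigned Num = 0;
  if (Suffix.getAsInteger(10, Num)) {
    reportError(Loc, "invalid register index");
    return ~0u; // analogous to AMDGPU::NoRegister
  }
  return Num;
}
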
@@ -2311,6 +2424,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
unsigned &RegNum, unsigned &RegWidth,
SmallVectorImpl<AsmToken> &Tokens) {
+ auto Loc = getLoc();
Reg = AMDGPU::NoRegister;
if (isToken(AsmToken::Identifier)) {
@@ -2322,12 +2436,26 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
}
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
- return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
+ if (Reg == AMDGPU::NoRegister) {
+ assert(Parser.hasPendingError());
+ return false;
+ }
+
+ if (!subtargetHasRegister(*TRI, Reg)) {
+ if (Reg == AMDGPU::SGPR_NULL) {
+ Error(Loc, "'null' operand is not supported on this GPU");
+ } else {
+ Error(Loc, "register not available on this GPU");
+ }
+ return false;
+ }
+
+ return true;
}
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
unsigned &RegNum, unsigned &RegWidth,
- bool RestoreOnFailure) {
+ bool RestoreOnFailure /*=false*/) {
Reg = AMDGPU::NoRegister;
SmallVector<AsmToken, 1> Tokens;
@@ -2377,11 +2505,11 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
int64_t OldCount;
if (!Sym->isVariable())
- return !Error(getParser().getTok().getLoc(),
+ return !Error(getLoc(),
".amdgcn.next_free_{v,s}gpr symbols must be variable");
if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
return !Error(
- getParser().getTok().getLoc(),
+ getLoc(),
".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
if (OldCount <= NewMax)
@@ -2392,18 +2520,16 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
- const auto &Tok = Parser.getTok();
+ const auto &Tok = getToken();
SMLoc StartLoc = Tok.getLoc();
SMLoc EndLoc = Tok.getEndLoc();
RegisterKind RegKind;
unsigned Reg, RegNum, RegWidth;
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
- //FIXME: improve error messages (bug 41303).
- Error(StartLoc, "not a valid operand.");
return nullptr;
}
- if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (isHsaAbiVersion3(&getSTI())) {
if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
@@ -2466,7 +2592,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
// This syntax is not compatible with syntax of standard
// MC expressions (due to the trailing '|').
SMLoc EndLoc;
- if (getParser().parsePrimaryExpr(Expr, EndLoc))
+ if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
return MatchOperand_ParseFail;
} else {
if (Parser.parseExpression(Expr))
@@ -2761,6 +2887,15 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_Success;
}
+static ArrayRef<unsigned> getAllVariants() {
+ static const unsigned Variants[] = {
+ AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
+ AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
+ };
+
+ return makeArrayRef(Variants);
+}
+
// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
if (getForcedEncodingSize() == 32) {
@@ -2784,12 +2919,23 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
return makeArrayRef(Variants);
}
- static const unsigned Variants[] = {
- AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
- AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
- };
+ return getAllVariants();
+}
- return makeArrayRef(Variants);
+StringRef AMDGPUAsmParser::getMatchedVariantName() const {
+ if (getForcedEncodingSize() == 32)
+ return "e32";
+
+ if (isForcedVOP3())
+ return "e64";
+
+ if (isForcedSDWA())
+ return "sdwa";
+
+ if (isForcedDPP())
+ return "dpp";
+
+ return "";
}
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
@@ -2858,20 +3004,20 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
}
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
- if (!isGFX10())
+ if (!isGFX10Plus())
return 1;
switch (Opcode) {
// 64-bit shift instructions can use only one scalar value input
- case AMDGPU::V_LSHLREV_B64:
+ case AMDGPU::V_LSHLREV_B64_e64:
case AMDGPU::V_LSHLREV_B64_gfx10:
- case AMDGPU::V_LSHL_B64:
- case AMDGPU::V_LSHRREV_B64:
+ case AMDGPU::V_LSHRREV_B64_e64:
case AMDGPU::V_LSHRREV_B64_gfx10:
- case AMDGPU::V_LSHR_B64:
- case AMDGPU::V_ASHRREV_I64:
+ case AMDGPU::V_ASHRREV_I64_e64:
case AMDGPU::V_ASHRREV_I64_gfx10:
- case AMDGPU::V_ASHR_I64:
+ case AMDGPU::V_LSHL_B64_e64:
+ case AMDGPU::V_LSHR_B64_e64:
+ case AMDGPU::V_ASHR_I64_e64:
return 1;
default:
return 2;
@@ -2885,15 +3031,19 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
} else if (MO.isReg()) {
auto Reg = MO.getReg();
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
- return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
+ auto PReg = mc2PseudoReg(Reg);
+ return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
} else {
return true;
}
}
-bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
+bool
+AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
+ const OperandVector &Operands) {
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
+ unsigned LastSGPR = AMDGPU::NoRegister;
unsigned ConstantBusUseCount = 0;
unsigned NumLiterals = 0;
unsigned LiteralSize;
@@ -2927,15 +3077,15 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
const MCOperand &MO = Inst.getOperand(OpIdx);
if (usesConstantBus(Inst, OpIdx)) {
if (MO.isReg()) {
- const unsigned Reg = mc2PseudoReg(MO.getReg());
+ LastSGPR = mc2PseudoReg(MO.getReg());
// Pairs of registers with a partial intersections like these
// s0, s[0:1]
// flat_scratch_lo, flat_scratch
// flat_scratch_lo, flat_scratch_hi
// are theoretically valid but they are disabled anyway.
// Note that this code mimics SIInstrInfo::verifyInstruction
- if (!SGPRsUsed.count(Reg)) {
- SGPRsUsed.insert(Reg);
+ if (!SGPRsUsed.count(LastSGPR)) {
+ SGPRsUsed.insert(LastSGPR);
++ConstantBusUseCount;
}
} else { // Expression or a literal
@@ -2967,10 +3117,19 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
}
ConstantBusUseCount += NumLiterals;
- return ConstantBusUseCount <= getConstantBusLimit(Opcode);
+ if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
+ return true;
+
+ SMLoc LitLoc = getLitLoc(Operands);
+ SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
+ SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
+ Error(Loc, "invalid operand (violates constant bus restrictions)");
+ return false;
}
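
validateConstantBusLimitations keeps its original accounting (each distinct SGPR and each literal costs one constant-bus slot, with the limit coming from getConstantBusLimit), but it now emits the error itself at the most likely culprit operand instead of leaving the caller to report a generic message at IDLoc. A simplified sketch of the accounting, with invented types:

#include <set>
#include <vector>

// Invented operand summary; the real code walks MCInst operands.
struct SrcOperand {
  bool IsSGPR = false;
  unsigned Reg = 0;
  bool IsLiteral = false;
};

// Each distinct SGPR and each literal consumes one constant-bus slot; the
// limit is 1, or 2 on GFX10+ except for the 64-bit shifts listed above.
static bool fitsConstantBus(const std::vector<SrcOperand> &Srcs,
                            unsigned Limit) {
  std::set<unsigned> SGPRsUsed;
  unsigned Uses = 0;
  for (const SrcOperand &S : Srcs) {
    if (S.IsSGPR) {
      if (SGPRsUsed.insert(S.Reg).second)
        ++Uses;
    } else if (S.IsLiteral) {
      ++Uses;
    }
  }
  return Uses <= Limit;
}
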
-bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
+bool
+AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
+ const OperandVector &Operands) {
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
@@ -2999,6 +3158,8 @@ bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
if (Src.isReg()) {
const unsigned SrcReg = mc2PseudoReg(Src.getReg());
if (isRegIntersect(DstReg, SrcReg, TRI)) {
+ Error(getRegLoc(SrcReg, Operands),
+ "destination must be different than all sources");
return false;
}
}
@@ -3034,8 +3195,9 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
assert(VDataIdx != -1);
- assert(DMaskIdx != -1);
- assert(TFEIdx != -1);
+
+ if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
+ return true;
unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
@@ -3058,10 +3220,11 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
+ if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
return true;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
+
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
@@ -3070,9 +3233,11 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
assert(VAddr0Idx != -1);
assert(SrsrcIdx != -1);
- assert(DimIdx != -1);
assert(SrsrcIdx > VAddr0Idx);
+ if (DimIdx == -1)
+ return true; // intersect_ray
+
unsigned Dim = Inst.getOperand(DimIdx).getImm();
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
@@ -3148,7 +3313,8 @@ static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
// movrels* opcodes should only allow VGPRS as src0.
// This is specified in .td description for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
-bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
+bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
+ const OperandVector &Operands) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
@@ -3159,16 +3325,24 @@ bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
assert(Src0Idx != -1);
+ SMLoc ErrLoc;
const MCOperand &Src0 = Inst.getOperand(Src0Idx);
- if (!Src0.isReg())
- return false;
+ if (Src0.isReg()) {
+ auto Reg = mc2PseudoReg(Src0.getReg());
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ if (!isSGPR(Reg, TRI))
+ return true;
+ ErrLoc = getRegLoc(Reg, Operands);
+ } else {
+ ErrLoc = getConstLoc(Operands);
+ }
- auto Reg = Src0.getReg();
- const MCRegisterInfo *TRI = getContext().getRegisterInfo();
- return !isSGPR(mc2PseudoReg(Reg), TRI);
+ Error(ErrLoc, "source operand must be a VGPR");
+ return false;
}
-bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
+bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
+ const OperandVector &Operands) {
const unsigned Opc = Inst.getOpcode();
@@ -3182,16 +3356,45 @@ bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
if (!Src0.isReg())
return true;
- auto Reg = Src0.getReg();
+ auto Reg = mc2PseudoReg(Src0.getReg());
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
- if (isSGPR(mc2PseudoReg(Reg), TRI)) {
- Error(getLoc(), "source operand must be either a VGPR or an inline constant");
+ if (isSGPR(Reg, TRI)) {
+ Error(getRegLoc(Reg, Operands),
+ "source operand must be either a VGPR or an inline constant");
return false;
}
return true;
}
+bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
+ switch (Inst.getOpcode()) {
+ default:
+ return true;
+ case V_DIV_SCALE_F32_gfx6_gfx7:
+ case V_DIV_SCALE_F32_vi:
+ case V_DIV_SCALE_F32_gfx10:
+ case V_DIV_SCALE_F64_gfx6_gfx7:
+ case V_DIV_SCALE_F64_vi:
+ case V_DIV_SCALE_F64_gfx10:
+ break;
+ }
+
+ // TODO: Check that src0 = src1 or src2.
+
+ for (auto Name : {AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src2_modifiers,
+ AMDGPU::OpName::src2_modifiers}) {
+ if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
+ .getImm() &
+ SISrcMods::ABS) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
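
The new validateDivScale rejects the ABS source modifier on the V_DIV_SCALE_F32/F64 encodings, matching the "ABS not allowed in VOP3B instructions" diagnostic added in validateInstruction below. One detail worth flagging: the initializer list names src2_modifiers twice and never src1_modifiers, which reads like a typo for the src0/src1/src2 triple. A simplified sketch of the modifier check (SISrcMods::ABS is the real flag; the surrounding plumbing is invented):

#include <cstdint>
#include <initializer_list>

// Mirrors SISrcMods::ABS (bit 1).
constexpr uint32_t SRC_MOD_ABS = 1u << 1;

// Return false if any source-modifier word has the ABS bit set.
static bool noAbsOnSources(std::initializer_list<uint32_t> SrcModifiers) {
  for (uint32_t Mods : SrcModifiers)
    if (Mods & SRC_MOD_ABS)
      return false;
  return true;
}
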
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -3239,8 +3442,8 @@ static bool IsRevOpcode(const unsigned Opcode)
case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
case AMDGPU::V_SUBREV_F32_e64_vi:
- case AMDGPU::V_SUBREV_I32_e32:
- case AMDGPU::V_SUBREV_I32_e64:
+ case AMDGPU::V_SUBREV_CO_U32_e32:
+ case AMDGPU::V_SUBREV_CO_U32_e64:
case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
@@ -3328,15 +3531,15 @@ static bool IsRevOpcode(const unsigned Opcode)
case AMDGPU::V_ASHRREV_I16_e64_vi:
case AMDGPU::V_ASHRREV_I16_gfx10:
- case AMDGPU::V_LSHLREV_B64:
+ case AMDGPU::V_LSHLREV_B64_e64:
case AMDGPU::V_LSHLREV_B64_gfx10:
case AMDGPU::V_LSHLREV_B64_vi:
- case AMDGPU::V_LSHRREV_B64:
+ case AMDGPU::V_LSHRREV_B64_e64:
case AMDGPU::V_LSHRREV_B64_gfx10:
case AMDGPU::V_LSHRREV_B64_vi:
- case AMDGPU::V_ASHRREV_I64:
+ case AMDGPU::V_ASHRREV_I64_e64:
case AMDGPU::V_ASHRREV_I64_gfx10:
case AMDGPU::V_ASHRREV_I64_vi:
@@ -3419,22 +3622,20 @@ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
return false;
}
- // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
// For FLAT segment the offset must be positive;
// MSB is ignored and forced to zero.
- unsigned OffsetSize = isGFX9() ? 13 : 12;
- if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
+ if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
+ unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
if (!isIntN(OffsetSize, Op.getImm())) {
Error(getFlatOffsetLoc(Operands),
- isGFX9() ? "expected a 13-bit signed offset" :
- "expected a 12-bit signed offset");
+ Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
return false;
}
} else {
- if (!isUIntN(OffsetSize - 1, Op.getImm())) {
+ unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
+ if (!isUIntN(OffsetSize, Op.getImm())) {
Error(getFlatOffsetLoc(Operands),
- isGFX9() ? "expected a 12-bit unsigned offset" :
- "expected an 11-bit unsigned offset");
+ Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
return false;
}
}
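
The flat-offset check now asks AMDGPU::getNumFlatOffsetBits(STI, Signed) for the field width instead of hard-coding 12 or 13 bits, so the same code and error text cover GFX9, GFX10 and any later width. Global and scratch segments take a signed offset; plain FLAT addressing takes an unsigned one. A small sketch of the resulting range test (flatOffsetInRange is an invented wrapper):

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// NumBits would come from AMDGPU::getNumFlatOffsetBits(STI, IsSigned), as in
// the hunk above.
static bool flatOffsetInRange(int64_t Offset, unsigned NumBits, bool IsSigned) {
  // Global/scratch segments allow a signed offset; plain FLAT addressing only
  // accepts a non-negative (unsigned) offset.
  return IsSigned ? llvm::isIntN(NumBits, Offset)
                  : llvm::isUIntN(NumBits, Offset);
}
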
@@ -3443,7 +3644,8 @@ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
}
SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ // Start with second operand because SMEM Offset cannot be dst or src0.
+ for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
if (Op.isSMEMOffset())
return Op.getStartLoc();
@@ -3539,7 +3741,8 @@ bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
}
// VOP3 literal is only allowed in GFX10+ and only one can be used
-bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
+bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
+ const OperandVector &Operands) {
unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
@@ -3565,8 +3768,11 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
continue;
if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
- getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
+ getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
+ Error(getConstLoc(Operands),
+ "inline constants are not allowed for this operand");
return false;
+ }
if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
uint32_t Value = static_cast<uint32_t>(MO.getImm());
@@ -3580,51 +3786,74 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
}
NumLiterals += NumExprs;
- return !NumLiterals ||
- (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
+ if (!NumLiterals)
+ return true;
+
+ if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
+ Error(getLitLoc(Operands), "literal operands are not supported");
+ return false;
+ }
+
+ if (NumLiterals > 1) {
+ Error(getLitLoc(Operands), "only one literal operand is allowed");
+ return false;
+ }
+
+ return true;
+}
+
+bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
+ const OperandVector &Operands,
+ const SMLoc &IDLoc) {
+ int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::glc1);
+ if (GLCPos != -1) {
+ // -1 is set by GLC_1 default operand. In all cases "glc" must be present
+ // in the asm string, and the default value means it is not present.
+ if (Inst.getOperand(GLCPos).getImm() == -1) {
+ Error(IDLoc, "instruction must use glc");
+ return false;
+ }
+ }
+
+ return true;
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
const SMLoc &IDLoc,
const OperandVector &Operands) {
if (!validateLdsDirect(Inst)) {
- Error(IDLoc,
+ Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
"invalid use of lds_direct");
return false;
}
if (!validateSOPLiteral(Inst)) {
- Error(IDLoc,
+ Error(getLitLoc(Operands),
"only one literal operand is allowed");
return false;
}
- if (!validateVOP3Literal(Inst)) {
- Error(IDLoc,
- "invalid literal operand");
+ if (!validateVOP3Literal(Inst, Operands)) {
return false;
}
- if (!validateConstantBusLimitations(Inst)) {
- Error(IDLoc,
- "invalid operand (violates constant bus restrictions)");
+ if (!validateConstantBusLimitations(Inst, Operands)) {
return false;
}
- if (!validateEarlyClobberLimitations(Inst)) {
- Error(IDLoc,
- "destination must be different than all sources");
+ if (!validateEarlyClobberLimitations(Inst, Operands)) {
return false;
}
if (!validateIntClampSupported(Inst)) {
- Error(IDLoc,
+ Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
"integer clamping is not supported on this GPU");
return false;
}
if (!validateOpSel(Inst)) {
- Error(IDLoc,
+ Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
"invalid op_sel operand");
return false;
}
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {
- Error(IDLoc,
+ Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
"d16 modifier is not supported on this GPU");
return false;
}
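
This hunk and the ones around it switch the validators from reporting at IDLoc (the start of the instruction) to reporting at the specific operand, using the new getRegLoc/getImmLoc/getLitLoc/getConstLoc helpers, which are thin wrappers over getOperandLoc with a predicate. A sketch of that lookup, with invented types:

#include "llvm/Support/SMLoc.h"
#include <functional>
#include <vector>

// Invented stand-in for AMDGPUOperand; only the start location matters here.
struct ExampleOperand {
  llvm::SMLoc Loc;
};

// Same shape as getOperandLoc above: return the location of the first operand
// matching the predicate, or a fallback (typically the instruction start).
static llvm::SMLoc
findOperandLoc(const std::vector<ExampleOperand> &Operands,
               const std::function<bool(const ExampleOperand &)> &Test,
               llvm::SMLoc Fallback) {
  for (const ExampleOperand &Op : Operands)
    if (Test(Op))
      return Op.Loc;
  return Fallback;
}
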
@@ -3643,17 +3872,16 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
return false;
}
if (!validateMIMGAtomicDMask(Inst)) {
- Error(IDLoc,
+ Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
"invalid atomic image dmask");
return false;
}
if (!validateMIMGGatherDMask(Inst)) {
- Error(IDLoc,
+ Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
"invalid image_gather dmask: only one bit must be set");
return false;
}
- if (!validateMovrels(Inst)) {
- Error(IDLoc, "source operand must be a VGPR");
+ if (!validateMovrels(Inst, Operands)) {
return false;
}
if (!validateFlatOffset(Inst, Operands)) {
@@ -3662,7 +3890,14 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateSMEMOffset(Inst, Operands)) {
return false;
}
- if (!validateMAIAccWrite(Inst)) {
+ if (!validateMAIAccWrite(Inst, Operands)) {
+ return false;
+ }
+ if (!validateDivScale(Inst)) {
+ Error(IDLoc, "ABS not allowed in VOP3B instructions");
+ return false;
+ }
+ if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
return false;
}
@@ -3673,6 +3908,57 @@ static std::string AMDGPUMnemonicSpellCheck(StringRef S,
const FeatureBitset &FBS,
unsigned VariantID = 0);
+static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
+ const FeatureBitset &AvailableFeatures,
+ unsigned VariantID);
+
+bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
+ const FeatureBitset &FBS) {
+ return isSupportedMnemo(Mnemo, FBS, getAllVariants());
+}
+
+bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
+ const FeatureBitset &FBS,
+ ArrayRef<unsigned> Variants) {
+ for (auto Variant : Variants) {
+ if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
+ return true;
+ }
+
+ return false;
+}
+
+bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
+ const SMLoc &IDLoc) {
+ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+
+ // Check if requested instruction variant is supported.
+ if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
+ return false;
+
+ // This instruction is not supported.
+ // Clear any other pending errors because they are no longer relevant.
+ getParser().clearPendingErrors();
+
+ // Requested instruction variant is not supported.
+ // Check if any other variants are supported.
+ StringRef VariantName = getMatchedVariantName();
+ if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
+ return Error(IDLoc,
+ Twine(VariantName,
+ " variant of this instruction is not supported"));
+ }
+
+ // Finally check if this instruction is supported on any other GPU.
+ if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
+ return Error(IDLoc, "instruction not supported on this GPU");
+ }
+
+ // Instruction not supported on any GPU. Probably a typo.
+ std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
+ return Error(IDLoc, "invalid instruction" + Suggestion);
+}
+
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -3702,27 +3988,28 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
break;
}
- switch (Result) {
- default: break;
- case Match_Success:
+ if (Result == Match_Success) {
if (!validateInstruction(Inst, IDLoc, Operands)) {
return true;
}
Inst.setLoc(IDLoc);
Out.emitInstruction(Inst, getSTI());
return false;
+ }
- case Match_MissingFeature:
- return Error(IDLoc, "instruction not supported on this GPU");
-
- case Match_MnemonicFail: {
- FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
- std::string Suggestion = AMDGPUMnemonicSpellCheck(
- ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
- return Error(IDLoc, "invalid instruction" + Suggestion,
- ((AMDGPUOperand &)*Operands[0]).getLocRange());
+ StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
+ if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
+ return true;
}
+ switch (Result) {
+ default: break;
+ case Match_MissingFeature:
+ // It has been verified that the specified instruction
+ // mnemonic is valid. A match was found but it requires
+ // features which are not supported on this GPU.
+ return Error(IDLoc, "operands are not valid for this GPU or mode");
+
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0ULL) {
@@ -3739,13 +4026,15 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_PreferE32:
return Error(IDLoc, "internal error: instruction without _e64 suffix "
"should be encoded as e32");
+ case Match_MnemonicFail:
+ llvm_unreachable("Invalid instructions should have been handled already");
}
llvm_unreachable("Implement any new match types added!");
}
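
With checkUnsupportedInstruction in place, a failed match is classified instead of collapsing into one generic message: first the requested variant (e32/e64/sdwa/dpp) is checked, then the other variants on the current GPU, then all GPUs, and only if everything fails does the parser fall back to "invalid instruction" plus a spelling suggestion. A compact sketch of that cascade (the boolean flags stand in for the generated AMDGPUCheckMnemonic queries made by isSupportedMnemo):

enum class MnemoDiag { Supported, WrongVariant, WrongGPU, Unknown };

static MnemoDiag classifyMnemonic(bool OkForRequestedVariant,
                                  bool OkForSomeVariantOnThisGPU,
                                  bool OkOnAnyGPU) {
  if (OkForRequestedVariant)
    return MnemoDiag::Supported;    // not a mnemonic problem; other errors stand
  if (OkForSomeVariantOnThisGPU)
    return MnemoDiag::WrongVariant; // "<variant> of this instruction is not supported"
  if (OkOnAnyGPU)
    return MnemoDiag::WrongGPU;     // "instruction not supported on this GPU"
  return MnemoDiag::Unknown;        // "invalid instruction" + spelling suggestion
}
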
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
int64_t Tmp = -1;
- if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
+ if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
return true;
}
if (getParser().parseAbsoluteExpression(Tmp)) {
@@ -3760,9 +4049,8 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
if (ParseAsAbsoluteExpression(Major))
return TokError("invalid major version");
- if (getLexer().isNot(AsmToken::Comma))
+ if (!trySkipToken(AsmToken::Comma))
return TokError("minor version number required, comma expected");
- Lex();
if (ParseAsAbsoluteExpression(Minor))
return TokError("invalid minor version");
@@ -3776,25 +4064,24 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
std::string Target;
- SMLoc TargetStart = getTok().getLoc();
+ SMLoc TargetStart = getLoc();
if (getParser().parseEscapedString(Target))
return true;
- SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
+ SMRange TargetRange = SMRange(TargetStart, getLoc());
std::string ExpectedTarget;
raw_string_ostream ExpectedTargetOS(ExpectedTarget);
IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
if (Target != ExpectedTargetOS.str())
- return getParser().Error(TargetRange.Start, "target must match options",
- TargetRange);
+ return Error(TargetRange.Start, "target must match options", TargetRange);
getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
return false;
}
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
- return getParser().Error(Range.Start, "value out of range", Range);
+ return Error(Range.Start, "value out of range", Range);
}
bool AMDGPUAsmParser::calculateGPRBlocks(
@@ -3865,15 +4152,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
Optional<bool> EnableWavefrontSize32;
while (true) {
- while (getLexer().is(AsmToken::EndOfStatement))
- Lex();
-
- if (getLexer().isNot(AsmToken::Identifier))
- return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
+ while (trySkipToken(AsmToken::EndOfStatement));
- StringRef ID = getTok().getIdentifier();
+ StringRef ID;
SMRange IDRange = getTok().getLocRange();
- Lex();
+ if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
+ return true;
if (ID == ".end_amdhsa_kernel")
break;
@@ -3882,11 +4166,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return TokError(".amdhsa_ directives cannot be repeated");
Seen.insert(ID);
- SMLoc ValStart = getTok().getLoc();
+ SMLoc ValStart = getLoc();
int64_t IVal;
if (getParser().parseAbsoluteExpression(IVal))
return true;
- SMLoc ValEnd = getTok().getLoc();
+ SMLoc ValEnd = getLoc();
SMRange ValRange = SMRange(ValStart, ValEnd);
if (IVal < 0)
@@ -3951,8 +4235,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
UserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
if (IVersion.Major < 10)
- return getParser().Error(IDRange.Start, "directive requires gfx10+",
- IDRange);
+ return Error(IDRange.Start, "directive requires gfx10+", IDRange);
EnableWavefrontSize32 = Val;
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
@@ -3960,7 +4243,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
@@ -3994,15 +4277,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
ReserveVCC = Val;
} else if (ID == ".amdhsa_reserve_flat_scratch") {
if (IVersion.Major < 7)
- return getParser().Error(IDRange.Start, "directive requires gfx7+",
- IDRange);
+ return Error(IDRange.Start, "directive requires gfx7+", IDRange);
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveFlatScr = Val;
} else if (ID == ".amdhsa_reserve_xnack_mask") {
if (IVersion.Major < 8)
- return getParser().Error(IDRange.Start, "directive requires gfx8+",
- IDRange);
+ return Error(IDRange.Start, "directive requires gfx8+", IDRange);
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveXNACK = Val;
@@ -4027,26 +4308,22 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
Val, ValRange);
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
- return getParser().Error(IDRange.Start, "directive requires gfx9+",
- IDRange);
+ return Error(IDRange.Start, "directive requires gfx9+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
ValRange);
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
- return getParser().Error(IDRange.Start, "directive requires gfx10+",
- IDRange);
+ return Error(IDRange.Start, "directive requires gfx10+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
ValRange);
} else if (ID == ".amdhsa_memory_ordered") {
if (IVersion.Major < 10)
- return getParser().Error(IDRange.Start, "directive requires gfx10+",
- IDRange);
+ return Error(IDRange.Start, "directive requires gfx10+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
ValRange);
} else if (ID == ".amdhsa_forward_progress") {
if (IVersion.Major < 10)
- return getParser().Error(IDRange.Start, "directive requires gfx10+",
- IDRange);
+ return Error(IDRange.Start, "directive requires gfx10+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
@@ -4080,8 +4357,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
Val, ValRange);
} else {
- return getParser().Error(IDRange.Start,
- "unknown .amdhsa_kernel directive", IDRange);
+ return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
}
#undef PARSE_BITS_ENTRY
@@ -4145,7 +4421,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
- if (getLexer().is(AsmToken::EndOfStatement)) {
+ if (isToken(AsmToken::EndOfStatement)) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
ISA.Stepping,
@@ -4156,32 +4432,23 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
if (ParseDirectiveMajorMinor(Major, Minor))
return true;
- if (getLexer().isNot(AsmToken::Comma))
+ if (!trySkipToken(AsmToken::Comma))
return TokError("stepping version number required, comma expected");
- Lex();
if (ParseAsAbsoluteExpression(Stepping))
return TokError("invalid stepping version");
- if (getLexer().isNot(AsmToken::Comma))
+ if (!trySkipToken(AsmToken::Comma))
return TokError("vendor name required, comma expected");
- Lex();
-
- if (getLexer().isNot(AsmToken::String))
- return TokError("invalid vendor name");
- VendorName = getLexer().getTok().getStringContents();
- Lex();
+ if (!parseString(VendorName, "invalid vendor name"))
+ return true;
- if (getLexer().isNot(AsmToken::Comma))
+ if (!trySkipToken(AsmToken::Comma))
return TokError("arch name required, comma expected");
- Lex();
- if (getLexer().isNot(AsmToken::String))
- return TokError("invalid arch name");
-
- ArchName = getLexer().getTok().getStringContents();
- Lex();
+ if (!parseString(ArchName, "invalid arch name"))
+ return true;
getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
VendorName, ArchName);
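The directive parsed here takes either no arguments (the ISA version is derived from the target) or five comma-separated fields in the order checked above. Assumed test inputs for ParseDirectiveHSACodeObjectISA; the field order matches the parse sequence, the concrete values are illustrative only:

static const char *const ExampleISADirectives[] = {
    ".hsa_code_object_isa",                          // use the target's ISA version
    ".hsa_code_object_isa 9,0,0,\"AMD\",\"AMDGPU\"", // major,minor,stepping,vendor,arch
};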
@@ -4206,7 +4473,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
if (ID == "enable_wavefront_size32") {
if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
- if (!isGFX10())
+ if (!isGFX10Plus())
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
@@ -4218,7 +4485,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
if (ID == "wavefront_size") {
if (Header.wavefront_size == 5) {
- if (!isGFX10())
+ if (!isGFX10Plus())
return TokError("wavefront_size=5 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
return TokError("wavefront_size=5 requires +WavefrontSize32");
@@ -4229,17 +4496,20 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}
if (ID == "enable_wgp_mode") {
- if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
+ if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
+ !isGFX10Plus())
return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
}
if (ID == "enable_mem_ordered") {
- if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
+ if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
+ !isGFX10Plus())
return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
}
if (ID == "enable_fwd_progress") {
- if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
+ if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
+ !isGFX10Plus())
return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
}
@@ -4253,14 +4523,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
// will set the current token to EndOfStatement.
- while(getLexer().is(AsmToken::EndOfStatement))
- Lex();
+ while(trySkipToken(AsmToken::EndOfStatement));
- if (getLexer().isNot(AsmToken::Identifier))
- return TokError("expected value identifier or .end_amd_kernel_code_t");
-
- StringRef ID = getLexer().getTok().getIdentifier();
- Lex();
+ StringRef ID;
+ if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
+ return true;
if (ID == ".end_amd_kernel_code_t")
break;
@@ -4275,34 +4542,32 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
}
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
- if (getLexer().isNot(AsmToken::Identifier))
- return TokError("expected symbol name");
-
- StringRef KernelName = Parser.getTok().getString();
+ StringRef KernelName;
+ if (!parseId(KernelName, "expected symbol name"))
+ return true;
getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
ELF::STT_AMDGPU_HSA_KERNEL);
- Lex();
- if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
- KernelScope.initialize(getContext());
+
+ KernelScope.initialize(getContext());
return false;
}
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
- return Error(getParser().getTok().getLoc(),
+ return Error(getLoc(),
".amd_amdgpu_isa directive is not available on non-amdgcn "
"architectures");
}
- auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
+ auto ISAVersionStringFromASM = getToken().getStringContents();
std::string ISAVersionStringFromSTI;
raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
- return Error(getParser().getTok().getLoc(),
+ return Error(getLoc(),
".amd_amdgpu_isa directive does not match triple and/or mcpu "
"arguments specified through the command line");
}
@@ -4317,14 +4582,14 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
const char *AssemblerDirectiveBegin;
const char *AssemblerDirectiveEnd;
std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
- AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
+ isHsaAbiVersion3(&getSTI())
? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
HSAMD::V3::AssemblerDirectiveEnd)
: std::make_tuple(HSAMD::AssemblerDirectiveBegin,
HSAMD::AssemblerDirectiveEnd);
if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
- return Error(getParser().getTok().getLoc(),
+ return Error(getLoc(),
(Twine(AssemblerDirectiveBegin) + Twine(" directive is "
"not available on non-amdhsa OSes")).str());
}
@@ -4334,12 +4599,12 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
HSAMetadataString))
return true;
- if (IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (isHsaAbiVersion3(&getSTI())) {
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
- return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ return Error(getLoc(), "invalid HSA metadata");
} else {
if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
- return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ return Error(getLoc(), "invalid HSA metadata");
}
return false;
@@ -4356,19 +4621,15 @@ bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
getLexer().setSkipSpace(false);
bool FoundEnd = false;
- while (!getLexer().is(AsmToken::Eof)) {
- while (getLexer().is(AsmToken::Space)) {
- CollectStream << getLexer().getTok().getString();
+ while (!isToken(AsmToken::Eof)) {
+ while (isToken(AsmToken::Space)) {
+ CollectStream << getTokenStr();
Lex();
}
- if (getLexer().is(AsmToken::Identifier)) {
- StringRef ID = getLexer().getTok().getIdentifier();
- if (ID == AssemblerDirectiveEnd) {
- Lex();
- FoundEnd = true;
- break;
- }
+ if (trySkipId(AssemblerDirectiveEnd)) {
+ FoundEnd = true;
+ break;
}
CollectStream << Parser.parseStringToEndOfStatement()
@@ -4379,7 +4640,7 @@ bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
getLexer().setSkipSpace(true);
- if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
+ if (isToken(AsmToken::Eof) && !FoundEnd) {
return TokError(Twine("expected directive ") +
Twine(AssemblerDirectiveEnd) + Twine(" not found"));
}
@@ -4397,14 +4658,14 @@ bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
auto PALMetadata = getTargetStreamer().getPALMetadata();
if (!PALMetadata->setFromString(String))
- return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
+ return Error(getLoc(), "invalid PAL metadata");
return false;
}
/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
- return Error(getParser().getTok().getLoc(),
+ return Error(getLoc(),
(Twine(PALMD::AssemblerDirective) + Twine(" directive is "
"not available on non-amdpal OSes")).str());
}
@@ -4417,19 +4678,17 @@ bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
return TokError(Twine("invalid value in ") +
Twine(PALMD::AssemblerDirective));
}
- if (getLexer().isNot(AsmToken::Comma)) {
+ if (!trySkipToken(AsmToken::Comma)) {
return TokError(Twine("expected an even number of values in ") +
Twine(PALMD::AssemblerDirective));
}
- Lex();
if (ParseAsAbsoluteExpression(Value)) {
return TokError(Twine("invalid value in ") +
Twine(PALMD::AssemblerDirective));
}
PALMetadata->setRegister(Key, Value);
- if (getLexer().isNot(AsmToken::Comma))
+ if (!trySkipToken(AsmToken::Comma))
break;
- Lex();
}
return false;
}
@@ -4441,7 +4700,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
return true;
StringRef Name;
- SMLoc NameLoc = getLexer().getLoc();
+ SMLoc NameLoc = getLoc();
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
@@ -4452,7 +4711,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
int64_t Size;
- SMLoc SizeLoc = getLexer().getLoc();
+ SMLoc SizeLoc = getLoc();
if (getParser().parseAbsoluteExpression(Size))
return true;
if (Size < 0)
@@ -4461,9 +4720,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
return Error(SizeLoc, "size is too large");
int64_t Alignment = 4;
- if (getLexer().is(AsmToken::Comma)) {
- Lex();
- SMLoc AlignLoc = getLexer().getLoc();
+ if (trySkipToken(AsmToken::Comma)) {
+ SMLoc AlignLoc = getLoc();
if (getParser().parseAbsoluteExpression(Alignment))
return true;
if (Alignment < 0 || !isPowerOf2_64(Alignment))
@@ -4491,7 +4749,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
- if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (isHsaAbiVersion3(&getSTI())) {
if (IDVal == ".amdgcn_target")
return ParseDirectiveAMDGCNTarget();
@@ -4539,7 +4797,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
R.isValid(); ++R) {
if (*R == RegNo)
- return isGFX9() || isGFX10();
+ return isGFX9Plus();
}
// GFX10 has 2 more SGPRs 104 and 105.
@@ -4555,20 +4813,20 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
case AMDGPU::SRC_PRIVATE_BASE:
case AMDGPU::SRC_PRIVATE_LIMIT:
case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
- return !isCI() && !isSI() && !isVI();
+ return isGFX9Plus();
case AMDGPU::TBA:
case AMDGPU::TBA_LO:
case AMDGPU::TBA_HI:
case AMDGPU::TMA:
case AMDGPU::TMA_LO:
case AMDGPU::TMA_HI:
- return !isGFX9() && !isGFX10();
+ return !isGFX9Plus();
case AMDGPU::XNACK_MASK:
case AMDGPU::XNACK_MASK_LO:
case AMDGPU::XNACK_MASK_HI:
- return !isCI() && !isSI() && !isGFX10() && hasXNACK();
+ return (isVI() || isGFX9()) && hasXNACK();
case AMDGPU::SGPR_NULL:
- return isGFX10();
+ return isGFX10Plus();
default:
break;
}
@@ -4576,7 +4834,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
if (isCI())
return true;
- if (isSI() || isGFX10()) {
+ if (isSI() || isGFX10Plus()) {
// No flat_scr on SI.
// On GFX10 flat scratch is not a valid register operand and can only be
// accessed with s_setreg/s_getreg.
@@ -4614,35 +4872,33 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
// are appending default values to the Operands list. This is only done
// by custom parser, so we shouldn't continue on to the generic parsing.
if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
- getLexer().is(AsmToken::EndOfStatement))
+ isToken(AsmToken::EndOfStatement))
return ResTy;
- if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
+ SMLoc RBraceLoc;
+ SMLoc LBraceLoc = getLoc();
+ if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
unsigned Prefix = Operands.size();
- SMLoc LBraceLoc = getTok().getLoc();
- Parser.Lex(); // eat the '['
for (;;) {
ResTy = parseReg(Operands);
if (ResTy != MatchOperand_Success)
return ResTy;
- if (getLexer().is(AsmToken::RBrac))
+ RBraceLoc = getLoc();
+ if (trySkipToken(AsmToken::RBrac))
break;
- if (getLexer().isNot(AsmToken::Comma))
+ if (!trySkipToken(AsmToken::Comma))
return MatchOperand_ParseFail;
- Parser.Lex();
}
if (Operands.size() - Prefix > 1) {
Operands.insert(Operands.begin() + Prefix,
AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
- Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
- getTok().getLoc()));
+ Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
}
- Parser.Lex(); // eat the ']'
return MatchOperand_Success;
}
@@ -4680,32 +4936,28 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
bool IsMIMG = Name.startswith("image_");
- while (!getLexer().is(AsmToken::EndOfStatement)) {
+ while (!trySkipToken(AsmToken::EndOfStatement)) {
OperandMode Mode = OperandMode_Default;
- if (IsMIMG && isGFX10() && Operands.size() == 2)
+ if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
Mode = OperandMode_NSA;
OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
// Eat the comma or space if there is one.
- if (getLexer().is(AsmToken::Comma))
- Parser.Lex();
+ trySkipToken(AsmToken::Comma);
- switch (Res) {
- case MatchOperand_Success: break;
- case MatchOperand_ParseFail:
+ if (Res != MatchOperand_Success) {
+ checkUnsupportedInstruction(Name, NameLoc);
+ if (!Parser.hasPendingError()) {
// FIXME: use real operand location rather than the current location.
- Error(getLexer().getLoc(), "failed parsing operand.");
- while (!getLexer().is(AsmToken::EndOfStatement)) {
- Parser.Lex();
- }
- return true;
- case MatchOperand_NoMatch:
- // FIXME: use real operand location rather than the current location.
- Error(getLexer().getLoc(), "not a valid operand.");
- while (!getLexer().is(AsmToken::EndOfStatement)) {
- Parser.Lex();
- }
- return true;
+ StringRef Msg =
+ (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
+ "not a valid operand.";
+ Error(getLoc(), Msg);
+ }
+ while (!trySkipToken(AsmToken::EndOfStatement)) {
+ lex();
+ }
+ return true;
}
}
@@ -4794,14 +5046,14 @@ OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy) {
int64_t Bit = 0;
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc S = getLoc();
// We are at the end of the statement, and this is a default argument, so
// use a default value.
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- switch(getLexer().getKind()) {
+ if (!isToken(AsmToken::EndOfStatement)) {
+ switch(getTokenKind()) {
case AsmToken::Identifier: {
- StringRef Tok = Parser.getTok().getString();
+ StringRef Tok = getTokenStr();
if (Tok == Name) {
if (Tok == "r128" && !hasMIMG_R128())
Error(S, "r128 modifier is not supported on this GPU");
@@ -4822,7 +5074,7 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
}
}
- if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
+ if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC)
return MatchOperand_ParseFail;
if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
@@ -4847,73 +5099,273 @@ static void addOptionalImmOperand(
}
OperandMatchResultTy
-AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
- if (getLexer().isNot(AsmToken::Identifier)) {
+AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
+ StringRef &Value,
+ SMLoc &StringLoc) {
+ if (!trySkipId(Prefix, AsmToken::Colon))
return MatchOperand_NoMatch;
+
+ StringLoc = getLoc();
+ return parseId(Value, "expected an identifier") ? MatchOperand_Success
+ : MatchOperand_ParseFail;
+}
+
+//===----------------------------------------------------------------------===//
+// MTBUF format
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
+ int64_t MaxVal,
+ int64_t &Fmt) {
+ int64_t Val;
+ SMLoc Loc = getLoc();
+
+ auto Res = parseIntWithPrefix(Pref, Val);
+ if (Res == MatchOperand_ParseFail)
+ return false;
+ if (Res == MatchOperand_NoMatch)
+ return true;
+
+ if (Val < 0 || Val > MaxVal) {
+ Error(Loc, Twine("out of range ", StringRef(Pref)));
+ return false;
}
- StringRef Tok = Parser.getTok().getString();
- if (Tok != Prefix) {
+
+ Fmt = Val;
+ return true;
+}
+
+// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
+// values to live in a joint format operand in the MCInst encoding.
+OperandMatchResultTy
+AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
+ using namespace llvm::AMDGPU::MTBUFFormat;
+
+ int64_t Dfmt = DFMT_UNDEF;
+ int64_t Nfmt = NFMT_UNDEF;
+
+ // dfmt and nfmt can appear in either order, and each is optional.
+ for (int I = 0; I < 2; ++I) {
+ if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
+ return MatchOperand_ParseFail;
+
+ if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
+ return MatchOperand_ParseFail;
+ }
+    // Skip the optional comma between dfmt and nfmt,
+    // but guard against two consecutive commas.
+ if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
+ !peekToken().is(AsmToken::Comma)) {
+ trySkipToken(AsmToken::Comma);
+ }
+ }
+
+ if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
return MatchOperand_NoMatch;
+
+ Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
+ Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
+
+ Format = encodeDfmtNfmt(Dfmt, Nfmt);
+ return MatchOperand_Success;
+}
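The joint operand described in the comment above combines both fields into one immediate. The legacy encoding, visible in the code this hunk removes (Dfmt | Nfmt << 4), puts dfmt in the low four bits and nfmt above it; the sketch below treats that layout as an assumption about encodeDfmtNfmt():

#include <cstdint>

constexpr int64_t encodeDfmtNfmtSketch(int64_t Dfmt, int64_t Nfmt) {
  return Dfmt | (Nfmt << 4); // dfmt in bits [3:0], nfmt in bits [6:4]
}

static_assert(encodeDfmtNfmtSketch(1, 2) == 0x21, "dfmt=1, nfmt=2");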
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseUfmt(int64_t &Format) {
+ using namespace llvm::AMDGPU::MTBUFFormat;
+
+ int64_t Fmt = UFMT_UNDEF;
+
+ if (!tryParseFmt("format", UFMT_MAX, Fmt))
+ return MatchOperand_ParseFail;
+
+ if (Fmt == UFMT_UNDEF)
+ return MatchOperand_NoMatch;
+
+ Format = Fmt;
+ return MatchOperand_Success;
+}
+
+bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
+ int64_t &Nfmt,
+ StringRef FormatStr,
+ SMLoc Loc) {
+ using namespace llvm::AMDGPU::MTBUFFormat;
+ int64_t Format;
+
+ Format = getDfmt(FormatStr);
+ if (Format != DFMT_UNDEF) {
+ Dfmt = Format;
+ return true;
}
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Colon)) {
+ Format = getNfmt(FormatStr, getSTI());
+ if (Format != NFMT_UNDEF) {
+ Nfmt = Format;
+ return true;
+ }
+
+ Error(Loc, "unsupported format");
+ return false;
+}
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
+ SMLoc FormatLoc,
+ int64_t &Format) {
+ using namespace llvm::AMDGPU::MTBUFFormat;
+
+ int64_t Dfmt = DFMT_UNDEF;
+ int64_t Nfmt = NFMT_UNDEF;
+ if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
return MatchOperand_ParseFail;
+
+ if (trySkipToken(AsmToken::Comma)) {
+ StringRef Str;
+ SMLoc Loc = getLoc();
+ if (!parseId(Str, "expected a format string") ||
+ !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
+ return MatchOperand_ParseFail;
+ }
+ if (Dfmt == DFMT_UNDEF) {
+ Error(Loc, "duplicate numeric format");
+ return MatchOperand_ParseFail;
+ } else if (Nfmt == NFMT_UNDEF) {
+ Error(Loc, "duplicate data format");
+ return MatchOperand_ParseFail;
+ }
}
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Identifier)) {
+ Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
+ Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
+
+ if (isGFX10Plus()) {
+ auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
+ if (Ufmt == UFMT_UNDEF) {
+ Error(FormatLoc, "unsupported format");
+ return MatchOperand_ParseFail;
+ }
+ Format = Ufmt;
+ } else {
+ Format = encodeDfmtNfmt(Dfmt, Nfmt);
+ }
+
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
+ SMLoc Loc,
+ int64_t &Format) {
+ using namespace llvm::AMDGPU::MTBUFFormat;
+
+ auto Id = getUnifiedFormat(FormatStr);
+ if (Id == UFMT_UNDEF)
+ return MatchOperand_NoMatch;
+
+ if (!isGFX10Plus()) {
+ Error(Loc, "unified format is not supported on this GPU");
return MatchOperand_ParseFail;
}
- Value = Parser.getTok().getString();
+ Format = Id;
return MatchOperand_Success;
}
-// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
-// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
-AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
- SMLoc S = Parser.getTok().getLoc();
- int64_t Dfmt = 0, Nfmt = 0;
- // dfmt and nfmt can appear in either order, and each is optional.
- bool GotDfmt = false, GotNfmt = false;
- while (!GotDfmt || !GotNfmt) {
- if (!GotDfmt) {
- auto Res = parseIntWithPrefix("dfmt", Dfmt);
- if (Res != MatchOperand_NoMatch) {
- if (Res != MatchOperand_Success)
- return Res;
- if (Dfmt >= 16) {
- Error(Parser.getTok().getLoc(), "out of range dfmt");
- return MatchOperand_ParseFail;
- }
- GotDfmt = true;
- Parser.Lex();
- continue;
- }
- }
- if (!GotNfmt) {
- auto Res = parseIntWithPrefix("nfmt", Nfmt);
- if (Res != MatchOperand_NoMatch) {
- if (Res != MatchOperand_Success)
- return Res;
- if (Nfmt >= 8) {
- Error(Parser.getTok().getLoc(), "out of range nfmt");
- return MatchOperand_ParseFail;
- }
- GotNfmt = true;
- Parser.Lex();
- continue;
- }
- }
- break;
+AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
+ using namespace llvm::AMDGPU::MTBUFFormat;
+ SMLoc Loc = getLoc();
+
+ if (!parseExpr(Format))
+ return MatchOperand_ParseFail;
+ if (!isValidFormatEncoding(Format, getSTI())) {
+ Error(Loc, "out of range format");
+ return MatchOperand_ParseFail;
}
- if (!GotDfmt && !GotNfmt)
+
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
+ using namespace llvm::AMDGPU::MTBUFFormat;
+
+ if (!trySkipId("format", AsmToken::Colon))
return MatchOperand_NoMatch;
- auto Format = Dfmt | Nfmt << 4;
+
+ if (trySkipToken(AsmToken::LBrac)) {
+ StringRef FormatStr;
+ SMLoc Loc = getLoc();
+ if (!parseId(FormatStr, "expected a format string"))
+ return MatchOperand_ParseFail;
+
+ auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
+ if (Res == MatchOperand_NoMatch)
+ Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
+ return MatchOperand_ParseFail;
+
+ return MatchOperand_Success;
+ }
+
+ return parseNumericFormat(Format);
+}
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
+ using namespace llvm::AMDGPU::MTBUFFormat;
+
+ int64_t Format = getDefaultFormatEncoding(getSTI());
+ OperandMatchResultTy Res;
+ SMLoc Loc = getLoc();
+
+ // Parse legacy format syntax.
+ Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
+ if (Res == MatchOperand_ParseFail)
+ return Res;
+
+ bool FormatFound = (Res == MatchOperand_Success);
+
Operands.push_back(
- AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
+ AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
+
+ if (FormatFound)
+ trySkipToken(AsmToken::Comma);
+
+ if (isToken(AsmToken::EndOfStatement)) {
+ // We are expecting an soffset operand,
+    // but let the matcher handle the error.
+ return MatchOperand_Success;
+ }
+
+ // Parse soffset.
+ Res = parseRegOrImm(Operands);
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ trySkipToken(AsmToken::Comma);
+
+ if (!FormatFound) {
+ Res = parseSymbolicOrNumericFormat(Format);
+ if (Res == MatchOperand_ParseFail)
+ return Res;
+ if (Res == MatchOperand_Success) {
+ auto Size = Operands.size();
+ AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
+ assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
+ Op.setImm(Format);
+ }
+ return MatchOperand_Success;
+ }
+
+ if (isId("format") && peekToken().is(AsmToken::Colon)) {
+ Error(getLoc(), "duplicate format");
+ return MatchOperand_ParseFail;
+ }
return MatchOperand_Success;
}
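When the format modifier appears after soffset, parseFORMAT first pushes a default FORMAT immediate, parses soffset, and then patches the earlier operand in place (the Operands[Size - 2] access above). A stand-alone sketch of that fix-up with simplified stand-in types:

#include <cassert>
#include <cstdint>
#include <vector>

struct OperandSketch {
  bool IsFormat;
  int64_t Imm;
};

// Operand list layout at this point: [..., FORMAT(default), soffset].
static void patchDefaultFormat(std::vector<OperandSketch> &Ops, int64_t Format) {
  OperandSketch &Fmt = Ops[Ops.size() - 2];
  assert(Fmt.IsFormat && "expected the default FORMAT immediate here");
  Fmt.Imm = Format; // overwrite the default with the parsed encoding
}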
@@ -5122,12 +5574,14 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
int64_t Waitcnt = getWaitcntBitMask(ISA);
SMLoc S = getLoc();
- // If parse failed, do not return error code
- // to avoid excessive error messages.
if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
- while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
+ while (!isToken(AsmToken::EndOfStatement)) {
+ if (!parseCnt(Waitcnt))
+ return MatchOperand_ParseFail;
+ }
} else {
- parseExpr(Waitcnt);
+ if (!parseExpr(Waitcnt))
+ return MatchOperand_ParseFail;
}
Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
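The loop above takes the symbolic counter path only when an identifier is immediately followed by '(' and otherwise falls back to a plain absolute expression; with this change both paths fail hard instead of being silently ignored. Assumed test strings for parseSWaitCntOps (counter values illustrative):

static const char *const ExampleWaitcnt[] = {
    "s_waitcnt vmcnt(0) lgkmcnt(0)", // named counters, handled by parseCnt()
    "s_waitcnt 0",                   // raw bitmask immediate, handled by parseExpr()
};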
@@ -5145,16 +5599,17 @@ AMDGPUOperand::isSWaitCnt() const {
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
- int64_t &Offset,
- int64_t &Width) {
+ OperandInfoTy &Offset,
+ OperandInfoTy &Width) {
using namespace llvm::AMDGPU::Hwreg;
// The register may be specified by name or using a numeric code
+ HwReg.Loc = getLoc();
if (isToken(AsmToken::Identifier) &&
(HwReg.Id = getHwregId(getTokenStr())) >= 0) {
HwReg.IsSymbolic = true;
- lex(); // skip message name
- } else if (!parseExpr(HwReg.Id)) {
+ lex(); // skip register name
+ } else if (!parseExpr(HwReg.Id, "a register name")) {
return false;
}
@@ -5162,33 +5617,45 @@ AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
return true;
// parse optional params
- return
- skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
- parseExpr(Offset) &&
- skipToken(AsmToken::Comma, "expected a comma") &&
- parseExpr(Width) &&
- skipToken(AsmToken::RParen, "expected a closing parenthesis");
+ if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
+ return false;
+
+ Offset.Loc = getLoc();
+ if (!parseExpr(Offset.Id))
+ return false;
+
+ if (!skipToken(AsmToken::Comma, "expected a comma"))
+ return false;
+
+ Width.Loc = getLoc();
+ return parseExpr(Width.Id) &&
+ skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
- const int64_t Offset,
- const int64_t Width,
- const SMLoc Loc) {
+ const OperandInfoTy &Offset,
+ const OperandInfoTy &Width) {
using namespace llvm::AMDGPU::Hwreg;
if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
- Error(Loc, "specified hardware register is not supported on this GPU");
+ Error(HwReg.Loc,
+ "specified hardware register is not supported on this GPU");
return false;
- } else if (!isValidHwreg(HwReg.Id)) {
- Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
+ }
+ if (!isValidHwreg(HwReg.Id)) {
+ Error(HwReg.Loc,
+ "invalid code of hardware register: only 6-bit values are legal");
return false;
- } else if (!isValidHwregOffset(Offset)) {
- Error(Loc, "invalid bit offset: only 5-bit values are legal");
+ }
+ if (!isValidHwregOffset(Offset.Id)) {
+ Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
return false;
- } else if (!isValidHwregWidth(Width)) {
- Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
+ }
+ if (!isValidHwregWidth(Width.Id)) {
+ Error(Width.Loc,
+ "invalid bitfield width: only values from 1 to 32 are legal");
return false;
}
return true;
@@ -5201,19 +5668,23 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
int64_t ImmVal = 0;
SMLoc Loc = getLoc();
- // If parse failed, do not return error code
- // to avoid excessive error messages.
if (trySkipId("hwreg", AsmToken::LParen)) {
OperandInfoTy HwReg(ID_UNKNOWN_);
- int64_t Offset = OFFSET_DEFAULT_;
- int64_t Width = WIDTH_DEFAULT_;
+ OperandInfoTy Offset(OFFSET_DEFAULT_);
+ OperandInfoTy Width(WIDTH_DEFAULT_);
if (parseHwregBody(HwReg, Offset, Width) &&
- validateHwreg(HwReg, Offset, Width, Loc)) {
- ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
+ validateHwreg(HwReg, Offset, Width)) {
+ ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
+ } else {
+ return MatchOperand_ParseFail;
}
- } else if (parseExpr(ImmVal)) {
- if (ImmVal < 0 || !isUInt<16>(ImmVal))
+ } else if (parseExpr(ImmVal, "a hwreg macro")) {
+ if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
Error(Loc, "invalid immediate: only 16-bit values are legal");
+ return MatchOperand_ParseFail;
+ }
+ } else {
+ return MatchOperand_ParseFail;
}
Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
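The hwreg() operand built here is a 16-bit immediate combining register id, bit offset, and field width. The ranges below come straight from the validateHwreg() diagnostics; the exact bit layout of encodeHwreg() is an assumption made for illustration only:

#include <cassert>
#include <cstdint>

static uint16_t encodeHwregSketch(unsigned Id, unsigned Offset, unsigned Width) {
  assert(Id < 64 && "only 6-bit register codes are legal");
  assert(Offset < 32 && "only 5-bit bit offsets are legal");
  assert(Width >= 1 && Width <= 32 && "width must be in [1,32]");
  return uint16_t(Id | (Offset << 6) | ((Width - 1) << 11)); // assumed SIMM16 layout
}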
@@ -5234,24 +5705,27 @@ AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
OperandInfoTy &Stream) {
using namespace llvm::AMDGPU::SendMsg;
+ Msg.Loc = getLoc();
if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
Msg.IsSymbolic = true;
lex(); // skip message name
- } else if (!parseExpr(Msg.Id)) {
+ } else if (!parseExpr(Msg.Id, "a message name")) {
return false;
}
if (trySkipToken(AsmToken::Comma)) {
Op.IsDefined = true;
+ Op.Loc = getLoc();
if (isToken(AsmToken::Identifier) &&
(Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
lex(); // skip operation name
- } else if (!parseExpr(Op.Id)) {
+ } else if (!parseExpr(Op.Id, "an operation name")) {
return false;
}
if (trySkipToken(AsmToken::Comma)) {
Stream.IsDefined = true;
+ Stream.Loc = getLoc();
if (!parseExpr(Stream.Id))
return false;
}
@@ -5263,8 +5737,7 @@ AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
const OperandInfoTy &Op,
- const OperandInfoTy &Stream,
- const SMLoc S) {
+ const OperandInfoTy &Stream) {
using namespace llvm::AMDGPU::SendMsg;
// Validation strictness depends on whether message is specified
@@ -5273,21 +5746,27 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
bool Strict = Msg.IsSymbolic;
if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
- Error(S, "invalid message id");
+ Error(Msg.Loc, "invalid message id");
return false;
- } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
- Error(S, Op.IsDefined ?
- "message does not support operations" :
- "missing message operation");
+ }
+ if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
+ if (Op.IsDefined) {
+ Error(Op.Loc, "message does not support operations");
+ } else {
+ Error(Msg.Loc, "missing message operation");
+ }
return false;
- } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
- Error(S, "invalid operation id");
+ }
+ if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
+ Error(Op.Loc, "invalid operation id");
return false;
- } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
- Error(S, "message operation does not support streams");
+ }
+ if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
+ Error(Stream.Loc, "message operation does not support streams");
return false;
- } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
- Error(S, "invalid message stream id");
+ }
+ if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
+ Error(Stream.Loc, "invalid message stream id");
return false;
}
return true;
@@ -5300,19 +5779,23 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
int64_t ImmVal = 0;
SMLoc Loc = getLoc();
- // If parse failed, do not return error code
- // to avoid excessive error messages.
if (trySkipId("sendmsg", AsmToken::LParen)) {
OperandInfoTy Msg(ID_UNKNOWN_);
OperandInfoTy Op(OP_NONE_);
OperandInfoTy Stream(STREAM_ID_NONE_);
if (parseSendMsgBody(Msg, Op, Stream) &&
- validateSendMsg(Msg, Op, Stream, Loc)) {
+ validateSendMsg(Msg, Op, Stream)) {
ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
+ } else {
+ return MatchOperand_ParseFail;
}
- } else if (parseExpr(ImmVal)) {
- if (ImmVal < 0 || !isUInt<16>(ImmVal))
+ } else if (parseExpr(ImmVal, "a sendmsg macro")) {
+ if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
Error(Loc, "invalid immediate: only 16-bit values are legal");
+ return MatchOperand_ParseFail;
+ }
+ } else {
+ return MatchOperand_ParseFail;
}
Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
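As with hwreg, the operand may be a symbolic sendmsg(...) macro or a plain 16-bit immediate; validation is strict only in the symbolic case (Msg.IsSymbolic), so a raw number gets the range check but is not decoded. Assumed test strings, with the usual AMDGPU symbolic names shown for illustration:

static const char *const ExampleSendMsg[] = {
    "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)", // symbolic: strict validation
    "s_sendmsg 0x22",                           // raw immediate: range check only
};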
@@ -5328,34 +5811,40 @@ bool AMDGPUOperand::isSendMsg() const {
//===----------------------------------------------------------------------===//
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
- if (getLexer().getKind() != AsmToken::Identifier)
+ StringRef Str;
+ SMLoc S = getLoc();
+
+ if (!parseId(Str))
return MatchOperand_NoMatch;
- StringRef Str = Parser.getTok().getString();
int Slot = StringSwitch<int>(Str)
.Case("p10", 0)
.Case("p20", 1)
.Case("p0", 2)
.Default(-1);
- SMLoc S = Parser.getTok().getLoc();
- if (Slot == -1)
+ if (Slot == -1) {
+ Error(S, "invalid interpolation slot");
return MatchOperand_ParseFail;
+ }
- Parser.Lex();
Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
AMDGPUOperand::ImmTyInterpSlot));
return MatchOperand_Success;
}
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
- if (getLexer().getKind() != AsmToken::Identifier)
- return MatchOperand_NoMatch;
+ StringRef Str;
+ SMLoc S = getLoc();
- StringRef Str = Parser.getTok().getString();
- if (!Str.startswith("attr"))
+ if (!parseId(Str))
return MatchOperand_NoMatch;
+ if (!Str.startswith("attr")) {
+ Error(S, "invalid interpolation attribute");
+ return MatchOperand_ParseFail;
+ }
+
StringRef Chan = Str.take_back(2);
int AttrChan = StringSwitch<int>(Chan)
.Case(".x", 0)
@@ -5363,20 +5852,22 @@ OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
.Case(".z", 2)
.Case(".w", 3)
.Default(-1);
- if (AttrChan == -1)
+ if (AttrChan == -1) {
+ Error(S, "invalid or missing interpolation attribute channel");
return MatchOperand_ParseFail;
+ }
Str = Str.drop_back(2).drop_front(4);
uint8_t Attr;
- if (Str.getAsInteger(10, Attr))
+ if (Str.getAsInteger(10, Attr)) {
+ Error(S, "invalid or missing interpolation attribute number");
return MatchOperand_ParseFail;
+ }
- SMLoc S = Parser.getTok().getLoc();
- Parser.Lex();
if (Attr > 63) {
- Error(S, "out of bounds attr");
- return MatchOperand_Success;
+ Error(S, "out of bounds interpolation attribute number");
+ return MatchOperand_ParseFail;
}
SMLoc SChan = SMLoc::getFromPointer(Chan.data());
@@ -5392,86 +5883,24 @@ OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
// exp
//===----------------------------------------------------------------------===//
-void AMDGPUAsmParser::errorExpTgt() {
- Error(Parser.getTok().getLoc(), "invalid exp target");
-}
-
-OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
- uint8_t &Val) {
- if (Str == "null") {
- Val = 9;
- return MatchOperand_Success;
- }
-
- if (Str.startswith("mrt")) {
- Str = Str.drop_front(3);
- if (Str == "z") { // == mrtz
- Val = 8;
- return MatchOperand_Success;
- }
-
- if (Str.getAsInteger(10, Val))
- return MatchOperand_ParseFail;
-
- if (Val > 7)
- errorExpTgt();
-
- return MatchOperand_Success;
- }
-
- if (Str.startswith("pos")) {
- Str = Str.drop_front(3);
- if (Str.getAsInteger(10, Val))
- return MatchOperand_ParseFail;
-
- if (Val > 4 || (Val == 4 && !isGFX10()))
- errorExpTgt();
-
- Val += 12;
- return MatchOperand_Success;
- }
-
- if (isGFX10() && Str == "prim") {
- Val = 20;
- return MatchOperand_Success;
- }
-
- if (Str.startswith("param")) {
- Str = Str.drop_front(5);
- if (Str.getAsInteger(10, Val))
- return MatchOperand_ParseFail;
-
- if (Val >= 32)
- errorExpTgt();
+OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
+ using namespace llvm::AMDGPU::Exp;
- Val += 32;
- return MatchOperand_Success;
- }
+ StringRef Str;
+ SMLoc S = getLoc();
- if (Str.startswith("invalid_target_")) {
- Str = Str.drop_front(15);
- if (Str.getAsInteger(10, Val))
- return MatchOperand_ParseFail;
+ if (!parseId(Str))
+ return MatchOperand_NoMatch;
- errorExpTgt();
- return MatchOperand_Success;
+ unsigned Id = getTgtId(Str);
+ if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
+ Error(S, (Id == ET_INVALID) ?
+ "invalid exp target" :
+ "exp target is not supported on this GPU");
+ return MatchOperand_ParseFail;
}
- return MatchOperand_NoMatch;
-}
-
-OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
- uint8_t Val;
- StringRef Str = Parser.getTok().getString();
-
- auto Res = parseExpTgtImpl(Str, Val);
- if (Res != MatchOperand_Success)
- return Res;
-
- SMLoc S = Parser.getTok().getLoc();
- Parser.Lex();
-
- Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
AMDGPUOperand::ImmTyExpTgt));
return MatchOperand_Success;
}
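The new getTgtId()/isSupportedTgtId() pair replaces the open-coded matching removed in this hunk. The removed parseExpTgtImpl() documents the encodings, collected here for reference (enum names are mine; getTgtId() is assumed to map the same names to the same values):

#include <cstdint>

enum ExpTgtSketch : uint8_t {
  ET_SK_MRT0 = 0,    // mrt0..mrt7 -> 0..7
  ET_SK_MRTZ = 8,    // mrtz
  ET_SK_NULL = 9,    // null
  ET_SK_POS0 = 12,   // pos0..pos4 -> 12..16 (pos4 only on GFX10)
  ET_SK_PRIM = 20,   // prim (GFX10 only)
  ET_SK_PARAM0 = 32, // param0..param31 -> 32..63
};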
@@ -5534,8 +5963,23 @@ AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
}
bool
-AMDGPUAsmParser::parseExpr(int64_t &Imm) {
- return !getParser().parseAbsoluteExpression(Imm);
+AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
+ SMLoc S = getLoc();
+
+ const MCExpr *Expr;
+ if (Parser.parseExpression(Expr))
+ return false;
+
+ if (Expr->evaluateAsAbsolute(Imm))
+ return true;
+
+ if (Expected.empty()) {
+ Error(S, "expected absolute expression");
+ } else {
+ Error(S, Twine("expected ", Expected) +
+ Twine(" or an absolute expression"));
+ }
+ return false;
}
bool
@@ -5567,6 +6011,19 @@ AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
}
}
+bool
+AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
+ if (isToken(AsmToken::Identifier)) {
+ Val = getTokenStr();
+ lex();
+ return true;
+ } else {
+ if (!ErrMsg.empty())
+ Error(getLoc(), ErrMsg);
+ return false;
+ }
+}
+
AsmToken
AMDGPUAsmParser::getToken() const {
return Parser.getTok();
@@ -5574,7 +6031,7 @@ AMDGPUAsmParser::getToken() const {
AsmToken
AMDGPUAsmParser::peekToken() {
- return getLexer().peekTok();
+ return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
}
void
@@ -5605,6 +6062,49 @@ AMDGPUAsmParser::lex() {
Parser.Lex();
}
+SMLoc
+AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
+ const OperandVector &Operands) const {
+ for (unsigned i = Operands.size() - 1; i > 0; --i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (Test(Op))
+ return Op.getStartLoc();
+ }
+ return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
+}
+
+SMLoc
+AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
+ const OperandVector &Operands) const {
+ auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
+ return getOperandLoc(Test, Operands);
+}
+
+SMLoc
+AMDGPUAsmParser::getRegLoc(unsigned Reg,
+ const OperandVector &Operands) const {
+ auto Test = [=](const AMDGPUOperand& Op) {
+ return Op.isRegKind() && Op.getReg() == Reg;
+ };
+ return getOperandLoc(Test, Operands);
+}
+
+SMLoc
+AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
+ auto Test = [](const AMDGPUOperand& Op) {
+ return Op.IsImmKindLiteral() || Op.isExpr();
+ };
+ return getOperandLoc(Test, Operands);
+}
+
+SMLoc
+AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
+ auto Test = [](const AMDGPUOperand& Op) {
+ return Op.isImmKindConst();
+ };
+ return getOperandLoc(Test, Operands);
+}
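These helpers let the instruction checkers attach a diagnostic to the operand that actually caused it: scan the operand list from the end with a predicate and fall back to the mnemonic's location. A stand-alone sketch with stand-in types:

#include <functional>
#include <vector>

struct OpSketch {
  bool IsLiteral;
  const char *StartLoc;
};

// Ops[0] (the mnemonic) is assumed to exist, as in the LLVM code.
static const char *
findOperandLoc(const std::vector<OpSketch> &Ops,
               const std::function<bool(const OpSketch &)> &Test) {
  for (size_t I = Ops.size() - 1; I > 0; --I)
    if (Test(Ops[I]))
      return Ops[I].StartLoc;
  return Ops[0].StartLoc; // default: point at the instruction mnemonic
}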
+
//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//
@@ -5623,22 +6123,35 @@ encodeBitmaskPerm(const unsigned AndMask,
}
bool
+AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
+ const unsigned MinVal,
+ const unsigned MaxVal,
+ const StringRef ErrMsg,
+ SMLoc &Loc) {
+ if (!skipToken(AsmToken::Comma, "expected a comma")) {
+ return false;
+ }
+ Loc = getLoc();
+ if (!parseExpr(Op)) {
+ return false;
+ }
+ if (Op < MinVal || Op > MaxVal) {
+ Error(Loc, ErrMsg);
+ return false;
+ }
+
+ return true;
+}
+
+bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
const unsigned MinVal,
const unsigned MaxVal,
const StringRef ErrMsg) {
+ SMLoc Loc;
for (unsigned i = 0; i < OpNum; ++i) {
- if (!skipToken(AsmToken::Comma, "expected a comma")){
+ if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
return false;
- }
- SMLoc ExprLoc = Parser.getTok().getLoc();
- if (!parseExpr(Op[i])) {
- return false;
- }
- if (Op[i] < MinVal || Op[i] > MaxVal) {
- Error(ExprLoc, ErrMsg);
- return false;
- }
}
return true;
@@ -5664,22 +6177,24 @@ bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
using namespace llvm::AMDGPU::Swizzle;
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc Loc;
int64_t GroupSize;
int64_t LaneIdx;
- if (!parseSwizzleOperands(1, &GroupSize,
- 2, 32,
- "group size must be in the interval [2,32]")) {
+ if (!parseSwizzleOperand(GroupSize,
+ 2, 32,
+ "group size must be in the interval [2,32]",
+ Loc)) {
return false;
}
if (!isPowerOf2_64(GroupSize)) {
- Error(S, "group size must be a power of two");
+ Error(Loc, "group size must be a power of two");
return false;
}
- if (parseSwizzleOperands(1, &LaneIdx,
- 0, GroupSize - 1,
- "lane id must be in the interval [0,group size - 1]")) {
+ if (parseSwizzleOperand(LaneIdx,
+ 0, GroupSize - 1,
+ "lane id must be in the interval [0,group size - 1]",
+ Loc)) {
Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
return true;
}
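The AND mask computed above (BITMASK_MAX - GroupSize + 1) clears the low log2(GroupSize) lane bits, so every lane in a group reads from its group's base lane plus LaneIdx. A small check of that arithmetic, assuming BITMASK_MAX is the 5-bit maximum of 31:

#include <cassert>

// Source lane seen by 'Lane' under the bitmask-perm broadcast encoding.
static int broadcastSource(int Lane, int GroupSize, int LaneIdx) {
  const int AndMask = 31 - GroupSize + 1; // e.g. GroupSize=4 -> 0b11100
  return (Lane & AndMask) | LaneIdx;
}

static void checkBroadcast() {
  // With GroupSize=4 and LaneIdx=1, lanes 4..7 all read lane 5.
  for (int Lane = 4; Lane < 8; ++Lane)
    assert(broadcastSource(Lane, 4, 1) == 5);
}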
@@ -5690,15 +6205,17 @@ bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
using namespace llvm::AMDGPU::Swizzle;
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc Loc;
int64_t GroupSize;
- if (!parseSwizzleOperands(1, &GroupSize,
- 2, 32, "group size must be in the interval [2,32]")) {
+ if (!parseSwizzleOperand(GroupSize,
+ 2, 32,
+ "group size must be in the interval [2,32]",
+ Loc)) {
return false;
}
if (!isPowerOf2_64(GroupSize)) {
- Error(S, "group size must be a power of two");
+ Error(Loc, "group size must be a power of two");
return false;
}
@@ -5710,15 +6227,17 @@ bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
using namespace llvm::AMDGPU::Swizzle;
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc Loc;
int64_t GroupSize;
- if (!parseSwizzleOperands(1, &GroupSize,
- 1, 16, "group size must be in the interval [1,16]")) {
+ if (!parseSwizzleOperand(GroupSize,
+ 1, 16,
+ "group size must be in the interval [1,16]",
+ Loc)) {
return false;
}
if (!isPowerOf2_64(GroupSize)) {
- Error(S, "group size must be a power of two");
+ Error(Loc, "group size must be a power of two");
return false;
}
@@ -5735,7 +6254,7 @@ AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
}
StringRef Ctl;
- SMLoc StrLoc = Parser.getTok().getLoc();
+ SMLoc StrLoc = getLoc();
if (!parseString(Ctl)) {
return false;
}
@@ -5776,9 +6295,9 @@ AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
- SMLoc OffsetLoc = Parser.getTok().getLoc();
+ SMLoc OffsetLoc = getLoc();
- if (!parseExpr(Imm)) {
+ if (!parseExpr(Imm, "a swizzle macro")) {
return false;
}
if (!isUInt<16>(Imm)) {
@@ -5794,7 +6313,7 @@ AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
- SMLoc ModeLoc = Parser.getTok().getLoc();
+ SMLoc ModeLoc = getLoc();
bool Ok = false;
if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
@@ -5819,7 +6338,7 @@ AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc S = getLoc();
int64_t Imm = 0;
if (trySkipId("offset")) {
@@ -5864,7 +6383,7 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
while (true) {
unsigned Mode = 0;
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc S = getLoc();
for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
if (trySkipId(IdSymbolic[ModeId])) {
@@ -5877,12 +6396,12 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
Error(S, (Imm == 0)?
"expected a VGPR index mode or a closing parenthesis" :
"expected a VGPR index mode");
- break;
+ return UNDEF;
}
if (Imm & Mode) {
Error(S, "duplicate VGPR index mode");
- break;
+ return UNDEF;
}
Imm |= Mode;
@@ -5890,7 +6409,7 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
break;
if (!skipToken(AsmToken::Comma,
"expected a comma or a closing parenthesis"))
- break;
+ return UNDEF;
}
return Imm;
@@ -5899,25 +6418,21 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
- int64_t Imm = 0;
- SMLoc S = Parser.getTok().getLoc();
-
- if (getLexer().getKind() == AsmToken::Identifier &&
- Parser.getTok().getString() == "gpr_idx" &&
- getLexer().peekTok().is(AsmToken::LParen)) {
+ using namespace llvm::AMDGPU::VGPRIndexMode;
- Parser.Lex();
- Parser.Lex();
+ int64_t Imm = 0;
+ SMLoc S = getLoc();
- // If parse failed, trigger an error but do not return error code
- // to avoid excessive error messages.
+ if (trySkipId("gpr_idx", AsmToken::LParen)) {
Imm = parseGPRIdxMacro();
-
+ if (Imm == UNDEF)
+ return MatchOperand_ParseFail;
} else {
if (getParser().parseAbsoluteExpression(Imm))
- return MatchOperand_NoMatch;
+ return MatchOperand_ParseFail;
if (Imm < 0 || !isUInt<4>(Imm)) {
Error(S, "invalid immediate: only 4-bit values are legal");
+ return MatchOperand_ParseFail;
}
}
@@ -5943,22 +6458,22 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
if (isRegister() || isModifier())
return MatchOperand_NoMatch;
- if (parseExpr(Operands)) {
+ if (!parseExpr(Operands))
+ return MatchOperand_ParseFail;
- AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
- assert(Opr.isImm() || Opr.isExpr());
- SMLoc Loc = Opr.getStartLoc();
+ AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
+ assert(Opr.isImm() || Opr.isExpr());
+ SMLoc Loc = Opr.getStartLoc();
- // Currently we do not support arbitrary expressions as branch targets.
- // Only labels and absolute expressions are accepted.
- if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
- Error(Loc, "expected an absolute expression or a label");
- } else if (Opr.isImm() && !Opr.isS16Imm()) {
- Error(Loc, "expected a 16-bit signed jump offset");
- }
+ // Currently we do not support arbitrary expressions as branch targets.
+ // Only labels and absolute expressions are accepted.
+ if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
+ Error(Loc, "expected an absolute expression or a label");
+ } else if (Opr.isImm() && !Opr.isS16Imm()) {
+ Error(Loc, "expected a 16-bit signed jump offset");
}
- return MatchOperand_Success; // avoid excessive error messages
+ return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
@@ -5982,6 +6497,10 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
+ return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
+}
+
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}
@@ -6046,8 +6565,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
- if (!IsAtomic) { // glc is hard-coded.
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
+ if (!IsAtomic || IsAtomicReturn) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
+ IsAtomicReturn ? -1 : 0);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
@@ -6055,7 +6575,7 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
- if (isGFX10())
+ if (isGFX10Plus())
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
@@ -6095,7 +6615,7 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
- if (isGFX10())
+ if (isGFX10Plus())
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
@@ -6132,22 +6652,22 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
}
}
- bool IsGFX10 = isGFX10();
+ bool IsGFX10Plus = isGFX10Plus();
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
- if (IsGFX10)
+ if (IsGFX10Plus)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
- if (IsGFX10)
+ if (IsGFX10Plus)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
- if (IsGFX10)
+ if (IsGFX10Plus)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
- if (!IsGFX10)
+ if (!IsGFX10Plus)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
@@ -6156,6 +6676,17 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
cvtMIMG(Inst, Operands, true);
}
+void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
+ const OperandVector &Operands) {
+ for (unsigned I = 1; I < Operands.size(); ++I) {
+ auto &Operand = (AMDGPUOperand &)*Operands[I];
+ if (Operand.isReg())
+ Operand.addRegOperands(Inst, 1);
+ }
+
+ Inst.addOperand(MCOperand::createImm(1)); // a16
+}
+
//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
@@ -6242,7 +6773,6 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
{"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
- {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
{"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
@@ -6327,8 +6857,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
Op.ConvertResult);
} else if (Op.Type == AMDGPUOperand::ImmTyDim) {
res = parseDim(Operands);
- } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
- res = parseDfmtNfmt(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@@ -6340,7 +6868,7 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
}
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
- StringRef Name = Parser.getTok().getString();
+ StringRef Name = getTokenStr();
if (Name == "mul") {
return parseIntWithPrefix("mul", Operands,
AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
@@ -6479,15 +7007,19 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
Opc == AMDGPU::V_MAC_F32_e64_vi ||
+ Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
+ Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_MAC_F16_e64_vi ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F32_e64_vi ||
+ Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
auto it = Inst.begin();
std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
++it;
- Inst.insert(it, Inst.getOperand(0)); // src2 = dst
+ // Copy the operand to ensure it's not invalidated when Inst grows.
+ Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
}
}
@@ -6636,35 +7168,27 @@ bool AMDGPUOperand::isU16Imm() const {
}
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
- if (!isGFX10())
+ if (!isGFX10Plus())
return MatchOperand_NoMatch;
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc S = getLoc();
- if (getLexer().isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
- if (getLexer().getTok().getString() != "dim")
+ if (!trySkipId("dim", AsmToken::Colon))
return MatchOperand_NoMatch;
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Colon))
- return MatchOperand_ParseFail;
-
- Parser.Lex();
-
// We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
// integer.
std::string Token;
- if (getLexer().is(AsmToken::Integer)) {
- SMLoc Loc = getLexer().getTok().getEndLoc();
- Token = std::string(getLexer().getTok().getString());
- Parser.Lex();
- if (getLexer().getTok().getLoc() != Loc)
+ if (isToken(AsmToken::Integer)) {
+ SMLoc Loc = getToken().getEndLoc();
+ Token = std::string(getTokenStr());
+ lex();
+ if (getLoc() != Loc)
return MatchOperand_ParseFail;
}
- if (getLexer().isNot(AsmToken::Identifier))
+ if (!isToken(AsmToken::Identifier))
return MatchOperand_ParseFail;
- Token += getLexer().getTok().getString();
+ Token += getTokenStr();
StringRef DimId = Token;
if (DimId.startswith("SQ_RSRC_IMG_"))
@@ -6674,7 +7198,7 @@ OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
if (!DimInfo)
return MatchOperand_ParseFail;
- Parser.Lex();
+ lex();
Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
AMDGPUOperand::ImmTyDim));
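Because the lexer splits "1D" into an integer token and an identifier token, the code above re-glues them, but only when the identifier starts exactly where the integer ended (the getEndLoc()/getLoc() comparison). A simplified stand-alone version of that adjacency check:

#include <string>

// Produces "1D"-style names only for adjacent tokens; "dim:1 D" is rejected.
static bool glueDimTokens(const char *IntEnd, const char *IdStart,
                          const std::string &IntTok, const std::string &IdTok,
                          std::string &Out) {
  if (IntEnd != IdStart)
    return false;
  Out = IntTok + IdTok;
  return true;
}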
@@ -6682,52 +7206,33 @@ OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
}
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
- SMLoc S = Parser.getTok().getLoc();
- StringRef Prefix;
-
- if (getLexer().getKind() == AsmToken::Identifier) {
- Prefix = Parser.getTok().getString();
- } else {
- return MatchOperand_NoMatch;
- }
+ SMLoc S = getLoc();
- if (Prefix != "dpp8")
- return parseDPPCtrl(Operands);
- if (!isGFX10())
+ if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
return MatchOperand_NoMatch;
// dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
int64_t Sels[8];
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Colon))
- return MatchOperand_ParseFail;
-
- Parser.Lex();
- if (getLexer().isNot(AsmToken::LBrac))
+ if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
return MatchOperand_ParseFail;
- Parser.Lex();
- if (getParser().parseAbsoluteExpression(Sels[0]))
- return MatchOperand_ParseFail;
- if (0 > Sels[0] || 7 < Sels[0])
- return MatchOperand_ParseFail;
-
- for (size_t i = 1; i < 8; ++i) {
- if (getLexer().isNot(AsmToken::Comma))
+ for (size_t i = 0; i < 8; ++i) {
+ if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
return MatchOperand_ParseFail;
- Parser.Lex();
+ SMLoc Loc = getLoc();
if (getParser().parseAbsoluteExpression(Sels[i]))
return MatchOperand_ParseFail;
- if (0 > Sels[i] || 7 < Sels[i])
+ if (0 > Sels[i] || 7 < Sels[i]) {
+ Error(Loc, "expected a 3-bit value");
return MatchOperand_ParseFail;
+ }
}
- if (getLexer().isNot(AsmToken::RBrac))
+ if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
return MatchOperand_ParseFail;
- Parser.Lex();
unsigned DPP8 = 0;
for (size_t i = 0; i < 8; ++i)
@@ -6737,119 +7242,138 @@ OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
return MatchOperand_Success;
}
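Each of the eight selectors parsed above is a 3-bit lane index (hence the "expected a 3-bit value" diagnostic). The packing loop's body falls between the two hunks here, so the shift-by-3 layout below is an assumption consistent with a 24-bit dpp8 field:

#include <cstdint>

static uint32_t packDpp8(const int64_t Sels[8]) {
  uint32_t DPP8 = 0;
  for (unsigned I = 0; I < 8; ++I)
    DPP8 |= uint32_t(Sels[I]) << (I * 3); // lane I's source in bits [3*I+2 : 3*I]
  return DPP8;
}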
-OperandMatchResultTy
-AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
+bool
+AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
+ const OperandVector &Operands) {
+ if (Ctrl == "row_share" ||
+ Ctrl == "row_xmask")
+ return isGFX10Plus();
+
+ if (Ctrl == "wave_shl" ||
+ Ctrl == "wave_shr" ||
+ Ctrl == "wave_rol" ||
+ Ctrl == "wave_ror" ||
+ Ctrl == "row_bcast")
+ return isVI() || isGFX9();
+
+ return Ctrl == "row_mirror" ||
+ Ctrl == "row_half_mirror" ||
+ Ctrl == "quad_perm" ||
+ Ctrl == "row_shl" ||
+ Ctrl == "row_shr" ||
+ Ctrl == "row_ror";
+}
+
+int64_t
+AMDGPUAsmParser::parseDPPCtrlPerm() {
+ // quad_perm:[%d,%d,%d,%d]
+
+ if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
+ return -1;
+
+ int64_t Val = 0;
+ for (int i = 0; i < 4; ++i) {
+ if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
+ return -1;
+
+ int64_t Temp;
+ SMLoc Loc = getLoc();
+ if (getParser().parseAbsoluteExpression(Temp))
+ return -1;
+ if (Temp < 0 || Temp > 3) {
+ Error(Loc, "expected a 2-bit value");
+ return -1;
+ }
+
+ Val += (Temp << i * 2);
+ }
+
+ if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
+ return -1;
+
+ return Val;
+}
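The loop above packs the four 2-bit selectors little-endian (Val += Temp << i * 2), so quad_perm:[a,b,c,d] encodes as a | b<<2 | c<<4 | d<<6. Equivalent stand-alone form:

#include <cstdint>

constexpr uint32_t packQuadPerm(unsigned A, unsigned B, unsigned C, unsigned D) {
  // Each selector must be in [0,3]; the parser enforces this with
  // "expected a 2-bit value".
  return A | (B << 2) | (C << 4) | (D << 6);
}

static_assert(packQuadPerm(0, 1, 2, 3) == 0xE4,
              "quad_perm:[0,1,2,3] is the identity permutation");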
+
+int64_t
+AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
using namespace AMDGPU::DPP;
- SMLoc S = Parser.getTok().getLoc();
- StringRef Prefix;
- int64_t Int;
+ // sel:%d
+
+ int64_t Val;
+ SMLoc Loc = getLoc();
- if (getLexer().getKind() == AsmToken::Identifier) {
- Prefix = Parser.getTok().getString();
+ if (getParser().parseAbsoluteExpression(Val))
+ return -1;
+
+ struct DppCtrlCheck {
+ int64_t Ctrl;
+ int Lo;
+ int Hi;
+ };
+
+ DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
+ .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
+ .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
+ .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
+ .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
+ .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
+ .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
+ .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
+ .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
+ .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
+ .Default({-1, 0, 0});
+
+ bool Valid;
+ if (Check.Ctrl == -1) {
+ Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
+ Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
} else {
- return MatchOperand_NoMatch;
+ Valid = Check.Lo <= Val && Val <= Check.Hi;
+ Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
}
- if (Prefix == "row_mirror") {
- Int = DppCtrl::ROW_MIRROR;
- Parser.Lex();
- } else if (Prefix == "row_half_mirror") {
- Int = DppCtrl::ROW_HALF_MIRROR;
- Parser.Lex();
- } else {
- // Check to prevent parseDPPCtrlOps from eating invalid tokens
- if (Prefix != "quad_perm"
- && Prefix != "row_shl"
- && Prefix != "row_shr"
- && Prefix != "row_ror"
- && Prefix != "wave_shl"
- && Prefix != "wave_rol"
- && Prefix != "wave_shr"
- && Prefix != "wave_ror"
- && Prefix != "row_bcast"
- && Prefix != "row_share"
- && Prefix != "row_xmask") {
- return MatchOperand_NoMatch;
- }
-
- if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
- return MatchOperand_NoMatch;
-
- if (!isVI() && !isGFX9() &&
- (Prefix == "wave_shl" || Prefix == "wave_shr" ||
- Prefix == "wave_rol" || Prefix == "wave_ror" ||
- Prefix == "row_bcast"))
- return MatchOperand_NoMatch;
-
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Colon))
- return MatchOperand_ParseFail;
+ if (!Valid) {
+ Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
+ return -1;
+ }
- if (Prefix == "quad_perm") {
- // quad_perm:[%d,%d,%d,%d]
- Parser.Lex();
- if (getLexer().isNot(AsmToken::LBrac))
- return MatchOperand_ParseFail;
- Parser.Lex();
+ return Val;
+}
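// Editorial note, not part of the patch: in the DppCtrlCheck table above,
// controls with Lo == Hi (e.g. wave_ror:1) collapse to the fixed enum value
// Check.Ctrl, while ranged controls OR the operand onto the base, so
// row_shl:5 becomes DppCtrl::ROW_SHL0 | 5. row_bcast stays a special case,
// mapping only 15 and 31 to BCAST15/BCAST31, exactly as the code shows.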
- if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
- return MatchOperand_ParseFail;
+OperandMatchResultTy
+AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
+ using namespace AMDGPU::DPP;
- for (int i = 0; i < 3; ++i) {
- if (getLexer().isNot(AsmToken::Comma))
- return MatchOperand_ParseFail;
- Parser.Lex();
+ if (!isToken(AsmToken::Identifier) ||
+ !isSupportedDPPCtrl(getTokenStr(), Operands))
+ return MatchOperand_NoMatch;
- int64_t Temp;
- if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
- return MatchOperand_ParseFail;
- const int shift = i*2 + 2;
- Int += (Temp << shift);
- }
+ SMLoc S = getLoc();
+ int64_t Val = -1;
+ StringRef Ctrl;
- if (getLexer().isNot(AsmToken::RBrac))
- return MatchOperand_ParseFail;
- Parser.Lex();
- } else {
- // sel:%d
- Parser.Lex();
- if (getParser().parseAbsoluteExpression(Int))
- return MatchOperand_ParseFail;
+ parseId(Ctrl);
- if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
- Int |= DppCtrl::ROW_SHL0;
- } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
- Int |= DppCtrl::ROW_SHR0;
- } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
- Int |= DppCtrl::ROW_ROR0;
- } else if (Prefix == "wave_shl" && 1 == Int) {
- Int = DppCtrl::WAVE_SHL1;
- } else if (Prefix == "wave_rol" && 1 == Int) {
- Int = DppCtrl::WAVE_ROL1;
- } else if (Prefix == "wave_shr" && 1 == Int) {
- Int = DppCtrl::WAVE_SHR1;
- } else if (Prefix == "wave_ror" && 1 == Int) {
- Int = DppCtrl::WAVE_ROR1;
- } else if (Prefix == "row_bcast") {
- if (Int == 15) {
- Int = DppCtrl::BCAST15;
- } else if (Int == 31) {
- Int = DppCtrl::BCAST31;
- } else {
- return MatchOperand_ParseFail;
- }
- } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
- Int |= DppCtrl::ROW_SHARE_FIRST;
- } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
- Int |= DppCtrl::ROW_XMASK_FIRST;
+ if (Ctrl == "row_mirror") {
+ Val = DppCtrl::ROW_MIRROR;
+ } else if (Ctrl == "row_half_mirror") {
+ Val = DppCtrl::ROW_HALF_MIRROR;
+ } else {
+ if (skipToken(AsmToken::Colon, "expected a colon")) {
+ if (Ctrl == "quad_perm") {
+ Val = parseDPPCtrlPerm();
} else {
- return MatchOperand_ParseFail;
+ Val = parseDPPCtrlSel(Ctrl);
}
}
}
- Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
+ if (Val == -1)
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
return MatchOperand_Success;
}
@@ -6947,11 +7471,12 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
AMDGPUOperand::ImmTy Type) {
using namespace llvm::AMDGPU::SDWA;
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc S = getLoc();
StringRef Value;
OperandMatchResultTy res;
- res = parseStringWithPrefix(Prefix, Value);
+ SMLoc StringLoc;
+ res = parseStringWithPrefix(Prefix, Value, StringLoc);
if (res != MatchOperand_Success) {
return res;
}
@@ -6966,9 +7491,9 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
.Case("WORD_1", SdwaSel::WORD_1)
.Case("DWORD", SdwaSel::DWORD)
.Default(0xffffffff);
- Parser.Lex(); // eat last token
if (Int == 0xffffffff) {
+ Error(StringLoc, "invalid " + Twine(Prefix) + " value");
return MatchOperand_ParseFail;
}
@@ -6980,11 +7505,12 @@ OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
using namespace llvm::AMDGPU::SDWA;
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc S = getLoc();
StringRef Value;
OperandMatchResultTy res;
- res = parseStringWithPrefix("dst_unused", Value);
+ SMLoc StringLoc;
+ res = parseStringWithPrefix("dst_unused", Value, StringLoc);
if (res != MatchOperand_Success) {
return res;
}
@@ -6995,9 +7521,9 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
.Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
.Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
.Default(0xffffffff);
- Parser.Lex(); // eat last token
if (Int == 0xffffffff) {
+ Error(StringLoc, "invalid dst_unused value");
return MatchOperand_ParseFail;
}
@@ -7146,6 +7672,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
+#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
// This function should be defined after auto-generated include so that we have
@@ -7210,7 +7737,7 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
//===----------------------------------------------------------------------===//
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc S = getLoc();
int64_t Imm = 0;
if (!parseExpr(Imm)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
index fa42ddc54b56..5dc5481df49e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -114,6 +114,7 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
let isCodeGenOnly = 0;
// copy relevant pseudo op flags
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
let SubtargetPredicate = ps.SubtargetPredicate;
let AsmMatchConverter = ps.AsmMatchConverter;
let Constraints = ps.Constraints;
@@ -168,29 +169,29 @@ class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
class getMTBUFAsmOps<int addrKind> {
string Pfx =
- !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $format, $soffset",
+ !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc,$format $soffset",
!if(!eq(addrKind, BUFAddrKind.OffEn),
- "$vaddr, $srsrc, $format, $soffset offen",
+ "$vaddr, $srsrc,$format $soffset offen",
!if(!eq(addrKind, BUFAddrKind.IdxEn),
- "$vaddr, $srsrc, $format, $soffset idxen",
+ "$vaddr, $srsrc,$format $soffset idxen",
!if(!eq(addrKind, BUFAddrKind.BothEn),
- "$vaddr, $srsrc, $format, $soffset idxen offen",
+ "$vaddr, $srsrc,$format $soffset idxen offen",
!if(!eq(addrKind, BUFAddrKind.Addr64),
- "$vaddr, $srsrc, $format, $soffset addr64",
+ "$vaddr, $srsrc,$format $soffset addr64",
"")))));
string ret = Pfx # "$offset";
}
class MTBUF_SetupAddr<int addrKind> {
- bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1,
- !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
+ bits<1> offen = !or(!eq(addrKind, BUFAddrKind.OffEn),
+ !eq(addrKind, BUFAddrKind.BothEn));
- bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1,
- !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
+ bits<1> idxen = !or(!eq(addrKind, BUFAddrKind.IdxEn),
+ !eq(addrKind, BUFAddrKind.BothEn));
- bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0);
+ bits<1> addr64 = !eq(addrKind, BUFAddrKind.Addr64);
- bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1);
+ bits<1> has_vaddr = !ne(addrKind, BUFAddrKind.Offset);
}
class MTBUF_Load_Pseudo <string opName,
@@ -349,11 +350,13 @@ class MUBUF_Real <MUBUF_Pseudo ps> :
let isCodeGenOnly = 0;
// copy relevant pseudo op flags
- let SubtargetPredicate = ps.SubtargetPredicate;
- let AsmMatchConverter = ps.AsmMatchConverter;
- let Constraints = ps.Constraints;
- let DisableEncoding = ps.DisableEncoding;
- let TSFlags = ps.TSFlags;
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let OtherPredicates = ps.OtherPredicates;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
bits<12> offset;
bits<1> glc;
@@ -461,15 +464,15 @@ class getMUBUFAsmOps<int addrKind> {
}
class MUBUF_SetupAddr<int addrKind> {
- bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1,
- !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
+ bits<1> offen = !or(!eq(addrKind, BUFAddrKind.OffEn),
+ !eq(addrKind, BUFAddrKind.BothEn));
- bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1,
- !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
+ bits<1> idxen = !or(!eq(addrKind, BUFAddrKind.IdxEn),
+ !eq(addrKind, BUFAddrKind.BothEn));
- bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0);
+ bits<1> addr64 = !eq(addrKind, BUFAddrKind.Addr64);
- bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1);
+ bits<1> has_vaddr = !ne(addrKind, BUFAddrKind.Offset);
}
class MUBUF_Load_Pseudo <string opName,
@@ -485,7 +488,7 @@ class MUBUF_Load_Pseudo <string opName,
!con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
!if(HasTiedDest, (ins getVregSrcForVT<vdata_vt>.ret:$vdata_in), (ins))),
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
- !if(isLds, " lds", "$tfe") # "$dlc" # "$swz",
+ !if(isLds, " lds", "$tfe") # "$dlc$swz",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # !if(isLds, "_lds", "") #
@@ -497,7 +500,7 @@ class MUBUF_Load_Pseudo <string opName,
let mayStore = 0;
let maybeAtomic = 1;
let Uses = !if(isLds, [EXEC, M0], [EXEC]);
- let has_tfe = !if(isLds, 0, 1);
+ let has_tfe = !not(isLds);
let lds = isLds;
let elements = getMUBUFElements<vdata_vt>.ret;
}
@@ -528,21 +531,23 @@ multiclass MUBUF_Pseudo_Loads<string opName,
bit TiedDest = 0,
bit isLds = 0> {
- def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, load_vt, TiedDest, isLds>,
+ defvar legal_load_vt = !if(!eq(!cast<string>(load_vt), !cast<string>(v3f16)), v4f16, load_vt);
+
+ def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds>,
MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;
- def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, load_vt, TiedDest, isLds>,
+ def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, legal_load_vt, TiedDest, isLds>,
MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;
- def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, load_vt, TiedDest, isLds>;
- def _IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, load_vt, TiedDest, isLds>;
- def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, load_vt, TiedDest, isLds>;
+ def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds>;
+ def _IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds>;
+ def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds>;
let DisableWQM = 1 in {
- def _OFFSET_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, load_vt, TiedDest, isLds>;
- def _OFFEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, load_vt, TiedDest, isLds>;
- def _IDXEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, load_vt, TiedDest, isLds>;
- def _BOTHEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, load_vt, TiedDest, isLds>;
+ def _OFFSET_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds>;
+ def _OFFEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds>;
+ def _IDXEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds>;
+ def _BOTHEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds>;
}
}
@@ -576,25 +581,27 @@ multiclass MUBUF_Pseudo_Stores<string opName,
ValueType store_vt = i32,
SDPatternOperator st = null_frag> {
- def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, store_vt,
- [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+ defvar legal_store_vt = !if(!eq(!cast<string>(store_vt), !cast<string>(v3f16)), v4f16, store_vt);
+
+ def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt,
+ [(st legal_store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
MUBUFAddr64Table<0, NAME>;
- def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, store_vt,
- [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt,
+ [(st legal_store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
MUBUFAddr64Table<1, NAME>;
- def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, store_vt>;
- def _IDXEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, store_vt>;
- def _BOTHEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, store_vt>;
+ def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt>;
+ def _IDXEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, legal_store_vt>;
+ def _BOTHEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, legal_store_vt>;
let DisableWQM = 1 in {
- def _OFFSET_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, store_vt>;
- def _OFFEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, store_vt>;
- def _IDXEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, store_vt>;
- def _BOTHEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, store_vt>;
+ def _OFFSET_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt>;
+ def _OFFEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt>;
+ def _IDXEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, legal_store_vt>;
+ def _BOTHEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, legal_store_vt>;
}
}
@@ -622,9 +629,9 @@ class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in,
dag ret = !if(vdata_in,
!if(!empty(vaddrList),
(ins vdataClass:$vdata_in,
- SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc),
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc),
(ins vdataClass:$vdata_in, vaddrClass:$vaddr,
- SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc)
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc)
),
!if(!empty(vaddrList),
(ins vdataClass:$vdata,
@@ -701,7 +708,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
: MUBUF_Atomic_Pseudo<opName, addrKindCopy,
(outs vdataClassCopy:$vdata),
getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1>.ret,
- " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # " glc$slc",
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc1$slc",
pattern>,
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
@@ -1006,7 +1013,7 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
let SubtargetPredicate = HasGFX10_BEncoding in
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
- "buffer_atomic_csub", VGPR_32, i32, atomic_csub_global_32
+ "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
>;
let SubtargetPredicate = isGFX8GFX9 in {
@@ -1093,14 +1100,12 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
int_amdgcn_buffer_wbinvl1>;
let SubtargetPredicate = HasAtomicFaddInsts in {
-
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
- "buffer_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
+ "buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_noret_32
>;
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
- "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
+ "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_noret_32
>;
-
} // End SubtargetPredicate = HasAtomicFaddInsts
//===----------------------------------------------------------------------===//
@@ -1163,9 +1168,11 @@ let SubtargetPredicate = isGFX10Plus in {
//===----------------------------------------------------------------------===//
multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
- string opcode> {
+ string opcode, ValueType memoryVt = vt> {
+ defvar st = !if(!eq(!cast<string>(memoryVt), !cast<string>(vt)), name, mubuf_intrinsic_load<name, memoryVt>);
+
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
@@ -1173,7 +1180,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
@@ -1181,7 +1188,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
@@ -1189,7 +1196,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
@@ -1213,6 +1220,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v3i32, "BUFFER_LOAD_FORMAT_D16_XYZ_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
@@ -1222,6 +1230,8 @@ let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i16, "BUFFER_LOAD_FORMAT_D16_XY">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZ", v3f16>;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i16, "BUFFER_LOAD_FORMAT_D16_XYZ", v3i16>;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
@@ -1244,9 +1254,11 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ubyte, i32, "BUFFER_LOAD_UBYTE">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
- string opcode> {
+ string opcode, ValueType memoryVt = vt> {
+ defvar st = !if(!eq(!cast<string>(memoryVt), !cast<string>(vt)), name, mubuf_intrinsic_store<name, memoryVt>);
+
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
@@ -1254,7 +1266,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (extract_glc $auxiliary),
@@ -1263,7 +1275,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (extract_glc $auxiliary),
@@ -1272,7 +1284,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
getVregSrcForVT<vt>.ret:$vdata,
@@ -1297,6 +1309,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i32, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v3i32, "BUFFER_STORE_FORMAT_D16_XYZ_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
@@ -1306,6 +1319,8 @@ let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i32, "BUFFER_STORE_FORMAT_D16_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2f16, "BUFFER_STORE_FORMAT_D16_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i16, "BUFFER_STORE_FORMAT_D16_XY">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZ", v3f16>;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i16, "BUFFER_STORE_FORMAT_D16_XYZ", v3i16>;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZW">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i16, "BUFFER_STORE_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
@@ -1367,6 +1382,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
}
defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i32, "BUFFER_ATOMIC_SWAP">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_swap, f32, "BUFFER_ATOMIC_SWAP">;
defm : BufferAtomicPatterns<SIbuffer_atomic_add, i32, "BUFFER_ATOMIC_ADD">;
defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i32, "BUFFER_ATOMIC_SUB">;
defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i32, "BUFFER_ATOMIC_SMIN">;
@@ -1392,46 +1408,56 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i64, "BUFFER_ATOMIC_INC_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i64, "BUFFER_ATOMIC_DEC_X2">;
+class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
+ (ops node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5, node:$src6, node:$src7),
+ (vt (Op $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7)),
+ [{ return SDValue(N, 0).use_empty(); }]> {
+
+ let GISelPredicateCode = [{
+ return MRI.use_nodbg_empty(MI.getOperand(0).getReg());
+ }];
+}
+
multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (name vt:$vdata_in, v4i32:$rsrc, 0,
- 0, i32:$soffset, timm:$offset,
- timm:$cachepolicy, 0),
- (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset,
- (as_i16imm $offset), (extract_slc $cachepolicy))
+ (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, 0,
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFSET) getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
- (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- 0, i32:$soffset, timm:$offset,
- timm:$cachepolicy, timm),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (extract_slc $cachepolicy))
+ (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
- (name vt:$vdata_in, v4i32:$rsrc, 0,
- i32:$voffset, i32:$soffset, timm:$offset,
- timm:$cachepolicy, 0),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (extract_slc $cachepolicy))
+ (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, 0,
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
- (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- i32:$voffset, i32:$soffset, timm:$offset,
- timm:$cachepolicy, timm),
+ (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
- $vdata_in,
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy))
+ getVregSrcForVT<vt>.ret:$vdata_in,
+ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy))
>;
}
let SubtargetPredicate = HasAtomicFaddInsts in {
defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">;
-defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
+defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
}
def : GCNPat<
@@ -1568,6 +1594,7 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
>;
}
+let OtherPredicates = [DisableFlatScratch] in {
defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, extloadi8_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, zextloadi8_private>;
@@ -1586,7 +1613,7 @@ defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSE
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX3_OFFEN, BUFFER_LOAD_DWORDX3_OFFSET, v3i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;
-let OtherPredicates = [D16PreservesUnusedBits] in {
+let OtherPredicates = [D16PreservesUnusedBits, DisableFlatScratch] in {
defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, v2i16, load_d16_hi_private>;
defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, v2i16, az_extloadi8_d16_hi_private>;
defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, v2i16, sextloadi8_d16_hi_private>;
@@ -1602,6 +1629,8 @@ defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D1
defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, v2f16, sextloadi8_d16_lo_private>;
}
+} // End OtherPredicates = [DisableFlatScratch]
+
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_st> {
// Store follows atomic op convention so address is first
@@ -1652,6 +1681,7 @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
>;
}
+let OtherPredicates = [DisableFlatScratch] in {
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i32, truncstorei8_private>;
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i32, truncstorei16_private>;
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
@@ -1666,7 +1696,7 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OF
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;
-let OtherPredicates = [D16PreservesUnusedBits] in {
+let OtherPredicates = [D16PreservesUnusedBits, DisableFlatScratch] in {
// Hiding the extract high pattern in the PatFrag seems to not
// automatically increase the complexity.
let AddedComplexity = 1 in {
@@ -1674,6 +1704,7 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_D16_HI_OFFEN, BUFFER_STORE_SHORT
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D16_HI_OFFSET, i32, truncstorei8_hi16_private>;
}
}
+} // End OtherPredicates = [DisableFlatScratch]
//===----------------------------------------------------------------------===//
// MTBUF Patterns
@@ -1684,9 +1715,11 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D
//===----------------------------------------------------------------------===//
multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
- string opcode> {
+ string opcode, ValueType memoryVt = vt> {
+ defvar st = !if(!eq(!cast<string>(memoryVt), !cast<string>(vt)), name, mtbuf_intrinsic_load<name, memoryVt>);
+
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
@@ -1695,7 +1728,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, timm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
@@ -1704,7 +1737,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
@@ -1713,7 +1746,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, timm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
@@ -1737,6 +1770,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v3i32, "TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
@@ -1744,13 +1778,16 @@ let SubtargetPredicate = HasPackedD16VMem in {
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4f16, "TBUFFER_LOAD_FORMAT_D16_XYZ", v3f16>;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4f16, "TBUFFER_LOAD_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
- string opcode> {
+ string opcode, ValueType memoryVt = vt> {
+ defvar st = !if(!eq(!cast<string>(memoryVt), !cast<string>(vt)), name, mtbuf_intrinsic_store<name, memoryVt>);
+
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (as_i8timm $format),
@@ -1759,7 +1796,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, timm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (as_i8timm $format),
@@ -1768,7 +1805,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (as_i8timm $format),
@@ -1777,7 +1814,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
+ (st vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
timm:$offset, timm:$format, timm:$auxiliary, timm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
getVregSrcForVT<vt>.ret:$vdata,
@@ -1801,6 +1838,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XY_gfx80">;
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v3i32, "TBUFFER_STORE_FORMAT_D16_XYZ_gfx80">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4i32, "TBUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
@@ -1808,6 +1846,7 @@ let SubtargetPredicate = HasPackedD16VMem in {
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2f16, "TBUFFER_STORE_FORMAT_D16_XY">;
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4f16, "TBUFFER_STORE_FORMAT_D16_XYZ", v3f16>;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4f16, "TBUFFER_STORE_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
@@ -1825,7 +1864,7 @@ class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
- let Inst{16} = !if(ps.lds, 1, 0);
+ let Inst{16} = ps.lds;
let Inst{24-18} = op;
let Inst{31-26} = 0x38;
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
@@ -2176,7 +2215,7 @@ class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> :
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
- let Inst{16} = !if(ps.lds, 1, 0);
+ let Inst{16} = ps.lds;
let Inst{17} = !if(ps.has_slc, slc, ?);
let Inst{24-18} = op;
let Inst{31-26} = 0x38; //encoding
@@ -2226,7 +2265,7 @@ class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
- let Inst{16} = !if(ps.lds, 1, 0);
+ let Inst{16} = ps.lds;
let Inst{17} = !if(ps.has_slc, slc, ?);
let Inst{24-18} = op;
let Inst{31-26} = 0x38; //encoding
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
index beb01b1abf0f..328c81005df4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -53,7 +53,7 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
}
class DS_Real <DS_Pseudo ds> :
- InstSI <ds.OutOperandList, ds.InOperandList, ds.Mnemonic # " " # ds.AsmOperands, []>,
+ InstSI <ds.OutOperandList, ds.InOperandList, ds.Mnemonic # ds.AsmOperands, []>,
Enc64 {
let isPseudo = 0;
@@ -87,7 +87,7 @@ class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs),
(ins rc:$data0, offset:$offset, gds:$gds),
- "$data0$offset$gds"> {
+ " $data0$offset$gds"> {
let has_addr = 0;
let has_data1 = 0;
@@ -98,7 +98,7 @@ class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs),
(ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds),
- "$addr, $data0$offset$gds"> {
+ " $addr, $data0$offset$gds"> {
let has_data1 = 0;
let has_vdst = 0;
@@ -118,7 +118,7 @@ class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs),
(ins VGPR_32:$addr, rc:$data0, rc:$data1, offset:$offset, gds:$gds),
- "$addr, $data0, $data1"#"$offset"#"$gds"> {
+ " $addr, $data0, $data1$offset$gds"> {
let has_vdst = 0;
}
@@ -138,7 +138,7 @@ class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32>
(outs),
(ins VGPR_32:$addr, rc:$data0, rc:$data1,
offset0:$offset0, offset1:$offset1, gds:$gds),
- "$addr, $data0, $data1$offset0$offset1$gds"> {
+ " $addr, $data0, $data1$offset0$offset1$gds"> {
let has_vdst = 0;
let has_offset = 0;
@@ -157,7 +157,7 @@ class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs rc:$vdst),
(ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds),
- "$vdst, $addr, $data0$offset$gds"> {
+ " $vdst, $addr, $data0$offset$gds"> {
let hasPostISelHook = 1;
let has_data1 = 0;
@@ -166,12 +166,12 @@ class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32>
multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32,
string NoRetOp = ""> {
def "" : DS_1A1D_RET<opName, rc>,
- AtomicNoRet<NoRetOp, !if(!eq(NoRetOp, ""), 0, 1)>;
+ AtomicNoRet<NoRetOp, !ne(NoRetOp, "")>;
let has_m0_read = 0 in {
def _gfx9 : DS_1A1D_RET<opName, rc>,
AtomicNoRet<!if(!eq(NoRetOp, ""), "", NoRetOp#"_gfx9"),
- !if(!eq(NoRetOp, ""), 0, 1)>;
+ !ne(NoRetOp, "")>;
}
}
@@ -181,7 +181,7 @@ class DS_1A2D_RET<string opName,
: DS_Pseudo<opName,
(outs rc:$vdst),
(ins VGPR_32:$addr, src:$data0, src:$data1, offset:$offset, gds:$gds),
- "$vdst, $addr, $data0, $data1$offset$gds"> {
+ " $vdst, $addr, $data0, $data1$offset$gds"> {
let hasPostISelHook = 1;
}
@@ -191,11 +191,11 @@ multiclass DS_1A2D_RET_mc<string opName,
string NoRetOp = "",
RegisterClass src = rc> {
def "" : DS_1A2D_RET<opName, rc, src>,
- AtomicNoRet<NoRetOp, !if(!eq(NoRetOp, ""), 0, 1)>;
+ AtomicNoRet<NoRetOp, !ne(NoRetOp, "")>;
let has_m0_read = 0 in {
def _gfx9 : DS_1A2D_RET<opName, rc, src>,
- AtomicNoRet<NoRetOp#"_gfx9", !if(!eq(NoRetOp, ""), 0, 1)>;
+ AtomicNoRet<NoRetOp#"_gfx9", !ne(NoRetOp, "")>;
}
}
@@ -205,7 +205,7 @@ class DS_1A2D_Off8_RET<string opName,
: DS_Pseudo<opName,
(outs rc:$vdst),
(ins VGPR_32:$addr, src:$data0, src:$data1, offset0:$offset0, offset1:$offset1, gds:$gds),
- "$vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
+ " $vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
let has_offset = 0;
let AsmMatchConverter = "cvtDSOffset01";
@@ -230,7 +230,7 @@ class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0
!if(HasTiedOutput,
(ins VGPR_32:$addr, ofs:$offset, gds:$gds, rc:$vdst_in),
(ins VGPR_32:$addr, ofs:$offset, gds:$gds)),
- "$vdst, $addr$offset$gds"> {
+ " $vdst, $addr$offset$gds"> {
let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
let has_data0 = 0;
@@ -252,7 +252,7 @@ class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs rc:$vdst),
(ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1, gds:$gds),
- "$vdst, $addr$offset0$offset1$gds"> {
+ " $vdst, $addr$offset0$offset1$gds"> {
let has_offset = 0;
let has_data0 = 0;
@@ -271,7 +271,7 @@ multiclass DS_1A_Off8_RET_mc <string opName, RegisterClass rc = VGPR_32> {
class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName,
(outs VGPR_32:$vdst),
(ins VGPR_32:$addr, offset:$offset),
- "$vdst, $addr$offset gds"> {
+ " $vdst, $addr$offset gds"> {
let has_data0 = 0;
let has_data1 = 0;
@@ -283,7 +283,7 @@ class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName,
class DS_0A_RET <string opName> : DS_Pseudo<opName,
(outs VGPR_32:$vdst),
(ins offset:$offset, gds:$gds),
- "$vdst$offset$gds"> {
+ " $vdst$offset$gds"> {
let mayLoad = 1;
let mayStore = 1;
@@ -296,7 +296,7 @@ class DS_0A_RET <string opName> : DS_Pseudo<opName,
class DS_1A <string opName> : DS_Pseudo<opName,
(outs),
(ins VGPR_32:$addr, offset:$offset, gds:$gds),
- "$addr$offset$gds"> {
+ " $addr$offset$gds"> {
let mayLoad = 1;
let mayStore = 1;
@@ -330,13 +330,13 @@ class DS_GWS <string opName, dag ins, string asmOps>
class DS_GWS_0D <string opName>
: DS_GWS<opName,
- (ins offset:$offset, gds:$gds), "$offset gds"> {
+ (ins offset:$offset), "$offset gds"> {
let hasSideEffects = 1;
}
class DS_GWS_1D <string opName>
: DS_GWS<opName,
- (ins VGPR_32:$data0, offset:$offset, gds:$gds), "$data0$offset gds"> {
+ (ins VGPR_32:$data0, offset:$offset), " $data0$offset gds"> {
let has_gws_data0 = 1;
let hasSideEffects = 1;
@@ -364,7 +364,7 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
: DS_Pseudo<opName,
(outs VGPR_32:$vdst),
(ins VGPR_32:$addr, VGPR_32:$data0, offset:$offset),
- "$vdst, $addr, $data0$offset",
+ " $vdst, $addr, $data0$offset",
[(set i32:$vdst,
(node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > {
@@ -680,7 +680,29 @@ foreach vt = VReg_64.RegTypes in {
defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
}
-defm : DSReadPat_mc <DS_READ_B128, v4i32, "load_align16_local">;
+let SubtargetPredicate = isGFX7Plus in {
+
+foreach vt = VReg_96.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
+}
+
+foreach vt = VReg_128.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
+}
+
+let SubtargetPredicate = HasUnalignedAccessMode in {
+
+foreach vt = VReg_96.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
+}
+
+foreach vt = VReg_128.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B128, vt, "load_local">;
+}
+
+} // End SubtargetPredicate = HasUnalignedAccessMode
+
+} // End SubtargetPredicate = isGFX7Plus
} // End AddedComplexity = 100
@@ -719,7 +741,7 @@ multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
// normal store.
class DSAtomicWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
- (inst $ptr, $value, offset:$offset, (i1 0))
+ (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 0))
>;
multiclass DSAtomicWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
@@ -761,6 +783,18 @@ class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag> :
(i1 0))
>;
+class DS128Bit8ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+ (vt:$value (frag (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
+ (inst $ptr, $offset0, $offset1, (i1 0))
+>;
+
+class DS128Bit8ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat<
+ (frag vt:$value, (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
+ (inst $ptr, (i64 (EXTRACT_SUBREG VReg_128:$value, sub0_sub1)),
+ (i64 (EXTRACT_SUBREG VReg_128:$value, sub2_sub3)), $offset0, $offset1,
+ (i1 0))
+>;
+
multiclass DS64Bit4ByteAlignedPat_mc<ValueType vt> {
let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, vt, load_local_m0>;
@@ -773,21 +807,60 @@ multiclass DS64Bit4ByteAlignedPat_mc<ValueType vt> {
}
}
+multiclass DS128Bit8ByteAlignedPat_mc<ValueType vt> {
+ let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
+ def : DS128Bit8ByteAlignedReadPat<DS_READ2_B64, vt, load_local_m0>;
+ def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64, vt, store_local_m0>;
+ }
+
+ let OtherPredicates = [NotLDSRequiresM0Init] in {
+ def : DS128Bit8ByteAlignedReadPat<DS_READ2_B64_gfx9, vt, load_local>;
+ def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64_gfx9, vt, store_local>;
+ }
+}
+
// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
// related to bounds checking.
foreach vt = VReg_64.RegTypes in {
defm : DS64Bit4ByteAlignedPat_mc<vt>;
}
+foreach vt = VReg_128.RegTypes in {
+defm : DS128Bit8ByteAlignedPat_mc<vt>;
+}
+
let AddedComplexity = 100 in {
foreach vt = VReg_64.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
}
-defm : DSWritePat_mc <DS_WRITE_B128, v4i32, "store_align16_local">;
+let SubtargetPredicate = isGFX7Plus in {
+
+foreach vt = VReg_96.RegTypes in {
+defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
+}
+
+foreach vt = VReg_128.RegTypes in {
+defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
+}
+
+let SubtargetPredicate = HasUnalignedAccessMode in {
+
+foreach vt = VReg_96.RegTypes in {
+defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
+}
+
+foreach vt = VReg_128.RegTypes in {
+defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_local">;
+}
+
+} // End SubtargetPredicate = HasUnalignedAccessMode
+
+} // End SubtargetPredicate = isGFX7Plus
} // End AddedComplexity = 100
+
class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
(inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9c2f2e7eecd1..8061c6c509e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -17,42 +17,24 @@
// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
#include "Disassembler/AMDGPUDisassembler.h"
-#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm-c/Disassembler.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/ELF.h"
+#include "llvm-c/DisassemblerTypes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <iterator>
-#include <tuple>
-#include <vector>
using namespace llvm;
#define DEBUG_TYPE "amdgpu-disassembler"
-#define SGPR_MAX (isGFX10() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
- : AMDGPU::EncValues::SGPR_MAX_SI)
+#define SGPR_MAX \
+ (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
+ : AMDGPU::EncValues::SGPR_MAX_SI)
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
@@ -63,7 +45,7 @@ AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {
// ToDo: AMDGPUDisassembler supports only VI ISA.
- if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10())
+ if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10Plus())
report_fatal_error("Disassembly not yet supported for subtarget");
}
@@ -139,6 +121,8 @@ DECODE_OPERAND_REG(VS_128)
DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)
+DECODE_OPERAND_REG(VReg_256)
+DECODE_OPERAND_REG(VReg_512)
DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
@@ -382,15 +366,24 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
+ MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
+ MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+ MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src2_modifiers);
}
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) &&
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) {
+ insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1);
+ }
+
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
@@ -499,8 +492,16 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::OpName::d16);
assert(VDataIdx != -1);
- assert(DMaskIdx != -1);
- assert(TFEIdx != -1);
+ if (DMaskIdx == -1 || TFEIdx == -1) { // intersect_ray
+ if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) {
+ assert(MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa ||
+ MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa ||
+ MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa ||
+ MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa);
+ addOperand(MI, MCOperand::createImm(1));
+ }
+ return MCDisassembler::Success;
+ }
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
bool IsAtomic = (VDstIdx != -1);
@@ -544,9 +545,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
DstSize = (DstSize + 1) / 2;
}
- // FIXME: Add tfe support
if (MI.getOperand(TFEIdx).getImm())
- return MCDisassembler::Success;
+ DstSize += 1;
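// Editorial note, not part of the patch: with tfe set the instruction writes
// an extra status dword, which is why the destination size is bumped by one
// here instead of bailing out as the removed code did.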
if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
return MCDisassembler::Success;
@@ -996,10 +996,8 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
using namespace AMDGPU::EncValues;
- unsigned TTmpMin =
- (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN;
- unsigned TTmpMax =
- (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX;
+ unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
+ unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
@@ -1017,7 +1015,8 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
: getVgprClassId(Width), Val - VGPR_MIN);
}
if (Val <= SGPR_MAX) {
- assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
+ // "SGPR_MIN <= Val" is always true and causes compilation warning.
+ static_assert(SGPR_MIN == 0, "");
return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
}
@@ -1054,7 +1053,8 @@ MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) c
assert(Width == OPW256 || Width == OPW512);
if (Val <= SGPR_MAX) {
- assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
+ // "SGPR_MIN <= Val" is always true and causes compilation warning.
+ static_assert(SGPR_MIN == 0, "");
return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
}
@@ -1137,8 +1137,8 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
Val - SDWA9EncValues::SRC_VGPR_MIN);
}
if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
- Val <= (isGFX10() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
- : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
+ Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
+ : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
return createSRegOperand(getSgprClassId(Width),
Val - SDWA9EncValues::SRC_SGPR_MIN);
}
@@ -1207,12 +1207,358 @@ bool AMDGPUDisassembler::isVI() const {
return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}
-bool AMDGPUDisassembler::isGFX9() const {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
+bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
+
+bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
+
+bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
+
+bool AMDGPUDisassembler::isGFX10Plus() const {
+ return AMDGPU::isGFX10Plus(STI);
+}
+
+//===----------------------------------------------------------------------===//
+// AMDGPU specific symbol handling
+//===----------------------------------------------------------------------===//
+#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
+ do { \
+ KdStream << Indent << DIRECTIVE " " \
+ << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
+ } while (0)
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
+ uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
+ using namespace amdhsa;
+ StringRef Indent = "\t";
+
+ // We cannot accurately backward compute #VGPRs used from
+ // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
+ // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
+ // simply calculate the inverse of what the assembler does.
+
+ uint32_t GranulatedWorkitemVGPRCount =
+ (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;
+
+ uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
+ AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
+
+ KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
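// Editorial note, not part of the patch: the directive value is recovered as
// NextFreeVGPR = (GRANULATED_WORKITEM_VGPR_COUNT + 1) * VGPR encoding granule.
// Illustrative numbers only (the real granule comes from
// getVGPREncodingGranule(&STI)): with a granule of 4 and an encoded field of
// 5, the emitted line would be ".amdhsa_next_free_vgpr 24".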
+
+ // We cannot backward compute values used to calculate
+ // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
+ // directives can't be computed:
+ // .amdhsa_reserve_vcc
+ // .amdhsa_reserve_flat_scratch
+ // .amdhsa_reserve_xnack_mask
+ // They take their respective default values if not specified in the assembly.
+ //
+ // GRANULATED_WAVEFRONT_SGPR_COUNT
+ // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
+ //
+ // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
+ // are set to 0. So while disassembling we consider that:
+ //
+ // GRANULATED_WAVEFRONT_SGPR_COUNT
+ // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
+ //
+ // The disassembler cannot recover the original values of those 3 directives.
+
+ uint32_t GranulatedWavefrontSGPRCount =
+ (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;
+
+ if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
+ return MCDisassembler::Fail;
+
+ uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
+ AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
+
+ KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
+ KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
+ KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
+ KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
+ return MCDisassembler::Fail;
+
+ PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
+ PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
+ PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
+ PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
+ return MCDisassembler::Fail;
+
+ PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
+ return MCDisassembler::Fail;
+
+ PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
+ return MCDisassembler::Fail;
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
+ return MCDisassembler::Fail;
+
+ PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
+ return MCDisassembler::Fail;
+
+ if (isGFX10Plus()) {
+ PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
+ COMPUTE_PGM_RSRC1_WGP_MODE);
+ PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
+ PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ }
+ return MCDisassembler::Success;
}
-bool AMDGPUDisassembler::isGFX10() const {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
+// NOLINTNEXTLINE(readability-identifier-naming)
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
+ uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
+ using namespace amdhsa;
+ StringRef Indent = "\t";
+ PRINT_DIRECTIVE(
+ ".amdhsa_system_sgpr_private_segment_wavefront_offset",
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
+ PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
+ return MCDisassembler::Fail;
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
+ return MCDisassembler::Fail;
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
+ return MCDisassembler::Fail;
+
+ PRINT_DIRECTIVE(
+ ".amdhsa_exception_fp_ieee_invalid_op",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
+ PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
+ PRINT_DIRECTIVE(
+ ".amdhsa_exception_fp_ieee_div_zero",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
+ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
+ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
+ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
+ PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
+ return MCDisassembler::Fail;
+
+ return MCDisassembler::Success;
+}
+
+#undef PRINT_DIRECTIVE
+
+MCDisassembler::DecodeStatus
+AMDGPUDisassembler::decodeKernelDescriptorDirective(
+ DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
+ raw_string_ostream &KdStream) const {
+#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
+ do { \
+ KdStream << Indent << DIRECTIVE " " \
+ << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
+ } while (0)
+
+ uint16_t TwoByteBuffer = 0;
+ uint32_t FourByteBuffer = 0;
+ uint64_t EightByteBuffer = 0;
+
+ StringRef ReservedBytes;
+ StringRef Indent = "\t";
+
+ assert(Bytes.size() == 64);
+ DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
+
+ switch (Cursor.tell()) {
+ case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
+ FourByteBuffer = DE.getU32(Cursor);
+ KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
+ << '\n';
+ return MCDisassembler::Success;
+
+ case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
+ FourByteBuffer = DE.getU32(Cursor);
+ KdStream << Indent << ".amdhsa_private_segment_fixed_size "
+ << FourByteBuffer << '\n';
+ return MCDisassembler::Success;
+
+ case amdhsa::RESERVED0_OFFSET:
+ // 8 reserved bytes, must be 0.
+ EightByteBuffer = DE.getU64(Cursor);
+ if (EightByteBuffer) {
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
+ // KERNEL_CODE_ENTRY_BYTE_OFFSET
+ // So far no directive controls this for Code Object V3, so simply skip for
+ // disassembly.
+ DE.skip(Cursor, 8);
+ return MCDisassembler::Success;
+
+ case amdhsa::RESERVED1_OFFSET:
+ // 20 reserved bytes, must be 0.
+ ReservedBytes = DE.getBytes(Cursor, 20);
+ for (int I = 0; I < 20; ++I) {
+ if (ReservedBytes[I] != 0) {
+ return MCDisassembler::Fail;
+ }
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
+ // COMPUTE_PGM_RSRC3
+ // - Only set for GFX10; on GFX6-9 this must be 0.
+ // - Currently no directives directly control this.
+ FourByteBuffer = DE.getU32(Cursor);
+ if (!isGFX10Plus() && FourByteBuffer) {
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
+ FourByteBuffer = DE.getU32(Cursor);
+ if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
+ MCDisassembler::Fail) {
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
+ FourByteBuffer = DE.getU32(Cursor);
+ if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
+ MCDisassembler::Fail) {
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
+ using namespace amdhsa;
+ TwoByteBuffer = DE.getU16(Cursor);
+
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+
+ if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
+ return MCDisassembler::Fail;
+
+ // Reserved for GFX9
+ if (isGFX9() &&
+ (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
+ return MCDisassembler::Fail;
+ } else if (isGFX10Plus()) {
+ PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+ }
+
+ if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
+ return MCDisassembler::Fail;
+
+ return MCDisassembler::Success;
+
+ case amdhsa::RESERVED2_OFFSET:
+ // 6 bytes from here are reserved, must be 0.
+ ReservedBytes = DE.getBytes(Cursor, 6);
+ for (int I = 0; I < 6; ++I) {
+ if (ReservedBytes[I] != 0)
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+
+ default:
+ llvm_unreachable("Unhandled index. Case statements cover everything.");
+ return MCDisassembler::Fail;
+ }
+#undef PRINT_DIRECTIVE
+}
+
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
+ StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
+ // CP microcode requires the kernel descriptor to be 64-byte aligned.
+ if (Bytes.size() != 64 || KdAddress % 64 != 0)
+ return MCDisassembler::Fail;
+
+ std::string Kd;
+ raw_string_ostream KdStream(Kd);
+ KdStream << ".amdhsa_kernel " << KdName << '\n';
+
+ DataExtractor::Cursor C(0);
+ while (C && C.tell() < Bytes.size()) {
+ MCDisassembler::DecodeStatus Status =
+ decodeKernelDescriptorDirective(C, Bytes, KdStream);
+
+ cantFail(C.takeError());
+
+ if (Status == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ }
+ KdStream << ".end_amdhsa_kernel\n";
+ outs() << KdStream.str();
+ return MCDisassembler::Success;
+}
+
+Optional<MCDisassembler::DecodeStatus>
+AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &CStream) const {
+ // Right now only the kernel descriptor needs to be handled.
+ // All other symbols are ignored for target-specific handling.
+ // TODO:
+ // Fix the spurious symbol issue for AMDGPU kernels. It exists for both Code
+ // Object V2 and V3 when symbols are marked protected.
+
+ // amd_kernel_code_t for Code Object V2.
+ if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
+ Size = 256;
+ return MCDisassembler::Fail;
+ }
+
+ // Code Object V3 kernel descriptors.
+ StringRef Name = Symbol.Name;
+ if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
+ Size = 64; // Size = 64 regardless of success or failure.
+ return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
+ }
+ return None;
}
//===----------------------------------------------------------------------===//
@@ -1233,11 +1579,10 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
if (!Symbols)
return false;
- auto Result = std::find_if(Symbols->begin(), Symbols->end(),
- [Value](const SymbolInfoTy& Val) {
- return Val.Addr == static_cast<uint64_t>(Value)
- && Val.Type == ELF::STT_NOTYPE;
- });
+ auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
+ return Val.Addr == static_cast<uint64_t>(Value) &&
+ Val.Type == ELF::STT_NOTYPE;
+ });
if (Result != Symbols->end()) {
auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
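The decodeCOMPUTE_PGM_RSRC1 hunk above notes that the original VGPR/SGPR counts cannot be recovered from the granulated fields; the disassembler only needs NextFree = (Granulated + 1) * Granule so that re-assembly reproduces the same field. A minimal standalone sketch of that round-trip property, assuming the usual encode rule of rounding the count up to the granule and subtracting one, and an illustrative granule of 4 (the real value comes from AMDGPU::IsaInfo::getVGPREncodingGranule):

#include <cassert>
#include <cstdint>

static uint32_t encodeGranulated(uint32_t NextFree, uint32_t Granule) {
  // What an assembler typically does: round up to the granule, then bias by -1.
  return (NextFree + Granule - 1) / Granule - 1;
}

static uint32_t decodeNextFree(uint32_t Granulated, uint32_t Granule) {
  // The disassembler's inverse: always a multiple of the granule.
  return (Granulated + 1) * Granule;
}

int main() {
  const uint32_t Granule = 4; // assumed granule, for illustration only
  for (uint32_t N = 1; N <= 64; ++N) {
    uint32_t G = encodeGranulated(N, Granule);
    uint32_t Recovered = decodeNextFree(G, Granule);
    assert(Recovered >= N);                           // information is lost...
    assert(encodeGranulated(Recovered, Granule) == G); // ...but re-encoding is stable
  }
  return 0;
}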
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index f975af409a09..714dabbc5184 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -15,15 +15,9 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
#define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
-#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
-
-#include <algorithm>
-#include <cstdint>
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/DataExtractor.h"
#include <memory>
namespace llvm {
@@ -66,6 +60,33 @@ public:
DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst,
uint64_t Address) const;
+ Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CStream) const override;
+
+ DecodeStatus decodeKernelDescriptor(StringRef KdName, ArrayRef<uint8_t> Bytes,
+ uint64_t KdAddress) const;
+
+ DecodeStatus
+ decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor,
+ ArrayRef<uint8_t> Bytes,
+ raw_string_ostream &KdStream) const;
+
+ /// Decode the fields of COMPUTE_PGM_RSRC1 into assembler directives.
+ /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC1.
+ /// \param KdStream - Stream to write the disassembled directives to.
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ DecodeStatus decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer,
+ raw_string_ostream &KdStream) const;
+
+ /// Decode the fields of COMPUTE_PGM_RSRC2 into assembler directives.
+ /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC2.
+ /// \param KdStream - Stream to write the disassembled directives to.
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
+ raw_string_ostream &KdStream) const;
+
DecodeStatus convertSDWAInst(MCInst &MI) const;
DecodeStatus convertDPP8Inst(MCInst &MI) const;
DecodeStatus convertMIMGInst(MCInst &MI) const;
@@ -140,7 +161,9 @@ public:
bool isVI() const;
bool isGFX9() const;
+ bool isGFX9Plus() const;
bool isGFX10() const;
+ bool isGFX10Plus() const;
};
//===----------------------------------------------------------------------===//
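The onSymbolStart override declared here (and implemented in the .cpp hunk above) recognizes Code Object V3 kernel descriptors purely by symbol type plus a ".kd" name suffix, then strips that suffix to recover the kernel name for the .amdhsa_kernel directive. A small sketch of that naming convention, with std::string_view standing in for llvm::StringRef and hypothetical helper names:

#include <cassert>
#include <string_view>

constexpr std::string_view KdSuffix = ".kd";

static bool isKernelDescriptorSymbol(std::string_view Name) {
  return Name.size() > KdSuffix.size() &&
         Name.substr(Name.size() - KdSuffix.size()) == KdSuffix;
}

static std::string_view kernelNameFromKdSymbol(std::string_view Name) {
  // Mirrors Name.drop_back(3) in the disassembler.
  return Name.substr(0, Name.size() - KdSuffix.size());
}

int main() {
  assert(isKernelDescriptorSymbol("vector_add.kd"));
  assert(kernelNameFromKdSymbol("vector_add.kd") == "vector_add");
  assert(!isKernelDescriptorSymbol("vector_add"));
  return 0;
}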
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/EXPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/EXPInstructions.td
new file mode 100644
index 000000000000..b3b55ddd2c97
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/EXPInstructions.td
@@ -0,0 +1,125 @@
+//===-- EXPInstructions.td - Export Instruction Definitions ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// EXP classes
+//===----------------------------------------------------------------------===//
+
+class EXPCommon<bit done, string asm = ""> : InstSI<
+ (outs),
+ (ins exp_tgt:$tgt,
+ ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
+ exp_vm:$vm, exp_compr:$compr, i32imm:$en),
+ asm> {
+ let EXP = 1;
+ let EXP_CNT = 1;
+ let mayLoad = done;
+ let mayStore = 1;
+ let UseNamedOperandTable = 1;
+ let Uses = [EXEC];
+ let SchedRW = [WriteExport];
+ let DisableWQM = 1;
+}
+
+class EXP_Pseudo<bit done> : EXPCommon<done>,
+ SIMCInstr <NAME, SIEncodingFamily.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class EXP_Real<bit done, string pseudo, int subtarget>
+ : EXPCommon<done, "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")
+ #"$compr$vm">,
+ SIMCInstr <pseudo, subtarget> {
+ let AsmMatchConverter = "cvtExp";
+}
+
+//===----------------------------------------------------------------------===//
+// EXP Instructions
+//===----------------------------------------------------------------------===//
+
+// Split EXP instruction into EXP and EXP_DONE so we can set
+// mayLoad for done=1.
+def EXP : EXP_Pseudo<0>;
+def EXP_DONE : EXP_Pseudo<1>;
+
+//===----------------------------------------------------------------------===//
+// SI
+//===----------------------------------------------------------------------===//
+
+class EXP_Real_si<bit _done, string pseudo>
+ : EXP_Real<_done, pseudo, SIEncodingFamily.SI>, EXPe {
+ let AssemblerPredicate = isGFX6GFX7;
+ let DecoderNamespace = "GFX6GFX7";
+ let done = _done;
+}
+
+def EXP_si : EXP_Real_si<0, "EXP">;
+def EXP_DONE_si : EXP_Real_si<1, "EXP_DONE">;
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+class EXP_Real_vi<bit _done, string pseudo>
+ : EXP_Real<_done, pseudo, SIEncodingFamily.VI>, EXPe_vi {
+ let AssemblerPredicate = isGFX8GFX9;
+ let DecoderNamespace = "GFX8";
+ let done = _done;
+}
+
+def EXP_vi : EXP_Real_vi<0, "EXP">;
+def EXP_DONE_vi : EXP_Real_vi<1, "EXP_DONE">;
+
+//===----------------------------------------------------------------------===//
+// GFX10+
+//===----------------------------------------------------------------------===//
+
+class EXP_Real_gfx10<bit _done, string pseudo>
+ : EXP_Real<_done, pseudo, SIEncodingFamily.GFX10>, EXPe {
+ let AssemblerPredicate = isGFX10Plus;
+ let DecoderNamespace = "GFX10";
+ let done = _done;
+}
+
+def EXP_gfx10 : EXP_Real_gfx10<0, "EXP">;
+def EXP_DONE_gfx10 : EXP_Real_gfx10<1, "EXP_DONE">;
+
+//===----------------------------------------------------------------------===//
+// EXP Patterns
+//===----------------------------------------------------------------------===//
+
+class ExpPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
+ (int_amdgcn_exp timm:$tgt, timm:$en,
+ (vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
+ (vt ExpSrc2:$src2), (vt ExpSrc3:$src3),
+ done_val, timm:$vm),
+ (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
+ ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en)
+>;
+
+class ExpComprPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
+ (int_amdgcn_exp_compr timm:$tgt, timm:$en,
+ (vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
+ done_val, timm:$vm),
+ (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
+ (IMPLICIT_DEF), (IMPLICIT_DEF), timm:$vm, 1, timm:$en)
+>;
+
+// FIXME: The generated DAG matcher seems to have strange behavior
+// with a 1-bit literal to match, so use a -1 for checking a true
+// 1-bit value.
+def : ExpPattern<i32, EXP, 0>;
+def : ExpPattern<i32, EXP_DONE, -1>;
+def : ExpPattern<f32, EXP, 0>;
+def : ExpPattern<f32, EXP_DONE, -1>;
+
+def : ExpComprPattern<v2i16, EXP, 0>;
+def : ExpComprPattern<v2i16, EXP_DONE, -1>;
+def : ExpComprPattern<v2f16, EXP, 0>;
+def : ExpComprPattern<v2f16, EXP_DONE, -1>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 97104a242d8c..8d3e138ba56a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -30,6 +30,15 @@ class EGOrCaymanPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
let SubtargetPredicate = isEGorCayman;
}
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+ return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+ MVT::i32);
+}]>;
+
//===----------------------------------------------------------------------===//
// Evergreen / Cayman store instructions
//===----------------------------------------------------------------------===//
@@ -69,7 +78,7 @@ multiclass RAT_ATOMIC<bits<6> op_ret, bits<6> op_noret, string name> {
def _RTN: CF_MEM_RAT <op_ret, 0, 0xf,
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
(outs R600_Reg128:$out_gpr),
- name # "_RTN" # " $rw_gpr, $index_gpr", [] >;
+ name # "_RTN $rw_gpr, $index_gpr", [] >;
def _NORET: CF_MEM_RAT <op_noret, 0, 0xf,
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
(outs R600_Reg128:$out_gpr),
@@ -394,7 +403,41 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
VecALU
>;
-defm : BFEPattern <BFE_UINT_eg, BFE_INT_eg, MOV_IMM_I32>;
+// Bitfield extract patterns
+
+def : AMDGPUPat <
+ (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+ (BFE_UINT_eg $src, $rshift, (MOV_IMM_I32 (i32 (IMMPopCount $mask))))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+ (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+ (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+ (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+ (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+ (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+ (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+ (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+ (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+def : AMDGPUPat <
+ (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+ (BFE_INT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
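The bitfield-extract patterns in the hunk above hinge on IMMZeroBasedBitfieldMask and IMMPopCount: for a mask of the form (1 << width) - 1, the population count equals the field width, so "(x >> s) & mask" can be selected as BFE_UINT with popcount(mask) as the width operand. A short sketch of that identity (the reference BFE semantics below are written out for illustration, not taken from the hardware spec):

#include <bitset>
#include <cassert>
#include <cstdint>

static uint32_t bfeU32(uint32_t Src, uint32_t Offset, uint32_t Width) {
  // Reference semantics of an unsigned bitfield extract, Width in 1..31.
  return (Src >> Offset) & ((1u << Width) - 1);
}

int main() {
  for (uint32_t Width = 1; Width < 32; ++Width) {
    uint32_t Mask = (1u << Width) - 1;                 // isMask_32(Mask) holds
    uint32_t PopCount = std::bitset<32>(Mask).count(); // equals Width
    assert(PopCount == Width);
    // So "(x >> s) & mask" rewrites to BFE_UINT(x, s, popcount(mask)).
    assert(((0xDEADBEEFu >> 7) & Mask) == bfeU32(0xDEADBEEFu, 7, PopCount));
  }
  return 0;
}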
@@ -408,7 +451,74 @@ def : EGOrCaymanPat<(i32 (sext_inreg i32:$src, i8)),
def : EGOrCaymanPat<(i32 (sext_inreg i32:$src, i16)),
(BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
-defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32, R600_Reg64>;
+// BFI patterns
+
+// Definition from ISA doc:
+// (y & x) | (z & ~x)
+def : AMDGPUPat <
+ (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
+ (BFI_INT_eg $x, $y, $z)
+>;
+
+// 64-bit version
+def : AMDGPUPat <
+ (or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
+ (REG_SEQUENCE R600_Reg64,
+ (BFI_INT_eg (i32 (EXTRACT_SUBREG R600_Reg64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$z, sub0))), sub0,
+ (BFI_INT_eg (i32 (EXTRACT_SUBREG R600_Reg64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$z, sub1))), sub1)
+>;
+
+// SHA-256 Ch function
+// z ^ (x & (y ^ z))
+def : AMDGPUPat <
+ (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
+ (BFI_INT_eg $x, $y, $z)
+>;
+
+// 64-bit version
+def : AMDGPUPat <
+ (xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
+ (REG_SEQUENCE R600_Reg64,
+ (BFI_INT_eg (i32 (EXTRACT_SUBREG R600_Reg64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$z, sub0))), sub0,
+ (BFI_INT_eg (i32 (EXTRACT_SUBREG R600_Reg64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$z, sub1))), sub1)
+>;
+
+def : AMDGPUPat <
+ (fcopysign f32:$src0, f32:$src1),
+ (BFI_INT_eg (MOV_IMM_I32 (i32 0x7fffffff)), $src0, $src1)
+>;
+
+def : AMDGPUPat <
+ (fcopysign f32:$src0, f64:$src1),
+ (BFI_INT_eg (MOV_IMM_I32 (i32 0x7fffffff)), $src0,
+ (i32 (EXTRACT_SUBREG R600_Reg64:$src1, sub1)))
+>;
+
+def : AMDGPUPat <
+ (fcopysign f64:$src0, f64:$src1),
+ (REG_SEQUENCE R600_Reg64,
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+ (BFI_INT_eg (MOV_IMM_I32 (i32 0x7fffffff)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$src0, sub1)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$src1, sub1))), sub1)
+>;
+
+def : AMDGPUPat <
+ (fcopysign f64:$src0, f32:$src1),
+ (REG_SEQUENCE R600_Reg64,
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+ (BFI_INT_eg (MOV_IMM_I32 (i32 0x7fffffff)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$src0, sub1)),
+ $src1), sub1)
+>;
def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
[(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
@@ -692,8 +802,26 @@ def : EGOrCaymanPat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
def : EGOrCaymanPat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
-// SHA-256 Patterns
-defm : SHA256MaPattern <BFI_INT_eg, XOR_INT, R600_Reg64>;
+// SHA-256 Ma patterns
+
+// ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y
+def : AMDGPUPat <
+ (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
+ (BFI_INT_eg (XOR_INT i32:$x, i32:$y), i32:$z, i32:$y)
+>;
+
+def : AMDGPUPat <
+ (or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
+ (REG_SEQUENCE R600_Reg64,
+ (BFI_INT_eg (XOR_INT (i32 (EXTRACT_SUBREG R600_Reg64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$y, sub0))),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$z, sub0)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$y, sub0))), sub0,
+ (BFI_INT_eg (XOR_INT (i32 (EXTRACT_SUBREG R600_Reg64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$y, sub1))),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$z, sub1)),
+ (i32 (EXTRACT_SUBREG R600_Reg64:$y, sub1))), sub1)
+>;
def EG_ExportSwz : ExportSwzInst {
let Word1{19-16} = 0; // BURST_COUNT
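The BFI_INT_eg patterns above all reduce to one bitwise select: BFI(ctrl, a, b) takes bits of a where ctrl is 1 and bits of b where ctrl is 0. The SHA-256 Ch form z ^ (x & (y ^ z)) and the Ma form (x & z) | (y & (x | z)) are alternate spellings of that select, which is why both map onto BFI_INT_eg (Ma via a control of x ^ y). A brute-force sketch over a few 32-bit samples:

#include <cassert>
#include <cstdint>

static uint32_t bfi(uint32_t Ctrl, uint32_t A, uint32_t B) {
  // Matches the ISA-doc form quoted above: (a & ctrl) | (b & ~ctrl).
  return (A & Ctrl) | (B & ~Ctrl);
}

int main() {
  const uint32_t Samples[] = {0x00000000u, 0xFFFFFFFFu, 0xDEADBEEFu,
                              0x0F0F0F0Fu, 0x12345678u, 0x80000001u};
  for (uint32_t X : Samples)
    for (uint32_t Y : Samples)
      for (uint32_t Z : Samples) {
        // SHA-256 Ch: a per-bit select on x between y and z.
        assert((Z ^ (X & (Y ^ Z))) == bfi(X, Y, Z));
        // SHA-256 Ma: majority(x, y, z), i.e. a select on (x ^ y) between z
        // and y -- matching the BFI ((XOR x, y), z, y) form in the pattern.
        assert(((X & Z) | (Y & (X | Z))) == bfi(X ^ Y, Z, Y));
      }
  return 0;
}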
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 69facada2e96..57a355a55a02 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -6,11 +6,12 @@
//
//===----------------------------------------------------------------------===//
-def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
-def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
+def FLATOffset : ComplexPattern<i64, 2, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
+def FLATOffsetSigned : ComplexPattern<i64, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
+def ScratchOffset : ComplexPattern<i32, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
-def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
-def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
+def GlobalSAddr : ComplexPattern<i64, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
+def ScratchSAddr : ComplexPattern<i32, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
//===----------------------------------------------------------------------===//
// FLAT classes
@@ -64,9 +65,11 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
// Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
// and are not considered done until both have been decremented.
let VM_CNT = 1;
- let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);
+ let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));
- let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0);
+ let IsFlatGlobal = is_flat_global;
+
+ let IsFlatScratch = is_flat_scratch;
}
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
@@ -79,6 +82,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
let AsmMatchConverter = ps.AsmMatchConverter;
+ let OtherPredicates = ps.OtherPredicates;
let TSFlags = ps.TSFlags;
let UseNamedOperandTable = ps.UseNamedOperandTable;
@@ -140,10 +144,13 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
(outs regClass:$vdst),
!con(
!con(
- !con((ins VReg_64:$vaddr),
- !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
- (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
- !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
+ !if(EnableSaddr,
+ (ins SReg_64:$saddr, VGPR_32:$vaddr),
+ (ins VReg_64:$vaddr)),
+ (ins flat_offset:$offset)),
+ // FIXME: Operands with default values do not work with following non-optional operands.
+ !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in),
+ (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))),
" $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
let has_data = 0;
let mayLoad = 1;
@@ -161,9 +168,10 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
opName,
(outs),
!con(
- !con((ins VReg_64:$vaddr, vdataClass:$vdata),
- !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
- (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+ !if(EnableSaddr,
+ (ins VGPR_32:$vaddr, vdataClass:$vdata, SReg_64:$saddr),
+ (ins VReg_64:$vaddr, vdataClass:$vdata)),
+ (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc)),
" $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
let mayLoad = 0;
let mayStore = 1;
@@ -184,24 +192,34 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
}
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
- bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo<
+ bit HasTiedOutput = 0, bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs regClass:$vdst),
- !con((ins SReg_64:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+ !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
+ (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
!if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
- " $vdst, $saddr$offset$glc$slc$dlc"> {
+ " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
let is_flat_global = 1;
let has_data = 0;
let mayLoad = 1;
let has_vaddr = 0;
let has_saddr = 1;
- let enabled_saddr = 1;
+ let enabled_saddr = EnableSaddr;
let maybeAtomic = 1;
+ let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
+multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
+ bit HasTiedOutput = 0, bit HasSignedOffset = 0> {
+ def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset, 1>,
+ GlobalSaddrTable<1, opName>;
+}
+
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
@@ -212,68 +230,107 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
}
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
- bit HasSignedOffset = 0> : FLAT_Pseudo<
+ bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs),
- !con(
- (ins vdataClass:$vdata, SReg_64:$saddr),
- (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
- " $vdata, $saddr$offset$glc$slc$dlc"> {
+ !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
+ (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+ " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
let is_flat_global = 1;
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
let has_vaddr = 0;
let has_saddr = 1;
- let enabled_saddr = 1;
+ let enabled_saddr = EnableSaddr;
let maybeAtomic = 1;
+ let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+}
+
+multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass,
+ bit HasSignedOffset = 0> {
+ def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 1>,
+ GlobalSaddrTable<1, opName>;
+}
+
+class FlatScratchInst <string sv_op, string mode> {
+ string SVOp = sv_op;
+ string Mode = mode;
}
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
- bit EnableSaddr = 0>: FLAT_Pseudo<
+ bit HasTiedOutput = 0,
+ bit EnableSaddr = 0,
+ bit EnableVaddr = !not(EnableSaddr)>
+ : FLAT_Pseudo<
opName,
(outs regClass:$vdst),
- !if(EnableSaddr,
- (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
- (ins VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
- " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
+ !con(
+ !if(EnableSaddr,
+ (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
+ !if(EnableVaddr,
+ (ins VGPR_32:$vaddr, flat_offset:$offset),
+ (ins flat_offset:$offset))),
+ !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in),
+ (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))),
+ " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = 1;
let enabled_saddr = EnableSaddr;
- let has_vaddr = !if(EnableSaddr, 0, 1);
- let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+ let has_vaddr = EnableVaddr;
+ let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
let maybeAtomic = 1;
+
+ let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
+ let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
-class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
+class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
+ bit EnableVaddr = !not(EnableSaddr)> : FLAT_Pseudo<
opName,
(outs),
!if(EnableSaddr,
- (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
- (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
- " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
+ (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
+ !if(EnableVaddr,
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
+ (ins vdataClass:$vdata, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))),
+ " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
let has_saddr = 1;
let enabled_saddr = EnableSaddr;
- let has_vaddr = !if(EnableSaddr, 0, 1);
- let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+ let has_vaddr = EnableVaddr;
+ let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
let maybeAtomic = 1;
}
-multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
+multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> {
let is_flat_scratch = 1 in {
- def "" : FLAT_Scratch_Load_Pseudo<opName, regClass>;
- def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
+ def "" : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>,
+ FlatScratchInst<opName, "SV">;
+ def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
+ FlatScratchInst<opName, "SS">;
+
+ let SubtargetPredicate = HasFlatScratchSTMode in
+ def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0>,
+ FlatScratchInst<opName, "ST">;
}
}
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
let is_flat_scratch = 1 in {
- def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>;
- def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
+ def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>,
+ FlatScratchInst<opName, "SV">;
+ def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
+ FlatScratchInst<opName, "SS">;
+
+ let SubtargetPredicate = HasFlatScratchSTMode in
+ def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0>,
+ FlatScratchInst<opName, "ST">;
}
}
@@ -310,7 +367,7 @@ multiclass FLAT_Atomic_Pseudo<
bit isFP = isFloatType<data_vt>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC_0:$slc),
" $vaddr, $vdata$offset$slc">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
@@ -321,10 +378,10 @@ multiclass FLAT_Atomic_Pseudo<
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
- " $vdst, $vaddr, $vdata$offset glc$slc",
+ (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc),
+ " $vdst, $vaddr, $vdata$offset$glc1$slc",
[(set vt:$vdst,
- (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+ (atomic (FLATOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1>{
let FPAtomic = isFP;
@@ -343,7 +400,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC_0:$slc),
" $vaddr, $vdata, off$offset$slc">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
@@ -354,7 +411,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
+ (ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
" $vaddr, $vdata, $saddr$offset$slc">,
GlobalSaddrTable<1, opName>,
AtomicNoRet <opName#"_saddr", 0> {
@@ -376,10 +433,10 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
- " $vdst, $vaddr, $vdata, off$offset glc$slc",
+ (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc),
+ " $vdst, $vaddr, $vdata, off$offset$glc1$slc",
[(set vt:$vdst,
- (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+ (atomic (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1> {
let has_saddr = 1;
@@ -388,8 +445,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
- " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
+ (ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc),
+ " $vdst, $vaddr, $vdata, $saddr$offset$glc1$slc">,
GlobalSaddrTable<1, opName#"_rtn">,
AtomicNoRet <opName#"_saddr", 1> {
let has_saddr = 1;
@@ -564,7 +621,7 @@ defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_
defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
let OtherPredicates = [HasGFX10_BEncoding] in
-def GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
+defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
@@ -573,7 +630,7 @@ defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VR
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
let OtherPredicates = [HasGFX10_BEncoding] in
-def GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
+defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
@@ -662,7 +719,7 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
let SubtargetPredicate = HasGFX10_BEncoding in
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
- VGPR_32, i32, atomic_csub_global_32>;
+ VGPR_32, i32, int_amdgcn_global_atomic_csub>;
} // End is_flat_global = 1
@@ -677,12 +734,12 @@ defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", V
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;
-defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32>;
-defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32>;
-defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32>;
-defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32>;
-defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32>;
-defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32>;
+defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>;
+defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
+defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>;
+defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
+defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>;
+defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>;
defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
@@ -711,16 +768,16 @@ let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
-let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {
-
-defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
- "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
->;
-defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
- "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
->;
-
-} // End SubtargetPredicate = HasAtomicFaddInsts
+let is_flat_global = 1 in {
+let OtherPredicates = [HasAtomicFaddInsts] in {
+ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
+ "global_atomic_add_f32", VGPR_32, f32
+ >;
+ defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
+ "global_atomic_pk_add_f16", VGPR_32, v2f16
+ >;
+} // End OtherPredicates = [HasAtomicFaddInsts]
+} // End is_flat_global = 1
//===----------------------------------------------------------------------===//
// Flat Patterns
@@ -728,69 +785,135 @@ defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
- (inst $vaddr, $offset, 0, 0, $slc)
+ (vt (node (FLATOffset i64:$vaddr, i16:$offset))),
+ (inst $vaddr, $offset)
>;
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
- (inst $vaddr, $offset, 0, 0, $slc, $in)
+ (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
+ (inst $vaddr, $offset, 0, 0, 0, $in)
>;
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
- (inst $vaddr, $offset, 0, 0, $slc, $in)
+ (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
+ (inst $vaddr, $offset, 0, 0, 0, $in)
>;
-class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
- (inst $vaddr, $offset, 0, 0, $slc)
+class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)),
+ (inst $saddr, $voffset, $offset, 0, 0, 0, $in)
>;
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
- (inst $vaddr, $offset, 0, 0, $slc)
+ (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset))),
+ (inst $vaddr, $offset)
+>;
+
+class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))),
+ (inst $saddr, $voffset, $offset, 0, 0, 0)
>;
-class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
- (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
- (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
+class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt> : GCNPat <
+ (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset)),
+ (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;
-class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
- (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
- (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
+class GlobalAtomicStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt> : GCNPat <
+ (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data),
+ (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;
-class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
+class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt, ValueType data_vt = vt> : GCNPat <
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), data_vt:$data)),
+ (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
+>;
+
+class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt> : GCNPat <
+ (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data),
+ (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
+>;
+
+class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset)),
+ (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
+>;
+
+class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset)),
+ (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
+>;
+
+class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
// atomic store follows atomic binop convention so the address comes
// first.
- (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
- (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
+ (node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
+ (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;
-class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
+class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt, ValueType data_vt = vt> : GCNPat <
// atomic store follows atomic binop convention so the address comes
// first.
- (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
- (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
+ (node (FLATOffset i64:$vaddr, i16:$offset), data_vt:$data),
+ (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : GCNPat <
- (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
- (inst $vaddr, $data, $offset, $slc)
+ (vt (node (FLATOffset i64:$vaddr, i16:$offset), data_vt:$data)),
+ (inst $vaddr, $data, $offset)
>;
class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
- (inst $vaddr, $data, $offset, $slc)
+ (node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
+ (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
+>;
+
+class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (node (FLATOffsetSigned i64:$vaddr, i16:$offset), vt:$data),
+ (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
- ValueType data_vt = vt> : GCNPat <
- (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
- (inst $vaddr, $data, $offset, $slc)
+ ValueType data_vt = vt> : GCNPat <
+ (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data)),
+ (inst $vaddr, $data, $offset)
+>;
+
+class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset))),
+ (inst $vaddr, $offset)
+>;
+
+class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in),
+ (inst $vaddr, $offset, 0, 0, 0, $in)
+>;
+
+class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset)),
+ (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
+>;
+
+class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset))),
+ (inst $saddr, $offset)
+>;
+
+class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
+ (inst $saddr, $offset, 0, 0, 0, $in)
+>;
+
+class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt> : GCNPat <
+ (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset)),
+ (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;
let OtherPredicates = [HasFlatAddressSpace] in {
@@ -807,8 +930,8 @@ def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
+def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
@@ -819,19 +942,19 @@ def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
}
foreach vt = VReg_64.RegTypes in {
-def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
}
-def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
+def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
foreach vt = VReg_128.RegTypes in {
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt, VReg_128>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
}
def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
@@ -885,101 +1008,258 @@ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
} // End OtherPredicates = [HasFlatAddressSpace]
-let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
-def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
+multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
+ def : FlatLoadSignedPat <inst, node, vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let AddedComplexity = 11;
+ }
+}
+
+multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
+ def : FlatSignedLoadPat_D16 <inst, node, vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let AddedComplexity = 11;
+ }
+}
+
+multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt> {
+ def : FlatStoreSignedPat <inst, node, vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let AddedComplexity = 11;
+ }
+}
+
+// Deal with swapped operands for atomic_store vs. regular store
+multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
+ def : FlatStoreSignedAtomicPat <inst, node, vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalAtomicStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let AddedComplexity = 11;
+ }
+}
+
+multiclass GlobalFLATAtomicPats<string nortn_inst_name, SDPatternOperator node,
+ ValueType vt, ValueType data_vt = vt> {
+ def : FlatSignedAtomicPat <!cast<FLAT_Pseudo>(nortn_inst_name#"_RTN"), node, vt, data_vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(nortn_inst_name#"_SADDR_RTN"), node, vt, data_vt> {
+ let AddedComplexity = 11;
+ }
+}
+
+multiclass GlobalFLATNoRtnAtomicPats<FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt> {
+ def : FlatSignedAtomicPatNoRtn <inst, node, vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalAtomicNoRtnSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let AddedComplexity = 11;
+ }
+}
+
+multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
+ def : ScratchLoadSignedPat <inst, node, vt> {
+ let AddedComplexity = 25;
+ }
+
+ def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let AddedComplexity = 26;
+ }
+}
+
+multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt> {
+ def : ScratchStoreSignedPat <inst, node, vt> {
+ let AddedComplexity = 25;
+ }
+
+ def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let AddedComplexity = 26;
+ }
+}
+
+multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
+ def : ScratchLoadSignedPat_D16 <inst, node, vt> {
+ let AddedComplexity = 25;
+ }
+
+ def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let AddedComplexity = 26;
+ }
+}
+
+let OtherPredicates = [HasFlatGlobalInsts] in {
+
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>;
foreach vt = Reg32Types.types in {
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
}
foreach vt = VReg_64.RegTypes in {
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>;
}
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
foreach vt = VReg_128.RegTypes in {
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, vt>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, vt, VReg_128>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, load_global, vt>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>;
}
-def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
-def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
+// There is no distinction for atomic load lowering during selection;
+// the memory legalizer will set the cache bits and insert the
+// appropriate waits.
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
-def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;
let OtherPredicates = [D16PreservesUnusedBits] in {
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
-
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;
-
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
-def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
+
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;
+
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
+defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
}
-def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>;
-def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64, VReg_64>;
-
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CSUB_RTN, atomic_csub_global_32, i32>;
-
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
-
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>;
+defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>;
+defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64>;
+
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", atomic_load_add_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", atomic_load_sub_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", atomic_inc_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", atomic_dec_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", atomic_load_and_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", atomic_load_max_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", atomic_load_umax_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", atomic_load_min_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", atomic_load_umin_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", atomic_load_or_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", atomic_swap_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", atomic_load_xor_global_32, i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CSUB", int_amdgcn_global_atomic_csub, i32>;
+
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", atomic_load_add_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", atomic_load_sub_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", atomic_inc_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", atomic_dec_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", atomic_load_and_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", atomic_load_max_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", atomic_load_umax_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", atomic_load_min_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", atomic_load_umin_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", atomic_load_or_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", atomic_swap_global_64, i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", atomic_load_xor_global_64, i64>;
+
+let OtherPredicates = [HasAtomicFaddInsts] in {
+defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_load_fadd_global_noret_32, f32>;
+defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>;
+}
} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
+let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {
+
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i32>;
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i32>;
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i32>;
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>;
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>;
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>;
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>;
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16>;
+
+foreach vt = Reg32Types.types in {
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>;
+defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>;
+}
+
+foreach vt = VReg_64.RegTypes in {
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>;
+defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>;
+}
+
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>;
+
+foreach vt = VReg_128.RegTypes in {
+defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>;
+defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>;
+}
+
+defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i32>;
+defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>;
+defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>;
+defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>;
+defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>;
+
+let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
+defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
+defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>;
+
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>;
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2f16>;
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2i16>;
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2f16>;
+
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>;
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>;
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2i16>;
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2f16>;
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2i16>;
+defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f16>;
+}
+
+} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]
//===----------------------------------------------------------------------===//
// Target
@@ -1246,6 +1526,13 @@ multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}
+multiclass FLAT_Real_ST_gfx10<bits<7> op> {
+ def _ST_gfx10 :
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> {
+ let Inst{54-48} = !cast<int>(EXEC_HI.HWEncoding);
+ let OtherPredicates = [HasFlatScratchSTMode];
+ }
+}
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
FLAT_Real_Base_gfx10<op>,
@@ -1264,6 +1551,11 @@ multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> :
FLAT_Real_RTN_gfx10<op>,
FLAT_Real_SADDR_RTN_gfx10<op>;
+multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> :
+ FLAT_Real_Base_gfx10<op>,
+ FLAT_Real_SADDR_gfx10<op>,
+ FLAT_Real_ST_gfx10<op>;
+
// ENC_FLAT.
defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>;
@@ -1377,32 +1669,32 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
-defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x016>;
-defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x017>;
+defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>;
+defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;
// ENC_FLAT_SCRATCH.
-defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;
-defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>;
-defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>;
-defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>;
-defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>;
-defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
-defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
-defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;
-defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>;
-defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>;
-defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>;
-defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
-defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>;
-defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>;
-defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>;
-defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>;
-defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>;
-defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
-defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>;
-defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
-defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>;
-defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;
+defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
+defm SCRATCH_LOAD_SBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
+defm SCRATCH_LOAD_USHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
+defm SCRATCH_LOAD_SSHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
+defm SCRATCH_LOAD_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
+defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
+defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
+defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;
+defm SCRATCH_STORE_BYTE : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
+defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
+defm SCRATCH_STORE_SHORT : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
+defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
+defm SCRATCH_STORE_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
+defm SCRATCH_STORE_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
+defm SCRATCH_STORE_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
+defm SCRATCH_STORE_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;
+defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
+defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
+defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
+defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
+defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
+defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>;
let SubtargetPredicate = HasAtomicFaddInsts in {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 719a968b8314..e4eacd101ce8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -38,22 +38,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/Pass.h"
-#include <cassert>
using namespace llvm;
@@ -274,14 +262,14 @@ static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
default: break;
case AMDGPU::V_ADD_U32_e32:
case AMDGPU::V_ADD_U32_e64:
- case AMDGPU::V_ADD_I32_e32:
- case AMDGPU::V_ADD_I32_e64:
+ case AMDGPU::V_ADD_CO_U32_e32:
+ case AMDGPU::V_ADD_CO_U32_e64:
case AMDGPU::V_OR_B32_e32:
case AMDGPU::V_OR_B32_e64:
case AMDGPU::V_SUBREV_U32_e32:
case AMDGPU::V_SUBREV_U32_e64:
- case AMDGPU::V_SUBREV_I32_e32:
- case AMDGPU::V_SUBREV_I32_e64:
+ case AMDGPU::V_SUBREV_CO_U32_e32:
+ case AMDGPU::V_SUBREV_CO_U32_e64:
case AMDGPU::V_MAX_U32_e32:
case AMDGPU::V_MAX_U32_e64:
case AMDGPU::V_XOR_B32_e32:
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 8482dbfec250..ed1dc77bd545 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -11,25 +11,11 @@
//===----------------------------------------------------------------------===//
#include "GCNHazardRecognizer.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <algorithm>
-#include <cassert>
-#include <limits>
-#include <set>
-#include <vector>
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
@@ -50,6 +36,10 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
TSchedModel.init(&ST);
}
+void GCNHazardRecognizer::Reset() {
+ EmittedInstrs.clear();
+}
+
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
EmitInstruction(SU->getInstr());
}
@@ -59,7 +49,7 @@ void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
}
static bool isDivFMas(unsigned Opcode) {
- return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
+ return Opcode == AMDGPU::V_DIV_FMAS_F32_e64 || Opcode == AMDGPU::V_DIV_FMAS_F64_e64;
}
static bool isSGetReg(unsigned Opcode) {
@@ -67,7 +57,14 @@ static bool isSGetReg(unsigned Opcode) {
}
static bool isSSetReg(unsigned Opcode) {
- return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
+ switch (Opcode) {
+ case AMDGPU::S_SETREG_B32:
+ case AMDGPU::S_SETREG_B32_mode:
+ case AMDGPU::S_SETREG_IMM32_B32:
+ case AMDGPU::S_SETREG_IMM32_B32_mode:
+ return true;
+ }
+ return false;
}
static bool isRWLane(unsigned Opcode) {
@@ -118,8 +115,8 @@ static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
static bool isPermlane(const MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
- return Opcode == AMDGPU::V_PERMLANE16_B32 ||
- Opcode == AMDGPU::V_PERMLANEX16_B32;
+ return Opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
}
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
@@ -131,75 +128,83 @@ static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
+ // If we are not in "HazardRecognizerMode" and therefore not being run from
+ // the scheduler, track possible stalls from hazards but don't insert noops.
+ auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard;
+
if (MI->isBundle())
return NoHazard;
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
// FIXME: Should flat be considered vmem?
if ((SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI))
&& checkVMEMHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (checkFPAtomicToDenormModeHazard(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (ST.hasNoDataDepHazard())
return NoHazard;
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (ST.hasReadM0MovRelInterpHazard() &&
(TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
checkReadM0Hazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
checkReadM0Hazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
- if (MI->mayLoadOrStore() && checkMAILdStHazards(MI) > 0)
- return NoopHazard;
+ if ((SIInstrInfo::isVMEM(*MI) ||
+ SIInstrInfo::isFLAT(*MI) ||
+ SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
+ return HazardType;
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
- return NoopHazard;
-
- if (checkAnyInstHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
return NoHazard;
}
-static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
- .addImm(0);
+static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
+ unsigned Quantity) {
+ while (Quantity > 0) {
+ unsigned Arg = std::min(Quantity, 8u);
+ Quantity -= Arg;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
+ .addImm(Arg - 1);
+ }
}
void GCNHazardRecognizer::processBundle() {
@@ -210,11 +215,11 @@ void GCNHazardRecognizer::processBundle() {
CurrCycleInstr = &*MI;
unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
- if (IsHazardRecognizerMode)
+ if (IsHazardRecognizerMode) {
fixHazards(CurrCycleInstr);
- for (unsigned i = 0; i < WaitStates; ++i)
- insertNoopInBundle(CurrCycleInstr, TII);
+ insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
+ }
// It’s unnecessary to track more than MaxLookAhead instructions. Since we
// include the bundled MI directly after, only add a maximum of
@@ -241,7 +246,7 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (MI->isBundle())
return 0;
- int WaitStates = std::max(0, checkAnyInstHazards(MI));
+ int WaitStates = 0;
if (SIInstrInfo::isSMRD(*MI))
return std::max(WaitStates, checkSMRDHazards(MI));
@@ -291,7 +296,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (SIInstrInfo::isMAI(*MI))
return std::max(WaitStates, checkMAIHazards(MI));
- if (MI->mayLoadOrStore())
+ if (SIInstrInfo::isVMEM(*MI) ||
+ SIInstrInfo::isFLAT(*MI) ||
+ SIInstrInfo::isDS(*MI))
return std::max(WaitStates, checkMAILdStHazards(MI));
return WaitStates;
@@ -304,15 +311,19 @@ void GCNHazardRecognizer::EmitNoop() {
void GCNHazardRecognizer::AdvanceCycle() {
// When the scheduler detects a stall, it will call AdvanceCycle() without
// emitting any instructions.
- if (!CurrCycleInstr)
+ if (!CurrCycleInstr) {
+ EmittedInstrs.push_front(nullptr);
return;
+ }
// Do not track non-instructions which do not affect the wait states.
// If included, these instructions can lead to buffer overflow such that
// detectable hazards are missed.
if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
- CurrCycleInstr->isKill())
+ CurrCycleInstr->isKill()) {
+ CurrCycleInstr = nullptr;
return;
+ }
if (CurrCycleInstr->isBundle()) {
processBundle();
@@ -367,7 +378,7 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
if (IsHazard(&*I))
return WaitStates;
- if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
+ if (I->isInlineAsm() || I->isMetaInstruction())
continue;
WaitStates += SIInstrInfo::getNumWaitStates(*I);
@@ -460,8 +471,8 @@ int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
// No-op Hazard Detection
//===----------------------------------------------------------------------===//
-static void addRegUnits(const SIRegisterInfo &TRI,
- BitVector &BV, unsigned Reg) {
+static void addRegUnits(const SIRegisterInfo &TRI, BitVector &BV,
+ MCRegister Reg) {
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
BV.set(*RUI);
}
@@ -471,7 +482,7 @@ static void addRegsToSet(const SIRegisterInfo &TRI,
BitVector &Set) {
for (const MachineOperand &Op : Ops) {
if (Op.isReg())
- addRegUnits(TRI, Set, Op.getReg());
+ addRegUnits(TRI, Set, Op.getReg().asMCReg());
}
}
@@ -718,8 +729,9 @@ int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
return -1;
}
-int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
- const MachineRegisterInfo &MRI) {
+int
+GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
+ const MachineRegisterInfo &MRI) {
// Helper to check for the hazard where VMEM instructions that store more than
// 8 bytes can have their store data overwritten by the next instruction.
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -821,34 +833,6 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
return RFEWaitStates - WaitStatesNeeded;
}
-int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
- if (MI->isDebugInstr())
- return 0;
-
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- if (!ST.hasSMovFedHazard())
- return 0;
-
- // Check for any instruction reading an SGPR after a write from
- // s_mov_fed_b32.
- int MovFedWaitStates = 1;
- int WaitStatesNeeded = 0;
-
- for (const MachineOperand &Use : MI->uses()) {
- if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
- continue;
- auto IsHazardFn = [] (MachineInstr *MI) {
- return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
- };
- int WaitStatesNeededForUse =
- MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
- MovFedWaitStates);
- WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
- }
-
- return WaitStatesNeeded;
-}
-
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
const SIInstrInfo *TII = ST.getInstrInfo();
const int SMovRelWaitStates = 1;
@@ -930,10 +914,12 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
return false;
};
- auto IsExpiredFn = [] (MachineInstr *MI, int) {
+ auto IsExpiredFn = [](MachineInstr *MI, int) {
return MI && (SIInstrInfo::isVALU(*MI) ||
(MI->getOpcode() == AMDGPU::S_WAITCNT &&
- !MI->getOperand(0).getImm()));
+ !MI->getOperand(0).getImm()) ||
+ (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+ MI->getOperand(0).getImm() == 0xffe3));
};
if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
@@ -941,7 +927,9 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
return false;
const SIInstrInfo *TII = ST.getInstrInfo();
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(0xffe3);
return true;
}
@@ -955,7 +943,6 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
unsigned SDSTName;
switch (MI->getOpcode()) {
case AMDGPU::V_READLANE_B32:
- case AMDGPU::V_READLANE_B32_gfx10:
case AMDGPU::V_READFIRSTLANE_B32:
SDSTName = AMDGPU::OpName::vdst;
break;
@@ -1183,7 +1170,7 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
case AMDGPU::S_WAITCNT_VMCNT:
case AMDGPU::S_WAITCNT_EXPCNT:
case AMDGPU::S_WAITCNT_LGKMCNT:
- case AMDGPU::S_WAITCNT_IDLE:
+ case AMDGPU::S_WAIT_IDLE:
return true;
default:
break;
@@ -1207,7 +1194,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
return SIInstrInfo::isVALU(*MI);
};
- if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
+ if (Opc != AMDGPU::V_ACCVGPR_READ_B32_e64) { // MFMA or v_accvgpr_write
const int LegacyVALUWritesVGPRWaitStates = 2;
const int VALUWritesExecWaitStates = 4;
const int MaxWaitStates = 4;
@@ -1235,15 +1222,15 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
auto IsMFMAFn = [] (MachineInstr *MI) {
return SIInstrInfo::isMAI(*MI) &&
- MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
- MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
};
for (const MachineOperand &Op : MI->explicit_operands()) {
if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
continue;
- if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
+ if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
continue;
const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
@@ -1277,7 +1264,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
int OpNo = MI->getOperandNo(&Op);
if (OpNo == SrcCIdx) {
NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
- } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
+ } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) {
switch (HazardDefLatency) {
case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
break;
@@ -1287,7 +1274,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
break;
}
- } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+ } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
switch (HazardDefLatency) {
case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
break;
@@ -1306,7 +1293,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
return WaitStatesNeeded; // Early exit.
auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
- if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
+ if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
return false;
Register DstReg = MI->getOperand(0).getReg();
return TRI.regsOverlap(Reg, DstReg);
@@ -1318,7 +1305,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
if (OpNo == SrcCIdx)
NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
- else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
+ else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64)
NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
WaitStatesNeededForUse = NeedWaitStates -
@@ -1329,7 +1316,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
return WaitStatesNeeded; // Early exit.
}
- if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+ if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
@@ -1373,7 +1360,7 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
int WaitStatesNeeded = 0;
auto IsAccVgprReadFn = [] (MachineInstr *MI) {
- return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
+ return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64;
};
for (const MachineOperand &Op : MI->explicit_uses()) {
@@ -1383,7 +1370,7 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
Register Reg = Op.getReg();
const int AccVgprReadLdStWaitStates = 2;
- const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
+ const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1;
const int MaxWaitStates = 2;
int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
@@ -1393,8 +1380,9 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
if (WaitStatesNeeded == MaxWaitStates)
return WaitStatesNeeded; // Early exit.
- auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
- if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
+ auto IsVALUAccVgprRdWrCheckFn = [Reg, this](MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
return false;
auto IsVALUFn = [] (MachineInstr *MI) {
return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
@@ -1403,10 +1391,34 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
std::numeric_limits<int>::max();
};
- WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
- getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
+ WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates -
+ getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
}
return WaitStatesNeeded;
}
+
+bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ if (!SU->isInstr())
+ return false;
+
+ MachineInstr *MAI = nullptr;
+ auto IsMFMAFn = [&MAI] (MachineInstr *MI) {
+ MAI = nullptr;
+ if (SIInstrInfo::isMAI(*MI) &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64)
+ MAI = MI;
+ return MAI != nullptr;
+ };
+
+ MachineInstr *MI = SU->getInstr();
+ if (IsMFMAFn(MI)) {
+ int W = getWaitStatesSince(IsMFMAFn, 16);
+ if (MAI)
+ return W < (int)TSchedModel.computeInstrLatency(MAI);
+ }
+
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index cd17f2755bd1..447ca828ae64 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -83,7 +83,6 @@ private:
int checkRWLaneHazards(MachineInstr *RWLane);
int checkRFEHazards(MachineInstr *RFE);
int checkInlineAsmHazards(MachineInstr *IA);
- int checkAnyInstHazards(MachineInstr *MI);
int checkReadM0Hazards(MachineInstr *SMovRel);
int checkNSAtoVMEMHazard(MachineInstr *MI);
int checkFPAtomicToDenormModeHazard(MachineInstr *MI);
@@ -109,6 +108,8 @@ public:
unsigned PreEmitNoopsCommon(MachineInstr *);
void AdvanceCycle() override;
void RecedeCycle() override;
+ bool ShouldPreferAnother(SUnit *SU) override;
+ void Reset() override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNILPSched.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
index 39072af7d871..1eb617640c32 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 75a02c839034..f3f9eb53355f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -12,29 +12,8 @@
//===----------------------------------------------------------------------===//
#include "GCNIterativeScheduler.h"
-#include "AMDGPUSubtarget.h"
-#include "GCNRegPressure.h"
#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <iterator>
-#include <limits>
-#include <memory>
-#include <type_traits>
-#include <vector>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h
index a0d4f432aa48..c0228540b7a2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h
@@ -18,13 +18,7 @@
#define LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
#include "GCNRegPressure.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/Support/Allocator.h"
-#include <limits>
-#include <memory>
-#include <vector>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index 884b2e17289c..443472a3b99a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -13,20 +13,7 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/simple_ilist.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstdint>
-#include <limits>
-#include <vector>
-
using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 57346087d017..fc7105bc15a7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -14,18 +14,13 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
+#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/MathExtras.h"
-#include <algorithm>
using namespace llvm;
@@ -114,15 +109,15 @@ GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
unsigned NumRegs = Intervals.size();
for (unsigned N = 0; N < NumRegs; ++N)
- if (VRM->hasPhys(Intervals[N]->reg))
+ if (VRM->hasPhys(Intervals[N]->reg()))
LRM->unassign(*Intervals[N]);
for (unsigned N = 0; N < NumRegs; ++N)
- if (LRM->checkInterference(*Intervals[N], StartReg + N))
+ if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))
return false;
for (unsigned N = 0; N < NumRegs; ++N)
- LRM->assign(*Intervals[N], StartReg + N);
+ LRM->assign(*Intervals[N], MCRegister::from(StartReg + N));
return true;
}
@@ -175,7 +170,7 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
Register Reg = Op.getReg();
- if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
+ if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
return NSA_Status::FIXED;
Register PhysReg = VRM->getPhys(Reg);
@@ -273,13 +268,13 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);
SmallVector<LiveInterval *, 16> Intervals;
- SmallVector<unsigned, 16> OrigRegs;
+ SmallVector<MCRegister, 16> OrigRegs;
SlotIndex MinInd, MaxInd;
for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
Register Reg = Op.getReg();
LiveInterval *LI = &LIS->getInterval(Reg);
- if (llvm::find(Intervals, LI) != Intervals.end()) {
+ if (llvm::is_contained(Intervals, LI)) {
// Same register used, unable to make sequential
Intervals.clear();
break;
@@ -302,14 +297,15 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
<< "\tOriginal allocation:\t";
- for(auto *LI : Intervals)
- dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI);
+ for (auto *LI
+ : Intervals) dbgs()
+ << " " << llvm::printReg((VRM->getPhys(LI->reg())), TRI);
dbgs() << '\n');
bool Success = scavengeRegs(Intervals);
if (!Success) {
LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
- if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation.
+ if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation.
continue;
} else {
// Check we did not make it worse for other instructions.
@@ -328,7 +324,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
if (!Success) {
for (unsigned I = 0; I < Info->VAddrDwords; ++I)
- if (VRM->hasPhys(Intervals[I]->reg))
+ if (VRM->hasPhys(Intervals[I]->reg()))
LRM->unassign(*Intervals[I]);
for (unsigned I = 0; I < Info->VAddrDwords; ++I)
@@ -339,11 +335,12 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
C.second = true;
++NumNSAConverted;
- LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t ["
- << llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI)
- << " : "
- << llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI)
- << "]\n");
+ LLVM_DEBUG(
+ dbgs() << "\tNew allocation:\t\t ["
+ << llvm::printReg((VRM->getPhys(Intervals.front()->reg())), TRI)
+ << " : "
+ << llvm::printReg((VRM->getPhys(Intervals.back()->reg())), TRI)
+ << "]\n");
Changed = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 17e6098d880d..7447ec2db188 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -32,20 +32,24 @@ def : ProcessorModel<"gfx601", SIQuarterSpeedModel,
FeatureISAVersion6_0_1.Features
>;
-def : ProcessorModel<"hainan", SIQuarterSpeedModel,
+def : ProcessorModel<"pitcairn", SIQuarterSpeedModel,
FeatureISAVersion6_0_1.Features
>;
-def : ProcessorModel<"oland", SIQuarterSpeedModel,
+def : ProcessorModel<"verde", SIQuarterSpeedModel,
FeatureISAVersion6_0_1.Features
>;
-def : ProcessorModel<"pitcairn", SIQuarterSpeedModel,
- FeatureISAVersion6_0_1.Features
+def : ProcessorModel<"gfx602", SIQuarterSpeedModel,
+ FeatureISAVersion6_0_2.Features
>;
-def : ProcessorModel<"verde", SIQuarterSpeedModel,
- FeatureISAVersion6_0_1.Features
+def : ProcessorModel<"hainan", SIQuarterSpeedModel,
+ FeatureISAVersion6_0_2.Features
+>;
+
+def : ProcessorModel<"oland", SIQuarterSpeedModel,
+ FeatureISAVersion6_0_2.Features
>;
//===------------------------------------------------------------===//
@@ -92,6 +96,10 @@ def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
FeatureISAVersion7_0_4.Features
>;
+def : ProcessorModel<"gfx705", SIQuarterSpeedModel,
+ FeatureISAVersion7_0_5.Features
+>;
+
//===------------------------------------------------------------===//
// GCN GFX8 (Volcanic Islands (VI)).
//===------------------------------------------------------------===//
@@ -132,6 +140,14 @@ def : ProcessorModel<"polaris11", SIQuarterSpeedModel,
FeatureISAVersion8_0_3.Features
>;
+def : ProcessorModel<"gfx805", SIQuarterSpeedModel,
+ FeatureISAVersion8_0_5.Features
+>;
+
+def : ProcessorModel<"tongapro", SIQuarterSpeedModel,
+ FeatureISAVersion8_0_5.Features
+>;
+
def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
FeatureISAVersion8_1_0.Features
>;
@@ -168,6 +184,10 @@ def : ProcessorModel<"gfx909", SIQuarterSpeedModel,
FeatureISAVersion9_0_9.Features
>;
+def : ProcessorModel<"gfx90c", SIQuarterSpeedModel,
+ FeatureISAVersion9_0_C.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
@@ -187,3 +207,15 @@ def : ProcessorModel<"gfx1012", GFX10SpeedModel,
def : ProcessorModel<"gfx1030", GFX10SpeedModel,
FeatureISAVersion10_3_0.Features
>;
+
+def : ProcessorModel<"gfx1031", GFX10SpeedModel,
+ FeatureISAVersion10_3_0.Features
+>;
+
+def : ProcessorModel<"gfx1032", GFX10SpeedModel,
+ FeatureISAVersion10_3_0.Features
+>;
+
+def : ProcessorModel<"gfx1033", GFX10SpeedModel,
+ FeatureISAVersion10_3_0.Features
+>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
index 98d971630ca4..a12e9ab03e1d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
@@ -31,20 +31,15 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
+#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -76,41 +71,59 @@ class GCNRegBankReassign : public MachineFunctionPass {
public:
OperandMask(unsigned r, unsigned s, unsigned m)
: Reg(r), SubReg(s), Mask(m) {}
- unsigned Reg;
+ Register Reg;
unsigned SubReg;
unsigned Mask;
};
class Candidate {
public:
- Candidate(MachineInstr *mi, unsigned reg, unsigned freebanks,
- unsigned weight)
- : MI(mi), Reg(reg), FreeBanks(freebanks), Weight(weight) {}
-
- bool operator< (const Candidate& RHS) const { return Weight < RHS.Weight; }
+ Candidate(MachineInstr *mi, Register reg, unsigned subreg,
+ unsigned freebanks)
+ : MI(mi), Reg(reg), SubReg(subreg), FreeBanks(freebanks) {}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump(const GCNRegBankReassign *P) const {
MI->dump();
dbgs() << P->printReg(Reg) << " to banks ";
dumpFreeBanks(FreeBanks);
- dbgs() << " weight " << Weight << '\n';
+ dbgs() << '\n';
}
#endif
MachineInstr *MI;
- unsigned Reg;
+ Register Reg;
+ unsigned SubReg;
unsigned FreeBanks;
- unsigned Weight;
};
- class CandidateList : public std::list<Candidate> {
+ class CandidateList : public std::map<unsigned, std::list<Candidate>> {
public:
- // Speedup subsequent sort.
- void push(const Candidate&& C) {
- if (C.Weight) push_back(C);
- else push_front(C);
+ void push(unsigned Weight, const Candidate&& C) {
+ operator[](Weight).push_front(C);
+ }
+
+ Candidate &back() {
+ return rbegin()->second.back();
+ }
+
+ void pop_back() {
+ rbegin()->second.pop_back();
+ if (rbegin()->second.empty())
+ erase(rbegin()->first);
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump(const GCNRegBankReassign *P) const {
+ dbgs() << "\nCandidates:\n\n";
+ for (auto &B : *this) {
+ dbgs() << " Weight " << B.first << ":\n";
+ for (auto &C : B.second)
+ C.dump(P);
+ }
+ dbgs() << "\n\n";
}
+#endif
};
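
A self-contained sketch of the weight-bucketed work list that CandidateList becomes above: candidates are grouped by weight in a std::map, and back()/pop_back() always act on the highest-weight bucket, erasing a bucket once it is empty. The Cand and WeightedList names below are illustrative, not the in-tree types:

#include <cassert>
#include <list>
#include <map>
#include <string>

struct Cand { std::string Name; };

// Same shape as CandidateList: map from weight to a list of candidates.
class WeightedList : public std::map<unsigned, std::list<Cand>> {
public:
  void push(unsigned Weight, Cand &&C) { (*this)[Weight].push_front(std::move(C)); }
  Cand &back() { return rbegin()->second.back(); }        // highest weight wins
  void pop_back() {
    rbegin()->second.pop_back();
    if (rbegin()->second.empty())
      erase(rbegin()->first);                             // drop exhausted bucket
  }
};

int main() {
  WeightedList WL;
  WL.push(0, {"cheap"});
  WL.push(2, {"expensive"});
  assert(WL.back().Name == "expensive"); // weight-2 bucket is visited first
  WL.pop_back();
  assert(WL.back().Name == "cheap");     // then the weight-0 bucket
  return 0;
}
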
public:
@@ -162,32 +175,32 @@ private:
const MCPhysReg *CSRegs;
// Returns bank for a phys reg.
- unsigned getPhysRegBank(unsigned Reg) const;
+ unsigned getPhysRegBank(Register Reg, unsigned SubReg) const;
// Return a bit set for each register bank used. 4 banks for VGPRs and
// 8 banks for SGPRs.
// Registers already processed and recorded in RegsUsed are excluded.
// If Bank is not -1, assume Reg:SubReg belongs to that Bank.
- uint32_t getRegBankMask(unsigned Reg, unsigned SubReg, int Bank);
+ uint32_t getRegBankMask(Register Reg, unsigned SubReg, int Bank);
// Analyze one instruction returning the number of stalls and a mask of the
// banks used by all operands.
// If Reg and Bank are provided, assume all uses of Reg will be replaced with
// a register chosen from Bank.
std::pair<unsigned, unsigned> analyzeInst(const MachineInstr &MI,
- unsigned Reg = AMDGPU::NoRegister,
- int Bank = -1);
+ Register Reg = Register(),
+ unsigned SubReg = 0, int Bank = -1);
// Return true if register is regular VGPR or SGPR or their tuples.
// Returns false for special registers like m0, vcc etc.
- bool isReassignable(unsigned Reg) const;
+ bool isReassignable(Register Reg) const;
// Check if registers' defs are old and may be pre-loaded.
// Returns 0 if both registers are old enough, 1 or 2 if one or both
// registers will not likely be pre-loaded.
unsigned getOperandGatherWeight(const MachineInstr& MI,
- unsigned Reg1,
- unsigned Reg2,
+ Register Reg1,
+ Register Reg2,
unsigned StallCycles) const;
@@ -197,7 +210,7 @@ private:
// Find all bank bits in UsedBanks to which Mask can be relocated.
// Bank is relative to the register and not its subregister component.
// Returns 0 if a register is not reassignable.
- unsigned getFreeBanks(unsigned Reg, unsigned SubReg, unsigned Mask,
+ unsigned getFreeBanks(Register Reg, unsigned SubReg, unsigned Mask,
unsigned UsedBanks) const;
// Add candidate instruction to the work list.
@@ -209,18 +222,20 @@ private:
unsigned collectCandidates(MachineFunction &MF, bool Collect = true);
// Remove all candidates that read specified register.
- void removeCandidates(unsigned Reg);
+ void removeCandidates(Register Reg);
// Compute stalls within the uses of SrcReg replaced by a register from
// Bank. If Bank is -1, no substitution is performed. If Collect is set,
// candidates are collected and added to the work list.
- unsigned computeStallCycles(unsigned SrcReg,
- unsigned Reg = AMDGPU::NoRegister,
- int Bank = -1, bool Collect = false);
+ unsigned computeStallCycles(Register SrcReg,
+ Register Reg = Register(),
+ unsigned SubReg = 0, int Bank = -1,
+ bool Collect = false);
// Search for a register in Bank unused within LI.
// Returns phys reg or NoRegister.
- unsigned scavengeReg(LiveInterval& LI, unsigned Bank) const;
+ MCRegister scavengeReg(LiveInterval &LI, unsigned Bank,
+ unsigned SubReg) const;
// Try to reassign candidate. Returns number or stall cycles saved.
unsigned tryReassign(Candidate &C);
@@ -231,9 +246,9 @@ private:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
public:
- Printable printReg(unsigned Reg, unsigned SubReg = 0) const {
+ Printable printReg(Register Reg, unsigned SubReg = 0) const {
return Printable([Reg, SubReg, this](raw_ostream &OS) {
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
OS << llvm::printReg(Reg, TRI);
return;
}
@@ -277,28 +292,37 @@ char GCNRegBankReassign::ID = 0;
char &llvm::GCNRegBankReassignID = GCNRegBankReassign::ID;
-unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const {
- assert(Register::isPhysicalRegister(Reg));
+unsigned GCNRegBankReassign::getPhysRegBank(Register Reg,
+ unsigned SubReg) const {
+ assert(Reg.isPhysical());
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
unsigned Size = TRI->getRegSizeInBits(*RC);
if (Size == 16)
Reg = TRI->get32BitRegister(Reg);
- else if (Size > 32)
- Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+ else if (Size > 32) {
+ if (SubReg) {
+ const TargetRegisterClass *SubRC = TRI->getSubRegClass(RC, SubReg);
+ Reg = TRI->getSubReg(Reg, SubReg);
+ if (TRI->getRegSizeInBits(*SubRC) > 32)
+ Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+ } else {
+ Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+ }
+ }
if (TRI->hasVGPRs(RC)) {
- Reg -= AMDGPU::VGPR0;
- return Reg % NUM_VGPR_BANKS;
+ unsigned RegNo = Reg - AMDGPU::VGPR0;
+ return RegNo % NUM_VGPR_BANKS;
}
- Reg = TRI->getEncodingValue(Reg) / 2;
- return Reg % NUM_SGPR_BANKS + SGPR_BANK_OFFSET;
+ unsigned RegNo = TRI->getEncodingValue(AMDGPU::getMCReg(Reg, *ST)) / 2;
+ return RegNo % NUM_SGPR_BANKS + SGPR_BANK_OFFSET;
}
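
As a rough illustration of the bank arithmetic in getPhysRegBank above, assuming the constants this pass uses elsewhere (4 VGPR banks, 8 SGPR banks, SGPR bank ids offset past the VGPR ones); the register numbers below are made up for the example:

#include <cstdio>

constexpr unsigned NUM_VGPR_BANKS = 4;
constexpr unsigned NUM_SGPR_BANKS = 8;
constexpr unsigned SGPR_BANK_OFFSET = NUM_VGPR_BANKS;

// VGPRn lives in bank n % 4.
unsigned vgprBank(unsigned VgprNo) { return VgprNo % NUM_VGPR_BANKS; }

// SGPRs are banked in pairs: bank = (encoding / 2) % 8, shifted past the
// four VGPR bank ids.
unsigned sgprBank(unsigned SgprEncoding) {
  return (SgprEncoding / 2) % NUM_SGPR_BANKS + SGPR_BANK_OFFSET;
}

int main() {
  std::printf("v5  -> bank %u\n", vgprBank(5));  // 5 % 4 == 1
  std::printf("s18 -> bank %u\n", sgprBank(18)); // (18 / 2) % 8 + 4 == 5
  return 0;
}
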
-uint32_t GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
+uint32_t GCNRegBankReassign::getRegBankMask(Register Reg, unsigned SubReg,
int Bank) {
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
if (!VRM->isAssignedReg(Reg))
return 0;
@@ -323,23 +347,23 @@ uint32_t GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
if (TRI->hasVGPRs(RC)) {
// VGPRs have 4 banks assigned in a round-robin fashion.
- Reg -= AMDGPU::VGPR0;
+ unsigned RegNo = Reg - AMDGPU::VGPR0;
uint32_t Mask = maskTrailingOnes<uint32_t>(Size);
unsigned Used = 0;
// Bitmask lacks an extract method
for (unsigned I = 0; I < Size; ++I)
- if (RegsUsed.test(Reg + I))
+ if (RegsUsed.test(RegNo + I))
Used |= 1 << I;
- RegsUsed.set(Reg, Reg + Size);
+ RegsUsed.set(RegNo, RegNo + Size);
Mask &= ~Used;
- Mask <<= (Bank == -1) ? Reg % NUM_VGPR_BANKS : uint32_t(Bank);
+ Mask <<= (Bank == -1) ? RegNo % NUM_VGPR_BANKS : uint32_t(Bank);
return (Mask | (Mask >> NUM_VGPR_BANKS)) & VGPR_BANK_MASK;
}
// SGPRs have 8 banks holding 2 consecutive registers each.
- Reg = TRI->getEncodingValue(Reg) / 2;
+ unsigned RegNo = TRI->getEncodingValue(AMDGPU::getMCReg(Reg, *ST)) / 2;
unsigned StartBit = AMDGPU::VGPR_32RegClass.getNumRegs();
- if (Reg + StartBit >= RegsUsed.size())
+ if (RegNo + StartBit >= RegsUsed.size())
return 0;
if (Size > 1)
@@ -347,11 +371,11 @@ uint32_t GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
unsigned Mask = (1 << Size) - 1;
unsigned Used = 0;
for (unsigned I = 0; I < Size; ++I)
- if (RegsUsed.test(StartBit + Reg + I))
+ if (RegsUsed.test(StartBit + RegNo + I))
Used |= 1 << I;
- RegsUsed.set(StartBit + Reg, StartBit + Reg + Size);
+ RegsUsed.set(StartBit + RegNo, StartBit + RegNo + Size);
Mask &= ~Used;
- Mask <<= (Bank == -1) ? Reg % NUM_SGPR_BANKS
+ Mask <<= (Bank == -1) ? RegNo % NUM_SGPR_BANKS
: unsigned(Bank - SGPR_BANK_OFFSET);
Mask = (Mask | (Mask >> NUM_SGPR_BANKS)) & SGPR_BANK_SHIFTED_MASK;
// Reserve 4 bank ids for VGPRs.
@@ -359,8 +383,8 @@ uint32_t GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
}
std::pair<unsigned, unsigned>
-GCNRegBankReassign::analyzeInst(const MachineInstr &MI, unsigned Reg,
- int Bank) {
+GCNRegBankReassign::analyzeInst(const MachineInstr &MI, Register Reg,
+ unsigned SubReg, int Bank) {
unsigned StallCycles = 0;
unsigned UsedBanks = 0;
@@ -375,26 +399,39 @@ GCNRegBankReassign::analyzeInst(const MachineInstr &MI, unsigned Reg,
if (!Op.isReg() || Op.isUndef())
continue;
- Register R = Op.getReg();
- if (TRI->hasAGPRs(TRI->getRegClassForReg(*MRI, R)))
- continue;
+ const Register R = Op.getReg();
+ const TargetRegisterClass *RC = TRI->getRegClassForReg(*MRI, R);
- unsigned ShiftedBank = Bank;
+ // Do not compute stalls for AGPRs
+ if (TRI->hasAGPRs(RC))
+ continue;
- if (Bank != -1 && R == Reg && Op.getSubReg()) {
- unsigned Offset = TRI->getChannelFromSubReg(Op.getSubReg());
+ // Do not compute stalls if sub-register covers all banks
+ if (Op.getSubReg()) {
LaneBitmask LM = TRI->getSubRegIndexLaneMask(Op.getSubReg());
- if (Offset && Bank < NUM_VGPR_BANKS) {
- // If a register spans all banks we cannot shift it to avoid conflict.
+ if (TRI->hasVGPRs(RC)) {
if (TRI->getNumCoveredRegs(LM) >= NUM_VGPR_BANKS)
continue;
- ShiftedBank = (Bank + Offset) % NUM_VGPR_BANKS;
- } else if (Offset > 1 && Bank >= SGPR_BANK_OFFSET) {
- // If a register spans all banks we cannot shift it to avoid conflict.
+ } else {
if (TRI->getNumCoveredRegs(LM) / 2 >= NUM_SGPR_BANKS)
continue;
+ }
+ }
+
+ unsigned ShiftedBank = Bank;
+
+ if (Bank != -1 && R == Reg && (Op.getSubReg() || SubReg)) {
+ unsigned RegOffset =
+ TRI->getChannelFromSubReg(SubReg ? SubReg : (unsigned)AMDGPU::sub0);
+ unsigned Offset = TRI->getChannelFromSubReg(
+ Op.getSubReg() ? Op.getSubReg() : (unsigned)AMDGPU::sub0);
+ if (Bank < NUM_VGPR_BANKS) {
+ unsigned Shift = ((NUM_VGPR_BANKS + Offset) - RegOffset);
+ ShiftedBank = (Bank + Shift) % NUM_VGPR_BANKS;
+ } else if (Bank >= SGPR_BANK_OFFSET) {
+ unsigned Shift = (NUM_SGPR_BANKS + (Offset >> 1)) - (RegOffset >> 1);
ShiftedBank = SGPR_BANK_OFFSET +
- (Bank - SGPR_BANK_OFFSET + (Offset >> 1)) % NUM_SGPR_BANKS;
+ (Bank - SGPR_BANK_OFFSET + Shift) % NUM_SGPR_BANKS;
}
}
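// Note on the arithmetic above: Shift reduces to (Offset - RegOffset)
// modulo the bank count; the NUM_VGPR_BANKS/NUM_SGPR_BANKS bias only
// keeps the intermediate value non-negative, and the SGPR channels are
// halved because each SGPR bank holds two consecutive registers.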
@@ -409,8 +446,8 @@ GCNRegBankReassign::analyzeInst(const MachineInstr &MI, unsigned Reg,
}
unsigned GCNRegBankReassign::getOperandGatherWeight(const MachineInstr& MI,
- unsigned Reg1,
- unsigned Reg2,
+ Register Reg1,
+ Register Reg2,
unsigned StallCycles) const
{
unsigned Defs = 0;
@@ -430,8 +467,8 @@ unsigned GCNRegBankReassign::getOperandGatherWeight(const MachineInstr& MI,
return countPopulation(Defs);
}
-bool GCNRegBankReassign::isReassignable(unsigned Reg) const {
- if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
+bool GCNRegBankReassign::isReassignable(Register Reg) const {
+ if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
return false;
const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
@@ -506,7 +543,7 @@ unsigned GCNRegBankReassign::getFreeBanks(unsigned Mask,
return FreeBanks;
}
-unsigned GCNRegBankReassign::getFreeBanks(unsigned Reg,
+unsigned GCNRegBankReassign::getFreeBanks(Register Reg,
unsigned SubReg,
unsigned Mask,
unsigned UsedBanks) const {
@@ -556,8 +593,8 @@ void GCNRegBankReassign::collectCandidates(MachineInstr& MI,
if (!(OperandMasks[I].Mask & OperandMasks[J].Mask))
continue;
- unsigned Reg1 = OperandMasks[I].Reg;
- unsigned Reg2 = OperandMasks[J].Reg;
+ Register Reg1 = OperandMasks[I].Reg;
+ Register Reg2 = OperandMasks[J].Reg;
unsigned SubReg1 = OperandMasks[I].SubReg;
unsigned SubReg2 = OperandMasks[J].SubReg;
unsigned Mask1 = OperandMasks[I].Mask;
@@ -576,17 +613,17 @@ void GCNRegBankReassign::collectCandidates(MachineInstr& MI,
unsigned FreeBanks1 = getFreeBanks(Reg1, SubReg1, Mask1, UsedBanks);
unsigned FreeBanks2 = getFreeBanks(Reg2, SubReg2, Mask2, UsedBanks);
if (FreeBanks1)
- Candidates.push(Candidate(&MI, Reg1, FreeBanks1, Weight
- + ((Size2 > Size1) ? 1 : 0)));
+ Candidates.push(Weight + ((Size2 > Size1) ? 1 : 0),
+ Candidate(&MI, Reg1, SubReg1, FreeBanks1));
if (FreeBanks2)
- Candidates.push(Candidate(&MI, Reg2, FreeBanks2, Weight
- + ((Size1 > Size2) ? 1 : 0)));
+ Candidates.push(Weight + ((Size1 > Size2) ? 1 : 0),
+ Candidate(&MI, Reg2, SubReg2, FreeBanks2));
}
}
}
-unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg,
- unsigned Reg, int Bank,
+unsigned GCNRegBankReassign::computeStallCycles(Register SrcReg, Register Reg,
+ unsigned SubReg, int Bank,
bool Collect) {
unsigned TotalStallCycles = 0;
SmallSet<const MachineInstr *, 16> Visited;
@@ -598,7 +635,7 @@ unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg,
continue;
unsigned StallCycles;
unsigned UsedBanks;
- std::tie(StallCycles, UsedBanks) = analyzeInst(MI, Reg, Bank);
+ std::tie(StallCycles, UsedBanks) = analyzeInst(MI, Reg, SubReg, Bank);
TotalStallCycles += StallCycles;
if (Collect)
collectCandidates(MI, UsedBanks, StallCycles);
@@ -607,26 +644,26 @@ unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg,
return TotalStallCycles;
}
-unsigned GCNRegBankReassign::scavengeReg(LiveInterval& LI,
- unsigned Bank) const {
- const TargetRegisterClass *RC = MRI->getRegClass(LI.reg);
+MCRegister GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank,
+ unsigned SubReg) const {
+ const TargetRegisterClass *RC = MRI->getRegClass(LI.reg());
unsigned MaxNumRegs = (Bank < NUM_VGPR_BANKS) ? MaxNumVGPRs
: MaxNumSGPRs;
unsigned MaxReg = MaxNumRegs + (Bank < NUM_VGPR_BANKS ? AMDGPU::VGPR0
: AMDGPU::SGPR0);
- for (unsigned Reg : RC->getRegisters()) {
+ for (MCRegister Reg : RC->getRegisters()) {
// Check occupancy limit.
if (TRI->isSubRegisterEq(Reg, MaxReg))
break;
- if (!MRI->isAllocatable(Reg) || getPhysRegBank(Reg) != Bank)
+ if (!MRI->isAllocatable(Reg) || getPhysRegBank(Reg, SubReg) != Bank)
continue;
for (unsigned I = 0; CSRegs[I]; ++I)
if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
!LRM->isPhysRegUsed(CSRegs[I]))
- return AMDGPU::NoRegister;
+ return MCRegister::from(AMDGPU::NoRegister);
LLVM_DEBUG(dbgs() << "Trying register " << printReg(Reg) << '\n');
@@ -634,7 +671,7 @@ unsigned GCNRegBankReassign::scavengeReg(LiveInterval& LI,
return Reg;
}
- return AMDGPU::NoRegister;
+ return MCRegister::from(AMDGPU::NoRegister);
}
unsigned GCNRegBankReassign::tryReassign(Candidate &C) {
@@ -669,7 +706,7 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) {
for (int Bank = 0; Bank < NUM_BANKS; ++Bank) {
if (C.FreeBanks & (1 << Bank)) {
LLVM_DEBUG(dbgs() << "Trying bank " << printBank(Bank) << '\n');
- unsigned Stalls = computeStallCycles(C.Reg, C.Reg, Bank);
+ unsigned Stalls = computeStallCycles(C.Reg, C.Reg, C.SubReg, Bank);
if (Stalls < OrigStalls) {
LLVM_DEBUG(dbgs() << "With bank " << printBank(Bank) << " -> "
<< Stalls << '\n');
@@ -679,11 +716,11 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) {
}
llvm::sort(BankStalls);
- Register OrigReg = VRM->getPhys(C.Reg);
+ MCRegister OrigReg = VRM->getPhys(C.Reg);
LRM->unassign(LI);
while (!BankStalls.empty()) {
BankStall BS = BankStalls.pop_back_val();
- unsigned Reg = scavengeReg(LI, BS.Bank);
+ MCRegister Reg = scavengeReg(LI, BS.Bank, C.SubReg);
if (Reg == AMDGPU::NoRegister) {
LLVM_DEBUG(dbgs() << "No free registers in bank " << printBank(BS.Bank)
<< '\n');
@@ -735,10 +772,16 @@ unsigned GCNRegBankReassign::collectCandidates(MachineFunction &MF,
return TotalStallCycles;
}
-void GCNRegBankReassign::removeCandidates(unsigned Reg) {
- Candidates.remove_if([Reg, this](const Candidate& C) {
- return C.MI->readsRegister(Reg, TRI);
- });
+void GCNRegBankReassign::removeCandidates(Register Reg) {
+ typename CandidateList::iterator Next;
+ for (auto I = Candidates.begin(), E = Candidates.end(); I != E; I = Next) {
+ Next = std::next(I);
+ I->second.remove_if([Reg, this](const Candidate& C) {
+ return C.MI->readsRegister(Reg, TRI);
+ });
+ if (I->second.empty())
+ Candidates.erase(I);
+ }
}
bool GCNRegBankReassign::verifyCycles(MachineFunction &MF,
@@ -770,9 +813,10 @@ bool GCNRegBankReassign::runOnMachineFunction(MachineFunction &MF) {
MaxNumSGPRs = std::min(ST->getMaxNumSGPRs(Occupancy, true), MaxNumSGPRs);
CSRegs = MRI->getCalleeSavedRegs();
-
- RegsUsed.resize(AMDGPU::VGPR_32RegClass.getNumRegs() +
- TRI->getEncodingValue(AMDGPU::SGPR_NULL) / 2 + 1);
+ unsigned NumRegBanks = AMDGPU::VGPR_32RegClass.getNumRegs() +
+ // Not a tight bound
+ AMDGPU::SReg_32RegClass.getNumRegs() / 2 + 1;
+ RegsUsed.resize(NumRegBanks);
LLVM_DEBUG(dbgs() << "=== RegBanks reassign analysis on function " << MF.getName()
<< '\n');
@@ -783,11 +827,7 @@ bool GCNRegBankReassign::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "=== " << StallCycles << " stall cycles detected in "
"function " << MF.getName() << '\n');
- Candidates.sort();
-
- LLVM_DEBUG(dbgs() << "\nCandidates:\n\n";
- for (auto C : Candidates) C.dump(this);
- dbgs() << "\n\n");
+ LLVM_DEBUG(Candidates.dump(this));
unsigned CyclesSaved = 0;
while (!Candidates.empty()) {
@@ -801,13 +841,9 @@ bool GCNRegBankReassign::runOnMachineFunction(MachineFunction &MF) {
Candidates.pop_back();
if (LocalCyclesSaved) {
removeCandidates(C.Reg);
- computeStallCycles(C.Reg, AMDGPU::NoRegister, -1, true);
- Candidates.sort();
+ computeStallCycles(C.Reg, AMDGPU::NoRegister, 0, -1, true);
- LLVM_DEBUG(dbgs() << "\nCandidates:\n\n";
- for (auto C : Candidates)
- C.dump(this);
- dbgs() << "\n\n");
+ LLVM_DEBUG(Candidates.dump(this));
}
}
NumStallsRecovered += CyclesSaved;
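The bank arithmetic behind getPhysRegBank and getRegBankMask is easy to lose in the hunk noise above. Below is a minimal, self-contained sketch of that mapping, assuming the values implied by the comments in the pass (NUM_VGPR_BANKS = 4, NUM_SGPR_BANKS = 8, SGPR_BANK_OFFSET = NUM_VGPR_BANKS) and using plain integers in place of the TRI/encoding lookups; it illustrates the scheme, it is not the pass itself.

#include <cassert>

// Assumed constants: VGPRs round-robin over 4 banks, SGPRs over 8 banks
// of 2 consecutive registers, placed after the VGPR bank ids.
constexpr unsigned NUM_VGPR_BANKS = 4;
constexpr unsigned NUM_SGPR_BANKS = 8;
constexpr unsigned SGPR_BANK_OFFSET = NUM_VGPR_BANKS;

// VGPRNo is the register number relative to VGPR0 (Reg - AMDGPU::VGPR0).
unsigned bankForVGPR(unsigned VGPRNo) {
  return VGPRNo % NUM_VGPR_BANKS;                                // banks 0..3
}

// SGPREncoding is the hardware encoding value of a 32-bit SGPR.
unsigned bankForSGPR(unsigned SGPREncoding) {
  return (SGPREncoding / 2) % NUM_SGPR_BANKS + SGPR_BANK_OFFSET; // banks 4..11
}

int main() {
  assert(bankForVGPR(5) == 1);                    // v5 -> VGPR bank 1
  assert(bankForSGPR(7) == SGPR_BANK_OFFSET + 3); // s6/s7 pair -> SGPR bank 3
  return 0;
}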
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 86a3cb9af32f..aeec3e886327 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -12,25 +12,7 @@
//===----------------------------------------------------------------------===//
#include "GCNRegPressure.h"
-#include "AMDGPUSubtarget.h"
-#include "SIRegisterInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/Config/llvm-config.h"
-#include "llvm/MC/LaneBitmask.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
using namespace llvm;
@@ -87,9 +69,9 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
///////////////////////////////////////////////////////////////////////////////
// GCNRegPressure
-unsigned GCNRegPressure::getRegKind(unsigned Reg,
+unsigned GCNRegPressure::getRegKind(Register Reg,
const MachineRegisterInfo &MRI) {
- assert(Register::isVirtualRegister(Reg));
+ assert(Reg.isVirtual());
const auto RC = MRI.getRegClass(Reg);
auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
return STI->isSGPRClass(RC) ?
@@ -199,7 +181,7 @@ void GCNRegPressure::print(raw_ostream &OS, const GCNSubtarget *ST) const {
static LaneBitmask getDefRegMask(const MachineOperand &MO,
const MachineRegisterInfo &MRI) {
- assert(MO.isDef() && MO.isReg() && Register::isVirtualRegister(MO.getReg()));
+ assert(MO.isDef() && MO.isReg() && MO.getReg().isVirtual());
// We don't rely on read-undef flag because in case of tentative schedule
// tracking it isn't set correctly yet. This works correctly however since
@@ -212,7 +194,7 @@ static LaneBitmask getDefRegMask(const MachineOperand &MO,
static LaneBitmask getUsedRegMask(const MachineOperand &MO,
const MachineRegisterInfo &MRI,
const LiveIntervals &LIS) {
- assert(MO.isUse() && MO.isReg() && Register::isVirtualRegister(MO.getReg()));
+ assert(MO.isUse() && MO.isReg() && MO.getReg().isVirtual());
if (auto SubReg = MO.getSubReg())
return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
@@ -233,7 +215,7 @@ collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI) {
SmallVector<RegisterMaskPair, 8> Res;
for (const auto &MO : MI.operands()) {
- if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
if (!MO.isUse() || !MO.readsReg())
continue;
@@ -241,9 +223,8 @@ collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS,
auto const UsedMask = getUsedRegMask(MO, MRI, LIS);
auto Reg = MO.getReg();
- auto I = std::find_if(Res.begin(), Res.end(), [Reg](const RegisterMaskPair &RM) {
- return RM.RegUnit == Reg;
- });
+ auto I = llvm::find_if(
+ Res, [Reg](const RegisterMaskPair &RM) { return RM.RegUnit == Reg; });
if (I != Res.end())
I->LaneMask |= UsedMask;
else
@@ -330,8 +311,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
MaxPressure = max(AtMIPressure, MaxPressure);
for (const auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() ||
- !Register::isVirtualRegister(MO.getReg()) || MO.isDead())
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual() || MO.isDead())
continue;
auto Reg = MO.getReg();
@@ -410,7 +390,7 @@ void GCNDownwardRPTracker::advanceToNext() {
if (!MO.isReg() || !MO.isDef())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
auto &LiveMask = LiveRegs[Reg];
auto PrevMask = LiveMask;
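Most of the churn in GCNRegPressure.cpp is mechanical: checks that went through the static Register::isVirtualRegister/isPhysicalRegister helpers on a raw unsigned now use the member predicates of llvm::Register. A small sketch of the resulting pattern, assuming it is compiled against LLVM headers of this vintage (the helper name is made up for illustration):

#include "llvm/CodeGen/Register.h"

using llvm::Register;

// Track only valid virtual registers; the null register and physical
// registers are skipped, mirroring the operand filtering above.
static bool isTrackedVirtReg(Register Reg) {
  return Reg.isValid() && Reg.isVirtual();
}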
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 2ef79410719f..ba8c85aa502b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -17,21 +17,15 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
#define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
-#include "AMDGPUSubtarget.h"
-#include "llvm/ADT/DenseMap.h"
+#include "GCNSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/MC/LaneBitmask.h"
-#include "llvm/Support/Debug.h"
#include <algorithm>
-#include <limits>
namespace llvm {
class MachineRegisterInfo;
class raw_ostream;
+class SlotIndex;
struct GCNRegPressure {
enum RegKind {
@@ -90,7 +84,7 @@ struct GCNRegPressure {
private:
unsigned Value[TOTAL_KINDS];
- static unsigned getRegKind(unsigned Reg, const MachineRegisterInfo &MRI);
+ static unsigned getRegKind(Register Reg, const MachineRegisterInfo &MRI);
friend GCNRegPressure max(const GCNRegPressure &P1,
const GCNRegPressure &P2);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index deed50b6db7d..6e2550298dc6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -12,13 +12,7 @@
//===----------------------------------------------------------------------===//
#include "GCNSchedStrategy.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/Support/MathExtras.h"
#define DEBUG_TYPE "machine-scheduler"
@@ -567,8 +561,10 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
SavedMutations.swap(Mutations);
for (auto Region : Regions) {
- if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
+ if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) {
+ ++RegionIdx;
continue;
+ }
RegionBegin = Region.first;
RegionEnd = Region.second;
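The GCNSchedStrategy change is a one-line bug fix: finalizeSchedule() walks Regions with a range-for while keeping a parallel RegionIdx counter, and the early continue for regions that need no unclustered reschedule skipped the increment, so every later region was looked up at the wrong index. A reduced illustration of the pattern (a generic sketch of the parallel-index pitfall, not the scheduler code):

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Regions = {10, 20, 30};
  std::vector<bool> Reschedule = {true, false, true};

  unsigned RegionIdx = 0;
  for (int R : Regions) {
    if (!Reschedule[RegionIdx]) {
      ++RegionIdx; // without this, later regions read stale per-region state
      continue;
    }
    std::printf("rescheduling region %d (idx %u)\n", R, RegionIdx);
    ++RegionIdx;
  }
  return 0;
}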
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
new file mode 100644
index 000000000000..7a7178126444
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -0,0 +1,1064 @@
+//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// AMD GCN specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
+
+#include "AMDGPUCallLowering.h"
+#include "AMDGPUSubtarget.h"
+#include "SIFrameLowering.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+namespace llvm {
+
+class MCInst;
+class MCInstrInfo;
+
+} // namespace llvm
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDGPUGenSubtargetInfo.inc"
+
+namespace llvm {
+
+class GCNTargetMachine;
+
+class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
+ public AMDGPUSubtarget {
+
+ using AMDGPUSubtarget::getMaxWavesPerEU;
+
+public:
+ enum TrapHandlerAbi {
+ TrapHandlerAbiNone = 0,
+ TrapHandlerAbiHsa = 1
+ };
+
+ enum TrapID {
+ TrapIDHardwareReserved = 0,
+ TrapIDHSADebugTrap = 1,
+ TrapIDLLVMTrap = 2,
+ TrapIDLLVMDebugTrap = 3,
+ TrapIDDebugBreakpoint = 7,
+ TrapIDDebugReserved8 = 8,
+ TrapIDDebugReservedFE = 0xfe,
+ TrapIDDebugReservedFF = 0xff
+ };
+
+ enum TrapRegValues {
+ LLVMTrapHandlerRegValue = 1
+ };
+
+private:
+ /// GlobalISel related APIs.
+ std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+ std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
+protected:
+ // Basic subtarget description.
+ Triple TargetTriple;
+ AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
+ unsigned Gen;
+ InstrItineraryData InstrItins;
+ int LDSBankCount;
+ unsigned MaxPrivateElementSize;
+
+ // Possibly statically set by tablegen, but may want to be overridden.
+ bool FastFMAF32;
+ bool FastDenormalF32;
+ bool HalfRate64Ops;
+
+ // Dynamically set bits that enable features.
+ bool FlatForGlobal;
+ bool AutoWaitcntBeforeBarrier;
+ bool UnalignedScratchAccess;
+ bool UnalignedAccessMode;
+ bool HasApertureRegs;
+ bool SupportsXNACK;
+
+ // This should not be used directly. 'TargetID' tracks the dynamic settings
+ // for XNACK.
+ bool EnableXNACK;
+
+ bool EnableCuMode;
+ bool TrapHandler;
+
+ // Used as options.
+ bool EnableLoadStoreOpt;
+ bool EnableUnsafeDSOffsetFolding;
+ bool EnableSIScheduler;
+ bool EnableDS128;
+ bool EnablePRTStrictNull;
+ bool DumpCode;
+
+ // Subtarget properties statically set by tablegen
+ bool FP64;
+ bool FMA;
+ bool MIMG_R128;
+ bool GCN3Encoding;
+ bool CIInsts;
+ bool GFX8Insts;
+ bool GFX9Insts;
+ bool GFX10Insts;
+ bool GFX10_3Insts;
+ bool GFX7GFX8GFX9Insts;
+ bool SGPRInitBug;
+ bool HasSMemRealTime;
+ bool HasIntClamp;
+ bool HasFmaMixInsts;
+ bool HasMovrel;
+ bool HasVGPRIndexMode;
+ bool HasScalarStores;
+ bool HasScalarAtomics;
+ bool HasSDWAOmod;
+ bool HasSDWAScalar;
+ bool HasSDWASdst;
+ bool HasSDWAMac;
+ bool HasSDWAOutModsVOPC;
+ bool HasDPP;
+ bool HasDPP8;
+ bool HasR128A16;
+ bool HasGFX10A16;
+ bool HasG16;
+ bool HasNSAEncoding;
+ bool GFX10_BEncoding;
+ bool HasDLInsts;
+ bool HasDot1Insts;
+ bool HasDot2Insts;
+ bool HasDot3Insts;
+ bool HasDot4Insts;
+ bool HasDot5Insts;
+ bool HasDot6Insts;
+ bool HasMAIInsts;
+ bool HasPkFmacF16Inst;
+ bool HasAtomicFaddInsts;
+ bool SupportsSRAMECC;
+
+ // This should not be used directly. 'TargetID' tracks the dynamic settings
+ // for SRAMECC.
+ bool EnableSRAMECC;
+
+ bool HasNoSdstCMPX;
+ bool HasVscnt;
+ bool HasGetWaveIdInst;
+ bool HasSMemTimeInst;
+ bool HasRegisterBanking;
+ bool HasVOP3Literal;
+ bool HasNoDataDepHazard;
+ bool FlatAddressSpace;
+ bool FlatInstOffsets;
+ bool FlatGlobalInsts;
+ bool FlatScratchInsts;
+ bool ScalarFlatScratchInsts;
+ bool AddNoCarryInsts;
+ bool HasUnpackedD16VMem;
+ bool LDSMisalignedBug;
+ bool HasMFMAInlineLiteralBug;
+ bool UnalignedBufferAccess;
+ bool UnalignedDSAccess;
+ bool ScalarizeGlobal;
+
+ bool HasVcmpxPermlaneHazard;
+ bool HasVMEMtoScalarWriteHazard;
+ bool HasSMEMtoVectorWriteHazard;
+ bool HasInstFwdPrefetchBug;
+ bool HasVcmpxExecWARHazard;
+ bool HasLdsBranchVmemWARHazard;
+ bool HasNSAtoVMEMBug;
+ bool HasOffset3fBug;
+ bool HasFlatSegmentOffsetBug;
+ bool HasImageStoreD16Bug;
+ bool HasImageGather4D16Bug;
+
+ // Dummy feature to use for assembler in tablegen.
+ bool FeatureDisable;
+
+ SelectionDAGTargetInfo TSInfo;
+private:
+ SIInstrInfo InstrInfo;
+ SITargetLowering TLInfo;
+ SIFrameLowering FrameLowering;
+
+public:
+ // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
+ static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
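+ // That is 8191 blocks of 256 dwords (1024 bytes), i.e. 8,387,584 bytes.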
+
+ GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const GCNTargetMachine &TM);
+ ~GCNSubtarget() override;
+
+ GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU, StringRef FS);
+
+ const SIInstrInfo *getInstrInfo() const override {
+ return &InstrInfo;
+ }
+
+ const SIFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+
+ const SITargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+
+ const SIRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+
+ const InlineAsmLowering *getInlineAsmLowering() const override {
+ return InlineAsmLoweringInfo.get();
+ }
+
+ InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
+
+ // Nothing implemented, just prevent crashes on use.
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+
+ Generation getGeneration() const {
+ return (Generation)Gen;
+ }
+
+ /// Return the number of high bits known to be zero for a frame index.
+ unsigned getKnownHighZeroBitsForFrameIndex() const {
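+ // MaxWaveScratchSize fits in 23 bits, so countLeadingZeros() contributes 9
+ // and a wave64 target returns 9 + 6 = 15.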
+ return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
+ }
+
+ int getLDSBankCount() const {
+ return LDSBankCount;
+ }
+
+ unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
+ return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
+ }
+
+ unsigned getConstantBusLimit(unsigned Opcode) const;
+
+ bool hasIntClamp() const {
+ return HasIntClamp;
+ }
+
+ bool hasFP64() const {
+ return FP64;
+ }
+
+ bool hasMIMG_R128() const {
+ return MIMG_R128;
+ }
+
+ bool hasHWFP64() const {
+ return FP64;
+ }
+
+ bool hasFastFMAF32() const {
+ return FastFMAF32;
+ }
+
+ bool hasHalfRate64Ops() const {
+ return HalfRate64Ops;
+ }
+
+ bool hasAddr64() const {
+ return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
+ }
+
+ bool hasFlat() const {
+ return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);
+ }
+
+ // Return true if the target only has the reverse operand versions of VALU
+ // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
+ bool hasOnlyRevVALUShifts() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
+
+ bool hasFractBug() const {
+ return getGeneration() == SOUTHERN_ISLANDS;
+ }
+
+ bool hasBFE() const {
+ return true;
+ }
+
+ bool hasBFI() const {
+ return true;
+ }
+
+ bool hasBFM() const {
+ return hasBFE();
+ }
+
+ bool hasBCNT(unsigned Size) const {
+ return true;
+ }
+
+ bool hasFFBL() const {
+ return true;
+ }
+
+ bool hasFFBH() const {
+ return true;
+ }
+
+ bool hasMed3_16() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
+ bool hasMin3Max3_16() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
+ bool hasFmaMixInsts() const {
+ return HasFmaMixInsts;
+ }
+
+ bool hasCARRY() const {
+ return true;
+ }
+
+ bool hasFMA() const {
+ return FMA;
+ }
+
+ bool hasSwap() const {
+ return GFX9Insts;
+ }
+
+ bool hasScalarPackInsts() const {
+ return GFX9Insts;
+ }
+
+ bool hasScalarMulHiInsts() const {
+ return GFX9Insts;
+ }
+
+ TrapHandlerAbi getTrapHandlerAbi() const {
+ return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
+ }
+
+ /// True if the offset field of DS instructions works as expected. On SI, the
+ /// offset uses a 16-bit adder and does not always wrap properly.
+ bool hasUsableDSOffset() const {
+ return getGeneration() >= SEA_ISLANDS;
+ }
+
+ bool unsafeDSOffsetFoldingEnabled() const {
+ return EnableUnsafeDSOffsetFolding;
+ }
+
+ /// Condition output from div_scale is usable.
+ bool hasUsableDivScaleConditionOutput() const {
+ return getGeneration() != SOUTHERN_ISLANDS;
+ }
+
+ /// Extra wait hazard is needed in some cases before
+ /// s_cbranch_vccnz/s_cbranch_vccz.
+ bool hasReadVCCZBug() const {
+ return getGeneration() <= SEA_ISLANDS;
+ }
+
+ /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
+ bool partialVCCWritesUpdateVCCZ() const {
+ return getGeneration() >= GFX10;
+ }
+
+ /// A read of an SGPR by an SMRD instruction requires 4 wait states when the SGPR
+ /// was written by a VALU instruction.
+ bool hasSMRDReadVALUDefHazard() const {
+ return getGeneration() == SOUTHERN_ISLANDS;
+ }
+
+ /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
+ /// SGPR was written by a VALU Instruction.
+ bool hasVMEMReadSGPRVALUDefHazard() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
+
+ bool hasRFEHazards() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
+
+ /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
+ unsigned getSetRegWaitStates() const {
+ return getGeneration() <= SEA_ISLANDS ? 1 : 2;
+ }
+
+ bool dumpCode() const {
+ return DumpCode;
+ }
+
+ /// Return the amount of LDS that can be used without restricting the
+ /// occupancy below WaveCount.
+ unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
+ const Function &) const;
+
+ bool supportsMinMaxDenormModes() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
+ /// \returns If target supports S_DENORM_MODE.
+ bool hasDenormModeInst() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX10;
+ }
+
+ bool useFlatForGlobal() const {
+ return FlatForGlobal;
+ }
+
+ /// \returns If target supports ds_read/write_b128 and user enables generation
+ /// of ds_read/write_b128.
+ bool useDS128() const {
+ return CIInsts && EnableDS128;
+ }
+
+ /// \return If target supports ds_read/write_b96/128.
+ bool hasDS96AndDS128() const {
+ return CIInsts;
+ }
+
+ /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
+ bool haveRoundOpsF64() const {
+ return CIInsts;
+ }
+
+ /// \returns If MUBUF instructions always perform range checking, even for
+ /// buffer resources used for private memory access.
+ bool privateMemoryResourceIsRangeChecked() const {
+ return getGeneration() < AMDGPUSubtarget::GFX9;
+ }
+
+ /// \returns If target requires PRT Struct NULL support (zero result registers
+ /// for sparse texture support).
+ bool usePRTStrictNull() const {
+ return EnablePRTStrictNull;
+ }
+
+ bool hasAutoWaitcntBeforeBarrier() const {
+ return AutoWaitcntBeforeBarrier;
+ }
+
+ bool hasUnalignedBufferAccess() const {
+ return UnalignedBufferAccess;
+ }
+
+ bool hasUnalignedBufferAccessEnabled() const {
+ return UnalignedBufferAccess && UnalignedAccessMode;
+ }
+
+ bool hasUnalignedDSAccess() const {
+ return UnalignedDSAccess;
+ }
+
+ bool hasUnalignedDSAccessEnabled() const {
+ return UnalignedDSAccess && UnalignedAccessMode;
+ }
+
+ bool hasUnalignedScratchAccess() const {
+ return UnalignedScratchAccess;
+ }
+
+ bool hasUnalignedAccessMode() const {
+ return UnalignedAccessMode;
+ }
+
+ bool hasApertureRegs() const {
+ return HasApertureRegs;
+ }
+
+ bool isTrapHandlerEnabled() const {
+ return TrapHandler;
+ }
+
+ bool isXNACKEnabled() const {
+ return TargetID.isXnackOnOrAny();
+ }
+
+ bool isCuModeEnabled() const {
+ return EnableCuMode;
+ }
+
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
+
+ bool hasFlatScrRegister() const {
+ return hasFlatAddressSpace();
+ }
+
+ bool hasFlatInstOffsets() const {
+ return FlatInstOffsets;
+ }
+
+ bool hasFlatGlobalInsts() const {
+ return FlatGlobalInsts;
+ }
+
+ bool hasFlatScratchInsts() const {
+ return FlatScratchInsts;
+ }
+
+ // Check if target supports ST addressing mode with FLAT scratch instructions.
+ // The ST addressing mode means no registers are used, either VGPR or SGPR,
+ // but only immediate offset is swizzled and added to the FLAT scratch base.
+ bool hasFlatScratchSTMode() const {
+ return hasFlatScratchInsts() && hasGFX10_3Insts();
+ }
+
+ bool hasScalarFlatScratchInsts() const {
+ return ScalarFlatScratchInsts;
+ }
+
+ bool hasGlobalAddTidInsts() const {
+ return GFX10_BEncoding;
+ }
+
+ bool hasAtomicCSub() const {
+ return GFX10_BEncoding;
+ }
+
+ bool hasMultiDwordFlatScratchAddressing() const {
+ return getGeneration() >= GFX9;
+ }
+
+ bool hasFlatSegmentOffsetBug() const {
+ return HasFlatSegmentOffsetBug;
+ }
+
+ bool hasFlatLgkmVMemCountInOrder() const {
+ return getGeneration() > GFX9;
+ }
+
+ bool hasD16LoadStore() const {
+ return getGeneration() >= GFX9;
+ }
+
+ bool d16PreservesUnusedBits() const {
+ return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
+ }
+
+ bool hasD16Images() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
+
+ /// Return if most LDS instructions have an m0 use that requires m0 to be
+ /// initialized.
+ bool ldsRequiresM0Init() const {
+ return getGeneration() < GFX9;
+ }
+
+ // True if the hardware rewinds and replays GWS operations if a wave is
+ // preempted.
+ //
+ // If this is false, a GWS operation requires testing if a nack set the
+ // MEM_VIOL bit, and repeating if so.
+ bool hasGWSAutoReplay() const {
+ return getGeneration() >= GFX9;
+ }
+
+ /// \returns if target has ds_gws_sema_release_all instruction.
+ bool hasGWSSemaReleaseAll() const {
+ return CIInsts;
+ }
+
+ /// \returns true if the target has integer add/sub instructions that do not
+ /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
+ /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
+ /// for saturation.
+ bool hasAddNoCarry() const {
+ return AddNoCarryInsts;
+ }
+
+ bool hasUnpackedD16VMem() const {
+ return HasUnpackedD16VMem;
+ }
+
+ // Covers VS/PS/CS graphics shaders
+ bool isMesaGfxShader(const Function &F) const {
+ return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
+ }
+
+ bool hasMad64_32() const {
+ return getGeneration() >= SEA_ISLANDS;
+ }
+
+ bool hasSDWAOmod() const {
+ return HasSDWAOmod;
+ }
+
+ bool hasSDWAScalar() const {
+ return HasSDWAScalar;
+ }
+
+ bool hasSDWASdst() const {
+ return HasSDWASdst;
+ }
+
+ bool hasSDWAMac() const {
+ return HasSDWAMac;
+ }
+
+ bool hasSDWAOutModsVOPC() const {
+ return HasSDWAOutModsVOPC;
+ }
+
+ bool hasDLInsts() const {
+ return HasDLInsts;
+ }
+
+ bool hasDot1Insts() const {
+ return HasDot1Insts;
+ }
+
+ bool hasDot2Insts() const {
+ return HasDot2Insts;
+ }
+
+ bool hasDot3Insts() const {
+ return HasDot3Insts;
+ }
+
+ bool hasDot4Insts() const {
+ return HasDot4Insts;
+ }
+
+ bool hasDot5Insts() const {
+ return HasDot5Insts;
+ }
+
+ bool hasDot6Insts() const {
+ return HasDot6Insts;
+ }
+
+ bool hasMAIInsts() const {
+ return HasMAIInsts;
+ }
+
+ bool hasPkFmacF16Inst() const {
+ return HasPkFmacF16Inst;
+ }
+
+ bool hasAtomicFaddInsts() const {
+ return HasAtomicFaddInsts;
+ }
+
+ bool hasNoSdstCMPX() const {
+ return HasNoSdstCMPX;
+ }
+
+ bool hasVscnt() const {
+ return HasVscnt;
+ }
+
+ bool hasGetWaveIdInst() const {
+ return HasGetWaveIdInst;
+ }
+
+ bool hasSMemTimeInst() const {
+ return HasSMemTimeInst;
+ }
+
+ bool hasRegisterBanking() const {
+ return HasRegisterBanking;
+ }
+
+ bool hasVOP3Literal() const {
+ return HasVOP3Literal;
+ }
+
+ bool hasNoDataDepHazard() const {
+ return HasNoDataDepHazard;
+ }
+
+ bool vmemWriteNeedsExpWaitcnt() const {
+ return getGeneration() < SEA_ISLANDS;
+ }
+
+ // Scratch is allocated in blocks of 256 dwords per wave for the entire
+ // wavefront. When viewed from the perspective of an arbitrary workitem, this
+ // is 4-byte aligned.
+ //
+ // Only 4-byte alignment is really needed to access anything. Transformations
+ // on the pointer value itself may rely on the alignment / known low bits of
+ // the pointer. Set this to something above the minimum to avoid needing
+ // dynamic realignment in common cases.
+ Align getStackAlignment() const { return Align(16); }
+
+ bool enableMachineScheduler() const override {
+ return true;
+ }
+
+ bool useAA() const override;
+
+ bool enableSubRegLiveness() const override {
+ return true;
+ }
+
+ void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
+ bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
+
+ // static wrappers
+ static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
+
+ // XXX - Why is this here if it isn't in the default pass set?
+ bool enableEarlyIfConversion() const override {
+ return true;
+ }
+
+ bool enableFlatScratch() const;
+
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ unsigned NumRegionInstrs) const override;
+
+ unsigned getMaxNumUserSGPRs() const {
+ return 16;
+ }
+
+ bool hasSMemRealTime() const {
+ return HasSMemRealTime;
+ }
+
+ bool hasMovrel() const {
+ return HasMovrel;
+ }
+
+ bool hasVGPRIndexMode() const {
+ return HasVGPRIndexMode;
+ }
+
+ bool useVGPRIndexMode() const;
+
+ bool hasScalarCompareEq64() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
+
+ bool hasScalarStores() const {
+ return HasScalarStores;
+ }
+
+ bool hasScalarAtomics() const {
+ return HasScalarAtomics;
+ }
+
+ bool hasLDSFPAtomics() const {
+ return GFX8Insts;
+ }
+
+ bool hasDPP() const {
+ return HasDPP;
+ }
+
+ bool hasDPPBroadcasts() const {
+ return HasDPP && getGeneration() < GFX10;
+ }
+
+ bool hasDPPWavefrontShifts() const {
+ return HasDPP && getGeneration() < GFX10;
+ }
+
+ bool hasDPP8() const {
+ return HasDPP8;
+ }
+
+ bool hasR128A16() const {
+ return HasR128A16;
+ }
+
+ bool hasGFX10A16() const {
+ return HasGFX10A16;
+ }
+
+ bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
+
+ bool hasG16() const { return HasG16; }
+
+ bool hasOffset3fBug() const {
+ return HasOffset3fBug;
+ }
+
+ bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
+
+ bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }
+
+ bool hasNSAEncoding() const { return HasNSAEncoding; }
+
+ bool hasGFX10_BEncoding() const {
+ return GFX10_BEncoding;
+ }
+
+ bool hasGFX10_3Insts() const {
+ return GFX10_3Insts;
+ }
+
+ bool hasMadF16() const;
+
+ bool enableSIScheduler() const {
+ return EnableSIScheduler;
+ }
+
+ bool loadStoreOptEnabled() const {
+ return EnableLoadStoreOpt;
+ }
+
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
+
+ bool hasMFMAInlineLiteralBug() const {
+ return HasMFMAInlineLiteralBug;
+ }
+
+ bool has12DWordStoreHazard() const {
+ return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
+ }
+
+ // \returns true if the subtarget supports DWORDX3 load/store instructions.
+ bool hasDwordx3LoadStores() const {
+ return CIInsts;
+ }
+
+ bool hasReadM0MovRelInterpHazard() const {
+ return getGeneration() == AMDGPUSubtarget::GFX9;
+ }
+
+ bool hasReadM0SendMsgHazard() const {
+ return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+ getGeneration() <= AMDGPUSubtarget::GFX9;
+ }
+
+ bool hasVcmpxPermlaneHazard() const {
+ return HasVcmpxPermlaneHazard;
+ }
+
+ bool hasVMEMtoScalarWriteHazard() const {
+ return HasVMEMtoScalarWriteHazard;
+ }
+
+ bool hasSMEMtoVectorWriteHazard() const {
+ return HasSMEMtoVectorWriteHazard;
+ }
+
+ bool hasLDSMisalignedBug() const {
+ return LDSMisalignedBug && !EnableCuMode;
+ }
+
+ bool hasInstFwdPrefetchBug() const {
+ return HasInstFwdPrefetchBug;
+ }
+
+ bool hasVcmpxExecWARHazard() const {
+ return HasVcmpxExecWARHazard;
+ }
+
+ bool hasLdsBranchVmemWARHazard() const {
+ return HasLdsBranchVmemWARHazard;
+ }
+
+ bool hasNSAtoVMEMBug() const {
+ return HasNSAtoVMEMBug;
+ }
+
+ bool hasHardClauses() const { return getGeneration() >= GFX10; }
+
+ /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
+ /// SGPRs
+ unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
+
+ /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
+ /// VGPRs
+ unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
+
+ /// Return occupancy for the given function. Uses the LDS size and the
+ /// numbers of registers if provided.
+ /// Note, occupancy can be affected by the scratch allocation as well, but
+ /// we do not have enough information to compute it.
+ unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
+ unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
+
+ /// \returns true if the flat_scratch register should be initialized with the
+ /// pointer to the wave's scratch memory rather than a size and offset.
+ bool flatScratchIsPointer() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
+ /// \returns true if the machine has merged shaders in which s0-s7 are
+ /// reserved by the hardware and user SGPRs start at s8
+ bool hasMergedShaders() const {
+ return getGeneration() >= GFX9;
+ }
+
+ /// \returns SGPR allocation granularity supported by the subtarget.
+ unsigned getSGPRAllocGranule() const {
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
+ }
+
+ /// \returns SGPR encoding granularity supported by the subtarget.
+ unsigned getSGPREncodingGranule() const {
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
+ }
+
+ /// \returns Total number of SGPRs supported by the subtarget.
+ unsigned getTotalNumSGPRs() const {
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
+ }
+
+ /// \returns Addressable number of SGPRs supported by the subtarget.
+ unsigned getAddressableNumSGPRs() const {
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
+ }
+
+ /// \returns Minimum number of SGPRs that meets the given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
+ }
+
+ /// \returns Maximum number of SGPRs that meets the given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
+ }
+
+ /// \returns Reserved number of SGPRs for given function \p MF.
+ unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
+
+ /// \returns Maximum number of SGPRs that meets number of waves per execution
+ /// unit requirement for function \p MF, or number of SGPRs explicitly
+ /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
+ ///
+ /// \returns Value that meets number of waves per execution unit requirement
+ /// if explicitly requested value cannot be converted to integer, violates
+ /// subtarget's specifications, or does not meet number of waves per execution
+ /// unit requirement.
+ unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
+
+ /// \returns VGPR allocation granularity supported by the subtarget.
+ unsigned getVGPRAllocGranule() const {
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
+ }
+
+ /// \returns VGPR encoding granularity supported by the subtarget.
+ unsigned getVGPREncodingGranule() const {
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
+ }
+
+ /// \returns Total number of VGPRs supported by the subtarget.
+ unsigned getTotalNumVGPRs() const {
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
+ }
+
+ /// \returns Addressable number of VGPRs supported by the subtarget.
+ unsigned getAddressableNumVGPRs() const {
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
+ }
+
+ /// \returns Minimum number of VGPRs that meets given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
+ }
+
+ /// \returns Maximum number of VGPRs that meets given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
+ }
+
+ /// \returns Maximum number of VGPRs that meets number of waves per execution
+ /// unit requirement for function \p MF, or number of VGPRs explicitly
+ /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
+ ///
+ /// \returns Value that meets number of waves per execution unit requirement
+ /// if explicitly requested value cannot be converted to integer, violates
+ /// subtarget's specifications, or does not meet number of waves per execution
+ /// unit requirement.
+ unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
+
+ void getPostRAMutations(
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
+ const override;
+
+ bool isWave32() const {
+ return getWavefrontSize() == 32;
+ }
+
+ bool isWave64() const {
+ return getWavefrontSize() == 64;
+ }
+
+ const TargetRegisterClass *getBoolRC() const {
+ return getRegisterInfo()->getBoolRC();
+ }
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Number of waves per execution unit required to support the given
+ /// \p FlatWorkGroupSize.
+ unsigned
+ getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
+
+ void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
+ SDep &Dep) const override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
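GCNSubtarget.h is new as a file rather than new code: it carries the GCN-specific subclass of AMDGPUSubtarget, largely split out of the existing AMDGPUSubtarget.h (note that GCNRegPressure.h above now includes it instead). A minimal usage sketch, assuming the standard MachineFunction::getSubtarget<> accessor and compilation inside the AMDGPU target directory; the helper itself is hypothetical and only shows how these predicates are typically consulted:

#include "GCNSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"

using namespace llvm;

// Hypothetical helper: pick a DS access width from subtarget features.
static unsigned preferredDSAccessBits(const MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (ST.useDS128())
    return 128;                          // b128 supported and enabled
  return ST.hasDS96AndDS128() ? 96 : 64;
}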
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineTables.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/InstCombineTables.td
index 98b2adc442fa..98b2adc442fa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineTables.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/InstCombineTables.td
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index ea6e9038fd1e..dd0db6c7b655 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -9,17 +9,14 @@
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/ELF.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCValue.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/TargetRegistry.h"
-#include "Utils/AMDGPUBaseInfo.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -61,7 +58,6 @@ void AMDGPUAsmBackend::relaxInstruction(MCInst &Inst,
Res.setOpcode(RelaxedOpcode);
Res.addOperand(Inst.getOperand(0));
Inst = std::move(Res);
- return;
}
bool AMDGPUAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
@@ -237,7 +233,6 @@ MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
- // Use 64-bit ELF for amdgcn
return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple(),
- IsaInfo::hasCodeObjectV3(&STI) ? 1 : 0);
+ getHsaAbiVersion(&STI).getValueOr(0));
}
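The ELF ABI version handed to the backend now comes from getHsaAbiVersion(), which yields an Optional value rather than the old code-object-v3 boolean, collapsed to 0 when no HSA ABI applies. A short sketch of that Optional pattern using the LLVM 12-era llvm::Optional API; the query below is a made-up stand-in, not the real helper:

#include "llvm/ADT/Optional.h"

#include <cstdint>

using llvm::None;
using llvm::Optional;

// Stand-in for getHsaAbiVersion(): non-HSA targets have no ABI version.
static Optional<uint8_t> abiVersionFor(bool IsHsaOS, bool IsCodeObjectV3) {
  if (!IsHsaOS)
    return None;
  return IsCodeObjectV3 ? uint8_t(1) : uint8_t(0);
}

// Callers fold the Optional down to a default, as the backend does above.
static unsigned abiVersionOrZero(bool IsHsaOS, bool IsCodeObjectV3) {
  return abiVersionFor(IsHsaOS, IsCodeObjectV3).getValueOr(0);
}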
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 619fde74e88d..426648d19d55 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -8,15 +8,9 @@
#include "AMDGPUFixupKinds.h"
#include "AMDGPUMCTargetDesc.h"
-#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
index 40437d8fa1a4..1ce7012040da 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
@@ -7,10 +7,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUELFStreamer.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
index 9fbf53c944ef..b56f75132135 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
@@ -14,13 +14,15 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUELFSTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUELFSTREAMER_H
-#include "llvm/MC/MCELFStreamer.h"
-
+#include <memory>
namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCSubtargetInfo;
+class MCELFStreamer;
+class Triple;
+class MCObjectWriter;
MCELFStreamer *createAMDGPUELFStreamer(const Triple &T, MCContext &Context,
std::unique_ptr<MCAsmBackend> MAB,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index fe063d33ea3e..fbf7dc2a72db 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -16,13 +16,9 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -136,7 +132,7 @@ void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
- O << ((OpNo == 0)? "offset:" : " offset:");
+ O << " offset:";
printU16ImmDecOperand(MI, OpNo, O);
}
}
@@ -146,15 +142,16 @@ void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
- O << ((OpNo == 0)? "offset:" : " offset:");
+ O << " offset:";
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- bool IsFlatSeg = !(Desc.TSFlags & SIInstrFlags::IsNonFlatSeg);
+ bool IsFlatSeg = !(Desc.TSFlags &
+ (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch));
if (IsFlatSeg) { // Unsigned offset
printU16ImmDecOperand(MI, OpNo, O);
} else { // Signed offset
- if (AMDGPU::isGFX10(STI)) {
+ if (AMDGPU::isGFX10Plus(STI)) {
O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
} else {
O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
@@ -206,7 +203,7 @@ void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
- if (AMDGPU::isGFX10(STI))
+ if (AMDGPU::isGFX10Plus(STI))
printNamedBit(MI, OpNo, O, "dlc");
}
@@ -285,26 +282,58 @@ void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " compr";
+ printNamedBit(MI, OpNo, O, "compr");
}
void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " vm";
+ printNamedBit(MI, OpNo, O, "vm");
}
void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (unsigned Val = MI->getOperand(OpNo).getImm()) {
- if (AMDGPU::isGFX10(STI))
+}
+
+void AMDGPUInstPrinter::printSymbolicFormat(const MCInst *MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ using namespace llvm::AMDGPU::MTBUFFormat;
+
+ int OpNo =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::format);
+ assert(OpNo != -1);
+
+ unsigned Val = MI->getOperand(OpNo).getImm();
+ if (AMDGPU::isGFX10Plus(STI)) {
+ if (Val == UFMT_DEFAULT)
+ return;
+ if (isValidUnifiedFormat(Val)) {
+ O << " format:[" << getUnifiedFormatName(Val) << ']';
+ } else {
+ O << " format:" << Val;
+ }
+ } else {
+ if (Val == DFMT_NFMT_DEFAULT)
+ return;
+ if (isValidDfmtNfmt(Val, STI)) {
+ unsigned Dfmt;
+ unsigned Nfmt;
+ decodeDfmtNfmt(Val, Dfmt, Nfmt);
+ O << " format:[";
+ if (Dfmt != DFMT_DEFAULT) {
+ O << getDfmtName(Dfmt);
+ if (Nfmt != NFMT_DEFAULT) {
+ O << ',';
+ }
+ }
+ if (Nfmt != NFMT_DEFAULT) {
+ O << getNfmtName(Nfmt, STI);
+ }
+ O << ']';
+ } else {
O << " format:" << Val;
- else {
- O << " dfmt:" << (Val & 15);
- O << ", nfmt:" << (Val >> 4);
}
}
}
@@ -382,10 +411,12 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int16_t SImm = static_cast<int16_t>(Imm);
- if (isInlinableIntLiteral(SImm))
+ if (isInlinableIntLiteral(SImm)) {
O << SImm;
- else
- O << formatHex(static_cast<uint64_t>(Imm));
+ } else {
+ uint64_t Imm16 = static_cast<uint16_t>(Imm);
+ O << formatHex(Imm16);
+ }
}
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
@@ -413,11 +444,13 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
O<< "4.0";
else if (Imm == 0xC400)
O<< "-4.0";
- else if (Imm == 0x3118) {
- assert(STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]);
+ else if (Imm == 0x3118 &&
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) {
O << "0.15915494";
- } else
- O << formatHex(static_cast<uint64_t>(Imm));
+ } else {
+ uint64_t Imm16 = static_cast<uint16_t>(Imm);
+ O << formatHex(Imm16);
+ }
}
void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
@@ -669,6 +702,14 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printDefaultVccOperand(OpNo, STI, O);
break;
}
+
+ if (Desc.TSFlags & SIInstrFlags::MTBUF) {
+ int SOffsetIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::soffset);
+ assert(SOffsetIdx != -1);
+ if ((int)OpNo == SOffsetIdx)
+ printSymbolicFormat(MI, STI, O);
+ }
}
void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
@@ -735,11 +776,11 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
void AMDGPUInstPrinter::printDPP8(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (!AMDGPU::isGFX10(STI))
+ if (!AMDGPU::isGFX10Plus(STI))
llvm_unreachable("dpp8 is not supported on ASICs earlier than GFX10");
unsigned Imm = MI->getOperand(OpNo).getImm();
- O << " dpp8:[" << formatDec(Imm & 0x7);
+ O << "dpp8:[" << formatDec(Imm & 0x7);
for (size_t i = 1; i < 8; ++i) {
O << ',' << formatDec((Imm >> (3 * i)) & 0x7);
}
@@ -753,81 +794,81 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
unsigned Imm = MI->getOperand(OpNo).getImm();
if (Imm <= DppCtrl::QUAD_PERM_LAST) {
- O << " quad_perm:[";
+ O << "quad_perm:[";
O << formatDec(Imm & 0x3) << ',';
O << formatDec((Imm & 0xc) >> 2) << ',';
O << formatDec((Imm & 0x30) >> 4) << ',';
O << formatDec((Imm & 0xc0) >> 6) << ']';
} else if ((Imm >= DppCtrl::ROW_SHL_FIRST) &&
(Imm <= DppCtrl::ROW_SHL_LAST)) {
- O << " row_shl:";
+ O << "row_shl:";
printU4ImmDecOperand(MI, OpNo, O);
} else if ((Imm >= DppCtrl::ROW_SHR_FIRST) &&
(Imm <= DppCtrl::ROW_SHR_LAST)) {
- O << " row_shr:";
+ O << "row_shr:";
printU4ImmDecOperand(MI, OpNo, O);
} else if ((Imm >= DppCtrl::ROW_ROR_FIRST) &&
(Imm <= DppCtrl::ROW_ROR_LAST)) {
- O << " row_ror:";
+ O << "row_ror:";
printU4ImmDecOperand(MI, OpNo, O);
} else if (Imm == DppCtrl::WAVE_SHL1) {
- if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
- O << " /* wave_shl is not supported starting from GFX10 */";
+ if (AMDGPU::isGFX10Plus(STI)) {
+ O << "/* wave_shl is not supported starting from GFX10 */";
return;
}
- O << " wave_shl:1";
+ O << "wave_shl:1";
} else if (Imm == DppCtrl::WAVE_ROL1) {
- if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
- O << " /* wave_rol is not supported starting from GFX10 */";
+ if (AMDGPU::isGFX10Plus(STI)) {
+ O << "/* wave_rol is not supported starting from GFX10 */";
return;
}
- O << " wave_rol:1";
+ O << "wave_rol:1";
} else if (Imm == DppCtrl::WAVE_SHR1) {
- if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
- O << " /* wave_shr is not supported starting from GFX10 */";
+ if (AMDGPU::isGFX10Plus(STI)) {
+ O << "/* wave_shr is not supported starting from GFX10 */";
return;
}
- O << " wave_shr:1";
+ O << "wave_shr:1";
} else if (Imm == DppCtrl::WAVE_ROR1) {
- if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
- O << " /* wave_ror is not supported starting from GFX10 */";
+ if (AMDGPU::isGFX10Plus(STI)) {
+ O << "/* wave_ror is not supported starting from GFX10 */";
return;
}
- O << " wave_ror:1";
+ O << "wave_ror:1";
} else if (Imm == DppCtrl::ROW_MIRROR) {
- O << " row_mirror";
+ O << "row_mirror";
} else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
- O << " row_half_mirror";
+ O << "row_half_mirror";
} else if (Imm == DppCtrl::BCAST15) {
- if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
- O << " /* row_bcast is not supported starting from GFX10 */";
+ if (AMDGPU::isGFX10Plus(STI)) {
+ O << "/* row_bcast is not supported starting from GFX10 */";
return;
}
- O << " row_bcast:15";
+ O << "row_bcast:15";
} else if (Imm == DppCtrl::BCAST31) {
- if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
- O << " /* row_bcast is not supported starting from GFX10 */";
+ if (AMDGPU::isGFX10Plus(STI)) {
+ O << "/* row_bcast is not supported starting from GFX10 */";
return;
}
- O << " row_bcast:31";
+ O << "row_bcast:31";
} else if ((Imm >= DppCtrl::ROW_SHARE_FIRST) &&
(Imm <= DppCtrl::ROW_SHARE_LAST)) {
- if (!AMDGPU::isGFX10(STI)) {
- O << " /* row_share is not supported on ASICs earlier than GFX10 */";
+ if (!AMDGPU::isGFX10Plus(STI)) {
+ O << "/* row_share is not supported on ASICs earlier than GFX10 */";
return;
}
- O << " row_share:";
+ O << "row_share:";
printU4ImmDecOperand(MI, OpNo, O);
} else if ((Imm >= DppCtrl::ROW_XMASK_FIRST) &&
(Imm <= DppCtrl::ROW_XMASK_LAST)) {
- if (!AMDGPU::isGFX10(STI)) {
- O << " /* row_xmask is not supported on ASICs earlier than GFX10 */";
+ if (!AMDGPU::isGFX10Plus(STI)) {
+ O << "/* row_xmask is not supported on ASICs earlier than GFX10 */";
return;
}
O << "row_xmask:";
printU4ImmDecOperand(MI, OpNo, O);
} else {
- O << " /* Invalid dpp_ctrl value */";
+ O << "/* Invalid dpp_ctrl value */";
}
}
@@ -917,10 +958,9 @@ void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
}
}
-template <unsigned N>
void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O,
+ unsigned N) {
unsigned Opc = MI->getOpcode();
int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
unsigned En = MI->getOperand(EnIdx).getImm();
@@ -928,12 +968,8 @@ void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
// If compr is set, print as src0, src0, src1, src1
- if (MI->getOperand(ComprIdx).getImm()) {
- if (N == 1 || N == 2)
- --OpNo;
- else if (N == 3)
- OpNo -= 2;
- }
+ if (MI->getOperand(ComprIdx).getImm())
+ OpNo = OpNo - N + N / 2;
if (En & (1 << N))
printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
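
The compr adjustment in printExpSrcN is folded from an if/else chain into the single expression OpNo = OpNo - N + N / 2, matching the src0, src0, src1, src1 operand layout noted in the comment. A self-contained check, separate from the patch, that the closed form reproduces the old behaviour for all four export sources:

    #include <cassert>

    static unsigned oldAdjust(unsigned OpNo, unsigned N) {
      if (N == 1 || N == 2)
        --OpNo;
      else if (N == 3)
        OpNo -= 2;
      return OpNo;
    }

    static unsigned newAdjust(unsigned OpNo, unsigned N) {
      return OpNo - N + N / 2;   // N=0 -> +0, N=1 -> -1, N=2 -> -1, N=3 -> -2
    }

    int main() {
      for (unsigned N = 0; N < 4; ++N)
        assert(oldAdjust(8, N) == newAdjust(8, N));
      return 0;
    }
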
@@ -944,48 +980,43 @@ void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- printExpSrcN<0>(MI, OpNo, STI, O);
+ printExpSrcN(MI, OpNo, STI, O, 0);
}
void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- printExpSrcN<1>(MI, OpNo, STI, O);
+ printExpSrcN(MI, OpNo, STI, O, 1);
}
void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- printExpSrcN<2>(MI, OpNo, STI, O);
+ printExpSrcN(MI, OpNo, STI, O, 2);
}
void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- printExpSrcN<3>(MI, OpNo, STI, O);
+ printExpSrcN(MI, OpNo, STI, O, 3);
}
void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ using namespace llvm::AMDGPU::Exp;
+
// This is really a 6 bit field.
- uint32_t Tgt = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
-
- if (Tgt <= 7)
- O << " mrt" << Tgt;
- else if (Tgt == 8)
- O << " mrtz";
- else if (Tgt == 9)
- O << " null";
- else if ((Tgt >= 12 && Tgt <= 15) || (Tgt == 16 && AMDGPU::isGFX10(STI)))
- O << " pos" << Tgt - 12;
- else if (AMDGPU::isGFX10(STI) && Tgt == 20)
- O << " prim";
- else if (Tgt >= 32 && Tgt <= 63)
- O << " param" << Tgt - 32;
- else {
- // Reserved values 10, 11
- O << " invalid_target_" << Tgt;
+ unsigned Id = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
+
+ int Index;
+ StringRef TgtName;
+ if (getTgtName(Id, TgtName, Index) && isSupportedTgtId(Id, STI)) {
+ O << ' ' << TgtName;
+ if (Index >= 0)
+ O << Index;
+ } else {
+ O << " invalid_target_" << Id;
}
}
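
printExpTgt now resolves the export target through getTgtName and isSupportedTgtId instead of an open-coded if/else ladder. Those helpers live under the llvm::AMDGPU::Exp namespace pulled in above and are added elsewhere in this patch, so they are not shown here; a hypothetical reconstruction of the mapping, built only from the removed lines, would look like this (a negative Index means the name takes no numeric suffix, and the GFX10-only targets pos4 and prim are filtered separately by isSupportedTgtId):

    // Hypothetical sketch mirroring the removed mapping; not the in-tree code.
    static bool getTgtNameLike(unsigned Id, const char *&Name, int &Index) {
      if (Id <= 7)              { Name = "mrt";   Index = Id;      return true; }
      if (Id == 8)              { Name = "mrtz";  Index = -1;      return true; }
      if (Id == 9)              { Name = "null";  Index = -1;      return true; }
      if (Id >= 12 && Id <= 16) { Name = "pos";   Index = Id - 12; return true; }
      if (Id == 20)             { Name = "prim";  Index = -1;      return true; }
      if (Id >= 32 && Id <= 63) { Name = "param"; Index = Id - 32; return true; }
      return false;             // reserved values such as 10 and 11
    }
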
@@ -1124,9 +1155,9 @@ void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
unsigned Val = MI->getOperand(OpNo).getImm();
if ((Val & ~ENABLE_MASK) != 0) {
- O << " " << formatHex(static_cast<uint64_t>(Val));
+ O << formatHex(static_cast<uint64_t>(Val));
} else {
- O << " gpr_idx(";
+ O << "gpr_idx(";
bool NeedComma = false;
for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
if (Val & (1 << ModeId)) {
@@ -1171,15 +1202,13 @@ void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " high";
+ printNamedBit(MI, OpNo, O, "high");
}
void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " clamp";
+ printNamedBit(MI, OpNo, O, "clamp");
}
void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
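
printHigh and printClampSI above are reduced to calls into a shared printNamedBit helper whose definition is not part of these hunks. Judging only from the bodies it replaces, it is presumably along the lines of:

    // Presumed shape of the shared helper; the actual definition is added
    // elsewhere in this file.
    void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
                                          raw_ostream &O, StringRef BitName) {
      if (MI->getOperand(OpNo).getImm())
        O << ' ' << BitName;
    }
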
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 6dfd23ea72e6..8d13aa682211 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -24,6 +24,7 @@ public:
//Autogenerated by tblgen
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
@@ -99,6 +100,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printFORMAT(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSymbolicFormat(const MCInst *MI,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -109,8 +112,6 @@ private:
raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printImmediateIntV216(uint32_t Imm, const MCSubtargetInfo &STI,
- raw_ostream &O);
void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
@@ -178,10 +179,8 @@ private:
void printDefaultVccOperand(unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
-
- template <unsigned N>
- void printExpSrcN(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O, unsigned N);
void printExpSrc0(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printExpSrc1(const MCInst *MI, unsigned OpNo,
@@ -253,6 +252,7 @@ public:
void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
const MCSubtargetInfo &STI, raw_ostream &O) override;
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 687cfef4559f..1836237c8df5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -40,7 +40,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = false;
HasNoDeadStrip = true;
- WeakRefDirective = ".weakref\t";
//===--- Dwarf Emission Directives -----------------------------------===//
SupportsDebugInformation = true;
DwarfRegNumForCFI = true;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index d7d8c8181b02..1a7ca7e1a330 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 7d3235efc59e..34b2cd1fc1e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -20,15 +20,15 @@
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -74,8 +74,8 @@ MCRegisterInfo *llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour) {
static MCSubtargetInfo *
createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
if (TT.getArch() == Triple::r600)
- return createR600MCSubtargetInfoImpl(TT, CPU, FS);
- return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS);
+ return createR600MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+ return createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index b9cdbc6502e5..71b44a509108 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -15,8 +15,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCTARGETDESC_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCTARGETDESC_H
-#include "llvm/Support/DataTypes.h"
-
#include <memory>
namespace llvm {
@@ -33,7 +31,7 @@ class Target;
class Triple;
class raw_pwrite_stream;
-enum AMDGPUDwarfFlavour { Wave64 = 0, Wave32 = 1 };
+enum AMDGPUDwarfFlavour : unsigned { Wave64 = 0, Wave32 = 1 };
MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour);
@@ -58,34 +56,24 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
-#undef GET_REGINFO_ENUM
#define GET_REGINFO_ENUM
#include "R600GenRegisterInfo.inc"
-#undef GET_REGINFO_ENUM
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_OPERAND_ENUM
#define GET_INSTRINFO_SCHED_ENUM
#include "AMDGPUGenInstrInfo.inc"
-#undef GET_INSTRINFO_SCHED_ENUM
-#undef GET_INSTRINFO_OPERAND_ENUM
-#undef GET_INSTRINFO_ENUM
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_OPERAND_ENUM
#define GET_INSTRINFO_SCHED_ENUM
#include "R600GenInstrInfo.inc"
-#undef GET_INSTRINFO_SCHED_ENUM
-#undef GET_INSTRINFO_OPERAND_ENUM
-#undef GET_INSTRINFO_ENUM
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
-#undef GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_ENUM
#include "R600GenSubtargetInfo.inc"
-#undef GET_SUBTARGETINFO_ENUM
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 3d202d7960d6..f0eb11b70c97 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,31 +11,21 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetStreamer.h"
-#include "AMDGPU.h"
-#include "SIDefines.h"
+#include "AMDGPUPTNote.h"
+#include "AMDKernelCodeT.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/AMDGPUMetadata.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetParser.h"
-
-namespace llvm {
-#include "AMDGPUPTNote.h"
-}
using namespace llvm;
using namespace llvm::AMDGPU;
-using namespace llvm::AMDGPU::HSAMD;
//===----------------------------------------------------------------------===//
// AMDGPUTargetStreamer
@@ -43,9 +33,8 @@ using namespace llvm::AMDGPU::HSAMD;
bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
- if (HSAMD::fromString(std::string(HSAMetadataString), HSAMetadata))
+ if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
return false;
-
return EmitHSAMetadata(HSAMetadata);
}
@@ -79,14 +68,17 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
@@ -94,10 +86,14 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
}
@@ -131,14 +127,17 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
+ case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
+ case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
+ case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
@@ -146,10 +145,14 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
+ case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
+ case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
+ case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
+ case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
@@ -166,10 +169,15 @@ AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
// A hook for emitting stuff at the end.
// We use it for emitting the accumulated PAL metadata as directives.
+// The PAL metadata is reset after it is emitted.
void AMDGPUTargetAsmStreamer::finish() {
std::string S;
getPALMetadata()->toString(S);
OS << S;
+
+ // Reset the pal metadata so its data will not affect a compilation that
+ // reuses this object.
+ getPALMetadata()->reset();
}
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
@@ -228,15 +236,15 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
if (HSAMD::toString(HSAMetadata, HSAMetadataString))
return false;
- OS << '\t' << AssemblerDirectiveBegin << '\n';
+ OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
OS << HSAMetadataString << '\n';
- OS << '\t' << AssemblerDirectiveEnd << '\n';
+ OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
return true;
}
bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
msgpack::Document &HSAMetadataDoc, bool Strict) {
- V3::MetadataVerifier Verifier(Strict);
+ HSAMD::V3::MetadataVerifier Verifier(Strict);
if (!Verifier.verify(HSAMetadataDoc.getRoot()))
return false;
@@ -244,9 +252,9 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
raw_string_ostream StrOS(HSAMetadataString);
HSAMetadataDoc.toYAML(StrOS);
- OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
+ OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
OS << StrOS.str() << '\n';
- OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
+ OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
return true;
}
@@ -302,7 +310,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(
OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
@@ -421,6 +429,7 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
// A hook for emitting stuff at the end.
// We use it for emitting the accumulated PAL metadata as a .note record.
+// The PAL metadata is reset after it is emitted.
void AMDGPUTargetELFStreamer::finish() {
std::string Blob;
const char *Vendor = getPALMetadata()->getVendor();
@@ -430,6 +439,10 @@ void AMDGPUTargetELFStreamer::finish() {
return;
EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
[&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
+
+ // Reset the pal metadata so its data will not affect a compilation that
+ // reuses this object.
+ getPALMetadata()->reset();
}
void AMDGPUTargetELFStreamer::EmitNote(
@@ -554,7 +567,7 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
bool Strict) {
- V3::MetadataVerifier Verifier(Strict);
+ HSAMD::V3::MetadataVerifier Verifier(Strict);
if (!Verifier.verify(HSAMetadataDoc.getRoot()))
return false;
@@ -644,9 +657,10 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
Streamer.emitLabel(KernelDescriptorSymbol);
- Streamer.emitBytes(StringRef(
- (const char*)&(KernelDescriptor),
- offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
+ Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size);
+ Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size);
+ for (uint8_t Res : KernelDescriptor.reserved0)
+ Streamer.emitInt8(Res);
// FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
// expression being created is:
// (start of kernel code) - (start of kernel descriptor)
@@ -658,11 +672,12 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
Context),
sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
- Streamer.emitBytes(StringRef(
- (const char*)&(KernelDescriptor) +
- offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
- sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
- sizeof(KernelDescriptor) -
- offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) -
- sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));
+ for (uint8_t Res : KernelDescriptor.reserved1)
+ Streamer.emitInt8(Res);
+ Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3);
+ Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1);
+ Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2);
+ Streamer.emitInt16(KernelDescriptor.kernel_code_properties);
+ for (uint8_t Res : KernelDescriptor.reserved2)
+ Streamer.emitInt8(Res);
}
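
The EmitAmdhsaKernelDescriptor change above stops writing the in-memory kernel_descriptor_t with emitBytes and instead streams every field through the sized emitInt8/emitInt16/emitInt32 helpers, presumably so the emitted bytes no longer depend on the host's byte order: the streamer writes each integer in the target's (little-endian) order regardless of the machine doing the compiling. A minimal standalone illustration of the same idea, not tied to the MC API:

    #include <cstdint>
    #include <vector>

    // Emit a 32-bit field explicitly in little-endian order instead of
    // copying raw struct memory, which would inherit host endianness.
    static void emitLE32(std::vector<uint8_t> &Out, uint32_t V) {
      for (int I = 0; I < 4; ++I)
        Out.push_back(static_cast<uint8_t>(V >> (8 * I)));
    }

    int main() {
      std::vector<uint8_t> Blob;
      emitLE32(Blob, 0x100);   // e.g. group_segment_fixed_size
      emitLE32(Blob, 0x40);    // e.g. private_segment_fixed_size
      return Blob.size() == 8 ? 0 : 1;
    }
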
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index a19d4646deb2..1ad64532931c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -9,16 +9,12 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
-#include "AMDKernelCodeT.h"
#include "Utils/AMDGPUPALMetadata.h"
-#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/AMDGPUMetadata.h"
-#include "llvm/Support/AMDHSAKernelDescriptor.h"
+
+struct amd_kernel_code_t;
namespace llvm {
-#include "AMDGPUPTNote.h"
class DataLayout;
class Function;
@@ -28,6 +24,16 @@ class MDNode;
class Module;
class Type;
+namespace AMDGPU {
+namespace HSAMD {
+struct Metadata;
+}
+} // namespace AMDGPU
+
+namespace amdhsa {
+struct kernel_descriptor_t;
+}
+
class AMDGPUTargetStreamer : public MCTargetStreamer {
AMDGPUPALMetadata PALMetadata;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index f61470573050..bbca8cbb742c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -13,22 +13,15 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600Defines.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstdint>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 2cd6c3a81d2b..1a1ffcda3b4e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -12,29 +12,15 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstdint>
-#include <cstdlib>
using namespace llvm;
@@ -303,7 +289,7 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
// NSA encoding.
- if (AMDGPU::isGFX10(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
+ if (AMDGPU::isGFX10Plus(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vaddr0);
int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 2bfc2d579533..54c8cdf196ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -51,7 +51,7 @@ def MIMGBaseOpcodesTable : GenericTable {
let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
"Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
"LodOrClampOrMip", "HasD16"];
- GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
+ string TypeOf_BaseOpcode = "MIMGBaseOpcode";
let PrimaryKey = ["BaseOpcode"];
let PrimaryKeyName = "getMIMGBaseOpcodeInfo";
@@ -65,7 +65,7 @@ def MIMGDimInfoTable : GenericTable {
let FilterClass = "AMDGPUDimProps";
let CppTypeName = "MIMGDimInfo";
let Fields = ["Dim", "NumCoords", "NumGradients", "DA", "Encoding", "AsmSuffix"];
- GenericEnum TypeOf_Dim = MIMGDim;
+ string TypeOf_Dim = "MIMGDim";
let PrimaryKey = ["Dim"];
let PrimaryKeyName = "getMIMGDimInfo";
@@ -95,8 +95,8 @@ def MIMGLZMappingTable : GenericTable {
let FilterClass = "MIMGLZMapping";
let CppTypeName = "MIMGLZMappingInfo";
let Fields = ["L", "LZ"];
- GenericEnum TypeOf_L = MIMGBaseOpcode;
- GenericEnum TypeOf_LZ = MIMGBaseOpcode;
+ string TypeOf_L = "MIMGBaseOpcode";
+ string TypeOf_LZ = "MIMGBaseOpcode";
let PrimaryKey = ["L"];
let PrimaryKeyName = "getMIMGLZMappingInfo";
@@ -111,8 +111,8 @@ def MIMGMIPMappingTable : GenericTable {
let FilterClass = "MIMGMIPMapping";
let CppTypeName = "MIMGMIPMappingInfo";
let Fields = ["MIP", "NONMIP"];
- GenericEnum TypeOf_MIP = MIMGBaseOpcode;
- GenericEnum TypeOf_NONMIP = MIMGBaseOpcode;
+ string TypeOf_MIP = "MIMGBaseOpcode";
+ string TypeOf_NONMIP = "MIMGBaseOpcode";
let PrimaryKey = ["MIP"];
let PrimaryKeyName = "getMIMGMIPMappingInfo";
@@ -127,8 +127,8 @@ def MIMGG16MappingTable : GenericTable {
let FilterClass = "MIMGG16Mapping";
let CppTypeName = "MIMGG16MappingInfo";
let Fields = ["G", "G16"];
- GenericEnum TypeOf_G = MIMGBaseOpcode;
- GenericEnum TypeOf_G16 = MIMGBaseOpcode;
+ string TypeOf_G = "MIMGBaseOpcode";
+ string TypeOf_G16 = "MIMGBaseOpcode";
let PrimaryKey = ["G"];
let PrimaryKeyName = "getMIMGG16MappingInfo";
@@ -148,7 +148,7 @@ class MIMG_Base <dag outs, string dns = "">
let hasSideEffects = 0; // XXX ????
let DecoderNamespace = dns;
- let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
+ let isAsmParserOnly = !eq(dns, "");
}
class MIMG <dag outs, string dns = "">
@@ -168,8 +168,8 @@ def MIMGInfoTable : GenericTable {
let FilterClass = "MIMG";
let CppTypeName = "MIMGInfo";
let Fields = ["Opcode", "BaseOpcode", "MIMGEncoding", "VDataDwords", "VAddrDwords"];
- GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
- GenericEnum TypeOf_MIMGEncoding = MIMGEncoding;
+ string TypeOf_BaseOpcode = "MIMGBaseOpcode";
+ string TypeOf_MIMGEncoding = "MIMGEncoding";
let PrimaryKey = ["BaseOpcode", "MIMGEncoding", "VDataDwords", "VAddrDwords"];
let PrimaryKeyName = "getMIMGOpcodeHelper";
@@ -180,14 +180,14 @@ def getMIMGInfo : SearchIndex {
let Key = ["Opcode"];
}
-// This is a separate class so that TableGen memoizes the computations.
+// This class used to use !foldl to memoize the AddrAsmNames list.
+// It turned out that that was much slower than using !filter.
class MIMGNSAHelper<int num_addrs> {
list<string> AddrAsmNames =
- !foldl([]<string>, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], lhs, i,
- !if(!lt(i, num_addrs), !listconcat(lhs, ["vaddr"#!size(lhs)]), lhs));
+ !foreach(i, !filter(i, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
+ !lt(i, num_addrs)), "vaddr" # i);
dag AddrIns = !dag(ins, !foreach(arg, AddrAsmNames, VGPR_32), AddrAsmNames);
- string AddrAsm = "[" # !foldl("$" # !head(AddrAsmNames), !tail(AddrAsmNames), lhs, rhs,
- lhs # ", $" # rhs) # "]";
+ string AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]";
int NSA = !if(!le(num_addrs, 1), ?,
!if(!le(num_addrs, 5), 1,
@@ -308,13 +308,13 @@ multiclass MIMG_NoSampler_Src_Helper <bits<8> op, string asm,
multiclass MIMG_NoSampler <bits<8> op, string asm, bit has_d16, bit mip = 0,
bit isResInfo = 0> {
def "" : MIMGBaseOpcode {
- let Coordinates = !if(isResInfo, 0, 1);
+ let Coordinates = !not(isResInfo);
let LodOrClampOrMip = mip;
let HasD16 = has_d16;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
- mayLoad = !if(isResInfo, 0, 1) in {
+ mayLoad = !not(isResInfo) in {
let VDataDwords = 1 in
defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1>;
let VDataDwords = 2 in
@@ -413,6 +413,8 @@ multiclass MIMG_Store <bits<8> op, string asm, bit has_d16, bit mip = 0> {
defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 0>;
let VDataDwords = 4 in
defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 0>;
+ let VDataDwords = 5 in
+ defm _V5 : MIMG_Store_Addr_Helper <op, asm, VReg_160, 0>;
}
}
@@ -665,12 +667,12 @@ multiclass MIMG_Sampler <bits<8> op, AMDGPUSampleVariant sample, bit wqm = 0,
bit isG16 = 0, bit isGetLod = 0,
string asm = "image_sample"#sample.LowerCaseMod#!if(isG16, "_g16", "")> {
def "" : MIMG_Sampler_BaseOpcode<sample> {
- let HasD16 = !if(isGetLod, 0, 1);
+ let HasD16 = !not(isGetLod);
let G16 = isG16;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
- mayLoad = !if(isGetLod, 0, 1) in {
+ mayLoad = !not(isGetLod) in {
let VDataDwords = 1 in
defm _V1 : MIMG_Sampler_Src_Helper<op, asm, sample, VGPR_32, 1>;
let VDataDwords = 2 in
@@ -708,6 +710,55 @@ multiclass MIMG_Gather <bits<8> op, AMDGPUSampleVariant sample, bit wqm = 0,
multiclass MIMG_Gather_WQM <bits<8> op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;
+class MIMG_IntersectRay_gfx10<int op, string opcode, RegisterClass AddrRC, bit A16>
+ : MIMG_gfx10<op, (outs VReg_128:$vdata), "AMDGPU"> {
+
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
+ !if(A16, (ins GFX10A16:$a16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(A16, "$a16", "");
+
+ let nsa = 0;
+}
+
+class MIMG_IntersectRay_nsa_gfx10<int op, string opcode, int num_addrs, bit A16>
+ : MIMG_nsa_gfx10<op, (outs VReg_128:$vdata), num_addrs, "AMDGPU"> {
+ let InOperandList = !con(nsah.AddrIns,
+ (ins SReg_128:$srsrc),
+ !if(A16, (ins GFX10A16:$a16), (ins)));
+ let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(A16, "$a16", "");
+}
+
+multiclass MIMG_IntersectRay<int op, string opcode, int num_addrs, bit A16> {
+ def "" : MIMGBaseOpcode;
+ let SubtargetPredicate = HasGFX10_BEncoding,
+ AssemblerPredicate = HasGFX10_BEncoding,
+ AsmMatchConverter = !if(A16, "cvtIntersectRay", ""),
+ dmask = 0xf,
+ unorm = 1,
+ d16 = 0,
+ glc = 0,
+ slc = 0,
+ dlc = 0,
+ tfe = 0,
+ lwe = 0,
+ r128 = 1,
+ ssamp = 0,
+ dim = {0, 0, 0},
+ a16 = A16,
+ d16 = 0,
+ BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
+ VDataDwords = 4 in {
+ // TODO: MIMGAddrSize will choose VReg_512 which is a 16 register tuple,
+ // when we only need 9, 11 or 12 depending on A16 field and ptr size.
+ def "_sa" : MIMG_IntersectRay_gfx10<op, opcode, MIMGAddrSize<num_addrs, 0>.RegClass, A16> {
+ let VAddrDwords = !srl(MIMGAddrSize<num_addrs, 0>.RegClass.Size, 5);
+ }
+ def _nsa : MIMG_IntersectRay_nsa_gfx10<op, opcode, num_addrs, A16> {
+ let VAddrDwords = num_addrs;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// MIMG Instructions
//===----------------------------------------------------------------------===//
@@ -832,6 +883,11 @@ defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <0x000000ef, AMDGPUSample_c_cd_cl
let SubtargetPredicate = HasGFX10_BEncoding in
defm IMAGE_MSAA_LOAD : MIMG_NoSampler <0x00000080, "image_msaa_load", 1>;
+defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<0xe6, "image_bvh_intersect_ray", 11, 0>;
+defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<0xe6, "image_bvh_intersect_ray", 8, 1>;
+defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<0xe7, "image_bvh64_intersect_ray", 12, 0>;
+defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<0xe7, "image_bvh64_intersect_ray", 9, 1>;
+
/********** ========================================= **********/
/********** Table of dimension-aware image intrinsics **********/
/********** ========================================= **********/
@@ -840,13 +896,40 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
Intrinsic Intr = I;
MIMGBaseOpcode BaseOpcode = !cast<MIMGBaseOpcode>(!strconcat("IMAGE_", I.P.OpMod));
AMDGPUDimProps Dim = I.P.Dim;
+ AMDGPUImageDimIntrinsicEval DimEval = AMDGPUImageDimIntrinsicEval<I.P>;
+
+ bits<8> NumGradients = DimEval.NumGradientArgs;
+ bits<8> NumDmask = DimEval.NumDmaskArgs;
+ bits<8> NumData = DimEval.NumDataArgs;
+ bits<8> NumVAddrs = DimEval.NumVAddrArgs;
+ bits<8> NumArgs = !add(DimEval.CachePolicyArgIndex, 1);
+
+ bits<8> DMaskIndex = DimEval.DmaskArgIndex;
+ bits<8> VAddrStart = DimEval.VAddrArgIndex;
+ bits<8> GradientStart = DimEval.GradientArgIndex;
+ bits<8> CoordStart = DimEval.CoordArgIndex;
+ bits<8> LodIndex = DimEval.LodArgIndex;
+ bits<8> MipIndex = DimEval.MipArgIndex;
+ bits<8> VAddrEnd = !add(DimEval.VAddrArgIndex, DimEval.NumVAddrArgs);
+ bits<8> RsrcIndex = DimEval.RsrcArgIndex;
+ bits<8> SampIndex = DimEval.SampArgIndex;
+ bits<8> UnormIndex = DimEval.UnormArgIndex;
+ bits<8> TexFailCtrlIndex = DimEval.TexFailCtrlArgIndex;
+ bits<8> CachePolicyIndex = DimEval.CachePolicyArgIndex;
+
+ bits<8> GradientTyArg = !add(I.P.NumRetAndDataAnyTypes,
+ !foldl(0, I.P.ExtraAddrArgs, cnt, arg, !add(cnt, arg.Type.isAny)));
+ bits<8> CoordTyArg = !add(GradientTyArg, !if(I.P.Gradients, 1, 0));
}
def ImageDimIntrinsicTable : GenericTable {
let FilterClass = "ImageDimIntrinsicInfo";
- let Fields = ["Intr", "BaseOpcode", "Dim"];
- GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
- GenericEnum TypeOf_Dim = MIMGDim;
+ let Fields = ["Intr", "BaseOpcode", "Dim", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs",
+ "DMaskIndex", "VAddrStart", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd",
+ "RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex",
+ "GradientTyArg", "CoordTyArg"];
+ string TypeOf_BaseOpcode = "MIMGBaseOpcode";
+ string TypeOf_Dim = "MIMGDim";
let PrimaryKey = ["Intr"];
let PrimaryKeyName = "getImageDimIntrinsicInfo";
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
index d363baa15507..a96fc7ef234e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -15,10 +15,10 @@
//===----------------------------------------------------------------------===//
#include "R600AsmPrinter.h"
-#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "R600Subtarget.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index 290a960ae901..a19d00b62502 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -13,17 +13,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "R600Subtarget.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 8124df68f688..ca1e61393e9a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -13,35 +13,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
+#include "R600MachineFunctionInfo.h"
+#include "R600Subtarget.h"
#include <set>
-#include <utility>
-#include <vector>
using namespace llvm;
@@ -84,12 +59,7 @@ unsigned CFStack::getLoopDepth() {
}
bool CFStack::branchStackContains(CFStack::StackItem Item) {
- for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
- E = BranchStack.end(); I != E; ++I) {
- if (*I == Item)
- return true;
- }
- return false;
+ return llvm::is_contained(BranchStack, Item);
}
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Defines.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Defines.h
index d72534908dcf..613a59ae81f0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Defines.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Defines.h
@@ -10,8 +10,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_R600DEFINES_H
#define LLVM_LIB_TARGET_AMDGPU_R600DEFINES_H
-#include "llvm/MC/MCRegisterInfo.h"
-
// Operand Flags
#define MO_FLAG_CLAMP (1 << 0)
#define MO_FLAG_NEG (1 << 1)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index b97e3c8b8dd7..664e134889e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -14,25 +14,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600RegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cassert>
-#include <cstdint>
-#include <utility>
-#include <vector>
+#include "R600Defines.h"
+#include "R600Subtarget.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 5f682d86d26e..81dc91ab922f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -14,21 +14,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/Pass.h"
-#include <cassert>
-#include <cstdint>
-#include <iterator>
+#include "R600Defines.h"
+#include "R600Subtarget.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
index c568a4aa61c3..abd4086db62c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -7,19 +7,16 @@
//==-----------------------------------------------------------------------===//
#include "R600FrameLowering.h"
-#include "AMDGPUSubtarget.h"
-#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Support/MathExtras.h"
+#include "R600Subtarget.h"
using namespace llvm;
R600FrameLowering::~R600FrameLowering() = default;
/// \returns The number of registers allocated for \p FI.
-int R600FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const {
+StackOffset
+R600FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const R600RegisterInfo *RI
= MF.getSubtarget<R600Subtarget>().getRegisterInfo();
@@ -44,5 +41,5 @@ int R600FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
if (FI != -1)
OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlign(FI));
- return OffsetBytes / (getStackWidth(MF) * 4);
+ return StackOffset::getFixed(OffsetBytes / (getStackWidth(MF) * 4));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.h
index b877ecd29829..f171bc4fea78 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.h
@@ -24,8 +24,8 @@ public:
MachineBasicBlock &MBB) const override {}
void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const override {}
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
bool hasFP(const MachineFunction &MF) const override {
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index dc2e73e1f94e..c0120903396c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -12,42 +12,14 @@
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUSubtarget.h"
+#include "AMDGPU.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600Defines.h"
-#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/DAGCombine.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
+#include "R600Subtarget.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/MathExtras.h"
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-#include <utility>
-#include <vector>
using namespace llvm;
@@ -338,7 +310,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case R600::MASK_WRITE: {
Register maskedRegister = MI.getOperand(0).getReg();
- assert(Register::isVirtualRegister(maskedRegister));
+ assert(maskedRegister.isVirtual());
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
break;
@@ -1550,10 +1522,10 @@ SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
unsigned FrameIndex = FIN->getIndex();
Register IgnoredFrameReg;
- unsigned Offset =
- TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
- return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
- Op.getValueType());
+ StackOffset Offset =
+ TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
+ return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
+ SDLoc(Op), Op.getValueType());
}
CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
@@ -1608,7 +1580,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
if (AMDGPU::isShader(CallConv)) {
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
+ Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
InVals.push_back(Register);
continue;
@@ -1747,7 +1719,7 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
for (unsigned i = 0; i < 4; i++) {
RemapSwizzle[i] = i;
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
+ unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
->getZExtValue();
if (i == Idx)
isUnmovable[Idx] = true;
@@ -1756,7 +1728,7 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
for (unsigned i = 0; i < 4; i++) {
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
+ unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
->getZExtValue();
if (isUnmovable[Idx])
continue;
@@ -2160,7 +2132,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
uint64_t ImmValue = 0;
if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
- ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
+ ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
float FloatValue = FPC->getValueAPF().convertToFloat();
if (FloatValue == 0.0) {
ImmReg = R600::ZERO;
@@ -2172,7 +2144,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
}
} else {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
+ ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
uint64_t Value = C->getZExtValue();
if (Value == 0) {
ImmReg = R600::ZERO;
@@ -2189,8 +2161,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
if (ImmReg == R600::ALU_LITERAL_X) {
if (!Imm.getNode())
return false;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
- assert(C);
+ ConstantSDNode *C = cast<ConstantSDNode>(Imm);
if (C->getZExtValue())
return false;
Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 088cf16d8ed2..7a623f3e304e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -13,33 +13,10 @@
#include "R600InstrInfo.h"
#include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600FrameLowering.h"
-#include "R600RegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/BitVector.h"
+#include "R600Defines.h"
+#include "R600Subtarget.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <iterator>
-#include <utility>
-#include <vector>
using namespace llvm;
@@ -97,7 +74,7 @@ bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const {
for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
E = MBBI->operands_end(); I != E; ++I) {
- if (I->isReg() && !Register::isVirtualRegister(I->getReg()) && I->isUse() &&
+ if (I->isReg() && !I->getReg().isVirtual() && I->isUse() &&
RI.isPhysRegLiveAcrossClauses(I->getReg()))
return false;
}
@@ -242,7 +219,7 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
E = MI.operands_end();
I != E; ++I) {
- if (!I->isReg() || !I->isUse() || Register::isVirtualRegister(I->getReg()))
+ if (!I->isReg() || !I->isUse() || I->getReg().isVirtual())
continue;
if (R600::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
@@ -963,8 +940,9 @@ R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) con
return false;
}
-bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const {
+bool R600InstrInfo::ClobbersPredicate(MachineInstr &MI,
+ std::vector<MachineOperand> &Pred,
+ bool SkipDead) const {
return isPredicateSetter(MI.getOpcode());
}
@@ -1191,15 +1169,15 @@ int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
for (std::pair<unsigned, unsigned> LI : MRI.liveins()) {
- unsigned Reg = LI.first;
- if (Register::isVirtualRegister(Reg) || !IndirectRC->contains(Reg))
+ Register Reg = LI.first;
+ if (Reg.isVirtual() || !IndirectRC->contains(Reg))
continue;
unsigned RegIndex;
unsigned RegEnd;
for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
++RegIndex) {
- if (IndirectRC->getRegister(RegIndex) == Reg)
+ if (IndirectRC->getRegister(RegIndex) == (unsigned)Reg)
break;
}
Offset = std::max(Offset, (int)RegIndex);
@@ -1225,7 +1203,7 @@ int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
const R600FrameLowering *TFL = ST.getFrameLowering();
Register IgnoredFrameReg;
- Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);
+ Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg).getFixed();
return getIndirectIndexBegin(MF) + Offset;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index 873ee08470cb..1e249c6348f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -194,8 +194,8 @@ public:
unsigned NumFCycles, unsigned ExtraFCycles,
BranchProbability Probability) const override;
- bool DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const override;
+ bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred,
+ bool SkipDead) const override;
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
index 2cc21364c439..055e2de59ea1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -353,8 +353,8 @@ class LoadVtxId1 <PatFrag load> : PatFrag <
const MemSDNode *LD = cast<MemSDNode>(N);
return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
(LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- !isa<GlobalValue>(GetUnderlyingObject(
- LD->getMemOperand()->getValue(), CurDAG->getDataLayout())));
+ !isa<GlobalValue>(getUnderlyingObject(
+ LD->getMemOperand()->getValue())));
}]>;
def vtx_id1_az_extloadi8 : LoadVtxId1 <az_extloadi8>;
@@ -365,8 +365,8 @@ class LoadVtxId2 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- isa<GlobalValue>(GetUnderlyingObject(
- LD->getMemOperand()->getValue(), CurDAG->getDataLayout()));
+ isa<GlobalValue>(getUnderlyingObject(
+ LD->getMemOperand()->getValue()));
}]>;
def vtx_id2_az_extloadi8 : LoadVtxId2 <az_extloadi8>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
index 7569a2629539..f85a68706287 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -12,13 +12,8 @@
//===----------------------------------------------------------------------===//
#include "R600MachineScheduler.h"
-#include "AMDGPUSubtarget.h"
-#include "R600InstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
+#include "R600Subtarget.h"
using namespace llvm;
@@ -45,7 +40,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
std::vector<SUnit *> &QDst)
{
- QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
+ llvm::append_range(QDst, QSrc);
QSrc.clear();
}
@@ -183,7 +178,7 @@ isPhysicalRegCopy(MachineInstr *MI) {
if (MI->getOpcode() != R600::COPY)
return false;
- return !Register::isVirtualRegister(MI->getOperand(1).getReg());
+ return !MI->getOperand(1).getReg().isVirtual();
}
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
@@ -207,9 +202,9 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
}
-bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
+bool R600SchedStrategy::regBelongsToClass(Register Reg,
const TargetRegisterClass *RC) const {
- if (!Register::isVirtualRegister(Reg)) {
+ if (!Reg.isVirtual()) {
return RC->contains(Reg);
} else {
return MRI->getRegClass(Reg) == RC;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
index bc66f2ef5907..abcc37f8400d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
@@ -80,7 +80,7 @@ private:
bool VLIW5;
int getInstKind(SUnit *SU);
- bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
+ bool regBelongsToClass(Register Reg, const TargetRegisterClass *RC) const;
AluKind getAluKind(SUnit *SU) const;
void LoadAlu();
unsigned AvailablesAluCount() const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
index 1fe92d2269d3..5fd912e0fb39 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
@@ -27,27 +27,12 @@
#include "AMDGPU.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <tuple>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index b0620663a230..8f19a3e478e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -27,30 +27,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "R600Defines.h"
+#include "R600Subtarget.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <utility>
-#include <vector>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index 176269f9b68c..eaac938b098a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -14,17 +14,12 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600InstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "R600Subtarget.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
index 78ef71cdf8e3..e4f7d89bf4c9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -12,11 +12,9 @@
//===----------------------------------------------------------------------===//
#include "R600RegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "R600Defines.h"
+#include "R600Subtarget.h"
using namespace llvm;
@@ -94,8 +92,8 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
}
}
-bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const {
- assert(!Register::isVirtualRegister(Reg));
+bool R600RegisterInfo::isPhysRegLiveAcrossClauses(Register Reg) const {
+ assert(!Reg.isVirtual());
switch (Reg) {
case R600::OQAP:
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
index 06981c4cf9c5..1308e9fff1fe 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -45,7 +45,7 @@ struct R600RegisterInfo final : public R600GenRegisterInfo {
// \returns true if \p Reg can be defined in one ALU clause and used in
// another.
- bool isPhysRegLiveAcrossClauses(unsigned Reg) const;
+ bool isPhysRegLiveAcrossClauses(Register Reg) const;
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Subtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Subtarget.h
new file mode 100644
index 000000000000..07238da18c67
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -0,0 +1,174 @@
+//=====-- R600Subtarget.h - Define Subtarget for AMDGPU R600 ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// AMDGPU R600 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_R600SUBTARGET_H
+#define LLVM_LIB_TARGET_AMDGPU_R600SUBTARGET_H
+
+#include "AMDGPUSubtarget.h"
+#include "R600FrameLowering.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+namespace llvm {
+
+class MCInst;
+class MCInstrInfo;
+
+} // namespace llvm
+
+#define GET_SUBTARGETINFO_HEADER
+#include "R600GenSubtargetInfo.inc"
+
+namespace llvm {
+
+class R600Subtarget final : public R600GenSubtargetInfo,
+ public AMDGPUSubtarget {
+private:
+ R600InstrInfo InstrInfo;
+ R600FrameLowering FrameLowering;
+ bool FMA;
+ bool CaymanISA;
+ bool CFALUBug;
+ bool HasVertexCache;
+ bool R600ALUInst;
+ bool FP64;
+ short TexVTXClauseSize;
+ Generation Gen;
+ R600TargetLowering TLInfo;
+ InstrItineraryData InstrItins;
+ SelectionDAGTargetInfo TSInfo;
+
+public:
+ R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
+ const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+
+ const R600FrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+
+ const R600TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+
+ const R600RegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ // Nothing implemented, just prevent crashes on use.
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+
+ Generation getGeneration() const {
+ return Gen;
+ }
+
+ Align getStackAlignment() const { return Align(4); }
+
+ R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU, StringRef FS);
+
+ bool hasBFE() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBFI() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBCNT(unsigned Size) const {
+ if (Size == 32)
+ return (getGeneration() >= EVERGREEN);
+
+ return false;
+ }
+
+ bool hasBORROW() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasCARRY() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasCaymanISA() const {
+ return CaymanISA;
+ }
+
+ bool hasFFBL() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasFFBH() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasFMA() const { return FMA; }
+
+ bool hasCFAluBug() const { return CFALUBug; }
+
+ bool hasVertexCache() const { return HasVertexCache; }
+
+ short getTexVTXClauseSize() const { return TexVTXClauseSize; }
+
+ bool enableMachineScheduler() const override {
+ return true;
+ }
+
+ bool enableSubRegLiveness() const override {
+ return true;
+ }
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Number of waves per execution unit required to support the given
+ /// \p FlatWorkGroupSize.
+ unsigned
+ getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_R600SUBTARGET_H
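The new R600Subtarget.h keeps most feature predicates as a single generation comparison (getGeneration() >= EVERGREEN). A minimal standalone sketch of that gating pattern follows, with illustrative stand-ins for the AMDGPU generation enum rather than the real TableGen-backed subtarget:

    #include <iostream>

    // Illustrative stand-ins for the AMDGPU generation ordering.
    enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };

    struct MiniSubtarget {
      Generation Gen;
      // Feature queries collapse to a generation check, as in R600Subtarget.
      bool hasBFE() const { return Gen >= EVERGREEN; }
      bool hasBCNT(unsigned Size) const { return Size == 32 && Gen >= EVERGREEN; }
    };

    int main() {
      MiniSubtarget OldGpu{R700}, NewGpu{NORTHERN_ISLANDS};
      std::cout << OldGpu.hasBFE() << " " << NewGpu.hasBFE() << "\n";       // 0 1
      std::cout << NewGpu.hasBCNT(64) << " " << NewGpu.hasBCNT(32) << "\n"; // 0 1
    }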
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
index 90e48c63b5dc..3b753cb66ead 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
@@ -16,15 +16,9 @@
//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-img-init"
@@ -80,9 +74,8 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
- // Check for instructions that don't have tfe or lwe fields
- // There shouldn't be any at this point.
- assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
+ if (!TFE && !LWE) // intersect_ray
+ continue;
unsigned TFEVal = TFE->getImm();
unsigned LWEVal = LWE->getImm();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 3c41bf1fef5e..625749deb3a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -12,36 +12,18 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
+#include "GCNSubtarget.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <cassert>
-#include <utility>
using namespace llvm;
@@ -313,8 +295,15 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
Value *Exec = popSaved();
Instruction *FirstInsertionPt = &*BB->getFirstInsertionPt();
- if (!isa<UndefValue>(Exec) && !isa<UnreachableInst>(FirstInsertionPt))
+ if (!isa<UndefValue>(Exec) && !isa<UnreachableInst>(FirstInsertionPt)) {
+ Instruction *ExecDef = cast<Instruction>(Exec);
+ BasicBlock *DefBB = ExecDef->getParent();
+ if (!DT->dominates(DefBB, BB)) {
+ // Split edge to make Def dominate Use
+ FirstInsertionPt = &*SplitEdge(DefBB, BB, DT, LI)->getFirstInsertionPt();
+ }
CallInst::Create(EndCf, Exec, "", FirstInsertionPt);
+ }
}
/// Annotate the control flow with intrinsics so the backend can
@@ -327,7 +316,6 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
const TargetMachine &TM = TPC.getTM<TargetMachine>();
initialize(*F.getParent(), TM.getSubtarget<GCNSubtarget>(F));
-
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
BasicBlock *BB = *I;
@@ -344,7 +332,8 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
if (isTopOfStack(BB))
closeControlFlow(BB);
- handleLoop(Term);
+ if (DT->dominates(Term->getSuccessor(1), BB))
+ handleLoop(Term);
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
index 4f7d255eb450..c83802b323c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -33,26 +33,27 @@ enum : uint64_t {
VOP2 = 1 << 8,
VOPC = 1 << 9,
- // TODO: Should this be spilt into VOP3 a and b?
+ // TODO: Should this be spilt into VOP3 a and b?
VOP3 = 1 << 10,
VOP3P = 1 << 12,
VINTRP = 1 << 13,
SDWA = 1 << 14,
DPP = 1 << 15,
+ TRANS = 1 << 16,
// Memory instruction formats.
- MUBUF = 1 << 16,
- MTBUF = 1 << 17,
- SMRD = 1 << 18,
- MIMG = 1 << 19,
- EXP = 1 << 20,
- FLAT = 1 << 21,
- DS = 1 << 22,
+ MUBUF = 1 << 17,
+ MTBUF = 1 << 18,
+ SMRD = 1 << 19,
+ MIMG = 1 << 20,
+ EXP = 1 << 21,
+ FLAT = 1 << 22,
+ DS = 1 << 23,
// Pseudo instruction formats.
- VGPRSpill = 1 << 23,
- SGPRSpill = 1 << 24,
+ VGPRSpill = 1 << 24,
+ SGPRSpill = 1 << 25,
// High bits - other information.
VM_CNT = UINT64_C(1) << 32,
@@ -89,8 +90,8 @@ enum : uint64_t {
// Is a D16 buffer instruction.
D16Buf = UINT64_C(1) << 50,
- // FLAT instruction accesses FLAT_GLBL or FLAT_SCRATCH segment.
- IsNonFlatSeg = UINT64_C(1) << 51,
+ // FLAT instruction accesses FLAT_GLBL segment.
+ IsFlatGlobal = UINT64_C(1) << 51,
// Uses floating point double precision rounding mode
FPDPRounding = UINT64_C(1) << 52,
@@ -102,7 +103,10 @@ enum : uint64_t {
IsMAI = UINT64_C(1) << 54,
// Is a DOT instruction.
- IsDOT = UINT64_C(1) << 55
+ IsDOT = UINT64_C(1) << 55,
+
+ // FLAT instruction accesses FLAT_SCRATCH segment.
+ IsFlatScratch = UINT64_C(1) << 56
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -217,7 +221,8 @@ enum EncBits : unsigned {
SRC1_ENABLE = 1 << ID_SRC1,
SRC2_ENABLE = 1 << ID_SRC2,
DST_ENABLE = 1 << ID_DST,
- ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE
+ ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE,
+ UNDEF = 0xFFFF
};
} // namespace VGPRIndexMode
@@ -242,8 +247,8 @@ enum : unsigned {
SGPR_MAX_GFX10 = 105,
TTMP_VI_MIN = 112,
TTMP_VI_MAX = 123,
- TTMP_GFX9_GFX10_MIN = 108,
- TTMP_GFX9_GFX10_MAX = 123,
+ TTMP_GFX9PLUS_MIN = 108,
+ TTMP_GFX9PLUS_MAX = 123,
INLINE_INTEGER_C_MIN = 128,
INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64
INLINE_INTEGER_C_MAX = 208,
@@ -392,6 +397,172 @@ enum ModeRegisterMasks : uint32_t {
} // namespace Hwreg
+namespace MTBUFFormat {
+
+enum DataFormat : int64_t {
+ DFMT_INVALID = 0,
+ DFMT_8,
+ DFMT_16,
+ DFMT_8_8,
+ DFMT_32,
+ DFMT_16_16,
+ DFMT_10_11_11,
+ DFMT_11_11_10,
+ DFMT_10_10_10_2,
+ DFMT_2_10_10_10,
+ DFMT_8_8_8_8,
+ DFMT_32_32,
+ DFMT_16_16_16_16,
+ DFMT_32_32_32,
+ DFMT_32_32_32_32,
+ DFMT_RESERVED_15,
+
+ DFMT_MIN = DFMT_INVALID,
+ DFMT_MAX = DFMT_RESERVED_15,
+
+ DFMT_UNDEF = -1,
+ DFMT_DEFAULT = DFMT_8,
+
+ DFMT_SHIFT = 0,
+ DFMT_MASK = 0xF
+};
+
+enum NumFormat : int64_t {
+ NFMT_UNORM = 0,
+ NFMT_SNORM,
+ NFMT_USCALED,
+ NFMT_SSCALED,
+ NFMT_UINT,
+ NFMT_SINT,
+ NFMT_RESERVED_6, // VI and GFX9
+ NFMT_SNORM_OGL = NFMT_RESERVED_6, // SI and CI only
+ NFMT_FLOAT,
+
+ NFMT_MIN = NFMT_UNORM,
+ NFMT_MAX = NFMT_FLOAT,
+
+ NFMT_UNDEF = -1,
+ NFMT_DEFAULT = NFMT_UNORM,
+
+ NFMT_SHIFT = 4,
+ NFMT_MASK = 7
+};
+
+enum MergedFormat : int64_t {
+ DFMT_NFMT_UNDEF = -1,
+ DFMT_NFMT_DEFAULT = ((DFMT_DEFAULT & DFMT_MASK) << DFMT_SHIFT) |
+ ((NFMT_DEFAULT & NFMT_MASK) << NFMT_SHIFT),
+
+
+ DFMT_NFMT_MASK = (DFMT_MASK << DFMT_SHIFT) | (NFMT_MASK << NFMT_SHIFT),
+
+ DFMT_NFMT_MAX = DFMT_NFMT_MASK
+};
+
+enum UnifiedFormat : int64_t {
+ UFMT_INVALID = 0,
+
+ UFMT_8_UNORM,
+ UFMT_8_SNORM,
+ UFMT_8_USCALED,
+ UFMT_8_SSCALED,
+ UFMT_8_UINT,
+ UFMT_8_SINT,
+
+ UFMT_16_UNORM,
+ UFMT_16_SNORM,
+ UFMT_16_USCALED,
+ UFMT_16_SSCALED,
+ UFMT_16_UINT,
+ UFMT_16_SINT,
+ UFMT_16_FLOAT,
+
+ UFMT_8_8_UNORM,
+ UFMT_8_8_SNORM,
+ UFMT_8_8_USCALED,
+ UFMT_8_8_SSCALED,
+ UFMT_8_8_UINT,
+ UFMT_8_8_SINT,
+
+ UFMT_32_UINT,
+ UFMT_32_SINT,
+ UFMT_32_FLOAT,
+
+ UFMT_16_16_UNORM,
+ UFMT_16_16_SNORM,
+ UFMT_16_16_USCALED,
+ UFMT_16_16_SSCALED,
+ UFMT_16_16_UINT,
+ UFMT_16_16_SINT,
+ UFMT_16_16_FLOAT,
+
+ UFMT_10_11_11_UNORM,
+ UFMT_10_11_11_SNORM,
+ UFMT_10_11_11_USCALED,
+ UFMT_10_11_11_SSCALED,
+ UFMT_10_11_11_UINT,
+ UFMT_10_11_11_SINT,
+ UFMT_10_11_11_FLOAT,
+
+ UFMT_11_11_10_UNORM,
+ UFMT_11_11_10_SNORM,
+ UFMT_11_11_10_USCALED,
+ UFMT_11_11_10_SSCALED,
+ UFMT_11_11_10_UINT,
+ UFMT_11_11_10_SINT,
+ UFMT_11_11_10_FLOAT,
+
+ UFMT_10_10_10_2_UNORM,
+ UFMT_10_10_10_2_SNORM,
+ UFMT_10_10_10_2_USCALED,
+ UFMT_10_10_10_2_SSCALED,
+ UFMT_10_10_10_2_UINT,
+ UFMT_10_10_10_2_SINT,
+
+ UFMT_2_10_10_10_UNORM,
+ UFMT_2_10_10_10_SNORM,
+ UFMT_2_10_10_10_USCALED,
+ UFMT_2_10_10_10_SSCALED,
+ UFMT_2_10_10_10_UINT,
+ UFMT_2_10_10_10_SINT,
+
+ UFMT_8_8_8_8_UNORM,
+ UFMT_8_8_8_8_SNORM,
+ UFMT_8_8_8_8_USCALED,
+ UFMT_8_8_8_8_SSCALED,
+ UFMT_8_8_8_8_UINT,
+ UFMT_8_8_8_8_SINT,
+
+ UFMT_32_32_UINT,
+ UFMT_32_32_SINT,
+ UFMT_32_32_FLOAT,
+
+ UFMT_16_16_16_16_UNORM,
+ UFMT_16_16_16_16_SNORM,
+ UFMT_16_16_16_16_USCALED,
+ UFMT_16_16_16_16_SSCALED,
+ UFMT_16_16_16_16_UINT,
+ UFMT_16_16_16_16_SINT,
+ UFMT_16_16_16_16_FLOAT,
+
+ UFMT_32_32_32_UINT,
+ UFMT_32_32_32_SINT,
+ UFMT_32_32_32_FLOAT,
+ UFMT_32_32_32_32_UINT,
+ UFMT_32_32_32_32_SINT,
+ UFMT_32_32_32_32_FLOAT,
+
+ UFMT_FIRST = UFMT_INVALID,
+ UFMT_LAST = UFMT_32_32_32_32_FLOAT,
+
+ UFMT_MAX = 127,
+
+ UFMT_UNDEF = -1,
+ UFMT_DEFAULT = UFMT_8_UNORM
+};
+
+} // namespace MTBUFFormat
+
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.
enum Id : unsigned { // id of symbolic names
@@ -518,19 +689,67 @@ enum DppFiMode {
};
} // namespace DPP
+
+namespace Exp {
+
+enum Target : unsigned {
+ ET_MRT0 = 0,
+ ET_MRT7 = 7,
+ ET_MRTZ = 8,
+ ET_NULL = 9,
+ ET_POS0 = 12,
+ ET_POS3 = 15,
+ ET_POS4 = 16, // GFX10+
+ ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget
+ ET_PRIM = 20, // GFX10+
+ ET_PARAM0 = 32,
+ ET_PARAM31 = 63,
+
+ ET_NULL_MAX_IDX = 0,
+ ET_MRTZ_MAX_IDX = 0,
+ ET_PRIM_MAX_IDX = 0,
+ ET_MRT_MAX_IDX = 7,
+ ET_POS_MAX_IDX = 4,
+ ET_PARAM_MAX_IDX = 31,
+
+ ET_INVALID = 255,
+};
+
+} // namespace Exp
} // namespace AMDGPU
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
+#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
+#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
+#define S_00B028_MEM_ORDERED(x) (((x) & 0x1) << 25)
+#define G_00B028_MEM_ORDERED(x) (((x) >> 25) & 0x1)
+#define C_00B028_MEM_ORDERED 0xFDFFFFFF
+
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
#define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8)
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128
+#define S_00B128_MEM_ORDERED(x) (((x) & 0x1) << 27)
+#define G_00B128_MEM_ORDERED(x) (((x) >> 27) & 0x1)
+#define C_00B128_MEM_ORDERED 0xF7FFFFFF
+
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228
+#define S_00B228_WGP_MODE(x) (((x) & 0x1) << 27)
+#define G_00B228_WGP_MODE(x) (((x) >> 27) & 0x1)
+#define C_00B228_WGP_MODE 0xF7FFFFFF
+#define S_00B228_MEM_ORDERED(x) (((x) & 0x1) << 25)
+#define G_00B228_MEM_ORDERED(x) (((x) >> 25) & 0x1)
+#define C_00B228_MEM_ORDERED 0xFDFFFFFF
+
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES 0x00B328
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS 0x00B428
+#define S_00B428_WGP_MODE(x) (((x) & 0x1) << 26)
+#define G_00B428_WGP_MODE(x) (((x) >> 26) & 0x1)
+#define C_00B428_WGP_MODE 0xFBFFFFFF
+#define S_00B428_MEM_ORDERED(x) (((x) & 0x1) << 24)
+#define G_00B428_MEM_ORDERED(x) (((x) >> 24) & 0x1)
+#define C_00B428_MEM_ORDERED 0xFEFFFFFF
+
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS 0x00B528
-#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
-#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
-#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
#define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0)
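The MTBUFFormat additions above pack a 4-bit data format and a 3-bit numeric format into one merged field; DFMT_NFMT_DEFAULT is exactly that expression applied to DFMT_8 and NFMT_UNORM. Below is a small standalone sketch of the pack/unpack arithmetic, reusing the shift and mask constants from the hunk; the helper names (packDfmtNfmt and friends) are made up for illustration:

    #include <cassert>
    #include <cstdint>

    // Constants copied from the MTBUFFormat hunk above.
    constexpr int64_t DFMT_SHIFT = 0, DFMT_MASK = 0xF;
    constexpr int64_t NFMT_SHIFT = 4, NFMT_MASK = 0x7;

    // Hypothetical helpers showing the merged dfmt/nfmt encoding.
    constexpr int64_t packDfmtNfmt(int64_t Dfmt, int64_t Nfmt) {
      return ((Dfmt & DFMT_MASK) << DFMT_SHIFT) |
             ((Nfmt & NFMT_MASK) << NFMT_SHIFT);
    }
    constexpr int64_t unpackDfmt(int64_t Merged) { return (Merged >> DFMT_SHIFT) & DFMT_MASK; }
    constexpr int64_t unpackNfmt(int64_t Merged) { return (Merged >> NFMT_SHIFT) & NFMT_MASK; }

    int main() {
      // DFMT_8 = 1 and NFMT_UNORM = 0, so the default merged value packs to 1.
      constexpr int64_t Default = packDfmtNfmt(/*DFMT_8=*/1, /*NFMT_UNORM=*/0);
      static_assert(Default == 1, "DFMT_NFMT_DEFAULT packs to 1 with these values");
      assert(unpackDfmt(Default) == 1 && unpackNfmt(Default) == 0);
      return 0;
    }

The S_/G_/C_ SPI_SHADER_PGM_RSRC macros added in the same hunk follow the same shift-and-mask shape for set, get, and clear of a single register field.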
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index ef64c5674bd1..34f59bf34dd5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -65,37 +65,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-#include <list>
-#include <map>
-#include <tuple>
-#include <utility>
using namespace llvm;
@@ -122,7 +96,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- void processPHINode(MachineInstr &MI);
+ MachineBasicBlock *processPHINode(MachineInstr &MI);
StringRef getPassName() const override { return "SI Fix SGPR copies"; }
@@ -154,8 +128,7 @@ static bool hasVectorOperands(const MachineInstr &MI,
const SIRegisterInfo *TRI) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (!MI.getOperand(i).isReg() ||
- !Register::isVirtualRegister(MI.getOperand(i).getReg()))
+ if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
continue;
if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg())))
@@ -171,14 +144,14 @@ getCopyRegClasses(const MachineInstr &Copy,
Register DstReg = Copy.getOperand(0).getReg();
Register SrcReg = Copy.getOperand(1).getReg();
- const TargetRegisterClass *SrcRC = Register::isVirtualRegister(SrcReg)
+ const TargetRegisterClass *SrcRC = SrcReg.isVirtual()
? MRI.getRegClass(SrcReg)
: TRI.getPhysRegClass(SrcReg);
// We don't really care about the subregister here.
// SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
- const TargetRegisterClass *DstRC = Register::isVirtualRegister(DstReg)
+ const TargetRegisterClass *DstRC = DstReg.isVirtual()
? MRI.getRegClass(DstReg)
: TRI.getPhysRegClass(DstReg);
@@ -206,8 +179,7 @@ static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
auto &Src = MI.getOperand(1);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = Src.getReg();
- if (!Register::isVirtualRegister(SrcReg) ||
- !Register::isVirtualRegister(DstReg))
+ if (!SrcReg.isVirtual() || !DstReg.isVirtual())
return false;
for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
@@ -215,8 +187,12 @@ static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
if (UseMI == &MI)
continue;
if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
- UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
- !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
+ UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
+ return false;
+
+ unsigned OpIdx = UseMI->getOperandNo(&MO);
+ if (OpIdx >= UseMI->getDesc().getNumOperands() ||
+ !TII->isOperandLegal(*UseMI, OpIdx, &Src))
return false;
}
// Change VGPR to SGPR destination.
@@ -255,7 +231,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
return false;
// It is illegal to have vreg inputs to a physreg defining reg_sequence.
- if (Register::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
+ if (CopyUse.getOperand(0).getReg().isPhysical())
return false;
const TargetRegisterClass *SrcRC, *DstRC;
@@ -306,7 +282,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC);
Register TmpAReg = MRI.createVirtualRegister(NewSrcRC);
unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
- AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY;
+ AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY;
BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc),
TmpAReg)
.addReg(TmpReg, RegState::Kill);
@@ -362,8 +338,7 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
return false;
DenseSet<const MachineBasicBlock *> Visited;
- SmallVector<MachineBasicBlock *, 4> Worklist(MBB->pred_begin(),
- MBB->pred_end());
+ SmallVector<MachineBasicBlock *, 4> Worklist(MBB->predecessors());
while (!Worklist.empty()) {
MachineBasicBlock *MBB = Worklist.pop_back_val();
@@ -388,17 +363,13 @@ static bool isReachable(const MachineInstr *From,
const MachineInstr *To,
const MachineBasicBlock *CutOff,
MachineDominatorTree &MDT) {
- // If either From block dominates To block or instructions are in the same
- // block and From is higher.
if (MDT.dominates(From, To))
return true;
const MachineBasicBlock *MBBFrom = From->getParent();
const MachineBasicBlock *MBBTo = To->getParent();
- if (MBBFrom == MBBTo)
- return false;
- // Instructions are in different blocks, do predecessor search.
+ // Do predecessor search.
// We should almost never get here since we do not usually produce M0 stores
// other than -1.
return searchPredecessors(MBBTo, CutOff, [MBBFrom]
@@ -598,13 +569,11 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
MDT = &getAnalysis<MachineDominatorTree>();
- SmallVector<MachineInstr *, 16> Worklist;
-
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
- MachineBasicBlock &MBB = *BI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
+ MachineBasicBlock *MBB = &*BI;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
@@ -619,7 +588,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
- if (!Register::isVirtualRegister(DstReg)) {
+ if (!DstReg.isVirtual()) {
// If the destination register is a physical register there isn't
// really much we can do to fix this.
// Some special instructions use M0 as an input. Some even only use
@@ -628,9 +597,9 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
Register TmpReg
= MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(MBB, MI, MI.getDebugLoc(),
+ BuildMI(*MBB, MI, MI.getDebugLoc(),
TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
- .add(MI.getOperand(1));
+ .add(MI.getOperand(1));
MI.getOperand(1).setReg(TmpReg);
}
@@ -639,8 +608,16 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
Register SrcReg = MI.getOperand(1).getReg();
- if (!Register::isVirtualRegister(SrcReg)) {
- TII->moveToVALU(MI, MDT);
+ if (!SrcReg.isVirtual()) {
+ MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
+ if (NewBB && NewBB != MBB) {
+ MBB = NewBB;
+ E = MBB->end();
+ BI = MachineFunction::iterator(MBB);
+ BE = MF.end();
+ }
+ assert((!NewBB || NewBB == I->getParent()) &&
+ "moveToVALU did not return the right basic block");
break;
}
@@ -655,7 +632,15 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MI.setDesc(TII->get(SMovOp));
break;
}
- TII->moveToVALU(MI, MDT);
+ MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
+ if (NewBB && NewBB != MBB) {
+ MBB = NewBB;
+ E = MBB->end();
+ BI = MachineFunction::iterator(MBB);
+ BE = MF.end();
+ }
+ assert((!NewBB || NewBB == I->getParent()) &&
+ "moveToVALU did not return the right basic block");
} else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
}
@@ -663,10 +648,18 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
}
case AMDGPU::PHI: {
- processPHINode(MI);
+ MachineBasicBlock *NewBB = processPHINode(MI);
+ if (NewBB && NewBB != MBB) {
+ MBB = NewBB;
+ E = MBB->end();
+ BI = MachineFunction::iterator(MBB);
+ BE = MF.end();
+ }
+ assert((!NewBB || NewBB == I->getParent()) &&
+ "moveToVALU did not return the right basic block");
break;
}
- case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::REG_SEQUENCE: {
if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
!hasVectorOperands(MI, TRI)) {
foldVGPRCopyIntoRegSequence(MI, TRI, TII, *MRI);
@@ -675,8 +668,17 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
- TII->moveToVALU(MI, MDT);
+ MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
+ if (NewBB && NewBB != MBB) {
+ MBB = NewBB;
+ E = MBB->end();
+ BI = MachineFunction::iterator(MBB);
+ BE = MF.end();
+ }
+ assert((!NewBB || NewBB == I->getParent()) &&
+ "moveToVALU did not return the right basic block");
break;
+ }
case AMDGPU::INSERT_SUBREG: {
const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
DstRC = MRI->getRegClass(MI.getOperand(0).getReg());
@@ -686,7 +688,15 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
(TRI->hasVectorRegisters(Src0RC) ||
TRI->hasVectorRegisters(Src1RC))) {
LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
- TII->moveToVALU(MI, MDT);
+ MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
+ if (NewBB && NewBB != MBB) {
+ MBB = NewBB;
+ E = MBB->end();
+ BI = MachineFunction::iterator(MBB);
+ BE = MF.end();
+ }
+ assert((!NewBB || NewBB == I->getParent()) &&
+ "moveToVALU did not return the right basic block");
}
break;
}
@@ -721,7 +731,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// that can't be resolved in later operand folding pass
bool Resolved = false;
for (MachineOperand *MO : {&Src0, &Src1}) {
- if (Register::isVirtualRegister(MO->getReg())) {
+ if (MO->getReg().isVirtual()) {
MachineInstr *DefMI = MRI->getVRegDef(MO->getReg());
if (DefMI && TII->isFoldableCopy(*DefMI)) {
const MachineOperand &Def = DefMI->getOperand(0);
@@ -761,17 +771,18 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
+MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
unsigned numVGPRUses = 0;
bool AllAGPRUses = true;
SetVector<const MachineInstr *> worklist;
SmallSet<const MachineInstr *, 4> Visited;
SetVector<MachineInstr *> PHIOperands;
+ MachineBasicBlock *CreatedBB = nullptr;
worklist.insert(&MI);
Visited.insert(&MI);
while (!worklist.empty()) {
const MachineInstr *Instr = worklist.pop_back_val();
- unsigned Reg = Instr->getOperand(0).getReg();
+ Register Reg = Instr->getOperand(0).getReg();
for (const auto &Use : MRI->use_operands(Reg)) {
const MachineInstr *UseMI = Use.getParent();
AllAGPRUses &= (UseMI->isCopy() &&
@@ -820,11 +831,11 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
bool hasVGPRInput = false;
for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
- unsigned InputReg = MI.getOperand(i).getReg();
+ Register InputReg = MI.getOperand(i).getReg();
MachineInstr *Def = MRI->getVRegDef(InputReg);
if (TRI->isVectorRegister(*MRI, InputReg)) {
if (Def->isCopy()) {
- unsigned SrcReg = Def->getOperand(1).getReg();
+ Register SrcReg = Def->getOperand(1).getReg();
const TargetRegisterClass *RC =
TRI->getRegClassForReg(*MRI, SrcReg);
if (TRI->isSGPRClass(RC))
@@ -858,7 +869,7 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
RC0 != &AMDGPU::VReg_1RegClass) &&
(hasVGPRInput || numVGPRUses > 1)) {
LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
- TII->moveToVALU(MI);
+ CreatedBB = TII->moveToVALU(MI);
}
else {
LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
@@ -869,4 +880,5 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
while (!PHIOperands.empty()) {
processPHINode(*PHIOperands.pop_back_val());
}
+ return CreatedBB;
}
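The SIFixSGPRCopies hunks above repeat one idiom: moveToVALU (and now processPHINode) may create a new basic block, so the walk re-anchors MBB, the block iterator, and the end iterators before continuing. The standalone sketch below shows only that re-anchoring mechanic with std::list stand-ins; it is not the pass itself, and the block names are invented:

    #include <iostream>
    #include <list>
    #include <string>

    using Func = std::list<std::string>; // each element stands in for a basic block

    // A transform that may create a new "block" right after the current one and
    // report it; returns F.end() when nothing was created, loosely mirroring the
    // MachineBasicBlock pointer that moveToVALU now returns.
    static Func::iterator transform(Func &F, Func::iterator BI) {
      if (*BI == "needs-split")
        return F.insert(std::next(BI), "split-tail");
      return F.end();
    }

    int main() {
      Func F = {"entry", "needs-split", "exit"};
      for (Func::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
        Func::iterator NewBI = transform(F, BI);
        if (NewBI != F.end() && NewBI != BI) {
          // Re-anchor on the newly created block and refresh the end bound,
          // the same bookkeeping SIFixSGPRCopies does with MBB/BI/E/BE after
          // moveToVALU. (std::list iterators stay valid across insert; the
          // refresh is kept only to mirror the pass.)
          BI = NewBI;
          BE = F.end();
        }
        std::cout << *BI << "\n"; // visits: entry, split-tail, exit
      }
    }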
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
index 29484668a01d..f7e3ea5fc072 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
@@ -12,8 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
deleted file mode 100644
index 8e3402b537b3..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
+++ /dev/null
@@ -1,239 +0,0 @@
-//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-/// \file
-/// SIFixupVectorISel pass cleans up post ISEL Vector issues.
-/// Currently this will convert GLOBAL_{LOAD|STORE}_*
-/// and GLOBAL_Atomic_* instructions into their _SADDR variants,
-/// feeding the sreg into the saddr field of the new instruction.
-/// We currently handle a REG_SEQUENCE feeding the vaddr
-/// and decompose it into a base and index.
-///
-/// Transform:
-/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
-/// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
-/// %24:vgpr_32, %19:sreg_64_xexec
-/// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
-/// %11:vreg_64 = COPY %16:vreg_64
-/// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
-/// Into:
-/// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
-/// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
-/// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec,16...
-///
-//===----------------------------------------------------------------------===//
-//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetMachine.h"
-#define DEBUG_TYPE "si-fixup-vector-isel"
-
-using namespace llvm;
-
-static cl::opt<bool> EnableGlobalSGPRAddr(
- "amdgpu-enable-global-sgpr-addr",
- cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
- cl::init(false));
-
-STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
-STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
-
-namespace {
-
-class SIFixupVectorISel : public MachineFunctionPass {
-public:
- static char ID;
-
-public:
- SIFixupVectorISel() : MachineFunctionPass(ID) {
- initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
- "SI Fixup Vector ISel", false, false)
-
-char SIFixupVectorISel::ID = 0;
-
-char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;
-
-FunctionPass *llvm::createSIFixupVectorISelPass() {
- return new SIFixupVectorISel();
-}
-
-static bool findSRegBaseAndIndex(MachineOperand *Op,
- unsigned &BaseReg,
- unsigned &IndexReg,
- MachineRegisterInfo &MRI,
- const SIRegisterInfo *TRI) {
- SmallVector<MachineOperand *, 8> Worklist;
- Worklist.push_back(Op);
- while (!Worklist.empty()) {
- MachineOperand *WOp = Worklist.pop_back_val();
- if (!WOp->isReg() || !Register::isVirtualRegister(WOp->getReg()))
- continue;
- MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
- switch (DefInst->getOpcode()) {
- default:
- continue;
- case AMDGPU::COPY:
- Worklist.push_back(&DefInst->getOperand(1));
- break;
- case AMDGPU::REG_SEQUENCE:
- if (DefInst->getNumOperands() != 5)
- continue;
- Worklist.push_back(&DefInst->getOperand(1));
- Worklist.push_back(&DefInst->getOperand(3));
- break;
- case AMDGPU::V_ADD_I32_e64:
- // The V_ADD_* and its analogous V_ADDCV_* are generated by
- // a previous pass which lowered from an ADD_64_PSEUDO,
- // which generates subregs to break up the 64 bit args.
- if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
- continue;
- BaseReg = DefInst->getOperand(2).getReg();
- if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
- continue;
- IndexReg = DefInst->getOperand(3).getReg();
- // Chase the IndexReg.
- MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
- if (!MI || !MI->isCopy())
- continue;
- // Make sure the reg class is 64 bit for Index.
- // If the Index register is a subreg, we want it to reference
- // a 64 bit register which we will use as the Index reg.
- const TargetRegisterClass *IdxRC, *BaseRC;
- IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
- if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
- continue;
- IndexReg = MI->getOperand(1).getReg();
- // Chase the BaseReg.
- MI = MRI.getUniqueVRegDef(BaseReg);
- if (!MI || !MI->isCopy())
- continue;
- // Make sure the register class is 64 bit for Base.
- BaseReg = MI->getOperand(1).getReg();
- BaseRC = MRI.getRegClass(BaseReg);
- if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
- continue;
- // Make sure Base is SReg and Index is VReg.
- if (!TRI->isSGPRReg(MRI, BaseReg))
- return false;
- if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
- return false;
- // clear any killed flags on Index and Base regs, used later.
- MRI.clearKillFlags(IndexReg);
- MRI.clearKillFlags(BaseReg);
- return true;
- }
- }
- return false;
-}
-
-// Identify Global LOAD|STORE/ATOMIC and try to convert to _SADDR.
-static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
- MachineFunction &MF,
- MachineRegisterInfo &MRI,
- const GCNSubtarget &ST,
- const SIInstrInfo *TII,
- const SIRegisterInfo *TRI) {
- if (!EnableGlobalSGPRAddr)
- return false;
- bool FuncModified = false;
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
- MachineInstr &MI = *I;
- int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
- if (NewOpcd < 0)
- continue;
- // Update our statistics on opportunities seen.
- ++NumSGPRGlobalOccurs;
- LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
- // Need a Base and Index or we cant transform to _SADDR.
- unsigned BaseReg = 0;
- unsigned IndexReg = 0;
- MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
- if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
- continue;
- ++NumSGPRGlobalSaddrs;
- FuncModified = true;
- // Create the new _SADDR Memory instruction.
- bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
- MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
- MachineInstr *NewGlob = nullptr;
- NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
- if (HasVdst)
- NewGlob->addOperand(MF, MI.getOperand(0));
- NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
- if (VData)
- NewGlob->addOperand(MF, *VData);
- NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
- NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));
-
- MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
- // Atomics dont have a GLC, so omit the field if not there.
- if (Glc)
- NewGlob->addOperand(MF, *Glc);
-
- MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc);
- if (DLC)
- NewGlob->addOperand(MF, *DLC);
-
- NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
- // _D16 have an vdst_in operand, copy it in.
- MachineOperand *VDstInOp = TII->getNamedOperand(MI,
- AMDGPU::OpName::vdst_in);
- if (VDstInOp)
- NewGlob->addOperand(MF, *VDstInOp);
- NewGlob->copyImplicitOps(MF, MI);
- NewGlob->cloneMemRefs(MF, MI);
- // Remove the old Global Memop instruction.
- MI.eraseFromParent();
- LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
- }
- return FuncModified;
-}
-
-bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
- // Only need to run this in SelectionDAG path.
- if (MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::Selected))
- return false;
-
- if (skipFunction(MF.getFunction()))
- return false;
-
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
-
- bool FuncModified = false;
- for (MachineBasicBlock &MBB : MF) {
- // Cleanup missed Saddr opportunites from ISel.
- FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
- }
- return FuncModified;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 92980d2406cf..d5fa9afded27 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -9,18 +9,11 @@
//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;
@@ -35,7 +28,7 @@ struct FoldCandidate {
int FrameIndexToFold;
};
int ShrinkOpcode;
- unsigned char UseOpNo;
+ unsigned UseOpNo;
MachineOperand::MachineOperandType Kind;
bool Commuted;
@@ -129,6 +122,23 @@ char SIFoldOperands::ID = 0;
char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
+// Map multiply-accumulate opcode to corresponding multiply-add opcode if any.
+static unsigned macToMad(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::V_MAC_F32_e64:
+ return AMDGPU::V_MAD_F32_e64;
+ case AMDGPU::V_MAC_F16_e64:
+ return AMDGPU::V_MAD_F16_e64;
+ case AMDGPU::V_FMAC_F32_e64:
+ return AMDGPU::V_FMA_F32_e64;
+ case AMDGPU::V_FMAC_F16_e64:
+ return AMDGPU::V_FMA_F16_gfx9_e64;
+ case AMDGPU::V_FMAC_LEGACY_F32_e64:
+ return AMDGPU::V_FMA_LEGACY_F32_e64;
+ }
+ return AMDGPU::INSTRUCTION_LIST_END;
+}
+
// Wrapper around isInlineConstant that understands special cases when
// instruction types are replaced during operand folding.
static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
@@ -139,31 +149,18 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
return true;
unsigned Opc = UseMI.getOpcode();
- switch (Opc) {
- case AMDGPU::V_MAC_F32_e64:
- case AMDGPU::V_MAC_F16_e64:
- case AMDGPU::V_FMAC_F32_e64:
- case AMDGPU::V_FMAC_F16_e64: {
+ unsigned NewOpc = macToMad(Opc);
+ if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
// Special case for mac. Since this is replaced with mad when folded into
// src2, we need to check the legality for the final instruction.
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (static_cast<int>(OpNo) == Src2Idx) {
- bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 ||
- Opc == AMDGPU::V_FMAC_F16_e64;
- bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 ||
- Opc == AMDGPU::V_FMAC_F32_e64;
-
- unsigned Opc = IsFMA ?
- (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) :
- (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
- const MCInstrDesc &MadDesc = TII->get(Opc);
+ const MCInstrDesc &MadDesc = TII->get(NewOpc);
return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
}
- return false;
- }
- default:
- return false;
}
+
+ return false;
}
// TODO: Add heuristic that the frame index might not fit in the addressing mode
@@ -172,9 +169,23 @@ static bool frameIndexMayFold(const SIInstrInfo *TII,
const MachineInstr &UseMI,
int OpNo,
const MachineOperand &OpToFold) {
- return OpToFold.isFI() &&
- (TII->isMUBUF(UseMI) || TII->isFLATScratch(UseMI)) &&
- OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::vaddr);
+ if (!OpToFold.isFI())
+ return false;
+
+ if (TII->isMUBUF(UseMI))
+ return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
+ AMDGPU::OpName::vaddr);
+ if (!TII->isFLATScratch(UseMI))
+ return false;
+
+ int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
+ AMDGPU::OpName::saddr);
+ if (OpNo == SIdx)
+ return true;
+
+ int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
+ AMDGPU::OpName::vaddr);
+ return OpNo == VIdx && SIdx == -1;
}
FunctionPass *llvm::createSIFoldOperandsPass() {
@@ -282,9 +293,6 @@ static bool updateOperand(FoldCandidate &Fold,
assert(!Fold.needsShrink() && "not handled");
if (Fold.isImm()) {
- // FIXME: ChangeToImmediate should probably clear the subreg flags. It's
- // reinterpreted as TargetFlags.
- Old.setSubReg(0);
Old.ChangeToImmediate(Fold.ImmToFold);
return true;
}
@@ -335,17 +343,8 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
// Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
unsigned Opc = MI->getOpcode();
- if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
- (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
- bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 ||
- Opc == AMDGPU::V_FMAC_F16_e64;
- bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 ||
- Opc == AMDGPU::V_FMAC_F32_e64;
- unsigned NewOpc = IsFMA ?
- (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) :
- (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
-
+ unsigned NewOpc = macToMad(Opc);
+ if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
// Check if changing this to a v_mad_{f16, f32} instruction will allow us
// to fold the operand.
MI->setDesc(TII->get(NewOpc));
@@ -358,10 +357,17 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
}
// Special case for s_setreg_b32
- if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
- MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
- appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
- return true;
+ if (OpToFold->isImm()) {
+ unsigned ImmOpc = 0;
+ if (Opc == AMDGPU::S_SETREG_B32)
+ ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
+ else if (Opc == AMDGPU::S_SETREG_B32_mode)
+ ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
+ if (ImmOpc) {
+ MI->setDesc(TII->get(ImmOpc));
+ appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
+ return true;
+ }
}
// If we are already folding into another operand of MI, then
@@ -399,9 +405,9 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
return false;
if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
- if ((Opc == AMDGPU::V_ADD_I32_e64 ||
- Opc == AMDGPU::V_SUB_I32_e64 ||
- Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
+ if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
+ Opc == AMDGPU::V_SUB_CO_U32_e64 ||
+ Opc == AMDGPU::V_SUBREV_CO_U32_e64) && // FIXME
(OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
@@ -463,7 +469,18 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
static bool isUseSafeToFold(const SIInstrInfo *TII,
const MachineInstr &MI,
const MachineOperand &UseMO) {
- return !UseMO.isUndef() && !TII->isSDWA(MI);
+ if (UseMO.isUndef() || TII->isSDWA(MI))
+ return false;
+
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ // Do not fold into an indirect mov.
+ return !MI.hasRegisterImplicitUseOperand(AMDGPU::M0);
+ }
+
+ return true;
//return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
}
@@ -528,11 +545,12 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
return false;
Register UseReg = OpToFold.getReg();
- if (!Register::isVirtualRegister(UseReg))
+ if (!UseReg.isVirtual())
return false;
- if (llvm::find_if(FoldList, [UseMI](const FoldCandidate &FC) {
- return FC.UseMI == UseMI; }) != FoldList.end())
+ if (llvm::any_of(FoldList, [UseMI](const FoldCandidate &FC) {
+ return FC.UseMI == UseMI;
+ }))
return false;
MachineRegisterInfo &MRI = UseMI->getParent()->getParent()->getRegInfo();
@@ -587,9 +605,9 @@ void SIFoldOperands::foldOperand(
Register RegSeqDstReg = UseMI->getOperand(0).getReg();
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
- MachineRegisterInfo::use_iterator Next;
- for (MachineRegisterInfo::use_iterator
- RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
+ MachineRegisterInfo::use_nodbg_iterator Next;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ RSUse = MRI->use_nodbg_begin(RegSeqDstReg), RSE = MRI->use_nodbg_end();
RSUse != RSE; RSUse = Next) {
Next = std::next(RSUse);
@@ -616,25 +634,30 @@ void SIFoldOperands::foldOperand(
// Sanity check that this is a stack access.
// FIXME: Should probably use stack pseudos before frame lowering.
- if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
- MFI->getScratchRSrcReg())
- return;
+ if (TII->isMUBUF(*UseMI)) {
+ if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
+ MFI->getScratchRSrcReg())
+ return;
- // Ensure this is either relative to the current frame or the current wave.
- MachineOperand &SOff =
- *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
- if ((!SOff.isReg() || SOff.getReg() != MFI->getStackPtrOffsetReg()) &&
- (!SOff.isImm() || SOff.getImm() != 0))
- return;
+ // Ensure this is either relative to the current frame or the current
+ // wave.
+ MachineOperand &SOff =
+ *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
+ if (!SOff.isImm() || SOff.getImm() != 0)
+ return;
+ }
// A frame index will resolve to a positive constant, so it should always be
// safe to fold the addressing mode, even pre-GFX9.
UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
- // If this is relative to the current wave, update it to be relative to the
- // current frame.
- if (SOff.isImm())
- SOff.ChangeToRegister(MFI->getStackPtrOffsetReg(), false);
+ if (TII->isFLATScratch(*UseMI) &&
+ AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
+ AMDGPU::OpName::vaddr) != -1) {
+ unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
+ UseMI->setDesc(TII->get(NewOpc));
+ }
+
return;
}
@@ -643,42 +666,46 @@ void SIFoldOperands::foldOperand(
if (FoldingImmLike && UseMI->isCopy()) {
Register DestReg = UseMI->getOperand(0).getReg();
+ Register SrcReg = UseMI->getOperand(1).getReg();
+ assert(SrcReg.isVirtual());
- // Don't fold into a copy to a physical register. Doing so would interfere
- // with the register coalescer's logic which would avoid redundant
- // initalizations.
- if (DestReg.isPhysical())
- return;
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
- const TargetRegisterClass *DestRC = MRI->getRegClass(DestReg);
+ // Don't fold into a copy to a physical register with the same class. Doing
+ // so would interfere with the register coalescer's logic which would avoid
+ // redundant initalizations.
+ if (DestReg.isPhysical() && SrcRC->contains(DestReg))
+ return;
- Register SrcReg = UseMI->getOperand(1).getReg();
- if (SrcReg.isVirtual()) { // XXX - This can be an assert?
- const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
+ if (!DestReg.isPhysical()) {
if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
- MachineRegisterInfo::use_iterator NextUse;
+ MachineRegisterInfo::use_nodbg_iterator NextUse;
SmallVector<FoldCandidate, 4> CopyUses;
- for (MachineRegisterInfo::use_iterator
- Use = MRI->use_begin(DestReg), E = MRI->use_end();
- Use != E; Use = NextUse) {
+ for (MachineRegisterInfo::use_nodbg_iterator Use = MRI->use_nodbg_begin(DestReg),
+ E = MRI->use_nodbg_end();
+ Use != E; Use = NextUse) {
NextUse = std::next(Use);
- FoldCandidate FC = FoldCandidate(Use->getParent(),
- Use.getOperandNo(), &UseMI->getOperand(1));
+ // There's no point trying to fold into an implicit operand.
+ if (Use->isImplicit())
+ continue;
+
+ FoldCandidate FC = FoldCandidate(Use->getParent(), Use.getOperandNo(),
+ &UseMI->getOperand(1));
CopyUses.push_back(FC);
- }
- for (auto & F : CopyUses) {
- foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
- FoldList, CopiesToReplace);
+ }
+ for (auto &F : CopyUses) {
+ foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace);
}
}
- }
- if (DestRC == &AMDGPU::AGPR_32RegClass &&
- TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
- UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
- CopiesToReplace.push_back(UseMI);
- return;
+ if (DestRC == &AMDGPU::AGPR_32RegClass &&
+ TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+ UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
+ UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+ CopiesToReplace.push_back(UseMI);
+ return;
+ }
}
// In order to fold immediates into copies, we need to change the
@@ -738,7 +765,7 @@ void SIFoldOperands::foldOperand(
auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
BuildMI(MBB, UseMI, DL,
- TII->get(AMDGPU::V_ACCVGPR_WRITE_B32), Tmp).addImm(Imm);
+ TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addImm(Imm);
B.addReg(Tmp);
} else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
auto Src = getRegSubRegPair(*Def);
@@ -780,7 +807,7 @@ void SIFoldOperands::foldOperand(
}
auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
BuildMI(MBB, UseMI, DL,
- TII->get(AMDGPU::V_ACCVGPR_WRITE_B32), Tmp).addReg(Vgpr);
+ TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addReg(Vgpr);
B.addReg(Tmp);
}
@@ -794,10 +821,10 @@ void SIFoldOperands::foldOperand(
return;
if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()))
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
+ UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32));
+ UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64));
return;
}
@@ -819,8 +846,6 @@ void SIFoldOperands::foldOperand(
UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
- // FIXME: ChangeToImmediate should clear subreg
- UseMI->getOperand(1).setSubReg(0);
if (OpToFold.isImm())
UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
else
@@ -991,8 +1016,7 @@ static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
MachineOperand &Op) {
if (Op.isReg()) {
// If this has a subregister, it obviously is a register source.
- if (Op.getSubReg() != AMDGPU::NoSubRegister ||
- !Register::isVirtualRegister(Op.getReg()))
+ if (Op.getSubReg() != AMDGPU::NoSubRegister || !Op.getReg().isVirtual())
return &Op;
MachineInstr *Def = MRI.getVRegDef(Op.getReg());
@@ -1032,25 +1056,6 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
if (!Src0->isImm() && !Src1->isImm())
return false;
- if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32 ||
- MI->getOpcode() == AMDGPU::V_LSHL_ADD_U32 ||
- MI->getOpcode() == AMDGPU::V_AND_OR_B32) {
- if (Src0->isImm() && Src0->getImm() == 0) {
- // v_lshl_or_b32 0, X, Y -> copy Y
- // v_lshl_or_b32 0, X, K -> v_mov_b32 K
- // v_lshl_add_b32 0, X, Y -> copy Y
- // v_lshl_add_b32 0, X, K -> v_mov_b32 K
- // v_and_or_b32 0, X, Y -> copy Y
- // v_and_or_b32 0, X, K -> v_mov_b32 K
- bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
- MI->RemoveOperand(Src1Idx);
- MI->RemoveOperand(Src0Idx);
-
- MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
- return true;
- }
- }
-
// and k0, k1 -> v_mov_b32 (k0 & k1)
// or k0, k1 -> v_mov_b32 (k0 | k1)
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
@@ -1178,9 +1183,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
MachineOperand *NonInlineUse = nullptr;
int NonInlineUseOpNo = -1;
- MachineRegisterInfo::use_iterator NextUse;
- for (MachineRegisterInfo::use_iterator
- Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
+ MachineRegisterInfo::use_nodbg_iterator NextUse;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ Use = MRI->use_nodbg_begin(Dst.getReg()), E = MRI->use_nodbg_end();
Use != E; Use = NextUse) {
NextUse = std::next(Use);
MachineInstr *UseMI = Use->getParent();
@@ -1202,7 +1207,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
// instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
// again. The same constant folded instruction could also have a second
// use operand.
- NextUse = MRI->use_begin(Dst.getReg());
+ NextUse = MRI->use_nodbg_begin(Dst.getReg());
FoldList.clear();
continue;
}
@@ -1241,9 +1246,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
}
} else {
// Folding register.
- SmallVector <MachineRegisterInfo::use_iterator, 4> UsesToProcess;
- for (MachineRegisterInfo::use_iterator
- Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
+ SmallVector <MachineRegisterInfo::use_nodbg_iterator, 4> UsesToProcess;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ Use = MRI->use_nodbg_begin(Dst.getReg()), E = MRI->use_nodbg_end();
Use != E; ++Use) {
UsesToProcess.push_back(Use);
}
@@ -1260,9 +1265,12 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
for (MachineInstr *Copy : CopiesToReplace)
Copy->addImplicitDefUseOperands(*MF);
+ SmallPtrSet<MachineInstr *, 16> Folded;
for (FoldCandidate &Fold : FoldList) {
assert(!Fold.isReg() || Fold.OpToFold);
- if (Fold.isReg() && Register::isVirtualRegister(Fold.OpToFold->getReg())) {
+ if (Folded.count(Fold.UseMI))
+ continue;
+ if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {
Register Reg = Fold.OpToFold->getReg();
MachineInstr *DefMI = Fold.OpToFold->getParent();
if (DefMI->readsRegister(AMDGPU::EXEC, TRI) &&
@@ -1281,7 +1289,8 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
<< static_cast<int>(Fold.UseOpNo) << " of "
<< *Fold.UseMI << '\n');
- tryFoldInst(TII, Fold.UseMI);
+ if (tryFoldInst(TII, Fold.UseMI))
+ Folded.insert(Fold.UseMI);
} else if (Fold.isCommuted()) {
// Restoring instruction's original operand order if fold has failed.
TII->commuteInstruction(*Fold.UseMI, false);
@@ -1296,7 +1305,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
switch (Op) {
case AMDGPU::V_MAX_F32_e64:
case AMDGPU::V_MAX_F16_e64:
- case AMDGPU::V_MAX_F64:
+ case AMDGPU::V_MAX_F64_e64:
case AMDGPU::V_PK_MAX_F16: {
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
return nullptr;
@@ -1557,7 +1566,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (!FoldingImm && !OpToFold.isReg())
continue;
- if (OpToFold.isReg() && !Register::isVirtualRegister(OpToFold.getReg()))
+ if (OpToFold.isReg() && !OpToFold.getReg().isVirtual())
continue;
// Prevent folding operands backwards in the function. For example,
@@ -1567,7 +1576,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// ...
// %vgpr0 = V_MOV_B32_e32 1, implicit %exec
MachineOperand &Dst = MI.getOperand(0);
- if (Dst.isReg() && !Register::isVirtualRegister(Dst.getReg()))
+ if (Dst.isReg() && !Dst.getReg().isVirtual())
continue;
foldInstOperand(MI, OpToFold);
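The SIFoldOperands hunks above switch the use walks from use_iterator to use_nodbg_iterator and skip implicit operands, so DBG_VALUE users cannot change which folds are attempted. A minimal editorial sketch of that iteration pattern (not part of the patch; countFoldableUses is a made-up name):

#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Count the uses the pass would even consider: debug instructions are not
// visited at all, and implicit operands are skipped, mirroring the loops
// rewritten above.
static unsigned countFoldableUses(MachineRegisterInfo &MRI, Register Reg) {
  unsigned N = 0;
  for (MachineOperand &UseOp : MRI.use_nodbg_operands(Reg)) {
    if (UseOp.isImplicit())
      continue; // nothing useful can be folded into an implicit operand
    ++N;
  }
  return N;
}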
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 8ef02e73865d..a12e013b4fe6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -14,15 +14,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
#include "GCNRegPressure.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -60,9 +53,14 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
private:
template <typename Callable>
- void forAllLanes(unsigned Reg, LaneBitmask LaneMask, Callable Func) const;
+ void forAllLanes(Register Reg, LaneBitmask LaneMask, Callable Func) const;
bool canBundle(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT);
@@ -145,15 +143,15 @@ static unsigned getMopState(const MachineOperand &MO) {
S |= RegState::Kill;
if (MO.isEarlyClobber())
S |= RegState::EarlyClobber;
- if (Register::isPhysicalRegister(MO.getReg()) && MO.isRenamable())
+ if (MO.getReg().isPhysical() && MO.isRenamable())
S |= RegState::Renamable;
return S;
}
template <typename Callable>
-void SIFormMemoryClauses::forAllLanes(unsigned Reg, LaneBitmask LaneMask,
+void SIFormMemoryClauses::forAllLanes(Register Reg, LaneBitmask LaneMask,
Callable Func) const {
- if (LaneMask.all() || Register::isPhysicalRegister(Reg) ||
+ if (LaneMask.all() || Reg.isPhysical() ||
LaneMask == MRI->getMaxLaneMaskForVReg(Reg)) {
Func(0);
return;
@@ -228,7 +226,7 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI,
if (Conflict == Map.end())
continue;
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
return false;
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
@@ -270,7 +268,7 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
if (!Reg)
continue;
- LaneBitmask Mask = Register::isVirtualRegister(Reg)
+ LaneBitmask Mask = Reg.isVirtual()
? TRI->getSubRegIndexLaneMask(MO.getSubReg())
: LaneBitmask::getAll();
RegUse &Map = MO.isDef() ? Defs : Uses;
@@ -324,6 +322,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);
for (MachineBasicBlock &MBB : MF) {
+ GCNDownwardRPTracker RPT(*LIS);
MachineBasicBlock::instr_iterator Next;
for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) {
MachineInstr &MI = *I;
@@ -334,12 +333,19 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
if (!isValidClauseInst(MI, IsVMEM))
continue;
- RegUse Defs, Uses;
- GCNDownwardRPTracker RPT(*LIS);
- RPT.reset(MI);
+ if (!RPT.getNext().isValid())
+ RPT.reset(MI);
+ else { // Advance the state to the current MI.
+ RPT.advance(MachineBasicBlock::const_iterator(MI));
+ RPT.advanceBeforeNext();
+ }
- if (!processRegUses(MI, Defs, Uses, RPT))
+ const GCNRPTracker::LiveRegSet LiveRegsCopy(RPT.getLiveRegs());
+ RegUse Defs, Uses;
+ if (!processRegUses(MI, Defs, Uses, RPT)) {
+ RPT.reset(MI, &LiveRegsCopy);
continue;
+ }
unsigned Length = 1;
for ( ; Next != E && Length < FuncMaxClause; ++Next) {
@@ -354,8 +360,10 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
++Length;
}
- if (Length < 2)
+ if (Length < 2) {
+ RPT.reset(MI, &LiveRegsCopy);
continue;
+ }
Changed = true;
MFI->limitOccupancy(LastRecordedOccupancy);
@@ -363,6 +371,9 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
auto B = BuildMI(MBB, I, DebugLoc(), TII->get(TargetOpcode::BUNDLE));
Ind->insertMachineInstrInMaps(*B);
+ // Restore the state after processing the bundle.
+ RPT.reset(*B, &LiveRegsCopy);
+
for (auto BI = I; BI != Next; ++BI) {
BI->bundleWithPred();
Ind->removeSingleMachineInstrFromMaps(*BI);
@@ -388,17 +399,17 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
}
for (auto &&R : Defs) {
- unsigned Reg = R.first;
+ Register Reg = R.first;
Uses.erase(Reg);
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
continue;
LIS->removeInterval(Reg);
LIS->createAndComputeVirtRegInterval(Reg);
}
for (auto &&R : Uses) {
- unsigned Reg = R.first;
- if (Register::isPhysicalRegister(Reg))
+ Register Reg = R.first;
+ if (Reg.isPhysical())
continue;
LIS->removeInterval(Reg);
LIS->createAndComputeVirtRegInterval(Reg);
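Both this file and SIFoldOperands.cpp above also migrate from the static Register::isPhysicalRegister/isVirtualRegister helpers to the member predicates on llvm::Register. A small sketch of the equivalence (editorial, not from the patch; kindOf is a made-up name):

#include "llvm/CodeGen/Register.h"
using namespace llvm;

static const char *kindOf(Register Reg) {
  if (!Reg)
    return "none";        // a default-constructed Register tests false
  if (Reg.isPhysical())   // was: Register::isPhysicalRegister(Reg)
    return "physical";
  if (Reg.isVirtual())    // was: Register::isVirtualRegister(Reg)
    return "virtual";
  return "other";         // remaining encodings (e.g. stack slots)
}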
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index a2e802009d09..0398d27756db 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -7,17 +7,14 @@
//==-----------------------------------------------------------------------===//
#include "SIFrameLowering.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -112,15 +109,19 @@ static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
// 3: There's no free lane to spill, and no free register to save FP/BP,
// so we're forced to spill another VGPR to use for the spill.
FrameIndex = NewFI;
+
+ LLVM_DEBUG(
+ auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
+ dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
+ << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
} else {
+ // Remove dead <NewFI> index
+ MF.getFrameInfo().RemoveStackObject(NewFI);
// 4: If all else fails, spill the FP/BP to memory.
FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
+ LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
+ << (IsFP ? "FP" : "BP") << '\n');
}
-
- LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
- dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
- << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
- << '\n';);
} else {
LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
<< printReg(TempSGPR, TRI) << '\n');
@@ -130,7 +131,8 @@ static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
// We need to specially emit stack operations here because a different frame
// register is used than in the rest of the function, as getFrameRegister would
// use.
-static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
+static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const SIInstrInfo *TII, Register SpillReg,
Register ScratchRsrcReg, Register SPReg, int FI) {
@@ -143,7 +145,19 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
MFI.getObjectAlign(FI));
- if (isUInt<12>(Offset)) {
+ if (ST.enableFlatScratch()) {
+ if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
+ .addReg(SpillReg, RegState::Kill)
+ .addReg(SPReg)
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // dlc
+ .addMemOperand(MMO);
+ return;
+ }
+ } else if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) {
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
.addReg(SpillReg, RegState::Kill)
.addReg(ScratchRsrcReg)
@@ -158,27 +172,52 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
return;
}
- MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
- MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
+ // Don't clobber the TmpVGPR if we also need a scratch reg for the stack
+ // offset in the spill.
+ LiveRegs.addReg(SpillReg);
- BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
- .addImm(Offset);
+ if (ST.enableFlatScratch()) {
+ MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
+ MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass);
- BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
- .addReg(SpillReg, RegState::Kill)
- .addReg(OffsetReg, RegState::Kill)
- .addReg(ScratchRsrcReg)
- .addReg(SPReg)
- .addImm(0)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .addImm(0) // dlc
- .addImm(0) // swz
- .addMemOperand(MMO);
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg)
+ .addReg(SPReg)
+ .addImm(Offset);
+
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
+ .addReg(SpillReg, RegState::Kill)
+ .addReg(OffsetReg, RegState::Kill)
+ .addImm(0)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // dlc
+ .addMemOperand(MMO);
+ } else {
+ MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
+ MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
+ .addImm(Offset);
+
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
+ .addReg(SpillReg, RegState::Kill)
+ .addReg(OffsetReg, RegState::Kill)
+ .addReg(ScratchRsrcReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addImm(0) // dlc
+ .addImm(0) // swz
+ .addMemOperand(MMO);
+ }
+
+ LiveRegs.removeReg(SpillReg);
}
-static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
+static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const SIInstrInfo *TII, Register SpillReg,
Register ScratchRsrcReg, Register SPReg, int FI) {
@@ -190,7 +229,36 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
MFI.getObjectAlign(FI));
- if (isUInt<12>(Offset)) {
+ if (ST.enableFlatScratch()) {
+ if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+ BuildMI(MBB, I, DebugLoc(),
+ TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), SpillReg)
+ .addReg(SPReg)
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // dlc
+ .addMemOperand(MMO);
+ return;
+ }
+ MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
+ MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass);
+
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg)
+ .addReg(SPReg)
+ .addImm(Offset);
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR),
+ SpillReg)
+ .addReg(OffsetReg, RegState::Kill)
+ .addImm(0)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // dlc
+ .addMemOperand(MMO);
+ return;
+ }
+
+ if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) {
BuildMI(MBB, I, DebugLoc(),
TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
.addReg(ScratchRsrcReg)
@@ -225,6 +293,31 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
.addMemOperand(MMO);
}
+static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ const DebugLoc &DL, const SIInstrInfo *TII,
+ Register TargetReg) {
+ MachineFunction *MF = MBB.getParent();
+ const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+ const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
+ Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
+ Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
+
+ if (MFI->getGITPtrHigh() != 0xffffffff) {
+ BuildMI(MBB, I, DL, SMovB32, TargetHi)
+ .addImm(MFI->getGITPtrHigh())
+ .addReg(TargetReg, RegState::ImplicitDefine);
+ } else {
+ const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
+ BuildMI(MBB, I, DL, GetPC64, TargetReg);
+ }
+ Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
+ MF->getRegInfo().addLiveIn(GitPtrLo);
+ MBB.addLiveIn(GitPtrLo);
+ BuildMI(MBB, I, DL, SMovB32, TargetLo)
+ .addReg(GitPtrLo);
+}
+
// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -244,15 +337,74 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
// pointer. Because we only detect if flat instructions are used at all,
// this will be used more often than necessary on VI.
- Register FlatScratchInitReg =
- MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
+ Register FlatScrInitLo;
+ Register FlatScrInitHi;
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MRI.addLiveIn(FlatScratchInitReg);
- MBB.addLiveIn(FlatScratchInitReg);
+ if (ST.isAmdPalOS()) {
+ // Extract the scratch offset from the descriptor in the GIT
+ LivePhysRegs LiveRegs;
+ LiveRegs.init(*TRI);
+ LiveRegs.addLiveIns(MBB);
+
+ // Find unused reg to load flat scratch init into
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register FlatScrInit = AMDGPU::NoRegister;
+ ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
+ unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
+ AllSGPR64s = AllSGPR64s.slice(
+ std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
+ Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
+ for (MCPhysReg Reg : AllSGPR64s) {
+ if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
+ !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
+ FlatScrInit = Reg;
+ break;
+ }
+ }
+ assert(FlatScrInit && "Failed to find free register for scratch init");
+
+ FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
+ FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
- Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
- Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
+ buildGitPtr(MBB, I, DL, TII, FlatScrInit);
+
+ // We now have the GIT ptr - now get the scratch descriptor from the entry
+ // at offset 0 (or offset 16 for a compute shader).
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
+ const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
+ auto *MMO = MF.getMachineMemOperand(
+ PtrInfo,
+ MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable,
+ 8, Align(4));
+ unsigned Offset =
+ MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
+ const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
+ unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
+ BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
+ .addReg(FlatScrInit)
+ .addImm(EncodedOffset) // offset
+ .addImm(0) // glc
+ .addImm(0) // dlc
+ .addMemOperand(MMO);
+
+ // Mask the offset in [47:0] of the descriptor
+ const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
+ BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
+ .addReg(FlatScrInitHi)
+ .addImm(0xffff);
+ } else {
+ Register FlatScratchInitReg =
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
+ assert(FlatScratchInitReg);
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.addLiveIn(FlatScratchInitReg);
+ MBB.addLiveIn(FlatScratchInitReg);
+
+ FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
+ FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
+ }
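// Editorial note, not part of the patch: the s_and_b32 with 0xffff in the PAL
// path above keeps bits [15:0] of the high dword of the descriptor, i.e. bits
// [47:32] of the 64-bit base, so together with the untouched low dword the
// flat scratch base retains exactly bits [47:0]:
//   Base = (uint64_t(FlatScrInitHi & 0xffff) << 32) | FlatScrInitLo;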
// Do a 64-bit pointer add.
if (ST.flatScratchIsPointer()) {
@@ -274,6 +426,7 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
return;
}
+ // For GFX9.
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
@@ -284,7 +437,7 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
return;
}
- assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);
+ assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
// Copy the size in bytes.
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
@@ -302,6 +455,18 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
.addImm(8);
}
+// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
+// memory. They should have been removed by now.
+static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
+ for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
+ I != E; ++I) {
+ if (!MFI.isDeadObjectIndex(I))
+ return false;
+ }
+
+ return true;
+}
+
// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
MachineFunction &MF) const {
@@ -316,7 +481,8 @@ Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
Register ScratchRsrcReg = MFI->getScratchRSrcReg();
- if (!ScratchRsrcReg || !MRI.isPhysRegUsed(ScratchRsrcReg))
+ if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
+ allStackObjectsAreDead(MF.getFrameInfo())))
return Register();
if (ST.hasSGPRInitBug() ||
@@ -354,6 +520,10 @@ Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
return ScratchRsrcReg;
}
+static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
+ return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
+}
+
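// Editorial worked example, not part of the patch: with MUBUF scratch the
// stack and frame offsets below are swizzled per lane, so byte sizes are
// scaled by the wavefront size; with flat scratch they are plain byte
// offsets. For a 16-byte frame on a wave64 subtarget:
//   StackSize * getScratchScaleFactor(ST) = 16 * 64 = 1024   // MUBUF scratch
//   StackSize * getScratchScaleFactor(ST) = 16 *  1 =   16   // flat scratch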
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
@@ -390,7 +560,9 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
//
// This will return `Register()` in cases where there are no actual
// uses of the SRSRC.
- Register ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
+ Register ScratchRsrcReg;
+ if (!ST.enableFlatScratch())
+ ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
// Make the selected register live throughout the function.
if (ScratchRsrcReg) {
@@ -446,11 +618,11 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
assert(ScratchWaveOffsetReg);
- if (MF.getFrameInfo().hasCalls()) {
+ if (requiresStackPointerReference(MF)) {
Register SPReg = MFI->getStackPtrOffsetReg();
assert(SPReg != AMDGPU::SP_REG);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
- .addImm(MF.getFrameInfo().getStackSize() * ST.getWavefrontSize());
+ .addImm(MF.getFrameInfo().getStackSize() * getScratchScaleFactor(ST));
}
if (hasFP(MF)) {
@@ -490,26 +662,9 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
if (ST.isAmdPalOS()) {
// The pointer to the GIT is formed from the offset passed in and either
// the amdgpu-git-ptr-high function attribute or the top part of the PC
- Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
- const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
-
- if (MFI->getGITPtrHigh() != 0xffffffff) {
- BuildMI(MBB, I, DL, SMovB32, RsrcHi)
- .addImm(MFI->getGITPtrHigh())
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
- } else {
- const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
- BuildMI(MBB, I, DL, GetPC64, Rsrc01);
- }
- Register GitPtrLo = MFI->getGITPtrLoReg(MF);
- MF.getRegInfo().addLiveIn(GitPtrLo);
- MBB.addLiveIn(GitPtrLo);
- BuildMI(MBB, I, DL, SMovB32, RsrcLo)
- .addReg(GitPtrLo)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ buildGitPtr(MBB, I, DL, TII, Rsrc01);
// We now have the GIT ptr - now get the scratch descriptor from the entry
// at offset 0 (or offset 16 for a compute shader).
@@ -629,7 +784,7 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
case TargetStackID::NoAlloc:
case TargetStackID::SGPRSpill:
return true;
- case TargetStackID::SVEVector:
+ case TargetStackID::ScalableVector:
return false;
}
llvm_unreachable("Invalid TargetStackID::Value");
@@ -769,7 +924,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (!ScratchExecCopy)
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
- buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
+ buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR,
FuncInfo->getScratchRSrcReg(),
StackPtrReg,
Reg.FI.getValue());
@@ -787,7 +942,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(FramePtrReg);
- buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
+ buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR,
FuncInfo->getScratchRSrcReg(), StackPtrReg,
FuncInfo->FramePointerSaveIndex.getValue());
}
@@ -804,7 +959,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(BasePtrReg);
- buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
+ buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR,
FuncInfo->getScratchRSrcReg(), StackPtrReg,
*FuncInfo->BasePointerSaveIndex);
}
@@ -830,8 +985,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// Save FP before setting it up.
// FIXME: This should respect spillSGPRToVGPR;
- BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
- Spill[0].VGPR)
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
.addReg(FramePtrReg)
.addImm(Spill[0].Lane)
.addReg(Spill[0].VGPR, RegState::Undef);
@@ -849,8 +1003,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// Save BP before setting it up.
// FIXME: This should respect spillSGPRToVGPR;
- BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
- Spill[0].VGPR)
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
.addReg(BasePtrReg)
.addImm(Spill[0].Lane)
.addReg(Spill[0].VGPR, RegState::Undef);
@@ -877,11 +1030,11 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// s_and_b32 s32, tmp_reg, 0b111...0000
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
.addReg(StackPtrReg)
- .addImm((Alignment - 1) * ST.getWavefrontSize())
+ .addImm((Alignment - 1) * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
.addReg(ScratchSPReg, RegState::Kill)
- .addImm(-Alignment * ST.getWavefrontSize())
+ .addImm(-Alignment * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
FuncInfo->setIsStackRealigned(true);
} else if ((HasFP = hasFP(MF))) {
@@ -903,7 +1056,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP && RoundedSize != 0) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
.addReg(StackPtrReg)
- .addImm(RoundedSize * ST.getWavefrontSize())
+ .addImm(RoundedSize * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
}
@@ -965,7 +1118,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
if (RoundedSize != 0 && hasFP(MF)) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
.addReg(StackPtrReg)
- .addImm(RoundedSize * ST.getWavefrontSize())
+ .addImm(RoundedSize * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameDestroy);
}
@@ -991,7 +1144,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
- buildEpilogReload(LiveRegs, MBB, MBBI, TII, TempVGPR,
+ buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR,
FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
.addReg(TempVGPR, RegState::Kill);
@@ -1001,8 +1154,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
FuncInfo->getSGPRToVGPRSpills(FI);
assert(Spill.size() == 1);
- BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
- FramePtrReg)
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
.addReg(Spill[0].VGPR)
.addImm(Spill[0].Lane);
}
@@ -1017,7 +1169,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
- buildEpilogReload(LiveRegs, MBB, MBBI, TII, TempVGPR,
+ buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR,
FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
.addReg(TempVGPR, RegState::Kill);
@@ -1027,8 +1179,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
assert(Spill.size() == 1);
- BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
- BasePtrReg)
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
.addReg(Spill[0].VGPR)
.addImm(Spill[0].Lane);
}
@@ -1042,7 +1193,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
if (!ScratchExecCopy)
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
- buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
+ buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR,
FuncInfo->getScratchRSrcReg(), StackPtrReg,
Reg.FI.getValue());
}
@@ -1056,28 +1207,16 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
-// memory. They should have been removed by now.
-static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
- for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
- I != E; ++I) {
- if (!MFI.isDeadObjectIndex(I))
- return false;
- }
-
- return true;
-}
-
#ifndef NDEBUG
-static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
- Optional<int> FramePointerSaveIndex,
- Optional<int> BasePointerSaveIndex) {
+static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
I != E; ++I) {
if (!MFI.isDeadObjectIndex(I) &&
MFI.getStackID(I) == TargetStackID::SGPRSpill &&
- ((FramePointerSaveIndex && I != FramePointerSaveIndex) ||
- (BasePointerSaveIndex && I != BasePointerSaveIndex))) {
+ (I != FuncInfo->FramePointerSaveIndex &&
+ I != FuncInfo->BasePointerSaveIndex)) {
return false;
}
}
@@ -1086,12 +1225,13 @@ static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
}
#endif
-int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const {
+StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ Register &FrameReg) const {
const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
FrameReg = RI->getFrameRegister(MF);
- return MF.getFrameInfo().getObjectOffset(FI);
+ return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}
void SIFrameLowering::processFunctionBeforeFrameFinalized(
@@ -1104,7 +1244,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
FuncInfo->removeDeadFrameIndices(MFI);
- assert(allSGPRSpillsAreDead(MFI, None, None) &&
+ assert(allSGPRSpillsAreDead(MF) &&
"SGPR spill should have been removed in SILowerSGPRSpills");
// FIXME: The other checks should be redundant with allStackObjectsAreDead,
@@ -1163,6 +1303,8 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
LiveRegs.init(*TRI);
if (WillHaveFP || hasFP(MF)) {
+ assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
+ "Re-reserving spill slot for FP");
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
MFI->FramePointerSaveIndex, true);
}
@@ -1170,6 +1312,9 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (TRI->hasBasePointer(MF)) {
if (MFI->SGPRForFPSaveRestoreCopy)
LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
+
+ assert(!MFI->SGPRForBPSaveRestoreCopy &&
+ !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
MFI->BasePointerSaveIndex, false);
}
@@ -1188,7 +1333,21 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
// The SP is specifically managed and we don't want extra spills of it.
SavedRegs.reset(MFI->getStackPtrOffsetReg());
+
+ const BitVector AllSavedRegs = SavedRegs;
SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
+
+ // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
+ const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;
+
+ // We have to anticipate introducing CSR VGPR spills if we don't have any
+ // stack objects already, since we require an FP if there is a call and stack.
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR;
+
+ // FP will be specially managed like SP.
+ if (WillHaveFP || hasFP(MF))
+ SavedRegs.reset(MFI->getFrameOffsetReg());
}
bool SIFrameLowering::assignCalleeSavedSpillSlots(
@@ -1253,7 +1412,7 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
BuildMI(MBB, I, DL, TII->get(Op), SPReg)
.addReg(SPReg)
- .addImm(Amount * ST.getWavefrontSize());
+ .addImm(Amount * getScratchScaleFactor(ST));
} else if (CalleePopAmount != 0) {
llvm_unreachable("is this used?");
}
@@ -1261,6 +1420,20 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(I);
}
+/// Returns true if the frame will require a reference to the stack pointer.
+///
+/// This is the set of conditions common to setting up the stack pointer in a
+/// kernel, and for using a frame pointer in a callable function.
+///
+/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
+/// references SP.
+static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
+ return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
+}
+
+// The FP for kernels is always known 0, so we never really need to setup an
+// explicit register for it. However, DisableFramePointerElim will force us to
+// use a register for it.
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1276,8 +1449,31 @@ bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
return MFI.getStackSize() != 0;
}
- return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
- MFI.hasStackMap() || MFI.hasPatchPoint() ||
+ return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
MF.getTarget().Options.DisableFramePointerElim(MF);
}
+
+// This is essentially a reduced version of hasFP for entry functions. Since the
+// stack pointer is known 0 on entry to kernels, we never really need an FP
+// register. We may need to initialize the stack pointer depending on the frame
+// properties, which logically overlaps many of the cases where an ordinary
+// function would require an FP.
+bool SIFrameLowering::requiresStackPointerReference(
+ const MachineFunction &MF) const {
+ // Callable functions always require a stack pointer reference.
+ assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
+ "only expected to call this for entry points");
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // Entry points ordinarily don't need to initialize SP. We have to set it up
+ // for callees if there are any. Also note tail calls are impossible/don't
+ // make any sense for kernels.
+ if (MFI.hasCalls())
+ return true;
+
+ // We still need to initialize the SP if we're doing anything weird that
+ // references the SP, like variable sized stack objects.
+ return frameTriviallyRequiresSP(MFI);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index e89432040661..951ea79b2809 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -31,8 +31,8 @@ public:
MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
@@ -71,6 +71,8 @@ private:
public:
bool hasFP(const MachineFunction &MF) const override;
+
+ bool requiresStackPointerReference(const MachineFunction &MF) const;
};
} // end namespace llvm
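The header change above reflects the switch of getFrameIndexReference from returning a plain int byte offset to llvm::StackOffset, which can also carry a scalable component on targets with scalable vectors. A hedged sketch of how a caller reads the result back (editorial; fixedFrameOffset is a made-up name, and AMDGPU only ever produces fixed offsets here):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <cassert>
using namespace llvm;

static int64_t fixedFrameOffset(const TargetFrameLowering &TFL,
                                const MachineFunction &MF, int FI,
                                Register &FrameReg) {
  StackOffset Off = TFL.getFrameIndexReference(MF, FI, FrameReg);
  assert(!Off.getScalable() && "no scalable stack objects expected here");
  return Off.getFixed();
}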
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d035aa8f72bd..839437b5e3f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13,72 +13,21 @@
#include "SIISelLowering.h"
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/DAGCombine.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/TargetCallingConv.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CodeGen.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
-#include <cassert>
-#include <cmath>
-#include <cstdint>
-#include <iterator>
-#include <tuple>
-#include <utility>
-#include <vector>
using namespace llvm;
@@ -449,6 +398,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::f16, Custom);
setOperationAction(ISD::FEXP, MVT::f16, Custom);
setOperationAction(ISD::FLOG10, MVT::f16, Custom);
@@ -486,6 +436,19 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasBFE())
setHasExtractBitsInsn(true);
+ // Clamp modifier on add/sub
+ if (Subtarget->hasIntClamp()) {
+ setOperationAction(ISD::UADDSAT, MVT::i32, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::i32, Legal);
+ }
+
+ if (Subtarget->hasAddNoCarry()) {
+ setOperationAction(ISD::SADDSAT, MVT::i16, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::i16, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
+ }
+
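// Editorial illustration, not part of the patch: ISD::UADDSAT/USUBSAT (and the
// signed forms) model saturating arithmetic; marking them Legal here lets the
// subtarget's clamp modifier on add/sub handle them instead of an expanded
// compare-and-select sequence. The semantics being made legal:
#include <cstdint>
static uint32_t uaddsat32(uint32_t A, uint32_t B) {
  uint64_t Sum = uint64_t(A) + B;        // widen so the carry is visible
  return Sum > 0xffffffffu ? 0xffffffffu // clamp on overflow, e.g.
                           : uint32_t(Sum); // uaddsat32(0xfffffff0u, 0x20u) == 0xffffffffu
}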
setOperationAction(ISD::FMINNUM, MVT::f32, Custom);
setOperationAction(ISD::FMAXNUM, MVT::f32, Custom);
setOperationAction(ISD::FMINNUM, MVT::f64, Custom);
@@ -531,13 +494,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Promote);
AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32);
- setOperationAction(ISD::ROTR, MVT::i16, Promote);
- setOperationAction(ISD::ROTL, MVT::i16, Promote);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
setOperationAction(ISD::SDIV, MVT::i16, Promote);
setOperationAction(ISD::UDIV, MVT::i16, Promote);
setOperationAction(ISD::SREM, MVT::i16, Promote);
setOperationAction(ISD::UREM, MVT::i16, Promote);
+ setOperationAction(ISD::UADDSAT, MVT::i16, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::i16, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i16, Promote);
@@ -702,6 +667,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMAX, MVT::v2i16, Legal);
setOperationAction(ISD::UMAX, MVT::v2i16, Legal);
+ setOperationAction(ISD::UADDSAT, MVT::v2i16, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v2i16, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v2i16, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v2i16, Legal);
+
setOperationAction(ISD::FADD, MVT::v2f16, Legal);
setOperationAction(ISD::FMUL, MVT::v2f16, Legal);
setOperationAction(ISD::FMA, MVT::v2f16, Legal);
@@ -729,6 +699,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v4i16, Custom);
setOperationAction(ISD::UMAX, MVT::v4i16, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v4i16, Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v4i16, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v4i16, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v4i16, Custom);
+
setOperationAction(ISD::FADD, MVT::v4f16, Custom);
setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
setOperationAction(ISD::FMA, MVT::v4f16, Custom);
@@ -779,6 +754,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2i16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v3f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v3i16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4i16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
@@ -790,6 +767,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v3i16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v3f16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v4i16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::f16, Custom);
@@ -844,6 +823,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN);
setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
setTargetDAGCombine(ISD::ATOMIC_LOAD_FADD);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
// FIXME: In other contexts we pretend this is a per-function property.
setStackPointerRegisterToSaveRestore(AMDGPU::SGPR32);
@@ -888,15 +869,18 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
if (VT.isVector()) {
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
- if (Size == 32)
- return ScalarVT.getSimpleVT();
+ if (Size == 16) {
+ if (Subtarget->has16BitInsts())
+ return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+ return VT.isInteger() ? MVT::i32 : MVT::f32;
+ }
- if (Size > 32)
- return MVT::i32;
+ if (Size < 16)
+ return Subtarget->has16BitInsts() ? MVT::i16 : MVT::i32;
+ return Size == 32 ? ScalarVT.getSimpleVT() : MVT::i32;
+ }
- if (Size == 16 && Subtarget->has16BitInsts())
- return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
- } else if (VT.getSizeInBits() > 32)
+ if (VT.getSizeInBits() > 32)
return MVT::i32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
@@ -913,14 +897,15 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
- if (Size == 32)
+ // FIXME: Should probably promote 8-bit vectors to i16.
+ if (Size == 16 && Subtarget->has16BitInsts())
+ return (NumElts + 1) / 2;
+
+ if (Size <= 32)
return NumElts;
if (Size > 32)
return NumElts * ((Size + 31) / 32);
-
- if (Size == 16 && Subtarget->has16BitInsts())
- return (NumElts + 1) / 2;
} else if (VT.getSizeInBits() > 32)
return (VT.getSizeInBits() + 31) / 32;
@@ -935,6 +920,16 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
unsigned NumElts = VT.getVectorNumElements();
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
+ // FIXME: We should fix the ABI to be the same on targets without 16-bit
+ // support, but unless we can properly handle 3-vectors, it will still be
+ // inconsistent.
+ if (Size == 16 && Subtarget->has16BitInsts()) {
+ RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+ IntermediateVT = RegisterVT;
+ NumIntermediates = (NumElts + 1) / 2;
+ return NumIntermediates;
+ }
+
if (Size == 32) {
RegisterVT = ScalarVT.getSimpleVT();
IntermediateVT = RegisterVT;
@@ -942,20 +937,26 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
return NumIntermediates;
}
- if (Size > 32) {
+ if (Size < 16 && Subtarget->has16BitInsts()) {
+ // FIXME: Should probably form v2i16 pieces
+ RegisterVT = MVT::i16;
+ IntermediateVT = ScalarVT;
+ NumIntermediates = NumElts;
+ return NumIntermediates;
+ }
+
+
+ if (Size != 16 && Size <= 32) {
RegisterVT = MVT::i32;
- IntermediateVT = RegisterVT;
- NumIntermediates = NumElts * ((Size + 31) / 32);
+ IntermediateVT = ScalarVT;
+ NumIntermediates = NumElts;
return NumIntermediates;
}
- // FIXME: We should fix the ABI to be the same on targets without 16-bit
- // support, but unless we can properly handle 3-vectors, it will be still be
- // inconsistent.
- if (Size == 16 && Subtarget->has16BitInsts()) {
- RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+ if (Size > 32) {
+ RegisterVT = MVT::i32;
IntermediateVT = RegisterVT;
- NumIntermediates = (NumElts + 1) / 2;
+ NumIntermediates = NumElts * ((Size + 31) / 32);
return NumIntermediates;
}
}
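// Editorial worked example, not part of the patch: on a subtarget with 16-bit
// instructions, a v4f16 argument hits the Size == 16 branch above, so it is
// broken into
//   NumIntermediates = (NumElts + 1) / 2 = (4 + 1) / 2 = 2
// pieces of RegisterVT = IntermediateVT = v2f16, i.e. two 32-bit registers
// instead of four, which matches getNumRegistersForCallingConv() returning
// (NumElts + 1) / 2 for the same type.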
@@ -1007,14 +1008,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
if (RsrcIntr->IsImage) {
- Info.ptrVal = MFI->getImagePSV(
- *MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
- CI.getArgOperand(RsrcIntr->RsrcArg));
+ Info.ptrVal =
+ MFI->getImagePSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
Info.align.reset();
} else {
- Info.ptrVal = MFI->getBufferPSV(
- *MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
- CI.getArgOperand(RsrcIntr->RsrcArg));
+ Info.ptrVal =
+ MFI->getBufferPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
}
Info.flags = MachineMemOperand::MODereferenceable;
@@ -1056,8 +1055,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags |= MachineMemOperand::MOStore;
} else {
// Atomic
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(CI.getType());
+ Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
+ ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
@@ -1091,11 +1091,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_buffer_atomic_fadd: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- Info.opc = ISD::INTRINSIC_VOID;
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
- Info.ptrVal = MFI->getBufferPSV(
- *MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
- CI.getArgOperand(1));
+ Info.ptrVal =
+ MFI->getBufferPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
Info.align.reset();
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
@@ -1105,16 +1104,6 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
- case Intrinsic::amdgcn_global_atomic_fadd: {
- Info.opc = ISD::INTRINSIC_VOID;
- Info.memVT = MVT::getVT(CI.getOperand(0)->getType()
- ->getPointerElementType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
-
- return true;
- }
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
@@ -1136,10 +1125,31 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align.reset();
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
+ case Intrinsic::amdgcn_global_atomic_fadd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = CI.getOperand(0);
+ Info.align.reset();
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
return true;
}
+ case Intrinsic::amdgcn_image_bvh_intersect_ray: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
+ Info.ptrVal =
+ MFI->getImagePSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
+ Info.align.reset();
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable;
+ return true;
+ }
case Intrinsic::amdgcn_ds_gws_init:
case Intrinsic::amdgcn_ds_gws_barrier:
case Intrinsic::amdgcn_ds_gws_sema_v:
@@ -1175,9 +1185,13 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax: {
+ case Intrinsic::amdgcn_ds_fmax:
+ case Intrinsic::amdgcn_global_atomic_fadd:
+ case Intrinsic::amdgcn_global_atomic_csub: {
Value *Ptr = II->getArgOperand(0);
AccessTy = II->getType();
Ops.push_back(Ptr);
@@ -1234,7 +1248,7 @@ bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
// assume those use MUBUF instructions. Scratch loads / stores are currently
// implemented as mubuf instructions with offen bit set, so slightly
// different than the normal addr64.
- if (!isUInt<12>(AM.BaseOffs))
+ if (!SIInstrInfo::isLegalMUBUFImmOffset(AM.BaseOffs))
return false;
// FIXME: Since we can split immediate into soffset and immediate offset,
@@ -1355,37 +1369,77 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
}
bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
- unsigned Size, unsigned AddrSpace, unsigned Align,
+ unsigned Size, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags, bool *IsFast) const {
if (IsFast)
*IsFast = false;
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
- // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
- // aligned, 8 byte access in a single operation using ds_read2/write2_b32
- // with adjacent offsets.
- bool AlignedBy4 = (Align % 4 == 0);
+ // Check if alignment requirements for ds_read/write instructions are
+ // disabled.
+ if (Subtarget->hasUnalignedDSAccessEnabled() &&
+ !Subtarget->hasLDSMisalignedBug()) {
+ if (IsFast)
+ *IsFast = Alignment != Align(2);
+ return true;
+ }
+
+ if (Size == 64) {
+ // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
+ // aligned, 8 byte access in a single operation using ds_read2/write2_b32
+ // with adjacent offsets.
+ bool AlignedBy4 = Alignment >= Align(4);
+ if (IsFast)
+ *IsFast = AlignedBy4;
+
+ return AlignedBy4;
+ }
+ if (Size == 96) {
+ // ds_read/write_b96 require 16-byte alignment on gfx8 and older.
+ bool Aligned = Alignment >= Align(16);
+ if (IsFast)
+ *IsFast = Aligned;
+
+ return Aligned;
+ }
+ if (Size == 128) {
+ // ds_read/write_b128 require 16-byte alignment on gfx8 and older, but we
+ // can do an 8 byte aligned, 16 byte access in a single operation using
+ // ds_read2/write2_b64.
+ bool Aligned = Alignment >= Align(8);
+ if (IsFast)
+ *IsFast = Aligned;
+
+ return Aligned;
+ }
+ }
+
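// Editorial sketch, not part of the patch: the LDS (LOCAL/REGION) rules
// encoded above for gfx8 and older, when the unaligned-DS override is off,
// collapsed into one helper (ldsAccessAllowed is a made-up name; the same
// condition is also reported through *IsFast).
static bool ldsAccessAllowed(unsigned SizeInBits, unsigned AlignInBytes) {
  switch (SizeInBits) {
  case 64:  return AlignInBytes >= 4;  // split into ds_read2/write2_b32
  case 96:  return AlignInBytes >= 16; // ds_read/write_b96 needs full alignment
  case 128: return AlignInBytes >= 8;  // split into ds_read2/write2_b64
  default:  return true; // sketch simplification: other sizes are not
  }                      // restricted by this block of the function
}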
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
+ bool AlignedBy4 = Alignment >= Align(4);
if (IsFast)
*IsFast = AlignedBy4;
- return AlignedBy4;
+ return AlignedBy4 ||
+ Subtarget->enableFlatScratch() ||
+ Subtarget->hasUnalignedScratchAccess();
}
// FIXME: We have to be conservative here and assume that flat operations
// will access scratch. If we had access to the IR function, then we
// could determine if any private memory was used in the function.
- if (!Subtarget->hasUnalignedScratchAccess() &&
- (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
- AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
- bool AlignedBy4 = Align >= 4;
+ if (AddrSpace == AMDGPUAS::FLAT_ADDRESS &&
+ !Subtarget->hasUnalignedScratchAccess()) {
+ bool AlignedBy4 = Alignment >= Align(4);
if (IsFast)
*IsFast = AlignedBy4;
return AlignedBy4;
}
- if (Subtarget->hasUnalignedBufferAccess()) {
+ if (Subtarget->hasUnalignedBufferAccessEnabled() &&
+ !(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
// If we have a uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
@@ -1393,7 +1447,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
// 2-byte alignment is worse than 1 unless doing a 2-byte access.
*IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
- Align >= 4 : Align != 2;
+ Alignment >= Align(4) : Alignment != Align(2);
}
return true;
@@ -1409,12 +1463,12 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
if (IsFast)
*IsFast = true;
- return Size >= 32 && Align >= 4;
+ return Size >= 32 && Alignment >= Align(4);
}
bool SITargetLowering::allowsMisalignedMemoryAccesses(
- EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
- bool *IsFast) const {
+ EVT VT, unsigned AddrSpace, unsigned Alignment,
+ MachineMemOperand::Flags Flags, bool *IsFast) const {
if (IsFast)
*IsFast = false;
@@ -1428,7 +1482,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
}
return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
- Align, Flags, IsFast);
+ Align(Alignment), Flags, IsFast);
}
EVT SITargetLowering::getOptimalMemOpType(
@@ -1449,11 +1503,6 @@ EVT SITargetLowering::getOptimalMemOpType(
return MVT::Other;
}
-bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
- unsigned DestAS) const {
- return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
-}
-
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
const Value *Ptr = MemNode->getMemOperand()->getValue();
@@ -1461,6 +1510,11 @@ bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
return I && I->getMetadata("amdgpu.noclobber");
}
+bool SITargetLowering::isNonGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS ||
+ AS == AMDGPUAS::PRIVATE_ADDRESS;
+}
+
bool SITargetLowering::isFreeAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
@@ -1468,7 +1522,9 @@ bool SITargetLowering::isFreeAddrSpaceCast(unsigned SrcAS,
if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
return true;
- return isNoopAddrSpaceCast(SrcAS, DestAS);
+ const GCNTargetMachine &TM =
+ static_cast<const GCNTargetMachine &>(getTargetMachine());
+ return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
}
bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
@@ -1537,7 +1593,7 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
- return DAG.getObjectPtrOffset(SL, BasePtr, Offset);
+ return DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Offset));
}
SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
@@ -1597,9 +1653,9 @@ SDValue SITargetLowering::lowerKernargMemParameter(
// TODO: If we passed in the base kernel offset we could have a better
// alignment than 4, but we don't really need it.
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset);
- SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr, PtrInfo, 4,
+ SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr, PtrInfo, Align(4),
MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
+ MachineMemOperand::MOInvariant);
SDValue ShiftAmt = DAG.getConstant(OffsetDiff * 8, SL, MVT::i32);
SDValue Extract = DAG.getNode(ISD::SRL, SL, MVT::i32, Load, ShiftAmt);
@@ -1682,12 +1738,11 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
}
-static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
- CallingConv::ID CallConv,
- ArrayRef<ISD::InputArg> Ins,
- BitVector &Skipped,
- FunctionType *FType,
- SIMachineFunctionInfo *Info) {
+static void processPSInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
+ CallingConv::ID CallConv,
+ ArrayRef<ISD::InputArg> Ins, BitVector &Skipped,
+ FunctionType *FType,
+ SIMachineFunctionInfo *Info) {
for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
const ISD::InputArg *Arg = &Ins[I];
@@ -1895,26 +1950,26 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
if (Info.hasImplicitBufferPtr()) {
- unsigned ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI);
+ Register ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
if (Info.hasPrivateSegmentBuffer()) {
- unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
+ Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
if (Info.hasDispatchPtr()) {
- unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
+ Register DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
if (Info.hasQueuePtr()) {
- unsigned QueuePtrReg = Info.addQueuePtr(TRI);
+ Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
@@ -1929,13 +1984,13 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
}
if (Info.hasDispatchID()) {
- unsigned DispatchIDReg = Info.addDispatchID(TRI);
+ Register DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
- if (Info.hasFlatScratchInit()) {
- unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
+ if (Info.hasFlatScratchInit() && !getSubtarget()->isAmdPalOS()) {
+ Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
}
@@ -1951,25 +2006,25 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
CallingConv::ID CallConv,
bool IsShader) const {
if (Info.hasWorkGroupIDX()) {
- unsigned Reg = Info.addWorkGroupIDX();
+ Register Reg = Info.addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupIDY()) {
- unsigned Reg = Info.addWorkGroupIDY();
+ Register Reg = Info.addWorkGroupIDY();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupIDZ()) {
- unsigned Reg = Info.addWorkGroupIDZ();
+ Register Reg = Info.addWorkGroupIDZ();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupInfo()) {
- unsigned Reg = Info.addWorkGroupInfo();
+ Register Reg = Info.addWorkGroupInfo();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
@@ -2020,26 +2075,28 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
// the scratch registers to pass in.
bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
- if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.getFunction())) {
- // If we have stack objects, we unquestionably need the private buffer
- // resource. For the Code Object V2 ABI, this will be the first 4 user
- // SGPR inputs. We can reserve those and use them directly.
+ if (!ST.enableFlatScratch()) {
+ if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.getFunction())) {
+ // If we have stack objects, we unquestionably need the private buffer
+ // resource. For the Code Object V2 ABI, this will be the first 4 user
+ // SGPR inputs. We can reserve those and use them directly.
- Register PrivateSegmentBufferReg =
- Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
- Info.setScratchRSrcReg(PrivateSegmentBufferReg);
- } else {
- unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
- // We tentatively reserve the last registers (skipping the last registers
- // which may contain VCC, FLAT_SCR, and XNACK). After register allocation,
- // we'll replace these with the ones immediately after those which were
- // really allocated. In the prologue copies will be inserted from the
- // argument to these reserved registers.
+ Register PrivateSegmentBufferReg =
+ Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
+ Info.setScratchRSrcReg(PrivateSegmentBufferReg);
+ } else {
+ unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
+ // We tentatively reserve the last registers (skipping the last registers
+ // which may contain VCC, FLAT_SCR, and XNACK). After register allocation,
+ // we'll replace these with the ones immediately after those which were
+ // really allocated. In the prologue copies will be inserted from the
+ // argument to these reserved registers.
- // Without HSA, relocations are used for the scratch pointer and the
- // buffer resource setup is always inserted in the prologue. Scratch wave
- // offset is still in an input SGPR.
- Info.setScratchRSrcReg(ReservedBufferReg);
+ // Without HSA, relocations are used for the scratch pointer and the
+ // buffer resource setup is always inserted in the prologue. Scratch wave
+ // offset is still in an input SGPR.
+ Info.setScratchRSrcReg(ReservedBufferReg);
+ }
}
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -2139,7 +2196,7 @@ SDValue SITargetLowering::LowerFormalArguments(
FunctionType *FType = MF.getFunction().getFunctionType();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
+ if (Subtarget->isAmdHsaOS() && AMDGPU::isGraphics(CallConv)) {
DiagnosticInfoUnsupported NoGraphicsHSA(
Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
DAG.getContext()->diagnose(NoGraphicsHSA);
@@ -2152,12 +2209,21 @@ SDValue SITargetLowering::LowerFormalArguments(
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
- bool IsShader = AMDGPU::isShader(CallConv);
+ bool IsGraphics = AMDGPU::isGraphics(CallConv);
bool IsKernel = AMDGPU::isKernel(CallConv);
bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
- if (IsShader) {
- processShaderInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
+ if (IsGraphics) {
+ assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() &&
+ (!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) &&
+ !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
+ !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
+ !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
+ !Info->hasWorkItemIDZ());
+ }
+
+ if (CallConv == CallingConv::AMDGPU_PS) {
+ processPSInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
// At least one interpolation mode must be enabled or else the GPU will
// hang.
@@ -2172,39 +2238,28 @@ SDValue SITargetLowering::LowerFormalArguments(
// - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
// - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
// enabled too.
- if (CallConv == CallingConv::AMDGPU_PS) {
- if ((Info->getPSInputAddr() & 0x7F) == 0 ||
- ((Info->getPSInputAddr() & 0xF) == 0 &&
- Info->isPSInputAllocated(11))) {
- CCInfo.AllocateReg(AMDGPU::VGPR0);
- CCInfo.AllocateReg(AMDGPU::VGPR1);
- Info->markPSInputAllocated(0);
- Info->markPSInputEnabled(0);
- }
- if (Subtarget->isAmdPalOS()) {
- // For isAmdPalOS, the user does not enable some bits after compilation
- // based on run-time states; the register values being generated here are
- // the final ones set in hardware. Therefore we need to apply the
- // workaround to PSInputAddr and PSInputEnable together. (The case where
- // a bit is set in PSInputAddr but not PSInputEnable is where the
- // frontend set up an input arg for a particular interpolation mode, but
- // nothing uses that input arg. Really we should have an earlier pass
- // that removes such an arg.)
- unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
- if ((PsInputBits & 0x7F) == 0 ||
- ((PsInputBits & 0xF) == 0 &&
- (PsInputBits >> 11 & 1)))
- Info->markPSInputEnabled(
- countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
- }
+ if ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 && Info->isPSInputAllocated(11))) {
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ Info->markPSInputAllocated(0);
+ Info->markPSInputEnabled(0);
+ }
+ if (Subtarget->isAmdPalOS()) {
+ // For isAmdPalOS, the user does not enable some bits after compilation
+ // based on run-time states; the register values being generated here are
+ // the final ones set in hardware. Therefore we need to apply the
+ // workaround to PSInputAddr and PSInputEnable together. (The case where
+ // a bit is set in PSInputAddr but not PSInputEnable is where the
+ // frontend set up an input arg for a particular interpolation mode, but
+ // nothing uses that input arg. Really we should have an earlier pass
+ // that removes such an arg.)
+ unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
+ if ((PsInputBits & 0x7F) == 0 ||
+ ((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
+ Info->markPSInputEnabled(
+ countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
}
-
- assert(!Info->hasDispatchPtr() &&
- !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
- !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
- !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
- !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
- !Info->hasWorkItemIDZ());
} else if (IsKernel) {
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
@@ -2253,9 +2308,23 @@ SDValue SITargetLowering::LowerFormalArguments(
const uint64_t Offset = VA.getLocMemOffset();
Align Alignment = commonAlignment(KernelArgBaseAlign, Offset);
- SDValue Arg =
- lowerKernargMemParameter(DAG, VT, MemVT, DL, Chain, Offset, Alignment,
- Ins[i].Flags.isSExt(), &Ins[i]);
+ if (Arg.Flags.isByRef()) {
+ SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, Chain, Offset);
+
+ const GCNTargetMachine &TM =
+ static_cast<const GCNTargetMachine &>(getTargetMachine());
+ if (!TM.isNoopAddrSpaceCast(AMDGPUAS::CONSTANT_ADDRESS,
+ Arg.Flags.getPointerAddrSpace())) {
+ Ptr = DAG.getAddrSpaceCast(DL, VT, Ptr, AMDGPUAS::CONSTANT_ADDRESS,
+ Arg.Flags.getPointerAddrSpace());
+ }
+
+ InVals.push_back(Ptr);
+ continue;
+ }
+
+ SDValue Arg = lowerKernargMemParameter(
+ DAG, VT, MemVT, DL, Chain, Offset, Alignment, Ins[i].Flags.isSExt(), &Ins[i]);
Chains.push_back(Arg.getValue(1));
auto *ParamTy =
@@ -2337,7 +2406,7 @@ SDValue SITargetLowering::LowerFormalArguments(
// Start adding system SGPRs.
if (IsEntryFunc) {
- allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader);
+ allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);
} else {
CCInfo.AllocateReg(Info->getScratchRSrcReg());
allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
@@ -2820,7 +2889,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
report_fatal_error("unsupported libcall legalization");
if (!AMDGPUTargetMachine::EnableFixedFunctionABI &&
- !CLI.CB->getCalledFunction()) {
+ !CLI.CB->getCalledFunction() && CallConv != CallingConv::AMDGPU_Gfx) {
return lowerUnhandledCall(CLI, InVals,
"unsupported indirect call to function ");
}
@@ -2830,11 +2899,19 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
"unsupported required tail call to function ");
}
- if (AMDGPU::isShader(MF.getFunction().getCallingConv())) {
- // Note the issue is with the CC of the calling function, not of the call
+ if (AMDGPU::isShader(CallConv)) {
+ // Note the issue is with the CC of the called function, not of the call
// itself.
return lowerUnhandledCall(CLI, InVals,
- "unsupported call from graphics shader of function ");
+ "unsupported call to a shader function ");
+ }
+
+ if (AMDGPU::isShader(MF.getFunction().getCallingConv()) &&
+ CallConv != CallingConv::AMDGPU_Gfx) {
+ // Only allow calls with specific calling conventions.
+ return lowerUnhandledCall(CLI, InVals,
+ "unsupported calling convention for call from "
+ "graphics shader of function ");
}
if (IsTailCall) {
@@ -2865,7 +2942,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
- if (AMDGPUTargetMachine::EnableFixedFunctionABI) {
+ if (AMDGPUTargetMachine::EnableFixedFunctionABI &&
+ CallConv != CallingConv::AMDGPU_Gfx) {
// With a fixed ABI, allocate fixed registers before user arguments.
passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
}
@@ -2894,14 +2972,16 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
if (!IsSibCall) {
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
- SmallVector<SDValue, 4> CopyFromChains;
+ if (!Subtarget->enableFlatScratch()) {
+ SmallVector<SDValue, 4> CopyFromChains;
- // In the HSA case, this should be an identity copy.
- SDValue ScratchRSrcReg
- = DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32);
- RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
- CopyFromChains.push_back(ScratchRSrcReg.getValue(1));
- Chain = DAG.getTokenFactor(DL, CopyFromChains);
+ // In the HSA case, this should be an identity copy.
+ SDValue ScratchRSrcReg
+ = DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32);
+ RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
+ CopyFromChains.push_back(ScratchRSrcReg.getValue(1));
+ Chain = DAG.getTokenFactor(DL, CopyFromChains);
+ }
}
MVT PtrVT = MVT::i32;
@@ -2992,14 +3072,15 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
MemOpChains.push_back(Cpy);
} else {
- SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo,
- Alignment ? Alignment->value() : 0);
+ SDValue Store =
+ DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Alignment);
MemOpChains.push_back(Store);
}
}
}
- if (!AMDGPUTargetMachine::EnableFixedFunctionABI) {
+ if (!AMDGPUTargetMachine::EnableFixedFunctionABI &&
+ CallConv != CallingConv::AMDGPU_Gfx) {
// Copy special input registers after user input arguments.
passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
}
@@ -3223,29 +3304,11 @@ Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT,
// If kill is not the last instruction, split the block so kill is always a
// proper terminator.
-MachineBasicBlock *SITargetLowering::splitKillBlock(MachineInstr &MI,
- MachineBasicBlock *BB) const {
+MachineBasicBlock *
+SITargetLowering::splitKillBlock(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ MachineBasicBlock *SplitBB = BB->splitAt(MI, false /*UpdateLiveIns*/);
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
-
- MachineBasicBlock::iterator SplitPoint(&MI);
- ++SplitPoint;
-
- if (SplitPoint == BB->end()) {
- // Don't bother with a new block.
- MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode()));
- return BB;
- }
-
- MachineFunction *MF = BB->getParent();
- MachineBasicBlock *SplitBB
- = MF->CreateMachineBasicBlock(BB->getBasicBlock());
-
- MF->insert(++MachineFunction::iterator(BB), SplitBB);
- SplitBB->splice(SplitBB->begin(), BB, SplitPoint, BB->end());
-
- SplitBB->transferSuccessorsAndUpdatePHIs(BB);
- BB->addSuccessor(SplitBB);
-
MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode()));
return SplitBB;
}
@@ -3357,20 +3420,14 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
// will only do one iteration. In the worst case, this will loop 64 times.
//
// TODO: Just use v_readlane_b32 if we know the VGPR has a uniform value.
-static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
- const SIInstrInfo *TII,
- MachineRegisterInfo &MRI,
- MachineBasicBlock &OrigBB,
- MachineBasicBlock &LoopBB,
- const DebugLoc &DL,
- const MachineOperand &IdxReg,
- unsigned InitReg,
- unsigned ResultReg,
- unsigned PhiReg,
- unsigned InitSaveExecReg,
- int Offset,
- bool UseGPRIdxMode,
- bool IsIndirectSrc) {
+static MachineBasicBlock::iterator
+emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI,
+ MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
+ const DebugLoc &DL, const MachineOperand &Idx,
+ unsigned InitReg, unsigned ResultReg, unsigned PhiReg,
+ unsigned InitSaveExecReg, int Offset, bool UseGPRIdxMode,
+ Register &SGPRIdxReg) {
+
MachineFunction *MF = OrigBB.getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -3396,12 +3453,12 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
// Read the next variant <- also loop target.
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentIdxReg)
- .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
+ .addReg(Idx.getReg(), getUndefRegState(Idx.isUndef()));
// Compare the just read M0 value to all possible Idx values.
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e64), CondReg)
- .addReg(CurrentIdxReg)
- .addReg(IdxReg.getReg(), 0, IdxReg.getSubReg());
+ .addReg(CurrentIdxReg)
+ .addReg(Idx.getReg(), 0, Idx.getSubReg());
// Update EXEC, save the original EXEC value to VCC.
BuildMI(LoopBB, I, DL, TII->get(ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32
@@ -3412,22 +3469,14 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
MRI.setSimpleHint(NewExec, CondReg);
if (UseGPRIdxMode) {
- unsigned IdxReg;
if (Offset == 0) {
- IdxReg = CurrentIdxReg;
+ SGPRIdxReg = CurrentIdxReg;
} else {
- IdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), IdxReg)
- .addReg(CurrentIdxReg, RegState::Kill)
- .addImm(Offset);
+ SGPRIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SGPRIdxReg)
+ .addReg(CurrentIdxReg, RegState::Kill)
+ .addImm(Offset);
}
- unsigned IdxMode = IsIndirectSrc ?
- AMDGPU::VGPRIndexMode::SRC0_ENABLE : AMDGPU::VGPRIndexMode::DST_ENABLE;
- MachineInstr *SetOn =
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
- .addReg(IdxReg, RegState::Kill)
- .addImm(IdxMode);
- SetOn->getOperand(3).setIsUndef();
} else {
// Move index from VCC into M0
if (Offset == 0) {
@@ -3463,14 +3512,10 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
// per-workitem, so is kept alive for the whole loop so we end up not re-using a
// subregister from it, using 1 more VGPR than necessary. This was saved when
// this was expanded after register allocation.
-static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
- MachineBasicBlock &MBB,
- MachineInstr &MI,
- unsigned InitResultReg,
- unsigned PhiReg,
- int Offset,
- bool UseGPRIdxMode,
- bool IsIndirectSrc) {
+static MachineBasicBlock::iterator
+loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI,
+ unsigned InitResultReg, unsigned PhiReg, int Offset,
+ bool UseGPRIdxMode, Register &SGPRIdxReg) {
MachineFunction *MF = MBB.getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -3499,7 +3544,8 @@ static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
auto InsPt = emitLoadM0FromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, *Idx,
InitResultReg, DstReg, PhiReg, TmpExec,
- Offset, UseGPRIdxMode, IsIndirectSrc);
+ Offset, UseGPRIdxMode, SGPRIdxReg);
+
MachineBasicBlock* LandingPad = MF->CreateMachineBasicBlock();
MachineFunction::iterator MBBI(LoopBB);
++MBBI;
@@ -3530,64 +3576,45 @@ computeIndirectRegAndOffset(const SIRegisterInfo &TRI,
return std::make_pair(SIRegisterInfo::getSubRegFromChannel(Offset), 0);
}
-// Return true if the index is an SGPR and was set.
-static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
- MachineRegisterInfo &MRI,
- MachineInstr &MI,
- int Offset,
- bool UseGPRIdxMode,
- bool IsIndirectSrc) {
+static void setM0ToIndexFromSGPR(const SIInstrInfo *TII,
+ MachineRegisterInfo &MRI, MachineInstr &MI,
+ int Offset) {
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
- const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg());
assert(Idx->getReg() != AMDGPU::NoRegister);
- if (!TII->getRegisterInfo().isSGPRClass(IdxRC))
- return false;
-
- if (UseGPRIdxMode) {
- unsigned IdxMode = IsIndirectSrc ?
- AMDGPU::VGPRIndexMode::SRC0_ENABLE : AMDGPU::VGPRIndexMode::DST_ENABLE;
- if (Offset == 0) {
- MachineInstr *SetOn =
- BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
- .add(*Idx)
- .addImm(IdxMode);
+ if (Offset == 0) {
+ BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0).add(*Idx);
+ } else {
+ BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .add(*Idx)
+ .addImm(Offset);
+ }
+}
- SetOn->getOperand(3).setIsUndef();
- } else {
- Register Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp)
- .add(*Idx)
- .addImm(Offset);
- MachineInstr *SetOn =
- BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
- .addReg(Tmp, RegState::Kill)
- .addImm(IdxMode);
+static Register getIndirectSGPRIdx(const SIInstrInfo *TII,
+ MachineRegisterInfo &MRI, MachineInstr &MI,
+ int Offset) {
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I(&MI);
- SetOn->getOperand(3).setIsUndef();
- }
+ const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
- return true;
- }
+ if (Offset == 0)
+ return Idx->getReg();
- if (Offset == 0) {
- BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .add(*Idx);
- } else {
- BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ Register Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp)
.add(*Idx)
.addImm(Offset);
- }
-
- return true;
+ return Tmp;
}
-// Control flow needs to be inserted if indexing with a VGPR.
static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
MachineBasicBlock &MBB,
const GCNSubtarget &ST) {
@@ -3597,10 +3624,12 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
Register Dst = MI.getOperand(0).getReg();
+ const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
Register SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg();
int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
const TargetRegisterClass *VecRC = MRI.getRegClass(SrcReg);
+ const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg());
unsigned SubReg;
std::tie(SubReg, Offset)
@@ -3608,7 +3637,8 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
const bool UseGPRIdxMode = ST.useVGPRIndexMode();
- if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) {
+ // Check for a SGPR index.
+ if (TII->getRegisterInfo().isSGPRClass(IdxRC)) {
MachineBasicBlock::iterator I(&MI);
const DebugLoc &DL = MI.getDebugLoc();
@@ -3616,14 +3646,19 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
// TODO: Look at the uses to avoid the copy. This may require rescheduling
// to avoid interfering with other uses, so probably requires a new
// optimization pass.
- BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
- .addReg(SrcReg, RegState::Undef, SubReg)
- .addReg(SrcReg, RegState::Implicit)
- .addReg(AMDGPU::M0, RegState::Implicit);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+ Register Idx = getIndirectSGPRIdx(TII, MRI, MI, Offset);
+
+ const MCInstrDesc &GPRIDXDesc =
+ TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), true);
+ BuildMI(MBB, I, DL, GPRIDXDesc, Dst)
+ .addReg(SrcReg)
+ .addReg(Idx)
+ .addImm(SubReg);
} else {
+ setM0ToIndexFromSGPR(TII, MRI, MI, Offset);
+
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, 0, SubReg)
.addReg(SrcReg, RegState::Implicit);
}
@@ -3632,6 +3667,7 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
return &MBB;
}
+ // Control flow needs to be inserted if indexing with a VGPR.
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
@@ -3640,19 +3676,23 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg);
- auto InsPt = loadM0FromVGPR(TII, MBB, MI, InitReg, PhiReg,
- Offset, UseGPRIdxMode, true);
+ Register SGPRIdxReg;
+ auto InsPt = loadM0FromVGPR(TII, MBB, MI, InitReg, PhiReg, Offset,
+ UseGPRIdxMode, SGPRIdxReg);
+
MachineBasicBlock *LoopBB = InsPt->getParent();
if (UseGPRIdxMode) {
- BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
- .addReg(SrcReg, RegState::Undef, SubReg)
- .addReg(SrcReg, RegState::Implicit)
- .addReg(AMDGPU::M0, RegState::Implicit);
- BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+ const MCInstrDesc &GPRIDXDesc =
+ TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), true);
+
+ BuildMI(*LoopBB, InsPt, DL, GPRIDXDesc, Dst)
+ .addReg(SrcReg)
+ .addReg(SGPRIdxReg)
+ .addImm(SubReg);
} else {
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, 0, SubReg)
.addReg(SrcReg, RegState::Implicit);
}
@@ -3675,6 +3715,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val);
int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
const TargetRegisterClass *VecRC = MRI.getRegClass(SrcVec->getReg());
+ const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg());
// This can be an immediate, but will be folded later.
assert(Val->getReg());
@@ -3700,23 +3741,36 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
return &MBB;
}
- const MCInstrDesc &MovRelDesc
- = TII->getIndirectRegWritePseudo(TRI.getRegSizeInBits(*VecRC), 32, false);
-
- if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, false)) {
+ // Check for a SGPR index.
+ if (TII->getRegisterInfo().isSGPRClass(IdxRC)) {
MachineBasicBlock::iterator I(&MI);
const DebugLoc &DL = MI.getDebugLoc();
- BuildMI(MBB, I, DL, MovRelDesc, Dst)
- .addReg(SrcVec->getReg())
- .add(*Val)
- .addImm(SubReg);
- if (UseGPRIdxMode)
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+ if (UseGPRIdxMode) {
+ Register Idx = getIndirectSGPRIdx(TII, MRI, MI, Offset);
+
+ const MCInstrDesc &GPRIDXDesc =
+ TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
+ BuildMI(MBB, I, DL, GPRIDXDesc, Dst)
+ .addReg(SrcVec->getReg())
+ .add(*Val)
+ .addReg(Idx)
+ .addImm(SubReg);
+ } else {
+ setM0ToIndexFromSGPR(TII, MRI, MI, Offset);
+
+ const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo(
+ TRI.getRegSizeInBits(*VecRC), 32, false);
+ BuildMI(MBB, I, DL, MovRelDesc, Dst)
+ .addReg(SrcVec->getReg())
+ .add(*Val)
+ .addImm(SubReg);
+ }
MI.eraseFromParent();
return &MBB;
}
+ // Control flow needs to be inserted if indexing with a VGPR.
if (Val->isReg())
MRI.clearKillFlags(Val->getReg());
@@ -3724,16 +3778,28 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
Register PhiReg = MRI.createVirtualRegister(VecRC);
- auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg,
- Offset, UseGPRIdxMode, false);
+ Register SGPRIdxReg;
+ auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg, Offset,
+ UseGPRIdxMode, SGPRIdxReg);
MachineBasicBlock *LoopBB = InsPt->getParent();
- BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
- .addReg(PhiReg)
- .add(*Val)
- .addImm(AMDGPU::sub0);
- if (UseGPRIdxMode)
- BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+ if (UseGPRIdxMode) {
+ const MCInstrDesc &GPRIDXDesc =
+ TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
+
+ BuildMI(*LoopBB, InsPt, DL, GPRIDXDesc, Dst)
+ .addReg(PhiReg)
+ .add(*Val)
+ .addReg(SGPRIdxReg)
+ .addImm(AMDGPU::sub0);
+ } else {
+ const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo(
+ TRI.getRegSizeInBits(*VecRC), 32, false);
+ BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
+ .addReg(PhiReg)
+ .add(*Val)
+ .addImm(AMDGPU::sub0);
+ }
MI.eraseFromParent();
return LoopBB;
@@ -3849,7 +3915,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineOperand SrcReg1Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
- unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+ unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
MachineInstr *LoHalf = BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0)
.addReg(CarryReg, RegState::Define)
.add(SrcReg0Sub0)
@@ -3912,10 +3978,29 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
Src2.setReg(RegOp2);
}
- if (TRI->getRegSizeInBits(*MRI.getRegClass(Src2.getReg())) == 64) {
- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
- .addReg(Src2.getReg())
- .addImm(0);
+ const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
+ if (TRI->getRegSizeInBits(*Src2RC) == 64) {
+ if (ST.hasScalarCompareEq64()) {
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
+ .addReg(Src2.getReg())
+ .addImm(0);
+ } else {
+ const TargetRegisterClass *SubRC =
+ TRI->getSubRegClass(Src2RC, AMDGPU::sub0);
+ MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
+ MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
+ MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
+ MII, MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
+ Register Src2_32 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_OR_B32), Src2_32)
+ .add(Src2Sub0)
+ .add(Src2Sub1);
+
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
+ .addReg(Src2_32, RegState::Kill)
+ .addImm(0);
+ }
} else {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMPK_LG_U32))
.addReg(Src2.getReg())
@@ -3936,77 +4021,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MI.eraseFromParent();
return BB;
}
- case AMDGPU::SI_INIT_EXEC:
- // This should be before all vector instructions.
- BuildMI(*BB, &*BB->begin(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
- AMDGPU::EXEC)
- .addImm(MI.getOperand(0).getImm());
- MI.eraseFromParent();
- return BB;
-
- case AMDGPU::SI_INIT_EXEC_LO:
- // This should be before all vector instructions.
- BuildMI(*BB, &*BB->begin(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
- AMDGPU::EXEC_LO)
- .addImm(MI.getOperand(0).getImm());
- MI.eraseFromParent();
- return BB;
-
- case AMDGPU::SI_INIT_EXEC_FROM_INPUT: {
- // Extract the thread count from an SGPR input and set EXEC accordingly.
- // Since BFM can't shift by 64, handle that case with CMP + CMOV.
- //
- // S_BFE_U32 count, input, {shift, 7}
- // S_BFM_B64 exec, count, 0
- // S_CMP_EQ_U32 count, 64
- // S_CMOV_B64 exec, -1
- MachineInstr *FirstMI = &*BB->begin();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- Register InputReg = MI.getOperand(0).getReg();
- Register CountReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- bool Found = false;
-
- // Move the COPY of the input reg to the beginning, so that we can use it.
- for (auto I = BB->begin(); I != &MI; I++) {
- if (I->getOpcode() != TargetOpcode::COPY ||
- I->getOperand(0).getReg() != InputReg)
- continue;
-
- if (I == FirstMI) {
- FirstMI = &*++BB->begin();
- } else {
- I->removeFromParent();
- BB->insert(FirstMI, &*I);
- }
- Found = true;
- break;
- }
- assert(Found);
- (void)Found;
-
- // This should be before all vector instructions.
- unsigned Mask = (getSubtarget()->getWavefrontSize() << 1) - 1;
- bool isWave32 = getSubtarget()->isWave32();
- unsigned Exec = isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFE_U32), CountReg)
- .addReg(InputReg)
- .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
- BuildMI(*BB, FirstMI, DebugLoc(),
- TII->get(isWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64),
- Exec)
- .addReg(CountReg)
- .addImm(0);
- BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMP_EQ_U32))
- .addReg(CountReg, RegState::Kill)
- .addImm(getSubtarget()->getWavefrontSize());
- BuildMI(*BB, FirstMI, DebugLoc(),
- TII->get(isWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
- Exec)
- .addImm(-1);
- MI.eraseFromParent();
- return BB;
- }
-
case AMDGPU::GET_GROUPSTATICSIZE: {
assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);
@@ -4086,13 +4100,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::ADJCALLSTACKDOWN: {
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
MachineInstrBuilder MIB(*MF, &MI);
-
- // Add an implicit use of the frame offset reg to prevent the restore copy
- // inserted after the call from being reorderd after stack operations in the
- // the caller's frame.
MIB.addReg(Info->getStackPtrOffsetReg(), RegState::ImplicitDefine)
- .addReg(Info->getStackPtrOffsetReg(), RegState::Implicit)
- .addReg(Info->getFrameOffsetReg(), RegState::Implicit);
+ .addReg(Info->getStackPtrOffsetReg(), RegState::Implicit);
return BB;
}
case AMDGPU::SI_CALL_ISEL: {
@@ -4111,9 +4120,9 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MI.eraseFromParent();
return BB;
}
- case AMDGPU::V_ADD_I32_e32:
- case AMDGPU::V_SUB_I32_e32:
- case AMDGPU::V_SUBREV_I32_e32: {
+ case AMDGPU::V_ADD_CO_U32_e32:
+ case AMDGPU::V_SUB_CO_U32_e32:
+ case AMDGPU::V_SUBREV_CO_U32_e32: {
// TODO: Define distinct V_*_I32_Pseudo instructions instead.
const DebugLoc &DL = MI.getDebugLoc();
unsigned Opc = MI.getOpcode();
@@ -4154,9 +4163,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
return emitGWSMemViolTestLoop(MI, BB);
case AMDGPU::S_SETREG_B32: {
- if (!getSubtarget()->hasDenormModeInst())
- return BB;
-
// Try to optimize cases that only set the denormal mode or rounding mode.
//
// If the s_setreg_b32 fully sets all of the bits in the rounding mode or
@@ -4166,9 +4172,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
// FIXME: This could be predicates on the immediate, but tablegen doesn't
// allow you to have a no side effect instruction in the output of a
// sideeffecting pattern.
-
- // TODO: Should also emit a no side effects pseudo if only FP bits are
- // touched, even if not all of them or to a variable.
unsigned ID, Offset, Width;
AMDGPU::Hwreg::decodeHwreg(MI.getOperand(1).getImm(), ID, Offset, Width);
if (ID != AMDGPU::Hwreg::ID_MODE)
@@ -4176,50 +4179,54 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
const unsigned WidthMask = maskTrailingOnes<unsigned>(Width);
const unsigned SetMask = WidthMask << Offset;
- unsigned SetDenormOp = 0;
- unsigned SetRoundOp = 0;
-
- // The dedicated instructions can only set the whole denorm or round mode at
- // once, not a subset of bits in either.
- if (Width == 8 && (SetMask & (AMDGPU::Hwreg::FP_ROUND_MASK |
- AMDGPU::Hwreg::FP_DENORM_MASK)) == SetMask) {
- // If this fully sets both the round and denorm mode, emit the two
- // dedicated instructions for these.
- assert(Offset == 0);
- SetRoundOp = AMDGPU::S_ROUND_MODE;
- SetDenormOp = AMDGPU::S_DENORM_MODE;
- } else if (Width == 4) {
- if ((SetMask & AMDGPU::Hwreg::FP_ROUND_MASK) == SetMask) {
+
+ if (getSubtarget()->hasDenormModeInst()) {
+ unsigned SetDenormOp = 0;
+ unsigned SetRoundOp = 0;
+
+ // The dedicated instructions can only set the whole denorm or round mode
+ // at once, not a subset of bits in either.
+ if (SetMask ==
+ (AMDGPU::Hwreg::FP_ROUND_MASK | AMDGPU::Hwreg::FP_DENORM_MASK)) {
+ // If this fully sets both the round and denorm mode, emit the two
+ // dedicated instructions for these.
SetRoundOp = AMDGPU::S_ROUND_MODE;
- assert(Offset == 0);
- } else if ((SetMask & AMDGPU::Hwreg::FP_DENORM_MASK) == SetMask) {
SetDenormOp = AMDGPU::S_DENORM_MODE;
- assert(Offset == 4);
+ } else if (SetMask == AMDGPU::Hwreg::FP_ROUND_MASK) {
+ SetRoundOp = AMDGPU::S_ROUND_MODE;
+ } else if (SetMask == AMDGPU::Hwreg::FP_DENORM_MASK) {
+ SetDenormOp = AMDGPU::S_DENORM_MODE;
}
- }
- if (SetRoundOp || SetDenormOp) {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
- if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
- unsigned ImmVal = Def->getOperand(1).getImm();
- if (SetRoundOp) {
- BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetRoundOp))
- .addImm(ImmVal & 0xf);
+ if (SetRoundOp || SetDenormOp) {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
+ if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
+ unsigned ImmVal = Def->getOperand(1).getImm();
+ if (SetRoundOp) {
+ BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetRoundOp))
+ .addImm(ImmVal & 0xf);
+
+ // If we also have the denorm mode, get just the denorm mode bits.
+ ImmVal >>= 4;
+ }
- // If we also have the denorm mode, get just the denorm mode bits.
- ImmVal >>= 4;
- }
+ if (SetDenormOp) {
+ BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetDenormOp))
+ .addImm(ImmVal & 0xf);
+ }
- if (SetDenormOp) {
- BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetDenormOp))
- .addImm(ImmVal & 0xf);
+ MI.eraseFromParent();
+ return BB;
}
-
- MI.eraseFromParent();
}
}
+ // If only FP bits are touched, use the no side effects pseudo.
+ if ((SetMask & (AMDGPU::Hwreg::FP_ROUND_MASK |
+ AMDGPU::Hwreg::FP_DENORM_MASK)) == SetMask)
+ MI.setDesc(TII->get(AMDGPU::S_SETREG_B32_mode));
+
return BB;
}
default:
@@ -4256,6 +4263,12 @@ MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT VT) const {
return (VT == MVT::i16) ? MVT::i16 : MVT::i32;
}
+LLT SITargetLowering::getPreferredShiftAmountTy(LLT Ty) const {
+ return (Ty.getScalarSizeInBits() <= 16 && Subtarget->has16BitInsts())
+ ? Ty.changeElementSize(16)
+ : Ty.changeElementSize(32);
+}
+
// Answering this is somewhat tricky and depends on the specific device which
// have different rates for fma or all f64 operations.
//
@@ -4457,6 +4470,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FMUL:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
return splitBinaryVectorOp(Op, DAG);
case ISD::SMULO:
case ISD::UMULO:
@@ -4467,31 +4484,47 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// Used for D16: Casts the result of an instruction into the right vector,
+// packs values if loads return unpacked values.
static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT,
const SDLoc &DL,
SelectionDAG &DAG, bool Unpacked) {
if (!LoadVT.isVector())
return Result;
+ // Cast back to the original packed type or to a larger type that is a
+ // multiple of 32 bit for D16. Widening the return type is a required for
+ // legalization.
+ EVT FittingLoadVT = LoadVT;
+ if ((LoadVT.getVectorNumElements() % 2) == 1) {
+ FittingLoadVT =
+ EVT::getVectorVT(*DAG.getContext(), LoadVT.getVectorElementType(),
+ LoadVT.getVectorNumElements() + 1);
+ }
+
if (Unpacked) { // From v2i32/v4i32 back to v2f16/v4f16.
// Truncate to v2i16/v4i16.
- EVT IntLoadVT = LoadVT.changeTypeToInteger();
+ EVT IntLoadVT = FittingLoadVT.changeTypeToInteger();
// Workaround legalizer not scalarizing truncate after vector op
- // legalization byt not creating intermediate vector trunc.
+ // legalization but not creating intermediate vector trunc.
SmallVector<SDValue, 4> Elts;
DAG.ExtractVectorElements(Result, Elts);
for (SDValue &Elt : Elts)
Elt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Elt);
+ // Pad illegal v1i16/v3f16 to v4i16
+ if ((LoadVT.getVectorNumElements() % 2) == 1)
+ Elts.push_back(DAG.getUNDEF(MVT::i16));
+
Result = DAG.getBuildVector(IntLoadVT, DL, Elts);
// Bitcast to original type (v2f16/v4f16).
- return DAG.getNode(ISD::BITCAST, DL, LoadVT, Result);
+ return DAG.getNode(ISD::BITCAST, DL, FittingLoadVT, Result);
}
// Cast back to the original packed type.
- return DAG.getNode(ISD::BITCAST, DL, LoadVT, Result);
+ return DAG.getNode(ISD::BITCAST, DL, FittingLoadVT, Result);
}
SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
@@ -4505,10 +4538,16 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
EVT LoadVT = M->getValueType(0);
EVT EquivLoadVT = LoadVT;
- if (Unpacked && LoadVT.isVector()) {
- EquivLoadVT = LoadVT.isVector() ?
- EVT::getVectorVT(*DAG.getContext(), MVT::i32,
- LoadVT.getVectorNumElements()) : LoadVT;
+ if (LoadVT.isVector()) {
+ if (Unpacked) {
+ EquivLoadVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ LoadVT.getVectorNumElements());
+ } else if ((LoadVT.getVectorNumElements() % 2) == 1) {
+ // Widen v3f16 to legal type
+ EquivLoadVT =
+ EVT::getVectorVT(*DAG.getContext(), LoadVT.getVectorElementType(),
+ LoadVT.getVectorNumElements() + 1);
+ }
}
// Change from v4f16/v2f16 to EquivLoadVT.
@@ -4519,8 +4558,6 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
IsIntrinsic ? (unsigned)ISD::INTRINSIC_W_CHAIN : Opcode, DL,
VTList, Ops, M->getMemoryVT(),
M->getMemOperand());
- if (!Unpacked) // Just adjusted the opcode.
- return Load;
SDValue Adjusted = adjustLoadValueTypeImpl(Load, LoadVT, DL, DAG, Unpacked);
@@ -4724,8 +4761,9 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
if (SDValue Res = LowerINTRINSIC_W_CHAIN(SDValue(N, 0), DAG)) {
if (Res.getOpcode() == ISD::MERGE_VALUES) {
// FIXME: Hacky
- Results.push_back(Res.getOperand(0));
- Results.push_back(Res.getOperand(1));
+ for (unsigned I = 0; I < Res.getNumOperands(); I++) {
+ Results.push_back(Res.getOperand(I));
+ }
} else {
Results.push_back(Res);
Results.push_back(Res.getValue(1));
@@ -4967,7 +5005,7 @@ SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
// Get the return address reg and mark it as an implicit live-in
- unsigned Reg = MF.addLiveIn(TRI->getReturnAddressReg(MF), getRegClassFor(VT, Op.getNode()->isDivergent()));
+ Register Reg = MF.addLiveIn(TRI->getReturnAddressReg(MF), getRegClassFor(VT, Op.getNode()->isDivergent()));
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
@@ -5063,7 +5101,7 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- unsigned UserSGPR = Info->getQueuePtrUserSGPR();
+ Register UserSGPR = Info->getQueuePtrUserSGPR();
assert(UserSGPR != AMDGPU::NoRegister);
SDValue QueuePtr = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
@@ -5136,14 +5174,15 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// private_segment_aperture_base_hi.
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
- SDValue Ptr = DAG.getObjectPtrOffset(DL, QueuePtr, StructOffset);
+ SDValue Ptr =
+ DAG.getObjectPtrOffset(DL, QueuePtr, TypeSize::Fixed(StructOffset));
// TODO: Use custom target PseudoSourceValue.
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available and how do we get it?
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
- MinAlign(64, StructOffset),
+ commonAlignment(Align(64), StructOffset),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
}
@@ -5504,7 +5543,9 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
// variable, but since the encoding of $symbol starts 4 bytes after the start
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
// small. This requires us to add 4 to the global variable offset in order to
- // compute the correct address.
+ // compute the correct address. Similarly for the s_addc_u32 instruction, the
+ // encoding of $symbol starts 12 bytes after the start of the s_add_u32
+ // instruction.
SDValue PtrLo =
DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags);
SDValue PtrHi;
@@ -5512,7 +5553,7 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
PtrHi = DAG.getTargetConstant(0, DL, MVT::i32);
} else {
PtrHi =
- DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags + 1);
+ DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 12, GAFlags + 1);
}
return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, PtrLo, PtrHi);
}
@@ -5521,15 +5562,32 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
+ SDLoc DL(GSD);
+ EVT PtrVT = Op.getValueType();
+
const GlobalValue *GV = GSD->getGlobal();
if ((GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
shouldUseLDSConstAddress(GV)) ||
GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
- GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+ GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ GV->hasExternalLinkage()) {
+ Type *Ty = GV->getValueType();
+ // HIP uses an unsized array `extern __shared__ T s[]` or similar
+ // zero-sized type in other languages to declare the dynamic shared
+ // memory whose size is not known at compile time. They will be
+ // allocated by the runtime and placed directly after the statically
+ // allocated ones. They all share the same offset.
+ if (DAG.getDataLayout().getTypeAllocSize(Ty).isZero()) {
+ assert(PtrVT == MVT::i32 && "32-bit pointer is expected.");
+ // Adjust alignment for that dynamic shared memory array.
+ MFI->setDynLDSAlign(DAG.getDataLayout(), *cast<GlobalVariable>(GV));
+ return SDValue(
+ DAG.getMachineNode(AMDGPU::GET_GROUPSTATICSIZE, DL, PtrVT), 0);
+ }
+ }
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
-
- SDLoc DL(GSD);
- EVT PtrVT = Op.getValueType();
+ }
if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, GSD->getOffset(),
@@ -5713,7 +5771,7 @@ static SDValue constructRetValue(SelectionDAG &DAG,
SDValue Data(Result, 0);
SDValue TexFail;
- if (IsTexFail) {
+ if (DMaskPop > 0 && Data.getValueType() != MaskPopVT) {
SDValue ZeroIdx = DAG.getConstant(0, DL, MVT::i32);
if (MaskPopVT.isVector()) {
Data = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskPopVT,
@@ -5722,10 +5780,6 @@ static SDValue constructRetValue(SelectionDAG &DAG,
Data = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskPopVT,
SDValue(Result, 0), ZeroIdx);
}
-
- TexFail = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
- SDValue(Result, 0),
- DAG.getConstant(MaskPopDwords, DL, MVT::i32));
}
if (DataDwordVT.isVector())
@@ -5735,13 +5789,27 @@ static SDValue constructRetValue(SelectionDAG &DAG,
if (IsD16)
Data = adjustLoadValueTypeImpl(Data, ReqRetVT, DL, DAG, Unpacked);
- if (!ReqRetVT.isVector())
+ EVT LegalReqRetVT = ReqRetVT;
+ if (!ReqRetVT.isVector()) {
Data = DAG.getNode(ISD::TRUNCATE, DL, ReqRetVT.changeTypeToInteger(), Data);
+ } else {
+ // We need to widen the return vector to a legal type
+ if ((ReqRetVT.getVectorNumElements() % 2) == 1 &&
+ ReqRetVT.getVectorElementType().getSizeInBits() == 16) {
+ LegalReqRetVT =
+ EVT::getVectorVT(*DAG.getContext(), ReqRetVT.getVectorElementType(),
+ ReqRetVT.getVectorNumElements() + 1);
+ }
+ }
+ Data = DAG.getNode(ISD::BITCAST, DL, LegalReqRetVT, Data);
- Data = DAG.getNode(ISD::BITCAST, DL, ReqRetVT, Data);
+ if (IsTexFail) {
+ TexFail =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, SDValue(Result, 0),
+ DAG.getConstant(MaskPopDwords, DL, MVT::i32));
- if (TexFail)
return DAG.getMergeValues({Data, TexFail, SDValue(Result, 1)}, DL);
+ }
if (Result->getNumValues() == 1)
return Data;
@@ -5798,7 +5866,7 @@ static void packImageA16AddressToDwords(SelectionDAG &DAG, SDValue Op,
SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG, bool WithChain) const {
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
@@ -5810,10 +5878,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;
- bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
+ bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
- SmallVector<EVT, 3> ResultTypes(Op->value_begin(), Op->value_end());
- SmallVector<EVT, 3> OrigResultTypes(Op->value_begin(), Op->value_end());
+ SmallVector<EVT, 3> ResultTypes(Op->values());
+ SmallVector<EVT, 3> OrigResultTypes(Op->values());
bool IsD16 = false;
bool IsG16 = false;
bool IsA16 = false;
@@ -5821,7 +5889,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
int NumVDataDwords;
bool AdjustRetType = false;
- unsigned AddrIdx; // Index of first address argument
+ // Offset of intrinsic arguments
+ const unsigned ArgOffset = WithChain ? 2 : 1;
+
unsigned DMask;
unsigned DMaskLanes = 0;
@@ -5839,15 +5909,13 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
DMask = Is64Bit ? 0xf : 0x3;
NumVDataDwords = Is64Bit ? 4 : 2;
- AddrIdx = 4;
} else {
DMask = Is64Bit ? 0x3 : 0x1;
NumVDataDwords = Is64Bit ? 2 : 1;
- AddrIdx = 3;
}
} else {
- unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa<MemSDNode>(Op) ? 2 : 1;
- auto DMaskConst = cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
+ auto *DMaskConst =
+ cast<ConstantSDNode>(Op.getOperand(ArgOffset + Intr->DMaskIndex));
DMask = DMaskConst->getZExtValue();
DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
@@ -5860,7 +5928,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
return Op; // D16 is unsupported for this instruction
IsD16 = true;
- VData = handleD16VData(VData, DAG);
+ VData = handleD16VData(VData, DAG, true);
}
NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32;
@@ -5880,63 +5948,56 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
(!LoadVT.isVector() && DMaskLanes > 1))
return Op;
- if (IsD16 && !Subtarget->hasUnpackedD16VMem())
+ // The sq block of gfx8 and gfx9 do not estimate register use correctly
+ // for d16 image_gather4, image_gather4_l, and image_gather4_lz
+ // instructions.
+ if (IsD16 && !Subtarget->hasUnpackedD16VMem() &&
+ !(BaseOpcode->Gather4 && Subtarget->hasImageGather4D16Bug()))
NumVDataDwords = (DMaskLanes + 1) / 2;
else
NumVDataDwords = DMaskLanes;
AdjustRetType = true;
}
-
- AddrIdx = DMaskIdx + 1;
}
- unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
- unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
- unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
- unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
- NumCoords + NumLCM;
- unsigned NumMIVAddrs = NumVAddrs;
-
+ unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd;
SmallVector<SDValue, 4> VAddrs;
// Optimize _L to _LZ when _L is zero
if (LZMappingInfo) {
- if (auto ConstantLod =
- dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
+ if (auto *ConstantLod = dyn_cast<ConstantFPSDNode>(
+ Op.getOperand(ArgOffset + Intr->LodIndex))) {
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
- NumMIVAddrs--; // remove 'lod'
+ VAddrEnd--; // remove 'lod'
}
}
}
// Optimize _mip away, when 'lod' is zero
if (MIPMappingInfo) {
- if (auto ConstantLod =
- dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
+ if (auto *ConstantLod = dyn_cast<ConstantSDNode>(
+ Op.getOperand(ArgOffset + Intr->MipIndex))) {
if (ConstantLod->isNullValue()) {
IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
- NumMIVAddrs--; // remove 'lod'
+ VAddrEnd--; // remove 'mip'
}
}
}
// Push back extra arguments.
- for (unsigned I = 0; I < BaseOpcode->NumExtraArgs; I++)
- VAddrs.push_back(Op.getOperand(AddrIdx + I));
+ for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++)
+ VAddrs.push_back(Op.getOperand(ArgOffset + I));
// Check for 16 bit addresses or derivatives and pack if true.
- unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
- unsigned CoordIdx = DimIdx + NumGradients;
- unsigned CoordsEnd = AddrIdx + NumMIVAddrs;
-
- MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
+ MVT VAddrVT =
+ Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();
MVT VAddrScalarVT = VAddrVT.getScalarType();
MVT PackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
- VAddrVT = Op.getOperand(CoordIdx).getSimpleValueType();
+ VAddrVT = Op.getOperand(ArgOffset + Intr->CoordStart).getSimpleValueType();
VAddrScalarVT = VAddrVT.getScalarType();
IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
if (IsA16 || IsG16) {
@@ -5971,17 +6032,18 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
// Don't compress addresses for G16
- const int PackEndIdx = IsA16 ? CoordsEnd : CoordIdx;
- packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs, DimIdx,
- PackEndIdx, NumGradients);
+ const int PackEndIdx = IsA16 ? VAddrEnd : (ArgOffset + Intr->CoordStart);
+ packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs,
+ ArgOffset + Intr->GradientStart, PackEndIdx,
+ Intr->NumGradients);
if (!IsA16) {
// Add uncompressed address
- for (unsigned I = CoordIdx; I < CoordsEnd; I++)
+ for (unsigned I = ArgOffset + Intr->CoordStart; I < VAddrEnd; I++)
VAddrs.push_back(Op.getOperand(I));
}
} else {
- for (unsigned I = DimIdx; I < CoordsEnd; I++)
+ for (unsigned I = ArgOffset + Intr->GradientStart; I < VAddrEnd; I++)
VAddrs.push_back(Op.getOperand(I));
}
@@ -6004,22 +6066,19 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
- unsigned CtrlIdx; // Index of texfailctrl argument
SDValue Unorm;
if (!BaseOpcode->Sampler) {
Unorm = True;
- CtrlIdx = AddrIdx + NumVAddrs + 1;
} else {
auto UnormConst =
- cast<ConstantSDNode>(Op.getOperand(AddrIdx + NumVAddrs + 2));
+ cast<ConstantSDNode>(Op.getOperand(ArgOffset + Intr->UnormIndex));
Unorm = UnormConst->getZExtValue() ? True : False;
- CtrlIdx = AddrIdx + NumVAddrs + 3;
}
SDValue TFE;
SDValue LWE;
- SDValue TexFail = Op.getOperand(CtrlIdx);
+ SDValue TexFail = Op.getOperand(ArgOffset + Intr->TexFailCtrlIndex);
bool IsTexFail = false;
if (!parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail))
return Op;
@@ -6066,42 +6125,40 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SDValue DLC;
if (BaseOpcode->Atomic) {
GLC = True; // TODO no-return optimization
- if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC,
- IsGFX10 ? &DLC : nullptr))
+ if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex),
+ DAG, nullptr, &SLC, IsGFX10Plus ? &DLC : nullptr))
return Op;
} else {
- if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC,
- IsGFX10 ? &DLC : nullptr))
+ if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex),
+ DAG, &GLC, &SLC, IsGFX10Plus ? &DLC : nullptr))
return Op;
}
SmallVector<SDValue, 26> Ops;
if (BaseOpcode->Store || BaseOpcode->Atomic)
Ops.push_back(VData); // vdata
- if (UseNSA) {
- for (const SDValue &Addr : VAddrs)
- Ops.push_back(Addr);
- } else {
+ if (UseNSA)
+ append_range(Ops, VAddrs);
+ else
Ops.push_back(VAddr);
- }
- Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc
+ Ops.push_back(Op.getOperand(ArgOffset + Intr->RsrcIndex));
if (BaseOpcode->Sampler)
- Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler
+ Ops.push_back(Op.getOperand(ArgOffset + Intr->SampIndex));
Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
- if (IsGFX10)
+ if (IsGFX10Plus)
Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
Ops.push_back(Unorm);
- if (IsGFX10)
+ if (IsGFX10Plus)
Ops.push_back(DLC);
Ops.push_back(GLC);
Ops.push_back(SLC);
Ops.push_back(IsA16 && // r128, a16 for gfx9
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
- if (IsGFX10)
+ if (IsGFX10Plus)
Ops.push_back(IsA16 ? True : False);
Ops.push_back(TFE);
Ops.push_back(LWE);
- if (!IsGFX10)
+ if (!IsGFX10Plus)
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
Ops.push_back(IsD16 ? True : False);
@@ -6112,7 +6169,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32;
int Opcode = -1;
- if (IsGFX10) {
+ if (IsGFX10Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx10NSA
: AMDGPU::MIMGEncGfx10Default,
@@ -6391,11 +6448,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
SDLoc(Op), MVT::i32);
case Intrinsic::amdgcn_s_buffer_load: {
- bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
+ bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
SDValue GLC;
SDValue DLC = DAG.getTargetConstant(0, DL, MVT::i1);
if (!parseCachePolicy(Op.getOperand(3), DAG, &GLC, nullptr,
- IsGFX10 ? &DLC : nullptr))
+ IsGFX10Plus ? &DLC : nullptr))
return Op;
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
DAG);
@@ -6417,11 +6474,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return SDValue();
- DiagnosticInfoUnsupported BadIntrin(
- MF.getFunction(), "intrinsic not supported on subtarget",
- DL.getDebugLoc());
- DAG.getContext()->diagnose(BadIntrin);
- return DAG.getUNDEF(VT);
+ return emitRemovedIntrinsicError(DAG, DL, VT);
}
case Intrinsic::amdgcn_ldexp:
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT,
@@ -6567,7 +6620,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
- return lowerImage(Op, ImageDimIntr, DAG);
+ return lowerImage(Op, ImageDimIntr, DAG, false);
return Op;
}
@@ -6597,26 +6650,59 @@ static unsigned getBufferOffsetForMMO(SDValue VOffset,
cast<ConstantSDNode>(Offset)->getSExtValue();
}
-static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
- switch (MF.getFunction().getCallingConv()) {
- case CallingConv::AMDGPU_PS:
- return 1;
- case CallingConv::AMDGPU_VS:
- return 2;
- case CallingConv::AMDGPU_GS:
- return 3;
- case CallingConv::AMDGPU_HS:
- case CallingConv::AMDGPU_LS:
- case CallingConv::AMDGPU_ES:
- report_fatal_error("ds_ordered_count unsupported for this calling conv");
- case CallingConv::AMDGPU_CS:
- case CallingConv::AMDGPU_KERNEL:
- case CallingConv::C:
- case CallingConv::Fast:
- default:
- // Assume other calling conventions are various compute callable functions
- return 0;
- }
+SDValue SITargetLowering::lowerRawBufferAtomicIntrin(SDValue Op,
+ SelectionDAG &DAG,
+ unsigned NewOpcode) const {
+ SDLoc DL(Op);
+
+ SDValue VData = Op.getOperand(2);
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ VData, // vdata
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
+ };
+
+ auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6]));
+
+ EVT MemVT = VData.getValueType();
+ return DAG.getMemIntrinsicNode(NewOpcode, DL, Op->getVTList(), Ops, MemVT,
+ M->getMemOperand());
+}
+
+SDValue
+SITargetLowering::lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
+ unsigned NewOpcode) const {
+ SDLoc DL(Op);
+
+ SDValue VData = Op.getOperand(2);
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ VData, // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
+ };
+
+ auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6],
+ Ops[3]));
+
+ EVT MemVT = VData.getValueType();
+ return DAG.getMemIntrinsicNode(NewOpcode, DL, Op->getVTList(), Ops, MemVT,
+ M->getMemOperand());
}
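
The two helpers above replace the large per-intrinsic switches further down with a single routine that assembles the shared buffer-atomic operand list and is parameterized only by the target opcode. A rough standalone sketch of that shape, using hypothetical types and names rather than the real SelectionDAG API:

// Standalone sketch: the raw and struct lowerings share one operand layout;
// the raw form forces vindex to zero and leaves idxen clear, while the struct
// form passes the caller's vindex with idxen set.
#include <cstdint>
#include <cstdio>

struct BufferAtomicOps {
  uint32_t VData, Rsrc, VIndex, VOffset, SOffset, Offset, CachePolicy;
  bool IdxEn;
};

static BufferAtomicOps buildOps(uint32_t VData, uint32_t Rsrc, uint32_t VIndex,
                                uint32_t VOffset, uint32_t SOffset,
                                uint32_t Offset, uint32_t CachePolicy,
                                bool IsStruct) {
  return {VData, Rsrc, IsStruct ? VIndex : 0u, VOffset,
          SOffset, Offset, CachePolicy, IsStruct};
}

int main() {
  BufferAtomicOps Raw = buildOps(1, 2, 7, 16, 0, 4, 0, /*IsStruct=*/false);
  BufferAtomicOps Strct = buildOps(1, 2, 7, 16, 0, 4, 0, /*IsStruct=*/true);
  std::printf("raw:    vindex=%u idxen=%d\n", (unsigned)Raw.VIndex, Raw.IdxEn);
  std::printf("struct: vindex=%u idxen=%d\n", (unsigned)Strct.VIndex, Strct.IdxEn);
  return 0;
}
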
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
@@ -6656,7 +6742,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
report_fatal_error("ds_ordered_count: wave_done requires wave_release");
unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
- unsigned ShaderType = getDSShaderTypeValue(DAG.getMachineFunction());
+ unsigned ShaderType =
+ SIInstrInfo::getDSShaderTypeValue(DAG.getMachineFunction());
unsigned Offset0 = OrderedCountIndex << 2;
unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
(Instruction << 4);
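
For reference, the Offset1 computation above is a plain bit-field pack. A standalone sketch, not part of the patch; field widths beyond the shifts visible here are an assumption:

#include <cassert>

// Packs the ds_ordered_count "offset1" fields exactly as in the expression
// above: wave_release in bit 0, wave_done in bit 1, shader type from bit 2,
// and the instruction selector from bit 4.
static unsigned packOffset1(unsigned WaveRelease, unsigned WaveDone,
                            unsigned ShaderType, unsigned Instruction) {
  return WaveRelease | (WaveDone << 1) | (ShaderType << 2) | (Instruction << 4);
}

int main() {
  // wave_release=1, wave_done=1, compute shader (0), ds_ordered_add (0)
  assert(packOffset1(1, 1, 0, 0) == 0x3);
  // pixel shader (ShaderType=1), ds_ordered_swap (Instruction=1)
  assert(packOffset1(0, 0, 1, 1) == 0x14);
  return 0;
}
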
@@ -6893,7 +6980,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_buffer_atomic_umax:
case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_buffer_atomic_or:
- case Intrinsic::amdgcn_buffer_atomic_xor: {
+ case Intrinsic::amdgcn_buffer_atomic_xor:
+ case Intrinsic::amdgcn_buffer_atomic_fadd: {
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
unsigned IdxEn = 1;
if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
@@ -6953,6 +7041,17 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_buffer_atomic_xor:
Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
break;
+ case Intrinsic::amdgcn_buffer_atomic_fadd:
+ if (!Op.getValue(0).use_empty()) {
+ DiagnosticInfoUnsupported
+ NoFpRet(DAG.getMachineFunction().getFunction(),
+ "return versions of fp atomics not supported",
+ DL.getDebugLoc(), DS_Error);
+ DAG.getContext()->diagnose(NoFpRet);
+ return SDValue();
+ }
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_FADD;
+ break;
default:
llvm_unreachable("unhandled atomic opcode");
}
@@ -6960,155 +7059,64 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
M->getMemOperand());
}
+ case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
+ case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
+ return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SWAP);
case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_ADD);
case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SUB);
case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_UMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_UMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_AND);
case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_OR);
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_XOR);
case Intrinsic::amdgcn_raw_buffer_atomic_inc:
- case Intrinsic::amdgcn_raw_buffer_atomic_dec: {
- auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
- SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // vdata
- Op.getOperand(3), // rsrc
- DAG.getConstant(0, DL, MVT::i32), // vindex
- Offsets.first, // voffset
- Op.getOperand(5), // soffset
- Offsets.second, // offset
- Op.getOperand(6), // cachepolicy
- DAG.getTargetConstant(0, DL, MVT::i1), // idxen
- };
- EVT VT = Op.getValueType();
-
- auto *M = cast<MemSDNode>(Op);
- M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6]));
- unsigned Opcode = 0;
-
- switch (IntrID) {
- case Intrinsic::amdgcn_raw_buffer_atomic_swap:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_add:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_sub:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_smin:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_umin:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_smax:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_umax:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_and:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_or:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_xor:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_inc:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_INC;
- break;
- case Intrinsic::amdgcn_raw_buffer_atomic_dec:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_DEC;
- break;
- default:
- llvm_unreachable("unhandled atomic opcode");
- }
-
- return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
- M->getMemOperand());
- }
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_INC);
+ case Intrinsic::amdgcn_raw_buffer_atomic_dec:
+ return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_DEC);
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ return lowerStructBufferAtomicIntrin(Op, DAG,
+ AMDGPUISD::BUFFER_ATOMIC_SWAP);
case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_ADD);
case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SUB);
case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ return lowerStructBufferAtomicIntrin(Op, DAG,
+ AMDGPUISD::BUFFER_ATOMIC_SMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ return lowerStructBufferAtomicIntrin(Op, DAG,
+ AMDGPUISD::BUFFER_ATOMIC_UMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ return lowerStructBufferAtomicIntrin(Op, DAG,
+ AMDGPUISD::BUFFER_ATOMIC_SMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ return lowerStructBufferAtomicIntrin(Op, DAG,
+ AMDGPUISD::BUFFER_ATOMIC_UMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_AND);
case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_OR);
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_XOR);
case Intrinsic::amdgcn_struct_buffer_atomic_inc:
- case Intrinsic::amdgcn_struct_buffer_atomic_dec: {
- auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
- SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // vdata
- Op.getOperand(3), // rsrc
- Op.getOperand(4), // vindex
- Offsets.first, // voffset
- Op.getOperand(6), // soffset
- Offsets.second, // offset
- Op.getOperand(7), // cachepolicy
- DAG.getTargetConstant(1, DL, MVT::i1), // idxen
- };
- EVT VT = Op.getValueType();
-
- auto *M = cast<MemSDNode>(Op);
- M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6],
- Ops[3]));
- unsigned Opcode = 0;
-
- switch (IntrID) {
- case Intrinsic::amdgcn_struct_buffer_atomic_swap:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_add:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_sub:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_smin:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_umin:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_smax:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_umax:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_and:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_or:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_xor:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_inc:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_INC;
- break;
- case Intrinsic::amdgcn_struct_buffer_atomic_dec:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_DEC;
- break;
- default:
- llvm_unreachable("unhandled atomic opcode");
- }
+ return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_INC);
+ case Intrinsic::amdgcn_struct_buffer_atomic_dec:
+ return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_DEC);
- return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
- M->getMemOperand());
- }
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
unsigned IdxEn = 1;
@@ -7180,7 +7188,15 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
- case Intrinsic::amdgcn_global_atomic_csub: {
+ case Intrinsic::amdgcn_global_atomic_fadd: {
+ if (!Op.getValue(0).use_empty()) {
+ DiagnosticInfoUnsupported
+ NoFpRet(DAG.getMachineFunction().getFunction(),
+ "return versions of fp atomics not supported",
+ DL.getDebugLoc(), DS_Error);
+ DAG.getContext()->diagnose(NoFpRet);
+ return SDValue();
+ }
MemSDNode *M = cast<MemSDNode>(Op);
SDValue Ops[] = {
M->getOperand(0), // Chain
@@ -7188,15 +7204,85 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
M->getOperand(3) // Value
};
- return DAG.getMemIntrinsicNode(AMDGPUISD::ATOMIC_LOAD_CSUB, SDLoc(Op),
- M->getVTList(), Ops, M->getMemoryVT(),
- M->getMemOperand());
+ EVT VT = Op.getOperand(3).getValueType();
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_FADD, DL, VT,
+ DAG.getVTList(VT, MVT::Other), Ops,
+ M->getMemOperand());
}
+ case Intrinsic::amdgcn_image_bvh_intersect_ray: {
+ SDLoc DL(Op);
+ MemSDNode *M = cast<MemSDNode>(Op);
+ SDValue NodePtr = M->getOperand(2);
+ SDValue RayExtent = M->getOperand(3);
+ SDValue RayOrigin = M->getOperand(4);
+ SDValue RayDir = M->getOperand(5);
+ SDValue RayInvDir = M->getOperand(6);
+ SDValue TDescr = M->getOperand(7);
+
+ assert(NodePtr.getValueType() == MVT::i32 ||
+ NodePtr.getValueType() == MVT::i64);
+ assert(RayDir.getValueType() == MVT::v4f16 ||
+ RayDir.getValueType() == MVT::v4f32);
+
+ bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
+ bool Is64 = NodePtr.getValueType() == MVT::i64;
+ unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa
+ : AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa
+ : Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa
+ : AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa;
+
+ SmallVector<SDValue, 16> Ops;
+
+ auto packLanes = [&DAG, &Ops, &DL] (SDValue Op, bool IsAligned) {
+ SmallVector<SDValue, 3> Lanes;
+ DAG.ExtractVectorElements(Op, Lanes, 0, 3);
+ if (Lanes[0].getValueSizeInBits() == 32) {
+ for (unsigned I = 0; I < 3; ++I)
+ Ops.push_back(DAG.getBitcast(MVT::i32, Lanes[I]));
+ } else {
+ if (IsAligned) {
+ Ops.push_back(
+ DAG.getBitcast(MVT::i32,
+ DAG.getBuildVector(MVT::v2f16, DL,
+ { Lanes[0], Lanes[1] })));
+ Ops.push_back(Lanes[2]);
+ } else {
+ SDValue Elt0 = Ops.pop_back_val();
+ Ops.push_back(
+ DAG.getBitcast(MVT::i32,
+ DAG.getBuildVector(MVT::v2f16, DL,
+ { Elt0, Lanes[0] })));
+ Ops.push_back(
+ DAG.getBitcast(MVT::i32,
+ DAG.getBuildVector(MVT::v2f16, DL,
+ { Lanes[1], Lanes[2] })));
+ }
+ }
+ };
+ if (Is64)
+ DAG.ExtractVectorElements(DAG.getBitcast(MVT::v2i32, NodePtr), Ops, 0, 2);
+ else
+ Ops.push_back(NodePtr);
+
+ Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
+ packLanes(RayOrigin, true);
+ packLanes(RayDir, true);
+ packLanes(RayInvDir, false);
+ Ops.push_back(TDescr);
+ if (IsA16)
+ Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
+ Ops.push_back(M->getChain());
+
+ auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops);
+ MachineMemOperand *MemRef = M->getMemOperand();
+ DAG.setNodeMemRefs(NewNode, {MemRef});
+ return SDValue(NewNode, 0);
+ }
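
The packLanes lambda above packs three 16-bit lanes per vector into dwords, letting an "aligned" vector leave a dangling half that the next "unaligned" vector completes. A standalone sketch of that packing with plain integers, assuming little-endian half placement and illustrative only:

#include <cassert>
#include <cstdint>
#include <vector>

static uint32_t packPair(uint16_t Lo, uint16_t Hi) {
  return uint32_t(Lo) | (uint32_t(Hi) << 16);
}

static void packLanes(std::vector<uint32_t> &Ops, const uint16_t Lanes[3],
                      bool IsAligned) {
  if (IsAligned) {
    Ops.push_back(packPair(Lanes[0], Lanes[1]));
    Ops.push_back(Lanes[2]); // dangling half, completed by the next call
  } else {
    uint32_t Elt0 = Ops.back(); // reclaim the dangling half
    Ops.pop_back();
    Ops.push_back(packPair(uint16_t(Elt0), Lanes[0]));
    Ops.push_back(packPair(Lanes[1], Lanes[2]));
  }
}

int main() {
  const uint16_t Dir[3] = {0x1111, 0x2222, 0x3333};
  const uint16_t InvDir[3] = {0x4444, 0x5555, 0x6666};
  std::vector<uint32_t> Ops;
  packLanes(Ops, Dir, /*IsAligned=*/true);
  packLanes(Ops, InvDir, /*IsAligned=*/false);
  assert(Ops.size() == 3);        // six halves end up in three dwords
  assert(Ops[0] == 0x22221111u);
  assert(Ops[1] == 0x44443333u);
  assert(Ops[2] == 0x66665555u);
  return 0;
}
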
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
- return lowerImage(Op, ImageDimIntr, DAG);
+ return lowerImage(Op, ImageDimIntr, DAG, true);
return SDValue();
}
@@ -7234,8 +7320,8 @@ SDValue SITargetLowering::getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL,
return NewOp;
}
-SDValue SITargetLowering::handleD16VData(SDValue VData,
- SelectionDAG &DAG) const {
+SDValue SITargetLowering::handleD16VData(SDValue VData, SelectionDAG &DAG,
+ bool ImageStore) const {
EVT StoreVT = VData.getValueType();
// No change for f16 and legal vector D16 types.
@@ -7243,19 +7329,70 @@ SDValue SITargetLowering::handleD16VData(SDValue VData,
return VData;
SDLoc DL(VData);
- assert((StoreVT.getVectorNumElements() != 3) && "Handle v3f16");
+ unsigned NumElements = StoreVT.getVectorNumElements();
if (Subtarget->hasUnpackedD16VMem()) {
// We need to unpack the packed data to store.
EVT IntStoreVT = StoreVT.changeTypeToInteger();
SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
- EVT EquivStoreVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
- StoreVT.getVectorNumElements());
+ EVT EquivStoreVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElements);
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, EquivStoreVT, IntVData);
return DAG.UnrollVectorOp(ZExt.getNode());
}
+ // The sq block of gfx8.1 does not estimate register use correctly for d16
+ // image store instructions. The data operand is computed as if it were not a
+ // d16 image instruction.
+ if (ImageStore && Subtarget->hasImageStoreD16Bug()) {
+ // Bitcast to i16
+ EVT IntStoreVT = StoreVT.changeTypeToInteger();
+ SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
+
+ // Decompose into scalars
+ SmallVector<SDValue, 4> Elts;
+ DAG.ExtractVectorElements(IntVData, Elts);
+
+ // Group pairs of i16 into v2i16 and bitcast to i32
+ SmallVector<SDValue, 4> PackedElts;
+ for (unsigned I = 0; I < Elts.size() / 2; I += 1) {
+ SDValue Pair =
+ DAG.getBuildVector(MVT::v2i16, DL, {Elts[I * 2], Elts[I * 2 + 1]});
+ SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair);
+ PackedElts.push_back(IntPair);
+ }
+ if ((NumElements % 2) == 1) {
+ // Handle v3i16
+ unsigned I = Elts.size() / 2;
+ SDValue Pair = DAG.getBuildVector(MVT::v2i16, DL,
+ {Elts[I * 2], DAG.getUNDEF(MVT::i16)});
+ SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair);
+ PackedElts.push_back(IntPair);
+ }
+
+ // Pad using UNDEF
+ PackedElts.resize(Elts.size(), DAG.getUNDEF(MVT::i32));
+
+ // Build final vector
+ EVT VecVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i32, PackedElts.size());
+ return DAG.getBuildVector(VecVT, DL, PackedElts);
+ }
+
+ if (NumElements == 3) {
+ EVT IntStoreVT =
+ EVT::getIntegerVT(*DAG.getContext(), StoreVT.getStoreSizeInBits());
+ SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
+
+ EVT WidenedStoreVT = EVT::getVectorVT(
+ *DAG.getContext(), StoreVT.getVectorElementType(), NumElements + 1);
+ EVT WidenedIntVT = EVT::getIntegerVT(*DAG.getContext(),
+ WidenedStoreVT.getStoreSizeInBits());
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenedIntVT, IntVData);
+ return DAG.getNode(ISD::BITCAST, DL, WidenedStoreVT, ZExt);
+ }
+
assert(isTypeLegal(StoreVT));
return VData;
}
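
The gfx8.1 workaround above packs the 16-bit store data pairwise and then pads back out to one dword per original element, since the hardware sizes the data operand as if the store were not d16. A standalone sketch of the same packing with plain integers, illustrative only:

#include <cassert>
#include <cstdint>
#include <vector>

// 0xFFFFFFFF stands in for an "undef" dword in this sketch.
static std::vector<uint32_t> packD16StoreData(const std::vector<uint16_t> &Elts) {
  const uint32_t Undef = 0xFFFFFFFFu;
  std::vector<uint32_t> Packed;
  for (size_t I = 0; I + 1 < Elts.size(); I += 2)
    Packed.push_back(uint32_t(Elts[I]) | (uint32_t(Elts[I + 1]) << 16));
  if (Elts.size() % 2) // odd element count (e.g. v3f16): pair with undef
    Packed.push_back(uint32_t(Elts.back()) | (0xFFFFu << 16));
  Packed.resize(Elts.size(), Undef); // pad back to one dword per element
  return Packed;
}

int main() {
  auto P = packD16StoreData({0x1111, 0x2222, 0x3333});
  assert(P.size() == 3);               // one dword per original element
  assert(P[0] == 0x22221111u);
  assert((P[1] & 0xFFFFu) == 0x3333u); // low half carries the third element
  return 0;
}
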
@@ -7433,8 +7570,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
EVT VDataVT = VData.getValueType();
EVT EltType = VDataVT.getScalarType();
bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
- if (IsD16)
+ if (IsD16) {
VData = handleD16VData(VData, DAG);
+ VDataVT = VData.getValueType();
+ }
if (!isTypeLegal(VDataVT)) {
VData =
@@ -7478,8 +7617,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
EVT EltType = VDataVT.getScalarType();
bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
- if (IsD16)
+ if (IsD16) {
VData = handleD16VData(VData, DAG);
+ VDataVT = VData.getValueType();
+ }
if (!isTypeLegal(VDataVT)) {
VData =
@@ -7514,57 +7655,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
-
- case Intrinsic::amdgcn_buffer_atomic_fadd: {
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
- unsigned IdxEn = 1;
- if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
- IdxEn = Idx->getZExtValue() != 0;
- SDValue Ops[] = {
- Chain,
- Op.getOperand(2), // vdata
- Op.getOperand(3), // rsrc
- Op.getOperand(4), // vindex
- SDValue(), // voffset -- will be set by setBufferOffsets
- SDValue(), // soffset -- will be set by setBufferOffsets
- SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
- DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
- };
- unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
- // We don't know the offset if vindex is non-zero, so clear it.
- if (IdxEn)
- Offset = 0;
- EVT VT = Op.getOperand(2).getValueType();
-
- auto *M = cast<MemSDNode>(Op);
- M->getMemOperand()->setOffset(Offset);
- unsigned Opcode = VT.isVector() ? AMDGPUISD::BUFFER_ATOMIC_PK_FADD
- : AMDGPUISD::BUFFER_ATOMIC_FADD;
-
- return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
- M->getMemOperand());
- }
-
- case Intrinsic::amdgcn_global_atomic_fadd: {
- SDValue Ops[] = {
- Chain,
- Op.getOperand(2), // ptr
- Op.getOperand(3) // vdata
- };
- EVT VT = Op.getOperand(3).getValueType();
-
- auto *M = cast<MemSDNode>(Op);
- if (VT.isVector()) {
- return DAG.getMemIntrinsicNode(
- AMDGPUISD::ATOMIC_PK_FADD, DL, Op->getVTList(), Ops, VT,
- M->getMemOperand());
- }
-
- return DAG.getAtomic(ISD::ATOMIC_LOAD_FADD, DL, VT,
- DAG.getVTList(VT, MVT::Other), Ops,
- M->getMemOperand()).getValue(1);
- }
case Intrinsic::amdgcn_end_cf:
return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
Op->getOperand(2), Chain), 0);
@@ -7572,7 +7662,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
- return lowerImage(Op, ImageDimIntr, DAG);
+ return lowerImage(Op, ImageDimIntr, DAG, true);
return Op;
}
@@ -7848,13 +7938,6 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
"Custom lowering for non-i32 vectors hasn't been implemented.");
- if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
- MemVT, *Load->getMemOperand())) {
- SDValue Ops[2];
- std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
- return DAG.getMergeValues(Ops, DL);
- }
-
unsigned Alignment = Load->getAlignment();
unsigned AS = Load->getAddressSpace();
if (Subtarget->hasLDSMisalignedBug() &&
@@ -7879,9 +7962,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
- if (NumElements == 3)
- return WidenVectorLoad(Op, DAG);
- return SplitVectorLoad(Op, DAG);
+ return WidenOrSplitVectorLoad(Op, DAG);
}
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
@@ -7897,9 +7978,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Alignment >= 4 && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
- if (NumElements == 3)
- return WidenVectorLoad(Op, DAG);
- return SplitVectorLoad(Op, DAG);
+ return WidenOrSplitVectorLoad(Op, DAG);
}
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
@@ -7914,7 +7993,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return SplitVectorLoad(Op, DAG);
// v3 loads not supported on SI.
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
- return WidenVectorLoad(Op, DAG);
+ return WidenOrSplitVectorLoad(Op, DAG);
+
// v3 and v4 loads are supported for private and global memory.
return SDValue();
}
@@ -7938,15 +8018,19 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return SplitVectorLoad(Op, DAG);
// v3 loads not supported on SI.
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
- return WidenVectorLoad(Op, DAG);
+ return WidenOrSplitVectorLoad(Op, DAG);
+
return SDValue();
default:
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
- // Use ds_read_b128 if possible.
- if (Subtarget->useDS128() && Load->getAlignment() >= 16 &&
- MemVT.getStoreSize() == 16)
+ // Use ds_read_b128 or ds_read_b96 when possible.
+ if (Subtarget->hasDS96AndDS128() &&
+ ((Subtarget->useDS128() && MemVT.getStoreSize() == 16) ||
+ MemVT.getStoreSize() == 12) &&
+ allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,
+ Load->getAlign()))
return SDValue();
if (NumElements > 2)
@@ -7963,6 +8047,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return SplitVectorLoad(Op, DAG);
}
}
+
+ if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ MemVT, *Load->getMemOperand())) {
+ SDValue Ops[2];
+ std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
+ return DAG.getMergeValues(Ops, DL);
+ }
+
return SDValue();
}
@@ -8003,8 +8095,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
- bool AllowInaccurateRcp = DAG.getTarget().Options.UnsafeFPMath ||
- Flags.hasApproximateFuncs();
+ bool AllowInaccurateRcp = Flags.hasApproximateFuncs();
// Without !fpmath accuracy information, we can't do more because we don't
// know exactly whether rcp is accurate enough to meet the !fpmath requirement.
@@ -8045,6 +8136,33 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags);
}
+SDValue SITargetLowering::lowerFastUnsafeFDIV64(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ const SDNodeFlags Flags = Op->getFlags();
+
+ bool AllowInaccurateDiv = Flags.hasApproximateFuncs() ||
+ DAG.getTarget().Options.UnsafeFPMath;
+ if (!AllowInaccurateDiv)
+ return SDValue();
+
+ SDValue NegY = DAG.getNode(ISD::FNEG, SL, VT, Y);
+ SDValue One = DAG.getConstantFP(1.0, SL, VT);
+
+ SDValue R = DAG.getNode(AMDGPUISD::RCP, SL, VT, Y);
+ SDValue Tmp0 = DAG.getNode(ISD::FMA, SL, VT, NegY, R, One);
+
+ R = DAG.getNode(ISD::FMA, SL, VT, Tmp0, R, R);
+ SDValue Tmp1 = DAG.getNode(ISD::FMA, SL, VT, NegY, R, One);
+ R = DAG.getNode(ISD::FMA, SL, VT, Tmp1, R, R);
+ SDValue Ret = DAG.getNode(ISD::FMUL, SL, VT, X, R);
+ SDValue Tmp2 = DAG.getNode(ISD::FMA, SL, VT, NegY, Ret, X);
+ return DAG.getNode(ISD::FMA, SL, VT, Tmp2, R, Ret);
+}
+
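
The new lowerFastUnsafeFDIV64 is two Newton-Raphson refinements of a reciprocal estimate followed by a quotient correction. The same FMA sequence in plain doubles, as a standalone sketch in which a deliberately sloppy starting guess stands in for the hardware rcp approximation:

#include <cassert>
#include <cmath>
#include <cstdio>

static double fastDiv64(double X, double Y) {
  double NegY = -Y;
  double R = 1.0 / Y * (1.0 + 1e-3);     // crude reciprocal estimate
  double Tmp0 = std::fma(NegY, R, 1.0);  // 1 - Y*R
  R = std::fma(Tmp0, R, R);              // first Newton-Raphson step
  double Tmp1 = std::fma(NegY, R, 1.0);
  R = std::fma(Tmp1, R, R);              // second step
  double Ret = X * R;                    // initial quotient
  double Tmp2 = std::fma(NegY, Ret, X);  // remainder X - Y*Ret
  return std::fma(Tmp2, R, Ret);         // corrected quotient
}

int main() {
  double Q = fastDiv64(355.0, 113.0);
  std::printf("%.17g vs %.17g\n", Q, 355.0 / 113.0);
  assert(std::fabs(Q - 355.0 / 113.0) < 1e-12);
  return 0;
}
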
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
EVT VT, SDValue A, SDValue B, SDValue GlueChain,
SDNodeFlags Flags) {
@@ -8273,8 +8391,8 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
- if (DAG.getTarget().Options.UnsafeFPMath)
- return lowerFastUnsafeFDIV(Op, DAG);
+ if (SDValue FastLowered = lowerFastUnsafeFDIV64(Op, DAG))
+ return FastLowered;
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
@@ -8368,11 +8486,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.isVector() &&
Store->getValue().getValueType().getScalarType() == MVT::i32);
- if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
- VT, *Store->getMemOperand())) {
- return expandUnalignedStore(Store, DAG);
- }
-
unsigned AS = Store->getAddressSpace();
if (Subtarget->hasLDSMisalignedBug() &&
AS == AMDGPUAS::FLAT_ADDRESS &&
@@ -8397,6 +8510,11 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// v3 stores not supported on SI.
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
return SplitVectorStore(Op, DAG);
+
+ if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ VT, *Store->getMemOperand()))
+ return expandUnalignedStore(Store, DAG);
+
return SDValue();
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
@@ -8407,16 +8525,20 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SplitVectorStore(Op, DAG);
return SDValue();
case 16:
- if (NumElements > 4 || NumElements == 3)
+ if (NumElements > 4 ||
+ (NumElements == 3 && !Subtarget->enableFlatScratch()))
return SplitVectorStore(Op, DAG);
return SDValue();
default:
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
- // Use ds_write_b128 if possible.
- if (Subtarget->useDS128() && Store->getAlignment() >= 16 &&
- VT.getStoreSize() == 16 && NumElements != 3)
+ // Use ds_write_b128 or ds_write_b96 when possible.
+ if (Subtarget->hasDS96AndDS128() &&
+ ((Subtarget->useDS128() && VT.getStoreSize() == 16) ||
+ (VT.getStoreSize() == 12)) &&
+ allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,
+ Store->getAlign()))
return SDValue();
if (NumElements > 2)
@@ -8433,6 +8555,13 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SplitVectorStore(Op, DAG);
}
+ if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ VT, *Store->getMemOperand())) {
+ if (VT.isVector())
+ return SplitVectorStore(Op, DAG);
+ return expandUnalignedStore(Store, DAG);
+ }
+
return SDValue();
} else {
llvm_unreachable("unhandled address space");
@@ -8474,7 +8603,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
- if (!isFlatGlobalAddrSpace(AS))
+ if (!AMDGPU::isFlatGlobalAddrSpace(AS))
return Op;
// Non-local address space requires custom lowering for atomic compare
@@ -8584,7 +8713,7 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
EVT VT = N->getValueType(0);
SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
- SDValue COffset = DAG.getConstant(Offset, SL, MVT::i32);
+ SDValue COffset = DAG.getConstant(Offset, SL, VT);
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
@@ -8594,12 +8723,28 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset, Flags);
}
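
The change above only affects the type of the folded constant; the underlying rewrite is the reassociation (x + c1) << c2 == (x << c2) + (c1 << c2), whose constant is now built at full pointer width instead of i32. A quick standalone check of the identity:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x123456789abcull, C1 = 0x40, C2 = 3;
  uint64_t Lhs = (X + C1) << C2;          // shift of the add
  uint64_t Rhs = (X << C2) + (C1 << C2);  // add of the shifted parts
  assert(Lhs == Rhs);
  return 0;
}
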
+/// MemSDNode::getBasePtr() does not work for intrinsics, whose pointer operand is
+/// offset by the chain and the intrinsic ID. Theoretically we would also need to
+/// check the specific intrinsic, but they all place the pointer operand first.
+static unsigned getBasePtrIndex(const MemSDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::STORE:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ return 2;
+ default:
+ return 1;
+ }
+}
+
SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N,
DAGCombinerInfo &DCI) const {
- SDValue Ptr = N->getBasePtr();
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
+ unsigned PtrIdx = getBasePtrIndex(N);
+ SDValue Ptr = N->getOperand(PtrIdx);
+
// TODO: We could also do this for multiplies.
if (Ptr.getOpcode() == ISD::SHL) {
SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), N->getAddressSpace(),
@@ -8607,7 +8752,7 @@ SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N,
if (NewPtr) {
SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
- NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
+ NewOps[PtrIdx] = NewPtr;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
}
@@ -8868,7 +9013,7 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
// and (op x, c1), (op y, c2) -> perm x, y, permute_mask(c1, c2)
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
- N->isDivergent() && TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32) != -1) {
+ N->isDivergent() && TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
uint32_t LHSMask = getPermuteMask(DAG, LHS);
uint32_t RHSMask = getPermuteMask(DAG, RHS);
if (LHSMask != ~0u && RHSMask != ~0u) {
@@ -8965,7 +9110,7 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
// or (op x, c1), (op y, c2) -> perm x, y, permute_mask(c1, c2)
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
- N->isDivergent() && TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32) != -1) {
+ N->isDivergent() && TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
uint32_t LHSMask = getPermuteMask(DAG, LHS);
uint32_t RHSMask = getPermuteMask(DAG, RHS);
if (LHSMask != ~0u && RHSMask != ~0u) {
@@ -10509,8 +10654,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return SDValue();
switch (N->getOpcode()) {
- default:
- return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
case ISD::ADD:
return performAddCombine(N, DCI);
case ISD::SUB:
@@ -10537,35 +10680,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performMinMaxCombine(N, DCI);
case ISD::FMA:
return performFMACombine(N, DCI);
- case ISD::LOAD: {
- if (SDValue Widended = widenLoad(cast<LoadSDNode>(N), DCI))
- return Widended;
- LLVM_FALLTHROUGH;
- }
- case ISD::STORE:
- case ISD::ATOMIC_LOAD:
- case ISD::ATOMIC_STORE:
- case ISD::ATOMIC_CMP_SWAP:
- case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- case ISD::ATOMIC_LOAD_FADD:
- case AMDGPUISD::ATOMIC_INC:
- case AMDGPUISD::ATOMIC_DEC:
- case AMDGPUISD::ATOMIC_LOAD_FMIN:
- case AMDGPUISD::ATOMIC_LOAD_FMAX: // TODO: Target mem intrinsics.
- if (DCI.isBeforeLegalize())
- break;
- return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
case ISD::AND:
return performAndCombine(N, DCI);
case ISD::OR:
@@ -10630,14 +10744,28 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performExtractVectorEltCombine(N, DCI);
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
+ case ISD::LOAD: {
+ if (SDValue Widended = widenLoad(cast<LoadSDNode>(N), DCI))
+ return Widended;
+ LLVM_FALLTHROUGH;
+ }
+ default: {
+ if (!DCI.isBeforeLegalize()) {
+ if (MemSDNode *MemNode = dyn_cast<MemSDNode>(N))
+ return performMemSDNodeCombine(MemNode, DCI);
+ }
+
+ break;
}
+ }
+
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
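
The restructured switch handles the named combines explicitly and lets the default case route any memory node to performMemSDNodeCombine once legalization has run, instead of enumerating every atomic opcode. A rough standalone sketch of that dispatch shape, with hypothetical types:

#include <cstdio>

struct Node { virtual ~Node() = default; virtual int opcode() const = 0; };
struct AddNode : Node { int opcode() const override { return 0; } };
struct MemNode : Node { int opcode() const override { return 99; } };

static const char *combine(const Node &N, bool BeforeLegalize) {
  switch (N.opcode()) {
  case 0:
    return "performAddCombine";
  default:
    // Any memory node reaching the default case gets the shared handler.
    if (!BeforeLegalize && dynamic_cast<const MemNode *>(&N))
      return "performMemSDNodeCombine";
    return "AMDGPUTargetLowering::PerformDAGCombine";
  }
}

int main() {
  AddNode A;
  MemNode M;
  std::printf("%s\n", combine(A, false));
  std::printf("%s\n", combine(M, false));
  std::printf("%s\n", combine(M, true));
  return 0;
}
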
/// Helper function for adjustWritemask
static unsigned SubIdx2Lane(unsigned Idx) {
switch (Idx) {
- default: return 0;
+ default: return ~0u;
case AMDGPU::sub0: return 0;
case AMDGPU::sub1: return 1;
case AMDGPU::sub2: return 2;
@@ -10697,6 +10825,8 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
// set, etc.
Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+ if (Lane == ~0u)
+ return Node;
// Check if the use is for the TFE/LWE generated result at VGPRn+1.
if (UsesTFC && Lane == TFCLane) {
@@ -10826,8 +10956,7 @@ SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
// Insert a copy to a VReg_1 virtual register so LowerI1Copies doesn't have
// to try understanding copies to physical registers.
- if (SrcVal.getValueType() == MVT::i1 &&
- Register::isPhysicalRegister(DestReg->getReg())) {
+ if (SrcVal.getValueType() == MVT::i1 && DestReg->getReg().isPhysical()) {
SDLoc SL(Node);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue VReg = DAG.getRegister(
@@ -10870,7 +10999,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
unsigned Opcode = Node->getMachineOpcode();
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
- !TII->isGather4(Opcode)) {
+ !TII->isGather4(Opcode) &&
+ AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) != -1) {
return adjustWritemask(Node, DAG);
}
@@ -10881,14 +11011,14 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
}
switch (Opcode) {
- case AMDGPU::V_DIV_SCALE_F32:
- case AMDGPU::V_DIV_SCALE_F64: {
+ case AMDGPU::V_DIV_SCALE_F32_e64:
+ case AMDGPU::V_DIV_SCALE_F64_e64: {
// Satisfy the operand register constraint when one of the inputs is
// undefined. Ordinarily each undef value will have its own implicit_def of
// a vreg, so force these to use a single register.
- SDValue Src0 = Node->getOperand(0);
- SDValue Src1 = Node->getOperand(1);
- SDValue Src2 = Node->getOperand(2);
+ SDValue Src0 = Node->getOperand(1);
+ SDValue Src1 = Node->getOperand(3);
+ SDValue Src2 = Node->getOperand(5);
if ((Src0.isMachineOpcode() &&
Src0.getMachineOpcode() != AMDGPU::IMPLICIT_DEF) &&
@@ -10923,10 +11053,10 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
} else
break;
- SmallVector<SDValue, 4> Ops = { Src0, Src1, Src2 };
- for (unsigned I = 3, N = Node->getNumOperands(); I != N; ++I)
- Ops.push_back(Node->getOperand(I));
-
+ SmallVector<SDValue, 9> Ops(Node->op_begin(), Node->op_end());
+ Ops[1] = Src0;
+ Ops[3] = Src1;
+ Ops[5] = Src2;
Ops.push_back(ImpDef.getValue(1));
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
@@ -10962,8 +11092,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
MachineOperand &Op = MI.getOperand(I);
if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID &&
OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) ||
- !Register::isVirtualRegister(Op.getReg()) ||
- !TRI->isAGPR(MRI, Op.getReg()))
+ !Op.getReg().isVirtual() || !TRI->isAGPR(MRI, Op.getReg()))
continue;
auto *Src = MRI.getUniqueVRegDef(Op.getReg());
if (!Src || !Src->isCopy() ||
@@ -10985,8 +11114,12 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode());
if (NoRetAtomicOp != -1) {
if (!Node->hasAnyUseOfValue(0)) {
- MI.setDesc(TII->get(NoRetAtomicOp));
+ int Glc1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::glc1);
+ if (Glc1Idx != -1)
+ MI.RemoveOperand(Glc1Idx);
MI.RemoveOperand(0);
+ MI.setDesc(TII->get(NoRetAtomicOp));
return;
}
@@ -11341,17 +11474,7 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
Info->limitOccupancy(MF);
if (ST.isWave32() && !MF.empty()) {
- // Add VCC_HI def because many instructions marked as imp-use VCC where
- // we may only define VCC_LO. If nothing defines VCC_HI we may end up
- // having a use of undef.
-
const SIInstrInfo *TII = ST.getInstrInfo();
- DebugLoc DL;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.getFirstNonDebugInstr();
- BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), AMDGPU::VCC_HI);
-
for (auto &MBB : MF) {
for (auto &MI : MBB) {
TII->fixImplicitOperands(MI);
@@ -11379,6 +11502,55 @@ void SITargetLowering::computeKnownBitsForFrameIndex(
Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());
}
+static void knownBitsForWorkitemID(const GCNSubtarget &ST, GISelKnownBits &KB,
+ KnownBits &Known, unsigned Dim) {
+ unsigned MaxValue =
+ ST.getMaxWorkitemID(KB.getMachineFunction().getFunction(), Dim);
+ Known.Zero.setHighBits(countLeadingZeros(MaxValue));
+}
+
+void SITargetLowering::computeKnownBitsForTargetInstr(
+ GISelKnownBits &KB, Register R, KnownBits &Known, const APInt &DemandedElts,
+ const MachineRegisterInfo &MRI, unsigned Depth) const {
+ const MachineInstr *MI = MRI.getVRegDef(R);
+ switch (MI->getOpcode()) {
+ case AMDGPU::G_INTRINSIC: {
+ switch (MI->getIntrinsicID()) {
+ case Intrinsic::amdgcn_workitem_id_x:
+ knownBitsForWorkitemID(*getSubtarget(), KB, Known, 0);
+ break;
+ case Intrinsic::amdgcn_workitem_id_y:
+ knownBitsForWorkitemID(*getSubtarget(), KB, Known, 1);
+ break;
+ case Intrinsic::amdgcn_workitem_id_z:
+ knownBitsForWorkitemID(*getSubtarget(), KB, Known, 2);
+ break;
+ case Intrinsic::amdgcn_mbcnt_lo:
+ case Intrinsic::amdgcn_mbcnt_hi: {
+ // These return at most the wavefront size - 1.
+ unsigned Size = MRI.getType(R).getSizeInBits();
+ Known.Zero.setHighBits(Size - getSubtarget()->getWavefrontSizeLog2());
+ break;
+ }
+ case Intrinsic::amdgcn_groupstaticsize: {
+ // We can report everything over the maximum size as 0. We can't report
+ // based on the actual size because we don't know if it's accurate or not
+ // at any given point.
+ Known.Zero.setHighBits(countLeadingZeros(getSubtarget()->getLocalMemorySize()));
+ break;
+ }
+ }
+ break;
+ }
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
+ Known.Zero.setHighBits(24);
+ break;
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
+ Known.Zero.setHighBits(16);
+ break;
+ }
+}
+
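
knownBitsForWorkitemID reports countLeadingZeros(MaxValue) known-zero high bits, which is sound because every value up to the maximum has at least that many leading zeros. A standalone sketch of that reasoning:

#include <cassert>
#include <cstdint>

static unsigned countLeadingZeros32(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Mask = 0x80000000u; Mask && !(V & Mask); Mask >>= 1)
    ++N;
  return N;
}

int main() {
  const uint32_t MaxWorkitemID = 1023; // e.g. a 1024-wide workgroup dimension
  unsigned KnownZeroHigh = countLeadingZeros32(MaxWorkitemID);
  assert(KnownZeroHigh == 22);         // bits 31..10 are known zero
  for (uint32_t Id = 0; Id <= MaxWorkitemID; ++Id)
    assert(countLeadingZeros32(Id) >= KnownZeroHigh); // every legal ID agrees
  return 0;
}
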
Align SITargetLowering::computeKnownAlignForTargetInstr(
GISelKnownBits &KB, Register R, const MachineRegisterInfo &MRI,
unsigned Depth) const {
@@ -11484,46 +11656,40 @@ static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
return false;
}
-bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
- FunctionLoweringInfo * FLI, LegacyDivergenceAnalysis * KDA) const
-{
+bool SITargetLowering::isSDNodeSourceOfDivergence(
+ const SDNode *N, FunctionLoweringInfo *FLI,
+ LegacyDivergenceAnalysis *KDA) const {
switch (N->getOpcode()) {
- case ISD::CopyFromReg:
- {
- const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
- const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
- const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
- Register Reg = R->getReg();
+ case ISD::CopyFromReg: {
+ const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
+ const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ Register Reg = R->getReg();
- // FIXME: Why does this need to consider isLiveIn?
- if (Reg.isPhysical() || MRI.isLiveIn(Reg))
- return !TRI->isSGPRReg(MRI, Reg);
+ // FIXME: Why does this need to consider isLiveIn?
+ if (Reg.isPhysical() || MRI.isLiveIn(Reg))
+ return !TRI->isSGPRReg(MRI, Reg);
- if (const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
- return KDA->isDivergent(V);
+ if (const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
+ return KDA->isDivergent(V);
- assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
- return !TRI->isSGPRReg(MRI, Reg);
- }
- break;
- case ISD::LOAD: {
- const LoadSDNode *L = cast<LoadSDNode>(N);
- unsigned AS = L->getAddressSpace();
- // A flat load may access private memory.
- return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
- } break;
- case ISD::CALLSEQ_END:
+ assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
+ return !TRI->isSGPRReg(MRI, Reg);
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *L = cast<LoadSDNode>(N);
+ unsigned AS = L->getAddressSpace();
+ // A flat load may access private memory.
+ return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
+ }
+ case ISD::CALLSEQ_END:
return true;
- break;
- case ISD::INTRINSIC_WO_CHAIN:
- {
-
- }
- return AMDGPU::isIntrinsicSourceOfDivergence(
- cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
- case ISD::INTRINSIC_W_CHAIN:
- return AMDGPU::isIntrinsicSourceOfDivergence(
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ case ISD::INTRINSIC_WO_CHAIN:
+ return AMDGPU::isIntrinsicSourceOfDivergence(
+ cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
+ case ISD::INTRINSIC_W_CHAIN:
+ return AMDGPU::isIntrinsicSourceOfDivergence(
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
}
return false;
}
@@ -11558,6 +11724,16 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
SNaN, Depth);
}
+// Global FP atomic instructions have a hardcoded FP mode and do not support
+// FP32 denormals; they only support v2f16 denormals.
+static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW) {
+ const fltSemantics &Flt = RMW->getType()->getScalarType()->getFltSemantics();
+ auto DenormMode = RMW->getParent()->getParent()->getDenormalMode(Flt);
+ if (&Flt == &APFloat::IEEEsingle())
+ return DenormMode == DenormalMode::getPreserveSign();
+ return DenormMode == DenormalMode::getIEEE();
+}
+
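
fpModeMatchesGlobalFPAtomicMode accepts the native atomic only when the function's denormal mode matches the hardcoded one: preserve-sign for f32, IEEE otherwise. The same check as a standalone sketch, with illustrative enums rather than the LLVM types:

#include <cassert>

enum class DenormMode { IEEE, PreserveSign };

static bool fpModeMatchesGlobalFPAtomicMode(bool IsF32, DenormMode Mode) {
  // f32 atomics flush denormals (preserve-sign); f64/v2f16 follow IEEE.
  return IsF32 ? Mode == DenormMode::PreserveSign : Mode == DenormMode::IEEE;
}

int main() {
  assert(fpModeMatchesGlobalFPAtomicMode(true, DenormMode::PreserveSign));
  assert(!fpModeMatchesGlobalFPAtomicMode(false, DenormMode::PreserveSign));
  assert(fpModeMatchesGlobalFPAtomicMode(false, DenormMode::IEEE));
  return 0;
}
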
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
switch (RMW->getOperation()) {
@@ -11576,10 +11752,15 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
unsigned AS = RMW->getPointerAddressSpace();
if (AS == AMDGPUAS::GLOBAL_ADDRESS && Subtarget->hasAtomicFaddInsts()) {
+ if (!fpModeMatchesGlobalFPAtomicMode(RMW))
+ return AtomicExpansionKind::CmpXChg;
+
return RMW->use_empty() ? AtomicExpansionKind::None :
AtomicExpansionKind::CmpXChg;
}
+ // DS FP atomics do respect the denormal mode, but the rounding mode is fixed
+ // to round-to-nearest-even.
return (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomics()) ?
AtomicExpansionKind::None : AtomicExpansionKind::CmpXChg;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
index f4c076464057..823d6eca9bf8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -16,10 +16,17 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPUArgumentUsageInfo.h"
-#include "SIInstrInfo.h"
namespace llvm {
+class GCNSubtarget;
+class SIMachineFunctionInfo;
+class SIRegisterInfo;
+
+namespace AMDGPU {
+struct ImageDimIntrinsicInfo;
+}
+
class SITargetLowering final : public AMDGPUTargetLowering {
private:
const GCNSubtarget *Subtarget;
@@ -59,10 +66,15 @@ private:
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
MVT VT, unsigned Offset) const;
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
- SelectionDAG &DAG) const;
+ SelectionDAG &DAG, bool WithChain) const;
SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
SDValue CachePolicy, SelectionDAG &DAG) const;
+ SDValue lowerRawBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
+ unsigned NewOpcode) const;
+ SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
+ unsigned NewOpcode) const;
+
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
@@ -80,12 +92,12 @@ private:
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFastUnsafeFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
@@ -104,7 +116,8 @@ private:
ArrayRef<SDValue> Ops, EVT MemVT,
MachineMemOperand *MMO, SelectionDAG &DAG) const;
- SDValue handleD16VData(SDValue VData, SelectionDAG &DAG) const;
+ SDValue handleD16VData(SDValue VData, SelectionDAG &DAG,
+ bool ImageStore = false) const;
/// Converts \p Op, which must be of floating point type, to the
/// floating point type \p VT, by either extending or truncating it.
@@ -255,12 +268,22 @@ public:
const SelectionDAG &DAG) const override;
bool allowsMisalignedMemoryAccessesImpl(
- unsigned Size, unsigned AS, unsigned Align,
+ unsigned Size, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const;
bool allowsMisalignedMemoryAccesses(
- EVT VT, unsigned AS, unsigned Align,
+ LLT Ty, unsigned AddrSpace, Align Alignment,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *IsFast = nullptr) const override {
+ if (IsFast)
+ *IsFast = false;
+ return allowsMisalignedMemoryAccessesImpl(Ty.getSizeInBits(), AddrSpace,
+ Alignment, Flags, IsFast);
+ }
+
+ bool allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AS, unsigned Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const override;
@@ -270,20 +293,8 @@ public:
bool isMemOpUniform(const SDNode *N) const;
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
- static bool isNonGlobalAddrSpace(unsigned AS) {
- return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS ||
- AS == AMDGPUAS::PRIVATE_ADDRESS;
- }
+ static bool isNonGlobalAddrSpace(unsigned AS);
- // FIXME: Missing constant_32bit
- static bool isFlatGlobalAddrSpace(unsigned AS) {
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS ||
- AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
- }
-
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
TargetLoweringBase::LegalizeTypeAction
@@ -366,6 +377,8 @@ public:
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
+ LLT getPreferredShiftAmountTy(LLT Ty) const override;
+
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
@@ -412,6 +425,11 @@ public:
void computeKnownBitsForFrameIndex(int FrameIdx,
KnownBits &Known,
const MachineFunction &MF) const override;
+ void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth = 0) const override;
Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R,
const MachineRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 35c49ae8c0dd..5611c9c5d57e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -31,8 +31,9 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 052db5f6ea71..9d31cd5cedc3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -14,30 +14,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <cstdint>
-#include <iterator>
using namespace llvm;
@@ -58,16 +39,18 @@ private:
MachineDominatorTree *MDT = nullptr;
MachineBasicBlock *EarlyExitBlock = nullptr;
+ bool EarlyExitClearsExec = false;
bool shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
bool dominatesAllReachable(MachineBasicBlock &MBB);
- void createEarlyExitBlock(MachineBasicBlock &MBB);
+ void ensureEarlyExitBlock(MachineBasicBlock &MBB, bool ClearExec);
void skipIfDead(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL);
bool kill(MachineInstr &MI);
+ void earlyTerm(MachineInstr &MI);
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
@@ -164,31 +147,62 @@ bool SIInsertSkips::dominatesAllReachable(MachineBasicBlock &MBB) {
return true;
}
-static void generatePsEndPgm(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- const SIInstrInfo *TII) {
- // Generate "null export; s_endpgm".
- BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addImm(1) // vm
- .addImm(0) // compr
- .addImm(0); // en
+static void generateEndPgm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ const SIInstrInfo *TII, bool IsPS) {
+ // "null export"
+ if (IsPS) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
+ .addImm(AMDGPU::Exp::ET_NULL)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addImm(1) // vm
+ .addImm(0) // compr
+ .addImm(0); // en
+ }
+ // s_endpgm
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
}
-void SIInsertSkips::createEarlyExitBlock(MachineBasicBlock &MBB) {
+void SIInsertSkips::ensureEarlyExitBlock(MachineBasicBlock &MBB,
+ bool ClearExec) {
MachineFunction *MF = MBB.getParent();
DebugLoc DL;
- assert(!EarlyExitBlock);
- EarlyExitBlock = MF->CreateMachineBasicBlock();
- MF->insert(MF->end(), EarlyExitBlock);
+ if (!EarlyExitBlock) {
+ EarlyExitBlock = MF->CreateMachineBasicBlock();
+ MF->insert(MF->end(), EarlyExitBlock);
+ generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII,
+ MF->getFunction().getCallingConv() ==
+ CallingConv::AMDGPU_PS);
+ EarlyExitClearsExec = false;
+ }
- generatePsEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII);
+ if (ClearExec && !EarlyExitClearsExec) {
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ unsigned Mov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ Register Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ auto ExitI = EarlyExitBlock->getFirstNonPHI();
+ BuildMI(*EarlyExitBlock, ExitI, DL, TII->get(Mov), Exec).addImm(0);
+ EarlyExitClearsExec = true;
+ }
+}
+
+static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
+ MachineDominatorTree *MDT) {
+ MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true);
+
+ // Update dominator tree
+ using DomTreeT = DomTreeBase<MachineBasicBlock>;
+ SmallVector<DomTreeT::UpdateType, 16> DTUpdates;
+ for (MachineBasicBlock *Succ : SplitBB->successors()) {
+ DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
+ DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ});
+ }
+ DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
+ MDT->getBase().applyUpdates(DTUpdates);
}
/// Insert an "if exec=0 { null export; s_endpgm }" sequence before the given
@@ -196,6 +210,7 @@ void SIInsertSkips::createEarlyExitBlock(MachineBasicBlock &MBB) {
void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL) {
MachineFunction *MF = MBB.getParent();
+ (void)MF;
assert(MF->getFunction().getCallingConv() == CallingConv::AMDGPU_PS);
// It is possible for an SI_KILL_*_TERMINATOR to sit at the bottom of a
@@ -211,45 +226,22 @@ void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB,
// In this case, we write the "null_export; s_endpgm" skip code in the
// already-existing basic block.
auto NextBBI = std::next(MBB.getIterator());
- bool NoSuccessor = I == MBB.end() &&
- llvm::find(MBB.successors(), &*NextBBI) == MBB.succ_end();
+ bool NoSuccessor =
+ I == MBB.end() && !llvm::is_contained(MBB.successors(), &*NextBBI);
if (NoSuccessor) {
- generatePsEndPgm(MBB, I, DL, TII);
+ generateEndPgm(MBB, I, DL, TII, true);
} else {
- if (!EarlyExitBlock) {
- createEarlyExitBlock(MBB);
- // Update next block pointer to reflect any new blocks
- NextBBI = std::next(MBB.getIterator());
- }
+ ensureEarlyExitBlock(MBB, false);
- auto BranchMI = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
- .addMBB(EarlyExitBlock);
+ MachineInstr *BranchMI =
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addMBB(EarlyExitBlock);
// Split the block if the branch will not come at the end.
auto Next = std::next(BranchMI->getIterator());
- if (Next != MBB.end() && !Next->isTerminator()) {
- MachineBasicBlock *SplitBB =
- MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- MF->insert(NextBBI, SplitBB);
- SplitBB->splice(SplitBB->begin(), &MBB, I, MBB.end());
- SplitBB->transferSuccessorsAndUpdatePHIs(&MBB);
- // FIXME: the expectation is that this will be used near the beginning
- // of a block so just assume all registers are still live.
- for (auto LiveIn : MBB.liveins())
- SplitBB->addLiveIn(LiveIn);
- MBB.addSuccessor(SplitBB);
-
- // Update dominator tree
- using DomTreeT = DomTreeBase<MachineBasicBlock>;
- SmallVector<DomTreeT::UpdateType, 16> DTUpdates;
- for (MachineBasicBlock *Succ : SplitBB->successors()) {
- DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
- DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ});
- }
- DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
- MDT->getBase().applyUpdates(DTUpdates);
- }
+ if (Next != MBB.end() && !Next->isTerminator())
+ splitBlock(MBB, *BranchMI, MDT);
MBB.addSuccessor(EarlyExitBlock);
MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
@@ -382,6 +374,23 @@ bool SIInsertSkips::kill(MachineInstr &MI) {
}
}
+void SIInsertSkips::earlyTerm(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ const DebugLoc DL = MI.getDebugLoc();
+
+ ensureEarlyExitBlock(MBB, true);
+
+ auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0))
+ .addMBB(EarlyExitBlock);
+ auto Next = std::next(MI.getIterator());
+
+ if (Next != MBB.end() && !Next->isTerminator())
+ splitBlock(MBB, *BranchMI, MDT);
+
+ MBB.addSuccessor(EarlyExitBlock);
+ MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
+}
+
// Returns true if a branch over the block was inserted.
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
MachineBasicBlock &SrcMBB) {
@@ -407,6 +416,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
SkipThreshold = SkipThresholdFlag;
SmallVector<MachineInstr *, 4> KillInstrs;
+ SmallVector<MachineInstr *, 4> EarlyTermInstrs;
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
@@ -465,18 +475,29 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
}
break;
+ case AMDGPU::SI_EARLY_TERMINATE_SCC0:
+ EarlyTermInstrs.push_back(&MI);
+ break;
+
default:
break;
}
}
}
+ for (MachineInstr *Instr : EarlyTermInstrs) {
+ // Early termination in GS does nothing
+ if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS)
+ earlyTerm(*Instr);
+ Instr->eraseFromParent();
+ }
for (MachineInstr *Kill : KillInstrs) {
skipIfDead(*Kill->getParent(), std::next(Kill->getIterator()),
Kill->getDebugLoc());
Kill->eraseFromParent();
}
KillInstrs.clear();
+ EarlyTermInstrs.clear();
EarlyExitBlock = nullptr;
return MadeChange;
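// A condensed, standalone model of the early-exit handling in this pass, using
// hypothetical ExitBlock/EarlyExitState types instead of MachineBasicBlock. It
// only restates the "create the exit block once, prepend the exec clear at most
// once" behaviour of ensureEarlyExitBlock shown above; the instruction strings
// are stand-ins, not real emitted MIR.
#include <memory>
#include <string>
#include <vector>

struct ExitBlock {
  std::vector<std::string> Code; // stand-in for the emitted machine instructions
};

struct EarlyExitState {
  std::unique_ptr<ExitBlock> Block;
  bool ClearsExec = false;

  // Kills branch here with s_cbranch_execz; SI_EARLY_TERMINATE_SCC0 branches
  // with s_cbranch_scc0 and additionally requests the exec clear.
  ExitBlock &ensure(bool ClearExec, bool IsPS) {
    if (!Block) {
      Block = std::make_unique<ExitBlock>();
      if (IsPS)
        Block->Code.push_back("null export");     // only pixel shaders export
      Block->Code.push_back("s_endpgm");
    }
    if (ClearExec && !ClearsExec) {
      // Early-terminate paths also clear exec before ending the program.
      Block->Code.insert(Block->Code.begin(), "clear exec");
      ClearsExec = true;
    }
    return *Block;
  }
};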
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 2a157eb20ab4..c12745586da1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -24,41 +24,15 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <utility>
-
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
#define DEBUG_TYPE "si-insert-waitcnts"
@@ -458,6 +432,7 @@ public:
#endif // NDEBUG
}
+ bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const;
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
bool generateWaitcntInstBefore(MachineInstr &MI,
WaitcntBrackets &ScoreBrackets,
@@ -486,7 +461,7 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
RegInterval Result;
- unsigned Reg = TRI->getEncodingValue(Op.getReg());
+ unsigned Reg = TRI->getEncodingValue(AMDGPU::getMCReg(Op.getReg(), *ST));
if (TRI->isVGPR(*MRI, Op.getReg())) {
assert(Reg >= RegisterEncoding.VGPR0 && Reg <= RegisterEncoding.VGPRL);
@@ -623,8 +598,9 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
MachineOperand &DefMO = Inst.getOperand(I);
if (DefMO.isReg() && DefMO.isDef() &&
TRI->isVGPR(*MRI, DefMO.getReg())) {
- setRegScore(TRI->getEncodingValue(DefMO.getReg()), EXP_CNT,
- CurrScore);
+ setRegScore(
+ TRI->getEncodingValue(AMDGPU::getMCReg(DefMO.getReg(), *ST)),
+ EXP_CNT, CurrScore);
}
}
}
@@ -855,7 +831,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
setForceEmitWaitcnt();
bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
- if (MI.isDebugInstr())
+ if (MI.isMetaInstruction())
return false;
AMDGPU::Waitcnt Wait;
@@ -876,7 +852,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
- Wait = Wait.combined(AMDGPU::Waitcnt::allZero(IV));
+ Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
}
// Resolve vm waits before gs-done.
else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
@@ -963,26 +939,28 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
int CallAddrOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
- RegInterval CallAddrOpInterval =
+
+ if (MI.getOperand(CallAddrOpIdx).isReg()) {
+ RegInterval CallAddrOpInterval =
ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, CallAddrOpIdx);
- for (int RegNo = CallAddrOpInterval.first;
- RegNo < CallAddrOpInterval.second; ++RegNo)
- ScoreBrackets.determineWait(
+ for (int RegNo = CallAddrOpInterval.first;
+ RegNo < CallAddrOpInterval.second; ++RegNo)
+ ScoreBrackets.determineWait(
LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
- int RtnAddrOpIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
- if (RtnAddrOpIdx != -1) {
- RegInterval RtnAddrOpInterval =
+ int RtnAddrOpIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+ if (RtnAddrOpIdx != -1) {
+ RegInterval RtnAddrOpInterval =
ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, RtnAddrOpIdx);
- for (int RegNo = RtnAddrOpInterval.first;
- RegNo < RtnAddrOpInterval.second; ++RegNo)
- ScoreBrackets.determineWait(
+ for (int RegNo = RtnAddrOpInterval.first;
+ RegNo < RtnAddrOpInterval.second; ++RegNo)
+ ScoreBrackets.determineWait(
LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+ }
}
-
} else {
// FIXME: Should not be relying on memoperands.
// Look at the source operands of every instruction to see if
@@ -1024,8 +1002,10 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
continue;
RegInterval Interval =
ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I);
+
+ const bool IsVGPR = TRI->isVGPR(*MRI, Op.getReg());
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
- if (TRI->isVGPR(*MRI, Op.getReg())) {
+ if (IsVGPR) {
// RAW always needs an s_waitcnt. WAW needs an s_waitcnt unless the
// previous write and this write are the same type of VMEM
// instruction, in which case they're guaranteed to write their
@@ -1055,7 +1035,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// requiring a WAITCNT beforehand.
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
!ST->hasAutoWaitcntBeforeBarrier()) {
- Wait = Wait.combined(AMDGPU::Waitcnt::allZero(IV));
+ Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
}
// TODO: Remove this work-around, enable the assert for Bug 457939
@@ -1088,8 +1068,8 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
} else {
assert(II->getOpcode() == AMDGPU::S_WAITCNT_VSCNT);
assert(II->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
- ScoreBrackets.applyWaitcnt(
- AMDGPU::Waitcnt(~0u, ~0u, ~0u, II->getOperand(1).getImm()));
+ auto W = TII->getNamedOperand(*II, AMDGPU::OpName::simm16)->getImm();
+ ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt(~0u, ~0u, ~0u, W));
}
}
}
@@ -1097,7 +1077,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
}
if (ForceEmitZeroWaitcnts)
- Wait = AMDGPU::Waitcnt::allZero(IV);
+ Wait = AMDGPU::Waitcnt::allZero(ST->hasVscnt());
if (ForceEmitWaitcnt[VM_CNT])
Wait.VmCnt = 0;
@@ -1137,12 +1117,13 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
assert(II->getOpcode() == AMDGPU::S_WAITCNT_VSCNT);
assert(II->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
- unsigned ICnt = II->getOperand(1).getImm();
+ unsigned ICnt = TII->getNamedOperand(*II, AMDGPU::OpName::simm16)
+ ->getImm();
OldWait.VsCnt = std::min(OldWait.VsCnt, ICnt);
if (!TrackedWaitcntSet.count(&*II))
Wait.VsCnt = std::min(Wait.VsCnt, ICnt);
if (Wait.VsCnt != ICnt) {
- II->getOperand(1).setImm(Wait.VsCnt);
+ TII->getNamedOperand(*II, AMDGPU::OpName::simm16)->setImm(Wait.VsCnt);
Modified = true;
}
Wait.VsCnt = ~0u;
@@ -1189,12 +1170,50 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
return Modified;
}
-// This is a flat memory operation. Check to see if it has memory
-// tokens for both LDS and Memory, and if so mark it as a flat.
+// This is a flat memory operation. Check to see if it has memory tokens other
+// than LDS. Other address spaces supported by flat memory operations involve
+// global memory.
+bool SIInsertWaitcnts::mayAccessVMEMThroughFlat(const MachineInstr &MI) const {
+ assert(TII->isFLAT(MI));
+
+ // All flat instructions use the VMEM counter.
+ assert(TII->usesVM_CNT(MI));
+
+ // If there are no memory operands then conservatively assume the flat
+ // operation may access VMEM.
+ if (MI.memoperands_empty())
+ return true;
+
+ // See if any memory operand specifies an address space that involves VMEM.
+ // Flat operations only support FLAT, LOCAL (LDS), or address spaces
+ // involving VMEM such as GLOBAL, CONSTANT, PRIVATE (SCRATCH), etc. The REGION
+ // (GDS) address space is not supported by flat operations. Therefore, simply
+ // return true unless only the LDS address space is found.
+ for (const MachineMemOperand *Memop : MI.memoperands()) {
+ unsigned AS = Memop->getAddrSpace();
+ assert(AS != AMDGPUAS::REGION_ADDRESS);
+ if (AS != AMDGPUAS::LOCAL_ADDRESS)
+ return true;
+ }
+
+ return false;
+}
+
+// This is a flat memory operation. Check to see if it has memory tokens for
+// either LDS or FLAT.
bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const {
+ assert(TII->isFLAT(MI));
+
+ // Flat instructions such as SCRATCH and GLOBAL do not use the lgkm counter.
+ if (!TII->usesLGKM_CNT(MI))
+ return false;
+
+ // If there are no memory operands then conservatively assume the flat
+ // operation may access LDS.
if (MI.memoperands_empty())
return true;
+ // See if any memory operand specifies an address space that involves LDS.
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS)
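// A small standalone restatement of the flat address-space classification added
// above, assuming plain enum values for the address spaces rather than
// MachineMemOperand; it is only meant to illustrate the rule, not the pass code.
#include <vector>

enum class AS { Global, Constant, Private, Local, Flat }; // stand-ins, not AMDGPUAS

// With no memoperands the pass conservatively assumes both VMEM and LDS access.
static bool flatMayAccessVMEM(const std::vector<AS> &Ops) {
  if (Ops.empty())
    return true;
  for (AS A : Ops)
    if (A != AS::Local)                  // anything but pure LDS involves VMEM
      return true;
  return false;
}

static bool flatMayAccessLDS(const std::vector<AS> &Ops) {
  if (Ops.empty())
    return true;
  for (AS A : Ops)
    if (A == AS::Local || A == AS::Flat) // FLAT may resolve to LDS at run time
      return true;
  return false;
}
// If both return true, updateEventWaitcntAfter marks the op as "pending flat",
// which later forces both VM_CNT and LGKM_CNT to be flushed to zero.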
@@ -1221,7 +1240,10 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
} else if (TII->isFLAT(Inst)) {
assert(Inst.mayLoadOrStore());
- if (TII->usesVM_CNT(Inst)) {
+ int FlatASCount = 0;
+
+ if (mayAccessVMEMThroughFlat(Inst)) {
+ ++FlatASCount;
if (!ST->hasVscnt())
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
else if (Inst.mayLoad() &&
@@ -1231,15 +1253,19 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
}
- if (TII->usesLGKM_CNT(Inst)) {
+ if (mayAccessLDSThroughFlat(Inst)) {
+ ++FlatASCount;
ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
-
- // This is a flat memory operation, so note it - it will require
- // that both the VM and LGKM be flushed to zero if it is pending when
- // a VM or LGKM dependency occurs.
- if (mayAccessLDSThroughFlat(Inst))
- ScoreBrackets->setPendingFlat();
}
+
+ // A Flat memory operation must access at least one address space.
+ assert(FlatASCount);
+
+ // This is a flat memory operation that accesses both VMEM and LDS, so note it:
+ // it will require that both the VM and LGKM be flushed to zero if it is
+ // pending when a VM or LGKM dependency occurs.
+ if (FlatASCount > 1)
+ ScoreBrackets->setPendingFlat();
} else if (SIInstrInfo::isVMEM(Inst) &&
// TODO: get a better carve out.
Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1 &&
@@ -1266,34 +1292,29 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
} else if (Inst.isCall()) {
if (callWaitsOnFunctionReturn(Inst)) {
// Act as a wait on everything
- ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZero(IV));
+ ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
} else {
// May need to wait for anything.
ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt());
}
+ } else if (SIInstrInfo::isEXP(Inst)) {
+ unsigned Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
+ if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31)
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_PARAM_ACCESS, Inst);
+ else if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST)
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst);
+ else
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst);
} else {
switch (Inst.getOpcode()) {
case AMDGPU::S_SENDMSG:
case AMDGPU::S_SENDMSGHALT:
ScoreBrackets->updateByEvent(TII, TRI, MRI, SQ_MESSAGE, Inst);
break;
- case AMDGPU::EXP:
- case AMDGPU::EXP_DONE: {
- int Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
- if (Imm >= 32 && Imm <= 63)
- ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_PARAM_ACCESS, Inst);
- else if (Imm >= 12 && Imm <= 15)
- ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst);
- else
- ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst);
- break;
- }
case AMDGPU::S_MEMTIME:
case AMDGPU::S_MEMREALTIME:
ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
break;
- default:
- break;
}
}
}
@@ -1381,9 +1402,19 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
ScoreBrackets.dump();
});
- // Assume VCCZ is correct at basic block boundaries, unless and until we need
- // to handle cases where that is not true.
+ // Track the correctness of vccz through this basic block. There are two
+ // reasons why it might be incorrect; see ST->hasReadVCCZBug() and
+ // ST->partialVCCWritesUpdateVCCZ().
bool VCCZCorrect = true;
+ if (ST->hasReadVCCZBug()) {
+ // vccz could be incorrect at a basic block boundary if a predecessor wrote
+ // to vcc and then issued an smem load.
+ VCCZCorrect = false;
+ } else if (!ST->partialVCCWritesUpdateVCCZ()) {
+ // vccz could be incorrect at a basic block boundary if a predecessor wrote
+ // to vcc_lo or vcc_hi.
+ VCCZCorrect = false;
+ }
// Walk over the instructions.
MachineInstr *OldWaitcntInstr = nullptr;
@@ -1404,14 +1435,21 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
continue;
}
- // We might need to restore vccz to its correct value for either of two
- // different reasons; see ST->hasReadVCCZBug() and
- // ST->partialVCCWritesUpdateVCCZ().
- bool RestoreVCCZ = false;
- if (readsVCCZ(Inst)) {
- if (!VCCZCorrect)
- RestoreVCCZ = true;
- else if (ST->hasReadVCCZBug()) {
+ // Generate an s_waitcnt instruction to be placed before Inst, if needed.
+ Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr);
+ OldWaitcntInstr = nullptr;
+
+ // Restore vccz if it's not known to be correct already.
+ bool RestoreVCCZ = !VCCZCorrect && readsVCCZ(Inst);
+
+ // Don't examine operands unless we need to track vccz correctness.
+ if (ST->hasReadVCCZBug() || !ST->partialVCCWritesUpdateVCCZ()) {
+ if (Inst.definesRegister(AMDGPU::VCC_LO) ||
+ Inst.definesRegister(AMDGPU::VCC_HI)) {
+ // Up to gfx9, writes to vcc_lo and vcc_hi don't update vccz.
+ if (!ST->partialVCCWritesUpdateVCCZ())
+ VCCZCorrect = false;
+ } else if (Inst.definesRegister(AMDGPU::VCC)) {
// There is a hardware bug on CI/SI where SMRD instruction may corrupt
// vccz bit, so when we detect that an instruction may read from a
// corrupt vccz bit, we need to:
@@ -1419,10 +1457,16 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
// operations to complete.
// 2. Restore the correct value of vccz by writing the current value
// of vcc back to vcc.
- if (ScoreBrackets.getScoreLB(LGKM_CNT) <
- ScoreBrackets.getScoreUB(LGKM_CNT) &&
+ if (ST->hasReadVCCZBug() &&
+ ScoreBrackets.getScoreLB(LGKM_CNT) <
+ ScoreBrackets.getScoreUB(LGKM_CNT) &&
ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
- RestoreVCCZ = true;
+ // A write to vcc while an smem read is outstanding may still leave vccz
+ // stale, since the read can complete and clobber vccz at any time.
+ VCCZCorrect = false;
+ } else {
+ // Writes to vcc will fix any incorrect value in vccz.
+ VCCZCorrect = true;
}
}
}
@@ -1432,23 +1476,12 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
const Value *Ptr = Memop->getValue();
SLoadAddresses.insert(std::make_pair(Ptr, Inst.getParent()));
}
- }
-
- if (!ST->partialVCCWritesUpdateVCCZ()) {
- // Up to gfx9, writes to vcc_lo and vcc_hi don't update vccz.
- // Writes to vcc will fix it.
- if (Inst.definesRegister(AMDGPU::VCC_LO) ||
- Inst.definesRegister(AMDGPU::VCC_HI))
+ if (ST->hasReadVCCZBug()) {
+ // This smem read could complete and clobber vccz at any time.
VCCZCorrect = false;
- else if (Inst.definesRegister(AMDGPU::VCC))
- VCCZCorrect = true;
+ }
}
- // Generate an s_waitcnt instruction to be placed before
- // cur_Inst, if needed.
- Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr);
- OldWaitcntInstr = nullptr;
-
updateEventWaitcntAfter(Inst, &ScoreBrackets);
#if 0 // TODO: implement resource type check controlled by options with ub = LB.
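// A simplified, standalone model of the vccz tracking introduced above, using
// hypothetical SubtargetFlags/InstModel structs in place of GCNSubtarget and
// MachineInstr; it only restates the state machine from the hunks, not the pass.
struct SubtargetFlags {
  bool HasReadVCCZBug;             // SMEM loads can clobber vccz (SI/CI)
  bool PartialVCCWritesUpdateVCCZ; // vcc_lo/vcc_hi writes update vccz (gfx10+)
};

struct InstModel {
  bool ReadsVCCZ;
  bool DefinesVCCLoOrHi; // partial write to vcc_lo or vcc_hi
  bool DefinesVCC;       // full write to vcc
  bool IsSMEMLoad;
};

// vccz is only trusted at block entry when neither hazard applies.
static bool blockEntryVCCZCorrect(const SubtargetFlags &ST) {
  return !ST.HasReadVCCZBug && ST.PartialVCCWritesUpdateVCCZ;
}

// Returns true if a vccz restore must be emitted before this instruction, and
// updates the running VCCZCorrect flag the way the pass does.
static bool stepVCCZ(const SubtargetFlags &ST, const InstModel &I,
                     bool PendingSMEMAccess, bool &VCCZCorrect) {
  const bool Restore = !VCCZCorrect && I.ReadsVCCZ;
  if (ST.HasReadVCCZBug || !ST.PartialVCCWritesUpdateVCCZ) {
    if (I.DefinesVCCLoOrHi) {
      if (!ST.PartialVCCWritesUpdateVCCZ)
        VCCZCorrect = false;                         // partial write leaves vccz stale
    } else if (I.DefinesVCC) {
      // A full vcc write normally fixes vccz, unless an outstanding smem load
      // may still clobber it afterwards.
      VCCZCorrect = !(ST.HasReadVCCZBug && PendingSMEMAccess);
    }
  }
  if (I.IsSMEMLoad && ST.HasReadVCCZBug)
    VCCZCorrect = false;                             // load may complete and clobber vccz
  return Restore;
}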
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 428c21c896d5..7ce042b67aba 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -33,6 +33,7 @@ class InstSI <dag outs, dag ins, string asm = "",
field bit VINTRP = 0;
field bit SDWA = 0;
field bit DPP = 0;
+ field bit TRANS = 0;
// Memory instruction formats.
field bit MUBUF = 0;
@@ -110,9 +111,9 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that this is a D16 buffer instruction.
field bit D16Buf = 0;
- // This field indicates that FLAT instruction accesses FLAT_GLBL or
- // FLAT_SCRATCH segment. Must be 0 for non-FLAT instructions.
- field bit IsNonFlatSeg = 0;
+ // This field indicates that a FLAT instruction accesses the FLAT_GLBL segment.
+ // Must be 0 for non-FLAT instructions.
+ field bit IsFlatGlobal = 0;
// Reads the mode register, usually for FP environment.
field bit ReadsModeReg = 0;
@@ -130,6 +131,10 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that this is one of DOT instructions.
field bit IsDOT = 0;
+ // This field indicates that a FLAT instruction accesses the FLAT_SCRATCH segment.
+ // Must be 0 for non-FLAT instructions.
+ field bit IsFlatScratch = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -149,17 +154,18 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{13} = VINTRP;
let TSFlags{14} = SDWA;
let TSFlags{15} = DPP;
+ let TSFlags{16} = TRANS;
- let TSFlags{16} = MUBUF;
- let TSFlags{17} = MTBUF;
- let TSFlags{18} = SMRD;
- let TSFlags{19} = MIMG;
- let TSFlags{20} = EXP;
- let TSFlags{21} = FLAT;
- let TSFlags{22} = DS;
+ let TSFlags{17} = MUBUF;
+ let TSFlags{18} = MTBUF;
+ let TSFlags{19} = SMRD;
+ let TSFlags{20} = MIMG;
+ let TSFlags{21} = EXP;
+ let TSFlags{22} = FLAT;
+ let TSFlags{23} = DS;
- let TSFlags{23} = VGPRSpill;
- let TSFlags{24} = SGPRSpill;
+ let TSFlags{24} = VGPRSpill;
+ let TSFlags{25} = SGPRSpill;
let TSFlags{32} = VM_CNT;
let TSFlags{33} = EXP_CNT;
@@ -187,7 +193,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{50} = D16Buf;
- let TSFlags{51} = IsNonFlatSeg;
+ let TSFlags{51} = IsFlatGlobal;
let TSFlags{52} = FPDPRounding;
@@ -197,17 +203,14 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{55} = IsDOT;
- let SchedRW = [Write32Bit];
+ let TSFlags{56} = IsFlatScratch;
- field bits<1> DisableSIDecoder = 0;
- field bits<1> DisableVIDecoder = 0;
- field bits<1> DisableDecoder = 0;
+ let SchedRW = [Write32Bit];
- let isAsmParserOnly = !if(!eq(DisableDecoder{0}, {0}), 0, 1);
let AsmVariantName = AMDGPUAsmVariants.Default;
// Avoid changing source registers in a way that violates constant bus read limitations.
- let hasExtraSrcRegAllocReq = !if(VOP1,1,!if(VOP2,1,!if(VOP3,1,!if(VOPC,1,!if(SDWA,1, !if(VALU,1,0))))));
+ let hasExtraSrcRegAllocReq = !or(VOP1, VOP2, VOP3, VOPC, SDWA, VALU);
}
class PseudoInstSI<dag outs, dag ins, list<dag> pattern = [], string asm = "">
@@ -362,15 +365,4 @@ class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
let VALU = 1;
}
-class EXPCommon<dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern> {
- let EXP = 1;
- let EXP_CNT = 1;
- let mayLoad = 0; // Set to 1 if done bit is set.
- let mayStore = 1;
- let UseNamedOperandTable = 1;
- let Uses = [EXEC];
- let SchedRW = [WriteExport];
-}
-
} // End Uses = [EXEC]
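// A hypothetical illustration of how the renumbered TSFlags bits above would be
// consumed on the C++ side: each TableGen field bit becomes one bit of the
// instruction's TSFlags, tested with a mask. The enum values simply mirror the
// new bit positions in the hunk; the names and namespace are assumptions, not
// the actual SIInstrFlags definitions.
#include <cstdint>

namespace sketch {
enum : uint64_t {
  TRANS = UINT64_C(1) << 16, // new flag inserted at bit 16
  MUBUF = UINT64_C(1) << 17, // formats below it shift up by one bit
  MTBUF = UINT64_C(1) << 18,
  FLAT  = UINT64_C(1) << 22,
};

inline bool isTRANS(uint64_t TSFlags) { return (TSFlags & TRANS) != 0; }
inline bool isFLAT(uint64_t TSFlags) { return (TSFlags & FLAT) != 0; }
} // namespace sketch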
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9af8ffedce0f..dfd0075bf03a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -13,53 +13,20 @@
#include "SIInstrInfo.h"
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "AMDGPUInstrInfo.h"
#include "GCNHazardRecognizer.h"
-#include "SIDefines.h"
-#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/MemoryLocation.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/Casting.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-#include <utility>
using namespace llvm;
@@ -69,6 +36,9 @@ using namespace llvm;
#include "AMDGPUGenInstrInfo.inc"
namespace llvm {
+
+class AAResults;
+
namespace AMDGPU {
#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
@@ -136,7 +106,7 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
}
bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const {
+ AAResults *AA) const {
// TODO: The generic check fails for VALU instructions that should be
// rematerializable due to implicit reads of exec. We really want all of the
// generic logic for this except for this.
@@ -144,8 +114,8 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
- case AMDGPU::V_ACCVGPR_READ_B32:
- case AMDGPU::V_ACCVGPR_WRITE_B32:
+ case AMDGPU::V_ACCVGPR_READ_B32_e64:
+ case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
// No implicit operands.
return MI.getNumOperands() == MI.getDesc().getNumOperands();
default:
@@ -418,7 +388,7 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
}
if (isFLAT(LdSt)) {
- // Instructions have either vaddr or saddr or both.
+ // Instructions have either vaddr or saddr or both or none.
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (BaseOp)
BaseOps.push_back(BaseOp);
@@ -459,10 +429,8 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
auto Base2 = MO2->getValue();
if (!Base1 || !Base2)
return false;
- const MachineFunction &MF = *MI1.getParent()->getParent();
- const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
- Base1 = GetUnderlyingObject(Base1, DL);
- Base2 = GetUnderlyingObject(Base2, DL);
+ Base1 = getUnderlyingObject(Base1);
+ Base2 = getUnderlyingObject(Base2);
if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
return false;
@@ -474,27 +442,33 @@ bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
unsigned NumLoads,
unsigned NumBytes) const {
- // If current mem ops pair do not have same base pointer, then they cannot be
- // clustered.
- assert(!BaseOps1.empty() && !BaseOps2.empty());
- const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
- const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
- if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
+ // If the mem ops (to be clustered) do not have the same base ptr, then they
+ // should not be clustered
+ if (!BaseOps1.empty() && !BaseOps2.empty()) {
+ const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
+ const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
+ if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
+ return false;
+ } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
+ // If only one base op is empty, they do not have the same base ptr
return false;
-
- // Compute max cluster size based on average number bytes clustered till now,
- // and decide based on it, if current mem ops pair can be clustered or not.
- assert((NumLoads > 0) && (NumBytes > 0) && (NumBytes >= NumLoads) &&
- "Invalid NumLoads/NumBytes values");
- unsigned MaxNumLoads;
- if (NumBytes <= 4 * NumLoads) {
- // Loads are dword or smaller (on average).
- MaxNumLoads = 5;
- } else {
- // Loads are bigger than a dword (on average).
- MaxNumLoads = 4;
}
- return NumLoads <= MaxNumLoads;
+
+ // In order to avoid register pressure, the number of DWORDs loaded together
+ // by all clustered mem ops (computed from the average load size) should not
+ // exceed 8. This is an empirical value based on certain observations and
+ // performance-related experiments.
+ // The good thing about this heuristic is that it avoids clustering too many
+ // sub-word loads, and also avoids clustering wide loads. Below is a brief
+ // summary of how the heuristic behaves for various `LoadSize`:
+ // (1) 1 <= LoadSize <= 4: cluster at max 8 mem ops
+ // (2) 5 <= LoadSize <= 8: cluster at max 4 mem ops
+ // (3) 9 <= LoadSize <= 12: cluster at max 2 mem ops
+ // (4) 13 <= LoadSize <= 16: cluster at max 2 mem ops
+ // (5) LoadSize >= 17: do not cluster
+ const unsigned LoadSize = NumBytes / NumLoads;
+ const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads;
+ return NumDWORDs <= 8;
}
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
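// A minimal standalone sketch of the DWORD-based clustering heuristic shown in
// the hunk above, assuming only the NumBytes/NumLoads inputs it uses; this is an
// illustration of the arithmetic, not the SIInstrInfo implementation itself.
#include <cassert>

// Returns true if the candidate cluster stays within the 8-DWORD budget.
static bool sketchShouldCluster(unsigned NumLoads, unsigned NumBytes) {
  assert(NumLoads > 0 && NumBytes >= NumLoads);
  const unsigned LoadSize = NumBytes / NumLoads;              // average bytes per op
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads; // round up to DWORDs
  return NumDWORDs <= 8;
}

int main() {
  // 8 dword loads: 32 bytes over 8 ops gives 8 DWORDs, still clustered.
  assert(sketchShouldCluster(8, 32));
  // 3 dwordx4 loads: 48 bytes over 3 ops gives 12 DWORDs, not clustered.
  assert(!sketchShouldCluster(3, 48));
  return 0;
}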
@@ -533,6 +507,157 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
+/// Handle copying from SGPR to AGPR, or from AGPR to AGPR. It is not possible
+/// to directly copy, so an intermediate VGPR needs to be used.
+static void indirectCopyToAGPR(const SIInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc,
+ RegScavenger &RS,
+ Register ImpDefSuperReg = Register(),
+ Register ImpUseSuperReg = Register()) {
+ const SIRegisterInfo &RI = TII.getRegisterInfo();
+
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg) ||
+ AMDGPU::AGPR_32RegClass.contains(SrcReg));
+
+ // First try to find defining accvgpr_write to avoid temporary registers.
+ for (auto Def = MI, E = MBB.begin(); Def != E; ) {
+ --Def;
+ if (!Def->definesRegister(SrcReg, &RI))
+ continue;
+ if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
+ break;
+
+ MachineOperand &DefOp = Def->getOperand(1);
+ assert(DefOp.isReg() || DefOp.isImm());
+
+ if (DefOp.isReg()) {
+ // Check that the register source operand is not clobbered before MI.
+ // Immediate operands are always safe to propagate.
+ bool SafeToPropagate = true;
+ for (auto I = Def; I != MI && SafeToPropagate; ++I)
+ if (I->modifiesRegister(DefOp.getReg(), &RI))
+ SafeToPropagate = false;
+
+ if (!SafeToPropagate)
+ break;
+
+ DefOp.setIsKill(false);
+ }
+
+ MachineInstrBuilder Builder =
+ BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
+ .add(DefOp);
+ if (ImpDefSuperReg)
+ Builder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);
+
+ if (ImpUseSuperReg) {
+ Builder.addReg(ImpUseSuperReg,
+ getKillRegState(KillSrc) | RegState::Implicit);
+ }
+
+ return;
+ }
+
+ RS.enterBasicBlock(MBB);
+ RS.forward(MI);
+
+ // Ideally we want to have three registers for a long reg_sequence copy
+ // to hide 2 waitstates between v_mov_b32 and accvgpr_write.
+ unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
+ *MBB.getParent());
+
+ // Registers in the sequence are allocated contiguously so we can just
+ // use register number to pick one of three round-robin temps.
+ unsigned RegNo = DestReg % 3;
+ Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+ if (!Tmp)
+ report_fatal_error("Cannot scavenge VGPR to copy to AGPR");
+ RS.setRegUsed(Tmp);
+ // Only loop through if there are any free registers left; otherwise the
+ // scavenger may report a fatal error when there is no emergency spill slot,
+ // or it may spill using the slot.
+ while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {
+ Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+ if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
+ break;
+ Tmp = Tmp2;
+ RS.setRegUsed(Tmp);
+ }
+
+ // Insert copy to temporary VGPR.
+ unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
+ if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
+ TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
+ } else {
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ }
+
+ MachineInstrBuilder UseBuilder = BuildMI(MBB, MI, DL, TII.get(TmpCopyOp), Tmp)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ if (ImpUseSuperReg) {
+ UseBuilder.addReg(ImpUseSuperReg,
+ getKillRegState(KillSrc) | RegState::Implicit);
+ }
+
+ MachineInstrBuilder DefBuilder
+ = BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
+ .addReg(Tmp, RegState::Kill);
+
+ if (ImpDefSuperReg)
+ DefBuilder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);
+}
+
+static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, const DebugLoc &DL,
+ MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
+ const TargetRegisterClass *RC, bool Forward) {
+ const SIRegisterInfo &RI = TII.getRegisterInfo();
+ ArrayRef<int16_t> BaseIndices = RI.getRegSplitParts(RC, 4);
+ MachineBasicBlock::iterator I = MI;
+ MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
+
+ for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
+ int16_t SubIdx = BaseIndices[Idx];
+ Register Reg = RI.getSubReg(DestReg, SubIdx);
+ unsigned Opcode = AMDGPU::S_MOV_B32;
+
+ // Is the SGPR aligned? If so, try to combine with the next one.
+ Register Src = RI.getSubReg(SrcReg, SubIdx);
+ bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
+ bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
+ if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
+ // Can use SGPR64 copy
+ unsigned Channel = RI.getChannelFromSubReg(SubIdx);
+ SubIdx = RI.getSubRegFromChannel(Channel, 2);
+ Opcode = AMDGPU::S_MOV_B64;
+ Idx++;
+ }
+
+ LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
+ .addReg(RI.getSubReg(SrcReg, SubIdx))
+ .addReg(SrcReg, RegState::Implicit);
+
+ if (!FirstMI)
+ FirstMI = LastMI;
+
+ if (!Forward)
+ I--;
+ }
+
+ assert(FirstMI && LastMI);
+ if (!Forward)
+ std::swap(FirstMI, LastMI);
+
+ FirstMI->addOperand(
+ MachineOperand::CreateReg(DestReg, true /*IsDef*/, true /*IsImp*/));
+
+ if (KillSrc)
+ LastMI->addRegisterKilled(SrcReg, &RI);
+}
+
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
@@ -563,7 +688,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
AMDGPU::SReg_32RegClass.contains(SrcReg) ||
AMDGPU::AGPR_32RegClass.contains(SrcReg));
unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
- AMDGPU::V_ACCVGPR_READ_B32 : AMDGPU::V_MOV_B32_e32;
+ AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
BuildMI(MBB, MI, DL, get(Opc), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
@@ -639,88 +764,36 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg == AMDGPU::SCC) {
// Copying 64-bit or 32-bit sources to SCC barely makes sense,
// but SelectionDAG emits such copies for i1 sources.
- // TODO: Use S_BITCMP0_B32 instead and only consider the 0th bit.
if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
- SrcReg = RI.getSubReg(SrcReg, AMDGPU::sub0);
+ // This copy can only be produced by patterns
+ // with explicit SCC, which are known to be enabled
+ // only for subtargets with S_CMP_LG_U64 present.
+ assert(ST.hasScalarCompareEq64());
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U64))
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ } else {
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
}
- assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
-
- BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0);
return;
}
- if (RC == &AMDGPU::AGPR_32RegClass) {
- assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
- AMDGPU::SReg_32RegClass.contains(SrcReg) ||
- AMDGPU::AGPR_32RegClass.contains(SrcReg));
- if (!AMDGPU::VGPR_32RegClass.contains(SrcReg)) {
- // First try to find defining accvgpr_write to avoid temporary registers.
- for (auto Def = MI, E = MBB.begin(); Def != E; ) {
- --Def;
- if (!Def->definesRegister(SrcReg, &RI))
- continue;
- if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
- break;
-
- MachineOperand &DefOp = Def->getOperand(1);
- assert(DefOp.isReg() || DefOp.isImm());
-
- if (DefOp.isReg()) {
- // Check that register source operand if not clobbered before MI.
- // Immediate operands are always safe to propagate.
- bool SafeToPropagate = true;
- for (auto I = Def; I != MI && SafeToPropagate; ++I)
- if (I->modifiesRegister(DefOp.getReg(), &RI))
- SafeToPropagate = false;
-
- if (!SafeToPropagate)
- break;
-
- DefOp.setIsKill(false);
- }
- BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
- .add(DefOp);
- return;
- }
-
- RegScavenger RS;
- RS.enterBasicBlock(MBB);
- RS.forward(MI);
-
- // Ideally we want to have three registers for a long reg_sequence copy
- // to hide 2 waitstates between v_mov_b32 and accvgpr_write.
- unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
- *MBB.getParent());
-
- // Registers in the sequence are allocated contiguously so we can just
- // use register number to pick one of three round-robin temps.
- unsigned RegNo = DestReg % 3;
- Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
- if (!Tmp)
- report_fatal_error("Cannot scavenge VGPR to copy to AGPR");
- RS.setRegUsed(Tmp);
- // Only loop through if there are any free registers left, otherwise
- // scavenger may report a fatal error without emergency spill slot
- // or spill with the slot.
- while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {
- unsigned Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
- if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
- break;
- Tmp = Tmp2;
- RS.setRegUsed(Tmp);
- }
- copyPhysReg(MBB, MI, DL, Tmp, SrcReg, KillSrc);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
- .addReg(Tmp, RegState::Kill);
+ if (RC == &AMDGPU::AGPR_32RegClass) {
+ if (AMDGPU::VGPR_32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
- BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ // FIXME: The pass should maintain the scavenger to avoid re-scanning the
+ // block on every AGPR spill.
+ RegScavenger RS;
+ indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS);
return;
}
@@ -790,31 +863,38 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- unsigned EltSize = 4;
- unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
if (RI.isSGPRClass(RC)) {
- // TODO: Copy vec3/vec5 with s_mov_b64s then final s_mov_b32.
- if (!(RI.getRegSizeInBits(*RC) % 64)) {
- Opcode = AMDGPU::S_MOV_B64;
- EltSize = 8;
- } else {
- Opcode = AMDGPU::S_MOV_B32;
- EltSize = 4;
- }
-
if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
- } else if (RI.hasAGPRs(RC)) {
+ expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward);
+ return;
+ }
+
+ unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ if (RI.hasAGPRs(RC)) {
Opcode = RI.hasVGPRs(RI.getPhysRegClass(SrcReg)) ?
- AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY;
+ AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
} else if (RI.hasVGPRs(RC) && RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
- Opcode = AMDGPU::V_ACCVGPR_READ_B32;
+ Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
}
- ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
- bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
+ // For the cases where we need an intermediate instruction/temporary register
+ // (destination is an AGPR), we need a scavenger.
+ //
+ // FIXME: The pass should maintain this for us so we don't have to re-scan the
+ // whole block for every handled copy.
+ std::unique_ptr<RegScavenger> RS;
+ if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
+ RS.reset(new RegScavenger());
+
+ ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, 4);
+
+ // If there is an overlap, we can't kill the super-register on the last
+ // instruction, since it will also kill the components made live by this def.
+ const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
unsigned SubIdx;
@@ -823,22 +903,23 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else
SubIdx = SubIndices[SubIndices.size() - Idx - 1];
- if (Opcode == TargetOpcode::COPY) {
- copyPhysReg(MBB, MI, DL, RI.getSubReg(DestReg, SubIdx),
- RI.getSubReg(SrcReg, SubIdx), KillSrc);
- continue;
- }
+ bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;
- MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
- get(Opcode), RI.getSubReg(DestReg, SubIdx));
-
- Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
-
- if (Idx == 0)
- Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
+ if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
+ Register ImpDefSuper = Idx == 0 ? Register(DestReg) : Register();
+ Register ImpUseSuper = SrcReg;
+ indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx),
+ RI.getSubReg(SrcReg, SubIdx), UseKill, *RS,
+ ImpDefSuper, ImpUseSuper);
+ } else {
+ MachineInstrBuilder Builder =
+ BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx))
+ .addReg(RI.getSubReg(SrcReg, SubIdx));
+ if (Idx == 0)
+ Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
- bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
- Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
+ Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
+ }
}
}
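// expandSGPRCopy above merges two adjacent 32-bit sub-register moves into a
// single s_mov_b64 whenever both the destination and source lanes are
// even-aligned. The sketch below restates only that pairing rule with plain
// lane indices; it is not the real sub-register or encoding logic.
#include <cstdio>
#include <utility>
#include <vector>

// Returns (lane, width-in-lanes) pairs describing the moves that would be
// emitted for an N-lane SGPR tuple starting at dest lane DstLane0 / src lane
// SrcLane0.
static std::vector<std::pair<unsigned, unsigned>>
planSGPRCopy(unsigned NumLanes, unsigned DstLane0, unsigned SrcLane0) {
  std::vector<std::pair<unsigned, unsigned>> Moves;
  for (unsigned Idx = 0; Idx < NumLanes; ++Idx) {
    bool AlignedDst = (DstLane0 + Idx) % 2 == 0;
    bool AlignedSrc = (SrcLane0 + Idx) % 2 == 0;
    if (AlignedDst && AlignedSrc && Idx + 1 < NumLanes) {
      Moves.push_back({Idx, 2}); // s_mov_b64 covers this lane and the next
      ++Idx;
    } else {
      Moves.push_back({Idx, 1}); // fall back to s_mov_b32
    }
  }
  return Moves;
}

int main() {
  // A 4-lane copy with both ends even-aligned becomes two s_mov_b64.
  for (auto [Lane, Width] : planSGPRCopy(4, 0, 0))
    std::printf("lane %u width %u\n", Lane, Width);
  return 0;
}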
@@ -928,8 +1009,6 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
Register TrueReg,
Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- MachineFunction *MF = MBB.getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const TargetRegisterClass *BoolXExecRC =
RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
@@ -1089,78 +1168,123 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
return AMDGPU::COPY;
}
-static unsigned getIndirectVGPRWritePseudoOpc(unsigned VecSize) {
+const MCInstrDesc &
+SIInstrInfo::getIndirectGPRIDXPseudo(unsigned VecSize,
+ bool IsIndirectSrc) const {
+ if (IsIndirectSrc) {
+ if (VecSize <= 32) // 4 bytes
+ return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
+ if (VecSize <= 64) // 8 bytes
+ return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
+ if (VecSize <= 96) // 12 bytes
+ return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
+ if (VecSize <= 128) // 16 bytes
+ return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
+ if (VecSize <= 160) // 20 bytes
+ return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
+ if (VecSize <= 256) // 32 bytes
+ return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
+ if (VecSize <= 512) // 64 bytes
+ return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
+ if (VecSize <= 1024) // 128 bytes
+ return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
+
+ llvm_unreachable("unsupported size for IndirectRegReadGPRIDX pseudos");
+ }
+
+ if (VecSize <= 32) // 4 bytes
+ return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
+ if (VecSize <= 64) // 8 bytes
+ return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
+ if (VecSize <= 96) // 12 bytes
+ return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
+ if (VecSize <= 128) // 16 bytes
+ return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
+ if (VecSize <= 160) // 20 bytes
+ return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
+ if (VecSize <= 256) // 32 bytes
+ return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
+ if (VecSize <= 512) // 64 bytes
+ return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
+ if (VecSize <= 1024) // 128 bytes
+ return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
+
+ llvm_unreachable("unsupported size for IndirectRegWriteGPRIDX pseudos");
+}
+
+static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize) {
if (VecSize <= 32) // 4 bytes
- return AMDGPU::V_INDIRECT_REG_WRITE_B32_V1;
+ return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
if (VecSize <= 64) // 8 bytes
- return AMDGPU::V_INDIRECT_REG_WRITE_B32_V2;
+ return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
if (VecSize <= 96) // 12 bytes
- return AMDGPU::V_INDIRECT_REG_WRITE_B32_V3;
+ return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
if (VecSize <= 128) // 16 bytes
- return AMDGPU::V_INDIRECT_REG_WRITE_B32_V4;
+ return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
if (VecSize <= 160) // 20 bytes
- return AMDGPU::V_INDIRECT_REG_WRITE_B32_V5;
+ return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
if (VecSize <= 256) // 32 bytes
- return AMDGPU::V_INDIRECT_REG_WRITE_B32_V8;
+ return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
if (VecSize <= 512) // 64 bytes
- return AMDGPU::V_INDIRECT_REG_WRITE_B32_V16;
+ return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
if (VecSize <= 1024) // 128 bytes
- return AMDGPU::V_INDIRECT_REG_WRITE_B32_V32;
+ return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
llvm_unreachable("unsupported size for IndirectRegWrite pseudos");
}
-static unsigned getIndirectSGPRWritePseudo32(unsigned VecSize) {
+static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize) {
if (VecSize <= 32) // 4 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B32_V1;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
if (VecSize <= 64) // 8 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B32_V2;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
if (VecSize <= 96) // 12 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B32_V3;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
if (VecSize <= 128) // 16 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B32_V4;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
if (VecSize <= 160) // 20 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B32_V5;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
if (VecSize <= 256) // 32 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B32_V8;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
if (VecSize <= 512) // 64 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B32_V16;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
if (VecSize <= 1024) // 128 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B32_V32;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
llvm_unreachable("unsupported size for IndirectRegWrite pseudos");
}
-static unsigned getIndirectSGPRWritePseudo64(unsigned VecSize) {
+static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize) {
if (VecSize <= 64) // 8 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B64_V1;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
if (VecSize <= 128) // 16 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B64_V2;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
if (VecSize <= 256) // 32 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B64_V4;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
if (VecSize <= 512) // 64 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B64_V8;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
if (VecSize <= 1024) // 128 bytes
- return AMDGPU::S_INDIRECT_REG_WRITE_B64_V16;
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
llvm_unreachable("unsupported size for IndirectRegWrite pseudos");
}
-const MCInstrDesc &SIInstrInfo::getIndirectRegWritePseudo(
- unsigned VecSize, unsigned EltSize, bool IsSGPR) const {
+const MCInstrDesc &
+SIInstrInfo::getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize,
+ bool IsSGPR) const {
if (IsSGPR) {
switch (EltSize) {
case 32:
- return get(getIndirectSGPRWritePseudo32(VecSize));
+ return get(getIndirectSGPRWriteMovRelPseudo32(VecSize));
case 64:
- return get(getIndirectSGPRWritePseudo64(VecSize));
+ return get(getIndirectSGPRWriteMovRelPseudo64(VecSize));
default:
llvm_unreachable("invalid reg indexing elt size");
}
}
assert(EltSize == 32 && "invalid reg indexing elt size");
- return get(getIndirectVGPRWritePseudoOpc(VecSize));
+ return get(getIndirectVGPRWriteMovRelPseudoOpc(VecSize));
}
static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
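// The new getIndirectGPRIDXPseudo selection above buckets the vector size into
// one of eight pseudo variants. The helper below restates only that bucket
// arithmetic as a standalone sketch; the _V1 .. _V32 suffixes in the comments
// refer to the opcode names in the hunk.
static unsigned gprIdxLanes(unsigned VecSizeInBits) {
  if (VecSizeInBits <= 32)   return 1;  // _V1,  4 bytes
  if (VecSizeInBits <= 64)   return 2;  // _V2,  8 bytes
  if (VecSizeInBits <= 96)   return 3;  // _V3, 12 bytes
  if (VecSizeInBits <= 128)  return 4;  // _V4, 16 bytes
  if (VecSizeInBits <= 160)  return 5;  // _V5, 20 bytes
  if (VecSizeInBits <= 256)  return 8;  // _V8, 32 bytes
  if (VecSizeInBits <= 512)  return 16; // _V16, 64 bytes
  if (VecSizeInBits <= 1024) return 32; // _V32, 128 bytes
  return 0;                             // unsupported, unreachable in the pass
}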
@@ -1219,8 +1343,16 @@ static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_A32_SAVE;
case 8:
return AMDGPU::SI_SPILL_A64_SAVE;
+ case 12:
+ return AMDGPU::SI_SPILL_A96_SAVE;
case 16:
return AMDGPU::SI_SPILL_A128_SAVE;
+ case 20:
+ return AMDGPU::SI_SPILL_A160_SAVE;
+ case 24:
+ return AMDGPU::SI_SPILL_A192_SAVE;
+ case 32:
+ return AMDGPU::SI_SPILL_A256_SAVE;
case 64:
return AMDGPU::SI_SPILL_A512_SAVE;
case 128:
@@ -1260,7 +1392,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// The SGPR spill/restore instructions only work on numbered SGPRs, so we need
// to make sure we are using the correct register class.
- if (Register::isVirtualRegister(SrcReg) && SpillSize == 4) {
+ if (SrcReg.isVirtual() && SpillSize == 4) {
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
}
@@ -1269,11 +1401,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(isKill)) // data
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
- .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
.addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
- // Add the scratch resource registers as implicit uses because we may end up
- // needing them, and need to ensure that the reserved registers are
- // correctly handled.
+
if (RI.spillSGPRToVGPR())
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
return;
@@ -1283,18 +1412,12 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
: getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();
- auto MIB = BuildMI(MBB, MI, DL, get(Opcode));
- if (RI.hasAGPRs(RC)) {
- MachineRegisterInfo &MRI = MF->getRegInfo();
- Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- MIB.addReg(Tmp, RegState::Define);
- }
- MIB.addReg(SrcReg, getKillRegState(isKill)) // data
- .addFrameIndex(FrameIndex) // addr
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
- .addImm(0) // offset
- .addMemOperand(MMO);
+ BuildMI(MBB, MI, DL, get(Opcode))
+ .addReg(SrcReg, getKillRegState(isKill)) // data
+ .addFrameIndex(FrameIndex) // addr
+ .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+ .addImm(0) // offset
+ .addMemOperand(MMO);
}
static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
@@ -1353,8 +1476,16 @@ static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_A32_RESTORE;
case 8:
return AMDGPU::SI_SPILL_A64_RESTORE;
+ case 12:
+ return AMDGPU::SI_SPILL_A96_RESTORE;
case 16:
return AMDGPU::SI_SPILL_A128_RESTORE;
+ case 20:
+ return AMDGPU::SI_SPILL_A160_RESTORE;
+ case 24:
+ return AMDGPU::SI_SPILL_A192_RESTORE;
+ case 32:
+ return AMDGPU::SI_SPILL_A256_RESTORE;
case 64:
return AMDGPU::SI_SPILL_A512_RESTORE;
case 128:
@@ -1401,143 +1532,36 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, OpDesc, DestReg)
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
- .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
.addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
+
return;
}
unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
: getVGPRSpillRestoreOpcode(SpillSize);
- auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg);
- if (RI.hasAGPRs(RC)) {
- MachineRegisterInfo &MRI = MF->getRegInfo();
- Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- MIB.addReg(Tmp, RegState::Define);
- }
- MIB.addFrameIndex(FrameIndex) // vaddr
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
- .addImm(0) // offset
- .addMemOperand(MMO);
+ BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+ .addFrameIndex(FrameIndex) // vaddr
+ .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+ .addImm(0) // offset
+ .addMemOperand(MMO);
}
-/// \param @Offset Offset in bytes of the FrameIndex being spilled
-unsigned SIInstrInfo::calculateLDSSpillAddress(
- MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg,
- unsigned FrameOffset, unsigned Size) const {
- MachineFunction *MF = MBB.getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const DebugLoc &DL = MBB.findDebugLoc(MI);
- unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
- unsigned WavefrontSize = ST.getWavefrontSize();
-
- Register TIDReg = MFI->getTIDReg();
- if (!MFI->hasCalculatedTID()) {
- MachineBasicBlock &Entry = MBB.getParent()->front();
- MachineBasicBlock::iterator Insert = Entry.front();
- const DebugLoc &DL = Insert->getDebugLoc();
-
- TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
- *MF);
- if (TIDReg == AMDGPU::NoRegister)
- return TIDReg;
-
- if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) &&
- WorkGroupSize > WavefrontSize) {
- Register TIDIGXReg =
- MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
- Register TIDIGYReg =
- MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
- Register TIDIGZReg =
- MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
- Register InputPtrReg =
- MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
- for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
- if (!Entry.isLiveIn(Reg))
- Entry.addLiveIn(Reg);
- }
-
- RS->enterBasicBlock(Entry);
- // FIXME: Can we scavenge an SReg_64 and access the subregs?
- Register STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- Register STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
- .addReg(InputPtrReg)
- .addImm(SI::KernelInputOffsets::NGROUPS_Z);
- BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
- .addReg(InputPtrReg)
- .addImm(SI::KernelInputOffsets::NGROUPS_Y);
-
- // NGROUPS.X * NGROUPS.Y
- BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
- .addReg(STmp1)
- .addReg(STmp0);
- // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
- .addReg(STmp1)
- .addReg(TIDIGXReg);
- // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
- .addReg(STmp0)
- .addReg(TIDIGYReg)
- .addReg(TIDReg);
- // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z
- getAddNoCarry(Entry, Insert, DL, TIDReg)
- .addReg(TIDReg)
- .addReg(TIDIGZReg)
- .addImm(0); // clamp bit
- } else {
- // Get the wave id
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
- TIDReg)
- .addImm(-1)
- .addImm(0);
-
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
- TIDReg)
- .addImm(-1)
- .addReg(TIDReg);
- }
-
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
- TIDReg)
- .addImm(2)
- .addReg(TIDReg);
- MFI->setTIDReg(TIDReg);
- }
-
- // Add FrameIndex to LDS offset
- unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
- getAddNoCarry(MBB, MI, DL, TmpReg)
- .addImm(LDSOffset)
- .addReg(TIDReg)
- .addImm(0); // clamp bit
-
- return TmpReg;
+void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ insertNoops(MBB, MI, 1);
}
-void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- int Count) const {
+void SIInstrInfo::insertNoops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned Quantity) const {
DebugLoc DL = MBB.findDebugLoc(MI);
- while (Count > 0) {
- int Arg;
- if (Count >= 8)
- Arg = 7;
- else
- Arg = Count - 1;
- Count -= 8;
- BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
- .addImm(Arg);
+ while (Quantity > 0) {
+ unsigned Arg = std::min(Quantity, 8u);
+ Quantity -= Arg;
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Arg - 1);
}
}
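
Editorial note (not part of the patch): the rewritten insertNoops batches a requested wait count into S_NOP instructions, where an immediate of N yields N+1 wait states and at most 8 fit per instruction. A minimal standalone sketch of that batching, using only standard C++ and a hypothetical helper name:

    #include <algorithm>
    #include <cstdio>

    // Split a wait count into S_NOP immediates exactly as the new loop does:
    // take up to 8 waits per instruction and encode them as (count - 1).
    static void emitNoopBatches(unsigned Quantity) {
      while (Quantity > 0) {
        unsigned Arg = std::min(Quantity, 8u);
        Quantity -= Arg;
        std::printf("s_nop %u\n", Arg - 1);
      }
    }

    int main() {
      emitNoopBatches(18); // prints: s_nop 7, s_nop 7, s_nop 1
      return 0;
    }
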
-void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- insertWaitStates(MBB, MI, 1);
-}
-
void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
auto MF = MBB.getParent();
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
@@ -1593,7 +1617,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// register allocation.
MI.setDesc(get(AMDGPU::S_XOR_B32));
break;
-
+ case AMDGPU::S_OR_B64_term:
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_OR_B64));
+ break;
case AMDGPU::S_OR_B32_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
@@ -1670,36 +1698,35 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
- case AMDGPU::V_INDIRECT_REG_WRITE_B32_V1:
- case AMDGPU::V_INDIRECT_REG_WRITE_B32_V2:
- case AMDGPU::V_INDIRECT_REG_WRITE_B32_V3:
- case AMDGPU::V_INDIRECT_REG_WRITE_B32_V4:
- case AMDGPU::V_INDIRECT_REG_WRITE_B32_V5:
- case AMDGPU::V_INDIRECT_REG_WRITE_B32_V8:
- case AMDGPU::V_INDIRECT_REG_WRITE_B32_V16:
- case AMDGPU::V_INDIRECT_REG_WRITE_B32_V32:
- case AMDGPU::S_INDIRECT_REG_WRITE_B32_V1:
- case AMDGPU::S_INDIRECT_REG_WRITE_B32_V2:
- case AMDGPU::S_INDIRECT_REG_WRITE_B32_V3:
- case AMDGPU::S_INDIRECT_REG_WRITE_B32_V4:
- case AMDGPU::S_INDIRECT_REG_WRITE_B32_V5:
- case AMDGPU::S_INDIRECT_REG_WRITE_B32_V8:
- case AMDGPU::S_INDIRECT_REG_WRITE_B32_V16:
- case AMDGPU::S_INDIRECT_REG_WRITE_B32_V32:
- case AMDGPU::S_INDIRECT_REG_WRITE_B64_V1:
- case AMDGPU::S_INDIRECT_REG_WRITE_B64_V2:
- case AMDGPU::S_INDIRECT_REG_WRITE_B64_V4:
- case AMDGPU::S_INDIRECT_REG_WRITE_B64_V8:
- case AMDGPU::S_INDIRECT_REG_WRITE_B64_V16: {
+ case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
+ case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
+ case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
+ case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
+ case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
+ case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
+ case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
+ case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
const TargetRegisterClass *EltRC = getOpRegClass(MI, 2);
unsigned Opc;
if (RI.hasVGPRs(EltRC)) {
- Opc = ST.useVGPRIndexMode() ?
- AMDGPU::V_MOV_B32_indirect : AMDGPU::V_MOVRELD_B32_e32;
+ Opc = AMDGPU::V_MOVRELD_B32_e32;
} else {
- Opc = RI.getRegSizeInBits(*EltRC) == 64 ?
- AMDGPU::S_MOVRELD_B64 : AMDGPU::S_MOVRELD_B32;
+ Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
+ : AMDGPU::S_MOVRELD_B32;
}
const MCInstrDesc &OpDesc = get(Opc);
@@ -1722,6 +1749,78 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
+ case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
+ case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
+ case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
+ case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
+ case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
+ case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
+ case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
+ case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
+ assert(ST.useVGPRIndexMode());
+ Register VecReg = MI.getOperand(0).getReg();
+ bool IsUndef = MI.getOperand(1).isUndef();
+ Register Idx = MI.getOperand(3).getReg();
+ Register SubReg = MI.getOperand(4).getImm();
+
+ MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON))
+ .addReg(Idx)
+ .addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
+ SetOn->getOperand(3).setIsUndef();
+
+ const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, OpDesc)
+ .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
+ .add(MI.getOperand(2))
+ .addReg(VecReg, RegState::ImplicitDefine)
+ .addReg(VecReg,
+ RegState::Implicit | (IsUndef ? RegState::Undef : 0));
+
+ const int ImpDefIdx = OpDesc.getNumOperands() + OpDesc.getNumImplicitUses();
+ const int ImpUseIdx = ImpDefIdx + 1;
+ MIB->tieOperands(ImpDefIdx, ImpUseIdx);
+
+ MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF));
+
+ finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator()));
+
+ MI.eraseFromParent();
+ break;
+ }
+ case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
+ case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
+ case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
+ case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
+ case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
+ case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
+ case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
+ case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
+ assert(ST.useVGPRIndexMode());
+ Register Dst = MI.getOperand(0).getReg();
+ Register VecReg = MI.getOperand(1).getReg();
+ bool IsUndef = MI.getOperand(1).isUndef();
+ Register Idx = MI.getOperand(2).getReg();
+ Register SubReg = MI.getOperand(3).getImm();
+
+ MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON))
+ .addReg(Idx)
+ .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
+ SetOn->getOperand(3).setIsUndef();
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32))
+ .addDef(Dst)
+ .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
+ .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0))
+ .addReg(AMDGPU::M0, RegState::Implicit);
+
+ MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF));
+
+ finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator()));
+
+ MI.eraseFromParent();
+ break;
+ }
case AMDGPU::SI_PC_ADD_REL_OFFSET: {
MachineFunction &MF = *MBB.getParent();
Register Reg = MI.getOperand(0).getReg();
@@ -2062,7 +2161,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// buzz;
RS->enterBasicBlockEnd(MBB);
- unsigned Scav = RS->scavengeRegisterBackwards(
+ Register Scav = RS->scavengeRegisterBackwards(
AMDGPU::SReg_64RegClass,
MachineBasicBlock::iterator(GetPC), false, 0);
MRI.replaceRegWith(PCReg, Scav);
@@ -2170,6 +2269,7 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
case AMDGPU::SI_MASK_BRANCH:
case AMDGPU::S_MOV_B64_term:
case AMDGPU::S_XOR_B64_term:
+ case AMDGPU::S_OR_B64_term:
case AMDGPU::S_ANDN2_B64_term:
case AMDGPU::S_MOV_B32_term:
case AMDGPU::S_XOR_B32_term:
@@ -2264,7 +2364,7 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
.addMBB(TBB);
if (BytesAdded)
- *BytesAdded = 4;
+ *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
return 1;
}
@@ -2291,7 +2391,7 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
fixImplicitOperands(*CondBr);
if (BytesAdded)
- *BytesAdded = 4;
+ *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
return 1;
}
@@ -2308,7 +2408,7 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
CondReg.setIsKill(Cond[1].isKill());
if (BytesAdded)
- *BytesAdded = 8;
+ *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
return 2;
}
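
Editorial note (not part of the patch): the BytesAdded changes above account for the offset-0x3f hardware bug, where MC may have to pad each emitted branch with an s_nop. A hedged sketch of the resulting worst-case size arithmetic, with hypothetical names:

    #include <cstdio>

    // Worst-case bytes reported per insertBranch call: each branch is 4 bytes,
    // plus 4 more if the subtarget may need a padding s_nop after it.
    static unsigned branchBytes(unsigned NumBranches, bool HasOffset3fBug) {
      const unsigned PerBranch = HasOffset3fBug ? 8u : 4u;
      return NumBranches * PerBranch;
    }

    int main() {
      std::printf("%u %u\n", branchBytes(1, false), branchBytes(2, true)); // 4 16
      return 0;
    }
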
@@ -2337,7 +2437,8 @@ bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
case VCCZ: {
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
- assert(MRI.getRegClass(FalseReg) == RC);
+ if (MRI.getRegClass(FalseReg) != RC)
+ return false;
int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
@@ -2351,7 +2452,8 @@ bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
// with a vector one.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
- assert(MRI.getRegClass(FalseReg) == RC);
+ if (MRI.getRegClass(FalseReg) != RC)
+ return false;
int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
@@ -2489,8 +2591,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::COPY:
- case AMDGPU::V_ACCVGPR_WRITE_B32:
- case AMDGPU::V_ACCVGPR_READ_B32:
+ case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
+ case AMDGPU::V_ACCVGPR_READ_B32_e64:
return true;
default:
return false;
@@ -2543,7 +2645,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::S_MOV_B32:
- case AMDGPU::V_ACCVGPR_WRITE_B32:
+ case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
break;
}
@@ -2567,7 +2669,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (RI.isAGPR(*MRI, DstReg)) {
if (!isInlineConstant(Imm))
return false;
- NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;
+ NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
}
if (Is16Bit) {
@@ -2588,15 +2690,14 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.setDesc(get(NewOpc));
UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
- UseMI.getOperand(1).setTargetFlags(0);
UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
return true;
}
- if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
- Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64 ||
- Opc == AMDGPU::V_FMA_F32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
- Opc == AMDGPU::V_FMA_F16 || Opc == AMDGPU::V_FMAC_F16_e64) {
+ if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
+ Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
+ Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) {
// Don't fold if we are using source or output modifiers. The new VOP2
// instructions don't have them.
if (hasAnyModifiersSet(UseMI))
@@ -2611,10 +2712,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (isInlineConstant(UseMI, *Src0, *ImmOp))
return false;
- bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
- Opc == AMDGPU::V_FMA_F32 || Opc == AMDGPU::V_FMAC_F32_e64;
- bool IsFMA = Opc == AMDGPU::V_FMA_F32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
- Opc == AMDGPU::V_FMA_F16 || Opc == AMDGPU::V_FMAC_F16_e64;
+ bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
+ Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
+ bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64;
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
@@ -2686,10 +2787,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MRI->hasOneUse(Src0->getReg())) {
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
Src0Inlined = true;
- } else if ((Register::isPhysicalRegister(Src0->getReg()) &&
+ } else if ((Src0->getReg().isPhysical() &&
(ST.getConstantBusLimit(Opc) <= 1 &&
RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
- (Register::isVirtualRegister(Src0->getReg()) &&
+ (Src0->getReg().isVirtual() &&
(ST.getConstantBusLimit(Opc) <= 1 &&
RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
return false;
@@ -2704,9 +2805,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MRI->hasOneUse(Src1->getReg()) &&
commuteInstruction(UseMI)) {
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
- } else if ((Register::isPhysicalRegister(Src1->getReg()) &&
+ } else if ((Src1->getReg().isPhysical() &&
RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
- (Register::isVirtualRegister(Src1->getReg()) &&
+ (Src1->getReg().isVirtual() &&
RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
return false;
// VGPR is okay as Src1 - fallthrough
@@ -2864,6 +2965,18 @@ static int64_t getFoldableImm(const MachineOperand* MO) {
return AMDGPU::NoRegister;
}
+static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
+ MachineInstr &NewMI) {
+ if (LV) {
+ unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I < NumOps; ++I) {
+ MachineOperand &Op = MI.getOperand(I);
+ if (Op.isReg() && Op.isKill())
+ LV->replaceKillInstruction(Op.getReg(), MI, NewMI);
+ }
+ }
+}
+
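
Editorial note (not part of the patch): the new updateLiveVariables helper re-points kill flags from the MAD/FMA being replaced to its three-address replacement. A rough mock-up of that bookkeeping with stand-in types (none of these are LLVM classes):

    #include <cstddef>
    #include <utility>
    #include <vector>

    struct Operand { int Reg = 0; bool IsReg = false; bool IsKill = false; };
    struct Instr { std::vector<Operand> Ops; };

    // Stand-in for LiveVariables: tracks which instruction last kills a register.
    struct LiveVars {
      std::vector<std::pair<int, Instr *>> Kills;
      void replaceKillInstruction(int Reg, Instr *From, Instr *To) {
        for (auto &K : Kills)
          if (K.first == Reg && K.second == From)
            K.second = To;
      }
    };

    // Mirrors the helper: for every killed register operand of MI (skipping the
    // def at index 0), record NewMI as the new kill point.
    static void updateLiveVars(LiveVars *LV, Instr &MI, Instr &NewMI) {
      if (!LV)
        return;
      for (std::size_t I = 1; I < MI.Ops.size(); ++I)
        if (MI.Ops[I].IsReg && MI.Ops[I].IsKill)
          LV->replaceKillInstruction(MI.Ops[I].Reg, &MI, &NewMI);
    }

    int main() { return 0; }
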
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
MachineInstr &MI,
LiveVariables *LV) const {
@@ -2911,61 +3024,73 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
+ MachineInstrBuilder MIB;
if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
// If we have an SGPR input, we will violate the constant bus restriction.
- (ST.getConstantBusLimit(Opc) > 1 ||
- !Src0->isReg() ||
+ (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
if (auto Imm = getFoldableImm(Src2)) {
unsigned NewOpc =
- IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
- : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
- if (pseudoToMCOpcode(NewOpc) != -1)
- return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
- .add(*Dst)
- .add(*Src0)
- .add(*Src1)
- .addImm(Imm);
- }
- unsigned NewOpc =
- IsFMA ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
- : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
+ IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
+ : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
+ if (pseudoToMCOpcode(NewOpc) != -1) {
+ MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .add(*Dst)
+ .add(*Src0)
+ .add(*Src1)
+ .addImm(Imm);
+ updateLiveVariables(LV, MI, *MIB);
+ return MIB;
+ }
+ }
+ unsigned NewOpc = IsFMA
+ ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
+ : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
if (auto Imm = getFoldableImm(Src1)) {
- if (pseudoToMCOpcode(NewOpc) != -1)
- return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
- .add(*Dst)
- .add(*Src0)
- .addImm(Imm)
- .add(*Src2);
+ if (pseudoToMCOpcode(NewOpc) != -1) {
+ MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .add(*Dst)
+ .add(*Src0)
+ .addImm(Imm)
+ .add(*Src2);
+ updateLiveVariables(LV, MI, *MIB);
+ return MIB;
+ }
}
if (auto Imm = getFoldableImm(Src0)) {
if (pseudoToMCOpcode(NewOpc) != -1 &&
- isOperandLegal(MI, AMDGPU::getNamedOperandIdx(NewOpc,
- AMDGPU::OpName::src0), Src1))
- return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
- .add(*Dst)
- .add(*Src1)
- .addImm(Imm)
- .add(*Src2);
+ isOperandLegal(
+ MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
+ Src1)) {
+ MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .add(*Dst)
+ .add(*Src1)
+ .addImm(Imm)
+ .add(*Src2);
+ updateLiveVariables(LV, MI, *MIB);
+ return MIB;
+ }
}
}
- unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16 : AMDGPU::V_FMA_F32)
- : (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
+ unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16_e64 : AMDGPU::V_FMA_F32_e64)
+ : (IsF16 ? AMDGPU::V_MAD_F16_e64 : AMDGPU::V_MAD_F32_e64);
if (pseudoToMCOpcode(NewOpc) == -1)
return nullptr;
- return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
- .add(*Dst)
- .addImm(Src0Mods ? Src0Mods->getImm() : 0)
- .add(*Src0)
- .addImm(Src1Mods ? Src1Mods->getImm() : 0)
- .add(*Src1)
- .addImm(0) // Src mods
- .add(*Src2)
- .addImm(Clamp ? Clamp->getImm() : 0)
- .addImm(Omod ? Omod->getImm() : 0);
+ MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .add(*Dst)
+ .addImm(Src0Mods ? Src0Mods->getImm() : 0)
+ .add(*Src0)
+ .addImm(Src1Mods ? Src1Mods->getImm() : 0)
+ .add(*Src1)
+ .addImm(0) // Src mods
+ .add(*Src2)
+ .addImm(Clamp ? Clamp->getImm() : 0)
+ .addImm(Omod ? Omod->getImm() : 0);
+ updateLiveVariables(LV, MI, *MIB);
+ return MIB;
}
// It's not generally safe to move VALU instructions across these since it will
@@ -3003,9 +3128,6 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
// Target-independent instructions do not have an implicit-use of EXEC, even
// when they operate on VGPRs. Treating EXEC modifications as scheduling
// boundaries prevents incorrect movements of such instructions.
-
- // TODO: Don't treat setreg with known constant that only changes MODE as
- // barrier.
return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
@@ -3053,7 +3175,7 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
// EXEC = 0, but checking for that case here seems not worth it
// given the typical code patterns.
if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
- Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
+ isEXP(Opcode) ||
Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
return true;
@@ -3070,7 +3192,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
//
// However, executing them with EXEC = 0 causes them to operate on undefined
// data, which we avoid by returning true here.
- if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
+ if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
+ Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)
return true;
return false;
@@ -3241,9 +3364,6 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
if (OpInfo.RegClass < 0)
return false;
- const MachineFunction *MF = MI.getParent()->getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
-
if (MO.isImm() && isInlineConstant(MO, OpInfo)) {
if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() &&
OpNo == (unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(),
@@ -3396,8 +3516,11 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
Inst32.add(*Src2);
} else {
// In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
- // replaced with an implicit read of vcc. This was already added
- // during the initial BuildMI, so find it to preserve the flags.
+ // replaced with an implicit read of vcc or vcc_lo. The implicit read
+ // of vcc was already added during the initial BuildMI, but we
+ // 1) may need to change vcc to vcc_lo to preserve the original register
+ // 2) have to preserve the original flags.
+ fixImplicitOperands(*Inst32);
copyFlagsToImplicitVCC(*Inst32, *Src2);
}
}
@@ -3420,7 +3543,7 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
if (!MO.isUse())
return false;
- if (Register::isVirtualRegister(MO.getReg()))
+ if (MO.getReg().isVirtual())
return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
// Null is free
@@ -3464,13 +3587,7 @@ static bool shouldReadExec(const MachineInstr &MI) {
if (SIInstrInfo::isVALU(MI)) {
switch (MI.getOpcode()) {
case AMDGPU::V_READLANE_B32:
- case AMDGPU::V_READLANE_B32_gfx6_gfx7:
- case AMDGPU::V_READLANE_B32_gfx10:
- case AMDGPU::V_READLANE_B32_vi:
case AMDGPU::V_WRITELANE_B32:
- case AMDGPU::V_WRITELANE_B32_gfx6_gfx7:
- case AMDGPU::V_WRITELANE_B32_gfx10:
- case AMDGPU::V_WRITELANE_B32_vi:
return false;
}
@@ -3489,7 +3606,7 @@ static bool shouldReadExec(const MachineInstr &MI) {
static bool isSubRegOf(const SIRegisterInfo &TRI,
const MachineOperand &SuperVec,
const MachineOperand &SubReg) {
- if (Register::isPhysicalRegister(SubReg.getReg()))
+ if (SubReg.getReg().isPhysical())
return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
@@ -3530,7 +3647,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
continue;
Register Reg = Op.getReg();
- if (!Register::isVirtualRegister(Reg) && !RC->contains(Reg)) {
+ if (!Reg.isVirtual() && !RC->contains(Reg)) {
ErrInfo = "inlineasm operand has incorrect register class.";
return false;
}
@@ -3600,7 +3717,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (RegClass != -1) {
Register Reg = MI.getOperand(i).getReg();
- if (Reg == AMDGPU::NoRegister || Register::isVirtualRegister(Reg))
+ if (Reg == AMDGPU::NoRegister || Reg.isVirtual())
continue;
const TargetRegisterClass *RC = RI.getRegClass(RegClass);
@@ -3636,7 +3753,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
} else {
// No immediates on GFX9
if (!MO.isReg()) {
- ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
+ ErrInfo =
+ "Only reg allowed as operands in SDWA instructions on GFX9+";
return false;
}
}
@@ -3693,7 +3811,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo =
"Dst register should be tied to implicit use of preserved register";
return false;
- } else if (Register::isPhysicalRegister(TiedMO.getReg()) &&
+ } else if (TiedMO.getReg().isPhysical() &&
Dst.getReg() != TiedMO.getReg()) {
ErrInfo = "Dst register should use same physical register as preserved";
return false;
@@ -3752,11 +3870,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
++ConstantBusCount;
SmallVector<Register, 2> SGPRsUsed;
- Register SGPRUsed = findImplicitSGPRRead(MI);
- if (SGPRUsed != AMDGPU::NoRegister) {
- ++ConstantBusCount;
- SGPRsUsed.push_back(SGPRUsed);
- }
+ Register SGPRUsed;
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
@@ -3765,8 +3879,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
if (MO.isReg()) {
SGPRUsed = MO.getReg();
- if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
- return !RI.regsOverlap(SGPRUsed, SGPR);
+ if (llvm::all_of(SGPRsUsed, [SGPRUsed](unsigned SGPR) {
+ return SGPRUsed != SGPR;
})) {
++ConstantBusCount;
SGPRsUsed.push_back(SGPRUsed);
@@ -3777,7 +3891,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
}
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+
+ SGPRUsed = findImplicitSGPRRead(MI);
+ if (SGPRUsed != AMDGPU::NoRegister) {
+      // Implicit uses may safely overlap true operands
+ if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
+ return !RI.regsOverlap(SGPRUsed, SGPR);
+ })) {
+ ++ConstantBusCount;
+ SGPRsUsed.push_back(SGPRUsed);
+ }
+ }
+
// v_writelane_b32 is an exception from constant bus restriction:
// vsrc0 can be sgpr, const or m0 and lane select sgpr, m0 or inline-const
if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
@@ -3825,8 +3950,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
// Verify misc. restrictions on specific instructions.
- if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
- Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
+ if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
+ Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
const MachineOperand &Src0 = MI.getOperand(Src0Idx);
const MachineOperand &Src1 = MI.getOperand(Src1Idx);
const MachineOperand &Src2 = MI.getOperand(Src2Idx);
@@ -3837,6 +3962,15 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
return false;
}
}
+ if ((getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
+ SISrcMods::ABS) ||
+ (getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm() &
+ SISrcMods::ABS) ||
+ (getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
+ SISrcMods::ABS)) {
+ ErrInfo = "ABS not allowed in VOP3B instructions";
+ return false;
+ }
}
if (isSOP2(MI) || isSOPC(MI)) {
@@ -3945,7 +4079,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
- if (isFLAT(MI) && !MF->getSubtarget<GCNSubtarget>().hasFlatInstOffsets()) {
+ if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
if (Offset->getImm() != 0) {
ErrInfo = "subtarget does not support offsets in flat instructions";
@@ -4079,21 +4213,21 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
}
case AMDGPU::S_ADD_I32:
- return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32;
+ return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
case AMDGPU::S_ADDC_U32:
return AMDGPU::V_ADDC_U32_e32;
case AMDGPU::S_SUB_I32:
- return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
+ return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
// FIXME: These are not consistently handled, and selected when the carry is
// used.
case AMDGPU::S_ADD_U32:
- return AMDGPU::V_ADD_I32_e32;
+ return AMDGPU::V_ADD_CO_U32_e32;
case AMDGPU::S_SUB_U32:
- return AMDGPU::V_SUB_I32_e32;
+ return AMDGPU::V_SUB_CO_U32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
- case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32;
- case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32;
- case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32;
+ case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
+ case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
+ case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
@@ -4104,15 +4238,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
- case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
+ case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
- case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
+ case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
- case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
- case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
- case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
- case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
- case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
+ case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
+ case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
+ case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
+ case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
@@ -4150,7 +4284,7 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
Desc.OpInfo[OpNo].RegClass == -1) {
Register Reg = MI.getOperand(OpNo).getReg();
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
return MRI.getRegClass(Reg);
return RI.getPhysRegClass(Reg);
}
@@ -4164,11 +4298,9 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
MachineBasicBlock *MBB = MI.getParent();
MachineOperand &MO = MI.getOperand(OpIdx);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
const TargetRegisterClass *RC = RI.getRegClass(RCID);
- unsigned Size = TRI->getRegSizeInBits(*RC);
+ unsigned Size = RI.getRegSizeInBits(*RC);
unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
if (MO.isReg())
Opcode = AMDGPU::COPY;
@@ -4255,11 +4387,13 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
return false;
Register Reg = MO.getReg();
- const TargetRegisterClass *RC = Register::isVirtualRegister(Reg)
- ? MRI.getRegClass(Reg)
- : RI.getPhysRegClass(Reg);
const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass);
+ if (Reg.isPhysical())
+ return DRC->contains(Reg);
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+
if (MO.getSubReg()) {
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF);
@@ -4290,7 +4424,6 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
const MachineRegisterInfo &MRI = MF.getRegInfo();
const MCInstrDesc &InstDesc = MI.getDesc();
const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const TargetRegisterClass *DefinedRC =
OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
if (!MO)
@@ -4469,8 +4602,8 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
};
- if (Opc == AMDGPU::V_PERMLANE16_B32 ||
- Opc == AMDGPU::V_PERMLANEX16_B32) {
+ if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
+ Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
// src1 and src2 must be scalar
MachineOperand &Src1 = MI.getOperand(VOP3Idx[1]);
MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]);
@@ -4493,7 +4626,7 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
int ConstantBusLimit = ST.getConstantBusLimit(Opc);
int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
SmallDenseSet<unsigned> SGPRsUsed;
- unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+ Register SGPRReg = findUsedSGPR(MI, VOP3Idx);
if (SGPRReg != AMDGPU::NoRegister) {
SGPRsUsed.insert(SGPRReg);
--ConstantBusLimit;
@@ -4597,16 +4730,32 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
// pointer value is uniform.
MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
- unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
+ Register SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
SBase->setReg(SGPR);
}
MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);
if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {
- unsigned SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
+ Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
SOff->setReg(SGPR);
}
}
+// FIXME: Remove this when SelectionDAG is obsoleted.
+void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
+ MachineInstr &MI) const {
+ if (!isSegmentSpecificFLAT(MI))
+ return;
+
+  // Fix up SGPR operands that ended up in VGPRs. We only select these when the
+  // DAG divergence analysis thinks they are uniform, so a readfirstlane should
+  // be valid.
+ MachineOperand *SAddr = getNamedOperand(MI, AMDGPU::OpName::saddr);
+ if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
+ return;
+
+ Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);
+ SAddr->setReg(ToSGPR);
+}
+
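
Editorial note (not part of the patch): the new legalizeOperandsFLAT only touches segment-specific FLAT forms (global/scratch), and only when their scalar address operand ended up in a VGPR. A small decision sketch with hypothetical field names, not the LLVM API:

    #include <cstdio>

    // Hypothetical summary of a FLAT-family instruction's relevant operands.
    struct FlatOp {
      bool SegmentSpecific; // global_* / scratch_* rather than plain flat_*
      bool HasSAddr;        // instruction carries a scalar address operand
      bool SAddrIsSGPR;     // operand already lives in an SGPR class
    };

    // True when a v_readfirstlane copy is needed to restore the SGPR operand.
    static bool needsReadfirstlane(const FlatOp &Op) {
      if (!Op.SegmentSpecific)
        return false;
      return Op.HasSAddr && !Op.SAddrIsSGPR;
    }

    int main() {
      FlatOp Op{true, true, false};
      std::printf("%d\n", needsReadfirstlane(Op)); // 1
      return 0;
    }
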
void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
MachineBasicBlock::iterator I,
const TargetRegisterClass *DstRC,
@@ -4671,59 +4820,82 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
MachineBasicBlock::iterator I = LoopBB.begin();
+ SmallVector<Register, 8> ReadlanePieces;
+ Register CondReg = AMDGPU::NoRegister;
+
Register VRsrc = Rsrc.getReg();
unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
- Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
- Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
- Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
- Register AndCond = MRI.createVirtualRegister(BoolXExecRC);
- Register SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- Register SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- Register SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- Register SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- Register SRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
-
- // Beginning of the loop, read the next Rsrc variant.
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
- .addReg(VRsrc, VRsrcUndef, AMDGPU::sub0);
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub1)
- .addReg(VRsrc, VRsrcUndef, AMDGPU::sub1);
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub2)
- .addReg(VRsrc, VRsrcUndef, AMDGPU::sub2);
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub3)
- .addReg(VRsrc, VRsrcUndef, AMDGPU::sub3);
-
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc)
- .addReg(SRsrcSub0)
- .addImm(AMDGPU::sub0)
- .addReg(SRsrcSub1)
- .addImm(AMDGPU::sub1)
- .addReg(SRsrcSub2)
- .addImm(AMDGPU::sub2)
- .addReg(SRsrcSub3)
- .addImm(AMDGPU::sub3);
+ unsigned RegSize = TRI->getRegSizeInBits(Rsrc.getReg(), MRI);
+ unsigned NumSubRegs = RegSize / 32;
+ assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 && "Unhandled register size");
+
+ for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
+
+ Register CurRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register CurRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+
+ // Read the next variant <- also loop target.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
+ .addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx));
+
+    // Read the high half of the pair.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
+ .addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx + 1));
+
+ ReadlanePieces.push_back(CurRegLo);
+ ReadlanePieces.push_back(CurRegHi);
+
+ // Comparison is to be done as 64-bit.
+ Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
+ .addReg(CurRegLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(CurRegHi)
+ .addImm(AMDGPU::sub1);
+
+ Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
+ auto Cmp =
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), NewCondReg)
+ .addReg(CurReg);
+ if (NumSubRegs <= 2)
+ Cmp.addReg(VRsrc);
+ else
+ Cmp.addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx, 2));
+
+    // Combine the comparison results with AND.
+ if (CondReg == AMDGPU::NoRegister) // First.
+ CondReg = NewCondReg;
+ else { // If not the first, we create an AND.
+ Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
+ BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndReg)
+ .addReg(CondReg)
+ .addReg(NewCondReg);
+ CondReg = AndReg;
+ }
+ } // End for loop.
+
+ auto SRsrcRC = TRI->getEquivalentSGPRClass(MRI.getRegClass(VRsrc));
+ Register SRsrc = MRI.createVirtualRegister(SRsrcRC);
+
+ // Build scalar Rsrc.
+ auto Merge = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc);
+ unsigned Channel = 0;
+ for (Register Piece : ReadlanePieces) {
+ Merge.addReg(Piece)
+ .addImm(TRI->getSubRegFromChannel(Channel++));
+ }
// Update Rsrc operand to use the SGPR Rsrc.
Rsrc.setReg(SRsrc);
Rsrc.setIsKill(true);
- // Identify all lanes with identical Rsrc operands in their VGPRs.
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg0)
- .addReg(SRsrc, 0, AMDGPU::sub0_sub1)
- .addReg(VRsrc, 0, AMDGPU::sub0_sub1);
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
- .addReg(SRsrc, 0, AMDGPU::sub2_sub3)
- .addReg(VRsrc, 0, AMDGPU::sub2_sub3);
- BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndCond)
- .addReg(CondReg0)
- .addReg(CondReg1);
-
- MRI.setSimpleHint(SaveExec, AndCond);
+ Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+ MRI.setSimpleHint(SaveExec, CondReg);
// Update EXEC to matching lanes, saving original to SaveExec.
BuildMI(LoopBB, I, DL, TII.get(SaveExecOpc), SaveExec)
- .addReg(AndCond, RegState::Kill);
+ .addReg(CondReg, RegState::Kill);
// The original instruction is here; we insert the terminators after it.
I = LoopBB.end();
@@ -4732,19 +4904,29 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
BuildMI(LoopBB, I, DL, TII.get(XorTermOpc), Exec)
.addReg(Exec)
.addReg(SaveExec);
+
BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
}
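
Editorial note (not part of the patch): the rewritten loop body above handles resource operands of any even number of 32-bit pieces, reading two pieces per iteration, comparing them as one 64-bit value, and ANDing the per-pair results. A rough trace of the emitted shape, with invented register names, assuming a descriptor of NumSubRegs dwords:

    #include <cassert>
    #include <cstdio>

    static void traceWaterfallBody(unsigned NumSubRegs) {
      assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 && "unhandled register size");
      for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
        std::printf("v_readfirstlane_b32  s_lo, v[%u]\n", Idx);
        std::printf("v_readfirstlane_b32  s_hi, v[%u]\n", Idx + 1);
        std::printf("v_cmp_eq_u64         cond%u, s[lo:hi], v[%u:%u]\n",
                    Idx / 2, Idx, Idx + 1);
        if (Idx > 0) // all pair comparisons are ANDed into one condition
          std::printf("s_and                cond, cond, cond%u\n", Idx / 2);
      }
    }

    int main() {
      traceWaterfallBody(4); // the original 128-bit SRsrc case
      return 0;
    }
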
// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
// with SGPRs by iterating over all unique values across all lanes.
-static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
- MachineOperand &Rsrc, MachineDominatorTree *MDT) {
+// Returns the loop basic block that now contains \p MI.
+static MachineBasicBlock *
+loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
+ MachineOperand &Rsrc, MachineDominatorTree *MDT,
+ MachineBasicBlock::iterator Begin = nullptr,
+ MachineBasicBlock::iterator End = nullptr) {
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineBasicBlock::iterator I(&MI);
+ if (!Begin.isValid())
+ Begin = &MI;
+ if (!End.isValid()) {
+ End = &MI;
+ ++End;
+ }
const DebugLoc &DL = MI.getDebugLoc();
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
@@ -4753,13 +4935,17 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
// Save the EXEC mask
- BuildMI(MBB, I, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec);
+ BuildMI(MBB, Begin, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec);
// Killed uses in the instruction we are waterfalling around will be
// incorrect due to the added control-flow.
- for (auto &MO : MI.uses()) {
- if (MO.isReg() && MO.isUse()) {
- MRI.clearKillFlags(MO.getReg());
+ MachineBasicBlock::iterator AfterMI = MI;
+ ++AfterMI;
+ for (auto I = Begin; I != AfterMI; I++) {
+ for (auto &MO : I->uses()) {
+ if (MO.isReg() && MO.isUse()) {
+ MRI.clearKillFlags(MO.getReg());
+ }
}
}
@@ -4776,11 +4962,11 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
LoopBB->addSuccessor(LoopBB);
LoopBB->addSuccessor(RemainderBB);
- // Move MI to the LoopBB, and the remainder of the block to RemainderBB.
- MachineBasicBlock::iterator J = I++;
+  // Move the instruction range [Begin, End) to the LoopBB, and the remainder
+  // of the block to RemainderBB.
RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
- RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
- LoopBB->splice(LoopBB->begin(), &MBB, J);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, End, MBB.end());
+ LoopBB->splice(LoopBB->begin(), &MBB, Begin, MBB.end());
MBB.addSuccessor(LoopBB);
@@ -4803,6 +4989,7 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
// Restore the EXEC mask
MachineBasicBlock::iterator First = RemainderBB->begin();
BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec);
+ return LoopBB;
}
// Extract pointer from Rsrc and return a zero-value Rsrc replacement.
@@ -4848,27 +5035,35 @@ extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
return std::make_tuple(RsrcPtr, NewSRsrc);
}
-void SIInstrInfo::legalizeOperands(MachineInstr &MI,
- MachineDominatorTree *MDT) const {
+MachineBasicBlock *
+SIInstrInfo::legalizeOperands(MachineInstr &MI,
+ MachineDominatorTree *MDT) const {
MachineFunction &MF = *MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineBasicBlock *CreatedBB = nullptr;
// Legalize VOP2
if (isVOP2(MI) || isVOPC(MI)) {
legalizeOperandsVOP2(MRI, MI);
- return;
+ return CreatedBB;
}
// Legalize VOP3
if (isVOP3(MI)) {
legalizeOperandsVOP3(MRI, MI);
- return;
+ return CreatedBB;
}
// Legalize SMRD
if (isSMRD(MI)) {
legalizeOperandsSMRD(MRI, MI);
- return;
+ return CreatedBB;
+ }
+
+ // Legalize FLAT
+ if (isFLAT(MI)) {
+ legalizeOperandsFLAT(MRI, MI);
+ return CreatedBB;
}
// Legalize REG_SEQUENCE and PHI
@@ -4877,8 +5072,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (MI.getOpcode() == AMDGPU::PHI) {
const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
- if (!MI.getOperand(i).isReg() ||
- !Register::isVirtualRegister(MI.getOperand(i).getReg()))
+ if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
continue;
const TargetRegisterClass *OpRC =
MRI.getRegClass(MI.getOperand(i).getReg());
@@ -4914,7 +5108,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// Update all the operands so they have the same type.
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg()))
+ if (!Op.isReg() || !Op.getReg().isVirtual())
continue;
// MI is a PHI instruction.
@@ -4939,7 +5133,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// subregister index types e.g. sub0_sub1 + sub2 + sub3
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg()))
+ if (!Op.isReg() || !Op.getReg().isVirtual())
continue;
const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
@@ -4952,7 +5146,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
}
}
- return;
+ return CreatedBB;
}
// Legalize INSERT_SUBREG
@@ -4967,7 +5161,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
MachineOperand &Op = MI.getOperand(1);
legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
}
- return;
+ return CreatedBB;
}
// Legalize SI_INIT_M0
@@ -4975,7 +5169,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
MachineOperand &Src = MI.getOperand(0);
if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
- return;
+ return CreatedBB;
}
// Legalize MIMG and MUBUF/MTBUF for shaders.
@@ -4983,21 +5177,44 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
// scratch memory access. In both cases, the legalization never involves
// conversion to the addr64 form.
- if (isMIMG(MI) ||
- (AMDGPU::isShader(MF.getFunction().getCallingConv()) &&
- (isMUBUF(MI) || isMTBUF(MI)))) {
+ if (isMIMG(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) &&
+ (isMUBUF(MI) || isMTBUF(MI)))) {
MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
- if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
- unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
- SRsrc->setReg(SGPR);
- }
+ if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))
+ CreatedBB = loadSRsrcFromVGPR(*this, MI, *SRsrc, MDT);
MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
- if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
- unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
- SSamp->setReg(SGPR);
+ if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))
+ CreatedBB = loadSRsrcFromVGPR(*this, MI, *SSamp, MDT);
+
+ return CreatedBB;
+ }
+
+ // Legalize SI_CALL
+ if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
+ MachineOperand *Dest = &MI.getOperand(0);
+ if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) {
+      // Move everything between ADJCALLSTACKUP and ADJCALLSTACKDOWN, along
+      // with the following copies; copies from and to physical registers also
+      // need to be moved into the loop block.
+ unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
+
+ // Also move the copies to physical registers into the loop block
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::iterator Start(&MI);
+ while (Start->getOpcode() != FrameSetupOpcode)
+ --Start;
+ MachineBasicBlock::iterator End(&MI);
+ while (End->getOpcode() != FrameDestroyOpcode)
+ ++End;
+ // Also include following copies of the return value
+ ++End;
+ while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
+ MI.definesRegister(End->getOperand(1).getReg()))
+ ++End;
+ CreatedBB = loadSRsrcFromVGPR(*this, MI, *Dest, MDT, Start, End);
}
- return;
}
// Legalize MUBUF* instructions.
@@ -5011,7 +5228,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
RI.getRegClass(RsrcRC))) {
// The operands are legal.
// FIXME: We may need to legalize operands besided srsrc.
- return;
+ return CreatedBB;
}
// Legalize a VGPR Rsrc.
@@ -5046,7 +5263,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
const DebugLoc &DL = MI.getDebugLoc();
- BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e64), NewVAddrLo)
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_CO_U32_e64), NewVAddrLo)
.addDef(CondReg0)
.addReg(RsrcPtr, 0, AMDGPU::sub0)
.addReg(VAddr->getReg(), 0, AMDGPU::sub0)
@@ -5072,8 +5289,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
} else if (!VAddr && ST.hasAddr64()) {
// This instructions is the _OFFSET variant, so we need to convert it to
// ADDR64.
- assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration()
- < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+ assert(ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
"FIXME: Need to emit flat atomics here");
unsigned RsrcPtr, NewSRsrc;
@@ -5146,15 +5362,19 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
} else {
// This is another variant; legalize Rsrc with waterfall loop from VGPRs
// to SGPRs.
- loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT);
+ CreatedBB = loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT);
+ return CreatedBB;
}
}
+ return CreatedBB;
}
-void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
- MachineDominatorTree *MDT) const {
+MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
+ MachineDominatorTree *MDT) const {
SetVectorType Worklist;
Worklist.insert(&TopInst);
+ MachineBasicBlock *CreatedBB = nullptr;
+ MachineBasicBlock *CreatedBBTmp = nullptr;
while (!Worklist.empty()) {
MachineInstr &Inst = *Worklist.pop_back_val();
@@ -5174,13 +5394,18 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
Inst.eraseFromParent();
continue;
case AMDGPU::S_ADD_I32:
- case AMDGPU::S_SUB_I32:
+ case AMDGPU::S_SUB_I32: {
// FIXME: The u32 versions currently selected use the carry.
- if (moveScalarAddSub(Worklist, Inst, MDT))
+ bool Changed;
+ std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
+ if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp)
+ CreatedBB = CreatedBBTmp;
+ if (Changed)
continue;
// Default handling
break;
+ }
case AMDGPU::S_AND_B64:
splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
Inst.eraseFromParent();
@@ -5259,19 +5484,19 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
break;
case AMDGPU::S_LSHL_B64:
if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_LSHLREV_B64;
+ NewOpcode = AMDGPU::V_LSHLREV_B64_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_ASHR_I64:
if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_ASHRREV_I64;
+ NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHR_B64:
if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_LSHRREV_B64;
+ NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
swapOperands(Inst);
}
break;
@@ -5361,7 +5586,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
.add(Inst.getOperand(3))
.addReg(CarryInReg)
.addImm(0);
- legalizeOperands(*CarryOp);
+ CreatedBBTmp = legalizeOperands(*CarryOp);
+ if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp)
+ CreatedBB = CreatedBBTmp;
MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg);
addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
Inst.eraseFromParent();
@@ -5376,8 +5603,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
MachineOperand &Src1 = Inst.getOperand(3);
unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
- ? AMDGPU::V_ADD_I32_e64
- : AMDGPU::V_SUB_I32_e64;
+ ? AMDGPU::V_ADD_CO_U32_e64
+ : AMDGPU::V_SUB_CO_U32_e64;
const TargetRegisterClass *NewRC =
RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
Register DestReg = MRI.createVirtualRegister(NewRC);
@@ -5387,7 +5614,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
.add(Src1)
.addImm(0); // clamp bit
- legalizeOperands(*NewInstr, MDT);
+ CreatedBBTmp = legalizeOperands(*NewInstr, MDT);
+ if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp)
+ CreatedBB = CreatedBBTmp;
MRI.replaceRegWith(Dest0.getReg(), DestReg);
addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI,
@@ -5406,7 +5635,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
// legalize its operands instead.
- legalizeOperands(Inst, MDT);
+ CreatedBBTmp = legalizeOperands(Inst, MDT);
+ if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp)
+ CreatedBB = CreatedBBTmp;
continue;
}
@@ -5462,7 +5693,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
unsigned NewDstReg = AMDGPU::NoRegister;
if (HasDst) {
Register DstReg = Inst.getOperand(0).getReg();
- if (Register::isPhysicalRegister(DstReg))
+ if (DstReg.isPhysical())
continue;
// Update the destination register class.
@@ -5470,8 +5701,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
if (!NewDstRC)
continue;
- if (Inst.isCopy() &&
- Register::isVirtualRegister(Inst.getOperand(1).getReg()) &&
+ if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() &&
NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
// Instead of creating a copy where src and dst are the same register
// class, we just replace all uses of dst with src. These kinds of
@@ -5498,16 +5728,20 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
}
// Legalize the operands
- legalizeOperands(Inst, MDT);
+ CreatedBBTmp = legalizeOperands(Inst, MDT);
+ if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp)
+ CreatedBB = CreatedBBTmp;
if (HasDst)
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
}
+ return CreatedBB;
}
// Add/sub require special handling to deal with carry outs.
-bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT) const {
+std::pair<bool, MachineBasicBlock *>
+SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
if (ST.hasAddNoCarry()) {
// Assume there is no user of scc since we don't select this in that case.
// Since scc isn't used, it doesn't really matter if the i32 or u32 variant
@@ -5532,13 +5766,13 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
Inst.addOperand(MachineOperand::CreateImm(0)); // clamp bit
Inst.addImplicitDefUseOperands(*MBB.getParent());
MRI.replaceRegWith(OldDstReg, ResultReg);
- legalizeOperands(Inst, MDT);
+ MachineBasicBlock *NewBB = legalizeOperands(Inst, MDT);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
- return true;
+ return std::make_pair(true, NewBB);
}
- return false;
+ return std::make_pair(false, nullptr);
}
void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
@@ -5626,7 +5860,7 @@ void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned SubOp = ST.hasAddNoCarry() ?
- AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32;
+ AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
BuildMI(MBB, MII, DL, get(SubOp), TmpReg)
.addImm(0)
@@ -5855,7 +6089,7 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
AMDGPU::sub1, Src1SubRC);
- unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+ unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
MachineInstr *LoHalf =
BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
.addReg(CarryReg, RegState::Define)
@@ -6055,7 +6289,7 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32_e64), MidRegLo)
.addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
.addImm(0)
.addImm(BitWidth);
@@ -6152,7 +6386,7 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
.addReg(ImmReg, RegState::Kill)
.add(Src0);
- BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32_e64), ResultReg)
.add(Src1)
.addImm(16)
.addReg(TmpReg, RegState::Kill);
@@ -6162,7 +6396,7 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
.addImm(0xffff);
- BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32_e64), ResultReg)
.addReg(ImmReg, RegState::Kill)
.add(Src0)
.add(Src1);
@@ -6176,7 +6410,7 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
.add(Src0);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
.addImm(0xffff0000);
- BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32_e64), ResultReg)
.add(Src1)
.addReg(ImmReg, RegState::Kill)
.addReg(TmpReg, RegState::Kill);
@@ -6209,7 +6443,7 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1) {
if (MI.isCopy()) {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
for (auto &User : MRI.use_nodbg_instructions(DestReg)) {
if ((User.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) ||
@@ -6407,7 +6641,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
// GFX9 doesn't have ELEMENT_SIZE.
if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
+ uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
}
@@ -6503,8 +6737,16 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// If we have a definitive size, we can use it. Otherwise we need to inspect
// the operands to know the size.
- if (isFixedSize(MI))
- return DescSize;
+ if (isFixedSize(MI)) {
+ unsigned Size = DescSize;
+
+ // If we hit the buggy offset, an extra nop will be inserted in MC so
+ // estimate the worst case.
+ if (MI.isBranch() && ST.hasOffset3fBug())
+ Size += 4;
+
+ return Size;
+ }
// 4-byte instructions may have a 32-bit literal encoded after them. Check
// operands that could ever be literals.
@@ -6555,8 +6797,7 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case TargetOpcode::INLINEASM_BR: {
const MachineFunction *MF = MI.getParent()->getParent();
const char *AsmStr = MI.getOperand(0).getSymbolName();
- return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(),
- &MF->getSubtarget());
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(), &ST);
}
default:
return DescSize;
@@ -6716,7 +6957,7 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
- return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
+ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead);
}
@@ -6737,7 +6978,7 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
if (!UnusedCarry.isValid())
return MachineInstrBuilder();
- return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
+ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead);
}
@@ -6763,10 +7004,6 @@ const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) con
}
void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const {
- MachineBasicBlock *MBB = MI.getParent();
- MachineFunction *MF = MBB->getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
-
if (!ST.isWave32())
return;
@@ -6789,20 +7026,6 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
}
-unsigned SIInstrInfo::getNumFlatOffsetBits(unsigned AddrSpace,
- bool Signed) const {
- if (!ST.hasFlatInstOffsets())
- return 0;
-
- if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
- return 0;
-
- if (ST.getGeneration() >= AMDGPUSubtarget::GFX10)
- return Signed ? 12 : 11;
-
- return Signed ? 13 : 12;
-}
-
bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
bool Signed) const {
// TODO: Should 0 be special cased?
@@ -6812,16 +7035,31 @@ bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
return false;
- if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
- return (Signed && isInt<12>(Offset)) ||
- (!Signed && isUInt<11>(Offset));
+ unsigned N = AMDGPU::getNumFlatOffsetBits(ST, Signed);
+ return Signed ? isIntN(N, Offset) : isUIntN(N, Offset);
+}
+
+std::pair<int64_t, int64_t> SIInstrInfo::splitFlatOffset(int64_t COffsetVal,
+ unsigned AddrSpace,
+ bool IsSigned) const {
+ int64_t RemainderOffset = COffsetVal;
+ int64_t ImmField = 0;
+ const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST, IsSigned);
+ if (IsSigned) {
+ // Use signed division by a power of two to truncate towards 0.
+ int64_t D = 1LL << (NumBits - 1);
+ RemainderOffset = (COffsetVal / D) * D;
+ ImmField = COffsetVal - RemainderOffset;
+ } else if (COffsetVal >= 0) {
+ ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
+ RemainderOffset = COffsetVal - ImmField;
}
- return (Signed && isInt<13>(Offset)) ||
- (!Signed && isUInt<12>(Offset));
+ assert(isLegalFLATOffset(ImmField, AddrSpace, IsSigned));
+ assert(RemainderOffset + ImmField == COffsetVal);
+ return {ImmField, RemainderOffset};
}
-
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
SI = 0,
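
The splitFlatOffset hunk above divides a constant address offset into an immediate that fits the FLAT offset field plus a remainder that has to be added to the address separately. A minimal standalone sketch of that arithmetic, with toy names rather than the LLVM implementation (the real field widths are the 11-13 bits returned by AMDGPU::getNumFlatOffsetBits):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <utility>

// Split C into {immediate field, remainder} for a NumBits-wide offset field.
std::pair<int64_t, int64_t> splitOffset(int64_t C, unsigned NumBits, bool IsSigned) {
  int64_t Remainder = C;
  int64_t Imm = 0;
  if (IsSigned) {
    // Signed division by a power of two truncates toward zero, so the
    // remainder is a multiple of D with the same sign as C and the immediate
    // always fits the signed NumBits-wide field.
    int64_t D = int64_t(1) << (NumBits - 1);
    Remainder = (C / D) * D;
    Imm = C - Remainder;
  } else if (C >= 0) {
    Imm = C & ((int64_t(1) << NumBits) - 1); // low NumBits bits
    Remainder = C - Imm;
  }
  assert(Imm + Remainder == C);
  return {Imm, Remainder};
}

int main() {
  // 4100 with a 12-bit signed field: 4100 = 4096 + 4, so Imm = 4, Rem = 4096.
  auto [Imm, Rem] = splitOffset(4100, 12, true);
  std::printf("imm=%lld rem=%lld\n", (long long)Imm, (long long)Rem);
}

The patched code additionally re-checks the immediate with isLegalFLATOffset, which the toy version omits.
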
@@ -6962,7 +7200,7 @@ static bool followSubRegDef(MachineInstr &MI,
MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
MachineRegisterInfo &MRI) {
assert(MRI.isSSA());
- if (!Register::isVirtualRegister(P.Reg))
+ if (!P.Reg.isVirtual())
return nullptr;
auto RSR = P;
@@ -6973,7 +7211,7 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
case AMDGPU::COPY:
case AMDGPU::V_MOV_B32_e32: {
auto &Op1 = MI->getOperand(1);
- if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg())) {
+ if (Op1.isReg() && Op1.getReg().isVirtual()) {
if (Op1.isUndef())
return nullptr;
RSR = getRegSubRegPair(Op1);
@@ -7035,36 +7273,51 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
auto *TRI = MRI.getTargetRegisterInfo();
auto *DefBB = DefMI.getParent();
- const int MaxUseInstScan = 10;
- int NumUseInst = 0;
+ const int MaxUseScan = 10;
+ int NumUse = 0;
- for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
+ for (auto &Use : MRI.use_nodbg_operands(VReg)) {
+ auto &UseInst = *Use.getParent();
// Don't bother searching between blocks, although it is possible this block
// doesn't modify exec.
if (UseInst.getParent() != DefBB)
return true;
- if (++NumUseInst > MaxUseInstScan)
+ if (++NumUse > MaxUseScan)
return true;
}
+ if (NumUse == 0)
+ return false;
+
const int MaxInstScan = 20;
int NumInst = 0;
// Stop scan when we have seen all the uses.
for (auto I = std::next(DefMI.getIterator()); ; ++I) {
+ assert(I != DefBB->end());
+
if (I->isDebugInstr())
continue;
if (++NumInst > MaxInstScan)
return true;
- if (I->readsRegister(VReg))
- if (--NumUseInst == 0)
- return false;
+ for (const MachineOperand &Op : I->operands()) {
+ // We don't check reg masks here as they're used only on calls:
+ // 1. EXEC is only considered const within one BB
+ // 2. Call should be a terminator instruction if present in a BB
- if (I->modifiesRegister(AMDGPU::EXEC, TRI))
- return true;
+ if (!Op.isReg())
+ continue;
+
+ Register Reg = Op.getReg();
+ if (Op.isUse()) {
+ if (Reg == VReg && --NumUse == 0)
+ return false;
+ } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
+ return true;
+ }
}
}
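
The rewritten execMayBeModifiedBeforeAnyUse above is a two-phase bounded scan: first cap how many uses of the register must be accounted for, then walk forward from the definition until every use has been seen or a write to EXEC (or a scan limit) forces a conservative answer. A simplified single-block toy of the same shape, using plain containers instead of MachineInstr; all names below are illustrative:

#include <vector>

struct ToyInst {
  std::vector<int> Uses; // register ids read by this instruction
  std::vector<int> Defs; // register ids written by this instruction
};

// Conservatively returns true if ExecReg may be written between Block[DefIdx]
// and the last use of VReg; returns false only when the bounded scan proves
// every use is reached first.
bool execMayChangeBeforeUses(const std::vector<ToyInst> &Block, unsigned DefIdx,
                             int VReg, int ExecReg) {
  const int MaxUseScan = 10, MaxInstScan = 20;

  // Phase 1: count the uses that must be found, giving up if there are many.
  int NumUse = 0;
  for (unsigned I = DefIdx + 1; I < Block.size(); ++I)
    for (int R : Block[I].Uses)
      if (R == VReg && ++NumUse > MaxUseScan)
        return true;
  if (NumUse == 0)
    return false;

  // Phase 2: walk forward until all uses are seen or EXEC is clobbered.
  int NumInst = 0;
  for (unsigned I = DefIdx + 1; I < Block.size(); ++I) {
    if (++NumInst > MaxInstScan)
      return true;
    for (int R : Block[I].Uses)
      if (R == VReg && --NumUse == 0)
        return false;
    for (int R : Block[I].Defs)
      if (R == ExecReg)
        return true;
  }
  return true; // not reached while every counted use lies in this block
}

The real function also bails out when a use lives in another basic block and treats any register overlapping EXEC as a clobber; the toy keeps only the scan structure.
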
@@ -7158,3 +7411,25 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return SchedModel.computeInstrLatency(&MI);
}
+
+unsigned SIInstrInfo::getDSShaderTypeValue(const MachineFunction &MF) {
+ switch (MF.getFunction().getCallingConv()) {
+ case CallingConv::AMDGPU_PS:
+ return 1;
+ case CallingConv::AMDGPU_VS:
+ return 2;
+ case CallingConv::AMDGPU_GS:
+ return 3;
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_LS:
+ case CallingConv::AMDGPU_ES:
+ report_fatal_error("ds_ordered_count unsupported for this calling conv");
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::C:
+ case CallingConv::Fast:
+ default:
+ // Assume other calling conventions are various compute callable functions
+ return 0;
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 53e2ffba0f65..ce59fe86c688 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -14,22 +14,12 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
-#include "AMDGPUInstrInfo.h"
-#include "SIDefines.h"
+#include "AMDGPUMIRFormatter.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/Compiler.h"
-#include <cassert>
-#include <cstdint>
#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"
@@ -37,17 +27,20 @@
namespace llvm {
class APInt;
+class GCNSubtarget;
+class LiveVariables;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
-class GCNSubtarget;
class TargetRegisterClass;
+class ScheduleHazardRecognizer;
class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
const SIRegisterInfo RI;
const GCNSubtarget &ST;
TargetSchedModel SchedModel;
+ mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
// The inverse predicate should have the negative value.
enum BranchPredicate {
@@ -81,8 +74,9 @@ public:
private:
void swapOperands(MachineInstr &Inst) const;
- bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT = nullptr) const;
+ std::pair<bool, MachineBasicBlock *>
+ moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT = nullptr) const;
@@ -201,10 +195,6 @@ public:
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
- unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI,
- RegScavenger *RS, unsigned TmpReg,
- unsigned Offset, unsigned Size) const;
-
void materializeImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL,
@@ -248,9 +238,12 @@ public:
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
- const MCInstrDesc &getIndirectRegWritePseudo(
- unsigned VecSize, unsigned EltSize, bool IsSGPR) const;
+ const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
+ unsigned EltSize,
+ bool IsSGPR) const;
+ const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
+ bool IsIndirectSrc) const;
LLVM_READONLY
int commuteOpcode(unsigned Opc) const;
@@ -508,12 +501,28 @@ public:
// i.e. global_* or scratch_*.
static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
auto Flags = MI.getDesc().TSFlags;
- return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT);
+ return Flags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch);
+ }
+
+ bool isSegmentSpecificFLAT(uint16_t Opcode) const {
+ auto Flags = get(Opcode).TSFlags;
+ return Flags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch);
+ }
+
+ static bool isFLATGlobal(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::IsFlatGlobal;
+ }
+
+ bool isFLATGlobal(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::IsFlatGlobal;
}
- // FIXME: Make this more precise
static bool isFLATScratch(const MachineInstr &MI) {
- return isSegmentSpecificFLAT(MI);
+ return MI.getDesc().TSFlags & SIInstrFlags::IsFlatScratch;
+ }
+
+ bool isFLATScratch(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::IsFlatScratch;
}
// Any FLAT encoded instruction, including global_* and scratch_*.
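
The predicates above replace the old "FLAT and not LGKM_CNT" test with dedicated IsFlatGlobal/IsFlatScratch TSFlags bits, so "segment specific" becomes a plain union of the two. A tiny standalone sketch of that flag test; the bit positions below are placeholders, not the real SIInstrFlags values:

#include <cstdint>

// Placeholder bit positions; the real ones are defined by SIInstrFlags.
constexpr uint64_t IsFlatGlobal  = 1ull << 0;
constexpr uint64_t IsFlatScratch = 1ull << 1;

bool isSegmentSpecificFlat(uint64_t TSFlags) {
  // global_* or scratch_* forms, but not plain flat_*.
  return TSFlags & (IsFlatGlobal | IsFlatScratch);
}
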
@@ -569,6 +578,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::DPP;
}
+ static bool isTRANS(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
+ }
+
+ bool isTRANS(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::TRANS;
+ }
+
static bool isVOP3P(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
}
@@ -677,7 +694,7 @@ public:
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
- unsigned Dest = MI.getOperand(0).getReg();
+ Register Dest = MI.getOperand(0).getReg();
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
return !RI.isSGPRReg(MRI, Dest);
@@ -883,6 +900,7 @@ public:
MachineRegisterInfo &MRI) const;
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
+ void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
MachineBasicBlock::iterator I,
@@ -893,20 +911,22 @@ public:
/// Legalize all operands in this instruction. This function may create new
/// instructions and control-flow around \p MI. If present, \p MDT is
/// updated.
- void legalizeOperands(MachineInstr &MI,
- MachineDominatorTree *MDT = nullptr) const;
+ /// \returns A new basic block that contains \p MI if new blocks were created.
+ MachineBasicBlock *
+ legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
/// Replace this instruction's opcode with the equivalent VALU
/// opcode. This function will also move the users of \p MI to the
/// VALU if necessary. If present, \p MDT is updated.
- void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
-
- void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
- int Count) const;
+ MachineBasicBlock *moveToVALU(MachineInstr &MI,
+ MachineDominatorTree *MDT = nullptr) const;
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+ void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned Quantity) const override;
+
void insertReturn(MachineBasicBlock &MBB) const;
/// Return the number of wait states that result from executing this
/// instruction.
@@ -1015,14 +1035,18 @@ public:
return isUInt<12>(Imm);
}
- unsigned getNumFlatOffsetBits(unsigned AddrSpace, bool Signed) const;
-
/// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
/// encoded instruction. If \p Signed, this is for an instruction that
/// interprets the offset as signed.
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
bool Signed) const;
+ /// Split \p COffsetVal into {immediate offset field, remainder offset}
+ /// values.
+ std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
+ unsigned AddrSpace,
+ bool IsSigned) const;
+
/// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
@@ -1053,6 +1077,14 @@ public:
unsigned getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr &MI,
unsigned *PredCost = nullptr) const override;
+
+ const MIRFormatter *getMIRFormatter() const override {
+ if (!Formatter.get())
+ Formatter = std::make_unique<AMDGPUMIRFormatter>();
+ return Formatter.get();
+ }
+
+ static unsigned getDSShaderTypeValue(const MachineFunction &MF);
};
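
The getMIRFormatter override above builds the AMDGPUMIRFormatter lazily on first use; the mutable unique_ptr member is what allows a const accessor to do that. A small standalone illustration of the idiom with toy types:

#include <memory>

struct ToyFormatter {};

class ToyInstrInfo {
  // mutable: the cache may be filled in from const member functions.
  mutable std::unique_ptr<ToyFormatter> Formatter;

public:
  const ToyFormatter *getFormatter() const {
    if (!Formatter)
      Formatter = std::make_unique<ToyFormatter>();
    return Formatter.get();
  }
};
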
/// \brief Returns true if a reg:subreg pair P has a TRC class
@@ -1148,6 +1180,12 @@ namespace AMDGPU {
LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);
+ LLVM_READONLY
+ int getFlatScratchInstSTfromSS(uint16_t Opcode);
+
+ LLVM_READONLY
+ int getFlatScratchInstSSfromSV(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 7aee52f91360..5adc9e817d41 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -28,7 +28,6 @@ def SIEncodingFamily {
int GFX9 = 5;
int GFX10 = 6;
int SDWA10 = 7;
- int GFX10_B = 8;
}
//===----------------------------------------------------------------------===//
@@ -55,10 +54,6 @@ def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
-def SIatomic_csub : SDNode<"AMDGPUISD::ATOMIC_LOAD_CSUB", SDTAtomic2,
- [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
->;
-
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;
@@ -177,19 +172,6 @@ class SDBufferAtomic<string opcode> : SDNode <opcode,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
-class SDBufferAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
- SDTypeProfile<0, 8,
- [SDTCisVT<0, ty>, // vdata
- SDTCisVT<1, v4i32>, // rsrc
- SDTCisVT<2, i32>, // vindex(VGPR)
- SDTCisVT<3, i32>, // voffset(VGPR)
- SDTCisVT<4, i32>, // soffset(SGPR)
- SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // cachepolicy(imm)
- SDTCisVT<7, i1>]>, // idxen(imm)
- [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
->;
-
def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
@@ -203,8 +185,7 @@ def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
-def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
-def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
+def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
SDTypeProfile<1, 9,
@@ -228,8 +209,6 @@ class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
-def SIglobal_atomic_pk_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_PK_FADD", v2f16>;
-
def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;
@@ -280,41 +259,31 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
// Returns 1 if the source arguments have modifiers, 0 if they do not.
// XXX - do f16 instructions?
class isFloatType<ValueType SrcVT> {
- bit ret =
- !if(!eq(SrcVT.Value, f16.Value), 1,
- !if(!eq(SrcVT.Value, f32.Value), 1,
- !if(!eq(SrcVT.Value, f64.Value), 1,
- !if(!eq(SrcVT.Value, v2f16.Value), 1,
- !if(!eq(SrcVT.Value, v4f16.Value), 1,
- !if(!eq(SrcVT.Value, v2f32.Value), 1,
- !if(!eq(SrcVT.Value, v2f64.Value), 1,
- 0)))))));
+ bit ret = !or(!eq(SrcVT.Value, f16.Value),
+ !eq(SrcVT.Value, f32.Value),
+ !eq(SrcVT.Value, f64.Value),
+ !eq(SrcVT.Value, v2f16.Value),
+ !eq(SrcVT.Value, v4f16.Value),
+ !eq(SrcVT.Value, v2f32.Value),
+ !eq(SrcVT.Value, v2f64.Value));
}
class isIntType<ValueType SrcVT> {
- bit ret =
- !if(!eq(SrcVT.Value, i16.Value), 1,
- !if(!eq(SrcVT.Value, i32.Value), 1,
- !if(!eq(SrcVT.Value, i64.Value), 1,
- 0)));
+ bit ret = !or(!eq(SrcVT.Value, i16.Value),
+ !eq(SrcVT.Value, i32.Value),
+ !eq(SrcVT.Value, i64.Value));
}
class isPackedType<ValueType SrcVT> {
- bit ret =
- !if(!eq(SrcVT.Value, v2i16.Value), 1,
- !if(!eq(SrcVT.Value, v2f16.Value), 1,
- !if(!eq(SrcVT.Value, v4f16.Value), 1, 0)
- ));
+ bit ret = !or(!eq(SrcVT.Value, v2i16.Value),
+ !eq(SrcVT.Value, v2f16.Value),
+ !eq(SrcVT.Value, v4f16.Value));
}
//===----------------------------------------------------------------------===//
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//
-let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_global").AddrSpaces in {
-defm atomic_csub_global : binary_atomic_op<SIatomic_csub>;
-}
-
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
@@ -328,23 +297,6 @@ defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;
} // End let AddressSpaces = ...
} // End foreach AddrSpace
-def atomic_fadd_global_noret : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_load_fadd node:$ptr, node:$value)> {
- // FIXME: Move this
- let MemoryVT = f32;
- let IsAtomic = 1;
- let AddressSpaces = StoreAddress_global.AddrSpaces;
-}
-
-def atomic_pk_fadd_global_noret : PatFrag<
- (ops node:$ptr, node:$value),
- (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> {
- // FIXME: Move this
- let MemoryVT = v2f16;
- let IsAtomic = 1;
- let AddressSpaces = StoreAddress_global.AddrSpaces;
-}
//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
@@ -450,16 +402,15 @@ def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)
}
def load_align8_local_m0 : PatFrag<(ops node:$ptr),
- (load_local_m0 node:$ptr)> {
+ (load_local_m0 node:$ptr)>, Aligned<8> {
let IsLoad = 1;
let IsNonExtLoad = 1;
- let MinAlignment = 8;
}
+
def load_align16_local_m0 : PatFrag<(ops node:$ptr),
- (load_local_m0 node:$ptr)> {
+ (load_local_m0 node:$ptr)>, Aligned<16> {
let IsLoad = 1;
let IsNonExtLoad = 1;
- let MinAlignment = 16;
}
} // End IsLoad = 1
@@ -535,20 +486,18 @@ def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
}
}
-def store_align16_local_m0 : PatFrag <
- (ops node:$value, node:$ptr),
- (store_local_m0 node:$value, node:$ptr)> {
+def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
+ (store_local_m0 node:$value, node:$ptr)>,
+ Aligned<8> {
let IsStore = 1;
let IsTruncStore = 0;
- let MinAlignment = 16;
}
-def store_align8_local_m0 : PatFrag <
- (ops node:$value, node:$ptr),
- (store_local_m0 node:$value, node:$ptr)> {
+def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
+ (store_local_m0 node:$value, node:$ptr)>,
+ Aligned<16> {
let IsStore = 1;
let IsTruncStore = 0;
- let MinAlignment = 8;
}
let AddressSpaces = StoreAddress_local.AddrSpaces in {
@@ -583,6 +532,48 @@ def si_setcc_uniform : PatFrag <
}]>;
//===----------------------------------------------------------------------===//
+// SDNodes PatFrags for a16 loads and stores with 3 components.
+// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
+// load/store size.
+//===----------------------------------------------------------------------===//
+
+class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
+ (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
+ node:$auxiliary, node:$idxen),
+ (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
+ node:$auxiliary, node:$idxen)> {
+ let IsLoad = 1;
+ let MemoryVT = vt;
+}
+
+class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
+ (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
+ node:$auxiliary, node:$idxen),
+ (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
+ node:$auxiliary, node:$idxen)> {
+ let IsStore = 1;
+ let MemoryVT = vt;
+}
+
+class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
+ (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
+ node:$format, node:$auxiliary, node:$idxen),
+ (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
+ node:$format, node:$auxiliary, node:$idxen)> {
+ let IsLoad = 1;
+ let MemoryVT = vt;
+}
+
+class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
+ (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
+ node:$format, node:$auxiliary, node:$idxen),
+ (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
+ node:$format, node:$auxiliary, node:$idxen)> {
+ let IsStore = 1;
+ let MemoryVT = vt;
+}
+
+//===----------------------------------------------------------------------===//
// SDNodes PatFrags for d16 loads
//===----------------------------------------------------------------------===//
@@ -668,7 +659,6 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
-defm atomic_load_csub : SIAtomicM0Glue2 <"LOAD_CSUB", 1>;
defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
@@ -1051,6 +1041,12 @@ class NamedOperandBit_0<string Name, AsmOperandClass MatchClass> :
let ParserMatchClass = MatchClass;
}
+class NamedOperandBit_1<string Name, AsmOperandClass MatchClass> :
+ OperandWithDefaultOps<i1, (ops (i1 1))> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
@@ -1102,8 +1098,15 @@ def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
+def DLC_0 : NamedOperandBit_0<"DLC", NamedMatchClass<"DLC">>;
+
def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
+def GLC_0 : NamedOperandBit_0<"GLC", NamedMatchClass<"GLC">>;
+def GLC_1 : NamedOperandBit_1<"GLC", NamedMatchClass<"GLC_1">>;
+
def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
+def SLC_0 : NamedOperandBit_0<"SLC", NamedMatchClass<"SLC">>;
+
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
@@ -1115,7 +1118,7 @@ def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
-def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;
+def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT", 0>>;
def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
@@ -1133,10 +1136,10 @@ def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
-def op_sel : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
-def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
-def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
-def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
+def op_sel0 : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
+def op_sel_hi0 : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
+def neg_lo0 : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
+def neg_hi0 : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
@@ -1308,11 +1311,11 @@ def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
+def DS128Bit8ByteAligned : ComplexPattern<i64, 3, "SelectDS128Bit8ByteAligned">;
def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
-def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
@@ -1328,9 +1331,6 @@ def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
-
-def Hi16Elt : ComplexPattern<untyped, 1, "SelectHi16Elt">;
-
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
@@ -1389,9 +1389,9 @@ def HWREG {
}
class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
- int ret = !or(Reg,
- !or(!shl(Offset, 6),
- !shl(!add(Size, -1), 11)));
+ int ret = !and(!or(Reg,
+ !shl(Offset, 6),
+ !shl(!add(Size, -1), 11)), 65535);
}
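
getHwRegImm above packs a hardware register id, a bit offset and a field size into one immediate, and the change masks the result to 16 bits. A standalone sketch of that packing; the operand values in main are arbitrary examples, not a real HWREG id:

#include <cstdint>
#include <cstdio>

// Reg in bits [5:0], Offset in bits [10:6], Size-1 in bits [15:11], truncated
// to the 16-bit immediate just as the patched !and(..., 65535) does.
uint32_t hwRegImm(uint32_t Reg, uint32_t Offset, uint32_t Size) {
  return (Reg | (Offset << 6) | ((Size - 1) << 11)) & 0xffff;
}

int main() {
  // e.g. register id 6, offset 0, size 32 -> 6 | (31 << 11) = 0xf806
  std::printf("0x%04x\n", hwRegImm(6, 0, 32));
}
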
//===----------------------------------------------------------------------===//
@@ -1416,56 +1416,6 @@ class SIMCInstr <string pseudo, int subtarget> {
}
//===----------------------------------------------------------------------===//
-// EXP classes
-//===----------------------------------------------------------------------===//
-
-class EXP_Helper<bit done> : EXPCommon<
- (outs),
- (ins exp_tgt:$tgt,
- ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
- exp_vm:$vm, exp_compr:$compr, i32imm:$en),
- "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", []> {
- let AsmMatchConverter = "cvtExp";
-}
-
-// Split EXP instruction into EXP and EXP_DONE so we can set
-// mayLoad for done=1.
-multiclass EXP_m<bit done> {
- let mayLoad = done, DisableWQM = 1 in {
- let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : EXP_Helper<done>,
- SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
- }
-
- let done = done in {
- def _si : EXP_Helper<done>,
- SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
- EXPe {
- let AssemblerPredicate = isGFX6GFX7;
- let DecoderNamespace = "GFX6GFX7";
- let DisableDecoder = DisableSIDecoder;
- }
-
- def _vi : EXP_Helper<done>,
- SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
- EXPe_vi {
- let AssemblerPredicate = isGFX8GFX9;
- let DecoderNamespace = "GFX8";
- let DisableDecoder = DisableVIDecoder;
- }
-
- def _gfx10 : EXP_Helper<done>,
- SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.GFX10>,
- EXPe {
- let AssemblerPredicate = isGFX10Plus;
- let DecoderNamespace = "GFX10";
- let DisableDecoder = DisableSIDecoder;
- }
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
// Vector ALU classes
//===----------------------------------------------------------------------===//
@@ -1528,6 +1478,10 @@ class getVOPSrc0ForVT<ValueType VT> {
);
}
+class getSOPSrcForVT<ValueType VT> {
+ RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
+}
+
// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT> {
RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
@@ -1583,13 +1537,11 @@ class getVOP3SrcForVT<ValueType VT> {
// Float or packed int
class isModifierType<ValueType SrcVT> {
- bit ret =
- !if(!eq(SrcVT.Value, f16.Value), 1,
- !if(!eq(SrcVT.Value, f32.Value), 1,
- !if(!eq(SrcVT.Value, f64.Value), 1,
- !if(!eq(SrcVT.Value, v2f16.Value), 1,
- !if(!eq(SrcVT.Value, v2i16.Value), 1,
- 0)))));
+ bit ret = !or(!eq(SrcVT.Value, f16.Value),
+ !eq(SrcVT.Value, f32.Value),
+ !eq(SrcVT.Value, f64.Value),
+ !eq(SrcVT.Value, v2f16.Value),
+ !eq(SrcVT.Value, v2i16.Value));
}
// Return type of input modifiers operand for specified input operand
@@ -1612,7 +1564,7 @@ class getOpSelMod <ValueType VT> {
}
// Return type of input modifiers operand specified input operand for DPP
-class getSrcModExt <ValueType VT> {
+class getSrcModDPP <ValueType VT> {
bit isFP = isFloatType<VT>.ret;
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}
@@ -1635,7 +1587,7 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
- bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
+ bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret =
@@ -1644,20 +1596,20 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
(ins),
/* else */
!if (!eq(NumSrcArgs, 1),
- !if (!eq(HasModifiers, 1),
+ !if (HasModifiers,
// VOP1 with modifiers
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
clampmod0:$clamp, omod0:$omod)
/* else */,
// VOP1 without modifiers
- !if (!eq(HasIntClamp, 1),
+ !if (HasClamp,
(ins Src0RC:$src0, clampmod0:$clamp),
(ins Src0RC:$src0))
/* endif */ ),
!if (!eq(NumSrcArgs, 2),
- !if (!eq(HasModifiers, 1),
+ !if (HasModifiers,
// VOP 2 with modifiers
- !if( !eq(HasOMod, 1),
+ !if(HasOMod,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod0:$clamp, omod0:$omod),
@@ -1666,21 +1618,21 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
clampmod0:$clamp))
/* else */,
// VOP2 without modifiers
- !if (!eq(HasIntClamp, 1),
+ !if (HasClamp,
(ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
(ins Src0RC:$src0, Src1RC:$src1))
/* endif */ )
/* NumSrcArgs == 3 */,
- !if (!eq(HasModifiers, 1),
- !if (!eq(HasSrc2Mods, 1),
+ !if (HasModifiers,
+ !if (HasSrc2Mods,
// VOP3 with modifiers
- !if (!eq(HasOMod, 1),
+ !if (HasOMod,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
clampmod0:$clamp, omod0:$omod),
- !if (!eq(HasIntClamp, 1),
+ !if (HasClamp,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
@@ -1689,11 +1641,11 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2))),
// VOP3 with modifiers except src2
- !if (!eq(HasOMod, 1),
+ !if (HasOMod,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
- !if (!eq(HasIntClamp, 1),
+ !if (HasClamp,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2RC:$src2, clampmod0:$clamp),
@@ -1702,119 +1654,87 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
Src2RC:$src2))))
/* else */,
// VOP3 without modifiers
- !if (!eq(HasIntClamp, 1),
+ !if (HasClamp,
(ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
(ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
/* endif */ ))));
}
-/// XXX - src1 may only allow VGPRs?
+class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs,
+ bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
+ bit IsVOP3P> {
+ // getIns64 handles clamp and omod. Implicit mutex between vop3p and omod.
+ dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
+ HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
+ Src0Mod, Src1Mod, Src2Mod>.ret;
+ dag opsel = (ins op_sel0:$op_sel);
+ dag vop3pFields = (ins op_sel_hi0:$op_sel_hi, neg_lo0:$neg_lo, neg_hi0:$neg_hi);
+ dag ret = !con(base,
+ !if(HasOpSel, opsel,(ins)),
+ !if(IsVOP3P, vop3pFields,(ins)));
+}
-// The modifiers (except clamp) are dummy operands for the benefit of
-// printing and parsing. They defer their values to looking at the
-// srcN_modifiers for what to print.
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
- RegisterOperand Src2RC, int NumSrcArgs,
- bit HasClamp,
+ RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
- dag ret = !if (!eq(NumSrcArgs, 2),
- !if (HasClamp,
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod0:$clamp,
- op_sel:$op_sel, op_sel_hi:$op_sel_hi,
- neg_lo:$neg_lo, neg_hi:$neg_hi),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- op_sel:$op_sel, op_sel_hi:$op_sel_hi,
- neg_lo:$neg_lo, neg_hi:$neg_hi)),
- // else NumSrcArgs == 3
- !if (HasClamp,
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod0:$clamp,
- op_sel:$op_sel, op_sel_hi:$op_sel_hi,
- neg_lo:$neg_lo, neg_hi:$neg_hi),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- op_sel:$op_sel, op_sel_hi:$op_sel_hi,
- neg_lo:$neg_lo, neg_hi:$neg_hi))
- );
+ dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
+ HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
+ 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
+ 1/*HasOpSel*/, 1/*IsVOP3P*/>.ret;
}
-class getInsVOP3OpSel <RegisterOperand Src0RC,
- RegisterOperand Src1RC,
- RegisterOperand Src2RC,
- int NumSrcArgs,
- bit HasClamp,
- Operand Src0Mod,
- Operand Src1Mod,
- Operand Src2Mod> {
- dag ret = !if (!eq(NumSrcArgs, 2),
- !if (HasClamp,
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod0:$clamp,
- op_sel:$op_sel),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- op_sel:$op_sel)),
- // else NumSrcArgs == 3
- !if (HasClamp,
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod0:$clamp,
- op_sel:$op_sel),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- op_sel:$op_sel))
- );
+class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs,
+ bit HasClamp, bit HasOMod,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ dag ret = getInsVOP3Base<Src0RC, Src1RC,
+ Src2RC, NumSrcArgs,
+ HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
+ Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret;
}
-class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
+class getInsDPPBase <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
int NumSrcArgs, bit HasModifiers,
Operand Src0Mod, Operand Src1Mod> {
dag ret = !if (!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
- (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl),
+ (ins ),
!if (!eq(NumSrcArgs, 1),
- !if (!eq(HasModifiers, 1),
+ !if (HasModifiers,
// VOP1_DPP with modifiers
(ins DstRC:$old, Src0Mod:$src0_modifiers,
- Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
+ Src0RC:$src0)
/* else */,
// VOP1_DPP without modifiers
- (ins DstRC:$old, Src0RC:$src0,
- dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
- /* endif */)
- /* NumSrcArgs == 2 */,
- !if (!eq(HasModifiers, 1),
+ (ins DstRC:$old, Src0RC:$src0)
+ /* endif */),
+ !if (HasModifiers,
// VOP2_DPP with modifiers
(ins DstRC:$old,
Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
+ Src1Mod:$src1_modifiers, Src1RC:$src1)
/* else */,
// VOP2_DPP without modifiers
(ins DstRC:$old,
- Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl,
- row_mask:$row_mask, bank_mask:$bank_mask,
- bound_ctrl:$bound_ctrl)
- /* endif */)));
+ Src0RC:$src0, Src1RC:$src1)
+ )));
+}
+
+class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
+ int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod> {
+ dag ret = !con(getInsDPPBase<DstRC, Src0RC, Src1RC, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod>.ret,
+ (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}
class getInsDPP16 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
- int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod> {
+ int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod> {
dag ret = !con(getInsDPP<DstRC, Src0RC, Src1RC, NumSrcArgs,
HasModifiers, Src0Mod, Src1Mod>.ret,
(ins FI:$fi));
@@ -1823,30 +1743,9 @@ class getInsDPP16 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Sr
class getInsDPP8 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
int NumSrcArgs, bit HasModifiers,
Operand Src0Mod, Operand Src1Mod> {
- dag ret = !if (!eq(NumSrcArgs, 0),
- // VOP1 without input operands (V_NOP)
- (ins dpp8:$dpp8, FI:$fi),
- !if (!eq(NumSrcArgs, 1),
- !if (!eq(HasModifiers, 1),
- // VOP1_DPP with modifiers
- (ins DstRC:$old, Src0Mod:$src0_modifiers,
- Src0RC:$src0, dpp8:$dpp8, FI:$fi)
- /* else */,
- // VOP1_DPP without modifiers
- (ins DstRC:$old, Src0RC:$src0, dpp8:$dpp8, FI:$fi)
- /* endif */)
- /* NumSrcArgs == 2 */,
- !if (!eq(HasModifiers, 1),
- // VOP2_DPP with modifiers
- (ins DstRC:$old,
- Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- dpp8:$dpp8, FI:$fi)
- /* else */,
- // VOP2_DPP without modifiers
- (ins DstRC:$old,
- Src0RC:$src0, Src1RC:$src1, dpp8:$dpp8, FI:$fi)
- /* endif */)));
+ dag ret = !con(getInsDPPBase<DstRC, Src0RC, Src1RC, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod>.ret,
+ (ins dpp8:$dpp8, FI:$fi));
}
@@ -1860,7 +1759,7 @@ class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs
(ins),
!if(!eq(NumSrcArgs, 1),
// VOP1
- !if(!eq(HasSDWAOMod, 0),
+ !if(!not(HasSDWAOMod),
// VOP1_SDWA without omod
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
clampmod:$clamp,
@@ -1878,7 +1777,7 @@ class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
// VOP2_SDWA
- !if(!eq(HasSDWAOMod, 0),
+ !if(!not(HasSDWAOMod),
// VOP2_SDWA without omod
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
@@ -1894,12 +1793,12 @@ class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs
(ins)/* endif */)));
}
-// Outs for DPP and SDWA
-class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> {
+// Outs for DPP
+class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
dag ret = !if(HasDst,
!if(!eq(DstVT.Size, 1),
(outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
- (outs DstRCExt:$vdst)),
+ (outs DstRCDPP:$vdst)),
(outs)); // V_NOP
}
@@ -1938,7 +1837,7 @@ class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
string iclamp = !if(HasIntClamp, "$clamp", "");
string ret =
- !if(!eq(HasModifiers, 0),
+ !if(!not(HasModifiers),
getAsm32<HasDst, NumSrcArgs, DstVT>.ret # iclamp,
dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
}
@@ -1964,6 +1863,7 @@ class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
class getAsmVOP3OpSel <int NumSrcArgs,
bit HasClamp,
+ bit HasOMod,
bit Src0HasMods,
bit Src1HasMods,
bit Src2HasMods> {
@@ -2000,7 +1900,7 @@ class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
string src1 = !if(!eq(NumSrcArgs, 1), "",
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
" $src1_modifiers,"));
- string args = !if(!eq(HasModifiers, 0),
+ string args = !if(!not(HasModifiers),
getAsm32<0, NumSrcArgs, DstVT>.ret,
", "#src0#src1);
string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
@@ -2010,22 +1910,12 @@ class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT
string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
}
-class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
- string dst = !if(HasDst,
- !if(!eq(DstVT.Size, 1),
- "$sdst",
- "$vdst"),
- ""); // use $sdst for VOPC
- string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
- string src1 = !if(!eq(NumSrcArgs, 1), "",
- !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
- " $src1_modifiers,"));
- string args = !if(!eq(HasModifiers, 0),
- getAsm32<0, NumSrcArgs, DstVT>.ret,
- ", "#src0#src1);
- string ret = dst#args#"$dpp8$fi";
+class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
+ : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT> {
+ let ret = dst#args#" $dpp8$fi";
}
+
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
string dst = !if(HasDst,
!if(!eq(DstVT.Size, 1),
@@ -2063,7 +1953,7 @@ class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
"");
string src0 = "$src0_modifiers";
string src1 = "$src1_modifiers";
- string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod");
+ string out_mods = !if(!not(HasOMod), "$clamp", "$clamp$omod");
string args = !if(!eq(NumSrcArgs, 0), "",
!if(!eq(NumSrcArgs, 1),
", "#src0,
@@ -2107,14 +1997,6 @@ class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}
-class BitOr<bit a, bit b> {
- bit ret = !if(a, 1, !if(b, 1, 0));
-}
-
-class BitAnd<bit a, bit b> {
- bit ret = !if(a, !if(b, 1, 0), 0);
-}
-
def PatGenMode {
int NoPattern = 0;
int Pattern = 1;
@@ -2146,24 +2028,19 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
- field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
- field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
+ field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
+ field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
- field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
+ field bit HasDst = !ne(DstVT.Value, untyped.Value);
field bit HasDst32 = HasDst;
field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
- field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1);
- field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1);
- field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
-
- // TODO: Modifiers logic is somewhat adhoc here, to be refined later
- // HasModifiers affects the normal and DPP encodings. We take note of EnableF32SrcMods, which
- // enables modifiers for i32 type.
- field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;
+ field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value);
+ field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
+ field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);
// HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
@@ -2175,16 +2052,12 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
- field bit HasSrc0Mods = HasModifiers;
- field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
- field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
-
- field bit HasClamp = BitOr<isModifierType<Src0VT>.ret, EnableClamp>.ret;
+ field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp);
field bit HasSDWAClamp = EmitDst;
- field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
+ field bit HasFPClamp = !and(isFloatType<DstVT>.ret, HasClamp);
field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
field bit HasClampLo = HasClamp;
- field bit HasClampHi = BitAnd<isPackedType<DstVT>.ret, HasClamp>.ret;
+ field bit HasClampHi = !and(isPackedType<DstVT>.ret, HasClamp);
field bit HasHigh = 0;
field bit IsPacked = isPackedType<Src0VT>.ret;
@@ -2192,6 +2065,16 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
field bit HasSDWAOMod = isFloatType<DstVT>.ret;
+ field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
+ isModifierType<Src1VT>.ret,
+ isModifierType<Src2VT>.ret,
+ HasOMod,
+ EnableF32SrcMods);
+
+ field bit HasSrc0Mods = HasModifiers;
+ field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
+ field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);
+
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA = HasExt;
@@ -2211,8 +2094,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
// output. This is manually overridden for them.
field dag Outs32 = Outs;
field dag Outs64 = Outs;
- field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
- field dag OutsDPP8 = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
+ field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
+ field dag OutsDPP8 = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
@@ -2223,11 +2106,10 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
NumSrcArgs, HasClamp,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
- NumSrcArgs,
- HasClamp,
- getOpSelMod<Src0VT>.ret,
- getOpSelMod<Src1VT>.ret,
- getOpSelMod<Src2VT>.ret>.ret;
+ NumSrcArgs, HasClamp, HasOMod,
+ getOpSelMod<Src0VT>.ret,
+ getOpSelMod<Src1VT>.ret,
+ getOpSelMod<Src2VT>.ret>.ret;
field dag InsDPP = !if(HasExtDPP,
getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
@@ -2245,7 +2127,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
- HasClamp,
+ HasClamp, HasOMod,
HasSrc0FloatMods,
HasSrc1FloatMods,
HasSrc2FloatMods>.ret;
@@ -2381,7 +2263,6 @@ class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
VINTRPCommon <outs, ins, asm, []>,
VINTRPe <op>,
SIMCInstr<opName, encodingFamily> {
- let DisableDecoder = DisableSIDecoder;
}
class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
@@ -2391,7 +2272,6 @@ class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicate = VIAssemblerPredicate;
let DecoderNamespace = "GFX8";
- let DisableDecoder = DisableVIDecoder;
}
// FIXME-GFX10: WIP.
@@ -2492,8 +2372,7 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.GFX80)],
[!cast<string>(SIEncodingFamily.GFX9)],
[!cast<string>(SIEncodingFamily.GFX10)],
- [!cast<string>(SIEncodingFamily.SDWA10)],
- [!cast<string>(SIEncodingFamily.GFX10_B)]];
+ [!cast<string>(SIEncodingFamily.SDWA10)]];
}
// Get equivalent SOPK instruction.
@@ -2567,11 +2446,28 @@ def getVCMPXNoSDstOp : InstrMapping {
// Maps a SOPP to a SOPP with S_NOP
def getSOPPWithRelaxation : InstrMapping {
- let FilterClass = "Base_SOPP";
- let RowFields = ["AsmString"];
- let ColFields = ["Size"];
- let KeyCol = ["4"];
- let ValueCols = [["8"]];
+ let FilterClass = "SOPPRelaxTable";
+ let RowFields = ["KeyName"];
+ let ColFields = ["IsRelaxed"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps flat scratch opcodes by addressing modes
+def getFlatScratchInstSTfromSS : InstrMapping {
+ let FilterClass = "FlatScratchInst";
+ let RowFields = ["SVOp"];
+ let ColFields = ["Mode"];
+ let KeyCol = ["SS"];
+ let ValueCols = [["ST"]];
+}
+
+def getFlatScratchInstSSfromSV : InstrMapping {
+ let FilterClass = "FlatScratchInst";
+ let RowFields = ["SVOp"];
+ let ColFields = ["Mode"];
+ let KeyCol = ["SV"];
+ let ValueCols = [["SS"]];
}
include "SIInstructions.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0c4c9e0e9df2..7c1cbd67c993 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -19,43 +19,7 @@ include "VOPInstructions.td"
include "SMInstructions.td"
include "FLATInstructions.td"
include "BUFInstructions.td"
-
-//===----------------------------------------------------------------------===//
-// EXP Instructions
-//===----------------------------------------------------------------------===//
-
-defm EXP : EXP_m<0>;
-defm EXP_DONE : EXP_m<1>;
-
-class ExpPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
- (int_amdgcn_exp timm:$tgt, timm:$en,
- (vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
- (vt ExpSrc2:$src2), (vt ExpSrc3:$src3),
- done_val, timm:$vm),
- (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
- ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en)
->;
-
-class ExpComprPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
- (int_amdgcn_exp_compr timm:$tgt, timm:$en,
- (vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
- done_val, timm:$vm),
- (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
- (IMPLICIT_DEF), (IMPLICIT_DEF), timm:$vm, 1, timm:$en)
->;
-
-// FIXME: The generated DAG matcher seems to have strange behavior
-// with a 1-bit literal to match, so use a -1 for checking a true
-// 1-bit value.
-def : ExpPattern<i32, EXP, 0>;
-def : ExpPattern<i32, EXP_DONE, -1>;
-def : ExpPattern<f32, EXP, 0>;
-def : ExpPattern<f32, EXP_DONE, -1>;
-
-def : ExpComprPattern<v2i16, EXP, 0>;
-def : ExpComprPattern<v2i16, EXP_DONE, -1>;
-def : ExpComprPattern<v2f16, EXP, 0>;
-def : ExpComprPattern<v2f16, EXP_DONE, -1>;
+include "EXPInstructions.td"
//===----------------------------------------------------------------------===//
// VINTRP Instructions
@@ -264,6 +228,7 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
let WaveSizePredicate = isWave64 in {
def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
+def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
}
@@ -324,7 +289,7 @@ def SI_IF: CFPseudoInstSI <
def SI_ELSE : CFPseudoInstSI <
(outs SReg_1:$dst),
- (ins SReg_1:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
+ (ins SReg_1:$src, brtarget:$target), [], 1, 1> {
let Size = 12;
let hasSideEffects = 1;
}
@@ -356,6 +321,14 @@ def SI_IF_BREAK : CFPseudoInstSI <
let isReMaterializable = 1;
}
+// Branch to the early termination block of the shader if SCC is 0.
+// This uses SCC from a previous SALU operation, i.e. the update of
+// a mask of live lanes after a kill/demote operation.
+// Only valid in pixel shaders.
+def SI_EARLY_TERMINATE_SCC0 : SPseudoInstSI <(outs), (ins)> {
+ let Uses = [EXEC,SCC];
+}
+
let Uses = [EXEC] in {
multiclass PseudoInstKill <dag ins> {
@@ -426,32 +399,13 @@ def SI_INIT_EXEC : SPseudoInstSI <
(outs), (ins i64imm:$src),
[(int_amdgcn_init_exec (i64 timm:$src))]> {
let Defs = [EXEC];
- let usesCustomInserter = 1;
- let isAsCheapAsAMove = 1;
- let WaveSizePredicate = isWave64;
-}
-
-// FIXME: Intrinsic should be mangled for wave size.
-def SI_INIT_EXEC_LO : SPseudoInstSI <
- (outs), (ins i32imm:$src), []> {
- let Defs = [EXEC_LO];
- let usesCustomInserter = 1;
let isAsCheapAsAMove = 1;
- let WaveSizePredicate = isWave32;
}
-// FIXME: Wave32 version
def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
(outs), (ins SSrc_b32:$input, i32imm:$shift),
[(int_amdgcn_init_exec_from_input i32:$input, (i32 timm:$shift))]> {
let Defs = [EXEC];
- let usesCustomInserter = 1;
-}
-
-def : GCNPat <
- (int_amdgcn_init_exec timm:$src),
- (SI_INIT_EXEC_LO (as_i32timm timm:$src))> {
- let WaveSizePredicate = isWave32;
}
// Return for returning shaders to a shader variant epilog.
@@ -580,64 +534,97 @@ def SI_INDIRECT_DST_V32 : SI_INDIRECT_DST<VReg_1024>;
} // End Uses = [EXEC], Defs = [M0, EXEC]
-
-// This is a pseudo variant of the v_movreld_b32 (or v_mov_b32
-// expecting to be executed with gpr indexing mode enabled)
-// instruction in which the vector operand appears only twice, once as
-// def and once as use. Using this pseudo avoids problems with the Two
-// Address instructions pass.
-class INDIRECT_REG_WRITE_pseudo<RegisterClass rc,
+// This is a pseudo variant of the v_movreld_b32 instruction in which the
+// vector operand appears only twice, once as def and once as use. Using this
+// pseudo avoids problems with the Two Address instructions pass.
+class INDIRECT_REG_WRITE_MOVREL_pseudo<RegisterClass rc,
RegisterOperand val_ty> : PseudoInstSI <
(outs rc:$vdst), (ins rc:$vsrc, val_ty:$val, i32imm:$subreg)> {
let Constraints = "$vsrc = $vdst";
let Uses = [M0];
}
-class V_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> :
- INDIRECT_REG_WRITE_pseudo<rc, VSrc_b32> {
+class V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<RegisterClass rc> :
+ INDIRECT_REG_WRITE_MOVREL_pseudo<rc, VSrc_b32> {
let VALU = 1;
let VOP1 = 1;
let Uses = [M0, EXEC];
}
-class S_INDIRECT_REG_WRITE_pseudo<RegisterClass rc,
+class S_INDIRECT_REG_WRITE_MOVREL_pseudo<RegisterClass rc,
RegisterOperand val_ty> :
- INDIRECT_REG_WRITE_pseudo<rc, val_ty> {
+ INDIRECT_REG_WRITE_MOVREL_pseudo<rc, val_ty> {
let SALU = 1;
let SOP1 = 1;
let Uses = [M0];
}
-class S_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> :
- S_INDIRECT_REG_WRITE_pseudo<rc, SSrc_b32>;
-class S_INDIRECT_REG_WRITE_B64_pseudo<RegisterClass rc> :
- S_INDIRECT_REG_WRITE_pseudo<rc, SSrc_b64>;
-
-
-def V_INDIRECT_REG_WRITE_B32_V1 : V_INDIRECT_REG_WRITE_B32_pseudo<VGPR_32>;
-def V_INDIRECT_REG_WRITE_B32_V2 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_64>;
-def V_INDIRECT_REG_WRITE_B32_V3 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_96>;
-def V_INDIRECT_REG_WRITE_B32_V4 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_128>;
-def V_INDIRECT_REG_WRITE_B32_V5 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_160>;
-def V_INDIRECT_REG_WRITE_B32_V8 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_256>;
-def V_INDIRECT_REG_WRITE_B32_V16 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_512>;
-def V_INDIRECT_REG_WRITE_B32_V32 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_1024>;
+class S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<RegisterClass rc> :
+ S_INDIRECT_REG_WRITE_MOVREL_pseudo<rc, SSrc_b32>;
+class S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<RegisterClass rc> :
+ S_INDIRECT_REG_WRITE_MOVREL_pseudo<rc, SSrc_b64>;
+
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V1 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VGPR_32>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V2 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_64>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V3 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_96>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V4 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_128>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V5 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_160>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V8 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_256>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V16 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_512>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V32 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_1024>;
+
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V1 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_32>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V2 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_64>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V3 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_96>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V4 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_128>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V5 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_160>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V8 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_256>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V16 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_512>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V32 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_1024>;
+
+def S_INDIRECT_REG_WRITE_MOVREL_B64_V1 : S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<SReg_64>;
+def S_INDIRECT_REG_WRITE_MOVREL_B64_V2 : S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<SReg_128>;
+def S_INDIRECT_REG_WRITE_MOVREL_B64_V4 : S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<SReg_256>;
+def S_INDIRECT_REG_WRITE_MOVREL_B64_V8 : S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<SReg_512>;
+def S_INDIRECT_REG_WRITE_MOVREL_B64_V16 : S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<SReg_1024>;
+
+// These variants of V_INDIRECT_REG_READ/WRITE use VGPR indexing. By using these
+// pseudos we avoid spills or copies being inserted within indirect sequences
+// that switch the VGPR indexing mode. Spills to accvgprs could be affected by
+// this mode switching.
+
+class V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<RegisterClass rc> : PseudoInstSI <
+ (outs rc:$vdst), (ins rc:$vsrc, VSrc_b32:$val, SSrc_b32:$idx, i32imm:$subreg)> {
+ let Constraints = "$vsrc = $vdst";
+ let VALU = 1;
+ let Uses = [M0, EXEC];
+ let Defs = [M0];
+}
-def S_INDIRECT_REG_WRITE_B32_V1 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_32>;
-def S_INDIRECT_REG_WRITE_B32_V2 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_64>;
-def S_INDIRECT_REG_WRITE_B32_V3 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_96>;
-def S_INDIRECT_REG_WRITE_B32_V4 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_128>;
-def S_INDIRECT_REG_WRITE_B32_V5 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_160>;
-def S_INDIRECT_REG_WRITE_B32_V8 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_256>;
-def S_INDIRECT_REG_WRITE_B32_V16 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_512>;
-def S_INDIRECT_REG_WRITE_B32_V32 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_1024>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VGPR_32>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_64>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_96>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_128>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_160>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_256>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_512>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_1024>;
-def S_INDIRECT_REG_WRITE_B64_V1 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_64>;
-def S_INDIRECT_REG_WRITE_B64_V2 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_128>;
-def S_INDIRECT_REG_WRITE_B64_V4 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_256>;
-def S_INDIRECT_REG_WRITE_B64_V8 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_512>;
-def S_INDIRECT_REG_WRITE_B64_V16 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_1024>;
+class V_INDIRECT_REG_READ_GPR_IDX_pseudo<RegisterClass rc> : PseudoInstSI <
+ (outs VGPR_32:$vdst), (ins rc:$vsrc, SSrc_b32:$idx, i32imm:$subreg)> {
+ let VALU = 1;
+ let Uses = [M0, EXEC];
+ let Defs = [M0];
+}
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V1 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VGPR_32>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V2 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_64>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V3 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_96>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V4 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_128>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V5 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_160>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V8 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_256>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V16 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_512>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V32 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_1024>;
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {
@@ -671,30 +658,33 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
-multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
+// VGPR or AGPR spill instructions. When spilling an AGPR, a temporary VGPR and
+// an extra instruction to move between VGPR and AGPR are needed.
+// UsesTmp adds to the total size of an expanded spill in this case.
+multiclass SI_SPILL_VGPR <RegisterClass vgpr_class, bit UsesTmp = 0> {
let UseNamedOperandTable = 1, VGPRSpill = 1,
SchedRW = [WriteVMEM] in {
def _SAVE : VPseudoInstSI <
(outs),
- (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc,
+ (ins vgpr_class:$vdata, i32imm:$vaddr,
SReg_32:$soffset, i32imm:$offset)> {
let mayStore = 1;
let mayLoad = 0;
// (2 * 4) + (8 * num_subregs) bytes maximum
- int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+ int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), !add(UsesTmp, 3)), 8);
// Size field is unsigned char and cannot fit more.
let Size = !if(!le(MaxSize, 256), MaxSize, 252);
}
def _RESTORE : VPseudoInstSI <
(outs vgpr_class:$vdata),
- (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset,
- i32imm:$offset)> {
+ (ins i32imm:$vaddr,
+ SReg_32:$soffset, i32imm:$offset)> {
let mayStore = 0;
let mayLoad = 1;
// (2 * 4) + (8 * num_subregs) bytes maximum
- int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+ int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), !add(UsesTmp, 3)), 8);
// Size field is unsigned char and cannot fit more.
let Size = !if(!le(MaxSize, 256), MaxSize, 252);
}
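The MaxSize expressions above encode ((Size >> 5) << (UsesTmp + 3)) + 8, i.e. 8 bytes of fixed overhead plus 8 bytes (16 when an AGPR temp is needed) per 32-bit subregister, with the Size field then capped at 252 because it is an unsigned char. A worked C++ sketch of that arithmetic, with example values that are not taken from the generated tables:

    #include <cassert>

    // Mirror of the TableGen formula: !add(!shl(!srl(Size, 5), !add(UsesTmp, 3)), 8)
    static int maxSpillSize(int regSizeInBits, bool usesTmp) {
      const int numSubRegs = regSizeInBits >> 5;    // Size / 32
      return (numSubRegs << (usesTmp ? 4 : 3)) + 8; // 8 or 16 bytes per subreg, plus 8
    }

    int main() {
      assert(maxSpillSize(32, false) == 16);    // e.g. SI_SPILL_V32
      assert(maxSpillSize(64, true) == 40);     // e.g. SI_SPILL_A64, with the VGPR temp
      assert(maxSpillSize(1024, false) == 264); // SI_SPILL_V1024; Size is then capped at 252
      return 0;
    }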
@@ -711,42 +701,15 @@ defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;
-multiclass SI_SPILL_AGPR <RegisterClass vgpr_class> {
- let UseNamedOperandTable = 1, VGPRSpill = 1,
- Constraints = "@earlyclobber $tmp",
- SchedRW = [WriteVMEM] in {
- def _SAVE : VPseudoInstSI <
- (outs VGPR_32:$tmp),
- (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc,
- SReg_32:$soffset, i32imm:$offset)> {
- let mayStore = 1;
- let mayLoad = 0;
- // (2 * 4) + (16 * num_subregs) bytes maximum
- int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
- // Size field is unsigned char and cannot fit more.
- let Size = !if(!le(MaxSize, 256), MaxSize, 252);
- }
-
- def _RESTORE : VPseudoInstSI <
- (outs vgpr_class:$vdata, VGPR_32:$tmp),
- (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset,
- i32imm:$offset)> {
- let mayStore = 0;
- let mayLoad = 1;
-
- // (2 * 4) + (16 * num_subregs) bytes maximum
- int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
- // Size field is unsigned char and cannot fit more.
- let Size = !if(!le(MaxSize, 256), MaxSize, 252);
- }
- } // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
-}
-
-defm SI_SPILL_A32 : SI_SPILL_AGPR <AGPR_32>;
-defm SI_SPILL_A64 : SI_SPILL_AGPR <AReg_64>;
-defm SI_SPILL_A128 : SI_SPILL_AGPR <AReg_128>;
-defm SI_SPILL_A512 : SI_SPILL_AGPR <AReg_512>;
-defm SI_SPILL_A1024 : SI_SPILL_AGPR <AReg_1024>;
+defm SI_SPILL_A32 : SI_SPILL_VGPR <AGPR_32, 1>;
+defm SI_SPILL_A64 : SI_SPILL_VGPR <AReg_64, 1>;
+defm SI_SPILL_A96 : SI_SPILL_VGPR <AReg_96, 1>;
+defm SI_SPILL_A128 : SI_SPILL_VGPR <AReg_128, 1>;
+defm SI_SPILL_A160 : SI_SPILL_VGPR <AReg_160, 1>;
+defm SI_SPILL_A192 : SI_SPILL_VGPR <AReg_192, 1>;
+defm SI_SPILL_A256 : SI_SPILL_VGPR <AReg_256, 1>;
+defm SI_SPILL_A512 : SI_SPILL_VGPR <AReg_512, 1>;
+defm SI_SPILL_A1024 : SI_SPILL_VGPR <AReg_1024, 1>;
def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
(outs SReg_64:$dst),
@@ -768,7 +731,7 @@ def : GCNPat<
def : GCNPat<
(AMDGPUelse i1:$src, bb:$target),
- (SI_ELSE $src, $target, 0)
+ (SI_ELSE $src, $target)
>;
def : Pat <
@@ -804,12 +767,9 @@ def : Pat <
let OtherPredicates = [UnsafeFPMath] in {
-//def : RcpPat<V_RCP_F64_e32, f64>;
-//defm : RsqPat<V_RSQ_F64_e32, f64>;
//defm : RsqPat<V_RSQ_F32_e32, f32>;
def : RsqPat<V_RSQ_F32_e32, f32>;
-def : RsqPat<V_RSQ_F64_e32, f64>;
// Convert (x - floor(x)) to fract(x)
def : GCNPat <
@@ -889,7 +849,8 @@ def : GCNPat <
// VOP2 Patterns
//===----------------------------------------------------------------------===//
-// TODO: Check only no src2 mods?
+// NoMods pattern used for mac. If there are any source modifiers then it's
+// better to select mad instead of mac.
class FMADPat <ValueType vt, Instruction inst, SDPatternOperator node>
: GCNPat <(vt (node (vt (VOP3NoMods vt:$src0)),
(vt (VOP3NoMods vt:$src1)),
@@ -898,18 +859,41 @@ class FMADPat <ValueType vt, Instruction inst, SDPatternOperator node>
SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-
// Prefer mac form when there are no modifiers.
let AddedComplexity = 9 in {
+let OtherPredicates = [HasMadMacF32Insts] in {
def : FMADPat <f32, V_MAC_F32_e64, fmad>;
def : FMADPat <f32, V_MAC_F32_e64, AMDGPUfmad_ftz>;
+} // OtherPredicates = [HasMadMacF32Insts]
+
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select mad instead of mac.
+let SubtargetPredicate = isGFX6GFX7GFX10,
+ OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in
+def : GCNPat <
+ (f32 (fadd (AMDGPUfmul_legacy (VOP3NoMods f32:$src0),
+ (VOP3NoMods f32:$src1)),
+ (VOP3NoMods f32:$src2))),
+ (V_MAC_LEGACY_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+ SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select fma instead of fmac.
+let SubtargetPredicate = HasFmaLegacy32 in
+def : GCNPat <
+ (f32 (int_amdgcn_fma_legacy (VOP3NoMods f32:$src0),
+ (VOP3NoMods f32:$src1),
+ (VOP3NoMods f32:$src2))),
+ (V_FMAC_LEGACY_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+ SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
let SubtargetPredicate = Has16BitInsts in {
def : FMADPat <f16, V_MAC_F16_e64, fmad>;
def : FMADPat <f16, V_MAC_F16_e64, AMDGPUfmad_ftz>;
-}
-
-}
+} // SubtargetPredicate = Has16BitInsts
+} // AddedComplexity = 9
class FMADModsPat<ValueType Ty, Instruction inst, SDPatternOperator mad_opr>
: GCNPat<
@@ -920,11 +904,20 @@ class FMADModsPat<ValueType Ty, Instruction inst, SDPatternOperator mad_opr>
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-let SubtargetPredicate = HasMadMacF32Insts in
-def : FMADModsPat<f32, V_MAD_F32, AMDGPUfmad_ftz>;
-def : FMADModsPat<f16, V_MAD_F16, AMDGPUfmad_ftz> {
- let SubtargetPredicate = Has16BitInsts;
-}
+let OtherPredicates = [HasMadMacF32Insts] in
+def : FMADModsPat<f32, V_MAD_F32_e64, AMDGPUfmad_ftz>;
+
+let OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in
+def : GCNPat <
+ (f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod),
+ (VOP3Mods f32:$src1, i32:$src1_mod)),
+ (VOP3Mods f32:$src2, i32:$src2_mod))),
+ (V_MAD_LEGACY_F32_e64 $src0_mod, $src0, $src1_mod, $src1,
+ $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+let SubtargetPredicate = Has16BitInsts in
+def : FMADModsPat<f16, V_MAD_F16_e64, AMDGPUfmad_ftz>;
class VOPSelectModsPat <ValueType vt> : GCNPat <
(vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
@@ -1241,7 +1234,7 @@ class ClampPat<Instruction inst, ValueType vt> : GCNPat <
>;
def : ClampPat<V_MAX_F32_e64, f32>;
-def : ClampPat<V_MAX_F64, f64>;
+def : ClampPat<V_MAX_F64_e64, f64>;
def : ClampPat<V_MAX_F16_e64, f16>;
let SubtargetPredicate = HasVOP3PInsts in {
@@ -1422,12 +1415,12 @@ def : GCNPat <
def : GCNPat <
(fcopysign f16:$src0, f16:$src1),
- (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
>;
def : GCNPat <
(fcopysign f32:$src0, f16:$src1),
- (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), $src0,
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), $src0,
(V_LSHLREV_B32_e64 (i32 16), $src1))
>;
@@ -1435,19 +1428,19 @@ def : GCNPat <
(fcopysign f64:$src0, f16:$src1),
(REG_SEQUENCE SReg_64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
- (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), (i32 (EXTRACT_SUBREG $src0, sub1)),
(V_LSHLREV_B32_e64 (i32 16), $src1)), sub1)
>;
def : GCNPat <
(fcopysign f16:$src0, f32:$src1),
- (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0,
(V_LSHRREV_B32_e64 (i32 16), $src1))
>;
def : GCNPat <
(fcopysign f16:$src0, f64:$src1),
- (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0,
(V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))
>;
@@ -1499,8 +1492,13 @@ def : GCNPat <
>;
def : GCNPat <
- (i32 frameindex:$fi),
- (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi)))
+ (p5 frameindex:$fi),
+ (V_MOV_B32_e32 (p5 (frameindex_to_targetframeindex $fi)))
+>;
+
+def : GCNPat <
+ (p5 frameindex:$fi),
+ (S_MOV_B32 (p5 (frameindex_to_targetframeindex $fi)))
>;
def : GCNPat <
@@ -1565,19 +1563,103 @@ def : GCNPat <
// VOP3 Patterns
//===----------------------------------------------------------------------===//
-def : IMad24Pat<V_MAD_I32_I24, 1>;
-def : UMad24Pat<V_MAD_U32_U24, 1>;
+def : IMad24Pat<V_MAD_I32_I24_e64, 1>;
+def : UMad24Pat<V_MAD_U32_U24_e64, 1>;
+
+// BFI patterns
+
+def BFIImm32 : PatFrag<
+ (ops node:$x, node:$y, node:$z),
+ (i32 (DivergentBinFrag<or> (and node:$y, node:$x), (and node:$z, imm))),
+ [{
+ auto *X = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1));
+ auto *NotX = dyn_cast<ConstantSDNode>(N->getOperand(1)->getOperand(1));
+ return X && NotX &&
+ ~(unsigned)X->getZExtValue() == (unsigned)NotX->getZExtValue();
+ }]
+>;
+
+// Definition from ISA doc:
+// (y & x) | (z & ~x)
+def : AMDGPUPat <
+ (DivergentBinFrag<or> (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
+ (V_BFI_B32_e64 $x, $y, $z)
+>;
+
+// (y & C) | (z & ~C)
+def : AMDGPUPat <
+ (BFIImm32 i32:$x, i32:$y, i32:$z),
+ (V_BFI_B32_e64 $x, $y, $z)
+>;
+
+// 64-bit version
+def : AMDGPUPat <
+ (DivergentBinFrag<or> (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
+ (REG_SEQUENCE SReg_64,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG SReg_64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG SReg_64:$z, sub0))), sub0,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG SReg_64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG SReg_64:$z, sub1))), sub1)
+>;
+
+// SHA-256 Ch function
+// z ^ (x & (y ^ z))
+def : AMDGPUPat <
+ (DivergentBinFrag<xor> i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
+ (V_BFI_B32_e64 $x, $y, $z)
+>;
-// FIXME: This should only be done for VALU inputs
-defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
-def : ROTRPattern <V_ALIGNBIT_B32>;
+// 64-bit version
+def : AMDGPUPat <
+ (DivergentBinFrag<xor> i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
+ (REG_SEQUENCE SReg_64,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG SReg_64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG SReg_64:$z, sub0))), sub0,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG SReg_64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG SReg_64:$z, sub1))), sub1)
+>;
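For reference, the BFI and SHA-256 Ch patterns above both reduce to the same bitfield-insert identity from the ISA doc. A minimal standalone C++ sketch of that identity; the helper name and test values are illustrative, not part of the patch:

    #include <cassert>
    #include <cstdint>

    // V_BFI_B32: take bits of y where x is 1, bits of z where x is 0.
    static uint32_t bfi32(uint32_t x, uint32_t y, uint32_t z) {
      return (y & x) | (z & ~x);
    }

    int main() {
      const uint32_t x = 0xf0f0f0f0u, y = 0x12345678u, z = 0x9abcdef0u;
      // SHA-256 Ch(x, y, z) = z ^ (x & (y ^ z)) selects y where x is set, else z,
      // which is exactly the BFI computation matched above.
      assert((z ^ (x & (y ^ z))) == bfi32(x, y, z));
      return 0;
    }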
+
+def : AMDGPUPat <
+ (fcopysign f32:$src0, f32:$src1),
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), $src0, $src1)
+>;
+
+def : AMDGPUPat <
+ (fcopysign f32:$src0, f64:$src1),
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), $src0,
+ (i32 (EXTRACT_SUBREG SReg_64:$src1, sub1)))
+>;
+
+def : AMDGPUPat <
+ (fcopysign f64:$src0, f64:$src1),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)),
+ (i32 (EXTRACT_SUBREG SReg_64:$src0, sub1)),
+ (i32 (EXTRACT_SUBREG SReg_64:$src1, sub1))), sub1)
+>;
+
+def : AMDGPUPat <
+ (fcopysign f64:$src0, f32:$src1),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)),
+ (i32 (EXTRACT_SUBREG SReg_64:$src0, sub1)),
+ $src1), sub1)
+>;
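The fcopysign patterns above and earlier in this hunk all use the same trick: keep the magnitude bits of the first operand and insert the sign bit of the second through V_BFI_B32 with mask 0x7fffffff. A small standalone C++ illustration; the helper name is ad hoc:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static uint32_t bfi32(uint32_t x, uint32_t y, uint32_t z) {
      return (y & x) | (z & ~x);
    }

    int main() {
      const float mag = -1.5f, sign = 2.0f;
      uint32_t umag, usign;
      std::memcpy(&umag, &mag, sizeof(umag));
      std::memcpy(&usign, &sign, sizeof(usign));
      // 0x7fffffff keeps the exponent/mantissa of mag and the sign bit of sign.
      const uint32_t bits = bfi32(0x7fffffffu, umag, usign);
      float result;
      std::memcpy(&result, &bits, sizeof(result));
      assert(result == std::copysign(mag, sign)); // 1.5f
      return 0;
    }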
+
+def : ROTRPattern <V_ALIGNBIT_B32_e64>;
def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
- (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+ (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
- (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+ (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
/********** ====================== **********/
@@ -1618,7 +1700,7 @@ def : GCNPat <
(add (sub_oneuse (umax i32:$src0, i32:$src1),
(umin i32:$src0, i32:$src1)),
i32:$src2),
- (V_SAD_U32 $src0, $src1, $src2, (i1 0))
+ (V_SAD_U32_e64 $src0, $src1, $src2, (i1 0))
>;
def : GCNPat <
@@ -1626,7 +1708,7 @@ def : GCNPat <
(sub i32:$src0, i32:$src1),
(sub i32:$src1, i32:$src0)),
i32:$src2),
- (V_SAD_U32 $src0, $src1, $src2, (i1 0))
+ (V_SAD_U32_e64 $src0, $src1, $src2, (i1 0))
>;
//===----------------------------------------------------------------------===//
@@ -1877,9 +1959,9 @@ def : GCNPat <
def : GCNPat <
(i32 (bswap i32:$a)),
- (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)),
- (V_ALIGNBIT_B32 VSrc_b32:$a, VSrc_b32:$a, (i32 24)),
- (V_ALIGNBIT_B32 VSrc_b32:$a, VSrc_b32:$a, (i32 8)))
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
+ (V_ALIGNBIT_B32_e64 VSrc_b32:$a, VSrc_b32:$a, (i32 24)),
+ (V_ALIGNBIT_B32_e64 VSrc_b32:$a, VSrc_b32:$a, (i32 8)))
>;
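The i32 bswap pattern above relies on V_ALIGNBIT_B32 a, a, n acting as a 32-bit rotate right by n (see the ROTRPattern above), and combines two such rotations through V_BFI_B32 with mask 0x00ff00ff to reverse the byte order. A quick standalone check of that identity, as a sketch rather than the selector itself:

    #include <cassert>
    #include <cstdint>

    static uint32_t rotr32(uint32_t v, unsigned s) {
      return (v >> s) | (v << (32 - s)); // s in 1..31 here
    }

    static uint32_t bfi32(uint32_t x, uint32_t y, uint32_t z) {
      return (y & x) | (z & ~x);
    }

    int main() {
      const uint32_t a = 0x11223344u;
      // Matches (V_BFI_B32 0x00ff00ff, (V_ALIGNBIT a, a, 24), (V_ALIGNBIT a, a, 8)).
      assert(bfi32(0x00ff00ffu, rotr32(a, 24), rotr32(a, 8)) == 0x44332211u);
      return 0;
    }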
// FIXME: This should have been narrowed to i32 during legalization.
@@ -1887,19 +1969,19 @@ def : GCNPat <
def : GCNPat <
(i64 (bswap i64:$a)),
(REG_SEQUENCE VReg_64,
- (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)),
- (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
+ (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
(i32 24)),
- (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
+ (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
(i32 8))),
sub0,
- (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)),
- (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
+ (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
(i32 24)),
- (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
+ (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
(i32 8))),
sub1)
@@ -1914,7 +1996,7 @@ let SubtargetPredicate = isGFX8Plus, AddedComplexity = 1 in {
// register value, but this is what seems to work.
def : GCNPat <
(i32 (bswap i32:$a)),
- (V_PERM_B32 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x00010203)))
+ (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x00010203)))
>;
// FIXME: This should have been narrowed to i32 during legalization.
@@ -1922,10 +2004,10 @@ def : GCNPat <
def : GCNPat <
(i64 (bswap i64:$a)),
(REG_SEQUENCE VReg_64,
- (V_PERM_B32 (i32 0), (EXTRACT_SUBREG VReg_64:$a, sub1),
+ (V_PERM_B32_e64 (i32 0), (EXTRACT_SUBREG VReg_64:$a, sub1),
(S_MOV_B32 (i32 0x00010203))),
sub0,
- (V_PERM_B32 (i32 0), (EXTRACT_SUBREG VReg_64:$a, sub0),
+ (V_PERM_B32_e64 (i32 0), (EXTRACT_SUBREG VReg_64:$a, sub0),
(S_MOV_B32 (i32 0x00010203))),
sub1)
>;
@@ -1934,18 +2016,18 @@ def : GCNPat <
 // The 12s (0x0c byte selectors) emit 0s.
def : GCNPat <
(i16 (bswap i16:$a)),
- (V_PERM_B32 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
+ (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
>;
def : GCNPat <
(i32 (zext (bswap i16:$a))),
- (V_PERM_B32 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
+ (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
>;
// Magic number: 1 | (0 << 8) | (3 << 16) | (2 << 24)
def : GCNPat <
(v2i16 (bswap v2i16:$a)),
- (V_PERM_B32 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x02030001)))
+ (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x02030001)))
>;
}
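The GFX8+ bswap patterns above instead go through V_PERM_B32 with a byte-selector constant. The following is a simplified, assumed model of that instruction, handling only selector values 0-7 and 0x0c; the helper is illustrative:

    #include <cassert>
    #include <cstdint>

    // Assumed semantics: selector byte n picks byte n of {src0:src1} when n < 8;
    // 0x0c produces 0x00 ("the 12s emit 0s"); other special selectors not modeled.
    static uint32_t perm_b32(uint32_t src0, uint32_t src1, uint32_t sel) {
      const uint64_t combined = (uint64_t(src0) << 32) | src1;
      uint32_t result = 0;
      for (int i = 0; i < 4; ++i) {
        const uint32_t s = (sel >> (8 * i)) & 0xff;
        const uint32_t byte = s < 8 ? uint32_t((combined >> (8 * s)) & 0xff) : 0;
        result |= byte << (8 * i);
      }
      return result;
    }

    int main() {
      assert(perm_b32(0, 0x11223344u, 0x00010203u) == 0x44332211u); // i32 bswap
      assert(perm_b32(0, 0x00001122u, 0x0c0c0001u) == 0x00002211u); // i16 bswap
      return 0;
    }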
@@ -1981,7 +2063,7 @@ def : GCNPat<
// TODO: Handle fneg like other types.
def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
- (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src)
+ (V_MUL_F64_e64 0, CONST.FP64_ONE, $src_mods, $src)
>;
} // End AddedComplexity = -5
@@ -1997,7 +2079,7 @@ multiclass SelectCanonicalizeAsMax<
def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
- (V_MAX_F64 $src_mods, $src, $src_mods, $src)> {
+ (V_MAX_F64_e64 $src_mods, $src, $src_mods, $src)> {
let OtherPredicates = f64_preds;
}
@@ -2059,14 +2141,22 @@ def : GCNPat <
SRCMODS.NONE, $src2)
>;
-// COPY is workaround tablegen bug from multiple outputs
-// from S_LSHL_B32's multiple outputs from implicit scc def.
def : GCNPat <
(v2i16 (build_vector (i16 0), (i16 SReg_32:$src1))),
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
def : GCNPat <
+ (v2i16 (build_vector (i16 SReg_32:$src1), (i16 0))),
+ (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
+>;
+
+def : GCNPat <
+ (v2f16 (build_vector (f16 SReg_32:$src1), (f16 FP_ZERO))),
+ (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
+>;
+
+def : GCNPat <
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
>;
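The build_vector patterns above follow from the packed-16-bit layout in which element 0 occupies the low half of the 32-bit register and element 1 the high half, so a zero in element 0 becomes a shift and a zero in element 1 becomes a mask. A tiny illustrative model:

    #include <cassert>
    #include <cstdint>

    static uint32_t pack_v2i16(uint16_t elt0, uint16_t elt1) {
      return uint32_t(elt0) | (uint32_t(elt1) << 16);
    }

    int main() {
      assert(pack_v2i16(0, 0xabcd) == 0xabcd0000u); // (build_vector 0, x) -> x << 16
      assert(pack_v2i16(0x1234, 0) == 0x00001234u); // (build_vector x, 0) -> x & 0xffff
      return 0;
    }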
@@ -2177,12 +2267,12 @@ let SubtargetPredicate = isGFX6 in {
// FIXME: DAG should also custom lower this.
def : GCNPat <
(f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
- (V_ADD_F64
+ (V_ADD_F64_e64
$mods,
$x,
SRCMODS.NEG,
(V_CNDMASK_B64_PSEUDO
- (V_MIN_F64
+ (V_MIN_F64_e64
SRCMODS.NONE,
(V_FRACT_F64_e64 $mods, $x),
SRCMODS.NONE,
@@ -2213,7 +2303,7 @@ def : GCNPat<
def : GCNPat<
(add i32:$src0, (i32 NegSubInlineConst32:$src1)),
- (V_SUB_I32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> {
+ (V_SUB_CO_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> {
let SubtargetPredicate = NotHasAddNoCarryInsts;
}
@@ -2241,8 +2331,77 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
-defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
-defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;
+// Bitfield extract patterns
+
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+ return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+ MVT::i32);
+}]>;
+
+def : AMDGPUPat <
+ (DivergentBinFrag<and> (i32 (srl i32:$src, i32:$rshift)),
+ IMMZeroBasedBitfieldMask:$mask),
+ (V_BFE_U32_e64 $src, $rshift, (i32 (IMMPopCount $mask)))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+ (DivergentBinFrag<and> i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+ (V_BFE_U32_e64 $src, (i32 0), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+ (DivergentBinFrag<and> i32:$src,
+ (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+ (V_BFE_U32_e64 $src, (i32 0), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+ (DivergentBinFrag<and> i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+ (V_BFE_U32_e64 $src, (i32 0), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+ (DivergentBinFrag<srl> (shl_oneuse i32:$src, (sub 32, i32:$width)),
+ (sub 32, i32:$width)),
+ (V_BFE_U32_e64 $src, (i32 0), $width)
+>;
+
+def : AMDGPUPat <
+ (DivergentBinFrag<sra> (shl_oneuse i32:$src, (sub 32, i32:$width)),
+ (sub 32, i32:$width)),
+ (V_BFE_I32_e64 $src, (i32 0), $width)
+>;
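Each pattern above recognizes a different source-level way of writing an unsigned or signed bitfield extract and maps it onto V_BFE_U32 / V_BFE_I32. A minimal reference model of the unsigned form, with the shapes from the comments as test cases; the helper name is ad hoc:

    #include <cassert>
    #include <cstdint>

    // Extract `width` bits of `src` starting at bit `offset` (unsigned form).
    static uint32_t bfe_u32(uint32_t src, uint32_t offset, uint32_t width) {
      if (width == 0)
        return 0;
      const uint32_t mask = width >= 32 ? ~0u : (1u << width) - 1u;
      return (src >> (offset & 31)) & mask;
    }

    int main() {
      const uint32_t x = 0xdeadbeefu;
      for (uint32_t w = 1; w < 32; ++w) {
        assert((x & ((1u << w) - 1u)) == bfe_u32(x, 0, w));        // x & ((1 << y) - 1)
        assert(((x >> 5) & ((1u << w) - 1u)) == bfe_u32(x, 5, w)); // (srl x, 5) & mask
      }
      return 0;
    }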
+
+// SHA-256 Ma patterns
+
+// ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y
+def : AMDGPUPat <
+ (DivergentBinFrag<or> (and i32:$x, i32:$z),
+ (and i32:$y, (or i32:$x, i32:$z))),
+ (V_BFI_B32_e64 (V_XOR_B32_e64 i32:$x, i32:$y), i32:$z, i32:$y)
+>;
+
+def : AMDGPUPat <
+ (DivergentBinFrag<or> (and i64:$x, i64:$z),
+ (and i64:$y, (or i64:$x, i64:$z))),
+ (REG_SEQUENCE SReg_64,
+ (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG SReg_64:$y, sub0))),
+ (i32 (EXTRACT_SUBREG SReg_64:$z, sub0)),
+ (i32 (EXTRACT_SUBREG SReg_64:$y, sub0))), sub0,
+ (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG SReg_64:$y, sub1))),
+ (i32 (EXTRACT_SUBREG SReg_64:$z, sub1)),
+ (i32 (EXTRACT_SUBREG SReg_64:$y, sub1))), sub1)
+>;
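The SHA-256 Ma patterns above rely on the identity Ma(x, y, z) = BFI(x ^ y, z, y): where x and y agree the result is that common bit, otherwise z breaks the tie, which is exactly the majority function. A short standalone check; helper names are illustrative:

    #include <cassert>
    #include <cstdint>

    static uint32_t bfi32(uint32_t x, uint32_t y, uint32_t z) {
      return (y & x) | (z & ~x);
    }

    int main() {
      const uint32_t x = 0x12345678u, y = 0x9abcdef0u, z = 0x0f0f0f0fu;
      const uint32_t ma = (x & z) | (y & (x | z)); // majority, as written in the pattern
      assert(ma == bfi32(x ^ y, z, y));
      return 0;
    }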
multiclass IntMed3Pat<Instruction med3Inst,
SDPatternOperator min,
@@ -2267,8 +2426,8 @@ multiclass IntMed3Pat<Instruction med3Inst,
>;
}
-defm : IntMed3Pat<V_MED3_I32, smin, smax, smin_oneuse, smax_oneuse>;
-defm : IntMed3Pat<V_MED3_U32, umin, umax, umin_oneuse, umax_oneuse>;
+defm : IntMed3Pat<V_MED3_I32_e64, smin, smax, smin_oneuse, smax_oneuse>;
+defm : IntMed3Pat<V_MED3_U32_e64, umin, umax, umin_oneuse, umax_oneuse>;
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
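The comment above refers to the standard median identity: max(min(x, y), min(max(x, y), z)) is the median of the three operands, whichever of the 16 commuted forms the DAG presents. A brief illustration using integers:

    #include <algorithm>
    #include <cassert>

    static int med3(int x, int y, int z) {
      return std::max(std::min(x, y), std::min(std::max(x, y), z));
    }

    int main() {
      // The median of {7, -2, 5} is 5 regardless of operand order.
      assert(med3(7, -2, 5) == 5);
      assert(med3(-2, 7, 5) == 5);
      assert(med3(5, 7, -2) == 5);
      return 0;
    }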
@@ -2315,12 +2474,12 @@ multiclass Int16Med3Pat<Instruction med3Inst,
>;
}
-def : FPMed3Pat<f32, V_MED3_F32>;
+def : FPMed3Pat<f32, V_MED3_F32_e64>;
let OtherPredicates = [isGFX9Plus] in {
-def : FP16Med3Pat<f16, V_MED3_F16>;
-defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
-defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
+def : FP16Med3Pat<f16, V_MED3_F16_e64>;
+defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, smax_oneuse, smin_oneuse>;
+defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax, umax_oneuse, umin_oneuse>;
} // End Predicates = [isGFX9Plus]
class AMDGPUGenericInstruction : GenericInstruction {
@@ -2428,10 +2587,12 @@ def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction {
let Namespace = "AMDGPU" in {
def G_AMDGPU_ATOMIC_INC : G_ATOMICRMW_OP;
def G_AMDGPU_ATOMIC_DEC : G_ATOMICRMW_OP;
+def G_AMDGPU_ATOMIC_FMIN : G_ATOMICRMW_OP;
+def G_AMDGPU_ATOMIC_FMAX : G_ATOMICRMW_OP;
}
-class BufferAtomicGenericInstruction : AMDGPUGenericInstruction {
- let OutOperandList = (outs type0:$dst);
+class BufferAtomicGenericInstruction<bit NoRtn = 0> : AMDGPUGenericInstruction {
+ let OutOperandList = !if(NoRtn, (outs), (outs type0:$dst));
let InOperandList = (ins type0:$vdata, type1:$rsrc, type2:$vindex, type2:$voffset,
type2:$soffset, untyped_imm_0:$offset,
untyped_imm_0:$cachepolicy, untyped_imm_0:$idxen);
@@ -2452,6 +2613,7 @@ def G_AMDGPU_BUFFER_ATOMIC_OR : BufferAtomicGenericInstruction;
def G_AMDGPU_BUFFER_ATOMIC_XOR : BufferAtomicGenericInstruction;
def G_AMDGPU_BUFFER_ATOMIC_INC : BufferAtomicGenericInstruction;
def G_AMDGPU_BUFFER_ATOMIC_DEC : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_FADD : BufferAtomicGenericInstruction;
def G_AMDGPU_BUFFER_ATOMIC_CMPSWAP : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
@@ -2494,3 +2656,11 @@ def G_AMDGPU_INTRIN_IMAGE_STORE : AMDGPUGenericInstruction {
let hasSideEffects = 0;
let mayStore = 1;
}
+
+def G_AMDGPU_INTRIN_BVH_INTERSECT_RAY : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 2eb1c52f1b59..b39420f3c7db 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -58,33 +58,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdlib>
-#include <iterator>
-#include <utility>
using namespace llvm;
@@ -171,7 +149,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
return false;
 // TODO: We should be able to merge physical reg addresses.
- if (Register::isPhysicalRegister(AddrOp->getReg()))
+ if (AddrOp->getReg().isPhysical())
return false;
 // If an address has only one use then there will be no other
@@ -393,6 +371,15 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::DS_WRITE_B64:
case AMDGPU::DS_WRITE_B64_gfx9:
return DS_WRITE;
+ case AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa:
+ case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa:
+ case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa:
+ case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa:
+ case AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa:
+ case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa:
+ case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa:
+ case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa:
+ return UNKNOWN;
}
}
@@ -604,7 +591,7 @@ static void addDefsUsesToList(const MachineInstr &MI,
if (Op.isReg()) {
if (Op.isDef())
RegDefs.insert(Op.getReg());
- else if (Op.readsReg() && Register::isPhysicalRegister(Op.getReg()))
+ else if (Op.readsReg() && Op.getReg().isPhysical())
PhysRegUses.insert(Op.getReg());
}
}
@@ -633,11 +620,10 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<Register> &RegDefs,
// be moved for merging, then we need to move the def-instruction as well.
// This can only happen for physical registers such as M0; virtual
// registers are in SSA form.
- if (Use.isReg() &&
- ((Use.readsReg() && RegDefs.count(Use.getReg())) ||
- (Use.isDef() && RegDefs.count(Use.getReg())) ||
- (Use.isDef() && Register::isPhysicalRegister(Use.getReg()) &&
- PhysRegUses.count(Use.getReg())))) {
+ if (Use.isReg() && ((Use.readsReg() && RegDefs.count(Use.getReg())) ||
+ (Use.isDef() && RegDefs.count(Use.getReg())) ||
+ (Use.isDef() && Use.getReg().isPhysical() &&
+ PhysRegUses.count(Use.getReg())))) {
Insts.push_back(&MI);
addDefsUsesToList(MI, RegDefs, PhysRegUses);
return true;
@@ -1667,7 +1653,7 @@ Register SILoadStoreOptimizer::computeBase(MachineInstr &MI,
Register DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MachineInstr *LoHalf =
- BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
+ BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_CO_U32_e64), DestSub0)
.addReg(CarryReg, RegState::Define)
.addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
.add(OffsetLo)
@@ -1730,7 +1716,7 @@ SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) const {
// Expecting base computation as:
// %OFFSET0:sgpr_32 = S_MOV_B32 8000
// %LO:vgpr_32, %c:sreg_64_xexec =
-// V_ADD_I32_e64 %BASE_LO:vgpr_32, %103:sgpr_32,
+// V_ADD_CO_U32_e64 %BASE_LO:vgpr_32, %103:sgpr_32,
// %HI:vgpr_32, = V_ADDC_U32_e64 %BASE_HI:vgpr_32, 0, killed %c:sreg_64_xexec
// %Base:vreg_64 =
// REG_SEQUENCE %LO:vgpr_32, %subreg.sub0, %HI:vgpr_32, %subreg.sub1
@@ -1752,7 +1738,7 @@ void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base
MachineInstr *BaseLoDef = MRI->getUniqueVRegDef(BaseLo.getReg());
MachineInstr *BaseHiDef = MRI->getUniqueVRegDef(BaseHi.getReg());
- if (!BaseLoDef || BaseLoDef->getOpcode() != AMDGPU::V_ADD_I32_e64 ||
+ if (!BaseLoDef || BaseLoDef->getOpcode() != AMDGPU::V_ADD_CO_U32_e64 ||
!BaseHiDef || BaseHiDef->getOpcode() != AMDGPU::V_ADDC_U32_e64)
return;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 36d52ac3ee89..5839e59b4d7f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -48,28 +48,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
-#include <cassert>
-#include <iterator>
using namespace llvm;
@@ -99,6 +82,7 @@ private:
unsigned MovTermOpc;
unsigned Andn2TermOpc;
unsigned XorTermrOpc;
+ unsigned OrTermrOpc;
unsigned OrSaveExecOpc;
unsigned Exec;
@@ -106,14 +90,19 @@ private:
void emitElse(MachineInstr &MI);
void emitIfBreak(MachineInstr &MI);
void emitLoop(MachineInstr &MI);
- void emitEndCf(MachineInstr &MI);
+
+ MachineBasicBlock *emitEndCf(MachineInstr &MI);
+
+ void lowerInitExec(MachineBasicBlock *MBB, MachineInstr &MI);
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
SmallVectorImpl<MachineOperand> &Src) const;
void combineMasks(MachineInstr &MI);
- void process(MachineInstr &MI);
+ bool removeMBBifRedundant(MachineBasicBlock &MBB);
+
+ MachineBasicBlock *process(MachineInstr &MI);
// Skip to the next instruction, ignoring debug instructions, and trivial
// block boundaries (blocks that have one (typically fallthrough) successor,
@@ -122,6 +111,19 @@ private:
skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB,
MachineBasicBlock::iterator It) const;
+ /// Find the insertion point for a new conditional branch.
+ MachineBasicBlock::iterator
+ skipToUncondBrOrEnd(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ assert(I->isTerminator());
+
+ // FIXME: What if we had multiple pre-existing conditional branches?
+ MachineBasicBlock::iterator End = MBB.end();
+ while (I != End && !I->isUnconditionalBranch())
+ ++I;
+ return I;
+ }
+
// Remove redundant SI_END_CF instructions.
void optimizeEndCf();
@@ -141,9 +143,6 @@ public:
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
AU.addPreservedID(LiveVariablesID);
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -167,8 +166,7 @@ char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;
static bool hasKill(const MachineBasicBlock *Begin,
const MachineBasicBlock *End, const SIInstrInfo *TII) {
DenseSet<const MachineBasicBlock*> Visited;
- SmallVector<MachineBasicBlock *, 4> Worklist(Begin->succ_begin(),
- Begin->succ_end());
+ SmallVector<MachineBasicBlock *, 4> Worklist(Begin->successors());
while (!Worklist.empty()) {
MachineBasicBlock *MBB = Worklist.pop_back_val();
@@ -275,6 +273,10 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
.addReg(Tmp, RegState::Kill);
+ // Skip ahead to the unconditional branch in case there are other terminators
+ // present.
+ I = skipToUncondBrOrEnd(MBB, I);
+
// Insert the S_CBRANCH_EXECZ instruction which will be optimized later
// during SIRemoveShortExecBranches.
MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
@@ -315,44 +317,37 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
- bool ExecModified = MI.getOperand(3).getImm() != 0;
MachineBasicBlock::iterator Start = MBB.begin();
- // We are running before TwoAddressInstructions, and si_else's operands are
- // tied. In order to correctly tie the registers, split this into a copy of
- // the src like it does.
- Register CopyReg = MRI->createVirtualRegister(BoolRC);
- MachineInstr *CopyExec =
- BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
- .add(MI.getOperand(1)); // Saved EXEC
-
// This must be inserted before phis and any spill code inserted before the
// else.
- Register SaveReg = ExecModified ?
- MRI->createVirtualRegister(BoolRC) : DstReg;
+ Register SaveReg = MRI->createVirtualRegister(BoolRC);
MachineInstr *OrSaveExec =
BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
- .addReg(CopyReg);
+ .add(MI.getOperand(1)); // Saved EXEC
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
MachineBasicBlock::iterator ElsePt(MI);
- if (ExecModified) {
- MachineInstr *And =
- BuildMI(MBB, ElsePt, DL, TII->get(AndOpc), DstReg)
- .addReg(Exec)
- .addReg(SaveReg);
+ // This accounts for any modification of the EXEC mask within the block and
+ // can be optimized out pre-RA when not required.
+ MachineInstr *And = BuildMI(MBB, ElsePt, DL, TII->get(AndOpc), DstReg)
+ .addReg(Exec)
+ .addReg(SaveReg);
- if (LIS)
- LIS->InsertMachineInstrInMaps(*And);
- }
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*And);
MachineInstr *Xor =
BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec)
.addReg(Exec)
.addReg(DstReg);
+ // Skip ahead to the unconditional branch in case there are other terminators
+ // present.
+ ElsePt = skipToUncondBrOrEnd(MBB, ElsePt);
+
MachineInstr *Branch =
BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
.addMBB(DestBB);
@@ -365,18 +360,14 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
- LIS->InsertMachineInstrInMaps(*CopyExec);
LIS->InsertMachineInstrInMaps(*OrSaveExec);
LIS->InsertMachineInstrInMaps(*Xor);
LIS->InsertMachineInstrInMaps(*Branch);
- // src reg is tied to dst reg.
LIS->removeInterval(DstReg);
LIS->createAndComputeVirtRegInterval(DstReg);
- LIS->createAndComputeVirtRegInterval(CopyReg);
- if (ExecModified)
- LIS->createAndComputeVirtRegInterval(SaveReg);
+ LIS->createAndComputeVirtRegInterval(SaveReg);
// Let this be recomputed.
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
@@ -435,8 +426,9 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
.addReg(Exec)
.add(MI.getOperand(0));
+ auto BranchPt = skipToUncondBrOrEnd(MBB, MI.getIterator());
MachineInstr *Branch =
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ BuildMI(MBB, BranchPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.add(MI.getOperand(1));
if (LIS) {
@@ -479,19 +471,37 @@ SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
} while (true);
}
-void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
+MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- unsigned CFMask = MI.getOperand(0).getReg();
- MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock::iterator InsPt =
- Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
- : MBB.begin();
- MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
- .addReg(Exec)
- .add(MI.getOperand(0));
+ MachineBasicBlock::iterator InsPt = MBB.begin();
+
+ // If we have instructions that aren't prolog instructions, split the block
+ // and emit a terminator instruction. This ensures correct spill placement.
+ // FIXME: We should unconditionally split the block here.
+ bool NeedBlockSplit = false;
+ Register DataReg = MI.getOperand(0).getReg();
+ for (MachineBasicBlock::iterator I = InsPt, E = MI.getIterator();
+ I != E; ++I) {
+ if (I->modifiesRegister(DataReg, TRI)) {
+ NeedBlockSplit = true;
+ break;
+ }
+ }
+
+ unsigned Opcode = OrOpc;
+ MachineBasicBlock *SplitBB = &MBB;
+ if (NeedBlockSplit) {
+ SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/true, LIS);
+ Opcode = OrTermrOpc;
+ InsPt = MI;
+ }
+
+ MachineInstr *NewMI =
+ BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec)
+ .addReg(Exec)
+ .add(MI.getOperand(0));
LoweredEndCf.insert(NewMI);
@@ -512,6 +522,7 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
if (LIS)
LIS->handleMove(*NewMI);
+ return SplitBB;
}
// Returns replace operands for a logical operation, either single result
@@ -519,7 +530,7 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
SmallVectorImpl<MachineOperand> &Src) const {
MachineOperand &Op = MI.getOperand(OpNo);
- if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg())) {
+ if (!Op.isReg() || !Op.getReg().isVirtual()) {
Src.push_back(Op);
return;
}
@@ -539,7 +550,7 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
for (const auto &SrcOp : Def->explicit_operands())
if (SrcOp.isReg() && SrcOp.isUse() &&
- (Register::isVirtualRegister(SrcOp.getReg()) || SrcOp.getReg() == Exec))
+ (SrcOp.getReg().isVirtual() || SrcOp.getReg() == Exec))
Src.push_back(SrcOp);
}
@@ -593,15 +604,18 @@ void SILowerControlFlow::optimizeEndCf() {
if (LIS)
LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
+ removeMBBifRedundant(MBB);
}
}
}
-void SILowerControlFlow::process(MachineInstr &MI) {
+MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
MachineBasicBlock::iterator I(MI);
MachineInstr *Prev = (I != MBB.begin()) ? &*(std::prev(I)) : nullptr;
+ MachineBasicBlock *SplitBB = &MBB;
+
switch (MI.getOpcode()) {
case AMDGPU::SI_IF:
emitIf(MI);
@@ -620,7 +634,7 @@ void SILowerControlFlow::process(MachineInstr &MI) {
break;
case AMDGPU::SI_END_CF:
- emitEndCf(MI);
+ SplitBB = emitEndCf(MI);
break;
default:
@@ -645,6 +659,147 @@ void SILowerControlFlow::process(MachineInstr &MI) {
break;
}
}
+
+ return SplitBB;
+}
+
+void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
+ MachineInstr &MI) {
+ MachineFunction &MF = *MBB->getParent();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ bool IsWave32 = ST.isWave32();
+
+ if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
+ // This should be before all vector instructions.
+ BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
+ TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
+ .addImm(MI.getOperand(0).getImm());
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ return;
+ }
+
+ // Extract the thread count from an SGPR input and set EXEC accordingly.
+ // Since BFM can't shift by 64, handle that case with CMP + CMOV.
+ //
+ // S_BFE_U32 count, input, {shift, 7}
+ // S_BFM_B64 exec, count, 0
+ // S_CMP_EQ_U32 count, 64
+ // S_CMOV_B64 exec, -1
+ Register InputReg = MI.getOperand(0).getReg();
+ MachineInstr *FirstMI = &*MBB->begin();
+ if (InputReg.isVirtual()) {
+ MachineInstr *DefInstr = MRI->getVRegDef(InputReg);
+ assert(DefInstr && DefInstr->isCopy());
+ if (DefInstr->getParent() == MBB) {
+ if (DefInstr != FirstMI) {
+ // If the `InputReg` is defined in the current block, we also need to
+ // move that instruction to the beginning of the block.
+ DefInstr->removeFromParent();
+ MBB->insert(FirstMI, DefInstr);
+ if (LIS)
+ LIS->handleMove(*DefInstr);
+ } else {
+ // If the first instruction is the definition then move the pointer after it.
+ FirstMI = &*std::next(FirstMI->getIterator());
+ }
+ }
+ }
+
+ // Insert instruction sequence at block beginning (before vector operations).
+ const DebugLoc DL = MI.getDebugLoc();
+ const unsigned WavefrontSize = ST.getWavefrontSize();
+ const unsigned Mask = (WavefrontSize << 1) - 1;
+ Register CountReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg)
+ .addReg(InputReg)
+ .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
+ auto BfmMI =
+ BuildMI(*MBB, FirstMI, DL,
+ TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
+ .addReg(CountReg)
+ .addImm(0);
+ auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
+ .addReg(CountReg, RegState::Kill)
+ .addImm(WavefrontSize);
+ auto CmovMI =
+ BuildMI(*MBB, FirstMI, DL,
+ TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
+ Exec)
+ .addImm(-1);
+
+ if (!LIS) {
+ MI.eraseFromParent();
+ return;
+ }
+
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+
+ LIS->InsertMachineInstrInMaps(*BfeMI);
+ LIS->InsertMachineInstrInMaps(*BfmMI);
+ LIS->InsertMachineInstrInMaps(*CmpMI);
+ LIS->InsertMachineInstrInMaps(*CmovMI);
+
+ LIS->removeInterval(InputReg);
+ LIS->createAndComputeVirtRegInterval(InputReg);
+ LIS->createAndComputeVirtRegInterval(CountReg);
+}
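As a rough model of what the sequence built above computes, assuming a 64-lane wave: S_BFM_B64 produces a mask of `count` low bits, and the CMP/CMOV pair patches the count == 64 case that BFM cannot express. A standalone sketch; the helper name is made up:

    #include <cassert>
    #include <cstdint>

    static uint64_t execMaskFromCount(uint32_t count) {
      // S_BFM_B64 exec, count, 0 -> (1 << count) - 1; count == 64 is handled by
      // S_CMP_EQ_U32 + S_CMOV_B64 exec, -1.
      return count == 64 ? ~0ull : (1ull << count) - 1;
    }

    int main() {
      assert(execMaskFromCount(0) == 0);
      assert(execMaskFromCount(1) == 1);
      assert(execMaskFromCount(64) == ~0ull);
      return 0;
    }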
+
+bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
+ auto GetFallThroughSucc = [=](MachineBasicBlock *B) -> MachineBasicBlock * {
+ auto *S = B->getNextNode();
+ if (!S)
+ return nullptr;
+ if (B->isSuccessor(S)) {
+ // The only fallthrough candidate
+ MachineBasicBlock::iterator I(B->getFirstInstrTerminator());
+ MachineBasicBlock::iterator E = B->end();
+ for (; I != E; I++) {
+ if (I->isBranch() && TII->getBranchDestBlock(*I) == S)
+ // There is an unoptimized branch to the layout successor
+ return nullptr;
+ }
+ }
+ return S;
+ };
+
+ for (auto &I : MBB.instrs()) {
+ if (!I.isDebugInstr() && !I.isUnconditionalBranch())
+ return false;
+ }
+
+ assert(MBB.succ_size() == 1 && "MBB has more than one successor");
+
+ MachineBasicBlock *Succ = *MBB.succ_begin();
+ MachineBasicBlock *FallThrough = nullptr;
+
+ while (!MBB.predecessors().empty()) {
+ MachineBasicBlock *P = *MBB.pred_begin();
+ if (GetFallThroughSucc(P) == &MBB)
+ FallThrough = P;
+ P->ReplaceUsesOfBlockWith(&MBB, Succ);
+ }
+ MBB.removeSuccessor(Succ);
+ if (LIS) {
+ for (auto &I : MBB.instrs())
+ LIS->RemoveMachineInstrFromMaps(I);
+ }
+ MBB.clear();
+ MBB.eraseFromParent();
+ if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) {
+ if (!GetFallThroughSucc(Succ)) {
+ MachineFunction *MF = FallThrough->getParent();
+ MachineFunction::iterator FallThroughPos(FallThrough);
+ MF->splice(std::next(FallThroughPos), Succ);
+ } else
+ BuildMI(*FallThrough, FallThrough->end(),
+ FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
+ .addMBB(Succ);
+ }
+
+ return true;
}
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
@@ -666,6 +821,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
MovTermOpc = AMDGPU::S_MOV_B32_term;
Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
XorTermrOpc = AMDGPU::S_XOR_B32_term;
+ OrTermrOpc = AMDGPU::S_OR_B32_term;
OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
Exec = AMDGPU::EXEC_LO;
} else {
@@ -675,6 +831,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
MovTermOpc = AMDGPU::S_MOV_B64_term;
Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
XorTermrOpc = AMDGPU::S_XOR_B64_term;
+ OrTermrOpc = AMDGPU::S_OR_B64_term;
OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
Exec = AMDGPU::EXEC;
}
@@ -682,19 +839,21 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr *, 32> Worklist;
MachineFunction::iterator NextBB;
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; BI = NextBB) {
+ for (MachineFunction::iterator BI = MF.begin();
+ BI != MF.end(); BI = NextBB) {
NextBB = std::next(BI);
- MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock *MBB = &*BI;
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ MachineBasicBlock::iterator I, E, Next;
+ E = MBB->end();
+ for (I = MBB->begin(); I != E; I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
+ MachineBasicBlock *SplitMBB = MBB;
switch (MI.getOpcode()) {
case AMDGPU::SI_IF:
- process(MI);
+ SplitMBB = process(MI);
break;
case AMDGPU::SI_ELSE:
@@ -705,12 +864,25 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
if (InsertKillCleanups)
Worklist.push_back(&MI);
else
- process(MI);
+ SplitMBB = process(MI);
+ break;
+
+ // FIXME: find a better place for this
+ case AMDGPU::SI_INIT_EXEC:
+ case AMDGPU::SI_INIT_EXEC_FROM_INPUT:
+ lowerInitExec(MBB, MI);
+ if (LIS)
+ LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
break;
default:
break;
}
+
+ if (SplitMBB != MBB) {
+ MBB = Next->getParent();
+ E = MBB->end();
+ }
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 236a24a02ece..9570680ad9cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -22,20 +22,13 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-i1-copies"
@@ -89,16 +82,15 @@ private:
void lowerCopiesFromI1();
void lowerPhis();
void lowerCopiesToI1();
- bool isConstantLaneMask(unsigned Reg, bool &Val) const;
+ bool isConstantLaneMask(Register Reg, bool &Val) const;
void buildMergeLaneMasks(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
unsigned DstReg, unsigned PrevReg, unsigned CurReg);
MachineBasicBlock::iterator
getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
- bool isVreg1(unsigned Reg) const {
- return Register::isVirtualRegister(Reg) &&
- MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
+ bool isVreg1(Register Reg) const {
+ return Reg.isVirtual() && MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
}
bool isLaneMaskReg(unsigned Reg) const {
@@ -185,10 +177,8 @@ public:
}
}
- if (Divergent && PDT.dominates(&DefBlock, MBB)) {
- for (MachineBasicBlock *Succ : MBB->successors())
- Stack.push_back(Succ);
- }
+ if (Divergent && PDT.dominates(&DefBlock, MBB))
+ append_range(Stack, MBB->successors());
}
while (!Stack.empty()) {
@@ -197,8 +187,7 @@ public:
continue;
ReachableOrdered.push_back(MBB);
- for (MachineBasicBlock *Succ : MBB->successors())
- Stack.push_back(Succ);
+ append_range(Stack, MBB->successors());
}
for (MachineBasicBlock *MBB : ReachableOrdered) {
@@ -214,7 +203,7 @@ public:
ReachableMap[MBB] = true;
if (HaveReachablePred) {
for (MachineBasicBlock *UnreachablePred : Stack) {
- if (llvm::find(Predecessors, UnreachablePred) == Predecessors.end())
+ if (!llvm::is_contained(Predecessors, UnreachablePred))
Predecessors.push_back(UnreachablePred);
}
}
@@ -348,7 +337,7 @@ private:
if (DomIt != Visited.end() && DomIt->second <= LoopLevel)
return true;
- if (llvm::find(Blocks, &MBB) != Blocks.end())
+ if (llvm::is_contained(Blocks, &MBB))
return true;
return false;
@@ -658,7 +647,7 @@ void SILowerI1Copies::lowerPhis() {
}
}
- unsigned NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
+ Register NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
if (NewReg != DstReg) {
MRI->replaceRegWith(NewReg, DstReg);
MI->eraseFromParent();
@@ -703,8 +692,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
Register SrcReg = MI.getOperand(1).getReg();
assert(!MI.getOperand(1).getSubReg());
- if (!Register::isVirtualRegister(SrcReg) ||
- (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
+ if (!SrcReg.isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
unsigned TmpReg = createLaneMaskReg(*MF);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
@@ -740,7 +728,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
}
}
-bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const {
+bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const {
const MachineInstr *MI;
for (;;) {
MI = MRI->getUniqueVRegDef(Reg);
@@ -748,7 +736,7 @@ bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const {
break;
Reg = MI->getOperand(1).getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return false;
if (!isLaneMaskReg(Reg))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 1349d3b6bf3f..30405059530e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -16,19 +16,12 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -97,7 +90,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CS : CSI) {
// Insert the spill to the stack frame.
- unsigned Reg = CS.getReg();
+ MCRegister Reg = CS.getReg();
MachineInstrSpan MIS(I, &SaveBlock);
const TargetRegisterClass *RC =
@@ -184,6 +177,16 @@ void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
}
}
+// TODO: To support shrink wrapping, this would need to copy
+// PrologEpilogInserter's updateLiveness.
+static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
+ MachineBasicBlock &EntryBB = MF.front();
+
+ for (const CalleeSavedInfo &CSIReg : CSI)
+ EntryBB.addLiveIn(CSIReg.getReg());
+ EntryBB.sortUniqueLiveIns();
+}
+
bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
@@ -206,7 +209,8 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
for (unsigned I = 0; CSRegs[I]; ++I) {
- unsigned Reg = CSRegs[I];
+ MCRegister Reg = CSRegs[I];
+
if (SavedRegs.test(Reg)) {
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, MVT::i32);
@@ -221,6 +225,10 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
for (MachineBasicBlock *SaveBlock : SaveBlocks)
insertCSRSaves(*SaveBlock, CSI, LIS);
+ // Add live ins to save blocks.
+ assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
+ updateLiveness(MF, CSI);
+
for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
insertCSRRestores(*RestoreBlock, CSI, LIS);
return true;
@@ -233,38 +241,44 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
// Find lowest available VGPR and use it as VGPR reserved for SGPR spills.
static bool lowerShiftReservedVGPR(MachineFunction &MF,
const GCNSubtarget &ST) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineFrameInfo &FrameInfo = MF.getFrameInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- Register LowestAvailableVGPR, ReservedVGPR;
- ArrayRef<MCPhysReg> AllVGPR32s = ST.getRegisterInfo()->getAllVGPR32(MF);
- for (MCPhysReg Reg : AllVGPR32s) {
- if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg)) {
- LowestAvailableVGPR = Reg;
- break;
- }
- }
+ const Register PreReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill;
+ // Early out if pre-reservation of a VGPR for SGPR spilling is disabled.
+ if (!PreReservedVGPR)
+ return false;
+ // If there are no free lower VGPRs available, default to using the
+ // pre-reserved register instead.
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ Register LowestAvailableVGPR =
+ TRI->findUnusedRegister(MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF);
if (!LowestAvailableVGPR)
- return false;
+ LowestAvailableVGPR = PreReservedVGPR;
- ReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill;
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
- int i = 0;
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ Optional<int> FI;
+ // Check if we are reserving a CSR. Create a stack object for a possible spill
+ // in the function prologue.
+ if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR))
+ FI = FrameInfo.CreateSpillStackObject(4, Align(4));
+
+ // Find saved info about the pre-reserved register.
+ const auto *ReservedVGPRInfoItr =
+ llvm::find_if(FuncInfo->getSGPRSpillVGPRs(),
+ [PreReservedVGPR](const auto &SpillRegInfo) {
+ return SpillRegInfo.VGPR == PreReservedVGPR;
+ });
+
+ assert(ReservedVGPRInfoItr != FuncInfo->getSGPRSpillVGPRs().end());
+ auto Index =
+ std::distance(FuncInfo->getSGPRSpillVGPRs().begin(), ReservedVGPRInfoItr);
+
+ FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, Index);
for (MachineBasicBlock &MBB : MF) {
- for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
- if (Reg.VGPR == ReservedVGPR) {
- MBB.removeLiveIn(ReservedVGPR);
- MBB.addLiveIn(LowestAvailableVGPR);
- Optional<int> FI;
- if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR))
- FI = FrameInfo.CreateSpillStackObject(4, Align(4));
-
- FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, i);
- }
- ++i;
- }
+ assert(LowestAvailableVGPR.isValid() && "Did not find an available VGPR");
+ MBB.addLiveIn(LowestAvailableVGPR);
MBB.sortUniqueLiveIns();
}
@@ -300,11 +314,15 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
+ std::unique_ptr<RegScavenger> RS;
+
+ bool NewReservedRegs = false;
// TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
// handled as SpilledToReg in regular PrologEpilogInserter.
- if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
- SpillVGPRToAGPR) {
+ const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
+ (HasCSRs || FuncInfo->hasSpilledSGPRs());
+ if (HasSGPRSpillToVGPR || SpillVGPRToAGPR) {
// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
// are spilled to VGPRs, in which case we can eliminate the stack usage.
//
@@ -329,7 +347,13 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
TRI->isAGPR(MRI, VReg))) {
- TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
+ NewReservedRegs = true;
+ if (!RS)
+ RS.reset(new RegScavenger());
+
+ // FIXME: change to enterBasicBlockEnd()
+ RS->enterBasicBlock(MBB);
+ TRI->eliminateFrameIndex(MI, 0, FIOp, RS.get());
continue;
}
}
@@ -340,6 +364,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+ NewReservedRegs = true;
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
(void)Spilled;
assert(Spilled && "failed to spill SGPR to VGPR when allocated");
@@ -368,5 +393,9 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
SaveBlocks.clear();
RestoreBlocks.clear();
+ // Update the reserved registers with any VGPRs added for SGPR spills.
+ if (NewReservedRegs)
+ MRI.freezeReservedRegs(MF);
+
return MadeChange;
}
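// Illustrative aside, not part of the patch above: the lazily constructed
// RegScavenger pattern the SILowerSGPRSpills hunk introduces, reduced to a
// free function with placeholder parameters. It assumes only generic CodeGen
// headers; the FIXME about enterBasicBlockEnd() applies here as well.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include <memory>

static void sketchEliminateFI(llvm::MachineBasicBlock &MBB,
                              llvm::MachineBasicBlock::iterator MI,
                              unsigned FIOperandNum,
                              const llvm::TargetRegisterInfo &TRI,
                              std::unique_ptr<llvm::RegScavenger> &RS) {
  // Construct the scavenger only if a spill actually needs it.
  if (!RS)
    RS = std::make_unique<llvm::RegScavenger>();
  // Position the scavenger in the block before handing it to
  // eliminateFrameIndex, which may use it to find an emergency register.
  RS->enterBasicBlock(MBB);
  TRI.eliminateFrameIndex(MI, /*SPAdj=*/0, FIOperandNum, RS.get());
}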
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 788e9873f780..9a0cdc7b1f4d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -7,21 +7,7 @@
//===----------------------------------------------------------------------===//
#include "SIMachineFunctionInfo.h"
-#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUTargetMachine.h"
-#include "AMDGPUSubtarget.h"
-#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/Function.h"
-#include <cassert>
-#include <vector>
#define MAX_LANES 64
@@ -75,16 +61,18 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
if (!isEntryFunction()) {
- // Non-entry functions have no special inputs for now, other registers
- // required for scratch access.
- ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
-
// TODO: Pick a high register, and shift down, similar to a kernel.
FrameOffsetReg = AMDGPU::SGPR33;
StackPtrOffsetReg = AMDGPU::SGPR32;
- ArgInfo.PrivateSegmentBuffer =
- ArgDescriptor::createRegister(ScratchRSrcReg);
+ if (!ST.enableFlatScratch()) {
+ // Non-entry functions have no special inputs for now, other registers
+ // required for scratch access.
+ ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
+
+ ArgInfo.PrivateSegmentBuffer =
+ ArgDescriptor::createRegister(ScratchRSrcReg);
+ }
if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
ImplicitArgPtr = true;
@@ -142,7 +130,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
if (isAmdHsaOrMesa) {
- PrivateSegmentBuffer = true;
+ if (!ST.enableFlatScratch())
+ PrivateSegmentBuffer = true;
if (UseFixedABI) {
DispatchPtr = true;
@@ -167,11 +156,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
KernargSegmentPtr = true;
- if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
+ if (ST.hasFlatAddressSpace() && isEntryFunction() &&
+ (isAmdHsaOrMesa || ST.enableFlatScratch())) {
// TODO: This could be refined a lot. The attribute is a poor way of
// detecting calls or stack objects that may require it before argument
// lowering.
- if (HasCalls || HasStackObjects)
+ if (HasCalls || HasStackObjects || ST.enableFlatScratch())
FlatScratchInit = true;
}
@@ -352,6 +342,8 @@ bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
Register LaneVGPR = TRI->findUnusedRegister(
MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
+ if (LaneVGPR == Register())
+ return false;
SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
return true;
@@ -537,21 +529,21 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
}
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
- const llvm::SIMachineFunctionInfo& MFI,
- const TargetRegisterInfo &TRI)
- : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
- MaxKernArgAlign(MFI.getMaxKernArgAlign()),
- LDSSize(MFI.getLDSSize()),
- IsEntryFunction(MFI.isEntryFunction()),
- NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
- MemoryBound(MFI.isMemoryBound()),
- WaveLimiter(MFI.needsWaveLimiter()),
- HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
- ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
- FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
- StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
- ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
- Mode(MFI.getMode()) {}
+ const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI)
+ : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
+ MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
+ DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
+ NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
+ MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
+ HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
+ HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
+ HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
+ Occupancy(MFI.getOccupancy()),
+ ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
+ FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
+ StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
+ ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
+}
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
@@ -562,11 +554,15 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
LDSSize = YamlMFI.LDSSize;
+ DynLDSAlign = YamlMFI.DynLDSAlign;
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
+ Occupancy = YamlMFI.Occupancy;
IsEntryFunction = YamlMFI.IsEntryFunction;
NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
MemoryBound = YamlMFI.MemoryBound;
WaveLimiter = YamlMFI.WaveLimiter;
+ HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
+ HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
return false;
}
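// Illustrative aside, not part of the patch above: the YamlIO mapOptional
// pattern that the new MIR fields (dynLDSAlign, occupancy, hasSpilledSGPRs,
// hasSpilledVGPRs) plug into, shown on a hypothetical struct with placeholder
// keys rather than the real SIMachineFunctionInfo mapping.
#include "llvm/Support/YAMLTraits.h"

struct SketchYamlInfo {
  unsigned Occupancy = 0;
  bool HasSpilledSGPRs = false;
};

namespace llvm {
namespace yaml {
template <> struct MappingTraits<SketchYamlInfo> {
  static void mapping(IO &YamlIO, SketchYamlInfo &Info) {
    // The third argument is the default: matching values are omitted on
    // output, and missing keys take it on input.
    YamlIO.mapOptional("occupancy", Info.Occupancy, 0u);
    YamlIO.mapOptional("hasSpilledSGPRs", Info.HasSpilledSGPRs, false);
  }
};
} // end namespace yaml
} // end namespace llvm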
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index cf1629fda0af..35fb43162199 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -17,28 +17,17 @@
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <array>
-#include <cassert>
-#include <utility>
-#include <vector>
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;
+class SIMachineFunctionInfo;
+class SIRegisterInfo;
class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
@@ -76,6 +65,8 @@ public:
static bool classof(const PseudoSourceValue *V) {
return V->kind() == PSVBuffer;
}
+
+ void printCustom(raw_ostream &OS) const override { OS << "BufferResource"; }
};
class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
@@ -87,6 +78,8 @@ public:
static bool classof(const PseudoSourceValue *V) {
return V->kind() == PSVImage;
}
+
+ void printCustom(raw_ostream &OS) const override { OS << "ImageResource"; }
};
class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
@@ -277,12 +270,18 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
uint64_t ExplicitKernArgSize = 0;
unsigned MaxKernArgAlign = 0;
unsigned LDSSize = 0;
+ Align DynLDSAlign;
bool IsEntryFunction = false;
bool NoSignedZerosFPMath = false;
bool MemoryBound = false;
bool WaveLimiter = false;
+ bool HasSpilledSGPRs = false;
+ bool HasSpilledVGPRs = false;
uint32_t HighBitsOf32BitAddress = 0;
+ // TODO: 10 may be a better default since it's the maximum.
+ unsigned Occupancy = 0;
+
StringValue ScratchRSrcReg = "$private_rsrc_reg";
StringValue FrameOffsetReg = "$fp_reg";
StringValue StackPtrOffsetReg = "$sp_reg";
@@ -304,10 +303,13 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
UINT64_C(0));
YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
+ YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
+ YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
+ YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
StringValue("$private_rsrc_reg"));
YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
@@ -318,6 +320,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("mode", MFI.Mode, SIMode());
YamlIO.mapOptional("highBitsOf32BitAddress",
MFI.HighBitsOf32BitAddress, 0u);
+ YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
}
};
@@ -370,10 +373,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// unit. Minimum - first, maximum - second.
std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
- DenseMap<const Value *,
- std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
- DenseMap<const Value *,
- std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
+ std::unique_ptr<const AMDGPUBufferPseudoSourceValue> BufferPSV;
+ std::unique_ptr<const AMDGPUImagePseudoSourceValue> ImagePSV;
std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
private:
@@ -684,9 +685,9 @@ public:
return ArgInfo.getPreloadedValue(Value);
}
- Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+ MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
- return Arg ? Arg->getRegister() : Register();
+ return Arg ? Arg->getRegister() : MCRegister();
}
unsigned getGITPtrHigh() const {
@@ -884,22 +885,18 @@ public:
return LDSWaveSpillSize;
}
- const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
- const Value *BufferRsrc) {
- assert(BufferRsrc);
- auto PSV = BufferPSVs.try_emplace(
- BufferRsrc,
- std::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
- return PSV.first->second.get();
+ const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII) {
+ if (!BufferPSV)
+ BufferPSV = std::make_unique<AMDGPUBufferPseudoSourceValue>(TII);
+
+ return BufferPSV.get();
}
- const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
- const Value *ImgRsrc) {
- assert(ImgRsrc);
- auto PSV = ImagePSVs.try_emplace(
- ImgRsrc,
- std::make_unique<AMDGPUImagePseudoSourceValue>(TII));
- return PSV.first->second.get();
+ const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII) {
+ if (!ImagePSV)
+ ImagePSV = std::make_unique<AMDGPUImagePseudoSourceValue>(TII);
+
+ return ImagePSV.get();
}
const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 3ba05aadbbbe..278dd05b049c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -12,29 +12,10 @@
//===----------------------------------------------------------------------===//
#include "SIMachineScheduler.h"
-#include "AMDGPU.h"
#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <map>
-#include <set>
-#include <utility>
-#include <vector>
using namespace llvm;
@@ -375,8 +356,8 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
// Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7
// The use of findDefBetween removes the case 4.
for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
- unsigned Reg = RegMaskPair.RegUnit;
- if (Register::isVirtualRegister(Reg) &&
+ Register Reg = RegMaskPair.RegUnit;
+ if (Reg.isVirtual() &&
isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
LIS)) {
@@ -763,8 +744,7 @@ void SIScheduleBlockCreator::colorHighLatenciesGroups() {
// depend (order dependency) on one of the
// instruction in the block, and are required for the
// high latency instruction we add.
- AdditionalElements.insert(AdditionalElements.end(),
- SubGraph.begin(), SubGraph.end());
+ llvm::append_range(AdditionalElements, SubGraph);
}
}
if (CompatibleGroup) {
@@ -1682,9 +1662,9 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
// Tracking of currently alive registers to determine VGPR Usage.
void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
- for (unsigned Reg : Regs) {
+ for (Register Reg : Regs) {
// For now only track virtual registers.
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
// If not already in the live set, then add it.
(void) LiveRegs.insert(Reg);
@@ -1742,9 +1722,9 @@ SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
std::vector<int> DiffSetPressure;
DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
- for (unsigned Reg : InRegs) {
+ for (Register Reg : InRegs) {
// For now only track virtual registers.
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
if (LiveRegsConsumers[Reg] > 1)
continue;
@@ -1754,9 +1734,9 @@ SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
}
}
- for (unsigned Reg : OutRegs) {
+ for (Register Reg : OutRegs) {
// For now only track virtual registers.
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
for (; PSetI.isValid(); ++PSetI) {
@@ -1902,9 +1882,9 @@ SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
VgprUsage = 0;
SgprUsage = 0;
for (_Iterator RegI = First; RegI != End; ++RegI) {
- unsigned Reg = *RegI;
+ Register Reg = *RegI;
// For now only track virtual registers
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
PSetIterator PSetI = MRI.getPressureSets(Reg);
for (; PSetI.isValid(); ++PSetI) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
index 02e0a3fe1b61..a2f5a1453d6a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -14,20 +14,18 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
-#include "SIInstrInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include <cassert>
#include <cstdint>
-#include <map>
-#include <memory>
#include <set>
#include <vector>
namespace llvm {
+class SIInstrInfo;
+class SIRegisterInfo;
+
enum SIScheduleCandReason {
NoCand,
RegUsage,
@@ -455,7 +453,7 @@ public:
MachineRegisterInfo *getMRI() { return &MRI; }
const TargetRegisterInfo *getTRI() { return TRI; }
ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
- SUnit& getEntrySU() { return EntrySU; }
+ SUnit &getEntrySU() { return EntrySU; }
SUnit& getExitSU() { return ExitSU; }
void restoreSULinksLeft();
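// Illustrative aside, not part of the patch above: the register-pressure-set
// walk used by the SIMachineScheduler.cpp loops above (checkRegUsageImpact,
// fillVgprSgprCost), reduced to a free function with placeholder names.
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <vector>

static void sketchAddPressure(const llvm::MachineRegisterInfo &MRI,
                              llvm::Register Reg,
                              std::vector<int> &DiffSetPressure) {
  // Only virtual registers are tracked, matching the checks added above.
  if (!Reg.isVirtual())
    return;
  // Each virtual register contributes its weight to every pressure set it
  // belongs to.
  llvm::PSetIterator PSetI = MRI.getPressureSets(Reg);
  for (; PSetI.isValid(); ++PSetI)
    DiffSetPressure[*PSetI] += PSetI.getWeight();
}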
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 4e6c72ca20e2..3caa75e4d958 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -15,31 +15,13 @@
#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/BitmaskEnum.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/MathExtras.h"
-#include <cassert>
-#include <list>
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -47,6 +29,10 @@ using namespace llvm::AMDGPU;
#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"
+static cl::opt<bool> AmdgcnSkipCacheInvalidations(
+ "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
+ cl::desc("Use this to skip inserting cache invalidating instructions."));
+
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
@@ -125,6 +111,7 @@ private:
SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
bool IsCrossAddressSpaceOrdering = false;
+ bool IsVolatile = false;
bool IsNonTemporal = false;
SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
@@ -134,11 +121,13 @@ private:
bool IsCrossAddressSpaceOrdering = true,
AtomicOrdering FailureOrdering =
AtomicOrdering::SequentiallyConsistent,
+ bool IsVolatile = false,
bool IsNonTemporal = false)
: Ordering(Ordering), FailureOrdering(FailureOrdering),
Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
InstrAddrSpace(InstrAddrSpace),
IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
+ IsVolatile(IsVolatile),
IsNonTemporal(IsNonTemporal) {
// There is also no cross address space ordering if the ordering
// address space is the same as the instruction address space and
@@ -186,7 +175,13 @@ public:
}
/// \returns True if memory access of the machine instruction used to
- /// create this SIMemOpInfo is non-temporal, false otherwise.
+ /// create this SIMemOpInfo is volatile, false otherwise.
+ bool isVolatile() const {
+ return IsVolatile;
+ }
+
+ /// \returns True if memory access of the machine instruction used to
+ /// create this SIMemOpInfo is nontemporal, false otherwise.
bool isNonTemporal() const {
return IsNonTemporal;
}
@@ -249,12 +244,15 @@ public:
class SICacheControl {
protected:
+ /// AMDGPU subtarget info.
+ const GCNSubtarget &ST;
+
/// Instruction info.
const SIInstrInfo *TII = nullptr;
IsaVersion IV;
- /// Whether to insert cache invalidation instructions.
+ /// Whether to insert cache invalidating instructions.
bool InsertCacheInv;
SICacheControl(const GCNSubtarget &ST);
@@ -271,28 +269,21 @@ public:
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const = 0;
- /// Update \p MI memory instruction to indicate it is
- /// nontemporal. Return true iff the instruction was modified.
- virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
- const = 0;
+ /// Update \p MI memory instruction of kind \p Op associated with address
+ /// spaces \p AddrSpace to indicate it is volatile and/or nontemporal. Return
+ /// true iff the instruction was modified.
+ virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
+ SIAtomicAddrSpace AddrSpace,
+ SIMemOp Op, bool IsVolatile,
+ bool IsNonTemporal) const = 0;
/// Inserts any necessary instructions at position \p Pos relative
- /// to instruction \p MI to ensure any caches associated with
- /// address spaces \p AddrSpace for memory scopes up to memory scope
- /// \p Scope are invalidated. Returns true iff any instructions
- /// inserted.
- virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const = 0;
-
- /// Inserts any necessary instructions at position \p Pos relative
- /// to instruction \p MI to ensure memory instructions of kind \p Op
- /// associated with address spaces \p AddrSpace have completed as
- /// observed by other memory instructions executing in memory scope
- /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
- /// ordering is between address spaces. Returns true iff any
- /// instructions inserted.
+ /// to instruction \p MI to ensure memory instructions before \p Pos of kind
+ /// \p Op associated with address spaces \p AddrSpace have completed. Used
+ /// between memory instructions to enforce the order they become visible as
+ /// observed by other memory instructions executing in memory scope \p Scope.
+ /// \p IsCrossAddrSpaceOrdering indicates if the memory ordering is between
+ /// address spaces. Returns true iff any instructions inserted.
virtual bool insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
@@ -300,6 +291,28 @@ public:
bool IsCrossAddrSpaceOrdering,
Position Pos) const = 0;
+ /// Inserts any necessary instructions at position \p Pos relative to
+ /// instruction \p MI to ensure any subsequent memory instructions of this
+ /// thread with address spaces \p AddrSpace will observe the previous memory
+ /// operations by any thread for memory scopes up to memory scope \p Scope .
+ /// Returns true iff any instructions inserted.
+ virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const = 0;
+
+ /// Inserts any necessary instructions at position \p Pos relative to
+ /// instruction \p MI to ensure previous memory instructions by this thread
+ /// with address spaces \p AddrSpace have completed and can be observed by
+ /// subsequent memory instructions by any thread executing in memory scope \p
+ /// Scope. \p IsCrossAddrSpaceOrdering indicates if the memory ordering is
+ /// between address spaces. Returns true iff any instructions inserted.
+ virtual bool insertRelease(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const = 0;
+
/// Virtual destructor to allow derivations to be deleted.
virtual ~SICacheControl() = default;
@@ -328,12 +341,10 @@ public:
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
- bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
-
- bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const override;
+ bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
+ SIAtomicAddrSpace AddrSpace, SIMemOp Op,
+ bool IsVolatile,
+ bool IsNonTemporal) const override;
bool insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -341,6 +352,17 @@ public:
SIMemOp Op,
bool IsCrossAddrSpaceOrdering,
Position Pos) const override;
+
+ bool insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const override;
+
+ bool insertRelease(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const override;
};
class SIGfx7CacheControl : public SIGfx6CacheControl {
@@ -348,16 +370,15 @@ public:
SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
- bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const override;
+ bool insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const override;
};
class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:
- bool CuMode = false;
/// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
/// is modified, false otherwise.
@@ -367,19 +388,16 @@ protected:
public:
- SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
- SIGfx7CacheControl(ST), CuMode(CuMode) {};
+ SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {};
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
- bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
-
- bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const override;
+ bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
+ SIAtomicAddrSpace AddrSpace, SIMemOp Op,
+ bool IsVolatile,
+ bool IsNonTemporal) const override;
bool insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -387,6 +405,11 @@ public:
SIMemOp Op,
bool IsCrossAddrSpaceOrdering,
Position Pos) const override;
+
+ bool insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const override;
};
class SIMemoryLegalizer final : public MachineFunctionPass {
@@ -525,11 +548,13 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
bool IsNonTemporal = true;
+ bool IsVolatile = false;
// Validator should check whether or not MMOs cover the entire set of
// locations accessed by the memory instruction.
for (const auto &MMO : MI->memoperands()) {
IsNonTemporal &= MMO->isNonTemporal();
+ IsVolatile |= MMO->isVolatile();
InstrAddrSpace |=
toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
AtomicOrdering OpOrdering = MMO->getOrdering();
@@ -572,7 +597,8 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
}
}
return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
- IsCrossAddressSpaceOrdering, FailureOrdering, IsNonTemporal);
+ IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
+ IsNonTemporal);
}
Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
@@ -650,10 +676,10 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
return constructFromMIWithMMO(MI);
}
-SICacheControl::SICacheControl(const GCNSubtarget &ST) {
+SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
TII = ST.getInstrInfo();
IV = getIsaVersion(ST.getCPU());
- InsertCacheInv = !ST.isAmdPalOS();
+ InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}
/* static */
@@ -663,7 +689,7 @@ std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
return std::make_unique<SIGfx6CacheControl>(ST);
if (Generation < AMDGPUSubtarget::GFX10)
return std::make_unique<SIGfx7CacheControl>(ST);
- return std::make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
+ return std::make_unique<SIGfx10CacheControl>(ST);
}
bool SIGfx6CacheControl::enableLoadCacheBypass(
@@ -674,9 +700,6 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
bool Changed = false;
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
- /// TODO: Do not set glc for rmw atomic operations as they
- /// implicitly bypass the L1 cache.
-
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -697,64 +720,48 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
/// sequentially consistent, and no other thread can access scratch
/// memory.
- /// Other address spaces do not hava a cache.
+ /// Other address spaces do not have a cache.
return Changed;
}
-bool SIGfx6CacheControl::enableNonTemporal(
- const MachineBasicBlock::iterator &MI) const {
+bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
+ MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
+ bool IsVolatile, bool IsNonTemporal) const {
+ // Only handle load and store, not atomic read-modify-write instructions. The
+ // latter use glc to indicate if the atomic returns a result and so must not
+ // be used for cache control.
assert(MI->mayLoad() ^ MI->mayStore());
- bool Changed = false;
-
- /// TODO: Do not enableGLCBit if rmw atomic.
- Changed |= enableGLCBit(MI);
- Changed |= enableSLCBit(MI);
- return Changed;
-}
-
-bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const {
- if (!InsertCacheInv)
- return false;
+ // Only update load and store, not LLVM IR atomic read-modify-write
+ // instructions. The latter are always marked as volatile, so they cannot
+ // sensibly be handled here without pessimizing all atomics. They also do not
+ // support the nontemporal attribute.
+ assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
bool Changed = false;
- MachineBasicBlock &MBB = *MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
+ if (IsVolatile) {
+ if (Op == SIMemOp::LOAD)
+ Changed |= enableGLCBit(MI);
- if (Pos == Position::AFTER)
- ++MI;
+ // Ensure operation has completed at system scope to cause all volatile
+ // operations to be visible outside the program in a global order. Do not
+ // request cross address space as only the global address space can be
+ // observable outside the program, so no need to cause a waitcnt for LDS
+ // address space operations.
+ Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
+ Position::AFTER);
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
- switch (Scope) {
- case SIAtomicScope::SYSTEM:
- case SIAtomicScope::AGENT:
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
- Changed = true;
- break;
- case SIAtomicScope::WORKGROUP:
- case SIAtomicScope::WAVEFRONT:
- case SIAtomicScope::SINGLETHREAD:
- // No cache to invalidate.
- break;
- default:
- llvm_unreachable("Unsupported synchronization scope");
- }
+ return Changed;
}
- /// The scratch address space does not need the global memory cache
- /// to be flushed as all memory operations by the same thread are
- /// sequentially consistent, and no other thread can access scratch
- /// memory.
-
- /// Other address spaces do not hava a cache.
-
- if (Pos == Position::AFTER)
- --MI;
+ if (IsNonTemporal) {
+ // Request L1 MISS_EVICT and L2 STREAM for load and store instructions.
+ Changed |= enableGLCBit(MI);
+ Changed |= enableSLCBit(MI);
+ return Changed;
+ }
return Changed;
}
@@ -776,7 +783,8 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
bool VMCnt = false;
bool LGKMCnt = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
+ SIAtomicAddrSpace::NONE) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -798,12 +806,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
case SIAtomicScope::WORKGROUP:
- // If no cross address space ordering then an LDS waitcnt is not
- // needed as LDS operations for all waves are executed in a
- // total global ordering as observed by all waves. Required if
- // also synchronizing with global/GDS memory as LDS operations
- // could be reordered with respect to later global/GDS memory
- // operations of the same wave.
+ // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
+ // not needed as LDS operations for all waves are executed in a total
+ // global ordering as observed by all waves. Required if also
+ // synchronizing with global/GDS memory as LDS operations could be
+ // reordered with respect to later global/GDS memory operations of the
+ // same wave.
LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WAVEFRONT:
@@ -820,12 +828,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- // If no cross address space ordering then an GDS waitcnt is not
- // needed as GDS operations for all waves are executed in a
- // total global ordering as observed by all waves. Required if
- // also synchronizing with global/LDS memory as GDS operations
- // could be reordered with respect to later global/LDS memory
- // operations of the same wave.
+ // If no cross address space ordering then a GDS "S_WAITCNT lgkmcnt(0)"
+ // is not needed as GDS operations for all waves are executed in a total
+ // global ordering as observed by all waves. Required if also
+ // synchronizing with global/LDS memory as GDS operations could be
+ // reordered with respect to later global/LDS memory operations of the
+ // same wave.
LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WORKGROUP:
@@ -855,10 +863,64 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
return Changed;
}
-bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const {
+bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
+ if (!InsertCacheInv)
+ return false;
+
+ bool Changed = false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Pos == Position::AFTER)
+ ++MI;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
+ Changed = true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No cache to invalidate.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory cache
+ /// to be flushed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ if (Pos == Position::AFTER)
+ --MI;
+
+ return Changed;
+}
+
+bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const {
+ return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
+ IsCrossAddrSpaceOrdering, Pos);
+}
+
+bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
if (!InsertCacheInv)
return false;
@@ -869,9 +931,9 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
- const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
- ? AMDGPU::BUFFER_WBINVL1
- : AMDGPU::BUFFER_WBINVL1_VOL;
+ const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
+ ? AMDGPU::BUFFER_WBINVL1
+ : AMDGPU::BUFFER_WBINVL1_VOL;
if (Pos == Position::AFTER)
++MI;
@@ -880,7 +942,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- BuildMI(MBB, MI, DL, TII->get(Flush));
+ BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
Changed = true;
break;
case SIAtomicScope::WORKGROUP:
@@ -898,7 +960,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
/// sequentially consistent, and no other thread can access scratch
/// memory.
- /// Other address spaces do not hava a cache.
+ /// Other address spaces do not have a cache.
if (Pos == Position::AFTER)
--MI;
@@ -926,9 +988,9 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
case SIAtomicScope::WORKGROUP:
// In WGP mode the waves of a work-group can be executing on either CU of
// the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
- // CU mode and all waves of a work-group are on the same CU, and so the
- // L0 does not need to be bypassed.
- if (!CuMode) Changed |= enableGLCBit(MI);
+ // CU mode all waves of a work-group are on the same CU, and so the L0
+ // does not need to be bypassed.
+ if (!ST.isCuModeEnabled()) Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
@@ -944,73 +1006,50 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
/// sequentially consistent, and no other thread can access scratch
/// memory.
- /// Other address spaces do not hava a cache.
+ /// Other address spaces do not have a cache.
return Changed;
}
-bool SIGfx10CacheControl::enableNonTemporal(
- const MachineBasicBlock::iterator &MI) const {
+bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
+ MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
+ bool IsVolatile, bool IsNonTemporal) const {
+
+ // Only handle load and store, not atomic read-modify-write instructions. The
+ // latter use glc to indicate if the atomic returns a result and so must not
+ // be used for cache control.
assert(MI->mayLoad() ^ MI->mayStore());
- bool Changed = false;
- Changed |= enableSLCBit(MI);
- /// TODO for store (non-rmw atomic) instructions also enableGLCBit(MI)
-
- return Changed;
-}
-
-bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const {
- if (!InsertCacheInv)
- return false;
+ // Only update load and store, not LLVM IR atomic read-modify-write
+ // instructions. The latter are always marked as volatile, so they cannot
+ // sensibly be handled here without pessimizing all atomics. They also do not
+ // support the nontemporal attribute.
+ assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
bool Changed = false;
- MachineBasicBlock &MBB = *MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
-
- if (Pos == Position::AFTER)
- ++MI;
+ if (IsVolatile) {
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
- switch (Scope) {
- case SIAtomicScope::SYSTEM:
- case SIAtomicScope::AGENT:
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
- Changed = true;
- break;
- case SIAtomicScope::WORKGROUP:
- // In WGP mode the waves of a work-group can be executing on either CU of
- // the WGP. Therefore need to invalidate the L0 which is per CU. Otherwise
- // in CU mode and all waves of a work-group are on the same CU, and so the
- // L0 does not need to be invalidated.
- if (!CuMode) {
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
- Changed = true;
- }
- break;
- case SIAtomicScope::WAVEFRONT:
- case SIAtomicScope::SINGLETHREAD:
- // No cache to invalidate.
- break;
- default:
- llvm_unreachable("Unsupported synchronization scope");
+ if (Op == SIMemOp::LOAD) {
+ Changed |= enableGLCBit(MI);
+ Changed |= enableDLCBit(MI);
}
- }
-
- /// The scratch address space does not need the global memory cache
- /// to be flushed as all memory operations by the same thread are
- /// sequentially consistent, and no other thread can access scratch
- /// memory.
- /// Other address spaces do not hava a cache.
+ // Ensure operation has completed at system scope to cause all volatile
+ // operations to be visible outside the program in a global order. Do not
+ // request cross address space as only the global address space can be
+ // observable outside the program, so no need to cause a waitcnt for LDS
+ // address space operations.
+ Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
+ Position::AFTER);
+ return Changed;
+ }
- if (Pos == Position::AFTER)
- --MI;
+ if (IsNonTemporal) {
+ // Request L0/L1 HIT_EVICT and L2 STREAM for load and store instructions.
+ Changed |= enableSLCBit(MI);
+ return Changed;
+ }
return Changed;
}
@@ -1033,7 +1072,8 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
bool VSCnt = false;
bool LGKMCnt = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
+ SIAtomicAddrSpace::NONE) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -1048,7 +1088,7 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// they are visible to waves in the other CU as the L0 is per CU.
// Otherwise in CU mode and all waves of a work-group are on the same CU
// which shares the same L0.
- if (!CuMode) {
+ if (!ST.isCuModeEnabled()) {
if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
VMCnt |= true;
if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
@@ -1070,12 +1110,12 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
case SIAtomicScope::WORKGROUP:
- // If no cross address space ordering then an LDS waitcnt is not
- // needed as LDS operations for all waves are executed in a
- // total global ordering as observed by all waves. Required if
- // also synchronizing with global/GDS memory as LDS operations
- // could be reordered with respect to later global/GDS memory
- // operations of the same wave.
+ // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
+ // not needed as LDS operations for all waves are executed in a total
+ // global ordering as observed by all waves. Required if also
+ // synchronizing with global/GDS memory as LDS operations could be
+ // reordered with respect to later global/GDS memory operations of the
+ // same wave.
LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WAVEFRONT:
@@ -1092,12 +1132,12 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- // If no cross address space ordering then an GDS waitcnt is not
- // needed as GDS operations for all waves are executed in a
- // total global ordering as observed by all waves. Required if
- // also synchronizing with global/LDS memory as GDS operations
- // could be reordered with respect to later global/LDS memory
- // operations of the same wave.
+ // If no cross address space ordering then a GDS "S_WAITCNT lgkmcnt(0)"
+ // is not needed as GDS operations for all waves are executed in a total
+ // global ordering as observed by all waves. Required if also
+ // synchronizing with global/LDS memory as GDS operations could be
+ // reordered with respect to later global/LDS memory operations of the
+ // same wave.
LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WORKGROUP:
@@ -1134,6 +1174,61 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
return Changed;
}
+bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
+ if (!InsertCacheInv)
+ return false;
+
+ bool Changed = false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Pos == Position::AFTER)
+ ++MI;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
+ Changed = true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // In WGP mode the waves of a work-group can be executing on either CU of
+ // the WGP. Therefore need to invalidate the L0 which is per CU. Otherwise
+ // in CU mode and all waves of a work-group are on the same CU, and so the
+ // L0 does not need to be invalidated.
+ if (!ST.isCuModeEnabled()) {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
+ Changed = true;
+ }
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No cache to invalidate.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory cache
+ /// to be flushed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ if (Pos == Position::AFTER)
+ --MI;
+
+ return Changed;
+}
+
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
if (AtomicPseudoMIs.empty())
return false;
@@ -1173,20 +1268,20 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
SIMemOp::LOAD,
MOI.getIsCrossAddressSpaceOrdering(),
Position::AFTER);
- Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- Position::AFTER);
+ Changed |= CC->insertAcquire(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ Position::AFTER);
}
return Changed;
}
- // Atomic instructions do not have the nontemporal attribute.
- if (MOI.isNonTemporal()) {
- Changed |= CC->enableNonTemporal(MI);
- return Changed;
- }
-
+ // Atomic instructions already bypass caches to the scope specified by the
+ // SyncScope operand. Only non-atomic volatile and nontemporal instructions
+ // need additional treatment.
+ Changed |= CC->enableVolatileAndOrNonTemporal(MI, MOI.getInstrAddrSpace(),
+ SIMemOp::LOAD, MOI.isVolatile(),
+ MOI.isNonTemporal());
return Changed;
}
@@ -1199,21 +1294,20 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
if (MOI.isAtomic()) {
if (MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= CC->insertWait(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- SIMemOp::LOAD | SIMemOp::STORE,
- MOI.getIsCrossAddressSpaceOrdering(),
- Position::BEFORE);
+ Changed |= CC->insertRelease(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
return Changed;
}
- // Atomic instructions do not have the nontemporal attribute.
- if (MOI.isNonTemporal()) {
- Changed |= CC->enableNonTemporal(MI);
- return Changed;
- }
-
+ // Atomic instructions already bypass caches to the scope specified by the
+ // SyncScope operand. Only non-atomic volatile and nontemporal instructions
+ // need additional treatment.
+ Changed |= CC->enableVolatileAndOrNonTemporal(
+ MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
+ MOI.isNonTemporal());
return Changed;
}
@@ -1235,19 +1329,23 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
/// ordering and memory scope, then library does not need to
/// generate a fence. Could add support in this file for
/// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
- /// adding waitcnt before a S_BARRIER.
- Changed |= CC->insertWait(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- SIMemOp::LOAD | SIMemOp::STORE,
- MOI.getIsCrossAddressSpaceOrdering(),
- Position::BEFORE);
+ /// adding S_WAITCNT before an S_BARRIER.
+ Changed |= CC->insertRelease(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
+
+ // TODO: If both release and invalidate are happening they could be combined
+ // to use the single "BUFFER_WBL2" instruction. This could be done by
+ // reorganizing this code or as part of optimizing SIInsertWaitcnt pass to
+ // track cache invalidate and write back instructions.
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- Position::BEFORE);
+ Changed |= CC->insertAcquire(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ Position::BEFORE);
return Changed;
}
@@ -1266,11 +1364,10 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= CC->insertWait(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- SIMemOp::LOAD | SIMemOp::STORE,
- MOI.getIsCrossAddressSpaceOrdering(),
- Position::BEFORE);
+ Changed |= CC->insertRelease(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
@@ -1283,9 +1380,9 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
SIMemOp::STORE,
MOI.getIsCrossAddressSpaceOrdering(),
Position::AFTER);
- Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- Position::AFTER);
+ Changed |= CC->insertAcquire(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ Position::AFTER);
}
return Changed;
@@ -1303,7 +1400,8 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
for (auto &MBB : MF) {
for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
- if (MI->getOpcode() == TargetOpcode::BUNDLE && MI->mayLoadOrStore()) {
+ // Unbundle instructions after the post-RA scheduler.
+ if (MI->isBundle()) {
MachineBasicBlock::instr_iterator II(MI->getIterator());
for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
I != E && I->isBundledWithPred(); ++I) {
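// Illustrative aside, not part of the patch above: the hidden, default-off
// cl::opt pattern that the new amdgcn-skip-cache-invalidations flag in
// SIMemoryLegalizer.cpp follows; the option name and use here are placeholders.
#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<bool> SketchSkipInv(
    "sketch-skip-invalidations", llvm::cl::init(false), llvm::cl::Hidden,
    llvm::cl::desc("Example of a hidden boolean option that defaults to off."));

// A pass would typically sample the flag once, as SICacheControl's constructor
// does with InsertCacheInv above.
static bool sketchInsertCacheInv() { return !SketchSkipInv; }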
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 0e162ac42c11..3d659eca47db 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -14,20 +14,9 @@
//===----------------------------------------------------------------------===//
//
#include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include <queue>
#define DEBUG_TYPE "si-mode-register"
@@ -242,8 +231,10 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
Status IPChange;
for (MachineInstr &MI : MBB) {
Status InstrMode = getInstructionMode(MI, TII);
- if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) ||
- (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) {
+ if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
+ MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
+ MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
+ MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
// We preserve any explicit mode register setreg instruction we encounter,
// as we assume it has been inserted by a higher authority (this is
// likely to be a very rare occurrence).
@@ -267,7 +258,8 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
// If this is an immediate then we know the value being set, but if it is
// not an immediate then we treat the modified bits of the mode register
// as unknown.
- if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
+ if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
+ MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
unsigned Mode = (Val << Offset) & Mask;
Status Setreg = Status(Mask, Mode);
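For the immediate forms, the pass derives the newly written mode bits as (Val << Offset) & Mask and records them as known. A minimal standalone model of that bookkeeping, where the field offset and width are illustrative rather than the hardware encoding:

#include <cstdint>
#include <cstdio>

// Track which bits of a mode register are known and what their values are.
struct ModeStatus {
  uint32_t KnownMask = 0;  // bits whose value is known
  uint32_t Value = 0;      // values of the known bits
};

// Apply an immediate "setreg" that writes Width bits starting at Offset.
static ModeStatus applySetRegImm(ModeStatus Cur, uint32_t Imm,
                                 unsigned Offset, unsigned Width) {
  uint32_t Mask = ((Width == 32) ? ~0u : ((1u << Width) - 1)) << Offset;
  uint32_t NewBits = (Imm << Offset) & Mask;
  Cur.KnownMask |= Mask;                       // these bits are now known
  Cur.Value = (Cur.Value & ~Mask) | NewBits;   // and hold the written value
  return Cur;
}

int main() {
  ModeStatus S;
  S = applySetRegImm(S, /*Imm=*/3, /*Offset=*/0, /*Width=*/2);
  std::printf("known=0x%x value=0x%x\n", S.KnownMask, S.Value);
}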
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index a9717c6ffb70..54f20912d0a9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -7,15 +7,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -176,13 +171,17 @@ static unsigned getSaveExecOp(unsigned Opc) {
}
// These are only terminators to get correct spill code placement during
-// register allocation, so turn them back into normal instructions. Only one of
-// these is expected per block.
+// register allocation, so turn them back into normal instructions.
static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
switch (MI.getOpcode()) {
- case AMDGPU::S_MOV_B64_term:
case AMDGPU::S_MOV_B32_term: {
- MI.setDesc(TII.get(AMDGPU::COPY));
+ bool RegSrc = MI.getOperand(1).isReg();
+ MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
+ return true;
+ }
+ case AMDGPU::S_MOV_B64_term: {
+ bool RegSrc = MI.getOperand(1).isReg();
+ MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64));
return true;
}
case AMDGPU::S_XOR_B64_term: {
@@ -197,6 +196,12 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
MI.setDesc(TII.get(AMDGPU::S_XOR_B32));
return true;
}
+ case AMDGPU::S_OR_B64_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(TII.get(AMDGPU::S_OR_B64));
+ return true;
+ }
case AMDGPU::S_OR_B32_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
@@ -220,19 +225,29 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
}
}
+// Turn all pseudoterminators in the block into their equivalent non-terminator
+// instructions. Returns the reverse iterator to the first non-terminator
+// instruction in the block.
static MachineBasicBlock::reverse_iterator fixTerminators(
const SIInstrInfo &TII,
MachineBasicBlock &MBB) {
MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
+
+ bool Seen = false;
+ MachineBasicBlock::reverse_iterator FirstNonTerm = I;
for (; I != E; ++I) {
if (!I->isTerminator())
- return I;
+ return Seen ? FirstNonTerm : I;
- if (removeTerminatorBit(TII, *I))
- return I;
+ if (removeTerminatorBit(TII, *I)) {
+ if (!Seen) {
+ FirstNonTerm = I;
+ Seen = true;
+ }
+ }
}
- return E;
+ return FirstNonTerm;
}
static MachineBasicBlock::reverse_iterator findExecCopy(
@@ -291,8 +306,20 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
if (I == E)
continue;
- Register CopyToExec = isCopyToExec(*I, ST);
- if (!CopyToExec.isValid())
+ // It's possible to see other terminator copies after the exec copy. This
+ // can happen if control flow pseudos had their outputs used by phis.
+ Register CopyToExec;
+
+ unsigned SearchCount = 0;
+ const unsigned SearchLimit = 5;
+ while (I != E && SearchCount++ < SearchLimit) {
+ CopyToExec = isCopyToExec(*I, ST);
+ if (CopyToExec)
+ break;
+ ++I;
+ }
+
+ if (!CopyToExec)
continue;
// Scan backwards to find the def.
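The rewritten loop above caps how far the pass searches past other terminator copies before giving up (a SearchLimit of 5). The same bounded-scan shape over a plain container, as an assumed stand-in for the instruction iterator walk:

#include <cstdio>
#include <vector>

// Scan forward through Items, at most Limit steps, for the first value that
// satisfies Pred. Returns its index, or -1 if nothing matched within the limit.
template <typename T, typename Pred>
static int boundedFind(const std::vector<T> &Items, Pred P, unsigned Limit) {
  unsigned Count = 0;
  for (unsigned I = 0; I < Items.size() && Count++ < Limit; ++I)
    if (P(Items[I]))
      return static_cast<int>(I);
  return -1;
}

int main() {
  std::vector<int> Vals{7, 9, 11, 4, 13};
  int Idx = boundedFind(Vals, [](int V) { return V % 2 == 0; }, /*Limit=*/5);
  std::printf("found at %d\n", Idx);  // prints 3
}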
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 8af00fcf62a8..162e96655df2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -13,9 +13,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
@@ -31,6 +30,17 @@ private:
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+
+ unsigned AndOpc;
+ unsigned Andn2Opc;
+ unsigned OrSaveExecOpc;
+ unsigned XorTermrOpc;
+ MCRegister CondReg;
+ MCRegister ExecReg;
+
+ Register optimizeVcndVcmpPair(MachineBasicBlock &MBB);
+ bool optimizeElseBranch(MachineBasicBlock &MBB);
public:
static char ID;
@@ -68,11 +78,28 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
return new SIOptimizeExecMaskingPreRA();
}
-static bool isFullExecCopy(const MachineInstr& MI, const GCNSubtarget& ST) {
- unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+// See if there is a def between \p AndIdx and \p SelIdx that needs to live
+// beyond \p AndIdx.
+static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx,
+ SlotIndex SelIdx) {
+ LiveQueryResult AndLRQ = LR.Query(AndIdx);
+ return (!AndLRQ.isKill() && AndLRQ.valueIn() != LR.Query(SelIdx).valueOut());
+}
+
+// FIXME: Why do we bother trying to handle physical registers here?
+static bool isDefBetween(const SIRegisterInfo &TRI,
+ LiveIntervals *LIS, Register Reg,
+ const MachineInstr &Sel, const MachineInstr &And) {
+ SlotIndex AndIdx = LIS->getInstructionIndex(And);
+ SlotIndex SelIdx = LIS->getInstructionIndex(Sel);
+
+ if (Reg.isVirtual())
+ return isDefBetween(LIS->getInterval(Reg), AndIdx, SelIdx);
- if (MI.isFullCopy() && MI.getOperand(1).getReg() == Exec)
- return true;
+ for (MCRegUnitIterator UI(Reg.asMCReg(), &TRI); UI.isValid(); ++UI) {
+ if (isDefBetween(LIS->getRegUnit(*UI), AndIdx, SelIdx))
+ return true;
+ }
return false;
}
@@ -93,75 +120,71 @@ static bool isFullExecCopy(const MachineInstr& MI, const GCNSubtarget& ST) {
// lanes.
//
// Returns %cc register on success.
-static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
- const GCNSubtarget &ST,
- MachineRegisterInfo &MRI,
- LiveIntervals *LIS) {
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const SIInstrInfo *TII = ST.getInstrInfo();
- bool Wave32 = ST.isWave32();
- const unsigned AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
- const unsigned Andn2Opc = Wave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
- const unsigned CondReg = Wave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
- const unsigned ExecReg = Wave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
-
+Register
+SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
return Opc == AMDGPU::S_CBRANCH_VCCZ ||
Opc == AMDGPU::S_CBRANCH_VCCNZ; });
if (I == MBB.terminators().end())
- return AMDGPU::NoRegister;
+ return Register();
- auto *And = TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister,
- *I, MRI, LIS);
+ auto *And =
+ TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *I, *MRI, LIS);
if (!And || And->getOpcode() != AndOpc ||
!And->getOperand(1).isReg() || !And->getOperand(2).isReg())
- return AMDGPU::NoRegister;
+ return Register();
MachineOperand *AndCC = &And->getOperand(1);
Register CmpReg = AndCC->getReg();
unsigned CmpSubReg = AndCC->getSubReg();
- if (CmpReg == ExecReg) {
+ if (CmpReg == Register(ExecReg)) {
AndCC = &And->getOperand(2);
CmpReg = AndCC->getReg();
CmpSubReg = AndCC->getSubReg();
- } else if (And->getOperand(2).getReg() != ExecReg) {
- return AMDGPU::NoRegister;
+ } else if (And->getOperand(2).getReg() != Register(ExecReg)) {
+ return Register();
}
- auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, MRI, LIS);
+ auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, *MRI, LIS);
if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
Cmp->getParent() != And->getParent())
- return AMDGPU::NoRegister;
+ return Register();
MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
if (Op1->isImm() && Op2->isReg())
std::swap(Op1, Op2);
if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
- return AMDGPU::NoRegister;
+ return Register();
Register SelReg = Op1->getReg();
- auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, MRI, LIS);
+ auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
- return AMDGPU::NoRegister;
+ return Register();
if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
- return AMDGPU::NoRegister;
+ return Register();
Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
Op1->getImm() != 0 || Op2->getImm() != 1)
- return AMDGPU::NoRegister;
+ return Register();
+
+ Register CCReg = CC->getReg();
+
+ // If there was a def between the select and the and, we would need to move it
+ // to fold this.
+ if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
+ return Register();
LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
<< *And);
- Register CCReg = CC->getReg();
LIS->RemoveMachineInstrFromMaps(*And);
MachineInstr *Andn2 =
BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
@@ -180,8 +203,8 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
// Try to remove the compare. The compare's value must not be used between the
// compare and the s_and_b64 if it is VCC, or must simply be unused if it is
// any other register.
- if ((Register::isVirtualRegister(CmpReg) && MRI.use_nodbg_empty(CmpReg)) ||
- (CmpReg == CondReg &&
+ if ((CmpReg.isVirtual() && MRI->use_nodbg_empty(CmpReg)) ||
+ (CmpReg == Register(CondReg) &&
std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
[&](const MachineInstr &MI) {
return MI.readsRegister(CondReg, TRI);
@@ -192,7 +215,7 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
Cmp->eraseFromParent();
// Try to remove v_cndmask_b32.
- if (Register::isVirtualRegister(SelReg) && MRI.use_nodbg_empty(SelReg)) {
+ if (SelReg.isVirtual() && MRI->use_nodbg_empty(SelReg)) {
LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
LIS->RemoveMachineInstrFromMaps(*Sel);
@@ -203,6 +226,81 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
return CCReg;
}
+// Optimize sequence
+// %dst = S_OR_SAVEEXEC %src
+// ... instructions not modifying exec ...
+// %tmp = S_AND $exec, %dst
+// $exec = S_XOR_term $exec, %tmp
+// =>
+// %dst = S_OR_SAVEEXEC %src
+// ... instructions not modifying exec ...
+// $exec = S_XOR_term $exec, %dst
+//
+// Clean up potentially unnecessary code added for safety during
+// control flow lowering.
+//
+// Return whether any changes were made to MBB.
+bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) {
+ if (MBB.empty())
+ return false;
+
+ // Check this is an else block.
+ auto First = MBB.begin();
+ MachineInstr &SaveExecMI = *First;
+ if (SaveExecMI.getOpcode() != OrSaveExecOpc)
+ return false;
+
+ auto I = llvm::find_if(MBB.terminators(), [this](const MachineInstr &MI) {
+ return MI.getOpcode() == XorTermrOpc;
+ });
+ if (I == MBB.terminators().end())
+ return false;
+
+ MachineInstr &XorTermMI = *I;
+ if (XorTermMI.getOperand(1).getReg() != Register(ExecReg))
+ return false;
+
+ Register SavedExecReg = SaveExecMI.getOperand(0).getReg();
+ Register DstReg = XorTermMI.getOperand(2).getReg();
+
+ // Find potentially unnecessary S_AND
+ MachineInstr *AndExecMI = nullptr;
+ I--;
+ while (I != First && !AndExecMI) {
+ if (I->getOpcode() == AndOpc && I->getOperand(0).getReg() == DstReg &&
+ I->getOperand(1).getReg() == Register(ExecReg))
+ AndExecMI = &*I;
+ I--;
+ }
+ if (!AndExecMI)
+ return false;
+
+ // Check for exec modifying instructions.
+ // Note: exec defs do not create live ranges beyond the
+ // instruction so isDefBetween cannot be used.
+ // Instead just check that the def segments are adjacent.
+ SlotIndex StartIdx = LIS->getInstructionIndex(SaveExecMI);
+ SlotIndex EndIdx = LIS->getInstructionIndex(*AndExecMI);
+ for (MCRegUnitIterator UI(ExecReg, TRI); UI.isValid(); ++UI) {
+ LiveRange &RegUnit = LIS->getRegUnit(*UI);
+ if (RegUnit.find(StartIdx) != std::prev(RegUnit.find(EndIdx)))
+ return false;
+ }
+
+ // Remove unnecessary S_AND
+ LIS->removeInterval(SavedExecReg);
+ LIS->removeInterval(DstReg);
+
+ SaveExecMI.getOperand(0).setReg(DstReg);
+
+ LIS->RemoveMachineInstrFromMaps(*AndExecMI);
+ AndExecMI->eraseFromParent();
+
+ LIS->createAndComputeVirtRegInterval(DstReg);
+
+ return true;
+}
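The comment block before optimizeElseBranch describes the rewrite structurally: an S_OR_SAVEEXEC at the block start, a redundant S_AND of exec with its result, and an S_XOR_term consuming that result. A toy version of the same rewrite over an invented instruction record (the Inst fields below are made up for illustration, not MIR):

#include <cstdio>
#include <string>
#include <vector>

struct Inst { std::string Op, Dst, Src0, Src1; };

// If the block matches: dst = OR_SAVEEXEC src; ...; tmp = AND exec, dst;
// exec = XOR_term exec, tmp -- then drop the AND and feed dst to the XOR.
static bool dropRedundantAnd(std::vector<Inst> &Block) {
  if (Block.size() < 3 || Block.front().Op != "OR_SAVEEXEC")
    return false;
  Inst &Xor = Block.back();
  if (Xor.Op != "XOR_term" || Xor.Src0 != "exec")
    return false;
  for (size_t I = 1; I + 1 < Block.size(); ++I) {
    Inst &And = Block[I];
    if (And.Op == "AND" && And.Src0 == "exec" &&
        And.Src1 == Block.front().Dst && And.Dst == Xor.Src1) {
      Xor.Src1 = Block.front().Dst;       // bypass the AND result
      Block.erase(Block.begin() + I);     // remove the redundant AND
      return true;
    }
  }
  return false;
}

int main() {
  std::vector<Inst> B{{"OR_SAVEEXEC", "d", "s", ""},
                      {"AND", "t", "exec", "d"},
                      {"XOR_term", "exec", "exec", "t"}};
  std::printf("changed: %d, insts left: %zu\n", dropRedundantAnd(B), B.size());
}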
+
bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -211,16 +309,28 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
MRI = &MF.getRegInfo();
-
- MachineRegisterInfo &MRI = MF.getRegInfo();
- LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
- DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
- unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ LIS = &getAnalysis<LiveIntervals>();
+
+ const bool Wave32 = ST.isWave32();
+ AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ Andn2Opc = Wave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
+ OrSaveExecOpc =
+ Wave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
+ XorTermrOpc = Wave32 ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
+ CondReg = MCRegister::from(Wave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
+ ExecReg = MCRegister::from(Wave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
+
+ DenseSet<Register> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
- if (unsigned Reg = optimizeVcndVcmpPair(MBB, ST, MRI, LIS)) {
+ if (optimizeElseBranch(MBB)) {
+ RecalcRegs.insert(AMDGPU::SCC);
+ Changed = true;
+ }
+
+ if (Register Reg = optimizeVcndVcmpPair(MBB)) {
RecalcRegs.insert(Reg);
RecalcRegs.insert(AMDGPU::VCC_LO);
RecalcRegs.insert(AMDGPU::VCC_HI);
@@ -301,16 +411,18 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
unsigned ScanThreshold = 10;
for (auto I = MBB.rbegin(), E = MBB.rend(); I != E
&& ScanThreshold--; ++I) {
- if (!isFullExecCopy(*I, ST))
+ // Continue scanning if this is not a full exec copy
+ if (!(I->isFullCopy() && I->getOperand(1).getReg() == Register(ExecReg)))
continue;
Register SavedExec = I->getOperand(0).getReg();
- if (SavedExec.isVirtual() && MRI.hasOneNonDBGUse(SavedExec) &&
- MRI.use_instr_nodbg_begin(SavedExec)->getParent() == I->getParent()) {
+ if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec) &&
+ MRI->use_instr_nodbg_begin(SavedExec)->getParent() ==
+ I->getParent()) {
LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n');
LIS->RemoveMachineInstrFromMaps(*I);
I->eraseFromParent();
- MRI.replaceRegWith(SavedExec, Exec);
+ MRI->replaceRegWith(SavedExec, ExecReg);
LIS->removeInterval(SavedExec);
Changed = true;
}
@@ -320,9 +432,9 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
if (Changed) {
for (auto Reg : RecalcRegs) {
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
LIS->removeInterval(Reg);
- if (!MRI.reg_empty(Reg))
+ if (!MRI->reg_empty(Reg))
LIS->createAndComputeVirtRegInterval(Reg);
} else {
LIS->removeAllRegUnitsForPhysReg(Reg);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 9a1855c3458b..7d7a753bb333 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -10,47 +10,21 @@
///
/// E.g. original:
/// V_LSHRREV_B32_e32 %0, 16, %1
-/// V_ADD_I32_e32 %2, %0, %3
+/// V_ADD_CO_U32_e32 %2, %0, %3
/// V_LSHLREV_B32_e32 %4, 16, %2
///
/// Replace:
-/// V_ADD_I32_sdwa %4, %1, %3
+/// V_ADD_CO_U32_sdwa %4, %1, %3
/// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
///
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/Config/llvm-config.h"
-#include "llvm/MC/LaneBitmask.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <memory>
-#include <unordered_map>
using namespace llvm;
@@ -570,8 +544,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (Register::isPhysicalRegister(Src1->getReg()) ||
- Register::isPhysicalRegister(Dst->getReg()))
+ if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
break;
if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
@@ -609,8 +582,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (Register::isPhysicalRegister(Src1->getReg()) ||
- Register::isPhysicalRegister(Dst->getReg()))
+ if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
break;
if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
@@ -625,8 +597,8 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
break;
}
- case AMDGPU::V_BFE_I32:
- case AMDGPU::V_BFE_U32: {
+ case AMDGPU::V_BFE_I32_e64:
+ case AMDGPU::V_BFE_U32_e64: {
// e.g.:
// from: v_bfe_u32 v1, v0, 8, 8
// to SDWA src:v0 src_sel:BYTE_1
@@ -673,12 +645,11 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (Register::isPhysicalRegister(Src0->getReg()) ||
- Register::isPhysicalRegister(Dst->getReg()))
+ if (Src0->getReg().isPhysical() || Dst->getReg().isPhysical())
break;
return std::make_unique<SDWASrcOperand>(
- Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32);
+ Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32_e64);
}
case AMDGPU::V_AND_B32_e32:
@@ -702,8 +673,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (Register::isPhysicalRegister(ValSrc->getReg()) ||
- Register::isPhysicalRegister(Dst->getReg()))
+ if (ValSrc->getReg().isPhysical() || Dst->getReg().isPhysical())
break;
return std::make_unique<SDWASrcOperand>(
@@ -863,19 +833,19 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
}
// Convert the V_ADDC_U32_e64 into V_ADDC_U32_e32, and
-// V_ADD_I32_e64 into V_ADD_I32_e32. This allows isConvertibleToSDWA
-// to perform its transformation on V_ADD_I32_e32 into V_ADD_I32_sdwa.
+// V_ADD_CO_U32_e64 into V_ADD_CO_U32_e32. This allows isConvertibleToSDWA
+// to perform its transformation on V_ADD_CO_U32_e32 into V_ADD_CO_U32_sdwa.
//
// We are transforming from a VOP3 into a VOP2 form of the instruction.
// %19:vgpr_32 = V_AND_B32_e32 255,
// killed %16:vgpr_32, implicit $exec
-// %47:vgpr_32, %49:sreg_64_xexec = V_ADD_I32_e64
+// %47:vgpr_32, %49:sreg_64_xexec = V_ADD_CO_U32_e64
// %26.sub0:vreg_64, %19:vgpr_32, implicit $exec
// %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
// %26.sub1:vreg_64, %54:vgpr_32, killed %49:sreg_64_xexec, implicit $exec
//
// becomes
-// %47:vgpr_32 = V_ADD_I32_sdwa
+// %47:vgpr_32 = V_ADD_CO_U32_sdwa
// 0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0,
// implicit-def $vcc, implicit $exec
// %48:vgpr_32 = V_ADDC_U32_e32
@@ -883,8 +853,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
const GCNSubtarget &ST) const {
int Opc = MI.getOpcode();
- assert((Opc == AMDGPU::V_ADD_I32_e64 || Opc == AMDGPU::V_SUB_I32_e64) &&
- "Currently only handles V_ADD_I32_e64 or V_SUB_I32_e64");
+ assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) &&
+ "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64");
// Can the candidate MI be shrunk?
if (!TII->canShrink(MI, *MRI))
@@ -992,6 +962,16 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
if (Opc == AMDGPU::V_CNDMASK_B32_e32)
return false;
+ if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
+ if (!Src0->isReg() && !Src0->isImm())
+ return false;
+ }
+
+ if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {
+ if (!Src1->isReg() && !Src1->isImm())
+ return false;
+ }
+
return true;
}
@@ -1235,8 +1215,8 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
const auto &Operand = OperandPair.second;
MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
if (PotentialMI &&
- (PotentialMI->getOpcode() == AMDGPU::V_ADD_I32_e64 ||
- PotentialMI->getOpcode() == AMDGPU::V_SUB_I32_e64))
+ (PotentialMI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
+ PotentialMI->getOpcode() == AMDGPU::V_SUB_CO_U32_e64))
pseudoOpConvertToVOP2(*PotentialMI, ST);
}
SDWAOperands.clear();
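Beyond the opcode renames (V_ADD_I32 to V_ADD_CO_U32, V_BFE to its _e64 form), the pass's underlying idea is that a 16-bit shift feeding another VALU instruction can be folded into an SDWA source select on the consumer. A toy standalone matcher for the shift half of that pattern, with invented struct and enum names standing in for real MachineInstrs:

#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>

enum class SrcSel { DWORD, WORD_1 };

struct ShiftInst { uint32_t SrcReg; unsigned Amount; };  // value = SrcReg >> Amount

// If the shift extracts the upper half-word, describe it as an SDWA-style
// source select on the original register instead of a separate instruction.
static std::optional<std::pair<uint32_t, SrcSel>>
matchWordSelect(const ShiftInst &SI) {
  if (SI.Amount == 16)                       // upper 16 bits extracted
    return std::make_pair(SI.SrcReg, SrcSel::WORD_1);
  return std::nullopt;                       // leave other shifts alone
}

int main() {
  ShiftInst SI{/*SrcReg=*/5, /*Amount=*/16};
  if (auto Sel = matchWordSelect(SI))
    std::printf("use reg %u with src_sel WORD_1\n", Sel->first);
}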
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
index 4c72fa235975..ab05081e55d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -13,13 +13,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
+#include "GCNSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index 09dfe8753792..dc08d9dcb9bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -12,19 +12,13 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -92,11 +86,10 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
return false;
Register Reg = MO.getReg();
-
- if (!TRI->isVGPR(*MRI, Reg))
+ if (Reg.isPhysical())
return false;
- if (Register::isPhysicalRegister(Reg))
+ if (!TRI->isVGPR(*MRI, Reg))
return false;
if (VRM->hasPhys(Reg))
@@ -104,7 +97,7 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
LiveInterval &LI = LIS->getInterval(Reg);
- for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
+ for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
if (!MRI->isPhysRegUsed(PhysReg) &&
Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
Matrix->assign(LI, PhysReg);
@@ -126,7 +119,7 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
continue;
const Register VirtReg = MO.getReg();
- if (Register::isPhysicalRegister(VirtReg))
+ if (VirtReg.isPhysical())
continue;
if (!VRM->hasPhys(VirtReg))
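processDef now checks for a physical register before the VGPR test and then walks the allocation order, taking the first physical register that is unused and interference-free. Stripped of LiveRegMatrix and expressed over plain sets, purely as an illustration:

#include <cstdio>
#include <set>
#include <vector>

// Pick the first candidate register that is neither already used in the
// function nor in conflict with the value being assigned.
static int pickFirstFree(const std::vector<int> &AllocationOrder,
                         const std::set<int> &UsedRegs,
                         const std::set<int> &Interfering) {
  for (int PhysReg : AllocationOrder)
    if (!UsedRegs.count(PhysReg) && !Interfering.count(PhysReg))
      return PhysReg;
  return -1;  // no register available
}

int main() {
  std::vector<int> Order{0, 1, 2, 3};
  std::set<int> Used{0};
  std::set<int> Conflicts{1};
  std::printf("assigned phys reg %d\n", pickFirstFree(Order, Used, Conflicts));  // 2
}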
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 442be886a8ac..9ca43512cd91 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -12,12 +12,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -70,6 +68,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
+ const unsigned Mov = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
E = MBB.rend();
@@ -136,9 +135,20 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
if (A->getOpcode() == AndN2)
MaskValue = ~MaskValue;
- if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
- MI.killsRegister(CondReg, TRI))
+ if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC)) {
+ if (!MI.killsRegister(CondReg, TRI)) {
+ // Replace AND with MOV
+ if (MaskValue == 0) {
+ BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
+ .addImm(0);
+ } else {
+ BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
+ .addReg(ExecReg);
+ }
+ }
+ // Remove AND instruction
A->eraseFromParent();
+ }
bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
if (SReg == ExecReg) {
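The new branch above keeps the condition register defined when the branch does not kill it: the S_AND is still removed, but its known result is first materialized with a move of 0 or a copy of exec. A compact decision sketch of that choice, simplified and not the pass itself:

#include <cstdio>

enum class AndRewrite { JustDelete, ReplaceWithMovZero, ReplaceWithCopyExec };

// The AND's result is fully known, so the instruction itself is always
// removed. If the branch does not kill the condition register, its value
// still has to be materialized: 0 when the mask folds to zero, otherwise a
// copy of exec.
static AndRewrite chooseRewrite(bool CondKilledByBranch, bool MaskIsZero) {
  if (CondKilledByBranch)
    return AndRewrite::JustDelete;
  return MaskIsZero ? AndRewrite::ReplaceWithMovZero
                    : AndRewrite::ReplaceWithCopyExec;
}

int main() {
  std::printf("%d %d %d\n", static_cast<int>(chooseRewrite(true, false)),
              static_cast<int>(chooseRewrite(false, true)),
              static_cast<int>(chooseRewrite(false, false)));
}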
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
new file mode 100644
index 000000000000..877c8b81b2c0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -0,0 +1,56 @@
+//===-- SIProgramInfo.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// The SIProgramInfo tracks resource usage and hardware flags for kernels and
+/// entry functions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "SIProgramInfo.h"
+#include "SIDefines.h"
+#include "Utils/AMDGPUBaseInfo.h"
+
+using namespace llvm;
+
+uint64_t SIProgramInfo::getComputePGMRSrc1() const {
+ return S_00B848_VGPRS(VGPRBlocks) | S_00B848_SGPRS(SGPRBlocks) |
+ S_00B848_PRIORITY(Priority) | S_00B848_FLOAT_MODE(FloatMode) |
+ S_00B848_PRIV(Priv) | S_00B848_DX10_CLAMP(DX10Clamp) |
+ S_00B848_DEBUG_MODE(DebugMode) | S_00B848_IEEE_MODE(IEEEMode) |
+ S_00B848_WGP_MODE(WgpMode) | S_00B848_MEM_ORDERED(MemOrdered);
+}
+
+uint64_t SIProgramInfo::getPGMRSrc1(CallingConv::ID CC) const {
+ if (AMDGPU::isCompute(CC)) {
+ return getComputePGMRSrc1();
+ }
+ uint64_t Reg = S_00B848_VGPRS(VGPRBlocks) | S_00B848_SGPRS(SGPRBlocks) |
+ S_00B848_PRIORITY(Priority) | S_00B848_FLOAT_MODE(FloatMode) |
+ S_00B848_PRIV(Priv) | S_00B848_DX10_CLAMP(DX10Clamp) |
+ S_00B848_DEBUG_MODE(DebugMode) | S_00B848_IEEE_MODE(IEEEMode);
+ switch (CC) {
+ case CallingConv::AMDGPU_PS:
+ Reg |= S_00B028_MEM_ORDERED(MemOrdered);
+ break;
+ case CallingConv::AMDGPU_VS:
+ Reg |= S_00B128_MEM_ORDERED(MemOrdered);
+ break;
+ case CallingConv::AMDGPU_GS:
+ Reg |= S_00B228_WGP_MODE(WgpMode) | S_00B228_MEM_ORDERED(MemOrdered);
+ break;
+ case CallingConv::AMDGPU_HS:
+ Reg |= S_00B428_WGP_MODE(WgpMode) | S_00B428_MEM_ORDERED(MemOrdered);
+ break;
+ default:
+ break;
+ }
+ return Reg;
+}
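The new file builds the RSRC1 value by OR-ing together per-field encodings produced by the S_00B848_* macros. The underlying mechanism is ordinary bitfield packing; the helper below uses made-up shifts and widths, not the real PGM_RSRC1 layout, purely to show the shape:

#include <cstdint>
#include <cstdio>

// Place Value into a register image at bits [Shift, Shift+Width).
// Field positions here are illustrative only.
static uint64_t packField(uint64_t Image, uint64_t Value, unsigned Shift,
                          unsigned Width) {
  uint64_t Mask = (Width == 64) ? ~0ull : ((1ull << Width) - 1);
  if (Value > Mask)  // a value wider than the field would corrupt neighbours
    std::fprintf(stderr, "field overflow\n");
  return Image | ((Value & Mask) << Shift);
}

int main() {
  uint64_t Rsrc1 = 0;
  Rsrc1 = packField(Rsrc1, /*VGPRBlocks=*/3, /*Shift=*/0, /*Width=*/6);
  Rsrc1 = packField(Rsrc1, /*SGPRBlocks=*/2, /*Shift=*/6, /*Width=*/4);
  Rsrc1 = packField(Rsrc1, /*Priority=*/1, /*Shift=*/10, /*Width=*/2);
  std::printf("rsrc1 = 0x%llx\n", static_cast<unsigned long long>(Rsrc1));
}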
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index 7c039a54b57f..9b72d0829d80 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -7,7 +7,8 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// Defines struct to track resource usage for kernels and entry functions.
+/// Defines struct to track resource usage and hardware flags for kernels and
+/// entry functions.
///
//
//===----------------------------------------------------------------------===//
@@ -15,6 +16,9 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H
+#include "llvm/IR/CallingConv.h"
+#include <cstdint>
+
namespace llvm {
/// Track resource usage for kernels / entry functions.
@@ -32,8 +36,6 @@ struct SIProgramInfo {
uint32_t MemOrdered = 0; // GFX10+
uint64_t ScratchSize = 0;
- uint64_t ComputePGMRSrc1 = 0;
-
// Fields set in PGM_RSRC2 pm4 packet.
uint32_t LDSBlocks = 0;
uint32_t ScratchBlocks = 0;
@@ -64,6 +66,10 @@ struct SIProgramInfo {
bool VCCUsed = false;
SIProgramInfo() = default;
+
+ /// Compute the value of the ComputePGMRsrc1 register.
+ uint64_t getComputePGMRSrc1() const;
+ uint64_t getPGMRSrc1(CallingConv::ID CC) const;
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 5d6009ebf384..7a45d8c54f9a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -12,21 +12,15 @@
//===----------------------------------------------------------------------===//
#include "SIRegisterInfo.h"
+#include "AMDGPU.h"
#include "AMDGPURegisterBankInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include <vector>
using namespace llvm;
@@ -40,6 +34,14 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
cl::init(true));
std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
+std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
+
+// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
+// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
+// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
+// meaning index 7 in SubRegFromChannelTable.
+static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
: AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
@@ -53,7 +55,8 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
"getNumCoveredRegs() will not work with generated subreg masks!");
RegPressureIgnoredUnits.resize(getNumRegUnits());
- RegPressureIgnoredUnits.set(*MCRegUnitIterator(AMDGPU::M0, this));
+ RegPressureIgnoredUnits.set(
+ *MCRegUnitIterator(MCRegister::from(AMDGPU::M0), this));
for (auto Reg : AMDGPU::VGPR_HI16RegClass)
RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));
@@ -78,8 +81,28 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
}
};
+ static llvm::once_flag InitializeSubRegFromChannelTableFlag;
+
+ static auto InitializeSubRegFromChannelTableOnce = [this]() {
+ for (auto &Row : SubRegFromChannelTable)
+ Row.fill(AMDGPU::NoSubRegister);
+ for (uint16_t Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
+ unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
+ unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
+ assert(Width < SubRegFromChannelTableWidthMap.size());
+ Width = SubRegFromChannelTableWidthMap[Width];
+ if (Width == 0)
+ continue;
+ unsigned TableIdx = Width - 1;
+ assert(TableIdx < SubRegFromChannelTable.size());
+ assert(Offset < SubRegFromChannelTable[TableIdx].size());
+ SubRegFromChannelTable[TableIdx][Offset] = Idx;
+ }
+ };
llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
+ llvm::call_once(InitializeSubRegFromChannelTableFlag,
+ InitializeSubRegFromChannelTableOnce);
}
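The constructor now populates SubRegFromChannelTable lazily under llvm::call_once, mirroring the existing RegSplitParts initialization. The same once-only pattern in plain standard C++ (std::call_once instead of the LLVM wrapper), with a stand-in table:

#include <array>
#include <cstdio>
#include <mutex>

static std::array<int, 8> Table;   // shared, filled exactly once
static std::once_flag TableInitFlag;

static void initTable() {
  for (int I = 0; I < 8; ++I)
    Table[I] = I * I;              // stand-in for the real contents
}

static int lookup(int Idx) {
  // Safe to call from multiple threads; the first caller does the work.
  std::call_once(TableInitFlag, initTable);
  return Table[Idx];
}

int main() { std::printf("%d\n", lookup(5)); }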
void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
@@ -98,6 +121,7 @@ const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
+ case CallingConv::AMDGPU_Gfx:
return CSR_AMDGPU_HighRegs_SaveList;
default: {
// Dummy to not crash RegisterClassInfo.
@@ -118,12 +142,17 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
+ case CallingConv::AMDGPU_Gfx:
return CSR_AMDGPU_HighRegs_RegMask;
default:
return nullptr;
}
}
+const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
+ return CSR_AMDGPU_NoRegs_RegMask;
+}
+
Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const SIFrameLowering *TFI =
MF.getSubtarget<GCNSubtarget>().getFrameLowering();
@@ -156,71 +185,13 @@ const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
}
-// FIXME: TableGen should generate something to make this manageable for all
-// register classes. At a minimum we could use the opposite of
-// composeSubRegIndices and go up from the base 32-bit subreg.
unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
unsigned NumRegs) {
- // Table of NumRegs sized pieces at every 32-bit offset.
- static const uint16_t SubRegFromChannelTable[][32] = {
- {AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
- AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
- AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
- AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
- AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31},
- {AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3,
- AMDGPU::sub3_sub4, AMDGPU::sub4_sub5, AMDGPU::sub5_sub6,
- AMDGPU::sub6_sub7, AMDGPU::sub7_sub8, AMDGPU::sub8_sub9,
- AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12,
- AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15,
- AMDGPU::sub15_sub16, AMDGPU::sub16_sub17, AMDGPU::sub17_sub18,
- AMDGPU::sub18_sub19, AMDGPU::sub19_sub20, AMDGPU::sub20_sub21,
- AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24,
- AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27,
- AMDGPU::sub27_sub28, AMDGPU::sub28_sub29, AMDGPU::sub29_sub30,
- AMDGPU::sub30_sub31, AMDGPU::NoSubRegister},
- {AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3,
- AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5,
- AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7,
- AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9,
- AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11,
- AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
- AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15,
- AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
- AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19,
- AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
- AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23,
- AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
- AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27,
- AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
- AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31,
- AMDGPU::NoSubRegister, AMDGPU::NoSubRegister},
- {AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4,
- AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6,
- AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8,
- AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10,
- AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12,
- AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
- AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16,
- AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
- AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20,
- AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
- AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24,
- AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
- AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28,
- AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
- AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister,
- AMDGPU::NoSubRegister, AMDGPU::NoSubRegister}};
-
- const unsigned NumRegIndex = NumRegs - 1;
-
- assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) &&
- "Not implemented");
- assert(Channel < array_lengthof(SubRegFromChannelTable[0]));
- return SubRegFromChannelTable[NumRegIndex][Channel];
+ assert(NumRegs < SubRegFromChannelTableWidthMap.size());
+ unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
+ assert(NumRegIndex && "Not implemented");
+ assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
+ return SubRegFromChannelTable[NumRegIndex - 1][Channel];
}
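The huge literal table is gone: getSubRegFromChannel now indexes a width map whose entries are shifted by one so that zero can mean "unsupported", and only then the channel table itself. A standalone sketch of that shifted-index lookup with tiny invented tables:

#include <array>
#include <cassert>
#include <cstdio>

// Row index per width, shifted by one: 0 means "no row for this width".
static const std::array<unsigned, 5> WidthToRow = {0, 1, 2, 0, 3};

// Rows for widths 1, 2 and 4 (entries are illustrative sub-register ids).
static const std::array<std::array<int, 4>, 3> Rows = {{
    {10, 11, 12, 13},   // width 1
    {20, 21, 22, -1},   // width 2
    {40, -1, -1, -1},   // width 4
}};

static int lookupSubReg(unsigned Channel, unsigned Width) {
  assert(Width < WidthToRow.size());
  unsigned Row = WidthToRow[Width];
  assert(Row != 0 && "width not supported");
  assert(Channel < Rows[Row - 1].size());
  return Rows[Row - 1][Channel];
}

int main() { std::printf("%d\n", lookupSubReg(/*Channel=*/1, /*Width=*/2)); }  // 21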
MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
@@ -322,7 +293,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
+ Register ScratchRSrcReg = MFI->getScratchRSrcReg();
if (ScratchRSrcReg != AMDGPU::NoRegister) {
// Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
// to spill.
@@ -363,9 +334,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
reserveRegisterTuples(Reserved, Reg);
- if (MFI->VGPRReservedForSGPRSpill)
- for (auto SSpill : MFI->getSGPRSpillVGPRs())
- reserveRegisterTuples(Reserved, SSpill.VGPR);
+ for (auto SSpill : MFI->getSGPRSpillVGPRs())
+ reserveRegisterTuples(Reserved, SSpill.VGPR);
return Reserved;
}
@@ -415,8 +385,8 @@ bool SIRegisterInfo::requiresVirtualBaseRegisters(
return true;
}
-int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
- assert(SIInstrInfo::isMUBUF(*MI));
+int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
+ assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));
int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::offset);
@@ -425,29 +395,34 @@ int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const {
- if (!SIInstrInfo::isMUBUF(*MI))
+ if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
return 0;
- assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::vaddr) &&
+ assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::vaddr) ||
+ (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::saddr))) &&
"Should never see frame index on non-address operand");
- return getMUBUFInstrOffset(MI);
+ return getScratchInstrOffset(MI);
}
bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
if (!MI->mayLoadOrStore())
return false;
- int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
+ int64_t FullOffset = Offset + getScratchInstrOffset(MI);
- return !isUInt<12>(FullOffset);
+ if (SIInstrInfo::isMUBUF(*MI))
+ return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS, true);
}
-void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
- Register BaseReg,
- int FrameIdx,
- int64_t Offset) const {
+Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ int FrameIdx,
+ int64_t Offset) const {
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
@@ -456,32 +431,50 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
MachineFunction *MF = MBB->getParent();
const SIInstrInfo *TII = ST.getInstrInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
+ : AMDGPU::V_MOV_B32_e32;
+
+ Register BaseReg = MRI.createVirtualRegister(
+ ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
+ : &AMDGPU::VGPR_32RegClass);
if (Offset == 0) {
- BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
+ BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
.addFrameIndex(FrameIdx);
- return;
+ return BaseReg;
}
- MachineRegisterInfo &MRI = MF->getRegInfo();
Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- Register FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register FIReg = MRI.createVirtualRegister(
+ ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
+ : &AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
.addImm(Offset);
- BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
+ BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
.addFrameIndex(FrameIdx);
+ if (ST.enableFlatScratch() ) {
+ BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_U32), BaseReg)
+ .addReg(OffsetReg, RegState::Kill)
+ .addReg(FIReg);
+ return BaseReg;
+ }
+
TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
.addReg(OffsetReg, RegState::Kill)
.addReg(FIReg)
.addImm(0); // clamp bit
+
+ return BaseReg;
}
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
const SIInstrInfo *TII = ST.getInstrInfo();
+ bool IsFlat = TII->isFLATScratch(MI);
#ifndef NDEBUG
// FIXME: Is it possible to be storing a frame index to itself?
@@ -496,20 +489,31 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
}
#endif
- MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
-#ifndef NDEBUG
- MachineBasicBlock *MBB = MI.getParent();
- MachineFunction *MF = MBB->getParent();
-#endif
- assert(FIOp && FIOp->isFI() && "frame index must be address operand");
- assert(TII->isMUBUF(MI));
- assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
- MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() &&
- "should only be seeing stack pointer offset relative FrameIndex");
+ MachineOperand *FIOp =
+ TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
+ : AMDGPU::OpName::vaddr);
MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
int64_t NewOffset = OffsetOp->getImm() + Offset;
- assert(isUInt<12>(NewOffset) && "offset should be legal");
+
+ assert(FIOp && FIOp->isFI() && "frame index must be address operand");
+ assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
+
+ if (IsFlat) {
+ assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, true) &&
+ "offset should be legal");
+ FIOp->ChangeToRegister(BaseReg, false);
+ OffsetOp->setImm(NewOffset);
+ return;
+ }
+
+#ifndef NDEBUG
+ MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
+ assert(SOffset->isImm() && SOffset->getImm() == 0);
+#endif
+
+ assert(SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
+ "offset should be legal");
FIOp->ChangeToRegister(BaseReg, false);
OffsetOp->setImm(NewOffset);
@@ -518,12 +522,16 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
Register BaseReg,
int64_t Offset) const {
- if (!SIInstrInfo::isMUBUF(*MI))
+ if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
return false;
- int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
+ int64_t NewOffset = Offset + getScratchInstrOffset(MI);
+
+ if (SIInstrInfo::isMUBUF(*MI))
+ return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);
- return isUInt<12>(NewOffset);
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, true);
}
const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
@@ -555,16 +563,22 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V256_RESTORE:
+ case AMDGPU::SI_SPILL_A256_SAVE:
+ case AMDGPU::SI_SPILL_A256_RESTORE:
return 8;
case AMDGPU::SI_SPILL_S192_SAVE:
case AMDGPU::SI_SPILL_S192_RESTORE:
case AMDGPU::SI_SPILL_V192_SAVE:
case AMDGPU::SI_SPILL_V192_RESTORE:
+ case AMDGPU::SI_SPILL_A192_SAVE:
+ case AMDGPU::SI_SPILL_A192_RESTORE:
return 6;
case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S160_RESTORE:
case AMDGPU::SI_SPILL_V160_SAVE:
case AMDGPU::SI_SPILL_V160_RESTORE:
+ case AMDGPU::SI_SPILL_A160_SAVE:
+ case AMDGPU::SI_SPILL_A160_RESTORE:
return 5;
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S128_RESTORE:
@@ -577,6 +591,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_S96_RESTORE:
case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V96_RESTORE:
+ case AMDGPU::SI_SPILL_A96_SAVE:
+ case AMDGPU::SI_SPILL_A96_RESTORE:
return 3;
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S64_RESTORE:
@@ -672,11 +688,13 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
unsigned Dst = IsStore ? Reg : ValueReg;
unsigned Src = IsStore ? ValueReg : Reg;
- unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
- : AMDGPU::V_ACCVGPR_READ_B32;
+ unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
+ : AMDGPU::V_ACCVGPR_READ_B32_e64;
- return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
- .addReg(Src, getKillRegState(IsKill));
+ auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
+ .addReg(Src, getKillRegState(IsKill));
+ MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ return MIB;
}
// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
@@ -721,12 +739,46 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
return true;
}
+static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
+ unsigned LoadStoreOp,
+ unsigned EltSize) {
+ bool IsStore = TII->get(LoadStoreOp).mayStore();
+ bool UseST =
+ AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
+ AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0;
+
+ switch (EltSize) {
+ case 4:
+ LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
+ : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
+ break;
+ case 8:
+ LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
+ : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
+ break;
+ case 12:
+ LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
+ : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
+ break;
+ case 16:
+ LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
+ : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
+ break;
+ default:
+ llvm_unreachable("Unexpected spill load/store size!");
+ }
+
+ if (UseST)
+ LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
+
+ return LoadStoreOp;
+}
+
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
int Index,
Register ValueReg,
bool IsKill,
- MCRegister ScratchRsrcReg,
MCRegister ScratchOffsetReg,
int64_t InstOffset,
MachineMemOperand *MMO,
@@ -737,36 +789,51 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
const MachineFrameInfo &MFI = MF->getFrameInfo();
const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
- const MCInstrDesc &Desc = TII->get(LoadStoreOp);
+ const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
const DebugLoc &DL = MI->getDebugLoc();
- bool IsStore = Desc.mayStore();
+ bool IsStore = Desc->mayStore();
+ bool IsFlat = TII->isFLATScratch(LoadStoreOp);
bool Scavenged = false;
MCRegister SOffset = ScratchOffsetReg;
- const unsigned EltSize = 4;
const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
- unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
+ const bool IsAGPR = hasAGPRs(RC);
+ const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
+
+ // Always use 4 byte operations for AGPRs because we need to scavenge
+ // a temporary VGPR.
+ unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
+ unsigned NumSubRegs = RegWidth / EltSize;
unsigned Size = NumSubRegs * EltSize;
+ unsigned RemSize = RegWidth - Size;
+ unsigned NumRemSubRegs = RemSize ? 1 : 0;
int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
+ int64_t MaxOffset = Offset + Size + RemSize - EltSize;
int64_t ScratchOffsetRegDelta = 0;
+ if (IsFlat && EltSize > 4) {
+ LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
+ Desc = &TII->get(LoadStoreOp);
+ }
+
Align Alignment = MFI.getObjectAlign(Index);
const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
- Register TmpReg =
- hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
- : Register();
+ assert((IsFlat || ((Offset % EltSize) == 0)) &&
+ "unexpected VGPR spill offset");
- assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
-
- if (!isUInt<12>(Offset + Size - EltSize)) {
+ bool IsOffsetLegal = IsFlat
+ ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS, true)
+ : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
+ if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
SOffset = MCRegister();
// We currently only support spilling VGPRs to EltSize boundaries, meaning
// we can simplify the adjustment of Offset here to just scale with
// WavefrontSize.
- Offset *= ST.getWavefrontSize();
+ if (!IsFlat)
+ Offset *= ST.getWavefrontSize();
// We don't have access to the register scavenger if this function is called
// during PEI::scavengeFrameVirtualRegs().
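With flat scratch enabled, buildSpillLoadStore now splits a spill into pieces of up to 16 bytes plus at most one smaller remainder piece, instead of always using 4-byte accesses. The splitting arithmetic, pulled out into a standalone helper with assumed inputs:

#include <algorithm>
#include <cstdio>

struct SpillPlan {
  unsigned EltSize;   // size of each full piece, in bytes
  unsigned NumFull;   // number of full pieces
  unsigned RemSize;   // size of the trailing remainder piece (0 if none)
};

// Pieces are capped at 16 bytes; whatever does not divide evenly becomes one
// extra, smaller access.
static SpillPlan planSpill(unsigned RegWidthBytes, bool UseWidePieces) {
  unsigned EltSize = UseWidePieces ? std::min(RegWidthBytes, 16u) : 4u;
  unsigned NumFull = RegWidthBytes / EltSize;
  unsigned RemSize = RegWidthBytes - NumFull * EltSize;
  return {EltSize, NumFull, RemSize};
}

int main() {
  SpillPlan P = planSpill(/*RegWidthBytes=*/24, /*UseWidePieces=*/true);
  std::printf("%u x %u bytes + %u bytes\n", P.NumFull, P.EltSize, P.RemSize);
}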
@@ -804,10 +871,30 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
Offset = 0;
}
- for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
- Register SubReg = NumSubRegs == 1
- ? Register(ValueReg)
- : getSubReg(ValueReg, getSubRegFromChannel(i));
+ if (IsFlat && SOffset == AMDGPU::NoRegister) {
+ assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
+ && "Unexpected vaddr for flat scratch with a FI operand");
+
+ assert(ST.hasFlatScratchSTMode());
+ LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
+ Desc = &TII->get(LoadStoreOp);
+ }
+
+ Register TmpReg;
+
+ for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
+ ++i, RegOffset += EltSize) {
+ if (i == NumSubRegs) {
+ EltSize = RemSize;
+ LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
+ }
+ Desc = &TII->get(LoadStoreOp);
+
+ unsigned NumRegs = EltSize / 4;
+ Register SubReg = e == 1
+ ? ValueReg
+ : Register(getSubReg(ValueReg,
+ getSubRegFromChannel(RegOffset / 4, NumRegs)));
unsigned SOffsetRegState = 0;
unsigned SrcDstRegState = getDefRegState(!IsStore);
@@ -817,46 +904,111 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
SrcDstRegState |= getKillRegState(IsKill);
}
- auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill);
-
- if (!MIB.getInstr()) {
- unsigned FinalReg = SubReg;
- if (TmpReg != AMDGPU::NoRegister) {
- if (IsStore)
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
- .addReg(SubReg, getKillRegState(IsKill));
- SubReg = TmpReg;
+ // Make sure the whole register is defined if there are undef components by
+ // adding an implicit def of the super-reg on the first instruction.
+ bool NeedSuperRegDef = e > 1 && IsStore && i == 0;
+ bool NeedSuperRegImpOperand = e > 1;
+
+ unsigned Lane = RegOffset / 4;
+ unsigned LaneE = (RegOffset + EltSize) / 4;
+ for ( ; Lane != LaneE; ++Lane) {
+ bool IsSubReg = e > 1 || EltSize > 4;
+ Register Sub = IsSubReg
+ ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
+ : ValueReg;
+ auto MIB = spillVGPRtoAGPR(ST, MI, Index, Lane, Sub, IsKill);
+ if (!MIB.getInstr())
+ break;
+ if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
+ MIB.addReg(ValueReg, RegState::ImplicitDefine);
+ NeedSuperRegDef = false;
+ }
+ if (IsSubReg || NeedSuperRegImpOperand) {
+ NeedSuperRegImpOperand = true;
+ unsigned State = SrcDstRegState;
+ if (Lane + 1 != LaneE)
+ State &= ~RegState::Kill;
+ MIB.addReg(ValueReg, RegState::Implicit | State);
}
+ }
- MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
- MachineMemOperand *NewMMO =
- MF->getMachineMemOperand(PInfo, MMO->getFlags(), EltSize,
- commonAlignment(Alignment, EltSize * i));
+ if (Lane == LaneE) // Fully spilled into AGPRs.
+ continue;
+
+ // Offset in bytes from the beginning of ValueReg to the portion we still
+ // need to spill. It may differ from RegOffset if part of the current SubReg
+ // has already been spilled into AGPRs by the loop above.
+ unsigned RemRegOffset = Lane * 4;
+ unsigned RemEltSize = EltSize - (RemRegOffset - RegOffset);
+ if (RemEltSize != EltSize) { // Partially spilled to AGPRs
+ assert(IsFlat && EltSize > 4);
+
+ unsigned NumRegs = RemEltSize / 4;
+ SubReg = Register(getSubReg(ValueReg,
+ getSubRegFromChannel(RemRegOffset / 4, NumRegs)));
+ unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
+ Desc = &TII->get(Opc);
+ }
- MIB = BuildMI(*MBB, MI, DL, Desc)
- .addReg(SubReg,
- getDefRegState(!IsStore) | getKillRegState(IsKill))
- .addReg(ScratchRsrcReg);
- if (SOffset == AMDGPU::NoRegister) {
- MIB.addImm(0);
- } else {
- MIB.addReg(SOffset, SOffsetRegState);
+ unsigned FinalReg = SubReg;
+
+ if (IsAGPR) {
+ assert(EltSize == 4);
+
+ if (!TmpReg) {
+ assert(RS && "Needs to have RegScavenger to spill an AGPR!");
+ // FIXME: change to scavengeRegisterBackwards()
+ TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+ RS->setRegUsed(TmpReg);
}
- MIB.addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .addImm(0) // dlc
- .addImm(0) // swz
- .addMemOperand(NewMMO);
+ if (IsStore) {
+ auto AccRead = BuildMI(*MBB, MI, DL,
+ TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
+ .addReg(SubReg, getKillRegState(IsKill));
+ if (NeedSuperRegDef)
+ AccRead.addReg(ValueReg, RegState::ImplicitDefine);
+ AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ }
+ SubReg = TmpReg;
+ }
+
+ MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RemRegOffset);
+ MachineMemOperand *NewMMO =
+ MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
+ commonAlignment(Alignment, RemRegOffset));
+
+ auto MIB = BuildMI(*MBB, MI, DL, *Desc)
+ .addReg(SubReg,
+ getDefRegState(!IsStore) | getKillRegState(IsKill));
+ if (!IsFlat)
+ MIB.addReg(FuncInfo->getScratchRSrcReg());
- if (!IsStore && TmpReg != AMDGPU::NoRegister)
- MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
- FinalReg)
- .addReg(TmpReg, RegState::Kill);
+ if (SOffset == AMDGPU::NoRegister) {
+ if (!IsFlat)
+ MIB.addImm(0);
+ } else {
+ MIB.addReg(SOffset, SOffsetRegState);
+ }
+ MIB.addImm(Offset + RemRegOffset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0); // tfe for MUBUF or dlc for FLAT
+ if (!IsFlat)
+ MIB.addImm(0) // dlc
+ .addImm(0); // swz
+ MIB.addMemOperand(NewMMO);
+
+ if (!IsAGPR && NeedSuperRegDef)
+ MIB.addReg(ValueReg, RegState::ImplicitDefine);
+
+ if (!IsStore && TmpReg != AMDGPU::NoRegister) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
+ FinalReg)
+ .addReg(TmpReg, RegState::Kill);
+ MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
}
- if (NumSubRegs > 1)
+ if (NeedSuperRegImpOperand)
MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
}
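
A note for readers tracing the reworked buildSpillLoadStore loop above: the value being spilled is walked in EltSize-byte pieces, each addressed by its starting dword channel and dword count, with one smaller remainder piece at the end when the total size is not a multiple of EltSize. The standalone sketch below is not LLVM code; SpillPiece and slice are invented names, and it reproduces only the slicing arithmetic so the channel and offset computations are easier to follow than in diff form.

#include <cstdio>
#include <vector>

// Hypothetical stand-in for one sub-register store/load issued by the loop.
struct SpillPiece {
  unsigned Channel;  // first 32-bit lane (RegOffset / 4)
  unsigned NumRegs;  // dwords covered by this piece (EltSize / 4)
  unsigned ByteOff;  // byte offset added to the instruction offset
};

// Mimics the NumSubRegs/NumRemSubRegs split: full EltSize pieces first,
// then one remainder piece if SizeBytes is not a multiple of EltSize.
static std::vector<SpillPiece> slice(unsigned SizeBytes, unsigned EltSize) {
  std::vector<SpillPiece> Pieces;
  unsigned NumFull = SizeBytes / EltSize;
  unsigned RemSize = SizeBytes % EltSize;
  unsigned RegOffset = 0;
  for (unsigned i = 0, e = NumFull + (RemSize ? 1 : 0); i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumFull)
      EltSize = RemSize;                     // switch to the remainder size
    Pieces.push_back({RegOffset / 4, EltSize / 4, RegOffset});
  }
  return Pieces;
}

int main() {
  // e.g. a 24-byte (192-bit) value spilled with 16-byte wide instructions
  for (const SpillPiece &P : slice(24, 16))
    std::printf("channel %u, %u dwords, +%u bytes\n", P.Channel, P.NumRegs,
                P.ByteOff);
}

For a 24-byte value spilled with 16-byte instructions this prints one 4-dword piece at channel 0 and one 2-dword remainder at channel 4, matching the NumSubRegs/NumRemSubRegs split in the hunk.
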
@@ -907,9 +1059,10 @@ void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI,
// Backup EXEC
if (OnlyExecLo) {
- SavedExecReg = NumSubRegs == 1
- ? SuperReg
- : getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]);
+ SavedExecReg =
+ NumSubRegs == 1
+ ? SuperReg
+ : Register(getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]));
} else {
// If src/dst is an odd size it is possible subreg0 is not aligned.
for (; ExecLane < (NumSubRegs - 1); ++ExecLane) {
@@ -942,15 +1095,19 @@ void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI,
EltSize, Alignment);
if (IsLoad) {
- buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+ unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
+ : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
+ buildSpillLoadStore(MI, Opc,
Index,
VGPR, false,
- MFI->getScratchRSrcReg(), FrameReg,
+ FrameReg,
Offset * EltSize, MMO,
RS);
} else {
- buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, Index, VGPR,
- IsKill, MFI->getScratchRSrcReg(), FrameReg,
+ unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
+ : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
+ buildSpillLoadStore(MI, Opc, Index, VGPR,
+ IsKill, FrameReg,
Offset * EltSize, MMO, RS);
// This only ever adds one VGPR spill
MFI->addToSpilledVGPRs(1);
@@ -966,15 +1123,15 @@ void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI,
} else if (!IsKill) {
// Restore SGPRs from appropriate VGPR lanes
if (!OnlyExecLo) {
- BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32),
getSubReg(SuperReg, SplitParts[FirstPart + ExecLane + 1]))
.addReg(VGPR)
.addImm(ExecLane + 1);
}
- BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
- NumSubRegs == 1
- ? SavedExecReg
- : getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]))
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32),
+ NumSubRegs == 1 ? SavedExecReg
+ : Register(getSubReg(
+ SuperReg, SplitParts[FirstPart + ExecLane])))
.addReg(VGPR, RegState::Kill)
.addImm(ExecLane);
}
@@ -987,7 +1144,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- DenseSet<Register> SGPRSpillVGPRDefinedSet;
ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
= MFI->getSGPRToVGPRSpills(Index);
@@ -1016,25 +1172,29 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
if (SpillToVGPR) {
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- Register SubReg =
- NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
+ Register SubReg = NumSubRegs == 1
+ ? SuperReg
+ : Register(getSubReg(SuperReg, SplitParts[i]));
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
- // During SGPR spilling to VGPR, determine if the VGPR is defined. The
- // only circumstance in which we say it is undefined is when it is the
- // first spill to this VGPR in the first basic block.
- bool VGPRDefined = true;
- if (MBB == &MF->front())
- VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
+ bool UseKill = IsKill && i == NumSubRegs - 1;
// Mark the "old value of vgpr" input undef only if this is the first sgpr
// spill to this specific vgpr in the first basic block.
- BuildMI(*MBB, MI, DL,
- TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
- Spill.VGPR)
- .addReg(SubReg, getKillRegState(IsKill))
- .addImm(Spill.Lane)
- .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
+ auto MIB =
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
+ .addReg(SubReg, getKillRegState(UseKill))
+ .addImm(Spill.Lane)
+ .addReg(Spill.VGPR);
+
+ if (i == 0 && NumSubRegs > 1) {
+ // We may be spilling a super-register which is only partially defined,
+ // and need to ensure later spills think the value is defined.
+ MIB.addReg(SuperReg, RegState::ImplicitDefine);
+ }
+
+ if (NumSubRegs > 1)
+ MIB.addReg(SuperReg, getKillRegState(UseKill) | RegState::Implicit);
// FIXME: Since this spills to another register instead of an actual
// frame index, we should delete the frame index when all references to
@@ -1060,13 +1220,12 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
for (unsigned i = Offset * PerVGPR,
e = std::min((Offset + 1) * PerVGPR, NumSubRegs);
i < e; ++i) {
- Register SubReg =
- NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
+ Register SubReg = NumSubRegs == 1
+ ? SuperReg
+ : Register(getSubReg(SuperReg, SplitParts[i]));
MachineInstrBuilder WriteLane =
- BuildMI(*MBB, MI, DL,
- TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
- TmpVGPR)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), TmpVGPR)
.addReg(SubReg, SubKillState)
.addImm(i % PerVGPR)
.addReg(TmpVGPR, TmpVGPRFlags);
@@ -1126,15 +1285,14 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
if (SpillToVGPR) {
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- Register SubReg =
- NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
+ Register SubReg = NumSubRegs == 1
+ ? SuperReg
+ : Register(getSubReg(SuperReg, SplitParts[i]));
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
- auto MIB =
- BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
- SubReg)
- .addReg(Spill.VGPR)
- .addImm(Spill.Lane);
+ auto MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane);
if (NumSubRegs > 1 && i == 0)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
}
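
The spillSGPR and restoreSGPR hunks above keep using the same underlying idea: each 32-bit piece of an SGPR tuple is parked in one lane of a VGPR with v_writelane_b32 and pulled back with v_readlane_b32. The toy model below is only an illustration; ToyVGPR, writeLane and readLane are invented names, and EXEC, kill flags and the implicit super-register operands the patch is adding are ignored.

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Toy model of a 64-lane VGPR used as a spill slot for scalar registers.
// This only illustrates the lane bookkeeping, not the MIR-building code.
struct ToyVGPR {
  std::array<uint32_t, 64> Lanes{};
};

// v_writelane_b32: store one 32-bit scalar value into a chosen lane.
static void writeLane(ToyVGPR &V, unsigned Lane, uint32_t Val) {
  assert(Lane < V.Lanes.size());
  V.Lanes[Lane] = Val;
}

// v_readlane_b32: read one lane back into a scalar.
static uint32_t readLane(const ToyVGPR &V, unsigned Lane) {
  assert(Lane < V.Lanes.size());
  return V.Lanes[Lane];
}

int main() {
  // Spill a 4-dword "SGPR tuple" into lanes 0..3, then restore it.
  uint32_t Super[4] = {0x10, 0x11, 0x12, 0x13};
  ToyVGPR Spill;
  for (unsigned i = 0; i != 4; ++i)
    writeLane(Spill, /*Spill.Lane=*/i, Super[i]);
  for (unsigned i = 0; i != 4; ++i)
    std::printf("sub%u restored to 0x%x\n", i, readLane(Spill, i));
}

The interesting part of the patch is precisely what this sketch leaves out: marking the super-register implicitly defined on the first write-lane and implicitly used on the remaining ones.
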
@@ -1155,13 +1313,13 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
for (unsigned i = Offset * PerVGPR,
e = std::min((Offset + 1) * PerVGPR, NumSubRegs);
i < e; ++i) {
- Register SubReg =
- NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
+ Register SubReg = NumSubRegs == 1
+ ? SuperReg
+ : Register(getSubReg(SuperReg, SplitParts[i]));
bool LastSubReg = (i + 1 == e);
auto MIB =
- BuildMI(*MBB, MI, DL,
- TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
.addReg(TmpVGPR, getKillRegState(LastSubReg))
.addImm(i);
if (NumSubRegs > 1 && i == 0)
@@ -1259,6 +1417,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V1024_SAVE:
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V192_SAVE:
case AMDGPU::SI_SPILL_V160_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V96_SAVE:
@@ -1266,7 +1425,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V32_SAVE:
case AMDGPU::SI_SPILL_A1024_SAVE:
case AMDGPU::SI_SPILL_A512_SAVE:
+ case AMDGPU::SI_SPILL_A256_SAVE:
+ case AMDGPU::SI_SPILL_A192_SAVE:
+ case AMDGPU::SI_SPILL_A160_SAVE:
case AMDGPU::SI_SPILL_A128_SAVE:
+ case AMDGPU::SI_SPILL_A96_SAVE:
case AMDGPU::SI_SPILL_A64_SAVE:
case AMDGPU::SI_SPILL_A32_SAVE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
@@ -1274,10 +1437,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
MFI->getStackPtrOffsetReg());
- buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+ unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
+ : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
+ buildSpillLoadStore(MI, Opc,
Index,
VData->getReg(), VData->isKill(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(),
@@ -1291,12 +1455,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V96_RESTORE:
case AMDGPU::SI_SPILL_V128_RESTORE:
case AMDGPU::SI_SPILL_V160_RESTORE:
+ case AMDGPU::SI_SPILL_V192_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE:
case AMDGPU::SI_SPILL_V1024_RESTORE:
case AMDGPU::SI_SPILL_A32_RESTORE:
case AMDGPU::SI_SPILL_A64_RESTORE:
+ case AMDGPU::SI_SPILL_A96_RESTORE:
case AMDGPU::SI_SPILL_A128_RESTORE:
+ case AMDGPU::SI_SPILL_A160_RESTORE:
+ case AMDGPU::SI_SPILL_A192_RESTORE:
+ case AMDGPU::SI_SPILL_A256_RESTORE:
case AMDGPU::SI_SPILL_A512_RESTORE:
case AMDGPU::SI_SPILL_A1024_RESTORE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
@@ -1304,10 +1473,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
MFI->getStackPtrOffsetReg());
- buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+ unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
+ : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
+ buildSpillLoadStore(MI, Opc,
Index,
VData->getReg(), VData->isKill(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(),
@@ -1318,6 +1488,117 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
default: {
const DebugLoc &DL = MI->getDebugLoc();
+
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ if (ST.enableFlatScratch()) {
+ if (TII->isFLATScratch(*MI)) {
+ assert((int16_t)FIOperandNum ==
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::saddr));
+
+ // The offset is always swizzled, just replace it
+ if (FrameReg)
+ FIOp.ChangeToRegister(FrameReg, false);
+
+ if (!Offset)
+ return;
+
+ MachineOperand *OffsetOp =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
+ int64_t NewOffset = Offset + OffsetOp->getImm();
+ if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
+ true)) {
+ OffsetOp->setImm(NewOffset);
+ if (FrameReg)
+ return;
+ Offset = 0;
+ }
+
+ assert(!TII->getNamedOperand(*MI, AMDGPU::OpName::vaddr) &&
+ "Unexpected vaddr for flat scratch with a FI operand");
+
+ // On GFX10 we have ST mode to use no registers for an address.
+ // Otherwise we need to materialize 0 into an SGPR.
+ if (!Offset && ST.hasFlatScratchSTMode()) {
+ unsigned Opc = MI->getOpcode();
+ unsigned NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
+ MI->RemoveOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
+ MI->setDesc(TII->get(NewOpc));
+ return;
+ }
+ }
+
+ if (!FrameReg) {
+ FIOp.ChangeToImmediate(Offset);
+ if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
+ return;
+ }
+
+ // We need to use a register here. Check if we can use an SGPR or need
+ // a VGPR.
+ FIOp.ChangeToRegister(AMDGPU::M0, false);
+ bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
+
+ if (!Offset && FrameReg && UseSGPR) {
+ FIOp.setReg(FrameReg);
+ return;
+ }
+
+ const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
+ : &AMDGPU::VGPR_32RegClass;
+
+ Register TmpReg = RS->scavengeRegister(RC, MI, 0, !UseSGPR);
+ FIOp.setReg(TmpReg);
+ FIOp.setIsKill(true);
+
+ if ((!FrameReg || !Offset) && TmpReg) {
+ unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+ auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
+ if (FrameReg)
+ MIB.addReg(FrameReg);
+ else
+ MIB.addImm(Offset);
+
+ return;
+ }
+
+ Register TmpSReg =
+ UseSGPR ? TmpReg
+ : RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0,
+ !UseSGPR);
+
+ // TODO: for flat scratch another attempt can be made with a VGPR index
+ // if no SGPRs can be scavenged.
+ if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
+ report_fatal_error("Cannot scavenge register in FI elimination!");
+
+ if (!TmpSReg) {
+ // Use frame register and restore it after.
+ TmpSReg = FrameReg;
+ FIOp.setReg(FrameReg);
+ FIOp.setIsKill(false);
+ }
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), TmpSReg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+
+ if (!UseSGPR)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addReg(TmpSReg, RegState::Kill);
+
+ if (TmpSReg == FrameReg) {
+ // Undo frame register modification.
+ BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_SUB_U32),
+ FrameReg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ }
+
+ return;
+ }
+
bool IsMUBUF = TII->isMUBUF(*MI);
if (!IsMUBUF && !MFI->isEntryFunction()) {
@@ -1356,7 +1637,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (!IsVOP2)
MIB.addImm(0); // clamp bit
} else {
- assert(MIB->getOpcode() == AMDGPU::V_ADD_I32_e64 &&
+ assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
"Need to reuse carry out register");
// Use scavenged unused carry out as offset register.
@@ -1419,23 +1700,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
AMDGPU::OpName::vaddr));
auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
- assert((SOffset.isReg() &&
- SOffset.getReg() == MFI->getStackPtrOffsetReg()) ||
- (SOffset.isImm() && SOffset.getImm() == 0));
- if (SOffset.isReg()) {
- if (FrameReg == AMDGPU::NoRegister) {
- SOffset.ChangeToImmediate(0);
- } else {
- SOffset.setReg(FrameReg);
- }
- }
+ assert((SOffset.isImm() && SOffset.getImm() == 0));
+
+ if (FrameReg != AMDGPU::NoRegister)
+ SOffset.ChangeToRegister(FrameReg, false);
int64_t Offset = FrameInfo.getObjectOffset(Index);
int64_t OldImm
= TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
int64_t NewOffset = OldImm + Offset;
- if (isUInt<12>(NewOffset) &&
+ if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
MI->eraseFromParent();
return;
@@ -1445,7 +1720,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// If the offset is simply too big, don't convert to a scratch wave offset
// relative index.
- int64_t Offset = FrameInfo.getObjectOffset(Index);
FIOp.ChangeToImmediate(Offset);
if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
@@ -1590,6 +1864,16 @@ SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
return nullptr;
}
+bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
+ Register Reg) const {
+ const TargetRegisterClass *RC;
+ if (Reg.isVirtual())
+ RC = MRI.getRegClass(Reg);
+ else
+ RC = getPhysRegClass(Reg);
+ return isSGPRClass(RC);
+}
+
// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
@@ -1698,6 +1982,12 @@ bool SIRegisterInfo::shouldRewriteCopySrc(
return getCommonSubClass(DefRC, SrcRC) != nullptr;
}
+bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
+ // TODO: 64-bit operands have extending behavior from 32-bit literal.
+ return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
+}
+
/// Returns a lowest register that is not used at any point in the function.
/// If all registers are used, then this function will return
/// AMDGPU::NoRegister. If \p ReserveHighestVGPR = true, then return
@@ -1903,7 +2193,8 @@ MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
DefIdx = V->def;
} else {
// Find last def.
- for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
+ for (MCRegUnitIterator Units(Reg.asMCReg(), this); Units.isValid();
+ ++Units) {
LiveRange &LR = LIS->getRegUnit(*Units);
if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
if (!DefIdx.isValid() ||
@@ -1963,11 +2254,12 @@ SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
}
ArrayRef<MCPhysReg>
-SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
- return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
+SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
+ return makeArrayRef(AMDGPU::SGPR_64RegClass.begin(),
+ ST.getMaxNumSGPRs(MF) / 2);
}
ArrayRef<MCPhysReg>
-SIRegisterInfo::getAllVGPR32(const MachineFunction &MF) const {
- return makeArrayRef(AMDGPU::VGPR_32RegClass.begin(), ST.getMaxNumVGPRs(MF));
+SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
+ return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 62d9f1174337..963da9b3536b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -17,13 +17,11 @@
#define GET_REGINFO_HEADER
#include "AMDGPUGenRegisterInfo.inc"
-#include "SIDefines.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
namespace llvm {
class GCNSubtarget;
class LiveIntervals;
+class RegisterBank;
class SIMachineFunctionInfo;
class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
@@ -40,6 +38,11 @@ private:
/// all elements of the inner vector combined give a full lane mask.
static std::array<std::vector<int16_t>, 16> RegSplitParts;
+ // Table representing sub reg of given width and offset.
+ // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
+ // Second index is 32 different dword offsets.
+ static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;
+
void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
public:
@@ -63,6 +66,7 @@ public:
const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
+ const uint32_t *getNoPreservedMask() const override;
// Stack access is very expensive. CSRs are also the high registers, and we
// want to minimize the number of used registers.
@@ -83,16 +87,15 @@ public:
const MachineFunction &MF) const override;
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
- int64_t getMUBUFInstrOffset(const MachineInstr *MI) const;
+ int64_t getScratchInstrOffset(const MachineInstr *MI) const;
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB, Register BaseReg,
- int FrameIdx,
- int64_t Offset) const override;
+ Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
+ int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
@@ -126,6 +129,7 @@ public:
StringRef getRegAsmName(MCRegister Reg) const override;
+ // Pseudo regs are not allowed
unsigned getHWRegIndex(MCRegister Reg) const {
return getEncodingValue(Reg) & 0xff;
}
@@ -148,14 +152,7 @@ public:
return isSGPRClass(getRegClass(RCID));
}
- bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const {
- const TargetRegisterClass *RC;
- if (Reg.isVirtual())
- RC = MRI.getRegClass(Reg);
- else
- RC = getPhysRegClass(Reg);
- return isSGPRClass(RC);
- }
+ bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
/// \returns true if this class contains only AGPR registers
bool isAGPRClass(const TargetRegisterClass *RC) const {
@@ -198,11 +195,7 @@ public:
/// \returns True if operands defined with this operand type can accept
/// a literal constant (i.e. any 32-bit immediate).
- bool opCanUseLiteralConstant(unsigned OpType) const {
- // TODO: 64-bit operands have extending behavior from 32-bit literal.
- return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
- OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
- }
+ bool opCanUseLiteralConstant(unsigned OpType) const;
/// \returns True if operands defined with this operand type can accept
/// an inline constant. i.e. An integer value in the range (-16, 64) or
@@ -317,13 +310,13 @@ public:
/// of the subtarget.
ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
- /// Return all SGPR32 which satisfy the waves per execution unit requirement
+ /// Return all SGPR64 which satisfy the waves per execution unit requirement
/// of the subtarget.
- ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
+ ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const;
- /// Return all VGPR32 which satisfy the waves per execution unit requirement
+ /// Return all SGPR32 which satisfy the waves per execution unit requirement
/// of the subtarget.
- ArrayRef<MCPhysReg> getAllVGPR32(const MachineFunction &MF) const;
+ ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
@@ -331,7 +324,6 @@ private:
int Index,
Register ValueReg,
bool ValueIsKill,
- MCRegister ScratchRsrcReg,
MCRegister ScratchOffsetReg,
int64_t InstrOffset,
MachineMemOperand *MMO,
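
The new SubRegFromChannelTable declared above caches, per sub-register width, which sub-register index starts at each of the 32 dword channels, so getSubRegFromChannel can become a table lookup. The sketch below shows one plausible shape of such a lookup; the row mapping, the table contents and the encoding used in main() are all invented for illustration, since the real table is populated from generated register information.

#include <array>
#include <cstdint>
#include <cstdio>

// Rough sketch of a (width, channel) -> sub-register-index table in the
// spirit of SubRegFromChannelTable.  Contents here are made up.
static std::array<std::array<uint16_t, 32>, 9> Table{};

// Map a width in dwords to a row: 1..8 dwords -> rows 0..7, 16 dwords -> 8.
static unsigned rowForNumRegs(unsigned NumRegs) {
  return NumRegs == 16 ? 8 : NumRegs - 1;
}

static uint16_t subRegFromChannel(unsigned Channel, unsigned NumRegs) {
  return Table[rowForNumRegs(NumRegs)][Channel];
}

int main() {
  // Pretend sub-register indices are encoded as width*100 + start channel.
  for (unsigned W : {1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 16u})
    for (unsigned C = 0; C < 32; ++C)
      Table[rowForNumRegs(W)][C] = static_cast<uint16_t>(W * 100 + C);
  std::printf("sub-reg index for 4 dwords at channel 2: %u\n",
              (unsigned)subRegFromChannel(2, 4));
}
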
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index ff1f5c4bc49b..92390f1f3297 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -17,9 +17,7 @@ class Indexes<int N> {
24, 25, 26, 27, 28, 29, 30, 31];
// Returns list of indexes [0..N)
- list<int> slice =
- !foldl([]<int>, all, acc, cur,
- !listconcat(acc, !if(!lt(cur, N), [cur], [])));
+ list<int> slice = !filter(i, all, !lt(i, N));
}
let Namespace = "AMDGPU" in {
@@ -27,17 +25,17 @@ let Namespace = "AMDGPU" in {
def lo16 : SubRegIndex<16, 0>;
def hi16 : SubRegIndex<16, 16>;
-foreach Index = 0-31 in {
+foreach Index = 0...31 in {
def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
}
-foreach Index = 1-31 in {
+foreach Index = 1...31 in {
def sub#Index#_lo16 : ComposedSubRegIndex<!cast<SubRegIndex>(sub#Index), lo16>;
def sub#Index#_hi16 : ComposedSubRegIndex<!cast<SubRegIndex>(sub#Index), hi16>;
}
-foreach Size = {2-6,8,16} in {
- foreach Index = Indexes<!add(33, !mul(Size, -1))>.slice in {
+foreach Size = {2...6,8,16} in {
+ foreach Index = Indexes<!sub(33, Size)>.slice in {
def !foldl("", Indexes<Size>.slice, acc, cur,
!strconcat(acc#!if(!eq(acc,""),"","_"), "sub"#!add(cur, Index))) :
SubRegIndex<!mul(Size, 32), !shl(Index, 5)> {
@@ -89,7 +87,7 @@ class getSubRegs<int size> {
class RegSeqNames<int last_reg, int stride, int size, string prefix,
int start = 0> {
int next = !add(start, stride);
- int end_reg = !add(!add(start, size), -1);
+ int end_reg = !add(start, size, -1);
list<string> ret =
!if(!le(end_reg, last_reg),
!listconcat([prefix # "[" # start # ":" # end_reg # "]"],
@@ -102,7 +100,7 @@ class RegSeqDags<RegisterClass RC, int last_reg, int stride, int size,
int start = 0> {
dag trunc_rc = (trunc RC,
!if(!and(!eq(stride, 1), !eq(start, 0)),
- !add(!add(last_reg, 2), !mul(size, -1)),
+ !sub(!add(last_reg, 2), size),
!add(last_reg, 1)));
list<dag> ret =
!if(!lt(start, size),
@@ -149,7 +147,7 @@ multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
!cast<Register>(NAME#"_HI16")]> {
let Namespace = "AMDGPU";
let SubRegIndices = [lo16, hi16];
- let CoveredBySubRegs = !if(ArtificialHigh,0,1);
+ let CoveredBySubRegs = !not(ArtificialHigh);
let HWEncoding = regIdx;
let HWEncoding{8} = HWEncodingHigh;
}
@@ -247,10 +245,10 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> {
let HWEncoding = 110;
}
-foreach Index = 0-15 in {
- defm TTMP#Index#_vi : SIRegLoHi16<"ttmp"#Index, !add(112, Index)>;
- defm TTMP#Index#_gfx9_gfx10 : SIRegLoHi16<"ttmp"#Index, !add(108, Index)>;
- defm TTMP#Index : SIRegLoHi16<"ttmp"#Index, 0>;
+foreach Index = 0...15 in {
+ defm TTMP#Index#_vi : SIRegLoHi16<"ttmp"#Index, !add(112, Index)>;
+ defm TTMP#Index#_gfx9plus : SIRegLoHi16<"ttmp"#Index, !add(108, Index)>;
+ defm TTMP#Index : SIRegLoHi16<"ttmp"#Index, 0>;
}
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
@@ -274,7 +272,7 @@ def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
// SGPR registers
-foreach Index = 0-105 in {
+foreach Index = 0...105 in {
defm SGPR#Index :
SIRegLoHi16 <"s"#Index, Index>,
DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
@@ -282,14 +280,14 @@ foreach Index = 0-105 in {
}
// VGPR registers
-foreach Index = 0-255 in {
+foreach Index = 0...255 in {
defm VGPR#Index :
SIRegLoHi16 <"v"#Index, Index, 0, 1>,
DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>;
}
// AccVGPR registers
-foreach Index = 0-255 in {
+foreach Index = 0...255 in {
defm AGPR#Index :
SIRegLoHi16 <"a"#Index, Index, 1, 1>,
DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]>;
@@ -389,7 +387,7 @@ def TTMP_512Regs : SIRegisterTuples<getSubRegs<16>.ret, TTMP_32, 15, 4, 16, "ttm
class TmpRegTuplesBase<int index, int size,
list<Register> subRegs,
list<SubRegIndex> indices = getSubRegs<size>.ret,
- int index1 = !add(index, !add(size, -1)),
+ int index1 = !add(index, size, -1),
string name = "ttmp["#index#":"#index1#"]"> :
RegisterWithSubRegs<name, subRegs> {
let HWEncoding = subRegs[0].HWEncoding;
@@ -421,8 +419,8 @@ class TmpRegTuples<string tgt,
getSubRegs<size>.ret>;
foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
- def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
- def TTMP#Index#_TTMP#!add(Index,1)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 2, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#_gfx9plus : TmpRegTuples<"_gfx9plus", 2, Index>;
}
foreach Index = {0, 4, 8, 12} in {
@@ -431,7 +429,7 @@ foreach Index = {0, 4, 8, 12} in {
_TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>;
def TTMP#Index#_TTMP#!add(Index,1)#
_TTMP#!add(Index,2)#
- _TTMP#!add(Index,3)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 4, Index>;
+ _TTMP#!add(Index,3)#_gfx9plus : TmpRegTuples<"_gfx9plus", 4, Index>;
}
foreach Index = {0, 4, 8} in {
@@ -448,7 +446,7 @@ foreach Index = {0, 4, 8} in {
_TTMP#!add(Index,4)#
_TTMP#!add(Index,5)#
_TTMP#!add(Index,6)#
- _TTMP#!add(Index,7)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 8, Index>;
+ _TTMP#!add(Index,7)#_gfx9plus : TmpRegTuples<"_gfx9plus", 8, Index>;
}
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
@@ -458,12 +456,12 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TT
TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
-def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9_gfx10 :
+def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9plus :
TmpRegTuplesBase<0, 16,
- [TTMP0_gfx9_gfx10, TTMP1_gfx9_gfx10, TTMP2_gfx9_gfx10, TTMP3_gfx9_gfx10,
- TTMP4_gfx9_gfx10, TTMP5_gfx9_gfx10, TTMP6_gfx9_gfx10, TTMP7_gfx9_gfx10,
- TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10,
- TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>;
+ [TTMP0_gfx9plus, TTMP1_gfx9plus, TTMP2_gfx9plus, TTMP3_gfx9plus,
+ TTMP4_gfx9plus, TTMP5_gfx9plus, TTMP6_gfx9plus, TTMP7_gfx9plus,
+ TTMP8_gfx9plus, TTMP9_gfx9plus, TTMP10_gfx9plus, TTMP11_gfx9plus,
+ TTMP12_gfx9plus, TTMP13_gfx9plus, TTMP14_gfx9plus, TTMP15_gfx9plus]>;
class RegisterTypes<list<ValueType> reg_types> {
list<ValueType> types = reg_types;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
index 64fca0b46797..d30ff4a3fd15 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
@@ -14,9 +14,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
index 932381c99e0b..db4a009e08d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -104,6 +104,9 @@ def HWVALU : ProcResource<1> {
def HWRC : ProcResource<1> { // Register destination cache
let BufferSize = 1;
}
+def HWXDL : ProcResource<1> { // MFMA CU
+ let BufferSize = 0;
+}
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
int latency> : WriteRes<write, resources> {
@@ -138,12 +141,16 @@ multiclass SICommonWriteRes {
def : HWVALUWriteRes<WriteFloatCvt, 4>;
def : HWVALUWriteRes<WriteTrans32, 4>;
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
- def : HWVALUWriteRes<Write2PassMAI, 2>;
- def : HWVALUWriteRes<Write8PassMAI, 8>;
- def : HWVALUWriteRes<Write16PassMAI, 16>;
+
+ let ResourceCycles = [2] in
+ def : HWWriteRes<Write2PassMAI, [HWXDL], 2>;
+ let ResourceCycles = [8] in
+ def : HWWriteRes<Write8PassMAI, [HWXDL], 8>;
+ let ResourceCycles = [16] in
+ def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;
def : ReadAdvance<MIVGPRRead, -2>;
- def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>;
+ def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
// Technically mfma reads can be from 0 to 4 cycles but that does not make
// sense to model because its register setup is huge. In particular if we
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 9c6833a7dab6..cdb78aae1c4f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -9,19 +9,10 @@
//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-shrink-instructions"
@@ -78,27 +69,25 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
MachineOperand &Src0 = MI.getOperand(Src0Idx);
if (Src0.isReg()) {
Register Reg = Src0.getReg();
- if (Register::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
+ if (Reg.isVirtual() && MRI.hasOneUse(Reg)) {
MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
if (Def && Def->isMoveImmediate()) {
MachineOperand &MovSrc = Def->getOperand(1);
bool ConstantFolded = false;
- if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
- isUInt<32>(MovSrc.getImm()))) {
- // It's possible to have only one component of a super-reg defined by
- // a single mov, so we need to clear any subregister flag.
- Src0.setSubReg(0);
- Src0.ChangeToImmediate(MovSrc.getImm());
- ConstantFolded = true;
- } else if (MovSrc.isFI()) {
- Src0.setSubReg(0);
- Src0.ChangeToFrameIndex(MovSrc.getIndex());
- ConstantFolded = true;
- } else if (MovSrc.isGlobal()) {
- Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
- MovSrc.getTargetFlags());
- ConstantFolded = true;
+ if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
+ if (MovSrc.isImm() &&
+ (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) {
+ Src0.ChangeToImmediate(MovSrc.getImm());
+ ConstantFolded = true;
+ } else if (MovSrc.isFI()) {
+ Src0.ChangeToFrameIndex(MovSrc.getIndex());
+ ConstantFolded = true;
+ } else if (MovSrc.isGlobal()) {
+ Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
+ MovSrc.getTargetFlags());
+ ConstantFolded = true;
+ }
}
if (ConstantFolded) {
@@ -276,8 +265,8 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
// enabled
int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
- unsigned TFEVal = MI.getOperand(TFEIdx).getImm();
- unsigned LWEVal = MI.getOperand(LWEIdx).getImm();
+ unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
+ unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
int ToUntie = -1;
if (TFEVal || LWEVal) {
// TFE/LWE is enabled so we need to deal with an implicit tied operand
@@ -367,19 +356,23 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
}
if (NewImm != 0) {
- if (Register::isVirtualRegister(Dest->getReg()) && SrcReg->isReg()) {
+ if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
return true;
}
if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
+ const bool IsUndef = SrcReg->isUndef();
+ const bool IsKill = SrcReg->isKill();
MI.setDesc(TII->get(Opc));
if (Opc == AMDGPU::S_BITSET0_B32 ||
Opc == AMDGPU::S_BITSET1_B32) {
Src0->ChangeToImmediate(NewImm);
// Remove the immediate and add the tied input.
- MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
+ MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false,
+ /*isImp*/ false, IsKill,
+ /*isDead*/ false, IsUndef);
MI.tieOperands(0, 2);
} else {
SrcImm->setImm(NewImm);
@@ -393,17 +386,16 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
// This is the same as MachineInstr::readsRegister/modifiesRegister except
// it takes subregs into account.
static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
- unsigned Reg, unsigned SubReg,
+ Register Reg, unsigned SubReg,
const SIRegisterInfo &TRI) {
for (const MachineOperand &MO : R) {
if (!MO.isReg())
continue;
- if (Register::isPhysicalRegister(Reg) &&
- Register::isPhysicalRegister(MO.getReg())) {
+ if (Reg.isPhysical() && MO.getReg().isPhysical()) {
if (TRI.regsOverlap(Reg, MO.getReg()))
return true;
- } else if (MO.getReg() == Reg && Register::isVirtualRegister(Reg)) {
+ } else if (MO.getReg() == Reg && Reg.isVirtual()) {
LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
TRI.getSubRegIndexLaneMask(MO.getSubReg());
if (Overlap.any())
@@ -426,10 +418,10 @@ static bool instModifiesReg(const MachineInstr *MI,
}
static TargetInstrInfo::RegSubRegPair
-getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
+getSubRegForIndex(Register Reg, unsigned Sub, unsigned I,
const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
} else {
Sub = TRI.getSubRegFromChannel(I + TRI.getChannelFromSubReg(Sub));
@@ -438,6 +430,22 @@ getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}
+static void dropInstructionKeepingImpDefs(MachineInstr &MI,
+ const SIInstrInfo *TII) {
+ for (unsigned i = MI.getDesc().getNumOperands() +
+ MI.getDesc().getNumImplicitUses() +
+ MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &Op = MI.getOperand(i);
+ if (!Op.isDef())
+ continue;
+ BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+ TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
+ }
+
+ MI.eraseFromParent();
+}
+
// Match:
// mov t, x
// mov x, y
@@ -477,18 +485,25 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
if (!TRI.isVGPR(MRI, X))
return nullptr;
+ if (MovT.hasRegisterImplicitUseOperand(AMDGPU::M0))
+ return nullptr;
+
const unsigned SearchLimit = 16;
unsigned Count = 0;
+ bool KilledT = false;
for (auto Iter = std::next(MovT.getIterator()),
E = MovT.getParent()->instr_end();
- Iter != E && Count < SearchLimit; ++Iter, ++Count) {
+ Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
MachineInstr *MovY = &*Iter;
+ KilledT = MovY->killsRegister(T, &TRI);
+
if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
MovY->getOpcode() != AMDGPU::COPY) ||
!MovY->getOperand(1).isReg() ||
MovY->getOperand(1).getReg() != T ||
- MovY->getOperand(1).getSubReg() != Tsub)
+ MovY->getOperand(1).getSubReg() != Tsub ||
+ MovY->hasRegisterImplicitUseOperand(AMDGPU::M0))
continue;
Register Y = MovY->getOperand(0).getReg();
@@ -522,32 +537,53 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
MovX = nullptr;
break;
}
+ // Implicit use of M0 is an indirect move.
+ if (I->hasRegisterImplicitUseOperand(AMDGPU::M0))
+ continue;
+
+ if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
+ continue;
+
MovX = &*I;
}
if (!MovX)
continue;
- LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
+ LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);
for (unsigned I = 0; I < Size; ++I) {
TargetInstrInfo::RegSubRegPair X1, Y1;
X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
- BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
- TII->get(AMDGPU::V_SWAP_B32))
+ MachineBasicBlock &MBB = *MovT.getParent();
+ auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
+ TII->get(AMDGPU::V_SWAP_B32))
.addDef(X1.Reg, 0, X1.SubReg)
.addDef(Y1.Reg, 0, Y1.SubReg)
.addReg(Y1.Reg, 0, Y1.SubReg)
.addReg(X1.Reg, 0, X1.SubReg).getInstr();
+ if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
+ // Drop implicit EXEC.
+ MIB->RemoveOperand(MIB->getNumExplicitOperands());
+ MIB->copyImplicitOps(*MBB.getParent(), *MovX);
+ }
}
MovX->eraseFromParent();
- MovY->eraseFromParent();
+ dropInstructionKeepingImpDefs(*MovY, TII);
MachineInstr *Next = &*std::next(MovT.getIterator());
- if (MRI.use_nodbg_empty(T))
- MovT.eraseFromParent();
- else
+
+ if (MRI.use_nodbg_empty(T)) {
+ dropInstructionKeepingImpDefs(MovT, TII);
+ } else {
Xop.setIsKill(false);
+ for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
+ unsigned OpNo = MovT.getNumExplicitOperands() + I;
+ const MachineOperand &Op = MovT.getOperand(OpNo);
+ if (Op.isKill() && TRI.regsOverlap(X, Op.getReg()))
+ MovT.RemoveOperand(OpNo);
+ }
+ }
return Next;
}
@@ -585,8 +621,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// optimizations happen because this will confuse them.
// XXX - not exactly a check for post-regalloc run.
MachineOperand &Src = MI.getOperand(1);
- if (Src.isImm() &&
- Register::isPhysicalRegister(MI.getOperand(0).getReg())) {
+ if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
int32_t ReverseImm;
if (isReverseInlineImm(TII, Src, ReverseImm)) {
MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
@@ -604,35 +639,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
}
- // Combine adjacent s_nops to use the immediate operand encoding how long
- // to wait.
- //
- // s_nop N
- // s_nop M
- // =>
- // s_nop (N + M)
- if (MI.getOpcode() == AMDGPU::S_NOP &&
- MI.getNumOperands() == 1 && // Don't merge with implicit operands
- Next != MBB.end() &&
- (*Next).getOpcode() == AMDGPU::S_NOP &&
- (*Next).getNumOperands() == 1) {
-
- MachineInstr &NextMI = *Next;
- // The instruction encodes the amount to wait with an offset of 1,
- // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
- // after adding.
- uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
- uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;
-
- // Make sure we don't overflow the bounds.
- if (Nop0 + Nop1 <= 8) {
- NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
- MI.eraseFromParent();
- }
-
- continue;
- }
-
// FIXME: We also need to consider movs of constant operands since
// immediate operands are not folded if they have more than one use, and
// the operand folding pass is unaware if the immediate will be free since
@@ -652,7 +658,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// FIXME: This could work better if hints worked with subregisters. If
// we have a vector add of a constant, we usually don't get the correct
// allocation due to the subregister usage.
- if (Register::isVirtualRegister(Dest->getReg()) && Src0->isReg()) {
+ if (Dest->getReg().isVirtual() && Src0->isReg()) {
MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
continue;
@@ -680,7 +686,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &Dst = MI.getOperand(0);
MachineOperand &Src = MI.getOperand(1);
- if (Src.isImm() && Register::isPhysicalRegister(Dst.getReg())) {
+ if (Src.isImm() && Dst.getReg().isPhysical()) {
int32_t ReverseImm;
if (isKImmOperand(TII, Src))
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
@@ -729,7 +735,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (TII->isVOPC(Op32)) {
Register DstReg = MI.getOperand(0).getReg();
- if (Register::isVirtualRegister(DstReg)) {
+ if (DstReg.isVirtual()) {
// VOPC instructions can only write to the VCC register. We can't
// force them to use VCC here, because this is only one register and
// cannot deal with sequences which would require multiple copies of
@@ -753,7 +759,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (!Src2->isReg())
continue;
Register SReg = Src2->getReg();
- if (Register::isVirtualRegister(SReg)) {
+ if (SReg.isVirtual()) {
MRI.setRegAllocationHint(SReg, 0, VCCReg);
continue;
}
@@ -773,7 +779,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
bool Next = false;
if (SDst->getReg() != VCCReg) {
- if (Register::isVirtualRegister(SDst->getReg()))
+ if (SDst->getReg().isVirtual())
MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg);
Next = true;
}
@@ -781,7 +787,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// All of the instructions with carry outs also have an SGPR input in
// src2.
if (Src2 && Src2->getReg() != VCCReg) {
- if (Register::isVirtualRegister(Src2->getReg()))
+ if (Src2->getReg().isVirtual())
MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg);
Next = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index b1c73df269fb..0640e24b37ec 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -56,35 +56,17 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <vector>
using namespace llvm;
@@ -154,6 +136,11 @@ private:
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
+ unsigned AndOpc;
+ unsigned XorTermrOpc;
+ unsigned OrSaveExecOpc;
+ unsigned Exec;
+
DenseMap<const MachineInstr *, InstrInfo> Instructions;
MapVector<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
@@ -164,6 +151,8 @@ private:
void markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist);
+ void markDefs(const MachineInstr &UseMI, LiveRange &LR, Register Reg,
+ unsigned SubReg, char Flag, std::vector<WorkItem> &Worklist);
void markInstructionUses(const MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist);
char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
@@ -252,6 +241,8 @@ void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
assert(!(Flag & StateExact) && Flag != 0);
+ LLVM_DEBUG(dbgs() << "markInstruction " << PrintState(Flag) << ": " << MI);
+
// Remove any disabled states from the flag. The user that required it gets
// an undefined value in the helper lanes. For example, this can happen if
// the result of an atomic is used by instruction that requires WQM, where
@@ -267,9 +258,70 @@ void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
Worklist.push_back(&MI);
}
+/// Mark all relevant definitions of register \p Reg in usage \p UseMI.
+void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
+ Register Reg, unsigned SubReg, char Flag,
+ std::vector<WorkItem> &Worklist) {
+ assert(!MRI->isSSA());
+
+ LLVM_DEBUG(dbgs() << "markDefs " << PrintState(Flag) << ": " << UseMI);
+
+ LiveQueryResult UseLRQ = LR.Query(LIS->getInstructionIndex(UseMI));
+ if (!UseLRQ.valueIn())
+ return;
+
+ SmallPtrSet<const VNInfo *, 4> Visited;
+ SmallVector<const VNInfo *, 4> ToProcess;
+ ToProcess.push_back(UseLRQ.valueIn());
+ do {
+ const VNInfo *Value = ToProcess.pop_back_val();
+ Visited.insert(Value);
+
+ if (Value->isPHIDef()) {
+ // Need to mark all defs used in the PHI node
+ const MachineBasicBlock *MBB = LIS->getMBBFromIndex(Value->def);
+ assert(MBB && "Phi-def has no defining MBB");
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end();
+ PI != PE; ++PI) {
+ if (const VNInfo *VN = LR.getVNInfoBefore(LIS->getMBBEndIdx(*PI))) {
+ if (!Visited.count(VN))
+ ToProcess.push_back(VN);
+ }
+ }
+ } else {
+ MachineInstr *MI = LIS->getInstructionFromIndex(Value->def);
+ assert(MI && "Def has no defining instruction");
+ markInstruction(*MI, Flag, Worklist);
+
+ // Iterate over all operands to find relevant definitions
+ for (const MachineOperand &Op : MI->operands()) {
+ if (!(Op.isReg() && Op.getReg() == Reg))
+ continue;
+
+ // Does this def cover the whole register?
+ bool DefinesFullReg =
+ Op.isUndef() || !Op.getSubReg() || Op.getSubReg() == SubReg;
+ if (!DefinesFullReg) {
+ // Partial definition; need to follow and mark input value
+ LiveQueryResult LRQ = LR.Query(LIS->getInstructionIndex(*MI));
+ if (const VNInfo *VN = LRQ.valueIn()) {
+ if (!Visited.count(VN))
+ ToProcess.push_back(VN);
+ }
+ }
+ }
+ }
+ } while (!ToProcess.empty());
+}
+
/// Mark all instructions defining the uses in \p MI with \p Flag.
void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist) {
+
+ LLVM_DEBUG(dbgs() << "markInstructionUses " << PrintState(Flag) << ": "
+ << MI);
+
for (const MachineOperand &Use : MI.uses()) {
if (!Use.isReg() || !Use.isUse())
continue;
@@ -279,30 +331,39 @@ void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
// Handle physical registers that we need to track; this is mostly relevant
// for VCC, which can appear as the (implicit) input of a uniform branch,
// e.g. when a loop counter is stored in a VGPR.
- if (!Register::isVirtualRegister(Reg)) {
+ if (!Reg.isVirtual()) {
if (Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO)
continue;
- for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
+ for (MCRegUnitIterator RegUnit(Reg.asMCReg(), TRI); RegUnit.isValid();
+ ++RegUnit) {
LiveRange &LR = LIS->getRegUnit(*RegUnit);
const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
if (!Value)
continue;
- // Since we're in machine SSA, we do not need to track physical
- // registers across basic blocks.
- if (Value->isPHIDef())
- continue;
-
- markInstruction(*LIS->getInstructionFromIndex(Value->def), Flag,
- Worklist);
+ if (MRI->isSSA()) {
+ // Since we're in machine SSA, we do not need to track physical
+ // registers across basic blocks.
+ if (Value->isPHIDef())
+ continue;
+ markInstruction(*LIS->getInstructionFromIndex(Value->def), Flag,
+ Worklist);
+ } else {
+ markDefs(MI, LR, *RegUnit, AMDGPU::NoSubRegister, Flag, Worklist);
+ }
}
continue;
}
- for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
- markInstruction(DefMI, Flag, Worklist);
+ if (MRI->isSSA()) {
+ for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
+ markInstruction(DefMI, Flag, Worklist);
+ } else {
+ LiveRange &LR = LIS->getInterval(Reg);
+ markDefs(MI, LR, Reg, Use.getSubReg(), Flag, Worklist);
+ }
}
}
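
The new markDefs above is a worklist walk over value numbers: a use is traced back through PHI-defs to every concrete defining instruction, which is then marked with the required state. The sketch below mirrors that traversal on a toy value graph; ValueDef and the integer value numbers are invented, and partial definitions, which the real code follows like one more incoming value, are left out.

#include <cstdio>
#include <set>
#include <vector>

// Toy value graph: each value number is either a concrete definition
// (marked directly) or a PHI whose operands are other value numbers
// that must be followed.
struct ValueDef {
  bool IsPHI = false;
  std::vector<int> Incoming;   // only used when IsPHI
};

static void markDefs(int UseValue, const std::vector<ValueDef> &Defs,
                     std::set<int> &Marked) {
  std::set<int> Visited;
  std::vector<int> Worklist{UseValue};
  do {
    int V = Worklist.back();
    Worklist.pop_back();
    Visited.insert(V);
    if (Defs[V].IsPHI) {
      for (int In : Defs[V].Incoming)
        if (!Visited.count(In))
          Worklist.push_back(In);
    } else {
      Marked.insert(V);          // corresponds to markInstruction(...)
    }
  } while (!Worklist.empty());
}

int main() {
  // v2 = phi(v0, v1); a use of v2 must mark the defs of both v0 and v1.
  std::vector<ValueDef> Defs = {{false, {}}, {false, {}}, {true, {0, 1}}};
  std::set<int> Marked;
  markDefs(2, Defs, Marked);
  for (int V : Marked)
    std::printf("marked def of v%d\n", V);
}
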
@@ -363,7 +424,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
LowerToCopyInstrs.push_back(&MI);
} else {
Register Reg = Inactive.getReg();
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
for (MachineInstr &DefMI : MRI->def_instructions(Reg))
markInstruction(DefMI, StateWWM, Worklist);
}
@@ -393,7 +454,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg) &&
+ if (!Reg.isVirtual() &&
TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) {
Flags = StateWQM;
break;
@@ -552,7 +613,8 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
if (!SaveSCC)
return PreferLast ? Last : First;
- LiveRange &LR = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));
+ LiveRange &LR =
+ LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
auto MBBE = MBB.end();
SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First)
: LIS->getMBBEndIdx(&MBB);
@@ -572,7 +634,12 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
break;
Idx = Next;
} else {
- SlotIndex Next = S->end.getNextIndex().getBaseIndex();
+ MachineInstr *EndMI = LIS->getInstructionFromIndex(S->end.getBaseIndex());
+ assert(EndMI && "Segment does not end on valid instruction");
+ auto NextI = std::next(EndMI->getIterator());
+ if (NextI == MBB.end())
+ break;
+ SlotIndex Next = LIS->getInstructionIndex(*NextI);
if (Next > LastIdx)
break;
Idx = Next;
@@ -588,6 +655,23 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
MBBI = MBB.end();
}
+ // Move insertion point past any operations modifying EXEC.
+ // This assumes that the value of SCC defined by any of these operations
+ // does not need to be preserved.
+ while (MBBI != Last) {
+ bool IsExecDef = false;
+ for (const MachineOperand &MO : MBBI->operands()) {
+ if (MO.isReg() && MO.isDef()) {
+ IsExecDef |=
+ MO.getReg() == AMDGPU::EXEC_LO || MO.getReg() == AMDGPU::EXEC;
+ }
+ }
+ if (!IsExecDef)
+ break;
+ MBBI++;
+ S = nullptr;
+ }
+
if (S)
MBBI = saveSCC(MBB, MBBI);
@@ -682,8 +766,11 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
const TargetRegisterClass *BoolRC = TRI->getBoolRC();
auto II = MBB.getFirstNonPHI(), IE = MBB.end();
- if (isEntry)
- ++II; // Skip the instruction that saves LiveMask
+ if (isEntry) {
+ // Skip the instruction that saves LiveMask
+ if (II != IE && II->getOpcode() == AMDGPU::COPY)
+ ++II;
+ }
// This stores the first instruction where it's safe to switch from WQM to
// Exact or vice versa.
@@ -694,6 +781,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
// FirstWQM since if it's safe to switch to/from WWM, it must be safe to
// switch to/from WQM as well.
MachineBasicBlock::iterator FirstWWM = IE;
+
for (;;) {
MachineBasicBlock::iterator Next = II;
char Needs = StateExact | StateWQM; // WWM is disabled by default
@@ -730,9 +818,6 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (MI.isTerminator() && OutNeeds == StateExact)
Needs = StateExact;
- if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
- MI.getOperand(3).setImm(1);
-
++Next;
} else {
// End of basic block
@@ -809,6 +894,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (II == IE)
break;
+
II = Next;
}
assert(!SavedWQMReg);
@@ -819,6 +905,7 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
for (MachineInstr *MI : LiveMaskQueries) {
const DebugLoc &DL = MI->getDebugLoc();
Register Dest = MI->getOperand(0).getReg();
+
MachineInstr *Copy =
BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
.addReg(LiveMaskReg);
@@ -833,19 +920,35 @@ void SIWholeQuadMode::lowerCopyInstrs() {
assert(MI->getNumExplicitOperands() == 2);
const Register Reg = MI->getOperand(0).getReg();
+ const unsigned SubReg = MI->getOperand(0).getSubReg();
if (TRI->isVGPR(*MRI, Reg)) {
- const TargetRegisterClass *regClass = Register::isVirtualRegister(Reg)
- ? MRI->getRegClass(Reg)
- : TRI->getPhysRegClass(Reg);
+ const TargetRegisterClass *regClass =
+ Reg.isVirtual() ? MRI->getRegClass(Reg) : TRI->getPhysRegClass(Reg);
+ if (SubReg)
+ regClass = TRI->getSubRegClass(regClass, SubReg);
const unsigned MovOp = TII->getMovOpcode(regClass);
MI->setDesc(TII->get(MovOp));
// And make it implicitly depend on exec (like all VALU movs should do).
MI->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
- } else {
+ } else if (!MRI->isSSA()) {
+ // Remove early-clobber and exec dependency from simple SGPR copies.
+ // This allows some to be eliminated during/post RA.
+ LLVM_DEBUG(dbgs() << "simplify SGPR copy: " << *MI);
+ if (MI->getOperand(0).isEarlyClobber()) {
+ LIS->removeInterval(Reg);
+ MI->getOperand(0).setIsEarlyClobber(false);
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+ int Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC);
+ while (Index >= 0) {
+ MI->RemoveOperand(Index);
+ Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC);
+ }
MI->setDesc(TII->get(AMDGPU::COPY));
+ LLVM_DEBUG(dbgs() << " -> " << *MI);
}
}
for (MachineInstr *MI : LowerToCopyInstrs) {
@@ -881,9 +984,20 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
+ if (ST->isWave32()) {
+ AndOpc = AMDGPU::S_AND_B32;
+ XorTermrOpc = AMDGPU::S_XOR_B32_term;
+ OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
+ Exec = AMDGPU::EXEC_LO;
+ } else {
+ AndOpc = AMDGPU::S_AND_B64;
+ XorTermrOpc = AMDGPU::S_XOR_B64_term;
+ OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
+ Exec = AMDGPU::EXEC;
+ }
+
char GlobalFlags = analyzeFunction(MF);
unsigned LiveMaskReg = 0;
- unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
if (!(GlobalFlags & StateWQM)) {
lowerLiveMaskQueries(Exec);
if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty() && LowerToMovInstrs.empty())
@@ -932,7 +1046,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
// Physical registers like SCC aren't tracked by default anyway, so just
// removing the ranges we computed is the simplest option for maintaining
// the analysis results.
- LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));
+ LIS->removeRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
return true;
}
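
For reference, the wave-size setup added at the top of runOnMachineFunction boils down to choosing the 32-bit or 64-bit scalar opcodes once per function instead of re-testing isWave32() at each use site (which is why the old per-use "unsigned Exec = ST->isWave32() ? ..." line is dropped). Below is a minimal standalone sketch of that selection; the enum and struct are illustrative stand-ins, and only the AMDGPU opcode/register names themselves come from the hunk above.

    #include <cstdio>

    // Illustrative stand-ins for the TableGen-generated enumerators; opcode and
    // register ids are lumped together just for this sketch.
    enum Id { S_AND_B32, S_AND_B64, S_XOR_B32_term, S_XOR_B64_term,
              S_OR_SAVEEXEC_B32, S_OR_SAVEEXEC_B64, EXEC_LO, EXEC };

    struct WaveOpcodes {
      Id AndOpc, XorTermOpc, OrSaveExecOpc, ExecReg;
    };

    // Mirrors the selection done once per function: 32-bit forms for wave32,
    // 64-bit forms for wave64.
    static WaveOpcodes selectWaveOpcodes(bool IsWave32) {
      if (IsWave32)
        return {S_AND_B32, S_XOR_B32_term, S_OR_SAVEEXEC_B32, EXEC_LO};
      return {S_AND_B64, S_XOR_B64_term, S_OR_SAVEEXEC_B64, EXEC};
    }

    int main() {
      WaveOpcodes W = selectWaveOpcodes(/*IsWave32=*/true);
      std::printf("wave32 uses EXEC_LO? %d\n", W.ExecReg == EXEC_LO);
    }
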
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
index 70bf215c03f3..5b8896c21832 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -332,7 +332,6 @@ let OtherPredicates = [HasScalarStores] in {
def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
} // End OtherPredicates = [HasScalarStores]
-def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
let is_buffer = 1 in {
@@ -340,6 +339,9 @@ defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
}
} // SubtargetPredicate = isGFX8Plus
+let SubtargetPredicate = HasSMemRealTime in
+def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
+
let SubtargetPredicate = isGFX10Plus in
def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
let SubtargetPredicate = HasGetWaveIdInst in
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 9d7b25d55217..7426af931a62 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -54,9 +54,9 @@ class SOP1_Pseudo <string opName, dag outs, dag ins,
bits<1> has_sdst = 1;
}
-class SOP1_Real<bits<8> op, SOP1_Pseudo ps> :
+class SOP1_Real<bits<8> op, SOP1_Pseudo ps, string real_name = ps.Mnemonic> :
InstSI <ps.OutOperandList, ps.InOperandList,
- ps.Mnemonic # " " # ps.AsmOperands, []>,
+ real_name # " " # ps.AsmOperands, []>,
Enc32 {
let isPseudo = 0;
@@ -288,13 +288,11 @@ def S_MOVRELD_B64 : SOP1_64_movreld <"s_movreld_b64">;
let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in {
def S_CBRANCH_JOIN : SOP1_0_32R <"s_cbranch_join">;
-def S_MOV_REGRD_B32 : SOP1_32 <"s_mov_regrd_b32">;
} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9
let Defs = [SCC] in {
def S_ABS_I32 : SOP1_32 <"s_abs_i32">;
} // End Defs = [SCC]
-def S_MOV_FED_B32 : SOP1_32 <"s_mov_fed_b32">;
let SubtargetPredicate = HasVGPRIndexMode in {
def S_SET_GPR_IDX_IDX : SOP1_0_32<"s_set_gpr_idx_idx"> {
@@ -361,9 +359,9 @@ class SOP2_Pseudo<string opName, dag outs, dag ins,
// let Size = 4; // Do we need size here?
}
-class SOP2_Real<bits<7> op, SOP_Pseudo ps> :
+class SOP2_Real<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
InstSI <ps.OutOperandList, ps.InOperandList,
- ps.Mnemonic # " " # ps.AsmOperands, []>,
+ real_name # " " # ps.AsmOperands, []>,
Enc32 {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -410,8 +408,14 @@ class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
(ops node:$src0),
(Op $src0),
- [{ return !N->isDivergent(); }]
->;
+ [{ return !N->isDivergent(); }]> {
+ // This check is unnecessary as it's captured by the result register
+ // bank constraint.
+ //
+ // FIXME: Should add a way for the emitter to recognize this is a
+ // trivially true predicate to eliminate the check.
+ let GISelPredicateCode = [{return true;}];
+}
class UniformBinFrag<SDPatternOperator Op> : PatFrag <
(ops node:$src0, node:$src1),
@@ -425,6 +429,18 @@ class UniformBinFrag<SDPatternOperator Op> : PatFrag <
let GISelPredicateCode = [{return true;}];
}
+class DivergentBinFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0, node:$src1),
+ (Op $src0, $src1),
+ [{ return N->isDivergent(); }]> {
+ // This check is unnecessary as it's captured by the result register
+ // bank constraint.
+ //
+ // FIXME: Should add a way for the emitter to recognize this is a
+ // trivially true predicate to eliminate the check.
+ let GISelPredicateCode = [{return true;}];
+}
+
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
def S_ADD_U32 : SOP2_32 <"s_add_u32">;
@@ -465,10 +481,15 @@ def S_MAX_U32 : SOP2_32 <"s_max_u32",
} // End isCommutable = 1
} // End Defs = [SCC]
+// This pattern is restricted to certain subtargets (practically GFX8Plus)
+// because isel sometimes produces an sreg_64 copy to SCC as a by-product
+// of this pattern, and only subtargets with hasScalarCompareEq64 can map
+// such a copy to a single instruction (S_CMP_LG_U64).
class SelectPat<SDPatternOperator select> : PatFrag <
(ops node:$src1, node:$src2),
(select SCC, $src1, $src2),
- [{ return N->getOperand(0)->hasOneUse() && !N->isDivergent(); }]
+ [{ return Subtarget->hasScalarCompareEq64() &&
+ N->getOperand(0)->hasOneUse() && !N->isDivergent(); }]
>;
let Uses = [SCC] in {
@@ -532,6 +553,7 @@ def S_NOR_B64 : SOP2_64 <"s_nor_b64",
>;
} // End isCommutable = 1
+// There are also separate patterns for types other than i32
def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32",
[(set i32:$sdst, (UniformBinFrag<and> i32:$src0, (UniformUnaryFrag<not> i32:$src1)))]
>;
@@ -803,48 +825,65 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
"$sdst, $simm16"
>;
-let hasSideEffects = 1 in {
-
let mayLoad = 1 in {
// s_getreg_b32 should use hasSideEffects = 1 for tablegen to allow
// its use in the readcyclecounter selection.
+// FIXME: Need to truncate immediate to 16-bits.
def S_GETREG_B32 : SOPK_Pseudo <
"s_getreg_b32",
(outs SReg_32:$sdst), (ins hwreg:$simm16),
- "$sdst, $simm16"
->;
+ "$sdst, $simm16",
+ [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> {
+ let SOPKZext = 1;
+ let hasSideEffects = 1;
}
+} // End mayLoad = 1
-let mayLoad = 0, mayStore =0 in {
+let mayLoad = 0, mayStore = 0, Defs = [MODE], Uses = [MODE] in {
-def S_SETREG_B32 : SOPK_Pseudo <
+// FIXME: Need to truncate immediate to 16-bits.
+class S_SETREG_B32_Pseudo <list<dag> pattern=[]> : SOPK_Pseudo <
"s_setreg_b32",
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
"$simm16, $sdst",
- [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
+ pattern>;
+def S_SETREG_B32 : S_SETREG_B32_Pseudo <
+ [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
// Use custom inserter to optimize some cases to
- // S_DENORM_MODE/S_ROUND_MODE.
+ // S_DENORM_MODE/S_ROUND_MODE/S_SETREG_B32_mode.
let usesCustomInserter = 1;
- let Defs = [MODE];
- let Uses = [MODE];
+ let hasSideEffects = 1;
+}
+
+// Variant of SETREG that is guaranteed to only touch FP bits in the MODE
+// register, so doesn't have unmodeled side effects.
+def S_SETREG_B32_mode : S_SETREG_B32_Pseudo {
+ let hasSideEffects = 0;
}
// FIXME: Not on SI?
//def S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32">;
-def S_SETREG_IMM32_B32 : SOPK_Pseudo <
+class S_SETREG_IMM32_B32_Pseudo : SOPK_Pseudo <
"s_setreg_imm32_b32",
(outs), (ins i32imm:$imm, hwreg:$simm16),
"$simm16, $imm"> {
let Size = 8; // Unlike every other SOPK instruction.
let has_sdst = 0;
- let Defs = [MODE];
- let Uses = [MODE];
}
+def S_SETREG_IMM32_B32 : S_SETREG_IMM32_B32_Pseudo {
+ let hasSideEffects = 1;
+}
+
+// Variant of SETREG_IMM32 that is guaranteed to only touch FP bits in the MODE
+// register, so doesn't have unmodeled side effects.
+def S_SETREG_IMM32_B32_mode : S_SETREG_IMM32_B32_Pseudo {
+ let hasSideEffects = 0;
}
-} // End hasSideEffects = 1
+
+} // End mayLoad = 0, mayStore = 0, Defs = [MODE], Uses = [MODE]
class SOPK_WAITCNT<string opName, list<dag> pat=[]> :
SOPK_Pseudo<
@@ -891,88 +930,101 @@ let SubtargetPredicate = isGFX10Plus in {
// SOPC Instructions
//===----------------------------------------------------------------------===//
-class SOPCe <bits<7> op> : Enc32 {
- bits<8> src0;
- bits<8> src1;
-
- let Inst{7-0} = src0;
- let Inst{15-8} = src1;
- let Inst{22-16} = op;
- let Inst{31-23} = 0x17e;
-}
-
-class SOPC <bits<7> op, dag outs, dag ins, string asm,
- list<dag> pattern = []> :
- InstSI<outs, ins, asm, pattern>, SOPCe <op> {
+class SOPC_Pseudo<string opName, dag outs, dag ins,
+ string asmOps, list<dag> pattern=[]> :
+ SOP_Pseudo<opName, outs, ins, asmOps, pattern> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let SALU = 1;
let SOPC = 1;
- let isCodeGenOnly = 0;
let Defs = [SCC];
let SchedRW = [WriteSALU];
let UseNamedOperandTable = 1;
}
-class SOPC_Base <bits<7> op, RegisterOperand rc0, RegisterOperand rc1,
- string opName, list<dag> pattern = []> : SOPC <
- op, (outs), (ins rc0:$src0, rc1:$src1),
- opName#" $src0, $src1", pattern > {
- let Defs = [SCC];
+class SOPC_Real<bits<7> op, SOPC_Pseudo ps, string real_name = ps.Mnemonic> :
+ InstSI <ps.OutOperandList, ps.InOperandList,
+ real_name # " " # ps.AsmOperands, []>,
+ Enc32 {
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let OtherPredicates = ps.OtherPredicates;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let TSFlags = ps.TSFlags;
+
+ // encoding
+ bits<8> src0;
+ bits<8> src1;
+
+ let Inst{7-0} = src0;
+ let Inst{15-8} = src1;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17e;
}
-class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
+
+class SOPC_Base <RegisterOperand rc0, RegisterOperand rc1,
+ string opName, list<dag> pattern = []> : SOPC_Pseudo <
+ opName, (outs), (ins rc0:$src0, rc1:$src1),
+ "$src0, $src1", pattern > {
+}
+
+class SOPC_Helper <RegisterOperand rc, ValueType vt,
string opName, SDPatternOperator cond> : SOPC_Base <
- op, rc, rc, opName,
+ rc, rc, opName,
[(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > {
}
-class SOPC_CMP_32<bits<7> op, string opName,
+class SOPC_CMP_32<string opName,
SDPatternOperator cond = COND_NULL, string revOp = opName>
- : SOPC_Helper<op, SSrc_b32, i32, opName, cond>,
+ : SOPC_Helper<SSrc_b32, i32, opName, cond>,
Commutable_REV<revOp, !eq(revOp, opName)>,
SOPKInstTable<0, opName> {
let isCompare = 1;
let isCommutable = 1;
}
-class SOPC_CMP_64<bits<7> op, string opName,
+class SOPC_CMP_64<string opName,
SDPatternOperator cond = COND_NULL, string revOp = opName>
- : SOPC_Helper<op, SSrc_b64, i64, opName, cond>,
+ : SOPC_Helper<SSrc_b64, i64, opName, cond>,
Commutable_REV<revOp, !eq(revOp, opName)> {
let isCompare = 1;
let isCommutable = 1;
}
-class SOPC_32<bits<7> op, string opName, list<dag> pattern = []>
- : SOPC_Base<op, SSrc_b32, SSrc_b32, opName, pattern>;
-
-class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []>
- : SOPC_Base<op, SSrc_b64, SSrc_b32, opName, pattern>;
-
-def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32">;
-def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32">;
-def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>;
-def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>;
-def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">;
-def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE, "s_cmp_ge_i32">;
-def S_CMP_EQ_U32 : SOPC_CMP_32 <0x06, "s_cmp_eq_u32", COND_EQ>;
-def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE>;
-def S_CMP_GT_U32 : SOPC_CMP_32 <0x08, "s_cmp_gt_u32", COND_UGT>;
-def S_CMP_GE_U32 : SOPC_CMP_32 <0x09, "s_cmp_ge_u32", COND_UGE>;
-def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT, "s_cmp_gt_u32">;
-def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE, "s_cmp_ge_u32">;
-
-def S_BITCMP0_B32 : SOPC_32 <0x0c, "s_bitcmp0_b32">;
-def S_BITCMP1_B32 : SOPC_32 <0x0d, "s_bitcmp1_b32">;
-def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">;
-def S_BITCMP1_B64 : SOPC_64_32 <0x0f, "s_bitcmp1_b64">;
+class SOPC_32<string opName, list<dag> pattern = []>
+ : SOPC_Base<SSrc_b32, SSrc_b32, opName, pattern>;
+
+class SOPC_64_32<string opName, list<dag> pattern = []>
+ : SOPC_Base<SSrc_b64, SSrc_b32, opName, pattern>;
+
+def S_CMP_EQ_I32 : SOPC_CMP_32 <"s_cmp_eq_i32">;
+def S_CMP_LG_I32 : SOPC_CMP_32 <"s_cmp_lg_i32">;
+def S_CMP_GT_I32 : SOPC_CMP_32 <"s_cmp_gt_i32", COND_SGT>;
+def S_CMP_GE_I32 : SOPC_CMP_32 <"s_cmp_ge_i32", COND_SGE>;
+def S_CMP_LT_I32 : SOPC_CMP_32 <"s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">;
+def S_CMP_LE_I32 : SOPC_CMP_32 <"s_cmp_le_i32", COND_SLE, "s_cmp_ge_i32">;
+def S_CMP_EQ_U32 : SOPC_CMP_32 <"s_cmp_eq_u32", COND_EQ>;
+def S_CMP_LG_U32 : SOPC_CMP_32 <"s_cmp_lg_u32", COND_NE>;
+def S_CMP_GT_U32 : SOPC_CMP_32 <"s_cmp_gt_u32", COND_UGT>;
+def S_CMP_GE_U32 : SOPC_CMP_32 <"s_cmp_ge_u32", COND_UGE>;
+def S_CMP_LT_U32 : SOPC_CMP_32 <"s_cmp_lt_u32", COND_ULT, "s_cmp_gt_u32">;
+def S_CMP_LE_U32 : SOPC_CMP_32 <"s_cmp_le_u32", COND_ULE, "s_cmp_ge_u32">;
+
+def S_BITCMP0_B32 : SOPC_32 <"s_bitcmp0_b32">;
+def S_BITCMP1_B32 : SOPC_32 <"s_bitcmp1_b32">;
+def S_BITCMP0_B64 : SOPC_64_32 <"s_bitcmp0_b64">;
+def S_BITCMP1_B64 : SOPC_64_32 <"s_bitcmp1_b64">;
let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in
-def S_SETVSKIP : SOPC_32 <0x10, "s_setvskip">;
+def S_SETVSKIP : SOPC_32 <"s_setvskip">;
let SubtargetPredicate = isGFX8Plus in {
-def S_CMP_EQ_U64 : SOPC_CMP_64 <0x12, "s_cmp_eq_u64", COND_EQ>;
-def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>;
+def S_CMP_EQ_U64 : SOPC_CMP_64 <"s_cmp_eq_u64", COND_EQ>;
+def S_CMP_LG_U64 : SOPC_CMP_64 <"s_cmp_lg_u64", COND_NE>;
} // End SubtargetPredicate = isGFX8Plus
let SubtargetPredicate = HasVGPRIndexMode in {
@@ -980,10 +1032,11 @@ let SubtargetPredicate = HasVGPRIndexMode in {
// register. We don't want to add mode register uses to every
// instruction, and it's too complicated to deal with anyway. This is
// modeled just as a side effect.
-def S_SET_GPR_IDX_ON : SOPC <0x11,
+def S_SET_GPR_IDX_ON : SOPC_Pseudo <
+ "s_set_gpr_idx_on" ,
(outs),
(ins SSrc_b32:$src0, GPRIdxMode:$src1),
- "s_set_gpr_idx_on $src0,$src1"> {
+ "$src0, $src1"> {
let Defs = [M0, MODE]; // No scc def
let Uses = [M0, MODE]; // Other bits of mode, m0 unmodified.
let hasSideEffects = 1; // Sets mode.gpr_idx_en
@@ -995,225 +1048,239 @@ def S_SET_GPR_IDX_ON : SOPC <0x11,
// SOPP Instructions
//===----------------------------------------------------------------------===//
-class Base_SOPP <string asm> {
- string AsmString = asm;
-}
-
-class SOPPe <bits<7> op> : Enc32 {
- bits <16> simm16;
-
- let Inst{15-0} = simm16;
- let Inst{22-16} = op;
- let Inst{31-23} = 0x17f; // encoding
-}
-
-class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
- InstSI <(outs), ins, asm, pattern >, SOPPe <op>, Base_SOPP <asm> {
-
+class SOPP_Pseudo<string opName, dag ins,
+ string asmOps = "", list<dag> pattern=[], string keyName = opName> :
+ SOP_Pseudo<opName, (outs), ins, asmOps, pattern> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let SALU = 1;
let SOPP = 1;
- let Size = 4;
+ let FixedSize = 1;
let SchedRW = [WriteSALU];
-
let UseNamedOperandTable = 1;
+ bits <16> simm16;
+ bits <1> fixed_imm = 0;
+ string KeyName = keyName;
}
-def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
+class SOPPRelaxTable <bit isRelaxed, string keyName, string gfxip> {
+ bit IsRelaxed = isRelaxed;
+ string KeyName = keyName # gfxip;
+}
+
+// Spaces are inserted in real_name when this record is instantiated, so that s_endpgm can omit the trailing whitespace.
+class SOPP_Real<bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> :
+ InstSI <ps.OutOperandList, ps.InOperandList,
+ real_name # ps.AsmOperands, []> {
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
-class SOPP_w_nop_e <bits<7> op> : Enc64 {
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let OtherPredicates = ps.OtherPredicates;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let TSFlags = ps.TSFlags;
bits <16> simm16;
+}
- let Inst{15-0} = simm16;
+class SOPP_Real_32 <bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real<op, ps, real_name>,
+Enc32 {
+ let Inst{15-0} = !if(ps.fixed_imm, ps.simm16, simm16);
let Inst{22-16} = op;
- let Inst{31-23} = 0x17f; // encoding
- let Inst{47-32} = 0x0;
- let Inst{54-48} = S_NOP.Inst{22-16}; // opcode
- let Inst{63-55} = S_NOP.Inst{31-23}; // encoding
+ let Inst{31-23} = 0x17f;
}
-class SOPP_w_nop <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
- InstSI <(outs), ins, asm, pattern >, SOPP_w_nop_e <op>, Base_SOPP <asm> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let SALU = 1;
- let SOPP = 1;
- let Size = 8;
- let SchedRW = [WriteSALU];
-
- let UseNamedOperandTable = 1;
+class SOPP_Real_64 <bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real<op, ps, real_name>,
+Enc64 {
+ // encoding
+ let Inst{15-0} = !if(ps.fixed_imm, ps.simm16, simm16);
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f;
+ //effectively a nop
+ let Inst{47-32} = 0x0;
+ let Inst{54-48} = 0x0;
+ let Inst{63-55} = 0x17f;
}
-multiclass SOPP_With_Relaxation <bits<7> op, dag ins, string asm, list<dag> pattern = []> {
- def "" : SOPP <op, ins, asm, pattern>;
- def _pad_s_nop : SOPP_w_nop <op, ins, asm, pattern>;
+multiclass SOPP_With_Relaxation <string opName, dag ins,
+ string asmOps, list<dag> pattern=[]> {
+ def "" : SOPP_Pseudo <opName, ins, asmOps, pattern>;
+ def _pad_s_nop : SOPP_Pseudo <opName # "_pad_s_nop", ins, asmOps, pattern, opName>;
}
-let isTerminator = 1 in {
+def S_NOP : SOPP_Pseudo<"s_nop" , (ins i16imm:$simm16), "$simm16">;
-def S_ENDPGM : SOPP <0x00000001, (ins EndpgmImm:$simm16), "s_endpgm$simm16"> {
+let isTerminator = 1 in {
+def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins EndpgmImm:$simm16), "$simm16"> {
let isBarrier = 1;
let isReturn = 1;
+ let hasSideEffects = 1;
}
-def S_ENDPGM_SAVED : SOPP <0x0000001B, (ins), "s_endpgm_saved"> {
+def S_ENDPGM_SAVED : SOPP_Pseudo<"s_endpgm_saved", (ins)> {
let SubtargetPredicate = isGFX8Plus;
let simm16 = 0;
+ let fixed_imm = 1;
let isBarrier = 1;
let isReturn = 1;
}
let SubtargetPredicate = isGFX9Plus in {
- let isBarrier = 1, isReturn = 1, simm16 = 0 in {
+ let isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1 in {
def S_ENDPGM_ORDERED_PS_DONE :
- SOPP<0x01e, (ins), "s_endpgm_ordered_ps_done">;
- } // End isBarrier = 1, isReturn = 1, simm16 = 0
+ SOPP_Pseudo<"s_endpgm_ordered_ps_done", (ins)>;
+ } // End isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1
} // End SubtargetPredicate = isGFX9Plus
let SubtargetPredicate = isGFX10Plus in {
- let isBarrier = 1, isReturn = 1, simm16 = 0 in {
+ let isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1 in {
def S_CODE_END :
- SOPP<0x01f, (ins), "s_code_end">;
- } // End isBarrier = 1, isReturn = 1, simm16 = 0
+ SOPP_Pseudo<"s_code_end", (ins)>;
+ } // End isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1
} // End SubtargetPredicate = isGFX10Plus
let isBranch = 1, SchedRW = [WriteBranch] in {
let isBarrier = 1 in {
-defm S_BRANCH : SOPP_With_Relaxation <
- 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
+defm S_BRANCH : SOPP_With_Relaxation<
+ "s_branch" , (ins sopp_brtarget:$simm16), "$simm16",
[(br bb:$simm16)]>;
}
let Uses = [SCC] in {
-defm S_CBRANCH_SCC0 : SOPP_With_Relaxation <
- 0x00000004, (ins sopp_brtarget:$simm16),
- "s_cbranch_scc0 $simm16"
+defm S_CBRANCH_SCC0 : SOPP_With_Relaxation<
+ "s_cbranch_scc0" , (ins sopp_brtarget:$simm16),
+ "$simm16"
>;
defm S_CBRANCH_SCC1 : SOPP_With_Relaxation <
- 0x00000005, (ins sopp_brtarget:$simm16),
- "s_cbranch_scc1 $simm16"
+ "s_cbranch_scc1" , (ins sopp_brtarget:$simm16),
+ "$simm16"
>;
} // End Uses = [SCC]
let Uses = [VCC] in {
defm S_CBRANCH_VCCZ : SOPP_With_Relaxation <
- 0x00000006, (ins sopp_brtarget:$simm16),
- "s_cbranch_vccz $simm16"
+ "s_cbranch_vccz" , (ins sopp_brtarget:$simm16),
+ "$simm16"
>;
defm S_CBRANCH_VCCNZ : SOPP_With_Relaxation <
- 0x00000007, (ins sopp_brtarget:$simm16),
- "s_cbranch_vccnz $simm16"
+ "s_cbranch_vccnz" , (ins sopp_brtarget:$simm16),
+ "$simm16"
>;
} // End Uses = [VCC]
let Uses = [EXEC] in {
defm S_CBRANCH_EXECZ : SOPP_With_Relaxation <
- 0x00000008, (ins sopp_brtarget:$simm16),
- "s_cbranch_execz $simm16"
+ "s_cbranch_execz" , (ins sopp_brtarget:$simm16),
+ "$simm16"
>;
defm S_CBRANCH_EXECNZ : SOPP_With_Relaxation <
- 0x00000009, (ins sopp_brtarget:$simm16),
- "s_cbranch_execnz $simm16"
+ "s_cbranch_execnz" , (ins sopp_brtarget:$simm16),
+ "$simm16"
>;
} // End Uses = [EXEC]
defm S_CBRANCH_CDBGSYS : SOPP_With_Relaxation <
- 0x00000017, (ins sopp_brtarget:$simm16),
- "s_cbranch_cdbgsys $simm16"
+ "s_cbranch_cdbgsys" , (ins sopp_brtarget:$simm16),
+ "$simm16"
>;
defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_With_Relaxation <
- 0x0000001A, (ins sopp_brtarget:$simm16),
- "s_cbranch_cdbgsys_and_user $simm16"
+ "s_cbranch_cdbgsys_and_user" , (ins sopp_brtarget:$simm16),
+ "$simm16"
>;
defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_With_Relaxation <
- 0x00000019, (ins sopp_brtarget:$simm16),
- "s_cbranch_cdbgsys_or_user $simm16"
+ "s_cbranch_cdbgsys_or_user" , (ins sopp_brtarget:$simm16),
+ "$simm16"
>;
defm S_CBRANCH_CDBGUSER : SOPP_With_Relaxation <
- 0x00000018, (ins sopp_brtarget:$simm16),
- "s_cbranch_cdbguser $simm16"
+ "s_cbranch_cdbguser" , (ins sopp_brtarget:$simm16),
+ "$simm16"
>;
} // End isBranch = 1
} // End isTerminator = 1
let hasSideEffects = 1 in {
-def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
+def S_BARRIER : SOPP_Pseudo <"s_barrier", (ins), "",
[(int_amdgcn_s_barrier)]> {
let SchedRW = [WriteBarrier];
let simm16 = 0;
+ let fixed_imm = 1;
let isConvergent = 1;
}
-def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
+def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
let SubtargetPredicate = isGFX8Plus;
let simm16 = 0;
+ let fixed_imm = 1;
let mayLoad = 1;
let mayStore = 1;
}
let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
-def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
+def S_WAITCNT : SOPP_Pseudo <"s_waitcnt" , (ins WAIT_FLAG:$simm16), "$simm16",
[(int_amdgcn_s_waitcnt timm:$simm16)]>;
-def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
-def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
+def S_SETHALT : SOPP_Pseudo <"s_sethalt" , (ins i16imm:$simm16), "$simm16">;
+def S_SETKILL : SOPP_Pseudo <"s_setkill" , (ins i16imm:$simm16), "$simm16">;
// On SI the documentation says sleep for approximately 64 * low 2
// bits, consistent with the reported maximum of 448. On VI the
// maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the
// maximum really 15 on VI?
-def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
- "s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
+def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16),
+ "$simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 0;
let mayStore = 0;
}
-def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
+def S_SETPRIO : SOPP_Pseudo <"s_setprio" , (ins i16imm:$simm16), "$simm16">;
let Uses = [EXEC, M0] in {
// FIXME: Should this be mayLoad+mayStore?
-def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
+def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsgImm:$simm16), "$simm16",
[(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>;
-def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
+def S_SENDMSGHALT : SOPP_Pseudo <"s_sendmsghalt" , (ins SendMsgImm:$simm16), "$simm16",
[(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>;
} // End Uses = [EXEC, M0]
-def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16"> {
+def S_TRAP : SOPP_Pseudo <"s_trap" , (ins i16imm:$simm16), "$simm16"> {
let isTrap = 1;
}
-def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
+def S_ICACHE_INV : SOPP_Pseudo <"s_icache_inv", (ins)> {
let simm16 = 0;
+ let fixed_imm = 1;
}
-def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
+def S_INCPERFLEVEL : SOPP_Pseudo <"s_incperflevel", (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_incperflevel timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 0;
let mayStore = 0;
}
-def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
+def S_DECPERFLEVEL : SOPP_Pseudo <"s_decperflevel", (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 0;
let mayStore = 0;
}
-def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
+def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins)> {
let simm16 = 0;
+ let fixed_imm = 1;
}
let SubtargetPredicate = HasVGPRIndexMode in {
-def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> {
+def S_SET_GPR_IDX_OFF : SOPP_Pseudo<"s_set_gpr_idx_off", (ins) > {
let simm16 = 0;
+ let fixed_imm = 1;
let Defs = [MODE];
let Uses = [MODE];
}
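
The SOPP_Real_32 and SOPP_Real_64 classes above pin down the instruction word layout: bits 31-23 hold the SOPP encoding 0x17f, bits 22-16 the opcode, bits 15-0 the immediate, and the relaxed _pad_s_nop form appends a second dword that is simply an s_nop 0. A small sanity-check sketch of those fields follows; the opcode values 0x000/0x001 come from the defm lines further down, everything else here is illustrative.

    #include <cstdint>
    #include <cstdio>

    // 32-bit SOPP encoding per SOPP_Real_32 above:
    // bits 31-23 = 0x17f, bits 22-16 = opcode, bits 15-0 = simm16.
    static uint32_t encodeSOPP(uint32_t Op, uint16_t Simm16) {
      return (0x17fu << 23) | ((Op & 0x7fu) << 16) | Simm16;
    }

    int main() {
      // s_endpgm is opcode 0x001 with simm16 = 0.
      uint32_t EndPgm = encodeSOPP(0x001, 0); // 0xBF810000
      // The relaxed (_pad_s_nop) 64-bit form appends an s_nop 0 dword,
      // i.e. SOPP opcode 0x000 with simm16 = 0.
      uint32_t PadNop = encodeSOPP(0x000, 0); // 0xBF800000
      std::printf("s_endpgm: 0x%08X, pad s_nop: 0x%08X\n",
                  (unsigned)EndPgm, (unsigned)PadNop);
    }
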
@@ -1221,8 +1288,8 @@ def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> {
} // End hasSideEffects
let SubtargetPredicate = HasVGPRIndexMode in {
-def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16),
- "s_set_gpr_idx_mode$simm16"> {
+def S_SET_GPR_IDX_MODE : SOPP_Pseudo<"s_set_gpr_idx_mode", (ins GPRIdxMode:$simm16),
+ "$simm16"> {
let Defs = [M0, MODE];
let Uses = [MODE];
}
@@ -1230,37 +1297,30 @@ def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16),
let SubtargetPredicate = isGFX10Plus in {
def S_INST_PREFETCH :
- SOPP<0x020, (ins s16imm:$simm16), "s_inst_prefetch $simm16">;
+ SOPP_Pseudo<"s_inst_prefetch", (ins s16imm:$simm16), "$simm16">;
def S_CLAUSE :
- SOPP<0x021, (ins s16imm:$simm16), "s_clause $simm16">;
- def S_WAITCNT_IDLE :
- SOPP <0x022, (ins), "s_wait_idle"> {
+ SOPP_Pseudo<"s_clause", (ins s16imm:$simm16), "$simm16">;
+ def S_WAIT_IDLE :
+ SOPP_Pseudo <"s_wait_idle", (ins), ""> {
let simm16 = 0;
+ let fixed_imm = 1;
}
def S_WAITCNT_DEPCTR :
- SOPP <0x023, (ins s16imm:$simm16), "s_waitcnt_depctr $simm16">;
+ SOPP_Pseudo <"s_waitcnt_depctr" , (ins s16imm:$simm16), "$simm16">;
let hasSideEffects = 0, Uses = [MODE], Defs = [MODE] in {
def S_ROUND_MODE :
- SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
+ SOPP_Pseudo<"s_round_mode", (ins s16imm:$simm16), "$simm16">;
def S_DENORM_MODE :
- SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16",
+ SOPP_Pseudo<"s_denorm_mode", (ins i32imm:$simm16), "$simm16",
[(SIdenorm_mode (i32 timm:$simm16))]>;
}
def S_TTRACEDATA_IMM :
- SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">;
+ SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16">;
} // End SubtargetPredicate = isGFX10Plus
//===----------------------------------------------------------------------===//
-// S_GETREG_B32 Intrinsic Pattern.
-//===----------------------------------------------------------------------===//
-def : GCNPat <
- (int_amdgcn_s_getreg timm:$simm16),
- (S_GETREG_B32 (as_i16imm $simm16))
->;
-
-//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//
@@ -1270,6 +1330,11 @@ def : GCNPat <
>;
def : GCNPat <
+ (int_amdgcn_endpgm),
+ (S_ENDPGM (i16 0))
+>;
+
+def : GCNPat <
(i64 (ctpop i64:$src)),
(i64 (REG_SEQUENCE SReg_64,
(i32 (COPY_TO_REGCLASS (S_BCNT1_I32_B64 $src), SReg_32)), sub0,
@@ -1325,13 +1390,27 @@ def : GCNPat<
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), $src)
>;
+// FIXME: ValueType should have isVector field
+class ScalarNot2Pat<Instruction inst, SDPatternOperator op, ValueType vt,
+ bit isVector = 1> : GCNPat<
+ (UniformBinFrag<op> vt:$src0, (UniformUnaryFrag<!if(isVector, vnot, not)> vt:$src1)),
+ (inst getSOPSrcForVT<vt>.ret:$src0, getSOPSrcForVT<vt>.ret:$src1)
+>;
-//===----------------------------------------------------------------------===//
-// Target-specific instruction encodings.
-//===----------------------------------------------------------------------===//
+// Match these for some more types
+// TODO: i1
+def : ScalarNot2Pat<S_ANDN2_B32, and, i16, 0>;
+def : ScalarNot2Pat<S_ANDN2_B32, and, v2i16>;
+def : ScalarNot2Pat<S_ANDN2_B64, and, v4i16>;
+def : ScalarNot2Pat<S_ANDN2_B64, and, v2i32>;
+
+def : ScalarNot2Pat<S_ORN2_B32, or, i16, 0>;
+def : ScalarNot2Pat<S_ORN2_B32, or, v2i16>;
+def : ScalarNot2Pat<S_ORN2_B64, or, v4i16>;
+def : ScalarNot2Pat<S_ORN2_B64, or, v2i32>;
//===----------------------------------------------------------------------===//
-// SOP1 - GFX10.
+// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
class Select_gfx10<string opName> : SIMCInstr<opName, SIEncodingFamily.GFX10> {
@@ -1339,6 +1418,20 @@ class Select_gfx10<string opName> : SIMCInstr<opName, SIEncodingFamily.GFX10> {
string DecoderNamespace = "GFX10";
}
+class Select_vi<string opName> : SIMCInstr<opName, SIEncodingFamily.VI> {
+ Predicate AssemblerPredicate = isGFX8GFX9;
+ string DecoderNamespace = "GFX8";
+}
+
+class Select_gfx6_gfx7<string opName> : SIMCInstr<opName, SIEncodingFamily.SI> {
+ Predicate AssemblerPredicate = isGFX6GFX7;
+ string DecoderNamespace = "GFX6GFX7";
+}
+
+//===----------------------------------------------------------------------===//
+// SOP1 - GFX10.
+//===----------------------------------------------------------------------===//
+
multiclass SOP1_Real_gfx10<bits<8> op> {
def _gfx10 : SOP1_Real<op, !cast<SOP1_Pseudo>(NAME)>,
Select_gfx10<!cast<SOP1_Pseudo>(NAME).Mnemonic>;
@@ -1367,10 +1460,6 @@ defm S_MOVRELSD_2_B32 : SOP1_Real_gfx10<0x049>;
// SOP1 - GFX6, GFX7.
//===----------------------------------------------------------------------===//
-class Select_gfx6_gfx7<string opName> : SIMCInstr<opName, SIEncodingFamily.SI> {
- Predicate AssemblerPredicate = isGFX6GFX7;
- string DecoderNamespace = "GFX6GFX7";
-}
multiclass SOP1_Real_gfx6_gfx7<bits<8> op> {
def _gfx6_gfx7 : SOP1_Real<op, !cast<SOP1_Pseudo>(NAME)>,
@@ -1381,7 +1470,6 @@ multiclass SOP1_Real_gfx6_gfx7_gfx10<bits<8> op> :
SOP1_Real_gfx6_gfx7<op>, SOP1_Real_gfx10<op>;
defm S_CBRANCH_JOIN : SOP1_Real_gfx6_gfx7<0x032>;
-defm S_MOV_REGRD_B32 : SOP1_Real_gfx6_gfx7<0x033>;
defm S_MOV_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x003>;
defm S_MOV_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x004>;
@@ -1430,7 +1518,6 @@ defm S_MOVRELS_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02f>;
defm S_MOVRELD_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x030>;
defm S_MOVRELD_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x031>;
defm S_ABS_I32 : SOP1_Real_gfx6_gfx7_gfx10<0x034>;
-defm S_MOV_FED_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x035>;
//===----------------------------------------------------------------------===//
// SOP2 - GFX10.
@@ -1574,15 +1661,163 @@ defm S_SETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x013>;
defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx6_gfx7_gfx10<0x015>;
//===----------------------------------------------------------------------===//
-// GFX8, GFX9 (VI).
+// SOPP - GFX6, GFX7, GFX8, GFX9, GFX10
//===----------------------------------------------------------------------===//
-class Select_vi<string opName> :
- SIMCInstr<opName, SIEncodingFamily.VI> {
- Predicate AssemblerPredicate = isGFX8GFX9;
- string DecoderNamespace = "GFX8";
+multiclass SOPP_Real_32_gfx6_gfx7<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic> {
+ def _gfx6_gfx7 : SOPP_Real_32<op, !cast<SOPP_Pseudo>(NAME), real_name>,
+ Select_gfx6_gfx7<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ SOPPRelaxTable<0, !cast<SOPP_Pseudo>(NAME).KeyName, "_gfx6_gfx7">;
+}
+
+multiclass SOPP_Real_32_gfx8_gfx9<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> {
+ def _vi : SOPP_Real_32<op, !cast<SOPP_Pseudo>(NAME), real_name>,
+ Select_vi<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ SOPPRelaxTable<0, !cast<SOPP_Pseudo>(NAME).KeyName, "_vi">;
+}
+
+multiclass SOPP_Real_32_gfx10<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> {
+ def _gfx10 : SOPP_Real_32<op, !cast<SOPP_Pseudo>(NAME), real_name>,
+ Select_gfx10<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ SOPPRelaxTable<0, !cast<SOPP_Pseudo>(NAME).KeyName, "_gfx10">;
+}
+
+multiclass SOPP_Real_32_gfx8_gfx9_gfx10<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+ SOPP_Real_32_gfx8_gfx9<op, real_name>, SOPP_Real_32_gfx10<op, real_name>;
+
+multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+ SOPP_Real_32_gfx6_gfx7<op, real_name>, SOPP_Real_32_gfx8_gfx9<op, real_name>;
+
+multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+ SOPP_Real_32_gfx6_gfx7_gfx8_gfx9<op, real_name>, SOPP_Real_32_gfx10<op, real_name>;
+
+// 64-bit encodings, for relaxation.
+multiclass SOPP_Real_64_gfx6_gfx7<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> {
+ def _gfx6_gfx7 : SOPP_Real_64<op, !cast<SOPP_Pseudo>(NAME), real_name>,
+ Select_gfx6_gfx7<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ SOPPRelaxTable<1, !cast<SOPP_Pseudo>(NAME).KeyName, "_gfx6_gfx7">;
+}
+
+multiclass SOPP_Real_64_gfx8_gfx9<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> {
+ def _vi : SOPP_Real_64<op, !cast<SOPP_Pseudo>(NAME), real_name>,
+ Select_vi<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ SOPPRelaxTable<1, !cast<SOPP_Pseudo>(NAME).KeyName, "_vi">;
+}
+
+multiclass SOPP_Real_64_gfx10<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> {
+ def _gfx10 : SOPP_Real_64<op, !cast<SOPP_Pseudo>(NAME), real_name>,
+ Select_gfx10<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ SOPPRelaxTable<1, !cast<SOPP_Pseudo>(NAME).KeyName, "_gfx10">;
+}
+
+multiclass SOPP_Real_64_gfx8_gfx9_gfx10<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+ SOPP_Real_64_gfx8_gfx9<op, real_name>, SOPP_Real_64_gfx10<op, real_name>;
+
+multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+ SOPP_Real_64_gfx6_gfx7<op, real_name>, SOPP_Real_64_gfx8_gfx9<op, real_name>;
+
+multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+ SOPP_Real_64_gfx6_gfx7_gfx8_gfx9<op, real_name>, SOPP_Real_64_gfx10<op, real_name>;
+
+// Relaxation for instructions with no operands is not implemented.
+multiclass SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> {
+ defm "" : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
+ defm _pad_s_nop : SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
+}
+
+defm S_NOP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x000>;
+defm S_ENDPGM : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x001, "s_endpgm">;
+defm S_BRANCH : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x002>;
+defm S_WAKEUP : SOPP_Real_32_gfx8_gfx9_gfx10<0x003>;
+defm S_CBRANCH_SCC0 : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x004>;
+defm S_CBRANCH_SCC1 : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x005>;
+defm S_CBRANCH_VCCZ : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x006>;
+defm S_CBRANCH_VCCNZ : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x007>;
+defm S_CBRANCH_EXECZ : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x008>;
+defm S_CBRANCH_EXECNZ : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x009>;
+defm S_CBRANCH_CDBGSYS : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x017>;
+defm S_CBRANCH_CDBGUSER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x018>;
+defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x019>;
+defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x01A>;
+defm S_BARRIER : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00a>;
+defm S_WAITCNT : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00c>;
+defm S_SETHALT : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00d>;
+defm S_SETKILL : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00b>;
+defm S_SLEEP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00e>;
+defm S_SETPRIO : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00f>;
+defm S_SENDMSG : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x010>;
+defm S_SENDMSGHALT : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x011>;
+defm S_TRAP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x012>;
+defm S_ICACHE_INV : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x013>;
+defm S_INCPERFLEVEL : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x014>;
+defm S_DECPERFLEVEL : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x015>;
+defm S_TTRACEDATA : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x016>;
+defm S_ENDPGM_SAVED : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x01B>;
+defm S_SET_GPR_IDX_OFF : SOPP_Real_32_gfx8_gfx9<0x01c>;
+defm S_SET_GPR_IDX_MODE : SOPP_Real_32_gfx8_gfx9<0x01d>;
+defm S_ENDPGM_ORDERED_PS_DONE : SOPP_Real_32_gfx8_gfx9_gfx10<0x01e>;
+defm S_CODE_END : SOPP_Real_32_gfx10<0x01f>;
+defm S_INST_PREFETCH : SOPP_Real_32_gfx10<0x020>;
+defm S_CLAUSE : SOPP_Real_32_gfx10<0x021>;
+defm S_WAIT_IDLE : SOPP_Real_32_gfx10<0x022>;
+defm S_WAITCNT_DEPCTR : SOPP_Real_32_gfx10<0x023>;
+defm S_ROUND_MODE : SOPP_Real_32_gfx10<0x024>;
+defm S_DENORM_MODE : SOPP_Real_32_gfx10<0x025>;
+defm S_TTRACEDATA_IMM : SOPP_Real_32_gfx10<0x028>;
+
+//===----------------------------------------------------------------------===//
+// SOPC - GFX6, GFX7, GFX8, GFX9, GFX10
+//===----------------------------------------------------------------------===//
+
+multiclass SOPC_Real_gfx6_gfx7<bits<7> op> {
+ def _gfx6_gfx7 : SOPC_Real<op, !cast<SOPC_Pseudo>(NAME)>,
+ Select_gfx6_gfx7<!cast<SOPC_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOPC_Real_gfx8_gfx9<bits<7> op> {
+ def _vi : SOPC_Real<op, !cast<SOPC_Pseudo>(NAME)>,
+ Select_vi<!cast<SOPC_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOPC_Real_gfx10<bits<7> op> {
+ def _gfx10 : SOPC_Real<op, !cast<SOPC_Pseudo>(NAME)>,
+ Select_gfx10<!cast<SOPC_Pseudo>(NAME).Mnemonic>;
}
+multiclass SOPC_Real_gfx8_gfx9_gfx10<bits<7> op> :
+ SOPC_Real_gfx8_gfx9<op>, SOPC_Real_gfx10<op>;
+
+multiclass SOPC_Real_gfx6_gfx7_gfx8_gfx9<bits<7> op> :
+ SOPC_Real_gfx6_gfx7<op>, SOPC_Real_gfx8_gfx9<op>;
+
+multiclass SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> :
+ SOPC_Real_gfx6_gfx7_gfx8_gfx9<op>, SOPC_Real_gfx10<op>;
+
+defm S_CMP_EQ_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x00>;
+defm S_CMP_LG_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x01>;
+defm S_CMP_GT_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x02>;
+defm S_CMP_GE_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x03>;
+defm S_CMP_LT_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x04>;
+defm S_CMP_LE_I32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x05>;
+defm S_CMP_EQ_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x06>;
+defm S_CMP_LG_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x07>;
+defm S_CMP_GT_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x08>;
+defm S_CMP_GE_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x09>;
+defm S_CMP_LT_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0a>;
+defm S_CMP_LE_U32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0b>;
+defm S_BITCMP0_B32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0c>;
+defm S_BITCMP1_B32 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0d>;
+defm S_BITCMP0_B64 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0e>;
+defm S_BITCMP1_B64 : SOPC_Real_gfx6_gfx7_gfx8_gfx9_gfx10<0x0f>;
+defm S_SETVSKIP : SOPC_Real_gfx6_gfx7_gfx8_gfx9<0x10>;
+defm S_SET_GPR_IDX_ON : SOPC_Real_gfx8_gfx9<0x11>;
+defm S_CMP_EQ_U64 : SOPC_Real_gfx8_gfx9_gfx10<0x12>;
+defm S_CMP_LG_U64 : SOPC_Real_gfx8_gfx9_gfx10<0x13>;
+
+//===----------------------------------------------------------------------===//
+// GFX8 (VI), GFX9.
+//===----------------------------------------------------------------------===//
+
class SOP1_Real_vi<bits<8> op, SOP1_Pseudo ps> :
SOP1_Real<op, ps>,
Select_vi<ps.Mnemonic>;
@@ -1643,9 +1878,7 @@ def S_MOVRELS_B64_vi : SOP1_Real_vi <0x2b, S_MOVRELS_B64>;
def S_MOVRELD_B32_vi : SOP1_Real_vi <0x2c, S_MOVRELD_B32>;
def S_MOVRELD_B64_vi : SOP1_Real_vi <0x2d, S_MOVRELD_B64>;
def S_CBRANCH_JOIN_vi : SOP1_Real_vi <0x2e, S_CBRANCH_JOIN>;
-def S_MOV_REGRD_B32_vi : SOP1_Real_vi <0x2f, S_MOV_REGRD_B32>;
def S_ABS_I32_vi : SOP1_Real_vi <0x30, S_ABS_I32>;
-def S_MOV_FED_B32_vi : SOP1_Real_vi <0x31, S_MOV_FED_B32>;
def S_SET_GPR_IDX_IDX_vi : SOP1_Real_vi <0x32, S_SET_GPR_IDX_IDX>;
def S_ADD_U32_vi : SOP2_Real_vi <0x00, S_ADD_U32>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 5819a621f55d..c8a85d76a55b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -6,6 +6,9 @@
//
//===----------------------------------------------------------------------===//
#include "AMDGPUAsmUtils.h"
+#include "SIDefines.h"
+
+#include "llvm/ADT/StringRef.h"
namespace llvm {
namespace AMDGPU {
@@ -87,6 +90,250 @@ const char* const IdSymbolic[] = {
} // namespace Hwreg
+namespace MTBUFFormat {
+
+StringLiteral const DfmtSymbolic[] = {
+ "BUF_DATA_FORMAT_INVALID",
+ "BUF_DATA_FORMAT_8",
+ "BUF_DATA_FORMAT_16",
+ "BUF_DATA_FORMAT_8_8",
+ "BUF_DATA_FORMAT_32",
+ "BUF_DATA_FORMAT_16_16",
+ "BUF_DATA_FORMAT_10_11_11",
+ "BUF_DATA_FORMAT_11_11_10",
+ "BUF_DATA_FORMAT_10_10_10_2",
+ "BUF_DATA_FORMAT_2_10_10_10",
+ "BUF_DATA_FORMAT_8_8_8_8",
+ "BUF_DATA_FORMAT_32_32",
+ "BUF_DATA_FORMAT_16_16_16_16",
+ "BUF_DATA_FORMAT_32_32_32",
+ "BUF_DATA_FORMAT_32_32_32_32",
+ "BUF_DATA_FORMAT_RESERVED_15"
+};
+
+StringLiteral const NfmtSymbolicGFX10[] = {
+ "BUF_NUM_FORMAT_UNORM",
+ "BUF_NUM_FORMAT_SNORM",
+ "BUF_NUM_FORMAT_USCALED",
+ "BUF_NUM_FORMAT_SSCALED",
+ "BUF_NUM_FORMAT_UINT",
+ "BUF_NUM_FORMAT_SINT",
+ "",
+ "BUF_NUM_FORMAT_FLOAT"
+};
+
+StringLiteral const NfmtSymbolicSICI[] = {
+ "BUF_NUM_FORMAT_UNORM",
+ "BUF_NUM_FORMAT_SNORM",
+ "BUF_NUM_FORMAT_USCALED",
+ "BUF_NUM_FORMAT_SSCALED",
+ "BUF_NUM_FORMAT_UINT",
+ "BUF_NUM_FORMAT_SINT",
+ "BUF_NUM_FORMAT_SNORM_OGL",
+ "BUF_NUM_FORMAT_FLOAT"
+};
+
+StringLiteral const NfmtSymbolicVI[] = { // VI and GFX9
+ "BUF_NUM_FORMAT_UNORM",
+ "BUF_NUM_FORMAT_SNORM",
+ "BUF_NUM_FORMAT_USCALED",
+ "BUF_NUM_FORMAT_SSCALED",
+ "BUF_NUM_FORMAT_UINT",
+ "BUF_NUM_FORMAT_SINT",
+ "BUF_NUM_FORMAT_RESERVED_6",
+ "BUF_NUM_FORMAT_FLOAT"
+};
+
+StringLiteral const UfmtSymbolic[] = {
+ "BUF_FMT_INVALID",
+
+ "BUF_FMT_8_UNORM",
+ "BUF_FMT_8_SNORM",
+ "BUF_FMT_8_USCALED",
+ "BUF_FMT_8_SSCALED",
+ "BUF_FMT_8_UINT",
+ "BUF_FMT_8_SINT",
+
+ "BUF_FMT_16_UNORM",
+ "BUF_FMT_16_SNORM",
+ "BUF_FMT_16_USCALED",
+ "BUF_FMT_16_SSCALED",
+ "BUF_FMT_16_UINT",
+ "BUF_FMT_16_SINT",
+ "BUF_FMT_16_FLOAT",
+
+ "BUF_FMT_8_8_UNORM",
+ "BUF_FMT_8_8_SNORM",
+ "BUF_FMT_8_8_USCALED",
+ "BUF_FMT_8_8_SSCALED",
+ "BUF_FMT_8_8_UINT",
+ "BUF_FMT_8_8_SINT",
+
+ "BUF_FMT_32_UINT",
+ "BUF_FMT_32_SINT",
+ "BUF_FMT_32_FLOAT",
+
+ "BUF_FMT_16_16_UNORM",
+ "BUF_FMT_16_16_SNORM",
+ "BUF_FMT_16_16_USCALED",
+ "BUF_FMT_16_16_SSCALED",
+ "BUF_FMT_16_16_UINT",
+ "BUF_FMT_16_16_SINT",
+ "BUF_FMT_16_16_FLOAT",
+
+ "BUF_FMT_10_11_11_UNORM",
+ "BUF_FMT_10_11_11_SNORM",
+ "BUF_FMT_10_11_11_USCALED",
+ "BUF_FMT_10_11_11_SSCALED",
+ "BUF_FMT_10_11_11_UINT",
+ "BUF_FMT_10_11_11_SINT",
+ "BUF_FMT_10_11_11_FLOAT",
+
+ "BUF_FMT_11_11_10_UNORM",
+ "BUF_FMT_11_11_10_SNORM",
+ "BUF_FMT_11_11_10_USCALED",
+ "BUF_FMT_11_11_10_SSCALED",
+ "BUF_FMT_11_11_10_UINT",
+ "BUF_FMT_11_11_10_SINT",
+ "BUF_FMT_11_11_10_FLOAT",
+
+ "BUF_FMT_10_10_10_2_UNORM",
+ "BUF_FMT_10_10_10_2_SNORM",
+ "BUF_FMT_10_10_10_2_USCALED",
+ "BUF_FMT_10_10_10_2_SSCALED",
+ "BUF_FMT_10_10_10_2_UINT",
+ "BUF_FMT_10_10_10_2_SINT",
+
+ "BUF_FMT_2_10_10_10_UNORM",
+ "BUF_FMT_2_10_10_10_SNORM",
+ "BUF_FMT_2_10_10_10_USCALED",
+ "BUF_FMT_2_10_10_10_SSCALED",
+ "BUF_FMT_2_10_10_10_UINT",
+ "BUF_FMT_2_10_10_10_SINT",
+
+ "BUF_FMT_8_8_8_8_UNORM",
+ "BUF_FMT_8_8_8_8_SNORM",
+ "BUF_FMT_8_8_8_8_USCALED",
+ "BUF_FMT_8_8_8_8_SSCALED",
+ "BUF_FMT_8_8_8_8_UINT",
+ "BUF_FMT_8_8_8_8_SINT",
+
+ "BUF_FMT_32_32_UINT",
+ "BUF_FMT_32_32_SINT",
+ "BUF_FMT_32_32_FLOAT",
+
+ "BUF_FMT_16_16_16_16_UNORM",
+ "BUF_FMT_16_16_16_16_SNORM",
+ "BUF_FMT_16_16_16_16_USCALED",
+ "BUF_FMT_16_16_16_16_SSCALED",
+ "BUF_FMT_16_16_16_16_UINT",
+ "BUF_FMT_16_16_16_16_SINT",
+ "BUF_FMT_16_16_16_16_FLOAT",
+
+ "BUF_FMT_32_32_32_UINT",
+ "BUF_FMT_32_32_32_SINT",
+ "BUF_FMT_32_32_32_FLOAT",
+ "BUF_FMT_32_32_32_32_UINT",
+ "BUF_FMT_32_32_32_32_SINT",
+ "BUF_FMT_32_32_32_32_FLOAT"
+};
+
+unsigned const DfmtNfmt2UFmt[] = {
+ DFMT_INVALID | (NFMT_UNORM << NFMT_SHIFT),
+
+ DFMT_8 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_8 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_8 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_8 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_8 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_8 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_16 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_16 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_16 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_16 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_16 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_16 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_16 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_8_8 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_16_16 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_10_11_11 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_11_11_10 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_10_10_10_2 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_2_10_10_10 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_8_8_8_8 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_32_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32_32 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_16_16_16_16 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_32_32_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32_32_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT),
+ DFMT_32_32_32_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32_32_32_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT)
+};
+
+} // namespace MTBUFFormat
+
namespace Swizzle {
// This must be in sync with llvm::AMDGPU::Swizzle::Id enum members, see SIDefines.h.
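
UfmtSymbolic and DfmtNfmt2UFmt added above are parallel tables: entry i of DfmtNfmt2UFmt is the legacy dfmt/nfmt packing whose unified-format name is UfmtSymbolic[i]. The trimmed-down sketch below shows the lookup this enables, in the spirit of convertDfmtNfmt2Ufmt near the end of this change; the DFMT/NFMT shift and enum values are illustrative placeholders, since the real constants live in SIDefines.h and are not part of this diff.

    #include <cstdio>

    // Assumed packing of the combined dfmt/nfmt field (placeholder values;
    // the real constants are defined in SIDefines.h).
    constexpr unsigned DFMT_SHIFT = 0;
    constexpr unsigned NFMT_SHIFT = 4;
    enum { DFMT_INVALID = 0, DFMT_8 = 1, NFMT_UNORM = 0, NFMT_SNORM = 1 };

    // Parallel tables, trimmed to three entries for illustration.
    static const char *const UfmtNames[] = {
        "BUF_FMT_INVALID", "BUF_FMT_8_UNORM", "BUF_FMT_8_SNORM"};
    static const unsigned UfmtPacked[] = {
        DFMT_INVALID | (NFMT_UNORM << NFMT_SHIFT),
        DFMT_8 | (NFMT_UNORM << NFMT_SHIFT),
        DFMT_8 | (NFMT_SNORM << NFMT_SHIFT)};

    // Linear search over the table: return the unified-format index for a
    // dfmt/nfmt pair, or -1 if the pair has no unified equivalent.
    static int dfmtNfmtToUfmt(unsigned Dfmt, unsigned Nfmt) {
      unsigned Packed = (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
      for (unsigned Id = 0; Id < sizeof(UfmtPacked) / sizeof(unsigned); ++Id)
        if (UfmtPacked[Id] == Packed)
          return (int)Id;
      return -1;
    }

    int main() {
      int Id = dfmtNfmtToUfmt(DFMT_8, NFMT_SNORM);
      std::printf("%d -> %s\n", Id, Id >= 0 ? UfmtNames[Id] : "none");
    }
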
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
index cd91c5f6edd5..3eb27c5e5f42 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
@@ -10,7 +10,11 @@
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUASMUTILS_H
namespace llvm {
+
+class StringLiteral;
+
namespace AMDGPU {
+
namespace SendMsg { // Symbolic names for the sendmsg(...) syntax.
extern const char* const IdSymbolic[];
@@ -25,6 +29,17 @@ extern const char* const IdSymbolic[];
} // namespace Hwreg
+namespace MTBUFFormat {
+
+extern StringLiteral const DfmtSymbolic[];
+extern StringLiteral const NfmtSymbolicGFX10[];
+extern StringLiteral const NfmtSymbolicSICI[];
+extern StringLiteral const NfmtSymbolicVI[];
+extern StringLiteral const UfmtSymbolic[];
+extern unsigned const DfmtNfmt2UFmt[];
+
+} // namespace MTBUFFormat
+
namespace Swizzle { // Symbolic names for the swizzle(...) syntax.
extern const char* const IdSymbolic[];
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 3df2157fc402..4c1e4dec7ecb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -9,44 +9,28 @@
#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
-#include "AMDGPUTargetTransformInfo.h"
-#include "SIDefines.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
+#include "AMDKernelCodeT.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <utility>
-
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetParser.h"
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
-#undef GET_INSTRMAP_INFO
-#undef GET_INSTRINFO_NAMED_OPS
+
+static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion(
+ "amdhsa-code-object-version", llvm::cl::Hidden,
+ llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(3));
namespace {
@@ -103,6 +87,32 @@ namespace llvm {
namespace AMDGPU {
+Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
+ if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
+ return None;
+
+ switch (AmdhsaCodeObjectVersion) {
+ case 2:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
+ case 3:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
+ default:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
+ }
+}
+
+bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
+ if (const auto &&HsaAbiVer = getHsaAbiVersion(STI))
+ return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
+ return false;
+}
+
+bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
+ if (const auto &&HsaAbiVer = getHsaAbiVersion(STI))
+ return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
+ return false;
+}
+
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
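
getHsaAbiVersion above reads the new -amdhsa-code-object-version option (default 3) and maps it onto an ELF ABI version, falling back to V3 for any unrecognized value and returning None on non-AMDHSA triples. The sketch below captures just that mapping; the numeric ELF constants are stand-ins, as the real ones come from llvm/BinaryFormat/ELF.h.

    #include <cstdio>

    // Stand-ins for the ELF ABI version constants referenced above.
    enum { ELFABIVERSION_AMDGPU_HSA_V2 = 0, ELFABIVERSION_AMDGPU_HSA_V3 = 1 };

    // Mirrors the switch in getHsaAbiVersion: 2 selects the V2 ABI, 3 and any
    // other value fall back to V3.
    static int hsaAbiVersionFor(unsigned CodeObjectVersion) {
      switch (CodeObjectVersion) {
      case 2:
        return ELFABIVERSION_AMDGPU_HSA_V2;
      case 3:
      default:
        return ELFABIVERSION_AMDGPU_HSA_V3;
      }
    }

    int main() {
      std::printf("v2 -> %d, v3 -> %d, v7 -> %d\n",
                  hsaAbiVersionFor(2), hsaAbiVersionFor(3), hsaAbiVersionFor(7));
    }
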
@@ -236,6 +246,94 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
namespace IsaInfo {
+AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
+ : XnackSetting(TargetIDSetting::Any), SramEccSetting(TargetIDSetting::Any) {
+ if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
+ XnackSetting = TargetIDSetting::Unsupported;
+ if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
+ SramEccSetting = TargetIDSetting::Unsupported;
+}
+
+void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
+ // Check if xnack or sramecc is explicitly enabled or disabled. In the
+ // absence of the target features we assume we must generate code that can run
+ // in any environment.
+ SubtargetFeatures Features(FS);
+ Optional<bool> XnackRequested;
+ Optional<bool> SramEccRequested;
+
+ for (const std::string &Feature : Features.getFeatures()) {
+ if (Feature == "+xnack")
+ XnackRequested = true;
+ else if (Feature == "-xnack")
+ XnackRequested = false;
+ else if (Feature == "+sramecc")
+ SramEccRequested = true;
+ else if (Feature == "-sramecc")
+ SramEccRequested = false;
+ }
+
+ bool XnackSupported = isXnackSupported();
+ bool SramEccSupported = isSramEccSupported();
+
+ if (XnackRequested) {
+ if (XnackSupported) {
+ XnackSetting =
+ *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
+ } else {
+ // If a specific xnack setting was requested and this GPU does not support
+ // xnack emit a warning. Setting will remain set to "Unsupported".
+ if (*XnackRequested) {
+ errs() << "warning: xnack 'On' was requested for a processor that does "
+ "not support it!\n";
+ } else {
+ errs() << "warning: xnack 'Off' was requested for a processor that "
+ "does not support it!\n";
+ }
+ }
+ }
+
+ if (SramEccRequested) {
+ if (SramEccSupported) {
+ SramEccSetting =
+ *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
+ } else {
+ // If a specific sramecc setting was requested and this GPU does not
+ // support sramecc emit a warning. Setting will remain set to
+ // "Unsupported".
+ if (*SramEccRequested) {
+ errs() << "warning: sramecc 'On' was requested for a processor that "
+ "does not support it!\n";
+ } else {
+ errs() << "warning: sramecc 'Off' was requested for a processor that "
+ "does not support it!\n";
+ }
+ }
+ }
+}
+
+static TargetIDSetting
+getTargetIDSettingFromFeatureString(StringRef FeatureString) {
+ if (FeatureString.endswith("-"))
+ return TargetIDSetting::Off;
+ if (FeatureString.endswith("+"))
+ return TargetIDSetting::On;
+
+ llvm_unreachable("Malformed feature string");
+}
+
+void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
+ SmallVector<StringRef, 3> TargetIDSplit;
+ TargetID.split(TargetIDSplit, ':');
+
+ for (const auto &FeatureString : TargetIDSplit) {
+ if (FeatureString.startswith("xnack"))
+ XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
+ if (FeatureString.startswith("sramecc"))
+ SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
+ }
+}
+
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
auto TargetTriple = STI->getTargetTriple();
auto Version = getIsaVersion(STI->getCPU());
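
setTargetIDFromTargetIDStream, added in the hunk above, accepts a target-ID string of the shape "processor:xnack±:sramecc±", splits it on ':' and reads the trailing '+' or '-' of each feature token. Here is a self-contained sketch of that parsing; the processor name in the example is arbitrary, and the Setting enum is a stand-in for TargetIDSetting.

    #include <cstdio>
    #include <string>
    #include <vector>

    enum class Setting { Any, Off, On, Unsupported };

    // Split "proc:feature+:feature-" style target IDs on ':'.
    static std::vector<std::string> splitTargetID(const std::string &TargetID) {
      std::vector<std::string> Parts;
      size_t Start = 0;
      while (Start <= TargetID.size()) {
        size_t End = TargetID.find(':', Start);
        if (End == std::string::npos) End = TargetID.size();
        Parts.push_back(TargetID.substr(Start, End - Start));
        Start = End + 1;
      }
      return Parts;
    }

    int main() {
      Setting Xnack = Setting::Any, SramEcc = Setting::Any;
      for (const std::string &F : splitTargetID("gfx908:xnack+:sramecc-")) {
        // A trailing '+' means On, a trailing '-' means Off (the real helper
        // asserts on anything else).
        Setting S = Setting::Any;
        if (!F.empty() && F.back() == '+') S = Setting::On;
        if (!F.empty() && F.back() == '-') S = Setting::Off;
        if (F.rfind("xnack", 0) == 0)   Xnack = S;
        if (F.rfind("sramecc", 0) == 0) SramEcc = S;
      }
      std::printf("xnack=%d sramecc=%d\n", (int)Xnack, (int)SramEcc);
    }
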
@@ -252,16 +350,11 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
if (hasXNACK(*STI))
Stream << "+xnack";
if (hasSRAMECC(*STI))
- Stream << "+sram-ecc";
+ Stream << "+sramecc";
Stream.flush();
}
-bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
- return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
- STI->getFeatureBits().test(FeatureCodeObjectV3);
-}
-
unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
if (STI->getFeatureBits().test(FeatureWavefrontSize16))
return 16;
@@ -284,7 +377,7 @@ unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
// "Per CU" really means "per whatever functional block the waves of a
// workgroup must share". For gfx10 in CU mode this is the CU, which contains
// two SIMDs.
- if (isGFX10(*STI) && STI->getFeatureBits().test(FeatureCuMode))
+ if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
return 2;
// Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
// two CUs, so a total of four SIMDs.
@@ -309,7 +402,7 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
// FIXME: Need to take scratch memory into account.
- if (!isGFX10(*STI))
+ if (!isGFX10Plus(*STI))
return 10;
return hasGFX10_3Insts(*STI) ? 16 : 20;
}
@@ -459,7 +552,7 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
}
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
- if (!isGFX10(*STI))
+ if (!isGFX10Plus(*STI))
return 256;
return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
}
@@ -578,7 +671,7 @@ bool isReadOnlySegment(const GlobalValue *GV) {
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
- return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
+ return TT.getArch() == Triple::r600;
}
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
@@ -784,6 +877,165 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width)
} // namespace Hwreg
//===----------------------------------------------------------------------===//
+// exp tgt
+//===----------------------------------------------------------------------===//
+
+namespace Exp {
+
+struct ExpTgt {
+ StringLiteral Name;
+ unsigned Tgt;
+ unsigned MaxIndex;
+};
+
+static constexpr ExpTgt ExpTgtInfo[] = {
+ {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
+ {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
+ {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
+ {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
+ {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
+ {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
+};
+
+bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
+ for (const ExpTgt &Val : ExpTgtInfo) {
+ if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
+ Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
+ Name = Val.Name;
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned getTgtId(const StringRef Name) {
+
+ for (const ExpTgt &Val : ExpTgtInfo) {
+ if (Val.MaxIndex == 0 && Name == Val.Name)
+ return Val.Tgt;
+
+ if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
+ StringRef Suffix = Name.drop_front(Val.Name.size());
+
+ unsigned Id;
+ if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
+ return ET_INVALID;
+
+ // Disable leading zeroes
+ if (Suffix.size() > 1 && Suffix[0] == '0')
+ return ET_INVALID;
+
+ return Val.Tgt + Id;
+ }
+ }
+ return ET_INVALID;
+}
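getTgtId above resolves names such as "mrt3" by matching a table prefix and then adding the decimal suffix to the entry's base id, rejecting missing or out-of-range indices and leading zeroes. A self-contained sketch of the same lookup shape follows; the base ids and maximum indices are illustrative placeholders, not the real ET_* values from SIDefines.h.

#include <cctype>
#include <cstdio>
#include <string>
#include <vector>

struct Tgt { std::string Name; int Base; unsigned MaxIndex; };

// Illustrative table only; "mrtz" must precede "mrt" so the longer prefix wins.
static const std::vector<Tgt> Table = {
    {"mrtz", 8, 0}, {"mrt", 0, 7}, {"pos", 12, 4}, {"param", 32, 31}};

static int lookupTgtId(const std::string &Name) {
  for (const Tgt &T : Table) {
    if (T.MaxIndex == 0 && Name == T.Name)
      return T.Base;
    if (T.MaxIndex > 0 && Name.compare(0, T.Name.size(), T.Name) == 0) {
      std::string Suffix = Name.substr(T.Name.size());
      if (Suffix.empty() || (Suffix.size() > 1 && Suffix[0] == '0'))
        return -1;                                  // no index / leading zero
      for (char C : Suffix)
        if (!std::isdigit(static_cast<unsigned char>(C)))
          return -1;                                // non-numeric suffix
      unsigned Id = std::stoul(Suffix);
      return Id <= T.MaxIndex ? T.Base + static_cast<int>(Id) : -1;
    }
  }
  return -1;
}

int main() {
  std::printf("mrt3  -> %d\n", lookupTgtId("mrt3"));   // base 0 + index 3
  std::printf("mrt03 -> %d\n", lookupTgtId("mrt03"));  // rejected: leading zero
}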
+
+bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
+ return (Id != ET_POS4 && Id != ET_PRIM) || isGFX10Plus(STI);
+}
+
+} // namespace Exp
+
+//===----------------------------------------------------------------------===//
+// MTBUF Format
+//===----------------------------------------------------------------------===//
+
+namespace MTBUFFormat {
+
+int64_t getDfmt(const StringRef Name) {
+ for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
+ if (Name == DfmtSymbolic[Id])
+ return Id;
+ }
+ return DFMT_UNDEF;
+}
+
+StringRef getDfmtName(unsigned Id) {
+ assert(Id <= DFMT_MAX);
+ return DfmtSymbolic[Id];
+}
+
+static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
+ if (isSI(STI) || isCI(STI))
+ return NfmtSymbolicSICI;
+ if (isVI(STI) || isGFX9(STI))
+ return NfmtSymbolicVI;
+ return NfmtSymbolicGFX10;
+}
+
+int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
+ auto lookupTable = getNfmtLookupTable(STI);
+ for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
+ if (Name == lookupTable[Id])
+ return Id;
+ }
+ return NFMT_UNDEF;
+}
+
+StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
+ assert(Id <= NFMT_MAX);
+ return getNfmtLookupTable(STI)[Id];
+}
+
+bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
+ unsigned Dfmt;
+ unsigned Nfmt;
+ decodeDfmtNfmt(Id, Dfmt, Nfmt);
+ return isValidNfmt(Nfmt, STI);
+}
+
+bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
+ return !getNfmtName(Id, STI).empty();
+}
+
+int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
+ return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
+}
+
+void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
+ Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
+ Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
+}
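encodeDfmtNfmt/decodeDfmtNfmt above pack the data format and numeric format into one field with plain shift-and-mask arithmetic. A minimal round-trip check is shown below; the shift and mask constants are assumptions for illustration (a 4-bit dfmt in the low bits with a 3-bit nfmt above it), the authoritative values being the DFMT_*/NFMT_* constants in SIDefines.h.

#include <cassert>
#include <cstdio>

// Assumed layout for illustration only: dfmt in bits [3:0], nfmt in bits [6:4].
constexpr unsigned DFMT_SHIFT = 0, DFMT_MASK = 0xF;
constexpr unsigned NFMT_SHIFT = 4, NFMT_MASK = 0x7;

constexpr unsigned encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

int main() {
  unsigned Fmt = encodeDfmtNfmt(/*Dfmt=*/14, /*Nfmt=*/5);
  unsigned Dfmt = (Fmt >> DFMT_SHIFT) & DFMT_MASK;
  unsigned Nfmt = (Fmt >> NFMT_SHIFT) & NFMT_MASK;
  assert(Dfmt == 14 && Nfmt == 5);                 // lossless round trip
  std::printf("packed=0x%02x dfmt=%u nfmt=%u\n", Fmt, Dfmt, Nfmt);
}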
+
+int64_t getUnifiedFormat(const StringRef Name) {
+ for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
+ if (Name == UfmtSymbolic[Id])
+ return Id;
+ }
+ return UFMT_UNDEF;
+}
+
+StringRef getUnifiedFormatName(unsigned Id) {
+ return isValidUnifiedFormat(Id) ? UfmtSymbolic[Id] : "";
+}
+
+bool isValidUnifiedFormat(unsigned Id) {
+ return Id <= UFMT_LAST;
+}
+
+int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt) {
+ int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
+ for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
+ if (Fmt == DfmtNfmt2UFmt[Id])
+ return Id;
+ }
+ return UFMT_UNDEF;
+}
+
+bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
+ return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
+}
+
+unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
+ if (isGFX10Plus(STI))
+ return UFMT_DEFAULT;
+ return DFMT_NFMT_DEFAULT;
+}
+
+} // namespace MTBUFFormat
+
+//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//
@@ -804,7 +1056,7 @@ static bool isValidMsgId(int64_t MsgId) {
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
if (Strict) {
if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
- return isGFX9(STI) || isGFX10(STI);
+ return isGFX9Plus(STI);
else
return isValidMsgId(MsgId);
} else {
@@ -919,8 +1171,12 @@ bool isShader(CallingConv::ID cc) {
}
}
+bool isGraphics(CallingConv::ID cc) {
+ return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
+}
+
bool isCompute(CallingConv::ID cc) {
- return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
+ return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
}
bool isEntryFunctionCC(CallingConv::ID CC) {
@@ -940,6 +1196,15 @@ bool isEntryFunctionCC(CallingConv::ID CC) {
}
}
+bool isModuleEntryFunctionCC(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_Gfx:
+ return true;
+ default:
+ return isEntryFunctionCC(CC);
+ }
+}
+
bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}
@@ -980,10 +1245,16 @@ bool isGFX9(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
+bool isGFX9Plus(const MCSubtargetInfo &STI) {
+ return isGFX9(STI) || isGFX10Plus(STI);
+}
+
bool isGFX10(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}
+bool isGFX10Plus(const MCSubtargetInfo &STI) { return isGFX10(STI); }
+
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
@@ -1017,46 +1288,46 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
CASE_CI_VI(FLAT_SCR) \
CASE_CI_VI(FLAT_SCR_LO) \
CASE_CI_VI(FLAT_SCR_HI) \
- CASE_VI_GFX9_GFX10(TTMP0) \
- CASE_VI_GFX9_GFX10(TTMP1) \
- CASE_VI_GFX9_GFX10(TTMP2) \
- CASE_VI_GFX9_GFX10(TTMP3) \
- CASE_VI_GFX9_GFX10(TTMP4) \
- CASE_VI_GFX9_GFX10(TTMP5) \
- CASE_VI_GFX9_GFX10(TTMP6) \
- CASE_VI_GFX9_GFX10(TTMP7) \
- CASE_VI_GFX9_GFX10(TTMP8) \
- CASE_VI_GFX9_GFX10(TTMP9) \
- CASE_VI_GFX9_GFX10(TTMP10) \
- CASE_VI_GFX9_GFX10(TTMP11) \
- CASE_VI_GFX9_GFX10(TTMP12) \
- CASE_VI_GFX9_GFX10(TTMP13) \
- CASE_VI_GFX9_GFX10(TTMP14) \
- CASE_VI_GFX9_GFX10(TTMP15) \
- CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
- CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
- CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
- CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
- CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
- CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
- CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
- CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
- CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
- CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
- CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
- CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
- CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
- CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
- CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
- CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9PLUS(TTMP0) \
+ CASE_VI_GFX9PLUS(TTMP1) \
+ CASE_VI_GFX9PLUS(TTMP2) \
+ CASE_VI_GFX9PLUS(TTMP3) \
+ CASE_VI_GFX9PLUS(TTMP4) \
+ CASE_VI_GFX9PLUS(TTMP5) \
+ CASE_VI_GFX9PLUS(TTMP6) \
+ CASE_VI_GFX9PLUS(TTMP7) \
+ CASE_VI_GFX9PLUS(TTMP8) \
+ CASE_VI_GFX9PLUS(TTMP9) \
+ CASE_VI_GFX9PLUS(TTMP10) \
+ CASE_VI_GFX9PLUS(TTMP11) \
+ CASE_VI_GFX9PLUS(TTMP12) \
+ CASE_VI_GFX9PLUS(TTMP13) \
+ CASE_VI_GFX9PLUS(TTMP14) \
+ CASE_VI_GFX9PLUS(TTMP15) \
+ CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
+ CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
+ CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
+ CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
+ CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
+ CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
+ CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
+ CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
+ CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
+ CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
}
#define CASE_CI_VI(node) \
assert(!isSI(STI)); \
case node: return isCI(STI) ? node##_ci : node##_vi;
-#define CASE_VI_GFX9_GFX10(node) \
- case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
+#define CASE_VI_GFX9PLUS(node) \
+ case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
if (STI.getTargetTriple().getArch() == Triple::r600)
@@ -1065,17 +1336,17 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
}
#undef CASE_CI_VI
-#undef CASE_VI_GFX9_GFX10
+#undef CASE_VI_GFX9PLUS
#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
-#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
+#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
unsigned mc2PseudoReg(unsigned Reg) {
MAP_REG2REG
}
#undef CASE_CI_VI
-#undef CASE_VI_GFX9_GFX10
+#undef CASE_VI_GFX9PLUS
#undef MAP_REG2REG
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
@@ -1311,6 +1582,7 @@ bool isArgPassedInSGPR(const Argument *A) {
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_Gfx:
// For non-compute shaders, SGPR inputs are marked with either inreg or byval.
// Everything else is in VGPRs.
return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
@@ -1322,11 +1594,11 @@ bool isArgPassedInSGPR(const Argument *A) {
}
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
- return isGCN3Encoding(ST) || isGFX10(ST);
+ return isGCN3Encoding(ST) || isGFX10Plus(ST);
}
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
- return isGFX9(ST) || isGFX10(ST);
+ return isGFX9Plus(ST);
}
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
@@ -1382,6 +1654,14 @@ Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
}
+unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) {
+ // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
+ if (AMDGPU::isGFX10(ST))
+ return Signed ? 12 : 11;
+
+ return Signed ? 13 : 12;
+}
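The comment above pins down the flat-offset field widths (12-bit signed / 11-bit unsigned on GFX10, 13-bit signed / 12-bit unsigned otherwise). The encodable ranges follow directly from the bit counts, as the small check below illustrates:

#include <cstdio>

// An N-bit signed field spans [-2^(N-1), 2^(N-1)-1]; an N-bit unsigned field
// spans [0, 2^N - 1].
static void show(const char *Label, unsigned Bits, bool Signed) {
  long Lo = Signed ? -(1L << (Bits - 1)) : 0;
  long Hi = Signed ? (1L << (Bits - 1)) - 1 : (1L << Bits) - 1;
  std::printf("%-14s %2u bits -> [%ld, %ld]\n", Label, Bits, Lo, Hi);
}

int main() {
  show("gfx10 signed", 12, true);    // [-2048, 2047]
  show("gfx10 unsigned", 11, false); // [0, 2047]
  show("gfx9 signed", 13, true);     // [-4096, 4095]
  show("gfx9 unsigned", 12, false);  // [0, 4095]
}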
+
// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
@@ -1483,7 +1763,7 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
uint8_t NumComponents,
uint8_t NumFormat,
const MCSubtargetInfo &STI) {
- return isGFX10(STI)
+ return isGFX10Plus(STI)
? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
NumFormat)
: getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
@@ -1491,9 +1771,29 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
const MCSubtargetInfo &STI) {
- return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format)
- : getGfx9BufferFormatInfo(Format);
+ return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(Format)
+ : getGfx9BufferFormatInfo(Format);
}
} // namespace AMDGPU
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const AMDGPU::IsaInfo::TargetIDSetting S) {
+ switch (S) {
+ case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
+ OS << "Unsupported";
+ break;
+ case (AMDGPU::IsaInfo::TargetIDSetting::Any):
+ OS << "Any";
+ break;
+ case (AMDGPU::IsaInfo::TargetIDSetting::Off):
+ OS << "Off";
+ break;
+ case (AMDGPU::IsaInfo::TargetIDSetting::On):
+ OS << "On";
+ break;
+ }
+ return OS;
+}
+
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 26bb77f4b4c7..f9378693cf48 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -9,22 +9,15 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
-#include "AMDGPU.h"
-#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Alignment.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetParser.h"
-#include <cstdint>
-#include <string>
-#include <utility>
+
+struct amd_kernel_code_t;
namespace llvm {
+struct Align;
class Argument;
class Function;
class GCNSubtarget;
@@ -35,8 +28,23 @@ class MCSubtargetInfo;
class StringRef;
class Triple;
+namespace amdhsa {
+struct kernel_descriptor_t;
+}
+
namespace AMDGPU {
+struct IsaVersion;
+
+/// \returns HSA OS ABI Version identification.
+Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
+/// \returns True if HSA OS ABI Version identification is 2,
+/// false otherwise.
+bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
+/// \returns True if HSA OS ABI Version identification is 3,
+/// false otherwise.
+bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
+
struct GcnBufferFormatInfo {
unsigned Format;
unsigned BitsPerComp;
@@ -61,13 +69,87 @@ enum {
TRAP_NUM_SGPRS = 16
};
+enum class TargetIDSetting {
+ Unsupported,
+ Any,
+ Off,
+ On
+};
+
+class AMDGPUTargetID {
+private:
+ TargetIDSetting XnackSetting;
+ TargetIDSetting SramEccSetting;
+
+public:
+ explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
+ ~AMDGPUTargetID() = default;
+
+ /// \return True if the current xnack setting is not "Unsupported".
+ bool isXnackSupported() const {
+ return XnackSetting != TargetIDSetting::Unsupported;
+ }
+
+ /// \returns True if the current xnack setting is "On" or "Any".
+ bool isXnackOnOrAny() const {
+ return XnackSetting == TargetIDSetting::On ||
+ XnackSetting == TargetIDSetting::Any;
+ }
+
+ /// \returns True if current xnack setting is "On" or "Off",
+ /// false otherwise.
+ bool isXnackOnOrOff() const {
+ return getXnackSetting() == TargetIDSetting::On ||
+ getXnackSetting() == TargetIDSetting::Off;
+ }
+
+ /// \returns The current xnack TargetIDSetting, possible options are
+ /// "Unsupported", "Any", "Off", and "On".
+ TargetIDSetting getXnackSetting() const {
+ return XnackSetting;
+ }
+
+ /// Sets xnack setting to \p NewXnackSetting.
+ void setXnackSetting(TargetIDSetting NewXnackSetting) {
+ XnackSetting = NewXnackSetting;
+ }
+
+ /// \return True if the current sramecc setting is not "Unsupported".
+ bool isSramEccSupported() const {
+ return SramEccSetting != TargetIDSetting::Unsupported;
+ }
+
+ /// \returns True if the current sramecc setting is "On" or "Any".
+ bool isSramEccOnOrAny() const {
+ return SramEccSetting == TargetIDSetting::On ||
+ SramEccSetting == TargetIDSetting::Any;
+ }
+
+ /// \returns True if current sramecc setting is "On" or "Off",
+ /// false otherwise.
+ bool isSramEccOnOrOff() const {
+ return getSramEccSetting() == TargetIDSetting::On ||
+ getSramEccSetting() == TargetIDSetting::Off;
+ }
+
+ /// \returns The current sramecc TargetIDSetting, possible options are
+ /// "Unsupported", "Any", "Off", and "On".
+ TargetIDSetting getSramEccSetting() const {
+ return SramEccSetting;
+ }
+
+ /// Sets sramecc setting to \p NewSramEccSetting.
+ void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
+ SramEccSetting = NewSramEccSetting;
+ }
+
+ void setTargetIDFromFeaturesString(StringRef FS);
+ void setTargetIDFromTargetIDStream(StringRef TargetID);
+};
+
/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
-/// \returns True if given subtarget \p STI supports code object version 3,
-/// false otherwise.
-bool hasCodeObjectV3(const MCSubtargetInfo *STI);
-
/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);
@@ -368,8 +450,8 @@ struct Waitcnt {
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
: VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
- static Waitcnt allZero(const IsaVersion &Version) {
- return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
+ static Waitcnt allZero(bool HasVscnt) {
+ return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
}
static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
@@ -482,6 +564,51 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
} // namespace Hwreg
+namespace Exp {
+
+bool getTgtName(unsigned Id, StringRef &Name, int &Index);
+
+LLVM_READONLY
+unsigned getTgtId(const StringRef Name);
+
+LLVM_READNONE
+bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
+
+} // namespace Exp
+
+namespace MTBUFFormat {
+
+LLVM_READNONE
+int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
+
+void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
+
+int64_t getDfmt(const StringRef Name);
+
+StringRef getDfmtName(unsigned Id);
+
+int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
+
+StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
+
+bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
+
+bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
+
+int64_t getUnifiedFormat(const StringRef Name);
+
+StringRef getUnifiedFormatName(unsigned Id);
+
+bool isValidUnifiedFormat(unsigned Val);
+
+int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);
+
+bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
+
+unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
+
+} // namespace MTBUFFormat
+
namespace SendMsg {
LLVM_READONLY
@@ -530,11 +657,23 @@ LLVM_READNONE
bool isShader(CallingConv::ID CC);
LLVM_READNONE
+bool isGraphics(CallingConv::ID CC);
+
+LLVM_READNONE
bool isCompute(CallingConv::ID CC);
LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);
+// These functions are considered entrypoints into the current module, i.e. they
+// are allowed to be called from outside the current module. This is different
+// from isEntryFunctionCC, which is only true for functions that are entered by
+// the hardware. Module entry points include all entry functions but also
+// include functions that can be called from other functions inside or outside
+// the current module. Module entry functions are allowed to allocate LDS.
+LLVM_READNONE
+bool isModuleEntryFunctionCC(CallingConv::ID CC);
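To make the distinction drawn in the comment above concrete: every hardware entry point is also a module entry point, and AMDGPU_Gfx is a module entry point that is callable rather than hardware-launched. A simplified, hypothetical illustration (stand-in enum, not LLVM code):

#include <cstdio>

enum class CC { Kernel, PixelShader, Gfx, Plain };   // simplified stand-ins

static bool isEntryFunctionCC(CC C) {
  return C == CC::Kernel || C == CC::PixelShader;    // launched by hardware
}
static bool isModuleEntryFunctionCC(CC C) {
  return C == CC::Gfx || isEntryFunctionCC(C);       // plus callable entry points
}

int main() {
  std::printf("Gfx:   hw-entry=%d module-entry=%d\n",
              isEntryFunctionCC(CC::Gfx), isModuleEntryFunctionCC(CC::Gfx));
  std::printf("Plain: hw-entry=%d module-entry=%d\n",
              isEntryFunctionCC(CC::Plain), isModuleEntryFunctionCC(CC::Plain));
}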
+
// FIXME: Remove this when calling conventions cleaned up
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
@@ -558,7 +697,9 @@ bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
+bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
+bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
@@ -690,6 +831,13 @@ Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
int64_t ByteOffset);
+/// For FLAT segment the offset must be positive;
+/// MSB is ignored and forced to zero.
+///
+/// \return The number of bits available for the offset field in flat
+/// instructions.
+unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);
+
/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
@@ -735,10 +883,8 @@ struct SIModeRegisterDefaults {
SIModeRegisterDefaults(const Function &F);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
- const bool IsCompute = AMDGPU::isCompute(CC);
-
SIModeRegisterDefaults Mode;
- Mode.IEEE = IsCompute;
+ Mode.IEEE = !AMDGPU::isShader(CC);
return Mode;
}
@@ -805,6 +951,10 @@ struct SIModeRegisterDefaults {
};
} // end namespace AMDGPU
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const AMDGPU::IsaInfo::TargetIDSetting S);
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index ef010a7ac157..b7dd757a8af3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -15,12 +15,10 @@
//
#include "AMDGPUPALMetadata.h"
-#include "AMDGPU.h"
-#include "AMDGPUAsmPrinter.h"
-#include "MCTargetDesc/AMDGPUTargetStreamer.h"
+#include "AMDGPUPTNote.h"
#include "SIDefines.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/IR/CallingConv.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/AMDGPUMetadata.h"
@@ -45,8 +43,11 @@ void AMDGPUPALMetadata::readFromIR(Module &M) {
}
BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
- if (!NamedMD || !NamedMD->getNumOperands())
+ if (!NamedMD || !NamedMD->getNumOperands()) {
+ // Emit msgpack metadata by default
+ BlobType = ELF::NT_AMDGPU_METADATA;
return;
+ }
// This is the old reg=value pair format for metadata. It is a NamedMD
// containing an MDTuple containing a number of MDNodes each of which is an
// integer value, and each two integer values forms a key=value pair that we
@@ -235,6 +236,13 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
}
+// Set the stack frame size of a function in the metadata.
+void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
+ unsigned Val) {
+ auto Node = getShaderFunction(MF.getFunction().getName());
+ Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
+}
+
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void AMDGPUPALMetadata::setWave32(unsigned CC) {
@@ -718,6 +726,30 @@ msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() {
return Registers.getMap();
}
+// Reference (create if necessary) the node for the shader functions map.
+msgpack::DocNode &AMDGPUPALMetadata::refShaderFunctions() {
+ auto &N =
+ MsgPackDoc.getRoot()
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
+ .getArray(/*Convert=*/true)[0]
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".shader_functions")];
+ N.getMap(/*Convert=*/true);
+ return N;
+}
+
+// Get (create if necessary) the shader functions map.
+msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunctions() {
+ if (ShaderFunctions.isEmpty())
+ ShaderFunctions = refShaderFunctions();
+ return ShaderFunctions.getMap();
+}
+
+// Get (create if necessary) a function in the shader functions map.
+msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunction(StringRef Name) {
+ auto Functions = getShaderFunctions();
+ return Functions[Name].getMap(/*Convert=*/true);
+}
+
// Return the PAL metadata hardware shader stage name.
static const char *getStageName(CallingConv::ID CC) {
switch (CC) {
@@ -733,6 +765,8 @@ static const char *getStageName(CallingConv::ID CC) {
return ".hs";
case CallingConv::AMDGPU_LS:
return ".ls";
+ case CallingConv::AMDGPU_Gfx:
+ llvm_unreachable("Callable shader has no hardware stage");
default:
return ".cs";
}
@@ -773,3 +807,9 @@ void AMDGPUPALMetadata::setLegacy() {
BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
}
+// Erase all PAL metadata.
+void AMDGPUPALMetadata::reset() {
+ MsgPackDoc.clear();
+ Registers = MsgPackDoc.getEmptyNode();
+ HwStages = MsgPackDoc.getEmptyNode();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 544ab669d9ae..8fa1f738487c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -13,11 +13,11 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
-
#include "llvm/BinaryFormat/MsgPackDocument.h"
namespace llvm {
+class MachineFunction;
class Module;
class StringRef;
@@ -26,6 +26,7 @@ class AMDGPUPALMetadata {
msgpack::Document MsgPackDoc;
msgpack::DocNode Registers;
msgpack::DocNode HwStages;
+ msgpack::DocNode ShaderFunctions;
public:
// Read the amdgpu.pal.metadata supplied by the frontend, ready for
@@ -76,6 +77,9 @@ public:
// Set the scratch size in the metadata.
void setScratchSize(unsigned CC, unsigned Val);
+ // Set the stack frame size of a function in the metadata.
+ void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
+
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void setWave32(unsigned CC);
@@ -106,6 +110,9 @@ public:
// Set legacy PAL metadata format.
void setLegacy();
+ // Erase all PAL metadata.
+ void reset();
+
private:
// Return whether the blob type is legacy PAL metadata.
bool isLegacy() const;
@@ -116,6 +123,15 @@ private:
// Get (create if necessary) the registers map.
msgpack::MapDocNode getRegisters();
+ // Reference (create if necessary) the node for the shader functions map.
+ msgpack::DocNode &refShaderFunctions();
+
+ // Get (create if necessary) the shader functions map.
+ msgpack::MapDocNode getShaderFunctions();
+
+ // Get (create if necessary) a function in the shader functions map.
+ msgpack::MapDocNode getShaderFunction(StringRef Name);
+
// Get (create if necessary) the .hardware_stages entry for the given calling
// convention.
msgpack::MapDocNode getHwStage(unsigned CC);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
index 443e2cc45ac0..45eb6c321476 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AMDKernelCodeTUtils.h"
+#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
@@ -18,9 +19,6 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstdint>
-#include <utility>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
index a87325a78df3..41d0e0d745e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
-#include "AMDKernelCodeT.h"
+struct amd_kernel_code_t;
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 17f334f62a30..f1e470031982 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -138,7 +138,6 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
let HasModifiers = 0;
let HasClamp = 1;
- let HasOMod = 1;
}
def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
@@ -242,25 +241,25 @@ defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
-let SchedRW = [WriteTrans32] in {
+let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>;
-} // End SchedRW = [WriteTrans32]
+} // End TRANS = 1, SchedRW = [WriteTrans32]
-let SchedRW = [WriteTrans64] in {
+let TRANS = 1, SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, any_amdgcn_sqrt>;
-} // End SchedRW = [WriteTrans64]
+} // End TRANS = 1, SchedRW = [WriteTrans64]
-let SchedRW = [WriteTrans32] in {
+let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
-} // End SchedRW = [WriteTrans32]
+} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
@@ -338,10 +337,8 @@ defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End Uses = [M0, EXEC]
-defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
-
let SubtargetPredicate = isGFX6GFX7 in {
- let SchedRW = [WriteTrans32] in {
+ let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_LOG_CLAMP_F32 :
VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
defm V_RCP_CLAMP_F32 :
@@ -352,7 +349,7 @@ let SubtargetPredicate = isGFX6GFX7 in {
VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
defm V_RSQ_LEGACY_F32 :
VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
- } // End SchedRW = [WriteTrans32]
+ } // End TRANS = 1, SchedRW = [WriteTrans32]
let SchedRW = [WriteDouble] in {
defm V_RCP_CLAMP_F64 :
@@ -363,10 +360,10 @@ let SubtargetPredicate = isGFX6GFX7 in {
} // End SubtargetPredicate = isGFX6GFX7
let SubtargetPredicate = isGFX7GFX8GFX9 in {
- let SchedRW = [WriteTrans32] in {
+ let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
- } // End SchedRW = [WriteTrans32]
+ } // End TRANS = 1, SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9
let SubtargetPredicate = isGFX7Plus in {
@@ -386,7 +383,7 @@ defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
} // End FPDPRounding = 1
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
-let SchedRW = [WriteTrans32] in {
+let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
@@ -394,7 +391,7 @@ defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
-} // End SchedRW = [WriteTrans32]
+} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
@@ -650,7 +647,6 @@ defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>;
defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>;
defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>;
defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>;
-defm V_MOV_FED_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x009>;
defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
@@ -754,7 +750,6 @@ defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
-defm V_MOV_FED_B32 : VOP1_Real_vi <0x9>;
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index aa37dbf1418f..7a334eaadaed 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -92,6 +92,7 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
+ let OtherPredicates = ps.OtherPredicates;
let AsmMatchConverter = ps.AsmMatchConverter;
let AsmVariantName = ps.AsmVariantName;
let Constraints = ps.Constraints;
@@ -240,12 +241,16 @@ multiclass VOP2eInst <string opName,
}
}
-class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd> :
+class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
(inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
- ps.Pfl.Src1RC32:$src1)>,
- PredicateControl {
-}
+ ps.Pfl.Src1RC32:$src1)>, PredicateControl;
+
+class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> :
+ InstAlias <ps.OpName#" "#ps.Pfl.Asm64,
+ (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst,
+ ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, clampmod:$clamp)>,
+ PredicateControl;
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
let WaveSizePredicate = isWave32 in {
@@ -328,11 +333,12 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
def VOP_MAC_F16 : VOP_MAC <f16>;
def VOP_MAC_F32 : VOP_MAC <f32>;
+let HasExtDPP = 0 in
+def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
let HasClamp = 0;
let HasExtSDWA = 0;
- let HasModifiers = 1;
let HasOpSel = 0;
let IsPacked = 0;
}
@@ -341,7 +347,11 @@ def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
let Src0ModDPP = FPVRegInputMods;
let Src1ModDPP = FPVRegInputMods;
}
-def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32>;
+
+def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
+ let HasSrc0Mods = 1;
+ let HasSrc1Mods = 1;
+}
// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
@@ -361,8 +371,8 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
- let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
- let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
let AsmDPP16 = AsmDPP#"$fi";
@@ -396,8 +406,8 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*
def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
let Asm32 = "$vdst, $src0, $src1";
let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
- let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
- let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
let AsmDPP16 = AsmDPP#"$fi";
@@ -468,7 +478,7 @@ def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>;
-defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
+defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, any_fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
@@ -490,24 +500,25 @@ defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
let mayRaiseFPException = 0 in {
-let SubtargetPredicate = HasMadMacF32Insts in {
+let OtherPredicates = [HasMadMacF32Insts] in {
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
-}
+
+let SubtargetPredicate = isGFX6GFX7GFX10 in
+defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>;
+} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
+ // isConvertibleToThreeAddress = 1
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
-} // End SubtargetPredicate = HasMadMacF32Insts
-}
+} // End OtherPredicates = [HasMadMacF32Insts]
+} // End mayRaiseFPException = 0
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
-
-// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI,
-// but the VI instructions behave the same as the SI versions.
-defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_i32", 1>;
-defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
-defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
+defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>;
+defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
+defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
@@ -555,10 +566,6 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfma
} // End SubtargetPredicate = isGFX6GFX7
let isCommutable = 1 in {
-let SubtargetPredicate = isGFX6GFX7GFX10 in {
-let OtherPredicates = [HasMadMacF32Insts] in
-defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-} // End SubtargetPredicate = isGFX6GFX7GFX10
let SubtargetPredicate = isGFX6GFX7 in {
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
@@ -595,8 +602,8 @@ let SubtargetPredicate = HasAddNoCarryInsts in {
}
let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
-def : DivergentClampingBinOp<add, V_ADD_I32_e64>;
-def : DivergentClampingBinOp<sub, V_SUB_I32_e64>;
+def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>;
+def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>;
}
def : DivergentBinOp<adde, V_ADDC_U32_e32>;
@@ -635,7 +642,7 @@ defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>;
let isCommutable = 1 in {
let FPDPRounding = 1 in {
defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
-defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
+defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, any_fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;
@@ -668,14 +675,23 @@ let SubtargetPredicate = HasDLInsts in {
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>;
let Constraints = "$vdst = $src2",
- DisableEncoding="$src2",
+ DisableEncoding = "$src2",
isConvertibleToThreeAddress = 1,
- isCommutable = 1 in {
+ isCommutable = 1 in
defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
-}
} // End SubtargetPredicate = HasDLInsts
+let SubtargetPredicate = HasFmaLegacy32 in {
+
+let Constraints = "$vdst = $src2",
+ DisableEncoding = "$src2",
+ isConvertibleToThreeAddress = 1,
+ isCommutable = 1 in
+defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;
+
+} // End SubtargetPredicate = HasFmaLegacy32
+
let Constraints = "$vdst = $src2",
DisableEncoding="$src2",
isConvertibleToThreeAddress = 1,
@@ -827,6 +843,24 @@ def : GCNPat <
} // End Predicates = [Has16BitInsts]
+let SubtargetPredicate = HasIntClamp in {
+// Set clamp bit for saturation.
+def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>;
+def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>;
+}
+
+let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in {
+let AddedComplexity = 1 in { // Prefer over form with carry-out.
+def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>;
+def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>;
+}
+}
+
+let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in {
+def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
+def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
+}
+
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
@@ -854,6 +888,7 @@ class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
VOP2_DPP<op, ps, opName, p, 1> {
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
+ let OtherPredicates = ps.OtherPredicates;
}
class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
@@ -880,6 +915,7 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
let AssemblerPredicate = HasDPP8;
let SubtargetPredicate = HasDPP8;
+ let OtherPredicates = ps.OtherPredicates;
}
//===----------------------------------------------------------------------===//
@@ -1090,13 +1126,10 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
//===---------------------------- VOP3beOnly ----------------------------===//
- multiclass VOP3beOnly_Real_gfx10<bits<10> op, string opName, string asmName> {
+ multiclass VOP3beOnly_Real_gfx10<bits<10> op> {
def _e64_gfx10 :
- VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
- VOP3be_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
- VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
- let AsmString = asmName # Ps.AsmOperands;
- }
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
@@ -1126,21 +1159,25 @@ multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
-defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>;
-defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>;
-defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>;
-defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>;
-defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
-defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
-defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
-defm V_MUL_F16 : VOP2_Real_gfx10<0x035>;
-defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>;
-defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>;
-defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
-defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
-defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
-defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
-defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
+// NB: Same opcode as v_mac_legacy_f32
+let DecoderNamespace = "GFX10_B" in
+defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;
+
+defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>;
+defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>;
+defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>;
+defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>;
+defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
+defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
+defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
+defm V_MUL_F16 : VOP2_Real_gfx10<0x035>;
+defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>;
+defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>;
+defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
+defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
+defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
+defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
+defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
// VOP2 no carry-in, carry-out.
defm V_ADD_NC_U32 :
@@ -1172,13 +1209,10 @@ defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
defm V_CVT_PK_U16_U32 : VOP3Only_Real_gfx10<0x36a>;
defm V_CVT_PK_I16_I32 : VOP3Only_Real_gfx10<0x36b>;
-// VOP3 carry-in, carry-out.
-defm V_ADD_CO_U32 :
- VOP3beOnly_Real_gfx10<0x30f, "V_ADD_I32", "v_add_co_u32">;
-defm V_SUB_CO_U32 :
- VOP3beOnly_Real_gfx10<0x310, "V_SUB_I32", "v_sub_co_u32">;
-defm V_SUBREV_CO_U32 :
- VOP3beOnly_Real_gfx10<0x319, "V_SUBREV_I32", "v_subrev_co_u32">;
+// VOP3 carry-out.
+defm V_ADD_CO_U32 : VOP3beOnly_Real_gfx10<0x30f>;
+defm V_SUB_CO_U32 : VOP3beOnly_Real_gfx10<0x310>;
+defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>;
let SubtargetPredicate = isGFX10Plus in {
defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;
@@ -1207,7 +1241,7 @@ class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
}
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
- multiclass VOP2Only_Real_gfx6_gfx7<bits<6> op> {
+ multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> {
def _gfx6_gfx7 :
VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
@@ -1217,20 +1251,20 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}
- multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op> {
+ multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string PseudoName = NAME> {
def _e32_gfx6_gfx7 :
- VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
- VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+ VOP2_Real<!cast<VOP2_Pseudo>(PseudoName#"_e32"), SIEncodingFamily.SI>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(PseudoName#"_e32").Pfl>;
}
- multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op> {
+ multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string PseudoName = NAME> {
def _e64_gfx6_gfx7 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
- VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ VOP3_Real<!cast<VOP3_Pseudo>(PseudoName#"_e64"), SIEncodingFamily.SI>,
+ VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(PseudoName#"_e64").Pfl>;
}
- multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op> {
+ multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string PseudoName = NAME> {
def _e64_gfx6_gfx7 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
- VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ VOP3_Real<!cast<VOP3_Pseudo>(PseudoName#"_e64"), SIEncodingFamily.SI>,
+ VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(PseudoName#"_e64").Pfl>;
}
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
@@ -1246,6 +1280,20 @@ multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;
+multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op,
+ string PseudoName, string asmName> {
+ defvar ps32 = !cast<VOP2_Pseudo>(PseudoName#"_e32");
+ defvar ps64 = !cast<VOP3_Pseudo>(PseudoName#"_e64");
+
+ let AsmString = asmName # ps32.AsmOperands in {
+ defm "" : VOP2_Real_e32_gfx6_gfx7<op, PseudoName>;
+ }
+
+ let AsmString = asmName # ps64.AsmOperands in {
+ defm "" : VOP2be_Real_e64_gfx6_gfx7<op, PseudoName>;
+ }
+}
+
defm V_CNDMASK_B32 : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00e>;
@@ -1262,27 +1310,36 @@ defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32 : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32 : VOP2_Real_gfx6_gfx7<0x031>;
-defm V_ADD_I32 : VOP2be_Real_gfx6_gfx7<0x025>;
-defm V_SUB_I32 : VOP2be_Real_gfx6_gfx7<0x026>;
-defm V_SUBREV_I32 : VOP2be_Real_gfx6_gfx7<0x027>;
+
+// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in
+// VI, but the VI instructions behave the same as the SI versions.
+defm V_ADD_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">;
+defm V_SUB_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">;
+defm V_SUBREV_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">;
defm V_ADDC_U32 : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32 : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;
-defm V_READLANE_B32 : VOP2Only_Real_gfx6_gfx7<0x001>;
+defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>;
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
- defm V_WRITELANE_B32 : VOP2Only_Real_gfx6_gfx7<0x002>;
+ defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
let SubtargetPredicate = isGFX6GFX7 in {
defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
+ defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>;
+ defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>;
+ defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>;
+
+ def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>;
+ def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>;
+ def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7
defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
-let OtherPredicates = [HasMadMacF32Insts] in
defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
@@ -1490,16 +1547,16 @@ defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;
-defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_I32", "v_add_u32">;
-defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_I32", "v_sub_u32">;
-defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_I32", "v_subrev_u32">;
+defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">;
+defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">;
+defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">;
defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;
-defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_I32", "v_add_co_u32">;
-defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_I32", "v_sub_co_u32">;
-defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_I32", "v_subrev_co_u32">;
+defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">;
+defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">;
+defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">;
defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">;
defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">;
defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;
@@ -1568,11 +1625,11 @@ defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;
let SubtargetPredicate = isGFX9Only in {
-defm : VOP2bInstAliases<V_ADD_I32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">;
+defm : VOP2bInstAliases<V_ADD_U32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">;
defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">;
-defm : VOP2bInstAliases<V_SUB_I32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">;
+defm : VOP2bInstAliases<V_SUB_U32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">;
defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">;
-defm : VOP2bInstAliases<V_SUBREV_I32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">;
+defm : VOP2bInstAliases<V_SUBREV_U32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">;
defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;
} // End SubtargetPredicate = isGFX9Only
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 169949f2171a..42dc995609f0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -119,28 +119,37 @@ class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
timm:$cbsz, timm:$abid, timm:$blgp))];
}
-class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
+// Consistently gives instructions a _e64 suffix.
+multiclass VOP3Inst_Pseudo_Wrapper<string opName, VOPProfile P, list<dag> pattern = [], bit VOP3Only = 0> {
+ def _e64 : VOP3_Pseudo<opName, P, pattern, VOP3Only>;
+}
+
+class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
VOP3_Pseudo<OpName, P,
- !if(P.HasOpSel,
- !if(P.HasModifiers,
- getVOP3OpSelModPat<P, node>.ret,
- getVOP3OpSelPat<P, node>.ret),
- !if(P.HasModifiers,
- getVOP3ModPat<P, node>.ret,
- !if(P.HasIntClamp,
- getVOP3ClampPat<P, node>.ret,
- !if (P.IsMAI,
- getVOP3MAIPat<P, node>.ret,
- getVOP3Pat<P, node>.ret)))),
- VOP3Only, 0, P.HasOpSel> {
+ !if(P.HasOpSel,
+ !if(P.HasModifiers,
+ getVOP3OpSelModPat<P, node>.ret,
+ getVOP3OpSelPat<P, node>.ret),
+ !if(P.HasModifiers,
+ getVOP3ModPat<P, node>.ret,
+ !if(P.HasIntClamp,
+ getVOP3ClampPat<P, node>.ret,
+ !if (P.IsMAI,
+ getVOP3MAIPat<P, node>.ret,
+ getVOP3Pat<P, node>.ret)))),
+ VOP3Only, 0, P.HasOpSel> {
let IntClamp = P.HasIntClamp;
let AsmMatchConverter =
- !if(P.HasOpSel,
- "cvtVOP3OpSel",
- !if(!or(P.HasModifiers, !or(P.HasOMod, P.HasIntClamp)),
- "cvtVOP3",
- ""));
+ !if(P.HasOpSel,
+ "cvtVOP3OpSel",
+ !if(!or(P.HasModifiers, P.HasOMod, P.HasIntClamp),
+ "cvtVOP3",
+ ""));
+}
+
+multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> {
+ def _e64 : VOP3InstBase<OpName, P, node, VOP3Only>;
}
// Special case for v_div_fmas_{f32|f64}, since it seems to be the
@@ -174,7 +183,7 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
- let HasModifiers = !if(Features.IsPacked, !if(Features.IsMAI, 0, 1), P.HasModifiers);
+ let HasModifiers = !if(Features.IsMAI, 0, !or(Features.IsPacked, P.HasModifiers));
// FIXME: Hack to stop printing _e64
let Outs64 = (outs DstRC.RegClass:$vdst);
@@ -182,6 +191,7 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
" " # !if(Features.HasOpSel,
getAsmVOP3OpSel<NumSrcArgs,
HasIntClamp,
+ P.HasOMod,
HasSrc0FloatMods,
HasSrc1FloatMods,
HasSrc2FloatMods>.ret,
@@ -193,12 +203,8 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
}
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
- // v_div_scale_{f32|f64} do not support input modifiers.
- let HasModifiers = 0;
- let HasClamp = 0;
- let HasOMod = 0;
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
- let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
+ let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
}
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
@@ -247,6 +253,7 @@ def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
let Asm64 = "$vdst, $src0, $attr$attrchan$clamp$omod";
let HasClamp = 1;
+ let HasSrc0Mods = 0;
}
class getInterp16Asm <bit HasSrc2, bit HasOMod> {
@@ -277,7 +284,7 @@ class getInterp16Ins <bit HasSrc2, bit HasOMod,
class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
- let HasOMod = !if(!eq(DstVT.Value, f16.Value), 0, 1);
+ let HasOMod = !ne(DstVT.Value, f16.Value);
let HasHigh = 1;
let Outs64 = (outs VGPR_32:$vdst);
@@ -293,34 +300,36 @@ let isCommutable = 1 in {
let mayRaiseFPException = 0 in {
let SubtargetPredicate = HasMadMacF32Insts in {
-def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fmad>;
+defm V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+defm V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fmad>;
} // End SubtargetPredicate = HasMadMacInsts
-let SubtargetPredicate = HasNoMadMacF32Insts in
-def V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+let SubtargetPredicate = HasFmaLegacy32 in
+defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
+ VOP3_Profile<VOP_F32_F32_F32_F32>,
+ int_amdgcn_fma_legacy>;
}
-def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>;
-def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
+defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>;
+defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
let SchedRW = [WriteDoubleAdd] in {
let FPDPRounding = 1 in {
-def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, any_fma>;
-def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd, 1>;
-def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>;
+defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, any_fma>;
+defm V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd, 1>;
+defm V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>;
} // End FPDPRounding = 1
-def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like, 1>;
-def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like, 1>;
+defm V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like, 1>;
+defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like, 1>;
} // End SchedRW = [WriteDoubleAdd]
let SchedRW = [WriteQuarterRate32] in {
-def V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>, mul>;
-def V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile<VOP_I32_I32_I32>, mulhu>;
-def V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
-def V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
+defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>, mul>;
+defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile<VOP_I32_I32_I32>, mulhu>;
+defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
+defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
} // End SchedRW = [WriteQuarterRate32]
let Uses = [MODE, VCC, EXEC] in {
@@ -329,191 +338,165 @@ let Uses = [MODE, VCC, EXEC] in {
// if (vcc)
// result *= 2^32
//
-def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, []> {
- let SchedRW = [WriteFloatFMA];
-}
+let SchedRW = [WriteFloatFMA] in
+defm V_DIV_FMAS_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, []>;
// v_div_fmas_f64:
// result = src0 * src1 + src2
// if (vcc)
// result *= 2^64
//
-def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []> {
- let SchedRW = [WriteDouble];
- let FPDPRounding = 1;
-}
-} // End Uses = [VCC, EXEC]
+let SchedRW = [WriteDouble], FPDPRounding = 1 in
+defm V_DIV_FMAS_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []>;
+} // End Uses = [MODE, VCC, EXEC]
} // End isCommutable = 1
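The div_fmas comments above translate into a small scalar model (illustrative only, shown for the f64 flavour; the f32 form scales by 2^32 instead):

    #include <cmath>

    double div_fmas_f64(double src0, double src1, double src2, bool vcc) {
      double result = std::fma(src0, src1, src2); // result = src0 * src1 + src2
      if (vcc)
        result *= 0x1p64;                         // result *= 2^64 when VCC is set
      return result;
    }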
let mayRaiseFPException = 0 in {
-def V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
-def V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
-def V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
-def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
+defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
+defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
+defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
+defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
} // End mayRaiseFPException
-def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
-def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
-def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
-def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
-def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
+defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
+defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
+defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
+defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
+defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
let mayRaiseFPException = 0 in { // XXX - Seems suspect but manual doesn't say it does
-def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
-def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
-def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
-def V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmax3>;
-def V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmax3>;
-def V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumax3>;
-def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
-def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
-def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
+defm V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
+defm V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
+defm V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
+defm V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmax3>;
+defm V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmax3>;
+defm V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumax3>;
+defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
+defm V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
+defm V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
} // End mayRaiseFPException = 0
-def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
-def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUdiv_fixup>;
+defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
+
+defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUdiv_fixup>;
let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
-def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
-def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
+ defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
+ defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
-def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
- let SchedRW = [WriteFloatFMA, WriteSALU];
- let AsmMatchConverter = "";
-}
+ let SchedRW = [WriteFloatFMA, WriteSALU] in
+ defm V_DIV_SCALE_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> ;
-// Double precision division pre-scale.
-def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
- let SchedRW = [WriteDouble, WriteSALU];
- let AsmMatchConverter = "";
- let FPDPRounding = 1;
-}
+ // Double precision division pre-scale.
+ let SchedRW = [WriteDouble, WriteSALU], FPDPRounding = 1 in
+ defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1>;
} // End mayRaiseFPException = 0
-def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+defm V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
let Constraints = "@earlyclobber $vdst" in {
-def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
+defm V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
} // End Constraints = "@earlyclobber $vdst"
-def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, int_amdgcn_trig_preop> {
- let SchedRW = [WriteDouble];
-}
-let SchedRW = [Write64Bit] in {
-let SubtargetPredicate = isGFX6GFX7 in {
-def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>, shl>;
-def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>, srl>;
-def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, sra>;
-} // End SubtargetPredicate = isGFX6GFX7
+let SchedRW = [WriteDouble] in {
+defm V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, int_amdgcn_trig_preop>;
+} // End SchedRW = [WriteDouble]
-let SubtargetPredicate = isGFX8Plus in {
-def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
-def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshr_rev>;
-def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, ashr_rev>;
-} // End SubtargetPredicate = isGFX8Plus
+let SchedRW = [Write64Bit] in {
+ let SubtargetPredicate = isGFX6GFX7 in {
+ defm V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>, shl>;
+ defm V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>, srl>;
+ defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, sra>;
+ } // End SubtargetPredicate = isGFX6GFX7
+
+ let SubtargetPredicate = isGFX8Plus in {
+ defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
+ defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshr_rev>;
+ defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, ashr_rev>;
+ } // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write64Bit]
def : GCNPat<
- (i64 (getDivergentFrag<sext>.ret i16:$src)),
- (REG_SEQUENCE VReg_64,
- (i32 (V_BFE_I32 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))), sub0,
- (i32 (COPY_TO_REGCLASS
- (V_ASHRREV_I32_e32 (S_MOV_B32 (i32 0x1f)), (i32 (V_BFE_I32 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
- ), VGPR_32)), sub1)
->;
-
-def : GCNPat<
(i32 (getDivergentFrag<sext>.ret i16:$src)),
- (i32 (V_BFE_I32 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
+ (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
>;
let SubtargetPredicate = isGFX6GFX7GFX10 in {
-def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
} // End SubtargetPredicate = isGFX6GFX7GFX10
let SchedRW = [Write32Bit] in {
let SubtargetPredicate = isGFX8Plus in {
-def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>;
+defm V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>;
} // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write32Bit]
let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
-def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
-def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP>>;
+defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
+defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP>>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
let isCommutable = 1 in {
let SchedRW = [WriteQuarterRate32, WriteSALU] in {
-def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
-def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
+defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
+defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
} // End SchedRW = [WriteQuarterRate32, WriteSALU]
} // End isCommutable = 1
} // End SubtargetPredicate = isGFX7Plus
-
-def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup> {
- let Predicates = [Has16BitInsts, isGFX8Only];
- let FPDPRounding = 1;
-}
-def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
- VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup> {
- let renamedInGFX9 = 1;
- let Predicates = [Has16BitInsts, isGFX9Plus];
- let FPDPRounding = 1;
-}
-
-def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma> {
- let Predicates = [Has16BitInsts, isGFX8Only];
- let FPDPRounding = 1;
-}
-def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma> {
- let renamedInGFX9 = 1;
- let Predicates = [Has16BitInsts, isGFX9Plus];
- let FPDPRounding = 1;
-}
+let FPDPRounding = 1 in {
+ let Predicates = [Has16BitInsts, isGFX8Only] in {
+ defm V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
+ defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>;
+ } // End Predicates = [Has16BitInsts, isGFX8Only]
+
+ let renamedInGFX9 = 1, Predicates = [Has16BitInsts, isGFX9Plus] in {
+ defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
+ VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>;
+ defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
+ } // End renamedInGFX9 = 1, Predicates = [Has16BitInsts, isGFX9Plus]
+} // End FPDPRounding = 1
let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {
let renamedInGFX9 = 1 in {
-def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
-def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
-let FPDPRounding = 1 in {
-def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
-let Uses = [MODE, M0, EXEC] in {
-// For some reason the intrinsic operands are in a different order
-// from the instruction operands.
-def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
- [(set f16:$vdst,
- (int_amdgcn_interp_p2_f16 (VOP3Mods f32:$src2, i32:$src2_modifiers),
- (VOP3Mods f32:$src0, i32:$src0_modifiers),
- (i32 timm:$attrchan),
- (i32 timm:$attr),
- (i1 timm:$high),
- M0))]>;
-} // End Uses = [M0, MODE, EXEC]
-} // End FPDPRounding = 1
+ defm V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
+ defm V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
+ let FPDPRounding = 1 in {
+ defm V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
+ let Uses = [MODE, M0, EXEC] in {
+ // For some reason the intrinsic operands are in a different order
+ // from the instruction operands.
+ def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
+ [(set f16:$vdst,
+ (int_amdgcn_interp_p2_f16 (VOP3Mods f32:$src2, i32:$src2_modifiers),
+ (VOP3Mods f32:$src0, i32:$src0_modifiers),
+ (i32 timm:$attrchan),
+ (i32 timm:$attr),
+ (i1 timm:$high),
+ M0))]>;
+ } // End Uses = [M0, MODE, EXEC]
+ } // End FPDPRounding = 1
} // End renamedInGFX9 = 1
-let SubtargetPredicate = isGFX9Only in {
-def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>> {
- let FPDPRounding = 1;
-}
-} // End SubtargetPredicate = isGFX9Only
+let SubtargetPredicate = isGFX9Only, FPDPRounding = 1 in {
+ defm V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>> ;
+} // End SubtargetPredicate = isGFX9Only, FPDPRounding = 1
let SubtargetPredicate = isGFX9Plus in {
-def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
-def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
+defm V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
+defm V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
} // End SubtargetPredicate = isGFX9Plus
@@ -535,6 +518,15 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
+def : GCNPat<
+ (i64 (getDivergentFrag<sext>.ret i16:$src)),
+ (REG_SEQUENCE VReg_64,
+ (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))), sub0,
+ (i32 (COPY_TO_REGCLASS
+ (V_ASHRREV_I32_e32 (S_MOV_B32 (i32 0x1f)), (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
+ ), VGPR_32)), sub1)
+>;
+
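A scalar model of the divergent i16 -> i64 sign-extension pattern just added (reading aid only): V_BFE_I32_e64 with offset 0 and width 0x10 sign-extends the low 16 bits, and V_ASHRREV_I32_e32 by 0x1f replicates the sign bit into the high word.

    #include <cstdint>

    int64_t sext_i16_to_i64(uint32_t src) {
      int32_t lo = int16_t(src & 0xFFFFu);  // V_BFE_I32_e64 src, 0, 0x10 (sub0)
      int32_t hi = lo >> 31;                // V_ASHRREV_I32_e32 0x1f, lo (sub1)
      return int64_t((uint64_t(uint32_t(hi)) << 32) | uint32_t(lo)); // REG_SEQUENCE
    }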
let SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC] in {
def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>;
def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
@@ -552,8 +544,8 @@ def : GCNPat <
}
-defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
-defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_U16_e64, zext>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_I16_e64, sext>;
} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]
@@ -568,8 +560,8 @@ def : GCNPat <
}
-defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9, zext>;
-defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9, sext>;
+defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64, zext>;
+defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9_e64, sext>;
} // End Predicates = [Has16BitInsts, isGFX10Plus]
@@ -593,9 +585,9 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
if (!Operands[i]->isDivergent() &&
!isInlineImmediate(Operands[i].getNode())) {
ConstantBusUses++;
- // This uses AMDGPU::V_ADD3_U32, but all three operand instructions
+ // This uses AMDGPU::V_ADD3_U32_e64, but all three operand instructions
// have the same constant bus limit.
- if (ConstantBusUses > Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32))
+ if (ConstantBusUses > Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32_e64))
return false;
}
}
@@ -605,52 +597,60 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
let PredicateCodeUsesOperands = 1;
// The divergence predicate is irrelevant in GlobalISel, as we have
- // proper register bank checks. We also force all VOP instruction
- // operands to VGPR, so we should not need to check the constant bus
- // restriction.
+ // proper register bank checks. We just need to verify the constant
+ // bus restriction when all the sources are considered.
//
// FIXME: With unlucky SGPR operands, we could penalize code by
// blocking folding SGPR->VGPR copies later.
// FIXME: There's no register bank verifier
- // FIXME: Should add a way for the emitter to recognize this is a
- // trivially true predicate to eliminate the check.
- let GISelPredicateCode = [{return true;}];
+ let GISelPredicateCode = [{
+ const int ConstantBusLimit = Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32_e64);
+ int ConstantBusUses = 0;
+ for (unsigned i = 0; i < 3; ++i) {
+ const RegisterBank *RegBank = RBI.getRegBank(Operands[i]->getReg(), MRI, TRI);
+ if (RegBank->getID() == AMDGPU::SGPRRegBankID) {
+ if (++ConstantBusUses > ConstantBusLimit)
+ return false;
+ }
+ }
+ return true;
+ }];
}
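A worked example of the check the new GISelPredicateCode performs (the limits below are assumptions for illustration; the real value is whatever getConstantBusLimit(AMDGPU::V_ADD3_U32_e64) returns, typically 1 before GFX10 and 2 on GFX10):

    #include <array>

    // Count how many of the three sources would occupy the constant bus
    // (SGPR-bank operands here) and reject the fold once the limit is exceeded.
    bool fitsConstantBus(const std::array<bool, 3> &srcIsSGPR, int constantBusLimit) {
      int uses = 0;
      for (bool isSGPR : srcIsSGPR)
        if (isSGPR && ++uses > constantBusLimit)
          return false;   // e.g. two SGPR sources with a limit of 1
      return true;
    }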
let SubtargetPredicate = isGFX9Plus in {
-def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
-def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+defm V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
+defm V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+defm V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+defm V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+defm V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+defm V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmed3>;
-def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmed3>;
-def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumed3>;
+defm V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmed3>;
+defm V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmed3>;
+defm V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumed3>;
-def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmin3>;
-def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmin3>;
-def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumin3>;
+defm V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmin3>;
+defm V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmin3>;
+defm V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumin3>;
-def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmax3>;
-def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
-def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
+defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmax3>;
+defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
+defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
-def V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
-def V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
+defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
+defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
-def V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
-def V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
+defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
+defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
-def V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
-def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
+defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
+defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
-def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
-def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
+defm V_ADD_I32 : VOP3Inst <"v_add_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
+defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
@@ -659,14 +659,28 @@ class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instructio
(inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
>;
-def : ThreeOp_i32_Pats<shl, add, V_LSHL_ADD_U32>;
-def : ThreeOp_i32_Pats<add, shl, V_ADD_LSHL_U32>;
-def : ThreeOp_i32_Pats<add, add, V_ADD3_U32>;
-def : ThreeOp_i32_Pats<shl, or, V_LSHL_OR_B32>;
-def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32>;
-def : ThreeOp_i32_Pats<or, or, V_OR3_B32>;
-def : ThreeOp_i32_Pats<xor, add, V_XAD_U32>;
+def : ThreeOp_i32_Pats<shl, add, V_LSHL_ADD_U32_e64>;
+def : ThreeOp_i32_Pats<add, shl, V_ADD_LSHL_U32_e64>;
+def : ThreeOp_i32_Pats<add, add, V_ADD3_U32_e64>;
+def : ThreeOp_i32_Pats<shl, or, V_LSHL_OR_B32_e64>;
+def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32_e64>;
+def : ThreeOp_i32_Pats<or, or, V_OR3_B32_e64>;
+def : ThreeOp_i32_Pats<xor, add, V_XAD_U32_e64>;
+
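Each ThreeOp_i32_Pats instantiation folds a pair of 32-bit binary ops into the single VOP3 instruction named in its third argument; for example, the first pattern covers expressions shaped like (a << b) + c. A scalar sketch (illustrative only, assuming the shift amount is below 32):

    #include <cstdint>

    uint32_t lshl_add_u32(uint32_t a, uint32_t b, uint32_t c) {
      return (a << b) + c;   // one v_lshl_add_u32 instead of a shift plus an add
    }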
+def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
+def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>;
+
+// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
+class OpSelBinOpClampPat<SDPatternOperator node,
+ Instruction inst> : GCNPat<
+ (node (i16 (VOP3OpSel i16:$src0, i32:$src0_modifiers)),
+ (i16 (VOP3OpSel i16:$src1, i32:$src1_modifiers))),
+ (inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0)
+>;
+
+def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
+def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
} // End SubtargetPredicate = isGFX9Plus
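The VOPBinOpClampPat and OpSelBinOpClampPat selections above rely on the clamp bit turning a wrapping add or sub into a saturating one. A scalar model for the i32 case (illustrative only; the i16 case is analogous):

    #include <cstdint>
    #include <limits>

    int32_t saddsat_i32(int32_t a, int32_t b) {
      int64_t wide = int64_t(a) + int64_t(b);         // do the add without wrapping
      if (wide > std::numeric_limits<int32_t>::max())
        return std::numeric_limits<int32_t>::max();   // clamp on positive overflow
      if (wide < std::numeric_limits<int32_t>::min())
        return std::numeric_limits<int32_t>::min();   // clamp on negative overflow
      return int32_t(wide);
    }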
def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
@@ -676,9 +690,8 @@ def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3
let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1,
IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2,
- VGPR_32:$vdst_in, op_sel:$op_sel);
+ VGPR_32:$vdst_in, op_sel0:$op_sel);
let HasClamp = 0;
- let HasOMod = 0;
}
class PermlanePat<SDPatternOperator permlane,
@@ -716,23 +729,23 @@ class PermlaneDiscardVDstIn<SDPatternOperator permlane,
let SubtargetPredicate = isGFX10Plus in {
- def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
- def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32>;
+ defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+ def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>;
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
- def V_PERMLANE16_B32 : VOP3Inst <"v_permlane16_b32", VOP3_PERMLANE_Profile>;
- def V_PERMLANEX16_B32 : VOP3Inst <"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
+ defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile>;
+ defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
- def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32>;
- def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32>;
+ def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64>;
+ def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64>;
def : PermlaneDiscardVDstIn<
BoundControlOrFetchInvalidPermlane<int_amdgcn_permlane16>,
- V_PERMLANE16_B32>;
+ V_PERMLANE16_B32_e64>;
def : PermlaneDiscardVDstIn<
BoundControlOrFetchInvalidPermlane<int_amdgcn_permlanex16>,
- V_PERMLANEX16_B32>;
+ V_PERMLANEX16_B32_e64>;
} // End SubtargetPredicate = isGFX10Plus
class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
@@ -744,13 +757,13 @@ class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
>;
let WaveSizePredicate = isWave64 in {
-def : DivFmasPat<f32, V_DIV_FMAS_F32, VCC>;
-def : DivFmasPat<f64, V_DIV_FMAS_F64, VCC>;
+def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC>;
+def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC>;
}
let WaveSizePredicate = isWave32 in {
-def : DivFmasPat<f32, V_DIV_FMAS_F32, VCC_LO>;
-def : DivFmasPat<f64, V_DIV_FMAS_F64, VCC_LO>;
+def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC_LO>;
+def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
}
//===----------------------------------------------------------------------===//
@@ -775,23 +788,23 @@ class getClampRes<VOPProfile P, Instruction inst> {
ret1));
}
-class IntClampPat<VOP3Inst inst, SDPatternOperator node> : GCNPat<
+class IntClampPat<VOP3InstBase inst, SDPatternOperator node> : GCNPat<
getClampPat<inst.Pfl, node>.ret,
getClampRes<inst.Pfl, inst>.ret
>;
-def : IntClampPat<V_MAD_I32_I24, AMDGPUmad_i24>;
-def : IntClampPat<V_MAD_U32_U24, AMDGPUmad_u24>;
+def : IntClampPat<V_MAD_I32_I24_e64, AMDGPUmad_i24>;
+def : IntClampPat<V_MAD_U32_U24_e64, AMDGPUmad_u24>;
-def : IntClampPat<V_SAD_U8, int_amdgcn_sad_u8>;
-def : IntClampPat<V_SAD_HI_U8, int_amdgcn_sad_hi_u8>;
-def : IntClampPat<V_SAD_U16, int_amdgcn_sad_u16>;
+def : IntClampPat<V_SAD_U8_e64, int_amdgcn_sad_u8>;
+def : IntClampPat<V_SAD_HI_U8_e64, int_amdgcn_sad_hi_u8>;
+def : IntClampPat<V_SAD_U16_e64, int_amdgcn_sad_u16>;
-def : IntClampPat<V_MSAD_U8, int_amdgcn_msad_u8>;
-def : IntClampPat<V_MQSAD_PK_U16_U8, int_amdgcn_mqsad_pk_u16_u8>;
+def : IntClampPat<V_MSAD_U8_e64, int_amdgcn_msad_u8>;
+def : IntClampPat<V_MQSAD_PK_U16_U8_e64, int_amdgcn_mqsad_pk_u16_u8>;
-def : IntClampPat<V_QSAD_PK_U16_U8, int_amdgcn_qsad_pk_u16_u8>;
-def : IntClampPat<V_MQSAD_U32_U8, int_amdgcn_mqsad_u32_u8>;
+def : IntClampPat<V_QSAD_PK_U16_U8_e64, int_amdgcn_qsad_pk_u16_u8>;
+def : IntClampPat<V_MQSAD_U32_U8_e64, int_amdgcn_mqsad_u32_u8>;
//===----------------------------------------------------------------------===//
@@ -805,22 +818,27 @@ def : IntClampPat<V_MQSAD_U32_U8, int_amdgcn_mqsad_u32_u8>;
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
multiclass VOP3_Real_gfx10<bits<10> op> {
def _gfx10 :
+ VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3e_gfx10<op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
+ }
+ multiclass VOP3_Real_No_Suffix_gfx10<bits<10> op> {
+ def _gfx10 :
VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.GFX10>,
VOP3e_gfx10<op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3_Real_gfx10_with_name<bits<10> op, string opName,
string asmName> {
def _gfx10 :
- VOP3_Real<!cast<VOP3_Pseudo>(opName), SIEncodingFamily.GFX10>,
- VOP3e_gfx10<op, !cast<VOP3_Pseudo>(opName).Pfl> {
- VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName);
+ VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3e_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
let AsmString = asmName # ps.AsmOperands;
}
}
multiclass VOP3be_Real_gfx10<bits<10> op> {
def _gfx10 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
- VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP3Interp_Real_gfx10<bits<10> op> {
def _gfx10 :
@@ -829,26 +847,30 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
multiclass VOP3OpSel_Real_gfx10<bits<10> op> {
def _gfx10 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
- VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP3OpSel_Real_gfx10_with_name<bits<10> op, string opName,
string asmName> {
def _gfx10 :
- VOP3_Real<!cast<VOP3_Pseudo>(opName), SIEncodingFamily.GFX10>,
- VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(opName).Pfl> {
- VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName);
+ VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
let AsmString = asmName # ps.AsmOperands;
}
}
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
-defm V_READLANE_B32 : VOP3_Real_gfx10<0x360>;
+defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>;
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
- defm V_WRITELANE_B32 : VOP3_Real_gfx10<0x361>;
+ defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
+let SubtargetPredicate = isGFX10Before1030 in {
+ defm V_MUL_LO_I32 : VOP3_Real_gfx10<0x16b>;
+}
+
defm V_XOR3_B32 : VOP3_Real_gfx10<0x178>;
defm V_LSHLREV_B64 : VOP3_Real_gfx10<0x2ff>;
defm V_LSHRREV_B64 : VOP3_Real_gfx10<0x300>;
@@ -868,9 +890,9 @@ defm V_ADD_NC_I16 :
defm V_SUB_NC_I16 :
VOP3OpSel_Real_gfx10_with_name<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
defm V_SUB_NC_I32 :
- VOP3_Real_gfx10_with_name<0x376, "V_SUB_I32_gfx9", "v_sub_nc_i32">;
+ VOP3_Real_gfx10_with_name<0x376, "V_SUB_I32", "v_sub_nc_i32">;
defm V_ADD_NC_I32 :
- VOP3_Real_gfx10_with_name<0x37f, "V_ADD_I32_gfx9", "v_add_nc_i32">;
+ VOP3_Real_gfx10_with_name<0x37f, "V_ADD_I32", "v_add_nc_i32">;
defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_gfx10<0x200>;
defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_gfx10<0x201>;
@@ -907,16 +929,16 @@ defm V_DIV_FIXUP_F16 :
// FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these
// (they do not support SDWA or DPP).
-defm V_ADD_NC_U16 : VOP3_Real_gfx10_with_name<0x303, "V_ADD_U16_e64", "v_add_nc_u16">;
-defm V_SUB_NC_U16 : VOP3_Real_gfx10_with_name<0x304, "V_SUB_U16_e64", "v_sub_nc_u16">;
-defm V_MUL_LO_U16 : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16_e64", "v_mul_lo_u16">;
-defm V_LSHRREV_B16 : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16_e64", "v_lshrrev_b16">;
-defm V_ASHRREV_I16 : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16_e64", "v_ashrrev_i16">;
-defm V_MAX_U16 : VOP3_Real_gfx10_with_name<0x309, "V_MAX_U16_e64", "v_max_u16">;
-defm V_MAX_I16 : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16_e64", "v_max_i16">;
-defm V_MIN_U16 : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16_e64", "v_min_u16">;
-defm V_MIN_I16 : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16_e64", "v_min_i16">;
-defm V_LSHLREV_B16 : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16_e64", "v_lshlrev_b16">;
+defm V_ADD_NC_U16 : VOP3_Real_gfx10_with_name<0x303, "V_ADD_U16", "v_add_nc_u16">;
+defm V_SUB_NC_U16 : VOP3_Real_gfx10_with_name<0x304, "V_SUB_U16", "v_sub_nc_u16">;
+defm V_MUL_LO_U16 : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16", "v_mul_lo_u16">;
+defm V_LSHRREV_B16 : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16", "v_lshrrev_b16">;
+defm V_ASHRREV_I16 : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16", "v_ashrrev_i16">;
+defm V_MAX_U16 : VOP3_Real_gfx10_with_name<0x309, "V_MAX_U16", "v_max_u16">;
+defm V_MAX_I16 : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16", "v_max_i16">;
+defm V_MIN_U16 : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16", "v_min_u16">;
+defm V_MIN_I16 : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16", "v_min_i16">;
+defm V_LSHLREV_B16 : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16", "v_lshlrev_b16">;
defm V_PERMLANE16_B32 : VOP3OpSel_Real_gfx10<0x377>;
defm V_PERMLANEX16_B32 : VOP3OpSel_Real_gfx10<0x378>;
@@ -927,13 +949,13 @@ defm V_PERMLANEX16_B32 : VOP3OpSel_Real_gfx10<0x378>;
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
multiclass VOP3_Real_gfx7<bits<10> op> {
def _gfx7 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
- VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP3be_Real_gfx7<bits<10> op> {
def _gfx7 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
- VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
@@ -955,13 +977,13 @@ defm V_MAD_I64_I32 : VOP3be_Real_gfx7_gfx10<0x177>;
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
multiclass VOP3_Real_gfx6_gfx7<bits<10> op> {
def _gfx6_gfx7 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
- VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP3be_Real_gfx6_gfx7<bits<10> op> {
def _gfx6_gfx7 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
- VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
@@ -974,6 +996,7 @@ multiclass VOP3be_Real_gfx6_gfx7_gfx10<bits<10> op> :
defm V_LSHL_B64 : VOP3_Real_gfx6_gfx7<0x161>;
defm V_LSHR_B64 : VOP3_Real_gfx6_gfx7<0x162>;
defm V_ASHR_I64 : VOP3_Real_gfx6_gfx7<0x163>;
+defm V_MUL_LO_I32 : VOP3_Real_gfx6_gfx7<0x16b>;
defm V_MAD_LEGACY_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x140>;
defm V_MAD_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x141>;
@@ -1015,7 +1038,6 @@ defm V_MAX_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x167>;
defm V_LDEXP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x168>;
defm V_MUL_LO_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x169>;
defm V_MUL_HI_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x16a>;
-defm V_MUL_LO_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x16b>;
defm V_MUL_HI_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x16c>;
defm V_DIV_FMAS_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x16f>;
defm V_DIV_FMAS_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x170>;
@@ -1036,18 +1058,22 @@ defm V_FMA_LEGACY_F32 : VOP3_Real_gfx10<0x140>;
let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
multiclass VOP3_Real_vi<bits<10> op> {
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
+}
+multiclass VOP3_Real_No_Suffix_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3e_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3be_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3be_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3be_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP3Interp_Real_vi<bits<10> op> {
@@ -1060,8 +1086,8 @@ multiclass VOP3Interp_Real_vi<bits<10> op> {
let AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" in {
multiclass VOP3_F16_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP3Interp_F16_Real_vi<bits<10> op> {
@@ -1074,17 +1100,17 @@ multiclass VOP3Interp_F16_Real_vi<bits<10> op> {
let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
multiclass VOP3_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> {
- def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
- VOP3e_vi <op, !cast<VOP3_Pseudo>(OpName).Pfl> {
- VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName);
+ def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
+ VOP3e_vi <op, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
let AsmString = AsmName # ps.AsmOperands;
}
}
multiclass VOP3OpSel_F16_Real_gfx9<bits<10> op, string AsmName> {
- def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX9>,
- VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
- VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME);
+ def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
+ VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME#"_e64");
let AsmString = AsmName # ps.AsmOperands;
}
}
@@ -1098,9 +1124,9 @@ multiclass VOP3Interp_F16_Real_gfx9<bits<10> op, string OpName, string AsmName>
}
multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
- def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.GFX9>,
- VOP3e_vi <op, !cast<VOP_Pseudo>(NAME).Pfl> {
- VOP_Pseudo ps = !cast<VOP_Pseudo>(NAME);
+ def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
+ VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
+ VOP_Pseudo ps = !cast<VOP_Pseudo>(NAME#"_e64");
let AsmString = AsmName # ps.AsmOperands;
}
}
@@ -1177,8 +1203,8 @@ defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;
defm V_INTERP_P2_F16_gfx9 : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">;
-defm V_ADD_I32_gfx9 : VOP3_Real_gfx9 <0x29c, "v_add_i32">;
-defm V_SUB_I32_gfx9 : VOP3_Real_gfx9 <0x29d, "v_sub_i32">;
+defm V_ADD_I32 : VOP3_Real_vi <0x29c>;
+defm V_SUB_I32 : VOP3_Real_vi <0x29d>;
defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_vi <0x270>;
defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_vi <0x271>;
@@ -1201,8 +1227,8 @@ defm V_MUL_LO_I32 : VOP3_Real_vi <0x285>;
defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>;
defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>;
-defm V_READLANE_B32 : VOP3_Real_vi <0x289>;
-defm V_WRITELANE_B32 : VOP3_Real_vi <0x28a>;
+defm V_READLANE_B32 : VOP3_Real_No_Suffix_vi <0x289>;
+defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_vi <0x28a>;
defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index fc457ad212d4..64e70b8f64b0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -39,7 +39,7 @@ class VOP3_VOP3PInst<string OpName, VOPProfile P, bit UseTiedOutput = 0,
// class constraints.
!if(UseTiedOutput, (ins clampmod:$clamp, VGPR_32:$vdst_in),
(ins clampmod0:$clamp))),
- (ins op_sel:$op_sel, op_sel_hi:$op_sel_hi));
+ (ins op_sel0:$op_sel, op_sel_hi0:$op_sel_hi));
let Constraints = !if(UseTiedOutput, "$vdst = $vdst_in", "");
let DisableEncoding = !if(UseTiedOutput, "$vdst_in", "");
@@ -77,6 +77,8 @@ def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I1
def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
+let SubtargetPredicate = HasVOP3PInsts in {
+
// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
// The constant will be emitted as a mov, and folded later.
@@ -86,6 +88,19 @@ def : GCNPat<
(V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1)
>;
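A simplified scalar sketch of the constant check behind that pattern (the real predicate, NegSubInlineConstV216, works on packed v2i16 constants; the -16..64 integer inline-immediate range is assumed here for illustration):

    // For c = -64, c itself would need a literal/mov but -c fits inline, so
    // 'x + (-64)' is better re-selected as 'v_pk_sub_u16 x, 64'.
    bool negIsInlineButValueIsNot(int c) {
      auto isInlineImm = [](int v) { return v >= -16 && v <= 64; };
      return !isInlineImm(c) && isInlineImm(-c);
    }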
+// Integer operations with clamp bit set.
+class VOP3PSatPat<SDPatternOperator pat, Instruction inst> : GCNPat<
+ (pat (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)),
+ (v2i16 (VOP3PMods v2i16:$src1, i32:$src1_modifiers))),
+ (inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE)
+>;
+
+def : VOP3PSatPat<uaddsat, V_PK_ADD_U16>;
+def : VOP3PSatPat<saddsat, V_PK_ADD_I16>;
+def : VOP3PSatPat<usubsat, V_PK_SUB_U16>;
+def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
+} // End SubtargetPredicate = HasVOP3PInsts
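The packed VOP3PSatPat forms apply the same clamp-bit saturation independently to each 16-bit lane of the v2i16 operands; an unsigned-add sketch (illustrative only, the signed and subtract cases follow the same shape):

    #include <cstdint>

    uint16_t uaddsat_u16(uint16_t a, uint16_t b) {
      uint32_t wide = uint32_t(a) + uint32_t(b);
      return wide > 0xFFFFu ? uint16_t(0xFFFFu) : uint16_t(wide);
    }

    uint32_t pk_add_u16_clamp(uint32_t packedA, uint32_t packedB) {
      uint16_t lo = uaddsat_u16(uint16_t(packedA & 0xFFFFu), uint16_t(packedB & 0xFFFFu));
      uint16_t hi = uaddsat_u16(uint16_t(packedA >> 16), uint16_t(packedB >> 16));
      return uint32_t(lo) | (uint32_t(hi) << 16);
    }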
+
multiclass MadFmaMixPats<SDPatternOperator fma_like,
Instruction mix_inst,
Instruction mixlo_inst,
@@ -211,7 +226,7 @@ foreach Type = ["I", "U"] in
foreach Index = 0-3 in {
// Defines patterns that extract each Index'ed 8bit from an unsigned
// 32bit scalar value;
- def Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !if (!eq (Type, "U"), 1, 0)>;
+ def Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !eq (Type, "U")>;
// Defines multiplication patterns where the multiplication is happening on each
// Index'ed 8bit of a 32bit scalar value.
@@ -239,7 +254,7 @@ foreach Type = ["I", "U"] in
foreach Index = 0-7 in {
// Defines patterns that extract each Index'ed 4bit from an unsigned
// 32bit scalar value;
- def Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !if (!eq (Type, "U"), 1, 0)>;
+ def Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !eq (Type, "U")>;
// Defines multiplication patterns where the multiplication is happening on each
// Index'ed 8bit of a 32bit scalar value.
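The Extract definitions in the two hunks above pull the Index'ed 8-bit (mask 255, offset !shl(Index, 3)) or 4-bit (mask 15, offset !shl(Index, 2)) chunk out of a 32-bit scalar, with the !eq(Type, "U") argument picking the unsigned flavour. A reading-aid sketch of the 8-bit case:

    #include <cstdint>

    int32_t extractByte(uint32_t value, unsigned index, bool isUnsigned) {
      uint32_t chunk = (value >> (index * 8)) & 0xFFu;             // offset !shl(Index, 3), mask 255
      return isUnsigned ? int32_t(chunk) : int32_t(int8_t(chunk)); // "U": zero-extend, "I": sign-extend
    }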
@@ -347,7 +362,6 @@ class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC
let Src2RC64 = _SrcRC;
let HasOpSel = 0;
let HasClamp = 0;
- let HasModifiers = 0;
let Asm64 = " $vdst, $src0, $src1, $src2$cbsz$abid$blgp";
let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp);
}
@@ -368,34 +382,34 @@ def VOPProfileMAI_F32_V4F16_X32 : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, A
let Predicates = [HasMAIInsts] in {
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
-def V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
-def V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite> {
- let isMoveImm = 1;
-}
-}
+ defm V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
+ let isMoveImm = 1 in {
+ defm V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite>;
+ } // End isMoveImm = 1
+} // End isAsCheapAsAMove = 1, isReMaterializable = 1
// FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
-def V_MFMA_F32_4X4X1F32 : VOP3Inst<"v_mfma_f32_4x4x1f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_4x4x1f32>;
-def V_MFMA_F32_4X4X4F16 : VOP3Inst<"v_mfma_f32_4x4x4f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_4x4x4f16>;
-def V_MFMA_I32_4X4X4I8 : VOP3Inst<"v_mfma_i32_4x4x4i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_4x4x4i8>;
-def V_MFMA_F32_4X4X2BF16 : VOP3Inst<"v_mfma_f32_4x4x2bf16", VOPProfileMAI_F32_V2I16_X4, int_amdgcn_mfma_f32_4x4x2bf16>;
-def V_MFMA_F32_16X16X1F32 : VOP3Inst<"v_mfma_f32_16x16x1f32", VOPProfileMAI_F32_F32_X16, int_amdgcn_mfma_f32_16x16x1f32>;
-def V_MFMA_F32_16X16X4F32 : VOP3Inst<"v_mfma_f32_16x16x4f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_16x16x4f32>;
-def V_MFMA_F32_16X16X4F16 : VOP3Inst<"v_mfma_f32_16x16x4f16", VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_16x16x4f16>;
-def V_MFMA_F32_16X16X16F16 : VOP3Inst<"v_mfma_f32_16x16x16f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_16x16x16f16>;
-def V_MFMA_I32_16X16X4I8 : VOP3Inst<"v_mfma_i32_16x16x4i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_16x16x4i8>;
-def V_MFMA_I32_16X16X16I8 : VOP3Inst<"v_mfma_i32_16x16x16i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_16x16x16i8>;
-def V_MFMA_F32_16X16X2BF16 : VOP3Inst<"v_mfma_f32_16x16x2bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_16x16x2bf16>;
-def V_MFMA_F32_16X16X8BF16 : VOP3Inst<"v_mfma_f32_16x16x8bf16", VOPProfileMAI_F32_V2I16_X4, int_amdgcn_mfma_f32_16x16x8bf16>;
-def V_MFMA_F32_32X32X1F32 : VOP3Inst<"v_mfma_f32_32x32x1f32", VOPProfileMAI_F32_F32_X32, int_amdgcn_mfma_f32_32x32x1f32>;
-def V_MFMA_F32_32X32X2F32 : VOP3Inst<"v_mfma_f32_32x32x2f32", VOPProfileMAI_F32_F32_X16, int_amdgcn_mfma_f32_32x32x2f32>;
-def V_MFMA_F32_32X32X4F16 : VOP3Inst<"v_mfma_f32_32x32x4f16", VOPProfileMAI_F32_V4F16_X32, int_amdgcn_mfma_f32_32x32x4f16>;
-def V_MFMA_F32_32X32X8F16 : VOP3Inst<"v_mfma_f32_32x32x8f16", VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_32x32x8f16>;
-def V_MFMA_I32_32X32X4I8 : VOP3Inst<"v_mfma_i32_32x32x4i8", VOPProfileMAI_I32_I32_X32, int_amdgcn_mfma_i32_32x32x4i8>;
-def V_MFMA_I32_32X32X8I8 : VOP3Inst<"v_mfma_i32_32x32x8i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_32x32x8i8>;
-def V_MFMA_F32_32X32X2BF16 : VOP3Inst<"v_mfma_f32_32x32x2bf16", VOPProfileMAI_F32_V2I16_X32, int_amdgcn_mfma_f32_32x32x2bf16>;
-def V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_32x32x4bf16>;
+defm V_MFMA_F32_4X4X1F32 : VOP3Inst<"v_mfma_f32_4x4x1f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_4x4x1f32>;
+defm V_MFMA_F32_4X4X4F16 : VOP3Inst<"v_mfma_f32_4x4x4f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_4x4x4f16>;
+defm V_MFMA_I32_4X4X4I8 : VOP3Inst<"v_mfma_i32_4x4x4i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_4x4x4i8>;
+defm V_MFMA_F32_4X4X2BF16 : VOP3Inst<"v_mfma_f32_4x4x2bf16", VOPProfileMAI_F32_V2I16_X4, int_amdgcn_mfma_f32_4x4x2bf16>;
+defm V_MFMA_F32_16X16X1F32 : VOP3Inst<"v_mfma_f32_16x16x1f32", VOPProfileMAI_F32_F32_X16, int_amdgcn_mfma_f32_16x16x1f32>;
+defm V_MFMA_F32_16X16X4F32 : VOP3Inst<"v_mfma_f32_16x16x4f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_16x16x4f32>;
+defm V_MFMA_F32_16X16X4F16 : VOP3Inst<"v_mfma_f32_16x16x4f16", VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_16x16x4f16>;
+defm V_MFMA_F32_16X16X16F16 : VOP3Inst<"v_mfma_f32_16x16x16f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_16x16x16f16>;
+defm V_MFMA_I32_16X16X4I8 : VOP3Inst<"v_mfma_i32_16x16x4i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_16x16x4i8>;
+defm V_MFMA_I32_16X16X16I8 : VOP3Inst<"v_mfma_i32_16x16x16i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_16x16x16i8>;
+defm V_MFMA_F32_16X16X2BF16 : VOP3Inst<"v_mfma_f32_16x16x2bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_16x16x2bf16>;
+defm V_MFMA_F32_16X16X8BF16 : VOP3Inst<"v_mfma_f32_16x16x8bf16", VOPProfileMAI_F32_V2I16_X4, int_amdgcn_mfma_f32_16x16x8bf16>;
+defm V_MFMA_F32_32X32X1F32 : VOP3Inst<"v_mfma_f32_32x32x1f32", VOPProfileMAI_F32_F32_X32, int_amdgcn_mfma_f32_32x32x1f32>;
+defm V_MFMA_F32_32X32X2F32 : VOP3Inst<"v_mfma_f32_32x32x2f32", VOPProfileMAI_F32_F32_X16, int_amdgcn_mfma_f32_32x32x2f32>;
+defm V_MFMA_F32_32X32X4F16 : VOP3Inst<"v_mfma_f32_32x32x4f16", VOPProfileMAI_F32_V4F16_X32, int_amdgcn_mfma_f32_32x32x4f16>;
+defm V_MFMA_F32_32X32X8F16 : VOP3Inst<"v_mfma_f32_32x32x8f16", VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_32x32x8f16>;
+defm V_MFMA_I32_32X32X4I8 : VOP3Inst<"v_mfma_i32_32x32x4i8", VOPProfileMAI_I32_I32_X32, int_amdgcn_mfma_i32_32x32x4i8>;
+defm V_MFMA_I32_32X32X8I8 : VOP3Inst<"v_mfma_i32_32x32x8i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_32x32x8i8>;
+defm V_MFMA_F32_32X32X2BF16 : VOP3Inst<"v_mfma_f32_32x32x2bf16", VOPProfileMAI_F32_V2I16_X32, int_amdgcn_mfma_f32_32x32x2bf16>;
+defm V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_32x32x4bf16>;
} // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
} // End SubtargetPredicate = HasMAIInsts
@@ -403,7 +417,15 @@ def V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F3
def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">;
def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;
-multiclass VOP3P_Real_vi<bits<10> op> {
+//===----------------------------------------------------------------------===//
+// Begin Real Encodings
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// GFX8 (VI)
+//===----------------------------------------------------------------------===//
+
+multiclass VOP3P_Real_vi<bits<7> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
let AssemblerPredicate = HasVOP3PInsts;
@@ -411,40 +433,51 @@ multiclass VOP3P_Real_vi<bits<10> op> {
}
}
-multiclass VOP3P_Real_MAI<bits<10> op> {
- def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
+multiclass VOP3P_Real_MAI<bits<7> op> {
+ def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
+ let AssemblerPredicate = HasMAIInsts;
+ let DecoderNamespace = "GFX8";
+ let Inst{14} = 1; // op_sel_hi(2) default value
+ let Inst{59} = 1; // op_sel_hi(0) default value
+ let Inst{60} = 1; // op_sel_hi(1) default value
+ }
+}
+
+multiclass VOP3P_Real_MFMA<bits<7> op> {
+ def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
let AssemblerPredicate = HasMAIInsts;
let DecoderNamespace = "GFX8";
}
}
-defm V_PK_MAD_I16 : VOP3P_Real_vi <0x380>;
-defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>;
-defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>;
-defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>;
-defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x384>;
-defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>;
-defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>;
-defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>;
-defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>;
-defm V_PK_MAD_U16 : VOP3P_Real_vi <0x389>;
-
-defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>;
-defm V_PK_SUB_U16 : VOP3P_Real_vi <0x38b>;
-defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>;
-defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>;
-defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>;
-defm V_PK_ADD_F16 : VOP3P_Real_vi <0x38f>;
-defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>;
-defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>;
-defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>;
+defm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>;
+defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x01>;
+defm V_PK_ADD_I16 : VOP3P_Real_vi <0x02>;
+defm V_PK_SUB_I16 : VOP3P_Real_vi <0x03>;
+defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x04>;
+defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x05>;
+defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x06>;
+defm V_PK_MAX_I16 : VOP3P_Real_vi <0x07>;
+defm V_PK_MIN_I16 : VOP3P_Real_vi <0x08>;
+defm V_PK_MAD_U16 : VOP3P_Real_vi <0x09>;
+
+defm V_PK_ADD_U16 : VOP3P_Real_vi <0x0a>;
+defm V_PK_SUB_U16 : VOP3P_Real_vi <0x0b>;
+defm V_PK_MAX_U16 : VOP3P_Real_vi <0x0c>;
+defm V_PK_MIN_U16 : VOP3P_Real_vi <0x0d>;
+defm V_PK_FMA_F16 : VOP3P_Real_vi <0x0e>;
+defm V_PK_ADD_F16 : VOP3P_Real_vi <0x0f>;
+defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>;
+defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>;
+defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>;
let SubtargetPredicate = HasMadMixInsts in {
-defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>;
-defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
-defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
+defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>;
+defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>;
+defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}
let SubtargetPredicate = HasFmaMixInsts in {
@@ -452,54 +485,54 @@ let DecoderNamespace = "GFX9_DL" in {
// The mad_mix instructions were renamed and their behaviors changed,
// but the opcode stayed the same so we need to put these in a
// different DecoderNamespace to avoid the ambiguity.
-defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x3a0>;
-defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
-defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
+defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x20>;
+defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x21>;
+defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}
}
let SubtargetPredicate = HasDot2Insts in {
-defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
-defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
-defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>;
-defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>;
-defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>;
+defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
+defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
+defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
+defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
+defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
} // End SubtargetPredicate = HasDot2Insts
let SubtargetPredicate = HasDot1Insts in {
-defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>;
-defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
+defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
+defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
} // End SubtargetPredicate = HasDot1Insts
let SubtargetPredicate = HasMAIInsts in {
-defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x3d8>;
-defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x3d9>;
-defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MAI <0x3c0>;
-defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MAI <0x3c1>;
-defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MAI <0x3c2>;
-defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MAI <0x3c4>;
-defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MAI <0x3c5>;
-defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MAI <0x3c8>;
-defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MAI <0x3c9>;
-defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MAI <0x3ca>;
-defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MAI <0x3cc>;
-defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x3cd>;
-defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MAI <0x3d0>;
-defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MAI <0x3d1>;
-defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MAI <0x3d2>;
-defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MAI <0x3d4>;
-defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MAI <0x3d5>;
-defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x3e8>;
-defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x3e9>;
-defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MAI <0x3eb>;
-defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x3ec>;
-defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>;
+defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>;
+defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>;
+defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MFMA <0x40>;
+defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MFMA <0x41>;
+defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MFMA <0x42>;
+defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MFMA <0x44>;
+defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MFMA <0x45>;
+defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MFMA <0x48>;
+defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MFMA <0x49>;
+defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MFMA <0x4a>;
+defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MFMA <0x4c>;
+defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MFMA <0x4d>;
+defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MFMA <0x50>;
+defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MFMA <0x51>;
+defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MFMA <0x52>;
+defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MFMA <0x55>;
+defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MFMA <0x54>;
+defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA <0x68>;
+defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA <0x69>;
+defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MFMA <0x6b>;
+defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA <0x6c>;
+defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA <0x6d>;
} // End SubtargetPredicate = HasMAIInsts
@@ -508,48 +541,48 @@ defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>;
//===----------------------------------------------------------------------===//
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
- multiclass VOP3P_Real_gfx10<bits<10> op> {
+ multiclass VOP3P_Real_gfx10<bits<7> op> {
def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
VOP3Pe_gfx10 <op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
-defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x000>;
-defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x001>;
-defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x002>;
-defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x003>;
-defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x004>;
-defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x005>;
-defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x006>;
-defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x007>;
-defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x008>;
-defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x009>;
-defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x00a>;
-defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x00b>;
-defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x00c>;
-defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x00d>;
-defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x00e>;
-defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x00f>;
-defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x010>;
-defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x011>;
-defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>;
-defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>;
-defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>;
-defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>;
+defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x00>;
+defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x01>;
+defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x02>;
+defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x03>;
+defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x04>;
+defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x05>;
+defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x06>;
+defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x07>;
+defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x08>;
+defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x09>;
+defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x0a>;
+defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x0b>;
+defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x0c>;
+defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x0d>;
+defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x0e>;
+defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x0f>;
+defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x10>;
+defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x11>;
+defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x12>;
+defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x20>;
+defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x21>;
+defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x22>;
let SubtargetPredicate = HasDot2Insts in {
-defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>;
-defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>;
-defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>;
-defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x017>;
-defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x019>;
+defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>;
+defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
+defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
+defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x17>;
+defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x19>;
} // End SubtargetPredicate = HasDot2Insts
let SubtargetPredicate = HasDot1Insts in {
-defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x016>;
-defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x018>;
+defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
+defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;
} // End SubtargetPredicate = HasDot1Insts
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index aa2fa260e7b5..99599c5cd667 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -229,7 +229,7 @@ multiclass VOPC_Pseudos <string opName,
foreach _ = BoolToList<P.HasExtSDWA>.ret in
def _sdwa : VOPC_SDWA_Pseudo <opName, P> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let Defs = !if(DefExec, [EXEC], []);
let SchedRW = P.Schedule;
let isConvergent = DefExec;
let isCompare = 1;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
index f8a83e5f74c0..282c1002d3c9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -8,6 +8,7 @@
// dummies for outer let
class LetDummies {
+ bit TRANS;
bit ReadsModeReg;
bit mayRaiseFPException;
bit isCommutable;
@@ -69,7 +70,7 @@ class VOP3Common <dag outs, dag ins, string asm = "",
let VOP3 = 1;
let AsmVariantName = AMDGPUAsmVariants.VOP3;
- let AsmMatchConverter = !if(!eq(HasMods,1), "cvtVOP3", "");
+ let AsmMatchConverter = !if(HasMods, "cvtVOP3", "");
let isCodeGenOnly = 0;
@@ -129,7 +130,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
let AsmMatchConverter =
!if(isVOP3P,
"cvtVOP3P",
- !if(!or(P.HasModifiers, !or(P.HasOMod, P.HasIntClamp)),
+ !if(!or(P.HasModifiers, P.HasOMod, P.HasIntClamp),
"cvtVOP3",
""));
}
@@ -296,7 +297,7 @@ class VOP3be <VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
-class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
+class VOP3Pe <bits<7> op, VOPProfile P> : Enc64 {
bits<8> vdst;
// neg, neg_hi, op_sel put in srcN_modifiers
bits<4> src0_modifiers;
@@ -320,8 +321,8 @@ class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
- let Inst{25-16} = op;
- let Inst{31-26} = 0x34; //encoding
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x1a7; //encoding
let Inst{40-32} = !if(P.HasSrc0, src0, 0);
let Inst{49-41} = !if(P.HasSrc1, src1, 0);
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
@@ -332,7 +333,7 @@ class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
}
-class VOP3Pe_MAI <bits<10> op, VOPProfile P> : Enc64 {
+class VOP3Pe_MAI <bits<7> op, VOPProfile P> : Enc64 {
bits<8> vdst;
bits<10> src0;
bits<10> src1;
@@ -349,8 +350,8 @@ class VOP3Pe_MAI <bits<10> op, VOPProfile P> : Enc64 {
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
- let Inst{25-16} = op;
- let Inst{31-26} = 0x34; //encoding
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x1a7; //encoding
let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0);
let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
@@ -362,8 +363,8 @@ class VOP3Pe_MAI <bits<10> op, VOPProfile P> : Enc64 {
}
-class VOP3Pe_gfx10 <bits<10> op, VOPProfile P> : VOP3Pe<op, P> {
- let Inst{31-26} = 0x33; //encoding
+class VOP3Pe_gfx10 <bits<7> op, VOPProfile P> : VOP3Pe<op, P> {
+ let Inst{31-23} = 0x198; //encoding
}
class VOP3be_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3be<p> {
@@ -626,7 +627,7 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
string Mnemonic = OpName;
string AsmOperands = P.AsmDPP;
- let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
+ let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", "");
let SubtargetPredicate = HasDPP;
let AssemblerPredicate = HasDPP;
let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
@@ -681,7 +682,7 @@ class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16,
let DPP = 1;
let Size = 8;
- let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
+ let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", "");
let SubtargetPredicate = HasDPP;
let AssemblerPredicate = HasDPP;
let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
@@ -776,6 +777,19 @@ class DivergentFragOrOp<SDPatternOperator Op, VOPProfile P> {
!if(!isa<SDNode>(Op), getDivergentFrag<Op>.ret, Op), Op);
}
+class getVSrcOp<ValueType vt> {
+ RegisterOperand ret = !if(!eq(vt.Size, 32), VSrc_b32, VSrc_b16);
+}
+
+// Class for binary integer operations with the clamp bit set for saturation
+// TODO: Add sub with negated inline constant pattern.
+class VOPBinOpClampPat<SDPatternOperator node, Instruction inst, ValueType vt> :
+ GCNPat<(node vt:$src0, vt:$src1),
+ (inst getVSrcOp<vt>.ret:$src0, getVSrcOp<vt>.ret:$src1,
+ DSTCLAMP.ENABLE)
+>;
+
+
include "VOPCInstructions.td"
include "VOP1Instructions.td"
include "VOP2Instructions.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
index 4a6510f10eeb..ca33f5297471 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Intrinsics.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCSubtarget.cpp
index bce2dbd2eaa6..409dd2a98ab4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCSubtarget.cpp
@@ -26,5 +26,5 @@ void ARCSubtarget::anchor() {}
ARCSubtarget::ARCSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : ARCGenSubtargetInfo(TT, CPU, FS), FrameLowering(*this),
+ : ARCGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), FrameLowering(*this),
TLInfo(TM, *this) {}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCSubtarget.h b/contrib/llvm-project/llvm/lib/Target/ARC/ARCSubtarget.h
index 0be797f753d5..1f1b27f13f68 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCSubtarget.h
@@ -43,7 +43,7 @@ public:
/// Parses features string setting specified subtarget options.
/// Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
const ARCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const ARCFrameLowering *getFrameLowering() const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
index 4a5b6fd4d5bf..b8c8949e18dd 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
@@ -21,9 +21,7 @@
using namespace llvm;
static Reloc::Model getRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
+ return RM.getValueOr(Reloc::Static);
}
/// ARCTargetMachine ctor - Create an ILP32 architecture model
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
index 266f2de08772..f6f8f9d089df 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
@@ -26,6 +26,7 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
index 3e3613ccb90f..358ee6002f80 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
@@ -48,7 +48,7 @@ static MCRegisterInfo *createARCMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *createARCMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
- return createARCMCSubtargetInfoImpl(TT, CPU, FS);
+ return createARCMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, FS);
}
static MCAsmInfo *createARCMCAsmInfo(const MCRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index f8a86a70c077..bb81233cf803 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -359,8 +359,7 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
SmallVector<MachineInstr *, 8> Front;
Front.push_back(MI);
while (Front.size() != 0) {
- MI = Front.back();
- Front.pop_back();
+ MI = Front.pop_back_val();
// If we have already explored this MachineInstr, ignore it.
if (Reached.find(MI) != Reached.end())
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
index 7398968bb24a..f4fdc9803728 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
@@ -37,6 +37,7 @@ class PassRegistry;
Pass *createMVETailPredicationPass();
FunctionPass *createARMLowOverheadLoopsPass();
+FunctionPass *createARMBlockPlacementPass();
Pass *createARMParallelDSPPass();
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -55,6 +56,8 @@ InstructionSelector *
createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI);
Pass *createMVEGatherScatterLoweringPass();
+FunctionPass *createARMSLSHardeningPass();
+FunctionPass *createARMIndirectThunks();
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
@@ -69,8 +72,10 @@ void initializeThumb2ITBlockPass(PassRegistry &);
void initializeMVEVPTBlockPass(PassRegistry &);
void initializeMVEVPTOptimisationsPass(PassRegistry &);
void initializeARMLowOverheadLoopsPass(PassRegistry &);
+void initializeARMBlockPlacementPass(PassRegistry &);
void initializeMVETailPredicationPass(PassRegistry &);
void initializeMVEGatherScatterLoweringPass(PassRegistry &);
+void initializeARMSLSHardeningPass(PassRegistry &);
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
index 0468f7f1cf8e..3d0a0bf7f8c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
@@ -535,6 +535,10 @@ def HasV8_6aOps : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true",
[HasV8_5aOps, FeatureBF16,
FeatureMatMulInt8]>;
+def HasV8_7aOps : SubtargetFeature<"v8.7a", "HasV8_7aOps", "true",
+ "Support ARM v8.7a instructions",
+ [HasV8_6aOps]>;
+
def HasV8_1MMainlineOps : SubtargetFeature<
"v8.1m.main", "HasV8_1MMainlineOps", "true",
"Support ARM v8-1M Mainline instructions",
@@ -559,6 +563,20 @@ foreach i = {0-7} in
[HasCDEOps]>;
//===----------------------------------------------------------------------===//
+// Control codegen mitigation against Straight Line Speculation vulnerability.
+//===----------------------------------------------------------------------===//
+
+def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr",
+ "HardenSlsRetBr", "true",
+ "Harden against straight line speculation across RETurn and BranchRegister "
+ "instructions">;
+def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr",
+ "HardenSlsBlr", "true",
+ "Harden against straight line speculation across indirect calls">;
+
+
+
+//===----------------------------------------------------------------------===//
// ARM Processor subtarget features.
//
@@ -598,9 +616,14 @@ def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
"Cortex-A77 ARM processors", []>;
def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78",
"Cortex-A78 ARM processors", []>;
+def ProcA78C : SubtargetFeature<"a78c", "ARMProcFamily", "CortexA78C",
+ "Cortex-A78C ARM processors", []>;
def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", []>;
+def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily",
+ "NeoverseV1", "Neoverse-V1 ARM processors", []>;
+
def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
"Qualcomm Krait processors", []>;
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
@@ -639,7 +662,8 @@ def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52",
def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
"Cortex-M3 ARM processors", []>;
-
+def ProcM7 : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7",
+ "Cortex-M7 ARM processors", []>;
//===----------------------------------------------------------------------===//
// ARM Helper classes.
@@ -828,6 +852,19 @@ def ARMv86a : Architecture<"armv8.6-a", "ARMv86a", [HasV8_6aOps,
FeatureCRC,
FeatureRAS,
FeatureDotProd]>;
+def ARMv87a : Architecture<"armv8.7-a", "ARMv86a", [HasV8_7aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
@@ -882,6 +919,13 @@ def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>;
def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>;
def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+include "ARMRegisterBanks.td"
+include "ARMCallingConv.td"
//===----------------------------------------------------------------------===//
// ARM schedules.
@@ -891,9 +935,27 @@ include "ARMPredicates.td"
include "ARMSchedule.td"
//===----------------------------------------------------------------------===//
-// ARM processors
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+def ARMInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// ARM schedules
//
+include "ARMScheduleV6.td"
+include "ARMScheduleA8.td"
+include "ARMScheduleA9.td"
+include "ARMScheduleSwift.td"
+include "ARMScheduleR52.td"
+include "ARMScheduleA57.td"
+include "ARMScheduleM4.td"
+include "ARMScheduleM7.td"
+//===----------------------------------------------------------------------===//
+// ARM processors
+//
// Dummy CPU, used to target architectures
def : ProcessorModel<"generic", CortexA8Model, []>;
@@ -1131,8 +1193,10 @@ def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
FeatureUseMISched,
FeatureHasNoBranchPredictor]>;
-def : ProcNoItin<"cortex-m7", [ARMv7em,
- FeatureFPARMv8_D16]>;
+def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em,
+ ProcM7,
+ FeatureFPARMv8_D16,
+ FeatureUseMISched]>;
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
FeatureNoMovt]>;
@@ -1246,6 +1310,14 @@ def : ProcNoItin<"cortex-a78", [ARMv82a, ProcA78,
FeatureFullFP16,
FeatureDotProd]>;
+def : ProcNoItin<"cortex-a78c", [ARMv82a, ProcA78C,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureDotProd,
+ FeatureFullFP16]>;
+
def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1,
FeatureHWDivThumb,
FeatureHWDivARM,
@@ -1254,6 +1326,15 @@ def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1,
FeatureFullFP16,
FeatureDotProd]>;
+def : ProcNoItin<"neoverse-v1", [ARMv84a,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFullFP16,
+ FeatureBF16,
+ FeatureMatMulInt8]>;
+
def : ProcNoItin<"neoverse-n1", [ARMv82a,
FeatureHWDivThumb,
FeatureHWDivARM,
@@ -1261,6 +1342,11 @@ def : ProcNoItin<"neoverse-n1", [ARMv82a,
FeatureCRC,
FeatureDotProd]>;
+def : ProcNoItin<"neoverse-n2", [ARMv85a,
+ FeatureBF16,
+ FeatureMatMulInt8,
+ FeaturePerfMon]>;
+
def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
@@ -1296,21 +1382,6 @@ def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
FeatureFPAO]>;
//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "ARMRegisterInfo.td"
-include "ARMRegisterBanks.td"
-include "ARMCallingConv.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "ARMInstrInfo.td"
-def ARMInstrInfo : InstrInfo;
-
-//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index d6c1efa6327c..04e21867d571 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -285,7 +285,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
case 'y': // Print a VFP single precision register as indexed double.
if (MI->getOperand(OpNum).isReg()) {
- Register Reg = MI->getOperand(OpNum).getReg();
+ MCRegister Reg = MI->getOperand(OpNum).getReg().asMCReg();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
// Find the 'd' register that has this 's' register as a sub-register,
// and determine the lane number.
@@ -903,7 +903,7 @@ void ARMAsmPrinter::emitMachineConstantPoolValue(
MCSymbol *MCSym;
if (ACPV->isLSDA()) {
- MCSym = getCurExceptionSym();
+ MCSym = getMBBExceptionSym(MF->front());
} else if (ACPV->isBlockAddress()) {
const BlockAddress *BA =
cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
@@ -1897,7 +1897,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
// LSJLJEH:
Register SrcReg = MI->getOperand(0).getReg();
Register ValReg = MI->getOperand(1).getReg();
- MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true);
+ MCSymbol *Label = OutContext.createTempSymbol("SJLJEH");
OutStreamer->AddComment("eh_setjmp begin");
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
.addReg(ValReg)
@@ -2180,6 +2180,48 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
case ARM::PATCHABLE_TAIL_CALL:
LowerPATCHABLE_TAIL_CALL(*MI);
return;
+ case ARM::SpeculationBarrierISBDSBEndBB: {
+ // Print DSB SYS + ISB
+ MCInst TmpInstDSB;
+ TmpInstDSB.setOpcode(ARM::DSB);
+ TmpInstDSB.addOperand(MCOperand::createImm(0xf));
+ EmitToStreamer(*OutStreamer, TmpInstDSB);
+ MCInst TmpInstISB;
+ TmpInstISB.setOpcode(ARM::ISB);
+ TmpInstISB.addOperand(MCOperand::createImm(0xf));
+ EmitToStreamer(*OutStreamer, TmpInstISB);
+ return;
+ }
+ case ARM::t2SpeculationBarrierISBDSBEndBB: {
+ // Print DSB SYS + ISB
+ MCInst TmpInstDSB;
+ TmpInstDSB.setOpcode(ARM::t2DSB);
+ TmpInstDSB.addOperand(MCOperand::createImm(0xf));
+ TmpInstDSB.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInstDSB.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInstDSB);
+ MCInst TmpInstISB;
+ TmpInstISB.setOpcode(ARM::t2ISB);
+ TmpInstISB.addOperand(MCOperand::createImm(0xf));
+ TmpInstISB.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInstISB.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInstISB);
+ return;
+ }
+ case ARM::SpeculationBarrierSBEndBB: {
+ // Print SB
+ MCInst TmpInstSB;
+ TmpInstSB.setOpcode(ARM::SB);
+ EmitToStreamer(*OutStreamer, TmpInstSB);
+ return;
+ }
+ case ARM::t2SpeculationBarrierSBEndBB: {
+ // Print SB
+ MCInst TmpInstSB;
+ TmpInstSB.setOpcode(ARM::t2SB);
+ EmitToStreamer(*OutStreamer, TmpInstSB);
+ return;
+ }
}
MCInst TmpInst;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 4cc2b6bf7e7e..e418d53b56a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -35,6 +36,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MultiHazardRecognizer.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -131,12 +134,43 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}
+// Called during:
+// - pre-RA scheduling
+// - post-RA scheduling when FeatureUseMISched is set
+ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
+ MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
+
+ // We would like to restrict this hazard recognizer to only
+ // post-RA scheduling; we can tell that we're post-RA because we don't
+ // track VRegLiveness.
+ // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
+ // banks banked on bit 2. Assume that TCMs are in use.
+ if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
+ MHR->AddHazardRecognizer(
+ std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
+
+ // Not inserting ARMHazardRecognizerFPMLx because that would change
+ // legacy behavior
+
+ auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
+ MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
+ return MHR;
+}
+
+// Called during post-RA scheduling when FeatureUseMISched is not set
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
+ MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
+
if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
- return new ARMHazardRecognizer(II, DAG);
- return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
+ MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
+
+ auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
+ if (BHR)
+ MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
+ return MHR;
}
MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
@@ -317,8 +351,8 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
TBB = nullptr;
FBB = nullptr;
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
+ MachineBasicBlock::instr_iterator I = MBB.instr_end();
+ if (I == MBB.instr_begin())
return false; // Empty blocks are easy.
--I;
@@ -330,9 +364,12 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// out.
bool CantAnalyze = false;
- // Skip over DEBUG values and predicated nonterminators.
- while (I->isDebugInstr() || !I->isTerminator()) {
- if (I == MBB.begin())
+ // Skip over DEBUG values, predicated nonterminators and speculation
+ // barrier terminators.
+ while (I->isDebugInstr() || !I->isTerminator() ||
+ isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
+ I->getOpcode() == ARM::t2DoLoopStartTP) {
+ if (I == MBB.instr_begin())
return false;
--I;
}
@@ -356,7 +393,7 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
Cond.push_back(I->getOperand(2));
} else if (I->isReturn()) {
// Returns can't be analyzed, but we should run cleanup.
- CantAnalyze = !isPredicated(*I);
+ CantAnalyze = true;
} else {
// We encountered other unrecognized terminator. Bail out immediately.
return true;
@@ -377,18 +414,30 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// unconditional branch.
if (AllowModify) {
MachineBasicBlock::iterator DI = std::next(I);
- while (DI != MBB.end()) {
+ while (DI != MBB.instr_end()) {
MachineInstr &InstToDelete = *DI;
++DI;
+ // Speculation barriers must not be deleted.
+ if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
+ continue;
InstToDelete.eraseFromParent();
}
}
}
- if (CantAnalyze)
+ if (CantAnalyze) {
+ // We may not be able to analyze the block, but we could still have
+ // an unconditional branch as the last instruction in the block, which
+ // just branches to layout successor. If this is the case, then just
+ // remove it if we're allowed to make modifications.
+ if (AllowModify && !isPredicated(MBB.back()) &&
+ isUncondBranchOpcode(MBB.back().getOpcode()) &&
+ TBB && MBB.isLayoutSuccessor(TBB))
+ removeBranch(MBB);
return true;
+ }
- if (I == MBB.begin())
+ if (I == MBB.instr_begin())
return false;
--I;
@@ -537,6 +586,18 @@ bool ARMBaseInstrInfo::PredicateInstruction(
MachineOperand &PMO = MI.getOperand(PIdx);
PMO.setImm(Pred[0].getImm());
MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
+
+ // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
+ // IT block. This affects how they are printed.
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
+ assert(MCID.OpInfo[1].isOptionalDef() && "CPSR def isn't expected operand");
+ assert((MI.getOperand(1).isDead() ||
+ MI.getOperand(1).getReg() != ARM::CPSR) &&
+ "if conversion tried to stop defining used CPSR");
+ MI.getOperand(1).setReg(ARM::NoRegister);
+ }
+
return true;
}
return false;
@@ -568,13 +629,23 @@ bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
}
}
-bool ARMBaseInstrInfo::DefinesPredicate(
- MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
+bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
+ std::vector<MachineOperand> &Pred,
+ bool SkipDead) const {
bool Found = false;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
- if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
- (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
+ bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
+ bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
+ if (ClobbersCPSR || IsCPSR) {
+
+ // Filter out T1 instructions that have a dead CPSR,
+ // allowing IT blocks to be generated containing T1 instructions
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
+ SkipDead)
+ continue;
+
Pred.push_back(MO);
Found = true;
}
@@ -590,61 +661,6 @@ bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
return false;
}
-bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
- unsigned Op) const {
- const MachineOperand &Offset = MI.getOperand(Op + 1);
- return Offset.getReg() != 0;
-}
-
-// Load with negative register offset requires additional 1cyc and +I unit
-// for Cortex A57
-bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
- unsigned Op) const {
- const MachineOperand &Offset = MI.getOperand(Op + 1);
- const MachineOperand &Opc = MI.getOperand(Op + 2);
- assert(Opc.isImm());
- assert(Offset.isReg());
- int64_t OpcImm = Opc.getImm();
-
- bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
- return (isSub && Offset.getReg() != 0);
-}
-
-bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
- unsigned Op) const {
- const MachineOperand &Opc = MI.getOperand(Op + 2);
- unsigned OffImm = Opc.getImm();
- return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
-}
-
-// Load, scaled register offset, not plus LSL2
-bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
- unsigned Op) const {
- const MachineOperand &Opc = MI.getOperand(Op + 2);
- unsigned OffImm = Opc.getImm();
-
- bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
- unsigned Amt = ARM_AM::getAM2Offset(OffImm);
- ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
- if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
- bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
- return !SimpleScaled;
-}
-
-// Minus reg for ldstso addr mode
-bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
- unsigned Op) const {
- unsigned OffImm = MI.getOperand(Op + 2).getImm();
- return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
-}
-
-// Load, scaled register offset
-bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
- unsigned Op) const {
- unsigned OffImm = MI.getOperand(Op + 2).getImm();
- return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
-}
-
static bool isEligibleForITBlock(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default: return true;
@@ -687,14 +703,23 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
if (!isEligibleForITBlock(&MI))
return false;
+ const MachineFunction *MF = MI.getParent()->getParent();
const ARMFunctionInfo *AFI =
- MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ MF->getInfo<ARMFunctionInfo>();
// Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
// In their ARM encoding, they can't be encoded in a conditional form.
if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
return false;
+ // Make indirect control flow changes unpredicable when SLS mitigation is
+ // enabled.
+ const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
+ if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
+ return false;
+ if (ST.hardenSlsBlr() && isIndirectCall(MI))
+ return false;
+
if (AFI->isThumb2Function()) {
if (getSubtarget().restrictIT())
return isV8EligibleForIT(&MI);
@@ -777,6 +802,14 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
Size = alignTo(Size, 4);
return Size;
}
+ case ARM::SpeculationBarrierISBDSBEndBB:
+ case ARM::t2SpeculationBarrierISBDSBEndBB:
+ // This gets lowered to 2 4-byte instructions.
+ return 8;
+ case ARM::SpeculationBarrierSBEndBB:
+ case ARM::t2SpeculationBarrierSBEndBB:
+ // This gets lowered to one 4-byte instruction.
+ return 4;
}
}
@@ -2142,7 +2175,12 @@ ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
// Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
// ARM has a condition code field in every predicable instruction, using it
// doesn't change code size.
- return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0;
+ if (!Subtarget.isThumb2())
+ return 0;
+
+ // It's possible that the size of the IT is restricted to a single block.
+ unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
+ return divideCeil(NumInsts, MaxInsts) * 2;
}
unsigned
@@ -3379,7 +3417,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
case ARM::t2SUBspImm:
case ARM::t2ADDri:
case ARM::t2SUBri:
- MRI->setRegClass(UseMI.getOperand(0).getReg(), TRC);
+ MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
}
return true;
}
@@ -3840,22 +3878,6 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
return DefCycle;
}
-bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
- Register BaseReg = MI.getOperand(0).getReg();
- for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
- const auto &Op = MI.getOperand(i);
- if (Op.isReg() && Op.getReg() == BaseReg)
- return true;
- }
- return false;
-}
-unsigned
-ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
- // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops
- // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops)
- return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
-}
-
int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
@@ -4816,6 +4838,14 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
}
+ if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
+ assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
+ if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
+ MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
+ ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
+ return false;
+ }
+ }
return true;
}
@@ -5501,6 +5531,8 @@ unsigned llvm::ConstantMaterializationCost(unsigned Val,
return ForCodesize ? 4 : 1;
if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
return ForCodesize ? 8 : 2;
+ if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
+ return ForCodesize ? 8 : 2;
}
if (Subtarget->useMovt()) // MOVW + MOVT
return ForCodesize ? 8 : 2;
@@ -5605,12 +5637,32 @@ bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
/// | Frame overhead in Bytes | 2 | 4 |
/// | Stack fixup required | No | No |
/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerDefault implies that the function should be called with
+/// a save and restore of LR to the stack.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// BX LR
+///
+/// +-------------------------+--------+-----+
+/// | | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes | 8 | 12 |
+/// | Frame overhead in Bytes | 2 | 4 |
+/// | Stack fixup required | Yes | Yes |
+/// +-------------------------+--------+-----+
enum MachineOutlinerClass {
MachineOutlinerTailCall,
MachineOutlinerThunk,
MachineOutlinerNoLRSave,
- MachineOutlinerRegSave
+ MachineOutlinerRegSave,
+ MachineOutlinerDefault
};
enum MachineOutlinerMBBFlags {
@@ -5628,6 +5680,9 @@ struct OutlinerCosts {
const int FrameNoLRSave;
const int CallRegSave;
const int FrameRegSave;
+ const int CallDefault;
+ const int FrameDefault;
+ const int SaveRestoreLROnStack;
OutlinerCosts(const ARMSubtarget &target)
: CallTailCall(target.isThumb() ? 4 : 4),
@@ -5637,7 +5692,10 @@ struct OutlinerCosts {
CallNoLRSave(target.isThumb() ? 4 : 4),
FrameNoLRSave(target.isThumb() ? 4 : 4),
CallRegSave(target.isThumb() ? 8 : 12),
- FrameRegSave(target.isThumb() ? 2 : 4) {}
+ FrameRegSave(target.isThumb() ? 2 : 4),
+ CallDefault(target.isThumb() ? 8 : 12),
+ FrameDefault(target.isThumb() ? 2 : 4),
+ SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
};
unsigned
@@ -5662,6 +5720,37 @@ ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
return 0u;
}
+// Compute liveness of LR at the point after the interval [I, E), which
+// denotes a *backward* iteration through instructions. Used only for return
+// basic blocks, which do not end with a tail call.
+static bool isLRAvailable(const TargetRegisterInfo &TRI,
+ MachineBasicBlock::reverse_iterator I,
+ MachineBasicBlock::reverse_iterator E) {
+ // At the end of the function, LR is dead.
+ bool Live = false;
+ for (; I != E; ++I) {
+ const MachineInstr &MI = *I;
+
+ // Check defs of LR.
+ if (MI.modifiesRegister(ARM::LR, &TRI))
+ Live = false;
+
+ // Check uses of LR.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
+ Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
+ Opcode == ARM::tBXNS_RET) {
+ // These instructions use LR, but it's not an (explicit or implicit)
+ // operand.
+ Live = true;
+ continue;
+ }
+ if (MI.readsRegister(ARM::LR, &TRI))
+ Live = true;
+ }
+ return !Live;
+}
+
outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
@@ -5707,10 +5796,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
// Erase every candidate that violates the restrictions above. (It could be
// true that we have viable candidates, so it's not worth bailing out in
// the case that, say, 1 out of 20 candidates violate the restrictions.)
- RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- CantGuaranteeValueAcrossCall),
- RepeatedSequenceLocs.end());
+ llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
@@ -5730,8 +5816,8 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
};
OutlinerCosts Costs(Subtarget);
- unsigned FrameID = 0;
- unsigned NumBytesToCreateFrame = 0;
+ unsigned FrameID = MachineOutlinerDefault;
+ unsigned NumBytesToCreateFrame = Costs.FrameDefault;
// If the last instruction in any candidate is a terminator, then we should
// tail call all of the candidates.
@@ -5740,22 +5826,31 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
NumBytesToCreateFrame = Costs.FrameTailCall;
SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
} else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
- LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr ||
+ LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
+ LastInstrOpcode == ARM::tBLXr ||
+ LastInstrOpcode == ARM::tBLXr_noip ||
LastInstrOpcode == ARM::tBLXi) {
FrameID = MachineOutlinerThunk;
NumBytesToCreateFrame = Costs.FrameThunk;
SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
} else {
// We need to decide how to emit calls + frames. We can always emit the same
- // frame if we don't need to save to the stack.
+ // frame if we don't need to save to the stack. If we have to save to the
+ // stack, then we need a different frame.
unsigned NumBytesNoStackCalls = 0;
std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
for (outliner::Candidate &C : RepeatedSequenceLocs) {
C.initLRU(TRI);
-
- // Is LR available? If so, we don't need a save.
- if (C.LRU.available(ARM::LR)) {
+ // LR liveness is overestimated in return blocks, unless they end with a
+ // tail call.
+ const auto Last = C.getMBB()->rbegin();
+ const bool LRIsAvailable =
+ C.getMBB()->isReturnBlock() && !Last->isCall()
+ ? isLRAvailable(TRI, Last,
+ (MachineBasicBlock::reverse_iterator)C.front())
+ : C.LRU.available(ARM::LR);
+ if (LRIsAvailable) {
FrameID = MachineOutlinerNoLRSave;
NumBytesNoStackCalls += Costs.CallNoLRSave;
C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
@@ -5770,18 +5865,157 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
CandidatesWithoutStackFixups.push_back(C);
}
+
+ // Is SP used in the sequence at all? If not, we don't have to modify
+ // the stack, so we are guaranteed to get the same frame.
+ else if (C.UsedInSequence.available(ARM::SP)) {
+ NumBytesNoStackCalls += Costs.CallDefault;
+ C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // If we outline this, we need to modify the stack. Pretend we don't
+ // outline this by saving all of its bytes.
+ else
+ NumBytesNoStackCalls += SequenceSize;
}
- if (!CandidatesWithoutStackFixups.empty()) {
+ // If there are no places where we have to save LR, then note that we don't
+ // have to update the stack. Otherwise, give every candidate the default
+ // call type
+ if (NumBytesNoStackCalls <=
+ RepeatedSequenceLocs.size() * Costs.CallDefault) {
RepeatedSequenceLocs = CandidatesWithoutStackFixups;
+ FrameID = MachineOutlinerNoLRSave;
} else
- return outliner::OutlinedFunction();
+ SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
+ }
+
+ // Does every candidate's MBB contain a call? If so, then we might have a
+ // call in the range.
+ if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+ // Check if the range contains a call. These require a save + restore of
+ // the link register.
+ if (std::any_of(FirstCand.front(), FirstCand.back(),
+ [](const MachineInstr &MI) { return MI.isCall(); }))
+ NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
+
+ // Handle the last instruction separately. If it is a tail call, then the
+ // last instruction is a call, and we don't want to save + restore in this
+ // case. However, it could be possible that the last instruction is a
+ // call without it being valid to tail call this sequence. We should
+ // consider this as well.
+ else if (FrameID != MachineOutlinerThunk &&
+ FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
+ NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
}
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
NumBytesToCreateFrame, FrameID);
}
+bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
+ int64_t Fixup,
+ bool Updt) const {
+ int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP);
+ unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
+ if (SPIdx < 0)
+ // No SP operand
+ return true;
+ else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
+ // If SP is not the base register we can't do much
+ return false;
+
+ // Stack might be involved but addressing mode doesn't handle any offset.
+ // Note: AddrModeT1_[1|2|4] don't operate on SP
+ if (AddrMode == ARMII::AddrMode1 // Arithmetic instructions
+ || AddrMode == ARMII::AddrMode4 // Load/Store Multiple
+ || AddrMode == ARMII::AddrMode6 // Neon Load/Store Multiple
+ || AddrMode == ARMII::AddrModeT2_so // SP can't be used as the base register
+ || AddrMode == ARMII::AddrModeT2_pc // PCrel access
+ || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST
+ || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE
+ || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE
+ || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR
+ || AddrMode == ARMII::AddrModeNone)
+ return false;
+
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ unsigned ImmIdx = NumOps - 3;
+
+ const MachineOperand &Offset = MI->getOperand(ImmIdx);
+ assert(Offset.isImm() && "Is not an immediate");
+ int64_t OffVal = Offset.getImm();
+
+ if (OffVal < 0)
+ // Don't override data if they are below SP.
+ return false;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+
+ switch (AddrMode) {
+ case ARMII::AddrMode3:
+ if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
+ return false;
+ OffVal = ARM_AM::getAM3Offset(OffVal);
+ NumBits = 8;
+ break;
+ case ARMII::AddrMode5:
+ if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
+ return false;
+ OffVal = ARM_AM::getAM5Offset(OffVal);
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode5FP16:
+ if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
+ return false;
+ OffVal = ARM_AM::getAM5FP16Offset(OffVal);
+ NumBits = 8;
+ Scale = 2;
+ break;
+ case ARMII::AddrModeT2_i8:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT2_i8s4:
+ // FIXME: Values are already scaled in this addressing mode.
+ assert((Fixup & 3) == 0 && "Can't encode this offset!");
+ NumBits = 10;
+ break;
+ case ARMII::AddrModeT2_ldrex:
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrModeT2_i12:
+ case ARMII::AddrMode_i12:
+ NumBits = 12;
+ break;
+ case ARMII::AddrModeT1_s: // SP-relative LD/ST
+ NumBits = 8;
+ Scale = 4;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
+ "Can't encode this offset!");
+ OffVal += Fixup / Scale;
+
+ unsigned Mask = (1 << NumBits) - 1;
+
+ if (OffVal <= Mask) {
+ if (Updt)
+ MI->getOperand(ImmIdx).setImm(OffVal);
+ return true;
+ }
+
+ return false;
+}
+
bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
const Function &F = MF.getFunction();
@@ -5841,7 +6075,13 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
Flags |= MachineOutlinerMBBFlags::HasCalls;
- if (!LRU.available(ARM::LR))
+ // LR liveness is overestimated in return blocks.
+
+ bool LRIsAvailable =
+ MBB.isReturnBlock() && !MBB.back().isCall()
+ ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
+ : LRU.available(ARM::LR);
+ if (!LRIsAvailable)
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
return true;
@@ -5879,8 +6119,9 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
// Be conservative with ARMv8.1 MVE instructions.
if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
- Opc == ARM::t2WhileLoopStart || Opc == ARM::t2LoopDec ||
- Opc == ARM::t2LoopEnd)
+ Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
+ Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
+ Opc == ARM::t2LoopEndDec)
return outliner::InstrType::Illegal;
const MCInstrDesc &MCID = MI.getDesc();
@@ -5914,16 +6155,56 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
return outliner::InstrType::Illegal;
if (MI.isCall()) {
+ // Get the function associated with the call. Look at each operand and find
+ // the one that represents the callee and get its name.
+ const Function *Callee = nullptr;
+ for (const MachineOperand &MOP : MI.operands()) {
+ if (MOP.isGlobal()) {
+ Callee = dyn_cast<Function>(MOP.getGlobal());
+ break;
+ }
+ }
+
+ // Don't outline calls to "mcount"-like functions; in particular, Linux
+ // kernel function tracing relies on it.
+ if (Callee &&
+ (Callee->getName() == "\01__gnu_mcount_nc" ||
+ Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
+ return outliner::InstrType::Illegal;
+
// If we don't know anything about the callee, assume it depends on the
// stack layout of the caller. In that case, it's only legal to outline
// as a tail-call. Explicitly list the call instructions we know about so
// we don't get unexpected results with call pseudo-instructions.
auto UnknownCallOutlineType = outliner::InstrType::Illegal;
if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
- Opc == ARM::tBLXr || Opc == ARM::tBLXi)
+ Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
+ Opc == ARM::tBLXi)
UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
- return UnknownCallOutlineType;
+ if (!Callee)
+ return UnknownCallOutlineType;
+
+ // We have a function we have information about. Check if it's something we
+ // can safely outline.
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
+
+ // We don't know what's going on with the callee at all. Don't touch it.
+ if (!CalleeMF)
+ return UnknownCallOutlineType;
+
+ // Check if we know anything about the callee saves on the function. If we
+ // don't, then don't touch it, since that implies that we haven't computed
+ // anything about its stack frame yet.
+ MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
+ MFI.getNumObjects() > 0)
+ return UnknownCallOutlineType;
+
+ // At this point, we can say that CalleeMF ought to not pass anything on the
+ // stack. Therefore, we can outline it.
+ return outliner::InstrType::Legal;
}
// Since calls are handled, don't touch LR or PC
@@ -5946,6 +6227,19 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
if (!MightNeedStackFixUp)
return outliner::InstrType::Legal;
+ // Any modification of SP will break our code to save/restore LR.
+ // FIXME: We could handle some instructions which add a constant offset to
+ // SP, with a bit more work.
+ if (MI.modifiesRegister(ARM::SP, TRI))
+ return outliner::InstrType::Illegal;
+
+ // At this point, we have a stack instruction that we might need to fix up.
+ // We'll handle it if it's a load or store.
+ if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
+ false))
+ return outliner::InstrType::Legal;
+
+ // We can't fix it up, so don't outline it.
return outliner::InstrType::Illegal;
}
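The legality test above relies on checkAndUpdateStackOffset(), whose body is not in this hunk. Going by its declaration in ARMBaseInstrInfo.h further down ("Returns true if the machine instruction offset can handle the stack fixup and updates it if requested"), it returns true when MI is an SP-relative load or store whose immediate offset remains encodable after adding Fixup, and it only rewrites the offset in place when the last argument is true; here it is called with false, i.e. purely as a legality check.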
@@ -5961,13 +6255,107 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
return outliner::InstrType::Legal;
}
+void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
+ for (MachineInstr &MI : MBB) {
+ checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
+ }
+}
+
+void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const {
+ unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
+ int Align = -Subtarget.getStackAlignment().value();
+ BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
+ .addReg(ARM::LR, RegState::Kill)
+ .addReg(ARM::SP)
+ .addImm(Align)
+ .add(predOps(ARMCC::AL));
+}
+
+void ARMBaseInstrInfo::emitCFIForLRSaveOnStack(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
+ MachineFunction &MF = *MBB.getParent();
+ const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
+ unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
+ int Align = Subtarget.getStackAlignment().value();
+ // Add a CFI saying the stack was moved down.
+ int64_t StackPosEntry =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(StackPosEntry)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ // Add a CFI saying that the LR that we want to find is now higher than
+ // before.
+ int64_t LRPosEntry =
+ MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfLR, -Align));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(LRPosEntry)
+ .setMIFlags(MachineInstr::FrameSetup);
+}
+
+void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It,
+ Register Reg) const {
+ MachineFunction &MF = *MBB.getParent();
+ const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
+ unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
+ unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+
+ int64_t LRPosEntry = MF.addFrameInst(
+ MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(LRPosEntry)
+ .setMIFlags(MachineInstr::FrameSetup);
+}
+
+void ARMBaseInstrInfo::restoreLRFromStack(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
+ unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+ MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP);
+ if (!Subtarget.isThumb())
+ MIB.addReg(0);
+ MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL));
+}
+
+void ARMBaseInstrInfo::emitCFIForLRRestoreFromStack(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
+ // Now stack has moved back up...
+ MachineFunction &MF = *MBB.getParent();
+ const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
+ unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
+ int64_t StackPosEntry =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(StackPosEntry)
+ .setMIFlags(MachineInstr::FrameDestroy);
+
+ // ... and we have restored LR.
+ int64_t LRPosEntry =
+ MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(LRPosEntry)
+ .setMIFlags(MachineInstr::FrameDestroy);
+}
+
+void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
+ MachineFunction &MF = *MBB.getParent();
+ const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
+ unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
+
+ int64_t LRPosEntry =
+ MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(LRPosEntry)
+ .setMIFlags(MachineInstr::FrameDestroy);
+}
+
void ARMBaseInstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
- // Nothing is needed for tail-calls.
- if (OF.FrameConstructionID == MachineOutlinerTailCall)
- return;
-
// For thunk outlining, rewrite the last instruction from a call to a
// tail-call.
if (OF.FrameConstructionID == MachineOutlinerThunk) {
@@ -5984,13 +6372,59 @@ void ARMBaseInstrInfo::buildOutlinedFrame(
if (isThumb && !Call->getOperand(FuncOp).isReg())
MIB.add(predOps(ARMCC::AL));
Call->eraseFromParent();
- return;
}
+ // Is there a call in the outlined range?
+ auto IsNonTailCall = [](MachineInstr &MI) {
+ return MI.isCall() && !MI.isReturn();
+ };
+ if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
+ MachineBasicBlock::iterator It = MBB.begin();
+ MachineBasicBlock::iterator Et = MBB.end();
+
+ if (OF.FrameConstructionID == MachineOutlinerTailCall ||
+ OF.FrameConstructionID == MachineOutlinerThunk)
+ Et = std::prev(MBB.end());
+
+ // We have to save and restore LR, so we need to add it to the liveins if it
+ // is not already part of the set. This is sufficient since outlined
+ // functions only have one block.
+ if (!MBB.isLiveIn(ARM::LR))
+ MBB.addLiveIn(ARM::LR);
+
+ // Insert a save before the outlined region
+ saveLROnStack(MBB, It);
+ emitCFIForLRSaveOnStack(MBB, It);
+
+ // Fix up the instructions in the range, since we're going to modify the
+ // stack.
+ assert(OF.FrameConstructionID != MachineOutlinerDefault &&
+ "Can only fix up stack references once");
+ fixupPostOutline(MBB);
+
+ // Insert a restore before the terminator for the function. Restore LR.
+ restoreLRFromStack(MBB, Et);
+ emitCFIForLRRestoreFromStack(MBB, Et);
+ }
+
+ // If this is a tail call outlined function, then there's already a return.
+ if (OF.FrameConstructionID == MachineOutlinerTailCall ||
+ OF.FrameConstructionID == MachineOutlinerThunk)
+ return;
+
// Here we have to insert the return ourselves. Get the correct opcode from
// current feature set.
BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
.add(predOps(ARMCC::AL));
+
+ // Did we have to modify the stack by saving the link register?
+ if (OF.FrameConstructionID != MachineOutlinerDefault &&
+ OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
+ return;
+
+ // We modified the stack.
+ // Walk over the basic block and fix up all the stack accesses.
+ fixupPostOutline(MBB);
}
MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
@@ -6022,21 +6456,70 @@ MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
CallMIB.add(predOps(ARMCC::AL));
CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
+ if (C.CallConstructionID == MachineOutlinerNoLRSave ||
+ C.CallConstructionID == MachineOutlinerThunk) {
+ // No, so just insert the call.
+ It = MBB.insert(It, CallMIB);
+ return It;
+ }
+
+ const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
// Can we save to a register?
if (C.CallConstructionID == MachineOutlinerRegSave) {
unsigned Reg = findRegisterToSaveLRTo(C);
assert(Reg != 0 && "No callee-saved register available?");
// Save and restore LR from that register.
- if (!MBB.isLiveIn(ARM::LR))
- MBB.addLiveIn(ARM::LR);
copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
+ if (!AFI.isLRSpilled())
+ emitCFIForLRSaveToReg(MBB, It, Reg);
CallPt = MBB.insert(It, CallMIB);
copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
+ if (!AFI.isLRSpilled())
+ emitCFIForLRRestoreFromReg(MBB, It);
It--;
return CallPt;
}
- // Insert the call.
- It = MBB.insert(It, CallMIB);
- return It;
+ // We have the default case. Save and restore from SP.
+ if (!MBB.isLiveIn(ARM::LR))
+ MBB.addLiveIn(ARM::LR);
+ saveLROnStack(MBB, It);
+ if (!AFI.isLRSpilled())
+ emitCFIForLRSaveOnStack(MBB, It);
+ CallPt = MBB.insert(It, CallMIB);
+ restoreLRFromStack(MBB, It);
+ if (!AFI.isLRSpilled())
+ emitCFIForLRRestoreFromStack(MBB, It);
+ It--;
+ return CallPt;
}
+
+bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
+ MachineFunction &MF) const {
+ return Subtarget.isMClass() && MF.getFunction().hasMinSize();
+}
+
+bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) const {
+ // Try hard to rematerialize any VCTPs because if we spill P0, it will block
+ // the tail predication conversion. This means that the element count
+ // register has to be live for longer, but that has to be better than
+ // spill/restore and VPT predication.
+ return isVCTP(&MI) && !isPredicated(MI);
+}
+
+unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
+ return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
+ : ARM::BLX;
+}
+
+unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
+ return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
+ : ARM::tBLXr;
+}
+
+unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
+ return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
+ : ARM::BLX_pred;
+}
+
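The three helpers above centralise the choice between the ordinary BLX-family call opcodes and their _noip variants used when SLS (straight-line speculation) hardening of indirect calls is enabled. A minimal usage sketch, mirroring how ARMFastISel below picks its indirect-call opcode (the wrapper function itself is hypothetical):

static unsigned selectIndirectCallOpcode(const llvm::MachineFunction &MF,
                                         bool IsThumb2) {
  // hardenSlsBlr() on the subtarget switches these helpers over to the
  // *_noip opcodes; otherwise the ordinary BLX / tBLXr opcodes are returned.
  return IsThumb2 ? llvm::gettBLXrOpcode(MF) : llvm::getBLXOpcode(MF);
}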
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 1a75b011ca59..1b843c428130 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -132,6 +132,10 @@ public:
const ScheduleDAG *DAG) const override;
ScheduleHazardRecognizer *
+ CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAGMI *DAG) const override;
+
+ ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const override;
@@ -171,28 +175,13 @@ public:
bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
ArrayRef<MachineOperand> Pred2) const override;
- bool DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const override;
+ bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred,
+ bool SkipDead) const override;
bool isPredicable(const MachineInstr &MI) const override;
// CPSR defined in instruction
static bool isCPSRDefined(const MachineInstr &MI);
- bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const;
- bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const;
-
- // Load, scaled register offset
- bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const;
- // Load, scaled register offset, not plus LSL2
- bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const;
- // Minus reg for ldstso addr mode
- bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const;
- // Scaled register offset in address mode 2
- bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const;
- // Load multiple, base reg in list
- bool isLDMBaseRegInList(const MachineInstr &MI) const;
- // get LDM variable defs size
- unsigned getLDMVariableDefsSize(const MachineInstr &MI) const;
/// GetInstSize - Returns the size of the specified MachineInstr.
///
@@ -372,11 +361,60 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
+ /// Enable outlining by default at -Oz.
+ bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
+
+ bool isUnspillableTerminatorImpl(const MachineInstr *MI) const override {
+ return MI->getOpcode() == ARM::t2LoopEndDec ||
+ MI->getOpcode() == ARM::t2DoLoopStartTP;
+ }
+
private:
/// Returns an unused general-purpose register which can be used for
/// constructing an outlined call if one exists. Returns 0 otherwise.
unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+ /// Adds an instruction which saves the link register on top of the stack into
+ /// the MachineBasicBlock \p MBB at position \p It.
+ void saveLROnStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+
+ /// Adds an instruction which restores the link register from the top of the
+ /// stack into the MachineBasicBlock \p MBB at position \p It.
+ void restoreLRFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+
+ /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
+ /// for the case when the LR is saved on the stack.
+ void emitCFIForLRSaveOnStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+
+ /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
+ /// for the case when the LR is saved in the register \p Reg.
+ void emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It,
+ Register Reg) const;
+
+ /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
+ /// after the LR was restored from the stack.
+ void emitCFIForLRRestoreFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+
+ /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
+ /// after the LR was restored from a register.
+ void emitCFIForLRRestoreFromReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+ /// \brief Sets the offsets on outlined instructions in \p MBB which use SP
+ /// so that they will be valid post-outlining.
+ ///
+ /// \param MBB A \p MachineBasicBlock in an outlined function.
+ void fixupPostOutline(MachineBasicBlock &MBB) const;
+
+ /// Returns true if the machine instruction offset can handle the stack fixup
+ /// and updates it if requested.
+ bool checkAndUpdateStackOffset(MachineInstr *MI, int64_t Fixup,
+ bool Updt) const;
+
unsigned getInstBundleLength(const MachineInstr &MI) const;
int getVLDMDefCycle(const InstrItineraryData *ItinData,
@@ -439,6 +477,9 @@ private:
MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) const;
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) const override;
+
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
@@ -593,56 +634,6 @@ unsigned VCMPOpcodeToVPT(unsigned Opcode) {
}
static inline
-unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
- switch (Opcode) {
- default:
- llvm_unreachable("unhandled vctp opcode");
- break;
- case ARM::MVE_VCTP8:
- return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
- case ARM::MVE_VCTP16:
- return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
- case ARM::MVE_VCTP32:
- return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
- case ARM::MVE_VCTP64:
- return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
- }
- return 0;
-}
-
-static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
- switch (Opcode) {
- default:
- llvm_unreachable("unhandled vctp opcode");
- case ARM::MVE_VCTP8: return 16;
- case ARM::MVE_VCTP16: return 8;
- case ARM::MVE_VCTP32: return 4;
- case ARM::MVE_VCTP64: return 2;
- }
- return 0;
-}
-
-static inline
-bool isVCTP(MachineInstr *MI) {
- switch (MI->getOpcode()) {
- default:
- break;
- case ARM::MVE_VCTP8:
- case ARM::MVE_VCTP16:
- case ARM::MVE_VCTP32:
- case ARM::MVE_VCTP64:
- return true;
- }
- return false;
-}
-
-static inline
-bool isLoopStart(MachineInstr &MI) {
- return MI.getOpcode() == ARM::t2DoLoopStart ||
- MI.getOpcode() == ARM::t2WhileLoopStart;
-}
-
-static inline
bool isCondBranchOpcode(int Opc) {
return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
}
@@ -653,11 +644,77 @@ static inline bool isJumpTableBranchOpcode(int Opc) {
Opc == ARM::t2BR_JT;
}
+static inline bool isLowOverheadTerminatorOpcode(int Opc) {
+ return Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
+ Opc == ARM::t2LoopEnd || Opc == ARM::t2LoopEndDec;
+}
+
static inline
bool isIndirectBranchOpcode(int Opc) {
return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
}
+static inline bool isIndirectCall(const MachineInstr &MI) {
+ int Opc = MI.getOpcode();
+ switch (Opc) {
+ // indirect calls:
+ case ARM::BLX:
+ case ARM::BLX_noip:
+ case ARM::BLX_pred:
+ case ARM::BLX_pred_noip:
+ case ARM::BX_CALL:
+ case ARM::BMOVPCRX_CALL:
+ case ARM::TCRETURNri:
+ case ARM::TAILJMPr:
+ case ARM::TAILJMPr4:
+ case ARM::tBLXr:
+ case ARM::tBLXr_noip:
+ case ARM::tBLXNSr:
+ case ARM::tBLXNS_CALL:
+ case ARM::tBX_CALL:
+ case ARM::tTAILJMPr:
+ assert(MI.isCall(MachineInstr::IgnoreBundle));
+ return true;
+ // direct calls:
+ case ARM::BL:
+ case ARM::BL_pred:
+ case ARM::BMOVPCB_CALL:
+ case ARM::BL_PUSHLR:
+ case ARM::BLXi:
+ case ARM::TCRETURNdi:
+ case ARM::TAILJMPd:
+ case ARM::SVC:
+ case ARM::HVC:
+ case ARM::TPsoft:
+ case ARM::tTAILJMPd:
+ case ARM::t2SMC:
+ case ARM::t2HVC:
+ case ARM::tBL:
+ case ARM::tBLXi:
+ case ARM::tBL_PUSHLR:
+ case ARM::tTAILJMPdND:
+ case ARM::tSVC:
+ case ARM::tTPsoft:
+ assert(MI.isCall(MachineInstr::IgnoreBundle));
+ return false;
+ }
+ assert(!MI.isCall(MachineInstr::IgnoreBundle));
+ return false;
+}
+
+static inline bool isIndirectControlFlowNotComingBack(const MachineInstr &MI) {
+ int opc = MI.getOpcode();
+ return MI.isReturn() || isIndirectBranchOpcode(MI.getOpcode()) ||
+ isJumpTableBranchOpcode(opc);
+}
+
+static inline bool isSpeculationBarrierEndBBOpcode(int Opc) {
+ return Opc == ARM::SpeculationBarrierISBDSBEndBB ||
+ Opc == ARM::SpeculationBarrierSBEndBB ||
+ Opc == ARM::t2SpeculationBarrierISBDSBEndBB ||
+ Opc == ARM::t2SpeculationBarrierSBEndBB;
+}
+
static inline bool isPopOpcode(int Opc) {
return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET ||
Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD ||
@@ -829,13 +886,17 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm,
return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0;
case ARMII::AddrModeT2_i7s4:
return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0;
+ case ARMII::AddrModeT2_i8:
+ return std::abs(Imm) < (((1 << 8) * 1) - 1);
+ case ARMII::AddrModeT2_i12:
+ return Imm >= 0 && Imm < (((1 << 12) * 1) - 1);
default:
llvm_unreachable("Unhandled Addressing mode");
}
}
-// Return true if the given intrinsic is a gather or scatter
-inline bool isGatherScatter(IntrinsicInst *IntInst) {
+// Return true if the given intrinsic is a gather
+inline bool isGather(IntrinsicInst *IntInst) {
if (IntInst == nullptr)
return false;
unsigned IntrinsicID = IntInst->getIntrinsicID();
@@ -845,8 +906,15 @@ inline bool isGatherScatter(IntrinsicInst *IntInst) {
IntrinsicID == Intrinsic::arm_mve_vldr_gather_base_wb ||
IntrinsicID == Intrinsic::arm_mve_vldr_gather_base_wb_predicated ||
IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset ||
- IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset_predicated ||
- IntrinsicID == Intrinsic::masked_scatter ||
+ IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset_predicated);
+}
+
+// Return true if the given intrinsic is a scatter
+inline bool isScatter(IntrinsicInst *IntInst) {
+ if (IntInst == nullptr)
+ return false;
+ unsigned IntrinsicID = IntInst->getIntrinsicID();
+ return (IntrinsicID == Intrinsic::masked_scatter ||
IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base ||
IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base_predicated ||
IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base_wb ||
@@ -855,6 +923,17 @@ inline bool isGatherScatter(IntrinsicInst *IntInst) {
IntrinsicID == Intrinsic::arm_mve_vstr_scatter_offset_predicated);
}
+// Return true if the given intrinsic is a gather or scatter
+inline bool isGatherScatter(IntrinsicInst *IntInst) {
+ if (IntInst == nullptr)
+ return false;
+ return isGather(IntInst) || isScatter(IntInst);
+}
+
+unsigned getBLXOpcode(const MachineFunction &MF);
+unsigned gettBLXrOpcode(const MachineFunction &MF);
+unsigned getBLXpredOpcode(const MachineFunction &MF);
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
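The gather/scatter predicate split above keeps isGatherScatter() as the union of the two new checks. A small illustrative helper (hypothetical, assuming llvm/IR/IntrinsicInst.h and this header are included) showing how the three predicates compose:

static const char *classifyMVEMemAccess(llvm::Instruction &I) {
  using namespace llvm;
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II || !isGatherScatter(II))
    return "neither";
  // isGather() covers masked.gather plus the MVE vldr_gather_* intrinsics;
  // isScatter() covers masked.scatter plus the MVE vstr_scatter_* intrinsics.
  return isGather(II) ? "gather" : "scatter";
}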
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 3579635f83b5..1a264dabeeb5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -55,7 +55,9 @@
using namespace llvm;
ARMBaseRegisterInfo::ARMBaseRegisterInfo()
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) {}
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) {
+ ARM_MC::initLLVMToCVRegMapping(this);
+}
static unsigned getFramePointerReg(const ARMSubtarget &STI) {
return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11;
@@ -328,9 +330,13 @@ bool ARMBaseRegisterInfo::getRegAllocationHints(
case ARMRI::RegPairOdd:
Odd = 1;
break;
- default:
+ case ARMRI::RegLR:
TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
+ if (MRI.getRegClass(VirtReg)->contains(ARM::LR))
+ Hints.push_back(ARM::LR);
return false;
+ default:
+ return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
}
// This register should preferably be even (Odd == 0) or odd (Odd == 1).
@@ -634,10 +640,10 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
/// be a pointer to FrameIdx at the beginning of the basic block.
-void ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
- Register BaseReg,
- int FrameIdx,
- int64_t Offset) const {
+Register
+ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ int FrameIdx,
+ int64_t Offset) const {
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
(AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri);
@@ -651,6 +657,7 @@ void ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
+ Register BaseReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
@@ -658,6 +665,8 @@ void ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
if (!AFI->isThumb1OnlyFunction())
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+
+ return BaseReg;
}
void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 0a0907af2141..5afb6c6aa015 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -32,8 +32,11 @@ class LiveIntervals;
namespace ARMRI {
enum {
+ // Used for LDRD register pairs
RegPairOdd = 1,
- RegPairEven = 2
+ RegPairEven = 2,
+ // Used to hint for lr in t2DoLoopStart
+ RegLR = 3
};
} // end namespace ARMRI
@@ -165,9 +168,8 @@ public:
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB, Register BaseReg,
- int FrameIdx,
- int64_t Offset) const override;
+ Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
+ int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
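materializeFrameBaseRegister() now creates the base virtual register itself and returns it instead of having generic code pass one in. A rough caller-side sketch of the new contract (illustrative only, not taken from this patch):

static void rewriteFrameIndexAccess(const llvm::TargetRegisterInfo &TRI,
                                    llvm::MachineBasicBlock &EntryMBB,
                                    llvm::MachineInstr &UseMI, int FrameIdx,
                                    int64_t Offset) {
  // The target chooses the register class and emits the ADDri/tADDframe/
  // t2ADDri in the entry block; the caller only consumes the returned vreg.
  llvm::Register BaseReg =
      TRI.materializeFrameBaseRegister(&EntryMBB, FrameIdx, Offset);
  TRI.resolveFrameIndex(UseMI, BaseReg, /*Offset=*/0);
}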
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
new file mode 100644
index 000000000000..9ba16003a97a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
@@ -0,0 +1,228 @@
+//===-- ARMBlockPlacement.cpp - ARM block placement pass ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass re-arranges machine basic blocks to suit target requirements.
+// Currently it only moves blocks to fix backwards WLS branches.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBasicBlockInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-block-placement"
+#define DEBUG_PREFIX "ARM Block Placement: "
+
+namespace llvm {
+class ARMBlockPlacement : public MachineFunctionPass {
+private:
+ const ARMBaseInstrInfo *TII;
+ std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
+ MachineLoopInfo *MLI = nullptr;
+
+public:
+ static char ID;
+ ARMBlockPlacement() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After);
+ bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // namespace llvm
+
+FunctionPass *llvm::createARMBlockPlacementPass() {
+ return new ARMBlockPlacement();
+}
+
+char ARMBlockPlacement::ID = 0;
+
+INITIALIZE_PASS(ARMBlockPlacement, DEBUG_TYPE, "ARM block placement", false,
+ false)
+
+bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+ const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ if (!ST.hasLOB())
+ return false;
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n");
+ MLI = &getAnalysis<MachineLoopInfo>();
+ TII = static_cast<const ARMBaseInstrInfo *>(ST.getInstrInfo());
+ BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
+ MF.RenumberBlocks();
+ BBUtils->computeAllBlockSizes();
+ BBUtils->adjustBBOffsetsAfter(&MF.front());
+ bool Changed = false;
+
+ // Find loops with a backwards branching WLS.
+ // This requires looping over the loops in the function, checking each
+ // preheader for a WLS whose target is before the preheader. If moving
+ // the target block wouldn't produce another backwards WLS or a new forwards
+ // LE branch, then move the target block after the preheader.
+ for (auto *ML : *MLI) {
+ MachineBasicBlock *Preheader = ML->getLoopPredecessor();
+ if (!Preheader)
+ continue;
+
+ for (auto &Terminator : Preheader->terminators()) {
+ if (Terminator.getOpcode() != ARM::t2WhileLoopStart)
+ continue;
+ MachineBasicBlock *LoopExit = Terminator.getOperand(1).getMBB();
+ // We don't want to move the function's entry block.
+ if (!LoopExit->getPrevNode())
+ continue;
+ if (blockIsBefore(Preheader, LoopExit))
+ continue;
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Found a backwards WLS from "
+ << Preheader->getFullName() << " to "
+ << LoopExit->getFullName() << "\n");
+
+ // Make sure that moving the target block doesn't cause any of its WLSs
+ // that were previously not backwards to become backwards
+ bool CanMove = true;
+ for (auto &LoopExitTerminator : LoopExit->terminators()) {
+ if (LoopExitTerminator.getOpcode() != ARM::t2WhileLoopStart)
+ continue;
+ // An example loop structure where the LoopExit can't be moved, since
+ // bb1's WLS will become backwards once it's moved after bb3
+ // bb1: - LoopExit
+ // WLS bb2 - LoopExit2
+ // bb2:
+ // ...
+ // bb3: - Preheader
+ // WLS bb1
+ // bb4: - Header
+ MachineBasicBlock *LoopExit2 =
+ LoopExitTerminator.getOperand(1).getMBB();
+ // If the WLS from LoopExit to LoopExit2 is already backwards then
+ // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is
+ // after the Preheader then moving will keep it as a forward branch, so
+ // it can be moved. If LoopExit2 is between the Preheader and LoopExit
+ // then moving LoopExit will make it a backwards branch, so it can't be
+ // moved since we'd fix one and introduce one backwards branch.
+ // TODO: Analyse the blocks to make a decision if it would be worth
+ // moving LoopExit even if LoopExit2 is between the Preheader and
+ // LoopExit.
+ if (!blockIsBefore(LoopExit2, LoopExit) &&
+ (LoopExit2 == Preheader || blockIsBefore(LoopExit2, Preheader))) {
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX
+ << "Can't move the target block as it would "
+ "introduce a new backwards WLS branch\n");
+ CanMove = false;
+ break;
+ }
+ }
+
+ if (CanMove) {
+ // Make sure no LEs become forwards.
+ // An example loop structure where the LoopExit can't be moved, since
+ // bb2's LE will become forwards once bb1 is moved after bb3.
+ // bb1: - LoopExit
+ // bb2:
+ // LE bb1 - Terminator
+ // bb3: - Preheader
+ // WLS bb1
+ // bb4: - Header
+ for (auto It = LoopExit->getIterator(); It != Preheader->getIterator();
+ It++) {
+ MachineBasicBlock *MBB = &*It;
+ for (auto &Terminator : MBB->terminators()) {
+ if (Terminator.getOpcode() != ARM::t2LoopEndDec)
+ continue;
+ MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB();
+ // The LE will become forwards branching if it branches to LoopExit
+ // which isn't allowed by the architecture, so we should avoid
+ // introducing these.
+ // TODO: Analyse the blocks to make a decision if it would be worth
+ // moving LoopExit even if we'd introduce a forwards LE
+ if (LETarget == LoopExit) {
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX
+ << "Can't move the target block as it would "
+ "introduce a new forwards LE branch\n");
+ CanMove = false;
+ break;
+ }
+ }
+ }
+
+ if (!CanMove)
+ break;
+ }
+
+ if (CanMove) {
+ moveBasicBlock(LoopExit, Preheader);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool ARMBlockPlacement::blockIsBefore(MachineBasicBlock *BB,
+ MachineBasicBlock *Other) {
+ return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB);
+}
+
+void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *After) {
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after "
+ << After->getName() << "\n");
+ MachineBasicBlock *BBPrevious = BB->getPrevNode();
+ assert(BBPrevious && "Cannot move the function entry basic block");
+ MachineBasicBlock *AfterNext = After->getNextNode();
+ MachineBasicBlock *BBNext = BB->getNextNode();
+
+ BB->moveAfter(After);
+
+ auto FixFallthrough = [&](MachineBasicBlock *From, MachineBasicBlock *To) {
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Checking for fallthrough from "
+ << From->getName() << " to " << To->getName() << "\n");
+ assert(From->isSuccessor(To) &&
+ "'To' is expected to be a successor of 'From'");
+ MachineInstr &Terminator = *(--From->terminators().end());
+ if (!Terminator.isUnconditionalBranch()) {
+ // The BB doesn't have an unconditional branch so it relied on
+ // fall-through. Fix by adding an unconditional branch to the moved BB.
+ MachineInstrBuilder MIB =
+ BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B));
+ MIB.addMBB(To);
+ MIB.addImm(ARMCC::CondCodes::AL);
+ MIB.addReg(ARM::NoRegister);
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from "
+ << From->getName() << " to " << To->getName() << ": "
+ << *MIB.getInstr());
+ }
+ };
+
+ // Fix fall-through to the moved BB from the one that used to be before it.
+ if (BBPrevious->isSuccessor(BB))
+ FixFallthrough(BBPrevious, BB);
+ // Fix fall through from the destination BB to the one that used to follow.
+ if (AfterNext && After->isSuccessor(AfterNext))
+ FixFallthrough(After, AfterNext);
+ // Fix fall through from the moved BB to the one that used to follow.
+ if (BBNext && BB->isSuccessor(BBNext))
+ FixFallthrough(BB, BBNext);
+
+ BBUtils->adjustBBOffsetsAfter(After);
+}
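ARMBlockPlacement is a new late machine pass; this file defines the factory createARMBlockPlacementPass(), which is presumably declared in ARM.h and added to the backend pipeline from ARMPassConfig. The registration site is not part of this excerpt; roughly, it would look like:

// Hypothetical excerpt from ARMTargetMachine.cpp (hook name assumed):
//   void ARMPassConfig::addPreEmitPass() {
//     ...
//     addPass(createARMBlockPlacementPass());
//   }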
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
index d860473011e7..6feed82596cc 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -85,12 +85,11 @@ namespace {
/// Helper class for values going out through an ABI boundary (used for handling
/// function return values and call parameters).
-struct OutgoingValueHandler : public CallLowering::ValueHandler {
- OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
- : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
-
- bool isIncomingArgumentHandler() const override { return false; }
+struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
+ ARMOutgoingValueHandler(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, MachineInstrBuilder &MIB,
+ CCAssignFn *AssignFn)
+ : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -258,13 +257,14 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
- OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
+ ARMOutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret,
+ AssignFn);
return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler);
}
bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val,
- ArrayRef<Register> VRegs) const {
+ const Value *Val, ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const {
assert(!Val == VRegs.empty() && "Return value without a vreg");
auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>();
@@ -282,12 +282,10 @@ namespace {
/// Helper class for values coming in through an ABI boundary (used for handling
/// formal arguments and call return values).
-struct IncomingValueHandler : public CallLowering::ValueHandler {
- IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- CCAssignFn AssignFn)
- : ValueHandler(MIRBuilder, MRI, AssignFn) {}
-
- bool isIncomingArgumentHandler() const override { return true; }
+struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
+ ARMIncomingValueHandler(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, CCAssignFn AssignFn)
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -337,8 +335,8 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
- auto ValSize = VA.getValVT().getSizeInBits();
- auto LocSize = VA.getLocVT().getSizeInBits();
+ uint64_t ValSize = VA.getValVT().getFixedSizeInBits();
+ uint64_t LocSize = VA.getLocVT().getFixedSizeInBits();
assert(ValSize <= 64 && "Unsupported value size");
assert(LocSize <= 64 && "Unsupported location size");
@@ -399,10 +397,10 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
virtual void markPhysRegUsed(unsigned PhysReg) = 0;
};
-struct FormalArgHandler : public IncomingValueHandler {
+struct FormalArgHandler : public ARMIncomingValueHandler {
FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn AssignFn)
- : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
+ : ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
@@ -412,9 +410,10 @@ struct FormalArgHandler : public IncomingValueHandler {
} // end anonymous namespace
-bool ARMCallLowering::lowerFormalArguments(
- MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const {
+bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
auto &TLI = *getTLI<ARMTargetLowering>();
auto Subtarget = TLI.getSubtarget();
@@ -435,7 +434,7 @@ bool ARMCallLowering::lowerFormalArguments(
for (auto &Arg : F.args()) {
if (!isSupportedType(DL, TLI, Arg.getType()))
return false;
- if (Arg.hasPassPointeeByValueAttr())
+ if (Arg.hasPassPointeeByValueCopyAttr())
return false;
}
@@ -469,10 +468,10 @@ bool ARMCallLowering::lowerFormalArguments(
namespace {
-struct CallReturnHandler : public IncomingValueHandler {
+struct CallReturnHandler : public ARMIncomingValueHandler {
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn)
- : IncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+ : ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
@@ -482,15 +481,16 @@ struct CallReturnHandler : public IncomingValueHandler {
};
// FIXME: This should move to the ARMSubtarget when it supports all the opcodes.
-unsigned getCallOpcode(const ARMSubtarget &STI, bool isDirect) {
+unsigned getCallOpcode(const MachineFunction &MF, const ARMSubtarget &STI,
+ bool isDirect) {
if (isDirect)
return STI.isThumb() ? ARM::tBL : ARM::BL;
if (STI.isThumb())
- return ARM::tBLXr;
+ return gettBLXrOpcode(MF);
if (STI.hasV5TOps())
- return ARM::BLX;
+ return getBLXOpcode(MF);
if (STI.hasV4TOps())
return ARM::BX_CALL;
@@ -518,7 +518,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
// Create the call instruction so we can add the implicit uses of arg
// registers, but don't insert it yet.
bool IsDirect = !Info.Callee.isReg();
- auto CallOpcode = getCallOpcode(STI, IsDirect);
+ auto CallOpcode = getCallOpcode(MF, STI, IsDirect);
auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode);
bool IsThumb = STI.isThumb();
@@ -538,23 +538,19 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
MIB.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv));
- bool IsVarArg = false;
SmallVector<ArgInfo, 8> ArgInfos;
for (auto Arg : Info.OrigArgs) {
if (!isSupportedType(DL, TLI, Arg.Ty))
return false;
- if (!Arg.IsFixed)
- IsVarArg = true;
-
if (Arg.Flags[0].isByVal())
return false;
splitToValueTypes(Arg, ArgInfos, MF);
}
- auto ArgAssignFn = TLI.CCAssignFnForCall(Info.CallConv, IsVarArg);
- OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
+ auto ArgAssignFn = TLI.CCAssignFnForCall(Info.CallConv, Info.IsVarArg);
+ ARMOutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
return false;
@@ -567,7 +563,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
ArgInfos.clear();
splitToValueTypes(Info.OrigRet, ArgInfos, MF);
- auto RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv, IsVarArg);
+ auto RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv, Info.IsVarArg);
CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h
index ddbc9feb90e2..3be73d497d0b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -33,10 +33,12 @@ public:
ARMCallLowering(const ARMTargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs) const override;
+ ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 195d0a89291b..630490f6f914 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -338,6 +338,32 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
}
#endif
+// Align blocks where the previous block does not fall through. This may add
+// extra NOPs, but they will not be executed. It uses the PrefLoopAlignment as a
+// measure of how much to align, and only runs at CodeGenOpt::Aggressive.
+static bool AlignBlocks(MachineFunction *MF) {
+ if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
+ MF->getFunction().hasOptSize())
+ return false;
+
+ auto *TLI = MF->getSubtarget().getTargetLowering();
+ const Align Alignment = TLI->getPrefLoopAlignment();
+ if (Alignment < 4)
+ return false;
+
+ bool Changed = false;
+ bool PrevCanFallthough = true;
+ for (auto &MBB : *MF) {
+ if (!PrevCanFallthough) {
+ Changed = true;
+ MBB.setAlignment(Alignment);
+ }
+ PrevCanFallthough = MBB.canFallThrough();
+ }
+
+ return Changed;
+}
+
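For concreteness, a sketch of what AlignBlocks() does on a simple layout, assuming a preferred loop alignment of 16 bytes (illustrative, not part of the patch):

//   bb.0:  ...   ; ends in an unconditional branch -> cannot fall through
//   bb.1:  ...   ; ends in a conditional branch    -> can fall through
//   bb.2:  ...
// bb.1 gets setAlignment(Align(16)) because its layout predecessor bb.0
// cannot fall through into it, so any padding NOPs are never executed;
// bb.2 keeps its alignment because bb.1 can fall through into it.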
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MCP = mf.getConstantPool();
@@ -359,6 +385,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
isThumb2 = AFI->isThumb2Function();
bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
+ // TBB generation code in this constant island pass has not been adapted to
+ // deal with speculation barriers.
+ if (STI->hardenSlsRetBr())
+ GenerateTBB = false;
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
@@ -376,6 +406,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF->RenumberBlocks();
}
+ // Align any non-fallthrough blocks
+ MadeChange |= AlignBlocks(MF);
+
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
@@ -491,7 +524,11 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
// The function needs to be as aligned as the basic blocks. The linker may
// move functions around based on their alignment.
- MF->ensureAlignment(BB->getAlignment());
+ // Special case: halfword literals still need word alignment on the function.
+ Align FuncAlign = MaxAlign;
+ if (MaxAlign == 2)
+ FuncAlign = Align(4);
+ MF->ensureAlignment(FuncAlign);
// Order the entries in BB by descending alignment. That ensures correct
// alignment of all entries as long as BB is sufficiently aligned. Keep
@@ -506,7 +543,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
- unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ unsigned Size = CPs[i].getSizeInBytes(TD);
Align Alignment = CPs[i].getAlign();
// Verify that all constant pool entries are a multiple of their alignment.
// If not, we would have to pad them out so that instructions stay aligned.
@@ -549,6 +586,12 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
for (MachineBasicBlock &MBB : *MF) {
auto MI = MBB.getLastNonDebugInstr();
+ // Look past potential SpeculationBarriers at end of BB.
+ while (MI != MBB.end() &&
+ (isSpeculationBarrierEndBBOpcode(MI->getOpcode()) ||
+ MI->isDebugInstr()))
+ --MI;
+
if (MI == MBB.end())
continue;
@@ -771,15 +814,26 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Taking the address of a CP entry.
case ARM::LEApcrel:
- case ARM::LEApcrelJT:
- // This takes a SoImm, which is 8 bit immediate rotated. We'll
- // pretend the maximum offset is 255 * 4. Since each instruction
- // 4 byte wide, this is always correct. We'll check for other
- // displacements that fits in a SoImm as well.
- Bits = 8;
- Scale = 4;
- NegOk = true;
- IsSoImm = true;
+ case ARM::LEApcrelJT: {
+ // This takes a SoImm, which is an 8-bit immediate rotated. We'll
+ // pretend the maximum offset is 255 * 4. Since each instruction
+ // is 4 bytes wide, this is always correct. We'll check for other
+ // displacements that fit in a SoImm as well.
+ Bits = 8;
+ NegOk = true;
+ IsSoImm = true;
+ unsigned CPI = I.getOperand(op).getIndex();
+ assert(CPI < CPEMIs.size());
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ const Align CPEAlign = getCPEAlign(CPEMI);
+ const unsigned LogCPEAlign = Log2(CPEAlign);
+ if (LogCPEAlign >= 2)
+ Scale = 4;
+ else
+ // For constants with less than 4-byte alignment,
+ // we'll pretend the maximum offset is 255 * 1.
+ Scale = 1;
+ }
break;
case ARM::t2LEApcrel:
case ARM::t2LEApcrelJT:
@@ -2070,8 +2124,7 @@ static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
MachineFunction *MF = MBB->getParent();
++MBB;
- return MBB != MF->end() && MBB->begin() != MBB->end() &&
- &*MBB->begin() == CPEMI;
+ return MBB != MF->end() && !MBB->empty() && &*MBB->begin() == CPEMI;
}
static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 48622aae3cb4..a7f1765a9311 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -873,16 +873,27 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
// FIXME Windows CE supports older ARM CPUs
assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");
- // Expand into a movi + orr.
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg);
-
assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
unsigned ImmVal = (unsigned)MO.getImm();
- unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
- unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ unsigned SOImmValV1 = 0, SOImmValV2 = 0;
+
+ if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+ SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ } else { // Expand into a mvn + sub.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+ SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
+ SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
+ SOImmValV1 = ~(-SOImmValV1);
+ }
+
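// A note on why the mvn + sub expansion above is correct: let F and S be the
// two rotated parts of -ImmVal, so F + S == -ImmVal (mod 2^32). The immediate
// given to the MVNi is ~(-F), and MVN writes the bitwise NOT of its operand,
// so after "mvn dst, #~(-F)" the register holds -F; the following
// "sub dst, dst, #S" then yields -F - S == -(F + S) == ImmVal.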
unsigned MIFlags = MI.getFlags();
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
@@ -1860,6 +1871,66 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
default:
return false;
+ case ARM::VBSPd:
+ case ARM::VBSPq: {
+ Register DstReg = MI.getOperand(0).getReg();
+ if (DstReg == MI.getOperand(3).getReg()) {
+ // Expand to VBIT
+ unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(4).getImm())
+ .add(MI.getOperand(5));
+ } else if (DstReg == MI.getOperand(2).getReg()) {
+ // Expand to VBIF
+ unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(4).getImm())
+ .add(MI.getOperand(5));
+ } else {
+ // Expand to VBSL
+ unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq;
+ if (DstReg == MI.getOperand(1).getReg()) {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .addImm(MI.getOperand(4).getImm())
+ .add(MI.getOperand(5));
+ } else {
+ // Use move to satisfy constraints
+ unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
+ .addReg(DstReg,
+ RegState::Define |
+ getRenamableRegState(MI.getOperand(0).isRenamable()))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(4).getImm())
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
+ .add(MI.getOperand(0))
+ .addReg(DstReg,
+ RegState::Kill |
+ getRenamableRegState(MI.getOperand(0).isRenamable()))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .addImm(MI.getOperand(4).getImm())
+ .add(MI.getOperand(5));
+ }
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
case ARM::TCRETURNdi:
case ARM::TCRETURNri: {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -2233,8 +2304,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
- MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(Thumb ? ARM::tBLXr : ARM::BLX));
+ MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF)));
if (Thumb)
MIB.add(predOps(ARMCC::AL));
MIB.addReg(Reg, RegState::Kill);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
index 4bfca8a803ca..da1d9af8d5b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -606,7 +606,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
}
}
- if (IsIndirect) {
+ if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
+ (Subtarget->isTargetMachO() && IsIndirect) ||
+ Subtarget->genLongCalls()) {
MachineInstrBuilder MIB;
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
@@ -2173,7 +2175,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
if (UseReg)
- return isThumb2 ? ARM::tBLXr : ARM::BLX;
+ return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF);
else
return isThumb2 ? ARM::tBL : ARM::BL;
}
@@ -2264,9 +2266,11 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// BL / BLX don't take a predicate, but tBL / tBLX do.
if (isThumb2)
MIB.add(predOps(ARMCC::AL));
- if (Subtarget->genLongCalls())
+ if (Subtarget->genLongCalls()) {
+ CalleeReg =
+ constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
MIB.addReg(CalleeReg);
- else
+ } else
MIB.addExternalSymbol(TLI.getLibcallName(Call));
// Add implicit physical register uses to the call.
@@ -2404,9 +2408,11 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// ARM calls don't take a predicate, but tBL / tBLX do.
if(isThumb2)
MIB.add(predOps(ARMCC::AL));
- if (UseReg)
+ if (UseReg) {
+ CalleeReg =
+ constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
MIB.addReg(CalleeReg);
- else if (!IntrMemName)
+ } else if (!IntrMemName)
MIB.addGlobalAddress(GV, 0, 0);
else
MIB.addExternalSymbol(IntrMemName, 0);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFeatures.h
index 5cd7006c22fc..99e0ef05b5e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFeatures.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFeatures.h
@@ -75,6 +75,7 @@ inline bool isV8EligibleForIT(const InstrType *Instr) {
// there are some "conditionally deprecated" opcodes
case ARM::tADDspr:
case ARM::tBLXr:
+ case ARM::tBLXr_noip:
return Instr->getOperand(2).getReg() != ARM::PC;
// ADD PC, SP and BLX PC were always unpredictable,
// now on top of it they're deprecated
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 8a8f3237bb6f..9eeb7f20dc8d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -883,9 +883,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
-int ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const {
- return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ Register &FrameReg) const {
+ return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
}
int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
@@ -2113,8 +2114,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned NumExtras = TargetAlign.value() / 4;
SmallVector<unsigned, 2> Extras;
while (NumExtras && !UnspilledCS1GPRs.empty()) {
- unsigned Reg = UnspilledCS1GPRs.back();
- UnspilledCS1GPRs.pop_back();
+ unsigned Reg = UnspilledCS1GPRs.pop_back_val();
if (!MRI.isReserved(Reg) &&
(!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
Extras.push_back(Reg);
@@ -2124,8 +2124,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// For non-Thumb1 functions, also check for hi-reg CS registers
if (!AFI->isThumb1OnlyFunction()) {
while (NumExtras && !UnspilledCS2GPRs.empty()) {
- unsigned Reg = UnspilledCS2GPRs.back();
- UnspilledCS2GPRs.pop_back();
+ unsigned Reg = UnspilledCS2GPRs.pop_back_val();
if (!MRI.isReserved(Reg)) {
Extras.push_back(Reg);
NumExtras--;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
index 4c2c07d64f57..9822e2321bb4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -47,8 +48,8 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg, int SPAdj) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
index 0fa32a0abeff..f083fa6662e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -10,11 +10,19 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+
using namespace llvm;
+static cl::opt<int> DataBankMask("arm-data-bank-mask", cl::init(-1),
+ cl::Hidden);
+static cl::opt<bool> AssumeITCMConflict("arm-assume-itcm-bankconflict",
+ cl::init(false), cl::Hidden);
+
static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
const TargetRegisterInfo &TRI) {
// FIXME: Detect integer instructions properly.
@@ -31,7 +39,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
}
ScheduleHazardRecognizer::HazardType
-ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ARMHazardRecognizerFPMLx::getHazardType(SUnit *SU, int Stalls) {
assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
MachineInstr *MI = SU->getInstr();
@@ -68,33 +76,193 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
}
}
}
-
- return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+ return NoHazard;
}
-void ARMHazardRecognizer::Reset() {
+void ARMHazardRecognizerFPMLx::Reset() {
LastMI = nullptr;
FpMLxStalls = 0;
- ScoreboardHazardRecognizer::Reset();
}
-void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
+void ARMHazardRecognizerFPMLx::EmitInstruction(SUnit *SU) {
MachineInstr *MI = SU->getInstr();
if (!MI->isDebugInstr()) {
LastMI = MI;
FpMLxStalls = 0;
}
-
- ScoreboardHazardRecognizer::EmitInstruction(SU);
}
-void ARMHazardRecognizer::AdvanceCycle() {
+void ARMHazardRecognizerFPMLx::AdvanceCycle() {
if (FpMLxStalls && --FpMLxStalls == 0)
// Stalled for 4 cycles but still can't schedule any other instructions.
LastMI = nullptr;
- ScoreboardHazardRecognizer::AdvanceCycle();
}
-void ARMHazardRecognizer::RecedeCycle() {
+void ARMHazardRecognizerFPMLx::RecedeCycle() {
llvm_unreachable("reverse ARM hazard checking unsupported");
}
+
+///////// Bank conflicts handled as hazards //////////////
+
+static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp,
+ int64_t &Offset) {
+
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ unsigned IndexMode =
+ (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+
+ // Address mode tells us what we want to know about operands for T2
+ // instructions (but not size). It tells us size (but not about operands)
+ // for T1 instructions.
+ switch (AddrMode) {
+ default:
+ return false;
+ case ARMII::AddrModeT2_i8:
+ // t2LDRBT, t2LDRB_POST, t2LDRB_PRE, t2LDRBi8,
+ // t2LDRHT, t2LDRH_POST, t2LDRH_PRE, t2LDRHi8,
+ // t2LDRSBT, t2LDRSB_POST, t2LDRSB_PRE, t2LDRSBi8,
+ // t2LDRSHT, t2LDRSH_POST, t2LDRSH_PRE, t2LDRSHi8,
+ // t2LDRT, t2LDR_POST, t2LDR_PRE, t2LDRi8
+ BaseOp = &MI.getOperand(1);
+ Offset = (IndexMode == ARMII::IndexModePost)
+ ? 0
+ : (IndexMode == ARMII::IndexModePre ||
+ IndexMode == ARMII::IndexModeUpd)
+ ? MI.getOperand(3).getImm()
+ : MI.getOperand(2).getImm();
+ return true;
+ case ARMII::AddrModeT2_i12:
+ // t2LDRBi12, t2LDRHi12
+ // t2LDRSBi12, t2LDRSHi12
+ // t2LDRi12
+ BaseOp = &MI.getOperand(1);
+ Offset = MI.getOperand(2).getImm();
+ return true;
+ case ARMII::AddrModeT2_i8s4:
+ // t2LDRD_POST, t2LDRD_PRE, t2LDRDi8
+ BaseOp = &MI.getOperand(2);
+ Offset = (IndexMode == ARMII::IndexModePost)
+ ? 0
+ : (IndexMode == ARMII::IndexModePre ||
+ IndexMode == ARMII::IndexModeUpd)
+ ? MI.getOperand(4).getImm()
+ : MI.getOperand(3).getImm();
+ return true;
+ case ARMII::AddrModeT1_1:
+ // tLDRBi, tLDRBr (watch out!), tLDRSB
+ case ARMII::AddrModeT1_2:
+ // tLDRHi, tLDRHr (watch out!), tLDRSH
+ case ARMII::AddrModeT1_4:
+ // tLDRi, tLDRr (watch out!)
+ BaseOp = &MI.getOperand(1);
+ Offset = MI.getOperand(2).isImm() ? MI.getOperand(2).getImm() : 0;
+ return MI.getOperand(2).isImm();
+ }
+ return false;
+}
+
+ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer(
+ const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict)
+ : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()),
+ DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask)
+ : CPUBankMask),
+ AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences()
+ ? AssumeITCMConflict
+ : CPUAssumeITCMConflict) {
+ MaxLookAhead = 1;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMBankConflictHazardRecognizer::CheckOffsets(unsigned O0, unsigned O1) {
+ return (((O0 ^ O1) & DataMask) != 0) ? NoHazard : Hazard;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMBankConflictHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ MachineInstr &L0 = *SU->getInstr();
+ if (!L0.mayLoad() || L0.mayStore() || L0.getNumMemOperands() != 1)
+ return NoHazard;
+
+ auto MO0 = *L0.memoperands().begin();
+ auto BaseVal0 = MO0->getValue();
+ auto BasePseudoVal0 = MO0->getPseudoValue();
+ int64_t Offset0 = 0;
+
+ if (MO0->getSize() > 4)
+ return NoHazard;
+
+ bool SPvalid = false;
+ const MachineOperand *SP = nullptr;
+ int64_t SPOffset0 = 0;
+
+ for (auto L1 : Accesses) {
+ auto MO1 = *L1->memoperands().begin();
+ auto BaseVal1 = MO1->getValue();
+ auto BasePseudoVal1 = MO1->getPseudoValue();
+ int64_t Offset1 = 0;
+
+ // Pointers to the same object
+ if (BaseVal0 && BaseVal1) {
+ const Value *Ptr0, *Ptr1;
+ Ptr0 = GetPointerBaseWithConstantOffset(BaseVal0, Offset0, DL, true);
+ Ptr1 = GetPointerBaseWithConstantOffset(BaseVal1, Offset1, DL, true);
+ if (Ptr0 == Ptr1 && Ptr0)
+ return CheckOffsets(Offset0, Offset1);
+ }
+
+ if (BasePseudoVal0 && BasePseudoVal1 &&
+ BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
+ BasePseudoVal0->kind() == PseudoSourceValue::FixedStack) {
+ // Spills/fills
+ auto FS0 = cast<FixedStackPseudoSourceValue>(BasePseudoVal0);
+ auto FS1 = cast<FixedStackPseudoSourceValue>(BasePseudoVal1);
+ Offset0 = MF.getFrameInfo().getObjectOffset(FS0->getFrameIndex());
+ Offset1 = MF.getFrameInfo().getObjectOffset(FS1->getFrameIndex());
+ return CheckOffsets(Offset0, Offset1);
+ }
+
+ // Constant pools (likely in ITCM)
+ if (BasePseudoVal0 && BasePseudoVal1 &&
+ BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
+ BasePseudoVal0->isConstantPool() && AssumeITCMBankConflict)
+ return Hazard;
+
+ // Is this a stack pointer-relative access? We could in general try to
+ // use "is this the same register and is it unchanged?", but the
+ // memory operand tracking is highly likely to have already found that.
+ // What we're after here is bank conflicts between different objects in
+ // the stack frame.
+ if (!SPvalid) { // set up SP
+ if (!getBaseOffset(L0, SP, SPOffset0) || SP->getReg().id() != ARM::SP)
+ SP = nullptr;
+ SPvalid = true;
+ }
+ if (SP) {
+ int64_t SPOffset1;
+ const MachineOperand *SP1;
+ if (getBaseOffset(*L1, SP1, SPOffset1) && SP1->getReg().id() == ARM::SP)
+ return CheckOffsets(SPOffset0, SPOffset1);
+ }
+ }
+
+ return NoHazard;
+}
+
+void ARMBankConflictHazardRecognizer::Reset() { Accesses.clear(); }
+
+void ARMBankConflictHazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr &MI = *SU->getInstr();
+ if (!MI.mayLoad() || MI.mayStore() || MI.getNumMemOperands() != 1)
+ return;
+
+ auto MO = *MI.memoperands().begin();
+ uint64_t Size1 = MO->getSize();
+ if (Size1 > 4)
+ return;
+ Accesses.push_back(&MI);
+}
+
+void ARMBankConflictHazardRecognizer::AdvanceCycle() { Accesses.clear(); }
+
+void ARMBankConflictHazardRecognizer::RecedeCycle() { Accesses.clear(); }
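The heart of the bank-conflict recognizer added above is CheckOffsets: two narrow loads are assumed to hit the same memory bank, and therefore stall, when their offsets agree in every bit selected by the data bank mask. A minimal standalone sketch of that test follows; the mask value and the names used here are illustrative only and are not taken from these sources or from any real CPU model.

#include <cassert>
#include <cstdint>

enum class BankHazard { None, Conflict };

// Mirrors the ((O0 ^ O1) & DataMask) test above: offsets that differ in a
// masked bit land in different banks and can issue back to back.
static BankHazard checkOffsets(uint64_t O0, uint64_t O1, int64_t DataMask) {
  return (((O0 ^ O1) & DataMask) != 0) ? BankHazard::None
                                       : BankHazard::Conflict;
}

int main() {
  const int64_t Mask = 0x4; // hypothetical: bit 2 selects the data bank
  assert(checkOffsets(0, 4, Mask) == BankHazard::None);     // different banks
  assert(checkOffsets(0, 8, Mask) == BankHazard::Conflict); // same bank
  return 0;
}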
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h
index ca02cc739e11..c1f1bcd0a629 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -13,27 +13,28 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
#define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
-#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "ARMBaseInstrInfo.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/DataTypes.h"
+#include <array>
+#include <initializer_list>
namespace llvm {
-class ARMBaseInstrInfo;
-class ARMBaseRegisterInfo;
-class ARMSubtarget;
+class DataLayout;
+class MachineFunction;
class MachineInstr;
+class ScheduleDAG;
-/// ARMHazardRecognizer handles special constraints that are not expressed in
-/// the scheduling itinerary. This is only used during postRA scheduling. The
-/// ARM preRA scheduler uses an unspecialized instance of the
-/// ScoreboardHazardRecognizer.
-class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
+// Hazards related to FP MLx instructions
+class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer {
MachineInstr *LastMI = nullptr;
unsigned FpMLxStalls = 0;
public:
- ARMHazardRecognizer(const InstrItineraryData *ItinData,
- const ScheduleDAG *DAG)
- : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched") {}
+ ARMHazardRecognizerFPMLx() : ScheduleHazardRecognizer() { MaxLookAhead = 1; }
HazardType getHazardType(SUnit *SU, int Stalls) override;
void Reset() override;
@@ -42,6 +43,27 @@ public:
void RecedeCycle() override;
};
+// Hazards related to bank conflicts
+class ARMBankConflictHazardRecognizer : public ScheduleHazardRecognizer {
+ SmallVector<MachineInstr *, 8> Accesses;
+ const MachineFunction &MF;
+ const DataLayout &DL;
+ int64_t DataMask;
+ bool AssumeITCMBankConflict;
+
+public:
+ ARMBankConflictHazardRecognizer(const ScheduleDAG *DAG, int64_t DDM,
+ bool ABC);
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ void Reset() override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void RecedeCycle() override;
+
+private:
+ inline HazardType CheckOffsets(unsigned O0, unsigned O1);
+};
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 7bad485e390c..c3cb02431e66 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -143,7 +143,7 @@ static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
cl::desc("Maximum size of ALL constants to promote into a constant pool"),
cl::init(128));
-static cl::opt<unsigned>
+cl::opt<unsigned>
MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
cl::init(2));
@@ -289,6 +289,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
// Vector reductions
setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
@@ -335,6 +337,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
// Pre and Post inc are supported on loads and stores
for (unsigned im = (unsigned)ISD::PRE_INC;
@@ -439,6 +443,9 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
}
}
@@ -988,6 +995,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SMAX);
setTargetDAGCombine(ISD::UMAX);
setTargetDAGCombine(ISD::FP_EXTEND);
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::SELECT_CC);
}
if (!Subtarget->hasFP64()) {
@@ -1717,8 +1726,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VCVTL: return "ARMISD::VCVTL";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::VQDMULH: return "ARMISD::VQDMULH";
case ARMISD::VADDVs: return "ARMISD::VADDVs";
case ARMISD::VADDVu: return "ARMISD::VADDVu";
+ case ARMISD::VADDVps: return "ARMISD::VADDVps";
+ case ARMISD::VADDVpu: return "ARMISD::VADDVpu";
case ARMISD::VADDLVs: return "ARMISD::VADDLVs";
case ARMISD::VADDLVu: return "ARMISD::VADDLVu";
case ARMISD::VADDLVAs: return "ARMISD::VADDLVAs";
@@ -1729,10 +1741,20 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VADDLVApu: return "ARMISD::VADDLVApu";
case ARMISD::VMLAVs: return "ARMISD::VMLAVs";
case ARMISD::VMLAVu: return "ARMISD::VMLAVu";
+ case ARMISD::VMLAVps: return "ARMISD::VMLAVps";
+ case ARMISD::VMLAVpu: return "ARMISD::VMLAVpu";
case ARMISD::VMLALVs: return "ARMISD::VMLALVs";
case ARMISD::VMLALVu: return "ARMISD::VMLALVu";
+ case ARMISD::VMLALVps: return "ARMISD::VMLALVps";
+ case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu";
case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs";
case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";
+ case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";
+ case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";
+ case ARMISD::VMINVu: return "ARMISD::VMINVu";
+ case ARMISD::VMINVs: return "ARMISD::VMINVs";
+ case ARMISD::VMAXVu: return "ARMISD::VMAXVu";
+ case ARMISD::VMAXVs: return "ARMISD::VMAXVs";
case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
@@ -1756,7 +1778,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
- case ARMISD::VBSL: return "ARMISD::VBSL";
+ case ARMISD::VBSP: return "ARMISD::VBSP";
case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
@@ -2510,9 +2532,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
@@ -3321,8 +3343,7 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
MVT::i32, DL, Chain, DescAddr,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- /* Alignment = */ 4,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
Chain = FuncTLVGet.getValue(1);
@@ -3536,8 +3557,7 @@ static bool allUsersAreInFunction(const Value *V, const Function *F) {
while (!Worklist.empty()) {
auto *U = Worklist.pop_back_val();
if (isa<ConstantExpr>(U)) {
- for (auto *UU : U->users())
- Worklist.push_back(UU);
+ append_range(Worklist, U->users());
continue;
}
@@ -4424,13 +4444,26 @@ SDValue ARMTargetLowering::LowerFormalArguments(
}
// varargs
- if (isVarArg && MFI.hasVAStart())
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
- CCInfo.getNextStackOffset(),
+ if (isVarArg && MFI.hasVAStart()) {
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
TotalArgRegsSaveSize);
+ if (AFI->isCmseNSEntryFunction()) {
+ DiagnosticInfoUnsupported Diag(
+ DAG.getMachineFunction().getFunction(),
+ "secure entry function must not be variadic", dl.getDebugLoc());
+ DAG.getContext()->diagnose(Diag);
+ }
+ }
AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
+ if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
+ DiagnosticInfoUnsupported Diag(
+ DAG.getMachineFunction().getFunction(),
+ "secure entry function requires arguments on stack", dl.getDebugLoc());
+ DAG.getContext()->diagnose(Diag);
+ }
+
return Chain;
}
@@ -4991,16 +5024,6 @@ static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
}
-// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
-static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
- const SDValue TrueVal, const SDValue FalseVal,
- const ISD::CondCode CC, const SDValue K) {
- return (isGTorGE(CC) &&
- ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
- (isLTorLE(CC) &&
- ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
-}
-
// Check if two chained conditionals could be converted into SSAT or USAT.
//
// SSAT can replace a set of two conditional selectors that bound a number to an
@@ -5012,101 +5035,68 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
// x < k ? (x < -k ? -k : x) : k
// etc.
//
-// USAT works similarily to SSAT but bounds on the interval [0, k] where k + 1 is
-// a power of 2.
+// LLVM canonicalizes these to either a min(max()) or a max(min())
+// pattern. This function tries to match one of these and will return a SSAT
+// node if successful.
//
-// It returns true if the conversion can be done, false otherwise.
-// Additionally, the variable is returned in parameter V, the constant in K and
-// usat is set to true if the conditional represents an unsigned saturation
-static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
- uint64_t &K, bool &usat) {
- SDValue LHS1 = Op.getOperand(0);
- SDValue RHS1 = Op.getOperand(1);
+// USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1
+// is a power of 2.
+static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ SDValue V1 = Op.getOperand(0);
+ SDValue K1 = Op.getOperand(1);
SDValue TrueVal1 = Op.getOperand(2);
SDValue FalseVal1 = Op.getOperand(3);
ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
if (Op2.getOpcode() != ISD::SELECT_CC)
- return false;
+ return SDValue();
- SDValue LHS2 = Op2.getOperand(0);
- SDValue RHS2 = Op2.getOperand(1);
+ SDValue V2 = Op2.getOperand(0);
+ SDValue K2 = Op2.getOperand(1);
SDValue TrueVal2 = Op2.getOperand(2);
SDValue FalseVal2 = Op2.getOperand(3);
ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
- // Find out which are the constants and which are the variables
- // in each conditional
- SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
- ? &RHS1
- : nullptr;
- SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
- ? &RHS2
- : nullptr;
- SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
- SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
- SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
- SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
-
- // We must detect cases where the original operations worked with 16- or
- // 8-bit values. In such case, V2Tmp != V2 because the comparison operations
- // must work with sign-extended values but the select operations return
- // the original non-extended value.
- SDValue V2TmpReg = V2Tmp;
- if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
- V2TmpReg = V2Tmp->getOperand(0);
-
- // Check that the registers and the constants have the correct values
- // in both conditionals
- if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
- V2TmpReg != V2)
- return false;
+ SDValue V1Tmp = V1;
+ SDValue V2Tmp = V2;
- // Figure out which conditional is saturating the lower/upper bound.
- const SDValue *LowerCheckOp =
- isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
- ? &Op
- : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
- ? &Op2
- : nullptr;
- const SDValue *UpperCheckOp =
- isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
- ? &Op
- : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
- ? &Op2
- : nullptr;
-
- if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
- return false;
+ // Check that the registers and the constants match a max(min()) or min(max())
+ // pattern
+ if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
+ K2 != FalseVal2 ||
+ !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
+ return SDValue();
// Check that the constant in the lower-bound check is
// the opposite of the constant in the upper-bound check
// in 1's complement.
- int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
- int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
+ if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
+ return SDValue();
+
+ int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
+ int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
int64_t PosVal = std::max(Val1, Val2);
int64_t NegVal = std::min(Val1, Val2);
- if (((Val1 > Val2 && UpperCheckOp == &Op) ||
- (Val1 < Val2 && UpperCheckOp == &Op2)) &&
- isPowerOf2_64(PosVal + 1)) {
-
- // Handle the difference between USAT (unsigned) and SSAT (signed) saturation
- if (Val1 == ~Val2)
- usat = false;
- else if (NegVal == 0)
- usat = true;
- else
- return false;
-
- V = V2;
- K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
+ if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
+ !isPowerOf2_64(PosVal + 1))
+ return SDValue();
- return true;
- }
+ // Handle the difference between USAT (unsigned) and SSAT (signed)
+ // saturation. At this point, PosVal is guaranteed to be positive.
+ uint64_t K = PosVal;
+ SDLoc dl(Op);
+ if (Val1 == ~Val2)
+ return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
+ DAG.getConstant(countTrailingOnes(K), dl, VT));
+ if (NegVal == 0)
+ return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
+ DAG.getConstant(countTrailingOnes(K), dl, VT));
- return false;
+ return SDValue();
}
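A rough scalar model of what the matcher above looks for, written as a sketch rather than code from these sources: clamping to [~k, k] with k + 1 a power of two is SSAT behaviour (hence the Val1 == ~Val2 check), and clamping to [0, k] is USAT behaviour (hence the NegVal == 0 check).

#include <algorithm>
#include <cassert>
#include <cstdint>

static int32_t ssatModel(int32_t X, int32_t K) { // K == 2^(n-1) - 1
  return std::min(std::max(X, ~K), K);           // lower bound ~K == -K - 1
}

static int32_t usatModel(int32_t X, int32_t K) { // K == 2^n - 1
  return std::min(std::max(X, int32_t(0)), K);
}

int main() {
  assert(ssatModel(1000, 127) == 127);   // the range an SSAT #8 would give
  assert(ssatModel(-1000, 127) == -128);
  assert(usatModel(-5, 255) == 0);       // the range a USAT #8 would give
  assert(usatModel(300, 255) == 255);
  return 0;
}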
// Check if a condition of the type x < k ? k : x can be converted into a
@@ -5166,18 +5156,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
// Try to convert two saturating conditional selects into a single SSAT
- SDValue SatValue;
- uint64_t SatConstant;
- bool SatUSat;
- if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
- isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) {
- if (SatUSat)
- return DAG.getNode(ARMISD::USAT, dl, VT, SatValue,
- DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
- else
- return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
- DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
- }
+ if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
+ if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
+ return SatValue;
// Try to convert expressions of the form x < k ? k : x (and similar forms)
// into more efficient bit operations, which is possible when k is 0 or -1
@@ -5186,6 +5167,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// instructions.
// Only allow this transformation on full-width (32-bit) operations
SDValue LowerSatConstant;
+ SDValue SatValue;
if (VT == MVT::i32 &&
isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
@@ -7769,17 +7751,19 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
- if (SrcVT.getSizeInBits() == VT.getSizeInBits())
+ uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ if (SrcVTSize == VTSize)
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
- unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+ unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
- if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
- if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
+ if (SrcVTSize < VTSize) {
+ if (2 * SrcVTSize != VTSize)
return SDValue();
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
@@ -7789,7 +7773,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
continue;
}
- if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
+ if (SrcVTSize != 2 * VTSize)
return SDValue();
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
@@ -7857,7 +7841,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
- int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
+ int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
@@ -8659,6 +8643,23 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
}
+// Turn a truncate of a predicate (an i1 vector) into icmp(and(x, 1), 0).
+static SDValue LowerTruncatei1(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ assert(ST->hasMVEIntegerOps() && "Expected MVE!");
+ EVT VT = N.getValueType();
+ assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
+ "Expected a vector i1 type!");
+ SDValue Op = N.getOperand(0);
+ EVT FromVT = Op.getValueType();
+ SDLoc DL(N);
+
+ SDValue And =
+ DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
+ DAG.getCondCode(ISD::SETNE));
+}
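Lane for lane, the rewrite above is just the usual "keep the low bit" truncation. A scalar sketch of the same equivalence, for illustration only:

#include <cassert>
#include <cstdint>

static bool truncToI1Model(uint32_t X) {
  return (X & 1u) != 0; // icmp ne (and x, 1), 0
}

int main() {
  assert(truncToI1Model(3));  // low bit set
  assert(!truncToI1Model(4)); // low bit clear
  return 0;
}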
+
/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
/// element has been zero/sign-extended, depending on the isSigned parameter,
/// from an integer type half its size.
@@ -8723,10 +8724,11 @@ static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
return false;
}
-/// isZeroExtended - Check if a node is a vector value that is zero-extended
-/// or a constant BUILD_VECTOR with zero-extended elements.
+/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
+/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
+ if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
+ ISD::isZEXTLoad(N))
return true;
if (isExtendedBUILD_VECTOR(N, DAG, false))
return true;
@@ -8794,13 +8796,14 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
}
/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
-/// extending load, or BUILD_VECTOR with extended elements, return the
-/// unextended value. The unextended vector should be 64 bits so that it can
+/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
+/// the unextended value. The unextended vector should be 64 bits so that it can
/// be used as an operand to a VMULL instruction. If the original vector size
/// before extension is less than 64 bits, we add an extension to resize
/// the vector to 64 bits.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ if (N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
@@ -9768,6 +9771,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
+ case ISD::TRUNCATE: return LowerTruncatei1(Op, DAG, Subtarget);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV:
@@ -10400,8 +10404,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
- SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
- BB->succ_end());
+ SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
while (!Successors.empty()) {
MachineBasicBlock *SMBB = Successors.pop_back_val();
if (SMBB->isEHPad()) {
@@ -10885,7 +10888,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
.addExternalSymbol("__chkstk");
- BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
+ BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
.add(predOps(ARMCC::AL))
.addReg(Reg, RegState::Kill)
.addReg(ARM::R4, RegState::Implicit | RegState::Kill)
@@ -11264,6 +11267,14 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return EmitLowered__chkstk(MI, BB);
case ARM::WIN__DBZCHK:
return EmitLowered__dbzchk(MI, BB);
+ case ARM::t2DoLoopStart:
+ // We are just here to set a register allocation hint, preferring lr for
+ // the input register, to make it more likely to be movable and removable
+ // later in the pipeline.
+ Register R = MI.getOperand(1).getReg();
+ MachineFunction *MF = MI.getParent()->getParent();
+ MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
+ return BB;
}
}
@@ -12105,9 +12116,198 @@ static SDValue PerformAddeSubeCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasMVEIntegerOps())
+ return SDValue();
+
+ SDLoc dl(N);
+ SDValue SetCC;
+ SDValue LHS;
+ SDValue RHS;
+ ISD::CondCode CC;
+ SDValue TrueVal;
+ SDValue FalseVal;
+
+ if (N->getOpcode() == ISD::SELECT &&
+ N->getOperand(0)->getOpcode() == ISD::SETCC) {
+ SetCC = N->getOperand(0);
+ LHS = SetCC->getOperand(0);
+ RHS = SetCC->getOperand(1);
+ CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ TrueVal = N->getOperand(1);
+ FalseVal = N->getOperand(2);
+ } else if (N->getOpcode() == ISD::SELECT_CC) {
+ LHS = N->getOperand(0);
+ RHS = N->getOperand(1);
+ CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ TrueVal = N->getOperand(2);
+ FalseVal = N->getOperand(3);
+ } else {
+ return SDValue();
+ }
+
+ unsigned int Opcode = 0;
+ if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
+ FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
+ (CC == ISD::SETULT || CC == ISD::SETUGT)) {
+ Opcode = ARMISD::VMINVu;
+ if (CC == ISD::SETUGT)
+ std::swap(TrueVal, FalseVal);
+ } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
+ FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
+ (CC == ISD::SETLT || CC == ISD::SETGT)) {
+ Opcode = ARMISD::VMINVs;
+ if (CC == ISD::SETGT)
+ std::swap(TrueVal, FalseVal);
+ } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
+ FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
+ (CC == ISD::SETUGT || CC == ISD::SETULT)) {
+ Opcode = ARMISD::VMAXVu;
+ if (CC == ISD::SETULT)
+ std::swap(TrueVal, FalseVal);
+ } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
+ FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
+ (CC == ISD::SETGT || CC == ISD::SETLT)) {
+ Opcode = ARMISD::VMAXVs;
+ if (CC == ISD::SETLT)
+ std::swap(TrueVal, FalseVal);
+ } else
+ return SDValue();
+
+ // Normalise to the right hand side being the vector reduction
+ switch (TrueVal->getOpcode()) {
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_SMAX:
+ std::swap(LHS, RHS);
+ std::swap(TrueVal, FalseVal);
+ break;
+ }
+
+ EVT VectorType = FalseVal->getOperand(0).getValueType();
+
+ if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
+ VectorType != MVT::v4i32)
+ return SDValue();
+
+ EVT VectorScalarType = VectorType.getVectorElementType();
+
+ // The values being selected must also be the ones being compared
+ if (TrueVal != LHS || FalseVal != RHS)
+ return SDValue();
+
+ EVT LeftType = LHS->getValueType(0);
+ EVT RightType = RHS->getValueType(0);
+
+ // The types must match the reduced type too
+ if (LeftType != VectorScalarType || RightType != VectorScalarType)
+ return SDValue();
+
+ // Legalise the scalar to an i32
+ if (VectorScalarType != MVT::i32)
+ LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // Generate the reduction as an i32 for legalisation purposes
+ auto Reduction =
+ DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
+
+ // The result isn't actually an i32 so truncate it back to its original type
+ if (VectorScalarType != MVT::i32)
+ Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
+
+ return Reduction;
+}
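In scalar terms the pattern matched above says: the minimum (or maximum) of a scalar and a vector reduction of the same flavour is itself one reduction over both, which is what the VMINV/VMAXV instructions compute. A small sketch of the unsigned-minimum case, assuming a non-empty vector; the names here are illustrative only.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

static uint32_t vminvModel(uint32_t X, const std::vector<uint32_t> &V) {
  uint32_t M = *std::min_element(V.begin(), V.end()); // vecreduce_umin
  return (X < M) ? X : M; // select(setcc(x, m, ult), x, m)
}

int main() {
  std::vector<uint32_t> V = {7, 3, 9};
  assert(vminvModel(2, V) == 2); // the scalar is smaller
  assert(vminvModel(5, V) == 3); // the vector minimum wins
  return 0;
}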
+
+// A special combine for the vqdmulh family of instructions. This is one of the
+// potential set of patterns that could match this instruction. The base pattern
+// you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
+// This combine matches the variant min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
+// which LLVM will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
+// the max is unnecessary.
+static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDValue Shft;
+ ConstantSDNode *Clamp;
+
+ if (N->getOpcode() == ISD::SMIN) {
+ Shft = N->getOperand(0);
+ Clamp = isConstOrConstSplat(N->getOperand(1));
+ } else if (N->getOpcode() == ISD::VSELECT) {
+ // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
+ SDValue Cmp = N->getOperand(0);
+ if (Cmp.getOpcode() != ISD::SETCC ||
+ cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
+ Cmp.getOperand(0) != N->getOperand(1) ||
+ Cmp.getOperand(1) != N->getOperand(2))
+ return SDValue();
+ Shft = N->getOperand(1);
+ Clamp = isConstOrConstSplat(N->getOperand(2));
+ } else
+ return SDValue();
+
+ if (!Clamp)
+ return SDValue();
+
+ MVT ScalarType;
+ int ShftAmt = 0;
+ switch (Clamp->getSExtValue()) {
+ case (1 << 7) - 1:
+ ScalarType = MVT::i8;
+ ShftAmt = 7;
+ break;
+ case (1 << 15) - 1:
+ ScalarType = MVT::i16;
+ ShftAmt = 15;
+ break;
+ case (1ULL << 31) - 1:
+ ScalarType = MVT::i32;
+ ShftAmt = 31;
+ break;
+ default:
+ return SDValue();
+ }
+
+ if (Shft.getOpcode() != ISD::SRA)
+ return SDValue();
+ ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
+ if (!N1 || N1->getSExtValue() != ShftAmt)
+ return SDValue();
+
+ SDValue Mul = Shft.getOperand(0);
+ if (Mul.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ SDValue Ext0 = Mul.getOperand(0);
+ SDValue Ext1 = Mul.getOperand(1);
+ if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
+ Ext1.getOpcode() != ISD::SIGN_EXTEND)
+ return SDValue();
+ EVT VecVT = Ext0.getOperand(0).getValueType();
+ if (VecVT != MVT::v4i32 && VecVT != MVT::v8i16 && VecVT != MVT::v16i8)
+ return SDValue();
+ if (Ext1.getOperand(0).getValueType() != VecVT ||
+ VecVT.getScalarType() != ScalarType ||
+ VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
+ return SDValue();
+
+ SDLoc DL(Mul);
+ SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, VecVT, Ext0.getOperand(0),
+ Ext1.getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, VQDMULH);
+}
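For 16-bit lanes the matched pattern reduces to a Q15 fixed-point multiply whose upper clamp is supplied by the smin, which is the value a saturating doubling multiply-return-high produces. A scalar sketch, assuming arithmetic right shift on signed values (as the DAG's ashr provides); it is an illustration, not the lowering itself.

#include <algorithm>
#include <cassert>
#include <cstdint>

static int16_t vqdmulhModelI16(int16_t X, int16_t Y) {
  int32_t Prod = int32_t(X) * int32_t(Y);          // mul(sext(x), sext(y))
  int32_t Shifted = Prod >> 15;                    // ashr ..., 15
  return int16_t(std::min<int32_t>(Shifted, 32767)); // smin with (1 << 15) - 1
}

int main() {
  assert(vqdmulhModelI16(16384, 16384) == 8192);    // 0.5 * 0.5 in Q15
  assert(vqdmulhModelI16(-32768, -32768) == 32767); // the saturating corner case
  return 0;
}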
+
static SDValue PerformVSELECTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasMVEIntegerOps())
+ return SDValue();
+
+ if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
+ return V;
+
// Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
//
// We need to re-implement this optimization here as the implementation in the
@@ -12117,9 +12317,6 @@ static SDValue PerformVSELECTCombine(SDNode *N,
//
// Currently, this is only done for MVE, as it's the only target that benefits
// from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
- if (!Subtarget->hasMVEIntegerOps())
- return SDValue();
-
if (N->getOperand(0).getOpcode() != ISD::XOR)
return SDValue();
SDValue XOR = N->getOperand(0);
@@ -12260,6 +12457,14 @@ static SDValue PerformADDVecReduce(SDNode *N,
return M;
if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
+ return M;
return SDValue();
}
@@ -13154,7 +13359,7 @@ static SDValue PerformORCombine(SDNode *N,
// Canonicalize the vector type to make instruction selection
// simpler.
EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
- SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
N0->getOperand(1),
N0->getOperand(0),
N1->getOperand(0));
@@ -13465,6 +13670,12 @@ static SDValue PerformVMOVrhCombine(SDNode *N,
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // fold (VMOVrh (fpconst x)) -> const x
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
+ APFloat V = C->getValueAPF();
+ return DCI.DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
+ }
+
// fold (VMOVrh (load x)) -> (zextload (i16*)x)
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -13639,6 +13850,23 @@ PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
}
+ // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
+ // more VPNOT which might get folded as else predicates.
+ if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
+ SDValue X =
+ DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
+ SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
+ DCI.DAG.getConstant(65535, dl, MVT::i32));
+ return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
+ }
+
+ // Only the bottom 16 bits of the source register are used.
+ if (Op.getValueType() == MVT::i32) {
+ APInt DemandedMask = APInt::getLowBitsSet(32, 16);
+ const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
+ return SDValue(N, 0);
+ }
return SDValue();
}
@@ -13851,10 +14079,13 @@ static SDValue CombineBaseUpdate(SDNode *N,
NumVecs = 3; break;
case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
NumVecs = 4; break;
+ case Intrinsic::arm_neon_vld1x2:
+ case Intrinsic::arm_neon_vld1x3:
+ case Intrinsic::arm_neon_vld1x4:
case Intrinsic::arm_neon_vld2dup:
case Intrinsic::arm_neon_vld3dup:
case Intrinsic::arm_neon_vld4dup:
- // TODO: Support updating VLDxDUP nodes. For now, we just skip
+ // TODO: Support updating VLD1x and VLDxDUP nodes. For now, we just skip
// combining base updates for such intrinsics.
continue;
case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
@@ -14446,27 +14677,38 @@ static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
// use the VMOVN over splitting the store. We are looking for patterns of:
// !rev: 0 N 1 N+1 2 N+2 ...
// rev: N 0 N+1 1 N+2 2 ...
- auto isVMOVNOriginalMask = [&](ArrayRef<int> M, bool rev) {
+ // The shuffle may either be a single source (in which case N = NumElts/2) or
+ // two inputs extended with concat to the same size (in which case N =
+ // NumElts).
+ auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
+ ArrayRef<int> M = SVN->getMask();
unsigned NumElts = ToVT.getVectorNumElements();
- if (NumElts != M.size())
- return false;
+ if (SVN->getOperand(1).isUndef())
+ NumElts /= 2;
- unsigned Off0 = rev ? NumElts : 0;
- unsigned Off1 = rev ? 0 : NumElts;
+ unsigned Off0 = Rev ? NumElts : 0;
+ unsigned Off1 = Rev ? 0 : NumElts;
- for (unsigned i = 0; i < NumElts; i += 2) {
- if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
+ for (unsigned I = 0; I < NumElts; I += 2) {
+ if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
return false;
- if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
+ if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
return false;
}
return true;
};
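A standalone version of the interleave test used by isVMOVNShuffle, shown with a hypothetical 8-element mask; negative (undef) entries are accepted, as in the lambda above. The names here are made up for illustration.

#include <cassert>
#include <vector>

static bool isVMOVNMask(const std::vector<int> &M, unsigned N, bool Rev) {
  unsigned Off0 = Rev ? N : 0, Off1 = Rev ? 0 : N;
  for (unsigned I = 0; I + 1 < M.size(); I += 2) {
    if (M[I] >= 0 && M[I] != int(Off0 + I / 2))
      return false;
    if (M[I + 1] >= 0 && M[I + 1] != int(Off1 + I / 2))
      return false;
  }
  return true;
}

int main() {
  std::vector<int> Fwd = {0, 4, 1, 5, 2, 6, 3, 7};
  std::vector<int> Bwd = {4, 0, 5, 1, 6, 2, 7, 3};
  std::vector<int> Not = {0, 1, 2, 3, 4, 5, 6, 7};
  assert(isVMOVNMask(Fwd, 4, /*Rev=*/false));
  assert(isVMOVNMask(Bwd, 4, /*Rev=*/true));
  assert(!isVMOVNMask(Not, 4, /*Rev=*/false));
  return 0;
}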
- if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc->getOperand(0)))
- if (isVMOVNOriginalMask(Shuffle->getMask(), false) ||
- isVMOVNOriginalMask(Shuffle->getMask(), true))
+ // It may be preferable to keep the store unsplit as the trunc may end up
+ // being removed. Check that here.
+ if (Trunc.getOperand(0).getOpcode() == ISD::SMIN) {
+ if (SDValue U = PerformVQDMULHCombine(Trunc.getOperand(0).getNode(), DAG)) {
+ DAG.ReplaceAllUsesWith(Trunc.getOperand(0), U);
+ return SDValue();
+ }
+ }
+ if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
+ if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
return SDValue();
LLVMContext &C = *DAG.getContext();
@@ -14487,7 +14729,8 @@ static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
SmallVector<SDValue, 4> Stores;
for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
- SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset);
+ SDValue NewPtr =
+ DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
SDValue Extract =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
@@ -14540,15 +14783,15 @@ static SDValue PerformSTORECombine(SDNode *N,
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(
St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
- BasePtr, St->getPointerInfo(), St->getAlignment(),
+ BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
return DAG.getStore(NewST1.getValue(0), DL,
StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
- OffsetPtr, St->getPointerInfo(),
- std::min(4U, St->getAlignment() / 2),
+ OffsetPtr, St->getPointerInfo().getWithOffset(4),
+ St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
@@ -14722,27 +14965,105 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
// VADDLV u/s 32
// VMLALV u/s 16/32
+ // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
+ // extend it and use v4i32 instead.
+ auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
+ EVT AVT = A.getValueType();
+ if (!AVT.is128BitVector())
+ A = DAG.getNode(ExtendCode, dl,
+ AVT.changeVectorElementType(MVT::getIntegerVT(
+ 128 / AVT.getVectorMinNumElements())),
+ A);
+ return A;
+ };
auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
return SDValue();
SDValue A = N0->getOperand(0);
if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; }))
- return A;
+ return ExtendIfNeeded(A, ExtendCode);
+ return SDValue();
+ };
+ auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
+ ArrayRef<MVT> ExtTypes, SDValue &Mask) {
+ if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
+ !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
+ return SDValue();
+ Mask = N0->getOperand(0);
+ SDValue Ext = N0->getOperand(1);
+ if (Ext->getOpcode() != ExtendCode)
+ return SDValue();
+ SDValue A = Ext->getOperand(0);
+ if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; }))
+ return ExtendIfNeeded(A, ExtendCode);
return SDValue();
};
auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
SDValue &A, SDValue &B) {
- if (ResVT != RetTy || N0->getOpcode() != ISD::MUL)
+ // For a vmla we are trying to match a larger pattern:
+ // ExtA = sext/zext A
+ // ExtB = sext/zext B
+ // Mul = mul ExtA, ExtB
+ // vecreduce.add Mul
+ // There might also be an extra extend between the mul and the addreduce, so
+ // long as the bitwidth is high enough to make them equivalent (for example
+ // the original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
+ if (ResVT != RetTy)
return false;
- SDValue ExtA = N0->getOperand(0);
- SDValue ExtB = N0->getOperand(1);
+ SDValue Mul = N0;
+ if (Mul->getOpcode() == ExtendCode &&
+ Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
+ ResVT.getScalarSizeInBits())
+ Mul = Mul->getOperand(0);
+ if (Mul->getOpcode() != ISD::MUL)
+ return false;
+ SDValue ExtA = Mul->getOperand(0);
+ SDValue ExtB = Mul->getOperand(1);
if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
return false;
A = ExtA->getOperand(0);
B = ExtB->getOperand(0);
if (A.getValueType() == B.getValueType() &&
- llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; }))
+ llvm::any_of(ExtTypes,
+ [&A](MVT Ty) { return A.getValueType() == Ty; })) {
+ A = ExtendIfNeeded(A, ExtendCode);
+ B = ExtendIfNeeded(B, ExtendCode);
return true;
+ }
+ return false;
+ };
+ auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
+ SDValue &A, SDValue &B, SDValue &Mask) {
+ // Same as the pattern above with a select for the zero predicated lanes
+ // ExtA = sext/zext A
+ // ExtB = sext/zext B
+ // Mul = mul ExtA, ExtB
+ // N0 = select Mask, Mul, 0
+ // vecreduce.add N0
+ if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
+ !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
+ return false;
+ Mask = N0->getOperand(0);
+ SDValue Mul = N0->getOperand(1);
+ if (Mul->getOpcode() == ExtendCode &&
+ Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
+ ResVT.getScalarSizeInBits())
+ Mul = Mul->getOperand(0);
+ if (Mul->getOpcode() != ISD::MUL)
+ return false;
+ SDValue ExtA = Mul->getOperand(0);
+ SDValue ExtB = Mul->getOperand(1);
+ if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
+ return false;
+ A = ExtA->getOperand(0);
+ B = ExtB->getOperand(0);
+ if (A.getValueType() == B.getValueType() &&
+ llvm::any_of(ExtTypes,
+ [&A](MVT Ty) { return A.getValueType() == Ty; })) {
+ A = ExtendIfNeeded(A, ExtendCode);
+ B = ExtendIfNeeded(B, ExtendCode);
+ return true;
+ }
return false;
};
auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
@@ -14755,20 +15076,93 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
- if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
+ if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND,
+ {MVT::v4i8, MVT::v4i16, MVT::v4i32}))
return Create64bitNode(ARMISD::VADDLVs, {A});
- if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
+ if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND,
+ {MVT::v4i8, MVT::v4i16, MVT::v4i32}))
return Create64bitNode(ARMISD::VADDLVu, {A});
+ if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
+ if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
+
+ SDValue Mask;
+ if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
+ return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
+ if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
+ return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
+ if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND,
+ {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask))
+ return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
+ if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND,
+ {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask))
+ return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
+ if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
+ if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
SDValue A, B;
if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
- if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B))
+ if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND,
+ {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B))
return Create64bitNode(ARMISD::VMLALVs, {A, B});
- if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B))
+ if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND,
+ {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B))
return Create64bitNode(ARMISD::VMLALVu, {A, B});
+ if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
+ if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
+
+ if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask))
+ return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
+ if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask))
+ return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
+ if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND,
+ {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A,
+ B, Mask))
+ return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
+ if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND,
+ {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A,
+ B, Mask))
+ return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
+ if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
+ if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
+
+ // One complication: when the two inputs of the mul are the same, the output
+ // sext will have been helpfully converted to a zext. Turn it back.
+ SDValue Op = N0;
+ if (Op->getOpcode() == ISD::VSELECT)
+ Op = Op->getOperand(1);
+ if (Op->getOpcode() == ISD::ZERO_EXTEND &&
+ Op->getOperand(0)->getOpcode() == ISD::MUL) {
+ SDValue Mul = Op->getOperand(0);
+ if (Mul->getOperand(0) == Mul->getOperand(1) &&
+ Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
+ if (Op != N0)
+ Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
+ N0->getOperand(0), Ext, N0->getOperand(2));
+ return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
+ }
+ }
+
return SDValue();
}
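The shapes matched above are integer dot products: vecreduce.add of mul(sext(a), sext(b)), summed into a 32- or 64-bit result, optionally under a predicate mask, which is what the VMLAV/VMLALV family computes in a single instruction. A scalar sketch of the unpredicated widening form; names are illustrative.

#include <cassert>
#include <cstdint>
#include <vector>

static int64_t vmlalvModel(const std::vector<int16_t> &A,
                           const std::vector<int16_t> &B) {
  int64_t Sum = 0;
  for (size_t I = 0; I < A.size() && I < B.size(); ++I)
    Sum += int64_t(A[I]) * int64_t(B[I]); // mul(sext(a), sext(b)), then add
  return Sum;                             // vecreduce.add
}

int main() {
  std::vector<int16_t> A = {1, 2, 3, 4};
  std::vector<int16_t> B = {5, 6, 7, 8};
  assert(vmlalvModel(A, B) == 70); // 1*5 + 2*6 + 3*7 + 4*8
  return 0;
}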
@@ -15220,12 +15614,13 @@ static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
SmallVector<SDValue, 4> Chains;
for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
- SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset);
+ SDValue NewPtr =
+ DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
SDValue NewLoad =
DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
- Alignment.value(), MMOFlags, AAInfo);
+ Alignment, MMOFlags, AAInfo);
Loads.push_back(NewLoad);
Chains.push_back(SDValue(NewLoad.getNode(), 1));
}
@@ -15313,6 +15708,9 @@ static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
if (!ST->hasMVEIntegerOps())
return SDValue();
+ if (SDValue V = PerformVQDMULHCombine(N, DAG))
+ return V;
+
if (VT != MVT::v4i32 && VT != MVT::v8i16)
return SDValue();
@@ -15920,6 +16318,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
+ case ISD::SELECT_CC:
+ case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
@@ -16336,6 +16736,19 @@ bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
return !IsFMS(I);
+ case Intrinsic::arm_mve_add_predicated:
+ case Intrinsic::arm_mve_mul_predicated:
+ case Intrinsic::arm_mve_qadd_predicated:
+ case Intrinsic::arm_mve_hadd_predicated:
+ case Intrinsic::arm_mve_vqdmull_predicated:
+ case Intrinsic::arm_mve_qdmulh_predicated:
+ case Intrinsic::arm_mve_qrdmulh_predicated:
+ case Intrinsic::arm_mve_fma_predicated:
+ return true;
+ case Intrinsic::arm_mve_sub_predicated:
+ case Intrinsic::arm_mve_qsub_predicated:
+ case Intrinsic::arm_mve_hsub_predicated:
+ return Operand == 1;
default:
return false;
}
@@ -17064,8 +17477,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
return;
KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
- Known.Zero &= KnownRHS.Zero;
- Known.One &= KnownRHS.One;
+ Known = KnownBits::commonBits(Known, KnownRHS);
return;
}
case ISD::INTRINSIC_W_CHAIN: {
@@ -17938,6 +18350,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return false;
if (VT == MVT::f16 && Subtarget->hasFullFP16())
return ARM_AM::getFP16Imm(Imm) != -1;
+ if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
+ ARM_AM::getFP32FP16Imm(Imm) != -1)
+ return true;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
if (VT == MVT::f64 && Subtarget->hasFP64())
@@ -18247,6 +18662,8 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
: AtomicExpansionKind::None;
}
+// As in shouldExpandAtomicRMWInIR, ldrex/strex can be used for sizes up to 32
+// bits, and up to 64 bits on the non-M profiles.
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
@@ -18254,9 +18671,11 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+ unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
bool HasAtomicCmpXchg =
!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
- if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+ if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
+ Size <= (Subtarget->isMClass() ? 32U : 64U))
return AtomicExpansionKind::LLSC;
return AtomicExpansionKind::None;
}
@@ -18711,8 +19130,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SmallVector<Value *, 6> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
- for (auto S : Shuffles)
- Ops.push_back(S);
+ append_range(Ops, Shuffles);
Ops.push_back(Builder.getInt32(SI->getAlignment()));
Builder.CreateCall(VstNFunc, Ops);
} else {
@@ -18728,8 +19146,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SmallVector<Value *, 6> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, EltPtrTy));
- for (auto S : Shuffles)
- Ops.push_back(S);
+ append_range(Ops, Shuffles);
for (unsigned F = 0; F < Factor; F++) {
Ops.push_back(Builder.getInt32(F));
Builder.CreateCall(VstNFunc, Ops);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
index 8b1f4183032e..61a127af07de 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -216,23 +216,37 @@ class VectorType;
VMULLs, // ...signed
VMULLu, // ...unsigned
+ VQDMULH, // MVE vqdmulh instruction
+
// MVE reductions
VADDVs, // sign- or zero-extend the elements of a vector to i32,
VADDVu, // add them all together, and return an i32 of their sum
+ VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask
+ VADDVpu,
VADDLVs, // sign- or zero-extend elements to i64 and sum, returning
VADDLVu, // the low and high 32-bit halves of the sum
- VADDLVAs, // same as VADDLV[su] but also add an input accumulator
+ VADDLVAs, // Same as VADDLV[su] but also add an input accumulator
VADDLVAu, // provided as low and high halves
- VADDLVps, // same as VADDLVs but with a v4i1 predicate mask
- VADDLVpu, // same as VADDLVu but with a v4i1 predicate mask
- VADDLVAps, // same as VADDLVps but with a v4i1 predicate mask
- VADDLVApu, // same as VADDLVpu but with a v4i1 predicate mask
- VMLAVs,
- VMLAVu,
- VMLALVs,
- VMLALVu,
- VMLALVAs,
- VMLALVAu,
+ VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask
+ VADDLVpu,
+ VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask
+ VADDLVApu,
+ VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply them
+ VMLAVu, // and add the results together, returning an i32 of their sum
+ VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask
+ VMLAVpu,
+ VMLALVs, // Same as VMLAV but with i64, returning the low and
+ VMLALVu, // high 32-bit halves of the sum
+ VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask
+ VMLALVpu,
+ VMLALVAs, // Same as VMLALV but also add an input accumulator
+ VMLALVAu, // provided as low and high halves
+ VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
+ VMLALVApu,
+ VMINVu, // Find minimum unsigned value of a vector and register
+ VMINVs, // Find minimum signed value of a vector and register
+ VMAXVu, // Find maximum unsigned value of a vector and register
+ VMAXVs, // Find maximum signed value of a vector and register
SMULWB, // Signed multiply word by half word, bottom
SMULWT, // Signed multiply word by half word, top
@@ -271,8 +285,8 @@ class VectorType;
// Vector AND with NOT of immediate
VBICIMM,
- // Vector bitwise select
- VBSL,
+ // Pseudo vector bitwise select
+ VBSP,
// Pseudo-instruction representing a memory copy using ldm/stm
// instructions.
@@ -520,12 +534,6 @@ class VectorType;
const TargetRegisterClass *
getRegClassFor(MVT VT, bool isDivergent = false) const override;
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
- }
-
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
unsigned &PrefAlign) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td
index e13f3437cc7b..85da7c5a535e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -403,8 +403,9 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
bit isUnaryDataProc = 0;
bit canXformTo16Bit = 0;
// The instruction is a 16-bit flag setting Thumb instruction. Used
- // by the parser to determine whether to require the 'S' suffix on the
- // mnemonic (when not in an IT block) or preclude it (when in an IT block).
+ // by the parser and if-converter to determine whether to require the 'S'
+ // suffix on the mnemonic (when not in an IT block) or preclude it (when
+ // in an IT block).
bit thumbArithFlagSetting = 0;
bit validForTailPredication = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
index da0a836c8f95..8dcb319923ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -104,11 +104,6 @@ def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>,
SDTCisInt<0>,
SDTCisInt<4>]>;
-// TODO Add another operand for 'Size' so that we can re-use this node when we
-// start supporting *TP versions.
-def SDT_ARMLoLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>,
- SDTCisVT<1, OtherVT>]>;
-
def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
@@ -167,9 +162,9 @@ def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
def ARMsubs : SDNode<"ARMISD::SUBS", SDTIntBinOp, [SDNPOutGlue]>;
-def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
+def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
-def ARMusatnoshift : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>;
+def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>;
def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
@@ -303,10 +298,6 @@ def SDTARMVCMPZ : SDTypeProfile<1, 2, [SDTCisInt<2>]>;
def ARMvcmp : SDNode<"ARMISD::VCMP", SDTARMVCMP>;
def ARMvcmpz : SDNode<"ARMISD::VCMPZ", SDTARMVCMPZ>;
-def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMLoLoop, [SDNPHasChain]>;
-def ARMLE : SDNode<"ARMISD::LE", SDT_ARMLoLoop, [SDNPHasChain]>;
-def ARMLoopDec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
-
// 'VECTOR_REG_CAST' is an operation that reinterprets the contents of a
// vector register as a different vector type, without changing the contents of
// the register. It differs from 'bitconvert' in that bitconvert reinterprets
@@ -380,6 +371,11 @@ def imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
+// asr_imm_XFORM - Returns a shift immediate with bit {5} set to 1
+def asr_imm_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(0x20 | N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
@@ -446,6 +442,8 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
+def asr_imm : ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }], asr_imm_XFORM>;
+
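
A hypothetical helper (not an LLVM API) illustrating the value asr_imm_XFORM builds above, assuming, as the comment says, that the SSAT/USAT shift operand keeps the amount in bits {4:0} and flags an arithmetic right shift via bit {5}:

    // Hypothetical: builds the same operand value as the XForm for an ASR shift.
    unsigned encodeSatShift(unsigned ShiftAmt, bool IsASR) {
      return (IsASR ? 0x20u : 0u) | (ShiftAmt & 0x1Fu);   // ASR #32 encodes as 0x20
    }
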
//===----------------------------------------------------------------------===//
// NEON/MVE pattern fragments
//
@@ -498,6 +496,18 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
}]>;
+def ARMimmAllZerosV: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 0))))>;
+def ARMimmAllZerosD: PatLeaf<(bitconvert (v2i32 (ARMvmovImm (i32 0))))>;
+def ARMimmAllOnesV: PatLeaf<(bitconvert (v16i8 (ARMvmovImm (i32 0xEFF))))>;
+def ARMimmAllOnesD: PatLeaf<(bitconvert (v8i8 (ARMvmovImm (i32 0xEFF))))>;
+
+def ARMimmOneV: PatLeaf<(ARMvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == N->getValueType(0).getScalarSizeInBits() && EltVal == 0x01);
+}]>;
+
//===----------------------------------------------------------------------===//
// Operand Definitions.
@@ -812,7 +822,9 @@ def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
def arm_i32imm : IntImmLeaf<i32, [{
if (Subtarget->useMovt())
return true;
- return ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue());
+ if (ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue()))
+ return true;
+ return ARM_AM::isSOImmTwoPartValNeg(Imm.getZExtValue());
}]>;
/// imm0_1 predicate - Immediate in the range [0,1].
@@ -2480,23 +2492,29 @@ let isCall = 1,
}
// ARMv5T and above
- def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm,
- IIC_Br, "blx\t$func",
- [(ARMcall GPR:$func)]>,
+ def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx\t$func", []>,
Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
bits<4> func;
let Inst{31-4} = 0b1110000100101111111111110011;
let Inst{3-0} = func;
}
+ def BLX_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func),
+ 4, IIC_Br, [], (BLX GPR:$func)>,
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>;
+
def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm,
- IIC_Br, "blx", "\t$func",
- [(ARMcall_pred GPR:$func)]>,
+ IIC_Br, "blx", "\t$func", []>,
Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
bits<4> func;
let Inst{27-4} = 0b000100101111111111110011;
let Inst{3-0} = func;
}
+ def BLX_pred_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func),
+ 4, IIC_Br, [],
+ (BLX_pred GPR:$func, (ops 14, zero_reg))>,
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>;
+
// ARMv4T
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
@@ -2522,6 +2540,16 @@ let isCall = 1,
Requires<[IsARM]>, Sched<[WriteBr]>;
}
+def : ARMPat<(ARMcall GPR:$func), (BLX $func)>,
+ Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>;
+def : ARMPat<(ARMcall GPRnoip:$func), (BLX_noip $func)>,
+ Requires<[IsARM, HasV5T, SLSBLRMitigation]>;
+def : ARMPat<(ARMcall_pred GPR:$func), (BLX_pred $func)>,
+ Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>;
+def : ARMPat<(ARMcall_pred GPRnoip:$func), (BLX_pred_noip $func)>,
+ Requires<[IsARM, HasV5T, SLSBLRMitigation]>;
+
+
let isBranch = 1, isTerminator = 1 in {
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
@@ -4061,14 +4089,31 @@ def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos),
(SSAT imm1_32:$pos, GPRnopc:$a, 0)>;
def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
(USAT imm0_31:$pos, GPRnopc:$a, 0)>;
-def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
+def : ARMPat<(ARMssat GPRnopc:$Rn, imm0_31:$imm),
(SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
-def : ARMPat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm),
+def : ARMPat<(ARMusat GPRnopc:$Rn, imm0_31:$imm),
(USAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos),
(SSAT16 imm1_16:$pos, GPRnopc:$a)>;
def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos),
(USAT16 imm0_15:$pos, GPRnopc:$a)>;
+def : ARMV6Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos),
+ (SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : ARMV6Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos),
+ (SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>;
+def : ARMV6Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : ARMV6Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>;
+def : ARMPat<(ARMssat (shl GPRnopc:$Rn, imm0_31:$shft), imm0_31:$pos),
+ (SSAT imm0_31:$pos, GPRnopc:$Rn, imm0_31:$shft)>;
+def : ARMPat<(ARMssat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
+ (SSAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
+def : ARMPat<(ARMusat (shl GPRnopc:$Rn, imm0_31:$shft), imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$Rn, imm0_31:$shft)>;
+def : ARMPat<(ARMusat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
+
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
@@ -6336,6 +6381,15 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
NoItinerary,
[(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>;
+// SpeculationBarrierEndBB must only be used after an unconditional control
+// flow, i.e. after a terminator for which isBarrier is True.
+let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ def SpeculationBarrierISBDSBEndBB
+ : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+ def SpeculationBarrierSBEndBB
+ : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+}
+
//===----------------------------------
// Atomic cmpxchg for -O0
//===----------------------------------
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
index 2a1f50d97e3b..0dfea68887e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -318,6 +318,78 @@ def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, ?, 0b11, "f", ?>;
def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b11, "p", 0b0>;
def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b11, "p", 0b1>;
+multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
+ dag PredOperands, Instruction Inst,
+ SDPatternOperator IdentityVec = null_frag> {
+ // Unpredicated
+ def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated with select
+ if !ne(VTI.Size, 0b11) then {
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask),
+ (VTI.Vec (Op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+
+ // Optionally with the select folded through the op
+ def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (vselect (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$Qn),
+ (VTI.Vec IdentityVec))))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$Qm)))>;
+ }
+
+ // Predicated with intrinsic
+ def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)),
+ PredOperands,
+ (? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
+
+multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
+ dag PredOperands, Instruction Inst,
+ SDPatternOperator IdentityVec = null_frag> {
+ // Unpredicated
+ def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn))>;
+
+ // Predicated with select
+ if !ne(VTI.Size, 0b11) then {
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask),
+ (VTI.Vec (Op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (ARMvdup rGPR:$Rn)))),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+
+ // Optionally with the select folded through the op
+ def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (vselect (VTI.Pred VCCR:$mask),
+ (ARMvdup rGPR:$Rn),
+ (VTI.Vec IdentityVec))))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$Qm)))>;
+ }
+
+ // Predicated with intrinsic
+ def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn))),
+ PredOperands,
+ (? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
+
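
The "select folded through the op" patterns above rest on an identity-element argument; a per-lane scalar sketch of the equivalence they encode:

    // I is the identity of Op (0 for add/orr/eor, all-ones for and, 1 for mul):
    // Op(a, select(mask, b, I)) == select(mask, Op(a, b), a), so masked-off lanes
    // can legitimately take $Qm as the predicated instruction's inactive value.
    unsigned maskedAddLane(unsigned a, unsigned b, bool laneActive) {
      unsigned rhs = laneActive ? b : 0u;   // select against the identity element
      return a + rhs;                       // == laneActive ? a + b : a
    }
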
// --------- Start of base classes for the instructions themselves
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
@@ -378,7 +450,7 @@ class MVE_ScalarShift<string iname, dag oops, dag iops, string asm, string cstr,
: MVE_MI_with_pred<oops, iops, NoItinerary, iname, asm, cstr, pattern> {
let Inst{31-20} = 0b111010100101;
let Inst{8} = 0b1;
-
+ let validForTailPredication=1;
}
class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
@@ -612,8 +684,13 @@ class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
let validForTailPredication = 1;
}
+def SDTVecReduceP : SDTypeProfile<1, 2, [ // VADDLVp
+ SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>
+]>;
def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>;
def ARMVADDVu : SDNode<"ARMISD::VADDVu", SDTVecReduce>;
+def ARMVADDVps : SDNode<"ARMISD::VADDVps", SDTVecReduceP>;
+def ARMVADDVpu : SDNode<"ARMISD::VADDVpu", SDTVecReduceP>;
multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
def acc : MVE_VADDV<"vaddva", VTI.Suffix,
@@ -630,20 +707,39 @@ multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
if VTI.Unsigned then {
def : Pat<(i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
(i32 (InstN $vec))>;
+ def : Pat<(i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$vec),
+ (VTI.Vec ARMimmAllZerosV))))),
+ (i32 (InstN $vec, ARMVCCThen, $pred))>;
def : Pat<(i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
(i32 (InstN $vec))>;
+ def : Pat<(i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
+ (i32 (InstN $vec, ARMVCCThen, $pred))>;
def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$vec),
+ (VTI.Vec ARMimmAllZerosV))))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
def : Pat<(i32 (add (i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
+ def : Pat<(i32 (add (i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
} else {
def : Pat<(i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
(i32 (InstN $vec))>;
def : Pat<(i32 (add (i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
+ def : Pat<(i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
+ (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
}
def : Pat<(i32 (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
@@ -848,6 +944,14 @@ multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> {
defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>;
}
+def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
+]>;
+def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>;
+def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>;
+def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>;
+def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>;
+
defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;
defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">;
@@ -878,6 +982,32 @@ let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),
(i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;
+ def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
+ (i32 (MVE_VMINVu8 $x, $src))>;
+ def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
+ (i32 (MVE_VMINVu16 $x, $src))>;
+ def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
+ (i32 (MVE_VMINVu32 $x, $src))>;
+ def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
+ (i32 (MVE_VMINVs8 $x, $src))>;
+ def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
+ (i32 (MVE_VMINVs16 $x, $src))>;
+ def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
+ (i32 (MVE_VMINVs32 $x, $src))>;
+
+ def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
+ (i32 (MVE_VMAXVu8 $x, $src))>;
+ def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
+ (i32 (MVE_VMAXVu16 $x, $src))>;
+ def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
+ (i32 (MVE_VMAXVu32 $x, $src))>;
+ def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
+ (i32 (MVE_VMAXVs8 $x, $src))>;
+ def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
+ (i32 (MVE_VMAXVs16 $x, $src))>;
+ def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
+ (i32 (MVE_VMAXVs32 $x, $src))>;
+
}
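
Per-lane scalar model of the ARMISD::VMINV/VMAXV nodes matched above (an incoming scalar folded into the reduction); a sketch for the unsigned minimum case:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // VMINVu folds the incoming scalar into an unsigned minimum over the lanes.
    uint32_t vminvU32(uint32_t x, const std::vector<uint32_t> &lanes) {
      for (uint32_t e : lanes)
        x = std::min(x, e);
      return x;
    }
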
multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> {
@@ -1009,12 +1139,28 @@ def SDTVecReduce2LA : SDTypeProfile<2, 4, [ // VMLALVA
SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
SDTCisVec<4>, SDTCisVec<5>
]>;
+def SDTVecReduce2P : SDTypeProfile<1, 3, [ // VMLAV
+ SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>
+]>;
+def SDTVecReduce2LP : SDTypeProfile<2, 3, [ // VMLALV
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>
+]>;
+def SDTVecReduce2LAP : SDTypeProfile<2, 5, [ // VMLALVA
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
+ SDTCisVec<4>, SDTCisVec<5>, SDTCisVec<6>
+]>;
def ARMVMLAVs : SDNode<"ARMISD::VMLAVs", SDTVecReduce2>;
def ARMVMLAVu : SDNode<"ARMISD::VMLAVu", SDTVecReduce2>;
def ARMVMLALVs : SDNode<"ARMISD::VMLALVs", SDTVecReduce2L>;
def ARMVMLALVu : SDNode<"ARMISD::VMLALVu", SDTVecReduce2L>;
-def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>;
-def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>;
+def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>;
+def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>;
+def ARMVMLAVps : SDNode<"ARMISD::VMLAVps", SDTVecReduce2P>;
+def ARMVMLAVpu : SDNode<"ARMISD::VMLAVpu", SDTVecReduce2P>;
+def ARMVMLALVps : SDNode<"ARMISD::VMLALVps", SDTVecReduce2LP>;
+def ARMVMLALVpu : SDNode<"ARMISD::VMLALVpu", SDTVecReduce2LP>;
+def ARMVMLALVAps : SDNode<"ARMISD::VMLALVAps", SDTVecReduce2LAP>;
+def ARMVMLALVApu : SDNode<"ARMISD::VMLALVApu", SDTVecReduce2LAP>;
let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))),
@@ -1033,22 +1179,68 @@ let Predicates = [HasMVEInt] in {
(i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(i32 (add (i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))),
- (i32 tGPREven:$src3))),
+ (i32 tGPREven:$src3))),
(i32 (MVE_VMLADAVau32 $src3, $src1, $src2))>;
def : Pat<(i32 (add (i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))),
- (i32 tGPREven:$src3))),
+ (i32 tGPREven:$src3))),
(i32 (MVE_VMLADAVau16 $src3, $src1, $src2))>;
def : Pat<(i32 (add (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(i32 (add (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(i32 (add (i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))),
- (i32 tGPREven:$src3))),
+ (i32 tGPREven:$src3))),
(i32 (MVE_VMLADAVau8 $src3, $src1, $src2))>;
def : Pat<(i32 (add (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(i32 (add (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+
+ // Predicated
+ def : Pat<(i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
+ (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
+ (v4i32 ARMimmAllZerosV)))),
+ (i32 (MVE_VMLADAVu32 $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
+ (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
+ (v8i16 ARMimmAllZerosV)))),
+ (i32 (MVE_VMLADAVu16 $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
+ (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
+ (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
+ (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
+ (v16i8 ARMimmAllZerosV)))),
+ (i32 (MVE_VMLADAVu8 $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
+ (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
+ (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+
+ def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
+ (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
+ (v4i32 ARMimmAllZerosV)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau32 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
+ (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
+ (v8i16 ARMimmAllZerosV)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau16 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
+ (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
+ (v16i8 ARMimmAllZerosV)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau8 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
}
// vmlav aliases vmladav
@@ -1168,6 +1360,25 @@ let Predicates = [HasMVEInt] in {
(MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
def : Pat<(ARMVMLALVAu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
(MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
+
+ // Predicated
+ def : Pat<(ARMVMLALVps (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
+ (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVpu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
+ (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
+ (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
+ (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+
+ def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
+ (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
+ (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
+ (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
+ (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
}
// vmlalv aliases vmlaldav
@@ -1215,7 +1426,7 @@ class MVE_comp<InstrItinClass itin, string iname, string suffix,
}
class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
- list<dag> pattern=[]>
+ list<dag> pattern=[]>
: MVE_comp<NoItinerary, iname, suffix, "", pattern> {
let Inst{28} = 0b1;
@@ -1231,46 +1442,19 @@ class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
let Predicates = [HasMVEFloat];
}
-def MVE_VMAXNMf32 : MVE_VMINMAXNM<"vmaxnm", "f32", 0b0, 0b0>;
-def MVE_VMAXNMf16 : MVE_VMINMAXNM<"vmaxnm", "f16", 0b1, 0b0>;
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (fmaxnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
- (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
- def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
- (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
- def : Pat<(v4f32 (int_arm_mve_max_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), (i32 0),
- (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
- (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
- ARMVCCThen, (v4i1 VCCR:$mask),
- (v4f32 MQPR:$inactive)))>;
- def : Pat<(v8f16 (int_arm_mve_max_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), (i32 0),
- (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
- (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
- ARMVCCThen, (v8i1 VCCR:$mask),
- (v8f16 MQPR:$inactive)))>;
-}
-
-def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>;
-def MVE_VMINNMf16 : MVE_VMINMAXNM<"vminnm", "f16", 0b1, 0b1>;
+multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> {
+ def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size{0}, bit_4>;
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (fminnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
- (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
- def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
- (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
- def : Pat<(v4f32 (int_arm_mve_min_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
- (i32 0), (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
- (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
- ARMVCCThen, (v4i1 VCCR:$mask),
- (v4f32 MQPR:$inactive)))>;
- def : Pat<(v8f16 (int_arm_mve_min_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
- (i32 0), (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
- (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
- ARMVCCThen, (v8i1 VCCR:$mask),
- (v8f16 MQPR:$inactive)))>;
+ let Predicates = [HasMVEFloat] in {
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 0)), !cast<Instruction>(NAME)>;
+ }
}
+defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, fmaxnum, int_arm_mve_max_predicated>;
+defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>;
+defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>;
+defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>;
+
class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
bit bit_4, list<dag> pattern=[]>
@@ -1288,22 +1472,11 @@ class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
}
multiclass MVE_VMINMAX_m<string iname, bit bit_4, MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VMINMAX<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, bit_4>;
- defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- // Unpredicated min/max
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-
- // Predicated min/max
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
}
}
@@ -1476,61 +1649,41 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f
(MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
}
-multiclass MVE_bit_op<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, MVE_bit_ops instruction> {
- let Predicates = [HasMVEInt] in {
- // Unpredicated operation
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
- (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
- // Predicated operation
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (instruction
- (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
- }
-}
-
-defm : MVE_bit_op<MVE_v16i8, and, int_arm_mve_and_predicated, MVE_VAND>;
-defm : MVE_bit_op<MVE_v8i16, and, int_arm_mve_and_predicated, MVE_VAND>;
-defm : MVE_bit_op<MVE_v4i32, and, int_arm_mve_and_predicated, MVE_VAND>;
-defm : MVE_bit_op<MVE_v2i64, and, int_arm_mve_and_predicated, MVE_VAND>;
-
-defm : MVE_bit_op<MVE_v16i8, or, int_arm_mve_orr_predicated, MVE_VORR>;
-defm : MVE_bit_op<MVE_v8i16, or, int_arm_mve_orr_predicated, MVE_VORR>;
-defm : MVE_bit_op<MVE_v4i32, or, int_arm_mve_orr_predicated, MVE_VORR>;
-defm : MVE_bit_op<MVE_v2i64, or, int_arm_mve_orr_predicated, MVE_VORR>;
-
-defm : MVE_bit_op<MVE_v16i8, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
-defm : MVE_bit_op<MVE_v8i16, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
-defm : MVE_bit_op<MVE_v4i32, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
-defm : MVE_bit_op<MVE_v2i64, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
-
-multiclass MVE_bit_op_with_inv<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, MVE_bit_ops instruction> {
- let Predicates = [HasMVEInt] in {
- // Unpredicated operation
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (vnotq (VTI.Vec MQPR:$Qn)))),
- (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
- // Predicated operation
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (instruction
- (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
- }
+let Predicates = [HasMVEInt] in {
+ defm : MVE_TwoOpPattern<MVE_v16i8, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+ defm : MVE_TwoOpPattern<MVE_v2i64, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+
+ defm : MVE_TwoOpPattern<MVE_v16i8, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v2i64, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+
+ defm : MVE_TwoOpPattern<MVE_v16i8, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v2i64, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+
+ defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
+ defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
+
+ defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_orn_predicated, (? ), MVE_VORN>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_orn_predicated, (? ), MVE_VORN>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_orn_predicated, (? ), MVE_VORN>;
+ defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_orn_predicated, (? ), MVE_VORN>;
}
-defm : MVE_bit_op_with_inv<MVE_v16i8, and, int_arm_mve_bic_predicated, MVE_VBIC>;
-defm : MVE_bit_op_with_inv<MVE_v8i16, and, int_arm_mve_bic_predicated, MVE_VBIC>;
-defm : MVE_bit_op_with_inv<MVE_v4i32, and, int_arm_mve_bic_predicated, MVE_VBIC>;
-defm : MVE_bit_op_with_inv<MVE_v2i64, and, int_arm_mve_bic_predicated, MVE_VBIC>;
-
-defm : MVE_bit_op_with_inv<MVE_v16i8, or, int_arm_mve_orn_predicated, MVE_VORN>;
-defm : MVE_bit_op_with_inv<MVE_v8i16, or, int_arm_mve_orn_predicated, MVE_VORN>;
-defm : MVE_bit_op_with_inv<MVE_v4i32, or, int_arm_mve_orn_predicated, MVE_VORN>;
-defm : MVE_bit_op_with_inv<MVE_v2i64, or, int_arm_mve_orn_predicated, MVE_VORN>;
-
class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps>
: MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
@@ -1565,7 +1718,8 @@ multiclass MVE_bit_cmode_p<string iname, bit opcode,
defvar UnpredPat = (VTI.Vec (op (VTI.Vec MQPR:$src), timm:$simm));
let Predicates = [HasMVEInt] in {
- def : Pat<UnpredPat, (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>;
+ def : Pat<UnpredPat,
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
UnpredPat, (VTI.Vec MQPR:$src))),
(VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm,
@@ -1775,31 +1929,18 @@ class MVE_VMULt1<string iname, string suffix, bits<2> size,
let validForTailPredication = 1;
}
-multiclass MVE_VMUL_m<string iname, MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
- def "" : MVE_VMULt1<iname, VTI.Suffix, VTI.Size>;
- defvar Inst = !cast<Instruction>(NAME);
+multiclass MVE_VMUL_m<MVEVectorVTInfo VTI> {
+ def "" : MVE_VMULt1<"vmul", VTI.Suffix, VTI.Size>;
let Predicates = [HasMVEInt] in {
- // Unpredicated multiply
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-
- // Predicated multiply
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
+ defm : MVE_TwoOpPattern<VTI, mul, int_arm_mve_mul_predicated, (? ),
+ !cast<Instruction>(NAME), ARMimmOneV>;
}
}
-multiclass MVE_VMUL<MVEVectorVTInfo VTI>
- : MVE_VMUL_m<"vmul", VTI, mul, int_arm_mve_mul_predicated>;
-
-defm MVE_VMULi8 : MVE_VMUL<MVE_v16i8>;
-defm MVE_VMULi16 : MVE_VMUL<MVE_v8i16>;
-defm MVE_VMULi32 : MVE_VMUL<MVE_v4i32>;
+defm MVE_VMULi8 : MVE_VMUL_m<MVE_v16i8>;
+defm MVE_VMULi16 : MVE_VMUL_m<MVE_v8i16>;
+defm MVE_VMULi32 : MVE_VMUL_m<MVE_v4i32>;
class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding,
list<dag> pattern=[]>
@@ -1811,30 +1952,30 @@ class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding,
let Inst{12-8} = 0b01011;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
+def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>;
+
multiclass MVE_VQxDMULH_m<string iname, MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int,
+ SDNode Op, Intrinsic unpred_int, Intrinsic pred_int,
bit rounding> {
def "" : MVE_VQxDMULH_Base<iname, VTI.Suffix, VTI.Size, rounding>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- // Unpredicated multiply
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ defm : MVE_TwoOpPattern<VTI, Op, pred_int, (? ), Inst>;
- // Predicated multiply
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
+ // Extra unpredicated multiply intrinsic patterns
+ def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
}
}
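
A 16-bit per-lane sketch of the semantics the new ARMISD::VQDMULH node is assumed to carry (doubled product, high half, saturating); treat the exact saturation detail as an assumption rather than a statement of the commit:

    #include <algorithm>
    #include <cstdint>

    // Assumed semantics: high half of the doubled product, saturated. Only
    // INT16_MIN * INT16_MIN can overflow the doubled product.
    int16_t vqdmulhS16(int16_t a, int16_t b) {
      int32_t prod = (int32_t(a) * int32_t(b)) >> 15;           // == (2*a*b) >> 16
      return static_cast<int16_t>(std::min<int32_t>(prod, INT16_MAX));
    }
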
multiclass MVE_VQxDMULH<string iname, MVEVectorVTInfo VTI, bit rounding>
- : MVE_VQxDMULH_m<iname, VTI, !if(rounding, int_arm_mve_vqrdmulh,
+ : MVE_VQxDMULH_m<iname, VTI, !if(rounding, null_frag,
+ MVEvqdmulh),
+ !if(rounding, int_arm_mve_vqrdmulh,
int_arm_mve_vqdmulh),
!if(rounding, int_arm_mve_qrdmulh_predicated,
int_arm_mve_qdmulh_predicated),
@@ -1862,21 +2003,12 @@ class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
}
multiclass MVE_VADDSUB_m<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VADDSUB<iname, VTI.Suffix, VTI.Size, subtract>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- // Unpredicated add/subtract
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-
- // Predicated add/subtract
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>;
}
}
@@ -1914,22 +2046,13 @@ class MVE_VQSUB_<string suffix, bit U, bits<2> size>
: MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size>;
multiclass MVE_VQADD_m<MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VQADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- // Unpredicated saturating add
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-
- // Predicated saturating add
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
+ !cast<Instruction>(NAME)>;
}
}
@@ -1944,22 +2067,13 @@ defm MVE_VQADDu16 : MVE_VQADD<MVE_v8u16, uaddsat>;
defm MVE_VQADDu32 : MVE_VQADD<MVE_v4u32, uaddsat>;
multiclass MVE_VQSUB_m<MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VQSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- // Unpredicated saturating subtract
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-
- // Predicated saturating subtract
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
+ !cast<Instruction>(NAME)>;
}
}
@@ -2085,30 +2199,32 @@ defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
// modelling that here with these patterns, but we're using no wrap forms of
// add to ensure that the extra bit of information is not needed for the
// arithmetic or the rounding.
-def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
- (v16i8 (ARMvmovImm (i32 3585)))),
- (i32 1))),
- (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
-def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
- (v8i16 (ARMvmovImm (i32 2049)))),
- (i32 1))),
- (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
-def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
- (v4i32 (ARMvmovImm (i32 1)))),
- (i32 1))),
- (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
-def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
- (v16i8 (ARMvmovImm (i32 3585)))),
- (i32 1))),
- (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
-def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
- (v8i16 (ARMvmovImm (i32 2049)))),
- (i32 1))),
- (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
-def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
- (v4i32 (ARMvmovImm (i32 1)))),
- (i32 1))),
- (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+ (v16i8 (ARMvmovImm (i32 3585)))),
+ (i32 1))),
+ (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+ (v8i16 (ARMvmovImm (i32 2049)))),
+ (i32 1))),
+ (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+ (v4i32 (ARMvmovImm (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+ (v16i8 (ARMvmovImm (i32 3585)))),
+ (i32 1))),
+ (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+ (v8i16 (ARMvmovImm (i32 2049)))),
+ (i32 1))),
+ (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+ (v4i32 (ARMvmovImm (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
+}
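
Per-lane arithmetic behind the VRHADD patterns above, computed in a wider type so the rounding carry is preserved (which is why the patterns insist on the no-wrap add forms):

    #include <cstdint>

    // (a + b + 1) >> 1 evaluated in a wider type, so the carry out of the add survives.
    uint8_t rhaddU8(uint8_t a, uint8_t b) {
      return static_cast<uint8_t>((uint16_t(a) + uint16_t(b) + 1u) >> 1);
    }
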
class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
@@ -2357,8 +2473,9 @@ multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
let Predicates = [HasMVEInt] in {
// VQABS and VQNEG have more difficult isel patterns defined elsewhere
- if !eq(saturate, 0) then {
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
+ if !not(saturate) then {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))),
+ (VTI.Vec (Inst $v))>;
}
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
@@ -2915,7 +3032,7 @@ multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst,
defvar outparams = (inst (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
(imm:$imm));
- def : Pat<(OutVTI.Vec !setop(inparams, int_arm_mve_vshrn)),
+ def : Pat<(OutVTI.Vec !setdagop(inparams, int_arm_mve_vshrn)),
(OutVTI.Vec outparams)>;
def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated
(InVTI.Pred VCCR:$pred)))),
@@ -3117,7 +3234,7 @@ multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name,
defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # name);
defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # name # "_predicated");
- def : Pat<(VTI.Vec !setop(inparams, unpred_int)),
+ def : Pat<(VTI.Vec !setdagop(inparams, unpred_int)),
(VTI.Vec outparams)>;
def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))),
(VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
@@ -3469,18 +3586,12 @@ class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
}
multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
}
}
@@ -3571,14 +3682,23 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
let Predicates = [HasMVEFloat] in {
if fms then {
- def : Pat<(VTI.Vec (fma (fneg m1), m2, add)), (Inst $add, $m1, $m2)>;
- def : Pat<(VTI.Vec (fma m1, (fneg m2), add)), (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (fma (fneg m1), m2, add)),
+ (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec (fma (fneg m1), m2, add)),
+ add)),
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
} else {
- def : Pat<(VTI.Vec (fma m1, m2, add)), (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (fma m1, m2, add)),
+ (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec (fma m1, m2, add)),
+ add)),
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
def : Pat<(VTI.Vec (pred_int m1, m2, add, pred)),
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
}
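
Per-lane sketch of the new vselect-of-fma patterns above: inactive lanes keep the accumulator, so the whole select can be folded into one predicated VFMA/VFMS under a VPT "then" mask:

    // Inactive lanes keep the accumulator, matching a "then"-predicated VFMA.
    float fmaLane(float m1, float m2, float add, bool laneActive) {
      return laneActive ? (m1 * m2 + add) : add;   // the hardware fuses the multiply-add
    }
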
@@ -3591,20 +3711,14 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>;
defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
let validForTailPredication = 1;
}
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
}
}
@@ -3706,7 +3820,14 @@ multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI>
: MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
-defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
+defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v8f16 (fabs (fsub (v8f16 MQPR:$Qm), (v8f16 MQPR:$Qn)))),
+ (MVE_VABDf16 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v4f32 (fabs (fsub (v4f32 MQPR:$Qm), (v4f32 MQPR:$Qn)))),
+ (MVE_VABDf32 MQPR:$Qm, MQPR:$Qn)>;
+}
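
Scalar form of the new fabs(fsub) patterns above, which match a single vabd.f16/f32 instead of a vsub followed by a vabs:

    #include <cmath>

    // |a - b| per lane; previously this needed two instructions.
    float fabdLane(float a, float b) { return std::fabs(a - b); }
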
class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
Operand imm_operand_type>
@@ -3926,8 +4047,8 @@ multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int,
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
-
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))),
+ (VTI.Vec (Inst $v))>;
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
@@ -3962,6 +4083,8 @@ class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
let Inst{4} = 0b0;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b1;
+
+ let isCommutable = 1;
}
multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
@@ -4287,6 +4410,10 @@ let Predicates = [HasMVEInt] in {
// vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles.
def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>;
+def load_align4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+
let Predicates = [HasMVEInt] in {
foreach VT = [ v4i1, v8i1, v16i1 ] in {
def : Pat<(i32 (predicate_cast (VT VCCR:$src))),
@@ -4299,6 +4426,13 @@ let Predicates = [HasMVEInt] in {
(VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
}
+ // If we happen to be casting from a load we can convert that straight
+ // into a predicate load, so long as the load is of the correct type.
+ foreach VT = [ v4i1, v8i1, v16i1 ] in {
+ def : Pat<(VT (predicate_cast (i32 (load_align4 taddrmode_imm7<2>:$addr)))),
+ (VT (VLDR_P0_off taddrmode_imm7<2>:$addr))>;
+ }
+
// Here we match the specific SDNode type 'ARMVectorRegCastImpl'
// rather than the more general 'ARMVectorRegCast' which would also
// match some bitconverts. If we use the latter in cases where the
@@ -4307,7 +4441,8 @@ let Predicates = [HasMVEInt] in {
foreach VT = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
foreach VT2 = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
- def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))), (VT MQPR:$src)>;
+ def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))),
+ (VT MQPR:$src)>;
}
// end of MVE compares
@@ -4635,7 +4770,7 @@ class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
let Inst{16} = 0b1;
let Inst{12} = T;
let Inst{8} = 0b0;
- let Inst{7} = !if(!eq(bit_17, 0), 1, 0);
+ let Inst{7} = !not(bit_17);
let Inst{0} = 0b1;
let validForTailPredication = 1;
let retainsPreviousHalfElement = 1;
@@ -4666,7 +4801,7 @@ multiclass MVE_VMOVN_p<Instruction Inst, bit top,
(VTI.Vec MQPR:$Qm), (i32 top))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
- if !eq(top, 0) then {
+ if !not(top) then {
// If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd
// lanes of a with the odd lanes of b. In other words, the lanes we're
// _keeping_ from a are the even ones. So we can flip it round and say that
@@ -5023,32 +5158,6 @@ multiclass MVE_vec_scalar_int_pat_m<Instruction inst, MVEVectorVTInfo VTI,
}
}
-// Patterns for vector-scalar instructions with FP operands
-multiclass MVE_vec_scalar_fp_pat_m<SDNode unpred_op, Intrinsic pred_int,
- Instruction instr_f16,
- Instruction instr_f32> {
- let Predicates = [HasMVEFloat] in {
- // Unpredicated F16
- def : Pat<(v8f16 (unpred_op (v8f16 MQPR:$Qm), (v8f16 (ARMvdup rGPR:$val)))),
- (v8f16 (instr_f16 (v8f16 MQPR:$Qm), (i32 rGPR:$val)))>;
- // Unpredicated F32
- def : Pat<(v4f32 (unpred_op (v4f32 MQPR:$Qm), (v4f32 (ARMvdup rGPR:$val)))),
- (v4f32 (instr_f32 (v4f32 MQPR:$Qm), (i32 rGPR:$val)))>;
- // Predicated F16
- def : Pat<(v8f16 (pred_int (v8f16 MQPR:$Qm), (v8f16 (ARMvdup rGPR:$val)),
- (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
- (v8f16 (instr_f16 (v8f16 MQPR:$Qm), (i32 rGPR:$val),
- ARMVCCThen, (v8i1 VCCR:$mask),
- (v8f16 MQPR:$inactive)))>;
- // Predicated F32
- def : Pat<(v4f32 (pred_int (v4f32 MQPR:$Qm), (v4f32 (ARMvdup rGPR:$val)),
- (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
- (v4f32 (instr_f32 (v4f32 MQPR:$Qm), (i32 rGPR:$val),
- ARMVCCThen, (v4i1 VCCR:$mask),
- (v4f32 MQPR:$inactive)))>;
- }
-}
-
class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
bit bit_5, bit bit_12, bit bit_16, bit bit_28>
: MVE_qDest_rSrc<iname, suffix, ""> {
@@ -5064,10 +5173,11 @@ class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
// Vector-scalar add/sub
multiclass MVE_VADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b0, subtract, 0b1, 0b0>;
- defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
- unpred_op, pred_int>;
+ let Predicates = [HasMVEInt] in {
+ defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>;
+ }
}
multiclass MVE_VADD_qr_m<MVEVectorVTInfo VTI>
@@ -5086,36 +5196,35 @@ defm MVE_VSUB_qr_i32 : MVE_VSUB_qr_m<MVE_v4i32>;
// Vector-scalar saturating add/sub
multiclass MVE_VQADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode unpred_op_s, SDNode unpred_op_u,
- Intrinsic pred_int> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b1, subtract,
0b0, VTI.Unsigned>;
- defvar unpred_op = !if(VTI.Unsigned, unpred_op_u, unpred_op_s);
- defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
- unpred_op, pred_int, 0, 1>;
+
+ let Predicates = [HasMVEInt] in {
+ defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
+ !cast<Instruction>(NAME)>;
+ }
}
-multiclass MVE_VQADD_qr_m<MVEVectorVTInfo VTI>
- : MVE_VQADDSUB_qr_m<"vqadd", VTI, 0b0, saddsat, uaddsat,
- int_arm_mve_qadd_predicated>;
+multiclass MVE_VQADD_qr_m<MVEVectorVTInfo VTI, SDNode Op>
+ : MVE_VQADDSUB_qr_m<"vqadd", VTI, 0b0, Op, int_arm_mve_qadd_predicated>;
-multiclass MVE_VQSUB_qr_m<MVEVectorVTInfo VTI>
- : MVE_VQADDSUB_qr_m<"vqsub", VTI, 0b1, ssubsat, usubsat,
- int_arm_mve_qsub_predicated>;
+multiclass MVE_VQSUB_qr_m<MVEVectorVTInfo VTI, SDNode Op>
+ : MVE_VQADDSUB_qr_m<"vqsub", VTI, 0b1, Op, int_arm_mve_qsub_predicated>;
-defm MVE_VQADD_qr_s8 : MVE_VQADD_qr_m<MVE_v16s8>;
-defm MVE_VQADD_qr_s16 : MVE_VQADD_qr_m<MVE_v8s16>;
-defm MVE_VQADD_qr_s32 : MVE_VQADD_qr_m<MVE_v4s32>;
-defm MVE_VQADD_qr_u8 : MVE_VQADD_qr_m<MVE_v16u8>;
-defm MVE_VQADD_qr_u16 : MVE_VQADD_qr_m<MVE_v8u16>;
-defm MVE_VQADD_qr_u32 : MVE_VQADD_qr_m<MVE_v4u32>;
+defm MVE_VQADD_qr_s8 : MVE_VQADD_qr_m<MVE_v16s8, saddsat>;
+defm MVE_VQADD_qr_s16 : MVE_VQADD_qr_m<MVE_v8s16, saddsat>;
+defm MVE_VQADD_qr_s32 : MVE_VQADD_qr_m<MVE_v4s32, saddsat>;
+defm MVE_VQADD_qr_u8 : MVE_VQADD_qr_m<MVE_v16u8, uaddsat>;
+defm MVE_VQADD_qr_u16 : MVE_VQADD_qr_m<MVE_v8u16, uaddsat>;
+defm MVE_VQADD_qr_u32 : MVE_VQADD_qr_m<MVE_v4u32, uaddsat>;
-defm MVE_VQSUB_qr_s8 : MVE_VQSUB_qr_m<MVE_v16s8>;
-defm MVE_VQSUB_qr_s16 : MVE_VQSUB_qr_m<MVE_v8s16>;
-defm MVE_VQSUB_qr_s32 : MVE_VQSUB_qr_m<MVE_v4s32>;
-defm MVE_VQSUB_qr_u8 : MVE_VQSUB_qr_m<MVE_v16u8>;
-defm MVE_VQSUB_qr_u16 : MVE_VQSUB_qr_m<MVE_v8u16>;
-defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32>;
+defm MVE_VQSUB_qr_s8 : MVE_VQSUB_qr_m<MVE_v16s8, ssubsat>;
+defm MVE_VQSUB_qr_s16 : MVE_VQSUB_qr_m<MVE_v8s16, ssubsat>;
+defm MVE_VQSUB_qr_s32 : MVE_VQSUB_qr_m<MVE_v4s32, ssubsat>;
+defm MVE_VQSUB_qr_u8 : MVE_VQSUB_qr_m<MVE_v16u8, usubsat>;
+defm MVE_VQSUB_qr_u16 : MVE_VQSUB_qr_m<MVE_v8u16, usubsat>;
+defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32, usubsat>;
class MVE_VQDMULL_qr<string iname, string suffix, bit size,
bit T, string cstr="", list<dag> pattern=[]>
@@ -5206,19 +5315,25 @@ defm MVE_VHSUB_qr_u8 : MVE_VHSUB_qr_m<MVE_v16u8>;
defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16>;
defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>;
+multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
+ SDNode Op, Intrinsic PredInt> {
+ def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract>;
+ defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
+ !cast<Instruction>(NAME)>;
+}
+
let Predicates = [HasMVEFloat] in {
- def MVE_VADD_qr_f32 : MVE_VxADDSUB_qr<"vadd", "f32", 0b0, 0b11, 0b0>;
- def MVE_VADD_qr_f16 : MVE_VxADDSUB_qr<"vadd", "f16", 0b1, 0b11, 0b0>;
+ defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd,
+ int_arm_mve_add_predicated>;
+ defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd,
+ int_arm_mve_add_predicated>;
- def MVE_VSUB_qr_f32 : MVE_VxADDSUB_qr<"vsub", "f32", 0b0, 0b11, 0b1>;
- def MVE_VSUB_qr_f16 : MVE_VxADDSUB_qr<"vsub", "f16", 0b1, 0b11, 0b1>;
+ defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub,
+ int_arm_mve_sub_predicated>;
+ defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub,
+ int_arm_mve_sub_predicated>;
}
-defm : MVE_vec_scalar_fp_pat_m<fadd, int_arm_mve_add_predicated,
- MVE_VADD_qr_f16, MVE_VADD_qr_f32>;
-defm : MVE_vec_scalar_fp_pat_m<fsub, int_arm_mve_sub_predicated,
- MVE_VSUB_qr_f16, MVE_VSUB_qr_f32>;
-
class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
bit bit_7, bit bit_17, list<dag> pattern=[]>
: MVE_qDest_single_rSrc<iname, suffix, pattern> {
@@ -5346,8 +5461,10 @@ class MVE_VMUL_qr_int<string iname, string suffix, bits<2> size>
multiclass MVE_VMUL_qr_int_m<MVEVectorVTInfo VTI> {
def "" : MVE_VMUL_qr_int<"vmul", VTI.Suffix, VTI.Size>;
- defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
- mul, int_arm_mve_mul_predicated>;
+ let Predicates = [HasMVEInt] in {
+ defm : MVE_TwoOpPatternDup<VTI, mul, int_arm_mve_mul_predicated, (? ),
+ !cast<Instruction>(NAME), ARMimmOneV>;
+ }
}
defm MVE_VMUL_qr_i8 : MVE_VMUL_qr_int_m<MVE_v16i8>;
@@ -5364,21 +5481,25 @@ class MVE_VxxMUL_qr<string iname, string suffix,
let Inst{12} = 0b0;
let Inst{8} = 0b0;
let Inst{5} = 0b1;
+ let validForTailPredication = 1;
}
multiclass MVE_VxxMUL_qr_m<string iname, MVEVectorVTInfo VTI, bit bit_28,
- Intrinsic int_unpred, Intrinsic int_pred> {
+ PatFrag Op, Intrinsic int_unpred, Intrinsic int_pred> {
def "" : MVE_VxxMUL_qr<iname, VTI.Suffix, bit_28, VTI.Size>;
- defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
- int_unpred, int_pred>;
+
+ let Predicates = [HasMVEInt] in {
+ defm : MVE_TwoOpPatternDup<VTI, Op, int_pred, (? ), !cast<Instruction>(NAME)>;
+ }
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI, int_unpred, int_pred>;
}
multiclass MVE_VQDMULH_qr_m<MVEVectorVTInfo VTI> :
- MVE_VxxMUL_qr_m<"vqdmulh", VTI, 0b0,
+ MVE_VxxMUL_qr_m<"vqdmulh", VTI, 0b0, MVEvqdmulh,
int_arm_mve_vqdmulh, int_arm_mve_qdmulh_predicated>;
multiclass MVE_VQRDMULH_qr_m<MVEVectorVTInfo VTI> :
- MVE_VxxMUL_qr_m<"vqrdmulh", VTI, 0b1,
+ MVE_VxxMUL_qr_m<"vqrdmulh", VTI, 0b1, null_frag,
int_arm_mve_vqrdmulh, int_arm_mve_qrdmulh_predicated>;
defm MVE_VQDMULH_qr_s8 : MVE_VQDMULH_qr_m<MVE_v16s8>;
@@ -5389,13 +5510,17 @@ defm MVE_VQRDMULH_qr_s8 : MVE_VQRDMULH_qr_m<MVE_v16s8>;
defm MVE_VQRDMULH_qr_s16 : MVE_VQRDMULH_qr_m<MVE_v8s16>;
defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
-let Predicates = [HasMVEFloat], validForTailPredication = 1 in {
- def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>;
- def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>;
+multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> {
+ let validForTailPredication = 1 in
+ def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11>;
+ defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
+ !cast<Instruction>(NAME)>;
}
-defm : MVE_vec_scalar_fp_pat_m<fmul, int_arm_mve_mul_predicated,
- MVE_VMUL_qr_f16, MVE_VMUL_qr_f32>;
+let Predicates = [HasMVEFloat] in {
+ defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16>;
+ defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32>;
+}
class MVE_VFMAMLA_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, bit S,
@@ -5470,6 +5595,10 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
if scalar_addend then {
def : Pat<(VTI.Vec (fma v1, v2, vs)),
(VTI.Vec (Inst v1, v2, is))>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec (fma v1, v2, vs)),
+ v1)),
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
def : Pat<(VTI.Vec (pred_int v1, v2, vs, pred)),
(VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred))>;
} else {
@@ -5477,6 +5606,14 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
(VTI.Vec (Inst v2, v1, is))>;
def : Pat<(VTI.Vec (fma vs, v1, v2)),
(VTI.Vec (Inst v2, v1, is))>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec (fma vs, v2, v1)),
+ v1)),
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec (fma v2, vs, v1)),
+ v1)),
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
def : Pat<(VTI.Vec (pred_int v1, vs, v2, pred)),
(VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
def : Pat<(VTI.Vec (pred_int vs, v1, v2, pred)),
@@ -5605,7 +5742,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>;
def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
-let hasSideEffects = 1 in
+let isReMaterializable = 1 in
class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
"$Rn", vpred_n, "", pattern> {
@@ -5629,7 +5766,8 @@ multiclass MVE_VCTP<MVEVectorVTInfo VTI, Intrinsic intr> {
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- def : Pat<(intr rGPR:$Rn), (VTI.Pred (Inst rGPR:$Rn))>;
+ def : Pat<(intr rGPR:$Rn),
+ (VTI.Pred (Inst rGPR:$Rn))>;
def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)),
(VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask))>;
}
@@ -5707,6 +5845,41 @@ def MVE_VMOV_rr_q : MVE_VMOV_64bit<(outs rGPR:$Rt, rGPR:$Rt2), (ins MQPR:$Qd),
let AsmMatchConverter = "cvtMVEVMOVQtoDReg";
}
+let Predicates = [HasMVEInt] in {
+ // Double lane moves. There are a number of patterns here. We know that the
+  // insertelts will be in descending order by index, and need to match the 5
+ // patterns that might contain 2-0 or 3-1 pairs. These are:
+ // 3 2 1 0 -> vmovqrr 31; vmovqrr 20
+ // 3 2 1 -> vmovqrr 31; vmov 2
+ // 3 1 -> vmovqrr 31
+ // 2 1 0 -> vmovqrr 20; vmov 1
+ // 2 0 -> vmovqrr 20
+ // The other potential patterns will be handled by single lane inserts.
+ def : Pat<(insertelt (insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
+ rGPR:$srcA, (i32 0)),
+ rGPR:$srcB, (i32 1)),
+ rGPR:$srcC, (i32 2)),
+ rGPR:$srcD, (i32 3)),
+ (MVE_VMOV_q_rr (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcC, (i32 2), (i32 0)),
+ rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>;
+ def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
+ rGPR:$srcB, (i32 1)),
+ rGPR:$srcC, (i32 2)),
+ rGPR:$srcD, (i32 3)),
+ (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 2)),
+ rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>;
+ def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 1)), rGPR:$srcB, (i32 3)),
+ (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 3), (i32 1))>;
+ def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
+ rGPR:$srcB, (i32 0)),
+ rGPR:$srcC, (i32 1)),
+ rGPR:$srcD, (i32 2)),
+ (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 1)),
+ rGPR:$srcB, rGPR:$srcD, (i32 2), (i32 0))>;
+ def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 0)), rGPR:$srcB, (i32 2)),
+ (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 2), (i32 0))>;
+}
+
// end of coproc mov
// start of MVE interleaving load/store
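The lane-pairing behind the patterns in the hunk above can be modelled outside TableGen. The sketch below is illustrative only; it is ordinary C++, not LLVM code, and the function name selectDoubleLaneMoves is invented for the example. It mirrors the table in the comment: descending chains of insertelt lanes are split into (3,1) and (2,0) pairs, each covered by one vmovqrr, with any leftover lane handled by a single-lane vmov.

#include <cstdio>
#include <vector>

// Lanes arrive in descending order, e.g. {3,2,1,0} or {3,2,1}.
// Pairs (3,1) and (2,0) each become one vmovqrr; a lane left without a
// partner falls back to a single-lane vmov.
static void selectDoubleLaneMoves(const std::vector<int> &Lanes) {
  bool Have[4] = {false, false, false, false};
  for (int L : Lanes)
    Have[L] = true;
  for (int Hi = 3; Hi >= 2; --Hi) {
    int Lo = Hi - 2;
    if (Have[Hi] && Have[Lo]) {
      std::printf("vmovqrr lanes %d,%d\n", Hi, Lo);
      Have[Hi] = Have[Lo] = false;
    }
  }
  for (int L = 3; L >= 0; --L)
    if (Have[L])
      std::printf("vmov lane %d\n", L);
}

// selectDoubleLaneMoves({3, 2, 1}) prints "vmovqrr lanes 3,1" then
// "vmov lane 2", matching the "3 2 1" row of the table above.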
@@ -5735,6 +5908,7 @@ class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
let mayLoad = load;
let mayStore = !eq(load,0);
let hasSideEffects = 0;
+ let validForTailPredication = load;
}
// A parameter class used to encapsulate all the ways the writeback
@@ -6344,6 +6518,7 @@ class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> patte
let Inst{4} = 0b0;
let Defs = [VPR];
+ let validForTailPredication=1;
}
class MVE_VPTt1<string suffix, bits<2> size, dag iops>
@@ -6456,6 +6631,7 @@ class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=
let Defs = [VPR];
let Predicates = [HasMVEFloat];
+ let validForTailPredication=1;
}
class MVE_VPTft1<string suffix, bit size>
@@ -6583,13 +6759,6 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
(v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
-
- def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))),
- (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, ARMCCne))>;
- def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))),
- (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, ARMCCne))>;
- def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))),
- (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, ARMCCne))>;
}
let Predicates = [HasMVEFloat] in {
@@ -6938,7 +7107,7 @@ class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
PatFrag LoadKind, int shift>
- : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
+ : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty (ARMvmovImm (i32 0))))),
(Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>;
multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
@@ -7105,11 +7274,11 @@ multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string
(VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
// Masked ext loads
- def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
(VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
- def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
(VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
- def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
(VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
index 1b3f6075c0e9..a8c0d05d91c4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -509,7 +509,7 @@ def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
-def NEONvbsl : SDNode<"ARMISD::VBSL",
+def NEONvbsp : SDNode<"ARMISD::VBSP",
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
@@ -534,20 +534,6 @@ def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
-def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{
- ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits = 0;
- uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
- return (EltBits == 32 && EltVal == 0);
-}]>;
-
-def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{
- ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits = 0;
- uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
- return (EltBits == 8 && EltVal == 0xff);
-}]>;
-
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//
@@ -4211,10 +4197,10 @@ def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
"vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "u", add, zext, 1>;
+ "vaddl", "u", add, zanyext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
-defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
@@ -4526,9 +4512,9 @@ let Predicates = [HasNEON, HasV8_1a] in {
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (saddsat
(v4i32 QPR:$src1),
- (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 (int_arm_neon_vqrdmulh
(v4i32 QPR:$src2),
- (v4i32 (ARMvduplane (v4i32 QPR:$src3),
+ (v4i32 (ARMvduplane (v4i32 QPR:$src3),
imm:$lane)))))),
(v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
@@ -4579,17 +4565,17 @@ let Predicates = [HasNEON, HasV8_1a] in {
(v2i32 DPR:$Vn),
(v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
- (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
imm:$lane))>;
def : Pat<(v8i16 (ssubsat
(v8i16 QPR:$src1),
(v8i16 (int_arm_neon_vqrdmulh
(v8i16 QPR:$src2),
- (v8i16 (ARMvduplane (v8i16 QPR:$src3),
+ (v8i16 (ARMvduplane (v8i16 QPR:$src3),
imm:$lane)))))),
(v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
(v8i16 QPR:$src2),
- (v4i16 (EXTRACT_SUBREG
+ (v4i16 (EXTRACT_SUBREG
QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
@@ -4601,7 +4587,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
imm:$lane)))))),
(v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
- (v2i32 (EXTRACT_SUBREG
+ (v2i32 (EXTRACT_SUBREG
QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
@@ -5059,10 +5045,10 @@ def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
"vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "u", sub, zext, 0>;
+ "vsubl", "u", sub, zanyext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
-defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
@@ -5273,9 +5259,9 @@ def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
// Vector Bitwise Operations.
def vnotd : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+ (xor node:$in, ARMimmAllOnesD)>;
def vnotq : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
+ (xor node:$in, ARMimmAllOnesV)>;
// VAND : Vector Bitwise AND
@@ -5442,74 +5428,86 @@ def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
}
-// VBSL : Vector Bitwise Select
-def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
- (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
- N3RegFrm, IIC_VCNTiD,
- "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [(set DPR:$Vd,
- (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+// The TwoAddress pass will not go looking for equivalent operations
+// with different register constraints; it just inserts copies.
+// That is why the pseudo VBSP is implemented. It is expanded later into
+// VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
+def VBSPd
+ : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ IIC_VBINiD, "",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
(v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
(v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
(v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
(v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
- (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
- (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
}
-def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
- (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
- N3RegFrm, IIC_VCNTiQ,
- "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [(set QPR:$Vd,
- (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
-
+def VBSPq
+ : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ IIC_VBINiQ, "",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
(v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
(v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
(v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
(v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
- (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
- (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
}
+// VBSL : Vector Bitwise Select
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
-// FIXME: This instruction's encoding MAY NOT BE correct.
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
@@ -5523,7 +5521,6 @@ def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
-// FIXME: This instruction's encoding MAY NOT BE correct.
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
@@ -5535,10 +5532,6 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[]>;
-// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
-// for equivalent operations with different register constraints; it just
-// inserts copies.
-
// Vector Absolute Differences.
// VABD : Vector Absolute Difference
@@ -6047,9 +6040,9 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
// Vector Negate.
def vnegd : PatFrag<(ops node:$in),
- (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+ (sub ARMimmAllZerosD, node:$in)>;
def vnegq : PatFrag<(ops node:$in),
- (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
+ (sub ARMimmAllZerosV, node:$in)>;
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
@@ -6263,11 +6256,11 @@ defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
- [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
+ [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
(VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
Requires<[HasZCZ]>;
def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
- [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
+ [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
(VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
Requires<[HasZCZ]>;
}
@@ -7953,7 +7946,7 @@ let Predicates = [HasNEON,IsLE] in {
(VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
-// The following patterns are basically a copy of the patterns above,
+// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
let Predicates = [HasNEON,IsBE] in {
@@ -9086,11 +9079,11 @@ multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
(!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
}
-def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v8i8>;
-def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v16i8>;
+def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
+def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;
-defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v8i8, (v2f32 DPR_VFP2:$Vm)>;
-defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
+defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>;
+defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
class BF16MM<bit Q, RegisterClass RegTy,
string opc>
@@ -9098,8 +9091,8 @@ class BF16MM<bit Q, RegisterClass RegTy,
(outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
N3RegFrm, IIC_VDOTPROD, "", "",
[(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
- (v16i8 QPR:$Vn),
- (v16i8 QPR:$Vm)))]> {
+ (v8bf16 QPR:$Vn),
+ (v8bf16 QPR:$Vm)))]> {
let Constraints = "$dst = $Vd";
let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
let DecoderNamespace = "VFPV8";
@@ -9113,8 +9106,8 @@ class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
[(set (v4f32 QPR:$dst),
(OpNode (v4f32 QPR:$Vd),
- (v16i8 QPR:$Vn),
- (v16i8 QPR:$Vm)))]> {
+ (v8bf16 QPR:$Vn),
+ (v8bf16 QPR:$Vm)))]> {
let Constraints = "$dst = $Vd";
let DecoderNamespace = "VFPV8";
}
@@ -9135,9 +9128,9 @@ multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
def : Pat<
(v4f32 (OpNode (v4f32 QPR:$Vd),
- (v16i8 QPR:$Vn),
- (v16i8 (bitconvert (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
- VectorIndex16:$lane)))))),
+ (v8bf16 QPR:$Vn),
+ (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
+ VectorIndex16:$lane)))),
(!cast<Instruction>(NAME) QPR:$Vd,
QPR:$Vn,
(EXTRACT_SUBREG QPR:$Vm,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
index 7fae32117243..3a33dfeecdc9 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -548,14 +548,18 @@ let isCall = 1,
// Also used for Thumb2
def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br,
- "blx${p}\t$func",
- [(ARMcall GPR:$func)]>,
+ "blx${p}\t$func", []>,
Requires<[IsThumb, HasV5T]>,
T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24;
bits<4> func;
let Inst{6-3} = func;
let Inst{2-0} = 0b000;
}
+ def tBLXr_noip : ARMPseudoExpand<(outs), (ins pred:$p, GPRnoip:$func),
+ 2, IIC_Br, [], (tBLXr pred:$p, GPR:$func)>,
+ Requires<[IsThumb, HasV5T]>,
+ Sched<[WriteBrL]>;
+
// ARMv8-M Security Extensions
def tBLXNSr : TI<(outs), (ins pred:$p, GPRnopc:$func), IIC_Br,
@@ -586,6 +590,11 @@ let isCall = 1,
Requires<[IsThumb]>, Sched<[WriteBr]>;
}
+def : ARMPat<(ARMcall GPR:$func), (tBLXr $func)>,
+ Requires<[IsThumb, HasV5T, NoSLSBLRMitigation]>;
+def : ARMPat<(ARMcall GPRnoip:$func), (tBLXr_noip $func)>,
+ Requires<[IsThumb, HasV5T, SLSBLRMitigation]>;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 7137e8ee66b8..5642cab32e7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1724,7 +1724,7 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
class T2IstT<bits<2> type, string opc, InstrItinClass ii>
- : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ : T2Ii8<(outs), (ins rGPR:$Rt, t2addrmode_imm8:$addr), ii, opc,
"\t$Rt, $addr", []>, Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -2575,7 +2575,6 @@ def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
Requires<[IsThumb2, HasDSP]>;
// Signed/Unsigned saturate.
-let hasSideEffects = 1 in
class T2SatI<dag iops, string opc, string asm>
: T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> {
bits<4> Rd;
@@ -2624,9 +2623,9 @@ def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn),
let Inst{4} = 0;
}
-def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
+def : T2Pat<(ARMssat GPRnopc:$Rn, imm0_31:$imm),
(t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
-def : T2Pat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm),
+def : T2Pat<(ARMusat GPRnopc:$Rn, imm0_31:$imm),
(t2USAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos),
(t2SSAT imm1_32:$pos, GPR:$a, 0)>;
@@ -2636,6 +2635,23 @@ def : T2Pat<(int_arm_ssat16 GPR:$a, imm1_16:$pos),
(t2SSAT16 imm1_16:$pos, GPR:$a)>;
def : T2Pat<(int_arm_usat16 GPR:$a, imm0_15:$pos),
(t2USAT16 imm0_15:$pos, GPR:$a)>;
+def : T2Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos),
+ (t2SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : T2Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos),
+ (t2SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>;
+def : T2Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
+ (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : T2Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos),
+ (t2USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>;
+def : T2Pat<(ARMssat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
+ (t2SSAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : T2Pat<(ARMssat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
+ (t2SSAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
+def : T2Pat<(ARMusat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
+ (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : T2Pat<(ARMusat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
+ (t2USAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
+
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
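The t2SSAT/t2USAT patterns added in this hunk fold a preceding shl or sra into the instruction's shift operand. As a reference for what the folded instruction computes, here is a minimal standalone model in plain C++; it is not LLVM or ACLE code, and the helper names ssat/usat are invented for the example. The saturation ranges follow the architectural definition of SSAT (signed, n in 1..32) and USAT (unsigned, n in 0..31).

#include <algorithm>
#include <cstdint>

// Signed saturate to an n-bit range, n in [1, 32].
static int32_t ssat(int64_t v, unsigned n) {
  int64_t lo = -(int64_t(1) << (n - 1));
  int64_t hi = (int64_t(1) << (n - 1)) - 1;
  return static_cast<int32_t>(std::clamp(v, lo, hi));
}

// Unsigned saturate to an n-bit range, n in [0, 31].
static uint32_t usat(int64_t v, unsigned n) {
  int64_t hi = (int64_t(1) << n) - 1;
  return static_cast<uint32_t>(std::clamp(v, int64_t(0), hi));
}

// A pattern such as (int_arm_ssat (shl x, s), n) now selects a single
// t2SSAT whose LSL amount is s, i.e. it computes ssat(int64_t(x) << s, n)
// instead of a separate shift followed by the saturate; the sra variants
// fold the arithmetic shift the same way.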
@@ -4919,6 +4935,15 @@ def : InstAlias<"pssbb", (t2DSB 0x4, 14, 0), 1>, Requires<[HasDB, IsThumb2]>;
// Armv8-R 'Data Full Barrier'
def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>;
+// SpeculationBarrierEndBB must only be used after an unconditional control
+// flow, i.e. after a terminator for which isBarrier is True.
+let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ def t2SpeculationBarrierISBDSBEndBB
+ : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+ def t2SpeculationBarrierSBEndBB
+ : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+}
+
// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
// width specifier.
def : t2InstAlias<"ldr${p} $Rt, $addr",
@@ -5404,9 +5429,16 @@ def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> {
let isTerminator = 1;
}
+let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB] in {
+
+let usesCustomInserter = 1 in
def t2DoLoopStart :
- t2PseudoInst<(outs), (ins rGPR:$elts), 4, IIC_Br,
- [(int_set_loop_iterations rGPR:$elts)]>, Sched<[WriteBr]>;
+ t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts), 4, IIC_Br,
+ [(set GPRlr:$X, (int_start_loop_iterations rGPR:$elts))]>;
+
+let isTerminator = 1, hasSideEffects = 1 in
+def t2DoLoopStartTP :
+ t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts, rGPR:$count), 4, IIC_Br, []>;
let hasSideEffects = 0 in
def t2LoopDec :
@@ -5426,8 +5458,14 @@ def t2LoopEnd :
t2PseudoInst<(outs), (ins GPRlr:$elts, brtarget:$target),
8, IIC_Br, []>, Sched<[WriteBr]>;
+def t2LoopEndDec :
+ t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$elts, brtarget:$target),
+ 8, IIC_Br, []>, Sched<[WriteBr]>;
+
} // end isBranch, isTerminator, hasSideEffects
+}
+
} // end isNotDuplicable
class CS<string iname, bits<4> opcode, list<dag> pattern=[]>
@@ -5446,6 +5484,7 @@ class CS<string iname, bits<4> opcode, list<dag> pattern=[]>
let Inst{3-0} = Rm{3-0};
let Uses = [CPSR];
+ let hasSideEffects = 0;
}
def t2CSEL : CS<"csel", 0b1000>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
index 8a652c1d90f6..2be58d7a0e62 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -54,6 +54,16 @@ def vfp_f16imm : Operand<f16>,
let ParserMatchClass = FPImmOperand;
}
+def vfp_f32f16imm_xform : SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP32FP16Imm(InVal);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>;
+
+def vfp_f32f16imm : PatLeaf<(f32 fpimm), [{
+ return ARM_AM::getFP32FP16Imm(N->getValueAPF()) != -1;
+ }], vfp_f32f16imm_xform>;
+
def vfp_f32imm_xform : SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = ARM_AM::getFP32Imm(InVal);
@@ -1551,6 +1561,8 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{5} = Sm{0};
let Inst{15-12} = Sd{4-1};
let Inst{22} = Sd{0};
+
+ let hasSideEffects = 0;
}
class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -2252,16 +2264,6 @@ def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
def : Pat<(f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
(VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
-// (fma x, (fneg y), z) -> (vfms z, x, y)
-def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
- (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
- (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
- Requires<[HasVFP4]>;
-def : Pat<(f16 (fma (f16 HPR:$Sn), (fneg (f16 HPR:$Sm)), (f16 HPR:$Sdin))),
- (VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
- Requires<[HasFullFP16]>;
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -2379,16 +2381,6 @@ def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
def : Pat<(fneg (f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))),
(VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
-// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
-def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
- (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
- (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
- Requires<[HasVFP4]>;
-def : Pat<(fneg (f16 (fma (f16 HPR:$Sn), (fneg (f16 HPR:$Sm)), (f16 HPR:$Sdin)))),
- (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
- Requires<[HasFullFP16]>;
//===----------------------------------------------------------------------===//
// FP Conditional moves.
@@ -2634,6 +2626,11 @@ def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
}
}
+def : Pat<(f32 (vfp_f32f16imm:$imm)),
+ (f32 (COPY_TO_REGCLASS (f16 (FCONSTH (vfp_f32f16imm_xform (f32 $imm)))), SPR))> {
+ let Predicates = [HasFullFP16];
+}
+
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
@@ -2849,6 +2846,12 @@ let Predicates = [HasV8_1MMainline, HasMVEInt] in {
}
defm VSTR_P0 : vfp_vstrldr_sysreg<0b0,0b1101, "p0",
(outs), (ins VCCR:$P0)>;
+
+ let Defs = [VPR] in {
+ defm VLDR_VPR : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">;
+ }
+ defm VLDR_P0 : vfp_vstrldr_sysreg<0b1,0b1101, "p0",
+ (outs VCCR:$P0), (ins)>;
}
let Uses = [FPSCR] in {
@@ -2860,11 +2863,3 @@ let Uses = [FPSCR] in {
defm VLDR_FPCXTS : vfp_vstrldr_sysreg<0b1,0b1111, "fpcxts">;
}
}
-
-let Predicates = [HasV8_1MMainline, HasMVEInt] in {
- let Defs = [VPR] in {
- defm VLDR_VPR : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">;
- }
- defm VLDR_P0 : vfp_vstrldr_sysreg<0b1,0b1101, "p0",
- (outs VCCR:$P0), (ins)>;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index c8a894fb11a8..09a94cc3a8e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -164,8 +164,6 @@ createARMInstructionSelector(const ARMBaseTargetMachine &TM,
}
}
-const unsigned zero_reg = 0;
-
#define GET_GLOBALISEL_IMPL
#include "ARMGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index f3657155f47e..d9b60f4c4eba 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -88,7 +88,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32})
- .minScalar(0, s32);
+ .clampScalar(0, s32, s32);
if (ST.hasNEON())
getActionDefinitionsBuilder({G_ADD, G_SUB})
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a84d23d3bb96..aa1fe4e4ffda 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1268,6 +1268,7 @@ findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg,
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Thumb1 is already using updating loads/stores.
if (isThumb1) return false;
+ LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
const MachineOperand &BaseOP = MI->getOperand(0);
Register Base = BaseOP.getReg();
@@ -1319,8 +1320,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
return false;
}
}
- if (MergeInstr != MBB.end())
+ if (MergeInstr != MBB.end()) {
+ LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
MBB.erase(MergeInstr);
+ }
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
@@ -1335,6 +1338,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Transfer memoperands.
MIB.setMemRefs(MI->memoperands());
+ LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
MBB.erase(MBBI);
return true;
}
@@ -1382,9 +1386,27 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_POST;
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRBi12:
+ return ARM::t2LDRB_POST;
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBi12:
+ return ARM::t2LDRSB_POST;
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRHi12:
+ return ARM::t2LDRH_POST;
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHi12:
+ return ARM::t2LDRSH_POST;
case ARM::t2STRi8:
case ARM::t2STRi12:
return ARM::t2STR_POST;
+ case ARM::t2STRBi8:
+ case ARM::t2STRBi12:
+ return ARM::t2STRB_POST;
+ case ARM::t2STRHi8:
+ case ARM::t2STRHi12:
+ return ARM::t2STRH_POST;
case ARM::MVE_VLDRBS16:
return ARM::MVE_VLDRBS16_post;
@@ -1427,6 +1449,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// Thumb1 doesn't have updating LDR/STR.
// FIXME: Use LDM/STM with single register instead.
if (isThumb1) return false;
+ LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
Register Base = getLoadStoreBaseOp(*MI).getReg();
bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
@@ -1468,6 +1491,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
} else
return false;
}
+ LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
MBB.erase(MergeInstr);
ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
@@ -1479,39 +1503,54 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// updating load/store-multiple instructions can be used with only one
// register.)
MachineOperand &MO = MI->getOperand(0);
- BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
- .addReg(Base, getDefRegState(true)) // WB base register
- .addReg(Base, getKillRegState(isLd ? BaseKill : false))
- .addImm(Pred).addReg(PredReg)
- .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
- getKillRegState(MO.isKill())))
- .cloneMemRefs(*MI);
+ auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(isLd ? BaseKill : false))
+ .addImm(Pred)
+ .addReg(PredReg)
+ .addReg(MO.getReg(), (isLd ? getDefRegState(true)
+ : getKillRegState(MO.isKill())))
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
} else if (isLd) {
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
- BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg)
- .cloneMemRefs(*MI);
+ auto MIB =
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addImm(Offset)
+ .addImm(Pred)
+ .addReg(PredReg)
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
} else {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
- BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base)
- .addReg(0)
- .addImm(Imm)
- .add(predOps(Pred, PredReg))
- .cloneMemRefs(*MI);
+ auto MIB =
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addReg(0)
+ .addImm(Imm)
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
}
} else {
// t2LDR_PRE, t2LDR_POST
- BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base)
- .addImm(Offset)
- .add(predOps(Pred, PredReg))
- .cloneMemRefs(*MI);
+ auto MIB =
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
}
} else {
MachineOperand &MO = MI->getOperand(0);
@@ -1521,21 +1560,25 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
- BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base)
- .addReg(0)
- .addImm(Imm)
- .add(predOps(Pred, PredReg))
- .cloneMemRefs(*MI);
+ auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base)
+ .addReg(0)
+ .addImm(Imm)
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
} else {
// t2STR_PRE, t2STR_POST
- BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base)
- .addImm(Offset)
- .add(predOps(Pred, PredReg))
- .cloneMemRefs(*MI);
+ auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base)
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
}
}
MBB.erase(MBBI);
@@ -1549,6 +1592,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
"Must have t2STRDi8 or t2LDRDi8");
if (MI.getOperand(3).getImm() != 0)
return false;
+ LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
// Behaviour for writeback is undefined if base register is the same as one
// of the others.
@@ -1576,6 +1620,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
} else
return false;
}
+ LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
MBB.erase(MergeInstr);
DebugLoc DL = MI.getDebugLoc();
@@ -1597,6 +1642,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
MIB.add(MO);
MIB.cloneMemRefs(MI);
+ LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
MBB.erase(MBBI);
return true;
}
@@ -2539,11 +2585,169 @@ static int getBaseOperandIndex(MachineInstr &MI) {
case ARM::MVE_VSTRBU8:
case ARM::MVE_VSTRHU16:
case ARM::MVE_VSTRWU32:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRBi8:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi8:
+ case ARM::t2STRHi12:
return 1;
+ case ARM::MVE_VLDRBS16_post:
+ case ARM::MVE_VLDRBS32_post:
+ case ARM::MVE_VLDRBU16_post:
+ case ARM::MVE_VLDRBU32_post:
+ case ARM::MVE_VLDRHS32_post:
+ case ARM::MVE_VLDRHU32_post:
+ case ARM::MVE_VLDRBU8_post:
+ case ARM::MVE_VLDRHU16_post:
+ case ARM::MVE_VLDRWU32_post:
+ case ARM::MVE_VSTRB16_post:
+ case ARM::MVE_VSTRB32_post:
+ case ARM::MVE_VSTRH32_post:
+ case ARM::MVE_VSTRBU8_post:
+ case ARM::MVE_VSTRHU16_post:
+ case ARM::MVE_VSTRWU32_post:
+ case ARM::MVE_VLDRBS16_pre:
+ case ARM::MVE_VLDRBS32_pre:
+ case ARM::MVE_VLDRBU16_pre:
+ case ARM::MVE_VLDRBU32_pre:
+ case ARM::MVE_VLDRHS32_pre:
+ case ARM::MVE_VLDRHU32_pre:
+ case ARM::MVE_VLDRBU8_pre:
+ case ARM::MVE_VLDRHU16_pre:
+ case ARM::MVE_VLDRWU32_pre:
+ case ARM::MVE_VSTRB16_pre:
+ case ARM::MVE_VSTRB32_pre:
+ case ARM::MVE_VSTRH32_pre:
+ case ARM::MVE_VSTRBU8_pre:
+ case ARM::MVE_VSTRHU16_pre:
+ case ARM::MVE_VSTRWU32_pre:
+ return 2;
}
return -1;
}
+static bool isPostIndex(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case ARM::MVE_VLDRBS16_post:
+ case ARM::MVE_VLDRBS32_post:
+ case ARM::MVE_VLDRBU16_post:
+ case ARM::MVE_VLDRBU32_post:
+ case ARM::MVE_VLDRHS32_post:
+ case ARM::MVE_VLDRHU32_post:
+ case ARM::MVE_VLDRBU8_post:
+ case ARM::MVE_VLDRHU16_post:
+ case ARM::MVE_VLDRWU32_post:
+ case ARM::MVE_VSTRB16_post:
+ case ARM::MVE_VSTRB32_post:
+ case ARM::MVE_VSTRH32_post:
+ case ARM::MVE_VSTRBU8_post:
+ case ARM::MVE_VSTRHU16_post:
+ case ARM::MVE_VSTRWU32_post:
+ return true;
+ }
+ return false;
+}
+
+static bool isPreIndex(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case ARM::MVE_VLDRBS16_pre:
+ case ARM::MVE_VLDRBS32_pre:
+ case ARM::MVE_VLDRBU16_pre:
+ case ARM::MVE_VLDRBU32_pre:
+ case ARM::MVE_VLDRHS32_pre:
+ case ARM::MVE_VLDRHU32_pre:
+ case ARM::MVE_VLDRBU8_pre:
+ case ARM::MVE_VLDRHU16_pre:
+ case ARM::MVE_VLDRWU32_pre:
+ case ARM::MVE_VSTRB16_pre:
+ case ARM::MVE_VSTRB32_pre:
+ case ARM::MVE_VSTRH32_pre:
+ case ARM::MVE_VSTRBU8_pre:
+ case ARM::MVE_VSTRHU16_pre:
+ case ARM::MVE_VSTRWU32_pre:
+ return true;
+ }
+ return false;
+}
+
+// Given a memory access Opcode, check that the given Imm would be a valid Offset
+// for this instruction (same as isLegalAddressImm), or if the instruction
+// could be easily converted to one where that was valid. For example, converting
+// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
+// AdjustBaseAndOffset below.
+static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
+ const TargetInstrInfo *TII,
+ int &CodesizeEstimate) {
+ if (isLegalAddressImm(Opcode, Imm, TII))
+ return true;
+
+ // We can convert AddrModeT2_i12 to AddrModeT2_i8.
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i12:
+ CodesizeEstimate += 1;
+ return std::abs(Imm) < (((1 << 8) * 1) - 1);
+ }
+ return false;
+}
+
+// Given an MI, adjust its address BaseReg to use NewBaseReg and its address offset
+// by -Offset. This can either happen in-place or be a replacement as MI is
+// converted to another instruction type.
+static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
+ int Offset, const TargetInstrInfo *TII) {
+ unsigned BaseOp = getBaseOperandIndex(*MI);
+ MI->getOperand(BaseOp).setReg(NewBaseReg);
+ int OldOffset = MI->getOperand(BaseOp + 1).getImm();
+ if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
+ MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
+ else {
+ unsigned ConvOpcode;
+ switch (MI->getOpcode()) {
+ case ARM::t2LDRHi12:
+ ConvOpcode = ARM::t2LDRHi8;
+ break;
+ case ARM::t2LDRSHi12:
+ ConvOpcode = ARM::t2LDRSHi8;
+ break;
+ case ARM::t2LDRBi12:
+ ConvOpcode = ARM::t2LDRBi8;
+ break;
+ case ARM::t2LDRSBi12:
+ ConvOpcode = ARM::t2LDRSBi8;
+ break;
+ case ARM::t2STRHi12:
+ ConvOpcode = ARM::t2STRHi8;
+ break;
+ case ARM::t2STRBi12:
+ ConvOpcode = ARM::t2STRBi8;
+ break;
+ default:
+ llvm_unreachable("Unhandled convertable opcode");
+ }
+ assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
+ "Illegal Address Immediate after convert!");
+
+ const MCInstrDesc &MCID = TII->get(ConvOpcode);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(OldOffset - Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ MI->eraseFromParent();
+ }
+}
+
static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
Register NewReg,
const TargetInstrInfo *TII,
@@ -2562,34 +2766,70 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
TRC = TII->getRegClass(MCID, 2, TRI, *MF);
MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
- return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
- .addReg(NewReg, RegState::Define)
- .add(MI->getOperand(0))
- .add(MI->getOperand(1))
- .addImm(Offset)
- .add(MI->getOperand(3))
- .add(MI->getOperand(4))
- .cloneMemRefs(*MI);
+ unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i7:
+ case ARMII::AddrModeT2_i7s2:
+ case ARMII::AddrModeT2_i7s4:
+ // Any MVE load/store
+ return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .addReg(NewReg, RegState::Define)
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ case ARMII::AddrModeT2_i8:
+ if (MI->mayLoad()) {
+ return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .add(MI->getOperand(0))
+ .addReg(NewReg, RegState::Define)
+ .add(MI->getOperand(1))
+ .addImm(Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ } else {
+ return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .addReg(NewReg, RegState::Define)
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ }
+ default:
+ llvm_unreachable("Unhandled createPostIncLoadStore");
+ }
}
// Given a Base Register, optimise the load/store uses to attempt to create more
-// post-inc accesses. We do this by taking zero offset loads/stores with an add,
-// and convert them to a postinc load/store of the same type. Any subsequent
-// accesses will be adjusted to use and account for the post-inc value.
+// post-inc accesses and fewer register moves. We do this by taking zero offset
+// loads/stores with an add, and converting them to a postinc load/store of the
+// same type. Any subsequent accesses will be adjusted to use and account for
+// the post-inc value.
// For example:
// LDR #0 LDR_POSTINC #16
// LDR #4 LDR #-12
// LDR #8 LDR #-8
// LDR #12 LDR #-4
// ADD #16
+//
+// At the same time if we do not find an increment but do find an existing
+// pre/post inc instruction, we can still adjust the offsets of subsequent
+// instructions to save the register move that would otherwise be needed for the
+// in-place increment.
bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
// We are looking for:
// One zero offset load/store that can become postinc
MachineInstr *BaseAccess = nullptr;
+ MachineInstr *PrePostInc = nullptr;
// An increment that can be folded in
MachineInstr *Increment = nullptr;
// Other accesses after BaseAccess that will need to be updated to use the
- // postinc value
+ // postinc value.
SmallPtrSet<MachineInstr *, 8> OtherAccesses;
for (auto &Use : MRI->use_nodbg_instructions(Base)) {
if (!Increment && getAddSubImmediate(Use) != 0) {
@@ -2604,53 +2844,81 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
if (!Use.getOperand(BaseOp).isReg() ||
Use.getOperand(BaseOp).getReg() != Base)
return false;
- if (Use.getOperand(BaseOp + 1).getImm() == 0)
+ if (isPreIndex(Use) || isPostIndex(Use))
+ PrePostInc = &Use;
+ else if (Use.getOperand(BaseOp + 1).getImm() == 0)
BaseAccess = &Use;
else
OtherAccesses.insert(&Use);
}
- if (!BaseAccess || !Increment ||
- BaseAccess->getParent() != Increment->getParent())
- return false;
- Register PredReg;
- if (Increment->definesRegister(ARM::CPSR) ||
- getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
- return false;
+ int IncrementOffset;
+ Register NewBaseReg;
+ if (BaseAccess && Increment) {
+ if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
+ return false;
+ Register PredReg;
+ if (Increment->definesRegister(ARM::CPSR) ||
+ getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
+ return false;
- LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
- << Base.virtRegIndex() << "\n");
+ LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
+ << Base.virtRegIndex() << "\n");
- // Make sure that Increment has no uses before BaseAccess.
- for (MachineInstr &Use :
- MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
- if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
- LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
+ // Make sure that Increment has no uses before BaseAccess.
+ for (MachineInstr &Use :
+ MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
+ if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
+ LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
+ return false;
+ }
+ }
+
+ // Make sure that Increment can be folded into Base
+ IncrementOffset = getAddSubImmediate(*Increment);
+ unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
+ BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
+ if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
+ LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
return false;
}
}
+ else if (PrePostInc) {
+ // If we already have a pre/post index load/store then set BaseAccess,
+ // IncrementOffset and NewBaseReg to the values it already produces,
+    // allowing us to update any subsequent uses of BaseOp reg with the
+ // incremented value.
+ if (Increment)
+ return false;
- // Make sure that Increment can be folded into Base
- int IncrementOffset = getAddSubImmediate(*Increment);
- unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
- BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
- if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
- LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
- return false;
+ LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
+ << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
+ int BaseOp = getBaseOperandIndex(*PrePostInc);
+ IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
+ BaseAccess = PrePostInc;
+ NewBaseReg = PrePostInc->getOperand(0).getReg();
}
+ else
+ return false;
// And make sure that the negative value of increment can be added to all
// other offsets after the BaseAccess. We rely on either
// dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
// to keep things simple.
+  // This also adds a simple codesize metric, to detect when an instruction (like
+  // t2LDRBi12) that can often be shrunk to a thumb1 instruction (tLDRBi)
+  // cannot be, because it is converted to something else (t2LDRBi8). We start this
+ // at -1 for the gain from removing the increment.
SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
+ int CodesizeEstimate = -1;
for (auto *Use : OtherAccesses) {
if (DT->dominates(BaseAccess, Use)) {
SuccessorAccesses.insert(Use);
unsigned BaseOp = getBaseOperandIndex(*Use);
- if (!isLegalAddressImm(
- Use->getOpcode(),
- Use->getOperand(BaseOp + 1).getImm() - IncrementOffset, TII)) {
+ if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
+ Use->getOperand(BaseOp + 1).getImm() -
+ IncrementOffset,
+ TII, CodesizeEstimate)) {
LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
return false;
}
@@ -2660,24 +2928,27 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
return false;
}
}
+ if (STI->hasMinSize() && CodesizeEstimate > 0) {
+ LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
+ return false;
+ }
- // Replace BaseAccess with a post inc
- LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
- LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
- Register NewBaseReg = Increment->getOperand(0).getReg();
- MachineInstr *BaseAccessPost =
- createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
- BaseAccess->eraseFromParent();
- Increment->eraseFromParent();
- (void)BaseAccessPost;
- LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
+ if (!PrePostInc) {
+ // Replace BaseAccess with a post inc
+ LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
+ LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
+ NewBaseReg = Increment->getOperand(0).getReg();
+ MachineInstr *BaseAccessPost =
+ createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
+ BaseAccess->eraseFromParent();
+ Increment->eraseFromParent();
+ (void)BaseAccessPost;
+ LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
+ }
for (auto *Use : SuccessorAccesses) {
LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
- unsigned BaseOp = getBaseOperandIndex(*Use);
- Use->getOperand(BaseOp).setReg(NewBaseReg);
- int OldOffset = Use->getOperand(BaseOp + 1).getImm();
- Use->getOperand(BaseOp + 1).setImm(OldOffset - IncrementOffset);
+ AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII);
LLVM_DEBUG(dbgs() << " To : "; Use->dump());
}
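The increment-distribution rewrite that the comments in this file describe reduces to simple offset arithmetic. The sketch below is illustrative only: ordinary C++ with invented names (Access, distributeIncrement), not LLVM data structures, and it collapses the legal/convertible offset checks into a single |offset| < 255 bound taken from the T2 i8 fallback in isLegalOrConvertableAddressImm above.

#include <cstdlib>
#include <vector>

struct Access {
  int Offset;         // immediate offset from the shared base register
  bool PostInc = false;
};

// Fold an add of Inc into the zero-offset access as a post-increment and
// rebase every other offset by -Inc, as in the LDR example above.
static bool distributeIncrement(std::vector<Access> &Accesses, int Inc) {
  for (const Access &A : Accesses)
    if (A.Offset != 0 && std::abs(A.Offset - Inc) >= 255)
      return false; // rebased offset would not be encodable; bail out
  for (Access &A : Accesses) {
    if (A.Offset == 0) {
      A.PostInc = true;
      A.Offset = Inc;  // the post-inc form writes base + Inc back
    } else {
      A.Offset -= Inc; // later accesses now measure from the new base
    }
  }
  return true;
}

// Offsets {0, 4, 8, 12} with Inc = 16 become {post-inc 16, -12, -8, -4},
// matching the example in the DistributeIncrements comment.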
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index be75d6bef08c..8dc532058492 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -56,6 +56,7 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMBasicBlockInfo.h"
#include "ARMSubtarget.h"
+#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
@@ -73,6 +74,37 @@ using namespace llvm;
#define DEBUG_TYPE "arm-low-overhead-loops"
#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
+static cl::opt<bool>
+DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden,
+ cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"),
+ cl::init(false));
+
+static bool isVectorPredicated(MachineInstr *MI) {
+ int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
+ return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
+}
+
+static bool isVectorPredicate(MachineInstr *MI) {
+ return MI->findRegisterDefOperandIdx(ARM::VPR) != -1;
+}
+
+static bool hasVPRUse(MachineInstr &MI) {
+ return MI.findRegisterUseOperandIdx(ARM::VPR) != -1;
+}
+
+static bool isDomainMVE(MachineInstr *MI) {
+ uint64_t Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
+ return Domain == ARMII::DomainMVE;
+}
+
+static bool shouldInspect(MachineInstr &MI) {
+ return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
+}
+
+static bool isDo(MachineInstr *MI) {
+ return MI->getOpcode() != ARM::t2WhileLoopStart;
+}
+
namespace {
using InstSet = SmallPtrSetImpl<MachineInstr *>;
@@ -111,8 +143,7 @@ namespace {
// Insert exit blocks.
SmallVector<MachineBasicBlock*, 2> ExitBlocks;
ML.getExitBlocks(ExitBlocks);
- for (auto *MBB : ExitBlocks)
- Order.push_back(MBB);
+ append_range(Order, ExitBlocks);
// Then add the loop body.
Search(ML.getHeader());
@@ -143,73 +174,187 @@ namespace {
}
};
- // Represent a VPT block, a list of instructions that begins with a VPT/VPST
- // and has a maximum of four proceeding instructions. All instructions within
- // the block are predicated upon the vpr and we allow instructions to define
- // the vpr within in the block too.
- class VPTBlock {
- // The predicate then instruction, which is either a VPT, or a VPST
- // instruction.
- std::unique_ptr<PredicatedMI> PredicateThen;
- PredicatedMI *Divergent = nullptr;
- SmallVector<PredicatedMI, 4> Insts;
+ // Represent the current state of the VPR and hold all instances which
+ // represent a VPT block, which is a list of instructions that begins with a
+ // VPT/VPST and has a maximum of four following instructions. All
+ // instructions within the block are predicated upon the vpr and we allow
+ // instructions to define the vpr within the block too.
+ class VPTState {
+ friend struct LowOverheadLoop;
+
+ SmallVector<MachineInstr *, 4> Insts;
+
+ static SmallVector<VPTState, 4> Blocks;
+ static SetVector<MachineInstr *> CurrentPredicates;
+ static std::map<MachineInstr *,
+ std::unique_ptr<PredicatedMI>> PredicatedInsts;
+
+ static void CreateVPTBlock(MachineInstr *MI) {
+ assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR))
+ && "Can't begin VPT without predicate");
+ Blocks.emplace_back(MI);
+ // The execution of MI is predicated upon the current set of instructions
+ // that are AND'ed together to form the VPR predicate value. In the case
+ // that MI is a VPT, CurrentPredicates will also just be MI.
+ PredicatedInsts.emplace(
+ MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
+ }
- public:
- VPTBlock(MachineInstr *MI, SetVector<MachineInstr*> &Preds) {
- PredicateThen = std::make_unique<PredicatedMI>(MI, Preds);
+ static void reset() {
+ Blocks.clear();
+ PredicatedInsts.clear();
+ CurrentPredicates.clear();
}
- void addInst(MachineInstr *MI, SetVector<MachineInstr*> &Preds) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Adding predicated MI: " << *MI);
- if (!Divergent && !set_difference(Preds, PredicateThen->Predicates).empty()) {
- Divergent = &Insts.back();
- LLVM_DEBUG(dbgs() << " - has divergent predicate: " << *Divergent->MI);
- }
- Insts.emplace_back(MI, Preds);
- assert(Insts.size() <= 4 && "Too many instructions in VPT block!");
+ static void addInst(MachineInstr *MI) {
+ Blocks.back().insert(MI);
+ PredicatedInsts.emplace(
+ MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
}
+ static void addPredicate(MachineInstr *MI) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI);
+ CurrentPredicates.insert(MI);
+ }
+
+ static void resetPredicate(MachineInstr *MI) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI);
+ CurrentPredicates.clear();
+ CurrentPredicates.insert(MI);
+ }
+
+ public:
// Have we found an instruction within the block which defines the vpr? If
// so, not all the instructions in the block will have the same predicate.
- bool HasNonUniformPredicate() const {
- return Divergent != nullptr;
+ static bool hasUniformPredicate(VPTState &Block) {
+ return getDivergent(Block) == nullptr;
}
- // Is the given instruction part of the predicate set controlling the entry
- // to the block.
- bool IsPredicatedOn(MachineInstr *MI) const {
- return PredicateThen->Predicates.count(MI);
+ // If it exists, return the first internal instruction which modifies the
+ // VPR.
+ static MachineInstr *getDivergent(VPTState &Block) {
+ SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+ for (unsigned i = 1; i < Insts.size(); ++i) {
+ MachineInstr *Next = Insts[i];
+ if (isVectorPredicate(Next))
+ return Next; // Found an instruction altering the vpr.
+ }
+ return nullptr;
}
- // Returns true if this is a VPT instruction.
- bool isVPT() const { return !isVPST(); }
+ // Return whether the given instruction is predicated upon a VCTP.
+ static bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) {
+ SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI]->Predicates;
+ if (Exclusive && Predicates.size() != 1)
+ return false;
+ for (auto *PredMI : Predicates)
+ if (isVCTP(PredMI))
+ return true;
+ return false;
+ }
- // Returns true if this is a VPST instruction.
- bool isVPST() const {
- return PredicateThen->MI->getOpcode() == ARM::MVE_VPST;
+ // Is the VPST, controlling the block entry, predicated upon a VCTP?
+ static bool isEntryPredicatedOnVCTP(VPTState &Block,
+ bool Exclusive = false) {
+ SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+ return isPredicatedOnVCTP(Insts.front(), Exclusive);
}
- // Is the given instruction the only predicate which controls the entry to
- // the block.
- bool IsOnlyPredicatedOn(MachineInstr *MI) const {
- return IsPredicatedOn(MI) && PredicateThen->Predicates.size() == 1;
+ // If this block begins with a VPT, we can check whether it uses at least
+ // one predicated input, as well as possibly loop-invariant operands, which
+ // would result in it being implicitly predicated.
+ static bool hasImplicitlyValidVPT(VPTState &Block,
+ ReachingDefAnalysis &RDA) {
+ SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+ MachineInstr *VPT = Insts.front();
+ assert(isVPTOpcode(VPT->getOpcode()) &&
+ "Expected VPT block to begin with VPT/VPST");
+
+ if (VPT->getOpcode() == ARM::MVE_VPST)
+ return false;
+
+ auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
+ MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
+ return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
+ };
+
+ auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) {
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (!MO.isReg() || !MO.getReg())
+ return true;
+
+ SmallPtrSet<MachineInstr *, 2> Defs;
+ RDA.getGlobalReachingDefs(MI, MO.getReg(), Defs);
+ if (Defs.empty())
+ return true;
+
+ for (auto *Def : Defs)
+ if (Def->getParent() == VPT->getParent())
+ return false;
+ return true;
+ };
+
+ // Check that at least one of the operands is directly predicated on a
+ // vctp and allow an invariant value too.
+ return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
+ (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
+ (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
}
- unsigned size() const { return Insts.size(); }
- SmallVectorImpl<PredicatedMI> &getInsts() { return Insts; }
- MachineInstr *getPredicateThen() const { return PredicateThen->MI; }
- PredicatedMI *getDivergent() const { return Divergent; }
- };
+ static bool isValid(ReachingDefAnalysis &RDA) {
+ // All predication within the loop should be based on vctp. If the block
+ // isn't predicated on entry, check whether the vctp is within the block
+ // and that all other instructions are then predicated on it.
+ for (auto &Block : Blocks) {
+ if (isEntryPredicatedOnVCTP(Block, false) ||
+ hasImplicitlyValidVPT(Block, RDA))
+ continue;
- struct Reduction {
- MachineInstr *Init;
- MachineInstr &Copy;
- MachineInstr &Reduce;
- MachineInstr &VPSEL;
+ SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+ // We don't know how to convert a block with just a VPT;VCTP into
+ // anything valid once we remove the VCTP. For now just bail out.
+ assert(isVPTOpcode(Insts.front()->getOpcode()) &&
+ "Expected VPT block to start with a VPST or VPT!");
+ if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST &&
+ isVCTP(Insts.back()))
+ return false;
+
+ for (auto *MI : Insts) {
+ // Check that any internal VCTPs are 'Then' predicated.
+ if (isVCTP(MI) && getVPTInstrPredicate(*MI) != ARMVCC::Then)
+ return false;
+ // Skip other instructions that build up the predicate.
+ if (MI->getOpcode() == ARM::MVE_VPST || isVectorPredicate(MI))
+ continue;
+ // Check that any other instructions are predicated upon a vctp.
+ // TODO: We could infer when VPTs are implicitly predicated on the
+ // vctp (when the operands are predicated).
+ if (!isPredicatedOnVCTP(MI)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *MI);
+ return false;
+ }
+ }
+ }
+ return true;
+ }
- Reduction(MachineInstr *Init, MachineInstr *Mov, MachineInstr *Add,
- MachineInstr *Sel)
- : Init(Init), Copy(*Mov), Reduce(*Add), VPSEL(*Sel) { }
+ VPTState(MachineInstr *MI) { Insts.push_back(MI); }
+
+ void insert(MachineInstr *MI) {
+ Insts.push_back(MI);
+ // VPT/VPST + 4 predicated instructions.
+ assert(Insts.size() <= 5 && "Too many instructions in VPT block!");
+ }
+
+ bool containsVCTP() const {
+ for (auto *MI : Insts)
+ if (isVCTP(MI))
+ return true;
+ return false;
+ }
+
+ unsigned size() const { return Insts.size(); }
+ SmallVectorImpl<MachineInstr *> &getInsts() { return Insts; }
};
struct LowOverheadLoop {
@@ -221,17 +366,14 @@ namespace {
const TargetRegisterInfo &TRI;
const ARMBaseInstrInfo &TII;
MachineFunction *MF = nullptr;
- MachineInstr *InsertPt = nullptr;
+ MachineBasicBlock::iterator StartInsertPt;
+ MachineBasicBlock *StartInsertBB = nullptr;
MachineInstr *Start = nullptr;
MachineInstr *Dec = nullptr;
MachineInstr *End = nullptr;
- MachineInstr *VCTP = nullptr;
- SmallPtrSet<MachineInstr*, 4> SecondaryVCTPs;
- VPTBlock *CurrentBlock = nullptr;
- SetVector<MachineInstr*> CurrentPredicate;
- SmallVector<VPTBlock, 4> VPTBlocks;
+ MachineOperand TPNumElements;
+ SmallVector<MachineInstr*, 4> VCTPs;
SmallPtrSet<MachineInstr*, 4> ToRemove;
- SmallVector<std::unique_ptr<Reduction>, 1> Reductions;
SmallPtrSet<MachineInstr*, 4> BlockMasksToRecompute;
bool Revert = false;
bool CannotTailPredicate = false;
@@ -239,12 +381,14 @@ namespace {
LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI,
ReachingDefAnalysis &RDA, const TargetRegisterInfo &TRI,
const ARMBaseInstrInfo &TII)
- : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII) {
+ : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII),
+ TPNumElements(MachineOperand::CreateImm(0)) {
MF = ML.getHeader()->getParent();
if (auto *MBB = ML.getLoopPreheader())
Preheader = MBB;
else if (auto *MBB = MLI.findLoopPreheader(&ML, true))
Preheader = MBB;
+ VPTState::reset();
}
// If this is an MVE instruction, check that we know how to use tail
@@ -259,18 +403,18 @@ namespace {
bool IsTailPredicationLegal() const {
// For now, let's keep things really simple and only support a single
// block for tail predication.
- return !Revert && FoundAllComponents() && VCTP &&
+ return !Revert && FoundAllComponents() && !VCTPs.empty() &&
!CannotTailPredicate && ML.getNumBlocks() == 1;
}
+ // Given that MI is a VCTP, check that it is equivalent to any other VCTPs
+ // found.
+ bool AddVCTP(MachineInstr *MI);
+
// Check that the predication in the loop will be equivalent once we
// perform the conversion. Also ensure that we can provide the number
// of elements to the loop start instruction.
- bool ValidateTailPredicate(MachineInstr *StartInsertPt);
-
- // See whether the live-out instructions are a reduction that we can fixup
- // later.
- bool FindValidReduction(InstSet &LiveMIs, InstSet &LiveOutUsers);
+ bool ValidateTailPredicate();
// Check that any values available outside of the loop will be the same
// after tail predication conversion.
@@ -283,34 +427,41 @@ namespace {
// Check the branch targets are within range and we satisfy our
// restrictions.
- void CheckLegality(ARMBasicBlockUtils *BBUtils);
+ void Validate(ARMBasicBlockUtils *BBUtils);
bool FoundAllComponents() const {
return Start && Dec && End;
}
- SmallVectorImpl<VPTBlock> &getVPTBlocks() { return VPTBlocks; }
+ SmallVectorImpl<VPTState> &getVPTBlocks() {
+ return VPTState::Blocks;
+ }
- // Return the loop iteration count, or the number of elements if we're tail
- // predicating.
- MachineOperand &getCount() {
- return IsTailPredicationLegal() ?
- VCTP->getOperand(1) : Start->getOperand(0);
+ // Return the operand for the loop start instruction. This will be the loop
+ // iteration count, or the number of elements if we're tail predicating.
+ MachineOperand &getLoopStartOperand() {
+ if (IsTailPredicationLegal())
+ return TPNumElements;
+ return isDo(Start) ? Start->getOperand(1) : Start->getOperand(0);
}
unsigned getStartOpcode() const {
- bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
+ bool IsDo = isDo(Start);
if (!IsTailPredicationLegal())
return IsDo ? ARM::t2DLS : ARM::t2WLS;
- return VCTPOpcodeToLSTP(VCTP->getOpcode(), IsDo);
+ return VCTPOpcodeToLSTP(VCTPs.back()->getOpcode(), IsDo);
}
void dump() const {
if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;
- if (VCTP) dbgs() << "ARM Loops: Found VCTP: " << *VCTP;
+ if (!VCTPs.empty()) {
+ dbgs() << "ARM Loops: Found VCTP(s):\n";
+ for (auto *MI : VCTPs)
+ dbgs() << " - " << *MI;
+ }
if (!FoundAllComponents())
dbgs() << "ARM Loops: Not a low-overhead loop.\n";
else if (!(Start && Dec && End))
@@ -357,14 +508,15 @@ namespace {
bool RevertNonLoops();
void RevertWhile(MachineInstr *MI) const;
+ void RevertDo(MachineInstr *MI) const;
bool RevertLoopDec(MachineInstr *MI) const;
void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;
- void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
+ void RevertLoopEndDec(MachineInstr *MI) const;
- void FixupReductions(LowOverheadLoop &LoLoop) const;
+ void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);
@@ -376,149 +528,228 @@ namespace {
char ARMLowOverheadLoops::ID = 0;
+SmallVector<VPTState, 4> VPTState::Blocks;
+SetVector<MachineInstr *> VPTState::CurrentPredicates;
+std::map<MachineInstr *,
+ std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts;
+
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
false, false)
-MachineInstr *LowOverheadLoop::isSafeToDefineLR() {
- // We can define LR because LR already contains the same value.
- if (Start->getOperand(0).getReg() == ARM::LR)
- return Start;
-
- unsigned CountReg = Start->getOperand(0).getReg();
- auto IsMoveLR = [&CountReg](MachineInstr *MI) {
- return MI->getOpcode() == ARM::tMOVr &&
- MI->getOperand(0).getReg() == ARM::LR &&
- MI->getOperand(1).getReg() == CountReg &&
- MI->getOperand(2).getImm() == ARMCC::AL;
- };
-
- MachineBasicBlock *MBB = Start->getParent();
-
- // Find an insertion point:
- // - Is there a (mov lr, Count) before Start? If so, and nothing else writes
- // to Count before Start, we can insert at that mov.
- if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR))
- if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg))
- return LRDef;
-
- // - Is there a (mov lr, Count) after Start? If so, and nothing else writes
- // to Count after Start, we can insert at that mov.
- if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR))
- if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg))
- return LRDef;
-
- // We've found no suitable LR def and Start doesn't use LR directly. Can we
- // just define LR anyway?
- return RDA.isSafeToDefRegAt(Start, ARM::LR) ? Start : nullptr;
-}
+static bool TryRemove(MachineInstr *MI, ReachingDefAnalysis &RDA,
+ InstSet &ToRemove, InstSet &Ignore) {
+
+ // Check that we can remove all of Killed without having to modify any IT
+ // blocks.
+ auto WontCorruptITs = [](InstSet &Killed, ReachingDefAnalysis &RDA) {
+ // Collect the dead code and the MBBs in which they reside.
+ SmallPtrSet<MachineBasicBlock*, 2> BasicBlocks;
+ for (auto *Dead : Killed)
+ BasicBlocks.insert(Dead->getParent());
+
+ // Collect IT blocks in all affected basic blocks.
+ std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
+ for (auto *MBB : BasicBlocks) {
+ for (auto &IT : *MBB) {
+ if (IT.getOpcode() != ARM::t2IT)
+ continue;
+ RDA.getReachingLocalUses(&IT, MCRegister::from(ARM::ITSTATE),
+ ITBlocks[&IT]);
+ }
+ }
-bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
- assert(VCTP && "VCTP instruction expected but is not set");
- // All predication within the loop should be based on vctp. If the block
- // isn't predicated on entry, check whether the vctp is within the block
- // and that all other instructions are then predicated on it.
- for (auto &Block : VPTBlocks) {
- if (Block.IsPredicatedOn(VCTP))
- continue;
- if (Block.HasNonUniformPredicate() && !isVCTP(Block.getDivergent()->MI)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Found unsupported diverging predicate: "
- << *Block.getDivergent()->MI);
- return false;
+ // If we're removing all of the instructions within an IT block, then
+ // also remove the IT instruction.
+ SmallPtrSet<MachineInstr *, 2> ModifiedITs;
+ SmallPtrSet<MachineInstr *, 2> RemoveITs;
+ for (auto *Dead : Killed) {
+ if (MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) {
+ MachineInstr *IT = RDA.getMIOperand(Dead, *MO);
+ RemoveITs.insert(IT);
+ auto &CurrentBlock = ITBlocks[IT];
+ CurrentBlock.erase(Dead);
+ if (CurrentBlock.empty())
+ ModifiedITs.erase(IT);
+ else
+ ModifiedITs.insert(IT);
+ }
}
- SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts();
- for (auto &PredMI : Insts) {
- // Check the instructions in the block and only allow:
- // - VCTPs
- // - Instructions predicated on the main VCTP
- // - Any VCMP
- // - VCMPs just "and" their result with VPR.P0. Whether they are
- // located before/after the VCTP is irrelevant - the end result will
- // be the same in both cases, so there's no point in requiring them
- // to be located after the VCTP!
- if (PredMI.Predicates.count(VCTP) || isVCTP(PredMI.MI) ||
- VCMPOpcodeToVPT(PredMI.MI->getOpcode()) != 0)
- continue;
- LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *PredMI.MI
- << " - which is predicated on:\n";
- for (auto *MI : PredMI.Predicates)
- dbgs() << " - " << *MI);
+ if (!ModifiedITs.empty())
return false;
+ Killed.insert(RemoveITs.begin(), RemoveITs.end());
+ return true;
+ };
+
+ SmallPtrSet<MachineInstr *, 2> Uses;
+ if (!RDA.isSafeToRemove(MI, Uses, Ignore))
+ return false;
+
+ if (WontCorruptITs(Uses, RDA)) {
+ ToRemove.insert(Uses.begin(), Uses.end());
+ LLVM_DEBUG(dbgs() << "ARM Loops: Able to remove: " << *MI
+ << " - can also remove:\n";
+ for (auto *Use : Uses)
+ dbgs() << " - " << *Use);
+
+ SmallPtrSet<MachineInstr*, 4> Killed;
+ RDA.collectKilledOperands(MI, Killed);
+ if (WontCorruptITs(Killed, RDA)) {
+ ToRemove.insert(Killed.begin(), Killed.end());
+ LLVM_DEBUG(for (auto *Dead : Killed)
+ dbgs() << " - " << *Dead);
}
+ return true;
+ }
+ return false;
+}
+
+bool LowOverheadLoop::ValidateTailPredicate() {
+ if (!IsTailPredicationLegal()) {
+ LLVM_DEBUG(if (VCTPs.empty())
+ dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
+ dbgs() << "ARM Loops: Tail-predication is not valid.\n");
+ return false;
}
- if (!ValidateLiveOuts())
+ assert(!VCTPs.empty() && "VCTP instruction expected but is not set");
+ assert(ML.getBlocks().size() == 1 &&
+ "Shouldn't be processing a loop with more than one block");
+
+ if (DisableTailPredication) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n");
return false;
+ }
- // For tail predication, we need to provide the number of elements, instead
- // of the iteration count, to the loop start instruction. The number of
- // elements is provided to the vctp instruction, so we need to check that
- // we can use this register at InsertPt.
- Register NumElements = VCTP->getOperand(1).getReg();
+ if (!VPTState::isValid(RDA)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n");
+ return false;
+ }
- // If the register is defined within loop, then we can't perform TP.
- // TODO: Check whether this is just a mov of a register that would be
- // available.
- if (RDA.hasLocalDefBefore(VCTP, NumElements)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");
+ if (!ValidateLiveOuts()) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n");
return false;
}
- // The element count register maybe defined after InsertPt, in which case we
- // need to try to move either InsertPt or the def so that the [w|d]lstp can
- // use the value.
- // TODO: On failing to move an instruction, check if the count is provided by
- // a mov and whether we can use the mov operand directly.
- MachineBasicBlock *InsertBB = StartInsertPt->getParent();
- if (!RDA.isReachingDefLiveOut(StartInsertPt, NumElements)) {
- if (auto *ElemDef = RDA.getLocalLiveOutMIDef(InsertBB, NumElements)) {
- if (RDA.isSafeToMoveForwards(ElemDef, StartInsertPt)) {
- ElemDef->removeFromParent();
- InsertBB->insert(MachineBasicBlock::iterator(StartInsertPt), ElemDef);
- LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: "
- << *ElemDef);
- } else if (RDA.isSafeToMoveBackwards(StartInsertPt, ElemDef)) {
- StartInsertPt->removeFromParent();
- InsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef),
- StartInsertPt);
- LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
- } else {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unable to move element count to loop "
- << "start instruction.\n");
- return false;
+ // Check that creating a [W|D]LSTP, which will define LR with an element
+ // count instead of the iteration count, won't affect any instructions
+ // other than the LoopStart and LoopDec.
+ // TODO: We should try to insert the [W|D]LSTP after any of the other uses.
+ Register StartReg = isDo(Start) ? Start->getOperand(1).getReg()
+ : Start->getOperand(0).getReg();
+ if (StartInsertPt == Start && StartReg == ARM::LR) {
+ if (auto *IterCount = RDA.getMIOperand(Start, isDo(Start) ? 1 : 0)) {
+ SmallPtrSet<MachineInstr *, 2> Uses;
+ RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses);
+ for (auto *Use : Uses) {
+ if (Use != Start && Use != Dec) {
+ LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use);
+ return false;
+ }
}
}
}
- // Especially in the case of while loops, InsertBB may not be the
- // preheader, so we need to check that the register isn't redefined
- // before entering the loop.
- auto CannotProvideElements = [this](MachineBasicBlock *MBB,
- Register NumElements) {
- // NumElements is redefined in this block.
- if (RDA.hasLocalDefBefore(&MBB->back(), NumElements))
- return true;
+ // For tail predication, we need to provide the number of elements, instead
+ // of the iteration count, to the loop start instruction. The number of
+ // elements is provided to the vctp instruction, so we need to check that
+ // we can use this register at InsertPt.
+ MachineInstr *VCTP = VCTPs.back();
+ if (Start->getOpcode() == ARM::t2DoLoopStartTP) {
+ TPNumElements = Start->getOperand(2);
+ StartInsertPt = Start;
+ StartInsertBB = Start->getParent();
+ } else {
+ TPNumElements = VCTP->getOperand(1);
+ MCRegister NumElements = TPNumElements.getReg().asMCReg();
+
+ // If the register is defined within loop, then we can't perform TP.
+ // TODO: Check whether this is just a mov of a register that would be
+ // available.
+ if (RDA.hasLocalDefBefore(VCTP, NumElements)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");
+ return false;
+ }
- // Don't continue searching up through multiple predecessors.
- if (MBB->pred_size() > 1)
- return true;
+ // The element count register may be defined after InsertPt, in which case we
+ // need to try to move either InsertPt or the def so that the [w|d]lstp can
+ // use the value.
+
+ if (StartInsertPt != StartInsertBB->end() &&
+ !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) {
+ if (auto *ElemDef =
+ RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) {
+ if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) {
+ ElemDef->removeFromParent();
+ StartInsertBB->insert(StartInsertPt, ElemDef);
+ LLVM_DEBUG(dbgs()
+ << "ARM Loops: Moved element count def: " << *ElemDef);
+ } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) {
+ StartInsertPt->removeFromParent();
+ StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef),
+ &*StartInsertPt);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
+ } else {
+ // If we fail to move an instruction and the element count is provided
+ // by a mov, use the mov operand if it will have the same value at the
+ // insertion point.
+ MachineOperand Operand = ElemDef->getOperand(1);
+ if (isMovRegOpcode(ElemDef->getOpcode()) &&
+ RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg().asMCReg()) ==
+ RDA.getUniqueReachingMIDef(&*StartInsertPt,
+ Operand.getReg().asMCReg())) {
+ TPNumElements = Operand;
+ NumElements = TPNumElements.getReg();
+ } else {
+ LLVM_DEBUG(dbgs()
+ << "ARM Loops: Unable to move element count to loop "
+ << "start instruction.\n");
+ return false;
+ }
+ }
+ }
+ }
- return false;
- };
+ // Especially in the case of while loops, InsertBB may not be the
+ // preheader, so we need to check that the register isn't redefined
+ // before entering the loop.
+ auto CannotProvideElements = [this](MachineBasicBlock *MBB,
+ MCRegister NumElements) {
+ if (MBB->empty())
+ return false;
+ // NumElements is redefined in this block.
+ if (RDA.hasLocalDefBefore(&MBB->back(), NumElements))
+ return true;
- // First, find the block that looks like the preheader.
- MachineBasicBlock *MBB = Preheader;
- if (!MBB) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find preheader.\n");
- return false;
- }
+ // Don't continue searching up through multiple predecessors.
+ if (MBB->pred_size() > 1)
+ return true;
- // Then search backwards for a def, until we get to InsertBB.
- while (MBB != InsertBB) {
- if (CannotProvideElements(MBB, NumElements)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");
return false;
+ };
+
+ // Search backwards for a def, until we get to InsertBB.
+ MachineBasicBlock *MBB = Preheader;
+ while (MBB && MBB != StartInsertBB) {
+ if (CannotProvideElements(MBB, NumElements)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");
+ return false;
+ }
+ MBB = *MBB->pred_begin();
}
- MBB = *MBB->pred_begin();
+ }
+
+ // Could inserting the [W|D]LSTP cause some unintended effects? In a perfect
+ // world the [w|d]lstp instruction would be the last instruction in the
+ // preheader, so it would only affect instructions within the loop body. But
+ // due to scheduling, and/or the logic in this pass (above), the insertion
+ // point can be moved earlier. So if the Loop Start isn't the last instruction
+ // in the preheader, and if the initial element count is smaller than the
+ // vector width, the Loop Start instruction will immediately generate one or
+ // more false lane masks which could incorrectly affect the following MVE
+ // instructions in the preheader.
+ if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP\n");
+ return false;
}
// Check that the value change of the element count is what we expect and
@@ -529,15 +760,20 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
return -getAddSubImmediate(*MI) == ExpectedVecWidth;
};
- MBB = VCTP->getParent();
- if (auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), NumElements)) {
+ MachineBasicBlock *MBB = VCTP->getParent();
+ // Remove modifications to the element count since they have no purpose in a
+ // tail predicated loop. Explicitly refer to the vctp operand no matter which
+ // register NumElements has been assigned to, since that is what the
+ // modifications will be using.
+ if (auto *Def = RDA.getUniqueReachingMIDef(
+ &MBB->back(), VCTP->getOperand(1).getReg().asMCReg())) {
SmallPtrSet<MachineInstr*, 2> ElementChain;
- SmallPtrSet<MachineInstr*, 2> Ignore = { VCTP };
+ SmallPtrSet<MachineInstr*, 2> Ignore;
unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode());
- Ignore.insert(SecondaryVCTPs.begin(), SecondaryVCTPs.end());
+ Ignore.insert(VCTPs.begin(), VCTPs.end());
- if (RDA.isSafeToRemove(Def, ElementChain, Ignore)) {
+ if (TryRemove(Def, RDA, ElementChain, Ignore)) {
bool FoundSub = false;
for (auto *MI : ElementChain) {
@@ -545,27 +781,24 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
continue;
if (isSubImmOpcode(MI->getOpcode())) {
- if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth))
+ if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"
+ " count: " << *MI);
return false;
+ }
FoundSub = true;
- } else
+ } else {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"
+ " count: " << *MI);
return false;
+ }
}
-
- LLVM_DEBUG(dbgs() << "ARM Loops: Will remove element count chain:\n";
- for (auto *MI : ElementChain)
- dbgs() << " - " << *MI);
ToRemove.insert(ElementChain.begin(), ElementChain.end());
}
}
return true;
}
-static bool isVectorPredicated(MachineInstr *MI) {
- int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
- return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
-}
-
static bool isRegInClass(const MachineOperand &MO,
const TargetRegisterClass *Class) {
return MO.isReg() && MO.getReg() && Class->contains(MO.getReg());
@@ -623,7 +856,6 @@ static bool canGenerateNonZeros(const MachineInstr &MI) {
return false;
}
-
// Look at its register uses to see if it only can only receive zeros
// into its false lanes which would then produce zeros. Also check that
// the output register is also defined by an FalseLanesZero instruction
@@ -636,120 +868,40 @@ static bool producesFalseLanesZero(MachineInstr &MI,
if (canGenerateNonZeros(MI))
return false;
+ bool isPredicated = isVectorPredicated(&MI);
+ // Predicated loads will write zeros to the falsely predicated bytes of the
+ // destination register.
+ if (MI.mayLoad())
+ return isPredicated;
+
+ auto IsZeroInit = [](MachineInstr *Def) {
+ return !isVectorPredicated(Def) &&
+ Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
+ Def->getOperand(1).getImm() == 0;
+ };
+
bool AllowScalars = isHorizontalReduction(MI);
for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg())
continue;
if (!isRegInClass(MO, QPRs) && AllowScalars)
continue;
- if (auto *OpDef = RDA.getMIOperand(&MI, MO))
- if (FalseLanesZero.count(OpDef))
- continue;
- return false;
- }
- LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI);
- return true;
-}
-
-bool
-LowOverheadLoop::FindValidReduction(InstSet &LiveMIs, InstSet &LiveOutUsers) {
- // Also check for reductions where the operation needs to be merging values
- // from the last and previous loop iterations. This means an instruction
- // producing a value and a vmov storing the value calculated in the previous
- // iteration. So we can have two live-out regs, one produced by a vmov and
- // both being consumed by a vpsel.
- LLVM_DEBUG(dbgs() << "ARM Loops: Looking for reduction live-outs:\n";
- for (auto *MI : LiveMIs)
- dbgs() << " - " << *MI);
-
- if (!Preheader)
- return false;
-
- // Expect a vmov, a vadd and a single vpsel user.
- // TODO: This means we can't currently support multiple reductions in the
- // loop.
- if (LiveMIs.size() != 2 || LiveOutUsers.size() != 1)
- return false;
-
- MachineInstr *VPSEL = *LiveOutUsers.begin();
- if (VPSEL->getOpcode() != ARM::MVE_VPSEL)
- return false;
-
- unsigned VPRIdx = llvm::findFirstVPTPredOperandIdx(*VPSEL) + 1;
- MachineInstr *Pred = RDA.getMIOperand(VPSEL, VPRIdx);
- if (!Pred || Pred != VCTP) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Not using equivalent predicate.\n");
- return false;
- }
- MachineInstr *Reduce = RDA.getMIOperand(VPSEL, 1);
- if (!Reduce)
- return false;
-
- assert(LiveMIs.count(Reduce) && "Expected MI to be live-out");
-
- // TODO: Support more operations than VADD.
- switch (VCTP->getOpcode()) {
- default:
- return false;
- case ARM::MVE_VCTP8:
- if (Reduce->getOpcode() != ARM::MVE_VADDi8)
- return false;
- break;
- case ARM::MVE_VCTP16:
- if (Reduce->getOpcode() != ARM::MVE_VADDi16)
- return false;
- break;
- case ARM::MVE_VCTP32:
- if (Reduce->getOpcode() != ARM::MVE_VADDi32)
+ // Check that this instruction will produce zeros in its false lanes:
+ // - If it only consumes false-lanes-zero values or constant 0 (vmov #0)
+ // - If it's predicated, it only matters that its def register already has
+ // false lane zeros, so we can ignore the uses.
+ SmallPtrSet<MachineInstr *, 2> Defs;
+ RDA.getGlobalReachingDefs(&MI, MO.getReg(), Defs);
+ for (auto *Def : Defs) {
+ if (Def == &MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
+ continue;
+ if (MO.isUse() && isPredicated)
+ continue;
return false;
- break;
- }
-
- // Test that the reduce op is overwriting ones of its operands.
- if (Reduce->getOperand(0).getReg() != Reduce->getOperand(1).getReg() &&
- Reduce->getOperand(0).getReg() != Reduce->getOperand(2).getReg()) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Reducing op isn't overwriting itself.\n");
- return false;
- }
-
- // Check that the VORR is actually a VMOV.
- MachineInstr *Copy = RDA.getMIOperand(VPSEL, 2);
- if (!Copy || Copy->getOpcode() != ARM::MVE_VORR ||
- !Copy->getOperand(1).isReg() || !Copy->getOperand(2).isReg() ||
- Copy->getOperand(1).getReg() != Copy->getOperand(2).getReg())
- return false;
-
- assert(LiveMIs.count(Copy) && "Expected MI to be live-out");
-
- // Check that the vadd and vmov are only used by each other and the vpsel.
- SmallPtrSet<MachineInstr*, 2> CopyUsers;
- RDA.getGlobalUses(Copy, Copy->getOperand(0).getReg(), CopyUsers);
- if (CopyUsers.size() > 2 || !CopyUsers.count(Reduce)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Copy users unsupported.\n");
- return false;
- }
-
- SmallPtrSet<MachineInstr*, 2> ReduceUsers;
- RDA.getGlobalUses(Reduce, Reduce->getOperand(0).getReg(), ReduceUsers);
- if (ReduceUsers.size() > 2 || !ReduceUsers.count(Copy)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Reduce users unsupported.\n");
- return false;
+ }
}
-
- // Then find whether there's an instruction initialising the register that
- // is storing the reduction.
- SmallPtrSet<MachineInstr*, 2> Incoming;
- RDA.getLiveOuts(Preheader, Copy->getOperand(1).getReg(), Incoming);
- if (Incoming.size() > 1)
- return false;
-
- MachineInstr *Init = Incoming.empty() ? nullptr : *Incoming.begin();
- LLVM_DEBUG(dbgs() << "ARM Loops: Found a reduction:\n"
- << " - " << *Copy
- << " - " << *Reduce
- << " - " << *VPSEL);
- Reductions.push_back(std::make_unique<Reduction>(Init, Copy, Reduce, VPSEL));
+ LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI);
return true;
}
@@ -769,7 +921,7 @@ bool LowOverheadLoop::ValidateLiveOuts() {
// the false lanes are zeroed and here we're trying to track that those false
// lanes remain zero, or where they change, the differences are masked away
// by their user(s).
- // All MVE loads and stores have to be predicated, so we know that any load
+ // All MVE stores have to be predicated, so we know that any predicated load
// operands, or stored results are equivalent already. Other explicitly
// predicated instructions will perform the same operation in the original
// loop and the tail-predicated form too. Because of this, we can insert
@@ -782,42 +934,32 @@ bool LowOverheadLoop::ValidateLiveOuts() {
MachineBasicBlock *Header = ML.getHeader();
for (auto &MI : *Header) {
- const MCInstrDesc &MCID = MI.getDesc();
- uint64_t Flags = MCID.TSFlags;
- if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
+ if (!shouldInspect(MI))
continue;
if (isVCTP(&MI) || isVPTOpcode(MI.getOpcode()))
continue;
- // Predicated loads will write zeros to the falsely predicated bytes of the
- // destination register.
- if (isVectorPredicated(&MI)) {
- if (MI.mayLoad())
- FalseLanesZero.insert(&MI);
- Predicated.insert(&MI);
- continue;
- }
+ bool isPredicated = isVectorPredicated(&MI);
+ bool retainsOrReduces =
+ retainsPreviousHalfElement(MI) || isHorizontalReduction(MI);
- if (MI.getNumDefs() == 0)
+ if (isPredicated)
+ Predicated.insert(&MI);
+ if (producesFalseLanesZero(MI, QPRs, RDA, FalseLanesZero))
+ FalseLanesZero.insert(&MI);
+ else if (MI.getNumDefs() == 0)
continue;
-
- if (!producesFalseLanesZero(MI, QPRs, RDA, FalseLanesZero)) {
- // We require retaining and horizontal operations to operate upon zero'd
- // false lanes to ensure the conversion doesn't change the output.
- if (retainsPreviousHalfElement(MI) || isHorizontalReduction(MI))
- return false;
- // Otherwise we need to evaluate this instruction later to see whether
- // unknown false lanes will get masked away by their user(s).
+ else if (!isPredicated && retainsOrReduces)
+ return false;
+ else if (!isPredicated)
FalseLanesUnknown.insert(&MI);
- } else if (!isHorizontalReduction(MI))
- FalseLanesZero.insert(&MI);
}
auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO,
SmallPtrSetImpl<MachineInstr *> &Predicated) {
SmallPtrSet<MachineInstr *, 2> Uses;
- RDA.getGlobalUses(MI, MO.getReg(), Uses);
+ RDA.getGlobalUses(MI, MO.getReg().asMCReg(), Uses);
for (auto *Use : Uses) {
if (Use != MI && !Predicated.count(Use))
return false;
@@ -840,139 +982,155 @@ bool LowOverheadLoop::ValidateLiveOuts() {
LLVM_DEBUG(dbgs() << "ARM Loops: Found an unknown def of : "
<< TRI.getRegAsmName(MO.getReg()) << " at " << *MI);
NonPredicated.insert(MI);
- continue;
+ break;
}
}
// Any unknown false lanes have been masked away by the user(s).
- Predicated.insert(MI);
+ if (!NonPredicated.contains(MI))
+ Predicated.insert(MI);
}
SmallPtrSet<MachineInstr *, 2> LiveOutMIs;
- SmallPtrSet<MachineInstr*, 2> LiveOutUsers;
SmallVector<MachineBasicBlock *, 2> ExitBlocks;
ML.getExitBlocks(ExitBlocks);
assert(ML.getNumBlocks() == 1 && "Expected single block loop!");
assert(ExitBlocks.size() == 1 && "Expected a single exit block");
MachineBasicBlock *ExitBB = ExitBlocks.front();
for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) {
+ // TODO: Instead of blocking predication, we could move the vctp to the exit
+ // block and calculate its operand there or in the preheader.
+ if (RegMask.PhysReg == ARM::VPR)
+ return false;
// Check Q-regs that are live in the exit blocks. We don't collect scalars
// because they won't be affected by lane predication.
- if (QPRs->contains(RegMask.PhysReg)) {
+ if (QPRs->contains(RegMask.PhysReg))
if (auto *MI = RDA.getLocalLiveOutMIDef(Header, RegMask.PhysReg))
LiveOutMIs.insert(MI);
- RDA.getLiveInUses(ExitBB, RegMask.PhysReg, LiveOutUsers);
- }
}
- // If we have any non-predicated live-outs, they need to be part of a
- // reduction that we can fixup later. The reduction that the form of an
- // operation that uses its previous values through a vmov and then a vpsel
- // resides in the exit blocks to select the final bytes from n and n-1
- // iterations.
- if (!NonPredicated.empty() &&
- !FindValidReduction(NonPredicated, LiveOutUsers))
- return false;
-
// We've already validated that any VPT predication within the loop will be
// equivalent when we perform the predication transformation; so we know that
// any VPT predicated instruction is predicated upon VCTP. Any live-out
// instruction needs to be predicated, so check this here. The instructions
// in NonPredicated have been found to be a reduction that we can ensure its
// legality.
- for (auto *MI : LiveOutMIs)
- if (!isVectorPredicated(MI) && !NonPredicated.count(MI))
+ for (auto *MI : LiveOutMIs) {
+ if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to handle live out: " << *MI);
return false;
+ }
+ }
return true;
}
-void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils) {
+void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
if (Revert)
return;
- if (!End->getOperand(1).isMBB())
- report_fatal_error("Expected LoopEnd to target basic block");
+ // Check branch target ranges: WLS[TP] can only branch forwards and LE[TP]
+ // can only jump back.
+ auto ValidateRanges = [](MachineInstr *Start, MachineInstr *End,
+ ARMBasicBlockUtils *BBUtils, MachineLoop &ML) {
+ MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd
+ ? End->getOperand(1).getMBB()
+ : End->getOperand(2).getMBB();
+ // TODO: Maybe there are cases where the target doesn't have to be the header,
+ // but for now be safe and revert.
+ if (TgtBB != ML.getHeader()) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targeting header.\n");
+ return false;
+ }
- // TODO Maybe there's cases where the target doesn't have to be the header,
- // but for now be safe and revert.
- if (End->getOperand(1).getMBB() != ML.getHeader()) {
- LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
- Revert = true;
- return;
- }
+ // The WLS and LE instructions have 12 bits for the label offset. WLS
+ // requires a positive offset, while LE uses negative.
+ if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||
+ !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
+ return false;
+ }
- // The WLS and LE instructions have 12-bits for the label offset. WLS
- // requires a positive offset, while LE uses negative.
- if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||
- !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
- Revert = true;
- return;
- }
+ if (Start->getOpcode() == ARM::t2WhileLoopStart &&
+ (BBUtils->getOffsetOf(Start) >
+ BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
+ !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
+ return false;
+ }
+ return true;
+ };
- if (Start->getOpcode() == ARM::t2WhileLoopStart &&
- (BBUtils->getOffsetOf(Start) >
- BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
- !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
- LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
- Revert = true;
- return;
- }
+ // Find a suitable position to insert the loop start instruction. It needs to
+ // be able to safely define LR.
+ auto FindStartInsertionPoint = [](MachineInstr *Start, MachineInstr *Dec,
+ MachineBasicBlock::iterator &InsertPt,
+ MachineBasicBlock *&InsertBB,
+ ReachingDefAnalysis &RDA,
+ InstSet &ToRemove) {
+ // For a t2DoLoopStart it is always valid to use the start insertion point.
+ // For WLS we can define LR if LR already contains the same value.
+ if (isDo(Start) || Start->getOperand(0).getReg() == ARM::LR) {
+ InsertPt = MachineBasicBlock::iterator(Start);
+ InsertBB = Start->getParent();
+ return true;
+ }
+
+ // We've found no suitable LR def and Start doesn't use LR directly. Can we
+ // just define LR anyway?
+ if (!RDA.isSafeToDefRegAt(Start, MCRegister::from(ARM::LR)))
+ return false;
- InsertPt = Revert ? nullptr : isSafeToDefineLR();
- if (!InsertPt) {
+ InsertPt = MachineBasicBlock::iterator(Start);
+ InsertBB = Start->getParent();
+ return true;
+ };
+
+ if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA,
+ ToRemove)) {
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
Revert = true;
return;
- } else
- LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt);
+ }
+ LLVM_DEBUG(if (StartInsertPt == StartInsertBB->end())
+ dbgs() << "ARM Loops: Will insert LoopStart at end of block\n";
+ else
+ dbgs() << "ARM Loops: Will insert LoopStart at "
+ << *StartInsertPt
+ );
- if (!IsTailPredicationLegal()) {
- LLVM_DEBUG(if (!VCTP)
- dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
- dbgs() << "ARM Loops: Tail-predication is not valid.\n");
- return;
+ Revert = !ValidateRanges(Start, End, BBUtils, ML);
+ CannotTailPredicate = !ValidateTailPredicate();
+}
+
+bool LowOverheadLoop::AddVCTP(MachineInstr *MI) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Adding VCTP: " << *MI);
+ if (VCTPs.empty()) {
+ VCTPs.push_back(MI);
+ return true;
}
- assert(ML.getBlocks().size() == 1 &&
- "Shouldn't be processing a loop with more than one block");
- CannotTailPredicate = !ValidateTailPredicate(InsertPt);
- LLVM_DEBUG(if (CannotTailPredicate)
- dbgs() << "ARM Loops: Couldn't validate tail predicate.\n");
+ // If we find another VCTP, check whether it uses the same value as the main VCTP.
+ // If it does, store it in the VCTPs set, else refuse it.
+ MachineInstr *Prev = VCTPs.back();
+ if (!Prev->getOperand(1).isIdenticalTo(MI->getOperand(1)) ||
+ !RDA.hasSameReachingDef(Prev, MI, MI->getOperand(1).getReg().asMCReg())) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching "
+ "definition from the main VCTP");
+ return false;
+ }
+ VCTPs.push_back(MI);
+ return true;
}
bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) {
if (CannotTailPredicate)
return false;
- if (isVCTP(MI)) {
- // If we find another VCTP, check whether it uses the same value as the main VCTP.
- // If it does, store it in the SecondaryVCTPs set, else refuse it.
- if (VCTP) {
- if (!VCTP->getOperand(1).isIdenticalTo(MI->getOperand(1)) ||
- !RDA.hasSameReachingDef(VCTP, MI, MI->getOperand(1).getReg())) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching "
- "definition from the main VCTP");
- return false;
- }
- LLVM_DEBUG(dbgs() << "ARM Loops: Found secondary VCTP: " << *MI);
- SecondaryVCTPs.insert(MI);
- } else {
- LLVM_DEBUG(dbgs() << "ARM Loops: Found 'main' VCTP: " << *MI);
- VCTP = MI;
- }
- } else if (isVPTOpcode(MI->getOpcode())) {
- if (MI->getOpcode() != ARM::MVE_VPST) {
- assert(MI->findRegisterDefOperandIdx(ARM::VPR) != -1 &&
- "VPT does not implicitly define VPR?!");
- CurrentPredicate.insert(MI);
- }
-
- VPTBlocks.emplace_back(MI, CurrentPredicate);
- CurrentBlock = &VPTBlocks.back();
+ if (!shouldInspect(*MI))
return true;
- } else if (MI->getOpcode() == ARM::MVE_VPSEL ||
- MI->getOpcode() == ARM::MVE_VPNOT) {
+
+ if (MI->getOpcode() == ARM::MVE_VPSEL ||
+ MI->getOpcode() == ARM::MVE_VPNOT) {
// TODO: Allow VPSEL and VPNOT, we currently cannot because:
// 1) It will use the VPR as a predicate operand, but doesn't have to be
// instead a VPT block, which means we can assert while building up
@@ -984,49 +1142,62 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) {
return false;
}
- bool IsUse = false;
- bool IsDef = false;
+ // Record all VCTPs and check that they're equivalent to one another.
+ if (isVCTP(MI) && !AddVCTP(MI))
+ return false;
+
+ // Inspect uses first so that any instructions that alter the VPR don't
+ // alter the predicate upon themselves.
const MCInstrDesc &MCID = MI->getDesc();
- for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.getReg() != ARM::VPR)
+ bool IsUse = false;
+ unsigned LastOpIdx = MI->getNumOperands() - 1;
+ for (auto &Op : enumerate(reverse(MCID.operands()))) {
+ const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index());
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != ARM::VPR)
continue;
- if (MO.isDef()) {
- CurrentPredicate.insert(MI);
- IsDef = true;
- } else if (ARM::isVpred(MCID.OpInfo[i].OperandType)) {
- CurrentBlock->addInst(MI, CurrentPredicate);
+ if (ARM::isVpred(Op.value().OperandType)) {
+ VPTState::addInst(MI);
IsUse = true;
- } else {
+ } else if (MI->getOpcode() != ARM::MVE_VPST) {
LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI);
return false;
}
}
- // If we find a vpr def that is not already predicated on the vctp, we've
- // got disjoint predicates that may not be equivalent when we do the
- // conversion.
- if (IsDef && !IsUse && VCTP && !isVCTP(MI)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Found disjoint vpr def: " << *MI);
- return false;
- }
-
- uint64_t Flags = MCID.TSFlags;
- if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
- return true;
-
// If we find an instruction that has been marked as not valid for tail
// predication, only allow the instruction if it's contained within a valid
// VPT block.
- if ((Flags & ARMII::ValidForTailPredication) == 0 && !IsUse) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
- return false;
+ bool RequiresExplicitPredication =
+ (MCID.TSFlags & ARMII::ValidForTailPredication) == 0;
+ if (isDomainMVE(MI) && RequiresExplicitPredication) {
+ LLVM_DEBUG(if (!IsUse)
+ dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
+ return IsUse;
}
// If the instruction is already explicitly predicated, then the conversion
- // will be fine, but ensure that all memory operations are predicated.
- return !IsUse && MI->mayLoadOrStore() ? false : true;
+ // will be fine, but ensure that all store operations are predicated.
+ if (MI->mayStore())
+ return IsUse;
+
+ // If this instruction defines the VPR, update the predicate for the
+ // following instructions.
+ if (isVectorPredicate(MI)) {
+ // Clear the existing predicate when we're not in VPT Active state,
+ // otherwise we add to it.
+ if (!isVectorPredicated(MI))
+ VPTState::resetPredicate(MI);
+ else
+ VPTState::addPredicate(MI);
+ }
+
+ // Finally, once the predicate has been modified, we can start a new VPT
+ // block if necessary.
+ if (isVPTOpcode(MI->getOpcode()))
+ VPTState::CreateVPTBlock(MI);
+
+ return true;
}
bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
@@ -1049,7 +1220,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
bool Changed = false;
for (auto ML : *MLI) {
- if (!ML->getParentLoop())
+ if (ML->isOutermost())
Changed |= ProcessLoop(ML);
}
Changed |= RevertNonLoops();
@@ -1108,6 +1279,8 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
LoLoop.Dec = &MI;
else if (MI.getOpcode() == ARM::t2LoopEnd)
LoLoop.End = &MI;
+ else if (MI.getOpcode() == ARM::t2LoopEndDec)
+ LoLoop.End = LoLoop.Dec = &MI;
else if (isLoopStart(MI))
LoLoop.Start = &MI;
else if (MI.getDesc().isCall()) {
@@ -1130,15 +1303,18 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
return false;
}
- // Check that the only instruction using LoopDec is LoopEnd.
+ // Check that the only instruction using LoopDec is LoopEnd. This can only
+ // happen when the Dec and End are separate, not a single t2LoopEndDec.
// TODO: Check for copy chains that really have no effect.
- SmallPtrSet<MachineInstr*, 2> Uses;
- RDA->getReachingLocalUses(LoLoop.Dec, ARM::LR, Uses);
- if (Uses.size() > 1 || !Uses.count(LoLoop.End)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");
- LoLoop.Revert = true;
+ if (LoLoop.Dec != LoLoop.End) {
+ SmallPtrSet<MachineInstr *, 2> Uses;
+ RDA->getReachingLocalUses(LoLoop.Dec, MCRegister::from(ARM::LR), Uses);
+ if (Uses.size() > 1 || !Uses.count(LoLoop.End)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");
+ LoLoop.Revert = true;
+ }
}
- LoLoop.CheckLegality(BBUtils.get());
+ LoLoop.Validate(BBUtils.get());
Expand(LoLoop);
return true;
}
@@ -1149,23 +1325,16 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
// another low register.
void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
- MachineBasicBlock *MBB = MI->getParent();
- MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(ARM::t2CMPri));
- MIB.add(MI->getOperand(0));
- MIB.addImm(0);
- MIB.addImm(ARMCC::AL);
- MIB.addReg(ARM::NoRegister);
-
MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
ARM::tBcc : ARM::t2Bcc;
- MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
- MIB.add(MI->getOperand(1)); // branch target
- MIB.addImm(ARMCC::EQ); // condition code
- MIB.addReg(ARM::CPSR);
- MI->eraseFromParent();
+ RevertWhileLoopStart(MI, TII, BrOpc);
+}
+
+void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);
+ RevertDoLoopStart(MI, TII);
}
bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
@@ -1180,23 +1349,10 @@ bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
}
// If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
- bool SetFlags = RDA->isSafeToDefRegAt(MI, ARM::CPSR, Ignore);
-
- MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(ARM::t2SUBri));
- MIB.addDef(ARM::LR);
- MIB.add(MI->getOperand(1));
- MIB.add(MI->getOperand(2));
- MIB.addImm(ARMCC::AL);
- MIB.addReg(0);
-
- if (SetFlags) {
- MIB.addReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
- } else
- MIB.addReg(0);
+ bool SetFlags =
+ RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore);
- MI->eraseFromParent();
+ llvm::RevertLoopDec(MI, TII, SetFlags);
return SetFlags;
}
@@ -1204,27 +1360,39 @@ bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
- MachineBasicBlock *MBB = MI->getParent();
- // Create cmp
- if (!SkipCmp) {
- MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(ARM::t2CMPri));
- MIB.addReg(ARM::LR);
- MIB.addImm(0);
- MIB.addImm(ARMCC::AL);
- MIB.addReg(ARM::NoRegister);
- }
-
MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
ARM::tBcc : ARM::t2Bcc;
- // Create bne
+ llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp);
+}
+
+// Generate a subs, or sub and cmp, and a branch instead of an LE.
+void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to subs, br: " << *MI);
+ assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!");
+ MachineBasicBlock *MBB = MI->getParent();
+
MachineInstrBuilder MIB =
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
- MIB.add(MI->getOperand(1)); // branch target
- MIB.addImm(ARMCC::NE); // condition code
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
+ MIB.addDef(ARM::LR);
+ MIB.add(MI->getOperand(1));
+ MIB.addImm(1);
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(ARM::NoRegister);
+ MIB.addReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+
+ MachineBasicBlock *DestBB = MI->getOperand(2).getMBB();
+ unsigned BrOpc =
+ BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
+
+ // Create bne
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+ MIB.add(MI->getOperand(2)); // branch target
+ MIB.addImm(ARMCC::NE); // condition code
MIB.addReg(ARM::CPSR);
+
MI->eraseFromParent();
}
@@ -1235,7 +1403,7 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
//
// $lr = big-itercount-expression
// ..
-// t2DoLoopStart renamable $lr
+// $lr = t2DoLoopStart renamable $lr
// vector.body:
// ..
// $vpr = MVE_VCTP32 renamable $r3
@@ -1258,7 +1426,8 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");
- MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, 0);
+ MachineInstr *Def =
+ RDA->getMIOperand(LoLoop.Start, isDo(LoLoop.Start) ? 1 : 0);
if (!Def) {
LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");
return;
@@ -1266,56 +1435,9 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
// Collect and remove the users of iteration count.
SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec,
- LoLoop.End, LoLoop.InsertPt };
- SmallPtrSet<MachineInstr*, 2> Remove;
- if (RDA->isSafeToRemove(Def, Remove, Killed))
- LoLoop.ToRemove.insert(Remove.begin(), Remove.end());
- else {
+ LoLoop.End };
+ if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed))
LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n");
- return;
- }
-
- // Collect the dead code and the MBBs in which they reside.
- RDA->collectKilledOperands(Def, Killed);
- SmallPtrSet<MachineBasicBlock*, 2> BasicBlocks;
- for (auto *MI : Killed)
- BasicBlocks.insert(MI->getParent());
-
- // Collect IT blocks in all affected basic blocks.
- std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
- for (auto *MBB : BasicBlocks) {
- for (auto &MI : *MBB) {
- if (MI.getOpcode() != ARM::t2IT)
- continue;
- RDA->getReachingLocalUses(&MI, ARM::ITSTATE, ITBlocks[&MI]);
- }
- }
-
- // If we're removing all of the instructions within an IT block, then
- // also remove the IT instruction.
- SmallPtrSet<MachineInstr*, 2> ModifiedITs;
- for (auto *MI : Killed) {
- if (MachineOperand *MO = MI->findRegisterUseOperand(ARM::ITSTATE)) {
- MachineInstr *IT = RDA->getMIOperand(MI, *MO);
- auto &CurrentBlock = ITBlocks[IT];
- CurrentBlock.erase(MI);
- if (CurrentBlock.empty())
- ModifiedITs.erase(IT);
- else
- ModifiedITs.insert(IT);
- }
- }
-
- // Delete the killed instructions only if we don't have any IT blocks that
- // need to be modified because we need to fixup the mask.
- // TODO: Handle cases where IT blocks are modified.
- if (ModifiedITs.empty()) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Will remove iteration count:\n";
- for (auto *MI : Killed)
- dbgs() << " - " << *MI);
- LoLoop.ToRemove.insert(Killed.begin(), Killed.end());
- } else
- LLVM_DEBUG(dbgs() << "ARM Loops: Would need to modify IT block(s).\n");
}
MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
@@ -1324,113 +1446,71 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
// calculate the number of loop iterations.
IterationCountDCE(LoLoop);
- MachineInstr *InsertPt = LoLoop.InsertPt;
+ MachineBasicBlock::iterator InsertPt = LoLoop.StartInsertPt;
MachineInstr *Start = LoLoop.Start;
- MachineBasicBlock *MBB = InsertPt->getParent();
- bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
+ MachineBasicBlock *MBB = LoLoop.StartInsertBB;
unsigned Opc = LoLoop.getStartOpcode();
- MachineOperand &Count = LoLoop.getCount();
+ MachineOperand &Count = LoLoop.getLoopStartOperand();
MachineInstrBuilder MIB =
- BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
+ BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc));
MIB.addDef(ARM::LR);
MIB.add(Count);
- if (!IsDo)
+ if (!isDo(Start))
MIB.add(Start->getOperand(1));
- // If we're inserting at a mov lr, then remove it as it's redundant.
- if (InsertPt != Start)
- LoLoop.ToRemove.insert(InsertPt);
LoLoop.ToRemove.insert(Start);
LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
return &*MIB;
}
-void ARMLowOverheadLoops::FixupReductions(LowOverheadLoop &LoLoop) const {
- LLVM_DEBUG(dbgs() << "ARM Loops: Fixing up reduction(s).\n");
- auto BuildMov = [this](MachineInstr &InsertPt, Register To, Register From) {
- MachineBasicBlock *MBB = InsertPt.getParent();
- MachineInstrBuilder MIB =
- BuildMI(*MBB, &InsertPt, InsertPt.getDebugLoc(), TII->get(ARM::MVE_VORR));
- MIB.addDef(To);
- MIB.addReg(From);
- MIB.addReg(From);
- MIB.addImm(0);
- MIB.addReg(0);
- MIB.addReg(To);
- LLVM_DEBUG(dbgs() << "ARM Loops: Inserted VMOV: " << *MIB);
- };
-
- for (auto &Reduction : LoLoop.Reductions) {
- MachineInstr &Copy = Reduction->Copy;
- MachineInstr &Reduce = Reduction->Reduce;
- Register DestReg = Copy.getOperand(0).getReg();
-
- // Change the initialiser if present
- if (Reduction->Init) {
- MachineInstr *Init = Reduction->Init;
-
- for (unsigned i = 0; i < Init->getNumOperands(); ++i) {
- MachineOperand &MO = Init->getOperand(i);
- if (MO.isReg() && MO.isUse() && MO.isTied() &&
- Init->findTiedOperandIdx(i) == 0)
- Init->getOperand(i).setReg(DestReg);
- }
- Init->getOperand(0).setReg(DestReg);
- LLVM_DEBUG(dbgs() << "ARM Loops: Changed init regs: " << *Init);
- } else
- BuildMov(LoLoop.Preheader->instr_back(), DestReg, Copy.getOperand(1).getReg());
-
- // Change the reducing op to write to the register that is used to copy
- // its value on the next iteration. Also update the tied-def operand.
- Reduce.getOperand(0).setReg(DestReg);
- Reduce.getOperand(5).setReg(DestReg);
- LLVM_DEBUG(dbgs() << "ARM Loops: Changed reduction regs: " << Reduce);
-
- // Instead of a vpsel, just copy the register into the necessary one.
- MachineInstr &VPSEL = Reduction->VPSEL;
- if (VPSEL.getOperand(0).getReg() != DestReg)
- BuildMov(VPSEL, VPSEL.getOperand(0).getReg(), DestReg);
-
- // Remove the unnecessary instructions.
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing:\n"
- << " - " << Copy
- << " - " << VPSEL << "\n");
- Copy.eraseFromParent();
- VPSEL.eraseFromParent();
- }
-}
-
void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
auto RemovePredicate = [](MachineInstr *MI) {
+ if (MI->isDebugInstr())
+ return;
LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI);
- if (int PIdx = llvm::findFirstVPTPredOperandIdx(*MI)) {
- assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then &&
- "Expected Then predicate!");
- MI->getOperand(PIdx).setImm(ARMVCC::None);
- MI->getOperand(PIdx+1).setReg(0);
- } else
- llvm_unreachable("trying to unpredicate a non-predicated instruction");
+ int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
+ assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction");
+ assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then &&
+ "Expected Then predicate!");
+ MI->getOperand(PIdx).setImm(ARMVCC::None);
+ MI->getOperand(PIdx + 1).setReg(0);
};
- // There are a few scenarios which we have to fix up:
- // 1. VPT Blocks with non-uniform predicates:
- // - a. When the divergent instruction is a vctp
- // - b. When the block uses a vpst, and is only predicated on the vctp
- // - c. When the block uses a vpt and (optionally) contains one or more
- // vctp.
- // 2. VPT Blocks with uniform predicates:
- // - a. The block uses a vpst, and is only predicated on the vctp
for (auto &Block : LoLoop.getVPTBlocks()) {
- SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts();
- if (Block.HasNonUniformPredicate()) {
- PredicatedMI *Divergent = Block.getDivergent();
- if (isVCTP(Divergent->MI)) {
- // The vctp will be removed, so the block mask of the vp(s)t will need
- // to be recomputed.
- LoLoop.BlockMasksToRecompute.insert(Block.getPredicateThen());
- } else if (Block.isVPST() && Block.IsOnlyPredicatedOn(LoLoop.VCTP)) {
+ SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+
+ auto ReplaceVCMPWithVPT = [&](MachineInstr *&TheVCMP, MachineInstr *At) {
+ assert(TheVCMP && "Replacing a removed or non-existent VCMP");
+ // Replace the VCMP with a VPT
+ MachineInstrBuilder MIB =
+ BuildMI(*At->getParent(), At, At->getDebugLoc(),
+ TII->get(VCMPOpcodeToVPT(TheVCMP->getOpcode())));
+ MIB.addImm(ARMVCC::Then);
+ // Register one
+ MIB.add(TheVCMP->getOperand(1));
+ // Register two
+ MIB.add(TheVCMP->getOperand(2));
+ // The comparison code, e.g. ge, eq, lt
+ MIB.add(TheVCMP->getOperand(3));
+ LLVM_DEBUG(dbgs() << "ARM Loops: Combining with VCMP to VPT: " << *MIB);
+ LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
+ LoLoop.ToRemove.insert(TheVCMP);
+ TheVCMP = nullptr;
+ };
+
+ if (VPTState::isEntryPredicatedOnVCTP(Block, /*exclusive*/ true)) {
+ MachineInstr *VPST = Insts.front();
+ if (VPTState::hasUniformPredicate(Block)) {
+ // A vpt block starting with VPST is only predicated upon vctp and has no
+ // internal vpr defs:
+ // - Remove vpst.
+ // - Unpredicate the remaining instructions.
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
+ for (unsigned i = 1; i < Insts.size(); ++i)
+ RemovePredicate(Insts[i]);
+ } else {
// The VPT block has a non-uniform predicate but it uses a vpst and its
// entry is guarded only by a vctp, which means we:
// - Need to remove the original vpst.
@@ -1438,73 +1518,88 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
// we come across the divergent vpr def.
// - Insert a new vpst to predicate the instruction(s) that follow
// the divergent vpr def.
- // TODO: We could be producing more VPT blocks than necessary and could
- // fold the newly created one into a proceeding one.
- for (auto I = ++MachineBasicBlock::iterator(Block.getPredicateThen()),
- E = ++MachineBasicBlock::iterator(Divergent->MI); I != E; ++I)
+ MachineInstr *Divergent = VPTState::getDivergent(Block);
+ MachineBasicBlock *MBB = Divergent->getParent();
+ auto DivergentNext = ++MachineBasicBlock::iterator(Divergent);
+ while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr())
+ ++DivergentNext;
+
+ bool DivergentNextIsPredicated =
+ DivergentNext != MBB->end() &&
+ getVPTInstrPredicate(*DivergentNext) != ARMVCC::None;
+
+ for (auto I = ++MachineBasicBlock::iterator(VPST), E = DivergentNext;
+ I != E; ++I)
RemovePredicate(&*I);
- unsigned Size = 0;
- auto E = MachineBasicBlock::reverse_iterator(Divergent->MI);
- auto I = MachineBasicBlock::reverse_iterator(Insts.back().MI);
- MachineInstr *InsertAt = nullptr;
- while (I != E) {
- InsertAt = &*I;
- ++Size;
- ++I;
+ // Check if the instruction defining vpr is a vcmp so it can be combined
+ // with the VPST; this should be the divergent instruction.
+ MachineInstr *VCMP =
+ VCMPOpcodeToVPT(Divergent->getOpcode()) != 0 ? Divergent : nullptr;
+
+ if (DivergentNextIsPredicated) {
+ // Insert a VPST at the divergent instruction only if the next
+ // instruction would actually use it. A VCMP following a VPST can be
+ // merged into a VPT, so do that instead if the VCMP exists.
+ if (!VCMP) {
+ // Create a VPST (with a null mask for now, we'll recompute it
+ // later)
+ MachineInstrBuilder MIB =
+ BuildMI(*Divergent->getParent(), Divergent,
+ Divergent->getDebugLoc(), TII->get(ARM::MVE_VPST));
+ MIB.addImm(0);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
+ LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
+ } else {
+ // No RDA checks are necessary here since the VPST would have been
+ // directly after the VCMP
+ ReplaceVCMPWithVPT(VCMP, VCMP);
+ }
}
- // Create a VPST (with a null mask for now, we'll recompute it later).
- MachineInstrBuilder MIB = BuildMI(*InsertAt->getParent(), InsertAt,
- InsertAt->getDebugLoc(),
- TII->get(ARM::MVE_VPST));
- MIB.addImm(0);
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getPredicateThen());
- LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
- LoLoop.ToRemove.insert(Block.getPredicateThen());
- LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
}
- // Else, if the block uses a vpt, iterate over the block, removing the
- // extra VCTPs it may contain.
- else if (Block.isVPT()) {
- bool RemovedVCTP = false;
- for (PredicatedMI &Elt : Block.getInsts()) {
- MachineInstr *MI = Elt.MI;
- if (isVCTP(MI)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *MI);
- LoLoop.ToRemove.insert(MI);
- RemovedVCTP = true;
- continue;
- }
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
+ LoLoop.ToRemove.insert(VPST);
+ } else if (Block.containsVCTP()) {
+ // The vctp will be removed, so either the entire block will be dead or
+ // the block mask of the vp(s)t will need to be recomputed.
+ MachineInstr *VPST = Insts.front();
+ if (Block.size() == 2) {
+ assert(VPST->getOpcode() == ARM::MVE_VPST &&
+ "Found a VPST in an otherwise empty vpt block");
+ LoLoop.ToRemove.insert(VPST);
+ } else
+ LoLoop.BlockMasksToRecompute.insert(VPST);
+ } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) {
+ // If this block starts with a VPST then attempt to merge it with the
+ // preceding un-merged VCMP into a VPT. This VCMP comes from a VPT
+ // block that no longer exists.
+ MachineInstr *VPST = Insts.front();
+ auto Next = ++MachineBasicBlock::iterator(VPST);
+ assert(getVPTInstrPredicate(*Next) != ARMVCC::None &&
+ "The instruction after a VPST must be predicated");
+ (void)Next;
+ MachineInstr *VprDef = RDA->getUniqueReachingMIDef(VPST, ARM::VPR);
+ if (VprDef && VCMPOpcodeToVPT(VprDef->getOpcode()) &&
+ !LoLoop.ToRemove.contains(VprDef)) {
+ MachineInstr *VCMP = VprDef;
+ // The VCMP and VPST can only be merged if the VCMP's operands will have
+ // the same values at the VPST.
+ // If any of the instructions between the VCMP and VPST are predicated
+ // then a different code path is expected to have merged the VCMP and
+ // VPST already.
+ if (!std::any_of(++MachineBasicBlock::iterator(VCMP),
+ MachineBasicBlock::iterator(VPST), hasVPRUse) &&
+ RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(1).getReg()) &&
+ RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(2).getReg())) {
+ ReplaceVCMPWithVPT(VCMP, VPST);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
+ LoLoop.ToRemove.insert(VPST);
}
- if (RemovedVCTP)
- LoLoop.BlockMasksToRecompute.insert(Block.getPredicateThen());
}
- } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP) && Block.isVPST()) {
- // A vpt block starting with VPST, is only predicated upon vctp and has no
- // internal vpr defs:
- // - Remove vpst.
- // - Unpredicate the remaining instructions.
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getPredicateThen());
- LoLoop.ToRemove.insert(Block.getPredicateThen());
- for (auto &PredMI : Insts)
- RemovePredicate(PredMI.MI);
- }
- }
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing remaining VCTPs...\n");
- // Remove the "main" VCTP
- LoLoop.ToRemove.insert(LoLoop.VCTP);
- LLVM_DEBUG(dbgs() << " " << *LoLoop.VCTP);
- // Remove remaining secondary VCTPs
- for (MachineInstr *VCTP : LoLoop.SecondaryVCTPs) {
- // All VCTPs that aren't marked for removal yet should be unpredicated ones.
- // The predicated ones should have already been marked for removal when
- // visiting the VPT blocks.
- if (LoLoop.ToRemove.insert(VCTP).second) {
- assert(getVPTInstrPredicate(*VCTP) == ARMVCC::None &&
- "Removing Predicated VCTP without updating the block mask!");
- LLVM_DEBUG(dbgs() << " " << *VCTP);
}
}
+
+ LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
}
void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
@@ -1518,8 +1613,9 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
TII->get(Opc));
MIB.addDef(ARM::LR);
- MIB.add(End->getOperand(0));
- MIB.add(End->getOperand(1));
+ unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
+ MIB.add(End->getOperand(Off + 0));
+ MIB.add(End->getOperand(Off + 1));
LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
LoLoop.ToRemove.insert(LoLoop.Dec);
LoLoop.ToRemove.insert(End);
@@ -1547,18 +1643,18 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
if (LoLoop.Start->getOpcode() == ARM::t2WhileLoopStart)
RevertWhile(LoLoop.Start);
else
- LoLoop.Start->eraseFromParent();
- bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec);
- RevertLoopEnd(LoLoop.End, FlagsAlreadySet);
+ RevertDo(LoLoop.Start);
+ if (LoLoop.Dec == LoLoop.End)
+ RevertLoopEndDec(LoLoop.End);
+ else
+ RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec));
} else {
LoLoop.Start = ExpandLoopStart(LoLoop);
RemoveDeadBranch(LoLoop.Start);
LoLoop.End = ExpandLoopEnd(LoLoop);
RemoveDeadBranch(LoLoop.End);
- if (LoLoop.IsTailPredicationLegal()) {
+ if (LoLoop.IsTailPredicationLegal())
ConvertVPTBlocks(LoLoop);
- FixupReductions(LoLoop);
- }
for (auto *I : LoLoop.ToRemove) {
LLVM_DEBUG(dbgs() << "ARM Loops: Erasing " << *I);
I->eraseFromParent();
@@ -1595,6 +1691,7 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
SmallVector<MachineInstr*, 4> Starts;
SmallVector<MachineInstr*, 4> Decs;
SmallVector<MachineInstr*, 4> Ends;
+ SmallVector<MachineInstr *, 4> EndDecs;
for (auto &I : MBB) {
if (isLoopStart(I))
@@ -1603,9 +1700,11 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
Decs.push_back(&I);
else if (I.getOpcode() == ARM::t2LoopEnd)
Ends.push_back(&I);
+ else if (I.getOpcode() == ARM::t2LoopEndDec)
+ EndDecs.push_back(&I);
}
- if (Starts.empty() && Decs.empty() && Ends.empty())
+ if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty())
continue;
Changed = true;
@@ -1614,13 +1713,15 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
if (Start->getOpcode() == ARM::t2WhileLoopStart)
RevertWhile(Start);
else
- Start->eraseFromParent();
+ RevertDo(Start);
}
for (auto *Dec : Decs)
RevertLoopDec(Dec);
for (auto *End : Ends)
RevertLoopEnd(End);
+ for (auto *End : EndDecs)
+ RevertLoopEndDec(End);
}
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index e750649ce86c..9a7c1f541aa2 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsARM.h"
@@ -201,8 +202,7 @@ namespace {
public:
WidenedLoad(SmallVectorImpl<LoadInst*> &Lds, LoadInst *Wide)
: NewLd(Wide) {
- for (auto *I : Lds)
- Loads.push_back(I);
+ append_range(Loads, Lds);
}
LoadInst *getLoad() {
return NewLd;
@@ -374,7 +374,7 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
DepMap RAWDeps;
// Record any writes that may alias a load.
- const auto Size = LocationSize::unknown();
+ const auto Size = LocationSize::beforeOrAfterPointer();
for (auto Write : Writes) {
for (auto Read : Loads) {
MemoryLocation ReadLoc =
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
index 1ae71be9f760..2dc097566d14 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
@@ -77,6 +77,8 @@ def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
+def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
+ AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">,
AssemblerPredicate<(all_of FeatureVFP2_SP), "VFP2">;
@@ -187,6 +189,9 @@ let RecomputePerFunction = 1 in {
def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
" TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
"Subtarget->hasMinSize())">;
+ def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>;
+ def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>;
+
}
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index f9dbfef4c113..1a7f10a13ed3 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -156,10 +156,10 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
"Subclass not added?");
assert(RBGPR.covers(*TRI.getRegClass(ARM::tcGPRRegClassID)) &&
"Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) &&
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnoip_and_tcGPRRegClassID)) &&
"Subclass not added?");
- assert(RBGPR.covers(
- *TRI.getRegClass(ARM::tGPREven_and_tGPR_and_tcGPRRegClassID)) &&
+ assert(RBGPR.covers(*TRI.getRegClass(
+ ARM::tGPREven_and_GPRnoip_and_tcGPRRegClassID)) &&
"Subclass not added?");
assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPROdd_and_tcGPRRegClassID)) &&
"Subclass not added?");
@@ -182,10 +182,12 @@ ARMRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
switch (RC.getID()) {
case GPRRegClassID:
case GPRwithAPSRRegClassID:
+ case GPRnoipRegClassID:
case GPRnopcRegClassID:
+ case GPRnoip_and_GPRnopcRegClassID:
case rGPRRegClassID:
case GPRspRegClassID:
- case tGPR_and_tcGPRRegClassID:
+ case GPRnoip_and_tcGPRRegClassID:
case tcGPRRegClassID:
case tGPRRegClassID:
case tGPREvenRegClassID:
@@ -193,7 +195,7 @@ ARMRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case tGPR_and_tGPREvenRegClassID:
case tGPR_and_tGPROddRegClassID:
case tGPREven_and_tcGPRRegClassID:
- case tGPREven_and_tGPR_and_tcGPRRegClassID:
+ case tGPREven_and_GPRnoip_and_tcGPRRegClassID:
case tGPROdd_and_tcGPRRegClassID:
return getRegBank(ARM::GPRRegBankID);
case HPRRegClassID:
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td
index a384b0dc757c..fe3243315d68 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -235,6 +235,23 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
let DiagnosticString = "operand must be a register in range [r0, r15]";
}
+// Register set that excludes registers that are reserved for procedure calls.
+// This is used for pseudo-instructions that are actually implemented using a
+// procedure call.
+def GPRnoip : RegisterClass<"ARM", [i32], 32, (sub GPR, R12, LR)> {
+ // Allocate LR as the first CSR since it is always saved anyway.
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+ // know how to spill them. If we make our prologue/epilogue code smarter at
+ // some point, we can go back to using the above allocation orders for the
+ // Thumb1 instructions that know how to use hi regs.
+ let AltOrders = [(add GPRnoip, GPRnoip), (trunc GPRnoip, 8),
+ (add (trunc GPRnoip, 8), (shl GPRnoip, 8))];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
+ }];
+ let DiagnosticString = "operand must be a register in range [r0, r14]";
+}
+
// GPRs without the PC. Some ARM instructions do not allow the PC in
// certain operand slots, particularly as the destination. Primarily
// useful for disassembly.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSLSHardening.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSLSHardening.cpp
new file mode 100644
index 000000000000..cfcc7d5a0408
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSLSHardening.cpp
@@ -0,0 +1,416 @@
+//===- ARMSLSHardening.cpp - Harden Straight Line Misspeculation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass to insert code to mitigate side-channel
+// vulnerabilities that may happen under straight-line misspeculation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/IndirectThunks.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-sls-hardening"
+
+#define ARM_SLS_HARDENING_NAME "ARM sls hardening pass"
+
+namespace {
+
+class ARMSLSHardening : public MachineFunctionPass {
+public:
+ const TargetInstrInfo *TII;
+ const ARMSubtarget *ST;
+
+ static char ID;
+
+ ARMSLSHardening() : MachineFunctionPass(ID) {
+ initializeARMSLSHardeningPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override { return ARM_SLS_HARDENING_NAME; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const;
+ bool hardenIndirectCalls(MachineBasicBlock &MBB) const;
+ MachineBasicBlock &
+ ConvertIndirectCallToIndirectJump(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator) const;
+};
+
+} // end anonymous namespace
+
+char ARMSLSHardening::ID = 0;
+
+INITIALIZE_PASS(ARMSLSHardening, "arm-sls-hardening",
+ ARM_SLS_HARDENING_NAME, false, false)
+
+static void insertSpeculationBarrier(const ARMSubtarget *ST,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL,
+ bool AlwaysUseISBDSB = false) {
+ assert(MBBI != MBB.begin() &&
+ "Must not insert SpeculationBarrierEndBB as only instruction in MBB.");
+ assert(std::prev(MBBI)->isBarrier() &&
+ "SpeculationBarrierEndBB must only follow unconditional control flow "
+ "instructions.");
+ assert(std::prev(MBBI)->isTerminator() &&
+ "SpeculationBarrierEndBB must only follow terminators.");
+ const TargetInstrInfo *TII = ST->getInstrInfo();
+ assert(ST->hasDataBarrier() || ST->hasSB());
+ bool ProduceSB = ST->hasSB() && !AlwaysUseISBDSB;
+ unsigned BarrierOpc =
+ ProduceSB ? (ST->isThumb() ? ARM::t2SpeculationBarrierSBEndBB
+ : ARM::SpeculationBarrierSBEndBB)
+ : (ST->isThumb() ? ARM::t2SpeculationBarrierISBDSBEndBB
+ : ARM::SpeculationBarrierISBDSBEndBB);
+ if (MBBI == MBB.end() || !isSpeculationBarrierEndBBOpcode(MBBI->getOpcode()))
+ BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc));
+}
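The barrier selection above boils down to a two-way choice: emit the single SB barrier when the subtarget has it and the caller did not force ISB+DSB, then pick the Thumb2 or ARM flavour of whichever barrier was chosen. A small standalone sketch of just that decision, with string names standing in for the opcode enumerators (illustrative, not the LLVM API):

#include <string>

// Mirrors the BarrierOpc computation in insertSpeculationBarrier above.
std::string pickBarrierOpcode(bool HasSB, bool IsThumb, bool AlwaysUseISBDSB) {
  const bool ProduceSB = HasSB && !AlwaysUseISBDSB;
  if (ProduceSB)
    return IsThumb ? "t2SpeculationBarrierSBEndBB" : "SpeculationBarrierSBEndBB";
  return IsThumb ? "t2SpeculationBarrierISBDSBEndBB"
                 : "SpeculationBarrierISBDSBEndBB";
}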
+
+bool ARMSLSHardening::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget<ARMSubtarget>();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ bool Modified = false;
+ for (auto &MBB : MF) {
+ Modified |= hardenReturnsAndBRs(MBB);
+ Modified |= hardenIndirectCalls(MBB);
+ }
+
+ return Modified;
+}
+
+bool ARMSLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const {
+ if (!ST->hardenSlsRetBr())
+ return false;
+ assert(!ST->isThumb1Only());
+ bool Modified = false;
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(), E = MBB.end();
+ MachineBasicBlock::iterator NextMBBI;
+ for (; MBBI != E; MBBI = NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ NextMBBI = std::next(MBBI);
+ if (isIndirectControlFlowNotComingBack(MI)) {
+ assert(MI.isTerminator());
+ assert(!TII->isPredicated(MI));
+ insertSpeculationBarrier(ST, MBB, std::next(MBBI), MI.getDebugLoc());
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_";
+
+static const struct ThunkNameRegMode {
+ const char* Name;
+ Register Reg;
+ bool isThumb;
+} SLSBLRThunks[] = {
+ {"__llvm_slsblr_thunk_arm_r0", ARM::R0, false},
+ {"__llvm_slsblr_thunk_arm_r1", ARM::R1, false},
+ {"__llvm_slsblr_thunk_arm_r2", ARM::R2, false},
+ {"__llvm_slsblr_thunk_arm_r3", ARM::R3, false},
+ {"__llvm_slsblr_thunk_arm_r4", ARM::R4, false},
+ {"__llvm_slsblr_thunk_arm_r5", ARM::R5, false},
+ {"__llvm_slsblr_thunk_arm_r6", ARM::R6, false},
+ {"__llvm_slsblr_thunk_arm_r7", ARM::R7, false},
+ {"__llvm_slsblr_thunk_arm_r8", ARM::R8, false},
+ {"__llvm_slsblr_thunk_arm_r9", ARM::R9, false},
+ {"__llvm_slsblr_thunk_arm_r10", ARM::R10, false},
+ {"__llvm_slsblr_thunk_arm_r11", ARM::R11, false},
+ {"__llvm_slsblr_thunk_arm_sp", ARM::SP, false},
+ {"__llvm_slsblr_thunk_arm_pc", ARM::PC, false},
+ {"__llvm_slsblr_thunk_thumb_r0", ARM::R0, true},
+ {"__llvm_slsblr_thunk_thumb_r1", ARM::R1, true},
+ {"__llvm_slsblr_thunk_thumb_r2", ARM::R2, true},
+ {"__llvm_slsblr_thunk_thumb_r3", ARM::R3, true},
+ {"__llvm_slsblr_thunk_thumb_r4", ARM::R4, true},
+ {"__llvm_slsblr_thunk_thumb_r5", ARM::R5, true},
+ {"__llvm_slsblr_thunk_thumb_r6", ARM::R6, true},
+ {"__llvm_slsblr_thunk_thumb_r7", ARM::R7, true},
+ {"__llvm_slsblr_thunk_thumb_r8", ARM::R8, true},
+ {"__llvm_slsblr_thunk_thumb_r9", ARM::R9, true},
+ {"__llvm_slsblr_thunk_thumb_r10", ARM::R10, true},
+ {"__llvm_slsblr_thunk_thumb_r11", ARM::R11, true},
+ {"__llvm_slsblr_thunk_thumb_sp", ARM::SP, true},
+ {"__llvm_slsblr_thunk_thumb_pc", ARM::PC, true},
+};
+
+namespace {
+struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
+ const char *getThunkPrefix() { return SLSBLRNamePrefix; }
+ bool mayUseThunk(const MachineFunction &MF) {
+ // FIXME: This could also check whether there are any indirect calls in
+ // the function, to more accurately reflect whether a thunk will be needed.
+ return MF.getSubtarget<ARMSubtarget>().hardenSlsBlr();
+ }
+ void insertThunks(MachineModuleInfo &MMI);
+ void populateThunk(MachineFunction &MF);
+};
+} // namespace
+
+void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+ // FIXME: It probably would be possible to filter which thunks to produce
+ // based on which registers are actually used in indirect calls in this
+ // function. But would that be a worthwhile optimization?
+ for (auto T : SLSBLRThunks)
+ createThunkFunction(MMI, T.Name);
+}
+
+void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
+ // FIXME: How to better communicate Register number, rather than through
+ // name and lookup table?
+ assert(MF.getName().startswith(getThunkPrefix()));
+ auto ThunkIt = llvm::find_if(
+ SLSBLRThunks, [&MF](auto T) { return T.Name == MF.getName(); });
+ assert(ThunkIt != std::end(SLSBLRThunks));
+ Register ThunkReg = ThunkIt->Reg;
+ bool isThumb = ThunkIt->isThumb;
+
+ const TargetInstrInfo *TII = MF.getSubtarget<ARMSubtarget>().getInstrInfo();
+ MachineBasicBlock *Entry = &MF.front();
+ Entry->clear();
+
+ // These thunks need to consist of the following instructions:
+ // __llvm_slsblr_thunk_(arm/thumb)_rN:
+ // bx rN
+ // barrierInsts
+ Entry->addLiveIn(ThunkReg);
+ if (isThumb)
+ BuildMI(Entry, DebugLoc(), TII->get(ARM::tBX))
+ .addReg(ThunkReg)
+ .add(predOps(ARMCC::AL));
+ else
+ BuildMI(Entry, DebugLoc(), TII->get(ARM::BX))
+ .addReg(ThunkReg);
+
+ // Make sure the thunks do not make use of the SB extension in case there
+ // is a function somewhere that will call them but that, for some reason,
+ // disabled the SB extension locally, even though it is enabled for the
+ // module otherwise. Therefore set AlwaysUseISBDSB to true.
+ insertSpeculationBarrier(&MF.getSubtarget<ARMSubtarget>(), *Entry,
+ Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/);
+}
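As the FIXME notes, populateThunk recovers the target register purely from the thunk's symbol name via the SLSBLRThunks table. A small standalone sketch of that lookup, with the table cut down to a few entries and plain strings in place of Register values (illustrative only, not the real data structures):

#include <algorithm>
#include <string>
#include <vector>

struct ThunkEntry {
  const char *Name;
  const char *Reg; // stand-in for the llvm::Register in the real table
  bool IsThumb;
};

static const std::vector<ThunkEntry> Thunks = {
    {"__llvm_slsblr_thunk_arm_r0", "r0", false},
    {"__llvm_slsblr_thunk_thumb_r0", "r0", true},
    {"__llvm_slsblr_thunk_thumb_r4", "r4", true},
};

// Returns the entry whose name matches the thunk function, or nullptr.
const ThunkEntry *lookupThunk(const std::string &FnName) {
  auto It = std::find_if(Thunks.begin(), Thunks.end(),
                         [&](const ThunkEntry &T) { return FnName == T.Name; });
  return It == Thunks.end() ? nullptr : &*It;
}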
+
+MachineBasicBlock &ARMSLSHardening::ConvertIndirectCallToIndirectJump(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+ // Transform an indirect call to an indirect jump as follows:
+ // Before:
+ // |-----------------------------|
+ // | ... |
+ // | instI |
+ // | BLX rN |
+ // | instJ |
+ // | ... |
+ // |-----------------------------|
+ //
+ // After:
+ // |--------------------------------------|
+ // | ... |
+ // | instI |
+ // | *call* __llvm_slsblr_thunk_mode_rN |
+ // | instJ |
+ // | ... |
+ // |--------------------------------------|
+ //
+ // __llvm_slsblr_thunk_mode_rN:
+ // |-----------------------------|
+ // | BX rN |
+ // | barrierInsts |
+ // |-----------------------------|
+ //
+ // The __llvm_slsblr_thunk_mode_xN thunks are created by the
+ // SLSBLRThunkInserter.
+ // This function merely needs to transform an indirect call to a direct call
+ // to __llvm_slsblr_thunk_rN.
+ MachineInstr &IndirectCall = *MBBI;
+ assert(isIndirectCall(IndirectCall) && !IndirectCall.isReturn());
+ int RegOpIdxOnIndirectCall = -1;
+ bool isThumb;
+ switch (IndirectCall.getOpcode()) {
+ case ARM::BLX: // !isThumb2
+ case ARM::BLX_noip: // !isThumb2
+ isThumb = false;
+ RegOpIdxOnIndirectCall = 0;
+ break;
+ case ARM::tBLXr: // isThumb2
+ case ARM::tBLXr_noip: // isThumb2
+ isThumb = true;
+ RegOpIdxOnIndirectCall = 2;
+ break;
+ default:
+ llvm_unreachable("unhandled Indirect Call");
+ }
+
+ Register Reg = IndirectCall.getOperand(RegOpIdxOnIndirectCall).getReg();
+ // Since linkers are allowed to clobber R12 on function calls, the above
+ // mitigation only works if the original indirect call instruction did not
+ // use R12. Earlier code generation must ensure that no indirect call using
+ // R12 is produced when the mitigation is enabled.
+ // The transformation is also incorrect if the indirect call uses LR, so we
+ // have to avoid that as well.
+ assert(Reg != ARM::R12 && Reg != ARM::LR);
+ bool RegIsKilled = IndirectCall.getOperand(RegOpIdxOnIndirectCall).isKill();
+
+ DebugLoc DL = IndirectCall.getDebugLoc();
+
+ MachineFunction &MF = *MBBI->getMF();
+ auto ThunkIt = llvm::find_if(SLSBLRThunks, [Reg, isThumb](auto T) {
+ return T.Reg == Reg && T.isThumb == isThumb;
+ });
+ assert(ThunkIt != std::end(SLSBLRThunks));
+ Module *M = MF.getFunction().getParent();
+ const GlobalValue *GV = cast<GlobalValue>(M->getNamedValue(ThunkIt->Name));
+
+ MachineInstr *BL =
+ isThumb ? BuildMI(MBB, MBBI, DL, TII->get(ARM::tBL))
+ .addImm(IndirectCall.getOperand(0).getImm())
+ .addReg(IndirectCall.getOperand(1).getReg())
+ .addGlobalAddress(GV)
+ : BuildMI(MBB, MBBI, DL, TII->get(ARM::BL)).addGlobalAddress(GV);
+
+ // Now copy the implicit operands from IndirectCall to BL and copy other
+ // necessary info.
+ // However, both the IndirectCall and BL instructions implicitly use SP and
+ // implicitly define LR. Blindly copying the implicit operands would result
+ // in the SP and LR operands being present multiple times. While this may
+ // not be too much of an issue, let's avoid that for cleanliness by removing
+ // those implicit operands from the BL created above before we copy over all
+ // implicit operands from the IndirectCall.
+ int ImpLROpIdx = -1;
+ int ImpSPOpIdx = -1;
+ for (unsigned OpIdx = BL->getNumExplicitOperands();
+ OpIdx < BL->getNumOperands(); OpIdx++) {
+ MachineOperand Op = BL->getOperand(OpIdx);
+ if (!Op.isReg())
+ continue;
+ if (Op.getReg() == ARM::LR && Op.isDef())
+ ImpLROpIdx = OpIdx;
+ if (Op.getReg() == ARM::SP && !Op.isDef())
+ ImpSPOpIdx = OpIdx;
+ }
+ assert(ImpLROpIdx != -1);
+ assert(ImpSPOpIdx != -1);
+ int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx);
+ int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx);
+ BL->RemoveOperand(FirstOpIdxToRemove);
+ BL->RemoveOperand(SecondOpIdxToRemove);
+ // Now copy over the implicit operands from the original IndirectCall
+ BL->copyImplicitOps(MF, IndirectCall);
+ MF.moveCallSiteInfo(&IndirectCall, BL);
+ // Also add the register called in the IndirectCall as being used in the
+ // called thunk.
+ BL->addOperand(MachineOperand::CreateReg(Reg, false /*isDef*/, true /*isImp*/,
+ RegIsKilled /*isKill*/));
+ // Remove the IndirectCall instruction.
+ MBB.erase(MBBI);
+ return MBB;
+}
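One detail worth calling out from the code above: the duplicated implicit LR and SP operands are erased highest-index-first, so that the second index is still valid after the first removal. A tiny standalone sketch of the same idea on a plain std::vector (names are illustrative, not the MachineInstr API):

#include <algorithm>
#include <string>
#include <vector>

// Erase two elements identified by index; removing the larger index first
// means the smaller index is not shifted by the first erase.
void removeTwoByIndex(std::vector<std::string> &Ops, int IdxA, int IdxB) {
  const int First = std::max(IdxA, IdxB);
  const int Second = std::min(IdxA, IdxB);
  Ops.erase(Ops.begin() + First);
  Ops.erase(Ops.begin() + Second);
}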
+
+bool ARMSLSHardening::hardenIndirectCalls(MachineBasicBlock &MBB) const {
+ if (!ST->hardenSlsBlr())
+ return false;
+ bool Modified = false;
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMBBI;
+ for (; MBBI != E; MBBI = NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ NextMBBI = std::next(MBBI);
+ // Tail calls are both indirect calls and "returns".
+ // They are also indirect jumps, so they should be handled by
+ // sls-harden-retbr rather than by sls-harden-blr.
+ if (isIndirectCall(MI) && !MI.isReturn()) {
+ ConvertIndirectCallToIndirectJump(MBB, MBBI);
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+FunctionPass *llvm::createARMSLSHardeningPass() {
+ return new ARMSLSHardening();
+}
+
+namespace {
+class ARMIndirectThunks : public MachineFunctionPass {
+public:
+ static char ID;
+
+ ARMIndirectThunks() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "ARM Indirect Thunks"; }
+
+ bool doInitialization(Module &M) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ }
+
+private:
+ std::tuple<SLSBLRThunkInserter> TIs;
+
+ // FIXME: When LLVM moves to C++17, these can become folds
+ template <typename... ThunkInserterT>
+ static void initTIs(Module &M,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ (void)std::initializer_list<int>{
+ (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
+ }
+ template <typename... ThunkInserterT>
+ static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ bool Modified = false;
+ (void)std::initializer_list<int>{
+ Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
+ return Modified;
+ }
+};
+
+} // end anonymous namespace
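The FIXME in ARMIndirectThunks points out that the std::initializer_list expansion trick is only needed pre-C++17. A minimal standalone sketch of the fold-expression rewrite it alludes to, using a dummy inserter type and plain ints in place of Module/MachineModuleInfo (assumptions, not the real interfaces):

#include <tuple>

struct DummyInserter {
  void init(int &M) { ++M; }
  bool run(int &MMI) { return MMI > 0; }
};

template <typename... InserterT>
void initTIs(int &M, std::tuple<InserterT...> &Inserters) {
  // C++17 fold over the comma operator replaces the initializer_list trick.
  (std::get<InserterT>(Inserters).init(M), ...);
}

template <typename... InserterT>
bool runTIs(int &MMI, std::tuple<InserterT...> &Inserters) {
  bool Modified = false;
  ((Modified |= std::get<InserterT>(Inserters).run(MMI)), ...);
  return Modified;
}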
+
+char ARMIndirectThunks::ID = 0;
+
+FunctionPass *llvm::createARMIndirectThunks() {
+ return new ARMIndirectThunks();
+}
+
+bool ARMIndirectThunks::doInitialization(Module &M) {
+ initTIs(M, TIs);
+ return false;
+}
+
+bool ARMIndirectThunks::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << getPassName() << '\n');
+ auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ return runTIs(MMI, MF, TIs);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td
index ce74d325c4e5..53a2a6fec51e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td
@@ -151,7 +151,60 @@ def : PredicateProlog<[{
(void)STI;
}]>;
-def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>;
+def IsPredicated : CheckFunctionPredicateWithTII<
+ "ARM_MC::isPredicated",
+ "isPredicated"
+>;
+def IsPredicatedPred : MCSchedPredicate<IsPredicated>;
+
+def IsCPSRDefined : CheckFunctionPredicateWithTII<
+ "ARM_MC::isCPSRDefined",
+ "ARMBaseInstrInfo::isCPSRDefined"
+>;
+
+def IsCPSRDefinedPred : MCSchedPredicate<IsCPSRDefined>;
+
+let FunctionMapper = "ARM_AM::getAM2ShiftOpc" in {
+ class CheckAM2NoShift<int n> : CheckImmOperand_s<n, "ARM_AM::no_shift">;
+ class CheckAM2ShiftLSL<int n> : CheckImmOperand_s<n, "ARM_AM::lsl">;
+}
+
+let FunctionMapper = "ARM_AM::getAM2Op" in {
+ class CheckAM2OpAdd<int n> : CheckImmOperand_s<n, "ARM_AM::add"> {}
+ class CheckAM2OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {}
+}
+
+let FunctionMapper = "ARM_AM::getAM2Offset" in {
+ class CheckAM2Offset<int n, int of> : CheckImmOperand<n, of> {}
+}
+
+def IsLDMBaseRegInList : CheckFunctionPredicate<
+ "ARM_MC::isLDMBaseRegInList", "ARM_MC::isLDMBaseRegInList"
+>;
+
+let FunctionMapper = "ARM_AM::getAM3Op" in {
+ class CheckAM3OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {}
+}
+
+// LDM, base reg in list
+def IsLDMBaseRegInListPred : MCSchedPredicate<IsLDMBaseRegInList>;
+
+class IsRegPCPred<int n> : MCSchedPredicate<CheckRegOperand<n, PC>>;
+
+class BranchWriteRes<int lat, int uops, list<ProcResourceKind> resl,
+ list<int> rcl, SchedWriteRes wr> :
+ SchedWriteRes<!listconcat(wr.ProcResources, resl)> {
+ let Latency = !add(wr.Latency, lat);
+ let ResourceCycles = !listconcat(wr.ResourceCycles, rcl);
+ let NumMicroOps = !add(wr.NumMicroOps, uops);
+ SchedWriteRes BaseWr = wr;
+}
+
+class CheckBranchForm<int n, BranchWriteRes br> :
+ SchedWriteVariant<[
+ SchedVar<IsRegPCPred<n>, [br]>,
+ SchedVar<NoSchedPred, [br.BaseWr]>
+ ]>;
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
@@ -414,14 +467,3 @@ def IIC_VTBX2 : InstrItinClass;
def IIC_VTBX3 : InstrItinClass;
def IIC_VTBX4 : InstrItinClass;
def IIC_VDOTPROD : InstrItinClass;
-
-//===----------------------------------------------------------------------===//
-// Processor instruction itineraries.
-
-include "ARMScheduleV6.td"
-include "ARMScheduleA8.td"
-include "ARMScheduleA9.td"
-include "ARMScheduleSwift.td"
-include "ARMScheduleR52.td"
-include "ARMScheduleA57.td"
-include "ARMScheduleM4.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td
index d9a8d304c41f..0c610a4839f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -21,59 +21,47 @@
// Therefore, IssueWidth is set to the narrower of the two at three, while still
// modeling the machine as out-of-order.
-def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>;
+def IsCPSRDefinedAndPredicated : CheckAll<[IsCPSRDefined, IsPredicated]>;
def IsCPSRDefinedAndPredicatedPred :
- SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>;
+ MCSchedPredicate<IsCPSRDefinedAndPredicated>;
// Cortex A57 rev. r1p0 or later (false = r0px)
-def IsR1P0AndLaterPred : SchedPredicate<[{false}]>;
+def IsR1P0AndLaterPred : MCSchedPredicate<FalsePred>;
-// If Addrmode3 contains register offset (not immediate)
-def IsLdrAm3RegOffPred :
- SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>;
-// The same predicate with operand offset 2 and 3:
-def IsLdrAm3RegOffPredX2 :
- SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>;
-def IsLdrAm3RegOffPredX3 :
- SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>;
+def IsLdrAm3RegOffPred : MCSchedPredicate<CheckInvalidRegOperand<2>>;
+def IsLdrAm3RegOffPredX2 : MCSchedPredicate<CheckInvalidRegOperand<3>>;
+def IsLdrAm3RegOffPredX3 : MCSchedPredicate<CheckInvalidRegOperand<4>>;
// If Addrmode3 contains "minus register"
-def IsLdrAm3NegRegOffPred :
- SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>;
-// The same predicate with operand offset 2 and 3:
-def IsLdrAm3NegRegOffPredX2 :
- SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>;
-def IsLdrAm3NegRegOffPredX3 :
- SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>;
+class Am3NegativeRegOffset<int n> : MCSchedPredicate<CheckAll<[
+ CheckValidRegOperand<n>,
+ CheckAM3OpSub<!add(n, 1)>]>>;
+
+def IsLdrAm3NegRegOffPred : Am3NegativeRegOffset<2>;
+def IsLdrAm3NegRegOffPredX2 : Am3NegativeRegOffset<3>;
+def IsLdrAm3NegRegOffPredX3 : Am3NegativeRegOffset<4>;
// Load, scaled register offset, not plus LSL2
-def IsLdstsoScaledNotOptimalPredX0 :
- SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>;
-def IsLdstsoScaledNotOptimalPred :
- SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>;
-def IsLdstsoScaledNotOptimalPredX2 :
- SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>;
-
-// Load, scaled register offset
-def IsLdstsoScaledPred :
- SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>;
-def IsLdstsoScaledPredX2 :
- SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>;
-
-def IsLdstsoMinusRegPredX0 :
- SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>;
-def IsLdstsoMinusRegPred :
- SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>;
-def IsLdstsoMinusRegPredX2 :
- SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>;
-
-// Load, scaled register offset
-def IsLdrAm2ScaledPred :
- SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>;
-
-// LDM, base reg in list
-def IsLdmBaseRegInList :
- SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>;
+class ScaledRegNotPlusLsl2<int n> : CheckNot<
+ CheckAny<[
+ CheckAM2NoShift<n>,
+ CheckAll<[
+ CheckAM2OpAdd<n>,
+ CheckAM2ShiftLSL<n>,
+ CheckAM2Offset<n, 2>
+ ]>
+ ]>
+ >;
+
+def IsLdstsoScaledNotOptimalPredX0 : MCSchedPredicate<ScaledRegNotPlusLsl2<2>>;
+def IsLdstsoScaledNotOptimalPred : MCSchedPredicate<ScaledRegNotPlusLsl2<3>>;
+def IsLdstsoScaledNotOptimalPredX2 : MCSchedPredicate<ScaledRegNotPlusLsl2<4>>;
+
+def IsLdstsoScaledPredX2 : MCSchedPredicate<CheckNot<CheckAM2NoShift<4>>>;
+
+def IsLdstsoMinusRegPredX0 : MCSchedPredicate<CheckAM2OpSub<2>>;
+def IsLdstsoMinusRegPred : MCSchedPredicate<CheckAM2OpSub<3>>;
+def IsLdstsoMinusRegPredX2 : MCSchedPredicate<CheckAM2OpSub<4>>;
class A57WriteLMOpsListType<list<SchedWriteRes> writes> {
list <SchedWriteRes> Writes = writes;
@@ -185,28 +173,29 @@ def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
+// Check branch forms of ALU ops:
+// check whether register operand 0 is the PC; if so, add 2 cycles to the
+// latency, 1 uop and 1 resource cycle for A57UnitB.
+class A57BranchForm<SchedWriteRes non_br> :
+ BranchWriteRes<2, 1, [A57UnitB], [1], non_br>;
+
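A57BranchForm above layers the branch-form cost onto an existing write descriptor when register operand 0 is the PC. A minimal standalone sketch of that adjustment in plain C++, with a small struct standing in for SchedWriteRes (illustrative only; the real variant also adds one resource cycle on A57UnitB):

struct WriteCost {
  int Latency;
  int NumMicroOps;
};

// Branch form of an ALU op on Cortex-A57: +2 cycles of latency and +1 uop
// compared to the plain (non-PC-writing) form.
WriteCost addBranchFormCost(WriteCost Base, bool DestIsPC) {
  if (!DestIsPC)
    return Base;
  return {Base.Latency + 2, Base.NumMicroOps + 1};
}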
// shift by register, conditional or unconditional
// TODO: according to the doc, conditional uses I0/I1, unconditional uses M
// Why does the more complex instruction use the simpler pipeline?
// This may be an error in the doc.
-def A57WriteALUsi : SchedWriteVariant<[
- // lsl #2, lsl #1, or lsr #1.
- SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
- SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
-]>;
def A57WriteALUsr : SchedWriteVariant<[
- SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
- SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+ SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
+ SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
]>;
def A57WriteALUSsr : SchedWriteVariant<[
- SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
- SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+ SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
+ SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
]>;
def A57ReadALUsr : SchedReadVariant<[
SchedVar<IsPredicatedPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>
]>;
-def : SchedAlias<WriteALUsi, A57WriteALUsi>;
+def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>;
def : SchedAlias<WriteALUsr, A57WriteALUsr>;
def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
def : SchedAlias<ReadALUsr, A57ReadALUsr>;
@@ -282,7 +271,11 @@ def : ReadAdvance<ReadMUL, 0>;
// from similar μops, allowing a typical sequence of multiply-accumulate μops
// to issue one every 1 cycle (sched advance = 2).
def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
-def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
+def A57WriteMLAL : SchedWriteVariant<[
+ SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1M]>
+]>;
+
def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
def : InstRW<[A57WriteMLA],
@@ -477,11 +470,11 @@ def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG",
"LDRB_POST_REG", "LDR(B?)T_POST$")>;
def A57WriteLdrTRegPost : SchedWriteVariant<[
- SchedVar<IsLdrAm2ScaledPred, [A57Write_4cyc_1I_1L_1M]>,
+ SchedVar<IsLdstsoScaledPredX2, [A57Write_4cyc_1I_1L_1M]>,
SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
]>;
def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
- SchedVar<IsLdrAm2ScaledPred, [A57WrBackThree]>,
+ SchedVar<IsLdstsoScaledPredX2, [A57WrBackThree]>,
SchedVar<NoSchedPred, [A57WrBackTwo]>
]>;
// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L"
@@ -517,8 +510,12 @@ def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;
// --- Load multiple instructions ---
foreach NumAddr = 1-8 in {
- def A57LMAddrPred#NumAddr :
- SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>;
+ def A57LMAddrPred#NumAddr : MCSchedPredicate<CheckAny<[
+ CheckNumOperands<!add(!shl(NumAddr, 1), 2)>,
+ CheckNumOperands<!add(!shl(NumAddr, 1), 3)>]>>;
+ def A57LMAddrUpdPred#NumAddr : MCSchedPredicate<CheckAny<[
+ CheckNumOperands<!add(!shl(NumAddr, 1), 3)>,
+ CheckNumOperands<!add(!shl(NumAddr, 1), 4)>]>>;
}
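The rewritten A57LMAddrPred/A57LMAddrUpdPred predicates encode the operand-count arithmetic directly: 2*NumAddr + 2 or 2*NumAddr + 3 operands for the plain form, and one more for the writeback form. A tiny standalone check of the same arithmetic in plain C++ (the function name and the writeback flag are illustrative, not the MC API):

// True if an LDM-style instruction with NumOperands operands matches the
// predicate generated for NumAddr address registers.
bool matchesLDMAddrPred(unsigned NumOperands, unsigned NumAddr, bool Writeback) {
  const unsigned Base = 2 * NumAddr + (Writeback ? 3u : 2u);
  return NumOperands == Base || NumOperands == Base + 1;
}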
def A57LDMOpsListNoregin : A57WriteLMOpsListType<
@@ -574,20 +571,20 @@ def A57LDMOpsList_Upd : A57WriteLMOpsListType<
A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>;
def A57WriteLDM_Upd : SchedWriteVariant<[
- SchedVar<A57LMAddrPred1, A57LDMOpsList_Upd.Writes[0-2]>,
- SchedVar<A57LMAddrPred2, A57LDMOpsList_Upd.Writes[0-4]>,
- SchedVar<A57LMAddrPred3, A57LDMOpsList_Upd.Writes[0-6]>,
- SchedVar<A57LMAddrPred4, A57LDMOpsList_Upd.Writes[0-8]>,
- SchedVar<A57LMAddrPred5, A57LDMOpsList_Upd.Writes[0-10]>,
- SchedVar<A57LMAddrPred6, A57LDMOpsList_Upd.Writes[0-12]>,
- SchedVar<A57LMAddrPred7, A57LDMOpsList_Upd.Writes[0-14]>,
- SchedVar<A57LMAddrPred8, A57LDMOpsList_Upd.Writes[0-16]>,
- SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]>
+ SchedVar<A57LMAddrUpdPred1, A57LDMOpsList_Upd.Writes[0-2]>,
+ SchedVar<A57LMAddrUpdPred2, A57LDMOpsList_Upd.Writes[0-4]>,
+ SchedVar<A57LMAddrUpdPred3, A57LDMOpsList_Upd.Writes[0-6]>,
+ SchedVar<A57LMAddrUpdPred4, A57LDMOpsList_Upd.Writes[0-8]>,
+ SchedVar<A57LMAddrUpdPred5, A57LDMOpsList_Upd.Writes[0-10]>,
+ SchedVar<A57LMAddrUpdPred6, A57LDMOpsList_Upd.Writes[0-12]>,
+ SchedVar<A57LMAddrUpdPred7, A57LDMOpsList_Upd.Writes[0-14]>,
+ SchedVar<A57LMAddrUpdPred8, A57LDMOpsList_Upd.Writes[0-16]>,
+ SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]>
]> { let Variadic=1; }
def A57WriteLDM : SchedWriteVariant<[
- SchedVar<IsLdmBaseRegInList, [A57WriteLDMreginlist]>,
- SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]>
+ SchedVar<IsLDMBaseRegInListPred, [A57WriteLDMreginlist]>,
+ SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]>
]> { let Variadic=1; }
def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;
@@ -834,7 +831,6 @@ def A57WriteVLDMuncond : SchedWriteVariant<[
SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>,
SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>,
SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>,
- SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond.Writes[0-15]>,
SchedVar<NoSchedPred, A57VLDMOpsListUncond.Writes[0-15]>
]> { let Variadic=1; }
@@ -855,7 +851,6 @@ def A57WriteVLDMcond : SchedWriteVariant<[
SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>,
SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>,
SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>,
- SchedVar<A57LMAddrPred8, A57VLDMOpsListCond.Writes[0-15]>,
SchedVar<NoSchedPred, A57VLDMOpsListCond.Writes[0-15]>
]> { let Variadic=1; }
@@ -883,7 +878,6 @@ def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>,
SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>,
SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>,
- SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond_Upd.Writes[0-15]>,
SchedVar<NoSchedPred, A57VLDMOpsListUncond_Upd.Writes[0-15]>
]> { let Variadic=1; }
@@ -904,7 +898,6 @@ def A57WriteVLDMcond_UPD : SchedWriteVariant<[
SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>,
SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>,
SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>,
- SchedVar<A57LMAddrPred8, A57VLDMOpsListCond_Upd.Writes[0-15]>,
SchedVar<NoSchedPred, A57VLDMOpsListCond_Upd.Writes[0-15]>
]> { let Variadic=1; }
@@ -1201,7 +1194,7 @@ def : InstRW<[A57Write_5cyc_1V], (instregex
// --- 3.16 ASIMD Miscellaneous Instructions ---
// ASIMD bitwise insert
-def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>;
+def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL", "VBSP")>;
// ASIMD count
def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>;
@@ -1490,7 +1483,7 @@ def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;
// -----------------------------------------------------------------------------
// Common definitions
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
-def : SchedAlias<WriteALU, A57Write_1cyc_1I>;
+def : SchedAlias<WriteALU, CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>;
def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
index 5ba61503686e..531b10bc5cfd 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -36,13 +36,16 @@ def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20;
let ResourceCycles = [20]; }
def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
-def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
-def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1;
+ let ResourceCycles = [1]; }
+def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2;
+ let ResourceCycles = [1]; }
def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; }
def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; }
def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; }
-def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2;
+ let ResourceCycles = [1]; }
def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
let ResourceCycles = [32]; }
def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
@@ -68,7 +71,7 @@ foreach Lat = 4-16 in {
}
}
-def A57Write_4cyc_1M : SchedWriteRes<[A57UnitL]> { let Latency = 4; }
+def A57Write_4cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
def A57Write_4cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 4; }
def A57Write_5cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 5; }
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td
index 3f0b71afd977..be7017a7b426 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -2525,8 +2525,8 @@ def : ReadAdvance<ReadFPMAC, 0>;
def : InstRW< [WriteALU],
(instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
"BICrr")>;
-def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
-def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;
+def : InstRW< [WriteALUsi], (instrs ANDrsi, ORRrsi, EORrsi, BICrsi)>;
+def : InstRW< [WriteALUsr], (instrs ANDrsr, ORRrsr, EORrsr, BICrsr)>;
def : SchedAlias<WriteCMP, A9WriteALU>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM7.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM7.td
new file mode 100644
index 000000000000..12296ad09218
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM7.td
@@ -0,0 +1,488 @@
+//=- ARMScheduleM7.td - ARM Cortex-M7 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SchedRead/Write data for the ARM Cortex-M7 processor.
+//
+//===----------------------------------------------------------------------===//
+
+def CortexM7Model : SchedMachineModel {
+ let IssueWidth = 2; // Dual issue for most instructions.
+ let MicroOpBufferSize = 0; // The Cortex-M7 is in-order.
+ let LoadLatency = 2; // Best case for load-use case.
+ let MispredictPenalty = 4; // Mispredict cost for forward branches is 6,
+ // but 4 works better
+ let CompleteModel = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP
+// pipe. The stages relevant to scheduling are as follows:
+//
+// EX1: address generation   shifts
+// EX2: fast load data       ALUs                FP operation
+// EX3: slow load data       integer writeback   FP operation
+// EX4: store data                               FP writeback
+//
+// There are shifters in both EX1 and EX2, and some instructions can be
+// flexibly allocated between them. EX2 is used as the "zero" point
+// for scheduling, so simple ALU operations executing in EX2 will have
+// ReadAdvance<0> (the default) for their source operands and Latency = 1.
+
+def M7UnitLoad : ProcResource<2> { let BufferSize = 0; }
+def M7UnitStore : ProcResource<1> { let BufferSize = 0; }
+def M7UnitALU : ProcResource<2>;
+def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; }
+def M7UnitShift2 : ProcResource<1> { let BufferSize = 0; }
+def M7UnitMAC : ProcResource<1> { let BufferSize = 0; }
+def M7UnitBranch : ProcResource<1> { let BufferSize = 0; }
+def M7UnitVFP : ProcResource<1> { let BufferSize = 0; }
+def M7UnitVPort : ProcResource<2> { let BufferSize = 0; }
+def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; }
+
+//===---------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types that map to ProcResources and set latency.
+
+let SchedModel = CortexM7Model in {
+
+def : WriteRes<WriteALU, [M7UnitALU]> { let Latency = 1; }
+
+// Basic ALU with shifts.
+let Latency = 1 in {
+ def : WriteRes<WriteALUsi, [M7UnitALU, M7UnitShift1]>;
+ def : WriteRes<WriteALUsr, [M7UnitALU, M7UnitShift1]>;
+ def : WriteRes<WriteALUSsr, [M7UnitALU, M7UnitShift1]>;
+}
+
+// Compares.
+def : WriteRes<WriteCMP, [M7UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteCMPsi, [M7UnitALU, M7UnitShift1]> { let Latency = 2; }
+def : WriteRes<WriteCMPsr, [M7UnitALU, M7UnitShift1]> { let Latency = 2; }
+
+// Multiplies.
+let Latency = 2 in {
+ def : WriteRes<WriteMUL16, [M7UnitMAC]>;
+ def : WriteRes<WriteMUL32, [M7UnitMAC]>;
+ def : WriteRes<WriteMUL64Lo, [M7UnitMAC]>;
+ def : WriteRes<WriteMUL64Hi, []> { let NumMicroOps = 0; }
+}
+
+// Multiply-accumulates.
+let Latency = 2 in {
+ def : WriteRes<WriteMAC16, [M7UnitMAC]>;
+ def : WriteRes<WriteMAC32, [M7UnitMAC]>;
+ def : WriteRes<WriteMAC64Lo, [M7UnitMAC]> { let Latency = 2; }
+ def : WriteRes<WriteMAC64Hi, []> { let NumMicroOps = 0; }
+}
+
+// Divisions.
+// These cannot be dual-issued with any instructions.
+def : WriteRes<WriteDIV, [M7UnitALU]> {
+ let Latency = 7;
+ let SingleIssue = 1;
+}
+
+// Loads/Stores.
+def : WriteRes<WriteLd, [M7UnitLoad]> { let Latency = 1; }
+def : WriteRes<WritePreLd, [M7UnitLoad]> { let Latency = 2; }
+def : WriteRes<WriteST, [M7UnitStore]> { let Latency = 2; }
+
+// Branches.
+def : WriteRes<WriteBr, [M7UnitBranch]> { let Latency = 2; }
+def : WriteRes<WriteBrL, [M7UnitBranch]> { let Latency = 2; }
+def : WriteRes<WriteBrTbl, [M7UnitBranch]> { let Latency = 2; }
+
+// Noop.
+def : WriteRes<WriteNoop, []> { let Latency = 0; }
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for floating-point instructions
+//
+// Floating point conversions.
+def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
+def : WriteRes<WriteFPMOV, [M7UnitVPort]> { let Latency = 3; }
+
+// The FP pipeline has a latency of 3 cycles.
+// ALU operations (32/64-bit). These go down the FP pipeline.
+def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
+def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 4;
+ let BeginGroup = 1;
+}
+
+// Multiplication
+def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
+def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 7;
+ let BeginGroup = 1;
+}
+
+// Multiply-accumulate. FPMAC goes down the FP Pipeline.
+def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; }
+def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 11;
+ let BeginGroup = 1;
+}
+
+// Division. Effective scheduling latency is 3, though real latency is larger
+def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
+def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 30;
+ let BeginGroup = 1;
+}
+
+// Square-root. Effective scheduling latency is 3; real latency is larger
+def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
+def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 30;
+ let BeginGroup = 1;
+}
+
+def M7WriteShift2 : SchedWriteRes<[M7UnitALU, M7UnitShift2]> {}
+
+// Not used for M7, but definitions are needed anyway.
+def : WriteRes<WriteVLD1, []>;
+def : WriteRes<WriteVLD2, []>;
+def : WriteRes<WriteVLD3, []>;
+def : WriteRes<WriteVLD4, []>;
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
+def M7SingleIssue : SchedWriteRes<[]> {
+ let SingleIssue = 1;
+ let NumMicroOps = 0;
+}
+def M7Slot0Only : SchedWriteRes<[]> {
+ let BeginGroup = 1;
+ let NumMicroOps = 0;
+}
+
+// What pipeline stage operands need to be ready for depending on
+// where they come from.
+def : ReadAdvance<ReadALUsr, 0>;
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 1>;
+def : ReadAdvance<ReadALU, 0>;
+def : ReadAdvance<ReadFPMUL, 0>;
+def : ReadAdvance<ReadFPMAC, 3>;
+def M7Read_ISS : SchedReadAdvance<-1>; // operands needed at EX1
+def M7Read_EX2 : SchedReadAdvance<1>; // operands needed at EX3
+def M7Read_EX3 : SchedReadAdvance<2>; // operands needed at EX4
+
+// Non general purpose instructions may not be dual issued. These
+// use both issue units.
+def M7NonGeneralPurpose : SchedWriteRes<[]> {
+ // Assume that these will go down the main ALU pipeline.
+ // In reality, many look likely to stall the whole pipeline.
+ let Latency = 3;
+ let SingleIssue = 1;
+}
+
+// List the non general purpose instructions.
+def : InstRW<[M7NonGeneralPurpose], (instregex "t2MRS", "tSVC", "tBKPT",
+ "t2MSR", "t2DMB", "t2DSB", "t2ISB",
+ "t2HVC", "t2SMC", "t2UDF", "ERET",
+ "tHINT", "t2HINT", "t2CLREX", "BUNDLE")>;
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for load/store
+//
+// Mark whether the loads/stores must be single-issue
+// Address operands are needed earlier
+// Data operands are needed later
+
+def M7BaseUpdate : SchedWriteRes<[]> {
+ let Latency = 0; // Update is bypassable out of EX1
+ let NumMicroOps = 0;
+}
+def M7LoadLatency1 : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+def M7SlowLoad : SchedWriteRes<[M7UnitLoad]> { let Latency = 2; }
+
+// Byte and half-word loads should have greater latency than other loads.
+// So should load exclusive.
+
+def : InstRW<[M7SlowLoad],
+ (instregex "t2LDR(B|H|SB|SH)pc")>;
+def : InstRW<[M7SlowLoad, M7Read_ISS],
+ (instregex "t2LDR(B|H|SB|SH)T", "t2LDR(B|H|SB|SH)i",
+ "tLDR(B|H)i")>;
+def : InstRW<[M7SlowLoad, M7Read_ISS, M7Read_ISS],
+ (instregex "t2LDR(B|H|SB|SH)s", "tLDR(B|H)r", "tLDR(SB|SH)")>;
+def : InstRW<[M7SlowLoad, M7BaseUpdate, M7Read_ISS],
+ (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>;
+
+// Exclusive loads/stores cannot be dual-issued
+def : InstRW<[WriteLd, M7Slot0Only, M7Read_ISS],
+ (instregex "t2LDREX$")>;
+def : InstRW<[M7SlowLoad, M7Slot0Only, M7Read_ISS],
+ (instregex "t2LDREX(B|H)")>;
+def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_ISS],
+ (instregex "t2STREX(B|H)?$")>;
+
+// Load/store multiples cannot be dual-issued. Note that default scheduling
+// occurs around read/write times of individual registers in the list; read
+// time for STM cannot be overridden because it is a variadic source operand.
+
+def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS],
+ (instregex "(t|t2)LDM(DB|IA)$")>;
+def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS],
+ (instregex "(t|t2)STM(DB|IA)$")>;
+def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS],
+ (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>;
+def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS],
+ (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>;
+
+// Load/store doubles cannot be dual-issued.
+
+def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue,
+ M7Read_EX2, M7Read_EX2, M7Read_ISS],
+ (instregex "t2STRD_(PRE|POST)")>;
+def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_EX2, M7Read_ISS],
+ (instregex "t2STRDi")>;
+def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7BaseUpdate, M7Read_ISS],
+ (instregex "t2LDRD_(PRE|POST)")>;
+def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7Read_ISS],
+ (instregex "t2LDRDi")>;
+
+// Word load / preload
+def : InstRW<[WriteLd],
+ (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>;
+def : InstRW<[WriteLd, M7Read_ISS],
+ (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi", "tLDRspi")>;
+def : InstRW<[WriteLd, M7Read_ISS, M7Read_ISS],
+ (instregex "t2LDRs", "t2PL[DI](w)?s", "tLDRr")>;
+def : InstRW<[WriteLd, M7BaseUpdate, M7Read_ISS],
+ (instregex "t2LDR_(POST|PRE)")>;
+
+// Stores
+def : InstRW<[M7BaseUpdate, WriteST, M7Read_EX2, M7Read_ISS],
+ (instregex "t2STR(B|H)?_(POST|PRE)")>;
+def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS, M7Read_ISS],
+ (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>;
+def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS],
+ (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>;
+
+// TBB/TBH - single-issue only; takes two cycles to issue
+
+def M7TableLoad : SchedWriteRes<[M7UnitLoad]> {
+ let NumMicroOps = 2;
+ let SingleIssue = 1;
+}
+
+def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>;
+
+// VFP loads and stores
+
+def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; }
+def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 2;
+ let SingleIssue = 1;
+}
+def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>;
+def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort]> {
+ let SingleIssue = 1;
+}
+
+def : InstRW<[M7LoadSP, M7Read_ISS], (instregex "VLDR(S|H)$")>;
+def : InstRW<[M7LoadDP, M7Read_ISS], (instregex "VLDRD$")>;
+def : InstRW<[M7StoreSP, M7Read_EX3, M7Read_ISS], (instregex "VSTR(S|H)$")>;
+def : InstRW<[M7StoreDP, M7Read_EX3, M7Read_ISS], (instregex "VSTRD$")>;
+
+// Load/store multiples cannot be dual-issued.
+
+def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS],
+ (instregex "VLDM(S|D|Q)(DB|IA)$")>;
+def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS],
+ (instregex "VSTM(S|D|Q)(DB|IA)$")>;
+def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS],
+ (instregex "VLDM(S|D|Q)(DB|IA)_UPD$")>;
+def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS],
+ (instregex "VSTM(S|D|Q)(DB|IA)_UPD$")>;
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for ALU
+//
+
+// Shifted ALU operands are read a cycle early.
+def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>;
+
+def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS],
+ (instregex "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$",
+ "t2(SUB|CMP|CMNz|TEQ|TST)rs$",
+ "t2MOVsr(a|l)")>;
+def : InstRW<[WriteALUsi, M7Read_ISS],
+ (instregex "t2MVNs")>;
+
+// Treat pure shift operations (except for RRX) as if they used the EX1
+// shifter but have timing as if they used the EX2 shifter as they usually
+// can choose the EX2 shifter when needed. Will miss a few dual-issue cases,
+// but the results prove to be better than trying to get them exact.
+
+def : InstRW<[M7WriteShift2, M7Read_ISS], (instregex "t2RRX$")>;
+def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>;
+
+// Instructions that use the shifter, but have normal timing.
+
+def : InstRW<[WriteALUsi,M7Slot0Only], (instregex "t2(BFC|BFI)$")>;
+
+// Instructions which are slot zero only but otherwise normal.
+
+def : InstRW<[WriteALU, M7Slot0Only], (instregex "t2CLZ")>;
+
+// MAC operations that don't have SchedRW set.
+
+def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC], (instregex "t2SML[AS]D")>;
+
+// Divides are special because they stall for their latency, and so look like a
+// single-cycle as far as scheduling opportunities go. By putting WriteALU
+// first, we make the operand latency 1, but keep the instruction latency 7.
+
+def : InstRW<[WriteALU, WriteDIV], (instregex "t2(S|U)DIV")>;
+
+// DSP extension operations
+
+def M7WriteSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> {
+ let Latency = 1;
+ let BeginGroup = 1;
+}
+def M7WriteSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> {
+ let Latency = 2;
+ let BeginGroup = 1;
+}
+def M7WriteShSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
+ let Latency = 1;
+ let BeginGroup = 1;
+}
+def M7WriteShSIMD0 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
+ let Latency = 0; // Bypassable out of EX1
+ let BeginGroup = 1;
+}
+def M7WriteShSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
+ let Latency = 2;
+ let BeginGroup = 1;
+}
+
+def : InstRW<[M7WriteShSIMD2, M7Read_ISS],
+ (instregex "t2(S|U)SAT")>;
+def : InstRW<[M7WriteSIMD1, ReadALU],
+ (instregex "(t|t2)(S|U)XT(B|H)")>;
+def : InstRW<[M7WriteSIMD1, ReadALU, ReadALU],
+ (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)",
+ "t2SEL")>;
+def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU],
+ (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)", "t2USAD8")>;
+def : InstRW<[M7WriteShSIMD2, M7Read_ISS, M7Read_ISS],
+ (instregex "t2QD(ADD|SUB)")>;
+def : InstRW<[M7WriteShSIMD0, M7Read_ISS],
+ (instregex "t2(RBIT|REV)", "tREV")>;
+def : InstRW<[M7WriteShSIMD1, M7Read_ISS],
+ (instregex "t2(SBFX|UBFX)")>;
+def : InstRW<[M7WriteShSIMD1, ReadALU, M7Read_ISS],
+ (instregex "t2PKH(BT|TB)", "t2(S|U)XTA")>;
+def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU, M7Read_EX2],
+ (instregex "t2USADA8")>;
+
+// MSR/MRS
+def : InstRW<[M7NonGeneralPurpose], (instregex "MSR", "MRS")>;
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for FP operations
+//
+
+// Effective scheduling latency is really 3 for nearly all FP operations,
+// even if their true latency is higher.
+def M7WriteVFPLatOverride : SchedWriteRes<[]> {
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+def M7WriteVFPExtraVPort : SchedWriteRes<[M7UnitVPort]> {
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+
+// Instructions which are missing default schedules.
+def : InstRW<[WriteFPALU32],
+ (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)S$")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64],
+ (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)D$")>;
+
+// VCMP
+def M7WriteVCMPS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let Latency = 0; }
+def M7WriteVCMPD : SchedWriteRes<[M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 0;
+ let BeginGroup = 1;
+}
+def : InstRW<[M7WriteVCMPS], (instregex "VCMPS$")>;
+def : InstRW<[M7WriteVCMPD], (instregex "VCMPD$")>;
+
+ // VMRS/VMSR
+def M7VMRS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let SingleIssue = 1; }
+def M7VMSR : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let SingleIssue = 1; }
+def : InstRW<[M7VMRS], (instregex "FMSTAT")>;
+def : InstRW<[M7VMSR], (instregex "VMSR")>;
+
+// VSEL cannot bypass in its implied $cpsr operand; model as earlier read
+def : InstRW<[WriteFPALU32, M7Slot0Only, ReadALU, ReadALU, M7Read_ISS],
+ (instregex "VSEL.*S$")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64, M7Slot0Only,
+ ReadALU, ReadALU, M7Read_ISS],
+ (instregex "VSEL.*D$")>;
+
+// VMOV
+def : InstRW<[WriteFPMOV],
+ (instregex "VMOV(H|S)$", "FCONST(H|S)")>;
+def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only],
+ (instregex "VMOVD$")>;
+def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only],
+ (instregex "FCONSTD")>;
+def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7SingleIssue],
+ (instregex "VMOV(DRR|RRD|RRS|SRR)")>;
+
+// Larger-latency overrides.
+
+def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV32], (instregex "VDIVS")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV64], (instregex "VDIVD")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT32], (instregex "VSQRTS")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT64], (instregex "VSQRTD")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPMUL64],
+ (instregex "V(MUL|NMUL)D")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64],
+ (instregex "V(ADD|SUB)D")>;
+
+// Multiply-accumulate. Chained SP timing is correct; rest need overrides
+// Double-precision chained MAC stalls the pipeline behind it for 3 cycles,
+// making it appear to have 3 cycle latency for scheduling.
+
+def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64,
+ ReadFPMAC, ReadFPMUL, ReadFPMUL],
+ (instregex "V(N)?ML(A|S)D$")>;
+
+// Single-precision fused MACs look like latency 5 with advance of 2.
+
+def M7WriteVFPLatOverride5 : SchedWriteRes<[]> {
+ let Latency = 5;
+ let NumMicroOps = 0;
+}
+def M7ReadFPMAC2 : SchedReadAdvance<2>;
+
+def : InstRW<[M7WriteVFPLatOverride5, WriteFPMAC32,
+ M7ReadFPMAC2, ReadFPMUL, ReadFPMUL],
+ (instregex "VF(N)?M(A|S)S$")>;
+
+// Double-precision fused MAC stalls the pipeline behind it for 2 cycles, making
+// it appear to have 3 cycle latency for scheduling.
+
+def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64,
+ ReadFPMAC, ReadFPMUL, ReadFPMUL],
+ (instregex "VF(N)?M(A|S)D$")>;
+
+} // SchedModel = CortexM7Model
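The new scheduling model above anchors latencies at EX2 and uses SchedReadAdvance to move individual operand reads earlier or later (M7Read_ISS needs the operand at EX1, M7Read_EX2 at EX3). A rough sketch of the arithmetic this implies, not LLVM scheduler code, with a made-up helper name:

    #include <algorithm>

    // Effective producer-to-consumer dependence under the convention above:
    // the consumer's read-advance is subtracted from the producer's latency,
    // so an advance of -1 (operand needed at EX1) lengthens the dependence by
    // one cycle and an advance of +1 (operand needed at EX3) shortens it.
    int effectiveLatency(int producerLatency, int readAdvance) {
      return std::max(producerLatency - readAdvance, 0); // never negative
    }

    // Example: a 2-cycle load feeding an address operand read at ISS/EX1
    // (M7Read_ISS = SchedReadAdvance<-1>): 2 - (-1) = 3 cycles of separation.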
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td
index d1cbf754b5a1..466acec6f76a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td
@@ -787,8 +787,8 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC
def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
-def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
-def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)d")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
(instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td
index e0e98bfa0e9b..d66b3065c7b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -558,8 +558,8 @@ let SchedModel = SwiftModel in {
(instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
"VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
"VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
- "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF",
- "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
+ "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF", "VBIT",
+ "VBSL", "VBSP", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
def : InstRW<[SwiftWriteP1TwoCycle],
(instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 46802037c2aa..5cb608b74ace 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -97,9 +97,9 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle,
bool MinSize)
- : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
- CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
- TargetTriple(TT), Options(TM.Options), TM(TM),
+ : ARMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ UseMulOps(UseFusedMulOps), CPUString(CPU), OptMinSize(MinSize),
+ IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
@@ -185,7 +185,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
else
ArchFS = std::string(FS);
}
- ParseSubtargetFeatures(CPUString, ArchFS);
+ ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, ArchFS);
// FIXME: This used to enable V6T2 support implicitly for Thumb2 mode.
// Assert this for now to make the change obvious.
@@ -237,7 +237,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
switch (IT) {
case DefaultIT:
- RestrictIT = hasV8Ops();
+ RestrictIT = hasV8Ops() && !hasMinSize();
break;
case RestrictedIT:
RestrictIT = true;
@@ -294,11 +294,13 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexA76:
case CortexA77:
case CortexA78:
+ case CortexA78C:
case CortexR4:
case CortexR4F:
case CortexR5:
case CortexR7:
case CortexM3:
+ case CortexM7:
case CortexR52:
case CortexX1:
break;
@@ -314,6 +316,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
PreISelOperandLatencyAdjustment = 1;
break;
case NeoverseN1:
+ case NeoverseN2:
+ case NeoverseV1:
break;
case Swift:
MaxInterleaveFactor = 2;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
index 2703e385dd81..fd9b94fdaa23 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -63,9 +63,11 @@ protected:
CortexA76,
CortexA77,
CortexA78,
+ CortexA78C,
CortexA8,
CortexA9,
CortexM3,
+ CortexM7,
CortexR4,
CortexR4F,
CortexR5,
@@ -76,6 +78,8 @@ protected:
Krait,
Kryo,
NeoverseN1,
+ NeoverseN2,
+ NeoverseV1,
Swift
};
enum ARMProcClassEnum {
@@ -163,6 +167,7 @@ protected:
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
+ bool HasV8_7aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
bool HasV8_1MMainlineOps = false;
@@ -461,6 +466,13 @@ protected:
/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
bool NegativeImmediates = true;
+ /// Harden against Straight Line Speculation for Returns and Indirect
+ /// Branches.
+ bool HardenSlsRetBr = false;
+
+ /// Harden against Straight Line Speculation for indirect calls.
+ bool HardenSlsBlr = false;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
Align stackAlignment = Align(4);
@@ -526,7 +538,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
@@ -594,6 +606,7 @@ public:
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8_6aOps() const { return HasV8_6aOps; }
+ bool hasV8_7aOps() const { return HasV8_7aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
@@ -614,6 +627,7 @@ public:
bool isCortexA15() const { return ARMProcFamily == CortexA15; }
bool isSwift() const { return ARMProcFamily == Swift; }
bool isCortexM3() const { return ARMProcFamily == CortexM3; }
+ bool isCortexM7() const { return ARMProcFamily == CortexM7; }
bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); }
bool isCortexR5() const { return ARMProcFamily == CortexR5; }
bool isKrait() const { return ARMProcFamily == Krait; }
@@ -901,6 +915,9 @@ public:
bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
unsigned PhysReg) const override;
unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
+
+ bool hardenSlsRetBr() const { return HardenSlsRetBr; }
+ bool hardenSlsBlr() const { return HardenSlsBlr; }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 9ead5fa4308c..237ef54c8339 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -99,7 +99,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
initializeMVEVPTOptimisationsPass(Registry);
initializeMVETailPredicationPass(Registry);
initializeARMLowOverheadLoopsPass(Registry);
+ initializeARMBlockPlacementPass(Registry);
initializeMVEGatherScatterLoweringPass(Registry);
+ initializeARMSLSHardeningPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -251,7 +253,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
// ARM supports the MachineOutliner.
setMachineOutliner(true);
- setSupportsDefaultOutlining(false);
+ setSupportsDefaultOutlining(true);
}
ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
@@ -261,12 +263,10 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
// FIXME: This is related to the code below to reset the target options,
// we need to know whether or not the soft float flag is set on the
@@ -409,7 +409,8 @@ void ARMPassConfig::addIRPasses() {
// ldrex/strex loops to simplify this, but it needs tidying up.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass(
- 1, false, false, true, true, [this](const Function &F) {
+ SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true),
+ [this](const Function &F) {
const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
}));
@@ -471,7 +472,7 @@ bool ARMPassConfig::addInstSelector() {
}
bool ARMPassConfig::addIRTranslator() {
- addPass(new IRTranslator());
+ addPass(new IRTranslator(getOptLevel()));
return false;
}
@@ -539,6 +540,9 @@ void ARMPassConfig::addPreSched2() {
addPass(&PostMachineSchedulerID);
addPass(&PostRASchedulerID);
}
+
+ addPass(createARMIndirectThunks());
+ addPass(createARMSLSHardeningPass());
}
void ARMPassConfig::addPreEmitPass() {
@@ -549,9 +553,11 @@ void ARMPassConfig::addPreEmitPass() {
return MF.getSubtarget<ARMSubtarget>().isThumb2();
}));
- // Don't optimize barriers at -O0.
- if (getOptLevel() != CodeGenOpt::None)
+ // Don't optimize barriers or block placement at -O0.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(createARMBlockPlacementPass());
addPass(createARMOptimizeBarriersPass());
+ }
}
void ARMPassConfig::addPreEmitPass2() {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h
index ac55d2bdcc2b..8428092bf179 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -72,6 +72,12 @@ public:
}
bool targetSchedulesPostRAScheduling() const override { return true; };
+
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
};
/// ARM/Thumb little endian target machine.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index bea4e157a131..890193401373 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -20,14 +20,18 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <cassert>
@@ -46,10 +50,38 @@ static cl::opt<bool> DisableLowOverheadLoops(
"disable-arm-loloops", cl::Hidden, cl::init(false),
cl::desc("Disable the generation of low-overhead loops"));
+static cl::opt<bool>
+ AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
+ cl::desc("Enable the generation of WLS loops"));
+
extern cl::opt<TailPredication::Mode> EnableTailPredication;
extern cl::opt<bool> EnableMaskedGatherScatters;
+extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor;
+
+/// Convert a vector load intrinsic into a simple llvm load instruction.
+/// This is beneficial when the underlying object being addressed comes
+/// from a constant, since we get constant-folding for free.
+static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign,
+ InstCombiner::BuilderTy &Builder) {
+ auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
+
+ if (!IntrAlign)
+ return nullptr;
+
+ unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
+ ? MemAlign
+ : IntrAlign->getLimitedValue();
+
+ if (!isPowerOf2_32(Alignment))
+ return nullptr;
+
+ auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
+ PointerType::get(II.getType(), 0));
+ return Builder.CreateAlignedLoad(II.getType(), BCastInst, Align(Alignment));
+}
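simplifyNeonVld1 above rewrites an arm.neon.vld1 call into an ordinary aligned load, which lets later passes constant-fold loads from constant objects. It picks the larger of the alignment encoded in the intrinsic and the alignment the compiler can prove, and bails out unless that value is a power of two. A detached sketch of just that alignment decision; the helper below is illustrative only, not part of the patch:

    #include <cstdint>

    // Alignment a rewritten vld1 load would carry: the larger of the
    // intrinsic's align operand and the provable alignment of the pointer.
    // Returns 0 when the result is not a power of two, meaning the
    // intrinsic should be left untouched.
    uint32_t chooseVld1Alignment(uint32_t intrAlign, uint32_t knownMemAlign) {
      uint32_t align = intrAlign < knownMemAlign ? knownMemAlign : intrAlign;
      bool isPow2 = align != 0 && (align & (align - 1)) == 0;
      return isPow2 ? align : 0;
    }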
+
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
@@ -82,6 +114,138 @@ bool ARMTTIImpl::shouldFavorPostInc() const {
return false;
}
+Optional<Instruction *>
+ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+ using namespace PatternMatch;
+ Intrinsic::ID IID = II.getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::arm_neon_vld1: {
+ Align MemAlign =
+ getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree());
+ if (Value *V = simplifyNeonVld1(II, MemAlign.value(), IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+ }
+
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Align MemAlign =
+ getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree());
+ unsigned AlignArg = II.getNumArgOperands() - 1;
+ Value *AlignArgOp = II.getArgOperand(AlignArg);
+ MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
+ if (Align && *Align < MemAlign) {
+ return IC.replaceOperand(
+ II, AlignArg,
+ ConstantInt::get(Type::getInt32Ty(II.getContext()), MemAlign.value(),
+ false));
+ }
+ break;
+ }
+
+ case Intrinsic::arm_mve_pred_i2v: {
+ Value *Arg = II.getArgOperand(0);
+ Value *ArgArg;
+ if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
+ PatternMatch::m_Value(ArgArg))) &&
+ II.getType() == ArgArg->getType()) {
+ return IC.replaceInstUsesWith(II, ArgArg);
+ }
+ Constant *XorMask;
+ if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
+ PatternMatch::m_Value(ArgArg)),
+ PatternMatch::m_Constant(XorMask))) &&
+ II.getType() == ArgArg->getType()) {
+ if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
+ if (CI->getValue().trunc(16).isAllOnesValue()) {
+ auto TrueVector = IC.Builder.CreateVectorSplat(
+ cast<FixedVectorType>(II.getType())->getNumElements(),
+ IC.Builder.getTrue());
+ return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
+ }
+ }
+ }
+ KnownBits ScalarKnown(32);
+ if (IC.SimplifyDemandedBits(&II, 0, APInt::getLowBitsSet(32, 16),
+ ScalarKnown, 0)) {
+ return &II;
+ }
+ break;
+ }
+ case Intrinsic::arm_mve_pred_v2i: {
+ Value *Arg = II.getArgOperand(0);
+ Value *ArgArg;
+ if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
+ PatternMatch::m_Value(ArgArg)))) {
+ return IC.replaceInstUsesWith(II, ArgArg);
+ }
+ if (!II.getMetadata(LLVMContext::MD_range)) {
+ Type *IntTy32 = Type::getInt32Ty(II.getContext());
+ Metadata *M[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)),
+ ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0xFFFF))};
+ II.setMetadata(LLVMContext::MD_range, MDNode::get(II.getContext(), M));
+ return &II;
+ }
+ break;
+ }
+ case Intrinsic::arm_mve_vadc:
+ case Intrinsic::arm_mve_vadc_predicated: {
+ unsigned CarryOp =
+ (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
+ assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
+ "Bad type for intrinsic!");
+
+ KnownBits CarryKnown(32);
+ if (IC.SimplifyDemandedBits(&II, CarryOp, APInt::getOneBitSet(32, 29),
+ CarryKnown)) {
+ return &II;
+ }
+ break;
+ }
+ case Intrinsic::arm_mve_vmldava: {
+ Instruction *I = cast<Instruction>(&II);
+ if (I->hasOneUse()) {
+ auto *User = cast<Instruction>(*I->user_begin());
+ Value *OpZ;
+ if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) &&
+ match(I->getOperand(3), m_Zero())) {
+ Value *OpX = I->getOperand(4);
+ Value *OpY = I->getOperand(5);
+ Type *OpTy = OpX->getType();
+
+ IC.Builder.SetInsertPoint(User);
+ Value *V =
+ IC.Builder.CreateIntrinsic(Intrinsic::arm_mve_vmldava, {OpTy},
+ {I->getOperand(0), I->getOperand(1),
+ I->getOperand(2), OpZ, OpX, OpY});
+
+ IC.replaceInstUsesWith(*User, V);
+ return IC.eraseInstFromFunction(*User);
+ }
+ }
+ return None;
+ }
+ }
+ return None;
+}
+
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
@@ -125,8 +289,43 @@ int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
return 1;
}
-int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind) {
+// Checks whether Inst is part of a min(max()) or max(min()) pattern
+// that will match to an SSAT instruction
+static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
+ Value *LHS, *RHS;
+ ConstantInt *C;
+ SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor;
+
+ if (InstSPF == SPF_SMAX &&
+ PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) &&
+ C->getValue() == Imm && Imm.isNegative() && (-Imm).isPowerOf2()) {
+
+ auto isSSatMin = [&](Value *MinInst) {
+ if (isa<SelectInst>(MinInst)) {
+ Value *MinLHS, *MinRHS;
+ ConstantInt *MinC;
+ SelectPatternFlavor MinSPF =
+ matchSelectPattern(MinInst, MinLHS, MinRHS).Flavor;
+ if (MinSPF == SPF_SMIN &&
+ PatternMatch::match(MinRHS, PatternMatch::m_ConstantInt(MinC)) &&
+ MinC->getValue() == ((-Imm) - 1))
+ return true;
+ }
+ return false;
+ };
+
+ if (isSSatMin(Inst->getOperand(1)) ||
+ (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) ||
+ isSSatMin(*(++Inst->user_begin())))))
+ return true;
+ }
+ return false;
+}
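For context on the shape isSSATMinMaxPattern matches: a signed clamp whose lower bound is a negative power of two and whose upper bound is one less than that magnitude can be selected as a single SSAT on ARMv6+ ARM or Thumb2. A hypothetical source-level clamp of that form, not taken from the patch:

    #include <cstdint>

    // Clamp to the signed 8-bit range [-128, 127]. The smax against -128
    // (Imm, a negative power of two) feeding the smin against 127
    // ((-Imm) - 1) is the pattern the matcher above recognises; returning a
    // zero immediate cost keeps -128/127 from being hoisted so the backend
    // can emit SSAT #8 instead of materialising both constants.
    int32_t clampToS8(int32_t x) {
      int32_t lo = x > -128 ? x : -128; // smax(x, -128)
      return lo < 127 ? lo : 127;       // smin(lo, 127)
    }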
+
+int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
// Division by a constant can be turned into multiplication, but only if we
// know it's constant. So it's not so much that the immediate is cheap (it's
// not), but that the alternative is worse.
@@ -165,10 +364,33 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
return 0;
+ // Ensures negative constants of min(max()) or max(min()) patterns that
+ // match to SSAT instructions don't get hoisted.
+ if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
+ Ty->getIntegerBitWidth() <= 32) {
+ if (isSSATMinMaxPattern(Inst, Imm) ||
+ (isa<ICmpInst>(Inst) && Inst->hasOneUse() &&
+ isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm)))
+ return 0;
+ }
+
return getIntImmCost(Imm, Ty, CostKind);
}
+int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+ if (CostKind == TTI::TCK_RecipThroughput &&
+ (ST->hasNEON() || ST->hasMVEIntegerOps())) {
+ // FIXME: The vectorizer is highly sensitive to the cost of these
+ // instructions, which suggests that it may be using the costs incorrectly.
+ // But, for now, just make them free to avoid performance regressions for
+ // vector targets.
+ return 0;
+ }
+ return BaseT::getCFInstrCost(Opcode, CostKind);
+}
+
int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -180,15 +402,35 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return Cost == 0 ? 0 : 1;
return Cost;
};
+ auto IsLegalFPType = [this](EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
+ (EltVT == MVT::f64 && ST->hasFP64()) ||
+ (EltVT == MVT::f16 && ST->hasFullFP16());
+ };
EVT SrcTy = TLI->getValueType(DL, Src);
EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I));
-
- // The extend of a load is free
- if (I && isa<LoadInst>(I->getOperand(0))) {
+ return AdjustCost(
+ BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
+
+ // Extending masked load/Truncating masked stores is expensive because we
+ // currently don't split them. This means that we'll likely end up
+ // loading/storing each element individually (hence the high cost).
+ if ((ST->hasMVEIntegerOps() &&
+ (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
+ Opcode == Instruction::SExt)) ||
+ (ST->hasMVEFloatOps() &&
+ (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
+ IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
+ if (CCH == TTI::CastContextHint::Masked && DstTy.getSizeInBits() > 128)
+ return 2 * DstTy.getVectorNumElements() * ST->getMVEVectorCostFactor();
+
+ // The extend of other kinds of load is free
+ if (CCH == TTI::CastContextHint::Normal ||
+ CCH == TTI::CastContextHint::Masked) {
static const TypeConversionCostTblEntry LoadConversionTbl[] = {
{ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0},
{ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0},
@@ -242,33 +484,32 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
}
- }
- // The truncate of a store is free. This is the mirror of extends above.
- if (I && I->hasOneUse() && isa<StoreInst>(*I->user_begin())) {
- static const TypeConversionCostTblEntry MVELoadConversionTbl[] = {
+ // The truncate of a store is free. This is the mirror of extends above.
+ static const TypeConversionCostTblEntry MVEStoreConversionTbl[] = {
{ISD::TRUNCATE, MVT::v4i32, MVT::v4i16, 0},
{ISD::TRUNCATE, MVT::v4i32, MVT::v4i8, 0},
{ISD::TRUNCATE, MVT::v8i16, MVT::v8i8, 0},
{ISD::TRUNCATE, MVT::v8i32, MVT::v8i16, 1},
+ {ISD::TRUNCATE, MVT::v8i32, MVT::v8i8, 1},
{ISD::TRUNCATE, MVT::v16i32, MVT::v16i8, 3},
{ISD::TRUNCATE, MVT::v16i16, MVT::v16i8, 1},
};
if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
if (const auto *Entry =
- ConvertCostTableLookup(MVELoadConversionTbl, ISD, SrcTy.getSimpleVT(),
- DstTy.getSimpleVT()))
+ ConvertCostTableLookup(MVEStoreConversionTbl, ISD,
+ SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
}
- static const TypeConversionCostTblEntry MVEFLoadConversionTbl[] = {
+ static const TypeConversionCostTblEntry MVEFStoreConversionTbl[] = {
{ISD::FP_ROUND, MVT::v4f32, MVT::v4f16, 1},
{ISD::FP_ROUND, MVT::v8f32, MVT::v8f16, 3},
};
if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
if (const auto *Entry =
- ConvertCostTableLookup(MVEFLoadConversionTbl, ISD, SrcTy.getSimpleVT(),
- DstTy.getSimpleVT()))
+ ConvertCostTableLookup(MVEFStoreConversionTbl, ISD,
+ SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
}
}
@@ -504,19 +745,25 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
int Lanes = 1;
if (SrcTy.isFixedLengthVector())
Lanes = SrcTy.getVectorNumElements();
- auto IsLegal = [this](EVT VT) {
- EVT EltVT = VT.getScalarType();
- return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
- (EltVT == MVT::f64 && ST->hasFP64()) ||
- (EltVT == MVT::f16 && ST->hasFullFP16());
- };
- if (IsLegal(SrcTy) && IsLegal(DstTy))
+ if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
return Lanes;
else
return Lanes * CallCost;
}
+ if (ISD == ISD::TRUNCATE && ST->hasMVEIntegerOps() &&
+ SrcTy.isFixedLengthVector()) {
+ // Treat a truncate with larger than legal source (128 bits for MVE) as
+ // expensive, 2 instructions per lane.
+ if ((SrcTy.getScalarType() == MVT::i8 ||
+ SrcTy.getScalarType() == MVT::i16 ||
+ SrcTy.getScalarType() == MVT::i32) &&
+ SrcTy.getSizeInBits() > 128 &&
+ SrcTy.getSizeInBits() > DstTy.getSizeInBits())
+ return SrcTy.getVectorNumElements() * 2;
+ }
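A worked example of the rule just above, assuming the cheaper store-truncate table earlier in this function did not already apply: truncating <8 x i32> (256 bits, wider than the 128-bit MVE vector) down to <8 x i8> is charged two instructions per lane.

    // Illustrative helper only; the real check lives in getCastInstrCost.
    int overWideMVETruncCost(int numLanes) { return numLanes * 2; }
    // e.g. trunc <8 x i32> -> <8 x i8>: 8 lanes * 2 = 16.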
+
// Scalar integer conversion costs.
static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
@@ -540,7 +787,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
? ST->getMVEVectorCostFactor()
: 1;
return AdjustCost(
- BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I));
+ BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}
int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
@@ -580,15 +827,37 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
}
int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
- // TODO: Handle other cost kinds.
- if (CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
-
int ISD = TLI->InstructionOpcodeToISD(Opcode);
+
+ // Thumb scalar code size cost for select.
+ if (CostKind == TTI::TCK_CodeSize && ISD == ISD::SELECT &&
+ ST->isThumb() && !ValTy->isVectorTy()) {
+ // Assume expensive structs.
+ if (TLI->getValueType(DL, ValTy, true) == MVT::Other)
+ return TTI::TCC_Expensive;
+
+ // Select costs can vary because they:
+ // - may require one or more conditional mov (including an IT),
+ // - can't operate directly on immediates,
+ // - require live flags, which we can't copy around easily.
+ int Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
+
+ // Possible IT instruction for Thumb2, or more for Thumb1.
+ ++Cost;
+
+ // i1 values may need rematerialising by using mov immediates and/or
+ // flag setting instructions.
+ if (ValTy->isIntegerTy(1))
+ ++Cost;
+
+ return Cost;
+ }
+
// On NEON a vector select gets lowered to vbsl.
- if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
// Lowering of some vector selects is currently far from perfect.
static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
@@ -609,11 +878,15 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
return LT.first;
}
- int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy()
- ? ST->getMVEVectorCostFactor()
- : 1;
- return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind,
- I);
+ // Default to cheap (throughput/size of 1 instruction) but adjust throughput
+ // for "multiple beats" potentially needed by MVE instructions.
+ int BaseCost = 1;
+ if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() &&
+ ValTy->isVectorTy())
+ BaseCost = ST->getMVEVectorCostFactor();
+
+ return BaseCost *
+ BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -695,39 +968,83 @@ bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) {
(EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
}
-int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
- const MemCpyInst *MI = dyn_cast<MemCpyInst>(I);
- assert(MI && "MemcpyInst expected");
- ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength());
+/// Given a memcpy/memset/memmove instruction, return the number of memory
+/// operations performed, via querying findOptimalMemOpLowering. Returns -1 if a
+/// call is used.
+int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const {
+ MemOp MOp;
+ unsigned DstAddrSpace = ~0u;
+ unsigned SrcAddrSpace = ~0u;
+ const Function *F = I->getParent()->getParent();
- // To model the cost of a library call, we assume 1 for the call, and
- // 3 for the argument setup.
- const unsigned LibCallCost = 4;
+ if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
+ ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());
+ // If 'size' is not a constant, a library call will be generated.
+ if (!C)
+ return -1;
- // If 'size' is not a constant, a library call will be generated.
- if (!C)
- return LibCallCost;
+ const unsigned Size = C->getValue().getZExtValue();
+ const Align DstAlign = *MC->getDestAlign();
+ const Align SrcAlign = *MC->getSourceAlign();
- const unsigned Size = C->getValue().getZExtValue();
- const Align DstAlign = *MI->getDestAlign();
- const Align SrcAlign = *MI->getSourceAlign();
- const Function *F = I->getParent()->getParent();
- const unsigned Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
- std::vector<EVT> MemOps;
+ MOp = MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign,
+ /*IsVolatile*/ false);
+ DstAddrSpace = MC->getDestAddressSpace();
+ SrcAddrSpace = MC->getSourceAddressSpace();
+ }
+ else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
+ ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());
+ // If 'size' is not a constant, a library call will be generated.
+ if (!C)
+ return -1;
+
+ const unsigned Size = C->getValue().getZExtValue();
+ const Align DstAlign = *MS->getDestAlign();
+
+ MOp = MemOp::Set(Size, /*DstAlignCanChange*/ false, DstAlign,
+ /*IsZeroMemset*/ false, /*IsVolatile*/ false);
+ DstAddrSpace = MS->getDestAddressSpace();
+ }
+ else
+ llvm_unreachable("Expected a memcpy/move or memset!");
+
+ unsigned Limit, Factor = 2;
+ switch(I->getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
+ break;
+ case Intrinsic::memmove:
+ Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
+ break;
+ case Intrinsic::memset:
+ Limit = TLI->getMaxStoresPerMemset(F->hasMinSize());
+ Factor = 1;
+ break;
+ default:
+ llvm_unreachable("Expected a memcpy/move or memset!");
+ }
// MemOps will be populated with a list of data types that need to be
// loaded and stored. That's why we multiply the number of elements by 2 to
// get the cost for this memcpy.
+ std::vector<EVT> MemOps;
if (getTLI()->findOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign,
- /*IsVolatile*/ true),
- MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
- F->getAttributes()))
- return MemOps.size() * 2;
+ MemOps, Limit, MOp, DstAddrSpace,
+ SrcAddrSpace, F->getAttributes()))
+ return MemOps.size() * Factor;
// If we can't find an optimal memop lowering, return the default cost
- return LibCallCost;
+ return -1;
+}
+
+int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
+ int NumOps = getNumMemOps(cast<IntrinsicInst>(I));
+
+ // To model the cost of a library call, we assume 1 for the call, and
+ // 3 for the argument setup.
+ if (NumOps == -1)
+ return 4;
+ return NumOps;
}
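To make the counting above concrete: getNumMemOps asks findOptimalMemOpLowering how the intrinsic would be inlined and then charges a load plus a store per chunk for memcpy/memmove (Factor 2) and one store per chunk for memset (Factor 1), with -1 signalling an unavoidable library call. A back-of-the-envelope restatement; the numbers and helper name are illustrative only:

    // Mirrors the pricing above without querying a real target.
    int memIntrinsicCost(int numChunks, bool isMemset, bool constantLength) {
      if (!constantLength)
        return 4;                    // 1 for the call plus 3 for argument setup
      int factor = isMemset ? 1 : 2; // memset stores; memcpy/memmove load+store
      return numChunks * factor;
    }
    // e.g. a 16-byte memcpy lowered as two 8-byte chunks: 2 * 2 = 4 operations.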
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
@@ -832,13 +1149,22 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
TTI::OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
- // TODO: Handle more cost kinds.
- if (CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
- Op2Info, Opd1PropInfo,
- Opd2PropInfo, Args, CxtI);
-
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
+ if (ST->isThumb() && CostKind == TTI::TCK_CodeSize && Ty->isIntegerTy(1)) {
+ // Make operations on i1 relatively expensive as this often involves
+ // combining predicates. AND and XOR should be easier to handle with IT
+ // blocks.
+ switch (ISDOpcode) {
+ default:
+ break;
+ case ISD::AND:
+ case ISD::XOR:
+ return 2;
+ case ISD::OR:
+ return 3;
+ }
+ }
+
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->hasNEON()) {
@@ -933,9 +1259,12 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
if (LooksLikeAFreeShift())
return 0;
- int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy()
- ? ST->getMVEVectorCostFactor()
- : 1;
+ // Default to cheap (throughput/size of 1 instruction) but adjust throughput
+ // for "multiple beats" potentially needed by MVE instructions.
+ int BaseCost = 1;
+ if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() &&
+ Ty->isVectorTy())
+ BaseCost = ST->getMVEVectorCostFactor();
// The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
// without treating floats as more expensive than scalars or increasing the
@@ -1002,6 +1331,24 @@ int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
CostKind, I);
}
+unsigned ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) {
+ if (ST->hasMVEIntegerOps()) {
+ if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment))
+ return ST->getMVEVectorCostFactor();
+ if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment))
+ return ST->getMVEVectorCostFactor();
+ }
+ if (!isa<FixedVectorType>(Src))
+ return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
+ // Scalar cost, which is currently very high due to the inefficiency of the
+ // generated code.
+ return cast<FixedVectorType>(Src)->getNumElements() * 8;
+}
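A quick sanity check of the fallback above, using the stated 8-per-element model (my arithmetic, not from the patch): a masked store of <4 x i32> on a target without MVE is costed at 32, deliberately punitive next to the single beat factor charged when MVE handles it natively.

    // Illustrative only; the hook above applies this when MVE is unavailable.
    int scalarizedMaskedMemCost(int numElements) { return numElements * 8; }
    // e.g. a masked <4 x i32> store: 4 * 8 = 32.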
+
int ARMTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1032,7 +1379,8 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(
// promoted differently). The cost of 2 here is then a load and vrev or
// vmovn.
if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
- VecTy->isIntOrIntVectorTy() && DL.getTypeSizeInBits(SubVecTy) <= 64)
+ VecTy->isIntOrIntVectorTy() &&
+ DL.getTypeSizeInBits(SubVecTy).getFixedSize() <= 64)
return 2 * BaseCost;
}
@@ -1065,13 +1413,13 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
// multiplied by the number of elements being loaded. This is possibly very
// conservative, but even so we still end up vectorising loops because the
// cost per iteration for many loops is lower than for scalar loops.
- unsigned VectorCost = NumElems * LT.first;
+ unsigned VectorCost = NumElems * LT.first * ST->getMVEVectorCostFactor();
// The scalarization cost should be a lot higher. We use the number of vector
// elements plus the scalarization overhead.
unsigned ScalarCost =
NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, {});
- if (Alignment < EltSize / 8)
+ if (EltSize < 8 || Alignment < EltSize / 8)
return ScalarCost;
unsigned ExtSize = EltSize;
@@ -1140,6 +1488,92 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
return ScalarCost;
}
+int ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind) {
+ EVT ValVT = TLI->getValueType(DL, ValTy);
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD)
+ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
+ CostKind);
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+
+ static const CostTblEntry CostTblAdd[]{
+ {ISD::ADD, MVT::v16i8, 1},
+ {ISD::ADD, MVT::v8i16, 1},
+ {ISD::ADD, MVT::v4i32, 1},
+ };
+ if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second))
+ return Entry->Cost * ST->getMVEVectorCostFactor() * LT.first;
+
+ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
+ CostKind);
+}
+
+InstructionCost
+ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
+ Type *ResTy, VectorType *ValTy,
+ TTI::TargetCostKind CostKind) {
+ EVT ValVT = TLI->getValueType(DL, ValTy);
+ EVT ResVT = TLI->getValueType(DL, ResTy);
+ if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) ||
+ (LT.second == MVT::v8i16 &&
+ ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) ||
+ (LT.second == MVT::v4i32 && ResVT.getSizeInBits() <= 64))
+ return ST->getMVEVectorCostFactor() * LT.first;
+ }
+
+ return BaseT::getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, ValTy,
+ CostKind);
+}
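The MVE cases above correspond to reductions that fit a single VADDV/VADDLV-style accumulation of narrow elements into a 32- or 64-bit scalar. A rough source-level illustration of a loop that can become such an extending add reduction; whether the vectorizer actually forms it depends on flags and costs elsewhere:

    #include <cstddef>
    #include <cstdint>

    // Sums i8 elements into a 32-bit accumulator, the i8 -> i32 extending
    // reduction shape priced at one MVE beat factor per legalized vector.
    uint32_t sumBytes(const uint8_t *p, size_t n) {
      uint32_t sum = 0;
      for (size_t i = 0; i < n; ++i)
        sum += p[i];
      return sum;
    }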
+
+int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ switch (ICA.getID()) {
+ case Intrinsic::get_active_lane_mask:
+ // Currently we make a somewhat optimistic assumption that
+ // active_lane_masks are always free. In reality it may be freely folded
+ // into a tail predicated loop, expanded into a VCTP or expanded into a lot
+ // of add/icmp code. We may need to improve this in the future, but being
+ // able to detect if it is free or not involves looking at a lot of other
+ // code. We currently assume that the vectorizer inserted these, and knew
+ // what it was doing in adding one.
+ if (ST->hasMVEIntegerOps())
+ return 0;
+ break;
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat: {
+ if (!ST->hasMVEIntegerOps())
+ break;
+ // Get the Return type, either directly or from ICA.ReturnType and ICA.VF.
+ Type *VT = ICA.getReturnType();
+ if (!VT->isVectorTy() && !ICA.getVectorFactor().isScalar())
+ VT = VectorType::get(VT, ICA.getVectorFactor());
+
+ std::pair<int, MVT> LT =
+ TLI->getTypeLegalizationCost(DL, VT);
+ if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
+ LT.second == MVT::v16i8) {
+ // This is a base cost of 1 for the vadd, plus 3 extra shifts if we
+ // need to extend the type, as it uses shr(qadd(shl, shl)).
+ unsigned Instrs = LT.second.getScalarSizeInBits() ==
+ ICA.getReturnType()->getScalarSizeInBits()
+ ? 1
+ : 4;
+ return LT.first * ST->getMVEVectorCostFactor() * Instrs;
+ }
+ break;
+ }
+ }
+
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+}
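For the saturating add/sub costs above: when the element type is already MVE-legal the operation maps to a single saturating vector add, otherwise the widening lowering needs the shr(qadd(shl, shl)) sequence the comment mentions, hence cost 1 versus 4. A hypothetical scalar version of the operation being modelled, not code from the patch:

    #include <cstdint>

    // Saturating signed 16-bit add; the vector form of this is what
    // llvm.sadd.sat costs are estimated for above.
    int16_t saddSat16(int16_t a, int16_t b) {
      int32_t s = int32_t(a) + int32_t(b);
      if (s > INT16_MAX) s = INT16_MAX;
      if (s < INT16_MIN) s = INT16_MIN;
      return static_cast<int16_t>(s);
    }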
+
bool ARMTTIImpl::isLoweredToCall(const Function *F) {
if (!F->isIntrinsic())
BaseT::isLoweredToCall(F);
@@ -1201,6 +1635,93 @@ bool ARMTTIImpl::isLoweredToCall(const Function *F) {
return BaseT::isLoweredToCall(F);
}
+bool ARMTTIImpl::maybeLoweredToCall(Instruction &I) {
+ unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
+ EVT VT = TLI->getValueType(DL, I.getType(), true);
+ if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall)
+ return true;
+
+ // Check if an intrinsic will be lowered to a call and assume that any
+ // other CallInst will generate a bl.
+ if (auto *Call = dyn_cast<CallInst>(&I)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
+ switch(II->getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ return getNumMemOps(II) == -1;
+ default:
+ if (const Function *F = Call->getCalledFunction())
+ return isLoweredToCall(F);
+ }
+ }
+ return true;
+ }
+
+ // FPv5 provides conversions between integer, double-precision,
+ // single-precision, and half-precision formats.
+ switch (I.getOpcode()) {
+ default:
+ break;
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ return !ST->hasFPARMv8Base();
+ }
+
+ // FIXME: Unfortunately the approach of checking the Operation Action does
+ // not catch all cases of Legalization that use library calls. Our
+ // Legalization step categorizes some transformations into library calls as
+ // Custom, Expand or even Legal when doing type legalization. So for now
+ // we have to special case for instance the SDIV of 64bit integers and the
+ // use of floating point emulation.
+ if (VT.isInteger() && VT.getSizeInBits() >= 64) {
+ switch (ISD) {
+ default:
+ break;
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ return true;
+ }
+ }
+
+ // Assume all other non-float operations are supported.
+ if (!VT.isFloatingPoint())
+ return false;
+
+ // We'll need a library call to handle most floats when using soft.
+ if (TLI->useSoftFloat()) {
+ switch (I.getOpcode()) {
+ default:
+ return true;
+ case Instruction::Alloca:
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::Select:
+ case Instruction::PHI:
+ return false;
+ }
+ }
+
+ // We'll need a libcall to perform double precision operations on a single
+ // precision only FPU.
+ if (I.getType()->isDoubleTy() && !ST->hasFP64())
+ return true;
+
+ // Likewise for half precision arithmetic.
+ if (I.getType()->isHalfTy() && !ST->hasFullFP16())
+ return true;
+
+ return false;
+}
+
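maybeLoweredToCall is essentially a conservative classifier: explicit calls, 64-bit integer division/remainder, and floating-point operations the FPU cannot perform directly are all assumed to end in a library call (the __aeabi_* helpers), and everything else is assumed to stay inline. A reduced sketch of the same decision tree, with the subtarget capabilities passed as booleans instead of being read from ARMSubtarget/TargetLowering, and with the memcpy/memset sizing and the soft-float load/store/select exceptions left out; struct and function names here are illustrative only:

#include <string>

struct FPCaps {          // stand-ins for TLI->useSoftFloat(), ST->hasFP64(),
  bool SoftFloat;        // and ST->hasFullFP16()
  bool HasFP64;
  bool HasFullFP16;
};

// Returns true when an (opcode, type) pair is expected to become a libcall.
bool maybeLibcall(const std::string &Op, unsigned Bits, bool IsFloat,
                  const FPCaps &C) {
  if (!IsFloat)
    // 64-bit div/rem lower to __aeabi_ldivmod-style helpers.
    return Bits >= 64 && (Op == "sdiv" || Op == "udiv" ||
                          Op == "srem" || Op == "urem");
  if (C.SoftFloat)
    return true;                  // most FP arithmetic becomes a call
  if (Bits == 64 && !C.HasFP64)
    return true;                  // double on a single-precision-only FPU
  if (Bits == 16 && !C.HasFullFP16)
    return true;                  // half without full fp16 support
  return false;
}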
bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
@@ -1235,93 +1756,13 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
// Making a call will trash LR and clear LO_BRANCH_INFO, so there's little
// point in generating a hardware loop if that's going to happen.
- auto MaybeCall = [this](Instruction &I) {
- const ARMTargetLowering *TLI = getTLI();
- unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
- EVT VT = TLI->getValueType(DL, I.getType(), true);
- if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall)
- return true;
-
- // Check if an intrinsic will be lowered to a call and assume that any
- // other CallInst will generate a bl.
- if (auto *Call = dyn_cast<CallInst>(&I)) {
- if (isa<IntrinsicInst>(Call)) {
- if (const Function *F = Call->getCalledFunction())
- return isLoweredToCall(F);
- }
- return true;
- }
-
- // FPv5 provides conversions between integer, double-precision,
- // single-precision, and half-precision formats.
- switch (I.getOpcode()) {
- default:
- break;
- case Instruction::FPToSI:
- case Instruction::FPToUI:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- return !ST->hasFPARMv8Base();
- }
-
- // FIXME: Unfortunately the approach of checking the Operation Action does
- // not catch all cases of Legalization that use library calls. Our
- // Legalization step categorizes some transformations into library calls as
- // Custom, Expand or even Legal when doing type legalization. So for now
- // we have to special case for instance the SDIV of 64bit integers and the
- // use of floating point emulation.
- if (VT.isInteger() && VT.getSizeInBits() >= 64) {
- switch (ISD) {
- default:
- break;
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::SREM:
- case ISD::UREM:
- case ISD::SDIVREM:
- case ISD::UDIVREM:
- return true;
- }
- }
-
- // Assume all other non-float operations are supported.
- if (!VT.isFloatingPoint())
- return false;
-
- // We'll need a library call to handle most floats when using soft.
- if (TLI->useSoftFloat()) {
- switch (I.getOpcode()) {
- default:
- return true;
- case Instruction::Alloca:
- case Instruction::Load:
- case Instruction::Store:
- case Instruction::Select:
- case Instruction::PHI:
- return false;
- }
- }
-
- // We'll need a libcall to perform double precision operations on a single
- // precision only FPU.
- if (I.getType()->isDoubleTy() && !ST->hasFP64())
- return true;
-
- // Likewise for half precision arithmetic.
- if (I.getType()->isHalfTy() && !ST->hasFullFP16())
- return true;
-
- return false;
- };
auto IsHardwareLoopIntrinsic = [](Instruction &I) {
if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
switch (Call->getIntrinsicID()) {
default:
break;
- case Intrinsic::set_loop_iterations:
+ case Intrinsic::start_loop_iterations:
case Intrinsic::test_set_loop_iterations:
case Intrinsic::loop_decrement:
case Intrinsic::loop_decrement_reg:
@@ -1332,14 +1773,24 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
};
// Scan the instructions to see if there's any that we know will turn into a
- // call or if this loop is already a low-overhead loop.
+ // call or if this loop is already a low-overhead loop or will become a tail
+ // predicated loop.
+ bool IsTailPredLoop = false;
auto ScanLoop = [&](Loop *L) {
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
- if (MaybeCall(I) || IsHardwareLoopIntrinsic(I)) {
+ if (maybeLoweredToCall(I) || IsHardwareLoopIntrinsic(I) ||
+ isa<InlineAsm>(I)) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n");
return false;
}
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ IsTailPredLoop |=
+ II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
+ II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
+ II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
+ II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
+ II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
}
}
return true;
@@ -1360,7 +1811,7 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
LLVMContext &C = L->getHeader()->getContext();
HWLoopInfo.CounterInReg = true;
HWLoopInfo.IsNestingLegal = false;
- HWLoopInfo.PerformEntryTest = true;
+ HWLoopInfo.PerformEntryTest = AllowWLSLoops && !IsTailPredLoop;
HWLoopInfo.CountType = Type::getInt32Ty(C);
HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
return true;
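Two policy changes meet in this hunk: the scan records whether the loop already contains a get.active.lane.mask or arm.mve.vctp* intrinsic (meaning it is headed for tail predication), and the entry test that yields a WLS-style loop start is only requested when WLS loops are allowed and the loop is not expected to be tail-predicated. A small sketch of that logic with intrinsic names as plain strings, outside the IntrinsicInst machinery (names are illustrative):

#include <string>
#include <vector>

// The patch checks IntrinsicInst IDs; here the names are assumed inputs.
bool looksTailPredicated(const std::vector<std::string> &Intrinsics) {
  for (const std::string &Name : Intrinsics)
    if (Name == "llvm.get.active.lane.mask" ||
        Name.rfind("llvm.arm.mve.vctp", 0) == 0)   // vctp8/16/32/64
      return true;
  return false;
}

// Mirrors HWLoopInfo.PerformEntryTest = AllowWLSLoops && !IsTailPredLoop.
bool wantsWhileLoopEntryTest(bool AllowWLSLoops, bool IsTailPredLoop) {
  return AllowWLSLoops && !IsTailPredLoop;
}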
@@ -1408,35 +1859,28 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
const LoopAccessInfo *LAI) {
LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
- // If there are live-out values, it is probably a reduction, which needs a
- // final reduction step after the loop. MVE has a VADDV instruction to reduce
- // integer vectors, but doesn't have an equivalent one for float vectors. A
- // live-out value that is not recognised as a reduction will result in the
- // tail-predicated loop to be reverted to a non-predicated loop and this is
- // very expensive, i.e. it has a significant performance impact. So, in this
- // case it's better not to tail-predicate the loop, which is what we check
- // here. Thus, we allow only 1 live-out value, which has to be an integer
- // reduction, which matches the loops supported by ARMLowOverheadLoops.
- // It is important to keep ARMLowOverheadLoops and canTailPredicateLoop in
- // sync with each other.
+ // If there are live-out values, it is probably a reduction. We can predicate
+ // most reduction operations freely under MVE using a combination of
+ // prefer-predicated-reduction-select and inloop reductions. We limit this to
+ // floating point and integer reductions, but don't check for operators
+ // specifically here. If the value ends up not being a reduction (and so the
+ // vectorizer cannot tailfold the loop), we should fall back to standard
+ // vectorization automatically.
SmallVector< Instruction *, 8 > LiveOuts;
LiveOuts = llvm::findDefsUsedOutsideOfLoop(L);
- bool IntReductionsDisabled =
+ bool ReductionsDisabled =
EnableTailPredication == TailPredication::EnabledNoReductions ||
EnableTailPredication == TailPredication::ForceEnabledNoReductions;
for (auto *I : LiveOuts) {
- if (!I->getType()->isIntegerTy()) {
- LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer "
+ if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
+ !I->getType()->isHalfTy()) {
+ LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
"live-out value\n");
return false;
}
- if (I->getOpcode() != Instruction::Add) {
- LLVM_DEBUG(dbgs() << "Only add reductions supported\n");
- return false;
- }
- if (IntReductionsDisabled) {
- LLVM_DEBUG(dbgs() << "Integer add reductions not enabled\n");
+ if (ReductionsDisabled) {
+ LLVM_DEBUG(dbgs() << "Reductions not enabled\n");
return false;
}
}
@@ -1445,7 +1889,6 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
PredicatedScalarEvolution PSE = LAI->getPSE();
SmallVector<Instruction *, 16> LoadStores;
int ICmpCount = 0;
- int Stride = 0;
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -1464,22 +1907,38 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
LLVM_DEBUG(dbgs() << "Unsupported Type: "; T->dump());
return false;
}
-
if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1);
int64_t NextStride = getPtrStride(PSE, Ptr, L);
- // TODO: for now only allow consecutive strides of 1. We could support
- // other strides as long as it is uniform, but let's keep it simple for
- // now.
- if (Stride == 0 && NextStride == 1) {
- Stride = NextStride;
+ if (NextStride == 1) {
+ // TODO: for now only allow consecutive strides of 1. We could support
+ // other strides as long as it is uniform, but let's keep it simple
+ // for now.
continue;
- }
- if (Stride != NextStride) {
- LLVM_DEBUG(dbgs() << "Different strides found, can't "
- "tail-predicate\n.");
+ } else if (NextStride == -1 ||
+ (NextStride == 2 && MVEMaxSupportedInterleaveFactor >= 2) ||
+ (NextStride == 4 && MVEMaxSupportedInterleaveFactor >= 4)) {
+ LLVM_DEBUG(dbgs()
+                   << "Consecutive strides of 2 found, vld2/vst2 can't "
+                      "be tail-predicated.\n");
return false;
+ // TODO: don't tail predicate if there is a reversed load?
+ } else if (EnableMaskedGatherScatters) {
+ // Gather/scatters do allow loading from arbitrary strides, at
+ // least if they are loop invariant.
+ // TODO: Loop variant strides should in theory work, too, but
+ // this requires further testing.
+ const SCEV *PtrScev =
+ replaceSymbolicStrideSCEV(PSE, llvm::ValueToValueMap(), Ptr);
+ if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
+ const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
+ if (PSE.getSE()->isLoopInvariant(Step, L))
+ continue;
+ }
}
+ LLVM_DEBUG(dbgs() << "Bad stride found, can't "
+                           "tail-predicate.\n");
+ return false;
}
}
}
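The stride handling above now distinguishes three cases: unit strides are always fine, strides of -1, 2 and 4 are rejected because the reversed or interleaved (VLD2/VLD4) forms cannot be tail-predicated, and any other stride is acceptable only when masked gathers/scatters are enabled and the access step is loop-invariant. A sketch of just that classification, with the SCEV loop-invariance question reduced to a boolean parameter (illustrative names, not the in-tree API):

enum class StrideKind { Predicable, NotPredicable };

// MaxInterleave stands for MVEMaxSupportedInterleaveFactor, GatherScatter for
// EnableMaskedGatherScatters, StepIsLoopInvariant for the SCEV check.
StrideKind classifyStride(long long Stride, int MaxInterleave,
                          bool GatherScatter, bool StepIsLoopInvariant) {
  if (Stride == 1)
    return StrideKind::Predicable;        // plain contiguous VLDR/VSTR
  if (Stride == -1 || (Stride == 2 && MaxInterleave >= 2) ||
      (Stride == 4 && MaxInterleave >= 4))
    return StrideKind::NotPredicable;     // reversed/interleaved accesses
  if (GatherScatter && StepIsLoopInvariant)
    return StrideKind::Predicable;        // can become a gather/scatter
  return StrideKind::NotPredicable;
}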
@@ -1512,7 +1971,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
return false;
}
- assert(L->empty() && "preferPredicateOverEpilogue: inner-loop expected");
+ assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
HardwareLoopInfo HWLoopInfo(L);
if (!HWLoopInfo.canAnalyze(*LI)) {
@@ -1580,6 +2039,10 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
return;
+ // Don't unroll vectorized loops, including the remainder loop
+ if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
+ return;
+
// Scan the loop: don't unroll loops with calls as this could prevent
// inlining.
unsigned Cost = 0;
@@ -1598,9 +2061,9 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
return;
}
- SmallVector<const Value*, 4> Operands(I.value_op_begin(),
- I.value_op_end());
- Cost += getUserCost(&I, Operands, TargetTransformInfo::TCK_CodeSize);
+ SmallVector<const Value*, 4> Operands(I.operand_values());
+ Cost +=
+ getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
}
}
@@ -1629,3 +2092,24 @@ bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const {
return ST->hasMVEIntegerOps();
}
+
+bool ARMTTIImpl::preferInLoopReduction(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const {
+ if (!ST->hasMVEIntegerOps())
+ return false;
+
+ unsigned ScalarBits = Ty->getScalarSizeInBits();
+ switch (Opcode) {
+ case Instruction::Add:
+ return ScalarBits <= 64;
+ default:
+ return false;
+ }
+}
+
+bool ARMTTIImpl::preferPredicatedReductionSelect(
+ unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const {
+ if (!ST->hasMVEIntegerOps())
+ return false;
+ return true;
+}
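The two new hooks are deliberately thin policies: with MVE integer ops available, integer add reductions (elements up to 64 bits) are kept in-loop, presumably so they can use the VADDV/VADDVA family, and predicated reduction selects are always preferred so the select can fold into the predicated operation. Compressed into plain predicates, with the parameter names standing in for the opcode, type and subtarget queries:

// HasMVEInt stands for ST->hasMVEIntegerOps(), IsAdd for Opcode ==
// Instruction::Add, ScalarBits for Ty->getScalarSizeInBits().
bool preferInLoopReduction(bool HasMVEInt, bool IsAdd, unsigned ScalarBits) {
  return HasMVEInt && IsAdd && ScalarBits <= 64;
}

bool preferPredicatedReductionSelect(bool HasMVEInt) {
  return HasMVEInt;
}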
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 7bf6de4bffe0..7f045080e320 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -113,6 +113,9 @@ public:
return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
}
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const;
+
/// \name Scalar TTI Implementations
/// @{
@@ -123,7 +126,8 @@ public:
int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind);
+ Type *Ty, TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr);
/// @}
@@ -177,40 +181,31 @@ public:
int getMemcpyCost(const Instruction *I);
+ int getNumMemOps(const IntrinsicInst *I) const;
+
int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp);
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
- bool shouldExpandReduction(const IntrinsicInst *II) const {
- switch (II->getIntrinsicID()) {
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- // We don't have legalization support for ordered FP reductions.
- if (!II->getFastMathFlags().allowReassoc())
- return true;
- // Can't legalize reductions with soft floats.
- return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();
-
- case Intrinsic::experimental_vector_reduce_fmin:
- case Intrinsic::experimental_vector_reduce_fmax:
- // Can't legalize reductions with soft floats, and NoNan will create
- // fminimum which we do not know how to lower.
- return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() ||
- !II->getFastMathFlags().noNaNs();
-
- default:
- // Don't expand anything else, let legalization deal with it.
- return false;
- }
- }
+ bool preferInLoopReduction(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const;
+
+ bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const;
+
+ bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
+
+ int getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::TargetCostKind CostKind,
+ TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
@@ -234,6 +229,10 @@ public:
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
+ unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind);
+
int getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
@@ -245,6 +244,17 @@ public:
Align Alignment, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
+ int getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind);
+ InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
+ Type *ResTy, VectorType *ValTy,
+ TTI::TargetCostKind CostKind);
+
+ int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
+
+ bool maybeLoweredToCall(Instruction &I);
bool isLoweredToCall(const Function *F);
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 05f870b90ecd..52577d75ddf5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -3087,7 +3087,6 @@ public:
// This is container for the immediate that we will create the constant
// pool from
addExpr(Inst, getConstantPoolImm());
- return;
}
void addMemTBBOperands(MCInst &Inst, unsigned N) const {
@@ -6240,10 +6239,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
StringRef IDVal = Parser.getTok().getIdentifier();
const auto &Prefix =
- std::find_if(std::begin(PrefixEntries), std::end(PrefixEntries),
- [&IDVal](const PrefixEntry &PE) {
- return PE.Spelling == IDVal;
- });
+ llvm::find_if(PrefixEntries, [&IDVal](const PrefixEntry &PE) {
+ return PE.Spelling == IDVal;
+ });
if (Prefix == std::end(PrefixEntries)) {
Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
return true;
@@ -10309,11 +10307,14 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
!HasWideQualifier) {
// The operands aren't the same for tMOV[S]r... (no cc_out)
MCInst TmpInst;
- TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
+ unsigned Op = Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr;
+ TmpInst.setOpcode(Op);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(Inst.getOperand(2));
- TmpInst.addOperand(Inst.getOperand(3));
+ if (Op == ARM::tMOVr) {
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ }
Inst = TmpInst;
return true;
}
@@ -10598,6 +10599,12 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
(isThumb() && !hasV8Ops()))
return Match_InvalidOperand;
break;
+ case ARM::t2TBB:
+ case ARM::t2TBH:
+ // Rn = sp is only allowed with ARMv8-A
+ if (!hasV8Ops() && (Inst.getOperand(0).getReg() == ARM::SP))
+ return Match_RequiresV8;
+ break;
default:
break;
}
@@ -11128,7 +11135,8 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
bool WasThumb = isThumb();
Triple T;
MCSubtargetInfo &STI = copySTI();
- STI.setDefaultFeatures("", ("+" + ARM::getArchName(ID)).str());
+ STI.setDefaultFeatures("", /*TuneCPU*/ "",
+ ("+" + ARM::getArchName(ID)).str());
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
FixModeAfterArchChange(WasThumb, L);
@@ -11241,7 +11249,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
bool WasThumb = isThumb();
MCSubtargetInfo &STI = copySTI();
- STI.setDefaultFeatures(CPU, "");
+ STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, "");
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
FixModeAfterArchChange(WasThumb, L);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 54ff0d9966cb..8ea323a9ced5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -860,7 +860,8 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
VCCPos + 2, MCOI::TIED_TO);
assert(TiedOp >= 0 &&
"Inactive register in vpred_r is not tied to an output!");
- MI.insert(VCCI, MI.getOperand(TiedOp));
+ // Copy the operand to ensure it's not invalidated when MI grows.
+ MI.insert(VCCI, MCOperand(MI.getOperand(TiedOp)));
}
} else if (VCC != ARMVCC::None) {
Check(S, SoftFail);
@@ -4529,12 +4530,14 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
static DecodeStatus
DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
+ const FeatureBitset &FeatureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
unsigned Rm = fieldFromInstruction(Insn, 0, 4);
- if (Rn == ARM::SP) S = MCDisassembler::SoftFail;
+ if (Rn == 13 && !FeatureBits[ARM::HasV8Ops]) S = MCDisassembler::SoftFail;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 24a9fabf0979..8459b4ff2a14 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -205,6 +205,20 @@ namespace ARM_AM {
return V;
}
+ /// isSOImmTwoPartValNeg - Return true if the specified value can be obtained
+  /// by two SOImmVals, such that -V = First + Second.
+ /// "R+V" can be optimized to (sub (sub R, First), Second).
+ /// "R=V" can be optimized to (sub (mvn R, ~(-First)), Second).
+ inline bool isSOImmTwoPartValNeg(unsigned V) {
+ unsigned First;
+ if (!isSOImmTwoPartVal(-V))
+ return false;
+    // Return false if ~(-First) is not an SOImmVal.
+ First = getSOImmTwoPartFirst(-V);
+ First = ~(-First);
+ return !(rotr32(~255U, getSOImmValRotate(First)) & First);
+ }
+
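The helper above is pure bit arithmetic on ARM's rotated 8-bit immediate (so_imm) encoding: it asks whether -V splits into two such immediates and whether ~(-First) is itself encodable, so that R+V or R=V can be materialised from sub and mvn instructions. The sketch below re-derives the underlying checks in standalone form; the brute-force two-part test answers the same question as the greedy isSOImmTwoPartVal/getSOImmTwoPartFirst pair but is not a copy of them:

#include <cstdint>

static uint32_t rotr32(uint32_t V, unsigned R) {
  R &= 31;
  return R ? ((V >> R) | (V << (32 - R))) : V;
}

// An ARM so_imm is an 8-bit value rotated right by an even amount.
static bool isSOImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2)
    if ((V & ~rotr32(0xFFu, R)) == 0)
      return true;
  return false;
}

// Brute-force: V (not itself a single so_imm) is the OR of two disjoint
// rotated bytes, so it can be applied with two add/sub instructions.
static bool isSOImmTwoPart(uint32_t V) {
  if (V == 0 || isSOImm(V))
    return false;
  for (unsigned R = 0; R < 32; R += 2) {
    uint32_t Chunk = V & rotr32(0xFFu, R);
    if (Chunk != 0 && isSOImm(V & ~Chunk))
      return true;
  }
  return false;
}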
/// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
/// by a left shift. Returns the shift amount to use.
inline unsigned getThumbImmValShift(unsigned Imm) {
@@ -673,6 +687,18 @@ namespace ARM_AM {
return getFP16Imm(FPImm.bitcastToAPInt());
}
+ /// If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding
+ /// for it. Otherwise return -1 like getFP16Imm.
+ inline int getFP32FP16Imm(const APInt &Imm) {
+ if (Imm.getActiveBits() > 16)
+ return -1;
+ return ARM_AM::getFP16Imm(Imm.trunc(16));
+ }
+
+ inline int getFP32FP16Imm(const APFloat &FPImm) {
+ return getFP32FP16Imm(FPImm.bitcastToAPInt());
+ }
+
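getFP32FP16Imm only has to confirm that the operand, although parsed with an f32 type, uses no more than 16 active bits before handing the pattern to the half-precision encoder. For reference, the sketch below re-derives that encoder from the architectural imm8 format (sign bit, 3 exponent bits covering exponents -3..4, 4 fraction bits); it mirrors what ARM_AM::getFP16Imm does but is written out independently, so treat the details as illustrative:

#include <cstdint>

// Returns the 8-bit VMOV encoding for a half-precision bit pattern, or -1.
int encodeFP16Imm(uint16_t Bits) {
  unsigned Sign = (Bits >> 15) & 1;
  int Exp = (int)((Bits >> 10) & 0x1F) - 15;  // unbias the 5-bit exponent
  unsigned Mantissa = Bits & 0x3FF;           // 10-bit fraction

  if (Mantissa & 0x3F)                        // only 4 fraction bits available
    return -1;
  Mantissa >>= 6;
  if (Exp < -3 || Exp > 4)                    // only 3 exponent bits available
    return -1;
  unsigned EncExp = ((unsigned)(Exp + 3) & 0x7) ^ 4;
  return (int)((Sign << 7) | (EncExp << 4) | Mantissa);
}

// The new overloads add little beyond an "only 16 active bits" check.
int encodeFP32AsFP16Imm(uint32_t Bits) {
  return Bits > 0xFFFFu ? -1 : encodeFP16Imm((uint16_t)Bits);
}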
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
/// floating-point value. If the value cannot be represented as an 8-bit
/// floating-point value, then return -1.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 9ad595c016c4..b02aef3c338b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -1010,6 +1010,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
case ARM::fixup_t2_condbranch:
case ARM::fixup_t2_uncondbranch:
case ARM::fixup_t2_pcrel_10:
+ case ARM::fixup_t2_pcrel_9:
case ARM::fixup_t2_adr_pcrel_12:
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 74cd2e681ded..ecd96114e8a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -254,7 +254,7 @@ namespace ARMII {
MO_OPTION_MASK = 0x3,
/// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the ".refptrp.FOO" symbol. This is used for
+ /// reference is actually to the ".refptr.FOO" symbol. This is used for
/// stub symbols on windows.
MO_COFFSTUB = 0x4,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 876741d6c343..07ca5c29f0ec 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -644,7 +644,6 @@ private:
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
- Symbol->setExternal(false);
}
void EmitMappingSymbol(StringRef Name, SMLoc Loc, MCFragment *F,
@@ -654,7 +653,6 @@ private:
emitLabelAtPos(Symbol, Loc, F, Offset);
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
- Symbol->setExternal(false);
}
void emitThumbFunc(MCSymbol *Func) override {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
index 37cb731ff001..d975d799e079 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
@@ -30,6 +30,7 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
virtual bool printAliasInstr(const MCInst *MI, uint64_t Address,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 765613cf347d..40e8e244e312 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -87,6 +87,7 @@ void ARMCOFFMCAsmInfoMicrosoft::anchor() { }
ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
AlignmentIsInBytes = false;
+ SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::WinEH;
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 05d73ccf6ff2..774f2507b8d2 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -11,11 +11,13 @@
//===----------------------------------------------------------------------===//
#include "ARMMCTargetDesc.h"
+#include "ARMAddressingModes.h"
#include "ARMBaseInfo.h"
#include "ARMInstPrinter.h"
#include "ARMMCAsmInfo.h"
#include "TargetInfo/ARMTargetInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCELFStreamer.h"
@@ -180,6 +182,23 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
return ARMArchFeature;
}
+bool ARM_MC::isPredicated(const MCInst &MI, const MCInstrInfo *MCII) {
+ const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+ int PredOpIdx = Desc.findFirstPredOperandIdx();
+ return PredOpIdx != -1 && MI.getOperand(PredOpIdx).getImm() != ARMCC::AL;
+}
+
+bool ARM_MC::isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII) {
+ const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ const MCOperand &MO = MI.getOperand(I);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR &&
+ Desc.OpInfo[I].isOptionalDef())
+ return true;
+ }
+ return false;
+}
+
MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
@@ -190,7 +209,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT,
ArchFS = std::string(FS);
}
- return createARMMCSubtargetInfoImpl(TT, CPU, ArchFS);
+ return createARMMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS);
}
static MCInstrInfo *createARMMCInstrInfo() {
@@ -199,9 +218,120 @@ static MCInstrInfo *createARMMCInstrInfo() {
return X;
}
+void ARM_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
+ // Mapping from CodeView to MC register id.
+ static const struct {
+ codeview::RegisterId CVReg;
+ MCPhysReg Reg;
+ } RegMap[] = {
+ {codeview::RegisterId::ARM_R0, ARM::R0},
+ {codeview::RegisterId::ARM_R1, ARM::R1},
+ {codeview::RegisterId::ARM_R2, ARM::R2},
+ {codeview::RegisterId::ARM_R3, ARM::R3},
+ {codeview::RegisterId::ARM_R4, ARM::R4},
+ {codeview::RegisterId::ARM_R5, ARM::R5},
+ {codeview::RegisterId::ARM_R6, ARM::R6},
+ {codeview::RegisterId::ARM_R7, ARM::R7},
+ {codeview::RegisterId::ARM_R8, ARM::R8},
+ {codeview::RegisterId::ARM_R9, ARM::R9},
+ {codeview::RegisterId::ARM_R10, ARM::R10},
+ {codeview::RegisterId::ARM_R11, ARM::R11},
+ {codeview::RegisterId::ARM_R12, ARM::R12},
+ {codeview::RegisterId::ARM_SP, ARM::SP},
+ {codeview::RegisterId::ARM_LR, ARM::LR},
+ {codeview::RegisterId::ARM_PC, ARM::PC},
+ {codeview::RegisterId::ARM_CPSR, ARM::CPSR},
+ {codeview::RegisterId::ARM_FPSCR, ARM::FPSCR},
+ {codeview::RegisterId::ARM_FPEXC, ARM::FPEXC},
+ {codeview::RegisterId::ARM_FS0, ARM::S0},
+ {codeview::RegisterId::ARM_FS1, ARM::S1},
+ {codeview::RegisterId::ARM_FS2, ARM::S2},
+ {codeview::RegisterId::ARM_FS3, ARM::S3},
+ {codeview::RegisterId::ARM_FS4, ARM::S4},
+ {codeview::RegisterId::ARM_FS5, ARM::S5},
+ {codeview::RegisterId::ARM_FS6, ARM::S6},
+ {codeview::RegisterId::ARM_FS7, ARM::S7},
+ {codeview::RegisterId::ARM_FS8, ARM::S8},
+ {codeview::RegisterId::ARM_FS9, ARM::S9},
+ {codeview::RegisterId::ARM_FS10, ARM::S10},
+ {codeview::RegisterId::ARM_FS11, ARM::S11},
+ {codeview::RegisterId::ARM_FS12, ARM::S12},
+ {codeview::RegisterId::ARM_FS13, ARM::S13},
+ {codeview::RegisterId::ARM_FS14, ARM::S14},
+ {codeview::RegisterId::ARM_FS15, ARM::S15},
+ {codeview::RegisterId::ARM_FS16, ARM::S16},
+ {codeview::RegisterId::ARM_FS17, ARM::S17},
+ {codeview::RegisterId::ARM_FS18, ARM::S18},
+ {codeview::RegisterId::ARM_FS19, ARM::S19},
+ {codeview::RegisterId::ARM_FS20, ARM::S20},
+ {codeview::RegisterId::ARM_FS21, ARM::S21},
+ {codeview::RegisterId::ARM_FS22, ARM::S22},
+ {codeview::RegisterId::ARM_FS23, ARM::S23},
+ {codeview::RegisterId::ARM_FS24, ARM::S24},
+ {codeview::RegisterId::ARM_FS25, ARM::S25},
+ {codeview::RegisterId::ARM_FS26, ARM::S26},
+ {codeview::RegisterId::ARM_FS27, ARM::S27},
+ {codeview::RegisterId::ARM_FS28, ARM::S28},
+ {codeview::RegisterId::ARM_FS29, ARM::S29},
+ {codeview::RegisterId::ARM_FS30, ARM::S30},
+ {codeview::RegisterId::ARM_FS31, ARM::S31},
+ {codeview::RegisterId::ARM_ND0, ARM::D0},
+ {codeview::RegisterId::ARM_ND1, ARM::D1},
+ {codeview::RegisterId::ARM_ND2, ARM::D2},
+ {codeview::RegisterId::ARM_ND3, ARM::D3},
+ {codeview::RegisterId::ARM_ND4, ARM::D4},
+ {codeview::RegisterId::ARM_ND5, ARM::D5},
+ {codeview::RegisterId::ARM_ND6, ARM::D6},
+ {codeview::RegisterId::ARM_ND7, ARM::D7},
+ {codeview::RegisterId::ARM_ND8, ARM::D8},
+ {codeview::RegisterId::ARM_ND9, ARM::D9},
+ {codeview::RegisterId::ARM_ND10, ARM::D10},
+ {codeview::RegisterId::ARM_ND11, ARM::D11},
+ {codeview::RegisterId::ARM_ND12, ARM::D12},
+ {codeview::RegisterId::ARM_ND13, ARM::D13},
+ {codeview::RegisterId::ARM_ND14, ARM::D14},
+ {codeview::RegisterId::ARM_ND15, ARM::D15},
+ {codeview::RegisterId::ARM_ND16, ARM::D16},
+ {codeview::RegisterId::ARM_ND17, ARM::D17},
+ {codeview::RegisterId::ARM_ND18, ARM::D18},
+ {codeview::RegisterId::ARM_ND19, ARM::D19},
+ {codeview::RegisterId::ARM_ND20, ARM::D20},
+ {codeview::RegisterId::ARM_ND21, ARM::D21},
+ {codeview::RegisterId::ARM_ND22, ARM::D22},
+ {codeview::RegisterId::ARM_ND23, ARM::D23},
+ {codeview::RegisterId::ARM_ND24, ARM::D24},
+ {codeview::RegisterId::ARM_ND25, ARM::D25},
+ {codeview::RegisterId::ARM_ND26, ARM::D26},
+ {codeview::RegisterId::ARM_ND27, ARM::D27},
+ {codeview::RegisterId::ARM_ND28, ARM::D28},
+ {codeview::RegisterId::ARM_ND29, ARM::D29},
+ {codeview::RegisterId::ARM_ND30, ARM::D30},
+ {codeview::RegisterId::ARM_ND31, ARM::D31},
+ {codeview::RegisterId::ARM_NQ0, ARM::Q0},
+ {codeview::RegisterId::ARM_NQ1, ARM::Q1},
+ {codeview::RegisterId::ARM_NQ2, ARM::Q2},
+ {codeview::RegisterId::ARM_NQ3, ARM::Q3},
+ {codeview::RegisterId::ARM_NQ4, ARM::Q4},
+ {codeview::RegisterId::ARM_NQ5, ARM::Q5},
+ {codeview::RegisterId::ARM_NQ6, ARM::Q6},
+ {codeview::RegisterId::ARM_NQ7, ARM::Q7},
+ {codeview::RegisterId::ARM_NQ8, ARM::Q8},
+ {codeview::RegisterId::ARM_NQ9, ARM::Q9},
+ {codeview::RegisterId::ARM_NQ10, ARM::Q10},
+ {codeview::RegisterId::ARM_NQ11, ARM::Q11},
+ {codeview::RegisterId::ARM_NQ12, ARM::Q12},
+ {codeview::RegisterId::ARM_NQ13, ARM::Q13},
+ {codeview::RegisterId::ARM_NQ14, ARM::Q14},
+ {codeview::RegisterId::ARM_NQ15, ARM::Q15},
+ };
+ for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
+ MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
+}
+
static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);
+ ARM_MC::initLLVMToCVRegMapping(X);
return X;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 7cfe6881b456..5a0874f0ef1f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -41,6 +41,21 @@ class raw_pwrite_stream;
namespace ARM_MC {
std::string ParseARMTriple(const Triple &TT, StringRef CPU);
+void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
+
+bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII);
+bool isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII);
+
+template<class Inst>
+bool isLDMBaseRegInList(const Inst &MI) {
+ auto BaseReg = MI.getOperand(0).getReg();
+ for (unsigned I = 1, E = MI.getNumOperands(); I < E; ++I) {
+ const auto &Op = MI.getOperand(I);
+ if (Op.isReg() && Op.getReg() == BaseReg)
+ return true;
+ }
+ return false;
+}
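isLDMBaseRegInList is a template so it can serve both MC-layer and CodeGen-layer instructions: it simply asks whether the base register (operand 0) shows up again anywhere in the register list. The same scan on a plain container, as a toy stand-in for MCInst/MachineInstr operands that models register operands only:

#include <cstddef>
#include <vector>

// Operand 0 is the base register; the remaining entries model the LDM list.
bool ldmBaseRegInList(const std::vector<unsigned> &RegOperands) {
  if (RegOperands.empty())
    return false;
  unsigned Base = RegOperands[0];
  for (std::size_t I = 1; I < RegOperands.size(); ++I)
    if (RegOperands[I] == Base)
      return true;
  return false;
}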
/// Create a ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc.
/// do not need to go through TargetRegistry.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index 4d7ad6cd60cb..56823735e2d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -44,10 +44,10 @@
using namespace llvm;
-#define DEBUG_TYPE "mve-gather-scatter-lowering"
+#define DEBUG_TYPE "arm-mve-gather-scatter-lowering"
cl::opt<bool> EnableMaskedGatherScatters(
- "enable-arm-maskedgatscat", cl::Hidden, cl::init(false),
+ "enable-arm-maskedgatscat", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of masked gathers and scatters"));
namespace {
@@ -84,7 +84,7 @@ private:
// Check for a getelementptr and deduce base and offsets from it, on success
// returning the base directly and the offsets indirectly using the Offsets
// argument
- Value *checkGEP(Value *&Offsets, Type *Ty, GetElementPtrInst *GEP,
+ Value *checkGEP(Value *&Offsets, FixedVectorType *Ty, GetElementPtrInst *GEP,
IRBuilder<> &Builder);
// Compute the scale of this gather/scatter instruction
int computeScale(unsigned GEPElemSize, unsigned MemoryElemSize);
@@ -132,6 +132,11 @@ private:
Value *tryCreateIncrementingWBGatScat(IntrinsicInst *I, Value *BasePtr,
Value *Ptr, unsigned TypeScale,
IRBuilder<> &Builder);
+
+ // Optimise the base and offsets of the given address
+ bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI);
+ // Try to fold consecutive geps together into one
+ Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder);
// Check whether these offsets could be moved out of the loop they're in
bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI);
// Pushes the given add out of the loop
@@ -167,7 +172,49 @@ bool MVEGatherScatterLowering::isLegalTypeAndAlignment(unsigned NumElements,
return false;
}
-Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, Type *Ty,
+static bool checkOffsetSize(Value *Offsets, unsigned TargetElemCount) {
+ // Offsets that are not of type <N x i32> are sign extended by the
+ // getelementptr instruction, and MVE gathers/scatters treat the offset as
+ // unsigned. Thus, if the element size is smaller than 32, we can only allow
+ // positive offsets - i.e., the offsets are not allowed to be variables we
+ // can't look into.
+ // Additionally, <N x i32> offsets have to either originate from a zext of a
+ // vector with element types smaller or equal the type of the gather we're
+ // looking at, or consist of constants that we can check are small enough
+ // to fit into the gather type.
+ // Thus we check that 0 < value < 2^TargetElemSize.
+ unsigned TargetElemSize = 128 / TargetElemCount;
+ unsigned OffsetElemSize = cast<FixedVectorType>(Offsets->getType())
+ ->getElementType()
+ ->getScalarSizeInBits();
+ if (OffsetElemSize != TargetElemSize || OffsetElemSize != 32) {
+ Constant *ConstOff = dyn_cast<Constant>(Offsets);
+ if (!ConstOff)
+ return false;
+ int64_t TargetElemMaxSize = (1ULL << TargetElemSize);
+ auto CheckValueSize = [TargetElemMaxSize](Value *OffsetElem) {
+ ConstantInt *OConst = dyn_cast<ConstantInt>(OffsetElem);
+ if (!OConst)
+ return false;
+ int SExtValue = OConst->getSExtValue();
+ if (SExtValue >= TargetElemMaxSize || SExtValue < 0)
+ return false;
+ return true;
+ };
+ if (isa<FixedVectorType>(ConstOff->getType())) {
+ for (unsigned i = 0; i < TargetElemCount; i++) {
+ if (!CheckValueSize(ConstOff->getAggregateElement(i)))
+ return false;
+ }
+ } else {
+ if (!CheckValueSize(ConstOff))
+ return false;
+ }
+ }
+ return true;
+}
+
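checkOffsetSize implements the overflow rule described in the comment: because MVE gathers treat offsets as unsigned while getelementptr sign-extends them, a narrow offset vector is only safe when every element is a known constant in [0, 2^(128/N)), where N is the lane count of the 128-bit gather. A standalone sketch of that bound for a vector of known constant offsets (illustrative names):

#include <cstdint>
#include <vector>

// Lanes is the gather's element count, so each lane is 128/Lanes bits wide.
bool offsetsFitUnsigned(const std::vector<int64_t> &ConstOffsets,
                        unsigned Lanes) {
  const unsigned ElemBits = 128 / Lanes;       // 8, 16 or 32 for MVE gathers
  if (ElemBits >= 64)
    return true;                               // 64-bit lanes hold any offset
  const int64_t Limit = int64_t(1) << ElemBits;  // 2^ElemBits
  for (int64_t O : ConstOffsets)
    if (O < 0 || O >= Limit)
      return false;  // meaning would change once treated as unsigned
  return true;
}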
+Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, FixedVectorType *Ty,
GetElementPtrInst *GEP,
IRBuilder<> &Builder) {
if (!GEP) {
@@ -178,40 +225,43 @@ Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, Type *Ty,
LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementpointer found."
<< " Looking at intrinsic for base + vector of offsets\n");
Value *GEPPtr = GEP->getPointerOperand();
- if (GEPPtr->getType()->isVectorTy()) {
+ Offsets = GEP->getOperand(1);
+ if (GEPPtr->getType()->isVectorTy() ||
+ !isa<FixedVectorType>(Offsets->getType()))
return nullptr;
- }
+
if (GEP->getNumOperands() != 2) {
LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementptr with too many"
<< " operands. Expanding.\n");
return nullptr;
}
Offsets = GEP->getOperand(1);
+ unsigned OffsetsElemCount =
+ cast<FixedVectorType>(Offsets->getType())->getNumElements();
// Paranoid check whether the number of parallel lanes is the same
- assert(cast<FixedVectorType>(Ty)->getNumElements() ==
- cast<FixedVectorType>(Offsets->getType())->getNumElements());
- // Only <N x i32> offsets can be integrated into an arm gather, any smaller
- // type would have to be sign extended by the gep - and arm gathers can only
- // zero extend. Additionally, the offsets do have to originate from a zext of
- // a vector with element types smaller or equal the type of the gather we're
- // looking at
- if (Offsets->getType()->getScalarSizeInBits() != 32)
- return nullptr;
- if (ZExtInst *ZextOffs = dyn_cast<ZExtInst>(Offsets))
+ assert(Ty->getNumElements() == OffsetsElemCount);
+
+ ZExtInst *ZextOffs = dyn_cast<ZExtInst>(Offsets);
+ if (ZextOffs)
Offsets = ZextOffs->getOperand(0);
- else if (!(cast<FixedVectorType>(Offsets->getType())->getNumElements() == 4 &&
- Offsets->getType()->getScalarSizeInBits() == 32))
- return nullptr;
+ FixedVectorType *OffsetType = cast<FixedVectorType>(Offsets->getType());
+
+ // If the offsets are already being zext-ed to <N x i32>, that relieves us of
+ // having to make sure that they won't overflow.
+ if (!ZextOffs || cast<FixedVectorType>(ZextOffs->getDestTy())
+ ->getElementType()
+ ->getScalarSizeInBits() != 32)
+ if (!checkOffsetSize(Offsets, OffsetsElemCount))
+ return nullptr;
+ // The offset sizes have been checked; if any truncating or zext-ing is
+ // required to fix them, do that now
if (Ty != Offsets->getType()) {
- if ((Ty->getScalarSizeInBits() <
- Offsets->getType()->getScalarSizeInBits())) {
- LLVM_DEBUG(dbgs() << "masked gathers/scatters: no correct offset type."
- << " Can't create intrinsic.\n");
- return nullptr;
+ if ((Ty->getElementType()->getScalarSizeInBits() <
+ OffsetType->getElementType()->getScalarSizeInBits())) {
+ Offsets = Builder.CreateTrunc(Offsets, Ty);
} else {
- Offsets = Builder.CreateZExt(
- Offsets, VectorType::getInteger(cast<VectorType>(Ty)));
+ Offsets = Builder.CreateZExt(Offsets, VectorType::getInteger(Ty));
}
}
// If none of the checks failed, return the gep's base pointer
@@ -426,7 +476,8 @@ Value *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
Value *Offsets;
- Value *BasePtr = checkGEP(Offsets, ResultTy, GEP, Builder);
+ Value *BasePtr =
+ checkGEP(Offsets, cast<FixedVectorType>(ResultTy), GEP, Builder);
if (!BasePtr)
return nullptr;
// Check whether the offset is a constant increment that could be merged into
@@ -566,7 +617,8 @@ Value *MVEGatherScatterLowering::tryCreateMaskedScatterOffset(
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
Value *Offsets;
- Value *BasePtr = checkGEP(Offsets, InputTy, GEP, Builder);
+ Value *BasePtr =
+ checkGEP(Offsets, cast<FixedVectorType>(InputTy), GEP, Builder);
if (!BasePtr)
return nullptr;
// Check whether the offset is a constant increment that could be merged into
@@ -801,7 +853,6 @@ void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi,
Phi->addIncoming(NewIncrement, Phi->getIncomingBlock(LoopIncrement));
Phi->removeIncomingValue((unsigned)0);
Phi->removeIncomingValue((unsigned)0);
- return;
}
// Check whether all usages of this instruction are as offsets of
@@ -887,11 +938,10 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
return false;
// The phi must be an induction variable
- Instruction *Op;
int IncrementingBlock = -1;
for (int i = 0; i < 2; i++)
- if ((Op = dyn_cast<Instruction>(Phi->getIncomingValue(i))) != nullptr)
+ if (auto *Op = dyn_cast<Instruction>(Phi->getIncomingValue(i)))
if (Op->getOpcode() == Instruction::Add &&
(Op->getOperand(0) == Phi || Op->getOperand(1) == Phi))
IncrementingBlock = i;
@@ -910,7 +960,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
// Get the value that is added to/multiplied with the phi
Value *OffsSecondOperand = Offs->getOperand(OffsSecondOp);
- if (IncrementPerRound->getType() != OffsSecondOperand->getType())
+ if (IncrementPerRound->getType() != OffsSecondOperand->getType() ||
+ !L->isLoopInvariant(OffsSecondOperand))
// Something has gone wrong, abort
return false;
@@ -978,6 +1029,128 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
return true;
}
+static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
+ IRBuilder<> &Builder) {
+ // Splat the non-vector value to a vector of the given type - if the value is
+ // a constant (and its value isn't too big), we can even use this opportunity
+ // to scale it to the size of the vector elements
+ auto FixSummands = [&Builder](FixedVectorType *&VT, Value *&NonVectorVal) {
+ ConstantInt *Const;
+ if ((Const = dyn_cast<ConstantInt>(NonVectorVal)) &&
+ VT->getElementType() != NonVectorVal->getType()) {
+ unsigned TargetElemSize = VT->getElementType()->getPrimitiveSizeInBits();
+ uint64_t N = Const->getZExtValue();
+ if (N < (unsigned)(1 << (TargetElemSize - 1))) {
+ NonVectorVal = Builder.CreateVectorSplat(
+ VT->getNumElements(), Builder.getIntN(TargetElemSize, N));
+ return;
+ }
+ }
+ NonVectorVal =
+ Builder.CreateVectorSplat(VT->getNumElements(), NonVectorVal);
+ };
+
+ FixedVectorType *XElType = dyn_cast<FixedVectorType>(X->getType());
+ FixedVectorType *YElType = dyn_cast<FixedVectorType>(Y->getType());
+ // If one of X, Y is not a vector, we have to splat it in order
+ // to add the two of them.
+ if (XElType && !YElType) {
+ FixSummands(XElType, Y);
+ YElType = cast<FixedVectorType>(Y->getType());
+ } else if (YElType && !XElType) {
+ FixSummands(YElType, X);
+ XElType = cast<FixedVectorType>(X->getType());
+ }
+ assert(XElType && YElType && "Unknown vector types");
+ // Check that the summands are of compatible types
+ if (XElType != YElType) {
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: incompatible gep offsets\n");
+ return nullptr;
+ }
+
+ if (XElType->getElementType()->getScalarSizeInBits() != 32) {
+ // Check that by adding the vectors we do not accidentally
+ // create an overflow
+ Constant *ConstX = dyn_cast<Constant>(X);
+ Constant *ConstY = dyn_cast<Constant>(Y);
+ if (!ConstX || !ConstY)
+ return nullptr;
+ unsigned TargetElemSize = 128 / XElType->getNumElements();
+ for (unsigned i = 0; i < XElType->getNumElements(); i++) {
+ ConstantInt *ConstXEl =
+ dyn_cast<ConstantInt>(ConstX->getAggregateElement(i));
+ ConstantInt *ConstYEl =
+ dyn_cast<ConstantInt>(ConstY->getAggregateElement(i));
+ if (!ConstXEl || !ConstYEl ||
+ ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >=
+ (unsigned)(1 << (TargetElemSize - 1)))
+ return nullptr;
+ }
+ }
+
+ Value *Add = Builder.CreateAdd(X, Y);
+
+ FixedVectorType *GEPType = cast<FixedVectorType>(GEP->getType());
+ if (checkOffsetSize(Add, GEPType->getNumElements()))
+ return Add;
+ else
+ return nullptr;
+}
+
+Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP,
+ Value *&Offsets,
+ IRBuilder<> &Builder) {
+ Value *GEPPtr = GEP->getPointerOperand();
+ Offsets = GEP->getOperand(1);
+ // We only merge geps with constant offsets, because only for those
+ // we can make sure that we do not cause an overflow
+ if (!isa<Constant>(Offsets))
+ return nullptr;
+ GetElementPtrInst *BaseGEP;
+ if ((BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr))) {
+ // Merge the two geps into one
+ Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder);
+ if (!BaseBasePtr)
+ return nullptr;
+ Offsets =
+ CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder);
+ if (Offsets == nullptr)
+ return nullptr;
+ return BaseBasePtr;
+ }
+ return GEPPtr;
+}
+
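foldGEP walks a chain of getelementptrs from the outside in, returning the innermost base pointer and folding the constant offsets together with CheckAndCreateOffsetAdd, which refuses whenever a summed offset might overflow the narrow offset lanes. The recursion is easier to see on a toy model where a GEP is just a base plus a constant per-lane offset vector; everything below is illustrative and assumes all GEPs use the same lane count:

#include <cstddef>
#include <cstdint>
#include <vector>

struct GepNode {
  const GepNode *Base = nullptr;     // nullptr => this is the base pointer
  std::vector<int64_t> Offsets;      // constant per-lane offsets
};

// Returns the innermost base and accumulates the lane-wise offset sum in Acc,
// or nullptr if any intermediate sum leaves [0, Limit), mirroring the overflow
// veto in CheckAndCreateOffsetAdd.
const GepNode *foldGep(const GepNode &G, std::vector<int64_t> &Acc,
                       int64_t Limit) {
  if (!G.Base)
    return &G;                       // reached the base pointer
  const GepNode *Base = foldGep(*G.Base, Acc, Limit);
  if (!Base)
    return nullptr;
  if (Acc.empty())
    Acc.assign(G.Offsets.size(), 0);
  for (std::size_t I = 0; I < G.Offsets.size(); ++I) {
    Acc[I] += G.Offsets[I];
    if (Acc[I] < 0 || Acc[I] >= Limit)
      return nullptr;                // merged offset would overflow
  }
  return Base;
}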
+bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB,
+ LoopInfo *LI) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Address);
+ if (!GEP)
+ return false;
+ bool Changed = false;
+ if (GEP->hasOneUse() &&
+ dyn_cast<GetElementPtrInst>(GEP->getPointerOperand())) {
+ IRBuilder<> Builder(GEP->getContext());
+ Builder.SetInsertPoint(GEP);
+ Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
+ Value *Offsets;
+ Value *Base = foldGEP(GEP, Offsets, Builder);
+ // We only want to merge the geps if there is a real chance that they can be
+ // used by an MVE gather; thus the offset has to have the correct size
+ // (always i32 if it is not of vector type) and the base has to be a
+ // pointer.
+ if (Offsets && Base && Base != GEP) {
+ PointerType *BaseType = cast<PointerType>(Base->getType());
+ GetElementPtrInst *NewAddress = GetElementPtrInst::Create(
+ BaseType->getPointerElementType(), Base, Offsets, "gep.merged", GEP);
+ GEP->replaceAllUsesWith(NewAddress);
+ GEP = NewAddress;
+ Changed = true;
+ }
+ }
+ Changed |= optimiseOffsets(GEP->getOperand(1), GEP->getParent(), LI);
+ return Changed;
+}
+
bool MVEGatherScatterLowering::runOnFunction(Function &F) {
if (!EnableMaskedGatherScatters)
return false;
@@ -993,24 +1166,21 @@ bool MVEGatherScatterLowering::runOnFunction(Function &F) {
bool Changed = false;
for (BasicBlock &BB : F) {
+ Changed |= SimplifyInstructionsInBlock(&BB);
+
for (Instruction &I : BB) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
- if (II && II->getIntrinsicID() == Intrinsic::masked_gather) {
+ if (II && II->getIntrinsicID() == Intrinsic::masked_gather &&
+ isa<FixedVectorType>(II->getType())) {
Gathers.push_back(II);
- if (isa<GetElementPtrInst>(II->getArgOperand(0)))
- Changed |= optimiseOffsets(
- cast<Instruction>(II->getArgOperand(0))->getOperand(1),
- II->getParent(), LI);
- } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter) {
+ Changed |= optimiseAddress(II->getArgOperand(0), II->getParent(), LI);
+ } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter &&
+ isa<FixedVectorType>(II->getArgOperand(0)->getType())) {
Scatters.push_back(II);
- if (isa<GetElementPtrInst>(II->getArgOperand(1)))
- Changed |= optimiseOffsets(
- cast<Instruction>(II->getArgOperand(1))->getOperand(1),
- II->getParent(), LI);
+ Changed |= optimiseAddress(II->getArgOperand(1), II->getParent(), LI);
}
}
}
-
for (unsigned i = 0; i < Gathers.size(); i++) {
IntrinsicInst *I = Gathers[i];
Value *L = lowerGather(I);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredUtils.h b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredUtils.h
new file mode 100644
index 000000000000..9ab5d92729fe
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredUtils.h
@@ -0,0 +1,157 @@
+//===-- MVETailPredUtils.h - Tail predication utility functions -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions for low overhead and tail predicated
+// loops, shared between the ARMLowOverheadLoops pass and anywhere else that
+// needs them.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+#define LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+namespace llvm {
+
+static inline unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("unhandled vctp opcode");
+ break;
+ case ARM::MVE_VCTP8:
+ return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
+ case ARM::MVE_VCTP16:
+ return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
+ case ARM::MVE_VCTP32:
+ return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
+ case ARM::MVE_VCTP64:
+ return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
+ }
+ return 0;
+}
+
+static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("unhandled vctp opcode");
+ case ARM::MVE_VCTP8:
+ return 16;
+ case ARM::MVE_VCTP16:
+ return 8;
+ case ARM::MVE_VCTP32:
+ return 4;
+ case ARM::MVE_VCTP64:
+ return 2;
+ }
+ return 0;
+}
+
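The two lookup helpers above encode fixed facts about MVE: each VCTP flavour maps onto the matching DLSTP/WLSTP opcode, and the number of lanes it predicates is simply 128 divided by the element width (16 x i8, 8 x i16, 4 x i32, 2 x i64). The width mapping can be stated as plain arithmetic; the sketch takes the element size in bits instead of an opcode:

// 128-bit MVE vectors: lane count is 128 / element-size-in-bits.
constexpr unsigned tailPredLanes(unsigned ElementBits) {
  return 128 / ElementBits;   // 8 -> 16, 16 -> 8, 32 -> 4, 64 -> 2
}

static_assert(tailPredLanes(8) == 16 && tailPredLanes(64) == 2,
              "VCTP8 predicates 16 lanes, VCTP64 predicates 2");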
+static inline bool isVCTP(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case ARM::MVE_VCTP8:
+ case ARM::MVE_VCTP16:
+ case ARM::MVE_VCTP32:
+ case ARM::MVE_VCTP64:
+ return true;
+ }
+ return false;
+}
+
+static inline bool isLoopStart(MachineInstr &MI) {
+ return MI.getOpcode() == ARM::t2DoLoopStart ||
+ MI.getOpcode() == ARM::t2DoLoopStartTP ||
+ MI.getOpcode() == ARM::t2WhileLoopStart;
+}
+
+// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
+// beq that branches to the exit branch.
+inline void RevertWhileLoopStart(MachineInstr *MI, const TargetInstrInfo *TII,
+ unsigned BrOpc = ARM::t2Bcc) {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Cmp
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
+ MIB.add(MI->getOperand(0));
+ MIB.addImm(0);
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(ARM::NoRegister);
+
+ // Branch
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+ MIB.add(MI->getOperand(1)); // branch target
+ MIB.addImm(ARMCC::EQ); // condition code
+ MIB.addReg(ARM::CPSR);
+
+ MI->eraseFromParent();
+}
+
+inline void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII) {
+ MachineBasicBlock *MBB = MI->getParent();
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .add(predOps(ARMCC::AL));
+
+ MI->eraseFromParent();
+}
+
+inline void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII,
+ bool SetFlags = false) {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
+ MIB.add(MI->getOperand(0));
+ MIB.add(MI->getOperand(1));
+ MIB.add(MI->getOperand(2));
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(0);
+
+ if (SetFlags) {
+ MIB.addReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+ } else
+ MIB.addReg(0);
+
+ MI->eraseFromParent();
+}
+
+// Generate a subs, or sub and cmp, and a branch instead of an LE.
+inline void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII,
+ unsigned BrOpc = ARM::t2Bcc, bool SkipCmp = false) {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Create cmp
+ if (!SkipCmp) {
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
+ MIB.add(MI->getOperand(0));
+ MIB.addImm(0);
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(ARM::NoRegister);
+ }
+
+ // Create bne
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+ MIB.add(MI->getOperand(1)); // branch target
+ MIB.addImm(ARMCC::NE); // condition code
+ MIB.addReg(ARM::CPSR);
+ MI->eraseFromParent();
+}
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
index 5bf3522ab2e6..cccac5595288 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -22,23 +22,13 @@
/// The HardwareLoops pass inserts intrinsics identifying loops that the
/// backend will attempt to convert into a low-overhead loop. The vectorizer is
/// responsible for generating a vectorized loop in which the lanes are
-/// predicated upon the iteration counter. This pass looks at these predicated
-/// vector loops, that are targets for low-overhead loops, and prepares it for
-/// code generation. Once the vectorizer has produced a masked loop, there's a
-/// couple of final forms:
-/// - A tail-predicated loop, with implicit predication.
-/// - A loop containing multiple VCPT instructions, predicating multiple VPT
-/// blocks of instructions operating on different vector types.
-///
-/// This pass:
-/// 1) Checks if the predicates of the masked load/store instructions are
-/// generated by intrinsic @llvm.get.active.lanes(). This intrinsic consumes
-/// the Backedge Taken Count (BTC) of the scalar loop as its second argument,
-/// which we extract to set up the number of elements processed by the loop.
-/// 2) Intrinsic @llvm.get.active.lanes() is then replaced by the MVE target
-/// specific VCTP intrinsic to represent the effect of tail predication.
-/// This will be picked up by the ARM Low-overhead loop pass, which performs
-/// the final transformation to a DLSTP or WLSTP tail-predicated loop.
+/// predicated upon a get.active.lane.mask intrinsic. This pass looks at these
+/// get.active.lane.mask intrinsics and attempts to convert them to VCTP
+/// instructions. This will be picked up by the ARM Low-overhead loop pass later
+/// in the backend, which performs the final transformation to a DLSTP or WLSTP
+/// tail-predicated loop.
+//
+//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMSubtarget.h"
@@ -57,6 +47,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -66,8 +57,8 @@ using namespace llvm;
#define DESC "Transform predicated vector loops to use MVE tail predication"
cl::opt<TailPredication::Mode> EnableTailPredication(
- "tail-predication", cl::desc("MVE tail-predication options"),
- cl::init(TailPredication::Disabled),
+ "tail-predication", cl::desc("MVE tail-predication pass options"),
+ cl::init(TailPredication::Enabled),
cl::values(clEnumValN(TailPredication::Disabled, "disabled",
"Don't tail-predicate loops"),
clEnumValN(TailPredication::EnabledNoReductions,
@@ -112,23 +103,18 @@ public:
bool runOnLoop(Loop *L, LPPassManager&) override;
private:
- /// Perform the relevant checks on the loop and convert if possible.
- bool TryConvert(Value *TripCount);
+ /// Perform the relevant checks on the loop and convert active lane masks if
+ /// possible.
+ bool TryConvertActiveLaneMask(Value *TripCount);
- /// Return whether this is a vectorized loop, that contains masked
- /// load/stores.
- bool IsPredicatedVectorLoop();
-
- /// Perform checks on the arguments of @llvm.get.active.lane.mask
- /// intrinsic: check if the first is a loop induction variable, and for the
- /// the second check that no overflow can occur in the expression that use
- /// this backedge-taken count.
- bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount,
- FixedVectorType *VecTy);
+ /// Perform several checks on the arguments of @llvm.get.active.lane.mask
+ /// intrinsic. E.g., check that the loop induction variable and the element
+ /// count are of the form we expect, and also perform overflow checks for
+ /// the new expressions that are created.
+ bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount);
/// Insert the intrinsic to represent the effect of tail predication.
- void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount,
- FixedVectorType *VecTy);
+ void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount);
/// Rematerialize the iteration count in exit blocks, which enables
/// ARMLowOverheadLoops to better optimise away loop update statements inside
@@ -138,25 +124,6 @@ private:
} // end namespace
-static bool IsDecrement(Instruction &I) {
- auto *Call = dyn_cast<IntrinsicInst>(&I);
- if (!Call)
- return false;
-
- Intrinsic::ID ID = Call->getIntrinsicID();
- return ID == Intrinsic::loop_decrement_reg;
-}
-
-static bool IsMasked(Instruction *I) {
- auto *Call = dyn_cast<IntrinsicInst>(I);
- if (!Call)
- return false;
-
- Intrinsic::ID ID = Call->getIntrinsicID();
- // TODO: Support gather/scatter expand/compress operations.
- return ID == Intrinsic::masked_store || ID == Intrinsic::masked_load;
-}
-
bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
if (skipLoop(L) || !EnableTailPredication)
return false;
@@ -188,7 +155,7 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
continue;
Intrinsic::ID ID = Call->getIntrinsicID();
- if (ID == Intrinsic::set_loop_iterations ||
+ if (ID == Intrinsic::start_loop_iterations ||
ID == Intrinsic::test_set_loop_iterations)
return cast<IntrinsicInst>(&I);
}
@@ -207,148 +174,23 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
return false;
}
- // Search for the hardware loop intrinic that decrements the loop counter.
- IntrinsicInst *Decrement = nullptr;
- for (auto *BB : L->getBlocks()) {
- for (auto &I : *BB) {
- if (IsDecrement(I)) {
- Decrement = cast<IntrinsicInst>(&I);
- break;
- }
- }
- }
-
- if (!Decrement)
- return false;
-
- LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n"
- << *Decrement << "\n");
-
- if (!TryConvert(Setup->getArgOperand(0))) {
- LLVM_DEBUG(dbgs() << "ARM TP: Can't tail-predicate this loop.\n");
- return false;
- }
-
- return true;
-}
-
-static FixedVectorType *getVectorType(IntrinsicInst *I) {
- unsigned TypeOp = I->getIntrinsicID() == Intrinsic::masked_load ? 0 : 1;
- auto *PtrTy = cast<PointerType>(I->getOperand(TypeOp)->getType());
- auto *VecTy = cast<FixedVectorType>(PtrTy->getElementType());
- assert(VecTy && "No scalable vectors expected here");
- return VecTy;
-}
+ LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n");
-bool MVETailPredication::IsPredicatedVectorLoop() {
- // Check that the loop contains at least one masked load/store intrinsic.
- // We only support 'normal' vector instructions - other than masked
- // load/stores.
- bool ActiveLaneMask = false;
- for (auto *BB : L->getBlocks()) {
- for (auto &I : *BB) {
- auto *Int = dyn_cast<IntrinsicInst>(&I);
- if (!Int)
- continue;
+ bool Changed = TryConvertActiveLaneMask(Setup->getArgOperand(0));
- switch (Int->getIntrinsicID()) {
- case Intrinsic::get_active_lane_mask:
- ActiveLaneMask = true;
- LLVM_FALLTHROUGH;
- case Intrinsic::sadd_sat:
- case Intrinsic::uadd_sat:
- case Intrinsic::ssub_sat:
- case Intrinsic::usub_sat:
- continue;
- case Intrinsic::fma:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::round:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::fabs:
- if (ST->hasMVEFloatOps())
- continue;
- LLVM_FALLTHROUGH;
- default:
- break;
- }
-
- if (IsMasked(&I)) {
- auto *VecTy = getVectorType(Int);
- unsigned Lanes = VecTy->getNumElements();
- unsigned ElementWidth = VecTy->getScalarSizeInBits();
- // MVE vectors are 128-bit, but don't support 128 x i1.
- // TODO: Can we support vectors larger than 128-bits?
- unsigned MaxWidth = TTI->getRegisterBitWidth(true);
- if (Lanes * ElementWidth > MaxWidth || Lanes == MaxWidth)
- return false;
- MaskedInsts.push_back(cast<IntrinsicInst>(&I));
- continue;
- }
-
- for (const Use &U : Int->args()) {
- if (isa<VectorType>(U->getType()))
- return false;
- }
- }
- }
-
- if (!ActiveLaneMask) {
- LLVM_DEBUG(dbgs() << "ARM TP: No get.active.lane.mask intrinsic found.\n");
- return false;
- }
- return !MaskedInsts.empty();
-}
-
-// Look through the exit block to see whether there's a duplicate predicate
-// instruction. This can happen when we need to perform a select on values
-// from the last and previous iteration. Instead of doing a straight
-// replacement of that predicate with the vctp, clone the vctp and place it
-// in the block. This means that the VPR doesn't have to be live into the
-// exit block which should make it easier to convert this loop into a proper
-// tail predicated loop.
-static void Cleanup(SetVector<Instruction*> &MaybeDead, Loop *L) {
- BasicBlock *Exit = L->getUniqueExitBlock();
- if (!Exit) {
- LLVM_DEBUG(dbgs() << "ARM TP: can't find loop exit block\n");
- return;
- }
-
- // Drop references and add operands to check for dead.
- SmallPtrSet<Instruction*, 4> Dead;
- while (!MaybeDead.empty()) {
- auto *I = MaybeDead.front();
- MaybeDead.remove(I);
- if (I->hasNUsesOrMore(1))
- continue;
-
- for (auto &U : I->operands())
- if (auto *OpI = dyn_cast<Instruction>(U))
- MaybeDead.insert(OpI);
-
- Dead.insert(I);
- }
-
- for (auto *I : Dead) {
- LLVM_DEBUG(dbgs() << "ARM TP: removing dead insn: "; I->dump());
- I->eraseFromParent();
- }
-
- for (auto I : L->blocks())
- DeleteDeadPHIs(I);
+ return Changed;
}
// The active lane intrinsic has this form:
//
-// @llvm.get.active.lane.mask(IV, BTC)
+// @llvm.get.active.lane.mask(IV, TC)
//
// Here we perform checks that this intrinsic behaves as expected,
// which means:
//
-// 1) The element count, which is calculated with BTC + 1, cannot overflow.
-// 2) The element count needs to be sufficiently large that the decrement of
-// element counter doesn't overflow, which means that we need to prove:
+// 1) Check that the TripCount (TC) belongs to this loop (originally).
+// 2) The element count (TC) needs to be sufficiently large that the decrement
+// of the element counter doesn't overflow, which means that we need to prove:
// ceil(ElementCount / VectorWidth) >= TripCount
// by rounding up ElementCount up:
// ((ElementCount + (VectorWidth - 1)) / VectorWidth
@@ -357,109 +199,122 @@ static void Cleanup(SetVector<Instruction*> &MaybeDead, Loop *L) {
// 3) The IV must be an induction phi with an increment equal to the
// vector width.
bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
- Value *TripCount, FixedVectorType *VecTy) {
+ Value *TripCount) {
bool ForceTailPredication =
EnableTailPredication == TailPredication::ForceEnabledNoReductions ||
EnableTailPredication == TailPredication::ForceEnabled;
- // 1) Test whether entry to the loop is protected by a conditional
- // BTC + 1 < 0. In other words, if the scalar trip count overflows,
- // becomes negative, we shouldn't enter the loop and creating
- // tripcount expression BTC + 1 is not safe. So, check that BTC
- // isn't max. This is evaluated in unsigned, because the semantics
- // of @get.active.lane.mask is a ULE comparison.
-
- int VectorWidth = VecTy->getNumElements();
- auto *BackedgeTakenCount = ActiveLaneMask->getOperand(1);
- auto *BTC = SE->getSCEV(BackedgeTakenCount);
-
- if (!llvm::cannotBeMaxInLoop(BTC, L, *SE, false /*Signed*/) &&
- !ForceTailPredication) {
- LLVM_DEBUG(dbgs() << "ARM TP: Overflow possible, BTC can be max: ";
- BTC->dump());
+
+ Value *ElemCount = ActiveLaneMask->getOperand(1);
+ bool Changed = false;
+ if (!L->makeLoopInvariant(ElemCount, Changed))
return false;
- }
- // 2) Prove that the sub expression is non-negative, i.e. it doesn't overflow:
- //
- // (((ElementCount + (VectorWidth - 1)) / VectorWidth) - TripCount
- //
- // 2.1) First prove overflow can't happen in:
- //
- // ElementCount + (VectorWidth - 1)
- //
- // Because of a lack of context, it is difficult to get a useful bounds on
- // this expression. But since ElementCount uses the same variables as the
- // TripCount (TC), for which we can find meaningful value ranges, we use that
- // instead and assert that:
- //
- // upperbound(TC) <= UINT_MAX - VectorWidth
- //
+ auto *EC = SE->getSCEV(ElemCount);
auto *TC = SE->getSCEV(TripCount);
- unsigned SizeInBits = TripCount->getType()->getScalarSizeInBits();
- auto Diff = APInt(SizeInBits, ~0) - APInt(SizeInBits, VectorWidth);
- uint64_t MaxMinusVW = Diff.getZExtValue();
- uint64_t UpperboundTC = SE->getSignedRange(TC).getUpper().getZExtValue();
-
- if (UpperboundTC > MaxMinusVW && !ForceTailPredication) {
- LLVM_DEBUG(dbgs() << "ARM TP: Overflow possible in tripcount rounding:\n";
- dbgs() << "upperbound(TC) <= UINT_MAX - VectorWidth\n";
- dbgs() << UpperboundTC << " <= " << MaxMinusVW << "== false\n";);
+ int VectorWidth =
+ cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
+ if (VectorWidth != 4 && VectorWidth != 8 && VectorWidth != 16)
return false;
- }
+ ConstantInt *ConstElemCount = nullptr;
- // 2.2) Make sure overflow doesn't happen in final expression:
- // (((ElementCount + (VectorWidth - 1)) / VectorWidth) - TripCount,
- // To do this, compare the full ranges of these subexpressions:
- //
- // Range(Ceil) <= Range(TC)
- //
- // where Ceil = ElementCount + (VW-1) / VW. If Ceil and TC are runtime
- // values (and not constants), we have to compensate for the lowerbound value
- // range to be off by 1. The reason is that BTC lives in the preheader in
- // this form:
- //
- // %trip.count.minus = add nsw nuw i32 %N, -1
- //
- // For the loop to be executed, %N has to be >= 1 and as a result the value
- // range of %trip.count.minus has a lower bound of 0. Value %TC has this form:
- //
- // %5 = add nuw nsw i32 %4, 1
- // call void @llvm.set.loop.iterations.i32(i32 %5)
- //
- // where %5 is some expression using %N, which needs to have a lower bound of
- // 1. Thus, if the ranges of Ceil and TC are not a single constant but a set,
- // we first add 0 to TC such that we can do the <= comparison on both sets.
- //
- auto *One = SE->getOne(TripCount->getType());
- // ElementCount = BTC + 1
- auto *ElementCount = SE->getAddExpr(BTC, One);
- // Tmp = ElementCount + (VW-1)
- auto *ECPlusVWMinus1 = SE->getAddExpr(ElementCount,
- SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1)));
- // Ceil = ElementCount + (VW-1) / VW
- auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1,
- SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth)));
-
- ConstantRange RangeCeil = SE->getSignedRange(Ceil) ;
- ConstantRange RangeTC = SE->getSignedRange(TC) ;
- if (!RangeTC.isSingleElement()) {
- auto ZeroRange =
- ConstantRange(APInt(TripCount->getType()->getScalarSizeInBits(), 0));
- RangeTC = RangeTC.unionWith(ZeroRange);
- }
- if (!RangeTC.contains(RangeCeil) && !ForceTailPredication) {
- LLVM_DEBUG(dbgs() << "ARM TP: Overflow possible in sub\n");
+ // 1) Smoke-test that the original scalar loop TripCount (TC) belongs to
+ // this loop. The scalar tripcount corresponds to the number of elements
+ // processed by the loop, so we will refer to that from this point on.
+ if (!SE->isLoopInvariant(EC, L)) {
+ LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n");
return false;
}
- // 3) Find out if IV is an induction phi. Note that We can't use Loop
+ if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) {
+ ConstantInt *TC = dyn_cast<ConstantInt>(TripCount);
+ if (!TC) {
+ LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in "
+ "set.loop.iterations\n");
+ return false;
+ }
+
+ // Calculate 2 tripcount values and check that they are consistent with
+ // each other. The TripCount for a predicated vector loop body is
+ // ceil(ElementCount/Width), or floor((ElementCount+Width-1)/Width) as we
+ // work it out here.
+ uint64_t TC1 = TC->getZExtValue();
+ uint64_t TC2 =
+ (ConstElemCount->getZExtValue() + VectorWidth - 1) / VectorWidth;
+
+ // If the tripcount values are inconsistent, we can't insert the VCTP and
+ // trigger tail-predication; keep the intrinsic as a get.active.lane.mask
+ // and legalize this.
+ if (TC1 != TC2) {
+ LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: "
+ << TC1 << " from set.loop.iterations, and "
+ << TC2 << " from get.active.lane.mask\n");
+ return false;
+ }
+ } else if (!ForceTailPredication) {
+ // 2) We need to prove that the sub expression that we create in the
+ // tail-predicated loop body, which calculates the remaining elements to be
+ // processed, is non-negative, i.e. it doesn't overflow:
+ //
+ // ((ElementCount + VectorWidth - 1) / VectorWidth) - TripCount >= 0
+ //
+ // This is true if:
+ //
+ // TripCount == (ElementCount + VectorWidth - 1) / VectorWidth
+ //
+ // which is what we will be using here.
+ //
+ auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth));
+ // ElementCount + (VW-1):
+ auto *ECPlusVWMinus1 = SE->getAddExpr(EC,
+ SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1)));
+
+ // Ceil = (ElementCount + (VW-1)) / VW
+ auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, VW);
+
+ // Prevent unused variable warnings with TC
+ (void)TC;
+ LLVM_DEBUG(
+ dbgs() << "ARM TP: Analysing overflow behaviour for:\n";
+ dbgs() << "ARM TP: - TripCount = "; TC->dump();
+ dbgs() << "ARM TP: - ElemCount = "; EC->dump();
+ dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n";
+ dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump();
+ );
+
+ // As an example, almost all the tripcount expressions (produced by the
+ // vectoriser) look like this:
+ //
+ // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw>) /u 4)
+ //
+ // and "(ElementCount + (VW-1)) / VW":
+ //
+ // Ceil = ((3 + %N) /u 4)
+ //
+ // Check for equality of TC and Ceil by calculating the SCEV expression
+ // TC - Ceil and testing it for zero.
+ //
+ bool Zero = SE->getMinusSCEV(
+ SE->getBackedgeTakenCount(L),
+ SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
+ SE->getNegativeSCEV(VW)),
+ VW))
+ ->isZero();
+
+ if (!Zero) {
+ LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
+ return false;
+ }
+ }
+
+ // 3) Find out if IV is an induction phi. Note that we can't use Loop
// helpers here to get the induction variable, because the hardware loop is
- // no longer in loopsimplify form, and also the hwloop intrinsic use a
- // different counter. Using SCEV, we check that the induction is of the
+ // no longer in loopsimplify form, and also the hwloop intrinsic uses a
+ // different counter. Using SCEV, we check that the induction is of the
// form i = i + 4, where the increment must be equal to the VectorWidth.
auto *IV = ActiveLaneMask->getOperand(0);
auto *IVExpr = SE->getSCEV(IV);
auto *AddExpr = dyn_cast<SCEVAddRecExpr>(IVExpr);
+
if (!AddExpr) {
LLVM_DEBUG(dbgs() << "ARM TP: induction not an add expr: "; IVExpr->dump());
return false;
@@ -469,6 +324,11 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
LLVM_DEBUG(dbgs() << "ARM TP: phi not part of this loop\n");
return false;
}
+ auto *Base = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
+ if (!Base || !Base->isZero()) {
+ LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n");
+ return false;
+ }
auto *Step = dyn_cast<SCEVConstant>(AddExpr->getOperand(1));
if (!Step) {
LLVM_DEBUG(dbgs() << "ARM TP: induction step is not a constant: ";
@@ -479,68 +339,29 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
if (VectorWidth == StepValue)
return true;
- LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue << " doesn't match "
- "vector width " << VectorWidth << "\n");
+ LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue
+ << " doesn't match vector width " << VectorWidth << "\n");
return false;
}
-// Materialize NumElements in the preheader block.
-static Value *getNumElements(BasicBlock *Preheader, Value *BTC) {
- // First, check the preheader if it not already exist:
- //
- // preheader:
- // %BTC = add i32 %N, -1
- // ..
- // vector.body:
- //
- // if %BTC already exists. We don't need to emit %NumElems = %BTC + 1,
- // but instead can just return %N.
- for (auto &I : *Preheader) {
- if (I.getOpcode() != Instruction::Add || &I != BTC)
- continue;
- ConstantInt *MinusOne = nullptr;
- if (!(MinusOne = dyn_cast<ConstantInt>(I.getOperand(1))))
- continue;
- if (MinusOne->getSExtValue() == -1) {
- LLVM_DEBUG(dbgs() << "ARM TP: Found num elems: " << I << "\n");
- return I.getOperand(0);
- }
- }
-
- // But we do need to materialise BTC if it is not already there,
- // e.g. if it is a constant.
- IRBuilder<> Builder(Preheader->getTerminator());
- Value *NumElements = Builder.CreateAdd(BTC,
- ConstantInt::get(BTC->getType(), 1), "num.elements");
- LLVM_DEBUG(dbgs() << "ARM TP: Created num elems: " << *NumElements << "\n");
- return NumElements;
-}
-
void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
- Value *TripCount, FixedVectorType *VecTy) {
+ Value *TripCount) {
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
Module *M = L->getHeader()->getModule();
Type *Ty = IntegerType::get(M->getContext(), 32);
- unsigned VectorWidth = VecTy->getNumElements();
-
- // The backedge-taken count in @llvm.get.active.lane.mask, its 2nd operand,
- // is one less than the trip count. So we need to find or create
- // %num.elements = %BTC + 1 in the preheader.
- Value *BTC = ActiveLaneMask->getOperand(1);
- Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
- Value *NumElements = getNumElements(L->getLoopPreheader(), BTC);
+ unsigned VectorWidth =
+ cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
// Insert a phi to count the number of elements processed by the loop.
- Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI() );
+ Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI());
PHINode *Processed = Builder.CreatePHI(Ty, 2);
- Processed->addIncoming(NumElements, L->getLoopPreheader());
+ Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader());
- // Replace @llvm.get.active.mask() with the ARM specific VCTP intrinic, and thus
- // represent the effect of tail predication.
+ // Replace @llvm.get.active.lane.mask() with the ARM-specific VCTP intrinsic,
+ // and thus represent the effect of tail predication.
Builder.SetInsertPoint(ActiveLaneMask);
- ConstantInt *Factor =
- ConstantInt::get(cast<IntegerType>(Ty), VectorWidth);
+ ConstantInt *Factor = ConstantInt::get(cast<IntegerType>(Ty), VectorWidth);
Intrinsic::ID VCTPID;
switch (VectorWidth) {
@@ -569,42 +390,36 @@ void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
<< "ARM TP: Inserted VCTP: " << *VCTPCall << "\n");
}
-bool MVETailPredication::TryConvert(Value *TripCount) {
- if (!IsPredicatedVectorLoop()) {
- LLVM_DEBUG(dbgs() << "ARM TP: no masked instructions in loop.\n");
+bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) {
+ SmallVector<IntrinsicInst *, 4> ActiveLaneMasks;
+ for (auto *BB : L->getBlocks())
+ for (auto &I : *BB)
+ if (auto *Int = dyn_cast<IntrinsicInst>(&I))
+ if (Int->getIntrinsicID() == Intrinsic::get_active_lane_mask)
+ ActiveLaneMasks.push_back(Int);
+
+ if (ActiveLaneMasks.empty())
return false;
- }
LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n");
- SetVector<Instruction*> Predicates;
-
- // Walk through the masked intrinsics and try to find whether the predicate
- // operand is generated by intrinsic @llvm.get.active.lane.mask().
- for (auto *I : MaskedInsts) {
- unsigned PredOp = I->getIntrinsicID() == Intrinsic::masked_load ? 2 : 3;
- auto *Predicate = dyn_cast<Instruction>(I->getArgOperand(PredOp));
- if (!Predicate || Predicates.count(Predicate))
- continue;
-
- auto *ActiveLaneMask = dyn_cast<IntrinsicInst>(Predicate);
- if (!ActiveLaneMask ||
- ActiveLaneMask->getIntrinsicID() != Intrinsic::get_active_lane_mask)
- continue;
-
- Predicates.insert(Predicate);
+
+ for (auto *ActiveLaneMask : ActiveLaneMasks) {
LLVM_DEBUG(dbgs() << "ARM TP: Found active lane mask: "
<< *ActiveLaneMask << "\n");
- auto *VecTy = getVectorType(I);
- if (!IsSafeActiveMask(ActiveLaneMask, TripCount, VecTy)) {
+ if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) {
LLVM_DEBUG(dbgs() << "ARM TP: Not safe to insert VCTP.\n");
return false;
}
LLVM_DEBUG(dbgs() << "ARM TP: Safe to insert VCTP.\n");
- InsertVCTPIntrinsic(ActiveLaneMask, TripCount, VecTy);
+ InsertVCTPIntrinsic(ActiveLaneMask, TripCount);
}
- Cleanup(Predicates, L);
+ // Remove dead instructions and now dead phis.
+ for (auto *II : ActiveLaneMasks)
+ RecursivelyDeleteTriviallyDeadInstructions(II);
+ for (auto I : L->blocks())
+ DeleteDeadPHIs(I);
return true;
}
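To make the constant-tripcount consistency check above (TC1 from the hardware-loop start intrinsic vs TC2 from get.active.lane.mask) concrete, here is a small standalone sketch of the arithmetic; ceilDiv and the example numbers are mine, not the pass's:

```cpp
#include <cassert>
#include <cstdint>

// ceil(ElemCount / VectorWidth), computed the same way as TC2 above.
static uint64_t ceilDiv(uint64_t ElemCount, uint64_t VectorWidth) {
  return (ElemCount + VectorWidth - 1) / VectorWidth;
}

int main() {
  // 100 elements at 4 lanes per iteration need exactly 25 iterations...
  assert(ceilDiv(100, 4) == 25);
  // ...and 101 elements need one extra, partially predicated, iteration.
  assert(ceilDiv(101, 4) == 26);

  // The conversion is rejected when the two constants disagree, mirroring
  // the TC1 != TC2 bail-out in IsSafeActiveMask.
  uint64_t TC1 = 26;              // constant fed to the hardware-loop start
  uint64_t TC2 = ceilDiv(101, 4); // derived from the get.active.lane.mask operand
  assert(TC1 == TC2);
  return 0;
}
```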
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
index dc769ae526bc..c7f451cba14f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -107,6 +107,12 @@ static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter,
NumInstrsSteppedOver = 0;
while (Iter != EndIter) {
+ if (Iter->isDebugInstr()) {
+ // Skip debug instructions
+ ++Iter;
+ continue;
+ }
+
NextPred = getVPTInstrPredicate(*Iter, PredReg);
assert(NextPred != ARMVCC::Else &&
"VPT block pass does not expect Else preds");
@@ -170,6 +176,8 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter =
std::next(BlockBeg);
AddedInstIter != Iter; ++AddedInstIter) {
+ if (AddedInstIter->isDebugInstr())
+ continue;
dbgs() << " adding: ";
AddedInstIter->dump();
});
@@ -197,7 +205,7 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
break;
- LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump(););
+ LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump());
// Record the new size of the block
BlockSize += ElseInstCnt;
@@ -211,6 +219,9 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
// Note that we are using "Iter" to iterate over the block so we can update
// it at the same time.
for (; Iter != VPNOTBlockEndIter; ++Iter) {
+ if (Iter->isDebugInstr())
+ continue;
+
// Find the register in which the predicate is
int OpIdx = findFirstVPTPredOperandIdx(*Iter);
assert(OpIdx != -1);
@@ -270,26 +281,33 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
MIBuilder.add(VCMP->getOperand(1));
MIBuilder.add(VCMP->getOperand(2));
MIBuilder.add(VCMP->getOperand(3));
+
+ // We need to remove any kill flags between the original VCMP and the new
+ // insertion point.
+ for (MachineInstr &MII :
+ make_range(VCMP->getIterator(), MI->getIterator())) {
+ MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI);
+ MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI);
+ }
+
VCMP->eraseFromParent();
} else {
MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST));
MIBuilder.addImm((uint64_t)BlockMask);
}
+ // Erase all dead instructions (VPNOT's). Do that now so that they do not
+ // mess with the bundle creation.
+ for (MachineInstr *DeadMI : DeadInstructions)
+ DeadMI->eraseFromParent();
+ DeadInstructions.clear();
+
finalizeBundle(
Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter);
Modified = true;
}
- // Erase all dead instructions
- for (MachineInstr *DeadMI : DeadInstructions) {
- if (DeadMI->isInsideBundle())
- DeadMI->eraseFromBundle();
- else
- DeadMI->eraseFromParent();
- }
-
return Modified;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
index 382ddd4572c7..00e4449769f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
@@ -6,28 +6,28 @@
//
//===----------------------------------------------------------------------===//
//
-/// \file This pass does a few optimisations related to MVE VPT blocks before
-/// register allocation is performed. The goal is to maximize the sizes of the
-/// blocks that will be created by the MVE VPT Block Insertion pass (which runs
-/// after register allocation). The first optimisation done by this pass is the
-/// replacement of "opposite" VCMPs with VPNOTs, so the Block Insertion pass
-/// can delete them later to create larger VPT blocks.
-/// The second optimisation replaces re-uses of old VCCR values with VPNOTs when
-/// inside a block of predicated instructions. This is done to avoid
-/// spill/reloads of VPR in the middle of a block, which prevents the Block
-/// Insertion pass from creating large blocks.
-//
+/// \file This pass does a few optimisations related to Tail predicated loops
+/// and MVE VPT blocks before register allocation is performed. For VPT blocks
+/// the goal is to maximize the sizes of the blocks that will be created by the
+/// MVE VPT Block Insertion pass (which runs after register allocation). For
+/// tail predicated loops we transform the loop into something that will
+/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
+///
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include <cassert>
@@ -35,6 +35,11 @@ using namespace llvm;
#define DEBUG_TYPE "arm-mve-vpt-opts"
+static cl::opt<bool>
+MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
+ cl::desc("Enable merging Loop End and Dec instructions."),
+ cl::init(true));
+
namespace {
class MVEVPTOptimisations : public MachineFunctionPass {
public:
@@ -46,25 +51,314 @@ public:
bool runOnMachineFunction(MachineFunction &Fn) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
StringRef getPassName() const override {
- return "ARM MVE VPT Optimisation Pass";
+ return "ARM MVE TailPred and VPT Optimisation Pass";
}
private:
+ bool MergeLoopEnd(MachineLoop *ML);
+ bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
MachineInstr &Instr,
MachineOperand &User,
Register Target);
bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
+ bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
+ bool ConvertVPSEL(MachineBasicBlock &MBB);
};
char MVEVPTOptimisations::ID = 0;
} // end anonymous namespace
-INITIALIZE_PASS(MVEVPTOptimisations, DEBUG_TYPE,
- "ARM MVE VPT Optimisations pass", false, false)
+INITIALIZE_PASS_BEGIN(MVEVPTOptimisations, DEBUG_TYPE,
+ "ARM MVE TailPred and VPT Optimisations pass", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MVEVPTOptimisations, DEBUG_TYPE,
+ "ARM MVE TailPred and VPT Optimisations pass", false, false)
+
+static MachineInstr *LookThroughCOPY(MachineInstr *MI,
+ MachineRegisterInfo *MRI) {
+ while (MI && MI->getOpcode() == TargetOpcode::COPY &&
+ MI->getOperand(1).getReg().isVirtual())
+ MI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ return MI;
+}
+
+// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
+// corresponding PHI that make up a low overhead loop. Only handles 'do' loops
+// at the moment, returning a t2DoLoopStart in LoopStart.
+static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
+ MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
+ MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
+ MachineBasicBlock *Header = ML->getHeader();
+ MachineBasicBlock *Latch = ML->getLoopLatch();
+ if (!Header || !Latch) {
+ LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
+ return false;
+ }
+
+ // Find the loop end from the terminators.
+ LoopEnd = nullptr;
+ for (auto &T : Latch->terminators()) {
+ if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
+ LoopEnd = &T;
+ break;
+ }
+ if (T.getOpcode() == ARM::t2LoopEndDec &&
+ T.getOperand(2).getMBB() == Header) {
+ LoopEnd = &T;
+ break;
+ }
+ }
+ if (!LoopEnd) {
+ LLVM_DEBUG(dbgs() << " no LoopEnd\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
+
+ // Find the dec from the use of the end. There may be copies between
+ // instructions. We expect the loop to look like:
+ // $vs = t2DoLoopStart ...
+ // loop:
+ // $vp = phi [ $vs ], [ $vd ]
+ // ...
+ // $vd = t2LoopDec $vp
+ // ...
+ // t2LoopEnd $vd, loop
+ if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
+ LoopDec = LoopEnd;
+ else {
+ LoopDec =
+ LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
+ if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
+ LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
+ return false;
+ }
+ }
+ LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
+
+ LoopPhi =
+ LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
+ if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
+ LoopPhi->getNumOperands() != 5 ||
+ (LoopPhi->getOperand(2).getMBB() != Latch &&
+ LoopPhi->getOperand(4).getMBB() != Latch)) {
+ LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
+
+ Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
+ ? LoopPhi->getOperand(3).getReg()
+ : LoopPhi->getOperand(1).getReg();
+ LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
+ if (!LoopStart || LoopStart->getOpcode() != ARM::t2DoLoopStart) {
+ LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
+
+ return true;
+}
+
+// This function converts loops with t2LoopDec and t2LoopEnd instructions into
+// a single t2LoopEndDec instruction. To do that it needs to make sure that LR
+// will be valid to be used for the low overhead loop, which means nothing else
+// is using LR (especially calls) and there are no superfluous copies in the
+// loop. The t2LoopEndDec is a branching terminator that produces a value (the
+// decrement) around the loop edge, which means we need to be careful that it
+// will be valid to allocate without any spilling.
+bool MVEVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
+ if (!MergeEndDec)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
+ << "\n");
+
+ MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
+ if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
+ return false;
+
+ // Check if there is an illegal instruction (a call) in the low overhead loop
+ // and if so revert it now before we get any further.
+ for (MachineBasicBlock *MBB : ML->blocks()) {
+ for (MachineInstr &MI : *MBB) {
+ if (MI.isCall()) {
+ LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI);
+ RevertDoLoopStart(LoopStart, TII);
+ RevertLoopDec(LoopDec, TII);
+ RevertLoopEnd(LoopEnd, TII);
+ return true;
+ }
+ }
+ }
+
+ // Remove any copies from the loop, to ensure the phi that remains is both
+ // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
+ // that cannot spill, we need to be careful what remains in the loop.
+ Register PhiReg = LoopPhi->getOperand(0).getReg();
+ Register DecReg = LoopDec->getOperand(0).getReg();
+ Register StartReg = LoopStart->getOperand(0).getReg();
+ // Ensure the uses are expected, and collect any copies we want to remove.
+ SmallVector<MachineInstr *, 4> Copies;
+ auto CheckUsers = [&Copies](Register BaseReg,
+ ArrayRef<MachineInstr *> ExpectedUsers,
+ MachineRegisterInfo *MRI) {
+ SmallVector<Register, 4> Worklist;
+ Worklist.push_back(BaseReg);
+ while (!Worklist.empty()) {
+ Register Reg = Worklist.pop_back_val();
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+ if (count(ExpectedUsers, &MI))
+ continue;
+ if (MI.getOpcode() != TargetOpcode::COPY ||
+ !MI.getOperand(0).getReg().isVirtual()) {
+ LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
+ return false;
+ }
+ Worklist.push_back(MI.getOperand(0).getReg());
+ Copies.push_back(&MI);
+ }
+ }
+ return true;
+ };
+ if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
+ !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
+ !CheckUsers(StartReg, {LoopPhi}, MRI))
+ return false;
+
+ MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
+ MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
+ MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
+
+ if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
+ LoopPhi->getOperand(3).setReg(StartReg);
+ LoopPhi->getOperand(1).setReg(DecReg);
+ } else {
+ LoopPhi->getOperand(1).setReg(StartReg);
+ LoopPhi->getOperand(3).setReg(DecReg);
+ }
+
+ // Replace the loop dec and loop end as a single instruction.
+ MachineInstrBuilder MI =
+ BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
+ TII->get(ARM::t2LoopEndDec), DecReg)
+ .addReg(PhiReg)
+ .add(LoopEnd->getOperand(1));
+ (void)MI;
+ LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
+
+ LoopDec->eraseFromParent();
+ LoopEnd->eraseFromParent();
+ for (auto *MI : Copies)
+ MI->eraseFromParent();
+ return true;
+}
+
+// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
+// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
+// instruction, making the backend ARMLowOverheadLoops pass's job of finding the
+// VCTP operand much simpler.
+bool MVEVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
+ MachineDominatorTree *DT) {
+ LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
+ << ML->getHeader()->getName() << "\n");
+
+ // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
+ // in the loop.
+ MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
+ if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
+ return false;
+ if (LoopDec != LoopEnd)
+ return false;
+
+ SmallVector<MachineInstr *, 4> VCTPs;
+ for (MachineBasicBlock *BB : ML->blocks())
+ for (MachineInstr &MI : *BB)
+ if (isVCTP(&MI))
+ VCTPs.push_back(&MI);
+
+ if (VCTPs.empty()) {
+ LLVM_DEBUG(dbgs() << " no VCTPs\n");
+ return false;
+ }
+
+ // Check all VCTPs are the same.
+ MachineInstr *FirstVCTP = *VCTPs.begin();
+ for (MachineInstr *VCTP : VCTPs) {
+ LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
+ if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
+ VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
+ LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
+ return false;
+ }
+ }
+
+ // Check that the register being used can be set up before the loop. We expect
+ // this to be:
+ // $vx = ...
+ // loop:
+ // $vp = PHI [ $vx ], [ $vd ]
+ // ..
+ // $vpr = VCTP $vp
+ // ..
+ // $vd = t2SUBri $vp, #n
+ // ..
+ Register CountReg = FirstVCTP->getOperand(1).getReg();
+ if (!CountReg.isVirtual()) {
+ LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
+ return false;
+ }
+ MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
+ if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
+ Phi->getNumOperands() != 5 ||
+ (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
+ Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
+ LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
+ return false;
+ }
+ CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
+ ? Phi->getOperand(3).getReg()
+ : Phi->getOperand(1).getReg();
+
+ // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
+ // the preheader and add the new CountReg to it. We attempt to place it late
+ // in the preheader, but may need to move that earlier based on uses.
+ MachineBasicBlock *MBB = LoopStart->getParent();
+ MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
+ for (MachineInstr &Use :
+ MRI->use_instructions(LoopStart->getOperand(0).getReg()))
+ if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
+ !DT->dominates(ML->getHeader(), Use.getParent())) {
+ LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
+ return false;
+ }
+
+ MachineInstrBuilder MI = BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(),
+ TII->get(ARM::t2DoLoopStartTP))
+ .add(LoopStart->getOperand(0))
+ .add(LoopStart->getOperand(1))
+ .addReg(CountReg);
+ (void)MI;
+ LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
+ << *MI.getInstr());
+ MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
+ LoopStart->eraseFromParent();
+
+ return true;
+}
// Returns true if Opcode is any VCMP Opcode.
static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
@@ -356,7 +650,7 @@ bool MVEVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
}
for (MachineInstr *DeadInstruction : DeadInstructions)
- DeadInstruction->removeFromParent();
+ DeadInstruction->eraseFromParent();
return Modified;
}
@@ -430,7 +724,130 @@ bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
}
for (MachineInstr *DeadInstruction : DeadInstructions)
- DeadInstruction->removeFromParent();
+ DeadInstruction->eraseFromParent();
+
+ return !DeadInstructions.empty();
+}
+
+bool MVEVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
+ MachineDominatorTree *DT) {
+ // Scan through the block, looking for instructions that use constant moves
+ // into VPR that are the negative of one another. These are expected to be
+ // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
+ // mask is kept, and VPNOTs of it are added or reused as we scan through
+ // the function.
+ unsigned LastVPTImm = 0;
+ Register LastVPTReg = 0;
+ SmallSet<MachineInstr *, 4> DeadInstructions;
+
+ for (MachineInstr &Instr : MBB.instrs()) {
+ // Look for predicated MVE instructions.
+ int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
+ if (PIdx == -1)
+ continue;
+ Register VPR = Instr.getOperand(PIdx + 1).getReg();
+ if (!VPR.isVirtual())
+ continue;
+
+ // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
+ MachineInstr *Copy = MRI->getVRegDef(VPR);
+ if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
+ !Copy->getOperand(1).getReg().isVirtual() ||
+ MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
+ LastVPTReg = 0;
+ continue;
+ }
+ Register GPR = Copy->getOperand(1).getReg();
+
+ // Find the Immediate used by the copy.
+ auto getImm = [&](Register GPR) -> unsigned {
+ MachineInstr *Def = MRI->getVRegDef(GPR);
+ if (Def && (Def->getOpcode() == ARM::t2MOVi ||
+ Def->getOpcode() == ARM::t2MOVi16))
+ return Def->getOperand(1).getImm();
+ return -1U;
+ };
+ unsigned Imm = getImm(GPR);
+ if (Imm == -1U) {
+ LastVPTReg = 0;
+ continue;
+ }
+
+ unsigned NotImm = ~Imm & 0xffff;
+ if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
+ Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
+ if (MRI->use_empty(VPR)) {
+ DeadInstructions.insert(Copy);
+ if (MRI->hasOneUse(GPR))
+ DeadInstructions.insert(MRI->getVRegDef(GPR));
+ }
+ LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
+ } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
+ // We have found the not of a previous constant. Create a VPNot of the
+ // earlier predicate reg and use it instead of the copy.
+ Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
+ auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
+ TII->get(ARM::MVE_VPNOT), NewVPR)
+ .addReg(LastVPTReg);
+ addUnpredicatedMveVpredNOp(VPNot);
+
+ // Use the new register and check if the def is now dead.
+ Instr.getOperand(PIdx + 1).setReg(NewVPR);
+ if (MRI->use_empty(VPR)) {
+ DeadInstructions.insert(Copy);
+ if (MRI->hasOneUse(GPR))
+ DeadInstructions.insert(MRI->getVRegDef(GPR));
+ }
+ LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
+ << Instr);
+ VPR = NewVPR;
+ }
+
+ LastVPTImm = Imm;
+ LastVPTReg = VPR;
+ }
+
+ for (MachineInstr *DI : DeadInstructions)
+ DI->eraseFromParent();
+
+ return !DeadInstructions.empty();
+}
+
+// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
+// somewhat blunt approximation to allow tail predication with vpsel
+// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
+// different semantics under tail predication. Until that is modelled we just
+// convert to a VMOVT (via a predicated VORR) instead.
+bool MVEVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
+ bool HasVCTP = false;
+ SmallVector<MachineInstr *, 4> DeadInstructions;
+
+ for (MachineInstr &MI : MBB.instrs()) {
+ if (isVCTP(&MI)) {
+ HasVCTP = true;
+ continue;
+ }
+
+ if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
+ continue;
+
+ MachineInstrBuilder MIBuilder =
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(1))
+ .addImm(ARMVCC::Then)
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(2));
+ // Silence unused variable warning in release builds.
+ (void)MIBuilder;
+ LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
+ dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
+ DeadInstructions.push_back(&MI);
+ }
+
+ for (MachineInstr *DeadInstruction : DeadInstructions)
+ DeadInstruction->eraseFromParent();
return !DeadInstructions.empty();
}
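The ConvertVPSEL comment above argues that, inside a VCTP block, a predicated VORR with identical sources (a VMOVT in effect) produces the same lanes as the VPSEL it replaces. A per-lane scalar sketch of that claim, with MVE predication reduced to one boolean per lane (illustrative only):

```cpp
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<uint32_t, 4> A{1, 2, 3, 4}, B{10, 20, 30, 40};
  std::array<bool, 4> P{true, true, false, false}; // e.g. a VCTP with 2 lanes left

  std::array<uint32_t, 4> Vpsel{};
  std::array<uint32_t, 4> Vmovt = B; // inactive lanes keep the "false" operand
  for (int I = 0; I < 4; ++I) {
    Vpsel[I] = P[I] ? A[I] : B[I];   // MVE_VPSEL: select per lane
    if (P[I])
      Vmovt[I] = A[I] | A[I];        // predicated MVE_VORR A, A == move A
  }
  assert(Vpsel == Vmovt);            // same result lane for lane
  return 0;
}
```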
@@ -439,19 +856,28 @@ bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(Fn.getSubtarget());
- if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+ if (!STI.isThumb2() || !STI.hasLOB())
return false;
TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
MRI = &Fn.getRegInfo();
+ MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
<< "********** Function: " << Fn.getName() << '\n');
bool Modified = false;
+ for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
+ Modified |= MergeLoopEnd(ML);
+ Modified |= ConvertTailPredLoop(ML, DT);
+ }
+
for (MachineBasicBlock &MBB : Fn) {
+ Modified |= ReplaceConstByVPNOTs(MBB, DT);
Modified |= ReplaceVCMPsByVPNOTs(MBB);
Modified |= ReduceOldVCCRValueUses(MBB);
+ Modified |= ConvertVPSEL(MBB);
}
LLVM_DEBUG(dbgs() << "**************************************\n");
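For reference, the "negative of one another" constants that ReplaceConstByVPNOTs looks for are 16-bit complements; a tiny sketch of the NotImm computation (values chosen arbitrarily):

```cpp
#include <cassert>

int main() {
  unsigned Imm = 0x00ff;           // first constant predicate seen in the block
  unsigned Later = 0xff00;         // a later constant predicate
  unsigned NotImm = ~Imm & 0xffff; // same computation the pass performs
  assert(Later == NotImm);         // -> reuse the earlier VPR via one VPNOT
  return 0;
}
```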
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 48c6b47f2154..d728572e2858 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -12,6 +12,7 @@
#include "Thumb2InstrInfo.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -38,6 +39,11 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
cl::desc("Use old-style Thumb2 if-conversion heuristics"),
cl::init(false));
+static cl::opt<bool>
+PreferNoCSEL("prefer-no-csel", cl::Hidden,
+ cl::desc("Prefer predicated Move to CSEL"),
+ cl::init(false));
+
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI) {}
@@ -118,6 +124,31 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
return getITInstrPredicate(*MBBI, PredReg) == ARMCC::AL;
}
+MachineInstr *
+Thumb2InstrInfo::optimizeSelect(MachineInstr &MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool PreferFalse) const {
+ // Try to use the base optimizeSelect, which uses canFoldIntoMOVCC to fold the
+ // MOVCC into another instruction. If that fails on 8.1-M fall back to using a
+ // CSEL.
+ MachineInstr *RV = ARMBaseInstrInfo::optimizeSelect(MI, SeenMIs, PreferFalse);
+ if (!RV && getSubtarget().hasV8_1MMainlineOps() && !PreferNoCSEL) {
+ Register DestReg = MI.getOperand(0).getReg();
+
+ if (!DestReg.isVirtual())
+ return nullptr;
+
+ MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+ get(ARM::t2CSEL), DestReg)
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(3));
+ SeenMIs.insert(NewMI);
+ return NewMI;
+ }
+ return RV;
+}
+
void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
@@ -227,6 +258,22 @@ void Thumb2InstrInfo::expandLoadStackGuard(
expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12);
}
+MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ switch (MI.getOpcode()) {
+ case ARM::MVE_VMAXNMAf16:
+ case ARM::MVE_VMAXNMAf32:
+ case ARM::MVE_VMINNMAf16:
+ case ARM::MVE_VMINNMAf32:
+ // Don't allow predicated instructions to be commuted.
+ if (getVPTInstrPredicate(MI) != ARMVCC::None)
+ return nullptr;
+ }
+ return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+}
+
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const DebugLoc &dl, Register DestReg,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index ec3763632239..808167bfdcbc 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -60,6 +60,14 @@ public:
///
const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
+ MachineInstr *optimizeSelect(MachineInstr &MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool) const override;
+
+ MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const override;
+
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index ae661594bdc9..0f7e19038673 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -43,7 +43,7 @@
using namespace llvm;
-#define DEBUG_TYPE "t2-reduce-size"
+#define DEBUG_TYPE "thumb2-reduce-size"
#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td
index 6730f2e1673e..9507aa40c3d8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td
@@ -195,7 +195,7 @@ def FamilyAVR6 : Family<"avr6",
def FamilyTiny : Family<"avrtiny",
[FamilyAVR0, FeatureBREAK, FeatureSRAM,
- FeatureTinyEncoding, FeatureMMR]>;
+ FeatureTinyEncoding]>;
def FamilyXMEGA : Family<"xmega",
[FamilyAVR0, FeatureLPM, FeatureIJMPCALL, FeatureADDSUBIW,
@@ -286,8 +286,10 @@ def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny441", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny841", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>;
@@ -307,19 +309,23 @@ def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>;
def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega8", FamilyAVR4, ELFArchAVR4>; // FIXME: family may be wrong
+def : Device<"atmega8", FamilyAVR2, ELFArchAVR4,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega8a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8a", FamilyAVR2, ELFArchAVR4,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48pb", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88pb", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4,
@@ -351,6 +357,7 @@ def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168pb", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>;
@@ -361,6 +368,7 @@ def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324pb", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega325p", FamilyAVR5, ELFArchAVR5>;
@@ -371,6 +379,7 @@ def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328pb", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>;
@@ -451,9 +460,9 @@ def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>;
def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>;
def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>;
def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32e5", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega16e5", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega8e5", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32e5", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16e5", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega8e5", FamilyXMEGAU, ELFArchXMEGA2>;
def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>;
def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>;
def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 8ee69201e932..a48d3d134bb5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -416,6 +416,44 @@ bool AVRExpandPseudo::expand<AVR::COMWRd>(Block &MBB, BlockIt MBBI) {
}
template <>
+bool AVRExpandPseudo::expand<AVR::NEGWRd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ // Do NEG on the upper byte.
+ auto MIBHI =
+ buildMI(MBB, MBBI, AVR::NEGRd)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
+ // SREG is always implicitly dead
+ MIBHI->getOperand(2).setIsDead();
+
+ // Do NEG on the lower byte.
+ buildMI(MBB, MBBI, AVR::NEGRd)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+ // Do an extra SBCI.
+ auto MISBCI =
+ buildMI(MBB, MBBI, AVR::SBCIRdK)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addImm(0);
+ if (ImpIsDead)
+ MISBCI->getOperand(3).setIsDead();
+ // SREG is always implicitly killed
+ MISBCI->getOperand(4).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
bool AVRExpandPseudo::expand<AVR::CPWRdRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
@@ -1438,6 +1476,111 @@ bool AVRExpandPseudo::expand<AVR::ASRWRd>(Block &MBB, BlockIt MBBI) {
return true;
}
+template <>
+bool AVRExpandPseudo::expand<AVR::LSLB7Rd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+
+ // ror r24
+ // clr r24
+ // ror r24
+
+ buildMI(MBB, MBBI, AVR::RORRd)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill));
+
+ buildMI(MBB, MBBI, AVR::EORRdRr)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, getKillRegState(DstIsKill));
+
+ auto MIRRC =
+ buildMI(MBB, MBBI, AVR::RORRd)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MIRRC->getOperand(2).setIsDead();
+
+ // SREG is always implicitly killed
+ MIRRC->getOperand(3).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LSRB7Rd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+
+ // rol r24
+ // clr r24
+ // rol r24
+
+ buildMI(MBB, MBBI, AVR::ADCRdRr)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, getKillRegState(DstIsKill));
+
+ buildMI(MBB, MBBI, AVR::EORRdRr)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, getKillRegState(DstIsKill));
+
+ auto MIRRC =
+ buildMI(MBB, MBBI, AVR::ADCRdRr)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MIRRC->getOperand(3).setIsDead();
+
+ // SREG is always implicitly killed
+ MIRRC->getOperand(4).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ASRB7Rd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+
+ // lsl r24
+ // sbc r24, r24
+
+ buildMI(MBB, MBBI, AVR::ADDRdRr)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, getKillRegState(DstIsKill));
+
+ auto MIRRC = buildMI(MBB, MBBI, AVR::SBCRdRr)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MIRRC->getOperand(3).setIsDead();
+
+ // SREG is always implicitly killed
+ MIRRC->getOperand(4).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
@@ -1616,6 +1759,7 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::ORIWRdK);
EXPAND(AVR::EORWRdRr);
EXPAND(AVR::COMWRd);
+ EXPAND(AVR::NEGWRd);
EXPAND(AVR::CPWRdRr);
EXPAND(AVR::CPCWRdRr);
EXPAND(AVR::LDIWRdK);
@@ -1658,6 +1802,9 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::RORWRd);
EXPAND(AVR::ROLWRd);
EXPAND(AVR::ASRWRd);
+ EXPAND(AVR::LSLB7Rd);
+ EXPAND(AVR::LSRB7Rd);
+ EXPAND(AVR::ASRB7Rd);
EXPAND(AVR::SEXT);
EXPAND(AVR::ZEXT);
EXPAND(AVR::SPREAD);
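The three new single-byte shift expansions registered above (LSLB7Rd, LSRB7Rd, ASRB7Rd) rely on small carry-flag tricks: ror/clr/ror for a logical left shift by 7, rol/clr/rol for a logical right shift by 7, and lsl/sbc for an arithmetic right shift by 7. Below is a brute-force C++ check of those identities, with the relevant AVR instructions modelled as plain functions; this is a sketch under my reading of the instruction semantics (carry starts clear here for simplicity), not a cycle-accurate model:

```cpp
#include <cassert>
#include <cstdint>

struct Flags { bool C = false; };

static uint8_t ror(uint8_t R, Flags &F) {            // rotate right through carry
  bool NewC = R & 1;
  R = static_cast<uint8_t>((R >> 1) | (F.C ? 0x80 : 0x00));
  F.C = NewC;
  return R;
}
static uint8_t adc(uint8_t A, uint8_t B, Flags &F) { // add with carry (rol == adc r,r)
  unsigned Sum = A + B + (F.C ? 1 : 0);
  F.C = Sum > 0xFF;
  return static_cast<uint8_t>(Sum);
}
static uint8_t add(uint8_t A, uint8_t B, Flags &F) { // add (lsl == add r,r)
  unsigned Sum = A + B;
  F.C = Sum > 0xFF;
  return static_cast<uint8_t>(Sum);
}
static uint8_t sbc(uint8_t A, uint8_t B, Flags &F) { // subtract with carry (borrow)
  unsigned Diff = A - B - (F.C ? 1 : 0);
  return static_cast<uint8_t>(Diff);
}
static uint8_t eor(uint8_t A, uint8_t B) { return A ^ B; } // clr; carry untouched

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    uint8_t V = static_cast<uint8_t>(X);

    // LSLB7Rd: ror ; eor r,r ; ror   ==  V << 7
    { Flags F; uint8_t R = V;
      R = ror(R, F); R = eor(R, R); R = ror(R, F);
      assert(R == static_cast<uint8_t>(V << 7)); }

    // LSRB7Rd: adc r,r ; eor r,r ; adc r,r   ==  V >> 7
    { Flags F; uint8_t R = V;
      R = adc(R, R, F); R = eor(R, R); R = adc(R, R, F);
      assert(R == static_cast<uint8_t>(V >> 7)); }

    // ASRB7Rd: add r,r ; sbc r,r   ==  0xFF if the sign bit was set, else 0x00
    { Flags F; uint8_t R = V;
      R = add(R, R, F); R = sbc(R, R, F);
      assert(R == (V & 0x80 ? 0xFF : 0x00)); }
  }
  return 0;
}
```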
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp
index c95a553b86ac..757b41466c3f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -131,6 +131,26 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
+static void restoreStatusRegister(MachineFunction &MF, MachineBasicBlock &MBB) {
+ const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+
+ DebugLoc DL = MBBI->getDebugLoc();
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ const AVRInstrInfo &TII = *STI.getInstrInfo();
+
+ // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal
+ // handlers at the very end of the function, just before reti.
+ if (AFI->isInterruptOrSignalHandler()) {
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0);
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr))
+ .addImm(0x3f)
+ .addReg(AVR::R0, RegState::Kill);
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0);
+ }
+}
+
void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
@@ -151,18 +171,9 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
const AVRInstrInfo &TII = *STI.getInstrInfo();
- // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal
- // handlers at the very end of the function, just before reti.
- if (AFI->isInterruptOrSignalHandler()) {
- BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0);
- BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr))
- .addImm(0x3f)
- .addReg(AVR::R0, RegState::Kill);
- BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0);
- }
-
// Early exit if there is no need to restore the frame pointer.
if (!FrameSize) {
+ restoreStatusRegister(MF, MBB);
return;
}
@@ -198,6 +209,8 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
// Write back R29R28 to SP and temporarily disable interrupts.
BuildMI(MBB, MBBI, DL, TII.get(AVR::SPWRITE), AVR::SP)
.addReg(AVR::R29R28, RegState::Kill);
+
+ restoreStatusRegister(MF, MBB);
}
// Return true if the specified function should have a dedicated frame
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index fe31fa42c403..df382d553753 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -242,10 +242,7 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
ConstantSDNode *ImmNode = dyn_cast<ConstantSDNode>(ImmOp);
unsigned Reg;
- bool CanHandleRegImmOpt = true;
-
- CanHandleRegImmOpt &= ImmNode != 0;
- CanHandleRegImmOpt &= ImmNode->getAPIntValue().getZExtValue() < 64;
+ bool CanHandleRegImmOpt = ImmNode && ImmNode->getAPIntValue().ult(64);
if (CopyFromRegOp->getOpcode() == ISD::CopyFromReg) {
RegisterSDNode *RegNode =
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
index bf9b32e1278e..3e7c2984655a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -334,6 +334,36 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Invalid shift opcode");
}
+ // Optimize int8 shifts.
+ if (VT.getSizeInBits() == 8) {
+ if (Op.getOpcode() == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) {
+ // Optimize LSL when 4 <= ShiftAmount <= 6.
+ Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
+ Victim =
+ DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0xf0, dl, VT));
+ ShiftAmount -= 4;
+ } else if (Op.getOpcode() == ISD::SRL && 4 <= ShiftAmount &&
+ ShiftAmount < 7) {
+ // Optimize LSR when 4 <= ShiftAmount <= 6.
+ Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
+ Victim =
+ DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT));
+ ShiftAmount -= 4;
+ } else if (Op.getOpcode() == ISD::SHL && ShiftAmount == 7) {
+ // Optimize LSL when ShiftAmount == 7.
+ Victim = DAG.getNode(AVRISD::LSL7, dl, VT, Victim);
+ ShiftAmount = 0;
+ } else if (Op.getOpcode() == ISD::SRL && ShiftAmount == 7) {
+ // Optimize LSR when ShiftAmount == 7.
+ Victim = DAG.getNode(AVRISD::LSR7, dl, VT, Victim);
+ ShiftAmount = 0;
+ } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) {
+ // Optimize ASR when ShiftAmount == 7.
+ Victim = DAG.getNode(AVRISD::ASR7, dl, VT, Victim);
+ ShiftAmount = 0;
+ }
+ }
+
while (ShiftAmount--) {
Victim = DAG.getNode(Opc8, dl, VT, Victim);
}
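For the swap-and-mask rewrite above, a hedged standalone check (not part of the patch; the helper names are made up) that an 8-bit shift by 4..6 really can be built from a nibble swap, a mask, and the remaining single-bit shifts:

#include <cassert>
#include <cstdint>

// Models the AVR SWAP instruction: exchange the two nibbles of a byte.
static uint8_t swapNibbles(uint8_t x) {
  return static_cast<uint8_t>((x << 4) | (x >> 4));
}

// x << n for 4 <= n <= 6: swap + mask gives x << 4, then single-bit shifts.
static uint8_t shlViaSwap(uint8_t x, unsigned n) {
  uint8_t v = static_cast<uint8_t>(swapNibbles(x) & 0xF0);
  for (unsigned i = 4; i < n; ++i)
    v = static_cast<uint8_t>(v << 1);
  return v;
}

// x >> n for 4 <= n <= 6: swap + mask gives x >> 4, then single-bit shifts.
static uint8_t lsrViaSwap(uint8_t x, unsigned n) {
  uint8_t v = static_cast<uint8_t>(swapNibbles(x) & 0x0F);
  for (unsigned i = 4; i < n; ++i)
    v = static_cast<uint8_t>(v >> 1);
  return v;
}

int main() {
  for (unsigned v = 0; v < 256; ++v)
    for (unsigned n = 4; n <= 6; ++n) {
      assert(shlViaSwap(static_cast<uint8_t>(v), n) ==
             static_cast<uint8_t>(v << n));
      assert(lsrViaSwap(static_cast<uint8_t>(v), n) == (v >> n));
    }
  return 0;
}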
@@ -437,6 +467,36 @@ static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) {
}
}
+/// Returns the appropriate CP/CPI/CPC nodes for the given 8/16-bit operands.
+SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, SDLoc DL) const {
+ assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&
+ "LHS and RHS have different types");
+ assert(((LHS.getSimpleValueType() == MVT::i16) ||
+ (LHS.getSimpleValueType() == MVT::i8)) && "invalid comparison type");
+
+ SDValue Cmp;
+
+ if (LHS.getSimpleValueType() == MVT::i16 && dyn_cast<ConstantSDNode>(RHS)) {
+ // Generate a CPI/CPC pair if RHS is a 16-bit constant.
+ SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
+ DAG.getIntPtrConstant(1, DL));
+ SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
+ DAG.getIntPtrConstant(1, DL));
+ Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
+ Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
+ } else {
+ // Generate an ordinary comparison (8-bit, or 16-bit with a non-constant RHS).
+ Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
+ }
+
+ return Cmp;
+}
+
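For reference, and not as part of the patch, the CP/CPI-then-CPC idea can be modeled with a small hypothetical helper: the first compare produces a borrow from the low bytes, and the second compare folds that borrow into the high-byte compare, which is what makes an unsigned 16-bit less-than come out right.

#include <cassert>
#include <cstdint>

// Hypothetical model of a 16-bit unsigned "less than" built from two 8-bit
// compares with borrow, mirroring cp/cpi on the low bytes followed by cpc
// on the high bytes.
static bool ult16ByBytes(uint16_t lhs, uint16_t rhs) {
  uint8_t ll = lhs & 0xFF, lh = lhs >> 8;
  uint8_t rl = rhs & 0xFF, rh = rhs >> 8;
  bool borrow = ll < rl;                   // cp/cpi lhs_lo, rhs_lo
  return lh < rh || (lh == rh && borrow);  // cpc lhs_hi, rhs_hi
}

int main() {
  for (uint32_t a = 0; a < 0x10000; a += 0x101)
    for (uint32_t b = 0; b < 0x10000; b += 0xFF)
      assert(ult16ByBytes(static_cast<uint16_t>(a), static_cast<uint16_t>(b)) ==
             (a < b));
  return 0;
}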
/// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for
/// the given operands.
SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
@@ -549,7 +609,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
DAG.getIntPtrConstant(1, DL));
Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
} else {
- Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
+ Cmp = getAVRCmp(LHSlo, RHSlo, DAG, DL);
Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
}
} else if (VT == MVT::i64) {
@@ -587,7 +647,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
DAG.getIntPtrConstant(1, DL));
Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
} else {
- Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS0, RHS0);
+ Cmp = getAVRCmp(LHS0, RHS0, DAG, DL);
Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp);
Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp);
Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp);
@@ -601,7 +661,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8,
LHS, DAG.getIntPtrConstant(1, DL)));
} else {
- Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
+ Cmp = getAVRCmp(LHS, RHS, DAG, DL);
}
} else {
llvm_unreachable("Invalid comparison size");
@@ -676,7 +736,7 @@ SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDValue FI = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy(DL));
return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
- MachinePointerInfo(SV), 0);
+ MachinePointerInfo(SV));
}
SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -1096,8 +1156,7 @@ SDValue AVRTargetLowering::LowerFormalArguments(
// from this parameter.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL));
InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(MF, FI),
- 0));
+ MachinePointerInfo::getFixedStack(MF, FI)));
}
}
@@ -1230,8 +1289,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain =
DAG.getStore(Chain, DL, Arg, PtrOff,
- MachinePointerInfo::getStack(MF, VA.getLocMemOffset()),
- 0);
+ MachinePointerInfo::getStack(MF, VA.getLocMemOffset()));
}
}
@@ -1460,9 +1518,11 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
// Create loop block.
MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *CheckBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(I, LoopBB);
+ F->insert(I, CheckBB);
F->insert(I, RemBB);
// Update machine-CFG edges by transferring all successors of the current
@@ -1471,14 +1531,14 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
BB->end());
RemBB->transferSuccessorsAndUpdatePHIs(BB);
- // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB.
- BB->addSuccessor(LoopBB);
- BB->addSuccessor(RemBB);
- LoopBB->addSuccessor(RemBB);
- LoopBB->addSuccessor(LoopBB);
+ // Add edges BB => LoopBB => CheckBB => RemBB, CheckBB => LoopBB.
+ BB->addSuccessor(CheckBB);
+ LoopBB->addSuccessor(CheckBB);
+ CheckBB->addSuccessor(LoopBB);
+ CheckBB->addSuccessor(RemBB);
- Register ShiftAmtReg = RI.createVirtualRegister(&AVR::LD8RegClass);
- Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::LD8RegClass);
+ Register ShiftAmtReg = RI.createVirtualRegister(&AVR::GPR8RegClass);
+ Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::GPR8RegClass);
Register ShiftReg = RI.createVirtualRegister(RC);
Register ShiftReg2 = RI.createVirtualRegister(RC);
Register ShiftAmtSrcReg = MI.getOperand(2).getReg();
@@ -1486,44 +1546,41 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
Register DstReg = MI.getOperand(0).getReg();
// BB:
- // cpi N, 0
- // breq RemBB
- BuildMI(BB, dl, TII.get(AVR::CPIRdK)).addReg(ShiftAmtSrcReg).addImm(0);
- BuildMI(BB, dl, TII.get(AVR::BREQk)).addMBB(RemBB);
+ // rjmp CheckBB
+ BuildMI(BB, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB);
// LoopBB:
- // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
- // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
// ShiftReg2 = shift ShiftReg
+ auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
+ if (HasRepeatedOperand)
+ ShiftMI.addReg(ShiftReg);
+
+ // CheckBB:
+ // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
+ // DestReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
// ShiftAmt2 = ShiftAmt - 1;
- BuildMI(LoopBB, dl, TII.get(AVR::PHI), ShiftReg)
+ // if (ShiftAmt2 >= 0) goto LoopBB;
+ BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftReg)
.addReg(SrcReg)
.addMBB(BB)
.addReg(ShiftReg2)
.addMBB(LoopBB);
- BuildMI(LoopBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
+ BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
.addReg(ShiftAmtSrcReg)
.addMBB(BB)
.addReg(ShiftAmtReg2)
.addMBB(LoopBB);
-
- auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
- if (HasRepeatedOperand)
- ShiftMI.addReg(ShiftReg);
-
- BuildMI(LoopBB, dl, TII.get(AVR::SUBIRdK), ShiftAmtReg2)
- .addReg(ShiftAmtReg)
- .addImm(1);
- BuildMI(LoopBB, dl, TII.get(AVR::BRNEk)).addMBB(LoopBB);
-
- // RemBB:
- // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
- BuildMI(*RemBB, RemBB->begin(), dl, TII.get(AVR::PHI), DstReg)
+ BuildMI(CheckBB, dl, TII.get(AVR::PHI), DstReg)
.addReg(SrcReg)
.addMBB(BB)
.addReg(ShiftReg2)
.addMBB(LoopBB);
+ BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2)
+ .addReg(ShiftAmtReg);
+ BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);
+
MI.eraseFromParent(); // The pseudo instruction is gone now.
return RemBB;
}
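The restructured control flow above moves the decrement-and-test into its own CheckBB, so a shift amount of zero performs no shift at all. A hedged C++ rendering of that flow, not part of the patch and assuming the shift amount stays in the usual 0..16 range:

#include <cassert>
#include <cstdint>

// Mirrors BB -> CheckBB -> (LoopBB -> CheckBB)* -> RemBB for a 16-bit
// logical shift left by a variable amount.
static uint16_t lslLoop(uint16_t value, uint8_t amount) {
  uint16_t shifted = value;  // ShiftReg PHI
  int counter = amount;      // ShiftAmt PHI
  for (;;) {
    --counter;               // DEC in CheckBB
    if (counter < 0)         // BRPL not taken: fall through to RemBB
      break;
    shifted = static_cast<uint16_t>(shifted << 1); // one shift in LoopBB
  }
  return shifted;            // value reaching RemBB
}

int main() {
  for (unsigned amt = 0; amt <= 16; ++amt)
    assert(lslLoop(0x1234, static_cast<uint8_t>(amt)) ==
           static_cast<uint16_t>(0x1234u << amt));
  return 0;
}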
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
index d1eaf53b15e9..7aff4159211b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -38,6 +38,9 @@ enum NodeType {
LSL, ///< Logical shift left.
LSR, ///< Logical shift right.
ASR, ///< Arithmetic shift right.
+ LSL7, ///< Logical shift left 7 bits.
+ LSR7, ///< Logical shift right 7 bits.
+ ASR7, ///< Arithmetic shift right 7 bits.
ROR, ///< Bit rotate right.
ROL, ///< Bit rotate left.
LSLLOOP, ///< A loop of single logical shift left instructions.
@@ -56,6 +59,8 @@ enum NodeType {
CMPC,
/// Test for zero or minus instruction.
TST,
+ /// Swap Rd[7:4] <-> Rd[3:0].
+ SWAP,
/// Operand 0 and operand 1 are selection variable, operand 2
/// is condition code and operand 3 is flag operand.
SELECT_CC
@@ -136,6 +141,8 @@ public:
private:
SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc,
SelectionDAG &DAG, SDLoc dl) const;
+ SDValue getAVRCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+ SDLoc dl) const;
SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
index f03c254382b4..9f7c16fc96d2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -59,6 +59,9 @@ def AVRlsr : SDNode<"AVRISD::LSR", SDTIntUnaryOp>;
def AVRrol : SDNode<"AVRISD::ROL", SDTIntUnaryOp>;
def AVRror : SDNode<"AVRISD::ROR", SDTIntUnaryOp>;
def AVRasr : SDNode<"AVRISD::ASR", SDTIntUnaryOp>;
+def AVRlsl7 : SDNode<"AVRISD::LSL7", SDTIntUnaryOp>;
+def AVRlsr7 : SDNode<"AVRISD::LSR7", SDTIntUnaryOp>;
+def AVRasr7 : SDNode<"AVRISD::ASR7", SDTIntUnaryOp>;
// Pseudo shift nodes for non-constant shift amounts.
def AVRlslLoop : SDNode<"AVRISD::LSLLOOP", SDTIntShiftOp>;
@@ -67,6 +70,9 @@ def AVRrolLoop : SDNode<"AVRISD::ROLLOOP", SDTIntShiftOp>;
def AVRrorLoop : SDNode<"AVRISD::RORLOOP", SDTIntShiftOp>;
def AVRasrLoop : SDNode<"AVRISD::ASRLOOP", SDTIntShiftOp>;
+// SWAP node.
+def AVRSwap : SDNode<"AVRISD::SWAP", SDTIntUnaryOp>;
+
//===----------------------------------------------------------------------===//
// AVR Operands, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
@@ -732,13 +738,23 @@ Defs = [SREG] in
"comw\t$rd",
[(set i16:$rd, (not i16:$src)), (implicit SREG)]>;
- //:TODO: optimize NEG for wider types
def NEGRd : FRd<0b1001,
0b0100001,
(outs GPR8:$rd),
(ins GPR8:$src),
"neg\t$rd",
[(set i8:$rd, (ineg i8:$src)), (implicit SREG)]>;
+
+ // NEGW Rd+1:Rd
+ //
+ // Expands to:
+ // neg Rd+1
+ // neg Rd
+ // sbci Rd+1, 0
+ def NEGWRd : Pseudo<(outs DREGS:$rd),
+ (ins DREGS:$src),
+ "negw\t$rd",
+ [(set i16:$rd, (ineg i16:$src)), (implicit SREG)]>;
}
// TST Rd
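The NEGW expansion documented above (neg Rd+1; neg Rd; sbci Rd+1, 0) relies on NEG setting the carry flag unless its operand was zero. A hedged standalone model of that sequence, not part of the patch, checked against ordinary 16-bit two's-complement negation:

#include <cassert>
#include <cstdint>

static uint16_t negwByBytes(uint16_t v) {
  uint8_t lo = v & 0xFF, hi = v >> 8;
  hi = static_cast<uint8_t>(0 - hi);      // neg Rd+1
  bool carry = lo != 0;                   // neg Rd sets C unless Rd was 0
  lo = static_cast<uint8_t>(0 - lo);      // neg Rd
  hi = static_cast<uint8_t>(hi - carry);  // sbci Rd+1, 0
  return static_cast<uint16_t>((hi << 8) | lo);
}

int main() {
  for (uint32_t v = 0; v < 0x10000; ++v)
    assert(negwByBytes(static_cast<uint16_t>(v)) ==
           static_cast<uint16_t>(0u - v));
  return 0;
}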
@@ -1653,6 +1669,11 @@ Defs = [SREG] in
"lslw\t$rd",
[(set i16:$rd, (AVRlsl i16:$src)), (implicit SREG)]>;
+ def LSLB7Rd : Pseudo<(outs GPR8:$rd),
+ (ins GPR8:$src),
+ "lslb7\t$rd",
+ [(set i8:$rd, (AVRlsl7 i8:$src)), (implicit SREG)]>;
+
def LSRRd : FRd<0b1001,
0b0100110,
(outs GPR8:$rd),
@@ -1660,6 +1681,11 @@ Defs = [SREG] in
"lsr\t$rd",
[(set i8:$rd, (AVRlsr i8:$src)), (implicit SREG)]>;
+ def LSRB7Rd : Pseudo<(outs GPR8:$rd),
+ (ins GPR8:$src),
+ "lsrb7\t$rd",
+ [(set i8:$rd, (AVRlsr7 i8:$src)), (implicit SREG)]>;
+
def LSRWRd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
"lsrw\t$rd",
@@ -1672,6 +1698,11 @@ Defs = [SREG] in
"asr\t$rd",
[(set i8:$rd, (AVRasr i8:$src)), (implicit SREG)]>;
+ def ASRB7Rd : Pseudo<(outs GPR8:$rd),
+ (ins GPR8:$src),
+ "asrb7\t$rd",
+ [(set i8:$rd, (AVRasr7 i8:$src)), (implicit SREG)]>;
+
def ASRWRd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
"asrw\t$rd",
@@ -1719,7 +1750,7 @@ def SWAPRd : FRd<0b1001,
(outs GPR8:$rd),
(ins GPR8:$src),
"swap\t$rd",
- [(set i8:$rd, (bswap i8:$src))]>;
+ [(set i8:$rd, (AVRSwap i8:$src))]>;
// IO register bit set/clear operations.
//:TODO: add patterns when popcount(imm)==2 to be expanded with 2 sbi/cbi
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.cpp
index 195ca95bc3bd..601865120491 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.cpp
@@ -29,7 +29,7 @@ namespace llvm {
AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const AVRTargetMachine &TM)
- : AVRGenSubtargetInfo(TT, CPU, FS), ELFArch(0),
+ : AVRGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), ELFArch(0),
// Subtarget features
m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false),
@@ -43,14 +43,14 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
InstrInfo(), FrameLowering(),
TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo() {
// Parse features string.
- ParseSubtargetFeatures(CPU, FS);
+ ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
}
AVRSubtarget &
AVRSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
const TargetMachine &TM) {
// Parse features string.
- ParseSubtargetFeatures(CPU, FS);
+ ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
return *this;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h
index 81d883eb30d9..7d49e43a83f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -46,7 +46,7 @@ public:
/// Parses a subtarget feature string, setting appropriate options.
/// \note Definition of function is auto generated by `tblgen`.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
AVRSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
const TargetMachine &TM);
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 0c7136e6f77e..0fa8623e2fb7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -37,7 +37,7 @@ static StringRef getCPU(StringRef CPU) {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- return RM.hasValue() ? *RM : Reloc::Static;
+ return RM.getValueOr(Reloc::Static);
}
AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 230bc7adc07a..19f769270569 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -14,7 +14,6 @@
#include "TargetInfo/AVRTargetInfo.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -166,13 +165,13 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The operand is actually a imm8, but we have its bitwise
// negation in the assembly source, so twiddle it here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const auto *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(~(uint8_t)CE->getValue()));
}
bool isImmCom8() const {
if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const auto *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return isUInt<8>(Value);
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index ac72abe0d9f6..49840672bf9a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -13,12 +13,12 @@
#include "MCTargetDesc/AVRAsmBackend.h"
#include "MCTargetDesc/AVRFixupKinds.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
-
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index 9e150f120dd4..46dc914adf78 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -22,6 +22,7 @@
namespace llvm {
class MCAssembler;
+class MCContext;
struct MCFixupKindInfo;
/// Utilities for manipulating generated AVR machine code.
@@ -47,11 +48,6 @@ public:
return AVR::NumTargetFixupKinds;
}
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- return false;
- }
-
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
index 910fd3455dee..8976ef28f3dc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
@@ -45,6 +45,7 @@ private:
void printMemri(const MCInst *MI, unsigned OpNo, raw_ostream &O);
// Autogenerated by TableGen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &O);
void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
index 0a53e5346779..9eff554a082b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
@@ -189,9 +189,10 @@ void AVRMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
}
const char *AVRMCExpr::getName() const {
- const auto &Modifier = std::find_if(
- std::begin(ModifierNames), std::end(ModifierNames),
- [this](ModifierEntry const &Mod) { return Mod.VariantKind == Kind; });
+ const auto &Modifier =
+ llvm::find_if(ModifierNames, [this](ModifierEntry const &Mod) {
+ return Mod.VariantKind == Kind;
+ });
if (Modifier != std::end(ModifierNames)) {
return Modifier->Spelling;
@@ -200,9 +201,10 @@ const char *AVRMCExpr::getName() const {
}
AVRMCExpr::VariantKind AVRMCExpr::getKindByName(StringRef Name) {
- const auto &Modifier = std::find_if(
- std::begin(ModifierNames), std::end(ModifierNames),
- [&Name](ModifierEntry const &Mod) { return Mod.Spelling == Name; });
+ const auto &Modifier =
+ llvm::find_if(ModifierNames, [&Name](ModifierEntry const &Mod) {
+ return Mod.Spelling == Name;
+ });
if (Modifier != std::end(ModifierNames)) {
return Modifier->VariantKind;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
index bfc274d9cdcc..95f4465924cc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -53,7 +53,7 @@ static MCRegisterInfo *createAVRMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *createAVRMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
- return createAVRMCSubtargetInfoImpl(TT, CPU, FS);
+ return createAVRMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCInstPrinter *createAVRMCInstPrinter(const Triple &T,
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h
index 4a46b11e5e08..a98a3e08d5de 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h
@@ -10,14 +10,17 @@
#define LLVM_LIB_TARGET_BPF_BPF_H
#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
class BPFTargetMachine;
-ModulePass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
-ModulePass *createBPFPreserveDIType();
+ModulePass *createBPFAdjustOpt();
+ModulePass *createBPFCheckAndAdjustIR();
+FunctionPass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
+FunctionPass *createBPFPreserveDIType();
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMISimplifyPatchablePass();
FunctionPass *createBPFMIPeepholePass();
@@ -25,13 +28,39 @@ FunctionPass *createBPFMIPeepholeTruncElimPass();
FunctionPass *createBPFMIPreEmitPeepholePass();
FunctionPass *createBPFMIPreEmitCheckingPass();
-void initializeBPFAbstractMemberAccessPass(PassRegistry&);
+void initializeBPFAdjustOptPass(PassRegistry&);
+void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
+
+void initializeBPFAbstractMemberAccessLegacyPassPass(PassRegistry &);
void initializeBPFPreserveDITypePass(PassRegistry&);
void initializeBPFMISimplifyPatchablePass(PassRegistry&);
void initializeBPFMIPeepholePass(PassRegistry&);
void initializeBPFMIPeepholeTruncElimPass(PassRegistry&);
void initializeBPFMIPreEmitPeepholePass(PassRegistry&);
void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
-}
+
+class BPFAbstractMemberAccessPass
+ : public PassInfoMixin<BPFAbstractMemberAccessPass> {
+ BPFTargetMachine *TM;
+
+public:
+ BPFAbstractMemberAccessPass(BPFTargetMachine *TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ static bool isRequired() { return true; }
+};
+
+class BPFPreserveDITypePass : public PassInfoMixin<BPFPreserveDITypePass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ static bool isRequired() { return true; }
+};
+
+class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 16708c4d1ce6..cd994a9c8365 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -81,7 +81,9 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -93,26 +95,30 @@
namespace llvm {
constexpr StringRef BPFCoreSharedInfo::AmaAttr;
+uint32_t BPFCoreSharedInfo::SeqNum;
+
+Instruction *BPFCoreSharedInfo::insertPassThrough(Module *M, BasicBlock *BB,
+ Instruction *Input,
+ Instruction *Before) {
+ Function *Fn = Intrinsic::getDeclaration(
+ M, Intrinsic::bpf_passthrough, {Input->getType(), Input->getType()});
+ Constant *SeqNumVal = ConstantInt::get(Type::getInt32Ty(BB->getContext()),
+ BPFCoreSharedInfo::SeqNum++);
+
+ auto *NewInst = CallInst::Create(Fn, {SeqNumVal, Input});
+ BB->getInstList().insert(Before->getIterator(), NewInst);
+ return NewInst;
+}
} // namespace llvm
using namespace llvm;
namespace {
-
-class BPFAbstractMemberAccess final : public ModulePass {
- StringRef getPassName() const override {
- return "BPF Abstract Member Access";
- }
-
- bool runOnModule(Module &M) override;
-
+class BPFAbstractMemberAccess final {
public:
- static char ID;
- TargetMachine *TM;
- // Add optional BPFTargetMachine parameter so that BPF backend can add the phase
- // with target machine to find out the endianness. The default constructor (without
- // parameters) is used by the pass manager for managing purposes.
- BPFAbstractMemberAccess(BPFTargetMachine *TM = nullptr) : ModulePass(ID), TM(TM) {}
+ BPFAbstractMemberAccess(BPFTargetMachine *TM) : TM(TM) {}
+
+ bool run(Function &F);
struct CallInfo {
uint32_t Kind;
@@ -131,9 +137,11 @@ private:
BPFPreserveFieldInfoAI = 4,
};
+ TargetMachine *TM;
const DataLayout *DL = nullptr;
+ Module *M = nullptr;
- std::map<std::string, GlobalVariable *> GEPGlobals;
+ static std::map<std::string, GlobalVariable *> GEPGlobals;
// A map to link preserve_*_access_index instrinsic calls.
std::map<CallInst *, std::pair<CallInst *, CallInfo>> AIChain;
// A map to hold all the base preserve_*_access_index instrinsic calls.
@@ -141,19 +149,19 @@ private:
// intrinsics.
std::map<CallInst *, CallInfo> BaseAICalls;
- bool doTransformation(Module &M);
+ bool doTransformation(Function &F);
void traceAICall(CallInst *Call, CallInfo &ParentInfo);
void traceBitCast(BitCastInst *BitCast, CallInst *Parent,
CallInfo &ParentInfo);
void traceGEP(GetElementPtrInst *GEP, CallInst *Parent,
CallInfo &ParentInfo);
- void collectAICallChains(Module &M, Function &F);
+ void collectAICallChains(Function &F);
bool IsPreserveDIAccessIndexCall(const CallInst *Call, CallInfo &Cinfo);
bool IsValidAIChain(const MDNode *ParentMeta, uint32_t ParentAI,
const MDNode *ChildMeta);
- bool removePreserveAccessIndexIntrinsic(Module &M);
+ bool removePreserveAccessIndexIntrinsic(Function &F);
void replaceWithGEP(std::vector<CallInst *> &CallList,
uint32_t NumOfZerosIndex, uint32_t DIIndex);
bool HasPreserveFieldInfoCall(CallInfoStack &CallStack);
@@ -165,28 +173,55 @@ private:
Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo,
std::string &AccessKey, MDNode *&BaseMeta);
+ MDNode *computeAccessKey(CallInst *Call, CallInfo &CInfo,
+ std::string &AccessKey, bool &IsInt32Ret);
uint64_t getConstant(const Value *IndexValue);
- bool transformGEPChain(Module &M, CallInst *Call, CallInfo &CInfo);
+ bool transformGEPChain(CallInst *Call, CallInfo &CInfo);
};
+
+std::map<std::string, GlobalVariable *> BPFAbstractMemberAccess::GEPGlobals;
+
+class BPFAbstractMemberAccessLegacyPass final : public FunctionPass {
+ BPFTargetMachine *TM;
+
+ bool runOnFunction(Function &F) override {
+ return BPFAbstractMemberAccess(TM).run(F);
+ }
+
+public:
+ static char ID;
+
+ // Add an optional BPFTargetMachine parameter so that the BPF backend can
+ // register the phase with a target machine and find out the endianness.
+ // The default constructor (without parameters) is used by the pass manager
+ // for management purposes.
+ BPFAbstractMemberAccessLegacyPass(BPFTargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {}
+};
+
} // End anonymous namespace
-char BPFAbstractMemberAccess::ID = 0;
-INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE,
- "abstracting struct/union member accessees", false, false)
+char BPFAbstractMemberAccessLegacyPass::ID = 0;
+INITIALIZE_PASS(BPFAbstractMemberAccessLegacyPass, DEBUG_TYPE,
+ "BPF Abstract Member Access", false, false)
-ModulePass *llvm::createBPFAbstractMemberAccess(BPFTargetMachine *TM) {
- return new BPFAbstractMemberAccess(TM);
+FunctionPass *llvm::createBPFAbstractMemberAccess(BPFTargetMachine *TM) {
+ return new BPFAbstractMemberAccessLegacyPass(TM);
}
-bool BPFAbstractMemberAccess::runOnModule(Module &M) {
+bool BPFAbstractMemberAccess::run(Function &F) {
LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n");
+ M = F.getParent();
+ if (!M)
+ return false;
+
// Bail out if no debug info.
- if (M.debug_compile_units().empty())
+ if (M->debug_compile_units().empty())
return false;
- DL = &M.getDataLayout();
- return doTransformation(M);
+ DL = &M->getDataLayout();
+ return doTransformation(F);
}
static bool SkipDIDerivedTag(unsigned Tag, bool skipTypedef) {
@@ -285,6 +320,34 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
CInfo.AccessIndex = InfoKind;
return true;
}
+ if (GV->getName().startswith("llvm.bpf.preserve.type.info")) {
+ CInfo.Kind = BPFPreserveFieldInfoAI;
+ CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
+ if (!CInfo.Metadata)
+ report_fatal_error("Missing metadata for llvm.preserve.type.info intrinsic");
+ uint64_t Flag = getConstant(Call->getArgOperand(1));
+ if (Flag >= BPFCoreSharedInfo::MAX_PRESERVE_TYPE_INFO_FLAG)
+ report_fatal_error("Incorrect flag for llvm.bpf.preserve.type.info intrinsic");
+ if (Flag == BPFCoreSharedInfo::PRESERVE_TYPE_INFO_EXISTENCE)
+ CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_EXISTENCE;
+ else
+ CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_SIZE;
+ return true;
+ }
+ if (GV->getName().startswith("llvm.bpf.preserve.enum.value")) {
+ CInfo.Kind = BPFPreserveFieldInfoAI;
+ CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
+ if (!CInfo.Metadata)
+ report_fatal_error("Missing metadata for llvm.preserve.enum.value intrinsic");
+ uint64_t Flag = getConstant(Call->getArgOperand(2));
+ if (Flag >= BPFCoreSharedInfo::MAX_PRESERVE_ENUM_VALUE_FLAG)
+ report_fatal_error("Incorrect flag for llvm.bpf.preserve.enum.value intrinsic");
+ if (Flag == BPFCoreSharedInfo::PRESERVE_ENUM_VALUE_EXISTENCE)
+ CInfo.AccessIndex = BPFCoreSharedInfo::ENUM_VALUE_EXISTENCE;
+ else
+ CInfo.AccessIndex = BPFCoreSharedInfo::ENUM_VALUE;
+ return true;
+ }
return false;
}
@@ -311,28 +374,27 @@ void BPFAbstractMemberAccess::replaceWithGEP(std::vector<CallInst *> &CallList,
}
}
-bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) {
+bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Function &F) {
std::vector<CallInst *> PreserveArrayIndexCalls;
std::vector<CallInst *> PreserveUnionIndexCalls;
std::vector<CallInst *> PreserveStructIndexCalls;
bool Found = false;
- for (Function &F : M)
- for (auto &BB : F)
- for (auto &I : BB) {
- auto *Call = dyn_cast<CallInst>(&I);
- CallInfo CInfo;
- if (!IsPreserveDIAccessIndexCall(Call, CInfo))
- continue;
-
- Found = true;
- if (CInfo.Kind == BPFPreserveArrayAI)
- PreserveArrayIndexCalls.push_back(Call);
- else if (CInfo.Kind == BPFPreserveUnionAI)
- PreserveUnionIndexCalls.push_back(Call);
- else
- PreserveStructIndexCalls.push_back(Call);
- }
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ auto *Call = dyn_cast<CallInst>(&I);
+ CallInfo CInfo;
+ if (!IsPreserveDIAccessIndexCall(Call, CInfo))
+ continue;
+
+ Found = true;
+ if (CInfo.Kind == BPFPreserveArrayAI)
+ PreserveArrayIndexCalls.push_back(Call);
+ else if (CInfo.Kind == BPFPreserveUnionAI)
+ PreserveUnionIndexCalls.push_back(Call);
+ else
+ PreserveStructIndexCalls.push_back(Call);
+ }
// do the following transformation:
// . addr = preserve_array_access_index(base, dimension, index)
@@ -498,7 +560,7 @@ void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent,
}
}
-void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) {
+void BPFAbstractMemberAccess::collectAICallChains(Function &F) {
AIChain.clear();
BaseAICalls.clear();
@@ -847,28 +909,94 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
return Base;
}
+MDNode *BPFAbstractMemberAccess::computeAccessKey(CallInst *Call,
+ CallInfo &CInfo,
+ std::string &AccessKey,
+ bool &IsInt32Ret) {
+ DIType *Ty = stripQualifiers(cast<DIType>(CInfo.Metadata), false);
+ assert(!Ty->getName().empty());
+
+ int64_t PatchImm;
+ std::string AccessStr("0");
+ if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_EXISTENCE) {
+ PatchImm = 1;
+ } else if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_SIZE) {
+ // typedef debuginfo type has size 0, get the eventual base type.
+ DIType *BaseTy = stripQualifiers(Ty, true);
+ PatchImm = BaseTy->getSizeInBits() / 8;
+ } else {
+ // ENUM_VALUE_EXISTENCE and ENUM_VALUE
+ IsInt32Ret = false;
+
+ const auto *CE = cast<ConstantExpr>(Call->getArgOperand(1));
+ const GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ assert(GV->hasInitializer());
+ const ConstantDataArray *DA = cast<ConstantDataArray>(GV->getInitializer());
+ assert(DA->isString());
+ StringRef ValueStr = DA->getAsString();
+
+ // ValueStr format: <EnumeratorStr>:<Value>
+ size_t Separator = ValueStr.find_first_of(':');
+ StringRef EnumeratorStr = ValueStr.substr(0, Separator);
+
+ // Find enumerator index in the debuginfo
+ DIType *BaseTy = stripQualifiers(Ty, true);
+ const auto *CTy = cast<DICompositeType>(BaseTy);
+ assert(CTy->getTag() == dwarf::DW_TAG_enumeration_type);
+ int EnumIndex = 0;
+ for (const auto Element : CTy->getElements()) {
+ const auto *Enum = cast<DIEnumerator>(Element);
+ if (Enum->getName() == EnumeratorStr) {
+ AccessStr = std::to_string(EnumIndex);
+ break;
+ }
+ EnumIndex++;
+ }
+
+ if (CInfo.AccessIndex == BPFCoreSharedInfo::ENUM_VALUE) {
+ StringRef EValueStr = ValueStr.substr(Separator + 1);
+ PatchImm = std::stoll(std::string(EValueStr));
+ } else {
+ PatchImm = 1;
+ }
+ }
+
+ AccessKey = "llvm." + Ty->getName().str() + ":" +
+ std::to_string(CInfo.AccessIndex) + std::string(":") +
+ std::to_string(PatchImm) + std::string("$") + AccessStr;
+
+ return Ty;
+}
+
/// Call/Kind is the base preserve_*_access_index() call. Attempts to do
/// transformation to a chain of relocable GEPs.
-bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
+bool BPFAbstractMemberAccess::transformGEPChain(CallInst *Call,
CallInfo &CInfo) {
std::string AccessKey;
MDNode *TypeMeta;
- Value *Base =
- computeBaseAndAccessKey(Call, CInfo, AccessKey, TypeMeta);
- if (!Base)
- return false;
+ Value *Base = nullptr;
+ bool IsInt32Ret;
+
+ IsInt32Ret = CInfo.Kind == BPFPreserveFieldInfoAI;
+ if (CInfo.Kind == BPFPreserveFieldInfoAI && CInfo.Metadata) {
+ TypeMeta = computeAccessKey(Call, CInfo, AccessKey, IsInt32Ret);
+ } else {
+ Base = computeBaseAndAccessKey(Call, CInfo, AccessKey, TypeMeta);
+ if (!Base)
+ return false;
+ }
BasicBlock *BB = Call->getParent();
GlobalVariable *GV;
if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
IntegerType *VarType;
- if (CInfo.Kind == BPFPreserveFieldInfoAI)
+ if (IsInt32Ret)
VarType = Type::getInt32Ty(BB->getContext()); // 32bit return value
else
- VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr arith
+ VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr or enum value
- GV = new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage,
+ GV = new GlobalVariable(*M, VarType, false, GlobalVariable::ExternalLinkage,
NULL, AccessKey);
GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta);
@@ -879,9 +1007,15 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
if (CInfo.Kind == BPFPreserveFieldInfoAI) {
// Load the global variable which represents the returned field info.
- auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV, "",
- Call);
- Call->replaceAllUsesWith(LDInst);
+ LoadInst *LDInst;
+ if (IsInt32Ret)
+ LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV, "", Call);
+ else
+ LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV, "", Call);
+
+ Instruction *PassThroughInst =
+ BPFCoreSharedInfo::insertPassThrough(M, BB, LDInst, Call);
+ Call->replaceAllUsesWith(PassThroughInst);
Call->eraseFromParent();
return true;
}
@@ -889,7 +1023,7 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
// For any original GEP Call and Base %2 like
// %4 = bitcast %struct.net_device** %dev1 to i64*
// it is transformed to:
- // %6 = load sk_buff:50:$0:0:0:2:0
+ // %6 = load llvm.sk_buff:0:50$0:0:0:2:0
// %7 = bitcast %struct.sk_buff* %2 to i8*
// %8 = getelementptr i8, i8* %7, %6
// %9 = bitcast i8* %8 to i64*
@@ -912,24 +1046,75 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
auto *BCInst2 = new BitCastInst(GEP, Call->getType());
BB->getInstList().insert(Call->getIterator(), BCInst2);
- Call->replaceAllUsesWith(BCInst2);
+ // For the following code,
+ // Block0:
+ // ...
+ // if (...) goto Block1 else ...
+ // Block1:
+ // %6 = load llvm.sk_buff:0:50$0:0:0:2:0
+ // %7 = bitcast %struct.sk_buff* %2 to i8*
+ // %8 = getelementptr i8, i8* %7, %6
+ // ...
+ // goto CommonExit
+ // Block2:
+ // ...
+ // if (...) goto Block3 else ...
+ // Block3:
+ // %6 = load llvm.bpf_map:0:40$0:0:0:2:0
+ // %7 = bitcast %struct.sk_buff* %2 to i8*
+ // %8 = getelementptr i8, i8* %7, %6
+ // ...
+ // goto CommonExit
+ // CommonExit
+ // SimplifyCFG may generate:
+ // Block0:
+ // ...
+ // if (...) goto Block_Common else ...
+ // Block2:
+ // ...
+ // if (...) goto Block_Common else ...
+ // Block_Common:
+ // PHI = [llvm.sk_buff:0:50$0:0:0:2:0, llvm.bpf_map:0:40$0:0:0:2:0]
+ // %6 = load PHI
+ // %7 = bitcast %struct.sk_buff* %2 to i8*
+ // %8 = getelementptr i8, i8* %7, %6
+ // ...
+ // goto CommonExit
+ // For the above code, we cannot perform proper relocation since
+ // "load PHI" has two possible relocations.
+ //
+ // To prevent the above tail merging, we use __builtin_bpf_passthrough(),
+ // which takes a seq_num as one of its parameters. Since two
+ // __builtin_bpf_passthrough() calls always have different seq_nums,
+ // tail merging cannot happen. The __builtin_bpf_passthrough() calls are
+ // removed at the beginning of the target IR passes.
+ //
+ // This approach is also used in other places when global var
+ // representing a relocation is used.
+ Instruction *PassThroughInst =
+ BPFCoreSharedInfo::insertPassThrough(M, BB, BCInst2, Call);
+ Call->replaceAllUsesWith(PassThroughInst);
Call->eraseFromParent();
return true;
}
-bool BPFAbstractMemberAccess::doTransformation(Module &M) {
+bool BPFAbstractMemberAccess::doTransformation(Function &F) {
bool Transformed = false;
- for (Function &F : M) {
- // Collect PreserveDIAccessIndex Intrinsic call chains.
- // The call chains will be used to generate the access
- // patterns similar to GEP.
- collectAICallChains(M, F);
+ // Collect PreserveDIAccessIndex Intrinsic call chains.
+ // The call chains will be used to generate the access
+ // patterns similar to GEP.
+ collectAICallChains(F);
- for (auto &C : BaseAICalls)
- Transformed = transformGEPChain(M, C.first, C.second) || Transformed;
- }
+ for (auto &C : BaseAICalls)
+ Transformed = transformGEPChain(C.first, C.second) || Transformed;
+
+ return removePreserveAccessIndexIntrinsic(F) || Transformed;
+}
- return removePreserveAccessIndexIntrinsic(M) || Transformed;
+PreservedAnalyses
+BPFAbstractMemberAccessPass::run(Function &F, FunctionAnalysisManager &AM) {
+ return BPFAbstractMemberAccess(TM).run(F) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAdjustOpt.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
new file mode 100644
index 000000000000..da543e7eba53
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
@@ -0,0 +1,323 @@
+//===---------------- BPFAdjustOpt.cpp - Adjust Optimization --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Adjust optimization to make the code more kernel verifier friendly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "BPFTargetMachine.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "bpf-adjust-opt"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ DisableBPFserializeICMP("bpf-disable-serialize-icmp", cl::Hidden,
+ cl::desc("BPF: Disable Serializing ICMP insns."),
+ cl::init(false));
+
+static cl::opt<bool> DisableBPFavoidSpeculation(
+ "bpf-disable-avoid-speculation", cl::Hidden,
+ cl::desc("BPF: Disable Avoiding Speculative Code Motion."),
+ cl::init(false));
+
+namespace {
+
+class BPFAdjustOpt final : public ModulePass {
+public:
+ static char ID;
+
+ BPFAdjustOpt() : ModulePass(ID) {}
+ bool runOnModule(Module &M) override;
+};
+
+class BPFAdjustOptImpl {
+ struct PassThroughInfo {
+ Instruction *Input;
+ Instruction *UsedInst;
+ uint32_t OpIdx;
+ PassThroughInfo(Instruction *I, Instruction *U, uint32_t Idx)
+ : Input(I), UsedInst(U), OpIdx(Idx) {}
+ };
+
+public:
+ BPFAdjustOptImpl(Module *M) : M(M) {}
+
+ bool run();
+
+private:
+ Module *M;
+ SmallVector<PassThroughInfo, 16> PassThroughs;
+
+ void adjustBasicBlock(BasicBlock &BB);
+ bool serializeICMPCrossBB(BasicBlock &BB);
+ void adjustInst(Instruction &I);
+ bool serializeICMPInBB(Instruction &I);
+ bool avoidSpeculation(Instruction &I);
+ bool insertPassThrough();
+};
+
+} // End anonymous namespace
+
+char BPFAdjustOpt::ID = 0;
+INITIALIZE_PASS(BPFAdjustOpt, "bpf-adjust-opt", "BPF Adjust Optimization",
+ false, false)
+
+ModulePass *llvm::createBPFAdjustOpt() { return new BPFAdjustOpt(); }
+
+bool BPFAdjustOpt::runOnModule(Module &M) { return BPFAdjustOptImpl(&M).run(); }
+
+bool BPFAdjustOptImpl::run() {
+ for (Function &F : *M)
+ for (auto &BB : F) {
+ adjustBasicBlock(BB);
+ for (auto &I : BB)
+ adjustInst(I);
+ }
+
+ return insertPassThrough();
+}
+
+bool BPFAdjustOptImpl::insertPassThrough() {
+ for (auto &Info : PassThroughs) {
+ auto *CI = BPFCoreSharedInfo::insertPassThrough(
+ M, Info.UsedInst->getParent(), Info.Input, Info.UsedInst);
+ Info.UsedInst->setOperand(Info.OpIdx, CI);
+ }
+
+ return !PassThroughs.empty();
+}
+
+// Prevent the InstCombine optimization from combining conditionals
+// within the same basic block.
+bool BPFAdjustOptImpl::serializeICMPInBB(Instruction &I) {
+ // For:
+ // comp1 = icmp <opcode> ...;
+ // comp2 = icmp <opcode> ...;
+ // ... or comp1 comp2 ...
+ // changed to:
+ // comp1 = icmp <opcode> ...;
+ // comp2 = icmp <opcode> ...;
+ // new_comp1 = __builtin_bpf_passthrough(seq_num, comp1)
+ // ... or new_comp1 comp2 ...
+ if (I.getOpcode() != Instruction::Or)
+ return false;
+ auto *Icmp1 = dyn_cast<ICmpInst>(I.getOperand(0));
+ if (!Icmp1)
+ return false;
+ auto *Icmp2 = dyn_cast<ICmpInst>(I.getOperand(1));
+ if (!Icmp2)
+ return false;
+
+ Value *Icmp1Op0 = Icmp1->getOperand(0);
+ Value *Icmp2Op0 = Icmp2->getOperand(0);
+ if (Icmp1Op0 != Icmp2Op0)
+ return false;
+
+ // Now we got two icmp instructions which feed into
+ // an "or" instruction.
+ PassThroughInfo Info(Icmp1, &I, 0);
+ PassThroughs.push_back(Info);
+ return true;
+}
+
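As a hedged illustration, not part of the patch and with invented function names, this is the kind of C source that, after the usual mid-level simplifications, can end up as two icmps on the same value feeding a single 'or'; that is the shape this hook breaks up by routing the first compare through the passthrough builtin.

// Both compares use the same operand 'len', so serializeICMPInBB would add
// a passthrough on the first compare that feeds the 'or'.
static int lenIsValid(int len) {
  if (len < 1 || len > 512) // may become: or (icmp slt len, 1), (icmp sgt len, 512)
    return 0;
  return 1;
}

int main() { return lenIsValid(100) == 1 ? 0 : 1; }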
+// Prevent optimizations from combining conditionals that are spread
+// across adjacent basic blocks.
+bool BPFAdjustOptImpl::serializeICMPCrossBB(BasicBlock &BB) {
+ // For:
+ // B1:
+ // comp1 = icmp <opcode> ...;
+ // if (comp1) goto B2 else B3;
+ // B2:
+ // comp2 = icmp <opcode> ...;
+ // if (comp2) goto B4 else B5;
+ // B4:
+ // ...
+ // changed to:
+ // B1:
+ // comp1 = icmp <opcode> ...;
+ // comp1 = __builtin_bpf_passthrough(seq_num, comp1);
+ // if (comp1) goto B2 else B3;
+ // B2:
+ // comp2 = icmp <opcode> ...;
+ // if (comp2) goto B4 else B5;
+ // B4:
+ // ...
+
+ // Check the predecessor blocks: if two of them (say B1 and B2) compute
+ // their branch conditions with icmp instructions and one is the
+ // predecessor of the other (e.g., B1 is the predecessor of B2), add a
+ // passthrough barrier after the icmp instruction of block B1.
+ BasicBlock *B2 = BB.getSinglePredecessor();
+ if (!B2)
+ return false;
+
+ BasicBlock *B1 = B2->getSinglePredecessor();
+ if (!B1)
+ return false;
+
+ Instruction *TI = B2->getTerminator();
+ auto *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isConditional())
+ return false;
+ auto *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond || B2->getFirstNonPHI() != Cond)
+ return false;
+ Value *B2Op0 = Cond->getOperand(0);
+ auto Cond2Op = Cond->getPredicate();
+
+ TI = B1->getTerminator();
+ BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isConditional())
+ return false;
+ Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return false;
+ Value *B1Op0 = Cond->getOperand(0);
+ auto Cond1Op = Cond->getPredicate();
+
+ if (B1Op0 != B2Op0)
+ return false;
+
+ if (Cond1Op == ICmpInst::ICMP_SGT || Cond1Op == ICmpInst::ICMP_SGE) {
+ if (Cond2Op != ICmpInst::ICMP_SLT && Cond2Op != ICmpInst::ICMP_SLE)
+ return false;
+ } else if (Cond1Op == ICmpInst::ICMP_SLT || Cond1Op == ICmpInst::ICMP_SLE) {
+ if (Cond2Op != ICmpInst::ICMP_SGT && Cond2Op != ICmpInst::ICMP_SGE)
+ return false;
+ } else {
+ return false;
+ }
+
+ PassThroughInfo Info(Cond, BI, 0);
+ PassThroughs.push_back(Info);
+
+ return true;
+}
+
+// Prevent speculative hoisting of certain computations out of
+// their basic block.
+bool BPFAdjustOptImpl::avoidSpeculation(Instruction &I) {
+ if (auto *LdInst = dyn_cast<LoadInst>(&I)) {
+ if (auto *GV = dyn_cast<GlobalVariable>(LdInst->getOperand(0))) {
+ if (GV->hasAttribute(BPFCoreSharedInfo::AmaAttr) ||
+ GV->hasAttribute(BPFCoreSharedInfo::TypeIdAttr))
+ return false;
+ }
+ }
+
+ if (!isa<LoadInst>(&I) && !isa<CallInst>(&I))
+ return false;
+
+ // For:
+ // B1:
+ // var = ...
+ // ...
+ // /* icmp may not be in the same block as var = ... */
+ // comp1 = icmp <opcode> var, <const>;
+ // if (comp1) goto B2 else B3;
+ // B2:
+ // ... var ...
+ // change to:
+ // B1:
+ // var = ...
+ // ...
+ // /* icmp may not be in the same block as var = ... */
+ // comp1 = icmp <opcode> var, <const>;
+ // if (comp1) goto B2 else B3;
+ // B2:
+ // var = __builtin_bpf_passthrough(seq_num, var);
+ // ... var ...
+ bool isCandidate = false;
+ SmallVector<PassThroughInfo, 4> Candidates;
+ for (User *U : I.users()) {
+ Instruction *Inst = dyn_cast<Instruction>(U);
+ if (!Inst)
+ continue;
+
+ // May cover a little bit more than the
+ // above pattern.
+ if (auto *Icmp1 = dyn_cast<ICmpInst>(Inst)) {
+ Value *Icmp1Op1 = Icmp1->getOperand(1);
+ if (!isa<Constant>(Icmp1Op1))
+ return false;
+ isCandidate = true;
+ continue;
+ }
+
+ // Ignore the use in the same basic block as the definition.
+ if (Inst->getParent() == I.getParent())
+ continue;
+
+ // The use is in a different basic block. If a call or a load/store
+ // instruction appears before this use in that block, the value most
+ // likely cannot be hoisted out anyway, so skip it.
+ for (auto &I2 : *Inst->getParent()) {
+ if (dyn_cast<CallInst>(&I2))
+ return false;
+ if (dyn_cast<LoadInst>(&I2) || dyn_cast<StoreInst>(&I2))
+ return false;
+ if (&I2 == Inst)
+ break;
+ }
+
+ // It should be used in a GEP or a simple arithmetic like
+ // ZEXT/SEXT which is used for GEP.
+ if (Inst->getOpcode() == Instruction::ZExt ||
+ Inst->getOpcode() == Instruction::SExt) {
+ PassThroughInfo Info(&I, Inst, 0);
+ Candidates.push_back(Info);
+ } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
+ // traverse GEP inst to find Use operand index
+ unsigned i, e;
+ for (i = 1, e = GI->getNumOperands(); i != e; ++i) {
+ Value *V = GI->getOperand(i);
+ if (V == &I)
+ break;
+ }
+ if (i == e)
+ continue;
+
+ PassThroughInfo Info(&I, GI, i);
+ Candidates.push_back(Info);
+ }
+ }
+
+ if (!isCandidate || Candidates.empty())
+ return false;
+
+ llvm::append_range(PassThroughs, Candidates);
+ return true;
+}
+
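A hedged C-level illustration of the pattern handled above (not part of the patch; names are invented): a value produced by a load is compared against a constant and then used for addressing only on the guarded path, and the passthrough keeps that guarded use from being hoisted above the check.

static int table[16];

// 'idx' is defined by a load, compared against a constant, and used in a
// GEP only in the guarded block; avoidSpeculation would reroute that use
// through the passthrough builtin.
static int guardedLoad(const unsigned *p) {
  unsigned idx = *p;   // var = ...
  if (idx < 16u)       // comp1 = icmp ult var, 16
    return table[idx]; // zext/gep use of var in the guarded block
  return -1;
}

int main() {
  unsigned i = 3;
  return guardedLoad(&i) == table[3] ? 0 : 1;
}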
+void BPFAdjustOptImpl::adjustBasicBlock(BasicBlock &BB) {
+ if (!DisableBPFserializeICMP && serializeICMPCrossBB(BB))
+ return;
+}
+
+void BPFAdjustOptImpl::adjustInst(Instruction &I) {
+ if (!DisableBPFserializeICMP && serializeICMPInBB(I))
+ return;
+ if (!DisableBPFavoidSpeculation && avoidSpeculation(I))
+ return;
+}
+
+PreservedAnalyses BPFAdjustOptPass::run(Module &M, ModuleAnalysisManager &AM) {
+ return BPFAdjustOptImpl(&M).run() ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h
index af6425b16fa0..0c504412480d 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h
@@ -13,6 +13,10 @@
namespace llvm {
+class BasicBlock;
+class Instruction;
+class Module;
+
class BPFCoreSharedInfo {
public:
enum PatchableRelocKind : uint32_t {
@@ -24,6 +28,10 @@ public:
FIELD_RSHIFT_U64,
BTF_TYPE_ID_LOCAL,
BTF_TYPE_ID_REMOTE,
+ TYPE_EXISTENCE,
+ TYPE_SIZE,
+ ENUM_VALUE_EXISTENCE,
+ ENUM_VALUE,
MAX_FIELD_RELOC_KIND,
};
@@ -35,10 +43,32 @@ public:
MAX_BTF_TYPE_ID_FLAG,
};
+ enum PreserveTypeInfo : uint32_t {
+ PRESERVE_TYPE_INFO_EXISTENCE = 0,
+ PRESERVE_TYPE_INFO_SIZE,
+
+ MAX_PRESERVE_TYPE_INFO_FLAG,
+ };
+
+ enum PreserveEnumValue : uint32_t {
+ PRESERVE_ENUM_VALUE_EXISTENCE = 0,
+ PRESERVE_ENUM_VALUE,
+
+ MAX_PRESERVE_ENUM_VALUE_FLAG,
+ };
+
/// The attribute attached to globals representing a field access
static constexpr StringRef AmaAttr = "btf_ama";
/// The attribute attached to globals representing a type id
static constexpr StringRef TypeIdAttr = "btf_type_id";
+
+ /// llvm.bpf.passthrough builtin seq number
+ static uint32_t SeqNum;
+
+ /// Insert a bpf passthrough builtin function.
+ static Instruction *insertPassThrough(Module *M, BasicBlock *BB,
+ Instruction *Input,
+ Instruction *Before);
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
new file mode 100644
index 000000000000..5239218ad003
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -0,0 +1,130 @@
+//===------------ BPFCheckAndAdjustIR.cpp - Check and Adjust IR -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Check the IR and adjust it to make it more friendly to the kernel verifier.
+// The following are done for IR checking:
+// - no relocation globals in PHI node.
+// The following are done for IR adjustment:
+// - remove __builtin_bpf_passthrough builtins. Target independent IR
+// optimizations are done and those builtins can be removed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "BPFTargetMachine.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "bpf-check-and-opt-ir"
+
+using namespace llvm;
+
+namespace {
+
+class BPFCheckAndAdjustIR final : public ModulePass {
+ bool runOnModule(Module &F) override;
+
+public:
+ static char ID;
+ BPFCheckAndAdjustIR() : ModulePass(ID) {}
+
+private:
+ void checkIR(Module &M);
+ bool adjustIR(Module &M);
+ bool removePassThroughBuiltin(Module &M);
+};
+} // End anonymous namespace
+
+char BPFCheckAndAdjustIR::ID = 0;
+INITIALIZE_PASS(BPFCheckAndAdjustIR, DEBUG_TYPE, "BPF Check And Adjust IR",
+ false, false)
+
+ModulePass *llvm::createBPFCheckAndAdjustIR() {
+ return new BPFCheckAndAdjustIR();
+}
+
+void BPFCheckAndAdjustIR::checkIR(Module &M) {
+ // Ensure that relocation globals do not appear in PHI nodes.
+ // This may happen if the compiler generated the following code:
+ // B1:
+ // g1 = @llvm.skb_buff:0:1...
+ // ...
+ // goto B_COMMON
+ // B2:
+ // g2 = @llvm.skb_buff:0:2...
+ // ...
+ // goto B_COMMON
+ // B_COMMON:
+ // g = PHI(g1, g2)
+ // x = load g
+ // ...
+ // If anything like the above "g = PHI(g1, g2)" is found, issue a fatal error.
+ for (Function &F : M)
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ PHINode *PN = dyn_cast<PHINode>(&I);
+ if (!PN || PN->use_empty())
+ continue;
+ for (int i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
+ auto *GV = dyn_cast<GlobalVariable>(PN->getIncomingValue(i));
+ if (!GV)
+ continue;
+ if (GV->hasAttribute(BPFCoreSharedInfo::AmaAttr) ||
+ GV->hasAttribute(BPFCoreSharedInfo::TypeIdAttr))
+ report_fatal_error("relocation global in PHI node");
+ }
+ }
+}
+
+bool BPFCheckAndAdjustIR::removePassThroughBuiltin(Module &M) {
+ // Remove the __builtin_bpf_passthrough() calls that were inserted to block
+ // certain IR optimizations. The major IR optimizations have run by now,
+ // so those calls can be removed.
+ bool Changed = false;
+ CallInst *ToBeDeleted = nullptr;
+ for (Function &F : M)
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ if (ToBeDeleted) {
+ ToBeDeleted->eraseFromParent();
+ ToBeDeleted = nullptr;
+ }
+
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (!Call)
+ continue;
+ auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
+ if (!GV)
+ continue;
+ if (!GV->getName().startswith("llvm.bpf.passthrough"))
+ continue;
+ Changed = true;
+ Value *Arg = Call->getArgOperand(1);
+ Call->replaceAllUsesWith(Arg);
+ ToBeDeleted = Call;
+ }
+ return Changed;
+}
+
+bool BPFCheckAndAdjustIR::adjustIR(Module &M) {
+ return removePassThroughBuiltin(M);
+}
+
+bool BPFCheckAndAdjustIR::runOnModule(Module &M) {
+ checkIR(M);
+ return adjustIR(M);
+}
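The removePassThroughBuiltin() loop above defers erasing a matched call until the top of the next iteration, so the instruction iterator currently in use is never invalidated. A minimal, self-contained C++ sketch of that deferred-erase idiom, using std::list instead of LLVM's instruction lists (illustrative only):

#include <iostream>
#include <list>

int main() {
  // Drop every negative element in a single pass: mark the node, keep
  // iterating, and erase the marked node at the start of the next iteration,
  // mirroring the ToBeDeleted handling in removePassThroughBuiltin().
  std::list<int> Values{3, -1, 4, -1, 5, -9, 2};
  auto ToBeDeleted = Values.end();
  for (auto It = Values.begin(); It != Values.end(); ++It) {
    if (ToBeDeleted != Values.end()) {
      Values.erase(ToBeDeleted);
      ToBeDeleted = Values.end();
    }
    if (*It < 0)
      ToBeDeleted = It;
  }
  if (ToBeDeleted != Values.end())
    Values.erase(ToBeDeleted);
  for (int V : Values)
    std::cout << V << ' ';   // prints: 3 4 5 2
  std::cout << '\n';
}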
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 77f565fb5957..f10a0d4c0077 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -494,8 +494,6 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV);
I++;
CurDAG->DeleteNode(Node);
-
- return;
}
FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp
index a02556a39909..3322b8d93b3a 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrFormats.td
index 9f00dc85d789..a809065014e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -44,6 +44,9 @@ def BPF_MOV : BPFArithOp<0xb>;
def BPF_ARSH : BPFArithOp<0xc>;
def BPF_END : BPFArithOp<0xd>;
+def BPF_XCHG : BPFArithOp<0xe>;
+def BPF_CMPXCHG : BPFArithOp<0xf>;
+
class BPFEndDir<bits<1> val> {
bits<1> Value = val;
}
@@ -86,7 +89,13 @@ def BPF_IMM : BPFModeModifer<0x0>;
def BPF_ABS : BPFModeModifer<0x1>;
def BPF_IND : BPFModeModifer<0x2>;
def BPF_MEM : BPFModeModifer<0x3>;
-def BPF_XADD : BPFModeModifer<0x6>;
+def BPF_ATOMIC : BPFModeModifer<0x6>;
+
+class BPFAtomicFlag<bits<4> val> {
+ bits<4> Value = val;
+}
+
+def BPF_FETCH : BPFAtomicFlag<0x1>;
class InstBPF<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
index 4298e2eaec04..082e1f4a92c2 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -617,9 +617,9 @@ let Predicates = [BPFNoALU32] in {
def : Pat<(i64 (extloadi32 ADDRri:$src)), (i64 (LDW ADDRri:$src))>;
}
-// Atomics
+// Atomic XADD for BPFNoALU32
class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
- : TYPE_LD_ST<BPF_XADD.Value, SizeOp.Value,
+ : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
(outs GPR:$dst),
(ins MEMri:$addr, GPR:$val),
"lock *("#OpcodeStr#" *)($addr) += $val",
@@ -630,14 +630,88 @@ class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
let Inst{51-48} = addr{19-16}; // base reg
let Inst{55-52} = dst;
let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = BPF_ADD.Value;
let BPFClass = BPF_STX;
}
-class XADD32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
- : TYPE_LD_ST<BPF_XADD.Value, SizeOp.Value,
+let Constraints = "$dst = $val" in {
+ let Predicates = [BPFNoALU32] in {
+ def XADDW : XADD<BPF_W, "u32", atomic_load_add_32>;
+ }
+}
+
+// Atomic add, and, or, xor
+class ATOMIC_NOFETCH<BPFArithOp Opc, string Opstr>
+ : TYPE_LD_ST<BPF_ATOMIC.Value, BPF_DW.Value,
+ (outs GPR:$dst),
+ (ins MEMri:$addr, GPR:$val),
+ "lock *(u64 *)($addr) " #Opstr# "= $val",
+ []> {
+ bits<4> dst;
+ bits<20> addr;
+
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = dst;
+ let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = Opc.Value;
+ let BPFClass = BPF_STX;
+}
+
+class ATOMIC32_NOFETCH<BPFArithOp Opc, string Opstr>
+ : TYPE_LD_ST<BPF_ATOMIC.Value, BPF_W.Value,
(outs GPR32:$dst),
(ins MEMri:$addr, GPR32:$val),
- "lock *("#OpcodeStr#" *)($addr) += $val",
+ "lock *(u32 *)($addr) " #Opstr# "= $val",
+ []> {
+ bits<4> dst;
+ bits<20> addr;
+
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = dst;
+ let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = Opc.Value;
+ let BPFClass = BPF_STX;
+}
+
+let Constraints = "$dst = $val" in {
+ let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
+ def XADDW32 : ATOMIC32_NOFETCH<BPF_ADD, "+">;
+ def XANDW32 : ATOMIC32_NOFETCH<BPF_AND, "&">;
+ def XORW32 : ATOMIC32_NOFETCH<BPF_OR, "|">;
+ def XXORW32 : ATOMIC32_NOFETCH<BPF_XOR, "^">;
+ }
+
+ def XADDD : ATOMIC_NOFETCH<BPF_ADD, "+">;
+ def XANDD : ATOMIC_NOFETCH<BPF_AND, "&">;
+ def XORD : ATOMIC_NOFETCH<BPF_OR, "|">;
+ def XXORD : ATOMIC_NOFETCH<BPF_XOR, "^">;
+}
+
+// Atomic Fetch-and-<add, and, or, xor> operations
+class XFALU64<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
+ string OpcStr, PatFrag OpNode>
+ : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+ (outs GPR:$dst),
+ (ins MEMri:$addr, GPR:$val),
+ "$dst = atomic_fetch_"#OpcStr#"(("#OpcodeStr#" *)($addr), $val)",
+ [(set GPR:$dst, (OpNode ADDRri:$addr, GPR:$val))]> {
+ bits<4> dst;
+ bits<20> addr;
+
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = dst;
+ let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = Opc.Value;
+ let Inst{3-0} = BPF_FETCH.Value;
+ let BPFClass = BPF_STX;
+}
+
+class XFALU32<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
+ string OpcStr, PatFrag OpNode>
+ : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+ (outs GPR32:$dst),
+ (ins MEMri:$addr, GPR32:$val),
+ "$dst = atomic_fetch_"#OpcStr#"(("#OpcodeStr#" *)($addr), $val)",
[(set GPR32:$dst, (OpNode ADDRri:$addr, GPR32:$val))]> {
bits<4> dst;
bits<20> addr;
@@ -645,19 +719,117 @@ class XADD32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
let Inst{51-48} = addr{19-16}; // base reg
let Inst{55-52} = dst;
let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = Opc.Value;
+ let Inst{3-0} = BPF_FETCH.Value;
let BPFClass = BPF_STX;
}
let Constraints = "$dst = $val" in {
- let Predicates = [BPFNoALU32] in {
- def XADDW : XADD<BPF_W, "u32", atomic_load_add_32>;
+ let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
+ def XFADDW32 : XFALU32<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_32>;
+ def XFANDW32 : XFALU32<BPF_W, BPF_AND, "u32", "and", atomic_load_and_32>;
+ def XFORW32 : XFALU32<BPF_W, BPF_OR, "u32", "or", atomic_load_or_32>;
+ def XFXORW32 : XFALU32<BPF_W, BPF_XOR, "u32", "xor", atomic_load_xor_32>;
}
+ def XFADDD : XFALU64<BPF_DW, BPF_ADD, "u64", "add", atomic_load_add_64>;
+ def XFANDD : XFALU64<BPF_DW, BPF_AND, "u64", "and", atomic_load_and_64>;
+ def XFORD : XFALU64<BPF_DW, BPF_OR, "u64", "or", atomic_load_or_64>;
+ def XFXORD : XFALU64<BPF_DW, BPF_XOR, "u64", "xor", atomic_load_xor_64>;
+}
+
+// atomic_load_sub can be represented as a neg followed
+// by an atomic_load_add.
+def : Pat<(atomic_load_sub_32 ADDRri:$addr, GPR32:$val),
+ (XFADDW32 ADDRri:$addr, (NEG_32 GPR32:$val))>;
+def : Pat<(atomic_load_sub_64 ADDRri:$addr, GPR:$val),
+ (XFADDD ADDRri:$addr, (NEG_64 GPR:$val))>;
+
+// Atomic Exchange
+class XCHG<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
+ : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+ (outs GPR:$dst),
+ (ins MEMri:$addr, GPR:$val),
+ "$dst = xchg_"#OpcodeStr#"($addr, $val)",
+ [(set GPR:$dst, (OpNode ADDRri:$addr,GPR:$val))]> {
+ bits<4> dst;
+ bits<20> addr;
+
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = dst;
+ let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = BPF_XCHG.Value;
+ let Inst{3-0} = BPF_FETCH.Value;
+ let BPFClass = BPF_STX;
+}
+
+class XCHG32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
+ : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+ (outs GPR32:$dst),
+ (ins MEMri:$addr, GPR32:$val),
+ "$dst = xchg32_"#OpcodeStr#"($addr, $val)",
+ [(set GPR32:$dst, (OpNode ADDRri:$addr,GPR32:$val))]> {
+ bits<4> dst;
+ bits<20> addr;
+
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = dst;
+ let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = BPF_XCHG.Value;
+ let Inst{3-0} = BPF_FETCH.Value;
+ let BPFClass = BPF_STX;
+}
+
+let Constraints = "$dst = $val" in {
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
- def XADDW32 : XADD32<BPF_W, "u32", atomic_load_add_32>;
+ def XCHGW32 : XCHG32<BPF_W, "32", atomic_swap_32>;
}
- def XADDD : XADD<BPF_DW, "u64", atomic_load_add_64>;
+ def XCHGD : XCHG<BPF_DW, "64", atomic_swap_64>;
+}
+
+// Compare-And-Exchange
+class CMPXCHG<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
+ : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+ (outs),
+ (ins MEMri:$addr, GPR:$new),
+ "r0 = cmpxchg_"#OpcodeStr#"($addr, r0, $new)",
+ [(set R0, (OpNode ADDRri:$addr, R0, GPR:$new))]> {
+ bits<4> new;
+ bits<20> addr;
+
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = new;
+ let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = BPF_CMPXCHG.Value;
+ let Inst{3-0} = BPF_FETCH.Value;
+ let BPFClass = BPF_STX;
+}
+
+class CMPXCHG32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
+ : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+ (outs),
+ (ins MEMri:$addr, GPR32:$new),
+ "w0 = cmpxchg32_"#OpcodeStr#"($addr, w0, $new)",
+ [(set W0, (OpNode ADDRri:$addr, W0, GPR32:$new))]> {
+ bits<4> new;
+ bits<20> addr;
+
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = new;
+ let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = BPF_CMPXCHG.Value;
+ let Inst{3-0} = BPF_FETCH.Value;
+ let BPFClass = BPF_STX;
+}
+
+let Predicates = [BPFHasALU32], Defs = [W0], Uses = [W0],
+ DecoderNamespace = "BPFALU32" in {
+ def CMPXCHGW32 : CMPXCHG32<BPF_W, "32", atomic_cmp_swap_32>;
+}
+
+let Defs = [R0], Uses = [R0] in {
+ def CMPXCHGD : CMPXCHG<BPF_DW, "64", atomic_cmp_swap_64>;
}
// bswap16, bswap32, bswap64
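The atomic additions above come in three shapes: non-fetching "lock *(size *)(addr) op= val" forms, fetch-and-<op> forms that return the old value, and exchange/compare-exchange forms. CMPXCHG keeps the expected/old value implicitly in R0 (W0 for the 32-bit variant), which is why those defs carry Defs/Uses of R0/W0, and atomic subtraction is rewritten as a negate feeding the fetch-add pattern. A hedged source-level sketch of constructs these patterns are meant to cover, assuming a clang build targeting BPF with ALU32 enabled (function names are illustrative):

#include <atomic>

// fetch_sub(n) is fetch_add(-n); per the Pat<> rewrites above the backend
// can select NEG_64 followed by XFADDD rather than needing a native
// atomic-subtract encoding.
long take_tokens(std::atomic<long> &bucket, long n) {
  return bucket.fetch_sub(n);
}

// Compare-and-swap: if *counter still holds `expected`, store `desired`.
// In the CMPXCHGD encoding above the expected/old value travels implicitly
// in r0, so the instruction has no explicit output operand.
bool try_update(std::atomic<long> &counter, long expected, long desired) {
  return counter.compare_exchange_strong(expected, desired);
}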
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIChecking.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIChecking.cpp
index f82f166eda4d..4e24e3d911b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIChecking.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -41,7 +41,7 @@ private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
- void checkingIllegalXADD(void);
+ bool processAtomicInsts(void);
public:
@@ -49,7 +49,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override {
if (!skipFunction(MF.getFunction())) {
initialize(MF);
- checkingIllegalXADD();
+ return processAtomicInsts();
}
return false;
}
@@ -143,17 +143,15 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
return true;
// Otherwise, return true if any aliased SuperReg of GPR32 is not dead.
- std::vector<unsigned>::iterator search_begin = GPR64DeadDefs.begin();
- std::vector<unsigned>::iterator search_end = GPR64DeadDefs.end();
for (auto I : GPR32LiveDefs)
for (MCSuperRegIterator SR(I, TRI); SR.isValid(); ++SR)
- if (std::find(search_begin, search_end, *SR) == search_end)
- return true;
+ if (!llvm::is_contained(GPR64DeadDefs, *SR))
+ return true;
return false;
}
-void BPFMIPreEmitChecking::checkingIllegalXADD(void) {
+bool BPFMIPreEmitChecking::processAtomicInsts(void) {
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
if (MI.getOpcode() != BPF::XADDW &&
@@ -174,7 +172,71 @@ void BPFMIPreEmitChecking::checkingIllegalXADD(void) {
}
}
- return;
+ // Check the return values of atomic_fetch_and_{add,and,or,xor}.
+ // If the return value is not used, the atomic_fetch_and_<op> instruction
+ // is replaced with the corresponding atomic_<op> instruction.
+ MachineInstr *ToErase = nullptr;
+ bool Changed = false;
+ const BPFInstrInfo *TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ if (MI.getOpcode() != BPF::XFADDW32 && MI.getOpcode() != BPF::XFADDD &&
+ MI.getOpcode() != BPF::XFANDW32 && MI.getOpcode() != BPF::XFANDD &&
+ MI.getOpcode() != BPF::XFXORW32 && MI.getOpcode() != BPF::XFXORD &&
+ MI.getOpcode() != BPF::XFORW32 && MI.getOpcode() != BPF::XFORD)
+ continue;
+
+ if (hasLiveDefs(MI, TRI))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Transforming "; MI.dump());
+ unsigned newOpcode;
+ switch (MI.getOpcode()) {
+ case BPF::XFADDW32:
+ newOpcode = BPF::XADDW32;
+ break;
+ case BPF::XFADDD:
+ newOpcode = BPF::XADDD;
+ break;
+ case BPF::XFANDW32:
+ newOpcode = BPF::XANDW32;
+ break;
+ case BPF::XFANDD:
+ newOpcode = BPF::XANDD;
+ break;
+ case BPF::XFXORW32:
+ newOpcode = BPF::XXORW32;
+ break;
+ case BPF::XFXORD:
+ newOpcode = BPF::XXORD;
+ break;
+ case BPF::XFORW32:
+ newOpcode = BPF::XORW32;
+ break;
+ case BPF::XFORD:
+ newOpcode = BPF::XORD;
+ break;
+ default:
+ llvm_unreachable("Incorrect Atomic Instruction Opcode");
+ }
+
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(newOpcode))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+
+ ToErase = &MI;
+ Changed = true;
+ }
+ }
+
+ return Changed;
}
} // end default namespace
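The second loop in processAtomicInsts() keys on whether the fetch result of an atomic instruction is still live: when nothing reads it, the fetch-and-<op> instruction is downgraded to the cheaper non-fetching form. A small illustration of the two shapes being distinguished (C++ for illustration only; the real check is on dead/live machine-operand defs, not on source code):

#include <atomic>

std::atomic<long> Hits{0};

// Result discarded: a candidate for the XFADDD -> XADDD style rewrite above.
void bump() { Hits.fetch_add(1); }

// Result used: must stay a fetch-and-add so the old value is available.
long bump_and_get() { return Hits.fetch_add(1) + 1; }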
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index df870314fffe..354980e4bf3c 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -475,6 +475,9 @@ bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
if (MI.getOpcode() == BPF::SRL_ri &&
MI.getOperand(2).getImm() == 32) {
SrcReg = MI.getOperand(1).getReg();
+ if (!MRI->hasOneNonDBGUse(SrcReg))
+ continue;
+
MI2 = MRI->getVRegDef(SrcReg);
DstReg = MI.getOperand(0).getReg();
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
index c3cb7647aa79..0348e2200acb 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -33,58 +34,32 @@ using namespace llvm;
namespace {
-class BPFPreserveDIType final : public ModulePass {
- StringRef getPassName() const override {
- return "BPF Preserve DebugInfo Type";
- }
-
- bool runOnModule(Module &M) override;
-
-public:
- static char ID;
- BPFPreserveDIType() : ModulePass(ID) {}
-
-private:
- bool doTransformation(Module &M);
-};
-} // End anonymous namespace
-
-char BPFPreserveDIType::ID = 0;
-INITIALIZE_PASS(BPFPreserveDIType, DEBUG_TYPE, "preserve debuginfo type", false,
- false)
-
-ModulePass *llvm::createBPFPreserveDIType() { return new BPFPreserveDIType(); }
-
-bool BPFPreserveDIType::runOnModule(Module &M) {
+static bool BPFPreserveDITypeImpl(Function &F) {
LLVM_DEBUG(dbgs() << "********** preserve debuginfo type **********\n");
+ Module *M = F.getParent();
+
// Bail out if no debug info.
- if (M.debug_compile_units().empty())
+ if (M->debug_compile_units().empty())
return false;
- return doTransformation(M);
-}
-
-bool BPFPreserveDIType::doTransformation(Module &M) {
std::vector<CallInst *> PreserveDITypeCalls;
- for (auto &F : M) {
- for (auto &BB : F) {
- for (auto &I : BB) {
- auto *Call = dyn_cast<CallInst>(&I);
- if (!Call)
- continue;
-
- const auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
- if (!GV)
- continue;
-
- if (GV->getName().startswith("llvm.bpf.btf.type.id")) {
- if (!Call->getMetadata(LLVMContext::MD_preserve_access_index))
- report_fatal_error(
- "Missing metadata for llvm.bpf.btf.type.id intrinsic");
- PreserveDITypeCalls.push_back(Call);
- }
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (!Call)
+ continue;
+
+ const auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
+ if (!GV)
+ continue;
+
+ if (GV->getName().startswith("llvm.bpf.btf.type.id")) {
+ if (!Call->getMetadata(LLVMContext::MD_preserve_access_index))
+ report_fatal_error(
+ "Missing metadata for llvm.bpf.btf.type.id intrinsic");
+ PreserveDITypeCalls.push_back(Call);
}
}
}
@@ -93,39 +68,81 @@ bool BPFPreserveDIType::doTransformation(Module &M) {
return false;
std::string BaseName = "llvm.btf_type_id.";
- int Count = 0;
+ static int Count = 0;
for (auto Call : PreserveDITypeCalls) {
- const ConstantInt *Flag = dyn_cast<ConstantInt>(Call->getArgOperand(2));
+ const ConstantInt *Flag = dyn_cast<ConstantInt>(Call->getArgOperand(1));
assert(Flag);
uint64_t FlagValue = Flag->getValue().getZExtValue();
if (FlagValue >= BPFCoreSharedInfo::MAX_BTF_TYPE_ID_FLAG)
report_fatal_error("Incorrect flag for llvm.bpf.btf.type.id intrinsic");
+ MDNode *MD = Call->getMetadata(LLVMContext::MD_preserve_access_index);
+
uint32_t Reloc;
- if (FlagValue == BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL_RELOC)
+ if (FlagValue == BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL_RELOC) {
Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL;
- else
+ } else {
Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE;
+ DIType *Ty = cast<DIType>(MD);
+ while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ unsigned Tag = DTy->getTag();
+ if (Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type)
+ break;
+ Ty = DTy->getBaseType();
+ }
+
+ if (Ty->getName().empty())
+ report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc");
+ MD = Ty;
+ }
BasicBlock *BB = Call->getParent();
- IntegerType *VarType = Type::getInt32Ty(BB->getContext());
+ IntegerType *VarType = Type::getInt64Ty(BB->getContext());
std::string GVName = BaseName + std::to_string(Count) + "$" +
std::to_string(Reloc);
- GlobalVariable *GV =
- new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage,
- NULL, GVName);
+ GlobalVariable *GV = new GlobalVariable(
+ *M, VarType, false, GlobalVariable::ExternalLinkage, NULL, GVName);
GV->addAttribute(BPFCoreSharedInfo::TypeIdAttr);
- MDNode *MD = Call->getMetadata(LLVMContext::MD_preserve_access_index);
GV->setMetadata(LLVMContext::MD_preserve_access_index, MD);
// Load the global variable which represents the type info.
- auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV, "",
- Call);
- Call->replaceAllUsesWith(LDInst);
+ auto *LDInst =
+ new LoadInst(Type::getInt64Ty(BB->getContext()), GV, "", Call);
+ Instruction *PassThroughInst =
+ BPFCoreSharedInfo::insertPassThrough(M, BB, LDInst, Call);
+ Call->replaceAllUsesWith(PassThroughInst);
Call->eraseFromParent();
Count++;
}
return true;
}
+
+class BPFPreserveDIType final : public FunctionPass {
+ bool runOnFunction(Function &F) override;
+
+public:
+ static char ID;
+ BPFPreserveDIType() : FunctionPass(ID) {}
+};
+} // End anonymous namespace
+
+char BPFPreserveDIType::ID = 0;
+INITIALIZE_PASS(BPFPreserveDIType, DEBUG_TYPE, "BPF Preserve Debuginfo Type",
+ false, false)
+
+FunctionPass *llvm::createBPFPreserveDIType() {
+ return new BPFPreserveDIType();
+}
+
+bool BPFPreserveDIType::runOnFunction(Function &F) {
+ return BPFPreserveDITypeImpl(F);
+}
+
+PreservedAnalyses BPFPreserveDITypePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return BPFPreserveDITypeImpl(F) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
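The rewritten pass above services the llvm.bpf.btf.type.id intrinsic, which clang emits for the __builtin_btf_type_id() builtin: the call is replaced by a load of a 64-bit external global whose name encodes the relocation kind, wrapped in a passthrough call so later optimizations cannot fold it away. A hedged usage sketch, assuming a clang build targeting BPF (the struct, function name, and literal flag value are illustrative; valid flags are those below BPFCoreSharedInfo::MAX_BTF_TYPE_ID_FLAG):

struct event { unsigned long long ts; int cpu; };

// Ask for the BTF type id of `struct event`; flag 0 is used here as the
// "local" relocation kind in this sketch.
static unsigned long long event_type_id(void) {
  return __builtin_btf_type_id(*(struct event *)0, 0);
}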
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp
index f3cb03b1f1f5..fac02e6476b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -29,7 +29,7 @@ BPFSubtarget &BPFSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
initSubtargetFeatures(CPU, FS);
- ParseSubtargetFeatures(CPU, FS);
+ ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
return *this;
}
@@ -59,6 +59,6 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : BPFGenSubtargetInfo(TT, CPU, FS), InstrInfo(),
+ : BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(),
FrameLowering(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this) {}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.h
index 3da6a026ab7e..7649e0e92222 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -67,7 +67,7 @@ public:
// ParseSubtargetFeatures - Parses features string setting specified
// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
bool getHasJmpExt() const { return HasJmpExt; }
bool getHasJmp32() const { return HasJmp32; }
bool getHasAlu32() const { return HasAlu32; }
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 54204ee197ec..a8fef2517b03 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -12,15 +12,22 @@
#include "BPFTargetMachine.h"
#include "BPF.h"
+#include "BPFTargetTransformInfo.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
#include "TargetInfo/BPFTargetInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
using namespace llvm;
static cl::
@@ -34,8 +41,10 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
RegisterTargetMachine<BPFTargetMachine> Z(getTheBPFTarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
- initializeBPFAbstractMemberAccessPass(PR);
+ initializeBPFAbstractMemberAccessLegacyPassPass(PR);
initializeBPFPreserveDITypePass(PR);
+ initializeBPFAdjustOptPass(PR);
+ initializeBPFCheckAndAdjustIRPass(PR);
initializeBPFMIPeepholePass(PR);
initializeBPFMIPeepholeTruncElimPass(PR);
}
@@ -49,9 +58,7 @@ static std::string computeDataLayout(const Triple &TT) {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::PIC_;
- return *RM;
+ return RM.getValueOr(Reloc::PIC_);
}
BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
@@ -94,14 +101,56 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
return new BPFPassConfig(*this, PM);
}
-void BPFPassConfig::addIRPasses() {
+void BPFTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
+ Builder.addExtension(
+ PassManagerBuilder::EP_EarlyAsPossible,
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ PM.add(createBPFAbstractMemberAccess(this));
+ PM.add(createBPFPreserveDIType());
+ });
+
+ Builder.addExtension(
+ PassManagerBuilder::EP_Peephole,
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ PM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().hoistCommonInsts(true)));
+ });
+ Builder.addExtension(
+ PassManagerBuilder::EP_ModuleOptimizerEarly,
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ PM.add(createBPFAdjustOpt());
+ });
+}
- addPass(createBPFAbstractMemberAccess(&getBPFTargetMachine()));
- addPass(createBPFPreserveDIType());
+void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
+ bool DebugPassManager) {
+ PB.registerPipelineStartEPCallback(
+ [=](ModulePassManager &MPM, PassBuilder::OptimizationLevel) {
+ FunctionPassManager FPM(DebugPassManager);
+ FPM.addPass(BPFAbstractMemberAccessPass(this));
+ FPM.addPass(BPFPreserveDITypePass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ });
+ PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM,
+ PassBuilder::OptimizationLevel Level) {
+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
+ });
+ PB.registerPipelineEarlySimplificationEPCallback(
+ [=](ModulePassManager &MPM, PassBuilder::OptimizationLevel) {
+ MPM.addPass(BPFAdjustOptPass());
+ });
+}
+void BPFPassConfig::addIRPasses() {
+ addPass(createBPFCheckAndAdjustIR());
TargetPassConfig::addIRPasses();
}
+TargetTransformInfo
+BPFTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(BPFTTIImpl(this, F));
+}
+
// Install an instruction selector pass using
// the ISelDag to gen BPF code.
bool BPFPassConfig::addInstSelector() {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.h
index beac7bd862da..61c8a44cc402 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.h
@@ -34,9 +34,15 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+
+ void adjustPassManager(PassManagerBuilder &) override;
+ void registerPassBuilderCallbacks(PassBuilder &PB,
+ bool DebugPassManager) override;
};
}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
new file mode 100644
index 000000000000..62055497e685
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
@@ -0,0 +1,61 @@
+//===------ BPFTargetTransformInfo.h - BPF specific TTI ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file uses the target's specific information to
+// provide more precise answers to certain TTI queries, while letting the
+// target independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
+
+#include "BPFTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+
+namespace llvm {
+class BPFTTIImpl : public BasicTTIImplBase<BPFTTIImpl> {
+ typedef BasicTTIImplBase<BPFTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const BPFSubtarget *ST;
+ const BPFTargetLowering *TLI;
+
+ const BPFSubtarget *getST() const { return ST; }
+ const BPFTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit BPFTTIImpl(const BPFTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) {
+ if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+
+ return TTI::TCC_Basic;
+ }
+
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
+ TTI::TargetCostKind CostKind,
+ const llvm::Instruction *I = nullptr) {
+ if (Opcode == Instruction::Select)
+ return SCEVCheapExpansionBudget;
+
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+ I);
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
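The getIntImmCost() hook above reports any constant whose value fits a signed 32-bit immediate as free and everything else at the basic cost, reflecting that BPF ALU instructions carry a 32-bit immediate while wider constants have to be materialized separately. Two hedged data points on either side of the isInt<32> boundary (function names are illustrative):

// 0x7fffffff fits a signed 32-bit immediate -> TTI::TCC_Free above.
long add_small(long x) { return x + 0x7fffffff; }

// 0x100000000 does not fit -> TTI::TCC_Basic; the constant must be
// materialized separately (e.g. with a 64-bit immediate load).
long add_large(long x) { return x + 0x100000000L; }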
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BTF.def b/contrib/llvm-project/llvm/lib/Target/BPF/BTF.def
index 2d2e9a04aa6d..66cf2c90ead4 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BTF.def
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BTF.def
@@ -30,5 +30,6 @@ HANDLE_BTF_KIND(12, FUNC)
HANDLE_BTF_KIND(13, FUNC_PROTO)
HANDLE_BTF_KIND(14, VAR)
HANDLE_BTF_KIND(15, DATASEC)
+HANDLE_BTF_KIND(16, FLOAT)
#undef HANDLE_BTF_KIND
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
index 4510e9357489..9249d679c7bd 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/LineIterator.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
@@ -370,6 +371,21 @@ void BTFKindDataSec::emitType(MCStreamer &OS) {
}
}
+BTFTypeFloat::BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName)
+ : Name(TypeName) {
+ Kind = BTF::BTF_KIND_FLOAT;
+ BTFType.Info = Kind << 24;
+ BTFType.Size = roundupToBytes(SizeInBits);
+}
+
+void BTFTypeFloat::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
uint32_t BTFStringTable::addString(StringRef S) {
// Check whether the string already exists.
for (auto &OffsetM : OffsetToIdMap) {
@@ -408,18 +424,28 @@ uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
}
void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) {
- // Only int types are supported in BTF.
+ // Only int and binary floating point types are supported in BTF.
uint32_t Encoding = BTy->getEncoding();
- if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed &&
- Encoding != dwarf::DW_ATE_signed_char &&
- Encoding != dwarf::DW_ATE_unsigned &&
- Encoding != dwarf::DW_ATE_unsigned_char)
+ std::unique_ptr<BTFTypeBase> TypeEntry;
+ switch (Encoding) {
+ case dwarf::DW_ATE_boolean:
+ case dwarf::DW_ATE_signed:
+ case dwarf::DW_ATE_signed_char:
+ case dwarf::DW_ATE_unsigned:
+ case dwarf::DW_ATE_unsigned_char:
+ // Create a BTF type instance for this DIBasicType and put it into
+ // DIToIdMap for cross-type reference check.
+ TypeEntry = std::make_unique<BTFTypeInt>(
+ Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
+ break;
+ case dwarf::DW_ATE_float:
+ TypeEntry =
+ std::make_unique<BTFTypeFloat>(BTy->getSizeInBits(), BTy->getName());
+ break;
+ default:
return;
+ }
- // Create a BTF type instance for this DIBasicType and put it into
- // DIToIdMap for cross-type reference check.
- auto TypeEntry = std::make_unique<BTFTypeInt>(
- Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
TypeId = addType(std::move(TypeEntry), BTy);
}
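With the DW_ATE_float case above, scalar binary floating-point basic types now receive a BTF_KIND_FLOAT entry sized in bytes (via roundupToBytes) instead of being skipped. A hedged illustration of what is now described, assuming the program is compiled for BPF with BTF debug info (variable names are illustrative):

// Previously these produced no BTF type entry; with BTF_KIND_FLOAT they are
// recorded with their byte size.
double average_latency_ns;  // BTF_KIND_FLOAT, size 8
float sample_ratio;         // BTF_KIND_FLOAT, size 4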
@@ -993,12 +1019,13 @@ void BTFDebug::generatePatchImmReloc(const MCSymbol *ORSym, uint32_t RootId,
FieldReloc.OffsetNameOff = addString(IndexPattern);
FieldReloc.RelocKind = std::stoull(std::string(RelocKindStr));
- PatchImms[GVar] = std::stoul(std::string(PatchImmStr));
+ PatchImms[GVar] = std::make_pair(std::stoll(std::string(PatchImmStr)),
+ FieldReloc.RelocKind);
} else {
StringRef RelocStr = AccessPattern.substr(FirstDollar + 1);
FieldReloc.OffsetNameOff = addString("0");
FieldReloc.RelocKind = std::stoull(std::string(RelocStr));
- PatchImms[GVar] = RootId;
+ PatchImms[GVar] = std::make_pair(RootId, FieldReloc.RelocKind);
}
FieldRelocTable[SecNameOff].push_back(FieldReloc);
}
@@ -1074,6 +1101,9 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) {
}
}
+ if (!CurMI) // no debug info
+ return;
+
// Skip this instruction if no DebugLoc or the DebugLoc
// is the same as the previous instruction.
const DebugLoc &DL = MI->getDebugLoc();
@@ -1125,6 +1155,20 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
if (ProcessingMapDef != SecName.startswith(".maps"))
continue;
+ // Create a .rodata datasec if the global variable is an initialized
+ // constant with private linkage and if it won't be in .rodata.str<#>
+ // and .rodata.cst<#> sections.
+ if (SecName == ".rodata" && Global.hasPrivateLinkage() &&
+ DataSecEntries.find(std::string(SecName)) == DataSecEntries.end()) {
+ SectionKind GVKind =
+ TargetLoweringObjectFile::getKindForGlobal(&Global, Asm->TM);
+ // skip .rodata.str<#> and .rodata.cst<#> sections
+ if (!GVKind.isMergeableCString() && !GVKind.isMergeableConst()) {
+ DataSecEntries[std::string(SecName)] =
+ std::make_unique<BTFKindDataSec>(Asm, std::string(SecName));
+ }
+ }
+
SmallVector<DIGlobalVariableExpression *, 1> GVs;
Global.getDebugInfo(GVs);
@@ -1152,6 +1196,7 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
if (Linkage != GlobalValue::InternalLinkage &&
Linkage != GlobalValue::ExternalLinkage &&
Linkage != GlobalValue::WeakAnyLinkage &&
+ Linkage != GlobalValue::WeakODRLinkage &&
Linkage != GlobalValue::ExternalWeakLinkage)
continue;
@@ -1180,8 +1225,8 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
const DataLayout &DL = Global.getParent()->getDataLayout();
uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType());
- DataSecEntries[std::string(SecName)]->addVar(VarId, Asm->getSymbol(&Global),
- Size);
+ DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId,
+ Asm->getSymbol(&Global), Size);
}
}
@@ -1194,14 +1239,23 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
auto *GVar = dyn_cast<GlobalVariable>(GVal);
if (GVar) {
// Emit "mov ri, <imm>"
- uint32_t Imm;
+ int64_t Imm;
+ uint32_t Reloc;
if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr) ||
- GVar->hasAttribute(BPFCoreSharedInfo::TypeIdAttr))
- Imm = PatchImms[GVar];
- else
+ GVar->hasAttribute(BPFCoreSharedInfo::TypeIdAttr)) {
+ Imm = PatchImms[GVar].first;
+ Reloc = PatchImms[GVar].second;
+ } else {
return false;
+ }
- OutMI.setOpcode(BPF::MOV_ri);
+ if (Reloc == BPFCoreSharedInfo::ENUM_VALUE_EXISTENCE ||
+ Reloc == BPFCoreSharedInfo::ENUM_VALUE ||
+ Reloc == BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL ||
+ Reloc == BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE)
+ OutMI.setOpcode(BPF::LD_imm64);
+ else
+ OutMI.setOpcode(BPF::MOV_ri);
OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
OutMI.addOperand(MCOperand::createImm(Imm));
return true;
@@ -1215,7 +1269,7 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
const GlobalValue *GVal = MO.getGlobal();
auto *GVar = dyn_cast<GlobalVariable>(GVal);
if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
- uint32_t Imm = PatchImms[GVar];
+ uint32_t Imm = PatchImms[GVar].first;
OutMI.setOpcode(MI->getOperand(1).getImm());
if (MI->getOperand(0).isImm())
OutMI.addOperand(MCOperand::createImm(MI->getOperand(0).getImm()));
@@ -1250,7 +1304,19 @@ void BTFDebug::processFuncPrototypes(const Function *F) {
uint8_t Scope = BTF::FUNC_EXTERN;
auto FuncTypeEntry =
std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope);
- addType(std::move(FuncTypeEntry));
+ uint32_t FuncId = addType(std::move(FuncTypeEntry));
+ if (F->hasSection()) {
+ StringRef SecName = F->getSection();
+
+ if (DataSecEntries.find(std::string(SecName)) == DataSecEntries.end()) {
+ DataSecEntries[std::string(SecName)] =
+ std::make_unique<BTFKindDataSec>(Asm, std::string(SecName));
+ }
+
+ // We really don't know the function size, so set it to 0.
+ DataSecEntries[std::string(SecName)]->addDataSecEntry(FuncId,
+ Asm->getSymbol(F), 0);
+ }
}
void BTFDebug::endModule() {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h
index 2f39f665299a..76f1901779bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h
@@ -16,7 +16,8 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/DebugHandlerBase.h"
-#include "llvm/CodeGen/MachineInstr.h"
+#include <cstdint>
+#include <map>
#include <set>
#include <unordered_map>
#include "BTF.h"
@@ -27,9 +28,12 @@ class AsmPrinter;
class BTFDebug;
class DIType;
class GlobalVariable;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class MCInst;
class MCStreamer;
class MCSymbol;
-class MachineFunction;
/// The base class for BTF type generation.
class BTFTypeBase {
@@ -183,7 +187,7 @@ public:
uint32_t getSize() override {
return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size();
}
- void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
+ void addDataSecEntry(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
Vars.push_back(std::make_tuple(Id, Sym, Size));
}
std::string getName() { return Name; }
@@ -191,6 +195,15 @@ public:
void emitType(MCStreamer &OS) override;
};
+/// Handle binary floating point type.
+class BTFTypeFloat : public BTFTypeBase {
+ StringRef Name;
+
+public:
+ BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName);
+ void completeType(BTFDebug &BDebug) override;
+};
+
/// String table.
class BTFStringTable {
/// String table size in bytes.
@@ -251,7 +264,7 @@ class BTFDebug : public DebugHandlerBase {
StringMap<std::vector<std::string>> FileContent;
std::map<std::string, std::unique_ptr<BTFKindDataSec>> DataSecEntries;
std::vector<BTFTypeStruct *> StructTypes;
- std::map<const GlobalVariable *, uint32_t> PatchImms;
+ std::map<const GlobalVariable *, std::pair<int64_t, uint32_t>> PatchImms;
std::map<StringRef, std::pair<bool, std::vector<BTFTypeDerived *>>>
FixupDerivedTypes;
std::set<const Function *>ProtoFunctions;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 4d98dc7341d0..3a1492743bf4 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -58,7 +58,7 @@ public:
BPF_MEM = 0x3,
BPF_LEN = 0x4,
BPF_MSH = 0x5,
- BPF_XADD = 0x6
+ BPF_ATOMIC = 0x6
};
BPFDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
@@ -176,7 +176,7 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
uint8_t InstMode = getInstMode(Insn);
if ((InstClass == BPF_LDX || InstClass == BPF_STX) &&
getInstSize(Insn) != BPF_DW &&
- (InstMode == BPF_MEM || InstMode == BPF_XADD) &&
+ (InstMode == BPF_MEM || InstMode == BPF_ATOMIC) &&
STI.getFeatureBits()[BPF::ALU32])
Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address,
this, STI);
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 9d829ac45a10..29e9d5da0836 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -43,11 +43,6 @@ public:
unsigned getNumFixupKinds() const override { return 1; }
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- return false;
- }
-
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
index 2181bb575cdd..e76067ea41ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
@@ -32,6 +32,7 @@ public:
void printBrTargetOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index f9abe76c976b..12af92e0d198 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -13,6 +13,7 @@
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -158,12 +159,18 @@ void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
uint64_t BPFMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
+ // For CMPXCHG instructions, the output is implicitly in R0/W0,
+ // so the memory operand starts at operand 0.
+ int MemOpStartIndex = 1, Opcode = MI.getOpcode();
+ if (Opcode == BPF::CMPXCHGW32 || Opcode == BPF::CMPXCHGD)
+ MemOpStartIndex = 0;
+
uint64_t Encoding;
- const MCOperand Op1 = MI.getOperand(1);
+ const MCOperand Op1 = MI.getOperand(MemOpStartIndex);
assert(Op1.isReg() && "First operand is not register.");
Encoding = MRI.getEncodingValue(Op1.getReg());
Encoding <<= 16;
- MCOperand Op2 = MI.getOperand(2);
+ MCOperand Op2 = MI.getOperand(MemOpStartIndex + 1);
assert(Op2.isImm() && "Second operand is not immediate.");
Encoding |= Op2.getImm() & 0xffff;
return Encoding;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 58da0830d002..8fb7d7e89f09 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -46,7 +46,7 @@ static MCRegisterInfo *createBPFMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *createBPFMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
- return createBPFMCSubtargetInfoImpl(TT, CPU, FS);
+ return createBPFMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCStreamer *createBPFMCStreamer(const Triple &T, MCContext &Ctx,
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKY.td b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKY.td
new file mode 100644
index 000000000000..da6151befa1b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKY.td
@@ -0,0 +1,32 @@
+//===-- CSKY.td - Describe the CSKY Target Machine ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Registers, calling conventions, instruction descriptions.
+//===----------------------------------------------------------------------===//
+
+include "CSKYRegisterInfo.td"
+include "CSKYInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// CSKY processors supported.
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"generic-csky", NoSchedModel, []>;
+
+//===----------------------------------------------------------------------===//
+// Define the CSKY target.
+//===----------------------------------------------------------------------===//
+
+def CSKYInstrInfo : InstrInfo;
+
+def CSKY : Target {
+ let InstructionSet = CSKYInstrInfo;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrFormats.td
new file mode 100644
index 000000000000..86f9dd0b7da3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrFormats.td
@@ -0,0 +1,528 @@
+//===-- CSKYInstrFormats.td - CSKY Instruction Formats -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+class AddrMode<bits<5> val> {
+ bits<5> Value = val;
+}
+
+def AddrModeNone : AddrMode<0>;
+def AddrMode32B : AddrMode<1>; // ld32.b, ld32.bs, st32.b, st32.bs, +4kb
+def AddrMode32H : AddrMode<2>; // ld32.h, ld32.hs, st32.h, st32.hs, +8kb
+def AddrMode32WD : AddrMode<3>; // ld32.w, st32.w, ld32.d, st32.d, +16kb
+def AddrMode16B : AddrMode<4>; // ld16.b, +32b
+def AddrMode16H : AddrMode<5>; // ld16.h, +64b
+def AddrMode16W : AddrMode<6>; // ld16.w, +128b or +1kb
+def AddrMode32SDF : AddrMode<7>; // flds, fldd, +1kb
+
+class CSKYInst<AddrMode am, int sz, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : Instruction {
+ let Namespace = "CSKY";
+ int Size = sz;
+ AddrMode AM = am;
+
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = NoItinerary;
+ let TSFlags{4 - 0} = AM.Value;
+}
+
+class CSKYPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : CSKYInst<AddrModeNone, 0, outs, ins, asmstr, pattern> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+class CSKY32Inst<AddrMode am, bits<6> opcode, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : CSKYInst<am, 4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ let Inst{31 - 26} = opcode;
+}
+
+// CSKY 32-bit instruction
+// Format< OP[6] | Offset[26] >
+// Instruction(1): bsr32
+class J<bits<6> opcode, dag outs, dag ins, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, opcode, outs, ins, !strconcat(op, "\t$offset"),
+ pattern> {
+ bits<26> offset;
+ let Inst{25 - 0} = offset;
+}
+
+// Format< OP[6] | RZ[5] | SOP[3] | OFFSET[18] >
+// Instructions(7): grs, lrs32.b, lrs32.h, lrs32.w, srs32.b, srs32.h, srs32.w
+class I_18_Z_L<bits<3> sop, string op, Operand operand, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x33, (outs GPR:$rz), (ins operand:$offset),
+ !strconcat(op, "\t$rz, $offset"), pattern> {
+ bits<5> rz;
+ bits<18> offset;
+ let Inst{25 - 21} = rz;
+ let Inst{20 - 18} = sop;
+ let Inst{17 - 0} = offset;
+}
+
+// Format< OP[6] | RZ[5] | RX[5] | IMM[16] >
+// Instructions(1): ori32
+class I_16_ZX<string op, ImmLeaf ImmType, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x3b,
+ (outs GPR:$rz), (ins GPR:$rx,ImmType:$imm16),
+ !strconcat(op, "\t$rz, $rx, $imm16"), pattern> {
+ bits<5> rz;
+ bits<5> rx;
+ bits<16> imm16;
+ let Inst{25 - 21} = rz;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 0} = imm16;
+}
+
+// Format< OP[6] | SOP[5] | RZ[5] | IMM[16] >
+// Instructions(3): movi32, movih32, (bgeni32)
+class I_16_MOV<bits<5> sop, string op, ImmLeaf ImmType>
+ : CSKY32Inst<AddrModeNone, 0x3a, (outs GPR:$rz), (ins ImmType:$imm16),
+ !strconcat(op, "\t$rz, $imm16"),
+ [(set GPR:$rz, ImmType:$imm16)]> {
+ bits<5> rz;
+ bits<16> imm16;
+ let Inst{25 - 21} = sop;
+ let Inst{20 - 16} = rz;
+ let Inst{15 - 0} = imm16;
+ let isReMaterializable = 1;
+ let isAsCheapAsAMove = 1;
+ let isMoveImm = 1;
+}
+
+// Format< OP[6] | SOP[5] | RZ[5] | OFFSET[16] >
+// Instructions(1): lrw32
+class I_16_Z_L<bits<5> sop, string op, Operand operand, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x3a,
+ (outs GPR:$rz), (ins operand:$imm16),
+ !strconcat(op, "\t$rz, [$imm16]"), pattern> {
+ bits<5> rz;
+ bits<16> imm16;
+ let Inst{25 - 21} = sop;
+ let Inst{20 - 16} = rz;
+ let Inst{15 - 0} = imm16;
+}
+
+// Format< OP[6] | SOP[5] | 00000[5] | OFFSET[16] >
+// Instructions(5): bt32, bf32, br32, jmpi32, jsri32
+class I_16_L<bits<5> sop, dag outs, dag ins, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x3a, outs, ins, !strconcat(op, "\t$imm16"),
+ pattern> {
+ bits<16> imm16;
+ let Inst{25 - 21} = sop;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 0} = imm16;
+}
+
+// bt32, bf32, br32, jmpi32
+class I_16_L_B<bits<5> sop, string op, Operand operand, list<dag> pattern>
+ : I_16_L<sop, (outs), (ins operand:$imm16, CARRY:$ca), op, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+// Format< OP[6] | SOP[5] | RX[5] | 0000000000000000[16] >
+// Instructions(2): jmp32, jsr32
+class I_16_JX<bits<5> sop, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x3a, (outs), (ins GPR:$rx),
+ !strconcat(op, "\t$rx"), pattern> {
+ bits<5> rx;
+ bits<16> imm16;
+ let Inst{25 - 21} = sop;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 0} = 0;
+}
+
+// Format< OP[6] | SOP[5] | RX[5] | 00000000000000[14] | IMM[2] >
+// Instructions(1): jmpix32
+class I_16_J_XI<bits<5> sop, string op, Operand operand, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x3a, (outs),
+ (ins GPR:$rx, operand:$imm2),
+ !strconcat(op, "\t$rx, $imm2"), pattern> {
+ bits<5> rx;
+ bits<2> imm2;
+ let Inst{25 - 21} = sop;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 2} = 0;
+ let Inst{1 - 0} = imm2;
+}
+
+// Format< OP[6] | SOP[5] | PCODE[5] | 0000000000000000[16] >
+// Instructions(1): rts32
+class I_16_RET<bits<5> sop, bits<5> pcode, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x3a, (outs), (ins), op, pattern> {
+ let Inst{25 - 21} = sop;
+ let Inst{20 - 16} = pcode;
+ let Inst{15 - 0} = 0;
+ let isTerminator = 1;
+ let isReturn = 1;
+ let isBarrier = 1;
+}
+
+// Format< OP[6] | SOP[5] | RX[5] | IMM16[16] >
+// Instructions(3): cmpnei32, cmphsi32, cmplti32
+class I_16_X<bits<5> sop, string op>
+ : CSKY32Inst<AddrModeNone, 0x3a, (outs CARRY:$ca),
+ (ins GPR:$rx, i32imm:$imm16), !strconcat(op, "\t$rx, $imm16"), []> {
+ bits<16> imm16;
+ bits<5> rx;
+ let Inst{25 - 21} = sop;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 0} = imm16;
+ let isCompare = 1;
+}
+
+// Format< OP[6] | SOP[5] | RX[5] | OFFSET[16] >
+// Instructions(7): bez32, bnez32, bnezad32, bhz32, blsz32, blz32, bhsz32
+class I_16_X_L<bits<5> sop, string op, Operand operand>
+ : CSKY32Inst<AddrModeNone, 0x3a, (outs), (ins GPR:$rx, operand:$imm16),
+ !strconcat(op, "\t$rx, $imm16"), []> {
+ bits<5> rx;
+ bits<16> imm16;
+ let Inst{25 - 21} = sop;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 0} = imm16;
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+// Format< OP[6] | RZ[5] | RX[5] | SOP[4] | IMM[12] >
+// Instructions(5): addi32, subi32, andi32, andni32, xori32
+class I_12<bits<4> sop, string op, SDNode node, ImmLeaf ImmType>
+ : CSKY32Inst<AddrModeNone, 0x39, (outs GPR:$rz),
+ (ins GPR:$rx, ImmType:$imm12), !strconcat(op, "\t$rz, $rx, $imm12"),
+ [(set GPR:$rz, (node GPR:$rx, ImmType:$imm12))]> {
+ bits<5> rz;
+ bits<5> rx;
+ bits<12> imm12;
+ let Inst{25 - 21} = rz;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 12} = sop;
+ let Inst{11 - 0} = imm12;
+}
+
+class I_LDST<AddrMode am, bits<6> opcode, bits<4> sop, dag outs, dag ins,
+ string op, list<dag> pattern>
+ : CSKY32Inst<am, opcode, outs, ins, !strconcat(op, "\t$rz, ($rx, $imm12)"),
+ pattern> {
+ bits<5> rx;
+ bits<5> rz;
+ bits<12> imm12;
+ let Inst{25 - 21} = rz;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 12} = sop;
+ let Inst{11 - 0} = imm12;
+}
+
+// Format< OP[6] | RZ[5] | RX[5] | SOP[4] | OFFSET[12] >
+// Instructions(6): ld32.b, ld32.bs, ld32.h, ld32.hs, ld32.w, ld32.d
+class I_LD<AddrMode am, bits<4> sop, string op, Operand operand>
+ : I_LDST<am, 0x36, sop,
+ (outs GPR:$rz), (ins GPR:$rx, operand:$imm12), op, []>;
+
+// Format< OP[6] | RZ[5] | RX[5] | SOP[4] | OFFSET[12] >
+// Instructions(4): st32.b, st32.h, st32.w, st32.d
+class I_ST<AddrMode am, bits<4> sop, string op, Operand operand>
+ : I_LDST<am, 0x37, sop, (outs),
+ (ins GPR:$rz, GPR:$rx, operand:$imm12), op, []>;
+
+// Format< OP[6] | SOP[5] | PCODE[5] | 0000[4] | 000 | R28 | LIST2[3] | R15 |
+// LIST1[4] >
+// Instructions(2): push32, pop32
+class I_12_PP<bits<5> sop, bits<5> pcode, dag outs, dag ins, string op>
+ : CSKY32Inst<AddrModeNone, 0x3a, outs, ins, !strconcat(op, "\t$regs"), []> {
+ bits<12> regs;
+ let Inst{25 - 21} = sop;
+ let Inst{20 - 16} = pcode;
+ let Inst{15 - 12} = 0;
+ let Inst{11 - 0} = regs;
+}
+
+// Format< OP[6] | RZ[5] | RX[5] | SOP[6] | PCODE[5] | IMM[5]>
+// Instructions(4): incf32, inct32, decf32, dect32
+class I_5_ZX<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
+ list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, (outs GPR:$rz),
+ (ins GPR:$false, GPR:$rx, ImmType:$imm5),
+ !strconcat(op, "\t$rz, $rx, $imm5"), pattern> {
+ bits<5> rz;
+ bits<5> rx;
+ bits<5> imm5;
+ let Inst{25 - 21} = rz;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = imm5;
+ let Constraints = "$rz = $false";
+}
+
+// Format< OP[6] | IMM[5] | RX[5] | SOP[6] | PCODE[5] | RZ[5]>
+// Instructions(13): decgt32, declt32, decne32, lsli32, lslc32, lsri32
+// lsrc32, asri32, asrc32, rotli32, xsr32, bclri32, bseti32
+class I_5_XZ<bits<6> sop, bits<5> pcode, string op, dag ins, dag outs,
+ list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, ins, outs,
+ !strconcat(op, "\t$rz, $rx, $imm5"), pattern> {
+ bits<5> imm5;
+ bits<5> rx;
+ bits<5> rz;
+ let Inst{25 - 21} = imm5;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = rz;
+}
+
+// Format< OP[6] | RY[5] | RX[5] | SOP[6] | PCODE[5] | IMM[5]>
+// Instructions(2): ldm32, (ldq32), stm32, (stq32)
+class I_5_YX<bits<6> opcode, dag outs, dag ins, string op, list<dag> pattern,
+ bits<5> imm5>
+ : CSKY32Inst<AddrModeNone, opcode, outs, ins,
+ op #"\t${ry}, (${rx}), " #!cast<int>(imm5), pattern> {
+ bits<5> rx;
+ bits<5> ry;
+ let Inst{25 - 21} = ry; // ry
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = 0b000111;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = imm5{4 - 0}; // imm5
+}
+
+// Format< OP[6] | LSB[5] | RX[5] | SOP[6] | MSB[5] | RZ[5]>
+// Instructions(6): zext32, zextb32, zexth32, sext32, sextb32, sexth32
+class I_5_XZ_U<bits<6> sop, bits<5> lsb, bits<5> msb, dag outs, dag ins,
+ string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, outs, ins,
+ op #"\t$rz, $rx, " #!cast<int>(msb) #", " #!cast<int>(lsb),
+ pattern> {
+ bits<5> rx;
+ bits<5> rz;
+ let Inst{25 - 21} = lsb; // lsb
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = msb; // msb
+ let Inst{4 - 0} = rz;
+}
+
+// sextb, sexth
+class I_5_XZ_US<bits<6> sop, bits<5> lsb, bits<5> msb, string op, SDNode opnode,
+ ValueType type> : I_5_XZ_U<sop, lsb, msb,
+ (outs GPR:$rz), (ins GPR:$rx),op, [(set GPR:$rz, (opnode GPR:$rx, type))]>;
+
+class I_5_XZ_UZ<bits<6> sop, bits<5> lsb, bits<5> msb, string op, int v>
+ : I_5_XZ_U<sop, lsb, msb, (outs GPR:$rz), (ins GPR:$rx), op,
+ [(set GPR:$rz, (and GPR:$rx, (i32 v)))]>;
+
+// Format< OP[6] | RZ[5] | RX[5] | SOP[6] | SIZE[5] | LSB[5]>
+// Instructions(1): ins32
+class I_5_ZX_U<bits<6> sop, string op, Operand operand, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, (outs GPR:$rz), (ins operand:$size_lsb),
+ !strconcat(op, "\t$rz, operand:$size_lsb"), pattern> {
+ bits<10> size_lsb;
+ bits<5> rz;
+ bits<5> rx;
+ let Inst{25 - 21} = rz;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = size_lsb{9 - 5}; // size
+ let Inst{4 - 0} = size_lsb{4 - 0}; // lsb
+}
+
+// Format< OP[6] | IMM[5] | RX[5] | SOP[6] | PCODE[5] | 00000 >
+// Instructions(1): btsti32
+class I_5_X<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
+ list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31,
+ (outs CARRY:$ca), (ins GPR:$rx, ImmType:$imm5),
+ !strconcat(op, "\t$rx, $imm5"), pattern> {
+ bits<5> imm5;
+ bits<5> rx;
+ let Inst{25 - 21} = imm5;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+ let isCompare = 1;
+}
+
+// Format< OP[6] | IMM[5] | 00000[5] | SOP[6] | PCODE[5] | RZ[5]>
+// Instructions(1): bmaski32
+class I_5_Z<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
+ list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, (outs GPR:$rz), (ins ImmType:$imm5),
+ !strconcat(op, "\t$rz, $imm5"), pattern> {
+ bits<5> imm5;
+ bits<5> rz;
+ let Inst{25 - 21} = imm5;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = rz;
+}
+
+// Format< OP[6] | RY[5] | RX[5] | SOP[6] | PCODE[5] | RZ[5] >
+// Instructions(24): addu32, addc32, subu32, subc32, (rsub32), ixh32, ixw32,
+// ixd32, and32, andn32, or32, xor32, nor32, lsl32, lsr32, asr32, rotl32
+// mult32, divu32, divs32, mul.(u/s)32, mula.32.l, mula.u32, mulall.s16.s
+class R_YXZ<bits<6> opcode, bits<6> sop, bits<5> pcode, dag outs, dag ins,
+ string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, opcode, outs, ins,
+ !strconcat(op, "\t$rz, $rx, $ry"), pattern> {
+ bits<5> ry;
+ bits<5> rx;
+ bits<5> rz;
+ let Inst{25 - 21} = ry;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = rz;
+}
+
+// R_YXZ instructions with simple pattern
+// Output: GPR:rz
+// Input: GPR:rx, GPR:ry
+// Asm string: op rz, rx, ry
+// Instructions: addu32, subu32, ixh32, ixw32, ixd32, and32, andn32, or32,
+// xor32, nor32, lsl32, lsr32, asr32, mult32, divu32, divs32
+class R_YXZ_SP_F1<bits<6> sop, bits<5> pcode, PatFrag opnode, string op,
+ bit Commutable = 0> : R_YXZ<0x31, sop, pcode, (outs GPR:$rz),
+ (ins GPR:$rx, GPR:$ry), op, [(set GPR:$rz, (opnode GPR:$rx, GPR:$ry))]> {
+ let isCommutable = Commutable;
+}
+
+// Format< OP[6] | RY[5] | RX[5] | SOP[6] | PCODE[5] | RZ[5] >
+// Instructions:(8) ldr32.b, ldr32.h, ldr32.bs, ldr32.hs, ldr32.w,
+// str32.b, str32.h, str32.w
+class R_YXZ_LDST<bits<6> opcode, bits<6> sop, bits<5> pcode, int no, dag outs,
+ dag ins, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, opcode, outs, ins,
+ op #"\t$rz, ($rx, $ry << " #no #")", pattern> {
+ bits<5> rx;
+ bits<5> ry;
+ bits<5> rz;
+ let Inst{25 - 21} = ry; // ry;
+ let Inst{20 - 16} = rx; // rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode; // pcode;
+ let Inst{4 - 0} = rz;
+}
+
+class I_LDR<bits<6> sop, bits<5> pcode, string op, int no>
+ : R_YXZ_LDST<0x34, sop, pcode, no,
+ (outs GPR:$rz), (ins GPR:$rx, GPR:$ry), op, []>;
+
+class I_STR<bits<6> sop, bits<5> pcode, string op, int no>
+ : R_YXZ_LDST<0x35, sop, pcode, no, (outs),
+ (ins GPR:$rz, GPR:$rx, GPR:$ry), op, []>;
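+// Note: the "no" parameter above is the scale printed after the index
+// register, e.g. an I_LDR used for ldr32.w with no = 2 prints as
+// "ldr32.w rz, (rx, ry << 2)" and is expected to address rx + (ry << 2).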
+
+// Format< OP[6] | RX[5] | RX[5] | SOP[6] | PCODE[5] | RZ[5] >
+// Instructions:(1) not32
+class R_XXZ<bits<6> sop, bits<5> pcode, dag outs, dag ins, string op,
+ list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, outs, ins, !strconcat(op, "\t$rz, $rx"),
+ pattern> {
+ bits<5> rx;
+ bits<5> rz;
+ let Inst{25 - 21} = rx;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = rz;
+}
+
+// Format< OP[6] | RY[5] | RX[5] | SOP[6] | PCODE[5] | 00000[5] >
+// Instructions:(4) cmpne32, cmphs32, cmplt32, tst32
+class R_YX<bits<6> sop, bits<5> pcode, string op>
+ : CSKY32Inst<AddrModeNone, 0x31, (outs CARRY:$ca),
+ (ins GPR:$rx, GPR:$ry),
+ !strconcat(op, "\t$rx, $ry"), []> {
+ bits<5> ry;
+ bits<5> rx;
+ let Inst{25 - 21} = ry;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+ let isCompare = 1;
+}
+
+// Format< OP[6] | 00000[5] | RX[5] | SOP[6] | PCODE[5] | RZ[5] >
+// Instructions:(12)
+// mov32, xtrb0.32, xtrb1.32, xtrb2.32, xtrb3.32, brev32, revb32
+// revh32, abs32, ff0.32, ff1.32, bgenr32
+class R_XZ<bits<6> sop, bits<5> pcode, string op>
+ : CSKY32Inst<AddrModeNone, 0x31, (outs GPR:$rz), (ins GPR:$rx),
+ !strconcat(op, "\t$rz, $rx"), []> {
+ bits<5> rx;
+ bits<5> rz;
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = rz;
+}
+
+// Format< OP[6] | RZ[5] | RX[5] | SOP[6] | PCODE[5] | 00000[5] >
+// Instructions:(2) movf32, movt32
+class R_ZX<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, (outs GPR:$rz),
+ (ins CARRY:$ca, GPR:$rx, GPR:$false),
+ !strconcat(op, "\t$rz, $rx"), pattern> {
+ bits<5> rz;
+ bits<5> rx;
+ let Inst{25 - 21} = rz;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+ let Constraints = "$rz = $false";
+ let isSelect = 1;
+}
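+// Note: movf32/movt32 conditionally copy rx into rz depending on the
+// condition bit in CARRY; when the move is not taken, rz keeps its previous
+// value, which is why $rz is tied to the $false operand and the class is
+// marked isSelect.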
+
+// Format< OP[6] | 00000[5] | RX[5] | SOP[6] | PCODE[5] | 00000[5] >
+// Instructions:(1) tstnbz32
+class R_X<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, (outs CARRY:$ca),(ins GPR:$rx),
+ !strconcat(op, "\t$rx"), pattern> {
+ bits<5> rx;
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+}
+
+// Format< OP[6] | 00000[5] | 00000[5] | SOP[6] | PCODE[5] | RZ[5] >
+// Instructions:(2) mvc32, mvcv32
+class R_Z_1<bits<6> sop, bits<5> pcode, string op>
+ : CSKY32Inst<AddrModeNone, 0x31, (outs GPR:$rz),
+ (ins CARRY:$ca), !strconcat(op, "\t$rz"), []> {
+ bits<5> rz;
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = rz;
+}
+
+// Format< OP[6] | RZ[5] | 00000[5] | SOP[6] | PCODE[5] | 00000[5] >
+// Instructions:(2) clrf32, clrt32
+class R_Z_2<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, (outs GPR:$rz),
+ (ins CARRY:$ca, GPR:$false), !strconcat(op, "\t$rz"), []> {
+ bits<5> rz;
+ let Inst{25 - 21} = rz;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+ let Constraints = "$rz = $false";
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.td
new file mode 100644
index 000000000000..7add217530e1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -0,0 +1,108 @@
+//===-- CSKYInstrInfo.td - Target Description for CSKY -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the CSKY instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "CSKYInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// CSKY specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+// TODO: Add CSKY specific DAG Nodes.
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+class oimm<int num> : Operand<i32>,
+ ImmLeaf<i32, "return isUInt<"#num#">(Imm - 1);"> {
+ let EncoderMethod = "getOImmOpValue";
+}
+
+class uimm<int num, int shift = 0> : Operand<i32>,
+ ImmLeaf<i32, "return isShiftedUInt<"#num#", "#shift#">(Imm);"> {
+ let EncoderMethod = "getImmOpValue<"#shift#">";
+}
+
+class simm<int num, int shift = 0> : Operand<i32>,
+ ImmLeaf<i32, "return isShiftedInt<"#num#", "#shift#">(Imm);"> {
+ let EncoderMethod = "getImmOpValue<"#shift#">";
+}
+
+def nimm_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+class nimm<int num> : Operand<i32>,
+ ImmLeaf<i32, "return isUInt<"#num#">(~Imm);", nimm_XFORM> {
+}
+
+
+def oimm12 : oimm<12>;
+
+def nimm12 : nimm<12>;
+
+def uimm5 : uimm<5>;
+def uimm12 : uimm<12>;
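+// Operand ranges implied by the classes above: oimm12 accepts immediates
+// where Imm - 1 fits in 12 bits (i.e. 1..4096, encoded off by one via
+// getOImmOpValue), uimm5/uimm12 accept plain 5-/12-bit unsigned immediates,
+// and nimm12 accepts immediates whose bitwise NOT fits in 12 bits (used by
+// andni32, with nimm_XFORM producing the inverted constant for encoding).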
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions.
+//===----------------------------------------------------------------------===//
+
+class TriOpFrag<dag res> : PatFrag<(ops node: $LHS, node:$MHS, node:$RHS), res>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
+
+def ADDI32 : I_12<0x0, "addi32", add, oimm12>;
+def SUBI32 : I_12<0x1, "subi32", sub, oimm12>;
+def ANDI32 : I_12<0x2, "andi32", and, uimm12>;
+def ANDNI32 : I_12<0x3, "andni32", and, nimm12>;
+def XORI32 : I_12<0x4, "xori32", xor, uimm12>;
+def LSLI32 : I_5_XZ<0x12, 0x1, "lsli32",
+ (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5),
+ [(set GPR:$rz, (shl GPR:$rx, uimm5:$imm5))]>;
+def LSRI32 : I_5_XZ<0x12, 0x2, "lsri32",
+ (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5),
+ [(set GPR:$rz, (srl GPR:$rx, uimm5:$imm5))]>;
+def ASRI32 : I_5_XZ<0x12, 0x4, "asri32",
+ (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5),
+ [(set GPR:$rz, (sra GPR:$rx, uimm5:$imm5))]>;
+
+
+
+def ADDU32 : R_YXZ_SP_F1<0x0, 0x1,
+ BinOpFrag<(add node:$LHS, node:$RHS)>, "addu32", 1>;
+def SUBU32 : R_YXZ_SP_F1<0x0, 0x4,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>, "subu32">;
+def AND32 : R_YXZ_SP_F1<0x8, 0x1,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, "and32", 1>;
+def ANDN32 : R_YXZ_SP_F1<0x8, 0x2,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>, "andn32">;
+def OR32: R_YXZ_SP_F1<0x9, 0x1,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, "or32", 1>;
+def XOR32 : R_YXZ_SP_F1<0x9, 0x2,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, "xor32", 1>;
+def NOR32 : R_YXZ_SP_F1<0x9, 0x4,
+ BinOpFrag<(not (or node:$LHS, node:$RHS))>, "nor32", 1>;
+def LSL32 : R_YXZ_SP_F1<0x10, 0x1,
+ BinOpFrag<(shl node:$LHS, node:$RHS)>, "lsl32">;
+def LSR32 : R_YXZ_SP_F1<0x10, 0x2,
+ BinOpFrag<(srl node:$LHS, node:$RHS)>, "lsr32">;
+def ASR32 : R_YXZ_SP_F1<0x10, 0x4,
+ BinOpFrag<(sra node:$LHS, node:$RHS)>, "asr32">;
+def MULT32 : R_YXZ_SP_F1<0x21, 0x1,
+ BinOpFrag<(mul node:$LHS, node:$RHS)>, "mult32", 1>;
+def DIVS32 : R_YXZ_SP_F1<0x20, 0x2,
+ BinOpFrag<(sdiv node:$LHS, node:$RHS)>, "divs32">;
+def DIVU32 : R_YXZ_SP_F1<0x20, 0x1,
+ BinOpFrag<(udiv node:$LHS, node:$RHS)>, "divu32">;
+
+def NOT32 : R_XXZ<0b001001, 0b00100, (outs GPR:$rz), (ins GPR:$rx),
+ "not", [(set GPR:$rz, (not GPR:$rx))]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
new file mode 100644
index 000000000000..aef4589a67f2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
@@ -0,0 +1,182 @@
+//===-- CSKYRegisterInfo.td - CSKY Register defs -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the CSKY registers.
+//===----------------------------------------------------------------------===//
+
+let Namespace = "CSKY" in {
+ class CSKYReg<bits<6> Enc, string n, list<string> alt = []> : Register<n> {
+ let HWEncoding{5 - 0} = Enc;
+ let AltNames = alt;
+ }
+
+ class CSKYFReg32<bits<5> Enc, string n, list<string> alt = []> : Register<n> {
+ let HWEncoding{4 - 0} = Enc;
+ let AltNames = alt;
+ }
+
+ // Because CSKYFReg64 registers have AsmName and AltNames that alias with their
+ // 32-bit sub-register, CSKYAsmParser will need to coerce a register number
+ // from a CSKYFReg32 to the equivalent CSKYFReg64 when appropriate.
+ def sub32_0 : SubRegIndex<32, 0>;
+ def sub32_32 : SubRegIndex<32, 32>;
+ def sub64_0 : SubRegIndex<64, 0>;
+ def sub64_64 : SubRegIndex<64,64>;
+
+ class CSKYFReg64<CSKYFReg32 subreg> : Register<""> {
+ let HWEncoding{4 - 0} = subreg.HWEncoding{4 - 0};
+ let SubRegs = [subreg];
+ let SubRegIndices = [sub32_0];
+ let AsmName = subreg.AsmName;
+ let AltNames = subreg.AltNames;
+ }
+
+ class CSKYFReg128<CSKYFReg64 subreg> : Register<""> {
+ let HWEncoding{4 - 0} = subreg.HWEncoding{4 - 0};
+ let SubRegs = [subreg];
+ let SubRegIndices = [sub64_0];
+ let AsmName = subreg.AsmName;
+ let AltNames = subreg.AltNames;
+ }
+
+ def ABIRegAltName : RegAltNameIndex;
+} // Namespace = "CSKY"
+
+let RegAltNameIndices = [ABIRegAltName] in {
+ def R0 : CSKYReg<0, "r0", ["a0"]>, DwarfRegNum<[0]>;
+ def R1 : CSKYReg<1, "r1", ["a1"]>, DwarfRegNum<[1]>;
+ def R2 : CSKYReg<2, "r2", ["a2"]>, DwarfRegNum<[2]>;
+ def R3 : CSKYReg<3, "r3", ["a3"]>, DwarfRegNum<[3]>;
+ def R4 : CSKYReg<4, "r4", ["l0"]>, DwarfRegNum<[4]>;
+ def R5 : CSKYReg<5, "r5", ["l1"]>, DwarfRegNum<[5]>;
+ def R6 : CSKYReg<6, "r6", ["l2"]>, DwarfRegNum<[6]>;
+ def R7 : CSKYReg<7, "r7", ["l3"]>, DwarfRegNum<[7]>;
+ def R8 : CSKYReg<8, "r8", ["l4"]>, DwarfRegNum<[8]>;
+ def R9 : CSKYReg<9, "r9", ["l5"]>, DwarfRegNum<[9]>;
+ def R10 : CSKYReg<10, "r10", ["l6"]>, DwarfRegNum<[10]>;
+ def R11 : CSKYReg<11, "r11", ["l7"]>, DwarfRegNum<[11]>;
+ def R12 : CSKYReg<12, "r12", ["t0"]>, DwarfRegNum<[12]>;
+ def R13 : CSKYReg<13, "r13", ["t1"]>, DwarfRegNum<[13]>;
+ def R14 : CSKYReg<14, "r14", ["sp"]>, DwarfRegNum<[14]>;
+ def R15 : CSKYReg<15, "r15", ["lr"]>, DwarfRegNum<[15]>;
+ def R16 : CSKYReg<16, "r16", ["l8"]>, DwarfRegNum<[16]>;
+ def R17 : CSKYReg<17, "r17", ["l9"]>, DwarfRegNum<[17]>;
+ def R18 : CSKYReg<18, "r18", ["t2"]>, DwarfRegNum<[18]>;
+ def R19 : CSKYReg<19, "r19", ["t3"]>, DwarfRegNum<[19]>;
+ def R20 : CSKYReg<20, "r20", ["t4"]>, DwarfRegNum<[20]>;
+ def R21 : CSKYReg<21, "r21", ["t5"]>, DwarfRegNum<[21]>;
+ def R22 : CSKYReg<22, "r22", ["t6"]>, DwarfRegNum<[22]>;
+ def R23 : CSKYReg<23, "r23", ["t7"]>, DwarfRegNum<[23]>;
+ def R24 : CSKYReg<24, "r24", ["t8"]>, DwarfRegNum<[24]>;
+ def R25 : CSKYReg<25, "r25", ["t9"]>, DwarfRegNum<[25]>;
+ def R26 : CSKYReg<26, "r26", ["r26"]>, DwarfRegNum<[26]>;
+ def R27 : CSKYReg<27, "r27", ["r27"]>, DwarfRegNum<[27]>;
+ def R28 : CSKYReg<28, "r28", ["rgb"]>, DwarfRegNum<[28]>;
+ def R29 : CSKYReg<29, "r29", ["rtb"]>, DwarfRegNum<[29]>;
+ def R30 : CSKYReg<30, "r30", ["svbr"]>, DwarfRegNum<[30]>;
+ def R31 : CSKYReg<31, "r31", ["tls"]>, DwarfRegNum<[31]>;
+ def C : CSKYReg<32, "cr0", ["psr"]>;
+
+}
+
+def GPRTuple : RegisterTuples<
+ [sub32_0, sub32_32],
+ [(add (sequence "R%u", 0, 30)), (add (sequence "R%u", 1, 31))],
+ [ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30"
+ ]>;
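+// GPRTuple yields 31 overlapping register pairs, (R0, R1) through (R30, R31),
+// which back the 64-bit GPRPair register class defined below.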
+
+// Floating point registers
+let RegAltNameIndices = [ABIRegAltName] in {
+ def F0_32 : CSKYFReg32<0, "fr0", ["vr0"]>, DwarfRegNum<[32]>;
+ def F1_32 : CSKYFReg32<1, "fr1", ["vr1"]>, DwarfRegNum<[33]>;
+ def F2_32 : CSKYFReg32<2, "fr2", ["vr2"]>, DwarfRegNum<[34]>;
+ def F3_32 : CSKYFReg32<3, "fr3", ["vr3"]>, DwarfRegNum<[35]>;
+ def F4_32 : CSKYFReg32<4, "fr4", ["vr4"]>, DwarfRegNum<[36]>;
+ def F5_32 : CSKYFReg32<5, "fr5", ["vr5"]>, DwarfRegNum<[37]>;
+ def F6_32 : CSKYFReg32<6, "fr6", ["vr6"]>, DwarfRegNum<[38]>;
+ def F7_32 : CSKYFReg32<7, "fr7", ["vr7"]>, DwarfRegNum<[39]>;
+ def F8_32 : CSKYFReg32<8, "fr8", ["vr8"]>, DwarfRegNum<[40]>;
+ def F9_32 : CSKYFReg32<9, "fr9", ["vr9"]>, DwarfRegNum<[41]>;
+ def F10_32 : CSKYFReg32<10, "fr10", ["vr10"]>, DwarfRegNum<[42]>;
+ def F11_32 : CSKYFReg32<11, "fr11", ["vr11"]>, DwarfRegNum<[43]>;
+ def F12_32 : CSKYFReg32<12, "fr12", ["vr12"]>, DwarfRegNum<[44]>;
+ def F13_32 : CSKYFReg32<13, "fr13", ["vr13"]>, DwarfRegNum<[45]>;
+ def F14_32 : CSKYFReg32<14, "fr14", ["vr14"]>, DwarfRegNum<[46]>;
+ def F15_32 : CSKYFReg32<15, "fr15", ["vr15"]>, DwarfRegNum<[47]>;
+ def F16_32 : CSKYFReg32<16, "fr16", ["vr16"]>, DwarfRegNum<[48]>;
+ def F17_32 : CSKYFReg32<17, "fr17", ["vr17"]>, DwarfRegNum<[49]>;
+ def F18_32 : CSKYFReg32<18, "fr18", ["vr18"]>, DwarfRegNum<[50]>;
+ def F19_32 : CSKYFReg32<19, "fr19", ["vr19"]>, DwarfRegNum<[51]>;
+ def F20_32 : CSKYFReg32<20, "fr20", ["vr20"]>, DwarfRegNum<[52]>;
+ def F21_32 : CSKYFReg32<21, "fr21", ["vr21"]>, DwarfRegNum<[53]>;
+ def F22_32 : CSKYFReg32<22, "fr22", ["vr22"]>, DwarfRegNum<[54]>;
+ def F23_32 : CSKYFReg32<23, "fr23", ["vr23"]>, DwarfRegNum<[55]>;
+ def F24_32 : CSKYFReg32<24, "fr24", ["vr24"]>, DwarfRegNum<[56]>;
+ def F25_32 : CSKYFReg32<25, "fr25", ["vr25"]>, DwarfRegNum<[57]>;
+ def F26_32 : CSKYFReg32<26, "fr26", ["vr26"]>, DwarfRegNum<[58]>;
+ def F27_32 : CSKYFReg32<27, "fr27", ["vr27"]>, DwarfRegNum<[59]>;
+ def F28_32 : CSKYFReg32<28, "fr28", ["vr28"]>, DwarfRegNum<[60]>;
+ def F29_32 : CSKYFReg32<29, "fr29", ["vr29"]>, DwarfRegNum<[61]>;
+ def F30_32 : CSKYFReg32<30, "fr30", ["vr30"]>, DwarfRegNum<[62]>;
+ def F31_32 : CSKYFReg32<31, "fr31", ["vr31"]>, DwarfRegNum<[63]>;
+
+ foreach Index = 0 - 31 in {
+ def F#Index#_64 : CSKYFReg64<!cast<CSKYFReg32>("F"#Index#"_32")>,
+ DwarfRegNum<[!add(Index, 32)]>;
+
+ def F#Index#_128 : CSKYFReg128<!cast<CSKYFReg64>("F"#Index#"_64")>,
+ DwarfRegNum<[!add(Index, 32)]>;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the CSKY register class.
+//===----------------------------------------------------------------------===//
+
+// The order of registers represents the preferred allocation sequence.
+// Registers are listed in the order caller-save, callee-save, specials.
+def GPR : RegisterClass<"CSKY", [i32], 32,
+ (add (sequence "R%u", 0, 3), (sequence "R%u", 12, 13),
+ (sequence "R%u", 18, 25), R15, (sequence "R%u", 4, 11),
+ (sequence "R%u", 16, 17), (sequence "R%u", 26, 27), R28,
+ (sequence "R%u", 29, 30), R14, R31)> {
+ let Size = 32;
+}
+
+def GPRPair : RegisterClass<"CSKY", [untyped], 32, (add GPRTuple)> {
+ let Size = 64;
+}
+
+def CARRY : RegisterClass<"CSKY", [i32], 32, (add C)> {
+ let Size = 32;
+ let CopyCost = -1;
+}
+
+// The order of registers represents the preferred allocation sequence.
+// Registers are listed in the order caller-save, callee-save, specials.
+def FPR32 : RegisterClass<"CSKY", [f32], 32,
+ (add (sequence "F%u_32", 0, 31))>;
+def sFPR32 : RegisterClass<"CSKY", [f32], 32,
+ (add (sequence "F%u_32", 0, 15))>;
+
+def FPR64 : RegisterClass<"CSKY", [f64], 64,
+ (add (sequence "F%u_64", 0, 31))>;
+def sFPR64 : RegisterClass<"CSKY", [f64], 64,
+ (add (sequence "F%u_64", 0, 15))>;
+
+def FPR128 : RegisterClass<"CSKY",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
+ (add (sequence "F%u_128", 0, 31))>;
+def sFPR128 : RegisterClass<"CSKY",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
+ (add (sequence "F%u_128", 0, 15))>;
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
new file mode 100644
index 000000000000..1c13796e84b6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
@@ -0,0 +1,68 @@
+//===--- CSKYTargetMachine.cpp - Define TargetMachine for CSKY ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about CSKY target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYTargetMachine.h"
+#include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYTarget() {
+ RegisterTargetMachine<CSKYTargetMachine> X(getTheCSKYTarget());
+}
+
+static std::string computeDataLayout(const Triple &TT) {
+ std::string Ret;
+
+ // Only support little endian for now.
+ // TODO: Add support for big endian.
+ Ret += "e";
+
+ // CSKY is always a 32-bit target, and the CSKYv2 ABI is currently preferred.
+ // The stack is 4-byte aligned, with ELF mangling only.
+ Ret += "-m:e-S32-p:32:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:32"
+ "-v128:32:32-a:0:32-Fi32-n32";
+
+ return Ret;
+}
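+// For reference, the specifiers used above decode roughly as: "e" little
+// endian, "m:e" ELF name mangling, "S32" 32-bit natural stack alignment,
+// "p:32:32" 32-bit pointers, "iN:32:32"/"fN:32:32"/"vN:32:32" 32-bit
+// alignment for integer, float and vector types, "a:0:32" 32-bit preferred
+// alignment for aggregates, "Fi32" 32-bit function-pointer alignment, and
+// "n32" a 32-bit native integer width.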
+
+CSKYTargetMachine::CSKYTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT)
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
+ RM.getValueOr(Reloc::Static),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
+ TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
+ initAsmInfo();
+}
+
+namespace {
+class CSKYPassConfig : public TargetPassConfig {
+public:
+ CSKYPassConfig(CSKYTargetMachine &TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ CSKYTargetMachine &getCSKYTargetMachine() const {
+ return getTM<CSKYTargetMachine>();
+ }
+};
+
+} // namespace
+
+TargetPassConfig *CSKYTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new CSKYPassConfig(*this, PM);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.h
new file mode 100644
index 000000000000..d50e3877b550
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.h
@@ -0,0 +1,38 @@
+//===--- CSKYTargetMachine.h - Define TargetMachine for CSKY ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CSKY specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYTARGETMACHINE_H
+#define LLVM_LIB_TARGET_CSKY_CSKYTARGETMACHINE_H
+
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class CSKYTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+
+public:
+ CSKYTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+};
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
new file mode 100644
index 000000000000..e30123d64755
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
@@ -0,0 +1,69 @@
+//===-- CSKYAsmBackend.cpp - CSKY Assembler Backend -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYAsmBackend.h"
+#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "csky-asmbackend"
+
+using namespace llvm;
+
+std::unique_ptr<MCObjectTargetWriter>
+CSKYAsmBackend::createObjectTargetWriter() const {
+ return createCSKYELFObjectWriter();
+}
+
+unsigned int CSKYAsmBackend::getNumFixupKinds() const { return 1; }
+
+void CSKYAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsResolved,
+ const MCSubtargetInfo *STI) const {
+ return;
+}
+
+bool CSKYAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ return false;
+}
+
+void CSKYAsmBackend::relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
+ llvm_unreachable("CSKYAsmBackend::relaxInstruction() unimplemented");
+}
+
+bool CSKYAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+ if (Count % 2)
+ return false;
+
+ // MOV32 r0, r0
+ while (Count >= 4) {
+ OS.write("\xc4\x00\x48\x20", 4);
+ Count -= 4;
+ }
+ // MOV16 r0, r0
+ if (Count)
+ OS.write("\x6c\x03", 2);
+
+ return true;
+}
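+// Example: a request to pad 6 bytes is met with one 4-byte MOV32 r0, r0
+// followed by one 2-byte MOV16 r0, r0; odd byte counts are rejected,
+// presumably because CSKY instructions are 2-byte aligned.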
+
+MCAsmBackend *llvm::createCSKYAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options) {
+ return new CSKYAsmBackend(STI, Options);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
new file mode 100644
index 000000000000..b4cba4264e03
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
@@ -0,0 +1,39 @@
+//===-- CSKYAsmBackend.h - CSKY Assembler Backend -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYASMBACKEND_H
+#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYASMBACKEND_H
+
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCTargetOptions.h"
+
+namespace llvm {
+
+class CSKYAsmBackend : public MCAsmBackend {
+
+public:
+ CSKYAsmBackend(const MCSubtargetInfo &STI, const MCTargetOptions &OP)
+ : MCAsmBackend(support::little) {}
+
+ unsigned int getNumFixupKinds() const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override;
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYASMBACKEND_H
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
new file mode 100644
index 000000000000..163632632290
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
@@ -0,0 +1,45 @@
+//===-- CSKYELFObjectWriter.cpp - CSKY ELF Writer -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+
+#define DEBUG_TYPE "csky-elf-object-writer"
+
+using namespace llvm;
+
+namespace {
+
+class CSKYELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ CSKYELFObjectWriter(uint8_t OSABI = 0)
+ : MCELFObjectTargetWriter(false, OSABI, ELF::EM_CSKY, true){};
+ ~CSKYELFObjectWriter() {}
+
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
+};
+
+} // namespace
+
+unsigned CSKYELFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ // Determine the type of the relocation.
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ }
+}
+
+std::unique_ptr<MCObjectTargetWriter> llvm::createCSKYELFObjectWriter() {
+ return std::make_unique<CSKYELFObjectWriter>();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp
new file mode 100644
index 000000000000..668247bbbd87
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp
@@ -0,0 +1,25 @@
+//===-- CSKYMCAsmInfo.cpp - CSKY Asm properties ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the CSKYMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYMCAsmInfo.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+void CSKYMCAsmInfo::anchor() {}
+
+CSKYMCAsmInfo::CSKYMCAsmInfo(const Triple &TargetTriple) {
+ AlignmentIsInBytes = false;
+ SupportsDebugInformation = true;
+ CommentString = "#";
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.h
new file mode 100644
index 000000000000..3e0609f19531
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.h
@@ -0,0 +1,29 @@
+//===-- CSKYMCAsmInfo.h - CSKY Asm Info ------------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the CSKYMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCASMINFO_H
+#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfoELF.h"
+
+namespace llvm {
+class Triple;
+
+class CSKYMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit CSKYMCAsmInfo(const Triple &TargetTriple);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCASMINFO_H
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
new file mode 100644
index 000000000000..ed2b0e77b81a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
@@ -0,0 +1,71 @@
+//===-- CSKYMCCodeEmitter.cpp - CSKY Code Emitter interface ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CSKYMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYMCCodeEmitter.h"
+#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-mccode-emitter"
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+unsigned CSKYMCCodeEmitter::getOImmOpValue(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Idx);
+ assert(MO.isImm() && "Unexpected MO type.");
+ return MO.getImm() - 1;
+}
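+// getOImmOpValue pairs with the oimm operand classes in CSKYInstrInfo.td: an
+// "off-by-one" immediate such as oimm12 covers 1..4096 and is stored in the
+// instruction as Imm - 1, so e.g. addi32 rz, rx, 1 encodes a zero immediate
+// field.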
+
+void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCInstrDesc &Desc = MII.get(MI.getOpcode());
+ unsigned Size = Desc.getSize();
+ uint32_t Bin = getBinaryCodeForInstr(MI, Fixups, STI);
+
+ uint16_t LO16 = static_cast<uint16_t>(Bin);
+ uint16_t HI16 = static_cast<uint16_t>(Bin >> 16);
+
+ if (Size == 4)
+ support::endian::write<uint16_t>(OS, HI16, support::little);
+
+ support::endian::write<uint16_t>(OS, LO16, support::little);
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
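+// Note on the emission order above: a 32-bit instruction is written as two
+// little-endian half-words, high half-word first, which presumably matches
+// the 16-bit granularity of the CSKY instruction stream; 16-bit instructions
+// emit only the low half-word.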
+
+unsigned
+CSKYMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ llvm_unreachable("Unhandled expression!");
+ return 0;
+}
+
+MCCodeEmitter *llvm::createCSKYMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new CSKYMCCodeEmitter(Ctx, MCII);
+}
+
+#include "CSKYGenMCCodeEmitter.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
new file mode 100644
index 000000000000..c850a4bab745
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
@@ -0,0 +1,61 @@
+//===-- CSKYMCCodeEmitter.h - CSKY Code Emitter interface ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CSKYMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCCODEEMITTER_H
+#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+
+namespace llvm {
+
+class CSKYMCCodeEmitter : public MCCodeEmitter {
+ MCContext &Ctx;
+ const MCInstrInfo &MII;
+
+public:
+ CSKYMCCodeEmitter(MCContext &Ctx, const MCInstrInfo &MII)
+ : Ctx(Ctx), MII(MII) {}
+
+ ~CSKYMCCodeEmitter() {}
+
+ void encodeInstruction(const MCInst &Inst, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ // Generated by tablegen.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // Default encoding method used by tablegen.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ template <int shift = 0>
+ unsigned getImmOpValue(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Idx);
+ assert(MO.isImm() && "Unexpected MO type.");
+ return (MO.getImm() >> shift);
+ }
+
+ unsigned getOImmOpValue(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCCODEEMITTER_H
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
new file mode 100644
index 000000000000..876000a37004
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
@@ -0,0 +1,62 @@
+//===-- CSKYMCTargetDesc.cpp - CSKY Target Descriptions -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file provides CSKY specific target descriptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "CSKYMCTargetDesc.h"
+#include "CSKYAsmBackend.h"
+#include "CSKYMCAsmInfo.h"
+#include "CSKYMCCodeEmitter.h"
+#include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "CSKYGenInstrInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "CSKYGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCAsmInfo *createCSKYMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TT,
+ const MCTargetOptions &Options) {
+ MCAsmInfo *MAI = new CSKYMCAsmInfo(TT);
+
+ // Initial state of the frame pointer is SP.
+ unsigned Reg = MRI.getDwarfRegNum(CSKY::R14, true);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, Reg, 0);
+ MAI->addInitialFrameState(Inst);
+ return MAI;
+}
+
+static MCInstrInfo *createCSKYMCInstrInfo() {
+ MCInstrInfo *Info = new MCInstrInfo();
+ InitCSKYMCInstrInfo(Info);
+ return Info;
+}
+
+static MCRegisterInfo *createCSKYMCRegisterInfo(const Triple &TT) {
+ MCRegisterInfo *Info = new MCRegisterInfo();
+ InitCSKYMCRegisterInfo(Info, CSKY::R15);
+ return Info;
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYTargetMC() {
+ auto &CSKYTarget = getTheCSKYTarget();
+ TargetRegistry::RegisterMCAsmBackend(CSKYTarget, createCSKYAsmBackend);
+ TargetRegistry::RegisterMCAsmInfo(CSKYTarget, createCSKYMCAsmInfo);
+ TargetRegistry::RegisterMCInstrInfo(CSKYTarget, createCSKYMCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(CSKYTarget, createCSKYMCRegisterInfo);
+ TargetRegistry::RegisterMCCodeEmitter(CSKYTarget, createCSKYMCCodeEmitter);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
new file mode 100644
index 000000000000..da8a3b63a2f9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
@@ -0,0 +1,48 @@
+//===-- CSKYMCTargetDesc.h - CSKY Target Descriptions -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides CSKY specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCTARGETDESC_H
+#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCTARGETDESC_H
+
+#include "llvm/MC/MCTargetOptions.h"
+#include <memory>
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectTargetWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+class Triple;
+
+std::unique_ptr<MCObjectTargetWriter> createCSKYELFObjectWriter();
+
+MCAsmBackend *createCSKYAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options);
+
+MCCodeEmitter *createCSKYMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+} // namespace llvm
+
+#define GET_REGINFO_ENUM
+#include "CSKYGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_ENUM
+#include "CSKYGenInstrInfo.inc"
+
+#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCTARGETDESC_H
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp
new file mode 100644
index 000000000000..1af2e672ff42
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- CSKYTargetInfo.cpp - CSKY Target Implementation -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target &llvm::getTheCSKYTarget() {
+ static Target TheCSKYTarget;
+ return TheCSKYTarget;
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYTargetInfo() {
+ RegisterTarget<Triple::csky> X(getTheCSKYTarget(), "csky", "C-SKY", "CSKY");
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.h b/contrib/llvm-project/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.h
new file mode 100644
index 000000000000..c317c5401f03
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.h
@@ -0,0 +1,20 @@
+//===-- CSKYTargetInfo.h - CSKY Target Implementation ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_TARGETINFO_CSKYTARGETINFO_H
+#define LLVM_LIB_TARGET_CSKY_TARGETINFO_CSKYTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheCSKYTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_TARGETINFO_CSKYTARGETINFO_H
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 1e7862c36ea0..b6763fd9aef0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -641,7 +641,7 @@ bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
return finishBundle(IDLoc, Out);
}
- MCInst *SubInst = new (getParser().getContext()) MCInst;
+ MCInst *SubInst = getParser().getContext().createMCInst();
if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo,
MatchingInlineAsm)) {
if (InBrackets)
@@ -945,7 +945,7 @@ bool HexagonAsmParser::isLabel(AsmToken &Token) {
StringRef Raw(String.data(), Third.getString().data() - String.data() +
Third.getString().size());
std::string Collapsed = std::string(Raw);
- Collapsed.erase(llvm::remove_if(Collapsed, isSpace), Collapsed.end());
+ llvm::erase_if(Collapsed, isSpace);
StringRef Whole = Collapsed;
std::pair<StringRef, StringRef> DotSplit = Whole.split('.');
if (!matchRegister(DotSplit.first.lower()))
@@ -997,7 +997,7 @@ OperandMatchResultTy HexagonAsmParser::tryParseRegister(unsigned &RegNo,
NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type));
}
std::string Collapsed = std::string(RawString);
- Collapsed.erase(llvm::remove_if(Collapsed, isSpace), Collapsed.end());
+ llvm::erase_if(Collapsed, isSpace);
StringRef FullString = Collapsed;
std::pair<StringRef, StringRef> DotSplit = FullString.split('.');
unsigned DotReg = matchRegister(DotSplit.first.lower());
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
index 7ef23ef35a74..8bced3cec082 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -198,10 +198,10 @@ BitTracker::~BitTracker() {
// the actual bits of the "self" register.
// While this cannot happen in the current implementation, I'm not sure
// if this should be ruled out in the future.
-bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
+bool BT::RegisterCell::meet(const RegisterCell &RC, Register SelfR) {
// An example when "meet" can be invoked with SelfR == 0 is a phi node
// with a physical register as an operand.
- assert(SelfR == 0 || Register::isVirtualRegister(SelfR));
+ assert(SelfR == 0 || SelfR.isVirtual());
bool Changed = false;
for (uint16_t i = 0, n = Bits.size(); i < n; ++i) {
const BitValue &RCV = RC[i];
@@ -335,13 +335,13 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
// 1. find a physical register PhysR from the same class as RR.Reg,
// 2. find a physical register PhysS that corresponds to PhysR:RR.Sub,
// 3. find a register class that contains PhysS.
- if (Register::isVirtualRegister(RR.Reg)) {
+ if (RR.Reg.isVirtual()) {
const auto &VC = composeWithSubRegIndex(*MRI.getRegClass(RR.Reg), RR.Sub);
return TRI.getRegSizeInBits(VC);
}
- assert(Register::isPhysicalRegister(RR.Reg));
- Register PhysR =
- (RR.Sub == 0) ? Register(RR.Reg) : TRI.getSubReg(RR.Reg, RR.Sub);
+ assert(RR.Reg.isPhysical());
+ MCRegister PhysR =
+ (RR.Sub == 0) ? RR.Reg.asMCReg() : TRI.getSubReg(RR.Reg, RR.Sub);
return getPhysRegBitWidth(PhysR);
}
@@ -351,10 +351,10 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
// Physical registers are assumed to be present in the map with an unknown
// value. Don't actually insert anything in the map, just return the cell.
- if (Register::isPhysicalRegister(RR.Reg))
+ if (RR.Reg.isPhysical())
return RegisterCell::self(0, BW);
- assert(Register::isVirtualRegister(RR.Reg));
+ assert(RR.Reg.isVirtual());
// For virtual registers that belong to a class that is not tracked,
// generate an "unknown" value as well.
const TargetRegisterClass *C = MRI.getRegClass(RR.Reg);
@@ -377,7 +377,7 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
// While updating the cell map can be done in a meaningful way for
// a part of a register, it makes little sense to implement it as the
// SSA representation would never contain such "partial definitions".
- if (!Register::isVirtualRegister(RR.Reg))
+ if (!RR.Reg.isVirtual())
return;
assert(RR.Sub == 0 && "Unexpected sub-register in definition");
// Eliminate all ref-to-reg-0 bit values: replace them with "self".
@@ -704,15 +704,14 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
return Res;
}
-BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const {
+BT::BitMask BT::MachineEvaluator::mask(Register Reg, unsigned Sub) const {
assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0");
uint16_t W = getRegBitWidth(Reg);
assert(W > 0 && "Cannot generate mask for empty register");
return BitMask(0, W-1);
}
-uint16_t BT::MachineEvaluator::getPhysRegBitWidth(unsigned Reg) const {
- assert(Register::isPhysicalRegister(Reg));
+uint16_t BT::MachineEvaluator::getPhysRegBitWidth(MCRegister Reg) const {
const TargetRegisterClass &PC = *TRI.getMinimalPhysRegClass(Reg);
return TRI.getRegSizeInBits(PC);
}
@@ -875,7 +874,7 @@ void BT::visitNonBranch(const MachineInstr &MI) {
continue;
RegisterRef RD(MO);
assert(RD.Sub == 0 && "Unexpected sub-register in definition");
- if (!Register::isVirtualRegister(RD.Reg))
+ if (!RD.Reg.isVirtual())
continue;
bool Changed = false;
@@ -980,7 +979,7 @@ void BT::visitBranchesFrom(const MachineInstr &BI) {
FlowQ.push(CFGEdge(ThisN, TB->getNumber()));
}
-void BT::visitUsesOf(unsigned Reg) {
+void BT::visitUsesOf(Register Reg) {
if (Trace)
dbgs() << "queuing uses of modified reg " << printReg(Reg, &ME.TRI)
<< " cell: " << ME.getCell(Reg, Map) << '\n';
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.h
index efb21805b801..08c0359a4b7f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.h
@@ -62,7 +62,7 @@ private:
void visitPHI(const MachineInstr &PI);
void visitNonBranch(const MachineInstr &MI);
void visitBranchesFrom(const MachineInstr &BI);
- void visitUsesOf(unsigned Reg);
+ void visitUsesOf(Register Reg);
using CFGEdge = std::pair<int, int>;
using EdgeSetType = std::set<CFGEdge>;
@@ -131,19 +131,20 @@ struct BitTracker::BitRef {
return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
}
- unsigned Reg;
+ Register Reg;
uint16_t Pos;
};
// Abstraction of a register reference in MachineOperand. It contains the
// register number and the subregister index.
+// FIXME: Consolidate duplicate definitions of RegisterRef
struct BitTracker::RegisterRef {
- RegisterRef(unsigned R = 0, unsigned S = 0)
- : Reg(R), Sub(S) {}
+ RegisterRef(Register R = 0, unsigned S = 0) : Reg(R), Sub(S) {}
RegisterRef(const MachineOperand &MO)
: Reg(MO.getReg()), Sub(MO.getSubReg()) {}
- unsigned Reg, Sub;
+ Register Reg;
+ unsigned Sub;
};
// Value that a single bit can take. This is outside of the context of
@@ -312,7 +313,7 @@ struct BitTracker::RegisterCell {
return Bits[BitN];
}
- bool meet(const RegisterCell &RC, unsigned SelfR);
+ bool meet(const RegisterCell &RC, Register SelfR);
RegisterCell &insert(const RegisterCell &RC, const BitMask &M);
RegisterCell extract(const BitMask &M) const; // Returns a new cell.
RegisterCell &rol(uint16_t Sh); // Rotate left.
@@ -461,7 +462,7 @@ struct BitTracker::MachineEvaluator {
// Sub == 0, in this case, the function should return a mask that spans
// the entire register Reg (which is what the default implementation
// does).
- virtual BitMask mask(unsigned Reg, unsigned Sub) const;
+ virtual BitMask mask(Register Reg, unsigned Sub) const;
// Indicate whether a given register class should be tracked.
virtual bool track(const TargetRegisterClass *RC) const { return true; }
// Evaluate a non-branching machine instruction, given the cell map with
@@ -484,7 +485,7 @@ struct BitTracker::MachineEvaluator {
llvm_unreachable("Unimplemented composeWithSubRegIndex");
}
// Return the size in bits of the physical register Reg.
- virtual uint16_t getPhysRegBitWidth(unsigned Reg) const;
+ virtual uint16_t getPhysRegBitWidth(MCRegister Reg) const;
const TargetRegisterInfo &TRI;
MachineRegisterInfo &MRI;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index f3a87ef20a60..aeaeac65de96 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -175,7 +175,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
while (Result == Success && !Complete) {
if (Bytes.size() < HEXAGON_INSTR_SIZE)
return MCDisassembler::Fail;
- MCInst *Inst = new (getContext()) MCInst;
+ MCInst *Inst = getContext().createMCInst();
Result = getSingleInstruction(*Inst, MI, Bytes, Address, cs, Complete);
MI.addOperand(MCOperand::createInst(Inst));
Size += HEXAGON_INSTR_SIZE;
@@ -384,8 +384,8 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
break;
}
MI.setOpcode(Hexagon::DuplexIClass0 + duplexIClass);
- MCInst *MILow = new (getContext()) MCInst;
- MCInst *MIHigh = new (getContext()) MCInst;
+ MCInst *MILow = getContext().createMCInst();
+ MCInst *MIHigh = getContext().createMCInst();
auto TmpExtender = CurrentExtender;
CurrentExtender =
nullptr; // constant extenders in duplex must always be in slot 1
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/Hexagon.h
index 58dadf012da5..98e5710d4fc1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/Hexagon.h
@@ -14,12 +14,9 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-
namespace llvm {
class HexagonTargetMachine;
+ class ImmutablePass;
/// Creates a Hexagon-specific Target Transformation Info pass.
ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 49edb0d99492..54aa14849dd9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -206,14 +206,14 @@ namespace {
uint16_t W);
static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
uint16_t W, uint64_t &U);
- static bool replaceReg(unsigned OldR, unsigned NewR,
- MachineRegisterInfo &MRI);
+ static bool replaceReg(Register OldR, Register NewR,
+ MachineRegisterInfo &MRI);
static bool getSubregMask(const BitTracker::RegisterRef &RR,
unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
- static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
- unsigned NewSR, MachineRegisterInfo &MRI);
- static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
- unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
+ static bool replaceRegWithSub(Register OldR, Register NewR, unsigned NewSR,
+ MachineRegisterInfo &MRI);
+ static bool replaceSubWithSub(Register OldR, unsigned OldSR, Register NewR,
+ unsigned NewSR, MachineRegisterInfo &MRI);
static bool parseRegSequence(const MachineInstr &I,
BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
const MachineRegisterInfo &MRI);
@@ -292,7 +292,7 @@ void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
if (!Op.isReg() || !Op.isDef())
continue;
Register R = Op.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
continue;
Defs.insert(R);
}
@@ -304,7 +304,7 @@ void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
if (!Op.isReg() || !Op.isUse())
continue;
Register R = Op.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
continue;
Uses.insert(R);
}
@@ -352,9 +352,9 @@ bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
return true;
}
-bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
- MachineRegisterInfo &MRI) {
- if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR))
+bool HexagonBitSimplify::replaceReg(Register OldR, Register NewR,
+ MachineRegisterInfo &MRI) {
+ if (!OldR.isVirtual() || !NewR.isVirtual())
return false;
auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
decltype(End) NextI;
@@ -365,9 +365,10 @@ bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
return Begin != End;
}
-bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
- unsigned NewSR, MachineRegisterInfo &MRI) {
- if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR))
+bool HexagonBitSimplify::replaceRegWithSub(Register OldR, Register NewR,
+ unsigned NewSR,
+ MachineRegisterInfo &MRI) {
+ if (!OldR.isVirtual() || !NewR.isVirtual())
return false;
if (hasTiedUse(OldR, MRI, NewSR))
return false;
@@ -381,9 +382,10 @@ bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
return Begin != End;
}
-bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
- unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
- if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR))
+bool HexagonBitSimplify::replaceSubWithSub(Register OldR, unsigned OldSR,
+ Register NewR, unsigned NewSR,
+ MachineRegisterInfo &MRI) {
+ if (!OldR.isVirtual() || !NewR.isVirtual())
return false;
if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
return false;
@@ -894,7 +896,7 @@ bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
// register class.
const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
- if (!Register::isVirtualRegister(RR.Reg))
+ if (!RR.Reg.isVirtual())
return nullptr;
auto *RC = MRI.getRegClass(RR.Reg);
if (RR.Sub == 0)
@@ -925,8 +927,7 @@ const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
// with a 32-bit register.
bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
- if (!Register::isVirtualRegister(RD.Reg) ||
- !Register::isVirtualRegister(RS.Reg))
+ if (!RD.Reg.isVirtual() || !RS.Reg.isVirtual())
return false;
// Return false if one (or both) classes are nullptr.
auto *DRC = getFinalVRegClass(RD, MRI);
@@ -1017,7 +1018,7 @@ bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
if (!Op.isReg() || !Op.isDef())
continue;
Register R = Op.getReg();
- if (!Register::isVirtualRegister(R) || !isDead(R)) {
+ if (!R.isVirtual() || !isDead(R)) {
AllDead = false;
break;
}
@@ -1219,7 +1220,7 @@ bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
MachineInstr &UseI = *I->getParent();
if (UseI.isPHI() || UseI.isCopy()) {
Register DefR = UseI.getOperand(0).getReg();
- if (!Register::isVirtualRegister(DefR))
+ if (!DefR.isVirtual())
return false;
Pending.push_back(DefR);
} else {
@@ -1380,8 +1381,9 @@ namespace {
static bool isTfrConst(const MachineInstr &MI);
private:
- unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
- MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);
+ Register genTfrConst(const TargetRegisterClass *RC, int64_t C,
+ MachineBasicBlock &B, MachineBasicBlock::iterator At,
+ DebugLoc &DL);
const HexagonInstrInfo &HII;
MachineRegisterInfo &MRI;
@@ -1408,8 +1410,10 @@ bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
// Generate a transfer-immediate instruction that is appropriate for the
// register class and the actual value being transferred.
-unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
- MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) {
+Register ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
+ MachineBasicBlock &B,
+ MachineBasicBlock::iterator At,
+ DebugLoc &DL) {
Register Reg = MRI.createVirtualRegister(RC);
if (RC == &Hexagon::IntRegsRegClass) {
BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
@@ -1473,8 +1477,8 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
HBS::getInstrDefs(*I, Defs);
if (Defs.count() != 1)
continue;
- unsigned DR = Defs.find_first();
- if (!Register::isVirtualRegister(DR))
+ Register DR = Defs.find_first();
+ if (!DR.isVirtual())
continue;
uint64_t U;
const BitTracker::RegisterCell &DRC = BT.lookup(DR);
@@ -1482,7 +1486,7 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
int64_t C = U;
DebugLoc DL = I->getDebugLoc();
auto At = I->isPHI() ? B.getFirstNonPHI() : I;
- unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
+ Register ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
if (ImmReg) {
HBS::replaceReg(DR, ImmReg, MRI);
BT.put(ImmReg, DRC);
@@ -1549,7 +1553,7 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
if (!HBS::getSubregMask(Inp, B, W, MRI))
return false;
- for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
+ for (Register R = AVs.find_first(); R; R = AVs.find_next(R)) {
if (!BT.has(R) || Forbidden[R])
continue;
const BitTracker::RegisterCell &RC = BT.lookup(R);
@@ -1608,7 +1612,7 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B,
DebugLoc DL = I->getDebugLoc();
auto At = I->isPHI() ? B.getFirstNonPHI() : I;
- for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
+ for (Register R = Defs.find_first(); R; R = Defs.find_next(R)) {
BitTracker::RegisterRef MR;
auto *FRC = HBS::getFinalVRegClass(R, MRI);
@@ -1815,7 +1819,7 @@ bool BitSimplification::matchHalf(unsigned SelfR,
if (I == B+16)
return false;
- unsigned Reg = RC[I].RefI.Reg;
+ Register Reg = RC[I].RefI.Reg;
unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B.
if (P < I-B)
return false;
@@ -1823,7 +1827,7 @@ bool BitSimplification::matchHalf(unsigned SelfR,
if (Reg == 0 || Reg == SelfR) // Don't match "self".
return false;
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return false;
if (!BT.has(Reg))
return false;
@@ -2363,7 +2367,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
P = V.RefI.Pos;
}
if (P != std::numeric_limits<unsigned>::max()) {
- unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
.addReg(RR.Reg, 0, RR.Sub)
.addImm(P);
@@ -3165,8 +3169,8 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
HBS::getInstrDefs(*I, Defs);
if (Defs.count() != 1)
continue;
- unsigned DefR = Defs.find_first();
- if (!Register::isVirtualRegister(DefR))
+ Register DefR = Defs.find_first();
+ if (!DefR.isVirtual())
continue;
if (!isBitShuffle(&*I, DefR))
continue;
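
The recurring change in this file swaps the static query Register::isVirtualRegister(R) for the member call R.isVirtual() once the value is held as a Register rather than a raw unsigned. A small standalone sketch of the idea (a simplified stand-in: the real llvm::Register also reserves ranges for stack slots and physical registers, so the tag-bit encoding below is only illustrative):

#include <cassert>
#include <cstdint>

// Simplified model: assume virtual registers are tagged with the top bit
// of a 32-bit id, as a stand-in for the real encoding.
class Reg {
  uint32_t Id;
public:
  explicit Reg(uint32_t Id = 0) : Id(Id) {}

  // Old-style static query, callable on a raw id.
  static bool isVirtualRegister(uint32_t Id) { return Id & 0x80000000u; }

  // Member form used after the conversion to typed registers.
  bool isVirtual() const { return isVirtualRegister(Id); }

  uint32_t id() const { return Id; }
};

int main() {
  Reg Phys(5), Virt(0x80000000u | 7);
  assert(!Phys.isVirtual() && Reg::isVirtualRegister(Virt.id()));
  assert(Virt.isVirtual());
  return 0;
}
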
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 1e4030b84bc1..0f6dedeb28c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -86,7 +86,7 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
}
}
-BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
+BT::BitMask HexagonEvaluator::mask(Register Reg, unsigned Sub) const {
if (Sub == 0)
return MachineEvaluator::mask(Reg, 0);
const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
@@ -110,9 +110,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
llvm_unreachable("Unexpected register/subregister");
}
-uint16_t HexagonEvaluator::getPhysRegBitWidth(unsigned Reg) const {
- assert(Register::isPhysicalRegister(Reg));
-
+uint16_t HexagonEvaluator::getPhysRegBitWidth(MCRegister Reg) const {
using namespace Hexagon;
const auto &HST = MF.getSubtarget<HexagonSubtarget>();
if (HST.useHVXOps()) {
@@ -1043,7 +1041,7 @@ unsigned HexagonEvaluator::getUniqueDefVReg(const MachineInstr &MI) const {
if (!Op.isReg() || !Op.isDef())
continue;
Register R = Op.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
continue;
if (DefReg != 0)
return 0;
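
getPhysRegBitWidth now takes an MCRegister, a type that is meant to name only machine-level (physical) registers, so the removed run-time assert becomes a property of the signature itself. A hypothetical sketch of that pattern (the wrapper types below are illustrative, not the LLVM classes):

#include <cstdint>
#include <iostream>

// Hypothetical wrappers: PhysReg can only be built from a physical id,
// so a function taking PhysReg needs no isPhysical() assertion.
struct PhysReg {
  uint32_t Id;
  explicit PhysReg(uint32_t Id) : Id(Id) {}
};

struct AnyReg {
  uint32_t Id;
  bool isPhysical() const { return Id != 0 && !(Id & 0x80000000u); }
};

unsigned bitWidth(PhysReg R) {
  // No assert needed: the parameter type already rules out virtual registers.
  return R.Id < 32 ? 32 : 64;  // made-up width rule, just for the example
}

int main() {
  std::cout << bitWidth(PhysReg(3)) << "\n";
  // bitWidth(AnyReg{7});  // would not compile without an explicit conversion
  return 0;
}
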
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.h
index 02607d50f686..2d24e859e761 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.h
@@ -36,9 +36,9 @@ struct HexagonEvaluator : public BitTracker::MachineEvaluator {
bool evaluate(const MachineInstr &BI, const CellMapType &Inputs,
BranchTargetList &Targets, bool &FallsThru) const override;
- BitTracker::BitMask mask(unsigned Reg, unsigned Sub) const override;
+ BitTracker::BitMask mask(Register Reg, unsigned Sub) const override;
- uint16_t getPhysRegBitWidth(unsigned Reg) const override;
+ uint16_t getPhysRegBitWidth(MCRegister Reg) const override;
const TargetRegisterClass &composeWithSubRegIndex(
const TargetRegisterClass &RC, unsigned Idx) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
index d1d1b8ee7d41..56ee3cd60c17 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -84,7 +84,7 @@ void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) {
if (empty())
return;
- llvm::sort(begin(), end());
+ llvm::sort(*this);
iterator Iter = begin();
while (Iter != end()-1) {
@@ -275,7 +275,7 @@ HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs(
for (; I.isValid(); ++I)
SRs.insert({*I, 0});
} else {
- assert(Register::isVirtualRegister(R.Reg));
+ assert(R.Reg.isVirtual());
auto &RC = *MRI.getRegClass(R.Reg);
unsigned PReg = *RC.begin();
MCSubRegIndexIterator I(PReg, &TRI);
@@ -482,7 +482,7 @@ HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap(
}
}
for (auto &P : LiveMap)
- if (Register::isVirtualRegister(P.first.Reg))
+ if (P.first.Reg.isVirtual())
addDeadRanges(P.first);
LLVM_DEBUG(dbgs() << __func__ << ": dead map\n"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBlockRanges.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBlockRanges.h
index 61115e29a708..5a3b6433fba7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBlockRanges.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBlockRanges.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBLOCKRANGES_H
#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/Register.h"
#include <cassert>
#include <map>
#include <set>
@@ -30,8 +31,10 @@ class TargetRegisterInfo;
struct HexagonBlockRanges {
HexagonBlockRanges(MachineFunction &MF);
+ // FIXME: Consolidate duplicate definitions of RegisterRef
struct RegisterRef {
- unsigned Reg, Sub;
+ llvm::Register Reg;
+ unsigned Sub;
bool operator<(RegisterRef R) const {
return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub);
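
RegisterRef now carries a typed llvm::Register plus a subregister index while keeping the same strict-weak ordering, so it can continue to serve as an ordered map/set key (Register compares like its underlying unsigned id). A self-contained sketch of the pattern, with a plain integer standing in for the Register class:

#include <iostream>
#include <map>

struct RegisterRef {
  unsigned Reg;   // stands in for llvm::Register, which compares by its id
  unsigned Sub;
  bool operator<(RegisterRef R) const {
    return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub);
  }
};

int main() {
  std::map<RegisterRef, const char *> Names;
  Names[{42, 0}] = "full register";
  Names[{42, 1}] = "subregister 1";
  for (auto &P : Names)
    std::cout << P.first.Reg << ":" << P.first.Sub << " -> " << P.second << "\n";
  return 0;
}
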
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 11a455ce4347..b456cf139c55 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "Hexagon.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 6a5192c866cc..11e7d5a17fa9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -447,7 +447,7 @@ static void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM,
Work.erase(First);
NodeChildrenMap::iterator CF = NCM.find(N);
if (CF != NCM.end()) {
- Work.insert(Work.end(), CF->second.begin(), CF->second.end());
+ llvm::append_range(Work, CF->second);
Nodes.insert(CF->second.begin(), CF->second.end());
}
}
@@ -472,10 +472,11 @@ static const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) {
// determining equality. The only purpose of the ordering is to eliminate
// duplication due to the commutativity of equality/non-equality.
static NodePair node_pair(GepNode *N1, GepNode *N2) {
- uintptr_t P1 = uintptr_t(N1), P2 = uintptr_t(N2);
- if (P1 <= P2)
- return std::make_pair(N1, N2);
- return std::make_pair(N2, N1);
+ uintptr_t P1 = reinterpret_cast<uintptr_t>(N1);
+ uintptr_t P2 = reinterpret_cast<uintptr_t>(N2);
+ if (P1 <= P2)
+ return std::make_pair(N1, N2);
+ return std::make_pair(N2, N1);
}
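
As the comment above notes, node_pair only orders the two pointers to cancel the commutativity of equality, so (A, B) and (B, A) collapse to one key. A standalone illustration of that canonicalization (hypothetical Node type):

#include <cassert>
#include <cstdint>
#include <set>
#include <utility>

struct Node { int Dummy; };
using NodePair = std::pair<Node *, Node *>;

// Order the two pointers so that an unordered pair has a single
// canonical representation, mirroring node_pair() above.
static NodePair canonical(Node *N1, Node *N2) {
  auto P1 = reinterpret_cast<uintptr_t>(N1);
  auto P2 = reinterpret_cast<uintptr_t>(N2);
  return P1 <= P2 ? NodePair(N1, N2) : NodePair(N2, N1);
}

int main() {
  Node A{0}, B{1};
  std::set<NodePair> Seen;
  Seen.insert(canonical(&A, &B));
  Seen.insert(canonical(&B, &A));  // same unordered pair, same key
  assert(Seen.size() == 1);
  return 0;
}
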
static unsigned node_hash(GepNode *N) {
@@ -650,8 +651,7 @@ void HexagonCommonGEP::common() {
// Node for removal.
Erase.insert(*I);
}
- NodeVect::iterator NewE = remove_if(Nodes, in_set(Erase));
- Nodes.resize(std::distance(Nodes.begin(), NewE));
+ erase_if(Nodes, in_set(Erase));
LLVM_DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes);
}
@@ -1145,7 +1145,7 @@ void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values,
NodeChildrenMap::iterator CF = NCM.find(N);
if (CF != NCM.end()) {
NodeVect &Cs = CF->second;
- Work.insert(Work.end(), Cs.begin(), Cs.end());
+ llvm::append_range(Work, Cs);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index 05b95d8b7314..a774baaa48e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -242,18 +242,15 @@ namespace {
return *this;
}
bool isVReg() const {
- return Reg != 0 && !llvm::Register::isStackSlot(Reg) &&
- llvm::Register::isVirtualRegister(Reg);
- }
- bool isSlot() const {
- return Reg != 0 && llvm::Register::isStackSlot(Reg);
+ return Reg != 0 && !Reg.isStack() && Reg.isVirtual();
}
+ bool isSlot() const { return Reg != 0 && Reg.isStack(); }
operator MachineOperand() const {
if (isVReg())
return MachineOperand::CreateReg(Reg, /*Def*/false, /*Imp*/false,
/*Kill*/false, /*Dead*/false, /*Undef*/false,
/*EarlyClobber*/false, Sub);
- if (llvm::Register::isStackSlot(Reg)) {
+ if (Reg.isStack()) {
int FI = llvm::Register::stackSlot2Index(Reg);
return MachineOperand::CreateFI(FI);
}
@@ -265,7 +262,8 @@ namespace {
// For std::map.
return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub);
}
- unsigned Reg = 0, Sub = 0;
+ llvm::Register Reg;
+ unsigned Sub = 0;
};
struct ExtExpr {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 77578378b058..4a2b0600f42b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -83,7 +83,8 @@ namespace {
// FIXME: Use TargetInstrInfo::RegSubRegPair. Also duplicated in
// HexagonGenPredicate
struct RegisterSubReg {
- unsigned Reg, SubReg;
+ Register Reg;
+ unsigned SubReg;
explicit RegisterSubReg(unsigned R, unsigned SR = 0) : Reg(R), SubReg(SR) {}
explicit RegisterSubReg(const MachineOperand &MO)
@@ -216,16 +217,16 @@ namespace {
void clear() { Map.clear(); }
- bool has(unsigned R) const {
+ bool has(Register R) const {
// All non-virtual registers are considered "bottom".
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
return true;
MapType::const_iterator F = Map.find(R);
return F != Map.end();
}
- const LatticeCell &get(unsigned R) const {
- if (!Register::isVirtualRegister(R))
+ const LatticeCell &get(Register R) const {
+ if (!R.isVirtual())
return Bottom;
MapType::const_iterator F = Map.find(R);
if (F != Map.end())
@@ -234,14 +235,12 @@ namespace {
}
// Invalidates any const references.
- void update(unsigned R, const LatticeCell &L) {
- Map[R] = L;
- }
+ void update(Register R, const LatticeCell &L) { Map[R] = L; }
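
The CellMap above keys lattice cells by Register and, per the comment, treats every non-virtual register as "bottom", since the propagator only tracks virtual registers. A minimal sketch of a flat constant-propagation lattice under that convention (plain integers stand in for registers, and the real LatticeCell is considerably richer):

#include <cstdint>
#include <iostream>
#include <map>

// Flat lattice: Top (no info yet) -> Const(c) -> Bottom (not a constant).
struct Cell {
  enum Kind { Top, Const, Bottom } K = Top;
  int64_t Value = 0;
};

static bool isVirtual(uint32_t R) { return R & 0x80000000u; }  // assumed tagging

struct CellMap {
  std::map<uint32_t, Cell> Map;
  Cell get(uint32_t R) const {
    if (!isVirtual(R))
      return Cell{Cell::Bottom, 0};            // physical register: unknown value
    auto F = Map.find(R);
    return F != Map.end() ? F->second : Cell{};  // absent means Top
  }
  void update(uint32_t R, Cell C) { Map[R] = C; }
};

int main() {
  CellMap Cells;
  Cells.update(0x80000001u, {Cell::Const, 42});
  std::cout << Cells.get(0x80000001u).Value << "\n";     // 42
  std::cout << (Cells.get(3).K == Cell::Bottom) << "\n"; // physical -> bottom
  return 0;
}
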
void print(raw_ostream &os, const TargetRegisterInfo &TRI) const;
private:
- using MapType = std::map<unsigned, LatticeCell>;
+ using MapType = std::map<Register, LatticeCell>;
MapType Map;
// To avoid creating "top" entries, return a const reference to
@@ -633,7 +632,7 @@ void MachineConstPropagator::visitPHI(const MachineInstr &PN) {
const MachineOperand &MD = PN.getOperand(0);
RegisterSubReg DefR(MD);
- assert(Register::isVirtualRegister(DefR.Reg));
+ assert(DefR.Reg.isVirtual());
bool Changed = false;
@@ -662,7 +661,7 @@ Bottomize:
RegisterSubReg UseR(SO);
// If the input is not a virtual register, we don't really know what
// value it holds.
- if (!Register::isVirtualRegister(UseR.Reg))
+ if (!UseR.Reg.isVirtual())
goto Bottomize;
// If there is no cell for an input register, it means top.
if (!Cells.has(UseR.Reg))
@@ -704,7 +703,7 @@ void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) {
continue;
RegisterSubReg DefR(MO);
// Only track virtual registers.
- if (!Register::isVirtualRegister(DefR.Reg))
+ if (!DefR.Reg.isVirtual())
continue;
bool Changed = false;
// If the evaluation failed, set cells for all output registers to bottom.
@@ -1086,7 +1085,7 @@ bool MachineConstPropagator::run(MachineFunction &MF) {
bool MachineConstEvaluator::getCell(const RegisterSubReg &R, const CellMap &Inputs,
LatticeCell &RC) {
- if (!Register::isVirtualRegister(R.Reg))
+ if (!R.Reg.isVirtual())
return false;
const LatticeCell &L = Inputs.get(R.Reg);
if (!R.SubReg) {
@@ -1884,7 +1883,7 @@ namespace {
bool evaluateHexVector2(const MachineInstr &MI, const CellMap &Inputs,
CellMap &Outputs);
- void replaceAllRegUsesWith(unsigned FromReg, unsigned ToReg);
+ void replaceAllRegUsesWith(Register FromReg, Register ToReg);
bool rewriteHexBranch(MachineInstr &BrI, const CellMap &Inputs);
bool rewriteHexConstDefs(MachineInstr &MI, const CellMap &Inputs,
bool &AllDefs);
@@ -1942,7 +1941,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
unsigned Opc = MI.getOpcode();
RegisterSubReg DefR(MD);
assert(!DefR.SubReg);
- if (!Register::isVirtualRegister(DefR.Reg))
+ if (!DefR.Reg.isVirtual())
return false;
if (MI.isCopy()) {
@@ -2809,7 +2808,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
if (!MO.isReg() || !MO.isUse() || MO.isImplicit())
continue;
RegisterSubReg R(MO);
- if (!Register::isVirtualRegister(R.Reg))
+ if (!R.Reg.isVirtual())
continue;
HasUse = true;
// PHIs can legitimately have "top" cells after propagation.
@@ -2851,7 +2850,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
if (!MO.isReg() || !MO.isDef())
continue;
Register R = MO.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
continue;
assert(!MO.getSubReg());
assert(Inputs.has(R));
@@ -3130,10 +3129,10 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
return Changed;
}
-void HexagonConstEvaluator::replaceAllRegUsesWith(unsigned FromReg,
- unsigned ToReg) {
- assert(Register::isVirtualRegister(FromReg));
- assert(Register::isVirtualRegister(ToReg));
+void HexagonConstEvaluator::replaceAllRegUsesWith(Register FromReg,
+ Register ToReg) {
+ assert(FromReg.isVirtual());
+ assert(ToReg.isVirtual());
for (auto I = MRI->use_begin(FromReg), E = MRI->use_end(); I != E;) {
MachineOperand &O = *I;
++I;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 587527d8c32c..23d0cc829e52 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -10,6 +10,7 @@
// to move them together. If we can move them next to each other we do so and
// replace them with a combine instruction.
//===----------------------------------------------------------------------===//
+
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/DenseMap.h"
@@ -26,6 +27,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index a431af17e6d0..d36ffc3da641 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -386,7 +386,7 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
if (!MO.isReg() || !MO.isDef())
continue;
Register R = MO.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
continue;
if (!isPredicate(R))
continue;
@@ -403,7 +403,7 @@ bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const {
if (!MO.isReg() || !MO.isUse())
continue;
Register R = MO.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
continue;
const MachineInstr *DefI = MRI->getVRegDef(R);
// "Undefined" virtual registers are actually defined via IMPLICIT_DEF.
@@ -493,7 +493,7 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs(
if (!MO.isReg() || !MO.isDef())
continue;
Register R = MO.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
continue;
if (isPredicate(R))
PredDefs++;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index c1d0599830cc..fcc880463925 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -174,6 +174,7 @@ namespace {
unsigned CoaCounter = 0;
unsigned TfrCounter = 0;
+ // FIXME: Consolidate duplicate definitions of RegisterRef
struct RegisterRef {
RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()),
Sub(Op.getSubReg()) {}
@@ -187,7 +188,8 @@ namespace {
return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub);
}
- unsigned Reg, Sub;
+ Register Reg;
+ unsigned Sub;
};
using ReferenceMap = DenseMap<unsigned, unsigned>;
@@ -196,25 +198,25 @@ namespace {
unsigned getMaskForSub(unsigned Sub);
bool isCondset(const MachineInstr &MI);
- LaneBitmask getLaneMask(unsigned Reg, unsigned Sub);
+ LaneBitmask getLaneMask(Register Reg, unsigned Sub);
void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec);
bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec);
- void updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range);
- void updateKillFlags(unsigned Reg);
- void updateDeadFlags(unsigned Reg);
- void recalculateLiveInterval(unsigned Reg);
+ void updateDeadsInRange(Register Reg, LaneBitmask LM, LiveRange &Range);
+ void updateKillFlags(Register Reg);
+ void updateDeadFlags(Register Reg);
+ void recalculateLiveInterval(Register Reg);
void removeInstr(MachineInstr &MI);
- void updateLiveness(std::set<unsigned> &RegSet, bool Recalc,
- bool UpdateKills, bool UpdateDeads);
+ void updateLiveness(std::set<Register> &RegSet, bool Recalc,
+ bool UpdateKills, bool UpdateDeads);
unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond);
MachineInstr *genCondTfrFor(MachineOperand &SrcOp,
MachineBasicBlock::iterator At, unsigned DstR,
unsigned DstSR, const MachineOperand &PredOp, bool PredSense,
bool ReadUndef, bool ImpUse);
- bool split(MachineInstr &MI, std::set<unsigned> &UpdRegs);
+ bool split(MachineInstr &MI, std::set<Register> &UpdRegs);
bool isPredicable(MachineInstr *MI);
MachineInstr *getReachingDefForPred(RegisterRef RD,
@@ -224,19 +226,18 @@ namespace {
void predicateAt(const MachineOperand &DefOp, MachineInstr &MI,
MachineBasicBlock::iterator Where,
const MachineOperand &PredOp, bool Cond,
- std::set<unsigned> &UpdRegs);
+ std::set<Register> &UpdRegs);
void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR,
bool Cond, MachineBasicBlock::iterator First,
MachineBasicBlock::iterator Last);
- bool predicate(MachineInstr &TfrI, bool Cond, std::set<unsigned> &UpdRegs);
- bool predicateInBlock(MachineBasicBlock &B,
- std::set<unsigned> &UpdRegs);
+ bool predicate(MachineInstr &TfrI, bool Cond, std::set<Register> &UpdRegs);
+ bool predicateInBlock(MachineBasicBlock &B, std::set<Register> &UpdRegs);
bool isIntReg(RegisterRef RR, unsigned &BW);
bool isIntraBlocks(LiveInterval &LI);
bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
- bool coalesceSegments(const SmallVectorImpl<MachineInstr*> &Condsets,
- std::set<unsigned> &UpdRegs);
+ bool coalesceSegments(const SmallVectorImpl<MachineInstr *> &Condsets,
+ std::set<Register> &UpdRegs);
};
} // end anonymous namespace
@@ -285,8 +286,8 @@ bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) {
return false;
}
-LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) {
- assert(Register::isVirtualRegister(Reg));
+LaneBitmask HexagonExpandCondsets::getLaneMask(Register Reg, unsigned Sub) {
+ assert(Reg.isVirtual());
return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub)
: MRI->getMaxLaneMaskForVReg(Reg);
}
@@ -312,7 +313,7 @@ bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
return false;
}
-void HexagonExpandCondsets::updateKillFlags(unsigned Reg) {
+void HexagonExpandCondsets::updateKillFlags(Register Reg) {
auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void {
// Set the <kill> flag on a use of Reg whose lane mask is contained in LM.
MachineInstr *MI = LIS->getInstructionFromIndex(K);
@@ -363,9 +364,9 @@ void HexagonExpandCondsets::updateKillFlags(unsigned Reg) {
}
}
-void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
- LiveRange &Range) {
- assert(Register::isVirtualRegister(Reg));
+void HexagonExpandCondsets::updateDeadsInRange(Register Reg, LaneBitmask LM,
+ LiveRange &Range) {
+ assert(Reg.isVirtual());
if (Range.empty())
return;
@@ -374,7 +375,7 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
if (!Op.isReg() || !Op.isDef())
return { false, false };
Register DR = Op.getReg(), DSR = Op.getSubReg();
- if (!Register::isVirtualRegister(DR) || DR != Reg)
+ if (!DR.isVirtual() || DR != Reg)
return { false, false };
LaneBitmask SLM = getLaneMask(DR, DSR);
LaneBitmask A = SLM & LM;
@@ -524,7 +525,7 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
}
}
-void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) {
+void HexagonExpandCondsets::updateDeadFlags(Register Reg) {
LiveInterval &LI = LIS->getInterval(Reg);
if (LI.hasSubRanges()) {
for (LiveInterval::SubRange &S : LI.subranges()) {
@@ -538,7 +539,7 @@ void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) {
}
}
-void HexagonExpandCondsets::recalculateLiveInterval(unsigned Reg) {
+void HexagonExpandCondsets::recalculateLiveInterval(Register Reg) {
LIS->removeInterval(Reg);
LIS->createAndComputeVirtRegInterval(Reg);
}
@@ -548,12 +549,13 @@ void HexagonExpandCondsets::removeInstr(MachineInstr &MI) {
MI.eraseFromParent();
}
-void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet,
- bool Recalc, bool UpdateKills, bool UpdateDeads) {
+void HexagonExpandCondsets::updateLiveness(std::set<Register> &RegSet,
+ bool Recalc, bool UpdateKills,
+ bool UpdateDeads) {
UpdateKills |= UpdateDeads;
- for (unsigned R : RegSet) {
- if (!Register::isVirtualRegister(R)) {
- assert(Register::isPhysicalRegister(R));
+ for (Register R : RegSet) {
+ if (!R.isVirtual()) {
+ assert(R.isPhysical());
// There shouldn't be any physical registers as operands, except
// possibly reserved registers.
assert(MRI->isReserved(R));
@@ -580,17 +582,16 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
using namespace Hexagon;
if (SO.isReg()) {
- Register PhysR;
+ MCRegister PhysR;
RegisterRef RS = SO;
- if (Register::isVirtualRegister(RS.Reg)) {
+ if (RS.Reg.isVirtual()) {
const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
assert(VC->begin() != VC->end() && "Empty register class");
PhysR = *VC->begin();
} else {
- assert(Register::isPhysicalRegister(RS.Reg));
PhysR = RS.Reg;
}
- Register PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
+ MCRegister PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
switch (TRI->getRegSizeInBits(*RC)) {
case 32:
@@ -661,7 +662,7 @@ MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp,
/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
/// performs all necessary changes to complete the replacement.
bool HexagonExpandCondsets::split(MachineInstr &MI,
- std::set<unsigned> &UpdRegs) {
+ std::set<Register> &UpdRegs) {
if (TfrLimitActive) {
if (TfrCounter >= TfrLimit)
return false;
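
split(), documented above, rewrites dst = mux(p, a, b) into an A2_tfrt/A2_tfrf pair, i.e. two transfers predicated on p and !p that can then be placed and coalesced independently. Roughly, in scalar terms (a conceptual sketch, not the MI-level rewrite):

#include <cassert>

// Conceptual model of the rewrite performed by split():
//   dst = mux(p, a, b)   ==>   if (p) dst = a;  if (!p) dst = b;
static int mux(bool P, int A, int B) { return P ? A : B; }

static int splitMux(bool P, int A, int B) {
  int Dst = 0;
  if (P)  Dst = A;   // roughly corresponds to A2_tfrt (transfer if true)
  if (!P) Dst = B;   // roughly corresponds to A2_tfrf (transfer if false)
  return Dst;
}

int main() {
  for (bool P : {false, true})
    assert(mux(P, 1, 2) == splitMux(P, 1, 2));
  return 0;
}
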
@@ -803,7 +804,7 @@ bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs,
// For physical register we would need to check register aliases, etc.
// and we don't want to bother with that. It would be of little value
// before the actual register rewriting (from virtual to physical).
- if (!Register::isVirtualRegister(RR.Reg))
+ if (!RR.Reg.isVirtual())
return false;
// No redefs for any operand.
if (isRefInMap(RR, Defs, Exec_Then))
@@ -855,7 +856,7 @@ void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
MachineInstr &MI,
MachineBasicBlock::iterator Where,
const MachineOperand &PredOp, bool Cond,
- std::set<unsigned> &UpdRegs) {
+ std::set<Register> &UpdRegs) {
// The problem with updating live intervals is that we can move one def
// past another def. In particular, this can happen when moving an A2_tfrt
// over an A2_tfrf defining the same register. From the point of view of
@@ -933,7 +934,7 @@ void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN,
/// the copy under the given condition (using the same predicate register as
/// the copy).
bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
- std::set<unsigned> &UpdRegs) {
+ std::set<Register> &UpdRegs) {
// TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
unsigned Opc = TfrI.getOpcode();
(void)Opc;
@@ -1000,7 +1001,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
// subregisters are other physical registers, and we are not checking
// that.
RegisterRef RR = Op;
- if (!Register::isVirtualRegister(RR.Reg))
+ if (!RR.Reg.isVirtual())
return false;
ReferenceMap &Map = Op.isDef() ? Defs : Uses;
@@ -1067,7 +1068,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
/// Predicate all cases of conditional copies in the specified block.
bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B,
- std::set<unsigned> &UpdRegs) {
+ std::set<Register> &UpdRegs) {
bool Changed = false;
MachineBasicBlock::iterator I, E, NextI;
for (I = B.begin(), E = B.end(); I != E; I = NextI) {
@@ -1092,7 +1093,7 @@ bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B,
}
bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
- if (!Register::isVirtualRegister(RR.Reg))
+ if (!RR.Reg.isVirtual())
return false;
const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg);
if (RC == &Hexagon::IntRegsRegClass) {
@@ -1172,7 +1173,7 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
}
L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN));
}
- while (L2.begin() != L2.end())
+ while (!L2.empty())
L2.removeSegment(*L2.begin());
LIS->removeInterval(R2.Reg);
@@ -1187,8 +1188,8 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
/// the destination register. This could lead to having only one predicated
/// instruction in the end instead of two.
bool HexagonExpandCondsets::coalesceSegments(
- const SmallVectorImpl<MachineInstr*> &Condsets,
- std::set<unsigned> &UpdRegs) {
+ const SmallVectorImpl<MachineInstr *> &Condsets,
+ std::set<Register> &UpdRegs) {
SmallVector<MachineInstr*,16> TwoRegs;
for (MachineInstr *MI : Condsets) {
MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
@@ -1262,7 +1263,7 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
MF.getFunction().getParent()));
bool Changed = false;
- std::set<unsigned> CoalUpd, PredUpd;
+ std::set<Register> CoalUpd, PredUpd;
SmallVector<MachineInstr*,16> Condsets;
for (auto &B : MF)
@@ -1279,7 +1280,7 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
// in the IR (they have been removed by live range analysis).
// Updating them right before we split is the easiest, because splitting
// adds definitions which would interfere with updating kills afterwards.
- std::set<unsigned> KillUpd;
+ std::set<Register> KillUpd;
for (MachineInstr *MI : Condsets)
for (MachineOperand &Op : MI->operands())
if (Op.isReg() && Op.isUse())
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 010b7171ce17..a62610ae2b7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -306,7 +306,7 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
Register R = MO.getReg();
// Virtual registers will need scavenging, which then may require
// a stack slot.
- if (Register::isVirtualRegister(R))
+ if (R.isVirtual())
return true;
for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
if (CSR[*S])
@@ -1104,7 +1104,8 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
Offset = MFI.getObjectOffset(F->getFrameIdx());
} else {
Register FrameReg;
- Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg);
+ Offset =
+ getFrameIndexReference(MF, F->getFrameIdx(), FrameReg).getFixed();
}
// Subtract 8 to make room for R30 and R31, which are added above.
Offset -= 8;
@@ -1256,9 +1257,9 @@ static const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType,
return nullptr;
}
-int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- Register &FrameReg) const {
+StackOffset
+HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
auto &MFI = MF.getFrameInfo();
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
@@ -1354,7 +1355,7 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int RealOffset = Offset;
if (!UseFP && !UseAP)
RealOffset = FrameSize+Offset;
- return RealOffset;
+ return StackOffset::getFixed(RealOffset);
}
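
getFrameIndexReference now returns a StackOffset instead of a bare int, and the caller in insertCFIInstructionsAt extracts the byte offset with .getFixed(). StackOffset separates a fixed component from a scalable one (used by targets with scalable vectors); Hexagon only ever produces the fixed part. A simplified stand-in for the type:

#include <cassert>
#include <cstdint>

// Simplified model of a fixed + scalable stack offset.
class StackOffsetModel {
  int64_t Fixed = 0;
  int64_t Scalable = 0;
  StackOffsetModel(int64_t F, int64_t S) : Fixed(F), Scalable(S) {}
public:
  static StackOffsetModel getFixed(int64_t F) { return {F, 0}; }
  static StackOffsetModel getScalable(int64_t S) { return {0, S}; }
  int64_t getFixed() const { return Fixed; }
  int64_t getScalable() const { return Scalable; }
};

int main() {
  StackOffsetModel Off = StackOffsetModel::getFixed(-8);
  assert(Off.getFixed() == -8 && Off.getScalable() == 0);
  return 0;
}
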
bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 87d385e1ce3c..4ffd31b670e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -11,6 +11,7 @@
#include "Hexagon.h"
#include "HexagonBlockRanges.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -82,8 +83,8 @@ public:
return true;
}
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
bool hasFP(const MachineFunction &MF) const override;
const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries)
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 2f29e88bc989..f2026877b22c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -613,7 +613,7 @@ void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
if (MO.isReg() && MO.isDef()) {
Register R = MO.getReg();
assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
- if (Register::isVirtualRegister(R))
+ if (R.isVirtual())
RO.insert(std::make_pair(R, Index++));
}
}
@@ -730,7 +730,7 @@ void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
if (!MO.isReg() || !MO.isDef())
continue;
Register R = MO.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
continue;
Defs.insert(R);
}
@@ -743,7 +743,7 @@ void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
if (!MO.isReg() || !MO.isUse())
continue;
Register R = MO.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
continue;
Uses.insert(R);
}
@@ -1089,9 +1089,7 @@ void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
return IR.second.empty();
};
- auto End = llvm::remove_if(LL, IsEmpty);
- if (End != LL.end())
- LL.erase(End, LL.end());
+ llvm::erase_if(LL, IsEmpty);
} else {
// The definition of VR is constant-extended, and all candidates have
// empty removable-register sets. Pick the maximum candidate, and remove
@@ -1179,9 +1177,7 @@ void HexagonGenInsert::pruneRegCopies(unsigned VR) {
auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
};
- auto End = llvm::remove_if(LL, IsCopy);
- if (End != LL.end())
- LL.erase(End, LL.end());
+ llvm::erase_if(LL, IsCopy);
}
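
The two hunks above replace the manual remove_if-then-erase sequence with llvm::erase_if, which does the same thing in one call. A standalone equivalent using C++20's std::erase_if:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> LL = {1, 2, 3, 4, 5, 6};
  auto IsEven = [](int X) { return X % 2 == 0; };

  // Old pattern: erase-remove idiom.
  std::vector<int> A = LL;
  A.erase(std::remove_if(A.begin(), A.end(), IsEven), A.end());

  // New pattern: single call, same result.
  std::vector<int> B = LL;
  std::erase_if(B, IsEven);

  assert(A == B && A == std::vector<int>({1, 3, 5}));
  return 0;
}
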
void HexagonGenInsert::pruneCandidates() {
@@ -1483,7 +1479,7 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
if (!MO.isReg() || !MO.isDef())
continue;
Register R = MO.getReg();
- if (!Register::isVirtualRegister(R) || !MRI->use_nodbg_empty(R)) {
+ if (!R.isVirtual() || !MRI->use_nodbg_empty(R)) {
AllDead = false;
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 903287e68c99..d8d2025c5d27 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -48,7 +48,8 @@ namespace {
// FIXME: Use TargetInstrInfo::RegSubRegPair
struct RegisterSubReg {
- unsigned R, S;
+ Register R;
+ unsigned S;
RegisterSubReg(unsigned r = 0, unsigned s = 0) : R(r), S(s) {}
RegisterSubReg(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {}
@@ -111,7 +112,7 @@ namespace {
VectOfInst PUsers;
RegToRegMap G2P;
- bool isPredReg(unsigned R);
+ bool isPredReg(Register R);
void collectPredicateGPR(MachineFunction &MF);
void processPredicateGPR(const RegisterSubReg &Reg);
unsigned getPredForm(unsigned Opc);
@@ -133,8 +134,8 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(HexagonGenPredicate, "hexagon-gen-pred",
"Hexagon generate predicate operations", false, false)
-bool HexagonGenPredicate::isPredReg(unsigned R) {
- if (!Register::isVirtualRegister(R))
+bool HexagonGenPredicate::isPredReg(Register R) {
+ if (!R.isVirtual())
return false;
const TargetRegisterClass *RC = MRI->getRegClass(R);
return RC == &Hexagon::PredRegsRegClass;
@@ -214,7 +215,7 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
case TargetOpcode::COPY:
if (isPredReg(MI->getOperand(1).getReg())) {
RegisterSubReg RD = MI->getOperand(0);
- if (Register::isVirtualRegister(RD.R))
+ if (RD.R.isVirtual())
PredGPRs.insert(RD);
}
break;
@@ -246,7 +247,7 @@ RegisterSubReg HexagonGenPredicate::getPredRegFor(const RegisterSubReg &Reg) {
// Create a predicate register for a given Reg. The newly created register
// will have its value copied from Reg, so that it can be later used as
// an operand in other instructions.
- assert(Register::isVirtualRegister(Reg.R));
+ assert(Reg.R.isVirtual());
RegToRegMap::iterator F = G2P.find(Reg);
if (F != G2P.end())
return F->second;
@@ -472,9 +473,9 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
continue;
RegisterSubReg DR = MI.getOperand(0);
RegisterSubReg SR = MI.getOperand(1);
- if (!Register::isVirtualRegister(DR.R))
+ if (!DR.R.isVirtual())
continue;
- if (!Register::isVirtualRegister(SR.R))
+ if (!SR.R.isVirtual())
continue;
if (MRI->getRegClass(DR.R) != PredRC)
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 4833935f8d24..2f23e8643720 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -390,7 +390,7 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
TRI = HST.getRegisterInfo();
for (auto &L : *MLI)
- if (!L->getParentLoop()) {
+ if (L->isOutermost()) {
bool L0Used = false;
bool L1Used = false;
Changed |= convertToHardwareLoop(L, L0Used, L1Used);
@@ -1432,7 +1432,7 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
Register Reg = InitVal->getReg();
// We don't know the value of a physical register.
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return true;
MachineInstr *Def = MRI->getVRegDef(Reg);
@@ -1510,7 +1510,7 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
int64_t TV;
Register R = MO.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
return false;
MachineInstr *DI = MRI->getVRegDef(R);
unsigned DOpc = DI->getOpcode();
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index b4b389a7b956..bdd5c7dd151e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -231,10 +231,10 @@ SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN,
if (Size >= 4)
TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
- Size);
+ Align(Size));
else
TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
- PI, MVT::getIntegerVT(Size * 8), Size);
+ PI, MVT::getIntegerVT(Size * 8), Align(Size));
SDNode *StoreN;
{
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index c0f92042e5da..29e76b53910e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -789,6 +789,12 @@ struct ShuffleMask {
OS << " }";
}
};
+
+LLVM_ATTRIBUTE_UNUSED
+raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
+ SM.print(OS);
+ return OS;
+}
} // namespace
// --------------------------------------------------------------------
@@ -828,6 +834,7 @@ namespace llvm {
void selectVAlign(SDNode *N);
private:
+ void select(SDNode *ISelN);
void materialize(const ResultStack &Results);
SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
@@ -931,46 +938,19 @@ bool HvxSelector::selectVectorConstants(SDNode *N) {
SmallVector<SDNode*,4> Nodes;
SetVector<SDNode*> WorkQ;
- // The one-use test for VSPLATW's operand may fail due to dead nodes
- // left over in the DAG.
- DAG.RemoveDeadNodes();
-
// The DAG can change (due to CSE) during selection, so cache all the
// unselected nodes first to avoid traversing a mutating DAG.
-
- auto IsNodeToSelect = [] (SDNode *N) {
- if (N->isMachineOpcode())
- return false;
- switch (N->getOpcode()) {
- case HexagonISD::VZERO:
- case HexagonISD::VSPLATW:
- return true;
- case ISD::LOAD: {
- SDValue Addr = cast<LoadSDNode>(N)->getBasePtr();
- unsigned AddrOpc = Addr.getOpcode();
- if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP)
- if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool)
- return true;
- }
- break;
- }
- // Make sure to select the operand of VSPLATW.
- bool IsSplatOp = N->hasOneUse() &&
- N->use_begin()->getOpcode() == HexagonISD::VSPLATW;
- return IsSplatOp;
- };
-
WorkQ.insert(N);
for (unsigned i = 0; i != WorkQ.size(); ++i) {
SDNode *W = WorkQ[i];
- if (IsNodeToSelect(W))
+ if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
Nodes.push_back(W);
for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
WorkQ.insert(W->getOperand(j).getNode());
}
for (SDNode *L : Nodes)
- ISel.Select(L);
+ select(L);
return !Nodes.empty();
}
@@ -1358,6 +1338,82 @@ namespace {
};
}
+void HvxSelector::select(SDNode *ISelN) {
+ // What's important here is to select the right set of nodes. The main
+ // selection algorithm loops over nodes in a topological order, i.e. users
+ // are visited before their operands.
+ //
+ // It is an error to have an unselected node with a selected operand, and
+ // there is an assertion in the main selector code to enforce that.
+ //
+ // Such a situation could occur if we selected a node, which is both a
+ // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
+ // node in the DAG.
+ assert(ISelN->getOpcode() == HexagonISD::ISEL);
+ SDNode *N0 = ISelN->getOperand(0).getNode();
+ if (N0->isMachineOpcode()) {
+ ISel.ReplaceNode(ISelN, N0);
+ return;
+ }
+
+ // There could have been nodes created (i.e. inserted into the DAG)
+ // that are now dead. Remove them, in case they use any of the nodes
+ // to select (and make them look shared).
+ DAG.RemoveDeadNodes();
+
+ SetVector<SDNode*> SubNodes, TmpQ;
+ std::map<SDNode*,unsigned> NumOps;
+
+ // Don't want to select N0 if it's shared with another node, except if
+ // it's shared with other ISELs.
+ auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
+ if (llvm::all_of(N0->uses(), IsISelN))
+ SubNodes.insert(N0);
+
+ auto InSubNodes = [&SubNodes](SDNode *T) { return SubNodes.count(T); };
+ for (unsigned I = 0; I != SubNodes.size(); ++I) {
+ SDNode *S = SubNodes[I];
+ unsigned OpN = 0;
+ // Only add subnodes that are only reachable from N0.
+ for (SDValue Op : S->ops()) {
+ SDNode *O = Op.getNode();
+ if (llvm::all_of(O->uses(), InSubNodes)) {
+ SubNodes.insert(O);
+ ++OpN;
+ }
+ }
+ NumOps.insert({S, OpN});
+ if (OpN == 0)
+ TmpQ.insert(S);
+ }
+
+ for (unsigned I = 0; I != TmpQ.size(); ++I) {
+ SDNode *S = TmpQ[I];
+ for (SDNode *U : S->uses()) {
+ if (U == ISelN)
+ continue;
+ auto F = NumOps.find(U);
+ assert(F != NumOps.end());
+ if (F->second > 0 && !--F->second)
+ TmpQ.insert(F->first);
+ }
+ }
+
+ // Remove the marker.
+ ISel.ReplaceNode(ISelN, N0);
+
+ assert(SubNodes.size() == TmpQ.size());
+ NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
+
+ Deleter DUQ(DAG, Queue);
+ for (SDNode *S : reverse(Queue)) {
+ if (S == nullptr)
+ continue;
+ DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
+ ISel.Select(S);
+ }
+}
+
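
The new HvxSelector::select() gathers the subgraph reachable only from the ISEL marker, counts each node's in-subgraph operands (NumOps), queues nodes whose count reaches zero (TmpQ), and finally replays the queue in reverse so users are selected before their operands, as the comment at the top requires. The same worklist bookkeeping in a self-contained form, over a generic DAG rather than SDNodes:

#include <cassert>
#include <map>
#include <vector>

// Kahn-style ordering over a small DAG given as node -> operands.
// Mirrors the SubNodes/NumOps/TmpQ bookkeeping in HvxSelector::select().
static std::vector<int>
topoOperandsFirst(const std::map<int, std::vector<int>> &Ops) {
  std::map<int, unsigned> NumOps;          // unprocessed operands per node
  std::map<int, std::vector<int>> Users;
  for (auto &P : Ops) {
    NumOps[P.first] = P.second.size();
    for (int O : P.second)
      Users[O].push_back(P.first);
  }
  std::vector<int> Queue;                  // operands-first order
  for (auto &P : NumOps)
    if (P.second == 0)
      Queue.push_back(P.first);
  for (unsigned I = 0; I != Queue.size(); ++I)
    for (int U : Users[Queue[I]])
      if (--NumOps[U] == 0)
        Queue.push_back(U);
  return Queue;                            // reverse it to visit users first
}

int main() {
  // Node 3 uses 1 and 2; node 2 uses 1.
  std::map<int, std::vector<int>> Ops = {{1, {}}, {2, {1}}, {3, {1, 2}}};
  std::vector<int> Order = topoOperandsFirst(Ops);
  assert(Order == std::vector<int>({1, 2, 3}));
  return 0;
}
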
bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
MVT ResTy, SDValue Va, SDValue Vb,
SDNode *N) {
@@ -1379,12 +1435,7 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
// nodes, these nodes would not be selected (since the "local" selection
// only visits nodes that are not in AllNodes).
// To avoid this issue, remove all dead nodes from the DAG now.
- DAG.RemoveDeadNodes();
- DenseSet<SDNode*> AllNodes;
- for (SDNode &S : DAG.allnodes())
- AllNodes.insert(&S);
-
- Deleter DUA(DAG, AllNodes);
+// DAG.RemoveDeadNodes();
SmallVector<SDValue,128> Ops;
LLVMContext &Ctx = *DAG.getContext();
@@ -1434,57 +1485,9 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
}
assert(!N->use_empty());
- ISel.ReplaceNode(N, LV.getNode());
-
- if (AllNodes.count(LV.getNode())) {
- DAG.RemoveDeadNodes();
- return true;
- }
-
- // The lowered build-vector node will now need to be selected. It needs
- // to be done here because this node and its submodes are not included
- // in the main selection loop.
- // Implement essentially the same topological ordering algorithm as is
- // used in SelectionDAGISel.
-
- SetVector<SDNode*> SubNodes, TmpQ;
- std::map<SDNode*,unsigned> NumOps;
-
- SubNodes.insert(LV.getNode());
- for (unsigned I = 0; I != SubNodes.size(); ++I) {
- unsigned OpN = 0;
- SDNode *S = SubNodes[I];
- for (SDValue Op : S->ops()) {
- if (AllNodes.count(Op.getNode()))
- continue;
- SubNodes.insert(Op.getNode());
- ++OpN;
- }
- NumOps.insert({S, OpN});
- if (OpN == 0)
- TmpQ.insert(S);
- }
-
- for (unsigned I = 0; I != TmpQ.size(); ++I) {
- SDNode *S = TmpQ[I];
- for (SDNode *U : S->uses()) {
- if (!SubNodes.count(U))
- continue;
- auto F = NumOps.find(U);
- assert(F != NumOps.end());
- assert(F->second > 0);
- if (!--F->second)
- TmpQ.insert(F->first);
- }
- }
- assert(SubNodes.size() == TmpQ.size());
- NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
-
- Deleter DUQ(DAG, Queue);
- for (SDNode *S : reverse(Queue))
- if (S != nullptr)
- ISel.Select(S);
-
+ SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
+ ISel.ReplaceNode(N, IS.getNode());
+ select(IS.getNode());
DAG.RemoveDeadNodes();
return true;
}
@@ -1683,7 +1686,7 @@ OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
// The result length must be the same as the length of a single vector,
// or a vector pair.
assert(LogLen == HwLog || LogLen == HwLog+1);
- bool Extend = (LogLen == HwLog);
+ bool HavePairs = LogLen == HwLog+1;
if (!isPermutation(SM.Mask))
return OpRef::fail();
@@ -1767,6 +1770,22 @@ OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
// E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
// F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
+ // There is one special case that is not a perfect shuffle, but
+ // can be turned into one easily: when the shuffle operates on
+ // a vector pair, but the two vectors in the pair are swapped.
+ // The code below that identifies perfect shuffles will reject
+ // it, unless the order is reversed.
+ SmallVector<int,128> MaskStorage(SM.Mask.begin(), SM.Mask.end());
+ bool InvertedPair = false;
+ if (HavePairs && SM.Mask[0] >= int(HwLen)) {
+ for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
+ int M = SM.Mask[i];
+ MaskStorage[i] = M >= int(HwLen) ? M-HwLen : M+HwLen;
+ }
+ InvertedPair = true;
+ }
+ ArrayRef<int> LocalMask(MaskStorage);
+
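
The block above handles a shuffle whose two input vectors arrive as a swapped pair: toggling HwLen in every mask index re-expresses the same shuffle with the pair in the opposite order, which the perfect-shuffle matcher can then recognize (the operands are swapped back via concat(hi, lo) further down). The index arithmetic in isolation:

#include <cassert>
#include <vector>

// Re-express a shuffle mask over the concatenation (B, A) as a mask over
// (A, B): indices into the high half drop HwLen, indices into the low half
// gain it. Mirrors the MaskStorage adjustment above.
static std::vector<int> swapPairHalves(const std::vector<int> &Mask,
                                       int HwLen) {
  std::vector<int> Out(Mask.size());
  for (size_t I = 0; I != Mask.size(); ++I)
    Out[I] = Mask[I] >= HwLen ? Mask[I] - HwLen : Mask[I] + HwLen;
  return Out;
}

int main() {
  // HwLen = 4: indices 0..3 select from one vector, 4..7 from the other.
  std::vector<int> Mask = {4, 5, 0, 1};
  std::vector<int> Swapped = swapPairHalves(Mask, 4);
  assert(Swapped == std::vector<int>({0, 1, 4, 5}));
  return 0;
}
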
auto XorPow2 = [] (ArrayRef<int> Mask, unsigned Num) {
unsigned X = Mask[0] ^ Mask[Num/2];
// Check that the first half has the X's bits clear.
@@ -1786,12 +1805,12 @@ OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
assert(VecLen > 2);
for (unsigned I = VecLen; I >= 2; I >>= 1) {
// Examine the initial segment of Mask of size I.
- unsigned X = XorPow2(SM.Mask, I);
+ unsigned X = XorPow2(LocalMask, I);
if (!isPowerOf2_32(X))
return OpRef::fail();
// Check the other segments of Mask.
for (int J = I; J < VecLen; J += I) {
- if (XorPow2(SM.Mask.slice(J, I), I) != X)
+ if (XorPow2(LocalMask.slice(J, I), I) != X)
return OpRef::fail();
}
Perm[Log2_32(X)] = Log2_32(I)-1;
@@ -1895,20 +1914,40 @@ OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
}
}
+ // From the cycles, construct the sequence of values that will
+ // then form the control values for vdealvdd/vshuffvdd, i.e.
+ // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
+ // This essentially strips the M value from the cycles where
+ // it's present, and performs the insertion of M (then stripping)
+ // for cycles without M (as described in an earlier comment).
SmallVector<unsigned,8> SwapElems;
- if (HwLen == unsigned(VecLen))
+ // When the input is extended (i.e. single vector becomes a pair),
+ // this is done by using an "undef" vector as the second input.
+ // However, then we get
+ // input 1: GOODBITS
+ // input 2: ........
+ // but we need
+ // input 1: ....BITS
+ // input 2: ....GOOD
+ // Then at the end, this needs to be undone. To accomplish this,
+ // artificially add "LogLen-1" at both ends of the sequence.
+ if (!HavePairs)
SwapElems.push_back(LogLen-1);
-
for (const CycleType &C : Cycles) {
+ // Do the transformation: (a1..an) -> (M a1..an)(M a1).
unsigned First = (C[0] == LogLen-1) ? 1 : 0;
SwapElems.append(C.begin()+First, C.end());
if (First == 0)
SwapElems.push_back(C[0]);
}
+ if (!HavePairs)
+ SwapElems.push_back(LogLen-1);
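
The loop above flattens the permutation's cycles into the control sequence for vdealvdd/vshuffvdd, as the preceding comment describes. For reference, a standalone sketch of the step that feeds it, extracting the cycles of a permutation (the real code derives these cycles from the bit-position permutation Perm computed earlier):

#include <cassert>
#include <vector>

// Decompose a permutation (P[i] = where position i moves to) into cycles.
static std::vector<std::vector<unsigned>>
cyclesOf(const std::vector<unsigned> &P) {
  std::vector<bool> Seen(P.size(), false);
  std::vector<std::vector<unsigned>> Cycles;
  for (unsigned I = 0; I != P.size(); ++I) {
    if (Seen[I] || P[I] == I)
      continue;              // skip fixed points and visited elements
    std::vector<unsigned> C;
    for (unsigned J = I; !Seen[J]; J = P[J]) {
      Seen[J] = true;
      C.push_back(J);
    }
    Cycles.push_back(C);
  }
  return Cycles;
}

int main() {
  // 0 -> 1 -> 2 -> 0 is one cycle, 3 is a fixed point.
  std::vector<unsigned> P = {1, 2, 0, 3};
  auto C = cyclesOf(P);
  assert(C.size() == 1 && C[0] == std::vector<unsigned>({0, 1, 2}));
  return 0;
}
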
const SDLoc &dl(Results.InpNode);
- OpRef Arg = !Extend ? Va
- : concat(Va, OpRef::undef(SingleTy), Results);
+ OpRef Arg = HavePairs ? Va
+ : concat(Va, OpRef::undef(SingleTy), Results);
+ if (InvertedPair)
+ Arg = concat(OpRef::hi(Arg), OpRef::lo(Arg), Results);
for (unsigned I = 0, E = SwapElems.size(); I != E; ) {
bool IsInc = I == E-1 || SwapElems[I] < SwapElems[I+1];
@@ -1932,7 +1971,7 @@ OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
Arg = OpRef::res(Results.top());
}
- return !Extend ? Arg : OpRef::lo(Arg);
+ return HavePairs ? Arg : OpRef::lo(Arg);
}
OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
@@ -1996,7 +2035,7 @@ SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
SDValue LV = Lower.LowerOperation(BV, DAG);
DAG.RemoveDeadNode(BV.getNode());
- return LV;
+ return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
}
void HvxSelector::selectShuffle(SDNode *N) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 768fea639cf9..c8994a3a28a3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1517,8 +1517,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::ABS, MVT::i32, Legal);
- setOperationAction(ISD::ABS, MVT::i64, Legal);
+ for (unsigned LegalIntOp :
+ {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
+ setOperationAction(LegalIntOp, MVT::i32, Legal);
+ setOperationAction(LegalIntOp, MVT::i64, Legal);
+ }
// Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
// but they only operate on i64.
@@ -1620,7 +1623,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR,
ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
- ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE
+ ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE,
+ ISD::SPLAT_VECTOR,
};
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -1677,6 +1681,16 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::AND, NativeVT, Legal);
setOperationAction(ISD::OR, NativeVT, Legal);
setOperationAction(ISD::XOR, NativeVT, Legal);
+
+ if (NativeVT.getVectorElementType() != MVT::i1)
+ setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
+ }
+
+ for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
}
// Custom lower unaligned loads.
@@ -1843,15 +1857,12 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VASL: return "HexagonISD::VASL";
case HexagonISD::VASR: return "HexagonISD::VASR";
case HexagonISD::VLSR: return "HexagonISD::VLSR";
- case HexagonISD::VSPLAT: return "HexagonISD::VSPLAT";
case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW";
case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
case HexagonISD::VROR: return "HexagonISD::VROR";
case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
case HexagonISD::PTRUE: return "HexagonISD::PTRUE";
case HexagonISD::PFALSE: return "HexagonISD::PFALSE";
- case HexagonISD::VZERO: return "HexagonISD::VZERO";
- case HexagonISD::VSPLATW: return "HexagonISD::VSPLATW";
case HexagonISD::D2P: return "HexagonISD::D2P";
case HexagonISD::P2D: return "HexagonISD::P2D";
case HexagonISD::V2Q: return "HexagonISD::V2Q";
@@ -1862,6 +1873,10 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST";
case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
+ case HexagonISD::VPACKL: return "HexagonISD::VPACKL";
+ case HexagonISD::VUNPACK: return "HexagonISD::VUNPACK";
+ case HexagonISD::VUNPACKU: return "HexagonISD::VUNPACKU";
+ case HexagonISD::ISEL: return "HexagonISD::ISEL";
case HexagonISD::OP_END: break;
}
return nullptr;
@@ -2064,20 +2079,9 @@ HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::TypeScalarizeVector;
if (Subtarget.useHVXOps()) {
- unsigned HwLen = Subtarget.getVectorLength();
- // If the size of VT is at least half of the vector length,
- // widen the vector. Note: the threshold was not selected in
- // any scientific way.
- ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
- if (llvm::find(Tys, ElemTy) != Tys.end()) {
- unsigned HwWidth = 8*HwLen;
- unsigned VecWidth = VT.getSizeInBits();
- if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
- return TargetLoweringBase::TypeWidenVector;
- }
- // Split vectors of i1 that correspond to (byte) vector pairs.
- if (ElemTy == MVT::i1 && VecLen == 2*HwLen)
- return TargetLoweringBase::TypeSplitVector;
+ unsigned Action = getPreferredHvxVectorAction(VT);
+ if (Action != ~0u)
+ return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
}
// Always widen (remaining) vectors of i1.
@@ -2229,26 +2233,33 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
SDValue
HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
const {
- if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) {
- if (SDValue S = BVN->getSplatValue()) {
- unsigned NewOpc;
- switch (Op.getOpcode()) {
- case ISD::SHL:
- NewOpc = HexagonISD::VASL;
- break;
- case ISD::SRA:
- NewOpc = HexagonISD::VASR;
- break;
- case ISD::SRL:
- NewOpc = HexagonISD::VLSR;
- break;
- default:
- llvm_unreachable("Unexpected shift opcode");
- }
- return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), S);
- }
+ unsigned NewOpc;
+ switch (Op.getOpcode()) {
+ case ISD::SHL:
+ NewOpc = HexagonISD::VASL;
+ break;
+ case ISD::SRA:
+ NewOpc = HexagonISD::VASR;
+ break;
+ case ISD::SRL:
+ NewOpc = HexagonISD::VLSR;
+ break;
+ default:
+ llvm_unreachable("Unexpected shift opcode");
}
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ const SDLoc &dl(Op);
+
+ switch (Op1.getOpcode()) {
+ case ISD::BUILD_VECTOR:
+ if (SDValue S = cast<BuildVectorSDNode>(Op1)->getSplatValue())
+ return DAG.getNode(NewOpc, dl, ty(Op), Op0, S);
+ break;
+ case ISD::SPLAT_VECTOR:
+ return DAG.getNode(NewOpc, dl, ty(Op), Op0, Op1.getOperand(0));
+ }
return SDValue();
}
@@ -2325,9 +2336,10 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
unsigned First, Num = Elem.size();
- for (First = 0; First != Num; ++First)
+ for (First = 0; First != Num; ++First) {
if (!isUndef(Elem[First]))
break;
+ }
if (First == Num)
return DAG.getUNDEF(VecTy);
@@ -2359,18 +2371,16 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
// Then try splat.
bool IsSplat = true;
- for (unsigned i = 0; i != Num; ++i) {
- if (i == First)
- continue;
+ for (unsigned i = First+1; i != Num; ++i) {
if (Elem[i] == Elem[First] || isUndef(Elem[i]))
continue;
IsSplat = false;
break;
}
if (IsSplat) {
- // Legalize the operand to VSPLAT.
+ // Legalize the operand of SPLAT_VECTOR.
SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
- return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
+ return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
}
// Generate
@@ -2408,9 +2418,10 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
unsigned First, Num = Elem.size();
- for (First = 0; First != Num; ++First)
+ for (First = 0; First != Num; ++First) {
if (!isUndef(Elem[First]))
break;
+ }
if (First == Num)
return DAG.getUNDEF(VecTy);
@@ -2421,18 +2432,16 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
// First try splat if possible.
if (ElemTy == MVT::i16) {
bool IsSplat = true;
- for (unsigned i = 0; i != Num; ++i) {
- if (i == First)
- continue;
+ for (unsigned i = First+1; i != Num; ++i) {
if (Elem[i] == Elem[First] || isUndef(Elem[i]))
continue;
IsSplat = false;
break;
}
if (IsSplat) {
- // Legalize the operand to VSPLAT.
+ // Legalize the operand of SPLAT_VECTOR.
SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
- return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
+ return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
}
}
@@ -2650,7 +2659,7 @@ HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
unsigned W = Ty.getSizeInBits();
if (W <= 64)
return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
- return DAG.getNode(HexagonISD::VZERO, dl, Ty);
+ return DAG.getNode(ISD::SPLAT_VECTOR, dl, Ty, getZero(dl, MVT::i32, DAG));
}
if (Ty.isInteger())
@@ -2661,6 +2670,28 @@ HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
}
SDValue
+HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
+ const {
+ MVT ValTy = ty(Val);
+ assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
+
+ unsigned ValLen = ValTy.getVectorNumElements();
+ unsigned ResLen = ResTy.getVectorNumElements();
+ if (ValLen == ResLen)
+ return Val;
+
+ const SDLoc &dl(Val);
+ assert(ValLen < ResLen);
+ assert(ResLen % ValLen == 0);
+
+ SmallVector<SDValue, 4> Concats = {Val};
+ for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
+ Concats.push_back(DAG.getUNDEF(ValTy));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
+}
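A rough scalar picture of the appendUndef helper above, assuming that padding lane by lane is an acceptable stand-in for appending whole undef chunks (UndefMark is only a placeholder value, not a real undef):

#include <vector>
// Pad Val out to ResLen lanes; the appended lanes carry no meaningful value.
std::vector<int> appendUndefModel(std::vector<int> Val, size_t ResLen, int UndefMark = -1) {
  Val.resize(ResLen, UndefMark);
  return Val;
}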
+
+SDValue
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
MVT VecTy = ty(Op);
unsigned BW = VecTy.getSizeInBits();
@@ -2910,8 +2941,10 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
DAG.getConstant(NeedAlign, dl, MVT::i32))
: BO.first;
- SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl);
- SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl);
+ SDValue Base0 =
+ DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::Fixed(BO.second), dl);
+ SDValue Base1 = DAG.getMemBasePlusOffset(
+ BaseNoOff, TypeSize::Fixed(BO.second + LoadLen), dl);
MachineMemOperand *WideMMO = nullptr;
if (MachineMemOperand *MMO = LN->getMemOperand()) {
@@ -3023,7 +3056,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
return LowerINLINEASM(Op, DAG);
- if (isHvxOperation(Op)) {
+ if (isHvxOperation(Op.getNode(), DAG)) {
// If HVX lowering returns nothing, try the default lowering.
if (SDValue V = LowerHvxOperation(Op, DAG))
return V;
@@ -3084,7 +3117,7 @@ void
HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
- if (isHvxOperation(N)) {
+ if (isHvxOperation(N, DAG)) {
LowerHvxOperationWrapper(N, Results, DAG);
if (!Results.empty())
return;
@@ -3103,7 +3136,7 @@ void
HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
- if (isHvxOperation(N)) {
+ if (isHvxOperation(N, DAG)) {
ReplaceHvxNodeResults(N, Results, DAG);
if (!Results.empty())
return;
@@ -3118,10 +3151,12 @@ HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
// Handle a bitcast from v8i1 to i8.
if (N->getValueType(0) == MVT::i8) {
- SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
- N->getOperand(0), DAG);
- SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
- Results.push_back(T);
+ if (N->getOperand(0).getValueType() == MVT::v8i1) {
+ SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
+ N->getOperand(0), DAG);
+ SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
+ Results.push_back(T);
+ }
}
break;
}
@@ -3130,13 +3165,16 @@ HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue
HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
- SDValue Op(N, 0);
- if (isHvxOperation(Op)) {
+ if (isHvxOperation(N, DCI.DAG)) {
if (SDValue V = PerformHvxDAGCombine(N, DCI))
return V;
return SDValue();
}
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Op(N, 0);
const SDLoc &dl(Op);
unsigned Opc = Op.getOpcode();
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 7d6e6b6185c8..cfccb14a09c9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H
#include "Hexagon.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -30,464 +31,478 @@ namespace llvm {
namespace HexagonISD {
- enum NodeType : unsigned {
- OP_BEGIN = ISD::BUILTIN_OP_END,
-
- CONST32 = OP_BEGIN,
- CONST32_GP, // For marking data present in GP.
- ADDC, // Add with carry: (X, Y, Cin) -> (X+Y, Cout).
- SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout).
- ALLOCA,
-
- AT_GOT, // Index in GOT.
- AT_PCREL, // Offset relative to PC.
-
- CALL, // Function call.
- CALLnr, // Function call that does not return.
- CALLR,
-
- RET_FLAG, // Return with a flag operand.
- BARRIER, // Memory barrier.
- JT, // Jump table.
- CP, // Constant pool.
-
- COMBINE,
- VSPLAT, // Generic splat, selection depends on argument/return
- // types.
- VASL,
- VASR,
- VLSR,
-
- TSTBIT,
- INSERT,
- EXTRACTU,
- VEXTRACTW,
- VINSERTW0,
- VROR,
- TC_RETURN,
- EH_RETURN,
- DCFETCH,
- READCYCLE,
- PTRUE,
- PFALSE,
- D2P, // Convert 8-byte value to 8-bit predicate register. [*]
- P2D, // Convert 8-bit predicate register to 8-byte value. [*]
- V2Q, // Convert HVX vector to a vector predicate reg. [*]
- Q2V, // Convert vector predicate to an HVX vector. [*]
- // [*] The equivalence is defined as "Q <=> (V != 0)",
- // where the != operation compares bytes.
- // Note: V != 0 is implemented as V >u 0.
- QCAT,
- QTRUE,
- QFALSE,
- VZERO,
- VSPLATW, // HVX splat of a 32-bit word with an arbitrary result type.
- TYPECAST, // No-op that's used to convert between different legal
- // types in a register.
- VALIGN, // Align two vectors (in Op0, Op1) to one that would have
- // been loaded from address in Op2.
- VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is
- // an address in a vector load, then it's a no-op.
- OP_END
- };
+enum NodeType : unsigned {
+ OP_BEGIN = ISD::BUILTIN_OP_END,
+
+ CONST32 = OP_BEGIN,
+ CONST32_GP, // For marking data present in GP.
+ ADDC, // Add with carry: (X, Y, Cin) -> (X+Y, Cout).
+ SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout).
+ ALLOCA,
+
+ AT_GOT, // Index in GOT.
+ AT_PCREL, // Offset relative to PC.
+
+ CALL, // Function call.
+ CALLnr, // Function call that does not return.
+ CALLR,
+
+ RET_FLAG, // Return with a flag operand.
+ BARRIER, // Memory barrier.
+ JT, // Jump table.
+ CP, // Constant pool.
+
+ COMBINE,
+ VASL,
+ VASR,
+ VLSR,
+
+ TSTBIT,
+ INSERT,
+ EXTRACTU,
+ VEXTRACTW,
+ VINSERTW0,
+ VROR,
+ TC_RETURN,
+ EH_RETURN,
+ DCFETCH,
+ READCYCLE,
+ PTRUE,
+ PFALSE,
+ D2P, // Convert 8-byte value to 8-bit predicate register. [*]
+ P2D, // Convert 8-bit predicate register to 8-byte value. [*]
+ V2Q, // Convert HVX vector to a vector predicate reg. [*]
+ Q2V, // Convert vector predicate to an HVX vector. [*]
+ // [*] The equivalence is defined as "Q <=> (V != 0)",
+ // where the != operation compares bytes.
+ // Note: V != 0 is implemented as V >u 0.
+ QCAT,
+ QTRUE,
+ QFALSE,
+ TYPECAST, // No-op that's used to convert between different legal
+ // types in a register.
+ VALIGN, // Align two vectors (in Op0, Op1) to one that would have
+ // been loaded from address in Op2.
+ VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is
+ // an address in a vector load, then it's a no-op.
+ VPACKL, // Pack low parts of the input vector to the front of the
+ // output. For example v64i16 VPACKL(v32i32) will pick
+ // the low halfwords and pack them into the first 32
+ // halfwords of the output. The rest of the output is
+ // unspecified.
+ VUNPACK, // Unpacking into low elements with sign extension.
+ VUNPACKU, // Unpacking into low elements with zero extension.
+ ISEL, // Marker for nodes that were created during ISel, and
+ // which need explicit selection (would have been left
+ // unselected otherwise).
+ OP_END
+};
} // end namespace HexagonISD
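To make the VPACKL comment above concrete, a scalar model of the v64i16 VPACKL(v32i32) case it mentions (sizes taken from that comment; lanes 32..63 of the result are unspecified and merely zeroed here):

#include <array>
#include <cstdint>
std::array<uint16_t, 64> vpacklModel(const std::array<uint32_t, 32> &In) {
  std::array<uint16_t, 64> Out{};        // upper half of the output is "don't care"
  for (size_t i = 0; i < In.size(); ++i)
    Out[i] = uint16_t(In[i] & 0xFFFF);   // keep the low halfword of each input word
  return Out;
}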
- class HexagonSubtarget;
-
- class HexagonTargetLowering : public TargetLowering {
- int VarArgsFrameOffset; // Frame offset to start of varargs area.
- const HexagonTargetMachine &HTM;
- const HexagonSubtarget &Subtarget;
-
- bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize)
- const;
-
- public:
- explicit HexagonTargetLowering(const TargetMachine &TM,
- const HexagonSubtarget &ST);
-
- bool isHVXVectorType(MVT Ty) const;
-
- /// IsEligibleForTailCallOptimization - Check whether the call is eligible
- /// for tail call optimization. Targets which want to do tail call
- /// optimization should implement this function.
- bool IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet,
- bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const;
-
- bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
- MachineFunction &MF,
- unsigned Intrinsic) const override;
-
- bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
- bool isTruncateFree(EVT VT1, EVT VT2) const override;
-
- bool isCheapToSpeculateCttz() const override { return true; }
- bool isCheapToSpeculateCtlz() const override { return true; }
- bool isCtlzFast() const override { return true; }
-
- bool hasBitTest(SDValue X, SDValue Y) const override;
-
- bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
-
- /// Return true if an FMA operation is faster than a pair of mul and add
- /// instructions. fmuladd intrinsics will be expanded to FMAs when this
- /// method returns true (and FMAs are legal), otherwise fmuladd is
- /// expanded to mul + add.
- bool isFMAFasterThanFMulAndFAdd(const MachineFunction &,
- EVT) const override;
-
- // Should we expand the build vector with shuffles?
- bool shouldExpandBuildVectorWithShuffles(EVT VT,
- unsigned DefinedValues) const override;
-
- bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
- const override;
-
- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const override;
- void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const override;
-
- const char *getTargetNodeName(unsigned Opcode) const override;
-
- SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
- SDValue
- LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const override;
- SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
- SelectionDAG &DAG) const;
- SDValue LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
- SelectionDAG &DAG) const;
- SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
- SelectionDAG &DAG) const;
- SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
- GlobalAddressSDNode *GA, SDValue InFlag, EVT PtrVT,
- unsigned ReturnReg, unsigned char OperandFlags) const;
- SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const override;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- const SmallVectorImpl<SDValue> &OutVals,
- SDValue Callee) const;
-
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
-
- bool CanLowerReturn(CallingConv::ID CallConv,
- MachineFunction &MF, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const override;
-
- SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SDLoc &dl, SelectionDAG &DAG) const override;
-
- SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
-
- bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
-
- Register getRegisterByName(const char* RegName, LLT VT,
- const MachineFunction &MF) const override;
-
- /// If a physical register, this returns the register that receives the
- /// exception address on entry to an EH pad.
- Register
- getExceptionPointerRegister(const Constant *PersonalityFn) const override {
- return Hexagon::R0;
- }
-
- /// If a physical register, this returns the register that receives the
- /// exception typeid on entry to a landing pad.
- Register
- getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
- return Hexagon::R1;
- }
-
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
-
- EVT getSetCCResultType(const DataLayout &, LLVMContext &C,
- EVT VT) const override {
- if (!VT.isVector())
- return MVT::i1;
- else
- return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
- }
-
- bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
- SDValue &Base, SDValue &Offset,
- ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const override;
-
- ConstraintType getConstraintType(StringRef Constraint) const override;
-
- std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- StringRef Constraint, MVT VT) const override;
-
- unsigned
- getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
- if (ConstraintCode == "o")
- return InlineAsm::Constraint_o;
- return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
- }
-
- // Intrinsics
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
- /// isLegalAddressingMode - Return true if the addressing mode represented
- /// by AM is legal for this target, for a load/store of the specified type.
- /// The type may be VoidTy, in which case only return true if the addressing
- /// mode is legal for a load/store of any legal type.
- /// TODO: Handle pre/postinc as well.
- bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
- Type *Ty, unsigned AS,
- Instruction *I = nullptr) const override;
- /// Return true if folding a constant offset with the given GlobalAddress
- /// is legal. It is frequently not legal in PIC relocation models.
- bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
-
- bool isFPImmLegal(const APFloat &Imm, EVT VT,
- bool ForCodeSize) const override;
-
- /// isLegalICmpImmediate - Return true if the specified immediate is legal
- /// icmp immediate, that is the target has icmp instructions which can
- /// compare a register against the immediate without having to materialize
- /// the immediate into a register.
- bool isLegalICmpImmediate(int64_t Imm) const override;
-
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
-
- bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
- unsigned AddrSpace, Align Alignment,
- MachineMemOperand::Flags Flags,
- bool *Fast) const override;
-
- bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
- unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast)
- const override;
-
- /// Returns relocation base for the given PIC jumptable.
- SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
- const override;
-
- bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
- EVT NewVT) const override;
-
- // Handling of atomic RMW instructions.
- Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
- AtomicOrdering Ord) const override;
- Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
- Value *Addr, AtomicOrdering Ord) const override;
- AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
- bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- AtomicExpansionKind
- shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
-
- AtomicExpansionKind
- shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
- return AtomicExpansionKind::LLSC;
- }
-
- private:
- void initializeHVXLowering();
- void validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
- unsigned NeedAlign) const;
-
- std::pair<SDValue,int> getBaseAndOffset(SDValue Addr) const;
-
- bool getBuildVectorConstInts(ArrayRef<SDValue> Values, MVT VecTy,
- SelectionDAG &DAG,
- MutableArrayRef<ConstantInt*> Consts) const;
- SDValue buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy,
+class HexagonSubtarget;
+
+class HexagonTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+ const HexagonTargetMachine &HTM;
+ const HexagonSubtarget &Subtarget;
+
+ bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize)
+ const;
+
+public:
+ explicit HexagonTargetLowering(const TargetMachine &TM,
+ const HexagonSubtarget &ST);
+
+ bool isHVXVectorType(MVT Ty) const;
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet,
+ bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const;
+
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const override;
+
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+ bool isTruncateFree(EVT VT1, EVT VT2) const override;
+
+ bool isCheapToSpeculateCttz() const override { return true; }
+ bool isCheapToSpeculateCtlz() const override { return true; }
+ bool isCtlzFast() const override { return true; }
+
+ bool hasBitTest(SDValue X, SDValue Y) const override;
+
+ bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
+
+ /// Return true if an FMA operation is faster than a pair of mul and add
+ /// instructions. fmuladd intrinsics will be expanded to FMAs when this
+ /// method returns true (and FMAs are legal), otherwise fmuladd is
+ /// expanded to mul + add.
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &,
+ EVT) const override;
+
+ // Should we expand the build vector with shuffles?
+ bool shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const override;
+
+ bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
+ const override;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
+ GlobalAddressSDNode *GA, SDValue InFlag, EVT PtrVT,
+ unsigned ReturnReg, unsigned char OperandFlags) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const;
+
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+
+ bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &dl, SelectionDAG &DAG) const override;
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+
+ Register getRegisterByName(const char* RegName, LLT VT,
+ const MachineFunction &MF) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ Register
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return Hexagon::R0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ Register
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return Hexagon::R1;
+ }
+
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+
+ EVT getSetCCResultType(const DataLayout &, LLVMContext &C,
+ EVT VT) const override {
+ if (!VT.isVector())
+ return MVT::i1;
+ else
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
+ }
+
+ bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
+
+ ConstraintType getConstraintType(StringRef Constraint) const override;
+
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
+
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+ if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
+ // Intrinsics
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ /// The type may be VoidTy, in which case only return true if the addressing
+ /// mode is legal for a load/store of any legal type.
+ /// TODO: Handle pre/postinc as well.
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS,
+ Instruction *I = nullptr) const override;
+ /// Return true if folding a constant offset with the given GlobalAddress
+ /// is legal. It is frequently not legal in PIC relocation models.
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+
+ EVT getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
+
+ bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
+ unsigned AddrSpace, Align Alignment,
+ MachineMemOperand::Flags Flags,
+ bool *Fast) const override;
+
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+ unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast)
+ const override;
+
+ /// Returns relocation base for the given PIC jumptable.
+ SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
+ const override;
+
+ bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
+ EVT NewVT) const override;
+
+ // Handling of atomic RMW instructions.
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const override;
+ Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr, AtomicOrdering Ord) const override;
+ AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+
+ AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
+ return AtomicExpansionKind::LLSC;
+ }
+
+private:
+ void initializeHVXLowering();
+ unsigned getPreferredHvxVectorAction(MVT VecTy) const;
+
+ void validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
+ unsigned NeedAlign) const;
+
+ std::pair<SDValue,int> getBaseAndOffset(SDValue Addr) const;
+
+ bool getBuildVectorConstInts(ArrayRef<SDValue> Values, MVT VecTy,
+ SelectionDAG &DAG,
+ MutableArrayRef<ConstantInt*> Consts) const;
+ SDValue buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy,
+ SelectionDAG &DAG) const;
+ SDValue buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy,
+ SelectionDAG &DAG) const;
+ SDValue extractVector(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+ MVT ValTy, MVT ResTy, SelectionDAG &DAG) const;
+ SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
+ const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const;
+ SDValue expandPredicate(SDValue Vec32, const SDLoc &dl,
SelectionDAG &DAG) const;
- SDValue buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy,
- SelectionDAG &DAG) const;
- SDValue extractVector(SDValue VecV, SDValue IdxV, const SDLoc &dl,
- MVT ValTy, MVT ResTy, SelectionDAG &DAG) const;
- SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
- const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const;
- SDValue expandPredicate(SDValue Vec32, const SDLoc &dl,
+ SDValue contractPredicate(SDValue Vec64, const SDLoc &dl,
SelectionDAG &DAG) const;
- SDValue contractPredicate(SDValue Vec64, const SDLoc &dl,
+ SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const;
+ SDValue appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG) const;
+
+ bool isUndef(SDValue Op) const {
+ if (Op.isMachineOpcode())
+ return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
+ return Op.getOpcode() == ISD::UNDEF;
+ }
+ SDValue getInstr(unsigned MachineOpc, const SDLoc &dl, MVT Ty,
+ ArrayRef<SDValue> Ops, SelectionDAG &DAG) const {
+ SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops);
+ return SDValue(N, 0);
+ }
+ SDValue getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const;
+
+ using VectorPair = std::pair<SDValue, SDValue>;
+ using TypePair = std::pair<MVT, MVT>;
+
+ SDValue getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
+ const SDLoc &dl, SelectionDAG &DAG) const;
+
+ MVT ty(SDValue Op) const {
+ return Op.getValueType().getSimpleVT();
+ }
+ TypePair ty(const VectorPair &Ops) const {
+ return { Ops.first.getValueType().getSimpleVT(),
+ Ops.second.getValueType().getSimpleVT() };
+ }
+ MVT tyScalar(MVT Ty) const {
+ if (!Ty.isVector())
+ return Ty;
+ return MVT::getIntegerVT(Ty.getSizeInBits());
+ }
+ MVT tyVector(MVT Ty, MVT ElemTy) const {
+ if (Ty.isVector() && Ty.getVectorElementType() == ElemTy)
+ return Ty;
+ unsigned TyWidth = Ty.getSizeInBits();
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+ assert((TyWidth % ElemWidth) == 0);
+ return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth);
+ }
+
+ MVT typeJoin(const TypePair &Tys) const;
+ TypePair typeSplit(MVT Ty) const;
+ MVT typeExtElem(MVT VecTy, unsigned Factor) const;
+ MVT typeTruncElem(MVT VecTy, unsigned Factor) const;
+
+ SDValue opJoin(const VectorPair &Ops, const SDLoc &dl,
+ SelectionDAG &DAG) const;
+ VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const;
+ SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const;
+
+ bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags,
+ bool *Fast) const;
+ bool allowsHvxMisalignedMemoryAccesses(MVT VecTy,
+ MachineMemOperand::Flags Flags,
+ bool *Fast) const;
+
+ bool isHvxSingleTy(MVT Ty) const;
+ bool isHvxPairTy(MVT Ty) const;
+ bool isHvxBoolTy(MVT Ty) const;
+ SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
+ SelectionDAG &DAG) const;
+ SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const;
+ SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1,
+ ArrayRef<int> Mask, SelectionDAG &DAG) const;
+
+ SDValue buildHvxVectorReg(ArrayRef<SDValue> Values, const SDLoc &dl,
+ MVT VecTy, SelectionDAG &DAG) const;
+ SDValue buildHvxVectorPred(ArrayRef<SDValue> Values, const SDLoc &dl,
+ MVT VecTy, SelectionDAG &DAG) const;
+ SDValue createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
+ unsigned BitBytes, bool ZeroFill,
SelectionDAG &DAG) const;
- SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const;
-
- bool isUndef(SDValue Op) const {
- if (Op.isMachineOpcode())
- return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
- return Op.getOpcode() == ISD::UNDEF;
- }
- SDValue getInstr(unsigned MachineOpc, const SDLoc &dl, MVT Ty,
- ArrayRef<SDValue> Ops, SelectionDAG &DAG) const {
- SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops);
- return SDValue(N, 0);
- }
- SDValue getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const;
-
- using VectorPair = std::pair<SDValue, SDValue>;
- using TypePair = std::pair<MVT, MVT>;
-
- SDValue getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
- const SDLoc &dl, SelectionDAG &DAG) const;
-
- MVT ty(SDValue Op) const {
- return Op.getValueType().getSimpleVT();
- }
- TypePair ty(const VectorPair &Ops) const {
- return { Ops.first.getValueType().getSimpleVT(),
- Ops.second.getValueType().getSimpleVT() };
- }
- MVT tyScalar(MVT Ty) const {
- if (!Ty.isVector())
- return Ty;
- return MVT::getIntegerVT(Ty.getSizeInBits());
- }
- MVT tyVector(MVT Ty, MVT ElemTy) const {
- if (Ty.isVector() && Ty.getVectorElementType() == ElemTy)
- return Ty;
- unsigned TyWidth = Ty.getSizeInBits();
- unsigned ElemWidth = ElemTy.getSizeInBits();
- assert((TyWidth % ElemWidth) == 0);
- return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth);
- }
-
- MVT typeJoin(const TypePair &Tys) const;
- TypePair typeSplit(MVT Ty) const;
- MVT typeExtElem(MVT VecTy, unsigned Factor) const;
- MVT typeTruncElem(MVT VecTy, unsigned Factor) const;
-
- SDValue opJoin(const VectorPair &Ops, const SDLoc &dl,
- SelectionDAG &DAG) const;
- VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const;
- SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const;
-
- bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags,
- bool *Fast) const;
- bool allowsHvxMisalignedMemoryAccesses(MVT VecTy,
- MachineMemOperand::Flags Flags,
- bool *Fast) const;
-
- bool isHvxSingleTy(MVT Ty) const;
- bool isHvxPairTy(MVT Ty) const;
- bool isHvxBoolTy(MVT Ty) const;
- SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
- SelectionDAG &DAG) const;
- SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const;
- SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1,
- ArrayRef<int> Mask, SelectionDAG &DAG) const;
-
- SDValue buildHvxVectorReg(ArrayRef<SDValue> Values, const SDLoc &dl,
- MVT VecTy, SelectionDAG &DAG) const;
- SDValue buildHvxVectorPred(ArrayRef<SDValue> Values, const SDLoc &dl,
- MVT VecTy, SelectionDAG &DAG) const;
- SDValue createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
- unsigned BitBytes, bool ZeroFill,
- SelectionDAG &DAG) const;
- SDValue extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+ SDValue extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+ MVT ResTy, SelectionDAG &DAG) const;
+ SDValue extractHvxElementPred(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+ MVT ResTy, SelectionDAG &DAG) const;
+ SDValue insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV,
+ const SDLoc &dl, SelectionDAG &DAG) const;
+ SDValue insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV,
+ const SDLoc &dl, SelectionDAG &DAG) const;
+ SDValue extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl,
MVT ResTy, SelectionDAG &DAG) const;
- SDValue extractHvxElementPred(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+ SDValue extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl,
MVT ResTy, SelectionDAG &DAG) const;
- SDValue insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV,
+ SDValue insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV,
const SDLoc &dl, SelectionDAG &DAG) const;
- SDValue insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV,
+ SDValue insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV,
const SDLoc &dl, SelectionDAG &DAG) const;
- SDValue extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl,
- MVT ResTy, SelectionDAG &DAG) const;
- SDValue extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl,
- MVT ResTy, SelectionDAG &DAG) const;
- SDValue insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV,
- const SDLoc &dl, SelectionDAG &DAG) const;
- SDValue insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV,
- const SDLoc &dl, SelectionDAG &DAG) const;
- SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy,
- bool ZeroExt, SelectionDAG &DAG) const;
- SDValue compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy,
- SelectionDAG &DAG) const;
+ SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy,
+ bool ZeroExt, SelectionDAG &DAG) const;
+ SDValue compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy,
+ SelectionDAG &DAG) const;
- SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxStore(SDValue Op, SelectionDAG &DAG) const;
- SDValue HvxVecPredBitcastComputation(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
- SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
-
- std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
- const override;
-
- bool isHvxOperation(SDValue Op) const;
- bool isHvxOperation(SDNode *N) const;
- SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const;
- void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
- void ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
- SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- };
+ SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const;
+ SDValue WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const;
+
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
+ const override;
+
+ bool shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const;
+ bool isHvxOperation(SDNode *N, SelectionDAG &DAG) const;
+ SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const;
+ void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+ void ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+ SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 7cda915fffe9..29b75814da6e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -14,6 +14,10 @@
using namespace llvm;
+static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
+ cl::Hidden, cl::init(16),
+ cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
+
static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
@@ -87,17 +91,28 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::XOR, T, Legal);
setOperationAction(ISD::ADD, T, Legal);
setOperationAction(ISD::SUB, T, Legal);
+ setOperationAction(ISD::MUL, T, Legal);
setOperationAction(ISD::CTPOP, T, Legal);
setOperationAction(ISD::CTLZ, T, Legal);
+ setOperationAction(ISD::SELECT, T, Legal);
+ setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
if (T != ByteV) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
setOperationAction(ISD::BSWAP, T, Legal);
}
+ setOperationAction(ISD::SMIN, T, Legal);
+ setOperationAction(ISD::SMAX, T, Legal);
+ if (T.getScalarType() != MVT::i32) {
+ setOperationAction(ISD::UMIN, T, Legal);
+ setOperationAction(ISD::UMAX, T, Legal);
+ }
+
setOperationAction(ISD::CTTZ, T, Custom);
setOperationAction(ISD::LOAD, T, Custom);
- setOperationAction(ISD::MUL, T, Custom);
+ setOperationAction(ISD::MLOAD, T, Custom);
+ setOperationAction(ISD::MSTORE, T, Custom);
setOperationAction(ISD::MULHS, T, Custom);
setOperationAction(ISD::MULHU, T, Custom);
setOperationAction(ISD::BUILD_VECTOR, T, Custom);
@@ -147,9 +162,12 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
+ setOperationAction(ISD::SPLAT_VECTOR, T, Custom);
setOperationAction(ISD::LOAD, T, Custom);
setOperationAction(ISD::STORE, T, Custom);
+ setOperationAction(ISD::MLOAD, T, Custom);
+ setOperationAction(ISD::MSTORE, T, Custom);
setOperationAction(ISD::CTLZ, T, Custom);
setOperationAction(ISD::CTTZ, T, Custom);
setOperationAction(ISD::CTPOP, T, Custom);
@@ -172,6 +190,13 @@ HexagonTargetLowering::initializeHVXLowering() {
// Promote all shuffles to operate on vectors of bytes.
setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
}
+
+ setOperationAction(ISD::SMIN, T, Custom);
+ setOperationAction(ISD::SMAX, T, Custom);
+ if (T.getScalarType() != MVT::i32) {
+ setOperationAction(ISD::UMIN, T, Custom);
+ setOperationAction(ISD::UMAX, T, Custom);
+ }
}
// Boolean vectors.
@@ -188,6 +213,9 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::AND, BoolW, Custom);
setOperationAction(ISD::OR, BoolW, Custom);
setOperationAction(ISD::XOR, BoolW, Custom);
+ // Masked load/store takes a mask that may need splitting.
+ setOperationAction(ISD::MLOAD, BoolW, Custom);
+ setOperationAction(ISD::MSTORE, BoolW, Custom);
}
for (MVT T : LegalV) {
@@ -198,6 +226,7 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom);
+ setOperationAction(ISD::SELECT, BoolV, Custom);
setOperationAction(ISD::AND, BoolV, Legal);
setOperationAction(ISD::OR, BoolV, Legal);
setOperationAction(ISD::XOR, BoolV, Legal);
@@ -211,16 +240,82 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
}
+ // Handle store widening for short vectors.
+ unsigned HwLen = Subtarget.getVectorLength();
+ for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
+ if (ElemTy == MVT::i1)
+ continue;
+ int ElemWidth = ElemTy.getFixedSizeInBits();
+ int MaxElems = (8*HwLen) / ElemWidth;
+ for (int N = 2; N < MaxElems; N *= 2) {
+ MVT VecTy = MVT::getVectorVT(ElemTy, N);
+ auto Action = getPreferredVectorAction(VecTy);
+ if (Action == TargetLoweringBase::TypeWidenVector) {
+ setOperationAction(ISD::LOAD, VecTy, Custom);
+ setOperationAction(ISD::STORE, VecTy, Custom);
+ setOperationAction(ISD::SETCC, VecTy, Custom);
+ setOperationAction(ISD::TRUNCATE, VecTy, Custom);
+ setOperationAction(ISD::ANY_EXTEND, VecTy, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom);
+
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
+ if (!isTypeLegal(BoolTy))
+ setOperationAction(ISD::SETCC, BoolTy, Custom);
+ }
+ }
+ }
+
+ setTargetDAGCombine(ISD::SPLAT_VECTOR);
setTargetDAGCombine(ISD::VSELECT);
}
+unsigned
+HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
+ MVT ElemTy = VecTy.getVectorElementType();
+ unsigned VecLen = VecTy.getVectorNumElements();
+ unsigned HwLen = Subtarget.getVectorLength();
+
+ // Split vectors of i1 that exceed byte vector length.
+ if (ElemTy == MVT::i1 && VecLen > HwLen)
+ return TargetLoweringBase::TypeSplitVector;
+
+ ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
+ // For shorter vectors of i1, widen them if any of the corresponding
+ // vectors of integers needs to be widened.
+ if (ElemTy == MVT::i1) {
+ for (MVT T : Tys) {
+ assert(T != MVT::i1);
+ auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
+ if (A != ~0u)
+ return A;
+ }
+ return ~0u;
+ }
+
+ // If the size of VecTy is at least half of the vector length,
+ // widen the vector. Note: the threshold was not selected in
+ // any scientific way.
+ if (llvm::is_contained(Tys, ElemTy)) {
+ unsigned VecWidth = VecTy.getSizeInBits();
+ bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
+ if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
+ return TargetLoweringBase::TypeWidenVector;
+ unsigned HwWidth = 8*HwLen;
+ if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
+ return TargetLoweringBase::TypeWidenVector;
+ }
+
+ // Defer to default.
+ return ~0u;
+}
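A worked instance of the widening rule above, as a standalone sketch; the 128-byte vector length and the 16-byte default of hexagon-hvx-widen are assumptions about one typical configuration, not values fixed by this hunk.

// True when a VecBits-wide vector of a legal HVX element type gets widened:
// either it reaches the explicit byte threshold, or it is at least half of
// (but smaller than) the 8*HwLenBytes hardware width.
bool widensToHvx(unsigned VecBits, unsigned HwLenBytes = 128,
                 unsigned ThresholdBytes = 16, bool ThresholdGiven = false) {
  unsigned HwBits = 8 * HwLenBytes;
  if (ThresholdGiven && 8 * ThresholdBytes <= VecBits)
    return true;
  return VecBits >= HwBits / 2 && VecBits < HwBits;
}
// e.g. widensToHvx(512) is true (v64i8 at 128-byte HVX); widensToHvx(1024) is false.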
+
SDValue
HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
const SDLoc &dl, SelectionDAG &DAG) const {
SmallVector<SDValue,4> IntOps;
IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
- for (const SDValue &Op : Ops)
- IntOps.push_back(Op);
+ append_range(IntOps, Ops);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
}
@@ -432,7 +527,9 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
if (IdxN && IdxN->isNullValue())
return getZero(dl, VecTy, DAG);
- return DAG.getNode(HexagonISD::VSPLATW, dl, VecTy, SplatV);
+ MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
+ SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
+ return DAG.getBitcast(VecTy, S);
}
// Delay recognizing constant vectors until here, so that we can generate
@@ -571,6 +668,9 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
if (!ZeroFill)
return S;
// Fill the bytes beyond BlockLen with 0s.
+ // V6_pred_scalar2 cannot fill the entire predicate, so it only works
+ // when BlockLen < HwLen.
+ assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
{DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
@@ -1034,6 +1134,7 @@ HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
// ByteVec is the target vector VecV rotated in such a way that the
// subvector should be inserted at index 0. Generate a predicate mask
// and use vmux to do the insertion.
+ assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
{DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
@@ -1058,7 +1159,7 @@ HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
- SDValue True = DAG.getNode(HexagonISD::VSPLAT, dl, ResTy,
+ SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
DAG.getConstant(1, dl, MVT::i32));
SDValue False = getZero(dl, ResTy, DAG);
return DAG.getSelect(dl, ResTy, VecV, True, False);
@@ -1180,12 +1281,19 @@ HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
continue;
}
// A few less complicated cases.
- if (V.getOpcode() == ISD::Constant)
- Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy);
- else if (V.isUndef())
- Elems[i] = DAG.getUNDEF(NTy);
- else
- llvm_unreachable("Unexpected vector element");
+ switch (V.getOpcode()) {
+ case ISD::Constant:
+ Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy);
+ break;
+ case ISD::UNDEF:
+ Elems[i] = DAG.getUNDEF(NTy);
+ break;
+ case ISD::TRUNCATE:
+ Elems[i] = V.getOperand(0);
+ break;
+ default:
+ llvm_unreachable("Unexpected vector element");
+ }
}
}
return DAG.getBuildVector(VecTy, dl, Elems);
@@ -1346,19 +1454,14 @@ HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
// Calculate the vectors of 1 and bitwidth(x).
MVT ElemTy = ty(InpV).getVectorElementType();
unsigned ElemWidth = ElemTy.getSizeInBits();
- // Using uint64_t because a shift by 32 can happen.
- uint64_t Splat1 = 0, SplatW = 0;
- assert(isPowerOf2_32(ElemWidth) && ElemWidth <= 32);
- for (unsigned i = 0; i != 32/ElemWidth; ++i) {
- Splat1 = (Splat1 << ElemWidth) | 1;
- SplatW = (SplatW << ElemWidth) | ElemWidth;
- }
- SDValue Vec1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy,
- DAG.getConstant(uint32_t(Splat1), dl, MVT::i32));
- SDValue VecW = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy,
- DAG.getConstant(uint32_t(SplatW), dl, MVT::i32));
- SDValue VecN1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy,
+
+ SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
+ DAG.getConstant(1, dl, MVT::i32));
+ SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
+ DAG.getConstant(ElemWidth, dl, MVT::i32));
+ SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
DAG.getConstant(-1, dl, MVT::i32));
+
// Do not use DAG.getNOT, because that would create BUILD_VECTOR with
// a BITCAST. Here we can skip the BITCAST (so we don't have to handle
// it separately in custom combine or selection).
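The three splats built above (1, the element width, and -1) feed the usual bit trick cttz(x) = W - ctlz(~x & (x - 1)); the arithmetic that combines them lies outside this hunk, so read this as an assumption about the surrounding code. A scalar check of the identity itself:

#include <bit>
#include <cstdint>
unsigned cttzModel(uint32_t x) {
  uint32_t m = ~x & (x - 1u);                  // ones exactly in the trailing-zero positions
  return 32u - unsigned(std::countl_zero(m));  // 32 for x == 0, matching CTLZ(0) == W
}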
@@ -1370,60 +1473,6 @@ HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
}
SDValue
-HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
- MVT ResTy = ty(Op);
- assert(ResTy.isVector() && isHvxSingleTy(ResTy));
- const SDLoc &dl(Op);
- SmallVector<int,256> ShuffMask;
-
- MVT ElemTy = ResTy.getVectorElementType();
- unsigned VecLen = ResTy.getVectorNumElements();
- SDValue Vs = Op.getOperand(0);
- SDValue Vt = Op.getOperand(1);
-
- switch (ElemTy.SimpleTy) {
- case MVT::i8: {
- // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
- // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
- // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
- MVT ExtTy = typeExtElem(ResTy, 2);
- unsigned MpyOpc = ElemTy == MVT::i8 ? Hexagon::V6_vmpybv
- : Hexagon::V6_vmpyhv;
- SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);
-
- // Discard high halves of the resulting values, collect the low halves.
- for (unsigned I = 0; I < VecLen; I += 2) {
- ShuffMask.push_back(I); // Pick even element.
- ShuffMask.push_back(I+VecLen); // Pick odd element.
- }
- VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
- SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
- return DAG.getBitcast(ResTy, BS);
- }
- case MVT::i16:
- // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode.
- // (There is also V6_vmpyhv, which behaves in an analogous way to
- // V6_vmpybv.)
- return getInstr(Hexagon::V6_vmpyih, dl, ResTy, {Vs, Vt}, DAG);
- case MVT::i32: {
- // Use the following sequence for signed word multiply:
- // T0 = V6_vmpyiowh Vs, Vt
- // T1 = V6_vaslw T0, 16
- // T2 = V6_vmpyiewuh_acc T1, Vs, Vt
- SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
- SDValue T0 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG);
- SDValue T1 = getInstr(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG);
- SDValue T2 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
- {T1, Vs, Vt}, DAG);
- return T2;
- }
- default:
- break;
- }
- return SDValue();
-}
-
-SDValue
HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
MVT ResTy = ty(Op);
assert(ResTy.isVector());
@@ -1462,7 +1511,7 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
assert(ElemTy == MVT::i32);
SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
- if (IsSigned) {
+ auto MulHS_V60 = [&](SDValue Vs, SDValue Vt) {
// mulhs(Vs,Vt) =
// = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
// = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
@@ -1489,6 +1538,20 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
// Add:
SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2});
return T3;
+ };
+
+ auto MulHS_V62 = [&](SDValue Vs, SDValue Vt) {
+ MVT PairTy = typeJoin({ResTy, ResTy});
+ SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {Vs, Vt}, DAG);
+ SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
+ {T0, Vs, Vt}, DAG);
+ return opSplit(T1, dl, DAG).second;
+ };
+
+ if (IsSigned) {
+ if (Subtarget.useHVXV62Ops())
+ return MulHS_V62(Vs, Vt);
+ return MulHS_V60(Vs, Vt);
}
// Unsigned mulhw. (Would expansion using signed mulhw be better?)
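MulHS_V60 follows the half-word decomposition spelled out in the comment above. The same arithmetic on plain 32-bit scalars, as a self-contained sketch (the helper name mulhs32 is invented here; the real lowering uses HVX multiply/accumulate instructions rather than 64-bit temporaries):

    #include <cassert>
    #include <cstdint>

    // High 32 bits of the signed 64-bit product, built from 16-bit halves.
    // Arithmetic right shift on negative values is assumed (as on Hexagon).
    int32_t mulhs32(int32_t s, int32_t t) {
      int32_t hs = s >> 16, ht = t >> 16;          // signed high halves
      uint32_t ls = uint32_t(s) & 0xFFFF;          // unsigned low halves
      uint32_t lt = uint32_t(t) & 0xFFFF;

      int64_t hh = int64_t(hs) * ht;               // weight 2^32
      int64_t hl = int64_t(hs) * int64_t(lt);      // weight 2^16
      int64_t lh = int64_t(ls) * int64_t(ht);      // weight 2^16
      uint64_t ll = uint64_t(ls) * lt;             // weight 2^0

      int64_t mid = hl + lh + int64_t(ll >> 16);   // carry out of the low word
      return int32_t(hh + (mid >> 16));
    }

    int main() {
      for (int64_t a : {-7LL, 123456789LL, -2000000000LL})
        for (int64_t b : {3LL, -99999LL, 1LL << 30})
          assert(mulhs32(int32_t(a), int32_t(b)) == int32_t((a * b) >> 32));
      return 0;
    }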
@@ -1585,6 +1648,26 @@ HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
}
SDValue
+HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
+ MVT ResTy = ty(Op);
+ if (ResTy.getVectorElementType() != MVT::i1)
+ return Op;
+
+ const SDLoc &dl(Op);
+ unsigned HwLen = Subtarget.getVectorLength();
+ unsigned VecLen = ResTy.getVectorNumElements();
+ assert(HwLen % VecLen == 0);
+ unsigned ElemSize = HwLen / VecLen;
+
+ MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
+ SDValue S =
+ DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
+ DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
+ DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
+ return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
+}
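LowerHvxSelect handles a scalar-condition SELECT of predicate (i1-element) vectors by converting the predicates to byte vectors (Q2V), selecting them as ordinary data, and converting back (V2Q). A toy scalar model of that round trip (widths and names are illustrative only):

    #include <array>
    #include <cassert>
    #include <cstddef>

    template <std::size_t N>
    std::array<bool, N> selectPred(bool Cond, const std::array<bool, N> &T,
                                   const std::array<bool, N> &F) {
      std::array<unsigned char, N> Tb{}, Fb{};
      for (std::size_t i = 0; i != N; ++i) {       // Q2V: predicate -> bytes
        Tb[i] = T[i] ? 0xFF : 0;
        Fb[i] = F[i] ? 0xFF : 0;
      }
      std::array<unsigned char, N> Sb = Cond ? Tb : Fb;  // ISD::SELECT on data
      std::array<bool, N> R{};
      for (std::size_t i = 0; i != N; ++i)         // V2Q: bytes -> predicate
        R[i] = Sb[i] != 0;
      return R;
    }

    int main() {
      std::array<bool, 4> T{true, false, true, true};
      std::array<bool, 4> F{false, true, false, false};
      assert(selectPred(true, T, F) == T);
      assert(selectPred(false, T, F) == F);
      return 0;
    }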
+
+SDValue
HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
if (SDValue S = getVectorShiftByInt(Op, DAG))
return S;
@@ -1593,7 +1676,7 @@ HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
- const SDLoc &dl(Op);
+ const SDLoc &dl(Op);
MVT ResTy = ty(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -1614,6 +1697,76 @@ HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
}
SDValue
+HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ unsigned HwLen = Subtarget.getVectorLength();
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
+ SDValue Mask = MaskN->getMask();
+ SDValue Chain = MaskN->getChain();
+ SDValue Base = MaskN->getBasePtr();
+ auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
+
+ unsigned Opc = Op->getOpcode();
+ assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);
+
+ if (Opc == ISD::MLOAD) {
+ MVT ValTy = ty(Op);
+ SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
+ SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
+ if (isUndef(Thru))
+ return Load;
+ SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
+ return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
+ }
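The MLOAD path above performs the full aligned load and then blends with the pass-through under the mask. In scalar terms (a sketch with toy element types; it assumes loading the whole vector is safe, which holds for these aligned HVX accesses):

    #include <array>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    template <std::size_t N>
    std::array<int32_t, N> maskedLoad(const std::array<int32_t, N> &Mem,
                                      const std::array<bool, N> &Mask,
                                      const std::array<int32_t, N> &Thru) {
      std::array<int32_t, N> Wide = Mem;        // plain load of the whole vector
      std::array<int32_t, N> Out{};
      for (std::size_t i = 0; i != N; ++i)
        Out[i] = Mask[i] ? Wide[i] : Thru[i];   // ISD::VSELECT(Mask, Load, Thru)
      return Out;
    }

    int main() {
      std::array<int32_t, 4> Mem{10, 20, 30, 40}, Thru{-1, -2, -3, -4};
      std::array<bool, 4> Mask{true, false, true, false};
      auto R = maskedLoad(Mem, Mask, Thru);
      assert((R == std::array<int32_t, 4>{10, -2, 30, -4}));
      return 0;
    }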
+
+ // MSTORE
+ // HVX only has aligned masked stores.
+
+ // TODO: Fold negations of the mask into the store.
+ unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
+ SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
+ SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
+
+ if (MaskN->getAlign().value() % HwLen == 0) {
+ SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
+ {Mask, Base, Offset0, Value, Chain}, DAG);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
+ return Store;
+ }
+
+ // Unaligned case.
+ auto StoreAlign = [&](SDValue V, SDValue A) {
+ SDValue Z = getZero(dl, ty(V), DAG);
+ // TODO: use funnel shifts?
+ // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
+ // upper half.
+ SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
+ SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
+ return std::make_pair(LoV, HiV);
+ };
+
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
+ SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
+ VectorPair Tmp = StoreAlign(MaskV, Base);
+ VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
+ DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
+ VectorPair ValueU = StoreAlign(Value, Base);
+
+ SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
+ SDValue StoreLo =
+ getInstr(StoreOpc, dl, MVT::Other,
+ {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
+ SDValue StoreHi =
+ getInstr(StoreOpc, dl, MVT::Other,
+ {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
+ DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
+}
+
+SDValue
HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
assert(!Op.isMachineOpcode());
SmallVector<SDValue,2> OpsL, OpsH;
@@ -1648,45 +1801,252 @@ HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
- LSBaseSDNode *BN = cast<LSBaseSDNode>(Op.getNode());
- assert(BN->isUnindexed());
- MVT MemTy = BN->getMemoryVT().getSimpleVT();
+ auto *MemN = cast<MemSDNode>(Op.getNode());
+
+ MVT MemTy = MemN->getMemoryVT().getSimpleVT();
if (!isHvxPairTy(MemTy))
return Op;
const SDLoc &dl(Op);
unsigned HwLen = Subtarget.getVectorLength();
MVT SingleTy = typeSplit(MemTy).first;
- SDValue Chain = BN->getChain();
- SDValue Base0 = BN->getBasePtr();
- SDValue Base1 = DAG.getMemBasePlusOffset(Base0, HwLen, dl);
+ SDValue Chain = MemN->getChain();
+ SDValue Base0 = MemN->getBasePtr();
+ SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
- if (MachineMemOperand *MMO = BN->getMemOperand()) {
+ if (MachineMemOperand *MMO = MemN->getMemOperand()) {
MachineFunction &MF = DAG.getMachineFunction();
MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen);
MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen);
}
- unsigned MemOpc = BN->getOpcode();
- SDValue NewOp;
+ unsigned MemOpc = MemN->getOpcode();
if (MemOpc == ISD::LOAD) {
+ assert(cast<LoadSDNode>(Op)->isUnindexed());
SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
- NewOp = DAG.getMergeValues(
- { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
- DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Load0.getValue(1), Load1.getValue(1)) }, dl);
- } else {
- assert(MemOpc == ISD::STORE);
+ return DAG.getMergeValues(
+ { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Load0.getValue(1), Load1.getValue(1)) }, dl);
+ }
+ if (MemOpc == ISD::STORE) {
+ assert(cast<StoreSDNode>(Op)->isUnindexed());
VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
- NewOp = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
}
- return NewOp;
+ assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
+
+ auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
+ assert(MaskN->isUnindexed());
+ VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
+ SDValue Offset = DAG.getUNDEF(MVT::i32);
+
+ if (MemOpc == ISD::MLOAD) {
+ VectorPair Thru =
+ opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
+ SDValue MLoad0 =
+ DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
+ Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
+ ISD::NON_EXTLOAD, false);
+ SDValue MLoad1 =
+ DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
+ Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
+ ISD::NON_EXTLOAD, false);
+ return DAG.getMergeValues(
+ { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
+ }
+ if (MemOpc == ISD::MSTORE) {
+ VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
+ SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
+ Masks.first, SingleTy, MOp0,
+ ISD::UNINDEXED, false, false);
+ SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
+ Masks.second, SingleTy, MOp1,
+ ISD::UNINDEXED, false, false);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
+ }
+
+ std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
+ llvm_unreachable(Name.c_str());
+}
+
+SDValue
+HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ auto *LoadN = cast<LoadSDNode>(Op.getNode());
+ assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
+ assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
+ "Not widening loads of i1 yet");
+
+ SDValue Chain = LoadN->getChain();
+ SDValue Base = LoadN->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(MVT::i32);
+
+ MVT ResTy = ty(Op);
+ unsigned HwLen = Subtarget.getVectorLength();
+ unsigned ResLen = ResTy.getStoreSize();
+ assert(ResLen < HwLen && "vsetq(v1) prerequisite");
+
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
+ SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
+ {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
+
+ MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
+
+ SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
+ DAG.getUNDEF(LoadTy), LoadTy, MemOp,
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
+ SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
+ return DAG.getMergeValues({Value, Chain}, dl);
+}
+
+SDValue
+HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ auto *StoreN = cast<StoreSDNode>(Op.getNode());
+ assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
+ assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
+ "Not widening stores of i1 yet");
+
+ SDValue Chain = StoreN->getChain();
+ SDValue Base = StoreN->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(MVT::i32);
+
+ SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
+ MVT ValueTy = ty(Value);
+ unsigned ValueLen = ValueTy.getVectorNumElements();
+ unsigned HwLen = Subtarget.getVectorLength();
+ assert(isPowerOf2_32(ValueLen));
+
+ for (unsigned Len = ValueLen; Len < HwLen; ) {
+ Value = opJoin({DAG.getUNDEF(ty(Value)), Value}, dl, DAG);
+ Len = ty(Value).getVectorNumElements(); // This is Len *= 2
+ }
+ assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
+
+ assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
+ SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
+ {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
+ return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
+ MemOp, ISD::UNINDEXED, false, false);
+}
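WidenHvxStore pads the value to a full vector with undef and stores it under a predicate covering only the original bytes (V6_pred_scalar2 sets the first ValueLen lanes). Its effect on memory, modeled on plain byte arrays (names are illustrative):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Store only the first ValueLen bytes of a full-width vector; bytes past
    // the original value are left untouched in memory.
    void storeFirstN(uint8_t *Mem, const uint8_t *WideVal, std::size_t HwLen,
                     std::size_t ValueLen) {
      for (std::size_t i = 0; i != HwLen; ++i)
        if (i < ValueLen)              // predicate from V6_pred_scalar2(ValueLen)
          Mem[i] = WideVal[i];
    }

    int main() {
      uint8_t Mem[8]  = {9, 9, 9, 9, 9, 9, 9, 9};
      uint8_t Wide[8] = {1, 2, 3, 4, 0, 0, 0, 0};  // value padded with don't-cares
      storeFirstN(Mem, Wide, 8, 4);
      assert(Mem[3] == 4 && Mem[4] == 9);          // tail bytes unchanged
      return 0;
    }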
+
+SDValue
+HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+ MVT ElemTy = ty(Op0).getVectorElementType();
+ unsigned HwLen = Subtarget.getVectorLength();
+
+ unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
+ assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
+ MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
+
+ SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
+ SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
+ EVT ResTy =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
+ SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
+ {WideOp0, WideOp1, Op.getOperand(2)});
+
+ EVT RetTy = getTypeToTransformTo(*DAG.getContext(), ty(Op));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
+ {SetCC, getZero(dl, MVT::i32, DAG)});
+}
+
+SDValue
+HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ unsigned HwWidth = 8*Subtarget.getVectorLength();
+
+ SDValue Op0 = Op.getOperand(0);
+ MVT ResTy = ty(Op);
+ MVT OpTy = ty(Op0);
+ if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
+ return SDValue();
+
+ // .-res, op-> ScalarVec Illegal HVX
+ // Scalar ok - -
+ // Illegal widen(insert) widen -
+ // HVX - widen ok
+
+ auto getFactor = [HwWidth](MVT Ty) {
+ unsigned Width = Ty.getSizeInBits();
+ return HwWidth > Width ? HwWidth / Width : 1;
+ };
+
+ auto getWideTy = [getFactor](MVT Ty) {
+ unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
+ return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
+ };
+
+ unsigned Opcode = Op.getOpcode() == ISD::SIGN_EXTEND ? HexagonISD::VUNPACK
+ : HexagonISD::VUNPACKU;
+ SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
+ SDValue WideRes = DAG.getNode(Opcode, dl, getWideTy(ResTy), WideOp);
+ return WideRes;
+}
+
+SDValue
+HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ unsigned HwWidth = 8*Subtarget.getVectorLength();
+
+ SDValue Op0 = Op.getOperand(0);
+ MVT ResTy = ty(Op);
+ MVT OpTy = ty(Op0);
+ if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
+ return SDValue();
+
+ // .-res, op-> ScalarVec Illegal HVX
+ // Scalar ok extract(widen) -
+ // Illegal - widen widen
+ // HVX - - ok
+
+ auto getFactor = [HwWidth](MVT Ty) {
+ unsigned Width = Ty.getSizeInBits();
+ assert(HwWidth % Width == 0);
+ return HwWidth / Width;
+ };
+
+ auto getWideTy = [getFactor](MVT Ty) {
+ unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
+ return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
+ };
+
+ if (Subtarget.isHVXVectorType(OpTy))
+ return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0);
+
+ assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");
+
+ SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
+ SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy),
+ WideOp);
+ // If the original result wasn't legal and was supposed to be widened,
+ // we're done.
+ if (shouldWidenToHvx(ResTy, DAG))
+ return WideRes;
+
+ // The original result type wasn't meant to be widened to HVX, so
+ // leave it as it is. Standard legalization should be able to deal
+ // with it (since now it's a result of a target-independent ISD
+ // node).
+ assert(ResTy.isVector());
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
+ {WideRes, getZero(dl, MVT::i32, DAG)});
}
SDValue
@@ -1703,6 +2063,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
break;
case ISD::LOAD:
case ISD::STORE:
+ case ISD::MLOAD:
+ case ISD::MSTORE:
return SplitHvxMemOp(Op, DAG);
case ISD::CTPOP:
case ISD::CTLZ:
@@ -1716,11 +2078,16 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA:
case ISD::SHL:
case ISD::SRL:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
case ISD::SETCC:
case ISD::VSELECT:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND_INREG:
+ case ISD::SPLAT_VECTOR:
return SplitHvxPairOp(Op, DAG);
}
}
@@ -1739,16 +2106,18 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
+ case ISD::SELECT: return LowerHvxSelect(Op, DAG);
case ISD::SRA:
case ISD::SHL:
case ISD::SRL: return LowerHvxShift(Op, DAG);
- case ISD::MUL: return LowerHvxMul(Op, DAG);
case ISD::MULHS:
case ISD::MULHU: return LowerHvxMulh(Op, DAG);
case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
case ISD::SETCC:
case ISD::INTRINSIC_VOID: return Op;
case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
+ case ISD::MLOAD:
+ case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
// Unaligned loads will be handled by the default lowering.
case ISD::LOAD: return SDValue();
}
@@ -1761,13 +2130,91 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
void
HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ unsigned Opc = N->getOpcode();
+ SDValue Op(N, 0);
+
+ switch (Opc) {
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
+ if (SDValue T = WidenHvxExtend(Op, DAG))
+ Results.push_back(T);
+ }
+ break;
+ case ISD::SETCC:
+ if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
+ if (SDValue T = WidenHvxSetCC(Op, DAG))
+ Results.push_back(T);
+ }
+ break;
+ case ISD::TRUNCATE:
+ if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
+ if (SDValue T = WidenHvxTruncate(Op, DAG))
+ Results.push_back(T);
+ }
+ break;
+ case ISD::STORE: {
+ if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
+ SDValue Store = WidenHvxStore(Op, DAG);
+ Results.push_back(Store);
+ }
+ break;
+ }
+ case ISD::MLOAD:
+ if (isHvxPairTy(ty(Op))) {
+ SDValue S = SplitHvxMemOp(Op, DAG);
+ assert(S->getOpcode() == ISD::MERGE_VALUES);
+ Results.push_back(S.getOperand(0));
+ Results.push_back(S.getOperand(1));
+ }
+ break;
+ case ISD::MSTORE:
+ if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
+ SDValue S = SplitHvxMemOp(Op, DAG);
+ Results.push_back(S);
+ }
+ break;
+ default:
+ break;
+ }
}
void
HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
unsigned Opc = N->getOpcode();
+ SDValue Op(N, 0);
switch (Opc) {
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ if (shouldWidenToHvx(ty(Op), DAG)) {
+ if (SDValue T = WidenHvxExtend(Op, DAG))
+ Results.push_back(T);
+ }
+ break;
+ case ISD::SETCC:
+ if (shouldWidenToHvx(ty(Op), DAG)) {
+ if (SDValue T = WidenHvxSetCC(Op, DAG))
+ Results.push_back(T);
+ }
+ break;
+ case ISD::TRUNCATE:
+ if (shouldWidenToHvx(ty(Op), DAG)) {
+ if (SDValue T = WidenHvxTruncate(Op, DAG))
+ Results.push_back(T);
+ }
+ break;
+ case ISD::LOAD: {
+ if (shouldWidenToHvx(ty(Op), DAG)) {
+ SDValue Load = WidenHvxLoad(Op, DAG);
+ assert(Load->getOpcode() == ISD::MERGE_VALUES);
+ Results.push_back(Load.getOperand(0));
+ Results.push_back(Load.getOperand(1));
+ }
+ break;
+ }
case ISD::BITCAST:
if (isHvxBoolTy(ty(N->getOperand(0)))) {
SDValue Op(N, 0);
@@ -1784,44 +2231,95 @@ SDValue
HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
const SDLoc &dl(N);
+ SelectionDAG &DAG = DCI.DAG;
SDValue Op(N, 0);
-
unsigned Opc = Op.getOpcode();
- if (Opc == ISD::VSELECT) {
- // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
- SDValue Cond = Op.getOperand(0);
- if (Cond->getOpcode() == ISD::XOR) {
- SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
- if (C1->getOpcode() == HexagonISD::QTRUE) {
- SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
- Op.getOperand(2), Op.getOperand(1));
- return VSel;
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end());
+
+ switch (Opc) {
+ case ISD::VSELECT: {
+ // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
+ SDValue Cond = Ops[0];
+ if (Cond->getOpcode() == ISD::XOR) {
+ SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
+ if (C1->getOpcode() == HexagonISD::QTRUE)
+ return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
+ }
+ break;
+ }
+ case HexagonISD::V2Q:
+ if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
+ if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
+ return C->isNullValue() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
+ : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
+ }
+ break;
+ case HexagonISD::Q2V:
+ if (Ops[0].getOpcode() == HexagonISD::QTRUE)
+ return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
+ DAG.getConstant(-1, dl, MVT::i32));
+ if (Ops[0].getOpcode() == HexagonISD::QFALSE)
+ return getZero(dl, ty(Op), DAG);
+ break;
+ case HexagonISD::VINSERTW0:
+ if (isUndef(Ops[1]))
+ return Ops[0];
+ break;
+ case HexagonISD::VROR: {
+ if (Ops[0].getOpcode() == HexagonISD::VROR) {
+ SDValue Vec = Ops[0].getOperand(0);
+ SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
+ SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
+ return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
}
+ break;
}
}
+
return SDValue();
}
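The VROR-of-VROR fold a few lines above relies on rotations composing additively; VROR rotates the vector by a byte count, and the scalar analogue of the identity it uses can be checked directly with C++20 std::rotl:

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t X = 0x12345678u;
      for (int A = 0; A < 32; ++A)
        for (int B = 0; B < 32; ++B)
          assert(std::rotl(std::rotl(X, A), B) == std::rotl(X, A + B));
      return 0;
    }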
bool
-HexagonTargetLowering::isHvxOperation(SDValue Op) const {
- // If the type of the result, or any operand type are HVX vector types,
- // this is an HVX operation.
- return Subtarget.isHVXVectorType(ty(Op), true) ||
- llvm::any_of(Op.getNode()->ops(),
- [this] (SDValue V) {
- return Subtarget.isHVXVectorType(ty(V), true);
- });
+HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
+ auto Action = getPreferredHvxVectorAction(Ty);
+ if (Action == TargetLoweringBase::TypeWidenVector) {
+ EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty);
+ assert(WideTy.isSimple());
+ return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true);
+ }
+ return false;
}
bool
-HexagonTargetLowering::isHvxOperation(SDNode *N) const {
+HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
+ if (!Subtarget.useHVXOps())
+ return false;
// If the type of any result, or any operand type are HVX vector types,
// this is an HVX operation.
- auto IsHvxTy = [this] (EVT Ty) {
+ auto IsHvxTy = [this](EVT Ty) {
return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
};
- auto IsHvxOp = [this] (SDValue Op) {
- return Subtarget.isHVXVectorType(ty(Op), true);
+ auto IsHvxOp = [this](SDValue Op) {
+ return Op.getValueType().isSimple() &&
+ Subtarget.isHVXVectorType(ty(Op), true);
+ };
+ if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
+ return true;
+
+ // Check if this could be an HVX operation after type widening.
+ auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
+ if (!Op.getValueType().isSimple())
+ return false;
+ MVT ValTy = ty(Op);
+ return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
};
- return llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp);
+
+ for (int i = 0, e = N->getNumValues(); i != e; ++i) {
+ if (IsWidenedToHvx(SDValue(N, i)))
+ return true;
+ }
+ return llvm::any_of(N->ops(), IsWidenedToHvx);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index d1cd23c3be3e..26fc093d15a7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1639,8 +1639,9 @@ bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
return false;
}
-bool HexagonInstrInfo::DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const {
+bool HexagonInstrInfo::ClobbersPredicate(MachineInstr &MI,
+ std::vector<MachineOperand> &Pred,
+ bool SkipDead) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
for (unsigned oper = 0; oper < MI.getNumOperands(); ++oper) {
@@ -2721,6 +2722,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::PS_vloadrw_nt_ai:
case Hexagon::V6_vL32b_ai:
case Hexagon::V6_vS32b_ai:
+ case Hexagon::V6_vS32b_qpred_ai:
+ case Hexagon::V6_vS32b_nqpred_ai:
case Hexagon::V6_vL32b_nt_ai:
case Hexagon::V6_vS32b_nt_ai:
case Hexagon::V6_vL32Ub_ai:
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 847b9a672891..11717996935d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -238,8 +238,8 @@ public:
/// If the specified instruction defines any predicate
/// or condition code register(s) used for predication, returns true as well
/// as the definition predicate(s) by reference.
- bool DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const override;
+ bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred,
+ bool SkipDead) const override;
/// Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
index 1245ee7974b5..796979e59061 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -1,4 +1,4 @@
-//=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen *-=//
+//===- HexagonIntrinsicsV60.td - V60 instruction intrinsics -*- tablegen *-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 2c1e0cadd9ee..76cc8f402c5a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "HexagonLoopIdiomRecognition.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
@@ -16,6 +17,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -40,6 +42,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -108,136 +111,151 @@ static const char *HexagonVolatileMemcpyName
namespace llvm {
- void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
- Pass *createHexagonLoopIdiomPass();
+void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
+Pass *createHexagonLoopIdiomPass();
} // end namespace llvm
namespace {
- class HexagonLoopIdiomRecognize : public LoopPass {
- public:
- static char ID;
-
- explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
- initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- }
+class HexagonLoopIdiomRecognize {
+public:
+ explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
+ LoopInfo *LF, const TargetLibraryInfo *TLI,
+ ScalarEvolution *SE)
+ : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}
+
+ bool run(Loop *L);
+
+private:
+ int getSCEVStride(const SCEVAddRecExpr *StoreEv);
+ bool isLegalStore(Loop *CurLoop, StoreInst *SI);
+ void collectStores(Loop *CurLoop, BasicBlock *BB,
+ SmallVectorImpl<StoreInst *> &Stores);
+ bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
+ bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const;
+ bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks);
+ bool runOnCountableLoop(Loop *L);
+
+ AliasAnalysis *AA;
+ const DataLayout *DL;
+ DominatorTree *DT;
+ LoopInfo *LF;
+ const TargetLibraryInfo *TLI;
+ ScalarEvolution *SE;
+ bool HasMemcpy, HasMemmove;
+};
+
+class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass {
+public:
+ static char ID;
+
+ explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
+ initializeHexagonLoopIdiomRecognizeLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
- StringRef getPassName() const override {
- return "Recognize Hexagon-specific loop idioms";
- }
+ StringRef getPassName() const override {
+ return "Recognize Hexagon-specific loop idioms";
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<TargetLibraryInfoWrapperPass>();
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ }
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+};
- private:
- int getSCEVStride(const SCEVAddRecExpr *StoreEv);
- bool isLegalStore(Loop *CurLoop, StoreInst *SI);
- void collectStores(Loop *CurLoop, BasicBlock *BB,
- SmallVectorImpl<StoreInst*> &Stores);
- bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
- bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
- bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
- SmallVectorImpl<BasicBlock*> &ExitBlocks);
- bool runOnCountableLoop(Loop *L);
-
- AliasAnalysis *AA;
- const DataLayout *DL;
- DominatorTree *DT;
- LoopInfo *LF;
- const TargetLibraryInfo *TLI;
- ScalarEvolution *SE;
- bool HasMemcpy, HasMemmove;
+struct Simplifier {
+ struct Rule {
+ using FuncType = std::function<Value *(Instruction *, LLVMContext &)>;
+ Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
+ StringRef Name; // For debugging.
+ FuncType Fn;
};
- struct Simplifier {
- struct Rule {
- using FuncType = std::function<Value* (Instruction*, LLVMContext&)>;
- Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
- StringRef Name; // For debugging.
- FuncType Fn;
- };
-
- void addRule(StringRef N, const Rule::FuncType &F) {
- Rules.push_back(Rule(N, F));
- }
+ void addRule(StringRef N, const Rule::FuncType &F) {
+ Rules.push_back(Rule(N, F));
+ }
- private:
- struct WorkListType {
- WorkListType() = default;
+private:
+ struct WorkListType {
+ WorkListType() = default;
- void push_back(Value* V) {
- // Do not push back duplicates.
- if (!S.count(V)) { Q.push_back(V); S.insert(V); }
+ void push_back(Value *V) {
+ // Do not push back duplicates.
+ if (!S.count(V)) {
+ Q.push_back(V);
+ S.insert(V);
}
+ }
- Value *pop_front_val() {
- Value *V = Q.front(); Q.pop_front(); S.erase(V);
- return V;
- }
+ Value *pop_front_val() {
+ Value *V = Q.front();
+ Q.pop_front();
+ S.erase(V);
+ return V;
+ }
- bool empty() const { return Q.empty(); }
+ bool empty() const { return Q.empty(); }
- private:
- std::deque<Value*> Q;
- std::set<Value*> S;
- };
+ private:
+ std::deque<Value *> Q;
+ std::set<Value *> S;
+ };
- using ValueSetType = std::set<Value *>;
+ using ValueSetType = std::set<Value *>;
- std::vector<Rule> Rules;
+ std::vector<Rule> Rules;
- public:
- struct Context {
- using ValueMapType = DenseMap<Value *, Value *>;
+public:
+ struct Context {
+ using ValueMapType = DenseMap<Value *, Value *>;
- Value *Root;
- ValueSetType Used; // The set of all cloned values used by Root.
- ValueSetType Clones; // The set of all cloned values.
- LLVMContext &Ctx;
+ Value *Root;
+ ValueSetType Used; // The set of all cloned values used by Root.
+ ValueSetType Clones; // The set of all cloned values.
+ LLVMContext &Ctx;
- Context(Instruction *Exp)
+ Context(Instruction *Exp)
: Ctx(Exp->getParent()->getParent()->getContext()) {
- initialize(Exp);
- }
-
- ~Context() { cleanup(); }
+ initialize(Exp);
+ }
- void print(raw_ostream &OS, const Value *V) const;
- Value *materialize(BasicBlock *B, BasicBlock::iterator At);
+ ~Context() { cleanup(); }
- private:
- friend struct Simplifier;
+ void print(raw_ostream &OS, const Value *V) const;
+ Value *materialize(BasicBlock *B, BasicBlock::iterator At);
- void initialize(Instruction *Exp);
- void cleanup();
+ private:
+ friend struct Simplifier;
- template <typename FuncT> void traverse(Value *V, FuncT F);
- void record(Value *V);
- void use(Value *V);
- void unuse(Value *V);
+ void initialize(Instruction *Exp);
+ void cleanup();
- bool equal(const Instruction *I, const Instruction *J) const;
- Value *find(Value *Tree, Value *Sub) const;
- Value *subst(Value *Tree, Value *OldV, Value *NewV);
- void replace(Value *OldV, Value *NewV);
- void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
- };
+ template <typename FuncT> void traverse(Value *V, FuncT F);
+ void record(Value *V);
+ void use(Value *V);
+ void unuse(Value *V);
- Value *simplify(Context &C);
+ bool equal(const Instruction *I, const Instruction *J) const;
+ Value *find(Value *Tree, Value *Sub) const;
+ Value *subst(Value *Tree, Value *OldV, Value *NewV);
+ void replace(Value *OldV, Value *NewV);
+ void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
};
+ Value *simplify(Context &C);
+};
+
struct PE {
PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
@@ -253,10 +271,10 @@ namespace {
} // end anonymous namespace
-char HexagonLoopIdiomRecognize::ID = 0;
+char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",
- "Recognize Hexagon-specific loop idioms", false, false)
+INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",
+ "Recognize Hexagon-specific loop idioms", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
@@ -264,8 +282,8 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",
- "Recognize Hexagon-specific loop idioms", false, false)
+INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",
+ "Recognize Hexagon-specific loop idioms", false, false)
template <typename FuncT>
void Simplifier::Context::traverse(Value *V, FuncT F) {
@@ -1973,7 +1991,7 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// Get the location that may be stored across the loop. Since the access
// is strided positively through memory, we say that the modified location
// starts at the pointer and has infinite size.
- LocationSize AccessSize = LocationSize::unknown();
+ LocationSize AccessSize = LocationSize::afterPointer();
// If the loop iterates a fixed number of times, we can refine the access
// size to be exactly the size of the memset, which is (BECount+1)*StoreSize
@@ -2404,14 +2422,11 @@ bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
return Changed;
}
-bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool HexagonLoopIdiomRecognize::run(Loop *L) {
const Module &M = *L->getHeader()->getParent()->getParent();
if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
return false;
- if (skipLoop(L))
- return false;
-
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
if (!L->getLoopPreheader())
@@ -2422,13 +2437,7 @@ bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
if (Name == "memset" || Name == "memcpy" || Name == "memmove")
return false;
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DL = &L->getHeader()->getModule()->getDataLayout();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
- *L->getHeader()->getParent());
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
HasMemcpy = TLI->has(LibFunc_memcpy);
HasMemmove = TLI->has(LibFunc_memmove);
@@ -2438,6 +2447,30 @@ bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
+bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L,
+ LPPassManager &LPM) {
+ if (skipLoop(L))
+ return false;
+
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent());
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);
+}
+
Pass *llvm::createHexagonLoopIdiomPass() {
- return new HexagonLoopIdiomRecognize();
+ return new HexagonLoopIdiomRecognizeLegacyPass();
+}
+
+PreservedAnalyses
+HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE)
+ .run(&L)
+ ? getLoopPassPreservedAnalyses()
+ : PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.h
new file mode 100644
index 000000000000..28ec83b05dac
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.h
@@ -0,0 +1,24 @@
+//===- HexagonLoopIdiomRecognition.h --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONLOOPIDIOMRECOGNITION_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONLOOPIDIOMRECOGNITION_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+struct HexagonLoopIdiomRecognitionPass
+ : PassInfoMixin<HexagonLoopIdiomRecognitionPass> {
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONLOOPIDIOMRECOGNITION_H
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 188d91355a35..9507de95231f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -104,7 +104,7 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
HexagonMCInstrInfo::setOuterLoop(MCB);
return;
}
- MCInst *MCI = new (AP.OutContext) MCInst;
+ MCInst *MCI = AP.OutContext.createMCInst();
MCI->setOpcode(MI->getOpcode());
assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) &&
"MCI opcode should have been set on construction");
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index c718e5f2d9fb..2cdfbe7845b6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -246,7 +246,7 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
for (NodeAddr<DefNode *> DA : SA.Addr->members_if(DFG->IsDef, *DFG)) {
LLVM_DEBUG(dbgs() << "\t\t[DefNode]: "
<< Print<NodeAddr<DefNode *>>(DA, *DFG) << "\n");
- RegisterRef DR = DFG->getPRI().normalize(DA.Addr->getRegRef(*DFG));
+ RegisterRef DR = DA.Addr->getRegRef(*DFG);
auto UseSet = LV->getAllReachedUses(DR, DA);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
index d818e0897f75..e026bb6d601d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
@@ -11,7 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "Hexagon.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -19,8 +21,6 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
-#include "Hexagon.h"
-
using namespace llvm;
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
index cc10627955fb..d216c511a994 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -1,4 +1,4 @@
-//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===//
+//===- HexagonPatterns.td - Selection Patterns for Hexagon -*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -229,6 +229,21 @@ def NegImm32: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32);
}]>;
+def SplatB: SDNodeXForm<imm, [{
+ uint32_t V = N->getZExtValue();
+ assert(isUInt<8>(V) || V >> 8 == 0xFFFFFF);
+ V &= 0xFF;
+ uint32_t S = V << 24 | V << 16 | V << 8 | V;
+ return CurDAG->getTargetConstant(S, SDLoc(N), MVT::i32);
+}]>;
+
+def SplatH: SDNodeXForm<imm, [{
+ uint32_t V = N->getZExtValue();
+ assert(isUInt<16>(V) || V >> 16 == 0xFFFF);
+ V &= 0xFFFF;
+ return CurDAG->getTargetConstant(V << 16 | V, SDLoc(N), MVT::i32);
+}]>;
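SplatB and SplatH replicate an 8- or 16-bit immediate across a 32-bit word; the relaxed asserts now also accept sign-extended inputs, which are masked back down before replication. The replication is equivalent to multiplying by a repeating-one constant, as a quick scalar check shows:

    #include <cassert>
    #include <cstdint>

    uint32_t splatB(uint8_t V)  { return V * 0x01010101u; }  // V<<24|V<<16|V<<8|V
    uint32_t splatH(uint16_t V) { return V * 0x00010001u; }  // V<<16|V

    int main() {
      assert(splatB(0xAB) == 0xABABABABu);
      assert(splatH(0x1234) == 0x12341234u);
      assert(splatB(uint8_t(-2)) == 0xFEFEFEFEu);  // sign-extended input, masked to 8 bits
      return 0;
    }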
+
// Helpers for type promotions/contractions.
def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>;
@@ -351,12 +366,14 @@ multiclass NopCast_pat<ValueType Ty1, ValueType Ty2, RegisterClass RC> {
def: Pat<(Ty2 (bitconvert (Ty1 RC:$Val))), (Ty2 RC:$Val)>;
}
-
// Frags for commonly used SDNodes.
def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>;
def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>;
+def Smin: pf2<smin>; def Smax: pf2<smax>;
+def Umin: pf2<umin>; def Umax: pf2<umax>;
+
def Rol: pf2<rotl>;
// --(1) Immediate -------------------------------------------------------
@@ -909,25 +926,14 @@ let AddedComplexity = 200 in {
defm: SelMinMax16_pats<setult, A2_minu, A2_maxu>;
}
-let AddedComplexity = 200 in {
- defm: MinMax_pats<A2_min, A2_max, select, setgt, i1, I32>;
- defm: MinMax_pats<A2_min, A2_max, select, setge, i1, I32>;
- defm: MinMax_pats<A2_max, A2_min, select, setlt, i1, I32>;
- defm: MinMax_pats<A2_max, A2_min, select, setle, i1, I32>;
- defm: MinMax_pats<A2_minu, A2_maxu, select, setugt, i1, I32>;
- defm: MinMax_pats<A2_minu, A2_maxu, select, setuge, i1, I32>;
- defm: MinMax_pats<A2_maxu, A2_minu, select, setult, i1, I32>;
- defm: MinMax_pats<A2_maxu, A2_minu, select, setule, i1, I32>;
-
- defm: MinMax_pats<A2_minp, A2_maxp, select, setgt, i1, I64>;
- defm: MinMax_pats<A2_minp, A2_maxp, select, setge, i1, I64>;
- defm: MinMax_pats<A2_maxp, A2_minp, select, setlt, i1, I64>;
- defm: MinMax_pats<A2_maxp, A2_minp, select, setle, i1, I64>;
- defm: MinMax_pats<A2_minup, A2_maxup, select, setugt, i1, I64>;
- defm: MinMax_pats<A2_minup, A2_maxup, select, setuge, i1, I64>;
- defm: MinMax_pats<A2_maxup, A2_minup, select, setult, i1, I64>;
- defm: MinMax_pats<A2_maxup, A2_minup, select, setule, i1, I64>;
-}
+def: OpR_RR_pat<A2_min, Smin, i32, I32, I32>;
+def: OpR_RR_pat<A2_max, Smax, i32, I32, I32>;
+def: OpR_RR_pat<A2_minu, Umin, i32, I32, I32>;
+def: OpR_RR_pat<A2_maxu, Umax, i32, I32, I32>;
+def: OpR_RR_pat<A2_minp, Smin, i64, I64, I64>;
+def: OpR_RR_pat<A2_maxp, Smax, i64, I64, I64>;
+def: OpR_RR_pat<A2_minup, Umin, i64, I64, I64>;
+def: OpR_RR_pat<A2_maxup, Umax, i64, I64, I64>;
let AddedComplexity = 100 in {
defm: MinMax_pats<F2_sfmin, F2_sfmax, select, setogt, i1, F32>;
@@ -943,18 +949,20 @@ let AddedComplexity = 100, Predicates = [HasV67] in {
defm: MinMax_pats<F2_dfmax, F2_dfmin, select, setole, i1, F64>;
}
-defm: MinMax_pats<A2_vminb, A2_vmaxb, vselect, setgt, v8i1, V8I8>;
-defm: MinMax_pats<A2_vminb, A2_vmaxb, vselect, setge, v8i1, V8I8>;
-defm: MinMax_pats<A2_vminh, A2_vmaxh, vselect, setgt, v4i1, V4I16>;
-defm: MinMax_pats<A2_vminh, A2_vmaxh, vselect, setge, v4i1, V4I16>;
-defm: MinMax_pats<A2_vminw, A2_vmaxw, vselect, setgt, v2i1, V2I32>;
-defm: MinMax_pats<A2_vminw, A2_vmaxw, vselect, setge, v2i1, V2I32>;
-defm: MinMax_pats<A2_vminub, A2_vmaxub, vselect, setugt, v8i1, V8I8>;
-defm: MinMax_pats<A2_vminub, A2_vmaxub, vselect, setuge, v8i1, V8I8>;
-defm: MinMax_pats<A2_vminuh, A2_vmaxuh, vselect, setugt, v4i1, V4I16>;
-defm: MinMax_pats<A2_vminuh, A2_vmaxuh, vselect, setuge, v4i1, V4I16>;
-defm: MinMax_pats<A2_vminuw, A2_vmaxuw, vselect, setugt, v2i1, V2I32>;
-defm: MinMax_pats<A2_vminuw, A2_vmaxuw, vselect, setuge, v2i1, V2I32>;
+def: OpR_RR_pat<A2_vminb, Smin, v8i8, V8I8>;
+def: OpR_RR_pat<A2_vmaxb, Smax, v8i8, V8I8>;
+def: OpR_RR_pat<A2_vminub, Umin, v8i8, V8I8>;
+def: OpR_RR_pat<A2_vmaxub, Umax, v8i8, V8I8>;
+
+def: OpR_RR_pat<A2_vminh, Smin, v4i16, V4I16>;
+def: OpR_RR_pat<A2_vmaxh, Smax, v4i16, V4I16>;
+def: OpR_RR_pat<A2_vminuh, Umin, v4i16, V4I16>;
+def: OpR_RR_pat<A2_vmaxuh, Umax, v4i16, V4I16>;
+
+def: OpR_RR_pat<A2_vminw, Smin, v2i32, V2I32>;
+def: OpR_RR_pat<A2_vmaxw, Smax, v2i32, V2I32>;
+def: OpR_RR_pat<A2_vminuw, Umin, v2i32, V2I32>;
+def: OpR_RR_pat<A2_vmaxuw, Umax, v2i32, V2I32>;
// --(7) Insert/extract --------------------------------------------------
//
@@ -991,21 +999,26 @@ def: Pat<(HexagonEXTRACTU I32:$Rs, I32:$Width, I32:$Off),
def: Pat<(HexagonEXTRACTU I64:$Rs, I32:$Width, I32:$Off),
(S2_extractup_rp I64:$Rs, (Combinew $Width, $Off))>;
-def SDTHexagonVSPLAT:
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
-
-def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>;
-
-def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
-def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
-def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)),
+def: Pat<(v4i8 (splat_vector anyint:$V)), (ToI32 (SplatB $V))>;
+def: Pat<(v2i16 (splat_vector anyint:$V)), (ToI32 (SplatH $V))>;
+def: Pat<(v8i8 (splat_vector anyint:$V)),
+ (Combinew (ToI32 (SplatB $V)), (ToI32 (SplatB $V)))>;
+def: Pat<(v4i16 (splat_vector anyint:$V)),
+ (Combinew (ToI32 (SplatH $V)), (ToI32 (SplatH $V)))>;
+let AddedComplexity = 10 in
+def: Pat<(v2i32 (splat_vector s8_0ImmPred:$s8)),
(A2_combineii imm:$s8, imm:$s8)>;
-def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>;
+def: Pat<(v2i32 (splat_vector anyimm:$V)), (Combinew (ToI32 $V), (ToI32 $V))>;
+
+def: Pat<(v4i8 (splat_vector I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
+def: Pat<(v2i16 (splat_vector I32:$Rs)), (LoReg (S2_vsplatrh I32:$Rs))>;
+def: Pat<(v4i16 (splat_vector I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
+def: Pat<(v2i32 (splat_vector I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>;
let AddedComplexity = 10 in
-def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>,
+def: Pat<(v8i8 (splat_vector I32:$Rs)), (S6_vsplatrbp I32:$Rs)>,
Requires<[HasV62]>;
-def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)),
+def: Pat<(v8i8 (splat_vector I32:$Rs)),
(Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>;
@@ -1082,9 +1095,9 @@ def FShl32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
(HiReg (S2_asl_r_p (Combinew $Rs, $Rt), $Ru))>;
def FShl64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
- (S2_lsr_i_p_or (S2_asl_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
+ (S2_lsr_i_p_or (S2_asl_i_p $Rs, $S), $Rt, (Subi<64> $S))>;
def FShl64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
- (S2_lsr_r_p_or (S2_asl_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
+ (S2_lsr_r_p_or (S2_asl_r_p $Rs, $Ru), $Rt, (A2_subri 64, $Ru))>;
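The two fragments were fixed to place $Rs in the high position and $Rt in the low one, matching fshl semantics for shift amounts strictly between 0 and 64 (so neither component shift is by the full width). A scalar reference for the 64-bit case:

    #include <cassert>
    #include <cstdint>

    // fshl(Hi, Lo, S): the top 64 bits of the 128-bit value (Hi:Lo) shifted
    // left by S, valid here for 0 < S < 64.
    uint64_t fshl64(uint64_t Hi, uint64_t Lo, unsigned S) {
      return (Hi << S) | (Lo >> (64 - S));
    }

    int main() {
      assert(fshl64(0x0123456789ABCDEFull, 0xFEDCBA9876543210ull, 8) ==
             0x23456789ABCDEFFEull);
      return 0;
    }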
// Combined SDNodeXForm: (Divu8 (Subi<64> $S))
def Divu64_8: SDNodeXForm<imm, [{
@@ -1307,17 +1320,17 @@ def: OpR_RR_pat<S2_asr_r_vh, pf2<HexagonVASR>, v4i16, V4I16, I32>;
def: OpR_RR_pat<S2_lsr_r_vw, pf2<HexagonVLSR>, v2i32, V2I32, I32>;
def: OpR_RR_pat<S2_lsr_r_vh, pf2<HexagonVLSR>, v4i16, V4I16, I32>;
-def: Pat<(sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))),
+def: Pat<(sra V2I32:$b, (v2i32 (splat_vector u5_0ImmPred:$c))),
(S2_asr_i_vw V2I32:$b, imm:$c)>;
-def: Pat<(srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))),
+def: Pat<(srl V2I32:$b, (v2i32 (splat_vector u5_0ImmPred:$c))),
(S2_lsr_i_vw V2I32:$b, imm:$c)>;
-def: Pat<(shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))),
+def: Pat<(shl V2I32:$b, (v2i32 (splat_vector u5_0ImmPred:$c))),
(S2_asl_i_vw V2I32:$b, imm:$c)>;
-def: Pat<(sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
+def: Pat<(sra V4I16:$b, (v4i16 (splat_vector u4_0ImmPred:$c))),
(S2_asr_i_vh V4I16:$b, imm:$c)>;
-def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
+def: Pat<(srl V4I16:$b, (v4i16 (splat_vector u4_0ImmPred:$c))),
(S2_lsr_i_vh V4I16:$b, imm:$c)>;
-def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
+def: Pat<(shl V4I16:$b, (v4i16 (splat_vector u4_0ImmPred:$c))),
(S2_asl_i_vh V4I16:$b, imm:$c)>;
def: Pat<(HexagonVASR V2I16:$Rs, u4_0ImmPred:$S),
@@ -1688,8 +1701,6 @@ def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
(F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
(F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
- (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
(PS_vmulw V2I32:$Rs, V2I32:$Rt)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 078a7135c55b..cd894c555adc 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -1,3 +1,15 @@
+//===- HexagonPatternsHVX.td - Selection Patterns for HVX --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+
+def SDTVecUnaryOp:
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+
def SDTVecBinOp:
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>;
@@ -9,9 +21,6 @@ def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;
-def SDTHexagonVSPLATW: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
-def HexagonVSPLATW: SDNode<"HexagonISD::VSPLATW", SDTHexagonVSPLATW>;
-
def HwLen2: SDNodeXForm<imm, [{
const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget());
return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);
@@ -33,37 +42,29 @@ def Combineq: OutPatFrag<(ops node:$Qs, node:$Qt),
def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;
-def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>;
def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>;
def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>;
def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>;
+def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>;
+def HexagonVUNPACK: SDNode<"HexagonISD::VUNPACK", SDTVecUnaryOp>;
+def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>;
-def vzero: PatFrag<(ops), (HexagonVZERO)>;
+def vzero: PatFrag<(ops), (splat_vector (i32 0))>;
def qtrue: PatFrag<(ops), (HexagonQTRUE)>;
def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
def qcat: PatFrag<(ops node:$Qs, node:$Qt),
(HexagonQCAT node:$Qs, node:$Qt)>;
-def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
+def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
+def vpackl: PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;
+def vunpack: PatFrag<(ops node:$Vs), (HexagonVUNPACK node:$Vs)>;
+def vunpacku: PatFrag<(ops node:$Vs), (HexagonVUNPACKU node:$Vs)>;
def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>;
def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;
-def SplatB: SDNodeXForm<imm, [{
- uint32_t V = N->getZExtValue();
- assert(isUInt<8>(V));
- uint32_t S = V << 24 | V << 16 | V << 8 | V;
- return CurDAG->getTargetConstant(S, SDLoc(N), MVT::i32);
-}]>;
-
-def SplatH: SDNodeXForm<imm, [{
- uint32_t V = N->getZExtValue();
- assert(isUInt<16>(V));
- return CurDAG->getTargetConstant(V << 16 | V, SDLoc(N), MVT::i32);
-}]>;
-
def IsVecOff : PatLeaf<(i32 imm), [{
int32_t V = N->getSExtValue();
int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass);
@@ -171,16 +172,19 @@ let Predicates = [UseHVX] in {
}
let Predicates = [UseHVX] in {
- def: Pat<(VecI8 vzero), (V6_vd0)>;
- def: Pat<(VecI16 vzero), (V6_vd0)>;
- def: Pat<(VecI32 vzero), (V6_vd0)>;
- def: Pat<(VecPI8 vzero), (PS_vdd0)>;
- def: Pat<(VecPI16 vzero), (PS_vdd0)>;
- def: Pat<(VecPI32 vzero), (PS_vdd0)>;
-
- def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>;
- def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
- def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>;
+ let AddedComplexity = 100 in {
+ // These should be preferred over a vsplat of 0.
+ def: Pat<(VecI8 vzero), (V6_vd0)>;
+ def: Pat<(VecI16 vzero), (V6_vd0)>;
+ def: Pat<(VecI32 vzero), (V6_vd0)>;
+ def: Pat<(VecPI8 vzero), (PS_vdd0)>;
+ def: Pat<(VecPI16 vzero), (PS_vdd0)>;
+ def: Pat<(VecPI32 vzero), (PS_vdd0)>;
+
+ def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>;
+ def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
+ def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>;
+ }
def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
(Combinev HvxVR:$Vt, HvxVR:$Vs)>;
@@ -207,63 +211,70 @@ let Predicates = [UseHVX] in {
(V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}
-def Vsplatib: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatB $V)))>;
-def Vsplatih: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatH $V)))>;
-def Vsplatiw: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 $V))>;
-
-def Vsplatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatw (S2_vsplatrb $Rs))>;
-def Vsplatrh: OutPatFrag<(ops node:$Rs),
- (V6_lvsplatw (A2_combine_ll $Rs, $Rs))>;
-def Vsplatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;
+// Splats for HvxV60
+def V60splatib: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatB $V)))>;
+def V60splatih: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatH $V)))>;
+def V60splatiw: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 $V))>;
+def V60splatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatw (S2_vsplatrb $Rs))>;
+def V60splatrh: OutPatFrag<(ops node:$Rs),
+ (V6_lvsplatw (A2_combine_ll $Rs, $Rs))>;
+def V60splatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;
+
+// Splats for HvxV62+
+def V62splatib: OutPatFrag<(ops node:$V), (V6_lvsplatb (ToI32 $V))>;
+def V62splatih: OutPatFrag<(ops node:$V), (V6_lvsplath (ToI32 $V))>;
+def V62splatiw: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 $V))>;
+def V62splatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatb $Rs)>;
+def V62splatrh: OutPatFrag<(ops node:$Rs), (V6_lvsplath $Rs)>;
+def V62splatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;
def Rep: OutPatFrag<(ops node:$N), (Combinev $N, $N)>;
-let Predicates = [UseHVX] in {
+let Predicates = [UseHVX,UseHVXV60] in {
let AddedComplexity = 10 in {
- def: Pat<(VecI8 (HexagonVSPLAT u8_0ImmPred:$V)), (Vsplatib $V)>;
- def: Pat<(VecI16 (HexagonVSPLAT u16_0ImmPred:$V)), (Vsplatih $V)>;
- def: Pat<(VecI32 (HexagonVSPLAT anyimm:$V)), (Vsplatiw $V)>;
- def: Pat<(VecPI8 (HexagonVSPLAT u8_0ImmPred:$V)), (Rep (Vsplatib $V))>;
- def: Pat<(VecPI16 (HexagonVSPLAT u16_0ImmPred:$V)), (Rep (Vsplatih $V))>;
- def: Pat<(VecPI32 (HexagonVSPLAT anyimm:$V)), (Rep (Vsplatiw $V))>;
+ def: Pat<(VecI8 (splat_vector u8_0ImmPred:$V)), (V60splatib $V)>;
+ def: Pat<(VecI16 (splat_vector u16_0ImmPred:$V)), (V60splatih $V)>;
+ def: Pat<(VecI32 (splat_vector anyimm:$V)), (V60splatiw $V)>;
+ def: Pat<(VecPI8 (splat_vector u8_0ImmPred:$V)), (Rep (V60splatib $V))>;
+ def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)), (Rep (V60splatih $V))>;
+ def: Pat<(VecPI32 (splat_vector anyimm:$V)), (Rep (V60splatiw $V))>;
+ }
+ def: Pat<(VecI8 (splat_vector I32:$Rs)), (V60splatrb $Rs)>;
+ def: Pat<(VecI16 (splat_vector I32:$Rs)), (V60splatrh $Rs)>;
+ def: Pat<(VecI32 (splat_vector I32:$Rs)), (V60splatrw $Rs)>;
+ def: Pat<(VecPI8 (splat_vector I32:$Rs)), (Rep (V60splatrb $Rs))>;
+ def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V60splatrh $Rs))>;
+ def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V60splatrw $Rs))>;
+}
+let Predicates = [UseHVX,UseHVXV62] in {
+ let AddedComplexity = 30 in {
+ def: Pat<(VecI8 (splat_vector u8_0ImmPred:$V)), (V62splatib imm:$V)>;
+ def: Pat<(VecI16 (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>;
+ def: Pat<(VecI32 (splat_vector anyimm:$V)), (V62splatiw imm:$V)>;
+ def: Pat<(VecPI8 (splat_vector u8_0ImmPred:$V)),
+ (Rep (V62splatib imm:$V))>;
+ def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)),
+ (Rep (V62splatih imm:$V))>;
+ def: Pat<(VecPI32 (splat_vector anyimm:$V)),
+ (Rep (V62splatiw imm:$V))>;
+ }
+ let AddedComplexity = 20 in {
+ def: Pat<(VecI8 (splat_vector I32:$Rs)), (V62splatrb $Rs)>;
+ def: Pat<(VecI16 (splat_vector I32:$Rs)), (V62splatrh $Rs)>;
+ def: Pat<(VecI32 (splat_vector I32:$Rs)), (V62splatrw $Rs)>;
+ def: Pat<(VecPI8 (splat_vector I32:$Rs)), (Rep (V62splatrb $Rs))>;
+ def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V62splatrh $Rs))>;
+ def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V62splatrw $Rs))>;
}
- def: Pat<(VecI8 (HexagonVSPLAT I32:$Rs)), (Vsplatrb $Rs)>;
- def: Pat<(VecI16 (HexagonVSPLAT I32:$Rs)), (Vsplatrh $Rs)>;
- def: Pat<(VecI32 (HexagonVSPLAT I32:$Rs)), (Vsplatrw $Rs)>;
- def: Pat<(VecPI8 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrb $Rs))>;
- def: Pat<(VecPI16 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrh $Rs))>;
- def: Pat<(VecPI32 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrw $Rs))>;
-
- def: Pat<(VecI8 (HexagonVSPLATW I32:$Rs)), (Vsplatrw $Rs)>;
- def: Pat<(VecI16 (HexagonVSPLATW I32:$Rs)), (Vsplatrw $Rs)>;
- def: Pat<(VecI32 (HexagonVSPLATW I32:$Rs)), (Vsplatrw $Rs)>;
- def: Pat<(VecPI8 (HexagonVSPLATW I32:$Rs)), (Rep (Vsplatrw $Rs))>;
- def: Pat<(VecPI16 (HexagonVSPLATW I32:$Rs)), (Rep (Vsplatrw $Rs))>;
- def: Pat<(VecPI32 (HexagonVSPLATW I32:$Rs)), (Rep (Vsplatrw $Rs))>;
}
class Vneg1<ValueType VecTy>
- : PatFrag<(ops), (VecTy (HexagonVSPLATW (i32 -1)))>;
+ : PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>;
class Vnot<ValueType VecTy>
: PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;
let Predicates = [UseHVX] in {
- let AddedComplexity = 220 in {
- defm: MinMax_pats<V6_vminb, V6_vmaxb, vselect, setgt, VecQ8, HVI8>;
- defm: MinMax_pats<V6_vminb, V6_vmaxb, vselect, setge, VecQ8, HVI8>;
- defm: MinMax_pats<V6_vminub, V6_vmaxub, vselect, setugt, VecQ8, HVI8>;
- defm: MinMax_pats<V6_vminub, V6_vmaxub, vselect, setuge, VecQ8, HVI8>;
- defm: MinMax_pats<V6_vminh, V6_vmaxh, vselect, setgt, VecQ16, HVI16>;
- defm: MinMax_pats<V6_vminh, V6_vmaxh, vselect, setge, VecQ16, HVI16>;
- defm: MinMax_pats<V6_vminuh, V6_vmaxuh, vselect, setugt, VecQ16, HVI16>;
- defm: MinMax_pats<V6_vminuh, V6_vmaxuh, vselect, setuge, VecQ16, HVI16>;
- defm: MinMax_pats<V6_vminw, V6_vmaxw, vselect, setgt, VecQ32, HVI32>;
- defm: MinMax_pats<V6_vminw, V6_vmaxw, vselect, setge, VecQ32, HVI32>;
- }
-}
-
-let Predicates = [UseHVX] in {
let AddedComplexity = 200 in {
def: Pat<(Vnot<VecI8> HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
def: Pat<(Vnot<VecI16> HVI16:$Vs), (V6_vnot HvxVR:$Vs)>;
@@ -292,6 +303,17 @@ let Predicates = [UseHVX] in {
def: OpR_RR_pat<V6_vxor, Xor, VecI16, HVI16>;
def: OpR_RR_pat<V6_vxor, Xor, VecI32, HVI32>;
+ def: OpR_RR_pat<V6_vminb, Smin, VecI8, HVI8>;
+ def: OpR_RR_pat<V6_vmaxb, Smax, VecI8, HVI8>;
+ def: OpR_RR_pat<V6_vminub, Umin, VecI8, HVI8>;
+ def: OpR_RR_pat<V6_vmaxub, Umax, VecI8, HVI8>;
+ def: OpR_RR_pat<V6_vminh, Smin, VecI16, HVI16>;
+ def: OpR_RR_pat<V6_vmaxh, Smax, VecI16, HVI16>;
+ def: OpR_RR_pat<V6_vminuh, Umin, VecI16, HVI16>;
+ def: OpR_RR_pat<V6_vmaxuh, Umax, VecI16, HVI16>;
+ def: OpR_RR_pat<V6_vminw, Smin, VecI32, HVI32>;
+ def: OpR_RR_pat<V6_vmaxw, Smax, VecI32, HVI32>;
+
def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt),
@@ -308,6 +330,20 @@ let Predicates = [UseHVX] in {
}
let Predicates = [UseHVX] in {
+ // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
+ // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
+ // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
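+ // The pattern below keeps only the low byte of each 16-bit product, which is
+ // the truncated result required for an i8 multiply.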
+ def: Pat<(mul HVI8:$Vs, HVI8:$Vt),
+ (V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)),
+ (LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>;
+ def: Pat<(mul HVI16:$Vs, HVI16:$Vt),
+ (V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(mul HVI32:$Vs, HVI32:$Vt),
+ (V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt),
+ HvxVR:$Vs, HvxVR:$Vt)>;
+}
+
+let Predicates = [UseHVX] in {
def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>;
def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
def: Pat<(VecPI16 (zext HVI8:$Vs)), (VZxtb $Vs)>;
@@ -364,6 +400,14 @@ let Predicates = [UseHVX] in {
(V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
}
+ // Take a pair of vectors Vt:Vs and shift them towards LSB by (Rt mod HwLen).
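+ // All element widths are handled by the same byte-granular V6_valignb.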
+ def: Pat<(VecI8 (valign HVI8:$Vt, HVI8:$Vs, I32:$Rt)),
+ (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
+ def: Pat<(VecI16 (valign HVI16:$Vt, HVI16:$Vs, I32:$Rt)),
+ (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
+ def: Pat<(VecI32 (valign HVI32:$Vt, HVI32:$Vs, I32:$Rt)),
+ (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
+
def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt),
(V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
(V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
@@ -393,10 +437,43 @@ let Predicates = [UseHVX] in {
def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(VecI16 (bswap HVI16:$Vs)),
- (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>;
- def: Pat<(VecI32 (bswap HVI32:$Vs)),
- (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x03030303)))>;
+ // Vpackl is a pseudo-op that is used when legalizing widened truncates.
+ // It should never be produced with a register pair in the output, but
+ // it can happen to have a pair as an input.
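+ // e.g. the (VecI8 (vpackl HWI16:$Vs)) pattern below takes a vector pair but
+ // produces a single vector.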
+ def: Pat<(VecI8 (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>;
+ def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>;
+ def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
+ def: Pat<(VecI8 (vpackl HWI16:$Vs)), (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>;
+ def: Pat<(VecI8 (vpackl HWI32:$Vs)),
+ (V6_vpackeb (IMPLICIT_DEF), (V6_vpackeh (HiVec $Vs), (LoVec $Vs)))>;
+ def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;
+
+ def: Pat<(VecI16 (vunpack HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
+ def: Pat<(VecI32 (vunpack HVI8:$Vs)), (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
+ def: Pat<(VecI32 (vunpack HVI16:$Vs)), (LoVec (VSxth $Vs))>;
+ def: Pat<(VecPI16 (vunpack HVI8:$Vs)), (VSxtb $Vs)>;
+ def: Pat<(VecPI32 (vunpack HVI8:$Vs)), (VSxth (LoVec (VSxtb $Vs)))>;
+ def: Pat<(VecPI32 (vunpack HVI32:$Vs)), (VSxth $Vs)>;
+
+ def: Pat<(VecI16 (vunpacku HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
+ def: Pat<(VecI32 (vunpacku HVI8:$Vs)), (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
+ def: Pat<(VecI32 (vunpacku HVI16:$Vs)), (LoVec (VZxth $Vs))>;
+ def: Pat<(VecPI16 (vunpacku HVI8:$Vs)), (VZxtb $Vs)>;
+ def: Pat<(VecPI32 (vunpacku HVI8:$Vs)), (VZxth (LoVec (VZxtb $Vs)))>;
+ def: Pat<(VecPI32 (vunpacku HVI32:$Vs)), (VZxth $Vs)>;
+
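+ // bswap is done with vdelta under a splatted control value: 0x01 for 16-bit
+ // elements, 0x03 for 32-bit elements. V60 and V62 differ only in how the
+ // control vector is splatted.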
+ let Predicates = [UseHVX,UseHVXV60] in {
+ def: Pat<(VecI16 (bswap HVI16:$Vs)),
+ (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x01)))>;
+ def: Pat<(VecI32 (bswap HVI32:$Vs)),
+ (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x03)))>;
+ }
+ let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in {
+ def: Pat<(VecI16 (bswap HVI16:$Vs)),
+ (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x01)))>;
+ def: Pat<(VecI32 (bswap HVI32:$Vs)),
+ (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x03)))>;
+ }
def: Pat<(VecI8 (ctpop HVI8:$Vs)),
(V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))),
@@ -406,10 +483,17 @@ let Predicates = [UseHVX] in {
(V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))),
(HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>;
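+ // For i8 ctlz, vcl0h counts leading zeros of the zero-extended 16-bit lanes,
+ // so 8 is subtracted to recover the count for each original byte.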
+ let Predicates = [UseHVX,UseHVXV60] in
def: Pat<(VecI8 (ctlz HVI8:$Vs)),
(V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
(V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
- (V6_lvsplatw (A2_tfrsi 0x08080808)))>;
+ (V60splatib (i32 0x08)))>;
+ let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in
+ def: Pat<(VecI8 (ctlz HVI8:$Vs)),
+ (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
+ (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
+ (V62splatib (i32 0x08)))>;
+
def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>;
def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index d0b02f035d1e..fc31139e13ce 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -139,8 +139,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
Register DstReg = Dst.getReg();
Register SrcReg = Src.getReg();
// Just handle virtual registers.
- if (Register::isVirtualRegister(DstReg) &&
- Register::isVirtualRegister(SrcReg)) {
+ if (DstReg.isVirtual() && SrcReg.isVirtual()) {
// Map the following:
// %170 = SXTW %166
// PeepholeMap[170] = %166
@@ -188,8 +187,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
Register DstReg = Dst.getReg();
Register SrcReg = Src.getReg();
// Just handle virtual registers.
- if (Register::isVirtualRegister(DstReg) &&
- Register::isVirtualRegister(SrcReg)) {
+ if (DstReg.isVirtual() && SrcReg.isVirtual()) {
// Map the following:
// %170 = NOT_xx %166
// PeepholeMap[170] = %166
@@ -210,8 +208,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
Register DstReg = Dst.getReg();
Register SrcReg = Src.getReg();
- if (Register::isVirtualRegister(DstReg) &&
- Register::isVirtualRegister(SrcReg)) {
+ if (DstReg.isVirtual() && SrcReg.isVirtual()) {
// Try to find in the map.
if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
// Change the 1st operand.
@@ -242,7 +239,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
if (RC0->getID() == Hexagon::PredRegsRegClassID) {
// Handle instructions that have a predicate register in op0
// (most cases of predicable instructions).
- if (Register::isVirtualRegister(Reg0)) {
+ if (Reg0.isVirtual()) {
// Try to find in the map.
if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
// Change the 1st operand and flip the opcode.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 52f247977094..5ece577e8285 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -207,7 +207,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FI = MI.getOperand(FIOp).getIndex();
// Select the base pointer (BP) and calculate the actual offset from BP
// to the beginning of the object at index FI.
- int Offset = HFI.getFrameIndexReference(MF, FI, BP);
+ int Offset = HFI.getFrameIndexReference(MF, FI, BP).getFixed();
// Add the offset from the instruction.
int RealOffset = Offset + MI.getOperand(FIOp+1).getImm();
bool IsKill = false;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
index b45d871e04d6..c8c66ebb69cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -97,7 +97,7 @@ namespace {
bool isFixedInstr(const MachineInstr *MI) const;
void partitionRegisters(UUSetMap &P2Rs);
int32_t profit(const MachineInstr *MI) const;
- int32_t profit(unsigned Reg) const;
+ int32_t profit(Register Reg) const;
bool isProfitable(const USet &Part, LoopRegMap &IRM) const;
void collectIndRegsForLoop(const MachineLoop *L, USet &Rs);
@@ -211,7 +211,7 @@ bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
if (!Op.isReg())
continue;
Register R = Op.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
return true;
}
return false;
@@ -259,7 +259,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
if (&MO == &Op || !MO.isReg() || MO.getSubReg())
continue;
Register T = MO.getReg();
- if (!Register::isVirtualRegister(T)) {
+ if (!T.isVirtual()) {
FixedRegs.set(x);
continue;
}
@@ -399,8 +399,8 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
return 0;
}
-int32_t HexagonSplitDoubleRegs::profit(unsigned Reg) const {
- assert(Register::isVirtualRegister(Reg));
+int32_t HexagonSplitDoubleRegs::profit(Register Reg) const {
+ assert(Reg.isVirtual());
const MachineInstr *DefI = MRI->getVRegDef(Reg);
switch (DefI->getOpcode()) {
@@ -574,12 +574,9 @@ void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) {
LoopVector WorkQ;
- for (auto I : *MLI)
- WorkQ.push_back(I);
- for (unsigned i = 0; i < WorkQ.size(); ++i) {
- for (auto I : *WorkQ[i])
- WorkQ.push_back(I);
- }
+ append_range(WorkQ, *MLI);
+ for (unsigned i = 0; i < WorkQ.size(); ++i)
+ append_range(WorkQ, *WorkQ[i]);
USet Rs;
for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) {
@@ -605,7 +602,7 @@ void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
// For register operands, set the subregister.
Register R = Op.getReg();
unsigned SR = Op.getSubReg();
- bool isVirtReg = Register::isVirtualRegister(R);
+ bool isVirtReg = R.isVirtual();
bool isKill = Op.isKill();
if (isVirtReg && MRI->getRegClass(R) == DoubleRC) {
isKill = false;
@@ -1106,7 +1103,7 @@ void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
if (!Op.isReg() || !Op.isUse())
continue;
Register R = Op.getReg();
- if (!Register::isVirtualRegister(R))
+ if (!R.isVirtual())
continue;
if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg())
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 2b7e1bcba9a3..87b1c43961d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -10,10 +10,10 @@
//
//===----------------------------------------------------------------------===//
+#include "HexagonSubtarget.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
-#include "HexagonSubtarget.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <map>
@@ -38,7 +39,6 @@ using namespace llvm;
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"
-
static cl::opt<bool> EnableBSBSched("enable-bsb-sched",
cl::Hidden, cl::ZeroOrMore, cl::init(true));
@@ -77,7 +77,8 @@ static cl::opt<bool> EnableCheckBankConflict("hexagon-check-bank-conflict",
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
StringRef FS, const TargetMachine &TM)
- : HexagonGenSubtargetInfo(TT, CPU, FS), OptLevel(TM.getOptLevel()),
+ : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ OptLevel(TM.getOptLevel()),
CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
RegInfo(getHwMode()), TLInfo(TM, *this),
@@ -104,7 +105,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
UseBSBScheduling = hasV60Ops() && EnableBSBSched;
- ParseSubtargetFeatures(CPUString, FS);
+ ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS);
if (OverrideLongCalls.getPosition())
UseLongCalls = OverrideLongCalls;
@@ -124,6 +125,76 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
return *this;
}
+bool HexagonSubtarget::isHVXElementType(MVT Ty, bool IncludeBool) const {
+ if (!useHVXOps())
+ return false;
+ if (Ty.isVector())
+ Ty = Ty.getVectorElementType();
+ if (IncludeBool && Ty == MVT::i1)
+ return true;
+ ArrayRef<MVT> ElemTypes = getHVXElementTypes();
+ return llvm::is_contained(ElemTypes, Ty);
+}
+
+bool HexagonSubtarget::isHVXVectorType(MVT VecTy, bool IncludeBool) const {
+ if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
+ return false;
+ MVT ElemTy = VecTy.getVectorElementType();
+ if (!IncludeBool && ElemTy == MVT::i1)
+ return false;
+
+ unsigned HwLen = getVectorLength();
+ unsigned NumElems = VecTy.getVectorNumElements();
+ ArrayRef<MVT> ElemTypes = getHVXElementTypes();
+
+ if (IncludeBool && ElemTy == MVT::i1) {
+ // Boolean HVX vector types are formed from regular HVX vector types
+ // by replacing the element type with i1.
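+ // e.g. with a 128-byte HwLen, v32i1, v64i1 and v128i1 are the boolean
+ // counterparts of v32i32, v64i16 and v128i8.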
+ for (MVT T : ElemTypes)
+ if (NumElems * T.getSizeInBits() == 8 * HwLen)
+ return true;
+ return false;
+ }
+
+ unsigned VecWidth = VecTy.getSizeInBits();
+ if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
+ return false;
+ return llvm::is_contained(ElemTypes, ElemTy);
+}
+
+bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
+ if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
+ return false;
+ // Avoid types like <2 x i32*>.
+ if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
+ return false;
+ // The given type may be something like <17 x i32>, which is not MVT,
+ // but can be represented as (non-simple) EVT.
+ EVT Ty = EVT::getEVT(VecTy, /*HandleUnknown*/false);
+ if (Ty.getSizeInBits() <= 64 || !Ty.getVectorElementType().isSimple())
+ return false;
+
+ auto isHvxTy = [this, IncludeBool](MVT SimpleTy) {
+ if (isHVXVectorType(SimpleTy, IncludeBool))
+ return true;
+ auto Action = getTargetLowering()->getPreferredVectorAction(SimpleTy);
+ return Action == TargetLoweringBase::TypeWidenVector;
+ };
+
+ // Round up EVT to have power-of-2 elements, and keep checking if it
+ // qualifies for HVX, dividing it in half after each step.
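+ // e.g. a <17 x i32> type is checked as v32i32, v16i32, v8i32 and v4i32 in turn.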
+ MVT ElemTy = Ty.getVectorElementType().getSimpleVT();
+ unsigned VecLen = PowerOf2Ceil(Ty.getVectorNumElements());
+ while (ElemTy.getSizeInBits() * VecLen > 64) {
+ MVT SimpleTy = MVT::getVectorVT(ElemTy, VecLen);
+ if (SimpleTy.isValid() && isHvxTy(SimpleTy))
+ return true;
+ VecLen /= 2;
+ }
+
+ return false;
+}
+
void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) {
for (SUnit &SU : DAG->SUnits) {
if (!SU.isInstr())
@@ -420,14 +491,14 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
for (auto &I : Src->Succs) {
if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
continue;
- unsigned DepR = I.getReg();
+ Register DepR = I.getReg();
int DefIdx = -1;
for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
const MachineOperand &MO = SrcI->getOperand(OpNum);
bool IsSameOrSubReg = false;
if (MO.isReg()) {
- unsigned MOReg = MO.getReg();
- if (Register::isVirtualRegister(DepR)) {
+ Register MOReg = MO.getReg();
+ if (DepR.isVirtual()) {
IsSameOrSubReg = (MOReg == DepR);
} else {
IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
@@ -456,7 +527,7 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
// Update the latency of opposite edge too.
T.setSUnit(Src);
- auto F = std::find(Dst->Preds.begin(), Dst->Preds.end(), T);
+ auto F = find(Dst->Preds, T);
assert(F != Dst->Preds.end());
F->setLatency(I.getLatency());
}
@@ -473,7 +544,7 @@ void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
// Update the latency of opposite edge too.
T.setSUnit(Src);
- auto F = std::find(Dst->Preds.begin(), Dst->Preds.end(), T);
+ auto F = find(Dst->Preds, T);
assert(F != Dst->Preds.end());
F->setLatency(Lat);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index de4f245519e4..7b7fb8d04f47 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -135,7 +135,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
bool hasV5Ops() const {
return getHexagonArchVersion() >= Hexagon::ArchEnum::V5;
@@ -275,31 +275,9 @@ public:
return makeArrayRef(Types);
}
- bool isHVXVectorType(MVT VecTy, bool IncludeBool = false) const {
- if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
- return false;
- MVT ElemTy = VecTy.getVectorElementType();
- if (!IncludeBool && ElemTy == MVT::i1)
- return false;
-
- unsigned HwLen = getVectorLength();
- unsigned NumElems = VecTy.getVectorNumElements();
- ArrayRef<MVT> ElemTypes = getHVXElementTypes();
-
- if (IncludeBool && ElemTy == MVT::i1) {
- // Boolean HVX vector types are formed from regular HVX vector types
- // by replacing the element type with i1.
- for (MVT T : ElemTypes)
- if (NumElems * T.getSizeInBits() == 8*HwLen)
- return true;
- return false;
- }
-
- unsigned VecWidth = VecTy.getSizeInBits();
- if (VecWidth != 8*HwLen && VecWidth != 16*HwLen)
- return false;
- return llvm::any_of(ElemTypes, [ElemTy] (MVT T) { return ElemTy == T; });
- }
+ bool isHVXElementType(MVT Ty, bool IncludeBool = false) const;
+ bool isHVXVectorType(MVT VecTy, bool IncludeBool = false) const;
+ bool isTypeForHVX(Type *VecTy, bool IncludeBool = false) const;
unsigned getTypeAlignment(MVT Ty) const {
if (isHVXVectorType(Ty, true))
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 3fe42ea13f51..9195bb3dc725 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -13,14 +13,17 @@
#include "HexagonTargetMachine.h"
#include "Hexagon.h"
#include "HexagonISelLowering.h"
+#include "HexagonLoopIdiomRecognition.h"
#include "HexagonMachineScheduler.h"
#include "HexagonTargetObjectFile.h"
#include "HexagonTargetTransformInfo.h"
+#include "HexagonVectorLoopCarriedReuse.h"
#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -97,10 +100,17 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
static cl::opt<bool> EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden,
cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization"));
+static cl::opt<bool> EnableVectorCombine("hexagon-vector-combine", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true), cl::desc("Enable HVX vector combining"));
+
static cl::opt<bool> EnableInitialCFGCleanup("hexagon-initial-cfg-cleanup",
cl::Hidden, cl::ZeroOrMore, cl::init(true),
cl::desc("Simplify the CFG after atomic expansion pass"));
+static cl::opt<bool> EnableInstSimplify("hexagon-instsimplify", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true),
+ cl::desc("Enable instsimplify"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -132,16 +142,17 @@ namespace llvm {
void initializeHexagonExpandCondsetsPass(PassRegistry&);
void initializeHexagonGenMuxPass(PassRegistry&);
void initializeHexagonHardwareLoopsPass(PassRegistry&);
- void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
- void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
+ void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
void initializeHexagonNewValueJumpPass(PassRegistry&);
void initializeHexagonOptAddrModePass(PassRegistry&);
void initializeHexagonPacketizerPass(PassRegistry&);
void initializeHexagonRDFOptPass(PassRegistry&);
void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
+ void initializeHexagonVectorCombineLegacyPass(PassRegistry&);
+ void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &);
void initializeHexagonVExtractPass(PassRegistry&);
Pass *createHexagonLoopIdiomPass();
- Pass *createHexagonVectorLoopCarriedReusePass();
+ Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
FunctionPass *createHexagonBitSimplify();
FunctionPass *createHexagonBranchRelaxation();
@@ -162,22 +173,21 @@ namespace llvm {
CodeGenOpt::Level OptLevel);
FunctionPass *createHexagonLoopRescheduling();
FunctionPass *createHexagonNewValueJump();
- FunctionPass *createHexagonOptimizeSZextends();
FunctionPass *createHexagonOptAddrMode();
+ FunctionPass *createHexagonOptimizeSZextends();
FunctionPass *createHexagonPacketizer(bool Minimal);
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonRDFOpt();
FunctionPass *createHexagonSplitConst32AndConst64();
FunctionPass *createHexagonSplitDoubleRegs();
FunctionPass *createHexagonStoreWidening();
+ FunctionPass *createHexagonVectorCombineLegacyPass();
FunctionPass *createHexagonVectorPrint();
FunctionPass *createHexagonVExtract();
} // end namespace llvm;
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
+ return RM.getValueOr(Reloc::Static);
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
@@ -191,13 +201,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
initializeHexagonEarlyIfConversionPass(PR);
initializeHexagonGenMuxPass(PR);
initializeHexagonHardwareLoopsPass(PR);
- initializeHexagonLoopIdiomRecognizePass(PR);
- initializeHexagonVectorLoopCarriedReusePass(PR);
+ initializeHexagonLoopIdiomRecognizeLegacyPassPass(PR);
initializeHexagonNewValueJumpPass(PR);
initializeHexagonOptAddrModePass(PR);
initializeHexagonPacketizerPass(PR);
initializeHexagonRDFOptPass(PR);
initializeHexagonSplitDoubleRegsPass(PR);
+ initializeHexagonVectorCombineLegacyPass(PR);
+ initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PR);
initializeHexagonVExtractPass(PR);
}
@@ -231,12 +242,10 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute FSAttr =
FnAttrs.getAttribute(AttributeList::FunctionIndex, "target-features");
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
// Append the preexisting target features last, so that +mattr overrides
// the "unsafe-fp-math" function attribute.
// Creating a separate target feature is not strictly necessary, it only
@@ -264,10 +273,22 @@ void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) {
PM.add(createHexagonLoopIdiomPass());
});
PMB.addExtension(
- PassManagerBuilder::EP_LoopOptimizerEnd,
- [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- PM.add(createHexagonVectorLoopCarriedReusePass());
- });
+ PassManagerBuilder::EP_LoopOptimizerEnd,
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ PM.add(createHexagonVectorLoopCarriedReuseLegacyPass());
+ });
+}
+
+void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
+ bool DebugPassManager) {
+ PB.registerLateLoopOptimizationsEPCallback(
+ [=](LoopPassManager &LPM, PassBuilder::OptimizationLevel Level) {
+ LPM.addPass(HexagonLoopIdiomRecognitionPass());
+ });
+ PB.registerLoopOptimizerEndEPCallback(
+ [=](LoopPassManager &LPM, PassBuilder::OptimizationLevel Level) {
+ LPM.addPass(HexagonVectorLoopCarriedReusePass());
+ });
}
TargetTransformInfo
@@ -312,7 +333,8 @@ void HexagonPassConfig::addIRPasses() {
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
if (!NoOpt) {
- addPass(createConstantPropagationPass());
+ if (EnableInstSimplify)
+ addPass(createInstSimplifyLegacyPass());
addPass(createDeadCodeEliminationPass());
}
@@ -320,9 +342,16 @@ void HexagonPassConfig::addIRPasses() {
if (!NoOpt) {
if (EnableInitialCFGCleanup)
- addPass(createCFGSimplificationPass(1, true, true, false, true));
+ addPass(createCFGSimplificationPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
if (EnableLoopPrefetch)
addPass(createLoopDataPrefetchPass());
+ if (EnableVectorCombine)
+ addPass(createHexagonVectorCombineLegacyPass());
if (EnableCommGEP)
addPass(createHexagonCommonGEP());
// Replace certain combinations of shifts and ands with extracts.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 7ee4474e90e3..fa174128f708 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -37,6 +37,8 @@ public:
static unsigned getModuleMatchQuality(const Module &M);
void adjustPassManager(PassManagerBuilder &PMB) override;
+ void registerPassBuilderCallbacks(PassBuilder &PB,
+ bool DebugPassManager) override;
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index cfc8ed813c92..595cf94e3f1d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -331,6 +331,7 @@ unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty,
case Type::LabelTyID:
case Type::MetadataTyID:
case Type::X86_MMXTyID:
+ case Type::X86_AMXTyID:
case Type::TokenTyID:
return 0;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 80c8736cb74a..1cefa6a04640 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
using namespace llvm;
@@ -34,6 +35,9 @@ static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
cl::init(true), cl::Hidden,
cl::desc("Control lookup table emission on Hexagon target"));
+static cl::opt<bool> HexagonMaskedVMem("hexagon-masked-vmem", cl::init(true),
+ cl::Hidden, cl::desc("Enable masked loads/stores for HVX"));
+
// Constant "cost factor" to make floating point operations more expensive
// in terms of vectorization cost. This isn't the best way, but it should
// do. Ultimately, the cost should use cycles.
@@ -43,22 +47,6 @@ bool HexagonTTIImpl::useHVX() const {
return ST.useHVXOps() && HexagonAutoHVX;
}
-bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
- assert(VecTy->isVectorTy());
- if (isa<ScalableVectorType>(VecTy))
- return false;
- // Avoid types like <2 x i32*>.
- if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
- return false;
- EVT VecVT = EVT::getEVT(VecTy);
- if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64)
- return false;
- if (ST.isHVXVectorType(VecVT.getSimpleVT()))
- return true;
- auto Action = TLI.getPreferredVectorAction(VecVT.getSimpleVT());
- return Action == TargetLoweringBase::TypeWidenVector;
-}
-
unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
return VTy->getNumElements();
@@ -84,7 +72,7 @@ void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
// Only try to peel innermost loops with small runtime trip counts.
- if (L && L->empty() && canPeel(L) &&
+ if (L && L->isInnermost() && canPeel(L) &&
SE.getSmallConstantTripCount(L) == 0 &&
SE.getSmallConstantMaxTripCount(L) > 0 &&
SE.getSmallConstantMaxTripCount(L) <= 5) {
@@ -105,7 +93,7 @@ unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
}
unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
- return useHVX() ? 2 : 0;
+ return useHVX() ? 2 : 1;
}
unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
@@ -113,7 +101,7 @@ unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
}
unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
- return useHVX() ? ST.getVectorLength()*8 : 0;
+ return useHVX() ? ST.getVectorLength()*8 : 32;
}
unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
@@ -168,7 +156,7 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
if (Src->isVectorTy()) {
VectorType *VecTy = cast<VectorType>(Src);
unsigned VecWidth = VecTy->getPrimitiveSizeInBits().getFixedSize();
- if (useHVX() && isTypeForHVX(VecTy)) {
+ if (useHVX() && ST.isTypeForHVX(VecTy)) {
unsigned RegWidth = getRegisterBitWidth(true);
assert(RegWidth && "Non-zero vector register width expected");
// Cost of HVX loads.
@@ -239,13 +227,16 @@ unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(
}
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) {
+ Type *CondTy,
+ CmpInst::Predicate VecPred,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
if (Opcode == Instruction::FCmp)
return LT.first + FloatFactor * getTypeNumElements(ValTy);
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
unsigned HexagonTTIImpl::getArithmeticInstrCost(
@@ -270,7 +261,9 @@ unsigned HexagonTTIImpl::getArithmeticInstrCost(
}
unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
- Type *SrcTy, TTI::TargetCostKind CostKind, const Instruction *I) {
+ Type *SrcTy, TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
@@ -305,6 +298,14 @@ unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return 1;
}
+bool HexagonTTIImpl::isLegalMaskedStore(Type *DataType, Align /*Alignment*/) {
+ return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
+}
+
+bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/) {
+ return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
+}
+
/// --- Vector TTI end ---
unsigned HexagonTTIImpl::getPrefetchDistance() const {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 5fe397486402..835358d3fed0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -43,7 +43,6 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
const HexagonTargetLowering *getTLI() const { return &TLI; }
bool useHVX() const;
- bool isTypeForHVX(Type *VecTy) const;
// Returns the number of vector elements of Ty, if Ty is a vector type,
// or 1 if Ty is a scalar type. It is incorrect to call this function
@@ -134,6 +133,8 @@ public:
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
unsigned getArithmeticInstrCost(
@@ -146,14 +147,18 @@ public:
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr);
+ TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
return 1;
}
+ bool isLegalMaskedStore(Type *DataType, Align Alignment);
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment);
+
/// @}
int getUserCost(const User *U, ArrayRef<const Value *> Operands,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
new file mode 100644
index 000000000000..a605fdfcf100
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -0,0 +1,1487 @@
+//===-- HexagonVectorCombine.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// HexagonVectorCombine is a utility class implementing a variety of functions
+// that assist in vector-based optimizations.
+//
+// AlignVectors: replace unaligned vector loads and stores with aligned ones.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+
+#include <algorithm>
+#include <deque>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+#define DEBUG_TYPE "hexagon-vc"
+
+using namespace llvm;
+
+namespace {
+class HexagonVectorCombine {
+public:
+ HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
+ DominatorTree &DT_, TargetLibraryInfo &TLI_,
+ const TargetMachine &TM_)
+ : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
+ TLI(TLI_),
+ HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}
+
+ bool run();
+
+ // Common integer type.
+ IntegerType *getIntTy() const;
+ // Byte type: either scalar (when ElemCount = 0), or vector with given
+ // element count.
+ Type *getByteTy(int ElemCount = 0) const;
+ // Boolean type: either scalar (when ElemCount = 0), or vector with given
+ // element count.
+ Type *getBoolTy(int ElemCount = 0) const;
+ // Create a ConstantInt of type returned by getIntTy with the value Val.
+ ConstantInt *getConstInt(int Val) const;
+ // Get the integer value of V, if it exists.
+ Optional<APInt> getIntValue(const Value *Val) const;
+ // Is V a constant 0, or a vector of 0s?
+ bool isZero(const Value *Val) const;
+ // Is V an undef value?
+ bool isUndef(const Value *Val) const;
+
+ int getSizeOf(const Value *Val) const;
+ int getSizeOf(const Type *Ty) const;
+ int getTypeAlignment(Type *Ty) const;
+
+ VectorType *getByteVectorTy(int ScLen) const;
+ Constant *getNullValue(Type *Ty) const;
+ Constant *getFullValue(Type *Ty) const;
+
+ Value *insertb(IRBuilder<> &Builder, Value *Dest, Value *Src, int Start,
+ int Length, int Where) const;
+ Value *vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const;
+ Value *vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const;
+ Value *concat(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) const;
+ Value *vresize(IRBuilder<> &Builder, Value *Val, int NewSize,
+ Value *Pad) const;
+ Value *rescale(IRBuilder<> &Builder, Value *Mask, Type *FromTy,
+ Type *ToTy) const;
+ Value *vlsb(IRBuilder<> &Builder, Value *Val) const;
+ Value *vbytes(IRBuilder<> &Builder, Value *Val) const;
+
+ Value *createHvxIntrinsic(IRBuilder<> &Builder, Intrinsic::ID IntID,
+ Type *RetTy, ArrayRef<Value *> Args) const;
+
+ Optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
+
+ template <typename T = std::vector<Instruction *>>
+ bool isSafeToMoveBeforeInBB(const Instruction &In,
+ BasicBlock::const_iterator To,
+ const T &Ignore = {}) const;
+
+ Function &F;
+ const DataLayout &DL;
+ AliasAnalysis &AA;
+ AssumptionCache &AC;
+ DominatorTree &DT;
+ TargetLibraryInfo &TLI;
+ const HexagonSubtarget &HST;
+
+private:
+#ifndef NDEBUG
+ // These two functions are only used for assertions at the moment.
+ bool isByteVecTy(Type *Ty) const;
+ bool isSectorTy(Type *Ty) const;
+#endif
+ Value *getElementRange(IRBuilder<> &Builder, Value *Lo, Value *Hi, int Start,
+ int Length) const;
+};
+
+class AlignVectors {
+public:
+ AlignVectors(HexagonVectorCombine &HVC_) : HVC(HVC_) {}
+
+ bool run();
+
+private:
+ using InstList = std::vector<Instruction *>;
+
+ struct Segment {
+ void *Data;
+ int Start;
+ int Size;
+ };
+
+ struct AddrInfo {
+ AddrInfo(const AddrInfo &) = default;
+ AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T,
+ Align H)
+ : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
+ NeedAlign(HVC.getTypeAlignment(ValTy)) {}
+
+ // XXX: add Size member?
+ Instruction *Inst;
+ Value *Addr;
+ Type *ValTy;
+ Align HaveAlign;
+ Align NeedAlign;
+ int Offset = 0; // Offset (in bytes) from the first member of the
+ // containing AddrList.
+ };
+ using AddrList = std::vector<AddrInfo>;
+
+ struct InstrLess {
+ bool operator()(const Instruction *A, const Instruction *B) const {
+ return A->comesBefore(B);
+ }
+ };
+ using DepList = std::set<Instruction *, InstrLess>;
+
+ struct MoveGroup {
+ MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
+ : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {}
+ Instruction *Base; // Base instruction of the parent address group.
+ InstList Main; // Main group of instructions.
+ InstList Deps; // List of dependencies.
+ bool IsHvx; // Is this group of HVX instructions?
+ bool IsLoad; // Is this a load group?
+ };
+ using MoveList = std::vector<MoveGroup>;
+
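+ // A ByteSpan is a list of Blocks; each Block places bytes
+ // [Seg.Start, Seg.Start+Seg.Size) of the value Seg.Val at byte position Pos
+ // within the span.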
+ struct ByteSpan {
+ struct Segment {
+ Segment(Value *Val, int Begin, int Len)
+ : Val(Val), Start(Begin), Size(Len) {}
+ Segment(const Segment &Seg) = default;
+ Value *Val;
+ int Start;
+ int Size;
+ };
+
+ struct Block {
+ Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
+ Block(Value *Val, int Off, int Len, int Pos)
+ : Seg(Val, Off, Len), Pos(Pos) {}
+ Block(const Block &Blk) = default;
+ Segment Seg;
+ int Pos;
+ };
+
+ int extent() const;
+ ByteSpan section(int Start, int Length) const;
+ ByteSpan &shift(int Offset);
+
+ int size() const { return Blocks.size(); }
+ Block &operator[](int i) { return Blocks[i]; }
+
+ std::vector<Block> Blocks;
+
+ using iterator = decltype(Blocks)::iterator;
+ iterator begin() { return Blocks.begin(); }
+ iterator end() { return Blocks.end(); }
+ using const_iterator = decltype(Blocks)::const_iterator;
+ const_iterator begin() const { return Blocks.begin(); }
+ const_iterator end() const { return Blocks.end(); }
+ };
+
+ Align getAlignFromValue(const Value *V) const;
+ Optional<MemoryLocation> getLocation(const Instruction &In) const;
+ Optional<AddrInfo> getAddrInfo(Instruction &In) const;
+ bool isHvx(const AddrInfo &AI) const;
+
+ Value *getPayload(Value *Val) const;
+ Value *getMask(Value *Val) const;
+ Value *getPassThrough(Value *Val) const;
+
+ Value *createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy,
+ int Adjust) const;
+ Value *createAlignedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy,
+ int Alignment) const;
+ Value *createAlignedLoad(IRBuilder<> &Builder, Type *ValTy, Value *Ptr,
+ int Alignment, Value *Mask, Value *PassThru) const;
+ Value *createAlignedStore(IRBuilder<> &Builder, Value *Val, Value *Ptr,
+ int Alignment, Value *Mask) const;
+
+ bool createAddressGroups();
+ MoveList createLoadGroups(const AddrList &Group) const;
+ MoveList createStoreGroups(const AddrList &Group) const;
+ bool move(const MoveGroup &Move) const;
+ bool realignGroup(const MoveGroup &Move) const;
+
+ friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
+ friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
+ friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS);
+
+ std::map<Instruction *, AddrList> AddrGroups;
+ HexagonVectorCombine &HVC;
+};
+
+LLVM_ATTRIBUTE_UNUSED
+raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
+ OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
+ OS << "Addr: " << *AI.Addr << '\n';
+ OS << "Type: " << *AI.ValTy << '\n';
+ OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
+ OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
+ OS << "Offset: " << AI.Offset;
+ return OS;
+}
+
+LLVM_ATTRIBUTE_UNUSED
+raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
+ OS << "Main\n";
+ for (Instruction *I : MG.Main)
+ OS << " " << *I << '\n';
+ OS << "Deps\n";
+ for (Instruction *I : MG.Deps)
+ OS << " " << *I << '\n';
+ return OS;
+}
+
+LLVM_ATTRIBUTE_UNUSED
+raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
+ OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
+ for (const AlignVectors::ByteSpan::Block &B : BS) {
+ OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "
+ << *B.Seg.Val << '\n';
+ }
+ OS << ']';
+ return OS;
+}
+
+} // namespace
+
+namespace {
+
+template <typename T> T *getIfUnordered(T *MaybeT) {
+ return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
+}
+template <typename T> T *isCandidate(Instruction *In) {
+ return dyn_cast<T>(In);
+}
+template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
+ return getIfUnordered(dyn_cast<LoadInst>(In));
+}
+template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
+ return getIfUnordered(dyn_cast<StoreInst>(In));
+}
+
+#if !defined(_MSC_VER) || _MSC_VER >= 1924
+// VS2017 has trouble compiling this:
+// error C2976: 'std::map': too few template arguments
+template <typename Pred, typename... Ts>
+void erase_if(std::map<Ts...> &map, Pred p)
+#else
+template <typename Pred, typename T, typename U>
+void erase_if(std::map<T, U> &map, Pred p)
+#endif
+{
+ for (auto i = map.begin(), e = map.end(); i != e;) {
+ if (p(*i))
+ i = map.erase(i);
+ else
+ i = std::next(i);
+ }
+}
+
+// Forward other erase_ifs to the LLVM implementations.
+template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
+ llvm::erase_if(std::forward<T>(container), p);
+}
+
+} // namespace
+
+// --- Begin AlignVectors
+
+auto AlignVectors::ByteSpan::extent() const -> int {
+ if (size() == 0)
+ return 0;
+ int Min = Blocks[0].Pos;
+ int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
+ for (int i = 1, e = size(); i != e; ++i) {
+ Min = std::min(Min, Blocks[i].Pos);
+ Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
+ }
+ return Max - Min;
+}
+
+auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
+ ByteSpan Section;
+ for (const ByteSpan::Block &B : Blocks) {
+ int L = std::max(B.Pos, Start); // Left end.
+ int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end+1.
+ if (L < R) {
+ // How much to chop off the beginning of the segment:
+ int Off = L > B.Pos ? L - B.Pos : 0;
+ Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
+ }
+ }
+ return Section;
+}
+
+auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
+ for (Block &B : Blocks)
+ B.Pos += Offset;
+ return *this;
+}
+
+auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
+ const auto *C = dyn_cast<ConstantInt>(V);
+ assert(C && "Alignment must be a compile-time constant integer");
+ return C->getAlignValue();
+}
+
+auto AlignVectors::getAddrInfo(Instruction &In) const -> Optional<AddrInfo> {
+ if (auto *L = isCandidate<LoadInst>(&In))
+ return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
+ L->getAlign());
+ if (auto *S = isCandidate<StoreInst>(&In))
+ return AddrInfo(HVC, S, S->getPointerOperand(),
+ S->getValueOperand()->getType(), S->getAlign());
+ if (auto *II = isCandidate<IntrinsicInst>(&In)) {
+ Intrinsic::ID ID = II->getIntrinsicID();
+ switch (ID) {
+ case Intrinsic::masked_load:
+ return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
+ getAlignFromValue(II->getArgOperand(1)));
+ case Intrinsic::masked_store:
+ return AddrInfo(HVC, II, II->getArgOperand(1),
+ II->getArgOperand(0)->getType(),
+ getAlignFromValue(II->getArgOperand(2)));
+ }
+ }
+ return Optional<AddrInfo>();
+}
+
+auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
+ return HVC.HST.isTypeForHVX(AI.ValTy);
+}
+
+auto AlignVectors::getPayload(Value *Val) const -> Value * {
+ if (auto *In = dyn_cast<Instruction>(Val)) {
+ Intrinsic::ID ID = 0;
+ if (auto *II = dyn_cast<IntrinsicInst>(In))
+ ID = II->getIntrinsicID();
+ if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
+ return In->getOperand(0);
+ }
+ return Val;
+}
+
+auto AlignVectors::getMask(Value *Val) const -> Value * {
+ if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ return II->getArgOperand(2);
+ case Intrinsic::masked_store:
+ return II->getArgOperand(3);
+ }
+ }
+
+ Type *ValTy = getPayload(Val)->getType();
+ if (auto *VecTy = dyn_cast<VectorType>(ValTy)) {
+ int ElemCount = VecTy->getElementCount().getFixedValue();
+ return HVC.getFullValue(HVC.getBoolTy(ElemCount));
+ }
+ return HVC.getFullValue(HVC.getBoolTy());
+}
+
+auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
+ if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return II->getArgOperand(3);
+ }
+ return UndefValue::get(getPayload(Val)->getType());
+}
+
+auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr,
+ Type *ValTy, int Adjust) const
+ -> Value * {
+ // The adjustment is in bytes, but if it's a multiple of the type size,
+ // we don't need to do pointer casts.
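+ // e.g. adjusting an i32* by 8 bytes becomes a GEP by 2 elements, while an
+ // adjustment of 6 bytes goes through an intermediate i8* GEP.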
+ Type *ElemTy = cast<PointerType>(Ptr->getType())->getElementType();
+ int ElemSize = HVC.getSizeOf(ElemTy);
+ if (Adjust % ElemSize == 0) {
+ Value *Tmp0 = Builder.CreateGEP(Ptr, HVC.getConstInt(Adjust / ElemSize));
+ return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo());
+ }
+
+ PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext());
+ Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy);
+ Value *Tmp1 = Builder.CreateGEP(Tmp0, HVC.getConstInt(Adjust));
+ return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo());
+}
+
+auto AlignVectors::createAlignedPointer(IRBuilder<> &Builder, Value *Ptr,
+ Type *ValTy, int Alignment) const
+ -> Value * {
+ Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy());
+ Value *Mask = HVC.getConstInt(-Alignment);
+ Value *And = Builder.CreateAnd(AsInt, Mask);
+ return Builder.CreateIntToPtr(And, ValTy->getPointerTo());
+}
+
+auto AlignVectors::createAlignedLoad(IRBuilder<> &Builder, Type *ValTy,
+ Value *Ptr, int Alignment, Value *Mask,
+ Value *PassThru) const -> Value * {
+ assert(!HVC.isUndef(Mask)); // Should this be allowed?
+ if (HVC.isZero(Mask))
+ return PassThru;
+ if (Mask == ConstantInt::getTrue(Mask->getType()))
+ return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment));
+ return Builder.CreateMaskedLoad(Ptr, Align(Alignment), Mask, PassThru);
+}
+
+auto AlignVectors::createAlignedStore(IRBuilder<> &Builder, Value *Val,
+ Value *Ptr, int Alignment,
+ Value *Mask) const -> Value * {
+ if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
+ return UndefValue::get(Val->getType());
+ if (Mask == ConstantInt::getTrue(Mask->getType()))
+ return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
+ return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
+}
+
+auto AlignVectors::createAddressGroups() -> bool {
+ // An address group created here may contain instructions spanning
+ // multiple basic blocks.
+ AddrList WorkStack;
+
+ auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
+ for (AddrInfo &W : WorkStack) {
+ if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
+ return std::make_pair(W.Inst, *D);
+ }
+ return std::make_pair(nullptr, 0);
+ };
+
+ auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
+ BasicBlock &Block = *DomN->getBlock();
+ for (Instruction &I : Block) {
+ auto AI = this->getAddrInfo(I); // Use this-> for gcc6.
+ if (!AI)
+ continue;
+ auto F = findBaseAndOffset(*AI);
+ Instruction *GroupInst;
+ if (Instruction *BI = F.first) {
+ AI->Offset = F.second;
+ GroupInst = BI;
+ } else {
+ WorkStack.push_back(*AI);
+ GroupInst = AI->Inst;
+ }
+ AddrGroups[GroupInst].push_back(*AI);
+ }
+
+ for (DomTreeNode *C : DomN->children())
+ Visit(C, Visit);
+
+ while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
+ WorkStack.pop_back();
+ };
+
+ traverseBlock(HVC.DT.getRootNode(), traverseBlock);
+ assert(WorkStack.empty());
+
+ // AddrGroups are formed.
+
+ // Remove groups of size 1.
+ erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
+ // Remove groups that don't use HVX types.
+ erase_if(AddrGroups, [&](auto &G) {
+ return !llvm::any_of(
+ G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
+ });
+ // Remove groups where everything is properly aligned.
+ erase_if(AddrGroups, [&](auto &G) {
+ return llvm::all_of(G.second,
+ [&](auto &I) { return I.HaveAlign >= I.NeedAlign; });
+ });
+
+ return !AddrGroups.empty();
+}
+
+auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
+ // Form load groups.
+ // To avoid complications with moving code across basic blocks, only form
+ // groups that are contained within a single basic block.
+
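+ // Upward dependencies of In: instructions in the same block that come after
+ // Base and that In transitively uses. They will be moved together with In.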
+ auto getUpwardDeps = [](Instruction *In, Instruction *Base) {
+ BasicBlock *Parent = Base->getParent();
+ assert(In->getParent() == Parent &&
+ "Base and In should be in the same block");
+ assert(Base->comesBefore(In) && "Base should come before In");
+
+ DepList Deps;
+ std::deque<Instruction *> WorkQ = {In};
+ while (!WorkQ.empty()) {
+ Instruction *D = WorkQ.front();
+ WorkQ.pop_front();
+ Deps.insert(D);
+ for (Value *Op : D->operands()) {
+ if (auto *I = dyn_cast<Instruction>(Op)) {
+ if (I->getParent() == Parent && Base->comesBefore(I))
+ WorkQ.push_back(I);
+ }
+ }
+ }
+ return Deps;
+ };
+
+ auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
+ assert(!Move.Main.empty() && "Move group should have non-empty Main");
+ // Don't mix HVX and non-HVX instructions.
+ if (Move.IsHvx != isHvx(Info))
+ return false;
+ // Leading instruction in the load group.
+ Instruction *Base = Move.Main.front();
+ if (Base->getParent() != Info.Inst->getParent())
+ return false;
+
+ auto isSafeToMoveToBase = [&](const Instruction *I) {
+ return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator());
+ };
+ DepList Deps = getUpwardDeps(Info.Inst, Base);
+ if (!llvm::all_of(Deps, isSafeToMoveToBase))
+ return false;
+
+ // The dependencies will be moved together with the load, so make sure
+ // that none of them could be moved independently in another group.
+ Deps.erase(Info.Inst);
+ auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; };
+ if (llvm::any_of(Deps, inAddrMap))
+ return false;
+ Move.Main.push_back(Info.Inst);
+ llvm::append_range(Move.Deps, Deps);
+ return true;
+ };
+
+ MoveList LoadGroups;
+
+ for (const AddrInfo &Info : Group) {
+ if (!Info.Inst->mayReadFromMemory())
+ continue;
+ if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
+ LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
+ }
+
+ // Erase singleton groups.
+ erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
+ return LoadGroups;
+}
+
+auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
+ // Form store groups.
+ // To avoid complications with moving code across basic blocks, only form
+ // groups that are contained within a single basic block.
+
+ auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
+ assert(!Move.Main.empty() && "Move group should have non-empty Main");
+ // For stores with return values we'd have to collect downward dependencies.
+ // There are no such stores that we handle at the moment, so omit that.
+ assert(Info.Inst->getType()->isVoidTy() &&
+ "Not handling stores with return values");
+ // Don't mix HVX and non-HVX instructions.
+ if (Move.IsHvx != isHvx(Info))
+ return false;
+ // For stores we need to be careful whether it's safe to move them.
+ // Stores that are otherwise safe to move together may not appear safe
+ // to move over one another (i.e. isSafeToMoveBefore may return false).
+ Instruction *Base = Move.Main.front();
+ if (Base->getParent() != Info.Inst->getParent())
+ return false;
+ if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
+ return false;
+ Move.Main.push_back(Info.Inst);
+ return true;
+ };
+
+ MoveList StoreGroups;
+
+ for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
+ const AddrInfo &Info = *I;
+ if (!Info.Inst->mayWriteToMemory())
+ continue;
+ if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
+ StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
+ }
+
+ // Erase singleton groups.
+ erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
+ return StoreGroups;
+}
+
+auto AlignVectors::move(const MoveGroup &Move) const -> bool {
+ assert(!Move.Main.empty() && "Move group should have non-empty Main");
+ Instruction *Where = Move.Main.front();
+
+ if (Move.IsLoad) {
+ // Move all deps to before Where, keeping order.
+ for (Instruction *D : Move.Deps)
+ D->moveBefore(Where);
+ // Move all main instructions to after Where, keeping order.
+ ArrayRef<Instruction *> Main(Move.Main);
+ for (Instruction *M : Main.drop_front(1)) {
+ M->moveAfter(Where);
+ Where = M;
+ }
+ } else {
+ // NOTE: Deps are empty for "store" groups. If they need to be
+ // non-empty, decide on the order.
+ assert(Move.Deps.empty());
+ // Move all main instructions to before Where, inverting order.
+ ArrayRef<Instruction *> Main(Move.Main);
+ for (Instruction *M : Main.drop_front(1)) {
+ M->moveBefore(Where);
+ Where = M;
+ }
+ }
+
+ return Move.Main.size() + Move.Deps.size() > 1;
+}
+
+auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
+ // TODO: Needs support for masked loads/stores of "scalar" vectors.
+ if (!Move.IsHvx)
+ return false;
+
+ // Return the element with the maximum alignment from Range,
+ // where GetValue obtains the value to compare from an element.
+ auto getMaxOf = [](auto Range, auto GetValue) {
+ return *std::max_element(
+ Range.begin(), Range.end(),
+ [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); });
+ };
+
+ const AddrList &BaseInfos = AddrGroups.at(Move.Base);
+
+ // Conceptually, there is a vector of N bytes covering the addresses
+ // starting from the minimum offset (i.e. Base.Addr+Start). This vector
+ // represents a contiguous memory region that spans all accessed memory
+ // locations.
+ // The correspondence between loaded or stored values will be expressed
+ // in terms of this vector. For example, the 0th element of the vector
+ // from the Base address info will start at byte Start from the beginning
+ // of this conceptual vector.
+ //
+ // This vector will be loaded/stored starting at the nearest down-aligned
+ // address and the amount of the down-alignment will be AlignVal:
+ // valign(load_vector(align_down(Base+Start)), AlignVal)
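+ //
+ // For illustration (hypothetical values, not part of the upstream comment):
+ // with 64-byte sectors, if the lowest accessed address is Base+Start and
+ // Start is not a multiple of 64, the data is loaded from
+ // align_down(Base+Start) and shifted right by AlignVal bytes so that byte 0
+ // of the conceptual vector lines up with the byte at Base+Start.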
+
+ std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
+ AddrList MoveInfos;
+ llvm::copy_if(
+ BaseInfos, std::back_inserter(MoveInfos),
+ [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });
+
+ // Maximum alignment present in the whole address group.
+ const AddrInfo &WithMaxAlign =
+ getMaxOf(BaseInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
+ Align MaxGiven = WithMaxAlign.HaveAlign;
+
+ // Address info with the lowest offset (i.e. the lowest address) in the
+ // move address group.
+ const AddrInfo &WithMinOffset =
+ getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });
+
+ const AddrInfo &WithMaxNeeded =
+ getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
+ Align MinNeeded = WithMaxNeeded.NeedAlign;
+
+ // Set the builder at the top instruction in the move group.
+ Instruction *TopIn = Move.IsLoad ? Move.Main.front() : Move.Main.back();
+ IRBuilder<> Builder(TopIn);
+ Value *AlignAddr = nullptr; // Actual aligned address.
+ Value *AlignVal = nullptr; // Right-shift amount (for valign).
+
+ if (MinNeeded <= MaxGiven) {
+ int Start = WithMinOffset.Offset;
+ int OffAtMax = WithMaxAlign.Offset;
+ // Shift the offset of the maximally aligned instruction (OffAtMax)
+ // back by just enough multiples of the required alignment to cover the
+ // distance from Start to OffAtMax.
+ // Calculate the address adjustment amount based on the address with the
+ // maximum alignment. This is to allow a simple gep instruction instead
+ // of potential bitcasts to i8*.
+ int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
+ AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
+ WithMaxAlign.ValTy, Adjust);
+ int Diff = Start - (OffAtMax + Adjust);
+ AlignVal = HVC.getConstInt(Diff);
+ // Sanity.
+ assert(Diff >= 0);
+ assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
+ } else {
+ // WithMinOffset is the lowest address in the group,
+ // WithMinOffset.Addr = Base+Start.
+ // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb)
+ // mask off unnecessary bits, so it's ok to just use the original pointer as
+ // the alignment amount.
+ // Do an explicit down-alignment of the address to avoid creating an
+ // aligned instruction with an address that is not really aligned.
+ AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr,
+ WithMinOffset.ValTy, MinNeeded.value());
+ AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy());
+ }
+
+ ByteSpan VSpan;
+ for (const AddrInfo &AI : MoveInfos) {
+ VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
+ AI.Offset - WithMinOffset.Offset);
+ }
+
+ // The aligned loads/stores will use blocks that are either scalars,
+ // or HVX vectors. Let "sector" be the unified term for such a block.
+ // blend(scalar, vector) -> sector...
+ int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
+ : std::max<int>(MinNeeded.value(), 4);
+ assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
+ assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
+
+ Type *SecTy = HVC.getByteTy(ScLen);
+ int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
+
+ if (Move.IsLoad) {
+ ByteSpan ASpan;
+ auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
+ auto *Undef = UndefValue::get(SecTy);
+
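+ // Descriptive note (not part of the upstream patch): load NumSectors+1
+ // consecutive sectors starting at the aligned address; the loop that
+ // follows combines each adjacent pair with vralignb so that sector j holds
+ // bytes [j*ScLen, (j+1)*ScLen) of the conceptual vector.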
+ for (int i = 0; i != NumSectors + 1; ++i) {
+ Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
+ // FIXME: generate a predicated load?
+ Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef);
+ ASpan.Blocks.emplace_back(Load, ScLen, i * ScLen);
+ }
+
+ for (int j = 0; j != NumSectors; ++j) {
+ ASpan[j].Seg.Val = HVC.vralignb(Builder, ASpan[j].Seg.Val,
+ ASpan[j + 1].Seg.Val, AlignVal);
+ }
+
+ for (ByteSpan::Block &B : VSpan) {
+ ByteSpan Section = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
+ Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
+ for (ByteSpan::Block &S : Section) {
+ Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val));
+ Accum =
+ HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos);
+ }
+ // Instead of casting everything to bytes for the vselect, cast to the
+ // original value type. This will avoid complications with casting masks.
+ // For example, in cases where the original mask applies to i32, it could
+ // be converted to a mask applicable to i8 via pred_typecast intrinsic,
+ // but if the mask is not exactly of HVX length, extra handling would be
+ // needed to make it work.
+ Type *ValTy = getPayload(B.Seg.Val)->getType();
+ Value *Cast = Builder.CreateBitCast(Accum, ValTy);
+ Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
+ getPassThrough(B.Seg.Val));
+ B.Seg.Val->replaceAllUsesWith(Sel);
+ }
+ } else {
+ // Stores.
+ ByteSpan ASpanV, ASpanM;
+
+ // Return a vector value corresponding to the input value Val:
+ // either <1 x Val> for scalar Val, or Val itself for vector Val.
+ auto MakeVec = [](IRBuilder<> &Builder, Value *Val) -> Value * {
+ Type *Ty = Val->getType();
+ if (Ty->isVectorTy())
+ return Val;
+ auto *VecTy = VectorType::get(Ty, 1, /*Scalable*/ false);
+ return Builder.CreateBitCast(Val, VecTy);
+ };
+
+ // Create an extra "undef" sector at the beginning and at the end.
+ // They will be used as the left/right filler in the vlalign step.
+ for (int i = -1; i != NumSectors + 1; ++i) {
+ // For stores, the size of each section is an aligned vector length.
+ // Adjust the store offsets relative to the section start offset.
+ ByteSpan Section = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
+ Value *AccumV = UndefValue::get(SecTy);
+ Value *AccumM = HVC.getNullValue(SecTy);
+ for (ByteSpan::Block &S : Section) {
+ Value *Pay = getPayload(S.Seg.Val);
+ Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
+ Pay->getType(), HVC.getByteTy());
+ AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask),
+ S.Seg.Start, S.Seg.Size, S.Pos);
+ AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay),
+ S.Seg.Start, S.Seg.Size, S.Pos);
+ }
+ ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
+ ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);
+ }
+
+ // vlalign
+ for (int j = 1; j != NumSectors + 2; ++j) {
+ ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanV[j - 1].Seg.Val,
+ ASpanV[j].Seg.Val, AlignVal);
+ ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanM[j - 1].Seg.Val,
+ ASpanM[j].Seg.Val, AlignVal);
+ }
+
+ for (int i = 0; i != NumSectors + 1; ++i) {
+ Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
+ Value *Val = ASpanV[i].Seg.Val;
+ Value *Mask = ASpanM[i].Seg.Val; // bytes
+ if (!HVC.isUndef(Val) && !HVC.isZero(Mask))
+ createAlignedStore(Builder, Val, Ptr, ScLen, HVC.vlsb(Builder, Mask));
+ }
+ }
+
+ for (auto *Inst : Move.Main)
+ Inst->eraseFromParent();
+
+ return true;
+}
+
+auto AlignVectors::run() -> bool {
+ if (!createAddressGroups())
+ return false;
+
+ bool Changed = false;
+ MoveList LoadGroups, StoreGroups;
+
+ for (auto &G : AddrGroups) {
+ llvm::append_range(LoadGroups, createLoadGroups(G.second));
+ llvm::append_range(StoreGroups, createStoreGroups(G.second));
+ }
+
+ for (auto &M : LoadGroups)
+ Changed |= move(M);
+ for (auto &M : StoreGroups)
+ Changed |= move(M);
+
+ for (auto &M : LoadGroups)
+ Changed |= realignGroup(M);
+ for (auto &M : StoreGroups)
+ Changed |= realignGroup(M);
+
+ return Changed;
+}
+
+// --- End AlignVectors
+
+auto HexagonVectorCombine::run() -> bool {
+ if (!HST.useHVXOps())
+ return false;
+
+ bool Changed = AlignVectors(*this).run();
+ return Changed;
+}
+
+auto HexagonVectorCombine::getIntTy() const -> IntegerType * {
+ return Type::getInt32Ty(F.getContext());
+}
+
+auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
+ assert(ElemCount >= 0);
+ IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
+ if (ElemCount == 0)
+ return ByteTy;
+ return VectorType::get(ByteTy, ElemCount, /*Scalable*/ false);
+}
+
+auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
+ assert(ElemCount >= 0);
+ IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
+ if (ElemCount == 0)
+ return BoolTy;
+ return VectorType::get(BoolTy, ElemCount, /*Scalable*/ false);
+}
+
+auto HexagonVectorCombine::getConstInt(int Val) const -> ConstantInt * {
+ return ConstantInt::getSigned(getIntTy(), Val);
+}
+
+auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
+ if (auto *C = dyn_cast<Constant>(Val))
+ return C->isZeroValue();
+ return false;
+}
+
+auto HexagonVectorCombine::getIntValue(const Value *Val) const
+ -> Optional<APInt> {
+ if (auto *CI = dyn_cast<ConstantInt>(Val))
+ return CI->getValue();
+ return None;
+}
+
+auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
+ return isa<UndefValue>(Val);
+}
+
+auto HexagonVectorCombine::getSizeOf(const Value *Val) const -> int {
+ return getSizeOf(Val->getType());
+}
+
+auto HexagonVectorCombine::getSizeOf(const Type *Ty) const -> int {
+ return DL.getTypeStoreSize(const_cast<Type *>(Ty)).getFixedValue();
+}
+
+auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
+ // The actual type may be shorter than the HVX vector, so determine
+ // the alignment based on subtarget info.
+ if (HST.isTypeForHVX(Ty))
+ return HST.getVectorLength();
+ return DL.getABITypeAlign(Ty).value();
+}
+
+auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
+ assert(Ty->isIntOrIntVectorTy());
+ auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
+ if (auto *VecTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
+ return Zero;
+}
+
+auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
+ assert(Ty->isIntOrIntVectorTy());
+ auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
+ if (auto *VecTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
+ return Minus1;
+}
+
+// Insert bytes [Start..Start+Length) of Src into Dst at byte Where.
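+// For example (illustrative values, not from the upstream comment): with
+// 8-byte Dst and Src, Start=0, Length=2 and Where=3, the shuffle mask built
+// below is {0,1,2,8,9,5,6,7}, i.e. bytes 0..2 and 5..7 come from Dst and
+// bytes 3..4 are Src[0..1].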
+auto HexagonVectorCombine::insertb(IRBuilder<> &Builder, Value *Dst, Value *Src,
+ int Start, int Length, int Where) const
+ -> Value * {
+ assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
+ int SrcLen = getSizeOf(Src);
+ int DstLen = getSizeOf(Dst);
+ assert(0 <= Start && Start + Length <= SrcLen);
+ assert(0 <= Where && Where + Length <= DstLen);
+
+ int P2Len = PowerOf2Ceil(SrcLen | DstLen);
+ auto *Undef = UndefValue::get(getByteTy());
+ Value *P2Src = vresize(Builder, Src, P2Len, Undef);
+ Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);
+
+ SmallVector<int, 256> SMask(P2Len);
+ for (int i = 0; i != P2Len; ++i) {
+ // If i is in [Where, Where+Length), pick Src[Start+(i-Where)].
+ // Otherwise, pick Dst[i];
+ SMask[i] =
+ (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
+ }
+
+ Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask);
+ return vresize(Builder, P2Insert, DstLen, Undef);
+}
+
+auto HexagonVectorCombine::vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi,
+ Value *Amt) const -> Value * {
+ assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
+ assert(isSectorTy(Hi->getType()));
+ if (isZero(Amt))
+ return Hi;
+ int VecLen = getSizeOf(Hi);
+ if (auto IntAmt = getIntValue(Amt))
+ return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
+ VecLen);
+
+ if (HST.isTypeForHVX(Hi->getType())) {
+ int HwLen = HST.getVectorLength();
+ assert(VecLen == HwLen && "Expecting an exact HVX type");
+ Intrinsic::ID V6_vlalignb = HwLen == 64
+ ? Intrinsic::hexagon_V6_vlalignb
+ : Intrinsic::hexagon_V6_vlalignb_128B;
+ return createHvxIntrinsic(Builder, V6_vlalignb, Hi->getType(),
+ {Hi, Lo, Amt});
+ }
+
+ if (VecLen == 4) {
+ Value *Pair = concat(Builder, {Lo, Hi});
+ Value *Shift = Builder.CreateLShr(Builder.CreateShl(Pair, Amt), 32);
+ Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
+ return Builder.CreateBitCast(Trunc, Hi->getType());
+ }
+ if (VecLen == 8) {
+ Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt);
+ return vralignb(Builder, Lo, Hi, Sub);
+ }
+ llvm_unreachable("Unexpected vector length");
+}
+
+auto HexagonVectorCombine::vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi,
+ Value *Amt) const -> Value * {
+ assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
+ assert(isSectorTy(Lo->getType()));
+ if (isZero(Amt))
+ return Lo;
+ int VecLen = getSizeOf(Lo);
+ if (auto IntAmt = getIntValue(Amt))
+ return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
+
+ if (HST.isTypeForHVX(Lo->getType())) {
+ int HwLen = HST.getVectorLength();
+ assert(VecLen == HwLen && "Expecting an exact HVX type");
+ Intrinsic::ID V6_valignb = HwLen == 64 ? Intrinsic::hexagon_V6_valignb
+ : Intrinsic::hexagon_V6_valignb_128B;
+ return createHvxIntrinsic(Builder, V6_valignb, Lo->getType(),
+ {Hi, Lo, Amt});
+ }
+
+ if (VecLen == 4) {
+ Value *Pair = concat(Builder, {Lo, Hi});
+ Value *Shift = Builder.CreateLShr(Pair, Amt);
+ Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
+ return Builder.CreateBitCast(Trunc, Lo->getType());
+ }
+ if (VecLen == 8) {
+ Type *Int64Ty = Type::getInt64Ty(F.getContext());
+ Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty);
+ Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty);
+ Function *FI = Intrinsic::getDeclaration(F.getParent(),
+ Intrinsic::hexagon_S2_valignrb);
+ Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt});
+ return Builder.CreateBitCast(Call, Lo->getType());
+ }
+ llvm_unreachable("Unexpected vector length");
+}
+
+// Concatenates a sequence of vectors of the same type.
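+// For example (illustrative): concatenating three <4 x i8> vectors pads the
+// worklist to four with an undef vector, joins pairs into two <8 x i8>
+// vectors, joins those into one <16 x i8> vector, and finally extracts the
+// first 12 elements.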
+auto HexagonVectorCombine::concat(IRBuilder<> &Builder,
+ ArrayRef<Value *> Vecs) const -> Value * {
+ assert(!Vecs.empty());
+ SmallVector<int, 256> SMask;
+ std::vector<Value *> Work[2];
+ int ThisW = 0, OtherW = 1;
+
+ Work[ThisW].assign(Vecs.begin(), Vecs.end());
+ while (Work[ThisW].size() > 1) {
+ auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
+ int ElemCount = Ty->getElementCount().getFixedValue();
+ SMask.resize(ElemCount * 2);
+ std::iota(SMask.begin(), SMask.end(), 0);
+
+ Work[OtherW].clear();
+ if (Work[ThisW].size() % 2 != 0)
+ Work[ThisW].push_back(UndefValue::get(Ty));
+ for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
+ Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i],
+ Work[ThisW][i + 1], SMask);
+ Work[OtherW].push_back(Joined);
+ }
+ std::swap(ThisW, OtherW);
+ }
+
+ // Undefs may have been appended to keep the number of shuffle operands even
+ // at each level, so the result can be wider than needed. Perform a final
+ // shuffle that picks only the original elements.
+ SMask.resize(Vecs.size() * getSizeOf(Vecs.front()->getType()));
+ std::iota(SMask.begin(), SMask.end(), 0);
+ Value *Total = Work[OtherW].front();
+ return Builder.CreateShuffleVector(Total, SMask);
+}
+
+auto HexagonVectorCombine::vresize(IRBuilder<> &Builder, Value *Val,
+ int NewSize, Value *Pad) const -> Value * {
+ assert(isa<VectorType>(Val->getType()));
+ auto *ValTy = cast<VectorType>(Val->getType());
+ assert(ValTy->getElementType() == Pad->getType());
+
+ int CurSize = ValTy->getElementCount().getFixedValue();
+ if (CurSize == NewSize)
+ return Val;
+ // Truncate?
+ if (CurSize > NewSize)
+ return getElementRange(Builder, Val, /*Unused*/ Val, 0, NewSize);
+ // Extend.
+ SmallVector<int, 128> SMask(NewSize);
+ std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
+ std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
+ Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad);
+ return Builder.CreateShuffleVector(Val, PadVec, SMask);
+}
+
+auto HexagonVectorCombine::rescale(IRBuilder<> &Builder, Value *Mask,
+ Type *FromTy, Type *ToTy) const -> Value * {
+ // Mask is a vector <N x i1>, where each element corresponds to an
+ // element of FromTy. Remap it so that each element will correspond
+ // to an element of ToTy.
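+ // For example (illustrative), a <4 x i1> mask for <4 x i32> remapped to
+ // bytes becomes a <16 x i1> mask in which each original bit appears four
+ // times.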
+ assert(isa<VectorType>(Mask->getType()));
+
+ Type *FromSTy = FromTy->getScalarType();
+ Type *ToSTy = ToTy->getScalarType();
+ if (FromSTy == ToSTy)
+ return Mask;
+
+ int FromSize = getSizeOf(FromSTy);
+ int ToSize = getSizeOf(ToSTy);
+ assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
+
+ auto *MaskTy = cast<VectorType>(Mask->getType());
+ int FromCount = MaskTy->getElementCount().getFixedValue();
+ int ToCount = (FromCount * FromSize) / ToSize;
+ assert((FromCount * FromSize) % ToSize == 0);
+
+ // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
+ // -> trunc to <M x i1>.
+ Value *Ext = Builder.CreateSExt(
+ Mask, VectorType::get(FromSTy, FromCount, /*Scalable*/ false));
+ Value *Cast = Builder.CreateBitCast(
+ Ext, VectorType::get(ToSTy, ToCount, /*Scalable*/ false));
+ return Builder.CreateTrunc(
+ Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable*/ false));
+}
+
+// Bitcast to bytes, and return least significant bits.
+auto HexagonVectorCombine::vlsb(IRBuilder<> &Builder, Value *Val) const
+ -> Value * {
+ Type *ScalarTy = Val->getType()->getScalarType();
+ if (ScalarTy == getBoolTy())
+ return Val;
+
+ Value *Bytes = vbytes(Builder, Val);
+ if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
+ return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)));
+ // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
+ // <1 x i1>.
+ return Builder.CreateTrunc(Bytes, getBoolTy());
+}
+
+// Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
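+// For example (illustrative), <4 x i32> is bitcast to <16 x i8>, while a
+// <64 x i1> mask is sign-extended to <64 x i8>.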
+auto HexagonVectorCombine::vbytes(IRBuilder<> &Builder, Value *Val) const
+ -> Value * {
+ Type *ScalarTy = Val->getType()->getScalarType();
+ if (ScalarTy == getByteTy())
+ return Val;
+
+ if (ScalarTy != getBoolTy())
+ return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)));
+ // For bool, return a sext from i1 to i8.
+ if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
+ return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy));
+ return Builder.CreateSExt(Val, getByteTy());
+}
+
+auto HexagonVectorCombine::createHvxIntrinsic(IRBuilder<> &Builder,
+ Intrinsic::ID IntID, Type *RetTy,
+ ArrayRef<Value *> Args) const
+ -> Value * {
+ int HwLen = HST.getVectorLength();
+ Type *BoolTy = Type::getInt1Ty(F.getContext());
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ // HVX vector -> v16i32/v32i32
+ // HVX vector predicate -> v512i1/v1024i1
+ auto getTypeForIntrin = [&](Type *Ty) -> Type * {
+ if (HST.isTypeForHVX(Ty, /*IncludeBool*/ true)) {
+ Type *ElemTy = cast<VectorType>(Ty)->getElementType();
+ if (ElemTy == Int32Ty)
+ return Ty;
+ if (ElemTy == BoolTy)
+ return VectorType::get(BoolTy, 8 * HwLen, /*Scalable*/ false);
+ return VectorType::get(Int32Ty, HwLen / 4, /*Scalable*/ false);
+ }
+ // Non-HVX type. It should be a scalar.
+ assert(Ty == Int32Ty || Ty->isIntegerTy(64));
+ return Ty;
+ };
+
+ auto getCast = [&](IRBuilder<> &Builder, Value *Val,
+ Type *DestTy) -> Value * {
+ Type *SrcTy = Val->getType();
+ if (SrcTy == DestTy)
+ return Val;
+ if (HST.isTypeForHVX(SrcTy, /*IncludeBool*/ true)) {
+ if (cast<VectorType>(SrcTy)->getElementType() == BoolTy) {
+ // This should take care of casts the other way too, for example
+ // v1024i1 -> v32i1.
+ Intrinsic::ID TC = HwLen == 64
+ ? Intrinsic::hexagon_V6_pred_typecast
+ : Intrinsic::hexagon_V6_pred_typecast_128B;
+ Function *FI = Intrinsic::getDeclaration(F.getParent(), TC,
+ {DestTy, Val->getType()});
+ return Builder.CreateCall(FI, {Val});
+ }
+ // Non-predicate HVX vector.
+ return Builder.CreateBitCast(Val, DestTy);
+ }
+ // Non-HVX type. It should be a scalar, and it should already have
+ // a valid type.
+ llvm_unreachable("Unexpected type");
+ };
+
+ SmallVector<Value *, 4> IntOps;
+ for (Value *A : Args)
+ IntOps.push_back(getCast(Builder, A, getTypeForIntrin(A->getType())));
+ Function *FI = Intrinsic::getDeclaration(F.getParent(), IntID);
+ Value *Call = Builder.CreateCall(FI, IntOps);
+
+ Type *CallTy = Call->getType();
+ if (CallTy == RetTy)
+ return Call;
+ // Scalar types should have RetTy matching the call return type.
+ assert(HST.isTypeForHVX(CallTy, /*IncludeBool*/ true));
+ if (cast<VectorType>(CallTy)->getElementType() == BoolTy)
+ return getCast(Builder, Call, RetTy);
+ return Builder.CreateBitCast(Call, RetTy);
+}
+
+auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
+ Value *Ptr1) const
+ -> Optional<int> {
+ struct Builder : IRBuilder<> {
+ Builder(BasicBlock *B) : IRBuilder<>(B) {}
+ ~Builder() {
+ for (Instruction *I : llvm::reverse(ToErase))
+ I->eraseFromParent();
+ }
+ SmallVector<Instruction *, 8> ToErase;
+ };
+
+#define CallBuilder(B, F) \
+ [&](auto &B_) { \
+ Value *V = B_.F; \
+ if (auto *I = dyn_cast<Instruction>(V)) \
+ B_.ToErase.push_back(I); \
+ return V; \
+ }(B)
+
+ auto Simplify = [&](Value *V) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
+ if (Value *S = SimplifyInstruction(I, Q))
+ return S;
+ }
+ return V;
+ };
+
+ auto StripBitCast = [](Value *V) {
+ while (auto *C = dyn_cast<BitCastInst>(V))
+ V = C->getOperand(0);
+ return V;
+ };
+
+ Ptr0 = StripBitCast(Ptr0);
+ Ptr1 = StripBitCast(Ptr1);
+ if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
+ return None;
+
+ auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
+ auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
+ if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
+ return None;
+
+ Builder B(Gep0->getParent());
+ Value *BasePtr = Gep0->getPointerOperand();
+ int Scale = DL.getTypeStoreSize(BasePtr->getType()->getPointerElementType());
+
+ // FIXME: for now only check GEPs with a single index.
+ if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
+ return None;
+
+ Value *Idx0 = Gep0->getOperand(1);
+ Value *Idx1 = Gep1->getOperand(1);
+
+ // First, try to simplify the subtraction directly.
+ if (auto *Diff = dyn_cast<ConstantInt>(
+ Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
+ return Diff->getSExtValue() * Scale;
+
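+ // Descriptive note (not part of the upstream patch): otherwise, split each
+ // index into the bits that are known in both indices and the bits that are
+ // not, and require the difference of each part to fold to a constant. The
+ // result is the sum of the two partial differences, scaled by the element
+ // size.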
+ KnownBits Known0 = computeKnownBits(Idx0, DL, 0, &AC, Gep0, &DT);
+ KnownBits Known1 = computeKnownBits(Idx1, DL, 0, &AC, Gep1, &DT);
+ APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
+ if (Unknown.isAllOnesValue())
+ return None;
+
+ Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
+ Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
+ Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
+ Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
+ int Diff0 = 0;
+ if (auto *C = dyn_cast<ConstantInt>(SubU)) {
+ Diff0 = C->getSExtValue();
+ } else {
+ return None;
+ }
+
+ Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
+ Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
+ Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
+ Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
+ int Diff1 = 0;
+ if (auto *C = dyn_cast<ConstantInt>(SubK)) {
+ Diff1 = C->getSExtValue();
+ } else {
+ return None;
+ }
+
+ return (Diff0 + Diff1) * Scale;
+
+#undef CallBuilder
+}
+
+template <typename T>
+auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
+ BasicBlock::const_iterator To,
+ const T &Ignore) const
+ -> bool {
+ auto getLocOrNone = [this](const Instruction &I) -> Optional<MemoryLocation> {
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ return MemoryLocation::getForArgument(II, 0, TLI);
+ case Intrinsic::masked_store:
+ return MemoryLocation::getForArgument(II, 1, TLI);
+ }
+ }
+ return MemoryLocation::getOrNone(&I);
+ };
+
+ // The source and the destination must be in the same basic block.
+ const BasicBlock &Block = *In.getParent();
+ assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
+ // No PHIs.
+ if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
+ return false;
+
+ if (!mayBeMemoryDependent(In))
+ return true;
+ bool MayWrite = In.mayWriteToMemory();
+ auto MaybeLoc = getLocOrNone(In);
+
+ auto From = In.getIterator();
+ if (From == To)
+ return true;
+ bool MoveUp = (To != Block.end() && To->comesBefore(&In));
+ auto Range =
+ MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
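+ // Scan the instructions between the current position and the destination
+ // (skipping those in Ignore) for anything that would make the move unsafe.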
+ for (auto It = Range.first; It != Range.second; ++It) {
+ const Instruction &I = *It;
+ if (llvm::is_contained(Ignore, &I))
+ continue;
+ // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
+ if (I.mayThrow())
+ return false;
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (!CB->hasFnAttr(Attribute::WillReturn))
+ return false;
+ if (!CB->hasFnAttr(Attribute::NoSync))
+ return false;
+ }
+ if (I.mayReadOrWriteMemory()) {
+ auto MaybeLocI = getLocOrNone(I);
+ if (MayWrite || I.mayWriteToMemory()) {
+ if (!MaybeLoc || !MaybeLocI)
+ return false;
+ if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+#ifndef NDEBUG
+auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
+ if (auto *VecTy = dyn_cast<VectorType>(Ty))
+ return VecTy->getElementType() == getByteTy();
+ return false;
+}
+
+auto HexagonVectorCombine::isSectorTy(Type *Ty) const -> bool {
+ if (!isByteVecTy(Ty))
+ return false;
+ int Size = getSizeOf(Ty);
+ if (HST.isTypeForHVX(Ty))
+ return Size == static_cast<int>(HST.getVectorLength());
+ return Size == 4 || Size == 8;
+}
+#endif
+
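+// Return Length elements of the concatenation (Lo, Hi) starting at index
+// Start. For example (illustrative), Start=3 and Length=8 on <8 x i8> inputs
+// yields {Lo[3..7], Hi[0..2]}.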
+auto HexagonVectorCombine::getElementRange(IRBuilder<> &Builder, Value *Lo,
+ Value *Hi, int Start,
+ int Length) const -> Value * {
+ assert(0 <= Start && Start < Length);
+ SmallVector<int, 128> SMask(Length);
+ std::iota(SMask.begin(), SMask.end(), Start);
+ return Builder.CreateShuffleVector(Lo, Hi, SMask);
+}
+
+// Pass management.
+
+namespace llvm {
+void initializeHexagonVectorCombineLegacyPass(PassRegistry &);
+FunctionPass *createHexagonVectorCombineLegacyPass();
+} // namespace llvm
+
+namespace {
+class HexagonVectorCombineLegacy : public FunctionPass {
+public:
+ static char ID;
+
+ HexagonVectorCombineLegacy() : FunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "Hexagon Vector Combine"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnFunction(Function &F) override {
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
+ HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);
+ return HVC.run();
+ }
+};
+} // namespace
+
+char HexagonVectorCombineLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
+ "Hexagon Vector Combine", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
+ "Hexagon Vector Combine", false, false)
+
+FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
+ return new HexagonVectorCombineLegacy();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index 42451e02ba36..310536458de9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -11,110 +11,9 @@
// to identify loop carried dependences. This is scalar replacement for vector
// types.
//
-//-----------------------------------------------------------------------------
-// Motivation: Consider the case where we have the following loop structure.
-//
-// Loop:
-// t0 = a[i];
-// t1 = f(t0);
-// t2 = g(t1);
-// ...
-// t3 = a[i+1];
-// t4 = f(t3);
-// t5 = g(t4);
-// t6 = op(t2, t5)
-// cond_branch <Loop>
-//
-// This can be converted to
-// t00 = a[0];
-// t10 = f(t00);
-// t20 = g(t10);
-// Loop:
-// t2 = t20;
-// t3 = a[i+1];
-// t4 = f(t3);
-// t5 = g(t4);
-// t6 = op(t2, t5)
-// t20 = t5
-// cond_branch <Loop>
-//
-// SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
-// Such a loop comes to this pass in the following form.
-//
-// LoopPreheader:
-// X0 = a[0];
-// Loop:
-// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
-// t1 = f(X2) <-- I1
-// t2 = g(t1)
-// ...
-// X1 = a[i+1]
-// t4 = f(X1) <-- I2
-// t5 = g(t4)
-// t6 = op(t2, t5)
-// cond_branch <Loop>
-//
-// In this pass, we look for PHIs such as X2 whose incoming values come only
-// from the Loop Preheader and over the backedge and additionaly, both these
-// values are the results of the same operation in terms of opcode. We call such
-// a PHI node a dependence chain or DepChain. In this case, the dependence of X2
-// over X1 is carried over only one iteration and so the DepChain is only one
-// PHI node long.
-//
-// Then, we traverse the uses of the PHI (X2) and the uses of the value of the
-// PHI coming over the backedge (X1). We stop at the first pair of such users
-// I1 (of X2) and I2 (of X1) that meet the following conditions.
-// 1. I1 and I2 are the same operation, but with different operands.
-// 2. X2 and X1 are used at the same operand number in the two instructions.
-// 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a
-// a DepChain from Op1 to Op2 of the same length as that between X2 and X1.
-//
-// We then make the following transformation
-// LoopPreheader:
-// X0 = a[0];
-// Y0 = f(X0);
-// Loop:
-// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
-// Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
-// t1 = f(X2) <-- Will be removed by DCE.
-// t2 = g(Y2)
-// ...
-// X1 = a[i+1]
-// t4 = f(X1)
-// t5 = g(t4)
-// t6 = op(t2, t5)
-// cond_branch <Loop>
-//
-// We proceed until we cannot find any more such instructions I1 and I2.
-//
-// --- DepChains & Loop carried dependences ---
-// Consider a single basic block loop such as
-//
-// LoopPreheader:
-// X0 = ...
-// Y0 = ...
-// Loop:
-// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
-// Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
-// ...
-// X1 = ...
-// ...
-// cond_branch <Loop>
-//
-// Then there is a dependence between X2 and X1 that goes back one iteration,
-// i.e. X1 is used as X2 in the very next iteration. We represent this as a
-// DepChain from X2 to X1 (X2->X1).
-// Similarly, there is a dependence between Y2 and X1 that goes back two
-// iterations. X1 is used as Y2 two iterations after it is computed. This is
-// represented by a DepChain as (Y2->X2->X1).
-//
-// A DepChain has the following properties.
-// 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of
-// iterations of carried dependence + 1.
-// 2. All instructions in the DepChain except the last are PHIs.
-//
//===----------------------------------------------------------------------===//
+#include "HexagonVectorLoopCarriedReuse.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -161,8 +60,8 @@ static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
namespace llvm {
-void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
-Pass *createHexagonVectorLoopCarriedReusePass();
+void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &);
+Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
} // end namespace llvm
@@ -262,13 +161,13 @@ namespace {
return OS;
}
- class HexagonVectorLoopCarriedReuse : public LoopPass {
+ class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass {
public:
static char ID;
- explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
+ explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {
PassRegistry *PR = PassRegistry::getPassRegistry();
- initializeHexagonVectorLoopCarriedReusePass(*PR);
+ initializeHexagonVectorLoopCarriedReuseLegacyPassPass(*PR);
}
StringRef getPassName() const override {
@@ -276,7 +175,6 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
@@ -284,6 +182,13 @@ namespace {
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ };
+
+ class HexagonVectorLoopCarriedReuse {
+ public:
+ HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(L) {}
+
+ bool run();
private:
SetVector<DepChain *> Dependences;
@@ -305,33 +210,49 @@ namespace {
} // end anonymous namespace
-char HexagonVectorLoopCarriedReuse::ID = 0;
+char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
- "Hexagon-specific predictive commoning for HVX vectors", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
+ "Hexagon-specific predictive commoning for HVX vectors",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
-INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
- "Hexagon-specific predictive commoning for HVX vectors", false, false)
+INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
+ "Hexagon-specific predictive commoning for HVX vectors",
+ false, false)
+
+PreservedAnalyses
+HexagonVectorLoopCarriedReusePass::run(Loop &L, LoopAnalysisManager &LAM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ HexagonVectorLoopCarriedReuse Vlcr(&L);
+ if (!Vlcr.run())
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
-bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L,
+ LPPassManager &LPM) {
if (skipLoop(L))
return false;
+ HexagonVectorLoopCarriedReuse Vlcr(L);
+ return Vlcr.run();
+}
- if (!L->getLoopPreheader())
+bool HexagonVectorLoopCarriedReuse::run() {
+ if (!CurLoop->getLoopPreheader())
return false;
// Work only on innermost loops.
- if (!L->getSubLoops().empty())
+ if (!CurLoop->getSubLoops().empty())
return false;
// Work only on single basic blocks loops.
- if (L->getNumBlocks() != 1)
+ if (CurLoop->getNumBlocks() != 1)
return false;
- CurLoop = L;
-
return doVLCR();
}
@@ -745,6 +666,6 @@ void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
++i) { dbgs() << *Dependences[i] << "\n"; });
}
-Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
- return new HexagonVectorLoopCarriedReuse();
+Pass *llvm::createHexagonVectorLoopCarriedReuseLegacyPass() {
+ return new HexagonVectorLoopCarriedReuseLegacyPass();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h
new file mode 100644
index 000000000000..f1e0c5804ace
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h
@@ -0,0 +1,139 @@
+//===- HexagonVectorLoopCarriedReuse.h ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass removes the computation of provably redundant expressions that have
+// been computed earlier in a previous iteration. It relies on the use of PHIs
+// to identify loop carried dependences. This is scalar replacement for vector
+// types.
+//
+//-----------------------------------------------------------------------------
+// Motivation: Consider the case where we have the following loop structure.
+//
+// Loop:
+// t0 = a[i];
+// t1 = f(t0);
+// t2 = g(t1);
+// ...
+// t3 = a[i+1];
+// t4 = f(t3);
+// t5 = g(t4);
+// t6 = op(t2, t5)
+// cond_branch <Loop>
+//
+// This can be converted to
+// t00 = a[0];
+// t10 = f(t00);
+// t20 = g(t10);
+// Loop:
+// t2 = t20;
+// t3 = a[i+1];
+// t4 = f(t3);
+// t5 = g(t4);
+// t6 = op(t2, t5)
+// t20 = t5
+// cond_branch <Loop>
+//
+// SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
+// Such a loop comes to this pass in the following form.
+//
+// LoopPreheader:
+// X0 = a[0];
+// Loop:
+// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
+// t1 = f(X2) <-- I1
+// t2 = g(t1)
+// ...
+// X1 = a[i+1]
+// t4 = f(X1) <-- I2
+// t5 = g(t4)
+// t6 = op(t2, t5)
+// cond_branch <Loop>
+//
+// In this pass, we look for PHIs such as X2 whose incoming values come only
+// from the Loop Preheader and over the backedge and additionally, both these
+// values are the results of the same operation in terms of opcode. We call such
+// a PHI node a dependence chain or DepChain. In this case, the dependence of X2
+// over X1 is carried over only one iteration and so the DepChain is only one
+// PHI node long.
+//
+// Then, we traverse the uses of the PHI (X2) and the uses of the value of the
+// PHI coming over the backedge (X1). We stop at the first pair of such users
+// I1 (of X2) and I2 (of X1) that meet the following conditions.
+// 1. I1 and I2 are the same operation, but with different operands.
+// 2. X2 and X1 are used at the same operand number in the two instructions.
+// 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is
+// a DepChain from Op1 to Op2 of the same length as that between X2 and X1.
+//
+// We then make the following transformation
+// LoopPreheader:
+// X0 = a[0];
+// Y0 = f(X0);
+// Loop:
+// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
+// Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
+// t1 = f(X2) <-- Will be removed by DCE.
+// t2 = g(Y2)
+// ...
+// X1 = a[i+1]
+// t4 = f(X1)
+// t5 = g(t4)
+// t6 = op(t2, t5)
+// cond_branch <Loop>
+//
+// We proceed until we cannot find any more such instructions I1 and I2.
+//
+// --- DepChains & Loop carried dependences ---
+// Consider a single basic block loop such as
+//
+// LoopPreheader:
+// X0 = ...
+// Y0 = ...
+// Loop:
+// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
+// Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
+// ...
+// X1 = ...
+// ...
+// cond_branch <Loop>
+//
+// Then there is a dependence between X2 and X1 that goes back one iteration,
+// i.e. X1 is used as X2 in the very next iteration. We represent this as a
+// DepChain from X2 to X1 (X2->X1).
+// Similarly, there is a dependence between Y2 and X1 that goes back two
+// iterations. X1 is used as Y2 two iterations after it is computed. This is
+// represented by a DepChain as (Y2->X2->X1).
+//
+// A DepChain has the following properties.
+// 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of
+// iterations of carried dependence + 1.
+// 2. All instructions in the DepChain except the last are PHIs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H
+
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+class Loop;
+
+/// Hexagon Vector Loop Carried Reuse Pass
+struct HexagonVectorLoopCarriedReusePass
+ : public PassInfoMixin<HexagonVectorLoopCarriedReusePass> {
+ HexagonVectorLoopCarriedReusePass() {}
+
+ /// Run pass over the Loop.
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index e7069819fa57..627c53cadd84 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -74,7 +74,7 @@ public:
void setExtender(MCContext &Context) const {
if (Extender == nullptr)
- const_cast<HexagonAsmBackend *>(this)->Extender = new (Context) MCInst;
+ const_cast<HexagonAsmBackend *>(this)->Extender = Context.createMCInst();
}
MCInst *takeExtender() const {
@@ -736,7 +736,7 @@ public:
auto &Inst = const_cast<MCInst &>(RF.getInst());
while (Size > 0 &&
HexagonMCInstrInfo::bundleSize(Inst) < MaxPacketSize) {
- MCInst *Nop = new (Context) MCInst;
+ MCInst *Nop = Context.createMCInst();
Nop->setOpcode(Hexagon::A2_nop);
Inst.addOperand(MCOperand::createInst(Nop));
Size -= 4;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index cd96a23e1b94..76658378c0cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -34,6 +34,7 @@ public:
static char const *getRegisterName(unsigned RegNo);
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 2b0bbdafa381..e7ade7834a9f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -209,7 +210,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
case Hexagon::A2_tfrsi:
Rt = L.getOperand(0);
compoundOpcode = J4_jumpseti;
- CompoundInsn = new (Context) MCInst;
+ CompoundInsn = Context.createMCInst();
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rt);
@@ -222,7 +223,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
Rs = L.getOperand(1);
compoundOpcode = J4_jumpsetr;
- CompoundInsn = new (Context) MCInst;
+ CompoundInsn = Context.createMCInst();
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rt);
CompoundInsn->addOperand(Rs);
@@ -236,7 +237,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
Rt = L.getOperand(2);
compoundOpcode = cmpeqBitOpcode[getCompoundOp(R)];
- CompoundInsn = new (Context) MCInst;
+ CompoundInsn = Context.createMCInst();
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
CompoundInsn->addOperand(Rt);
@@ -249,7 +250,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
Rt = L.getOperand(2);
compoundOpcode = cmpgtBitOpcode[getCompoundOp(R)];
- CompoundInsn = new (Context) MCInst;
+ CompoundInsn = Context.createMCInst();
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
CompoundInsn->addOperand(Rt);
@@ -262,7 +263,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
Rt = L.getOperand(2);
compoundOpcode = cmpgtuBitOpcode[getCompoundOp(R)];
- CompoundInsn = new (Context) MCInst;
+ CompoundInsn = Context.createMCInst();
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
CompoundInsn->addOperand(Rt);
@@ -280,7 +281,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)];
Rs = L.getOperand(1);
- CompoundInsn = new (Context) MCInst;
+ CompoundInsn = Context.createMCInst();
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
CompoundInsn->addOperand(L.getOperand(2));
@@ -298,7 +299,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)];
Rs = L.getOperand(1);
- CompoundInsn = new (Context) MCInst;
+ CompoundInsn = Context.createMCInst();
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
CompoundInsn->addOperand(L.getOperand(2));
@@ -309,7 +310,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
LLVM_DEBUG(dbgs() << "CX: C2_cmpgtui\n");
Rs = L.getOperand(1);
compoundOpcode = cmpgtuiBitOpcode[getCompoundOp(R)];
- CompoundInsn = new (Context) MCInst;
+ CompoundInsn = Context.createMCInst();
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
CompoundInsn->addOperand(L.getOperand(2));
@@ -320,7 +321,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
LLVM_DEBUG(dbgs() << "CX: S2_tstbit_i\n");
Rs = L.getOperand(1);
compoundOpcode = tstBitOpcode[getCompoundOp(R)];
- CompoundInsn = new (Context) MCInst;
+ CompoundInsn = Context.createMCInst();
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
CompoundInsn->addOperand(R.getOperand(1));
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index f9f342a07f6d..fa12fe1da448 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -110,7 +110,7 @@ HexagonMCInstrInfo::bundleInstructions(MCInstrInfo const &MCII,
iterator_range<MCInst::const_iterator>
HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) {
assert(isBundle(MCI));
- return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end());
+ return drop_begin(MCI, bundleInstructionsOffset);
}
size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 7514d0e67744..5e4138ae6e09 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -468,7 +468,8 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
StringRef CPUName = Features.first;
StringRef ArchFS = Features.second;
- MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS);
+ MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(
+ TT, CPUName, /*TuneCPU*/ CPUName, ArchFS);
if (X != nullptr && (CPUName == "hexagonv67t"))
addArchSubtarget(X, ArchFS);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 2788b86181e2..8a44ba32606e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -326,7 +326,7 @@ bool HexagonShuffler::ValidResourceUsage(HexagonPacketSummary const &Summary) {
}
// Verify the CVI slot subscriptions.
- std::stable_sort(begin(), end(), HexagonInstr::lessCVI);
+ llvm::stable_sort(*this, HexagonInstr::lessCVI);
// create vector of hvx instructions to check
HVXInstsT hvxInsts;
hvxInsts.clear();
@@ -609,8 +609,7 @@ llvm::Optional<HexagonShuffler::HexagonPacket>
HexagonShuffler::tryAuction(HexagonPacketSummary const &Summary) const {
HexagonPacket PacketResult = Packet;
HexagonUnitAuction AuctionCore(Summary.ReservedSlotMask);
- std::stable_sort(PacketResult.begin(), PacketResult.end(),
- HexagonInstr::lessCore);
+ llvm::stable_sort(PacketResult, HexagonInstr::lessCore);
const bool ValidSlots =
llvm::all_of(insts(PacketResult), [&AuctionCore](HexagonInstr const &I) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
index 5a98debd3c00..894bdf38fe17 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -195,8 +195,7 @@ bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) {
// If it's a code node, add all ref nodes from it.
uint16_t Kind = BA.Addr->getKind();
if (Kind == NodeAttrs::Stmt || Kind == NodeAttrs::Phi) {
- for (auto N : NodeAddr<CodeNode*>(BA).Addr->members(DFG))
- DRNs.push_back(N);
+ append_range(DRNs, NodeAddr<CodeNode*>(BA).Addr->members(DFG));
DINs.push_back(DFG.addr<InstrNode*>(I));
} else {
llvm_unreachable("Unexpected code node");
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index 32ccf7172594..b96e178109d0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -1020,7 +1020,7 @@ SDValue LanaiTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Size = Op.getOperand(1);
SDLoc DL(Op);
- unsigned SPReg = getStackPointerRegisterToSaveRestore();
+ Register SPReg = getStackPointerRegisterToSaveRestore();
// Get a reference to the stack pointer.
SDValue StackPointer = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i32);
@@ -1500,8 +1500,7 @@ void LanaiTargetLowering::computeKnownBitsForTargetNode(
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
- Known.Zero &= Known2.Zero;
- Known.One &= Known2.One;
+ Known = KnownBits::commonBits(Known, Known2);
break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
index ebf91e08fbc8..d9d7847a0c5a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
@@ -27,7 +27,7 @@ void LanaiSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUName.empty())
CPUName = "generic";
- ParseSubtargetFeatures(CPUName, FS);
+ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS);
}
LanaiSubtarget &LanaiSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -41,6 +41,6 @@ LanaiSubtarget::LanaiSubtarget(const Triple &TargetTriple, StringRef Cpu,
const TargetOptions & /*Options*/,
CodeModel::Model /*CodeModel*/,
CodeGenOpt::Level /*OptLevel*/)
- : LanaiGenSubtargetInfo(TargetTriple, Cpu, FeatureString),
+ : LanaiGenSubtargetInfo(TargetTriple, Cpu, /*TuneCPU*/ Cpu, FeatureString),
FrameLowering(initializeSubtargetDependencies(Cpu, FeatureString)),
InstrInfo(), TLInfo(TM, *this), TSInfo() {}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.h
index 116c83a4df91..7955bfe0d8b9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.h
@@ -17,7 +17,6 @@
#include "LanaiISelLowering.h"
#include "LanaiInstrInfo.h"
#include "LanaiSelectionDAGInfo.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -38,7 +37,7 @@ public:
// ParseSubtargetFeatures - Parses features string setting specified
// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
LanaiSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
index 69387119f1f4..a31f59214ec7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -48,9 +48,7 @@ static std::string computeDataLayout() {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::PIC_;
- return *RM;
+ return RM.getValueOr(Reloc::PIC_);
}
LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT,
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h
index fb2bc0644fe8..00922f44f33a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h
@@ -13,12 +13,10 @@
#ifndef LLVM_LIB_TARGET_LANAI_LANAITARGETMACHINE_H
#define LLVM_LIB_TARGET_LANAI_LANAITARGETMACHINE_H
-#include "LanaiFrameLowering.h"
#include "LanaiISelLowering.h"
#include "LanaiInstrInfo.h"
#include "LanaiSelectionDAGInfo.h"
#include "LanaiSubtarget.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
index 7366d5059c9f..263f838e44a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
@@ -67,7 +67,8 @@ public:
}
int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr) {
return getIntImmCost(Imm, Ty, CostKind);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index 0fb27a926003..a17afe5e62f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -69,11 +69,6 @@ public:
return Lanai::NumTargetFixupKinds;
}
- bool mayNeedRelaxation(const MCInst & /*Inst*/,
- const MCSubtargetInfo &STI) const override {
- return false;
- }
-
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
index ce6df2969d73..f0d287c858d8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
@@ -43,6 +43,7 @@ public:
void printMemImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index 2ff893273c92..e850b98de806 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -56,7 +56,7 @@ createLanaiMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
if (CPUName.empty())
CPUName = "generic";
- return createLanaiMCSubtargetInfoImpl(TT, CPUName, FS);
+ return createLanaiMCSubtargetInfoImpl(TT, CPUName, /*TuneCPU*/ CPUName, FS);
}
static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index 9529b5e802d5..f32418c5be55 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -12,7 +12,6 @@
#include "TargetInfo/MSP430TargetInfo.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index 958212dc77c9..071e1484196b 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -90,11 +90,6 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- return false;
- }
-
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
index 6a6b07f2eba0..08c466377ee3 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
@@ -26,6 +26,7 @@ namespace llvm {
const MCSubtargetInfo &STI, raw_ostream &O) override;
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &O);
void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index f207d24ce04b..c352ea563454 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -44,7 +44,7 @@ static MCRegisterInfo *createMSP430MCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *
createMSP430MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- return createMSP430MCSubtargetInfoImpl(TT, CPU, FS);
+ return createMSP430MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T,
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 821339f50355..9c6d44bf92de 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
@@ -1209,9 +1208,8 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SR;
} else {
SDValue Zero = DAG.getConstant(0, dl, VT);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = {One, Zero, TargetCC, Flag};
- return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, Ops);
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, Op.getValueType(), Ops);
}
}
@@ -1227,10 +1225,9 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
SDValue TargetCC;
SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = {TrueV, FalseV, TargetCC, Flag};
- return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, Ops);
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, Op.getValueType(), Ops);
}
SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
@@ -1392,14 +1389,15 @@ bool MSP430TargetLowering::isTruncateFree(Type *Ty1,
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
- return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits());
+ return (Ty1->getPrimitiveSizeInBits().getFixedSize() >
+ Ty2->getPrimitiveSizeInBits().getFixedSize());
}
bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
- return (VT1.getSizeInBits() > VT2.getSizeInBits());
+ return (VT1.getFixedSizeInBits() > VT2.getFixedSizeInBits());
}
bool MSP430TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
index 1f3c1d34f76f..5a117404d772 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -47,7 +47,7 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
if (CPUName.empty())
CPUName = "msp430";
- ParseSubtargetFeatures(CPUName, FS);
+ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS);
if (HWMultModeOption != NoHWMult)
HWMultMode = HWMultModeOption;
@@ -57,5 +57,5 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
MSP430Subtarget::MSP430Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : MSP430GenSubtargetInfo(TT, CPU, FS), FrameLowering(),
+ : MSP430GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.h
index 2348d984d7e2..079af2c75ec1 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.h
@@ -54,7 +54,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
bool hasHWMult16() const { return HWMultMode == HWMult16; }
bool hasHWMult32() const { return HWMultMode == HWMult32; }
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.h
index 96fbc3ba0377..ef757dc7cb78 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.h
@@ -15,10 +15,10 @@
#define LLVM_LIB_TARGET_MSP430_MSP430TARGETMACHINE_H
#include "MSP430Subtarget.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
+class StringRef;
/// MSP430TargetMachine
///
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 9dbbdeb34dba..e4d61f8c210e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -352,8 +352,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandSaaAddr(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
- bool reportParseError(Twine ErrorMsg);
- bool reportParseError(SMLoc Loc, Twine ErrorMsg);
+ bool reportParseError(const Twine &ErrorMsg);
+ bool reportParseError(SMLoc Loc, const Twine &ErrorMsg);
bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
@@ -6982,12 +6982,12 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// FIXME: Given that these have the same name, these should both be
// consistent on affecting the Parser.
-bool MipsAsmParser::reportParseError(Twine ErrorMsg) {
+bool MipsAsmParser::reportParseError(const Twine &ErrorMsg) {
SMLoc Loc = getLexer().getLoc();
return Error(Loc, ErrorMsg);
}
-bool MipsAsmParser::reportParseError(SMLoc Loc, Twine ErrorMsg) {
+bool MipsAsmParser::reportParseError(SMLoc Loc, const Twine &ErrorMsg) {
return Error(Loc, ErrorMsg);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 37e970f2f15b..3315a8ba18d6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -9,7 +9,6 @@
#include "MipsABIInfo.h"
#include "MipsRegisterInfo.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 1126b871cb11..16c7befb2670 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -53,15 +53,6 @@ public:
/// @name Target Relaxation Interfaces
/// @{
- /// MayNeedRelaxation - Check whether the given instruction may need
- /// relaxation.
- ///
- /// \param Inst - The instruction to test.
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- return false;
- }
-
/// fixupNeedsRelaxation - Target specific predicate for whether a given
/// fixup requires the associated instruction to be relaxed.
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
index 3f534a2f1843..68b13bf1fcc3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
@@ -79,6 +79,7 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index de582bd60cbf..454f79926dd0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -77,7 +77,7 @@ static MCRegisterInfo *createMipsMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *createMipsMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
CPU = MIPS_MC::selectMipsCPU(TT, CPU);
- return createMipsMCSubtargetInfoImpl(TT, CPU, FS);
+ return createMipsMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td
index 7fe750249c58..792960332bcc 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td
@@ -191,7 +191,7 @@ def FeatureUseTCCInDIV : SubtargetFeature<
"UseTCCInDIV", "false",
"Force the assembler to use trapping">;
-def FeatureMadd4
+def FeatureNoMadd4
: SubtargetFeature<"nomadd4", "DisableMadd4", "true",
"Disable 4-operand madd.fmt and related instructions">;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index cc073fbf5231..b460bc71b11f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1233,7 +1233,7 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
.addImm(0x34));
}
- recordSled(CurSled, MI, Kind);
+ recordSled(CurSled, MI, Kind, 2);
}
void MipsAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
index cffd99affac1..377aa4825b43 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -87,9 +87,10 @@ bool MipsCallLowering::MipsHandler::handle(
}
namespace {
-class IncomingValueHandler : public MipsCallLowering::MipsHandler {
+class MipsIncomingValueHandler : public MipsCallLowering::MipsHandler {
public:
- IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
+ MipsIncomingValueHandler(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI)
: MipsHandler(MIRBuilder, MRI) {}
private:
@@ -117,11 +118,11 @@ private:
}
};
-class CallReturnHandler : public IncomingValueHandler {
+class CallReturnHandler : public MipsIncomingValueHandler {
public:
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder &MIB)
- : IncomingValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+ : MipsIncomingValueHandler(MIRBuilder, MRI), MIB(MIB) {}
private:
void markPhysRegUsed(unsigned PhysReg) override {
@@ -133,9 +134,9 @@ private:
} // end anonymous namespace
-void IncomingValueHandler::assignValueToReg(Register ValVReg,
- const CCValAssign &VA,
- const EVT &VT) {
+void MipsIncomingValueHandler::assignValueToReg(Register ValVReg,
+ const CCValAssign &VA,
+ const EVT &VT) {
Register PhysReg = VA.getLocReg();
if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
const MipsSubtarget &STI =
@@ -167,8 +168,8 @@ void IncomingValueHandler::assignValueToReg(Register ValVReg,
}
}
-Register IncomingValueHandler::getStackAddress(const CCValAssign &VA,
- MachineMemOperand *&MMO) {
+Register MipsIncomingValueHandler::getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) {
MachineFunction &MF = MIRBuilder.getMF();
unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
unsigned Offset = VA.getLocMemOffset();
@@ -186,8 +187,8 @@ Register IncomingValueHandler::getStackAddress(const CCValAssign &VA,
return MIRBuilder.buildFrameIndex(LLT::pointer(0, 32), FI).getReg(0);
}
-void IncomingValueHandler::assignValueToAddress(Register ValVReg,
- const CCValAssign &VA) {
+void MipsIncomingValueHandler::assignValueToAddress(Register ValVReg,
+ const CCValAssign &VA) {
if (VA.getLocInfo() == CCValAssign::SExt ||
VA.getLocInfo() == CCValAssign::ZExt ||
VA.getLocInfo() == CCValAssign::AExt) {
@@ -197,10 +198,10 @@ void IncomingValueHandler::assignValueToAddress(Register ValVReg,
buildLoad(ValVReg, VA);
}
-bool IncomingValueHandler::handleSplit(SmallVectorImpl<Register> &VRegs,
- ArrayRef<CCValAssign> ArgLocs,
- unsigned ArgLocsStartIndex,
- Register ArgsReg, const EVT &VT) {
+bool MipsIncomingValueHandler::handleSplit(SmallVectorImpl<Register> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex,
+ Register ArgsReg, const EVT &VT) {
if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex, VT))
return false;
setLeastSignificantFirst(VRegs);
@@ -209,10 +210,10 @@ bool IncomingValueHandler::handleSplit(SmallVectorImpl<Register> &VRegs,
}
namespace {
-class OutgoingValueHandler : public MipsCallLowering::MipsHandler {
+class MipsOutgoingValueHandler : public MipsCallLowering::MipsHandler {
public:
- OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder &MIB)
+ MipsOutgoingValueHandler(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, MachineInstrBuilder &MIB)
: MipsHandler(MIRBuilder, MRI), MIB(MIB) {}
private:
@@ -234,9 +235,9 @@ private:
};
} // end anonymous namespace
-void OutgoingValueHandler::assignValueToReg(Register ValVReg,
- const CCValAssign &VA,
- const EVT &VT) {
+void MipsOutgoingValueHandler::assignValueToReg(Register ValVReg,
+ const CCValAssign &VA,
+ const EVT &VT) {
Register PhysReg = VA.getLocReg();
if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
const MipsSubtarget &STI =
@@ -254,8 +255,8 @@ void OutgoingValueHandler::assignValueToReg(Register ValVReg,
}
}
-Register OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
- MachineMemOperand *&MMO) {
+Register MipsOutgoingValueHandler::getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) {
MachineFunction &MF = MIRBuilder.getMF();
const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
@@ -278,16 +279,16 @@ Register OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
return AddrReg.getReg(0);
}
-void OutgoingValueHandler::assignValueToAddress(Register ValVReg,
- const CCValAssign &VA) {
+void MipsOutgoingValueHandler::assignValueToAddress(Register ValVReg,
+ const CCValAssign &VA) {
MachineMemOperand *MMO;
Register Addr = getStackAddress(VA, MMO);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
-Register OutgoingValueHandler::extendRegister(Register ValReg,
- const CCValAssign &VA) {
+Register MipsOutgoingValueHandler::extendRegister(Register ValReg,
+ const CCValAssign &VA) {
LLT LocTy{VA.getLocVT()};
switch (VA.getLocInfo()) {
case CCValAssign::SExt: {
@@ -308,10 +309,10 @@ Register OutgoingValueHandler::extendRegister(Register ValReg,
llvm_unreachable("unable to extend register");
}
-bool OutgoingValueHandler::handleSplit(SmallVectorImpl<Register> &VRegs,
- ArrayRef<CCValAssign> ArgLocs,
- unsigned ArgLocsStartIndex,
- Register ArgsReg, const EVT &VT) {
+bool MipsOutgoingValueHandler::handleSplit(SmallVectorImpl<Register> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex,
+ Register ArgsReg, const EVT &VT) {
MIRBuilder.buildUnmerge(VRegs, ArgsReg);
setLeastSignificantFirst(VRegs);
if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex, VT))
@@ -346,7 +347,7 @@ static CCValAssign::LocInfo determineLocInfo(const MVT RegisterVT, const EVT VT,
const ISD::ArgFlagsTy &Flags) {
// > does not mean loss of information as type RegisterVT can't hold type VT,
// it means that type VT is split into multiple registers of type RegisterVT
- if (VT.getSizeInBits() >= RegisterVT.getSizeInBits())
+ if (VT.getFixedSizeInBits() >= RegisterVT.getFixedSizeInBits())
return CCValAssign::LocInfo::Full;
if (Flags.isSExt())
return CCValAssign::LocInfo::SExt;
@@ -373,8 +374,8 @@ static void setLocInfo(SmallVectorImpl<CCValAssign> &ArgLocs,
}
bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val,
- ArrayRef<Register> VRegs) const {
+ const Value *Val, ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const {
MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(Mips::RetRA);
@@ -403,7 +404,7 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
CCInfo.AnalyzeReturn(Outs, TLI.CCAssignFnForReturn());
setLocInfo(ArgLocs, Outs);
- OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
+ MipsOutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
if (!RetHandler.handle(ArgLocs, RetInfos)) {
return false;
}
@@ -412,9 +413,10 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
return true;
}
-bool MipsCallLowering::lowerFormalArguments(
- MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const {
+bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
// Quick exit if there aren't any args.
if (F.arg_empty())
@@ -455,7 +457,7 @@ bool MipsCallLowering::lowerFormalArguments(
CCInfo.AnalyzeFormalArguments(Ins, TLI.CCAssignFnForCall());
setLocInfo(ArgLocs, Ins);
- IncomingValueHandler Handler(MIRBuilder, MF.getRegInfo());
+ MipsIncomingValueHandler Handler(MIRBuilder, MF.getRegInfo());
if (!Handler.handle(ArgLocs, ArgInfos))
return false;
@@ -579,7 +581,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CCInfo.AnalyzeCallOperands(Outs, TLI.CCAssignFnForCall(), FuncOrigArgs, Call);
setLocInfo(ArgLocs, Outs);
- OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), MIB);
+ MipsOutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), MIB);
if (!RetHandler.handle(ArgLocs, ArgInfos)) {
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.h
index a284cf5e26cf..1c1c2080a76a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.h
@@ -18,6 +18,7 @@
namespace llvm {
+class MachineMemOperand;
class MipsTargetLowering;
class MipsCallLowering : public CallLowering {
@@ -63,10 +64,12 @@ public:
MipsCallLowering(const MipsTargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs) const override;
+ ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index faf7160e63e2..8e619549f01c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -552,7 +552,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
- unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ unsigned Size = CPs[i].getSizeInBytes(TD);
assert(Size >= 4 && "Too small constant pool entry");
Align Alignment = CPs[i].getAlign();
// Verify that all constant pool entries are a multiple of their alignment.
@@ -593,12 +593,7 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) {
return false;
MachineBasicBlock *NextBB = &*std::next(MBBI);
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I)
- if (*I == NextBB)
- return true;
-
- return false;
+ return llvm::is_contained(MBB->successors(), NextBB);
}
/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 155d19ba6959..797d81204305 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -182,7 +182,7 @@ namespace {
/// memory instruction can be moved to a delay slot.
class MemDefsUses : public InspectMemInstr {
public:
- MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI);
+ explicit MemDefsUses(const MachineFrameInfo *MFI);
private:
using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
@@ -200,7 +200,6 @@ namespace {
const MachineFrameInfo *MFI;
SmallPtrSet<ValueType, 4> Uses, Defs;
- const DataLayout &DL;
/// Flags indicating whether loads or stores with no underlying objects have
/// been seen.
@@ -492,8 +491,8 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
return true;
}
-MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_)
- : InspectMemInstr(false), MFI(MFI_), DL(DL) {}
+MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
+ : InspectMemInstr(false), MFI(MFI_) {}
bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool HasHazard = false;
@@ -542,7 +541,7 @@ getUnderlyingObjects(const MachineInstr &MI,
if (const Value *V = MMO.getValue()) {
SmallVector<const Value *, 4> Objs;
- GetUnderlyingObjects(V, Objs, DL);
+ ::getUnderlyingObjects(V, Objs);
for (const Value *UValue : Objs) {
if (!isIdentifiedObject(V))
@@ -566,7 +565,11 @@ Iter MipsDelaySlotFiller::replaceWithCompactBranch(MachineBasicBlock &MBB,
unsigned NewOpcode = TII->getEquivalentCompactForm(Branch);
Branch = TII->genInstrWithNewOpc(NewOpcode, Branch);
- std::next(Branch)->eraseFromParent();
+ auto *ToErase = cast<MachineInstr>(&*std::next(Branch));
+ // Update call site info for the Branch.
+ if (ToErase->shouldUpdateCallSiteInfo())
+ ToErase->getMF()->moveCallSiteInfo(ToErase, cast<MachineInstr>(&*Branch));
+ ToErase->eraseFromParent();
return Branch;
}
@@ -775,7 +778,7 @@ bool MipsDelaySlotFiller::searchBackward(MachineBasicBlock &MBB,
auto *Fn = MBB.getParent();
RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo());
- MemDefsUses MemDU(Fn->getDataLayout(), &Fn->getFrameInfo());
+ MemDefsUses MemDU(&Fn->getFrameInfo());
ReverseIter Filler;
RegDU.init(Slot);
@@ -851,7 +854,7 @@ bool MipsDelaySlotFiller::searchSuccBBs(MachineBasicBlock &MBB,
IM.reset(new LoadFromStackOrConst());
} else {
const MachineFrameInfo &MFI = Fn->getFrameInfo();
- IM.reset(new MemDefsUses(Fn->getDataLayout(), &MFI));
+ IM.reset(new MemDefsUses(&MFI));
}
if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
index b1abf4a33717..f72dc1da4131 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -733,10 +733,10 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
assert(I->getNumOperands() == 5 &&
"Atomics min|max|umin|umax use an additional register");
- Register Scratch2 = I->getOperand(4).getReg();
+ MCRegister Scratch2 = I->getOperand(4).getReg().asMCReg();
// On Mips64 result of slt is GPR32.
- Register Scratch2_32 =
+ MCRegister Scratch2_32 =
(Size == 8) ? STI->getRegisterInfo()->getSubReg(Scratch2, Mips::sub_32)
: Scratch2;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 2da35020006e..8b599bca3915 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -134,7 +134,7 @@ unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
// Break down vector types to either 2 i64s or 4 i32s.
RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT);
IntermediateVT = RegisterVT;
- NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits()
+ NumIntermediates = VT.getFixedSizeInBits() < RegisterVT.getFixedSizeInBits()
? VT.getVectorNumElements()
: VT.getSizeInBits() / RegisterVT.getSizeInBits();
@@ -1198,17 +1198,6 @@ bool MipsTargetLowering::shouldFoldConstantShiftPairToMask(
}
void
-MipsTargetLowering::LowerOperationWrapper(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
- SDValue Res = LowerOperation(SDValue(N, 0), DAG);
-
- if (Res)
- for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
- Results.push_back(Res.getValue(I));
-}
-
-void
MipsTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -3025,8 +3014,8 @@ SDValue MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
int FI = MFI.CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(),
- /* Alignment = */ 0, MachineMemOperand::MOVolatile);
+ return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), MaybeAlign(),
+ MachineMemOperand::MOVolatile);
}
void MipsTargetLowering::
@@ -4404,7 +4393,7 @@ void MipsTargetLowering::passByValArg(
SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
DAG.getConstant(OffsetInBytes, DL, PtrTy));
SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
- MachinePointerInfo(), Alignment.value());
+ MachinePointerInfo(), Alignment);
MemOpChains.push_back(LoadVal.getValue(1));
unsigned ArgReg = ArgRegs[FirstReg + I];
RegsToPass.push_back(std::make_pair(ArgReg, LoadVal));
@@ -4431,7 +4420,7 @@ void MipsTargetLowering::passByValArg(
PtrTy));
SDValue LoadVal = DAG.getExtLoad(
ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(),
- MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment.value());
+ MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment);
MemOpChains.push_back(LoadVal.getValue(1));
// Shift the loaded value.
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
index 16b4d51d3ca6..3820c42ba8aa 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -40,8 +40,6 @@
namespace llvm {
class Argument;
-class CCState;
-class CCValAssign;
class FastISel;
class FunctionLoweringInfo;
class MachineBasicBlock;
@@ -316,10 +314,6 @@ class TargetRegisterClass;
return ISD::SIGN_EXTEND;
}
- void LowerOperationWrapper(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const override;
-
/// LowerOperation - Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -365,14 +359,6 @@ class TargetRegisterClass;
return ABI.IsN64() ? Mips::A1_64 : Mips::A1;
}
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Mips doesn't have any special address spaces so we just reserve
- // the first 256 for software use (e.g. OpenCL) and treat casts
- // between them as noops.
- return SrcAS < 256 && DestAS < 256;
- }
-
bool isJumpTableRelative() const override {
return getTargetMachine().isPositionIndependent();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td
index 5696df96e798..14590ddacfcb 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -452,6 +452,12 @@ let AdditionalPredicates = [NotInMicroMips] in {
let DecoderNamespace = "MipsFP64" in {
let AdditionalPredicates = [NotInMicroMips] in {
+ def FADD_PS64 : ADDS_FT<"add.ps", FGR64Opnd, II_ADD_PS, 0>,
+ ADDS_FM<0x0, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def FMUL_PS64 : ADDS_FT<"mul.ps", FGR64Opnd, II_MUL_PS, 0>,
+ ADDS_FM<0x2, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
def PLL_PS64 : ADDS_FT<"pll.ps", FGR64Opnd, II_CVT, 0>,
ADDS_FM<0x2C, 22>,
ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
@@ -464,6 +470,9 @@ let DecoderNamespace = "MipsFP64" in {
def PUU_PS64 : ADDS_FT<"puu.ps", FGR64Opnd, II_CVT, 0>,
ADDS_FM<0x2F, 22>,
ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def FSUB_PS64 : ADDS_FT<"sub.ps", FGR64Opnd, II_SUB_PS, 0>,
+ ADDS_FM<0x1, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
def CVT_S_PU64 : ABSS_FT<"cvt.s.pu", FGR32Opnd, FGR64Opnd, II_CVT>,
ABSS_FM<0x20, 22>,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 0c6080258a3a..94828a976695 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -894,4 +894,4 @@ Optional<RegImmPair> MipsInstrInfo::isAddImmediate(const MachineInstr &MI,
}
}
return None;
-}
\ No newline at end of file
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
index a3b928870f3f..089fed9ec0bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -242,7 +242,7 @@ def HasEVA : Predicate<"Subtarget->hasEVA()">,
def HasMSA : Predicate<"Subtarget->hasMSA()">,
AssemblerPredicate<(all_of FeatureMSA)>;
def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">,
- AssemblerPredicate<(all_of (not FeatureMadd4))>;
+ AssemblerPredicate<(all_of (not FeatureNoMadd4))>;
def HasMT : Predicate<"Subtarget->hasMT()">,
AssemblerPredicate<(all_of FeatureMT)>;
def UseIndirectJumpsHazard : Predicate<"Subtarget->useIndirectJumpsHazard()">,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index b489c8137769..2692c08b93de 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -322,6 +322,8 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
+
computeTables();
verify(*ST.getInstrInfo());
}
@@ -500,7 +502,6 @@ static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
const MipsSubtarget &ST =
static_cast<const MipsSubtarget &>(MI.getMF()->getSubtarget());
const MipsInstrInfo &TII = *ST.getInstrInfo();
@@ -508,14 +509,6 @@ bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
const RegisterBankInfo &RBI = *ST.getRegBankInfo();
switch (MI.getIntrinsicID()) {
- case Intrinsic::memcpy:
- case Intrinsic::memset:
- case Intrinsic::memmove:
- if (createMemLibcall(MIRBuilder, MRI, MI) ==
- LegalizerHelper::UnableToLegalize)
- return false;
- MI.eraseFromParent();
- return true;
case Intrinsic::trap: {
MachineInstr *Trap = MIRBuilder.buildInstr(Mips::TRAP);
MI.eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 6325e513f9f8..3101820d476e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -716,10 +716,10 @@ void MipsRegisterBankInfo::setRegBank(MachineInstr &MI,
static void
combineAwayG_UNMERGE_VALUES(LegalizationArtifactCombiner &ArtCombiner,
- MachineInstr &MI, GISelObserverWrapper &Observer) {
+ MachineInstr &MI, GISelChangeObserver &Observer) {
SmallVector<Register, 4> UpdatedDefs;
SmallVector<MachineInstr *, 2> DeadInstrs;
- ArtCombiner.tryCombineMerges(MI, DeadInstrs, UpdatedDefs, Observer);
+ ArtCombiner.tryCombineUnmergeValues(MI, DeadInstrs, UpdatedDefs, Observer);
for (MachineInstr *DeadMI : DeadInstrs)
DeadMI->eraseFromParent();
}
@@ -728,14 +728,13 @@ void MipsRegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
InstListTy NewInstrs;
- MachineIRBuilder B(MI);
MachineFunction *MF = MI.getMF();
MachineRegisterInfo &MRI = OpdMapper.getMRI();
const LegalizerInfo &LegInfo = *MF->getSubtarget().getLegalizerInfo();
InstManager NewInstrObserver(NewInstrs);
- GISelObserverWrapper WrapperObserver(&NewInstrObserver);
- LegalizerHelper Helper(*MF, WrapperObserver, B);
+ MachineIRBuilder B(MI, NewInstrObserver);
+ LegalizerHelper Helper(*MF, NewInstrObserver, B);
LegalizationArtifactCombiner ArtCombiner(B, MF->getRegInfo(), LegInfo);
switch (MI.getOpcode()) {
@@ -752,7 +751,7 @@ void MipsRegisterBankInfo::applyMappingImpl(
// not be considered for regbank selection. RegBankSelect for mips
// visits/makes corresponding G_MERGE first. Combine them here.
if (NewMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES)
- combineAwayG_UNMERGE_VALUES(ArtCombiner, *NewMI, WrapperObserver);
+ combineAwayG_UNMERGE_VALUES(ArtCombiner, *NewMI, NewInstrObserver);
// This G_MERGE will be combined away when its corresponding G_UNMERGE
// gets regBankSelected.
else if (NewMI->getOpcode() == TargetOpcode::G_MERGE_VALUES)
@@ -764,7 +763,7 @@ void MipsRegisterBankInfo::applyMappingImpl(
return;
}
case TargetOpcode::G_UNMERGE_VALUES:
- combineAwayG_UNMERGE_VALUES(ArtCombiner, MI, WrapperObserver);
+ combineAwayG_UNMERGE_VALUES(ArtCombiner, MI, NewInstrObserver);
return;
default:
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
index 55eeaf096b14..df51606e1e8a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
@@ -150,7 +150,7 @@ private:
class TypeInfoForMF {
/// MachineFunction name is used to recognise when MF changes.
- std::string MFName = "";
+ std::string MFName;
/// <key, value> : value is vector of all MachineInstrs that are waiting for
/// key to figure out type of some of its ambiguous operands.
DenseMap<const MachineInstr *, SmallVector<const MachineInstr *, 2>>
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index a657bb44ac78..f31ba06a1e7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -774,9 +774,9 @@ void MipsSEFrameLowering::emitInterruptEpilogueStub(
.addImm(0);
}
-int MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- Register &FrameReg) const {
+StackOffset
+MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
MipsABIInfo ABI = STI.getABI();
@@ -785,8 +785,9 @@ int MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF,
else
FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr();
- return MFI.getObjectOffset(FI) + MFI.getStackSize() -
- getOffsetOfLocalArea() + MFI.getOffsetAdjustment();
+ return StackOffset::getFixed(MFI.getObjectOffset(FI) + MFI.getStackSize() -
+ getOffsetOfLocalArea() +
+ MFI.getOffsetAdjustment());
}
bool MipsSEFrameLowering::spillCalleeSavedRegisters(
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.h
index c818a65f5b14..bed2776c28da 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_MIPS_MIPSSEFRAMELOWERING_H
#include "MipsFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
#include <vector>
namespace llvm {
@@ -27,8 +28,8 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index bdf29c53cbd5..4a448a5f7c68 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -2307,7 +2307,7 @@ static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
- /* Alignment = */ 16);
+ Align(16));
}
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
@@ -2382,7 +2382,7 @@ static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
- /* Alignment = */ 16);
+ Align(16));
}
SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSchedule.td
index 568c85af655d..3a5b3fe3b34b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSchedule.td
@@ -26,6 +26,7 @@ def II_ADDIUPC : InstrItinClass;
def II_ADD : InstrItinClass;
def II_ADDU : InstrItinClass;
def II_ADD_D : InstrItinClass;
+def II_ADD_PS : InstrItinClass;
def II_ADD_S : InstrItinClass;
def II_ADDR_PS : InstrItinClass;
def II_ALIGN : InstrItinClass;
@@ -279,6 +280,7 @@ def II_MUL : InstrItinClass;
def II_MUH : InstrItinClass;
def II_MUHU : InstrItinClass;
def II_MULU : InstrItinClass;
+def II_MUL_PS : InstrItinClass;
def II_MULR_PS : InstrItinClass;
def II_MULT : InstrItinClass;
def II_MULTU : InstrItinClass;
@@ -341,6 +343,7 @@ def II_SRLV : InstrItinClass;
def II_SUB : InstrItinClass;
def II_SUBU : InstrItinClass;
def II_SUB_D : InstrItinClass;
+def II_SUB_PS : InstrItinClass;
def II_SUB_S : InstrItinClass;
def II_SUXC1 : InstrItinClass;
def II_SW : InstrItinClass;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
index 3888ca4e82f5..f076f2f9cf10 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
@@ -829,10 +829,11 @@ def : InstRW<[GenericWriteFPUL], (instrs ADDR_PS64,
CVT_L_S, CVT_S_D32, CVT_S_D64, CVT_S_L,
CVT_S_W, CVT_W_D32, CVT_W_D64, CVT_W_S,
CVT_PS_S64, CVT_S_PL64, CVT_S_PU64,
- CVT_PS_PW64, CVT_PW_PS64,
+ CVT_PS_PW64, CVT_PW_PS64, FADD_PS64,
FLOOR_L_D64, FLOOR_L_S, FLOOR_W_D32,
FLOOR_W_D64, FLOOR_W_S, FMUL_D32, FMUL_D64,
- MADD_D32, MADD_D64, MSUB_D32, MSUB_D64, MULR_PS64,
+ FMUL_PS64, FSUB_PS64, MADD_D32, MADD_D64,
+ MSUB_D32, MSUB_D64, MULR_PS64,
NMADD_D32, NMADD_D64, NMSUB_D32, NMSUB_D64,
PLL_PS64, PLU_PS64, PUL_PS64, PUU_PS64,
ROUND_L_D64, ROUND_L_S, ROUND_W_D32,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
index 3d159d412489..466b5c6af696 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
@@ -449,8 +449,8 @@ def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(NLOC|NLZC)_[BHWD]$")>;
// cvt.ps.[sw], cvt.s.(pl|pu), c.<cc>.[ds], c.<cc>.ps, mul.[ds], mul.ps,
// pl[lu].ps, sub.[ds], sub.ps, trunc.w.[ds], trunc.w.ps
def : InstRW<[P5600WriteFPUL],
- (instrs FADD_D32, FADD_D64, FADD_S, FMUL_D32, FMUL_D64, FMUL_S,
- FSUB_D32, FSUB_D64, FSUB_S)>;
+ (instrs FADD_D32, FADD_D64, FADD_PS64, FADD_S, FMUL_D32, FMUL_D64,
+ FMUL_PS64, FMUL_S, FSUB_D32, FSUB_D64, FSUB_PS64, FSUB_S)>;
def : InstRW<[P5600WriteFPUL], (instregex "^TRUNC_(L|W)_(S|D32|D64)$")>;
def : InstRW<[P5600WriteFPUL],
(instregex "^CVT_(S|D32|D64|L|W)_(S|D32|D64|L|W)$")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
index ef4191cec3df..8bb9d75e9173 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -70,21 +70,21 @@ void MipsSubtarget::anchor() {}
MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
bool little, const MipsTargetMachine &TM,
MaybeAlign StackAlignOverride)
- : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault),
- IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false),
- NoABICalls(false), Abs2008(false), IsFP64bit(false), UseOddSPReg(true),
- IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), HasCnMips(false),
- HasCnMipsP(false), HasMips3_32(false), HasMips3_32r2(false),
- HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false),
- InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
- InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), HasDSPR3(false),
- AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
- UseTCCInDIV(false), HasSym32(false), HasEVA(false), DisableMadd4(false),
- HasMT(false), HasCRC(false), HasVirt(false), HasGINV(false),
- UseIndirectJumpsHazard(false), StackAlignOverride(StackAlignOverride),
- TM(TM), TargetTriple(TT), TSInfo(),
- InstrInfo(
- MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
+ : MipsGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ MipsArchVersion(MipsDefault), IsLittle(little), IsSoftFloat(false),
+ IsSingleFloat(false), IsFPXX(false), NoABICalls(false), Abs2008(false),
+ IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false),
+ IsGP64bit(false), HasVFPU(false), HasCnMips(false), HasCnMipsP(false),
+ HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
+ HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
+ InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
+ HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
+ Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
+ HasEVA(false), DisableMadd4(false), HasMT(false), HasCRC(false),
+ HasVirt(false), HasGINV(false), UseIndirectJumpsHazard(false),
+ StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT),
+ TSInfo(), InstrInfo(MipsInstrInfo::create(
+ initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
TLInfo(MipsTargetLowering::create(TM, *this)) {
@@ -240,7 +240,7 @@ MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
StringRef CPUName = MIPS_MC::selectMipsCPU(TM.getTargetTriple(), CPU);
// Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
+ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS);
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h
index 26ee961fc95d..2b4c2b19a95d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -240,7 +240,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
bool hasMips1() const { return MipsArchVersion >= Mips1; }
bool hasMips2() const { return MipsArchVersion >= Mips2; }
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 80cb6ce7ac0c..7e2c43164d52 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -163,21 +163,15 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
- bool hasMips16Attr =
- !F.getFnAttribute("mips16").hasAttribute(Attribute::None);
- bool hasNoMips16Attr =
- !F.getFnAttribute("nomips16").hasAttribute(Attribute::None);
-
- bool HasMicroMipsAttr =
- !F.getFnAttribute("micromips").hasAttribute(Attribute::None);
- bool HasNoMicroMipsAttr =
- !F.getFnAttribute("nomicromips").hasAttribute(Attribute::None);
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
+ bool hasMips16Attr = F.getFnAttribute("mips16").isValid();
+ bool hasNoMips16Attr = F.getFnAttribute("nomips16").isValid();
+
+ bool HasMicroMipsAttr = F.getFnAttribute("micromips").isValid();
+ bool HasNoMicroMipsAttr = F.getFnAttribute("nomicromips").isValid();
// FIXME: This is related to the code below to reset the target options,
// we need to know whether or not the soft float flag is set on the
@@ -295,8 +289,7 @@ MipsTargetMachine::getTargetTransformInfo(const Function &F) {
}
// Implemented by targets that want to run passes immediately before
-// machine code is emitted. return true if -print-machineinstrs should
-// print out the code after the passes.
+// machine code is emitted.
void MipsPassConfig::addPreEmitPass() {
// Expand pseudo instructions that are sensitive to register allocation.
addPass(createMipsExpandPseudoPass());
@@ -323,7 +316,7 @@ void MipsPassConfig::addPreEmitPass() {
}
bool MipsPassConfig::addIRTranslator() {
- addPass(new IRTranslator());
+ addPass(new IRTranslator(getOptLevel()));
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.h
index 25300504a02d..e0de924be4fd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -63,6 +63,14 @@ public:
return TLOF.get();
}
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Mips doesn't have any special address spaces so we just reserve
+ // the first 256 for software use (e.g. OpenCL) and treat casts
+ // between them as noops.
+ return SrcAS < 256 && DestAS < 256;
+ }
+
bool isLittleEndian() const { return isLittle; }
const MipsABIInfo &getABI() const { return ABI; }
};
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index cee0e7eec54a..503f0497b6f0 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -29,6 +29,7 @@ public:
const MCSubtargetInfo &STI, raw_ostream &OS) override;
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
// End
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index aef0eed6ab9a..f275011018a3 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -47,6 +47,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple,
AscizDirective = nullptr; // not supported
SupportsQuotedNames = false;
SupportsExtendedDwarfLocDirective = false;
+ SupportsSignedData = false;
// @TODO: Can we just disable this?
WeakDirective = "\t// .weak\t";
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index d758c2c86959..d69166feb042 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -46,7 +46,7 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *
createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- return createNVPTXMCSubtargetInfoImpl(TT, CPU, FS);
+ return createNVPTXMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h
index dfe0b9cb5ee6..c2fd090da084 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
#define LLVM_LIB_TARGET_NVPTX_NVPTX_H
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
@@ -47,6 +48,24 @@ FunctionPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
+struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
+ NVVMIntrRangePass();
+ NVVMIntrRangePass(unsigned SmVersion) : SmVersion(SmVersion) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ unsigned SmVersion;
+};
+
+struct NVVMReflectPass : PassInfoMixin<NVVMReflectPass> {
+ NVVMReflectPass();
+ NVVMReflectPass(unsigned SmVersion) : SmVersion(SmVersion) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ unsigned SmVersion;
+};
+
namespace NVPTX {
enum DrvInterface {
NVCL,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index da1a398a68f0..38844ff4ddf9 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1272,9 +1272,6 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
std::string
NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
switch (Ty->getTypeID()) {
- default:
- llvm_unreachable("unexpected type");
- break;
case Type::IntegerTyID: {
unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
if (NumBits == 1)
@@ -1305,9 +1302,10 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
return "b32";
else
return "u32";
+ default:
+ break;
}
llvm_unreachable("unexpected type");
- return nullptr;
}
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index c533921842e4..024e51e5f488 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -63,12 +63,13 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
}
}
-int NVPTXFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- Register &FrameReg) const {
+StackOffset
+NVPTXFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
FrameReg = NVPTX::VRDepot;
- return MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
+ return StackOffset::getFixed(MFI.getObjectOffset(FI) -
+ getOffsetOfLocalArea());
}
void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
index e4c2b9e77f70..a5d49ac3ab29 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -24,8 +25,8 @@ public:
bool hasFP(const MachineFunction &MF) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
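getFrameIndexReference now returns a StackOffset instead of a bare int (see the matching .cpp hunk above), and callers such as NVPTXPrologEpilogPass unwrap it with .getFixed(). A simplified stand-in for that interface, not the real llvm::StackOffset class, just enough to show the shape of the change:

#include <cassert>
#include <cstdint>

// Cut-down model of llvm::StackOffset: a fixed byte offset plus a scalable
// part, which NVPTX never uses because it has no scalable vectors.
class StackOffset {
  int64_t Fixed;
  int64_t Scalable;
  StackOffset(int64_t F, int64_t S) : Fixed(F), Scalable(S) {}

public:
  static StackOffset getFixed(int64_t F) { return StackOffset(F, 0); }
  int64_t getFixed() const { return Fixed; }
};

// The old signature returned the int directly; the new one wraps it.
static StackOffset getFrameIndexReference(int64_t ObjectOffset,
                                          int64_t LocalAreaOffset) {
  return StackOffset::getFixed(ObjectOffset - LocalAreaOffset);
}

int main() {
  // Callers that still need a plain number, like the DBG_VALUE rewriting in
  // NVPTXPrologEpilogPass, unwrap it explicitly.
  assert(getFrameIndexReference(16, 8).getFixed() == 8);
  return 0;
}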
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 4296eca6a8df..08f4ab87c68d 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -700,12 +700,11 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
bool IsKernelFn = isKernelFunction(F->getFunction());
- // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
+ // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
// because the former looks through phi nodes while the latter does not. We
// need to look through phi nodes to handle pointer induction variables.
SmallVector<const Value *, 8> Objs;
- GetUnderlyingObjects(N->getMemOperand()->getValue(),
- Objs, F->getDataLayout());
+ getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);
return all_of(Objs, [&](const Value *V) {
if (auto *A = dyn_cast<const Argument>(V))
@@ -2855,7 +2854,7 @@ bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
}
// Copy over operands
- SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
+ SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
Ops.push_back(N->getOperand(0)); // Move chain to the back.
ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
@@ -3364,7 +3363,7 @@ bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
}
// Copy over operands
- SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
+ SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
Ops.push_back(N->getOperand(0)); // Move chain to the back.
ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
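The comment in canLowerToLDG explains why the plural getUnderlyingObjects() is needed: it looks through PHI nodes, which matters for pointer induction variables. A standalone toy walk that illustrates the difference, using illustrative types only (real code also guards against PHI cycles):

#include <cassert>
#include <set>
#include <vector>

// A node is either a leaf object or a phi merging several incoming pointers.
struct Node {
  bool IsPhi;
  std::vector<const Node *> Incoming; // used only when IsPhi is true
  Node() : IsPhi(false) {}
};

// Plural-style traversal: walk through phis and collect every leaf object.
// A singular getUnderlyingObject-style query would stop at the phi itself.
static void collectUnderlyingObjects(const Node *N,
                                     std::set<const Node *> &Objs) {
  if (!N->IsPhi) {
    Objs.insert(N);
    return;
  }
  for (const Node *In : N->Incoming)
    collectUnderlyingObjects(In, Objs);
}

int main() {
  Node A, B;            // two distinct underlying objects
  Node Phi;             // pointer induction variable merging them
  Phi.IsPhi = true;
  Phi.Incoming = {&A, &B};

  std::set<const Node *> Objs;
  collectUnderlyingObjects(&Phi, Objs);
  assert(Objs.size() == 2); // both objects are visible through the phi
  return 0;
}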
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f45cc06e0a0a..8860e90f2806 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -19,6 +19,7 @@
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Analysis.h"
@@ -64,7 +65,7 @@
using namespace llvm;
-static unsigned int uniqueCallSite = 0;
+static std::atomic<unsigned> GlobalUniqueCallSite;
static cl::opt<bool> sched4reg(
"nvptx-sched4reg",
@@ -1242,7 +1243,7 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
std::string NVPTXTargetLowering::getPrototype(
const DataLayout &DL, Type *retTy, const ArgListTy &Args,
const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
- const CallBase &CB) const {
+ const CallBase &CB, unsigned UniqueCallSite) const {
auto PtrVT = getPointerTy(DL);
bool isABI = (STI.getSmVersion() >= 20);
@@ -1251,7 +1252,7 @@ std::string NVPTXTargetLowering::getPrototype(
return "";
std::stringstream O;
- O << "prototype_" << uniqueCallSite << " : .callprototype ";
+ O << "prototype_" << UniqueCallSite << " : .callprototype ";
if (retTy->getTypeID() == Type::VoidTyID) {
O << "()";
@@ -1421,8 +1422,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!isABI)
return Chain;
+ unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
SDValue tempChain = Chain;
- Chain = DAG.getCALLSEQ_START(Chain, uniqueCallSite, 0, dl);
+ Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
SDValue InFlag = Chain.getValue(1);
unsigned paramCount = 0;
@@ -1677,7 +1679,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB);
+ std::string Proto =
+ getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB, UniqueCallSite);
const char *ProtoStr =
nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
SDValue ProtoOps[] = {
@@ -1733,9 +1736,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isIndirectCall) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue PrototypeOps[] = { Chain,
- DAG.getConstant(uniqueCallSite, dl, MVT::i32),
- InFlag };
+ SDValue PrototypeOps[] = {
+ Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
InFlag = Chain.getValue(1);
}
@@ -1831,13 +1833,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(uniqueCallSite, dl, true),
- DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
- true),
- InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(
+ Chain, DAG.getIntPtrConstant(UniqueCallSite, dl, true),
+ DAG.getIntPtrConstant(UniqueCallSite + 1, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
- uniqueCallSite++;
// Append ProxyReg instructions to the chain to make sure that `callseq_end`
// will not get lost. Otherwise, during libcalls expansion, the nodes can become
@@ -2438,8 +2437,7 @@ static bool isImageOrSamplerVal(const Value *arg, const Module *context) {
if (!STy || STy->isLiteral())
return false;
- return std::find(std::begin(specialTypes), std::end(specialTypes),
- STy->getName()) != std::end(specialTypes);
+ return llvm::is_contained(specialTypes, STy->getName());
}
SDValue NVPTXTargetLowering::LowerFormalArguments(
@@ -2590,7 +2588,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// Extend the element if necessary (e.g. an i8 is loaded
// into an i16 register)
if (Ins[InsIdx].VT.isInteger() &&
- Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) {
+ Ins[InsIdx].VT.getFixedSizeInBits() >
+ LoadVT.getFixedSizeInBits()) {
unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND;
Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
@@ -4564,13 +4563,13 @@ static bool IsMulWideOperandDemotable(SDValue Op,
if (Op.getOpcode() == ISD::SIGN_EXTEND ||
Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() <= OptSize) {
+ if (OrigVT.getFixedSizeInBits() <= OptSize) {
S = Signed;
return true;
}
} else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() <= OptSize) {
+ if (OrigVT.getFixedSizeInBits() <= OptSize) {
S = Unsigned;
return true;
}
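The uniqueCallSite counter becomes an atomic that is sampled once per lowered call via fetch_add and threaded through getPrototype, replacing the plain static that was post-incremented at the end of LowerCall. A small standalone sketch of why fetch_add hands every call a distinct id even under concurrent compilation:

#include <atomic>
#include <cassert>

// Global counter shared by all call lowerings, as in the hunk above.
static std::atomic<unsigned> GlobalUniqueCallSite{0};

// Each lowering grabs its id exactly once; fetch_add returns the previous
// value atomically, so two concurrent lowerings can never observe the same id.
static unsigned lowerOneCall() {
  unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
  // ... the id would feed the prototype label and CALLSEQ_START/END here ...
  return UniqueCallSite;
}

int main() {
  unsigned First = lowerOneCall();
  unsigned Second = lowerOneCall();
  assert(First != Second);
  return 0;
}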
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index df9cd4159962..13829b924d4b 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -491,7 +491,8 @@ public:
std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
const SmallVectorImpl<ISD::OutputArg> &,
- MaybeAlign retAlignment, const CallBase &CB) const;
+ MaybeAlign retAlignment, const CallBase &CB,
+ unsigned UniqueCallSite) const;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td
index 77961c386827..9220f4766d92 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -31,14 +31,14 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// TSFlagFields
bits<4> VecInstType = VecNOP.Value;
- bit IsSimpleMove = 0;
- bit IsLoad = 0;
- bit IsStore = 0;
+ bit IsSimpleMove = false;
+ bit IsLoad = false;
+ bit IsStore = false;
- bit IsTex = 0;
- bit IsSust = 0;
- bit IsSurfTexQuery = 0;
- bit IsTexModeUnified = 0;
+ bit IsTex = false;
+ bit IsSust = false;
+ bit IsSurfTexQuery = false;
+ bit IsTexModeUnified = false;
// The following field is encoded as log2 of the vector size minus one,
// with 0 meaning the operation is not a surface instruction. For example,
@@ -46,13 +46,13 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// 2**(2-1) = 2.
bits<2> IsSuld = 0;
- let TSFlags{3-0} = VecInstType;
- let TSFlags{4-4} = IsSimpleMove;
- let TSFlags{5-5} = IsLoad;
- let TSFlags{6-6} = IsStore;
- let TSFlags{7} = IsTex;
- let TSFlags{9-8} = IsSuld;
- let TSFlags{10} = IsSust;
- let TSFlags{11} = IsSurfTexQuery;
- let TSFlags{12} = IsTexModeUnified;
+ let TSFlags{3...0} = VecInstType;
+ let TSFlags{4...4} = IsSimpleMove;
+ let TSFlags{5...5} = IsLoad;
+ let TSFlags{6...6} = IsStore;
+ let TSFlags{7} = IsTex;
+ let TSFlags{9...8} = IsSuld;
+ let TSFlags{10} = IsSust;
+ let TSFlags{11} = IsSurfTexQuery;
+ let TSFlags{12} = IsTexModeUnified;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index fe7a84f9a361..381ed4dd6887 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -13,7 +13,7 @@
include "NVPTXInstrFormats.td"
// A NOP instruction
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
def NOP : NVPTXInst<(outs), (ins), "", []>;
}
@@ -137,7 +137,7 @@ def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
-def true : Predicate<"true">;
+def True : Predicate<"true">;
def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">;
@@ -407,7 +407,7 @@ multiclass F2<string OpcStr, SDNode OpNode> {
// Type Conversion
//-----------------------------------
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
// Generate a cvt to the given type from all possible types. Each instance
// takes a CvtMode immediate that defines the conversion mode to use. It can
// be CvtNONE to omit a conversion mode.
@@ -1022,12 +1022,12 @@ multiclass FMA_F16<string OpcStr, RegisterClass RC, Predicate Pred> {
}
defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", Float16Regs, doF32FTZ>;
-defm FMA16 : FMA_F16<"fma.rn.f16", Float16Regs, true>;
+defm FMA16 : FMA_F16<"fma.rn.f16", Float16Regs, True>;
defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", Float16x2Regs, doF32FTZ>;
-defm FMA16x2 : FMA_F16<"fma.rn.f16x2", Float16x2Regs, true>;
+defm FMA16x2 : FMA_F16<"fma.rn.f16x2", Float16x2Regs, True>;
defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
-defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, true>;
-defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, true>;
+defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>;
+defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>;
// sin/cos
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
@@ -1367,7 +1367,7 @@ multiclass BFE<string TyStr, RegisterClass RC> {
!strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
}
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
defm BFE_S32 : BFE<"s32", Int32Regs>;
defm BFE_U32 : BFE<"u32", Int32Regs>;
defm BFE_S64 : BFE<"s64", Int64Regs>;
@@ -1381,7 +1381,7 @@ let hasSideEffects = 0 in {
// FIXME: This doesn't cover versions of set and setp that combine with a
// boolean predicate, e.g. setp.eq.and.b16.
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr :
NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
@@ -1427,7 +1427,7 @@ def SETP_f16x2rr :
// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
// reg, either u32, s32, or f32. Anyway these aren't used at the moment.
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, RC:$b, CmpMode:$cmp),
@@ -1462,7 +1462,7 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
// selp instructions that don't have any pattern matches; we explicitly use
// them within this file.
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs RC:$dst),
(ins RC:$a, RC:$b, Int1Regs:$p),
@@ -1572,7 +1572,7 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
[(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
// Get pointer to local stack.
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
"mov.u32 \t$d, __local_depot$num;", []>;
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
@@ -1988,7 +1988,7 @@ def ProxyReg :
SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-let mayLoad = 1 in {
+let mayLoad = true in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
!strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
@@ -2013,7 +2013,7 @@ class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
!strconcat("mov", opstr, " \t$dst, retval$b;"),
[(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
-let mayStore = 1 in {
+let mayStore = true in {
class StoreParamInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
!strconcat("st.param", opstr, " \t[param$a+$b], $val;"),
@@ -2823,7 +2823,7 @@ def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
// pack a set of smaller int registers to a larger int register
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2,
@@ -2856,7 +2856,7 @@ let hasSideEffects = 0 in {
}
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
// Extract element of f16x2 register. PTX does not provide any way
// to access elements of f16x2 vector directly, so we need to
// extract it using a temporary register.
@@ -2899,7 +2899,7 @@ let hasSideEffects = 0 in {
}
// Count leading zeros
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
"clz.b32 \t$d, $a;", []>;
def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
@@ -2937,7 +2937,7 @@ def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))),
(SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;
// Population count
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
"popc.b32 \t$d, $a;", []>;
def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 76a4a1d4030a..8ccd47c0fcfd 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -51,19 +51,19 @@ def ptx : PTX;
// Generates list of n sequential register names.
// E.g. RegNames<3,"r">.ret -> ["r0", "r1", "r2" ]
class RegSeq<int n, string prefix> {
- list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
- [prefix # !add(n, -1)]),
+ list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
+ [prefix # !sub(n, 1)]),
[]);
}
class THREADMASK_INFO<bit sync> {
- list<bit> ret = !if(sync, [0,1], [0]);
+ list<bit> ret = !if(sync, [0, 1], [0]);
}
//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
-let isConvergent = 1 in {
+let isConvergent = true in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
"bar.sync \t0;",
[(int_nvvm_barrier0)]>;
@@ -173,12 +173,12 @@ class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
)];
}
-foreach sync = [0, 1] in {
+foreach sync = [false, true] in {
foreach mode = ["up", "down", "bfly", "idx"] in {
foreach regclass = ["i32", "f32"] in {
- foreach return_pred = [0, 1] in {
- foreach offset_imm = [0, 1] in {
- foreach mask_imm = [0, 1] in {
+ foreach return_pred = [false, true] in {
+ foreach offset_imm = [false, true] in {
+ foreach mask_imm = [false, true] in {
foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
def : SHFL_INSTR<sync, mode, regclass, return_pred,
offset_imm, mask_imm, threadmask_imm>,
@@ -274,7 +274,7 @@ defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_s
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
i64imm>;
-} // isConvergent = 1
+} // isConvergent = true
//-----------------------------------
// Explicit Memory Fence Functions
@@ -1548,7 +1548,7 @@ multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
!cast<Intrinsic>(
"int_nvvm_atomic_" # OpStr
# "_" # SpaceStr # "_" # IntTypeStr
- # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
+ # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
@@ -1562,7 +1562,7 @@ multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
!cast<Intrinsic>(
"int_nvvm_atomic_" # OpStr
# "_" # SpaceStr # "_" # IntTypeStr
- # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
+ # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
regclass, ImmType, Imm, ImmTy, Preds>;
}
@@ -2131,7 +2131,7 @@ def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
(ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
Requires<[noHWROT32]> ;
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
!strconcat("{{\n\t",
".reg .b32 %dummy;\n\t",
@@ -2147,7 +2147,7 @@ let hasSideEffects = 0 in {
[]> ;
}
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
def PACK_TWO_INT32
: NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
"mov.b64 \t$dst, {{$lo, $hi}};", []> ;
@@ -2159,7 +2159,7 @@ def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// no side effects.
-let hasSideEffects = 0 in {
+let hasSideEffects = false in {
def SHF_L_WRAP_B32_IMM
: NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
@@ -2242,7 +2242,7 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
// also defined in NVPTXReplaceImageHandles.cpp
// texmode_independent
-let IsTex = 1, IsTexModeUnified = 0 in {
+let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
def TEX_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
@@ -2925,7 +2925,7 @@ def TLD4_A_2D_U32_F32
// texmode_unified
-let IsTex = 1, IsTexModeUnified = 1 in {
+let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles
def TEX_UNIFIED_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
@@ -3610,7 +3610,7 @@ def TLD4_UNIFIED_A_2D_U32_F32
//=== Surface load instructions
// .clamp variant
-let IsSuld = 1 in {
+let IsSuld = true in {
def SULD_1D_I8_CLAMP
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
@@ -3922,7 +3922,7 @@ def SULD_3D_V4I32_CLAMP
// .trap variant
-let IsSuld = 1 in {
+let IsSuld = true in {
def SULD_1D_I8_TRAP
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
@@ -4233,7 +4233,7 @@ def SULD_3D_V4I32_TRAP
}
// .zero variant
-let IsSuld = 1 in {
+let IsSuld = true in {
def SULD_1D_I8_ZERO
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
@@ -4547,7 +4547,7 @@ def SULD_3D_V4I32_ZERO
// Texture Query Intrinsics
//-----------------------------------
-let IsSurfTexQuery = 1 in {
+let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.channel_order.b32 \t$d, [$a];",
@@ -4604,7 +4604,7 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
// Surface Query Intrinsics
//-----------------------------------
-let IsSurfTexQuery = 1 in {
+let IsSurfTexQuery = true in {
def SUQ_CHANNEL_ORDER
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.channel_order.b32 \t$d, [$a];",
@@ -4663,7 +4663,7 @@ def ISTYPEP_TEXTURE
//===- Surface Stores -----------------------------------------------------===//
-let IsSust = 1 in {
+let IsSust = true in {
// Unformatted
// .clamp variant
def SUST_B_1D_B8_CLAMP
@@ -7361,16 +7361,13 @@ class WMMA_REGINFO<WMMA_REGS r>
!eq(ptx_elt_type, "b1") : Int32Regs);
// Instruction input/output arguments for the fragment.
- list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
+ list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
// List of register names for the fragment -- ["ra0", "ra1",...]
list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
// Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
- string regstring = "{{$" # !head(reg_names)
- # !foldl("", !tail(reg_names), a, b,
- !strconcat(a, ", $", b))
- # "}}";
+ string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
// Predicates for particular fragment variant. Technically those are
// per-instruction predicates, but currently all fragments that can be used in
@@ -7453,12 +7450,13 @@ class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
// To match the right intrinsic, we need to build AS-constrained PatFrag.
// Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
+ dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
// Build PatFrag that only matches particular address space.
PatFrag IntrFrag = PatFrag<PFOperands,
- !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
+ PFOperandsIntr,
!cond(!eq(Space, ".shared"): AS_match.shared,
!eq(Space, ".global"): AS_match.global,
- 1: AS_match.generic)>;
+ true: AS_match.generic)>;
// Build AS-constrained pattern.
let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
@@ -7493,14 +7491,14 @@ class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
// To match the right intrinsic, we need to build AS-constrained PatFrag.
// Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
dag PFOperands = !con((ops node:$dst),
- !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
+ !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
!if(WithStride, (ops node:$ldm), (ops)));
// Build PatFrag that only matches particular address space.
PatFrag IntrFrag = PatFrag<PFOperands,
!foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
!cond(!eq(Space, ".shared"): AS_match.shared,
!eq(Space, ".global"): AS_match.global,
- 1: AS_match.generic)>;
+ true: AS_match.generic)>;
// Build AS-constrained pattern.
let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
@@ -7521,14 +7519,14 @@ class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
// Create all load/store variants
defset list<WMMA_INSTR> MMA_LDSTs = {
foreach layout = ["row", "col"] in {
- foreach stride = [0, 1] in {
+ foreach stride = [false, true] in {
foreach space = [".global", ".shared", ""] in {
foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
foreach frag = NVVM_MMA_OPS.all_ld_ops in
- foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+ if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>;
foreach frag = NVVM_MMA_OPS.all_st_ops in
- foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+ if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>;
} // addr
} // space
@@ -7586,7 +7584,7 @@ defset list<WMMA_INSTR> MMAs = {
foreach layout_b = ["row", "col"] in {
foreach satf = [0, 1] in {
foreach op = NVVM_MMA_OPS.all_mma_ops in {
- foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
+ if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
def : WMMA_MMA<WMMA_REGINFO<op[0]>,
WMMA_REGINFO<op[1]>,
WMMA_REGINFO<op[2]>,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index e60b5eeacdae..fd58ff13788d 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -172,8 +172,12 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
Value *ArgInParam = new AddrSpaceCastInst(
Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
FirstInst);
+ // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
+ // addrspacecast preserves alignment. Since params are constant, this load is
+ // definitely not volatile.
LoadInst *LI =
- new LoadInst(StructType, ArgInParam, Arg->getName(), FirstInst);
+ new LoadInst(StructType, ArgInParam, Arg->getName(),
+ /*isVolatile=*/false, AllocA->getAlign(), FirstInst);
new StoreInst(LI, AllocA, FirstInst);
}
@@ -214,8 +218,7 @@ bool NVPTXLowerArgs::runOnKernelFunction(Function &F) {
for (auto &I : B) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (LI->getType()->isPointerTy()) {
- Value *UO = GetUnderlyingObject(LI->getPointerOperand(),
- F.getParent()->getDataLayout());
+ Value *UO = getUnderlyingObject(LI->getPointerOperand());
if (Argument *Arg = dyn_cast<Argument>(UO)) {
if (Arg->hasByValAttr()) {
// LI is a load from a pointer within a byval kernel parameter.
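The NVPTXLowerArgs hunk attaches the alloca's alignment to the parameter load because the addrspacecast hides it from LLVM. A sketch of building such a load through the same constructor form, assuming an LLVM 12 development setup is available to compile and link against; everything besides that constructor is illustrative scaffolding:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("align-demo", Ctx);
  IRBuilder<> B(Ctx);

  auto *FTy = FunctionType::get(B.getVoidTy(), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  B.SetInsertPoint(BB);

  // Stand-ins for the byval parameter copy in the pass: a source pointer and
  // the local alloca whose alignment the load should inherit.
  AllocaInst *Src = B.CreateAlloca(B.getInt32Ty());
  AllocaInst *AllocA = B.CreateAlloca(B.getInt32Ty());
  Instruction *Ret = B.CreateRetVoid();

  // Same constructor form as the hunk above: explicit non-volatile flag plus
  // the alloca's alignment, inserted before an existing instruction.
  LoadInst *LI = new LoadInst(B.getInt32Ty(), Src, "val",
                              /*isVolatile=*/false, AllocA->getAlign(), Ret);
  new StoreInst(LI, AllocA, Ret);

  M.print(outs(), nullptr);
  return 0;
}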
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index ea2274f394e6..756355f75e3d 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -69,7 +69,8 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
"operand of a DBG_VALUE machine instruction");
Register Reg;
int64_t Offset =
- TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
+ TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg)
+ .getFixed();
MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false);
MI.getOperand(0).setIsDebug();
auto *DIExpr = DIExpression::prepend(
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 4b755dcb55ff..19895a20bacf 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -30,7 +30,7 @@ def VRDepot : NVPTXReg<"%Depot">;
// We use virtual registers, but define a few physical registers here to keep
// SDAG and the MachineInstr layers happy.
-foreach i = 0-4 in {
+foreach i = 0...4 in {
def P#i : NVPTXReg<"%p"#i>; // Predicate
def RS#i : NVPTXReg<"%rs"#i>; // 16-bit
def R#i : NVPTXReg<"%r"#i>; // 32-bit
@@ -47,7 +47,7 @@ foreach i = 0-4 in {
def da#i : NVPTXReg<"%da"#i>;
}
-foreach i = 0-31 in {
+foreach i = 0...31 in {
def ENVREG#i : NVPTXReg<"%envreg"#i>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index f1fa6416f15f..05c20369abf4 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -35,7 +35,7 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
// Provide the default CPU if we don't have one.
TargetName = std::string(CPU.empty() ? "sm_20" : CPU);
- ParseSubtargetFeatures(TargetName, FS);
+ ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
// Set default to PTX 3.2 (CUDA 5.5)
if (PTXVersion == 0) {
@@ -48,9 +48,9 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
- : NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM),
- InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)),
- FrameLowering() {}
+ : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
+ SmVersion(20), TM(TM), InstrInfo(),
+ TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), FrameLowering() {}
bool NVPTXSubtarget::hasImageHandles() const {
// Enable handles for Kepler+, where CUDA supports indirect surfaces and
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 0e9fa1fd3e56..9a249d3da3d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -83,7 +83,7 @@ public:
unsigned getPTXVersion() const { return PTXVersion; }
NVPTXSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 85709eb731e2..f1a82f1cf607 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
+#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
@@ -170,11 +171,11 @@ public:
void addFastRegAlloc() override;
void addOptimizedRegAlloc() override;
- bool addRegAssignmentFast() override {
+ bool addRegAssignAndRewriteFast() override {
llvm_unreachable("should not be used");
}
- bool addRegAssignmentOptimized() override {
+ bool addRegAssignAndRewriteOptimized() override {
llvm_unreachable("should not be used");
}
@@ -205,6 +206,32 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
});
}
+void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
+ bool DebugPassManager) {
+ PB.registerPipelineParsingCallback(
+ [](StringRef PassName, FunctionPassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+ if (PassName == "nvvm-reflect") {
+ PM.addPass(NVVMReflectPass());
+ return true;
+ }
+ if (PassName == "nvvm-intr-range") {
+ PM.addPass(NVVMIntrRangePass());
+ return true;
+ }
+ return false;
+ });
+
+ PB.registerPipelineStartEPCallback(
+ [this, DebugPassManager](ModulePassManager &PM,
+ PassBuilder::OptimizationLevel Level) {
+ FunctionPassManager FPM(DebugPassManager);
+ FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
+ FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion()));
+ PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ });
+}
+
TargetTransformInfo
NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(NVPTXTTIImpl(this, F));
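registerPassBuilderCallbacks teaches the new pass manager's textual pipeline parser to recognise "nvvm-reflect" and "nvvm-intr-range", and schedules both passes at the start of the default pipeline. A toy model of the parsing-callback mechanism only, not the real PassBuilder API:

#include <cassert>
#include <functional>
#include <string>
#include <vector>

// Toy model: a target registers callbacks that recognise its pass names, and
// the pipeline parser asks each callback in turn until one claims the name.
struct PassManagerModel { std::vector<std::string> Passes; };
using ParseCallback =
    std::function<bool(const std::string &, PassManagerModel &)>;

struct PipelineParser {
  std::vector<ParseCallback> Callbacks;
  bool parsePassName(const std::string &Name, PassManagerModel &PM) {
    for (auto &CB : Callbacks)
      if (CB(Name, PM))
        return true; // some target recognised the name
    return false;    // unknown pass name
  }
};

int main() {
  PipelineParser Parser;
  // What the NVPTX hook above effectively does for its two pass names.
  Parser.Callbacks.push_back(
      [](const std::string &Name, PassManagerModel &PM) {
        if (Name == "nvvm-reflect" || Name == "nvvm-intr-range") {
          PM.Passes.push_back(Name);
          return true;
        }
        return false;
      });

  PassManagerModel PM;
  assert(Parser.parsePassName("nvvm-reflect", PM));
  assert(!Parser.parsePassName("not-a-pass", PM));
  return 0;
}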
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index d84600c74e29..bef541c2b28d 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -15,8 +15,6 @@
#include "ManagedStringPool.h"
#include "NVPTXSubtarget.h"
-#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -64,6 +62,8 @@ public:
}
void adjustPassManager(PassManagerBuilder &) override;
+ void registerPassBuilderCallbacks(PassBuilder &PB,
+ bool DebugPassManager) override;
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 3873c73fb2e0..d4b2ae384068 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -111,6 +111,263 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
return false;
}
+// Convert NVVM intrinsics to target-generic LLVM code where possible.
+static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
+ // Each NVVM intrinsic we can simplify can be replaced with one of:
+ //
+ // * an LLVM intrinsic,
+ // * an LLVM cast operation,
+ // * an LLVM binary operation, or
+ // * ad-hoc LLVM IR for the particular operation.
+
+ // Some transformations are only valid when the module's
+ // flush-denormals-to-zero (ftz) setting is true/false, whereas other
+ // transformations are valid regardless of the module's ftz setting.
+ enum FtzRequirementTy {
+ FTZ_Any, // Any ftz setting is ok.
+ FTZ_MustBeOn, // Transformation is valid only if ftz is on.
+ FTZ_MustBeOff, // Transformation is valid only if ftz is off.
+ };
+ // Classes of NVVM intrinsics that can't be replaced one-to-one with a
+ // target-generic intrinsic, cast op, or binary op but that we can nonetheless
+ // simplify.
+ enum SpecialCase {
+ SPC_Reciprocal,
+ };
+
+ // SimplifyAction is a poor-man's variant (plus an additional flag) that
+ // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
+ struct SimplifyAction {
+ // Invariant: At most one of these Optionals has a value.
+ Optional<Intrinsic::ID> IID;
+ Optional<Instruction::CastOps> CastOp;
+ Optional<Instruction::BinaryOps> BinaryOp;
+ Optional<SpecialCase> Special;
+
+ FtzRequirementTy FtzRequirement = FTZ_Any;
+
+ SimplifyAction() = default;
+
+ SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
+ : IID(IID), FtzRequirement(FtzReq) {}
+
+ // Cast operations don't have anything to do with FTZ, so we skip that
+ // argument.
+ SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
+
+ SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
+ : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
+
+ SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
+ : Special(Special), FtzRequirement(FtzReq) {}
+ };
+
+ // Try to generate a SimplifyAction describing how to replace our
+ // IntrinsicInstr with target-generic LLVM IR.
+ const SimplifyAction Action = [II]() -> SimplifyAction {
+ switch (II->getIntrinsicID()) {
+ // NVVM intrinsics that map directly to LLVM intrinsics.
+ case Intrinsic::nvvm_ceil_d:
+ return {Intrinsic::ceil, FTZ_Any};
+ case Intrinsic::nvvm_ceil_f:
+ return {Intrinsic::ceil, FTZ_MustBeOff};
+ case Intrinsic::nvvm_ceil_ftz_f:
+ return {Intrinsic::ceil, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fabs_d:
+ return {Intrinsic::fabs, FTZ_Any};
+ case Intrinsic::nvvm_fabs_f:
+ return {Intrinsic::fabs, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fabs_ftz_f:
+ return {Intrinsic::fabs, FTZ_MustBeOn};
+ case Intrinsic::nvvm_floor_d:
+ return {Intrinsic::floor, FTZ_Any};
+ case Intrinsic::nvvm_floor_f:
+ return {Intrinsic::floor, FTZ_MustBeOff};
+ case Intrinsic::nvvm_floor_ftz_f:
+ return {Intrinsic::floor, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fma_rn_d:
+ return {Intrinsic::fma, FTZ_Any};
+ case Intrinsic::nvvm_fma_rn_f:
+ return {Intrinsic::fma, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fma_rn_ftz_f:
+ return {Intrinsic::fma, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fmax_d:
+ return {Intrinsic::maxnum, FTZ_Any};
+ case Intrinsic::nvvm_fmax_f:
+ return {Intrinsic::maxnum, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fmax_ftz_f:
+ return {Intrinsic::maxnum, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fmin_d:
+ return {Intrinsic::minnum, FTZ_Any};
+ case Intrinsic::nvvm_fmin_f:
+ return {Intrinsic::minnum, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fmin_ftz_f:
+ return {Intrinsic::minnum, FTZ_MustBeOn};
+ case Intrinsic::nvvm_round_d:
+ return {Intrinsic::round, FTZ_Any};
+ case Intrinsic::nvvm_round_f:
+ return {Intrinsic::round, FTZ_MustBeOff};
+ case Intrinsic::nvvm_round_ftz_f:
+ return {Intrinsic::round, FTZ_MustBeOn};
+ case Intrinsic::nvvm_sqrt_rn_d:
+ return {Intrinsic::sqrt, FTZ_Any};
+ case Intrinsic::nvvm_sqrt_f:
+ // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
+ // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
+ // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
+ // the versions with explicit ftz-ness.
+ return {Intrinsic::sqrt, FTZ_Any};
+ case Intrinsic::nvvm_sqrt_rn_f:
+ return {Intrinsic::sqrt, FTZ_MustBeOff};
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
+ return {Intrinsic::sqrt, FTZ_MustBeOn};
+ case Intrinsic::nvvm_trunc_d:
+ return {Intrinsic::trunc, FTZ_Any};
+ case Intrinsic::nvvm_trunc_f:
+ return {Intrinsic::trunc, FTZ_MustBeOff};
+ case Intrinsic::nvvm_trunc_ftz_f:
+ return {Intrinsic::trunc, FTZ_MustBeOn};
+
+ // NVVM intrinsics that map to LLVM cast operations.
+ //
+ // Note that llvm's target-generic conversion operators correspond to the rz
+ // (round to zero) versions of the nvvm conversion intrinsics, even though
+ // most everything else here uses the rn (round to nearest even) nvvm ops.
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_f2ll_rz:
+ return {Instruction::FPToSI};
+ case Intrinsic::nvvm_d2ui_rz:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_d2ull_rz:
+ case Intrinsic::nvvm_f2ull_rz:
+ return {Instruction::FPToUI};
+ case Intrinsic::nvvm_i2d_rz:
+ case Intrinsic::nvvm_i2f_rz:
+ case Intrinsic::nvvm_ll2d_rz:
+ case Intrinsic::nvvm_ll2f_rz:
+ return {Instruction::SIToFP};
+ case Intrinsic::nvvm_ui2d_rz:
+ case Intrinsic::nvvm_ui2f_rz:
+ case Intrinsic::nvvm_ull2d_rz:
+ case Intrinsic::nvvm_ull2f_rz:
+ return {Instruction::UIToFP};
+
+ // NVVM intrinsics that map to LLVM binary ops.
+ case Intrinsic::nvvm_add_rn_d:
+ return {Instruction::FAdd, FTZ_Any};
+ case Intrinsic::nvvm_add_rn_f:
+ return {Instruction::FAdd, FTZ_MustBeOff};
+ case Intrinsic::nvvm_add_rn_ftz_f:
+ return {Instruction::FAdd, FTZ_MustBeOn};
+ case Intrinsic::nvvm_mul_rn_d:
+ return {Instruction::FMul, FTZ_Any};
+ case Intrinsic::nvvm_mul_rn_f:
+ return {Instruction::FMul, FTZ_MustBeOff};
+ case Intrinsic::nvvm_mul_rn_ftz_f:
+ return {Instruction::FMul, FTZ_MustBeOn};
+ case Intrinsic::nvvm_div_rn_d:
+ return {Instruction::FDiv, FTZ_Any};
+ case Intrinsic::nvvm_div_rn_f:
+ return {Instruction::FDiv, FTZ_MustBeOff};
+ case Intrinsic::nvvm_div_rn_ftz_f:
+ return {Instruction::FDiv, FTZ_MustBeOn};
+
+ // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
+ // need special handling.
+ //
+ // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
+ // as well.
+ case Intrinsic::nvvm_rcp_rn_d:
+ return {SPC_Reciprocal, FTZ_Any};
+ case Intrinsic::nvvm_rcp_rn_f:
+ return {SPC_Reciprocal, FTZ_MustBeOff};
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ return {SPC_Reciprocal, FTZ_MustBeOn};
+
+ // We do not currently simplify intrinsics that give an approximate
+ // answer. These include:
+ //
+ // - nvvm_cos_approx_{f,ftz_f}
+ // - nvvm_ex2_approx_{d,f,ftz_f}
+ // - nvvm_lg2_approx_{d,f,ftz_f}
+ // - nvvm_sin_approx_{f,ftz_f}
+ // - nvvm_sqrt_approx_{f,ftz_f}
+ // - nvvm_rsqrt_approx_{d,f,ftz_f}
+ // - nvvm_div_approx_{ftz_d,ftz_f,f}
+ // - nvvm_rcp_approx_ftz_d
+ //
+ // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
+ // means that fastmath is enabled in the intrinsic. Unfortunately only
+ // binary operators (currently) have a fastmath bit in SelectionDAG, so
+ // this information gets lost and we can't select on it.
+ //
+ // TODO: div and rcp are lowered to a binary op, so these we could in
+ // theory lower them to "fast fdiv".
+
+ default:
+ return {};
+ }
+ }();
+
+ // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
+ // can bail out now. (Notice that in the case that IID is not an NVVM
+ // intrinsic, we don't have to look up any module metadata, as
+ // FtzRequirementTy will be FTZ_Any.)
+ if (Action.FtzRequirement != FTZ_Any) {
+ StringRef Attr = II->getFunction()
+ ->getFnAttribute("denormal-fp-math-f32")
+ .getValueAsString();
+ DenormalMode Mode = parseDenormalFPAttribute(Attr);
+ bool FtzEnabled = Mode.Output != DenormalMode::IEEE;
+
+ if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
+ return nullptr;
+ }
+
+ // Simplify to target-generic intrinsic.
+ if (Action.IID) {
+ SmallVector<Value *, 4> Args(II->arg_operands());
+ // All the target-generic intrinsics currently of interest to us have one
+ // type argument, equal to that of the nvvm intrinsic's argument.
+ Type *Tys[] = {II->getArgOperand(0)->getType()};
+ return CallInst::Create(
+ Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
+ }
+
+ // Simplify to target-generic binary op.
+ if (Action.BinaryOp)
+ return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
+ II->getArgOperand(1), II->getName());
+
+ // Simplify to target-generic cast op.
+ if (Action.CastOp)
+ return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
+ II->getName());
+
+ // All that's left are the special cases.
+ if (!Action.Special)
+ return nullptr;
+
+ switch (*Action.Special) {
+ case SPC_Reciprocal:
+ // Simplify reciprocal.
+ return BinaryOperator::Create(
+ Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
+ II->getArgOperand(0), II->getName());
+ }
+ llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
+}
+
+Optional<Instruction *>
+NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+ if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) {
+ return I;
+ }
+ return None;
+}
+
int NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info,
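simplifyNvvmIntrinsic above is essentially a lookup from an NVVM intrinsic to a target-generic replacement plus a flush-to-zero precondition, followed by a bail-out when the module's ftz state does not satisfy that precondition. A standalone table-driven sketch of that control flow; the string names are illustrative shorthand, not real intrinsic identifiers:

#include <cassert>
#include <map>
#include <string>

// Toy model of the SimplifyAction table: map an NVVM intrinsic name to its
// target-generic replacement plus the flush-to-zero requirement that must
// hold for the rewrite to be sound.
enum FtzRequirement { FTZ_Any, FTZ_MustBeOn, FTZ_MustBeOff };
struct Action { std::string Replacement; FtzRequirement Ftz; };

static const std::map<std::string, Action> Table = {
    {"nvvm.fma.rn.d",     {"llvm.fma",  FTZ_Any}},
    {"nvvm.fma.rn.f",     {"llvm.fma",  FTZ_MustBeOff}},
    {"nvvm.fma.rn.ftz.f", {"llvm.fma",  FTZ_MustBeOn}},
    {"nvvm.rcp.rn.f",     {"fdiv 1, x", FTZ_MustBeOff}},
};

// Returns the replacement name, or "" if the module's ftz setting rules the
// rewrite out (mirroring the early bail-out in simplifyNvvmIntrinsic).
static std::string simplify(const std::string &Intr, bool ModuleFtz) {
  auto It = Table.find(Intr);
  if (It == Table.end())
    return "";
  const Action &A = It->second;
  if (A.Ftz != FTZ_Any && ModuleFtz != (A.Ftz == FTZ_MustBeOn))
    return "";
  return A.Replacement;
}

int main() {
  assert(simplify("nvvm.fma.rn.ftz.f", /*ModuleFtz=*/true) == "llvm.fma");
  assert(simplify("nvvm.fma.rn.ftz.f", /*ModuleFtz=*/false) == "");        // ftz mismatch
  assert(simplify("nvvm.fma.rn.d",     /*ModuleFtz=*/false) == "llvm.fma"); // any ftz ok
  return 0;
}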
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index cb832031f1ad..6f071040dd9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -48,6 +48,9 @@ public:
return AddressSpace::ADDRESS_SPACE_GENERIC;
}
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const;
+
// Loads and stores can be vectorized if the alignment is at least as big as
// the load/store we want to vectorize.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
index baaedc7ac87c..5381646434eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -32,21 +33,13 @@ static cl::opt<unsigned> NVVMIntrRangeSM("nvvm-intr-range-sm", cl::init(20),
namespace {
class NVVMIntrRange : public FunctionPass {
private:
- struct {
- unsigned x, y, z;
- } MaxBlockSize, MaxGridSize;
+ unsigned SmVersion;
public:
static char ID;
NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {}
- NVVMIntrRange(unsigned int SmVersion) : FunctionPass(ID) {
- MaxBlockSize.x = 1024;
- MaxBlockSize.y = 1024;
- MaxBlockSize.z = 64;
-
- MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff;
- MaxGridSize.y = 0xffff;
- MaxGridSize.z = 0xffff;
+ NVVMIntrRange(unsigned int SmVersion)
+ : FunctionPass(ID), SmVersion(SmVersion) {
initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
}
@@ -79,7 +72,18 @@ static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) {
return true;
}
-bool NVVMIntrRange::runOnFunction(Function &F) {
+static bool runNVVMIntrRange(Function &F, unsigned SmVersion) {
+ struct {
+ unsigned x, y, z;
+ } MaxBlockSize, MaxGridSize;
+ MaxBlockSize.x = 1024;
+ MaxBlockSize.y = 1024;
+ MaxBlockSize.z = 64;
+
+ MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff;
+ MaxGridSize.y = 0xffff;
+ MaxGridSize.z = 0xffff;
+
// Go through the calls in this function.
bool Changed = false;
for (Instruction &I : instructions(F)) {
@@ -151,3 +155,15 @@ bool NVVMIntrRange::runOnFunction(Function &F) {
return Changed;
}
+
+bool NVVMIntrRange::runOnFunction(Function &F) {
+ return runNVVMIntrRange(F, SmVersion);
+}
+
+NVVMIntrRangePass::NVVMIntrRangePass() : NVVMIntrRangePass(NVVMIntrRangeSM) {}
+
+PreservedAnalyses NVVMIntrRangePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return runNVVMIntrRange(F, SmVersion) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
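The NVVMIntrRange refactor follows the usual recipe for supporting both pass managers: the work moves into a free function, the legacy pass forwards its changed flag, and the new-PM wrapper translates that flag into PreservedAnalyses::none() or ::all(). A schematic of that split using toy stand-ins for the LLVM types (the same shape is applied to NVVMReflect just below):

#include <cassert>

struct Function { bool HasRangeMetadata = false; }; // toy stand-in

// Shared implementation, playing the role of runNVVMIntrRange above.
static bool runIntrRange(Function &F, unsigned SmVersion) {
  if (SmVersion < 20)
    return false;            // toy rule: nothing to annotate for old targets
  F.HasRangeMetadata = true;
  return true;               // report "changed"
}

// Legacy pass interface: forwards the changed flag.
struct LegacyIntrRange {
  unsigned SmVersion;
  bool runOnFunction(Function &F) { return runIntrRange(F, SmVersion); }
};

// New-PM interface: maps changed / unchanged onto "preserve nothing" vs
// "preserve everything", which is what PreservedAnalyses::none()/all() encode.
struct IntrRangePass {
  unsigned SmVersion;
  bool preservesAllAnalyses(Function &F) { return !runIntrRange(F, SmVersion); }
};

int main() {
  Function F1, F2;
  LegacyIntrRange Legacy{70};
  IntrRangePass NewPM{70};
  assert(Legacy.runOnFunction(F1) && F1.HasRangeMetadata);
  assert(!NewPM.preservesAllAnalyses(F2)); // F2 changed, so preserve nothing
  return 0;
}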
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index ae166dc5a8d5..339f51d21087 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -73,7 +74,7 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
"Replace occurrences of __nvvm_reflect() calls with 0/1", false,
false)
-bool NVVMReflect::runOnFunction(Function &F) {
+static bool runNVVMReflect(Function &F, unsigned SmVersion) {
if (!NVVMReflectEnabled)
return false;
@@ -179,3 +180,15 @@ bool NVVMReflect::runOnFunction(Function &F) {
return ToRemove.size() > 0;
}
+
+bool NVVMReflect::runOnFunction(Function &F) {
+ return runNVVMReflect(F, SmVersion);
+}
+
+NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
+
+PreservedAnalyses NVVMReflectPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return runNVVMReflect(F, SmVersion) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 13fd7d05ab9f..197fd3c7aa74 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -11,7 +11,6 @@
#include "PPCTargetStreamer.h"
#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -99,12 +98,10 @@ struct PPCOperand;
class PPCAsmParser : public MCTargetAsmParser {
bool IsPPC64;
- bool IsDarwin;
void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
bool isPPC64() const { return IsPPC64; }
- bool isDarwin() const { return IsDarwin; }
bool MatchRegisterName(unsigned &RegNo, int64_t &IntVal);
@@ -116,14 +113,12 @@ class PPCAsmParser : public MCTargetAsmParser {
PPCMCExpr::VariantKind &Variant);
const MCExpr *FixupVariantKind(const MCExpr *E);
bool ParseExpression(const MCExpr *&EVal);
- bool ParseDarwinExpression(const MCExpr *&EVal);
bool ParseOperand(OperandVector &Operands);
bool ParseDirectiveWord(unsigned Size, AsmToken ID);
bool ParseDirectiveTC(unsigned Size, AsmToken ID);
bool ParseDirectiveMachine(SMLoc L);
- bool ParseDarwinDirectiveMachine(SMLoc L);
bool ParseDirectiveAbiVersion(SMLoc L);
bool ParseDirectiveLocalEntry(SMLoc L);
@@ -150,7 +145,6 @@ public:
// Check for 64-bit vs. 32-bit pointer mode.
const Triple &TheTriple = STI.getTargetTriple();
IsPPC64 = TheTriple.isPPC64();
- IsDarwin = TheTriple.isMacOSX();
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
@@ -290,6 +284,16 @@ public:
return (unsigned) Imm.Val;
}
+ unsigned getACCReg() const {
+ assert(isACCRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
+ unsigned getVSRpEvenReg() const {
+ assert(isVSRpEvenRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val >> 1;
+ }
+
unsigned getCCReg() const {
assert(isCCRegNumber() && "Invalid access!");
return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
@@ -402,6 +406,12 @@ public:
(getImm() & 3) == 0); }
bool isImmZero() const { return Kind == Immediate && getImm() == 0; }
bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isACCRegNumber() const {
+ return Kind == Immediate && isUInt<3>(getImm());
+ }
+ bool isVSRpEvenRegNumber() const {
+ return Kind == Immediate && isUInt<6>(getImm()) && ((getImm() & 1) == 0);
+ }
bool isVSRegNumber() const {
return Kind == Immediate && isUInt<6>(getImm());
}
@@ -492,29 +502,29 @@ public:
Inst.addOperand(MCOperand::createReg(VSSRegs[getVSReg()]));
}
- void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
+ void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
}
- void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
+ void addRegSPERCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(SPERegs[getReg()]));
}
- void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
+ void addRegACCRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(ACCRegs[getACCReg()]));
}
- void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const {
+ void addRegVSRpRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()]));
}
- void addRegSPERCOperands(MCInst &Inst, unsigned N) const {
+ void addRegVSRpEvenRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(SPERegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()]));
}
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
@@ -666,7 +676,8 @@ public:
return CreateImm(CE->getValue(), S, E, IsPPC64);
if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Val))
- if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS)
+ if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS_PCREL)
return CreateTLSReg(SRE, S, E, IsPPC64);
if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) {
@@ -762,12 +773,18 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
}
case PPC::DCBFx:
case PPC::DCBFL:
- case PPC::DCBFLP: {
+ case PPC::DCBFLP:
+ case PPC::DCBFPS:
+ case PPC::DCBSTPS: {
int L = 0;
if (Opcode == PPC::DCBFL)
L = 1;
else if (Opcode == PPC::DCBFLP)
L = 3;
+ else if (Opcode == PPC::DCBFPS)
+ L = 4;
+ else if (Opcode == PPC::DCBSTPS)
+ L = 6;
MCInst TmpInst;
TmpInst.setOpcode(PPC::DCBF);
@@ -1184,41 +1201,41 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) {
- if (getParser().getTok().is(AsmToken::Identifier)) {
- StringRef Name = getParser().getTok().getString();
- if (Name.equals_lower("lr")) {
- RegNo = isPPC64()? PPC::LR8 : PPC::LR;
- IntVal = 8;
- } else if (Name.equals_lower("ctr")) {
- RegNo = isPPC64()? PPC::CTR8 : PPC::CTR;
- IntVal = 9;
- } else if (Name.equals_lower("vrsave")) {
- RegNo = PPC::VRSAVE;
- IntVal = 256;
- } else if (Name.startswith_lower("r") &&
- !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
- RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
- } else if (Name.startswith_lower("f") &&
- !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
- RegNo = FRegs[IntVal];
- } else if (Name.startswith_lower("vs") &&
- !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) {
- RegNo = VSRegs[IntVal];
- } else if (Name.startswith_lower("v") &&
- !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
- RegNo = VRegs[IntVal];
- } else if (Name.startswith_lower("q") &&
- !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
- RegNo = QFRegs[IntVal];
- } else if (Name.startswith_lower("cr") &&
- !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
- RegNo = CRRegs[IntVal];
- } else
- return true;
- getParser().Lex();
- return false;
- }
- return true;
+ if (getParser().getTok().is(AsmToken::Percent))
+ getParser().Lex(); // Eat the '%'.
+
+ if (!getParser().getTok().is(AsmToken::Identifier))
+ return true;
+
+ StringRef Name = getParser().getTok().getString();
+ if (Name.equals_lower("lr")) {
+ RegNo = isPPC64() ? PPC::LR8 : PPC::LR;
+ IntVal = 8;
+ } else if (Name.equals_lower("ctr")) {
+ RegNo = isPPC64() ? PPC::CTR8 : PPC::CTR;
+ IntVal = 9;
+ } else if (Name.equals_lower("vrsave")) {
+ RegNo = PPC::VRSAVE;
+ IntVal = 256;
+ } else if (Name.startswith_lower("r") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = isPPC64() ? XRegs[IntVal] : RRegs[IntVal];
+ } else if (Name.startswith_lower("f") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = FRegs[IntVal];
+ } else if (Name.startswith_lower("vs") &&
+ !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) {
+ RegNo = VSRegs[IntVal];
+ } else if (Name.startswith_lower("v") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = VRegs[IntVal];
+ } else if (Name.startswith_lower("cr") &&
+ !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
+ RegNo = CRRegs[IntVal];
+ } else
+ return true;
+ getParser().Lex();
+ return false;
}
bool PPCAsmParser::
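Aside (illustrative, not part of the patch): with the Darwin paths gone, MatchRegisterName now consumes an optional leading '%' itself, so its callers no longer pre-lex it. A rough sketch of the spellings accepted above, assuming llvm/ADT/StringRef.h and with the numeric range checks omitted; the QPX "q0".."q31" names are gone, matching the QPX removal elsewhere in this merge:

    // Accepted name prefixes only; the real code also validates the number range.
    static bool looksLikePPCRegName(llvm::StringRef Name) {
      return Name.equals_lower("lr") || Name.equals_lower("ctr") ||
             Name.equals_lower("vrsave") ||
             Name.startswith_lower("cr") || Name.startswith_lower("vs") ||
             Name.startswith_lower("r") || Name.startswith_lower("f") ||
             Name.startswith_lower("v");
    }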
@@ -1387,10 +1404,6 @@ FixupVariantKind(const MCExpr *E) {
/// it handles modifiers.
bool PPCAsmParser::
ParseExpression(const MCExpr *&EVal) {
-
- if (isDarwin())
- return ParseDarwinExpression(EVal);
-
// (ELF Platforms)
// Handle \code @l/@ha \endcode
if (getParser().parseExpression(EVal))
@@ -1406,53 +1419,6 @@ ParseExpression(const MCExpr *&EVal) {
return false;
}
-/// ParseDarwinExpression. (MachO Platforms)
-/// This differs from the default "parseExpression" in that it handles detection
-/// of the \code hi16(), ha16() and lo16() \endcode modifiers. At present,
-/// parseExpression() doesn't recognise the modifiers when in the Darwin/MachO
-/// syntax form so it is done here. TODO: Determine if there is merit in
-/// arranging for this to be done at a higher level.
-bool PPCAsmParser::
-ParseDarwinExpression(const MCExpr *&EVal) {
- MCAsmParser &Parser = getParser();
- PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None;
- switch (getLexer().getKind()) {
- default:
- break;
- case AsmToken::Identifier:
- // Compiler-generated Darwin identifiers begin with L,l,_ or "; thus
- // something starting with any other char should be part of the
- // asm syntax. If handwritten asm includes an identifier like lo16,
- // then all bets are off - but no-one would do that, right?
- StringRef poss = Parser.getTok().getString();
- if (poss.equals_lower("lo16")) {
- Variant = PPCMCExpr::VK_PPC_LO;
- } else if (poss.equals_lower("hi16")) {
- Variant = PPCMCExpr::VK_PPC_HI;
- } else if (poss.equals_lower("ha16")) {
- Variant = PPCMCExpr::VK_PPC_HA;
- }
- if (Variant != PPCMCExpr::VK_PPC_None) {
- Parser.Lex(); // Eat the xx16
- if (getLexer().isNot(AsmToken::LParen))
- return Error(Parser.getTok().getLoc(), "expected '('");
- Parser.Lex(); // Eat the '('
- }
- break;
- }
-
- if (getParser().parseExpression(EVal))
- return true;
-
- if (Variant != PPCMCExpr::VK_PPC_None) {
- if (getLexer().isNot(AsmToken::RParen))
- return Error(Parser.getTok().getLoc(), "expected ')'");
- Parser.Lex(); // Eat the ')'
- EVal = PPCMCExpr::create(Variant, EVal, getParser().getContext());
- }
- return false;
-}
-
/// ParseOperand
/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
/// rNN for MachO.
@@ -1466,8 +1432,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
switch (getLexer().getKind()) {
// Special handling for register names. These are interpreted
// as immediates corresponding to the register number.
- case AsmToken::Percent:
- Parser.Lex(); // Eat the '%'.
+ case AsmToken::Percent: {
unsigned RegNo;
int64_t IntVal;
if (MatchRegisterName(RegNo, IntVal))
@@ -1475,7 +1440,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
return false;
-
+ }
case AsmToken::Identifier:
case AsmToken::LParen:
case AsmToken::Plus:
@@ -1485,20 +1450,6 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
case AsmToken::Dollar:
case AsmToken::Exclaim:
case AsmToken::Tilde:
- // Note that non-register-name identifiers from the compiler will begin
- // with '_', 'L'/'l' or '"'. Of course, handwritten asm could include
- // identifiers like r31foo - so we fall through in the event that parsing
- // a register name fails.
- if (isDarwin()) {
- unsigned RegNo;
- int64_t IntVal;
- if (!MatchRegisterName(RegNo, IntVal)) {
- Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
- return false;
- }
- }
- // All other expressions
-
if (!ParseExpression(EVal))
break;
// Fall-through
@@ -1537,29 +1488,18 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
int64_t IntVal;
switch (getLexer().getKind()) {
- case AsmToken::Percent:
- Parser.Lex(); // Eat the '%'.
+ case AsmToken::Percent: {
unsigned RegNo;
if (MatchRegisterName(RegNo, IntVal))
return Error(S, "invalid register name");
break;
-
+ }
case AsmToken::Integer:
- if (isDarwin())
- return Error(S, "unexpected integer value");
- else if (getParser().parseAbsoluteExpression(IntVal) || IntVal < 0 ||
- IntVal > 31)
+ if (getParser().parseAbsoluteExpression(IntVal) || IntVal < 0 ||
+ IntVal > 31)
return Error(S, "invalid register number");
break;
- case AsmToken::Identifier:
- if (isDarwin()) {
- unsigned RegNo;
- if (!MatchRegisterName(RegNo, IntVal)) {
- break;
- }
- }
- LLVM_FALLTHROUGH;
-
+ case AsmToken::Identifier:
default:
return Error(S, "invalid memory operand");
}
@@ -1643,12 +1583,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
/// ParseDirective parses the PPC specific directives
bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
- if (isDarwin()) {
- if (IDVal == ".machine")
- ParseDarwinDirectiveMachine(DirectiveID.getLoc());
- else
- return true;
- } else if (IDVal == ".word")
+ if (IDVal == ".word")
ParseDirectiveWord(2, DirectiveID);
else if (IDVal == ".llong")
ParseDirectiveWord(8, DirectiveID);
@@ -1720,11 +1655,7 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
// FIXME: Right now, the parser always allows any available
// instruction, so the .machine directive is not useful.
- // Implement ".machine any" (by doing nothing) for the benefit
- // of existing assembler code. Likewise, we can then implement
- // ".machine push" and ".machine pop" as no-op.
- if (CPU != "any" && CPU != "push" && CPU != "pop")
- return TokError("unrecognized machine type");
+ // In the wild, any/push/pop/ppc64/altivec/power[4-9] are seen.
Parser.Lex();
@@ -1739,31 +1670,6 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
return false;
}
-/// ParseDarwinDirectiveMachine (Mach-o platforms)
-/// ::= .machine cpu-identifier
-bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
- MCAsmParser &Parser = getParser();
- if (Parser.getTok().isNot(AsmToken::Identifier) &&
- Parser.getTok().isNot(AsmToken::String))
- return Error(L, "unexpected token in directive");
-
- StringRef CPU = Parser.getTok().getIdentifier();
- Parser.Lex();
-
- // FIXME: this is only the 'default' set of cpu variants.
- // However we don't act on this information at present, this is simply
- // allowing parsing to proceed with minimal sanity checking.
- if (check(CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64", L,
- "unrecognized cpu type") ||
- check(isPPC64() && (CPU == "ppc7400" || CPU == "ppc"), L,
- "wrong cpu type specified for 64bit") ||
- check(!isPPC64() && CPU == "ppc64", L,
- "wrong cpu type specified for 32bit") ||
- parseToken(AsmToken::EndOfStatement))
- return addErrorSuffix(" in '.machine' directive");
- return false;
-}
-
/// ParseDirectiveAbiVersion
/// ::= .abiversion constant-expression
bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
@@ -1809,8 +1715,9 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmParser() {
RegisterMCAsmParser<PPCAsmParser> A(getThePPC32Target());
- RegisterMCAsmParser<PPCAsmParser> B(getThePPC64Target());
- RegisterMCAsmParser<PPCAsmParser> C(getThePPC64LETarget());
+ RegisterMCAsmParser<PPCAsmParser> B(getThePPC32LETarget());
+ RegisterMCAsmParser<PPCAsmParser> C(getThePPC64Target());
+ RegisterMCAsmParser<PPCAsmParser> D(getThePPC64LETarget());
}
#define GET_REGISTER_MATCHER
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 74c6fd3733f0..3e9286fb0b30 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -54,6 +54,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() {
// Register the disassembler for each target.
TargetRegistry::RegisterMCDisassembler(getThePPC32Target(),
createPPCDisassembler);
+ TargetRegistry::RegisterMCDisassembler(getThePPC32LETarget(),
+ createPPCLEDisassembler);
TargetRegistry::RegisterMCDisassembler(getThePPC64Target(),
createPPCDisassembler);
TargetRegistry::RegisterMCDisassembler(getThePPC64LETarget(),
@@ -167,18 +169,24 @@ static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
-static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, QFRegs);
-}
-
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
return decodeRegisterClass(Inst, RegNo, SPERegs);
}
+static DecodeStatus DecodeACCRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, ACCRegs);
+}
+
+static DecodeStatus DecodeVSRpRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSRpRegs);
+}
+
#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass
#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass
@@ -206,6 +214,15 @@ static DecodeStatus decodeImmZeroOperand(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeVSRpEvenOperands(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo & 1)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(VSRpRegs[RegNo >> 1]));
+ return MCDisassembler::Success;
+}
+
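Aside (illustrative, not part of the patch): a VSRp ("VSX register pair") operand is encoded as the even VSR that starts the pair, which is why the decoder above rejects odd values and halves the encoding. Assuming the usual pairing (VSRp0 covers vs0/vs1, VSRp1 covers vs2/vs3, and so on):

    // Mirrors decodeVSRpEvenOperands above.
    static bool vsrpIndexFromEvenVSR(uint64_t RegNo, unsigned &PairIdx) {
      if (RegNo & 1)                    // only an even VSR can start a pair
        return false;
      PairIdx = unsigned(RegNo >> 1);   // vs0/vs1 -> 0, vs2/vs3 -> 1, ...
      return true;
    }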
static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
// Decode the memri field (imm, reg), which has the low 16-bits as the
@@ -401,14 +418,9 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Read the instruction in the proper endianness.
uint64_t Inst = ReadFunc(Bytes.data());
- if (STI.getFeatureBits()[PPC::FeatureQPX]) {
- DecodeStatus result =
- decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
- if (result != MCDisassembler::Fail)
- return result;
- } else if (STI.getFeatureBits()[PPC::FeatureSPE]) {
+ if (STI.getFeatureBits()[PPC::FeatureSPE]) {
DecodeStatus result =
- decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
+ decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
if (result != MCDisassembler::Fail)
return result;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
new file mode 100644
index 000000000000..e8f8cbfee6ee
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
@@ -0,0 +1,53 @@
+//===-- PPCCallLowering.cpp - Call lowering for GlobalISel -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the lowering of LLVM calls to machine code calls for
+/// GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+#include "PPCCallLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ppc-call-lowering"
+
+using namespace llvm;
+
+PPCCallLowering::PPCCallLowering(const PPCTargetLowering &TLI)
+ : CallLowering(&TLI) {}
+
+bool PPCCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+ const Value *Val, ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI,
+ Register SwiftErrorVReg) const {
+ assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
+ "Return value without a vreg");
+ if (VRegs.size() > 0)
+ return false;
+
+ MIRBuilder.buildInstr(PPC::BLR8);
+ return true;
+}
+
+bool PPCCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
+
+ // If VRegs is empty, there are no formal arguments to lower, so we can
+ // simply return true. If there are formal arguments, we currently do not
+ // handle them and thus return false.
+ return VRegs.empty();
+}
+
+bool PPCCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const {
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
new file mode 100644
index 000000000000..5a449f4cab1b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
@@ -0,0 +1,40 @@
+//===-- PPCCallLowering.h - Call lowering for GlobalISel -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes how to lower LLVM calls to machine code calls.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_GISEL_PPCCALLLOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_GISEL_PPCCALLLOWERING_H
+
+#include "PPCISelLowering.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+class PPCTargetLowering;
+
+class PPCCallLowering : public CallLowering {
+public:
+ PPCCallLowering(const PPCTargetLowering &TLI);
+
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
+ Register SwiftErrorVReg) const override;
+ bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
+ bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const override;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
new file mode 100644
index 000000000000..7d64816ed6c7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
@@ -0,0 +1,92 @@
+//===- PPCInstructionSelector.cpp --------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for
+/// PowerPC.
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPCRegisterBankInfo.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ppc-gisel"
+
+using namespace llvm;
+
+namespace {
+
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+
+class PPCInstructionSelector : public InstructionSelector {
+public:
+ PPCInstructionSelector(const PPCTargetMachine &TM, const PPCSubtarget &STI,
+ const PPCRegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) override;
+ static const char *getName() { return DEBUG_TYPE; }
+
+private:
+ /// tblgen generated 'select' implementation that is used as the initial
+ /// selector for the patterns that do not require complex C++.
+ bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+
+ const PPCInstrInfo &TII;
+ const PPCRegisterInfo &TRI;
+ const PPCRegisterBankInfo &RBI;
+
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
+
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+
+} // end anonymous namespace
+
+#define GET_GLOBALISEL_IMPL
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM,
+ const PPCSubtarget &STI,
+ const PPCRegisterBankInfo &RBI)
+ : InstructionSelector(), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
+
+bool PPCInstructionSelector::select(MachineInstr &I) {
+ if (selectImpl(I, *CoverageInfo))
+ return true;
+ return false;
+}
+
+namespace llvm {
+InstructionSelector *
+createPPCInstructionSelector(const PPCTargetMachine &TM,
+ const PPCSubtarget &Subtarget,
+ const PPCRegisterBankInfo &RBI) {
+ return new PPCInstructionSelector(TM, Subtarget, RBI);
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
new file mode 100644
index 000000000000..c16bcaea592b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
@@ -0,0 +1,20 @@
+//===- PPCLegalizerInfo.cpp --------------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for PowerPC
+//===----------------------------------------------------------------------===//
+
+#include "PPCLegalizerInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ppc-legalinfo"
+
+using namespace llvm;
+using namespace LegalizeActions;
+
+PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) { computeTables(); }
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h
new file mode 100644
index 000000000000..c73186d3d0c1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h
@@ -0,0 +1,28 @@
+//===- PPCLegalizerInfo.h ----------------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for PowerPC
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_GISEL_PPCMACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_POWERPC_GISEL_PPCMACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class PPCSubtarget;
+
+/// This class provides the information for the PowerPC target legalizer for
+/// GlobalISel.
+class PPCLegalizerInfo : public LegalizerInfo {
+public:
+ PPCLegalizerInfo(const PPCSubtarget &ST);
+};
+} // namespace llvm
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
new file mode 100644
index 000000000000..6af79324919c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -0,0 +1,27 @@
+//===- PPCRegisterBankInfo.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for
+/// PowerPC.
+//===----------------------------------------------------------------------===//
+
+#include "PPCRegisterBankInfo.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ppc-reg-bank-info"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "PPCGenRegisterBank.inc"
+
+using namespace llvm;
+
+PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI)
+ : PPCGenRegisterBankInfo() {}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
new file mode 100644
index 000000000000..358d5ed3cf14
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
@@ -0,0 +1,39 @@
+//===-- PPCRegisterBankInfo.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for PowerPC.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H
+
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_REGBANK_DECLARATIONS
+#include "PPCGenRegisterBank.inc"
+
+namespace llvm {
+class TargetRegisterInfo;
+
+class PPCGenRegisterBankInfo : public RegisterBankInfo {
+protected:
+#define GET_TARGET_REGBANK_CLASS
+#include "PPCGenRegisterBank.inc"
+};
+
+class PPCRegisterBankInfo final : public PPCGenRegisterBankInfo {
+public:
+ PPCRegisterBankInfo(const TargetRegisterInfo &TRI);
+};
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
new file mode 100644
index 000000000000..0e8a4b7061c5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
@@ -0,0 +1,15 @@
+//===-- PPCRegisterBanks.td - Describe the PPC Banks -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Define the PPC register banks used for GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+/// General Purpose Registers
+def GPRRegBank : RegisterBank<"GPR", [G8RC]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index dbaf221db9fc..72401668c8d0 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -46,6 +46,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
case PPC::fixup_ppc_half16ds:
return Value & 0xfffc;
case PPC::fixup_ppc_pcrel34:
+ case PPC::fixup_ppc_imm34:
return Value & 0x3ffffffff;
}
}
@@ -68,6 +69,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case PPC::fixup_ppc_br24_notoc:
return 4;
case PPC::fixup_ppc_pcrel34:
+ case PPC::fixup_ppc_imm34:
case FK_Data_8:
return 8;
case PPC::fixup_ppc_nofixup:
@@ -100,6 +102,7 @@ public:
{ "fixup_ppc_half16", 0, 16, 0 },
{ "fixup_ppc_half16ds", 0, 14, 0 },
{ "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_imm34", 0, 34, 0 },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
@@ -112,6 +115,7 @@ public:
{ "fixup_ppc_half16", 0, 16, 0 },
{ "fixup_ppc_half16ds", 2, 14, 0 },
{ "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_imm34", 0, 34, 0 },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
@@ -178,12 +182,6 @@ public:
}
}
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- // FIXME.
- return false;
- }
-
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
const MCRelaxableFragment *DF,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index d8b3301e97f1..94ef7b45434f 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -138,6 +138,15 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_GOT_PCREL:
Type = ELF::R_PPC64_GOT_PCREL34;
break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL:
+ Type = ELF::R_PPC64_GOT_TLSGD_PCREL34;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL:
+ Type = ELF::R_PPC64_GOT_TLSLD_PCREL34;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL:
+ Type = ELF::R_PPC64_GOT_TPREL_PCREL34;
+ break;
}
break;
case FK_Data_4:
@@ -407,6 +416,21 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
else
Type = ELF::R_PPC_TLS;
break;
+ case MCSymbolRefExpr::VK_PPC_TLS_PCREL:
+ Type = ELF::R_PPC64_TLS;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_imm34:
+ switch (Modifier) {
+ default:
+ report_fatal_error("Unsupported Modifier for fixup_ppc_imm34.");
+ case MCSymbolRefExpr::VK_DTPREL:
+ Type = ELF::R_PPC64_DTPREL34;
+ break;
+ case MCSymbolRefExpr::VK_TPREL:
+ Type = ELF::R_PPC64_TPREL34;
+ break;
}
break;
case FK_Data_8:
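Aside (illustrative, not part of the patch): fixup_ppc_imm34 carries the non-PC-relative 34-bit paddi immediate. adjustFixupValue masks it to 34 bits, and at ELF emission it may only resolve to the two new 34-bit TLS relocations; the paddi operands below are hypothetical examples:

    //   paddi 3, 13, x@tprel, 0   -> R_PPC64_TPREL34
    //   paddi 3, 3,  x@dtprel, 0  -> R_PPC64_DTPREL34
    static uint64_t maskImm34(uint64_t Value) {
      return Value & 0x3ffffffffULL;  // keep the low 34 bits, as above
    }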
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index 4373778cc96c..386d59266096 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -20,6 +20,7 @@
#include "PPCELFStreamer.h"
+#include "PPCFixupKinds.h"
#include "PPCInstrInfo.h"
#include "PPCMCCodeEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -89,12 +90,33 @@ void PPCELFStreamer::emitInstruction(const MCInst &Inst,
PPCMCCodeEmitter *Emitter =
static_cast<PPCMCCodeEmitter*>(getAssembler().getEmitterPtr());
+ // If the instruction is part of the GOT to PC-Rel link time optimization
+ // instruction pair, this returns a value; otherwise it returns None. A true
+ // value means the instruction is the PLDpc and a false value means it is
+ // the user instruction.
+ Optional<bool> IsPartOfGOTToPCRelPair = isPartOfGOTToPCRelPair(Inst, STI);
+
+ // User of the GOT-indirect address.
+ // For example, the load that will get the relocation as follows:
+ // .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+ // lwa 3, 4(3)
+ if (IsPartOfGOTToPCRelPair.hasValue() && !IsPartOfGOTToPCRelPair.getValue())
+ emitGOTToPCRelReloc(Inst);
+
// Special handling is only for prefixed instructions.
if (!Emitter->isPrefixedInstruction(Inst)) {
MCELFStreamer::emitInstruction(Inst, STI);
return;
}
emitPrefixedInstruction(Inst, STI);
+
+ // Producer of the GOT-indirect address.
+ // For example, the prefixed load from the GOT that will get the label as
+ // follows:
+ // pld 3, vec@got@pcrel(0), 1
+ // .Lpcrel1:
+ if (IsPartOfGOTToPCRelPair.hasValue() && IsPartOfGOTToPCRelPair.getValue())
+ emitGOTToPCRelLabel(Inst);
}
void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
@@ -103,6 +125,102 @@ void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
MCELFStreamer::emitLabel(Symbol);
}
+// This link time GOT PC Relative optimization relocation will look like this:
+// pld <reg> symbol@got@pcrel
+// <Label###>:
+// .reloc Label###-8,R_PPC64_PCREL_OPT,.-(Label###-8)
+// load <loadedreg>, 0(<reg>)
+// The reason we place the label after the PLDpc instruction is that there
+// may be an alignment nop before it since prefixed instructions must not
+// cross a 64-byte boundary (please see
+// PPCELFStreamer::emitPrefixedInstruction()). When referring to the
+// label, we subtract the width of a prefixed instruction (8 bytes) to ensure
+// we refer to the PLDpc.
+void PPCELFStreamer::emitGOTToPCRelReloc(const MCInst &Inst) {
+ // Get the last operand which contains the symbol.
+ const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1);
+ assert(Operand.isExpr() && "Expecting an MCExpr.");
+ // Cast the last operand to MCSymbolRefExpr to get the symbol.
+ const MCExpr *Expr = Operand.getExpr();
+ const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr);
+ assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT &&
+ "Expecting a symbol of type VK_PPC_PCREL_OPT");
+ MCSymbol *LabelSym =
+ getContext().getOrCreateSymbol(SymExpr->getSymbol().getName());
+ const MCExpr *LabelExpr = MCSymbolRefExpr::create(LabelSym, getContext());
+ const MCExpr *Eight = MCConstantExpr::create(8, getContext());
+ // SubExpr is just Label###-8
+ const MCExpr *SubExpr =
+ MCBinaryExpr::createSub(LabelExpr, Eight, getContext());
+ MCSymbol *CurrentLocation = getContext().createTempSymbol();
+ const MCExpr *CurrentLocationExpr =
+ MCSymbolRefExpr::create(CurrentLocation, getContext());
+ // SubExpr2 is .-(Label###-8)
+ const MCExpr *SubExpr2 =
+ MCBinaryExpr::createSub(CurrentLocationExpr, SubExpr, getContext());
+
+ MCDataFragment *DF = static_cast<MCDataFragment *>(LabelSym->getFragment());
+ assert(DF && "Expecting a valid data fragment.");
+ MCFixupKind FixupKind = static_cast<MCFixupKind>(FirstLiteralRelocationKind +
+ ELF::R_PPC64_PCREL_OPT);
+ DF->getFixups().push_back(
+ MCFixup::create(LabelSym->getOffset() - 8, SubExpr2,
+ FixupKind, Inst.getLoc()));
+ emitLabel(CurrentLocation, Inst.getLoc());
+}
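Aside (illustrative, not part of the patch): because the label lands after the 8-byte prefixed PLDpc (and any alignment nop precedes the pair), "label - 8" is what addresses the PLDpc. Combining the two examples from the comments above, the emitted sequence is roughly:

    // pld   3, sym@got@pcrel(0), 1       <- starts at .Lpcrel1 - 8
    // .Lpcrel1:
    // .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
    // lwa   3, 4(3)                      <- user of the GOT-indirect address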
+
+// Emit the label that immediately follows the PLDpc for a link time GOT PC Rel
+// optimization.
+void PPCELFStreamer::emitGOTToPCRelLabel(const MCInst &Inst) {
+ // Get the last operand which contains the symbol.
+ const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1);
+ assert(Operand.isExpr() && "Expecting an MCExpr.");
+ // Cast the last operand to MCSymbolRefExpr to get the symbol.
+ const MCExpr *Expr = Operand.getExpr();
+ const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr);
+ assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT &&
+ "Expecting a symbol of type VK_PPC_PCREL_OPT");
+ MCSymbol *LabelSym =
+ getContext().getOrCreateSymbol(SymExpr->getSymbol().getName());
+ emitLabel(LabelSym, Inst.getLoc());
+}
+
+// This function checks if the parameter Inst is part of the setup for a link
+// time GOT PC Relative optimization. For example in this situation:
+// <MCInst PLDpc <MCOperand Reg:282> <MCOperand Expr:(glob_double@got@pcrel)>
+// <MCOperand Imm:0> <MCOperand Expr:(.Lpcrel@<<invalid>>)>>
+// <MCInst SOME_LOAD <MCOperand Reg:22> <MCOperand Imm:0> <MCOperand Reg:282>
+// <MCOperand Expr:(.Lpcrel@<<invalid>>)>>
+// The above is a pair of such instructions and this function will not return
+// None for either one of them. In both cases we are looking for the last
+// operand <MCOperand Expr:(.Lpcrel@<<invalid>>)> which needs to be an MCExpr
+// and has the flag MCSymbolRefExpr::VK_PPC_PCREL_OPT. After that we just look
+// at the opcode and in the case of PLDpc we will return true. For the load
+// (or store) this function will return false indicating it has found the second
+// instruction in the pair.
+Optional<bool> llvm::isPartOfGOTToPCRelPair(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ // Need at least two operands.
+ if (Inst.getNumOperands() < 2)
+ return None;
+
+ unsigned LastOp = Inst.getNumOperands() - 1;
+ // The last operand needs to be an MCExpr and it needs to have a variant kind
+ // of VK_PPC_PCREL_OPT. If it does not satisfy these conditions it is not a
+ // link time GOT PC Rel opt instruction and we can ignore it and return None.
+ const MCOperand &Operand = Inst.getOperand(LastOp);
+ if (!Operand.isExpr())
+ return None;
+
+ // Check for the variant kind VK_PPC_PCREL_OPT in this expression.
+ const MCExpr *Expr = Operand.getExpr();
+ const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr);
+ if (!SymExpr || SymExpr->getKind() != MCSymbolRefExpr::VK_PPC_PCREL_OPT)
+ return None;
+
+ return (Inst.getOpcode() == PPC::PLDpc);
+}
+
MCELFStreamer *llvm::createPPCELFStreamer(
MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
index 51863232d071..f44200104f32 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
@@ -43,8 +43,15 @@ public:
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
private:
void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
+ void emitGOTToPCRelReloc(const MCInst &Inst);
+ void emitGOTToPCRelLabel(const MCInst &Inst);
};
+// Check if the instruction Inst is part of a pair of instructions that make up
+// a link time GOT PC Rel optimization.
+Optional<bool> isPartOfGOTToPCRelPair(const MCInst &Inst,
+ const MCSubtargetInfo &STI);
+
MCELFStreamer *createPPCELFStreamer(MCContext &Context,
std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 2fb8947fd4e0..73292f7b7938 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -43,6 +43,9 @@ enum Fixups {
// A 34-bit fixup corresponding to PC-relative paddi.
fixup_ppc_pcrel34,
+ // A 34-bit fixup corresponding to Non-PC-relative paddi.
+ fixup_ppc_imm34,
+
/// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the
/// TLS general and local dynamic models, or inserts the thread-pointer
/// register number.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 16da62a74b8c..a291a34d4c52 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -49,18 +49,6 @@ FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden,
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
const char *RegName = getRegisterName(RegNo);
- if (RegName[0] == 'q' /* QPX */) {
- // The system toolchain on the BG/Q does not understand QPX register names
- // in .cfi_* directives, so print the name of the floating-point
- // subregister instead.
- std::string RN(RegName);
-
- RN[0] = 'f';
- OS << RN;
-
- return;
- }
-
OS << RegName;
}
@@ -83,15 +71,45 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
"reference expression if it is an expression at all.");
O << "\taddis ";
- printOperand(MI, 0, O);
+ printOperand(MI, 0, STI, O);
O << ", ";
- printOperand(MI, 2, O);
+ printOperand(MI, 2, STI, O);
O << "(";
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ")";
return;
}
+ // Check if the last operand is an expression with the variant kind
+ // VK_PPC_PCREL_OPT. If this is the case then this is a linker optimization
+ // relocation and the .reloc directive needs to be added.
+ unsigned LastOp = MI->getNumOperands() - 1;
+ if (MI->getNumOperands() > 1) {
+ const MCOperand &Operand = MI->getOperand(LastOp);
+ if (Operand.isExpr()) {
+ const MCExpr *Expr = Operand.getExpr();
+ const MCSymbolRefExpr *SymExpr =
+ static_cast<const MCSymbolRefExpr *>(Expr);
+
+ if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT) {
+ const MCSymbol &Symbol = SymExpr->getSymbol();
+ if (MI->getOpcode() == PPC::PLDpc) {
+ printInstruction(MI, Address, STI, O);
+ O << "\n";
+ Symbol.print(O, &MAI);
+ O << ":";
+ return;
+ } else {
+ O << "\t.reloc ";
+ Symbol.print(O, &MAI);
+ O << "-8,R_PPC64_PCREL_OPT,.-(";
+ Symbol.print(O, &MAI);
+ O << "-8)\n";
+ }
+ }
+ }
+ }
+
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
@@ -106,9 +124,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
SH = 32-SH;
}
if (useSubstituteMnemonic) {
- printOperand(MI, 0, O);
+ printOperand(MI, 0, STI, O);
O << ", ";
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", " << (unsigned int)SH;
printAnnotation(O, Annot);
@@ -123,9 +141,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
if (63-SH == ME) {
O << "\tsldi ";
- printOperand(MI, 0, O);
+ printOperand(MI, 0, STI, O);
O << ", ";
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", " << (unsigned int)SH;
printAnnotation(O, Annot);
return;
@@ -153,9 +171,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (IsBookE && TH != 0 && TH != 16)
O << (unsigned int) TH << ", ";
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", ";
- printOperand(MI, 2, O);
+ printOperand(MI, 2, STI, O);
if (!IsBookE && TH != 0 && TH != 16)
O << ", " << (unsigned int) TH;
@@ -166,29 +184,36 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (MI->getOpcode() == PPC::DCBF) {
unsigned char L = MI->getOperand(0).getImm();
- if (!L || L == 1 || L == 3) {
- O << "\tdcbf";
- if (L == 1 || L == 3)
+ if (!L || L == 1 || L == 3 || L == 4 || L == 6) {
+ O << "\tdcb";
+ if (L != 6)
+ O << "f";
+ if (L == 1)
O << "l";
if (L == 3)
- O << "p";
+ O << "lp";
+ if (L == 4)
+ O << "ps";
+ if (L == 6)
+ O << "stps";
O << " ";
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", ";
- printOperand(MI, 2, O);
+ printOperand(MI, 2, STI, O);
printAnnotation(O, Annot);
return;
}
}
- if (!printAliasInstr(MI, Address, O))
- printInstruction(MI, Address, O);
+ if (!printAliasInstr(MI, Address, STI, O))
+ printInstruction(MI, Address, STI, O);
printAnnotation(O, Annot);
}
void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O,
const char *Modifier) {
unsigned Code = MI->getOperand(OpNo).getImm();
@@ -282,10 +307,11 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
assert(StringRef(Modifier) == "reg" &&
"Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!");
- printOperand(MI, OpNo+1, O);
+ printOperand(MI, OpNo + 1, STI, O);
}
void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Code = MI->getOperand(OpNo).getImm();
if (Code == 2)
@@ -295,6 +321,7 @@ void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 1 && "Invalid u1imm argument!");
@@ -302,6 +329,7 @@ void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 3 && "Invalid u2imm argument!");
@@ -309,6 +337,7 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 8 && "Invalid u3imm argument!");
@@ -316,6 +345,7 @@ void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 15 && "Invalid u4imm argument!");
@@ -323,6 +353,7 @@ void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
int Value = MI->getOperand(OpNo).getImm();
Value = SignExtend32<5>(Value);
@@ -330,6 +361,7 @@ void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printImmZeroOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value == 0 && "Operand must be zero");
@@ -337,6 +369,7 @@ void PPCInstPrinter::printImmZeroOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 31 && "Invalid u5imm argument!");
@@ -344,6 +377,7 @@ void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 63 && "Invalid u6imm argument!");
@@ -351,6 +385,7 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 127 && "Invalid u7imm argument!");
@@ -361,12 +396,14 @@ void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo,
// of XXSPLTIB which are unsigned. So we simply truncate to 8 bits and
// print as unsigned.
void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned char Value = MI->getOperand(OpNo).getImm();
O << (unsigned int)Value;
}
void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned short Value = MI->getOperand(OpNo).getImm();
assert(Value <= 1023 && "Invalid u10imm argument!");
@@ -374,6 +411,7 @@ void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned short Value = MI->getOperand(OpNo).getImm();
assert(Value <= 4095 && "Invalid u12imm argument!");
@@ -381,14 +419,16 @@ void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
O << (short)MI->getOperand(OpNo).getImm();
else
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, STI, O);
}
void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm()) {
long long Value = MI->getOperand(OpNo).getImm();
@@ -396,21 +436,24 @@ void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo,
O << (long long)Value;
}
else
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, STI, O);
}
void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
O << (unsigned short)MI->getOperand(OpNo).getImm();
else
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, STI, O);
}
void PPCInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
- unsigned OpNo, raw_ostream &O) {
+ unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
if (!MI->getOperand(OpNo).isImm())
- return printOperand(MI, OpNo, O);
+ return printOperand(MI, OpNo, STI, O);
int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
if (PrintBranchImmAsAddress) {
uint64_t Target = Address + Imm;
@@ -433,16 +476,16 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
}
void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (!MI->getOperand(OpNo).isImm())
- return printOperand(MI, OpNo, O);
+ return printOperand(MI, OpNo, STI, O);
O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
}
-
void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned CCReg = MI->getOperand(OpNo).getReg();
unsigned RegNo;
switch (CCReg) {
@@ -460,33 +503,37 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printS16ImmOperand(MI, OpNo, O);
+ printS16ImmOperand(MI, OpNo, STI, O);
O << '(';
if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
O << "0";
else
- printOperand(MI, OpNo+1, O);
+ printOperand(MI, OpNo + 1, STI, O);
O << ')';
}
void PPCInstPrinter::printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printS34ImmOperand(MI, OpNo, O);
+ printS34ImmOperand(MI, OpNo, STI, O);
O << '(';
- printImmZeroOperand(MI, OpNo + 1, O);
+ printImmZeroOperand(MI, OpNo + 1, STI, O);
O << ')';
}
void PPCInstPrinter::printMemRegImm34(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printS34ImmOperand(MI, OpNo, O);
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printS34ImmOperand(MI, OpNo, STI, O);
O << '(';
- printOperand(MI, OpNo + 1, O);
+ printOperand(MI, OpNo + 1, STI, O);
O << ')';
}
void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// When used as the base register, r0 reads constant zero rather than
// the value contained in the register. For this reason, the darwin
@@ -494,13 +541,13 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
if (MI->getOperand(OpNo).getReg() == PPC::R0)
O << "0";
else
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, STI, O);
O << ", ";
- printOperand(MI, OpNo+1, O);
+ printOperand(MI, OpNo + 1, STI, O);
}
void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
// On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
// come at the _end_ of the expression.
const MCOperand &Op = MI->getOperand(OpNo);
@@ -513,10 +560,17 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
RefExp = cast<MCSymbolRefExpr>(Op.getExpr());
O << RefExp->getSymbol().getName();
+ // The variant kind VK_PPC_NOTOC needs to be handled as a special case
+ // because we do not want the assembly to print out the @notoc at the
+ // end like __tls_get_addr(x@tlsgd)@notoc. Instead we want it to look
+ // like __tls_get_addr@notoc(x@tlsgd).
+ if (RefExp->getKind() == MCSymbolRefExpr::VK_PPC_NOTOC)
+ O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind());
O << '(';
- printOperand(MI, OpNo+1, O);
+ printOperand(MI, OpNo + 1, STI, O);
O << ')';
- if (RefExp->getKind() != MCSymbolRefExpr::VK_None)
+ if (RefExp->getKind() != MCSymbolRefExpr::VK_None &&
+ RefExp->getKind() != MCSymbolRefExpr::VK_PPC_NOTOC)
O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind());
if (ConstExp != nullptr)
O << '+' << ConstExp->getValue();
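Aside (illustrative, not part of the patch): the VK_PPC_NOTOC special case only changes where the modifier is printed on a __tls_get_addr call; roughly:

    // without the special case:         bl __tls_get_addr(x@tlsgd)@notoc
    // with it (the intended form):      bl __tls_get_addr@notoc(x@tlsgd)
    // the PPC32 @plt form still trails: bl __tls_get_addr(x@tlsgd)@plt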
@@ -525,7 +579,7 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
/// showRegistersWithPercentPrefix - Check if this register name should be
/// printed with a percentage symbol as prefix.
bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const {
- if (!FullRegNamesWithPercent || TT.isOSDarwin() || TT.getOS() == Triple::AIX)
+ if (!FullRegNamesWithPercent || TT.getOS() == Triple::AIX)
return false;
switch (RegName[0]) {
@@ -545,7 +599,7 @@ bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const {
const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
unsigned RegEncoding)
const {
- if (!TT.isOSDarwin() && !FullRegNames)
+ if (!FullRegNames)
return nullptr;
if (RegNum < PPC::CR0EQ || RegNum > PPC::CR7UN)
return nullptr;
@@ -567,11 +621,11 @@ const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
bool PPCInstPrinter::showRegistersWithPrefix() const {
if (TT.getOS() == Triple::AIX)
return false;
- return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames;
+ return FullRegNamesWithPercent || FullRegNames;
}
void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
@@ -600,4 +654,3 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
assert(Op.isExpr() && "unknown operand kind in printOperand");
Op.getExpr()->print(O, &MAI);
}
-
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index 9763aeceef94..5e9b01494416 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -36,45 +36,73 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O) override;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+ void printInstruction(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
unsigned OpIdx, unsigned PrintMethodIdx,
- raw_ostream &OS);
+ const MCSubtargetInfo &STI, raw_ostream &OS);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier = nullptr);
- void printATBitsAsHint(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O,
+ const char *Modifier = nullptr);
+ void printATBitsAsHint(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU7ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printS34ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printImmZeroOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU7ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU8ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU10ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU12ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printS34ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printImmZeroOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printBranchOperand(const MCInst *MI, uint64_t Address, unsigned OpNo,
- raw_ostream &O);
- void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printTLSCall(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
- void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printcrbitm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
- void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemRegImm34(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMemRegImm34(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMemRegReg(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 593dc2843c3d..2b76af279ce6 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -26,7 +26,8 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
if (is64Bit) {
CodePointerSize = CalleeSaveStackSlotSize = 8;
}
- IsLittleEndian = T.getArch() == Triple::ppc64le;
+ IsLittleEndian =
+ T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
@@ -56,7 +57,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
void PPCXCOFFMCAsmInfo::anchor() {}
PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
- if (T.getArch() == Triple::ppc64le)
+ if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle)
report_fatal_error("XCOFF is not supported for little-endian targets");
CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 27c687686641..48806051f581 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
-#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/MC/MCAsmInfoXCOFF.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index fb65e7320f2b..5f0769fd21f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -44,11 +44,13 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+ if (MO.isReg() || MO.isImm())
+ return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- ((MI.getOpcode() == PPC::BL8_NOTOC)
+ ((MI.getOpcode() == PPC::BL8_NOTOC ||
+ MI.getOpcode() == PPC::BL8_NOTOC_TLS)
? (MCFixupKind)PPC::fixup_ppc_br24_notoc
: (MCFixupKind)PPC::fixup_ppc_br24)));
return 0;
@@ -92,6 +94,16 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
return 0;
}
+unsigned
+PPCMCCodeEmitter::getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(MI.getOperand(OpNo).isReg() && "Operand should be a register");
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI)
+ << 1;
+ return RegBits;
+}
+
unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -104,20 +116,36 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
return 0;
}
-uint64_t
-PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+uint64_t PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI,
+ MCFixupKind Fixup) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm())
+ assert(!MO.isReg() && "Not expecting a register for this operand.");
+ if (MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the immediate field.
- Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_pcrel34));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Fixup));
return 0;
}
+uint64_t
+PPCMCCodeEmitter::getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getImm34Encoding(MI, OpNo, Fixups, STI,
+ (MCFixupKind)PPC::fixup_ppc_imm34);
+}
+
+uint64_t
+PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getImm34Encoding(MI, OpNo, Fixups, STI,
+ (MCFixupKind)PPC::fixup_ppc_pcrel34);
+}
+
unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -213,8 +241,13 @@ PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
(void)SRE;
// Currently these are the only valid PCRelative Relocations.
assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL ||
- SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL) &&
- "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL");
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL) &&
+ "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL or "
+ "VK_PPC_GOT_TLSGD_PCREL or VK_PPC_GOT_TLSLD_PCREL or "
+ "VK_PPC_GOT_TPREL_PCREL.");
// Generate the fixup for the relocation.
Fixups.push_back(
MCFixup::create(0, Expr,
@@ -326,8 +359,12 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the TLS register, which simply provides a relocation
// hint to the linker that this statement is part of a relocation sequence.
- // Return the thread-pointer register's encoding.
- Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ // Return the thread-pointer register's encoding. Add a one byte displacement
+ // if using PC relative memops.
+ const MCExpr *Expr = MO.getExpr();
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(Expr);
+ bool IsPCRel = SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS_PCREL;
+ Fixups.push_back(MCFixup::create(IsPCRel ? 1 : 0, Expr,
(MCFixupKind)PPC::fixup_ppc_nofixup));
const Triple &TT = STI.getTargetTriple();
bool isPPC64 = TT.isPPC64();
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index 588aa76bd806..347e163c9515 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -52,7 +52,14 @@ public:
const MCSubtargetInfo &STI) const;
uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI,
+ MCFixupKind Fixup) const;
+ uint64_t getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -86,6 +93,9 @@ public:
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 3092d56da1c5..bf9c6feb541e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -20,8 +20,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
@@ -30,6 +30,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -77,7 +78,17 @@ static MCRegisterInfo *createPPCMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
- return createPPCMCSubtargetInfoImpl(TT, CPU, FS);
+ // Set some default feature to MC layer.
+ std::string FullFS = std::string(FS);
+
+ if (TT.isOSAIX()) {
+ if (!FullFS.empty())
+ FullFS = "+aix," + FullFS;
+ else
+ FullFS = "+aix";
+ }
+
+ return createPPCMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FullFS);
}
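
A minimal, self-contained sketch of the feature-string handling above (the helper name and the input strings are assumptions chosen only to illustrate the concatenation; they are not taken from the patch):

    // Sketch only: mirrors how createPPCMCSubtargetInfo folds the "+aix"
    // default feature into the caller-provided feature string.
    #include <string>
    static std::string addAIXDefaultFeature(const std::string &FS) {
      if (FS.empty())
        return "+aix";
      return "+aix," + FS; // e.g. "+altivec" becomes "+aix,+altivec"
    }
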
static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
@@ -122,11 +133,12 @@ public:
void emitTCEntry(const MCSymbol &S) override {
if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) {
MCSymbolXCOFF *TCSym =
- cast<MCSymbolXCOFF>(Streamer.getContext().getOrCreateSymbol(
- XSym->getSymbolTableName() + "[TC]"));
+ cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
+ ->getQualNameSymbol();
+ OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
+
if (TCSym->hasRename())
Streamer.emitXCOFFRenameDirective(TCSym, TCSym->getSymbolTableName());
- OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
return;
}
@@ -334,8 +346,8 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() {
- for (Target *T :
- {&getThePPC32Target(), &getThePPC64Target(), &getThePPC64LETarget()}) {
+ for (Target *T : {&getThePPC32Target(), &getThePPC32LETarget(),
+ &getThePPC64Target(), &getThePPC64LETarget()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 719e005d9813..03b316341717 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -124,6 +124,11 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
#define GET_SUBTARGETINFO_ENUM
#include "PPCGenSubtargetInfo.inc"
+#define PPC_REGS0_7(X) \
+ { \
+ X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7 \
+ }
+
#define PPC_REGS0_31(X) \
{ \
X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
@@ -156,10 +161,10 @@ using llvm::MCPhysReg;
static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \
static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \
static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \
+ static const MCPhysReg VSRpRegs[32] = PPC_REGS0_31(PPC::VSRp); \
static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
- static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \
static const MCPhysReg RRegsNoR0[32] = \
PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
static const MCPhysReg XRegsNoX0[32] = \
@@ -179,8 +184,6 @@ using llvm::MCPhysReg;
PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \
PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \
PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \
- static const MCPhysReg CRRegs[8] = { \
- PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \
- PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7}
-
+ static const MCPhysReg CRRegs[8] = PPC_REGS0_7(PPC::CR); \
+ static const MCPhysReg ACCRegs[8] = PPC_REGS0_7(PPC::ACC)
#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index d672d54772e0..77b0331bb14c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -58,14 +58,19 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
switch ((unsigned)Fixup.getKind()) {
default:
report_fatal_error("Unimplemented fixup kind.");
- case PPC::fixup_ppc_half16:
+ case PPC::fixup_ppc_half16: {
+ const uint8_t SignAndSizeForHalf16 = EncodedSignednessIndicator | 15;
switch (Modifier) {
default:
report_fatal_error("Unsupported modifier for half16 fixup.");
case MCSymbolRefExpr::VK_None:
- return {XCOFF::RelocationType::R_TOC, EncodedSignednessIndicator | 15};
+ return {XCOFF::RelocationType::R_TOC, SignAndSizeForHalf16};
+ case MCSymbolRefExpr::VK_PPC_U:
+ return {XCOFF::RelocationType::R_TOCU, SignAndSizeForHalf16};
+ case MCSymbolRefExpr::VK_PPC_L:
+ return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16};
}
- break;
+ } break;
case PPC::fixup_ppc_br24:
// Branches are 4 byte aligned, so the 24 bits we encode in
// the instruction actually represents a 26 bit offset.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
index d7e3519d5539..63531f72adfb 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -94,7 +94,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instregex "CMPRB(8)?$"),
(instregex "TD(I)?$"),
(instregex "TW(I)?$"),
- (instregex "FCMPU(S|D)$"),
+ (instregex "FCMP(O|U)(S|D)$"),
(instregex "XSTSTDC(S|D)P$"),
FTDIV,
FTSQRT,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
index 7e0aa2c6061d..264582b244a7 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
@@ -20,17 +20,20 @@
#undef PPC
namespace llvm {
- class PPCTargetMachine;
- class PassRegistry;
- class FunctionPass;
- class MachineInstr;
- class MachineOperand;
- class AsmPrinter;
- class MCInst;
- class MCOperand;
- class ModulePass;
-
- FunctionPass *createPPCCTRLoops();
+class PPCRegisterBankInfo;
+class PPCSubtarget;
+class PPCTargetMachine;
+class PassRegistry;
+class FunctionPass;
+class InstructionSelector;
+class MachineInstr;
+class MachineOperand;
+class AsmPrinter;
+class MCInst;
+class MCOperand;
+class ModulePass;
+
+FunctionPass *createPPCCTRLoops();
#ifndef NDEBUG
FunctionPass *createPPCCTRLoopsVerify();
#endif
@@ -44,7 +47,6 @@ namespace llvm {
FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCBranchCoalescingPass();
- FunctionPass *createPPCQPXLoadSplatPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
@@ -68,7 +70,6 @@ namespace llvm {
void initializePPCReduceCRLogicalsPass(PassRegistry&);
void initializePPCBSelPass(PassRegistry&);
void initializePPCBranchCoalescingPass(PassRegistry&);
- void initializePPCQPXLoadSplatPass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
void initializePPCPreEmitPeepholePass(PassRegistry &);
@@ -80,7 +81,10 @@ namespace llvm {
ModulePass *createPPCLowerMASSVEntriesPass();
void initializePPCLowerMASSVEntriesPass(PassRegistry &);
extern char &PPCLowerMASSVEntriesID;
-
+
+ InstructionSelector *
+ createPPCInstructionSelector(const PPCTargetMachine &, const PPCSubtarget &,
+ const PPCRegisterBankInfo &);
namespace PPCII {
/// Target Operand Flag enum.
@@ -107,6 +111,37 @@ namespace llvm {
/// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL.
MO_GOT_FLAG = 8,
+ // MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a
+ // PC Relative linker optimization.
+ MO_PCREL_OPT_FLAG = 16,
+
+ /// MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to
+ /// TLS General Dynamic model.
+ MO_TLSGD_FLAG = 32,
+
+ /// MO_TPREL_FLAG - If this bit is set the symbol reference is relative to
+ /// TLS Initial Exec model.
+ MO_TPREL_FLAG = 64,
+
+ /// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to
+ /// TLS Local Dynamic model.
+ MO_TLSLD_FLAG = 128,
+
+ /// MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set,
+ /// they should produce the relocation @got@tlsgd@pcrel.
+ /// Fixup is VK_PPC_GOT_TLSGD_PCREL.
+ MO_GOT_TLSGD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSGD_FLAG,
+
+ /// MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set,
+ /// they should produce the relocation @got@tlsld@pcrel.
+ /// Fixup is VK_PPC_GOT_TLSLD_PCREL.
+ MO_GOT_TLSLD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSLD_FLAG,
+
+ /// MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set,
+ /// they should produce the relocation @got@tprel@pcrel.
+ /// Fixup is VK_PPC_GOT_TPREL_PCREL.
+ MO_GOT_TPREL_PCREL_FLAG = MO_GOT_FLAG | MO_TPREL_FLAG | MO_PCREL_FLAG,
+
/// The next are not flags but distinct values.
MO_ACCESS_MASK = 0xf00,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
index 9ad78bf67fe6..1e6ded231585 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
@@ -57,6 +57,10 @@ def DirectivePwrFuture
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
+def AIXOS: SubtargetFeature<"aix", "IsAIX", "true", "AIX OS">;
+def FeatureModernAIXAs
+ : SubtargetFeature<"modern-aix-as", "HasModernAIXAs", "true",
+ "AIX system assembler is modern enough to support new mnes">;
def FeatureHardFloat : SubtargetFeature<"hard-float", "HasHardFloat", "true",
"Enable floating-point instructions">;
def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
@@ -72,6 +76,9 @@ def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true",
"Enable SPE instructions",
[FeatureHardFloat]>;
+def FeatureEFPU2 : SubtargetFeature<"efpu2", "HasEFPU2", "true",
+ "Enable Embedded Floating-Point APU 2 instructions",
+ [FeatureSPE]>;
def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
@@ -132,9 +139,6 @@ def FeaturePPC4xx : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
"Enable PPC 4xx instructions">;
def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
"Enable PPC 6xx instructions">;
-def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
- "Enable QPX instructions",
- [FeatureFPU]>;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
@@ -177,6 +181,9 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
"HasAddisLoadFusion", "true",
"Power8 Addis-Load fusion",
[FeatureFusion]>;
+def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true",
+ "Target supports store clustering",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
@@ -193,7 +200,7 @@ def FeatureFloat128 :
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD",
"POPCNTD_Fast",
"Enable the popcnt[dw] instructions">;
-// Note that for the a2/a2q processor models we should not use popcnt[dw] by
+// Note that for the a2 processor models we should not use popcnt[dw] by
// default. These processors do support the instructions, but they're
// microcoded, and the software emulation is about twice as fast.
def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD",
@@ -236,7 +243,15 @@ def FeaturePrefixInstrs : SubtargetFeature<"prefix-instrs", "HasPrefixInstrs",
def FeaturePCRelativeMemops :
SubtargetFeature<"pcrelative-memops", "HasPCRelativeMemops", "true",
"Enable PC relative Memory Ops",
+ [FeatureISA3_0, FeaturePrefixInstrs]>;
+def FeaturePairedVectorMemops:
+ SubtargetFeature<"paired-vector-memops", "PairedVectorMemops", "true",
+ "32Byte load and store instructions",
[FeatureISA3_0]>;
+def FeatureMMA : SubtargetFeature<"mma", "HasMMA", "true",
+ "Enable MMA instructions",
+ [FeatureP8Vector, FeatureP9Altivec,
+ FeaturePairedVectorMemops]>;
def FeaturePredictableSelectIsExpensive :
SubtargetFeature<"predictable-select-expensive",
@@ -320,6 +335,8 @@ def ProcessorFeatures {
[DirectivePwr9,
FeatureP9Altivec,
FeatureP9Vector,
+ FeaturePPCPreRASched,
+ FeaturePPCPostRASched,
FeatureISA3_0,
FeaturePredictableSelectIsExpensive
];
@@ -329,9 +346,7 @@ def ProcessorFeatures {
// dispatch for vector operations than scalar ones. For the time being,
// this list also includes scheduling-related features since we do not have
// enough info to create custom scheduling strategies for future CPUs.
- list<SubtargetFeature> P9SpecificFeatures = [FeatureVectorsUseTwoUnits,
- FeaturePPCPreRASched,
- FeaturePPCPostRASched];
+ list<SubtargetFeature> P9SpecificFeatures = [FeatureVectorsUseTwoUnits];
list<SubtargetFeature> P9InheritableFeatures =
!listconcat(P8InheritableFeatures, P9AdditionalFeatures);
list<SubtargetFeature> P9Features =
@@ -340,9 +355,12 @@ def ProcessorFeatures {
// Power10
// For P10 CPU we assume that all of the existing features from Power9
// still exist with the exception of those we know are Power9 specific.
+ list<SubtargetFeature> FusionFeatures = [FeatureStoreFusion];
list<SubtargetFeature> P10AdditionalFeatures =
- [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
- FeaturePCRelativeMemops, FeatureP10Vector];
+ !listconcat(FusionFeatures, [
+ DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
+ FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA,
+ FeaturePairedVectorMemops]);
list<SubtargetFeature> P10SpecificFeatures = [];
list<SubtargetFeature> P10InheritableFeatures =
!listconcat(P9InheritableFeatures, P10AdditionalFeatures);
@@ -427,6 +445,7 @@ def getAltVSXFMAOpcode : InstrMapping {
include "PPCRegisterInfo.td"
include "PPCSchedule.td"
+include "GISel/PPCRegisterBanks.td"
//===----------------------------------------------------------------------===//
// PowerPC processors supported.
@@ -514,15 +533,6 @@ def : ProcessorModel<"a2", PPCA2Model,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>;
-def : ProcessorModel<"a2q", PPCA2Model,
- [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
- FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
- FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
- FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */, FeatureQPX,
- FeatureMFTB]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
@@ -561,7 +571,7 @@ def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
// No scheduler model yet.
-def : ProcessorModel<"pwr10", NoSchedModel, ProcessorFeatures.P10Features>;
+def : ProcessorModel<"pwr10", P9Model, ProcessorFeatures.P10Features>;
// No scheduler model for future CPU.
def : ProcessorModel<"future", NoSchedModel,
ProcessorFeatures.FutureFeatures>;
@@ -592,6 +602,13 @@ def PPCInstrInfo : InstrInfo {
let noNamedPositionallyEncodedOperands = 1;
}
+def PPCAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
def PPCAsmParser : AsmParser {
let ShouldEmitMatchRegisterName = 0;
}
@@ -610,6 +627,7 @@ def PPC : Target {
// Information about the instructions.
let InstructionSet = PPCInstrInfo;
+ let AssemblyWriters = [PPCAsmWriter];
let AssemblyParsers = [PPCAsmParser];
let AssemblyParserVariants = [PPCAsmParserVariant];
let AllowRegisterRenaming = 1;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index bf5fe741bac8..6257709731b9 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -27,11 +27,11 @@
#include "PPCTargetStreamer.h"
#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MachO.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -47,11 +47,11 @@
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -62,9 +62,11 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -72,6 +74,7 @@
#include <new>
using namespace llvm;
+using namespace llvm::XCOFF;
#define DEBUG_TYPE "asmprinter"
@@ -147,7 +150,21 @@ public:
class PPCAIXAsmPrinter : public PPCAsmPrinter {
private:
+ /// Symbols lowered from ExternalSymbolSDNodes; we will need to emit extern
+ /// linkage for them on AIX.
+ SmallPtrSet<MCSymbol *, 8> ExtSymSDNodeSymbols;
+
+ /// A format indicator and unique trailing identifier to form part of the
+ /// sinit/sterm function names.
+ std::string FormatIndicatorAndUniqueModId;
+
static void ValidateGV(const GlobalVariable *GV);
+ // Record a list of GlobalAlias associated with a GlobalObject.
+ // This is used for AIX's extra-label-at-definition aliasing strategy.
+ DenseMap<const GlobalObject *, SmallVector<const GlobalAlias *, 1>>
+ GOAliasMap;
+
+ void emitTracebackTable();
public:
PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
@@ -161,15 +178,28 @@ public:
bool doInitialization(Module &M) override;
+ void emitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool IsCtor) override;
+
void SetupMachineFunction(MachineFunction &MF) override;
void emitGlobalVariable(const GlobalVariable *GV) override;
void emitFunctionDescriptor() override;
+ void emitFunctionEntryLabel() override;
+
+ void emitFunctionBodyEnd() override;
+
void emitEndOfAsmFile(Module &) override;
void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const override;
+
+ void emitInstruction(const MachineInstr *MI) override;
+
+ bool doFinalization(Module &M) override;
+
+ void emitTTypeReference(const GlobalValue *GV, unsigned Encoding) override;
};
} // end anonymous namespace
@@ -291,6 +321,12 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
O << "0, ";
printOperand(MI, OpNo, O);
return false;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
case 'U': // Print 'u' for update form.
case 'X': // Print 'x' for indexed form.
// FIXME: Currently for PowerPC memory operands are always loaded
@@ -463,6 +499,14 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
StringRef Name = "__tls_get_addr";
MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ unsigned Opcode = PPC::BL8_NOP_TLS;
+
+ assert(MI->getNumOperands() >= 3 && "Expecting at least 3 operands from MI");
+ if (MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG ||
+ MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSLD_PCREL_FLAG) {
+ Kind = MCSymbolRefExpr::VK_PPC_NOTOC;
+ Opcode = PPC::BL8_NOTOC_TLS;
+ }
const Module *M = MF->getFunction().getParent();
assert(MI->getOperand(0).isReg() &&
@@ -490,10 +534,10 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
EmitToStreamer(*OutStreamer,
- MCInstBuilder(Subtarget->isPPC64() ?
- PPC::BL8_NOP_TLS : PPC::BL_TLS)
- .addExpr(TlsRef)
- .addExpr(SymVar));
+ MCInstBuilder(Subtarget->isPPC64() ? Opcode
+ : (unsigned)PPC::BL_TLS)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
}
/// Map a machine operand for a TOC pseudo-machine instruction to its
@@ -533,9 +577,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
if (Subtarget->hasSPE()) {
if (PPC::F4RCRegClass.contains(Reg) ||
PPC::F8RCRegClass.contains(Reg) ||
- PPC::QBRCRegClass.contains(Reg) ||
- PPC::QFRCRegClass.contains(Reg) ||
- PPC::QSRCRegClass.contains(Reg) ||
PPC::VFRCRegClass.contains(Reg) ||
PPC::VRRCRegClass.contains(Reg) ||
PPC::VSFRCRegClass.contains(Reg) ||
@@ -550,6 +591,38 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
}
}
#endif
+
+ auto getTOCRelocAdjustedExprForXCOFF = [this](const MCExpr *Expr,
+ ptrdiff_t OriginalOffset) {
+ // Apply an offset to the TOC-based expression such that the adjusted
+ // notional offset from the TOC base (to be encoded into the instruction's D
+ // or DS field) is the signed 16-bit truncation of the original notional
+ // offset from the TOC base.
+ // This is consistent with the treatment used both by XL C/C++ and
+ // by AIX ld -r.
+ ptrdiff_t Adjustment =
+ OriginalOffset - llvm::SignExtend32<16>(OriginalOffset);
+ return MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(-Adjustment, OutContext), OutContext);
+ };
+
+ auto getTOCEntryLoadingExprForXCOFF =
+ [IsPPC64, getTOCRelocAdjustedExprForXCOFF,
+ this](const MCSymbol *MOSymbol, const MCExpr *Expr) -> const MCExpr * {
+ const unsigned EntryByteSize = IsPPC64 ? 8 : 4;
+ const auto TOCEntryIter = TOC.find(MOSymbol);
+ assert(TOCEntryIter != TOC.end() &&
+ "Could not find the TOC entry for this symbol.");
+ const ptrdiff_t EntryDistanceFromTOCBase =
+ (TOCEntryIter - TOC.begin()) * EntryByteSize;
+ constexpr int16_t PositiveTOCRange = INT16_MAX;
+
+ if (EntryDistanceFromTOCBase > PositiveTOCRange)
+ return getTOCRelocAdjustedExprForXCOFF(Expr, EntryDistanceFromTOCBase);
+
+ return Expr;
+ };
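
As a reader's aside, the adjustment computed above just forces the encoded displacement to be the signed 16-bit truncation of the notional TOC offset. A worked example, with the entry position and the helper names below assumed purely for illustration:

    // Assume 8-byte TOC entries on 64-bit AIX and an entry 32768 bytes past the
    // TOC base (the 4097th entry), which no longer fits a signed 16-bit D field.
    //   SignExtend32<16>(32768) == -32768
    //   Adjustment = 32768 - (-32768) == 65536
    // The emitted expression is therefore "entry - 65536", so the relocated
    // displacement works out to -32768, the signed 16-bit truncation of the
    // original notional offset, as the comment above describes.
    #include <cstdint>
    static int32_t signExtend16(uint32_t V) { return static_cast<int16_t>(V); }
    static int32_t adjustedNotionalOffset(int32_t OriginalOffset) {
      int32_t Adjustment = OriginalOffset - signExtend16(OriginalOffset);
      return OriginalOffset - Adjustment; // == signExtend16(OriginalOffset)
    }
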
+
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
default: break;
@@ -696,6 +769,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
assert(
TM.getCodeModel() == CodeModel::Small &&
"This pseudo should only be selected for 32-bit small code model.");
+ Exp = getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
@@ -724,17 +798,20 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
"Invalid operand!");
+ // Map the operand to its corresponding MCSymbol.
+ const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
// Map the machine operand to its corresponding MCSymbol, then map the
// global address operand to be a reference to the TOC entry we will
// synthesize later.
- MCSymbol *TOCEntry =
- lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this));
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
const MCSymbolRefExpr::VariantKind VK =
IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC;
const MCExpr *Exp =
MCSymbolRefExpr::create(TOCEntry, VK, OutContext);
- TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ TmpInst.getOperand(1) = MCOperand::createExpr(
+ IsAIX ? getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp) : Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
@@ -1010,6 +1087,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::GETtlsADDR:
// Transform: %x3 = GETtlsADDR %x3, @sym
// Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
+ case PPC::GETtlsADDRPCREL:
case PPC::GETtlsADDR32: {
// Transform: %r3 = GETtlsADDR32 %r3, @sym
// Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT
@@ -1055,6 +1133,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::GETtlsldADDR:
// Transform: %x3 = GETtlsldADDR %x3, @sym
// Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld)
+ case PPC::GETtlsldADDRPCREL:
case PPC::GETtlsldADDR32: {
// Transform: %r3 = GETtlsldADDR32 %r3, @sym
// Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT
@@ -1081,6 +1160,21 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addExpr(SymDtprel));
return;
}
+ case PPC::PADDIdtprel: {
+ // Transform: %rd = PADDIdtprel %rs, @sym
+ // Into: %rd = PADDI8 %rs, sym@dtprel
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymDtprel = MCSymbolRefExpr::create(
+ MOSymbol, MCSymbolRefExpr::VK_DTPREL, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::PADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
+ return;
+ }
+
case PPC::ADDIdtprelL:
// Transform: %xd = ADDIdtprelL %xs, @sym
// Into: %xd = ADDI8 %xs, sym@dtprel@l
@@ -1137,10 +1231,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::LWA: {
// Verify alignment is legal, so we don't create relocations
// that can't be supported.
- // FIXME: This test is currently disabled for Darwin. The test
- // suite shows a handful of test cases that fail this check for
- // Darwin. Those need to be investigated before this sanity test
- // can be enabled for those subtargets.
unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.isGlobal()) {
@@ -1621,17 +1711,19 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
assert(LinkageAttr != MCSA_Invalid && "LinkageAttr should not MCSA_Invalid.");
MCSymbolAttr VisibilityAttr = MCSA_Invalid;
- switch (GV->getVisibility()) {
+ if (!TM.getIgnoreXCOFFVisibility()) {
+ switch (GV->getVisibility()) {
- // TODO: "exported" and "internal" Visibility needs to go here.
- case GlobalValue::DefaultVisibility:
- break;
- case GlobalValue::HiddenVisibility:
- VisibilityAttr = MAI->getHiddenVisibilityAttr();
- break;
- case GlobalValue::ProtectedVisibility:
- VisibilityAttr = MAI->getProtectedVisibilityAttr();
- break;
+ // TODO: "exported" and "internal" Visibility needs to go here.
+ case GlobalValue::DefaultVisibility:
+ break;
+ case GlobalValue::HiddenVisibility:
+ VisibilityAttr = MAI->getHiddenVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ VisibilityAttr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
}
OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
@@ -1650,18 +1742,305 @@ void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
return AsmPrinter::SetupMachineFunction(MF);
}
+void PPCAIXAsmPrinter::emitFunctionBodyEnd() {
+
+ if (!TM.getXCOFFTracebackTable())
+ return;
+
+ emitTracebackTable();
+}
+
+void PPCAIXAsmPrinter::emitTracebackTable() {
+
+ // Create a symbol for the end of function.
+ MCSymbol *FuncEnd = createTempSymbol(MF->getName());
+ OutStreamer->emitLabel(FuncEnd);
+
+ OutStreamer->AddComment("Traceback table begin");
+ // Begin with a fullword of zero.
+ OutStreamer->emitIntValueInHexWithPadding(0, 4 /*size*/);
+
+ SmallString<128> CommentString;
+ raw_svector_ostream CommentOS(CommentString);
+
+ auto EmitComment = [&]() {
+ OutStreamer->AddComment(CommentOS.str());
+ CommentString.clear();
+ };
+
+ auto EmitCommentAndValue = [&](uint64_t Value, int Size) {
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(Value, Size);
+ };
+
+ unsigned int Version = 0;
+ CommentOS << "Version = " << Version;
+ EmitCommentAndValue(Version, 1);
+
+ // There is a lack of information in the IR to assist with determining the
+ // source language. The AIX exception handling mechanism only searches for the
+ // personality routine and LSDA area when the language supports exception
+ // handling. So, to be conservatively correct and allow the runtime to do its
+ // job, we set it to C++ for now.
+ TracebackTable::LanguageID LanguageIdentifier =
+ TracebackTable::CPlusPlus; // C++
+
+ CommentOS << "Language = "
+ << getNameForTracebackTableLanguageId(LanguageIdentifier);
+ EmitCommentAndValue(LanguageIdentifier, 1);
+
+ // This is only populated for the third and fourth bytes.
+ uint32_t FirstHalfOfMandatoryField = 0;
+
+ // Emit the 3rd byte of the mandatory field.
+
+ // We always set traceback offset bit to true.
+ FirstHalfOfMandatoryField |= TracebackTable::HasTraceBackTableOffsetMask;
+
+ const PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Check whether the function uses floating-point processor instructions.
+ for (unsigned Reg = PPC::F0; Reg <= PPC::F31; ++Reg) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ FirstHalfOfMandatoryField |= TracebackTable::IsFloatingPointPresentMask;
+ break;
+ }
+ }
+
+#define GENBOOLCOMMENT(Prefix, V, Field) \
+ CommentOS << (Prefix) << ((V) & (TracebackTable::Field##Mask) ? "+" : "-") \
+ << #Field
+
+#define GENVALUECOMMENT(PrefixAndName, V, Field) \
+ CommentOS << (PrefixAndName) << " = " \
+ << static_cast<unsigned>(((V) & (TracebackTable::Field##Mask)) >> \
+ (TracebackTable::Field##Shift))
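
For readability, a single use of these macros expands to a plain stream insertion. The expansion below reuses the CommentOS and TracebackTable names already in scope at the use sites and is shown only as an illustration:

    // GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsLRSaved) becomes:
    CommentOS << (", ")
              << ((FirstHalfOfMandatoryField) & (TracebackTable::IsLRSavedMask)
                      ? "+"
                      : "-")
              << "IsLRSaved"; // emits ", +IsLRSaved" or ", -IsLRSaved"

    // GENVALUECOMMENT("NumberOfFixedParms", SecondHalfOfMandatoryField,
    //                 NumberOfFixedParms) becomes:
    CommentOS << ("NumberOfFixedParms") << " = "
              << static_cast<unsigned>(
                     ((SecondHalfOfMandatoryField) &
                      (TracebackTable::NumberOfFixedParmsMask)) >>
                     (TracebackTable::NumberOfFixedParmsShift));
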
+
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsGlobaLinkage);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsOutOfLineEpilogOrPrologue);
+ EmitComment();
+
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField, HasTraceBackTableOffset);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsInternalProcedure);
+ EmitComment();
+
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField, HasControlledStorage);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsTOCless);
+ EmitComment();
+
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsFloatingPointPresent);
+ EmitComment();
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField,
+ IsFloatingPointOperationLogOrAbortEnabled);
+ EmitComment();
+
+ OutStreamer->emitIntValueInHexWithPadding(
+ (FirstHalfOfMandatoryField & 0x0000ff00) >> 8, 1);
+
+ // Set the 4th byte of the mandatory field.
+ FirstHalfOfMandatoryField |= TracebackTable::IsFunctionNamePresentMask;
+
+ static_assert(XCOFF::AllocRegNo == 31, "Unexpected register usage!");
+ if (MRI.isPhysRegUsed(Subtarget->isPPC64() ? PPC::X31 : PPC::R31))
+ FirstHalfOfMandatoryField |= TracebackTable::IsAllocaUsedMask;
+
+ const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
+ if (!MustSaveCRs.empty())
+ FirstHalfOfMandatoryField |= TracebackTable::IsCRSavedMask;
+
+ if (FI->mustSaveLR())
+ FirstHalfOfMandatoryField |= TracebackTable::IsLRSavedMask;
+
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsInterruptHandler);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsFunctionNamePresent);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsAllocaUsed);
+ EmitComment();
+ GENVALUECOMMENT("OnConditionDirective", FirstHalfOfMandatoryField,
+ OnConditionDirective);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsCRSaved);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsLRSaved);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding((FirstHalfOfMandatoryField & 0xff),
+ 1);
+
+ // Set the 5th byte of mandatory field.
+ uint32_t SecondHalfOfMandatoryField = 0;
+
+ // Always store back chain.
+ SecondHalfOfMandatoryField |= TracebackTable::IsBackChainStoredMask;
+
+ uint32_t FPRSaved = 0;
+ for (unsigned Reg = PPC::F14; Reg <= PPC::F31; ++Reg) {
+ if (MRI.isPhysRegModified(Reg)) {
+ FPRSaved = PPC::F31 - Reg + 1;
+ break;
+ }
+ }
+ SecondHalfOfMandatoryField |= (FPRSaved << TracebackTable::FPRSavedShift) &
+ TracebackTable::FPRSavedMask;
+ GENBOOLCOMMENT("", SecondHalfOfMandatoryField, IsBackChainStored);
+ GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, IsFixup);
+ GENVALUECOMMENT(", NumOfFPRsSaved", SecondHalfOfMandatoryField, FPRSaved);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(
+ (SecondHalfOfMandatoryField & 0xff000000) >> 24, 1);
+
+ // Set the 6th byte of mandatory field.
+ bool ShouldEmitEHBlock = TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF);
+ if (ShouldEmitEHBlock)
+ SecondHalfOfMandatoryField |= TracebackTable::HasExtensionTableMask;
+
+ uint32_t GPRSaved = 0;
+
+ // X13 is reserved in the 64-bit environment.
+ unsigned GPRBegin = Subtarget->isPPC64() ? PPC::X14 : PPC::R13;
+ unsigned GPREnd = Subtarget->isPPC64() ? PPC::X31 : PPC::R31;
+
+ for (unsigned Reg = GPRBegin; Reg <= GPREnd; ++Reg) {
+ if (MRI.isPhysRegModified(Reg)) {
+ GPRSaved = GPREnd - Reg + 1;
+ break;
+ }
+ }
+
+ SecondHalfOfMandatoryField |= (GPRSaved << TracebackTable::GPRSavedShift) &
+ TracebackTable::GPRSavedMask;
+
+ GENBOOLCOMMENT("", SecondHalfOfMandatoryField, HasVectorInfo);
+ GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, HasExtensionTable);
+ GENVALUECOMMENT(", NumOfGPRsSaved", SecondHalfOfMandatoryField, GPRSaved);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(
+ (SecondHalfOfMandatoryField & 0x00ff0000) >> 16, 1);
+
+ // Set the 7th byte of mandatory field.
+ uint32_t NumberOfFixedPara = FI->getFixedParamNum();
+ SecondHalfOfMandatoryField |=
+ (NumberOfFixedPara << TracebackTable::NumberOfFixedParmsShift) &
+ TracebackTable::NumberOfFixedParmsMask;
+ GENVALUECOMMENT("NumberOfFixedParms", SecondHalfOfMandatoryField,
+ NumberOfFixedParms);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(
+ (SecondHalfOfMandatoryField & 0x0000ff00) >> 8, 1);
+
+ // Set the 8th byte of mandatory field.
+
+ // Always set parameter on stack.
+ SecondHalfOfMandatoryField |= TracebackTable::HasParmsOnStackMask;
+
+ uint32_t NumberOfFPPara = FI->getFloatingPointParamNum();
+ SecondHalfOfMandatoryField |=
+ (NumberOfFPPara << TracebackTable::NumberOfFloatingPointParmsShift) &
+ TracebackTable::NumberOfFloatingPointParmsMask;
+
+ GENVALUECOMMENT("NumberOfFPParms", SecondHalfOfMandatoryField,
+ NumberOfFloatingPointParms);
+ GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, HasParmsOnStack);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(SecondHalfOfMandatoryField & 0xff,
+ 1);
+
+ // Generate the optional fields of traceback table.
+
+ // Parameter type.
+ if (NumberOfFixedPara || NumberOfFPPara) {
+ assert((SecondHalfOfMandatoryField & TracebackTable::HasVectorInfoMask) ==
+ 0 &&
+ "VectorInfo has not been implemented.");
+ uint32_t ParaType = FI->getParameterType();
+ CommentOS << "Parameter type = "
+ << XCOFF::parseParmsType(ParaType,
+ NumberOfFixedPara + NumberOfFPPara);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(ParaType, sizeof(ParaType));
+ }
+
+ // Traceback table offset.
+ OutStreamer->AddComment("Function size");
+ if (FirstHalfOfMandatoryField & TracebackTable::HasTraceBackTableOffsetMask) {
+ MCSymbol *FuncSectSym = getObjFileLowering().getFunctionEntryPointSymbol(
+ &(MF->getFunction()), TM);
+ OutStreamer->emitAbsoluteSymbolDiff(FuncEnd, FuncSectSym, 4);
+ }
+
+ // Since we never set the Int_Handler bit, this case is not implemented yet.
+ if (FirstHalfOfMandatoryField & TracebackTable::IsInterruptHandlerMask)
+ report_fatal_error("Hand_Mask not implemented yet");
+
+ if (FirstHalfOfMandatoryField & TracebackTable::HasControlledStorageMask)
+ report_fatal_error("Ctl_Info not implement yet");
+
+ if (FirstHalfOfMandatoryField & TracebackTable::IsFunctionNamePresentMask) {
+ StringRef Name = MF->getName().substr(0, INT16_MAX);
+ int16_t NameLength = Name.size();
+ CommentOS << "Function name len = "
+ << static_cast<unsigned int>(NameLength);
+ EmitCommentAndValue(NameLength, 2);
+ OutStreamer->AddComment("Function Name");
+ OutStreamer->emitBytes(Name);
+ }
+
+ if (FirstHalfOfMandatoryField & TracebackTable::IsAllocaUsedMask) {
+ uint8_t AllocReg = XCOFF::AllocRegNo;
+ OutStreamer->AddComment("AllocaUsed");
+ OutStreamer->emitIntValueInHex(AllocReg, sizeof(AllocReg));
+ }
+
+ uint8_t ExtensionTableFlag = 0;
+ if (SecondHalfOfMandatoryField & TracebackTable::HasExtensionTableMask) {
+ if (ShouldEmitEHBlock)
+ ExtensionTableFlag |= ExtendedTBTableFlag::TB_EH_INFO;
+
+ CommentOS << "ExtensionTableFlag = "
+ << getExtendedTBTableFlagString(ExtensionTableFlag);
+ EmitCommentAndValue(ExtensionTableFlag, sizeof(ExtensionTableFlag));
+ }
+
+ if (ExtensionTableFlag & ExtendedTBTableFlag::TB_EH_INFO) {
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *EHInfoSym =
+ TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF);
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(EHInfoSym);
+ const MCSymbol *TOCBaseSym =
+ cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
+ const MCExpr *Exp =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx),
+ MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx);
+
+ const DataLayout &DL = getDataLayout();
+ OutStreamer->emitValueToAlignment(4);
+ OutStreamer->AddComment("EHInfo Table");
+ OutStreamer->emitValue(Exp, DL.getPointerSize());
+ }
+
+#undef GENBOOLCOMMENT
+#undef GENVALUECOMMENT
+}
+
void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) {
// Early error checking limiting what is supported.
if (GV->isThreadLocal())
report_fatal_error("Thread local not yet supported on AIX.");
- if (GV->hasSection())
- report_fatal_error("Custom section for Data not yet supported.");
-
if (GV->hasComdat())
report_fatal_error("COMDAT not yet supported by AIX.");
}
+static bool isSpecialLLVMGlobalArrayToSkip(const GlobalVariable *GV) {
+ return GV->hasAppendingLinkage() &&
+ StringSwitch<bool>(GV->getName())
+ // TODO: Linker could still eliminate the GV if we just skip
+ // handling llvm.used array. Skipping them for now until we or the
+ // AIX OS team come up with a good solution.
+ .Case("llvm.used", true)
+ // It's correct to just skip llvm.compiler.used array here.
+ .Case("llvm.compiler.used", true)
+ .Default(false);
+}
+
static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {
return StringSwitch<bool>(GV->getName())
.Cases("llvm.global_ctors", "llvm.global_dtors", true)
@@ -1669,19 +2048,15 @@ static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {
}
void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
- ValidateGV(GV);
-
- // TODO: Update the handling of global arrays for static init when we support
- // the ".ref" directive.
- // Otherwise, we can skip these arrays, because the AIX linker collects
- // static init functions simply based on their name.
- if (isSpecialLLVMGlobalArrayForStaticInit(GV))
+ // Special LLVM global arrays have been handled at the initialization.
+ if (isSpecialLLVMGlobalArrayToSkip(GV) || isSpecialLLVMGlobalArrayForStaticInit(GV))
return;
- // Create the symbol, set its storage class.
+ assert(!GV->getName().startswith("llvm.") &&
+ "Unhandled intrinsic global variable.");
+ ValidateGV(GV);
+
MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
- GVSym->setStorageClass(
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
if (GV->isDeclarationForLinker()) {
emitLinkage(GV, GVSym);
@@ -1705,10 +2080,12 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (GVKind.isCommon() || GVKind.isBSSLocal()) {
Align Alignment = GV->getAlign().getValueOr(DL.getPreferredAlign(GV));
uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+ GVSym->setStorageClass(
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
if (GVKind.isBSSLocal())
OutStreamer->emitXCOFFLocalCommonSymbol(
- OutContext.getOrCreateSymbol(GVSym->getUnqualifiedName()), Size,
+ OutContext.getOrCreateSymbol(GVSym->getSymbolTableName()), Size,
GVSym, Alignment.value());
else
OutStreamer->emitCommonSymbol(GVSym, Size, Alignment.value());
@@ -1718,7 +2095,18 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
MCSymbol *EmittedInitSym = GVSym;
emitLinkage(GV, EmittedInitSym);
emitAlignment(getGVAlignment(GV, DL), GV);
- OutStreamer->emitLabel(EmittedInitSym);
+
+ // When -fdata-sections is enabled, every GlobalVariable will
+ // be put into its own csect; therefore, a label is not necessary here.
+ if (!TM.getDataSections() || GV->hasSection()) {
+ OutStreamer->emitLabel(EmittedInitSym);
+ }
+
+ // Emit aliasing label for global variable.
+ llvm::for_each(GOAliasMap[GV], [this](const GlobalAlias *Alias) {
+ OutStreamer->emitLabel(getSymbol(Alias));
+ });
+
emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
}
@@ -1730,6 +2118,13 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() {
// Emit function descriptor.
OutStreamer->SwitchSection(
cast<MCSymbolXCOFF>(CurrentFnDescSym)->getRepresentedCsect());
+
+ // Emit aliasing label for function descriptor csect.
+ llvm::for_each(GOAliasMap[&MF->getFunction()],
+ [this](const GlobalAlias *Alias) {
+ OutStreamer->emitLabel(getSymbol(Alias));
+ });
+
// Emit function entry point address.
OutStreamer->emitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
PointerSize);
@@ -1745,6 +2140,20 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() {
OutStreamer->SwitchSection(Current.first, Current.second);
}
+void PPCAIXAsmPrinter::emitFunctionEntryLabel() {
+ // It's not necessary to emit the label when each function is placed
+ // in its own csect.
+ if (!TM.getFunctionSections())
+ PPCAsmPrinter::emitFunctionEntryLabel();
+
+ // Emit aliasing label for function entry point label.
+ llvm::for_each(
+ GOAliasMap[&MF->getFunction()], [this](const GlobalAlias *Alias) {
+ OutStreamer->emitLabel(
+ getObjFileLowering().getFunctionEntryPointSymbol(Alias, TM));
+ });
+}
+
void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
// If there are no functions in this module, we will never need to reference
// the TOC base.
@@ -1757,20 +2166,10 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
- const unsigned EntryByteSize = Subtarget->isPPC64() ? 8 : 4;
- const unsigned TOCEntriesByteSize = TOC.size() * EntryByteSize;
- // TODO: If TOC entries' size is larger than 32768, then we run out of
- // positive displacement to reach the TOC entry. We need to decide how to
- // handle entries' size larger than that later.
- if (TOCEntriesByteSize > 32767) {
- report_fatal_error("Handling of TOC entry displacement larger than 32767 "
- "is not yet implemented.");
- }
-
for (auto &I : TOC) {
// Setup the csect for the current TC entry.
MCSectionXCOFF *TCEntry = cast<MCSectionXCOFF>(
- getObjFileLowering().getSectionForTOCEntry(I.first));
+ getObjFileLowering().getSectionForTOCEntry(I.first, TM));
OutStreamer->SwitchSection(TCEntry);
OutStreamer->emitLabel(I.second);
@@ -1780,10 +2179,6 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
}
bool PPCAIXAsmPrinter::doInitialization(Module &M) {
- if (M.alias_size() > 0u)
- report_fatal_error(
- "module has aliases, which LLVM does not yet support for AIX");
-
const bool Result = PPCAsmPrinter::doInitialization(M);
auto setCsectAlignment = [this](const GlobalObject *GO) {
@@ -1803,19 +2198,174 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
// We need to know, up front, the alignment of csects for the assembly path,
// because once a .csect directive gets emitted, we could not change the
// alignment value on it.
- for (const auto &G : M.globals())
+ for (const auto &G : M.globals()) {
+ if (isSpecialLLVMGlobalArrayToSkip(&G))
+ continue;
+
+ if (isSpecialLLVMGlobalArrayForStaticInit(&G)) {
+ // Generate a format indicator and a unique module id to be a part of
+ // the sinit and sterm function names.
+ if (FormatIndicatorAndUniqueModId.empty()) {
+ std::string UniqueModuleId = getUniqueModuleId(&M);
+ if (UniqueModuleId != "")
+ // TODO: Use source file full path to generate the unique module id
+ // and add a format indicator as a part of function name in case we
+ // will support more than one format.
+ FormatIndicatorAndUniqueModId = "clang_" + UniqueModuleId.substr(1);
+ else
+ // Use the Pid and current time as the unique module id when we cannot
+ // generate one based on a module's strong external symbols.
+ // FIXME: Adjust the comment accordingly after we use source file full
+ // path instead.
+ FormatIndicatorAndUniqueModId =
+ "clangPidTime_" + llvm::itostr(sys::Process::getProcessId()) +
+ "_" + llvm::itostr(time(nullptr));
+ }
+
+ emitSpecialLLVMGlobal(&G);
+ continue;
+ }
+
setCsectAlignment(&G);
+ }
for (const auto &F : M)
setCsectAlignment(&F);
+ // Construct an aliasing list for each GlobalObject.
+ for (const auto &Alias : M.aliases()) {
+ const GlobalObject *Base = Alias.getBaseObject();
+ if (!Base)
+ report_fatal_error(
+ "alias without a base object is not yet supported on AIX");
+ GOAliasMap[Base].push_back(&Alias);
+ }
+
return Result;
}
-/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
-/// for a MachineFunction to the given output stream, in a format that the
-/// Darwin assembler can deal with.
-///
+void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case PPC::BL8:
+ case PPC::BL:
+ case PPC::BL8_NOP:
+ case PPC::BL_NOP: {
+ const MachineOperand &MO = MI->getOperand(0);
+ if (MO.isSymbol()) {
+ MCSymbolXCOFF *S =
+ cast<MCSymbolXCOFF>(OutContext.getOrCreateSymbol(MO.getSymbolName()));
+ ExtSymSDNodeSymbols.insert(S);
+ }
+ } break;
+ case PPC::BL_TLS:
+ case PPC::BL8_TLS:
+ case PPC::BL8_TLS_:
+ case PPC::BL8_NOP_TLS:
+ report_fatal_error("TLS call not yet implemented");
+ case PPC::TAILB:
+ case PPC::TAILB8:
+ case PPC::TAILBA:
+ case PPC::TAILBA8:
+ case PPC::TAILBCTR:
+ case PPC::TAILBCTR8:
+ if (MI->getOperand(0).isSymbol())
+ report_fatal_error("Tail call for extern symbol not yet supported.");
+ break;
+ }
+ return PPCAsmPrinter::emitInstruction(MI);
+}
+
+bool PPCAIXAsmPrinter::doFinalization(Module &M) {
+ for (MCSymbol *Sym : ExtSymSDNodeSymbols)
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Extern);
+ return PPCAsmPrinter::doFinalization(M);
+}
+
+static unsigned mapToSinitPriority(int P) {
+ if (P < 0 || P > 65535)
+ report_fatal_error("invalid init priority");
+
+ if (P <= 20)
+ return P;
+
+ if (P < 81)
+ return 20 + (P - 20) * 16;
+
+ if (P <= 1124)
+ return 1004 + (P - 81);
+
+ if (P < 64512)
+ return 2047 + (P - 1124) * 33878;
+
+ return 2147482625u + (P - 64512);
+}
+
+static std::string convertToSinitPriority(int Priority) {
+ // This helper function converts a clang init priority into the value used
+ // in sinit and sterm function names.
+ //
+ // The conversion strategy is:
+ // We map the reserved clang/gnu priority range [0, 100] into the sinit/sterm
+ // reserved priority range [0, 1023] by
+ // - directly mapping the first 21 and the last 20 elements of the ranges
+ // - linearly interpolating the intermediate values with a step size of 16.
+ //
+ // We map the non-reserved clang/gnu priority range [101, 65535] into the
+ // sinit/sterm priority range [1024, 2147483648] by:
+ // - directly mapping the first and the last 1024 elements of the ranges
+ // - linearly interpolating the intermediate values with a step size of 33878.
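+ //
+ // For example, clang priority 100 maps to 1004 + (100 - 81) = 1023, giving
+ // the suffix "000003ff", and the default priority 65535 maps to
+ // 2147482625 + (65535 - 64512) = 2147483648, giving "80000000".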
+ unsigned int P = mapToSinitPriority(Priority);
+
+ std::string PrioritySuffix;
+ llvm::raw_string_ostream os(PrioritySuffix);
+ os << llvm::format_hex_no_prefix(P, 8);
+ os.flush();
+ return PrioritySuffix;
+}
+
+void PPCAIXAsmPrinter::emitXXStructorList(const DataLayout &DL,
+ const Constant *List, bool IsCtor) {
+ SmallVector<Structor, 8> Structors;
+ preprocessXXStructorList(DL, List, Structors);
+ if (Structors.empty())
+ return;
+
+ unsigned Index = 0;
+ for (Structor &S : Structors) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(S.Func))
+ S.Func = CE->getOperand(0);
+
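+ // Create an alias of the structor function whose name encodes the kind
+ // (__sinit/__sterm), the priority, the unique module id, and the position
+ // in the list, e.g. "__sinit80000000_clang_<id>_0" for a default-priority
+ // constructor (<id> is a placeholder for the unique module id).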
+ llvm::GlobalAlias::create(
+ GlobalValue::ExternalLinkage,
+ (IsCtor ? llvm::Twine("__sinit") : llvm::Twine("__sterm")) +
+ llvm::Twine(convertToSinitPriority(S.Priority)) +
+ llvm::Twine("_", FormatIndicatorAndUniqueModId) +
+ llvm::Twine("_", llvm::utostr(Index++)),
+ cast<Function>(S.Func));
+ }
+}
+
+void PPCAIXAsmPrinter::emitTTypeReference(const GlobalValue *GV,
+ unsigned Encoding) {
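+ // Emit the typeinfo reference as the offset of its TOC entry from the TOC
+ // base (TOCEntry - TOCBase), or 0 when there is no typeinfo symbol.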
+ if (GV) {
+ MCSymbol *TypeInfoSym = TM.getSymbol(GV);
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(TypeInfoSym);
+ const MCSymbol *TOCBaseSym =
+ cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
+ auto &Ctx = OutStreamer->getContext();
+ const MCExpr *Exp =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx),
+ MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx);
+ OutStreamer->emitValue(Exp, GetSizeOfEncodedValue(Encoding));
+ } else
+ OutStreamer->emitIntValue(0, GetSizeOfEncodedValue(Encoding));
+}
+
+// Return a pass that prints the PPC assembly code for a MachineFunction to the
+// given output stream.
static AsmPrinter *
createPPCAsmPrinterPass(TargetMachine &tm,
std::unique_ptr<MCStreamer> &&Streamer) {
@@ -1829,6 +2379,8 @@ createPPCAsmPrinterPass(TargetMachine &tm,
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(getThePPC32Target(),
createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(getThePPC32LETarget(),
+ createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(getThePPC64Target(),
createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(getThePPC64LETarget(),
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index f125ca011cd2..3c6b1f84b821 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -59,7 +59,7 @@ using namespace llvm;
namespace {
-#define DEBUG_TYPE "bool-ret-to-int"
+#define DEBUG_TYPE "ppc-bool-ret-to-int"
STATISTIC(NumBoolRetPromotion,
"Number of times a bool feeding a RetInst was promoted to an int");
@@ -75,8 +75,7 @@ class PPCBoolRetToInt : public FunctionPass {
WorkList.push_back(V);
Defs.insert(V);
while (!WorkList.empty()) {
- Value *Curr = WorkList.back();
- WorkList.pop_back();
+ Value *Curr = WorkList.pop_back_val();
auto *CurrUser = dyn_cast<User>(Curr);
// Operands of CallInst/Constant are skipped because they may not be Bool
// type. For CallInst, their positions are defined by ABI.
@@ -283,8 +282,8 @@ private:
} // end anonymous namespace
char PPCBoolRetToInt::ID = 0;
-INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
- "Convert i1 constants to i32/i64 if they are returned",
- false, false)
+INITIALIZE_PASS(PPCBoolRetToInt, "ppc-bool-ret-to-int",
+ "Convert i1 constants to i32/i64 if they are returned", false,
+ false)
FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.cpp
index 5116f0d121f4..79ffc6627a61 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.cpp
@@ -32,4 +32,4 @@ void PPCCCState::PreAnalyzeFormalArguments(
OriginalArgWasPPCF128.push_back(false);
}
}
-} \ No newline at end of file
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index bb12e05173a6..b9518d6d7064 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -1,4 +1,4 @@
-//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
+//===-- PPCCTRLoops.cpp - Verify CTR loops -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,74 +6,48 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass identifies loops where we can generate the PPC branch instructions
-// that decrement and test the count register (CTR) (bdnz and friends).
-//
-// The pattern that defines the induction variable can changed depending on
-// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
-// normalizes induction variables, and the Loop Strength Reduction pass
-// run by 'llc' may also make changes to the induction variable.
-//
-// Criteria for CTR loops:
-// - Countable loops (w/ ind. var for a trip count)
-// - Try inner-most loops first
-// - No nested CTR loops.
-// - No function calls in loops.
+// This pass verifies that all bdnz/bdz instructions are dominated by a loop
+// mtctr before any other instructions that might clobber the ctr register.
//
//===----------------------------------------------------------------------===//
+// CTR loops are produced by the HardwareLoops pass, and this pass simply
+// verifies that no invalid CTR loops are produced. As such, it isn't
+// something that needs to be run (or even defined) for Release builds, so
+// the entire file is guarded by NDEBUG.
+#ifndef NDEBUG
+#include <vector>
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
-#include "PPCSubtarget.h"
-#include "PPCTargetMachine.h"
-#include "PPCTargetTransformInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/ValueHandle.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
+#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-
-#ifndef NDEBUG
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#endif
using namespace llvm;
-#define DEBUG_TYPE "ctrloops"
-
-#ifndef NDEBUG
-static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
-#endif
+#define DEBUG_TYPE "ppc-ctrloops-verify"
namespace {
-#ifndef NDEBUG
struct PPCCTRLoopsVerify : public MachineFunctionPass {
public:
static char ID;
@@ -94,10 +68,8 @@ namespace {
};
char PPCCTRLoopsVerify::ID = 0;
-#endif // NDEBUG
} // end anonymous namespace
-#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
"PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -107,9 +79,7 @@ INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
FunctionPass *llvm::createPPCCTRLoopsVerify() {
return new PPCCTRLoopsVerify();
}
-#endif // NDEBUG
-#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr &MI) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
@@ -178,9 +148,7 @@ queue_preds:
return false;
}
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PIE = MBB->pred_end(); PI != PIE; ++PI)
- Preds.push_back(*PI);
+ append_range(Preds, MBB->predecessors());
}
do {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 1eaa7f7a44b3..cc3486718179 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -59,10 +59,7 @@ def RetCC_PPC_Cold : CallingConv<[
CCIfType<[f32], CCAssignToReg<[F1]>>,
CCIfType<[f64], CCAssignToReg<[F1]>>,
- CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>,
-
- CCIfType<[v4f64, v4f32, v4i1],
- CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1]>>>,
+ CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2]>>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
@@ -95,13 +92,9 @@ def RetCC_PPC : CallingConv<[
// For P9, f128 are passed in vector registers.
CCIfType<[f128],
- CCIfSubtarget<"hasP9Vector()",
+ CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
- // QPX vectors are returned in QF1 and QF2.
- CCIfType<[v4f64, v4f32, v4i1],
- CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
-
// Vector types returned as "direct" go into V2 .. V9; note that only the
// ELFv2 ABI fully utilizes all these registers.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
@@ -156,10 +149,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f128],
- CCIfSubtarget<"hasP9Vector()",
+ CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
- CCIfType<[v4f64, v4f32, v4i1],
- CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
@@ -223,12 +214,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCIfType<[f32], CCIfSubtarget<"hasSPE()", CCAssignToStack<4, 4>>>,
CCIfType<[f64], CCIfSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>,
- // QPX vectors that are stored in double precision need 32-byte alignment.
- CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
-
// Vectors and float128 get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>,
- CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToStack<16, 16>>>
+ CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToStack<16, 16>>>
]>;
// This calling convention puts vector arguments always on the stack. It is used
@@ -243,10 +231,6 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[
// put vector arguments in vector registers before putting them on the stack.
let Entry = 1 in
def CC_PPC32_SVR4 : CallingConv<[
- // QPX vectors mirror the scalar FP convention.
- CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
- CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
-
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
@@ -254,7 +238,7 @@ def CC_PPC32_SVR4 : CallingConv<[
// Float128 types treated as vector arguments.
CCIfType<[f128],
- CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
+ CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
V8, V9, V10, V11, V12, V13]>>>,
CCDelegateTo<CC_PPC32_SVR4_Common>
@@ -307,6 +291,8 @@ def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
+def CSR_AIX32_Altivec : CalleeSavedRegs<(add CSR_AIX32, CSR_Altivec)>;
+
// Common CalleeSavedRegs for SVR4 and AIX.
def CSR_PPC64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index c9f74bbf861c..08b7bdb3ac1e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -77,8 +77,9 @@ protected:
if (J->getOperand(0).getMBB() == &ReturnMBB) {
// This is an unconditional branch to the return. Replace the
// branch with a blr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()))
- .copyImplicitOps(*I);
+ MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I);
+ (*PI)->insert(J, MI);
+
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -89,10 +90,13 @@ protected:
if (J->getOperand(2).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
+ MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I);
+ MI->setDesc(TII->get(PPC::BCCLR));
+ MachineInstrBuilder(*ReturnMBB.getParent(), MI)
.add(J->getOperand(0))
- .add(J->getOperand(1))
- .copyImplicitOps(*I);
+ .add(J->getOperand(1));
+ (*PI)->insert(J, MI);
+
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -103,11 +107,13 @@ protected:
if (J->getOperand(1).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
- BuildMI(
- **PI, J, J->getDebugLoc(),
- TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn))
- .add(J->getOperand(0))
- .copyImplicitOps(*I);
+ MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I);
+ MI->setDesc(
+ TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn));
+ MachineInstrBuilder(*ReturnMBB.getParent(), MI)
+ .add(J->getOperand(0));
+ (*PI)->insert(J, MI);
+
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 39790ac9a8aa..c181816e31c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -86,7 +86,6 @@ typedef struct Address {
class PPCFastISel final : public FastISel {
const TargetMachine &TM;
- const PPCSubtarget *PPCSubTarget;
const PPCSubtarget *Subtarget;
PPCFunctionInfo *PPCFuncInfo;
const TargetInstrInfo &TII;
@@ -97,7 +96,6 @@ class PPCFastISel final : public FastISel {
explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
- PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
@@ -1567,6 +1565,10 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (IsVarArg)
return false;
+ // If this is a PC-Rel function, let SDISel handle the call.
+ if (Subtarget->isUsingPCRelativeCalls())
+ return false;
+
// Handle simple calls for now, with legal return types and
// those that can be extended.
Type *RetTy = CLI.RetTy;
@@ -1622,7 +1624,10 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
return false;
- if (ArgVT.isVector())
+ // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
+ // types, which are passed in vector registers. Skip these types and fall
+ // back to the default SelectionDAG-based selection.
+ if (ArgVT.isVector() || ArgVT == MVT::f128)
return false;
unsigned Arg = getRegForValue(ArgValue);
@@ -1991,6 +1996,10 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
// Materialize a floating-point constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
+ // If this is a PC-Rel function, let SDISel handle the constant pool.
+ if (Subtarget->isUsingPCRelativeCalls())
+ return false;
+
// No plans to handle long double here.
if (VT != MVT::f32 && VT != MVT::f64)
return 0;
@@ -2055,6 +2064,10 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
+ // If this is a PC-Rel function, let SDISel handle GV materialization.
+ if (Subtarget->isUsingPCRelativeCalls())
+ return false;
+
assert(VT == MVT::i64 && "Non-address!");
const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
unsigned DestReg = createResultReg(RC);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 2ee394e9259d..16536bf23deb 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -39,15 +39,6 @@ EnablePEVectorSpills("ppc-enable-pe-vector-spills",
cl::desc("Enable spills in prologue to vector registers."),
cl::init(false), cl::Hidden);
-/// VRRegNo - Map from a numbered VR register to its enum value.
-///
-static const MCPhysReg VRRegNo[] = {
- PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
- PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
if (STI.isAIXABI())
return STI.isPPC64() ? 16 : 8;
@@ -227,19 +218,14 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
CALLEE_SAVED_VRS
};
- static const SpillSlot AIXOffsets32[] = {
- CALLEE_SAVED_FPRS,
- CALLEE_SAVED_GPRS32,
- // Add AIX's extra CSR.
- {PPC::R13, -76},
- // TODO: Update when we add vector support for AIX.
- };
+ static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS32,
+ // Add AIX's extra CSR.
+ {PPC::R13, -76},
+ CALLEE_SAVED_VRS};
static const SpillSlot AIXOffsets64[] = {
- CALLEE_SAVED_FPRS,
- CALLEE_SAVED_GPRS64,
- // TODO: Update when we add vector support for AIX.
- };
+ CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
if (Subtarget.is64BitELFABI()) {
NumEntries = array_lengthof(ELFOffsets64);
@@ -262,153 +248,11 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
return AIXOffsets32;
}
-/// RemoveVRSaveCode - We have found that this function does not need any code
-/// to manipulate the VRSAVE register, even though it uses vector registers.
-/// This can happen when the only registers used are known to be live in or out
-/// of the function. Remove all of the VRSAVE related code from the function.
-/// FIXME: The removal of the code results in a compile failure at -O0 when the
-/// function contains a function call, as the GPR containing original VRSAVE
-/// contents is spilled and reloaded around the call. Without the prolog code,
-/// the spill instruction refers to an undefined register. This code needs
-/// to account for all uses of that GPR.
-static void RemoveVRSaveCode(MachineInstr &MI) {
- MachineBasicBlock *Entry = MI.getParent();
- MachineFunction *MF = Entry->getParent();
-
- // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
- MachineBasicBlock::iterator MBBI = MI;
- ++MBBI;
- assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
- MBBI->eraseFromParent();
-
- bool RemovedAllMTVRSAVEs = true;
- // See if we can find and remove the MTVRSAVE instruction from all of the
- // epilog blocks.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
- // If last instruction is a return instruction, add an epilogue
- if (I->isReturnBlock()) {
- bool FoundIt = false;
- for (MBBI = I->end(); MBBI != I->begin(); ) {
- --MBBI;
- if (MBBI->getOpcode() == PPC::MTVRSAVE) {
- MBBI->eraseFromParent(); // remove it.
- FoundIt = true;
- break;
- }
- }
- RemovedAllMTVRSAVEs &= FoundIt;
- }
- }
-
- // If we found and removed all MTVRSAVE instructions, remove the read of
- // VRSAVE as well.
- if (RemovedAllMTVRSAVEs) {
- MBBI = MI;
- assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
- --MBBI;
- assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
- MBBI->eraseFromParent();
- }
-
- // Finally, nuke the UPDATE_VRSAVE.
- MI.eraseFromParent();
-}
-
-// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
-// instruction selector. Based on the vector registers that have been used,
-// transform this into the appropriate ORI instruction.
-static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
- MachineFunction *MF = MI.getParent()->getParent();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- DebugLoc dl = MI.getDebugLoc();
-
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned UsedRegMask = 0;
- for (unsigned i = 0; i != 32; ++i)
- if (MRI.isPhysRegModified(VRRegNo[i]))
- UsedRegMask |= 1 << (31-i);
-
- // Live in and live out values already must be in the mask, so don't bother
- // marking them.
- for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
- unsigned RegNo = TRI->getEncodingValue(LI.first);
- if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg.
- UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
- }
-
- // Live out registers appear as use operands on return instructions.
- for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
- UsedRegMask != 0 && BI != BE; ++BI) {
- const MachineBasicBlock &MBB = *BI;
- if (!MBB.isReturnBlock())
- continue;
- const MachineInstr &Ret = MBB.back();
- for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = Ret.getOperand(I);
- if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
- continue;
- unsigned RegNo = TRI->getEncodingValue(MO.getReg());
- UsedRegMask &= ~(1 << (31-RegNo));
- }
- }
-
- // If no registers are used, turn this into a copy.
- if (UsedRegMask == 0) {
- // Remove all VRSAVE code.
- RemoveVRSaveCode(MI);
- return;
- }
-
- Register SrcReg = MI.getOperand(1).getReg();
- Register DstReg = MI.getOperand(0).getReg();
-
- if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
- if (DstReg != SrcReg)
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask);
- else
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask);
- } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
- if (DstReg != SrcReg)
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask >> 16);
- else
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask >> 16);
- } else {
- if (DstReg != SrcReg)
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask >> 16);
- else
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask >> 16);
-
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(DstReg, RegState::Kill)
- .addImm(UsedRegMask & 0xFFFF);
- }
-
- // Remove the old UPDATE_VRSAVE instruction.
- MI.eraseFromParent();
-}
-
static bool spillsCR(const MachineFunction &MF) {
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
return FuncInfo->isCRSpilled();
}
-static bool spillsVRSAVE(const MachineFunction &MF) {
- const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- return FuncInfo->isVRSAVESpilled();
-}
-
static bool hasSpills(const MachineFunction &MF) {
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
return FuncInfo->hasSpills();
@@ -474,7 +318,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
!FI->mustSaveTOC() && // No need to save TOC.
!RegInfo->hasBasePointer(MF); // No special alignment.
- // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
+ // Note: for PPC32 SVR4ABI, we can still generate stackless
// code if all local vars are reg-allocated.
bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
@@ -531,9 +375,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
- (MF.getTarget().Options.GuaranteedTailCallOpt &&
- MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+ MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
+ MF.exposesReturnsTwice() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
@@ -681,6 +526,8 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
+// Stack probing also requires two scratch registers: one to hold the old sp,
+// and one for the case of a large frame with a large probe size.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -692,8 +539,10 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
Align MaxAlign = MFI.getMaxAlign();
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
- return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
+ return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
+ TLI.hasInlineStackProbe(MF);
}
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
@@ -736,8 +585,8 @@ bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
// Frame pointers and base pointers complicate matters so don't do anything
// if we have them. For example having a frame pointer will sometimes require
// a copy of r1 into r31 and that makes keeping track of updates to r1 more
- // difficult.
- if (hasFP(MF) || RegInfo->hasBasePointer(MF))
+ // difficult. A similar situation exists with setjmp.
+ if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
return false;
// Calls to fast_cc functions use different rules for passing parameters on
@@ -771,24 +620,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
bool isSVR4ABI = Subtarget.isSVR4ABI();
- bool isAIXABI = Subtarget.isAIXABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
-
- // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
- // process it.
- if (!isSVR4ABI)
- for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
- if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
- if (isAIXABI)
- report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
- HandleVRSaveUpdate(*MBBI, TII);
- break;
- }
- }
-
- // Move MBBI back to the beginning of the prologue block.
- MBBI = MBB.begin();
+ assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
// Work out frame sizes.
unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
@@ -848,12 +681,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
// Using the same bool variable as below to suppress compiler warnings.
- // Stack probe requires two scratch registers, one for old sp, one for large
- // frame and large probe size.
bool SingleScratchReg = findScratchRegister(
- &MBB, false,
- twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
- &ScratchReg, &TempReg);
+ &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
assert(SingleScratchReg &&
"Required number of registers not available in this block");
@@ -863,26 +692,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
int FPOffset = 0;
if (HasFP) {
- if (isSVR4ABI) {
- MachineFrameInfo &MFI = MF.getFrameInfo();
- int FPIndex = FI->getFramePointerSaveIndex();
- assert(FPIndex && "No Frame Pointer Save Slot!");
- FPOffset = MFI.getObjectOffset(FPIndex);
- } else {
- FPOffset = getFramePointerSaveOffset();
- }
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = MFI.getObjectOffset(FPIndex);
}
int BPOffset = 0;
if (HasBP) {
- if (isSVR4ABI) {
- MachineFrameInfo &MFI = MF.getFrameInfo();
- int BPIndex = FI->getBasePointerSaveIndex();
- assert(BPIndex && "No Base Pointer Save Slot!");
- BPOffset = MFI.getObjectOffset(BPIndex);
- } else {
- BPOffset = getBasePointerSaveOffset();
- }
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = MFI.getObjectOffset(BPIndex);
}
int PBPOffset = 0;
@@ -1038,15 +859,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl,
TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
: PPC::PROBED_STACKALLOC_32))
- .addDef(ScratchReg)
- .addDef(TempReg) // TempReg stores the old sp.
+ .addDef(TempReg)
+ .addDef(ScratchReg) // ScratchReg stores the old sp.
.addImm(NegFrameSize);
// FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
// update the ScratchReg to meet the assumption that ScratchReg contains
// the NegFrameSize. This solution is rather tricky.
if (!HasRedZone) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
- .addReg(TempReg)
+ .addReg(ScratchReg)
.addReg(SPReg);
HasSTUX = true;
}
@@ -1366,7 +1187,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const {
- // TODO: Generate CFI instructions.
bool isPPC64 = Subtarget.isPPC64();
const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -1382,10 +1202,12 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
if (StackAllocMIPos == PrologMBB.end())
return;
const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
+ MachineBasicBlock *CurrentMBB = &PrologMBB;
DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
MachineInstr &MI = *StackAllocMIPos;
int64_t NegFrameSize = MI.getOperand(2).getImm();
- int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
+ unsigned ProbeSize = TLI.getStackProbeSize(MF);
+ int64_t NegProbeSize = -(int64_t)ProbeSize;
assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
int64_t NumBlocks = NegFrameSize / NegProbeSize;
int64_t NegResidualSize = NegFrameSize % NegProbeSize;
@@ -1394,10 +1216,10 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
Register FPReg = MI.getOperand(1).getReg();
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
bool HasBP = RegInfo->hasBasePointer(MF);
+ Register BPReg = RegInfo->getBaseRegister(MF);
Align MaxAlign = MFI.getMaxAlign();
- // Initialize current frame pointer.
+ bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
- BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
// Subroutines to generate .cfi_* directives.
auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, Register Reg) {
@@ -1437,96 +1259,234 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
// Subroutine to store frame pointer and decrease stack pointer by probe size.
auto allocateAndProbe = [&](MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, int64_t NegSize,
- Register NegSizeReg, bool UseDForm) {
+ Register NegSizeReg, bool UseDForm,
+ Register StoreReg) {
if (UseDForm)
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
- .addReg(FPReg)
+ .addReg(StoreReg)
.addImm(NegSize)
.addReg(SPReg);
else
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
- .addReg(FPReg)
+ .addReg(StoreReg)
.addReg(SPReg)
.addReg(NegSizeReg);
};
- // Use FPReg to calculate CFA.
- if (needsCFI)
- buildDefCFA(PrologMBB, {MI}, FPReg, 0);
- // For case HasBP && MaxAlign > 1, we have to align the SP by performing
- // SP = SP - SP % MaxAlign.
+ // Used to probe stack when realignment is required.
+ // Note that, per the ABI, *sp must always equal the value of the back-chain
+ // pointer, so only st(w|d)u(x) can be used to update sp.
+ // Following is pseudo code:
+ // final_sp = (sp & align) + negframesize;
+ // neg_gap = final_sp - sp;
+ // while (neg_gap < negprobesize) {
+ // stdu fp, negprobesize(sp);
+ // neg_gap -= negprobesize;
+ // }
+ // stdux fp, sp, neg_gap
+ //
+ // When HasBP && HasRedzone, the back-chain pointer is already saved in BPReg
+ // before the probe code, so we do not need to save it again; that frees one
+ // additional register which can be used to materialize the probe size if we
+ // want to use the X-form. Otherwise, we cannot materialize the probe size,
+ // so only the D-form can be used for now.
+ //
+ // The allocations are:
+ // if (HasBP && HasRedzone) {
+ // r0: materialize the probesize if needed so that we can use xform.
+ // r12: `neg_gap`
+ // } else {
+ // r0: back-chain pointer
+ // r12: `neg_gap`.
+ // }
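+ //
+ // For example (illustrative numbers), if neg_gap is -10000 and negprobesize
+ // is -4096, the loop issues two stdu probes of 4096 bytes each and the
+ // final stdux allocates the remaining 1808 bytes.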
+ auto probeRealignedStack = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ Register ScratchReg, Register TempReg) {
+ assert(HasBP && "The function is supposed to have base pointer when its "
+ "stack is realigned.");
+ assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
+
+ // FIXME: We could eliminate this limitation if we had more information about
+ // which parts of the red zone are already in use. Used parts of the red zone
+ // can be treated as probed, but there might be `holes' in the probed region,
+ // which would complicate the implementation.
+ assert(ProbeSize >= Subtarget.getRedZoneSize() &&
+ "Probe size should be larger or equal to the size of red-zone so "
+ "that red-zone is not clobbered by probing.");
+
+ Register &FinalStackPtr = TempReg;
+ // FIXME: We only support NegProbeSize materializable by DForm currently.
+ // When HasBP && HasRedzone, we can use xform if we have an additional idle
+ // register.
+ NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
+ assert(isInt<16>(NegProbeSize) &&
+ "NegProbeSize should be materializable by DForm");
+ Register CRReg = PPC::CR0;
+ // The layout of the output assembly is roughly:
+ // bb.0:
+ // ...
+ // sub $scratchreg, $finalsp, r1
+ // cmpdi $scratchreg, <negprobesize>
+ // bge bb.2
+ // bb.1:
+ // stdu <backchain>, <negprobesize>(r1)
+ // sub $scratchreg, $scratchreg, negprobesize
+ // cmpdi $scratchreg, <negprobesize>
+ // blt bb.1
+ // bb.2:
+ // stdux <backchain>, r1, $scratchreg
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
+ MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ProbeExitMBB);
+ // bb.2
+ {
+ Register BackChainPointer = HasRedZone ? BPReg : TempReg;
+ allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
+ BackChainPointer);
+ if (HasRedZone)
+ // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
+ // to TempReg to satisfy it.
+ BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
+ .addReg(BPReg)
+ .addReg(BPReg);
+ ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
+ ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ }
+ // bb.0
+ {
+ BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
+ .addReg(SPReg)
+ .addReg(FinalStackPtr);
+ if (!HasRedZone)
+ BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
+ BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
+ .addReg(ScratchReg)
+ .addImm(NegProbeSize);
+ BuildMI(&MBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_GE)
+ .addReg(CRReg)
+ .addMBB(ProbeExitMBB);
+ MBB.addSuccessor(ProbeLoopBodyMBB);
+ MBB.addSuccessor(ProbeExitMBB);
+ }
+ // bb.1
+ {
+ Register BackChainPointer = HasRedZone ? BPReg : TempReg;
+ allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
+ 0, true /*UseDForm*/, BackChainPointer);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
+ ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-NegProbeSize);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
+ CRReg)
+ .addReg(ScratchReg)
+ .addImm(NegProbeSize);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_LT)
+ .addReg(CRReg)
+ .addMBB(ProbeLoopBodyMBB);
+ ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
+ ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
+ }
+ // Update liveins.
+ recomputeLiveIns(*ProbeLoopBodyMBB);
+ recomputeLiveIns(*ProbeExitMBB);
+ return ProbeExitMBB;
+ };
+ // In the case HasBP && MaxAlign > 1, we have to realign the SP by performing
+ // SP = SP - SP % MaxAlign, which makes the probe behave more like a dynamic
+ // probe, since the offset subtracted from SP depends on SP's runtime value.
if (HasBP && MaxAlign > 1) {
+ // Calculate final stack pointer.
if (isPPC64)
- BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(FPReg)
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
.addImm(0)
.addImm(64 - Log2(MaxAlign));
else
- BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
- .addReg(FPReg)
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
.addImm(0)
.addImm(32 - Log2(MaxAlign))
.addImm(31);
- BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC),
- SPReg)
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
+ FPReg)
.addReg(ScratchReg)
.addReg(SPReg);
- }
- // Probe residual part.
- if (NegResidualSize) {
- bool ResidualUseDForm = CanUseDForm(NegResidualSize);
- if (!ResidualUseDForm)
- MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
- allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
- ResidualUseDForm);
- }
- bool UseDForm = CanUseDForm(NegProbeSize);
- // If number of blocks is small, just probe them directly.
- if (NumBlocks < 3) {
- if (!UseDForm)
- MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
- for (int i = 0; i < NumBlocks; ++i)
- allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
- if (needsCFI) {
- // Restore using SPReg to calculate CFA.
- buildDefCFAReg(PrologMBB, {MI}, SPReg);
- }
+ MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
+ FPReg)
+ .addReg(ScratchReg)
+ .addReg(FPReg);
+ CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
+ if (needsCFI)
+ buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
} else {
- // Since CTR is a volatile register and current shrinkwrap implementation
- // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
- // CTR loop to probe.
- // Calculate trip count and stores it in CTRReg.
- MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
- BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
- .addReg(ScratchReg, RegState::Kill);
- if (!UseDForm)
- MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
- // Create MBBs of the loop.
- MachineFunction::iterator MBBInsertPoint =
- std::next(PrologMBB.getIterator());
- MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, LoopMBB);
- MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, ExitMBB);
- // Synthesize the loop body.
- allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
- UseDForm);
- BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
- .addMBB(LoopMBB);
- LoopMBB->addSuccessor(ExitMBB);
- LoopMBB->addSuccessor(LoopMBB);
- // Synthesize the exit MBB.
- ExitMBB->splice(ExitMBB->end(), &PrologMBB,
- std::next(MachineBasicBlock::iterator(MI)),
- PrologMBB.end());
- ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
- PrologMBB.addSuccessor(LoopMBB);
- if (needsCFI) {
- // Restore using SPReg to calculate CFA.
- buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ // Initialize current frame pointer.
+ BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
+ // Use FPReg to calculate CFA.
+ if (needsCFI)
+ buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
+ // Probe residual part.
+ if (NegResidualSize) {
+ bool ResidualUseDForm = CanUseDForm(NegResidualSize);
+ if (!ResidualUseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
+ allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
+ ResidualUseDForm, FPReg);
+ }
+ bool UseDForm = CanUseDForm(NegProbeSize);
+ // If number of blocks is small, just probe them directly.
+ if (NumBlocks < 3) {
+ if (!UseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
+ for (int i = 0; i < NumBlocks; ++i)
+ allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
+ FPReg);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
+ }
+ } else {
+ // Since CTR is a volatile register and the current shrink-wrapping
+ // implementation won't choose an MBB inside a loop as the PrologMBB, it's
+ // safe to synthesize a CTR loop to probe.
+ // Calculate trip count and stores it in CTRReg.
+ MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+ .addReg(ScratchReg, RegState::Kill);
+ if (!UseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
+ // Create MBBs of the loop.
+ MachineFunction::iterator MBBInsertPoint =
+ std::next(CurrentMBB->getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ExitMBB);
+ // Synthesize the loop body.
+ allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
+ UseDForm, FPReg);
+ BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
+ .addMBB(LoopMBB);
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), CurrentMBB,
+ std::next(MachineBasicBlock::iterator(MI)),
+ CurrentMBB->end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
+ CurrentMBB->addSuccessor(LoopMBB);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ }
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
}
- // Update liveins.
- recomputeLiveIns(*LoopMBB);
- recomputeLiveIns(*ExitMBB);
}
++NumPrologProbed;
MI.eraseFromParent();
@@ -1551,8 +1511,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
- // Get the ABI.
- bool isSVR4ABI = Subtarget.isSVR4ABI();
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1600,24 +1558,16 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
SingleScratchReg = ScratchReg == TempReg;
if (HasFP) {
- if (isSVR4ABI) {
- int FPIndex = FI->getFramePointerSaveIndex();
- assert(FPIndex && "No Frame Pointer Save Slot!");
- FPOffset = MFI.getObjectOffset(FPIndex);
- } else {
- FPOffset = getFramePointerSaveOffset();
- }
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = MFI.getObjectOffset(FPIndex);
}
int BPOffset = 0;
if (HasBP) {
- if (isSVR4ABI) {
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
BPOffset = MFI.getObjectOffset(BPIndex);
- } else {
- BPOffset = getBasePointerSaveOffset();
- }
}
int PBPOffset = 0;
@@ -1703,11 +1653,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
// zone add this offset back now.
+ // If the function has a base pointer, the stack pointer has been copied
+ // to it, so we can restore it by copying in the other direction.
+ if (HasRedZone && HasBP) {
+ BuildMI(MBB, MBBI, dl, OrInst, RBReg).
+ addReg(BPReg).
+ addReg(BPReg);
+ }
// If this function contained a fastcc call and GuaranteedTailCallOpt is
// enabled (=> hasFastCall()==true) the fastcc call might contain a tail
// call which invalidates the stack pointer value in SP(0). So we use the
- // value of R31 in this case.
- if (FI->hasFastCall()) {
+ // value of R31 in this case. A similar situation exists with setjmp.
+ else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
assert(HasFP && "Expecting a valid frame pointer.");
if (!HasRedZone)
RBReg = FPReg;
@@ -2053,7 +2010,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
bool HasGPSaveArea = false;
bool HasG8SaveArea = false;
bool HasFPSaveArea = false;
- bool HasVRSAVESaveArea = false;
bool HasVRSaveArea = false;
SmallVector<CalleeSavedInfo, 18> GPRegs;
@@ -2093,8 +2049,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
} else if (PPC::CRBITRCRegClass.contains(Reg) ||
PPC::CRRCRegClass.contains(Reg)) {
; // do nothing, as we already know whether CRs are spilled
- } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
- HasVRSAVESaveArea = true;
} else if (PPC::VRRCRegClass.contains(Reg) ||
PPC::SPERCRegClass.contains(Reg)) {
// Altivec and SPE are mutually exclusive, but have the same stack
@@ -2217,23 +2171,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
LowerBound -= 4; // The CR save area is always 4 bytes long.
}
- if (HasVRSAVESaveArea) {
- // FIXME SVR4: Is it actually possible to have multiple elements in CSI
- // which have the VRSAVE register class?
- // Adjust the frame index of the VRSAVE spill slot.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
-
- if (PPC::VRSAVERCRegClass.contains(Reg)) {
- int FI = CSI[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
- }
- }
-
- LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
- }
-
// Both Altivec and SPE have the same alignment and padding requirements
// within the stack frame.
if (HasVRSaveArea) {
@@ -2273,8 +2210,8 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// needed alignment padding.
unsigned StackSize = determineFrameLayout(MF, true);
MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
- hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+ if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
+ (hasSpills(MF) && !isInt<16>(StackSize))) {
const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
@@ -2288,7 +2225,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
// These kinds of spills might need two registers.
- if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
+ if (spillsCR(MF) || HasAlVars)
RS->addScavengingFrameIndex(
MFI.CreateStackObject(Size, Alignment, false));
}
@@ -2365,9 +2302,6 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
- if (Reg == PPC::VRSAVE)
- continue;
// CR2 through CR4 are the nonvolatile CR fields.
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
@@ -2532,10 +2466,6 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
- if (Reg == PPC::VRSAVE)
- continue;
-
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 8ffd89ef5ccd..2604218da160 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@@ -138,7 +139,6 @@ namespace {
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
- const PPCSubtarget *PPCSubTarget = nullptr;
const PPCSubtarget *Subtarget = nullptr;
const PPCTargetLowering *PPCLowering = nullptr;
unsigned GlobalBaseReg = 0;
@@ -150,14 +150,10 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
- PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
Subtarget = &MF.getSubtarget<PPCSubtarget>();
PPCLowering = Subtarget->getTargetLowering();
SelectionDAGISel::runOnMachineFunction(MF);
- if (!Subtarget->isSVR4ABI())
- InsertVRSaveCode(MF);
-
return true;
}
@@ -218,7 +214,7 @@ namespace {
/// SelectCC - Select a comparison of the specified values with the
/// specified condition code, returning the CR# of the expression.
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- const SDLoc &dl);
+ const SDLoc &dl, SDValue Chain = SDValue());
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
/// immediate field. Note that the operand at this point is already the
@@ -295,6 +291,13 @@ namespace {
Align(16));
}
+ /// SelectAddrImmX34 - Returns true if the address N can be represented by
+ /// a base register plus a signed 34-bit displacement. Suitable for use by
+ /// PSTXVP and friends.
+ bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
+ }
+
// Select an address into a single register.
bool SelectAddr(SDValue N, SDValue &Base) {
Base = N;
@@ -340,8 +343,6 @@ namespace {
return true;
}
- void InsertVRSaveCode(MachineFunction &MF);
-
StringRef getPassName() const override {
return "PowerPC DAG->DAG Pattern Instruction Selection";
}
@@ -351,6 +352,7 @@ namespace {
private:
bool trySETCC(SDNode *N);
+ bool tryFoldSWTestBRCC(SDNode *N);
bool tryAsSingleRLDICL(SDNode *N);
bool tryAsSingleRLDICR(SDNode *N);
bool tryAsSingleRLWINM(SDNode *N);
@@ -375,70 +377,6 @@ private:
} // end anonymous namespace
-/// InsertVRSaveCode - Once the entire function has been instruction selected,
-/// all virtual registers are created and all machine instructions are built,
-/// check to see if we need to save/restore VRSAVE. If so, do it.
-void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
- // Check to see if this function uses vector registers, which means we have to
- // save and restore the VRSAVE register and update it with the regs we use.
- //
- // In this case, there will be virtual registers of vector type created
- // by the scheduler. Detect them now.
- bool HasVectorVReg = false;
- for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
- if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
- HasVectorVReg = true;
- break;
- }
- }
- if (!HasVectorVReg) return; // nothing to do.
-
- // If we have a vector register, we want to emit code into the entry and exit
- // blocks to save and restore the VRSAVE register. We do this here (instead
- // of marking all vector instructions as clobbering VRSAVE) for two reasons:
- //
- // 1. This (trivially) reduces the load on the register allocator, by not
- // having to represent the live range of the VRSAVE register.
- // 2. This (more significantly) allows us to create a temporary virtual
- // register to hold the saved VRSAVE value, allowing this temporary to be
- // register allocated, instead of forcing it to be spilled to the stack.
-
- // Create two vregs - one to hold the VRSAVE register that is live-in to the
- // function and one for the value after having bits or'd into it.
- Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
-
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- MachineBasicBlock &EntryBB = *Fn.begin();
- DebugLoc dl;
- // Emit the following code into the entry block:
- // InVRSAVE = MFVRSAVE
- // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
- // MTVRSAVE UpdatedVRSAVE
- MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
- BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
- BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
- UpdatedVRSAVE).addReg(InVRSAVE);
- BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
-
- // Find all return blocks, outputting a restore in each epilog.
- for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (BB->isReturnBlock()) {
- IP = BB->end(); --IP;
-
- // Skip over all terminator instructions, which are part of the return
- // sequence.
- MachineBasicBlock::iterator I2 = IP;
- while (I2 != BB->begin() && (--I2)->isTerminator())
- IP = I2;
-
- // Emit: MTVRSAVE InVRSave
- BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
- }
- }
-}
-
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
@@ -648,6 +586,8 @@ bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
SDValue Offset = ST->getOffset();
if (!Offset.isUndef())
return false;
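+ // Don't fold the TLS X-form when the address is produced by the local-exec
+ // TLS address-materialization node.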
+ if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
+ return false;
SDLoc dl(ST);
EVT MemVT = ST->getMemoryVT();
@@ -691,6 +631,8 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
SDValue Offset = LD->getOffset();
if (!Offset.isUndef())
return false;
+ if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
+ return false;
SDLoc dl(LD);
EVT MemVT = LD->getMemoryVT();
@@ -800,251 +742,6 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
return false;
}
-// Predict the number of instructions that would be generated by calling
-// selectI64Imm(N).
-static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
- // Assume no remaining bits.
- unsigned Remainder = 0;
- // Assume no shift required.
- unsigned Shift = 0;
-
- // If it can't be represented as a 32 bit value.
- if (!isInt<32>(Imm)) {
- Shift = countTrailingZeros<uint64_t>(Imm);
- int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
-
- // If the shifted value fits 32 bits.
- if (isInt<32>(ImmSh)) {
- // Go with the shifted value.
- Imm = ImmSh;
- } else {
- // Still stuck with a 64 bit value.
- Remainder = Imm;
- Shift = 32;
- Imm >>= 32;
- }
- }
-
- // Intermediate operand.
- unsigned Result = 0;
-
- // Handle first 32 bits.
- unsigned Lo = Imm & 0xFFFF;
-
- // Simple value.
- if (isInt<16>(Imm)) {
- // Just the Lo bits.
- ++Result;
- } else if (Lo) {
- // Handle the Hi bits and Lo bits.
- Result += 2;
- } else {
- // Just the Hi bits.
- ++Result;
- }
-
- // If no shift, we're done.
- if (!Shift) return Result;
-
- // If Hi word == Lo word,
- // we can use rldimi to insert the Lo word into Hi word.
- if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
- ++Result;
- return Result;
- }
-
- // Shift for next step if the upper 32-bits were not zero.
- if (Imm)
- ++Result;
-
- // Add in the last bits as required.
- if ((Remainder >> 16) & 0xFFFF)
- ++Result;
- if (Remainder & 0xFFFF)
- ++Result;
-
- return Result;
-}
-
-static uint64_t Rot64(uint64_t Imm, unsigned R) {
- return (Imm << R) | (Imm >> (64 - R));
-}
-
-static unsigned selectI64ImmInstrCount(int64_t Imm) {
- unsigned Count = selectI64ImmInstrCountDirect(Imm);
-
- // If the instruction count is 1 or 2, we do not need further analysis
- // since rotate + load constant requires at least 2 instructions.
- if (Count <= 2)
- return Count;
-
- for (unsigned r = 1; r < 63; ++r) {
- uint64_t RImm = Rot64(Imm, r);
- unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
- Count = std::min(Count, RCount);
-
- // See comments in selectI64Imm for an explanation of the logic below.
- unsigned LS = findLastSet(RImm);
- if (LS != r-1)
- continue;
-
- uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
- uint64_t RImmWithOnes = RImm | OnesMask;
-
- RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
- Count = std::min(Count, RCount);
- }
-
- return Count;
-}
-
-// Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
-// (above) needs to be kept in sync with this function.
-static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
- int64_t Imm) {
- // Assume no remaining bits.
- unsigned Remainder = 0;
- // Assume no shift required.
- unsigned Shift = 0;
-
- // If it can't be represented as a 32 bit value.
- if (!isInt<32>(Imm)) {
- Shift = countTrailingZeros<uint64_t>(Imm);
- int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
-
- // If the shifted value fits 32 bits.
- if (isInt<32>(ImmSh)) {
- // Go with the shifted value.
- Imm = ImmSh;
- } else {
- // Still stuck with a 64 bit value.
- Remainder = Imm;
- Shift = 32;
- Imm >>= 32;
- }
- }
-
- // Intermediate operand.
- SDNode *Result;
-
- // Handle first 32 bits.
- unsigned Lo = Imm & 0xFFFF;
- unsigned Hi = (Imm >> 16) & 0xFFFF;
-
- auto getI32Imm = [CurDAG, dl](unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
- };
-
- // Simple value.
- if (isInt<16>(Imm)) {
- uint64_t SextImm = SignExtend64(Lo, 16);
- SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
- // Just the Lo bits.
- Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
- } else if (Lo) {
- // Handle the Hi bits.
- unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
- Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
- // And Lo bits.
- Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Lo));
- } else {
- // Just the Hi bits.
- Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
- }
-
- // If no shift, we're done.
- if (!Shift) return Result;
-
- // If Hi word == Lo word,
- // we can use rldimi to insert the Lo word into Hi word.
- if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
- SDValue Ops[] =
- { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
- return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
- }
-
- // Shift for next step if the upper 32-bits were not zero.
- if (Imm) {
- Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
- SDValue(Result, 0),
- getI32Imm(Shift),
- getI32Imm(63 - Shift));
- }
-
- // Add in the last bits as required.
- if ((Hi = (Remainder >> 16) & 0xFFFF)) {
- Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Hi));
- }
- if ((Lo = Remainder & 0xFFFF)) {
- Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Lo));
- }
-
- return Result;
-}
-
-static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
- int64_t Imm) {
- unsigned Count = selectI64ImmInstrCountDirect(Imm);
-
- // If the instruction count is 1 or 2, we do not need further analysis
- // since rotate + load constant requires at least 2 instructions.
- if (Count <= 2)
- return selectI64ImmDirect(CurDAG, dl, Imm);
-
- unsigned RMin = 0;
-
- int64_t MatImm;
- unsigned MaskEnd;
-
- for (unsigned r = 1; r < 63; ++r) {
- uint64_t RImm = Rot64(Imm, r);
- unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
- if (RCount < Count) {
- Count = RCount;
- RMin = r;
- MatImm = RImm;
- MaskEnd = 63;
- }
-
- // If the immediate to generate has many trailing zeros, it might be
- // worthwhile to generate a rotated value with too many leading ones
- // (because that's free with li/lis's sign-extension semantics), and then
- // mask them off after rotation.
-
- unsigned LS = findLastSet(RImm);
- // We're adding (63-LS) higher-order ones, and we expect to mask them off
- // after performing the inverse rotation by (64-r). So we need that:
- // 63-LS == 64-r => LS == r-1
- if (LS != r-1)
- continue;
-
- uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
- uint64_t RImmWithOnes = RImm | OnesMask;
-
- RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
- if (RCount < Count) {
- Count = RCount;
- RMin = r;
- MatImm = RImmWithOnes;
- MaskEnd = LS;
- }
- }
-
- if (!RMin)
- return selectI64ImmDirect(CurDAG, dl, Imm);
-
- auto getI32Imm = [CurDAG, dl](unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
- };
-
- SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0);
- return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
- getI32Imm(64 - RMin), getI32Imm(MaskEnd));
-}
-
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
unsigned MaxTruncation = 0;
// Cannot use range-based for loop here as we need the actual use (i.e. we
@@ -1101,6 +798,274 @@ static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
return MaxTruncation;
}
+// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
+// zeros counted across the 32-bit boundary and, if so, return the bit index
+// just above that zero run (the callers use it as a rotation amount); else 0.
+static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
+ unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
+ unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
+ if ((HiTZ + LoLZ) >= Num)
+ return (32 + HiTZ);
+ return 0;
+}
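
A minimal host-side sketch of the same check (the helper names and the sample constant are illustrative, not part of this patch): it counts the zero run anchored at the 32-bit boundary and returns the bit index just above it, which the materialization patterns further down use as a rotation amount.

#include <cstdint>
#include <cstdio>

// Portable stand-ins for llvm::countTrailingZeros / countLeadingZeros on u32.
static unsigned tz32(uint32_t V) { unsigned N = 0; if (!V) return 32; while (!(V & 1)) { V >>= 1; ++N; } return N; }
static unsigned lz32(uint32_t V) { unsigned N = 0; if (!V) return 32; while (!(V >> 31)) { V <<= 1; ++N; } return N; }

// Mirrors findContiguousZerosAtLeast: the zero run is the trailing zeros of
// the high word plus the leading zeros of the low word; on success return
// 32 + HiTZ, the index of the first set bit above the run.
static int contiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
  unsigned HiTZ = tz32(uint32_t(Imm >> 32));
  unsigned LoLZ = lz32(uint32_t(Imm));
  return (HiTZ + LoLZ) >= Num ? int(32 + HiTZ) : 0;
}

int main() {
  // 0xABC0000000000123 has a 45-bit zero run (bits 9..53): long enough for the
  // 33-zero (3-5) pattern but not for the 49-zero (2-6) pattern.
  uint64_t Imm = 0xABC0000000000123ULL;
  std::printf("%d %d\n", contiguousZerosAtLeast(Imm, 49),   // prints 0
                         contiguousZerosAtLeast(Imm, 33));  // prints 54
  return 0;
}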
+
+// Direct materialization of 64-bit constants by enumerated patterns.
+static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
+ uint64_t Imm, unsigned &InstCnt) {
+ unsigned TZ = countTrailingZeros<uint64_t>(Imm);
+ unsigned LZ = countLeadingZeros<uint64_t>(Imm);
+ unsigned TO = countTrailingOnes<uint64_t>(Imm);
+ unsigned LO = countLeadingOnes<uint64_t>(Imm);
+ unsigned Hi32 = Hi_32(Imm);
+ unsigned Lo32 = Lo_32(Imm);
+ SDNode *Result = nullptr;
+ unsigned Shift = 0;
+
+ auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+ };
+
+ // The following patterns use 1 instruction to materialize the Imm.
+ InstCnt = 1;
+ // 1-1) Patterns : {zeros}{15-bit value}
+ // {ones}{15-bit value}
+ if (isInt<16>(Imm)) {
+ SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
+ return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
+ }
+ // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
+ // {ones}{15-bit value}{16 zeros}
+ if (TZ > 15 && (LZ > 32 || LO > 32))
+ return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
+ getI32Imm((Imm >> 16) & 0xffff));
+
+ // The following patterns use 2 instructions to materialize the Imm.
+ InstCnt = 2;
+ assert(LZ < 64 && "Unexpected leading zeros here.");
+ // Count of ones following the leading zeros.
+ unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
+ // 2-1) Patterns : {zeros}{31-bit value}
+ // {ones}{31-bit value}
+ if (isInt<32>(Imm)) {
+ uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
+ unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
+ return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Imm & 0xffff));
+ }
+ // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
+ // {zeros}{15-bit value}{zeros}
+ // {zeros}{ones}{15-bit value}
+ // {ones}{15-bit value}{zeros}
+ // We can take advantage of LI's sign-extension semantics to generate leading
+ // ones, and then use RLDIC to mask off the ones on both sides after rotation.
+ if ((LZ + FO + TZ) > 48) {
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
+ getI32Imm((Imm >> TZ) & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(TZ), getI32Imm(LZ));
+ }
+ // 2-3) Pattern : {zeros}{15-bit value}{ones}
+ // Shift right the Imm by (48 - LZ) bits to construct a negative 16-bit value,
+ // therefore we can take advantage of LI's sign-extension semantics, and then
+ // mask them off after rotation.
+ //
+ // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
+ // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
+ // +------------------------+ +------------------------+
+ // 63 0 63 0
+ // Imm (Imm >> (48 - LZ) & 0xffff)
+ // +----sext-----|--16-bit--+ +clear-|-----------------+
+ // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
+ // +------------------------+ +------------------------+
+ // 63 0 63 0
+ // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
+ if ((LZ + TO) > 48) {
+ // Immediates with (LZ > 32) were handled by the patterns above, so here
+ // (LZ <= 32), which guarantees we never shift the Imm right by a negative
+ // amount.
+ assert(LZ <= 32 && "Unexpected shift value.");
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
+ getI32Imm((Imm >> (48 - LZ) & 0xffff)));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(48 - LZ), getI32Imm(LZ));
+ }
+ // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
+ // {ones}{15-bit value}{ones}
+ // We can take advantage of LI's sign-extension semantics to generate leading
+ // ones, and then use RLDICL to mask off the ones on the left side (if required)
+ // after rotation.
+ //
+ // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
+ // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
+ // +------------------------+ +------------------------+
+ // 63 0 63 0
+ // Imm (Imm >> TO) & 0xffff
+ // +----sext-----|--16-bit--+ +LZ|---------------------+
+ // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
+ // +------------------------+ +------------------------+
+ // 63 0 63 0
+ // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
+ if ((LZ + FO + TO) > 48) {
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
+ getI32Imm((Imm >> TO) & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(TO), getI32Imm(LZ));
+ }
+ // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
+ // If Hi32 is zero and the Lo16 (in Lo32) can be represented as a positive
+ // 16-bit value, we can use LI for Lo16 without generating leading ones, then
+ // OR in the Hi16 (in Lo32).
+ if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
+ getI32Imm(Lo32 & 0xffff));
+ return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Lo32 >> 16));
+ }
+ // 2-6) Patterns : {******}{49 zeros}{******}
+ // {******}{49 ones}{******}
+ // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
+ // bits remain on both sides. Rotate right the Imm to construct an int<16>
+ // value, use LI for the int<16> value, and then use RLDICL without a mask to
+ // rotate it back.
+ //
+ // 1) findContiguousZerosAtLeast(Imm, 49)
+ // +------|--zeros-|------+ +---ones--||---15 bit--+
+ // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
+ // +----------------------+ +----------------------+
+ // 63 0 63 0
+ //
+ // 2) findContiguousZerosAtLeast(~Imm, 49)
+ // +------|--ones--|------+ +---ones--||---15 bit--+
+ // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
+ // +----------------------+ +----------------------+
+ // 63 0 63 0
+ if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
+ (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
+ uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift));
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
+ getI32Imm(RotImm & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Shift), getI32Imm(0));
+ }
+
+ // The following patterns use 3 instructions to materialize the Imm.
+ InstCnt = 3;
+ // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
+ // {zeros}{31-bit value}{zeros}
+ // {zeros}{ones}{31-bit value}
+ // {ones}{31-bit value}{zeros}
+ // We can take advantage of LIS's sign-extension semantics to generate leading
+ // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
+ // ones on both sides after rotation.
+ if ((LZ + FO + TZ) > 32) {
+ uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
+ unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm((Imm >> TZ) & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(TZ), getI32Imm(LZ));
+ }
+ // 3-2) Pattern : {zeros}{31-bit value}{ones}
+ // Shift right the Imm by (32 - LZ) bits to construct a negative 32-bit value,
+ // therefore we can take advantage of LIS's sign-extension semantics, add
+ // the remaining bits with ORI, and then mask them off after rotation.
+ // This is similar to Pattern 2-3, please refer to the diagram there.
+ if ((LZ + TO) > 32) {
+ // Immediates with (LZ > 32) were handled by the patterns above, so here
+ // (LZ <= 32), which guarantees we never shift the Imm right by a negative
+ // amount.
+ assert(LZ <= 32 && "Unexpected shift value.");
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
+ getI32Imm((Imm >> (48 - LZ)) & 0xffff));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm((Imm >> (32 - LZ)) & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(32 - LZ), getI32Imm(LZ));
+ }
+ // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
+ // {ones}{31-bit value}{ones}
+ // We can take advantage of LIS's sign-extension semantics to generate leading
+ // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
+ // ones on the left side (if required) after rotation.
+ // This is similar to Pattern 2-4, please refer to the diagram there.
+ if ((LZ + FO + TO) > 32) {
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
+ getI32Imm((Imm >> (TO + 16)) & 0xffff));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm((Imm >> TO) & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(TO), getI32Imm(LZ));
+ }
+ // 3-4) Patterns : High word == Low word
+ if (Hi32 == Lo32) {
+ // Handle the first 32 bits.
+ uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
+ unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Lo32 & 0xffff));
+ // Use rldimi to insert the Low word into High word.
+ SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
+ getI32Imm(0)};
+ return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+ }
+ // 3-5) Patterns : {******}{33 zeros}{******}
+ // {******}{33 ones}{******}
+ // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
+ // bits remain on both sides. Rotate right the Imm to construct an int<32>
+ // value, use LIS + ORI for the int<32> value, and then use RLDICL without a
+ // mask to
+ // rotate it back.
+ // This is similar to Pattern 2-6, please refer to the diagram there.
+ if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
+ (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
+ uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift));
+ uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
+ unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(RotImm & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Shift), getI32Imm(0));
+ }
+
+ InstCnt = 0;
+ return nullptr;
+}
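
As a reading aid, here is a hedged host-side sketch that reproduces only the pattern tests of selectI64ImmDirect (no SDNode construction) to predict the reported InstCnt; the helper names and test constants are made up for illustration.

#include <cstdint>
#include <cstdio>

static unsigned clz64(uint64_t V) { unsigned N = 0; if (!V) return 64; while (!(V >> 63)) { V <<= 1; ++N; } return N; }
static unsigned ctz64(uint64_t V) { unsigned N = 0; if (!V) return 64; while (!(V & 1)) { V >>= 1; ++N; } return N; }
static bool isInt16(uint64_t V) { return (int64_t)V == (int16_t)V; }
static bool isInt32(uint64_t V) { return (int64_t)V == (int32_t)V; }

// Zero run anchored at the 32-bit boundary, as in findContiguousZerosAtLeast.
static bool zeroRunAtLeast(uint64_t Imm, unsigned Num) {
  unsigned HiTZ = ctz64(Imm >> 32);                 if (HiTZ > 32) HiTZ = 32;
  unsigned LoLZ = clz64((Imm & 0xffffffffULL) << 32); if (LoLZ > 32) LoLZ = 32;
  return HiTZ + LoLZ >= Num;
}

// Predict the InstCnt that selectI64ImmDirect would report, or 0 if it would
// return nullptr. Pattern numbers match the comments above.
static unsigned predictDirectCount(uint64_t Imm) {
  unsigned TZ = ctz64(Imm), LZ = clz64(Imm);
  unsigned TO = ctz64(~Imm), LO = clz64(~Imm);
  uint32_t Hi32 = uint32_t(Imm >> 32), Lo32 = uint32_t(Imm);
  if (isInt16(Imm)) return 1;                                        // 1-1: LI
  if (TZ > 15 && (LZ > 32 || LO > 32)) return 1;                     // 1-2: LIS
  unsigned FO = clz64(~(Imm << LZ));        // ones after the leading zeros
  if (isInt32(Imm)) return 2;                                        // 2-1: LIS+ORI
  if (LZ + FO + TZ > 48) return 2;                                   // 2-2: LI+RLDIC
  if (LZ + TO > 48) return 2;                                        // 2-3: LI+RLDICL
  if (LZ + FO + TO > 48) return 2;                                   // 2-4: LI+RLDICL
  if (LZ == 32 && !(Lo32 & 0x8000)) return 2;                        // 2-5: LI+ORIS
  if (zeroRunAtLeast(Imm, 49) || zeroRunAtLeast(~Imm, 49)) return 2; // 2-6
  if (LZ + FO + TZ > 32) return 3;                                   // 3-1
  if (LZ + TO > 32) return 3;                                        // 3-2
  if (LZ + FO + TO > 32) return 3;                                   // 3-3
  if (Hi32 == Lo32) return 3;                                        // 3-4
  if (zeroRunAtLeast(Imm, 33) || zeroRunAtLeast(~Imm, 33)) return 3; // 3-5
  return 0;
}

int main() {
  std::printf("%u %u %u %u\n",
              predictDirectCount(0x0000000000007fffULL),   // 1: LI
              predictDirectCount(0x00000000ffff0000ULL),   // 2: LI+RLDIC
              predictDirectCount(0x0123456701234567ULL),   // 3: Hi32 == Lo32
              predictDirectCount(0x123456789abcdef0ULL));  // 0: needs the fallback
  return 0;
}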
+
+static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
+ unsigned *InstCnt = nullptr) {
+ unsigned InstCntDirect = 0;
+ // No more than 3 instructions are used if we can select the i64 immediate
+ // directly.
+ SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
+ if (Result) {
+ if (InstCnt)
+ *InstCnt = InstCntDirect;
+ return Result;
+ }
+ auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+ };
+ // Handle the upper 32-bit value.
+ Result =
+ selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
+ // Add in the last bits as required.
+ if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
+ Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi16));
+ ++InstCntDirect;
+ }
+ if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Lo16));
+ ++InstCntDirect;
+ }
+ if (InstCnt)
+ *InstCnt = InstCntDirect;
+ return Result;
+}
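
A minimal sketch of the arithmetic behind this fallback path (the sample constant is illustrative): the high doubleword is materialized on its own, and the two low 16-bit chunks are then OR'd back in, which is what the ORIS8/ORI8 nodes above encode.

#include <cstdint>
#include <cassert>

int main() {
  uint64_t Imm = 0x123456789ABCDEF0ULL;            // constant to materialize
  uint64_t HiPart = Imm & 0xffffffff00000000ULL;   // handed to selectI64ImmDirect
  uint32_t Hi16 = (uint32_t(Imm) >> 16) & 0xffff;  // ORIS8 immediate (bits 16..31)
  uint32_t Lo16 = uint32_t(Imm) & 0xffff;          // ORI8 immediate  (bits 0..15)
  uint64_t Rebuilt = HiPart | (uint64_t(Hi16) << 16) | uint64_t(Lo16);
  assert(Rebuilt == Imm);                          // the three pieces recombine exactly
  return 0;
}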
+
// Select a 64-bit constant.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
SDLoc dl(N);
@@ -1253,6 +1218,7 @@ class BitPermutationSelector {
}
break;
case ISD::SHL:
+ case PPCISD::SHL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
@@ -1268,6 +1234,7 @@ class BitPermutationSelector {
}
break;
case ISD::SRL:
+ case PPCISD::SRL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
@@ -2147,11 +2114,14 @@ class BitPermutationSelector {
unsigned NumAndInsts = (unsigned) NeedsRotate +
(unsigned) (bool) Res;
+ unsigned NumOfSelectInsts = 0;
+ selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
+ assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
if (Use32BitInsts)
NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
(unsigned) (ANDIMask != 0 && ANDISMask != 0);
else
- NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;
+ NumAndInsts += NumOfSelectInsts + /* and */ 1;
unsigned NumRLInsts = 0;
bool FirstBG = true;
@@ -2375,12 +2345,14 @@ class BitPermutationSelector {
Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
} else {
- if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;
-
- SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
- Res =
- SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
- ExtendToInt64(Res, dl), MaskVal), 0);
+ unsigned NumOfSelectInsts = 0;
+ SDValue MaskVal =
+ SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
+ Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
+ ExtendToInt64(Res, dl), MaskVal),
+ 0);
+ if (InstCnt)
+ *InstCnt += NumOfSelectInsts + /* and */ 1;
}
}
@@ -2411,7 +2383,7 @@ class BitPermutationSelector {
}
void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
- BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
+ erase_if(BitGroups, F);
}
SmallVector<ValueBit, 64> Bits;
@@ -3661,6 +3633,12 @@ bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
return false;
+ // For POWER10, it is more profitable to use the set boolean extension
+ // instructions rather than the integer compare elimination codegen.
+ // Users can override this via the command line option, `--ppc-gpr-icmps`.
+ if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
+ return false;
+
switch (N->getOpcode()) {
default: break;
case ISD::ZERO_EXTEND:
@@ -3708,7 +3686,7 @@ bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- const SDLoc &dl) {
+ const SDLoc &dl, SDValue Chain) {
// Always select the LHS.
unsigned Opc;
@@ -3861,7 +3839,12 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
assert(Subtarget->hasVSX() && "__float128 requires VSX");
Opc = PPC::XSCMPUQP;
}
- return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+ if (Chain)
+ return SDValue(
+ CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
+ 0);
+ else
+ return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
@@ -3936,7 +3919,8 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
-// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
+// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
+// and v4f32).
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool HasVSX, bool &Swap, bool &Negate) {
Swap = false;
@@ -4017,6 +4001,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPEQUW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPEQUD;
+ else if (VecVT == MVT::v1i128)
+ return PPC::VCMPEQUQ;
break;
case ISD::SETGT:
if (VecVT == MVT::v16i8)
@@ -4027,6 +4013,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTSW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPGTSD;
+ else if (VecVT == MVT::v1i128)
+ return PPC::VCMPGTSQ;
break;
case ISD::SETUGT:
if (VecVT == MVT::v16i8)
@@ -4037,6 +4025,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTUW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPGTUD;
+ else if (VecVT == MVT::v1i128)
+ return PPC::VCMPGTUQ;
break;
default:
break;
@@ -4048,17 +4038,23 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
SDLoc dl(N);
unsigned Imm;
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ bool IsStrict = N->isStrictFPOpcode();
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
EVT PtrVT =
CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+
+ SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
+ SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
- if (!Subtarget->useCRBits() && isInt32Immediate(N->getOperand(1), Imm)) {
+ if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
// setcc op, 0
if (Imm == 0) {
- SDValue Op = N->getOperand(0);
+ SDValue Op = LHS;
switch (CC) {
default: break;
case ISD::SETEQ: {
@@ -4093,7 +4089,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
}
}
} else if (Imm == ~0U) { // setcc op, -1
- SDValue Op = N->getOperand(0);
+ SDValue Op = LHS;
switch (CC) {
default: break;
case ISD::SETEQ:
@@ -4136,13 +4132,10 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
}
}
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
- if (LHS.getValueType().isVector()) {
- if (Subtarget->hasQPX() || Subtarget->hasSPE())
+ if (!IsStrict && LHS.getValueType().isVector()) {
+ if (Subtarget->hasSPE())
return false;
EVT VecVT = LHS.getValueType();
@@ -4169,7 +4162,9 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
bool Inv;
unsigned Idx = getCRIdxForSetCC(CC, Inv);
- SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
+ SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
+ if (IsStrict)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
SDValue IntCR;
// SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
@@ -4272,8 +4267,10 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
(FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
return false;
- bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
- SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
+ SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
+ ? FalseRes
+ : FalseRes.getOperand(0);
+ bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
if (SetOrSelCC.getOpcode() != ISD::SETCC &&
SetOrSelCC.getOpcode() != ISD::SELECT_CC)
return false;
@@ -4382,6 +4379,81 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
return true;
}
+// Return true if it's a software square-root/divide operand.
+static bool isSWTestOp(SDValue N) {
+ if (N.getOpcode() == PPCISD::FTSQRT)
+ return true;
+ if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
+ return false;
+ switch (N.getConstantOperandVal(0)) {
+ case Intrinsic::ppc_vsx_xvtdivdp:
+ case Intrinsic::ppc_vsx_xvtdivsp:
+ case Intrinsic::ppc_vsx_xvtsqrtdp:
+ case Intrinsic::ppc_vsx_xvtsqrtsp:
+ return true;
+ }
+ return false;
+}
+
+bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
+ assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
+ // We are looking for the following patterns, where `truncate to i1` has the
+ // same semantics as `and 1`.
+ // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
+ // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
+ // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
+ // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
+ // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
+ // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
+ // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
+ // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ if (CC != ISD::SETEQ && CC != ISD::SETNE)
+ return false;
+
+ SDValue CmpRHS = N->getOperand(3);
+ if (!isa<ConstantSDNode>(CmpRHS) ||
+ cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
+ return false;
+
+ SDValue CmpLHS = N->getOperand(2);
+ if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
+ return false;
+
+ unsigned PCC = 0;
+ bool IsCCNE = CC == ISD::SETNE;
+ if (CmpLHS.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(CmpLHS.getOperand(1)))
+ switch (CmpLHS.getConstantOperandVal(1)) {
+ case 1:
+ PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
+ break;
+ case 2:
+ PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 4:
+ PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
+ break;
+ case 8:
+ PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ default:
+ return false;
+ }
+ else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
+ CmpLHS.getValueType() == MVT::i1)
+ PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
+
+ if (PCC) {
+ SDLoc dl(N);
+ SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
+ N->getOperand(0)};
+ CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
+ return true;
+ }
+ return false;
+}
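
The table from the comment above, restated as a small checkable function (an illustrative sketch only; string names stand in for the PPC::PRED_* enumerators):

#include <cassert>
#include <cstring>

// (mask from the `and`, branch-on-not-equal?) -> BCC predicate, following the
// mapping listed in tryFoldSWTestBRCC. Mask 1 stands in for `truncate to i1`.
static const char *swTestPredicate(unsigned Mask, bool IsCCNE) {
  switch (Mask) {
  case 1: return IsCCNE ? "PRED_UN" : "PRED_NU";
  case 2: return IsCCNE ? "PRED_EQ" : "PRED_NE";
  case 4: return IsCCNE ? "PRED_GT" : "PRED_LE";
  case 8: return IsCCNE ? "PRED_LT" : "PRED_GE";
  default: return nullptr; // no fold for other masks
  }
}

int main() {
  assert(std::strcmp(swTestPredicate(2, /*IsCCNE=*/false), "PRED_NE") == 0);
  assert(std::strcmp(swTestPredicate(8, /*IsCCNE=*/true), "PRED_LT") == 0);
  assert(swTestPredicate(3, true) == nullptr);
  return 0;
}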
+
bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
unsigned Imm;
@@ -4661,7 +4733,48 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ if (!Subtarget->isISA3_1())
+ break;
+ unsigned Opcode = 0;
+ switch (N->getConstantOperandVal(0)) {
+ default:
+ break;
+ case Intrinsic::ppc_altivec_vstribr_p:
+ Opcode = PPC::VSTRIBR_rec;
+ break;
+ case Intrinsic::ppc_altivec_vstribl_p:
+ Opcode = PPC::VSTRIBL_rec;
+ break;
+ case Intrinsic::ppc_altivec_vstrihr_p:
+ Opcode = PPC::VSTRIHR_rec;
+ break;
+ case Intrinsic::ppc_altivec_vstrihl_p:
+ Opcode = PPC::VSTRIHL_rec;
+ break;
+ }
+ if (!Opcode)
+ break;
+
+ // Generate the appropriate vector string isolate intrinsic to match.
+ EVT VTs[] = {MVT::v16i8, MVT::Glue};
+ SDValue VecStrOp =
+ SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
+ // Vector string isolate instructions update the EQ bit of CR6.
+ // Generate a SETBC instruction to extract the bit and place it in a GPR.
+ SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
+ SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
+ SDValue CRBit = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+ CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
+ 0);
+ CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
+ return;
+ }
+
case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
if (trySETCC(N))
return;
break;
@@ -4813,8 +4926,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
- case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
- case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
@@ -4961,6 +5072,32 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
+ case ISD::MUL: {
+ SDValue Op1 = N->getOperand(1);
+ if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64)
+ break;
+
+ // If the multiplier already fits in int16, the generic mulli pattern handles
+ // it; also bail out if there are no trailing zeros to strip.
+ int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
+ unsigned Shift = countTrailingZeros<uint64_t>(Imm);
+ if (isInt<16>(Imm) || !Shift)
+ break;
+
+ // If the shifted value fits int16, we can do this transformation:
+ // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISel because
+ // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
+ uint64_t ImmSh = Imm >> Shift;
+ if (isInt<16>(ImmSh)) {
+ uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
+ SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
+ SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
+ N->getOperand(0), SDImm);
+ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0),
+ getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl));
+ return;
+ }
+ break;
+ }
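
A small arithmetic check of this rewrite (X, c1 and c2 below are arbitrary sample values): RLDICR with shift c2 and mask end 63 - c2 is exactly a left shift by c2, so MULLI by c1 followed by that RLDICR reproduces mul X, (c1 << c2) under 64-bit wraparound.

#include <cstdint>
#include <cassert>

// RLDICR SH, ME: rotate left doubleword by SH, then keep only the high ME+1
// bits (clear the low 63-ME bits).
static uint64_t rldicr(uint64_t V, unsigned SH, unsigned ME) {
  uint64_t Rot = SH ? (V << SH) | (V >> (64 - SH)) : V;
  return Rot & (~0ULL << (63 - ME));
}

int main() {
  uint64_t X = 0x0123456789ABCDEFULL;
  int64_t C1 = 0x7ABD;                       // odd, fits in int16: the MULLI8 immediate
  unsigned C2 = 5;                           // trailing zeros of the original multiplier
  uint64_t ByMul  = X * (uint64_t(C1) << C2);               // mul X, c1 << c2
  uint64_t ByIsel = rldicr(X * uint64_t(C1), C2, 63 - C2);  // rldicr(mulli X, c1)
  assert(ByMul == ByIsel);
  return 0;
}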
// FIXME: Remove this once the ANDI glue bug is fixed:
case PPCISD::ANDI_rec_1_EQ_BIT:
case PPCISD::ANDI_rec_1_GT_BIT: {
@@ -5095,12 +5232,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectCCOp = PPC::SELECT_CC_F16;
else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE;
- else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
- SelectCCOp = PPC::SELECT_CC_QFRC;
- else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
- SelectCCOp = PPC::SELECT_CC_QSRC;
- else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
- SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
@@ -5192,6 +5323,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
case ISD::BR_CC: {
+ if (tryFoldSWTestBRCC(N))
+ return;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
unsigned PCC =
getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
@@ -5763,7 +5896,13 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
User->getMachineOpcode() != PPC::SELECT_I8)
return false;
+ SDNode *Op1 = User->getOperand(1).getNode();
SDNode *Op2 = User->getOperand(2).getNode();
+ // If we have a degenerate select with two equal operands, swapping will
+ // not do anything, and we may run into an infinite loop.
+ if (Op1 == Op2)
+ return false;
+
if (!Op2->isMachineOpcode())
return false;
@@ -5856,9 +5995,6 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
- case PPC::SELECT_QFRC:
- case PPC::SELECT_QSRC:
- case PPC::SELECT_QBRC:
case PPC::SELECT_SPE:
case PPC::SELECT_SPE4:
case PPC::SELECT_VRRC:
@@ -6177,9 +6313,6 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
- case PPC::SELECT_QFRC:
- case PPC::SELECT_QSRC:
- case PPC::SELECT_QBRC:
case PPC::SELECT_SPE:
case PPC::SELECT_SPE4:
case PPC::SELECT_VRRC:
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 641b2facdc41..26dc3afc899e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -74,6 +74,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
@@ -120,6 +121,11 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
cl::desc("use absolute jump tables on ppc"), cl::Hidden);
+// TODO: Remove this option once soft fp128 is fully supported.
+static cl::opt<bool>
+ EnableSoftFP128("enable-soft-fp128",
+ cl::desc("temp option to enable soft fp128"), cl::Hidden);
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
@@ -145,7 +151,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (!useSoftFloat()) {
if (hasSPE()) {
addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
- addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
+ // EFPU2 APU only supports f32
+ if (!Subtarget.hasEFPU2())
+ addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
} else {
addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
@@ -159,6 +167,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+ // Custom lower inline assembly to check for special registers.
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
+
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@@ -215,13 +227,36 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (isPPC64 || Subtarget.hasFPCVT()) {
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
+
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
+ AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
+ AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
} else {
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
}
@@ -247,6 +282,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// PPC (the libcall is not available).
setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
// We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
@@ -299,8 +336,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
- if (Subtarget.hasVSX())
- setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal);
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
+ }
if (Subtarget.hasFSQRT()) {
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
@@ -338,6 +377,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FMA , MVT::f32, Legal);
}
+ if (Subtarget.hasSPE())
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
@@ -415,6 +457,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (!Subtarget.useCRBits())
setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ if (Subtarget.hasFPU()) {
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
+
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
+ }
+
// PowerPC does not have BRCOND which requires SetCC
if (!Subtarget.useCRBits())
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
@@ -431,9 +483,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
} else {
// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
// PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
}
@@ -561,36 +616,56 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
+
if (Subtarget.has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
// This is just the low 32 bits of a (signed) fp->i64 conversion.
// We cannot do this with Promote because i64 is not a legal type.
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
+ if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
+ }
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
if (Subtarget.hasSPE()) {
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- } else
+ } else {
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
}
// With the instructions enabled under FPCVT, we can do everything.
if (Subtarget.hasFPCVT()) {
if (Subtarget.has64BitSupport()) {
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
}
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -613,6 +688,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
+ // PowerPC has better expansions for funnel shifts than the generic
+ // TargetLowering::expandFunnelShift.
+ if (Subtarget.has64BitSupport()) {
+ setOperationAction(ISD::FSHL, MVT::i64, Custom);
+ setOperationAction(ISD::FSHR, MVT::i64, Custom);
+ }
+ setOperationAction(ISD::FSHL, MVT::i32, Custom);
+ setOperationAction(ISD::FSHR, MVT::i32, Custom);
+
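
For reference only (this is the generic FSHL semantics being lowered, not the PowerPC expansion itself): a funnel shift takes the high word of the shifted concatenation of its operands, and with both operands equal it degenerates to a rotate, which is what a rotate-rich ISA like PowerPC can exploit.

#include <cstdint>
#include <cassert>

// fshl(A, B, S): take the top 32 bits of the 64-bit concatenation A:B after
// shifting it left by S (mod 32).
static uint32_t fshl32(uint32_t A, uint32_t B, unsigned S) {
  S %= 32;
  return S ? (A << S) | (B >> (32 - S)) : A;
}

int main() {
  assert(fshl32(0x12345678, 0x9ABCDEF0, 8) == 0x3456789A);
  // Same operand twice == rotate left.
  assert(fshl32(0xDEADBEEF, 0xDEADBEEF, 4) == 0xEADBEEFD);
  return 0;
}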
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
@@ -745,9 +829,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
}
- for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
- setOperationAction(ISD::ABS, VT, Custom);
-
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
@@ -767,6 +848,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v4i32,
Subtarget.useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
@@ -776,11 +861,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
- // Without hasP8Altivec set, v2i64 SMAX isn't available.
- // But ABS custom lowering requires SMAX support.
- if (!Subtarget.hasP8Altivec())
- setOperationAction(ISD::ABS, MVT::v2i64, Expand);
-
// Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
// With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
@@ -809,6 +889,27 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
else
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ if (Subtarget.isISA3_1()) {
+ setOperationAction(ISD::MUL, MVT::v2i64, Legal);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
+ setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
+ setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
+ setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
+ setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
+ setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
+ setOperationAction(ISD::UREM, MVT::v2i64, Legal);
+ setOperationAction(ISD::SREM, MVT::v2i64, Legal);
+ setOperationAction(ISD::UREM, MVT::v4i32, Legal);
+ setOperationAction(ISD::SREM, MVT::v4i32, Legal);
+ setOperationAction(ISD::UREM, MVT::v1i128, Legal);
+ setOperationAction(ISD::SREM, MVT::v1i128, Legal);
+ setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
+ setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
+ setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
+ }
+
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -920,7 +1021,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SUB, MVT::v2i64, Expand);
}
- setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
+ if (Subtarget.isISA3_1())
+ setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
+ else
+ setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
@@ -929,6 +1033,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
@@ -937,6 +1045,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Custom handling for partial vectors of integers converted to
// floating point. We already have optimal handling for v2i32 through
// the DAG combine, so those aren't necessary.
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
@@ -968,7 +1084,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
@@ -982,7 +1098,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
@@ -1065,6 +1181,48 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
+ } else if (Subtarget.hasAltivec() && EnableSoftFP128) {
+ addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
+
+ for (MVT FPT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
+
+ setOperationAction(ISD::LOAD, MVT::f128, Promote);
+ setOperationAction(ISD::STORE, MVT::f128, Promote);
+
+ AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
+ AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
+
+ // Set FADD/FSUB as libcall to keep the legalizer from expanding the
+ // fp_to_uint and int_to_fp.
+ setOperationAction(ISD::FADD, MVT::f128, LibCall);
+ setOperationAction(ISD::FSUB, MVT::f128, LibCall);
+
+ setOperationAction(ISD::FMUL, MVT::f128, Expand);
+ setOperationAction(ISD::FDIV, MVT::f128, Expand);
+ setOperationAction(ISD::FNEG, MVT::f128, Expand);
+ setOperationAction(ISD::FABS, MVT::f128, Expand);
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+
+ // Expand the fp_extend if the target type is fp128.
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
+
+ // Expand the fp_round if the source type is fp128.
+ for (MVT VT : {MVT::f32, MVT::f64}) {
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
+ }
}
if (Subtarget.hasP9Altivec()) {
@@ -1081,164 +1239,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
}
- if (Subtarget.hasQPX()) {
- setOperationAction(ISD::FADD, MVT::v4f64, Legal);
- setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
- setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
- setOperationAction(ISD::FREM, MVT::v4f64, Expand);
-
- setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
- setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
-
- setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
- setOperationAction(ISD::STORE , MVT::v4f64, Custom);
-
- setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
-
- if (!Subtarget.useCRBits())
- setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
- setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
- setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
- setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
-
- setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
- setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
-
- setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
-
- setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
- setOperationAction(ISD::FABS , MVT::v4f64, Legal);
- setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
- setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
- setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
- setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
- setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
- setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
- setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
- setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
-
- setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
-
- setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
- setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
-
- addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
-
- setOperationAction(ISD::FADD, MVT::v4f32, Legal);
- setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
- setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
- setOperationAction(ISD::FREM, MVT::v4f32, Expand);
-
- setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
- setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
-
- setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
- setOperationAction(ISD::STORE , MVT::v4f32, Custom);
-
- if (!Subtarget.useCRBits())
- setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
- setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
- setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
-
- setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
- setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
-
- setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
- setOperationAction(ISD::FABS , MVT::v4f32, Legal);
- setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
- setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
- setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
- setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
- setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
- setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
- setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
- setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
-
- setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
-
- setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
- setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
-
- addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
-
- setOperationAction(ISD::AND , MVT::v4i1, Legal);
- setOperationAction(ISD::OR , MVT::v4i1, Legal);
- setOperationAction(ISD::XOR , MVT::v4i1, Legal);
-
- if (!Subtarget.useCRBits())
- setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
- setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
-
- setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
- setOperationAction(ISD::STORE , MVT::v4i1, Custom);
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
- setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
-
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
-
- addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
-
- setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
- setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
-
- setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
- setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
-
- setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
-
- // These need to set FE_INEXACT, and so cannot be vectorized here.
- setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
- setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
-
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
- setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
-
- setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
- setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
- } else {
- setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
-
- setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
- setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
- }
-
- // TODO: Handle constrained floating-point operations of v4f64
+ if (Subtarget.pairedVectorMemops()) {
+ addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
+ setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v256i1, Custom);
+ }
+ if (Subtarget.hasMMA()) {
+ addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
+ setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v512i1, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
}
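
An illustrative aside, not part of this change: the v512i1/UACC class registered above backs the POWER10 MMA accumulators (and v256i1/VSRp backs the paired-vector type). At the source level these are reached through the MMA builtins; the sketch below assumes a compiler targeting POWER10 with MMA and AltiVec enabled.

// Sketch only: accumulate the outer product of two vectors into an MMA
// accumulator; __vector_quad corresponds to the v512i1/UACC class above,
// __vector_pair to v256i1/VSRp.
typedef __vector unsigned char vec_t;

void ger_accumulate(__vector_quad *acc, vec_t a, vec_t b) {
  __builtin_mma_xvf32gerpp(acc, a, b); // acc += outer product of a and b
}
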
if (Subtarget.has64BitSupport())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ if (Subtarget.isISA3_1())
+ setOperationAction(ISD::SRA, MVT::v1i128, Legal);
+
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
if (!isPPC64) {
@@ -1315,8 +1333,19 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::POW_F128, "powf128");
setLibcallName(RTLIB::FMIN_F128, "fminf128");
setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
- setLibcallName(RTLIB::POWI_F128, "__powikf2");
setLibcallName(RTLIB::REM_F128, "fmodf128");
+ setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
+ setLibcallName(RTLIB::CEIL_F128, "ceilf128");
+ setLibcallName(RTLIB::FLOOR_F128, "floorf128");
+ setLibcallName(RTLIB::TRUNC_F128, "truncf128");
+ setLibcallName(RTLIB::ROUND_F128, "roundf128");
+ setLibcallName(RTLIB::LROUND_F128, "lroundf128");
+ setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
+ setLibcallName(RTLIB::RINT_F128, "rintf128");
+ setLibcallName(RTLIB::LRINT_F128, "lrintf128");
+ setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
+ setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
+ setLibcallName(RTLIB::FMA_F128, "fmaf128");
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
@@ -1379,6 +1408,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
MaxLoadsPerMemcmpOptSize = 4;
}
+ IsStrictFPEnabled = true;
+
// Let the subtarget (CPU) decide if a predictable select is more expensive
// than the corresponding branch. This information is used in CGP to decide
// when to convert selects into branches.
@@ -1421,8 +1452,8 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
- if (Subtarget.hasAltivec() || Subtarget.hasQPX())
- getMaxByValAlign(Ty, Alignment, Subtarget.hasQPX() ? Align(32) : Align(16));
+ if (Subtarget.hasAltivec())
+ getMaxByValAlign(Ty, Alignment, Align(16));
return Alignment.value();
}
@@ -1438,16 +1469,6 @@ bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
-/// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific
-/// type is cheaper than a multiply followed by a shift.
-/// This is true for words and doublewords on 64-bit PowerPC.
-bool PPCTargetLowering::isMulhCheaperThanMulShift(EVT Type) const {
- if (Subtarget.isPPC64() && (isOperationLegal(ISD::MULHS, Type) ||
- isOperationLegal(ISD::MULHU, Type)))
- return true;
- return TargetLowering::isMulhCheaperThanMulShift(Type);
-}
-
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -1468,6 +1489,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "PPCISD::FP_TO_SINT_IN_VSR";
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
+ case PPCISD::FTSQRT:
+ return "PPCISD::FTSQRT";
+ case PPCISD::FSQRT:
+ return "PPCISD::FSQRT";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
@@ -1515,7 +1540,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ANDI_rec_1_GT_BIT:
return "PPCISD::ANDI_rec_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
- case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
@@ -1552,6 +1577,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::PADDI_DTPREL:
+ return "PPCISD::PADDI_DTPREL";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
case PPCISD::SC: return "PPCISD::SC";
case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
@@ -1560,12 +1587,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
case PPCISD::VABSD: return "PPCISD::VABSD";
- case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
- case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
- case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
- case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
- case PPCISD::QBFLT: return "PPCISD::QBFLT";
- case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
@@ -1573,8 +1594,35 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
+ case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
+ return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
+ case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
+ return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
+ case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
+ case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
+ case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
+ case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
+ case PPCISD::STRICT_FADDRTZ:
+ return "PPCISD::STRICT_FADDRTZ";
+ case PPCISD::STRICT_FCTIDZ:
+ return "PPCISD::STRICT_FCTIDZ";
+ case PPCISD::STRICT_FCTIWZ:
+ return "PPCISD::STRICT_FCTIWZ";
+ case PPCISD::STRICT_FCTIDUZ:
+ return "PPCISD::STRICT_FCTIDUZ";
+ case PPCISD::STRICT_FCTIWUZ:
+ return "PPCISD::STRICT_FCTIWUZ";
+ case PPCISD::STRICT_FCFID:
+ return "PPCISD::STRICT_FCFID";
+ case PPCISD::STRICT_FCFIDU:
+ return "PPCISD::STRICT_FCFIDU";
+ case PPCISD::STRICT_FCFIDS:
+ return "PPCISD::STRICT_FCFIDS";
+ case PPCISD::STRICT_FCFIDUS:
+ return "PPCISD::STRICT_FCFIDUS";
+ case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
}
return nullptr;
}
@@ -1584,9 +1632,6 @@ EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
if (!VT.isVector())
return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
- if (Subtarget.hasQPX())
- return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
-
return VT.changeVectorElementTypeToInteger();
}
@@ -2360,36 +2405,6 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
return SDValue();
}
-/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
-/// amount, otherwise return -1.
-int PPC::isQVALIGNIShuffleMask(SDNode *N) {
- EVT VT = N->getValueType(0);
- if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
- return -1;
-
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-
- // Find the first non-undef value in the shuffle mask.
- unsigned i;
- for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
- /*search*/;
-
- if (i == 4) return -1; // all undef.
-
- // Otherwise, check to see if the rest of the elements are consecutively
- // numbered from this value.
- unsigned ShiftAmt = SVOp->getMaskElt(i);
- if (ShiftAmt < i) return -1;
- ShiftAmt -= i;
-
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 4; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
- return -1;
-
- return ShiftAmt;
-}
-
//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//
@@ -2431,6 +2446,20 @@ bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
return false;
}
+/// isIntS34Immediate - This method tests whether the value of the given node
+/// can be accurately represented as a sign extension from a 34-bit value. If
+/// so, this returns true and the immediate.
+bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
+ if (!isa<ConstantSDNode>(N))
+ return false;
+
+ Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+ return isInt<34>(Imm);
+}
+bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
+ return isIntS34Immediate(Op.getNode(), Imm);
+}
+
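
As a standalone illustration, not part of this change: the check above accepts exactly the values representable as a sign-extended 34-bit field, i.e. the range [-2^33, 2^33 - 1]. llvm::isInt<34>() computes the equivalent of this self-contained sketch:

#include <cassert>
#include <cstdint>

// A value fits the 34-bit displacement field of a prefixed instruction iff it
// lies in [-2^33, 2^33 - 1]; this mirrors what llvm::isInt<34>() tests.
static bool isSignedInt34(int64_t Imm) {
  const int64_t Lo = -(int64_t(1) << 33);
  const int64_t Hi = (int64_t(1) << 33) - 1;
  return Imm >= Lo && Imm <= Hi;
}

int main() {
  assert(isSignedInt34(0));
  assert(isSignedInt34((int64_t(1) << 33) - 1));    // largest representable
  assert(!isSignedInt34(int64_t(1) << 33));         // one past the top
  assert(isSignedInt34(-(int64_t(1) << 33)));       // most negative
  assert(!isSignedInt34(-(int64_t(1) << 33) - 1));  // one past the bottom
  return 0;
}
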
/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
@@ -2631,6 +2660,55 @@ bool PPCTargetLowering::SelectAddressRegImm(
return true; // [r+0]
}
+/// Similar to the 16-bit case but for instructions that take a 34-bit
+/// displacement field (prefixed loads/stores).
+bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // Only on 64-bit targets.
+ if (N.getValueType() != MVT::i64)
+ return false;
+
+ SDLoc dl(N);
+ int64_t Imm = 0;
+
+ if (N.getOpcode() == ISD::ADD) {
+ if (!isIntS34Immediate(N.getOperand(1), Imm))
+ return false;
+ Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N.getOperand(0);
+ return true;
+ }
+
+ if (N.getOpcode() == ISD::OR) {
+ if (!isIntS34Immediate(N.getOperand(1), Imm))
+ return false;
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
+ if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
+ return false;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
+ return true;
+ }
+
+ if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
+ Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
+ Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
+ return true;
+ }
+
+ return false;
+}
+
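
A minimal standalone sketch, not part of this change, of the disjoint-bits reasoning used in the ISD::OR case above: when every bit of the displacement falls on a bit of the base that is known to be zero, OR and ADD produce the same result, so the address can be selected as base + displacement.

#include <cassert>
#include <cstdint>

// Proceed with the OR-as-ADD fold only when no bit is set in both the base
// and the displacement. (The code above tests the negated condition in order
// to bail out early.)
static bool orFoldsToAdd(uint64_t KnownZeroOfBase, uint64_t Disp) {
  return (KnownZeroOfBase | ~Disp) == ~0ULL;
}

int main() {
  uint64_t Base = 0x1000;      // low 12 bits are zero...
  uint64_t KnownZero = 0xFFF;  // ...and provably so
  uint64_t Disp = 0x8;
  assert(orFoldsToAdd(KnownZero, Disp));
  assert((Base | Disp) == Base + Disp);
  return 0;
}
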
/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
@@ -2760,16 +2838,9 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
return false;
}
- // PowerPC doesn't have preinc load/store instructions for vectors (except
- // for QPX, which does have preinc r+r forms).
- if (VT.isVector()) {
- if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
- return false;
- } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
- AM = ISD::PRE_INC;
- return true;
- }
- }
+ // PowerPC doesn't have preinc load/store instructions for vectors
+ if (VT.isVector())
+ return false;
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
// Common code will reject creating a pre-inc form if the base pointer
@@ -3064,6 +3135,15 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
TLSModel::Model Model = TM.getTLSModel(GV);
if (Model == TLSModel::LocalExec) {
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
+ SDValue MatAddr =
+ DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
+ }
+
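
For context, not part of this change: under the local-exec TLS model an access is simply the thread pointer plus a constant offset, which is why the PC-relative path above materializes the offset and adds it to r13 (PPC::X13), the 64-bit PowerPC thread pointer. At the source level the pattern is just:

// Sketch: with local-exec TLS, every access to tls_counter lowers to
// "thread pointer + constant offset" (r13 + tprel on 64-bit PowerPC).
thread_local int tls_counter = 0;

int bump() { return ++tls_counter; }
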
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
@@ -3076,29 +3156,44 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::InitialExec) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
- SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLS);
- SDValue GOTPtr;
- if (is64bit) {
- setUsesTOCBasePtr(DAG);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
- PtrVT, GOTReg, TGA);
+ bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
+ SDValue TGATLS = DAG.getTargetGlobalAddress(
+ GV, dl, PtrVT, 0,
+ IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
+ SDValue TPOffset;
+ if (IsPCRel) {
+ SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
+ TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
+ MachinePointerInfo());
} else {
- if (!TM.isPositionIndependent())
- GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
- else if (picLevel == PICLevel::SmallPIC)
- GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
- else
- GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ SDValue GOTPtr;
+ if (is64bit) {
+ setUsesTOCBasePtr(DAG);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr =
+ DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
+ } else {
+ if (!TM.isPositionIndependent())
+ GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+ else if (picLevel == PICLevel::SmallPIC)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
+ TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
}
- SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
- PtrVT, TGA, GOTPtr);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
if (Model == TLSModel::GeneralDynamic) {
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_GOT_TLSGD_PCREL_FLAG);
+ return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
+ }
+
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
@@ -3117,6 +3212,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::LocalDynamic) {
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_GOT_TLSLD_PCREL_FLAG);
+ SDValue MatPCRel =
+ DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
+ return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
+ }
+
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
@@ -3362,6 +3465,57 @@ SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
return Op.getOperand(0);
}
+SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
+
+ assert((Op.getOpcode() == ISD::INLINEASM ||
+ Op.getOpcode() == ISD::INLINEASM_BR) &&
+ "Expecting Inline ASM node.");
+
+ // If an LR store is already known to be required then there is no point in
+ // checking this ASM as well.
+ if (MFI.isLRStoreRequired())
+ return Op;
+
+ // Inline ASM nodes have an optional last operand that is an incoming Flag of
+ // type MVT::Glue. We want to ignore this last operand if that is the case.
+ unsigned NumOps = Op.getNumOperands();
+ if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
+ --NumOps;
+
+ // Check all operands that may contain the LR.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default:
+ llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Mem:
+ i += NumVals;
+ break;
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
+ if (Reg != PPC::LR && Reg != PPC::LR8)
+ continue;
+ MFI.setLRStoreRequired();
+ return Op;
+ }
+ break;
+ }
+ }
+ }
+
+ return Op;
+}
+
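
An illustrative model of the scan above, using a hypothetical encoding rather than the real InlineAsm flag words: walk the operand groups, skip anything that cannot write a register, and require an LR save as soon as LR shows up in a def or clobber.

#include <cassert>
#include <vector>

// Hypothetical, simplified stand-in for the InlineAsm operand encoding: each
// group carries a kind plus the registers it touches; only defs and clobbers
// can force the link-register save.
enum class Kind { RegUse, Imm, Mem, Clobber, RegDef };
struct OperandGroup {
  Kind K;
  std::vector<unsigned> Regs;
};

constexpr unsigned LR = 8; // hypothetical number standing in for LR/LR8

static bool asmNeedsLRSave(const std::vector<OperandGroup> &Groups) {
  for (const OperandGroup &G : Groups) {
    if (G.K != Kind::Clobber && G.K != Kind::RegDef)
      continue; // uses, immediates and memory operands cannot clobber LR
    for (unsigned R : G.Regs)
      if (R == LR)
        return true; // corresponds to MFI.setLRStoreRequired()
  }
  return false;
}

int main() {
  std::vector<OperandGroup> Groups = {{Kind::RegUse, {3}}, {Kind::Clobber, {LR}}};
  assert(asmNeedsLRSave(Groups));
  return 0;
}
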
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget.isAIXABI())
@@ -3491,11 +3645,6 @@ static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
PPC::F11, PPC::F12, PPC::F13};
-/// QFPR - The set of QPX registers that should be allocated for arguments.
-static const MCPhysReg QFPR[] = {
- PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
-
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
@@ -3525,10 +3674,6 @@ static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
Alignment = Align(16);
- // QPX vector types stored in double-precision are padded to a 32 byte
- // boundary.
- else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
- Alignment = Align(32);
// ByVal parameters are aligned as requested.
if (Flags.isByVal()) {
@@ -3560,14 +3705,11 @@ static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
/// stack slot (instead of being passed in registers). ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
-static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
- ISD::ArgFlagsTy Flags,
- unsigned PtrByteSize,
- unsigned LinkageSize,
- unsigned ParamAreaSize,
- unsigned &ArgOffset,
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize, unsigned LinkageSize,
+ unsigned ParamAreaSize, unsigned &ArgOffset,
unsigned &AvailableFPRs,
- unsigned &AvailableVRs, bool HasQPX) {
+ unsigned &AvailableVRs) {
bool UseMemory = false;
// Respect alignment of argument on the stack.
@@ -3591,11 +3733,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
// However, if the argument is actually passed in an FPR or a VR,
// we don't use memory after all.
if (!Flags.isByVal()) {
- if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
- // QPX registers overlap with the scalar FP registers.
- (HasQPX && (ArgVT == MVT::v4f32 ||
- ArgVT == MVT::v4f64 ||
- ArgVT == MVT::v4i1)))
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
if (AvailableFPRs > 0) {
--AvailableFPRs;
return false;
@@ -3630,11 +3768,8 @@ SDValue PPCTargetLowering::LowerFormalArguments(
if (Subtarget.is64BitELFABI())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
- if (Subtarget.is32BitELFABI())
- return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
- InVals);
-
- return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ assert(Subtarget.is32BitELFABI());
+ return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
}
@@ -3734,18 +3869,12 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::VRRCRegClass;
break;
case MVT::v4f32:
- RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
+ RC = &PPC::VRRCRegClass;
break;
case MVT::v2f64:
case MVT::v2i64:
RC = &PPC::VRRCRegClass;
break;
- case MVT::v4f64:
- RC = &PPC::QFRCRegClass;
- break;
- case MVT::v4i1:
- RC = &PPC::QBRCRegClass;
- break;
}
SDValue ArgValue;
@@ -3944,7 +4073,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
- const unsigned Num_QFPR_Regs = Num_FPR_Regs;
// Do a first pass over the arguments to determine whether the ABI
// guarantees that our caller has allocated the parameter save area
@@ -3963,8 +4091,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytes, AvailableFPRs, AvailableVRs,
- Subtarget.hasQPX()))
+ NumBytes, AvailableFPRs, AvailableVRs))
HasParameterArea = true;
}
@@ -3974,7 +4101,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned ArgOffset = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- unsigned &QFPR_idx = FPR_idx;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
@@ -4217,51 +4343,20 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v2i64:
case MVT::v1i128:
case MVT::f128:
- if (!Subtarget.hasQPX()) {
- // These can be scalar arguments or elements of a vector array type
- // passed directly. The latter are used to implement ELFv2 homogenous
- // vector aggregates.
- if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++VR_idx;
- } else {
- if (CallConv == CallingConv::Fast)
- ComputeArgOffset();
- needsLoad = true;
- }
- if (CallConv != CallingConv::Fast || needsLoad)
- ArgOffset += 16;
- break;
- } // not QPX
-
- assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
- "Invalid QPX parameter type");
- LLVM_FALLTHROUGH;
-
- case MVT::v4f64:
- case MVT::v4i1:
- // QPX vectors are treated like their scalar floating-point subregisters
- // (except that they're larger).
- unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
- if (QFPR_idx != Num_QFPR_Regs) {
- const TargetRegisterClass *RC;
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
- case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
- default: RC = &PPC::QBRCRegClass; break;
- }
-
- unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogeneous
+ // vector aggregates.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++QFPR_idx;
+ ++VR_idx;
} else {
if (CallConv == CallingConv::Fast)
ComputeArgOffset();
needsLoad = true;
}
if (CallConv != CallingConv::Fast || needsLoad)
- ArgOffset += Sz;
+ ArgOffset += 16;
break;
}
@@ -4328,366 +4423,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
return Chain;
}
-SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- // TODO: add description of PPC stack frame format, or at least some docs.
- //
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo &MFI = MF.getFrameInfo();
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
-
- EVT PtrVT = getPointerTy(MF.getDataLayout());
- bool isPPC64 = PtrVT == MVT::i64;
- // Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
- (CallConv == CallingConv::Fast));
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
- unsigned ArgOffset = LinkageSize;
- // Area that is at least reserved in caller of this function.
- unsigned MinReservedArea = ArgOffset;
-
- static const MCPhysReg GPR_32[] = { // 32-bit registers.
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10,
- };
- static const MCPhysReg GPR_64[] = { // 64-bit registers.
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10,
- };
- static const MCPhysReg VR[] = {
- PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
- PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
- };
-
- const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
- const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
- const unsigned Num_VR_Regs = array_lengthof( VR);
-
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
-
- const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
-
- // In 32-bit non-varargs functions, the stack space for vectors is after the
- // stack space for non-vectors. We do not use this space unless we have
- // too many vectors to fit in registers, something that only occurs in
- // constructed examples:), but we have to walk the arglist to figure
- // that out...for the pathological case, compute VecArgOffset as the
- // start of the vector parameter area. Computing VecArgOffset is the
- // entire point of the following loop.
- unsigned VecArgOffset = ArgOffset;
- if (!isVarArg && !isPPC64) {
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
- ++ArgNo) {
- EVT ObjectVT = Ins[ArgNo].VT;
- ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
-
- if (Flags.isByVal()) {
- // ObjSize is the true size, ArgSize rounded up to multiple of regs.
- unsigned ObjSize = Flags.getByValSize();
- unsigned ArgSize =
- ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
- VecArgOffset += ArgSize;
- continue;
- }
-
- switch(ObjectVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unhandled argument type!");
- case MVT::i1:
- case MVT::i32:
- case MVT::f32:
- VecArgOffset += 4;
- break;
- case MVT::i64: // PPC64
- case MVT::f64:
- // FIXME: We are guaranteed to be !isPPC64 at this point.
- // Does MVT::i64 apply?
- VecArgOffset += 8;
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- // Nothing to do, we're only looking at Nonvector args here.
- break;
- }
- }
- }
- // We've found where the vector parameter area in memory is. Skip the
- // first 12 parameters; these don't use that memory.
- VecArgOffset = ((VecArgOffset+15)/16)*16;
- VecArgOffset += 12*16;
-
- // Add DAG nodes to load the arguments or copy them out of registers. On
- // entry to a function on PPC, the arguments start after the linkage area,
- // although the first ones are often in registers.
-
- SmallVector<SDValue, 8> MemOps;
- unsigned nAltivecParamsAtEnd = 0;
- Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
- unsigned CurArgIdx = 0;
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
- SDValue ArgVal;
- bool needsLoad = false;
- EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
- unsigned ArgSize = ObjSize;
- ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
- if (Ins[ArgNo].isOrigArg()) {
- std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
- CurArgIdx = Ins[ArgNo].getOrigArgIndex();
- }
- unsigned CurArgOffset = ArgOffset;
-
- // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
- if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
- if (isVarArg || isPPC64) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += CalculateStackSlotSize(ObjectVT,
- Flags,
- PtrByteSize);
- } else nAltivecParamsAtEnd++;
- } else
- // Calculate min reserved area.
- MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
- Flags,
- PtrByteSize);
-
- // FIXME the codegen can be much improved in some cases.
- // We do not have to keep everything in memory.
- if (Flags.isByVal()) {
- assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
-
- // ObjSize is the true size, ArgSize rounded up to multiple of registers.
- ObjSize = Flags.getByValSize();
- ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
- // Objects of size 1 and 2 are right justified, everything else is
- // left justified. This means the memory address is adjusted forwards.
- if (ObjSize==1 || ObjSize==2) {
- CurArgOffset = CurArgOffset + (4 - ObjSize);
- }
- // The value of the object is its address.
- int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(FIN);
- if (ObjSize==1 || ObjSize==2) {
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg;
- if (isPPC64)
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- else
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
- SDValue Store =
- DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(&*FuncArg), ObjType);
- MemOps.push_back(Store);
- ++GPR_idx;
- }
-
- ArgOffset += PtrByteSize;
-
- continue;
- }
- for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
- // Store whatever pieces of the object are in registers
- // to memory. ArgOffset will be the address of the beginning
- // of the object.
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg;
- if (isPPC64)
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- else
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(&*FuncArg, j));
- MemOps.push_back(Store);
- ++GPR_idx;
- ArgOffset += PtrByteSize;
- } else {
- ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
- break;
- }
- }
- continue;
- }
-
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unhandled argument type!");
- case MVT::i1:
- case MVT::i32:
- if (!isPPC64) {
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
-
- if (ObjectVT == MVT::i1)
- ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
-
- ++GPR_idx;
- } else {
- needsLoad = true;
- ArgSize = PtrByteSize;
- }
- // All int arguments reserve stack space in the Darwin ABI.
- ArgOffset += PtrByteSize;
- break;
- }
- LLVM_FALLTHROUGH;
- case MVT::i64: // PPC64
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
-
- if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
- // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
- // value to MVT::i64 and then truncate to the correct register size.
- ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
-
- ++GPR_idx;
- } else {
- needsLoad = true;
- ArgSize = PtrByteSize;
- }
- // All int arguments reserve stack space in the Darwin ABI.
- ArgOffset += 8;
- break;
-
- case MVT::f32:
- case MVT::f64:
- // Every 4 bytes of argument space consumes one of the GPRs available for
- // argument passing.
- if (GPR_idx != Num_GPR_Regs) {
- ++GPR_idx;
- if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
- ++GPR_idx;
- }
- if (FPR_idx != Num_FPR_Regs) {
- unsigned VReg;
-
- if (ObjectVT == MVT::f32)
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
- else
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
-
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++FPR_idx;
- } else {
- needsLoad = true;
- }
-
- // All FP arguments reserve stack space in the Darwin ABI.
- ArgOffset += isPPC64 ? 8 : ObjSize;
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- // Note that vector arguments in registers don't reserve stack space,
- // except in varargs functions.
- if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- if (isVarArg) {
- while ((ArgOffset % 16) != 0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != Num_GPR_Regs)
- GPR_idx++;
- }
- ArgOffset += 16;
- GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
- }
- ++VR_idx;
- } else {
- if (!isVarArg && !isPPC64) {
- // Vectors go after all the nonvectors.
- CurArgOffset = VecArgOffset;
- VecArgOffset += 16;
- } else {
- // Vectors are aligned.
- ArgOffset = ((ArgOffset+15)/16)*16;
- CurArgOffset = ArgOffset;
- ArgOffset += 16;
- }
- needsLoad = true;
- }
- break;
- }
-
- // We need to load the argument to a virtual register if we determined above
- // that we ran out of physical registers of the appropriate type.
- if (needsLoad) {
- int FI = MFI.CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
- }
-
- InVals.push_back(ArgVal);
- }
-
- // Allow for Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += 16*nAltivecParamsAtEnd;
- }
-
- // Area that is at least reserved in the caller of this function.
- MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
-
- // Set the size that is at least reserved in caller of this function. Tail
- // call optimized functions' reserved stack space needs to be aligned so that
- // taking the difference between two stack areas will result in an aligned
- // stack.
- MinReservedArea =
- EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
- FuncInfo->setMinReservedArea(MinReservedArea);
-
- // If the function takes variable number of arguments, make a frame index for
- // the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg) {
- int Depth = ArgOffset;
-
- FuncInfo->setVarArgsFrameIndex(
- MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
- Depth, true));
- SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
-
- // If this function is vararg, store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by dereferencing
- // the result of va_next.
- for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
- unsigned VReg;
-
- if (isPPC64)
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- else
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
-
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
- MemOps.push_back(Store);
- // Increment the address by four for the next argument to store
- SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
- FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
- }
- }
-
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
-
- return Chain;
-}
-
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
@@ -4758,6 +4493,13 @@ static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
if (STICallee->isUsingPCRelativeCalls())
return false;
+ // If the GV is not a strong definition then we need to assume it can be
+ // replaced by another function at link time. The replacement may not share
+ // the same TOC as the caller, since the callee may be swapped for a
+ // PC-relative version of the same function.
+ if (!GV->isStrongDefinitionForLinker())
+ return false;
+
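
As an aside, not part of this change: the strong-definition requirement matters because interposable definitions can be swapped at link time. A weak definition is the classic example (GCC/Clang attribute syntax assumed):

// Sketch: 'helper' is interposable, so the definition the linker finally binds
// may come from another module with a different TOC base, or be built
// PC-relative; this is what the isStrongDefinitionForLinker() bail-out guards.
__attribute__((weak)) int helper(int x) { return x + 1; }

int caller(int x) { return helper(x); }
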
// The medium and large code models are expected to provide a sufficiently
// large TOC to provide all data addressing needs of a module with a
// single TOC.
@@ -4765,12 +4507,6 @@ static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
CodeModel::Large == TM.getCodeModel())
return true;
- // Otherwise we need to ensure callee and caller are in the same section,
- // since the linker may allocate multiple TOCs, and we don't know which
- // sections will belong to the same TOC base.
- if (!GV->isStrongDefinitionForLinker())
- return false;
-
// Any explicitly-specified sections and section prefixes must also match.
// Also, if we're using -ffunction-sections, then each function is always in
// a different section (the same is true for COMDAT functions).
@@ -4814,10 +4550,9 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget,
for (const ISD::OutputArg& Param : Outs) {
if (Param.Flags.isNest()) continue;
- if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
- PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytes, AvailableFPRs, AvailableVRs,
- Subtarget.hasQPX()))
+ if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
+ LinkageSize, ParamAreaSize, NumBytes,
+ AvailableFPRs, AvailableVRs))
return true;
}
return false;
@@ -5331,66 +5066,53 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
Subtarget.is32BitELFABI() && !isLocalCallee() &&
Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
- // On AIX, direct function calls reference the symbol for the function's
- // entry point, which is named by prepending a "." before the function's
- // C-linkage name.
- const auto getAIXFuncEntryPointSymbolSDNode =
- [&](StringRef FuncName, bool IsDeclaration,
- const XCOFF::StorageClass &SC) {
- auto &Context = DAG.getMachineFunction().getMMI().getContext();
+ const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
+ const TargetMachine &TM = Subtarget.getTargetMachine();
+ const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
+ MCSymbolXCOFF *S =
+ cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
- MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(
- Context.getOrCreateSymbol(Twine(".") + Twine(FuncName)));
-
- if (IsDeclaration && !S->hasRepresentedCsectSet()) {
- // On AIX, an undefined symbol needs to be associated with a
- // MCSectionXCOFF to get the correct storage mapping class.
- // In this case, XCOFF::XMC_PR.
- MCSectionXCOFF *Sec = Context.getXCOFFSection(
- S->getSymbolTableName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
- SectionKind::getMetadata());
- S->setRepresentedCsect(Sec);
- }
-
- MVT PtrVT =
- DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
- return DAG.getMCSymbol(S, PtrVT);
- };
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ return DAG.getMCSymbol(S, PtrVT);
+ };
if (isFunctionGlobalAddress(Callee)) {
- const GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
- const GlobalValue *GV = G->getGlobal();
-
- if (!Subtarget.isAIXABI())
- return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
- UsePlt ? PPCII::MO_PLT : 0);
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
- assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
- const GlobalObject *GO = cast<GlobalObject>(GV);
- const XCOFF::StorageClass SC =
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
- return getAIXFuncEntryPointSymbolSDNode(GO->getName(), GO->isDeclaration(),
- SC);
+ if (Subtarget.isAIXABI()) {
+ assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
+ return getAIXFuncEntryPointSymbolSDNode(GV);
+ }
+ return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
+ UsePlt ? PPCII::MO_PLT : 0);
}
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *SymName = S->getSymbol();
- if (!Subtarget.isAIXABI())
- return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
- UsePlt ? PPCII::MO_PLT : 0);
+ if (Subtarget.isAIXABI()) {
+ // If there exists a user-declared function whose name is the same as the
+ // ExternalSymbol's, then we pick up the user-declared version.
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
+ if (const Function *F =
+ dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
+ return getAIXFuncEntryPointSymbolSDNode(F);
+
+ // On AIX, direct function calls reference the symbol for the function's
+ // entry point, which is named by prepending a "." before the function's
+ // C-linkage name. A Qualname is returned here because an external
+ // function entry point is a csect with XTY_ER property.
+ const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
+ auto &Context = DAG.getMachineFunction().getMMI().getContext();
+ MCSectionXCOFF *Sec = Context.getXCOFFSection(
+ (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
+ SectionKind::getMetadata());
+ return Sec->getQualNameSymbol();
+ };
- // If there exists a user-declared function whose name is the same as the
- // ExternalSymbol's, then we pick up the user-declared version.
- const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
- if (const Function *F =
- dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) {
- const XCOFF::StorageClass SC =
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F);
- return getAIXFuncEntryPointSymbolSDNode(F->getName(), F->isDeclaration(),
- SC);
+ SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
}
-
- return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
+ return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
+ UsePlt ? PPCII::MO_PLT : 0);
}
// No transformation needed.
@@ -5735,19 +5457,15 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
CLI.NoMerge);
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
- return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
- InVals, CB);
-
- if (Subtarget.isSVR4ABI())
- return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
- InVals, CB);
-
if (Subtarget.isAIXABI())
return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
InVals, CB);
- return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ assert(Subtarget.isSVR4ABI());
+ if (Subtarget.isPPC64())
+ return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ InVals, CB);
+ return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
InVals, CB);
}
@@ -6044,7 +5762,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- unsigned &QFPR_idx = FPR_idx;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -6058,7 +5775,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
const unsigned NumVRs = array_lengthof(VR);
- const unsigned NumQFPRs = NumFPRs;
// On ELFv2, we can avoid allocating the parameter area if all the arguments
// can be passed to the callee in registers.
@@ -6073,9 +5789,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
for (unsigned i = 0; i != NumOps; ++i) {
if (Outs[i].Flags.isNest()) continue;
if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
- PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytesTmp, AvailableFPRs, AvailableVRs,
- Subtarget.hasQPX()))
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytesTmp, AvailableFPRs, AvailableVRs))
HasParameterArea = true;
}
}
@@ -6123,20 +5838,11 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
continue;
break;
case MVT::v4f32:
- // When using QPX, this is handled like a FP register, otherwise, it
- // is an Altivec register.
- if (Subtarget.hasQPX()) {
- if (++NumFPRsUsed <= NumFPRs)
- continue;
- } else {
- if (++NumVRsUsed <= NumVRs)
- continue;
- }
+ if (++NumVRsUsed <= NumVRs)
+ continue;
break;
case MVT::f32:
case MVT::f64:
- case MVT::v4f64: // QPX
- case MVT::v4i1: // QPX
if (++NumFPRsUsed <= NumFPRs)
continue;
break;
@@ -6498,7 +6204,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
case MVT::v2i64:
case MVT::v1i128:
case MVT::f128:
- if (!Subtarget.hasQPX()) {
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogeneous
// vector aggregates.
@@ -6554,63 +6259,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (!IsFastCall)
ArgOffset += 16;
break;
- } // not QPX
-
- assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
- "Invalid QPX parameter type");
-
- LLVM_FALLTHROUGH;
- case MVT::v4f64:
- case MVT::v4i1: {
- bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
- if (CFlags.IsVarArg) {
- assert(HasParameterArea &&
- "Parameter area must exist if we have a varargs call.");
- // We could elide this store in the case where the object fits
- // entirely in R registers. Maybe later.
- SDValue Store =
- DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Store);
- if (QFPR_idx != NumQFPRs) {
- SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
- PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
- }
- ArgOffset += (IsF32 ? 16 : 32);
- for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
- if (GPR_idx == NumGPRs)
- break;
- SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
- DAG.getConstant(i, dl, PtrVT));
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- break;
- }
-
- // Non-varargs QPX params go into registers or on the stack.
- if (QFPR_idx != NumQFPRs) {
- RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
- } else {
- if (IsFastCall)
- ComputePtrOff();
-
- assert(HasParameterArea &&
- "Parameter area must exist to pass an argument in memory.");
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- true, CFlags.IsTailCall, true, MemOpChains,
- TailCallArguments, dl);
- if (IsFastCall)
- ArgOffset += (IsF32 ? 16 : 32);
- }
-
- if (!IsFastCall)
- ArgOffset += (IsF32 ? 16 : 32);
- break;
- }
}
}
@@ -6664,384 +6312,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
-SDValue PPCTargetLowering::LowerCall_Darwin(
- SDValue Chain, SDValue Callee, CallFlags CFlags,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- const CallBase *CB) const {
- unsigned NumOps = Outs.size();
-
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- bool isPPC64 = PtrVT == MVT::i64;
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
- MachineFunction &MF = DAG.getMachineFunction();
-
- // Mark this function as potentially containing a function that contains a
- // tail call. As a consequence the frame pointer will be used for dynamicalloc
- // and restoring the callers stack pointer in this functions epilog. This is
- // done because by tail calling the called function might overwrite the value
- // in this function's (MF) stack pointer stack slot 0(SP).
- if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CFlags.CallConv == CallingConv::Fast)
- MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
-
- // Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with 24/48 bytes, which is
- // prereserved space for [SP][CR][LR][3 x unused].
- unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
- unsigned NumBytes = LinkageSize;
-
- // Add up all the space actually used.
- // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
- // they all go in registers, but we must reserve stack space for them for
- // possible use by the caller. In varargs or 64-bit calls, parameters are
- // assigned stack space in order, with padding so Altivec parameters are
- // 16-byte aligned.
- unsigned nAltivecParamsAtEnd = 0;
- for (unsigned i = 0; i != NumOps; ++i) {
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- EVT ArgVT = Outs[i].VT;
- // Varargs Altivec parameters are padded to a 16 byte boundary.
- if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
- ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
- ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
- if (!CFlags.IsVarArg && !isPPC64) {
- // Non-varargs Altivec parameters go after all the non-Altivec
- // parameters; handle those later so we know how much padding we need.
- nAltivecParamsAtEnd++;
- continue;
- }
- // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
- NumBytes = ((NumBytes+15)/16)*16;
- }
- NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
- }
-
- // Allow for Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- NumBytes = ((NumBytes+15)/16)*16;
- NumBytes += 16*nAltivecParamsAtEnd;
- }
-
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
-
- // Tail call needs the stack to be aligned.
- if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CFlags.CallConv == CallingConv::Fast)
- NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
-
- // Calculate by how many bytes the stack has to be adjusted in case of tail
- // call optimization.
- int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
-
- // To protect arguments on the stack from being clobbered in a tail call,
- // force all the loads to happen before doing any other lowering.
- if (CFlags.IsTailCall)
- Chain = DAG.getStackArgumentTokenFactor(Chain);
-
- // Adjust the stack pointer for the new arguments...
- // These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
- SDValue CallSeqStart = Chain;
-
- // Load the return address and frame pointer so it can be move somewhere else
- // later.
- SDValue LROp, FPOp;
- Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
-
- // Set up a copy of the stack pointer for use loading and storing any
- // arguments that may not fit in the registers available for argument
- // passing.
- SDValue StackPtr;
- if (isPPC64)
- StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
- else
- StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
-
- // Figure out which arguments are going to go in registers, and which in
- // memory. Also, if this is a vararg function, floating point operations
- // must be stored to our stack, and loaded into integer regs as well, if
- // any integer regs are available for argument passing.
- unsigned ArgOffset = LinkageSize;
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
-
- static const MCPhysReg GPR_32[] = { // 32-bit registers.
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10,
- };
- static const MCPhysReg GPR_64[] = { // 64-bit registers.
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10,
- };
- static const MCPhysReg VR[] = {
- PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
- PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
- };
- const unsigned NumGPRs = array_lengthof(GPR_32);
- const unsigned NumFPRs = 13;
- const unsigned NumVRs = array_lengthof(VR);
-
- const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
-
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
- SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
-
- SmallVector<SDValue, 8> MemOpChains;
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
-
- // PtrOff will be used to store the current argument to the stack if a
- // register cannot be found for it.
- SDValue PtrOff;
-
- PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
-
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-
- // On PPC64, promote integers to 64-bit values.
- if (isPPC64 && Arg.getValueType() == MVT::i32) {
- // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
- unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
- }
-
- // FIXME memcpy is used way more than necessary. Correctness first.
- // Note: "by value" is code for passing a structure by value, not
- // basic types.
- if (Flags.isByVal()) {
- unsigned Size = Flags.getByValSize();
- // Very small objects are passed right-justified. Everything else is
- // passed left-justified.
- if (Size==1 || Size==2) {
- EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
- if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
- MachinePointerInfo(), VT);
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
-
- ArgOffset += PtrByteSize;
- } else {
- SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
- PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
- Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
- CallSeqStart,
- Flags, DAG, dl);
- ArgOffset += PtrByteSize;
- }
- continue;
- }
- // Copy entire object into memory. There are cases where gcc-generated
- // code assumes it is there, even if it could be put entirely into
- // registers. (This is not what the doc says.)
- Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
- CallSeqStart,
- Flags, DAG, dl);
-
- // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
- // copy the pieces of the object that fit into registers from the
- // parameter save area.
- for (unsigned j=0; j<Size; j+=PtrByteSize) {
- SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
- SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
- if (GPR_idx != NumGPRs) {
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- ArgOffset += PtrByteSize;
- } else {
- ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
- break;
- }
- }
- continue;
- }
-
- switch (Arg.getSimpleValueType().SimpleTy) {
- default: llvm_unreachable("Unexpected ValueType for argument!");
- case MVT::i1:
- case MVT::i32:
- case MVT::i64:
- if (GPR_idx != NumGPRs) {
- if (Arg.getValueType() == MVT::i1)
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
-
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
- } else {
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, CFlags.IsTailCall, false, MemOpChains,
- TailCallArguments, dl);
- }
- ArgOffset += PtrByteSize;
- break;
- case MVT::f32:
- case MVT::f64:
- if (FPR_idx != NumFPRs) {
- RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
-
- if (CFlags.IsVarArg) {
- SDValue Store =
- DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Store);
-
- // Float varargs are always shadowed in available integer registers
- if (GPR_idx != NumGPRs) {
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
- SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- } else {
- // If we have any FPRs remaining, we may also have GPRs remaining.
- // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
- // GPRs.
- if (GPR_idx != NumGPRs)
- ++GPR_idx;
- if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
- !isPPC64) // PPC64 has 64-bit GPR's obviously :)
- ++GPR_idx;
- }
- } else
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, CFlags.IsTailCall, false, MemOpChains,
- TailCallArguments, dl);
- if (isPPC64)
- ArgOffset += 8;
- else
- ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- if (CFlags.IsVarArg) {
- // These go aligned on the stack, or in the corresponding R registers
- // when within range. The Darwin PPC ABI doc claims they also go in
- // V registers; in fact gcc does this only for arguments that are
- // prototyped, not for those that match the ... We do it for all
- // arguments, seems to work.
- while (ArgOffset % 16 !=0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != NumGPRs)
- GPR_idx++;
- }
- // We could elide this store in the case where the object fits
- // entirely in R registers. Maybe later.
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, dl, PtrVT));
- SDValue Store =
- DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Store);
- if (VR_idx != NumVRs) {
- SDValue Load =
- DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
- }
- ArgOffset += 16;
- for (unsigned i=0; i<16; i+=PtrByteSize) {
- if (GPR_idx == NumGPRs)
- break;
- SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
- DAG.getConstant(i, dl, PtrVT));
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- break;
- }
-
- // Non-varargs Altivec params generally go in registers, but have
- // stack space allocated at the end.
- if (VR_idx != NumVRs) {
- // Doesn't have GPR space allocated.
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
- } else if (nAltivecParamsAtEnd==0) {
- // We are emitting Altivec params in order.
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, CFlags.IsTailCall, true, MemOpChains,
- TailCallArguments, dl);
- ArgOffset += 16;
- }
- break;
- }
- }
- // If all Altivec parameters fit in registers, as they usually do,
- // they get stack space following the non-Altivec parameters. We
- // don't track this here because nobody below needs it.
- // If there are more Altivec parameters than fit in registers emit
- // the stores here.
- if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
- unsigned j = 0;
- // Offset is aligned; skip 1st 12 params which go in V registers.
- ArgOffset = ((ArgOffset+15)/16)*16;
- ArgOffset += 12*16;
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = OutVals[i];
- EVT ArgType = Outs[i].VT;
- if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
- ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
- if (++j > NumVRs) {
- SDValue PtrOff;
- // We are emitting Altivec params in order.
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, CFlags.IsTailCall, true, MemOpChains,
- TailCallArguments, dl);
- ArgOffset += 16;
- }
- }
- }
- }
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
-
- // On Darwin, R12 must contain the address of an indirect callee. This does
- // not mean the MTCTR instruction must use R12; it's easier to model this as
- // an extra parameter, so do that.
- if (CFlags.IsIndirect) {
- assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
- RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
- PPC::R12), Callee));
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- if (CFlags.IsTailCall)
- PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
- TailCallArguments);
-
- return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
- Callee, SPDiff, NumBytes, Ins, InVals, CB);
-}
-
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State) {
@@ -7052,9 +6322,10 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
- assert((!ValVT.isInteger() ||
- (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&
- "Integer argument exceeds register size: should have been legalized");
+ if (ValVT.isVector() && !State.getMachineFunction()
+ .getTarget()
+ .Options.EnableAIXExtendedAltivecABI)
+ report_fatal_error("the default Altivec AIX ABI is not yet supported");
if (ValVT == MVT::f128)
report_fatal_error("f128 is unimplemented on AIX.");
@@ -7062,9 +6333,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
if (ArgFlags.isNest())
report_fatal_error("Nest arguments are unimplemented.");
- if (ValVT.isVector() || LocVT.isVector())
- report_fatal_error("Vector arguments are unimplemented on AIX.");
-
static const MCPhysReg GPR_32[] = {// 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10};
@@ -7072,6 +6340,11 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+ static const MCPhysReg VR[] = {// Vector registers.
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5,
+ PPC::V6, PPC::V7, PPC::V8, PPC::V9,
+ PPC::V10, PPC::V11, PPC::V12, PPC::V13};
+
if (ArgFlags.isByVal()) {
if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
report_fatal_error("Pass-by-value arguments with alignment greater than "
@@ -7116,7 +6389,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
case MVT::i32: {
const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
// AIX integer arguments are always passed in register width.
- if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
+ if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
: CCValAssign::LocInfo::ZExt;
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
@@ -7167,6 +6440,25 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
}
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v1i128: {
+ if (State.isVarArg())
+ report_fatal_error(
+ "variadic arguments for vector types are unimplemented for AIX");
+
+ if (unsigned VReg = State.AllocateReg(VR))
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
+ else {
+ report_fatal_error(
+ "passing vector parameters to the stack is unimplemented for AIX");
+ }
+ return false;
+ }
}
return true;
}
@@ -7187,6 +6479,14 @@ static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
return &PPC::F4RCRegClass;
case MVT::f64:
return &PPC::F8RCRegClass;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v1i128:
+ return &PPC::VRRCRegClass;
}
}
@@ -7194,7 +6494,7 @@ static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
SelectionDAG &DAG, SDValue ArgValue,
MVT LocVT, const SDLoc &dl) {
assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
- assert(ValVT.getSizeInBits() < LocVT.getSizeInBits());
+ assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
if (Flags.isSExt())
ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
@@ -7281,8 +6581,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
const PPCSubtarget &Subtarget =
static_cast<const PPCSubtarget &>(DAG.getSubtarget());
- if (Subtarget.hasQPX())
- report_fatal_error("QPX support is not supported on AIX.");
const bool IsPPC64 = Subtarget.isPPC64();
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
@@ -7291,6 +6589,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
SmallVector<CCValAssign, 16> ArgLocs;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
const EVT PtrVT = getPointerTy(MF.getDataLayout());
@@ -7305,6 +6604,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
CCValAssign &VA = ArgLocs[I++];
MVT LocVT = VA.getLocVT();
ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
+ if (VA.isMemLoc() && VA.getValVT().isVector())
+ report_fatal_error(
+ "passing vector parameters to the stack is unimplemented for AIX");
// For compatibility with the AIX XL compiler, the float args in the
// parameter save area are initialized even if the argument is available
@@ -7315,6 +6617,15 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
if (VA.isMemLoc() && VA.needsCustom())
continue;
+ if (VA.isRegLoc()) {
+ if (VA.getValVT().isScalarInteger())
+ FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
+ else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector())
+ FuncInfo->appendParameterType(VA.getValVT().SimpleTy == MVT::f32
+ ? PPCFunctionInfo::ShortFloatPoint
+ : PPCFunctionInfo::LongFloatPoint);
+ }
+
if (Flags.isByVal() && VA.isMemLoc()) {
const unsigned Size =
alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
@@ -7360,10 +6671,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// to extracting the value from the register directly, and elide the
// stores when the argument's address is not taken, but that will need to
// be future work.
- SDValue Store =
- DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom,
- DAG.getObjectPtrOffset(dl, FIN, Offset),
- MachinePointerInfo::getFixedStack(MF, FI, Offset));
+ SDValue Store = DAG.getStore(
+ CopyFrom.getValue(1), dl, CopyFrom,
+ DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
+ MachinePointerInfo::getFixedStack(MF, FI, Offset));
MemOps.push_back(Store);
};
@@ -7378,6 +6689,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
const CCValAssign RL = ArgLocs[I++];
HandleRegLoc(RL.getLocReg(), Offset);
+ FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
}
if (Offset != StackSize) {
@@ -7399,7 +6711,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
if (ValVT.isScalarInteger() &&
- (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
+ (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
ArgValue =
truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
}
@@ -7440,7 +6752,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// aligned stack.
CallerReservedArea =
EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setMinReservedArea(CallerReservedArea);
if (isVarArg) {
@@ -7502,10 +6813,6 @@ SDValue PPCTargetLowering::LowerCall_AIX(
const PPCSubtarget& Subtarget =
static_cast<const PPCSubtarget&>(DAG.getSubtarget());
- if (Subtarget.hasQPX())
- report_fatal_error("QPX is not supported on AIX.");
- if (Subtarget.hasAltivec())
- report_fatal_error("Altivec support is unimplemented on AIX.");
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<CCValAssign, 16> ArgLocs;
@@ -7562,11 +6869,12 @@ SDValue PPCTargetLowering::LowerCall_AIX(
}
auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
- return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
- (LoadOffset != 0)
- ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
- : Arg,
- MachinePointerInfo(), VT);
+ return DAG.getExtLoad(
+ ISD::ZEXTLOAD, dl, PtrVT, Chain,
+ (LoadOffset != 0)
+ ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
+ : Arg,
+ MachinePointerInfo(), VT);
};
unsigned LoadOffset = 0;
@@ -7596,9 +6904,11 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// Only memcpy the bytes that don't pass in register.
MemcpyFlags.setByValSize(ByValSize - LoadOffset);
Chain = CallSeqStart = createMemcpyOutsideCallSeq(
- (LoadOffset != 0) ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
- : Arg,
- DAG.getObjectPtrOffset(dl, StackPtr, ByValVA.getLocMemOffset()),
+ (LoadOffset != 0)
+ ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
+ : Arg,
+ DAG.getObjectPtrOffset(dl, StackPtr,
+ TypeSize::Fixed(ByValVA.getLocMemOffset())),
CallSeqStart, MemcpyFlags, DAG, dl);
continue;
}
@@ -7648,6 +6958,10 @@ SDValue PPCTargetLowering::LowerCall_AIX(
const MVT LocVT = VA.getLocVT();
const MVT ValVT = VA.getValVT();
+ if (VA.isMemLoc() && VA.getValVT().isVector())
+ report_fatal_error(
+ "passing vector parameters to the stack is unimplemented for AIX");
+
switch (VA.getLocInfo()) {
default:
report_fatal_error("Unexpected argument extension type.");
@@ -7689,7 +7003,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// f32 in 32-bit GPR
// f64 in 64-bit GPR
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
- else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
+ else if (Arg.getValueType().getFixedSizeInBits() <
+ LocVT.getFixedSizeInBits())
// f32 in 64-bit GPR.
RegsToPass.push_back(std::make_pair(
VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
@@ -8048,20 +7363,45 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
// <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
// <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
- assert(Op.getValueType().isVector() && "Vector type expected.");
-
- SDLoc DL(Op);
- SDValue N1 = Op.getOperand(0);
- unsigned SrcSize = N1.getValueType().getSizeInBits();
- assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
- SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
-
EVT TrgVT = Op.getValueType();
+ assert(TrgVT.isVector() && "Vector type expected.");
unsigned TrgNumElts = TrgVT.getVectorNumElements();
EVT EltVT = TrgVT.getVectorElementType();
+ if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
+ TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
+ !isPowerOf2_32(EltVT.getSizeInBits()))
+ return SDValue();
+
+ SDValue N1 = Op.getOperand(0);
+ EVT SrcVT = N1.getValueType();
+ unsigned SrcSize = SrcVT.getSizeInBits();
+ if (SrcSize > 256 ||
+ !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
+ !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
+ return SDValue();
+ if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
+ return SDValue();
+
unsigned WideNumElts = 128 / EltVT.getSizeInBits();
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+ SDLoc DL(Op);
+ SDValue Op1, Op2;
+ if (SrcSize == 256) {
+ EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
+ EVT SplitVT =
+ N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned SplitNumElts = SplitVT.getVectorNumElements();
+ Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
+ DAG.getConstant(0, DL, VecIdxTy));
+ Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
+ DAG.getConstant(SplitNumElts, DL, VecIdxTy));
+ }
+ else {
+ Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
+ Op2 = DAG.getUNDEF(WideVT);
+ }
+
// First list the elements we want to keep.
unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
SmallVector<int, 16> ShuffV;
@@ -8077,16 +7417,17 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
// ShuffV.push_back(i + WideNumElts);
ShuffV.push_back(WideNumElts + 1);
- SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
- return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
+ Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
+ return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
}
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
- // Not FP? Not a fsel.
+ // Not FP, or using SPE? Not a fsel.
if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
- !Op.getOperand(2).getValueType().isFloatingPoint())
+ !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
return Op;
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -8202,54 +7543,105 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
-void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
- SelectionDAG &DAG,
- const SDLoc &dl) const {
- assert(Op.getOperand(0).getValueType().isFloatingPoint());
- SDValue Src = Op.getOperand(0);
- if (Src.getValueType() == MVT::f32)
- Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
-
- SDValue Tmp;
+static unsigned getPPCStrictOpcode(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("No strict version of this opcode!");
+ case PPCISD::FCTIDZ:
+ return PPCISD::STRICT_FCTIDZ;
+ case PPCISD::FCTIWZ:
+ return PPCISD::STRICT_FCTIWZ;
+ case PPCISD::FCTIDUZ:
+ return PPCISD::STRICT_FCTIDUZ;
+ case PPCISD::FCTIWUZ:
+ return PPCISD::STRICT_FCTIWUZ;
+ case PPCISD::FCFID:
+ return PPCISD::STRICT_FCFID;
+ case PPCISD::FCFIDU:
+ return PPCISD::STRICT_FCFIDU;
+ case PPCISD::FCFIDS:
+ return PPCISD::STRICT_FCFIDS;
+ case PPCISD::FCFIDUS:
+ return PPCISD::STRICT_FCFIDUS;
+ }
+}
+
+static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ SDLoc dl(Op);
+ bool IsStrict = Op->isStrictFPOpcode();
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
+ Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
+
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
+
+ // For strict nodes, source is the second operand.
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ assert(Src.getValueType().isFloatingPoint());
+ if (Src.getValueType() == MVT::f32) {
+ if (IsStrict) {
+ Src =
+ DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
+ DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
+ Chain = Src.getValue(1);
+ } else
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+ }
+ SDValue Conv;
+ unsigned Opc = ISD::DELETED_NODE;
switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
- Tmp = DAG.getNode(
- Op.getOpcode() == ISD::FP_TO_SINT
- ? PPCISD::FCTIWZ
- : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
- dl, MVT::f64, Src);
+ Opc = IsSigned ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
break;
case MVT::i64:
- assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+ assert((IsSigned || Subtarget.hasFPCVT()) &&
"i64 FP_TO_UINT is supported only with FPCVT");
- Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
- PPCISD::FCTIDUZ,
- dl, MVT::f64, Src);
- break;
+ Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
+ }
+ if (IsStrict) {
+ Opc = getPPCStrictOpcode(Opc);
+ Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
+ {Chain, Src}, Flags);
+ } else {
+ Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
}
+ return Conv;
+}
+
+void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+ SelectionDAG &DAG,
+ const SDLoc &dl) const {
+ SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
+ Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
+ bool IsStrict = Op->isStrictFPOpcode();
// Convert the FP value to an int value through memory.
bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
- (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
+ (IsSigned || Subtarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Emit a store to the stack slot.
- SDValue Chain;
+ SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
if (i32Stack) {
MachineFunction &MF = DAG.getMachineFunction();
Alignment = Align(4);
MachineMemOperand *MMO =
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
- SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+ SDValue Ops[] = { Chain, Tmp, FIPtr };
Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
} else
- Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI, Alignment);
+ Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias on big endian.
@@ -8271,76 +7663,100 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
SelectionDAG &DAG,
const SDLoc &dl) const {
- assert(Op.getOperand(0).getValueType().isFloatingPoint());
- SDValue Src = Op.getOperand(0);
-
- if (Src.getValueType() == MVT::f32)
- Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
-
- SDValue Tmp;
- switch (Op.getSimpleValueType().SimpleTy) {
- default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
- case MVT::i32:
- Tmp = DAG.getNode(
- Op.getOpcode() == ISD::FP_TO_SINT
- ? PPCISD::FCTIWZ
- : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
- dl, MVT::f64, Src);
- Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
- break;
- case MVT::i64:
- assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
- "i64 FP_TO_UINT is supported only with FPCVT");
- Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
- PPCISD::FCTIDUZ,
- dl, MVT::f64, Src);
- Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
- break;
- }
- return Tmp;
+ SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
+ SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
+ if (Op->isStrictFPOpcode())
+ return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
+ else
+ return Mov;
}
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const {
+ bool IsStrict = Op->isStrictFPOpcode();
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
+ Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
// FP to INT conversions are legal for f128.
- if (Op->getOperand(0).getValueType() == MVT::f128)
- return Op;
+ if (SrcVT == MVT::f128)
+ return Subtarget.hasP9Vector() ? Op : SDValue();
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
// PPC (the libcall is not available).
- if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
- if (Op.getValueType() == MVT::i32) {
- if (Op.getOpcode() == ISD::FP_TO_SINT) {
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- MVT::f64, Op.getOperand(0),
+ if (SrcVT == MVT::ppcf128) {
+ if (DstVT == MVT::i32) {
+ // TODO: Conservatively pass only nofpexcept flag here. Need to check and
+ // set other fast-math flags to FP operations in both strict and
+ // non-strict cases. (FP_TO_SINT, FSUB)
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
+
+ if (IsSigned) {
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
DAG.getIntPtrConstant(0, dl));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- MVT::f64, Op.getOperand(0),
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
DAG.getIntPtrConstant(1, dl));
- // Add the two halves of the long double in round-to-zero mode.
- SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
-
- // Now use a smaller FP_TO_SINT.
- return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
- }
- if (Op.getOpcode() == ISD::FP_TO_UINT) {
+ // Add the two halves of the long double in round-to-zero mode, and use
+ // a smaller FP_TO_SINT.
+ if (IsStrict) {
+ SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ {Op.getOperand(0), Lo, Hi}, Flags);
+ return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ {Res.getValue(1), Res}, Flags);
+ } else {
+ SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+ } else {
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
- SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
- // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
- // FIXME: generated code sucks.
- // TODO: Are there fast-math-flags to propagate to this FSUB?
- SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
- Op.getOperand(0), Tmp);
- True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
- True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
- DAG.getConstant(0x80000000, dl, MVT::i32));
- SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
- Op.getOperand(0));
- return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
- ISD::SETGE);
+ SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
+ SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
+ if (IsStrict) {
+ // Sel = Src < 0x80000000
+ // FltOfs = select Sel, 0.0, 0x80000000
+ // IntOfs = select Sel, 0, 0x80000000
+ // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
+ SDValue Chain = Op.getOperand(0);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ EVT DstSetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
+ SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
+ Chain, true);
+ Chain = Sel.getValue(1);
+
+ SDValue FltOfs = DAG.getSelect(
+ dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
+
+ SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
+ DAG.getVTList(SrcVT, MVT::Other),
+ {Chain, Src, FltOfs}, Flags);
+ Chain = Val.getValue(1);
+ SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
+ DAG.getVTList(DstVT, MVT::Other),
+ {Chain, Val}, Flags);
+ Chain = SInt.getValue(1);
+ SDValue IntOfs = DAG.getSelect(
+ dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
+ SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
+ return DAG.getMergeValues({Result, Chain}, dl);
+ } else {
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
+ SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
+ True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
+ return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
+ }
}
}
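[Editor's note, not part of the diff] The Sel/FltOfs/IntOfs comment in the strict path above describes the same offset trick used by the non-strict select_cc form: values below 2^31 convert directly, larger values have 2^31 subtracted before the signed conversion and get the top bit restored afterwards. As a reading aid, here is a minimal scalar C++ sketch of that identity, with a plain double standing in for ppc_fp128 and the 2^31 threshold matching the TwoE31 constant above:

#include <cstdint>
#include <cstdio>

// Editor's illustration: scalar model of the Sel/FltOfs/IntOfs identity used
// by the ppcf128 -> i32 unsigned path above. Values below 2^31 convert
// directly; larger values have 2^31 subtracted before the signed conversion
// and get the top bit restored by the final XOR.
static uint32_t FPToUIntViaSInt(double Src) {
  const double TwoE31 = 2147483648.0;        // 2^31, cf. TwoE31 above
  bool Sel = Src < TwoE31;                   // Sel    = Src < 2^31
  double FltOfs = Sel ? 0.0 : TwoE31;        // FltOfs = select Sel, 0.0, 2^31
  uint32_t IntOfs = Sel ? 0u : 0x80000000u;  // IntOfs = select Sel, 0, 2^31
  int32_t SInt = (int32_t)(Src - FltOfs);    // fp_to_sint(Src - FltOfs)
  return (uint32_t)SInt ^ IntOfs;            // ... ^ IntOfs
}

int main() {
  std::printf("%u\n", FPToUIntViaSInt(123.0));        // 123
  std::printf("%u\n", FPToUIntViaSInt(3000000000.0)); // 3000000000
}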
@@ -8369,6 +7785,10 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
ReuseLoadInfo &RLI,
SelectionDAG &DAG,
ISD::LoadExtType ET) const {
+ // Conservatively skip reusing for constrained FP nodes.
+ if (Op->isStrictFPOpcode())
+ return false;
+
SDLoc dl(Op);
bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
(Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
@@ -8388,6 +7808,13 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
if (LD->getMemoryVT() != MemVT)
return false;
+ // If the result of the load is an illegal type, then we can't build a
+ // valid chain for reuse, since the legalised loads and the token factor
+ // node that ties them together use a different output chain than the
+ // illegal load.
+ if (!isTypeLegal(LD->getValueType(0)))
+ return false;
+
RLI.Ptr = LD->getBasePtr();
if (LD->isIndexed() && !LD->getOffset().isUndef()) {
assert(LD->getAddressingMode() == ISD::PRE_INC &&
@@ -8452,13 +7879,41 @@ bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
continue;
if (UI->getOpcode() != ISD::SINT_TO_FP &&
- UI->getOpcode() != ISD::UINT_TO_FP)
+ UI->getOpcode() != ISD::UINT_TO_FP &&
+ UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
+ UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
return true;
}
return false;
}
+static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget,
+ SDValue Chain = SDValue()) {
+ bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
+ SDLoc dl(Op);
+
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
+ unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
+ : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
+ EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
+ if (Op->isStrictFPOpcode()) {
+ if (!Chain)
+ Chain = Op.getOperand(0);
+ return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
+ DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
+ } else
+ return DAG.getNode(ConvOpc, dl, ConvTy, Src);
+}
+
/// Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
@@ -8470,25 +7925,13 @@ SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
"Invalid floating point type as target of conversion");
assert(Subtarget.hasFPCVT() &&
"Int to FP conversions with direct moves require FPCVT");
- SDValue FP;
- SDValue Src = Op.getOperand(0);
- bool SinglePrec = Op.getValueType() == MVT::f32;
+ SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
- bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
- unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
- (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
-
- if (WordInt) {
- FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
- dl, MVT::f64, Src);
- FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
- }
- else {
- FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
- FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
- }
-
- return FP;
+ bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
+ unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
+ SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
+ return convertIntToFP(Op, Mov, DAG, Subtarget);
}
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
@@ -8513,17 +7956,23 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const {
-
+ bool IsStrict = Op->isStrictFPOpcode();
unsigned Opc = Op.getOpcode();
- assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
+ Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
"Unexpected conversion type");
assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
"Supports conversions to v2f64/v4f32 only.");
- bool SignedConv = Opc == ISD::SINT_TO_FP;
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
+
+ bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
bool FourEltRes = Op.getValueType() == MVT::v4f32;
- SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
+ SDValue Wide = widenVec(DAG, Src, dl);
EVT WideVT = Wide.getValueType();
unsigned WideNumElts = WideVT.getVectorNumElements();
MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
@@ -8548,7 +7997,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
SDValue Extend;
if (SignedConv) {
Arrange = DAG.getBitcast(IntermediateVT, Arrange);
- EVT ExtVT = Op.getOperand(0).getValueType();
+ EVT ExtVT = Src.getValueType();
if (Subtarget.hasP9Altivec())
ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
IntermediateVT.getVectorNumElements());
@@ -8558,14 +8007,27 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
} else
Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
+ if (IsStrict)
+ return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
+ {Op.getOperand(0), Extend}, Flags);
+
return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
- EVT InVT = Op.getOperand(0).getValueType();
+ EVT InVT = Src.getValueType();
EVT OutVT = Op.getValueType();
if (OutVT.isVector() && OutVT.isFloatingPoint() &&
isOperationCustom(Op.getOpcode(), InVT))
@@ -8573,37 +8035,21 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// Conversions to f128 are legal.
if (Op.getValueType() == MVT::f128)
- return Op;
-
- if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
- if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
- return SDValue();
-
- SDValue Value = Op.getOperand(0);
- // The values are now known to be -1 (false) or 1 (true). To convert this
- // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
- // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
- Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
-
- SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
-
- Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
-
- if (Op.getValueType() != MVT::v4f64)
- Value = DAG.getNode(ISD::FP_ROUND, dl,
- Op.getValueType(), Value,
- DAG.getIntPtrConstant(1, dl));
- return Value;
- }
+ return Subtarget.hasP9Vector() ? Op : SDValue();
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
- if (Op.getOperand(0).getValueType() == MVT::i1)
- return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
- DAG.getConstantFP(1.0, dl, Op.getValueType()),
- DAG.getConstantFP(0.0, dl, Op.getValueType()));
+ if (Src.getValueType() == MVT::i1) {
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
+ DAG.getConstantFP(1.0, dl, Op.getValueType()),
+ DAG.getConstantFP(0.0, dl, Op.getValueType()));
+ if (IsStrict)
+ return DAG.getMergeValues({Sel, Chain}, dl);
+ else
+ return Sel;
+ }
// If we have direct moves, we can do all the conversion, skip the store/load
// however, without FPCVT we can't do most conversions.
@@ -8611,22 +8057,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
Subtarget.isPPC64() && Subtarget.hasFPCVT())
return LowerINT_TO_FPDirectMove(Op, DAG, dl);
- assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
+ assert((IsSigned || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
- // If we have FCFIDS, then use it when converting to single-precision.
- // Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
- ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
- : PPCISD::FCFIDS)
- : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
- : PPCISD::FCFID);
- MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
- ? MVT::f32
- : MVT::f64;
-
- if (Op.getOperand(0).getValueType() == MVT::i64) {
- SDValue SINT = Op.getOperand(0);
+ if (Src.getValueType() == MVT::i64) {
+ SDValue SINT = Src;
// When converting to single-precision, we actually need to convert
// to double-precision first and then round to single-precision.
// To avoid double-rounding effects during that operation, we have
@@ -8714,16 +8149,16 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), FrameIdx));
+ SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FrameIdx));
+ Chain = Store;
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
- RLI.Chain = Store;
+ RLI.Chain = Chain;
RLI.MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
RLI.Alignment = Align(4);
@@ -8736,18 +8171,27 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
+ Chain = Bits.getValue(1);
} else
Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
- SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+ SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
+ if (IsStrict)
+ Chain = FP.getValue(1);
- if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
- FP = DAG.getNode(ISD::FP_ROUND, dl,
- MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
+ if (IsStrict)
+ FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
+ DAG.getVTList(MVT::f32, MVT::Other),
+ {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
+ else
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
+ DAG.getIntPtrConstant(0, dl));
+ }
return FP;
}
- assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ assert(Src.getValueType() == MVT::i32 &&
"Unhandled INT_TO_FP type in custom expander!");
// Since we only generate this in 64-bit mode, we can take advantage of
// 64-bit registers. In particular, sign extend the input value into the
@@ -8761,21 +8205,20 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
ReuseLoadInfo RLI;
bool ReusingLoad;
- if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
- DAG))) {
+ if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), FrameIdx));
+ SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FrameIdx));
+ Chain = Store;
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
- RLI.Chain = Store;
+ RLI.Chain = Chain;
RLI.MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
RLI.Alignment = Align(4);
@@ -8785,10 +8228,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
SDValue Ops[] = { RLI.Chain, RLI.Ptr };
- Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::LFIWZX : PPCISD::LFIWAX,
- dl, DAG.getVTList(MVT::f64, MVT::Other),
- Ops, MVT::i32, MMO);
+ Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other), Ops,
+ MVT::i32, MMO);
+ Chain = Ld.getValue(1);
if (ReusingLoad)
spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
} else {
@@ -8798,25 +8241,34 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
- Op.getOperand(0));
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
// STD the extended value into the stack slot.
SDValue Store = DAG.getStore(
- DAG.getEntryNode(), dl, Ext64, FIdx,
+ Chain, dl, Ext64, FIdx,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
+ Chain = Store;
// Load the value as a double.
Ld = DAG.getLoad(
- MVT::f64, dl, Store, FIdx,
+ MVT::f64, dl, Chain, FIdx,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
+ Chain = Ld.getValue(1);
}
// FCFID it and return it.
- SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
- if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
- FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
- DAG.getIntPtrConstant(0, dl));
+ SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
+ if (IsStrict)
+ Chain = FP.getValue(1);
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
+ if (IsStrict)
+ FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
+ DAG.getVTList(MVT::f32, MVT::Other),
+ {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
+ else
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
+ DAG.getIntPtrConstant(0, dl));
+ }
return FP;
}
@@ -8851,16 +8303,24 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
Chain = MFFS.getValue(1);
- // Save FP register to stack slot
- int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
+ SDValue CWD;
+ if (isTypeLegal(MVT::i64)) {
+ CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
+ } else {
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
- // Load FP Control Word from low 32 bits of stack slot.
- SDValue Four = DAG.getConstant(4, dl, PtrVT);
- SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
- SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
- Chain = CWD.getValue(1);
+ // Load FP Control Word from low 32 bits of stack slot.
+ assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
+ "Stack slot adjustment is valid only on big endian subtargets!");
+ SDValue Four = DAG.getConstant(4, dl, PtrVT);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+ CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
+ Chain = CWD.getValue(1);
+ }
// Transform as necessary
SDValue CWD1 =
@@ -8971,6 +8431,31 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(OutOps, dl);
}
+SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+
+ bool IsFSHL = Op.getOpcode() == ISD::FSHL;
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+ SDValue Z = Op.getOperand(2);
+ EVT AmtVT = Z.getValueType();
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ // This is simpler than TargetLowering::expandFunnelShift because we can rely
+ // on PowerPC shift by BW being well defined.
+ Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
+ DAG.getConstant(BitWidth - 1, dl, AmtVT));
+ SDValue SubZ =
+ DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
+ X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
+ Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
+ return DAG.getNode(ISD::OR, dl, VT, X, Y);
+}
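[Editor's note, not part of the diff] For readers unfamiliar with funnel shifts, here is a scalar C++ sketch of the two formulas in the comment above. It special-cases a shift amount equal to the bit width, which the DAG lowering instead gets for free because the PPC shift instructions are well defined at BW:

#include <cstdint>
#include <cstdio>

// Editor's sketch of the fshl/fshr formulas used by LowerFunnelShift above.
// Plain C++ '<<'/'>>' is undefined for a shift amount equal to the bit width,
// so these helpers return 0 for Amt >= 32; the lowering relies on the PPC
// shift instructions producing 0 in that case.
static uint32_t Shl32(uint32_t V, unsigned Amt) { return Amt >= 32 ? 0 : V << Amt; }
static uint32_t Srl32(uint32_t V, unsigned Amt) { return Amt >= 32 ? 0 : V >> Amt; }

// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
static uint32_t Fshl32(uint32_t X, uint32_t Y, unsigned Z) {
  Z &= 31;                              // Z % BW, as done with the AND above
  return Shl32(X, Z) | Srl32(Y, 32 - Z);
}

// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
static uint32_t Fshr32(uint32_t X, uint32_t Y, unsigned Z) {
  Z &= 31;
  return Shl32(X, 32 - Z) | Srl32(Y, Z);
}

int main() {
  std::printf("%08x\n", Fshl32(0xAABBCCDDu, 0x11223344u, 8)); // bbccdd11
  std::printf("%08x\n", Fshr32(0xAABBCCDDu, 0x11223344u, 8)); // dd112233
}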
+
//===----------------------------------------------------------------------===//
// Vector related lowering.
//
@@ -8986,7 +8471,7 @@ static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
// For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
- if (Val == ((1LU << (SplatSize * 8)) - 1)) {
+ if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
SplatSize = 1;
Val = 0xFF;
}
@@ -9164,110 +8649,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
- if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
- // We first build an i32 vector, load it into a QPX register,
- // then convert it to a floating-point vector and compare it
- // to a zero vector to get the boolean result.
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
- MachinePointerInfo PtrInfo =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
-
- assert(BVN->getNumOperands() == 4 &&
- "BUILD_VECTOR for v4i1 does not have 4 operands");
-
- bool IsConst = true;
- for (unsigned i = 0; i < 4; ++i) {
- if (BVN->getOperand(i).isUndef()) continue;
- if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
- IsConst = false;
- break;
- }
- }
-
- if (IsConst) {
- Constant *One =
- ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
- Constant *NegOne =
- ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
-
- Constant *CV[4];
- for (unsigned i = 0; i < 4; ++i) {
- if (BVN->getOperand(i).isUndef())
- CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
- else if (isNullConstant(BVN->getOperand(i)))
- CV[i] = NegOne;
- else
- CV[i] = One;
- }
-
- Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx =
- DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), Align(16));
-
- SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
- SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
- return DAG.getMemIntrinsicNode(
- PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
- }
-
- SmallVector<SDValue, 4> Stores;
- for (unsigned i = 0; i < 4; ++i) {
- if (BVN->getOperand(i).isUndef()) continue;
-
- unsigned Offset = 4*i;
- SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
-
- unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
- if (StoreSize > 4) {
- Stores.push_back(
- DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
- PtrInfo.getWithOffset(Offset), MVT::i32));
- } else {
- SDValue StoreValue = BVN->getOperand(i);
- if (StoreSize < 4)
- StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
-
- Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
- PtrInfo.getWithOffset(Offset)));
- }
- }
-
- SDValue StoreChain;
- if (!Stores.empty())
- StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
- else
- StoreChain = DAG.getEntryNode();
-
- // Now load from v4i32 into the QPX register; this will extend it to
- // v4i64 but not yet convert it to a floating point. Nevertheless, this
- // is typed as v4f64 because the QPX register integer states are not
- // explicitly represented.
-
- SDValue Ops[] = {StoreChain,
- DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
- FIdx};
- SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
-
- SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
- dl, VTs, Ops, MVT::v4i32, PtrInfo);
- LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
- DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
- LoadedVect);
-
- SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
-
- return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
- }
-
- // All other QPX vectors are handled by generic code.
- if (Subtarget.hasQPX())
- return SDValue();
-
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
@@ -9279,13 +8660,44 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// If it is a splat of a double, check if we can shrink it to a 32 bit
// non-denormal float which when converted back to double gives us the same
// double. This is to exploit the XXSPLTIDP instruction.
- if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
- (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
- convertToNonDenormSingle(APSplatBits)) {
- SDValue SplatNode = DAG.getNode(
- PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
- DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
- return DAG.getBitcast(Op.getValueType(), SplatNode);
+ // If we lose precision, we use XXSPLTI32DX.
+ if (BVNIsConstantSplat && (SplatBitSize == 64) &&
+ Subtarget.hasPrefixInstrs()) {
+ // Check the type first to short-circuit so we don't modify APSplatBits if
+ // this block isn't executed.
+ if ((Op->getValueType(0) == MVT::v2f64) &&
+ convertToNonDenormSingle(APSplatBits)) {
+ SDValue SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+ DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+ return DAG.getBitcast(Op.getValueType(), SplatNode);
+ } else {
+ // We may lose precision, so we have to use XXSPLTI32DX.
+
+ uint32_t Hi =
+ (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
+ uint32_t Lo =
+ (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
+ SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
+
+ if (!Hi || !Lo)
+ // If either load is 0, then we should generate XXLXOR to set to 0.
+ SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
+
+ if (Hi)
+ SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+ DAG.getTargetConstant(0, dl, MVT::i32),
+ DAG.getTargetConstant(Hi, dl, MVT::i32));
+
+ if (Lo)
+ SplatNode =
+ DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+ DAG.getTargetConstant(1, dl, MVT::i32),
+ DAG.getTargetConstant(Lo, dl, MVT::i32));
+
+ return DAG.getBitcast(Op.getValueType(), SplatNode);
+ }
}
if (!BVNIsConstantSplat || SplatBitSize > 32) {
@@ -9304,7 +8716,12 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// Checking for a single use of this load, we have to check for vector
// width (128 bits) / ElementSize uses (since each operand of the
// BUILD_VECTOR is a separate use of the value.
- if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
+ unsigned NumUsesOfInputLD = 128 / ElementSize;
+ for (SDValue BVInOp : Op->ops())
+ if (BVInOp.isUndef())
+ NumUsesOfInputLD--;
+ assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
+ if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
((Subtarget.hasVSX() && ElementSize == 64) ||
(Subtarget.hasP9Vector() && ElementSize == 32))) {
SDValue Ops[] = {
@@ -9312,17 +8729,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
LD->getBasePtr(), // Ptr
DAG.getValueType(Op.getValueType()) // VT
};
- return
- DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
- DAG.getVTList(Op.getValueType(), MVT::Other),
- Ops, LD->getMemoryVT(), LD->getMemOperand());
+ SDValue LdSplt = DAG.getMemIntrinsicNode(
+ PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+ // Replace all uses of the output chain of the original load with the
+ // output chain of the new load.
+ DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
+ LdSplt.getValue(1));
+ return LdSplt;
}
}
- // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
- // lowered to VSX instructions under certain conditions.
+ // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
+ // 32-bits can be lowered to VSX instructions under certain conditions.
// Without VSX, there is no pattern more efficient than expanding the node.
- if (Subtarget.hasVSX() &&
+ if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
Subtarget.hasP8Vector()))
return Op;
@@ -9351,7 +8772,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
// turned into a 4-byte splat of 0xABABABAB.
if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
- return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
+ return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
Op.getValueType(), DAG, dl);
if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
@@ -9447,17 +8868,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
- // vsplti + sra self.
- if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
- SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
- static const unsigned IIDs[] = { // Intrinsic to use for each size.
- Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
- Intrinsic::ppc_altivec_vsraw
- };
- Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
- }
-
// vsplti + rol self.
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
@@ -9957,6 +9367,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue LdSplt =
DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
Ops, LD->getMemoryVT(), LD->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
if (LdSplt.getValueType() != SVOp->getValueType(0))
LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
return LdSplt;
@@ -10060,42 +9471,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
}
}
- if (Subtarget.hasQPX()) {
- if (VT.getVectorNumElements() != 4)
- return SDValue();
-
- if (V2.isUndef()) V2 = V1;
-
- int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
- if (AlignIdx != -1) {
- return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
- DAG.getConstant(AlignIdx, dl, MVT::i32));
- } else if (SVOp->isSplat()) {
- int SplatIdx = SVOp->getSplatIndex();
- if (SplatIdx >= 4) {
- std::swap(V1, V2);
- SplatIdx -= 4;
- }
-
- return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
- DAG.getConstant(SplatIdx, dl, MVT::i32));
- }
-
- // Lower this into a qvgpci/qvfperm pair.
-
- // Compute the qvgpci literal
- unsigned idx = 0;
- for (unsigned i = 0; i < 4; ++i) {
- int m = SVOp->getMaskElt(i);
- unsigned mm = m >= 0 ? (unsigned) m : i;
- idx |= mm << (3-i)*3;
- }
-
- SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
- DAG.getConstant(idx, dl, MVT::i32));
- return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
- }
-
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
@@ -10357,6 +9732,26 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
return false;
break;
+ case Intrinsic::ppc_altivec_vcmpequq:
+ case Intrinsic::ppc_altivec_vcmpgtsq:
+ case Intrinsic::ppc_altivec_vcmpgtuq:
+ if (!Subtarget.isISA3_1())
+ return false;
+ switch (IntrinsicID) {
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpequq:
+ CompareOpc = 455;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsq:
+ CompareOpc = 903;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuq:
+ CompareOpc = 647;
+ break;
+ }
+ break;
+
// VSX predicate comparisons use the same infrastructure
case Intrinsic::ppc_vsx_xvcmpeqdp_p:
case Intrinsic::ppc_vsx_xvcmpgedp_p:
@@ -10480,6 +9875,26 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
else
return false;
break;
+ case Intrinsic::ppc_altivec_vcmpequq_p:
+ case Intrinsic::ppc_altivec_vcmpgtsq_p:
+ case Intrinsic::ppc_altivec_vcmpgtuq_p:
+ if (!Subtarget.isISA3_1())
+ return false;
+ switch (IntrinsicID) {
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpequq_p:
+ CompareOpc = 455;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsq_p:
+ CompareOpc = 903;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuq_p:
+ CompareOpc = 647;
+ break;
+ }
+ isDot = true;
+ break;
}
return true;
}
@@ -10493,11 +9908,32 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
- if (IntrinsicID == Intrinsic::thread_pointer) {
+ switch (IntrinsicID) {
+ case Intrinsic::thread_pointer:
// Reads the thread pointer register, used for __builtin_thread_pointer.
if (Subtarget.isPPC64())
return DAG.getRegister(PPC::X13, MVT::i64);
return DAG.getRegister(PPC::R2, MVT::i32);
+
+ case Intrinsic::ppc_mma_disassemble_acc:
+ case Intrinsic::ppc_vsx_disassemble_pair: {
+ int NumVecs = 2;
+ SDValue WideVec = Op.getOperand(1);
+ if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
+ NumVecs = 4;
+ WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
+ }
+ SmallVector<SDValue, 4> RetOps;
+ for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
+ SDValue Extract = DAG.getNode(
+ PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
+ DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
+ : VecNo,
+ dl, MVT::i64));
+ RetOps.push_back(Extract);
+ }
+ return DAG.getMergeValues(RetOps, dl);
+ }
}
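The new disassemble lowering above walks the 2 (pair) or 4 (accumulator) 128-bit lanes and reverses the extraction index on little-endian subtargets. A small standalone sketch of just that index mapping, with a hypothetical helper name (the real code feeds the index into PPCISD::EXTRACT_VSX_REG):

// Map a logical lane number to the physical subregister index used when
// disassembling an MMA accumulator (NumVecs == 4) or a VSX register pair
// (NumVecs == 2); little-endian targets extract the lanes in reverse order.
static unsigned physicalLaneIndex(unsigned LogicalLane, unsigned NumVecs,
                                  bool IsLittleEndian) {
  return IsLittleEndian ? NumVecs - 1 - LogicalLane : LogicalLane;
}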
// If this is a lowered altivec predicate compare, CompareOpc is set to the
@@ -10522,7 +9958,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(CompareOpc, dl, MVT::i32)
};
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
@@ -10683,154 +10119,51 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return Op;
}
-SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc dl(Op);
- SDNode *N = Op.getNode();
-
- assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
- "Unknown extract_vector_elt type");
-
- SDValue Value = N->getOperand(0);
-
- // The first part of this is like the store lowering except that we don't
- // need to track the chain.
-
- // The values are now known to be -1 (false) or 1 (true). To convert this
- // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
- // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
- Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
-
- // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
- // understand how to form the extending load.
- SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
-
- Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
-
- // Now convert to an integer and store.
- Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
- DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
- Value);
-
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
- MachinePointerInfo PtrInfo =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
-
- SDValue StoreChain = DAG.getEntryNode();
- SDValue Ops[] = {StoreChain,
- DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
- Value, FIdx};
- SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
-
- StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
- dl, VTs, Ops, MVT::v4i32, PtrInfo);
-
- // Extract the value requested.
- unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
-
- SDValue IntVal =
- DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
-
- if (!Subtarget.useCRBits())
- return IntVal;
-
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
-}
-
-/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
SDValue LoadChain = LN->getChain();
SDValue BasePtr = LN->getBasePtr();
+ EVT VT = Op.getValueType();
- if (Op.getValueType() == MVT::v4f64 ||
- Op.getValueType() == MVT::v4f32) {
- EVT MemVT = LN->getMemoryVT();
- unsigned Alignment = LN->getAlignment();
-
- // If this load is properly aligned, then it is legal.
- if (Alignment >= MemVT.getStoreSize())
- return Op;
-
- EVT ScalarVT = Op.getValueType().getScalarType(),
- ScalarMemVT = MemVT.getScalarType();
- unsigned Stride = ScalarMemVT.getStoreSize();
-
- SDValue Vals[4], LoadChains[4];
- for (unsigned Idx = 0; Idx < 4; ++Idx) {
- SDValue Load;
- if (ScalarVT != ScalarMemVT)
- Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
- BasePtr,
- LN->getPointerInfo().getWithOffset(Idx * Stride),
- ScalarMemVT, MinAlign(Alignment, Idx * Stride),
- LN->getMemOperand()->getFlags(), LN->getAAInfo());
- else
- Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
- LN->getPointerInfo().getWithOffset(Idx * Stride),
- MinAlign(Alignment, Idx * Stride),
- LN->getMemOperand()->getFlags(), LN->getAAInfo());
-
- if (Idx == 0 && LN->isIndexed()) {
- assert(LN->getAddressingMode() == ISD::PRE_INC &&
- "Unknown addressing mode on vector load");
- Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
- LN->getAddressingMode());
- }
-
- Vals[Idx] = Load;
- LoadChains[Idx] = Load.getValue(1);
-
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Stride, dl,
- BasePtr.getValueType()));
- }
-
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
-
- if (LN->isIndexed()) {
- SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
- return DAG.getMergeValues(RetOps, dl);
- }
-
- SDValue RetOps[] = { Value, TF };
- return DAG.getMergeValues(RetOps, dl);
- }
-
- assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
- assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
-
- // To lower v4i1 from a byte array, we load the byte elements of the
- // vector and then reuse the BUILD_VECTOR logic.
-
- SDValue VectElmts[4], VectElmtChains[4];
- for (unsigned i = 0; i < 4; ++i) {
- SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
-
- VectElmts[i] = DAG.getExtLoad(
- ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
- LN->getPointerInfo().getWithOffset(i), MVT::i8,
- /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
- VectElmtChains[i] = VectElmts[i].getValue(1);
- }
-
- LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
- SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
+ if (VT != MVT::v256i1 && VT != MVT::v512i1)
+ return Op;
- SDValue RVals[] = { Value, LoadChain };
- return DAG.getMergeValues(RVals, dl);
+ // Type v256i1 is used for pairs and v512i1 is used for accumulators.
+ // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value
+ // into 2 or 4 VSX registers.
+ assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
+ "Type unsupported without MMA");
+ assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
+ "Type unsupported without paired vector support");
+ Align Alignment = LN->getAlign();
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> LoadChains;
+ unsigned NumVecs = VT.getSizeInBits() / 128;
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ SDValue Load =
+ DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
+ LN->getPointerInfo().getWithOffset(Idx * 16),
+ commonAlignment(Alignment, Idx * 16),
+ LN->getMemOperand()->getFlags(), LN->getAAInfo());
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(16, dl, BasePtr.getValueType()));
+ Loads.push_back(Load);
+ LoadChains.push_back(Load.getValue(1));
+ }
+ if (Subtarget.isLittleEndian()) {
+ std::reverse(Loads.begin(), Loads.end());
+ std::reverse(LoadChains.begin(), LoadChains.end());
+ }
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ SDValue Value =
+ DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
+ dl, VT, Loads);
+ SDValue RetOps[] = {Value, TF};
+ return DAG.getMergeValues(RetOps, dl);
}
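The rewritten LowerVectorLoad above drops the QPX paths and instead splits a v256i1 (pair) or v512i1 (accumulator) load into 2 or 4 contiguous 16-byte loads, reversing the chunk order on little-endian targets before rebuilding the wide value. A rough model of that splitting over plain memory, offered only as an illustration of the layout rather than of the SelectionDAG code:

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstring>

// Split a 512-bit (64-byte) accumulator image into four 16-byte chunks, in
// the order the lowering above hands them to the ACC_BUILD node: ascending
// addresses, reversed on little-endian subtargets.
static std::array<std::array<uint8_t, 16>, 4>
splitAccumulatorImage(const uint8_t *Base, bool IsLittleEndian) {
  std::array<std::array<uint8_t, 16>, 4> Chunks;
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    std::memcpy(Chunks[Idx].data(), Base + Idx * 16, 16);
  if (IsLittleEndian)
    std::reverse(Chunks.begin(), Chunks.end());
  return Chunks;
}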
-/// Lowering for QPX v4i1 stores
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -10838,122 +10171,40 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue StoreChain = SN->getChain();
SDValue BasePtr = SN->getBasePtr();
SDValue Value = SN->getValue();
+ EVT StoreVT = Value.getValueType();
- if (Value.getValueType() == MVT::v4f64 ||
- Value.getValueType() == MVT::v4f32) {
- EVT MemVT = SN->getMemoryVT();
- unsigned Alignment = SN->getAlignment();
-
- // If this store is properly aligned, then it is legal.
- if (Alignment >= MemVT.getStoreSize())
- return Op;
-
- EVT ScalarVT = Value.getValueType().getScalarType(),
- ScalarMemVT = MemVT.getScalarType();
- unsigned Stride = ScalarMemVT.getStoreSize();
-
- SDValue Stores[4];
- for (unsigned Idx = 0; Idx < 4; ++Idx) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
- DAG.getVectorIdxConstant(Idx, dl));
- SDValue Store;
- if (ScalarVT != ScalarMemVT)
- Store =
- DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
- SN->getPointerInfo().getWithOffset(Idx * Stride),
- ScalarMemVT, MinAlign(Alignment, Idx * Stride),
- SN->getMemOperand()->getFlags(), SN->getAAInfo());
- else
- Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
- SN->getPointerInfo().getWithOffset(Idx * Stride),
- MinAlign(Alignment, Idx * Stride),
- SN->getMemOperand()->getFlags(), SN->getAAInfo());
-
- if (Idx == 0 && SN->isIndexed()) {
- assert(SN->getAddressingMode() == ISD::PRE_INC &&
- "Unknown addressing mode on vector store");
- Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
- SN->getAddressingMode());
- }
-
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Stride, dl,
- BasePtr.getValueType()));
- Stores[Idx] = Store;
- }
-
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
-
- if (SN->isIndexed()) {
- SDValue RetOps[] = { TF, Stores[0].getValue(1) };
- return DAG.getMergeValues(RetOps, dl);
- }
-
- return TF;
- }
-
- assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
- assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
-
- // The values are now known to be -1 (false) or 1 (true). To convert this
- // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
- // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
- Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
-
- // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
- // understand how to form the extending load.
- SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
-
- Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
-
- // Now convert to an integer and store.
- Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
- DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
- Value);
-
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
- MachinePointerInfo PtrInfo =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
-
- SDValue Ops[] = {StoreChain,
- DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
- Value, FIdx};
- SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
-
- StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
- dl, VTs, Ops, MVT::v4i32, PtrInfo);
-
- // Move data into the byte array.
- SDValue Loads[4], LoadChains[4];
- for (unsigned i = 0; i < 4; ++i) {
- unsigned Offset = 4*i;
- SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
-
- Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
- PtrInfo.getWithOffset(Offset));
- LoadChains[i] = Loads[i].getValue(1);
- }
-
- StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
-
- SDValue Stores[4];
- for (unsigned i = 0; i < 4; ++i) {
- SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
-
- Stores[i] = DAG.getTruncStore(
- StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
- MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
- SN->getAAInfo());
- }
-
- StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
+ return Op;
- return StoreChain;
+ // Type v256i1 is used for pairs and v512i1 is used for accumulators.
+ // Here we create 2 or 4 v16i8 stores to store the pair's or accumulator's
+ // underlying registers individually.
+ assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
+ "Type unsupported without MMA");
+ assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
+ "Type unsupported without paired vector support");
+ Align Alignment = SN->getAlign();
+ SmallVector<SDValue, 4> Stores;
+ unsigned NumVecs = 2;
+ if (StoreVT == MVT::v512i1) {
+ Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
+ NumVecs = 4;
+ }
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
+ SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
+ DAG.getConstant(VecNum, dl, MVT::i64));
+ SDValue Store =
+ DAG.getStore(StoreChain, dl, Elt, BasePtr,
+ SN->getPointerInfo().getWithOffset(Idx * 16),
+ commonAlignment(Alignment, Idx * 16),
+ SN->getMemOperand()->getFlags(), SN->getAAInfo());
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(16, dl, BasePtr.getValueType()));
+ Stores.push_back(Store);
+ }
+ SDValue TF = DAG.getTokenFactor(dl, Stores);
+ return TF;
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
@@ -11020,42 +10271,13 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
}
}
-SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
-
- assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
-
- EVT VT = Op.getValueType();
- assert(VT.isVector() &&
- "Only set vector abs as custom, scalar abs shouldn't reach here!");
- assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
- VT == MVT::v16i8) &&
- "Unexpected vector element type!");
- assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
- "Current subtarget doesn't support smax v2i64!");
-
- // For vector abs, it can be lowered to:
- // abs x
- // ==>
- // y = -x
- // smax(x, y)
-
- SDLoc dl(Op);
- SDValue X = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
-
- // SMAX patch https://reviews.llvm.org/D47332
- // hasn't landed yet, so use intrinsic first here.
- // TODO: Should use SMAX directly once SMAX patch landed
- Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
- if (VT == MVT::v2i64)
- BifID = Intrinsic::ppc_altivec_vmaxsd;
- else if (VT == MVT::v8i16)
- BifID = Intrinsic::ppc_altivec_vmaxsh;
- else if (VT == MVT::v16i8)
- BifID = Intrinsic::ppc_altivec_vmaxsb;
+SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ bool IsStrict = Op->isStrictFPOpcode();
+ if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
+ !Subtarget.hasP9Vector())
+ return SDValue();
- return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
+ return Op;
}
// Custom lowering for fpext vf32 to v2f64
@@ -11149,6 +10371,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
// Variable argument lowering.
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
@@ -11168,8 +10392,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -11179,16 +10407,20 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+ case ISD::FSHL: return LowerFunnelShift(Op, DAG);
+ case ISD::FSHR: return LowerFunnelShift(Op, DAG);
+
// Vector-related lowering.
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
- case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND:
+ return LowerFP_ROUND(Op, DAG);
case ISD::ROTL: return LowerROTL(Op, DAG);
// For counter-based loop handling.
@@ -11256,23 +10488,28 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
// LowerFP_TO_INT() can only handle f32 and f64.
- if (N->getOperand(0).getValueType() == MVT::ppcf128)
+ if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
+ MVT::ppcf128)
return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
case ISD::TRUNCATE: {
- EVT TrgVT = N->getValueType(0);
- EVT OpVT = N->getOperand(0).getValueType();
- if (TrgVT.isVector() &&
- isOperationCustom(N->getOpcode(), TrgVT) &&
- OpVT.getSizeInBits() <= 128 &&
- isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
- Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
+ if (!N->getValueType(0).isVector())
+ return;
+ SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
+ if (Lowered)
+ Results.push_back(Lowered);
return;
}
+ case ISD::FSHL:
+ case ISD::FSHR:
+ // Don't handle funnel shifts here.
+ return;
case ISD::BITCAST:
// Don't handle bitcast here.
return;
@@ -11444,17 +10681,88 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
+static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
+ switch(MI.getOpcode()) {
+ default:
+ return false;
+ case PPC::COPY:
+ return TII->isSignExtended(MI);
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHAU:
+ case PPC::LHAU8:
+ case PPC::LHAUX:
+ case PPC::LHAUX8:
+ case PPC::LHAX:
+ case PPC::LHAX8:
+ case PPC::LWA:
+ case PPC::LWAUX:
+ case PPC::LWAX:
+ case PPC::LWAX_32:
+ case PPC::LWA_32:
+ case PPC::PLHA:
+ case PPC::PLHA8:
+ case PPC::PLHA8pc:
+ case PPC::PLHApc:
+ case PPC::PLWA:
+ case PPC::PLWA8:
+ case PPC::PLWA8pc:
+ case PPC::PLWApc:
+ case PPC::EXTSB:
+ case PPC::EXTSB8:
+ case PPC::EXTSB8_32_64:
+ case PPC::EXTSB8_rec:
+ case PPC::EXTSB_rec:
+ case PPC::EXTSH:
+ case PPC::EXTSH8:
+ case PPC::EXTSH8_32_64:
+ case PPC::EXTSH8_rec:
+ case PPC::EXTSH_rec:
+ case PPC::EXTSW:
+ case PPC::EXTSWSLI:
+ case PPC::EXTSWSLI_32_64:
+ case PPC::EXTSWSLI_32_64_rec:
+ case PPC::EXTSWSLI_rec:
+ case PPC::EXTSW_32:
+ case PPC::EXTSW_32_64:
+ case PPC::EXTSW_32_64_rec:
+ case PPC::EXTSW_rec:
+ case PPC::SRAW:
+ case PPC::SRAWI:
+ case PPC::SRAWI_rec:
+ case PPC::SRAW_rec:
+ return true;
+ }
+ return false;
+}
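The helper above lets EmitPartwordAtomicBinary skip a redundant extend when the incoming value is already sign-extended; without an extend, a signed sub-word compare performed in a full 32-bit register sees the wrong sign. A tiny standalone illustration of the failure mode the inserted EXTSB/EXTSH avoids (not backend code):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Reg = 0x80;                       // i8 value -128 held in a 32-bit register
  int32_t WithoutExtend = (int32_t)Reg;      // compares as +128
  int32_t WithExtend = (int32_t)(int8_t)Reg; // compares as -128 (after extsb)
  std::printf("without extend: %d, with extend: %d\n", WithoutExtend, WithExtend);
  // A signed min/max against, say, 0 picks the wrong value in the first case.
  return 0;
}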
+
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
MachineInstr &MI, MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const PPCInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // If this is a signed comparison and the value being compared is not known
+ // to be sign extended, sign extend it here.
+ DebugLoc dl = MI.getDebugLoc();
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ Register incr = MI.getOperand(3).getReg();
+ bool IsSignExtended = Register::isVirtualRegister(incr) &&
+ isSignExtended(*RegInfo.getVRegDef(incr), TII);
+
+ if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
+ Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
+ .addReg(MI.getOperand(3).getReg());
+ MI.getOperand(3).setReg(ValueReg);
+ }
// If we support part-word atomic mnemonics, just use them
if (Subtarget.hasPartwordAtomics())
return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
CmpPred);
- // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
@@ -11464,14 +10772,11 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *F = BB->getParent();
MachineFunction::iterator It = ++BB->getIterator();
Register dest = MI.getOperand(0).getReg();
Register ptrA = MI.getOperand(1).getReg();
Register ptrB = MI.getOperand(2).getReg();
- Register incr = MI.getOperand(3).getReg();
- DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB =
@@ -11485,7 +10790,6 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- MachineRegisterInfo &RegInfo = F->getRegInfo();
const TargetRegisterClass *RC =
is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -12128,9 +11432,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
} else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
MI.getOpcode() == PPC::SELECT_CC_F8 ||
MI.getOpcode() == PPC::SELECT_CC_F16 ||
- MI.getOpcode() == PPC::SELECT_CC_QFRC ||
- MI.getOpcode() == PPC::SELECT_CC_QSRC ||
- MI.getOpcode() == PPC::SELECT_CC_QBRC ||
MI.getOpcode() == PPC::SELECT_CC_VRRC ||
MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
@@ -12140,9 +11441,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_F4 ||
MI.getOpcode() == PPC::SELECT_F8 ||
MI.getOpcode() == PPC::SELECT_F16 ||
- MI.getOpcode() == PPC::SELECT_QFRC ||
- MI.getOpcode() == PPC::SELECT_QSRC ||
- MI.getOpcode() == PPC::SELECT_QBRC ||
MI.getOpcode() == PPC::SELECT_SPE ||
MI.getOpcode() == PPC::SELECT_SPE4 ||
MI.getOpcode() == PPC::SELECT_VRRC ||
@@ -12180,9 +11478,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_F16 ||
MI.getOpcode() == PPC::SELECT_SPE4 ||
MI.getOpcode() == PPC::SELECT_SPE ||
- MI.getOpcode() == PPC::SELECT_QFRC ||
- MI.getOpcode() == PPC::SELECT_QSRC ||
- MI.getOpcode() == PPC::SELECT_QBRC ||
MI.getOpcode() == PPC::SELECT_VRRC ||
MI.getOpcode() == PPC::SELECT_VSFRC ||
MI.getOpcode() == PPC::SELECT_VSSRC ||
@@ -12665,11 +11960,20 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
// Set rounding mode to round-to-zero.
- BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
- BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
+ .addImm(31)
+ .addReg(PPC::RM, RegState::ImplicitDefine);
+
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
+ .addImm(30)
+ .addReg(PPC::RM, RegState::ImplicitDefine);
// Perform addition.
- BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+ auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
+ .addReg(Src1)
+ .addReg(Src2);
+ if (MI.getFlag(MachineInstr::NoFPExcept))
+ MIB.setMIFlag(MachineInstr::NoFPExcept);
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
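The FADDrtz expansion above now marks the MTFSB1/MTFSB0 instructions as implicitly defining RM, so the rounding-mode change cannot be moved or deleted around the add. The sequence it emits (save FPSCR, force round-toward-zero, add, restore) corresponds to the portable pattern sketched below, shown only as an analogy; real code would also need FENV_ACCESS enabled for strict correctness:

#include <cfenv>

// Portable analogue of the MFFS / MTFSB1 31 / MTFSB0 30 / FADD / MTFSFb
// sequence: perform a single addition with round-toward-zero in effect,
// then restore the previous rounding mode.
static double addRoundTowardZero(double A, double B) {
  const int OldMode = std::fegetround(); // save (MFFS)
  std::fesetround(FE_TOWARDZERO);        // set RN to round toward zero
  const double Result = A + B;           // FADD under the temporary mode
  std::fesetround(OldMode);              // restore (MTFSFb)
  return Result;
}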
@@ -12728,10 +12032,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// the immediate to set the bits 62:63 of FPSCR.
unsigned Mode = MI.getOperand(1).getImm();
BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
- .addImm(31);
+ .addImm(31)
+ .addReg(PPC::RM, RegState::ImplicitDefine);
BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
- .addImm(30);
+ .addImm(30)
+ .addReg(PPC::RM, RegState::ImplicitDefine);
} else if (MI.getOpcode() == PPC::SETRND) {
DebugLoc dl = MI.getDebugLoc();
@@ -12841,6 +12147,20 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(NewFPSCRReg)
.addImm(0)
.addImm(0);
+ } else if (MI.getOpcode() == PPC::SETFLM) {
+ DebugLoc Dl = MI.getDebugLoc();
+
+ // Result of setflm is previous FPSCR content, so we need to save it first.
+ Register OldFPSCRReg = MI.getOperand(0).getReg();
+ BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+ // Put bits in 32:63 to FPSCR.
+ Register NewFPSCRReg = MI.getOperand(1).getReg();
+ BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
+ .addImm(255)
+ .addReg(NewFPSCRReg)
+ .addImm(0)
+ .addImm(0);
} else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
return emitProbedAlloca(MI, BB);
@@ -12867,6 +12187,47 @@ static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
return RefinementSteps;
}
+SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
+ const DenormalMode &Mode) const {
+ // We only have VSX Vector Test for software Square Root.
+ EVT VT = Op.getValueType();
+ if (!isTypeLegal(MVT::i1) ||
+ (VT != MVT::f64 &&
+ ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
+ return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
+
+ SDLoc DL(Op);
+ // The output register of FTSQRT is CR field.
+ SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
+ // ftsqrt BF,FRB
+ // Let e_b be the unbiased exponent of the double-precision
+ // floating-point operand in register FRB.
+ // fe_flag is set to 1 if either of the following conditions occurs.
+ // - The double-precision floating-point operand in register FRB is a zero,
+ // a NaN, or an infinity, or a negative value.
+ // - e_b is less than or equal to -970.
+ // Otherwise fe_flag is set to 0.
+ // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
+ // not eligible for iteration. (zero/negative/infinity/nan or unbiased
+ // exponent is less than -970)
+ SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
+ return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
+ FTSQRT, SRIdxVal),
+ 0);
+}
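getSqrtInputTest above keys off ftsqrt's fe_flag, which is set for inputs that the Newton-Raphson refinement cannot handle: zero, negative values, NaN, infinity, or an unbiased exponent of -970 or below. A hedged approximation of that predicate in standard C++, purely to restate the condition (the backend reads the CR bit instead):

#include <cmath>

// Approximate ftsqrt's fe_flag for a double-precision input: true means the
// value is not eligible for the software square-root iteration.
static bool sqrtInputNeedsSpecialCase(double X) {
  const int Cls = std::fpclassify(X);
  if (Cls == FP_ZERO || Cls == FP_NAN || Cls == FP_INFINITE)
    return true;
  if (std::signbit(X)) // negative values
    return true;
  return std::ilogb(X) <= -970; // very small values, including denormals
}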
+
+SDValue
+PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
+ SelectionDAG &DAG) const {
+ // We only have VSX Vector Square Root.
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f64 &&
+ ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
+ return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
+
+ return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
+}
+
SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
int Enabled, int &RefinementSteps,
bool &UseOneConstNR,
@@ -12875,9 +12236,7 @@ SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
- (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
- (VT == MVT::v4f64 && Subtarget.hasQPX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
@@ -12896,9 +12255,7 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
- (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
- (VT == MVT::v4f64 && Subtarget.hasQPX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
@@ -12996,24 +12353,6 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
- case Intrinsic::ppc_qpx_qvlfd:
- case Intrinsic::ppc_qpx_qvlfda:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvlfs:
- case Intrinsic::ppc_qpx_qvlfsa:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvlfcd:
- case Intrinsic::ppc_qpx_qvlfcda:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvlfcs:
- case Intrinsic::ppc_qpx_qvlfcsa:
- VT = MVT::v2f32;
- break;
- case Intrinsic::ppc_qpx_qvlfiwa:
- case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_vsx_lxvw4x:
@@ -13042,24 +12381,6 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
- case Intrinsic::ppc_qpx_qvstfd:
- case Intrinsic::ppc_qpx_qvstfda:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvstfs:
- case Intrinsic::ppc_qpx_qvstfsa:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvstfcd:
- case Intrinsic::ppc_qpx_qvstfcda:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvstfcs:
- case Intrinsic::ppc_qpx_qvstfcsa:
- VT = MVT::v2f32;
- break;
- case Intrinsic::ppc_qpx_qvstfiw:
- case Intrinsic::ppc_qpx_qvstfiwa:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_vsx_stxvw4x:
@@ -13287,11 +12608,13 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
// We don't really care about what is known about the first bit (if
- // anything), so clear it in all masks prior to comparing them.
- Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
- Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
+ // anything), so pretend that it is known zero for both to ensure they can
+ // be compared as constants.
+ Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
+ Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
- if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
+ if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
+ Op1Known.getConstant() != Op2Known.getConstant())
return SDValue();
}
}
@@ -13343,8 +12666,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
// Visit all inputs, collect all binary operations (and, or, xor and
// select) that are all fed by extensions.
while (!BinOps.empty()) {
- SDValue BinOp = BinOps.back();
- BinOps.pop_back();
+ SDValue BinOp = BinOps.pop_back_val();
if (!Visited.insert(BinOp.getNode()).second)
continue;
@@ -13559,8 +12881,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
// Visit all inputs, collect all binary operations (and, or, xor and
// select) that are all fed by truncations.
while (!BinOps.empty()) {
- SDValue BinOp = BinOps.back();
- BinOps.pop_back();
+ SDValue BinOp = BinOps.pop_back_val();
if (!Visited.insert(BinOp.getNode()).second)
continue;
@@ -14157,6 +13478,46 @@ static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// Look for the pattern of a load from a narrow width to i128, feeding
+// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
+// (LXVRZX). This node represents a zero extending load that will be matched
+// to the Load VSX Vector Rightmost instructions.
+static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+
+ // This combine is only eligible for a BUILD_VECTOR of v1i128.
+ if (N->getValueType(0) != MVT::v1i128)
+ return SDValue();
+
+ SDValue Operand = N->getOperand(0);
+ // Proceed with the transformation if the operand to the BUILD_VECTOR
+ // is a load instruction.
+ if (Operand.getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
+ EVT MemoryType = LD->getMemoryVT();
+
+ // This transformation is only valid if we are loading either a byte,
+ // halfword, word, or doubleword.
+ bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
+ MemoryType == MVT::i32 || MemoryType == MVT::i64;
+
+ // Ensure that the load from the narrow width is being zero extended to i128.
+ if (!ValidLDType ||
+ (LD->getExtensionType() != ISD::ZEXTLOAD &&
+ LD->getExtensionType() != ISD::EXTLOAD))
+ return SDValue();
+
+ SDValue LoadOps[] = {
+ LD->getChain(), LD->getBasePtr(),
+ DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
+
+ return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
+ DAG.getVTList(MVT::v1i128, MVT::Other),
+ LoadOps, MemoryType, LD->getMemOperand());
+}
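combineBVZEXTLOAD above only fires when the single BUILD_VECTOR operand is a zero-extending (or any-extending) load of i8, i16, i32 or i64. A compact sketch of just that legality check, written against plain values instead of SDNodes; the enum and helper name are illustrative:

// Mirrors the width/extension check in combineBVZEXTLOAD: only a byte,
// halfword, word, or doubleword load that is zero- or any-extended is
// eligible for the LXVRZX pattern.
enum class ExtKind { Sign, Zero, Any, NonExtending };

static bool eligibleForLXVRZX(unsigned MemBits, ExtKind Ext) {
  const bool ValidWidth =
      MemBits == 8 || MemBits == 16 || MemBits == 32 || MemBits == 64;
  return ValidWidth && (Ext == ExtKind::Zero || Ext == ExtKind::Any);
}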
+
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -14194,6 +13555,14 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
return Reduced;
}
+ // On Power10, the Load VSX Vector Rightmost instructions can be utilized
+ // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
+ // is a load from <valid narrow width> to i128.
+ if (Subtarget.isISA3_1()) {
+ SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
+ if (BVOfZLoad)
+ return BVOfZLoad;
+ }
if (N->getValueType(0) != MVT::v2f64)
return SDValue();
@@ -14495,8 +13864,7 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
EVT Op1VT = N->getOperand(1).getValueType();
EVT ResVT = Val.getValueType();
- // Floating point types smaller than 32 bits are not legal on Power.
- if (ResVT.getScalarSizeInBits() < 32)
+ if (!isTypeLegal(ResVT))
return SDValue();
// Only perform combine for conversion to i64/i32 or power9 i16/i8.
@@ -14590,7 +13958,6 @@ static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
ShuffV[i] += HalfVec;
}
- return;
}
// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
@@ -15059,18 +14426,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT MemVT = LD->getMemoryVT();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
- Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
- Align ScalarABIAlignment = DAG.getDataLayout().getABITypeAlign(STy);
if (LD->isUnindexed() && VT.isVector() &&
((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
// P8 and later hardware should just use LOAD.
!Subtarget.hasP8Vector() &&
(VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v4f32)) ||
- (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
- LD->getAlign() >= ScalarABIAlignment)) &&
+ VT == MVT::v4f32))) &&
LD->getAlign() < ABIAlignment) {
- // This is a type-legal unaligned Altivec or QPX load.
+ // This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
bool isLittleEndian = Subtarget.isLittleEndian();
@@ -15101,24 +14464,13 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// optimization later.
Intrinsic::ID Intr, IntrLD, IntrPerm;
MVT PermCntlTy, PermTy, LDTy;
- if (Subtarget.hasAltivec()) {
- Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
- Intrinsic::ppc_altivec_lvsl;
- IntrLD = Intrinsic::ppc_altivec_lvx;
- IntrPerm = Intrinsic::ppc_altivec_vperm;
- PermCntlTy = MVT::v16i8;
- PermTy = MVT::v4i32;
- LDTy = MVT::v4i32;
- } else {
- Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
- Intrinsic::ppc_qpx_qvlpcls;
- IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
- Intrinsic::ppc_qpx_qvlfs;
- IntrPerm = Intrinsic::ppc_qpx_qvfperm;
- PermCntlTy = MVT::v4f64;
- PermTy = MVT::v4f64;
- LDTy = MemVT.getSimpleVT();
- }
+ Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
+ : Intrinsic::ppc_altivec_lvsl;
+ IntrLD = Intrinsic::ppc_altivec_lvx;
+ IntrPerm = Intrinsic::ppc_altivec_vperm;
+ PermCntlTy = MVT::v16i8;
+ PermTy = MVT::v4i32;
+ LDTy = MVT::v4i32;
SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
@@ -15189,10 +14541,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
BaseLoad, ExtraLoad, PermCntl, DAG, dl);
if (VT != PermTy)
- Perm = Subtarget.hasAltivec() ?
- DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
- DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
- DAG.getTargetConstant(1, dl, MVT::i64));
+ Perm = Subtarget.hasAltivec()
+ ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
+ : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
+ DAG.getTargetConstant(1, dl, MVT::i64));
// second argument is 1 because this rounding
// is always exact.
@@ -15208,14 +14560,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
: Intrinsic::ppc_altivec_lvsl);
- if ((IID == Intr ||
- IID == Intrinsic::ppc_qpx_qvlpcld ||
- IID == Intrinsic::ppc_qpx_qvlpcls) &&
- N->getOperand(1)->getOpcode() == ISD::ADD) {
+ if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
- int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
- 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
+ int Bits = 4 /* 16 byte alignment */;
if (DAG.MaskedValueIsZero(Add->getOperand(1),
APInt::getAllOnesValue(Bits /* alignment */)
@@ -15225,7 +14573,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
UE = BasePtr->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+ IID) {
// We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
@@ -15357,43 +14706,43 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
case PPCISD::VCMP:
- // If a VCMPo node already exists with exactly the same operands as this
- // node, use its result instead of this node (VCMPo computes both a CR6 and
- // a normal output).
+ // If a VCMP_rec node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMP_rec computes both a CR6
+ // and a normal output).
//
if (!N->getOperand(0).hasOneUse() &&
!N->getOperand(1).hasOneUse() &&
!N->getOperand(2).hasOneUse()) {
- // Scan all of the users of the LHS, looking for VCMPo's that match.
- SDNode *VCMPoNode = nullptr;
+ // Scan all of the users of the LHS, looking for VCMP_rec's that match.
+ SDNode *VCMPrecNode = nullptr;
SDNode *LHSN = N->getOperand(0).getNode();
for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
UI != E; ++UI)
- if (UI->getOpcode() == PPCISD::VCMPo &&
+ if (UI->getOpcode() == PPCISD::VCMP_rec &&
UI->getOperand(1) == N->getOperand(1) &&
UI->getOperand(2) == N->getOperand(2) &&
UI->getOperand(0) == N->getOperand(0)) {
- VCMPoNode = *UI;
+ VCMPrecNode = *UI;
break;
}
- // If there is no VCMPo node, or if the flag value has a single use, don't
- // transform this.
- if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ // If there is no VCMP_rec node, or if the flag value has a single use,
+ // don't transform this.
+ if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
break;
// Look at the (necessarily single) use of the flag value. If it has a
// chain, this transformation is more complex. Note that multiple things
// could use the value result, which we should ignore.
SDNode *FlagUser = nullptr;
- for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
FlagUser == nullptr; ++UI) {
- assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
- if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
+ if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
FlagUser = User;
break;
}
@@ -15403,7 +14752,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// If the user is a MFOCRF instruction, we know this is safe.
// Otherwise we give up for right now.
if (FlagUser->getOpcode() == PPCISD::MFOCRF)
- return SDValue(VCMPoNode, 0);
+ return SDValue(VCMPrecNode, 0);
}
break;
case ISD::BRCOND: {
@@ -15492,7 +14841,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getConstant(CompareOpc, dl, MVT::i32)
};
EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
// Unpack the result based on how the target uses it.
PPC::Predicate CompOpc;
@@ -15587,16 +14936,19 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
case Intrinsic::ppc_altivec_vcmpequd_p:
+ case Intrinsic::ppc_altivec_vcmpequq_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ case Intrinsic::ppc_altivec_vcmpgtsq_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
case Intrinsic::ppc_altivec_vcmpgtud_p:
+ case Intrinsic::ppc_altivec_vcmpgtuq_p:
Known.Zero = ~1U; // All bits but the low one are known to be zero.
break;
}
@@ -15774,17 +15126,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::F4RCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::F8RCRegClass);
- if (VT == MVT::v4f64 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QFRCRegClass);
- if (VT == MVT::v4f32 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QSRCRegClass);
}
break;
case 'v':
- if (VT == MVT::v4f64 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QFRCRegClass);
- if (VT == MVT::v4f32 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QSRCRegClass);
if (Subtarget.hasAltivec())
return std::make_pair(0U, &PPC::VRRCRegClass);
break;
@@ -15803,19 +15147,45 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::VSSRCRegClass);
else
return std::make_pair(0U, &PPC::VSFRCRegClass);
- }
+ } else if (Constraint == "lr") {
+ if (VT == MVT::i64)
+ return std::make_pair(0U, &PPC::LR8RCRegClass);
+ else
+ return std::make_pair(0U, &PPC::LRRCRegClass);
+ }
+
+ // Handle special cases of physical registers that are not properly handled
+ // by the base class.
+ if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
+ // If we name a VSX register, we can't defer to the base class because it
+ // will not recognize the correct register (their names will be VSL{0-31}
+ // and V{0-31} so they won't match). So we match them here.
+ if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
+ int VSNum = atoi(Constraint.data() + 3);
+ assert(VSNum >= 0 && VSNum <= 63 &&
+ "Attempted to access a vsr out of range");
+ if (VSNum < 32)
+ return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
+ return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
+ }
- // If we name a VSX register, we can't defer to the base class because it
- // will not recognize the correct register (their names will be VSL{0-31}
- // and V{0-31} so they won't match). So we match them here.
- if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
- int VSNum = atoi(Constraint.data() + 3);
- assert(VSNum >= 0 && VSNum <= 63 &&
- "Attempted to access a vsr out of range");
- if (VSNum < 32)
- return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
- return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
+ // For float registers, we can't defer to the base class as it will match
+ // the SPILLTOVSRRC class.
+ if (Constraint.size() > 3 && Constraint[1] == 'f') {
+ int RegNum = atoi(Constraint.data() + 2);
+ if (RegNum > 31 || RegNum < 0)
+ report_fatal_error("Invalid floating point register number");
+ if (VT == MVT::f32 || VT == MVT::i32)
+ return Subtarget.hasSPE()
+ ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
+ : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
+ if (VT == MVT::f64 || VT == MVT::i64)
+ return Subtarget.hasSPE()
+ ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
+ : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
+ }
}
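The block above special-cases explicit "{vsN}" and "{fN}" constraints because the generic matcher cannot map those names onto the right register classes. A standalone sketch of the number parsing and the 0-31 / 32-63 split performed for VSX registers; the struct and function names are made up for the example, and the register-class choice itself is omitted:

#include <cstdlib>
#include <string>

// For an explicit "{vsN}" constraint, N in [0, 31] maps onto VSL0..VSL31 and
// N in [32, 63] onto V0..V31. This models only the parsing.
struct ParsedVSReg { bool IsValid; bool IsAltivecHalf; int Index; };

static ParsedVSReg parseVSConstraint(const std::string &Constraint) {
  if (Constraint.size() < 5 || Constraint.compare(0, 3, "{vs") != 0)
    return {false, false, 0};
  const int VSNum = std::atoi(Constraint.c_str() + 3);
  if (VSNum < 0 || VSNum > 63)
    return {false, false, 0};
  return VSNum < 32 ? ParsedVSReg{true, false, VSNum}
                    : ParsedVSReg{true, true, VSNum - 32};
}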
+
std::pair<unsigned, const TargetRegisterClass *> R =
TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
@@ -15920,9 +15290,15 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS, Instruction *I) const {
- // PPC does not allow r+i addressing modes for vectors!
- if (Ty->isVectorTy() && AM.BaseOffs != 0)
+ unsigned AS,
+ Instruction *I) const {
+ // The vector-type r+i form is supported since POWER9 as the DQ form. We
+ // don't check the DQ-form offset requirement (off % 16 == 0) here, because
+ // on PowerPC the imm form is preferred and the offset can be adjusted to use
+ // the imm form later in the PPCLoopInstrFormPrep pass. Also, LSR uses the
+ // min and max offsets of an LSRUse to check for a legal addressing mode, so
+ // we should be a little aggressive here to accommodate other offsets for
+ // that LSRUse.
+ if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
return false;
// PPC allows a sign-extended 16-bit immediate field.
@@ -16076,19 +15452,17 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
- case Intrinsic::ppc_qpx_qvlfd:
- case Intrinsic::ppc_qpx_qvlfs:
- case Intrinsic::ppc_qpx_qvlfcd:
- case Intrinsic::ppc_qpx_qvlfcs:
- case Intrinsic::ppc_qpx_qvlfiwa:
- case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:
case Intrinsic::ppc_altivec_lvehx:
case Intrinsic::ppc_altivec_lvewx:
case Intrinsic::ppc_vsx_lxvd2x:
- case Intrinsic::ppc_vsx_lxvw4x: {
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x_be:
+ case Intrinsic::ppc_vsx_lxvw4x_be:
+ case Intrinsic::ppc_vsx_lxvl:
+ case Intrinsic::ppc_vsx_lxvll: {
EVT VT;
switch (Intrinsic) {
case Intrinsic::ppc_altivec_lvebx:
@@ -16101,20 +15475,9 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
VT = MVT::i32;
break;
case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvd2x_be:
VT = MVT::v2f64;
break;
- case Intrinsic::ppc_qpx_qvlfd:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvlfs:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvlfcd:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvlfcs:
- VT = MVT::v2f32;
- break;
default:
VT = MVT::v4i32;
break;
@@ -16129,52 +15492,17 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOLoad;
return true;
}
- case Intrinsic::ppc_qpx_qvlfda:
- case Intrinsic::ppc_qpx_qvlfsa:
- case Intrinsic::ppc_qpx_qvlfcda:
- case Intrinsic::ppc_qpx_qvlfcsa:
- case Intrinsic::ppc_qpx_qvlfiwaa:
- case Intrinsic::ppc_qpx_qvlfiwza: {
- EVT VT;
- switch (Intrinsic) {
- case Intrinsic::ppc_qpx_qvlfda:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvlfsa:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvlfcda:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvlfcsa:
- VT = MVT::v2f32;
- break;
- default:
- VT = MVT::v4i32;
- break;
- }
-
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = VT;
- Info.ptrVal = I.getArgOperand(0);
- Info.offset = 0;
- Info.size = VT.getStoreSize();
- Info.align = Align(1);
- Info.flags = MachineMemOperand::MOLoad;
- return true;
- }
- case Intrinsic::ppc_qpx_qvstfd:
- case Intrinsic::ppc_qpx_qvstfs:
- case Intrinsic::ppc_qpx_qvstfcd:
- case Intrinsic::ppc_qpx_qvstfcs:
- case Intrinsic::ppc_qpx_qvstfiw:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_altivec_stvebx:
case Intrinsic::ppc_altivec_stvehx:
case Intrinsic::ppc_altivec_stvewx:
case Intrinsic::ppc_vsx_stxvd2x:
- case Intrinsic::ppc_vsx_stxvw4x: {
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x_be:
+ case Intrinsic::ppc_vsx_stxvw4x_be:
+ case Intrinsic::ppc_vsx_stxvl:
+ case Intrinsic::ppc_vsx_stxvll: {
EVT VT;
switch (Intrinsic) {
case Intrinsic::ppc_altivec_stvebx:
@@ -16187,20 +15515,9 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
VT = MVT::i32;
break;
case Intrinsic::ppc_vsx_stxvd2x:
+ case Intrinsic::ppc_vsx_stxvd2x_be:
VT = MVT::v2f64;
break;
- case Intrinsic::ppc_qpx_qvstfd:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvstfs:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvstfcd:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvstfcs:
- VT = MVT::v2f32;
- break;
default:
VT = MVT::v4i32;
break;
@@ -16215,39 +15532,6 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOStore;
return true;
}
- case Intrinsic::ppc_qpx_qvstfda:
- case Intrinsic::ppc_qpx_qvstfsa:
- case Intrinsic::ppc_qpx_qvstfcda:
- case Intrinsic::ppc_qpx_qvstfcsa:
- case Intrinsic::ppc_qpx_qvstfiwa: {
- EVT VT;
- switch (Intrinsic) {
- case Intrinsic::ppc_qpx_qvstfda:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvstfsa:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvstfcda:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvstfcsa:
- VT = MVT::v2f32;
- break;
- default:
- VT = MVT::v4i32;
- break;
- }
-
- Info.opc = ISD::INTRINSIC_VOID;
- Info.memVT = VT;
- Info.ptrVal = I.getArgOperand(1);
- Info.offset = 0;
- Info.size = VT.getStoreSize();
- Info.align = Align(1);
- Info.flags = MachineMemOperand::MOStore;
- return true;
- }
default:
break;
}
@@ -16260,14 +15544,6 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
EVT PPCTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
- // When expanding a memset, require at least two QPX instructions to cover
- // the cost of loading the value to be stored from the constant pool.
- if (Subtarget.hasQPX() && Op.size() >= 32 &&
- (Op.isMemcpy() || Op.size() >= 64) && Op.isAligned(Align(32)) &&
- !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
- return MVT::v4f64;
- }
-
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
if (Subtarget.hasAltivec() && Op.size() >= 16 &&
@@ -16386,6 +15662,33 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return true;
}
+bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const {
+ // Check integral scalar types.
+ if (!VT.isScalarInteger())
+ return false;
+ if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
+ if (!ConstNode->getAPIntValue().isSignedIntN(64))
+ return false;
+ // This transformation will generate >= 2 operations, but the following
+ // cases will generate <= 2 instructions during ISel, so exclude them:
+ // 1. If the constant multiplier fits in 16 bits, it can be handled by a
+ //    single HW instruction, i.e. MULLI.
+ // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
+ //    only one extra shift over case 1 is needed, i.e. MULLI and RLDICR.
+ int64_t Imm = ConstNode->getSExtValue();
+ unsigned Shift = countTrailingZeros<uint64_t>(Imm);
+ Imm >>= Shift;
+ if (isInt<16>(Imm))
+ return false;
+ uint64_t UImm = static_cast<uint64_t>(Imm);
+ if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
+ isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
+ return true;
+ }
+ return false;
+}
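The hook above refuses to decompose a multiply whenever the constant is already cheap (MULLI alone, or MULLI plus one RLDICR after shifting out trailing zeros) and only accepts constants that, once shifted, sit next to a power of two. The same filtering restated as a standalone function on a plain int64_t, as a sketch rather than the ISel code:

#include <cstdint>

static bool isPow2U64(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }
static bool fitsInt16(int64_t V) { return V >= -32768 && V <= 32767; }

// True if multiplying by Imm is worth decomposing into shifts/adds/subs,
// following the same filtering as decomposeMulByConstant above.
static bool shouldDecomposeMul(int64_t Imm) {
  if (Imm == 0)
    return false;                           // trivially handled elsewhere
  const unsigned Shift =
      __builtin_ctzll(static_cast<uint64_t>(Imm)); // GCC/Clang builtin
  Imm >>= Shift;                            // the shift maps to an RLDICR
  if (fitsInt16(Imm))
    return false;                           // MULLI (plus at most one shift)
  const uint64_t UImm = static_cast<uint64_t>(Imm);
  return isPow2U64(UImm + 1) || isPow2U64(UImm - 1) ||
         isPow2U64(1 - UImm) || isPow2U64(-1 - UImm);
}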
+
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
return isFMAFasterThanFMulAndFAdd(
@@ -16405,31 +15708,56 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
}
}
-// Currently this is a copy from AArch64TargetLowering::isProfitableToHoist.
-// FIXME: add more patterns which are profitable to hoist.
+// FIXME: add more patterns which are not profitable to hoist.
bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
- if (I->getOpcode() != Instruction::FMul)
- return true;
-
if (!I->hasOneUse())
return true;
Instruction *User = I->user_back();
assert(User && "A single use instruction with no uses.");
- if (User->getOpcode() != Instruction::FSub &&
- User->getOpcode() != Instruction::FAdd)
- return true;
+ switch (I->getOpcode()) {
+ case Instruction::FMul: {
+ // Don't break FMA, PowerPC prefers FMA.
+ if (User->getOpcode() != Instruction::FSub &&
+ User->getOpcode() != Instruction::FAdd)
+ return true;
- const TargetOptions &Options = getTargetMachine().Options;
- const Function *F = I->getFunction();
- const DataLayout &DL = F->getParent()->getDataLayout();
- Type *Ty = User->getOperand(0)->getType();
+ const TargetOptions &Options = getTargetMachine().Options;
+ const Function *F = I->getFunction();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ Type *Ty = User->getOperand(0)->getType();
+
+ return !(
+ isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+ }
+ case Instruction::Load: {
+ // Don't break the "store (load float*)" pattern; it will be combined into
+ // "store (load int32)" by a later InstCombine pass (see
+ // combineLoadToOperationType). On PowerPC, loading a floating-point value
+ // takes more cycles than loading a 32-bit integer.
+ LoadInst *LI = cast<LoadInst>(I);
+ // For loads that combineLoadToOperationType leaves alone, such as ordered
+ // loads, it should be profitable to hoist them.
+ // A swifterror load can only be of pointer-to-pointer type, so the type
+ // check below gets rid of that case.
+ if (!LI->isUnordered())
+ return true;
+
+ if (User->getOpcode() != Instruction::Store)
+ return true;
+
+ if (I->getType()->getTypeID() != Type::FloatTyID)
+ return true;
- return !(
- isFMAFasterThanFMulAndFAdd(*F, Ty) &&
- isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+ return false;
+ }
+ default:
+ return true;
+ }
+ return true;
}
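A source-level illustration of the two cases the hook above protects (editorial sketch, not part of the diff): the float copy should survive as a 32-bit integer load/store pair, and the multiply should stay next to the add so it can fuse; hoisting the load or the fmul out of its block would defeat either optimization.
// Editorial sketch: source patterns whose intermediate values should not be
// hoisted on PowerPC, per isProfitableToHoist above.
void copy_float(float *Dst, const float *Src) {
  // "store (load float*)": a later InstCombine pass turns this into a 32-bit
  // integer load/store, which is cheaper than going through an FPR.
  *Dst = *Src;
}

double fused(double A, double B, double C) {
  // fmul feeding fadd: kept together so instruction selection can form a
  // fused multiply-add instead of separate multiply and add.
  return A * B + C;
}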
const MCPhysReg *
@@ -16461,7 +15789,7 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
if (VT == MVT::v2i64)
return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
- if (Subtarget.hasVSX() || Subtarget.hasQPX())
+ if (Subtarget.hasVSX())
return true;
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
@@ -16507,8 +15835,7 @@ SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
switch (Opc) {
case PPCISD::FNMSUB:
- // TODO: QPX subtarget is deprecated. No transformation here.
- if (!Op.hasOneUse() || !isTypeLegal(VT) || Subtarget.hasQPX())
+ if (!Op.hasOneUse() || !isTypeLegal(VT))
break;
const TargetOptions &Options = getTargetMachine().Options;
@@ -16637,10 +15964,10 @@ SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
SDValue N0 = N->getOperand(0);
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!Subtarget.isISA3_0() ||
+ if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
N0.getOpcode() != ISD::SIGN_EXTEND ||
- N0.getOperand(0).getValueType() != MVT::i32 ||
- CN1 == nullptr || N->getValueType(0) != MVT::i64)
+ N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
+ N->getValueType(0) != MVT::i64)
return SDValue();
// We can't save an operation here if the value is already extended, and
@@ -16989,8 +16316,7 @@ SDValue PPCTargetLowering::combineFMALike(SDNode *N,
bool LegalOps = !DCI.isBeforeLegalizeOps();
SDLoc Loc(N);
- // TODO: QPX subtarget is deprecated. No transformation here.
- if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT))
+ if (!isOperationLegal(ISD::FMA, VT))
return SDValue();
// Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 768eaa43e013..836c52bdff95 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -89,6 +89,12 @@ namespace llvm {
FRE,
FRSQRTE,
+ /// Test instruction for software square root.
+ FTSQRT,
+
+ /// Square root instruction.
+ FSQRT,
+
/// VPERM - The PPC VPERM Instruction.
///
VPERM,
@@ -146,8 +152,7 @@ namespace llvm {
/// probed.
PROBED_ALLOCA,
- /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
- /// at function entry, used for PIC code.
+ /// The result of the mflr at function entry, used for PIC code.
GlobalBaseReg,
/// These nodes represent PPC shifts.
@@ -265,11 +270,11 @@ namespace llvm {
/// is VCMPGTSH.
VCMP,
- /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
- /// altivec VCMP*o instructions. For lack of better number, we use the
+ /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*_rec instructions. For lack of better number, we use the
/// opcode number encoding for the OPC field to identify the compare. For
/// example, 838 is VCMPGTSH.
- VCMPo,
+ VCMP_rec,
/// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
/// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
@@ -381,6 +386,10 @@ namespace llvm {
/// sym\@got\@dtprel\@l.
ADDI_DTPREL_L,
+ /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS
+ /// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel.
+ PADDI_DTPREL,
+
/// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
/// during instruction selection to optimize a BUILD_VECTOR into
/// operations on splats. This is necessary to avoid losing these
@@ -427,22 +436,6 @@ namespace llvm {
/// => VABSDUW((XVNEGSP a), (XVNEGSP b))
VABSD,
- /// QVFPERM = This corresponds to the QPX qvfperm instruction.
- QVFPERM,
-
- /// QVGPCI = This corresponds to the QPX qvgpci instruction.
- QVGPCI,
-
- /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
- QVALIGNI,
-
- /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
- QVESPLATI,
-
- /// QBFLT = Access the underlying QPX floating-point boolean
- /// representation.
- QBFLT,
-
/// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
/// lower (IDX=1) half of v4f32 to v2f64.
FP_EXTEND_HALF,
@@ -452,6 +445,46 @@ namespace llvm {
/// PLD.
MAT_PCREL_ADDR,
+ /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for
+ /// TLS global address when using dynamic access models. This can be done
+ /// through an add like PADDI.
+ TLS_DYNAMIC_MAT_PCREL_ADDR,
+
+ /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address
+ /// when using local exec access models, and when prefixed instructions are
+ /// available. This is used with ADD_TLS to produce an add like PADDI.
+ TLS_LOCAL_EXEC_MAT_ADDR,
+
+ /// ACC_BUILD = Build an accumulator register from 4 VSX registers.
+ ACC_BUILD,
+
+ /// PAIR_BUILD = Build a vector pair register from 2 VSX registers.
+ PAIR_BUILD,
+
+ /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of
+ /// an accumulator or pair register. This node is needed because
+ /// EXTRACT_SUBVECTOR expects the input and output vectors to have the same
+ /// element type.
+ EXTRACT_VSX_REG,
+
+ /// XXMFACC = This corresponds to the xxmfacc instruction.
+ XXMFACC,
+
+ // Constrained conversion from floating point to int
+ STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+ STRICT_FCTIWZ,
+ STRICT_FCTIDUZ,
+ STRICT_FCTIWUZ,
+
+ /// Constrained integer-to-floating-point conversion instructions.
+ STRICT_FCFID,
+ STRICT_FCFIDU,
+ STRICT_FCFIDS,
+ STRICT_FCFIDUS,
+
+ /// Constrained floating point add in round-to-zero mode.
+ STRICT_FADDRTZ,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -493,6 +526,12 @@ namespace llvm {
/// an xxswapd.
LXVD2X,
+ /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
+ /// This node represents v1i128 BUILD_VECTOR of a zero extending load
+ /// instruction from <byte, halfword, word, or doubleword> to i128.
+ /// Allows utilization of the Load VSX Vector Rightmost Instructions.
+ LXVRZX,
+
/// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
/// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
/// the vector type to load vector in big-endian element order.
@@ -519,10 +558,6 @@ namespace llvm {
/// Store scalar integers from VSR.
ST_VSR_SCAL_INT,
- /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
- /// The 4xf32 load used for v4i1 constants.
- QVLFSb,
-
/// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
/// except they ensure that the compare input is zero-extended for
/// sub-word versions because the atomic loads zero-extend.
@@ -627,10 +662,6 @@ namespace llvm {
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
- /// If this is a qvaligni shuffle mask, return the shift
- /// amount, otherwise return -1.
- int isQVALIGNIShuffleMask(SDNode *N);
-
} // end namespace PPC
class PPCTargetLowering : public TargetLowering {
@@ -740,6 +771,8 @@ namespace llvm {
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG,
MaybeAlign EncodingAlignment) const;
+ bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
@@ -895,6 +928,9 @@ namespace llvm {
return true;
}
+ bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const override;
+
bool isDesirableToTransformToIntegerOp(unsigned Opc,
EVT VT) const override {
// Only handle float load/store pair because float(fpr) load/store
@@ -951,6 +987,9 @@ namespace llvm {
shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const override;
+ // Keep the zero-extensions for arguments to libcalls.
+ bool shouldKeepZExtForFP16Conv() const override { return true; }
+
/// createFastISel - This method returns a target-specific FastISel object,
/// or null if the target does not support "fast" instruction selection.
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
@@ -980,11 +1019,6 @@ namespace llvm {
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
- /// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a
- /// specific type is cheaper than a multiply followed by a shift.
- /// This is true for words and doublewords on 64-bit PowerPC.
- bool isMulhCheaperThanMulShift(EVT Type) const override;
-
/// Override to support customized stack guard loading.
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;
@@ -1042,11 +1076,6 @@ namespace llvm {
}
};
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
- }
-
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
SelectionDAG &DAG,
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
@@ -1099,6 +1128,7 @@ namespace llvm {
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
@@ -1117,19 +1147,18 @@ namespace llvm {
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
@@ -1176,10 +1205,6 @@ namespace llvm {
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
- SDValue LowerFormalArguments_Darwin(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerFormalArguments_64SVR4(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -1194,13 +1219,6 @@ namespace llvm {
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
const SDLoc &dl) const;
- SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, CallFlags CFlags,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- const CallBase *CB) const;
SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
@@ -1257,6 +1275,10 @@ namespace llvm {
bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;
+ SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
+ const DenormalMode &Mode) const override;
+ SDValue getSqrtResultForDenormInput(SDValue Operand,
+ SelectionDAG &DAG) const override;
unsigned combineRepeatedFPDivisors() const override;
SDValue
@@ -1295,6 +1317,8 @@ namespace llvm {
bool isIntS16Immediate(SDNode *N, int16_t &Imm);
bool isIntS16Immediate(SDValue Op, int16_t &Imm);
+ bool isIntS34Immediate(SDNode *N, int64_t &Imm);
+ bool isIntS34Immediate(SDValue Op, int64_t &Imm);
bool convertToNonDenormSingle(APInt &ArgAPInt);
bool convertToNonDenormSingle(APFloat &ArgAPFloat);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index de42d354a048..03e9d6970a30 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -19,12 +19,14 @@ def s16imm64 : Operand<i64> {
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def u16imm64 : Operand<i64> {
let PrintMethod = "printU16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def s17imm64 : Operand<i64> {
// This operand type is used for addis/lis to allow the assembler parser
@@ -34,6 +36,7 @@ def s17imm64 : Operand<i64> {
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS17ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def tocentry : Operand<iPTR> {
let MIOperandInfo = (ops i64imm:$imm);
@@ -148,6 +151,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
def BL8_NOTOC : IForm<18, 0, 1, (outs),
(ins calltarget:$func),
"bl $func", IIC_BrB, []>;
+ def BL8_NOTOC_TLS : IForm<18, 0, 1, (outs),
+ (ins tlscall:$func),
+ "bl $func", IIC_BrB, []>;
}
}
let Uses = [CTR8, RM] in {
@@ -840,7 +846,7 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
}
-def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins i32imm:$L),
+def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins u2imm:$L),
"darn $RT, $L", IIC_LdStLD>, isPPC64;
def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D),
"addpcis $RT, $D", IIC_BrB, []>, isPPC64;
@@ -981,8 +987,11 @@ def : InstAlias<"cntlzw. $rA, $rS", (CNTLZW8_rec g8rc:$rA, g8rc:$rS)>;
def : InstAlias<"mtxer $Rx", (MTSPR8 1, g8rc:$Rx)>;
def : InstAlias<"mfxer $Rx", (MFSPR8 g8rc:$Rx, 1)>;
-def : InstAlias<"mtudscr $Rx", (MTSPR8 3, g8rc:$Rx)>;
-def : InstAlias<"mfudscr $Rx", (MFSPR8 g8rc:$Rx, 3)>;
+// Disable these aliases on AIX for now because the AIX assembler (as) does not support them.
+let Predicates = [ModernAs] in {
+ def : InstAlias<"mtudscr $Rx", (MTSPR8 3, g8rc:$Rx)>;
+ def : InstAlias<"mfudscr $Rx", (MFSPR8 g8rc:$Rx, 3)>;
+}
def : InstAlias<"mfrtcu $Rx", (MFSPR8 g8rc:$Rx, 4)>;
def : InstAlias<"mfrtcl $Rx", (MFSPR8 g8rc:$Rx, 5)>;
@@ -1056,7 +1065,7 @@ def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
"lwa $rD, $src", IIC_LdStLWA,
[(set i64:$rD,
- (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64,
+ (DSFormSextLoadi32 iaddrX4:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
@@ -1167,7 +1176,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
- [(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64;
+ [(set i64:$rD, (DSFormLoad iaddrX4:$src))]>, isPPC64;
// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@@ -1262,17 +1271,36 @@ def ADDItlsgdL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm6
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+
+class GETtlsADDRPseudo <string asmstr> : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ asmstr,
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+class GETtlsldADDRPseudo <string asmstr> : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ asmstr,
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1 in {
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
// explicitly defined when this op is created, so not mentioned here.
// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be
// correct because the branch select pass is relying on it.
-let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
- Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
- "#GETtlsADDR",
- [(set i64:$rD,
- (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
- isPPC64;
+let Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7], Size = 8 in
+def GETtlsADDR : GETtlsADDRPseudo <"#GETtlsADDR">;
+let Defs = [X0,X2,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7], Size = 8 in
+def GETtlsADDRPCREL : GETtlsADDRPseudo <"#GETtlsADDRPCREL">;
+
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR : GETtlsldADDRPseudo <"#GETtlsldADDR">;
+let Defs = [X0,X2,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDRPCREL : GETtlsldADDRPseudo <"#GETtlsldADDRPCREL">;
+}
+
// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
@@ -1296,15 +1324,6 @@ def ADDItlsldL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm6
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
-// explicitly defined when this op is created, so not mentioned here.
-let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
- Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsldADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
- "#GETtlsldADDR",
- [(set i64:$rD,
- (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
- isPPC64;
// Combined op for ADDItlsldL and GETtlsADDR, late expanded. X3 and LR8
// are true defines, while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
@@ -1329,6 +1348,11 @@ def ADDIdtprelL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm
[(set i64:$rD,
(PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
+def PADDIdtprel : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#PADDIdtprel",
+ [(set i64:$rD,
+ (PPCpaddiDtprel i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
@@ -1359,7 +1383,7 @@ def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
"std $rS, $dst", IIC_LdStSTD,
- [(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64;
+ [(DSFormStore i64:$rS, iaddrX4:$dst)]>, isPPC64;
def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
"stdx $rS, $dst", IIC_LdStSTD,
[(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
@@ -1426,7 +1450,7 @@ def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
-def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+def : Pat<(DSFormPreStore i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STDU $rS, iaddroff:$ptroff, $ptrreg)>;
def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
@@ -1444,11 +1468,11 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
//
-let PPC970_Unit = 3, hasSideEffects = 0,
+let PPC970_Unit = 3, hasSideEffects = 0, mayRaiseFPException = 1,
Uses = [RM] in { // FPU Operations.
defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
"fcfid", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCany_fcfid f64:$frB))]>, isPPC64;
defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
"fctid", "$frD, $frB", IIC_FPGeneral,
[]>, isPPC64;
@@ -1457,23 +1481,23 @@ defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB),
[]>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
"fctidz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCany_fctidz f64:$frB))]>, isPPC64;
defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB),
"fcfidu", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCany_fcfidu f64:$frB))]>, isPPC64;
defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB),
"fcfids", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+ [(set f32:$frD, (PPCany_fcfids f64:$frB))]>, isPPC64;
defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB),
"fcfidus", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+ [(set f32:$frD, (PPCany_fcfidus f64:$frB))]>, isPPC64;
defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiduz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCany_fctiduz f64:$frB))]>, isPPC64;
defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwuz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCany_fctiwuz f64:$frB))]>, isPPC64;
}
@@ -1570,11 +1594,11 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
// Patterns to match r+r indexed loads and stores for
// addresses without at least 4-byte alignment.
-def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+def : Pat<(i64 (NonDSFormSextLoadi32 xoaddr:$src)),
(LWAX xoaddr:$src)>;
-def : Pat<(i64 (unaligned4load xoaddr:$src)),
+def : Pat<(i64 (NonDSFormLoad xoaddr:$src)),
(LDX xoaddr:$src)>;
-def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+def : Pat<(NonDSFormStore i64:$rS, xoaddr:$dst),
(STDX $rS, xoaddr:$dst)>;
// 64-bits atomic loads and stores
@@ -1585,6 +1609,11 @@ def : Pat<(atomic_store_64 iaddrX4:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr
def : Pat<(atomic_store_64 xaddrX4:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
let Predicates = [IsISA3_0] in {
+// DARN (deliver random number)
+// L=0 for 32-bit, L=1 for conditioned random, L=2 for raw random
+def : Pat<(int_ppc_darn32), (EXTRACT_SUBREG (DARN 0), sub_32)>;
+def : Pat<(int_ppc_darn), (DARN 1)>;
+def : Pat<(int_ppc_darnraw), (DARN 2)>;
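For reference, the int_ppc_darn* intrinsics matched above are typically reached from C through the PowerPC builtins of the same name in Clang/GCC when targeting POWER9 (a hedged sketch, not part of the diff; exact return types are an assumption and may differ between compiler versions):
// Editorial sketch. Requires ISA 3.0, e.g. -mcpu=power9.
#include <stdint.h>

uint32_t rand32(void)    { return __builtin_darn_32();  }  // DARN with L=0
uint64_t rand_cond(void) { return __builtin_darn();     }  // DARN with L=1, conditioned
uint64_t rand_raw(void)  { return __builtin_darn_raw(); }  // DARN with L=2, raw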
class X_L1_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty,
InstrItinClass itin, list<dag> pattern>
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 920eeed9d41f..1a34aa09315b 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -404,12 +404,14 @@ let isCodeGenOnly = 1 in {
Deprecated<DeprecatedDST>;
}
-def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
- "mfvscr $vD", IIC_LdStStore,
- [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
-def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
- "mtvscr $vB", IIC_LdStLoad,
- [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+let hasSideEffects = 1 in {
+ def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
+ "mfvscr $vD", IIC_LdStStore,
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
+ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
+ "mtvscr $vB", IIC_LdStLoad,
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+}
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src),
@@ -469,10 +471,11 @@ def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
"vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP,
[(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
(fneg v4f32:$vB))))]>;
-
-def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
-def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
- v8i16>;
+let hasSideEffects = 1 in {
+ def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+ def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+ v8i16>;
+}
def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
} // isCommutable
@@ -608,14 +611,16 @@ def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
v4i32, v16i8, v4i32>;
def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
v4i32, v8i16, v4i32>;
-def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
- v4i32, v8i16, v4i32>;
def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
v4i32, v16i8, v4i32>;
def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
v4i32, v8i16, v4i32>;
-def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
- v4i32, v8i16, v4i32>;
+let hasSideEffects = 1 in {
+ def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+ v4i32, v8i16, v4i32>;
+ def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+ v4i32, v8i16, v4i32>;
+}
let isCommutable = 1 in {
def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
@@ -665,15 +670,17 @@ def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>;
def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
-def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
-def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+let hasSideEffects = 1 in {
+ def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+ def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
-def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
- v4i32, v16i8, v4i32>;
-def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
- v4i32, v8i16, v4i32>;
-def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
- v4i32, v16i8, v4i32>;
+ def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+ v4i32, v16i8, v4i32>;
+ def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+ v4i32, v8i16, v4i32>;
+ def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+ v4i32, v16i8, v4i32>;
+}
def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vnor $vD, $vA, $vB", IIC_VecFP,
@@ -742,26 +749,28 @@ def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM),
// Vector Pack.
def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
v8i16, v4i32>;
-def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
- v16i8, v8i16>;
-def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
- v16i8, v8i16>;
-def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
- v8i16, v4i32>;
-def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
- v8i16, v4i32>;
+let hasSideEffects = 1 in {
+ def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+ v16i8, v8i16>;
+ def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+ v16i8, v8i16>;
+ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+ v8i16, v4i32>;
+ def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+ v8i16, v4i32>;
+ def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+ v16i8, v8i16>;
+ def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+ v8i16, v4i32>;
+}
def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpkuhum $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
(vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
- v16i8, v8i16>;
def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpkuwum $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
(vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
- v8i16, v4i32>;
// Vector Unpack.
def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
@@ -784,47 +793,47 @@ class VCMP<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
IIC_VecFPCompare,
[(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
-class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+class VCMP_rec<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
IIC_VecFPCompare,
- [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
+ [(set Ty:$vD, (Ty (PPCvcmp_rec Ty:$vA, Ty:$vB, xo)))]> {
let Defs = [CR6];
let RC = 1;
}
// f32 element comparisons.
def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
-def VCMPBFP_rec : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPBFP_rec : VCMP_rec<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
-def VCMPEQFP_rec : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPEQFP_rec : VCMP_rec<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
-def VCMPGEFP_rec : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP_rec : VCMP_rec<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
-def VCMPGTFP_rec : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP_rec : VCMP_rec<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
// i8 element comparisons.
def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
-def VCMPEQUB_rec : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPEQUB_rec : VCMP_rec< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
-def VCMPGTSB_rec : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB_rec : VCMP_rec<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
-def VCMPGTUB_rec : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB_rec : VCMP_rec<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
// i16 element comparisons.
def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
-def VCMPEQUH_rec : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPEQUH_rec : VCMP_rec< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
-def VCMPGTSH_rec : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH_rec : VCMP_rec<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
-def VCMPGTUH_rec : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH_rec : VCMP_rec<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
// i32 element comparisons.
def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
-def VCMPEQUW_rec : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPEQUW_rec : VCMP_rec<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
-def VCMPGTSW_rec : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW_rec : VCMP_rec<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
-def VCMPGTUW_rec : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW_rec : VCMP_rec<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isReMaterializable = 1 in {
@@ -933,6 +942,18 @@ def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(f128 (bitconvert (v16i8 VRRC:$src))), (f128 VRRC:$src)>;
+def : Pat<(f128 (bitconvert (v8i16 VRRC:$src))), (f128 VRRC:$src)>;
+def : Pat<(f128 (bitconvert (v4i32 VRRC:$src))), (f128 VRRC:$src)>;
+def : Pat<(f128 (bitconvert (v4f32 VRRC:$src))), (f128 VRRC:$src)>;
+def : Pat<(f128 (bitconvert (v2f64 VRRC:$src))), (f128 VRRC:$src)>;
+
+def : Pat<(v16i8 (bitconvert (f128 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (f128 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (f128 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (f128 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v2f64 (bitconvert (f128 VRRC:$src))), (v2f64 VRRC:$src)>;
+
// Max/Min
def : Pat<(v16i8 (umax v16i8:$src1, v16i8:$src2)),
(v16i8 (VMAXUB $src1, $src2))>;
@@ -1291,11 +1312,11 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
// i64 element comparisons.
def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
-def VCMPEQUD_rec : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPEQUD_rec : VCMP_rec<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
-def VCMPGTSD_rec : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD_rec : VCMP_rec<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
-def VCMPGTUD_rec : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD_rec : VCMP_rec<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
// The cryptography instructions that do not require Category:Vector.Crypto
def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
@@ -1306,20 +1327,22 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
int_ppc_altivec_crypto_vpmsumw, v4i32>;
def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
int_ppc_altivec_crypto_vpmsumd, v2i64>;
-def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
- int_ppc_altivec_crypto_vpermxor, v16i8>;
+def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC),
+ "vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>;
// Vector doubleword integer pack and unpack.
-def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss,
- v4i32, v2i64>;
-def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus,
- v4i32, v2i64>;
+let hasSideEffects = 1 in {
+ def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss,
+ v4i32, v2i64>;
+ def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus,
+ v4i32, v2i64>;
+ def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus,
+ v4i32, v2i64>;
+}
def VPKUDUM : VXForm_1<1102, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpkudum $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
(vpkudum_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus,
- v4i32, v2i64>;
def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw,
v2i64, v4i32>;
def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
@@ -1363,21 +1386,21 @@ def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
-def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
+def VCMPNEB_rec : VCMP_rec < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
def VCMPNEZB : VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>;
-def VCMPNEZB_rec : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
+def VCMPNEZB_rec : VCMP_rec<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
// i16 element comparisons.
def VCMPNEH : VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>;
-def VCMPNEH_rec : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
+def VCMPNEH_rec : VCMP_rec< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
def VCMPNEZH : VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>;
-def VCMPNEZH_rec : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
+def VCMPNEZH_rec : VCMP_rec<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
// i32 element comparisons.
def VCMPNEW : VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>;
-def VCMPNEW_rec : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
+def VCMPNEW_rec : VCMP_rec<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
def VCMPNEZW : VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>;
-def VCMPNEZW_rec : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
+def VCMPNEZW_rec : VCMP_rec<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
// VX-Form: [PO VRT / UIM VRB XO].
// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
@@ -1449,11 +1472,16 @@ def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd",
[(set v2i64:$vD, (cttz v2i64:$vB))]>;
// Vector Extend Sign
-def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>;
-def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", []>;
-def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>;
-def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>;
-def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>;
+def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w",
+ [(set v4i32:$vD, (int_ppc_altivec_vextsb2w v16i8:$vB))]>;
+def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w",
+ [(set v4i32:$vD, (int_ppc_altivec_vextsh2w v8i16:$vB))]>;
+def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d",
+ [(set v2i64:$vD, (int_ppc_altivec_vextsb2d v16i8:$vB))]>;
+def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d",
+ [(set v2i64:$vD, (int_ppc_altivec_vextsh2d v8i16:$vB))]>;
+def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d",
+ [(set v2i64:$vD, (int_ppc_altivec_vextsw2d v4i32:$vB))]>;
let isCodeGenOnly = 1 in {
def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>;
def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 632d4d9deb8a..646efe64a22c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -637,12 +637,12 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
}
class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin>
+ InstrItinClass itin, list<dag> pattern>
: XForm_17<opcode, xo, OOL, IOL, asmstr, itin > {
let FRA = 0;
+ let Pattern = pattern;
}
-// Used for QPX
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -1781,14 +1781,6 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
-// Used for QPX
-class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let FRA = 0;
- let FRC = 0;
-}
-
// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -2099,49 +2091,6 @@ class VX_RD5_RSp5_PS1_XO9<bits<9> xo, dag OOL, dag IOL, string asmstr,
let Inst{23-31} = xo;
}
-// Z23-Form (used by QPX)
-class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> FRT;
- bits<5> FRA;
- bits<5> FRB;
- bits<2> idx;
-
- let Pattern = pattern;
-
- bit RC = 0; // set by isRecordForm
-
- let Inst{6-10} = FRT;
- let Inst{11-15} = FRA;
- let Inst{16-20} = FRB;
- let Inst{21-22} = idx;
- let Inst{23-30} = xo;
- let Inst{31} = RC;
-}
-
-class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let FRB = 0;
-}
-
-class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> FRT;
- bits<12> idx;
-
- let Pattern = pattern;
-
- bit RC = 0; // set by isRecordForm
-
- let Inst{6-10} = FRT;
- let Inst{11-22} = idx;
- let Inst{23-30} = xo;
- let Inst{31} = RC;
-}
-
class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 992ad8216f3b..e59a08774dc5 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -164,9 +164,8 @@ def : Pat<(int_ppc_tsuspend),
(TSR 0)>;
def : Pat<(i64 (int_ppc_ttest)),
- (RLDICL (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (TABORTWCI 0, (LI 0), 0), sub_32)),
- 36, 28)>;
+ (i64 (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), (TABORTWCI 0, (LI 0), 0), sub_32))>;
} // [HasHTM]
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index e428e7155e5e..9e3c6c569bd7 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -21,12 +21,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -73,6 +76,14 @@ static cl::opt<bool>
UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
cl::desc("Use the old (incorrect) instruction latency calculation"));
+static cl::opt<float>
+ FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
+ cl::desc("register pressure factor for the transformations."));
+
+static cl::opt<bool> EnableFMARegPressureReduction(
+ "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
+ cl::desc("enable register pressure reduce in machine combiner pass."));
+
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
@@ -259,14 +270,6 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case PPC::XVMULDP:
case PPC::XVMULSP:
case PPC::XSMULSP:
- // QPX Add:
- case PPC::QVFADD:
- case PPC::QVFADDS:
- case PPC::QVFADDSs:
- // QPX Multiply:
- case PPC::QVFMUL:
- case PPC::QVFMULS:
- case PPC::QVFMULSs:
return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
Inst.getFlag(MachineInstr::MIFlag::FmNsz);
// Fixed point:
@@ -286,23 +289,23 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
#define InfoArrayIdxFMULInst 2
#define InfoArrayIdxAddOpIdx 3
#define InfoArrayIdxMULOpIdx 4
+#define InfoArrayIdxFSubInst 5
// Array keeps info for FMA instructions:
// Index 0(InfoArrayIdxFMAInst): FMA instruction;
-// Index 1(InfoArrayIdxFAddInst): ADD instruction assoaicted with FMA;
-// Index 2(InfoArrayIdxFMULInst): MUL instruction assoaicted with FMA;
+// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
+// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
-// second MUL operand index is plus 1.
-static const uint16_t FMAOpIdxInfo[][5] = {
+// second MUL operand index is plus 1;
+// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
+static const uint16_t FMAOpIdxInfo[][6] = {
// FIXME: Add more FMA instructions like XSNMADDADP and so on.
- {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2},
- {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2},
- {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2},
- {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2},
- {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1},
- {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1},
- {PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1},
- {PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}};
+ {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
+ {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
+ {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
+ {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
+ {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
+ {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
// Check if an opcode is a FMA instruction. If it is, return the index in array
// FMAOpIdxInfo. Otherwise, return -1.
@@ -313,6 +316,8 @@ int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
return -1;
}
+// On the PowerPC target, we have two kinds of patterns related to FMA:
+// 1: Improve ILP (instruction-level parallelism).
// Try to reassociate FMA chains like below:
//
// Pattern 1:
@@ -336,11 +341,35 @@ int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
//
// breaking the dependency between A and B, allowing FMA to be executed in
// parallel (or back-to-back in a pipeline) instead of depending on each other.
+//
+// 2: Reduce register pressure.
+// Try to reassociate FMA with FSUB and a constant, as below, where
+// C is a floating-point constant:
+//
+// Pattern 1:
+// A = FSUB X, Y (Leaf)
+// D = FMA B, C, A (Root)
+// -->
+// A = FMA B, Y, -C
+// D = FMA A, X, C
+//
+// Pattern 2:
+// A = FSUB X, Y (Leaf)
+// D = FMA B, A, C (Root)
+// -->
+// A = FMA B, Y, -C
+// D = FMA A, X, C
+//
+// Before the transformation, A must be assigned to a different hardware
+// register than D. After the transformation, A and D must be assigned the
+// same hardware register due to the TIE attribute of FMA instructions.
+//
bool PPCInstrInfo::getFMAPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
MachineBasicBlock *MBB = Root.getParent();
- const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
for (const auto &MO : Instr.explicit_operands())
@@ -349,16 +378,35 @@ bool PPCInstrInfo::getFMAPatterns(
return true;
};
- auto IsReassociable = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
- bool IsLeaf, bool IsAdd) {
- int16_t Idx = -1;
- if (!IsAdd) {
- Idx = getFMAOpIdxInfo(Instr.getOpcode());
- if (Idx < 0)
- return false;
- } else if (Instr.getOpcode() !=
- FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())]
- [InfoArrayIdxFAddInst])
+ auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
+ unsigned OpType) {
+ if (Instr.getOpcode() !=
+ FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
+ return false;
+
+ // Instruction can be reassociated.
+ // fast math flags may prohibit reassociation.
+ if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
+ return false;
+
+ // Instruction operands are virtual registers for reassociation.
+ if (!IsAllOpsVirtualReg(Instr))
+ return false;
+
+ // For register pressure reassociation, the FSub must have only one use as
+ // we want to delete the sub to save its def.
+ if (OpType == InfoArrayIdxFSubInst &&
+ !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
+ return false;
+
+ return true;
+ };
+
+ auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
+ int16_t &MulOpIdx, bool IsLeaf) {
+ int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
+ if (Idx < 0)
return false;
// Instruction can be reassociated.
@@ -371,65 +419,356 @@ bool PPCInstrInfo::getFMAPatterns(
if (!IsAllOpsVirtualReg(Instr))
return false;
- if (IsAdd && IsLeaf)
+ MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
+ if (IsLeaf)
return true;
AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
- MachineInstr *MIAdd = MRI.getUniqueVRegDef(OpAdd.getReg());
+ MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
// If 'add' operand's def is not in current block, don't do ILP related opt.
if (!MIAdd || MIAdd->getParent() != MBB)
return false;
// If this is not Leaf FMA Instr, its 'add' operand should only have one use
// as this fma will be changed later.
- return IsLeaf ? true : MRI.hasOneNonDBGUse(OpAdd.getReg());
+ return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
};
int16_t AddOpIdx = -1;
+ int16_t MulOpIdx = -1;
+
+ bool IsUsedOnceL = false;
+ bool IsUsedOnceR = false;
+ MachineInstr *MULInstrL = nullptr;
+ MachineInstr *MULInstrR = nullptr;
+
+ auto IsRPReductionCandidate = [&]() {
+ // Currently, we only support float and double.
+ // FIXME: add support for other types.
+ unsigned Opcode = Root.getOpcode();
+ if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
+ return false;
+
+ // Root must be a valid FMA like instruction.
+ // Treat it as a leaf since we don't care about its add operand.
+ if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
+ assert((MulOpIdx >= 0) && "mul operand index not right!");
+ Register MULRegL = TRI->lookThruSingleUseCopyChain(
+ Root.getOperand(MulOpIdx).getReg(), MRI);
+ Register MULRegR = TRI->lookThruSingleUseCopyChain(
+ Root.getOperand(MulOpIdx + 1).getReg(), MRI);
+ if (!MULRegL && !MULRegR)
+ return false;
+
+ if (MULRegL && !MULRegR) {
+ MULRegR =
+ TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
+ IsUsedOnceL = true;
+ } else if (!MULRegL && MULRegR) {
+ MULRegL =
+ TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
+ IsUsedOnceR = true;
+ } else {
+ IsUsedOnceL = true;
+ IsUsedOnceR = true;
+ }
+
+ if (!Register::isVirtualRegister(MULRegL) ||
+ !Register::isVirtualRegister(MULRegR))
+ return false;
+
+ MULInstrL = MRI->getVRegDef(MULRegL);
+ MULInstrR = MRI->getVRegDef(MULRegR);
+ return true;
+ }
+ return false;
+ };
+
+ // Register pressure fma reassociation patterns.
+ if (DoRegPressureReduce && IsRPReductionCandidate()) {
+ assert((MULInstrL && MULInstrR) && "wrong register pressure reduction candidate!");
+ // Register pressure pattern 1
+ if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
+ IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
+ LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA);
+ return true;
+ }
+
+ // Register pressure pattern 2
+ if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
+ IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
+ LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC);
+ return true;
+ }
+ }
+
+ // ILP fma reassociation patterns.
// Root must be a valid FMA like instruction.
- if (!IsReassociable(Root, AddOpIdx, false, false))
+ AddOpIdx = -1;
+ if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
return false;
assert((AddOpIdx >= 0) && "add operand index not right!");
Register RegB = Root.getOperand(AddOpIdx).getReg();
- MachineInstr *Prev = MRI.getUniqueVRegDef(RegB);
+ MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
// Prev must be a valid FMA like instruction.
AddOpIdx = -1;
- if (!IsReassociable(*Prev, AddOpIdx, false, false))
+ if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
return false;
assert((AddOpIdx >= 0) && "add operand index not right!");
Register RegA = Prev->getOperand(AddOpIdx).getReg();
- MachineInstr *Leaf = MRI.getUniqueVRegDef(RegA);
+ MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
AddOpIdx = -1;
- if (IsReassociable(*Leaf, AddOpIdx, true, false)) {
+ if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
+ LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
return true;
}
- if (IsReassociable(*Leaf, AddOpIdx, true, true)) {
+ if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM);
+ LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
return true;
}
return false;
}
+void PPCInstrInfo::finalizeInsInstrs(
+ MachineInstr &Root, MachineCombinerPattern &P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) const {
+ assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
+
+ MachineFunction *MF = Root.getMF();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineConstantPool *MCP = MF->getConstantPool();
+
+ int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
+ if (Idx < 0)
+ return;
+
+ uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
+
+ // For now we only need to fix up the placeholder for register pressure
+ // reduction patterns.
+ Register ConstReg = 0;
+ switch (P) {
+ case MachineCombinerPattern::REASSOC_XY_BCA:
+ ConstReg =
+ TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
+ break;
+ case MachineCombinerPattern::REASSOC_XY_BAC:
+ ConstReg =
+ TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
+ break;
+ default:
+ // Not register pressure reduce patterns.
+ return;
+ }
+
+ MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
+ // Get const value from const pool.
+ const Constant *C = getConstantFromConstantPool(ConstDefInstr);
+ assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
+
+ // Get negative fp const.
+ APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
+ F1.changeSign();
+ Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
+ Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
+
+ // Put negative fp const into constant pool.
+ unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
+
+ MachineOperand *Placeholder = nullptr;
+ // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
+ for (auto *Inst : InsInstrs) {
+ for (MachineOperand &Operand : Inst->explicit_operands()) {
+ assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
+ if (Operand.getReg() == PPC::ZERO8) {
+ Placeholder = &Operand;
+ break;
+ }
+ }
+ }
+
+ assert(Placeholder && "Placeholder does not exist!");
+
+ // Generate instructions to load the const fp from constant pool.
+ // We only support PPC64 and medium code model.
+ Register LoadNewConst =
+ generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
+
+ // Fill the placeholder with the new load from constant pool.
+ Placeholder->setReg(LoadNewConst);
+}
+
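The constant negation step above boils down to flipping the sign of an APFloat and re-interning it; a minimal sketch under the assumption of the usual LLVM IR APIs (editorial illustration, not part of the diff):
#include "llvm/IR/Constants.h"
using namespace llvm;

// Produce the -C constant that the rewritten FMA needs as its new addend.
static Constant *negateFPConstant(const ConstantFP *CFP) {
  APFloat F = CFP->getValueAPF(); // copy the value
  F.changeSign();                 // C -> -C (just flips the sign bit)
  return ConstantFP::get(CFP->getContext(), F);
}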
+bool PPCInstrInfo::shouldReduceRegisterPressure(
+ MachineBasicBlock *MBB, RegisterClassInfo *RegClassInfo) const {
+
+ if (!EnableFMARegPressureReduction)
+ return false;
+
+ // Currently, we only enable register pressure reduction in the machine
+ // combiner for: 1: PPC64; 2: medium code model; 3: Power9, which also has
+ // vector support.
+ //
+ // So we need following instructions to access a TOC entry:
+ //
+ // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
+ // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
+ // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
+ //
+ // FIXME: add more supported targets, like Small and Large code model, PPC32,
+ // AIX.
+ if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
+ Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium))
+ return false;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+
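+  // Compute the maximum register pressure of MBB by walking it bottom-up with
+  // a RegPressureTracker; the result is the max pressure for each register
+  // pressure set.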
+ auto GetMBBPressure = [&](MachineBasicBlock *MBB) -> std::vector<unsigned> {
+ RegionPressure Pressure;
+ RegPressureTracker RPTracker(Pressure);
+
+ // Initialize the register pressure tracker.
+ RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
+ /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
+
+ for (MachineBasicBlock::iterator MII = MBB->instr_end(),
+ MIE = MBB->instr_begin();
+ MII != MIE; --MII) {
+ MachineInstr &MI = *std::prev(MII);
+ if (MI.isDebugValue() || MI.isDebugLabel())
+ continue;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI, false, false);
+ RPTracker.recedeSkipDebugValues();
+ assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
+ RPTracker.recede(RegOpers);
+ }
+
+ // Close the RPTracker to finalize live ins.
+ RPTracker.closeRegion();
+
+ return RPTracker.getPressure().MaxSetPressure;
+ };
+
+  // For now we only care about float- and double-typed FMAs.
+ unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
+ *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
+
+ // Only reduce register pressure when pressure is high.
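+  // That is, reassociation for register pressure is only triggered when the
+  // tracked maximum pressure of the VSSRC set exceeds the set limit scaled by
+  // FMARPFactor.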
+ return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
+ (float)VSSRCLimit * FMARPFactor;
+}
+
+bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const {
+  // Check that \p I has exactly one memory operand and that it is a load from
+  // the constant pool.
+ if (!I->hasOneMemOperand())
+ return false;
+
+ MachineMemOperand *Op = I->memoperands()[0];
+ return Op->isLoad() && Op->getPseudoValue() &&
+ Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
+}
+
+Register PPCInstrInfo::generateLoadForNewConst(
+ unsigned Idx, MachineInstr *MI, Type *Ty,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) const {
+  // For now we only support PPC64, the medium code model and P9 with vector
+  // support. The pattern used to access the constant pool is fixed; see
+  // shouldReduceRegisterPressure.
+ assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
+ Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) &&
+ "Target not supported!\n");
+
+ MachineFunction *MF = MI->getMF();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+
+ // Generate ADDIStocHA8
+ Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ MachineInstrBuilder TOCOffset =
+ BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
+ .addReg(PPC::X2)
+ .addConstantPoolIndex(Idx);
+
+ assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
+ "Only float and double are supported!");
+
+ unsigned LoadOpcode;
+ // Should be float type or double type.
+ if (Ty->isFloatTy())
+ LoadOpcode = PPC::DFLOADf32;
+ else
+ LoadOpcode = PPC::DFLOADf64;
+
+ const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
+ Register VReg2 = MRI->createVirtualRegister(RC);
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad,
+ Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty));
+
+ // Generate Load from constant pool.
+ MachineInstrBuilder Load =
+ BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
+ .addConstantPoolIndex(Idx)
+ .addReg(VReg1, getKillRegState(true))
+ .addMemOperand(MMO);
+
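+  // Mark the displacement as the low part of the TOC-relative address; the
+  // ADDIStocHA8 built above supplies the high-adjusted part.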
+ Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
+
+ // Insert the toc load instructions into InsInstrs.
+ InsInstrs.insert(InsInstrs.begin(), Load);
+ InsInstrs.insert(InsInstrs.begin(), TOCOffset);
+ return VReg2;
+}
+
+// This function returns the constant from the constant pool if \p I is a load
+// from the constant pool.
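+// It does so by walking the register uses of \p I back to the instruction
+// that materializes the TOC address (e.g. ADDIStocHA8) and reading its
+// constant-pool-index operand.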
+const Constant *
+PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const {
+ MachineFunction *MF = I->getMF();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ MachineConstantPool *MCP = MF->getConstantPool();
+ assert(I->mayLoad() && "Should be a load instruction.\n");
+ for (auto MO : I->uses()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0 || !Register::isVirtualRegister(Reg))
+ continue;
+ // Find the toc address.
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ for (auto MO2 : DefMI->uses())
+ if (MO2.isCPI())
+ return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
+ }
+ return nullptr;
+}
+
bool PPCInstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
// Using the machine combiner in this way is potentially expensive, so
// restrict to when aggressive optimizations are desired.
if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
return false;
- if (getFMAPatterns(Root, Patterns))
+ if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
return true;
- return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
+ DoRegPressureReduce);
}
void PPCInstrInfo::genAlternativeCodeSequence(
@@ -440,6 +779,8 @@ void PPCInstrInfo::genAlternativeCodeSequence(
switch (Pattern) {
case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ case MachineCombinerPattern::REASSOC_XY_BCA:
+ case MachineCombinerPattern::REASSOC_XY_BAC:
reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
break;
default:
@@ -450,8 +791,6 @@ void PPCInstrInfo::genAlternativeCodeSequence(
}
}
-// Currently, only handle two patterns REASSOC_XY_AMM_BMM and
-// REASSOC_XMM_AMM_BMM. See comments for getFMAPatterns.
void PPCInstrInfo::reassociateFMA(
MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -459,6 +798,7 @@ void PPCInstrInfo::reassociateFMA(
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
MachineFunction *MF = Root.getMF();
MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
MachineOperand &OpC = Root.getOperand(0);
Register RegC = OpC.getReg();
const TargetRegisterClass *RC = MRI.getRegClass(RegC);
@@ -468,13 +808,42 @@ void PPCInstrInfo::reassociateFMA(
int16_t Idx = getFMAOpIdxInfo(FmaOp);
assert(Idx >= 0 && "Root must be a FMA instruction");
+ bool IsILPReassociate =
+ (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) ||
+ (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
+
uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
- MachineInstr *Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
- MachineInstr *Leaf =
- MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
- uint16_t IntersectedFlags =
- Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
+
+ MachineInstr *Prev = nullptr;
+ MachineInstr *Leaf = nullptr;
+ switch (Pattern) {
+ default:
+ llvm_unreachable("not recognized pattern!");
+ case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
+ Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
+ break;
+ case MachineCombinerPattern::REASSOC_XY_BAC: {
+ Register MULReg =
+ TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
+ Leaf = MRI.getVRegDef(MULReg);
+ break;
+ }
+ case MachineCombinerPattern::REASSOC_XY_BCA: {
+ Register MULReg = TRI->lookThruCopyLike(
+ Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
+ Leaf = MRI.getVRegDef(MULReg);
+ break;
+ }
+ }
+
+ uint16_t IntersectedFlags = 0;
+ if (IsILPReassociate)
+ IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
+ else
+ IntersectedFlags = Root.getFlags() & Leaf->getFlags();
auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
bool &KillFlag) {
@@ -484,36 +853,51 @@ void PPCInstrInfo::reassociateFMA(
};
auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
- Register &MulOp2, bool &MulOp1KillFlag,
- bool &MulOp2KillFlag) {
+ Register &MulOp2, Register &AddOp,
+ bool &MulOp1KillFlag, bool &MulOp2KillFlag,
+ bool &AddOpKillFlag) {
GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
+ GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
};
- Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32;
+ Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
+ RegA21, RegB;
bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
- KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false;
+ KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
+ KillA11 = false, KillA21 = false, KillB = false;
- GetFMAInstrInfo(Root, RegM31, RegM32, KillM31, KillM32);
- GetFMAInstrInfo(*Prev, RegM21, RegM22, KillM21, KillM22);
+ GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
+
+ if (IsILPReassociate)
+ GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
- GetFMAInstrInfo(*Leaf, RegM11, RegM12, KillM11, KillM12);
+ GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
} else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
+ } else {
+ // Get FSUB instruction info.
+ GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
+ GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
}
// Create new virtual registers for the new results instead of
// recycling legacy ones because the MachineCombiner's computation of the
// critical path requires a new register definition rather than an existing
// one.
+  // For register pressure reassociation, we only need to create one virtual
+  // register for the new FMA.
Register NewVRA = MRI.createVirtualRegister(RC);
InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
- Register NewVRB = MRI.createVirtualRegister(RC);
- InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
+ Register NewVRB = 0;
+ if (IsILPReassociate) {
+ NewVRB = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
+ }
Register NewVRD = 0;
if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
@@ -532,7 +916,11 @@ void PPCInstrInfo::reassociateFMA(
MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
};
- if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+ MachineInstrBuilder NewARegPressure, NewCRegPressure;
+ switch (Pattern) {
+ default:
+ llvm_unreachable("not recognized pattern!");
+ case MachineCombinerPattern::REASSOC_XY_AMM_BMM: {
// Create new instructions for insertion.
MachineInstrBuilder MINewB =
BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
@@ -565,7 +953,9 @@ void PPCInstrInfo::reassociateFMA(
InsInstrs.push_back(MINewA);
InsInstrs.push_back(MINewB);
InsInstrs.push_back(MINewC);
- } else if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ break;
+ }
+ case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: {
assert(NewVRD && "new FMA register not created!");
// Create new instructions for insertion.
MachineInstrBuilder MINewA =
@@ -607,6 +997,47 @@ void PPCInstrInfo::reassociateFMA(
InsInstrs.push_back(MINewB);
InsInstrs.push_back(MINewD);
InsInstrs.push_back(MINewC);
+ break;
+ }
+ case MachineCombinerPattern::REASSOC_XY_BAC:
+ case MachineCombinerPattern::REASSOC_XY_BCA: {
+ Register VarReg;
+ bool KillVarReg = false;
+ if (Pattern == MachineCombinerPattern::REASSOC_XY_BCA) {
+ VarReg = RegM31;
+ KillVarReg = KillM31;
+ } else {
+ VarReg = RegM32;
+ KillVarReg = KillM32;
+ }
+    // We don't want to load the negative constant from the constant pool too
+    // early, as the created entry will not be deleted even if it ends up with
+    // no users. Since all operands of Leaf and Root are virtual registers, we
+    // use the zero register here as a placeholder. When the InsInstrs
+    // sequence is selected by the MachineCombiner, finalizeInsInstrs is
+    // called to replace the zero register with a virtual register holding the
+    // load from the constant pool.
+ NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
+                          .addReg(RegB, getKillRegState(KillB))
+ .addReg(RegY, getKillRegState(KillY))
+ .addReg(PPC::ZERO8);
+ NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
+ .addReg(NewVRA, getKillRegState(true))
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(VarReg, getKillRegState(KillVarReg));
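+    // For the supported xsmadda* forms (add operand at index 1), the two new
+    // instructions are:
+    //   NewVRA = RegB + RegY * <placeholder>
+    //   RegC   = NewVRA + RegX * VarReg
+    // where the placeholder is later replaced (in finalizeInsInstrs) by a
+    // load of the negated constant.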
+    // For now, we only support xsmaddadp/xsmaddasp, whose add operands are
+    // both at index 1, so no adjustment is needed.
+    // FIXME: when support for more FMA instructions (such as fma/fmas) is
+    // added, adjust the operand index here.
+ break;
+ }
+ }
+
+ if (!IsILPReassociate) {
+ setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
+ setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
+
+ InsInstrs.push_back(NewARegPressure);
+ InsInstrs.push_back(NewCRegPressure);
}
assert(!InsInstrs.empty() &&
@@ -614,7 +1045,8 @@ void PPCInstrInfo::reassociateFMA(
// Record old instructions for deletion.
DelInstrs.push_back(Leaf);
- DelInstrs.push_back(Prev);
+ if (IsILPReassociate)
+ DelInstrs.push_back(Prev);
DelInstrs.push_back(&Root);
}
@@ -666,7 +1098,6 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::LI8:
case PPC::LIS:
case PPC::LIS8:
- case PPC::QVGPCI:
case PPC::ADDIStocHA:
case PPC::ADDIStocHA8:
case PPC::ADDItocL:
@@ -683,6 +1114,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::V_SETALLONES:
case PPC::CRSET:
case PPC::CRUNSET:
+ case PPC::XXSETACCZ:
return true;
}
return false;
@@ -1283,14 +1715,22 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(31);
return;
} else if (PPC::CRRCRegClass.contains(SrcReg) &&
- PPC::G8RCRegClass.contains(DestReg)) {
- BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg).addReg(SrcReg);
- getKillRegState(KillSrc);
- return;
- } else if (PPC::CRRCRegClass.contains(SrcReg) &&
- PPC::GPRCRegClass.contains(DestReg)) {
- BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(SrcReg);
+ (PPC::G8RCRegClass.contains(DestReg) ||
+ PPC::GPRCRegClass.contains(DestReg))) {
+ bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
+ unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
+ unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
+ unsigned CRNum = TRI->getEncodingValue(SrcReg);
+ BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
getKillRegState(KillSrc);
+ if (CRNum == 7)
+ return;
+    // Shift the CR bits so that the CR field ends up in the lowest 4 bits of
+    // the GPR.
+ BuildMI(MBB, I, DL, get(ShCode), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(CRNum * 4 + 4)
+ .addImm(28)
+ .addImm(31);
return;
} else if (PPC::G8RCRegClass.contains(SrcReg) &&
PPC::VSFRCRegClass.contains(DestReg)) {
@@ -1343,17 +1783,53 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
- else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::QVFMR;
- else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::QVFMRs;
- else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::QVFMRb;
+ else if (Subtarget.pairedVectorMemops() &&
+ PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
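+    // A VSRp register pair overlaps two consecutive underlying registers:
+    // VSRp0-VSRp15 map onto VSL0..VSL31 and VSRp16-VSRp31 map onto V0..V31,
+    // so copy each half with an XXLOR.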
+ if (SrcReg > PPC::VSRp15)
+ SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
+ else
+ SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
+ if (DestReg > PPC::VSRp15)
+ DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
+ else
+ DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
+ BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
+ addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
+ addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
+ return;
+ }
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
Opc = PPC::EVOR;
- else
+ else if ((PPC::ACCRCRegClass.contains(DestReg) ||
+ PPC::UACCRCRegClass.contains(DestReg)) &&
+ (PPC::ACCRCRegClass.contains(SrcReg) ||
+ PPC::UACCRCRegClass.contains(SrcReg))) {
+ // If primed, de-prime the source register, copy the individual registers
+ // and prime the destination if needed. The vector subregisters are
+ // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
+ // source is primed, we need to re-prime it after the copy as well.
+ PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
+ bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
+ bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
+ MCRegister VSLSrcReg =
+ PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
+ MCRegister VSLDestReg =
+ PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
+ if (SrcPrimed)
+ BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
+ for (unsigned Idx = 0; Idx < 4; Idx++)
+ BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
+ .addReg(VSLSrcReg + Idx)
+ .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
+ if (DestPrimed)
+ BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
+ if (SrcPrimed && !KillSrc)
+ BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
+ return;
+ } else
llvm_unreachable("Impossible reg-to-reg copy");
const MCInstrDesc &MCID = get(Opc);
@@ -1364,7 +1840,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
-static unsigned getSpillIndex(const TargetRegisterClass *RC) {
+unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
int OpcodeIndex = 0;
if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
@@ -1391,16 +1867,20 @@ static unsigned getSpillIndex(const TargetRegisterClass *RC) {
OpcodeIndex = SOK_VectorFloat8Spill;
} else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_VectorFloat4Spill;
- } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VRSaveSpill;
- } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat8Spill;
- } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat4Spill;
- } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadBitSpill;
} else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_SpillToVSR;
+ } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.pairedVectorMemops() &&
+ "Register unexpected when paired memops are disabled.");
+ OpcodeIndex = SOK_AccumulatorSpill;
+ } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.pairedVectorMemops() &&
+ "Register unexpected when paired memops are disabled.");
+ OpcodeIndex = SOK_UAccumulatorSpill;
+ } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.pairedVectorMemops() &&
+ "Register unexpected when paired memops are disabled.");
+ OpcodeIndex = SOK_PairedVecSpill;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -1437,9 +1917,6 @@ void PPCInstrInfo::StoreRegToStackSlot(
PPC::CRBITRCRegClass.hasSubClassEq(RC))
FuncInfo->setSpillsCR();
- if (PPC::VRSAVERCRegClass.hasSubClassEq(RC))
- FuncInfo->setSpillsVRSAVE();
-
if (isXFormMemOp(Opcode))
FuncInfo->setHasNonRISpills();
}
@@ -1495,9 +1972,6 @@ void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
PPC::CRBITRCRegClass.hasSubClassEq(RC))
FuncInfo->setSpillsCR();
- if (PPC::VRSAVERCRegClass.hasSubClassEq(RC))
- FuncInfo->setSpillsVRSAVE();
-
if (isXFormMemOp(Opcode))
FuncInfo->setHasNonRISpills();
}
@@ -1667,6 +2141,17 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
return false;
}
+bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
+ // across them, since some FP operations may change content of FPSCR.
+ // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
+ if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF)
+ return true;
+ return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
+}
+
bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const {
unsigned OpC = MI.getOpcode();
@@ -1675,6 +2160,10 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
bool isPPC64 = Subtarget.isPPC64();
MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
: (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
+    // Need to add implicit Def and Use operands for CTR.
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg(), RegState::Implicit)
+ .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI.setDesc(get(PPC::BCLR));
MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
@@ -1694,6 +2183,10 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
bool isPPC64 = Subtarget.isPPC64();
MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
: (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
+    // Need to add implicit Def and Use operands for CTR.
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg(), RegState::Implicit)
+ .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
MI.RemoveOperand(0);
@@ -1734,19 +2227,24 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
: (setLR ? PPC::BCCTRL : PPC::BCCTR)));
MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
- return true;
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
: (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
- return true;
+ } else {
+ MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
+ : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .add(Pred[1]);
}
- MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
- : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(Pred[0].getImm())
- .add(Pred[1]);
+  // Need to add implicit Def and Use operands for LR.
+ if (setLR)
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
+ .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
+
return true;
}
@@ -1784,8 +2282,9 @@ bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
return false;
}
-bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const {
+bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
+ std::vector<MachineOperand> &Pred,
+ bool SkipDead) const {
// Note: At the present time, the contents of Pred from this function is
// unused by IfConversion. This implementation follows ARM by pushing the
// CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
@@ -2071,6 +2570,14 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (NewOpC == -1)
return false;
+  // This transformation should not be performed if `nsw` is missing and this
+  // is not an `equalityOnly` comparison, because if there is overflow, the
+  // sub_lt and sub_gt bits in CRReg do not reflect the correct ordering. If
+  // `equalityOnly` is true, sub_eq in CRReg still reflects whether the
+  // compared values are equal, so the optimization remains valid.
+ if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
+ Sub && !Sub->getFlag(MachineInstr::NoSWrap))
+ return false;
+
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
// needs to be updated to be based on SUB. Push the condition code
// operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
@@ -2221,6 +2728,112 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return true;
}
+bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
+ const MachineOperand *BaseOp;
+ OffsetIsScalable = false;
+ if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
+ return false;
+ BaseOps.push_back(BaseOp);
+ return true;
+}
+
+static bool isLdStSafeToCluster(const MachineInstr &LdSt,
+ const TargetRegisterInfo *TRI) {
+ // If this is a volatile load/store, don't mess with it.
+ if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
+ return false;
+
+ if (LdSt.getOperand(2).isFI())
+ return true;
+
+ assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
+  // Can't cluster if the instruction modifies the base register
+  // or is an update form, e.g. ld r2, 3(r2).
+ if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
+ return false;
+
+ return true;
+}
+
+// Only cluster an instruction pair when both instructions have the same (or an
+// equivalent) opcode and the pair is clusterable according to the PowerPC
+// specification.
+static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
+ const PPCSubtarget &Subtarget) {
+ switch (FirstOpc) {
+ default:
+ return false;
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STXSD:
+ case PPC::DFSTOREf64:
+ return FirstOpc == SecondOpc;
+  // The PowerPC backend uses the opcodes STW/STW8 for the "stw" instruction
+  // to handle 32-bit and 64-bit instruction selection. They form a
+  // clusterable pair even though the opcodes differ.
+ case PPC::STW:
+ case PPC::STW8:
+ return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
+ }
+}
+
+bool PPCInstrInfo::shouldClusterMemOps(
+ ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+ unsigned NumBytes) const {
+
+ assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
+ const MachineOperand &BaseOp1 = *BaseOps1.front();
+ const MachineOperand &BaseOp2 = *BaseOps2.front();
+ assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+ "Only base registers and frame indices are supported.");
+
+  // NumLoads is the number of loads that have already been clustered.
+  // Don't cluster another memory op if at least two ops are already clustered.
+ if (NumLoads > 2)
+ return false;
+
+ // Cluster the load/store only when they have the same base
+ // register or FI.
+ if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
+ (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
+ (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
+ return false;
+
+ // Check if the load/store are clusterable according to the PowerPC
+ // specification.
+ const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ const MachineInstr &SecondLdSt = *BaseOp2.getParent();
+ unsigned FirstOpc = FirstLdSt.getOpcode();
+ unsigned SecondOpc = SecondLdSt.getOpcode();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+  // Cluster the loads/stores only when they have the same opcode and that
+  // opcode is clusterable according to the PowerPC specification.
+ if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
+ return false;
+
+ // Can't cluster load/store that have ordered or volatile memory reference.
+ if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
+ !isLdStSafeToCluster(SecondLdSt, TRI))
+ return false;
+
+ int64_t Offset1 = 0, Offset2 = 0;
+ unsigned Width1 = 0, Width2 = 0;
+ const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
+ if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
+ !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
+ Width1 != Width2)
+ return false;
+
+ assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
+ "getMemOperandWithOffsetWidth return incorrect base op");
+ // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
+ assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
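+  // Only treat the pair as clusterable when the second access starts exactly
+  // where the first one ends.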
+ return Offset1 + Width1 == Offset2;
+}
+
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
@@ -2270,7 +2883,14 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
{MO_PLT, "ppc-plt"},
{MO_PIC_FLAG, "ppc-pic"},
{MO_PCREL_FLAG, "ppc-pcrel"},
- {MO_GOT_FLAG, "ppc-got"}};
+ {MO_GOT_FLAG, "ppc-got"},
+ {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
+ {MO_TLSGD_FLAG, "ppc-tlsgd"},
+ {MO_TLSLD_FLAG, "ppc-tlsld"},
+ {MO_TPREL_FLAG, "ppc-tprel"},
+ {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
+ {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
+ {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"}};
return makeArrayRef(TargetFlags);
}
@@ -2351,6 +2971,31 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto DL = MI.getDebugLoc();
switch (MI.getOpcode()) {
+ case PPC::BUILD_UACC: {
+ MCRegister ACC = MI.getOperand(0).getReg();
+ MCRegister UACC = MI.getOperand(1).getReg();
+ if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
+ MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
+ MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
+ // FIXME: This can easily be improved to look up to the top of the MBB
+ // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
+ // we can just re-target any such XXLOR's to DstVSR + offset.
+ for (int VecNo = 0; VecNo < 4; VecNo++)
+ BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
+ .addReg(SrcVSR + VecNo)
+ .addReg(SrcVSR + VecNo);
+ }
+    // BUILD_UACC is expanded to 4 copies of the underlying VSX registers.
+ // So after building the 4 copies, we can replace the BUILD_UACC instruction
+ // with a NOP.
+ LLVM_FALLTHROUGH;
+ }
+ case PPC::KILL_PAIR: {
+ MI.setDesc(get(PPC::UNENCODED_NOP));
+ MI.RemoveOperand(1);
+ MI.RemoveOperand(0);
+ return true;
+ }
case TargetOpcode::LOAD_STACK_GUARD: {
assert(Subtarget.isTargetLinux() &&
"Only Linux target is expected to contain LOAD_STACK_GUARD");
@@ -2642,7 +3287,10 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
}
unsigned PPCInstrInfo::getSpillTarget() const {
- return Subtarget.hasP9Vector() ? 1 : 0;
+ // With P10, we may need to spill paired vector registers or accumulator
+ // registers. MMA implies paired vectors, so we can just check that.
+ bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
+ return IsP10Variant ? 2 : Subtarget.hasP9Vector() ? 1 : 0;
}
const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {
@@ -3033,6 +3681,143 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
+bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
+ MachineInstr **ToErase) const {
+ MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
+ unsigned FoldingReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(FoldingReg))
+ return false;
+ MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
+ if (SrcMI->getOpcode() != PPC::RLWINM &&
+ SrcMI->getOpcode() != PPC::RLWINM_rec &&
+ SrcMI->getOpcode() != PPC::RLWINM8 &&
+ SrcMI->getOpcode() != PPC::RLWINM8_rec)
+ return false;
+ assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
+ MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
+ SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
+ "Invalid PPC::RLWINM Instruction!");
+ uint64_t SHSrc = SrcMI->getOperand(2).getImm();
+ uint64_t SHMI = MI.getOperand(2).getImm();
+ uint64_t MBSrc = SrcMI->getOperand(3).getImm();
+ uint64_t MBMI = MI.getOperand(3).getImm();
+ uint64_t MESrc = SrcMI->getOperand(4).getImm();
+ uint64_t MEMI = MI.getOperand(4).getImm();
+
+ assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
+ "Invalid PPC::RLWINM Instruction!");
+  // If MBMI is bigger than MEMI, we can never get a run of ones.
+ // RotatedSrcMask non-wrap:
+ // 0........31|32........63
+ // RotatedSrcMask: B---E B---E
+ // MaskMI: -----------|--E B------
+ // Result: ----- --- (Bad candidate)
+ //
+ // RotatedSrcMask wrap:
+ // 0........31|32........63
+ // RotatedSrcMask: --E B----|--E B----
+ // MaskMI: -----------|--E B------
+ // Result: --- -----|--- ----- (Bad candidate)
+ //
+ // One special case is RotatedSrcMask is a full set mask.
+ // RotatedSrcMask full:
+ // 0........31|32........63
+ // RotatedSrcMask: ------EB---|-------EB---
+ // MaskMI: -----------|--E B------
+ // Result: -----------|--- ------- (Good candidate)
+
+ // Mark special case.
+ bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
+
+ // For other MBMI > MEMI cases, just return.
+ if ((MBMI > MEMI) && !SrcMaskFull)
+ return false;
+
+ // Handle MBMI <= MEMI cases.
+ APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
+  // In MI we only need the low 32 bits of SrcMI, so consider only the low 32
+  // bits of SrcMI's mask. Note that in APInt the lowest bit is at index 0,
+  // while in the PowerPC ISA the lowest bit is at index 63.
+ APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
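+  // For example (illustrative values only): MBSrc = 28, MESrc = 31 gives
+  // MaskSrc = 0x0000000F, and with SHMI = 4 the rotated mask below becomes
+  // 0x000000F0.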
+
+ APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
+ APInt FinalMask = RotatedSrcMask & MaskMI;
+ uint32_t NewMB, NewME;
+ bool Simplified = false;
+
+ // If final mask is 0, MI result should be 0 too.
+ if (FinalMask.isNullValue()) {
+ bool Is64Bit =
+ (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Replace Instr: ");
+ LLVM_DEBUG(MI.dump());
+
+ if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
+ // Replace MI with "LI 0"
+ MI.RemoveOperand(4);
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.getOperand(1).ChangeToImmediate(0);
+ MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
+ } else {
+ // Replace MI with "ANDI_rec reg, 0"
+ MI.RemoveOperand(4);
+ MI.RemoveOperand(3);
+ MI.getOperand(2).setImm(0);
+ MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ if (SrcMI->getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(true);
+ SrcMI->getOperand(1).setIsKill(false);
+ } else
+ // About to replace MI.getOperand(1), clear its kill flag.
+ MI.getOperand(1).setIsKill(false);
+ }
+
+ LLVM_DEBUG(dbgs() << "With: ");
+ LLVM_DEBUG(MI.dump());
+
+ } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
+ NewMB <= NewME) ||
+ SrcMaskFull) {
+    // Here we only handle the MBMI <= MEMI case, so NewMB must be no bigger
+    // than NewME. Otherwise we would get a 64-bit value after folding, while
+    // MI returns a 32-bit value.
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Converting Instr: ");
+ LLVM_DEBUG(MI.dump());
+
+ uint16_t NewSH = (SHSrc + SHMI) % 32;
+ MI.getOperand(2).setImm(NewSH);
+ // If SrcMI mask is full, no need to update MBMI and MEMI.
+ if (!SrcMaskFull) {
+ MI.getOperand(3).setImm(NewMB);
+ MI.getOperand(4).setImm(NewME);
+ }
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ if (SrcMI->getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(true);
+ SrcMI->getOperand(1).setIsKill(false);
+ } else
+ // About to replace MI.getOperand(1), clear its kill flag.
+ MI.getOperand(1).setIsKill(false);
+
+ LLVM_DEBUG(dbgs() << "To: ");
+ LLVM_DEBUG(MI.dump());
+ }
+  if (Simplified && MRI->use_nodbg_empty(FoldingReg) &&
+ !SrcMI->hasImplicitDef()) {
+    // If FoldingReg has no non-debug use and no implicit def (i.e. SrcMI is
+    // not RLWINM_rec or RLWINM8_rec), it is safe to delete its def SrcMI.
+    // Otherwise keep it.
+ *ToErase = SrcMI;
+ LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+ LLVM_DEBUG(SrcMI->dump());
+ }
+ return Simplified;
+}
+
bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
ImmInstrInfo &III, bool PostRA) const {
// The vast majority of the instructions would need their operand 2 replaced
@@ -3754,6 +4539,20 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
}
return false;
}
+ case PPC::SUBFIC:
+ case PPC::SUBFIC8: {
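+    // SUBFIC computes imm - (RA); if RA is known to hold the immediate
+    // SExtImm, the result folds to the constant Minuend - SExtImm (e.g.,
+    // subfic of 10 with RA = li 3 folds to li 7).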
+ // Only transform this if the CARRY implicit operand is dead.
+ if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
+ return false;
+ int64_t Minuend = MI.getOperand(2).getImm();
+ if (isInt<16>(Minuend - SExtImm)) {
+ ReplaceWithLI = true;
+ Is64BitLI = Opc == PPC::SUBFIC8;
+ NewImm = Minuend - SExtImm;
+ break;
+ }
+ return false;
+ }
case PPC::RLDICL:
case PPC::RLDICL_rec:
case PPC::RLDICL_32:
@@ -4640,13 +5439,15 @@ MachineInstr *PPCInstrInfo::findLoopInstr(
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo *TRI) const {
- if (!LdSt.mayLoadOrStore())
+ if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
return false;
// Handle only loads/stores with base register followed by immediate offset.
- if (LdSt.getNumExplicitOperands() != 3)
+ if (!LdSt.getOperand(1).isImm() ||
+ (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
return false;
- if (!LdSt.getOperand(1).isImm() || !LdSt.getOperand(2).isReg())
+ if (!LdSt.getOperand(1).isImm() ||
+ (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
return false;
if (!LdSt.hasOneMemOperand())
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 556c95fef3bd..c6ef1742b722 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -122,61 +122,73 @@ enum SpillOpcodeKey {
SOK_VSXVectorSpill,
SOK_VectorFloat8Spill,
SOK_VectorFloat4Spill,
- SOK_VRSaveSpill,
- SOK_QuadFloat8Spill,
- SOK_QuadFloat4Spill,
- SOK_QuadBitSpill,
SOK_SpillToVSR,
+ SOK_PairedVecSpill,
+ SOK_AccumulatorSpill,
+ SOK_UAccumulatorSpill,
SOK_SPESpill,
SOK_LastOpcodeSpill // This must be last on the enum.
};
// Define list of load and store spill opcodes.
+#define NoInstr PPC::INSTRUCTION_LIST_END
#define Pwr8LoadOpcodes \
{ \
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \
- PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, \
- PPC::SPILLTOVSR_LD, PPC::EVLDD \
+ PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, PPC::EVLDD \
}
#define Pwr9LoadOpcodes \
{ \
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
- PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, \
- PPC::QVLFDXb, PPC::SPILLTOVSR_LD \
+ PPC::DFLOADf32, PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, NoInstr \
+ }
+
+#define Pwr10LoadOpcodes \
+ { \
+ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
+ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
+ PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \
+ PPC::RESTORE_UACC, NoInstr \
}
#define Pwr8StoreOpcodes \
{ \
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
- PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, \
- PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, PPC::SPILLTOVSR_ST, \
- PPC::EVSTDD \
+ PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, \
+ PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, PPC::EVSTDD \
}
#define Pwr9StoreOpcodes \
{ \
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
- PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, \
- PPC::SPILLTOVSR_ST \
+ PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr \
+ }
+
+#define Pwr10StoreOpcodes \
+ { \
+ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
+ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
+ PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \
+ NoInstr \
}
// Initialize arrays for load and store spill opcodes on supported subtargets.
#define StoreOpcodesForSpill \
- { Pwr8StoreOpcodes, Pwr9StoreOpcodes }
+ { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes }
#define LoadOpcodesForSpill \
- { Pwr8LoadOpcodes, Pwr9LoadOpcodes }
+ { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes }
class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
- const unsigned StoreSpillOpcodesArray[2][SOK_LastOpcodeSpill] =
+ const unsigned StoreSpillOpcodesArray[3][SOK_LastOpcodeSpill] =
StoreOpcodesForSpill;
- const unsigned LoadSpillOpcodesArray[2][SOK_LastOpcodeSpill] =
+ const unsigned LoadSpillOpcodesArray[3][SOK_LastOpcodeSpill] =
LoadOpcodesForSpill;
void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill,
@@ -234,11 +246,17 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned getSpillTarget() const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
+ unsigned getSpillIndex(const TargetRegisterClass *RC) const;
int16_t getFMAOpIdxInfo(unsigned Opcode) const;
void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
+ bool isLoadFromConstantPool(MachineInstr *I) const;
+ Register
+ generateLoadForNewConst(unsigned Idx, MachineInstr *MI, Type *Ty,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) const;
+ const Constant *getConstantFromConstantPool(MachineInstr *I) const;
virtual void anchor();
protected:
@@ -273,10 +291,10 @@ public:
}
static bool isSameClassPhysRegCopy(unsigned Opcode) {
- unsigned CopyOpcodes[] =
- { PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
- PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb,
- PPC::CROR, PPC::EVOR, -1U };
+ unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR,
+ PPC::VOR, PPC::XXLOR, PPC::XXLORf,
+ PPC::XSCPSGNDP, PPC::MCRF, PPC::CROR,
+ PPC::EVOR, -1U};
for (int i = 0; CopyOpcodes[i] != -1U; i++)
if (Opcode == CopyOpcodes[i])
return true;
@@ -330,14 +348,29 @@ public:
/// chain ending in \p Root. All potential patterns are output in the \p
/// P array.
bool getFMAPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &P) const;
+ SmallVectorImpl<MachineCombinerPattern> &P,
+ bool DoRegPressureReduce) const;
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in <Root>. All potential patterns are
/// output in the <Pattern> array.
- bool getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &P) const override;
+ bool getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &P,
+ bool DoRegPressureReduce) const override;
+
+ /// On PowerPC, we leverage machine combiner pass to reduce register pressure
+ /// when the register pressure is high for one BB.
+ /// Return true if register pressure for \p MBB is high and ABI is supported
+ /// to reduce register pressure. Otherwise return false.
+ bool
+ shouldReduceRegisterPressure(MachineBasicBlock *MBB,
+ RegisterClassInfo *RegClassInfo) const override;
+
+ /// Fixup the placeholders we put in genAlternativeCodeSequence() for
+ /// MachineCombiner.
+ void
+ finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) const override;
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
@@ -470,14 +503,18 @@ public:
// Predication support.
bool isPredicated(const MachineInstr &MI) const override;
+ bool isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
bool PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const override;
bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
ArrayRef<MachineOperand> Pred2) const override;
- bool DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const override;
+ bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred,
+ bool SkipDead) const override;
// Comparison optimization.
@@ -497,6 +534,20 @@ public:
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;
+ /// Get the base operand and byte offset of an instruction that reads/writes
+ /// memory.
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
+
+ /// Returns true if the two given memory operations should be scheduled
+ /// adjacent.
+ bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2,
+ unsigned NumLoads, unsigned NumBytes) const override;
+
/// Return true if two MIs access different memory addresses and false
/// otherwise
bool
@@ -554,6 +605,7 @@ public:
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
bool foldFrameOffset(MachineInstr &MI) const;
+ bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const;
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index fedbf592af39..724af23542d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -74,6 +74,9 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
]>;
+def SDT_PPCFtsqrt : SDTypeProfile<1, 1, [
+ SDTCisVT<0, i32>]>;
+
def SDT_PPClbrx : SDTypeProfile<1, 2, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
@@ -124,6 +127,8 @@ def SDT_PPCFPMinMax : SDTypeProfile<1, 2, [
def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
+def PPCfsqrt : SDNode<"PPCISD::FSQRT", SDTFPUnaryOp, []>;
+def PPCftsqrt : SDNode<"PPCISD::FTSQRT", SDT_PPCFtsqrt,[]>;
def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
@@ -134,6 +139,28 @@ def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
+def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS",
+ SDTFPRoundOp, [SDNPHasChain]>;
+def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS",
+ SDTFPRoundOp, [SDNPHasChain]>;
+
+def PPCany_fcfid : PatFrags<(ops node:$op),
+ [(PPCfcfid node:$op),
+ (PPCstrict_fcfid node:$op)]>;
+def PPCany_fcfidu : PatFrags<(ops node:$op),
+ [(PPCfcfidu node:$op),
+ (PPCstrict_fcfidu node:$op)]>;
+def PPCany_fcfids : PatFrags<(ops node:$op),
+ [(PPCfcfids node:$op),
+ (PPCstrict_fcfids node:$op)]>;
+def PPCany_fcfidus : PatFrags<(ops node:$op),
+ [(PPCfcfidus node:$op),
+ (PPCstrict_fcfidus node:$op)]>;
+
def PPCcv_fp_to_uint_in_vsr:
SDNode<"PPCISD::FP_TO_UINT_IN_VSR", SDT_PPCcv_fp_to_int, []>;
def PPCcv_fp_to_sint_in_vsr:
@@ -160,7 +187,12 @@ def PPCmffs : SDNode<"PPCISD::MFFS",
// Perform FADD in round-to-zero mode.
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
+def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp,
+ [SDNPHasChain]>;
+def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs),
+ [(PPCfaddrtz node:$lhs, node:$rhs),
+ (PPCstrict_faddrtz node:$lhs, node:$rhs)]>;
def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
@@ -195,6 +227,7 @@ def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR",
SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
+def PPCpaddiDtprel : SDNode<"PPCISD::PADDI_DTPREL", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
@@ -203,16 +236,6 @@ def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
-def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
-def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
-def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
-def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
-
-def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
-
-def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
- [SDNPHasChain, SDNPMayLoad]>;
-
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@@ -225,6 +248,28 @@ def PPCfnmsub : SDNode<"PPCISD::FNMSUB" , SDTFPTernaryOp>;
def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
+def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+
+def PPCany_fctidz : PatFrags<(ops node:$op),
+ [(PPCstrict_fctidz node:$op),
+ (PPCfctidz node:$op)]>;
+def PPCany_fctiwz : PatFrags<(ops node:$op),
+ [(PPCstrict_fctiwz node:$op),
+ (PPCfctiwz node:$op)]>;
+def PPCany_fctiduz : PatFrags<(ops node:$op),
+ [(PPCstrict_fctiduz node:$op),
+ (PPCfctiduz node:$op)]>;
+def PPCany_fctiwuz : PatFrags<(ops node:$op),
+ [(PPCstrict_fctiwuz node:$op),
+ (PPCfctiwuz node:$op)]>;
+
// Move 2 i64 values into a VSX register
def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
SDTypeProfile<1, 2,
@@ -295,7 +340,7 @@ def PPCrfebb : SDNode<"PPCISD::RFEBB", SDT_PPCsc,
[SDNPHasChain, SDNPSideEffect]>;
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
-def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
+def PPCvcmp_rec : SDNode<"PPCISD::VCMP_rec", SDT_PPCvcmp, [SDNPOutGlue]>;
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
[SDNPHasChain, SDNPOptInGlue]>;
@@ -327,6 +372,10 @@ def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>;
// PC Relative Specific Nodes
def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>;
+def PPCtlsdynamatpcreladdr : SDNode<"PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR",
+ SDTIntUnaryOp, []>;
+def PPCtlslocalexecmataddr : SDNode<"PPCISD::TLS_LOCAL_EXEC_MAT_ADDR",
+ SDTIntUnaryOp, []>;
//===----------------------------------------------------------------------===//
// PowerPC specific transformation functions and pattern fragments.
@@ -446,37 +495,41 @@ def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{
return isUInt<32>(Imm);
}]>;
-// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// This is a somewhat weaker condition than actually checking for 4-byte
+// alignment. It is simply checking that the displacement can be represented
+// as an immediate that is a multiple of 4 (i.e. the requirements for DS-Form
+// instructions).
+// But some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
// restricted memrix (4-aligned) constants are alignment sensitive. If these
// offsets are hidden behind TOC entries then the values of the lower-order
// bits cannot be checked directly. As a result, we need to also incorporate
// an alignment check into the relevant patterns.
-def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 4;
+def DSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4;
}]>;
-def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+def DSFormStore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAlignment() >= 4;
+ return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4;
}]>;
-def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 4;
+def DSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4;
}]>;
-def aligned4pre_store : PatFrag<
+def DSFormPreStore : PatFrag<
(ops node:$val, node:$base, node:$offset),
(pre_store node:$val, node:$base, node:$offset), [{
- return cast<StoreSDNode>(N)->getAlignment() >= 4;
+ return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4;
}]>;
-def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() < 4;
+def NonDSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
}]>;
-def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+def NonDSFormStore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAlignment() < 4;
+ return cast<StoreSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
}]>;
-def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() < 4;
+def NonDSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
}]>;
// This is a somewhat weaker condition than actually checking for 16-byte
@@ -617,6 +670,7 @@ def PPCU1ImmAsmOperand : AsmOperandClass {
def u1imm : Operand<i32> {
let PrintMethod = "printU1ImmOperand";
let ParserMatchClass = PPCU1ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU2ImmAsmOperand : AsmOperandClass {
@@ -626,6 +680,7 @@ def PPCU2ImmAsmOperand : AsmOperandClass {
def u2imm : Operand<i32> {
let PrintMethod = "printU2ImmOperand";
let ParserMatchClass = PPCU2ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCATBitsAsHintAsmOperand : AsmOperandClass {
@@ -635,6 +690,7 @@ def PPCATBitsAsHintAsmOperand : AsmOperandClass {
def atimm : Operand<i32> {
let PrintMethod = "printATBitsAsHint";
let ParserMatchClass = PPCATBitsAsHintAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU3ImmAsmOperand : AsmOperandClass {
@@ -644,6 +700,7 @@ def PPCU3ImmAsmOperand : AsmOperandClass {
def u3imm : Operand<i32> {
let PrintMethod = "printU3ImmOperand";
let ParserMatchClass = PPCU3ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU4ImmAsmOperand : AsmOperandClass {
@@ -653,6 +710,7 @@ def PPCU4ImmAsmOperand : AsmOperandClass {
def u4imm : Operand<i32> {
let PrintMethod = "printU4ImmOperand";
let ParserMatchClass = PPCU4ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
@@ -662,6 +720,7 @@ def s5imm : Operand<i32> {
let PrintMethod = "printS5ImmOperand";
let ParserMatchClass = PPCS5ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<5>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU5ImmAsmOperand : AsmOperandClass {
let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
@@ -671,6 +730,7 @@ def u5imm : Operand<i32> {
let PrintMethod = "printU5ImmOperand";
let ParserMatchClass = PPCU5ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<5>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU6ImmAsmOperand : AsmOperandClass {
let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
@@ -680,6 +740,7 @@ def u6imm : Operand<i32> {
let PrintMethod = "printU6ImmOperand";
let ParserMatchClass = PPCU6ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<6>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU7ImmAsmOperand : AsmOperandClass {
let Name = "U7Imm"; let PredicateMethod = "isU7Imm";
@@ -689,6 +750,7 @@ def u7imm : Operand<i32> {
let PrintMethod = "printU7ImmOperand";
let ParserMatchClass = PPCU7ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<7>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU8ImmAsmOperand : AsmOperandClass {
let Name = "U8Imm"; let PredicateMethod = "isU8Imm";
@@ -698,6 +760,7 @@ def u8imm : Operand<i32> {
let PrintMethod = "printU8ImmOperand";
let ParserMatchClass = PPCU8ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<8>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU10ImmAsmOperand : AsmOperandClass {
let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
@@ -707,6 +770,7 @@ def u10imm : Operand<i32> {
let PrintMethod = "printU10ImmOperand";
let ParserMatchClass = PPCU10ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<10>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU12ImmAsmOperand : AsmOperandClass {
let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
@@ -716,6 +780,7 @@ def u12imm : Operand<i32> {
let PrintMethod = "printU12ImmOperand";
let ParserMatchClass = PPCU12ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<12>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
@@ -726,6 +791,7 @@ def s16imm : Operand<i32> {
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU16ImmAsmOperand : AsmOperandClass {
let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
@@ -736,6 +802,7 @@ def u16imm : Operand<i32> {
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS17ImmAsmOperand : AsmOperandClass {
let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
@@ -749,6 +816,7 @@ def s17imm : Operand<i32> {
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS17ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS34ImmAsmOperand : AsmOperandClass {
let Name = "S34Imm";
@@ -757,9 +825,17 @@ def PPCS34ImmAsmOperand : AsmOperandClass {
}
def s34imm : Operand<i64> {
let PrintMethod = "printS34ImmOperand";
- let EncoderMethod = "getImm34Encoding";
+ let EncoderMethod = "getImm34EncodingNoPCRel";
let ParserMatchClass = PPCS34ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<34>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def s34imm_pcrel : Operand<i64> {
+ let PrintMethod = "printS34ImmOperand";
+ let EncoderMethod = "getImm34EncodingPCRel";
+ let ParserMatchClass = PPCS34ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<34>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCImmZeroAsmOperand : AsmOperandClass {
let Name = "ImmZero";
@@ -770,6 +846,7 @@ def immZero : Operand<i32> {
let PrintMethod = "printImmZeroOperand";
let ParserMatchClass = PPCImmZeroAsmOperand;
let DecoderMethod = "decodeImmZeroOperand";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
@@ -915,40 +992,47 @@ def memri : Operand<iPTR> {
let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
let DecoderMethod = "decodeMemRIOperands";
+ let OperandType = "OPERAND_MEMORY";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
+ let OperandType = "OPERAND_MEMORY";
}
def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
let DecoderMethod = "decodeMemRIXOperands";
+ let OperandType = "OPERAND_MEMORY";
}
def memrix16 : Operand<iPTR> { // memri, imm is 16-aligned, 12-bit, Inst{16:27}
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIX16Encoding";
let DecoderMethod = "decodeMemRIX16Operands";
+ let OperandType = "OPERAND_MEMORY";
}
def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getSPE8DisEncoding";
let DecoderMethod = "decodeSPE8Operands";
+ let OperandType = "OPERAND_MEMORY";
}
def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getSPE4DisEncoding";
let DecoderMethod = "decodeSPE4Operands";
+ let OperandType = "OPERAND_MEMORY";
}
def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getSPE2DisEncoding";
let DecoderMethod = "decodeSPE2Operands";
+ let OperandType = "OPERAND_MEMORY";
}
// A single-register address. This is used with the SjLj
@@ -956,6 +1040,7 @@ def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
// G8RC_NOX0 registers.
def memr : Operand<iPTR> {
let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
+ let OperandType = "OPERAND_MEMORY";
}
def PPCTLSRegOperand : AsmOperandClass {
let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
@@ -981,11 +1066,13 @@ def pred : Operand<OtherVT> {
// Define PowerPC specific addressing mode.
// d-form
-def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb"
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb"
// ds-form
-def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
// dq-form
-def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
+def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
+// 8LS:d-form
+def iaddrX34 : ComplexPattern<iPTR, 2, "SelectAddrImmX34", [], []>; // "pstxvp"
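
As a rough reference for what these selectors target, the sketch below spells out the usual displacement constraints of the d, ds, dq and 8LS:d forms; the ranges and alignment requirements are assumptions taken from the ISA, not from this patch:

#include <cstdint>

// Assumed displacement limits for each addressing form above.
bool fitsDForm(int64_t D)    { return D >= -32768 && D <= 32767; }            // 16-bit signed
bool fitsDSForm(int64_t D)   { return fitsDForm(D) && (D % 4) == 0; }         // 4-byte aligned
bool fitsDQForm(int64_t D)   { return fitsDForm(D) && (D % 16) == 0; }        // 16-byte aligned
bool fits8LSDForm(int64_t D) { return D >= -(1LL << 33) && D < (1LL << 33); } // 34-bit signed
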
// The forms below are all x-form addressing modes; we use three different ones so we
// can make an accurate check for x-form instructions in ISEL.
@@ -1031,6 +1118,11 @@ def HasExtDiv : Predicate<"Subtarget->hasExtDiv()">;
def IsISA3_0 : Predicate<"Subtarget->isISA3_0()">;
def HasFPU : Predicate<"Subtarget->hasFPU()">;
def PCRelativeMemops : Predicate<"Subtarget->hasPCRelativeMemops()">;
+def IsNotISA3_1 : Predicate<"!Subtarget->isISA3_1()">;
+
+// The AIX assembler may not be modern enough to support some extended mnemonics.
+def ModernAs: Predicate<"!Subtarget->isAIXABI() || Subtarget->HasModernAIXAs">,
+ AssemblerPredicate<(any_of (not AIXOS), FeatureModernAIXAs)>;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -1389,10 +1481,7 @@ def ADJCALLSTACKUP : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2
"#ADJCALLSTACKUP $amt1 $amt2",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-
-def UPDATE_VRSAVE : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$rS),
- "UPDATE_VRSAVE $rD, $rS", []>;
-}
+} // hasCtrlDep
let Defs = [R1], Uses = [R1] in
def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
@@ -1518,6 +1607,9 @@ def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in),
"#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>;
+
+def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM),
+ "#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>;
}
let Defs = [LR] in
@@ -1567,11 +1659,12 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
"bc 4, $bi, $dst">;
- let isReturn = 1, Uses = [LR, RM] in
+ let isReturn = 1, Uses = [LR, RM] in {
def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi),
"bclr 12, $bi, 0", IIC_BrB, []>;
def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi),
"bclr 4, $bi, 0", IIC_BrB, []>;
+ }
}
let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
@@ -1843,7 +1936,7 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBF : DCB_Form_hint<86, (outs), (ins u5imm:$TH, memrr:$dst),
+def DCBF : DCB_Form_hint<86, (outs), (ins u3imm:$TH, memrr:$dst),
"dcbf $dst, $TH", IIC_LdStDCBF, []>,
PPC970_DGroup_Single;
@@ -2378,7 +2471,7 @@ let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
"stmw $rS, $dst", IIC_LdStLMW, []>;
-def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
+def SYNC : XForm_24_sync<31, 598, (outs), (ins u2imm:$L),
"sync $L", IIC_LdStSync, []>;
let isCodeGenOnly = 1 in {
@@ -2573,37 +2666,26 @@ let isCompare = 1, hasSideEffects = 0 in {
}
}
let PPC970_Unit = 3, Predicates = [HasFPU] in { // FPU Operations.
-//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
-// "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
-let isCompare = 1, hasSideEffects = 0 in {
+let isCompare = 1, mayRaiseFPException = 1, hasSideEffects = 0 in {
def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
"fcmpu $crD, $fA, $fB", IIC_FPCompare>;
- let Interpretation64Bit = 1, isCodeGenOnly = 1 in
- def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
- "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+ def FCMPOS : XForm_17<63, 32, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
+ "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+ "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+ def FCMPOD : XForm_17<63, 32, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+ "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
+ }
}
def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
"ftdiv $crD, $fA, $fB", IIC_FPCompare>;
def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
- "ftsqrt $crD, $fB", IIC_FPCompare>;
-
-let Uses = [RM], mayRaiseFPException = 1 in {
- let hasSideEffects = 0 in {
- defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiw", "$frD, $frB", IIC_FPGeneral,
- []>;
- defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwu", "$frD, $frB", IIC_FPGeneral,
- []>;
- defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
-
- defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
- "frsp", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (any_fpround f64:$frB))]>;
+ "ftsqrt $crD, $fB", IIC_FPCompare,
+ [(set i32:$crD, (PPCftsqrt f64:$fB))]>;
+let mayRaiseFPException = 1, hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,
@@ -2611,9 +2693,7 @@ let Uses = [RM], mayRaiseFPException = 1 in {
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (any_fround f32:$frB))]>;
- }
- let hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
"frip", "$frD, $frB", IIC_FPGeneral,
@@ -2635,6 +2715,22 @@ let Uses = [RM], mayRaiseFPException = 1 in {
defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
"frim", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (any_ffloor f32:$frB))]>;
+}
+
+let Uses = [RM], mayRaiseFPException = 1, hasSideEffects = 0 in {
+ defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiw", "$frD, $frB", IIC_FPGeneral,
+ []>;
+ defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwu", "$frD, $frB", IIC_FPGeneral,
+ []>;
+ defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCany_fctiwz f64:$frB))]>;
+
+ defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
+ "frsp", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (any_fpround f64:$frB))]>;
defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
"fsqrt", "$frD, $frB", IIC_FPSqrtD,
@@ -2642,9 +2738,10 @@ let Uses = [RM], mayRaiseFPException = 1 in {
defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
"fsqrts", "$frD, $frB", IIC_FPSqrtS,
[(set f32:$frD, (any_fsqrt f32:$frB))]>;
- }
- }
}
+}
+
+def : Pat<(PPCfsqrt f64:$frA), (FSQRT $frA)>;
/// Note that FMR is defined as pseudo-ops on the PPC970 because they are
/// often coalesced away and we don't want the dispatch group builder to think
@@ -2689,6 +2786,7 @@ defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
[(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
// Reciprocal estimates.
+let mayRaiseFPException = 1 in {
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
"fre", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfre f64:$frB))]>;
@@ -2702,6 +2800,7 @@ defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
"frsqrtes", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
}
+}
// XL-Form instructions. condition register logical ops.
//
@@ -2862,18 +2961,6 @@ let isCodeGenOnly = 1 in {
def : InstAlias<"mtvrsave $rS", (MTVRSAVE gprc:$rS)>;
def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>;
-// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
-// so we'll need to scavenge a register for it.
-let mayStore = 1 in
-def SPILL_VRSAVE : PPCEmitTimePseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
- "#SPILL_VRSAVE", []>;
-
-// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
-// spilled), so we'll need to scavenge a register for it.
-let mayLoad = 1 in
-def RESTORE_VRSAVE : PPCEmitTimePseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
- "#RESTORE_VRSAVE", []>;
-
let hasSideEffects = 0 in {
// mtocrf's input needs to be prepared by shifting by an amount dependent
// on the cr register selected. Thus, post-ra anti-dep breaking must not
@@ -2913,20 +3000,24 @@ def : InstAlias<"mtcr $rA", (MTCRF 255, gprc:$rA)>;
let Predicates = [HasFPU] in {
// Custom inserter instruction to perform FADD in round-to-zero mode.
-let Uses = [RM] in {
+let Uses = [RM], mayRaiseFPException = 1 in {
def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
- [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+ [(set f64:$FRT, (PPCany_faddrtz f64:$FRA, f64:$FRB))]>;
}
// The above pseudo gets expanded to make use of the following instructions
// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
-let Uses = [RM], Defs = [RM] in {
- def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
- "mtfsb0 $FM", IIC_IntMTFSB0, []>,
- PPC970_DGroup_Single, PPC970_Unit_FPU;
- def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
- "mtfsb1 $FM", IIC_IntMTFSB0, []>,
- PPC970_DGroup_Single, PPC970_Unit_FPU;
+
+// When FM is 30/31, we are setting bit 62/63 of the FPSCR, so the implicit-def
+// RM should be set.
+def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+ "mtfsb0 $FM", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+ "mtfsb1 $FM", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+
+let Defs = [RM] in {
let isCodeGenOnly = 1 in
def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
"mtfsf $FM, $rT", IIC_IntMTFSB0, []>,
@@ -3065,7 +3156,7 @@ def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC_rec gprc:$rA, gprc:$rC, gprc:$rB)>
// this type.
//
let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in { // FPU Operations.
-let Uses = [RM] in {
+let mayRaiseFPException = 1, Uses = [RM] in {
let isCommutable = 1 in {
defm FMADD : AForm_1r<63, 29,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
@@ -3251,9 +3342,13 @@ def : Pat<(PPCcall (i32 texternalsym:$dst)),
// Calls for AIX only
def : Pat<(PPCcall (i32 mcsym:$dst)),
(BL mcsym:$dst)>;
+
def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
(BL_NOP mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i32 texternalsym:$dst)),
+ (BL_NOP texternalsym:$dst)>;
+
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -3263,7 +3358,7 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
(TCRETURNri CTRRC:$dst, imm:$imm)>;
-
+def : Pat<(int_ppc_readflm), (MFFS)>;
// Hi and Lo for Darwin Global Addresses.
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
@@ -3417,7 +3512,7 @@ def : Pat<(f64 (extloadf32 iaddr:$src)),
def : Pat<(f64 (extloadf32 xaddr:$src)),
(COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
-def : Pat<(f64 (fpextend f32:$src)),
+def : Pat<(f64 (any_fpextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
}
@@ -3457,7 +3552,6 @@ include "PPCInstrAltivec.td"
include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
-include "PPCInstrQPX.td"
include "PPCInstrHTM.td"
def crnot : OutPatFrag<(ops node:$in),
@@ -3841,6 +3935,7 @@ def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)),
def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)),
(EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+let Predicates = [IsNotISA3_1] in {
// Instantiations of CRNotPat for i32.
defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)),
(EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
@@ -3898,106 +3993,62 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)),
(EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)),
(EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+}
-let Predicates = [HasFPU] in {
-// Instantiations of CRNotPat for f32.
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
-
-// Instantiations of CRNotPat for f64.
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
-
-// Instantiations of CRNotPat for f128.
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
+multiclass FSetCCPat<SDNode SetCC, ValueType Ty, PatLeaf FCmp> {
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
+
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOLT)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLT)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOGT)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGT)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOEQ)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETEQ)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUO)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
}
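
The FSetCCPat multiclass folds the previously repeated patterns into one table: each condition reads a single CR field bit, negated through CRNotPat where needed. A small C++ sketch of that mapping, with the enums invented for illustration:

#include <utility>

// Invented scoped enums; the real patterns use the CR subregister indices
// sub_lt/sub_gt/sub_eq/sub_un and CRNotPat for the negated cases.
enum class CRBit { LT, GT, EQ, UN };
enum class FPCond { UGE, GE, ULE, LE, UNE, NE, O,   // negated bit (CRNotPat)
                    OLT, LT, OGT, GT, OEQ, EQ, UO };  // bit read directly

std::pair<CRBit, bool /*Negate*/> lowerFPCond(FPCond CC) {
  switch (CC) {
  case FPCond::UGE: case FPCond::GE: return {CRBit::LT, true};
  case FPCond::ULE: case FPCond::LE: return {CRBit::GT, true};
  case FPCond::UNE: case FPCond::NE: return {CRBit::EQ, true};
  case FPCond::O:                    return {CRBit::UN, true};
  case FPCond::OLT: case FPCond::LT: return {CRBit::LT, false};
  case FPCond::OGT: case FPCond::GT: return {CRBit::GT, false};
  case FPCond::OEQ: case FPCond::EQ: return {CRBit::EQ, false};
  case FPCond::UO:                   return {CRBit::UN, false};
  }
  return {CRBit::UN, false};
}
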
-// SETCC for f32.
let Predicates = [HasFPU] in {
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+// FCMPU: If either of the operands is a Signaling NaN, then VXSNAN is set.
+// SETCC for f32.
+defm : FSetCCPat<any_fsetcc, f32, FCMPUS>;
// SETCC for f64.
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+defm : FSetCCPat<any_fsetcc, f64, FCMPUD>;
// SETCC for f128.
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETLT)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOGT)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETGT)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOEQ)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
+defm : FSetCCPat<any_fsetcc, f128, XSCMPUQP>;
+
+// FCMPO: If either of the operands is a Signaling NaN, then VXSNAN is set and,
+// if neither operand is a Signaling NaN but at least one operand is a Quiet NaN,
+// then VXVC is set.
+// SETCCS for f32.
+defm : FSetCCPat<strict_fsetccs, f32, FCMPOS>;
+
+// SETCCS for f64.
+defm : FSetCCPat<strict_fsetccs, f64, FCMPOD>;
+// SETCCS for f128.
+defm : FSetCCPat<strict_fsetccs, f128, XSCMPOQP>;
}
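
As a hedged illustration of why both the quiet (FCMPUS/FCMPUD/XSCMPUQP) and signaling (FCMPOS/FCMPOD/XSCMPOQP) pattern sets are needed, the snippet below shows the source-level distinction commonly assumed under strict FP lowering:

#include <cmath>

// Under strict/constrained FP semantics an ordinary relational operator is a
// signaling compare (invalid is raised for any NaN operand), while
// std::isless is a quiet compare (invalid only for a signaling NaN).
bool signalingLess(double A, double B) { return A < B; }
bool quietLess(double A, double B)     { return std::isless(A, B); }
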
// This must be in this file because it relies on patterns defined in this file
@@ -4266,7 +4317,7 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
"icbi $src", IIC_LdStICBI, []>;
-def WAIT : XForm_24_sync<31, 30, (outs), (ins i32imm:$L),
+def WAIT : XForm_24_sync<31, 30, (outs), (ins u2imm:$L),
"wait $L", IIC_LdStLoad, []>;
def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO),
@@ -4284,7 +4335,7 @@ def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB),
def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB),
"mfsrin $RS, $RB", IIC_SprMFSR>;
-def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
+def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, u1imm:$L),
"mtmsr $RS, $L", IIC_SprMTMSR>;
def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS),
@@ -4313,15 +4364,17 @@ def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
"mfmsr $RT", IIC_SprMFMSR, []>;
-def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
+def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, u1imm:$L),
"mtmsrd $RS, $L", IIC_SprMTMSRD>;
def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA),
"mcrfs $BF, $BFA", IIC_BrMCR>;
+// If W is 0 and BF is 7, bits 60:63 of the FPSCR will be set, so we should set
+// the implicit-def RM.
def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
"mtfsfi $BF, $U, $W", IIC_IntMFFS>;
-
+let Defs = [CR1] in
def MTFSFI_rec : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
"mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isRecordForm;
@@ -4329,12 +4382,15 @@ def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>;
def : InstAlias<"mtfsfi. $BF, $U", (MTFSFI_rec crrc:$BF, i32imm:$U, 0)>;
let Predicates = [HasFPU] in {
+let Defs = [RM] in {
def MTFSF : XFLForm_1<63, 711, (outs),
- (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
+ (ins i32imm:$FLM, f8rc:$FRB, u1imm:$L, i32imm:$W),
"mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>;
+let Defs = [CR1] in
def MTFSF_rec : XFLForm_1<63, 711, (outs),
- (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
+ (ins i32imm:$FLM, f8rc:$FRB, u1imm:$L, i32imm:$W),
"mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isRecordForm;
+}
def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>;
def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSF_rec i32imm:$FLM, f8rc:$FRB, 0, 0)>;
@@ -4561,6 +4617,16 @@ def : Pat<(int_ppc_dcbfl xoaddr:$dst),
def : Pat<(int_ppc_dcbflp xoaddr:$dst),
(DCBF 3, xoaddr:$dst)>;
+let Predicates = [IsISA3_1] in {
+ def DCBFPS : PPCAsmPseudo<"dcbfps $dst", (ins memrr:$dst)>;
+ def DCBSTPS : PPCAsmPseudo<"dcbstps $dst", (ins memrr:$dst)>;
+
+ def : Pat<(int_ppc_dcbfps xoaddr:$dst),
+ (DCBF 4, xoaddr:$dst)>;
+ def : Pat<(int_ppc_dcbstps xoaddr:$dst),
+ (DCBF 6, xoaddr:$dst)>;
+}
+
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
@@ -4587,8 +4653,11 @@ def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
+// Disable these aliases on AIX for now because the AIX system assembler does not support them.
+let Predicates = [ModernAs] in {
def : InstAlias<"mtudscr $Rx", (MTSPR 3, gprc:$Rx)>;
def : InstAlias<"mfudscr $Rx", (MFSPR gprc:$Rx, 3)>;
+}
def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>;
def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 2bab73418e10..b9eb3b3b7d37 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1,3 +1,8 @@
+// Mask immediates for MMA instructions (2, 4 and 8 bits).
+def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
+def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
+def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
+
//===----------------------------------------------------------------------===//
// PowerPC ISA 3.1 specific type constraints.
//
@@ -5,12 +10,35 @@
def SDT_PPCSplat32 : SDTypeProfile<1, 3, [ SDTCisVT<0, v2i64>,
SDTCisVec<1>, SDTCisInt<2>, SDTCisInt<3>
]>;
+def SDT_PPCAccBuild : SDTypeProfile<1, 4, [
+ SDTCisVT<0, v512i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>,
+ SDTCisVT<3, v4i32>, SDTCisVT<4, v4i32>
+]>;
+def SDT_PPCPairBuild : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v256i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>
+]>;
+def SDT_PPCAccExtractVsx : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v4i32>, SDTCisVT<1, v512i1>, SDTCisInt<2>
+]>;
+def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v4i32>, SDTCisVT<1, v256i1>, SDTCisInt<2>
+]>;
+def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
+ SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
+]>;
//===----------------------------------------------------------------------===//
// ISA 3.1 specific PPCISD nodes.
//
def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
+def PPCAccBuild : SDNode<"PPCISD::ACC_BUILD", SDT_PPCAccBuild, []>;
+def PPCPairBuild : SDNode<"PPCISD::PAIR_BUILD", SDT_PPCPairBuild, []>;
+def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
+ []>;
+def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
+ []>;
+def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
//===----------------------------------------------------------------------===//
@@ -18,6 +46,15 @@ def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
// address computations).
class isPCRel { bit PCRel = 1; }
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+ [SDNPHasChain, SDNPMayLoad]>;
+
// Top-level class for prefixed instructions.
class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin> : Instruction {
@@ -59,6 +96,39 @@ class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
string BaseName = "";
}
+// VX-Form: [ PO VT R VB RC XO ]
+class VXForm_VTB5_RC<bits<10> xo, bits<5> R, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VT;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VT;
+ let Inst{11-15} = R;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+// Multiclass definition to account for record and non-record form
+// instructions of VXRForm.
+multiclass VXForm_VTB5_RCr<bits<10> xo, bits<5> R, dag OOL, dag IOL,
+ string asmbase, string asmstr,
+ InstrItinClass itin, list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : VXForm_VTB5_RC<xo, R, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)),
+ itin, pattern>, RecFormRel;
+ let Defs = [CR6] in
+ def _rec : VXForm_VTB5_RC<xo, R, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)),
+ itin, []>, isRecordForm, RecFormRel;
+ }
+}
+
class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: PI<1, opcode, OOL, IOL, asmstr, itin> {
@@ -242,29 +312,37 @@ class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
}
-// VX-Form: [PO VRT / UIM RB XO].
-// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
-// "/ UIM" (unused bit followed by a 4-bit immediate)
-// Destructive (insert) forms are suffixed with _ins.
-class VXForm_VRT5_UIM5_RB5_ins<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB),
- !strconcat(opc, " $vD, $rB, $UIM"), IIC_VecGeneral, pattern>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
-
// VX-Form: [PO VRT RA VRB XO].
// Destructive (insert) forms are suffixed with _ins.
class VXForm_VTB5_RA5_ins<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, vrrc:$vB),
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, vrrc:$vB),
!strconcat(opc, " $vD, $rA, $vB"), IIC_VecGeneral, pattern>,
RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
// VX-Form: [PO VRT RA RB XO].
// Destructive (insert) forms are suffixed with _ins.
class VXForm_VRT5_RAB5_ins<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB),
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, gprc:$rB),
!strconcat(opc, " $vD, $rA, $rB"), IIC_VecGeneral, pattern>,
RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+// VX-Form: [ PO BF // VRA VRB XO ]
+class VXForm_BF3_VAB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
// VN-Form: [PO VRT VRA VRB PS SD XO]
// SD is "Shift Direction"
class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr,
@@ -285,6 +363,22 @@ class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr,
let Inst{26-31} = xo;
}
+class VXForm_RD5_MP_VB5<bits<11> xo, bits<4> eo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RD;
+ bits<5> VB;
+ bit MP;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RD;
+ let Inst{11-14} = eo;
+ let Inst{15} = MP;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
// 8RR:D-Form: [ 1 1 0 // // imm0
// PO T XO TX imm1 ].
class 8RR_DForm_IMM32_XT6<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
@@ -415,6 +509,13 @@ class XX2_BF3_XO5_XB6_XO9<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL,
let Inst{31} = 0;
}
+// X-Form: [ PO RT BI /// XO / ]
+class XForm_XT5_BI5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let B = 0;
+}
+
multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
dag PCRel_IOL, string asmstr,
InstrItinClass itin> {
@@ -444,14 +545,307 @@ multiclass 8LS_DForm_R_SI34_XT6_RA5_p<bits<5> opcode, dag OOL, dag IOL,
isPCRel;
}
+def PPCRegVSRpRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRpRC"; let PredicateMethod = "isVSRpEvenRegNumber";
+}
+
+def vsrprc : RegisterOperand<VSRpRC> {
+ let ParserMatchClass = PPCRegVSRpRCAsmOperand;
+}
+
+def PPCRegVSRpEvenRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRpEvenRC"; let PredicateMethod = "isVSRpEvenRegNumber";
+}
+
+def vsrpevenrc : RegisterOperand<VSRpRC> {
+ let ParserMatchClass = PPCRegVSRpEvenRCAsmOperand;
+ let EncoderMethod = "getVSRpEvenEncoding";
+ let DecoderMethod = "decodeVSRpEvenOperands";
+}
+
+class DQForm_XTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> XTp;
+ bits<17> DQ_RA;
+ let Pattern = pattern;
+
+ let Inst{6-9} = XTp{3-0};
+ let Inst{10} = XTp{4};
+ let Inst{11-15} = DQ_RA{16-12}; // Register #
+ let Inst{16-27} = DQ_RA{11-0}; // Displacement.
+ let Inst{28-31} = xo;
+}
+
+class XForm_XTp5_XAB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin>, XFormMemOp {
+ bits<5> XTp;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+ let Inst{6-9} = XTp{3-0};
+ let Inst{10} = XTp{4};
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<5> XTp;
+ bits<39> D_RA;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-10} = 0;
+ let Inst{11} = PCRel;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = D_RA{33-16}; // Imm18
+
+ // The instruction.
+ let Inst{38-41} = XTp{3-0};
+ let Inst{42} = XTp{4};
+ let Inst{43-47} = D_RA{38-34}; // Register #
+ let Inst{48-63} = D_RA{15-0}; // D
+}
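
A minimal sketch of how the 34-bit displacement handled by this class splits across the prefix and suffix words, following the field assignments above (the helper name is invented):

#include <cstdint>

struct SplitD34 {
  uint32_t Imm18; // goes into prefix bits 14-31
  uint16_t D;     // goes into suffix bits 48-63
};

SplitD34 splitDisplacement34(int64_t D34) {
  uint64_t U = static_cast<uint64_t>(D34) & ((1ULL << 34) - 1); // keep 34 bits
  return {static_cast<uint32_t>(U >> 16),      // D_RA{33-16}
          static_cast<uint16_t>(U & 0xFFFF)};  // D_RA{15-0}
}
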
+
+multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> pref, bits<6> opcode, dag OOL,
+ dag IOL, dag PCRel_IOL,
+ string asmstr, InstrItinClass itin> {
+ def NAME : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>,
+ isPCRel;
+}
+
+def PPCRegACCRCAsmOperand : AsmOperandClass {
+ let Name = "RegACCRC"; let PredicateMethod = "isACCRegNumber";
+}
+
+def acc : RegisterOperand<ACCRC> {
+ let ParserMatchClass = PPCRegACCRCAsmOperand;
+}
+
+def uacc : RegisterOperand<UACCRC> {
+ let ParserMatchClass = PPCRegACCRCAsmOperand;
+}
+
+// [PO AS XO2 XO]
+class XForm_AT3<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = AT;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = xo2;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XX3Form_AT3_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = AT;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class MMIRR_XX3Form_XY4P2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<4> XMSK;
+ bits<4> YMSK;
+ bits<2> PMSK;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 3;
+ let Inst{8-11} = 9;
+ let Inst{12-15} = 0;
+ let Inst{16-17} = PMSK;
+ let Inst{18-23} = 0;
+ let Inst{24-27} = XMSK;
+ let Inst{28-31} = YMSK;
+
+ // The instruction.
+ let Inst{38-40} = AT;
+ let Inst{41-42} = 0;
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-60} = xo;
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_XY4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<4> XMSK;
+ bits<4> YMSK;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 3;
+ let Inst{8-11} = 9;
+ let Inst{12-23} = 0;
+ let Inst{24-27} = XMSK;
+ let Inst{28-31} = YMSK;
+
+ // The instruction.
+ let Inst{38-40} = AT;
+ let Inst{41-42} = 0;
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-60} = xo;
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_X4Y2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<4> XMSK;
+ bits<2> YMSK;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 3;
+ let Inst{8-11} = 9;
+ let Inst{12-23} = 0;
+ let Inst{24-27} = XMSK;
+ let Inst{28-29} = YMSK;
+ let Inst{30-31} = 0;
+
+ // The instruction.
+ let Inst{38-40} = AT;
+ let Inst{41-42} = 0;
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-60} = xo;
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_XY4P8_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<4> XMSK;
+ bits<4> YMSK;
+ bits<8> PMSK;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 3;
+ let Inst{8-11} = 9;
+ let Inst{12-15} = 0;
+ let Inst{16-23} = PMSK;
+ let Inst{24-27} = XMSK;
+ let Inst{28-31} = YMSK;
+
+ // The instruction.
+ let Inst{38-40} = AT;
+ let Inst{41-42} = 0;
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-60} = xo;
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_XYP4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<4> XMSK;
+ bits<4> YMSK;
+ bits<4> PMSK;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 3;
+ let Inst{8-11} = 9;
+ let Inst{12-15} = 0;
+ let Inst{16-19} = PMSK;
+ let Inst{20-23} = 0;
+ let Inst{24-27} = XMSK;
+ let Inst{28-31} = YMSK;
+
+ // The instruction.
+ let Inst{38-40} = AT;
+ let Inst{41-42} = 0;
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-60} = xo;
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = 0;
+}
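
The MMIRR classes above share one prefix-word skeleton and differ only in mask widths. A rough C++ sketch of the XY4P2 variant's prefix word, translating the IBM-style bit ranges into shifts (prefix opcode bits 0-5 are left to the shared base class; the helper name is invented):

#include <cstdint>

// Bits 6-7 = 3, bits 8-11 = 9, PMSK in 16-17, XMSK in 24-27, YMSK in 28-31,
// numbering the most significant bit of the 32-bit prefix word as bit 0.
uint32_t mmirrPrefixXY4P2(uint32_t PMSK, uint32_t XMSK, uint32_t YMSK) {
  uint32_t P = 0;
  P |= 3u << (31 - 7);
  P |= 9u << (31 - 11);
  P |= (PMSK & 0x3u) << (31 - 17);
  P |= (XMSK & 0xFu) << (31 - 27);
  P |= (YMSK & 0xFu) << (31 - 31);
  return P;
}
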
+
def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">;
def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
+def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">;
+def MMA : Predicate<"Subtarget->hasMMA()">;
+
+def RCCp {
+ dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC);
+ dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC);
+}
let Predicates = [PrefixInstrs] in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm PADDI8 :
MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI),
- (ins immZero:$RA, s34imm:$SI),
+ (ins immZero:$RA, s34imm_pcrel:$SI),
"paddi $RT, $RA, $SI", IIC_LdStLFD>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT),
@@ -461,7 +855,7 @@ let Predicates = [PrefixInstrs] in {
}
defm PADDI :
MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI),
- (ins immZero:$RA, s34imm:$SI),
+ (ins immZero:$RA, s34imm_pcrel:$SI),
"paddi $RT, $RA, $SI", IIC_LdStLFD>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT),
@@ -592,6 +986,695 @@ let Predicates = [PrefixInstrs] in {
}
}
+// Multiclass definitions for MMA accumulator instructions.
+// ----------------------------------------------------------------------------
+
+// Defines 2 unmasked instructions where the xo field for acc/non-acc version
+// is even/odd.
+multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ let Predicates = [MMA] in {
+ def NAME :
+ XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL,
+ !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PP :
+ XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4P8_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4P8_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XYP4_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XYP4_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
+// The upper nibble of the XO field is 0x4 for the non-acc version and 0x6 for
+// the acc version.
+multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ let Predicates = [MMA] in {
+ def NAME :
+ XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), IOL,
+ !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PP :
+ XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x20), (outs acc:$AT),
+ !con((ins acc:$ATi),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4
+// bits. The upper nibble is ORed with 0x8, 0x4, or 0xC for the operand-negating forms.
+multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA] in {
+ def PN : XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NP : XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NN : XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME#PN :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NP :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NN :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 5 instructions, unmasked, operand negating.
+// The upper nibble is ORed with 0x8, 0x4, or 0xC for the operand-negating forms.
+multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA] in {
+ def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
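
The operand-negating variants are derived purely by ORing constants into the upper nibble of the XO field. A small sketch of that selection, with the enum and helper invented for illustration:

#include <cstdint>

enum class AccVariant { PP, PN, NP, NN };

// The base XO value gives the "pp" form; the top two bits of the 8-bit XO
// select the negated forms, matching the !or() operands above.
uint8_t negatingXO(uint8_t BaseXO, AccVariant V) {
  switch (V) {
  case AccVariant::PP: return BaseXO;
  case AccVariant::PN: return BaseXO | 0x80;
  case AccVariant::NP: return BaseXO | 0x40;
  case AccVariant::NN: return BaseXO | 0xC0;
  }
  return BaseXO;
}
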
+
+// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits.
+// The upper nibble is ORed with 0x8, 0x4, or 0xC for the operand-negating forms.
+multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#PN :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NP :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NN :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits.
+// The upper nibble is ORed with 0x8, 0x4, or 0xC for the operand-negating forms.
+multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#PN :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NP :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NN :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// End of class definitions.
+//-----------------------------------------------------------------------------
+
+let Predicates = [MMA] in {
+ def XXMFACC :
+ XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
+ IIC_VecGeneral,
+ [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
+ RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
+ def XXMTACC :
+ XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
+ IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
+ "#KILL_PAIR", []>,
+ RegConstraint<"$XTp = $XSp">;
+ def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS),
+ "#BUILD_UACC $AT, $AS", []>;
+ // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in
+ // the backend. We avoid CSE here because it generates a copy of the acc
+ // register and this copy is more expensive than calling the intrinsic again.
+ let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
+ def XXSETACCZ :
+ XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
+ }
+ def XVI8GER4SPP :
+ XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
+ "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ let mayStore = 1 in {
+ def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst),
+ "#SPILL_ACC", []>;
+ def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst),
+ "#SPILL_UACC", []>;
+ }
+ let mayLoad = 1, hasSideEffects = 0 in {
+ def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src),
+ "#RESTORE_ACC", []>;
+ def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src),
+ "#RESTORE_UACC", []>;
+ }
+}
+
+let Predicates = [MMA, PrefixInstrs] in {
+ def PMXVI8GER4SPP :
+ MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT),
+ (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK,
+ u4imm:$YMSK, u4imm:$PMSK),
+ "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK",
+ IIC_VecGeneral, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+}
+
+// MMA accumulating/non-accumulating instructions.
+//------------------------------------------------------------------------------
+
+// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN
+// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN
+defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB),
+ "xvbf16ger2", "$AT, $XA, $XB">;
+
+// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP
+defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB),
+ "xvi4ger8", "$AT, $XA, $XB">;
+
+// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP
+defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB),
+ "xvi8ger4", "$AT, $XA, $XB">;
+
+// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP
+defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB),
+ "xvi16ger2", "$AT, $XA, $XB">;
+
+// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP
+defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB),
+ "xvi16ger2s", "$AT, $XA, $XB">;
+
+// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN
+// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN
+defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB),
+ "xvf16ger2", "$AT, $XA, $XB">;
+
+// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERNN
+// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERNN
+defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB),
+ "xvf32ger", "$AT, $XA, $XB">;
+
+// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN
+// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN
+defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
+ "xvf64ger", "$AT, $XA, $XB">;
+//------------------------------------------------------------------------------
+
+// MMA Intrinsics
+let Predicates = [MMA] in {
+ def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
+ (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
+ (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
+ (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
+ (XVF64GER $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+}
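
The patterns in this block select the plain and accumulating GER forms directly from the int_ppc_mma_* intrinsics. As a rough usage sketch, assuming clang's MMA builtins (an assumption, not something defined in this diff), a rank-k f32 update seeds the accumulator with xvf32ger and accumulates with xvf32gerpp:

    /* Hand-written illustration of the xvf32ger / xvf32gerpp patterns above:
     * a 4x4 f32 rank-k update, read back with disassemble_acc. */
    #include <altivec.h>

    void f32_rank_k(float out[4][4], const vector unsigned char *a,
                    const vector unsigned char *b, int k) {
      __vector_quad acc;
      __builtin_mma_xvf32ger(&acc, a[0], b[0]);      /* seed the accumulator   */
      for (int i = 1; i < k; ++i)
        __builtin_mma_xvf32gerpp(&acc, a[i], b[i]);  /* positive-accumulate    */
      __builtin_mma_disassemble_acc(out, &acc);      /* acc -> four VSX vectors */
    }
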
+
+// MMA Intrinsics
+let Predicates = [MMA, PrefixInstrs] in {
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
+ (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk8Imm:$PMSK)),
+ (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
+ (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk4Imm:$PMSK)),
+ (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)),
+ (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)),
+ (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+}
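
The prefixed pm* patterns add the XMSK/YMSK/PMSK immediates that mask which rows and columns of the accumulator participate. A minimal sketch, assuming clang exposes these as __builtin_mma_pmxv* builtins taking constant mask arguments (an assumption, not confirmed by this diff):

    /* Hand-written sketch of a masked GER: XMSK = 0xF keeps all four rows,
     * YMSK = 0x3 keeps only the first two columns, matching the u4imm
     * XMSK/YMSK operands in the patterns above. */
    #include <altivec.h>

    void masked_ger(__vector_quad *acc, vector unsigned char a,
                    vector unsigned char b) {
      __builtin_mma_pmxvf32ger(acc, a, b, 0xF, 0x3);
    }
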
+
+def Concats {
+ dag VecsToVecPair0 =
+ (v256i1 (INSERT_SUBREG
+ (INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1),
+ $vs1, sub_vsx0));
+ dag VecsToVecPair1 =
+ (v256i1 (INSERT_SUBREG
+ (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
+ $vs3, sub_vsx0));
+ dag VecsToVecQuad =
+ (BUILD_UACC (INSERT_SUBREG
+ (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)),
+ (KILL_PAIR VecsToVecPair0), sub_pair0),
+ (KILL_PAIR VecsToVecPair1), sub_pair1));
+}
+
+def Extracts {
+ dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0));
+ dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1));
+ dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0));
+ dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1));
+ dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0));
+ dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1));
+}
+
+let Predicates = [MMA] in {
+ def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)),
+ (XXMTACC Concats.VecsToVecQuad)>;
+ def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
+ v16i8:$vs3, v16i8:$vs2)),
+ (XXMTACC Concats.VecsToVecQuad)>;
+ def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 0))),
+ Extracts.Vec0>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 1))),
+ Extracts.Vec1>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 2))),
+ Extracts.Vec2>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 3))),
+ Extracts.Vec3>;
+}
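
These patterns connect accumulator assembly and extraction (PPCAccBuild, int_ppc_mma_assemble_acc, PPCAccExtractVsx) to XXMTACC/XXMFACC through the Concats and Extracts helpers above. A hand-written round-trip sketch, assuming clang's assemble/disassemble builtin names (an assumption, not part of this change):

    /* Assemble an accumulator from four VSX vectors (xxmtacc on the way in)
     * and read it back out (xxmfacc on the way out). */
    #include <altivec.h>

    void acc_roundtrip(vector unsigned char v[4], vector unsigned char out[4]) {
      __vector_quad acc;
      __builtin_mma_assemble_acc(&acc, v[0], v[1], v[2], v[3]);
      __builtin_mma_disassemble_acc(out, &acc);
    }
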
+
+let Predicates = [PairedVectorMemops] in {
+ def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)),
+ Concats.VecsToVecPair0>;
+ def : Pat<(v256i1 (int_ppc_vsx_assemble_pair v16i8:$vs1, v16i8:$vs0)),
+ Concats.VecsToVecPair0>;
+ def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 0))),
+ (v4i32 (EXTRACT_SUBREG $v, sub_vsx0))>;
+ def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 1))),
+ (v4i32 (EXTRACT_SUBREG $v, sub_vsx1))>;
+}
+
+let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops] in {
+ def LXVP : DQForm_XTp5_RA17_MEM<6, 0, (outs vsrprc:$XTp),
+ (ins memrix16:$DQ_RA), "lxvp $XTp, $DQ_RA",
+ IIC_LdStLFD, []>;
+ def LXVPX : XForm_XTp5_XAB5<31, 333, (outs vsrprc:$XTp), (ins memrr:$src),
+ "lxvpx $XTp, $src", IIC_LdStLFD,
+ []>;
+}
+
+let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in {
+ def STXVP : DQForm_XTp5_RA17_MEM<6, 1, (outs), (ins vsrprc:$XTp,
+ memrix16:$DQ_RA), "stxvp $XTp, $DQ_RA",
+ IIC_LdStLFD, []>;
+ def STXVPX : XForm_XTp5_XAB5<31, 461, (outs), (ins vsrprc:$XTp, memrr:$dst),
+ "stxvpx $XTp, $dst", IIC_LdStLFD,
+ []>;
+}
+
+let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in {
+ defm PLXVP :
+ 8LS_DForm_R_XTp5_SI34_MEM_p<1, 58, (outs vsrprc:$XTp), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plxvp $XTp, $D_RA",
+ IIC_LdStLFD>;
+}
+
+let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] in {
+ defm PSTXVP :
+ 8LS_DForm_R_XTp5_SI34_MEM_p<1, 62, (outs), (ins vsrprc:$XTp, memri34:$D_RA),
+ (ins vsrprc:$XTp, memri34_pcrel:$D_RA),
+ "pstxvp $XTp, $D_RA", IIC_LdStLFD>;
+}
+
+let Predicates = [PairedVectorMemops] in {
+ // Intrinsics for Paired Vector Loads.
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>;
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>;
+ let Predicates = [PairedVectorMemops, PrefixInstrs] in {
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>;
+ }
+ // Intrinsics for Paired Vector Stores.
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX16:$dst),
+ (STXVP $XSp, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, xaddrX16:$dst),
+ (STXVPX $XSp, xaddrX16:$dst)>;
+ let Predicates = [PairedVectorMemops, PrefixInstrs] in {
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX34:$dst),
+ (PSTXVP $XSp, memri34:$dst)>;
+ }
+}
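
These patterns map the paired vector load/store intrinsics onto LXVP/LXVPX and STXVP/STXVPX (and the prefixed PLXVP/PSTXVP forms). A minimal sketch, assuming clang's __vector_pair type (the type and its lowering are assumptions, not defined in this diff); copying a pair through pointers is expected to select the paired memory instructions:

    /* Hand-written illustration: a 256-bit pair load followed by a pair store. */
    #include <altivec.h>

    void copy_pair(__vector_pair *dst, const __vector_pair *src) {
      __vector_pair p = *src;   /* paired load  -> lxvp/lxvpx/plxvp   */
      *dst = p;                 /* paired store -> stxvp/stxvpx/pstxvp */
    }
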
+
// TODO: We have an added complexity of 500 here. This is only a temporary
// solution to have tablegen consider these patterns first. The way we do
// addressing for PowerPC is complex depending on available D form, X form, or
@@ -753,6 +1836,13 @@ let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
// If the PPCmatpcreladdr node is not caught by any other pattern it should be
// caught here and turned into a paddi instruction to materialize the address.
def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+ // The PPCtlsdynamatpcreladdr node is used for TLS dynamic models to
+ // materialize a TLS global address with the paddi instruction.
+ def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+ // The PPCtlslocalexecmataddr node is used for TLS local exec models to
+ // materialize a TLS global address with the paddi instruction.
+ def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)),
+ (PADDI8 $in, $addr)>;
}
let Predicates = [PrefixInstrs] in {
@@ -797,6 +1887,26 @@ let Predicates = [PrefixInstrs] in {
}
let Predicates = [IsISA3_1] in {
+ def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RT), (ins crbitrc:$BI),
+ "setbc $RT, $BI", IIC_IntCompare, []>;
+ def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RT), (ins crbitrc:$BI),
+ "setbcr $RT, $BI", IIC_IntCompare, []>;
+ def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RT), (ins crbitrc:$BI),
+ "setnbc $RT, $BI", IIC_IntCompare, []>;
+ def SETNBCR : XForm_XT5_BI5<31, 480, (outs gprc:$RT), (ins crbitrc:$BI),
+ "setnbcr $RT, $BI", IIC_IntCompare, []>;
+
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def SETBC8 : XForm_XT5_BI5<31, 384, (outs g8rc:$RT), (ins crbitrc:$BI),
+ "setbc $RT, $BI", IIC_IntCompare, []>;
+ def SETBCR8 : XForm_XT5_BI5<31, 416, (outs g8rc:$RT), (ins crbitrc:$BI),
+ "setbcr $RT, $BI", IIC_IntCompare, []>;
+ def SETNBC8 : XForm_XT5_BI5<31, 448, (outs g8rc:$RT), (ins crbitrc:$BI),
+ "setnbc $RT, $BI", IIC_IntCompare, []>;
+ def SETNBCR8 : XForm_XT5_BI5<31, 480, (outs g8rc:$RT), (ins crbitrc:$BI),
+ "setnbcr $RT, $BI", IIC_IntCompare, []>;
+ }
+
def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT),
(ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
"vsldbi $VRT, $VRA, $VRB, $SH",
@@ -813,87 +1923,254 @@ let Predicates = [IsISA3_1] in {
(int_ppc_altivec_vsrdbi v16i8:$VRA,
v16i8:$VRB,
i32:$SH))]>;
- def VINSW :
- VXForm_VRT5_UIM5_RB5_ins<207, "vinsw",
- [(set v4i32:$vD,
- (int_ppc_altivec_vinsw v4i32:$vDi, i64:$rB,
- timm:$UIM))]>;
+ defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstribr", "$vT, $vB", IIC_VecGeneral,
+ [(set v16i8:$vT,
+ (int_ppc_altivec_vstribr v16i8:$vB))]>;
+ defm VSTRIBL : VXForm_VTB5_RCr<13, 0, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstribl", "$vT, $vB", IIC_VecGeneral,
+ [(set v16i8:$vT,
+ (int_ppc_altivec_vstribl v16i8:$vB))]>;
+ defm VSTRIHR : VXForm_VTB5_RCr<13, 3, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstrihr", "$vT, $vB", IIC_VecGeneral,
+ [(set v8i16:$vT,
+ (int_ppc_altivec_vstrihr v8i16:$vB))]>;
+ defm VSTRIHL : VXForm_VTB5_RCr<13, 2, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstrihl", "$vT, $vB", IIC_VecGeneral,
+ [(set v8i16:$vT,
+ (int_ppc_altivec_vstrihl v8i16:$vB))]>;
+ def VINSW :
+ VXForm_1<207, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, gprc:$rB),
+ "vinsw $vD, $rB, $UIM", IIC_VecGeneral,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinsw v4i32:$vDi, i32:$rB, timm:$UIM))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VINSD :
- VXForm_VRT5_UIM5_RB5_ins<463, "vinsd",
- [(set v2i64:$vD,
- (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB,
- timm:$UIM))]>;
+ VXForm_1<463, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB),
+ "vinsd $vD, $rB, $UIM", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB, timm:$UIM))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VINSBVLX :
VXForm_VTB5_RA5_ins<15, "vinsbvlx",
[(set v16i8:$vD,
- (int_ppc_altivec_vinsbvlx v16i8:$vDi, i64:$rA,
+ (int_ppc_altivec_vinsbvlx v16i8:$vDi, i32:$rA,
v16i8:$vB))]>;
def VINSBVRX :
VXForm_VTB5_RA5_ins<271, "vinsbvrx",
[(set v16i8:$vD,
- (int_ppc_altivec_vinsbvrx v16i8:$vDi, i64:$rA,
+ (int_ppc_altivec_vinsbvrx v16i8:$vDi, i32:$rA,
v16i8:$vB))]>;
def VINSHVLX :
VXForm_VTB5_RA5_ins<79, "vinshvlx",
[(set v8i16:$vD,
- (int_ppc_altivec_vinshvlx v8i16:$vDi, i64:$rA,
+ (int_ppc_altivec_vinshvlx v8i16:$vDi, i32:$rA,
v8i16:$vB))]>;
def VINSHVRX :
VXForm_VTB5_RA5_ins<335, "vinshvrx",
[(set v8i16:$vD,
- (int_ppc_altivec_vinshvrx v8i16:$vDi, i64:$rA,
+ (int_ppc_altivec_vinshvrx v8i16:$vDi, i32:$rA,
v8i16:$vB))]>;
def VINSWVLX :
VXForm_VTB5_RA5_ins<143, "vinswvlx",
[(set v4i32:$vD,
- (int_ppc_altivec_vinswvlx v4i32:$vDi, i64:$rA,
+ (int_ppc_altivec_vinswvlx v4i32:$vDi, i32:$rA,
v4i32:$vB))]>;
def VINSWVRX :
VXForm_VTB5_RA5_ins<399, "vinswvrx",
[(set v4i32:$vD,
- (int_ppc_altivec_vinswvrx v4i32:$vDi, i64:$rA,
+ (int_ppc_altivec_vinswvrx v4i32:$vDi, i32:$rA,
v4i32:$vB))]>;
def VINSBLX :
VXForm_VRT5_RAB5_ins<527, "vinsblx",
[(set v16i8:$vD,
- (int_ppc_altivec_vinsblx v16i8:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinsblx v16i8:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSBRX :
VXForm_VRT5_RAB5_ins<783, "vinsbrx",
[(set v16i8:$vD,
- (int_ppc_altivec_vinsbrx v16i8:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinsbrx v16i8:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSHLX :
VXForm_VRT5_RAB5_ins<591, "vinshlx",
[(set v8i16:$vD,
- (int_ppc_altivec_vinshlx v8i16:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinshlx v8i16:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSHRX :
VXForm_VRT5_RAB5_ins<847, "vinshrx",
[(set v8i16:$vD,
- (int_ppc_altivec_vinshrx v8i16:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinshrx v8i16:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSWLX :
VXForm_VRT5_RAB5_ins<655, "vinswlx",
[(set v4i32:$vD,
- (int_ppc_altivec_vinswlx v4i32:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinswlx v4i32:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSWRX :
VXForm_VRT5_RAB5_ins<911, "vinswrx",
[(set v4i32:$vD,
- (int_ppc_altivec_vinswrx v4i32:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinswrx v4i32:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSDLX :
- VXForm_VRT5_RAB5_ins<719, "vinsdlx",
- [(set v2i64:$vD,
- (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA,
- i64:$rB))]>;
+ VXForm_1<719, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB),
+ "vinsdlx $vD, $rA, $rB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA, i64:$rB))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VINSDRX :
- VXForm_VRT5_RAB5_ins<975, "vinsdrx",
- [(set v2i64:$vD,
- (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA,
- i64:$rB))]>;
-
+ VXForm_1<975, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB),
+ "vinsdrx $vD, $rA, $rB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA, i64:$rB))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$rD), (ins vrrc:$vB),
+ "vextractbm $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD,
+ (int_ppc_altivec_vextractbm v16i8:$vB))]>;
+ def VEXTRACTHM : VXForm_RD5_XO5_RS5<1602, 9, (outs gprc:$rD), (ins vrrc:$vB),
+ "vextracthm $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD,
+ (int_ppc_altivec_vextracthm v8i16:$vB))]>;
+ def VEXTRACTWM : VXForm_RD5_XO5_RS5<1602, 10, (outs gprc:$rD), (ins vrrc:$vB),
+ "vextractwm $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD,
+ (int_ppc_altivec_vextractwm v4i32:$vB))]>;
+ def VEXTRACTDM : VXForm_RD5_XO5_RS5<1602, 11, (outs gprc:$rD), (ins vrrc:$vB),
+ "vextractdm $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD,
+ (int_ppc_altivec_vextractdm v2i64:$vB))]>;
+ def VEXTRACTQM : VXForm_RD5_XO5_RS5<1602, 12, (outs gprc:$rD), (ins vrrc:$vB),
+ "vextractqm $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD,
+ (int_ppc_altivec_vextractqm v1i128:$vB))]>;
+ def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vexpandbm $vD, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (int_ppc_altivec_vexpandbm
+ v16i8:$vB))]>;
+ def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vexpandhm $vD, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (int_ppc_altivec_vexpandhm
+ v8i16:$vB))]>;
+ def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vexpandwm $vD, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (int_ppc_altivec_vexpandwm
+ v4i32:$vB))]>;
+ def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vexpanddm $vD, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (int_ppc_altivec_vexpanddm
+ v2i64:$vB))]>;
+ def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vexpandqm $vD, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vexpandqm
+ v1i128:$vB))]>;
+ def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrbm $vD, $rB", IIC_VecGeneral,
+ [(set v16i8:$vD,
+ (int_ppc_altivec_mtvsrbm i64:$rB))]>;
+ def MTVSRHM : VXForm_RD5_XO5_RS5<1602, 17, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrhm $vD, $rB", IIC_VecGeneral,
+ [(set v8i16:$vD,
+ (int_ppc_altivec_mtvsrhm i64:$rB))]>;
+ def MTVSRWM : VXForm_RD5_XO5_RS5<1602, 18, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrwm $vD, $rB", IIC_VecGeneral,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_mtvsrwm i64:$rB))]>;
+ def MTVSRDM : VXForm_RD5_XO5_RS5<1602, 19, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrdm $vD, $rB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_mtvsrdm i64:$rB))]>;
+ def MTVSRQM : VXForm_RD5_XO5_RS5<1602, 20, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrqm $vD, $rB", IIC_VecGeneral,
+ [(set v1i128:$vD,
+ (int_ppc_altivec_mtvsrqm i64:$rB))]>;
+ def MTVSRBMI : DXForm<4, 10, (outs vrrc:$vD), (ins u16imm64:$D),
+ "mtvsrbmi $vD, $D", IIC_VecGeneral,
+ [(set v16i8:$vD,
+ (int_ppc_altivec_mtvsrbm imm:$D))]>;
+ def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$rD),
+ (ins vrrc:$vB, u1imm:$MP),
+ "vcntmbb $rD, $vB, $MP", IIC_VecGeneral,
+ [(set i64:$rD, (int_ppc_altivec_vcntmbb
+ v16i8:$vB, timm:$MP))]>;
+ def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$rD),
+ (ins vrrc:$vB, u1imm:$MP),
+ "vcntmbh $rD, $vB, $MP", IIC_VecGeneral,
+ [(set i64:$rD, (int_ppc_altivec_vcntmbh
+ v8i16:$vB, timm:$MP))]>;
+ def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$rD),
+ (ins vrrc:$vB, u1imm:$MP),
+ "vcntmbw $rD, $vB, $MP", IIC_VecGeneral,
+ [(set i64:$rD, (int_ppc_altivec_vcntmbw
+ v4i32:$vB, timm:$MP))]>;
+ def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$rD),
+ (ins vrrc:$vB, u1imm:$MP),
+ "vcntmbd $rD, $vB, $MP", IIC_VecGeneral,
+ [(set i64:$rD, (int_ppc_altivec_vcntmbd
+ v2i64:$vB, timm:$MP))]>;
+ def VEXTDUBVLX : VAForm_1a<24, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextdubvlx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextdubvlx v16i8:$vA,
+ v16i8:$vB,
+ i32:$rC))]>;
+ def VEXTDUBVRX : VAForm_1a<25, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextdubvrx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextdubvrx v16i8:$vA,
+ v16i8:$vB,
+ i32:$rC))]>;
+ def VEXTDUHVLX : VAForm_1a<26, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextduhvlx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextduhvlx v8i16:$vA,
+ v8i16:$vB,
+ i32:$rC))]>;
+ def VEXTDUHVRX : VAForm_1a<27, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextduhvrx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextduhvrx v8i16:$vA,
+ v8i16:$vB,
+ i32:$rC))]>;
+ def VEXTDUWVLX : VAForm_1a<28, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextduwvlx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextduwvlx v4i32:$vA,
+ v4i32:$vB,
+ i32:$rC))]>;
+ def VEXTDUWVRX : VAForm_1a<29, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextduwvrx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextduwvrx v4i32:$vA,
+ v4i32:$vB,
+ i32:$rC))]>;
+ def VEXTDDVLX : VAForm_1a<30, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextddvlx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextddvlx v2i64:$vA,
+ v2i64:$vB,
+ i32:$rC))]>;
+ def VEXTDDVRX : VAForm_1a<31, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextddvrx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextddvrx v2i64:$vA,
+ v2i64:$vB,
+ i32:$rC))]>;
def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpdepd $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD,
@@ -961,7 +2238,61 @@ let Predicates = [IsISA3_1] in {
"vclrrb $vD, $vA, $rB", IIC_VecGeneral,
[(set v16i8:$vD,
(int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>;
-
+ def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulld $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>;
+ def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulhsw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>;
+ def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulhuw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>;
+ def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulhsd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>;
+ def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulhud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>;
+ def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmodsw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>;
+ def VMODUW : VXForm_1<1675, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmoduw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (urem v4i32:$vA, v4i32:$vB))]>;
+ def VMODSD : VXForm_1<1995, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmodsd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (srem v2i64:$vA, v2i64:$vB))]>;
+ def VMODUD : VXForm_1<1739, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmodud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (urem v2i64:$vA, v2i64:$vB))]>;
+ def VDIVSW : VXForm_1<395, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivsw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (sdiv v4i32:$vA, v4i32:$vB))]>;
+ def VDIVUW : VXForm_1<139, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivuw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (udiv v4i32:$vA, v4i32:$vB))]>;
+ def VDIVSD : VXForm_1<459, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivsd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sdiv v2i64:$vA, v2i64:$vB))]>;
+ def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>;
+ def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivesw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (int_ppc_altivec_vdivesw v4i32:$vA,
+ v4i32:$vB))]>;
+ def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdiveuw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (int_ppc_altivec_vdiveuw v4i32:$vA,
+ v4i32:$vB))]>;
+ def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivesd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (int_ppc_altivec_vdivesd v2i64:$vA,
+ v2i64:$vB))]>;
+ def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdiveud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (int_ppc_altivec_vdiveud v2i64:$vA,
+ v2i64:$vB))]>;
def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB),
"xvtlsbb $BF, $XB", IIC_VecGeneral, []>;
@@ -980,10 +2311,204 @@ let Predicates = [IsISA3_1] in {
def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>;
def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>;
}
+
+ def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulesd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA,
+ v2i64:$vB))]>;
+ def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmuleud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA,
+ v2i64:$vB))]>;
+ def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulosd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA,
+ v2i64:$vB))]>;
+ def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmuloud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA,
+ v2i64:$vB))]>;
+ def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+ "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmsumcud
+ v2i64:$vA, v2i64:$vB, v1i128:$vC))]>;
+ def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivsq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>;
+ def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivuq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>;
+ def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivesq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vdivesq v1i128:$vA,
+ v1i128:$vB))]>;
+ def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdiveuq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vdiveuq v1i128:$vA,
+ v1i128:$vB))]>;
+ def VCMPEQUQ : VCMP <455, "vcmpequq $vD, $vA, $vB" , v1i128>;
+ def VCMPGTSQ : VCMP <903, "vcmpgtsq $vD, $vA, $vB" , v1i128>;
+ def VCMPGTUQ : VCMP <647, "vcmpgtuq $vD, $vA, $vB" , v1i128>;
+ def VCMPEQUQ_rec : VCMP_rec <455, "vcmpequq. $vD, $vA, $vB" , v1i128>;
+ def VCMPGTSQ_rec : VCMP_rec <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>;
+ def VCMPGTUQ_rec : VCMP_rec <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>;
+ def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmodsq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (srem v1i128:$vA, v1i128:$vB))]>;
+ def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmoduq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>;
+ def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vextsd2q $vD, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>;
+ def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
+ "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
+ def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
+ "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
+ def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
+ [(set v1i128:$vD,
+ (int_ppc_altivec_vrlqnm v1i128:$vA,
+ v1i128:$vB))]>;
+ def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
+ "vrlqmi $vD, $vA, $vB", IIC_VecFP,
+ [(set v1i128:$vD,
+ (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
+ v1i128:$vDi))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
+ def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
+ def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
+ def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
+ def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
+ def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
+ def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
+ def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
+}
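
The block above maps the generic mul/mulhs/mulhu/sdiv/udiv/srem/urem DAG nodes for v4i32 and v2i64 onto the new single-instruction forms. A small hand-written example of source code expected to reach these patterns on an ISA 3.1 target (the lowering is an expectation, not stated in this diff):

    /* Element-wise vector division, remainder, and 64-bit multiply using the
     * GCC/clang vector operator extension. */
    #include <altivec.h>

    vector signed int vdiv(vector signed int a, vector signed int b) { return a / b; }
    vector signed int vrem(vector signed int a, vector signed int b) { return a % b; }
    vector signed long long vmul64(vector signed long long a,
                                   vector signed long long b) { return a * b; }
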
+
+let Predicates = [IsISA3_1, HasVSX] in {
+ def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;
+ def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;
+}
+
+// Multiclass defining patterns for Set Boolean Extension Reverse Instructions.
+// This is analogous to the CRNotPat multiclass but specifically for Power10
+// and newer subtargets since the extended forms use Set Boolean instructions.
+// The first two anonymous patterns defined are actually a duplicate of those
+// in CRNotPat, but it is preferable to define both multiclasses as complete
+// ones rather than pulling that small common section out.
+multiclass P10ReverseSetBool<dag pattern, dag result> {
+ def : Pat<pattern, (crnot result)>;
+ def : Pat<(not pattern), result>;
+
+ def : Pat<(i32 (zext pattern)),
+ (SETBCR result)>;
+ def : Pat<(i64 (zext pattern)),
+ (SETBCR8 result)>;
+
+ def : Pat<(i32 (sext pattern)),
+ (SETNBCR result)>;
+ def : Pat<(i64 (sext pattern)),
+ (SETNBCR8 result)>;
+
+ def : Pat<(i32 (anyext pattern)),
+ (SETBCR result)>;
+ def : Pat<(i64 (anyext pattern)),
+ (SETBCR8 result)>;
+}
+
+multiclass IntSetP10RevSetBool<SDNode SetCC, ValueType Ty, ImmLeaf ZExtTy,
+ ImmLeaf SExtTy, PatLeaf Cmpi, PatLeaf Cmpli,
+ PatLeaf Cmp, PatLeaf Cmpl> {
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
+ (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
+ (EXTRACT_SUBREG (Cmp $s1, $s2), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
+ (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
+ (EXTRACT_SUBREG (Cmp $s1, $s2), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
+ (EXTRACT_SUBREG (Cmp $s1, $s2), sub_eq)>;
+
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETUGE)),
+ (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETGE)),
+ (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETULE)),
+ (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETLE)),
+ (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETNE)),
+ (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_eq)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETNE)),
+ (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_eq)>;
+}
+
+multiclass FSetP10RevSetBool<SDNode SetCC, ValueType Ty, PatLeaf FCmp> {
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
+}
+
+let Predicates = [IsISA3_1] in {
+ def : Pat<(i32 (zext i1:$in)),
+ (SETBC $in)>;
+ def : Pat<(i64 (zext i1:$in)),
+ (SETBC8 $in)>;
+ def : Pat<(i32 (sext i1:$in)),
+ (SETNBC $in)>;
+ def : Pat<(i64 (sext i1:$in)),
+ (SETNBC8 $in)>;
+ def : Pat<(i32 (anyext i1:$in)),
+ (SETBC $in)>;
+ def : Pat<(i64 (anyext i1:$in)),
+ (SETBC8 $in)>;
+
+ // Instantiation of the set boolean reverse patterns for 32-bit integers.
+ defm : IntSetP10RevSetBool<setcc, i32, immZExt16, imm32SExt16,
+ CMPWI, CMPLWI, CMPW, CMPLW>;
+ defm : P10ReverseSetBool<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+ // Instantiation of the set boolean reverse patterns for 64-bit integers.
+ defm : IntSetP10RevSetBool<setcc, i64, immZExt16, imm64SExt16,
+ CMPDI, CMPLDI, CMPD, CMPLD>;
+ defm : P10ReverseSetBool<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+}
+
+// Instantiation of the set boolean reverse patterns for f32, f64, f128.
+let Predicates = [IsISA3_1, HasFPU] in {
+ defm : FSetP10RevSetBool<setcc, f32, FCMPUS>;
+ defm : FSetP10RevSetBool<setcc, f64, FCMPUD>;
+ defm : FSetP10RevSetBool<setcc, f128, XSCMPUQP>;
}
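
P10ReverseSetBool and its instantiations turn negated CR-bit tests into the SETBCR/SETNBCR family instead of a crnot followed by a set-boolean instruction. A hand-written illustration of the comparisons involved (the exact lowering is an expectation for a POWER10 target, not taken from this diff):

    /* a >= b is the negation of the CR "lt" bit produced by the compare, so
     * the zero-extended form maps to cmpd + setbcr and the sign-extended
     * (mask) form maps to cmpd + setnbcr. */
    int  ge_i32(long a, long b)   { return a >= b; }           /* setbcr  */
    long ge_mask(long a, long b)  { return -(long)(a >= b); }  /* setnbcr */
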
//---------------------------- Anonymous Patterns ----------------------------//
let Predicates = [IsISA3_1] in {
+ // Exploit the vector multiply high instructions using intrinsics.
+ def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VMULHSW $vA, $vB))>;
+ def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VMULHUW $vA, $vB))>;
+ def : Pat<(v2i64 (int_ppc_altivec_vmulhsd v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VMULHSD $vA, $vB))>;
+ def : Pat<(v2i64 (int_ppc_altivec_vmulhud v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VMULHUD $vA, $vB))>;
def : Pat<(v16i8 (int_ppc_vsx_xxgenpcvbm v16i8:$VRB, imm:$IMM)),
(v16i8 (COPY_TO_REGCLASS (XXGENPCVBM $VRB, imm:$IMM), VRRC))>;
def : Pat<(v8i16 (int_ppc_vsx_xxgenpcvhm v8i16:$VRB, imm:$IMM)),
@@ -992,12 +2517,82 @@ let Predicates = [IsISA3_1] in {
(v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>;
def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
(v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
- def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, -1)),
+ def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 1)),
(EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
(EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
+
+ def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
+
+ def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
+ (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
+}
+
+let Predicates = [IsISA3_1, HasVSX] in {
+ def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),
+ (COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;
+ def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),
+ (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
}
+let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
+ // Store element 0 of a VSX register to memory
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst),
+ (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst),
+ (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>;
+ def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst),
+ (STXVRWX $src, xoaddr:$dst)>;
+ def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst),
+ (STXVRWX $src, xoaddr:$dst)>;
+ def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst),
+ (STXVRDX $src, xoaddr:$dst)>;
+ def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst),
+ (STXVRDX $src, xoaddr:$dst)>;
+ }
+
+// FIXME: The swap is overkill when the shift amount is a constant.
+// We should just fix the constant in the DAG.
+let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
+ def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSLQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+ def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSLQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+ def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSRQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+ def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSRQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+ def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSRAQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+ def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSRAQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+}
+
+class xxevalPattern <dag pattern, bits<8> imm> :
+ Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
+
let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
i32immNonAllOneNonZero:$A,
@@ -1010,6 +2605,44 @@ let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
def : Pat<(f64 nzFPImmAsi32:$A),
(COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
VSFRC)>;
+
+ // Anonymous patterns for XXEVAL
+ // AND
+ // and(A, B, C)
+ def : xxevalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>;
+ // and(A, xor(B, C))
+ def : xxevalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>;
+ // and(A, or(B, C))
+ def : xxevalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>;
+ // and(A, nor(B, C))
+ def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (or v4i32:$vB, v4i32:$vC))),
+ 8>;
+ // and(A, eqv(B, C))
+ def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (xor v4i32:$vB, v4i32:$vC))),
+ 9>;
+ // and(A, nand(B, C))
+ def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (and v4i32:$vB, v4i32:$vC))),
+ 14>;
+
+ // NAND
+ // nand(A, B, C)
+ def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))),
+ !sub(255, 1)>;
+ // nand(A, xor(B, C))
+ def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))),
+ !sub(255, 6)>;
+ // nand(A, or(B, C))
+ def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))),
+ !sub(255, 7)>;
+ // nand(A, nor(B, C))
+ def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (or v4i32:$vB, v4i32:$vC)),
+ !sub(255, 8)>;
+ // nand(A, eqv(B, C))
+ def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)),
+ !sub(255, 9)>;
+ // nand(A, nand(B, C))
+ def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
+ !sub(255, 14)>;
}
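
The xxevalPattern immediates follow the truth-table encoding implied by the patterns themselves: bit (7 - (4*a + 2*b + c)) of the immediate gives the result for input bits a, b, c, so and(A, and(B, C)) is 1, and(A, xor(B, C)) is 6, and(A, or(B, C)) is 7, and complementing the result flips all eight truth-table bits, which is why each NAND form uses !sub(255, n). A small helper (hand-written; the name and formula are inferred from these patterns, not part of this change) makes the encoding explicit:

    /* Returns the xxeval result bit for input bits a, b, c under the
     * 8-bit immediate imm, using the convention derived above. */
    static inline unsigned xxeval_bit(unsigned imm, unsigned a, unsigned b,
                                      unsigned c) {
      return (imm >> (7u - ((a << 2) | (b << 1) | c))) & 1u;
    }
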
let Predicates = [PrefixInstrs] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td
deleted file mode 100644
index 2265af2815cb..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ /dev/null
@@ -1,1212 +0,0 @@
-//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the QPX extension to the PowerPC instruction set.
-// Reference:
-// Book Q: QPX Architecture Definition. IBM (as updated in) 2011.
-//
-//===----------------------------------------------------------------------===//
-
-def PPCRegQFRCAsmOperand : AsmOperandClass {
- let Name = "RegQFRC"; let PredicateMethod = "isRegNumber";
-}
-def qfrc : RegisterOperand<QFRC> {
- let ParserMatchClass = PPCRegQFRCAsmOperand;
-}
-def PPCRegQSRCAsmOperand : AsmOperandClass {
- let Name = "RegQSRC"; let PredicateMethod = "isRegNumber";
-}
-def qsrc : RegisterOperand<QSRC> {
- let ParserMatchClass = PPCRegQSRCAsmOperand;
-}
-def PPCRegQBRCAsmOperand : AsmOperandClass {
- let Name = "RegQBRC"; let PredicateMethod = "isRegNumber";
-}
-def qbrc : RegisterOperand<QBRC> {
- let ParserMatchClass = PPCRegQBRCAsmOperand;
-}
-
-//===----------------------------------------------------------------------===//
-// Helpers for defining instructions that directly correspond to intrinsics.
-
-// QPXA1_Int - A AForm_1 intrinsic definition.
-class QPXA1_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
- : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
- !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_FPFused,
- [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
-// QPXA1s_Int - A AForm_1 intrinsic definition (simple instructions).
-class QPXA1s_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
- : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
- !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_VecPerm,
- [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
-// QPXA2_Int - A AForm_2 intrinsic definition.
-class QPXA2_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
- : AForm_2<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPGeneral,
- [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
-// QPXA3_Int - A AForm_3 intrinsic definition.
-class QPXA3_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
- : AForm_3<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
- !strconcat(opc, " $FRT, $FRA, $FRC"), IIC_FPGeneral,
- [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRC))]>;
-// QPXA4_Int - A AForm_4a intrinsic definition.
-class QPXA4_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
- : AForm_4a<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
- !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
- [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
-// QPXX18_Int - A XForm_18 intrinsic definition.
-class QPXX18_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
- : XForm_18<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPCompare,
- [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
-// QPXX19_Int - A XForm_19 intrinsic definition.
-class QPXX19_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
- : XForm_19<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
- !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
- [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
-
-//===----------------------------------------------------------------------===//
-// Pattern Frags.
-
-def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4f32;
-}]>;
-
-def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
- (truncstore node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
-}]>;
-def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset),
- (pre_truncst node:$val,
- node:$base, node:$offset), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
-}]>;
-
-def fround_inexact : PatFrag<(ops node:$val), (fpround node:$val), [{
- return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 0;
-}]>;
-
-def fround_exact : PatFrag<(ops node:$val), (fpround node:$val), [{
- return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 1;
-}]>;
-
-let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs.
- def u12 : ImmLeaf<i32, [{ return (Imm & 0xFFF) == Imm; }]>;
-
-//===----------------------------------------------------------------------===//
-// Instruction Definitions.
-
-def HasQPX : Predicate<"Subtarget->hasQPX()">;
-let Predicates = [HasQPX] in {
-let DecoderNamespace = "QPX" in {
-let hasSideEffects = 0 in { // QPX instructions don't have side effects.
-let Uses = [RM] in {
- // Add Instructions
- let isCommutable = 1 in {
- def QVFADD : AForm_2<4, 21,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>;
- def QVFADDSs : AForm_2<0, 21,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>;
- }
- def QVFSUB : AForm_2<4, 20,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>;
- def QVFSUBSs : AForm_2<0, 20,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>;
-
- // Estimate Instructions
- def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfre $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>;
- def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>;
- let isCodeGenOnly = 1 in
- def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfres $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>;
-
- def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfrsqrte $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>;
- def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>;
- let isCodeGenOnly = 1 in
- def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>;
-
- // Multiply Instructions
- let isCommutable = 1 in {
- def QVFMUL : AForm_3<4, 25,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
- "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral,
- [(set v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>;
- let isCodeGenOnly = 1 in
- def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>;
- def QVFMULSs : AForm_3<0, 25,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC),
- "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral,
- [(set v4f32:$FRT, (fmul v4f32:$FRA, v4f32:$FRC))]>;
- }
- def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>;
- def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>;
-
- // Multiply-add instructions
- def QVFMADD : AForm_1<4, 29,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
- "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>;
- def QVFMADDSs : AForm_1<0, 29,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
- "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>;
- def QVFNMADD : AForm_1<4, 31,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
- "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
- v4f64:$FRB)))]>;
- let isCodeGenOnly = 1 in
- def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>;
- def QVFNMADDSs : AForm_1<0, 31,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
- "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
- v4f32:$FRB)))]>;
- def QVFMSUB : AForm_1<4, 28,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
- "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC,
- (fneg v4f64:$FRB)))]>;
- let isCodeGenOnly = 1 in
- def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>;
- def QVFMSUBSs : AForm_1<0, 28,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
- "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC,
- (fneg v4f32:$FRB)))]>;
- def QVFNMSUB : AForm_1<4, 30,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
- "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
- (fneg v4f64:$FRB))))]>;
- let isCodeGenOnly = 1 in
- def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>;
- def QVFNMSUBSs : AForm_1<0, 30,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
- "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
- (fneg v4f32:$FRB))))]>;
- def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>;
- def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>;
- def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>;
- def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>;
- def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>;
- def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>;
- def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>;
- def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>;
-
- // Select Instruction
- let isCodeGenOnly = 1 in
- def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>;
- def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT),
- (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC),
- "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
- [(set v4f64:$FRT, (vselect v4i1:$FRA,
- v4f64:$FRC, v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT),
- (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC),
- "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
- [(set v4f32:$FRT, (vselect v4i1:$FRA,
- v4f32:$FRC, v4f32:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT),
- (ins qbrc:$FRA, qbrc:$FRB, qbrc:$FRC),
- "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
- [(set v4i1:$FRT, (vselect v4i1:$FRA,
- v4i1:$FRC, v4i1:$FRB))]>;
-
- // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
- // instruction selection into a branch sequence.
- def SELECT_CC_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QFRC",
- []>;
- def SELECT_CC_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QSRC",
- []>;
- def SELECT_CC_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QBRC",
- []>;
-
- // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
- // register bit directly.
- def SELECT_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
- qfrc:$T, qfrc:$F), "#SELECT_QFRC",
- [(set v4f64:$dst,
- (select i1:$cond, v4f64:$T, v4f64:$F))]>;
- def SELECT_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
- qsrc:$T, qsrc:$F), "#SELECT_QSRC",
- [(set v4f32:$dst,
- (select i1:$cond, v4f32:$T, v4f32:$F))]>;
- def SELECT_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
- qbrc:$T, qbrc:$F), "#SELECT_QBRC",
- [(set v4i1:$dst,
- (select i1:$cond, v4i1:$T, v4i1:$F))]>;
-
- // Convert and Round Instructions
- def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
- let isCodeGenOnly = 1 in
- def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB),
- "qvfctid $FRT, $FRB", IIC_FPGeneral, []>;
-
- def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>;
- def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>;
- def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>;
- def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>;
- def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>;
- def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>;
- def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>;
- def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>;
- let isCodeGenOnly = 1 in
- def QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB),
- "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>;
-
- def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>;
- def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>;
- def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>;
-
- let isCodeGenOnly = 1 in
- def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>;
- def QVFRSPs : XForm_19<4, 12,
- (outs qsrc:$FRT), (ins qfrc:$FRB),
- "qvfrsp $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>;
-
- def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfriz $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfriz $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>;
-
- def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfrin $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (fround v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfrin $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (fround v4f32:$FRB))]>;
-
- def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfrip $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (fceil v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfrip $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (fceil v4f32:$FRB))]>;
-
- def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfrim $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfrim $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>;
-
- // Move Instructions
- def QVFMR : XForm_19<4, 72,
- (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfmr $FRT, $FRB", IIC_VecPerm,
- [/* (set v4f64:$FRT, v4f64:$FRB) */]>;
- let isCodeGenOnly = 1 in {
- def QVFMRs : XForm_19<4, 72,
- (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfmr $FRT, $FRB", IIC_VecPerm,
- [/* (set v4f32:$FRT, v4f32:$FRB) */]>;
- def QVFMRb : XForm_19<4, 72,
- (outs qbrc:$FRT), (ins qbrc:$FRB),
- "qvfmr $FRT, $FRB", IIC_VecPerm,
- [/* (set v4i1:$FRT, v4i1:$FRB) */]>;
- }
- def QVFNEG : XForm_19<4, 40,
- (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfneg $FRT, $FRB", IIC_VecPerm,
- [(set v4f64:$FRT, (fneg v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFNEGs : XForm_19<4, 40,
- (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfneg $FRT, $FRB", IIC_VecPerm,
- [(set v4f32:$FRT, (fneg v4f32:$FRB))]>;
- def QVFABS : XForm_19<4, 264,
- (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfabs $FRT, $FRB", IIC_VecPerm,
- [(set v4f64:$FRT, (fabs v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFABSs : XForm_19<4, 264,
- (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfabs $FRT, $FRB", IIC_VecPerm,
- [(set v4f32:$FRT, (fabs v4f32:$FRB))]>;
- def QVFNABS : XForm_19<4, 136,
- (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfnabs $FRT, $FRB", IIC_VecPerm,
- [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>;
- let isCodeGenOnly = 1 in
- def QVFNABSs : XForm_19<4, 136,
- (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfnabs $FRT, $FRB", IIC_VecPerm,
- [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>;
- def QVFCPSGN : XForm_18<4, 8,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
- [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>;
- let isCodeGenOnly = 1 in
- def QVFCPSGNs : XForm_18<4, 8,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
- [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>;
-
- def QVALIGNI : Z23Form_1<4, 5,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx),
- "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
- [(set v4f64:$FRT,
- (PPCqvaligni v4f64:$FRA, v4f64:$FRB,
- (i32 imm:$idx)))]>;
- let isCodeGenOnly = 1 in
- def QVALIGNIs : Z23Form_1<4, 5,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx),
- "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
- [(set v4f32:$FRT,
- (PPCqvaligni v4f32:$FRA, v4f32:$FRB,
- (i32 imm:$idx)))]>;
- let isCodeGenOnly = 1 in
- def QVALIGNIb : Z23Form_1<4, 5,
- (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx),
- "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
- [(set v4i1:$FRT,
- (PPCqvaligni v4i1:$FRA, v4i1:$FRB,
- (i32 imm:$idx)))]>;
-
- def QVESPLATI : Z23Form_2<4, 37,
- (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx),
- "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
- [(set v4f64:$FRT,
- (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>;
- let isCodeGenOnly = 1 in
- def QVESPLATIs : Z23Form_2<4, 37,
- (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx),
- "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
- [(set v4f32:$FRT,
- (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>;
- let isCodeGenOnly = 1 in
- def QVESPLATIb : Z23Form_2<4, 37,
- (outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx),
- "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
- [(set v4i1:$FRT,
- (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>;
-
- def QVFPERM : AForm_1<4, 6,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
- "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
- [(set v4f64:$FRT,
- (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
- let isCodeGenOnly = 1 in
- def QVFPERMs : AForm_1<4, 6,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC),
- "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
- [(set v4f32:$FRT,
- (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>;
-
- let isReMaterializable = 1, isAsCheapAsAMove = 1 in
- def QVGPCI : Z23Form_3<4, 133,
- (outs qfrc:$FRT), (ins u12imm:$idx),
- "qvgpci $FRT, $idx", IIC_VecPerm,
- [(set v4f64:$FRT, (PPCqvgpci (u12:$idx)))]>;
-
- // Compare Instruction
- let isCodeGenOnly = 1 in
- def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>;
- def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>;
- let isCodeGenOnly = 1 in
- def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>;
- def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>;
- def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>;
- def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>;
-
- let isCodeGenOnly = 1 in
- def QVFLOGICAL : XForm_20<4, 4,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt),
- "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
- def QVFLOGICALb : XForm_20<4, 4,
- (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
- "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
- let isCodeGenOnly = 1 in
- def QVFLOGICALs : XForm_20<4, 4,
- (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
- "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
-
- // Load indexed instructions
- let mayLoad = 1 in {
- def QVLFDX : XForm_1_memOp<31, 583,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfdx $FRT, $src", IIC_LdStLFD,
- [(set v4f64:$FRT, (load xoaddr:$src))]>;
- let isCodeGenOnly = 1 in
- def QVLFDXb : XForm_1_memOp<31, 583,
- (outs qbrc:$FRT), (ins memrr:$src),
- "qvlfdx $FRT, $src", IIC_LdStLFD, []>;
-
- let RC = 1 in
- def QVLFDXA : XForm_1<31, 583,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfdxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFDUX : XForm_1<31, 615,
- (outs qfrc:$FRT, ptr_rc_nor0:$ea_result),
- (ins memrr:$src),
- "qvlfdux $FRT, $src", IIC_LdStLFDU, []>,
- RegConstraint<"$src.ptrreg = $ea_result">,
- NoEncode<"$ea_result">;
- let RC = 1 in
- def QVLFDUXA : XForm_1<31, 615,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfduxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFSX : XForm_1_memOp<31, 519,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfsx $FRT, $src", IIC_LdStLFD,
- [(set v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>;
-
- let isCodeGenOnly = 1 in
- def QVLFSXb : XForm_1<31, 519,
- (outs qbrc:$FRT), (ins memrr:$src),
- "qvlfsx $FRT, $src", IIC_LdStLFD,
- [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>;
- let isCodeGenOnly = 1 in
- def QVLFSXs : XForm_1_memOp<31, 519,
- (outs qsrc:$FRT), (ins memrr:$src),
- "qvlfsx $FRT, $src", IIC_LdStLFD,
- [(set v4f32:$FRT, (load xoaddr:$src))]>;
-
- let RC = 1 in
- def QVLFSXA : XForm_1<31, 519,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfsxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFSUX : XForm_1<31, 551,
- (outs qsrc:$FRT, ptr_rc_nor0:$ea_result),
- (ins memrr:$src),
- "qvlfsux $FRT, $src", IIC_LdStLFDU, []>,
- RegConstraint<"$src.ptrreg = $ea_result">,
- NoEncode<"$ea_result">;
-
- let RC = 1 in
- def QVLFSUXA : XForm_1<31, 551,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFCDX : XForm_1<31, 71,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcdx $FRT, $src", IIC_LdStLFD, []>;
- let RC = 1 in
- def QVLFCDXA : XForm_1<31, 71,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFCDUX : XForm_1<31, 103,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcdux $FRT, $src", IIC_LdStLFD, []>;
- let RC = 1 in
- def QVLFCDUXA : XForm_1<31, 103,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFCSX : XForm_1<31, 7,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
- let isCodeGenOnly = 1 in
- def QVLFCSXs : XForm_1<31, 7,
- (outs qsrc:$FRT), (ins memrr:$src),
- "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
-
- let RC = 1 in
- def QVLFCSXA : XForm_1<31, 7,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFCSUX : XForm_1<31, 39,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcsux $FRT, $src", IIC_LdStLFD, []>;
- let RC = 1 in
- def QVLFCSUXA : XForm_1<31, 39,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFIWAX : XForm_1<31, 871,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfiwax $FRT, $src", IIC_LdStLFD, []>;
- let RC = 1 in
- def QVLFIWAXA : XForm_1<31, 871,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFIWZX : XForm_1<31, 839,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>;
- let RC = 1 in
- def QVLFIWZXA : XForm_1<31, 839,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>;
- }
-
-
- def QVLPCLDX : XForm_1<31, 582,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlpcldx $FRT, $src", IIC_LdStLFD, []>;
- def QVLPCLSX : XForm_1<31, 518,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlpclsx $FRT, $src", IIC_LdStLFD, []>;
- let isCodeGenOnly = 1 in
- def QVLPCLSXint : XForm_11<31, 518,
- (outs qfrc:$FRT), (ins G8RC:$src),
- "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>;
- def QVLPCRDX : XForm_1<31, 70,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>;
- def QVLPCRSX : XForm_1<31, 6,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>;
-
- // Store indexed instructions
- let mayStore = 1 in {
- def QVSTFDX : XForm_8_memOp<31, 711,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfdx $FRT, $dst", IIC_LdStSTFD,
- [(store qfrc:$FRT, xoaddr:$dst)]>;
- let isCodeGenOnly = 1 in
- def QVSTFDXb : XForm_8_memOp<31, 711,
- (outs), (ins qbrc:$FRT, memrr:$dst),
- "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>;
-
- let RC = 1 in
- def QVSTFDXA : XForm_8<31, 711,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res),
- (ins qfrc:$FRT, memrr:$dst),
- "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
- NoEncode<"$ea_res">;
-
- let RC = 1 in
- def QVSTFDUXA : XForm_8<31, 743,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFDXI : XForm_8<31, 709,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFDXIA : XForm_8<31, 709,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFDUXI : XForm_8<31, 741,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFDUXIA : XForm_8<31, 741,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFSX : XForm_8_memOp<31, 647,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsx $FRT, $dst", IIC_LdStSTFD,
- [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>;
- let isCodeGenOnly = 1 in
- def QVSTFSXs : XForm_8_memOp<31, 647,
- (outs), (ins qsrc:$FRT, memrr:$dst),
- "qvstfsx $FRT, $dst", IIC_LdStSTFD,
- [(store qsrc:$FRT, xoaddr:$dst)]>;
-
- let RC = 1 in
- def QVSTFSXA : XForm_8<31, 647,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
- (ins qsrc:$FRT, memrr:$dst),
- "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
- NoEncode<"$ea_res">;
- let isCodeGenOnly = 1 in
- def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
- (ins qfrc:$FRT, memrr:$dst),
- "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
- NoEncode<"$ea_res">;
-
- let RC = 1 in
- def QVSTFSUXA : XForm_8<31, 679,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFSXI : XForm_8<31, 645,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFSXIA : XForm_8<31, 645,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFSUXI : XForm_8<31, 677,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFSUXIA : XForm_8<31, 677,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCDX : XForm_8<31, 199,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCDXA : XForm_8<31, 199,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCSX : XForm_8<31, 135,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
- let isCodeGenOnly = 1 in
- def QVSTFCSXs : XForm_8<31, 135,
- (outs), (ins qsrc:$FRT, memrr:$dst),
- "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
-
- let RC = 1 in
- def QVSTFCSXA : XForm_8<31, 135,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCDUX : XForm_8<31, 231,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCDUXA : XForm_8<31, 231,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCSUX : XForm_8<31, 167,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCSUXA : XForm_8<31, 167,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCDXI : XForm_8<31, 197,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCDXIA : XForm_8<31, 197,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCSXI : XForm_8<31, 133,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCSXIA : XForm_8<31, 133,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCDUXI : XForm_8<31, 229,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCDUXIA : XForm_8<31, 229,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCSUXI : XForm_8<31, 165,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCSUXIA : XForm_8<31, 165,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFIWX : XForm_8<31, 967,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFIWXA : XForm_8<31, 967,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>;
- }
-}
-
-} // neverHasSideEffects
-}
-
-def : InstAlias<"qvfclr $FRT",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>;
-def : InstAlias<"qvfand $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>;
-def : InstAlias<"qvfandc $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>;
-def : InstAlias<"qvfctfb $FRT, $FRA",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>;
-def : InstAlias<"qvfxor $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>;
-def : InstAlias<"qvfor $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>;
-def : InstAlias<"qvfnor $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>;
-def : InstAlias<"qvfequ $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>;
-def : InstAlias<"qvfnot $FRT, $FRA",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>;
-def : InstAlias<"qvforc $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>;
-def : InstAlias<"qvfnand $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>;
-def : InstAlias<"qvfset $FRT",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>;
-
-//===----------------------------------------------------------------------===//
-// Additional QPX Patterns
-//
-
-def : Pat<(v4f64 (scalar_to_vector f64:$A)),
- (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), $A, sub_64)>;
-def : Pat<(v4f32 (scalar_to_vector f32:$A)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>;
-
-def : Pat<(f64 (extractelt v4f64:$S, 0)),
- (EXTRACT_SUBREG $S, sub_64)>;
-def : Pat<(f32 (extractelt v4f32:$S, 0)),
- (EXTRACT_SUBREG $S, sub_64)>;
-
-def : Pat<(f64 (extractelt v4f64:$S, 1)),
- (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>;
-def : Pat<(f64 (extractelt v4f64:$S, 2)),
- (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>;
-def : Pat<(f64 (extractelt v4f64:$S, 3)),
- (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>;
-
-def : Pat<(f32 (extractelt v4f32:$S, 1)),
- (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>;
-def : Pat<(f32 (extractelt v4f32:$S, 2)),
- (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>;
-def : Pat<(f32 (extractelt v4f32:$S, 3)),
- (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>;
-
-def : Pat<(f64 (extractelt v4f64:$S, i64:$F)),
- (EXTRACT_SUBREG (QVFPERM $S, $S,
- (QVLPCLSXint (RLDICR $F, 2,
- /* 63-2 = */ 61))),
- sub_64)>;
-def : Pat<(f32 (extractelt v4f32:$S, i64:$F)),
- (EXTRACT_SUBREG (QVFPERMs $S, $S,
- (QVLPCLSXint (RLDICR $F, 2,
- /* 63-2 = */ 61))),
- sub_64)>;
-
-def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C),
- (QVFPERM $A, $B, $C)>;
-
-def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B),
- (QVFCPSGN $A, $B)>;
-
-// FCOPYSIGN's operand types need not agree.
-def : Pat<(fcopysign v4f64:$frB, v4f32:$frA),
- (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>;
-def : Pat<(fcopysign QSRC:$frB, QFRC:$frA),
- (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>;
-
-def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>;
-def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>;
-def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>;
-
-def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>;
-def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>;
-def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>;
-def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>;
-
-def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>;
-def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>;
-
-def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B),
- (QVFADD $A, $B)>;
-def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B),
- (QVFSUB $A, $B)>;
-def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B),
- (QVFMUL $A, $B)>;
-
-// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b)
-def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B),
- (QVFNMSUB $A, $C, $B)>;
-def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B),
- (QVFNMSUB $A, $C, $B)>;
-def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
- (QVFNMSUBSs $A, $C, $B)>;
-def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
- (QVFNMSUBSs $A, $C, $B)>;
-
-def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C),
- (QVFMADD $A, $B, $C)>;
-def : Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C),
- (QVFNMADD $A, $B, $C)>;
-def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C),
- (QVFMSUB $A, $B, $C)>;
-def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C),
- (QVFNMSUB $A, $B, $C)>;
-
-def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src),
- (QVLFDX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
- (QVLFDXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src),
- (QVLFSX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
- (QVLFSXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src),
- (QVLFCDXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src),
- (QVLFCDX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src),
- (QVLFCSXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfcs xoaddr:$src),
- (QVLFCSX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
- (QVLFDXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src),
- (QVLFIWAXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src),
- (QVLFIWAX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src),
- (QVLFIWZXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src),
- (QVLFIWZX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
- (QVLFSXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src),
- (QVLPCLDX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src),
- (QVLPCLSX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src),
- (QVLPCRDX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src),
- (QVLPCRSX xoaddr:$src)>;
-
-def : Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst),
- (QVSTFDX $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst),
- (QVSTFSX $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst),
- (QVSTFCDXA $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst),
- (QVSTFCDX $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst),
- (QVSTFCSXA $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst),
- (QVSTFCSX $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst),
- (QVSTFDXA $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst),
- (QVSTFIWXA $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst),
- (QVSTFIWX $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst),
- (QVSTFSXA $T, xoaddr:$dst)>;
-
-def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
- (QVSTFDUX $rS, $ptrreg, $ptroff)>;
-def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
- (QVSTFSUX $rS, $ptrreg, $ptroff)>;
-def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
- (QVSTFSUXs $rS, $ptrreg, $ptroff)>;
-
-def : Pat<(int_ppc_qpx_qvflogical v4f64:$A, v4f64:$B, (i32 imm:$idx)),
- (QVFLOGICAL $A, $B, imm:$idx)>;
-def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)),
- (QVGPCI imm:$idx)>;
-
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE),
- (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE),
- (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE),
- (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO),
- (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ),
- (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT),
- (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE),
- (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
- (QVFCMPLTb $FRA, $FRB), (i32 13))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT),
- (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE),
- (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
- (QVFCMPGTb $FRA, $FRB), (i32 13))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE),
- (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
- (QVFCMPEQb $FRA, $FRB), (i32 13))>;
-
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ),
- (QVFCMPEQb $FRA, $FRB)>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT),
- (QVFCMPGTb $FRA, $FRB)>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE),
- (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
- (QVFCMPLTb $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT),
- (QVFCMPLTb $FRA, $FRB)>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE),
- (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
- (QVFCMPGTb $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE),
- (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
- (QVFCMPEQb $FRA, $FRB), (i32 10))>;
-
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE),
- (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE),
- (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETONE),
- (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO),
- (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ),
- (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT),
- (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE),
- (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
- (QVFCMPLTbs $FRA, $FRB), (i32 13))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT),
- (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE),
- (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
- (QVFCMPGTbs $FRA, $FRB), (i32 13))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE),
- (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
- (QVFCMPEQbs $FRA, $FRB), (i32 13))>;
-
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ),
- (QVFCMPEQbs $FRA, $FRB)>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT),
- (QVFCMPGTbs $FRA, $FRB)>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE),
- (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
- (QVFCMPLTbs $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT),
- (QVFCMPLTbs $FRA, $FRB)>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE),
- (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
- (QVFCMPGTbs $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE),
- (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
- (QVFCMPEQbs $FRA, $FRB), (i32 10))>;
-
-def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)),
- (QVFLOGICALb $FRA, $FRB, (i32 4))>;
-def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)),
- (QVFLOGICALb $FRA, $FRB, (i32 8))>;
-def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)),
- (QVFLOGICALb $FRA, $FRB, (i32 9))>;
-def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)),
- (QVFLOGICALb $FRA, $FRB, (i32 13))>;
-def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)),
- (QVFLOGICALb $FRA, $FRB, (i32 14))>;
-
-def : Pat<(and v4i1:$FRA, v4i1:$FRB),
- (QVFLOGICALb $FRA, $FRB, (i32 1))>;
-def : Pat<(or v4i1:$FRA, v4i1:$FRB),
- (QVFLOGICALb $FRA, $FRB, (i32 7))>;
-def : Pat<(xor v4i1:$FRA, v4i1:$FRB),
- (QVFLOGICALb $FRA, $FRB, (i32 6))>;
-def : Pat<(not v4i1:$FRA),
- (QVFLOGICALb $FRA, $FRA, (i32 10))>;
-
-def : Pat<(v4f64 (fpextend v4f32:$src)),
- (COPY_TO_REGCLASS $src, QFRC)>;
-
-def : Pat<(v4f32 (fround_exact v4f64:$src)),
- (COPY_TO_REGCLASS $src, QSRC)>;
-
-// Extract the underlying floating-point values from the
-// QPX (-1.0, 1.0) boolean representation.
-def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
- (COPY_TO_REGCLASS $src, QFRC)>;
-
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
- (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)),
- (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
- (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)),
- (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
- (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
- (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)),
- (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
- (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)),
- (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
- (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
- (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)),
- (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
- (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)),
- (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
- (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
- (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)),
- (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
- (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)),
- (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
- (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
- (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)),
- (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
- (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)),
- (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
- (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
- (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)),
- (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
- (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)),
- (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
- (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-} // end HasQPX
-
-let Predicates = [HasQPX, NoNaNsFPMath] in {
-def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
- (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>;
-def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
- (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>;
-
-def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
- (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>;
-def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
- (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>;
-}
-
-let Predicates = [HasQPX, NaNsFPMath] in {
-// When either of these operands is NaN, we should return the other operand.
-// QVFCMPLT/QVFCMPGT return false if either operand is NaN, which means we need
-// to explicitly or with a NaN test on the second operand.
-def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
- (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
- (QVFTSTNANb $FRB, $FRB), (i32 7)),
- $FRB, $FRA)>;
-def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
- (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
- (QVFTSTNANb $FRB, $FRB), (i32 7)),
- $FRB, $FRA)>;
-
-def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
- (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
- (QVFTSTNANbs $FRB, $FRB), (i32 7)),
- $FRB, $FRA)>;
-def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
- (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
- (QVFTSTNANbs $FRB, $FRB), (i32 7)),
- $FRB, $FRA)>;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 858eb0c9fe50..299b34ca8283 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -820,16 +820,6 @@ def SPESTWX : XForm_8<31, 151, (outs), (ins spe4rc:$rS, memrr:$dst),
} // HasSPE
let Predicates = [HasSPE] in {
-def : Pat<(f64 (extloadf32 iaddr:$src)),
- (COPY_TO_REGCLASS (SPELWZ iaddr:$src), SPERC)>;
-def : Pat<(f64 (extloadf32 xaddr:$src)),
- (COPY_TO_REGCLASS (SPELWZX xaddr:$src), SPERC)>;
-
-def : Pat<(f64 (fpextend f32:$src)),
- (COPY_TO_REGCLASS $src, SPERC)>;
-}
-
-let Predicates = [HasSPE] in {
def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst),
(ins crrc:$cond, spe4rc:$T, spe4rc:$F,
i32imm:$BROPC), "#SELECT_CC_SPE4",
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 9ba5058a6f81..db6e00c71b89 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -145,6 +145,7 @@ def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED",
def HasVSX : Predicate<"Subtarget->hasVSX()">;
def IsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
def IsBigEndian : Predicate<"!Subtarget->isLittleEndian()">;
+def IsPPC64 : Predicate<"Subtarget->isPPC64()">;
def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">;
def HasP8Vector : Predicate<"Subtarget->hasP8Vector()">;
def HasDirectMove : Predicate<"Subtarget->hasDirectMove()">;
@@ -167,7 +168,7 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
def _rec : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[(set InTy:$XT,
- (InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>,
+ (InTy (PPCvcmp_rec InTy:$XA, InTy:$XB, xo)))]>,
isRecordForm;
}
}
@@ -362,7 +363,8 @@ let hasSideEffects = 0 in {
}
} // mayStore
- let Uses = [RM], mayRaiseFPException = 1 in {
+ let mayRaiseFPException = 1 in {
+ let Uses = [RM] in {
// Add/Mul Instructions
let isCommutable = 1 in {
def XSADDDP : XX3Form<60, 32,
@@ -622,12 +624,30 @@ let hasSideEffects = 0 in {
"xsrsqrtedp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+ let mayRaiseFPException = 0 in {
def XSTDIVDP : XX3Form_1<60, 61,
(outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
"xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
def XSTSQRTDP : XX2Form_1<60, 106,
(outs crrc:$crD), (ins vsfrc:$XB),
- "xstsqrtdp $crD, $XB", IIC_FPCompare, []>;
+ "xstsqrtdp $crD, $XB", IIC_FPCompare,
+ [(set i32:$crD, (PPCftsqrt f64:$XB))]>;
+ def XVTDIVDP : XX3Form_1<60, 125,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XVTDIVSP : XX3Form_1<60, 93,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+ def XVTSQRTDP : XX2Form_1<60, 234,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtdp $crD, $XB", IIC_FPCompare,
+ [(set i32:$crD, (PPCftsqrt v2f64:$XB))]>;
+ def XVTSQRTSP : XX2Form_1<60, 170,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtsp $crD, $XB", IIC_FPCompare,
+ [(set i32:$crD, (PPCftsqrt v4f32:$XB))]>;
+ }
def XVDIVDP : XX3Form<60, 120,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
@@ -647,20 +667,6 @@ let hasSideEffects = 0 in {
"xvsqrtsp $XT, $XB", IIC_FPSqrtS,
[(set v4f32:$XT, (any_fsqrt v4f32:$XB))]>;
- def XVTDIVDP : XX3Form_1<60, 125,
- (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
- "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
- def XVTDIVSP : XX3Form_1<60, 93,
- (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
- "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;
-
- def XVTSQRTDP : XX2Form_1<60, 234,
- (outs crrc:$crD), (ins vsrc:$XB),
- "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>;
- def XVTSQRTSP : XX2Form_1<60, 170,
- (outs crrc:$crD), (ins vsrc:$XB),
- "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>;
-
def XVREDP : XX2Form<60, 218,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvredp $XT, $XB", IIC_VecFP,
@@ -707,6 +713,7 @@ let hasSideEffects = 0 in {
int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>;
// Move Instructions
+ let mayRaiseFPException = 0 in {
def XSABSDP : XX2Form<60, 345,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsabsdp $XT, $XB", IIC_VecFP,
@@ -760,6 +767,7 @@ let hasSideEffects = 0 in {
(outs vsrc:$XT), (ins vsrc:$XB),
"xvnegsp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (fneg v4f32:$XB))]>;
+ }
// Conversion Instructions
def XSCVDPSP : XX2Form<60, 265,
@@ -768,50 +776,50 @@ let hasSideEffects = 0 in {
def XSCVDPSXDS : XX2Form<60, 344,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpsxds $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfctidz f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fctidz f64:$XB))]>;
let isCodeGenOnly = 1 in
def XSCVDPSXDSs : XX2Form<60, 344,
(outs vssrc:$XT), (ins vssrc:$XB),
"xscvdpsxds $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfctidz f32:$XB))]>;
+ [(set f32:$XT, (PPCany_fctidz f32:$XB))]>;
def XSCVDPSXWS : XX2Form<60, 88,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpsxws $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfctiwz f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fctiwz f64:$XB))]>;
let isCodeGenOnly = 1 in
def XSCVDPSXWSs : XX2Form<60, 88,
(outs vssrc:$XT), (ins vssrc:$XB),
"xscvdpsxws $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfctiwz f32:$XB))]>;
+ [(set f32:$XT, (PPCany_fctiwz f32:$XB))]>;
def XSCVDPUXDS : XX2Form<60, 328,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpuxds $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfctiduz f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fctiduz f64:$XB))]>;
let isCodeGenOnly = 1 in
def XSCVDPUXDSs : XX2Form<60, 328,
(outs vssrc:$XT), (ins vssrc:$XB),
"xscvdpuxds $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfctiduz f32:$XB))]>;
+ [(set f32:$XT, (PPCany_fctiduz f32:$XB))]>;
def XSCVDPUXWS : XX2Form<60, 72,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpuxws $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fctiwuz f64:$XB))]>;
let isCodeGenOnly = 1 in
def XSCVDPUXWSs : XX2Form<60, 72,
(outs vssrc:$XT), (ins vssrc:$XB),
"xscvdpuxws $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfctiwuz f32:$XB))]>;
+ [(set f32:$XT, (PPCany_fctiwuz f32:$XB))]>;
def XSCVSPDP : XX2Form<60, 329,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvspdp $XT, $XB", IIC_VecFP, []>;
def XSCVSXDDP : XX2Form<60, 376,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvsxddp $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfcfid f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fcfid f64:$XB))]>;
def XSCVUXDDP : XX2Form<60, 360,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvuxddp $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfcfidu f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fcfidu f64:$XB))]>;
def XVCVDPSP : XX2Form<60, 393,
(outs vsrc:$XT), (ins vsrc:$XB),
@@ -820,7 +828,7 @@ let hasSideEffects = 0 in {
def XVCVDPSXDS : XX2Form<60, 472,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpsxds $XT, $XB", IIC_VecFP,
- [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>;
+ [(set v2i64:$XT, (any_fp_to_sint v2f64:$XB))]>;
def XVCVDPSXWS : XX2Form<60, 216,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpsxws $XT, $XB", IIC_VecFP,
@@ -828,7 +836,7 @@ let hasSideEffects = 0 in {
def XVCVDPUXDS : XX2Form<60, 456,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpuxds $XT, $XB", IIC_VecFP,
- [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>;
+ [(set v2i64:$XT, (any_fp_to_uint v2f64:$XB))]>;
def XVCVDPUXWS : XX2Form<60, 200,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpuxws $XT, $XB", IIC_VecFP,
@@ -844,56 +852,105 @@ let hasSideEffects = 0 in {
def XVCVSPSXWS : XX2Form<60, 152,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspsxws $XT, $XB", IIC_VecFP,
- [(set v4i32:$XT, (fp_to_sint v4f32:$XB))]>;
+ [(set v4i32:$XT, (any_fp_to_sint v4f32:$XB))]>;
def XVCVSPUXDS : XX2Form<60, 392,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspuxds $XT, $XB", IIC_VecFP, []>;
def XVCVSPUXWS : XX2Form<60, 136,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspuxws $XT, $XB", IIC_VecFP,
- [(set v4i32:$XT, (fp_to_uint v4f32:$XB))]>;
+ [(set v4i32:$XT, (any_fp_to_uint v4f32:$XB))]>;
def XVCVSXDDP : XX2Form<60, 504,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxddp $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>;
+ [(set v2f64:$XT, (any_sint_to_fp v2i64:$XB))]>;
def XVCVSXDSP : XX2Form<60, 440,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxdsp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (int_ppc_vsx_xvcvsxdsp v2i64:$XB))]>;
- def XVCVSXWDP : XX2Form<60, 248,
- (outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvsxwdp $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>;
def XVCVSXWSP : XX2Form<60, 184,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxwsp $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (sint_to_fp v4i32:$XB))]>;
+ [(set v4f32:$XT, (any_sint_to_fp v4i32:$XB))]>;
def XVCVUXDDP : XX2Form<60, 488,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxddp $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>;
+ [(set v2f64:$XT, (any_uint_to_fp v2i64:$XB))]>;
def XVCVUXDSP : XX2Form<60, 424,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxdsp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (int_ppc_vsx_xvcvuxdsp v2i64:$XB))]>;
+ def XVCVUXWSP : XX2Form<60, 168,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (any_uint_to_fp v4i32:$XB))]>;
+
+ let mayRaiseFPException = 0 in {
+ def XVCVSXWDP : XX2Form<60, 248,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>;
def XVCVUXWDP : XX2Form<60, 232,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxwdp $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (int_ppc_vsx_xvcvuxwdp v4i32:$XB))]>;
- def XVCVUXWSP : XX2Form<60, 168,
+ }
+
+ // Rounding Instructions respecting current rounding mode
+ def XSRDPIC : XX2Form<60, 107,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpic $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ def XVRDPIC : XX2Form<60, 235,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvuxwsp $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (uint_to_fp v4i32:$XB))]>;
+ "xvrdpic $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ def XVRSPIC : XX2Form<60, 171,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspic $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ // Max/Min Instructions
+ let isCommutable = 1 in {
+ def XSMAXDP : XX3Form<60, 160,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
+ def XSMINDP : XX3Form<60, 168,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
+
+ def XVMAXDP : XX3Form<60, 224,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
+ def XVMINDP : XX3Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
+
+ def XVMAXSP : XX3Form<60, 192,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
+ def XVMINSP : XX3Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvminsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
+ } // isCommutable
+ } // Uses = [RM]
- // Rounding Instructions
+ // Rounding Instructions with static direction.
def XSRDPI : XX2Form<60, 73,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpi $XT, $XB", IIC_VecFP,
[(set f64:$XT, (any_fround f64:$XB))]>;
- def XSRDPIC : XX2Form<60, 107,
- (outs vsfrc:$XT), (ins vsfrc:$XB),
- "xsrdpic $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (any_fnearbyint f64:$XB))]>;
def XSRDPIM : XX2Form<60, 121,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpim $XT, $XB", IIC_VecFP,
@@ -911,10 +968,6 @@ let hasSideEffects = 0 in {
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpi $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (any_fround v2f64:$XB))]>;
- def XVRDPIC : XX2Form<60, 235,
- (outs vsrc:$XT), (ins vsrc:$XB),
- "xvrdpic $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (any_fnearbyint v2f64:$XB))]>;
def XVRDPIM : XX2Form<60, 249,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpim $XT, $XB", IIC_VecFP,
@@ -932,10 +985,6 @@ let hasSideEffects = 0 in {
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspi $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (any_fround v4f32:$XB))]>;
- def XVRSPIC : XX2Form<60, 171,
- (outs vsrc:$XT), (ins vsrc:$XB),
- "xvrspic $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (any_fnearbyint v4f32:$XB))]>;
def XVRSPIM : XX2Form<60, 185,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspim $XT, $XB", IIC_VecFP,
@@ -948,43 +997,7 @@ let hasSideEffects = 0 in {
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspiz $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (any_ftrunc v4f32:$XB))]>;
-
- // Max/Min Instructions
- let isCommutable = 1 in {
- def XSMAXDP : XX3Form<60, 160,
- (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
- "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
- [(set vsfrc:$XT,
- (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
- def XSMINDP : XX3Form<60, 168,
- (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
- "xsmindp $XT, $XA, $XB", IIC_VecFP,
- [(set vsfrc:$XT,
- (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
-
- def XVMAXDP : XX3Form<60, 224,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
- [(set vsrc:$XT,
- (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
- def XVMINDP : XX3Form<60, 232,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmindp $XT, $XA, $XB", IIC_VecFP,
- [(set vsrc:$XT,
- (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
-
- def XVMAXSP : XX3Form<60, 192,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
- [(set vsrc:$XT,
- (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
- def XVMINSP : XX3Form<60, 200,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvminsp $XT, $XA, $XB", IIC_VecFP,
- [(set vsrc:$XT,
- (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
- } // isCommutable
- } // Uses = [RM], mayRaiseFPException
+ } // mayRaiseFPException
// Logical Instructions
let isCommutable = 1 in
@@ -1170,7 +1183,7 @@ let Predicates = [HasVSX, HasP8Vector] in {
"xsresp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfre f32:$XB))]>;
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
- let hasSideEffects = 1, mayRaiseFPException = 1 in
+ let hasSideEffects = 1 in
def XSRSP : XX2Form<60, 281,
(outs vssrc:$XT), (ins vsfrc:$XB),
"xsrsp $XT, $XB", IIC_VecFP,
@@ -1268,18 +1281,18 @@ let Predicates = [HasVSX, HasP8Vector] in {
def XSCVSXDSP : XX2Form<60, 312,
(outs vssrc:$XT), (ins vsfrc:$XB),
"xscvsxdsp $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfcfids f64:$XB))]>;
+ [(set f32:$XT, (PPCany_fcfids f64:$XB))]>;
def XSCVUXDSP : XX2Form<60, 296,
(outs vssrc:$XT), (ins vsfrc:$XB),
"xscvuxdsp $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfcfidus f64:$XB))]>;
+ [(set f32:$XT, (PPCany_fcfidus f64:$XB))]>;
+ } // mayRaiseFPException
// Conversions between vector and scalar single precision
def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
"xscvdpspn $XT, $XB", IIC_VecFP, []>;
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
- } // mayRaiseFPException
let Predicates = [HasVSX, HasDirectMove] in {
// VSX direct move instructions
@@ -1440,15 +1453,16 @@ let Predicates = [HasVSX, HasP9Vector] in {
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
// QP Compare Ordered/Unordered
let hasSideEffects = 1 in {
- def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
- def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
-
// DP/QP Compare Exponents
def XSCMPEXPDP : XX3Form_1<60, 59,
(outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
"xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
+ let mayRaiseFPException = 1 in {
+ def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
+ def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
+
// DP Compare ==, >=, >, !=
// Use vsrc for XT, because the entire register of XT is set.
// XT.dword[1] = 0x0000_0000_0000_0000
@@ -1458,6 +1472,7 @@ let Predicates = [HasVSX, HasP9Vector] in {
IIC_FPCompare, []>;
def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
IIC_FPCompare, []>;
+ }
}
//===--------------------------------------------------------------------===//
@@ -1476,9 +1491,8 @@ let Predicates = [HasVSX, HasP9Vector] in {
f128:$vB))]>;
}
- // FIXME: Setting the hasSideEffects flag here to match current behaviour.
// Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
- let hasSideEffects = 1 in {
+ let mayRaiseFPException = 1 in {
def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>;
def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
@@ -1494,11 +1508,12 @@ let Predicates = [HasVSX, HasP9Vector] in {
// vsfrc for src and dest register. xscvhpdp's src only use the left 16 bits,
// but we still use vsfrc for it.
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
- let hasSideEffects = 1 in {
+ let hasSideEffects = 1, mayRaiseFPException = 1 in {
def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
}
+ let mayRaiseFPException = 1 in {
// Vector HP -> SP
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let hasSideEffects = 1 in
@@ -1507,16 +1522,15 @@ let Predicates = [HasVSX, HasP9Vector] in {
[(set v4f32:$XT,
(int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
- let mayRaiseFPException = 1 in {
- // Round to Quad-Precision Integer [with Inexact]
- def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
- def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
- }
+ // Round to Quad-Precision Integer [with Inexact]
+ def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
+ def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
// Round Quad-Precision to Double-Extended Precision (fp80)
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let hasSideEffects = 1 in
def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
+ }
//===--------------------------------------------------------------------===//
// Insert/Extract Instructions
@@ -1607,6 +1621,7 @@ let Predicates = [HasVSX, HasP9Vector] in {
(int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
// Maximum/Minimum Type-C/Type-J DP
+ let mayRaiseFPException = 1 in {
def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
IIC_VecFP,
[(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
@@ -1621,6 +1636,7 @@ let Predicates = [HasVSX, HasP9Vector] in {
def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
IIC_VecFP, []>;
}
+ }
// Vector Byte-Reverse H/W/D/Q Word
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
@@ -2392,33 +2408,48 @@ def MrgWords {
// arbitrarily chosen to be Big, Little.
//
// Predicate combinations available:
+// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr.
+// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr.
// [HasVSX]
// [HasVSX, IsBigEndian]
// [HasVSX, IsLittleEndian]
// [HasVSX, NoP9Vector]
+// [HasVSX, NoP9Vector, IsLittleEndian]
// [HasVSX, HasOnlySwappingMemOps]
// [HasVSX, HasOnlySwappingMemOps, IsBigEndian]
// [HasVSX, HasP8Vector]
-// [HasVSX, HasP8Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, IsBigEndian, IsPPC64]
// [HasVSX, HasP8Vector, IsLittleEndian]
-// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64]
// [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
// [HasVSX, HasDirectMove]
// [HasVSX, HasDirectMove, IsBigEndian]
// [HasVSX, HasDirectMove, IsLittleEndian]
-// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian]
+// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian, IsPPC64]
+// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64]
// [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian]
-// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian]
// [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian]
// [HasVSX, HasP9Vector]
-// [HasVSX, HasP9Vector, IsBigEndian]
+// [HasVSX, HasP9Vector, IsBigEndian, IsPPC64]
// [HasVSX, HasP9Vector, IsLittleEndian]
// [HasVSX, HasP9Altivec]
-// [HasVSX, HasP9Altivec, IsBigEndian]
+// [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64]
// [HasVSX, HasP9Altivec, IsLittleEndian]
-// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian]
+// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
// [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
+// These Altivec patterns are here because we need a VSX instruction to match
+// the intrinsic (but only on little endian systems).
+let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
+ def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+ v16i8:$b, v16i8:$c)),
+ (v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC),
+ (COPY_TO_REGCLASS $c, VSRC))))>;
+let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
+ def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+ v16i8:$b, v16i8:$c)),
+ (v16i8 (VPERMXOR $a, $b, $c))>;
+
let AddedComplexity = 400 in {
// Valid for any VSX subtarget, regardless of endianness.
let Predicates = [HasVSX] in {
@@ -2450,6 +2481,10 @@ def : Pat<(fneg (PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C)),
def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, (fneg v4f32:$C)),
(XVNMADDASP $C, $A, $B)>;
+def : Pat<(PPCfsqrt f64:$frA), (XSSQRTDP $frA)>;
+def : Pat<(PPCfsqrt v2f64:$frA), (XVSQRTDP $frA)>;
+def : Pat<(PPCfsqrt v4f32:$frA), (XVSQRTSP $frA)>;
+
def : Pat<(v2f64 (bitconvert v4f32:$A)),
(COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v2f64 (bitconvert v4i32:$A)),
@@ -2579,6 +2614,16 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
(XVDIVDP $A, $B)>;
+// Vector test for software divide and sqrt.
+def : Pat<(i32 (int_ppc_vsx_xvtdivdp v2f64:$A, v2f64:$B)),
+ (COPY_TO_REGCLASS (XVTDIVDP $A, $B), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtdivsp v4f32:$A, v4f32:$B)),
+ (COPY_TO_REGCLASS (XVTDIVSP $A, $B), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtsqrtdp v2f64:$A)),
+ (COPY_TO_REGCLASS (XVTSQRTDP $A), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtsqrtsp v4f32:$A)),
+ (COPY_TO_REGCLASS (XVTSQRTSP $A), GPRC)>;
+
// Reciprocal estimate
def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
(XVRESP $A)>;
@@ -2679,7 +2724,7 @@ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(f32 (any_fround f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPI
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (any_fnearbyint f32:$S)),
+def : Pat<(f32 (fnearbyint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (any_ffloor f32:$S)),
@@ -2694,11 +2739,11 @@ def : Pat<(f32 (any_ftrunc f32:$S)),
def : Pat<(f32 (any_frint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
+def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
// Rounding for double precision.
-def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
-def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
+def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>;
+def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
// Materialize a zero-vector of long long
def : Pat<(v2i64 immAllZerosV),
@@ -2975,6 +3020,19 @@ defm : ScalToVecWPermute<
VSFRC)), sub_64)>;
} // HasVSX, NoP9Vector
+// Any little endian pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, NoP9Vector, IsLittleEndian] in {
+// Load-and-splat using only X-Form VSX loads.
+defm : ScalToVecWPermute<
+ v2i64, (i64 (load xoaddr:$src)),
+ (XXPERMDIs (XFLOADf64 xoaddr:$src), 2),
+ (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>;
+defm : ScalToVecWPermute<
+ v2f64, (f64 (load xoaddr:$src)),
+ (XXPERMDIs (XFLOADf64 xoaddr:$src), 2),
+ (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>;
+} // HasVSX, NoP9Vector, IsLittleEndian
+
// Any VSX subtarget that only has loads and stores that load in big endian
// order regardless of endianness. This is really pre-Power9 subtargets.
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
@@ -2986,8 +3044,8 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
} // HasVSX, HasOnlySwappingMemOps
-// Big endian VSX subtarget that only has loads and stores that always load
-// in big endian order. Really big endian pre-Power9 subtargets.
+// Big endian VSX subtarget that only has loads and stores that always
+// load in big endian order. Really big endian pre-Power9 subtargets.
let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in {
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -3080,7 +3138,7 @@ def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
} // HasVSX, HasP8Vector
// Big endian Power8 VSX subtarget.
-let Predicates = [HasVSX, HasP8Vector, IsBigEndian] in {
+let Predicates = [HasVSX, HasP8Vector, IsBigEndian, IsPPC64] in {
def : Pat<DWToSPExtractConv.El0SS1,
(f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
def : Pat<DWToSPExtractConv.El1SS1,
@@ -3158,7 +3216,7 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
(STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
sub_64), xoaddr:$src)>;
}
-} // HasVSX, HasP8Vector, IsBigEndian
+} // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
// Little endian Power8 VSX subtarget.
let Predicates = [HasVSX, HasP8Vector, IsLittleEndian] in {
@@ -3257,7 +3315,7 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
} // HasVSX, HasP8Vector, IsLittleEndian
// Big endian pre-Power9 VSX subtarget.
-let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian] in {
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] in {
def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
@@ -3268,7 +3326,7 @@ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
(XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
xoaddr:$src)>;
-} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian
+} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64
// Little endian pre-Power9 VSX subtarget.
let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in {
@@ -3525,8 +3583,8 @@ def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
(i32 VectorExtractions.LE_VARIABLE_WORD)>;
} // HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian
-// Big endian pre-Power9 VSX subtarget that has direct moves.
-let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian] in {
+// Big endian pre-Power9 64Bit VSX subtarget that has direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64] in {
// Big endian integer vectors using direct moves.
def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
(v2i64 (XXPERMDI
@@ -3540,7 +3598,7 @@ def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
(MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
-} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian
+} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64
// Little endian pre-Power9 VSX subtarget that has direct moves.
let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in {
@@ -3569,25 +3627,25 @@ def : Pat<(fneg (PPCfnmsub f128:$A, f128:$B, f128:$C)),
def : Pat<(PPCfnmsub f128:$A, f128:$B, (fneg f128:$C)),
(XSNMADDQP $C, $A, $B)>;
-def : Pat<(f128 (sint_to_fp i64:$src)),
+def : Pat<(f128 (any_sint_to_fp i64:$src)),
(f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
-def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))),
+def : Pat<(f128 (any_sint_to_fp (i64 (PPCmfvsr f64:$src)))),
(f128 (XSCVSDQP $src))>;
-def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))),
+def : Pat<(f128 (any_sint_to_fp (i32 (PPCmfvsr f64:$src)))),
(f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
-def : Pat<(f128 (uint_to_fp i64:$src)),
+def : Pat<(f128 (any_uint_to_fp i64:$src)),
(f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
-def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
+def : Pat<(f128 (any_uint_to_fp (i64 (PPCmfvsr f64:$src)))),
(f128 (XSCVUDQP $src))>;
// Convert (Un)Signed Word -> QP.
-def : Pat<(f128 (sint_to_fp i32:$src)),
+def : Pat<(f128 (any_sint_to_fp i32:$src)),
(f128 (XSCVSDQP (MTVSRWA $src)))>;
-def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))),
+def : Pat<(f128 (any_sint_to_fp (i32 (load xoaddr:$src)))),
(f128 (XSCVSDQP (LIWAX xoaddr:$src)))>;
-def : Pat<(f128 (uint_to_fp i32:$src)),
+def : Pat<(f128 (any_uint_to_fp i32:$src)),
(f128 (XSCVUDQP (MTVSRWZ $src)))>;
-def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
+def : Pat<(f128 (any_uint_to_fp (i32 (load xoaddr:$src)))),
(f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
@@ -3761,11 +3819,11 @@ def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
(f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>;
// Truncate & Convert QP -> (Un)Signed (D)Word.
-def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
-def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
-def : Pat<(i32 (fp_to_sint f128:$src)),
+def : Pat<(i64 (any_fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
+def : Pat<(i64 (any_fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
+def : Pat<(i32 (any_fp_to_sint f128:$src)),
(i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
-def : Pat<(i32 (fp_to_uint f128:$src)),
+def : Pat<(i32 (any_fp_to_uint f128:$src)),
(i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
// Instructions for store(fptosi).
@@ -3893,8 +3951,8 @@ def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
(v4i32 (LXVWSX xoaddr:$A))>;
} // HasVSX, HasP9Vector
-// Big endian Power9 subtarget.
-let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in {
+// Big endian 64Bit Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in {
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
@@ -4067,7 +4125,7 @@ foreach Idx = 0-15 in {
def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
(f128 (XSCVUDQP
(XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
-} // HasVSX, HasP9Vector, IsBigEndian
+} // HasVSX, HasP9Vector, IsBigEndian, IsPPC64
// Little endian Power9 subtarget.
let Predicates = [HasVSX, HasP9Vector, IsLittleEndian] in {
@@ -4292,8 +4350,8 @@ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
(v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
} // HasVSX, HasP9Altivec
-// Big endian Power9 VSX subtargets with P9 Altivec support.
-let Predicates = [HasVSX, HasP9Altivec, IsBigEndian] in {
+// Big endian Power9 64Bit VSX subtargets with P9 Altivec support.
+let Predicates = [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64] in {
def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
(VEXTUBLX $Idx, $S)>;
@@ -4426,7 +4484,7 @@ def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
(v4i32 (VEXTSB2W $A))>;
def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
(v2i64 (VEXTSB2D $A))>;
-} // HasVSX, HasP9Altivec, IsBigEndian
+} // HasVSX, HasP9Altivec, IsBigEndian, IsPPC64
// Little endian Power9 VSX subtargets with P9 Altivec support.
let Predicates = [HasVSX, HasP9Altivec, IsLittleEndian] in {
@@ -4563,8 +4621,9 @@ def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
(v2i64 (VEXTSB2D $A))>;
} // HasVSX, HasP9Altivec, IsLittleEndian
-// Big endian VSX subtarget that supports additional direct moves from ISA3.0.
-let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian] in {
+// Big endian 64Bit VSX subtarget that supports additional direct moves from
+// ISA3.0.
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64] in {
def : Pat<(i64 (extractelt v2i64:$A, 1)),
(i64 (MFVSRLD $A))>;
// Better way to build integer vectors if we have MTVSRDD. Big endian.
@@ -4577,7 +4636,7 @@ def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
(f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
-} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian
+} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64
// Little endian VSX subtarget that supports direct moves from ISA3.0.
let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] in {
@@ -4602,20 +4661,24 @@ def : InstAlias<"xvmovdp $XT, $XB",
def : InstAlias<"xvmovsp $XT, $XB",
(XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
-def : InstAlias<"xxspltd $XT, $XB, 0",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
-def : InstAlias<"xxspltd $XT, $XB, 1",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+// Certain versions of the AIX assembler may misassemble these mnemonics.
+let Predicates = [ModernAs] in {
+ def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+ def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+ def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
+ def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
+}
+
def : InstAlias<"xxmrghd $XT, $XA, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
def : InstAlias<"xxmrgld $XT, $XA, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
def : InstAlias<"xxswapd $XT, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
-def : InstAlias<"xxspltd $XT, $XB, 0",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
-def : InstAlias<"xxspltd $XT, $XB, 1",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
def : InstAlias<"xxswapd $XT, $XB",
(XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
def : InstAlias<"mfvrd $rA, $XT",
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index a7546d2be5d8..c24240909797 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -60,6 +60,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -80,10 +81,8 @@
using namespace llvm;
-// By default, we limit this to creating 16 common bases out of loops per
-// function. 16 is a little over half of the allocatable register set.
static cl::opt<unsigned> MaxVarsPrep("ppc-formprep-max-vars",
- cl::Hidden, cl::init(16),
+ cl::Hidden, cl::init(24),
cl::desc("Potential common base number threshold per function for PPC loop "
"prep"));
@@ -93,8 +92,7 @@ static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
// Sum of following 3 per loop thresholds for all loops can not be larger
// than MaxVarsPrep.
-// By default, we limit this to creating 9 PHIs for one loop.
-// 9 and 3 for each kind prep are exterimental values on Power9.
+// Now the thresholds for each kind of prep are experimental values on Power9.
static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars",
cl::Hidden, cl::init(3),
cl::desc("Potential PHI threshold per loop for PPC loop prep of update "
@@ -105,7 +103,7 @@ static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars",
cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"));
static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
- cl::Hidden, cl::init(3),
+ cl::Hidden, cl::init(8),
cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
@@ -277,8 +275,11 @@ static Value *GetPointerOperand(Value *MemI) {
} else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
return SMemI->getPointerOperand();
} else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
- if (IMemI->getIntrinsicID() == Intrinsic::prefetch)
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
+ IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp)
return IMemI->getArgOperand(0);
+ if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp)
+ return IMemI->getArgOperand(1);
}
return nullptr;
@@ -345,9 +346,13 @@ SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
MemI = SMemI;
PtrValue = SMemI->getPointerOperand();
} else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) {
- if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
+ IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
MemI = IMemI;
PtrValue = IMemI->getArgOperand(0);
+ } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
+ MemI = IMemI;
+ PtrValue = IMemI->getArgOperand(1);
} else continue;
} else continue;
@@ -606,6 +611,10 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
NewBasePtr = NewPHI;
}
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted below, causing the AssertingVH in the cache to trigger.
+ SCEVE.clear();
+
if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
BBChanged.insert(IDel->getParent());
BasePtr->replaceAllUsesWith(NewBasePtr);
@@ -791,7 +800,7 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
bool MadeChange = false;
// Only prep. the inner-most loop
- if (!L->empty())
+ if (!L->isInnermost())
return MadeChange;
// Return if already done enough preparation.
@@ -823,6 +832,11 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
if (ST && ST->hasAltivec() &&
PtrValue->getType()->getPointerElementType()->isVectorTy())
return false;
+ // There are no update forms for P10 lxvp/stxvp intrinsic.
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) ||
+ II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp))
+ return false;
// See getPreIndexedAddressParts, the displacement for LDU/STDU has to
// be 4's multiple (DS-form). For i64 loads/stores when the displacement
// fits in a 16-bit signed field but isn't a multiple of 4, it will be
@@ -860,7 +874,13 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
// Check if a load/store has DQ form.
auto isDQFormCandidate = [&] (const Instruction *I, const Value *PtrValue) {
assert((PtrValue && I) && "Invalid parameter!");
- return !isa<IntrinsicInst>(I) && ST && ST->hasP9Vector() &&
+ // Check if it is a P10 lxvp/stxvp intrinsic.
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (II)
+ return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp ||
+ II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp;
+ // Check if it is a P9 vector load/store.
+ return ST && ST->hasP9Vector() &&
(PtrValue->getType()->getPointerElementType()->isVectorTy());
};
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
index 2b0e604e0ccd..27b2c9a628d0 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
@@ -16,6 +16,7 @@
#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
@@ -64,8 +65,7 @@ private:
/// Checks if the specified function name represents an entry in the MASSV
/// library.
bool PPCLowerMASSVEntries::isMASSVFunc(StringRef Name) {
- auto Iter = std::find(std::begin(MASSVFuncs), std::end(MASSVFuncs), Name);
- return Iter != std::end(MASSVFuncs);
+ return llvm::is_contained(MASSVFuncs, Name);
}
// FIXME:
@@ -105,7 +105,7 @@ bool PPCLowerMASSVEntries::handlePowSpecialCases(CallInst *CI, Function &Func,
return false;
if (Constant *Exp = dyn_cast<Constant>(CI->getArgOperand(1)))
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(Exp->getSplatValue())) {
+ if (ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(Exp->getSplatValue())) {
// If the argument is 0.75 or 0.25 it is cheaper to turn it into pow
         // intrinsic so that it could be optimized as a sequence of sqrt's.
if (!CI->hasNoInfs() || !CI->hasApproxFunc())
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 236f98f32e18..5cc180d770b2 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -74,7 +74,9 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PPC_TOC_LO;
break;
case PPCII::MO_TLS:
- RefKind = MCSymbolRefExpr::VK_PPC_TLS;
+ bool IsPCRel = (MO.getTargetFlags() & ~access) == PPCII::MO_PCREL_FLAG;
+ RefKind = IsPCRel ? MCSymbolRefExpr::VK_PPC_TLS_PCREL
+ : MCSymbolRefExpr::VK_PPC_TLS;
break;
}
@@ -84,6 +86,14 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PCREL;
else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG))
RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL;
+ else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG))
+ RefKind = MCSymbolRefExpr::VK_TPREL;
+ else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL;
+ else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSLD_PCREL_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL;
+ else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL;
const MachineInstr *MI = MO.getParent();
const MachineFunction *MF = MI->getMF();
@@ -100,6 +110,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
MIOpcode == PPC::BL8_NOTOC) {
RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
}
+ if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PPC_PCREL_OPT;
}
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 227c863685ae..c8b01aaef828 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -267,6 +267,113 @@ void PPCMIPeephole::UpdateTOCSaves(
TOCSaves[MI] = Keep;
}
+// This function returns a list of all PHI nodes in the tree starting from
+// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
+// The list initially only contains the root PHI. When we visit a PHI node, we
+// add it to the list. We continue to look for other PHI node operands while
+// there are nodes to visit in the list. The function returns false if the
+// optimization cannot be applied on this tree.
+static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
+ MachineInstr *RootPHI,
+ SmallVectorImpl<MachineInstr *> &PHIs) {
+ PHIs.push_back(RootPHI);
+ unsigned VisitedIndex = 0;
+ while (VisitedIndex < PHIs.size()) {
+ MachineInstr *VisitedPHI = PHIs[VisitedIndex];
+ for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
+ PHIOp != NumOps; PHIOp += 2) {
+ Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
+ if (!Register::isVirtualRegister(RegOp))
+ return false;
+ MachineInstr *Instr = MRI->getVRegDef(RegOp);
+ // While collecting the PHI nodes, we check if they can be converted (i.e.
+ // all the operands are either copies, implicit defs or PHI nodes).
+ unsigned Opcode = Instr->getOpcode();
+ if (Opcode == PPC::COPY) {
+ Register Reg = Instr->getOperand(1).getReg();
+ if (!Register::isVirtualRegister(Reg) ||
+ MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
+ return false;
+ } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
+ return false;
+ // If we detect a cycle in the PHI nodes, we exit. It would be
+ // possible to change cycles as well, but that would add a lot
+ // of complexity for a case that is unlikely to occur with MMA
+ // code.
+ if (Opcode != PPC::PHI)
+ continue;
+ if (llvm::is_contained(PHIs, Instr))
+ return false;
+ PHIs.push_back(Instr);
+ }
+ VisitedIndex++;
+ }
+ return true;
+}
+
+// This function changes the unprimed accumulator PHI nodes in the PHIs list to
+// primed accumulator PHI nodes. The list is traversed in reverse order to
+// change all the PHI operands of a PHI node before changing the node itself.
+// We keep a map to associate each changed PHI node to its non-changed form.
+static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
+ MachineRegisterInfo *MRI,
+ SmallVectorImpl<MachineInstr *> &PHIs,
+ Register Dst) {
+ DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
+ for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) {
+ MachineInstr *PHI = *It;
+ SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
+ // We check if the current PHI node can be changed by looking at its
+ // operands. If all the operands are either copies from primed
+ // accumulators, implicit definitions or other unprimed accumulator
+ // PHI nodes, we change it.
+ for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
+ PHIOp += 2) {
+ Register RegOp = PHI->getOperand(PHIOp).getReg();
+ MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
+ unsigned Opcode = PHIInput->getOpcode();
+ assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
+ Opcode == PPC::PHI) &&
+ "Unexpected instruction");
+ if (Opcode == PPC::COPY) {
+ assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
+ &PPC::ACCRCRegClass &&
+ "Unexpected register class");
+ PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
+ } else if (Opcode == PPC::IMPLICIT_DEF) {
+ Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
+ BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
+ TII->get(PPC::IMPLICIT_DEF), AccReg);
+ PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
+ PHI->getOperand(PHIOp + 1)});
+ } else if (Opcode == PPC::PHI) {
+ // We found a PHI operand. At this point we know this operand
+ // has already been changed so we get its associated changed form
+ // from the map.
+ assert(ChangedPHIMap.count(PHIInput) == 1 &&
+ "This PHI node should have already been changed.");
+ MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
+ PHIOps.push_back({MachineOperand::CreateReg(
+ PrimedAccPHI->getOperand(0).getReg(), false),
+ PHI->getOperand(PHIOp + 1)});
+ }
+ }
+ Register AccReg = Dst;
+ // If the PHI node we are changing is the root node, the register it defines
+ // will be the destination register of the original copy (of the PHI def).
+ // For all other PHI's in the list, we need to create another primed
+ // accumulator virtual register as the PHI will no longer define the
+ // unprimed accumulator.
+ if (PHI != PHIs[0])
+ AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
+ MachineInstrBuilder NewPHI = BuildMI(
+ *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
+ for (auto RegMBB : PHIOps)
+ NewPHI.add(RegMBB.first).add(RegMBB.second);
+ ChangedPHIMap[PHI] = NewPHI.getInstr();
+ }
+}
+
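
    For readers skimming the two helpers above, the shape of the transformation is: collect the whole PHI web breadth-first (bailing out on anything that is not a copy, implicit def or PHI, or on a revisited node), then rewrite the list in reverse order so every operand already has its primed form before the PHI that uses it is rebuilt. A minimal standalone sketch of that collect-then-rewrite idiom, in plain C++ rather than the MachineInstr API (the Node struct and function names are invented for illustration):

    // Hypothetical stand-ins for PHI-like nodes; not LLVM data structures.
    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct Node {
      std::vector<Node *> Operands; // other PHI-like nodes feeding this one
      bool Rewritten = false;
    };

    // BFS by indexing into the growing vector, mirroring collectUnprimedAccPHIs.
    static bool collect(Node *Root, std::vector<Node *> &Order) {
      Order.push_back(Root);
      for (std::size_t Visited = 0; Visited < Order.size(); ++Visited)
        for (Node *Op : Order[Visited]->Operands) {
          if (std::find(Order.begin(), Order.end(), Op) != Order.end())
            return false; // revisited node: give up, as the real pass does
          Order.push_back(Op);
        }
      return true;
    }

    // Reverse traversal guarantees operands are rewritten before their users,
    // mirroring convertUnprimedAccPHIs.
    static void rewriteAll(std::vector<Node *> &Order) {
      for (auto It = Order.rbegin(), End = Order.rend(); It != End; ++It)
        (*It)->Rewritten = true;
    }
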
// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
bool Simplified = false;
@@ -321,6 +428,38 @@ bool PPCMIPeephole::simplifyCode(void) {
default:
break;
+ case PPC::COPY: {
+ Register Src = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ if (!Register::isVirtualRegister(Src) ||
+ !Register::isVirtualRegister(Dst))
+ break;
+ if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
+ MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
+ break;
+
+ // We are copying an unprimed accumulator to a primed accumulator.
+ // If the input to the copy is a PHI that is fed only by (i) copies in
+ // the other direction (ii) implicitly defined unprimed accumulators or
+ // (iii) other PHI nodes satisfying (i) and (ii), we can change
+ // the PHI to a PHI on primed accumulators (as long as we also change
+ // its operands). To detect and change such copies, we first get a list
+ // of all the PHI nodes starting from the root PHI node in BFS order.
+ // We then visit all these PHI nodes to check if they can be changed to
+ // primed accumulator PHI nodes and if so, we change them.
+ MachineInstr *RootPHI = MRI->getVRegDef(Src);
+ if (RootPHI->getOpcode() != PPC::PHI)
+ break;
+
+ SmallVector<MachineInstr *, 4> PHIs;
+ if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
+ break;
+
+ convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
+
+ ToErase = &MI;
+ break;
+ }
case PPC::LI:
case PPC::LI8: {
// If we are materializing a zero, look for any use operands for which
@@ -573,7 +712,7 @@ bool PPCMIPeephole::simplifyCode(void) {
Simplified = true;
Register ConvReg1 = RoundInstr->getOperand(1).getReg();
Register FRSPDefines = RoundInstr->getOperand(0).getReg();
- MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
+ MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
if (Use.getOperand(i).isReg() &&
Use.getOperand(i).getReg() == FRSPDefines)
@@ -848,142 +987,9 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::RLWINM_rec:
case PPC::RLWINM8:
case PPC::RLWINM8_rec: {
- unsigned FoldingReg = MI.getOperand(1).getReg();
- if (!Register::isVirtualRegister(FoldingReg))
- break;
-
- MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
- if (SrcMI->getOpcode() != PPC::RLWINM &&
- SrcMI->getOpcode() != PPC::RLWINM_rec &&
- SrcMI->getOpcode() != PPC::RLWINM8 &&
- SrcMI->getOpcode() != PPC::RLWINM8_rec)
- break;
- assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
- MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
- SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
- "Invalid PPC::RLWINM Instruction!");
- uint64_t SHSrc = SrcMI->getOperand(2).getImm();
- uint64_t SHMI = MI.getOperand(2).getImm();
- uint64_t MBSrc = SrcMI->getOperand(3).getImm();
- uint64_t MBMI = MI.getOperand(3).getImm();
- uint64_t MESrc = SrcMI->getOperand(4).getImm();
- uint64_t MEMI = MI.getOperand(4).getImm();
-
- assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
- "Invalid PPC::RLWINM Instruction!");
-
- // If MBMI is bigger than MEMI, we always can not get run of ones.
- // RotatedSrcMask non-wrap:
- // 0........31|32........63
- // RotatedSrcMask: B---E B---E
- // MaskMI: -----------|--E B------
- // Result: ----- --- (Bad candidate)
- //
- // RotatedSrcMask wrap:
- // 0........31|32........63
- // RotatedSrcMask: --E B----|--E B----
- // MaskMI: -----------|--E B------
- // Result: --- -----|--- ----- (Bad candidate)
- //
- // One special case is RotatedSrcMask is a full set mask.
- // RotatedSrcMask full:
- // 0........31|32........63
- // RotatedSrcMask: ------EB---|-------EB---
- // MaskMI: -----------|--E B------
- // Result: -----------|--- ------- (Good candidate)
-
- // Mark special case.
- bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
-
- // For other MBMI > MEMI cases, just return.
- if ((MBMI > MEMI) && !SrcMaskFull)
- break;
-
- // Handle MBMI <= MEMI cases.
- APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
- // In MI, we only need low 32 bits of SrcMI, just consider about low 32
- // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
- // while in PowerPC ISA, lowerest bit is at index 63.
- APInt MaskSrc =
- APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
-
- APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
- APInt FinalMask = RotatedSrcMask & MaskMI;
- uint32_t NewMB, NewME;
-
- // If final mask is 0, MI result should be 0 too.
- if (FinalMask.isNullValue()) {
- bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 ||
- MI.getOpcode() == PPC::RLWINM8_rec);
-
- Simplified = true;
-
- LLVM_DEBUG(dbgs() << "Replace Instr: ");
- LLVM_DEBUG(MI.dump());
-
- if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
- // Replace MI with "LI 0"
- MI.RemoveOperand(4);
- MI.RemoveOperand(3);
- MI.RemoveOperand(2);
- MI.getOperand(1).ChangeToImmediate(0);
- MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI));
- } else {
- // Replace MI with "ANDI_rec reg, 0"
- MI.RemoveOperand(4);
- MI.RemoveOperand(3);
- MI.getOperand(2).setImm(0);
- MI.setDesc(TII->get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
- MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
- if (SrcMI->getOperand(1).isKill()) {
- MI.getOperand(1).setIsKill(true);
- SrcMI->getOperand(1).setIsKill(false);
- } else
- // About to replace MI.getOperand(1), clear its kill flag.
- MI.getOperand(1).setIsKill(false);
- }
-
- LLVM_DEBUG(dbgs() << "With: ");
- LLVM_DEBUG(MI.dump());
- } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB,
- NewME) && NewMB <= NewME)|| SrcMaskFull) {
- // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
- // than NewME. Otherwise we get a 64 bit value after folding, but MI
- // return a 32 bit value.
-
- Simplified = true;
- LLVM_DEBUG(dbgs() << "Converting Instr: ");
- LLVM_DEBUG(MI.dump());
-
- uint16_t NewSH = (SHSrc + SHMI) % 32;
- MI.getOperand(2).setImm(NewSH);
- // If SrcMI mask is full, no need to update MBMI and MEMI.
- if (!SrcMaskFull) {
- MI.getOperand(3).setImm(NewMB);
- MI.getOperand(4).setImm(NewME);
- }
- MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
- if (SrcMI->getOperand(1).isKill()) {
- MI.getOperand(1).setIsKill(true);
- SrcMI->getOperand(1).setIsKill(false);
- } else
- // About to replace MI.getOperand(1), clear its kill flag.
- MI.getOperand(1).setIsKill(false);
-
- LLVM_DEBUG(dbgs() << "To: ");
- LLVM_DEBUG(MI.dump());
- }
- if (Simplified) {
- // If FoldingReg has no non-debug use and it has no implicit def (it
- // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
- // Otherwise keep it.
+ Simplified = TII->combineRLWINM(MI, &ToErase);
+ if (Simplified)
++NumRotatesCollapsed;
- if (MRI->use_nodbg_empty(FoldingReg) && !SrcMI->hasImplicitDef()) {
- ToErase = SrcMI;
- LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
- LLVM_DEBUG(SrcMI->dump());
- }
- }
break;
}
}
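
    The hunk above replaces the open-coded RLWINM folding (and its mask diagrams) with a call to the new PPCInstrInfo::combineRLWINM helper. The identity the deleted comments describe can be checked with ordinary 32-bit arithmetic: rotl distributes over AND, so two RLWINMs collapse into one whenever rotl32(MaskSrc, SHMI) & MaskMI is a contiguous run of ones. A small self-contained check follows; the shift and mask values are made up purely for illustration:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // rlwinm rD, rS, SH, MB, ME == rotl32(rS, SH) & MASK(MB, ME), where bit 0
    // is the most significant bit (PowerPC numbering) and the mask wraps when
    // MB > ME.
    static uint32_t rotl32(uint32_t V, unsigned S) {
      S %= 32;
      return S ? (V << S) | (V >> (32 - S)) : V;
    }

    static uint32_t mask(unsigned MB, unsigned ME) {
      uint32_t M = 0;
      unsigned I = MB;
      do {
        M |= 1u << (31 - I);
        I = (I + 1) % 32;
      } while (I != (ME + 1) % 32);
      return M;
    }

    static uint32_t rlwinm(uint32_t X, unsigned SH, unsigned MB, unsigned ME) {
      return rotl32(X, SH) & mask(MB, ME);
    }

    int main() {
      // rlwinm rY, rX, 8, 16, 31 followed by rlwinm rZ, rY, 4, 24, 31 has the
      // combined mask rotl32(0x0000FFFF, 4) & 0x000000FF == 0x000000F0, a run
      // of ones, so it folds to rlwinm rZ, rX, 12, 24, 27.
      for (uint32_t X : {0x12345678u, 0xFFFFFFFFu, 0x80000001u})
        assert(rlwinm(rlwinm(X, 8, 16, 31), 4, 24, 31) == rlwinm(X, 12, 24, 27));
      return 0;
    }
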
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index daf88589bb52..c976a9c62d3b 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -8,6 +8,7 @@
#include "PPCMachineFunctionInfo.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
@@ -63,3 +64,36 @@ bool PPCFunctionInfo::isLiveInZExt(Register VReg) const {
return LiveIn.second.isZExt();
return false;
}
+
+void PPCFunctionInfo::appendParameterType(ParamType Type) {
+ uint32_t CopyParamType = ParameterType;
+ int Bits = 0;
+
+  // If it is a fixed type, we only need to increase FixedParamNum; since the
+  // bit encoding of a fixed type is a zero bit, there is no need to change
+  // ParameterType.
+ if (Type == FixedType) {
+ ++FixedParamNum;
+ return;
+ }
+
+ ++FloatingPointParamNum;
+
+ for (int I = 0;
+ I < static_cast<int>(FloatingPointParamNum + FixedParamNum - 1); ++I) {
+ if (CopyParamType & XCOFF::TracebackTable::ParmTypeIsFloatingBit) {
+ // '10'b => floating point short parameter.
+ // '11'b => floating point long parameter.
+ CopyParamType <<= 2;
+ Bits += 2;
+ } else {
+ // '0'b => fixed parameter.
+ CopyParamType <<= 1;
+ ++Bits;
+ }
+ }
+
+ assert(Type != FixedType && "FixedType should already be handled.");
+ if (Bits < 31)
+ ParameterType |= Type << (30 - Bits);
+}
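
    The new appendParameterType packs one bitstring per parameter into a 32-bit word, most significant bit first: '0' for a fixed parameter, '10' for a floating-point short parameter, '11' for a floating-point long parameter (the decoding loop in the patch merely recomputes how many bits the earlier parameters consumed). A standalone sketch of the same packing, using a running bit count instead of that loop; the enum names are invented, and mapping short/long to single/double precision is an assumption, not something this patch states:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // Invented enum; '0'b, '10'b and '11'b follow the comments in the patch.
    enum ParamKind { Fixed, FloatShort, FloatLong };

    static uint32_t encodeParams(std::initializer_list<ParamKind> Params) {
      uint32_t Encoding = 0;
      int Bits = 0; // bits already consumed, counted from the MSB
      for (ParamKind K : Params) {
        if (K == Fixed) {
          ++Bits; // a fixed parameter contributes a single 0 bit
          continue;
        }
        uint32_t Code = (K == FloatShort) ? 0x2 : 0x3; // '10'b or '11'b
        if (Bits < 31)
          Encoding |= Code << (30 - Bits);
        Bits += 2;
      }
      return Encoding;
    }

    int main() {
      // (long float, fixed, short float) packs as 11 0 10 ... => 0xD0000000.
      assert(encodeParams({FloatLong, Fixed, FloatShort}) == 0xD0000000u);
      // Fixed-only parameter lists leave the word zero.
      assert(encodeParams({Fixed, Fixed, Fixed}) == 0u);
      return 0;
    }
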
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 29ca53e273d7..4b73b36318b4 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -22,6 +22,16 @@ namespace llvm {
/// PPCFunctionInfo - This class is derived from MachineFunction private
/// PowerPC target-specific information for each MachineFunction.
class PPCFunctionInfo : public MachineFunctionInfo {
+public:
+  // The values in ParamType indicate the bitstrings used in the encoding
+  // format.
+ enum ParamType {
+ FixedType = 0x0,
+ ShortFloatPoint = 0x2,
+ LongFloatPoint = 0x3
+ };
+
+private:
virtual void anchor();
/// FramePointerSaveIndex - Frame index of where the old frame pointer is
@@ -69,9 +79,6 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// disabled.
bool DisableNonVolatileCR = false;
- /// Indicates whether VRSAVE is spilled in the current function.
- bool SpillsVRSAVE = false;
-
/// LRStoreRequired - The bool indicates whether there is some explicit use of
/// the LR/LR8 stack slot that is not obvious from scanning the code. This
/// requires that the code generator produce a store of LR to the stack on
@@ -110,6 +117,20 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// register for parameter passing.
unsigned VarArgsNumFPR = 0;
+  /// FixedParamNum - Number of fixed parameters.
+ unsigned FixedParamNum = 0;
+
+  /// FloatingPointParamNum - Number of floating point parameters.
+ unsigned FloatingPointParamNum = 0;
+
+  /// ParameterType - Encodes the type of every parameter
+  /// in the order in which they are passed in.
+ /// Bitstring starts from the most significant (leftmost) bit.
+ /// '0'b => fixed parameter.
+ /// '10'b => floating point short parameter.
+ /// '11'b => floating point long parameter.
+ uint32_t ParameterType = 0;
+
/// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
int CRSpillFrameIndex = 0;
@@ -175,9 +196,6 @@ public:
void setDisableNonVolatileCR() { DisableNonVolatileCR = true; }
bool isNonVolatileCRDisabled() const { return DisableNonVolatileCR; }
- void setSpillsVRSAVE() { SpillsVRSAVE = true; }
- bool isVRSAVESpilled() const { return SpillsVRSAVE; }
-
void setLRStoreRequired() { LRStoreRequired = true; }
bool isLRStoreRequired() const { return LRStoreRequired; }
@@ -196,6 +214,13 @@ public:
unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; }
void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; }
+ unsigned getFixedParamNum() const { return FixedParamNum; }
+
+ unsigned getFloatingPointParamNum() const { return FloatingPointParamNum; }
+
+ uint32_t getParameterType() const { return ParameterType; }
+ void appendParameterType(ParamType Type);
+
unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
index 5649d7d13966..ce615e554d94 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -49,10 +49,103 @@ bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary *Zone) const {
- GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+ // From GenericScheduler::tryCandidate
- if (!Cand.isValid() || !Zone)
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
return;
+ }
+
+  // Bias PhysReg defs and copies to their uses and definitions, respectively.
+ if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+ biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
+ return;
+
+ // Avoid exceeding the target's limit.
+ if (DAG->isTrackingPressure() &&
+ tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
+ RegExcess, TRI, DAG->MF))
+ return;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (DAG->isTrackingPressure() &&
+ tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
+ TryCand, Cand, RegCritical, TRI, DAG->MF))
+ return;
+
+ // We only compare a subset of features when comparing nodes between
+ // Top and Bottom boundary. Some properties are simply incomparable, in many
+ // other instances we should only override the other boundary if something
+ // is a clear good pick on one boundary. Skip heuristics that are more
+ // "tie-breaking" in nature.
+ bool SameBoundary = Zone != nullptr;
+ if (SameBoundary) {
+ // For loops that are acyclic path limited, aggressively schedule for
+    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
+ // heuristics to take precedence.
+ if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+ tryLatency(TryCand, Cand, *Zone))
+ return;
+
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+ Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return;
+ }
+
+ // Keep clustered nodes together to encourage downstream peephole
+ // optimizations which may reduce resource requirements.
+ //
+ // This is a best effort to set things up for a post-RA pass. Optimizations
+ // like generating loads of multiple registers should ideally be done within
+ // the scheduler pass by combining the loads during DAG postprocessing.
+ const SUnit *CandNextClusterSU =
+ Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ const SUnit *TryCandNextClusterSU =
+ TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+ Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
+ return;
+
+ if (SameBoundary) {
+ // Weak edges are for clustering and other constraints.
+ if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+ getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
+ return;
+ }
+
+ // Avoid increasing the max pressure of the entire region.
+ if (DAG->isTrackingPressure() &&
+ tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
+ Cand, RegMax, TRI, DAG->MF))
+ return;
+
+ if (SameBoundary) {
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources, TryCand, Cand,
+ ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ // For acyclic path limited loops, latency was already checked above.
+ if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+ return;
+
+ // Fall through to original instruction order.
+ if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
+ (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
+ }
+
+ // GenericScheduler::tryCandidate end
// Add powerpc specific heuristic only when TryCand isn't selected or
// selected as node order.
@@ -61,8 +154,10 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
// There are some benefits to schedule the ADDI before the load to hide the
// latency, as RA may create a true dependency between the load and addi.
- if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
- return;
+ if (SameBoundary) {
+ if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
+ return;
+ }
}
bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
@@ -79,11 +174,44 @@ bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand) {
- PostGenericScheduler::tryCandidate(Cand, TryCand);
+ // From PostGenericScheduler::tryCandidate
+
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
+ Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return;
- if (!Cand.isValid())
+ // Keep clustered nodes together.
+ if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
+ Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster))
return;
+ // Avoid critical resource consumption and balance the schedule.
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources, TryCand, Cand,
+ ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
+ return;
+ }
+
+ // Fall through to original instruction order.
+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ TryCand.Reason = NodeOrder;
+
+ // PostGenericScheduler::tryCandidate end
+
// Add powerpc post ra specific heuristic only when TryCand isn't selected or
// selected as node order.
if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index 815dfd1402f4..d12c6d9cd406 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -51,8 +51,8 @@ public:
Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First),
OpSet2(Second) {}
- bool hasOp1(unsigned Opc) const { return OpSet1.count(Opc) != 0; }
- bool hasOp2(unsigned Opc) const { return OpSet2.count(Opc) != 0; }
+ bool hasOp1(unsigned Opc) const { return OpSet1.contains(Opc); }
+ bool hasOp2(unsigned Opc) const { return OpSet2.contains(Opc); }
bool isSupported() const { return Supported; }
Optional<unsigned> depOpIdx() const {
if (DepOpIdx < 0)
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 4ea714ff15f7..a8853609a7c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -39,10 +39,54 @@ STATISTIC(NumFrameOffFoldInPreEmit,
"Number of folding frame offset by using r+r in pre-emit peephole");
static cl::opt<bool>
+EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
+ cl::desc("enable PC Relative linker optimization"));
+
+static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
cl::desc("Run pre-emit peephole optimizations."));
namespace {
+
+static bool hasPCRelativeForm(MachineInstr &Use) {
+ switch (Use.getOpcode()) {
+ default:
+ return false;
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::LWA:
+ case PPC::LXSD:
+ case PPC::LXSSP:
+ case PPC::LXV:
+ case PPC::STXSD:
+ case PPC::STXSSP:
+ case PPC::STXV:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::DFLOADf32:
+ case PPC::DFLOADf64:
+ case PPC::DFSTOREf32:
+ case PPC::DFSTOREf64:
+ return true;
+ }
+}
+
class PPCPreEmitPeephole : public MachineFunctionPass {
public:
static char ID;
@@ -77,7 +121,7 @@ namespace {
for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
// Skip load immediate that is marked to be erased later because it
// cannot be used to replace any other instructions.
- if (InstrsToErase.find(&*BBI) != InstrsToErase.end())
+ if (InstrsToErase.contains(&*BBI))
continue;
// Skip non-load immediate.
unsigned Opc = BBI->getOpcode();
@@ -172,6 +216,196 @@ namespace {
return !InstrsToErase.empty();
}
+ // Check if this instruction is a PLDpc that is part of a GOT indirect
+ // access.
+ bool isGOTPLDpc(MachineInstr &Instr) {
+ if (Instr.getOpcode() != PPC::PLDpc)
+ return false;
+
+ // The result must be a register.
+ const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
+ if (!LoadedAddressReg.isReg())
+ return false;
+
+ // Make sure that this is a global symbol.
+ const MachineOperand &SymbolOp = Instr.getOperand(1);
+ if (!SymbolOp.isGlobal())
+ return false;
+
+ // Finally return true only if the GOT flag is present.
+ return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
+ }
+
+ bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
+ MachineFunction *MF = MBB.getParent();
+ // If the linker opt is disabled then just return.
+ if (!EnablePCRelLinkerOpt)
+ return false;
+
+ // Add this linker opt only if we are using PC Relative memops.
+ if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
+ return false;
+
+ // Struct to keep track of one def/use pair for a GOT indirect access.
+ struct GOTDefUsePair {
+ MachineBasicBlock::iterator DefInst;
+ MachineBasicBlock::iterator UseInst;
+ Register DefReg;
+ Register UseReg;
+ bool StillValid;
+ };
+    // Vector of def/use pairs in this basic block.
+ SmallVector<GOTDefUsePair, 4> CandPairs;
+ SmallVector<GOTDefUsePair, 4> ValidPairs;
+ bool MadeChange = false;
+
+ // Run through all of the instructions in the basic block and try to
+ // collect potential pairs of GOT indirect access instructions.
+ for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
+ // Look for the initial GOT indirect load.
+ if (isGOTPLDpc(*BBI)) {
+ GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
+ BBI->getOperand(0).getReg(),
+ PPC::NoRegister, true};
+ CandPairs.push_back(CurrentPair);
+ continue;
+ }
+
+ // We haven't encountered any new PLD instructions, nothing to check.
+ if (CandPairs.empty())
+ continue;
+
+ // Run through the candidate pairs and see if any of the registers
+ // defined in the PLD instructions are used by this instruction.
+ // Note: the size of CandPairs can change in the loop.
+ for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
+ GOTDefUsePair &Pair = CandPairs[Idx];
+ // The instruction does not use or modify this PLD's def reg,
+ // ignore it.
+ if (!BBI->readsRegister(Pair.DefReg, TRI) &&
+ !BBI->modifiesRegister(Pair.DefReg, TRI))
+ continue;
+
+          // The use needs to be used in the address computation and not
+ // as the register being stored for a store.
+ const MachineOperand *UseOp =
+ hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
+
+ // Check for a valid use.
+ if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
+ UseOp->isUse() && UseOp->isKill()) {
+ Pair.UseInst = BBI;
+ Pair.UseReg = BBI->getOperand(0).getReg();
+ ValidPairs.push_back(Pair);
+ }
+ CandPairs.erase(CandPairs.begin() + Idx);
+ }
+ }
+
+ // Go through all of the pairs and check for any more valid uses.
+ for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
+ // We shouldn't be here if we don't have a valid pair.
+ assert(Pair->UseInst.isValid() && Pair->StillValid &&
+ "Kept an invalid def/use pair for GOT PCRel opt");
+ // We have found a potential pair. Search through the instructions
+ // between the def and the use to see if it is valid to mark this as a
+ // linker opt.
+ MachineBasicBlock::iterator BBI = Pair->DefInst;
+ ++BBI;
+ for (; BBI != Pair->UseInst; ++BBI) {
+ if (BBI->readsRegister(Pair->UseReg, TRI) ||
+ BBI->modifiesRegister(Pair->UseReg, TRI)) {
+ Pair->StillValid = false;
+ break;
+ }
+ }
+
+ if (!Pair->StillValid)
+ continue;
+
+ // The load/store instruction that uses the address from the PLD will
+ // either use a register (for a store) or define a register (for the
+ // load). That register will be added as an implicit def to the PLD
+ // and as an implicit use on the second memory op. This is a precaution
+ // to prevent future passes from using that register between the two
+ // instructions.
+ MachineOperand ImplDef =
+ MachineOperand::CreateReg(Pair->UseReg, true, true);
+ MachineOperand ImplUse =
+ MachineOperand::CreateReg(Pair->UseReg, false, true);
+ Pair->DefInst->addOperand(ImplDef);
+ Pair->UseInst->addOperand(ImplUse);
+
+ // Create the symbol.
+ MCContext &Context = MF->getContext();
+ MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel");
+ MachineOperand PCRelLabel =
+ MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
+ Pair->DefInst->addOperand(*MF, PCRelLabel);
+ Pair->UseInst->addOperand(*MF, PCRelLabel);
+ MadeChange |= true;
+ }
+ return MadeChange;
+ }
+
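
    addLinkerOpt pairs a PLDpc that loads a global's address from the GOT with the single load or store that consumes that address, and tags both with a shared ".Lpcrel" temporary symbol so a linker that understands the PC-relative optimization can later fold the pair. A made-up C/C++ snippet that produces such a pair on a PC-relative-capable subtarget (the exact driver flags and register numbers in the comment are assumptions):

    /* Compiled for a Power10-style target with PC-relative addressing and PIC
     * (roughly -mcpu=power10 -fPIC), the access below becomes something like:
     *     pld  rN, counter@got@pcrel(0), 1   ; candidate def collected above
     *     lwz  r3, 0(rN)                     ; matching use, rN killed
     * which is exactly the def/use pair this function tags for the linker. */
    extern int counter;

    int read_counter(void) {
      return counter;
    }
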
+ // This function removes redundant pairs of accumulator prime/unprime
+ // instructions. In some situations, it's possible the compiler inserts an
+ // accumulator prime instruction followed by an unprime instruction (e.g.
+ // when we store an accumulator after restoring it from a spill). If the
+ // accumulator is not used between the two, they can be removed. This
+ // function removes these redundant pairs from basic blocks.
+ // The algorithm is quite straightforward - every time we encounter a prime
+ // instruction, the primed register is added to a candidate set. Any use
+ // other than a prime removes the candidate from the set and any de-prime
+ // of a current candidate marks both the prime and de-prime for removal.
+ // This way we ensure we only remove prime/de-prime *pairs* with no
+ // intervening uses.
+ bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
+ DenseSet<MachineInstr *> InstrsToErase;
+ // Initially, none of the acc registers are candidates.
+ SmallVector<MachineInstr *, 8> Candidates(
+ PPC::UACCRCRegClass.getNumRegs(), nullptr);
+
+ for (MachineInstr &BBI : MBB.instrs()) {
+ unsigned Opc = BBI.getOpcode();
+ // If we are visiting a xxmtacc instruction, we add it and its operand
+ // register to the candidate set.
+ if (Opc == PPC::XXMTACC) {
+ Register Acc = BBI.getOperand(0).getReg();
+ assert(PPC::ACCRCRegClass.contains(Acc) &&
+ "Unexpected register for XXMTACC");
+ Candidates[Acc - PPC::ACC0] = &BBI;
+ }
+ // If we are visiting a xxmfacc instruction and its operand register is
+ // in the candidate set, we mark the two instructions for removal.
+ else if (Opc == PPC::XXMFACC) {
+ Register Acc = BBI.getOperand(0).getReg();
+ assert(PPC::ACCRCRegClass.contains(Acc) &&
+ "Unexpected register for XXMFACC");
+ if (!Candidates[Acc - PPC::ACC0])
+ continue;
+ InstrsToErase.insert(&BBI);
+ InstrsToErase.insert(Candidates[Acc - PPC::ACC0]);
+ }
+ // If we are visiting an instruction using an accumulator register
+ // as operand, we remove it from the candidate set.
+ else {
+ for (MachineOperand &Operand : BBI.operands()) {
+ if (!Operand.isReg())
+ continue;
+ Register Reg = Operand.getReg();
+ if (PPC::ACCRCRegClass.contains(Reg))
+ Candidates[Reg - PPC::ACC0] = nullptr;
+ }
+ }
+ }
+
+ for (MachineInstr *MI : InstrsToErase)
+ MI->eraseFromParent();
+ NumRemovedInPreEmit += InstrsToErase.size();
+ return !InstrsToErase.empty();
+ }
+
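
    The candidate-slot scheme described in the comment above can be summarized with a small standalone sketch (plain C++ with an invented Instr struct, not the MachineInstr API): one slot per accumulator remembers the last unmatched prime, any other use of that accumulator clears the slot, and an unprime that finds a live slot marks both instructions for deletion.

    #include <cstddef>
    #include <set>
    #include <vector>

    enum Op { Prime, Unprime, OtherUse };

    struct Instr {
      Op Kind;
      unsigned Acc; // accumulator register index touched by this instruction
    };

    // Returns indices of instructions forming removable prime/unprime pairs
    // with no intervening use of the same accumulator.
    static std::set<std::size_t>
    findRemovablePairs(const std::vector<Instr> &Block, unsigned NumAccs) {
      std::vector<std::ptrdiff_t> Candidate(NumAccs, -1); // pending prime index
      std::set<std::size_t> ToErase;
      for (std::size_t I = 0; I < Block.size(); ++I) {
        const Instr &In = Block[I];
        if (In.Kind == Prime)
          Candidate[In.Acc] = static_cast<std::ptrdiff_t>(I);
        else if (In.Kind == Unprime && Candidate[In.Acc] != -1) {
          ToErase.insert(static_cast<std::size_t>(Candidate[In.Acc]));
          ToErase.insert(I);
        } else
          Candidate[In.Acc] = -1; // any other use invalidates the candidate
      }
      return ToErase;
    }
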
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
// Remove UNENCODED_NOP even when this pass is disabled.
@@ -192,6 +426,8 @@ namespace {
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
Changed |= removeRedundantLIs(MBB, TRI);
+ Changed |= addLinkerOpt(MBB, TRI);
+ Changed |= removeAccPrimeUnprime(MBB);
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
if (Opc == PPC::UNENCODED_NOP) {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
deleted file mode 100644
index 6e9042643820..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// The QPX vector registers overlay the scalar floating-point registers, and
-// any scalar floating-point loads splat their value across all vector lanes.
-// Thus, if we have a scalar load followed by a splat, we can remove the splat
-// (i.e. replace the load with a load-and-splat pseudo instruction).
-//
-// This pass must run after anything that might do store-to-load forwarding.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPC.h"
-#include "PPCInstrBuilder.h"
-#include "PPCInstrInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "ppc-qpx-load-splat"
-
-STATISTIC(NumSimplified, "Number of QPX load splats simplified");
-
-namespace {
- struct PPCQPXLoadSplat : public MachineFunctionPass {
- static char ID;
- PPCQPXLoadSplat() : MachineFunctionPass(ID) {
- initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &Fn) override;
-
- StringRef getPassName() const override {
- return "PowerPC QPX Load Splat Simplification";
- }
- };
- char PPCQPXLoadSplat::ID = 0;
-}
-
-INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
- "PowerPC QPX Load Splat Simplification",
- false, false)
-
-FunctionPass *llvm::createPPCQPXLoadSplatPass() {
- return new PPCQPXLoadSplat();
-}
-
-bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
-
- bool MadeChange = false;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-
- for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
- MachineBasicBlock *MBB = &*MFI;
- SmallVector<MachineInstr *, 4> Splats;
-
- for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
- MachineInstr *MI = &*MBBI;
-
- if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
- Splats.clear();
- continue;
- }
-
- // We're looking for a sequence like this:
- // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
- // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
-
- for (auto SI = Splats.begin(); SI != Splats.end();) {
- MachineInstr *SMI = *SI;
- Register SplatReg = SMI->getOperand(0).getReg();
- Register SrcReg = SMI->getOperand(1).getReg();
-
- if (MI->modifiesRegister(SrcReg, TRI)) {
- switch (MI->getOpcode()) {
- default:
- SI = Splats.erase(SI);
- continue;
- case PPC::LFS:
- case PPC::LFD:
- case PPC::LFSU:
- case PPC::LFDU:
- case PPC::LFSUX:
- case PPC::LFDUX:
- case PPC::LFSX:
- case PPC::LFDX:
- case PPC::LFIWAX:
- case PPC::LFIWZX:
- if (SplatReg != SrcReg) {
- // We need to change the load to define the scalar subregister of
- // the QPX splat source register.
- unsigned SubRegIndex =
- TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
- Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
-
- // Substitute both the explicit defined register, and also the
- // implicit def of the containing QPX register.
- MI->getOperand(0).setReg(SplatSubReg);
- MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
- }
-
- SI = Splats.erase(SI);
-
- // If SMI is directly after MI, then MBBI's base iterator is
- // pointing at SMI. Adjust MBBI around the call to erase SMI to
- // avoid invalidating MBBI.
- ++MBBI;
- SMI->eraseFromParent();
- --MBBI;
-
- ++NumSimplified;
- MadeChange = true;
- continue;
- }
- }
-
- // If this instruction defines the splat register, then we cannot move
- // the previous definition above it. If it reads from the splat
- // register, then it must already be alive from some previous
- // definition, and if the splat register is different from the source
- // register, then this definition must not be the load for which we're
- // searching.
- if (MI->modifiesRegister(SplatReg, TRI) ||
- (SrcReg != SplatReg &&
- MI->readsRegister(SplatReg, TRI))) {
- SI = Splats.erase(SI);
- continue;
- }
-
- ++SI;
- }
-
- if (MI->getOpcode() != PPC::QVESPLATI &&
- MI->getOpcode() != PPC::QVESPLATIs &&
- MI->getOpcode() != PPC::QVESPLATIb)
- continue;
- if (MI->getOperand(2).getImm() != 0)
- continue;
-
- // If there are other uses of the scalar value after this, replacing
- // those uses might be non-trivial.
- if (!MI->getOperand(1).isKill())
- continue;
-
- Splats.push_back(MI);
- }
- }
-
- return MadeChange;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 90cc81beb89d..5cee00c61fc1 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -206,9 +206,9 @@ static bool splitMBB(BlockSplitInfo &BSI) {
NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
NewMBB->transferSuccessors(ThisMBB);
if (!ProbOrigTarget.isUnknown()) {
- auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
+ auto MBBI = find(NewMBB->successors(), OrigTarget);
NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
- MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
+ MBBI = find(NewMBB->successors(), OrigFallThrough);
NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index ed8948a63972..178a13443e2a 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -75,6 +75,21 @@ MaxCRBitSpillDist("ppc-max-crbit-spill-dist",
"spill on ppc"),
cl::Hidden, cl::init(100));
+// Copies/moves of physical accumulators are expensive operations
+// that should be avoided whenever possible. MMA instructions are
+// meant to be used in performance-sensitive computational kernels.
+// This option is provided, at least for the time being, to give the
+// user a tool to detect this expensive operation and either rework
+// their code or report a compiler bug if that turns out to be the
+// cause.
+#ifndef NDEBUG
+static cl::opt<bool>
+ReportAccMoves("ppc-report-acc-moves",
+ cl::desc("Emit information about accumulator register spills "
+ "and copies"),
+ cl::Hidden, cl::init(false));
+#endif
+
static unsigned offsetMinAlignForOpcode(unsigned OpC);
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
@@ -141,6 +156,10 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
+ if (Subtarget.isAIXABI() &&
+ (Subtarget.hasAltivec() && !TM.getAIXExtendedAltivecABI()))
+ report_fatal_error("the default AIX Altivec ABI is not yet "
+ "supported.");
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg) {
if (!TM.isPPC64() && Subtarget.isAIXABI())
report_fatal_error("AnyReg unimplemented on 32-bit AIX.");
@@ -187,8 +206,11 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return SaveR2 ? CSR_PPC64_R2_SaveList : CSR_PPC64_SaveList;
}
// 32-bit targets.
- if (Subtarget.isAIXABI())
+ if (Subtarget.isAIXABI()) {
+ if (Subtarget.hasAltivec())
+ return CSR_AIX32_Altivec_SaveList;
return CSR_AIX32_SaveList;
+ }
if (Subtarget.hasAltivec())
return CSR_SVR432_Altivec_SaveList;
else if (Subtarget.hasSPE())
@@ -209,8 +231,10 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
}
if (Subtarget.isAIXABI()) {
- assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
- return TM.isPPC64() ? CSR_PPC64_RegMask : CSR_AIX32_RegMask;
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
+ : CSR_PPC64_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_AIX32_Altivec_RegMask
+ : CSR_AIX32_RegMask);
}
if (CC == CallingConv::Cold) {
@@ -404,9 +428,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
- case PPC::QFRCRegClassID:
- case PPC::QSRCRegClassID:
- case PPC::QBRCRegClassID:
case PPC::VRRCRegClassID:
case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:
@@ -830,6 +851,16 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
SpillsKnownBit = true;
break;
default:
+ // On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all
+ // bits (specifically, it produces a -1 if the CR bit is set). Ultimately,
+ // the bit that is of importance to us is bit 32 (bit 0 of a 32-bit
+ // register), and SETNBC will set this.
+ if (Subtarget.isISA3_1()) {
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg)
+ .addReg(SrcReg, RegState::Undef);
+ break;
+ }
+
// On Power9, we can use SETB to extract the LT bit. This only works for
// the LT bit since SETB produces -1/1/0 for LT/GT/<neither>. So the value
// of the bit we care about (32-bit sign bit) will be set to the value of
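A minimal sketch, illustrative only and not part of the patch, of why the ISA 3.1 path works for any CR bit: SETNBC produces all-ones when the bit is set, so the sign bit of the 32-bit subregister (IBM bit 32 of the GPR) always mirrors the spilled CR bit.

// Illustrative sketch only -- not part of the patch.
#include <cassert>
#include <cstdint>

// Models what SETNBC materializes for a spilled CR bit: -1 if set, 0 otherwise.
static uint64_t setnbc(bool CRBit) { return CRBit ? ~uint64_t(0) : uint64_t(0); }

int main() {
  for (bool Bit : {false, true}) {
    uint32_t Sub32 = static_cast<uint32_t>(setnbc(Bit)); // 32-bit subregister
    bool SignBit = (Sub32 >> 31) & 1u;  // IBM bit 32 of the GPR, i.e. bit 0
                                        // (sign bit) of the 32-bit register
    assert(SignBit == Bit);
    (void)SignBit;
  }
  return 0;
}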
@@ -929,54 +960,104 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
- unsigned FrameIndex) const {
- // Get the instruction.
- MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
- // Get the instruction's basic block.
+void PPCRegisterInfo::emitAccCopyInfo(MachineBasicBlock &MBB,
+ MCRegister DestReg, MCRegister SrcReg) {
+#ifdef NDEBUG
+ return;
+#else
+ if (ReportAccMoves) {
+ std::string Dest = PPC::ACCRCRegClass.contains(DestReg) ? "acc" : "uacc";
+ std::string Src = PPC::ACCRCRegClass.contains(SrcReg) ? "acc" : "uacc";
+ dbgs() << "Emitting copy from " << Src << " to " << Dest << ":\n";
+ MBB.dump();
+ }
+#endif
+}
+
+static void emitAccSpillRestoreInfo(MachineBasicBlock &MBB, bool IsPrimed,
+ bool IsRestore) {
+#ifdef NDEBUG
+ return;
+#else
+ if (ReportAccMoves) {
+ dbgs() << "Emitting " << (IsPrimed ? "acc" : "uacc") << " register "
+ << (IsRestore ? "restore" : "spill") << ":\n";
+ MBB.dump();
+ }
+#endif
+}
+
+/// lowerACCSpilling - Generate the code for spilling the accumulator register.
+/// Similarly to other spills/reloads that use pseudo-ops, we do not actually
+/// eliminate the FrameIndex here nor compute the stack offset. We simply
+/// create a real instruction with an FI and rely on eliminateFrameIndex to
+/// handle the FI elimination.
+void PPCRegisterInfo::lowerACCSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ MachineInstr &MI = *II; // SPILL_ACC <SrcReg>, <offset>
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- DebugLoc dl = MI.getDebugLoc();
-
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- Register Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ DebugLoc DL = MI.getDebugLoc();
Register SrcReg = MI.getOperand(0).getReg();
-
- BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
- .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
-
- addFrameReference(
- BuildMI(MBB, II, dl, TII.get(PPC::STW)).addReg(Reg, RegState::Kill),
- FrameIndex);
+ bool IsKilled = MI.getOperand(0).isKill();
+
+ bool IsPrimed = PPC::ACCRCRegClass.contains(SrcReg);
+ Register Reg =
+ PPC::VSRp0 + (SrcReg - (IsPrimed ? PPC::ACC0 : PPC::UACC0)) * 2;
+ bool IsLittleEndian = Subtarget.isLittleEndian();
+
+ emitAccSpillRestoreInfo(MBB, IsPrimed, false);
+
+ // De-prime the register being spilled, create two stores for the pair
+ // subregisters accounting for endianness and then re-prime the register if
+ // it isn't killed. This uses the Offset parameter to addFrameReference() to
+ // adjust the offset of the store that is within the 64-byte stack slot.
+ if (IsPrimed)
+ BuildMI(MBB, II, DL, TII.get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(Reg, getKillRegState(IsKilled)),
+ FrameIndex, IsLittleEndian ? 32 : 0);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(Reg + 1, getKillRegState(IsKilled)),
+ FrameIndex, IsLittleEndian ? 0 : 32);
+ if (IsPrimed && !IsKilled)
+ BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
// Discard the pseudo instruction.
MBB.erase(II);
}
-void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
- unsigned FrameIndex) const {
- // Get the instruction.
- MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
- // Get the instruction's basic block.
+/// lowerACCRestore - Generate the code to restore the accumulator register.
+void PPCRegisterInfo::lowerACCRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ MachineInstr &MI = *II; // <DestReg> = RESTORE_ACC <offset>
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- DebugLoc dl = MI.getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- Register Reg = MF.getRegInfo().createVirtualRegister(GPRC);
Register DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
- "RESTORE_VRSAVE does not define its destination");
+ "RESTORE_ACC does not define its destination");
- addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
- Reg), FrameIndex);
+ bool IsPrimed = PPC::ACCRCRegClass.contains(DestReg);
+ Register Reg =
+ PPC::VSRp0 + (DestReg - (IsPrimed ? PPC::ACC0 : PPC::UACC0)) * 2;
+ bool IsLittleEndian = Subtarget.isLittleEndian();
- BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
- .addReg(Reg, RegState::Kill);
+ emitAccSpillRestoreInfo(MBB, IsPrimed, true);
+
+ // Create two loads for the pair subregisters accounting for endianness and
+ // then prime the accumulator register being restored.
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), Reg),
+ FrameIndex, IsLittleEndian ? 32 : 0);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), Reg + 1),
+ FrameIndex, IsLittleEndian ? 0 : 32);
+ if (IsPrimed)
+ BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), DestReg).addReg(DestReg);
// Discard the pseudo instruction.
MBB.erase(II);
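The register and offset arithmetic used by lowerACCSpilling/lowerACCRestore can be restated with plain integers. The sketch below is illustrative only (not part of the patch): accumulator N maps to VSR pairs 2N and 2N+1, and the two 32-byte halves of the 64-byte stack slot swap offsets on little-endian targets.

// Illustrative sketch only -- not part of the patch.
#include <cassert>
#include <utility>

struct AccSlotLayout {
  int FirstPairIdx;                 // VSRp index of the low pair (VSRp0 + 2*N)
  std::pair<int, int> Offsets;      // byte offsets for {FirstPair, FirstPair+1}
};

static AccSlotLayout layoutForAcc(int AccIdx, bool IsLittleEndian) {
  AccSlotLayout L;
  L.FirstPairIdx = 2 * AccIdx;      // Reg = PPC::VSRp0 + (SrcReg - ACC0) * 2
  L.Offsets = IsLittleEndian ? std::make_pair(32, 0) : std::make_pair(0, 32);
  return L;
}

int main() {
  AccSlotLayout BE = layoutForAcc(/*acc*/3, /*IsLittleEndian=*/false);
  assert(BE.FirstPairIdx == 6 && BE.Offsets.first == 0 && BE.Offsets.second == 32);
  AccSlotLayout LE = layoutForAcc(/*acc*/3, /*IsLittleEndian=*/true);
  assert(LE.FirstPairIdx == 6 && LE.Offsets.first == 32 && LE.Offsets.second == 0);
  (void)BE; (void)LE;
  return 0;
}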
@@ -1113,11 +1194,11 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else if (OpC == PPC::RESTORE_CRBIT) {
lowerCRBitRestore(II, FrameIndex);
return;
- } else if (OpC == PPC::SPILL_VRSAVE) {
- lowerVRSAVESpilling(II, FrameIndex);
+ } else if (OpC == PPC::SPILL_ACC || OpC == PPC::SPILL_UACC) {
+ lowerACCSpilling(II, FrameIndex);
return;
- } else if (OpC == PPC::RESTORE_VRSAVE) {
- lowerVRSAVERestore(II, FrameIndex);
+ } else if (OpC == PPC::RESTORE_ACC || OpC == PPC::RESTORE_UACC) {
+ lowerACCRestore(II, FrameIndex);
return;
}
@@ -1294,10 +1375,9 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
/// Insert defining instruction(s) for BaseReg to
/// be a pointer to FrameIdx at the beginning of the basic block.
-void PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
- Register BaseReg,
- int FrameIdx,
- int64_t Offset) const {
+Register PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ int FrameIdx,
+ int64_t Offset) const {
unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
MachineBasicBlock::iterator Ins = MBB->begin();
@@ -1310,10 +1390,14 @@ void PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = getPointerRegClass(MF);
+ Register BaseReg = MRI.createVirtualRegister(RC);
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset);
+
+ return BaseReg;
}
void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 61acd955e1cb..93f330ab56b6 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -119,10 +119,14 @@ public:
unsigned FrameIndex) const;
void lowerCRBitRestore(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
- void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
- unsigned FrameIndex) const;
- void lowerVRSAVERestore(MachineBasicBlock::iterator II,
- unsigned FrameIndex) const;
+
+ void lowerACCSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerACCRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+
+ static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg,
+ MCRegister SrcReg);
bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
int &FrameIdx) const override;
@@ -132,9 +136,8 @@ public:
// Support for virtual base registers.
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB, Register BaseReg,
- int FrameIdx,
- int64_t Offset) const override;
+ Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
+ int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
@@ -151,12 +154,18 @@ public:
/// register name so that only the number is left. Used for Linux asm.
static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
+ case 'a':
+ if (RegName[1] == 'c' && RegName[2] == 'c')
+ return RegName + 3;
+ break;
case 'r':
case 'f':
- case 'q': // for QPX
case 'v':
- if (RegName[1] == 's')
+ if (RegName[1] == 's') {
+ if (RegName[2] == 'p')
+ return RegName + 3;
return RegName + 2;
+ }
return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
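An illustrative standalone copy of the prefix-stripping logic above (not part of the patch), showing what the new 'acc' and 'vsp' cases add; the fallback return for names with no known prefix is an assumption here, since the hunk does not show the end of the function.

// Illustrative sketch only -- not part of the patch.
#include <cassert>
#include <string>

static const char *stripRegisterPrefix(const char *RegName) {
  switch (RegName[0]) {
  case 'a':
    if (RegName[1] == 'c' && RegName[2] == 'c')
      return RegName + 3;                    // acc0..acc7 -> "0".."7"
    break;
  case 'r':
  case 'f':
  case 'v':
    if (RegName[1] == 's') {
      if (RegName[2] == 'p')
        return RegName + 3;                  // VSR pairs: vsp names
      return RegName + 2;                    // vs0..vs63
    }
    return RegName + 1;                      // r0..r31, f0..f31, v0..v31
  case 'c':
    if (RegName[1] == 'r')
      return RegName + 2;                    // cr0..cr7
    break;
  }
  return RegName;                            // assumed fallback
}

int main() {
  assert(std::string(stripRegisterPrefix("acc3")) == "3");
  assert(std::string(stripRegisterPrefix("vsp10")) == "10");
  assert(std::string(stripRegisterPrefix("vs63")) == "63");
  assert(std::string(stripRegisterPrefix("r31")) == "31");
  assert(std::string(stripRegisterPrefix("cr2")) == "2");
  return 0;
}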
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index b45757c1acc5..551735c85b51 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -16,6 +16,10 @@ def sub_eq : SubRegIndex<1, 2>;
def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
def sub_64 : SubRegIndex<64>;
+def sub_vsx0 : SubRegIndex<128>;
+def sub_vsx1 : SubRegIndex<128, 128>;
+def sub_pair0 : SubRegIndex<256>;
+def sub_pair1 : SubRegIndex<256, 256>;
}
@@ -54,13 +58,6 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
-// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
-class QFPR<FPR SubReg, string n> : PPCReg<n> {
- let HWEncoding = SubReg.HWEncoding;
- let SubRegs = [SubReg];
- let SubRegIndices = [sub_64];
-}
-
// VF - One of the 32 64-bit floating-point subregisters of the vector
// registers (used by VSX).
class VF<bits<5> num, string n> : PPCReg<n> {
@@ -101,6 +98,27 @@ class CRBIT<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
+// ACC - One of the 8 512-bit VSX accumulators.
+class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{2-0} = num;
+ let SubRegs = subregs;
+}
+
+// UACC - One of the 8 512-bit VSX accumulators prior to being primed.
+// Without using this register class, the register allocator has no way to
+// differentiate a primed accumulator from an unprimed accumulator.
+// This may result in invalid copies between primed and unprimed accumulators.
+class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{2-0} = num;
+ let SubRegs = subregs;
+}
+
+// VSR Pairs - One of the 32 paired even-odd consecutive VSRs.
+class VSRPair<bits<5> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+ let SubRegs = subregs;
+}
+
// General-purpose registers
foreach Index = 0-31 in {
def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
@@ -132,12 +150,6 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
-// QPX Floating-point registers
-foreach Index = 0-31 in {
- def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
- DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
-}
-
// Vector registers
foreach Index = 0-31 in {
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
@@ -156,6 +168,23 @@ foreach Index = 32-63 in {
def VSX#Index : VSXReg<Index, "vs"#Index>;
}
+let SubRegIndices = [sub_vsx0, sub_vsx1] in {
+ // VSR pairs 0 - 15 (corresponding to VSRs 0 - 30 paired with 1 - 31).
+ foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
+ def VSRp#!srl(Index, 1) : VSRPair<!srl(Index, 1), "vsp"#Index,
+ [!cast<VSRL>("VSL"#Index), !cast<VSRL>("VSL"#!add(Index, 1))]>,
+ DwarfRegNum<[-1, -1]>;
+ }
+
+ // VSR pairs 16 - 31 (corresponding to VSRs 32 - 62 paired with 33 - 63).
+ foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
+ def VSRp#!add(!srl(Index, 1), 16) :
+ VSRPair<!add(!srl(Index, 1), 16), "vsp"#!add(Index, 32),
+ [!cast<VR>("V"#Index), !cast<VR>("V"#!add(Index, 1))]>,
+ DwarfRegNum<[-1, -1]>;
+ }
+}
+
// The representation of r0 when treated as the constant 0.
def ZERO : GPR<0, "0">, DwarfRegAlias<R0>;
def ZERO8 : GP8<ZERO, "0">, DwarfRegAlias<X0>;
@@ -343,16 +372,6 @@ def SPILLTOVSRRC : RegisterClass<"PPC", [i64, f64], 64, (add G8RC, (sub VSFRC,
// Register class for single precision scalars in VSX registers
def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
-// For QPX
-def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
- (sequence "QF%u", 31, 14))>;
-def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
-def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
- // These are actually stored as floating-point values where a positive
- // number is true and anything else (including NaN) is false.
- let Size = 256;
-}
-
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
@@ -390,8 +409,56 @@ def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
let isAllocatable = 0;
}
+def LRRC : RegisterClass<"PPC", [i32], 32, (add LR)> {
+ let isAllocatable = 0;
+}
+def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> {
+ let isAllocatable = 0;
+}
+
def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> {
let CopyCost = -1;
}
+let SubRegIndices = [sub_pair0, sub_pair1] in {
+ def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
+ def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
+ def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
+ def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
+ def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
+ def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
+ def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
+ def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
+}
+def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
+ ACC4, ACC5, ACC6, ACC7)> {
+ let Size = 512;
+}
+
+let SubRegIndices = [sub_pair0, sub_pair1] in {
+ def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
+ def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
+ def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
+ def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
+ def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
+ def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
+ def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
+ def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
+}
+def UACCRC : RegisterClass<"PPC", [v512i1], 128,
+ (add UACC0, UACC1, UACC2, UACC3,
+ UACC4, UACC5, UACC6, UACC7)> {
+ let Size = 512;
+}
+
+// Allocate in the same order as the underlying VSX registers.
+def VSRpRC :
+ RegisterClass<"PPC", [v256i1], 128,
+ (add (sequence "VSRp%u", 0, 6),
+ (sequence "VSRp%u", 15, 7), VSRp17, VSRp18,
+ VSRp16, VSRp19, VSRp20, VSRp21, VSRp22, VSRp23,
+ VSRp24, VSRp25, VSRp31, VSRp30, VSRp29, VSRp28,
+ VSRp27, VSRp26)> {
+ let Size = 256;
+}
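Illustrative only, not part of the patch: a small program that prints the register hierarchy these definitions describe, with each accumulator covering two VSR pairs and therefore four VSRs.

// Illustrative sketch only -- not part of the patch.
#include <cstdio>

int main() {
  for (int Acc = 0; Acc < 8; ++Acc) {
    int FirstPair = 2 * Acc;                 // ACC<N> = [VSRp(2N), VSRp(2N+1)]
    int FirstVSR = 2 * FirstPair;            // VSRp<P> overlaps VSRs 2P, 2P+1
    std::printf("acc%d -> vsp%d,vsp%d -> vs%d..vs%d\n", Acc, FirstPair,
                FirstPair + 1, FirstVSR, FirstVSR + 3);
  }
  return 0;
}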
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 0a1ae7e55b3c..571cc219ff2b 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -40,12 +40,11 @@ def P9Model : SchedMachineModel {
let CompleteModel = 1;
- // Do not support QPX (Quad Processing eXtension), SPE (Signal Processing
- // Engine), prefixed instructions on Power 9, PC relative mem ops, or
- // instructions introduced in ISA 3.1.
- let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops,
- IsISA3_1];
-
+ // Do not support SPE (Signal Processing Engine), prefixed instructions on
+ // Power 9, paired vector mem ops, MMA, PC relative mem ops, or instructions
+ // introduced in ISA 3.1.
+ let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, MMA,
+ PCRelativeMemops, IsISA3_1];
}
let SchedModel = P9Model in {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 3836cc960394..d31195f67ef1 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -11,9 +11,13 @@
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
+#include "GISel/PPCCallLowering.h"
+#include "GISel/PPCLegalizerInfo.h"
+#include "GISel/PPCRegisterBankInfo.h"
#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
@@ -35,10 +39,6 @@ using namespace llvm;
static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
-static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
- cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
- cl::Hidden);
-
static cl::opt<bool>
EnableMachinePipeliner("ppc-enable-pipeliner",
cl::desc("Enable Machine Pipeliner for PPC"),
@@ -53,11 +53,19 @@ PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const PPCTargetMachine &TM)
- : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+ : PPCGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), TargetTriple(TT),
IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
TargetTriple.getArch() == Triple::ppc64le),
TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)),
- InstrInfo(*this), TLInfo(TM, *this) {}
+ InstrInfo(*this), TLInfo(TM, *this) {
+ CallLoweringInfo.reset(new PPCCallLowering(*getTargetLowering()));
+ Legalizer.reset(new PPCLegalizerInfo(*this));
+ auto *RBI = new PPCRegisterBankInfo(*getRegisterInfo());
+ RegBankInfo.reset(RBI);
+
+ InstSelector.reset(createPPCInstructionSelector(
+ *static_cast<const PPCTargetMachine *>(&TM), *this, *RBI));
+}
void PPCSubtarget::initializeEnvironment() {
StackAlignment = Align(16);
@@ -69,8 +77,8 @@ void PPCSubtarget::initializeEnvironment() {
HasHardFloat = false;
HasAltivec = false;
HasSPE = false;
+ HasEFPU2 = false;
HasFPU = false;
- HasQPX = false;
HasVSX = false;
NeedsTwoConstNR = false;
HasP8Vector = false;
@@ -78,6 +86,7 @@ void PPCSubtarget::initializeEnvironment() {
HasP8Crypto = false;
HasP9Vector = false;
HasP9Altivec = false;
+ HasMMA = false;
HasP10Vector = false;
HasPrefixInstrs = false;
HasPCRelativeMemops = false;
@@ -109,10 +118,10 @@ void PPCSubtarget::initializeEnvironment() {
HasInvariantFunctionDescriptors = false;
HasPartwordAtomics = false;
HasDirectMove = false;
- IsQPXStackUnaligned = false;
HasHTM = false;
HasFloat128 = false;
HasFusion = false;
+ HasStoreFusion = false;
HasAddiLoadFusion = false;
HasAddisLoadFusion = false;
IsISA3_0 = false;
@@ -122,7 +131,10 @@ void PPCSubtarget::initializeEnvironment() {
VectorsUseTwoUnits = false;
UsePPCPreRASchedStrategy = false;
UsePPCPostRASchedStrategy = false;
+ PairedVectorMemops = false;
PredictableSelectIsExpensive = false;
+ HasModernAIXAs = false;
+ IsAIX = false;
HasPOPCNTD = POPCNTD_Unavailable;
}
@@ -144,7 +156,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
InstrItins = getInstrItineraryForCPU(CPUName);
// Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
+ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS);
// If the user requested use of 64-bit regs, but the cpu selected doesn't
// support it, ignore.
@@ -158,7 +170,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (HasSPE && IsPPC64)
report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
- if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
+ if (HasSPE && (HasAltivec || HasVSX || HasFPU))
report_fatal_error(
"SPE and traditional floating point cannot both be enabled.\n", false);
@@ -166,15 +178,12 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (!HasSPE)
HasFPU = true;
- // QPX requires a 32-byte aligned stack. Note that we need to do this if
- // we're compiling for a BG/Q system regardless of whether or not QPX
- // is enabled because external functions will assume this alignment.
- IsQPXStackUnaligned = QPXStackUnaligned;
StackAlignment = getPlatformStackAlignment();
// Determine endianness.
// FIXME: Part of the TargetMachine.
- IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
+ IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le ||
+ TargetTriple.getArch() == Triple::ppcle);
}
bool PPCSubtarget::enableMachineScheduler() const { return true; }
@@ -235,3 +244,20 @@ bool PPCSubtarget::isUsingPCRelativeCalls() const {
return isPPC64() && hasPCRelativeMemops() && isELFv2ABI() &&
CodeModel::Medium == getTargetMachine().getCodeModel();
}
+
+// GlobalISEL
+const CallLowering *PPCSubtarget::getCallLowering() const {
+ return CallLoweringInfo.get();
+}
+
+const RegisterBankInfo *PPCSubtarget::getRegBankInfo() const {
+ return RegBankInfo.get();
+}
+
+const LegalizerInfo *PPCSubtarget::getLegalizerInfo() const {
+ return Legalizer.get();
+}
+
+InstructionSelector *PPCSubtarget::getInstructionSelector() const {
+ return InstSelector.get();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
index ec329022c457..50d89390d5bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -17,6 +17,9 @@
#include "PPCISelLowering.h"
#include "PPCInstrInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -97,7 +100,7 @@ protected:
bool HasAltivec;
bool HasFPU;
bool HasSPE;
- bool HasQPX;
+ bool HasEFPU2;
bool HasVSX;
bool NeedsTwoConstNR;
bool HasP8Vector;
@@ -108,6 +111,7 @@ protected:
bool HasP10Vector;
bool HasPrefixInstrs;
bool HasPCRelativeMemops;
+ bool HasMMA;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -137,6 +141,7 @@ protected:
bool HasHTM;
bool HasFloat128;
bool HasFusion;
+ bool HasStoreFusion;
bool HasAddiLoadFusion;
bool HasAddisLoadFusion;
bool IsISA3_0;
@@ -146,21 +151,25 @@ protected:
bool VectorsUseTwoUnits;
bool UsePPCPreRASchedStrategy;
bool UsePPCPostRASchedStrategy;
+ bool PairedVectorMemops;
bool PredictableSelectIsExpensive;
+ bool HasModernAIXAs;
+ bool IsAIX;
POPCNTDKind HasPOPCNTD;
- /// When targeting QPX running a stock PPC64 Linux kernel where the stack
- /// alignment has not been changed, we need to keep the 16-byte alignment
- /// of the stack.
- bool IsQPXStackUnaligned;
-
const PPCTargetMachine &TM;
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
PPCTargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
+ /// GlobalISel related APIs.
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
@@ -170,16 +179,13 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
Align getStackAlignment() const { return StackAlignment; }
- /// getDarwinDirective - Returns the -m directive specified for the cpu.
- unsigned getDarwinDirective() const { return CPUDirective; }
-
/// getCPUDirective - Returns the -m directive specified for the cpu.
///
unsigned getCPUDirective() const { return CPUDirective; }
@@ -254,8 +260,8 @@ public:
bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
bool hasSPE() const { return HasSPE; }
+ bool hasEFPU2() const { return HasEFPU2; }
bool hasFPU() const { return HasFPU; }
- bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
bool needsTwoConstNR() const { return NeedsTwoConstNR; }
bool hasP8Vector() const { return HasP8Vector; }
@@ -266,6 +272,8 @@ public:
bool hasP10Vector() const { return HasP10Vector; }
bool hasPrefixInstrs() const { return HasPrefixInstrs; }
bool hasPCRelativeMemops() const { return HasPCRelativeMemops; }
+ bool hasMMA() const { return HasMMA; }
+ bool pairedVectorMemops() const { return PairedVectorMemops; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasBPERMD() const { return HasBPERMD; }
@@ -291,11 +299,7 @@ public:
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
bool hasDirectMove() const { return HasDirectMove; }
- bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
Align getPlatformStackAlignment() const {
- if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
- return Align(32);
-
return Align(16);
}
@@ -315,6 +319,7 @@ public:
bool isISA3_1() const { return IsISA3_1; }
bool useLongCalls() const { return UseLongCalls; }
bool hasFusion() const { return HasFusion; }
+ bool hasStoreFusion() const { return HasStoreFusion; }
bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
bool needsSwapsForVSXMemOps() const {
@@ -325,9 +330,6 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
- /// isBGQ - True if this is a BG/Q platform.
- bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
-
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
@@ -404,6 +406,12 @@ public:
bool isPredictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
}
+
+ // GlobalISEL
+ const CallLowering *getCallLowering() const override;
+ const RegisterBankInfo *getRegBankInfo() const override;
+ const LegalizerInfo *getLegalizerInfo() const override;
+ InstructionSelector *getInstructionSelector() const override;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 4b809e0c8553..43dcc5844c4e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -50,16 +50,17 @@ protected:
bool Changed = false;
bool NeedFence = true;
bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
+ bool IsPCREL = false;
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE;) {
MachineInstr &MI = *I;
+ IsPCREL = isPCREL(MI);
if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
MI.getOpcode() != PPC::ADDItlsldLADDR &&
MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
- MI.getOpcode() != PPC::ADDItlsldLADDR32) {
-
+ MI.getOpcode() != PPC::ADDItlsldLADDR32 && !IsPCREL) {
// Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
// as scheduling fences, we skip creating fences if we already
// have existing ADJCALLSTACKDOWN/UP to avoid nesting,
@@ -76,12 +77,16 @@ protected:
LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI);
Register OutReg = MI.getOperand(0).getReg();
- Register InReg = MI.getOperand(1).getReg();
- DebugLoc DL = MI.getDebugLoc();
+ Register InReg = PPC::NoRegister;
Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
- unsigned Opc1, Opc2;
- const Register OrigRegs[] = {OutReg, InReg, GPR3};
+ SmallVector<Register, 3> OrigRegs = {OutReg, GPR3};
+ if (!IsPCREL) {
+ InReg = MI.getOperand(1).getReg();
+ OrigRegs.push_back(InReg);
+ }
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Opc1, Opc2;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Opcode inconsistency error");
@@ -101,6 +106,13 @@ protected:
Opc1 = PPC::ADDItlsldL32;
Opc2 = PPC::GETtlsldADDR32;
break;
+ case PPC::PADDI8pc:
+ assert(IsPCREL && "Expecting General/Local Dynamic PCRel");
+ Opc1 = PPC::PADDI8pc;
+ Opc2 = MI.getOperand(2).getTargetFlags() ==
+ PPCII::MO_GOT_TLSGD_PCREL_FLAG
+ ? PPC::GETtlsADDRPCREL
+ : PPC::GETtlsldADDRPCREL;
}
// We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr
@@ -113,9 +125,15 @@ protected:
BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
.addImm(0);
- // Expand into two ops built prior to the existing instruction.
- MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
- .addReg(InReg);
+ MachineInstr *Addi;
+ if (IsPCREL) {
+ Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addImm(0);
+ } else {
+ // Expand into two ops built prior to the existing instruction.
+ assert(InReg != PPC::NoRegister && "Operand must be a register");
+ Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addReg(InReg);
+ }
+
Addi->addOperand(MI.getOperand(2));
// The ADDItls* instruction is the first instruction in the
@@ -125,7 +143,10 @@ protected:
MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
.addReg(GPR3));
- Call->addOperand(MI.getOperand(3));
+ if (IsPCREL)
+ Call->addOperand(MI.getOperand(2));
+ else
+ Call->addOperand(MI.getOperand(3));
if (NeedFence)
BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
@@ -150,6 +171,14 @@ protected:
}
public:
+ bool isPCREL(const MachineInstr &MI) {
+ return (MI.getOpcode() == PPC::PADDI8pc) &&
+ (MI.getOperand(2).getTargetFlags() ==
+ PPCII::MO_GOT_TLSGD_PCREL_FLAG ||
+ MI.getOperand(2).getTargetFlags() ==
+ PPCII::MO_GOT_TLSLD_PCREL_FLAG);
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index f15f9c7f4942..0634833e64dc 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -24,12 +24,18 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
@@ -64,10 +70,6 @@ opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
cl::desc("Disable VSX Swap Removal for PPC"));
static cl::
-opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
- cl::desc("Disable QPX load splat simplification"));
-
-static cl::
opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
cl::desc("Disable machine peepholes for PPC"));
@@ -98,8 +100,9 @@ static cl::opt<bool>
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
- RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target());
- RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
+ RegisterTargetMachine<PPCTargetMachine> B(getThePPC32LETarget());
+ RegisterTargetMachine<PPCTargetMachine> C(getThePPC64Target());
+ RegisterTargetMachine<PPCTargetMachine> D(getThePPC64LETarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
#ifndef NDEBUG
@@ -114,13 +117,13 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
initializePPCReduceCRLogicalsPass(PR);
initializePPCBSelPass(PR);
initializePPCBranchCoalescingPass(PR);
- initializePPCQPXLoadSplatPass(PR);
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
initializePPCPreEmitPeepholePass(PR);
initializePPCTLSDynamicCallPass(PR);
initializePPCMIPeepholePass(PR);
initializePPCLowerMASSVEntriesPass(PR);
+ initializeGlobalISel(PR);
}
/// Return the datalayout string of a subtarget.
@@ -128,8 +131,8 @@ static std::string getDataLayoutString(const Triple &T) {
bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
std::string Ret;
- // Most PPC* platforms are big endian, PPC64LE is little endian.
- if (T.getArch() == Triple::ppc64le)
+ // Most PPC* platforms are big endian, PPC(64)LE is little endian.
+ if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle)
Ret = "e";
else
Ret = "E";
@@ -143,10 +146,7 @@ static std::string getDataLayoutString(const Triple &T) {
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
// documentation are wrong; these are correct (i.e. "what gcc does").
- if (is64Bit || !T.isOSDarwin())
- Ret += "-i64:64";
- else
- Ret += "-f64:32:64";
+ Ret += "-i64:64";
// PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
if (is64Bit)
@@ -154,6 +154,13 @@ static std::string getDataLayoutString(const Triple &T) {
else
Ret += "-n32";
+ // Specify the vector alignment explicitly. For v256i1 and v512i1, the
+ // calculated alignment would be 256*alignment(i1) and 512*alignment(i1),
+ // which is 256 and 512 bytes - way over aligned.
+ if ((T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppc64) &&
+ (T.isOSAIX() || T.isOSLinux()))
+ Ret += "-v256:256:256-v512:512:512";
+
return Ret;
}
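Illustrative only, not part of the patch: the arithmetic behind the vector-alignment comment above, assuming the usual data-layout convention that the vN:N:N entries are expressed in bits.

// Illustrative sketch only -- not part of the patch.
#include <cstdio>

int main() {
  const int AlignI1Bytes = 1;                // i1 is byte aligned
  for (int N : {256, 512}) {
    int DerivedBytes = N * AlignI1Bytes;     // 256 and 512 bytes: over-aligned
    int ExplicitBytes = N / 8;               // "vN:N:N" -> N bits = N/8 bytes
    std::printf("v%di1: derived %d bytes, explicit %d bytes\n", N, DerivedBytes,
                ExplicitBytes);
  }
  return 0;
}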
@@ -183,13 +190,17 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
FullFS = "+invariant-function-descriptors";
}
+ if (TT.isOSAIX()) {
+ if (!FullFS.empty())
+ FullFS = "+aix," + FullFS;
+ else
+ FullFS = "+aix";
+ }
+
return FullFS;
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
- if (TT.isOSDarwin())
- return std::make_unique<TargetLoweringObjectFileMachO>();
-
if (TT.isOSAIX())
return std::make_unique<TargetLoweringObjectFileXCOFF>();
@@ -198,9 +209,6 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
const TargetOptions &Options) {
- if (TT.isOSDarwin())
- report_fatal_error("Darwin is no longer supported for PowerPC");
-
if (Options.MCOptions.getABIName().startswith("elfv1"))
return PPCTargetMachine::PPC_ABI_ELFv1;
else if (Options.MCOptions.getABIName().startswith("elfv2"))
@@ -230,10 +238,6 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
if (RM.hasValue())
return *RM;
- // Darwin defaults to dynamic-no-pic.
- if (TT.isOSDarwin())
- return Reloc::DynamicNoPIC;
-
// Big Endian PPC and AIX default to PIC.
if (TT.getArch() == Triple::ppc64 || TT.isOSAIX())
return Reloc::PIC_;
@@ -276,6 +280,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
std::make_unique<GenericScheduler>(C));
// add DAG Mutations here.
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.hasStoreFusion())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
if (ST.hasFusion())
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
@@ -290,6 +296,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
std::make_unique<PPCPostRASchedStrategy>(C) :
std::make_unique<PostGenericScheduler>(C), true);
// add DAG Mutations here.
+ if (ST.hasStoreFusion())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
if (ST.hasFusion())
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
return DAG;
@@ -321,12 +329,10 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
// FIXME: This is related to the code below to reset the target options,
// we need to know whether or not the soft float flag is set on the
@@ -388,6 +394,12 @@ public:
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+ // GlobalISEL
+ bool addIRTranslator() override;
+ bool addLegalizeMachineIR() override;
+ bool addRegBankSelect() override;
+ bool addGlobalInstructionSelect() override;
+
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
return createPPCMachineScheduler(C);
@@ -411,14 +423,9 @@ void PPCPassConfig::addIRPasses() {
// Lower generic MASSV routines to PowerPC subtarget-specific entries.
addPass(createPPCLowerMASSVEntriesPass());
-
- // For the BG/Q (or if explicitly requested), add explicit data prefetch
- // intrinsics.
- bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
- getOptLevel() != CodeGenOpt::None;
+
+ // If explicitly requested, add explicit data prefetch intrinsics.
if (EnablePrefetch.getNumOccurrences() > 0)
- UsePrefetching = EnablePrefetch;
- if (UsePrefetching)
addPass(createLoopDataPrefetchPass());
if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
@@ -515,15 +522,8 @@ void PPCPassConfig::addPreRegAlloc() {
}
void PPCPassConfig::addPreSched2() {
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
-
- // This optimization must happen after anything that might do store-to-load
- // forwarding. Here we're after RA (and, thus, when spills are inserted)
- // but before post-RA scheduling.
- if (!DisableQPXLoadSplat)
- addPass(createPPCQPXLoadSplatPass());
- }
}
void PPCPassConfig::addPreEmitPass() {
@@ -550,3 +550,24 @@ static MachineSchedRegistry
PPCPostRASchedRegistry("ppc-postra",
"Run PowerPC PostRA specific scheduler",
createPPCPostMachineScheduler);
+
+// Global ISEL
+bool PPCPassConfig::addIRTranslator() {
+ addPass(new IRTranslator());
+ return false;
+}
+
+bool PPCPassConfig::addLegalizeMachineIR() {
+ addPass(new Legalizer());
+ return false;
+}
+
+bool PPCPassConfig::addRegBankSelect() {
+ addPass(new RegBankSelect());
+ return false;
+}
+
+bool PPCPassConfig::addGlobalInstructionSelect() {
+ addPass(new InstructionSelect());
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index fd1d14ae32d4..21faa4e710e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -58,6 +58,11 @@ public:
const Triple &TT = getTargetTriple();
return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
};
+
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index dc10dd80c8fa..c90ff8b7d59d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -8,13 +8,19 @@
#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/Local.h"
+
using namespace llvm;
#define DEBUG_TYPE "ppctti"
@@ -22,8 +28,7 @@ using namespace llvm;
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
-// This is currently only used for the data prefetch pass which is only enabled
-// for BG/Q by default.
+// This is currently only used for the data prefetch pass
static cl::opt<unsigned>
CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
cl::desc("The loop prefetch cache line size"));
@@ -59,6 +64,109 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
+Optional<Instruction *>
+PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+ Intrinsic::ID IID = II.getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(
+ II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
+ Value *Ptr = IC.Builder.CreateBitCast(
+ II.getArgOperand(0), PointerType::getUnqual(II.getType()));
+ return new LoadInst(II.getType(), Ptr, "", false, Align(16));
+ }
+ break;
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x: {
+ // Turn PPC VSX loads into normal loads.
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
+ PointerType::getUnqual(II.getType()));
+ return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
+ }
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(
+ II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
+ Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
+ return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
+ }
+ break;
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x: {
+ // Turn PPC VSX stores into normal stores.
+ Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
+ return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
+ }
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ // Note that ppc_altivec_vperm has a big-endian bias, so when creating
+ // a vectorshuffle for little endian, we must undo the transformation
+ // performed on vec_perm in altivec.h. That is, we must complement
+ // the permutation mask with respect to 31 and reverse the order of
+ // V1 and V2.
+ if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
+ assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
+ "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ Constant *Elt = Mask->getAggregateElement(i);
+ if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 =
+ IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
+ Value *Op1 =
+ IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getAggregateElement(i)))
+ continue;
+ unsigned Idx =
+ cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+ if (DL.isLittleEndian())
+ Idx = 31 - Idx;
+
+ if (!ExtractedElts[Idx]) {
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
+ ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
+ Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
+ }
+
+ // Insert this value into the result vector.
+ Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
+ IC.Builder.getInt32(i));
+ }
+ return CastInst::Create(Instruction::BitCast, Result, II.getType());
+ }
+ }
+ break;
+ }
+ return None;
+}
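The little-endian remapping in the vperm combine can be isolated into a small program. The sketch below is illustrative only (not part of the patch); it mirrors the index arithmetic above, complementing each mask element with respect to 31 and swapping the two source operands for little endian.

// Illustrative sketch only -- not part of the patch.
#include <cassert>

struct PermSelect {
  int SourceVector;   // 0 selects the first vperm operand, 1 the second
  int Lane;           // byte lane within that 16-byte vector
};

static PermSelect resolveVPermElt(unsigned MaskElt, bool IsLittleEndian) {
  unsigned Idx = MaskElt & 31;               // Idx &= 31; match the hardware
  if (IsLittleEndian)
    Idx = 31 - Idx;                          // complement w.r.t. 31
  // On LE the roles of the two operands are also swapped.
  int First = IsLittleEndian ? 1 : 0;
  int Second = IsLittleEndian ? 0 : 1;
  PermSelect S;
  S.SourceVector = (Idx < 16) ? First : Second;
  S.Lane = static_cast<int>(Idx & 15);
  return S;
}

int main() {
  // Big endian: mask 0 picks lane 0 of operand 0; mask 17 picks lane 1 of op 1.
  assert(resolveVPermElt(0, false).SourceVector == 0 &&
         resolveVPermElt(0, false).Lane == 0);
  assert(resolveVPermElt(17, false).SourceVector == 1 &&
         resolveVPermElt(17, false).Lane == 1);
  // Little endian: mask 0 maps to lane 15 of operand 0 after the remapping.
  assert(resolveVPermElt(0, true).SourceVector == 0 &&
         resolveVPermElt(0, true).Lane == 15);
  return 0;
}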
+
int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
if (DisablePPCConstHoist)
@@ -126,9 +234,10 @@ int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
+ return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
assert(Ty->isIntegerTy());
@@ -274,8 +383,34 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
return false;
};
+ auto supportedHalfPrecisionOp = [](Instruction *Inst) {
+ switch (Inst->getOpcode()) {
+ default:
+ return false;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::FPToUI:
+ case Instruction::UIToFP:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ return true;
+ }
+ };
+
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
+ // There are no direct operations on half precision so assume that
+ // anything with that type requires a call except for a few select
+ // operations with Power9.
+ if (Instruction *CurrInst = dyn_cast<Instruction>(J)) {
+ for (const auto &Op : CurrInst->operands()) {
+ if (Op->getType()->getScalarType()->isHalfTy() ||
+ CurrInst->getType()->getScalarType()->isHalfTy())
+ return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst));
+ }
+ }
if (CallInst *CI = dyn_cast<CallInst>(J)) {
// Inline ASM is okay, unless it clobbers the ctr register.
if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
@@ -297,6 +432,30 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::loop_decrement:
return true;
+ // Binary operations on 128-bit value will use CTR.
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
+ if (F->getType()->getScalarType()->isFP128Ty() ||
+ F->getType()->getScalarType()->isPPC_FP128Ty())
+ return true;
+ break;
+
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui:
+ case Intrinsic::experimental_constrained_sitofp:
+ case Intrinsic::experimental_constrained_uitofp: {
+ Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
+ Type *DstType = CI->getType()->getScalarType();
+ if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
+ isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
+ isLargeIntegerTy(!TM.isPPC64(), DstType))
+ return true;
+ break;
+ }
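// Illustrative example (not from the patch): under the checks above, a
// constrained fptosi from double to i128 -- or to i64 when targeting 32-bit
// PowerPC -- and any constrained conversion touching ppc_fp128 is expected to
// become a runtime call, so the surrounding loop is not turned into a
// CTR-based hardware loop.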
+
// Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
// because, although it does clobber the counter register, the
// control can't then return to inside the loop unless there is also
@@ -315,6 +474,15 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::pow:
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::experimental_constrained_powi:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_exp:
+ case Intrinsic::experimental_constrained_exp2:
+ case Intrinsic::experimental_constrained_pow:
+ case Intrinsic::experimental_constrained_sin:
+ case Intrinsic::experimental_constrained_cos:
return true;
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
@@ -336,6 +504,54 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::llround: Opcode = ISD::LLROUND; break;
case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+ case Intrinsic::experimental_constrained_fcmp:
+ Opcode = ISD::STRICT_FSETCC;
+ break;
+ case Intrinsic::experimental_constrained_fcmps:
+ Opcode = ISD::STRICT_FSETCCS;
+ break;
+ case Intrinsic::experimental_constrained_fma:
+ Opcode = ISD::STRICT_FMA;
+ break;
+ case Intrinsic::experimental_constrained_sqrt:
+ Opcode = ISD::STRICT_FSQRT;
+ break;
+ case Intrinsic::experimental_constrained_floor:
+ Opcode = ISD::STRICT_FFLOOR;
+ break;
+ case Intrinsic::experimental_constrained_ceil:
+ Opcode = ISD::STRICT_FCEIL;
+ break;
+ case Intrinsic::experimental_constrained_trunc:
+ Opcode = ISD::STRICT_FTRUNC;
+ break;
+ case Intrinsic::experimental_constrained_rint:
+ Opcode = ISD::STRICT_FRINT;
+ break;
+ case Intrinsic::experimental_constrained_lrint:
+ Opcode = ISD::STRICT_LRINT;
+ break;
+ case Intrinsic::experimental_constrained_llrint:
+ Opcode = ISD::STRICT_LLRINT;
+ break;
+ case Intrinsic::experimental_constrained_nearbyint:
+ Opcode = ISD::STRICT_FNEARBYINT;
+ break;
+ case Intrinsic::experimental_constrained_round:
+ Opcode = ISD::STRICT_FROUND;
+ break;
+ case Intrinsic::experimental_constrained_lround:
+ Opcode = ISD::STRICT_LROUND;
+ break;
+ case Intrinsic::experimental_constrained_llround:
+ Opcode = ISD::STRICT_LLROUND;
+ break;
+ case Intrinsic::experimental_constrained_minnum:
+ Opcode = ISD::STRICT_FMINNUM;
+ break;
+ case Intrinsic::experimental_constrained_maxnum:
+ Opcode = ISD::STRICT_FMAXNUM;
+ break;
case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
}
@@ -597,10 +813,7 @@ bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
}
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
- // On the A2, always unroll aggressively. For QPX unaligned loads, we depend
- // on combining the loads generated for consecutive accesses, and failure to
- // do so is particularly expensive. This makes it much more likely (compared
- // to only using concatenation unrolling).
+ // On the A2, always unroll aggressively.
if (ST->getCPUDirective() == PPC::DIR_A2)
return true;
@@ -660,7 +873,6 @@ const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector) {
- if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
return 0;
}
@@ -689,8 +901,6 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
}
unsigned PPCTTIImpl::getPrefetchDistance() const {
- // This seems like a reasonable default for the BG/Q (this pass is enabled, by
- // default, only on the BG/Q).
return 300;
}
@@ -779,7 +989,7 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+ // PPC, for both Altivec and VSX, supports cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
// instruction). We need one such shuffle instruction for each actual
// register (this is not true for arbitrary shuffles, but is true for the
@@ -796,11 +1006,12 @@ int PPCTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
}
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
- int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+ int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src);
// TODO: Allow non-throughput costs that aren't binary.
if (CostKind != TTI::TCK_RecipThroughput)
@@ -809,9 +1020,11 @@ int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
}
int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
- int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ int Cost =
+ BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;
@@ -835,13 +1048,6 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return Cost;
- } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
- // Floating point scalars are already located in index #0.
- if (Index == 0)
- return 0;
-
- return Cost;
-
} else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
if (ST->hasP9Altivec()) {
if (ISD == ISD::INSERT_VECTOR_ELT)
@@ -865,7 +1071,7 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
// The cost of the load constant for a vector extract is disregarded
// (invariant, easily schedulable).
return vectorCostAdjustment(1, Opcode, Val, nullptr);
-
+
} else if (ST->hasDirectMove())
// Assume permute has standard cost.
// Assume move-to/move-from VSR have 2x standard cost.
@@ -916,8 +1122,6 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
bool IsVSXType = ST->hasVSX() &&
(LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
- bool IsQPXType = ST->hasQPX() &&
- (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
// VSX has 32b/64b load instructions. Legalization can handle loading of
// 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
@@ -940,8 +1144,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// for Altivec types using the VSX instructions, but that's more expensive
// than using the permutation-based load sequence. On the P8, that's no
// longer true.
- if (Opcode == Instruction::Load &&
- ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
+ if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
*Alignment >= LT.second.getScalarType().getStoreSize())
return Cost + LT.first; // Add the cost of the permutations.
@@ -994,7 +1197,7 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
CostKind);
- // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+ // PPC, for both Altivec and VSX, supports cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
// instruction). For each result vector, we need one shuffle per incoming
// vector (except that the first shuffle can take two incoming vectors
@@ -1009,6 +1212,27 @@ unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
+bool PPCTTIImpl::areFunctionArgsABICompatible(
+ const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const {
+
+ // We need to ensure that argument promotion does not
+ // attempt to promote pointers to MMA types (__vector_pair
+ // and __vector_quad) since these types explicitly cannot be
+ // passed as arguments. Both of these types are larger than
+ // the 128-bit Altivec vectors and have a scalar size of 1 bit.
+ if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+ return false;
+
+ return llvm::none_of(Args, [](Argument *A) {
+ auto *EltTy = cast<PointerType>(A->getType())->getElementType();
+ if (EltTy->isSized())
+ return (EltTy->isIntOrIntVectorTy(1) &&
+ EltTy->getPrimitiveSizeInBits() > 128);
+ return false;
+ });
+}
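// Concrete case (illustrative, not from the patch): a callee parameter of
// type __vector_quad *, whose pointee is lowered as a <512 x i1> vector,
// matches the predicate above (an i1 vector wider than 128 bits), so none_of
// returns false and the caller/callee pair is reported as ABI-incompatible,
// which stops argument promotion from rewriting that pointer argument.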
+
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
@@ -1044,3 +1268,51 @@ bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
else
return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}
+
+bool PPCTTIImpl::isNumRegsMajorCostOfLSR() {
+ return false;
+}
+
+bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
+ MemIntrinsicInfo &Info) {
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_altivec_lvebx:
+ case Intrinsic::ppc_altivec_lvehx:
+ case Intrinsic::ppc_altivec_lvewx:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x_be:
+ case Intrinsic::ppc_vsx_lxvw4x_be:
+ case Intrinsic::ppc_vsx_lxvl:
+ case Intrinsic::ppc_vsx_lxvll:
+ case Intrinsic::ppc_vsx_lxvp: {
+ Info.PtrVal = Inst->getArgOperand(0);
+ Info.ReadMem = true;
+ Info.WriteMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_altivec_stvebx:
+ case Intrinsic::ppc_altivec_stvehx:
+ case Intrinsic::ppc_altivec_stvewx:
+ case Intrinsic::ppc_vsx_stxvd2x:
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x_be:
+ case Intrinsic::ppc_vsx_stxvw4x_be:
+ case Intrinsic::ppc_vsx_stxvl:
+ case Intrinsic::ppc_vsx_stxvll:
+ case Intrinsic::ppc_vsx_stxvp: {
+ Info.PtrVal = Inst->getArgOperand(1);
+ Info.ReadMem = false;
+ Info.WriteMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
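// A minimal sketch of a hypothetical caller, not part of this patch: generic
// code holding a TargetTransformInfo can now treat the Altivec/VSX load and
// store intrinsics listed above as ordinary memory accesses.
static const Value *getIntrinsicMemPtr(const TargetTransformInfo &TTI,
                                       IntrinsicInst *II) {
  MemIntrinsicInfo Info;
  // PPCTTIImpl::getTgtMemIntrinsic fills in PtrVal and the ReadMem/WriteMem
  // flags for the recognized load/store intrinsics and returns true.
  if (TTI.getTgtMemIntrinsic(II, Info) && (Info.ReadMem || Info.WriteMem))
    return Info.PtrVal;
  return nullptr;
}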
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index d998521084e1..c38ae90bc7dc 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -41,6 +41,9 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const;
+
/// \name Scalar TTI Implementations
/// @{
@@ -49,7 +52,8 @@ public:
TTI::TargetCostKind CostKind);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind);
+ Type *Ty, TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TTI::TargetCostKind CostKind);
@@ -64,12 +68,14 @@ public:
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo);
+ bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
+ bool isNumRegsMajorCostOfLSR();
/// @}
@@ -103,10 +109,11 @@ public:
const Instruction *CxtI = nullptr);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::TargetCostKind CostKind,
+ TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
@@ -122,6 +129,9 @@ public:
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
+ bool areFunctionArgsABICompatible(const Function *Caller,
+ const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const;
/// @}
};
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 3e6d1c7939f1..e72e29112da7 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -315,9 +315,9 @@ protected:
// Extend the live interval of the addend source (it might end at the
// copy to be removed, or somewhere in between there and here). This
// is necessary only if it is a physical register.
- if (!Register::isVirtualRegister(AddendSrcReg))
- for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid();
- ++Units) {
+ if (!AddendSrcReg.isVirtual())
+ for (MCRegUnitIterator Units(AddendSrcReg.asMCReg(), TRI);
+ Units.isValid(); ++Units) {
unsigned Unit = *Units;
LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index c3729da0b07b..ff251f55afff 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -254,10 +254,11 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (isAnyVecReg(Reg, Partial)) {
+ // All operands need to be checked because there are instructions that
+ // operate on a partial register and produce a full register (such as
+ // XXPERMDIs).
+ if (isAnyVecReg(Reg, Partial))
RelevantInstr = true;
- break;
- }
}
if (!RelevantInstr)
@@ -689,6 +690,29 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
LLVM_DEBUG(UseMI.dump());
LLVM_DEBUG(dbgs() << "\n");
}
+
+ // It is possible that the load feeds a swap and that swap feeds a
+ // store. In such a case, the code is actually trying to store a swapped
+ // vector. We must reject such webs.
+ if (SwapVector[UseIdx].IsSwap && !SwapVector[UseIdx].IsLoad &&
+ !SwapVector[UseIdx].IsStore) {
+ Register SwapDefReg = UseMI.getOperand(0).getReg();
+ for (MachineInstr &UseOfUseMI :
+ MRI->use_nodbg_instructions(SwapDefReg)) {
+ int UseOfUseIdx = SwapMap[&UseOfUseMI];
+ if (SwapVector[UseOfUseIdx].IsStore) {
+ SwapVector[Repr].WebRejected = 1;
+ LLVM_DEBUG(
+ dbgs() << format(
+ "Web %d rejected for load/swap feeding a store\n", Repr));
+ LLVM_DEBUG(dbgs() << " def " << EntryIdx << ": ");
+ LLVM_DEBUG(MI->dump());
+ LLVM_DEBUG(dbgs() << " use " << UseIdx << ": ");
+ LLVM_DEBUG(UseMI.dump());
+ LLVM_DEBUG(dbgs() << "\n");
+ }
+ }
+ }
}
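// Illustrative case (not from the patch): a web where an lxvd2x result feeds
// a standalone xxpermdi swap whose result is then stored is now rejected,
// because the program genuinely wants the swapped layout in memory and
// removing the swap would change the stored value.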
// Reject webs that contain swapping stores that are fed by something
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 649bd648a6cf..6bb952f27fee 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -14,6 +14,10 @@ Target &llvm::getThePPC32Target() {
static Target ThePPC32Target;
return ThePPC32Target;
}
+Target &llvm::getThePPC32LETarget() {
+ static Target ThePPC32LETarget;
+ return ThePPC32LETarget;
+}
Target &llvm::getThePPC64Target() {
static Target ThePPC64Target;
return ThePPC64Target;
@@ -24,9 +28,12 @@ Target &llvm::getThePPC64LETarget() {
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetInfo() {
- RegisterTarget<Triple::ppc, /*HasJIT=*/true> X(getThePPC32Target(), "ppc32",
+ RegisterTarget<Triple::ppc, /*HasJIT=*/true> W(getThePPC32Target(), "ppc32",
"PowerPC 32", "PPC");
+ RegisterTarget<Triple::ppcle, /*HasJIT=*/true> X(
+ getThePPC32LETarget(), "ppc32le", "PowerPC 32 LE", "PPC");
+
RegisterTarget<Triple::ppc64, /*HasJIT=*/true> Y(getThePPC64Target(), "ppc64",
"PowerPC 64", "PPC");
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
index 2d0afbfb1be0..f9d20ef00df8 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
@@ -14,6 +14,7 @@ namespace llvm {
class Target;
Target &getThePPC32Target();
+Target &getThePPC32LETarget();
Target &getThePPC64Target();
Target &getThePPC64LETarget();
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 407f980bd35e..dcf7525d7458 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -7,20 +7,18 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/RISCVAsmBackend.h"
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "MCTargetDesc/RISCVInstPrinter.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "MCTargetDesc/RISCVMatInt.h"
#include "MCTargetDesc/RISCVTargetStreamer.h"
-#include "RISCVInstrInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
-#include "Utils/RISCVBaseInfo.h"
-#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -33,6 +31,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/RISCVAttributes.h"
@@ -99,7 +98,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
// Helper to emit a combination of LUI, ADDI(W), and SLLI instructions that
// synthesize the desired immedate value into the destination register.
- void emitLoadImm(Register DestReg, int64_t Value, MCStreamer &Out);
+ void emitLoadImm(MCRegister DestReg, int64_t Value, MCStreamer &Out);
// Helper to emit a combination of AUIPC and SecondOpcode. Used to implement
// helpers such as emitLoadLocalAddress and emitLoadAddress.
@@ -125,6 +124,13 @@ class RISCVAsmParser : public MCTargetAsmParser {
void emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
MCStreamer &Out, bool HasTmpReg);
+ // Helper to emit pseudo sign/zero extend instruction.
+ void emitPseudoExtend(MCInst &Inst, bool SignExtend, int64_t Width,
+ SMLoc IDLoc, MCStreamer &Out);
+
+ // Helper to emit pseudo vmsge{u}.vx instruction.
+ void emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, MCStreamer &Out);
+
// Checks that a PseudoAddTPRel is using x4/tp in its second input operand.
// Enforcing this using a restricted register class for the second input
// operand of PseudoAddTPRel results in a poor diagnostic due to the fact
@@ -217,8 +223,7 @@ public:
};
static bool classifySymbolRef(const MCExpr *Expr,
- RISCVMCExpr::VariantKind &Kind,
- int64_t &Addend);
+ RISCVMCExpr::VariantKind &Kind);
RISCVAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
@@ -262,7 +267,7 @@ struct RISCVOperand : public MCParsedAsmOperand {
bool IsRV64;
struct RegOp {
- Register RegNum;
+ MCRegister RegNum;
};
struct ImmOp {
@@ -277,23 +282,8 @@ struct RISCVOperand : public MCParsedAsmOperand {
// e.g.: read/write or user/supervisor/machine privileges.
};
- enum class VSEW {
- SEW_8 = 0,
- SEW_16,
- SEW_32,
- SEW_64,
- SEW_128,
- SEW_256,
- SEW_512,
- SEW_1024,
- };
-
- enum class VLMUL { LMUL_1 = 0, LMUL_2, LMUL_4, LMUL_8 };
-
struct VTypeOp {
- VSEW Sew;
- VLMUL Lmul;
- unsigned Encoding;
+ unsigned Val;
};
SMLoc StartLoc, EndLoc;
@@ -373,7 +363,7 @@ public:
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
bool IsValid;
if (!IsConstantImm)
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
+ IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK);
else
IsValid = isShiftedInt<N - 1, 1>(Imm);
return IsValid && VK == RISCVMCExpr::VK_RISCV_None;
@@ -387,7 +377,7 @@ public:
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
- return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
+ return RISCVAsmParser::classifySymbolRef(getImm(), VK) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -397,7 +387,7 @@ public:
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
- return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
+ return RISCVAsmParser::classifySymbolRef(getImm(), VK) &&
(VK == RISCVMCExpr::VK_RISCV_CALL ||
VK == RISCVMCExpr::VK_RISCV_CALL_PLT);
}
@@ -408,7 +398,7 @@ public:
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
- return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
+ return RISCVAsmParser::classifySymbolRef(getImm(), VK) &&
VK == RISCVMCExpr::VK_RISCV_CALL;
}
@@ -418,7 +408,7 @@ public:
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
- return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
+ return RISCVAsmParser::classifySymbolRef(getImm(), VK) &&
VK == RISCVMCExpr::VK_RISCV_TPREL_ADD;
}
@@ -523,16 +513,6 @@ public:
return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
- bool isUImm5NonZero() const {
- int64_t Imm;
- RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
- if (!isImm())
- return false;
- bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
- return IsConstantImm && isUInt<5>(Imm) && (Imm != 0) &&
- VK == RISCVMCExpr::VK_RISCV_None;
- }
-
bool isSImm5() const {
if (!isImm())
return false;
@@ -549,7 +529,7 @@ public:
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isInt<6>(Imm) &&
- VK == RISCVMCExpr::VK_RISCV_None;
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6NonZero() const {
@@ -633,7 +613,7 @@ public:
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm)
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
+ IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK);
else
IsValid = isInt<12>(Imm);
return IsValid && ((IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None) ||
@@ -664,7 +644,7 @@ public:
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm) {
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
+ IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK);
return IsValid && (VK == RISCVMCExpr::VK_RISCV_HI ||
VK == RISCVMCExpr::VK_RISCV_TPREL_HI);
} else {
@@ -682,7 +662,7 @@ public:
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm) {
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
+ IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK);
return IsValid && (VK == RISCVMCExpr::VK_RISCV_PCREL_HI ||
VK == RISCVMCExpr::VK_RISCV_GOT_HI ||
VK == RISCVMCExpr::VK_RISCV_TLS_GOT_HI ||
@@ -730,7 +710,7 @@ public:
}
StringRef getSysReg() const {
- assert(Kind == KindTy::SystemRegister && "Invalid access!");
+ assert(Kind == KindTy::SystemRegister && "Invalid type access!");
return StringRef(SysReg.Data, SysReg.Length);
}
@@ -744,59 +724,25 @@ public:
return Tok;
}
- static StringRef getSEWStr(VSEW Sew) {
- switch (Sew) {
- case VSEW::SEW_8:
- return "e8";
- case VSEW::SEW_16:
- return "e16";
- case VSEW::SEW_32:
- return "e32";
- case VSEW::SEW_64:
- return "e64";
- case VSEW::SEW_128:
- return "e128";
- case VSEW::SEW_256:
- return "e256";
- case VSEW::SEW_512:
- return "e512";
- case VSEW::SEW_1024:
- return "e1024";
- }
- return "";
- }
-
- static StringRef getLMULStr(VLMUL Lmul) {
- switch (Lmul) {
- case VLMUL::LMUL_1:
- return "m1";
- case VLMUL::LMUL_2:
- return "m2";
- case VLMUL::LMUL_4:
- return "m4";
- case VLMUL::LMUL_8:
- return "m8";
- }
- return "";
- }
-
- StringRef getVType(SmallString<32> &Buf) const {
- assert(Kind == KindTy::VType && "Invalid access!");
- Buf.append(getSEWStr(VType.Sew));
- Buf.append(",");
- Buf.append(getLMULStr(VType.Lmul));
-
- return Buf.str();
+ unsigned getVType() const {
+ assert(Kind == KindTy::VType && "Invalid type access!");
+ return VType.Val;
}
void print(raw_ostream &OS) const override {
+ auto RegName = [](unsigned Reg) {
+ if (Reg)
+ return RISCVInstPrinter::getRegisterName(Reg);
+ else
+ return "noreg";
+ };
+
switch (Kind) {
case KindTy::Immediate:
OS << *getImm();
break;
case KindTy::Register:
- OS << "<register x";
- OS << getReg() << ">";
+ OS << "<register " << RegName(getReg()) << ">";
break;
case KindTy::Token:
OS << "'" << getToken() << "'";
@@ -805,8 +751,9 @@ public:
OS << "<sysreg: " << getSysReg() << '>';
break;
case KindTy::VType:
- SmallString<32> VTypeBuf;
- OS << "<vtype: " << getVType(VTypeBuf) << '>';
+ OS << "<vtype: ";
+ RISCVVType::printVType(getVType(), OS);
+ OS << '>';
break;
}
}
@@ -852,15 +799,10 @@ public:
return Op;
}
- static std::unique_ptr<RISCVOperand> createVType(APInt Sew, APInt Lmul,
- SMLoc S, bool IsRV64) {
+ static std::unique_ptr<RISCVOperand> createVType(unsigned VTypeI, SMLoc S,
+ bool IsRV64) {
auto Op = std::make_unique<RISCVOperand>(KindTy::VType);
- Sew.ashrInPlace(3);
- unsigned SewLog2 = Sew.logBase2();
- unsigned LmulLog2 = Lmul.logBase2();
- Op->VType.Sew = static_cast<VSEW>(SewLog2);
- Op->VType.Lmul = static_cast<VLMUL>(LmulLog2);
- Op->VType.Encoding = (SewLog2 << 2) | LmulLog2;
+ Op->VType.Val = VTypeI;
Op->StartLoc = S;
Op->IsRV64 = IsRV64;
return Op;
@@ -889,16 +831,6 @@ public:
addExpr(Inst, getImm());
}
- void addSImm5Plus1Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- int64_t Imm = 0;
- RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
- bool IsConstant = evaluateConstantImm(getImm(), Imm, VK);
- assert(IsConstant && "Expect constant value!");
- (void)IsConstant;
- Inst.addOperand(MCOperand::createImm(Imm - 1));
- }
-
void addFenceArgOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// isFenceArg has validated the operand, meaning this cast is safe
@@ -925,7 +857,7 @@ public:
void addVTypeIOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createImm(VType.Encoding));
+ Inst.addOperand(MCOperand::createImm(getVType()));
}
// Returns the rounding mode represented by this RISCVOperand. Should only
@@ -952,7 +884,12 @@ public:
#define GET_MNEMONIC_SPELL_CHECKER
#include "RISCVGenAsmMatcher.inc"
-static Register convertFPR64ToFPR32(Register Reg) {
+static MCRegister convertFPR64ToFPR16(MCRegister Reg) {
+ assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register");
+ return Reg - RISCV::F0_D + RISCV::F0_H;
+}
+
+static MCRegister convertFPR64ToFPR32(MCRegister Reg) {
assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register");
return Reg - RISCV::F0_D + RISCV::F0_F;
}
@@ -963,7 +900,7 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
if (!Op.isReg())
return Match_InvalidOperand;
- Register Reg = Op.getReg();
+ MCRegister Reg = Op.getReg();
bool IsRegFPR64 =
RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg);
bool IsRegFPR64C =
@@ -976,6 +913,12 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
Op.Reg.RegNum = convertFPR64ToFPR32(Reg);
return Match_Success;
}
+ // As the parser couldn't differentiate an FPR16 from an FPR64, coerce the
+ // register from FPR64 to FPR16 if necessary.
+ if (IsRegFPR64 && Kind == MCK_FPR16) {
+ Op.Reg.RegNum = convertFPR64ToFPR16(Reg);
+ return Match_Success;
+ }
return Match_InvalidOperand;
}
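// For example (illustrative): with the Zfh extension, the "ft0" operand in
// "flh ft0, 0(a0)" is initially matched as the 64-bit register F0_D; the
// coercion above rewrites it to F0_H so that it satisfies the FPR16 operand
// class expected by the half-precision load.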
@@ -1079,6 +1022,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1);
case Match_InvalidUImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
+ case Match_InvalidSImm5:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 4),
+ (1 << 4) - 1);
case Match_InvalidSImm6:
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5),
(1 << 5) - 1);
@@ -1181,8 +1127,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
case Match_InvalidVTypeI: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
- return Error(ErrorLoc,
- "operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8]");
+ return Error(
+ ErrorLoc,
+ "operand must be "
+ "e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]");
}
case Match_InvalidVMaskRegister: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
@@ -1202,13 +1150,15 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// alternative ABI names), setting RegNo to the matching register. Upon
// failure, returns true and sets RegNo to 0. If IsRV32E then registers
// x16-x31 will be rejected.
-static bool matchRegisterNameHelper(bool IsRV32E, Register &RegNo,
+static bool matchRegisterNameHelper(bool IsRV32E, MCRegister &RegNo,
StringRef Name) {
RegNo = MatchRegisterName(Name);
- // The 32- and 64-bit FPRs have the same asm name. Check that the initial
- // match always matches the 64-bit variant, and not the 32-bit one.
+ // The 16-/32- and 64-bit FPRs have the same asm name. Check that the initial
+ // match always matches the 64-bit variant, and not the 16/32-bit one.
+ assert(!(RegNo >= RISCV::F0_H && RegNo <= RISCV::F31_H));
assert(!(RegNo >= RISCV::F0_F && RegNo <= RISCV::F31_F));
// The default FPR register class is based on the tablegen enum ordering.
+ static_assert(RISCV::F0_D < RISCV::F0_H, "FPR matching must be updated");
static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated");
if (RegNo == RISCV::NoRegister)
RegNo = MatchRegisterAltName(Name);
@@ -1233,7 +1183,7 @@ OperandMatchResultTy RISCVAsmParser::tryParseRegister(unsigned &RegNo,
RegNo = 0;
StringRef Name = getLexer().getTok().getIdentifier();
- if (matchRegisterNameHelper(isRV32E(), (Register &)RegNo, Name))
+ if (matchRegisterNameHelper(isRV32E(), (MCRegister &)RegNo, Name))
return MatchOperand_NoMatch;
getParser().Lex(); // Eat identifier token.
@@ -1265,7 +1215,7 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
return MatchOperand_NoMatch;
case AsmToken::Identifier:
StringRef Name = getLexer().getTok().getIdentifier();
- Register RegNo;
+ MCRegister RegNo;
matchRegisterNameHelper(isRV32E(), RegNo, Name);
if (RegNo == RISCV::NoRegister) {
@@ -1549,39 +1499,75 @@ OperandMatchResultTy RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
if (getLexer().getKind() != AsmToken::Identifier)
return MatchOperand_NoMatch;
- // Parse "e8,m1"
+ // Parse "e8,m1,t[a|u],m[a|u]"
StringRef Name = getLexer().getTok().getIdentifier();
if (!Name.consume_front("e"))
return MatchOperand_NoMatch;
- APInt Sew(16, Name, 10);
- if (Sew != 8 && Sew != 16 && Sew != 32 && Sew != 64 && Sew != 128 &&
- Sew != 256 && Sew != 512 && Sew != 1024)
+ unsigned Sew;
+ if (Name.getAsInteger(10, Sew))
+ return MatchOperand_NoMatch;
+ if (!RISCVVType::isValidSEW(Sew))
return MatchOperand_NoMatch;
getLexer().Lex();
- if (getLexer().getKind() == AsmToken::EndOfStatement) {
- Operands.push_back(
- RISCVOperand::createVType(Sew, APInt(16, 1), S, isRV64()));
+ if (!getLexer().is(AsmToken::Comma))
+ return MatchOperand_NoMatch;
+ getLexer().Lex();
- return MatchOperand_Success;
- }
+ Name = getLexer().getTok().getIdentifier();
+ if (!Name.consume_front("m"))
+ return MatchOperand_NoMatch;
+ // "m" or "mf"
+ bool Fractional = Name.consume_front("f");
+ unsigned Lmul;
+ if (Name.getAsInteger(10, Lmul))
+ return MatchOperand_NoMatch;
+ if (!RISCVVType::isValidLMUL(Lmul, Fractional))
+ return MatchOperand_NoMatch;
+ getLexer().Lex();
if (!getLexer().is(AsmToken::Comma))
return MatchOperand_NoMatch;
getLexer().Lex();
Name = getLexer().getTok().getIdentifier();
- if (!Name.consume_front("m"))
+ // ta or tu
+ bool TailAgnostic;
+ if (Name == "ta")
+ TailAgnostic = true;
+ else if (Name == "tu")
+ TailAgnostic = false;
+ else
+ return MatchOperand_NoMatch;
+ getLexer().Lex();
+
+ if (!getLexer().is(AsmToken::Comma))
return MatchOperand_NoMatch;
- APInt Lmul(16, Name, 10);
- if (Lmul != 1 && Lmul != 2 && Lmul != 4 && Lmul != 8)
+ getLexer().Lex();
+
+ Name = getLexer().getTok().getIdentifier();
+ // ma or mu
+ bool MaskAgnostic;
+ if (Name == "ma")
+ MaskAgnostic = true;
+ else if (Name == "mu")
+ MaskAgnostic = false;
+ else
return MatchOperand_NoMatch;
getLexer().Lex();
if (getLexer().getKind() != AsmToken::EndOfStatement)
return MatchOperand_NoMatch;
- Operands.push_back(RISCVOperand::createVType(Sew, Lmul, S, isRV64()));
+ unsigned SewLog2 = Log2_32(Sew / 8);
+ unsigned LmulLog2 = Log2_32(Lmul);
+ RISCVVSEW VSEW = static_cast<RISCVVSEW>(SewLog2);
+ RISCVVLMUL VLMUL =
+ static_cast<RISCVVLMUL>(Fractional ? 8 - LmulLog2 : LmulLog2);
+
+ unsigned VTypeI =
+ RISCVVType::encodeVTYPE(VLMUL, VSEW, TailAgnostic, MaskAgnostic);
+ Operands.push_back(RISCVOperand::createVType(VTypeI, S, isRV64()));
return MatchOperand_Success;
}
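// Worked example: in "vsetvli t0, a0, e32,m1,ta,mu" the operand parses as
// SEW=32, LMUL=1 (non-fractional), TailAgnostic=true and MaskAgnostic=false;
// those fields are then packed into a single immediate by
// RISCVVType::encodeVTYPE instead of being kept as separate SEW/LMUL enums as
// before.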
@@ -1596,7 +1582,7 @@ OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) {
Error(getLoc(), "expected '.t' suffix");
return MatchOperand_ParseFail;
}
- Register RegNo;
+ MCRegister RegNo;
matchRegisterNameHelper(isRV32E(), RegNo, Name);
if (RegNo == RISCV::NoRegister)
@@ -1788,48 +1774,19 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
}
bool RISCVAsmParser::classifySymbolRef(const MCExpr *Expr,
- RISCVMCExpr::VariantKind &Kind,
- int64_t &Addend) {
+ RISCVMCExpr::VariantKind &Kind) {
Kind = RISCVMCExpr::VK_RISCV_None;
- Addend = 0;
if (const RISCVMCExpr *RE = dyn_cast<RISCVMCExpr>(Expr)) {
Kind = RE->getKind();
Expr = RE->getSubExpr();
}
- // It's a simple symbol reference or constant with no addend.
- if (isa<MCConstantExpr>(Expr) || isa<MCSymbolRefExpr>(Expr))
- return true;
-
- const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr);
- if (!BE)
- return false;
-
- if (!isa<MCSymbolRefExpr>(BE->getLHS()))
- return false;
-
- if (BE->getOpcode() != MCBinaryExpr::Add &&
- BE->getOpcode() != MCBinaryExpr::Sub)
- return false;
-
- // We are able to support the subtraction of two symbol references
- if (BE->getOpcode() == MCBinaryExpr::Sub &&
- isa<MCSymbolRefExpr>(BE->getRHS()))
- return true;
-
- // See if the addend is a constant, otherwise there's more going
- // on here than we can deal with.
- auto AddendExpr = dyn_cast<MCConstantExpr>(BE->getRHS());
- if (!AddendExpr)
- return false;
-
- Addend = AddendExpr->getValue();
- if (BE->getOpcode() == MCBinaryExpr::Sub)
- Addend = -Addend;
-
- // It's some symbol reference + a constant addend
- return Kind != RISCVMCExpr::VK_RISCV_Invalid;
+ MCValue Res;
+ MCFixup Fixup;
+ if (Expr->evaluateAsRelocatable(Res, nullptr, &Fixup))
+ return Res.getRefKind() == RISCVMCExpr::VK_RISCV_None;
+ return false;
}
bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) {
@@ -2040,7 +1997,33 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
else
return Error(ValueExprLoc, "bad arch string " + Arch);
+ // .attribute arch overrides the current architecture, so unset all
+ // currently enabled extensions
+ clearFeatureBits(RISCV::FeatureRV32E, "e");
+ clearFeatureBits(RISCV::FeatureStdExtM, "m");
+ clearFeatureBits(RISCV::FeatureStdExtA, "a");
+ clearFeatureBits(RISCV::FeatureStdExtF, "f");
+ clearFeatureBits(RISCV::FeatureStdExtD, "d");
+ clearFeatureBits(RISCV::FeatureStdExtC, "c");
+ clearFeatureBits(RISCV::FeatureStdExtB, "experimental-b");
+ clearFeatureBits(RISCV::FeatureStdExtV, "experimental-v");
+ clearFeatureBits(RISCV::FeatureExtZfh, "experimental-zfh");
+ clearFeatureBits(RISCV::FeatureExtZba, "experimental-zba");
+ clearFeatureBits(RISCV::FeatureExtZbb, "experimental-zbb");
+ clearFeatureBits(RISCV::FeatureExtZbc, "experimental-zbc");
+ clearFeatureBits(RISCV::FeatureExtZbe, "experimental-zbe");
+ clearFeatureBits(RISCV::FeatureExtZbf, "experimental-zbf");
+ clearFeatureBits(RISCV::FeatureExtZbm, "experimental-zbm");
+ clearFeatureBits(RISCV::FeatureExtZbp, "experimental-zbp");
+ clearFeatureBits(RISCV::FeatureExtZbproposedc, "experimental-zbproposedc");
+ clearFeatureBits(RISCV::FeatureExtZbr, "experimental-zbr");
+ clearFeatureBits(RISCV::FeatureExtZbs, "experimental-zbs");
+ clearFeatureBits(RISCV::FeatureExtZbt, "experimental-zbt");
+ clearFeatureBits(RISCV::FeatureExtZvamo, "experimental-zvamo");
+ clearFeatureBits(RISCV::FeatureStdExtZvlsseg, "experimental-zvlsseg");
+
while (!Arch.empty()) {
+ bool DropFirst = true;
if (Arch[0] == 'i')
clearFeatureBits(RISCV::FeatureRV32E, "e");
else if (Arch[0] == 'e')
@@ -2062,19 +2045,57 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
setFeatureBits(RISCV::FeatureStdExtD, "d");
} else if (Arch[0] == 'c') {
setFeatureBits(RISCV::FeatureStdExtC, "c");
+ } else if (Arch[0] == 'b') {
+ setFeatureBits(RISCV::FeatureStdExtB, "experimental-b");
+ } else if (Arch[0] == 'v') {
+ setFeatureBits(RISCV::FeatureStdExtV, "experimental-v");
+ } else if (Arch[0] == 's' || Arch[0] == 'x' || Arch[0] == 'z') {
+ StringRef Ext =
+ Arch.take_until([](char c) { return ::isdigit(c) || c == '_'; });
+ if (Ext == "zba")
+ setFeatureBits(RISCV::FeatureExtZba, "experimental-zba");
+ else if (Ext == "zbb")
+ setFeatureBits(RISCV::FeatureExtZbb, "experimental-zbb");
+ else if (Ext == "zbc")
+ setFeatureBits(RISCV::FeatureExtZbc, "experimental-zbc");
+ else if (Ext == "zbe")
+ setFeatureBits(RISCV::FeatureExtZbe, "experimental-zbe");
+ else if (Ext == "zbf")
+ setFeatureBits(RISCV::FeatureExtZbf, "experimental-zbf");
+ else if (Ext == "zbm")
+ setFeatureBits(RISCV::FeatureExtZbm, "experimental-zbm");
+ else if (Ext == "zbp")
+ setFeatureBits(RISCV::FeatureExtZbp, "experimental-zbp");
+ else if (Ext == "zbproposedc")
+ setFeatureBits(RISCV::FeatureExtZbproposedc,
+ "experimental-zbproposedc");
+ else if (Ext == "zbr")
+ setFeatureBits(RISCV::FeatureExtZbr, "experimental-zbr");
+ else if (Ext == "zbs")
+ setFeatureBits(RISCV::FeatureExtZbs, "experimental-zbs");
+ else if (Ext == "zbt")
+ setFeatureBits(RISCV::FeatureExtZbt, "experimental-zbt");
+ else if (Ext == "zfh")
+ setFeatureBits(RISCV::FeatureExtZfh, "experimental-zfh");
+ else if (Ext == "zvamo")
+ setFeatureBits(RISCV::FeatureExtZvamo, "experimental-zvamo");
+ else if (Ext == "zvlsseg")
+ setFeatureBits(RISCV::FeatureStdExtZvlsseg, "experimental-zvlsseg");
+ else
+ return Error(ValueExprLoc, "bad arch string " + Ext);
+ Arch = Arch.drop_until([](char c) { return ::isdigit(c) || c == '_'; });
+ DropFirst = false;
} else
return Error(ValueExprLoc, "bad arch string " + Arch);
- Arch = Arch.drop_front(1);
+ if (DropFirst)
+ Arch = Arch.drop_front(1);
int major = 0;
int minor = 0;
Arch.consumeInteger(10, major);
Arch.consume_front("p");
Arch.consumeInteger(10, minor);
- if (major != 0 || minor != 0) {
- Arch = Arch.drop_until([](char c) { return c == '_' || c == '"'; });
- Arch = Arch.drop_while([](char c) { return c == '_'; });
- }
+ Arch = Arch.drop_while([](char c) { return c == '_'; });
}
}
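// Illustrative example (assuming the rv32/rv64 prefix has already been
// consumed by the earlier checks): for ".attribute arch, "rv64i2p0_m2p0_zba0p93""
// all extension features are cleared up front, then the loop consumes 'i' and
// 'm' one letter at a time together with their version numbers, while "zba"
// is taken up to its version digits and enables the experimental Zba feature.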
@@ -2102,6 +2123,38 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
formalArchStr = (Twine(formalArchStr) + "_d2p0").str();
if (getFeatureBits(RISCV::FeatureStdExtC))
formalArchStr = (Twine(formalArchStr) + "_c2p0").str();
+ if (getFeatureBits(RISCV::FeatureStdExtB))
+ formalArchStr = (Twine(formalArchStr) + "_b0p93").str();
+ if (getFeatureBits(RISCV::FeatureStdExtV))
+ formalArchStr = (Twine(formalArchStr) + "_v0p10").str();
+ if (getFeatureBits(RISCV::FeatureExtZfh))
+ formalArchStr = (Twine(formalArchStr) + "_zfh0p1").str();
+ if (getFeatureBits(RISCV::FeatureExtZba))
+ formalArchStr = (Twine(formalArchStr) + "_zba0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZbb))
+ formalArchStr = (Twine(formalArchStr) + "_zbb0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZbc))
+ formalArchStr = (Twine(formalArchStr) + "_zbc0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZbe))
+ formalArchStr = (Twine(formalArchStr) + "_zbe0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZbf))
+ formalArchStr = (Twine(formalArchStr) + "_zbf0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZbm))
+ formalArchStr = (Twine(formalArchStr) + "_zbm0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZbp))
+ formalArchStr = (Twine(formalArchStr) + "_zbp0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZbproposedc))
+ formalArchStr = (Twine(formalArchStr) + "_zbproposedc0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZbr))
+ formalArchStr = (Twine(formalArchStr) + "_zbr0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZbs))
+ formalArchStr = (Twine(formalArchStr) + "_zbs0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZbt))
+ formalArchStr = (Twine(formalArchStr) + "_zbt0p93").str();
+ if (getFeatureBits(RISCV::FeatureExtZvamo))
+ formalArchStr = (Twine(formalArchStr) + "_zvamo0p10").str();
+ if (getFeatureBits(RISCV::FeatureStdExtZvlsseg))
+ formalArchStr = (Twine(formalArchStr) + "_zvlsseg0p10").str();
getTargetStreamer().emitTextAttribute(Tag, formalArchStr);
}
@@ -2118,12 +2171,12 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
S.emitInstruction((Res ? CInst : Inst), getSTI());
}
-void RISCVAsmParser::emitLoadImm(Register DestReg, int64_t Value,
+void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
MCStreamer &Out) {
RISCVMatInt::InstSeq Seq;
RISCVMatInt::generateInstSeq(Value, isRV64(), Seq);
- Register SrcReg = RISCV::X0;
+ MCRegister SrcReg = RISCV::X0;
for (RISCVMatInt::Inst &Inst : Seq) {
if (Inst.Opc == RISCV::LUI) {
emitToStreamer(
@@ -2149,8 +2202,7 @@ void RISCVAsmParser::emitAuipcInstPair(MCOperand DestReg, MCOperand TmpReg,
// OP DestReg, TmpReg, %pcrel_lo(TmpLabel)
MCContext &Ctx = getContext();
- MCSymbol *TmpLabel = Ctx.createTempSymbol(
- "pcrel_hi", /* AlwaysAddSuffix */ true, /* CanBeUnnamed */ false);
+ MCSymbol *TmpLabel = Ctx.createNamedTempSymbol("pcrel_hi");
Out.emitLabel(TmpLabel);
const RISCVMCExpr *SymbolHi = RISCVMCExpr::create(Symbol, VKHi, Ctx);
@@ -2254,6 +2306,88 @@ void RISCVAsmParser::emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode,
Opcode, IDLoc, Out);
}
+void RISCVAsmParser::emitPseudoExtend(MCInst &Inst, bool SignExtend,
+ int64_t Width, SMLoc IDLoc,
+ MCStreamer &Out) {
+ // The sign/zero extend pseudo-instruction does two shifts, with the shift
+ // amounts dependent on the XLEN.
+ //
+ // The expansion looks like this:
+ //
+ // SLLI rd, rs, XLEN - Width
+ // SR[A|L]I rd, rd, XLEN - Width
+ MCOperand DestReg = Inst.getOperand(0);
+ MCOperand SourceReg = Inst.getOperand(1);
+
+ unsigned SecondOpcode = SignExtend ? RISCV::SRAI : RISCV::SRLI;
+ int64_t ShAmt = (isRV64() ? 64 : 32) - Width;
+
+ assert(ShAmt > 0 && "Shift amount must be non-zero.");
+
+ emitToStreamer(Out, MCInstBuilder(RISCV::SLLI)
+ .addOperand(DestReg)
+ .addOperand(SourceReg)
+ .addImm(ShAmt));
+
+ emitToStreamer(Out, MCInstBuilder(SecondOpcode)
+ .addOperand(DestReg)
+ .addOperand(DestReg)
+ .addImm(ShAmt));
+}
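// Worked example: on RV64, "sext.b a0, a1" is emitted as
//   slli a0, a1, 56
//   srai a0, a0, 56
// while "zext.h a0, a1" uses a shift amount of 48 and srli for the second
// instruction; on RV32 the amounts become 24 and 16 respectively.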
+
+void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
+ MCStreamer &Out) {
+ if (Inst.getNumOperands() == 3) {
+ // unmasked va >= x
+ //
+ // pseudoinstruction: vmsge{u}.vx vd, va, x
+ // expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
+ emitToStreamer(Out, MCInstBuilder(Opcode)
+ .addOperand(Inst.getOperand(0))
+ .addOperand(Inst.getOperand(1))
+ .addOperand(Inst.getOperand(2))
+ .addReg(RISCV::NoRegister));
+ emitToStreamer(Out, MCInstBuilder(RISCV::VMNAND_MM)
+ .addOperand(Inst.getOperand(0))
+ .addOperand(Inst.getOperand(0))
+ .addOperand(Inst.getOperand(0)));
+ } else if (Inst.getNumOperands() == 4) {
+ // masked va >= x, vd != v0
+ //
+ // pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t
+ // expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
+ assert(Inst.getOperand(0).getReg() != RISCV::V0 &&
+ "The destination register should not be V0.");
+ emitToStreamer(Out, MCInstBuilder(Opcode)
+ .addOperand(Inst.getOperand(0))
+ .addOperand(Inst.getOperand(1))
+ .addOperand(Inst.getOperand(2))
+ .addOperand(Inst.getOperand(3)));
+ emitToStreamer(Out, MCInstBuilder(RISCV::VMXOR_MM)
+ .addOperand(Inst.getOperand(0))
+ .addOperand(Inst.getOperand(0))
+ .addReg(RISCV::V0));
+ } else if (Inst.getNumOperands() == 5) {
+ // masked va >= x, vd == v0
+ //
+ // pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
+ // expansion: vmslt{u}.vx vt, va, x; vmandnot.mm vd, vd, vt
+ assert(Inst.getOperand(0).getReg() == RISCV::V0 &&
+ "The destination register should be V0.");
+ assert(Inst.getOperand(1).getReg() != RISCV::V0 &&
+ "The temporary vector register should not be V0.");
+ emitToStreamer(Out, MCInstBuilder(Opcode)
+ .addOperand(Inst.getOperand(1))
+ .addOperand(Inst.getOperand(2))
+ .addOperand(Inst.getOperand(3))
+ .addOperand(Inst.getOperand(4)));
+ emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM)
+ .addOperand(Inst.getOperand(0))
+ .addOperand(Inst.getOperand(0))
+ .addOperand(Inst.getOperand(1)));
+ }
+}
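// Worked example of the unmasked form: "vmsgeu.vx v4, v8, a0" expands to
//   vmsltu.vx v4, v8, a0
//   vmnand.mm v4, v4, v4
// i.e. the strict less-than comparison is computed and then inverted, which
// is the three-operand path above.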
+
bool RISCVAsmParser::checkPseudoAddTPRel(MCInst &Inst,
OperandVector &Operands) {
assert(Inst.getOpcode() == RISCV::PseudoAddTPRel && "Invalid instruction");
@@ -2275,77 +2409,48 @@ std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultMaskRegOp() const {
bool RISCVAsmParser::validateInstruction(MCInst &Inst,
OperandVector &Operands) {
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
- unsigned TargetFlags =
- (MCID.TSFlags >> RISCV::ConstraintOffset) & RISCV::ConstraintMask;
- if (TargetFlags == RISCV::NoConstraint)
+ unsigned Constraints =
+ (MCID.TSFlags & RISCVII::ConstraintMask) >> RISCVII::ConstraintShift;
+ if (Constraints == RISCVII::NoConstraint)
return false;
unsigned DestReg = Inst.getOperand(0).getReg();
// Operands[1] will be the first operand, DestReg.
SMLoc Loc = Operands[1]->getStartLoc();
- if ((TargetFlags == RISCV::WidenV) || (TargetFlags == RISCV::WidenW) ||
- (TargetFlags == RISCV::SlideUp) || (TargetFlags == RISCV::Vrgather) ||
- (TargetFlags == RISCV::Vcompress)) {
- if (TargetFlags != RISCV::WidenW) {
- unsigned Src2Reg = Inst.getOperand(1).getReg();
- if (DestReg == Src2Reg)
- return Error(Loc, "The destination vector register group cannot overlap"
- " the source vector register group.");
- if (TargetFlags == RISCV::WidenV) {
- // Assume DestReg LMUL is 2 at least for widening/narrowing operations.
- if (DestReg + 1 == Src2Reg)
- return Error(Loc,
- "The destination vector register group cannot overlap"
- " the source vector register group.");
- }
- }
- if (Inst.getOperand(2).isReg()) {
- unsigned Src1Reg = Inst.getOperand(2).getReg();
- if (DestReg == Src1Reg)
- return Error(Loc, "The destination vector register group cannot overlap"
- " the source vector register group.");
- if (TargetFlags == RISCV::WidenV || TargetFlags == RISCV::WidenW) {
- // Assume DestReg LMUL is 2 at least for widening/narrowing operations.
- if (DestReg + 1 == Src1Reg)
- return Error(Loc,
- "The destination vector register group cannot overlap"
- " the source vector register group.");
- }
- }
- if (Inst.getNumOperands() == 4) {
- unsigned MaskReg = Inst.getOperand(3).getReg();
-
- if (DestReg == MaskReg)
- return Error(Loc, "The destination vector register group cannot overlap"
- " the mask register.");
- }
- } else if (TargetFlags == RISCV::Narrow) {
- unsigned Src2Reg = Inst.getOperand(1).getReg();
- if (DestReg == Src2Reg)
+ if (Constraints & RISCVII::VS2Constraint) {
+ unsigned CheckReg = Inst.getOperand(1).getReg();
+ if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the source vector register group.");
- // Assume Src2Reg LMUL is 2 at least for widening/narrowing operations.
- if (DestReg == Src2Reg + 1)
+ }
+ if ((Constraints & RISCVII::VS1Constraint) && (Inst.getOperand(2).isReg())) {
+ unsigned CheckReg = Inst.getOperand(2).getReg();
+ if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the source vector register group.");
- } else if (TargetFlags == RISCV::WidenCvt || TargetFlags == RISCV::Iota) {
- unsigned Src2Reg = Inst.getOperand(1).getReg();
- if (DestReg == Src2Reg)
+ }
+ if ((Constraints & RISCVII::VMConstraint) && (DestReg == RISCV::V0)) {
+ // vadc, vsbc are special cases. These instructions have no mask register.
+ // The destination register cannot be V0.
+ unsigned Opcode = Inst.getOpcode();
+ if (Opcode == RISCV::VADC_VVM || Opcode == RISCV::VADC_VXM ||
+ Opcode == RISCV::VADC_VIM || Opcode == RISCV::VSBC_VVM ||
+ Opcode == RISCV::VSBC_VXM || Opcode == RISCV::VFMERGE_VFM ||
+ Opcode == RISCV::VMERGE_VIM || Opcode == RISCV::VMERGE_VVM ||
+ Opcode == RISCV::VMERGE_VXM)
+ return Error(Loc, "The destination vector register group cannot be V0.");
+
+ // Regardless of whether the instruction is masked or unmasked, the number
+ // of operands is the same. For example, "viota.m v0, v2" is actually
+ // "viota.m v0, v2, NoRegister". We need to check the last operand to
+ // determine whether it is masked or not.
+ unsigned CheckReg = Inst.getOperand(Inst.getNumOperands() - 1).getReg();
+ assert((CheckReg == RISCV::V0 || CheckReg == RISCV::NoRegister) &&
+ "Unexpected register for mask operand");
+
+ if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
- " the source vector register group.");
- if (TargetFlags == RISCV::WidenCvt) {
- // Assume DestReg LMUL is 2 at least for widening/narrowing operations.
- if (DestReg + 1 == Src2Reg)
- return Error(Loc, "The destination vector register group cannot overlap"
- " the source vector register group.");
- }
- if (Inst.getNumOperands() == 3) {
- unsigned MaskReg = Inst.getOperand(2).getReg();
-
- if (DestReg == MaskReg)
- return Error(Loc, "The destination vector register group cannot overlap"
- " the mask register.");
- }
+ " the mask register.");
}
return false;
}
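// For instance, a destination of v0 in "vadc.vvm v0, v4, v8, v0" now produces
// "The destination vector register group cannot be V0.", since vadc always
// consumes the carry mask in v0 even though it has no separate mask operand.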
@@ -2359,7 +2464,7 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
default:
break;
case RISCV::PseudoLI: {
- Register Reg = Inst.getOperand(0).getReg();
+ MCRegister Reg = Inst.getOperand(0).getReg();
const MCOperand &Op1 = Inst.getOperand(1);
if (Op1.isExpr()) {
// We must have li reg, %lo(sym) or li reg, %pcrel_lo(sym) or similar.
@@ -2412,6 +2517,9 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
case RISCV::PseudoLD:
emitLoadStoreSymbol(Inst, RISCV::LD, IDLoc, Out, /*HasTmpReg=*/false);
return false;
+ case RISCV::PseudoFLH:
+ emitLoadStoreSymbol(Inst, RISCV::FLH, IDLoc, Out, /*HasTmpReg=*/true);
+ return false;
case RISCV::PseudoFLW:
emitLoadStoreSymbol(Inst, RISCV::FLW, IDLoc, Out, /*HasTmpReg=*/true);
return false;
@@ -2430,6 +2538,9 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
case RISCV::PseudoSD:
emitLoadStoreSymbol(Inst, RISCV::SD, IDLoc, Out, /*HasTmpReg=*/true);
return false;
+ case RISCV::PseudoFSH:
+ emitLoadStoreSymbol(Inst, RISCV::FSH, IDLoc, Out, /*HasTmpReg=*/true);
+ return false;
case RISCV::PseudoFSW:
emitLoadStoreSymbol(Inst, RISCV::FSW, IDLoc, Out, /*HasTmpReg=*/true);
return false;
@@ -2440,6 +2551,72 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (checkPseudoAddTPRel(Inst, Operands))
return true;
break;
+ case RISCV::PseudoSEXT_B:
+ emitPseudoExtend(Inst, /*SignExtend=*/true, /*Width=*/8, IDLoc, Out);
+ return false;
+ case RISCV::PseudoSEXT_H:
+ emitPseudoExtend(Inst, /*SignExtend=*/true, /*Width=*/16, IDLoc, Out);
+ return false;
+ case RISCV::PseudoZEXT_H:
+ emitPseudoExtend(Inst, /*SignExtend=*/false, /*Width=*/16, IDLoc, Out);
+ return false;
+ case RISCV::PseudoZEXT_W:
+ emitPseudoExtend(Inst, /*SignExtend=*/false, /*Width=*/32, IDLoc, Out);
+ return false;
+ case RISCV::PseudoVMSGEU_VX:
+ case RISCV::PseudoVMSGEU_VX_M:
+ case RISCV::PseudoVMSGEU_VX_M_T:
+ emitVMSGE(Inst, RISCV::VMSLTU_VX, IDLoc, Out);
+ return false;
+ case RISCV::PseudoVMSGE_VX:
+ case RISCV::PseudoVMSGE_VX_M:
+ case RISCV::PseudoVMSGE_VX_M_T:
+ emitVMSGE(Inst, RISCV::VMSLT_VX, IDLoc, Out);
+ return false;
+ case RISCV::PseudoVMSGE_VI:
+ case RISCV::PseudoVMSLT_VI: {
+ // These instructions are signed, and so is the immediate, so we can
+ // subtract one and change the opcode.
+ int64_t Imm = Inst.getOperand(2).getImm();
+ unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGE_VI ? RISCV::VMSGT_VI
+ : RISCV::VMSLE_VI;
+ emitToStreamer(Out, MCInstBuilder(Opc)
+ .addOperand(Inst.getOperand(0))
+ .addOperand(Inst.getOperand(1))
+ .addImm(Imm - 1)
+ .addOperand(Inst.getOperand(3)));
+ return false;
+ }
+ case RISCV::PseudoVMSGEU_VI:
+ case RISCV::PseudoVMSLTU_VI: {
+ int64_t Imm = Inst.getOperand(2).getImm();
+ // Unsigned comparisons are tricky because the immediate is signed. If the
+ // immediate is 0 we can't just subtract one. vmsltu.vi v0, v1, 0 is always
+ // false, but vmsleu.vi v0, v1, -1 is always true. Instead we use
+ // vmsne v0, v1, v1 which is always false.
+ if (Imm == 0) {
+ unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGEU_VI
+ ? RISCV::VMSEQ_VV
+ : RISCV::VMSNE_VV;
+ emitToStreamer(Out, MCInstBuilder(Opc)
+ .addOperand(Inst.getOperand(0))
+ .addOperand(Inst.getOperand(1))
+ .addOperand(Inst.getOperand(1))
+ .addOperand(Inst.getOperand(3)));
+ } else {
+ // Other immediate values can subtract one like signed.
+ unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGEU_VI
+ ? RISCV::VMSGTU_VI
+ : RISCV::VMSLEU_VI;
+ emitToStreamer(Out, MCInstBuilder(Opc)
+ .addOperand(Inst.getOperand(0))
+ .addOperand(Inst.getOperand(1))
+ .addImm(Imm - 1)
+ .addOperand(Inst.getOperand(3)));
+ }
+
+ return false;
+ }
}
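// Worked example: "vmsgeu.vi v4, v8, 0" (always true) becomes
// "vmseq.vv v4, v8, v8", and "vmsltu.vi v4, v8, 0" (always false) becomes
// "vmsne.vv v4, v8, v8"; any non-zero immediate simply becomes
// vmsgtu.vi/vmsleu.vi with the immediate decremented by one.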
emitToStreamer(Out, Inst);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 37edc19398a5..623552390f53 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -10,10 +10,9 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "TargetInfo/RISCVTargetInfo.h"
-#include "Utils/RISCVBaseInfo.h"
-#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -71,7 +70,18 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
if (RegNo >= 32 || (IsRV32E && RegNo >= 16))
return MCDisassembler::Fail;
- Register Reg = RISCV::X0 + RegNo;
+ MCRegister Reg = RISCV::X0 + RegNo;
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo >= 32)
+ return MCDisassembler::Fail;
+
+ MCRegister Reg = RISCV::F0_H + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -82,7 +92,7 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
if (RegNo >= 32)
return MCDisassembler::Fail;
- Register Reg = RISCV::F0_F + RegNo;
+ MCRegister Reg = RISCV::F0_F + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -93,7 +103,7 @@ static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo,
if (RegNo >= 8) {
return MCDisassembler::Fail;
}
- Register Reg = RISCV::F8_F + RegNo;
+ MCRegister Reg = RISCV::F8_F + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -104,7 +114,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
if (RegNo >= 32)
return MCDisassembler::Fail;
- Register Reg = RISCV::F0_D + RegNo;
+ MCRegister Reg = RISCV::F0_D + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -115,7 +125,7 @@ static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo,
if (RegNo >= 8) {
return MCDisassembler::Fail;
}
- Register Reg = RISCV::F8_D + RegNo;
+ MCRegister Reg = RISCV::F8_D + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -146,7 +156,7 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
if (RegNo >= 8)
return MCDisassembler::Fail;
- Register Reg = RISCV::X8 + RegNo;
+ MCRegister Reg = RISCV::X8 + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -157,14 +167,14 @@ static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo,
if (RegNo >= 32)
return MCDisassembler::Fail;
- Register Reg = RISCV::V0 + RegNo;
+ MCRegister Reg = RISCV::V0 + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo,
uint64_t Address, const void *Decoder) {
- Register Reg = RISCV::NoRegister;
+ MCRegister Reg = RISCV::NoRegister;
switch (RegNo) {
default:
return MCDisassembler::Fail;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index 090132af3585..56991ccf010a 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -9,9 +9,9 @@
#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVASMBACKEND_H
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVASMBACKEND_H
+#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVFixupKinds.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
-#include "Utils/RISCVBaseInfo.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 43b1f8b80c5f..fa36234d0f5f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -1,3 +1,16 @@
+//===-- RISCVBaseInfo.cpp - Top level definitions for RISCV MC ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone enum definitions for the RISCV target
+// useful for the compiler back-end and the MC libraries.
+//
+//===----------------------------------------------------------------------===//
+
#include "RISCVBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Triple.h"
@@ -6,7 +19,7 @@
namespace llvm {
namespace RISCVSysReg {
#define GET_SysRegsList_IMPL
-#include "RISCVGenSystemOperands.inc"
+#include "RISCVGenSearchableTables.inc"
} // namespace RISCVSysReg
namespace RISCVABI {
@@ -65,7 +78,10 @@ ABI getTargetABI(StringRef ABIName) {
// To avoid the BP value clobbered by a function call, we need to choose a
// callee saved register to save the value. RV32E only has X8 and X9 as callee
// saved registers and X8 will be used as fp. So we choose X9 as bp.
-Register getBPReg() { return RISCV::X9; }
+MCRegister getBPReg() { return RISCV::X9; }
+
+// Returns the register holding the shadow call stack pointer.
+MCRegister getSCSPReg() { return RISCV::X18; }
} // namespace RISCVABI
@@ -78,4 +94,49 @@ void validate(const Triple &TT, const FeatureBitset &FeatureBits) {
} // namespace RISCVFeatures
+namespace RISCVVPseudosTable {
+
+#define GET_RISCVVPseudosTable_IMPL
+#include "RISCVGenSearchableTables.inc"
+
+} // namespace RISCVVPseudosTable
+
+void RISCVVType::printVType(unsigned VType, raw_ostream &OS) {
+ RISCVVSEW VSEW = getVSEW(VType);
+ RISCVVLMUL VLMUL = getVLMUL(VType);
+
+ unsigned Sew = 1 << (static_cast<unsigned>(VSEW) + 3);
+ OS << "e" << Sew;
+
+ switch (VLMUL) {
+ case RISCVVLMUL::LMUL_RESERVED:
+ llvm_unreachable("Unexpected LMUL value!");
+ case RISCVVLMUL::LMUL_1:
+ case RISCVVLMUL::LMUL_2:
+ case RISCVVLMUL::LMUL_4:
+ case RISCVVLMUL::LMUL_8: {
+ unsigned LMul = 1 << static_cast<unsigned>(VLMUL);
+ OS << ",m" << LMul;
+ break;
+ }
+ case RISCVVLMUL::LMUL_F2:
+ case RISCVVLMUL::LMUL_F4:
+ case RISCVVLMUL::LMUL_F8: {
+ unsigned LMul = 1 << (8 - static_cast<unsigned>(VLMUL));
+ OS << ",mf" << LMul;
+ break;
+ }
+ }
+
+ if (isTailAgnostic(VType))
+ OS << ",ta";
+ else
+ OS << ",tu";
+
+ if (isMaskAgnostic(VType))
+ OS << ",ma";
+ else
+ OS << ",mu";
+}
+
} // namespace llvm
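
The fractional-LMUL branch in printVType above leans on the encoding placing mf8, mf4 and mf2 at values 5, 6 and 7, so that 1 << (8 - value) recovers the denominators 8, 4 and 2. A standalone sketch of just that arithmetic (plain unsigned values, not the RISCVVLMUL enum):

#include <cassert>

// Mirrors the LMUL decode in printVType: values 0-3 are the integral
// multipliers m1/m2/m4/m8, values 5-7 are the fractional ones mf8/mf4/mf2.
unsigned integralLMul(unsigned VLMul)   { return 1u << VLMul; }
unsigned fractionalLMul(unsigned VLMul) { return 1u << (8 - VLMul); }

int main() {
  assert(integralLMul(1) == 2);   // LMUL_2  -> printed as "m2"
  assert(fractionalLMul(7) == 2); // LMUL_F2 -> printed as "mf2"
  assert(fractionalLMul(5) == 8); // LMUL_F8 -> printed as "mf8"
  return 0;
}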
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
new file mode 100644
index 000000000000..6c9f860c204c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -0,0 +1,406 @@
+//===-- RISCVBaseInfo.h - Top level definitions for RISCV MC ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone enum definitions for the RISCV target
+// useful for the compiler back-end and the MC libraries.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
+
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/MachineValueType.h"
+
+namespace llvm {
+
+// RISCVII - This namespace holds all of the target specific flags that
+// instruction info tracks. All definitions must match RISCVInstrFormats.td.
+namespace RISCVII {
+enum {
+ InstFormatPseudo = 0,
+ InstFormatR = 1,
+ InstFormatR4 = 2,
+ InstFormatI = 3,
+ InstFormatS = 4,
+ InstFormatB = 5,
+ InstFormatU = 6,
+ InstFormatJ = 7,
+ InstFormatCR = 8,
+ InstFormatCI = 9,
+ InstFormatCSS = 10,
+ InstFormatCIW = 11,
+ InstFormatCL = 12,
+ InstFormatCS = 13,
+ InstFormatCA = 14,
+ InstFormatCB = 15,
+ InstFormatCJ = 16,
+ InstFormatOther = 17,
+
+ InstFormatMask = 31,
+
+ ConstraintShift = 5,
+ ConstraintMask = 0b111 << ConstraintShift,
+
+ VLMulShift = ConstraintShift + 3,
+ VLMulMask = 0b111 << VLMulShift,
+
+ // Do we need to add a dummy mask op when converting RVV Pseudo to MCInst.
+ HasDummyMaskOpShift = VLMulShift + 3,
+ HasDummyMaskOpMask = 1 << HasDummyMaskOpShift,
+
+ // Does this instruction only update element 0 of the destination register.
+ WritesElement0Shift = HasDummyMaskOpShift + 1,
+ WritesElement0Mask = 1 << WritesElement0Shift,
+
+ // Does this instruction have a merge operand that must be removed when
+ // converting to MCInst. It will be the first explicit use operand. Used by
+ // RVV Pseudos.
+ HasMergeOpShift = WritesElement0Shift + 1,
+ HasMergeOpMask = 1 << HasMergeOpShift,
+
+ // Does this instruction have a SEW operand. It will be the last explicit
+ // operand. Used by RVV Pseudos.
+ HasSEWOpShift = HasMergeOpShift + 1,
+ HasSEWOpMask = 1 << HasSEWOpShift,
+
+ // Does this instruction have a VL operand. It will be the second to last
+ // explicit operand. Used by RVV Pseudos.
+ HasVLOpShift = HasSEWOpShift + 1,
+ HasVLOpMask = 1 << HasVLOpShift,
+};
+
+// Match with the definitions in RISCVInstrFormatsV.td
+enum RVVConstraintType {
+ NoConstraint = 0,
+ VS2Constraint = 0b001,
+ VS1Constraint = 0b010,
+ VMConstraint = 0b100,
+};
+
+// RISC-V Specific Machine Operand Flags
+enum {
+ MO_None = 0,
+ MO_CALL = 1,
+ MO_PLT = 2,
+ MO_LO = 3,
+ MO_HI = 4,
+ MO_PCREL_LO = 5,
+ MO_PCREL_HI = 6,
+ MO_GOT_HI = 7,
+ MO_TPREL_LO = 8,
+ MO_TPREL_HI = 9,
+ MO_TPREL_ADD = 10,
+ MO_TLS_GOT_HI = 11,
+ MO_TLS_GD_HI = 12,
+
+ // Used to differentiate between target-specific "direct" flags and "bitmask"
+ // flags. A machine operand can only have one "direct" flag, but can have
+ // multiple "bitmask" flags.
+ MO_DIRECT_FLAG_MASK = 15
+};
+} // namespace RISCVII
+
+namespace RISCVOp {
+enum OperandType : unsigned {
+ OPERAND_FIRST_RISCV_IMM = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_UIMM4 = OPERAND_FIRST_RISCV_IMM,
+ OPERAND_UIMM5,
+ OPERAND_UIMM12,
+ OPERAND_SIMM12,
+ OPERAND_UIMM20,
+ OPERAND_UIMMLOG2XLEN,
+ OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN
+};
+} // namespace RISCVOp
+
+// Describes the predecessor/successor bits used in the FENCE instruction.
+namespace RISCVFenceField {
+enum FenceField {
+ I = 8,
+ O = 4,
+ R = 2,
+ W = 1
+};
+}
+
+// Describes the supported floating point rounding mode encodings.
+namespace RISCVFPRndMode {
+enum RoundingMode {
+ RNE = 0,
+ RTZ = 1,
+ RDN = 2,
+ RUP = 3,
+ RMM = 4,
+ DYN = 7,
+ Invalid
+};
+
+inline static StringRef roundingModeToString(RoundingMode RndMode) {
+ switch (RndMode) {
+ default:
+ llvm_unreachable("Unknown floating point rounding mode");
+ case RISCVFPRndMode::RNE:
+ return "rne";
+ case RISCVFPRndMode::RTZ:
+ return "rtz";
+ case RISCVFPRndMode::RDN:
+ return "rdn";
+ case RISCVFPRndMode::RUP:
+ return "rup";
+ case RISCVFPRndMode::RMM:
+ return "rmm";
+ case RISCVFPRndMode::DYN:
+ return "dyn";
+ }
+}
+
+inline static RoundingMode stringToRoundingMode(StringRef Str) {
+ return StringSwitch<RoundingMode>(Str)
+ .Case("rne", RISCVFPRndMode::RNE)
+ .Case("rtz", RISCVFPRndMode::RTZ)
+ .Case("rdn", RISCVFPRndMode::RDN)
+ .Case("rup", RISCVFPRndMode::RUP)
+ .Case("rmm", RISCVFPRndMode::RMM)
+ .Case("dyn", RISCVFPRndMode::DYN)
+ .Default(RISCVFPRndMode::Invalid);
+}
+
+inline static bool isValidRoundingMode(unsigned Mode) {
+ switch (Mode) {
+ default:
+ return false;
+ case RISCVFPRndMode::RNE:
+ case RISCVFPRndMode::RTZ:
+ case RISCVFPRndMode::RDN:
+ case RISCVFPRndMode::RUP:
+ case RISCVFPRndMode::RMM:
+ case RISCVFPRndMode::DYN:
+ return true;
+ }
+}
+} // namespace RISCVFPRndMode
+
+namespace RISCVSysReg {
+struct SysReg {
+ const char *Name;
+ unsigned Encoding;
+ const char *AltName;
+ // FIXME: add these additional fields when needed.
+ // Privilege Access: Read, Write, Read-Only.
+ // unsigned ReadWrite;
+ // Privilege Mode: User, System or Machine.
+ // unsigned Mode;
+ // Check field name.
+ // unsigned Extra;
+ // Register number without the privilege bits.
+ // unsigned Number;
+ FeatureBitset FeaturesRequired;
+ bool isRV32Only;
+
+ bool haveRequiredFeatures(FeatureBitset ActiveFeatures) const {
+ // RV32-only registers are not available when 64-bit mode is enabled.
+ if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit])
+ return false;
+ // No required feature associated with the system register.
+ if (FeaturesRequired.none())
+ return true;
+ return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
+ }
+};
+
+#define GET_SysRegsList_DECL
+#include "RISCVGenSearchableTables.inc"
+} // end namespace RISCVSysReg
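// The two early-outs in haveRequiredFeatures() above are easy to exercise in
// isolation. The sketch below is a standalone mimic with a hypothetical CSR
// entry and std::bitset standing in for llvm::FeatureBitset; the names are
// made up and not part of the generated tables.

#include <bitset>
#include <cassert>

enum { FakeFeature64Bit = 0, FakeFeatureStdExtF = 1, NumFakeFeatures = 2 };

struct FakeSysReg {
  std::bitset<NumFakeFeatures> FeaturesRequired;
  bool isRV32Only;

  bool haveRequiredFeatures(std::bitset<NumFakeFeatures> Active) const {
    if (isRV32Only && Active[FakeFeature64Bit])
      return false; // RV32-only CSRs are rejected once RV64 is active.
    if (FeaturesRequired.none())
      return true;  // No gating feature at all.
    return (FeaturesRequired & Active) == FeaturesRequired;
  }
};

int main() {
  std::bitset<NumFakeFeatures> RV64Base;
  RV64Base.set(FakeFeature64Bit);

  FakeSysReg RV32OnlyCSR{{}, /*isRV32Only=*/true};
  assert(!RV32OnlyCSR.haveRequiredFeatures(RV64Base));

  FakeSysReg FGatedCSR{std::bitset<NumFakeFeatures>().set(FakeFeatureStdExtF),
                       /*isRV32Only=*/false};
  assert(!FGatedCSR.haveRequiredFeatures(RV64Base)); // 'F' is not enabled.
  return 0;
}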
+
+namespace RISCVABI {
+
+enum ABI {
+ ABI_ILP32,
+ ABI_ILP32F,
+ ABI_ILP32D,
+ ABI_ILP32E,
+ ABI_LP64,
+ ABI_LP64F,
+ ABI_LP64D,
+ ABI_Unknown
+};
+
+// Returns the target ABI, or else a StringError if the requested ABIName is
+// not supported for the given TT and FeatureBits combination.
+ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
+ StringRef ABIName);
+
+ABI getTargetABI(StringRef ABIName);
+
+// Returns the register used to hold the stack pointer after realignment.
+MCRegister getBPReg();
+
+// Returns the register holding the shadow call stack pointer.
+MCRegister getSCSPReg();
+
+} // namespace RISCVABI
+
+namespace RISCVFeatures {
+
+// Validates if the given combination of features are valid for the target
+// triple. Exits with report_fatal_error if not.
+void validate(const Triple &TT, const FeatureBitset &FeatureBits);
+
+} // namespace RISCVFeatures
+
+namespace RISCVVMVTs {
+
+constexpr MVT vint8mf8_t = MVT::nxv1i8;
+constexpr MVT vint8mf4_t = MVT::nxv2i8;
+constexpr MVT vint8mf2_t = MVT::nxv4i8;
+constexpr MVT vint8m1_t = MVT::nxv8i8;
+constexpr MVT vint8m2_t = MVT::nxv16i8;
+constexpr MVT vint8m4_t = MVT::nxv32i8;
+constexpr MVT vint8m8_t = MVT::nxv64i8;
+
+constexpr MVT vint16mf4_t = MVT::nxv1i16;
+constexpr MVT vint16mf2_t = MVT::nxv2i16;
+constexpr MVT vint16m1_t = MVT::nxv4i16;
+constexpr MVT vint16m2_t = MVT::nxv8i16;
+constexpr MVT vint16m4_t = MVT::nxv16i16;
+constexpr MVT vint16m8_t = MVT::nxv32i16;
+
+constexpr MVT vint32mf2_t = MVT::nxv1i32;
+constexpr MVT vint32m1_t = MVT::nxv2i32;
+constexpr MVT vint32m2_t = MVT::nxv4i32;
+constexpr MVT vint32m4_t = MVT::nxv8i32;
+constexpr MVT vint32m8_t = MVT::nxv16i32;
+
+constexpr MVT vint64m1_t = MVT::nxv1i64;
+constexpr MVT vint64m2_t = MVT::nxv2i64;
+constexpr MVT vint64m4_t = MVT::nxv4i64;
+constexpr MVT vint64m8_t = MVT::nxv8i64;
+
+constexpr MVT vfloat16mf4_t = MVT::nxv1f16;
+constexpr MVT vfloat16mf2_t = MVT::nxv2f16;
+constexpr MVT vfloat16m1_t = MVT::nxv4f16;
+constexpr MVT vfloat16m2_t = MVT::nxv8f16;
+constexpr MVT vfloat16m4_t = MVT::nxv16f16;
+constexpr MVT vfloat16m8_t = MVT::nxv32f16;
+
+constexpr MVT vfloat32mf2_t = MVT::nxv1f32;
+constexpr MVT vfloat32m1_t = MVT::nxv2f32;
+constexpr MVT vfloat32m2_t = MVT::nxv4f32;
+constexpr MVT vfloat32m4_t = MVT::nxv8f32;
+constexpr MVT vfloat32m8_t = MVT::nxv16f32;
+
+constexpr MVT vfloat64m1_t = MVT::nxv1f64;
+constexpr MVT vfloat64m2_t = MVT::nxv2f64;
+constexpr MVT vfloat64m4_t = MVT::nxv4f64;
+constexpr MVT vfloat64m8_t = MVT::nxv8f64;
+
+constexpr MVT vbool1_t = MVT::nxv64i1;
+constexpr MVT vbool2_t = MVT::nxv32i1;
+constexpr MVT vbool4_t = MVT::nxv16i1;
+constexpr MVT vbool8_t = MVT::nxv8i1;
+constexpr MVT vbool16_t = MVT::nxv4i1;
+constexpr MVT vbool32_t = MVT::nxv2i1;
+constexpr MVT vbool64_t = MVT::nxv1i1;
+
+} // namespace RISCVVMVTs
+
+enum class RISCVVSEW {
+ SEW_8 = 0,
+ SEW_16,
+ SEW_32,
+ SEW_64,
+ SEW_128,
+ SEW_256,
+ SEW_512,
+ SEW_1024,
+};
+
+enum class RISCVVLMUL {
+ LMUL_1 = 0,
+ LMUL_2,
+ LMUL_4,
+ LMUL_8,
+ LMUL_RESERVED,
+ LMUL_F8,
+ LMUL_F4,
+ LMUL_F2
+};
+
+namespace RISCVVType {
+// Is this a SEW value that can be encoded into the VTYPE format.
+inline static bool isValidSEW(unsigned SEW) {
+ return isPowerOf2_32(SEW) && SEW >= 8 && SEW <= 1024;
+}
+
+// Is this a LMUL value that can be encoded into the VTYPE format.
+inline static bool isValidLMUL(unsigned LMUL, bool Fractional) {
+ return isPowerOf2_32(LMUL) && LMUL <= 8 && (!Fractional || LMUL != 1);
+}
+
+// Encode VTYPE into the binary format used by the VSETVLI instruction, which
+// is also the form used by our MC layer representation.
+//
+// Bits | Name | Description
+// -----+------------+------------------------------------------------
+// 7 | vma | Vector mask agnostic
+// 6 | vta | Vector tail agnostic
+// 5:3 | vsew[2:0] | Standard element width (SEW) setting
+// 2:0 | vlmul[2:0] | Vector register group multiplier (LMUL) setting
+inline static unsigned encodeVTYPE(RISCVVLMUL VLMUL, RISCVVSEW VSEW,
+ bool TailAgnostic, bool MaskAgnostic) {
+ unsigned VLMULBits = static_cast<unsigned>(VLMUL);
+ unsigned VSEWBits = static_cast<unsigned>(VSEW);
+ unsigned VTypeI = (VSEWBits << 3) | (VLMULBits & 0x7);
+ if (TailAgnostic)
+ VTypeI |= 0x40;
+ if (MaskAgnostic)
+ VTypeI |= 0x80;
+
+ return VTypeI;
+}
+
+inline static RISCVVLMUL getVLMUL(unsigned VType) {
+ unsigned VLMUL = VType & 0x7;
+ return static_cast<RISCVVLMUL>(VLMUL);
+}
+
+inline static RISCVVSEW getVSEW(unsigned VType) {
+ unsigned VSEW = (VType >> 3) & 0x7;
+ return static_cast<RISCVVSEW>(VSEW);
+}
+
+inline static bool isTailAgnostic(unsigned VType) { return VType & 0x40; }
+
+inline static bool isMaskAgnostic(unsigned VType) { return VType & 0x80; }
+
+void printVType(unsigned VType, raw_ostream &OS);
+
+} // namespace RISCVVType
+
+namespace RISCVVPseudosTable {
+
+struct PseudoInfo {
+#include "MCTargetDesc/RISCVBaseInfo.h"
+ uint16_t Pseudo;
+ uint16_t BaseInstr;
+};
+
+using namespace RISCV;
+
+#define GET_RISCVVPseudosTable_DECL
+#include "RISCVGenSearchableTables.inc"
+
+} // end namespace RISCVVPseudosTable
+
+} // namespace llvm
+
+#endif
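
Taken together, encodeVTYPE, getVSEW, getVLMUL and the agnostic-bit helpers above are plain bit packing in the layout documented in the comment. The standalone sketch below (plain unsigned arithmetic, no LLVM types) packs e32, m2, tail-agnostic, mask-undisturbed and checks the fields it reads back; printVType would render this value as "e32,m2,ta,mu".

#include <cassert>

// Pack vtype the way encodeVTYPE does: vlmul in bits [2:0], vsew in [5:3],
// vta in bit 6, vma in bit 7.
unsigned encodeVType(unsigned VLMul, unsigned VSew, bool TailAgnostic,
                     bool MaskAgnostic) {
  unsigned VType = (VSew << 3) | (VLMul & 0x7);
  if (TailAgnostic)
    VType |= 0x40;
  if (MaskAgnostic)
    VType |= 0x80;
  return VType;
}

int main() {
  // SEW_32 is encoding 2 (SEW = 8 << 2 = 32); LMUL_2 is encoding 1.
  unsigned VType = encodeVType(/*VLMul=*/1, /*VSew=*/2, /*ta=*/true, /*ma=*/false);
  assert(VType == 0x51);
  assert((VType & 0x7) == 1);        // getVLMUL -> LMUL_2
  assert(((VType >> 3) & 0x7) == 2); // getVSEW  -> SEW_32
  assert((VType & 0x40) != 0);       // tail agnostic ("ta")
  assert((VType & 0x80) == 0);       // mask undisturbed ("mu")
  return 0;
}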
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 079dc919928a..7df454be8729 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -11,9 +11,9 @@
//===----------------------------------------------------------------------===//
#include "RISCVELFStreamer.h"
-#include "MCTargetDesc/RISCVAsmBackend.h"
+#include "RISCVAsmBackend.h"
+#include "RISCVBaseInfo.h"
#include "RISCVMCTargetDesc.h"
-#include "Utils/RISCVBaseInfo.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index eae3e13dbe40..5f8d6e137518 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//
#include "RISCVInstPrinter.h"
-#include "MCTargetDesc/RISCVMCExpr.h"
-#include "Utils/RISCVBaseInfo.h"
+#include "RISCVBaseInfo.h"
+#include "RISCVMCExpr.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -102,6 +102,24 @@ void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
MO.getExpr()->print(O, &MAI);
}
+void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (!MO.isImm())
+ return printOperand(MI, OpNo, STI, O);
+
+ if (PrintBranchImmAsAddress) {
+ uint64_t Target = Address + MO.getImm();
+ if (!STI.hasFeature(RISCV::Feature64Bit))
+ Target &= 0xffffffff;
+ O << formatHex(Target);
+ } else {
+ O << MO.getImm();
+ }
+}
+
void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -147,18 +165,12 @@ void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo,
O << "(";
printRegName(O, MO.getReg());
O << ")";
- return;
}
void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
- unsigned Sew = (Imm >> 2) & 0x7;
- unsigned Lmul = Imm & 0x3;
-
- Lmul = 0x1 << Lmul;
- Sew = 0x1 << (Sew + 3);
- O << "e" << Sew << ",m" << Lmul;
+ RISCVVType::printVType(Imm, O);
}
void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo,
@@ -174,15 +186,6 @@ void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo,
O << ".t";
}
-void RISCVInstPrinter::printSImm5Plus1(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNo);
-
- assert(MO.isImm() && "printSImm5Plus1 can only print constant operands");
- O << MO.getImm() + 1;
-}
-
const char *RISCVInstPrinter::getRegisterName(unsigned RegNo) {
return getRegisterName(RegNo, ArchRegNames ? RISCV::NoRegAltName
: RISCV::ABIRegAltName);
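
The new printBranchOperand above folds the PC-relative immediate into an absolute target and truncates it to 32 bits when RV64 is not enabled. A standalone sketch of that computation (plain integers, not the MCInst/MCSubtargetInfo API):

#include <cassert>
#include <cstdint>

// Mimics the target computation in printBranchOperand: the branch immediate
// is an offset from the instruction address; on RV32 the printed address is
// truncated to 32 bits.
uint64_t branchTarget(uint64_t Address, int64_t Imm, bool IsRV64) {
  uint64_t Target = Address + Imm;
  if (!IsRV64)
    Target &= 0xffffffff;
  return Target;
}

int main() {
  assert(branchTarget(0x80000000, 0x10, /*IsRV64=*/true) == 0x80000010);
  // A backwards branch near the bottom of the RV32 address space wraps.
  assert(branchTarget(0x4, -0x8, /*IsRV64=*/false) == 0xfffffffc);
  return 0;
}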
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
index fdaa00c5f8eb..d078ead2c8ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -32,6 +32,8 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O, const char *Modifier = nullptr);
+ void printBranchOperand(const MCInst *MI, uint64_t Address, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printFenceArg(const MCInst *MI, unsigned OpNo,
@@ -44,10 +46,9 @@ public:
raw_ostream &O);
void printVMaskReg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printSImm5Plus1(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, uint64_t Address,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 816206c477df..b299541939ec 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -10,12 +10,11 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVFixupKinds.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
-#include "Utils/RISCVBaseInfo.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -84,6 +83,12 @@ public:
unsigned getVMaskReg(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+
+private:
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
@@ -106,7 +111,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
const MCSubtargetInfo &STI) const {
MCInst TmpInst;
MCOperand Func;
- Register Ra;
+ MCRegister Ra;
if (MI.getOpcode() == RISCV::PseudoTAIL) {
Func = MI.getOperand(0);
Ra = RISCV::X6;
@@ -185,6 +190,9 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS,
void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
// Get byte count of instruction.
unsigned Size = Desc.getSize();
@@ -397,4 +405,5 @@ unsigned RISCVMCCodeEmitter::getVMaskReg(const MCInst &MI, unsigned OpNo,
}
}
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "RISCVGenMCCodeEmitter.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index 2a6f372e50be..8ce2184c7a41 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -13,7 +13,6 @@
#include "RISCVMCExpr.h"
#include "MCTargetDesc/RISCVAsmBackend.h"
-#include "RISCV.h"
#include "RISCVFixupKinds.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmLayout.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index a474224e1a4e..093118518db6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -11,14 +11,13 @@
//===----------------------------------------------------------------------===//
#include "RISCVMCTargetDesc.h"
+#include "RISCVBaseInfo.h"
#include "RISCVELFStreamer.h"
#include "RISCVInstPrinter.h"
#include "RISCVMCAsmInfo.h"
#include "RISCVTargetStreamer.h"
#include "TargetInfo/RISCVTargetInfo.h"
-#include "Utils/RISCVBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -56,7 +55,7 @@ static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
MCAsmInfo *MAI = new RISCVMCAsmInfo(TT);
- Register SP = MRI.getDwarfRegNum(RISCV::X2, true);
+ MCRegister SP = MRI.getDwarfRegNum(RISCV::X2, true);
MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0);
MAI->addInitialFrameState(Inst);
@@ -68,7 +67,7 @@ static MCSubtargetInfo *createRISCVMCSubtargetInfo(const Triple &TT,
std::string CPUName = std::string(CPU);
if (CPUName.empty())
CPUName = TT.isArch64Bit() ? "generic-rv64" : "generic-rv32";
- return createRISCVMCSubtargetInfoImpl(TT, CPUName, FS);
+ return createRISCVMCSubtargetInfoImpl(TT, CPUName, /*TuneCPU*/ CPUName, FS);
}
static MCInstPrinter *createRISCVMCInstPrinter(const Triple &T,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index f390ddb89e3c..1f3dead61011 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -8,10 +8,8 @@
#include "RISCVMatInt.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/MachineValueType.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
-#include <cstdint>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
index b12ae2eade99..17ca57458b49 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
@@ -9,12 +9,11 @@
#ifndef LLVM_LIB_TARGET_RISCV_MATINT_H
#define LLVM_LIB_TARGET_RISCV_MATINT_H
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/MachineValueType.h"
#include <cstdint>
namespace llvm {
+class APInt;
namespace RISCVMatInt {
struct Inst {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 54a2fb288579..13c4b84aa300 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "RISCVTargetStreamer.h"
-#include "RISCVSubtarget.h"
+#include "RISCVMCTargetDesc.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/RISCVAttributes.h"
@@ -60,6 +60,38 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
Arch += "_d2p0";
if (STI.hasFeature(RISCV::FeatureStdExtC))
Arch += "_c2p0";
+ if (STI.hasFeature(RISCV::FeatureStdExtB))
+ Arch += "_b0p93";
+ if (STI.hasFeature(RISCV::FeatureStdExtV))
+ Arch += "_v0p10";
+ if (STI.hasFeature(RISCV::FeatureExtZfh))
+ Arch += "_zfh0p1";
+ if (STI.hasFeature(RISCV::FeatureExtZba))
+ Arch += "_zba0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZbb))
+ Arch += "_zbb0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZbc))
+ Arch += "_zbc0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZbe))
+ Arch += "_zbe0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZbf))
+ Arch += "_zbf0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZbm))
+ Arch += "_zbm0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZbp))
+ Arch += "_zbp0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZbproposedc))
+ Arch += "_zbproposedc0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZbr))
+ Arch += "_zbr0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZbs))
+ Arch += "_zbs0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZbt))
+ Arch += "_zbt0p93";
+ if (STI.hasFeature(RISCV::FeatureExtZvamo))
+ Arch += "_zvamo0p10";
+ if (STI.hasFeature(RISCV::FeatureStdExtZvlsseg))
+ Arch += "_zvlsseg0p10";
emitTextAttribute(RISCVAttrs::ARCH, Arch);
}
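
Each optional extension above contributes a "_<name><major>p<minor>" suffix to the Tag_RISCV_arch attribute string, with 0p93 marking the 0.93 draft bitmanip extensions and 0p10 the 0.10 draft vector extensions. A minimal sketch of that suffix convention follows (hypothetical feature list; the "rv64i2p0" base is an assumption about what the earlier, unshown part of the function builds):

#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Append extension suffixes in the "_<ext><major>p<minor>" form the streamer
// uses; the base ISA string is assumed to exist already.
std::string appendExtensions(
    std::string Arch,
    const std::vector<std::pair<std::string, std::string>> &Exts) {
  for (const auto &E : Exts)
    Arch += "_" + E.first + E.second;
  return Arch;
}

int main() {
  std::string Arch = appendExtensions(
      "rv64i2p0", {{"m", "2p0"}, {"v", "0p10"}, {"zba", "0p93"}});
  assert(Arch == "rv64i2p0_m2p0_v0p10_zba0p93");
  return 0;
}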
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
index 9baa2cc2741a..2538d9992de7 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCV_H
#define LLVM_LIB_TARGET_RISCV_RISCV_H
-#include "Utils/RISCVBaseInfo.h"
+#include "MCTargetDesc/RISCVBaseInfo.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -46,6 +46,9 @@ void initializeRISCVExpandPseudoPass(PassRegistry &);
FunctionPass *createRISCVExpandAtomicPseudoPass();
void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
+FunctionPass *createRISCVCleanupVSETVLIPass();
+void initializeRISCVCleanupVSETVLIPass(PassRegistry &);
+
InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
RISCVSubtarget &,
RISCVRegisterBankInfo &);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
index 57e7c41c4271..83811dadc9ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
@@ -41,6 +41,14 @@ def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">,
AssemblerPredicate<(all_of FeatureStdExtD),
"'D' (Double-Precision Floating-Point)">;
+def FeatureExtZfh
+ : SubtargetFeature<"experimental-zfh", "HasStdExtZfh", "true",
+ "'Zfh' (Half-Precision Floating-Point)",
+ [FeatureStdExtF]>;
+def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">,
+ AssemblerPredicate<(all_of FeatureExtZfh),
+ "'Zfh' (Half-Precision Floating-Point)">;
+
def FeatureStdExtC
: SubtargetFeature<"c", "HasStdExtC", "true",
"'C' (Compressed Instructions)">;
@@ -48,6 +56,14 @@ def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">,
AssemblerPredicate<(all_of FeatureStdExtC),
"'C' (Compressed Instructions)">;
+def FeatureExtZba
+ : SubtargetFeature<"experimental-zba", "HasStdExtZba", "true",
+ "'Zba' (Address calculation 'B' Instructions)">;
+def HasStdExtZba : Predicate<"Subtarget->hasStdExtZba()">,
+ AssemblerPredicate<(all_of FeatureExtZba),
+ "'Zba' (Address calculation 'B' Instructions)">;
+def NotHasStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">;
+
def FeatureExtZbb
: SubtargetFeature<"experimental-zbb", "HasStdExtZbb", "true",
"'Zbb' (Base 'B' Instructions)">;
@@ -115,7 +131,9 @@ def HasStdExtZbt : Predicate<"Subtarget->hasStdExtZbt()">,
// subextensions. They should be enabled if either has been specified.
def HasStdExtZbbOrZbp
: Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()">,
- AssemblerPredicate<(any_of FeatureExtZbb, FeatureExtZbp)>;
+ AssemblerPredicate<(any_of FeatureExtZbb, FeatureExtZbp),
+ "'Zbb' (Base 'B' Instructions) or "
+ "'Zbp' (Permutation 'B' Instructions)">;
def FeatureExtZbproposedc
: SubtargetFeature<"experimental-zbproposedc", "HasStdExtZbproposedc", "true",
@@ -127,7 +145,8 @@ def HasStdExtZbproposedc : Predicate<"Subtarget->hasStdExtZbproposedc()">,
def FeatureStdExtB
: SubtargetFeature<"experimental-b", "HasStdExtB", "true",
"'B' (Bit Manipulation Instructions)",
- [FeatureExtZbb,
+ [FeatureExtZba,
+ FeatureExtZbb,
FeatureExtZbc,
FeatureExtZbe,
FeatureExtZbf,
@@ -145,16 +164,30 @@ def FeatureNoRVCHints
"Disable RVC Hint Instructions.">;
def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">,
AssemblerPredicate<(all_of(not FeatureNoRVCHints)),
- "RVC Hint Instructions">;
+ "RVC Hint Instructions">;
def FeatureStdExtV
: SubtargetFeature<"experimental-v", "HasStdExtV", "true",
- "'V' (Vector Instructions)",
- [FeatureStdExtF]>;
+ "'V' (Vector Instructions)">;
def HasStdExtV : Predicate<"Subtarget->hasStdExtV()">,
AssemblerPredicate<(all_of FeatureStdExtV),
"'V' (Vector Instructions)">;
+def FeatureStdExtZvlsseg
+ : SubtargetFeature<"experimental-zvlsseg", "HasStdExtZvlsseg", "true",
+ "'Zvlsseg' (Vector segment load/store instructions)",
+ [FeatureStdExtV]>;
+def HasStdExtZvlsseg : Predicate<"Subtarget->hasStdExtZvlsseg()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvlsseg),
+ "'Zvlsseg' (Vector segment load/store instructions)">;
+def FeatureExtZvamo
+ : SubtargetFeature<"experimental-zvamo", "HasStdExtZvamo", "true",
+ "'Zvamo'(Vector AMO Operations)",
+ [FeatureStdExtV]>;
+def HasStdExtZvamo : Predicate<"Subtarget->hasStdExtZvamo()">,
+ AssemblerPredicate<(all_of FeatureExtZvamo),
+ "'Zvamo'(Vector AMO Operations)">;
+
def Feature64Bit
: SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">;
def IsRV64 : Predicate<"Subtarget->is64Bit()">,
@@ -164,8 +197,8 @@ def IsRV32 : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<(all_of (not Feature64Bit)),
"RV32I Base Instruction Set">;
+defvar RV32 = DefaultMode;
def RV64 : HwMode<"+64bit">;
-def RV32 : HwMode<"-64bit">;
def FeatureRV32E
: SubtargetFeature<"e", "IsRV32E", "true",
@@ -200,31 +233,44 @@ include "RISCVRegisterInfo.td"
include "RISCVCallingConv.td"
include "RISCVInstrInfo.td"
include "RISCVRegisterBanks.td"
-include "RISCVSchedRocket32.td"
-include "RISCVSchedRocket64.td"
+include "RISCVSchedRocket.td"
+include "RISCVSchedSiFive7.td"
//===----------------------------------------------------------------------===//
// RISC-V processors supported.
//===----------------------------------------------------------------------===//
def : ProcessorModel<"generic-rv32", NoSchedModel, []>;
-
def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>;
-def : ProcessorModel<"rocket-rv32", Rocket32Model, []>;
-
-def : ProcessorModel<"rocket-rv64", Rocket64Model, [Feature64Bit]>;
-
-def : ProcessorModel<"sifive-e31", Rocket32Model, [FeatureStdExtM,
- FeatureStdExtA,
- FeatureStdExtC]>;
-
-def : ProcessorModel<"sifive-u54", Rocket64Model, [Feature64Bit,
- FeatureStdExtM,
- FeatureStdExtA,
- FeatureStdExtF,
- FeatureStdExtD,
- FeatureStdExtC]>;
+def : ProcessorModel<"rocket-rv32", RocketModel, []>;
+def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>;
+
+def : ProcessorModel<"sifive-7-rv32", SiFive7Model, []>;
+def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit]>;
+
+def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC]>;
//===----------------------------------------------------------------------===//
// Define the RISC-V target.
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 8955994b1c2e..0a915cbcc1af 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -198,7 +198,9 @@ void RISCVAsmPrinter::emitAttributes() {
StringRef CPU = TM.getTargetCPU();
StringRef FS = TM.getTargetFeatureString();
const RISCVTargetMachine &RTM = static_cast<const RISCVTargetMachine &>(TM);
- const RISCVSubtarget STI(TT, CPU, FS, /*ABIName=*/"", RTM);
+ /* TuneCPU doesn't affect the emission of ELF attributes; the attributes only
+ care about arch-related features, so we can simply set TuneCPU to CPU. */
+ const RISCVSubtarget STI(TT, CPU, /*TuneCPU=*/CPU, FS, /*ABIName=*/"", RTM);
RTS.emitTargetAttributes(STI);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.cpp
index c63a84739c4a..d265f3a12b7f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.cpp
@@ -22,8 +22,8 @@ RISCVCallLowering::RISCVCallLowering(const RISCVTargetLowering &TLI)
: CallLowering(&TLI) {}
bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val,
- ArrayRef<Register> VRegs) const {
+ const Value *Val, ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const {
MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(RISCV::PseudoRET);
@@ -34,9 +34,10 @@ bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
return true;
}
-bool RISCVCallLowering::lowerFormalArguments(
- MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const {
+bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
if (F.arg_empty())
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.h
index 7ce074a61f0a..cd7fc4c76123 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.h
@@ -28,10 +28,12 @@ public:
RISCVCallLowering(const RISCVTargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- ArrayRef<Register> VRegs) const override;
+ ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
new file mode 100644
index 000000000000..ae32cbd1ae59
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
@@ -0,0 +1,154 @@
+//===- RISCVCleanupVSETVLI.cpp - Cleanup unneeded VSETVLI instructions ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function pass that removes duplicate vsetvli
+// instructions within a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-cleanup-vsetvli"
+#define RISCV_CLEANUP_VSETVLI_NAME "RISCV Cleanup VSETVLI pass"
+
+namespace {
+
+class RISCVCleanupVSETVLI : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVCleanupVSETVLI() : MachineFunctionPass(ID) {
+ initializeRISCVCleanupVSETVLIPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
+ // This pass modifies the program, but does not modify the CFG.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return RISCV_CLEANUP_VSETVLI_NAME; }
+};
+
+} // end anonymous namespace
+
+char RISCVCleanupVSETVLI::ID = 0;
+
+INITIALIZE_PASS(RISCVCleanupVSETVLI, DEBUG_TYPE,
+ RISCV_CLEANUP_VSETVLI_NAME, false, false)
+
+bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ MachineInstr *PrevVSETVLI = nullptr;
+
+ for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
+ MachineInstr &MI = *MII++;
+
+ if (MI.getOpcode() != RISCV::PseudoVSETVLI &&
+ MI.getOpcode() != RISCV::PseudoVSETIVLI) {
+ if (PrevVSETVLI &&
+ (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
+ MI.modifiesRegister(RISCV::VTYPE))) {
+ // Old VL/VTYPE is overwritten.
+ PrevVSETVLI = nullptr;
+ }
+ continue;
+ }
+
+ // If we don't have a previous VSET{I}VLI or the VL output isn't dead, we
+ // can't remove this VSETVLI.
+ if (!PrevVSETVLI || !MI.getOperand(0).isDead()) {
+ PrevVSETVLI = &MI;
+ continue;
+ }
+
+ // If a previous "set vl" instruction opcode is different from this one, we
+ // can't differentiate the AVL values.
+ if (PrevVSETVLI->getOpcode() != MI.getOpcode()) {
+ PrevVSETVLI = &MI;
+ continue;
+ }
+
+ // The remaining two cases are
+ // 1. PrevVSETVLI = PseudoVSETVLI
+ // MI = PseudoVSETVLI
+ //
+ // 2. PrevVSETVLI = PseudoVSETIVLI
+ // MI = PseudoVSETIVLI
+ Register AVLReg;
+ bool SameAVL = false;
+ if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
+ AVLReg = MI.getOperand(1).getReg();
+ SameAVL = PrevVSETVLI->getOperand(1).getReg() == AVLReg;
+ } else { // RISCV::PseudoVSETIVLI
+ SameAVL =
+ PrevVSETVLI->getOperand(1).getImm() == MI.getOperand(1).getImm();
+ }
+ int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm();
+ int64_t VTYPEImm = MI.getOperand(2).getImm();
+
+ // Does this VSET{I}VLI use the same AVL register/value and VTYPE immediate?
+ if (!SameAVL || PrevVTYPEImm != VTYPEImm) {
+ PrevVSETVLI = &MI;
+ continue;
+ }
+
+ // If the AVLReg is X0 we need to look at the output VL of both VSETVLIs.
+ if ((MI.getOpcode() == RISCV::PseudoVSETVLI) && (AVLReg == RISCV::X0)) {
+ assert((PrevVSETVLI->getOpcode() == RISCV::PseudoVSETVLI) &&
+ "Unexpected vsetvli opcode.");
+ Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
+ Register OutVL = MI.getOperand(0).getReg();
+ // We can't remove this VSETVLI if the previous one left VL unchanged and
+ // the current one sets it to VLMAX: without knowing the VL before the
+ // previous instruction we can't tell whether this is actually a change.
+ if (PrevOutVL == RISCV::X0 && OutVL != RISCV::X0) {
+ PrevVSETVLI = &MI;
+ continue;
+ }
+ }
+
+ // This VSETVLI is redundant, remove it.
+ MI.eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool RISCVCleanupVSETVLI::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ // Skip if the vector extension is not enabled.
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ if (!ST.hasStdExtV())
+ return false;
+
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF)
+ Changed |= runOnMachineBasicBlock(MBB);
+
+ return Changed;
+}
+
+/// Returns an instance of the Cleanup VSETVLI pass.
+FunctionPass *llvm::createRISCVCleanupVSETVLIPass() {
+ return new RISCVCleanupVSETVLI();
+}
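
The pass above only ever deletes the second of two vsetvli/vsetivli pseudos whose effects it can prove identical, and only when nothing in between could have touched VL or VTYPE. The following standalone sketch restates the per-pair redundancy test with a hypothetical flattened struct in place of the two MachineInstrs; the field names are made up for illustration.

#include <cassert>
#include <cstdint>

struct VSetVL {
  bool IsImmediateForm;  // PseudoVSETIVLI rather than PseudoVSETVLI
  uint64_t AVL;          // AVL register number, or immediate value
  int64_t VTypeImm;      // VTYPE immediate operand
  bool VLOutputIsDead;   // nobody reads the VL this pseudo defines
  bool AVLIsX0;          // register form only: AVL operand is X0
  bool VLOutputIsX0;     // destination register is X0
};

// Can Cur be deleted, given that Prev is still in effect when Cur executes?
bool isRedundant(const VSetVL &Prev, const VSetVL &Cur) {
  if (!Cur.VLOutputIsDead)
    return false;                                 // its VL result is used
  if (Prev.IsImmediateForm != Cur.IsImmediateForm)
    return false;                                 // can't compare reg vs. imm AVL
  if (Prev.AVL != Cur.AVL || Prev.VTypeImm != Cur.VTypeImm)
    return false;
  // An AVL of X0 means "keep VL" when the destination is X0 and "set VLMAX"
  // otherwise; if Prev kept VL and Cur asks for VLMAX, the effect may differ.
  if (!Cur.IsImmediateForm && Cur.AVLIsX0 && Prev.VLOutputIsX0 &&
      !Cur.VLOutputIsX0)
    return false;
  return true;
}

int main() {
  VSetVL Prev{false, /*AVL reg*/ 10, /*vtype*/ 0x51, false, false, false};
  VSetVL Same{false, 10, 0x51, /*dead*/ true, false, false};
  VSetVL OtherAVL{false, 11, 0x51, true, false, false};
  assert(isRedundant(Prev, Same));
  assert(!isRedundant(Prev, OtherAVL));
  return 0;
}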
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 504355fb8bf8..ec9a39569952 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -59,6 +59,9 @@ private:
bool expandLoadTLSGDAddress(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+ bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, unsigned Opcode);
};
char RISCVExpandPseudo::ID = 0;
@@ -99,6 +102,27 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandLoadTLSIEAddress(MBB, MBBI, NextMBBI);
case RISCV::PseudoLA_TLS_GD:
return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI);
+ case RISCV::PseudoVSETVLI:
+ case RISCV::PseudoVSETIVLI:
+ return expandVSetVL(MBB, MBBI);
+ case RISCV::PseudoVMCLR_M_B1:
+ case RISCV::PseudoVMCLR_M_B2:
+ case RISCV::PseudoVMCLR_M_B4:
+ case RISCV::PseudoVMCLR_M_B8:
+ case RISCV::PseudoVMCLR_M_B16:
+ case RISCV::PseudoVMCLR_M_B32:
+ case RISCV::PseudoVMCLR_M_B64:
+ // vmclr.m vd => vmxor.mm vd, vd, vd
+ return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXOR_MM);
+ case RISCV::PseudoVMSET_M_B1:
+ case RISCV::PseudoVMSET_M_B2:
+ case RISCV::PseudoVMSET_M_B4:
+ case RISCV::PseudoVMSET_M_B8:
+ case RISCV::PseudoVMSET_M_B16:
+ case RISCV::PseudoVMSET_M_B32:
+ case RISCV::PseudoVMSET_M_B64:
+ // vmset.m vd => vmxnor.mm vd, vd, vd
+ return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXNOR_MM);
}
return false;
@@ -188,6 +212,47 @@ bool RISCVExpandPseudo::expandLoadTLSGDAddress(
RISCV::ADDI);
}
+bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ assert(MBBI->getNumOperands() == 5 && "Unexpected instruction format");
+
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ assert((MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
+ MBBI->getOpcode() == RISCV::PseudoVSETIVLI) &&
+ "Unexpected pseudo instruction");
+ unsigned Opcode;
+ if (MBBI->getOpcode() == RISCV::PseudoVSETVLI)
+ Opcode = RISCV::VSETVLI;
+ else
+ Opcode = RISCV::VSETIVLI;
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ assert(Desc.getNumOperands() == 3 && "Unexpected instruction format");
+
+ Register DstReg = MBBI->getOperand(0).getReg();
+ bool DstIsDead = MBBI->getOperand(0).isDead();
+ BuildMI(MBB, MBBI, DL, Desc)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .add(MBBI->getOperand(1)) // VL
+ .add(MBBI->getOperand(2)); // VType
+
+ MBBI->eraseFromParent(); // The pseudo instruction is gone now.
+ return true;
+}
+
+bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned Opcode) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ Register DstReg = MBBI->getOperand(0).getReg();
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ BuildMI(MBB, MBBI, DL, Desc, DstReg)
+ .addReg(DstReg, RegState::Undef)
+ .addReg(DstReg, RegState::Undef);
+ MBBI->eraseFromParent(); // The pseudo instruction is gone now.
+ return true;
+}
+
} // end of anonymous namespace
INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo",
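
The vmclr/vmset expansions above rely on a simple bit identity: xor-ing any value with itself gives all zeros, and xnor-ing it with itself gives all ones, so the previous (possibly undefined) contents of vd never matter. A short standalone check of that identity:

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t V : {0x0ULL, 0x1234abcdULL, ~0x0ULL}) {
    assert((V ^ V) == 0);        // vmclr.m vd == vmxor.mm  vd, vd, vd
    assert(~(V ^ V) == ~0x0ULL); // vmset.m vd == vmxnor.mm vd, vd, vd
  }
  return 0;
}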
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 43adc7426c79..564d97f47d9e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -23,6 +23,105 @@
using namespace llvm;
+// For now we use x18, a.k.a. s2, as the pointer to the shadow call stack.
+// Users must explicitly pass -ffixed-x18 and must not use x18 in their asm.
+static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const DebugLoc &DL) {
+ if (!MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack))
+ return;
+
+ const auto &STI = MF.getSubtarget<RISCVSubtarget>();
+ Register RAReg = STI.getRegisterInfo()->getRARegister();
+
+ // Do not save RA to the SCS if it's not saved to the regular stack,
+ // i.e. RA is not at risk of being overwritten.
+ std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
+ if (std::none_of(CSI.begin(), CSI.end(),
+ [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; }))
+ return;
+
+ Register SCSPReg = RISCVABI::getSCSPReg();
+
+ auto &Ctx = MF.getFunction().getContext();
+ if (!STI.isRegisterReservedByUser(SCSPReg)) {
+ Ctx.diagnose(DiagnosticInfoUnsupported{
+ MF.getFunction(), "x18 not reserved by user for Shadow Call Stack."});
+ return;
+ }
+
+ const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+ if (RVFI->useSaveRestoreLibCalls(MF)) {
+ Ctx.diagnose(DiagnosticInfoUnsupported{
+ MF.getFunction(),
+ "Shadow Call Stack cannot be combined with Save/Restore LibCalls."});
+ return;
+ }
+
+ const RISCVInstrInfo *TII = STI.getInstrInfo();
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+ int64_t SlotSize = STI.getXLen() / 8;
+ // Store return address to shadow call stack
+ // s[w|d] ra, 0(s2)
+ // addi s2, s2, [4|8]
+ BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RAReg)
+ .addReg(SCSPReg)
+ .addImm(0);
+ BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI))
+ .addReg(SCSPReg, RegState::Define)
+ .addReg(SCSPReg)
+ .addImm(SlotSize);
+}
+
+static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const DebugLoc &DL) {
+ if (!MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack))
+ return;
+
+ const auto &STI = MF.getSubtarget<RISCVSubtarget>();
+ Register RAReg = STI.getRegisterInfo()->getRARegister();
+
+ // See emitSCSPrologue() above.
+ std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
+ if (std::none_of(CSI.begin(), CSI.end(),
+ [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; }))
+ return;
+
+ Register SCSPReg = RISCVABI::getSCSPReg();
+
+ auto &Ctx = MF.getFunction().getContext();
+ if (!STI.isRegisterReservedByUser(SCSPReg)) {
+ Ctx.diagnose(DiagnosticInfoUnsupported{
+ MF.getFunction(), "x18 not reserved by user for Shadow Call Stack."});
+ return;
+ }
+
+ const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+ if (RVFI->useSaveRestoreLibCalls(MF)) {
+ Ctx.diagnose(DiagnosticInfoUnsupported{
+ MF.getFunction(),
+ "Shadow Call Stack cannot be combined with Save/Restore LibCalls."});
+ return;
+ }
+
+ const RISCVInstrInfo *TII = STI.getInstrInfo();
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+ int64_t SlotSize = STI.getXLen() / 8;
+ // Load return address from shadow call stack
+ // l[w|d] ra, -[4|8](s2)
+ // addi s2, s2, -[4|8]
+ BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::LD : RISCV::LW))
+ .addReg(RAReg, RegState::Define)
+ .addReg(SCSPReg)
+ .addImm(-SlotSize);
+ BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI))
+ .addReg(SCSPReg, RegState::Define)
+ .addReg(SCSPReg)
+ .addImm(-SlotSize);
+}
+
// Get the ID of the libcall used for spilling and restoring callee saved
// registers. The ID is representative of the number of registers saved or
// restored by the libcall, except it is zero-indexed - ID 0 corresponds to a
@@ -39,7 +138,7 @@ static int getLibCallID(const MachineFunction &MF,
// RISCVRegisterInfo::hasReservedSpillSlot assigns negative frame indexes to
// registers which can be saved by libcall.
if (CS.getFrameIdx() < 0)
- MaxReg = std::max(MaxReg.id(), CS.getReg());
+ MaxReg = std::max(MaxReg.id(), CS.getReg().id());
if (MaxReg == RISCV::NoRegister)
return -1;
@@ -136,18 +235,12 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
// Determines the size of the frame and maximum call frame size.
void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
- const RISCVRegisterInfo *RI = STI.getRegisterInfo();
// Get the number of bytes to allocate from the FrameInfo.
uint64_t FrameSize = MFI.getStackSize();
// Get the alignment.
Align StackAlign = getStackAlign();
- if (RI->needsStackRealignment(MF)) {
- Align MaxStackAlign = std::max(StackAlign, MFI.getMaxAlign());
- FrameSize += (MaxStackAlign.value() - StackAlign.value());
- StackAlign = MaxStackAlign;
- }
// Set Max Call Frame Size
uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign);
@@ -222,15 +315,23 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
Register SPReg = getSPReg(STI);
Register BPReg = RISCVABI::getBPReg();
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
+
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+ return;
+
+ // Emit prologue for shadow call stack.
+ emitSCSPrologue(MF, MBB, MBBI, DL);
+
// Since spillCalleeSavedRegisters may have inserted a libcall, skip past
// any instructions marked as FrameSetup
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
++MBBI;
- // Debug location must be unknown since the first debug location is used
- // to determine the end of the prologue.
- DebugLoc DL;
-
// Determine the correct frame layout
determineFrameLayout(MF);
@@ -398,6 +499,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
Register FPReg = getFPReg(STI);
Register SPReg = getSPReg(STI);
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+ return;
+
// Get the insert location for the epilogue. If there were no terminators in
// the block, get the last instruction.
MachineBasicBlock::iterator MBBI = MBB.end();
@@ -457,11 +563,14 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
// Deallocate stack
adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy);
+
+ // Emit epilogue for shadow call stack.
+ emitSCSEpilogue(MF, MBB, MBBI, DL);
}
-int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- Register &FrameReg) const {
+StackOffset
+RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
@@ -513,7 +622,7 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
Offset += RVFI->getLibCallStackSize();
}
}
- return Offset;
+ return StackOffset::getFixed(Offset);
}
void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -547,14 +656,14 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
for (unsigned i = 0; CSRegs[i]; ++i)
SavedRegs.set(CSRegs[i]);
- if (MF.getSubtarget<RISCVSubtarget>().hasStdExtD() ||
- MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) {
+ if (MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) {
// If interrupt is enabled, this list contains all FP registers.
const MCPhysReg * Regs = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; Regs[i]; ++i)
- if (RISCV::FPR32RegClass.contains(Regs[i]) ||
+ if (RISCV::FPR16RegClass.contains(Regs[i]) ||
+ RISCV::FPR32RegClass.contains(Regs[i]) ||
RISCV::FPR64RegClass.contains(Regs[i]))
SavedRegs.set(Regs[i]);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 1517c847a04c..889b9ce2e1a9 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_TARGET_RISCV_RISCVFRAMELOWERING_H
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
class RISCVSubtarget;
@@ -29,8 +30,8 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 7570385e38e3..43bf16c53a62 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -12,8 +12,9 @@
#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
-#include "Utils/RISCVMatInt.h"
+#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
@@ -48,15 +49,439 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
return Result;
}
-// Returns true if the Node is an ISD::AND with a constant argument. If so,
-// set Mask to that constant value.
-static bool isConstantMask(SDNode *Node, uint64_t &Mask) {
- if (Node->getOpcode() == ISD::AND &&
- Node->getOperand(1).getOpcode() == ISD::Constant) {
- Mask = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- return true;
+static RISCVVLMUL getLMUL(EVT VT) {
+ switch (VT.getSizeInBits().getKnownMinValue() / 8) {
+ default:
+ llvm_unreachable("Invalid LMUL.");
+ case 1:
+ return RISCVVLMUL::LMUL_F8;
+ case 2:
+ return RISCVVLMUL::LMUL_F4;
+ case 4:
+ return RISCVVLMUL::LMUL_F2;
+ case 8:
+ return RISCVVLMUL::LMUL_1;
+ case 16:
+ return RISCVVLMUL::LMUL_2;
+ case 32:
+ return RISCVVLMUL::LMUL_4;
+ case 64:
+ return RISCVVLMUL::LMUL_8;
}
- return false;
+}
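getLMUL keys on the type's known minimum size in bytes: LLVM models an LMUL=1 RVV register group as a scalable type with a 64-bit minimum, so 8 bytes maps to LMUL_1, 16 to LMUL_2, and 1, 2 or 4 bytes to the fractional LMULs. A standalone sketch of the same mapping, with an illustrative enum rather than the real RISCVVLMUL:

// Standalone sketch (not the LLVM enum): byte count of the known minimum
// vector size to LMUL, assuming the 64-bit LMUL=1 block size used above.
#include <cassert>

enum class SketchLMUL { F8, F4, F2, M1, M2, M4, M8 };

static SketchLMUL lmulFromMinBytes(unsigned MinBytes) {
  switch (MinBytes) {
  case 1:  return SketchLMUL::F8;
  case 2:  return SketchLMUL::F4;
  case 4:  return SketchLMUL::F2;
  case 8:  return SketchLMUL::M1;
  case 16: return SketchLMUL::M2;
  case 32: return SketchLMUL::M4;
  default: return SketchLMUL::M8; // 64 bytes
  }
}

int main() {
  assert(lmulFromMinBytes(64 / 8) == SketchLMUL::M1);   // e.g. nxv1i64
  assert(lmulFromMinBytes(128 / 8) == SketchLMUL::M2);  // e.g. nxv2i64
  assert(lmulFromMinBytes(8 / 8) == SketchLMUL::F8);    // e.g. nxv1i8
  return 0;
}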
+
+static unsigned getSubregIndexByEVT(EVT VT, unsigned Index) {
+ RISCVVLMUL LMUL = getLMUL(VT);
+ if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
+ LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
+ static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
+ "Unexpected subreg numbering");
+ return RISCV::sub_vrm1_0 + Index;
+ } else if (LMUL == RISCVVLMUL::LMUL_2) {
+ static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
+ "Unexpected subreg numbering");
+ return RISCV::sub_vrm2_0 + Index;
+ } else if (LMUL == RISCVVLMUL::LMUL_4) {
+ static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
+ "Unexpected subreg numbering");
+ return RISCV::sub_vrm4_0 + Index;
+ }
+ llvm_unreachable("Invalid vector type.");
+}
+
+static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
+ unsigned RegClassID, unsigned SubReg0) {
+ assert(Regs.size() >= 2 && Regs.size() <= 8);
+
+ SDLoc DL(Regs[0]);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
+
+ for (unsigned I = 0; I < Regs.size(); ++I) {
+ Ops.push_back(Regs[I]);
+ Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
+ }
+ SDNode *N =
+ CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
+ return SDValue(N, 0);
+}
+
+static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
+ unsigned NF) {
+ static const unsigned RegClassIDs[] = {
+ RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
+ RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
+ RISCV::VRN8M1RegClassID};
+
+ return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0);
+}
+
+static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
+ unsigned NF) {
+ static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID,
+ RISCV::VRN3M2RegClassID,
+ RISCV::VRN4M2RegClassID};
+
+ return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0);
+}
+
+static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
+ unsigned NF) {
+ return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID,
+ RISCV::sub_vrm4_0);
+}
+
+static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
+ unsigned NF, RISCVVLMUL LMUL) {
+ switch (LMUL) {
+ default:
+ llvm_unreachable("Invalid LMUL.");
+ case RISCVVLMUL::LMUL_F8:
+ case RISCVVLMUL::LMUL_F4:
+ case RISCVVLMUL::LMUL_F2:
+ case RISCVVLMUL::LMUL_1:
+ return createM1Tuple(CurDAG, Regs, NF);
+ case RISCVVLMUL::LMUL_2:
+ return createM2Tuple(CurDAG, Regs, NF);
+ case RISCVVLMUL::LMUL_4:
+ return createM4Tuple(CurDAG, Regs, NF);
+ }
+}
+
+void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned IntNo,
+ bool IsStrided) {
+ SDLoc DL(Node);
+ unsigned NF = Node->getNumValues() - 1;
+ EVT VT = Node->getValueType(0);
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ MVT XLenVT = Subtarget->getXLenVT();
+ RISCVVLMUL LMUL = getLMUL(VT);
+ SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+ SmallVector<SDValue, 5> Operands;
+ Operands.push_back(Node->getOperand(2)); // Base pointer.
+ if (IsStrided) {
+ Operands.push_back(Node->getOperand(3)); // Stride.
+ Operands.push_back(Node->getOperand(4)); // VL.
+ } else {
+ Operands.push_back(Node->getOperand(3)); // VL.
+ }
+ Operands.push_back(SEW);
+ Operands.push_back(Node->getOperand(0)); // Chain.
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+ IntNo, ScalarSize, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(RISCVVLMUL::LMUL_1));
+ SDNode *Load =
+ CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
+ SDValue SuperReg = SDValue(Load, 0);
+ for (unsigned I = 0; I < NF; ++I)
+ ReplaceUses(SDValue(Node, I),
+ CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL,
+ VT, SuperReg));
+
+ ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
+ CurDAG->RemoveDeadNode(Node);
+}
+
+void RISCVDAGToDAGISel::selectVLSEGMask(SDNode *Node, unsigned IntNo,
+ bool IsStrided) {
+ SDLoc DL(Node);
+ unsigned NF = Node->getNumValues() - 1;
+ EVT VT = Node->getValueType(0);
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ MVT XLenVT = Subtarget->getXLenVT();
+ RISCVVLMUL LMUL = getLMUL(VT);
+ SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
+ SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
+ SmallVector<SDValue, 7> Operands;
+ Operands.push_back(MaskedOff);
+ Operands.push_back(Node->getOperand(NF + 2)); // Base pointer.
+ if (IsStrided) {
+ Operands.push_back(Node->getOperand(NF + 3)); // Stride.
+ Operands.push_back(Node->getOperand(NF + 4)); // Mask.
+ Operands.push_back(Node->getOperand(NF + 5)); // VL.
+ } else {
+ Operands.push_back(Node->getOperand(NF + 3)); // Mask.
+ Operands.push_back(Node->getOperand(NF + 4)); // VL.
+ }
+ Operands.push_back(SEW);
+ Operands.push_back(Node->getOperand(0)); // Chain.
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+ IntNo, ScalarSize, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(RISCVVLMUL::LMUL_1));
+ SDNode *Load =
+ CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
+ SDValue SuperReg = SDValue(Load, 0);
+ for (unsigned I = 0; I < NF; ++I)
+ ReplaceUses(SDValue(Node, I),
+ CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL,
+ VT, SuperReg));
+
+ ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
+ CurDAG->RemoveDeadNode(Node);
+}
+
+void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node) {
+ SDLoc DL(Node);
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned NF = Node->getNumValues() - 2; // Do not count Chain and Glue.
+ EVT VT = Node->getValueType(0);
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ MVT XLenVT = Subtarget->getXLenVT();
+ RISCVVLMUL LMUL = getLMUL(VT);
+ SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+ SmallVector<SDValue, 5> Operands;
+ Operands.push_back(Node->getOperand(2)); // Base pointer.
+ Operands.push_back(Node->getOperand(3)); // VL.
+ Operands.push_back(SEW);
+ Operands.push_back(Node->getOperand(0)); // Chain.
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+ IntNo, ScalarSize, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(RISCVVLMUL::LMUL_1));
+ SDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other,
+ MVT::Glue, Operands);
+ SDValue SuperReg = SDValue(Load, 0);
+ for (unsigned I = 0; I < NF; ++I)
+ ReplaceUses(SDValue(Node, I),
+ CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL,
+ VT, SuperReg));
+
+ ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // Chain.
+ ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Glue.
+ CurDAG->RemoveDeadNode(Node);
+}
+
+void RISCVDAGToDAGISel::selectVLSEGFFMask(SDNode *Node) {
+ SDLoc DL(Node);
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned NF = Node->getNumValues() - 2; // Do not count Chain and Glue.
+ EVT VT = Node->getValueType(0);
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ MVT XLenVT = Subtarget->getXLenVT();
+ RISCVVLMUL LMUL = getLMUL(VT);
+ SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
+ SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
+ SmallVector<SDValue, 7> Operands;
+ Operands.push_back(MaskedOff);
+ Operands.push_back(Node->getOperand(NF + 2)); // Base pointer.
+ Operands.push_back(Node->getOperand(NF + 3)); // Mask.
+ Operands.push_back(Node->getOperand(NF + 4)); // VL.
+ Operands.push_back(SEW);
+ Operands.push_back(Node->getOperand(0)); // Chain.
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+ IntNo, ScalarSize, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(RISCVVLMUL::LMUL_1));
+ SDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other,
+ MVT::Glue, Operands);
+ SDValue SuperReg = SDValue(Load, 0);
+ for (unsigned I = 0; I < NF; ++I)
+ ReplaceUses(SDValue(Node, I),
+ CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL,
+ VT, SuperReg));
+
+ ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // Chain.
+ ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Glue.
+ CurDAG->RemoveDeadNode(Node);
+}
+
+void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned IntNo) {
+ SDLoc DL(Node);
+ unsigned NF = Node->getNumValues() - 1;
+ EVT VT = Node->getValueType(0);
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ MVT XLenVT = Subtarget->getXLenVT();
+ RISCVVLMUL LMUL = getLMUL(VT);
+ SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+ SDValue Operands[] = {
+ Node->getOperand(2), // Base pointer.
+ Node->getOperand(3), // Index.
+ Node->getOperand(4), // VL.
+ SEW, Node->getOperand(0) // Chain.
+ };
+
+ EVT IndexVT = Node->getOperand(3)->getValueType(0);
+ RISCVVLMUL IndexLMUL = getLMUL(IndexVT);
+ unsigned IndexScalarSize = IndexVT.getScalarSizeInBits();
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+ IntNo, IndexScalarSize, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(IndexLMUL));
+ SDNode *Load =
+ CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
+ SDValue SuperReg = SDValue(Load, 0);
+ for (unsigned I = 0; I < NF; ++I)
+ ReplaceUses(SDValue(Node, I),
+ CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL,
+ VT, SuperReg));
+
+ ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
+ CurDAG->RemoveDeadNode(Node);
+}
+
+void RISCVDAGToDAGISel::selectVLXSEGMask(SDNode *Node, unsigned IntNo) {
+ SDLoc DL(Node);
+ unsigned NF = Node->getNumValues() - 1;
+ EVT VT = Node->getValueType(0);
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ MVT XLenVT = Subtarget->getXLenVT();
+ RISCVVLMUL LMUL = getLMUL(VT);
+ SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
+ SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
+ SDValue Operands[] = {
+ MaskedOff,
+ Node->getOperand(NF + 2), // Base pointer.
+ Node->getOperand(NF + 3), // Index.
+ Node->getOperand(NF + 4), // Mask.
+ Node->getOperand(NF + 5), // VL.
+ SEW,
+ Node->getOperand(0) // Chain.
+ };
+
+ EVT IndexVT = Node->getOperand(NF + 3)->getValueType(0);
+ RISCVVLMUL IndexLMUL = getLMUL(IndexVT);
+ unsigned IndexScalarSize = IndexVT.getScalarSizeInBits();
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+ IntNo, IndexScalarSize, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(IndexLMUL));
+ SDNode *Load =
+ CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
+ SDValue SuperReg = SDValue(Load, 0);
+ for (unsigned I = 0; I < NF; ++I)
+ ReplaceUses(SDValue(Node, I),
+ CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL,
+ VT, SuperReg));
+
+ ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
+ CurDAG->RemoveDeadNode(Node);
+}
+
+void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned IntNo,
+ bool IsStrided) {
+ SDLoc DL(Node);
+ unsigned NF = Node->getNumOperands() - 4;
+ if (IsStrided)
+ NF--;
+ EVT VT = Node->getOperand(2)->getValueType(0);
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ MVT XLenVT = Subtarget->getXLenVT();
+ RISCVVLMUL LMUL = getLMUL(VT);
+ SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
+ SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
+ SmallVector<SDValue, 6> Operands;
+ Operands.push_back(StoreVal);
+ Operands.push_back(Node->getOperand(2 + NF)); // Base pointer.
+ if (IsStrided) {
+ Operands.push_back(Node->getOperand(3 + NF)); // Stride.
+ Operands.push_back(Node->getOperand(4 + NF)); // VL.
+ } else {
+ Operands.push_back(Node->getOperand(3 + NF)); // VL.
+ }
+ Operands.push_back(SEW);
+ Operands.push_back(Node->getOperand(0)); // Chain.
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+ IntNo, ScalarSize, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(RISCVVLMUL::LMUL_1));
+ SDNode *Store =
+ CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
+ ReplaceNode(Node, Store);
+}
+
+void RISCVDAGToDAGISel::selectVSSEGMask(SDNode *Node, unsigned IntNo,
+ bool IsStrided) {
+ SDLoc DL(Node);
+ unsigned NF = Node->getNumOperands() - 5;
+ if (IsStrided)
+ NF--;
+ EVT VT = Node->getOperand(2)->getValueType(0);
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ MVT XLenVT = Subtarget->getXLenVT();
+ RISCVVLMUL LMUL = getLMUL(VT);
+ SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
+ SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
+ SmallVector<SDValue, 7> Operands;
+ Operands.push_back(StoreVal);
+ Operands.push_back(Node->getOperand(2 + NF)); // Base pointer.
+ if (IsStrided) {
+ Operands.push_back(Node->getOperand(3 + NF)); // Stride.
+ Operands.push_back(Node->getOperand(4 + NF)); // Mask.
+ Operands.push_back(Node->getOperand(5 + NF)); // VL.
+ } else {
+ Operands.push_back(Node->getOperand(3 + NF)); // Mask.
+ Operands.push_back(Node->getOperand(4 + NF)); // VL.
+ }
+ Operands.push_back(SEW);
+ Operands.push_back(Node->getOperand(0)); // Chain.
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+ IntNo, ScalarSize, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(RISCVVLMUL::LMUL_1));
+ SDNode *Store =
+ CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
+ ReplaceNode(Node, Store);
+}
+
+void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned IntNo) {
+ SDLoc DL(Node);
+ unsigned NF = Node->getNumOperands() - 5;
+ EVT VT = Node->getOperand(2)->getValueType(0);
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ MVT XLenVT = Subtarget->getXLenVT();
+ RISCVVLMUL LMUL = getLMUL(VT);
+ SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
+ SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
+ SDValue Operands[] = {
+ StoreVal,
+ Node->getOperand(2 + NF), // Base pointer.
+ Node->getOperand(3 + NF), // Index.
+ Node->getOperand(4 + NF), // VL.
+ SEW,
+ Node->getOperand(0) // Chain.
+ };
+
+ EVT IndexVT = Node->getOperand(3 + NF)->getValueType(0);
+ RISCVVLMUL IndexLMUL = getLMUL(IndexVT);
+ unsigned IndexScalarSize = IndexVT.getScalarSizeInBits();
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+ IntNo, IndexScalarSize, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(IndexLMUL));
+ SDNode *Store =
+ CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
+ ReplaceNode(Node, Store);
+}
+
+void RISCVDAGToDAGISel::selectVSXSEGMask(SDNode *Node, unsigned IntNo) {
+ SDLoc DL(Node);
+ unsigned NF = Node->getNumOperands() - 6;
+ EVT VT = Node->getOperand(2)->getValueType(0);
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ MVT XLenVT = Subtarget->getXLenVT();
+ RISCVVLMUL LMUL = getLMUL(VT);
+ SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
+ SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
+ SDValue Operands[] = {
+ StoreVal,
+ Node->getOperand(2 + NF), // Base pointer.
+ Node->getOperand(3 + NF), // Index.
+ Node->getOperand(4 + NF), // Mask.
+ Node->getOperand(5 + NF), // VL.
+ SEW,
+ Node->getOperand(0) // Chain.
+ };
+
+ EVT IndexVT = Node->getOperand(3 + NF)->getValueType(0);
+ RISCVVLMUL IndexLMUL = getLMUL(IndexVT);
+ unsigned IndexScalarSize = IndexVT.getScalarSizeInBits();
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+ IntNo, IndexScalarSize, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(IndexLMUL));
+ SDNode *Store =
+ CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
+ ReplaceNode(Node, Store);
}
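Each store-segment selector above recovers NF, the number of fields being stored, by subtracting the count of non-value operands from the intrinsic node: chain, intrinsic id, base pointer and VL are always present, and a stride, index or mask adds one more each. A standalone check of that arithmetic, with illustrative operand lists rather than the real SDNode layout:

// Standalone sketch (not LLVM code): verifies the NF arithmetic used by the
// vsseg / vsoxseg / vsoxseg_mask selectors above.
#include <cassert>
#include <string>
#include <vector>

static unsigned countValues(const std::vector<std::string> &Ops) {
  unsigned NonValue = 0;
  for (const auto &Op : Ops)
    if (Op == "chain" || Op == "id" || Op == "ptr" || Op == "vl" ||
        Op == "stride" || Op == "index" || Op == "mask")
      ++NonValue;
  return Ops.size() - NonValue;
}

int main() {
  // vsseg2: NF = NumOperands - 4.
  std::vector<std::string> VSSeg2 = {"chain", "id", "v0", "v1", "ptr", "vl"};
  assert(countValues(VSSeg2) == VSSeg2.size() - 4);
  // vsoxseg2 (indexed): NF = NumOperands - 5.
  std::vector<std::string> VSoxSeg2 = {"chain", "id", "v0", "v1",
                                       "ptr", "index", "vl"};
  assert(countValues(VSoxSeg2) == VSoxSeg2.size() - 5);
  // vsoxseg2_mask: NF = NumOperands - 6.
  std::vector<std::string> VSoxSeg2Mask = {"chain", "id", "v0", "v1",
                                           "ptr", "index", "mask", "vl"};
  assert(countValues(VSoxSeg2Mask) == VSoxSeg2Mask.size() - 6);
  return 0;
}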
void RISCVDAGToDAGISel::Select(SDNode *Node) {
@@ -86,7 +511,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!(-4096 <= Imm && Imm <= -2049) && !(2048 <= Imm && Imm <= 4094))
break;
// Break the imm to imm0+imm1.
- SDLoc DL(Node);
EVT VT = Node->getValueType(0);
const SDValue ImmOp0 = CurDAG->getTargetConstant(Imm - Imm / 2, DL, VT);
const SDValue ImmOp1 = CurDAG->getTargetConstant(Imm / 2, DL, VT);
@@ -102,14 +526,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
case ISD::Constant: {
auto ConstNode = cast<ConstantSDNode>(Node);
if (VT == XLenVT && ConstNode->isNullValue()) {
- SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
- RISCV::X0, XLenVT);
+ SDValue New =
+ CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
ReplaceNode(Node, New.getNode());
return;
}
int64_t Imm = ConstNode->getSExtValue();
if (XLenVT == MVT::i64) {
- ReplaceNode(Node, selectImm(CurDAG, SDLoc(Node), Imm, XLenVT));
+ ReplaceNode(Node, selectImm(CurDAG, DL, Imm, XLenVT));
return;
}
break;
@@ -121,38 +545,235 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
return;
}
- case ISD::SRL: {
- if (!Subtarget->is64Bit())
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ // By default we do not custom select any intrinsic.
+ default:
break;
- SDValue Op0 = Node->getOperand(0);
- SDValue Op1 = Node->getOperand(1);
- uint64_t Mask;
- // Match (srl (and val, mask), imm) where the result would be a
- // zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
- // is equivalent to this (SimplifyDemandedBits may have removed lower bits
- // from the mask that aren't necessary due to the right-shifting).
- if (Op1.getOpcode() == ISD::Constant &&
- isConstantMask(Op0.getNode(), Mask)) {
- uint64_t ShAmt = cast<ConstantSDNode>(Op1.getNode())->getZExtValue();
-
- if ((Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff) {
- SDValue ShAmtVal =
- CurDAG->getTargetConstant(ShAmt, SDLoc(Node), XLenVT);
- CurDAG->SelectNodeTo(Node, RISCV::SRLIW, XLenVT, Op0.getOperand(0),
- ShAmtVal);
- return;
+
+ case Intrinsic::riscv_vsetvli: {
+ if (!Subtarget->hasStdExtV())
+ break;
+
+ assert(Node->getNumOperands() == 5);
+
+ RISCVVSEW VSEW =
+ static_cast<RISCVVSEW>(Node->getConstantOperandVal(3) & 0x7);
+ RISCVVLMUL VLMul =
+ static_cast<RISCVVLMUL>(Node->getConstantOperandVal(4) & 0x7);
+
+ unsigned VTypeI = RISCVVType::encodeVTYPE(
+ VLMul, VSEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
+ SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
+
+ SDValue VLOperand = Node->getOperand(2);
+ if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
+ uint64_t AVL = C->getZExtValue();
+ if (isUInt<5>(AVL)) {
+ SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
+ ReplaceNode(Node,
+ CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
+ MVT::Other, VLImm, VTypeIOp,
+ /* Chain */ Node->getOperand(0)));
+ return;
+ }
}
+
+ ReplaceNode(Node,
+ CurDAG->getMachineNode(RISCV::PseudoVSETVLI, DL, XLenVT,
+ MVT::Other, VLOperand, VTypeIOp,
+ /* Chain */ Node->getOperand(0)));
+ return;
+ }
+ case Intrinsic::riscv_vsetvlimax: {
+ if (!Subtarget->hasStdExtV())
+ break;
+
+ assert(Node->getNumOperands() == 4);
+
+ RISCVVSEW VSEW =
+ static_cast<RISCVVSEW>(Node->getConstantOperandVal(2) & 0x7);
+ RISCVVLMUL VLMul =
+ static_cast<RISCVVLMUL>(Node->getConstantOperandVal(3) & 0x7);
+
+ unsigned VTypeI = RISCVVType::encodeVTYPE(
+ VLMul, VSEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
+ SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
+
+ SDValue VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
+ ReplaceNode(Node,
+ CurDAG->getMachineNode(RISCV::PseudoVSETVLI, DL, XLenVT,
+ MVT::Other, VLOperand, VTypeIOp,
+ /* Chain */ Node->getOperand(0)));
+ return;
+ }
+ case Intrinsic::riscv_vlseg2:
+ case Intrinsic::riscv_vlseg3:
+ case Intrinsic::riscv_vlseg4:
+ case Intrinsic::riscv_vlseg5:
+ case Intrinsic::riscv_vlseg6:
+ case Intrinsic::riscv_vlseg7:
+ case Intrinsic::riscv_vlseg8: {
+ selectVLSEG(Node, IntNo, /*IsStrided=*/false);
+ return;
+ }
+ case Intrinsic::riscv_vlseg2_mask:
+ case Intrinsic::riscv_vlseg3_mask:
+ case Intrinsic::riscv_vlseg4_mask:
+ case Intrinsic::riscv_vlseg5_mask:
+ case Intrinsic::riscv_vlseg6_mask:
+ case Intrinsic::riscv_vlseg7_mask:
+ case Intrinsic::riscv_vlseg8_mask: {
+ selectVLSEGMask(Node, IntNo, /*IsStrided=*/false);
+ return;
+ }
+ case Intrinsic::riscv_vlsseg2:
+ case Intrinsic::riscv_vlsseg3:
+ case Intrinsic::riscv_vlsseg4:
+ case Intrinsic::riscv_vlsseg5:
+ case Intrinsic::riscv_vlsseg6:
+ case Intrinsic::riscv_vlsseg7:
+ case Intrinsic::riscv_vlsseg8: {
+ selectVLSEG(Node, IntNo, /*IsStrided=*/true);
+ return;
+ }
+ case Intrinsic::riscv_vlsseg2_mask:
+ case Intrinsic::riscv_vlsseg3_mask:
+ case Intrinsic::riscv_vlsseg4_mask:
+ case Intrinsic::riscv_vlsseg5_mask:
+ case Intrinsic::riscv_vlsseg6_mask:
+ case Intrinsic::riscv_vlsseg7_mask:
+ case Intrinsic::riscv_vlsseg8_mask: {
+ selectVLSEGMask(Node, IntNo, /*IsStrided=*/true);
+ return;
+ }
+ case Intrinsic::riscv_vloxseg2:
+ case Intrinsic::riscv_vloxseg3:
+ case Intrinsic::riscv_vloxseg4:
+ case Intrinsic::riscv_vloxseg5:
+ case Intrinsic::riscv_vloxseg6:
+ case Intrinsic::riscv_vloxseg7:
+ case Intrinsic::riscv_vloxseg8:
+ case Intrinsic::riscv_vluxseg2:
+ case Intrinsic::riscv_vluxseg3:
+ case Intrinsic::riscv_vluxseg4:
+ case Intrinsic::riscv_vluxseg5:
+ case Intrinsic::riscv_vluxseg6:
+ case Intrinsic::riscv_vluxseg7:
+ case Intrinsic::riscv_vluxseg8: {
+ selectVLXSEG(Node, IntNo);
+ return;
+ }
+ case Intrinsic::riscv_vloxseg2_mask:
+ case Intrinsic::riscv_vloxseg3_mask:
+ case Intrinsic::riscv_vloxseg4_mask:
+ case Intrinsic::riscv_vloxseg5_mask:
+ case Intrinsic::riscv_vloxseg6_mask:
+ case Intrinsic::riscv_vloxseg7_mask:
+ case Intrinsic::riscv_vloxseg8_mask:
+ case Intrinsic::riscv_vluxseg2_mask:
+ case Intrinsic::riscv_vluxseg3_mask:
+ case Intrinsic::riscv_vluxseg4_mask:
+ case Intrinsic::riscv_vluxseg5_mask:
+ case Intrinsic::riscv_vluxseg6_mask:
+ case Intrinsic::riscv_vluxseg7_mask:
+ case Intrinsic::riscv_vluxseg8_mask: {
+ selectVLXSEGMask(Node, IntNo);
+ return;
+ }
}
break;
}
- case RISCVISD::READ_CYCLE_WIDE:
- assert(!Subtarget->is64Bit() && "READ_CYCLE_WIDE is only used on riscv32");
-
- ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ReadCycleWide, DL, MVT::i32,
- MVT::i32, MVT::Other,
- Node->getOperand(0)));
+ case ISD::INTRINSIC_VOID: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::riscv_vsseg2:
+ case Intrinsic::riscv_vsseg3:
+ case Intrinsic::riscv_vsseg4:
+ case Intrinsic::riscv_vsseg5:
+ case Intrinsic::riscv_vsseg6:
+ case Intrinsic::riscv_vsseg7:
+ case Intrinsic::riscv_vsseg8: {
+ selectVSSEG(Node, IntNo, /*IsStrided=*/false);
+ return;
+ }
+ case Intrinsic::riscv_vsseg2_mask:
+ case Intrinsic::riscv_vsseg3_mask:
+ case Intrinsic::riscv_vsseg4_mask:
+ case Intrinsic::riscv_vsseg5_mask:
+ case Intrinsic::riscv_vsseg6_mask:
+ case Intrinsic::riscv_vsseg7_mask:
+ case Intrinsic::riscv_vsseg8_mask: {
+ selectVSSEGMask(Node, IntNo, /*IsStrided=*/false);
+ return;
+ }
+ case Intrinsic::riscv_vssseg2:
+ case Intrinsic::riscv_vssseg3:
+ case Intrinsic::riscv_vssseg4:
+ case Intrinsic::riscv_vssseg5:
+ case Intrinsic::riscv_vssseg6:
+ case Intrinsic::riscv_vssseg7:
+ case Intrinsic::riscv_vssseg8: {
+ selectVSSEG(Node, IntNo, /*IsStrided=*/true);
+ return;
+ }
+ case Intrinsic::riscv_vssseg2_mask:
+ case Intrinsic::riscv_vssseg3_mask:
+ case Intrinsic::riscv_vssseg4_mask:
+ case Intrinsic::riscv_vssseg5_mask:
+ case Intrinsic::riscv_vssseg6_mask:
+ case Intrinsic::riscv_vssseg7_mask:
+ case Intrinsic::riscv_vssseg8_mask: {
+ selectVSSEGMask(Node, IntNo, /*IsStrided=*/true);
+ return;
+ }
+ case Intrinsic::riscv_vsoxseg2:
+ case Intrinsic::riscv_vsoxseg3:
+ case Intrinsic::riscv_vsoxseg4:
+ case Intrinsic::riscv_vsoxseg5:
+ case Intrinsic::riscv_vsoxseg6:
+ case Intrinsic::riscv_vsoxseg7:
+ case Intrinsic::riscv_vsoxseg8:
+ case Intrinsic::riscv_vsuxseg2:
+ case Intrinsic::riscv_vsuxseg3:
+ case Intrinsic::riscv_vsuxseg4:
+ case Intrinsic::riscv_vsuxseg5:
+ case Intrinsic::riscv_vsuxseg6:
+ case Intrinsic::riscv_vsuxseg7:
+ case Intrinsic::riscv_vsuxseg8: {
+ selectVSXSEG(Node, IntNo);
+ return;
+ }
+ case Intrinsic::riscv_vsoxseg2_mask:
+ case Intrinsic::riscv_vsoxseg3_mask:
+ case Intrinsic::riscv_vsoxseg4_mask:
+ case Intrinsic::riscv_vsoxseg5_mask:
+ case Intrinsic::riscv_vsoxseg6_mask:
+ case Intrinsic::riscv_vsoxseg7_mask:
+ case Intrinsic::riscv_vsoxseg8_mask:
+ case Intrinsic::riscv_vsuxseg2_mask:
+ case Intrinsic::riscv_vsuxseg3_mask:
+ case Intrinsic::riscv_vsuxseg4_mask:
+ case Intrinsic::riscv_vsuxseg5_mask:
+ case Intrinsic::riscv_vsuxseg6_mask:
+ case Intrinsic::riscv_vsuxseg7_mask:
+ case Intrinsic::riscv_vsuxseg8_mask: {
+ selectVSXSEGMask(Node, IntNo);
+ return;
+ }
+ }
+ break;
+ }
+ case RISCVISD::VLSEGFF: {
+ selectVLSEGFF(Node);
return;
}
+ case RISCVISD::VLSEGFF_MASK: {
+ selectVLSEGFFMask(Node);
+ return;
+ }
+ }
// Select the default instruction.
SelectCode(Node);
@@ -184,328 +805,132 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
return false;
}
-// Check that it is a SLOI (Shift Left Ones Immediate). We first check that
-// it is the right node tree:
-//
-// (OR (SHL RS1, VC2), VC1)
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC1 == maskTrailingOnes<uint64_t>(VC2)
+// Match (srl (and val, mask), imm) where the result would be a
+// zero-extended 32-bit integer, i.e. the mask is 0xffffffff or the AND is
+// equivalent to using that mask (SimplifyDemandedBits may have removed lower
+// bits from the mask that aren't necessary due to the right-shifting).
+bool RISCVDAGToDAGISel::MatchSRLIW(SDNode *N) const {
+ assert(N->getOpcode() == ISD::SRL);
+ assert(N->getOperand(0).getOpcode() == ISD::AND);
+ assert(isa<ConstantSDNode>(N->getOperand(1)));
+ assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-bool RISCVDAGToDAGISel::SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt) {
- MVT XLenVT = Subtarget->getXLenVT();
- if (N.getOpcode() == ISD::OR) {
- SDValue Or = N;
- if (Or.getOperand(0).getOpcode() == ISD::SHL) {
- SDValue Shl = Or.getOperand(0);
- if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
- isa<ConstantSDNode>(Or.getOperand(1))) {
- if (XLenVT == MVT::i64) {
- uint64_t VC1 = Or.getConstantOperandVal(1);
- uint64_t VC2 = Shl.getConstantOperandVal(1);
- if (VC1 == maskTrailingOnes<uint64_t>(VC2)) {
- RS1 = Shl.getOperand(0);
- Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
- Shl.getOperand(1).getValueType());
- return true;
- }
- }
- if (XLenVT == MVT::i32) {
- uint32_t VC1 = Or.getConstantOperandVal(1);
- uint32_t VC2 = Shl.getConstantOperandVal(1);
- if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
- RS1 = Shl.getOperand(0);
- Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
- Shl.getOperand(1).getValueType());
- return true;
- }
- }
- }
- }
- }
- return false;
-}
-
-// Check that it is a SROI (Shift Right Ones Immediate). We first check that
-// it is the right node tree:
-//
-// (OR (SRL RS1, VC2), VC1)
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC1 == maskLeadingOnes<uint64_t>(VC2)
-
-bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) {
- MVT XLenVT = Subtarget->getXLenVT();
- if (N.getOpcode() == ISD::OR) {
- SDValue Or = N;
- if (Or.getOperand(0).getOpcode() == ISD::SRL) {
- SDValue Srl = Or.getOperand(0);
- if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
- isa<ConstantSDNode>(Or.getOperand(1))) {
- if (XLenVT == MVT::i64) {
- uint64_t VC1 = Or.getConstantOperandVal(1);
- uint64_t VC2 = Srl.getConstantOperandVal(1);
- if (VC1 == maskLeadingOnes<uint64_t>(VC2)) {
- RS1 = Srl.getOperand(0);
- Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
- Srl.getOperand(1).getValueType());
- return true;
- }
- }
- if (XLenVT == MVT::i32) {
- uint32_t VC1 = Or.getConstantOperandVal(1);
- uint32_t VC2 = Srl.getConstantOperandVal(1);
- if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
- RS1 = Srl.getOperand(0);
- Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
- Srl.getOperand(1).getValueType());
- return true;
- }
- }
- }
- }
- }
- return false;
-}
-
-// Check that it is a RORI (Rotate Right Immediate). We first check that
-// it is the right node tree:
-//
-// (ROTL RS1, VC)
-//
-// The compiler translates immediate rotations to the right given by the call
-// to the rotateright32/rotateright64 intrinsics as rotations to the left.
-// Since the rotation to the left can be easily emulated as a rotation to the
-// right by negating the constant, there is no encoding for ROLI.
-// We then select the immediate left rotations as RORI by the complementary
-// constant:
-//
-// Shamt == XLen - VC
+ // The IsRV64 predicate is checked after PatFrag predicates so we can get
+ // here even on RV32.
+ if (!Subtarget->is64Bit())
+ return false;
-bool RISCVDAGToDAGISel::SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt) {
- MVT XLenVT = Subtarget->getXLenVT();
- if (N.getOpcode() == ISD::ROTL) {
- if (isa<ConstantSDNode>(N.getOperand(1))) {
- if (XLenVT == MVT::i64) {
- uint64_t VC = N.getConstantOperandVal(1);
- Shamt = CurDAG->getTargetConstant((64 - VC), SDLoc(N),
- N.getOperand(1).getValueType());
- RS1 = N.getOperand(0);
- return true;
- }
- if (XLenVT == MVT::i32) {
- uint32_t VC = N.getConstantOperandVal(1);
- Shamt = CurDAG->getTargetConstant((32 - VC), SDLoc(N),
- N.getOperand(1).getValueType());
- RS1 = N.getOperand(0);
- return true;
- }
- }
- }
- return false;
+ SDValue And = N->getOperand(0);
+ uint64_t ShAmt = N->getConstantOperandVal(1);
+ uint64_t Mask = And.getConstantOperandVal(1);
+ return (Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff;
}
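MatchSRLIW accepts any AND mask that covers bits 31:ShAmt exactly: the bits below ShAmt are shifted out anyway, while bits 63:32 must be cleared and bits 31:ShAmt kept for the result to be a zero-extension of an i32. A standalone sketch with a local stand-in for maskTrailingOnes (not LLVM code):

// Standalone sketch: the mask test used by MatchSRLIW above.
#include <cassert>
#include <cstdint>

static uint64_t trailingOnes(unsigned N) {
  return N == 0 ? 0 : (~UINT64_C(0) >> (64 - N));
}

static bool matchesSRLIW(uint64_t Mask, unsigned ShAmt) {
  // Bits below ShAmt are shifted out, so they may be missing from Mask.
  return (Mask | trailingOnes(ShAmt)) == UINT64_C(0xffffffff);
}

int main() {
  assert(matchesSRLIW(0xffffffff, 4));   // full 32-bit mask
  assert(matchesSRLIW(0xffffff00, 8));   // low 8 bits pruned by a DAG combine
  assert(!matchesSRLIW(0x7fffff00, 8));  // bit 31 cleared: not a zext of i32
  return 0;
}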
-
// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
// on RV64).
// SLLIUW is the same as SLLI except for the fact that it clears the bits
// XLEN-1:32 of the input RS1 before shifting.
-// We first check that it is the right node tree:
+// A PatFrag has already checked that it has the right structure:
//
// (AND (SHL RS1, VC2), VC1)
//
// We check that VC2, the shamt is less than 32, otherwise the pattern is
// exactly the same as SLLI and we give priority to that.
-// Eventually we check that that VC1, the mask used to clear the upper 32 bits
+// Eventually we check that VC1, the mask used to clear the upper 32 bits
// of RS1, is correct:
//
// VC1 == (0xFFFFFFFF << VC2)
+//
+bool RISCVDAGToDAGISel::MatchSLLIUW(SDNode *N) const {
+ assert(N->getOpcode() == ISD::AND);
+ assert(N->getOperand(0).getOpcode() == ISD::SHL);
+ assert(isa<ConstantSDNode>(N->getOperand(1)));
+ assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-bool RISCVDAGToDAGISel::SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt) {
- if (N.getOpcode() == ISD::AND && Subtarget->getXLenVT() == MVT::i64) {
- SDValue And = N;
- if (And.getOperand(0).getOpcode() == ISD::SHL) {
- SDValue Shl = And.getOperand(0);
- if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
- isa<ConstantSDNode>(And.getOperand(1))) {
- uint64_t VC1 = And.getConstantOperandVal(1);
- uint64_t VC2 = Shl.getConstantOperandVal(1);
- if (VC2 < 32 && VC1 == ((uint64_t)0xFFFFFFFF << VC2)) {
- RS1 = Shl.getOperand(0);
- Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
- Shl.getOperand(1).getValueType());
- return true;
- }
- }
- }
- }
- return false;
+ // The IsRV64 predicate is checked after PatFrag predicates so we can get
+ // here even on RV32.
+ if (!Subtarget->is64Bit())
+ return false;
+
+ SDValue Shl = N->getOperand(0);
+ uint64_t VC1 = N->getConstantOperandVal(1);
+ uint64_t VC2 = Shl.getConstantOperandVal(1);
+
+ // Immediate range should be enforced by uimm5 predicate.
+ assert(VC2 < 32 && "Unexpected immediate");
+ return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
}
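The rewritten predicate tests the mask in shifted form: (VC1 >> VC2) == 0xFFFFFFFF holds exactly when bits VC2+31:VC2 are all set and every bit above them is clear, while bits below VC2 are ignored because the SHL clears them anyway. A standalone sketch of the check (not LLVM code):

// Standalone sketch: the shifted-mask test used by MatchSLLIUW above, where
// VC1 is the AND mask and VC2 the shift amount (VC2 < 32).
#include <cassert>
#include <cstdint>

static bool matchesSLLIUW(uint64_t VC1, unsigned VC2) {
  return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
}

int main() {
  // (and (shl X, 4), 0xFFFFFFFF0): exactly the low 32 bits of X survive.
  assert(matchesSLLIUW(UINT64_C(0xFFFFFFFF) << 4, 4));
  // A mask that also keeps bit 36 would let an upper bit of X through.
  assert(!matchesSLLIUW((UINT64_C(0xFFFFFFFF) << 4) | (UINT64_C(1) << 36), 4));
  return 0;
}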
-// Check that it is a SLOIW (Shift Left Ones Immediate i32 on RV64).
-// We first check that it is the right node tree:
-//
-// (SIGN_EXTEND_INREG (OR (SHL RS1, VC2), VC1))
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC1 == maskTrailingOnes<uint32_t>(VC2)
-
-bool RISCVDAGToDAGISel::SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
- if (Subtarget->getXLenVT() == MVT::i64 &&
- N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
- if (N.getOperand(0).getOpcode() == ISD::OR) {
- SDValue Or = N.getOperand(0);
- if (Or.getOperand(0).getOpcode() == ISD::SHL) {
- SDValue Shl = Or.getOperand(0);
- if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
- isa<ConstantSDNode>(Or.getOperand(1))) {
- uint32_t VC1 = Or.getConstantOperandVal(1);
- uint32_t VC2 = Shl.getConstantOperandVal(1);
- if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
- RS1 = Shl.getOperand(0);
- Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
- Shl.getOperand(1).getValueType());
- return true;
- }
- }
- }
- }
- }
- return false;
+// X0 has special meaning for vsetvl/vsetvli.
+//  rd | rs1 |   AVL value | Effect on vl
+//--------------------------------------------------------------
+// !X0 |  X0 |       VLMAX | Set vl to VLMAX
+//  X0 |  X0 | Value in vl | Keep current vl, just change vtype.
+bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
+ // If the VL value is a constant 0, manually select it to an ADDI with 0
+ // immediate to prevent the default selection path from matching it to X0.
+ auto *C = dyn_cast<ConstantSDNode>(N);
+ if (C && C->isNullValue())
+ VL = SDValue(selectImm(CurDAG, SDLoc(N), 0, Subtarget->getXLenVT()), 0);
+ else
+ VL = N;
+
+ return true;
}
-// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64).
-// We first check that it is the right node tree:
-//
-// (OR (SHL RS1, VC2), VC1)
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC1 == maskLeadingOnes<uint32_t>(VC2)
-
-bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
- if (N.getOpcode() == ISD::OR && Subtarget->getXLenVT() == MVT::i64) {
- SDValue Or = N;
- if (Or.getOperand(0).getOpcode() == ISD::SRL) {
- SDValue Srl = Or.getOperand(0);
- if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
- isa<ConstantSDNode>(Or.getOperand(1))) {
- uint32_t VC1 = Or.getConstantOperandVal(1);
- uint32_t VC2 = Srl.getConstantOperandVal(1);
- if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
- RS1 = Srl.getOperand(0);
- Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
- Srl.getOperand(1).getValueType());
- return true;
- }
- }
- }
- }
- return false;
+bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
+ if (N.getOpcode() != ISD::SPLAT_VECTOR &&
+ N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64)
+ return false;
+ SplatVal = N.getOperand(0);
+ return true;
}
-// Check that it is a RORIW (i32 Right Rotate Immediate on RV64).
-// We first check that it is the right node tree:
-//
-// (SIGN_EXTEND_INREG (OR (SHL (AsserSext RS1, i32), VC2),
-// (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
-//
-// Then we check that the constant operands respect these constraints:
-//
-// VC2 == 32 - VC1
-// VC3 == maskLeadingOnes<uint32_t>(VC2)
-//
-// being VC1 the Shamt we need, VC2 the complementary of Shamt over 32
-// and VC3 a 32 bit mask of (32 - VC1) leading ones.
-
-bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
- if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- Subtarget->getXLenVT() == MVT::i64 &&
- cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
- if (N.getOperand(0).getOpcode() == ISD::OR) {
- SDValue Or = N.getOperand(0);
- if (Or.getOperand(0).getOpcode() == ISD::SHL &&
- Or.getOperand(1).getOpcode() == ISD::SRL) {
- SDValue Shl = Or.getOperand(0);
- SDValue Srl = Or.getOperand(1);
- if (Srl.getOperand(0).getOpcode() == ISD::AND) {
- SDValue And = Srl.getOperand(0);
- if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
- isa<ConstantSDNode>(Shl.getOperand(1)) &&
- isa<ConstantSDNode>(And.getOperand(1))) {
- uint32_t VC1 = Srl.getConstantOperandVal(1);
- uint32_t VC2 = Shl.getConstantOperandVal(1);
- uint32_t VC3 = And.getConstantOperandVal(1);
- if (VC2 == (32 - VC1) &&
- VC3 == maskLeadingOnes<uint32_t>(VC2)) {
- RS1 = Shl.getOperand(0);
- Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
- Srl.getOperand(1).getValueType());
- return true;
- }
- }
- }
- }
- }
+bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
+ if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
+ N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64) ||
+ !isa<ConstantSDNode>(N.getOperand(0)))
+ return false;
+
+ int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+
+ // Both ISD::SPLAT_VECTOR and RISCVISD::SPLAT_VECTOR_I64 share semantics when
+ // the operand type is wider than the resulting vector element type: an
+ // implicit truncation first takes place. Therefore, perform a manual
+ // truncation/sign-extension in order to ignore any truncated bits and catch
+ // any zero-extended immediate.
+ // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
+ // sign-extending to (XLenVT -1).
+ auto XLenVT = Subtarget->getXLenVT();
+ assert(XLenVT == N.getOperand(0).getSimpleValueType() &&
+ "Unexpected splat operand type");
+ auto EltVT = N.getValueType().getVectorElementType();
+ if (EltVT.bitsLT(XLenVT)) {
+ SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
}
- return false;
+
+ if (!isInt<5>(SplatImm))
+ return false;
+
+ SplatVal = CurDAG->getTargetConstant(SplatImm, SDLoc(N), XLenVT);
+ return true;
}
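The sign-extension step is what lets a negative narrow splat, which arrives zero-extended in an XLenVT constant, still be recognised as a small signed immediate. A standalone sketch with a local reimplementation of SignExtend64 (not LLVM code):

// Standalone sketch: the narrowing sign-extension that lets an i8 -1 splat,
// seen as XLenVT 255, qualify as a simm5 in selectVSplatSimm5 above.
#include <cassert>
#include <cstdint>

static int64_t signExtend64(uint64_t X, unsigned Bits) {
  return int64_t(X << (64 - Bits)) >> (64 - Bits);
}

static bool isInt5(int64_t X) { return X >= -16 && X <= 15; }

int main() {
  // i8 -1 splat: arrives as XLenVT 255, sign-extends to -1, fits simm5.
  assert(signExtend64(255, 8) == -1 && isInt5(-1));
  // i8 16 splat: stays 16 after sign-extension, which does not fit simm5.
  assert(signExtend64(16, 8) == 16 && !isInt5(16));
  return 0;
}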
-// Check that it is a FSRIW (i32 Funnel Shift Right Immediate on RV64).
-// We first check that it is the right node tree:
-//
-// (SIGN_EXTEND_INREG (OR (SHL (AsserSext RS1, i32), VC2),
-// (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
-//
-// Then we check that the constant operands respect these constraints:
-//
-// VC2 == 32 - VC1
-// VC3 == maskLeadingOnes<uint32_t>(VC2)
-//
-// being VC1 the Shamt we need, VC2 the complementary of Shamt over 32
-// and VC3 a 32 bit mask of (32 - VC1) leading ones.
-
-bool RISCVDAGToDAGISel::SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2,
- SDValue &Shamt) {
- if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- Subtarget->getXLenVT() == MVT::i64 &&
- cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
- if (N.getOperand(0).getOpcode() == ISD::OR) {
- SDValue Or = N.getOperand(0);
- if (Or.getOperand(0).getOpcode() == ISD::SHL &&
- Or.getOperand(1).getOpcode() == ISD::SRL) {
- SDValue Shl = Or.getOperand(0);
- SDValue Srl = Or.getOperand(1);
- if (Srl.getOperand(0).getOpcode() == ISD::AND) {
- SDValue And = Srl.getOperand(0);
- if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
- isa<ConstantSDNode>(Shl.getOperand(1)) &&
- isa<ConstantSDNode>(And.getOperand(1))) {
- uint32_t VC1 = Srl.getConstantOperandVal(1);
- uint32_t VC2 = Shl.getConstantOperandVal(1);
- uint32_t VC3 = And.getConstantOperandVal(1);
- if (VC2 == (32 - VC1) &&
- VC3 == maskLeadingOnes<uint32_t>(VC2)) {
- RS1 = Shl.getOperand(0);
- RS2 = And.getOperand(0);
- Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
- Srl.getOperand(1).getValueType());
- return true;
- }
- }
- }
- }
- }
- }
- return false;
+bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
+ if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
+ N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64) ||
+ !isa<ConstantSDNode>(N.getOperand(0)))
+ return false;
+
+ int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+
+ if (!isUInt<5>(SplatImm))
+ return false;
+
+ SplatVal =
+ CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
+
+ return true;
}
// Merge an ADDI into the offset of a load/store instruction where possible.
@@ -536,6 +961,7 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
case RISCV::LHU:
case RISCV::LWU:
case RISCV::LD:
+ case RISCV::FLH:
case RISCV::FLW:
case RISCV::FLD:
BaseOpIdx = 0;
@@ -545,6 +971,7 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
case RISCV::SH:
case RISCV::SW:
case RISCV::SD:
+ case RISCV::FSH:
case RISCV::FSW:
case RISCV::FSD:
BaseOpIdx = 1;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 0ca12510a230..6099586d049d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -45,14 +45,25 @@ public:
bool SelectAddrFI(SDValue Addr, SDValue &Base);
- bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt);
- bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt);
- bool SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt);
- bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
- bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
- bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
- bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
- bool SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2, SDValue &Shamt);
+ bool MatchSRLIW(SDNode *N) const;
+ bool MatchSLLIUW(SDNode *N) const;
+
+ bool selectVLOp(SDValue N, SDValue &VL);
+
+ bool selectVSplat(SDValue N, SDValue &SplatVal);
+ bool selectVSplatSimm5(SDValue N, SDValue &SplatVal);
+ bool selectVSplatUimm5(SDValue N, SDValue &SplatVal);
+
+ void selectVLSEG(SDNode *Node, unsigned IntNo, bool IsStrided);
+ void selectVLSEGMask(SDNode *Node, unsigned IntNo, bool IsStrided);
+ void selectVLSEGFF(SDNode *Node);
+ void selectVLSEGFFMask(SDNode *Node);
+ void selectVLXSEG(SDNode *Node, unsigned IntNo);
+ void selectVLXSEGMask(SDNode *Node, unsigned IntNo);
+ void selectVSSEG(SDNode *Node, unsigned IntNo, bool IsStrided);
+ void selectVSSEGMask(SDNode *Node, unsigned IntNo, bool IsStrided);
+ void selectVSXSEG(SDNode *Node, unsigned IntNo);
+ void selectVSXSEGMask(SDNode *Node, unsigned IntNo);
// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 03d9eefd59d0..97f46d9731b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "RISCVISelLowering.h"
+#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
-#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -33,6 +32,7 @@
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -83,11 +83,73 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Set up the register classes.
addRegisterClass(XLenVT, &RISCV::GPRRegClass);
+ if (Subtarget.hasStdExtZfh())
+ addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtF())
addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD())
addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
+ if (Subtarget.hasStdExtV()) {
+ addRegisterClass(RISCVVMVTs::vbool64_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vbool32_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vbool16_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vbool8_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vbool4_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vbool2_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vbool1_t, &RISCV::VRRegClass);
+
+ addRegisterClass(RISCVVMVTs::vint8mf8_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vint8mf4_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vint8mf2_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vint8m1_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vint8m2_t, &RISCV::VRM2RegClass);
+ addRegisterClass(RISCVVMVTs::vint8m4_t, &RISCV::VRM4RegClass);
+ addRegisterClass(RISCVVMVTs::vint8m8_t, &RISCV::VRM8RegClass);
+
+ addRegisterClass(RISCVVMVTs::vint16mf4_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vint16mf2_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vint16m1_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vint16m2_t, &RISCV::VRM2RegClass);
+ addRegisterClass(RISCVVMVTs::vint16m4_t, &RISCV::VRM4RegClass);
+ addRegisterClass(RISCVVMVTs::vint16m8_t, &RISCV::VRM8RegClass);
+
+ addRegisterClass(RISCVVMVTs::vint32mf2_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vint32m1_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vint32m2_t, &RISCV::VRM2RegClass);
+ addRegisterClass(RISCVVMVTs::vint32m4_t, &RISCV::VRM4RegClass);
+ addRegisterClass(RISCVVMVTs::vint32m8_t, &RISCV::VRM8RegClass);
+
+ addRegisterClass(RISCVVMVTs::vint64m1_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vint64m2_t, &RISCV::VRM2RegClass);
+ addRegisterClass(RISCVVMVTs::vint64m4_t, &RISCV::VRM4RegClass);
+ addRegisterClass(RISCVVMVTs::vint64m8_t, &RISCV::VRM8RegClass);
+
+ if (Subtarget.hasStdExtZfh()) {
+ addRegisterClass(RISCVVMVTs::vfloat16mf4_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vfloat16mf2_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vfloat16m1_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vfloat16m2_t, &RISCV::VRM2RegClass);
+ addRegisterClass(RISCVVMVTs::vfloat16m4_t, &RISCV::VRM4RegClass);
+ addRegisterClass(RISCVVMVTs::vfloat16m8_t, &RISCV::VRM8RegClass);
+ }
+
+ if (Subtarget.hasStdExtF()) {
+ addRegisterClass(RISCVVMVTs::vfloat32mf2_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vfloat32m1_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vfloat32m2_t, &RISCV::VRM2RegClass);
+ addRegisterClass(RISCVVMVTs::vfloat32m4_t, &RISCV::VRM4RegClass);
+ addRegisterClass(RISCVVMVTs::vfloat32m8_t, &RISCV::VRM8RegClass);
+ }
+
+ if (Subtarget.hasStdExtD()) {
+ addRegisterClass(RISCVVMVTs::vfloat64m1_t, &RISCV::VRRegClass);
+ addRegisterClass(RISCVVMVTs::vfloat64m2_t, &RISCV::VRM2RegClass);
+ addRegisterClass(RISCVVMVTs::vfloat64m4_t, &RISCV::VRM4RegClass);
+ addRegisterClass(RISCVVMVTs::vfloat64m8_t, &RISCV::VRM8RegClass);
+ }
+ }
+
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
@@ -101,7 +163,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, XLenVT, Expand);
- setOperationAction(ISD::SELECT, XLenVT, Custom);
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
@@ -112,8 +173,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
- for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ if (!Subtarget.hasStdExtZbb()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ }
if (Subtarget.is64Bit()) {
setOperationAction(ISD::ADD, MVT::i32, Custom);
@@ -135,6 +199,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
setOperationAction(ISD::MUL, MVT::i32, Custom);
+
+ setOperationAction(ISD::SDIV, MVT::i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::i8, Custom);
+ setOperationAction(ISD::UREM, MVT::i8, Custom);
+ setOperationAction(ISD::SDIV, MVT::i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::i16, Custom);
+ setOperationAction(ISD::UREM, MVT::i16, Custom);
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Custom);
@@ -149,46 +220,90 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
- if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) {
+ if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::ROTL, MVT::i32, Custom);
+ setOperationAction(ISD::ROTR, MVT::i32, Custom);
+ }
+ } else {
setOperationAction(ISD::ROTL, XLenVT, Expand);
setOperationAction(ISD::ROTR, XLenVT, Expand);
}
- if (!Subtarget.hasStdExtZbp())
- setOperationAction(ISD::BSWAP, XLenVT, Expand);
+ if (Subtarget.hasStdExtZbp()) {
+ // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
+ // more combining.
+ setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
+ setOperationAction(ISD::BSWAP, XLenVT, Custom);
- if (!Subtarget.hasStdExtZbb()) {
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
+ setOperationAction(ISD::BSWAP, MVT::i32, Custom);
+ }
+ } else {
+ // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
+ // pattern match it directly in isel.
+ setOperationAction(ISD::BSWAP, XLenVT,
+ Subtarget.hasStdExtZbb() ? Legal : Expand);
+ }
+
+ if (Subtarget.hasStdExtZbb()) {
+ setOperationAction(ISD::SMIN, XLenVT, Legal);
+ setOperationAction(ISD::SMAX, XLenVT, Legal);
+ setOperationAction(ISD::UMIN, XLenVT, Legal);
+ setOperationAction(ISD::UMAX, XLenVT, Legal);
+ } else {
setOperationAction(ISD::CTTZ, XLenVT, Expand);
setOperationAction(ISD::CTLZ, XLenVT, Expand);
setOperationAction(ISD::CTPOP, XLenVT, Expand);
}
- if (Subtarget.hasStdExtZbp())
- setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
-
if (Subtarget.hasStdExtZbt()) {
setOperationAction(ISD::FSHL, XLenVT, Legal);
setOperationAction(ISD::FSHR, XLenVT, Legal);
+ setOperationAction(ISD::SELECT, XLenVT, Legal);
+
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::FSHL, MVT::i32, Custom);
+ setOperationAction(ISD::FSHR, MVT::i32, Custom);
+ }
+ } else {
+ setOperationAction(ISD::SELECT, XLenVT, Custom);
}
- ISD::CondCode FPCCToExtend[] = {
+ ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
- ISD::SETGE, ISD::SETNE};
+ ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
- ISD::NodeType FPOpToExtend[] = {
+ ISD::NodeType FPOpToExpand[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
ISD::FP_TO_FP16};
+ if (Subtarget.hasStdExtZfh())
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+
+ if (Subtarget.hasStdExtZfh()) {
+ setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
+ for (auto CC : FPCCToExpand)
+ setCondCodeAction(CC, MVT::f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+ setOperationAction(ISD::SELECT, MVT::f16, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f16, Expand);
+ for (auto Op : FPOpToExpand)
+ setOperationAction(Op, MVT::f16, Expand);
+ }
+
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
- for (auto CC : FPCCToExtend)
+ for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
- for (auto Op : FPOpToExtend)
+ for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
@@ -200,21 +315,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtD()) {
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- for (auto CC : FPCCToExtend)
+ for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
- for (auto Op : FPOpToExtend)
+ for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
}
- if (Subtarget.is64Bit() &&
- !(Subtarget.hasStdExtD() || Subtarget.hasStdExtF())) {
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
@@ -224,6 +338,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
+ setOperationAction(ISD::JumpTable, XLenVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
@@ -245,25 +360,133 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setBooleanContents(ZeroOrOneBooleanContent);
+ if (Subtarget.hasStdExtV()) {
+ setBooleanVectorContents(ZeroOrOneBooleanContent);
+
+ setOperationAction(ISD::VSCALE, XLenVT, Custom);
+
+ // RVV intrinsics may have illegal operands.
+ // We also need to custom legalize vmv.x.s.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
+
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+ }
+
+ for (auto VT : MVT::integer_scalable_vector_valuetypes()) {
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
+
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+
+ if (isTypeLegal(VT)) {
+ // Custom-lower extensions and truncations from/to mask types.
+ setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+
+ // We custom-lower all legally-typed vector truncates:
+ // 1. Mask VTs are custom-expanded into a series of standard nodes
+ // 2. Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
+ // nodes which truncate by one power of two at a time.
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+
+ // Custom-lower insert/extract operations to simplify patterns.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ }
+ }
+
+ // We must custom-lower certain vXi64 operations on RV32 due to the vector
+ // element type being illegal.
+ if (!Subtarget.is64Bit()) {
+ setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
+ }
+
+ // Expand various CCs to best match the RVV ISA, which natively supports UNE
+ // but no other unordered comparisons, and supports all ordered comparisons
+ // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
+ // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
+ // and we pattern-match those back to the "original", swapping operands once
+ // more. This way we catch both operations and both "vf" and "fv" forms with
+ // fewer patterns.
+ ISD::CondCode VFPCCToExpand[] = {
+ ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
+ ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
+ ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
+ };
+
+ // Sets common operation actions on RVV floating-point vector types.
+ const auto SetCommonVFPActions = [&](MVT VT) {
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+ // Custom-lower insert/extract operations to simplify patterns.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ for (auto CC : VFPCCToExpand)
+ setCondCodeAction(CC, VT, Expand);
+ };
+
+ if (Subtarget.hasStdExtZfh()) {
+ for (auto VT : {RISCVVMVTs::vfloat16mf4_t, RISCVVMVTs::vfloat16mf2_t,
+ RISCVVMVTs::vfloat16m1_t, RISCVVMVTs::vfloat16m2_t,
+ RISCVVMVTs::vfloat16m4_t, RISCVVMVTs::vfloat16m8_t})
+ SetCommonVFPActions(VT);
+ }
+
+ if (Subtarget.hasStdExtF()) {
+ for (auto VT : {RISCVVMVTs::vfloat32mf2_t, RISCVVMVTs::vfloat32m1_t,
+ RISCVVMVTs::vfloat32m2_t, RISCVVMVTs::vfloat32m4_t,
+ RISCVVMVTs::vfloat32m8_t})
+ SetCommonVFPActions(VT);
+ }
+
+ if (Subtarget.hasStdExtD()) {
+ for (auto VT : {RISCVVMVTs::vfloat64m1_t, RISCVVMVTs::vfloat64m2_t,
+ RISCVVMVTs::vfloat64m4_t, RISCVVMVTs::vfloat64m8_t})
+ SetCommonVFPActions(VT);
+ }
+ }
+
// Function alignments.
const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
setMinFunctionAlignment(FunctionAlignment);
setPrefFunctionAlignment(FunctionAlignment);
- // Effectively disable jump table generation.
- setMinimumJumpTableEntries(INT_MAX);
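+ // Re-enable jump table generation for switches with at least five entries.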
+ setMinimumJumpTableEntries(5);
// Jumps are expensive, compared to logic
setJumpIsExpensive();
// We can use any register for comparisons
setHasMultipleConditionRegisters();
+
+ setTargetDAGCombine(ISD::SETCC);
+ if (Subtarget.hasStdExtZbp()) {
+ setTargetDAGCombine(ISD::OR);
+ }
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
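+ // With the V extension, vector comparisons produce mask vectors with one
+ // i1 element per input element.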
+ if (Subtarget.hasStdExtV())
+ return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
@@ -367,8 +590,18 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
+bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
+ return Subtarget.hasStdExtZbb();
+}
+
+bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
+ return Subtarget.hasStdExtZbb();
+}
+
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
+ if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
+ return false;
if (VT == MVT::f32 && !Subtarget.hasStdExtF())
return false;
if (VT == MVT::f64 && !Subtarget.hasStdExtD())
@@ -379,7 +612,8 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
}
bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
- return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
+ return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
+ (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
(VT == MVT::f64 && Subtarget.hasStdExtD());
}
@@ -433,6 +667,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerBlockAddress(Op, DAG);
case ISD::ConstantPool:
return lowerConstantPool(Op, DAG);
+ case ISD::JumpTable:
+ return lowerJumpTable(Op, DAG);
case ISD::GlobalTLSAddress:
return lowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT:
@@ -450,18 +686,105 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::SRL_PARTS:
return lowerShiftRightParts(Op, DAG, false);
case ISD::BITCAST: {
- assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
+ assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
+ Subtarget.hasStdExtZfh()) &&
"Unexpected custom legalisation");
SDLoc DL(Op);
SDValue Op0 = Op.getOperand(0);
- if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
- return SDValue();
- SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
- SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
- return FPConv;
+ if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
+ if (Op0.getValueType() != MVT::i16)
+ return SDValue();
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
+ SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
+ return FPConv;
+ } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
+ Subtarget.hasStdExtF()) {
+ if (Op0.getValueType() != MVT::i32)
+ return SDValue();
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ SDValue FPConv =
+ DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
+ return FPConv;
+ }
+ return SDValue();
}
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return LowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::BSWAP:
+ case ISD::BITREVERSE: {
+ // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
+ assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+ // Start with the maximum immediate value which is the bitwidth - 1.
+ unsigned Imm = VT.getSizeInBits() - 1;
+ // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
+ if (Op.getOpcode() == ISD::BSWAP)
+ Imm &= ~0x7U;
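+ // Each set control bit k of GREVI swaps adjacent 2^k-bit blocks, so for i32
+ // an immediate of 31 is a full bit reverse and 24 reverses only the bytes.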
+ return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
+ DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
+ }
+ case ISD::TRUNCATE: {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ // Only custom-lower vector truncates
+ if (!VT.isVector())
+ return Op;
+
+ // Truncates to mask types are handled differently
+ if (VT.getVectorElementType() == MVT::i1)
+ return lowerVectorMaskTrunc(Op, DAG);
+
+ // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
+ // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
+ // truncate by one power of two at a time.
+ EVT DstEltVT = VT.getVectorElementType();
+
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT SrcEltVT = SrcVT.getVectorElementType();
+
+ assert(DstEltVT.bitsLT(SrcEltVT) &&
+ isPowerOf2_64(DstEltVT.getSizeInBits()) &&
+ isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
+ "Unexpected vector truncate lowering");
+
+ SDValue Result = Src;
+ LLVMContext &Context = *DAG.getContext();
+ const ElementCount Count = SrcVT.getVectorElementCount();
+ do {
+ SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
+ EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
+ Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
+ } while (SrcEltVT != DstEltVT);
+
+ return Result;
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
+ case ISD::SIGN_EXTEND:
+ return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
+ case ISD::SPLAT_VECTOR:
+ return lowerSPLATVECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::VSCALE: {
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+ SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
+ // We define our scalable vector types for lmul=1 to use a 64 bit known
+ // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
+ // vscale as VLENB / 8.
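+ // (Illustrative: with VLEN == 128, VLENB == 16 and vscale == 16 >> 3 == 2,
+ // so <vscale x 2 x i32> holds four i32 elements.)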
+ SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
+ DAG.getConstant(3, DL, VT));
+ return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
+ }
}
}
@@ -482,6 +805,11 @@ static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
N->getOffset(), Flags);
}
+static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) {
+ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
+}
+
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
bool IsLocal) const {
@@ -559,6 +887,13 @@ SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
return getAddr(N, DAG);
}
+SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
+
+ return getAddr(N, DAG);
+}
+
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
SelectionDAG &DAG,
bool UseGOT) const {
@@ -642,6 +977,10 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
+ if (DAG.getMachineFunction().getFunction().getCallingConv() ==
+ CallingConv::GHC)
+ report_fatal_error("In GHC calling convention TLS is not supported");
+
SDValue Addr;
switch (Model) {
case TLSModel::LocalExec:
@@ -689,9 +1028,8 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
normaliseSetCC(LHS, RHS, CCVal);
SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
- return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
+ return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
// Otherwise:
@@ -700,10 +1038,9 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
- return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
+ return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -865,10 +1202,226 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
return DAG.getMergeValues(Parts, DL);
}
+// Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
+// illegal (currently only vXi64 RV32).
+// FIXME: We could also catch non-constant sign-extended i32 values and lower
+// them to SPLAT_VECTOR_I64
+SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VecVT = Op.getValueType();
+ assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
+ "Unexpected SPLAT_VECTOR lowering");
+ SDValue SplatVal = Op.getOperand(0);
+
+ // If we can prove that the value is a sign-extended 32-bit value, lower this
+ // as a custom node in order to try and match RVV vector/scalar instructions.
+ if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
+ if (isInt<32>(CVal->getSExtValue()))
+ return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
+ DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
+ }
+
+ if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
+ SplatVal.getOperand(0).getValueType() == MVT::i32) {
+ return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
+ SplatVal.getOperand(0));
+ }
+
+ // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
+ // to accidentally sign-extend the 32-bit halves to the e64 SEW:
+ // vmv.v.x vX, hi
+ // vsll.vx vX, vX, /*32*/
+ // vmv.v.x vY, lo
+ // vsll.vx vY, vY, /*32*/
+ // vsrl.vx vY, vY, /*32*/
+ // vor.vv vX, vX, vY
+ SDValue One = DAG.getConstant(1, DL, MVT::i32);
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
+ SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);
+
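+ // The shl/srl pair below clears the upper 32 bits of the splatted Lo, which
+ // the splat would otherwise sign-extend up to the e64 SEW.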
+ Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
+ Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
+ Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
+
+ if (isNullConstant(Hi))
+ return Lo;
+
+ Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
+
+ return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
+}
+
+// Custom-lower extensions from mask vectors by using a vselect either with 1
+// for zero/any-extension or -1 for sign-extension:
+// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
+// Note that any-extension is lowered identically to zero-extension.
+SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
+ int64_t ExtTrueVal) const {
+ SDLoc DL(Op);
+ EVT VecVT = Op.getValueType();
+ SDValue Src = Op.getOperand(0);
+ // Only custom-lower extensions from mask types
+ if (!Src.getValueType().isVector() ||
+ Src.getValueType().getVectorElementType() != MVT::i1)
+ return Op;
+
+ // Be careful not to introduce illegal scalar types at this stage, and be
+ // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
+ // illegal and must be expanded. Since we know that the constants are
+ // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
+ bool IsRV32E64 =
+ !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
+ SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
+ SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT());
+
+ if (!IsRV32E64) {
+ SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
+ SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
+ } else {
+ SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
+ SplatTrueVal =
+ DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
+ }
+
+ return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
+}
+
+// Custom-lower truncations from vectors to mask vectors by using a mask and a
+// setcc operation:
+// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
+SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT MaskVT = Op.getValueType();
+ // Only expect to custom-lower truncations to mask types
+ assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
+ "Unexpected type for vector mask lowering");
+ SDValue Src = Op.getOperand(0);
+ EVT VecVT = Src.getValueType();
+
+ // Be careful not to introduce illegal scalar types at this stage, and be
+ // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
+ // illegal and must be expanded. Since we know that the constants are
+ // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
+ bool IsRV32E64 =
+ !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
+ SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
+ SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
+
+ if (!IsRV32E64) {
+ SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
+ SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
+ } else {
+ SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
+ SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
+ }
+
+ SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
+
+ return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
+}
+
+SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VecVT = Op.getValueType();
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+
+ // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
+ // first slid down into position, the value is inserted into the first
+ // position, and the vector is slid back up. We do this to simplify patterns.
+ // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
+ if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) {
+ if (isNullConstant(Idx))
+ return Op;
+ SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
+ DAG.getUNDEF(VecVT), Vec, Idx);
+ SDValue InsertElt0 =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val,
+ DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+
+ return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx);
+ }
+
+ // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
+ // is illegal (currently only vXi64 RV32).
+ // Since there is no easy way of getting a single element into a vector when
+ // XLEN<SEW, we lower the operation to the following sequence:
+ // splat vVal, rVal
+ // vid.v vVid
+ // vmseq.vx mMask, vVid, rIdx
+ // vmerge.vvm vDest, vSrc, vVal, mMask
+ // This essentially merges the original vector with the inserted element by
+ // using a mask whose only set bit is that corresponding to the insert
+ // index.
+ SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
+ SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);
+
+ SDValue VID = DAG.getNode(RISCVISD::VID, DL, VecVT);
+ auto SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
+ SDValue Mask = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);
+
+ return DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, SplattedVal, Vec);
+}
+
+// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
+// extract the first element: (extractelt (slidedown vec, idx), 0). This is
+// done to maintain parity with the legalization of RV32 vXi64.
+SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Idx = Op.getOperand(1);
+ if (isNullConstant(Idx))
+ return Op;
+
+ SDValue Vec = Op.getOperand(0);
+ EVT EltVT = Op.getValueType();
+ EVT VecVT = Vec.getValueType();
+ SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
+ DAG.getUNDEF(VecVT), Vec, Idx);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Slidedown,
+ DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+}
+
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc DL(Op);
+
+ if (Subtarget.hasStdExtV()) {
+ // Some RVV intrinsics may claim that they want an integer operand to be
+ // extended.
+ if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
+ RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
+ if (II->ExtendedOperand) {
+ assert(II->ExtendedOperand < Op.getNumOperands());
+ SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
+ SDValue &ScalarOp = Operands[II->ExtendedOperand];
+ EVT OpVT = ScalarOp.getValueType();
+ if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
+ (OpVT == MVT::i32 && Subtarget.is64Bit())) {
+ // If the operand is a constant, sign extend to increase our chances
+ // of being able to use a .vi instruction. ANY_EXTEND would become a
+ // zero extend and the simm5 check in isel would fail.
+ // FIXME: Should we ignore the upper bits in isel instead?
+ unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
+ : ISD::ANY_EXTEND;
+ ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
+ Operands);
+ }
+ }
+ }
+ }
+
switch (IntNo) {
default:
return SDValue(); // Don't custom lower most intrinsics.
@@ -876,6 +1429,151 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getRegister(RISCV::X4, PtrVT);
}
+ case Intrinsic::riscv_vmv_x_s:
+ assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
+ return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
+ Op.getOperand(1));
+ }
+}
+
+SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ SDLoc DL(Op);
+
+ if (Subtarget.hasStdExtV()) {
+ // Some RVV intrinsics may claim that they want an integer operand to be
+ // extended.
+ if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
+ RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
+ if (II->ExtendedOperand) {
+ // The operands start from the second argument in INTRINSIC_W_CHAIN.
+ unsigned ExtendOp = II->ExtendedOperand + 1;
+ assert(ExtendOp < Op.getNumOperands());
+ SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
+ SDValue &ScalarOp = Operands[ExtendOp];
+ EVT OpVT = ScalarOp.getValueType();
+ if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
+ (OpVT == MVT::i32 && Subtarget.is64Bit())) {
+ // If the operand is a constant, sign extend to increase our chances
+ // of being able to use a .vi instruction. ANY_EXTEND would become a
+ // zero extend and the simm5 check in isel would fail.
+ // FIXME: Should we ignore the upper bits in isel instead?
+ unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
+ : ISD::ANY_EXTEND;
+ ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
+ return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
+ Operands);
+ }
+ }
+ }
+ }
+
+ unsigned NF = 1;
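+ // NF counts the number of segment fields: every vlsegNff case below
+ // increments it and falls through, so e.g. riscv_vlseg8ff reaches the shared
+ // body with NF == 8.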
+ switch (IntNo) {
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::riscv_vleff: {
+ SDLoc DL(Op);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
+ SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0),
+ Op.getOperand(2), Op.getOperand(3));
+ VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
+ SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
+ return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
+ }
+ case Intrinsic::riscv_vleff_mask: {
+ SDLoc DL(Op);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
+ SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0),
+ Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4), Op.getOperand(5));
+ VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
+ SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
+ return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
+ }
+ case Intrinsic::riscv_vlseg8ff:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg7ff:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg6ff:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg5ff:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg4ff:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg3ff:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg2ff: {
+ NF++;
+ SDLoc DL(Op);
+ SmallVector<EVT, 8> EVTs(NF, Op.getValueType());
+ EVTs.push_back(MVT::Other);
+ EVTs.push_back(MVT::Glue);
+ SDVTList VTs = DAG.getVTList(EVTs);
+ SDValue Load =
+ DAG.getNode(RISCVISD::VLSEGFF, DL, VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ VTs = DAG.getVTList(Op->getValueType(NF), MVT::Other);
+ SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs,
+ /*Glue*/ Load.getValue(NF + 1));
+ SmallVector<SDValue, 8> Results;
+ for (unsigned i = 0; i < NF; ++i)
+ Results.push_back(Load.getValue(i));
+ Results.push_back(ReadVL);
+ Results.push_back(Load.getValue(NF)); // Chain.
+ return DAG.getMergeValues(Results, DL);
+ }
+ case Intrinsic::riscv_vlseg8ff_mask:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg7ff_mask:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg6ff_mask:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg5ff_mask:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg4ff_mask:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg3ff_mask:
+ NF++;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::riscv_vlseg2ff_mask: {
+ NF++;
+ SDLoc DL(Op);
+ SmallVector<EVT, 8> EVTs(NF, Op.getValueType());
+ EVTs.push_back(MVT::Other);
+ EVTs.push_back(MVT::Glue);
+ SDVTList VTs = DAG.getVTList(EVTs);
+ SmallVector<SDValue, 13> LoadOps;
+ LoadOps.push_back(Op.getOperand(0)); // Chain.
+ LoadOps.push_back(Op.getOperand(1)); // Intrinsic ID.
+ for (unsigned i = 0; i < NF; ++i)
+ LoadOps.push_back(Op.getOperand(2 + i)); // MaskedOff.
+ LoadOps.push_back(Op.getOperand(2 + NF)); // Base.
+ LoadOps.push_back(Op.getOperand(3 + NF)); // Mask.
+ LoadOps.push_back(Op.getOperand(4 + NF)); // VL.
+ SDValue Load = DAG.getNode(RISCVISD::VLSEGFF_MASK, DL, VTs, LoadOps);
+ VTs = DAG.getVTList(Op->getValueType(NF), MVT::Other);
+ SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs,
+ /*Glue*/ Load.getValue(NF + 1));
+ SmallVector<SDValue, 8> Results;
+ for (unsigned i = 0; i < NF; ++i)
+ Results.push_back(Load.getValue(i));
+ Results.push_back(ReadVL);
+ Results.push_back(Load.getValue(NF)); // Chain.
+ return DAG.getMergeValues(Results, DL);
+ }
}
}
@@ -897,6 +1595,14 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
return RISCVISD::DIVUW;
case ISD::UREM:
return RISCVISD::REMUW;
+ case ISD::ROTL:
+ return RISCVISD::ROLW;
+ case ISD::ROTR:
+ return RISCVISD::RORW;
+ case RISCVISD::GREVI:
+ return RISCVISD::GREVIW;
+ case RISCVISD::GORCI:
+ return RISCVISD::GORCIW;
}
}
@@ -905,14 +1611,15 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later on because the fact that the operation was originally of type i32 is
// lost.
-static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
+static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
+ unsigned ExtOpc = ISD::ANY_EXTEND) {
SDLoc DL(N);
RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
- SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
- SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
+ SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
// ReplaceNodeResults requires we maintain the same type for the return value.
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
// Converts the given 32-bit operation to a i64 operation with signed extension
@@ -942,6 +1649,13 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
+ // If the FP type needs to be softened, emit a library call using the 'si'
+ // version. If we left it to default legalization we'd end up with 'di'. If
+ // the FP type doesn't need to be softened, just let generic type
+ // legalization promote the result type.
+ if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
+ TargetLowering::TypeSoftenFloat)
+ return;
RTLIB::Libcall LC;
if (N->getOpcode() == ISD::FP_TO_SINT ||
N->getOpcode() == ISD::STRICT_FP_TO_SINT)
@@ -991,31 +1705,377 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
return;
Results.push_back(customLegalizeToWOp(N, DAG));
break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ Results.push_back(customLegalizeToWOp(N, DAG));
+ break;
case ISD::SDIV:
case ISD::UDIV:
- case ISD::UREM:
- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
- Subtarget.hasStdExtM() && "Unexpected custom legalisation");
+ case ISD::UREM: {
+ MVT VT = N->getSimpleValueType(0);
+ assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
+ Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
+ "Unexpected custom legalisation");
if (N->getOperand(0).getOpcode() == ISD::Constant ||
N->getOperand(1).getOpcode() == ISD::Constant)
return;
- Results.push_back(customLegalizeToWOp(N, DAG));
+
+ // If the input is i32, use ANY_EXTEND since the W instructions don't read
+ // the upper 32 bits. For other types we need to sign or zero extend
+ // based on the opcode.
+ unsigned ExtOpc = ISD::ANY_EXTEND;
+ if (VT != MVT::i32)
+ ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+
+ Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
break;
+ }
case ISD::BITCAST: {
+ assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ Subtarget.hasStdExtF()) ||
+ (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
+ "Unexpected custom legalisation");
+ SDValue Op0 = N->getOperand(0);
+ if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
+ if (Op0.getValueType() != MVT::f16)
+ return;
+ SDValue FPConv =
+ DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
+ } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ Subtarget.hasStdExtF()) {
+ if (Op0.getValueType() != MVT::f32)
+ return;
+ SDValue FPConv =
+ DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
+ }
+ break;
+ }
+ case RISCVISD::GREVI:
+ case RISCVISD::GORCI: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
- Subtarget.hasStdExtF() && "Unexpected custom legalisation");
+ "Unexpected custom legalisation");
+ // This is similar to customLegalizeToWOp, except that we pass the second
+ // operand (a TargetConstant) straight through: it is already of type
+ // XLenVT.
SDLoc DL(N);
- SDValue Op0 = N->getOperand(0);
- if (Op0.getValueType() != MVT::f32)
- return;
- SDValue FPConv =
- DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
+ RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue NewRes =
+ DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
+ // ReplaceNodeResults requires we maintain the same type for the return
+ // value.
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
+ break;
+ }
+ case ISD::BSWAP:
+ case ISD::BITREVERSE: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
+ N->getOperand(0));
+ unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
+ SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
+ DAG.getTargetConstant(Imm, DL,
+ Subtarget.getXLenVT()));
+ // ReplaceNodeResults requires we maintain the same type for the return
+ // value.
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
+ break;
+ }
+ case ISD::FSHL:
+ case ISD::FSHR: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewOp2 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+ // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
+ // Mask the shift amount to 5 bits.
+ NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
+ DAG.getConstant(0x1f, DL, MVT::i64));
+ unsigned Opc =
+ N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
+ SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
+ // type is illegal (currently only vXi64 RV32).
+ // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
+ // transferred to the destination register. We issue two of these from the
+ // upper- and lower- halves of the SEW-bit vector element, slid down to the
+ // first element.
+ SDLoc DL(N);
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec.getValueType();
+ assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
+ VecVT.getVectorElementType() == MVT::i64 &&
+ "Unexpected EXTRACT_VECTOR_ELT legalization");
+
+ SDValue Slidedown = Vec;
+ // Unless the index is known to be 0, we must slide the vector down to get
+ // the desired element into index 0.
+ if (!isNullConstant(Idx))
+ Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
+ DAG.getUNDEF(VecVT), Vec, Idx);
+
+ MVT XLenVT = Subtarget.getXLenVT();
+ // Extract the lower XLEN bits of the correct vector element.
+ SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx);
+
+ // To extract the upper XLEN bits of the vector element, shift the first
+ // element right by 32 bits and re-extract the lower XLEN bits.
+ SDValue ThirtyTwoV =
+ DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
+ DAG.getConstant(32, DL, Subtarget.getXLenVT()));
+ SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV);
+
+ SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx);
+
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ llvm_unreachable(
+ "Don't know how to custom type legalize this intrinsic!");
+ case Intrinsic::riscv_vmv_x_s: {
+ EVT VT = N->getValueType(0);
+ assert((VT == MVT::i8 || VT == MVT::i16 ||
+ (Subtarget.is64Bit() && VT == MVT::i32)) &&
+ "Unexpected custom legalisation!");
+ SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
+ Subtarget.getXLenVT(), N->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
+ break;
+ }
+ }
break;
}
}
}
+// A structure to hold one of the bit-manipulation patterns below. Together, a
+// SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
+// (or (and (shl x, 1), 0xAAAAAAAA),
+// (and (srl x, 1), 0x55555555))
+struct RISCVBitmanipPat {
+ SDValue Op;
+ unsigned ShAmt;
+ bool IsSHL;
+
+ bool formsPairWith(const RISCVBitmanipPat &Other) const {
+ return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
+ }
+};
+
+// Matches any of the following bit-manipulation patterns:
+// (and (shl x, 1), (0x55555555 << 1))
+// (and (srl x, 1), 0x55555555)
+// (shl (and x, 0x55555555), 1)
+// (srl (and x, (0x55555555 << 1)), 1)
+// where the shift amount and mask may vary thus:
+// [1] = 0x55555555 / 0xAAAAAAAA
+// [2] = 0x33333333 / 0xCCCCCCCC
+// [4] = 0x0F0F0F0F / 0xF0F0F0F0
+// [8] = 0x00FF00FF / 0xFF00FF00
+// [16] = 0x0000FFFF / 0xFFFFFFFF
+// [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
+static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
+ Optional<uint64_t> Mask;
+ // Optionally consume a mask around the shift operation.
+ if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Op.getConstantOperandVal(1);
+ Op = Op.getOperand(0);
+ }
+ if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
+ return None;
+ bool IsSHL = Op.getOpcode() == ISD::SHL;
+
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return None;
+ auto ShAmt = Op.getConstantOperandVal(1);
+
+ if (!isPowerOf2_64(ShAmt))
+ return None;
+
+ // These are the unshifted masks which we use to match bit-manipulation
+ // patterns. They may be shifted left in certain circumstances.
+ static const uint64_t BitmanipMasks[] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
+ 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
+ };
+
+ unsigned MaskIdx = Log2_64(ShAmt);
+ if (MaskIdx >= array_lengthof(BitmanipMasks))
+ return None;
+
+ auto Src = Op.getOperand(0);
+
+ unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
+ auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
+
+ // The expected mask is shifted left when the AND is found around SHL
+ // patterns.
+ // ((x >> 1) & 0x55555555)
+ // ((x << 1) & 0xAAAAAAAA)
+ bool SHLExpMask = IsSHL;
+
+ if (!Mask) {
+ // Sometimes LLVM keeps the mask as an operand of the shift, typically when
+ // the mask is all ones: consume that now.
+ if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
+ Mask = Src.getConstantOperandVal(1);
+ Src = Src.getOperand(0);
+ // The expected mask is now in fact shifted left for SRL, so reverse the
+ // decision.
+ // ((x & 0xAAAAAAAA) >> 1)
+ // ((x & 0x55555555) << 1)
+ SHLExpMask = !SHLExpMask;
+ } else {
+ // Use a default shifted mask of all-ones if there's no AND, truncated
+ // down to the expected width. This simplifies the logic later on.
+ Mask = maskTrailingOnes<uint64_t>(Width);
+ *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
+ }
+ }
+
+ if (SHLExpMask)
+ ExpMask <<= ShAmt;
+
+ if (Mask != ExpMask)
+ return None;
+
+ return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
+}
+
+// Match the following pattern as a GREVI(W) operation
+// (or (BITMANIP_SHL x), (BITMANIP_SRL x))
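+// e.g. (or (and (shl x, 4), 0xF0F0F0F0), (and (srl x, 4), 0x0F0F0F0F)) is
+// matched as (GREVI x, 4), which swaps adjacent nibbles.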
+static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ EVT VT = Op.getValueType();
+
+ if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
+ auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
+ auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
+ if (LHS && RHS && LHS->formsPairWith(*RHS)) {
+ SDLoc DL(Op);
+ return DAG.getNode(
+ RISCVISD::GREVI, DL, VT, LHS->Op,
+ DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
+ }
+ }
+ return SDValue();
+}
+
+// Matches any of the following patterns as a GORCI(W) operation
+// 1. (or (GREVI x, shamt), x) if shamt is a power of 2
+// 2. (or x, (GREVI x, shamt)) if shamt is a power of 2
+// 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
+// Note that with the variant of 3.,
+// (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
+// the inner pattern will first be matched as GREVI and then the outer
+// pattern will be matched to GORC via the first rule above.
+// 4. (or (rotl/rotr x, bitwidth/2), x)
+static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ EVT VT = Op.getValueType();
+
+ if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
+ if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
+ isPowerOf2_32(Reverse.getConstantOperandVal(1)))
+ return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
+ // We can also form GORCI from ROTL/ROTR by half the bitwidth.
+ if ((Reverse.getOpcode() == ISD::ROTL ||
+ Reverse.getOpcode() == ISD::ROTR) &&
+ Reverse.getOperand(0) == X &&
+ isa<ConstantSDNode>(Reverse.getOperand(1))) {
+ uint64_t RotAmt = Reverse.getConstantOperandVal(1);
+ if (RotAmt == (VT.getSizeInBits() / 2))
+ return DAG.getNode(
+ RISCVISD::GORCI, DL, VT, X,
+ DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
+ }
+ return SDValue();
+ };
+
+ // Check for either commutable permutation of (or (GREVI x, shamt), x)
+ if (SDValue V = MatchOROfReverse(Op0, Op1))
+ return V;
+ if (SDValue V = MatchOROfReverse(Op1, Op0))
+ return V;
+
+ // OR is commutable so canonicalize its OR operand to the left
+ if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue OrOp0 = Op0.getOperand(0);
+ SDValue OrOp1 = Op0.getOperand(1);
+ auto LHS = matchRISCVBitmanipPat(OrOp0);
+ // OR is commutable so swap the operands and try again: x might have been
+ // on the left
+ if (!LHS) {
+ std::swap(OrOp0, OrOp1);
+ LHS = matchRISCVBitmanipPat(OrOp0);
+ }
+ auto RHS = matchRISCVBitmanipPat(Op1);
+ if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
+ return DAG.getNode(
+ RISCVISD::GORCI, DL, VT, LHS->Op,
+ DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
+ }
+ }
+ return SDValue();
+}
+
+// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
+// non-zero, and to x when C1^C2 is zero, since any repeated GREVI stage
+// undoes itself. Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2);
+// a repeated GORCI stage does not undo itself, but it is redundant.
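+// e.g. (GREVI (GREVI x, 24), 31) -> (GREVI x, 7): a byte swap followed by a
+// full bit reverse is a bit reverse within each byte (24 ^ 31 == 7).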
+static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
+ unsigned ShAmt1 = N->getConstantOperandVal(1);
+ SDValue Src = N->getOperand(0);
+
+ if (Src.getOpcode() != N->getOpcode())
+ return SDValue();
+
+ unsigned ShAmt2 = Src.getConstantOperandVal(1);
+ Src = Src.getOperand(0);
+
+ unsigned CombinedShAmt;
+ if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
+ CombinedShAmt = ShAmt1 | ShAmt2;
+ else
+ CombinedShAmt = ShAmt1 ^ ShAmt2;
+
+ if (CombinedShAmt == 0)
+ return Src;
+
+ SDLoc DL(N);
+ return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
+ DAG.getTargetConstant(CombinedShAmt, DL,
+ N->getOperand(1).getValueType()));
+}
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -1067,17 +2127,53 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
case RISCVISD::SLLW:
case RISCVISD::SRAW:
- case RISCVISD::SRLW: {
+ case RISCVISD::SRLW:
+ case RISCVISD::ROLW:
+ case RISCVISD::RORW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
- if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
- (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
- return SDValue();
+ if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
+ SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ break;
+ }
+ case RISCVISD::FSLW:
+ case RISCVISD::FSRW: {
+ // Only the lower 32 bits of the values and the lower 6 bits of the shift
+ // amount are read.
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue ShAmt = N->getOperand(2);
+ APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
+ APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
+ if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
+ SimplifyDemandedBits(Op1, OpMask, DCI) ||
+ SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
break;
}
+ case RISCVISD::GREVIW:
+ case RISCVISD::GORCIW: {
+ // Only the lower 32 bits of the first operand are read
+ SDValue Op0 = N->getOperand(0);
+ APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
+ if (SimplifyDemandedBits(Op0, Mask, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+
+ return combineGREVI_GORCI(N, DCI.DAG);
+ }
case RISCVISD::FMV_X_ANYEXTW_RV64: {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
@@ -1085,9 +2181,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// conversion is unnecessary and can be replaced with an ANY_EXTEND
// of the FMV_W_X_RV64 operand.
if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
- SDValue AExtOp =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
- return DCI.CombineTo(N, AExtOp);
+ assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
+ "Unexpected value type!");
+ return Op0.getOperand(0);
}
// This is a target-specific version of a DAGCombine performed in
@@ -1100,15 +2196,61 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
Op0.getOperand(0));
APInt SignBit = APInt::getSignMask(32).sext(64);
- if (Op0.getOpcode() == ISD::FNEG) {
- return DCI.CombineTo(N,
- DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
- DAG.getConstant(SignBit, DL, MVT::i64)));
- }
+ if (Op0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
+ DAG.getConstant(SignBit, DL, MVT::i64));
+
assert(Op0.getOpcode() == ISD::FABS);
- return DCI.CombineTo(N,
- DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
- DAG.getConstant(~SignBit, DL, MVT::i64)));
+ return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
+ DAG.getConstant(~SignBit, DL, MVT::i64));
+ }
+ case RISCVISD::GREVI:
+ case RISCVISD::GORCI:
+ return combineGREVI_GORCI(N, DCI.DAG);
+ case ISD::OR:
+ if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
+ return GREV;
+ if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
+ return GORC;
+ break;
+ case RISCVISD::SELECT_CC: {
+ // Transform
+ // (select_cc (xor X, 1), 0, setne, trueV, falseV) ->
+ // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
+ // This can occur when legalizing some floating point comparisons.
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
+ APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
+ if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) &&
+ LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) &&
+ DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) {
+ SDLoc DL(N);
+ CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
+ SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
+ return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
+ {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3),
+ N->getOperand(4)});
+ }
+ break;
+ }
+ case ISD::SETCC: {
+ // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1.
+ // Comparing with 0 may allow us to fold into bnez/beqz.
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (LHS.getValueType().isScalableVector())
+ break;
+ auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
+ if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) &&
+ DAG.MaskedValueIsZero(LHS, Mask)) {
+ SDLoc DL(N);
+ SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType());
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
+ return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC);
+ }
+ break;
}
}
@@ -1129,7 +2271,7 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (C1 && C2) {
- APInt C1Int = C1->getAPIntValue();
+ const APInt &C1Int = C1->getAPIntValue();
APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
// We can materialise `c1 << c2` into an add immediate, so it's "free",
@@ -1161,6 +2303,116 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
return true;
}
+bool RISCVTargetLowering::targetShrinkDemandedConstant(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ TargetLoweringOpt &TLO) const {
+ // Delay this optimization as late as possible.
+ if (!TLO.LegalOps)
+ return false;
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return false;
+
+ // Only handle AND for now.
+ if (Op.getOpcode() != ISD::AND)
+ return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C)
+ return false;
+
+ const APInt &Mask = C->getAPIntValue();
+
+ // Clear all non-demanded bits initially.
+ APInt ShrunkMask = Mask & DemandedBits;
+
+ // If the shrunk mask fits in sign extended 12 bits, let the target
+ // independent code apply it.
+ if (ShrunkMask.isSignedIntN(12))
+ return false;
+
+ // Try to make a smaller immediate by setting undemanded bits.
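+ // Illustrative example: with Mask == 0xFFFFFFF0 and DemandedBits == 0xFFFF0,
+ // ShrunkMask == 0xFFFF0 needs more than 12 bits, but ExpandedMask below is
+ // all ones, so setting the undemanded upper bits yields -16, which ANDI can
+ // encode.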
+
+ // We need to be able to make a negative number through a combination of mask
+ // and undemanded bits.
+ APInt ExpandedMask = Mask | ~DemandedBits;
+ if (!ExpandedMask.isNegative())
+ return false;
+
+ // Compute the fewest number of bits needed to represent the negative number.
+ unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
+
+ // Try to make a 12 bit negative immediate. If that fails try to make a 32
+ // bit negative immediate unless the shrunk immediate already fits in 32 bits.
+ APInt NewMask = ShrunkMask;
+ if (MinSignedBits <= 12)
+ NewMask.setBitsFrom(11);
+ else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
+ NewMask.setBitsFrom(31);
+ else
+ return false;
+
+ // Sanity check that the new mask is a subset of the expanded demanded mask.
+ assert(NewMask.isSubsetOf(ExpandedMask));
+
+ // If we aren't changing the mask, just return true to keep it and prevent
+ // the caller from optimizing.
+ if (NewMask == Mask)
+ return true;
+
+ // Replace the constant with the new mask.
+ SDLoc DL(Op);
+ SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+ SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
+}
+
+void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned BitWidth = Known.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+ assert((Opc >= ISD::BUILTIN_OP_END ||
+ Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN ||
+ Opc == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+
+ Known.resetAll();
+ switch (Opc) {
+ default: break;
+ case RISCVISD::REMUW: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ // We only care about the lower 32 bits.
+ Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
+ // Restore the original width by sign extending.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case RISCVISD::DIVUW: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ // We only care about the lower 32 bits.
+ Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
+ // Restore the original width by sign extending.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case RISCVISD::READ_VLENB:
+ // We assume VLENB is at least 8 bytes.
+ // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
+ Known.Zero.setLowBits(3);
+ break;
+ }
+}
+
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
@@ -1173,10 +2425,25 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::DIVW:
case RISCVISD::DIVUW:
case RISCVISD::REMUW:
+ case RISCVISD::ROLW:
+ case RISCVISD::RORW:
+ case RISCVISD::GREVIW:
+ case RISCVISD::GORCIW:
+ case RISCVISD::FSLW:
+ case RISCVISD::FSRW:
// TODO: As the result is sign-extended, this is conservatively correct. A
// more precise answer could be calculated for SRAW depending on known
// bits in the shift amount.
return 33;
+ case RISCVISD::VMV_X_S:
+ // The number of sign bits of the scalar result is computed by obtaining the
+ // element type of the input vector operand, subtracting its width from the
+ // XLEN, and then adding one (sign bit within the element type). If the
+ // element type is wider than XLen, the least-significant XLEN bits are
+ // taken.
+ if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
+ return 1;
+ return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
}
return 1;
@@ -1260,17 +2527,19 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
RI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
- MachineMemOperand::MOLoad, 8, Align(8));
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
+ MachineMemOperand *MMOLo =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
+ MachineMemOperand *MMOHi = MF.getMachineMemOperand(
+ MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
.addFrameIndex(FI)
.addImm(0)
- .addMemOperand(MMO);
+ .addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
.addFrameIndex(FI)
.addImm(4)
- .addMemOperand(MMO);
+ .addMemOperand(MMOHi);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -1290,19 +2559,21 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
- MachineMemOperand::MOStore, 8, Align(8));
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
+ MachineMemOperand *MMOLo =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
+ MachineMemOperand *MMOHi = MF.getMachineMemOperand(
+ MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
.addFrameIndex(FI)
.addImm(0)
- .addMemOperand(MMO);
+ .addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
.addFrameIndex(FI)
.addImm(4)
- .addMemOperand(MMO);
+ .addMemOperand(MMOHi);
TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -1313,6 +2584,7 @@ static bool isSelectPseudo(MachineInstr &MI) {
default:
return false;
case RISCV::Select_GPR_Using_CC_GPR:
+ case RISCV::Select_FPR16_Using_CC_GPR:
case RISCV::Select_FPR32_Using_CC_GPR:
case RISCV::Select_FPR64_Using_CC_GPR:
return true;
@@ -1442,9 +2714,80 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
return TailMBB;
}
+static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
+ int VLIndex, unsigned SEWIndex,
+ RISCVVLMUL VLMul, bool WritesElement0) {
+ MachineFunction &MF = *BB->getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+ unsigned SEW = MI.getOperand(SEWIndex).getImm();
+ assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
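+ // e.g. SEW == 32 encodes as Log2_32(32 / 8) == 2, the vtype SEW field value
+ // for 32-bit elements.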
+ RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // VL and VTYPE are alive here.
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI));
+
+ if (VLIndex >= 0) {
+ // Set VL (rs1 != X0).
+ Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ MIB.addReg(DestReg, RegState::Define | RegState::Dead)
+ .addReg(MI.getOperand(VLIndex).getReg());
+ } else
+ // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
+ MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill);
+
+ // Default to tail agnostic unless the destination is tied to a source. In
+ // that case the user would have some control over the tail values. The tail
+ // policy is also ignored on instructions that only update element 0 like
+ // vmv.s.x or reductions so use agnostic there to match the common case.
+ // FIXME: This is conservatively correct, but we might want to detect that
+ // the input is undefined.
+ bool TailAgnostic = true;
+ unsigned UseOpIdx;
+ if (MI.isRegTiedToUseOperand(0, &UseOpIdx) && !WritesElement0) {
+ TailAgnostic = false;
+ // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
+ const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+ MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
+ if (UseMI && UseMI->isImplicitDef())
+ TailAgnostic = true;
+ }
+
+ // For simplicity we reuse the vtype representation here.
+ MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
+ /*TailAgnostic*/ TailAgnostic,
+ /*MaskAgnostic*/ false));
+
+ // Remove (now) redundant operands from pseudo
+ MI.getOperand(SEWIndex).setImm(-1);
+ if (VLIndex >= 0) {
+ MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
+ MI.getOperand(VLIndex).setIsKill(false);
+ }
+
+ return BB;
+}
+
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+
+ if (TSFlags & RISCVII::HasSEWOpMask) {
+ unsigned NumOperands = MI.getNumExplicitOperands();
+ int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
+ unsigned SEWIndex = NumOperands - 1;
+ bool WritesElement0 = TSFlags & RISCVII::WritesElement0Mask;
+
+ RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
+ RISCVII::VLMulShift);
+ return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, WritesElement0);
+ }
+
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instr type to insert");
@@ -1453,6 +2796,7 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
"ReadCycleWrite is only to be used on riscv32");
return emitReadCycleWidePseudo(MI, BB);
case RISCV::Select_GPR_Using_CC_GPR:
+ case RISCV::Select_FPR16_Using_CC_GPR:
case RISCV::Select_FPR32_Using_CC_GPR:
case RISCV::Select_FPR64_Using_CC_GPR:
return emitSelectPseudo(MI, BB);
@@ -1492,6 +2836,10 @@ static const MCPhysReg ArgGPRs[] = {
RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
+static const MCPhysReg ArgFPR16s[] = {
+ RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
+ RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
+};
static const MCPhysReg ArgFPR32s[] = {
RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
@@ -1500,6 +2848,17 @@ static const MCPhysReg ArgFPR64s[] = {
RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
+// This is an interim calling convention and it may be changed in the future.
+static const MCPhysReg ArgVRs[] = {
+ RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
+ RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
+ RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
+static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
+ RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
+ RISCV::V20M2, RISCV::V22M2};
+static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
+ RISCV::V20M4};
+static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
@@ -1544,7 +2903,8 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
- bool IsRet, Type *OrigTy) {
+ bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
+ Optional<unsigned> FirstMaskArgument) {
unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
assert(XLen == 32 || XLen == 64);
MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
@@ -1554,9 +2914,9 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
if (IsRet && ValNo > 1)
return true;
- // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a
- // variadic argument, or if no F32 argument registers are available.
- bool UseGPRForF32 = true;
+ // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
+ // variadic argument, or if no F16/F32 argument registers are available.
+ bool UseGPRForF16_F32 = true;
// UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
// variadic argument, or if no F64 argument registers are available.
bool UseGPRForF64 = true;
@@ -1569,24 +2929,26 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
break;
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_LP64F:
- UseGPRForF32 = !IsFixed;
+ UseGPRForF16_F32 = !IsFixed;
break;
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64D:
- UseGPRForF32 = !IsFixed;
+ UseGPRForF16_F32 = !IsFixed;
UseGPRForF64 = !IsFixed;
break;
}
- if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
- UseGPRForF32 = true;
- if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
+ // FPR16, FPR32, and FPR64 alias each other.
+ if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
+ UseGPRForF16_F32 = true;
UseGPRForF64 = true;
+ }
- // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
- // variables rather than directly checking against the target ABI.
+ // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
+ // similar local variables rather than directly checking against the target
+ // ABI.
- if (UseGPRForF32 && ValVT == MVT::f32) {
+ if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
LocVT = XLenVT;
LocInfo = CCValAssign::BCvt;
} else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
@@ -1669,11 +3031,40 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
// Allocate to a register if possible, or else a stack slot.
Register Reg;
- if (ValVT == MVT::f32 && !UseGPRForF32)
- Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s);
+ if (ValVT == MVT::f16 && !UseGPRForF16_F32)
+ Reg = State.AllocateReg(ArgFPR16s);
+ else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
+ Reg = State.AllocateReg(ArgFPR32s);
else if (ValVT == MVT::f64 && !UseGPRForF64)
- Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s);
- else
+ Reg = State.AllocateReg(ArgFPR64s);
+ else if (ValVT.isScalableVector()) {
+ const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
+ if (RC == &RISCV::VRRegClass) {
+ // Assign the first mask argument to V0.
+ // This is an interim calling convention and it may be changed in the
+ // future.
+ if (FirstMaskArgument.hasValue() &&
+ ValNo == FirstMaskArgument.getValue()) {
+ Reg = State.AllocateReg(RISCV::V0);
+ } else {
+ Reg = State.AllocateReg(ArgVRs);
+ }
+ } else if (RC == &RISCV::VRM2RegClass) {
+ Reg = State.AllocateReg(ArgVRM2s);
+ } else if (RC == &RISCV::VRM4RegClass) {
+ Reg = State.AllocateReg(ArgVRM4s);
+ } else if (RC == &RISCV::VRM8RegClass) {
+ Reg = State.AllocateReg(ArgVRM8s);
+ } else {
+ llvm_unreachable("Unhandled class register for ValueType");
+ }
+ if (!Reg) {
+ LocInfo = CCValAssign::Indirect;
+ // Try using a GPR to pass the address
+ Reg = State.AllocateReg(ArgGPRs);
+ LocVT = XLenVT;
+ }
+ } else
Reg = State.AllocateReg(ArgGPRs);
unsigned StackOffset =
Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
@@ -1696,16 +3087,18 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
return false;
}
- assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
- "Expected an XLenVT at this stage");
+ assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
+ (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
+ "Expected an XLenVT or scalable vector types at this stage");
if (Reg) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
- // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
- if (ValVT == MVT::f32 || ValVT == MVT::f64) {
+ // When a floating-point value is passed on the stack, no bit-conversion is
+ // needed.
+ if (ValVT.isFloatingPoint()) {
LocVT = ValVT;
LocInfo = CCValAssign::Full;
}
@@ -1713,12 +3106,27 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
return false;
}
+template <typename ArgTy>
+static Optional<unsigned> preAssignMask(const ArgTy &Args) {
+ for (const auto &ArgIdx : enumerate(Args)) {
+ MVT ArgVT = ArgIdx.value().VT;
+ if (ArgVT.isScalableVector() &&
+ ArgVT.getVectorElementType().SimpleTy == MVT::i1)
+ return ArgIdx.index();
+ }
+ return None;
+}
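preAssignMask only records the index of the first scalable vector argument whose element type is i1, so that CC_RISCV can later steer that one argument to V0. A minimal model of the same scan, with no LLVM types (the ArgDesc struct and firstMaskArgument helper below are invented for the illustration; the real code walks ISD::InputArg/OutputArg entries via their MVT):

  #include <cstdio>
  #include <optional>
  #include <vector>

  struct ArgDesc {
    bool IsScalableVector;
    bool ElementIsI1;
  };

  static std::optional<unsigned>
  firstMaskArgument(const std::vector<ArgDesc> &Args) {
    for (unsigned I = 0; I < Args.size(); ++I)
      if (Args[I].IsScalableVector && Args[I].ElementIsI1)
        return I; // this argument gets steered to V0
    return std::nullopt;
  }

  int main() {
    // For (nxv4i32, nxv4i1, nxv4i1) only the first mask argument (index 1)
    // is special-cased; later mask arguments use the ordinary VR list.
    std::vector<ArgDesc> Args = {{true, false}, {true, true}, {true, true}};
    if (auto Idx = firstMaskArgument(Args))
      std::printf("first mask argument at index %u\n", *Idx);
    return 0;
  }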
+
void RISCVTargetLowering::analyzeInputArgs(
MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
unsigned NumArgs = Ins.size();
FunctionType *FType = MF.getFunction().getFunctionType();
+ Optional<unsigned> FirstMaskArgument;
+ if (Subtarget.hasStdExtV())
+ FirstMaskArgument = preAssignMask(Ins);
+
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Ins[i].VT;
ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
@@ -1731,7 +3139,8 @@ void RISCVTargetLowering::analyzeInputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
- ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
+ ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
+ FirstMaskArgument)) {
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << '\n');
llvm_unreachable(nullptr);
@@ -1745,6 +3154,10 @@ void RISCVTargetLowering::analyzeOutputArgs(
CallLoweringInfo *CLI) const {
unsigned NumArgs = Outs.size();
+ Optional<unsigned> FirstMaskArgument;
+ if (Subtarget.hasStdExtV())
+ FirstMaskArgument = preAssignMask(Outs);
+
for (unsigned i = 0; i != NumArgs; i++) {
MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
@@ -1752,7 +3165,8 @@ void RISCVTargetLowering::analyzeOutputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
- ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
+ ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
+ FirstMaskArgument)) {
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << "\n");
llvm_unreachable(nullptr);
@@ -1770,11 +3184,12 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
- if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+ if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
+ Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
+ else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
- break;
- }
- Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+ else
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
}
return Val;
@@ -1783,28 +3198,13 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
- const CCValAssign &VA, const SDLoc &DL) {
+ const CCValAssign &VA, const SDLoc &DL,
+ const RISCVTargetLowering &TLI) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
EVT LocVT = VA.getLocVT();
SDValue Val;
- const TargetRegisterClass *RC;
-
- switch (LocVT.getSimpleVT().SimpleTy) {
- default:
- llvm_unreachable("Unexpected register type");
- case MVT::i32:
- case MVT::i64:
- RC = &RISCV::GPRRegClass;
- break;
- case MVT::f32:
- RC = &RISCV::FPR32RegClass;
- break;
- case MVT::f64:
- RC = &RISCV::FPR64RegClass;
- break;
- }
-
+ const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
Register VReg = RegInfo.createVirtualRegister(RC);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
@@ -1825,11 +3225,12 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
- if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+ if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
+ Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
+ else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
- break;
- }
- Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
+ else
+ Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
break;
}
return Val;
@@ -1920,6 +3321,18 @@ static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
+ if (LocVT == MVT::f16) {
+ static const MCPhysReg FPR16List[] = {
+ RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
+ RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
+ RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
+ RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
+ if (unsigned Reg = State.AllocateReg(FPR16List)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
if (LocVT == MVT::f32) {
static const MCPhysReg FPR32List[] = {
RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
@@ -1959,22 +3372,71 @@ static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
return true; // CC didn't match.
}
+static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (LocVT == MVT::i32 || LocVT == MVT::i64) {
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
+ // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
+ static const MCPhysReg GPRList[] = {
+ RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
+ RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
+ if (unsigned Reg = State.AllocateReg(GPRList)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::f32) {
+ // Pass in STG registers: F1, ..., F6
+ // fs0 ... fs5
+ static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
+ RISCV::F18_F, RISCV::F19_F,
+ RISCV::F20_F, RISCV::F21_F};
+ if (unsigned Reg = State.AllocateReg(FPR32List)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::f64) {
+ // Pass in STG registers: D1, ..., D6
+ // fs6 ... fs11
+ static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
+ RISCV::F24_D, RISCV::F25_D,
+ RISCV::F26_D, RISCV::F27_D};
+ if (unsigned Reg = State.AllocateReg(FPR64List)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ report_fatal_error("No registers left in GHC calling convention");
+ return true;
+}
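The GHC convention above never falls back to the stack: each value class simply consumes its fixed register list in order. A simplified model of that behaviour (allocateReg below is an invented stand-in for CCState::AllocateReg), showing the first integer STG arguments landing in s1 (x9), s2 (x18), and so on:

  #include <array>
  #include <cstdio>
  #include <optional>

  static std::optional<unsigned>
  allocateReg(std::array<bool, 11> &Used, const std::array<unsigned, 11> &Regs) {
    for (unsigned I = 0; I < Regs.size(); ++I)
      if (!Used[I]) {
        Used[I] = true;
        return Regs[I];
      }
    return std::nullopt; // corresponds to the report_fatal_error path above
  }

  int main() {
    // X-register numbers for s1, s2, ..., s11 as listed in GPRList above.
    const std::array<unsigned, 11> GPRList = {9,  18, 19, 20, 21, 22,
                                              23, 24, 25, 26, 27};
    std::array<bool, 11> Used{};
    for (int Arg = 0; Arg < 3; ++Arg)
      std::printf("GHC integer arg %d -> x%u\n", Arg,
                  *allocateReg(Used, GPRList));
    return 0;
  }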
+
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
switch (CallConv) {
default:
report_fatal_error("Unsupported calling convention");
case CallingConv::C:
case CallingConv::Fast:
break;
+ case CallingConv::GHC:
+ if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
+ !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
+ report_fatal_error(
+ "GHC calling convention requires the F and D instruction set extensions");
}
- MachineFunction &MF = DAG.getMachineFunction();
-
const Function &Func = MF.getFunction();
if (Func.hasFnAttribute("interrupt")) {
if (!Func.arg_empty())
@@ -2001,6 +3463,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
if (CallConv == CallingConv::Fast)
CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
+ else if (CallConv == CallingConv::GHC)
+ CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
else
analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
@@ -2012,7 +3476,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
else if (VA.isRegLoc())
- ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
+ ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
else
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
@@ -2201,6 +3665,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (CallConv == CallingConv::Fast)
ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
+ else if (CallConv == CallingConv::GHC)
+ ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
else
analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
@@ -2458,12 +3924,18 @@ bool RISCVTargetLowering::CanLowerReturn(
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+
+ Optional<unsigned> FirstMaskArgument;
+ if (Subtarget.hasStdExtV())
+ FirstMaskArgument = preAssignMask(Outs);
+
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
- ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
+ ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
+ *this, FirstMaskArgument))
return false;
}
return true;
@@ -2488,6 +3960,9 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
nullptr);
+ if (CallConv == CallingConv::GHC && !RVLocs.empty())
+ report_fatal_error("GHC functions return void only");
+
SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -2574,7 +4049,7 @@ void RISCVTargetLowering::validateCCReservedRegs(
const Function &F = MF.getFunction();
const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
- if (std::any_of(std::begin(Regs), std::end(Regs), [&STI](auto Reg) {
+ if (llvm::any_of(Regs, [&STI](auto Reg) {
return STI.isRegisterReservedByUser(Reg.first);
}))
F.getContext().diagnose(DiagnosticInfoUnsupported{
@@ -2586,47 +4061,57 @@ bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
}
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
+#define NODE_NAME_CASE(NODE) \
+ case RISCVISD::NODE: \
+ return "RISCVISD::" #NODE;
+ // clang-format off
switch ((RISCVISD::NodeType)Opcode) {
case RISCVISD::FIRST_NUMBER:
break;
- case RISCVISD::RET_FLAG:
- return "RISCVISD::RET_FLAG";
- case RISCVISD::URET_FLAG:
- return "RISCVISD::URET_FLAG";
- case RISCVISD::SRET_FLAG:
- return "RISCVISD::SRET_FLAG";
- case RISCVISD::MRET_FLAG:
- return "RISCVISD::MRET_FLAG";
- case RISCVISD::CALL:
- return "RISCVISD::CALL";
- case RISCVISD::SELECT_CC:
- return "RISCVISD::SELECT_CC";
- case RISCVISD::BuildPairF64:
- return "RISCVISD::BuildPairF64";
- case RISCVISD::SplitF64:
- return "RISCVISD::SplitF64";
- case RISCVISD::TAIL:
- return "RISCVISD::TAIL";
- case RISCVISD::SLLW:
- return "RISCVISD::SLLW";
- case RISCVISD::SRAW:
- return "RISCVISD::SRAW";
- case RISCVISD::SRLW:
- return "RISCVISD::SRLW";
- case RISCVISD::DIVW:
- return "RISCVISD::DIVW";
- case RISCVISD::DIVUW:
- return "RISCVISD::DIVUW";
- case RISCVISD::REMUW:
- return "RISCVISD::REMUW";
- case RISCVISD::FMV_W_X_RV64:
- return "RISCVISD::FMV_W_X_RV64";
- case RISCVISD::FMV_X_ANYEXTW_RV64:
- return "RISCVISD::FMV_X_ANYEXTW_RV64";
- case RISCVISD::READ_CYCLE_WIDE:
- return "RISCVISD::READ_CYCLE_WIDE";
+ NODE_NAME_CASE(RET_FLAG)
+ NODE_NAME_CASE(URET_FLAG)
+ NODE_NAME_CASE(SRET_FLAG)
+ NODE_NAME_CASE(MRET_FLAG)
+ NODE_NAME_CASE(CALL)
+ NODE_NAME_CASE(SELECT_CC)
+ NODE_NAME_CASE(BuildPairF64)
+ NODE_NAME_CASE(SplitF64)
+ NODE_NAME_CASE(TAIL)
+ NODE_NAME_CASE(SLLW)
+ NODE_NAME_CASE(SRAW)
+ NODE_NAME_CASE(SRLW)
+ NODE_NAME_CASE(DIVW)
+ NODE_NAME_CASE(DIVUW)
+ NODE_NAME_CASE(REMUW)
+ NODE_NAME_CASE(ROLW)
+ NODE_NAME_CASE(RORW)
+ NODE_NAME_CASE(FSLW)
+ NODE_NAME_CASE(FSRW)
+ NODE_NAME_CASE(FMV_H_X)
+ NODE_NAME_CASE(FMV_X_ANYEXTH)
+ NODE_NAME_CASE(FMV_W_X_RV64)
+ NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
+ NODE_NAME_CASE(READ_CYCLE_WIDE)
+ NODE_NAME_CASE(GREVI)
+ NODE_NAME_CASE(GREVIW)
+ NODE_NAME_CASE(GORCI)
+ NODE_NAME_CASE(GORCIW)
+ NODE_NAME_CASE(VMV_X_S)
+ NODE_NAME_CASE(SPLAT_VECTOR_I64)
+ NODE_NAME_CASE(READ_VLENB)
+ NODE_NAME_CASE(TRUNCATE_VECTOR)
+ NODE_NAME_CASE(VLEFF)
+ NODE_NAME_CASE(VLEFF_MASK)
+ NODE_NAME_CASE(VLSEGFF)
+ NODE_NAME_CASE(VLSEGFF_MASK)
+ NODE_NAME_CASE(READ_VL)
+ NODE_NAME_CASE(VSLIDEUP)
+ NODE_NAME_CASE(VSLIDEDOWN)
+ NODE_NAME_CASE(VID)
}
+ // clang-format on
return nullptr;
+#undef NODE_NAME_CASE
}
/// getConstraintType - Given a constraint letter, return the type of
@@ -2661,6 +4146,8 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'r':
return std::make_pair(0U, &RISCV::GPRRegClass);
case 'f':
+ if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
+ return std::make_pair(0U, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtF() && VT == MVT::f32)
return std::make_pair(0U, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD() && VT == MVT::f64)
@@ -2675,7 +4162,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// official names. However, other frontends like `rustc` do not. This allows
// users of these frontends to use the ABI names for registers in LLVM-style
// register constraints.
- Register XRegFromAlias = StringSwitch<Register>(Constraint.lower())
+ unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
.Case("{zero}", RISCV::X0)
.Case("{ra}", RISCV::X1)
.Case("{sp}", RISCV::X2)
@@ -2719,46 +4206,50 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
//
// The second case is the ABI name of the register, so that frontends can also
// use the ABI names in register constraint lists.
- if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) {
- std::pair<Register, Register> FReg =
- StringSwitch<std::pair<Register, Register>>(Constraint.lower())
- .Cases("{f0}", "{ft0}", {RISCV::F0_F, RISCV::F0_D})
- .Cases("{f1}", "{ft1}", {RISCV::F1_F, RISCV::F1_D})
- .Cases("{f2}", "{ft2}", {RISCV::F2_F, RISCV::F2_D})
- .Cases("{f3}", "{ft3}", {RISCV::F3_F, RISCV::F3_D})
- .Cases("{f4}", "{ft4}", {RISCV::F4_F, RISCV::F4_D})
- .Cases("{f5}", "{ft5}", {RISCV::F5_F, RISCV::F5_D})
- .Cases("{f6}", "{ft6}", {RISCV::F6_F, RISCV::F6_D})
- .Cases("{f7}", "{ft7}", {RISCV::F7_F, RISCV::F7_D})
- .Cases("{f8}", "{fs0}", {RISCV::F8_F, RISCV::F8_D})
- .Cases("{f9}", "{fs1}", {RISCV::F9_F, RISCV::F9_D})
- .Cases("{f10}", "{fa0}", {RISCV::F10_F, RISCV::F10_D})
- .Cases("{f11}", "{fa1}", {RISCV::F11_F, RISCV::F11_D})
- .Cases("{f12}", "{fa2}", {RISCV::F12_F, RISCV::F12_D})
- .Cases("{f13}", "{fa3}", {RISCV::F13_F, RISCV::F13_D})
- .Cases("{f14}", "{fa4}", {RISCV::F14_F, RISCV::F14_D})
- .Cases("{f15}", "{fa5}", {RISCV::F15_F, RISCV::F15_D})
- .Cases("{f16}", "{fa6}", {RISCV::F16_F, RISCV::F16_D})
- .Cases("{f17}", "{fa7}", {RISCV::F17_F, RISCV::F17_D})
- .Cases("{f18}", "{fs2}", {RISCV::F18_F, RISCV::F18_D})
- .Cases("{f19}", "{fs3}", {RISCV::F19_F, RISCV::F19_D})
- .Cases("{f20}", "{fs4}", {RISCV::F20_F, RISCV::F20_D})
- .Cases("{f21}", "{fs5}", {RISCV::F21_F, RISCV::F21_D})
- .Cases("{f22}", "{fs6}", {RISCV::F22_F, RISCV::F22_D})
- .Cases("{f23}", "{fs7}", {RISCV::F23_F, RISCV::F23_D})
- .Cases("{f24}", "{fs8}", {RISCV::F24_F, RISCV::F24_D})
- .Cases("{f25}", "{fs9}", {RISCV::F25_F, RISCV::F25_D})
- .Cases("{f26}", "{fs10}", {RISCV::F26_F, RISCV::F26_D})
- .Cases("{f27}", "{fs11}", {RISCV::F27_F, RISCV::F27_D})
- .Cases("{f28}", "{ft8}", {RISCV::F28_F, RISCV::F28_D})
- .Cases("{f29}", "{ft9}", {RISCV::F29_F, RISCV::F29_D})
- .Cases("{f30}", "{ft10}", {RISCV::F30_F, RISCV::F30_D})
- .Cases("{f31}", "{ft11}", {RISCV::F31_F, RISCV::F31_D})
- .Default({RISCV::NoRegister, RISCV::NoRegister});
- if (FReg.first != RISCV::NoRegister)
- return Subtarget.hasStdExtD()
- ? std::make_pair(FReg.second, &RISCV::FPR64RegClass)
- : std::make_pair(FReg.first, &RISCV::FPR32RegClass);
+ if (Subtarget.hasStdExtF()) {
+ unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
+ .Cases("{f0}", "{ft0}", RISCV::F0_F)
+ .Cases("{f1}", "{ft1}", RISCV::F1_F)
+ .Cases("{f2}", "{ft2}", RISCV::F2_F)
+ .Cases("{f3}", "{ft3}", RISCV::F3_F)
+ .Cases("{f4}", "{ft4}", RISCV::F4_F)
+ .Cases("{f5}", "{ft5}", RISCV::F5_F)
+ .Cases("{f6}", "{ft6}", RISCV::F6_F)
+ .Cases("{f7}", "{ft7}", RISCV::F7_F)
+ .Cases("{f8}", "{fs0}", RISCV::F8_F)
+ .Cases("{f9}", "{fs1}", RISCV::F9_F)
+ .Cases("{f10}", "{fa0}", RISCV::F10_F)
+ .Cases("{f11}", "{fa1}", RISCV::F11_F)
+ .Cases("{f12}", "{fa2}", RISCV::F12_F)
+ .Cases("{f13}", "{fa3}", RISCV::F13_F)
+ .Cases("{f14}", "{fa4}", RISCV::F14_F)
+ .Cases("{f15}", "{fa5}", RISCV::F15_F)
+ .Cases("{f16}", "{fa6}", RISCV::F16_F)
+ .Cases("{f17}", "{fa7}", RISCV::F17_F)
+ .Cases("{f18}", "{fs2}", RISCV::F18_F)
+ .Cases("{f19}", "{fs3}", RISCV::F19_F)
+ .Cases("{f20}", "{fs4}", RISCV::F20_F)
+ .Cases("{f21}", "{fs5}", RISCV::F21_F)
+ .Cases("{f22}", "{fs6}", RISCV::F22_F)
+ .Cases("{f23}", "{fs7}", RISCV::F23_F)
+ .Cases("{f24}", "{fs8}", RISCV::F24_F)
+ .Cases("{f25}", "{fs9}", RISCV::F25_F)
+ .Cases("{f26}", "{fs10}", RISCV::F26_F)
+ .Cases("{f27}", "{fs11}", RISCV::F27_F)
+ .Cases("{f28}", "{ft8}", RISCV::F28_F)
+ .Cases("{f29}", "{ft9}", RISCV::F29_F)
+ .Cases("{f30}", "{ft10}", RISCV::F30_F)
+ .Cases("{f31}", "{ft11}", RISCV::F31_F)
+ .Default(RISCV::NoRegister);
+ if (FReg != RISCV::NoRegister) {
+ assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
+ if (Subtarget.hasStdExtD()) {
+ unsigned RegNo = FReg - RISCV::F0_F;
+ unsigned DReg = RISCV::F0_D + RegNo;
+ return std::make_pair(DReg, &RISCV::FPR64RegClass);
+ }
+ return std::make_pair(FReg, &RISCV::FPR32RegClass);
+ }
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
@@ -2974,6 +4465,27 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
return Result;
}
+bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ return Subtarget.hasStdExtZfh();
+ case MVT::f32:
+ return Subtarget.hasStdExtF();
+ case MVT::f64:
+ return Subtarget.hasStdExtD();
+ default:
+ break;
+ }
+
+ return false;
+}
+
Register RISCVTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return RISCV::X10;
@@ -2994,20 +4506,39 @@ bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
return true;
}
+bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
+ if (Subtarget.is64Bit() && Type == MVT::i32)
+ return true;
+
+ return IsSigned;
+}
+
bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const {
// Check integral scalar types.
if (VT.isScalarInteger()) {
- // Do not perform the transformation on riscv32 with the M extension.
- if (!Subtarget.is64Bit() && Subtarget.hasStdExtM())
+ // Omit the optimization if the sub target has the M extension and the data
+ // size exceeds XLen.
+ if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
return false;
if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
- if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t))
+ // Break the MUL to a SLLI and an ADD/SUB.
+ const APInt &Imm = ConstNode->getAPIntValue();
+ if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
+ (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
+ return true;
+ // Omit the following optimization if the sub target has the M extension
+ // and the data size >= XLen.
+ if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
return false;
- int64_t Imm = ConstNode->getSExtValue();
- if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) ||
- isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm))
+ // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
+ // a pair of LUI/ADDI.
+ if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
+ APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
+ if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
+ (1 - ImmS).isPowerOf2())
return true;
+ }
}
}
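Two worked instances of the shapes accepted above, checked in plain C++ with no LLVM dependencies: Imm = 7, where Imm + 1 is a power of two, so the multiply becomes one shift and one subtract; and Imm = 20480 = 5 << 12, where stripping the trailing zeros leaves 5 and 5 - 1 is a power of two, so the multiply becomes two shifts and one add.

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint64_t X : {1ull, 3ull, 97ull, 123456789ull}) {
      // Imm = 7: (Imm + 1) is a power of two, so x*7 == (x << 3) - x.
      assert(X * 7 == (X << 3) - X);
      // Imm = 20480 == 5 << 12: ImmS = 5 and (ImmS - 1) is a power of two,
      // so x*20480 == ((x << 2) + x) << 12, i.e. two SLLIs plus one ADD.
      assert(X * 20480 == (((X << 2) + X) << 12));
    }
    return 0;
  }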
@@ -3032,3 +4563,19 @@ RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
StringRef(RegName) + "\"."));
return Reg;
}
+
+namespace llvm {
+namespace RISCVVIntrinsicsTable {
+
+#define GET_RISCVVIntrinsicsTable_IMPL
+#include "RISCVGenSearchableTables.inc"
+
+} // namespace RISCVVIntrinsicsTable
+
+namespace RISCVZvlssegTable {
+
+#define GET_RISCVZvlssegTable_IMPL
+#include "RISCVGenSearchableTables.inc"
+
+} // namespace RISCVZvlssegTable
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
index e420e879efc9..40b1a45c6d15 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -28,6 +28,12 @@ enum NodeType : unsigned {
SRET_FLAG,
MRET_FLAG,
CALL,
+ /// Select with condition operator - This selects between a true value and
+ /// a false value (ops #3 and #4) based on the boolean result of comparing
+ /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
+ /// condition code in op #2, a XLenVT constant from the ISD::CondCode enum.
+ /// The lhs and rhs are XLenVT integers. The true and false values can be
+ /// integer or floating point.
SELECT_CC,
BuildPairF64,
SplitF64,
@@ -38,22 +44,75 @@ enum NodeType : unsigned {
SRAW,
SRLW,
// 32-bit operations from RV64M that can't be simply matched with a pattern
- // at instruction selection time.
+ // at instruction selection time. These have undefined behavior for division
+ // by 0 or overflow (divw), like their target-independent counterparts.
DIVW,
DIVUW,
REMUW,
- // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast
- // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X.
+ // RV64IB rotates, directly matching the semantics of the named RISC-V
+ // instructions.
+ ROLW,
+ RORW,
+ // RV64IB funnel shifts, with the semantics of the named RISC-V instructions,
+ // but the same operand order as fshl/fshr intrinsics.
+ FSRW,
+ FSLW,
+ // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
+ // XLEN is the only legal integer width.
+ //
+ // FMV_H_X matches the semantics of the FMV.H.X.
+ // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
+ // FMV_W_X_RV64 matches the semantics of the FMV.W.X.
// FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
+ //
// This is a more convenient semantic for producing dagcombines that remove
// unnecessary GPR->FPR->GPR moves.
+ FMV_H_X,
+ FMV_X_ANYEXTH,
FMV_W_X_RV64,
FMV_X_ANYEXTW_RV64,
// READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
// (returns (Lo, Hi)). It takes a chain operand.
- READ_CYCLE_WIDE
+ READ_CYCLE_WIDE,
+ // Generalized Reverse and Generalized Or-Combine - directly matching the
+ // semantics of the named RISC-V instructions. Lowered as custom nodes as
+ // TableGen chokes when faced with commutative permutations in deeply-nested
+ // DAGs. Each node takes an input operand and a TargetConstant immediate
+ // shift amount, and outputs a bit-manipulated version of the input. All
+ // operands are of type XLenVT. (A standalone sketch of the GREV bit
+ // permutation follows this enum.)
+ GREVI,
+ GREVIW,
+ GORCI,
+ GORCIW,
+ // Vector Extension
+ // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT
+ // sign extended from the vector element size. NOTE: The result size will
+ // never be less than the vector element size.
+ VMV_X_S,
+ // Splats an i64 scalar to a vector type (with element type i64) where the
+ // scalar is a sign-extended i32.
+ SPLAT_VECTOR_I64,
+ // Read VLENB CSR
+ READ_VLENB,
+ // Truncates an RVV integer vector by one power-of-two.
+ TRUNCATE_VECTOR,
+ // Unit-stride fault-only-first load
+ VLEFF,
+ VLEFF_MASK,
+ // Unit-stride fault-only-first segment load
+ VLSEGFF,
+ VLSEGFF_MASK,
+ // Read VL CSR
+ READ_VL,
+ // Matches the semantics of vslideup/vslidedown. The first operand is the
+ // pass-thru operand, the second is the source vector, and the third is the
+ // XLenVT index (either constant or non-constant).
+ VSLIDEUP,
+ VSLIDEDOWN,
+ // Matches the semantics of the unmasked vid.v instruction.
+ VID,
};
-}
+} // namespace RISCVISD
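The GREVI/GORCI comment in the enum above describes a generalized bit reverse controlled by an immediate shift amount. A reference-style model of the 32-bit permutation, using the usual five stage-swap steps (grev32 is an illustrative helper written for this sketch, not code from this patch); under this construction a shift amount of 24 byte-reverses the word and 31 reverses every bit:

  #include <cassert>
  #include <cstdint>

  static uint32_t grev32(uint32_t X, unsigned Shamt) {
    if (Shamt & 1)  X = ((X & 0x55555555u) << 1)  | ((X & 0xAAAAAAAAu) >> 1);
    if (Shamt & 2)  X = ((X & 0x33333333u) << 2)  | ((X & 0xCCCCCCCCu) >> 2);
    if (Shamt & 4)  X = ((X & 0x0F0F0F0Fu) << 4)  | ((X & 0xF0F0F0F0u) >> 4);
    if (Shamt & 8)  X = ((X & 0x00FF00FFu) << 8)  | ((X & 0xFF00FF00u) >> 8);
    if (Shamt & 16) X = ((X & 0x0000FFFFu) << 16) | ((X & 0xFFFF0000u) >> 16);
    return X;
  }

  int main() {
    assert(grev32(0x12345678u, 24) == 0x78563412u); // byte reverse
    assert(grev32(0x80000000u, 31) == 0x00000001u); // full bit reverse
    return 0;
  }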
class RISCVTargetLowering : public TargetLowering {
const RISCVSubtarget &Subtarget;
@@ -62,6 +121,8 @@ public:
explicit RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI);
+ const RISCVSubtarget &getSubtarget() const { return Subtarget; }
+
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
@@ -74,6 +135,8 @@ public:
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
@@ -86,6 +149,15 @@ public:
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ TargetLoweringOpt &TLO) const override;
+
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const override;
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
const APInt &DemandedElts,
const SelectionDAG &DAG,
@@ -126,6 +198,9 @@ public:
Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
+
ISD::NodeType getExtendForAtomicOps() const override {
return ISD::SIGN_EXTEND;
}
@@ -153,6 +228,7 @@ public:
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
bool shouldExtendTypeInLibCall(EVT Type) const override;
+ bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
/// Returns the register with the specified architectural or ABI name. This
/// method is necessary to lower the llvm.read_register.* and
@@ -220,6 +296,7 @@ private:
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
@@ -227,7 +304,14 @@ private:
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
+ SDValue lowerSPLATVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
+ int64_t ExtTrueVal) const;
+ SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
bool isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
@@ -239,6 +323,37 @@ private:
const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
MachineFunction &MF) const;
};
+
+namespace RISCVVIntrinsicsTable {
+
+struct RISCVVIntrinsicInfo {
+ unsigned int IntrinsicID;
+ unsigned int ExtendedOperand;
+};
+
+using namespace RISCV;
+
+#define GET_RISCVVIntrinsicsTable_DECL
+#include "RISCVGenSearchableTables.inc"
+
+} // end namespace RISCVVIntrinsicsTable
+
+namespace RISCVZvlssegTable {
+
+struct RISCVZvlsseg {
+ unsigned int IntrinsicID;
+ unsigned int SEW;
+ unsigned int LMUL;
+ unsigned int IndexLMUL;
+ unsigned int Pseudo;
+};
+
+using namespace RISCV;
+
+#define GET_RISCVZvlssegTable_DECL
+#include "RISCVGenSearchableTables.inc"
+
+} // namespace RISCVZvlssegTable
}
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index a47945a6a515..7be74b79d99b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -49,18 +49,61 @@ def InstFormatCB : InstFormat<15>;
def InstFormatCJ : InstFormat<16>;
def InstFormatOther : InstFormat<17>;
-class RISCVVConstraint<bits<4> val> {
- bits<4> Value = val;
+class RISCVVConstraint<bits<3> val> {
+ bits<3> Value = val;
}
-def NoConstraint : RISCVVConstraint<0>;
-def WidenV : RISCVVConstraint<1>;
-def WidenW : RISCVVConstraint<2>;
-def WidenCvt : RISCVVConstraint<3>;
-def Narrow : RISCVVConstraint<4>;
-def Iota : RISCVVConstraint<5>;
-def SlideUp : RISCVVConstraint<6>;
-def Vrgather : RISCVVConstraint<7>;
-def Vcompress : RISCVVConstraint<8>;
+def NoConstraint : RISCVVConstraint<0b000>;
+def VS2Constraint : RISCVVConstraint<0b001>;
+def VS1Constraint : RISCVVConstraint<0b010>;
+def VMConstraint : RISCVVConstraint<0b100>;
+
+// Illegal instructions:
+//
+// * The destination vector register group for a masked vector instruction
+// cannot overlap the source mask register (v0), unless the destination vector
+// register is being written with a mask value (e.g., comparisons) or the
+// scalar result of a reduction.
+//
+// * Widening: The destination EEW is greater than the source EEW and the
+// source EMUL is at least 1. The destination vector register group cannot overlap
+// with the source vector register groups besides the highest-numbered part of
+// the destination register group.
+//
+// * Narrowing: The destination EEW is smaller than the source EEW. The
+// destination vector register group cannot overlap with the source vector
+// register groups besides the lowest-numbered part of the source register
+// group.
+//
+// * vmsbf.m/vmsif.m/vmsof.m: The destination register cannot overlap the
+// source register and, if masked, cannot overlap the mask register ('v0').
+//
+// * viota: The destination register cannot overlap the source register and,
+// if masked, cannot overlap the mask register ('v0').
+//
+// * v[f]slide[1]up: The destination vector register group for vslideup cannot
+// overlap the source vector register group.
+//
+// * vrgather: The destination vector register group cannot overlap with the
+// source vector register groups.
+//
+// * vcompress: The destination vector register group cannot overlap the
+// source vector register group or the source mask register
+def WidenV : RISCVVConstraint<!or(VS2Constraint.Value,
+ VS1Constraint.Value,
+ VMConstraint.Value)>;
+def WidenW : RISCVVConstraint<!or(VS1Constraint.Value,
+ VMConstraint.Value)>;
+def WidenCvt : RISCVVConstraint<!or(VS2Constraint.Value,
+ VMConstraint.Value)>;
+def Iota : RISCVVConstraint<!or(VS2Constraint.Value,
+ VMConstraint.Value)>;
+def SlideUp : RISCVVConstraint<!or(VS2Constraint.Value,
+ VMConstraint.Value)>;
+def Vrgather : RISCVVConstraint<!or(VS2Constraint.Value,
+ VS1Constraint.Value,
+ VMConstraint.Value)>;
+def Vcompress : RISCVVConstraint<!or(VS2Constraint.Value,
+ VS1Constraint.Value)>;
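The composite constraints above are plain bitwise ORs of the three primitive bits, so whatever consumes them can test each overlap hazard independently. A small sketch with the same bit values (the enum below is written for illustration, not taken from the TableGen-generated C++):

  #include <cstdio>

  enum : unsigned {
    VS2Constraint = 0b001,
    VS1Constraint = 0b010,
    VMConstraint  = 0b100
  };
  constexpr unsigned WidenV    = VS2Constraint | VS1Constraint | VMConstraint; // 0b111
  constexpr unsigned Vcompress = VS2Constraint | VS1Constraint;                // 0b011

  int main() {
    std::printf("WidenV forbids vs1 overlap:   %s\n",
                (WidenV & VS1Constraint) ? "yes" : "no");
    std::printf("Vcompress forbids v0 overlap: %s\n",
                (Vcompress & VMConstraint) ? "yes" : "no");
    return 0;
  }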
// The following opcode names match those given in Table 19.1 in the
// RISC-V User-level ISA specification ("RISC-V base opcode map").
@@ -116,7 +159,25 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
// Defaults
RISCVVConstraint RVVConstraint = NoConstraint;
- let TSFlags{8-5} = RVVConstraint.Value;
+ let TSFlags{7-5} = RVVConstraint.Value;
+
+ bits<3> VLMul = 0;
+ let TSFlags{10-8} = VLMul;
+
+ bit HasDummyMask = 0;
+ let TSFlags{11} = HasDummyMask;
+
+ bit WritesElement0 = 0;
+ let TSFlags{12} = WritesElement0;
+
+ bit HasMergeOp = 0;
+ let TSFlags{13} = HasMergeOp;
+
+ bit HasSEWOp = 0;
+ let TSFlags{14} = HasSEWOp;
+
+ bit HasVLOp = 0;
+ let TSFlags{15} = HasVLOp;
}
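A sketch of how the TSFlags layout declared above can be unpacked on the C++ side. The shift and mask values mirror the bit positions assigned in this class, but the constants are placed in an example namespace because the real definitions (the RISCVII masks referenced from EmitInstrWithCustomInserter earlier in this patch) live elsewhere in the backend.

  #include <cstdint>
  #include <cstdio>

  namespace ExampleRISCVII {
  constexpr uint64_t VLMulShift        = 8;
  constexpr uint64_t VLMulMask         = 0x7ull << VLMulShift; // TSFlags{10-8}
  constexpr uint64_t WritesElement0Mask = 1ull << 12;          // TSFlags{12}
  constexpr uint64_t HasSEWOpMask       = 1ull << 14;          // TSFlags{14}
  constexpr uint64_t HasVLOpMask        = 1ull << 15;          // TSFlags{15}
  } // namespace ExampleRISCVII

  int main() {
    using namespace ExampleRISCVII;
    uint64_t TSFlags = (0x2ull << VLMulShift) | HasSEWOpMask | HasVLOpMask;
    std::printf("VLMul=%u HasSEWOp=%d HasVLOp=%d WritesElement0=%d\n",
                unsigned((TSFlags & VLMulMask) >> VLMulShift),
                int((TSFlags & HasSEWOpMask) != 0),
                int((TSFlags & HasVLOpMask) != 0),
                int((TSFlags & WritesElement0Mask) != 0));
    return 0;
  }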
// Pseudo instructions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
index e5f154966ba6..80f46b73bfd7 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
@@ -21,37 +21,67 @@ def OPIVX : RISCVVFormat<0b100>;
def OPFVF : RISCVVFormat<0b101>;
def OPMVX : RISCVVFormat<0b110>;
-class RISCVMOP<bits<3> val> {
- bits<3> Value = val;
+class RISCVMOP<bits<2> val> {
+ bits<2> Value = val;
}
-def MOPLDUnitStrideU : RISCVMOP<0b000>;
-def MOPLDStridedU : RISCVMOP<0b010>;
-def MOPLDIndexedU : RISCVMOP<0b011>;
-def MOPLDUnitStrideS : RISCVMOP<0b100>;
-def MOPLDStridedS : RISCVMOP<0b110>;
-def MOPLDIndexedS : RISCVMOP<0b111>;
-
-def MOPSTUnitStride : RISCVMOP<0b000>;
-def MOPSTStrided : RISCVMOP<0b010>;
-def MOPSTIndexedOrder: RISCVMOP<0b011>;
-def MOPSTIndexedUnOrd: RISCVMOP<0b111>;
+def MOPLDUnitStride : RISCVMOP<0b00>;
+def MOPLDIndexedUnord : RISCVMOP<0b01>;
+def MOPLDStrided : RISCVMOP<0b10>;
+def MOPLDIndexedOrder : RISCVMOP<0b11>;
+
+def MOPSTUnitStride : RISCVMOP<0b00>;
+def MOPSTIndexedUnord : RISCVMOP<0b01>;
+def MOPSTStrided : RISCVMOP<0b10>;
+def MOPSTIndexedOrder : RISCVMOP<0b11>;
class RISCVLSUMOP<bits<5> val> {
bits<5> Value = val;
}
def LUMOPUnitStride : RISCVLSUMOP<0b00000>;
+def LUMOPUnitStrideMask : RISCVLSUMOP<0b01011>;
def LUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
def LUMOPUnitStrideFF: RISCVLSUMOP<0b10000>;
def SUMOPUnitStride : RISCVLSUMOP<0b00000>;
+def SUMOPUnitStrideMask : RISCVLSUMOP<0b01011>;
def SUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
-class RISCVWidth<bits<3> val> {
- bits<3> Value = val;
+class RISCVAMOOP<bits<5> val> {
+ bits<5> Value = val;
+}
+def AMOOPVamoSwap : RISCVAMOOP<0b00001>;
+def AMOOPVamoAdd : RISCVAMOOP<0b00000>;
+def AMOOPVamoXor : RISCVAMOOP<0b00100>;
+def AMOOPVamoAnd : RISCVAMOOP<0b01100>;
+def AMOOPVamoOr : RISCVAMOOP<0b01000>;
+def AMOOPVamoMin : RISCVAMOOP<0b10000>;
+def AMOOPVamoMax : RISCVAMOOP<0b10100>;
+def AMOOPVamoMinu : RISCVAMOOP<0b11000>;
+def AMOOPVamoMaxu : RISCVAMOOP<0b11100>;
+
+class RISCVWidth<bits<4> val> {
+ bits<4> Value = val;
+}
+def LSWidth8 : RISCVWidth<0b0000>;
+def LSWidth16 : RISCVWidth<0b0101>;
+def LSWidth32 : RISCVWidth<0b0110>;
+def LSWidth64 : RISCVWidth<0b0111>;
+
+class RVInstSetiVLi<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<5> uimm;
+ bits<5> rd;
+ bits<10> vtypei;
+
+ let Inst{31} = 1;
+ let Inst{30} = 1;
+ let Inst{29-20} = vtypei{9-0};
+ let Inst{19-15} = uimm;
+ let Inst{14-12} = 0b111;
+ let Inst{11-7} = rd;
+ let Opcode = OPC_OP_V.Value;
+
+ let Defs = [VTYPE, VL];
}
-def LSWidthVByte : RISCVWidth<0b000>;
-def LSWidthVHalf : RISCVWidth<0b101>;
-def LSWidthVWord : RISCVWidth<0b110>;
-def LSWidthVSEW : RISCVWidth<0b111>;
class RVInstSetVLi<dag outs, dag ins, string opcodestr, string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
@@ -103,6 +133,7 @@ class RVInstVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
let Opcode = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
+ let RVVConstraint = VMConstraint;
}
class RVInstVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
@@ -122,6 +153,7 @@ class RVInstVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
let Opcode = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
+ let RVVConstraint = VMConstraint;
}
class RVInstV2<bits<6> funct6, bits<5> vs2, RISCVVFormat opv, dag outs, dag ins,
@@ -140,6 +172,7 @@ class RVInstV2<bits<6> funct6, bits<5> vs2, RISCVVFormat opv, dag outs, dag ins,
let Opcode = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
+ let RVVConstraint = VMConstraint;
}
class RVInstIVI<bits<6> funct6, dag outs, dag ins, string opcodestr,
@@ -159,6 +192,7 @@ class RVInstIVI<bits<6> funct6, dag outs, dag ins, string opcodestr,
let Opcode = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
+ let RVVConstraint = VMConstraint;
}
class RVInstV<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, dag outs,
@@ -177,10 +211,11 @@ class RVInstV<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, dag outs,
let Opcode = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
+ let RVVConstraint = VMConstraint;
}
-class RVInstVLU<bits<3> nf, RISCVMOP mop, RISCVLSUMOP lumop,
- RISCVWidth width, dag outs, dag ins, string opcodestr,
+class RVInstVLU<bits<3> nf, bit mew, RISCVLSUMOP lumop,
+ bits<3> width, dag outs, dag ins, string opcodestr,
string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
bits<5> rs1;
@@ -188,18 +223,20 @@ class RVInstVLU<bits<3> nf, RISCVMOP mop, RISCVLSUMOP lumop,
bit vm;
let Inst{31-29} = nf;
- let Inst{28-26} = mop.Value;
+ let Inst{28} = mew;
+ let Inst{27-26} = MOPLDUnitStride.Value;
let Inst{25} = vm;
let Inst{24-20} = lumop.Value;
let Inst{19-15} = rs1;
- let Inst{14-12} = width.Value;
+ let Inst{14-12} = width;
let Inst{11-7} = vd;
let Opcode = OPC_LOAD_FP.Value;
let Uses = [VTYPE, VL];
+ let RVVConstraint = VMConstraint;
}
-class RVInstVLS<bits<3> nf, RISCVMOP mop, RISCVWidth width,
+class RVInstVLS<bits<3> nf, bit mew, bits<3> width,
dag outs, dag ins, string opcodestr, string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
bits<5> rs2;
@@ -208,18 +245,20 @@ class RVInstVLS<bits<3> nf, RISCVMOP mop, RISCVWidth width,
bit vm;
let Inst{31-29} = nf;
- let Inst{28-26} = mop.Value;
+ let Inst{28} = mew;
+ let Inst{27-26} = MOPLDStrided.Value;
let Inst{25} = vm;
let Inst{24-20} = rs2;
let Inst{19-15} = rs1;
- let Inst{14-12} = width.Value;
+ let Inst{14-12} = width;
let Inst{11-7} = vd;
let Opcode = OPC_LOAD_FP.Value;
let Uses = [VTYPE, VL];
+ let RVVConstraint = VMConstraint;
}
-class RVInstVLX<bits<3> nf, RISCVMOP mop, RISCVWidth width,
+class RVInstVLX<bits<3> nf, bit mew, RISCVMOP mop, bits<3> width,
dag outs, dag ins, string opcodestr, string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
bits<5> vs2;
@@ -228,19 +267,21 @@ class RVInstVLX<bits<3> nf, RISCVMOP mop, RISCVWidth width,
bit vm;
let Inst{31-29} = nf;
- let Inst{28-26} = mop.Value;
+ let Inst{28} = mew;
+ let Inst{27-26} = mop.Value;
let Inst{25} = vm;
let Inst{24-20} = vs2;
let Inst{19-15} = rs1;
- let Inst{14-12} = width.Value;
+ let Inst{14-12} = width;
let Inst{11-7} = vd;
let Opcode = OPC_LOAD_FP.Value;
let Uses = [VTYPE, VL];
+ let RVVConstraint = VMConstraint;
}
-class RVInstVSU<bits<3> nf, RISCVMOP mop, RISCVLSUMOP sumop,
- RISCVWidth width, dag outs, dag ins, string opcodestr,
+class RVInstVSU<bits<3> nf, bit mew, RISCVLSUMOP sumop,
+ bits<3> width, dag outs, dag ins, string opcodestr,
string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
bits<5> rs1;
@@ -248,18 +289,19 @@ class RVInstVSU<bits<3> nf, RISCVMOP mop, RISCVLSUMOP sumop,
bit vm;
let Inst{31-29} = nf;
- let Inst{28-26} = mop.Value;
+ let Inst{28} = mew;
+ let Inst{27-26} = MOPSTUnitStride.Value;
let Inst{25} = vm;
let Inst{24-20} = sumop.Value;
let Inst{19-15} = rs1;
- let Inst{14-12} = width.Value;
+ let Inst{14-12} = width;
let Inst{11-7} = vs3;
let Opcode = OPC_STORE_FP.Value;
let Uses = [VTYPE, VL];
}
-class RVInstVSS<bits<3> nf, RISCVMOP mop, RISCVWidth width,
+class RVInstVSS<bits<3> nf, bit mew, bits<3> width,
dag outs, dag ins, string opcodestr, string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
bits<5> rs2;
@@ -268,18 +310,19 @@ class RVInstVSS<bits<3> nf, RISCVMOP mop, RISCVWidth width,
bit vm;
let Inst{31-29} = nf;
- let Inst{28-26} = mop.Value;
+ let Inst{28} = mew;
+ let Inst{27-26} = MOPSTStrided.Value;
let Inst{25} = vm;
let Inst{24-20} = rs2;
let Inst{19-15} = rs1;
- let Inst{14-12} = width.Value;
+ let Inst{14-12} = width;
let Inst{11-7} = vs3;
let Opcode = OPC_STORE_FP.Value;
let Uses = [VTYPE, VL];
}
-class RVInstVSX<bits<3> nf, RISCVMOP mop, RISCVWidth width,
+class RVInstVSX<bits<3> nf, bit mew, RISCVMOP mop, bits<3> width,
dag outs, dag ins, string opcodestr, string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
bits<5> vs2;
@@ -288,13 +331,33 @@ class RVInstVSX<bits<3> nf, RISCVMOP mop, RISCVWidth width,
bit vm;
let Inst{31-29} = nf;
- let Inst{28-26} = mop.Value;
+ let Inst{28} = mew;
+ let Inst{27-26} = mop.Value;
let Inst{25} = vm;
let Inst{24-20} = vs2;
let Inst{19-15} = rs1;
- let Inst{14-12} = width.Value;
+ let Inst{14-12} = width;
let Inst{11-7} = vs3;
let Opcode = OPC_STORE_FP.Value;
let Uses = [VTYPE, VL];
}
+
+class RVInstVAMO<RISCVAMOOP amoop, bits<3> width, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> vs2;
+ bits<5> rs1;
+ bit wd;
+ bit vm;
+
+ let Inst{31-27} = amoop.Value;
+ let Inst{26} = wd;
+ let Inst{25} = vm;
+ let Inst{24-20} = vs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = width;
+ let Opcode = OPC_AMO.Value;
+
+ let Uses = [VTYPE, VL];
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 7b6ea002c7b7..45a5e10e26a3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -11,10 +11,10 @@
//===----------------------------------------------------------------------===//
#include "RISCVInstrInfo.h"
+#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
-#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -45,6 +45,7 @@ unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
case RISCV::LBU:
case RISCV::LH:
case RISCV::LHU:
+ case RISCV::FLH:
case RISCV::LW:
case RISCV::FLW:
case RISCV::LWU:
@@ -70,6 +71,7 @@ unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
+ case RISCV::FSH:
case RISCV::FSW:
case RISCV::SD:
case RISCV::FSD:
@@ -96,18 +98,37 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // FPR->FPR copies
+ // FPR->FPR copies and VR->VR copies.
unsigned Opc;
- if (RISCV::FPR32RegClass.contains(DstReg, SrcReg))
+ bool IsScalableVector = false;
+ if (RISCV::FPR16RegClass.contains(DstReg, SrcReg))
+ Opc = RISCV::FSGNJ_H;
+ else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg))
Opc = RISCV::FSGNJ_S;
else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg))
Opc = RISCV::FSGNJ_D;
- else
+ else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
+ Opc = RISCV::PseudoVMV1R_V;
+ IsScalableVector = true;
+ } else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
+ Opc = RISCV::PseudoVMV2R_V;
+ IsScalableVector = true;
+ } else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
+ Opc = RISCV::PseudoVMV4R_V;
+ IsScalableVector = true;
+ } else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
+ Opc = RISCV::PseudoVMV8R_V;
+ IsScalableVector = true;
+ } else
llvm_unreachable("Impossible reg-to-reg copy");
- BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addReg(SrcReg, getKillRegState(KillSrc));
+ if (IsScalableVector)
+ BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -119,11 +140,18 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (I != MBB.end())
DL = I->getDebugLoc();
- unsigned Opcode;
+ MachineFunction *MF = MBB.getParent();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
+ unsigned Opcode;
if (RISCV::GPRRegClass.hasSubClassEq(RC))
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::SW : RISCV::SD;
+ else if (RISCV::FPR16RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::FSH;
else if (RISCV::FPR32RegClass.hasSubClassEq(RC))
Opcode = RISCV::FSW;
else if (RISCV::FPR64RegClass.hasSubClassEq(RC))
@@ -134,7 +162,8 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
- .addImm(0);
+ .addImm(0)
+ .addMemOperand(MMO);
}
void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -146,11 +175,18 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (I != MBB.end())
DL = I->getDebugLoc();
- unsigned Opcode;
+ MachineFunction *MF = MBB.getParent();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
+ unsigned Opcode;
if (RISCV::GPRRegClass.hasSubClassEq(RC))
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::LW : RISCV::LD;
+ else if (RISCV::FPR16RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::FLH;
else if (RISCV::FPR32RegClass.hasSubClassEq(RC))
Opcode = RISCV::FLW;
else if (RISCV::FPR64RegClass.hasSubClassEq(RC))
@@ -158,7 +194,10 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
else
llvm_unreachable("Can't load this register from stack slot");
- BuildMI(MBB, I, DL, get(Opcode), DstReg).addFrameIndex(FI).addImm(0);
+ BuildMI(MBB, I, DL, get(Opcode), DstReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
}
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
@@ -512,17 +551,48 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
const unsigned Opcode = MI.getOpcode();
- switch(Opcode) {
- default:
- break;
- case RISCV::ADDI:
- case RISCV::ORI:
- case RISCV::XORI:
- return (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0);
+ switch (Opcode) {
+ default:
+ break;
+ case RISCV::FSGNJ_D:
+ case RISCV::FSGNJ_S:
+ // The canonical floating-point move is fsgnj rd, rs, rs.
+ return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
+ MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
+ case RISCV::ADDI:
+ case RISCV::ORI:
+ case RISCV::XORI:
+ return (MI.getOperand(1).isReg() &&
+ MI.getOperand(1).getReg() == RISCV::X0) ||
+ (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
}
return MI.isAsCheapAsAMove();
}
+Optional<DestSourcePair>
+RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
+ if (MI.isMoveReg())
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case RISCV::ADDI:
+ // Operand 1 can be a frameindex but callers expect registers
+ if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
+ MI.getOperand(2).getImm() == 0)
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+ break;
+ case RISCV::FSGNJ_D:
+ case RISCV::FSGNJ_S:
+ // The canonical floating-point move is fsgnj rd, rs, rs.
+ if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
+ MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+ break;
+ }
+ return None;
+}
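Both isCopyInstrImpl and isAsCheapAsAMove treat "fsgnj rd, rs, rs" as a move: the result takes its magnitude from the first source and its sign from the second, which is the identity when both sources are the same register. std::copysign follows the same rule, so a small check outside LLVM illustrates why the pattern is a plain copy:

  #include <cassert>
  #include <cmath>

  int main() {
    for (double X : {1.5, -0.0, -3.25}) {
      double Moved = std::copysign(X, X); // models fsgnj fd, fs, fs
      assert(Moved == X && std::signbit(Moved) == std::signbit(X));
    }
    return 0;
  }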
+
bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const {
const MCInstrInfo *MCII = STI.getInstrInfo();
@@ -551,15 +621,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_SIMM12:
Ok = isInt<12>(Imm);
break;
- case RISCVOp::OPERAND_SIMM13_LSB0:
- Ok = isShiftedInt<12, 1>(Imm);
- break;
case RISCVOp::OPERAND_UIMM20:
Ok = isUInt<20>(Imm);
break;
- case RISCVOp::OPERAND_SIMM21_LSB0:
- Ok = isShiftedInt<20, 1>(Imm);
- break;
case RISCVOp::OPERAND_UIMMLOG2XLEN:
if (STI.getTargetTriple().isArch64Bit())
Ok = isUInt<6>(Imm);
@@ -699,10 +763,7 @@ outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo(
return !LRU.available(RISCV::X5);
};
- RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- CannotInsertCall),
- RepeatedSequenceLocs.end());
+ llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 21bc508cdc9c..0b034210aa55 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -83,6 +83,9 @@ public:
bool isAsCheapAsAMove(const MachineInstr &MI) const override;
+ Optional<DestSourcePair>
+ isCopyInstrImpl(const MachineInstr &MI) const override;
+
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
@@ -134,23 +137,5 @@ protected:
const RISCVSubtarget &STI;
};
-namespace RISCV {
-// Match with the definitions in RISCVInstrFormatsV.td
-enum RVVConstraintType {
- NoConstraint = 0,
- WidenV = 1,
- WidenW = 2,
- WidenCvt = 3,
- Narrow = 4,
- Iota = 5,
- SlideUp = 6,
- Vrgather = 7,
- Vcompress = 8,
-
- ConstraintOffset = 5,
- ConstraintMask = 0b1111
-};
-} // end namespace RISCV
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 8547f791092b..a07b589e77fb 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -25,6 +25,8 @@ def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>;
def SDT_RISCVSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>,
SDTCisSameAs<0, 4>,
SDTCisSameAs<4, 5>]>;
+def SDT_RISCVReadCycleWide : SDTypeProfile<2, 0, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
// Target-independent nodes, but with target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
@@ -44,8 +46,7 @@ def riscv_sret_flag : SDNode<"RISCVISD::SRET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
def riscv_mret_flag : SDNode<"RISCVISD::MRET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
-def riscv_selectcc : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC,
- [SDNPInGlue]>;
+def riscv_selectcc : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC>;
def riscv_tail : SDNode<"RISCVISD::TAIL", SDT_RISCVCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
@@ -53,6 +54,10 @@ def riscv_sllw : SDNode<"RISCVISD::SLLW", SDTIntShiftOp>;
def riscv_sraw : SDNode<"RISCVISD::SRAW", SDTIntShiftOp>;
def riscv_srlw : SDNode<"RISCVISD::SRLW", SDTIntShiftOp>;
+def riscv_read_cycle_wide : SDNode<"RISCVISD::READ_CYCLE_WIDE",
+ SDT_RISCVReadCycleWide,
+ [SDNPHasChain, SDNPSideEffect]>;
+
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@@ -161,6 +166,7 @@ def simm12_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
// A 13-bit signed immediate where the least significant bit is zero.
def simm13_lsb0 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<13, "Lsb0">;
+ let PrintMethod = "printBranchOperand";
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<13>";
let MCOperandPredicate = [{
@@ -169,8 +175,7 @@ def simm13_lsb0 : Operand<OtherVT> {
return isShiftedInt<12, 1>(Imm);
return MCOp.isBareSymbolRef();
}];
- let OperandType = "OPERAND_SIMM13_LSB0";
- let OperandNamespace = "RISCVOp";
+ let OperandType = "OPERAND_PCREL";
}
class UImm20Operand : Operand<XLenVT> {
@@ -200,6 +205,7 @@ def Simm21Lsb0JALAsmOperand : SImmAsmOperand<21, "Lsb0JAL"> {
// A 21-bit signed immediate where the least significant bit is zero.
def simm21_lsb0_jal : Operand<OtherVT> {
let ParserMatchClass = Simm21Lsb0JALAsmOperand;
+ let PrintMethod = "printBranchOperand";
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<21>";
let MCOperandPredicate = [{
@@ -208,8 +214,7 @@ def simm21_lsb0_jal : Operand<OtherVT> {
return isShiftedInt<20, 1>(Imm);
return MCOp.isBareSymbolRef();
}];
- let OperandType = "OPERAND_SIMM21_LSB0";
- let OperandNamespace = "RISCVOp";
+ let OperandType = "OPERAND_PCREL";
}
def BareSymbol : AsmOperandClass {
@@ -291,6 +296,11 @@ def immbottomxlenset : ImmLeaf<XLenVT, [{
return countTrailingOnes<uint64_t>(Imm) >= 5;
}]>;
+// A 6-bit constant greater than 32.
+def uimm6gt32 : ImmLeaf<XLenVT, [{
+ return isUInt<6>(Imm) && Imm > 32;
+}]>;
+
// Addressing modes.
// Necessary because a frameindex can't be matched directly in a pattern.
def AddrFI : ComplexPattern<iPTR, 1, "SelectAddrFI", [frameindex], []>;
@@ -316,6 +326,25 @@ def NegImm : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
+// Return an immediate value minus 32.
+def ImmSub32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() - 32, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+// Return an immediate subtracted from XLen.
+def ImmSubFromXLen : SDNodeXForm<imm, [{
+ uint64_t XLen = Subtarget->getXLen();
+ return CurDAG->getTargetConstant(XLen - N->getZExtValue(), SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+// Return an immediate subtracted from 32.
+def ImmSubFrom32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N),
+ N->getValueType(0));
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction Formats
//===----------------------------------------------------------------------===//
@@ -368,12 +397,14 @@ class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
: RVInstR<funct7, funct3, OPC_OP, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
opcodestr, "$rd, $rs1, $rs2">;
-let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+let hasNoSchedulingInfo = 1,
+ hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class CSR_ir<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins csr_sysreg:$imm12, GPR:$rs1),
opcodestr, "$rd, $imm12, $rs1">, Sched<[WriteCSR, ReadCSR]>;
-let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+let hasNoSchedulingInfo = 1,
+ hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class CSR_ii<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd),
(ins csr_sysreg:$imm12, uimm5:$rs1),
@@ -791,6 +822,11 @@ def : MnemonicAlias<"move", "mv">;
def : MnemonicAlias<"scall", "ecall">;
def : MnemonicAlias<"sbreak", "ebreak">;
+// This alias was added to the spec in December 2020. Don't print it by default
+// to allow assembly we print to be compatible with versions of GNU assembler
+// that don't support this alias.
+def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>;
+
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//
@@ -815,18 +851,30 @@ def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
return isOrEquivalentToAdd(N);
}]>;
def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
- return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+ return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
}]>;
def sexti32 : PatFrags<(ops node:$src),
[(sext_inreg node:$src, i32),
(assertsexti32 node:$src)]>;
def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
- return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+ return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
}]>;
def zexti32 : PatFrags<(ops node:$src),
[(and node:$src, 0xffffffff),
(assertzexti32 node:$src)]>;
+def SRLIWPat : PatFrag<(ops node:$A, node:$B),
+ (srl (and node:$A, imm), node:$B), [{
+ return MatchSRLIW(N);
+}]>;
+
+// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
+// on RV64). Also used to optimize the same sequence without SLLIUW.
+def SLLIUWPat : PatFrag<(ops node:$A, node:$B),
+ (and (shl node:$A, node:$B), imm), [{
+ return MatchSLLIUW(N);
+}]>;
+
/// Immediates
def : Pat<(simm12:$imm), (ADDI X0, simm12:$imm)>;
@@ -857,6 +905,10 @@ class shiftop<SDPatternOperator operator>
: PatFrags<(ops node:$val, node:$count),
[(operator node:$val, node:$count),
(operator node:$val, (and node:$count, immbottomxlenset))]>;
+class shiftopw<SDPatternOperator operator>
+ : PatFrags<(ops node:$val, node:$count),
+ [(operator node:$val, node:$count),
+ (operator node:$val, (and node:$count, (XLenVT 31)))]>;
def : PatGprGpr<shiftop<shl>, SLL>;
def : PatGprGpr<shiftop<srl>, SRL>;
@@ -873,10 +925,10 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
/// FrameIndex calculations
-def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12),
- (ADDI (i32 AddrFI:$Rs), simm12:$imm12)>;
-def : Pat<(IsOrAdd (i32 AddrFI:$Rs), simm12:$imm12),
- (ADDI (i32 AddrFI:$Rs), simm12:$imm12)>;
+def : Pat<(add (XLenVT AddrFI:$Rs), simm12:$imm12),
+ (ADDI (XLenVT AddrFI:$Rs), simm12:$imm12)>;
+def : Pat<(IsOrAdd (XLenVT AddrFI:$Rs), simm12:$imm12),
+ (ADDI (XLenVT AddrFI:$Rs), simm12:$imm12)>;
/// Setcc
@@ -938,15 +990,18 @@ def : BccSwapPat<setle, BGE>;
def : BccSwapPat<setugt, BLTU>;
def : BccSwapPat<setule, BGEU>;
-// An extra pattern is needed for a brcond without a setcc (i.e. where the
+// Extra patterns are needed for a brcond without a setcc (i.e. where the
// condition was calculated elsewhere).
def : Pat<(brcond GPR:$cond, bb:$imm12), (BNE GPR:$cond, X0, bb:$imm12)>;
+// In this pattern, the `(xor $cond, 1)` functions like (boolean) `not`, as the
+// `brcond` only uses the lowest bit.
+def : Pat<(brcond (XLenVT (xor GPR:$cond, 1)), bb:$imm12),
+ (BEQ GPR:$cond, X0, bb:$imm12)>;
let isBarrier = 1, isBranch = 1, isTerminator = 1 in
def PseudoBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>,
PseudoInstExpansion<(JAL X0, simm21_lsb0_jal:$imm20)>;
-let isCall = 1, Defs=[X1] in
let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in
def PseudoBRIND : Pseudo<(outs), (ins GPR:$rs1, simm12:$imm12), []>,
PseudoInstExpansion<(JALR X0, GPR:$rs1, simm12:$imm12)>;
@@ -1038,6 +1093,25 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.gd", "$dst, $src">;
+
+/// Sign/Zero Extends
+
+// There are single-instruction versions of these in Zbb, so disable these
+// Pseudos if that extension is present.
+let hasSideEffects = 0, mayLoad = 0,
+ mayStore = 0, isCodeGenOnly = 0, isAsmParserOnly = 1 in {
+def PseudoSEXT_B : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "sext.b", "$rd, $rs">;
+def PseudoSEXT_H : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "sext.h", "$rd, $rs">;
+// rv64's sext.w is defined above, using InstAlias<"sext.w ...
+// zext.b is defined above, using InstAlias<"zext.b ...
+def PseudoZEXT_H : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.h", "$rd, $rs">;
+} // hasSideEffects = 0, ...
+
+let Predicates = [IsRV64], hasSideEffects = 0, mayLoad = 0, mayStore = 0,
+ isCodeGenOnly = 0, isAsmParserOnly = 1 in {
+def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs">;
+} // Predicates = [IsRV64], ...
+
/// Loads
multiclass LdPat<PatFrag LoadOp, RVInst Inst> {
@@ -1108,12 +1182,23 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
/// RV64 patterns
+let Predicates = [IsRV64, NotHasStdExtZba] in {
+def : Pat<(and GPR:$rs1, 0xffffffff), (SRLI (SLLI GPR:$rs1, 32), 32)>;
+
+// If we're shifting a 32-bit zero extended value left by 0-31 bits, use 2
+// shifts instead of 3. This can occur when unsigned is used to index an array.
+def : Pat<(shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt),
+ (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
+// shl/and can appear in the other order too.
+def : Pat<(SLLIUWPat GPR:$rs1, uimm5:$shamt),
+ (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
+}
+
let Predicates = [IsRV64] in {
/// sext and zext
def : Pat<(sext_inreg GPR:$rs1, i32), (ADDIW GPR:$rs1, 0)>;
-def : Pat<(and GPR:$rs1, 0xffffffff), (SRLI (SLLI GPR:$rs1, 32), 32)>;
/// ALU operations
@@ -1125,14 +1210,18 @@ def : Pat<(sext_inreg (sub GPR:$rs1, GPR:$rs2), i32),
(SUBW GPR:$rs1, GPR:$rs2)>;
def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32),
(SLLIW GPR:$rs1, uimm5:$shamt)>;
-// (srl (zexti32 ...), uimm5:$shamt) is matched with custom code due to the
-// need to undo manipulation of the mask value performed by DAGCombine.
+def : Pat<(SRLIWPat GPR:$rs1, uimm5:$shamt),
+ (SRLIW GPR:$rs1, uimm5:$shamt)>;
+def : Pat<(srl (shl GPR:$rs1, (i64 32)), uimm6gt32:$shamt),
+ (SRLIW GPR:$rs1, (ImmSub32 uimm6gt32:$shamt))>;
def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
(SRAIW GPR:$rs1, uimm5:$shamt)>;
+def : Pat<(sra (shl GPR:$rs1, (i64 32)), uimm6gt32:$shamt),
+ (SRAIW GPR:$rs1, (ImmSub32 uimm6gt32:$shamt))>;
-def : PatGprGpr<riscv_sllw, SLLW>;
-def : PatGprGpr<riscv_srlw, SRLW>;
-def : PatGprGpr<riscv_sraw, SRAW>;
+def : PatGprGpr<shiftopw<riscv_sllw>, SLLW>;
+def : PatGprGpr<shiftopw<riscv_srlw>, SRLW>;
+def : PatGprGpr<shiftopw<riscv_sraw>, SRAW>;
/// Loads
@@ -1153,9 +1242,10 @@ let Predicates = [IsRV64] in
def : Pat<(readcyclecounter), (CSRRS CYCLE.Encoding, X0)>;
// On RV32, ReadCycleWide will be expanded to the suggested loop reading both
// halves of the 64-bit "cycle" CSR.
-let Predicates = [IsRV32], usesCustomInserter = 1, hasSideEffects = 0,
-mayLoad = 0, mayStore = 0, hasNoSchedulingInfo = 1 in
-def ReadCycleWide : Pseudo<(outs GPR:$lo, GPR:$hi), (ins), [], "", "">;
+let Predicates = [IsRV32], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
+def ReadCycleWide : Pseudo<(outs GPR:$lo, GPR:$hi), (ins),
+ [(set GPR:$lo, GPR:$hi, (riscv_read_cycle_wide))],
+ "", "">;
/// traps
@@ -1178,3 +1268,4 @@ include "RISCVInstrInfoD.td"
include "RISCVInstrInfoC.td"
include "RISCVInstrInfoB.td"
include "RISCVInstrInfoV.td"
+include "RISCVInstrInfoZfh.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index afac509f743d..7888ac7bac8e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -7,16 +7,21 @@
//===----------------------------------------------------------------------===//
//
// This file describes the RISC-V instructions from the standard 'B' Bitmanip
-// extension, version 0.92.
+// extension, version 0.93.
// This version is still experimental as the 'B' extension hasn't been
// ratified yet.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Operand definitions.
+// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
+def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>;
+def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>;
+def riscv_fslw : SDNode<"RISCVISD::FSLW", SDTIntShiftDOp>;
+def riscv_fsrw : SDNode<"RISCVISD::FSRW", SDTIntShiftDOp>;
+
def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
let Name = "UImmLog2XLenHalf";
let RenderMethod = "addImmOperands";
@@ -40,6 +45,44 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
}];
}
+// Checks if this mask has a single 0 bit and cannot be used with ANDI.
+def BCLRMask : ImmLeaf<XLenVT, [{
+ if (Subtarget->is64Bit())
+ return !isInt<12>(Imm) && isPowerOf2_64(~Imm);
+ return !isInt<12>(Imm) && isPowerOf2_32(~Imm);
+}]>;
+
+// Checks if this mask has a single 1 bit and cannot be used with ORI/XORI.
+def BSETINVMask : ImmLeaf<XLenVT, [{
+ if (Subtarget->is64Bit())
+ return !isInt<12>(Imm) && isPowerOf2_64(Imm);
+ return !isInt<12>(Imm) && isPowerOf2_32(Imm);
+}]>;
+
+def BCLRXForm : SDNodeXForm<imm, [{
+ // Find the lowest 0.
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingOnes(),
+ SDLoc(N), N->getValueType(0));
+}]>;
+
+def BSETINVXForm : SDNodeXForm<imm, [{
+ // Find the lowest 1.
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(),
+ SDLoc(N), N->getValueType(0));
+}]>;
+
+// Similar to above, but makes sure the immediate has 33 sign bits. When used
+// with an AND/OR/XOR where the other operand has at least 33 sign bits, the
+// result will have 33 sign bits. This can match BCLRIW/BSETIW/BINVIW.
+def BCLRWMask : ImmLeaf<i64, [{
+ // After checking the sign bits, truncate to 32 bits for power of 2 check.
+ return isInt<32>(Imm) && !isInt<12>(Imm) && isPowerOf2_32(~Imm);
+}]>;
+
+def BSETINVWMask : ImmLeaf<i64, [{
+ return isInt<32>(Imm) && !isInt<12>(Imm) && isPowerOf2_32(Imm);
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -56,11 +99,6 @@ class RVBUnary<bits<7> funct7, bits<5> funct5, bits<3> funct3,
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVBALUW_ri<bits<3> funct3, string opcodestr>
- : RVInstI<funct3, OPC_OP_IMM_32, (outs GPR:$rd),
- (ins GPR:$rs1, simm12:$imm12), opcodestr, "$rd, $rs1, $imm12">;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVBShift_ri<bits<5> funct5, bits<3> funct3, RISCVOpcode opcode,
string opcodestr>
: RVInstI<funct3, opcode, (outs GPR:$rd),
@@ -147,10 +185,11 @@ def ORN : ALU_rr<0b0100000, 0b110, "orn">, Sched<[]>;
def XNOR : ALU_rr<0b0100000, 0b100, "xnor">, Sched<[]>;
} // Predicates = [HasStdExtZbbOrZbp]
-let Predicates = [HasStdExtZbb] in {
-def SLO : ALU_rr<0b0010000, 0b001, "slo">, Sched<[]>;
-def SRO : ALU_rr<0b0010000, 0b101, "sro">, Sched<[]>;
-} // Predicates = [HasStdExtZbb]
+let Predicates = [HasStdExtZba] in {
+def SH1ADD : ALU_rr<0b0010000, 0b010, "sh1add">, Sched<[]>;
+def SH2ADD : ALU_rr<0b0010000, 0b100, "sh2add">, Sched<[]>;
+def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">, Sched<[]>;
+} // Predicates = [HasStdExtZba]
let Predicates = [HasStdExtZbbOrZbp] in {
def ROL : ALU_rr<0b0110000, 0b001, "rol">, Sched<[]>;
@@ -158,10 +197,10 @@ def ROR : ALU_rr<0b0110000, 0b101, "ror">, Sched<[]>;
} // Predicates = [HasStdExtZbbOrZbp]
let Predicates = [HasStdExtZbs] in {
-def SBCLR : ALU_rr<0b0100100, 0b001, "sbclr">, Sched<[]>;
-def SBSET : ALU_rr<0b0010100, 0b001, "sbset">, Sched<[]>;
-def SBINV : ALU_rr<0b0110100, 0b001, "sbinv">, Sched<[]>;
-def SBEXT : ALU_rr<0b0100100, 0b101, "sbext">, Sched<[]>;
+def BCLR : ALU_rr<0b0100100, 0b001, "bclr">, Sched<[]>;
+def BSET : ALU_rr<0b0010100, 0b001, "bset">, Sched<[]>;
+def BINV : ALU_rr<0b0110100, 0b001, "binv">, Sched<[]>;
+def BEXT : ALU_rr<0b0100100, 0b101, "bext">, Sched<[]>;
} // Predicates = [HasStdExtZbs]
let Predicates = [HasStdExtZbp] in {
@@ -169,19 +208,20 @@ def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>;
def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>;
} // Predicates = [HasStdExtZbp]
-let Predicates = [HasStdExtZbb] in {
-def SLOI : RVBShift_ri<0b00100, 0b001, OPC_OP_IMM, "sloi">, Sched<[]>;
-def SROI : RVBShift_ri<0b00100, 0b101, OPC_OP_IMM, "sroi">, Sched<[]>;
-} // Predicates = [HasStdExtZbb]
+let Predicates = [HasStdExtZbp] in {
+def XPERMN : ALU_rr<0b0010100, 0b010, "xperm.n">, Sched<[]>;
+def XPERMB : ALU_rr<0b0010100, 0b100, "xperm.b">, Sched<[]>;
+def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
+} // Predicates = [HasStdExtZbp]
let Predicates = [HasStdExtZbbOrZbp] in
def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">, Sched<[]>;
let Predicates = [HasStdExtZbs] in {
-def SBCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "sbclri">, Sched<[]>;
-def SBSETI : RVBShift_ri<0b00101, 0b001, OPC_OP_IMM, "sbseti">, Sched<[]>;
-def SBINVI : RVBShift_ri<0b01101, 0b001, OPC_OP_IMM, "sbinvi">, Sched<[]>;
-def SBEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "sbexti">, Sched<[]>;
+def BCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "bclri">, Sched<[]>;
+def BSETI : RVBShift_ri<0b00101, 0b001, OPC_OP_IMM, "bseti">, Sched<[]>;
+def BINVI : RVBShift_ri<0b01101, 0b001, OPC_OP_IMM, "binvi">, Sched<[]>;
+def BEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "bexti">, Sched<[]>;
} // Predicates = [HasStdExtZbs]
let Predicates = [HasStdExtZbp] in {
@@ -207,7 +247,7 @@ def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, RISCVOpcode<0b0010011>, "clz">,
Sched<[]>;
def CTZ : RVBUnary<0b0110000, 0b00001, 0b001, RISCVOpcode<0b0010011>, "ctz">,
Sched<[]>;
-def PCNT : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0010011>, "pcnt">,
+def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0010011>, "cpop">,
Sched<[]>;
} // Predicates = [HasStdExtZbb]
@@ -256,8 +296,8 @@ def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh">, Sched<[]>;
let Predicates = [HasStdExtZbb] in {
def MIN : ALU_rr<0b0000101, 0b100, "min">, Sched<[]>;
-def MAX : ALU_rr<0b0000101, 0b101, "max">, Sched<[]>;
-def MINU : ALU_rr<0b0000101, 0b110, "minu">, Sched<[]>;
+def MINU : ALU_rr<0b0000101, 0b101, "minu">, Sched<[]>;
+def MAX : ALU_rr<0b0000101, 0b110, "max">, Sched<[]>;
def MAXU : ALU_rr<0b0000101, 0b111, "maxu">, Sched<[]>;
} // Predicates = [HasStdExtZbb]
@@ -267,23 +307,23 @@ def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>;
} // Predicates = [HasStdExtZbp]
let Predicates = [HasStdExtZbe] in {
-def BDEP : ALU_rr<0b0100100, 0b110, "bdep">, Sched<[]>;
-def BEXT : ALU_rr<0b0000100, 0b110, "bext">, Sched<[]>;
+// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with
+// bext in the 0.93 spec.
+def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, Sched<[]>;
+def BCOMPRESS : ALU_rr<0b0000100, 0b110, "bcompress">, Sched<[]>;
} // Predicates = [HasStdExtZbe]
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbp] in {
def PACK : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>;
def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>;
-} // Predicates = [HasStdExtZbbOrZbp]
+def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>;
+} // Predicates = [HasStdExtZbp]
let Predicates = [HasStdExtZbm, IsRV64] in {
def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>;
def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">, Sched<[]>;
} // Predicates = [HasStdExtZbm, IsRV64]
-let Predicates = [HasStdExtZbbOrZbp] in
-def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>;
-
let Predicates = [HasStdExtZbf] in
def BFP : ALU_rr<0b0100100, 0b111, "bfp">, Sched<[]>;
@@ -292,18 +332,12 @@ def SHFLI : RVBShfl_ri<0b000010, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>;
def UNSHFLI : RVBShfl_ri<0b000010, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>;
} // Predicates = [HasStdExtZbp]
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def ADDIWU : RVBALUW_ri<0b100, "addiwu">, Sched<[]>;
-def SLLIUW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slliu.w">, Sched<[]>;
-def ADDWU : ALUW_rr<0b0000101, 0b000, "addwu">, Sched<[]>;
-def SUBWU : ALUW_rr<0b0100101, 0b000, "subwu">, Sched<[]>;
-def ADDUW : ALUW_rr<0b0000100, 0b000, "addu.w">, Sched<[]>;
-def SUBUW : ALUW_rr<0b0100100, 0b000, "subu.w">, Sched<[]>;
-} // Predicates = [HasStdExtZbb, IsRV64]
-
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def SLOW : ALUW_rr<0b0010000, 0b001, "slow">, Sched<[]>;
-def SROW : ALUW_rr<0b0010000, 0b101, "srow">, Sched<[]>;
+let Predicates = [HasStdExtZba, IsRV64] in {
+def SLLIUW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slli.uw">, Sched<[]>;
+def ADDUW : ALUW_rr<0b0000100, 0b000, "add.uw">, Sched<[]>;
+def SH1ADDUW : ALUW_rr<0b0010000, 0b010, "sh1add.uw">, Sched<[]>;
+def SH2ADDUW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">, Sched<[]>;
+def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">, Sched<[]>;
} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
@@ -312,10 +346,10 @@ def RORW : ALUW_rr<0b0110000, 0b101, "rorw">, Sched<[]>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
let Predicates = [HasStdExtZbs, IsRV64] in {
-def SBCLRW : ALUW_rr<0b0100100, 0b001, "sbclrw">, Sched<[]>;
-def SBSETW : ALUW_rr<0b0010100, 0b001, "sbsetw">, Sched<[]>;
-def SBINVW : ALUW_rr<0b0110100, 0b001, "sbinvw">, Sched<[]>;
-def SBEXTW : ALUW_rr<0b0100100, 0b101, "sbextw">, Sched<[]>;
+def BCLRW : ALUW_rr<0b0100100, 0b001, "bclrw">, Sched<[]>;
+def BSETW : ALUW_rr<0b0010100, 0b001, "bsetw">, Sched<[]>;
+def BINVW : ALUW_rr<0b0110100, 0b001, "binvw">, Sched<[]>;
+def BEXTW : ALUW_rr<0b0100100, 0b101, "bextw">, Sched<[]>;
} // Predicates = [HasStdExtZbs, IsRV64]
let Predicates = [HasStdExtZbp, IsRV64] in {
@@ -323,20 +357,19 @@ def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>;
def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>;
} // Predicates = [HasStdExtZbp, IsRV64]
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def SLOIW : RVBShiftW_ri<0b0010000, 0b001, OPC_OP_IMM_32, "sloiw">, Sched<[]>;
-def SROIW : RVBShiftW_ri<0b0010000, 0b101, OPC_OP_IMM_32, "sroiw">, Sched<[]>;
-} // Predicates = [HasStdExtZbb, IsRV64]
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
+} // Predicates = [HasStdExtZbp, IsRV64]
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, Sched<[]>;
let Predicates = [HasStdExtZbs, IsRV64] in {
-def SBCLRIW : RVBShiftW_ri<0b0100100, 0b001, OPC_OP_IMM_32, "sbclriw">,
+def BCLRIW : RVBShiftW_ri<0b0100100, 0b001, OPC_OP_IMM_32, "bclriw">,
Sched<[]>;
-def SBSETIW : RVBShiftW_ri<0b0010100, 0b001, OPC_OP_IMM_32, "sbsetiw">,
+def BSETIW : RVBShiftW_ri<0b0010100, 0b001, OPC_OP_IMM_32, "bsetiw">,
Sched<[]>;
-def SBINVIW : RVBShiftW_ri<0b0110100, 0b001, OPC_OP_IMM_32, "sbinviw">,
+def BINVIW : RVBShiftW_ri<0b0110100, 0b001, OPC_OP_IMM_32, "binviw">,
Sched<[]>;
} // Predicates = [HasStdExtZbs, IsRV64]
@@ -359,34 +392,77 @@ def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, RISCVOpcode<0b0011011>,
"clzw">, Sched<[]>;
def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, RISCVOpcode<0b0011011>,
"ctzw">, Sched<[]>;
-def PCNTW : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0011011>,
- "pcntw">, Sched<[]>;
+def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0011011>,
+ "cpopw">, Sched<[]>;
} // Predicates = [HasStdExtZbb, IsRV64]
-let Predicates = [HasStdExtZbc, IsRV64] in {
-def CLMULW : ALUW_rr<0b0000101, 0b001, "clmulw">, Sched<[]>;
-def CLMULRW : ALUW_rr<0b0000101, 0b010, "clmulrw">, Sched<[]>;
-def CLMULHW : ALUW_rr<0b0000101, 0b011, "clmulhw">, Sched<[]>;
-} // Predicates = [HasStdExtZbc, IsRV64]
-
let Predicates = [HasStdExtZbp, IsRV64] in {
def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>;
def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>;
} // Predicates = [HasStdExtZbp, IsRV64]
let Predicates = [HasStdExtZbe, IsRV64] in {
-def BDEPW : ALUW_rr<0b0100100, 0b110, "bdepw">, Sched<[]>;
-def BEXTW : ALUW_rr<0b0000100, 0b110, "bextw">, Sched<[]>;
+// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with
+// bextw in the 0.93 spec.
+def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>;
+def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>;
} // Predicates = [HasStdExtZbe, IsRV64]
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+let Predicates = [HasStdExtZbp, IsRV64] in {
def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>;
def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>;
-} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+} // Predicates = [HasStdExtZbp, IsRV64]
let Predicates = [HasStdExtZbf, IsRV64] in
def BFPW : ALUW_rr<0b0100100, 0b111, "bfpw">, Sched<[]>;
+let Predicates = [HasStdExtZbbOrZbp, IsRV32] in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def ZEXTH_RV32 : RVInstR<0b0000100, 0b100, OPC_OP, (outs GPR:$rd),
+ (ins GPR:$rs1), "zext.h", "$rd, $rs1">, Sched<[]> {
+ let rs2 = 0b00000;
+}
+} // Predicates = [HasStdExtZbbOrZbp, IsRV32]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def ZEXTH_RV64 : RVInstR<0b0000100, 0b100, OPC_OP_32, (outs GPR:$rd),
+ (ins GPR:$rs1), "zext.h", "$rd, $rs1">, Sched<[]> {
+ let rs2 = 0b00000;
+}
+} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+
+// We treat rev8 and orc.b as standalone instructions even though they use a
+// portion of the encodings for grevi and gorci. This allows us to support only
+// those encodings when only Zbb is enabled. We do this even when grevi and
+// gorci are available with Zbp. Trying to use 'HasStdExtZbb, NotHasStdExtZbp'
+// causes diagnostics to suggest that Zbp rather than Zbb is required for rev8
+// or gorci. Since Zbb is closer to being finalized than Zbp this will be
+// misleading to users.
+let Predicates = [HasStdExtZbbOrZbp, IsRV32] in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def REV8_RV32 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
+ "rev8", "$rd, $rs1">, Sched<[]> {
+ let imm12 = { 0b01101, 0b0011000 };
+}
+} // Predicates = [HasStdExtZbbOrZbp, IsRV32]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def REV8_RV64 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
+ "rev8", "$rd, $rs1">, Sched<[]> {
+ let imm12 = { 0b01101, 0b0111000 };
+}
+} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def ORCB : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
+ "orc.b", "$rd, $rs1">, Sched<[]> {
+ let imm12 = { 0b00101, 0b0000111 };
+}
+} // Predicates = [HasStdExtZbbOrZbp]
+
//===----------------------------------------------------------------------===//
// Future compressed instructions
//===----------------------------------------------------------------------===//
@@ -415,208 +491,123 @@ def C_NOT : RVBInstC<0b00, "c.not">, Sched<[]>;
def C_NEG : RVBInstC<0b01, "c.neg">, Sched<[]>;
} // DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtC]
-let DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64] in
+let DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtZba, HasStdExtC, IsRV64] in
def C_ZEXTW : RVBInstC<0b10, "c.zext.w">, Sched<[]>;
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZbb, IsRV32] in {
-def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF)>;
-def : InstAlias<"zext.h $rd, $rs", (PACK GPR:$rd, GPR:$rs, X0)>;
-} // Predicates = [HasStdExtZbb, IsRV32]
-
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF)>;
-def : InstAlias<"zext.h $rd, $rs", (PACKW GPR:$rd, GPR:$rs, X0)>;
-def : InstAlias<"zext.w $rd, $rs", (PACK GPR:$rd, GPR:$rs, X0)>;
-} // Predicates = [HasStdExtZbb, IsRV64]
+let Predicates = [HasStdExtZba, IsRV64] in {
+// NOTE: The 0.93 spec shows zext.w as an alias of pack/packw. It has been
+// changed to add.uw in a draft after 0.94.
+def : InstAlias<"zext.w $rd, $rs", (ADDUW GPR:$rd, GPR:$rs, X0)>;
+}
-let Predicates = [HasStdExtZbbOrZbp] in {
-def : InstAlias<"rev.p $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00001)>,
- Sched<[]>;
-def : InstAlias<"rev2.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00010)>,
- Sched<[]>;
-def : InstAlias<"rev.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00011)>,
- Sched<[]>;
-def : InstAlias<"rev4.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00100)>,
- Sched<[]>;
-def : InstAlias<"rev2.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00110)>,
- Sched<[]>;
-def : InstAlias<"rev.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00111)>,
- Sched<[]>;
-def : InstAlias<"rev8.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01000)>,
- Sched<[]>;
-def : InstAlias<"rev4.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01100)>,
- Sched<[]>;
-def : InstAlias<"rev2.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01110)>,
- Sched<[]>;
-def : InstAlias<"rev.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01111)>,
- Sched<[]>;
-
-def : InstAlias<"zip.n $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0001)>,
- Sched<[]>;
-def : InstAlias<"unzip.n $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0001)>,
- Sched<[]>;
-def : InstAlias<"zip2.b $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0010)>,
- Sched<[]>;
-def : InstAlias<"unzip2.b $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0010)>,
- Sched<[]>;
-def : InstAlias<"zip.b $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0011)>,
- Sched<[]>;
-def : InstAlias<"unzip.b $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0011)>,
- Sched<[]>;
-def : InstAlias<"zip4.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0100)>,
- Sched<[]>;
-def : InstAlias<"unzip4.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0100)>,
- Sched<[]>;
-def : InstAlias<"zip2.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0110)>,
- Sched<[]>;
-def : InstAlias<"unzip2.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0110)>,
- Sched<[]>;
-def : InstAlias<"zip.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0111)>,
- Sched<[]>;
-def : InstAlias<"unzip.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0111)>,
- Sched<[]>;
-
-def : InstAlias<"orc.p $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00001)>,
- Sched<[]>;
-def : InstAlias<"orc2.n $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00010)>,
- Sched<[]>;
-def : InstAlias<"orc.n $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00011)>,
- Sched<[]>;
-def : InstAlias<"orc4.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00100)>,
- Sched<[]>;
-def : InstAlias<"orc2.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00110)>,
- Sched<[]>;
-def : InstAlias<"orc.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00111)>,
- Sched<[]>;
-def : InstAlias<"orc8.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01000)>,
- Sched<[]>;
-def : InstAlias<"orc4.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01100)>,
- Sched<[]>;
-def : InstAlias<"orc2.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01110)>,
- Sched<[]>;
-def : InstAlias<"orc.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01111)>,
- Sched<[]>;
-} // Predicates = [HasStdExtZbbOrZbp]
+let Predicates = [HasStdExtZbp] in {
+def : InstAlias<"rev.p $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00001)>;
+def : InstAlias<"rev2.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00010)>;
+def : InstAlias<"rev.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00011)>;
+def : InstAlias<"rev4.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00100)>;
+def : InstAlias<"rev2.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00110)>;
+def : InstAlias<"rev.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00111)>;
+def : InstAlias<"rev8.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01000)>;
+def : InstAlias<"rev4.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01100)>;
+def : InstAlias<"rev2.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01110)>;
+def : InstAlias<"rev.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01111)>;
+
+def : InstAlias<"zip.n $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0001)>;
+def : InstAlias<"unzip.n $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0001)>;
+def : InstAlias<"zip2.b $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0010)>;
+def : InstAlias<"unzip2.b $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0010)>;
+def : InstAlias<"zip.b $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0011)>;
+def : InstAlias<"unzip.b $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0011)>;
+def : InstAlias<"zip4.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0100)>;
+def : InstAlias<"unzip4.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0100)>;
+def : InstAlias<"zip2.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0110)>;
+def : InstAlias<"unzip2.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0110)>;
+def : InstAlias<"zip.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0111)>;
+def : InstAlias<"unzip.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0111)>;
+
+def : InstAlias<"orc.p $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00001)>;
+def : InstAlias<"orc2.n $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00010)>;
+def : InstAlias<"orc.n $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00011)>;
+def : InstAlias<"orc4.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00100)>;
+def : InstAlias<"orc2.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00110)>;
+// orc.b is considered an instruction rather than an alias.
+def : InstAlias<"orc8.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01000)>;
+def : InstAlias<"orc4.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01100)>;
+def : InstAlias<"orc2.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01110)>;
+def : InstAlias<"orc.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01111)>;
+} // Predicates = [HasStdExtZbp]
-let Predicates = [HasStdExtZbbOrZbp, IsRV32] in {
-def : InstAlias<"rev16 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b10000)>, Sched<[]>;
-def : InstAlias<"rev8 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11000)>, Sched<[]>;
-def : InstAlias<"rev4 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11100)>, Sched<[]>;
-def : InstAlias<"rev2 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11110)>, Sched<[]>;
-def : InstAlias<"rev $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11111)>, Sched<[]>;
-
-def : InstAlias<"zip8 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1000)>,
- Sched<[]>;
-def : InstAlias<"unzip8 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1000)>,
- Sched<[]>;
-def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1100)>,
- Sched<[]>;
-def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1100)>,
- Sched<[]>;
-def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1110)>,
- Sched<[]>;
-def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1110)>,
- Sched<[]>;
-def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1111)>,
- Sched<[]>;
-def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1111)>,
- Sched<[]>;
-
-def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b10000)>, Sched<[]>;
-def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11000)>, Sched<[]>;
-def : InstAlias<"orc4 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11100)>, Sched<[]>;
-def : InstAlias<"orc2 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11110)>, Sched<[]>;
-def : InstAlias<"orc $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11111)>, Sched<[]>;
-} // Predicates = [HasStdExtZbbOrZbp, IsRV32]
+let Predicates = [HasStdExtZbp, IsRV32] in {
+def : InstAlias<"rev16 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b10000)>;
+// rev8 is considered an instruction rather than an alias.
+def : InstAlias<"rev4 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11100)>;
+def : InstAlias<"rev2 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11110)>;
+def : InstAlias<"rev $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11111)>;
+
+def : InstAlias<"zip8 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1000)>;
+def : InstAlias<"unzip8 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1000)>;
+def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1100)>;
+def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1100)>;
+def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1110)>;
+def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1110)>;
+def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1111)>;
+def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1111)>;
+
+def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b10000)>;
+def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11000)>;
+def : InstAlias<"orc4 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11100)>;
+def : InstAlias<"orc2 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11110)>;
+def : InstAlias<"orc $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11111)>;
+} // Predicates = [HasStdExtZbp, IsRV32]
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
-def : InstAlias<"rev16.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b010000)>,
- Sched<[]>;
-def : InstAlias<"rev8.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011000)>,
- Sched<[]>;
-def : InstAlias<"rev4.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011100)>,
- Sched<[]>;
-def : InstAlias<"rev2.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011110)>,
- Sched<[]>;
-def : InstAlias<"rev.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011111)>,
- Sched<[]>;
-def : InstAlias<"rev32 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b100000)>,
- Sched<[]>;
-def : InstAlias<"rev16 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b110000)>,
- Sched<[]>;
-def : InstAlias<"rev8 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111000)>,
- Sched<[]>;
-def : InstAlias<"rev4 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111100)>,
- Sched<[]>;
-def : InstAlias<"rev2 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111110)>,
- Sched<[]>;
-def : InstAlias<"rev $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111111)>,
- Sched<[]>;
-
-def : InstAlias<"zip8.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01000)>,
- Sched<[]>;
-def : InstAlias<"unzip8.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01000)>,
- Sched<[]>;
-def : InstAlias<"zip4.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01100)>,
- Sched<[]>;
-def : InstAlias<"unzip4.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01100)>,
- Sched<[]>;
-def : InstAlias<"zip2.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01110)>,
- Sched<[]>;
-def : InstAlias<"unzip2.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01110)>,
- Sched<[]>;
-def : InstAlias<"zip.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01111)>,
- Sched<[]>;
-def : InstAlias<"unzip.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01111)>,
- Sched<[]>;
-def : InstAlias<"zip16 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b10000)>,
- Sched<[]>;
-def : InstAlias<"unzip16 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b10000)>,
- Sched<[]>;
-def : InstAlias<"zip8 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11000)>,
- Sched<[]>;
-def : InstAlias<"unzip8 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11000)>,
- Sched<[]>;
-def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11100)>,
- Sched<[]>;
-def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11100)>,
- Sched<[]>;
-def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11110)>,
- Sched<[]>;
-def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11110)>,
- Sched<[]>;
-def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11111)>,
- Sched<[]>;
-def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11111)>,
- Sched<[]>;
-
-def : InstAlias<"orc16.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b010000)>,
- Sched<[]>;
-def : InstAlias<"orc8.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011000)>,
- Sched<[]>;
-def : InstAlias<"orc4.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011100)>,
- Sched<[]>;
-def : InstAlias<"orc2.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011110)>,
- Sched<[]>;
-def : InstAlias<"orc.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011111)>,
- Sched<[]>;
-def : InstAlias<"orc32 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b100000)>,
- Sched<[]>;
-def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b110000)>,
- Sched<[]>;
-def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111000)>,
- Sched<[]>;
-def : InstAlias<"orc4 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111100)>,
- Sched<[]>;
-def : InstAlias<"orc2 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111110)>,
- Sched<[]>;
-def : InstAlias<"orc $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111111)>,
- Sched<[]>;
-} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def : InstAlias<"rev16.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b010000)>;
+def : InstAlias<"rev8.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011000)>;
+def : InstAlias<"rev4.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011100)>;
+def : InstAlias<"rev2.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011110)>;
+def : InstAlias<"rev.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011111)>;
+def : InstAlias<"rev32 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b100000)>;
+def : InstAlias<"rev16 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b110000)>;
+// rev8 is considered an instruction rather than an alias.
+def : InstAlias<"rev4 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111100)>;
+def : InstAlias<"rev2 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111110)>;
+def : InstAlias<"rev $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111111)>;
+
+def : InstAlias<"zip8.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01000)>;
+def : InstAlias<"unzip8.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01000)>;
+def : InstAlias<"zip4.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01100)>;
+def : InstAlias<"unzip4.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01100)>;
+def : InstAlias<"zip2.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01110)>;
+def : InstAlias<"unzip2.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01110)>;
+def : InstAlias<"zip.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01111)>;
+def : InstAlias<"unzip.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01111)>;
+def : InstAlias<"zip16 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b10000)>;
+def : InstAlias<"unzip16 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b10000)>;
+def : InstAlias<"zip8 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11000)>;
+def : InstAlias<"unzip8 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11000)>;
+def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11100)>;
+def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11100)>;
+def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11110)>;
+def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11110)>;
+def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11111)>;
+def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11111)>;
+
+def : InstAlias<"orc16.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b010000)>;
+def : InstAlias<"orc8.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011000)>;
+def : InstAlias<"orc4.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011100)>;
+def : InstAlias<"orc2.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011110)>;
+def : InstAlias<"orc.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011111)>;
+def : InstAlias<"orc32 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b100000)>;
+def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b110000)>;
+def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111000)>;
+def : InstAlias<"orc4 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111100)>;
+def : InstAlias<"orc2 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111110)>;
+def : InstAlias<"orc $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111111)>;
+} // Predicates = [HasStdExtZbp, IsRV64]
//===----------------------------------------------------------------------===//
// Compressed Instruction patterns
@@ -628,22 +619,14 @@ def : CompressPat<(SUB GPRC:$rs1, X0, GPRC:$rs1),
(C_NEG GPRC:$rs1)>;
} // Predicates = [HasStdExtZbproposedc, HasStdExtC]
-let Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64] in {
-def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0),
+let Predicates = [HasStdExtZbproposedc, HasStdExtZba, HasStdExtC, IsRV64] in {
+def : CompressPat<(ADDUW GPRC:$rs1, GPRC:$rs1, X0),
(C_ZEXTW GPRC:$rs1)>;
} // Predicates = [HasStdExtZbproposedc, HasStdExtC, IsRV64]
//===----------------------------------------------------------------------===//
// Codegen patterns
//===----------------------------------------------------------------------===//
-def SLOIPat : ComplexPattern<XLenVT, 2, "SelectSLOI", [or]>;
-def SROIPat : ComplexPattern<XLenVT, 2, "SelectSROI", [or]>;
-def RORIPat : ComplexPattern<XLenVT, 2, "SelectRORI", [rotl]>;
-def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
-def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
-def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
-def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW", [sext_inreg]>;
-def FSRIWPat : ComplexPattern<i64, 3, "SelectFSRIW", [sext_inreg]>;
let Predicates = [HasStdExtZbbOrZbp] in {
def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
@@ -651,221 +634,180 @@ def : Pat<(or GPR:$rs1, (not GPR:$rs2)), (ORN GPR:$rs1, GPR:$rs2)>;
def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbbOrZbp]
-let Predicates = [HasStdExtZbb] in {
-def : Pat<(xor (shl (xor GPR:$rs1, -1), GPR:$rs2), -1),
- (SLO GPR:$rs1, GPR:$rs2)>;
-def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1),
- (SRO GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbb]
-
let Predicates = [HasStdExtZbbOrZbp] in {
def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
-def : Pat<(fshl GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
-def : Pat<(fshr GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbbOrZbp]
-let Predicates = [HasStdExtZbs, IsRV32] in
-def : Pat<(and (xor (shl 1, (and GPR:$rs2, 31)), -1), GPR:$rs1),
- (SBCLR GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbs, IsRV64] in
-def : Pat<(and (xor (shl 1, (and GPR:$rs2, 63)), -1), GPR:$rs1),
- (SBCLR GPR:$rs1, GPR:$rs2)>;
-
-let Predicates = [HasStdExtZbs] in
-def : Pat<(and (rotl -2, GPR:$rs2), GPR:$rs1), (SBCLR GPR:$rs1, GPR:$rs2)>;
-
-let Predicates = [HasStdExtZbs, IsRV32] in
-def : Pat<(or (shl 1, (and GPR:$rs2, 31)), GPR:$rs1),
- (SBSET GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbs, IsRV64] in
-def : Pat<(or (shl 1, (and GPR:$rs2, 63)), GPR:$rs1),
- (SBSET GPR:$rs1, GPR:$rs2)>;
-
-let Predicates = [HasStdExtZbs, IsRV32] in
-def : Pat<(xor (shl 1, (and GPR:$rs2, 31)), GPR:$rs1),
- (SBINV GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbs, IsRV64] in
-def : Pat<(xor (shl 1, (and GPR:$rs2, 63)), GPR:$rs1),
- (SBINV GPR:$rs1, GPR:$rs2)>;
-
-let Predicates = [HasStdExtZbs, IsRV32] in
-def : Pat<(and (srl GPR:$rs1, (and GPR:$rs2, 31)), 1),
- (SBEXT GPR:$rs1, GPR:$rs2)>;
-
-let Predicates = [HasStdExtZbs, IsRV64] in
-def : Pat<(and (srl GPR:$rs1, (and GPR:$rs2, 63)), 1),
- (SBEXT GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbs] in {
+def : Pat<(and (not (shiftop<shl> 1, GPR:$rs2)), GPR:$rs1),
+ (BCLR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(and (rotl -2, GPR:$rs2), GPR:$rs1), (BCLR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (shiftop<shl> 1, GPR:$rs2), GPR:$rs1),
+ (BSET GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor (shiftop<shl> 1, GPR:$rs2), GPR:$rs1),
+ (BINV GPR:$rs1, GPR:$rs2)>;
+def : Pat<(and (shiftop<srl> GPR:$rs1, GPR:$rs2), 1),
+ (BEXT GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(shiftop<shl> 1, GPR:$rs2),
+ (BSET X0, GPR:$rs2)>;
+
+def : Pat<(and GPR:$rs1, BCLRMask:$mask),
+ (BCLRI GPR:$rs1, (BCLRXForm imm:$mask))>;
+def : Pat<(or GPR:$rs1, BSETINVMask:$mask),
+ (BSETI GPR:$rs1, (BSETINVXForm imm:$mask))>;
+def : Pat<(xor GPR:$rs1, BSETINVMask:$mask),
+ (BINVI GPR:$rs1, (BSETINVXForm imm:$mask))>;
-let Predicates = [HasStdExtZbb] in {
-def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
- (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
-def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
- (SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
-} // Predicates = [HasStdExtZbb]
+def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)),
+ (BEXTI GPR:$rs1, uimmlog2xlen:$shamt)>;
+}
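The BCLRMask/BSETINVMask immediates matched above are turned into bit indices by BCLRXForm and BSETINVXForm using the count of trailing ones or zeros. A small self-contained C++ sketch of that transform; trailingOnes/trailingZeros here are simplified local stand-ins, not the APInt methods the TableGen code calls.

#include <cassert>
#include <cstdint>

// Index of the lowest clear bit == number of trailing one bits (BCLRXForm).
unsigned trailingOnes(uint64_t x) {
  unsigned n = 0;
  while (x & 1) { x >>= 1; ++n; }
  return n;
}
// Index of the lowest set bit == number of trailing zero bits (BSETINVXForm).
unsigned trailingZeros(uint64_t x) {
  unsigned n = 0;
  while (n < 64 && !(x & 1)) { x >>= 1; ++n; }
  return n;
}

int main() {
  // and rd, rs, ~(1 << 20): a single clear bit and the mask does not fit in a
  // 12-bit immediate, so it can become bclri rd, rs, 20.
  uint64_t clearMask = ~(uint64_t{1} << 20);
  assert(trailingOnes(clearMask) == 20);
  // or/xor rd, rs, (1 << 33): a single set bit outside the simm12 range, so it
  // can become bseti/binvi rd, rs, 33.
  uint64_t setMask = uint64_t{1} << 33;
  assert(trailingZeros(setMask) == 33);
  return 0;
}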
-// There's no encoding for roli in the current version of the 'B' extension
-// (v0.92) as it can be implemented with rori by negating the immediate.
-// For this reason we pattern-match only against rori[w].
-let Predicates = [HasStdExtZbbOrZbp] in
-def : Pat<(RORIPat GPR:$rs1, uimmlog2xlen:$shamt),
+// There's no encoding for roli in the 'B' extension as it can be
+// implemented with rori by negating the immediate.
+let Predicates = [HasStdExtZbbOrZbp] in {
+def : Pat<(rotr GPR:$rs1, uimmlog2xlen:$shamt),
(RORI GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
+ (RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
+}
-// We don't pattern-match sbclri[w], sbseti[w], sbinvi[w] because they are
-// pattern-matched by simple andi, ori, and xori.
-let Predicates = [HasStdExtZbs] in
-def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)),
- (SBEXTI GPR:$rs1, uimmlog2xlen:$shamt)>;
+def riscv_grevi : SDNode<"RISCVISD::GREVI", SDTIntBinOp, []>;
+def riscv_greviw : SDNode<"RISCVISD::GREVIW", SDTIntBinOp, []>;
+def riscv_gorci : SDNode<"RISCVISD::GORCI", SDTIntBinOp, []>;
+def riscv_gorciw : SDNode<"RISCVISD::GORCIW", SDTIntBinOp, []>;
-let Predicates = [HasStdExtZbp, IsRV32] in {
-def : Pat<(or (or (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555)), GPR:$rs1),
- (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA))),
- (GORCI GPR:$rs1, (i32 1))>;
-def : Pat<(or (or (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333)), GPR:$rs1),
- (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC))),
- (GORCI GPR:$rs1, (i32 2))>;
-def : Pat<(or (or (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F)), GPR:$rs1),
- (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0))),
- (GORCI GPR:$rs1, (i32 4))>;
-def : Pat<(or (or (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF)), GPR:$rs1),
- (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00))),
- (GORCI GPR:$rs1, (i32 8))>;
-def : Pat<(or (or (srl GPR:$rs1, (i32 16)), GPR:$rs1),
- (shl GPR:$rs1, (i32 16))),
- (GORCI GPR:$rs1, (i32 16))>;
-} // Predicates = [HasStdExtZbp, IsRV32]
+let Predicates = [HasStdExtZbp] in {
+def : Pat<(riscv_grevi GPR:$rs1, timm:$shamt), (GREVI GPR:$rs1, timm:$shamt)>;
+def : Pat<(riscv_gorci GPR:$rs1, timm:$shamt), (GORCI GPR:$rs1, timm:$shamt)>;
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555)),
- GPR:$rs1),
- (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA))),
- (GORCI GPR:$rs1, (i64 1))>;
-def : Pat<(or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333)),
- GPR:$rs1),
- (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC))),
- (GORCI GPR:$rs1, (i64 2))>;
-def : Pat<(or (or (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F)),
- GPR:$rs1),
- (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0))),
- (GORCI GPR:$rs1, (i64 4))>;
-def : Pat<(or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF)),
- GPR:$rs1),
- (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00))),
- (GORCI GPR:$rs1, (i64 8))>;
-def : Pat<(or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF)),
- GPR:$rs1),
- (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000))),
- (GORCI GPR:$rs1, (i64 16))>;
-def : Pat<(or (or (srl GPR:$rs1, (i64 32)), GPR:$rs1),
- (shl GPR:$rs1, (i64 32))),
- (GORCI GPR:$rs1, (i64 32))>;
-} // Predicates = [HasStdExtZbp, IsRV64]
+// We treat orc.b as a separate instruction, so match it directly.
+def : Pat<(riscv_gorci GPR:$rs1, (XLenVT 7)), (ORCB GPR:$rs1)>;
+} // Predicates = [HasStdExtZbp]
let Predicates = [HasStdExtZbp, IsRV32] in {
-def : Pat<(or (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA)),
- (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555))),
- (GREVI GPR:$rs1, (i32 1))>;
-def : Pat<(or (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC)),
- (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333))),
- (GREVI GPR:$rs1, (i32 2))>;
-def : Pat<(or (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0)),
- (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F))),
- (GREVI GPR:$rs1, (i32 4))>;
-def : Pat<(or (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00)),
- (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF))),
- (GREVI GPR:$rs1, (i32 8))>;
-def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>;
-def : Pat<(or (shl GPR:$rs1, (i32 16)), (srl GPR:$rs1, (i32 16))),
- (GREVI GPR:$rs1, (i32 16))>;
-def : Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
-def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>;
-def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>;
+def : Pat<(rotr (riscv_grevi GPR:$rs1, (i32 24)), (i32 16)), (GREVI GPR:$rs1, 8)>;
+def : Pat<(rotl (riscv_grevi GPR:$rs1, (i32 24)), (i32 16)), (GREVI GPR:$rs1, 8)>;
+
+// We treat rev8 as a separate instruction, so match it directly.
+def : Pat<(riscv_grevi GPR:$rs1, (i32 24)), (REV8_RV32 GPR:$rs1)>;
} // Predicates = [HasStdExtZbp, IsRV32]
let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA)),
- (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555))),
- (GREVI GPR:$rs1, (i64 1))>;
-def : Pat<(or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC)),
- (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333))),
- (GREVI GPR:$rs1, (i64 2))>;
-def : Pat<(or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0)),
- (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F))),
- (GREVI GPR:$rs1, (i64 4))>;
-def : Pat<(or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00)),
- (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF))),
- (GREVI GPR:$rs1, (i64 8))>;
-def : Pat<(or (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000)),
- (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF))),
- (GREVI GPR:$rs1, (i64 16))>;
-def : Pat<(or (shl GPR:$rs1, (i64 32)), (srl GPR:$rs1, (i64 32))),
- (GREVI GPR:$rs1, (i64 32))>;
-def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
-def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
-def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
+// We treat rev8 as a separate instruction, so match it directly.
+def : Pat<(riscv_grevi GPR:$rs1, (i64 56)), (REV8_RV64 GPR:$rs1)>;
} // Predicates = [HasStdExtZbp, IsRV64]
let Predicates = [HasStdExtZbt] in {
-def : Pat<(or (and (xor GPR:$rs2, -1), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)),
+def : Pat<(or (and (not GPR:$rs2), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)),
(CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(riscv_selectcc GPR:$rs2, (XLenVT 0), (XLenVT 17), GPR:$rs3, GPR:$rs1),
+
+def : Pat<(select (XLenVT (setne GPR:$rs2, 0)), GPR:$rs1, GPR:$rs3),
(CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(fshl GPR:$rs1, GPR:$rs2, GPR:$rs3),
- (FSL GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(fshr GPR:$rs1, GPR:$rs2, GPR:$rs3),
- (FSR GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(fshr GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt),
- (FSRI GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>;
+def : Pat<(select (XLenVT (seteq GPR:$rs2, 0)), GPR:$rs3, GPR:$rs1),
+ (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(select (XLenVT (setne GPR:$x, simm12_plus1:$y)), GPR:$rs1, GPR:$rs3),
+ (CMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>;
+def : Pat<(select (XLenVT (seteq GPR:$x, simm12_plus1:$y)), GPR:$rs3, GPR:$rs1),
+ (CMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setne GPR:$x, GPR:$y)), GPR:$rs1, GPR:$rs3),
+ (CMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (seteq GPR:$x, GPR:$y)), GPR:$rs3, GPR:$rs1),
+ (CMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setuge GPR:$x, GPR:$y)), GPR:$rs3, GPR:$rs1),
+ (CMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setule GPR:$y, GPR:$x)), GPR:$rs3, GPR:$rs1),
+ (CMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setge GPR:$x, GPR:$y)), GPR:$rs3, GPR:$rs1),
+ (CMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setle GPR:$y, GPR:$x)), GPR:$rs3, GPR:$rs1),
+ (CMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select GPR:$rs2, GPR:$rs1, GPR:$rs3),
+ (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+} // Predicates = [HasStdExtZbt]
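
A rough C model (my own sketch, not from the patch) of what the CMOV patterns above encode: cmov returns $rs1 when the condition register $rs2 is non-zero and $rs3 otherwise, so a select on a comparison is lowered by materializing the condition with an XOR, an ADDI of the negated immediate, or SLT/SLTU, and feeding that value into cmov.

#include <assert.h>
#include <stdint.h>

/* Rough model of Zbt cmov as used by the patterns above: the result is rs1
   when the condition register rs2 is non-zero, rs3 otherwise. */
static int64_t cmov(int64_t rs1, int64_t rs2, int64_t rs3) {
  return rs2 != 0 ? rs1 : rs3;
}

/* select (x != y), a, b   ->  cmov a, (x ^ y), b      (xor is zero iff x == y) */
static int64_t sel_ne(int64_t x, int64_t y, int64_t a, int64_t b) {
  return cmov(a, x ^ y, b);
}

/* select (x != imm), a, b ->  cmov a, (x + (-imm)), b (addi of the negated imm) */
static int64_t sel_ne_imm(int64_t x, int64_t imm, int64_t a, int64_t b) {
  return cmov(a, x - imm, b);
}

/* select (x >=u y), t, f  ->  cmov f, (x <u y), t     (sltu materializes the cc) */
static int64_t sel_uge(uint64_t x, uint64_t y, int64_t t, int64_t f) {
  return cmov(f, x < y ? 1 : 0, t);
}

int main(void) {
  assert(sel_ne(3, 3, 10, 20) == 20);
  assert(sel_ne(3, 4, 10, 20) == 10);
  assert(sel_ne_imm(7, 7, 1, 2) == 2);
  assert(sel_ne_imm(8, 7, 1, 2) == 1);
  assert(sel_uge(9, 5, 100, 200) == 100);
  assert(sel_uge(5, 9, 100, 200) == 200);
  return 0;
}
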
+
+// fshl and fshr concatenate their operands in the same order. The fsr and fsl
+// instructions use different orders. fshl will return its first operand for a
+// shift of zero, fshr will return its second operand. fsl and fsr both return
+// $rs1 so the patterns need to have different operand orders.
+//
+// fshl and fshr only read the lower log2(xlen) bits of the shift amount, but
+// fsl/fsr instructions read log2(xlen)+1 bits. DAG combine may have removed
+// an AND mask on the shift amount that we need to add back to avoid a one in
+// the extra bit.
+// FIXME: If we can prove that the extra bit in the shift amount is zero, we
+// don't need this mask.
+let Predicates = [HasStdExtZbt, IsRV32] in {
+def : Pat<(fshl GPR:$rs1, GPR:$rs3, GPR:$rs2),
+ (FSL GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
+def : Pat<(fshr GPR:$rs3, GPR:$rs1, GPR:$rs2),
+ (FSR GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
+}
+let Predicates = [HasStdExtZbt, IsRV64] in {
+def : Pat<(fshl GPR:$rs1, GPR:$rs3, GPR:$rs2),
+ (FSL GPR:$rs1, (ANDI GPR:$rs2, 63), GPR:$rs3)>;
+def : Pat<(fshr GPR:$rs3, GPR:$rs1, GPR:$rs2),
+ (FSR GPR:$rs1, (ANDI GPR:$rs2, 63), GPR:$rs3)>;
+}
+let Predicates = [HasStdExtZbt] in {
+def : Pat<(fshr GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
+ (FSRI GPR:$rs1, GPR:$rs3, uimmlog2xlen:$shamt)>;
+// We can use FSRI for fshl by immediate if we subtract the immediate from
+// XLen and swap the operands.
+def : Pat<(fshl GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
+ (FSRI GPR:$rs1, GPR:$rs3, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
} // Predicates = [HasStdExtZbt]
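
The following standalone C sketch (mine; XLEN fixed at 32 for brevity) illustrates the comment above: the generic fshr node only looks at the low log2(xlen) bits of its shift amount, while fsr reads one extra bit and swaps its operand halves when that bit is set, which is why the selected code re-applies an ANDI mask.

#include <assert.h>
#include <stdint.h>

/* Generic fshr node: only the low log2(32) = 5 bits of the shift matter. */
static uint32_t fshr32(uint32_t hi, uint32_t lo, uint32_t sh) {
  sh &= 31;
  return sh ? (lo >> sh) | (hi << (32 - sh)) : lo;
}

/* Reference-style semantics of the Zbt fsr instruction: it reads 6 shift
   bits, and a shift >= 32 swaps which operand supplies the low half. */
static uint32_t fsr32(uint32_t rs1, uint32_t rs2, uint32_t rs3) {
  unsigned sh = rs2 & 63;
  uint32_t a = rs1, b = rs3;
  if (sh >= 32) { sh -= 32; a = rs3; b = rs1; }
  return sh ? (a >> sh) | (b << (32 - sh)) : a;
}

int main(void) {
  uint32_t hi = 0xAABBCCDDu, lo = 0x11223344u;
  for (uint32_t sh = 0; sh < 64; sh++) {
    /* fshr(hi, lo, sh) == fsr(lo, sh & 31, hi): the ANDI keeps bit 5 clear. */
    assert(fshr32(hi, lo, sh) == fsr32(lo, sh & 31, hi));
  }
  /* Without the mask, the extra bit flips the operand halves. */
  assert(fsr32(lo, 33, hi) != fshr32(hi, lo, 33));
  return 0;
}
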
let Predicates = [HasStdExtZbb] in {
def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>;
def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>;
-def : Pat<(ctpop GPR:$rs1), (PCNT GPR:$rs1)>;
+def : Pat<(ctpop GPR:$rs1), (CPOP GPR:$rs1)>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbb, IsRV32] in
-def : Pat<(sra (shl GPR:$rs1, (i32 24)), (i32 24)), (SEXTB GPR:$rs1)>;
-let Predicates = [HasStdExtZbb, IsRV64] in
-def : Pat<(sra (shl GPR:$rs1, (i64 56)), (i64 56)), (SEXTB GPR:$rs1)>;
-
-let Predicates = [HasStdExtZbb, IsRV32] in
-def : Pat<(sra (shl GPR:$rs1, (i32 16)), (i32 16)), (SEXTH GPR:$rs1)>;
-let Predicates = [HasStdExtZbb, IsRV64] in
-def : Pat<(sra (shl GPR:$rs1, (i64 48)), (i64 48)), (SEXTH GPR:$rs1)>;
+let Predicates = [HasStdExtZbb] in {
+def : Pat<(sext_inreg GPR:$rs1, i8), (SEXTB GPR:$rs1)>;
+def : Pat<(sext_inreg GPR:$rs1, i16), (SEXTH GPR:$rs1)>;
+}
let Predicates = [HasStdExtZbb] in {
def : Pat<(smin GPR:$rs1, GPR:$rs2), (MIN GPR:$rs1, GPR:$rs2)>;
-def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 20), GPR:$rs1, GPR:$rs2),
- (MIN GPR:$rs1, GPR:$rs2)>;
def : Pat<(smax GPR:$rs1, GPR:$rs2), (MAX GPR:$rs1, GPR:$rs2)>;
-def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 20), GPR:$rs1, GPR:$rs2),
- (MAX GPR:$rs1, GPR:$rs2)>;
def : Pat<(umin GPR:$rs1, GPR:$rs2), (MINU GPR:$rs1, GPR:$rs2)>;
-def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 12), GPR:$rs1, GPR:$rs2),
- (MINU GPR:$rs1, GPR:$rs2)>;
def : Pat<(umax GPR:$rs1, GPR:$rs2), (MAXU GPR:$rs1, GPR:$rs2)>;
-def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 12), GPR:$rs1, GPR:$rs2),
- (MAXU GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
+let Predicates = [HasStdExtZbb, IsRV32] in {
+def : Pat<(bswap GPR:$rs1), (REV8_RV32 GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb, IsRV32]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def : Pat<(bswap GPR:$rs1), (REV8_RV64 GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbp, IsRV32] in
def : Pat<(or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16))),
(PACK GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+let Predicates = [HasStdExtZbp, IsRV64] in
def : Pat<(or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32))),
(PACK GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
+let Predicates = [HasStdExtZbp, IsRV32] in
def : Pat<(or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16))),
(PACKU GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+let Predicates = [HasStdExtZbp, IsRV64] in
def : Pat<(or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32))),
(PACKU GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbbOrZbp] in
+let Predicates = [HasStdExtZbp] in
def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFF00),
(and GPR:$rs1, 0x00FF)),
(PACKH GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
+def : Pat<(and GPR:$rs, 0x0000FFFF), (ZEXTH_RV32 GPR:$rs)>;
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+def : Pat<(and GPR:$rs, 0x000000000000FFFF), (ZEXTH_RV64 GPR:$rs)>;
+}
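
For reference, a small C sketch (not part of the patch; RV32 widths, helper names mine) of the semantics the pack/packu/packh and zext.h patterns above are matching:

#include <assert.h>
#include <stdint.h>

/* RV32 semantics implied by the patterns above (illustrative only). */
static uint32_t pack(uint32_t rs1, uint32_t rs2)  { return (rs2 << 16) | (rs1 & 0xFFFFu); }
static uint32_t packu(uint32_t rs1, uint32_t rs2) { return (rs2 & 0xFFFF0000u) | (rs1 >> 16); }
static uint32_t packh(uint32_t rs1, uint32_t rs2) { return ((rs2 & 0xFFu) << 8) | (rs1 & 0xFFu); }
static uint32_t zext_h(uint32_t rs)               { return rs & 0xFFFFu; }

int main(void) {
  assert(pack(0xAAAA1111u, 0x00002222u) == 0x22221111u);
  assert(packu(0x1111AAAAu, 0x2222BBBBu) == 0x22221111u);
  assert(packh(0x000000ABu, 0x000000CDu) == 0x0000CDABu);
  assert(zext_h(0xFFFF1234u) == 0x00001234u);
  return 0;
}
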
+
let Predicates = [HasStdExtZbp, IsRV32] in {
def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)),
(and GPR:$rs1, (i32 0xFF0000FF))),
@@ -908,156 +850,115 @@ def : Pat<(or (or (and (shl GPR:$rs1, (i64 1)), (i64 0x4444444444444444)),
(SHFLI GPR:$rs1, (i64 1))>;
} // Predicates = [HasStdExtZbp, IsRV64]
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def : Pat<(and (add GPR:$rs, simm12:$simm12), (i64 0xFFFFFFFF)),
- (ADDIWU GPR:$rs, simm12:$simm12)>;
-def : Pat<(SLLIUWPat GPR:$rs1, uimmlog2xlen:$shamt),
- (SLLIUW GPR:$rs1, uimmlog2xlen:$shamt)>;
-def : Pat<(and (add GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)),
- (ADDWU GPR:$rs1, GPR:$rs2)>;
-def : Pat<(and (sub GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)),
- (SUBWU GPR:$rs1, GPR:$rs2)>;
-def : Pat<(add GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))),
+let Predicates = [HasStdExtZba] in {
+def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), GPR:$rs2),
+ (SH1ADD GPR:$rs1, GPR:$rs2)>;
+def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), GPR:$rs2),
+ (SH2ADD GPR:$rs1, GPR:$rs2)>;
+def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), GPR:$rs2),
+ (SH3ADD GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZba]
+
+let Predicates = [HasStdExtZba, IsRV64] in {
+def : Pat<(SLLIUWPat GPR:$rs1, uimm5:$shamt),
+ (SLLIUW GPR:$rs1, uimm5:$shamt)>;
+def : Pat<(shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt),
+ (SLLIUW GPR:$rs1, uimm5:$shamt)>;
+def : Pat<(add (and GPR:$rs1, (i64 0xFFFFFFFF)), GPR:$rs2),
(ADDUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sub GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))),
- (SUBUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(xor (riscv_sllw (xor GPR:$rs1, -1), GPR:$rs2), -1),
- (SLOW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(xor (riscv_srlw (xor GPR:$rs1, -1), GPR:$rs2), -1),
- (SROW GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbb, IsRV64]
+def : Pat<(and GPR:$rs, 0x00000000FFFFFFFF), (ADDUW GPR:$rs, X0)>;
+
+def : Pat<(add (shl (and GPR:$rs1, (i64 0xFFFFFFFF)), (XLenVT 1)), GPR:$rs2),
+ (SH1ADDUW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(add (shl (and GPR:$rs1, (i64 0xFFFFFFFF)), (XLenVT 2)), GPR:$rs2),
+ (SH2ADDUW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(add (shl (and GPR:$rs1, (i64 0xFFFFFFFF)), (XLenVT 3)), GPR:$rs2),
+ (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(add (SLLIUWPat GPR:$rs1, (XLenVT 1)), GPR:$rs2),
+ (SH1ADDUW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(add (SLLIUWPat GPR:$rs1, (XLenVT 2)), GPR:$rs2),
+ (SH2ADDUW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(add (SLLIUWPat GPR:$rs1, (XLenVT 3)), GPR:$rs2),
+ (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZba, IsRV64]
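
A brief C sketch (mine, for illustration) of the Zba semantics the patterns above rely on: shNadd adds a shifted register, the RV64 .uw forms first zero-extend the low 32 bits of $rs1, and the add.uw-with-x0 pattern is how a bare 32-bit zero extension is matched.

#include <assert.h>
#include <stdint.h>

/* Illustrative RV64 semantics of the Zba patterns above; helper names are mine. */
static uint64_t sh1add(uint64_t rs1, uint64_t rs2)    { return (rs1 << 1) + rs2; }
static uint64_t add_uw(uint64_t rs1, uint64_t rs2)    { return (rs1 & 0xFFFFFFFFull) + rs2; }
static uint64_t slli_uw(uint64_t rs1, unsigned sh)    { return (rs1 & 0xFFFFFFFFull) << sh; }
static uint64_t sh2add_uw(uint64_t rs1, uint64_t rs2) { return ((rs1 & 0xFFFFFFFFull) << 2) + rs2; }

int main(void) {
  /* Typical use: scaled index plus base for an array of 2-byte elements. */
  assert(sh1add(5, 0x1000) == 0x100A);
  /* A bare 32-bit zero extension folds into add.uw with x0 as the addend. */
  assert(add_uw(0xFFFFFFFF80000000ull, 0) == 0x80000000ull);
  assert(slli_uw(0xFFFFFFFF00000001ull, 4) == 0x10ull);
  assert(sh2add_uw(0x100000001ull, 0x100) == 0x104ull);
  return 0;
}
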
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
-def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)),
- (riscv_srlw (assertsexti32 GPR:$rs1),
- (sub (i64 0), (assertsexti32 GPR:$rs2)))),
+def : Pat<(riscv_rolw GPR:$rs1, GPR:$rs2),
(ROLW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1),
- (sub (i64 0), (assertsexti32 GPR:$rs2))),
- (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2))),
+def : Pat<(riscv_rorw GPR:$rs1, GPR:$rs2),
(RORW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_rorw GPR:$rs1, uimm5:$rs2),
+ (RORIW GPR:$rs1, uimm5:$rs2)>;
+def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
+ (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
let Predicates = [HasStdExtZbs, IsRV64] in {
-def : Pat<(and (xor (riscv_sllw 1, (assertsexti32 GPR:$rs2)), -1),
- (assertsexti32 GPR:$rs1)),
- (SBCLRW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (riscv_sllw 1, (assertsexti32 GPR:$rs2)),
- (assertsexti32 GPR:$rs1)),
- (SBSETW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(xor (riscv_sllw 1, (assertsexti32 GPR:$rs2)),
- (assertsexti32 GPR:$rs1)),
- (SBINVW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(and (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)),
- 1),
- (SBEXTW GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbs, IsRV64]
-
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt),
- (SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>;
-def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt),
- (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
-} // Predicates = [HasStdExtZbb, IsRV64]
+def : Pat<(and (not (riscv_sllw 1, GPR:$rs2)), (assertsexti32 GPR:$rs1)),
+ (BCLRW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (and (not (riscv_sllw 1, GPR:$rs2)), GPR:$rs1), i32),
+ (BCLRW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (riscv_sllw 1, GPR:$rs2), (assertsexti32 GPR:$rs1)),
+ (BSETW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (or (riscv_sllw 1, GPR:$rs2), GPR:$rs1), i32),
+ (BSETW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor (riscv_sllw 1, GPR:$rs2), (assertsexti32 GPR:$rs1)),
+ (BINVW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (xor (riscv_sllw 1, GPR:$rs2), GPR:$rs1), i32),
+ (BINVW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(and (riscv_srlw GPR:$rs1, GPR:$rs2), 1),
+ (BEXTW GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(riscv_sllw 1, GPR:$rs2),
+ (BSETW X0, GPR:$rs2)>;
+
+def : Pat<(and (assertsexti32 GPR:$rs1), BCLRWMask:$mask),
+ (BCLRIW GPR:$rs1, (BCLRXForm imm:$mask))>;
+def : Pat<(or (assertsexti32 GPR:$rs1), BSETINVWMask:$mask),
+ (BSETIW GPR:$rs1, (BSETINVXForm imm:$mask))>;
+def : Pat<(xor (assertsexti32 GPR:$rs1), BSETINVWMask:$mask),
+ (BINVIW GPR:$rs1, (BSETINVXForm imm:$mask))>;
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
-def : Pat<(RORIWPat GPR:$rs1, uimmlog2xlen:$shamt),
- (RORIW GPR:$rs1, uimmlog2xlen:$shamt)>;
+} // Predicates = [HasStdExtZbs, IsRV64]
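
A minimal C sketch (helper names mine, 32-bit math only; the real W-form instructions sign-extend their 32-bit result to 64 bits) of the single-bit operations the Zbs patterns above select:

#include <assert.h>
#include <stdint.h>

static uint32_t bclr32(uint32_t x, unsigned n) { return x & ~(UINT32_C(1) << (n & 31)); }
static uint32_t bset32(uint32_t x, unsigned n) { return x |  (UINT32_C(1) << (n & 31)); }
static uint32_t binv32(uint32_t x, unsigned n) { return x ^  (UINT32_C(1) << (n & 31)); }
static uint32_t bext32(uint32_t x, unsigned n) { return (x >> (n & 31)) & 1u; }

int main(void) {
  assert(bclr32(0xFFu, 3) == 0xF7u);
  assert(bset32(0x00u, 31) == 0x80000000u); /* a lone (1 << n) maps to bsetw with x0 */
  assert(binv32(0x0Fu, 0) == 0x0Eu);
  assert(bext32(0xF0u, 4) == 1u);
  return 0;
}
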
let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555)),
- GPR:$rs1),
- (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAA))),
- i32),
- (GORCIW GPR:$rs1, (i64 1))>;
-def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x33333333)),
- GPR:$rs1),
- (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCC))),
- i32),
- (GORCIW GPR:$rs1, (i64 2))>;
-def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F)),
- GPR:$rs1),
- (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0))),
- i32),
- (GORCIW GPR:$rs1, (i64 4))>;
-def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF)),
- GPR:$rs1),
- (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00))),
- i32),
- (GORCIW GPR:$rs1, (i64 8))>;
-def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF)),
- GPR:$rs1),
- (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000))),
- i32),
- (GORCIW GPR:$rs1, (i64 16))>;
-def : Pat<(sext_inreg (or (or (srl (and GPR:$rs1, (i64 0xFFFF0000)), (i64 16)),
- GPR:$rs1),
- (shl GPR:$rs1, (i64 16))), i32),
- (GORCIW GPR:$rs1, (i64 16))>;
-
-def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAA)),
- (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555))),
- i32),
- (GREVIW GPR:$rs1, (i64 1))>;
-def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCC)),
- (and (srl GPR:$rs1, (i64 2)), (i64 0x33333333))),
- i32),
- (GREVIW GPR:$rs1, (i64 2))>;
-def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0)),
- (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F))),
- i32),
- (GREVIW GPR:$rs1, (i64 4))>;
-def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00)),
- (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF))),
- i32),
- (GREVIW GPR:$rs1, (i64 8))>;
-def : Pat<(sext_inreg (or (shl GPR:$rs1, (i64 16)),
- (srl (and GPR:$rs1, 0xFFFF0000), (i64 16))), i32),
- (GREVIW GPR:$rs1, (i64 16))>;
-def : Pat<(sra (bswap GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 24))>;
-def : Pat<(sra (bitreverse GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 31))>;
+def : Pat<(riscv_rorw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>;
+def : Pat<(riscv_rolw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>;
+def : Pat<(riscv_greviw GPR:$rs1, timm:$shamt), (GREVIW GPR:$rs1, timm:$shamt)>;
+def : Pat<(riscv_gorciw GPR:$rs1, timm:$shamt), (GORCIW GPR:$rs1, timm:$shamt)>;
} // Predicates = [HasStdExtZbp, IsRV64]
let Predicates = [HasStdExtZbt, IsRV64] in {
-def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31),
- (i64 0),
- (i64 17),
- (assertsexti32 GPR:$rs1),
- (or (riscv_sllw (assertsexti32 GPR:$rs1),
- (and (assertsexti32 GPR:$rs3), 31)),
- (riscv_srlw (assertsexti32 GPR:$rs2),
- (sub (i64 32),
- (assertsexti32 GPR:$rs3))))),
+def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2),
(FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31),
- (i64 0),
- (i64 17),
- (assertsexti32 GPR:$rs2),
- (or (riscv_sllw (assertsexti32 GPR:$rs1),
- (sub (i64 32),
- (assertsexti32 GPR:$rs3))),
- (riscv_srlw (assertsexti32 GPR:$rs2),
- (and (assertsexti32 GPR:$rs3), 31)))),
+def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, GPR:$rs2),
(FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(FSRIWPat GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt),
- (FSRIW GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>;
+def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
+ (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>;
+def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
+ (FSRIW GPR:$rs1, GPR:$rs3, (ImmSubFrom32 uimm5:$shamt))>;
} // Predicates = [HasStdExtZbt, IsRV64]
let Predicates = [HasStdExtZbb, IsRV64] in {
def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
(CLZW GPR:$rs1)>;
-// We don't pattern-match CTZW here as it has the same pattern and result as
-// RV64 CTZ
-def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>;
+// computeKnownBits can't figure out that the and mask on the add result is
+// unnecessary, so we need to pattern match it away.
+def : Pat<(and (add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
+ (i64 0xFFFFFFFF)),
+ (CLZW GPR:$rs1)>;
+def : Pat<(cttz (or GPR:$rs1, (i64 0x100000000))),
+ (CTZW GPR:$rs1)>;
+def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (CPOPW GPR:$rs1)>;
} // Predicates = [HasStdExtZbb, IsRV64]
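
The cttz pattern above deserves a quick illustration; this standalone C sketch (mine) checks that or-ing in bit 32 makes a 64-bit count-trailing-zeros agree with the W-form semantics, including the case where the low 32 bits are all zero:

#include <assert.h>
#include <stdint.h>

/* Portable stand-in for the 64-bit generic cttz node (illustrative only). */
static unsigned cttz64(uint64_t x) {
  unsigned n = 0;
  if (x == 0) return 64;
  while (!(x & 1)) { x >>= 1; n++; }
  return n;
}

/* ctzw semantics: trailing zeros of the low 32 bits, 32 if they are all zero. */
static unsigned ctzw(uint64_t x) {
  uint32_t lo = (uint32_t)x;
  return lo ? cttz64(lo) : 32;
}

int main(void) {
  uint64_t samples[] = {0, 1, 0x80000000u, 0xFFFFFFFF00000000ull, 0x10, 0xF0F0F0F0ull};
  for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
    uint64_t x = samples[i];
    /* cttz(x | (1 << 32)) == ctzw(x): the or caps the count at 32, which is
       exactly what the W-form instruction produces. */
    assert(cttz64(x | 0x100000000ull) == ctzw(x));
  }
  return 0;
}
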
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
-def : Pat<(sext_inreg (or (shl (assertsexti32 GPR:$rs2), (i64 16)),
- (and (assertsexti32 GPR:$rs1), 0x000000000000FFFF)),
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def : Pat<(sext_inreg (or (shl GPR:$rs2, (i64 16)),
+ (and GPR:$rs1, 0x000000000000FFFF)),
i32),
(PACKW GPR:$rs1, GPR:$rs2)>;
def : Pat<(or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
- (srl (and (assertsexti32 GPR:$rs1), 0x00000000FFFF0000),
- (i64 16))),
+ (SRLIWPat GPR:$rs1, (i64 16))),
(PACKUW GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+} // Predicates = [HasStdExtZbp, IsRV64]
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index f68767847ade..30df455c1927 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -140,6 +140,7 @@ def uimm8_lsb000 : Operand<XLenVT>,
def simm9_lsb0 : Operand<OtherVT>,
ImmLeaf<XLenVT, [{return isShiftedInt<8, 1>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<9, "Lsb0">;
+ let PrintMethod = "printBranchOperand";
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<9>";
let MCOperandPredicate = [{
@@ -149,6 +150,7 @@ def simm9_lsb0 : Operand<OtherVT>,
return MCOp.isBareSymbolRef();
}];
+ let OperandType = "OPERAND_PCREL";
}
// A 9-bit unsigned immediate where the least significant three bits are zero.
@@ -200,6 +202,7 @@ def simm10_lsb0000nonzero : Operand<XLenVT>,
def simm12_lsb0 : Operand<XLenVT>,
ImmLeaf<XLenVT, [{return isShiftedInt<11, 1>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<12, "Lsb0">;
+ let PrintMethod = "printBranchOperand";
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<12>";
let MCOperandPredicate = [{
@@ -208,6 +211,7 @@ def simm12_lsb0 : Operand<XLenVT>,
return isShiftedInt<11, 1>(Imm);
return MCOp.isBareSymbolRef();
}];
+ let OperandType = "OPERAND_PCREL";
}
//===----------------------------------------------------------------------===//
@@ -239,7 +243,7 @@ class CStore_rri<bits<3> funct3, string OpcodeStr,
OpcodeStr, "$rs2, ${imm}(${rs1})">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class Bcz<bits<3> funct3, string OpcodeStr, PatFrag CondOp,
+class Bcz<bits<3> funct3, string OpcodeStr,
RegisterClass cls>
: RVInst16CB<funct3, 0b01, (outs), (ins cls:$rs1, simm9_lsb0:$imm),
OpcodeStr, "$rs1, $imm"> {
@@ -469,8 +473,8 @@ def C_J : RVInst16CJ<0b101, 0b01, (outs), (ins simm12_lsb0:$offset),
let isBarrier=1;
}
-def C_BEQZ : Bcz<0b110, "c.beqz", seteq, GPRC>, Sched<[WriteJmp]>;
-def C_BNEZ : Bcz<0b111, "c.bnez", setne, GPRC>, Sched<[WriteJmp]>;
+def C_BEQZ : Bcz<0b110, "c.beqz", GPRC>, Sched<[WriteJmp]>;
+def C_BNEZ : Bcz<0b111, "c.bnez", GPRC>, Sched<[WriteJmp]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb),
@@ -519,7 +523,8 @@ def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1),
let rs2 = 0;
}
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isMoveReg = 1,
+ isAsCheapAsAMove = 1 in
def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2),
"c.mv", "$rs1, $rs2">,
Sched<[WriteIALU, ReadIALU]>;
@@ -744,6 +749,7 @@ class CompressPat<dag input, dag output> {
dag Input = input;
dag Output = output;
list<Predicate> Predicates = [];
+ bit isCompressOnly = false;
}
// Patterns are defined in the same order the compressed instructions appear
@@ -829,25 +835,30 @@ def : CompressPat<(SUB GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
(C_SUB GPRC:$rs1, GPRC:$rs2)>;
def : CompressPat<(XOR GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
(C_XOR GPRC:$rs1, GPRC:$rs2)>;
+let isCompressOnly = true in
def : CompressPat<(XOR GPRC:$rs1, GPRC:$rs2, GPRC:$rs1),
(C_XOR GPRC:$rs1, GPRC:$rs2)>;
def : CompressPat<(OR GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
(C_OR GPRC:$rs1, GPRC:$rs2)>;
+let isCompressOnly = true in
def : CompressPat<(OR GPRC:$rs1, GPRC:$rs2, GPRC:$rs1),
(C_OR GPRC:$rs1, GPRC:$rs2)>;
def : CompressPat<(AND GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
(C_AND GPRC:$rs1, GPRC:$rs2)>;
+let isCompressOnly = true in
def : CompressPat<(AND GPRC:$rs1, GPRC:$rs2, GPRC:$rs1),
(C_AND GPRC:$rs1, GPRC:$rs2)>;
} // Predicates = [HasStdExtC]
let Predicates = [HasStdExtC, IsRV64] in {
+let isCompressOnly = true in
def : CompressPat<(ADDIW GPRNoX0:$rd, X0, simm6:$imm),
(C_LI GPRNoX0:$rd, simm6:$imm)>;
def : CompressPat<(SUBW GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
(C_SUBW GPRC:$rs1, GPRC:$rs2)>;
def : CompressPat<(ADDW GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
(C_ADDW GPRC:$rs1, GPRC:$rs2)>;
+let isCompressOnly = true in
def : CompressPat<(ADDW GPRC:$rs1, GPRC:$rs2, GPRC:$rs1),
(C_ADDW GPRC:$rs1, GPRC:$rs2)>;
} // Predicates = [HasStdExtC, IsRV64]
@@ -890,10 +901,12 @@ def : CompressPat<(LD GPRNoX0:$rd, SP:$rs1, uimm9_lsb000:$imm),
let Predicates = [HasStdExtC] in {
def : CompressPat<(JALR X0, GPRNoX0:$rs1, 0),
(C_JR GPRNoX0:$rs1)>;
+let isCompressOnly = true in {
def : CompressPat<(ADD GPRNoX0:$rs1, X0, GPRNoX0:$rs2),
(C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>;
def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, X0),
(C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>;
+}
def : CompressPat<(ADDI GPRNoX0:$rs1, GPRNoX0:$rs2, 0),
(C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>;
def : CompressPat<(EBREAK), (C_EBREAK)>;
@@ -902,6 +915,7 @@ def : CompressPat<(JALR X1, GPRNoX0:$rs1, 0),
(C_JALR GPRNoX0:$rs1)>;
def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs1, GPRNoX0:$rs2),
(C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>;
+let isCompressOnly = true in
def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs1),
(C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>;
} // Predicates = [HasStdExtC]
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 6c36f53cd563..133599e13b8b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -299,23 +299,6 @@ def : PatFpr64Fpr64<setolt, FLT_D>;
def : PatFpr64Fpr64<setle, FLE_D>;
def : PatFpr64Fpr64<setole, FLE_D>;
-// Define pattern expansions for setcc operations which aren't directly
-// handled by a RISC-V instruction and aren't expanded in the SelectionDAG
-// Legalizer.
-
-def : Pat<(seto FPR64:$rs1, FPR64:$rs2),
- (AND (FEQ_D FPR64:$rs1, FPR64:$rs1),
- (FEQ_D FPR64:$rs2, FPR64:$rs2))>;
-def : Pat<(seto FPR64:$rs1, FPR64:$rs1),
- (FEQ_D $rs1, $rs1)>;
-
-def : Pat<(setuo FPR64:$rs1, FPR64:$rs2),
- (SLTIU (AND (FEQ_D FPR64:$rs1, FPR64:$rs1),
- (FEQ_D FPR64:$rs2, FPR64:$rs2)),
- 1)>;
-def : Pat<(setuo FPR64:$rs1, FPR64:$rs1),
- (SLTIU (FEQ_D $rs1, $rs1), 1)>;
-
def Select_FPR64_Using_CC_GPR : SelectCC_rrirr<FPR64, GPR>;
/// Loads
@@ -361,6 +344,7 @@ let Predicates = [HasStdExtD, IsRV64] in {
/// Float constants
def : Pat<(f64 (fpimm0)), (FMV_D_X X0)>;
+// Moves (no conversion)
def : Pat<(bitconvert GPR:$rs1), (FMV_D_X GPR:$rs1)>;
def : Pat<(bitconvert FPR64:$rs1), (FMV_X_D FPR64:$rs1)>;
@@ -368,11 +352,11 @@ def : Pat<(bitconvert FPR64:$rs1), (FMV_X_D FPR64:$rs1)>;
// because fpto[u|s]i produce poison if the value can't fit into the target.
// We match the single case below because fcvt.wu.d sign-extends its result so
// is cheaper than fcvt.lu.d+sext.w.
-def : Pat<(sext_inreg (zexti32 (fp_to_uint FPR64:$rs1)), i32),
+def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR64:$rs1)), i32),
(FCVT_WU_D $rs1, 0b001)>;
// [u]int32->fp
-def : Pat<(sint_to_fp (sext_inreg GPR:$rs1, i32)), (FCVT_D_W $rs1)>;
+def : Pat<(sint_to_fp (sexti32 GPR:$rs1)), (FCVT_D_W $rs1)>;
def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_D_WU $rs1)>;
def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_L_D FPR64:$rs1, 0b001)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index ce5c3abb6a06..4529949f693e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -303,10 +303,6 @@ def : Pat<(f32 (fpimm0)), (FMV_W_X X0)>;
/// Float conversion operations
-// Moves (no conversion)
-def : Pat<(bitconvert GPR:$rs1), (FMV_W_X GPR:$rs1)>;
-def : Pat<(bitconvert FPR32:$rs1), (FMV_X_W FPR32:$rs1)>;
-
// [u]int32<->float conversion patterns must be gated on IsRV32 or IsRV64, so
// are defined later.
@@ -359,23 +355,6 @@ def : PatFpr32Fpr32<setolt, FLT_S>;
def : PatFpr32Fpr32<setle, FLE_S>;
def : PatFpr32Fpr32<setole, FLE_S>;
-// Define pattern expansions for setcc operations which aren't directly
-// handled by a RISC-V instruction and aren't expanded in the SelectionDAG
-// Legalizer.
-
-def : Pat<(seto FPR32:$rs1, FPR32:$rs2),
- (AND (FEQ_S FPR32:$rs1, FPR32:$rs1),
- (FEQ_S FPR32:$rs2, FPR32:$rs2))>;
-def : Pat<(seto FPR32:$rs1, FPR32:$rs1),
- (FEQ_S $rs1, $rs1)>;
-
-def : Pat<(setuo FPR32:$rs1, FPR32:$rs2),
- (SLTIU (AND (FEQ_S FPR32:$rs1, FPR32:$rs1),
- (FEQ_S FPR32:$rs2, FPR32:$rs2)),
- 1)>;
-def : Pat<(setuo FPR32:$rs1, FPR32:$rs1),
- (SLTIU (FEQ_S $rs1, $rs1), 1)>;
-
def Select_FPR32_Using_CC_GPR : SelectCC_rrirr<FPR32, GPR>;
/// Loads
@@ -389,6 +368,10 @@ defm : StPat<store, FSW, FPR32>;
} // Predicates = [HasStdExtF]
let Predicates = [HasStdExtF, IsRV32] in {
+// Moves (no conversion)
+def : Pat<(bitconvert GPR:$rs1), (FMV_W_X GPR:$rs1)>;
+def : Pat<(bitconvert FPR32:$rs1), (FMV_X_W FPR32:$rs1)>;
+
// float->[u]int. Round-to-zero must be used.
def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
@@ -399,9 +382,10 @@ def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
} // Predicates = [HasStdExtF, IsRV32]
let Predicates = [HasStdExtF, IsRV64] in {
+// Moves (no conversion)
def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (FMV_W_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyextw_rv64 FPR32:$src), (FMV_X_W FPR32:$src)>;
-def : Pat<(sexti32 (riscv_fmv_x_anyextw_rv64 FPR32:$src)),
+def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32),
(FMV_X_W FPR32:$src)>;
// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
@@ -416,7 +400,7 @@ def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_L_S $rs1, 0b001)>;
def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_LU_S $rs1, 0b001)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp (sext_inreg GPR:$rs1, i32)), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(sint_to_fp (sexti32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>;
def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>;
def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_L $rs1, 0b111)>;
def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_LU $rs1, 0b111)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 987534aadd79..8cfb903a173c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -81,9 +81,11 @@ def : PatGprGpr<riscv_remuw, REMUW>;
// Handle the specific cases where using DIVU/REMU would be correct and result
// in fewer instructions than emitting DIVUW/REMUW then zero-extending the
// result.
-def : Pat<(zexti32 (riscv_divuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+def : Pat<(and (riscv_divuw (assertzexti32 GPR:$rs1),
+ (assertzexti32 GPR:$rs2)), 0xffffffff),
(DIVU GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (riscv_remuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+def : Pat<(and (riscv_remuw (assertzexti32 GPR:$rs1),
+ (assertzexti32 GPR:$rs2)), 0xffffffff),
(REMU GPR:$rs1, GPR:$rs2)>;
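
A short C sketch (mine) of the reasoning in the comment above: when both operands are already zero-extended 32-bit values, the full 64-bit unsigned divide or remainder equals the 32-bit one and is already zero-extended, so the wider instruction can be used directly.

#include <assert.h>
#include <stdint.h>

int main(void) {
  uint64_t a = 0x00000000FFFFFFF0ull;  /* values known zero-extended from i32 */
  uint64_t b = 0x0000000000000007ull;
  uint32_t a32 = (uint32_t)a, b32 = (uint32_t)b;
  /* 64-bit unsigned div/rem of zero-extended inputs matches the 32-bit result. */
  assert((a / b) == (uint64_t)(a32 / b32));
  assert((a % b) == (uint64_t)(a32 % b32));
  /* So the explicit mask in the pattern adds no information here. */
  assert(((a / b) & 0xFFFFFFFFull) == (a / b));
  return 0;
}
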
// Although the sexti32 operands may not have originated from an i32 srem,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 1c7f53fecb8c..b3fc76aee161 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
///
/// This file describes the RISC-V instructions from the standard 'V' Vector
-/// extension, version 0.8.
+/// extension, version 0.10.
/// This version is still experimental as the 'V' extension hasn't been
/// ratified yet.
///
@@ -31,18 +31,6 @@ def VTypeIOp : Operand<XLenVT> {
let DecoderMethod = "decodeUImmOperand<11>";
}
-def VRegAsmOperand : AsmOperandClass {
- let Name = "RVVRegOpOperand";
- let RenderMethod = "addRegOperands";
- let PredicateMethod = "isReg";
- let ParserMethod = "parseRegister";
-}
-
-def VRegOp : RegisterOperand<VR> {
- let ParserMatchClass = VRegAsmOperand;
- let PrintMethod = "printOperand";
-}
-
def VMaskAsmOperand : AsmOperandClass {
let Name = "RVVMaskRegOpOperand";
let RenderMethod = "addRegOperands";
@@ -74,14 +62,13 @@ def simm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> {
def SImm5Plus1AsmOperand : AsmOperandClass {
let Name = "SImm5Plus1";
- let RenderMethod = "addSImm5Plus1Operands";
+ let RenderMethod = "addImmOperands";
let DiagnosticType = "InvalidSImm5Plus1";
}
def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
[{return isInt<5>(Imm - 1);}]> {
let ParserMatchClass = SImm5Plus1AsmOperand;
- let PrintMethod = "printSImm5Plus1";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -95,162 +82,242 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+// load vd, (rs1)
+class VUnitStrideLoadMask<string opcodestr>
+ : RVInstVLU<0b000, LSWidth8.Value{3}, LUMOPUnitStrideMask, LSWidth8.Value{2-0},
+ (outs VR:$vd),
+ (ins GPR:$rs1), opcodestr, "$vd, (${rs1})">;
+
// load vd, (rs1), vm
-class VUnitStrideLoad<RISCVMOP mop, RISCVLSUMOP lumop, RISCVWidth width,
- string opcodestr>
- : RVInstVLU<0b000, mop, lumop, width, (outs VRegOp:$vd),
+class VUnitStrideLoad<RISCVLSUMOP lumop, RISCVWidth width,
+ string opcodestr>
+ : RVInstVLU<0b000, width.Value{3}, lumop, width.Value{2-0},
+ (outs VR:$vd),
(ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
// load vd, (rs1), rs2, vm
-class VStridedLoad<RISCVMOP mop, RISCVWidth width, string opcodestr>
- : RVInstVLS<0b000, mop, width, (outs VRegOp:$vd),
+class VStridedLoad<RISCVWidth width, string opcodestr>
+ : RVInstVLS<0b000, width.Value{3}, width.Value{2-0},
+ (outs VR:$vd),
(ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
"$vd, (${rs1}), $rs2$vm">;
// load vd, (rs1), vs2, vm
class VIndexedLoad<RISCVMOP mop, RISCVWidth width, string opcodestr>
- : RVInstVLX<0b000, mop, width, (outs VRegOp:$vd),
- (ins GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm), opcodestr,
+ : RVInstVLX<0b000, width.Value{3}, mop, width.Value{2-0},
+ (outs VR:$vd),
+ (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr,
"$vd, (${rs1}), $vs2$vm">;
// vl<nf>r.v vd, (rs1)
-class VWholeLoad<bits<3> nf, string opcodestr>
- : RVInstVLU<nf, MOPLDUnitStrideU, LUMOPUnitStrideWholeReg,
- LSWidthVSEW, (outs VRegOp:$vd), (ins GPR:$rs1),
+class VWholeLoad<bits<3> nf, RISCVWidth width, string opcodestr>
+ : RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideWholeReg,
+ width.Value{2-0}, (outs VR:$vd), (ins GPR:$rs1),
opcodestr, "$vd, (${rs1})"> {
let vm = 1;
let Uses = [];
+ let RVVConstraint = NoConstraint;
}
+
+// segment load vd, (rs1), vm
+class VUnitStrideSegmentLoad<bits<3> nf, RISCVLSUMOP lumop,
+ RISCVWidth width, string opcodestr>
+ : RVInstVLU<nf, width.Value{3}, lumop, width.Value{2-0},
+ (outs VR:$vd),
+ (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
+
+// segment load vd, (rs1), rs2, vm
+class VStridedSegmentLoad<bits<3> nf, RISCVWidth width, string opcodestr>
+ : RVInstVLS<nf, width.Value{3}, width.Value{2-0},
+ (outs VR:$vd),
+ (ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
+ "$vd, (${rs1}), $rs2$vm">;
+
+// segment load vd, (rs1), vs2, vm
+class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width,
+ string opcodestr>
+ : RVInstVLX<nf, width.Value{3}, mop, width.Value{2-0},
+ (outs VR:$vd),
+ (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr,
+ "$vd, (${rs1}), $vs2$vm">;
} // hasSideEffects = 0, mayLoad = 1, mayStore = 0
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
// store vd, vs3, (rs1), vm
-class VUnitStrideStore<RISCVMOP mop, RISCVLSUMOP sumop, RISCVWidth width,
+class VUnitStrideStoreMask<string opcodestr>
+ : RVInstVSU<0b000, LSWidth8.Value{3}, SUMOPUnitStrideMask, LSWidth8.Value{2-0},
+ (outs), (ins VR:$vs3, GPR:$rs1), opcodestr,
+ "$vs3, (${rs1})">;
+
+// store vd, vs3, (rs1), vm
+class VUnitStrideStore<RISCVLSUMOP sumop, RISCVWidth width,
string opcodestr>
- : RVInstVSU<0b000, mop, sumop, width, (outs),
- (ins VRegOp:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr,
+ : RVInstVSU<0b000, width.Value{3}, sumop, width.Value{2-0},
+ (outs), (ins VR:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr,
"$vs3, (${rs1})$vm">;
// store vd, vs3, (rs1), rs2, vm
-class VStridedStore<RISCVMOP mop, RISCVWidth width, string opcodestr>
- : RVInstVSS<0b000, mop, width, (outs),
- (ins VRegOp:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm),
+class VStridedStore<RISCVWidth width, string opcodestr>
+ : RVInstVSS<0b000, width.Value{3}, width.Value{2-0}, (outs),
+ (ins VR:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm),
opcodestr, "$vs3, (${rs1}), $rs2$vm">;
// store vd, vs3, (rs1), vs2, vm
class VIndexedStore<RISCVMOP mop, RISCVWidth width, string opcodestr>
- : RVInstVSX<0b000, mop, width, (outs),
- (ins VRegOp:$vs3, GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm),
+ : RVInstVSX<0b000, width.Value{3}, mop, width.Value{2-0}, (outs),
+ (ins VR:$vs3, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
opcodestr, "$vs3, (${rs1}), $vs2$vm">;
// vs<nf>r.v vd, (rs1)
class VWholeStore<bits<3> nf, string opcodestr>
- : RVInstVSU<nf, MOPSTUnitStride, SUMOPUnitStrideWholeReg,
- LSWidthVSEW, (outs), (ins VRegOp:$vs3, GPR:$rs1),
+ : RVInstVSU<nf, 0, SUMOPUnitStrideWholeReg,
+ 0b000, (outs), (ins VR:$vs3, GPR:$rs1),
opcodestr, "$vs3, (${rs1})"> {
let vm = 1;
let Uses = [];
}
+
+// segment store vd, vs3, (rs1), vm
+class VUnitStrideSegmentStore<bits<3> nf, RISCVWidth width, string opcodestr>
+ : RVInstVSU<nf, width.Value{3}, SUMOPUnitStride, width.Value{2-0},
+ (outs), (ins VR:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr,
+ "$vs3, (${rs1})$vm">;
+
+// segment store vd, vs3, (rs1), rs2, vm
+class VStridedSegmentStore<bits<3> nf, RISCVWidth width, string opcodestr>
+ : RVInstVSS<nf, width.Value{3}, width.Value{2-0}, (outs),
+ (ins VR:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm),
+ opcodestr, "$vs3, (${rs1}), $rs2$vm">;
+
+// segment store vd, vs3, (rs1), vs2, vm
+class VIndexedSegmentStore<bits<3> nf, RISCVMOP mop, RISCVWidth width,
+ string opcodestr>
+ : RVInstVSX<nf, width.Value{3}, mop, width.Value{2-0}, (outs),
+ (ins VR:$vs3, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vs3, (${rs1}), $vs2$vm">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// op vd, vs2, vs1, vm
class VALUVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVV<funct6, opv, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, VRegOp:$vs1, VMaskOp:$vm),
+ : RVInstVV<funct6, opv, (outs VR:$vd),
+ (ins VR:$vs2, VR:$vs1, VMaskOp:$vm),
opcodestr, "$vd, $vs2, $vs1$vm">;
// op vd, vs2, vs1, v0 (without mask, use v0 as carry input)
class VALUmVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVV<funct6, opv, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, VRegOp:$vs1, VMV0:$v0),
+ : RVInstVV<funct6, opv, (outs VR:$vd),
+ (ins VR:$vs2, VR:$vs1, VMV0:$v0),
opcodestr, "$vd, $vs2, $vs1, v0"> {
let vm = 0;
}
// op vd, vs1, vs2, vm (reverse the order of vs1 and vs2)
class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVV<funct6, opv, (outs VRegOp:$vd),
- (ins VRegOp:$vs1, VRegOp:$vs2, VMaskOp:$vm),
+ : RVInstVV<funct6, opv, (outs VR:$vd),
+ (ins VR:$vs1, VR:$vs2, VMaskOp:$vm),
opcodestr, "$vd, $vs1, $vs2$vm">;
-// op vd, vs1, vs2
+// op vd, vs2, vs1
class VALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVV<funct6, opv, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, VRegOp:$vs1),
+ : RVInstVV<funct6, opv, (outs VR:$vd),
+ (ins VR:$vs2, VR:$vs1),
opcodestr, "$vd, $vs2, $vs1"> {
let vm = 1;
}
// op vd, vs2, rs1, vm
class VALUVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVX<funct6, opv, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, GPR:$rs1, VMaskOp:$vm),
+ : RVInstVX<funct6, opv, (outs VR:$vd),
+ (ins VR:$vs2, GPR:$rs1, VMaskOp:$vm),
opcodestr, "$vd, $vs2, $rs1$vm">;
// op vd, vs2, rs1, v0 (without mask, use v0 as carry input)
class VALUmVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVX<funct6, opv, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, GPR:$rs1, VMV0:$v0),
+ : RVInstVX<funct6, opv, (outs VR:$vd),
+ (ins VR:$vs2, GPR:$rs1, VMV0:$v0),
opcodestr, "$vd, $vs2, $rs1, v0"> {
let vm = 0;
}
// op vd, rs1, vs2, vm (reverse the order of rs1 and vs2)
class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVX<funct6, opv, (outs VRegOp:$vd),
- (ins GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm),
+ : RVInstVX<funct6, opv, (outs VR:$vd),
+ (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm),
opcodestr, "$vd, $rs1, $vs2$vm">;
// op vd, vs1, vs2
class VALUVXNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVX<funct6, opv, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, GPR:$rs1),
+ : RVInstVX<funct6, opv, (outs VR:$vd),
+ (ins VR:$vs2, GPR:$rs1),
opcodestr, "$vd, $vs2, $rs1"> {
let vm = 1;
}
// op vd, vs2, imm, vm
class VALUVI<bits<6> funct6, string opcodestr, Operand optype = simm5>
- : RVInstIVI<funct6, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, optype:$imm, VMaskOp:$vm),
+ : RVInstIVI<funct6, (outs VR:$vd),
+ (ins VR:$vs2, optype:$imm, VMaskOp:$vm),
opcodestr, "$vd, $vs2, $imm$vm">;
// op vd, vs2, imm, v0 (without mask, use v0 as carry input)
class VALUmVI<bits<6> funct6, string opcodestr, Operand optype = simm5>
- : RVInstIVI<funct6, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, optype:$imm, VMV0:$v0),
+ : RVInstIVI<funct6, (outs VR:$vd),
+ (ins VR:$vs2, optype:$imm, VMV0:$v0),
opcodestr, "$vd, $vs2, $imm, v0"> {
let vm = 0;
}
// op vd, vs2, imm, vm
class VALUVINoVm<bits<6> funct6, string opcodestr, Operand optype = simm5>
- : RVInstIVI<funct6, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, optype:$imm),
+ : RVInstIVI<funct6, (outs VR:$vd),
+ (ins VR:$vs2, optype:$imm),
opcodestr, "$vd, $vs2, $imm"> {
let vm = 1;
}
// op vd, vs2, rs1, vm (Float)
class VALUVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVX<funct6, opv, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, FPR32:$rs1, VMaskOp:$vm),
+ : RVInstVX<funct6, opv, (outs VR:$vd),
+ (ins VR:$vs2, FPR32:$rs1, VMaskOp:$vm),
opcodestr, "$vd, $vs2, $rs1$vm">;
// op vd, rs1, vs2, vm (Float) (with mask, reverse the order of rs1 and vs2)
class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVX<funct6, opv, (outs VRegOp:$vd),
- (ins FPR32:$rs1, VRegOp:$vs2, VMaskOp:$vm),
+ : RVInstVX<funct6, opv, (outs VR:$vd),
+ (ins FPR32:$rs1, VR:$vs2, VMaskOp:$vm),
opcodestr, "$vd, $rs1, $vs2$vm">;
// op vd, vs2, vm (use vs1 as instruction encoding)
class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr>
- : RVInstV<funct6, vs1, opv, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, VMaskOp:$vm),
+ : RVInstV<funct6, vs1, opv, (outs VR:$vd),
+ (ins VR:$vs2, VMaskOp:$vm),
opcodestr, "$vd, $vs2$vm">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in {
+// vamo vd, (rs1), vs2, vd, vm
+class VAMOWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr>
+ : RVInstVAMO<amoop, width.Value{2-0}, (outs VR:$vd_wd),
+ (ins GPR:$rs1, VR:$vs2, VR:$vd, VMaskOp:$vm),
+ opcodestr, "$vd_wd, (${rs1}), $vs2, $vd$vm"> {
+ let Constraints = "$vd_wd = $vd";
+ let wd = 1;
+ bits<5> vd;
+ let Inst{11-7} = vd;
+}
+
+// vamo x0, (rs1), vs2, vs3, vm
+class VAMONoWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr>
+ : RVInstVAMO<amoop, width.Value{2-0}, (outs),
+ (ins GPR:$rs1, VR:$vs2, VR:$vs3, VMaskOp:$vm),
+ opcodestr, "x0, (${rs1}), $vs2, $vs3$vm"> {
+ bits<5> vs3;
+ let Inst{11-7} = vs3;
+}
+
+} // hasSideEffects = 0, mayLoad = 1, mayStore = 1
+
//===----------------------------------------------------------------------===//
// Combination of instruction classes.
// Use these multiclasses to define instructions more easily.
@@ -358,6 +425,18 @@ multiclass VALU_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>;
}
+multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> {
+ def _WD : VAMOWd<amoop, width, opcodestr>;
+ def _UNWD : VAMONoWd<amoop, width, opcodestr>;
+}
+
+multiclass VWholeLoad<bits<3> nf, string opcodestr> {
+ def E8_V : VWholeLoad<nf, LSWidth8, opcodestr # "e8.v">;
+ def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v">;
+ def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v">;
+ def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v">;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -367,82 +446,85 @@ let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
def VSETVLI : RVInstSetVLi<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei),
"vsetvli", "$rd, $rs1, $vtypei">;
+def VSETIVLI : RVInstSetiVLi<(outs GPR:$rd), (ins uimm5:$uimm, VTypeIOp:$vtypei),
+ "vsetivli", "$rd, $uimm, $vtypei">;
+
def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
"vsetvl", "$rd, $rs1, $rs2">;
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
// Vector Unit-Stride Instructions
-def VLB_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStride, LSWidthVByte, "vlb.v">;
-def VLH_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStride, LSWidthVHalf, "vlh.v">;
-def VLW_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStride, LSWidthVWord, "vlw.v">;
-
-def VLBU_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVByte, "vlbu.v">;
-def VLHU_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVHalf, "vlhu.v">;
-def VLWU_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVWord, "vlwu.v">;
-
-def VLE_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVSEW, "vle.v">;
-
-def VLBFF_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStrideFF, LSWidthVByte, "vlbff.v">;
-def VLHFF_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStrideFF, LSWidthVHalf, "vlhff.v">;
-def VLWFF_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStrideFF, LSWidthVWord, "vlwff.v">;
-
-def VLBUFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVByte, "vlbuff.v">;
-def VLHUFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVHalf, "vlhuff.v">;
-def VLWUFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVWord, "vlwuff.v">;
+def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">;
+def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">;
+def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">;
+def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">;
-def VLEFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVSEW, "vleff.v">;
+def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">;
+def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">;
+def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">;
+def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">;
-def VSB_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVByte, "vsb.v">;
-def VSH_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVHalf, "vsh.v">;
-def VSW_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVWord, "vsw.v">;
+def VLE1_V : VUnitStrideLoadMask<"vle1.v">;
+def VSE1_V : VUnitStrideStoreMask<"vse1.v">;
-def VSE_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVSEW, "vse.v">;
+def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">;
+def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">;
+def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">;
+def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">;
// Vector Strided Instructions
-def VLSB_V : VStridedLoad<MOPLDStridedS, LSWidthVByte, "vlsb.v">;
-def VLSH_V : VStridedLoad<MOPLDStridedS, LSWidthVHalf, "vlsh.v">;
-def VLSW_V : VStridedLoad<MOPLDStridedS, LSWidthVWord, "vlsw.v">;
+def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">;
+def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">;
+def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">;
+def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">;
-def VLSBU_V : VStridedLoad<MOPLDStridedU, LSWidthVByte, "vlsbu.v">;
-def VLSHU_V : VStridedLoad<MOPLDStridedU, LSWidthVHalf, "vlshu.v">;
-def VLSWU_V : VStridedLoad<MOPLDStridedU, LSWidthVWord, "vlswu.v">;
-
-def VLSE_V : VStridedLoad<MOPLDStridedU, LSWidthVSEW, "vlse.v">;
-
-def VSSB_V : VStridedStore<MOPSTStrided, LSWidthVByte, "vssb.v">;
-def VSSH_V : VStridedStore<MOPSTStrided, LSWidthVHalf, "vssh.v">;
-def VSSW_V : VStridedStore<MOPSTStrided, LSWidthVWord, "vssw.v">;
-def VSSE_V : VStridedStore<MOPSTStrided, LSWidthVSEW, "vsse.v">;
+def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">;
+def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">;
+def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">;
+def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">;
// Vector Indexed Instructions
-def VLXB_V : VIndexedLoad<MOPLDIndexedS, LSWidthVByte, "vlxb.v">;
-def VLXH_V : VIndexedLoad<MOPLDIndexedS, LSWidthVHalf, "vlxh.v">;
-def VLXW_V : VIndexedLoad<MOPLDIndexedS, LSWidthVWord, "vlxw.v">;
-
-def VLXBU_V : VIndexedLoad<MOPLDIndexedU, LSWidthVByte, "vlxbu.v">;
-def VLXHU_V : VIndexedLoad<MOPLDIndexedU, LSWidthVHalf, "vlxhu.v">;
-def VLXWU_V : VIndexedLoad<MOPLDIndexedU, LSWidthVWord, "vlxwu.v">;
-
-def VLXE_V : VIndexedLoad<MOPLDIndexedU, LSWidthVSEW, "vlxe.v">;
-
-def VSXB_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVByte, "vsxb.v">;
-def VSXH_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVHalf, "vsxh.v">;
-def VSXW_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVWord, "vsxw.v">;
-def VSXE_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVSEW, "vsxe.v">;
+def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">;
+def VLUXEI16_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth16, "vluxei16.v">;
+def VLUXEI32_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth32, "vluxei32.v">;
+def VLUXEI64_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth64, "vluxei64.v">;
+
+def VLOXEI8_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth8, "vloxei8.v">;
+def VLOXEI16_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth16, "vloxei16.v">;
+def VLOXEI32_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth32, "vloxei32.v">;
+def VLOXEI64_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth64, "vloxei64.v">;
+
+def VSUXEI8_V : VIndexedStore<MOPSTIndexedUnord, LSWidth8, "vsuxei8.v">;
+def VSUXEI16_V : VIndexedStore<MOPSTIndexedUnord, LSWidth16, "vsuxei16.v">;
+def VSUXEI32_V : VIndexedStore<MOPSTIndexedUnord, LSWidth32, "vsuxei32.v">;
+def VSUXEI64_V : VIndexedStore<MOPSTIndexedUnord, LSWidth64, "vsuxei64.v">;
+
+def VSOXEI8_V : VIndexedStore<MOPSTIndexedOrder, LSWidth8, "vsoxei8.v">;
+def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">;
+def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">;
+def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">;
+
+defm VL1R : VWholeLoad<0, "vl1r">;
+defm VL2R : VWholeLoad<1, "vl2r">;
+defm VL4R : VWholeLoad<3, "vl4r">;
+defm VL8R : VWholeLoad<7, "vl8r">;
+def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
+def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VR:$vd, GPR:$rs1)>;
+def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VR:$vd, GPR:$rs1)>;
+def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VR:$vd, GPR:$rs1)>;
-def VSUXB_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVByte, "vsuxb.v">;
-def VSUXH_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVHalf, "vsuxh.v">;
-def VSUXW_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVWord, "vsuxw.v">;
-def VSUXE_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVSEW, "vsuxe.v">;
-
-def VL1R_V : VWholeLoad<0, "vl1r.v">;
def VS1R_V : VWholeStore<0, "vs1r.v">;
+def VS2R_V : VWholeStore<1, "vs2r.v">;
+def VS4R_V : VWholeStore<3, "vs4r.v">;
+def VS8R_V : VWholeStore<7, "vs8r.v">;
// Vector Single-Width Integer Add and Subtract
defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
defm VSUB_V : VALU_IV_V_X<"vsub", 0b000010>;
defm VRSUB_V : VALU_IV_X_I<"vrsub", 0b000011>;
+def : InstAlias<"vneg.v $vd, $vs$vm", (VRSUB_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>;
+
// Vector Widening Integer Add/Subtract
// Refer to 11.2 Widening Vector Arithmetic Instructions
// The destination vector register group cannot overlap a source vector
@@ -468,17 +550,29 @@ defm VWSUB_W : VALU_MV_V_X<"vwsub", 0b110111, "w">;
} // Constraints = "@earlyclobber $vd"
def : InstAlias<"vwcvt.x.x.v $vd, $vs$vm",
- (VWADD_VX VRegOp:$vd, VRegOp:$vs, X0, VMaskOp:$vm)>;
+ (VWADD_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>;
def : InstAlias<"vwcvtu.x.x.v $vd, $vs$vm",
- (VWADDU_VX VRegOp:$vd, VRegOp:$vs, X0, VMaskOp:$vm)>;
+ (VWADDU_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>;
+
+// Vector Integer Extension
+defm VZEXT_VF8 : VALU_MV_VS2<"vzext.vf8", 0b010010, 0b00010>;
+defm VSEXT_VF8 : VALU_MV_VS2<"vsext.vf8", 0b010010, 0b00011>;
+defm VZEXT_VF4 : VALU_MV_VS2<"vzext.vf4", 0b010010, 0b00100>;
+defm VSEXT_VF4 : VALU_MV_VS2<"vsext.vf4", 0b010010, 0b00101>;
+defm VZEXT_VF2 : VALU_MV_VS2<"vzext.vf2", 0b010010, 0b00110>;
+defm VSEXT_VF2 : VALU_MV_VS2<"vsext.vf2", 0b010010, 0b00111>;
// Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
defm VADC_V : VALUm_IV_V_X_I<"vadc", 0b010000>;
+let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
defm VMADC_V : VALUm_IV_V_X_I<"vmadc", 0b010001>;
defm VMADC_V : VALUNoVm_IV_V_X_I<"vmadc", 0b010001>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
defm VSBC_V : VALUm_IV_V_X<"vsbc", 0b010010>;
+let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
defm VMSBC_V : VALUm_IV_V_X<"vmsbc", 0b010011>;
defm VMSBC_V : VALUNoVm_IV_V_X<"vmsbc", 0b010011>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
// Vector Bitwise Logical Instructions
defm VAND_V : VALU_IV_V_X_I<"vand", 0b001001>;
@@ -486,7 +580,7 @@ defm VOR_V : VALU_IV_V_X_I<"vor", 0b001010>;
defm VXOR_V : VALU_IV_V_X_I<"vxor", 0b001011>;
def : InstAlias<"vnot.v $vd, $vs$vm",
- (VXOR_VI VRegOp:$vd, VRegOp:$vs, -1, VMaskOp:$vm)>;
+ (VXOR_VI VR:$vd, VR:$vs, -1, VMaskOp:$vm)>;
// Vector Single-Width Bit Shift Instructions
defm VSLL_V : VALU_IV_V_X_I<"vsll", 0b100101, uimm5>;
@@ -498,12 +592,16 @@ defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>;
// The destination vector register group cannot overlap the first source
// vector register group (specified by vs2). The destination vector register
// group cannot overlap the mask register if used, unless LMUL=1.
-let Constraints = "@earlyclobber $vd", RVVConstraint = Narrow in {
+let Constraints = "@earlyclobber $vd" in {
defm VNSRL_W : VALU_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">;
defm VNSRA_W : VALU_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = Narrow
+} // Constraints = "@earlyclobber $vd"
+
+def : InstAlias<"vncvt.x.x.w $vd, $vs$vm",
+ (VNSRL_WX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>;
// Vector Integer Comparison Instructions
+let RVVConstraint = NoConstraint in {
defm VMSEQ_V : VALU_IV_V_X_I<"vmseq", 0b011000>;
defm VMSNE_V : VALU_IV_V_X_I<"vmsne", 0b011001>;
defm VMSLTU_V : VALU_IV_V_X<"vmsltu", 0b011010>;
@@ -512,27 +610,61 @@ defm VMSLEU_V : VALU_IV_V_X_I<"vmsleu", 0b011100>;
defm VMSLE_V : VALU_IV_V_X_I<"vmsle", 0b011101>;
defm VMSGTU_V : VALU_IV_X_I<"vmsgtu", 0b011110>;
defm VMSGT_V : VALU_IV_X_I<"vmsgt", 0b011111>;
+} // RVVConstraint = NoConstraint
def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm",
- (VMSLTU_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+ (VMSLTU_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
def : InstAlias<"vmsgt.vv $vd, $va, $vb$vm",
- (VMSLT_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+ (VMSLT_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
def : InstAlias<"vmsgeu.vv $vd, $va, $vb$vm",
- (VMSLEU_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+ (VMSLEU_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
def : InstAlias<"vmsge.vv $vd, $va, $vb$vm",
- (VMSLE_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
-def : InstAlias<"vmsltu.vi $vd, $va, $imm$vm",
- (VMSLEU_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm,
- VMaskOp:$vm), 0>;
-def : InstAlias<"vmslt.vi $vd, $va, $imm$vm",
- (VMSLE_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm,
- VMaskOp:$vm), 0>;
-def : InstAlias<"vmsgeu.vi $vd, $va, $imm$vm",
- (VMSGTU_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm,
- VMaskOp:$vm), 0>;
-def : InstAlias<"vmsge.vi $vd, $va, $imm$vm",
- (VMSGT_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm,
- VMaskOp:$vm), 0>;
+ (VMSLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
+
+let isCodeGenOnly = 0, isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 0,
+ mayStore = 0 in {
+// For unsigned comparisons we need to special case a 0 immediate: simply
+// decrementing it, as we do for signed, would invert the always true/false
+// semantics. To match the GNU assembler we will use vmseq/vmsne.vv with the
+// same register for both operands, which we can't do from an InstAlias.
+def PseudoVMSGEU_VI : Pseudo<(outs VR:$vd),
+ (ins VR:$vs2, simm5_plus1:$imm, VMaskOp:$vm),
+ [], "vmsgeu.vi", "$vd, $vs2, $imm$vm">;
+def PseudoVMSLTU_VI : Pseudo<(outs VR:$vd),
+ (ins VR:$vs2, simm5_plus1:$imm, VMaskOp:$vm),
+ [], "vmsltu.vi", "$vd, $vs2, $imm$vm">;
+// Handle signed with pseudos as well for more consistency in the
+// implementation.
+def PseudoVMSGE_VI : Pseudo<(outs VR:$vd),
+ (ins VR:$vs2, simm5_plus1:$imm, VMaskOp:$vm),
+ [], "vmsge.vi", "$vd, $vs2, $imm$vm">;
+def PseudoVMSLT_VI : Pseudo<(outs VR:$vd),
+ (ins VR:$vs2, simm5_plus1:$imm, VMaskOp:$vm),
+ [], "vmslt.vi", "$vd, $vs2, $imm$vm">;
+}
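
A scalar C sketch (mine, not vector code) of the arithmetic behind the comment above the pseudos:

#include <assert.h>
#include <stdint.h>

int main(void) {
  /* Signed: x >= imm  <=>  x > imm - 1, so vmsge.vi can lower to a
     greater-than compare against a decremented immediate. */
  for (int32_t x = -6; x <= 6; x++) {
    int32_t imm = -3;
    assert((x >= imm) == (x > imm - 1));
  }

  /* Unsigned with imm == 0: x >= 0 is always true, but decrementing wraps the
     immediate to UINT32_MAX and x > UINT32_MAX is always false -- the inverted
     semantics the comment above warns about. Hence the same-register
     vmseq.vv/vmsne.vv expansion used for the pseudos. */
  uint32_t x = 0, zero = 0;
  assert((x >= zero) == 1);
  assert((x > UINT32_MAX) == 0);
  return 0;
}
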
+
+let isCodeGenOnly = 0, isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 0,
+ mayStore = 0 in {
+def PseudoVMSGEU_VX : Pseudo<(outs VR:$vd),
+ (ins VR:$vs2, GPR:$rs1),
+ [], "vmsgeu.vx", "$vd, $vs2, $rs1">;
+def PseudoVMSGE_VX : Pseudo<(outs VR:$vd),
+ (ins VR:$vs2, GPR:$rs1),
+ [], "vmsge.vx", "$vd, $vs2, $rs1">;
+def PseudoVMSGEU_VX_M : Pseudo<(outs VRNoV0:$vd),
+ (ins VR:$vs2, GPR:$rs1, VMaskOp:$vm),
+ [], "vmsgeu.vx", "$vd, $vs2, $rs1$vm">;
+def PseudoVMSGE_VX_M : Pseudo<(outs VRNoV0:$vd),
+ (ins VR:$vs2, GPR:$rs1, VMaskOp:$vm),
+ [], "vmsge.vx", "$vd, $vs2, $rs1$vm">;
+def PseudoVMSGEU_VX_M_T : Pseudo<(outs VMV0:$vd, VR:$scratch),
+ (ins VR:$vs2, GPR:$rs1, VMaskOp:$vm),
+ [], "vmsgeu.vx", "$vd, $vs2, $rs1$vm, $scratch">;
+def PseudoVMSGE_VX_M_T : Pseudo<(outs VMV0:$vd, VR:$scratch),
+ (ins VR:$vs2, GPR:$rs1, VMaskOp:$vm),
+ [], "vmsge.vx", "$vd, $vs2, $rs1$vm, $scratch">;
+}
// Vector Integer Min/Max Instructions
defm VMINU_V : VALU_IV_V_X<"vminu", 0b000100>;
@@ -577,15 +709,16 @@ defm VWMACCUS_V : VALUr_MV_X<"vwmaccus", 0b111110>;
defm VMERGE_V : VALUm_IV_V_X_I<"vmerge", 0b010111>;
// Vector Integer Move Instructions
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1 in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1,
+ RVVConstraint = NoConstraint in {
// op vd, vs1
-def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VRegOp:$vd),
- (ins VRegOp:$vs1), "vmv.v.v", "$vd, $vs1">;
+def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd),
+ (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">;
// op vd, rs1
-def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VRegOp:$vd),
+def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd),
(ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">;
// op vd, imm
-def VMV_V_I : RVInstIVI<0b010111, (outs VRegOp:$vd),
+def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd),
(ins simm5:$imm), "vmv.v.i", "$vd, $imm">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
@@ -609,11 +742,13 @@ defm VSSRL_V : VALU_IV_V_X_I<"vssrl", 0b101010, uimm5>;
defm VSSRA_V : VALU_IV_V_X_I<"vssra", 0b101011, uimm5>;
// Vector Narrowing Fixed-Point Clip Instructions
-let Constraints = "@earlyclobber $vd", RVVConstraint = Narrow in {
+let Constraints = "@earlyclobber $vd" in {
defm VNCLIPU_W : VALU_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">;
defm VNCLIP_W : VALU_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = Narrow
+} // Constraints = "@earlyclobber $vd"
+} // Predicates = [HasStdExtV]
+let Predicates = [HasStdExtV, HasStdExtF] in {
// Vector Single-Width Floating-Point Add/Subtract Instructions
defm VFADD_V : VALU_FV_V_F<"vfadd", 0b000000>;
defm VFSUB_V : VALU_FV_V_F<"vfsub", 0b000010>;
@@ -664,7 +799,9 @@ defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Floating-Point Square-Root Instruction
-defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b100011, 0b00000>;
+defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
+defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
+defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
// Vector Floating-Point MIN/MAX Instructions
defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>;
@@ -675,32 +812,38 @@ defm VFSGNJ_V : VALU_FV_V_F<"vfsgnj", 0b001000>;
defm VFSGNJN_V : VALU_FV_V_F<"vfsgnjn", 0b001001>;
defm VFSGNJX_V : VALU_FV_V_F<"vfsgnjx", 0b001010>;
+def : InstAlias<"vfneg.v $vd, $vs$vm",
+ (VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>;
+
// Vector Floating-Point Compare Instructions
+let RVVConstraint = NoConstraint in {
defm VMFEQ_V : VALU_FV_V_F<"vmfeq", 0b011000>;
defm VMFNE_V : VALU_FV_V_F<"vmfne", 0b011100>;
defm VMFLT_V : VALU_FV_V_F<"vmflt", 0b011011>;
defm VMFLE_V : VALU_FV_V_F<"vmfle", 0b011001>;
defm VMFGT_V : VALU_FV_F<"vmfgt", 0b011101>;
defm VMFGE_V : VALU_FV_F<"vmfge", 0b011111>;
+} // RVVConstraint = NoConstraint
def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm",
- (VMFLT_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+ (VMFLT_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
def : InstAlias<"vmfge.vv $vd, $va, $vb$vm",
- (VMFLE_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+ (VMFLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
// Vector Floating-Point Classify Instruction
-defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b100011, 0b10000>;
+defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b010011, 0b10000>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// Vector Floating-Point Merge Instruction
-def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VRegOp:$vd),
- (ins VRegOp:$vs2, FPR32:$rs1, VMV0:$v0),
+def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
+ (ins VR:$vs2, FPR32:$rs1, VMV0:$v0),
"vfmerge.vfm", "$vd, $vs2, $rs1, v0"> {
let vm = 0;
}
// Vector Floating-Point Move Instruction
-def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VRegOp:$vd),
+let RVVConstraint = NoConstraint in
+def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
(ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1"> {
let vs2 = 0;
let vm = 1;
@@ -708,31 +851,40 @@ def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VRegOp:$vd),
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
// Single-Width Floating-Point/Integer Type-Convert Instructions
-defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b100010, 0b00000>;
-defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b100010, 0b00001>;
-defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b100010, 0b00010>;
-defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b100010, 0b00011>;
+defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>;
+defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>;
+defm VFCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>;
+defm VFCVT_RTZ_X_F_V : VALU_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>;
+defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>;
+defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>;
// Widening Floating-Point/Integer Type-Convert Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt in {
-defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b100010, 0b01000>;
-defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b100010, 0b01001>;
-defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b100010, 0b01010>;
-defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b100010, 0b01011>;
-defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b100010, 0b01100>;
+defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>;
+defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>;
+defm VFWCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>;
+defm VFWCVT_RTZ_X_F_V : VALU_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>;
+defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>;
+defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>;
+defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt
// Narrowing Floating-Point/Integer Type-Convert Instructions
-let Constraints = "@earlyclobber $vd", RVVConstraint = Narrow in {
-defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b100010, 0b10000>;
-defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b100010, 0b10001>;
-defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b100010, 0b10010>;
-defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b100010, 0b10011>;
-defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b100010, 0b10100>;
-defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b100010, 0b10101>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = Narrow
+let Constraints = "@earlyclobber $vd" in {
+defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>;
+defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>;
+defm VFNCVT_RTZ_XU_F_W : VALU_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>;
+defm VFNCVT_RTZ_X_F_W : VALU_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>;
+defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>;
+defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>;
+defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>;
+defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>;
+} // Constraints = "@earlyclobber $vd"
+} // Predicates = [HasStdExtV, HasStdExtF]
+let Predicates = [HasStdExtV] in {
// Vector Single-Width Integer Reduction Instructions
+let RVVConstraint = NoConstraint in {
defm VREDSUM : VALU_MV_V<"vredsum", 0b000000>;
defm VREDMAXU : VALU_MV_V<"vredmaxu", 0b000110>;
defm VREDMAX : VALU_MV_V<"vredmax", 0b000111>;
@@ -741,34 +893,42 @@ defm VREDMIN : VALU_MV_V<"vredmin", 0b000101>;
defm VREDAND : VALU_MV_V<"vredand", 0b000001>;
defm VREDOR : VALU_MV_V<"vredor", 0b000010>;
defm VREDXOR : VALU_MV_V<"vredxor", 0b000011>;
+} // RVVConstraint = NoConstraint
// Vector Widening Integer Reduction Instructions
-let Constraints = "@earlyclobber $vd" in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
// Set earlyclobber for following instructions for second and mask operands.
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
defm VWREDSUMU : VALU_IV_V<"vwredsumu", 0b110000>;
defm VWREDSUM : VALU_IV_V<"vwredsum", 0b110001>;
-} // Constraints = "@earlyclobber $vd"
+} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
+} // Predicates = [HasStdExtV]
+let Predicates = [HasStdExtV, HasStdExtF] in {
// Vector Single-Width Floating-Point Reduction Instructions
+let RVVConstraint = NoConstraint in {
defm VFREDOSUM : VALU_FV_V<"vfredosum", 0b000011>;
defm VFREDSUM : VALU_FV_V<"vfredsum", 0b000001>;
defm VFREDMAX : VALU_FV_V<"vfredmax", 0b000111>;
defm VFREDMIN : VALU_FV_V<"vfredmin", 0b000101>;
+} // RVVConstraint = NoConstraint
// Vector Widening Floating-Point Reduction Instructions
-let Constraints = "@earlyclobber $vd" in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
// Set earlyclobber for following instructions for second and mask operands.
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
defm VFWREDOSUM : VALU_FV_V<"vfwredosum", 0b110011>;
defm VFWREDSUM : VALU_FV_V<"vfwredsum", 0b110001>;
-} // Constraints = "@earlyclobber $vd"
+} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
+} // Predicates = [HasStdExtV, HasStdExtF]
+let Predicates = [HasStdExtV] in {
// Vector Mask-Register Logical Instructions
+let RVVConstraint = NoConstraint in {
defm VMAND_M : VALU_MV_Mask<"vmand", 0b011001, "m">;
defm VMNAND_M : VALU_MV_Mask<"vmnand", 0b011101, "m">;
defm VMANDNOT_M : VALU_MV_Mask<"vmandnot", 0b011000, "m">;
@@ -777,82 +937,95 @@ defm VMOR_M : VALU_MV_Mask<"vmor", 0b011010, "m">;
defm VMNOR_M : VALU_MV_Mask<"vmnor", 0b011110, "m">;
defm VMORNOT_M : VALU_MV_Mask<"vmornot", 0b011100, "m">;
defm VMXNOR_M : VALU_MV_Mask<"vmxnor", 0b011111, "m">;
+}
-def : InstAlias<"vmcpy.m $vd, $vs",
- (VMAND_MM VRegOp:$vd, VRegOp:$vs, VRegOp:$vs)>;
+def : InstAlias<"vmmv.m $vd, $vs",
+ (VMAND_MM VR:$vd, VR:$vs, VR:$vs)>;
def : InstAlias<"vmclr.m $vd",
- (VMXOR_MM VRegOp:$vd, VRegOp:$vd, VRegOp:$vd)>;
+ (VMXOR_MM VR:$vd, VR:$vd, VR:$vd)>;
def : InstAlias<"vmset.m $vd",
- (VMXNOR_MM VRegOp:$vd, VRegOp:$vd, VRegOp:$vd)>;
+ (VMXNOR_MM VR:$vd, VR:$vd, VR:$vd)>;
def : InstAlias<"vmnot.m $vd, $vs",
- (VMNAND_MM VRegOp:$vd, VRegOp:$vs, VRegOp:$vs)>;
+ (VMNAND_MM VR:$vd, VR:$vs, VR:$vs)>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
+ RVVConstraint = NoConstraint in {
// Vector mask population count vpopc
def VPOPC_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd),
- (ins VRegOp:$vs2, VMaskOp:$vm),
+ (ins VR:$vs2, VMaskOp:$vm),
"vpopc.m", "$vd, $vs2$vm">;
// vfirst find-first-set mask bit
def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd),
- (ins VRegOp:$vs2, VMaskOp:$vm),
+ (ins VR:$vs2, VMaskOp:$vm),
"vfirst.m", "$vd, $vs2$vm">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in {
// vmsbf.m set-before-first mask bit
defm VMSBF_M : VALU_MV_VS2<"vmsbf.m", 0b010100, 0b00001>;
-
// vmsif.m set-including-first mask bit
defm VMSIF_M : VALU_MV_VS2<"vmsif.m", 0b010100, 0b00011>;
-
// vmsof.m set-only-first mask bit
defm VMSOF_M : VALU_MV_VS2<"vmsof.m", 0b010100, 0b00010>;
-
// Vector Iota Instruction
-let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in {
defm VIOTA_M : VALU_MV_VS2<"viota.m", 0b010100, 0b10000>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota
// Vector Element Index Instruction
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
-def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VRegOp:$vd),
+def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd),
(ins VMaskOp:$vm), "vid.v", "$vd$vm"> {
let vs2 = 0;
}
// Integer Scalar Move Instructions
-let vm = 1 in {
+let vm = 1, RVVConstraint = NoConstraint in {
def VMV_X_S : RVInstV<0b010000, 0b00000, OPMVV, (outs GPR:$vd),
- (ins VRegOp:$vs2), "vmv.x.s", "$vd, $vs2">;
-def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VRegOp:$vd),
- (ins GPR:$rs1), "vmv.s.x", "$vd, $rs1">;
+ (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">;
+let Constraints = "$vd = $vd_wb" in
+def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VR:$vd_wb),
+ (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">;
}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+} // Predicates = [HasStdExtV]
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1 in {
+let Predicates = [HasStdExtV, HasStdExtF] in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1,
+ RVVConstraint = NoConstraint in {
// Floating-Point Scalar Move Instructions
def VFMV_F_S : RVInstV<0b010000, 0b00000, OPFVV, (outs FPR32:$vd),
- (ins VRegOp:$vs2), "vfmv.f.s", "$vd, $vs2">;
-def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VRegOp:$vd),
- (ins FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">;
+ (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">;
+let Constraints = "$vd = $vd_wb" in
+def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb),
+ (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1
+} // Predicates = [HasStdExtV, HasStdExtF]
+let Predicates = [HasStdExtV] in {
// Vector Slide Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
defm VSLIDEUP_V : VALU_IV_X_I<"vslideup", 0b001110, uimm5>;
+defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
defm VSLIDEDOWN_V : VALU_IV_X_I<"vslidedown", 0b001111, uimm5>;
+defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>;
+} // Predicates = [HasStdExtV]
+let Predicates = [HasStdExtV, HasStdExtF] in {
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
-defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>;
+defm VFSLIDE1UP_V : VALU_FV_F<"vfslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
-defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>;
+defm VFSLIDE1DOWN_V : VALU_FV_F<"vfslide1down", 0b001111>;
+} // Predicates = [HasStdExtV, HasStdExtF]
+let Predicates = [HasStdExtV] in {
// Vector Register Gather Instruction
let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in {
defm VRGATHER_V : VALU_IV_V_X_I<"vrgather", 0b001100, uimm5>;
+def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather
// Vector Compress Instruction
@@ -860,10 +1033,11 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress in {
defm VCOMPRESS_V : VALU_MV_Mask<"vcompress", 0b010111>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
+ RVVConstraint = NoConstraint in {
foreach nf = [1, 2, 4, 8] in {
- def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VRegOp:$vd),
- (ins VRegOp:$vs2), "vmv" # nf # "r.v",
+ def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VR:$vd),
+ (ins VR:$vs2), "vmv" # nf # "r.v",
"$vd, $vs2"> {
let Uses = [];
let vm = 1;
@@ -871,3 +1045,122 @@ foreach nf = [1, 2, 4, 8] in {
}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtZvlsseg] in {
+ foreach nf=2-8 in {
+ def VLSEG#nf#E8_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth8, "vlseg"#nf#"e8.v">;
+ def VLSEG#nf#E16_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth16, "vlseg"#nf#"e16.v">;
+ def VLSEG#nf#E32_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth32, "vlseg"#nf#"e32.v">;
+ def VLSEG#nf#E64_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth64, "vlseg"#nf#"e64.v">;
+
+ def VLSEG#nf#E8FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth8, "vlseg"#nf#"e8ff.v">;
+ def VLSEG#nf#E16FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth16, "vlseg"#nf#"e16ff.v">;
+ def VLSEG#nf#E32FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth32, "vlseg"#nf#"e32ff.v">;
+ def VLSEG#nf#E64FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth64, "vlseg"#nf#"e64ff.v">;
+
+ def VSSEG#nf#E8_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth8, "vsseg"#nf#"e8.v">;
+ def VSSEG#nf#E16_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth16, "vsseg"#nf#"e16.v">;
+ def VSSEG#nf#E32_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth32, "vsseg"#nf#"e32.v">;
+ def VSSEG#nf#E64_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">;
+
+ // Vector Strided Instructions
+ def VLSSEG#nf#E8_V : VStridedSegmentLoad<!add(nf, -1), LSWidth8, "vlsseg"#nf#"e8.v">;
+ def VLSSEG#nf#E16_V : VStridedSegmentLoad<!add(nf, -1), LSWidth16, "vlsseg"#nf#"e16.v">;
+ def VLSSEG#nf#E32_V : VStridedSegmentLoad<!add(nf, -1), LSWidth32, "vlsseg"#nf#"e32.v">;
+ def VLSSEG#nf#E64_V : VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">;
+
+ def VSSSEG#nf#E8_V : VStridedSegmentStore<!add(nf, -1), LSWidth8, "vssseg"#nf#"e8.v">;
+ def VSSSEG#nf#E16_V : VStridedSegmentStore<!add(nf, -1), LSWidth16, "vssseg"#nf#"e16.v">;
+ def VSSSEG#nf#E32_V : VStridedSegmentStore<!add(nf, -1), LSWidth32, "vssseg"#nf#"e32.v">;
+ def VSSSEG#nf#E64_V : VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">;
+
+ // Vector Indexed Instructions
+ def VLUXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
+ LSWidth8, "vluxseg"#nf#"ei8.v">;
+ def VLUXSEG#nf#EI16_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
+ LSWidth16, "vluxseg"#nf#"ei16.v">;
+ def VLUXSEG#nf#EI32_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
+ LSWidth32, "vluxseg"#nf#"ei32.v">;
+ def VLUXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
+ LSWidth64, "vluxseg"#nf#"ei64.v">;
+
+ def VLOXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
+ LSWidth8, "vloxseg"#nf#"ei8.v">;
+ def VLOXSEG#nf#EI16_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
+ LSWidth16, "vloxseg"#nf#"ei16.v">;
+ def VLOXSEG#nf#EI32_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
+ LSWidth32, "vloxseg"#nf#"ei32.v">;
+ def VLOXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
+ LSWidth64, "vloxseg"#nf#"ei64.v">;
+
+ def VSUXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
+ LSWidth8, "vsuxseg"#nf#"ei8.v">;
+ def VSUXSEG#nf#EI16_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
+ LSWidth16, "vsuxseg"#nf#"ei16.v">;
+ def VSUXSEG#nf#EI32_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
+ LSWidth32, "vsuxseg"#nf#"ei32.v">;
+ def VSUXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
+ LSWidth64, "vsuxseg"#nf#"ei64.v">;
+
+ def VSOXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
+ LSWidth8, "vsoxseg"#nf#"ei8.v">;
+ def VSOXSEG#nf#EI16_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
+ LSWidth16, "vsoxseg"#nf#"ei16.v">;
+ def VSOXSEG#nf#EI32_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
+ LSWidth32, "vsoxseg"#nf#"ei32.v">;
+ def VSOXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
+ LSWidth64, "vsoxseg"#nf#"ei64.v">;
+ }
+} // Predicates = [HasStdExtZvlsseg]
+
+let Predicates = [HasStdExtZvamo, HasStdExtA] in {
+ defm VAMOSWAPEI8 : VAMO<AMOOPVamoSwap, LSWidth8, "vamoswapei8.v">;
+ defm VAMOSWAPEI16 : VAMO<AMOOPVamoSwap, LSWidth16, "vamoswapei16.v">;
+ defm VAMOSWAPEI32 : VAMO<AMOOPVamoSwap, LSWidth32, "vamoswapei32.v">;
+
+ defm VAMOADDEI8 : VAMO<AMOOPVamoAdd, LSWidth8, "vamoaddei8.v">;
+ defm VAMOADDEI16 : VAMO<AMOOPVamoAdd, LSWidth16, "vamoaddei16.v">;
+ defm VAMOADDEI32 : VAMO<AMOOPVamoAdd, LSWidth32, "vamoaddei32.v">;
+
+ defm VAMOXOREI8 : VAMO<AMOOPVamoXor, LSWidth8, "vamoxorei8.v">;
+ defm VAMOXOREI16 : VAMO<AMOOPVamoXor, LSWidth16, "vamoxorei16.v">;
+ defm VAMOXOREI32 : VAMO<AMOOPVamoXor, LSWidth32, "vamoxorei32.v">;
+
+ defm VAMOANDEI8 : VAMO<AMOOPVamoAnd, LSWidth8, "vamoandei8.v">;
+ defm VAMOANDEI16 : VAMO<AMOOPVamoAnd, LSWidth16, "vamoandei16.v">;
+ defm VAMOANDEI32 : VAMO<AMOOPVamoAnd, LSWidth32, "vamoandei32.v">;
+
+ defm VAMOOREI8 : VAMO<AMOOPVamoOr, LSWidth8, "vamoorei8.v">;
+ defm VAMOOREI16 : VAMO<AMOOPVamoOr, LSWidth16, "vamoorei16.v">;
+ defm VAMOOREI32 : VAMO<AMOOPVamoOr, LSWidth32, "vamoorei32.v">;
+
+ defm VAMOMINEI8 : VAMO<AMOOPVamoMin, LSWidth8, "vamominei8.v">;
+ defm VAMOMINEI16 : VAMO<AMOOPVamoMin, LSWidth16, "vamominei16.v">;
+ defm VAMOMINEI32 : VAMO<AMOOPVamoMin, LSWidth32, "vamominei32.v">;
+
+ defm VAMOMAXEI8 : VAMO<AMOOPVamoMax, LSWidth8, "vamomaxei8.v">;
+ defm VAMOMAXEI16 : VAMO<AMOOPVamoMax, LSWidth16, "vamomaxei16.v">;
+ defm VAMOMAXEI32 : VAMO<AMOOPVamoMax, LSWidth32, "vamomaxei32.v">;
+
+ defm VAMOMINUEI8 : VAMO<AMOOPVamoMinu, LSWidth8, "vamominuei8.v">;
+ defm VAMOMINUEI16 : VAMO<AMOOPVamoMinu, LSWidth16, "vamominuei16.v">;
+ defm VAMOMINUEI32 : VAMO<AMOOPVamoMinu, LSWidth32, "vamominuei32.v">;
+
+ defm VAMOMAXUEI8 : VAMO<AMOOPVamoMaxu, LSWidth8, "vamomaxuei8.v">;
+ defm VAMOMAXUEI16 : VAMO<AMOOPVamoMaxu, LSWidth16, "vamomaxuei16.v">;
+ defm VAMOMAXUEI32 : VAMO<AMOOPVamoMaxu, LSWidth32, "vamomaxuei32.v">;
+} // Predicates = [HasStdExtZvamo, HasStdExtA]
+
+let Predicates = [HasStdExtZvamo, HasStdExtA, IsRV64] in {
+ defm VAMOSWAPEI64 : VAMO<AMOOPVamoSwap, LSWidth64, "vamoswapei64.v">;
+ defm VAMOADDEI64 : VAMO<AMOOPVamoAdd, LSWidth64, "vamoaddei64.v">;
+ defm VAMOXOREI64 : VAMO<AMOOPVamoXor, LSWidth64, "vamoxorei64.v">;
+ defm VAMOANDEI64 : VAMO<AMOOPVamoAnd, LSWidth64, "vamoandei64.v">;
+ defm VAMOOREI64 : VAMO<AMOOPVamoOr, LSWidth64, "vamoorei64.v">;
+ defm VAMOMINEI64 : VAMO<AMOOPVamoMin, LSWidth64, "vamominei64.v">;
+ defm VAMOMAXEI64 : VAMO<AMOOPVamoMax, LSWidth64, "vamomaxei64.v">;
+ defm VAMOMINUEI64 : VAMO<AMOOPVamoMinu, LSWidth64, "vamominuei64.v">;
+ defm VAMOMAXUEI64 : VAMO<AMOOPVamoMaxu, LSWidth64, "vamomaxuei64.v">;
+} // Predicates = [HasStdExtZvamo, HasStdExtA, IsRV64]
+
+include "RISCVInstrInfoVPseudos.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
new file mode 100644
index 000000000000..5c228820f0cc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -0,0 +1,4416 @@
+//===-- RISCVInstrInfoVPseudos.td - RISC-V 'V' Pseudos -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file contains the required infrastructure to support code generation
+/// for the standard 'V' (Vector) extension, version 0.10. This version is still
+/// experimental as the 'V' extension hasn't been ratified yet.
+///
+/// This file is included from RISCVInstrInfoV.td
+///
+//===----------------------------------------------------------------------===//
+
+def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>,
+ SDTCisInt<1>]>>;
+def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
+ SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
+
+def riscv_vleff : SDNode<"RISCVISD::VLEFF",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
+ SDTCisVT<2, XLenVT>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+ SDNPSideEffect]>;
+def riscv_vleff_mask : SDNode<"RISCVISD::VLEFF_MASK",
+ SDTypeProfile<1, 4, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisVT<4, XLenVT>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+ SDNPSideEffect]>;
+def riscv_read_vl : SDNode<"RISCVISD::READ_VL",
+ SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>,
+ [SDNPInGlue]>;
+
+// X0 has special meaning for vsetvl/vsetvli.
+//   rd | rs1 |   AVL value | Effect on vl
+//------+-----+-------------+------------------------------------
+//  !X0 |  X0 |       VLMAX | Set vl to VLMAX
+//   X0 |  X0 | Value in vl | Keep current vl, just change vtype.
+def VLOp : ComplexPattern<XLenVT, 1, "selectVLOp">;
+
+def DecImm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() - 1, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+// This class describes information associated with the LMUL.
+class LMULInfo<int lmul, VReg regclass, VReg wregclass,
+ VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx> {
+ bits<3> value = lmul; // This is encoded as the vlmul field of vtype.
+ VReg vrclass = regclass;
+ VReg wvrclass = wregclass;
+ VReg f8vrclass = f8regclass;
+ VReg f4vrclass = f4regclass;
+ VReg f2vrclass = f2regclass;
+ string MX = mx;
+}
+
+// Associate LMUL with tablegen records of register classes.
+def V_M1 : LMULInfo<0b000, VR, VRM2, VR, VR, VR, "M1">;
+def V_M2 : LMULInfo<0b001, VRM2, VRM4, VR, VR, VR, "M2">;
+def V_M4 : LMULInfo<0b010, VRM4, VRM8, VRM2, VR, VR, "M4">;
+def V_M8 : LMULInfo<0b011, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8">;
+
+def V_MF8 : LMULInfo<0b101, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF8">;
+def V_MF4 : LMULInfo<0b110, VR, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF4">;
+def V_MF2 : LMULInfo<0b111, VR, VR, VR, VR,/*NoVReg*/VR, "MF2">;
+
+// Used to iterate over all possible LMULs.
+def MxList {
+ list<LMULInfo> m = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
+}
+
+class FPR_Info<RegisterClass regclass, string fx> {
+ RegisterClass fprclass = regclass;
+ string FX = fx;
+}
+
+def SCALAR_F16 : FPR_Info<FPR16, "F16">;
+def SCALAR_F32 : FPR_Info<FPR32, "F32">;
+def SCALAR_F64 : FPR_Info<FPR64, "F64">;
+
+def FPList {
+ list<FPR_Info> fpinfo = [SCALAR_F16, SCALAR_F32, SCALAR_F64];
+}
+
+class MxSet<int eew> {
+ list<LMULInfo> m = !cond(!eq(eew, 8) : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8],
+ !eq(eew, 16) : [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8],
+ !eq(eew, 32) : [V_MF2, V_M1, V_M2, V_M4, V_M8],
+ !eq(eew, 64) : [V_M1, V_M2, V_M4, V_M8]);
+}
+
+class NFSet<LMULInfo m> {
+ list<int> L = !cond(!eq(m.value, V_M8.value): [],
+ !eq(m.value, V_M4.value): [2],
+ !eq(m.value, V_M2.value): [2, 3, 4],
+ true: [2, 3, 4, 5, 6, 7, 8]);
+}
+
+class shift_amount<int num> {
+ int val = !if(!eq(num, 1), 0, !add(1, shift_amount<!srl(num, 1)>.val));
+}
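shift_amount<num> is just log2 of a power-of-two argument, computed by recursing on !srl; it is used further down to turn a SEW divisor into a right-shift count. A minimal Python model of the same recursion (illustrative only, not part of the patch):

  def shift_amount(num):
      # log2 of a power of two, mirroring the recursive !srl definition above
      return 0 if num == 1 else 1 + shift_amount(num >> 1)

  assert [shift_amount(n) for n in (1, 8, 64)] == [0, 3, 6]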
+
+class octuple_from_str<string MX> {
+ int ret = !cond(!eq(MX, "MF8") : 1,
+ !eq(MX, "MF4") : 2,
+ !eq(MX, "MF2") : 4,
+ !eq(MX, "M1") : 8,
+ !eq(MX, "M2") : 16,
+ !eq(MX, "M4") : 32,
+ !eq(MX, "M8") : 64);
+}
+
+class octuple_to_str<int octuple> {
+ string ret = !if(!eq(octuple, 1), "MF8",
+ !if(!eq(octuple, 2), "MF4",
+ !if(!eq(octuple, 4), "MF2",
+ !if(!eq(octuple, 8), "M1",
+ !if(!eq(octuple, 16), "M2",
+ !if(!eq(octuple, 32), "M4",
+ !if(!eq(octuple, 64), "M8",
+ "NoDef")))))));
+}
+
+// Output pattern for X0 used to represent VLMAX in the pseudo instructions.
+def VLMax : OutPatFrag<(ops), (XLenVT X0)>;
+
+// List of EEW.
+defvar EEWList = [8, 16, 32, 64];
+
+class SegRegClass<LMULInfo m, int nf> {
+ VReg RC = !cast<VReg>("VRN" # nf # !cond(!eq(m.value, V_MF8.value): V_M1.MX,
+ !eq(m.value, V_MF4.value): V_M1.MX,
+ !eq(m.value, V_MF2.value): V_M1.MX,
+ true: m.MX));
+}
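SegRegClass only assembles a register-class name: fractional LMULs (MF8/MF4/MF2) fall back to the M1 segment classes, otherwise the group's own MX suffix is used. A small Python sketch of that string construction (the sample class names follow the pattern above and are assumed to exist in the register definitions):

  def seg_reg_class(nf, lmul_mx):
      # Fractional LMULs reuse the M1 segment register classes
      mx = "M1" if lmul_mx.startswith("MF") else lmul_mx
      return "VRN" + str(nf) + mx

  assert seg_reg_class(2, "MF4") == "VRN2M1"
  assert seg_reg_class(3, "M2") == "VRN3M2"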
+
+//===----------------------------------------------------------------------===//
+// Vector register and vector group type information.
+//===----------------------------------------------------------------------===//
+
+class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
+ ValueType Scal = XLenVT, RegisterClass ScalarReg = GPR>
+{
+ ValueType Vector = Vec;
+ ValueType Mask = Mas;
+ int SEW = Sew;
+ VReg RegClass = Reg;
+ LMULInfo LMul = M;
+ ValueType Scalar = Scal;
+ RegisterClass ScalarRegClass = ScalarReg;
+ // The pattern fragment which produces the AVL operand, representing the
+ // "natural" vector length for this type. For scalable vectors this is VLMax.
+ OutPatFrag AVL = VLMax;
+
+ string ScalarSuffix = !cond(!eq(Scal, XLenVT) : "X",
+ !eq(Scal, f16) : "F16",
+ !eq(Scal, f32) : "F32",
+ !eq(Scal, f64) : "F64");
+}
+
+class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas, int Sew,
+ VReg Reg, LMULInfo M, ValueType Scal = XLenVT,
+ RegisterClass ScalarReg = GPR>
+ : VTypeInfo<Vec, Mas, Sew, Reg, M, Scal, ScalarReg>
+{
+ ValueType VectorM1 = VecM1;
+}
+
+defset list<VTypeInfo> AllVectors = {
+ defset list<VTypeInfo> AllIntegerVectors = {
+ defset list<VTypeInfo> NoGroupIntegerVectors = {
+ def VI8MF8: VTypeInfo<vint8mf8_t, vbool64_t, 8, VR, V_MF8>;
+ def VI8MF4: VTypeInfo<vint8mf4_t, vbool32_t, 8, VR, V_MF4>;
+ def VI8MF2: VTypeInfo<vint8mf2_t, vbool16_t, 8, VR, V_MF2>;
+ def VI8M1: VTypeInfo<vint8m1_t, vbool8_t, 8, VR, V_M1>;
+ def VI16MF4: VTypeInfo<vint16mf4_t, vbool64_t, 16, VR, V_MF4>;
+ def VI16MF2: VTypeInfo<vint16mf2_t, vbool32_t, 16, VR, V_MF2>;
+ def VI16M1: VTypeInfo<vint16m1_t, vbool16_t, 16, VR, V_M1>;
+ def VI32MF2: VTypeInfo<vint32mf2_t, vbool64_t, 32, VR, V_MF2>;
+ def VI32M1: VTypeInfo<vint32m1_t, vbool32_t, 32, VR, V_M1>;
+ def VI64M1: VTypeInfo<vint64m1_t, vbool64_t, 64, VR, V_M1>;
+ }
+ defset list<GroupVTypeInfo> GroupIntegerVectors = {
+ def VI8M2: GroupVTypeInfo<vint8m2_t, vint8m1_t, vbool4_t, 8, VRM2, V_M2>;
+ def VI8M4: GroupVTypeInfo<vint8m4_t, vint8m1_t, vbool2_t, 8, VRM4, V_M4>;
+ def VI8M8: GroupVTypeInfo<vint8m8_t, vint8m1_t, vbool1_t, 8, VRM8, V_M8>;
+
+ def VI16M2: GroupVTypeInfo<vint16m2_t,vint16m1_t,vbool8_t, 16,VRM2, V_M2>;
+ def VI16M4: GroupVTypeInfo<vint16m4_t,vint16m1_t,vbool4_t, 16,VRM4, V_M4>;
+ def VI16M8: GroupVTypeInfo<vint16m8_t,vint16m1_t,vbool2_t, 16,VRM8, V_M8>;
+
+ def VI32M2: GroupVTypeInfo<vint32m2_t,vint32m1_t,vbool16_t,32,VRM2, V_M2>;
+ def VI32M4: GroupVTypeInfo<vint32m4_t,vint32m1_t,vbool8_t, 32,VRM4, V_M4>;
+ def VI32M8: GroupVTypeInfo<vint32m8_t,vint32m1_t,vbool4_t, 32,VRM8, V_M8>;
+
+ def VI64M2: GroupVTypeInfo<vint64m2_t,vint64m1_t,vbool32_t,64,VRM2, V_M2>;
+ def VI64M4: GroupVTypeInfo<vint64m4_t,vint64m1_t,vbool16_t,64,VRM4, V_M4>;
+ def VI64M8: GroupVTypeInfo<vint64m8_t,vint64m1_t,vbool8_t, 64,VRM8, V_M8>;
+ }
+ }
+
+ defset list<VTypeInfo> AllFloatVectors = {
+ defset list<VTypeInfo> NoGroupFloatVectors = {
+ def VF16MF4: VTypeInfo<vfloat16mf4_t, vbool64_t, 16, VR, V_MF4, f16, FPR16>;
+ def VF16MF2: VTypeInfo<vfloat16mf2_t, vbool32_t, 16, VR, V_MF2, f16, FPR16>;
+ def VF16M1: VTypeInfo<vfloat16m1_t, vbool16_t, 16, VR, V_M1, f16, FPR16>;
+
+ def VF32MF2: VTypeInfo<vfloat32mf2_t,vbool64_t, 32, VR, V_MF2, f32, FPR32>;
+ def VF32M1: VTypeInfo<vfloat32m1_t, vbool32_t, 32, VR, V_M1, f32, FPR32>;
+
+ def VF64M1: VTypeInfo<vfloat64m1_t, vbool64_t, 64, VR, V_M1, f64, FPR64>;
+ }
+
+ defset list<GroupVTypeInfo> GroupFloatVectors = {
+ def VF16M2: GroupVTypeInfo<vfloat16m2_t, vfloat16m1_t, vbool8_t, 16,
+ VRM2, V_M2, f16, FPR16>;
+ def VF16M4: GroupVTypeInfo<vfloat16m4_t, vfloat16m1_t, vbool4_t, 16,
+ VRM4, V_M4, f16, FPR16>;
+ def VF16M8: GroupVTypeInfo<vfloat16m8_t, vfloat16m1_t, vbool2_t, 16,
+ VRM8, V_M8, f16, FPR16>;
+
+ def VF32M2: GroupVTypeInfo<vfloat32m2_t, vfloat32m1_t, vbool16_t, 32,
+ VRM2, V_M2, f32, FPR32>;
+ def VF32M4: GroupVTypeInfo<vfloat32m4_t, vfloat32m1_t, vbool8_t, 32,
+ VRM4, V_M4, f32, FPR32>;
+ def VF32M8: GroupVTypeInfo<vfloat32m8_t, vfloat32m1_t, vbool4_t, 32,
+ VRM8, V_M8, f32, FPR32>;
+
+ def VF64M2: GroupVTypeInfo<vfloat64m2_t, vfloat64m1_t, vbool32_t, 64,
+ VRM2, V_M2, f64, FPR64>;
+ def VF64M4: GroupVTypeInfo<vfloat64m4_t, vfloat64m1_t, vbool16_t, 64,
+ VRM4, V_M4, f64, FPR64>;
+ def VF64M8: GroupVTypeInfo<vfloat64m8_t, vfloat64m1_t, vbool8_t, 64,
+ VRM8, V_M8, f64, FPR64>;
+ }
+ }
+}
+
+// This functor is used to obtain the int vector type that has the same SEW and
+// LMUL multiplier as the input parameter type.
+class GetIntVTypeInfo<VTypeInfo vti>
+{
+ // Equivalent integer vector type. Eg.
+ // VI8M1 → VI8M1 (identity)
+ // VF64M4 → VI64M4
+ VTypeInfo Vti = !cast<VTypeInfo>(!subst("VF", "VI", !cast<string>(vti)));
+}
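Because integer and float VTypeInfo records share the same SEW/LMUL naming scheme, the functor can work purely on the record name. A rough Python equivalent of the !subst (illustrative only):

  def get_int_vtype(name):
      # "VF64M4" -> "VI64M4"; integer names such as "VI8M1" are unchanged
      return name.replace("VF", "VI")

  assert get_int_vtype("VF64M4") == "VI64M4"
  assert get_int_vtype("VI8M1") == "VI8M1"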
+
+class MTypeInfo<ValueType Mas, LMULInfo M, string Bx> {
+ ValueType Mask = Mas;
+ // {SEW, VLMul} values set a valid VType to deal with this mask type.
+ // We assume SEW=8 and set the corresponding LMUL.
+ int SEW = 8;
+ LMULInfo LMul = M;
+ string BX = Bx; // Suffix for mask operations.
+ // The pattern fragment which produces the AVL operand, representing the
+ // "natural" vector length for this mask type. For scalable masks this is
+ // VLMax.
+ OutPatFrag AVL = VLMax;
+}
+
+defset list<MTypeInfo> AllMasks = {
+ // vbool<n>_t, <n> = SEW/LMUL, we assume SEW=8 and corresponding LMUL.
+ def : MTypeInfo<vbool64_t, V_MF8, "B1">;
+ def : MTypeInfo<vbool32_t, V_MF4, "B2">;
+ def : MTypeInfo<vbool16_t, V_MF2, "B4">;
+ def : MTypeInfo<vbool8_t, V_M1, "B8">;
+ def : MTypeInfo<vbool4_t, V_M2, "B16">;
+ def : MTypeInfo<vbool2_t, V_M4, "B32">;
+ def : MTypeInfo<vbool1_t, V_M8, "B64">;
+}
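With the assumed SEW of 8, each vbool<n>_t (n = SEW/LMUL) pairs with the LMUL whose octuple value is 64/n, and that octuple value is also the number in the B suffix. A short sketch of the relationship (helper names are illustrative):

  OCTUPLE_TO_MX = {1: "MF8", 2: "MF4", 4: "MF2", 8: "M1", 16: "M2", 32: "M4", 64: "M8"}

  def mask_lmul_and_suffix(n):
      # vbool<n>_t with SEW=8: octuple LMUL = 64 / n, and the B suffix carries that value
      octuple = 64 // n
      return OCTUPLE_TO_MX[octuple], "B%d" % octuple

  assert mask_lmul_and_suffix(64) == ("MF8", "B1")
  assert mask_lmul_and_suffix(1) == ("M8", "B64")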
+
+class VTypeInfoToWide<VTypeInfo vti, VTypeInfo wti>
+{
+ VTypeInfo Vti = vti;
+ VTypeInfo Wti = wti;
+}
+
+class VTypeInfoToFraction<VTypeInfo vti, VTypeInfo fti>
+{
+ VTypeInfo Vti = vti;
+ VTypeInfo Fti = fti;
+}
+
+defset list<VTypeInfoToWide> AllWidenableIntVectors = {
+ def : VTypeInfoToWide<VI8MF8, VI16MF4>;
+ def : VTypeInfoToWide<VI8MF4, VI16MF2>;
+ def : VTypeInfoToWide<VI8MF2, VI16M1>;
+ def : VTypeInfoToWide<VI8M1, VI16M2>;
+ def : VTypeInfoToWide<VI8M2, VI16M4>;
+ def : VTypeInfoToWide<VI8M4, VI16M8>;
+
+ def : VTypeInfoToWide<VI16MF4, VI32MF2>;
+ def : VTypeInfoToWide<VI16MF2, VI32M1>;
+ def : VTypeInfoToWide<VI16M1, VI32M2>;
+ def : VTypeInfoToWide<VI16M2, VI32M4>;
+ def : VTypeInfoToWide<VI16M4, VI32M8>;
+
+ def : VTypeInfoToWide<VI32MF2, VI64M1>;
+ def : VTypeInfoToWide<VI32M1, VI64M2>;
+ def : VTypeInfoToWide<VI32M2, VI64M4>;
+ def : VTypeInfoToWide<VI32M4, VI64M8>;
+}
+
+defset list<VTypeInfoToWide> AllWidenableFloatVectors = {
+ def : VTypeInfoToWide<VF16MF4, VF32MF2>;
+ def : VTypeInfoToWide<VF16MF2, VF32M1>;
+ def : VTypeInfoToWide<VF16M1, VF32M2>;
+ def : VTypeInfoToWide<VF16M2, VF32M4>;
+ def : VTypeInfoToWide<VF16M4, VF32M8>;
+
+ def : VTypeInfoToWide<VF32MF2, VF64M1>;
+ def : VTypeInfoToWide<VF32M1, VF64M2>;
+ def : VTypeInfoToWide<VF32M2, VF64M4>;
+ def : VTypeInfoToWide<VF32M4, VF64M8>;
+}
+
+defset list<VTypeInfoToFraction> AllFractionableVF2IntVectors = {
+ def : VTypeInfoToFraction<VI16MF4, VI8MF8>;
+ def : VTypeInfoToFraction<VI16MF2, VI8MF4>;
+ def : VTypeInfoToFraction<VI16M1, VI8MF2>;
+ def : VTypeInfoToFraction<VI16M2, VI8M1>;
+ def : VTypeInfoToFraction<VI16M4, VI8M2>;
+ def : VTypeInfoToFraction<VI16M8, VI8M4>;
+ def : VTypeInfoToFraction<VI32MF2, VI16MF4>;
+ def : VTypeInfoToFraction<VI32M1, VI16MF2>;
+ def : VTypeInfoToFraction<VI32M2, VI16M1>;
+ def : VTypeInfoToFraction<VI32M4, VI16M2>;
+ def : VTypeInfoToFraction<VI32M8, VI16M4>;
+ def : VTypeInfoToFraction<VI64M1, VI32MF2>;
+ def : VTypeInfoToFraction<VI64M2, VI32M1>;
+ def : VTypeInfoToFraction<VI64M4, VI32M2>;
+ def : VTypeInfoToFraction<VI64M8, VI32M4>;
+}
+
+defset list<VTypeInfoToFraction> AllFractionableVF4IntVectors = {
+ def : VTypeInfoToFraction<VI32MF2, VI8MF8>;
+ def : VTypeInfoToFraction<VI32M1, VI8MF4>;
+ def : VTypeInfoToFraction<VI32M2, VI8MF2>;
+ def : VTypeInfoToFraction<VI32M4, VI8M1>;
+ def : VTypeInfoToFraction<VI32M8, VI8M2>;
+ def : VTypeInfoToFraction<VI64M1, VI16MF4>;
+ def : VTypeInfoToFraction<VI64M2, VI16MF2>;
+ def : VTypeInfoToFraction<VI64M4, VI16M1>;
+ def : VTypeInfoToFraction<VI64M8, VI16M2>;
+}
+
+defset list<VTypeInfoToFraction> AllFractionableVF8IntVectors = {
+ def : VTypeInfoToFraction<VI64M1, VI8MF8>;
+ def : VTypeInfoToFraction<VI64M2, VI8MF4>;
+ def : VTypeInfoToFraction<VI64M4, VI8MF2>;
+ def : VTypeInfoToFraction<VI64M8, VI8M1>;
+}
+
+defset list<VTypeInfoToWide> AllWidenableIntToFloatVectors = {
+ def : VTypeInfoToWide<VI8MF8, VF16MF4>;
+ def : VTypeInfoToWide<VI8MF4, VF16MF2>;
+ def : VTypeInfoToWide<VI8MF2, VF16M1>;
+ def : VTypeInfoToWide<VI8M1, VF16M2>;
+ def : VTypeInfoToWide<VI8M2, VF16M4>;
+ def : VTypeInfoToWide<VI8M4, VF16M8>;
+
+ def : VTypeInfoToWide<VI16MF4, VF32MF2>;
+ def : VTypeInfoToWide<VI16MF2, VF32M1>;
+ def : VTypeInfoToWide<VI16M1, VF32M2>;
+ def : VTypeInfoToWide<VI16M2, VF32M4>;
+ def : VTypeInfoToWide<VI16M4, VF32M8>;
+
+ def : VTypeInfoToWide<VI32MF2, VF64M1>;
+ def : VTypeInfoToWide<VI32M1, VF64M2>;
+ def : VTypeInfoToWide<VI32M2, VF64M4>;
+ def : VTypeInfoToWide<VI32M4, VF64M8>;
+}
+
+// This class holds the record of the RISCVVPseudosTable below.
+// This represents the information we need in codegen for each pseudo.
+// The definition should be consistent with `struct PseudoInfo` in
+// RISCVBaseInfo.h.
+class CONST8b<bits<8> val> {
+ bits<8> V = val;
+}
+def InvalidIndex : CONST8b<0x80>;
+class RISCVVPseudo {
+ Pseudo Pseudo = !cast<Pseudo>(NAME); // Used as a key.
+ Instruction BaseInstr;
+}
+
+// The actual table.
+def RISCVVPseudosTable : GenericTable {
+ let FilterClass = "RISCVVPseudo";
+ let CppTypeName = "PseudoInfo";
+ let Fields = [ "Pseudo", "BaseInstr" ];
+ let PrimaryKey = [ "Pseudo" ];
+ let PrimaryKeyName = "getPseudoInfo";
+}
+
+def RISCVVIntrinsicsTable : GenericTable {
+ let FilterClass = "RISCVVIntrinsic";
+ let CppTypeName = "RISCVVIntrinsicInfo";
+ let Fields = ["IntrinsicID", "ExtendOperand"];
+ let PrimaryKey = ["IntrinsicID"];
+ let PrimaryKeyName = "getRISCVVIntrinsicInfo";
+}
+
+class RISCVZvlsseg<string IntrName, bits<11> S, bits<3> L, bits<3> IL = V_M1.value> {
+ Intrinsic IntrinsicID = !cast<Intrinsic>(IntrName);
+ bits<11> SEW = S;
+ bits<3> LMUL = L;
+ bits<3> IndexLMUL = IL;
+ Pseudo Pseudo = !cast<Pseudo>(NAME);
+}
+
+def RISCVZvlssegTable : GenericTable {
+ let FilterClass = "RISCVZvlsseg";
+ let Fields = ["IntrinsicID", "SEW", "LMUL", "IndexLMUL", "Pseudo"];
+ let PrimaryKey = ["IntrinsicID", "SEW", "LMUL", "IndexLMUL"];
+ let PrimaryKeyName = "getPseudo";
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers to define the different pseudo instructions.
+//===----------------------------------------------------------------------===//
+
+class PseudoToVInst<string PseudoInst> {
+ string VInst = !subst("_M8", "",
+ !subst("_M4", "",
+ !subst("_M2", "",
+ !subst("_M1", "",
+ !subst("_MF2", "",
+ !subst("_MF4", "",
+ !subst("_MF8", "",
+ !subst("_B1", "",
+ !subst("_B2", "",
+ !subst("_B4", "",
+ !subst("_B8", "",
+ !subst("_B16", "",
+ !subst("_B32", "",
+ !subst("_B64", "",
+ !subst("_MASK", "",
+ !subst("F16", "F",
+ !subst("F32", "F",
+ !subst("F64", "F",
+ !subst("Pseudo", "", PseudoInst)))))))))))))))))));
+}
+
+class ToLowerCase<string Upper> {
+ string L = !subst("FF", "ff",
+ !subst("VLSEG", "vlseg",
+ !subst("VLSSEG", "vlsseg",
+ !subst("VSSEG", "vsseg",
+ !subst("VSSSEG", "vssseg",
+ !subst("VLOXSEG", "vloxseg",
+ !subst("VLUXSEG", "vluxseg",
+ !subst("VSOXSEG", "vsoxseg",
+ !subst("VSUXSEG", "vsuxseg", Upper)))))))));
+}
+
+// Example: PseudoVLSEG2E32_V_M2 -> int_riscv_vlseg2
+// Example: PseudoVLSEG2E32_V_M2_MASK -> int_riscv_vlseg2_mask
+class PseudoToIntrinsic<string PseudoInst, bit IsMasked> {
+ string Intrinsic = !strconcat("int_riscv_",
+ ToLowerCase<
+ !subst("E8", "",
+ !subst("E16", "",
+ !subst("E32", "",
+ !subst("E64", "",
+ !subst("EI8", "",
+ !subst("EI16", "",
+ !subst("EI32", "",
+ !subst("EI64", "",
+ !subst("_V", "", PseudoToVInst<PseudoInst>.VInst)))))))))>.L,
+ !if(IsMasked, "_mask", ""));
+}
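The two examples above show the intent of the substitution chain: strip the EEW, operand-kind and LMUL tags from the pseudo name, lower-case the mnemonic, and append _mask for the masked form. A hedged Python approximation that only covers names shaped like those two examples (it is not the full PseudoToVInst/ToLowerCase rule):

  import re

  def pseudo_to_intrinsic(pseudo, is_masked):
      # "PseudoVLSEG2E32_V_M2"      -> "int_riscv_vlseg2"
      # "PseudoVLSEG2E32_V_M2_MASK" -> "int_riscv_vlseg2_mask"
      name = pseudo[len("Pseudo"):]
      name = re.sub(r"E\d+_V(_MF?\d+)?(_MASK)?$", "", name)  # drop EEW/operand/LMUL/mask tags
      return "int_riscv_" + name.lower() + ("_mask" if is_masked else "")

  assert pseudo_to_intrinsic("PseudoVLSEG2E32_V_M2", False) == "int_riscv_vlseg2"
  assert pseudo_to_intrinsic("PseudoVLSEG2E32_V_M2_MASK", True) == "int_riscv_vlseg2_mask"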
+
+// The destination vector register group for a masked vector instruction cannot
+// overlap the source mask register (v0), unless the destination vector register
+// is being written with a mask value (e.g., comparisons) or the scalar result
+// of a reduction.
+class GetVRegNoV0<VReg VRegClass> {
+ VReg R = !cond(!eq(VRegClass, VR) : VRNoV0,
+ !eq(VRegClass, VRM2) : VRM2NoV0,
+ !eq(VRegClass, VRM4) : VRM4NoV0,
+ !eq(VRegClass, VRM8) : VRM8NoV0,
+ !eq(1, 1) : VRegClass);
+}
+
+// Join strings in list using separator and ignoring empty elements
+class Join<list<string> strings, string separator> {
+ string ret = !foldl(!head(strings), !tail(strings), a, b,
+ !cond(
+ !and(!empty(a), !empty(b)) : "",
+ !empty(a) : b,
+ !empty(b) : a,
+ 1 : a#separator#b));
+}
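Join is a fold-with-separator that skips empty entries, which is what lets an empty per-instruction Constraint merge cleanly with "$rd = $merge" in the pseudo classes below. The same behaviour in a few lines of Python (illustrative):

  def join(strings, separator):
      # Concatenate with the separator, ignoring empty elements
      return separator.join(s for s in strings if s)

  assert join(["", "$rd = $merge"], ",") == "$rd = $merge"
  assert join(["@earlyclobber $rd", "$rd = $merge"], ",") == "@earlyclobber $rd,$rd = $merge"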
+
+class VPseudo<Instruction instr, LMULInfo m, dag outs, dag ins> :
+ Pseudo<outs, ins, []>, RISCVVPseudo {
+ let BaseInstr = instr;
+ let VLMul = m.value;
+}
+
+class VPseudoUSLoadNoMask<VReg RetClass>:
+ Pseudo<(outs RetClass:$rd),
+ (ins GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoUSLoadMask<VReg RetClass>:
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPR:$rs1,
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "$rd = $merge";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoSLoadNoMask<VReg RetClass>:
+ Pseudo<(outs RetClass:$rd),
+ (ins GPR:$rs1, GPR:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoSLoadMask<VReg RetClass>:
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPR:$rs1, GPR:$rs2,
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "$rd = $merge";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass>:
+ Pseudo<(outs RetClass:$rd),
+ (ins GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoILoadMask<VReg RetClass, VReg IdxClass>:
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPR:$rs1, IdxClass:$rs2,
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "$rd = $merge";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoUSStoreNoMask<VReg StClass>:
+ Pseudo<(outs),
+ (ins StClass:$rd, GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoUSStoreMask<VReg StClass>:
+ Pseudo<(outs),
+ (ins StClass:$rd, GPR:$rs1, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoSStoreNoMask<VReg StClass>:
+ Pseudo<(outs),
+ (ins StClass:$rd, GPR:$rs1, GPR:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoSStoreMask<VReg StClass>:
+ Pseudo<(outs),
+ (ins StClass:$rd, GPR:$rs1, GPR:$rs2, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+// Unary instruction that is never masked so HasDummyMask=0.
+class VPseudoUnaryNoDummyMask<VReg RetClass,
+ DAGOperand Op2Class> :
+ Pseudo<(outs RetClass:$rd),
+ (ins Op2Class:$rs1, GPR:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoNullaryNoMask<VReg RegClass>:
+ Pseudo<(outs RegClass:$rd),
+ (ins GPR:$vl, ixlenimm:$sew),
+ []>, RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoNullaryMask<VReg RegClass>:
+ Pseudo<(outs GetVRegNoV0<RegClass>.R:$rd),
+ (ins GetVRegNoV0<RegClass>.R:$merge, VMaskOp:$vm, GPR:$vl,
+ ixlenimm:$sew), []>, RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints ="$rd = $merge";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+// Nullary for pseudo instructions. They are expanded in
+// RISCVExpandPseudoInsts pass.
+class VPseudoNullaryPseudoM<string BaseInst>
+ : Pseudo<(outs VR:$rd), (ins GPR:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ // BaseInstr is not used in the RISCVExpandPseudoInsts pass.
+ // Just fill in a corresponding real v-inst to pass the tablegen check.
+ let BaseInstr = !cast<Instruction>(BaseInst);
+}
+
+// RetClass could be GPR or VReg.
+class VPseudoUnaryNoMask<DAGOperand RetClass, VReg OpClass, string Constraint = ""> :
+ Pseudo<(outs RetClass:$rd),
+ (ins OpClass:$rs2, GPR:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = Constraint;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+// Mask unary operation without maskedoff.
+class VPseudoMaskUnarySOutMask:
+ Pseudo<(outs GPR:$rd),
+ (ins VR:$rs1, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+// Masked mask operations have no $rd=$merge constraint.
+class VPseudoUnaryMOutMask:
+ Pseudo<(outs VR:$rd),
+ (ins VR:$merge, VR:$rs1, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "$rd = $merge";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+// Mask can be V0~V31
+class VPseudoUnaryAnyMask<VReg RetClass,
+ VReg Op1Class> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge,
+ Op1Class:$rs2,
+ VR:$vm, GPR:$vl, ixlenimm:$sew),
+ []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "@earlyclobber $rd, $rd = $merge";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoBinaryNoMask<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins Op1Class:$rs2, Op2Class:$rs1, GPR:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = Constraint;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass>:
+ Pseudo<(outs),
+ (ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoIStoreMask<VReg StClass, VReg IdxClass>:
+ Pseudo<(outs),
+ (ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoBinaryMask<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoBinaryCarryIn<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ bit CarryIn,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ !if(CarryIn,
+ (ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, GPR:$vl,
+ ixlenimm:$sew),
+ (ins Op1Class:$rs2, Op2Class:$rs1, GPR:$vl, ixlenimm:$sew)), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = Constraint;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 0;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let VLMul = MInfo.value;
+}
+
+class VPseudoTernaryNoMask<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+ GPR:$vl, ixlenimm:$sew),
+ []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoAMOWDNoMask<VReg RetClass,
+ VReg Op1Class> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
+ (ins GPR:$rs1,
+ Op1Class:$vs2,
+ GetVRegNoV0<RetClass>.R:$vd,
+ GPR:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 1;
+ let usesCustomInserter = 1;
+ let Constraints = "$vd_wd = $vd";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoAMOWDMask<VReg RetClass,
+ VReg Op1Class> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
+ (ins GPR:$rs1,
+ Op1Class:$vs2,
+ GetVRegNoV0<RetClass>.R:$vd,
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 1;
+ let usesCustomInserter = 1;
+ let Constraints = "$vd_wd = $vd";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+multiclass VPseudoAMOEI<int eew> {
+ // The standard scalar AMO instructions support 32, 64, and 128 memory data bits,
+ // and in the base vector "V" extension only SEW up to ELEN = max(XLEN, FLEN)
+ // is required to be supported.
+ // Therefore only SEW in [32, 64] is allowed here.
+ foreach sew = [32, 64] in {
+ foreach lmul = MxSet<sew>.m in {
+ defvar octuple_lmul = octuple_from_str<lmul.MX>.ret;
+ // Calculate emul = eew * lmul / sew
+ defvar octuple_emul = !srl(!mul(eew, octuple_lmul), shift_amount<sew>.val);
+ if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
+ defvar emulMX = octuple_to_str<octuple_emul>.ret;
+ defvar lmulMX = octuple_to_str<octuple_lmul>.ret;
+ defvar emul= !cast<LMULInfo>("V_" # emulMX);
+ defvar lmul = !cast<LMULInfo>("V_" # lmulMX);
+ let VLMul = lmul.value in {
+ def "_WD_" # lmulMX # "_" # emulMX : VPseudoAMOWDNoMask<lmul.vrclass, emul.vrclass>;
+ def "_WD_" # lmulMX # "_" # emulMX # "_MASK" : VPseudoAMOWDMask<lmul.vrclass, emul.vrclass>;
+ }
+ }
+ }
+ }
+}
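The octuple_emul line applies the V-spec rule EMUL = (EEW / SEW) * LMUL on LMUL values scaled by 8, so fractional LMULs stay integral, and a pseudo is only emitted when the result lands in [MF8, M8]. A small worked sketch (helper names are illustrative):

  OCTUPLE_TO_MX = {1: "MF8", 2: "MF4", 4: "MF2", 8: "M1", 16: "M2", 32: "M4", 64: "M8"}
  MX_TO_OCTUPLE = {mx: o for o, mx in OCTUPLE_TO_MX.items()}

  def index_emul(eew, sew, lmul_mx):
      # EMUL = EEW * LMUL / SEW on octuple (8x) LMUL values; None means no pseudo is defined
      octuple_emul = (eew * MX_TO_OCTUPLE[lmul_mx]) // sew
      return OCTUPLE_TO_MX.get(octuple_emul)

  # EI8 with SEW=64, LMUL=M8: EMUL = 8 * 8 / 64 = 1, so the index operand uses an M1 group.
  assert index_emul(eew=8, sew=64, lmul_mx="M8") == "M1"
  # An EEW=64 index with SEW=32, LMUL=M8 would need EMUL=16, which is out of range.
  assert index_emul(eew=64, sew=32, lmul_mx="M8") is None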
+
+multiclass VPseudoAMO {
+ foreach eew = EEWList in
+ defm "EI" # eew : VPseudoAMOEI<eew>;
+}
+
+class VPseudoUSSegLoadNoMask<VReg RetClass, bits<11> EEW>:
+ Pseudo<(outs RetClass:$rd),
+ (ins GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoUSSegLoadMask<VReg RetClass, bits<11> EEW>:
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "$rd = $merge";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoSSegLoadNoMask<VReg RetClass, bits<11> EEW>:
+ Pseudo<(outs RetClass:$rd),
+ (ins GPR:$rs1, GPR:$offset, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoSSegLoadMask<VReg RetClass, bits<11> EEW>:
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
+ GPR:$offset, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "$rd = $merge";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, bits<11> EEW, bits<3> LMUL>:
+ Pseudo<(outs RetClass:$rd),
+ (ins GPR:$rs1, IdxClass:$offset, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul, LMUL> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ // For vector indexed segment loads, the destination vector register groups
+ // cannot overlap the source vector register group
+ let Constraints = "@earlyclobber $rd";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, bits<11> EEW, bits<3> LMUL>:
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
+ IdxClass:$offset, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul, LMUL> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ // For vector indexed segment loads, the destination vector register groups
+ // cannot overlap the source vector register group
+ let Constraints = "@earlyclobber $rd, $rd = $merge";
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoUSSegStoreNoMask<VReg ValClass, bits<11> EEW>:
+ Pseudo<(outs),
+ (ins ValClass:$rd, GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul> {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoUSSegStoreMask<VReg ValClass, bits<11> EEW>:
+ Pseudo<(outs),
+ (ins ValClass:$rd, GPR:$rs1,
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul> {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoSSegStoreNoMask<VReg ValClass, bits<11> EEW>:
+ Pseudo<(outs),
+ (ins ValClass:$rd, GPR:$rs1, GPR: $offset, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul> {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoSSegStoreMask<VReg ValClass, bits<11> EEW>:
+ Pseudo<(outs),
+ (ins ValClass:$rd, GPR:$rs1, GPR: $offset,
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul> {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, bits<11> EEW, bits<3> LMUL>:
+ Pseudo<(outs),
+ (ins ValClass:$rd, GPR:$rs1, IdxClass:$index,
+ GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul, LMUL> {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, bits<11> EEW, bits<3> LMUL>:
+ Pseudo<(outs),
+ (ins ValClass:$rd, GPR:$rs1, IdxClass:$index,
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul, LMUL> {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let usesCustomInserter = 1;
+ let Uses = [VL, VTYPE];
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+multiclass VPseudoUSLoad {
+ foreach lmul = MxList.m in {
+ defvar LInfo = lmul.MX;
+ defvar vreg = lmul.vrclass;
+ let VLMul = lmul.value in {
+ def "_V_" # LInfo : VPseudoUSLoadNoMask<vreg>;
+ def "_V_" # LInfo # "_MASK" : VPseudoUSLoadMask<vreg>;
+ }
+ }
+}
+
+multiclass VPseudoLoadMask {
+ foreach mti = AllMasks in {
+ let VLMul = mti.LMul.value in {
+ def "_V_" # mti.BX : VPseudoUSLoadNoMask<VR>;
+ }
+ }
+}
+
+multiclass VPseudoSLoad {
+ foreach lmul = MxList.m in {
+ defvar LInfo = lmul.MX;
+ defvar vreg = lmul.vrclass;
+ let VLMul = lmul.value in {
+ def "_V_" # LInfo : VPseudoSLoadNoMask<vreg>;
+ def "_V_" # LInfo # "_MASK" : VPseudoSLoadMask<vreg>;
+ }
+ }
+}
+
+multiclass VPseudoILoad {
+ foreach lmul = MxList.m in
+ foreach idx_lmul = MxList.m in {
+ defvar LInfo = lmul.MX;
+ defvar Vreg = lmul.vrclass;
+ defvar IdxLInfo = idx_lmul.MX;
+ defvar IdxVreg = idx_lmul.vrclass;
+ let VLMul = lmul.value in {
+ def "_V_" # IdxLInfo # "_" # LInfo : VPseudoILoadNoMask<Vreg, IdxVreg>;
+ def "_V_" # IdxLInfo # "_" # LInfo # "_MASK" : VPseudoILoadMask<Vreg, IdxVreg>;
+ }
+ }
+}
+
+multiclass VPseudoUSStore {
+ foreach lmul = MxList.m in {
+ defvar LInfo = lmul.MX;
+ defvar vreg = lmul.vrclass;
+ let VLMul = lmul.value in {
+ def "_V_" # LInfo : VPseudoUSStoreNoMask<vreg>;
+ def "_V_" # LInfo # "_MASK" : VPseudoUSStoreMask<vreg>;
+ }
+ }
+}
+
+multiclass VPseudoStoreMask {
+ foreach mti = AllMasks in {
+ let VLMul = mti.LMul.value in {
+ def "_V_" # mti.BX : VPseudoUSStoreNoMask<VR>;
+ }
+ }
+}
+
+multiclass VPseudoSStore {
+ foreach lmul = MxList.m in {
+ defvar LInfo = lmul.MX;
+ defvar vreg = lmul.vrclass;
+ let VLMul = lmul.value in {
+ def "_V_" # LInfo : VPseudoSStoreNoMask<vreg>;
+ def "_V_" # LInfo # "_MASK" : VPseudoSStoreMask<vreg>;
+ }
+ }
+}
+
+multiclass VPseudoIStore {
+ foreach lmul = MxList.m in
+ foreach idx_lmul = MxList.m in {
+ defvar LInfo = lmul.MX;
+ defvar Vreg = lmul.vrclass;
+ defvar IdxLInfo = idx_lmul.MX;
+ defvar IdxVreg = idx_lmul.vrclass;
+ let VLMul = lmul.value in {
+ def "_V_" # IdxLInfo # "_" # LInfo : VPseudoIStoreNoMask<Vreg, IdxVreg>;
+ def "_V_" # IdxLInfo # "_" # LInfo # "_MASK" : VPseudoIStoreMask<Vreg, IdxVreg>;
+ }
+ }
+}
+
+multiclass VPseudoUnaryS_M {
+ foreach mti = AllMasks in
+ {
+ let VLMul = mti.LMul.value in {
+ def "_M_" # mti.BX : VPseudoUnaryNoMask<GPR, VR>;
+ def "_M_" # mti.BX # "_MASK" : VPseudoMaskUnarySOutMask;
+ }
+ }
+}
+
+multiclass VPseudoUnaryM_M {
+ defvar constraint = "@earlyclobber $rd";
+ foreach mti = AllMasks in
+ {
+ let VLMul = mti.LMul.value in {
+ def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>;
+ def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>;
+ }
+ }
+}
+
+multiclass VPseudoMaskNullaryV {
+ foreach m = MxList.m in {
+ let VLMul = m.value in {
+ def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>;
+ def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>;
+ }
+ }
+}
+
+multiclass VPseudoNullaryPseudoM <string BaseInst> {
+ foreach mti = AllMasks in {
+ let VLMul = mti.LMul.value in {
+ def "_M_" # mti.BX : VPseudoNullaryPseudoM<BaseInst # "_MM">;
+ }
+ }
+}
+
+multiclass VPseudoUnaryV_M {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m in {
+ let VLMul = m.value in {
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>;
+ }
+ }
+}
+
+multiclass VPseudoUnaryV_V_AnyMask {
+ foreach m = MxList.m in {
+ let VLMul = m.value in
+ def _VM # "_" # m.MX : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>;
+ }
+}
+
+multiclass VPseudoBinary<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = ""> {
+ let VLMul = MInfo.value in {
+ def "_" # MInfo.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
+ Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class,
+ Constraint>;
+ }
+}
+
+multiclass VPseudoBinaryEmul<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo lmul,
+ LMULInfo emul,
+ string Constraint = ""> {
+ let VLMul = lmul.value in {
+ def "_" # lmul.MX # "_" # emul.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
+ Constraint>;
+ def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class,
+ Constraint>;
+ }
+}
+
+multiclass VPseudoBinaryV_VV<string Constraint = ""> {
+ foreach m = MxList.m in
+ defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>;
+}
+
+multiclass VPseudoBinaryV_VV_EEW<int eew, string Constraint = ""> {
+ foreach m = MxList.m in {
+ foreach sew = EEWList in {
+ defvar octuple_lmul = octuple_from_str<m.MX>.ret;
+ // emul = lmul * eew / sew
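+ // Worked example (illustrative values only): LMUL=1 is 8 in the octuple
+ // encoding, so EEW=16 with SEW=32 gives
+ // octuple_emul = (8 * 16) >> log2(32) = 4, i.e. EMUL = 1/2.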
+ defvar octuple_emul = !srl(!mul(octuple_lmul, eew), shift_amount<sew>.val);
+ if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
+ defvar emulMX = octuple_to_str<octuple_emul>.ret;
+ defvar emul = !cast<LMULInfo>("V_" # emulMX);
+ defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint>;
+ }
+ }
+ }
+}
+
+multiclass VPseudoBinaryV_VX<string Constraint = ""> {
+ foreach m = MxList.m in
+ defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>;
+}
+
+multiclass VPseudoBinaryV_VF<string Constraint = ""> {
+ foreach m = MxList.m in
+ foreach f = FPList.fpinfo in
+ defm "_V" # f.FX : VPseudoBinary<m.vrclass, m.vrclass,
+ f.fprclass, m, Constraint>;
+}
+
+multiclass VPseudoBinaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
+ foreach m = MxList.m in
+ defm _VI : VPseudoBinary<m.vrclass, m.vrclass, ImmType, m, Constraint>;
+}
+
+multiclass VPseudoBinaryM_MM {
+ foreach m = MxList.m in
+ let VLMul = m.value in {
+ def "_MM_" # m.MX : VPseudoBinaryNoMask<VR, VR, VR, "">;
+ }
+}
+
+// We use @earlyclobber here because overlap between the destination and
+// source register groups is only legal when either:
+// * the destination EEW is smaller than the source EEW and the overlap is
+// in the lowest-numbered part of the source register group, or
+// * the destination EEW is greater than the source EEW, the source EMUL is
+// at least 1, and the overlap is in the highest-numbered part of the
+// destination register group.
+// Any other overlap is illegal.
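+// (Illustrative examples in the spirit of the spec's overlap rules: with
+// LMUL=1, a narrowing op may legally write v0 while reading the v0/v1 pair,
+// but not v1; with LMUL=8, vzext.vf4 may read v6 while writing v0-v7, but
+// not v0, v2 or v4.)
+// The [0-5] slices below also drop the largest LMUL, since the widened
+// result would otherwise need a register group beyond LMUL=8.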
+multiclass VPseudoBinaryW_VV {
+ foreach m = MxList.m[0-5] in
+ defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m,
+ "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryW_VX {
+ foreach m = MxList.m[0-5] in
+ defm "_VX" : VPseudoBinary<m.wvrclass, m.vrclass, GPR, m,
+ "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryW_VF {
+ foreach m = MxList.m[0-5] in
+ foreach f = FPList.fpinfo[0-1] in
+ defm "_V" # f.FX : VPseudoBinary<m.wvrclass, m.vrclass,
+ f.fprclass, m,
+ "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryW_WV {
+ foreach m = MxList.m[0-5] in
+ defm _WV : VPseudoBinary<m.wvrclass, m.wvrclass, m.vrclass, m,
+ "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryW_WX {
+ foreach m = MxList.m[0-5] in
+ defm "_WX" : VPseudoBinary<m.wvrclass, m.wvrclass, GPR, m,
+ "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryW_WF {
+ foreach m = MxList.m[0-5] in
+ foreach f = FPList.fpinfo[0-1] in
+ defm "_W" # f.FX : VPseudoBinary<m.wvrclass, m.wvrclass,
+ f.fprclass, m,
+ "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryV_WV {
+ foreach m = MxList.m[0-5] in
+ defm _WV : VPseudoBinary<m.vrclass, m.wvrclass, m.vrclass, m,
+ "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryV_WX {
+ foreach m = MxList.m[0-5] in
+ defm _WX : VPseudoBinary<m.vrclass, m.wvrclass, GPR, m,
+ "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryV_WI {
+ foreach m = MxList.m[0-5] in
+ defm _WI : VPseudoBinary<m.vrclass, m.wvrclass, uimm5, m,
+ "@earlyclobber $rd">;
+}
+
+// For vadc and vsbc, the instruction encoding is reserved if the destination
+// vector register is v0.
+// vadc and vsbc use CarryIn == 1 and CarryOut == 0 (the defaults below).
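+// When CarryIn is set and CarryOut is not, the destination class below is
+// GetVRegNoV0<...>, keeping the result out of v0 since v0 carries the mask.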
+multiclass VPseudoBinaryV_VM<bit CarryOut = 0, bit CarryIn = 1,
+ string Constraint = ""> {
+ foreach m = MxList.m in
+ def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX :
+ VPseudoBinaryCarryIn<!if(CarryOut, VR,
+ !if(!and(CarryIn, !not(CarryOut)),
+ GetVRegNoV0<m.vrclass>.R, m.vrclass)),
+ m.vrclass, m.vrclass, m, CarryIn, Constraint>;
+}
+
+multiclass VPseudoBinaryV_XM<bit CarryOut = 0, bit CarryIn = 1,
+ string Constraint = ""> {
+ foreach m = MxList.m in
+ def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX :
+ VPseudoBinaryCarryIn<!if(CarryOut, VR,
+ !if(!and(CarryIn, !not(CarryOut)),
+ GetVRegNoV0<m.vrclass>.R, m.vrclass)),
+ m.vrclass, GPR, m, CarryIn, Constraint>;
+}
+
+multiclass VPseudoBinaryV_FM {
+ foreach m = MxList.m in
+ foreach f = FPList.fpinfo in
+ def "_V" # f.FX # "M_" # m.MX :
+ VPseudoBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">;
+}
+
+multiclass VPseudoBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1,
+ string Constraint = ""> {
+ foreach m = MxList.m in
+ def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX :
+ VPseudoBinaryCarryIn<!if(CarryOut, VR,
+ !if(!and(CarryIn, !not(CarryOut)),
+ GetVRegNoV0<m.vrclass>.R, m.vrclass)),
+ m.vrclass, simm5, m, CarryIn, Constraint>;
+}
+
+multiclass VPseudoUnaryV_V_X_I_NoDummyMask {
+ foreach m = MxList.m in {
+ let VLMul = m.value in {
+ def "_V_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, m.vrclass>;
+ def "_X_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, GPR>;
+ def "_I_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, simm5>;
+ }
+ }
+}
+
+multiclass VPseudoUnaryV_F_NoDummyMask {
+ foreach m = MxList.m in {
+ foreach f = FPList.fpinfo in {
+ let VLMul = m.value in {
+ def "_" # f.FX # "_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, f.fprclass>;
+ }
+ }
+ }
+}
+
+multiclass VPseudoUnaryV_V {
+ foreach m = MxList.m in {
+ let VLMul = m.value in {
+ def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
+ def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>;
+ }
+ }
+}
+
+multiclass PseudoUnaryV_VF2 {
+ defvar constraints = "@earlyclobber $rd";
+ foreach m = MxList.m[1-6] in
+ {
+ let VLMul = m.value in {
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f2vrclass,
+ constraints>;
+ }
+ }
+}
+
+multiclass PseudoUnaryV_VF4 {
+ defvar constraints = "@earlyclobber $rd";
+ foreach m = MxList.m[2-6] in
+ {
+ let VLMul = m.value in {
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f4vrclass,
+ constraints>;
+ }
+ }
+}
+
+multiclass PseudoUnaryV_VF8 {
+ defvar constraints = "@earlyclobber $rd";
+ foreach m = MxList.m[3-6] in
+ {
+ let VLMul = m.value in {
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f8vrclass,
+ constraints>;
+ }
+ }
+}
+
+// The destination EEW is 1.
+// The source EEW is 8, 16, 32, or 64.
+// When the destination EEW differs from the source EEW, we need @earlyclobber
+// to avoid overlap between the destination and source registers.
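+// (For illustration: a compare such as vmseq.vv at LMUL=8 produces an EEW=1
+// mask from full register-group sources, so the conservative @earlyclobber
+// keeps the mask destination out of the source register groups.)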
+multiclass VPseudoBinaryM_VV {
+ foreach m = MxList.m in
+ defm _VV : VPseudoBinary<VR, m.vrclass, m.vrclass, m, "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryM_VX {
+ foreach m = MxList.m in
+ defm "_VX" :
+ VPseudoBinary<VR, m.vrclass, GPR, m, "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryM_VF {
+ foreach m = MxList.m in
+ foreach f = FPList.fpinfo in
+ defm "_V" # f.FX :
+ VPseudoBinary<VR, m.vrclass, f.fprclass, m, "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryM_VI {
+ foreach m = MxList.m in
+ defm _VI : VPseudoBinary<VR, m.vrclass, simm5, m, "@earlyclobber $rd">;
+}
+
+multiclass VPseudoBinaryV_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+ defm "" : VPseudoBinaryV_VV<Constraint>;
+ defm "" : VPseudoBinaryV_VX<Constraint>;
+ defm "" : VPseudoBinaryV_VI<ImmType, Constraint>;
+}
+
+multiclass VPseudoBinaryV_VV_VX {
+ defm "" : VPseudoBinaryV_VV;
+ defm "" : VPseudoBinaryV_VX;
+}
+
+multiclass VPseudoBinaryV_VV_VF {
+ defm "" : VPseudoBinaryV_VV;
+ defm "" : VPseudoBinaryV_VF;
+}
+
+multiclass VPseudoBinaryV_VX_VI<Operand ImmType = simm5> {
+ defm "" : VPseudoBinaryV_VX;
+ defm "" : VPseudoBinaryV_VI<ImmType>;
+}
+
+multiclass VPseudoBinaryW_VV_VX {
+ defm "" : VPseudoBinaryW_VV;
+ defm "" : VPseudoBinaryW_VX;
+}
+
+multiclass VPseudoBinaryW_VV_VF {
+ defm "" : VPseudoBinaryW_VV;
+ defm "" : VPseudoBinaryW_VF;
+}
+
+multiclass VPseudoBinaryW_WV_WX {
+ defm "" : VPseudoBinaryW_WV;
+ defm "" : VPseudoBinaryW_WX;
+}
+
+multiclass VPseudoBinaryW_WV_WF {
+ defm "" : VPseudoBinaryW_WV;
+ defm "" : VPseudoBinaryW_WF;
+}
+
+multiclass VPseudoBinaryV_VM_XM_IM {
+ defm "" : VPseudoBinaryV_VM;
+ defm "" : VPseudoBinaryV_XM;
+ defm "" : VPseudoBinaryV_IM;
+}
+
+multiclass VPseudoBinaryV_VM_XM {
+ defm "" : VPseudoBinaryV_VM;
+ defm "" : VPseudoBinaryV_XM;
+}
+
+multiclass VPseudoBinaryM_VM_XM_IM<string Constraint> {
+ defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
+ defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
+ defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
+}
+
+multiclass VPseudoBinaryM_VM_XM<string Constraint> {
+ defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
+ defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
+}
+
+multiclass VPseudoBinaryM_V_X_I<string Constraint> {
+ defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
+ defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
+ defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
+}
+
+multiclass VPseudoBinaryM_V_X<string Constraint> {
+ defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
+ defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
+}
+
+multiclass VPseudoBinaryV_WV_WX_WI {
+ defm "" : VPseudoBinaryV_WV;
+ defm "" : VPseudoBinaryV_WX;
+ defm "" : VPseudoBinaryV_WI;
+}
+
+multiclass VPseudoTernary<VReg RetClass,
+ VReg Op1Class,
+ RegisterClass Op2Class,
+ LMULInfo MInfo,
+ string Constraint = ""> {
+ let VLMul = MInfo.value in {
+ def "_" # MInfo.MX : VPseudoTernaryNoMask<RetClass, Op1Class, Op2Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class, Constraint>;
+ }
+}
+
+multiclass VPseudoTernaryV_VV<string Constraint = ""> {
+ foreach m = MxList.m in
+ defm _VV : VPseudoTernary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>;
+}
+
+multiclass VPseudoTernaryV_VX<string Constraint = ""> {
+ foreach m = MxList.m in
+ defm _VX : VPseudoTernary<m.vrclass, m.vrclass, GPR, m, Constraint>;
+}
+
+multiclass VPseudoTernaryV_VX_AAXA<string Constraint = ""> {
+ foreach m = MxList.m in
+ defm "_VX" : VPseudoTernary<m.vrclass, GPR, m.vrclass, m, Constraint>;
+}
+
+multiclass VPseudoTernaryV_VF_AAXA<string Constraint = ""> {
+ foreach m = MxList.m in
+ foreach f = FPList.fpinfo in
+ defm "_V" # f.FX : VPseudoTernary<m.vrclass, f.fprclass, m.vrclass,
+ m, Constraint>;
+}
+
+multiclass VPseudoTernaryW_VV {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m[0-5] in
+ defm _VV : VPseudoTernary<m.wvrclass, m.vrclass, m.vrclass, m, constraint>;
+}
+
+multiclass VPseudoTernaryW_VX {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m[0-5] in
+ defm "_VX" : VPseudoTernary<m.wvrclass, GPR, m.vrclass, m, constraint>;
+}
+
+multiclass VPseudoTernaryW_VF {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m[0-5] in
+ foreach f = FPList.fpinfo[0-1] in
+ defm "_V" # f.FX : VPseudoTernary<m.wvrclass, f.fprclass, m.vrclass, m,
+ constraint>;
+}
+
+multiclass VPseudoTernaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
+ foreach m = MxList.m in
+ defm _VI : VPseudoTernary<m.vrclass, m.vrclass, ImmType, m, Constraint>;
+}
+
+multiclass VPseudoTernaryV_VV_VX_AAXA<string Constraint = ""> {
+ defm "" : VPseudoTernaryV_VV<Constraint>;
+ defm "" : VPseudoTernaryV_VX_AAXA<Constraint>;
+}
+
+multiclass VPseudoTernaryV_VV_VF_AAXA<string Constraint = ""> {
+ defm "" : VPseudoTernaryV_VV<Constraint>;
+ defm "" : VPseudoTernaryV_VF_AAXA<Constraint>;
+}
+
+multiclass VPseudoTernaryV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+ defm "" : VPseudoTernaryV_VX<Constraint>;
+ defm "" : VPseudoTernaryV_VI<ImmType, Constraint>;
+}
+
+multiclass VPseudoTernaryW_VV_VX {
+ defm "" : VPseudoTernaryW_VV;
+ defm "" : VPseudoTernaryW_VX;
+}
+
+multiclass VPseudoTernaryW_VV_VF {
+ defm "" : VPseudoTernaryW_VV;
+ defm "" : VPseudoTernaryW_VF;
+}
+
+multiclass VPseudoBinaryM_VV_VX_VI {
+ defm "" : VPseudoBinaryM_VV;
+ defm "" : VPseudoBinaryM_VX;
+ defm "" : VPseudoBinaryM_VI;
+}
+
+multiclass VPseudoBinaryM_VV_VX {
+ defm "" : VPseudoBinaryM_VV;
+ defm "" : VPseudoBinaryM_VX;
+}
+
+multiclass VPseudoBinaryM_VV_VF {
+ defm "" : VPseudoBinaryM_VV;
+ defm "" : VPseudoBinaryM_VF;
+}
+
+multiclass VPseudoBinaryM_VX_VI {
+ defm "" : VPseudoBinaryM_VX;
+ defm "" : VPseudoBinaryM_VI;
+}
+
+multiclass VPseudoReductionV_VS {
+ foreach m = MxList.m in {
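+ // Reductions produce a scalar result in element 0 of the destination.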
+ let WritesElement0 = 1 in
+ defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>;
+ }
+}
+
+multiclass VPseudoConversion<VReg RetClass,
+ VReg Op1Class,
+ LMULInfo MInfo,
+ string Constraint = ""> {
+ let VLMul = MInfo.value in {
+ def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class,
+ Constraint>;
+ }
+}
+
+multiclass VPseudoConversionV_V {
+ foreach m = MxList.m in
+ defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>;
+}
+
+multiclass VPseudoConversionW_V {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m[0-5] in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>;
+}
+
+multiclass VPseudoConversionV_W {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m[0-5] in
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>;
+}
+
+multiclass VPseudoUSSegLoad<bit isFF> {
+ foreach eew = EEWList in {
+ foreach lmul = MxSet<eew>.m in {
+ defvar LInfo = lmul.MX;
+ let VLMul = lmul.value in {
+ foreach nf = NFSet<lmul>.L in {
+ defvar vreg = SegRegClass<lmul, nf>.RC;
+ defvar FFStr = !if(isFF, "FF", "");
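+ // "FF" marks the fault-only-first variants of these segment loads.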
+ def nf # "E" # eew # FFStr # "_V_" # LInfo :
+ VPseudoUSSegLoadNoMask<vreg, eew>;
+ def nf # "E" # eew # FFStr # "_V_" # LInfo # "_MASK" :
+ VPseudoUSSegLoadMask<vreg, eew>;
+ }
+ }
+ }
+ }
+}
+
+multiclass VPseudoSSegLoad {
+ foreach eew = EEWList in {
+ foreach lmul = MxSet<eew>.m in {
+ defvar LInfo = lmul.MX;
+ let VLMul = lmul.value in {
+ foreach nf = NFSet<lmul>.L in {
+ defvar vreg = SegRegClass<lmul, nf>.RC;
+ def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegLoadNoMask<vreg, eew>;
+ def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegLoadMask<vreg, eew>;
+ }
+ }
+ }
+ }
+}
+
+multiclass VPseudoISegLoad {
+ foreach idx_eew = EEWList in { // EEW for index argument.
+ foreach idx_lmul = MxSet<idx_eew>.m in { // LMUL for index argument.
+ foreach val_lmul = MxList.m in { // LMUL for the value.
+ defvar IdxLInfo = idx_lmul.MX;
+ defvar IdxVreg = idx_lmul.vrclass;
+ defvar ValLInfo = val_lmul.MX;
+ let VLMul = val_lmul.value in {
+ foreach nf = NFSet<val_lmul>.L in {
+ defvar ValVreg = SegRegClass<val_lmul, nf>.RC;
+ def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo :
+ VPseudoISegLoadNoMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value>;
+ def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_MASK" :
+ VPseudoISegLoadMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value>;
+ }
+ }
+ }
+ }
+ }
+}
+
+multiclass VPseudoUSSegStore {
+ foreach eew = EEWList in {
+ foreach lmul = MxSet<eew>.m in {
+ defvar LInfo = lmul.MX;
+ let VLMul = lmul.value in {
+ foreach nf = NFSet<lmul>.L in {
+ defvar vreg = SegRegClass<lmul, nf>.RC;
+ def nf # "E" # eew # "_V_" # LInfo : VPseudoUSSegStoreNoMask<vreg, eew>;
+ def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSSegStoreMask<vreg, eew>;
+ }
+ }
+ }
+ }
+}
+
+multiclass VPseudoSSegStore {
+ foreach eew = EEWList in {
+ foreach lmul = MxSet<eew>.m in {
+ defvar LInfo = lmul.MX;
+ let VLMul = lmul.value in {
+ foreach nf = NFSet<lmul>.L in {
+ defvar vreg = SegRegClass<lmul, nf>.RC;
+ def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegStoreNoMask<vreg, eew>;
+ def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegStoreMask<vreg, eew>;
+ }
+ }
+ }
+ }
+}
+
+multiclass VPseudoISegStore {
+ foreach idx_eew = EEWList in { // EEW for index argument.
+ foreach idx_lmul = MxSet<idx_eew>.m in { // LMUL for index argument.
+ foreach val_lmul = MxList.m in { // LMUL for the value.
+ defvar IdxLInfo = idx_lmul.MX;
+ defvar IdxVreg = idx_lmul.vrclass;
+ defvar ValLInfo = val_lmul.MX;
+ let VLMul = val_lmul.value in {
+ foreach nf = NFSet<val_lmul>.L in {
+ defvar ValVreg = SegRegClass<val_lmul, nf>.RC;
+ def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo :
+ VPseudoISegStoreNoMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value>;
+ def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_MASK" :
+ VPseudoISegStoreMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value>;
+ }
+ }
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers to define the intrinsic patterns.
+//===----------------------------------------------------------------------===//
+
+class VPatUnaryNoMask<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg op2_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (op2_type op2_reg_class:$rs2),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (op2_type op2_reg_class:$rs2),
+ GPR:$vl, sew)>;
+
+class VPatUnaryMask<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ (mask_type V0),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ (mask_type V0), GPR:$vl, sew)>;
+
+class VPatMaskUnaryNoMask<string intrinsic_name,
+ string inst,
+ MTypeInfo mti> :
+ Pat<(mti.Mask (!cast<Intrinsic>(intrinsic_name)
+ (mti.Mask VR:$rs2),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_M_"#mti.BX)
+ (mti.Mask VR:$rs2),
+ GPR:$vl, mti.SEW)>;
+
+class VPatMaskUnaryMask<string intrinsic_name,
+ string inst,
+ MTypeInfo mti> :
+ Pat<(mti.Mask (!cast<Intrinsic>(intrinsic_name#"_mask")
+ (mti.Mask VR:$merge),
+ (mti.Mask VR:$rs2),
+ (mti.Mask V0),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK")
+ (mti.Mask VR:$merge),
+ (mti.Mask VR:$rs2),
+ (mti.Mask V0), GPR:$vl, mti.SEW)>;
+
+class VPatUnaryAnyMask<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op1_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (mask_type VR:$rs2),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (mask_type VR:$rs2),
+ GPR:$vl, sew)>;
+
+class VPatBinaryNoMask<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int sew,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst)
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ GPR:$vl, sew)>;
+
+class VPatBinaryMask<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_MASK")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0), GPR:$vl, sew)>;
+
+class VPatTernaryNoMask<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ GPR:$vl, sew)>;
+
+class VPatTernaryMask<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask")
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX # "_MASK")
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ (mask_type V0),
+ GPR:$vl, sew)>;
+
+class VPatAMOWDNoMask<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ int sew,
+ LMULInfo vlmul,
+ LMULInfo emul,
+ VReg op1_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ GPR:$rs1,
+ (op1_type op1_reg_class:$vs2),
+ (result_type vlmul.vrclass:$vd),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX)
+ $rs1, $vs2, $vd,
+ GPR:$vl, sew)>;
+
+class VPatAMOWDMask<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ LMULInfo emul,
+ VReg op1_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name # "_mask")
+ GPR:$rs1,
+ (op1_type op1_reg_class:$vs2),
+ (result_type vlmul.vrclass:$vd),
+ (mask_type V0),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK")
+ $rs1, $vs2, $vd,
+ (mask_type V0), GPR:$vl, sew)>;
+
+multiclass VPatUSLoad<string intrinsic,
+ string inst,
+ LLVMType type,
+ LLVMType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg reg_class>
+{
+ defvar Intr = !cast<Intrinsic>(intrinsic);
+ defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
+ def : Pat<(type (Intr GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+ (Pseudo $rs1, GPR:$vl, sew)>;
+ defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
+ defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
+ def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
+ GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
+ (PseudoMask $merge,
+ $rs1, (mask_type V0), GPR:$vl, sew)>;
+}
+
+multiclass VPatUSLoadFF<string inst,
+ LLVMType type,
+ LLVMType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg reg_class>
+{
+ defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
+ def : Pat<(type (riscv_vleff GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+ (Pseudo $rs1, GPR:$vl, sew)>;
+ defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
+ def : Pat<(type (riscv_vleff_mask (type GetVRegNoV0<reg_class>.R:$merge),
+ GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
+ (PseudoMask $merge,
+ $rs1, (mask_type V0), GPR:$vl, sew)>;
+}
+
+multiclass VPatSLoad<string intrinsic,
+ string inst,
+ LLVMType type,
+ LLVMType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg reg_class>
+{
+ defvar Intr = !cast<Intrinsic>(intrinsic);
+ defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
+ def : Pat<(type (Intr GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+ (Pseudo $rs1, $rs2, GPR:$vl, sew)>;
+ defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
+ defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
+ def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
+ GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
+ (PseudoMask $merge,
+ $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
+}
+
+multiclass VPatILoad<string intrinsic,
+ string inst,
+ LLVMType type,
+ LLVMType idx_type,
+ LLVMType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ LMULInfo idx_vlmul,
+ VReg reg_class,
+ VReg idx_reg_class>
+{
+ defvar Intr = !cast<Intrinsic>(intrinsic);
+ defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX);
+ def : Pat<(type (Intr GPR:$rs1, (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl)))),
+ (Pseudo $rs1, $rs2, GPR:$vl, sew)>;
+
+ defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
+ defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK");
+ def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
+ GPR:$rs1, (idx_type idx_reg_class:$rs2),
+ (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
+ (PseudoMask $merge,
+ $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
+}
+
+multiclass VPatUSStore<string intrinsic,
+ string inst,
+ LLVMType type,
+ LLVMType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg reg_class>
+{
+ defvar Intr = !cast<Intrinsic>(intrinsic);
+ defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
+ def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))),
+ (Pseudo $rs3, $rs1, GPR:$vl, sew)>;
+ defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
+ defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
+ def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+ (PseudoMask $rs3, $rs1, (mask_type V0), GPR:$vl, sew)>;
+}
+
+multiclass VPatSStore<string intrinsic,
+ string inst,
+ LLVMType type,
+ LLVMType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg reg_class>
+{
+ defvar Intr = !cast<Intrinsic>(intrinsic);
+ defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
+ def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl))),
+ (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>;
+ defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
+ defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
+ def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+ (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
+}
+
+multiclass VPatIStore<string intrinsic,
+ string inst,
+ LLVMType type,
+ LLVMType idx_type,
+ LLVMType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ LMULInfo idx_vlmul,
+ VReg reg_class,
+ VReg idx_reg_class>
+{
+ defvar Intr = !cast<Intrinsic>(intrinsic);
+ defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX);
+ def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1,
+ (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl))),
+ (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>;
+ defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
+ defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK");
+ def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1,
+ (idx_type idx_reg_class:$rs2), (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+ (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
+}
+
+multiclass VPatUnaryS_M<string intrinsic_name,
+ string inst>
+{
+ foreach mti = AllMasks in {
+ def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name)
+ (mti.Mask VR:$rs1), (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_M_"#mti.BX) $rs1,
+ GPR:$vl, mti.SEW)>;
+ def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name # "_mask")
+ (mti.Mask VR:$rs1), (mti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK") $rs1,
+ (mti.Mask V0), GPR:$vl, mti.SEW)>;
+ }
+}
+
+multiclass VPatUnaryV_V_AnyMask<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in {
+ def : VPatUnaryAnyMask<intrinsic, instruction, "VM",
+ vti.Vector, vti.Vector, vti.Mask,
+ vti.SEW, vti.LMul, vti.RegClass,
+ vti.RegClass>;
+ }
+}
+
+multiclass VPatUnaryM_M<string intrinsic,
+ string inst>
+{
+ foreach mti = AllMasks in {
+ def : VPatMaskUnaryNoMask<intrinsic, inst, mti>;
+ def : VPatMaskUnaryMask<intrinsic, inst, mti>;
+ }
+}
+
+multiclass VPatUnaryV_M<string intrinsic, string instruction>
+{
+ foreach vti = AllIntegerVectors in {
+ def : VPatUnaryNoMask<intrinsic, instruction, "M", vti.Vector, vti.Mask,
+ vti.SEW, vti.LMul, VR>;
+ def : VPatUnaryMask<intrinsic, instruction, "M", vti.Vector, vti.Mask,
+ vti.Mask, vti.SEW, vti.LMul, vti.RegClass, VR>;
+ }
+}
+
+multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
+ list<VTypeInfoToFraction> fractionList>
+{
+ foreach vtiTofti = fractionList in
+ {
+ defvar vti = vtiTofti.Vti;
+ defvar fti = vtiTofti.Fti;
+ def : VPatUnaryNoMask<intrinsic, instruction, suffix,
+ vti.Vector, fti.Vector,
+ vti.SEW, vti.LMul, fti.RegClass>;
+ def : VPatUnaryMask<intrinsic, instruction, suffix,
+ vti.Vector, fti.Vector, vti.Mask,
+ vti.SEW, vti.LMul, vti.RegClass, fti.RegClass>;
+ }
+}
+
+multiclass VPatUnaryV_V<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in {
+ def : VPatUnaryNoMask<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector,
+ vti.SEW, vti.LMul, vti.RegClass>;
+ def : VPatUnaryMask<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector, vti.Mask,
+ vti.SEW, vti.LMul, vti.RegClass, vti.RegClass>;
+ }
+}
+
+multiclass VPatNullaryV<string intrinsic, string instruction>
+{
+ foreach vti = AllIntegerVectors in {
+ def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic)
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX)
+ GPR:$vl, vti.SEW)>;
+ def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic # "_mask")
+ (vti.Vector vti.RegClass:$merge),
+ (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_MASK")
+ vti.RegClass:$merge, (vti.Mask V0),
+ GPR:$vl, vti.SEW)>;
+ }
+}
+
+multiclass VPatNullaryM<string intrinsic, string inst> {
+ foreach mti = AllMasks in
+ def : Pat<(mti.Mask (!cast<Intrinsic>(intrinsic)
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_M_"#mti.BX)
+ GPR:$vl, mti.SEW)>;
+}
+
+multiclass VPatBinary<string intrinsic,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind>
+{
+ def : VPatBinaryNoMask<intrinsic, inst, result_type, op1_type, op2_type,
+ sew, op1_reg_class, op2_kind>;
+ def : VPatBinaryMask<intrinsic, inst, result_type, op1_type, op2_type,
+ mask_type, sew, result_reg_class, op1_reg_class,
+ op2_kind>;
+}
+
+multiclass VPatBinaryCarryIn<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg op1_reg_class,
+ DAGOperand op2_kind>
+{
+ def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0), GPR:$vl, sew)>;
+}
+
+multiclass VPatBinaryMaskOut<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg op1_reg_class,
+ DAGOperand op2_kind>
+{
+ def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ GPR:$vl, sew)>;
+}
+
+multiclass VPatConversion<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op1_reg_class>
+{
+ def : VPatUnaryNoMask<intrinsic, inst, kind, result_type, op1_type,
+ sew, vlmul, op1_reg_class>;
+ def : VPatUnaryMask<intrinsic, inst, kind, result_type, op1_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
+}
+
+multiclass VPatBinaryV_VV<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, vti.Vector,vti.Mask,
+ vti.SEW, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatBinaryV_VV_INT<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in {
+ defvar ivti = GetIntVTypeInfo<vti>.Vti;
+ defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
+ vti.SEW, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
+ }
+}
+
+multiclass VPatBinaryV_VV_INT_EEW<string intrinsic, string instruction,
+ int eew, list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in {
+ // emul = lmul * eew / sew
+ defvar vlmul = vti.LMul;
+ defvar octuple_lmul = octuple_from_str<vlmul.MX>.ret;
+ defvar octuple_emul = !srl(!mul(octuple_lmul, eew), shift_amount<vti.SEW>.val);
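+ // In the octuple encoding, 1 corresponds to EMUL=1/8 and 64 to EMUL=8;
+ // combinations outside that range get no pattern.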
+ if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
+ defvar emul_str = octuple_to_str<octuple_emul>.ret;
+ defvar ivti = !cast<VTypeInfo>("VI" # eew # emul_str);
+ defvar inst = instruction # "_VV_" # vti.LMul.MX # "_" # emul_str;
+ defm : VPatBinary<intrinsic, inst,
+ vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
+ vti.SEW, vti.RegClass,
+ vti.RegClass, ivti.RegClass>;
+ }
+ }
+}
+
+multiclass VPatBinaryV_VX<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in {
+ defvar kind = "V"#vti.ScalarSuffix;
+ defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
+ vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
+ vti.SEW, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryV_VX_INT<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ defm : VPatBinary<intrinsic, instruction # "_VX_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.SEW, vti.RegClass,
+ vti.RegClass, GPR>;
+}
+
+multiclass VPatBinaryV_VI<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand imm_type> {
+ foreach vti = vtilist in
+ defm : VPatBinary<intrinsic, instruction # "_VI_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.SEW, vti.RegClass,
+ vti.RegClass, imm_type>;
+}
+
+multiclass VPatBinaryM_MM<string intrinsic, string instruction> {
+ foreach mti = AllMasks in
+ def : VPatBinaryNoMask<intrinsic, instruction # "_MM_" # mti.LMul.MX,
+ mti.Mask, mti.Mask, mti.Mask,
+ mti.SEW, VR, VR>;
+}
+
+multiclass VPatBinaryW_VV<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defm : VPatBinary<intrinsic, instruction # "_VV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Vector, Vti.Mask,
+ Vti.SEW, Wti.RegClass,
+ Vti.RegClass, Vti.RegClass>;
+ }
+}
+
+multiclass VPatBinaryW_VX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defvar kind = "V"#Vti.ScalarSuffix;
+ defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.SEW, Wti.RegClass,
+ Vti.RegClass, Vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryW_WV<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defm : VPatBinary<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.SEW, Wti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
+ }
+}
+
+multiclass VPatBinaryW_WX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defvar kind = "W"#Vti.ScalarSuffix;
+ defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.SEW, Wti.RegClass,
+ Wti.RegClass, Vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryV_WV<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defm : VPatBinary<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.SEW, Vti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
+ }
+}
+
+multiclass VPatBinaryV_WX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defvar kind = "W"#Vti.ScalarSuffix;
+ defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.SEW, Vti.RegClass,
+ Wti.RegClass, Vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryV_WI<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defm : VPatBinary<intrinsic, instruction # "_WI_" # Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, XLenVT, Vti.Mask,
+ Vti.SEW, Vti.RegClass,
+ Wti.RegClass, uimm5>;
+ }
+}
+
+multiclass VPatBinaryV_VM<string intrinsic, string instruction,
+ bit CarryOut = 0,
+ list<VTypeInfo> vtilist = AllIntegerVectors> {
+ foreach vti = vtilist in
+ defm : VPatBinaryCarryIn<intrinsic, instruction, "VVM",
+ !if(CarryOut, vti.Mask, vti.Vector),
+ vti.Vector, vti.Vector, vti.Mask,
+ vti.SEW, vti.LMul,
+ vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatBinaryV_XM<string intrinsic, string instruction,
+ bit CarryOut = 0,
+ list<VTypeInfo> vtilist = AllIntegerVectors> {
+ foreach vti = vtilist in
+ defm : VPatBinaryCarryIn<intrinsic, instruction,
+ "V"#vti.ScalarSuffix#"M",
+ !if(CarryOut, vti.Mask, vti.Vector),
+ vti.Vector, vti.Scalar, vti.Mask,
+ vti.SEW, vti.LMul,
+ vti.RegClass, vti.ScalarRegClass>;
+}
+
+multiclass VPatBinaryV_IM<string intrinsic, string instruction,
+ bit CarryOut = 0> {
+ foreach vti = AllIntegerVectors in
+ defm : VPatBinaryCarryIn<intrinsic, instruction, "VIM",
+ !if(CarryOut, vti.Mask, vti.Vector),
+ vti.Vector, XLenVT, vti.Mask,
+ vti.SEW, vti.LMul,
+ vti.RegClass, simm5>;
+}
+
+multiclass VPatBinaryV_V<string intrinsic, string instruction> {
+ foreach vti = AllIntegerVectors in
+ defm : VPatBinaryMaskOut<intrinsic, instruction, "VV",
+ vti.Mask, vti.Vector, vti.Vector,
+ vti.SEW, vti.LMul,
+ vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatBinaryV_X<string intrinsic, string instruction> {
+ foreach vti = AllIntegerVectors in
+ defm : VPatBinaryMaskOut<intrinsic, instruction, "VX",
+ vti.Mask, vti.Vector, XLenVT,
+ vti.SEW, vti.LMul,
+ vti.RegClass, GPR>;
+}
+
+multiclass VPatBinaryV_I<string intrinsic, string instruction> {
+ foreach vti = AllIntegerVectors in
+ defm : VPatBinaryMaskOut<intrinsic, instruction, "VI",
+ vti.Mask, vti.Vector, XLenVT,
+ vti.SEW, vti.LMul,
+ vti.RegClass, simm5>;
+}
+
+multiclass VPatBinaryM_VV<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX,
+ vti.Mask, vti.Vector, vti.Vector, vti.Mask,
+ vti.SEW, VR,
+ vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatBinaryM_VX<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in {
+ defvar kind = "V"#vti.ScalarSuffix;
+ defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
+ vti.Mask, vti.Vector, vti.Scalar, vti.Mask,
+ vti.SEW, VR,
+ vti.RegClass, vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryM_VI<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ defm : VPatBinary<intrinsic, instruction # "_VI_" # vti.LMul.MX,
+ vti.Mask, vti.Vector, XLenVT, vti.Mask,
+ vti.SEW, VR,
+ vti.RegClass, simm5>;
+}
+
+multiclass VPatBinaryV_VV_VX_VI<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand ImmType = simm5>
+{
+ defm "" : VPatBinaryV_VV<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryV_VX<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryV_VI<intrinsic, instruction, vtilist, ImmType>;
+}
+
+multiclass VPatBinaryV_VV_VX<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist>
+{
+ defm "" : VPatBinaryV_VV<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryV_VX<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatBinaryV_VX_VI<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist>
+{
+ defm "" : VPatBinaryV_VX<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryV_VI<intrinsic, instruction, vtilist, simm5>;
+}
+
+multiclass VPatBinaryW_VV_VX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist>
+{
+ defm "" : VPatBinaryW_VV<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryW_VX<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatBinaryW_WV_WX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist>
+{
+ defm "" : VPatBinaryW_WV<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryW_WX<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatBinaryV_WV_WX_WI<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist>
+{
+ defm "" : VPatBinaryV_WV<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryV_WX<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryV_WI<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatBinaryV_VM_XM_IM<string intrinsic, string instruction>
+{
+ defm "" : VPatBinaryV_VM<intrinsic, instruction>;
+ defm "" : VPatBinaryV_XM<intrinsic, instruction>;
+ defm "" : VPatBinaryV_IM<intrinsic, instruction>;
+}
+
+multiclass VPatBinaryM_VM_XM_IM<string intrinsic, string instruction>
+{
+ defm "" : VPatBinaryV_VM<intrinsic, instruction, /*CarryOut=*/1>;
+ defm "" : VPatBinaryV_XM<intrinsic, instruction, /*CarryOut=*/1>;
+ defm "" : VPatBinaryV_IM<intrinsic, instruction, /*CarryOut=*/1>;
+}
+
+multiclass VPatBinaryM_V_X_I<string intrinsic, string instruction>
+{
+ defm "" : VPatBinaryV_V<intrinsic, instruction>;
+ defm "" : VPatBinaryV_X<intrinsic, instruction>;
+ defm "" : VPatBinaryV_I<intrinsic, instruction>;
+}
+
+multiclass VPatBinaryV_VM_XM<string intrinsic, string instruction>
+{
+ defm "" : VPatBinaryV_VM<intrinsic, instruction>;
+ defm "" : VPatBinaryV_XM<intrinsic, instruction>;
+}
+
+multiclass VPatBinaryM_VM_XM<string intrinsic, string instruction>
+{
+ defm "" : VPatBinaryV_VM<intrinsic, instruction, /*CarryOut=*/1>;
+ defm "" : VPatBinaryV_XM<intrinsic, instruction, /*CarryOut=*/1>;
+}
+
+multiclass VPatBinaryM_V_X<string intrinsic, string instruction>
+{
+ defm "" : VPatBinaryV_V<intrinsic, instruction>;
+ defm "" : VPatBinaryV_X<intrinsic, instruction>;
+}
+
+multiclass VPatTernary<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> {
+ def : VPatTernaryNoMask<intrinsic, inst, kind, result_type, op1_type, op2_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class,
+ op2_kind>;
+ def : VPatTernaryMask<intrinsic, inst, kind, result_type, op1_type, op2_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class,
+ op2_kind>;
+}
+
+multiclass VPatTernaryV_VV<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ defm : VPatTernary<intrinsic, instruction, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatTernaryV_VX<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ defm : VPatTernary<intrinsic, instruction, "VX",
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, GPR>;
+}
+
+multiclass VPatTernaryV_VX_AAXA<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ defm : VPatTernary<intrinsic, instruction,
+ "V"#vti.ScalarSuffix,
+ vti.Vector, vti.Scalar, vti.Vector, vti.Mask,
+ vti.SEW, vti.LMul, vti.RegClass,
+ vti.ScalarRegClass, vti.RegClass>;
+}
+
+multiclass VPatTernaryV_VI<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand Imm_type> {
+ foreach vti = vtilist in
+ defm : VPatTernary<intrinsic, instruction, "VI",
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, Imm_type>;
+}
+
+multiclass VPatTernaryW_VV<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach vtiToWti = vtilist in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ defm : VPatTernary<intrinsic, instruction, "VV",
+ wti.Vector, vti.Vector, vti.Vector,
+ vti.Mask, vti.SEW, vti.LMul,
+ wti.RegClass, vti.RegClass, vti.RegClass>;
+ }
+}
+
+multiclass VPatTernaryW_VX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach vtiToWti = vtilist in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ defm : VPatTernary<intrinsic, instruction,
+ "V"#vti.ScalarSuffix,
+ wti.Vector, vti.Scalar, vti.Vector,
+ vti.Mask, vti.SEW, vti.LMul,
+ wti.RegClass, vti.ScalarRegClass, vti.RegClass>;
+ }
+}
+
+multiclass VPatTernaryV_VV_VX_AAXA<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ defm "" : VPatTernaryV_VV<intrinsic, instruction, vtilist>;
+ defm "" : VPatTernaryV_VX_AAXA<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatTernaryV_VX_VI<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand Imm_type = simm5> {
+ defm "" : VPatTernaryV_VX<intrinsic, instruction, vtilist>;
+ defm "" : VPatTernaryV_VI<intrinsic, instruction, vtilist, Imm_type>;
+}
+
+multiclass VPatBinaryM_VV_VX_VI<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist>
+{
+ defm "" : VPatBinaryM_VV<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryM_VX<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryM_VI<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatTernaryW_VV_VX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ defm "" : VPatTernaryW_VV<intrinsic, instruction, vtilist>;
+ defm "" : VPatTernaryW_VX<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatBinaryM_VV_VX<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist>
+{
+ defm "" : VPatBinaryM_VV<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryM_VX<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatBinaryM_VX_VI<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist>
+{
+ defm "" : VPatBinaryM_VX<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryM_VI<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatBinaryV_VV_VX_VI_INT<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand ImmType = simm5>
+{
+ defm "" : VPatBinaryV_VV_INT<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryV_VX_INT<intrinsic, instruction, vtilist>;
+ defm "" : VPatBinaryV_VI<intrinsic, instruction, vtilist, ImmType>;
+}
+
+multiclass VPatReductionV_VS<string intrinsic, string instruction, bit IsFloat = 0> {
+ foreach vti = !if(IsFloat, NoGroupFloatVectors, NoGroupIntegerVectors) in
+ {
+ defvar vectorM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # vti.SEW # "M1");
+ defm : VPatTernary<intrinsic, instruction, "VS",
+ vectorM1.Vector, vti.Vector,
+ vectorM1.Vector, vti.Mask,
+ vti.SEW, vti.LMul,
+ VR, vti.RegClass, VR>;
+ }
+ foreach gvti = !if(IsFloat, GroupFloatVectors, GroupIntegerVectors) in
+ {
+ defm : VPatTernary<intrinsic, instruction, "VS",
+ gvti.VectorM1, gvti.Vector,
+ gvti.VectorM1, gvti.Mask,
+ gvti.SEW, gvti.LMul,
+ VR, gvti.RegClass, VR>;
+ }
+}
+
+multiclass VPatReductionW_VS<string intrinsic, string instruction, bit IsFloat = 0> {
+ foreach vti = !if(IsFloat, AllFloatVectors, AllIntegerVectors) in
+ {
+ defvar wtiSEW = !mul(vti.SEW, 2);
+ if !le(wtiSEW, 64) then {
+ defvar wtiM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # wtiSEW # "M1");
+ defm : VPatTernary<intrinsic, instruction, "VS",
+ wtiM1.Vector, vti.Vector,
+ wtiM1.Vector, vti.Mask,
+ vti.SEW, vti.LMul,
+ wtiM1.RegClass, vti.RegClass,
+ wtiM1.RegClass>;
+ }
+ }
+}
+
+multiclass VPatConversionVI_VF<string intrinsic,
+ string instruction>
+{
+ foreach fvti = AllFloatVectors in
+ {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+
+ defm : VPatConversion<intrinsic, instruction, "V",
+ ivti.Vector, fvti.Vector, ivti.Mask, fvti.SEW,
+ fvti.LMul, ivti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVF_VI<string intrinsic,
+ string instruction>
+{
+ foreach fvti = AllFloatVectors in
+ {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fvti.Vector, ivti.Vector, fvti.Mask, ivti.SEW,
+ ivti.LMul, fvti.RegClass, ivti.RegClass>;
+ }
+}
+
+multiclass VPatConversionWI_VF<string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in
+ {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+
+ defm : VPatConversion<intrinsic, instruction, "V",
+ iwti.Vector, fvti.Vector, iwti.Mask, fvti.SEW,
+ fvti.LMul, iwti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionWF_VI<string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToFloatVectors in
+ {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, vti.Vector, fwti.Mask, vti.SEW,
+ vti.LMul, fwti.RegClass, vti.RegClass>;
+ }
+}
+
+multiclass VPatConversionWF_VF <string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in
+ {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVI_WF <string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToFloatVectors in
+ {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+
+ defm : VPatConversion<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVF_WI <string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in
+ {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+
+ defm : VPatConversion<intrinsic, instruction, "W",
+ fvti.Vector, iwti.Vector, fvti.Mask, fvti.SEW,
+ fvti.LMul, fvti.RegClass, iwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in
+ {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+
+ defm : VPatConversion<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatAMOWD<string intrinsic,
+ string inst,
+ ValueType result_type,
+ ValueType offset_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ LMULInfo emul,
+ VReg op1_reg_class>
+{
+ def : VPatAMOWDNoMask<intrinsic, inst, result_type, offset_type,
+ sew, vlmul, emul, op1_reg_class>;
+ def : VPatAMOWDMask<intrinsic, inst, result_type, offset_type,
+ mask_type, sew, vlmul, emul, op1_reg_class>;
+}
+
+multiclass VPatAMOV_WD<string intrinsic,
+ string inst,
+ list<VTypeInfo> vtilist> {
+ foreach eew = EEWList in {
+ foreach vti = vtilist in {
+ if !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64)) then {
+ defvar octuple_lmul = octuple_from_str<vti.LMul.MX>.ret;
+ // Calculate emul = eew * lmul / sew
+ defvar octuple_emul = !srl(!mul(eew, octuple_lmul), shift_amount<vti.SEW>.val);
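+ // For example, SEW=32 at LMUL=2 (octuple_lmul = 16) with EEW=16 gives
+ // octuple_emul = (16 * 16) >> 5 = 8, i.e. EMUL=1, so the offset vector
+ // type below is VI16M1.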
+ if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
+ defvar emulMX = octuple_to_str<octuple_emul>.ret;
+ defvar offsetVti = !cast<VTypeInfo>("VI" # eew # emulMX);
+ defvar inst_ei = inst # "EI" # eew;
+ defm : VPatAMOWD<intrinsic, inst_ei,
+ vti.Vector, offsetVti.Vector,
+ vti.Mask, vti.SEW, vti.LMul, offsetVti.LMul, offsetVti.RegClass>;
+ }
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtV] in {
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions for CodeGen
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+ def PseudoVMV1R_V : VPseudo<VMV1R_V, V_M1, (outs VR:$vd), (ins VR:$vs2)>;
+ def PseudoVMV2R_V : VPseudo<VMV2R_V, V_M2, (outs VRM2:$vd), (ins VRM2:$vs2)>;
+ def PseudoVMV4R_V : VPseudo<VMV4R_V, V_M4, (outs VRM4:$vd), (ins VRM4:$vs2)>;
+ def PseudoVMV8R_V : VPseudo<VMV8R_V, V_M8, (outs VRM8:$vd), (ins VRM8:$vs2)>;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
+ def PseudoReadVLENB : Pseudo<(outs GPR:$rd), (ins),
+ [(set GPR:$rd, (riscv_read_vlenb))]>;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1,
+ Uses = [VL] in
+def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins),
+ [(set GPR:$rd, (riscv_read_vl))]>;
+
+//===----------------------------------------------------------------------===//
+// 6. Configuration-Setting Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudos.
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
+def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei), []>;
+def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// 7. Vector Loads and Stores
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 7.4 Vector Unit-Stride Instructions
+//===----------------------------------------------------------------------===//
+
+ // Pseudo Unit-Stride Loads and Stores
+foreach eew = EEWList in {
+ defm PseudoVLE # eew : VPseudoUSLoad;
+ defm PseudoVSE # eew : VPseudoUSStore;
+}
+
+defm PseudoVLE1 : VPseudoLoadMask;
+defm PseudoVSE1 : VPseudoStoreMask;
+
+//===----------------------------------------------------------------------===//
+// 7.5 Vector Strided Instructions
+//===----------------------------------------------------------------------===//
+
+// Vector Strided Loads and Stores
+foreach eew = EEWList in {
+ defm PseudoVLSE # eew : VPseudoSLoad;
+ defm PseudoVSSE # eew : VPseudoSStore;
+}
+
+//===----------------------------------------------------------------------===//
+// 7.6 Vector Indexed Instructions
+//===----------------------------------------------------------------------===//
+
+// Vector Indexed Loads and Stores
+foreach eew = EEWList in {
+ defm PseudoVLUXEI # eew : VPseudoILoad;
+ defm PseudoVLOXEI # eew : VPseudoILoad;
+ defm PseudoVSOXEI # eew : VPseudoIStore;
+ defm PseudoVSUXEI # eew : VPseudoIStore;
+}
+
+//===----------------------------------------------------------------------===//
+// 7.7. Unit-stride Fault-Only-First Loads
+//===----------------------------------------------------------------------===//
+
+// vleff may update VL register
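+ // (a fault-only-first load traps only for element 0; a fault on a later
+ // element truncates vl instead, so the pseudo is modeled as writing VL)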
+let hasSideEffects = 1, Defs = [VL] in
+foreach eew = EEWList in {
+ defm PseudoVLE # eew # FF : VPseudoUSLoad;
+}
+
+//===----------------------------------------------------------------------===//
+// 7.8. Vector Load/Store Segment Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVLSEG : VPseudoUSSegLoad</*fault-only-first*/false>;
+defm PseudoVLSSEG : VPseudoSSegLoad;
+defm PseudoVLOXSEG : VPseudoISegLoad;
+defm PseudoVLUXSEG : VPseudoISegLoad;
+defm PseudoVSSEG : VPseudoUSSegStore;
+defm PseudoVSSSEG : VPseudoSSegStore;
+defm PseudoVSOXSEG : VPseudoISegStore;
+defm PseudoVSUXSEG : VPseudoISegStore;
+
+// vlseg<nf>e<eew>ff.v may update VL register
+let hasSideEffects = 1, Defs = [VL] in
+defm PseudoVLSEG : VPseudoUSSegLoad</*fault-only-first*/true>;
+
+//===----------------------------------------------------------------------===//
+// 8. Vector AMO Operations
+//===----------------------------------------------------------------------===//
+defm PseudoVAMOSWAP : VPseudoAMO;
+defm PseudoVAMOADD : VPseudoAMO;
+defm PseudoVAMOXOR : VPseudoAMO;
+defm PseudoVAMOAND : VPseudoAMO;
+defm PseudoVAMOOR : VPseudoAMO;
+defm PseudoVAMOMIN : VPseudoAMO;
+defm PseudoVAMOMAX : VPseudoAMO;
+defm PseudoVAMOMINU : VPseudoAMO;
+defm PseudoVAMOMAXU : VPseudoAMO;
+
+//===----------------------------------------------------------------------===//
+// 12. Vector Integer Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 12.1. Vector Single-Width Integer Add and Subtract
+//===----------------------------------------------------------------------===//
+defm PseudoVADD : VPseudoBinaryV_VV_VX_VI;
+defm PseudoVSUB : VPseudoBinaryV_VV_VX;
+defm PseudoVRSUB : VPseudoBinaryV_VX_VI;
+
+//===----------------------------------------------------------------------===//
+// 12.2. Vector Widening Integer Add/Subtract
+//===----------------------------------------------------------------------===//
+defm PseudoVWADDU : VPseudoBinaryW_VV_VX;
+defm PseudoVWSUBU : VPseudoBinaryW_VV_VX;
+defm PseudoVWADD : VPseudoBinaryW_VV_VX;
+defm PseudoVWSUB : VPseudoBinaryW_VV_VX;
+defm PseudoVWADDU : VPseudoBinaryW_WV_WX;
+defm PseudoVWSUBU : VPseudoBinaryW_WV_WX;
+defm PseudoVWADD : VPseudoBinaryW_WV_WX;
+defm PseudoVWSUB : VPseudoBinaryW_WV_WX;
+
+//===----------------------------------------------------------------------===//
+// 12.3. Vector Integer Extension
+//===----------------------------------------------------------------------===//
+defm PseudoVZEXT_VF2 : PseudoUnaryV_VF2;
+defm PseudoVZEXT_VF4 : PseudoUnaryV_VF4;
+defm PseudoVZEXT_VF8 : PseudoUnaryV_VF8;
+defm PseudoVSEXT_VF2 : PseudoUnaryV_VF2;
+defm PseudoVSEXT_VF4 : PseudoUnaryV_VF4;
+defm PseudoVSEXT_VF8 : PseudoUnaryV_VF8;
+
+//===----------------------------------------------------------------------===//
+// 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVADC : VPseudoBinaryV_VM_XM_IM;
+defm PseudoVMADC : VPseudoBinaryM_VM_XM_IM<"@earlyclobber $rd">;
+defm PseudoVMADC : VPseudoBinaryM_V_X_I<"@earlyclobber $rd">;
+
+defm PseudoVSBC : VPseudoBinaryV_VM_XM;
+defm PseudoVMSBC : VPseudoBinaryM_VM_XM<"@earlyclobber $rd">;
+defm PseudoVMSBC : VPseudoBinaryM_V_X<"@earlyclobber $rd">;
+
+//===----------------------------------------------------------------------===//
+// 12.5. Vector Bitwise Logical Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVAND : VPseudoBinaryV_VV_VX_VI;
+defm PseudoVOR : VPseudoBinaryV_VV_VX_VI;
+defm PseudoVXOR : VPseudoBinaryV_VV_VX_VI;
+
+//===----------------------------------------------------------------------===//
+// 12.6. Vector Single-Width Bit Shift Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVSLL : VPseudoBinaryV_VV_VX_VI<uimm5>;
+defm PseudoVSRL : VPseudoBinaryV_VV_VX_VI<uimm5>;
+defm PseudoVSRA : VPseudoBinaryV_VV_VX_VI<uimm5>;
+
+//===----------------------------------------------------------------------===//
+// 12.7. Vector Narrowing Integer Right Shift Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVNSRL : VPseudoBinaryV_WV_WX_WI;
+defm PseudoVNSRA : VPseudoBinaryV_WV_WX_WI;
+
+//===----------------------------------------------------------------------===//
+// 12.8. Vector Integer Comparison Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVMSEQ : VPseudoBinaryM_VV_VX_VI;
+defm PseudoVMSNE : VPseudoBinaryM_VV_VX_VI;
+defm PseudoVMSLTU : VPseudoBinaryM_VV_VX;
+defm PseudoVMSLT : VPseudoBinaryM_VV_VX;
+defm PseudoVMSLEU : VPseudoBinaryM_VV_VX_VI;
+defm PseudoVMSLE : VPseudoBinaryM_VV_VX_VI;
+defm PseudoVMSGTU : VPseudoBinaryM_VX_VI;
+defm PseudoVMSGT : VPseudoBinaryM_VX_VI;
+
+//===----------------------------------------------------------------------===//
+// 12.9. Vector Integer Min/Max Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVMINU : VPseudoBinaryV_VV_VX;
+defm PseudoVMIN : VPseudoBinaryV_VV_VX;
+defm PseudoVMAXU : VPseudoBinaryV_VV_VX;
+defm PseudoVMAX : VPseudoBinaryV_VV_VX;
+
+//===----------------------------------------------------------------------===//
+// 12.10. Vector Single-Width Integer Multiply Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVMUL : VPseudoBinaryV_VV_VX;
+defm PseudoVMULH : VPseudoBinaryV_VV_VX;
+defm PseudoVMULHU : VPseudoBinaryV_VV_VX;
+defm PseudoVMULHSU : VPseudoBinaryV_VV_VX;
+
+//===----------------------------------------------------------------------===//
+// 12.11. Vector Integer Divide Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVDIVU : VPseudoBinaryV_VV_VX;
+defm PseudoVDIV : VPseudoBinaryV_VV_VX;
+defm PseudoVREMU : VPseudoBinaryV_VV_VX;
+defm PseudoVREM : VPseudoBinaryV_VV_VX;
+
+//===----------------------------------------------------------------------===//
+// 12.12. Vector Widening Integer Multiply Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVWMUL : VPseudoBinaryW_VV_VX;
+defm PseudoVWMULU : VPseudoBinaryW_VV_VX;
+defm PseudoVWMULSU : VPseudoBinaryW_VV_VX;
+
+//===----------------------------------------------------------------------===//
+// 12.13. Vector Single-Width Integer Multiply-Add Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVMACC : VPseudoTernaryV_VV_VX_AAXA;
+defm PseudoVNMSAC : VPseudoTernaryV_VV_VX_AAXA;
+defm PseudoVMADD : VPseudoTernaryV_VV_VX_AAXA;
+defm PseudoVNMSUB : VPseudoTernaryV_VV_VX_AAXA;
+
+//===----------------------------------------------------------------------===//
+// 12.14. Vector Widening Integer Multiply-Add Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVWMACCU : VPseudoTernaryW_VV_VX;
+defm PseudoVWMACC : VPseudoTernaryW_VV_VX;
+defm PseudoVWMACCSU : VPseudoTernaryW_VV_VX;
+defm PseudoVWMACCUS : VPseudoTernaryW_VX;
+
+//===----------------------------------------------------------------------===//
+// 12.16. Vector Integer Merge Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVMERGE : VPseudoBinaryV_VM_XM_IM;
+
+//===----------------------------------------------------------------------===//
+// 12.17. Vector Integer Move Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVMV_V : VPseudoUnaryV_V_X_I_NoDummyMask;
+
+//===----------------------------------------------------------------------===//
+// 13.1. Vector Single-Width Saturating Add and Subtract
+//===----------------------------------------------------------------------===//
+let Defs = [VXSAT], hasSideEffects = 1 in {
+ defm PseudoVSADDU : VPseudoBinaryV_VV_VX_VI;
+ defm PseudoVSADD : VPseudoBinaryV_VV_VX_VI;
+ defm PseudoVSSUBU : VPseudoBinaryV_VV_VX;
+ defm PseudoVSSUB : VPseudoBinaryV_VV_VX;
+}
+
+//===----------------------------------------------------------------------===//
+// 13.2. Vector Single-Width Averaging Add and Subtract
+//===----------------------------------------------------------------------===//
+let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in {
+ defm PseudoVAADDU : VPseudoBinaryV_VV_VX;
+ defm PseudoVAADD : VPseudoBinaryV_VV_VX;
+ defm PseudoVASUBU : VPseudoBinaryV_VV_VX;
+ defm PseudoVASUB : VPseudoBinaryV_VV_VX;
+}
+
+//===----------------------------------------------------------------------===//
+// 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+//===----------------------------------------------------------------------===//
+let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
+ defm PseudoVSMUL : VPseudoBinaryV_VV_VX;
+}
+
+//===----------------------------------------------------------------------===//
+// 13.4. Vector Single-Width Scaling Shift Instructions
+//===----------------------------------------------------------------------===//
+let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in {
+ defm PseudoVSSRL : VPseudoBinaryV_VV_VX_VI<uimm5>;
+ defm PseudoVSSRA : VPseudoBinaryV_VV_VX_VI<uimm5>;
+}
+
+//===----------------------------------------------------------------------===//
+// 13.5. Vector Narrowing Fixed-Point Clip Instructions
+//===----------------------------------------------------------------------===//
+let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
+ defm PseudoVNCLIP : VPseudoBinaryV_WV_WX_WI;
+ defm PseudoVNCLIPU : VPseudoBinaryV_WV_WX_WI;
+}
+
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+//===----------------------------------------------------------------------===//
+// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFADD : VPseudoBinaryV_VV_VF;
+defm PseudoVFSUB : VPseudoBinaryV_VV_VF;
+defm PseudoVFRSUB : VPseudoBinaryV_VF;
+
+//===----------------------------------------------------------------------===//
+// 14.3. Vector Widening Floating-Point Add/Subtract Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFWADD : VPseudoBinaryW_VV_VF;
+defm PseudoVFWSUB : VPseudoBinaryW_VV_VF;
+defm PseudoVFWADD : VPseudoBinaryW_WV_WF;
+defm PseudoVFWSUB : VPseudoBinaryW_WV_WF;
+
+//===----------------------------------------------------------------------===//
+// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFMUL : VPseudoBinaryV_VV_VF;
+defm PseudoVFDIV : VPseudoBinaryV_VV_VF;
+defm PseudoVFRDIV : VPseudoBinaryV_VF;
+
+//===----------------------------------------------------------------------===//
+// 14.5. Vector Widening Floating-Point Multiply
+//===----------------------------------------------------------------------===//
+defm PseudoVFWMUL : VPseudoBinaryW_VV_VF;
+
+//===----------------------------------------------------------------------===//
+// 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFMACC : VPseudoTernaryV_VV_VF_AAXA;
+defm PseudoVFNMACC : VPseudoTernaryV_VV_VF_AAXA;
+defm PseudoVFMSAC : VPseudoTernaryV_VV_VF_AAXA;
+defm PseudoVFNMSAC : VPseudoTernaryV_VV_VF_AAXA;
+defm PseudoVFMADD : VPseudoTernaryV_VV_VF_AAXA;
+defm PseudoVFNMADD : VPseudoTernaryV_VV_VF_AAXA;
+defm PseudoVFMSUB : VPseudoTernaryV_VV_VF_AAXA;
+defm PseudoVFNMSUB : VPseudoTernaryV_VV_VF_AAXA;
+
+//===----------------------------------------------------------------------===//
+// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFWMACC : VPseudoTernaryW_VV_VF;
+defm PseudoVFWNMACC : VPseudoTernaryW_VV_VF;
+defm PseudoVFWMSAC : VPseudoTernaryW_VV_VF;
+defm PseudoVFWNMSAC : VPseudoTernaryW_VV_VF;
+
+//===----------------------------------------------------------------------===//
+// 14.8. Vector Floating-Point Square-Root Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVFSQRT : VPseudoUnaryV_V;
+
+//===----------------------------------------------------------------------===//
+// 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVFRSQRT7 : VPseudoUnaryV_V;
+
+//===----------------------------------------------------------------------===//
+// 14.10. Vector Floating-Point Reciprocal Estimate Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVFREC7 : VPseudoUnaryV_V;
+
+//===----------------------------------------------------------------------===//
+// 14.11. Vector Floating-Point Min/Max Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFMIN : VPseudoBinaryV_VV_VF;
+defm PseudoVFMAX : VPseudoBinaryV_VV_VF;
+
+//===----------------------------------------------------------------------===//
+// 14.12. Vector Floating-Point Sign-Injection Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFSGNJ : VPseudoBinaryV_VV_VF;
+defm PseudoVFSGNJN : VPseudoBinaryV_VV_VF;
+defm PseudoVFSGNJX : VPseudoBinaryV_VV_VF;
+
+//===----------------------------------------------------------------------===//
+// 14.13. Vector Floating-Point Compare Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVMFEQ : VPseudoBinaryM_VV_VF;
+defm PseudoVMFNE : VPseudoBinaryM_VV_VF;
+defm PseudoVMFLT : VPseudoBinaryM_VV_VF;
+defm PseudoVMFLE : VPseudoBinaryM_VV_VF;
+defm PseudoVMFGT : VPseudoBinaryM_VF;
+defm PseudoVMFGE : VPseudoBinaryM_VF;
+
+//===----------------------------------------------------------------------===//
+// 14.14. Vector Floating-Point Classify Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVFCLASS : VPseudoUnaryV_V;
+
+//===----------------------------------------------------------------------===//
+// 14.15. Vector Floating-Point Merge Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVFMERGE : VPseudoBinaryV_FM;
+
+//===----------------------------------------------------------------------===//
+// 14.16. Vector Floating-Point Move Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVFMV_V : VPseudoUnaryV_F_NoDummyMask;
+
+//===----------------------------------------------------------------------===//
+// 14.17. Single-Width Floating-Point/Integer Type-Convert Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFCVT_XU_F : VPseudoConversionV_V;
+defm PseudoVFCVT_X_F : VPseudoConversionV_V;
+defm PseudoVFCVT_RTZ_XU_F : VPseudoConversionV_V;
+defm PseudoVFCVT_RTZ_X_F : VPseudoConversionV_V;
+defm PseudoVFCVT_F_XU : VPseudoConversionV_V;
+defm PseudoVFCVT_F_X : VPseudoConversionV_V;
+
+//===----------------------------------------------------------------------===//
+// 14.18. Widening Floating-Point/Integer Type-Convert Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFWCVT_XU_F : VPseudoConversionW_V;
+defm PseudoVFWCVT_X_F : VPseudoConversionW_V;
+defm PseudoVFWCVT_RTZ_XU_F : VPseudoConversionW_V;
+defm PseudoVFWCVT_RTZ_X_F : VPseudoConversionW_V;
+defm PseudoVFWCVT_F_XU : VPseudoConversionW_V;
+defm PseudoVFWCVT_F_X : VPseudoConversionW_V;
+defm PseudoVFWCVT_F_F : VPseudoConversionW_V;
+
+//===----------------------------------------------------------------------===//
+// 14.19. Narrowing Floating-Point/Integer Type-Convert Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFNCVT_XU_F : VPseudoConversionV_W;
+defm PseudoVFNCVT_X_F : VPseudoConversionV_W;
+defm PseudoVFNCVT_RTZ_XU_F : VPseudoConversionV_W;
+defm PseudoVFNCVT_RTZ_X_F : VPseudoConversionV_W;
+defm PseudoVFNCVT_F_XU : VPseudoConversionV_W;
+defm PseudoVFNCVT_F_X : VPseudoConversionV_W;
+defm PseudoVFNCVT_F_F : VPseudoConversionV_W;
+defm PseudoVFNCVT_ROD_F_F : VPseudoConversionV_W;
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+let Predicates = [HasStdExtV] in {
+//===----------------------------------------------------------------------===//
+// 15.1. Vector Single-Width Integer Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVREDSUM : VPseudoReductionV_VS;
+defm PseudoVREDAND : VPseudoReductionV_VS;
+defm PseudoVREDOR : VPseudoReductionV_VS;
+defm PseudoVREDXOR : VPseudoReductionV_VS;
+defm PseudoVREDMINU : VPseudoReductionV_VS;
+defm PseudoVREDMIN : VPseudoReductionV_VS;
+defm PseudoVREDMAXU : VPseudoReductionV_VS;
+defm PseudoVREDMAX : VPseudoReductionV_VS;
+
+//===----------------------------------------------------------------------===//
+// 15.2. Vector Widening Integer Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVWREDSUMU : VPseudoReductionV_VS;
+defm PseudoVWREDSUM : VPseudoReductionV_VS;
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+//===----------------------------------------------------------------------===//
+// 15.3. Vector Single-Width Floating-Point Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFREDOSUM : VPseudoReductionV_VS;
+defm PseudoVFREDSUM : VPseudoReductionV_VS;
+defm PseudoVFREDMIN : VPseudoReductionV_VS;
+defm PseudoVFREDMAX : VPseudoReductionV_VS;
+
+//===----------------------------------------------------------------------===//
+// 15.4. Vector Widening Floating-Point Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFWREDSUM : VPseudoReductionV_VS;
+defm PseudoVFWREDOSUM : VPseudoReductionV_VS;
+
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+//===----------------------------------------------------------------------===//
+// 16. Vector Mask Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 16.1 Vector Mask-Register Logical Instructions
+//===----------------------------------------------------------------------===//
+
+defm PseudoVMAND: VPseudoBinaryM_MM;
+defm PseudoVMNAND: VPseudoBinaryM_MM;
+defm PseudoVMANDNOT: VPseudoBinaryM_MM;
+defm PseudoVMXOR: VPseudoBinaryM_MM;
+defm PseudoVMOR: VPseudoBinaryM_MM;
+defm PseudoVMNOR: VPseudoBinaryM_MM;
+defm PseudoVMORNOT: VPseudoBinaryM_MM;
+defm PseudoVMXNOR: VPseudoBinaryM_MM;
+
+ // Pseudo instructions
+defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">;
+defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">;
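+// (vmclr.m vd expands to vmxor.mm vd, vd, vd and vmset.m vd to
+// vmxnor.mm vd, vd, vd, since x ^ x = 0 and ~(x ^ x) is all ones)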
+
+//===----------------------------------------------------------------------===//
+// 16.2. Vector mask population count vpopc
+//===----------------------------------------------------------------------===//
+
+defm PseudoVPOPC: VPseudoUnaryS_M;
+
+//===----------------------------------------------------------------------===//
+// 16.3. vfirst find-first-set mask bit
+//===----------------------------------------------------------------------===//
+
+defm PseudoVFIRST: VPseudoUnaryS_M;
+
+//===----------------------------------------------------------------------===//
+// 16.4. vmsbf.m set-before-first mask bit
+//===----------------------------------------------------------------------===//
+defm PseudoVMSBF: VPseudoUnaryM_M;
+
+//===----------------------------------------------------------------------===//
+// 16.5. vmsif.m set-including-first mask bit
+//===----------------------------------------------------------------------===//
+defm PseudoVMSIF: VPseudoUnaryM_M;
+
+//===----------------------------------------------------------------------===//
+// 16.6. vmsof.m set-only-first mask bit
+//===----------------------------------------------------------------------===//
+defm PseudoVMSOF: VPseudoUnaryM_M;
+
+//===----------------------------------------------------------------------===//
+// 16.8. Vector Iota Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVIOTA_M: VPseudoUnaryV_M;
+
+//===----------------------------------------------------------------------===//
+// 16.9. Vector Element Index Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVID : VPseudoMaskNullaryV;
+
+//===----------------------------------------------------------------------===//
+// 17. Vector Permutation Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 17.1. Integer Scalar Move Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtV] in {
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1,
+ Uses = [VL, VTYPE] in {
+ foreach m = MxList.m in {
+ let VLMul = m.value in {
+ let HasSEWOp = 1, BaseInstr = VMV_X_S in
+ def PseudoVMV_X_S # "_" # m.MX: Pseudo<(outs GPR:$rd),
+ (ins m.vrclass:$rs2, ixlenimm:$sew),
+ []>, RISCVVPseudo;
+ let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, WritesElement0 = 1,
+ Constraints = "$rd = $rs1" in
+ def PseudoVMV_S_X # "_" # m.MX: Pseudo<(outs m.vrclass:$rd),
+ (ins m.vrclass:$rs1, GPR:$rs2,
+ GPR:$vl, ixlenimm:$sew),
+ []>, RISCVVPseudo;
+ }
+ }
+}
+} // Predicates = [HasStdExtV]
+
+//===----------------------------------------------------------------------===//
+// 17.2. Floating-Point Scalar Move Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1,
+ Uses = [VL, VTYPE] in {
+ foreach m = MxList.m in {
+ foreach f = FPList.fpinfo in {
+ let VLMul = m.value in {
+ let HasSEWOp = 1, BaseInstr = VFMV_F_S in
+ def "PseudoVFMV_" # f.FX # "_S_" # m.MX :
+ Pseudo<(outs f.fprclass:$rd),
+ (ins m.vrclass:$rs2,
+ ixlenimm:$sew),
+ []>, RISCVVPseudo;
+ let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, WritesElement0 = 1,
+ Constraints = "$rd = $rs1" in
+ def "PseudoVFMV_S_" # f.FX # "_" # m.MX :
+ Pseudo<(outs m.vrclass:$rd),
+ (ins m.vrclass:$rs1, f.fprclass:$rs2,
+ GPR:$vl, ixlenimm:$sew),
+ []>, RISCVVPseudo;
+ }
+ }
+ }
+}
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+//===----------------------------------------------------------------------===//
+// 17.3. Vector Slide Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtV] in {
+ defm PseudoVSLIDEUP : VPseudoTernaryV_VX_VI<uimm5, "@earlyclobber $rd">;
+ defm PseudoVSLIDEDOWN : VPseudoTernaryV_VX_VI<uimm5>;
+ defm PseudoVSLIDE1UP : VPseudoBinaryV_VX<"@earlyclobber $rd">;
+ defm PseudoVSLIDE1DOWN : VPseudoBinaryV_VX;
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+ defm PseudoVFSLIDE1UP : VPseudoBinaryV_VF<"@earlyclobber $rd">;
+ defm PseudoVFSLIDE1DOWN : VPseudoBinaryV_VF;
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+//===----------------------------------------------------------------------===//
+// 17.4. Vector Register Gather Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVRGATHER : VPseudoBinaryV_VV_VX_VI<uimm5, "@earlyclobber $rd">;
+defm PseudoVRGATHEREI16 : VPseudoBinaryV_VV_EEW</* eew */ 16, "@earlyclobber $rd">;
+
+//===----------------------------------------------------------------------===//
+// 17.5. Vector Compress Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVCOMPRESS : VPseudoUnaryV_V_AnyMask;
+
+//===----------------------------------------------------------------------===//
+// Patterns.
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtV] in {
+
+//===----------------------------------------------------------------------===//
+// 7. Vector Loads and Stores
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 7.4 Vector Unit-Stride Instructions
+//===----------------------------------------------------------------------===//
+
+foreach vti = AllVectors in
+{
+ defm : VPatUSLoad<"int_riscv_vle",
+ "PseudoVLE" # vti.SEW,
+ vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
+ defm : VPatUSLoadFF<"PseudoVLE" # vti.SEW # "FF",
+ vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
+ defm : VPatUSStore<"int_riscv_vse",
+ "PseudoVSE" # vti.SEW,
+ vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
+}
+
+foreach vti = AllMasks in {
+ defvar PseudoVLE1 = !cast<Instruction>("PseudoVLE1_V_"#vti.BX);
+ def : Pat<(vti.Mask (int_riscv_vle1 GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+ (PseudoVLE1 $rs1, GPR:$vl, vti.SEW)>;
+ defvar PseudoVSE1 = !cast<Instruction>("PseudoVSE1_V_"#vti.BX);
+ def : Pat<(int_riscv_vse1 (vti.Mask VR:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))),
+ (PseudoVSE1 $rs3, $rs1, GPR:$vl, vti.SEW)>;
+}
+
+//===----------------------------------------------------------------------===//
+// 7.5 Vector Strided Instructions
+//===----------------------------------------------------------------------===//
+
+foreach vti = AllVectors in
+{
+ defm : VPatSLoad<"int_riscv_vlse",
+ "PseudoVLSE" # vti.SEW,
+ vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
+ defm : VPatSStore<"int_riscv_vsse",
+ "PseudoVSSE" # vti.SEW,
+ vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
+}
+
+//===----------------------------------------------------------------------===//
+// 7.6 Vector Indexed Instructions
+//===----------------------------------------------------------------------===//
+
+foreach vti = AllVectors in
+foreach eew = EEWList in {
+ defvar vlmul = vti.LMul;
+ defvar octuple_lmul = octuple_from_str<vti.LMul.MX>.ret;
+ defvar log_sew = shift_amount<vti.SEW>.val;
+ // The data vector register group has EEW=SEW and EMUL=LMUL, while the offset
+ // vector register group has its EEW encoded in the instruction and
+ // EMUL = (EEW/SEW) * LMUL.
+ // Calculate the octuple ELMUL, which is (eew * octuple_lmul) >> log_sew.
+ defvar octuple_elmul = !srl(!mul(eew, octuple_lmul), log_sew);
+ // A legal octuple ELMUL must be greater than 0 and less than or equal to 64.
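+ // For example, an SEW=8, LMUL=1 data vector (octuple_lmul = 8) indexed with
+ // EEW=64 offsets gives octuple_elmul = (64 * 8) >> 3 = 64, i.e. ELMUL=8.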
+ if !gt(octuple_elmul, 0) then {
+ if !le(octuple_elmul, 64) then {
+ defvar elmul_str = octuple_to_str<octuple_elmul>.ret;
+ defvar elmul =!cast<LMULInfo>("V_" # elmul_str);
+ defvar idx_vti = !cast<VTypeInfo>("VI" # eew # elmul_str);
+
+ defm : VPatILoad<"int_riscv_vluxei",
+ "PseudoVLUXEI"#eew,
+ vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW,
+ vlmul, elmul, vti.RegClass, idx_vti.RegClass>;
+ defm : VPatILoad<"int_riscv_vloxei",
+ "PseudoVLOXEI"#eew,
+ vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW,
+ vlmul, elmul, vti.RegClass, idx_vti.RegClass>;
+ defm : VPatIStore<"int_riscv_vsoxei",
+ "PseudoVSOXEI"#eew,
+ vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW,
+ vlmul, elmul, vti.RegClass, idx_vti.RegClass>;
+ defm : VPatIStore<"int_riscv_vsuxei",
+ "PseudoVSUXEI"#eew,
+ vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW,
+ vlmul, elmul, vti.RegClass, idx_vti.RegClass>;
+ }
+ }
+}
+} // Predicates = [HasStdExtV]
+
+//===----------------------------------------------------------------------===//
+// 8. Vector AMO Operations
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtZvamo] in {
+ defm "" : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllIntegerVectors>;
+ defm "" : VPatAMOV_WD<"int_riscv_vamoadd", "PseudoVAMOADD", AllIntegerVectors>;
+ defm "" : VPatAMOV_WD<"int_riscv_vamoxor", "PseudoVAMOXOR", AllIntegerVectors>;
+ defm "" : VPatAMOV_WD<"int_riscv_vamoand", "PseudoVAMOAND", AllIntegerVectors>;
+ defm "" : VPatAMOV_WD<"int_riscv_vamoor", "PseudoVAMOOR", AllIntegerVectors>;
+ defm "" : VPatAMOV_WD<"int_riscv_vamomin", "PseudoVAMOMIN", AllIntegerVectors>;
+ defm "" : VPatAMOV_WD<"int_riscv_vamomax", "PseudoVAMOMAX", AllIntegerVectors>;
+ defm "" : VPatAMOV_WD<"int_riscv_vamominu", "PseudoVAMOMINU", AllIntegerVectors>;
+ defm "" : VPatAMOV_WD<"int_riscv_vamomaxu", "PseudoVAMOMAXU", AllIntegerVectors>;
+} // Predicates = [HasStdExtZvamo]
+
+let Predicates = [HasStdExtZvamo, HasStdExtF] in {
+ defm "" : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllFloatVectors>;
+} // Predicates = [HasStdExtZvamo, HasStdExtF]
+
+//===----------------------------------------------------------------------===//
+// 12. Vector Integer Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtV] in {
+//===----------------------------------------------------------------------===//
+// 12.1. Vector Single-Width Integer Add and Subtract
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vadd", "PseudoVADD", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vsub", "PseudoVSUB", AllIntegerVectors>;
+defm "" : VPatBinaryV_VX_VI<"int_riscv_vrsub", "PseudoVRSUB", AllIntegerVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.2. Vector Widening Integer Add/Subtract
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryW_VV_VX<"int_riscv_vwaddu", "PseudoVWADDU", AllWidenableIntVectors>;
+defm "" : VPatBinaryW_VV_VX<"int_riscv_vwsubu", "PseudoVWSUBU", AllWidenableIntVectors>;
+defm "" : VPatBinaryW_VV_VX<"int_riscv_vwadd", "PseudoVWADD", AllWidenableIntVectors>;
+defm "" : VPatBinaryW_VV_VX<"int_riscv_vwsub", "PseudoVWSUB", AllWidenableIntVectors>;
+defm "" : VPatBinaryW_WV_WX<"int_riscv_vwaddu_w", "PseudoVWADDU", AllWidenableIntVectors>;
+defm "" : VPatBinaryW_WV_WX<"int_riscv_vwsubu_w", "PseudoVWSUBU", AllWidenableIntVectors>;
+defm "" : VPatBinaryW_WV_WX<"int_riscv_vwadd_w", "PseudoVWADD", AllWidenableIntVectors>;
+defm "" : VPatBinaryW_WV_WX<"int_riscv_vwsub_w", "PseudoVWSUB", AllWidenableIntVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.3. Vector Integer Extension
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryV_VF<"int_riscv_vzext", "PseudoVZEXT", "VF2",
+ AllFractionableVF2IntVectors>;
+defm "" : VPatUnaryV_VF<"int_riscv_vzext", "PseudoVZEXT", "VF4",
+ AllFractionableVF4IntVectors>;
+defm "" : VPatUnaryV_VF<"int_riscv_vzext", "PseudoVZEXT", "VF8",
+ AllFractionableVF8IntVectors>;
+defm "" : VPatUnaryV_VF<"int_riscv_vsext", "PseudoVSEXT", "VF2",
+ AllFractionableVF2IntVectors>;
+defm "" : VPatUnaryV_VF<"int_riscv_vsext", "PseudoVSEXT", "VF4",
+ AllFractionableVF4IntVectors>;
+defm "" : VPatUnaryV_VF<"int_riscv_vsext", "PseudoVSEXT", "VF8",
+ AllFractionableVF8IntVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VM_XM_IM<"int_riscv_vadc", "PseudoVADC">;
+defm "" : VPatBinaryM_VM_XM_IM<"int_riscv_vmadc_carry_in", "PseudoVMADC">;
+defm "" : VPatBinaryM_V_X_I<"int_riscv_vmadc", "PseudoVMADC">;
+
+defm "" : VPatBinaryV_VM_XM<"int_riscv_vsbc", "PseudoVSBC">;
+defm "" : VPatBinaryM_VM_XM<"int_riscv_vmsbc_borrow_in", "PseudoVMSBC">;
+defm "" : VPatBinaryM_V_X<"int_riscv_vmsbc", "PseudoVMSBC">;
+
+//===----------------------------------------------------------------------===//
+// 12.5. Vector Bitwise Logical Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vand", "PseudoVAND", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vor", "PseudoVOR", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vxor", "PseudoVXOR", AllIntegerVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.6. Vector Single-Width Bit Shift Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsll", "PseudoVSLL", AllIntegerVectors,
+ uimm5>;
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsrl", "PseudoVSRL", AllIntegerVectors,
+ uimm5>;
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors,
+ uimm5>;
+
+//===----------------------------------------------------------------------===//
+// 12.7. Vector Narrowing Integer Right Shift Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_WV_WX_WI<"int_riscv_vnsrl", "PseudoVNSRL", AllWidenableIntVectors>;
+defm "" : VPatBinaryV_WV_WX_WI<"int_riscv_vnsra", "PseudoVNSRA", AllWidenableIntVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.8. Vector Integer Comparison Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmseq", "PseudoVMSEQ", AllIntegerVectors>;
+defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsne", "PseudoVMSNE", AllIntegerVectors>;
+defm "" : VPatBinaryM_VV_VX<"int_riscv_vmsltu", "PseudoVMSLTU", AllIntegerVectors>;
+defm "" : VPatBinaryM_VV_VX<"int_riscv_vmslt", "PseudoVMSLT", AllIntegerVectors>;
+defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsleu", "PseudoVMSLEU", AllIntegerVectors>;
+defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsle", "PseudoVMSLE", AllIntegerVectors>;
+
+defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgtu", "PseudoVMSGTU", AllIntegerVectors>;
+defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgt", "PseudoVMSGT", AllIntegerVectors>;
+
+// Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16. This
+// avoids the user needing to know that there is no vmslt(u).vi instruction.
+// This is limited to vmslt(u).vx as there is no vmsge(u).vx intrinsic or
+// instruction.
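+// For example, int_riscv_vmslt with scalar 5 (x < 5) is selected as vmsle.vi
+// with immediate 4 (x <= 4); the DecImm transform performs the decrement.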
+foreach vti = AllIntegerVectors in {
+ def : Pat<(vti.Mask (int_riscv_vmslt (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
+ (DecImm simm5_plus1:$rs2),
+ GPR:$vl,
+ vti.SEW)>;
+ def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar simm5_plus1:$rs2),
+ (vti.Mask V0),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK")
+ VR:$merge,
+ vti.RegClass:$rs1,
+ (DecImm simm5_plus1:$rs2),
+ (vti.Mask V0),
+ GPR:$vl,
+ vti.SEW)>;
+
+ def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar simm5_plus1:$rs2),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
+ (DecImm simm5_plus1:$rs2),
+ GPR:$vl,
+ vti.SEW)>;
+ def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar simm5_plus1:$rs2),
+ (vti.Mask V0),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK")
+ VR:$merge,
+ vti.RegClass:$rs1,
+ (DecImm simm5_plus1:$rs2),
+ (vti.Mask V0),
+ GPR:$vl,
+ vti.SEW)>;
+
+ // Special cases to avoid matching vmsltu.vi 0 (always false) to
+ // vmsleu.vi -1 (always true). Instead match to vmsne.vv.
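+ // (with both operands tied to $rs1, vmsne.vv compares every element with
+ // itself, producing an all-zeros mask, i.e. always false)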
+ def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar 0), (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
+ vti.RegClass:$rs1,
+ GPR:$vl,
+ vti.SEW)>;
+ def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar 0),
+ (vti.Mask V0),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK")
+ VR:$merge,
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs1,
+ (vti.Mask V0),
+ GPR:$vl,
+ vti.SEW)>;
+}
+
+//===----------------------------------------------------------------------===//
+// 12.9. Vector Integer Min/Max Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vminu", "PseudoVMINU", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vmin", "PseudoVMIN", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vmaxu", "PseudoVMAXU", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vmax", "PseudoVMAX", AllIntegerVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.10. Vector Single-Width Integer Multiply Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vmul", "PseudoVMUL", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vmulh", "PseudoVMULH", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vmulhu", "PseudoVMULHU", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vmulhsu", "PseudoVMULHSU", AllIntegerVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.11. Vector Integer Divide Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vdivu", "PseudoVDIVU", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vdiv", "PseudoVDIV", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vremu", "PseudoVREMU", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vrem", "PseudoVREM", AllIntegerVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.12. Vector Widening Integer Multiply Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryW_VV_VX<"int_riscv_vwmul", "PseudoVWMUL", AllWidenableIntVectors>;
+defm "" : VPatBinaryW_VV_VX<"int_riscv_vwmulu", "PseudoVWMULU", AllWidenableIntVectors>;
+defm "" : VPatBinaryW_VV_VX<"int_riscv_vwmulsu", "PseudoVWMULSU", AllWidenableIntVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.13. Vector Single-Width Integer Multiply-Add Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vmadd", "PseudoVMADD", AllIntegerVectors>;
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vnmsub", "PseudoVNMSUB", AllIntegerVectors>;
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vmacc", "PseudoVMACC", AllIntegerVectors>;
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vnmsac", "PseudoVNMSAC", AllIntegerVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.14. Vector Widening Integer Multiply-Add Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmaccu", "PseudoVWMACCU", AllWidenableIntVectors>;
+defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmacc", "PseudoVWMACC", AllWidenableIntVectors>;
+defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmaccsu", "PseudoVWMACCSU", AllWidenableIntVectors>;
+defm "" : VPatTernaryW_VX<"int_riscv_vwmaccus", "PseudoVWMACCUS", AllWidenableIntVectors>;
+
+//===----------------------------------------------------------------------===//
+// 12.16. Vector Integer Merge Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">;
+
+//===----------------------------------------------------------------------===//
+// 12.17. Vector Integer Move Instructions
+//===----------------------------------------------------------------------===//
+foreach vti = AllVectors in {
+ def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$rs1),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
+ $rs1, GPR:$vl, vti.SEW)>;
+}
+
+foreach vti = AllIntegerVectors in {
+ def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
+ $rs2, GPR:$vl, vti.SEW)>;
+ def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
+ simm5:$imm5, GPR:$vl, vti.SEW)>;
+}
+
+//===----------------------------------------------------------------------===//
+// 13.1. Vector Single-Width Saturating Add and Subtract
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsaddu", "PseudoVSADDU", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsadd", "PseudoVSADD", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vssubu", "PseudoVSSUBU", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vssub", "PseudoVSSUB", AllIntegerVectors>;
+
+//===----------------------------------------------------------------------===//
+// 13.2. Vector Single-Width Averaging Add and Subtract
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vaaddu", "PseudoVAADDU", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vaadd", "PseudoVAADD", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vasubu", "PseudoVASUBU", AllIntegerVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vasub", "PseudoVASUB", AllIntegerVectors>;
+
+//===----------------------------------------------------------------------===//
+// 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vsmul", "PseudoVSMUL", AllIntegerVectors>;
+
+//===----------------------------------------------------------------------===//
+// 13.4. Vector Single-Width Scaling Shift Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vssrl", "PseudoVSSRL", AllIntegerVectors,
+ uimm5>;
+defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vssra", "PseudoVSSRA", AllIntegerVectors,
+ uimm5>;
+
+//===----------------------------------------------------------------------===//
+// 13.5. Vector Narrowing Fixed-Point Clip Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_WV_WX_WI<"int_riscv_vnclipu", "PseudoVNCLIPU", AllWidenableIntVectors>;
+defm "" : VPatBinaryV_WV_WX_WI<"int_riscv_vnclip", "PseudoVNCLIP", AllWidenableIntVectors>;
+
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+//===----------------------------------------------------------------------===//
+// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vfadd", "PseudoVFADD", AllFloatVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsub", "PseudoVFSUB", AllFloatVectors>;
+defm "" : VPatBinaryV_VX<"int_riscv_vfrsub", "PseudoVFRSUB", AllFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.3. Vector Widening Floating-Point Add/Subtract Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryW_VV_VX<"int_riscv_vfwadd", "PseudoVFWADD", AllWidenableFloatVectors>;
+defm "" : VPatBinaryW_VV_VX<"int_riscv_vfwsub", "PseudoVFWSUB", AllWidenableFloatVectors>;
+defm "" : VPatBinaryW_WV_WX<"int_riscv_vfwadd_w", "PseudoVFWADD", AllWidenableFloatVectors>;
+defm "" : VPatBinaryW_WV_WX<"int_riscv_vfwsub_w", "PseudoVFWSUB", AllWidenableFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vfmul", "PseudoVFMUL", AllFloatVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vfdiv", "PseudoVFDIV", AllFloatVectors>;
+defm "" : VPatBinaryV_VX<"int_riscv_vfrdiv", "PseudoVFRDIV", AllFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.5. Vector Widening Floating-Point Multiply
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryW_VV_VX<"int_riscv_vfwmul", "PseudoVFWMUL", AllWidenableFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmacc", "PseudoVFMACC", AllFloatVectors>;
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmacc", "PseudoVFNMACC", AllFloatVectors>;
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmsac", "PseudoVFMSAC", AllFloatVectors>;
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmsac", "PseudoVFNMSAC", AllFloatVectors>;
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmadd", "PseudoVFMADD", AllFloatVectors>;
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmadd", "PseudoVFNMADD", AllFloatVectors>;
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmsub", "PseudoVFMSUB", AllFloatVectors>;
+defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmsub", "PseudoVFNMSUB", AllFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwmacc", "PseudoVFWMACC", AllWidenableFloatVectors>;
+defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwnmacc", "PseudoVFWNMACC", AllWidenableFloatVectors>;
+defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwmsac", "PseudoVFWMSAC", AllWidenableFloatVectors>;
+defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwnmsac", "PseudoVFWNMSAC", AllWidenableFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.8. Vector Floating-Point Square-Root Instruction
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryV_V<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7", AllFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.10. Vector Floating-Point Reciprocal Estimate Instruction
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryV_V<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.11. Vector Floating-Point Min/Max Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN", AllFloatVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX", AllFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.12. Vector Floating-Point Sign-Injection Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ", AllFloatVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN", AllFloatVectors>;
+defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX", AllFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.13. Vector Floating-Point Compare Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryM_VV_VX<"int_riscv_vmfeq", "PseudoVMFEQ", AllFloatVectors>;
+defm "" : VPatBinaryM_VV_VX<"int_riscv_vmfle", "PseudoVMFLE", AllFloatVectors>;
+defm "" : VPatBinaryM_VV_VX<"int_riscv_vmflt", "PseudoVMFLT", AllFloatVectors>;
+defm "" : VPatBinaryM_VV_VX<"int_riscv_vmfne", "PseudoVMFNE", AllFloatVectors>;
+defm "" : VPatBinaryM_VX<"int_riscv_vmfgt", "PseudoVMFGT", AllFloatVectors>;
+defm "" : VPatBinaryM_VX<"int_riscv_vmfge", "PseudoVMFGE", AllFloatVectors>;
+
+//===----------------------------------------------------------------------===//
+// 14.14. Vector Floating-Point Classify Instruction
+//===----------------------------------------------------------------------===//
+defm "" : VPatConversionVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">;
+
+//===----------------------------------------------------------------------===//
+// 14.15. Vector Floating-Point Merge Instruction
+//===----------------------------------------------------------------------===//
+// We can use vmerge.vvm to support vector-vector vfmerge.
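+// (a merge only selects whole elements under the mask and never interprets
+// their values, so the integer vmerge.vvm pseudo handles FP element types too)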
+defm "" : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE",
+ /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
+defm "" : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE",
+ /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
+
+foreach fvti = AllFloatVectors in {
+ defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
+ def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$rs2),
+ (fvti.Scalar (fpimm0)),
+ (fvti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
+ (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.SEW)>;
+}
+
+//===----------------------------------------------------------------------===//
+// 14.16. Vector Floating-Point Move Instruction
+//===----------------------------------------------------------------------===//
+foreach fvti = AllFloatVectors in {
+ // If we're splatting fpimm0 (+0.0, an all-zeros bit pattern), select vmv.v.i vd, 0.
+ def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
+ (fvti.Scalar (fpimm0)), (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
+ 0, GPR:$vl, fvti.SEW)>;
+
+ def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
+ (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" #
+ fvti.LMul.MX)
+ (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ GPR:$vl, fvti.SEW)>;
+}
+
+//===----------------------------------------------------------------------===//
+// 14.17. Single-Width Floating-Point/Integer Type-Convert Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatConversionVI_VF<"int_riscv_vfcvt_xu_f_v", "PseudoVFCVT_XU_F">;
+defm "" : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_xu_f_v", "PseudoVFCVT_RTZ_XU_F">;
+defm "" : VPatConversionVI_VF<"int_riscv_vfcvt_x_f_v", "PseudoVFCVT_X_F">;
+defm "" : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_x_f_v", "PseudoVFCVT_RTZ_X_F">;
+defm "" : VPatConversionVF_VI<"int_riscv_vfcvt_f_x_v", "PseudoVFCVT_F_X">;
+defm "" : VPatConversionVF_VI<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU">;
+
+//===----------------------------------------------------------------------===//
+// 14.18. Widening Floating-Point/Integer Type-Convert Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatConversionWI_VF<"int_riscv_vfwcvt_xu_f_v", "PseudoVFWCVT_XU_F">;
+defm "" : VPatConversionWI_VF<"int_riscv_vfwcvt_x_f_v", "PseudoVFWCVT_X_F">;
+defm "" : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_xu_f_v", "PseudoVFWCVT_RTZ_XU_F">;
+defm "" : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_x_f_v", "PseudoVFWCVT_RTZ_X_F">;
+defm "" : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">;
+defm "" : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X">;
+defm "" : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">;
+
+//===----------------------------------------------------------------------===//
+// 14.19. Narrowing Floating-Point/Integer Type-Convert Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatConversionVI_WF<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F">;
+defm "" : VPatConversionVI_WF<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F">;
+defm "" : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F">;
+defm "" : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F">;
+defm "" : VPatConversionVF_WI <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">;
+defm "" : VPatConversionVF_WI <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">;
+defm "" : VPatConversionVF_WF<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">;
+defm "" : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">;
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+let Predicates = [HasStdExtV] in {
+//===----------------------------------------------------------------------===//
+// 15.1. Vector Single-Width Integer Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatReductionV_VS<"int_riscv_vredsum", "PseudoVREDSUM">;
+defm "" : VPatReductionV_VS<"int_riscv_vredand", "PseudoVREDAND">;
+defm "" : VPatReductionV_VS<"int_riscv_vredor", "PseudoVREDOR">;
+defm "" : VPatReductionV_VS<"int_riscv_vredxor", "PseudoVREDXOR">;
+defm "" : VPatReductionV_VS<"int_riscv_vredminu", "PseudoVREDMINU">;
+defm "" : VPatReductionV_VS<"int_riscv_vredmin", "PseudoVREDMIN">;
+defm "" : VPatReductionV_VS<"int_riscv_vredmaxu", "PseudoVREDMAXU">;
+defm "" : VPatReductionV_VS<"int_riscv_vredmax", "PseudoVREDMAX">;
+
+//===----------------------------------------------------------------------===//
+// 15.2. Vector Widening Integer Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatReductionW_VS<"int_riscv_vwredsumu", "PseudoVWREDSUMU">;
+defm "" : VPatReductionW_VS<"int_riscv_vwredsum", "PseudoVWREDSUM">;
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+//===----------------------------------------------------------------------===//
+// 15.3. Vector Single-Width Floating-Point Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatReductionV_VS<"int_riscv_vfredosum", "PseudoVFREDOSUM", /*IsFloat=*/1>;
+defm "" : VPatReductionV_VS<"int_riscv_vfredsum", "PseudoVFREDSUM", /*IsFloat=*/1>;
+defm "" : VPatReductionV_VS<"int_riscv_vfredmin", "PseudoVFREDMIN", /*IsFloat=*/1>;
+defm "" : VPatReductionV_VS<"int_riscv_vfredmax", "PseudoVFREDMAX", /*IsFloat=*/1>;
+
+//===----------------------------------------------------------------------===//
+// 15.4. Vector Widening Floating-Point Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatReductionW_VS<"int_riscv_vfwredsum", "PseudoVFWREDSUM", /*IsFloat=*/1>;
+defm "" : VPatReductionW_VS<"int_riscv_vfwredosum", "PseudoVFWREDOSUM", /*IsFloat=*/1>;
+
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+//===----------------------------------------------------------------------===//
+// 16. Vector Mask Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtV] in {
+//===----------------------------------------------------------------------===//
+// 16.1 Vector Mask-Register Logical Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatBinaryM_MM<"int_riscv_vmand", "PseudoVMAND">;
+defm "" : VPatBinaryM_MM<"int_riscv_vmnand", "PseudoVMNAND">;
+defm "" : VPatBinaryM_MM<"int_riscv_vmandnot", "PseudoVMANDNOT">;
+defm "" : VPatBinaryM_MM<"int_riscv_vmxor", "PseudoVMXOR">;
+defm "" : VPatBinaryM_MM<"int_riscv_vmor", "PseudoVMOR">;
+defm "" : VPatBinaryM_MM<"int_riscv_vmnor", "PseudoVMNOR">;
+defm "" : VPatBinaryM_MM<"int_riscv_vmornot", "PseudoVMORNOT">;
+defm "" : VPatBinaryM_MM<"int_riscv_vmxnor", "PseudoVMXNOR">;
+
+// Pseudo instructions (the spec defines vmclr.m/vmset.m as assembler pseudos)
+defm "" : VPatNullaryM<"int_riscv_vmclr", "PseudoVMCLR">;
+defm "" : VPatNullaryM<"int_riscv_vmset", "PseudoVMSET">;
+
+//===----------------------------------------------------------------------===//
+// 16.2. Vector mask population count vpopc
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryS_M<"int_riscv_vpopc", "PseudoVPOPC">;
+
+//===----------------------------------------------------------------------===//
+// 16.3. vfirst find-first-set mask bit
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryS_M<"int_riscv_vfirst", "PseudoVFIRST">;
+
+//===----------------------------------------------------------------------===//
+// 16.4. vmsbf.m set-before-first mask bit
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryM_M<"int_riscv_vmsbf", "PseudoVMSBF">;
+
+//===----------------------------------------------------------------------===//
+// 16.5. vmsif.m set-including-first mask bit
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryM_M<"int_riscv_vmsif", "PseudoVMSIF">;
+
+//===----------------------------------------------------------------------===//
+// 16.6. vmsof.m set-only-first mask bit
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryM_M<"int_riscv_vmsof", "PseudoVMSOF">;
+
+//===----------------------------------------------------------------------===//
+// 16.8. Vector Iota Instruction
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryV_M<"int_riscv_viota", "PseudoVIOTA">;
+
+//===----------------------------------------------------------------------===//
+// 16.9. Vector Element Index Instruction
+//===----------------------------------------------------------------------===//
+defm "" : VPatNullaryV<"int_riscv_vid", "PseudoVID">;
+
+} // Predicates = [HasStdExtV]
+
+//===----------------------------------------------------------------------===//
+// 17. Vector Permutation Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 17.1. Integer Scalar Move Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtV] in {
+foreach vti = AllIntegerVectors in {
+ def : Pat<(riscv_vmv_x_s (vti.Vector vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMV_X_S_" # vti.LMul.MX) $rs2, vti.SEW)>;
+ def : Pat<(vti.Vector (int_riscv_vmv_s_x (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMV_S_X_" # vti.LMul.MX)
+ (vti.Vector $rs1), $rs2, GPR:$vl, vti.SEW)>;
+}
+} // Predicates = [HasStdExtV]
+
+//===----------------------------------------------------------------------===//
+// 17.2. Floating-Point Scalar Move Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+foreach fvti = AllFloatVectors in {
+ defvar instr = !cast<Instruction>("PseudoVFMV_"#fvti.ScalarSuffix#"_S_" #
+ fvti.LMul.MX);
+ def : Pat<(fvti.Scalar (int_riscv_vfmv_f_s (fvti.Vector fvti.RegClass:$rs2))),
+ (instr $rs2, fvti.SEW)>;
+
+ def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVFMV_S_"#fvti.ScalarSuffix#"_" #
+ fvti.LMul.MX)
+ (fvti.Vector $rs1),
+ (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ GPR:$vl, fvti.SEW)>;
+}
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+//===----------------------------------------------------------------------===//
+// 17.3. Vector Slide Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtV] in {
+ defm "" : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllIntegerVectors, uimm5>;
+ defm "" : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllIntegerVectors, uimm5>;
+ defm "" : VPatBinaryV_VX<"int_riscv_vslide1up", "PseudoVSLIDE1UP", AllIntegerVectors>;
+ defm "" : VPatBinaryV_VX<"int_riscv_vslide1down", "PseudoVSLIDE1DOWN", AllIntegerVectors>;
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+ defm "" : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllFloatVectors, uimm5>;
+ defm "" : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllFloatVectors, uimm5>;
+ defm "" : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP", AllFloatVectors>;
+ defm "" : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN", AllFloatVectors>;
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+//===----------------------------------------------------------------------===//
+// 17.4. Vector Register Gather Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtV] in {
+ defm "" : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
+ AllIntegerVectors, uimm5>;
+ defm "" : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16", "PseudoVRGATHEREI16",
+ /* eew */ 16, AllIntegerVectors>;
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+ defm "" : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
+ AllFloatVectors, uimm5>;
+ defm "" : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16", "PseudoVRGATHEREI16",
+ /* eew */ 16, AllFloatVectors>;
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+//===----------------------------------------------------------------------===//
+// 17.5. Vector Compress Instruction
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtV] in {
+ defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+ defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+// Include the non-intrinsic ISel patterns
+include "RISCVInstrInfoVSDPatterns.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
new file mode 100644
index 000000000000..dee67708bed1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -0,0 +1,643 @@
+//===- RISCVInstrInfoVSDPatterns.td - RVV SDNode patterns --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file contains the required infrastructure and SDNode patterns to
+/// support code generation for the standard 'V' (Vector) extension, version
+/// 0.10. This version is still experimental as the 'V' extension hasn't been
+/// ratified yet.
+///
+/// This file is included from and depends upon RISCVInstrInfoVPseudos.td
+///
+/// Note: the patterns for RVV intrinsics are found in
+/// RISCVInstrInfoVPseudos.td.
+///
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Helpers to define the SDNode patterns.
+//===----------------------------------------------------------------------===//
+
+def SDTSplatI64 : SDTypeProfile<1, 1, [
+ SDTCVecEltisVT<0, i64>, SDTCisVT<1, i32>
+]>;
+
+def rv32_splat_i64 : SDNode<"RISCVISD::SPLAT_VECTOR_I64", SDTSplatI64>;
+
+def riscv_trunc_vector : SDNode<"RISCVISD::TRUNCATE_VECTOR",
+ SDTypeProfile<1, 1,
+ [SDTCisVec<0>, SDTCisVec<1>]>>;
+
+// Penalize the generic form with Complexity=1 to give the simm5/uimm5 variants
+// precedence
+def SplatPat : ComplexPattern<vAny, 1, "selectVSplat", [], [], 1>;
+
+def SplatPat_simm5 : ComplexPattern<vAny, 1, "selectVSplatSimm5", []>;
+def SplatPat_uimm5 : ComplexPattern<vAny, 1, "selectVSplatUimm5", []>;
+
+class SwapHelper<dag Prefix, dag A, dag B, dag Suffix, bit swap> {
+ dag Value = !con(Prefix, !if(swap, B, A), !if(swap, A, B), Suffix);
+}
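+// For illustration: SwapHelper<(inst), (inst $a), (inst $b), (inst avl, sew), 0>
+// concatenates to (inst $a, $b, avl, sew), while swap=1 yields
+// (inst $b, $a, avl, sew). The setcc multiclasses below use this to reuse an
+// instruction such as vmslt for the opposite comparison by swapping operands.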
+
+multiclass VPatUSLoadStoreSDNode<LLVMType type,
+ LLVMType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ OutPatFrag avl,
+ RegisterClass reg_rs1,
+ VReg reg_class>
+{
+ defvar load_instr = !cast<Instruction>("PseudoVLE"#sew#"_V_"#vlmul.MX);
+ defvar store_instr = !cast<Instruction>("PseudoVSE"#sew#"_V_"#vlmul.MX);
+ // Load
+ def : Pat<(type (load reg_rs1:$rs1)),
+ (load_instr reg_rs1:$rs1, avl, sew)>;
+ // Store
+ def : Pat<(store type:$rs2, reg_rs1:$rs1),
+ (store_instr reg_class:$rs2, reg_rs1:$rs1, avl, sew)>;
+}
+
+multiclass VPatUSLoadStoreSDNodes<RegisterClass reg_rs1> {
+ foreach vti = AllVectors in
+ defm "" : VPatUSLoadStoreSDNode<vti.Vector, vti.Mask, vti.SEW, vti.LMul,
+ vti.AVL, reg_rs1, vti.RegClass>;
+}
+
+class VPatBinarySDNode_VV<SDNode vop,
+ string instruction_name,
+ ValueType result_type,
+ ValueType op_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ OutPatFrag avl,
+ VReg RetClass,
+ VReg op_reg_class> :
+ Pat<(result_type (vop
+ (op_type op_reg_class:$rs1),
+ (op_type op_reg_class:$rs2))),
+ (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX)
+ op_reg_class:$rs1,
+ op_reg_class:$rs2,
+ avl, sew)>;
+
+class VPatBinarySDNode_XI<SDNode vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType vop_type,
+ ValueType xop_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ OutPatFrag avl,
+ VReg RetClass,
+ VReg vop_reg_class,
+ ComplexPattern SplatPatKind,
+ DAGOperand xop_kind> :
+ Pat<(result_type (vop
+ (vop_type vop_reg_class:$rs1),
+ (vop_type (SplatPatKind xop_kind:$rs2)))),
+ (!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX)
+ vop_reg_class:$rs1,
+ xop_kind:$rs2,
+ avl, sew)>;
+
+multiclass VPatBinarySDNode_VV_VX<SDNode vop, string instruction_name>
+{
+ foreach vti = AllIntegerVectors in {
+ def : VPatBinarySDNode_VV<vop, instruction_name,
+ vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+ vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
+ def : VPatBinarySDNode_XI<vop, instruction_name, "VX",
+ vti.Vector, vti.Vector, XLenVT, vti.Mask, vti.SEW,
+ vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
+ SplatPat, GPR>;
+ }
+}
+
+multiclass VPatBinarySDNode_VV_VX_VI<SDNode vop, string instruction_name,
+ Operand ImmType = simm5>
+{
+ foreach vti = AllIntegerVectors in {
+ def : VPatBinarySDNode_VV<vop, instruction_name,
+ vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+ vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
+ def : VPatBinarySDNode_XI<vop, instruction_name, "VX",
+ vti.Vector, vti.Vector, XLenVT, vti.Mask, vti.SEW,
+ vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
+ SplatPat, GPR>;
+ def : VPatBinarySDNode_XI<vop, instruction_name, "VI",
+ vti.Vector, vti.Vector, XLenVT, vti.Mask, vti.SEW,
+ vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
+ !cast<ComplexPattern>(SplatPat#_#ImmType),
+ ImmType>;
+ }
+}
+
+class VPatBinarySDNode_VF<SDNode vop,
+ string instruction_name,
+ ValueType result_type,
+ ValueType vop_type,
+ ValueType xop_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ OutPatFrag avl,
+ VReg RetClass,
+ VReg vop_reg_class,
+ DAGOperand xop_kind> :
+ Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
+ (vop_type (splat_vector xop_kind:$rs2)))),
+ (!cast<Instruction>(instruction_name#"_"#vlmul.MX)
+ vop_reg_class:$rs1,
+ (xop_type xop_kind:$rs2),
+ avl, sew)>;
+
+multiclass VPatBinaryFPSDNode_VV_VF<SDNode vop, string instruction_name> {
+ foreach vti = AllFloatVectors in {
+ def : VPatBinarySDNode_VV<vop, instruction_name,
+ vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+ vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
+ def : VPatBinarySDNode_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
+ vti.SEW, vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
+ vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryFPSDNode_R_VF<SDNode vop, string instruction_name> {
+ foreach fvti = AllFloatVectors in
+ def : Pat<(fvti.Vector (vop (fvti.Vector (splat_vector fvti.Scalar:$rs2)),
+ (fvti.Vector fvti.RegClass:$rs1))),
+ (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
+ fvti.RegClass:$rs1,
+ (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ fvti.AVL, fvti.SEW)>;
+}
+
+multiclass VPatIntegerSetCCSDNode_VV<CondCode cc,
+ string instruction_name,
+ bit swap = 0> {
+ foreach vti = AllIntegerVectors in {
+ defvar instruction = !cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX);
+ def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2), cc)),
+ SwapHelper<(instruction),
+ (instruction vti.RegClass:$rs1),
+ (instruction vti.RegClass:$rs2),
+ (instruction vti.AVL, vti.SEW),
+ swap>.Value>;
+ }
+}
+
+multiclass VPatIntegerSetCCSDNode_XI<CondCode cc,
+ string instruction_name,
+ string kind,
+ ComplexPattern SplatPatKind,
+ DAGOperand xop_kind,
+ bit swap = 0> {
+ foreach vti = AllIntegerVectors in {
+ defvar instruction = !cast<Instruction>(instruction_name#_#kind#_#vti.LMul.MX);
+ def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPatKind xop_kind:$rs2)), cc)),
+ SwapHelper<(instruction),
+ (instruction vti.RegClass:$rs1),
+ (instruction xop_kind:$rs2),
+ (instruction vti.AVL, vti.SEW),
+ swap>.Value>;
+ }
+}
+
+multiclass VPatIntegerSetCCSDNode_VV_VX_VI<CondCode cc,
+ string instruction_name,
+ bit swap = 0> {
+ defm : VPatIntegerSetCCSDNode_VV<cc, instruction_name, swap>;
+ defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VX",
+ SplatPat, GPR, swap>;
+ defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VI",
+ SplatPat_simm5, simm5, swap>;
+}
+
+multiclass VPatIntegerSetCCSDNode_VV_VX<CondCode cc,
+ string instruction_name,
+ bit swap = 0> {
+ defm : VPatIntegerSetCCSDNode_VV<cc, instruction_name, swap>;
+ defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VX",
+ SplatPat, GPR, swap>;
+}
+
+multiclass VPatIntegerSetCCSDNode_VX_VI<CondCode cc,
+ string instruction_name,
+ bit swap = 0> {
+ defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VX",
+ SplatPat, GPR, swap>;
+ defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VI",
+ SplatPat_simm5, simm5, swap>;
+}
+
+multiclass VPatFPSetCCSDNode_VV<CondCode cc, string instruction_name> {
+ foreach fvti = AllFloatVectors in
+ def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Vector fvti.RegClass:$rs2),
+ cc)),
+ (!cast<Instruction>(instruction_name#"_VV_"#fvti.LMul.MX)
+ fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>;
+}
+
+multiclass VPatFPSetCCSDNode_VF<CondCode cc, string instruction_name> {
+ foreach fvti = AllFloatVectors in
+ def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Vector (splat_vector fvti.ScalarRegClass:$rs2)),
+ cc)),
+ (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
+ fvti.RegClass:$rs1,
+ (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ fvti.AVL, fvti.SEW)>;
+}
+
+multiclass VPatFPSetCCSDNode_FV<CondCode cc, string swapped_op_instruction_name> {
+ foreach fvti = AllFloatVectors in
+ def : Pat<(fvti.Mask (setcc (fvti.Vector (splat_vector fvti.ScalarRegClass:$rs2)),
+ (fvti.Vector fvti.RegClass:$rs1),
+ cc)),
+ (!cast<Instruction>(swapped_op_instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
+ fvti.RegClass:$rs1,
+ (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ fvti.AVL, fvti.SEW)>;
+}
+
+multiclass VPatFPSetCCSDNode_VV_VF_FV<CondCode cc,
+ string inst_name,
+ string swapped_op_inst_name> {
+ defm : VPatFPSetCCSDNode_VV<cc, inst_name>;
+ defm : VPatFPSetCCSDNode_VF<cc, inst_name>;
+ defm : VPatFPSetCCSDNode_FV<cc, swapped_op_inst_name>;
+}
+
+multiclass VPatExtendSDNode_V<list<SDNode> ops, string inst_name, string suffix,
+ list <VTypeInfoToFraction> fraction_list> {
+ foreach vtiTofti = fraction_list in {
+ defvar vti = vtiTofti.Vti;
+ defvar fti = vtiTofti.Fti;
+ foreach op = ops in
+ def : Pat<(vti.Vector (op (fti.Vector fti.RegClass:$rs2))),
+ (!cast<Instruction>(inst_name#"_"#suffix#"_"#vti.LMul.MX)
+ fti.RegClass:$rs2, fti.AVL, vti.SEW)>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtV] in {
+
+// 7.4. Vector Unit-Stride Instructions
+defm "" : VPatUSLoadStoreSDNodes<GPR>;
+defm "" : VPatUSLoadStoreSDNodes<AddrFI>;
+
+// 12.1. Vector Single-Width Integer Add and Subtract
+defm "" : VPatBinarySDNode_VV_VX_VI<add, "PseudoVADD">;
+defm "" : VPatBinarySDNode_VV_VX<sub, "PseudoVSUB">;
+// Handle VRSUB specially since it's the only integer binary op with reversed
+// pattern operands
+foreach vti = AllIntegerVectors in {
+ def : Pat<(sub (vti.Vector (SplatPat XLenVT:$rs2)),
+ (vti.Vector vti.RegClass:$rs1)),
+ (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
+ vti.RegClass:$rs1, GPR:$rs2, vti.AVL, vti.SEW)>;
+ def : Pat<(sub (vti.Vector (SplatPat_simm5 XLenVT:$rs2)),
+ (vti.Vector vti.RegClass:$rs1)),
+ (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
+ vti.RegClass:$rs1, simm5:$rs2, vti.AVL, vti.SEW)>;
+}
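+// (vrsub.vx vd, vs2, rs1 computes x[rs1] - vs2[i], which is exactly the
+// splat-minus-vector form matched above; likewise vrsub.vi with a 5-bit
+// immediate.)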
+
+// 12.3. Vector Integer Extension
+defm "" : VPatExtendSDNode_V<[zext, anyext], "PseudoVZEXT", "VF2",
+ AllFractionableVF2IntVectors>;
+defm "" : VPatExtendSDNode_V<[sext], "PseudoVSEXT", "VF2",
+ AllFractionableVF2IntVectors>;
+defm "" : VPatExtendSDNode_V<[zext, anyext], "PseudoVZEXT", "VF4",
+ AllFractionableVF4IntVectors>;
+defm "" : VPatExtendSDNode_V<[sext], "PseudoVSEXT", "VF4",
+ AllFractionableVF4IntVectors>;
+defm "" : VPatExtendSDNode_V<[zext, anyext], "PseudoVZEXT", "VF8",
+ AllFractionableVF8IntVectors>;
+defm "" : VPatExtendSDNode_V<[sext], "PseudoVSEXT", "VF8",
+ AllFractionableVF8IntVectors>;
+
+// 12.5. Vector Bitwise Logical Instructions
+defm "" : VPatBinarySDNode_VV_VX_VI<and, "PseudoVAND">;
+defm "" : VPatBinarySDNode_VV_VX_VI<or, "PseudoVOR">;
+defm "" : VPatBinarySDNode_VV_VX_VI<xor, "PseudoVXOR">;
+
+// 12.6. Vector Single-Width Bit Shift Instructions
+defm "" : VPatBinarySDNode_VV_VX_VI<shl, "PseudoVSLL", uimm5>;
+defm "" : VPatBinarySDNode_VV_VX_VI<srl, "PseudoVSRL", uimm5>;
+defm "" : VPatBinarySDNode_VV_VX_VI<sra, "PseudoVSRA", uimm5>;
+
+// 12.7. Vector Narrowing Integer Right Shift Instructions
+foreach vtiTofti = AllFractionableVF2IntVectors in {
+ defvar vti = vtiTofti.Vti;
+ defvar fti = vtiTofti.Fti;
+ def : Pat<(fti.Vector (riscv_trunc_vector (vti.Vector vti.RegClass:$rs1))),
+ (!cast<Instruction>("PseudoVNSRL_WI_"#fti.LMul.MX)
+ vti.RegClass:$rs1, 0, fti.AVL, fti.SEW)>;
+}
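+// (Truncation keeps the low half of each element, so it is implemented as a
+// narrowing logical shift right by zero: vnsrl.wi vd, vs2, 0.)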
+
+// 12.8. Vector Integer Comparison Instructions
+defm "" : VPatIntegerSetCCSDNode_VV_VX_VI<SETEQ, "PseudoVMSEQ">;
+defm "" : VPatIntegerSetCCSDNode_VV_VX_VI<SETNE, "PseudoVMSNE">;
+
+// FIXME: Support immediate forms of these by choosing SLE and decrementing the
+// immediate
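+// e.g. x < 5 is equivalent to x <= 4, so the immediate form could map onto
+// vmsle.vi with the decremented immediate (when imm-1 still fits in simm5).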
+defm "" : VPatIntegerSetCCSDNode_VV_VX<SETLT, "PseudoVMSLT">;
+defm "" : VPatIntegerSetCCSDNode_VV_VX<SETULT, "PseudoVMSLTU">;
+
+defm "" : VPatIntegerSetCCSDNode_VV<SETGT, "PseudoVMSLT", /*swap*/1>;
+defm "" : VPatIntegerSetCCSDNode_VV<SETUGT, "PseudoVMSLTU", /*swap*/1>;
+defm "" : VPatIntegerSetCCSDNode_VX_VI<SETGT, "PseudoVMSGT">;
+defm "" : VPatIntegerSetCCSDNode_VX_VI<SETUGT, "PseudoVMSGTU">;
+
+defm "" : VPatIntegerSetCCSDNode_VV_VX_VI<SETLE, "PseudoVMSLE">;
+defm "" : VPatIntegerSetCCSDNode_VV_VX_VI<SETULE, "PseudoVMSLEU">;
+
+// FIXME: Support immediate forms of these by choosing SGT and decrementing the
+// immediate
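+// e.g. x >= 5 is equivalent to x > 4, so the immediate form could map onto
+// vmsgt.vi with the decremented immediate (when imm-1 still fits in simm5).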
+defm "" : VPatIntegerSetCCSDNode_VV<SETGE, "PseudoVMSLE", /*swap*/1>;
+defm "" : VPatIntegerSetCCSDNode_VV<SETUGE, "PseudoVMSLEU", /*swap*/1>;
+
+// 12.9. Vector Integer Min/Max Instructions
+defm "" : VPatBinarySDNode_VV_VX<umin, "PseudoVMINU">;
+defm "" : VPatBinarySDNode_VV_VX<smin, "PseudoVMIN">;
+defm "" : VPatBinarySDNode_VV_VX<umax, "PseudoVMAXU">;
+defm "" : VPatBinarySDNode_VV_VX<smax, "PseudoVMAX">;
+
+// 12.10. Vector Single-Width Integer Multiply Instructions
+defm "" : VPatBinarySDNode_VV_VX<mul, "PseudoVMUL">;
+defm "" : VPatBinarySDNode_VV_VX<mulhs, "PseudoVMULH">;
+defm "" : VPatBinarySDNode_VV_VX<mulhu, "PseudoVMULHU">;
+
+// 12.11. Vector Integer Divide Instructions
+defm "" : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIVU">;
+defm "" : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV">;
+defm "" : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU">;
+defm "" : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">;
+
+// 12.16. Vector Integer Merge Instructions
+foreach vti = AllIntegerVectors in {
+ def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), vti.RegClass:$rs1,
+ vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
+ vti.RegClass:$rs2, vti.RegClass:$rs1, VMV0:$vm,
+ vti.AVL, vti.SEW)>;
+
+ def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat XLenVT:$rs1),
+ vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
+ vti.RegClass:$rs2, GPR:$rs1, VMV0:$vm, vti.AVL, vti.SEW)>;
+
+ def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat_simm5 simm5:$rs1),
+ vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
+ vti.RegClass:$rs2, simm5:$rs1, VMV0:$vm, vti.AVL, vti.SEW)>;
+}
+
+// 16.1. Vector Mask-Register Logical Instructions
+foreach mti = AllMasks in {
+ def : Pat<(mti.Mask (and VR:$rs1, VR:$rs2)),
+ (!cast<Instruction>("PseudoVMAND_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>;
+ def : Pat<(mti.Mask (or VR:$rs1, VR:$rs2)),
+ (!cast<Instruction>("PseudoVMOR_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>;
+ def : Pat<(mti.Mask (xor VR:$rs1, VR:$rs2)),
+ (!cast<Instruction>("PseudoVMXOR_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>;
+
+ def : Pat<(mti.Mask (vnot (and VR:$rs1, VR:$rs2))),
+ (!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>;
+ def : Pat<(mti.Mask (vnot (or VR:$rs1, VR:$rs2))),
+ (!cast<Instruction>("PseudoVMNOR_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>;
+ def : Pat<(mti.Mask (vnot (xor VR:$rs1, VR:$rs2))),
+ (!cast<Instruction>("PseudoVMXNOR_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>;
+
+ def : Pat<(mti.Mask (and VR:$rs1, (vnot VR:$rs2))),
+ (!cast<Instruction>("PseudoVMANDNOT_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>;
+ def : Pat<(mti.Mask (or VR:$rs1, (vnot VR:$rs2))),
+ (!cast<Instruction>("PseudoVMORNOT_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>;
+}
+
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+
+// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+defm "" : VPatBinaryFPSDNode_VV_VF<fadd, "PseudoVFADD">;
+defm "" : VPatBinaryFPSDNode_VV_VF<fsub, "PseudoVFSUB">;
+defm "" : VPatBinaryFPSDNode_R_VF<fsub, "PseudoVFRSUB">;
+
+// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+defm "" : VPatBinaryFPSDNode_VV_VF<fmul, "PseudoVFMUL">;
+defm "" : VPatBinaryFPSDNode_VV_VF<fdiv, "PseudoVFDIV">;
+defm "" : VPatBinaryFPSDNode_R_VF<fdiv, "PseudoVFRDIV">;
+
+// 14.13. Vector Floating-Point Compare Instructions
+defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETEQ, "PseudoVMFEQ", "PseudoVMFEQ">;
+defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOEQ, "PseudoVMFEQ", "PseudoVMFEQ">;
+
+defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETNE, "PseudoVMFNE", "PseudoVMFNE">;
+defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETUNE, "PseudoVMFNE", "PseudoVMFNE">;
+
+defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETLT, "PseudoVMFLT", "PseudoVMFGT">;
+defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOLT, "PseudoVMFLT", "PseudoVMFGT">;
+
+defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETLE, "PseudoVMFLE", "PseudoVMFGE">;
+defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;
+
+// Floating-point vselects:
+// 12.16. Vector Integer Merge Instructions
+// 14.15. Vector Floating-Point Merge Instruction
+foreach fvti = AllFloatVectors in {
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, VMV0:$vm,
+ fvti.AVL, fvti.SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (splat_vector fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ VMV0:$vm, fvti.AVL, fvti.SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (splat_vector (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, 0, VMV0:$vm, fvti.AVL, fvti.SEW)>;
+}
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+//===----------------------------------------------------------------------===//
+// Vector Splats
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtV] in {
+foreach vti = AllIntegerVectors in {
+ def : Pat<(vti.Vector (splat_vector GPR:$rs1)),
+ (!cast<Instruction>("PseudoVMV_V_X_" # vti.LMul.MX)
+ GPR:$rs1, vti.AVL, vti.SEW)>;
+ def : Pat<(vti.Vector (splat_vector simm5:$rs1)),
+ (!cast<Instruction>("PseudoVMV_V_I_" # vti.LMul.MX)
+ simm5:$rs1, vti.AVL, vti.SEW)>;
+}
+
+foreach mti = AllMasks in {
+ def : Pat<(mti.Mask immAllOnesV),
+ (!cast<Instruction>("PseudoVMSET_M_"#mti.BX) mti.AVL, mti.SEW)>;
+ def : Pat<(mti.Mask immAllZerosV),
+ (!cast<Instruction>("PseudoVMCLR_M_"#mti.BX) mti.AVL, mti.SEW)>;
+}
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, IsRV32] in {
+foreach vti = AllIntegerVectors in {
+ if !eq(vti.SEW, 64) then {
+ def : Pat<(vti.Vector (rv32_splat_i64 GPR:$rs1)),
+ (!cast<Instruction>("PseudoVMV_V_X_" # vti.LMul.MX)
+ GPR:$rs1, vti.AVL, vti.SEW)>;
+ def : Pat<(vti.Vector (rv32_splat_i64 simm5:$rs1)),
+ (!cast<Instruction>("PseudoVMV_V_I_" # vti.LMul.MX)
+ simm5:$rs1, vti.AVL, vti.SEW)>;
+ }
+}
+} // Predicates = [HasStdExtV, IsRV32]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+foreach fvti = AllFloatVectors in {
+ def : Pat<(fvti.Vector (splat_vector fvti.ScalarRegClass:$rs1)),
+ (!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ fvti.AVL, fvti.SEW)>;
+
+ def : Pat<(fvti.Vector (splat_vector (fvti.Scalar fpimm0))),
+ (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
+ 0, fvti.AVL, fvti.SEW)>;
+}
+} // Predicates = [HasStdExtV, HasStdExtF]
+
+//===----------------------------------------------------------------------===//
+// Vector Element Inserts/Extracts
+//===----------------------------------------------------------------------===//
+
+// The built-in TableGen 'extractelt' and 'insertelt' nodes must return the
+// same type as the vector element type. On RISC-V, XLenVT is the only legal
+// integer type, so for integer inserts/extracts we use a custom node which
+// returns XLenVT.
+def riscv_insert_vector_elt
+ : SDNode<"ISD::INSERT_VECTOR_ELT",
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, XLenVT>,
+ SDTCisPtrTy<3>]>, []>;
+def riscv_extract_vector_elt
+ : SDNode<"ISD::EXTRACT_VECTOR_ELT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, XLenVT>, SDTCisPtrTy<2>]>, []>;
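+// For example, on RV64 an extract from an i8 vector produces an i64 (XLenVT)
+// value through riscv_extract_vector_elt, whereas the generic extractelt node
+// would have to produce an i8, which is not a legal scalar integer type here.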
+
+multiclass VPatInsertExtractElt_XI_Idx<bit IsFloat> {
+ defvar vtilist = !if(IsFloat, AllFloatVectors, AllIntegerVectors);
+ defvar insertelt_node = !if(IsFloat, insertelt, riscv_insert_vector_elt);
+ defvar extractelt_node = !if(IsFloat, extractelt, riscv_extract_vector_elt);
+ foreach vti = vtilist in {
+ defvar MX = vti.LMul.MX;
+ defvar vmv_xf_s_inst = !cast<Instruction>(!strconcat("PseudoV",
+ !if(IsFloat, "F", ""),
+ "MV_",
+ vti.ScalarSuffix,
+ "_S_", MX));
+ defvar vmv_s_xf_inst = !cast<Instruction>(!strconcat("PseudoV",
+ !if(IsFloat, "F", ""),
+ "MV_S_",
+ vti.ScalarSuffix,
+ "_", MX));
+ // Only pattern-match insert/extract-element operations where the index is
+ // 0. Any other index will have been custom-lowered to slide the vector
+ // correctly into place (and, in the case of insert, slide it back again
+ // afterwards).
+ def : Pat<(vti.Scalar (extractelt_node (vti.Vector vti.RegClass:$rs2), 0)),
+ (vmv_xf_s_inst vti.RegClass:$rs2, vti.SEW)>;
+
+ def : Pat<(vti.Vector (insertelt_node (vti.Vector vti.RegClass:$merge),
+ vti.ScalarRegClass:$rs1, 0)),
+ (vmv_s_xf_inst vti.RegClass:$merge,
+ (vti.Scalar vti.ScalarRegClass:$rs1),
+ vti.AVL, vti.SEW)>;
+ }
+}
+
+let Predicates = [HasStdExtV] in
+defm "" : VPatInsertExtractElt_XI_Idx</*IsFloat*/0>;
+let Predicates = [HasStdExtV, HasStdExtF] in
+defm "" : VPatInsertExtractElt_XI_Idx</*IsFloat*/1>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous RISCVISD SDNodes
+//===----------------------------------------------------------------------===//
+
+def riscv_vid
+ : SDNode<"RISCVISD::VID", SDTypeProfile<1, 0, [SDTCisVec<0>]>, []>;
+
+def SDTRVVSlide : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisVT<3, XLenVT>
+]>;
+
+def riscv_slideup : SDNode<"RISCVISD::VSLIDEUP", SDTRVVSlide, []>;
+def riscv_slidedown : SDNode<"RISCVISD::VSLIDEDOWN", SDTRVVSlide, []>;
+
+let Predicates = [HasStdExtV] in {
+
+foreach vti = AllIntegerVectors in
+ def : Pat<(vti.Vector riscv_vid),
+ (!cast<Instruction>("PseudoVID_V_"#vti.LMul.MX) vti.AVL, vti.SEW)>;
+
+foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
+ def : Pat<(vti.Vector (riscv_slideup (vti.Vector vti.RegClass:$rs3),
+ (vti.Vector vti.RegClass:$rs1),
+ uimm5:$rs2)),
+ (!cast<Instruction>("PseudoVSLIDEUP_VI_"#vti.LMul.MX)
+ vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
+ vti.AVL, vti.SEW)>;
+
+ def : Pat<(vti.Vector (riscv_slideup (vti.Vector vti.RegClass:$rs3),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2)),
+ (!cast<Instruction>("PseudoVSLIDEUP_VX_"#vti.LMul.MX)
+ vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
+ vti.AVL, vti.SEW)>;
+
+ def : Pat<(vti.Vector (riscv_slidedown (vti.Vector vti.RegClass:$rs3),
+ (vti.Vector vti.RegClass:$rs1),
+ uimm5:$rs2)),
+ (!cast<Instruction>("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX)
+ vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
+ vti.AVL, vti.SEW)>;
+
+ def : Pat<(vti.Vector (riscv_slidedown (vti.Vector vti.RegClass:$rs3),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2)),
+ (!cast<Instruction>("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX)
+ vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
+ vti.AVL, vti.SEW)>;
+}
+} // Predicates = [HasStdExtV]
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
new file mode 100644
index 000000000000..85ebe054499e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -0,0 +1,371 @@
+//===-- RISCVInstrInfoZfh.td - RISC-V 'Zfh' instructions ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard 'Zfh'
+// half-precision floating-point extension, version 0.1.
+// This version is still experimental as the 'Zfh' extension hasn't been
+// ratified yet.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_RISCVFMV_H_X
+ : SDTypeProfile<1, 1, [SDTCisVT<0, f16>, SDTCisVT<1, XLenVT>]>;
+def SDT_RISCVFMV_X_ANYEXTH
+ : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisVT<1, f16>]>;
+
+def riscv_fmv_h_x
+ : SDNode<"RISCVISD::FMV_H_X", SDT_RISCVFMV_H_X>;
+def riscv_fmv_x_anyexth
+ : SDNode<"RISCVISD::FMV_X_ANYEXTH", SDT_RISCVFMV_X_ANYEXTH>;
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class FPFMAH_rrr_frm<RISCVOpcode opcode, string opcodestr>
+ : RVInstR4<0b10, opcode, (outs FPR16:$rd),
+ (ins FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, frmarg:$funct3),
+ opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">;
+
+class FPFMAHDynFrmAlias<FPFMAH_rrr_frm Inst, string OpcodeStr>
+ : InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3",
+ (Inst FPR16:$rd, FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class FPALUH_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR16:$rd),
+ (ins FPR16:$rs1, FPR16:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class FPALUH_rr_frm<bits<7> funct7, string opcodestr>
+ : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR16:$rd),
+ (ins FPR16:$rs1, FPR16:$rs2, frmarg:$funct3), opcodestr,
+ "$rd, $rs1, $rs2, $funct3">;
+
+class FPALUHDynFrmAlias<FPALUH_rr_frm Inst, string OpcodeStr>
+ : InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
+ (Inst FPR16:$rd, FPR16:$rs1, FPR16:$rs2, 0b111)>;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class FPCmpH_rr<bits<3> funct3, string opcodestr>
+ : RVInstR<0b1010010, funct3, OPC_OP_FP, (outs GPR:$rd),
+ (ins FPR16:$rs1, FPR16:$rs2), opcodestr, "$rd, $rs1, $rs2">,
+ Sched<[]>;
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZfh] in {
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def FLH : RVInstI<0b001, OPC_LOAD_FP, (outs FPR16:$rd),
+ (ins GPR:$rs1, simm12:$imm12),
+ "flh", "$rd, ${imm12}(${rs1})">,
+ Sched<[]>;
+
+// Operands for stores are in the order srcreg, base, offset rather than
+// reflecting the order these fields are specified in the instruction
+// encoding.
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+def FSH : RVInstS<0b001, OPC_STORE_FP, (outs),
+ (ins FPR16:$rs2, GPR:$rs1, simm12:$imm12),
+ "fsh", "$rs2, ${imm12}(${rs1})">,
+ Sched<[]>;
+
+def FMADD_H : FPFMAH_rrr_frm<OPC_MADD, "fmadd.h">,
+ Sched<[]>;
+def : FPFMAHDynFrmAlias<FMADD_H, "fmadd.h">;
+def FMSUB_H : FPFMAH_rrr_frm<OPC_MSUB, "fmsub.h">,
+ Sched<[]>;
+def : FPFMAHDynFrmAlias<FMSUB_H, "fmsub.h">;
+def FNMSUB_H : FPFMAH_rrr_frm<OPC_NMSUB, "fnmsub.h">,
+ Sched<[]>;
+def : FPFMAHDynFrmAlias<FNMSUB_H, "fnmsub.h">;
+def FNMADD_H : FPFMAH_rrr_frm<OPC_NMADD, "fnmadd.h">,
+ Sched<[]>;
+def : FPFMAHDynFrmAlias<FNMADD_H, "fnmadd.h">;
+
+def FADD_H : FPALUH_rr_frm<0b0000010, "fadd.h">,
+ Sched<[]>;
+def : FPALUHDynFrmAlias<FADD_H, "fadd.h">;
+def FSUB_H : FPALUH_rr_frm<0b0000110, "fsub.h">,
+ Sched<[]>;
+def : FPALUHDynFrmAlias<FSUB_H, "fsub.h">;
+def FMUL_H : FPALUH_rr_frm<0b0001010, "fmul.h">,
+ Sched<[]>;
+def : FPALUHDynFrmAlias<FMUL_H, "fmul.h">;
+def FDIV_H : FPALUH_rr_frm<0b0001110, "fdiv.h">,
+ Sched<[]>;
+def : FPALUHDynFrmAlias<FDIV_H, "fdiv.h">;
+
+def FSQRT_H : FPUnaryOp_r_frm<0b0101110, FPR16, FPR16, "fsqrt.h">,
+ Sched<[]> {
+ let rs2 = 0b00000;
+}
+def : FPUnaryOpDynFrmAlias<FSQRT_H, "fsqrt.h", FPR16, FPR16>;
+
+def FSGNJ_H : FPALUH_rr<0b0010010, 0b000, "fsgnj.h">,
+ Sched<[]>;
+def FSGNJN_H : FPALUH_rr<0b0010010, 0b001, "fsgnjn.h">,
+ Sched<[]>;
+def FSGNJX_H : FPALUH_rr<0b0010010, 0b010, "fsgnjx.h">,
+ Sched<[]>;
+
+def FMIN_H : FPALUH_rr<0b0010110, 0b000, "fmin.h">,
+ Sched<[]>;
+def FMAX_H : FPALUH_rr<0b0010110, 0b001, "fmax.h">,
+ Sched<[]>;
+
+def FCVT_W_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.w.h">,
+ Sched<[]> {
+ let rs2 = 0b00000;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_W_H, "fcvt.w.h", GPR, FPR16>;
+
+def FCVT_WU_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.wu.h">,
+ Sched<[]> {
+ let rs2 = 0b00001;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_WU_H, "fcvt.wu.h", GPR, FPR16>;
+
+def FCVT_H_W : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.w">,
+ Sched<[]> {
+ let rs2 = 0b00000;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_W, "fcvt.h.w", FPR16, GPR>;
+
+def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.wu">,
+ Sched<[]> {
+ let rs2 = 0b00001;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>;
+
+def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, FPR16, FPR32, "fcvt.h.s">,
+ Sched<[]> {
+ let rs2 = 0b00000;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_S, "fcvt.h.s", FPR16, FPR32>;
+
+def FCVT_S_H : FPUnaryOp_r<0b0100000, 0b000, FPR32, FPR16, "fcvt.s.h">,
+ Sched<[]> {
+ let rs2 = 0b00010;
+}
+
+def FMV_X_H : FPUnaryOp_r<0b1110010, 0b000, GPR, FPR16, "fmv.x.h">,
+ Sched<[]> {
+ let rs2 = 0b00000;
+}
+
+def FMV_H_X : FPUnaryOp_r<0b1111010, 0b000, FPR16, GPR, "fmv.h.x">,
+ Sched<[]> {
+ let rs2 = 0b00000;
+}
+
+def FEQ_H : FPCmpH_rr<0b010, "feq.h">;
+def FLT_H : FPCmpH_rr<0b001, "flt.h">;
+def FLE_H : FPCmpH_rr<0b000, "fle.h">;
+
+def FCLASS_H : FPUnaryOp_r<0b1110010, 0b001, GPR, FPR16, "fclass.h">,
+ Sched<[]> {
+ let rs2 = 0b00000;
+}
+} // Predicates = [HasStdExtZfh]
+
+let Predicates = [HasStdExtZfh, IsRV64] in {
+def FCVT_L_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.l.h">,
+ Sched<[]> {
+ let rs2 = 0b00010;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_L_H, "fcvt.l.h", GPR, FPR16>;
+
+def FCVT_LU_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.lu.h">,
+ Sched<[]> {
+ let rs2 = 0b00011;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_LU_H, "fcvt.lu.h", GPR, FPR16>;
+
+def FCVT_H_L : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.l">,
+ Sched<[]> {
+ let rs2 = 0b00010;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_L, "fcvt.h.l", FPR16, GPR>;
+
+def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.lu">,
+ Sched<[]> {
+ let rs2 = 0b00011;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>;
+} // Predicates = [HasStdExtZfh, IsRV64]
+
+let Predicates = [HasStdExtZfh, HasStdExtD] in {
+def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, FPR16, FPR64, "fcvt.h.d">,
+ Sched<[]> {
+ let rs2 = 0b00001;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_D, "fcvt.h.d", FPR16, FPR64>;
+
+def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR16, "fcvt.d.h">,
+ Sched<[]> {
+ let rs2 = 0b00010;
+}
+} // Predicates = [HasStdExtZfh, HasStdExtD]
+
+//===----------------------------------------------------------------------===//
+// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZfh] in {
+def : InstAlias<"flh $rd, (${rs1})", (FLH FPR16:$rd, GPR:$rs1, 0), 0>;
+def : InstAlias<"fsh $rs2, (${rs1})", (FSH FPR16:$rs2, GPR:$rs1, 0), 0>;
+
+def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
+def : InstAlias<"fabs.h $rd, $rs", (FSGNJX_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
+def : InstAlias<"fneg.h $rd, $rs", (FSGNJN_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
+
+// fgt.h/fge.h are recognised by the GNU assembler but the canonical
+// flt.h/fle.h forms will always be printed. Therefore, set a zero weight.
+def : InstAlias<"fgt.h $rd, $rs, $rt",
+ (FLT_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
+def : InstAlias<"fge.h $rd, $rs, $rt",
+ (FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
+
+def PseudoFLH : PseudoFloatLoad<"flh", FPR16>;
+def PseudoFSH : PseudoStore<"fsh", FPR16>;
+} // Predicates = [HasStdExtZfh]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
+
+/// Generic pattern classes
+class PatFpr16Fpr16<SDPatternOperator OpNode, RVInstR Inst>
+ : Pat<(OpNode FPR16:$rs1, FPR16:$rs2), (Inst $rs1, $rs2)>;
+
+class PatFpr16Fpr16DynFrm<SDPatternOperator OpNode, RVInstRFrm Inst>
+ : Pat<(OpNode FPR16:$rs1, FPR16:$rs2), (Inst $rs1, $rs2, 0b111)>;
+
+let Predicates = [HasStdExtZfh] in {
+
+/// Float constants
+def : Pat<(f16 (fpimm0)), (FMV_H_X X0)>;
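+// (+0.0 in IEEE half precision is the all-zero bit pattern, so it can be
+// materialized by moving x0 into an f16 register.)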
+
+/// Float conversion operations
+
+// [u]int32<->float conversion patterns must be gated on IsRV32 or IsRV64, so
+// are defined later.
+
+/// Float arithmetic operations
+
+def : PatFpr16Fpr16DynFrm<fadd, FADD_H>;
+def : PatFpr16Fpr16DynFrm<fsub, FSUB_H>;
+def : PatFpr16Fpr16DynFrm<fmul, FMUL_H>;
+def : PatFpr16Fpr16DynFrm<fdiv, FDIV_H>;
+
+def : Pat<(fsqrt FPR16:$rs1), (FSQRT_H FPR16:$rs1, 0b111)>;
+
+def : Pat<(fneg FPR16:$rs1), (FSGNJN_H $rs1, $rs1)>;
+def : Pat<(fabs FPR16:$rs1), (FSGNJX_H $rs1, $rs1)>;
+
+def : PatFpr16Fpr16<fcopysign, FSGNJ_H>;
+def : Pat<(fcopysign FPR16:$rs1, (fneg FPR16:$rs2)), (FSGNJN_H $rs1, $rs2)>;
+def : Pat<(fcopysign FPR16:$rs1, FPR32:$rs2),
+ (FSGNJ_H $rs1, (FCVT_H_S $rs2, 0b111))>;
+def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2),
+ (FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>;
+def : Pat<(fcopysign FPR32:$rs1, FPR16:$rs2), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>;
+def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>;
+
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(fma FPR16:$rs1, FPR16:$rs2, FPR16:$rs3),
+ (FMADD_H $rs1, $rs2, $rs3, 0b111)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(fma FPR16:$rs1, FPR16:$rs2, (fneg FPR16:$rs3)),
+ (FMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3),
+ (FNMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)),
+ (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+
+def : PatFpr16Fpr16<fminnum, FMIN_H>;
+def : PatFpr16Fpr16<fmaxnum, FMAX_H>;
+
+/// Setcc
+
+def : PatFpr16Fpr16<seteq, FEQ_H>;
+def : PatFpr16Fpr16<setoeq, FEQ_H>;
+def : PatFpr16Fpr16<setlt, FLT_H>;
+def : PatFpr16Fpr16<setolt, FLT_H>;
+def : PatFpr16Fpr16<setle, FLE_H>;
+def : PatFpr16Fpr16<setole, FLE_H>;
+
+def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>;
+
+/// Loads
+
+defm : LdPat<load, FLH>;
+
+/// Stores
+
+defm : StPat<store, FSH, FPR16>;
+
+/// Float conversion operations
+
+// f32 -> f16, f16 -> f32
+def : Pat<(fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>;
+def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
+
+// Moves (no conversion)
+def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>;
+def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>;
+} // Predicates = [HasStdExtZfh]
+
+let Predicates = [HasStdExtZfh, IsRV32] in {
+// float->[u]int. Round-to-zero must be used.
+def : Pat<(fp_to_sint FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
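+// (The trailing immediate is the rm field: 0b001 selects RTZ and 0b111 selects
+// DYN, i.e. the rounding mode held in frm.)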
+
+// [u]int->float. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_H_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_H_WU $rs1, 0b111)>;
+} // Predicates = [HasStdExtZfh, IsRV32]
+
+let Predicates = [HasStdExtZfh, IsRV64] in {
+// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
+// because fpto[u|s]i produces poison if the value can't fit into the target.
+// We match the single case below because fcvt.wu.h sign-extends its result, so
+// it is cheaper than fcvt.lu.h+sext.w.
+def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR16:$rs1)), i32),
+ (FCVT_WU_H $rs1, 0b001)>;
+
+// FP->[u]int64
+def : Pat<(fp_to_sint FPR16:$rs1), (FCVT_L_H $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR16:$rs1), (FCVT_LU_H $rs1, 0b001)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp (sexti32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>;
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_H_L $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_H_LU $rs1, 0b111)>;
+} // Predicates = [HasStdExtZfh, IsRV64]
+
+let Predicates = [HasStdExtZfh, HasStdExtD] in {
+/// Float conversion operations
+// f64 -> f16, f16 -> f64
+def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
+def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
index b1dbcfa7f738..3c38dd1bf64d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "RISCV.h"
+#include "RISCVSubtarget.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -121,12 +122,93 @@ bool llvm::LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
case MachineOperand::MO_ConstantPoolIndex:
MCOp = lowerSymbolOperand(MO, AP.GetCPISymbol(MO.getIndex()), AP);
break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = lowerSymbolOperand(MO, AP.GetJTISymbol(MO.getIndex()), AP);
+ break;
+ }
+ return true;
+}
+
+static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
+ MCInst &OutMI) {
+ const RISCVVPseudosTable::PseudoInfo *RVV =
+ RISCVVPseudosTable::getPseudoInfo(MI->getOpcode());
+ if (!RVV)
+ return false;
+
+ OutMI.setOpcode(RVV->BaseInstr);
+
+ const MachineBasicBlock *MBB = MI->getParent();
+ assert(MBB && "MI expected to be in a basic block");
+ const MachineFunction *MF = MBB->getParent();
+ assert(MF && "MBB expected to be in a machine function");
+
+ const TargetRegisterInfo *TRI =
+ MF->getSubtarget<RISCVSubtarget>().getRegisterInfo();
+ assert(TRI && "TargetRegisterInfo expected");
+
+ uint64_t TSFlags = MI->getDesc().TSFlags;
+ int NumOps = MI->getNumExplicitOperands();
+
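+ // For illustration (operand order as defined by the pseudo classes in
+ // RISCVInstrInfoVPseudos.td): an unmasked PseudoVADD_VV_M1 carries
+ // (vd, vs2, vs1, vl, sew); only vd, vs2 and vs1 survive into the MCInst,
+ // and a dummy mask operand is appended after this loop.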
+ for (const MachineOperand &MO : MI->explicit_operands()) {
+ int OpNo = (int)MI->getOperandNo(&MO);
+ assert(OpNo >= 0 && "Operand number doesn't fit in an 'int' type");
+
+ // Skip VL and SEW operands which are the last two operands if present.
+ if ((TSFlags & RISCVII::HasVLOpMask) && OpNo == (NumOps - 2))
+ continue;
+ if ((TSFlags & RISCVII::HasSEWOpMask) && OpNo == (NumOps - 1))
+ continue;
+
+ // Skip merge op. It should be the first operand after the result.
+ if ((TSFlags & RISCVII::HasMergeOpMask) && OpNo == 1) {
+ assert(MI->getNumExplicitDefs() == 1);
+ continue;
+ }
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unknown operand type");
+ case MachineOperand::MO_Register: {
+ unsigned Reg = MO.getReg();
+
+ if (RISCV::VRM2RegClass.contains(Reg) ||
+ RISCV::VRM4RegClass.contains(Reg) ||
+ RISCV::VRM8RegClass.contains(Reg)) {
+ Reg = TRI->getSubReg(Reg, RISCV::sub_vrm1_0);
+ assert(Reg && "Subregister does not exist");
+ } else if (RISCV::FPR16RegClass.contains(Reg)) {
+ Reg = TRI->getMatchingSuperReg(Reg, RISCV::sub_16, &RISCV::FPR32RegClass);
+ assert(Reg && "Superregister does not exist");
+ } else if (RISCV::FPR64RegClass.contains(Reg)) {
+ Reg = TRI->getSubReg(Reg, RISCV::sub_32);
+ assert(Reg && "Subregister does not exist");
+ }
+
+ MCOp = MCOperand::createReg(Reg);
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ }
+ OutMI.addOperand(MCOp);
}
+
+ // Unmasked pseudo instructions need a dummy mask operand appended, because
+ // all V instructions are modeled as the masked version.
+ if (TSFlags & RISCVII::HasDummyMaskOpMask)
+ OutMI.addOperand(MCOperand::createReg(RISCV::NoRegister));
+
return true;
}
void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
const AsmPrinter &AP) {
+ if (lowerRISCVVMachineInstrToMCInst(MI, OutMI))
+ return;
+
OutMI.setOpcode(MI->getOpcode());
for (const MachineOperand &MO : MI->operands()) {
@@ -134,4 +216,20 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
OutMI.addOperand(MCOp);
}
+
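+ // PseudoReadVLENB and PseudoReadVL are lowered below to CSR reads; CSRRS with
+ // rs1 = x0 writes nothing back to the CSR, so they become csrr rd, vlenb and
+ // csrr rd, vl.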
+ if (OutMI.getOpcode() == RISCV::PseudoReadVLENB) {
+ OutMI.setOpcode(RISCV::CSRRS);
+ OutMI.addOperand(MCOperand::createImm(
+ RISCVSysReg::lookupSysRegByName("VLENB")->Encoding));
+ OutMI.addOperand(MCOperand::createReg(RISCV::X0));
+ return;
+ }
+
+ if (OutMI.getOpcode() == RISCV::PseudoReadVL) {
+ OutMI.setOpcode(RISCV::CSRRS);
+ OutMI.addOperand(MCOperand::createImm(
+ RISCVSysReg::lookupSysRegByName("VL")->Encoding));
+ OutMI.addOperand(MCOperand::createReg(RISCV::X0));
+ return;
+ }
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 4c9013aa1e23..87586023caa4 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -64,7 +64,7 @@ private:
} // end anonymous namespace
char RISCVMergeBaseOffsetOpt::ID = 0;
-INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset",
+INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
RISCV_MERGE_BASE_OFFSET_NAME, false, false)
// Detect the pattern:
@@ -216,12 +216,14 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
case RISCV::LHU:
case RISCV::LWU:
case RISCV::LD:
+ case RISCV::FLH:
case RISCV::FLW:
case RISCV::FLD:
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
case RISCV::SD:
+ case RISCV::FSH:
case RISCV::FSW:
case RISCV::FSD: {
// Transforms the sequence: Into:
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index cb7d55eb0f0c..631077ef83f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -29,6 +29,9 @@ using namespace llvm;
static_assert(RISCV::X1 == RISCV::X0 + 1, "Register list not consecutive");
static_assert(RISCV::X31 == RISCV::X0 + 31, "Register list not consecutive");
+static_assert(RISCV::F1_H == RISCV::F0_H + 1, "Register list not consecutive");
+static_assert(RISCV::F31_H == RISCV::F0_H + 31,
+ "Register list not consecutive");
static_assert(RISCV::F1_F == RISCV::F0_F + 1, "Register list not consecutive");
static_assert(RISCV::F31_F == RISCV::F0_F + 31,
"Register list not consecutive");
@@ -45,6 +48,8 @@ RISCVRegisterInfo::RISCVRegisterInfo(unsigned HwMode)
const MCPhysReg *
RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
auto &Subtarget = MF->getSubtarget<RISCVSubtarget>();
+ if (MF->getFunction().getCallingConv() == CallingConv::GHC)
+ return CSR_NoRegs_SaveList;
if (MF->getFunction().hasFnAttribute("interrupt")) {
if (Subtarget.hasStdExtD())
return CSR_XLEN_F64_Interrupt_SaveList;
@@ -89,6 +94,13 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// variable-sized objects at runtime.
if (TFI->hasBP(MF))
markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp
+
+ // Reserve the V extension state registers (vl, vtype, vxsat, vxrm); code
+ // generation handles them manually.
+ markSuperRegs(Reserved, RISCV::VL);
+ markSuperRegs(Reserved, RISCV::VTYPE);
+ markSuperRegs(Reserved, RISCV::VXSAT);
+ markSuperRegs(Reserved, RISCV::VXRM);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
@@ -152,9 +164,10 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
Register FrameReg;
- int Offset =
- getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg) +
- MI.getOperand(FIOperandNum + 1).getImm();
+ int Offset = getFrameLowering(MF)
+ ->getFrameIndexReference(MF, FrameIndex, FrameReg)
+ .getFixed() +
+ MI.getOperand(FIOperandNum + 1).getImm();
if (!isInt<32>(Offset)) {
report_fatal_error(
@@ -190,9 +203,11 @@ Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const uint32_t *
RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & MF,
- CallingConv::ID /*CC*/) const {
+ CallingConv::ID CC) const {
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ if (CC == CallingConv::GHC)
+ return CSR_NoRegs_RegMask;
switch (Subtarget.getTargetABI()) {
default:
llvm_unreachable("Unrecognized ABI");
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 7544b4b3b845..e1a11fd9389f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -16,14 +16,23 @@ class RISCVReg<bits<5> Enc, string n, list<string> alt = []> : Register<n> {
let AltNames = alt;
}
-class RISCVReg32<bits<5> Enc, string n, list<string> alt = []> : Register<n> {
+class RISCVReg16<bits<5> Enc, string n, list<string> alt = []> : Register<n> {
let HWEncoding{4-0} = Enc;
let AltNames = alt;
}
+def sub_16 : SubRegIndex<16>;
+class RISCVReg32<RISCVReg16 subreg> : Register<""> {
+ let HWEncoding{4-0} = subreg.HWEncoding{4-0};
+ let SubRegs = [subreg];
+ let SubRegIndices = [sub_16];
+ let AsmName = subreg.AsmName;
+ let AltNames = subreg.AltNames;
+}
+
// Because RISCVReg64 register have AsmName and AltNames that alias with their
-// 32-bit sub-register, RISCVAsmParser will need to coerce a register number
-// from a RISCVReg32 to the equivalent RISCVReg64 when appropriate.
+// 16/32-bit sub-register, RISCVAsmParser will need to coerce a register number
+// from a RISCVReg16/RISCVReg32 to the equivalent RISCVReg64 when appropriate.
def sub_32 : SubRegIndex<32>;
class RISCVReg64<RISCVReg32 subreg> : Register<""> {
let HWEncoding{4-0} = subreg.HWEncoding{4-0};
@@ -42,12 +51,21 @@ class RISCVRegWithSubRegs<bits<5> Enc, string n, list<Register> subregs,
def ABIRegAltName : RegAltNameIndex;
-def sub_vrm2 : SubRegIndex<64, -1>;
-def sub_vrm2_hi : SubRegIndex<64, -1>;
-def sub_vrm4 : SubRegIndex<128, -1>;
-def sub_vrm4_hi : SubRegIndex<128, -1>;
-def sub_vrm8 : SubRegIndex<256, -1>;
-def sub_vrm8_hi : SubRegIndex<256, -1>;
+def sub_vrm1_0 : SubRegIndex<64, -1>;
+def sub_vrm1_1 : SubRegIndex<64, -1>;
+def sub_vrm1_2 : SubRegIndex<64, -1>;
+def sub_vrm1_3 : SubRegIndex<64, -1>;
+def sub_vrm1_4 : SubRegIndex<64, -1>;
+def sub_vrm1_5 : SubRegIndex<64, -1>;
+def sub_vrm1_6 : SubRegIndex<64, -1>;
+def sub_vrm1_7 : SubRegIndex<64, -1>;
+def sub_vrm2_0 : SubRegIndex<128, -1>;
+def sub_vrm2_1 : SubRegIndex<128, -1>;
+def sub_vrm2_2 : SubRegIndex<128, -1>;
+def sub_vrm2_3 : SubRegIndex<128, -1>;
+def sub_vrm4_0 : SubRegIndex<256, -1>;
+def sub_vrm4_1 : SubRegIndex<256, -1>;
+
} // Namespace = "RISCV"
// Integer registers
@@ -97,8 +115,8 @@ let RegAltNameIndices = [ABIRegAltName] in {
}
}
-def XLenVT : ValueTypeByHwMode<[RV32, RV64, DefaultMode],
- [i32, i64, i32]>;
+def XLenVT : ValueTypeByHwMode<[RV32, RV64],
+ [i32, i64]>;
// The order of registers represents the preferred allocation sequence.
// Registers are listed in the order caller-save, callee-save, specials.
@@ -111,14 +129,14 @@ def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 0, 4)
)> {
let RegInfos = RegInfoByHwMode<
- [RV32, RV64, DefaultMode],
- [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>;
+ [RV32, RV64],
+ [RegInfo<32,32,32>, RegInfo<64,64,64>]>;
}
def GPRX0 : RegisterClass<"RISCV", [XLenVT], 32, (add X0)> {
let RegInfos = RegInfoByHwMode<
- [RV32, RV64, DefaultMode],
- [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>;
+ [RV32, RV64],
+ [RegInfo<32,32,32>, RegInfo<64,64,64>]>;
}
// The order of registers represents the preferred allocation sequence.
@@ -132,8 +150,8 @@ def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 1, 4)
)> {
let RegInfos = RegInfoByHwMode<
- [RV32, RV64, DefaultMode],
- [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>;
+ [RV32, RV64],
+ [RegInfo<32,32,32>, RegInfo<64,64,64>]>;
}
def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add
@@ -145,8 +163,8 @@ def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add
X1, X3, X4
)> {
let RegInfos = RegInfoByHwMode<
- [RV32, RV64, DefaultMode],
- [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>;
+ [RV32, RV64],
+ [RegInfo<32,32,32>, RegInfo<64,64,64>]>;
}
def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
@@ -154,8 +172,8 @@ def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 8, 9)
)> {
let RegInfos = RegInfoByHwMode<
- [RV32, RV64, DefaultMode],
- [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>;
+ [RV32, RV64],
+ [RegInfo<32,32,32>, RegInfo<64,64,64>]>;
}
// For indirect tail calls, we can't use callee-saved registers, as they are
@@ -167,50 +185,55 @@ def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 28, 31)
)> {
let RegInfos = RegInfoByHwMode<
- [RV32, RV64, DefaultMode],
- [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>;
+ [RV32, RV64],
+ [RegInfo<32,32,32>, RegInfo<64,64,64>]>;
}
def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> {
let RegInfos = RegInfoByHwMode<
- [RV32, RV64, DefaultMode],
- [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>;
+ [RV32, RV64],
+ [RegInfo<32,32,32>, RegInfo<64,64,64>]>;
}
// Floating point registers
let RegAltNameIndices = [ABIRegAltName] in {
- def F0_F : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
- def F1_F : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>;
- def F2_F : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>;
- def F3_F : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>;
- def F4_F : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>;
- def F5_F : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>;
- def F6_F : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>;
- def F7_F : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>;
- def F8_F : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>;
- def F9_F : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>;
- def F10_F : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>;
- def F11_F : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>;
- def F12_F : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>;
- def F13_F : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>;
- def F14_F : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>;
- def F15_F : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>;
- def F16_F : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>;
- def F17_F : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>;
- def F18_F : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>;
- def F19_F : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>;
- def F20_F : RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>;
- def F21_F : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>;
- def F22_F : RISCVReg32<22,"f22", ["fs6"]>, DwarfRegNum<[54]>;
- def F23_F : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>;
- def F24_F : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>;
- def F25_F : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>;
- def F26_F : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>;
- def F27_F : RISCVReg32<27,"f27", ["fs11"]>, DwarfRegNum<[59]>;
- def F28_F : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>;
- def F29_F : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>;
- def F30_F : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>;
- def F31_F : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>;
+ def F0_H : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
+ def F1_H : RISCVReg16<1, "f1", ["ft1"]>, DwarfRegNum<[33]>;
+ def F2_H : RISCVReg16<2, "f2", ["ft2"]>, DwarfRegNum<[34]>;
+ def F3_H : RISCVReg16<3, "f3", ["ft3"]>, DwarfRegNum<[35]>;
+ def F4_H : RISCVReg16<4, "f4", ["ft4"]>, DwarfRegNum<[36]>;
+ def F5_H : RISCVReg16<5, "f5", ["ft5"]>, DwarfRegNum<[37]>;
+ def F6_H : RISCVReg16<6, "f6", ["ft6"]>, DwarfRegNum<[38]>;
+ def F7_H : RISCVReg16<7, "f7", ["ft7"]>, DwarfRegNum<[39]>;
+ def F8_H : RISCVReg16<8, "f8", ["fs0"]>, DwarfRegNum<[40]>;
+ def F9_H : RISCVReg16<9, "f9", ["fs1"]>, DwarfRegNum<[41]>;
+ def F10_H : RISCVReg16<10,"f10", ["fa0"]>, DwarfRegNum<[42]>;
+ def F11_H : RISCVReg16<11,"f11", ["fa1"]>, DwarfRegNum<[43]>;
+ def F12_H : RISCVReg16<12,"f12", ["fa2"]>, DwarfRegNum<[44]>;
+ def F13_H : RISCVReg16<13,"f13", ["fa3"]>, DwarfRegNum<[45]>;
+ def F14_H : RISCVReg16<14,"f14", ["fa4"]>, DwarfRegNum<[46]>;
+ def F15_H : RISCVReg16<15,"f15", ["fa5"]>, DwarfRegNum<[47]>;
+ def F16_H : RISCVReg16<16,"f16", ["fa6"]>, DwarfRegNum<[48]>;
+ def F17_H : RISCVReg16<17,"f17", ["fa7"]>, DwarfRegNum<[49]>;
+ def F18_H : RISCVReg16<18,"f18", ["fs2"]>, DwarfRegNum<[50]>;
+ def F19_H : RISCVReg16<19,"f19", ["fs3"]>, DwarfRegNum<[51]>;
+ def F20_H : RISCVReg16<20,"f20", ["fs4"]>, DwarfRegNum<[52]>;
+ def F21_H : RISCVReg16<21,"f21", ["fs5"]>, DwarfRegNum<[53]>;
+ def F22_H : RISCVReg16<22,"f22", ["fs6"]>, DwarfRegNum<[54]>;
+ def F23_H : RISCVReg16<23,"f23", ["fs7"]>, DwarfRegNum<[55]>;
+ def F24_H : RISCVReg16<24,"f24", ["fs8"]>, DwarfRegNum<[56]>;
+ def F25_H : RISCVReg16<25,"f25", ["fs9"]>, DwarfRegNum<[57]>;
+ def F26_H : RISCVReg16<26,"f26", ["fs10"]>, DwarfRegNum<[58]>;
+ def F27_H : RISCVReg16<27,"f27", ["fs11"]>, DwarfRegNum<[59]>;
+ def F28_H : RISCVReg16<28,"f28", ["ft8"]>, DwarfRegNum<[60]>;
+ def F29_H : RISCVReg16<29,"f29", ["ft9"]>, DwarfRegNum<[61]>;
+ def F30_H : RISCVReg16<30,"f30", ["ft10"]>, DwarfRegNum<[62]>;
+ def F31_H : RISCVReg16<31,"f31", ["ft11"]>, DwarfRegNum<[63]>;
+
+ foreach Index = 0-31 in {
+ def F#Index#_F : RISCVReg32<!cast<RISCVReg16>("F"#Index#"_H")>,
+ DwarfRegNum<[!add(Index, 32)]>;
+ }
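
After this change every architectural FP register is modeled at three widths that share one hardware encoding and asm name: F*_H (16-bit, new here), wrapped by F*_F (32-bit), which the existing foreach below wraps again into F*_D (64-bit); the 16- and 32-bit views keep DWARF numbers 32 + Index. A toy model of that layering, with illustrative fields rather than LLVM's Register records:

    #include <cstdio>
    #include <string>

    // One architectural FP register exposed at three widths (sketch only).
    struct FPReg {
      unsigned encoding;    // shared 5-bit hardware encoding
      std::string asmName;  // "f0".."f31", shared by every view
      unsigned dwarfNum() const { return encoding + 32; } // matches the foreach above
    };

    struct View { const FPReg *reg; unsigned bits; }; // 16, 32 or 64

    int main() {
      FPReg f10{10, "f10"};
      View half{&f10, 16}, single{&f10, 32}, dbl{&f10, 64};
      std::printf("%s: enc=%u dwarf=%u views=%u/%u/%u bits\n",
                  f10.asmName.c_str(), f10.encoding, f10.dwarfNum(),
                  half.bits, single.bits, dbl.bits);
    }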
foreach Index = 0-31 in {
def F#Index#_D : RISCVReg64<!cast<RISCVReg32>("F"#Index#"_F")>,
@@ -220,6 +243,14 @@ let RegAltNameIndices = [ABIRegAltName] in {
// The order of registers represents the preferred allocation sequence,
// meaning caller-save regs are listed before callee-save.
+def FPR16 : RegisterClass<"RISCV", [f16], 16, (add
+ (sequence "F%u_H", 0, 7),
+ (sequence "F%u_H", 10, 17),
+ (sequence "F%u_H", 28, 31),
+ (sequence "F%u_H", 8, 9),
+ (sequence "F%u_H", 18, 27)
+)>;
+
def FPR32 : RegisterClass<"RISCV", [f32], 32, (add
(sequence "F%u_F", 0, 7),
(sequence "F%u_F", 10, 17),
@@ -248,10 +279,139 @@ def FPR64C : RegisterClass<"RISCV", [f64], 64, (add
(sequence "F%u_D", 8, 9)
)>;
+// Vector type mapping to LLVM types.
+//
+// Though the V extension allows VLEN to be as small as 8,
+// this approach assumes that VLEN>=64.
+// Additionally, the only supported ELEN values are 32 and 64,
+// thus `vscale` can be defined as VLEN/64,
+// allowing the same types with either ELEN value.
+//
+// MF8 MF4 MF2 M1 M2 M4 M8
+// i64* N/A N/A N/A nxv1i64 nxv2i64 nxv4i64 nxv8i64
+// i32 N/A N/A nxv1i32 nxv2i32 nxv4i32 nxv8i32 nxv16i32
+// i16 N/A nxv1i16 nxv2i16 nxv4i16 nxv8i16 nxv16i16 nxv32i16
+// i8 nxv1i8 nxv2i8 nxv4i8 nxv8i8 nxv16i8 nxv32i8 nxv64i8
+// double* N/A N/A N/A nxv1f64 nxv2f64 nxv4f64 nxv8f64
+// float N/A N/A nxv1f32 nxv2f32 nxv4f32 nxv8f32 nxv16f32
+// half N/A nxv1f16 nxv2f16 nxv4f16 nxv8f16 nxv16f16 nxv32f16
+// * ELEN=64
+
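
The table follows directly from fixing vscale = VLEN/64: an element type of SEW bits at LMUL = m maps to nxv(64*m/SEW) elements of that type, and a combination is only listed when that count is a whole number of at least one. A small sketch that reproduces the rule (minElts is my helper, not LLVM code):

    #include <cstdio>

    // Minimum element count per vscale unit, assuming VLEN >= 64 (vscale = VLEN/64).
    // Returns 0 for combinations the table marks N/A (the count would be fractional).
    int minElts(int sewBits, int lmulNum, int lmulDen /* 1 for integral LMUL */) {
      int num = 64 * lmulNum;
      int den = sewBits * lmulDen;
      return (num % den == 0) ? num / den : 0;
    }

    int main() {
      std::printf("i32 at M2  -> nxv%di32\n", minElts(32, 2, 1)); // nxv4i32
      std::printf("i8  at MF8 -> nxv%di8\n",  minElts(8, 1, 8));  // nxv1i8
      std::printf("i64 at MF2 -> %d (N/A)\n", minElts(64, 1, 2)); // 0
    }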
+defvar vint8mf8_t = nxv1i8;
+defvar vint8mf4_t = nxv2i8;
+defvar vint8mf2_t = nxv4i8;
+defvar vint8m1_t = nxv8i8;
+defvar vint8m2_t = nxv16i8;
+defvar vint8m4_t = nxv32i8;
+defvar vint8m8_t = nxv64i8;
+
+defvar vint16mf4_t = nxv1i16;
+defvar vint16mf2_t = nxv2i16;
+defvar vint16m1_t = nxv4i16;
+defvar vint16m2_t = nxv8i16;
+defvar vint16m4_t = nxv16i16;
+defvar vint16m8_t = nxv32i16;
+
+defvar vint32mf2_t = nxv1i32;
+defvar vint32m1_t = nxv2i32;
+defvar vint32m2_t = nxv4i32;
+defvar vint32m4_t = nxv8i32;
+defvar vint32m8_t = nxv16i32;
+
+defvar vint64m1_t = nxv1i64;
+defvar vint64m2_t = nxv2i64;
+defvar vint64m4_t = nxv4i64;
+defvar vint64m8_t = nxv8i64;
+
+defvar vfloat16mf4_t = nxv1f16;
+defvar vfloat16mf2_t = nxv2f16;
+defvar vfloat16m1_t = nxv4f16;
+defvar vfloat16m2_t = nxv8f16;
+defvar vfloat16m4_t = nxv16f16;
+defvar vfloat16m8_t = nxv32f16;
+
+defvar vfloat32mf2_t = nxv1f32;
+defvar vfloat32m1_t = nxv2f32;
+defvar vfloat32m2_t = nxv4f32;
+defvar vfloat32m4_t = nxv8f32;
+defvar vfloat32m8_t = nxv16f32;
+
+defvar vfloat64m1_t = nxv1f64;
+defvar vfloat64m2_t = nxv2f64;
+defvar vfloat64m4_t = nxv4f64;
+defvar vfloat64m8_t = nxv8f64;
+
+defvar vbool1_t = nxv64i1;
+defvar vbool2_t = nxv32i1;
+defvar vbool4_t = nxv16i1;
+defvar vbool8_t = nxv8i1;
+defvar vbool16_t = nxv4i1;
+defvar vbool32_t = nxv2i1;
+defvar vbool64_t = nxv1i1;
+
+// There is no need to define register classes for fractional LMUL.
+def LMULList {
+ list<int> m = [1, 2, 4, 8];
+}
+
+//===----------------------------------------------------------------------===//
+// Utility classes for segment load/store.
+//===----------------------------------------------------------------------===//
+// The set of legal NF for LMUL = lmul.
+// LMUL == 1, NF = 2, 3, 4, 5, 6, 7, 8
+// LMUL == 2, NF = 2, 3, 4
+// LMUL == 4, NF = 2
+class NFList<int lmul> {
+ list<int> L = !cond(!eq(lmul, 1): [2, 3, 4, 5, 6, 7, 8],
+ !eq(lmul, 2): [2, 3, 4],
+ !eq(lmul, 4): [2],
+ !eq(lmul, 8): []);
+}
+
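
The lists produced by NFList are exactly the field counts for which the whole segment group still fits the grouping limit, i.e. NF * LMUL <= 8 with NF >= 2; that is also why LMUL = 8 gets an empty list. A quick sketch that reproduces them:

    #include <cstdio>

    int main() {
      // Reproduce NFList<lmul>.L: NF in [2, 8] with NF * LMUL <= 8.
      for (int lmul : {1, 2, 4, 8}) {
        std::printf("LMUL=%d:", lmul);
        for (int nf = 2; nf <= 8; ++nf)
          if (nf * lmul <= 8)
            std::printf(" %d", nf);
        std::printf("\n");
      }
    }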
+// Generate [start, end) SubRegIndex list.
+class SubRegSet<list<SubRegIndex> LIn, int start, int nf, int lmul> {
+ list<SubRegIndex> L = !foldl([]<SubRegIndex>,
+ [0, 1, 2, 3, 4, 5, 6, 7],
+ AccList, i,
+ !listconcat(AccList,
+ !if(!lt(i, nf),
+ [!cast<SubRegIndex>("sub_vrm" # lmul # "_" # i)],
+ [])));
+}
+
+class IndexSet<int index, int nf, int lmul> {
+ list<int> R =
+ !foldl([]<int>,
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+ 23, 24, 25, 26, 27, 28, 29, 30, 31],
+ L, i,
+ !listconcat(L,
+ !if(!and(
+ !le(!mul(index, lmul), !mul(i, lmul)),
+ !le(!mul(i, lmul),
+ !sub(!add(32, !mul(index, lmul)), !mul(nf, lmul)))
+ ), [!mul(i, lmul)], [])));
+}
+
+class VRegList<list<dag> LIn, int start, int nf, int lmul> {
+ list<dag> L =
+ !if(!ge(start, nf),
+ LIn,
+ !listconcat(
+ [!dag(add,
+ !foreach(i, IndexSet<start, nf, lmul>.R,
+ !cast<Register>("V" # i # !cond(!eq(lmul, 2): "M2",
+ !eq(lmul, 4): "M4",
+ true: ""))),
+ !listsplat("", !size(IndexSet<start, nf, lmul>.R)))],
+ VRegList<LIn, !add(start, 1), nf, lmul>.L));
+}
+
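
Read together, IndexSet and VRegList enumerate where an NF-field segment tuple may live: the base register must be LMUL-aligned, the whole tuple of NF fields (each LMUL registers wide) must stay within v0..v31, and field k sits LMUL*k registers above the base. That reading is my interpretation of the folds above; a standalone sketch of the enumeration it implies:

    #include <cstdio>

    // List the legal base registers for NF-field segment tuples at a given LMUL.
    void listTupleStarts(int nf, int lmul) {
      std::printf("NF=%d LMUL=%d:", nf, lmul);
      for (int r = 0; r + nf * lmul <= 32; r += lmul) // aligned starts that keep the tuple in v0..v31
        std::printf(" v%d", r);
      std::printf("\n");
    }

    int main() {
      listTupleStarts(2, 1); // v0 v1 ... v30
      listTupleStarts(3, 2); // v0 v2 ... v26
      listTupleStarts(2, 4); // v0 v4 ... v24
    }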
// Vector registers
let RegAltNameIndices = [ABIRegAltName] in {
foreach Index = 0-31 in {
- def V#Index : RISCVReg<Index, "v"#Index, ["v"#Index]>, DwarfRegNum<[!add(Index, 64)]>;
+ def V#Index : RISCVReg<Index, "v"#Index, ["v"#Index]>, DwarfRegNum<[!add(Index, 96)]>;
}
foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
@@ -261,7 +421,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
!cast<Register>("V"#!add(Index, 1))],
["v"#Index]>,
DwarfRegAlias<!cast<Register>("V"#Index)> {
- let SubRegIndices = [sub_vrm2, sub_vrm2_hi];
+ let SubRegIndices = [sub_vrm1_0, sub_vrm1_1];
}
}
@@ -271,7 +431,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
!cast<Register>("V"#!add(Index, 2)#"M2")],
["v"#Index]>,
DwarfRegAlias<!cast<Register>("V"#Index)> {
- let SubRegIndices = [sub_vrm4, sub_vrm4_hi];
+ let SubRegIndices = [sub_vrm2_0, sub_vrm2_1];
}
}
@@ -281,54 +441,91 @@ let RegAltNameIndices = [ABIRegAltName] in {
!cast<Register>("V"#!add(Index, 4)#"M4")],
["v"#Index]>,
DwarfRegAlias<!cast<Register>("V"#Index)> {
- let SubRegIndices = [sub_vrm8, sub_vrm8_hi];
+ let SubRegIndices = [sub_vrm4_0, sub_vrm4_1];
}
}
def VTYPE : RISCVReg<0, "vtype", ["vtype"]>;
def VL : RISCVReg<0, "vl", ["vl"]>;
+ def VXSAT : RISCVReg<0, "vxsat", ["vxsat"]>;
+ def VXRM : RISCVReg<0, "vxrm", ["vxrm"]>;
}
-class RegisterTypes<list<ValueType> reg_types> {
- list<ValueType> types = reg_types;
-}
-
-// The order of registers represents the preferred allocation sequence,
-// meaning caller-save regs are listed before callee-save.
-def VR : RegisterClass<"RISCV", [nxv8i8, nxv4i16, nxv2i32, nxv1i64],
- 64, (add
- (sequence "V%u", 25, 31),
- (sequence "V%u", 8, 24),
- (sequence "V%u", 0, 7)
- )> {
- let Size = 64;
-}
-
-def VRM2 : RegisterClass<"RISCV", [nxv16i8, nxv8i16, nxv4i32, nxv2i64], 64,
- (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2,
- V18M2, V20M2, V22M2, V24M2, V0M2, V2M2, V4M2, V6M2)> {
- let Size = 128;
-}
-
-def VRM4 : RegisterClass<"RISCV", [nxv32i8, nxv16i16, nxv8i32, nxv4i64], 64,
- (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V0M4, V4M4)> {
- let Size = 256;
+foreach m = [1, 2, 4] in {
+ foreach n = NFList<m>.L in {
+ def "VN" # n # "M" # m: RegisterTuples<SubRegSet<[], 0, n, m>.L,
+ VRegList<[], 0, n, m>.L>;
+ }
}
-def VRM8 : RegisterClass<"RISCV", [nxv32i16, nxv16i32, nxv8i64], 64,
- (add V8M8, V16M8, V24M8, V0M8)> {
- let Size = 512;
+class VReg<list<ValueType> regTypes, dag regList, int Vlmul>
+ : RegisterClass<"RISCV",
+ regTypes,
+ 64, // The maximum supported ELEN is 64.
+ regList> {
+ int VLMul = Vlmul;
+ int Size = !mul(Vlmul, 64);
}
-def VMaskVT : RegisterTypes<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, nxv32i1]>;
-
-def VM : RegisterClass<"RISCV", VMaskVT.types, 64, (add
- (sequence "V%u", 25, 31),
- (sequence "V%u", 8, 24),
- (sequence "V%u", 0, 7))> {
+def VR : VReg<[vint8mf2_t, vint8mf4_t, vint8mf8_t,
+ vint16mf2_t, vint16mf4_t, vint32mf2_t,
+ vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
+ vfloat16mf4_t, vfloat16mf2_t, vfloat16m1_t,
+ vfloat32mf2_t, vfloat32m1_t, vfloat64m1_t,
+ vbool64_t, vbool32_t, vbool16_t, vbool8_t, vbool4_t,
+ vbool2_t, vbool1_t],
+ (add (sequence "V%u", 25, 31),
+ (sequence "V%u", 8, 24),
+ (sequence "V%u", 0, 7)), 1>;
+
+def VRNoV0 : VReg<[vint8mf2_t, vint8mf4_t, vint8mf8_t,
+ vint16mf2_t, vint16mf4_t, vint32mf2_t,
+ vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
+ vfloat16mf4_t, vfloat16mf2_t, vfloat16m1_t,
+ vfloat32mf2_t, vfloat32m1_t, vfloat64m1_t,
+ vbool64_t, vbool32_t, vbool16_t, vbool8_t, vbool4_t,
+ vbool2_t, vbool1_t],
+ (add (sequence "V%u", 25, 31),
+ (sequence "V%u", 8, 24),
+ (sequence "V%u", 1, 7)), 1>;
+
+def VRM2 : VReg<[vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
+ vfloat16m2_t, vfloat32m2_t, vfloat64m2_t],
+ (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2,
+ V18M2, V20M2, V22M2, V24M2, V0M2, V2M2, V4M2, V6M2), 2>;
+
+def VRM2NoV0 : VReg<[vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
+ vfloat16m2_t, vfloat32m2_t, vfloat64m2_t],
+ (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2,
+ V18M2, V20M2, V22M2, V24M2, V2M2, V4M2, V6M2), 2>;
+
+def VRM4 : VReg<[vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t,
+ vfloat16m4_t, vfloat32m4_t, vfloat64m4_t],
+ (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V0M4, V4M4), 4>;
+
+def VRM4NoV0 : VReg<[vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t,
+ vfloat16m4_t, vfloat32m4_t, vfloat64m4_t],
+ (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V4M4), 4>;
+
+def VRM8 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
+ vfloat16m8_t, vfloat32m8_t, vfloat64m8_t],
+ (add V8M8, V16M8, V24M8, V0M8), 8>;
+
+def VRM8NoV0 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
+ vfloat16m8_t, vfloat32m8_t, vfloat64m8_t],
+ (add V8M8, V16M8, V24M8), 8>;
+
+defvar VMaskVTs = [vbool64_t, vbool32_t, vbool16_t, vbool8_t,
+ vbool4_t, vbool2_t, vbool1_t];
+
+def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> {
let Size = 64;
}
-def VMV0 : RegisterClass<"RISCV", VMaskVT.types, 64, (add V0)> {
- let Size = 64;
+foreach m = LMULList.m in {
+ foreach nf = NFList<m>.L in {
+ def "VRN" # nf # "M" # m : VReg<[untyped],
+ (add !cast<RegisterTuples>("VN" # nf # "M" # m)),
+ !mul(nf, m)>;
+ }
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td
new file mode 100644
index 000000000000..de2cdf512e87
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -0,0 +1,233 @@
+//==- RISCVSchedRocket.td - Rocket Scheduling Definitions ----*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See MCSchedule.h for details.
+
+// Rocket machine model for scheduling and other instruction cost heuristics.
+def RocketModel : SchedMachineModel {
+ let MicroOpBufferSize = 0; // Rocket is in-order.
+ let IssueWidth = 1; // 1 micro-op is dispatched per cycle.
+ let LoadLatency = 3;
+ let MispredictPenalty = 3;
+ let UnsupportedFeatures = [HasStdExtV, HasStdExtZvamo, HasStdExtZvlsseg];
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Modeling each pipeline as a ProcResource with BufferSize = 0 since
+// Rocket is in-order.
+
+let BufferSize = 0 in {
+def RocketUnitALU : ProcResource<1>; // Int ALU
+def RocketUnitIMul : ProcResource<1>; // Int Multiply
+def RocketUnitMem : ProcResource<1>; // Load/Store
+def RocketUnitB : ProcResource<1>; // Branch
+
+def RocketUnitFPALU : ProcResource<1>; // FP ALU
+}
+
+let BufferSize = 1 in {
+def RocketUnitIDiv : ProcResource<1>; // Int Division
+def RocketUnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt
+}
+
+//===----------------------------------------------------------------------===//
+
+let SchedModel = RocketModel in {
+
+// Branching
+def : WriteRes<WriteJmp, [RocketUnitB]>;
+def : WriteRes<WriteJal, [RocketUnitB]>;
+def : WriteRes<WriteJalr, [RocketUnitB]>;
+def : WriteRes<WriteJmpReg, [RocketUnitB]>;
+
+// Integer arithmetic and logic
+def : WriteRes<WriteIALU32, [RocketUnitALU]>;
+def : WriteRes<WriteIALU, [RocketUnitALU]>;
+def : WriteRes<WriteShift32, [RocketUnitALU]>;
+def : WriteRes<WriteShift, [RocketUnitALU]>;
+
+// Integer multiplication
+let Latency = 4 in {
+def : WriteRes<WriteIMul, [RocketUnitIMul]>;
+def : WriteRes<WriteIMul32, [RocketUnitIMul]>;
+}
+
+// Integer division
+// Worst case latency is used.
+def : WriteRes<WriteIDiv32, [RocketUnitIDiv]> {
+ let Latency = 34;
+ let ResourceCycles = [34];
+}
+def : WriteRes<WriteIDiv, [RocketUnitIDiv]> {
+ let Latency = 33;
+ let ResourceCycles = [33];
+}
+
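
Setting ResourceCycles equal to Latency models a divider that is not pipelined (the FP-divide comments further down state this explicitly for the FP unit), so a second divide cannot start until the first releases the unit. A tiny model of the consequence for independent back-to-back divides; this is a sketch of the idea, not what the scheduler literally computes:

    #include <cstdio>

    // Completion time of n independent ops on one unit.  With occupancy == 1
    // (fully pipelined) later ops overlap; with occupancy == latency they serialize.
    int finishTime(int n, int latency, int occupancy) {
      int start = 0, done = 0;
      for (int i = 0; i < n; ++i) {
        done = start + latency;
        start += occupancy; // next op issues only once the unit is free
      }
      return done;
    }

    int main() {
      std::printf("4 divides, latency 33, non-pipelined: %d cycles\n", finishTime(4, 33, 33)); // 132
      std::printf("4 divides, latency 33, pipelined:     %d cycles\n", finishTime(4, 33, 1));  // 36
    }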
+// Memory
+def : WriteRes<WriteSTB, [RocketUnitMem]>;
+def : WriteRes<WriteSTH, [RocketUnitMem]>;
+def : WriteRes<WriteSTW, [RocketUnitMem]>;
+def : WriteRes<WriteSTD, [RocketUnitMem]>;
+def : WriteRes<WriteFST32, [RocketUnitMem]>;
+def : WriteRes<WriteFST64, [RocketUnitMem]>;
+
+let Latency = 3 in {
+def : WriteRes<WriteLDB, [RocketUnitMem]>;
+def : WriteRes<WriteLDH, [RocketUnitMem]>;
+}
+
+let Latency = 2 in {
+def : WriteRes<WriteLDW, [RocketUnitMem]>;
+def : WriteRes<WriteLDWU, [RocketUnitMem]>;
+def : WriteRes<WriteLDD, [RocketUnitMem]>;
+def : WriteRes<WriteFLD32, [RocketUnitMem]>;
+def : WriteRes<WriteFLD64, [RocketUnitMem]>;
+
+// Atomic memory
+def : WriteRes<WriteAtomicW, [RocketUnitMem]>;
+def : WriteRes<WriteAtomicD, [RocketUnitMem]>;
+
+def : WriteRes<WriteAtomicLDW, [RocketUnitMem]>;
+def : WriteRes<WriteAtomicLDD, [RocketUnitMem]>;
+}
+
+def : WriteRes<WriteAtomicSTW, [RocketUnitMem]>;
+def : WriteRes<WriteAtomicSTD, [RocketUnitMem]>;
+
+// Single precision.
+let Latency = 4 in {
+def : WriteRes<WriteFALU32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFSGNJ32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFMinMax32, [RocketUnitFPALU]>;
+}
+
+// Double precision
+let Latency = 6 in {
+def : WriteRes<WriteFALU64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFSGNJ64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFMinMax64, [RocketUnitFPALU]>;
+}
+
+// Conversions
+let Latency = 2 in {
+def : WriteRes<WriteFCvtI32ToF32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCvtI32ToF64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCvtI64ToF32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCvtI64ToF64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToI32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToI64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToI32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToI64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToF64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToF32, [RocketUnitFPALU]>;
+
+def : WriteRes<WriteFClass32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFClass64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCmp32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFCmp64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFMovF32ToI32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFMovI32ToF32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFMovF64ToI64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFMovI64ToF64, [RocketUnitFPALU]>;
+}
+
+// FP multiplication
+let Latency = 5 in {
+def : WriteRes<WriteFMul32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFMulAdd32, [RocketUnitFPALU]>;
+def : WriteRes<WriteFMulSub32, [RocketUnitFPALU]>;
+}
+
+let Latency = 7 in {
+def : WriteRes<WriteFMul64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFMulAdd64, [RocketUnitFPALU]>;
+def : WriteRes<WriteFMulSub64, [RocketUnitFPALU]>;
+}
+
+// FP division
+// FP division unit on Rocket is not pipelined, so set resource cycles to latency.
+let Latency = 20, ResourceCycles = [20] in {
+def : WriteRes<WriteFDiv32, [RocketUnitFPDivSqrt]>;
+def : WriteRes<WriteFDiv64, [RocketUnitFPDivSqrt]>;
+}
+
+// FP square root unit on Rocket is not pipelined, so set resource cycles to latency.
+def : WriteRes<WriteFSqrt32, [RocketUnitFPDivSqrt]> { let Latency = 20;
+ let ResourceCycles = [20]; }
+def : WriteRes<WriteFSqrt64, [RocketUnitFPDivSqrt]> { let Latency = 25;
+ let ResourceCycles = [25]; }
+
+// Others
+def : WriteRes<WriteCSR, []>;
+def : WriteRes<WriteNop, []>;
+
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
+//===----------------------------------------------------------------------===//
+// Bypass and advance
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : ReadAdvance<ReadIALU, 0>;
+def : ReadAdvance<ReadIALU32, 0>;
+def : ReadAdvance<ReadShift, 0>;
+def : ReadAdvance<ReadShift32, 0>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFALU32, 0>;
+def : ReadAdvance<ReadFALU64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMulAdd32, 0>;
+def : ReadAdvance<ReadFMulSub32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMulAdd64, 0>;
+def : ReadAdvance<ReadFMulSub64, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td
deleted file mode 100644
index 305e2b9b5927..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td
+++ /dev/null
@@ -1,227 +0,0 @@
-//==- RISCVSchedRocket32.td - Rocket Scheduling Definitions -*- tablegen -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// ===---------------------------------------------------------------------===//
-// The following definitions describe the simpler per-operand machine model.
-// This works with MachineScheduler. See MCSchedule.h for details.
-
-// Rocket machine model for scheduling and other instruction cost heuristics.
-def Rocket32Model : SchedMachineModel {
- let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order.
- let IssueWidth = 1; // 1 micro-ops are dispatched per cycle.
- let LoadLatency = 3;
- let MispredictPenalty = 3;
- let CompleteModel = 1;
- let UnsupportedFeatures = [HasStdExtV];
-}
-
-//===----------------------------------------------------------------------===//
-// Define each kind of processor resource and number available.
-
-// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
-// Rocket is in-order.
-
-let BufferSize = 0 in {
-def Rocket32UnitALU : ProcResource<1>; // Int ALU
-def Rocket32UnitIMul : ProcResource<1>; // Int Multiply
-def Rocket32UnitMem : ProcResource<1>; // Load/Store
-def Rocket32UnitB : ProcResource<1>; // Branch
-
-def Rocket32UnitFPALU : ProcResource<1>; // FP ALU
-}
-
-let BufferSize = 1 in {
-def Rocket32UnitIDiv : ProcResource<1>; // Int Division
-def Rocket32UnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt'
-}
-
-//===----------------------------------------------------------------------===//
-// Subtarget-specific SchedWrite types which both map the ProcResources and
-// set the latency.
-
-let SchedModel = Rocket32Model in {
-
-def : WriteRes<WriteJmp, [Rocket32UnitB]>;
-def : WriteRes<WriteJal, [Rocket32UnitB]>;
-def : WriteRes<WriteJalr, [Rocket32UnitB]>;
-def : WriteRes<WriteJmpReg, [Rocket32UnitB]>;
-
-def : WriteRes<WriteIALU, [Rocket32UnitALU]>;
-def : WriteRes<WriteShift, [Rocket32UnitALU]>;
-
-// Multiplies on Rocket differ by implementation; placeholder until
-// we can determine how to read from command line
-def : WriteRes<WriteIMul, [Rocket32UnitIMul]> { let Latency = 4; }
-
-// 32-bit divides have worse case latency of 34 cycle
-def : WriteRes<WriteIDiv, [Rocket32UnitIDiv]> {
- let Latency = 34;
- let ResourceCycles = [34];
-}
-
-// Memory
-def : WriteRes<WriteSTB, [Rocket32UnitMem]>;
-def : WriteRes<WriteSTH, [Rocket32UnitMem]>;
-def : WriteRes<WriteSTW, [Rocket32UnitMem]>;
-def : WriteRes<WriteFST32, [Rocket32UnitMem]>;
-def : WriteRes<WriteFST64, [Rocket32UnitMem]>;
-
-let Latency = 3 in {
-def : WriteRes<WriteLDB, [Rocket32UnitMem]>;
-def : WriteRes<WriteLDH, [Rocket32UnitMem]>;
-def : WriteRes<WriteCSR, [Rocket32UnitALU]>;
-}
-
-let Latency = 2 in {
-def : WriteRes<WriteLDW, [Rocket32UnitMem]>;
-def : WriteRes<WriteFLD32, [Rocket32UnitMem]>;
-def : WriteRes<WriteFLD64, [Rocket32UnitMem]>;
-
-def : WriteRes<WriteAtomicW, [Rocket32UnitMem]>;
-def : WriteRes<WriteAtomicLDW, [Rocket32UnitMem]>;
-}
-
-def : WriteRes<WriteAtomicSTW, [Rocket32UnitMem]>;
-
-// Most FP single precision operations are 4 cycles
-let Latency = 4 in {
-def : WriteRes<WriteFALU32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFSGNJ32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFMinMax32, [Rocket32UnitFPALU]>;
-}
-
-// Most FP double precision operations are 6 cycles
-let Latency = 6 in {
-def : WriteRes<WriteFALU64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFSGNJ64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFMinMax64, [Rocket32UnitFPALU]>;
-}
-
-let Latency = 2 in {
-def : WriteRes<WriteFCvtI32ToF32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtI32ToF64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtF32ToI32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtF64ToI32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtF32ToF64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtF64ToF32, [Rocket32UnitFPALU]>;
-
-def : WriteRes<WriteFClass32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFClass64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCmp32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCmp64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFMovF32ToI32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFMovI32ToF32, [Rocket32UnitFPALU]>;
-}
-
-let Latency = 5 in {
-def : WriteRes<WriteFMul32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFMulAdd32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFMulSub32, [Rocket32UnitFPALU]>;
-}
-
-let Latency = 7 in {
-def : WriteRes<WriteFMul64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFMulAdd64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFMulSub64, [Rocket32UnitFPALU]>;
-}
-
-// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency
-let Latency = 20, ResourceCycles = [20] in {
-def : WriteRes<WriteFDiv32, [Rocket32UnitFPDivSqrt]>;
-def : WriteRes<WriteFDiv64, [Rocket32UnitFPDivSqrt]>;
-}
-
-// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency
-def : WriteRes<WriteFSqrt32, [Rocket32UnitFPDivSqrt]> { let Latency = 20;
- let ResourceCycles = [20];}
-def : WriteRes<WriteFSqrt64, [Rocket32UnitFPDivSqrt]> { let Latency = 25;
- let ResourceCycles = [25];}
-
-def : WriteRes<WriteNop, []>;
-
-def : InstRW<[WriteIALU], (instrs COPY)>;
-
-let Unsupported = 1 in {
-def : WriteRes<WriteIALU32, []>;
-def : WriteRes<WriteShift32, []>;
-def : WriteRes<WriteIMul32, []>;
-def : WriteRes<WriteIDiv32, []>;
-def : WriteRes<WriteSTD, []>;
-def : WriteRes<WriteLDWU, []>;
-def : WriteRes<WriteLDD, []>;
-def : WriteRes<WriteAtomicD, []>;
-def : WriteRes<WriteAtomicLDD, []>;
-def : WriteRes<WriteAtomicSTD, []>;
-def : WriteRes<WriteFCvtI64ToF32, []>;
-def : WriteRes<WriteFCvtI64ToF64, []>;
-def : WriteRes<WriteFCvtF64ToI64, []>;
-def : WriteRes<WriteFCvtF32ToI64, []>;
-def : WriteRes<WriteFMovI64ToF64, []>;
-def : WriteRes<WriteFMovF64ToI64, []>;
-}
-
-//===----------------------------------------------------------------------===//
-// Subtarget-specific SchedRead types with cycles.
-// Dummy definitions for RocketCore.
-def : ReadAdvance<ReadJmp, 0>;
-def : ReadAdvance<ReadJalr, 0>;
-def : ReadAdvance<ReadCSR, 0>;
-def : ReadAdvance<ReadStoreData, 0>;
-def : ReadAdvance<ReadMemBase, 0>;
-def : ReadAdvance<ReadIALU, 0>;
-def : ReadAdvance<ReadIALU32, 0>;
-def : ReadAdvance<ReadShift, 0>;
-def : ReadAdvance<ReadShift32, 0>;
-def : ReadAdvance<ReadIDiv, 0>;
-def : ReadAdvance<ReadIDiv32, 0>;
-def : ReadAdvance<ReadIMul, 0>;
-def : ReadAdvance<ReadIMul32, 0>;
-def : ReadAdvance<ReadAtomicWA, 0>;
-def : ReadAdvance<ReadAtomicWD, 0>;
-def : ReadAdvance<ReadAtomicDA, 0>;
-def : ReadAdvance<ReadAtomicDD, 0>;
-def : ReadAdvance<ReadAtomicLDW, 0>;
-def : ReadAdvance<ReadAtomicLDD, 0>;
-def : ReadAdvance<ReadAtomicSTW, 0>;
-def : ReadAdvance<ReadAtomicSTD, 0>;
-def : ReadAdvance<ReadFMemBase, 0>;
-def : ReadAdvance<ReadFALU32, 0>;
-def : ReadAdvance<ReadFALU64, 0>;
-def : ReadAdvance<ReadFMul32, 0>;
-def : ReadAdvance<ReadFMulAdd32, 0>;
-def : ReadAdvance<ReadFMulSub32, 0>;
-def : ReadAdvance<ReadFMul64, 0>;
-def : ReadAdvance<ReadFMulAdd64, 0>;
-def : ReadAdvance<ReadFMulSub64, 0>;
-def : ReadAdvance<ReadFDiv32, 0>;
-def : ReadAdvance<ReadFDiv64, 0>;
-def : ReadAdvance<ReadFSqrt32, 0>;
-def : ReadAdvance<ReadFSqrt64, 0>;
-def : ReadAdvance<ReadFCmp32, 0>;
-def : ReadAdvance<ReadFCmp64, 0>;
-def : ReadAdvance<ReadFSGNJ32, 0>;
-def : ReadAdvance<ReadFSGNJ64, 0>;
-def : ReadAdvance<ReadFMinMax32, 0>;
-def : ReadAdvance<ReadFMinMax64, 0>;
-def : ReadAdvance<ReadFCvtF32ToI32, 0>;
-def : ReadAdvance<ReadFCvtF32ToI64, 0>;
-def : ReadAdvance<ReadFCvtF64ToI32, 0>;
-def : ReadAdvance<ReadFCvtF64ToI64, 0>;
-def : ReadAdvance<ReadFCvtI32ToF32, 0>;
-def : ReadAdvance<ReadFCvtI32ToF64, 0>;
-def : ReadAdvance<ReadFCvtI64ToF32, 0>;
-def : ReadAdvance<ReadFCvtI64ToF64, 0>;
-def : ReadAdvance<ReadFCvtF32ToF64, 0>;
-def : ReadAdvance<ReadFCvtF64ToF32, 0>;
-def : ReadAdvance<ReadFMovF32ToI32, 0>;
-def : ReadAdvance<ReadFMovI32ToF32, 0>;
-def : ReadAdvance<ReadFMovF64ToI64, 0>;
-def : ReadAdvance<ReadFMovI64ToF64, 0>;
-def : ReadAdvance<ReadFClass32, 0>;
-def : ReadAdvance<ReadFClass64, 0>;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td
deleted file mode 100644
index e8514a275c45..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td
+++ /dev/null
@@ -1,228 +0,0 @@
-//==- RISCVSchedRocket64.td - Rocket Scheduling Definitions -*- tablegen -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// ===---------------------------------------------------------------------===//
-// The following definitions describe the simpler per-operand machine model.
-// This works with MachineScheduler. See MCSchedule.h for details.
-
-// Rocket machine model for scheduling and other instruction cost heuristics.
-def Rocket64Model : SchedMachineModel {
- let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order.
- let IssueWidth = 1; // 1 micro-ops are dispatched per cycle.
- let LoadLatency = 3;
- let MispredictPenalty = 3;
- let UnsupportedFeatures = [HasStdExtV];
-}
-
-//===----------------------------------------------------------------------===//
-// Define each kind of processor resource and number available.
-
-// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
-// Rocket is in-order.
-
-let BufferSize = 0 in {
-def Rocket64UnitALU : ProcResource<1>; // Int ALU
-def Rocket64UnitIMul : ProcResource<1>; // Int Multiply
-def Rocket64UnitMem : ProcResource<1>; // Load/Store
-def Rocket64UnitB : ProcResource<1>; // Branch
-
-def Rocket64UnitFPALU : ProcResource<1>; // FP ALU
-}
-
-let BufferSize = 1 in {
-def Rocket64UnitIDiv : ProcResource<1>; // Int Division
-def Rocket64UnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt
-}
-
-//===----------------------------------------------------------------------===//
-// Subtarget-specific SchedWrite types which both map the ProcResources and
-// set the latency.
-
-let SchedModel = Rocket64Model in {
-
-def : WriteRes<WriteJmp, [Rocket64UnitB]>;
-def : WriteRes<WriteJal, [Rocket64UnitB]>;
-def : WriteRes<WriteJalr, [Rocket64UnitB]>;
-def : WriteRes<WriteJmpReg, [Rocket64UnitB]>;
-
-def : WriteRes<WriteIALU32, [Rocket64UnitALU]>;
-def : WriteRes<WriteIALU, [Rocket64UnitALU]>;
-def : WriteRes<WriteShift32, [Rocket64UnitALU]>;
-def : WriteRes<WriteShift, [Rocket64UnitALU]>;
-
-let Latency = 4 in {
-def : WriteRes<WriteIMul, [Rocket64UnitIMul]>;
-def : WriteRes<WriteIMul32, [Rocket64UnitIMul]>;
-}
-
-// Integer divide varies based on operand magnitude and sign; worse case latency is 34.
-def : WriteRes<WriteIDiv32, [Rocket64UnitIDiv]> {
- let Latency = 34;
- let ResourceCycles = [34];
-}
-def : WriteRes<WriteIDiv, [Rocket64UnitIDiv]> {
- let Latency = 33;
- let ResourceCycles = [33];
-}
-
-// Memory
-def : WriteRes<WriteSTB, [Rocket64UnitMem]>;
-def : WriteRes<WriteSTH, [Rocket64UnitMem]>;
-def : WriteRes<WriteSTW, [Rocket64UnitMem]>;
-def : WriteRes<WriteSTD, [Rocket64UnitMem]>;
-def : WriteRes<WriteFST32, [Rocket64UnitMem]>;
-def : WriteRes<WriteFST64, [Rocket64UnitMem]>;
-
-let Latency = 3 in {
-def : WriteRes<WriteLDB, [Rocket64UnitMem]>;
-def : WriteRes<WriteLDH, [Rocket64UnitMem]>;
-def : WriteRes<WriteCSR, [Rocket64UnitALU]>;
-}
-
-let Latency = 2 in {
-def : WriteRes<WriteLDW, [Rocket64UnitMem]>;
-def : WriteRes<WriteLDWU, [Rocket64UnitMem]>;
-def : WriteRes<WriteLDD, [Rocket64UnitMem]>;
-def : WriteRes<WriteFLD32, [Rocket64UnitMem]>;
-def : WriteRes<WriteFLD64, [Rocket64UnitMem]>;
-
-def : WriteRes<WriteAtomicW, [Rocket64UnitMem]>;
-def : WriteRes<WriteAtomicD, [Rocket64UnitMem]>;
-
-def : WriteRes<WriteAtomicLDW, [Rocket64UnitMem]>;
-def : WriteRes<WriteAtomicLDD, [Rocket64UnitMem]>;
-}
-
-def : WriteRes<WriteAtomicSTW, [Rocket64UnitMem]>;
-def : WriteRes<WriteAtomicSTD, [Rocket64UnitMem]>;
-
-// Most FP single precision operations are 4 cycles
-let Latency = 4 in {
-def : WriteRes<WriteFALU32, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFSGNJ32, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFMinMax32, [Rocket64UnitFPALU]>;
-}
-
-let Latency = 6 in {
-// Most FP double precision operations are 6 cycles
-def : WriteRes<WriteFALU64, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFSGNJ64, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFMinMax64, [Rocket64UnitFPALU]>;
-}
-
-// Conversion instructions
-let Latency = 2 in {
-def : WriteRes<WriteFCvtI32ToF32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtI32ToF64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtI64ToF32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtI64ToF64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtF32ToI32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtF32ToI64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtF64ToI32, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtF64ToI64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtF32ToF64, [Rocket32UnitFPALU]>;
-def : WriteRes<WriteFCvtF64ToF32, [Rocket32UnitFPALU]>;
-
-def : WriteRes<WriteFClass32, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFClass64, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFCmp32, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFCmp64, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFMovF32ToI32, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFMovI32ToF32, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFMovF64ToI64, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFMovI64ToF64, [Rocket64UnitFPALU]>;
-}
-
-let Latency = 5 in {
-def : WriteRes<WriteFMul32, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFMulAdd32, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFMulSub32, [Rocket64UnitFPALU]>;
-}
-
-let Latency = 7 in {
-def : WriteRes<WriteFMul64, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFMulAdd64, [Rocket64UnitFPALU]>;
-def : WriteRes<WriteFMulSub64, [Rocket64UnitFPALU]>;
-}
-
-// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency
-let Latency = 20, ResourceCycles = [20] in {
-def : WriteRes<WriteFDiv32, [Rocket64UnitFPDivSqrt]>;
-def : WriteRes<WriteFDiv64, [Rocket64UnitFPDivSqrt]>;
-}
-
-// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency
-def : WriteRes<WriteFSqrt32, [Rocket64UnitFPDivSqrt]> { let Latency = 20;
- let ResourceCycles = [20]; }
-def : WriteRes<WriteFSqrt64, [Rocket64UnitFPDivSqrt]> { let Latency = 25;
- let ResourceCycles = [25]; }
-
-def : WriteRes<WriteNop, []>;
-
-def : InstRW<[WriteIALU], (instrs COPY)>;
-
-//===----------------------------------------------------------------------===//
-// Subtarget-specific SchedRead types with cycles.
-// Dummy definitions for RocketCore.
-def : ReadAdvance<ReadJmp, 0>;
-def : ReadAdvance<ReadJalr, 0>;
-def : ReadAdvance<ReadCSR, 0>;
-def : ReadAdvance<ReadStoreData, 0>;
-def : ReadAdvance<ReadMemBase, 0>;
-def : ReadAdvance<ReadIALU, 0>;
-def : ReadAdvance<ReadIALU32, 0>;
-def : ReadAdvance<ReadShift, 0>;
-def : ReadAdvance<ReadShift32, 0>;
-def : ReadAdvance<ReadIDiv, 0>;
-def : ReadAdvance<ReadIDiv32, 0>;
-def : ReadAdvance<ReadIMul, 0>;
-def : ReadAdvance<ReadIMul32, 0>;
-def : ReadAdvance<ReadAtomicWA, 0>;
-def : ReadAdvance<ReadAtomicWD, 0>;
-def : ReadAdvance<ReadAtomicDA, 0>;
-def : ReadAdvance<ReadAtomicDD, 0>;
-def : ReadAdvance<ReadAtomicLDW, 0>;
-def : ReadAdvance<ReadAtomicLDD, 0>;
-def : ReadAdvance<ReadAtomicSTW, 0>;
-def : ReadAdvance<ReadAtomicSTD, 0>;
-def : ReadAdvance<ReadFMemBase, 0>;
-def : ReadAdvance<ReadFALU32, 0>;
-def : ReadAdvance<ReadFALU64, 0>;
-def : ReadAdvance<ReadFMul32, 0>;
-def : ReadAdvance<ReadFMulAdd32, 0>;
-def : ReadAdvance<ReadFMulSub32, 0>;
-def : ReadAdvance<ReadFMul64, 0>;
-def : ReadAdvance<ReadFMulAdd64, 0>;
-def : ReadAdvance<ReadFMulSub64, 0>;
-def : ReadAdvance<ReadFDiv32, 0>;
-def : ReadAdvance<ReadFDiv64, 0>;
-def : ReadAdvance<ReadFSqrt32, 0>;
-def : ReadAdvance<ReadFSqrt64, 0>;
-def : ReadAdvance<ReadFCmp32, 0>;
-def : ReadAdvance<ReadFCmp64, 0>;
-def : ReadAdvance<ReadFSGNJ32, 0>;
-def : ReadAdvance<ReadFSGNJ64, 0>;
-def : ReadAdvance<ReadFMinMax32, 0>;
-def : ReadAdvance<ReadFMinMax64, 0>;
-def : ReadAdvance<ReadFCvtF32ToI32, 0>;
-def : ReadAdvance<ReadFCvtF32ToI64, 0>;
-def : ReadAdvance<ReadFCvtF64ToI32, 0>;
-def : ReadAdvance<ReadFCvtF64ToI64, 0>;
-def : ReadAdvance<ReadFCvtI32ToF32, 0>;
-def : ReadAdvance<ReadFCvtI32ToF64, 0>;
-def : ReadAdvance<ReadFCvtI64ToF32, 0>;
-def : ReadAdvance<ReadFCvtI64ToF64, 0>;
-def : ReadAdvance<ReadFCvtF32ToF64, 0>;
-def : ReadAdvance<ReadFCvtF64ToF32, 0>;
-def : ReadAdvance<ReadFMovF32ToI32, 0>;
-def : ReadAdvance<ReadFMovI32ToF32, 0>;
-def : ReadAdvance<ReadFMovF64ToI64, 0>;
-def : ReadAdvance<ReadFMovI64ToF64, 0>;
-def : ReadAdvance<ReadFClass32, 0>;
-def : ReadAdvance<ReadFClass64, 0>;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
new file mode 100644
index 000000000000..e57ba4f61b98
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -0,0 +1,222 @@
+//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+
+// SiFive7 machine model for scheduling and other instruction cost heuristics.
+def SiFive7Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let LoadLatency = 3;
+ let MispredictPenalty = 3;
+ let CompleteModel = 0;
+ let UnsupportedFeatures = [HasStdExtV, HasStdExtZvamo, HasStdExtZvlsseg];
+}
+
+// The SiFive7 microarchitecture has two pipelines: A and B.
+// Pipe A can handle memory, integer alu and vector operations.
+// Pipe B can handle integer alu, control flow, integer multiply and divide,
+// and floating point computation.
+let SchedModel = SiFive7Model in {
+let BufferSize = 0 in {
+def SiFive7PipeA : ProcResource<1>;
+def SiFive7PipeB : ProcResource<1>;
+}
+
+let BufferSize = 1 in {
+def SiFive7IDiv : ProcResource<1> { let Super = SiFive7PipeB; } // Int Division
+def SiFive7FDiv : ProcResource<1> { let Super = SiFive7PipeB; } // FP Division/Sqrt
+}
+
+def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
+
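
The comment above describes a 2-wide in-order core with asymmetric pipes: memory operations need pipe A, control flow, multiplies/divides and floating point need pipe B, and plain integer ALU ops can go to either (SiFive7PipeAB). Below is a toy greedy issue model of that constraint, with invented op kinds; it is only an illustration, not the MachineScheduler:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    enum class Kind { AOnly, BOnly, Either }; // e.g. memory -> AOnly, FP/branch -> BOnly, ALU -> Either

    // Cycles needed to issue a sequence in order, at most one op per pipe per cycle.
    int issueCycles(const std::vector<Kind> &ops) {
      int cycles = 0;
      for (std::size_t i = 0; i < ops.size();) {
        bool aBusy = false, bBusy = false;
        int issued = 0;
        while (i < ops.size() && issued < 2) {
          Kind k = ops[i];
          if ((k == Kind::AOnly || k == Kind::Either) && !aBusy)
            aBusy = true;                       // greedily prefer pipe A when allowed
          else if ((k == Kind::BOnly || k == Kind::Either) && !bBusy)
            bBusy = true;
          else
            break;                              // the pipe this op needs is taken this cycle
          ++i;
          ++issued;
        }
        ++cycles;
      }
      return cycles;
    }

    int main() {
      // load, add, fadd, load, add, add
      std::vector<Kind> ops = {Kind::AOnly, Kind::Either, Kind::BOnly,
                               Kind::AOnly, Kind::Either, Kind::Either};
      std::printf("issued in %d cycles\n", issueCycles(ops)); // 3
    }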
+// Branching
+def : WriteRes<WriteJmp, [SiFive7PipeB]>;
+def : WriteRes<WriteJal, [SiFive7PipeB]>;
+def : WriteRes<WriteJalr, [SiFive7PipeB]>;
+def : WriteRes<WriteJmpReg, [SiFive7PipeB]>;
+
+// Integer arithmetic and logic
+let Latency = 3 in {
+def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
+def : WriteRes<WriteIALU32, [SiFive7PipeAB]>;
+def : WriteRes<WriteShift, [SiFive7PipeAB]>;
+def : WriteRes<WriteShift32, [SiFive7PipeAB]>;
+}
+
+// Integer multiplication
+let Latency = 3 in {
+def : WriteRes<WriteIMul, [SiFive7PipeB]>;
+def : WriteRes<WriteIMul32, [SiFive7PipeB]>;
+}
+
+// Integer division
+def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> {
+ let Latency = 16;
+ let ResourceCycles = [1, 15];
+}
+def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> {
+ let Latency = 16;
+ let ResourceCycles = [1, 15];
+}
+
+// Memory
+def : WriteRes<WriteSTB, [SiFive7PipeA]>;
+def : WriteRes<WriteSTH, [SiFive7PipeA]>;
+def : WriteRes<WriteSTW, [SiFive7PipeA]>;
+def : WriteRes<WriteSTD, [SiFive7PipeA]>;
+def : WriteRes<WriteFST32, [SiFive7PipeA]>;
+def : WriteRes<WriteFST64, [SiFive7PipeA]>;
+
+let Latency = 3 in {
+def : WriteRes<WriteLDB, [SiFive7PipeA]>;
+def : WriteRes<WriteLDH, [SiFive7PipeA]>;
+def : WriteRes<WriteLDW, [SiFive7PipeA]>;
+def : WriteRes<WriteLDWU, [SiFive7PipeA]>;
+def : WriteRes<WriteLDD, [SiFive7PipeA]>;
+}
+
+let Latency = 2 in {
+def : WriteRes<WriteFLD32, [SiFive7PipeA]>;
+def : WriteRes<WriteFLD64, [SiFive7PipeA]>;
+}
+
+// Atomic memory
+def : WriteRes<WriteAtomicSTW, [SiFive7PipeA]>;
+def : WriteRes<WriteAtomicSTD, [SiFive7PipeA]>;
+
+let Latency = 3 in {
+def : WriteRes<WriteAtomicW, [SiFive7PipeA]>;
+def : WriteRes<WriteAtomicD, [SiFive7PipeA]>;
+def : WriteRes<WriteAtomicLDW, [SiFive7PipeA]>;
+def : WriteRes<WriteAtomicLDD, [SiFive7PipeA]>;
+}
+
+// Single precision.
+let Latency = 5 in {
+def : WriteRes<WriteFALU32, [SiFive7PipeB]>;
+def : WriteRes<WriteFMul32, [SiFive7PipeB]>;
+def : WriteRes<WriteFMulAdd32, [SiFive7PipeB]>;
+def : WriteRes<WriteFMulSub32, [SiFive7PipeB]>;
+}
+let Latency = 3 in {
+def : WriteRes<WriteFSGNJ32, [SiFive7PipeB]>;
+def : WriteRes<WriteFMinMax32, [SiFive7PipeB]>;
+}
+
+def : WriteRes<WriteFDiv32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
+ let ResourceCycles = [1, 26]; }
+def : WriteRes<WriteFSqrt32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
+ let ResourceCycles = [1, 26]; }
+
+// Double precision
+let Latency = 7 in {
+def : WriteRes<WriteFALU64, [SiFive7PipeB]>;
+def : WriteRes<WriteFMul64, [SiFive7PipeB]>;
+def : WriteRes<WriteFMulAdd64, [SiFive7PipeB]>;
+def : WriteRes<WriteFMulSub64, [SiFive7PipeB]>;
+}
+let Latency = 3 in {
+def : WriteRes<WriteFSGNJ64, [SiFive7PipeB]>;
+def : WriteRes<WriteFMinMax64, [SiFive7PipeB]>;
+}
+
+def : WriteRes<WriteFDiv64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
+ let ResourceCycles = [1, 55]; }
+def : WriteRes<WriteFSqrt64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
+ let ResourceCycles = [1, 55]; }
+
+// Conversions
+let Latency = 3 in {
+def : WriteRes<WriteFCvtI32ToF32, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtI32ToF64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtI64ToF32, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtI64ToF64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF32ToI32, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF32ToI64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF32ToF64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF64ToI32, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF64ToI64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF64ToF32, [SiFive7PipeB]>;
+
+def : WriteRes<WriteFClass32, [SiFive7PipeB]>;
+def : WriteRes<WriteFClass64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCmp32, [SiFive7PipeB]>;
+def : WriteRes<WriteFCmp64, [SiFive7PipeB]>;
+def : WriteRes<WriteFMovI32ToF32, [SiFive7PipeB]>;
+def : WriteRes<WriteFMovF32ToI32, [SiFive7PipeB]>;
+def : WriteRes<WriteFMovI64ToF64, [SiFive7PipeB]>;
+def : WriteRes<WriteFMovF64ToI64, [SiFive7PipeB]>;
+}
+
+// Others
+def : WriteRes<WriteCSR, [SiFive7PipeB]>;
+def : WriteRes<WriteNop, []>;
+
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
+
+//===----------------------------------------------------------------------===//
+// Bypass and advance
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : ReadAdvance<ReadIALU, 0>;
+def : ReadAdvance<ReadIALU32, 0>;
+def : ReadAdvance<ReadShift, 0>;
+def : ReadAdvance<ReadShift32, 0>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFALU32, 0>;
+def : ReadAdvance<ReadFALU64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMulAdd32, 0>;
+def : ReadAdvance<ReadFMulSub32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMulAdd64, 0>;
+def : ReadAdvance<ReadFMulSub64, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
index bbcd03d46236..0806be8a8d87 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -1,4 +1,4 @@
-//===-- RISCVSchedule.td - RISCV Scheduling Definitions -------*- tablegen -*-===//
+//===-- RISCVSchedule.td - RISCV Scheduling Definitions ----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 47a48c820a29..df11d237a16c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -30,13 +30,16 @@ using namespace llvm;
void RISCVSubtarget::anchor() {}
RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
- const Triple &TT, StringRef CPU, StringRef FS, StringRef ABIName) {
+ const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, StringRef ABIName) {
// Determine default and user-specified characteristics
bool Is64Bit = TT.isArch64Bit();
std::string CPUName = std::string(CPU);
+ std::string TuneCPUName = std::string(TuneCPU);
if (CPUName.empty())
CPUName = Is64Bit ? "generic-rv64" : "generic-rv32";
- ParseSubtargetFeatures(CPUName, FS);
+ if (TuneCPUName.empty())
+ TuneCPUName = CPUName;
+ ParseSubtargetFeatures(CPUName, TuneCPUName, FS);
if (Is64Bit) {
XLenVT = MVT::i64;
XLen = 64;
@@ -47,11 +50,12 @@ RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
return *this;
}
-RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
+ StringRef TuneCPU, StringRef FS,
StringRef ABIName, const TargetMachine &TM)
- : RISCVGenSubtargetInfo(TT, CPU, FS),
+ : RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS),
UserReservedRegister(RISCV::NUM_TARGET_REGS),
- FrameLowering(initializeSubtargetDependencies(TT, CPU, FS, ABIName)),
+ FrameLowering(initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {
CallLoweringInfo.reset(new RISCVCallLowering(*getTargetLowering()));
Legalizer.reset(new RISCVLegalizerInfo(*this));
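
The constructor now threads a separate "tune" CPU through to ParseSubtargetFeatures, and initializeSubtargetDependencies defaults it: an empty target CPU becomes generic-rv32/generic-rv64, and an empty tune CPU falls back to the target CPU. A minimal standalone restatement of just that defaulting (sketch only, not the LLVM function):

    #include <string>
    #include <utility>

    std::pair<std::string, std::string>
    resolveCPUs(std::string cpu, std::string tune, bool is64Bit) {
      if (cpu.empty())
        cpu = is64Bit ? "generic-rv64" : "generic-rv32";
      if (tune.empty())
        tune = cpu; // tuning defaults to the target CPU
      return {cpu, tune};
    }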
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
index fe1285f23b15..561b04cc0b44 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -13,10 +13,10 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCVSUBTARGET_H
#define LLVM_LIB_TARGET_RISCV_RISCVSUBTARGET_H
+#include "MCTargetDesc/RISCVBaseInfo.h"
#include "RISCVFrameLowering.h"
#include "RISCVISelLowering.h"
#include "RISCVInstrInfo.h"
-#include "Utils/RISCVBaseInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -40,6 +40,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasStdExtD = false;
bool HasStdExtC = false;
bool HasStdExtB = false;
+ bool HasStdExtZba = false;
bool HasStdExtZbb = false;
bool HasStdExtZbc = false;
bool HasStdExtZbe = false;
@@ -51,6 +52,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasStdExtZbt = false;
bool HasStdExtZbproposedc = false;
bool HasStdExtV = false;
+ bool HasStdExtZvlsseg = false;
+ bool HasStdExtZvamo = false;
+ bool HasStdExtZfh = false;
bool HasRV64 = false;
bool IsRV32E = false;
bool EnableLinkerRelax = false;
@@ -69,17 +73,19 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
/// Initializes using the passed in CPU and feature strings so that we can
/// use initializer lists for subtarget initialization.
RISCVSubtarget &initializeSubtargetDependencies(const Triple &TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU,
+ StringRef TuneCPU,
+ StringRef FS,
StringRef ABIName);
public:
// Initializes the data members to match that of the specified triple.
- RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
- StringRef ABIName, const TargetMachine &TM);
+ RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
+ StringRef FS, StringRef ABIName, const TargetMachine &TM);
// Parses features string setting specified subtarget options. The
// definition of this function is auto-generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
const RISCVFrameLowering *getFrameLowering() const override {
return &FrameLowering;
@@ -101,6 +107,7 @@ public:
bool hasStdExtD() const { return HasStdExtD; }
bool hasStdExtC() const { return HasStdExtC; }
bool hasStdExtB() const { return HasStdExtB; }
+ bool hasStdExtZba() const { return HasStdExtZba; }
bool hasStdExtZbb() const { return HasStdExtZbb; }
bool hasStdExtZbc() const { return HasStdExtZbc; }
bool hasStdExtZbe() const { return HasStdExtZbe; }
@@ -112,6 +119,9 @@ public:
bool hasStdExtZbt() const { return HasStdExtZbt; }
bool hasStdExtZbproposedc() const { return HasStdExtZbproposedc; }
bool hasStdExtV() const { return HasStdExtV; }
+ bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; }
+ bool hasStdExtZvamo() const { return HasStdExtZvamo; }
+ bool hasStdExtZfh() const { return HasStdExtZfh; }
bool is64Bit() const { return HasRV64; }
bool isRV32E() const { return IsRV32E; }
bool enableLinkerRelax() const { return EnableLinkerRelax; }
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index 8e75647bd4a9..16399fea150e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -310,7 +310,9 @@ def: SysReg<"mhpmcounter31h", 0xB9F>;
//===--------------------------
// Machine Counter Setup
//===--------------------------
+let AltName = "mucounteren" in // Privileged spec v1.9.1 Name
def : SysReg<"mcountinhibit", 0x320>;
+
def : SysReg<"mhpmevent3", 0x323>;
def : SysReg<"mhpmevent4", 0x324>;
def : SysReg<"mhpmevent5", 0x325>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 75683e2fd8e9..32fb7cd6753c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -11,11 +11,11 @@
//===----------------------------------------------------------------------===//
#include "RISCVTargetMachine.h"
+#include "MCTargetDesc/RISCVBaseInfo.h"
#include "RISCV.h"
#include "RISCVTargetObjectFile.h"
#include "RISCVTargetTransformInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
-#include "Utils/RISCVBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
@@ -35,18 +35,18 @@ using namespace llvm;
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
- auto PR = PassRegistry::getPassRegistry();
+ auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
+ initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
+ initializeRISCVCleanupVSETVLIPass(*PR);
}
static StringRef computeDataLayout(const Triple &TT) {
- if (TT.isArch64Bit()) {
+ if (TT.isArch64Bit())
return "e-m:e-p:64:64-i64:64-i128:128-n64-S128";
- } else {
- assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
- return "e-m:e-p:32:32-i64:64-n32-S128";
- }
+ assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
+ return "e-m:e-p:32:32-i64:64-n32-S128";
}
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
@@ -75,15 +75,16 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
const RISCVSubtarget *
RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute TuneAttr = F.getFnAttribute("tune-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
- std::string Key = CPU + FS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string TuneCPU =
+ TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
+ std::string Key = CPU + TuneCPU + FS;
auto &I = SubtargetMap[Key];
if (!I) {
// This needs to be done before we create a new subtarget since any
@@ -100,7 +101,7 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
}
ABIName = ModuleTargetABI->getString();
}
- I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, FS, ABIName, *this);
+ I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, TuneCPU, FS, ABIName, *this);
}
return I.get();
}
@@ -110,6 +111,15 @@ RISCVTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(RISCVTTIImpl(this, F));
}
+// A RISC-V hart has a single byte-addressable address space of 2^XLEN bytes
+// for all memory accesses, so it is reasonable to assume that an
+// implementation has no-op address space casts. If an implementation makes a
+// change to this, they can override it here.
+bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DstAS) const {
+ return true;
+}
+
namespace {
class RISCVPassConfig : public TargetPassConfig {
public:
@@ -131,7 +141,7 @@ public:
void addPreSched2() override;
void addPreRegAlloc() override;
};
-}
+} // namespace
TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
return new RISCVPassConfig(*this, PM);
@@ -149,7 +159,7 @@ bool RISCVPassConfig::addInstSelector() {
}
bool RISCVPassConfig::addIRTranslator() {
- addPass(new IRTranslator());
+ addPass(new IRTranslator(getOptLevel()));
return false;
}
@@ -181,5 +191,8 @@ void RISCVPassConfig::addPreEmitPass2() {
}
void RISCVPassConfig::addPreRegAlloc() {
- addPass(createRISCVMergeBaseOffsetOptPass());
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ addPass(createRISCVMergeBaseOffsetOptPass());
+ addPass(createRISCVCleanupVSETVLIPass());
+ }
}
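One practical effect of the getSubtargetImpl change above is that two functions differing only in their "tune-cpu" attribute now get distinct cached subtargets, because the cache key concatenates CPU, TuneCPU and the feature string. A rough sketch of that keying scheme, simplified to plain std::string attributes (the struct and class names here are illustrative, not LLVM types):

// Sketch: per-function subtarget caching keyed on CPU + TuneCPU + FS,
// mirroring the map lookup in RISCVTargetMachine::getSubtargetImpl.
#include <map>
#include <memory>
#include <string>

struct Subtarget { std::string CPU, TuneCPU, FS; };

class SubtargetCache {
  std::map<std::string, std::unique_ptr<Subtarget>> Map;

public:
  Subtarget &get(const std::string &CPU, const std::string &TuneCPU,
                 const std::string &FS) {
    auto &Slot = Map[CPU + TuneCPU + FS]; // same key layout as the patch
    if (!Slot)
      Slot = std::make_unique<Subtarget>(Subtarget{CPU, TuneCPU, FS});
    return *Slot;
  }
};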
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index a4476fa40a7d..3156333f7ee1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -43,7 +43,10 @@ public:
}
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+
+ virtual bool isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DstAS) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index bd78f801c59a..27714cffc989 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "RISCVTargetTransformInfo.h"
-#include "Utils/RISCVMatInt.h"
+#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -30,8 +30,10 @@ int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
getST()->is64Bit());
}
-int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind) {
+int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
assert(Ty->isIntegerTy() &&
"getIntImmCost can only estimate cost of materialising integers");
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 392700707760..8d077e946305 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -42,8 +42,9 @@ public:
TLI(ST->getTargetLowering()) {}
int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
- int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty, TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TTI::TargetCostKind CostKind);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
deleted file mode 100644
index 4e6cdd8606b1..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
+++ /dev/null
@@ -1,223 +0,0 @@
-//===-- RISCVBaseInfo.h - Top level definitions for RISCV MC ----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone enum definitions for the RISCV target
-// useful for the compiler back-end and the MC libraries.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
-#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
-
-#include "RISCVRegisterInfo.h"
-#include "MCTargetDesc/RISCVMCTargetDesc.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/SubtargetFeature.h"
-
-namespace llvm {
-
-// RISCVII - This namespace holds all of the target specific flags that
-// instruction info tracks. All definitions must match RISCVInstrFormats.td.
-namespace RISCVII {
-enum {
- InstFormatPseudo = 0,
- InstFormatR = 1,
- InstFormatR4 = 2,
- InstFormatI = 3,
- InstFormatS = 4,
- InstFormatB = 5,
- InstFormatU = 6,
- InstFormatJ = 7,
- InstFormatCR = 8,
- InstFormatCI = 9,
- InstFormatCSS = 10,
- InstFormatCIW = 11,
- InstFormatCL = 12,
- InstFormatCS = 13,
- InstFormatCA = 14,
- InstFormatCB = 15,
- InstFormatCJ = 16,
- InstFormatOther = 17,
-
- InstFormatMask = 31,
-};
-
-// RISC-V Specific Machine Operand Flags
-enum {
- MO_None = 0,
- MO_CALL = 1,
- MO_PLT = 2,
- MO_LO = 3,
- MO_HI = 4,
- MO_PCREL_LO = 5,
- MO_PCREL_HI = 6,
- MO_GOT_HI = 7,
- MO_TPREL_LO = 8,
- MO_TPREL_HI = 9,
- MO_TPREL_ADD = 10,
- MO_TLS_GOT_HI = 11,
- MO_TLS_GD_HI = 12,
-
- // Used to differentiate between target-specific "direct" flags and "bitmask"
- // flags. A machine operand can only have one "direct" flag, but can have
- // multiple "bitmask" flags.
- MO_DIRECT_FLAG_MASK = 15
-};
-} // namespace RISCVII
-
-namespace RISCVOp {
-enum OperandType : unsigned {
- OPERAND_FIRST_RISCV_IMM = MCOI::OPERAND_FIRST_TARGET,
- OPERAND_UIMM4 = OPERAND_FIRST_RISCV_IMM,
- OPERAND_UIMM5,
- OPERAND_UIMM12,
- OPERAND_SIMM12,
- OPERAND_SIMM13_LSB0,
- OPERAND_UIMM20,
- OPERAND_SIMM21_LSB0,
- OPERAND_UIMMLOG2XLEN,
- OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN
-};
-} // namespace RISCVOp
-
-// Describes the predecessor/successor bits used in the FENCE instruction.
-namespace RISCVFenceField {
-enum FenceField {
- I = 8,
- O = 4,
- R = 2,
- W = 1
-};
-}
-
-// Describes the supported floating point rounding mode encodings.
-namespace RISCVFPRndMode {
-enum RoundingMode {
- RNE = 0,
- RTZ = 1,
- RDN = 2,
- RUP = 3,
- RMM = 4,
- DYN = 7,
- Invalid
-};
-
-inline static StringRef roundingModeToString(RoundingMode RndMode) {
- switch (RndMode) {
- default:
- llvm_unreachable("Unknown floating point rounding mode");
- case RISCVFPRndMode::RNE:
- return "rne";
- case RISCVFPRndMode::RTZ:
- return "rtz";
- case RISCVFPRndMode::RDN:
- return "rdn";
- case RISCVFPRndMode::RUP:
- return "rup";
- case RISCVFPRndMode::RMM:
- return "rmm";
- case RISCVFPRndMode::DYN:
- return "dyn";
- }
-}
-
-inline static RoundingMode stringToRoundingMode(StringRef Str) {
- return StringSwitch<RoundingMode>(Str)
- .Case("rne", RISCVFPRndMode::RNE)
- .Case("rtz", RISCVFPRndMode::RTZ)
- .Case("rdn", RISCVFPRndMode::RDN)
- .Case("rup", RISCVFPRndMode::RUP)
- .Case("rmm", RISCVFPRndMode::RMM)
- .Case("dyn", RISCVFPRndMode::DYN)
- .Default(RISCVFPRndMode::Invalid);
-}
-
-inline static bool isValidRoundingMode(unsigned Mode) {
- switch (Mode) {
- default:
- return false;
- case RISCVFPRndMode::RNE:
- case RISCVFPRndMode::RTZ:
- case RISCVFPRndMode::RDN:
- case RISCVFPRndMode::RUP:
- case RISCVFPRndMode::RMM:
- case RISCVFPRndMode::DYN:
- return true;
- }
-}
-} // namespace RISCVFPRndMode
-
-namespace RISCVSysReg {
-struct SysReg {
- const char *Name;
- unsigned Encoding;
- const char *AltName;
- // FIXME: add these additional fields when needed.
- // Privilege Access: Read, Write, Read-Only.
- // unsigned ReadWrite;
- // Privilege Mode: User, System or Machine.
- // unsigned Mode;
- // Check field name.
- // unsigned Extra;
- // Register number without the privilege bits.
- // unsigned Number;
- FeatureBitset FeaturesRequired;
- bool isRV32Only;
-
- bool haveRequiredFeatures(FeatureBitset ActiveFeatures) const {
- // Not in 32-bit mode.
- if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit])
- return false;
- // No required feature associated with the system register.
- if (FeaturesRequired.none())
- return true;
- return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
- }
-};
-
-#define GET_SysRegsList_DECL
-#include "RISCVGenSystemOperands.inc"
-} // end namespace RISCVSysReg
-
-namespace RISCVABI {
-
-enum ABI {
- ABI_ILP32,
- ABI_ILP32F,
- ABI_ILP32D,
- ABI_ILP32E,
- ABI_LP64,
- ABI_LP64F,
- ABI_LP64D,
- ABI_Unknown
-};
-
-// Returns the target ABI, or else a StringError if the requested ABIName is
-// not supported for the given TT and FeatureBits combination.
-ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
- StringRef ABIName);
-
-ABI getTargetABI(StringRef ABIName);
-
-// Returns the register used to hold the stack pointer after realignment.
-Register getBPReg();
-
-} // namespace RISCVABI
-
-namespace RISCVFeatures {
-
-// Validates if the given combination of features are valid for the target
-// triple. Exits with report_fatal_error if not.
-void validate(const Triple &TT, const FeatureBitset &FeatureBits);
-
-} // namespace RISCVFeatures
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 16e159621672..5f1bf316e871 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -82,6 +82,11 @@ class SparcAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseMembarTag(OperandVector &Operands);
+ template <unsigned N>
+ OperandMatchResultTy parseShiftAmtImm(OperandVector &Operands);
+
+ OperandMatchResultTy parseCallTarget(OperandVector &Operands);
+
OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name);
OperandMatchResultTy
@@ -262,6 +267,36 @@ public:
bool isMEMri() const { return Kind == k_MemoryImm; }
bool isMembarTag() const { return Kind == k_Immediate; }
+ bool isCallTarget() const {
+ if (!isImm())
+ return false;
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val))
+ return CE->getValue() % 4 == 0;
+
+ return true;
+ }
+
+ bool isShiftAmtImm5() const {
+ if (!isImm())
+ return false;
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val))
+ return isUInt<5>(CE->getValue());
+
+ return false;
+ }
+
+ bool isShiftAmtImm6() const {
+ if (!isImm())
+ return false;
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val))
+ return isUInt<6>(CE->getValue());
+
+ return false;
+ }
+
bool isIntReg() const {
return (Kind == k_Register && Reg.Kind == rk_IntReg);
}
@@ -343,6 +378,15 @@ public:
addExpr(Inst, Expr);
}
+ void addShiftAmtImm5Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+ void addShiftAmtImm6Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const{
// Add as immediate when possible. Null MCExpr = 0.
if (!Expr)
@@ -377,6 +421,11 @@ public:
addExpr(Inst, Expr);
}
+ void addCallTargetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
static std::unique_ptr<SparcOperand> CreateToken(StringRef Str, SMLoc S) {
auto Op = std::make_unique<SparcOperand>(k_Token);
Op->Tok.Data = Str.data();
@@ -645,7 +694,7 @@ OperandMatchResultTy SparcAsmParser::tryParseRegister(unsigned &RegNo,
EndLoc = Tok.getEndLoc();
RegNo = 0;
if (getLexer().getKind() != AsmToken::Percent)
- return MatchOperand_Success;
+ return MatchOperand_NoMatch;
Parser.Lex();
unsigned regKind = SparcOperand::rk_None;
if (matchRegisterName(Tok, RegNo, regKind)) {
@@ -729,37 +778,74 @@ ParseDirective(AsmToken DirectiveID)
OperandMatchResultTy
SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
SMLoc S, E;
- unsigned BaseReg = 0;
- if (ParseRegister(BaseReg, S, E)) {
+ std::unique_ptr<SparcOperand> LHS;
+ if (parseSparcAsmOperand(LHS) != MatchOperand_Success)
return MatchOperand_NoMatch;
+
+ // Single immediate operand
+ if (LHS->isImm()) {
+ Operands.push_back(SparcOperand::MorphToMEMri(Sparc::G0, std::move(LHS)));
+ return MatchOperand_Success;
}
- switch (getLexer().getKind()) {
- default: return MatchOperand_NoMatch;
+ if (!LHS->isIntReg()) {
+ Error(LHS->getStartLoc(), "invalid register kind for this operand");
+ return MatchOperand_ParseFail;
+ }
- case AsmToken::Comma:
- case AsmToken::RBrac:
- case AsmToken::EndOfStatement:
- Operands.push_back(SparcOperand::CreateMEMr(BaseReg, S, E));
- return MatchOperand_Success;
+ AsmToken Tok = getLexer().getTok();
+ // The plus token may be followed by a register or an immediate value, the
+ // minus one is always interpreted as sign for the immediate value
+ if (Tok.is(AsmToken::Plus) || Tok.is(AsmToken::Minus)) {
+ (void)Parser.parseOptionalToken(AsmToken::Plus);
- case AsmToken:: Plus:
- Parser.Lex(); // Eat the '+'
- break;
- case AsmToken::Minus:
- break;
+ std::unique_ptr<SparcOperand> RHS;
+ if (parseSparcAsmOperand(RHS) != MatchOperand_Success)
+ return MatchOperand_NoMatch;
+
+ if (RHS->isReg() && !RHS->isIntReg()) {
+ Error(RHS->getStartLoc(), "invalid register kind for this operand");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(
+ RHS->isImm()
+ ? SparcOperand::MorphToMEMri(LHS->getReg(), std::move(RHS))
+ : SparcOperand::MorphToMEMrr(LHS->getReg(), std::move(RHS)));
+
+ return MatchOperand_Success;
}
- std::unique_ptr<SparcOperand> Offset;
- OperandMatchResultTy ResTy = parseSparcAsmOperand(Offset);
- if (ResTy != MatchOperand_Success || !Offset)
+ Operands.push_back(SparcOperand::CreateMEMr(LHS->getReg(), S, E));
+ return MatchOperand_Success;
+}
+
+template <unsigned N>
+OperandMatchResultTy SparcAsmParser::parseShiftAmtImm(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+
+ // This is a register, not an immediate
+ if (getLexer().getKind() == AsmToken::Percent)
return MatchOperand_NoMatch;
- Operands.push_back(
- Offset->isImm() ? SparcOperand::MorphToMEMri(BaseReg, std::move(Offset))
- : SparcOperand::MorphToMEMrr(BaseReg, std::move(Offset)));
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr))
+ return MatchOperand_ParseFail;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE) {
+ Error(S, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+
+ if (!isUInt<N>(CE->getValue())) {
+ Error(S, "immediate shift value out of range");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(SparcOperand::CreateImm(Expr, S, E));
return MatchOperand_Success;
}
@@ -809,6 +895,33 @@ OperandMatchResultTy SparcAsmParser::parseMembarTag(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy SparcAsmParser::parseCallTarget(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_NoMatch;
+ case AsmToken::LParen:
+ case AsmToken::Integer:
+ case AsmToken::Identifier:
+ case AsmToken::Dot:
+ break;
+ }
+
+ const MCExpr *DestValue;
+ if (getParser().parseExpression(DestValue))
+ return MatchOperand_NoMatch;
+
+ bool IsPic = getContext().getObjectFileInfo()->isPositionIndependent();
+ SparcMCExpr::VariantKind Kind =
+ IsPic ? SparcMCExpr::VK_Sparc_WPLT30 : SparcMCExpr::VK_Sparc_WDISP30;
+
+ const MCExpr *DestExpr = SparcMCExpr::create(Kind, DestValue, getContext());
+ Operands.push_back(SparcOperand::CreateImm(DestExpr, S, E));
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy
SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
@@ -936,6 +1049,7 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
}
break;
+ case AsmToken::Plus:
case AsmToken::Minus:
case AsmToken::Integer:
case AsmToken::LParen:
@@ -1272,7 +1386,7 @@ const SparcMCExpr *
SparcAsmParser::adjustPICRelocation(SparcMCExpr::VariantKind VK,
const MCExpr *subExpr) {
// When in PIC mode, "%lo(...)" and "%hi(...)" behave differently.
- // If the expression refers contains _GLOBAL_OFFSETE_TABLE, it is
+ // If the expression refers contains _GLOBAL_OFFSET_TABLE, it is
// actually a %pc10 or %pc22 relocation. Otherwise, they are interpreted
// as %got10 or %got22 relocation.
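For reference on the parseShiftAmtImm<N> template added above: once the operand is known to be a constant expression, the only real check is that the value fits in N unsigned bits — 5 bits for the 32-bit shifts and 6 bits for sllx/srlx/srax. A self-contained sketch of that bound check, with fitsUInt as a hand-rolled stand-in for llvm::isUInt<N>:

// Sketch: validates a shift amount the way parseShiftAmtImm<N> does,
// but as a standalone helper outside of the MC asm parser.
#include <cstdint>

template <unsigned N>
static bool fitsUInt(int64_t V) {
  static_assert(N > 0 && N < 64, "bit width out of range");
  return V >= 0 && static_cast<uint64_t>(V) < (UINT64_C(1) << N);
}

static bool isValidShiftAmt(int64_t V, bool Is64BitShift) {
  // 32-bit shifts (sll/srl/sra) take 5 bits, 64-bit ones (sllx/...) take 6.
  return Is64BitShift ? fitsUInt<6>(V) : fitsUInt<5>(V);
}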
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp
index e9d3aaeb9cfe..6ad6940c6b51 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp
@@ -10,14 +10,13 @@
//===----------------------------------------------------------------------===//
#include "LeonPasses.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID)
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.h b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.h
index b165bc93780f..9bc4569a1298 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.h
@@ -12,14 +12,11 @@
#ifndef LLVM_LIB_TARGET_SPARC_LEON_PASSES_H
#define LLVM_LIB_TARGET_SPARC_LEON_PASSES_H
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
-
-#include "Sparc.h"
-#include "SparcSubtarget.h"
namespace llvm {
+class SparcSubtarget;
+
class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass
: public MachineFunctionPass {
protected:
@@ -33,13 +30,11 @@ protected:
protected:
LEONMachineFunctionPass(char &ID);
- int GetRegIndexForOperand(MachineInstr &MI, int OperandIndex);
void clearUsedRegisterList() { UsedRegisters.clear(); }
void markRegisterUsed(int registerIndex) {
UsedRegisters.push_back(registerIndex);
}
- int getUnusedFPRegister(MachineRegisterInfo &MRI);
};
class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass {
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 83c44e0682ce..5a9ecfe74ecc 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -255,12 +255,6 @@ namespace {
}
}
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- // FIXME.
- return false;
- }
-
/// fixupNeedsRelaxation - Target specific predicate for whether a given
/// fixup requires the associated instruction to be relaxed.
bool fixupNeedsRelaxation(const MCFixup &Fixup,
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
index f6728a070736..8e4621946008 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
@@ -141,24 +141,34 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
const MCSubtargetInfo &STI,
raw_ostream &O, const char *Modifier) {
- printOperand(MI, opNum, STI, O);
-
// If this is an ADD operand, emit it like normal operands.
if (Modifier && !strcmp(Modifier, "arith")) {
+ printOperand(MI, opNum, STI, O);
O << ", ";
- printOperand(MI, opNum+1, STI, O);
+ printOperand(MI, opNum + 1, STI, O);
return;
}
- const MCOperand &MO = MI->getOperand(opNum+1);
- if (MO.isReg() && MO.getReg() == SP::G0)
- return; // don't print "+%g0"
- if (MO.isImm() && MO.getImm() == 0)
- return; // don't print "+0"
+ const MCOperand &Op1 = MI->getOperand(opNum);
+ const MCOperand &Op2 = MI->getOperand(opNum + 1);
+
+ bool PrintedFirstOperand = false;
+ if (Op1.isReg() && Op1.getReg() != SP::G0) {
+ printOperand(MI, opNum, STI, O);
+ PrintedFirstOperand = true;
+ }
- O << "+";
+ // Skip the second operand iff it adds nothing (literal 0 or %g0) and we've
+ // already printed the first one
+ const bool SkipSecondOperand =
+ PrintedFirstOperand && ((Op2.isReg() && Op2.getReg() == SP::G0) ||
+ (Op2.isImm() && Op2.getImm() == 0));
- printOperand(MI, opNum+1, STI, O);
+ if (!SkipSecondOperand) {
+ if (PrintedFirstOperand)
+ O << '+';
+ printOperand(MI, opNum + 1, STI, O);
+ }
}
void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
index 11587f165ef2..91b78bd03fc3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
@@ -31,6 +31,7 @@ public:
bool isV9(const MCSubtargetInfo &STI) const;
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, uint64_t Address,
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 7e908011bd50..9f8522541332 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -68,13 +69,15 @@ public:
unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
-
unsigned getCallTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getSImm13OpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getBranchPredTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -146,20 +149,50 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return 0;
}
+unsigned
+SparcMCCodeEmitter::getSImm13OpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ if (MO.isImm())
+ return MO.getImm();
+
+ assert(MO.isExpr() &&
+ "getSImm13OpValue expects only expressions or an immediate");
+
+ const MCExpr *Expr = MO.getExpr();
+
+ // Constant value, no fixup is needed
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ return CE->getValue();
+
+ MCFixupKind Kind;
+ if (const SparcMCExpr *SExpr = dyn_cast<SparcMCExpr>(Expr)) {
+ Kind = MCFixupKind(SExpr->getFixupKind());
+ } else {
+ bool IsPic = Ctx.getObjectFileInfo()->isPositionIndependent();
+ Kind = IsPic ? MCFixupKind(Sparc::fixup_sparc_got13)
+ : MCFixupKind(Sparc::fixup_sparc_13);
+ }
+
+ Fixups.push_back(MCFixup::create(0, Expr, Kind));
+ return 0;
+}
+
unsigned SparcMCCodeEmitter::
getCallTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm())
- return getMachineOpValue(MI, MO, Fixups, STI);
+ const MCExpr *Expr = MO.getExpr();
+ const SparcMCExpr *SExpr = dyn_cast<SparcMCExpr>(Expr);
if (MI.getOpcode() == SP::TLS_CALL) {
// No fixups for __tls_get_addr. Will emit for fixups for tls_symbol in
// encodeInstruction.
#ifndef NDEBUG
// Verify that the callee is actually __tls_get_addr.
- const SparcMCExpr *SExpr = dyn_cast<SparcMCExpr>(MO.getExpr());
assert(SExpr && SExpr->getSubExpr()->getKind() == MCExpr::SymbolRef &&
"Unexpected expression in TLS_CALL");
const MCSymbolRefExpr *SymExpr = cast<MCSymbolRefExpr>(SExpr->getSubExpr());
@@ -169,15 +202,8 @@ getCallTargetOpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
- MCFixupKind fixupKind = (MCFixupKind)Sparc::fixup_sparc_call30;
-
- if (const SparcMCExpr *SExpr = dyn_cast<SparcMCExpr>(MO.getExpr())) {
- if (SExpr->getKind() == SparcMCExpr::VK_Sparc_WPLT30)
- fixupKind = (MCFixupKind)Sparc::fixup_sparc_wplt30;
- }
-
- Fixups.push_back(MCFixup::create(0, MO.getExpr(), fixupKind));
-
+ MCFixupKind Kind = MCFixupKind(SExpr->getFixupKind());
+ Fixups.push_back(MCFixup::create(0, Expr, Kind));
return 0;
}
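The new getSImm13OpValue above follows a small decision ladder: plain immediates and constant expressions are encoded directly, Sparc-specific expressions use their own fixup kind, and anything else gets the %got13 fixup under PIC or the generic 13-bit fixup otherwise. A hedged standalone restatement of that ladder (the enum and types here are placeholders, not the real Sparc::Fixups values; only the ordering mirrors the patch):

// Sketch of the fixup-selection order in getSImm13OpValue.
#include <cstdint>
#include <optional>

enum class FixupKind { Got13, Simm13, TargetSpecific };

struct Simm13Lowering {
  std::optional<int64_t> Encoded; // set when no fixup is needed
  std::optional<FixupKind> Fixup; // set when a relocation is required
};

static Simm13Lowering lowerSimm13(std::optional<int64_t> ConstantValue,
                                  bool HasSparcSpecificExpr, bool IsPic) {
  if (ConstantValue)                 // immediate or MCConstantExpr
    return {ConstantValue, std::nullopt};
  if (HasSparcSpecificExpr)          // e.g. %lo(sym): use its own fixup kind
    return {std::nullopt, FixupKind::TargetSpecific};
  return {std::nullopt, IsPic ? FixupKind::Got13 : FixupKind::Simm13};
}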
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index 00f319fc37e1..b84ecf074455 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -41,46 +41,46 @@ void SparcMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
bool SparcMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind)
{
- bool closeParen = true;
switch (Kind) {
- case VK_Sparc_None: closeParen = false; break;
- case VK_Sparc_LO: OS << "%lo("; break;
- case VK_Sparc_HI: OS << "%hi("; break;
- case VK_Sparc_H44: OS << "%h44("; break;
- case VK_Sparc_M44: OS << "%m44("; break;
- case VK_Sparc_L44: OS << "%l44("; break;
- case VK_Sparc_HH: OS << "%hh("; break;
- case VK_Sparc_HM: OS << "%hm("; break;
+ case VK_Sparc_None: return false;
+ case VK_Sparc_LO: OS << "%lo("; return true;
+ case VK_Sparc_HI: OS << "%hi("; return true;
+ case VK_Sparc_H44: OS << "%h44("; return true;
+ case VK_Sparc_M44: OS << "%m44("; return true;
+ case VK_Sparc_L44: OS << "%l44("; return true;
+ case VK_Sparc_HH: OS << "%hh("; return true;
+ case VK_Sparc_HM: OS << "%hm("; return true;
// FIXME: use %pc22/%pc10, if system assembler supports them.
- case VK_Sparc_PC22: OS << "%hi("; break;
- case VK_Sparc_PC10: OS << "%lo("; break;
+ case VK_Sparc_PC22: OS << "%hi("; return true;
+ case VK_Sparc_PC10: OS << "%lo("; return true;
// FIXME: use %got22/%got10, if system assembler supports them.
- case VK_Sparc_GOT22: OS << "%hi("; break;
- case VK_Sparc_GOT10: OS << "%lo("; break;
- case VK_Sparc_GOT13: closeParen = false; break;
- case VK_Sparc_13: closeParen = false; break;
- case VK_Sparc_WPLT30: closeParen = false; break;
- case VK_Sparc_R_DISP32: OS << "%r_disp32("; break;
- case VK_Sparc_TLS_GD_HI22: OS << "%tgd_hi22("; break;
- case VK_Sparc_TLS_GD_LO10: OS << "%tgd_lo10("; break;
- case VK_Sparc_TLS_GD_ADD: OS << "%tgd_add("; break;
- case VK_Sparc_TLS_GD_CALL: OS << "%tgd_call("; break;
- case VK_Sparc_TLS_LDM_HI22: OS << "%tldm_hi22("; break;
- case VK_Sparc_TLS_LDM_LO10: OS << "%tldm_lo10("; break;
- case VK_Sparc_TLS_LDM_ADD: OS << "%tldm_add("; break;
- case VK_Sparc_TLS_LDM_CALL: OS << "%tldm_call("; break;
- case VK_Sparc_TLS_LDO_HIX22: OS << "%tldo_hix22("; break;
- case VK_Sparc_TLS_LDO_LOX10: OS << "%tldo_lox10("; break;
- case VK_Sparc_TLS_LDO_ADD: OS << "%tldo_add("; break;
- case VK_Sparc_TLS_IE_HI22: OS << "%tie_hi22("; break;
- case VK_Sparc_TLS_IE_LO10: OS << "%tie_lo10("; break;
- case VK_Sparc_TLS_IE_LD: OS << "%tie_ld("; break;
- case VK_Sparc_TLS_IE_LDX: OS << "%tie_ldx("; break;
- case VK_Sparc_TLS_IE_ADD: OS << "%tie_add("; break;
- case VK_Sparc_TLS_LE_HIX22: OS << "%tle_hix22("; break;
- case VK_Sparc_TLS_LE_LOX10: OS << "%tle_lox10("; break;
+ case VK_Sparc_GOT22: OS << "%hi("; return true;
+ case VK_Sparc_GOT10: OS << "%lo("; return true;
+ case VK_Sparc_GOT13: return false;
+ case VK_Sparc_13: return false;
+ case VK_Sparc_WDISP30: return false;
+ case VK_Sparc_WPLT30: return false;
+ case VK_Sparc_R_DISP32: OS << "%r_disp32("; return true;
+ case VK_Sparc_TLS_GD_HI22: OS << "%tgd_hi22("; return true;
+ case VK_Sparc_TLS_GD_LO10: OS << "%tgd_lo10("; return true;
+ case VK_Sparc_TLS_GD_ADD: OS << "%tgd_add("; return true;
+ case VK_Sparc_TLS_GD_CALL: OS << "%tgd_call("; return true;
+ case VK_Sparc_TLS_LDM_HI22: OS << "%tldm_hi22("; return true;
+ case VK_Sparc_TLS_LDM_LO10: OS << "%tldm_lo10("; return true;
+ case VK_Sparc_TLS_LDM_ADD: OS << "%tldm_add("; return true;
+ case VK_Sparc_TLS_LDM_CALL: OS << "%tldm_call("; return true;
+ case VK_Sparc_TLS_LDO_HIX22: OS << "%tldo_hix22("; return true;
+ case VK_Sparc_TLS_LDO_LOX10: OS << "%tldo_lox10("; return true;
+ case VK_Sparc_TLS_LDO_ADD: OS << "%tldo_add("; return true;
+ case VK_Sparc_TLS_IE_HI22: OS << "%tie_hi22("; return true;
+ case VK_Sparc_TLS_IE_LO10: OS << "%tie_lo10("; return true;
+ case VK_Sparc_TLS_IE_LD: OS << "%tie_ld("; return true;
+ case VK_Sparc_TLS_IE_LDX: OS << "%tie_ldx("; return true;
+ case VK_Sparc_TLS_IE_ADD: OS << "%tie_add("; return true;
+ case VK_Sparc_TLS_LE_HIX22: OS << "%tle_hix22("; return true;
+ case VK_Sparc_TLS_LE_LOX10: OS << "%tle_lox10("; return true;
}
- return closeParen;
+ llvm_unreachable("Unhandled SparcMCExpr::VariantKind");
}
SparcMCExpr::VariantKind SparcMCExpr::parseVariantKind(StringRef name)
@@ -137,6 +137,7 @@ Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) {
case VK_Sparc_GOT13: return Sparc::fixup_sparc_got13;
case VK_Sparc_13: return Sparc::fixup_sparc_13;
case VK_Sparc_WPLT30: return Sparc::fixup_sparc_wplt30;
+ case VK_Sparc_WDISP30: return Sparc::fixup_sparc_call30;
case VK_Sparc_TLS_GD_HI22: return Sparc::fixup_sparc_tls_gd_hi22;
case VK_Sparc_TLS_GD_LO10: return Sparc::fixup_sparc_tls_gd_lo10;
case VK_Sparc_TLS_GD_ADD: return Sparc::fixup_sparc_tls_gd_add;
@@ -205,10 +206,8 @@ void SparcMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
MCSymbol *Symbol = Asm.getContext().getOrCreateSymbol("__tls_get_addr");
Asm.registerSymbol(*Symbol);
auto ELFSymbol = cast<MCSymbolELF>(Symbol);
- if (!ELFSymbol->isBindingSet()) {
+ if (!ELFSymbol->isBindingSet())
ELFSymbol->setBinding(ELF::STB_GLOBAL);
- ELFSymbol->setExternal(true);
- }
LLVM_FALLTHROUGH;
}
case VK_Sparc_TLS_GD_HI22:
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index c2467faca257..76603530e521 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -38,6 +38,7 @@ public:
VK_Sparc_GOT13,
VK_Sparc_13,
VK_Sparc_WPLT30,
+ VK_Sparc_WDISP30,
VK_Sparc_R_DISP32,
VK_Sparc_TLS_GD_HI22,
VK_Sparc_TLS_GD_LO10,
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index fb2bcdc6c91b..9531e3105fe2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -68,7 +68,7 @@ static MCSubtargetInfo *
createSparcMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
if (CPU.empty())
CPU = (TT.getArch() == Triple::sparcv9) ? "v9" : "v8";
- return createSparcMCSubtargetInfoImpl(TT, CPU, FS);
+ return createSparcMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCTargetStreamer *
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 7845a18b14c1..ee0b85292cfd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -80,7 +80,7 @@ static MCOperand createSparcMCOperand(SparcMCExpr::VariantKind Kind,
}
static MCOperand createPCXCallOP(MCSymbol *Label,
MCContext &OutContext) {
- return createSparcMCOperand(SparcMCExpr::VK_Sparc_None, Label, OutContext);
+ return createSparcMCOperand(SparcMCExpr::VK_Sparc_WDISP30, Label, OutContext);
}
static MCOperand createPCXRelExprOp(SparcMCExpr::VariantKind Kind,
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index 8d8424641cd9..63187fdce999 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -257,9 +257,9 @@ bool SparcFrameLowering::hasFP(const MachineFunction &MF) const {
MFI.isFrameAddressTaken();
}
-int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- Register &FrameReg) const {
+StackOffset
+SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const SparcRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -295,10 +295,10 @@ int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF,
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
- return FrameOffset;
+ return StackOffset::getFixed(FrameOffset);
} else {
FrameReg = SP::O6; // %sp
- return FrameOffset + MF.getFrameInfo().getStackSize();
+ return StackOffset::getFixed(FrameOffset + MF.getFrameInfo().getStackSize());
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.h
index 3ec9dc8b85dd..ab0ceb6591c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -15,6 +15,7 @@
#include "Sparc.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -38,8 +39,8 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
/// targetHandlesStackFrameRounding - Returns true if the target is
/// responsible for rounding up the stack frame (probably at emitPrologue
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 116352e08382..e5c7794b7d2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -939,7 +939,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
- unsigned TF = isPositionIndependent() ? SparcMCExpr::VK_Sparc_WPLT30 : 0;
+ unsigned TF = isPositionIndependent() ? SparcMCExpr::VK_Sparc_WPLT30
+ : SparcMCExpr::VK_Sparc_WDISP30;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32, 0, TF);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
@@ -1242,7 +1243,8 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Likewise ExternalSymbol -> TargetExternalSymbol.
SDValue Callee = CLI.Callee;
bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CB);
- unsigned TF = isPositionIndependent() ? SparcMCExpr::VK_Sparc_WPLT30 : 0;
+ unsigned TF = isPositionIndependent() ? SparcMCExpr::VK_Sparc_WPLT30
+ : SparcMCExpr::VK_Sparc_WDISP30;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT, 0, TF);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
@@ -1877,8 +1879,7 @@ void SparcTargetLowering::computeKnownBitsForTargetNode
Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
break;
}
}
@@ -2139,7 +2140,7 @@ SDValue SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain,
int FI = MFI.CreateStackObject(16, Align(8), false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
Chain = DAG.getStore(Chain, DL, Entry.Node, FIPtr, MachinePointerInfo(),
- /* Alignment = */ 8);
+ Align(8));
Entry.Node = FIPtr;
Entry.Ty = PointerType::getUnqual(ArgTy);
@@ -2198,7 +2199,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
// Load RetPtr to get the return value.
return DAG.getLoad(Op.getValueType(), SDLoc(Op), Chain, RetPtr,
- MachinePointerInfo(), /* Alignment = */ 8);
+ MachinePointerInfo(), Align(8));
}
SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
@@ -2541,8 +2542,9 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
MachinePointerInfo(SV));
// Load the actual argument out of the pointer VAList.
// We can't count on greater alignment than the word size.
- return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
- std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
+ return DAG.getLoad(
+ VT, DL, InChain, VAList, MachinePointerInfo(),
+ std::min(PtrVT.getFixedSizeInBits(), VT.getFixedSizeInBits()) / 8);
}
static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
@@ -2731,23 +2733,21 @@ static SDValue LowerF64Op(SDValue SrcReg64, const SDLoc &dl, SelectionDAG &DAG,
static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
{
SDLoc dl(Op);
- LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
- assert(LdNode && LdNode->getOffset().isUndef()
- && "Unexpected node type");
+ LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
+ assert(LdNode->getOffset().isUndef() && "Unexpected node type");
- unsigned alignment = LdNode->getAlignment();
- if (alignment > 8)
- alignment = 8;
+ Align Alignment = commonAlignment(LdNode->getOriginalAlign(), 8);
SDValue Hi64 =
DAG.getLoad(MVT::f64, dl, LdNode->getChain(), LdNode->getBasePtr(),
- LdNode->getPointerInfo(), alignment);
+ LdNode->getPointerInfo(), Alignment);
EVT addrVT = LdNode->getBasePtr().getValueType();
SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
LdNode->getBasePtr(),
DAG.getConstant(8, dl, addrVT));
SDValue Lo64 = DAG.getLoad(MVT::f64, dl, LdNode->getChain(), LoPtr,
- LdNode->getPointerInfo(), alignment);
+ LdNode->getPointerInfo().getWithOffset(8),
+ Alignment);
SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, dl, MVT::i32);
SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, dl, MVT::i32);
@@ -2785,9 +2785,9 @@ static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG)
// Lower a f128 store into two f64 stores.
static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
- StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
- assert(StNode && StNode->getOffset().isUndef()
- && "Unexpected node type");
+ StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
+ assert(StNode->getOffset().isUndef() && "Unexpected node type");
+
SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, dl, MVT::i32);
SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, dl, MVT::i32);
@@ -2802,20 +2802,20 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
StNode->getValue(),
SubRegOdd);
- unsigned alignment = StNode->getAlignment();
- if (alignment > 8)
- alignment = 8;
+ Align Alignment = commonAlignment(StNode->getOriginalAlign(), 8);
SDValue OutChains[2];
OutChains[0] =
DAG.getStore(StNode->getChain(), dl, SDValue(Hi64, 0),
- StNode->getBasePtr(), MachinePointerInfo(), alignment);
+ StNode->getBasePtr(), StNode->getPointerInfo(),
+ Alignment);
EVT addrVT = StNode->getBasePtr().getValueType();
SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
StNode->getBasePtr(),
DAG.getConstant(8, dl, addrVT));
OutChains[1] = DAG.getStore(StNode->getChain(), dl, SDValue(Lo64, 0), LoPtr,
- MachinePointerInfo(), alignment);
+ StNode->getPointerInfo().getWithOffset(8),
+ Alignment);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
@@ -2834,7 +2834,8 @@ static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG)
SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue());
SDValue Chain = DAG.getStore(
St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(),
- St->getAlignment(), St->getMemOperand()->getFlags(), St->getAAInfo());
+ St->getOriginalAlign(), St->getMemOperand()->getFlags(),
+ St->getAAInfo());
return Chain;
}
@@ -3400,8 +3401,9 @@ void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
SDLoc dl(N);
SDValue LoadRes = DAG.getExtLoad(
Ld->getExtensionType(), dl, MVT::v2i32, Ld->getChain(),
- Ld->getBasePtr(), Ld->getPointerInfo(), MVT::v2i32, Ld->getAlignment(),
- Ld->getMemOperand()->getFlags(), Ld->getAAInfo());
+ Ld->getBasePtr(), Ld->getPointerInfo(), MVT::v2i32,
+ Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags(),
+ Ld->getAAInfo());
SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes);
Results.push_back(Res);
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index 9a200a36cd3e..df65c5457c1d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -42,9 +42,9 @@ def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>;
def : Pat<(i64 (and i64:$val, 0xffffffff)), (SRLri $val, 0)>;
def : Pat<(i64 (sext_inreg i64:$val, i32)), (SRAri $val, 0)>;
-defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>;
-defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>;
-defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>;
+defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, shift_imm6, I64Regs>;
+defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, shift_imm6, I64Regs>;
+defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, shift_imm6, I64Regs>;
} // Predicates = [Is64Bit]
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td
index 2d8f063f7ed1..da53307bcb1c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -224,13 +224,13 @@ class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
// Define rr and ri shift instructions with patterns.
multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode,
- ValueType VT, RegisterClass RC,
+ ValueType VT, ValueType SIT, RegisterClass RC,
InstrItinClass itin = IIC_iu_instr> {
def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs1, IntRegs:$rs2),
!strconcat(OpcStr, " $rs1, $rs2, $rd"),
[(set VT:$rd, (OpNode VT:$rs1, i32:$rs2))],
itin>;
- def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs1, i32imm:$shcnt),
+ def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs1, SIT:$shcnt),
!strconcat(OpcStr, " $rs1, $shcnt, $rd"),
[(set VT:$rd, (OpNode VT:$rs1, (i32 imm:$shcnt)))],
itin>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 8b01313c7911..d1190ae03d2c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -113,6 +113,18 @@ def SETHIimm_not : PatLeaf<(i32 imm), [{
def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>;
def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>;
+// Constrained operands for the shift operations.
+class ShiftAmtImmAsmOperand<int Bits> : AsmOperandClass {
+ let Name = "ShiftAmtImm" # Bits;
+ let ParserMethod = "parseShiftAmtImm<" # Bits # ">";
+}
+def shift_imm5 : Operand<i32> {
+ let ParserMatchClass = ShiftAmtImmAsmOperand<5>;
+}
+def shift_imm6 : Operand<i32> {
+ let ParserMatchClass = ShiftAmtImmAsmOperand<6>;
+}
+
// Address operands
def SparcMEMrrAsmOperand : AsmOperandClass {
let Name = "MEMrr";
@@ -160,13 +172,20 @@ def bprtarget16 : Operand<OtherVT> {
let EncoderMethod = "getBranchOnRegTargetOpValue";
}
+def SparcCallTargetAsmOperand : AsmOperandClass {
+ let Name = "CallTarget";
+ let ParserMethod = "parseCallTarget";
+}
+
def calltarget : Operand<i32> {
let EncoderMethod = "getCallTargetOpValue";
let DecoderMethod = "DecodeCall";
+ let ParserMatchClass = SparcCallTargetAsmOperand;
}
def simm13Op : Operand<i32> {
let DecoderMethod = "DecodeSIMM13";
+ let EncoderMethod = "getSImm13OpValue";
}
// Operand for printing out a condition code.
@@ -691,9 +710,9 @@ let Defs = [ICC] in {
}
// Section B.12 - Shift Instructions, p. 107
-defm SLL : F3_12<"sll", 0b100101, shl, IntRegs, i32, simm13Op>;
-defm SRL : F3_12<"srl", 0b100110, srl, IntRegs, i32, simm13Op>;
-defm SRA : F3_12<"sra", 0b100111, sra, IntRegs, i32, simm13Op>;
+defm SLL : F3_S<"sll", 0b100101, 0, shl, i32, shift_imm5, IntRegs>;
+defm SRL : F3_S<"srl", 0b100110, 0, srl, i32, shift_imm5, IntRegs>;
+defm SRA : F3_S<"sra", 0b100111, 0, sra, i32, shift_imm5, IntRegs>;
// Section B.13 - Add Instructions, p. 108
defm ADD : F3_12<"add", 0b000000, add, IntRegs, i32, simm13Op>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index 990dbe23e7ac..21dced23210c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -175,7 +175,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register FrameReg;
int Offset;
- Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg);
+ Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed();
Offset += MI.getOperand(FIOperandNum + 1).getImm();
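The SparcRegisterInfo.cpp hunk above is the consumer side of the frame-lowering change earlier in this patch: getFrameIndexReference now returns a StackOffset, and the caller unwraps the fixed part with getFixed(). A toy sketch of that wrap/unwrap pair — not the real llvm::StackOffset, whose additional scalable component is assumed here and never used by this target:

// Toy offset type modeled on the StackOffset usage in this patch.
#include <cstdint>

class StackOffsetSketch {
  int64_t Fixed = 0;
  int64_t Scalable = 0; // assumed extra field, unused by the Sparc code above

public:
  static StackOffsetSketch getFixed(int64_t V) {
    StackOffsetSketch O;
    O.Fixed = V;
    return O;
  }
  int64_t getFixed() const { return Fixed; }
};

// Usage mirroring eliminateFrameIndex: unwrap the fixed part, then add the
// instruction's immediate displacement.
static int64_t resolveFrameOffset(const StackOffsetSketch &FI, int64_t Imm) {
  return FI.getFixed() + Imm;
}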
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index dbc6cf8e5b86..abc47ef51563 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -55,7 +55,7 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
CPUName = (Is64Bit) ? "v9" : "v8";
// Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
+ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS);
// Popc is a v9-only instruction.
if (!IsV9)
@@ -67,9 +67,9 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
SparcSubtarget::SparcSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM,
bool is64Bit)
- : SparcGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), Is64Bit(is64Bit),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(*this) {}
+ : SparcGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), TargetTriple(TT),
+ Is64Bit(is64Bit), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo(TM, *this), FrameLowering(*this) {}
int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.h
index db19f99e3c9c..82a4aa510355 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -16,8 +16,8 @@
#include "SparcFrameLowering.h"
#include "SparcISelLowering.h"
#include "SparcInstrInfo.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include <string>
@@ -101,7 +101,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
SparcSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
bool is64Bit() const { return Is64Bit; }
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index d48d94e2faf1..ae5228db5827 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -55,9 +55,7 @@ static std::string computeDataLayout(const Triple &T, bool is64Bit) {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
+ return RM.getValueOr(Reloc::Static);
}
// Code models. Some only make sense for 64-bit code.
@@ -111,12 +109,10 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
// FIXME: This is related to the code below to reset the target options,
// we need to know whether or not the soft float flag is set on the
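As an aside, the getEffectiveRelocModel() cleanup above is a pure simplification: getValueOr() folds the explicit hasValue()/dereference pair into a single call. A minimal standalone sketch, not part of the patch, using std::optional::value_or as a stand-in for llvm::Optional::getValueOr; the enum and function below are illustrative, not the LLVM definitions.

// Sketch only: std::optional stands in for llvm::Optional.
#include <cassert>
#include <optional>

enum class RelocModel { Static, PIC_ };

static RelocModel getEffectiveRelocModel(std::optional<RelocModel> RM) {
  // Equivalent to: if (!RM.has_value()) return RelocModel::Static; return *RM;
  return RM.value_or(RelocModel::Static);
}

int main() {
  assert(getEffectiveRelocModel(std::nullopt) == RelocModel::Static);
  assert(getEffectiveRelocModel(RelocModel::PIC_) == RelocModel::PIC_);
}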
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index d5a3a19446c7..2b815a366ccd 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -565,7 +565,7 @@ struct InsnMatchEntry {
StringRef Format;
uint64_t Opcode;
int32_t NumOperands;
- MatchClassKind OperandKinds[5];
+ MatchClassKind OperandKinds[7];
};
// For equal_range comparison.
@@ -633,7 +633,20 @@ static struct InsnMatchEntry InsnMatchTable[] = {
{ "sse", SystemZ::InsnSSE, 3,
{ MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12 } },
{ "ssf", SystemZ::InsnSSF, 4,
- { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } }
+ { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } },
+ { "vri", SystemZ::InsnVRI, 6,
+ { MCK_U48Imm, MCK_VR128, MCK_VR128, MCK_U12Imm, MCK_U4Imm, MCK_U4Imm } },
+ { "vrr", SystemZ::InsnVRR, 7,
+ { MCK_U48Imm, MCK_VR128, MCK_VR128, MCK_VR128, MCK_U4Imm, MCK_U4Imm,
+ MCK_U4Imm } },
+ { "vrs", SystemZ::InsnVRS, 5,
+ { MCK_U48Imm, MCK_AnyReg, MCK_VR128, MCK_BDAddr64Disp12, MCK_U4Imm } },
+ { "vrv", SystemZ::InsnVRV, 4,
+ { MCK_U48Imm, MCK_VR128, MCK_BDVAddr64Disp12, MCK_U4Imm } },
+ { "vrx", SystemZ::InsnVRX, 4,
+ { MCK_U48Imm, MCK_VR128, MCK_BDXAddr64Disp12, MCK_U4Imm } },
+ { "vsi", SystemZ::InsnVSI, 4,
+ { MCK_U48Imm, MCK_VR128, MCK_BDAddr64Disp12, MCK_U8Imm } }
};
static void printMCExpr(const MCExpr *E, raw_ostream &OS) {
@@ -838,10 +851,11 @@ SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) {
// Parse any type of register (including integers) and add it to Operands.
OperandMatchResultTy
SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+
// Handle integer values.
if (Parser.getTok().is(AsmToken::Integer)) {
const MCExpr *Register;
- SMLoc StartLoc = Parser.getTok().getLoc();
if (Parser.parseExpression(Register))
return MatchOperand_ParseFail;
@@ -863,6 +877,11 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
if (parseRegister(Reg))
return MatchOperand_ParseFail;
+ if (Reg.Num > 15) {
+ Error(StartLoc, "invalid register");
+ return MatchOperand_ParseFail;
+ }
+
// Map to the correct register kind.
RegisterKind Kind;
unsigned RegNo;
@@ -1195,10 +1214,14 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
OperandMatchResultTy ResTy;
if (Kind == MCK_AnyReg)
ResTy = parseAnyReg(Operands);
+ else if (Kind == MCK_VR128)
+ ResTy = parseVR128(Operands);
else if (Kind == MCK_BDXAddr64Disp12 || Kind == MCK_BDXAddr64Disp20)
ResTy = parseBDXAddr64(Operands);
else if (Kind == MCK_BDAddr64Disp12 || Kind == MCK_BDAddr64Disp20)
ResTy = parseBDAddr64(Operands);
+ else if (Kind == MCK_BDVAddr64Disp12)
+ ResTy = parseBDVAddr64(Operands);
else if (Kind == MCK_PCRel32)
ResTy = parsePCRel32(Operands);
else if (Kind == MCK_PCRel16)
@@ -1243,6 +1266,8 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
ZOperand.addBDAddrOperands(Inst, 2);
else if (ZOperand.isMem(BDXMem))
ZOperand.addBDXAddrOperands(Inst, 3);
+ else if (ZOperand.isMem(BDVMem))
+ ZOperand.addBDVAddrOperands(Inst, 3);
else if (ZOperand.isImm())
ZOperand.addImmOperands(Inst, 1);
else
@@ -1297,6 +1322,11 @@ OperandMatchResultTy SystemZAsmParser::tryParseRegister(unsigned &RegNo,
bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
+
+ // Apply mnemonic aliases first, before doing anything else, in
+ // case the target uses them.
+ applyMnemonicAliases(Name, getAvailableFeatures(), 0 /*VariantID*/);
+
Operands.push_back(SystemZOperand::createToken(Name, NameLoc));
// Read the remaining operands.
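For context on the asm-parser hunk above: InsnMatchTable is kept sorted by the Format string and searched with equal_range, and OperandKinds grows from 5 to 7 slots because the new "vrr" row carries seven operand kinds. A standalone sketch, not part of the patch, of that kind of sorted-table lookup; it uses std::lower_bound and plain ints in place of the real MatchClassKind machinery, so all values below are illustrative.

// Sketch only: a sorted table keyed by format name, as used for ".insn".
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>
#include <string>

struct InsnEntry {
  std::string Format;
  int32_t NumOperands;
  std::array<int, 7> OperandKinds; // was 5 before the vector formats were added
};

static const InsnEntry Table[] = {
    {"ssf", 4, {1, 2, 2, 3, 0, 0, 0}},
    {"vri", 6, {1, 4, 4, 5, 6, 6, 0}},
    {"vrr", 7, {1, 4, 4, 4, 6, 6, 6}}, // needs all seven slots
};

int main() {
  auto Less = [](const InsnEntry &E, const std::string &F) { return E.Format < F; };
  auto It = std::lower_bound(std::begin(Table), std::end(Table),
                             std::string("vrr"), Less);
  assert(It != std::end(Table) && It->Format == "vrr" && It->NumOperands == 7);
}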
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index e42aa14fe589..e81db1030c01 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -468,8 +468,10 @@ DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
// Read any remaining bytes.
- if (Bytes.size() < Size)
+ if (Bytes.size() < Size) {
+ Size = Bytes.size();
return MCDisassembler::Fail;
+ }
// Construct the instruction.
uint64_t Inst = 0;
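The disassembler fix above matters for callers that advance by the reported Size: on a truncated buffer the old code returned Fail while leaving Size at the full instruction length even though fewer bytes remain, whereas the new code clamps Size to the bytes actually available. A standalone sketch of that contract, not part of the patch, with a toy 2/4/6-byte length encoding loosely modelled on SystemZ; names and encoding are illustrative only.

// Sketch only: on truncation, report the remaining byte count so the caller
// still makes progress and can stop cleanly.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

static bool decodeOne(const std::vector<uint8_t> &Bytes, std::size_t Pos,
                      uint64_t &Size) {
  if (Pos >= Bytes.size()) { Size = 0; return false; }
  // Top two bits of the first byte select a 2-, 4- or 6-byte instruction,
  // loosely like the SystemZ length encoding.
  uint8_t Top = Bytes[Pos] >> 6;
  Size = (Top == 0) ? 2 : (Top == 3) ? 6 : 4;
  if (Bytes.size() - Pos < Size) {
    Size = Bytes.size() - Pos; // report the truncated remainder, as in the fix above
    return false;
  }
  return true;
}

int main() {
  std::vector<uint8_t> Buf = {0x00, 0x01, 0xC0}; // a 2-byte insn, then a truncated 6-byte one
  std::size_t Pos = 0;
  uint64_t Size = 0;
  while (Pos < Buf.size()) {
    bool OK = decodeOne(Buf, Pos, Size);
    if (!OK && Size == 0) break;
    Pos += Size;           // always makes progress because Size != 0 on truncation
    if (!OK) break;
  }
  assert(Pos == Buf.size());
  std::puts("decoded without stalling");
}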
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index cfe1bd89c3eb..0db7279a06c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -27,6 +27,7 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
// Automatically generated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index e62f5040898f..5f276f793578 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -54,10 +54,6 @@ public:
const MCValue &Target, MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
const MCSubtargetInfo *STI) const override;
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- return false;
- }
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *Fragment,
const MCAsmLayout &Layout) const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index e540ff4e4811..76df8cf0f3b2 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -17,6 +17,8 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
CalleeSaveStackSlotSize = 8;
IsLittleEndian = false;
+ MaxInstLength = 6;
+
CommentString = "#";
ZeroDirective = "\t.space\t";
Data64bitsDirective = "\t.quad\t";
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index f2ef1ad6c698..5c191d17ebc5 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -171,7 +171,7 @@ static MCRegisterInfo *createSystemZMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *
createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- return createSystemZMCSubtargetInfoImpl(TT, CPU, FS);
+ return createSystemZMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 4109bfc11337..584737e1d940 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -236,14 +236,15 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
break;
case SystemZ::CallBR:
- LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
+ LoweredMI = MCInstBuilder(SystemZ::BR)
+ .addReg(MI->getOperand(0).getReg());
break;
case SystemZ::CallBCR:
LoweredMI = MCInstBuilder(SystemZ::BCR)
.addImm(MI->getOperand(0).getImm())
.addImm(MI->getOperand(1).getImm())
- .addReg(SystemZ::R1D);
+ .addReg(MI->getOperand(2).getReg());
break;
case SystemZ::CRBCall:
@@ -251,7 +252,7 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addImm(MI->getOperand(2).getImm())
- .addReg(SystemZ::R1D)
+ .addReg(MI->getOperand(3).getReg())
.addImm(0);
break;
@@ -260,7 +261,7 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addImm(MI->getOperand(2).getImm())
- .addReg(SystemZ::R1D)
+ .addReg(MI->getOperand(3).getReg())
.addImm(0);
break;
@@ -269,7 +270,7 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(MI->getOperand(0).getReg())
.addImm(MI->getOperand(1).getImm())
.addImm(MI->getOperand(2).getImm())
- .addReg(SystemZ::R1D)
+ .addReg(MI->getOperand(3).getReg())
.addImm(0);
break;
@@ -278,7 +279,7 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(MI->getOperand(0).getReg())
.addImm(MI->getOperand(1).getImm())
.addImm(MI->getOperand(2).getImm())
- .addReg(SystemZ::R1D)
+ .addReg(MI->getOperand(3).getReg())
.addImm(0);
break;
@@ -287,7 +288,7 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addImm(MI->getOperand(2).getImm())
- .addReg(SystemZ::R1D)
+ .addReg(MI->getOperand(3).getReg())
.addImm(0);
break;
@@ -296,7 +297,7 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addImm(MI->getOperand(2).getImm())
- .addReg(SystemZ::R1D)
+ .addReg(MI->getOperand(3).getReg())
.addImm(0);
break;
@@ -305,7 +306,7 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(MI->getOperand(0).getReg())
.addImm(MI->getOperand(1).getImm())
.addImm(MI->getOperand(2).getImm())
- .addReg(SystemZ::R1D)
+ .addReg(MI->getOperand(3).getReg())
.addImm(0);
break;
@@ -314,7 +315,7 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(MI->getOperand(0).getReg())
.addImm(MI->getOperand(1).getImm())
.addImm(MI->getOperand(2).getImm())
- .addReg(SystemZ::R1D)
+ .addReg(MI->getOperand(3).getReg())
.addImm(0);
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 2f0cf0317029..19b703bbb226 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -640,18 +640,22 @@ bool SystemZElimCompare::fuseCompareOperations(
MachineOperand CCMask(MBBI->getOperand(1));
assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
"Invalid condition-code mask for integer comparison");
- // This is only valid for CompareAndBranch.
+ // This is only valid for CompareAndBranch and CompareAndSibcall.
MachineOperand Target(MBBI->getOperand(
- Type == SystemZII::CompareAndBranch ? 2 : 0));
+ (Type == SystemZII::CompareAndBranch ||
+ Type == SystemZII::CompareAndSibcall) ? 2 : 0));
const uint32_t *RegMask;
if (Type == SystemZII::CompareAndSibcall)
- RegMask = MBBI->getOperand(2).getRegMask();
+ RegMask = MBBI->getOperand(3).getRegMask();
// Clear out all current operands.
int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
assert(CCUse >= 0 && "BRC/BCR must use CC");
Branch->RemoveOperand(CCUse);
- // Remove target (branch) or regmask (sibcall).
+ // Remove regmask (sibcall).
+ if (Type == SystemZII::CompareAndSibcall)
+ Branch->RemoveOperand(3);
+ // Remove target (branch or sibcall).
if (Type == SystemZII::CompareAndBranch ||
Type == SystemZII::CompareAndSibcall)
Branch->RemoveOperand(2);
@@ -678,8 +682,10 @@ bool SystemZElimCompare::fuseCompareOperations(
RegState::ImplicitDefine | RegState::Dead);
}
- if (Type == SystemZII::CompareAndSibcall)
+ if (Type == SystemZII::CompareAndSibcall) {
+ MIB.add(Target);
MIB.addRegMask(RegMask);
+ }
// Clear any intervening kills of SrcReg and SrcReg2.
MBBI = Compare;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
index 28f58cb310af..b1706a4a899a 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
@@ -196,7 +196,7 @@ def Arch11NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
//
-// New features added in the Twelvth Edition of the z/Architecture
+// New features added in the Twelfth Edition of the z/Architecture
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 985722fdcab4..994f471b75b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -316,6 +316,8 @@ void SystemZFrameLowering::
processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
bool BackChain = MF.getFunction().hasFnAttribute("backchain");
if (!usePackedStack(MF) || BackChain)
@@ -344,6 +346,14 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF,
RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
}
+
+ // If R6 is used as an argument register it is still callee saved. If, in
+ // that case, it is not clobbered (and restored), it should never be marked
+ // as killed.
+ if (MF.front().isLiveIn(SystemZ::R6D) &&
+ ZFI->getRestoreGPRRegs().LowGPR != SystemZ::R6D)
+ for (auto &MO : MRI->use_nodbg_operands(SystemZ::R6D))
+ MO.setIsKill(false);
}
// Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
@@ -478,15 +488,6 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
MFFrame.setStackSize(StackSize);
if (StackSize) {
- // Determine if we want to store a backchain.
- bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
-
- // If we need backchain, save current stack pointer. R1 is free at this
- // point.
- if (StoreBackchain)
- BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR))
- .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
-
// Allocate StackSize bytes.
int64_t Delta = -int64_t(StackSize);
const unsigned ProbeSize = TLI.getStackProbeSize(MF);
@@ -502,18 +503,20 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
.addImm(StackSize);
}
else {
+ bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
+ // If we need backchain, save current stack pointer. R1 is free at
+ // this point.
+ if (StoreBackchain)
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
+ if (StoreBackchain)
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
+ .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
+ .addImm(getBackchainOffset(MF)).addReg(0);
}
SPOffsetFromCFA += Delta;
-
- if (StoreBackchain) {
- // The back chain is stored topmost with packed-stack.
- int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0;
- BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
- .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
- .addImm(Offset).addReg(0);
- }
}
if (HasFP) {
@@ -555,7 +558,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
Register IgnoredFrameReg;
int64_t Offset =
- getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg);
+ getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg)
+ .getFixed();
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, DwarfReg, SPOffsetFromCFA + Offset));
@@ -657,6 +661,13 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
.addMemOperand(MMO);
};
+ bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
+ if (StoreBackchain)
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
+
+ MachineBasicBlock *DoneMBB = nullptr;
+ MachineBasicBlock *LoopMBB = nullptr;
if (NumFullBlocks < 3) {
// Emit unrolled probe statements.
for (unsigned int i = 0; i < NumFullBlocks; i++)
@@ -666,15 +677,16 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
uint64_t LoopAlloc = ProbeSize * NumFullBlocks;
SPOffsetFromCFA -= LoopAlloc;
- BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D)
+ // Use R0D to hold the exit value.
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R0D)
.addReg(SystemZ::R15D);
- buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII);
- emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII);
+ buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R0D, ZII);
+ emitIncrement(*MBB, MBBI, DL, SystemZ::R0D, -int64_t(LoopAlloc), ZII);
buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc),
ZII);
- MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
- MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB);
+ DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
+ LoopMBB = SystemZ::emitBlockAfter(MBB);
MBB->addSuccessor(LoopMBB);
LoopMBB->addSuccessor(LoopMBB);
LoopMBB->addSuccessor(DoneMBB);
@@ -682,22 +694,29 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
MBB = LoopMBB;
allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/);
BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR))
- .addReg(SystemZ::R15D).addReg(SystemZ::R1D);
+ .addReg(SystemZ::R15D).addReg(SystemZ::R0D);
BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB);
MBB = DoneMBB;
MBBI = DoneMBB->begin();
buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII);
-
- recomputeLiveIns(*DoneMBB);
- recomputeLiveIns(*LoopMBB);
}
if (Residual)
allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/);
+ if (StoreBackchain)
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::STG))
+ .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
+ .addImm(getBackchainOffset(MF)).addReg(0);
+
StackAllocMI->eraseFromParent();
+ if (DoneMBB != nullptr) {
+ // Compute the live-in lists for the new blocks.
+ recomputeLiveIns(*DoneMBB);
+ recomputeLiveIns(*LoopMBB);
+ }
}
bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
@@ -715,14 +734,14 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return true;
}
-int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- Register &FrameReg) const {
+StackOffset
+SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
// Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so
// add that difference here.
- int64_t Offset =
- TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg);
- return Offset + SystemZMC::CallFrameSize;
+ StackOffset Offset =
+ TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg);
+ return Offset + StackOffset::getFixed(SystemZMC::CallFrameSize);
}
MachineBasicBlock::iterator SystemZFrameLowering::
@@ -765,8 +784,7 @@ getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {
int FI = ZFI->getFramePointerSaveIndex();
if (!FI) {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
- // The back chain is stored topmost with packed-stack.
- int Offset = usePackedStack(MF) ? -8 : -SystemZMC::CallFrameSize;
+ int Offset = getBackchainOffset(MF) - SystemZMC::CallFrameSize;
FI = MFFrame.CreateFixedObject(8, Offset, false);
ZFI->setFramePointerSaveIndex(FI);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 8752acc7e5ae..085c31ca0f18 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -9,8 +9,10 @@
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
class SystemZTargetMachine;
@@ -47,8 +49,8 @@ public:
MachineBasicBlock &PrologMBB) const override;
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
@@ -62,6 +64,12 @@ public:
int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const;
bool usePackedStack(MachineFunction &MF) const;
+
+ // Return the offset of the backchain.
+ unsigned getBackchainOffset(MachineFunction &MF) const {
+ // The back chain is stored topmost with packed-stack.
+ return usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0;
+ }
};
} // end namespace llvm
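The new getBackchainOffset() helper above centralizes a rule that previously appeared inline in two places: with packed-stack the back chain is stored topmost in the call frame (CallFrameSize - 8), otherwise at offset 0. A tiny standalone sketch of the arithmetic, not part of the patch, assuming the usual 160-byte SystemZ ELF call frame.

// Sketch only: the constant mirrors SystemZMC::CallFrameSize (assumed 160 here).
#include <cassert>

namespace {
constexpr unsigned CallFrameSize = 160;

unsigned getBackchainOffset(bool UsePackedStack) {
  // The back chain is stored topmost with packed-stack.
  return UsePackedStack ? CallFrameSize - 8 : 0;
}
} // namespace

int main() {
  assert(getBackchainOffset(false) == 0);
  assert(getBackchainOffset(true) == 152);
}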
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index e2af02227999..c0a173df7ba2 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -74,8 +74,8 @@ unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
}
ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
-getHazardType(SUnit *m, int Stalls) {
- return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
+getHazardType(SUnit *SU, int Stalls) {
+ return (fitsIntoCurrentGroup(SU) ? NoHazard : Hazard);
}
void SystemZHazardRecognizer::Reset() {
@@ -179,7 +179,7 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
*SchedModel->getProcResource(PI->ProcResourceIdx);
std::string FU(PRD.Name);
// trim e.g. Z13_FXaUnit -> FXa
- FU = FU.substr(FU.find("_") + 1);
+ FU = FU.substr(FU.find('_') + 1);
size_t Pos = FU.find("Unit");
if (Pos != std::string::npos)
FU.resize(Pos);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h
index 38bf41ebe96a..b2ee64a1bb4a 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -113,7 +113,7 @@ public:
Reset();
}
- HazardType getHazardType(SUnit *m, int Stalls = 0) override;
+ HazardType getHazardType(SUnit *SU, int Stalls = 0) override;
void Reset() override;
void EmitInstruction(SUnit *SU) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 37328684399b..9d90a4940cba 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -338,6 +338,10 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// to X.
bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const;
+ // Return true if N (a load or a store) fulfills the alignment
+ // requirements for a PC-relative access.
+ bool storeLoadIsAligned(SDNode *N) const;
+
// Try to expand a boolean SELECT_CCMASK using an IPM sequence.
SDValue expandSelectBoolean(SDNode *Node);
@@ -1460,6 +1464,46 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
canUseBlockOperation(StoreA, LoadB);
}
+bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
+
+ auto *MemAccess = cast<LSBaseSDNode>(N);
+ TypeSize StoreSize = MemAccess->getMemoryVT().getStoreSize();
+ SDValue BasePtr = MemAccess->getBasePtr();
+ MachineMemOperand *MMO = MemAccess->getMemOperand();
+ assert(MMO && "Expected a memory operand.");
+
+ // The memory access must have a proper alignment and no index register.
+ if (MemAccess->getAlignment() < StoreSize ||
+ !MemAccess->getOffset().isUndef())
+ return false;
+
+ // The MMO must not have an unaligned offset.
+ if (MMO->getOffset() % StoreSize != 0)
+ return false;
+
+ // An access to GOT or the Constant Pool is aligned.
+ if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
+ if ((PSV->isGOT() || PSV->isConstantPool()))
+ return true;
+
+ // Check the alignment of a Global Address.
+ if (BasePtr.getNumOperands())
+ if (GlobalAddressSDNode *GA =
+ dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0))) {
+ // The immediate offset must be aligned.
+ if (GA->getOffset() % StoreSize != 0)
+ return false;
+
+ // The alignment of the symbol itself must be at least the store size.
+ const GlobalValue *GV = GA->getGlobal();
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ if (GV->getPointerAlignment(DL).value() < StoreSize)
+ return false;
+ }
+
+ return true;
+}
+
void SystemZDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
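The new storeLoadIsAligned() predicate above replaces the per-PatFrag alignment checks (see the SystemZOperators.td hunk later in this patch) and reduces to a handful of divisibility tests. A standalone sketch of just that arithmetic, not part of the patch, with an illustrative struct in place of the SDNode/MachineMemOperand plumbing.

// Sketch only: the access alignment must cover the store size, and neither the
// memory-operand offset nor a global's immediate offset may break it.
#include <cassert>
#include <cstdint>

struct ToyAccess {            // illustrative stand-in, not an LLVM type
  uint64_t StoreSize;         // bytes accessed
  uint64_t Alignment;         // known alignment of the base
  int64_t  MMOOffset;         // offset recorded on the memory operand
  int64_t  GlobalOffset;      // immediate offset folded into a global address
};

static bool storeLoadIsAligned(const ToyAccess &A) {
  if (A.Alignment < A.StoreSize)
    return false;                              // base not aligned enough
  if (A.MMOOffset % (int64_t)A.StoreSize != 0)
    return false;                              // unaligned offset on the operand
  if (A.GlobalOffset % (int64_t)A.StoreSize != 0)
    return false;                              // unaligned immediate on the symbol
  return true;
}

int main() {
  assert(storeLoadIsAligned({4, 8, 16, 0}));
  assert(!storeLoadIsAligned({4, 2, 0, 0}));   // under-aligned base
  assert(!storeLoadIsAligned({4, 8, 6, 0}));   // offset not a multiple of 4
}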
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index eb1e51341ec4..270134d84c61 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -164,6 +164,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
++I) {
MVT VT = MVT::SimpleValueType(I);
if (isTypeLegal(VT)) {
+ setOperationAction(ISD::ABS, VT, Legal);
+
// Expand individual DIV and REMs into DIVREMs.
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
@@ -283,10 +285,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
- // FIXME: Can we support these natively?
+ // Expand 128 bit shifts without using a libcall.
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
// We have native instructions for i8, i16 and i32 extensions, but not i1.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -358,6 +363,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUB, VT, Legal);
if (VT != MVT::v2i64)
setOperationAction(ISD::MUL, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::AND, VT, Legal);
setOperationAction(ISD::OR, VT, Legal);
setOperationAction(ISD::XOR, VT, Legal);
@@ -784,10 +790,11 @@ bool SystemZVectorConstantInfo::isVectorConstantLegal(
SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
-
- // Find the smallest splat.
SplatBits = FPImm.bitcastToAPInt();
unsigned Width = SplatBits.getBitWidth();
+ IntBits <<= (SystemZ::VectorBits - Width);
+
+ // Find the smallest splat.
while (Width > 8) {
unsigned HalfSize = Width / 2;
APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
@@ -981,16 +988,16 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
return false;
- unsigned FromBits = FromType->getPrimitiveSizeInBits();
- unsigned ToBits = ToType->getPrimitiveSizeInBits();
+ unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
+ unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
return FromBits > ToBits;
}
bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
if (!FromVT.isInteger() || !ToVT.isInteger())
return false;
- unsigned FromBits = FromVT.getSizeInBits();
- unsigned ToBits = ToVT.getSizeInBits();
+ unsigned FromBits = FromVT.getFixedSizeInBits();
+ unsigned ToBits = ToVT.getFixedSizeInBits();
return FromBits > ToBits;
}
@@ -1543,6 +1550,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy(MF.getDataLayout());
+ LLVMContext &Ctx = *DAG.getContext();
// Detect unsupported vector argument and return types.
if (Subtarget.hasVector()) {
@@ -1552,7 +1560,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+ SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
// We don't support GuaranteedTailCallOpt, only automatically-detected
@@ -1577,14 +1585,25 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
- SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
+ unsigned ArgIndex = Outs[I].OrigArgIndex;
+ EVT SlotVT;
+ if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
+ // Allocate the full stack space for a promoted (and split) argument.
+ Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
+ EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
+ MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
+ unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
+ SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
+ } else {
+ SlotVT = Outs[I].ArgVT;
+ }
+ SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, SpillSlot,
MachinePointerInfo::getFixedStack(MF, FI)));
// If the original argument was split (e.g. i128), we need
// to store all parts of it here (and pass just one address).
- unsigned ArgIndex = Outs[I].OrigArgIndex;
assert (Outs[I].PartOffset == 0);
while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
SDValue PartValue = OutVals[I + 1];
@@ -1594,6 +1613,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
MemOpChains.push_back(
DAG.getStore(Chain, DL, PartValue, Address,
MachinePointerInfo::getFixedStack(MF, FI)));
+ assert((PartOffset + PartValue.getValueType().getStoreSize() <=
+ SlotVT.getStoreSize()) && "Not enough space for argument part!");
++I;
}
ArgValue = SpillSlot;
@@ -1687,7 +1708,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RetLocs;
- CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
// Copy all of the result registers out of their specified physreg.
@@ -2285,7 +2306,8 @@ static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
C.Op1.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
- if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
+ if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
+ C.Op0.getValueSizeInBits().getFixedSize()) {
unsigned Type = L->getExtensionType();
if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
(Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
@@ -2958,7 +2980,7 @@ static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
// Return the absolute or negative absolute of Op; IsNegative decides which.
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
bool IsNegative) {
- Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
+ Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
if (IsNegative)
Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
DAG.getConstant(0, DL, Op.getValueType()), Op);
@@ -3414,14 +3436,14 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
// If user has set the no alignment function attribute, ignore
// alloca alignments.
- uint64_t AlignVal = (RealignOpt ?
- dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
+ uint64_t AlignVal =
+ (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
uint64_t StackAlign = TFI->getStackAlignment();
uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
- unsigned SPReg = getStackPointerRegisterToSaveRestore();
+ Register SPReg = getStackPointerRegisterToSaveRestore();
SDValue NeededSpace = Size;
// Get a reference to the stack pointer.
@@ -3430,7 +3452,8 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
// If we need a backchain, save it now.
SDValue Backchain;
if (StoreBackchain)
- Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
+ Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
+ MachinePointerInfo());
// Add extra space for alignment if needed.
if (ExtraAlignSpace)
@@ -3467,7 +3490,8 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
}
if (StoreBackchain)
- Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
+ Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
+ MachinePointerInfo());
SDValue Ops[2] = { Result, Chain };
return DAG.getMergeValues(Ops, DL);
@@ -4090,13 +4114,15 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
if (StoreBackchain) {
SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
- Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
+ Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
+ MachinePointerInfo());
}
Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
if (StoreBackchain)
- Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
+ Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
+ MachinePointerInfo());
return Chain;
}
@@ -5557,7 +5583,6 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(TLS_LDCALL);
OPCODE(PCREL_WRAPPER);
OPCODE(PCREL_OFFSET);
- OPCODE(IABS);
OPCODE(ICMP);
OPCODE(FCMP);
OPCODE(STRICT_FCMP);
@@ -6815,8 +6840,7 @@ static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
KnownBits RHSKnown =
DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
- Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
- Known.One = LHSKnown.One & RHSKnown.One;
+ Known = KnownBits::commonBits(LHSKnown, RHSKnown);
}
void
@@ -7246,6 +7270,15 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
+ // ISel pattern matching also adds a load memory operand of the same
+ // address, so take special care to find the storing memory operand.
+ MachineMemOperand *MMO = nullptr;
+ for (auto *I : MI.memoperands())
+ if (I->isStore()) {
+ MMO = I;
+ break;
+ }
+
// Use STOCOpcode if possible. We could use different store patterns in
// order to avoid matching the index register, but the performance trade-offs
// might be more complicated in that case.
@@ -7253,15 +7286,6 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
if (Invert)
CCMask ^= CCValid;
- // ISel pattern matching also adds a load memory operand of the same
- // address, so take special care to find the storing memory operand.
- MachineMemOperand *MMO = nullptr;
- for (auto *I : MI.memoperands())
- if (I->isStore()) {
- MMO = I;
- break;
- }
-
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
.addReg(SrcReg)
.add(Base)
@@ -7306,7 +7330,8 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
.addReg(SrcReg)
.add(Base)
.addImm(Disp)
- .addReg(IndexReg);
+ .addReg(IndexReg)
+ .addMemOperand(MMO);
MBB->addSuccessor(JoinMBB);
MI.eraseFromParent();
@@ -8140,6 +8165,16 @@ MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
return DoneMBB;
}
+SDValue SystemZTargetLowering::
+getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto *TFL =
+ static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ SDLoc DL(SP);
+ return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
+ DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
+}
+
MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
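One of the smaller changes in the hunk above swaps a hand-written intersection of known bits for KnownBits::commonBits. The two are equivalent: after merging two possible sources, a bit is known zero (or one) only if both sources agree. A minimal sketch, not part of the patch, with a toy stand-in for llvm::KnownBits.

// Sketch only: ToyKnownBits is illustrative, not llvm::KnownBits.
#include <cassert>
#include <cstdint>

struct ToyKnownBits {
  uint64_t Zero = 0; // bits known to be 0
  uint64_t One = 0;  // bits known to be 1
};

static ToyKnownBits commonBits(const ToyKnownBits &L, const ToyKnownBits &R) {
  // Same effect as the removed lines: Zero = L.Zero & R.Zero; One = L.One & R.One;
  return {L.Zero & R.Zero, L.One & R.One};
}

int main() {
  ToyKnownBits L{0xF0, 0x0F}, R{0xF3, 0x0C};
  ToyKnownBits M = commonBits(L, R);
  assert(M.Zero == 0xF0 && M.One == 0x0C);
}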
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 27637762296a..955587da626f 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -50,9 +50,6 @@ enum NodeType : unsigned {
// as a register base.
PCREL_OFFSET,
- // Integer absolute.
- IABS,
-
// Integer comparisons. There are three operands: the two values
// to compare, and an integer of type SystemZICMP.
ICMP,
@@ -701,6 +698,8 @@ private:
MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+ SDValue getBackchainAddress(SDValue SP, SelectionDAG &DAG) const;
+
MachineMemOperand::Flags
getTargetMMOFlags(const Instruction &I) const override;
const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 50f1e09c6ee5..95e94c4c8e1c 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -1764,6 +1764,55 @@ class DirectiveInsnSSF<dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{35-32} = enc{35-32};
}
+class DirectiveInsnVRI<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstVRIe<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnVRR<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstVRRc<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnVRS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstVRSc<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnVRV<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstVRV<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnVRX<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstVRX<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnVSI<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstVSI<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+
//===----------------------------------------------------------------------===//
// Variants of instructions with condition mask
//===----------------------------------------------------------------------===//
@@ -1862,6 +1911,11 @@ class ICV<string name>
!cast<CondVariant>("IntCondVariant"#name).suffix,
!cast<CondVariant>("IntCondVariant"#name).alternate>;
+// Defines a class that makes it easier to define
+// a MnemonicAlias when CondVariant's are involved.
+class MnemonicCondBranchAlias<CondVariant V, string from, string to>
+ : MnemonicAlias<!subst("#", V.suffix, from), !subst("#", V.suffix, to)>;
+
//===----------------------------------------------------------------------===//
// Instruction definitions with semantics
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 223cfcba2fac..bf01c262afe1 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -752,11 +752,14 @@ bool SystemZInstrInfo::PredicateInstruction(
return true;
}
if (Opcode == SystemZ::CallBR) {
- const uint32_t *RegMask = MI.getOperand(0).getRegMask();
+ MachineOperand Target = MI.getOperand(0);
+ const uint32_t *RegMask = MI.getOperand(1).getRegMask();
+ MI.RemoveOperand(1);
MI.RemoveOperand(0);
MI.setDesc(get(SystemZ::CallBCR));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(CCValid).addImm(CCMask)
+ .add(Target)
.addRegMask(RegMask)
.addReg(SystemZ::CC, RegState::Implicit);
return true;
@@ -999,7 +1002,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
unsigned Opcode = MI.getOpcode();
// Check CC liveness if new instruction introduces a dead def of CC.
- MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
+ MCRegUnitIterator CCUnit(MCRegister::from(SystemZ::CC), TRI);
SlotIndex MISlot = SlotIndex();
LiveRange *CCLiveRange = nullptr;
bool CCLiveAtMI = true;
@@ -1196,7 +1199,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
if (RC == &SystemZ::VR32BitRegClass || RC == &SystemZ::VR64BitRegClass) {
Register Reg = MI.getOperand(I).getReg();
Register PhysReg = Register::isVirtualRegister(Reg)
- ? (VRM ? VRM->getPhys(Reg) : Register())
+ ? (VRM ? Register(VRM->getPhys(Reg)) : Register())
: Reg;
if (!PhysReg ||
!(SystemZ::FP32BitRegClass.contains(PhysReg) ||
@@ -1242,7 +1245,8 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
else {
Register DstReg = MI.getOperand(0).getReg();
Register DstPhys =
- (Register::isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);
+ (Register::isVirtualRegister(DstReg) ? Register(VRM->getPhys(DstReg))
+ : DstReg);
Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg()
: ((OpNum == 1 && MI.isCommutable())
? MI.getOperand(2).getReg()
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index d5d56ecf6e47..6e4f9e7f4922 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -101,10 +101,20 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
}
}
-// NOPs. These are again variants of the conditional branches,
-// with the condition mask set to "never".
+// NOPs. These are again variants of the conditional branches, with the
+// condition mask set to "never". NOP_bare can't be an InstAlias since it
+// would need R0D hard coded which is not part of ADDR64BitRegClass.
def NOP : InstAlias<"nop\t$XBD", (BCAsm 0, bdxaddr12only:$XBD), 0>;
+let isAsmParserOnly = 1, hasNoSchedulingInfo = 1, M1 = 0, XBD2 = 0 in
+ def NOP_bare : InstRXb<0x47,(outs), (ins), "nop", []>;
def NOPR : InstAlias<"nopr\t$R", (BCRAsm 0, GR64:$R), 0>;
+def NOPR_bare : InstAlias<"nopr", (BCRAsm 0, R0D), 0>;
+
+// An alias of BRC 0, label
+def JNOP : InstAlias<"jnop\t$RI2", (BRCAsm 0, brtarget16:$RI2), 0>;
+
+// An alias of BRCL 0, label
+def JGNOP : InstAlias<"jgnop\t$RI2", (BRCLAsm 0, brtarget32:$RI2), 0>;
// Fused compare-and-branch instructions.
//
@@ -280,33 +290,32 @@ let isCall = 1, Defs = [R14D, CC] in {
[(z_tls_ldcall tglobaltlsaddr:$I2)]>;
}
-// Sibling calls. Indirect sibling calls must be via R1, since R2 upwards
-// are argument registers and since branching to R0 is a no-op.
+// Sibling calls.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def CallJG : Alias<6, (outs), (ins pcrel32:$I2),
[(z_sibcall pcrel32:$I2)]>;
- let Uses = [R1D] in
- def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
+ def CallBR : Alias<2, (outs), (ins ADDR64:$R2),
+ [(z_sibcall ADDR64:$R2)]>;
}
// Conditional sibling calls.
let CCMaskFirst = 1, isCall = 1, isTerminator = 1, isReturn = 1 in {
def CallBRCL : Alias<6, (outs), (ins cond4:$valid, cond4:$R1,
pcrel32:$I2), []>;
- let Uses = [R1D] in
- def CallBCR : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+ def CallBCR : Alias<2, (outs), (ins cond4:$valid, cond4:$R1,
+ ADDR64:$R2), []>;
}
// Fused compare and conditional sibling calls.
-let isCall = 1, isTerminator = 1, isReturn = 1, Uses = [R1D] in {
- def CRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
- def CGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
- def CIBCall : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>;
- def CGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>;
- def CLRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
- def CLGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
- def CLIBCall : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>;
- def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
+let isCall = 1, isTerminator = 1, isReturn = 1 in {
+ def CRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3, ADDR64:$R4), []>;
+ def CGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3, ADDR64:$R4), []>;
+ def CIBCall : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3, ADDR64:$R4), []>;
+ def CGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3, ADDR64:$R4), []>;
+ def CLRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3, ADDR64:$R4), []>;
+ def CLGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3, ADDR64:$R4), []>;
+ def CLIBCall : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3, ADDR64:$R4), []>;
+ def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3, ADDR64:$R4), []>;
}
// A return instruction (br %r14).
@@ -828,16 +837,13 @@ def GOT : Alias<6, (outs GR64:$R1), (ins),
let Defs = [CC] in {
let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
- def LPR : UnaryRR <"lpr", 0x10, z_iabs, GR32, GR32>;
- def LPGR : UnaryRRE<"lpgr", 0xB900, z_iabs, GR64, GR64>;
+ def LPR : UnaryRR <"lpr", 0x10, abs, GR32, GR32>;
+ def LPGR : UnaryRRE<"lpgr", 0xB900, abs, GR64, GR64>;
}
let CCValues = 0xE, CompareZeroCCMask = 0xE in
def LPGFR : UnaryRRE<"lpgfr", 0xB910, null_frag, GR64, GR32>;
}
-def : Pat<(z_iabs32 GR32:$src), (LPR GR32:$src)>;
-def : Pat<(z_iabs64 GR64:$src), (LPGR GR64:$src)>;
-defm : SXU<z_iabs, LPGFR>;
-defm : SXU<z_iabs64, LPGFR>;
+defm : SXU<abs, LPGFR>;
let Defs = [CC] in {
let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
@@ -847,10 +853,7 @@ let Defs = [CC] in {
let CCValues = 0xE, CompareZeroCCMask = 0xE in
def LNGFR : UnaryRRE<"lngfr", 0xB911, null_frag, GR64, GR32>;
}
-def : Pat<(z_inegabs32 GR32:$src), (LNR GR32:$src)>;
-def : Pat<(z_inegabs64 GR64:$src), (LNGR GR64:$src)>;
-defm : SXU<z_inegabs, LNGFR>;
-defm : SXU<z_inegabs64, LNGFR>;
+defm : SXU<z_inegabs, LNGFR>;
let Defs = [CC] in {
let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
@@ -2242,6 +2245,31 @@ let isCodeGenOnly = 1, hasSideEffects = 1 in {
(ins imm64zx48:$enc, bdaddr12only:$BD1,
bdaddr12only:$BD2, AnyReg:$R3),
".insn ssf,$enc,$BD1,$BD2,$R3", []>;
+ def InsnVRI : DirectiveInsnVRI<(outs),
+ (ins imm64zx48:$enc, VR128:$V1, VR128:$V2,
+ imm32zx12:$I3, imm32zx4:$M4, imm32zx4:$M5),
+ ".insn vri,$enc,$V1,$V2,$I3,$M4,$M5", []>;
+ def InsnVRR : DirectiveInsnVRR<(outs),
+ (ins imm64zx48:$enc, VR128:$V1, VR128:$V2,
+ VR128:$V3, imm32zx4:$M4, imm32zx4:$M5,
+ imm32zx4:$M6),
+ ".insn vrr,$enc,$V1,$V2,$V3,$M4,$M5,$M6", []>;
+ def InsnVRS : DirectiveInsnVRS<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1, VR128:$V3,
+ bdaddr12only:$BD2, imm32zx4:$M4),
+ ".insn vrs,$enc,$BD2,$M4", []>;
+ def InsnVRV : DirectiveInsnVRV<(outs),
+ (ins imm64zx48:$enc, VR128:$V1,
+ bdvaddr12only:$VBD2, imm32zx4:$M3),
+ ".insn vrv,$enc,$V1,$VBD2,$M3", []>;
+ def InsnVRX : DirectiveInsnVRX<(outs),
+ (ins imm64zx48:$enc, VR128:$V1,
+ bdxaddr12only:$XBD2, imm32zx4:$M3),
+ ".insn vrx,$enc,$V1,$XBD2,$M3", []>;
+ def InsnVSI : DirectiveInsnVSI<(outs),
+ (ins imm64zx48:$enc, VR128:$V1,
+ bdaddr12only:$BD2, imm32zx8:$I3),
+ ".insn vsi,$enc,$V1,$BD2,$I3", []>;
}
//===----------------------------------------------------------------------===//
@@ -2315,3 +2343,25 @@ defm : BlockLoadStore<anyextloadi32, i64, MVCSequence, NCSequence, OCSequence,
XCSequence, 4>;
defm : BlockLoadStore<load, i64, MVCSequence, NCSequence, OCSequence,
XCSequence, 8>;
+
+//===----------------------------------------------------------------------===//
+// Mnemonic Aliases
+//===----------------------------------------------------------------------===//
+
+def JCT : MnemonicAlias<"jct", "brct">;
+def JCTG : MnemonicAlias<"jctg", "brctg">;
+def JAS : MnemonicAlias<"jas", "bras">;
+def JASL : MnemonicAlias<"jasl", "brasl">;
+def JXH : MnemonicAlias<"jxh", "brxh">;
+def JXLE : MnemonicAlias<"jxle", "brxle">;
+def JXHG : MnemonicAlias<"jxhg", "brxhg">;
+def JXLEG : MnemonicAlias<"jxleg", "brxlg">;
+
+def BRU : MnemonicAlias<"bru", "j">;
+def BRUL : MnemonicAlias<"brul", "jg">;
+
+foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
+ "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
+ def BRUAsm#V : MnemonicCondBranchAlias <CV<V>, "br#", "j#">;
+ def BRULAsm#V : MnemonicCondBranchAlias <CV<V>, "br#l", "jg#">;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index e73f1e429c3c..a85eb1623e1c 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -571,10 +571,10 @@ let Predicates = [FeatureVector] in {
// Load positive.
def VLP : UnaryVRRaGeneric<"vlp", 0xE7DF>;
- def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8, v128b, v128b, 0>;
- def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>;
- def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>;
- def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>;
+ def VLPB : UnaryVRRa<"vlpb", 0xE7DF, abs, v128b, v128b, 0>;
+ def VLPH : UnaryVRRa<"vlph", 0xE7DF, abs, v128h, v128h, 1>;
+ def VLPF : UnaryVRRa<"vlpf", 0xE7DF, abs, v128f, v128f, 2>;
+ def VLPG : UnaryVRRa<"vlpg", 0xE7DF, abs, v128g, v128g, 3>;
let isCommutable = 1 in {
// Maximum.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index 3fc25034dded..9bee5e8d1864 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -72,6 +72,7 @@ advanceTo(MachineBasicBlock::iterator NextBegin) {
}
void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
+ Available.clear(); // -misched-cutoff.
LLVM_DEBUG(HazardRec->dumpState(););
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
index 81af5fd854db..992b1512a077 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -259,7 +259,6 @@ def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall,
def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
SDT_ZWrapOffset, []>;
-def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>;
def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>;
def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>;
def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp,
@@ -572,10 +571,8 @@ def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
// Aligned loads.
class AlignedLoad<SDPatternOperator load>
- : PatFrag<(ops node:$addr), (load node:$addr), [{
- auto *Load = cast<LoadSDNode>(N);
- return Load->getAlignment() >= Load->getMemoryVT().getStoreSize();
-}]>;
+ : PatFrag<(ops node:$addr), (load node:$addr),
+ [{ return storeLoadIsAligned(N); }]>;
def aligned_load : AlignedLoad<load>;
def aligned_asextloadi16 : AlignedLoad<asextloadi16>;
def aligned_asextloadi32 : AlignedLoad<asextloadi32>;
@@ -584,10 +581,8 @@ def aligned_azextloadi32 : AlignedLoad<azextloadi32>;
// Aligned stores.
class AlignedStore<SDPatternOperator store>
- : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
- auto *Store = cast<StoreSDNode>(N);
- return Store->getAlignment() >= Store->getMemoryVT().getStoreSize();
-}]>;
+ : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr),
+ [{ return storeLoadIsAligned(N); }]>;
def aligned_store : AlignedStore<store>;
def aligned_truncstorei16 : AlignedStore<truncstorei16>;
def aligned_truncstorei32 : AlignedStore<truncstorei32>;
@@ -671,17 +666,7 @@ def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2),
}]>;
// Negative integer absolute.
-def z_inegabs : PatFrag<(ops node:$src), (ineg (z_iabs node:$src))>;
-
-// Integer absolute, matching the canonical form generated by DAGCombiner.
-def z_iabs32 : PatFrag<(ops node:$src),
- (xor (add node:$src, (sra node:$src, (i32 31))),
- (sra node:$src, (i32 31)))>;
-def z_iabs64 : PatFrag<(ops node:$src),
- (xor (add node:$src, (sra node:$src, (i32 63))),
- (sra node:$src, (i32 63)))>;
-def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>;
-def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
+def z_inegabs : PatFrag<(ops node:$src), (ineg (abs node:$src))>;
// Integer multiply-and-add
def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -898,16 +883,6 @@ def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>;
// Signed "integer less than zero" on vectors.
def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph immAllZerosV, node:$x)>;
-// Integer absolute on vectors.
-class z_viabs<int shift>
- : PatFrag<(ops node:$src),
- (xor (add node:$src, (z_vsra_by_scalar node:$src, (i32 shift))),
- (z_vsra_by_scalar node:$src, (i32 shift)))>;
-def z_viabs8 : z_viabs<7>;
-def z_viabs16 : z_viabs<15>;
-def z_viabs32 : z_viabs<31>;
-def z_viabs64 : z_viabs<63>;
-
// Sign-extend the i64 elements of a vector.
class z_vse<int shift>
: PatFrag<(ops node:$src),
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index fe2aaca8429a..5139cc39d2af 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -109,8 +109,9 @@ bool SystemZRegisterInfo::getRegAllocationHints(
auto tryAddHint = [&](const MachineOperand *MO) -> void {
Register Reg = MO->getReg();
- Register PhysReg =
- Register::isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
+ Register PhysReg = Register::isPhysicalRegister(Reg)
+ ? Reg
+ : Register(VRM->getPhys(Reg));
if (PhysReg) {
if (MO->getSubReg())
PhysReg = getSubReg(PhysReg, MO->getSubReg());
@@ -265,8 +266,9 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// Decompose the frame index into a base and offset.
int FrameIndex = MI->getOperand(FIOperandNum).getIndex();
Register BasePtr;
- int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) +
- MI->getOperand(FIOperandNum + 1).getImm());
+ int64_t Offset =
+ (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed() +
+ MI->getOperand(FIOperandNum + 1).getImm());
// Special handling of dbg_value instructions.
if (MI->isDebugValue()) {
@@ -321,8 +323,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// Load the high offset into the scratch register and use it as
// an index.
TII->loadImmediate(MBB, MI, ScratchReg, HighOffset);
- BuildMI(MBB, MI, DL, TII->get(SystemZ::AGR),ScratchReg)
- .addReg(ScratchReg, RegState::Kill).addReg(BasePtr);
+ BuildMI(MBB, MI, DL, TII->get(SystemZ::LA), ScratchReg)
+ .addReg(BasePtr, RegState::Kill).addImm(0).addReg(ScratchReg);
}
// Use the scratch register as the base. It then dies here.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
index b3266051da4e..de49106a5a60 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -204,7 +204,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>;
-def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>;
// Load and zero rightmost byte
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index df7282a2961b..5ea269cb891d 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -205,7 +205,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>;
-def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>;
// Load and zero rightmost byte
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
index 56ceb88f35d4..6a28aec6f846 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
@@ -206,7 +206,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>;
-def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>;
// Load and zero rightmost byte
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index ca714ef1a702..9a306591a34f 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -182,7 +182,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLIL(F|H|L)$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LG(F|H)I$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LHI(Mux)?$")>;
-def : InstRW<[WLat1, FXU, NormalGr], (instregex "LR(Mux)?$")>;
+def : InstRW<[WLat1, FXU, NormalGr], (instregex "LR$")>;
// Load and test
def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXU, NormalGr], (instregex "LT(G)?$")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index fb226be678da..f3ff1dfaba75 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -187,7 +187,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "LLIL(F|H|L)$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LG(F|H)I$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LHI(Mux)?$")>;
-def : InstRW<[WLat1, FXU, NormalGr], (instregex "LR(Mux)?$")>;
+def : InstRW<[WLat1, FXU, NormalGr], (instregex "LR$")>;
// Load and trap
def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "L(FH|G)?AT$")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 6b4f35e5ba2b..ca5ca7257bab 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -117,9 +117,8 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
return Chain1;
SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(1, DL, PtrVT));
- SDValue Chain2 =
- DAG.getStore(Chain, DL, Byte, Dst2, DstPtrInfo.getWithOffset(1),
- /* Alignment = */ 1);
+ SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2,
+ DstPtrInfo.getWithOffset(1), Align(1));
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 68e0b7ae66a4..d24e264b03a5 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -33,24 +33,32 @@ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
if (CPUName.empty())
CPUName = "generic";
// Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
+ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS);
// -msoft-float implies -mno-vx.
if (HasSoftFloat)
HasVector = false;
+ // -mno-vx implicitly disables all vector-related features.
+ if (!HasVector) {
+ HasVectorEnhancements1 = false;
+ HasVectorEnhancements2 = false;
+ HasVectorPackedDecimal = false;
+ HasVectorPackedDecimalEnhancement = false;
+ }
+
return *this;
}
SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM)
- : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
- HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasPopulationCount(false), HasMessageSecurityAssist3(false),
- HasMessageSecurityAssist4(false), HasResetReferenceBitsMultiple(false),
- HasFastSerialization(false), HasInterlockedAccess1(false),
- HasMiscellaneousExtensions(false),
+ : SystemZGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false),
+ HasFPExtension(false), HasPopulationCount(false),
+ HasMessageSecurityAssist3(false), HasMessageSecurityAssist4(false),
+ HasResetReferenceBitsMultiple(false), HasFastSerialization(false),
+ HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
HasExecutionHint(false), HasLoadAndTrap(false),
HasTransactionalExecution(false), HasProcessorAssist(false),
HasDFPZonedConversion(false), HasEnhancedDAT2(false),
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 4b49c37fe4e6..3841063d2f61 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -112,7 +112,7 @@ public:
bool enableSubRegLiveness() const override;
// Automatically generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
// Return true if the target has the distinct-operands facility.
bool hasDistinctOps() const { return HasDistinctOps; }
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 3f467b200852..7b78dc4ad13a 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -171,12 +171,10 @@ SystemZTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
// FIXME: This is related to the code below to reset the target options,
// we need to know whether or not the soft float flag is set on the
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 864200e5f71c..e7ac2391512f 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -64,8 +64,9 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
}
int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -340,8 +341,8 @@ unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
// Emit prefetch instructions for smaller strides in cases where we think
// the hardware prefetcher might not be able to keep up.
- if (NumStridedMemAccesses > 32 &&
- NumStridedMemAccesses == NumMemAccesses && !HasCall)
+ if (NumStridedMemAccesses > 32 && !HasCall &&
+ (NumMemAccesses - NumStridedMemAccesses) * 32 <= NumStridedMemAccesses)
return 1;
return ST->hasMiscellaneousExtensions3() ? 8192 : 2048;
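The rewritten guard no longer requires every access in the loop to be strided: the minimum prefetch stride still drops to 1 as long as the non-strided accesses number at most 1/32 of the strided ones (with more than 32 strided accesses and no calls). A standalone sketch of just that predicate with a couple of worked values (function name is mine):

#include <cassert>

// Mirrors the new SystemZ heuristic: prefetch even small strides only when
// there are many strided accesses, no calls, and the non-strided accesses are
// at most 1/32 of the strided ones.
static bool prefetchSmallStrides(unsigned NumMemAccesses,
                                 unsigned NumStridedMemAccesses, bool HasCall) {
  return NumStridedMemAccesses > 32 && !HasCall &&
         (NumMemAccesses - NumStridedMemAccesses) * 32 <= NumStridedMemAccesses;
}

int main() {
  assert(prefetchSmallStrides(64, 64, false));  // all strided: 0 * 32 <= 64
  assert(prefetchSmallStrides(66, 64, false));  // 2 non-strided: 64 <= 64
  assert(!prefetchSmallStrides(67, 64, false)); // 3 non-strided: 96 > 64
  assert(!prefetchSmallStrides(64, 64, true));  // a call disables it
  return 0;
}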
@@ -592,8 +593,9 @@ static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) {
unsigned SystemZTTIImpl::
getVectorTruncCost(Type *SrcTy, Type *DstTy) {
assert (SrcTy->isVectorTy() && DstTy->isVectorTy());
- assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
- "Packing must reduce size of vector type.");
+ assert(SrcTy->getPrimitiveSizeInBits().getFixedSize() >
+ DstTy->getPrimitiveSizeInBits().getFixedSize() &&
+ "Packing must reduce size of vector type.");
assert(cast<FixedVectorType>(SrcTy)->getNumElements() ==
cast<FixedVectorType>(DstTy)->getNumElements() &&
"Packing should not change number of elements.");
@@ -699,11 +701,12 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
}
int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
// FIXME: Can the logic below also be used for these cost kinds?
if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {
- int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+ int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
return BaseCost == 0 ? BaseCost : 1;
}
@@ -786,8 +789,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. Base implementation does not
// realize float->int gets scalarized.
- unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
- Src->getScalarType(), CostKind);
+ unsigned ScalarCost = getCastInstrCost(
+ Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind);
unsigned TotCost = VF * ScalarCost;
bool NeedsInserts = true, NeedsExtracts = true;
// FP128 registers do not get inserted or extracted.
@@ -828,7 +831,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
}
}
- return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
// Scalar i8 / i16 operations will typically be made after first extending
@@ -844,11 +847,11 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {
}
int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy,
+ Type *CondTy, CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
if (CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind);
if (!ValTy->isVectorTy()) {
switch (Opcode) {
@@ -860,7 +863,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
- C->getZExtValue() == 0)
+ C->isZero())
return 0;
unsigned Cost = 1;
@@ -924,7 +927,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
}
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind);
}
int SystemZTTIImpl::
@@ -1019,7 +1022,7 @@ isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) {
// Comparison between memory and immediate.
if (UserI->getOpcode() == Instruction::ICmp)
if (ConstantInt *CI = dyn_cast<ConstantInt>(UserI->getOperand(1)))
- if (isUInt<16>(CI->getZExtValue()))
+ if (CI->getValue().isIntN(16))
return true;
return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 7f8f7f6f923f..c97e099f9943 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -41,7 +41,8 @@ public:
int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind);
+ Type *Ty, TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TTI::TargetCostKind CostKind);
@@ -93,9 +94,10 @@ public:
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
const Instruction *I);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::TargetCostKind CostKind,
+ TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
diff --git a/contrib/llvm-project/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/TargetLoweringObjectFile.cpp
index eea0aeea2c45..81af4eead6d2 100644
--- a/contrib/llvm-project/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
@@ -49,12 +50,23 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
// Reset various EH DWARF encodings.
PersonalityEncoding = LSDAEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr;
CallSiteEncoding = dwarf::DW_EH_PE_uleb128;
+
+ this->TM = &TM;
}
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
delete Mang;
}
+unsigned TargetLoweringObjectFile::getCallSiteEncoding() const {
+ // If target does not have LEB128 directives, we would need the
+ // call site encoding to be udata4 so that the alternative path
+ // for not having LEB128 directives could work.
+ if (!getContext().getAsmInfo()->hasLEB128Directives())
+ return dwarf::DW_EH_PE_udata4;
+ return CallSiteEncoding;
+}
+
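getCallSiteEncoding falls back to DW_EH_PE_udata4 when the assembler has no .uleb128 directive: a ULEB128 field is variable-length (its size depends on the value), while udata4 is always four bytes, which the non-LEB128 emission path can handle. A minimal, self-contained encoder sketch illustrating the variable length (not LLVM's implementation):

#include <cassert>
#include <cstdint>
#include <vector>

// ULEB128: 7 payload bits per byte, high bit set on all but the last byte.
static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out;
}

int main() {
  assert(encodeULEB128(127).size() == 1); // 0x7f
  assert(encodeULEB128(128).size() == 2); // 0x80 0x01
  // A DW_EH_PE_udata4 field, by contrast, is always exactly 4 bytes.
  return 0;
}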
static bool isNullOrUndef(const Constant *C) {
// Check that the constant isn't all zeros or undefs.
if (C->isNullValue() || isa<UndefValue>(C))
@@ -136,6 +148,52 @@ void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
const MCSymbol *Sym) const {
}
+void TargetLoweringObjectFile::emitCGProfileMetadata(MCStreamer &Streamer,
+ Module &M) const {
+ MCContext &C = getContext();
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+
+ MDNode *CFGProfile = nullptr;
+
+ for (const auto &MFE : ModuleFlags) {
+ StringRef Key = MFE.Key->getString();
+ if (Key == "CG Profile") {
+ CFGProfile = cast<MDNode>(MFE.Val);
+ break;
+ }
+ }
+
+ if (!CFGProfile)
+ return;
+
+ auto GetSym = [this](const MDOperand &MDO) -> MCSymbol * {
+ if (!MDO)
+ return nullptr;
+ auto *V = cast<ValueAsMetadata>(MDO);
+ const Function *F = cast<Function>(V->getValue()->stripPointerCasts());
+ if (F->hasDLLImportStorageClass())
+ return nullptr;
+ return TM->getSymbol(F);
+ };
+
+ for (const auto &Edge : CFGProfile->operands()) {
+ MDNode *E = cast<MDNode>(Edge);
+ const MCSymbol *From = GetSym(E->getOperand(0));
+ const MCSymbol *To = GetSym(E->getOperand(1));
+ // Skip null functions. This can happen if functions are dead stripped after
+ // the CGProfile pass has been run.
+ if (!From || !To)
+ continue;
+ uint64_t Count = cast<ConstantAsMetadata>(E->getOperand(2))
+ ->getValue()
+ ->getUniqueInteger()
+ .getZExtValue();
+ Streamer.emitCGProfileEntry(
+ MCSymbolRefExpr::create(From, MCSymbolRefExpr::VK_None, C),
+ MCSymbolRefExpr::create(To, MCSymbolRefExpr::VK_None, C), Count);
+ }
+}
/// getKindForGlobal - This is a top-level target-independent classifier for
/// a global object. Given a global variable and information from the TM, this
diff --git a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
index 074e9fde79e6..2aee0e5c3fb8 100644
--- a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
@@ -93,34 +93,30 @@ static TLSModel::Model getSelectedTLSModel(const GlobalValue *GV) {
bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
const GlobalValue *GV) const {
- // If the IR producer requested that this GV be treated as dso local, obey.
- if (GV && GV->isDSOLocal())
- return true;
-
- // If we are not supossed to use a PLT, we cannot assume that intrinsics are
- // local since the linker can convert some direct access to access via plt.
- if (M.getRtLibUseGOT() && !GV)
- return false;
+ const Triple &TT = getTargetTriple();
+ Reloc::Model RM = getRelocationModel();
// According to the llvm language reference, we should be able to
// just return false in here if we have a GV, as we know it is
// dso_preemptable. At this point in time, the various IR producers
// have not been transitioned to always produce a dso_local when it
// is possible to do so.
- // In the case of intrinsics, GV is null and there is nowhere to put
- // dso_local. Returning false for those will produce worse code in some
- // architectures. For example, on x86 the caller has to set ebx before calling
- // a plt.
+ // In the case of ExternalSymbolSDNode, GV is null and we should just return
+ // false. However, COFF currently relies on this to be true
+ //
// As a result we still have some logic in here to improve the quality of the
// generated code.
// FIXME: Add a module level metadata for whether intrinsics should be assumed
// local.
+ if (!GV)
+ return TT.isOSBinFormatCOFF();
- Reloc::Model RM = getRelocationModel();
- const Triple &TT = getTargetTriple();
+ // If the IR producer requested that this GV be treated as dso local, obey.
+ if (GV->isDSOLocal())
+ return true;
// DLLImport explicitly marks the GV as external.
- if (GV && GV->hasDLLImportStorageClass())
+ if (GV->hasDLLImportStorageClass())
return false;
// On MinGW, variables that haven't been declared with DLLImport may still
@@ -128,14 +124,14 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
// don't assume the variables to be DSO local unless we actually know
// that for sure. This only has to be done for variables; for functions
// the linker can insert thunks for calling functions from another DLL.
- if (TT.isWindowsGNUEnvironment() && TT.isOSBinFormatCOFF() && GV &&
+ if (TT.isWindowsGNUEnvironment() && TT.isOSBinFormatCOFF() &&
GV->isDeclarationForLinker() && isa<GlobalVariable>(GV))
return false;
// On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols
// remain unresolved in the link, they can be resolved to zero, which is
// outside the current DSO.
- if (TT.isOSBinFormatCOFF() && GV && GV->hasExternalWeakLinkage())
+ if (TT.isOSBinFormatCOFF() && GV->hasExternalWeakLinkage())
return false;
// Every other GV is local on COFF.
@@ -147,20 +143,10 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
if (TT.isOSBinFormatCOFF() || TT.isOSWindows())
return true;
- // Most PIC code sequences that assume that a symbol is local cannot
- // produce a 0 if it turns out the symbol is undefined. While this
- // is ABI and relocation depended, it seems worth it to handle it
- // here.
- if (GV && isPositionIndependent() && GV->hasExternalWeakLinkage())
- return false;
-
- if (GV && !GV->hasDefaultVisibility())
- return true;
-
if (TT.isOSBinFormatMachO()) {
if (RM == Reloc::Static)
return true;
- return GV && GV->isStrongDefinitionForLinker();
+ return GV->isStrongDefinitionForLinker();
}
// Due to the AIX linkage model, any global with default visibility is
@@ -170,40 +156,6 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
assert(TT.isOSBinFormatELF() || TT.isOSBinFormatWasm());
assert(RM != Reloc::DynamicNoPIC);
-
- bool IsExecutable =
- RM == Reloc::Static || M.getPIELevel() != PIELevel::Default;
- if (IsExecutable) {
- // If the symbol is defined, it cannot be preempted.
- if (GV && !GV->isDeclarationForLinker())
- return true;
-
- // A symbol marked nonlazybind should not be accessed with a plt. If the
- // symbol turns out to be external, the linker will convert a direct
- // access to an access via the plt, so don't assume it is local.
- const Function *F = dyn_cast_or_null<Function>(GV);
- if (F && F->hasFnAttribute(Attribute::NonLazyBind))
- return false;
- Triple::ArchType Arch = TT.getArch();
-
- // PowerPC prefers avoiding copy relocations.
- if (Arch == Triple::ppc || TT.isPPC64())
- return false;
-
- // Check if we can use copy relocations.
- if (!(GV && GV->isThreadLocal()) && RM == Reloc::Static)
- return true;
- } else if (TT.isOSBinFormatELF()) {
- // If dso_local allows AsmPrinter::getSymbolPreferLocal to use a local
- // alias, set the flag. We cannot set dso_local for other global values,
- // because otherwise direct accesses to a probably interposable symbol (even
- // if the codegen assumes not) will be rejected by the linker.
- if (!GV || !GV->canBenefitFromLocalAlias())
- return false;
- return TT.isX86() && M.noSemanticInterposition();
- }
-
- // ELF & wasm support preemption of other symbols.
return false;
}
@@ -281,3 +233,12 @@ TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis(
[this](const Function &F) { return this->getTargetTransformInfo(F); });
}
+
+std::pair<int, int> TargetMachine::parseBinutilsVersion(StringRef Version) {
+ if (Version == "none")
+ return {INT_MAX, INT_MAX}; // Make binutilsIsAtLeast() return true.
+ std::pair<int, int> Ret;
+ if (!Version.consumeInteger(10, Ret.first) && Version.consume_front("."))
+ Version.consumeInteger(10, Ret.second);
+ return Ret;
+}
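parseBinutilsVersion accepts either the literal "none" (treated as the newest possible version so binutilsIsAtLeast() always passes) or a "major" / "major.minor" string, with missing or unparsable pieces left at zero. A behavioral sketch in plain C++ (std::string/std::stoi instead of llvm::StringRef, so edge cases such as leading whitespace differ slightly):

#include <cassert>
#include <climits>
#include <string>
#include <utility>

static std::pair<int, int> parseBinutilsVersionSketch(const std::string &V) {
  if (V == "none")
    return {INT_MAX, INT_MAX}; // "at least any version" always holds
  std::pair<int, int> Ret{0, 0};
  size_t Pos = 0;
  try {
    Ret.first = std::stoi(V, &Pos); // major
  } catch (...) {
    return Ret; // no leading integer: keep {0, 0}
  }
  if (Pos < V.size() && V[Pos] == '.') {
    try {
      Ret.second = std::stoi(V.substr(Pos + 1)); // minor
    } catch (...) {
    }
  }
  return Ret;
}

int main() {
  assert(parseBinutilsVersionSketch("2.35") == std::make_pair(2, 35));
  assert(parseBinutilsVersionSketch("2") == std::make_pair(2, 0));
  assert(parseBinutilsVersionSketch("none") ==
         std::make_pair(INT_MAX, INT_MAX));
  return 0;
}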
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index 7a899b4b38e2..a3309a68c76d 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -125,6 +125,9 @@ static const MCPhysReg F128Regs[32] = {
VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23,
VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31};
+static const MCPhysReg VM512Regs[8] = {VE::VMP0, VE::VMP1, VE::VMP2, VE::VMP3,
+ VE::VMP4, VE::VMP5, VE::VMP6, VE::VMP7};
+
static const MCPhysReg MISCRegs[31] = {
VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister,
VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR,
@@ -277,6 +280,17 @@ public:
}
return false;
}
+ bool isUImm4() {
+ if (!isImm())
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return isUInt<4>(Value);
+ }
+ return false;
+ }
bool isUImm6() {
if (!isImm())
return false;
@@ -476,6 +490,10 @@ public:
addImmOperands(Inst, N);
}
+ void addUImm4Operands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
void addUImm6Operands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
@@ -648,6 +666,15 @@ public:
return true;
}
+ static bool MorphToVM512Reg(VEOperand &Op) {
+ unsigned Reg = Op.getReg();
+ unsigned regIdx = Reg - VE::VM0;
+ if (regIdx % 2 || regIdx > 15)
+ return false;
+ Op.Reg.RegNum = VM512Regs[regIdx / 2];
+ return true;
+ }
+
static bool MorphToMISCReg(VEOperand &Op) {
const auto *ConstExpr = dyn_cast<MCConstantExpr>(Op.getImm());
if (!ConstExpr)
@@ -902,6 +929,24 @@ StringRef VEAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc,
Mnemonic = parseRD(Name, 10, NameLoc, Operands);
} else if (Name.startswith("cvt.l.d")) {
Mnemonic = parseRD(Name, 7, NameLoc, Operands);
+ } else if (Name.startswith("vcvt.w.d.sx") || Name.startswith("vcvt.w.d.zx") ||
+ Name.startswith("vcvt.w.s.sx") || Name.startswith("vcvt.w.s.zx")) {
+ Mnemonic = parseRD(Name, 11, NameLoc, Operands);
+ } else if (Name.startswith("vcvt.l.d")) {
+ Mnemonic = parseRD(Name, 8, NameLoc, Operands);
+ } else if (Name.startswith("pvcvt.w.s.lo") ||
+ Name.startswith("pvcvt.w.s.up")) {
+ Mnemonic = parseRD(Name, 12, NameLoc, Operands);
+ } else if (Name.startswith("pvcvt.w.s")) {
+ Mnemonic = parseRD(Name, 9, NameLoc, Operands);
+ } else if (Name.startswith("vfmk.l.") || Name.startswith("vfmk.w.") ||
+ Name.startswith("vfmk.d.") || Name.startswith("vfmk.s.")) {
+ bool ICC = Name[5] == 'l' || Name[5] == 'w' ? true : false;
+ Mnemonic = parseCC(Name, 7, Name.size(), ICC, true, NameLoc, Operands);
+ } else if (Name.startswith("pvfmk.w.lo.") || Name.startswith("pvfmk.w.up.") ||
+ Name.startswith("pvfmk.s.lo.") || Name.startswith("pvfmk.s.up.")) {
+ bool ICC = Name[6] == 'l' || Name[6] == 'w' ? true : false;
+ Mnemonic = parseCC(Name, 11, Name.size(), ICC, true, NameLoc, Operands);
} else {
Operands->push_back(VEOperand::CreateToken(Mnemonic, NameLoc));
}
@@ -1362,9 +1407,38 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
return ResTy;
switch (getLexer().getKind()) {
- case AsmToken::LParen:
- // FIXME: Parsing "(" + %vreg + ", " + %vreg + ")"
- // FALLTHROUGH
+ case AsmToken::LParen: {
+ // Parsing "(" + %vreg + ", " + %vreg + ")"
+ const AsmToken Tok1 = Parser.getTok();
+ Parser.Lex(); // Eat the '('.
+
+ unsigned RegNo1;
+ SMLoc S1, E1;
+ if (tryParseRegister(RegNo1, S1, E1) != MatchOperand_Success) {
+ getLexer().UnLex(Tok1);
+ return MatchOperand_NoMatch;
+ }
+
+ if (!Parser.getTok().is(AsmToken::Comma))
+ return MatchOperand_ParseFail;
+ Parser.Lex(); // Eat the ','.
+
+ unsigned RegNo2;
+ SMLoc S2, E2;
+ if (tryParseRegister(RegNo2, S2, E2) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ if (!Parser.getTok().is(AsmToken::RParen))
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(VEOperand::CreateToken(Tok1.getString(), Tok1.getLoc()));
+ Operands.push_back(VEOperand::CreateReg(RegNo1, S1, E1));
+ Operands.push_back(VEOperand::CreateReg(RegNo2, S2, E2));
+ Operands.push_back(VEOperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the ')'.
+ break;
+ }
default: {
std::unique_ptr<VEOperand> Op;
ResTy = parseVEAsmOperand(Op);
@@ -1377,7 +1451,24 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
if (!Parser.getTok().is(AsmToken::LParen))
break;
- // FIXME: Parsing %vec-reg + "(" + %sclar-reg/number + ")"
+ // Parsing %vec-reg + "(" + %scalar-reg/number + ")"
+ std::unique_ptr<VEOperand> Op1 = VEOperand::CreateToken(
+ Parser.getTok().getString(), Parser.getTok().getLoc());
+ Parser.Lex(); // Eat the '('.
+
+ std::unique_ptr<VEOperand> Op2;
+ ResTy = parseVEAsmOperand(Op2);
+ if (ResTy != MatchOperand_Success || !Op2)
+ return MatchOperand_ParseFail;
+
+ if (!Parser.getTok().is(AsmToken::RParen))
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(std::move(Op1));
+ Operands.push_back(std::move(Op2));
+ Operands.push_back(VEOperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the ')'.
break;
}
}
@@ -1445,6 +1536,10 @@ unsigned VEAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
if (Op.isReg() && VEOperand::MorphToF128Reg(Op))
return MCTargetAsmParser::Match_Success;
break;
+ case MCK_VM512:
+ if (Op.isReg() && VEOperand::MorphToVM512Reg(Op))
+ return MCTargetAsmParser::Match_Success;
+ break;
case MCK_MISC:
if (Op.isImm() && VEOperand::MorphToMISCReg(Op))
return MCTargetAsmParser::Match_Success;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
index 35885a4e3cae..20d609bc6b32 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
@@ -47,7 +47,7 @@ static MCDisassembler *createVEDisassembler(const Target &T,
return new VEDisassembler(STI, Ctx);
}
-extern "C" void LLVMInitializeVEDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheVETarget(),
createVEDisassembler);
@@ -95,6 +95,25 @@ static const unsigned F128RegDecoderTable[] = {
VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23,
VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31};
+static const unsigned V64RegDecoderTable[] = {
+ VE::V0, VE::V1, VE::V2, VE::V3, VE::V4, VE::V5, VE::V6, VE::V7,
+ VE::V8, VE::V9, VE::V10, VE::V11, VE::V12, VE::V13, VE::V14, VE::V15,
+ VE::V16, VE::V17, VE::V18, VE::V19, VE::V20, VE::V21, VE::V22, VE::V23,
+ VE::V24, VE::V25, VE::V26, VE::V27, VE::V28, VE::V29, VE::V30, VE::V31,
+ VE::V32, VE::V33, VE::V34, VE::V35, VE::V36, VE::V37, VE::V38, VE::V39,
+ VE::V40, VE::V41, VE::V42, VE::V43, VE::V44, VE::V45, VE::V46, VE::V47,
+ VE::V48, VE::V49, VE::V50, VE::V51, VE::V52, VE::V53, VE::V54, VE::V55,
+ VE::V56, VE::V57, VE::V58, VE::V59, VE::V60, VE::V61, VE::V62, VE::V63};
+
+static const unsigned VMRegDecoderTable[] = {
+ VE::VM0, VE::VM1, VE::VM2, VE::VM3, VE::VM4, VE::VM5,
+ VE::VM6, VE::VM7, VE::VM8, VE::VM9, VE::VM10, VE::VM11,
+ VE::VM12, VE::VM13, VE::VM14, VE::VM15};
+
+static const unsigned VM512RegDecoderTable[] = {VE::VMP0, VE::VMP1, VE::VMP2,
+ VE::VMP3, VE::VMP4, VE::VMP5,
+ VE::VMP6, VE::VMP7};
+
static const unsigned MiscRegDecoderTable[] = {
VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister,
VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR,
@@ -145,6 +164,40 @@ static DecodeStatus DecodeF128RegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeV64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Reg = VE::NoRegister;
+ if (RegNo == 255)
+ Reg = VE::VIX;
+ else if (RegNo > 63)
+ return MCDisassembler::Fail;
+ else
+ Reg = V64RegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVMRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ unsigned Reg = VMRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVM512RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo % 2 || RegNo > 15)
+ return MCDisassembler::Fail;
+ unsigned Reg = VM512RegDecoderTable[RegNo / 2];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
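The new VM512 register class appears in two places with the same constraint, MorphToVM512Reg in the asm parser and DecodeVM512RegisterClass here: a 512-bit mask register VMPk aliases the even/odd pair VM(2k)/VM(2k+1), so only even VM indices up to 14 map to a pair register. An index-only sketch of that mapping (register enums omitted so it stays self-contained):

#include <cassert>

// Maps a VM register index to its covering VMP pair index, mirroring the
// "(RegNo % 2 || RegNo > 15) -> fail" checks in the parser and disassembler.
static bool vmIndexToVMPIndex(unsigned VMIdx, unsigned &VMPIdx) {
  if (VMIdx % 2 != 0 || VMIdx > 15)
    return false; // odd or out-of-range indices have no VMP alias
  VMPIdx = VMIdx / 2;
  return true;
}

int main() {
  unsigned P;
  assert(vmIndexToVMPIndex(0, P) && P == 0);  // VM0/VM1   -> VMP0
  assert(vmIndexToVMPIndex(14, P) && P == 7); // VM14/VM15 -> VMP7
  assert(!vmIndexToVMPIndex(3, P));           // odd index: no pair register
  return 0;
}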
static DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp b/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp
new file mode 100644
index 000000000000..c4588926af9e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp
@@ -0,0 +1,137 @@
+//===-- LVLGen.cpp - LVL instruction generator ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VE.h"
+#include "VESubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lvl-gen"
+
+namespace {
+struct LVLGen : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ static char ID;
+ LVLGen() : MachineFunctionPass(ID) {}
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ unsigned getVL(const MachineInstr &MI);
+ int getVLIndex(unsigned Opcode);
+};
+char LVLGen::ID = 0;
+
+} // end of anonymous namespace
+
+FunctionPass *llvm::createLVLGenPass() { return new LVLGen; }
+
+int LVLGen::getVLIndex(unsigned Opcode) {
+ const MCInstrDesc &MCID = TII->get(Opcode);
+
+ // If an instruction has VLIndex information, return it.
+ if (HAS_VLINDEX(MCID.TSFlags))
+ return GET_VLINDEX(MCID.TSFlags);
+
+ return -1;
+}
+
+// returns a register holding a vector length. NoRegister is returned when
+// this MI does not have a vector length.
+unsigned LVLGen::getVL(const MachineInstr &MI) {
+ int Index = getVLIndex(MI.getOpcode());
+ if (Index >= 0)
+ return MI.getOperand(Index).getReg();
+
+ return VE::NoRegister;
+}
+
+bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+#define RegName(no) \
+ (MBB.getParent()->getSubtarget<VESubtarget>().getRegisterInfo()->getName(no))
+
+ bool Changed = false;
+ bool HasRegForVL = false;
+ unsigned RegForVL;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
+ MachineBasicBlock::iterator MI = I;
+
+ // Check whether MI uses a vector length operand. If so, set up the VL
+ // register. We want to reuse the VL register and keep the number of LEA
+ // instructions as small as possible, so a regular scalar register holds
+ // the immediate value used to load the VL register, and identical scalar
+ // registers are reused to avoid emitting new LVLr instructions whenever
+ // possible.
+ unsigned Reg = getVL(*MI);
+ if (Reg != VE::NoRegister) {
+ LLVM_DEBUG(dbgs() << "Vector instruction found: ");
+ LLVM_DEBUG(MI->dump());
+ LLVM_DEBUG(dbgs() << "Vector length is " << RegName(Reg) << ". ");
+ LLVM_DEBUG(dbgs() << "Current VL is "
+ << (HasRegForVL ? RegName(RegForVL) : "unknown")
+ << ". ");
+
+ if (!HasRegForVL || RegForVL != Reg) {
+ // This instruction uses VL, but the value lives in a different scalar
+ // register than the one currently loaded, so generate a new LVL
+ // instruction just before the current instruction.
+ LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
+ << RegName(Reg) << ".\n");
+ BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
+ HasRegForVL = true;
+ RegForVL = Reg;
+ Changed = true;
+ } else {
+ LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
+ }
+ }
+ // Check whether this instruction redefines the scalar register that
+ // currently feeds the VL register. A call also does not preserve VL.
+ if (HasRegForVL) {
+ if (MI->definesRegister(RegForVL, TRI) ||
+ MI->modifiesRegister(RegForVL, TRI) ||
+ MI->killsRegister(RegForVL, TRI) || MI->isCall()) {
+ // The cached VL must be reloaded before the next use, so clear HasRegForVL.
+ LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is needed to be updated: ");
+ LLVM_DEBUG(MI->dump());
+ HasRegForVL = false;
+ }
+ }
+
+ ++I;
+ }
+ return Changed;
+}
+
+bool LVLGen::runOnMachineFunction(MachineFunction &F) {
+ LLVM_DEBUG(dbgs() << "********** Begin LVLGen **********\n");
+ LLVM_DEBUG(dbgs() << "********** Function: " << F.getName() << '\n');
+ LLVM_DEBUG(F.dump());
+
+ bool Changed = false;
+
+ const VESubtarget &Subtarget = F.getSubtarget<VESubtarget>();
+ TII = Subtarget.getInstrInfo();
+ TRI = Subtarget.getRegisterInfo();
+
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+
+ if (Changed) {
+ LLVM_DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(F.dump());
+ }
+ LLVM_DEBUG(dbgs() << "********** End LVLGen **********\n");
+ return Changed;
+}
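LVLGen tracks at most one scalar register as the current source of VL: an LVLr is inserted only when a vector instruction names a different register than the cached one, and the cache is dropped when that register is redefined, killed, or a call intervenes. A compact standalone model of this reuse policy (simplified types of my own, not the MachineInstr API):

#include <cassert>
#include <vector>

// One step of the model: either a vector instruction that reads its vector
// length from scalar register VLReg, or something that clobbers the cached
// VL source (a redefinition of that register, or any call).
struct Step {
  bool UsesVL;
  unsigned VLReg;
  bool ClobbersVLReg;
};

// Returns how many LVL instructions the pass would insert for this sequence.
static unsigned countLVLs(const std::vector<Step> &Steps) {
  bool HasRegForVL = false;
  unsigned RegForVL = 0, LVLs = 0;
  for (const Step &S : Steps) {
    if (S.UsesVL && (!HasRegForVL || RegForVL != S.VLReg)) {
      ++LVLs; // load VL from a (different) scalar register
      HasRegForVL = true;
      RegForVL = S.VLReg;
    }
    if (HasRegForVL && S.ClobbersVLReg)
      HasRegForVL = false; // cached VL is stale; reload before the next use
  }
  return LVLs;
}

int main() {
  // Two vector ops sharing one VL register need a single LVL ...
  assert(countLVLs({{true, 7, false}, {true, 7, false}}) == 1);
  // ... but a call (or clobber) in between forces a reload.
  assert(countLVLs({{true, 7, false}, {false, 0, true}, {true, 7, false}}) == 2);
  return 0;
}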
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
index 657cc513b3c5..6995007c6dc6 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
@@ -29,6 +29,7 @@ public:
const MCSubtargetInfo &STI, raw_ostream &OS) override;
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
bool printAliasInstr(const MCInst *, uint64_t Address,
const MCSubtargetInfo &, raw_ostream &);
void printInstruction(const MCInst *, uint64_t, const MCSubtargetInfo &,
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
index a39cffc8f4a6..4c480c050274 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
@@ -56,8 +56,8 @@ static MCRegisterInfo *createVEMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *createVEMCSubtargetInfo(const Triple &TT, StringRef CPU,
StringRef FS) {
if (CPU.empty())
- CPU = "ve";
- return createVEMCSubtargetInfoImpl(TT, CPU, FS);
+ CPU = "generic";
+ return createVEMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, FS);
}
static MCTargetStreamer *
@@ -80,7 +80,7 @@ static MCInstPrinter *createVEMCInstPrinter(const Triple &T,
return new VEInstPrinter(MAI, MII, MRI);
}
-extern "C" void LLVMInitializeVETargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(getTheVETarget(), createVEMCAsmInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
index 65bd142fe0db..a95a299def88 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
@@ -16,7 +16,7 @@ Target &llvm::getTheVETarget() {
return TheVETarget;
}
-extern "C" void LLVMInitializeVETargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetInfo() {
RegisterTarget<Triple::ve, /*HasJIT=*/false> X(getTheVETarget(), "ve",
"VE", "VE");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.h b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
index 7ed7797cbb83..8c1fa840f19c 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VE.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
@@ -29,6 +29,7 @@ class MachineInstr;
FunctionPass *createVEISelDag(VETargetMachine &TM);
FunctionPass *createVEPromoteToI1Pass();
+FunctionPass *createLVLGenPass();
void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP);
@@ -333,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) {
return true;
}
// (m)1 patterns
- return (Val & (1UL << 63)) && isShiftedMask_64(Val);
+ return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val);
}
inline static bool isMImm32Val(uint32_t Val) {
@@ -346,7 +347,25 @@ inline static bool isMImm32Val(uint32_t Val) {
return true;
}
// (m)1 patterns
- return (Val & (1 << 31)) && isShiftedMask_32(Val);
+ return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val);
+}
+
+/// val2MImm - Convert an integer immediate value to target MImm immediate.
+inline static uint64_t val2MImm(uint64_t Val) {
+ if (Val == 0)
+ return 0; // (0)1
+ if (Val & (UINT64_C(1) << 63))
+ return countLeadingOnes(Val); // (m)1
+ return countLeadingZeros(Val) | 0x40; // (m)0
+}
+
+/// mimm2Val - Convert a target MImm immediate to an integer immediate value.
+inline static uint64_t mimm2Val(uint64_t Val) {
+ if (Val == 0)
+ return 0; // (0)1
+ if ((Val & 0x40) == 0)
+ return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1
+ return ((uint64_t)INT64_C(-1) >> (Val & 0x3f)); // (m)0
}
inline unsigned M0(unsigned Val) { return Val + 64; }
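The move from 1UL and 1 to UINT64_C(1) and UINT32_C(1) matters on hosts where unsigned long is only 32 bits wide: there, 1UL << 63 shifts past the type's width (undefined behavior), and 1 << 31 shifts into the sign bit of a signed int. A self-contained version of the "(m)1" check with portable 64-bit constants, using local stand-ins for the MathExtras helpers:

#include <cassert>
#include <cstdint>

// Local stand-ins for llvm::isMask_64 / isShiftedMask_64 so this compiles on
// its own: a mask is a run of ones starting at bit 0; a shifted mask is one
// contiguous run of ones anywhere in the value.
static bool isMask64(uint64_t V) { return (V & (V + 1)) == 0; }
static bool isShiftedMask64(uint64_t V) { return V && isMask64((V - 1) | V); }

// "(m)1" MImm forms: a single run of ones anchored at bit 63. UINT64_C(1)
// guarantees a 64-bit constant, so the shift by 63 is always well-defined.
static bool isLeadingOnesRun(uint64_t Val) {
  return (Val & (UINT64_C(1) << 63)) && isShiftedMask64(Val);
}

int main() {
  assert(isLeadingOnesRun(UINT64_C(0xFFFF000000000000)));  // 16 leading ones
  assert(!isLeadingOnesRun(UINT64_C(0x00FF000000000000))); // run not at bit 63
  assert(!isLeadingOnesRun(UINT64_C(0xFF00FF0000000000))); // two separate runs
  return 0;
}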
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.td b/contrib/llvm-project/llvm/lib/Target/VE/VE.td
index 617a6ea458b6..9e8adcd42077 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VE.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.td
@@ -18,6 +18,9 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// VE Subtarget features.
//
+def FeatureEnableVPU
+ : SubtargetFeature<"vpu", "EnableVPU", "true",
+ "Enable the VPU">;
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
@@ -43,7 +46,7 @@ def VEAsmParser : AsmParser {
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
-def : Proc<"ve", []>;
+def : Proc<"generic", []>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
index 86e3aa3d3fa1..08a75b6b8c55 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -60,6 +60,9 @@ public:
static const char *getRegisterName(unsigned RegNo) {
return VEInstPrinter::getRegisterName(RegNo);
}
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &OS);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) override;
};
} // end of anonymous namespace
@@ -203,7 +206,7 @@ void VEAsmPrinter::lowerGETGOTAndEmitMCInsts(const MachineInstr *MI,
// lea %got, _GLOBAL_OFFSET_TABLE_@PC_LO(-24)
// and %got, %got, (32)0
// sic %plt
- // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%got, %plt)
+ // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%plt, %got)
MCOperand cim24 = MCOperand::createImm(-24);
MCOperand loImm =
createGOTRelExprOp(VEMCExpr::VK_VE_PC_LO32, GOTLabel, OutContext);
@@ -248,10 +251,10 @@ void VEAsmPrinter::lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT
- // lea %dst, %plt_lo(func)(-24)
+ // lea %dst, func@plt_lo(-24)
// and %dst, %dst, (32)0
// sic %plt ; FIXME: is it safe to use %plt here?
- // lea.sl %dst, %plt_hi(func)(%dst, %plt)
+ // lea.sl %dst, func@plt_hi(%plt, %dst)
MCOperand cim24 = MCOperand::createImm(-24);
MCOperand loImm =
createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, AddrSym, OutContext);
@@ -295,7 +298,7 @@ void VEAsmPrinter::lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
// lea %s0, sym@tls_gd_lo(-24)
// and %s0, %s0, (32)0
// sic %lr
- // lea.sl %s0, sym@tls_gd_hi(%s0, %lr)
+ // lea.sl %s0, sym@tls_gd_hi(%lr, %s0)
// lea %s12, __tls_get_addr@plt_lo(8)
// and %s12, %s12, (32)0
// lea.sl %s12, __tls_get_addr@plt_hi(%s12, %lr)
@@ -349,7 +352,42 @@ void VEAsmPrinter::emitInstruction(const MachineInstr *MI) {
} while ((++I != E) && I->isInsideBundle()); // Delay slot check.
}
+void VEAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << "%" << StringRef(getRegisterName(MO.getReg())).lower();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+}
+
+// PrintAsmOperand - Print out an operand for an inline asm expression.
+bool VEAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
+ case 'r':
+ case 'v':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
// Force static initialization.
-extern "C" void LLVMInitializeVEAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEAsmPrinter() {
RegisterAsmPrinter<VEAsmPrinter> X(getTheVETarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
index 4f04dae884ab..93899c2cae3d 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
@@ -14,71 +14,133 @@
// Aurora VE
//===----------------------------------------------------------------------===//
def CC_VE_C_Stack: CallingConv<[
- // float --> need special handling like below.
- // 0 4
- // +------+------+
- // | empty| float|
- // +------+------+
- CCIfType<[f32], CCCustom<"allocateFloat">>,
+ // F128 are assigned to the stack in 16-byte aligned units
+ CCIfType<[f128], CCAssignToStackWithShadow<16, 16, [SX7]>>,
// All of the rest are assigned to the stack in 8-byte aligned units.
CCAssignToStack<0, 8>
]>;
-def CC_VE : CallingConv<[
+///// C Calling Convention (VE ABI v2.1) /////
+//
+// Reference: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v2.1.pdf
+//
+def CC_VE_C : CallingConv<[
// All arguments get passed in generic registers if there is space.
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
-
- // bool, char, int, enum, long --> generic integer 32 bit registers
- CCIfType<[i32], CCAssignToRegWithShadow<
- [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7],
- [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+ // Promote i1/i8/i16/i32 arguments to i64.
+ CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>,
- // float --> generic floating point 32 bit registers
- CCIfType<[f32], CCAssignToRegWithShadow<
- [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7],
- [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+ // Convert float arguments to i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ CCIfType<[f32], CCBitConvertToType<i64>>,
- // long long/double --> generic 64 bit registers
+ // bool, char, int, enum, long, long long, float, double
+ // --> generic 64 bit registers
CCIfType<[i64, f64],
CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+ // long double --> pair of generic 64 bit registers
+ //
+ // NOTE: If Q1 is allocated while SX1 is free, llvm tries to allocate SX1 for
+ // the following operands; shadowing SX1 here avoids that behavior.
+ CCIfType<[f128],
+ CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3],
+ [SX0, SX1, SX3, SX5]>>,
+
// Alternatively, they are assigned to the stack in 8-byte aligned units.
CCDelegateTo<CC_VE_C_Stack>
]>;
+///// Standard vararg C Calling Convention (VE ABI v2.1) /////
// All arguments get passed in stack for varargs function or non-prototyped
// function.
def CC_VE2 : CallingConv<[
- // float --> need special handling like below.
- // 0 4
+ // Promote i1/i8/i16/i32 arguments to i64.
+ CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Convert float arguments to i64 with padding.
+ // 63 31 0
// +------+------+
- // | empty| float|
+ // | float| 0 |
// +------+------+
- CCIfType<[f32], CCCustom<"allocateFloat">>,
+ CCIfType<[f32], CCBitConvertToType<i64>>,
+
+ // F128 are assigned to the stack in 16-byte aligned units
+ CCIfType<[f128], CCAssignToStack<16, 16>>,
CCAssignToStack<0, 8>
]>;
-def RetCC_VE : CallingConv<[
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+def RetCC_VE_C : CallingConv<[
+ // Promote i1/i8/i16/i32 return values to i64.
+ CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>,
- // bool, char, int, enum, long --> generic integer 32 bit registers
- CCIfType<[i32], CCAssignToRegWithShadow<
- [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7],
- [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
-
- // float --> generic floating point 32 bit registers
- CCIfType<[f32], CCAssignToRegWithShadow<
- [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7],
- [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+ // Convert float return values to i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ CCIfType<[f32], CCBitConvertToType<i64>>,
- // long long/double --> generic 64 bit registers
+ // bool, char, int, enum, long, long long, float, double
+ // --> generic 64 bit registers
CCIfType<[i64, f64],
CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+ // long double --> pair of generic 64 bit registers
+ CCIfType<[f128],
+ CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3],
+ [SX0, SX1, SX3, SX5]>>,
+]>;
+
+///// Custom fastcc /////
+//
+// This passes vector params and return values in registers. Scalar values are
+// handled conforming to the standard cc.
+def CC_VE_Fast : CallingConv<[
+ // vector --> generic vector registers
+ CCIfType<[v256i32, v256f32, v256i64, v256f64],
+ CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>,
+ // TODO: make this conditional on packed mode
+ CCIfType<[v512i32, v512f32],
+ CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>,
+
+ // vector mask --> generic vector mask registers
+ CCIfType<[v256i1],
+ CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>,
+
+ // pair of vector mask --> generic vector mask registers
+ CCIfType<[v512i1],
+ CCAssignToRegWithShadow<[VMP1, VMP2, VMP3],
+ [VM1, VM3, VM5]>>,
+
+ // Follow the standard C CC for scalars.
+ CCDelegateTo<CC_VE_C>
+]>;
+
+def RetCC_VE_Fast : CallingConv<[
+ // vector --> generic vector registers
+ CCIfType<[v256i32, v256f32, v256i64, v256f64],
+ CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>,
+ // TODO: make this conditional on packed mode
+ CCIfType<[v512i32, v512f32],
+ CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>,
+
+ // vector mask --> generic vector mask registers
+ CCIfType<[v256i1],
+ CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>,
+
+ // pair of vector mask --> generic vector mask registers
+ CCIfType<[v512i1],
+ CCAssignToRegWithShadow<[VMP1, VMP2, VMP3],
+ [VM1, VM3, VM5]>>,
+
+ // Follow the standard C CC for scalars.
+ CCDelegateTo<RetCC_VE_C>
]>;
// Callee-saved registers
@@ -86,4 +148,6 @@ def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>;
def CSR_NoRegs : CalleeSavedRegs<(add)>;
// PreserveAll (clobbers s62,s63) - used for ve_grow_stack
-def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>;
+def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61),
+ (sequence "V%u", 0, 63),
+ (sequence "VM%u", 1, 15))>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
index 8b10e6466123..9e97d0eca833 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -8,6 +8,105 @@
//
// This file contains the VE implementation of TargetFrameLowering class.
//
+// On VE, stack frames are structured as follows:
+//
+// The stack grows downward.
+//
+// All of the individual frame areas on the frame below are optional, i.e. it's
+// possible to create a function so that the particular area isn't present
+// in the frame.
+//
+// At function entry, the "frame" looks as follows:
+//
+// | | Higher address
+// |----------------------------------------------|
+// | Parameter area for this function |
+// |----------------------------------------------|
+// | Register save area (RSA) for this function |
+// |----------------------------------------------|
+// | Return address for this function |
+// |----------------------------------------------|
+// | Frame pointer for this function |
+// |----------------------------------------------| <- sp
+// | | Lower address
+//
+// VE doesn't use on-demand stack allocation, so user code generated by LLVM
+// needs to call VEOS to allocate the stack frame. The VE ABI wants to reduce
+// the number of VEOS calls, so it requires the caller's prologue to allocate
+// not only the RSA (in general the CSR, callee-saved register, area) but also
+// the call frame for callees.
+//
+// After the prologue has run, the frame has the following general structure.
+// Note that technically the last frame area (VLAs) doesn't get created until
+// the main function body runs, after the prologue. However, it's depicted here
+// for completeness.
+//
+// | | Higher address
+// |----------------------------------------------|
+// | Parameter area for this function |
+// |----------------------------------------------|
+// | Register save area (RSA) for this function |
+// |----------------------------------------------|
+// | Return address for this function |
+// |----------------------------------------------|
+// | Frame pointer for this function |
+// |----------------------------------------------| <- fp(=old sp)
+// |.empty.space.to.make.part.below.aligned.in....|
+// |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
+// |.alignment....................................| unknown at compile time)
+// |----------------------------------------------|
+// | Local variables of fixed size including spill|
+// | slots |
+// |----------------------------------------------| <- bp(not defined by ABI,
+// |.variable-sized.local.variables.(VLAs)........| LLVM chooses SX17)
+// |..............................................| (size of this area is
+// |..............................................| unknown at compile time)
+// |----------------------------------------------| <- stack top (returned by
+// | Parameter area for callee | alloca)
+// |----------------------------------------------|
+// | Register save area (RSA) for callee |
+// |----------------------------------------------|
+// | Return address for callee |
+// |----------------------------------------------|
+// | Frame pointer for callee |
+// |----------------------------------------------| <- sp
+// | | Lower address
+//
+// To access the data in a frame, at-compile time, a constant offset must be
+// computable from one of the pointers (fp, bp, sp) to access it. The size
+// of the areas with a dotted background cannot be computed at compile-time
+// if they are present, making it required to have all three of fp, bp and
+// sp to be set up to be able to access all contents in the frame areas,
+// assuming all of the frame areas are non-empty.
+//
+// For most functions, some of the frame areas are empty. For those functions,
+// it may not be necessary to set up fp or bp:
+// * A base pointer is definitely needed when there are both VLAs and local
+// variables with more-than-default alignment requirements.
+// * A frame pointer is definitely needed when there are local variables with
+// more-than-default alignment requirements.
+//
+// In addition, VE ABI defines RSA frame, return address, and frame pointer
+// as follows:
+//
+// |----------------------------------------------| <- sp+176
+// | %s18...%s33 |
+// |----------------------------------------------| <- sp+48
+// | Linkage area register (%s17) |
+// |----------------------------------------------| <- sp+40
+// | Procedure linkage table register (%plt=%s16) |
+// |----------------------------------------------| <- sp+32
+// | Global offset table register (%got=%s15) |
+// |----------------------------------------------| <- sp+24
+// | Thread pointer register (%tp=%s14) |
+// |----------------------------------------------| <- sp+16
+// | Return address |
+// |----------------------------------------------| <- sp+8
+// | Frame pointer |
+// |----------------------------------------------| <- sp+0
+//
+// NOTE: This description is based on the VE ABI and on the description in
+// AArch64FrameLowering.cpp. Thanks a lot.
//===----------------------------------------------------------------------===//
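For reference, the fixed RSA slot assignments in the diagram above can be captured as compile-time constants. This is only an illustrative sketch; the names below do not exist in the VE backend:

// Offsets from %sp into the 176-byte register save area, following the
// VE ABI layout described in the comment above. Names are illustrative.
enum RsaSlot : int64_t {
  RsaFP = 0,    // saved frame pointer (%s9)
  RsaLR = 8,    // saved return address (%s10)
  RsaTP = 16,   // thread pointer (%s14)
  RsaGOT = 24,  // global offset table register (%s15)
  RsaPLT = 32,  // procedure linkage table register (%s16)
  RsaS17 = 40,  // linkage area register (%s17, used as BP when needed)
  RsaCSR = 48,  // %s18..%s33 callee-saved registers, 8 bytes each
  RsaEnd = 176, // end of the reserved area at the top of the stack
};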
#include "VEFrameLowering.h"
@@ -38,48 +137,47 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
MachineBasicBlock::iterator MBBI,
uint64_t NumBytes,
bool RequireFPUpdate) const {
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ DebugLoc DL;
+ const VEInstrInfo &TII = *STI.getInstrInfo();
- DebugLoc dl;
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Insert following codes here as prologue
//
- // st %fp, 0(,%sp)
- // st %lr, 8(,%sp)
- // st %got, 24(,%sp)
- // st %plt, 32(,%sp)
- // st %s17, 40(,%sp) iff this function is using s17 as BP
- // or %fp, 0, %sp
-
- BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(0)
- .addReg(VE::SX9);
- BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(8)
- .addReg(VE::SX10);
- BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(24)
- .addReg(VE::SX15);
- BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(32)
- .addReg(VE::SX16);
+ // st %fp, 0(, %sp) iff !isLeafProc
+ // st %lr, 8(, %sp) iff !isLeafProc
+ // st %got, 24(, %sp) iff hasGOT
+ // st %plt, 32(, %sp) iff hasGOT
+ // st %s17, 40(, %sp) iff hasBP
+ if (!FuncInfo->isLeafProc()) {
+ BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(0)
+ .addReg(VE::SX9);
+ BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(8)
+ .addReg(VE::SX10);
+ }
+ if (hasGOT(MF)) {
+ BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(24)
+ .addReg(VE::SX15);
+ BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(32)
+ .addReg(VE::SX16);
+ }
if (hasBP(MF))
- BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
+ BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
.addReg(VE::SX11)
.addImm(0)
.addImm(40)
.addReg(VE::SX17);
- BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9)
- .addReg(VE::SX11)
- .addImm(0);
}
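Each store above follows the same BuildMI shape: STrii takes a base register, an index immediate, a displacement immediate, and the value to store, corresponding to "st %val, offset(, %base)". A hedged sketch of a helper that captures this pattern (hypothetical; the real code expands the stores inline as shown above):

// Emit "st %SrcReg, Offset(, %sp)" at MBBI. Illustrative helper only.
static void emitRsaStore(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                         const VEInstrInfo &TII, int64_t Offset,
                         unsigned SrcReg) {
  BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
      .addReg(VE::SX11) // base: %sp
      .addImm(0)        // index
      .addImm(Offset)   // displacement within the RSA
      .addReg(SrcReg);  // register being saved
}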
void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
@@ -87,43 +185,42 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
MachineBasicBlock::iterator MBBI,
uint64_t NumBytes,
bool RequireFPUpdate) const {
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ DebugLoc DL;
+ const VEInstrInfo &TII = *STI.getInstrInfo();
- DebugLoc dl;
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Insert following codes here as epilogue
//
- // or %sp, 0, %fp
- // ld %s17, 40(,%sp) iff this function is using s17 as BP
- // ld %got, 32(,%sp)
- // ld %plt, 24(,%sp)
- // ld %lr, 8(,%sp)
- // ld %fp, 0(,%sp)
-
- BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11)
- .addReg(VE::SX9)
- .addImm(0);
+ // ld %s17, 40(, %sp) iff hasBP
+ // ld %plt, 32(, %sp) iff hasGOT
+ // ld %got, 24(, %sp) iff hasGOT
+ // ld %lr, 8(, %sp) iff !isLeafProc
+ // ld %fp, 0(, %sp) iff !isLeafProc
if (hasBP(MF))
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17)
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX17)
.addReg(VE::SX11)
.addImm(0)
.addImm(40);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16)
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(32);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX15)
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(24);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX10)
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(8);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX9)
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(0);
+ if (hasGOT(MF)) {
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX16)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(32);
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX15)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(24);
+ }
+ if (!FuncInfo->isLeafProc()) {
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX10)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(8);
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX9)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(0);
+ }
}
void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
@@ -131,37 +228,44 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock::iterator MBBI,
int64_t NumBytes,
MaybeAlign MaybeAlign) const {
- DebugLoc dl;
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ DebugLoc DL;
+ const VEInstrInfo &TII = *STI.getInstrInfo();
- if (NumBytes >= -64 && NumBytes < 63) {
- BuildMI(MBB, MBBI, dl, TII.get(VE::ADDSLri), VE::SX11)
+ if (NumBytes == 0) {
+ // Nothing to do here.
+ } else if (isInt<7>(NumBytes)) {
+ // adds.l %s11, NumBytes@lo, %s11
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ADDSLri), VE::SX11)
.addReg(VE::SX11)
.addImm(NumBytes);
- return;
+ } else if (isInt<32>(NumBytes)) {
+ // lea %s11, NumBytes@lo(, %s11)
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LEArii), VE::SX11)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(Lo_32(NumBytes));
+ } else {
+ // Emit the following code. This clobbers SX13, which we always know is
+ // available here.
+ // lea %s13, NumBytes@lo
+ // and %s13, %s13, (32)0
+ // lea.sl %sp, NumBytes@hi(%s13, %sp)
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LEAzii), VE::SX13)
+ .addImm(0)
+ .addImm(0)
+ .addImm(Lo_32(NumBytes));
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ANDrm), VE::SX13)
+ .addReg(VE::SX13)
+ .addImm(M0(32));
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LEASLrri), VE::SX11)
+ .addReg(VE::SX11)
+ .addReg(VE::SX13)
+ .addImm(Hi_32(NumBytes));
}
- // Emit following codes. This clobbers SX13 which we always know is
- // available here.
- // lea %s13,%lo(NumBytes)
- // and %s13,%s13,(32)0
- // lea.sl %sp,%hi(NumBytes)(%sp, %s13)
- BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzii), VE::SX13)
- .addImm(0)
- .addImm(0)
- .addImm(Lo_32(NumBytes));
- BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX13)
- .addReg(VE::SX13)
- .addImm(M0(32));
- BuildMI(MBB, MBBI, dl, TII.get(VE::LEASLrri), VE::SX11)
- .addReg(VE::SX11)
- .addReg(VE::SX13)
- .addImm(Hi_32(NumBytes));
-
if (MaybeAlign) {
// and %sp, %sp, Align-1
- BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11)
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ANDrm), VE::SX11)
.addReg(VE::SX11)
.addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value())));
}
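The three branches above pick the cheapest sequence that can encode the adjustment: a 7-bit signed immediate uses a single adds.l, a 32-bit signed immediate uses a single lea, and anything larger is split into Lo_32/Hi_32 halves through %s13. A rough sketch of the same decision, for illustration only (mirrors the branches above; isInt<> comes from MathExtras.h):

// Which SP-adjustment sequence emitSPAdjustment chooses for a given delta.
static const char *classifySPAdjustment(int64_t NumBytes) {
  if (NumBytes == 0)
    return "none";
  if (isInt<7>(NumBytes))
    return "adds.l %s11, imm7, %s11";              // single ADDSLri
  if (isInt<32>(NumBytes))
    return "lea %s11, simm32(, %s11)";             // single LEArii
  return "lea/and/lea.sl via %s13 (64-bit split)"; // LEAzii + ANDrm + LEASLrri
}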
@@ -169,9 +273,8 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const {
- DebugLoc dl;
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ DebugLoc DL;
+ const VEInstrInfo &TII = *STI.getInstrInfo();
// Emit following codes. It is not possible to insert multiple
// BasicBlocks in PEI pass, so we emit two pseudo instructions here.
@@ -198,22 +301,23 @@ void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
// EXTEND_STACK_GUARD pseudo will be simply eliminated by ExpandPostRA
// pass. This pseudo is required to be at the next of EXTEND_STACK
// pseudo in order to protect iteration loop in ExpandPostRA.
-
- BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK));
- BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK_GUARD));
+ BuildMI(MBB, MBBI, DL, TII.get(VE::EXTEND_STACK));
+ BuildMI(MBB, MBBI, DL, TII.get(VE::EXTEND_STACK_GUARD));
}
void VEFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo &MFI = MF.getFrameInfo();
const VEInstrInfo &TII = *STI.getInstrInfo();
const VERegisterInfo &RegInfo = *STI.getRegisterInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
+ bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF);
+
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
- DebugLoc dl;
- bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF);
+ DebugLoc DL;
// FIXME: unfortunately, returning false from canRealignStack
// actually just causes needsStackRealignment to return false,
@@ -226,12 +330,17 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
"stack re-alignment, but LLVM couldn't handle it "
"(probably because it has a dynamic alloca).");
- // Get the number of bytes to allocate from the FrameInfo
+ // Get the number of bytes to allocate from the FrameInfo.
+ // This number of bytes is already aligned to ABI stack alignment.
uint64_t NumBytes = MFI.getStackSize();
- // The VE ABI requires a reserved 176 bytes area at the top
- // of stack as described in VESubtarget.cpp. So, we adjust it here.
- NumBytes = STI.getAdjustedFrameSize(NumBytes);
+ // Adjust the stack size if this function is not a leaf function, since the
+ // VE ABI requires a reserved area at the top of the stack as described in
+ // VEFrameLowering.cpp.
+ if (!FuncInfo->isLeafProc()) {
+ // NOTE: The number is aligned to ABI stack alignment after adjustment.
+ NumBytes = STI.getAdjustedFrameSize(NumBytes);
+ }
// Finally, ensure that the size is sufficiently aligned for the
// data on the stack.
@@ -240,36 +349,34 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
// Update stack size with corrected value.
MFI.setStackSize(NumBytes);
- // Emit Prologue instructions to save %lr
+ // Emit Prologue instructions to save multiple registers.
emitPrologueInsns(MF, MBB, MBBI, NumBytes, true);
+ // Emit instructions to save SP in FP as follows if this is not a leaf
+ // function:
+ // or %fp, 0, %sp
+ if (!FuncInfo->isLeafProc())
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX9)
+ .addReg(VE::SX11)
+ .addImm(0);
+
// Emit stack adjust instructions
MaybeAlign RuntimeAlign =
NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None;
+ assert((RuntimeAlign == None || !FuncInfo->isLeafProc()) &&
+ "SP has to be saved in order to align variable sized stack object!");
emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign);
if (hasBP(MF)) {
// Copy SP to BP.
- BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17)
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX17)
.addReg(VE::SX11)
.addImm(0);
}
// Emit stack extend instructions
- emitSPExtend(MF, MBB, MBBI);
-
- Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true);
-
- // Emit ".cfi_def_cfa_register 30".
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // Emit ".cfi_window_save".
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (NumBytes != 0)
+ emitSPExtend(MF, MBB, MBBI);
}
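Putting the pieces together, for a non-leaf function that uses the GOT, needs a base pointer, and has a frame small enough for the 32-bit lea form, the prologue emitted above looks roughly like this (illustrative; exact immediates depend on the frame size):

// st %fp, 0(, %sp)           ; emitPrologueInsns, !isLeafProc
// st %lr, 8(, %sp)
// st %got, 24(, %sp)         ; hasGOT
// st %plt, 32(, %sp)
// st %s17, 40(, %sp)         ; hasBP
// or %fp, 0, %sp             ; save old SP in FP
// lea %sp, -NumBytes(, %sp)  ; emitSPAdjustment (isInt<32> case)
// and %sp, %sp, M1(...)      ; only if stack realignment is needed
// or %s17, 0, %sp            ; copy SP to BP, hasBP
// (EXTEND_STACK)             ; only if NumBytes != 0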
MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr(
@@ -289,21 +396,33 @@ MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr(
void VEFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ DebugLoc DL;
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ const VEInstrInfo &TII = *STI.getInstrInfo();
uint64_t NumBytes = MFI.getStackSize();
- // Emit Epilogue instructions to restore %lr
+ // Emit instructions to retrieve original SP.
+ if (!FuncInfo->isLeafProc()) {
+ // If SP is saved in FP, retrieve it as follows:
+ // or %sp, 0, %fp iff !isLeafProc
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX11)
+ .addReg(VE::SX9)
+ .addImm(0);
+ } else {
+ // Emit stack adjust instructions.
+ emitSPAdjustment(MF, MBB, MBBI, NumBytes, None);
+ }
+
+ // Emit Epilogue instructions to restore multiple registers.
emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true);
}
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas
-// or if frame pointer elimination is disabled. For the case of VE, we don't
-// implement FP eliminator yet, but we returns false from this function to
-// not refer fp from generated code.
+// or if frame pointer elimination is disabled.
bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -320,34 +439,41 @@ bool VEFrameLowering::hasBP(const MachineFunction &MF) const {
return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
}
-int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const {
+bool VEFrameLowering::hasGOT(const MachineFunction &MF) const {
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+
+ // If a global base register is assigned (!= 0), GOT is used.
+ return FuncInfo->getGlobalBaseReg() != 0;
+}
+
+StackOffset VEFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const VERegisterInfo *RegInfo = STI.getRegisterInfo();
- const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
bool isFixed = MFI.isFixedObjectIndex(FI);
int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
- if (FuncInfo->isLeafProc()) {
- // If there's a leaf proc, all offsets need to be %sp-based,
- // because we haven't caused %fp to actually point to our frame.
+ if (!hasFP(MF)) {
+ // If FP is not used, frame indices are based on the %sp register.
FrameReg = VE::SX11; // %sp
- return FrameOffset + MF.getFrameInfo().getStackSize();
+ return StackOffset::getFixed(FrameOffset +
+ MF.getFrameInfo().getStackSize());
}
if (RegInfo->needsStackRealignment(MF) && !isFixed) {
- // If there is dynamic stack realignment, all local object
- // references need to be via %sp or %s17 (bp), to take account
- // of the re-alignment.
+ // If data on the stack requires realignment, frame indices are based on
+ // %sp or %s17 (bp). If there is a variable-sized object, bp is used.
if (hasBP(MF))
FrameReg = VE::SX17; // %bp
else
FrameReg = VE::SX11; // %sp
- return FrameOffset + MF.getFrameInfo().getStackSize();
+ return StackOffset::getFixed(FrameOffset +
+ MF.getFrameInfo().getStackSize());
}
- // Finally, default to using %fp.
+ // Use %fp by default.
FrameReg = RegInfo->getFrameRegister(MF);
- return FrameOffset;
+ return StackOffset::getFixed(FrameOffset);
}
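With the signature change above, callers now receive a StackOffset instead of a plain int. On VE every offset is a fixed (non-scalable) byte count, so a caller would typically unwrap it like this (a hedged sketch of typical use, assuming a const VEFrameLowering *TFL; not code from this patch):

// Resolve a frame index to a base register plus a byte offset.
Register FrameReg;
StackOffset Offset = TFL->getFrameIndexReference(MF, FI, FrameReg);
int64_t Bytes = Offset.getFixed(); // VE never produces a scalable component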
bool VEFrameLowering::isLeafProc(MachineFunction &MF) const {
@@ -367,8 +493,10 @@ void VEFrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- if (isLeafProc(MF)) {
- VEMachineFunctionInfo *MFI = MF.getInfo<VEMachineFunctionInfo>();
- MFI->setLeafProc(true);
+ // Functions that need a BP must emit a prologue and epilogue to allocate a
+ // local buffer on the stack even if the function is a leaf function.
+ if (isLeafProc(MF) && !hasBP(MF)) {
+ VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ FuncInfo->setLeafProc(true);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
index b548d663c504..99eb41189b25 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
@@ -15,6 +15,7 @@
#include "VE.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -38,8 +39,10 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
- bool hasBP(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const override;
+ bool hasBP(const MachineFunction &MF) const;
+ bool hasGOT(const MachineFunction &MF) const;
+
// VE reserves argument space always for call sites in the function
// immediately on entry of the current function.
bool hasReservedCallFrame(const MachineFunction &MF) const override {
@@ -48,8 +51,8 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
index f3d067d55fdb..761baa79b4ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -113,15 +113,6 @@ inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) {
return Val;
}
-/// convMImmVal - Convert a mimm integer immediate value to target immediate.
-inline static uint64_t convMImmVal(uint64_t Val) {
- if (Val == 0)
- return 0; // (0)1
- if (Val & (1UL << 63))
- return countLeadingOnes(Val); // (m)1
- return countLeadingZeros(Val) | 0x40; // (m)0
-}
-
//===--------------------------------------------------------------------===//
/// VEDAGToDAGISel - VE specific code to select VE machine
/// instructions for SelectionDAG operations.
@@ -148,6 +139,7 @@ public:
bool selectADDRzri(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset);
bool selectADDRzii(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset);
bool selectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
+ bool selectADDRzi(SDValue N, SDValue &Base, SDValue &Offset);
StringRef getPassName() const override {
return "VE DAG->DAG Pattern Instruction Selection";
@@ -183,6 +175,14 @@ bool VEDAGToDAGISel::selectADDRrri(SDValue Addr, SDValue &Base, SDValue &Index,
return false;
}
if (matchADDRrr(Addr, LHS, RHS)) {
+ // If the input is a pair of a frame index and a register, move the
+ // frame index to LHS. This generates an MI with the following operands:
+ // %dest, #FI, %reg, offset
+ // In eliminateFrameIndex, the above MI is converted to the following:
+ // %dest, %fp, %reg, fi_offset + offset
+ if (dyn_cast<FrameIndexSDNode>(RHS))
+ std::swap(LHS, RHS);
+
if (matchADDRri(RHS, Index, Offset)) {
Base = LHS;
return true;
@@ -228,7 +228,7 @@ bool VEDAGToDAGISel::selectADDRzii(SDValue Addr, SDValue &Base, SDValue &Index,
Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
return false; // direct calls.
- if (ConstantSDNode *CN = cast<ConstantSDNode>(Addr)) {
+ if (auto *CN = dyn_cast<ConstantSDNode>(Addr)) {
if (isInt<32>(CN->getSExtValue())) {
Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
Index = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
@@ -250,6 +250,26 @@ bool VEDAGToDAGISel::selectADDRri(SDValue Addr, SDValue &Base,
return true;
}
+bool VEDAGToDAGISel::selectADDRzi(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (dyn_cast<FrameIndexSDNode>(Addr))
+ return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false; // direct calls.
+
+ if (auto *CN = dyn_cast<ConstantSDNode>(Addr)) {
+ if (isInt<32>(CN->getSExtValue())) {
+ Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ Offset =
+ CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
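selectADDRzi accepts only plain absolute addresses that fit in a signed 32-bit immediate and rejects frame indices and symbolic addresses, yielding a zero base plus an immediate. For illustration (the IR below is an assumed example, not from the patch):

// %v = load i32, i32* inttoptr (i64 1024 to i32*)
// selectADDRzi(1024)       -> Base = 0, Offset = 1024 ; "zi" = zero base + imm
// selectADDRzi(FrameIndex) -> no match (handled by the ri/rii forms instead)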
bool VEDAGToDAGISel::matchADDRrr(SDValue Addr, SDValue &Base, SDValue &Index) {
if (dyn_cast<FrameIndexSDNode>(Addr))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
index ab720545dd83..d377f8e27cfd 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -13,6 +13,7 @@
#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
+#include "VEInstrBuilder.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
@@ -21,6 +22,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -38,39 +40,280 @@ using namespace llvm;
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
-static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- switch (LocVT.SimpleTy) {
- case MVT::f32: {
- // Allocate stack like below
- // 0 4
- // +------+------+
- // | empty| float|
- // +------+------+
- // Use align=8 for dummy area to align the beginning of these 2 area.
- State.AllocateStack(4, Align(8)); // for empty area
- // Use align=4 for value to place it at just after the dummy area.
- unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return true;
- }
+#include "VEGenCallingConv.inc"
+
+CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
+ switch (CallConv) {
default:
- return false;
+ return RetCC_VE_C;
+ case CallingConv::Fast:
+ return RetCC_VE_Fast;
}
}
-#include "VEGenCallingConv.inc"
+CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
+ if (IsVarArg)
+ return CC_VE2;
+ switch (CallConv) {
+ default:
+ return CC_VE_C;
+ case CallingConv::Fast:
+ return CC_VE_Fast;
+ }
+}
bool VETargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
- CCAssignFn *RetCC = RetCC_VE;
+ CCAssignFn *RetCC = getReturnCC(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
}
+static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
+ MVT::v256f32, MVT::v512f32, MVT::v256f64};
+
+static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
+
+void VETargetLowering::initRegisterClasses() {
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &VE::I32RegClass);
+ addRegisterClass(MVT::i64, &VE::I64RegClass);
+ addRegisterClass(MVT::f32, &VE::F32RegClass);
+ addRegisterClass(MVT::f64, &VE::I64RegClass);
+ addRegisterClass(MVT::f128, &VE::F128RegClass);
+
+ if (Subtarget->enableVPU()) {
+ for (MVT VecVT : AllVectorVTs)
+ addRegisterClass(VecVT, &VE::V64RegClass);
+ addRegisterClass(MVT::v256i1, &VE::VMRegClass);
+ addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
+ }
+}
+
+void VETargetLowering::initSPUActions() {
+ const auto &TM = getTargetMachine();
+ /// Load & Store {
+
+ // VE doesn't have i1 sign extending load.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setTruncStoreAction(VT, MVT::i1, Expand);
+ }
+
+ // VE doesn't have floating point extload/truncstore, so expand them.
+ for (MVT FPVT : MVT::fp_valuetypes()) {
+ for (MVT OtherFPVT : MVT::fp_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
+ setTruncStoreAction(FPVT, OtherFPVT, Expand);
+ }
+ }
+
+ // VE doesn't have fp128 load/store, so expand them in custom lower.
+ setOperationAction(ISD::LOAD, MVT::f128, Custom);
+ setOperationAction(ISD::STORE, MVT::f128, Custom);
+
+ /// } Load & Store
+
+ // Custom legalize address nodes into LO/HI parts.
+ MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
+ setOperationAction(ISD::BlockAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
+ setOperationAction(ISD::ConstantPool, PtrVT, Custom);
+ setOperationAction(ISD::JumpTable, PtrVT, Custom);
+
+ /// VAARG handling {
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ // VAARG needs to be lowered to accesses with 8-byte alignment.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ /// } VAARG handling
+
+ /// Stack {
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ /// } Stack
+
+ /// Branch {
+
+ // VE doesn't have BRCOND
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ // BR_JT is not implemented yet.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ /// } Branch
+
+ /// Int Ops {
+ for (MVT IntVT : {MVT::i32, MVT::i64}) {
+ // VE has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, IntVT, Expand);
+ setOperationAction(ISD::SREM, IntVT, Expand);
+ setOperationAction(ISD::SDIVREM, IntVT, Expand);
+ setOperationAction(ISD::UDIVREM, IntVT, Expand);
+
+ // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
+ setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
+ setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
+ setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
+
+ // VE has no MULHU/S or U/SMUL_LOHI operations.
+ // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
+ setOperationAction(ISD::MULHU, IntVT, Expand);
+ setOperationAction(ISD::MULHS, IntVT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
+
+ // VE has no CTTZ, ROTL, ROTR operations.
+ setOperationAction(ISD::CTTZ, IntVT, Expand);
+ setOperationAction(ISD::ROTL, IntVT, Expand);
+ setOperationAction(ISD::ROTR, IntVT, Expand);
+
+ // VE has a 64-bit instruction that implements the i64 BSWAP operation. The
+ // same instruction also handles the i32 BSWAP operation with an additional
+ // parameter. Use isel patterns to lower BSWAP.
+ setOperationAction(ISD::BSWAP, IntVT, Legal);
+
+ // VE has only 64-bit instructions for the BITREVERSE/CTLZ/CTPOP
+ // operations. Use isel patterns for i64 and promote for i32.
+ LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
+ setOperationAction(ISD::BITREVERSE, IntVT, Act);
+ setOperationAction(ISD::CTLZ, IntVT, Act);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
+ setOperationAction(ISD::CTPOP, IntVT, Act);
+
+ // VE has only 64-bit instructions for the AND/OR/XOR operations.
+ // Use isel patterns for i64 and promote for i32.
+ setOperationAction(ISD::AND, IntVT, Act);
+ setOperationAction(ISD::OR, IntVT, Act);
+ setOperationAction(ISD::XOR, IntVT, Act);
+ }
+ /// } Int Ops
+
+ /// Conversion {
+ // VE doesn't have instructions for fp<->uint, so expand them by llvm
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+
+ // fp16 not supported
+ for (MVT FPVT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
+ setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
+ }
+ /// } Conversion
+
+ /// Floating-point Ops {
+ /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
+ /// and fcmp.
+
+ // VE doesn't have the following floating-point operations.
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ }
+
+ // VE doesn't have fdiv of f128.
+ setOperationAction(ISD::FDIV, MVT::f128, Expand);
+
+ for (MVT FPVT : {MVT::f32, MVT::f64}) {
+ // f32 and f64 use ConstantFP; f128 uses ConstantPool.
+ setOperationAction(ISD::ConstantFP, FPVT, Legal);
+ }
+ /// } Floating-point Ops
+
+ /// Floating-point math functions {
+
+ // VE doesn't have the following floating-point math functions.
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ }
+
+ /// } Floating-point math functions
+
+ /// Atomic instructions {
+
+ setMaxAtomicSizeInBitsSupported(64);
+ setMinCmpXchgSizeInBits(32);
+ setSupportsUnalignedAtomics(false);
+
+ // Use custom inserter for ATOMIC_FENCE.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ // Other atomic instructions.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ // Support i8/i16 atomic swap.
+ setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
+
+ // FIXME: Support "atmam" instructions.
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
+
+ // VE doesn't have the following instructions.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
+ }
+
+ /// } Atomic instructions
+
+ /// SJLJ instructions {
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
+ if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
+ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
+ /// } SJLJ instructions
+
+ // Intrinsic instructions
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+}
+
+void VETargetLowering::initVPUActions() {
+ for (MVT LegalVecVT : AllVectorVTs) {
+ setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
+ // Translate all vector instructions with legal element types to VVP_*
+ // nodes.
+ // TODO: We will custom-widen into VVP_* nodes in the future. While we are
+ // building the infrastructure for this, we only do this for legal vector
+ // VTs.
+#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
+ setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
+#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
+ setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
+#include "VVPNodes.def"
+ }
+
+ for (MVT LegalPackedVT : AllPackedVTs) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
+ }
+}
+
SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
@@ -85,7 +328,7 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
*DAG.getContext());
// Analyze return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_VE);
+ CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -94,6 +337,7 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
+ assert(!VA.needsCustom() && "Unexpected custom lowering");
SDValue OutVal = OutVals[i];
// Integer return values must be sign or zero extended by the callee.
@@ -109,12 +353,26 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
case CCValAssign::AExt:
OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
break;
+ case CCValAssign::BCvt: {
+ // Convert a float return value to i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT() == MVT::f32);
+ SDValue Undef = SDValue(
+ DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
+ SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
+ OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ MVT::i64, Undef, OutVal, Sub_f32),
+ 0);
+ break;
+ }
default:
llvm_unreachable("Unknown loc info!");
}
- assert(!VA.needsCustom() && "Unexpected custom lowering");
-
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
// Guarantee that all emitted copies are stuck together with flags.
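The BCvt case above implements the VE convention that a scalar f32 travels in the upper 32 bits of an i64 register, with the low half as padding. At the bit level the packing and unpacking look like this (a plain-C++ illustration of the layout, not the SelectionDAG code itself):

#include <cstdint>
#include <cstring>

// Pack an f32 into bits 63..32 of an i64, as the VE calling convention
// expects; the low 32 bits are padding.
static uint64_t packF32(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return static_cast<uint64_t>(Bits) << 32;
}

// Recover the f32 from the upper half of the i64.
static float unpackF32(uint64_t Packed) {
  uint32_t Bits = static_cast<uint32_t>(Packed >> 32);
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}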
@@ -138,7 +396,7 @@ SDValue VETargetLowering::LowerFormalArguments(
MachineFunction &MF = DAG.getMachineFunction();
// Get the base offset of the incoming arguments stack space.
- unsigned ArgsBaseOffset = 176;
+ unsigned ArgsBaseOffset = Subtarget->getRsaSize();
// Get the size of the preserved arguments area
unsigned ArgsPreserved = 64;
@@ -150,10 +408,11 @@ SDValue VETargetLowering::LowerFormalArguments(
CCInfo.AllocateStack(ArgsPreserved, Align(8));
// We already allocated the preserved area, so the stack offset computed
// by CC_VE would be correct now.
- CCInfo.AnalyzeFormalArguments(Ins, CC_VE);
+ CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
+ assert(!VA.needsCustom() && "Unexpected custom lowering");
if (VA.isRegLoc()) {
// This argument is passed in a register.
// All integer register arguments are promoted by the caller to i64.
@@ -163,11 +422,6 @@ SDValue VETargetLowering::LowerFormalArguments(
MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
- // Get the high bits for i32 struct elements.
- if (VA.getValVT() == MVT::i32 && VA.needsCustom())
- Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
- DAG.getConstant(32, DL, MVT::i32));
-
// The caller promoted the argument, so insert an Assert?ext SDNode so we
// won't promote the value again in this function.
switch (VA.getLocInfo()) {
@@ -179,6 +433,20 @@ SDValue VETargetLowering::LowerFormalArguments(
Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
DAG.getValueType(VA.getValVT()));
break;
+ case CCValAssign::BCvt: {
+ // Extract a float argument from i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT() == MVT::f32);
+ SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
+ Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::f32, Arg, Sub_f32),
+ 0);
+ break;
+ }
default:
break;
}
@@ -194,9 +462,23 @@ SDValue VETargetLowering::LowerFormalArguments(
// The registers are exhausted. This argument was passed on the stack.
assert(VA.isMemLoc());
// The CC_VE_Full/Half functions compute stack offsets relative to the
- // beginning of the arguments area at %fp+176.
+ // beginning of the arguments area at %fp + the size of the reserved area.
unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
+
+ // Adjust the offset for a float argument by adding 4, since the argument is
+ // stored in an 8-byte slot laid out as below. LLVM generates a 4-byte load
+ // instruction, so the offset must be adjusted here. This adjustment is
+ // required only in LowerFormalArguments. In LowerCall, a float argument is
+ // converted to i64 first and stored as 8 bytes of data, as required by the
+ // ABI, so no adjustment is needed there.
+ // 0 4
+ // +------+------+
+ // | empty| float|
+ // +------+------+
+ if (VA.getValVT() == MVT::f32)
+ Offset += 4;
+
int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
InVals.push_back(
DAG.getLoad(VA.getValVT(), DL, Chain,
@@ -215,7 +497,7 @@ SDValue VETargetLowering::LowerFormalArguments(
// TODO: need to calculate offset correctly once we support f128.
unsigned ArgOffset = ArgLocs.size() * 8;
VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
- // Skip the 176 bytes of register save area.
+ // Skip the reserved area at the top of the stack.
FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
return Chain;
@@ -258,7 +540,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CLI.IsTailCall = false;
// Get the base offset of the outgoing arguments stack space.
- unsigned ArgsBaseOffset = 176;
+ unsigned ArgsBaseOffset = Subtarget->getRsaSize();
// Get the size of the preserved arguments area
unsigned ArgsPreserved = 8 * 8u;
@@ -270,7 +552,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCInfo.AllocateStack(ArgsPreserved, Align(8));
// We already allocated the preserved area, so the stack offset computed
// by CC_VE would be correct now.
- CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);
+ CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
// VE requires to use both register and stack for varargs or no-prototyped
// functions.
@@ -281,7 +563,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
ArgLocs2, *DAG.getContext());
if (UseBoth)
- CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);
+ CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
// Get the size of the outgoing arguments stack space requirement.
unsigned ArgsSize = CCInfo.getNextStackOffset();
@@ -371,6 +653,22 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
+ case CCValAssign::BCvt: {
+ // Convert a float argument to i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT() == MVT::f32);
+ SDValue Undef = SDValue(
+ DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
+ SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
+ Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ MVT::i64, Undef, Arg, Sub_f32),
+ 0);
+ break;
+ }
}
if (VA.isRegLoc()) {
@@ -384,8 +682,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Create a store off the stack pointer for this argument.
SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
- // The argument area starts at %fp+176 in the callee frame,
- // %sp+176 in ours.
+ // The argument area starts at %fp/%sp + the size of the reserved area.
SDValue PtrOff =
DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
@@ -450,11 +747,12 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
CLI.Ins[0].Flags.setInReg();
- RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);
+ RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
+ assert(!VA.needsCustom() && "Unexpected custom lowering");
unsigned Reg = VA.getLocReg();
// When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
@@ -472,11 +770,6 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InGlue = Chain.getValue(2);
}
- // Get the high bits for i32 struct elements.
- if (VA.getValVT() == MVT::i32 && VA.needsCustom())
- RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
- DAG.getConstant(32, DL, MVT::i32));
-
// The callee promoted the return value, so insert an Assert?ext SDNode so
// we won't promote the value again in this function.
switch (VA.getLocInfo()) {
@@ -488,6 +781,20 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
DAG.getValueType(VA.getValVT()));
break;
+ case CCValAssign::BCvt: {
+ // Extract a float return value from i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT() == MVT::f32);
+ SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
+ RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::f32, RV, Sub_f32),
+ 0);
+ break;
+ }
default:
break;
}
@@ -502,6 +809,15 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
return Chain;
}
+bool VETargetLowering::isOffsetFoldingLegal(
+ const GlobalAddressSDNode *GA) const {
+ // VE uses 64-bit addressing, so we need multiple instructions to generate
+ // an address. Folding an address with an offset increases the number of
+ // instructions, so we disable it here. Offsets will be folded later in
+ // the DAG combiner if it is worth doing so.
+ return false;
+}
+
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
@@ -531,30 +847,6 @@ bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return true;
}
-bool VETargetLowering::hasAndNot(SDValue Y) const {
- EVT VT = Y.getValueType();
-
- // VE doesn't have vector and not instruction.
- if (VT.isVector())
- return false;
-
- // VE allows different immediate values for X and Y where ~X & Y.
- // Only simm7 works for X, and only mimm works for Y on VE. However, this
- // function is used to check whether an immediate value is OK for and-not
- // instruction as both X and Y. Generating additional instruction to
- // retrieve an immediate value is no good since the purpose of this
- // function is to convert a series of 3 instructions to another series of
- // 3 instructions with better parallelism. Therefore, we return false
- // for all immediate values now.
- // FIXME: Change hasAndNot function to have two operands to make it work
- // correctly with Aurora VE.
- if (isa<ConstantSDNode>(Y))
- return false;
-
- // It's ok for generic registers.
- return true;
-}
-
VETargetLowering::VETargetLowering(const TargetMachine &TM,
const VESubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -566,91 +858,15 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent);
- // Set up the register classes.
- addRegisterClass(MVT::i32, &VE::I32RegClass);
- addRegisterClass(MVT::i64, &VE::I64RegClass);
- addRegisterClass(MVT::f32, &VE::F32RegClass);
- addRegisterClass(MVT::f64, &VE::I64RegClass);
-
- /// Load & Store {
- for (MVT FPVT : MVT::fp_valuetypes()) {
- for (MVT OtherFPVT : MVT::fp_valuetypes()) {
- // Turn FP extload into load/fpextend
- setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
-
- // Turn FP truncstore into trunc + store.
- setTruncStoreAction(FPVT, OtherFPVT, Expand);
- }
- }
-
- // VE doesn't have i1 sign extending load
- for (MVT VT : MVT::integer_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
- setTruncStoreAction(VT, MVT::i1, Expand);
- }
- /// } Load & Store
-
- // Custom legalize address nodes into LO/HI parts.
- MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
- setOperationAction(ISD::BlockAddress, PtrVT, Custom);
- setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
- setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
-
- /// VAARG handling {
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
- // VAARG needs to be lowered to access with 8 bytes alignment.
- setOperationAction(ISD::VAARG, MVT::Other, Custom);
- // Use the default implementation.
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
- /// } VAARG handling
-
- /// Stack {
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
- /// } Stack
-
- /// Int Ops {
- for (MVT IntVT : {MVT::i32, MVT::i64}) {
- // VE has no REM or DIVREM operations.
- setOperationAction(ISD::UREM, IntVT, Expand);
- setOperationAction(ISD::SREM, IntVT, Expand);
- setOperationAction(ISD::SDIVREM, IntVT, Expand);
- setOperationAction(ISD::UDIVREM, IntVT, Expand);
-
- setOperationAction(ISD::CTTZ, IntVT, Expand);
- setOperationAction(ISD::ROTL, IntVT, Expand);
- setOperationAction(ISD::ROTR, IntVT, Expand);
-
- // Use isel patterns for i32 and i64
- setOperationAction(ISD::BSWAP, IntVT, Legal);
- setOperationAction(ISD::CTLZ, IntVT, Legal);
- setOperationAction(ISD::CTPOP, IntVT, Legal);
-
- // Use isel patterns for i64, Promote i32
- LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
- setOperationAction(ISD::BITREVERSE, IntVT, Act);
- }
- /// } Int Ops
-
- /// Conversion {
- // VE doesn't have instructions for fp<->uint, so expand them by llvm
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
-
- // fp16 not supported
- for (MVT FPVT : MVT::fp_valuetypes()) {
- setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
- setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
- }
- /// } Conversion
+ initRegisterClasses();
+ initSPUActions();
+ initVPUActions();
setStackPointerRegisterToSaveRestore(VE::SX11);
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::TRUNCATE);
+
// Set function alignment to 16 bytes
setMinFunctionAlignment(Align(16));
@@ -667,14 +883,24 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((VEISD::NodeType)Opcode) {
case VEISD::FIRST_NUMBER:
break;
- TARGET_NODE_CASE(Lo)
- TARGET_NODE_CASE(Hi)
+ TARGET_NODE_CASE(CALL)
+ TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
+ TARGET_NODE_CASE(EH_SJLJ_SETJMP)
+ TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
TARGET_NODE_CASE(GETFUNPLT)
TARGET_NODE_CASE(GETSTACKTOP)
TARGET_NODE_CASE(GETTLSADDR)
- TARGET_NODE_CASE(CALL)
- TARGET_NODE_CASE(RET_FLAG)
TARGET_NODE_CASE(GLOBAL_BASE_REG)
+ TARGET_NODE_CASE(Hi)
+ TARGET_NODE_CASE(Lo)
+ TARGET_NODE_CASE(MEMBARRIER)
+ TARGET_NODE_CASE(RET_FLAG)
+ TARGET_NODE_CASE(TS1AM)
+ TARGET_NODE_CASE(VEC_BROADCAST)
+
+ // Register the VVP_* SDNodes.
+#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
+#include "VVPNodes.def"
}
#undef TARGET_NODE_CASE
return nullptr;
@@ -696,10 +922,17 @@ SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
0, TF);
+ if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
+ return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
+ CP->getAlign(), CP->getOffset(), TF);
+
if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
TF);
+ if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
+ return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
+
llvm_unreachable("Unhandled address SDNode");
}
@@ -722,32 +955,24 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
// Handle PIC mode first. VE needs a got load for every variable!
if (isPositionIndependent()) {
- // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this
- // function has calls.
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- MFI.setHasCalls(true);
auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
- if (isa<ConstantPoolSDNode>(Op) ||
+ if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
(GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
// Create following instructions for local linkage PIC code.
- // lea %s35, %gotoff_lo(.LCPI0_0)
- // and %s35, %s35, (32)0
- // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
- // adds.l %s35, %s15, %s35 ; %s15 is GOT
- // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
+ // lea %reg, label@gotoff_lo
+ // and %reg, %reg, (32)0
+ // lea.sl %reg, label@gotoff_hi(%reg, %got)
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
}
// Create following instructions for not local linkage PIC code.
- // lea %s35, %got_lo(.LCPI0_0)
- // and %s35, %s35, (32)0
- // lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
- // adds.l %s35, %s15, %s35 ; %s15 is GOT
- // ld %s35, (,%s35)
- // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
+ // lea %reg, label@got_lo
+ // and %reg, %reg, (32)0
+ // lea.sl %reg, label@got_hi(%reg)
+ // ld %reg, (%reg, %got)
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
VEMCExpr::VK_VE_GOT_LO32, DAG);
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
@@ -770,20 +995,222 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
/// Custom Lower {
-SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
+// The mappings for emitLeadingFence/emitTrailingFence on VE follow
+// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction *VETargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ switch (Ord) {
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
+ llvm_unreachable("Invalid fence: unordered/non-atomic");
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Acquire:
+ return nullptr; // Nothing to do
+ case AtomicOrdering::Release:
+ case AtomicOrdering::AcquireRelease:
+ return Builder.CreateFence(AtomicOrdering::Release);
+ case AtomicOrdering::SequentiallyConsistent:
+ if (!Inst->hasAtomicStore())
+ return nullptr; // Nothing to do
+ return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
+ }
+ llvm_unreachable("Unknown fence ordering in emitLeadingFence");
+}
+
+Instruction *VETargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ switch (Ord) {
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
+ llvm_unreachable("Invalid fence: unordered/not-atomic");
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Release:
+ return nullptr; // Nothing to do
+ case AtomicOrdering::Acquire:
+ case AtomicOrdering::AcquireRelease:
+ return Builder.CreateFence(AtomicOrdering::Acquire);
+ case AtomicOrdering::SequentiallyConsistent:
+ return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
+ }
+ llvm_unreachable("Unknown fence ordering in emitTrailingFence");
+}
+
+SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+ SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+ // VE uses Release Consistency, so a fence instruction is needed if this is
+ // a cross-thread fence.
+ if (FenceSSID == SyncScope::System) {
+ switch (FenceOrdering) {
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
+ case AtomicOrdering::Monotonic:
+ // No need to generate fencem instruction here.
+ break;
+ case AtomicOrdering::Acquire:
+ // Generate "fencem 2" as acquire fence.
+ return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+ DAG.getTargetConstant(2, DL, MVT::i32),
+ Op.getOperand(0)),
+ 0);
+ case AtomicOrdering::Release:
+ // Generate "fencem 1" as release fence.
+ return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+ DAG.getTargetConstant(1, DL, MVT::i32),
+ Op.getOperand(0)),
+ 0);
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ // Generate "fencem 3" as acq_rel and seq_cst fence.
+ // FIXME: "fencem 3" doesn't wait for for PCIe deveices accesses,
+ // so seq_cst may require more instruction for them.
+ return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+ DAG.getTargetConstant(3, DL, MVT::i32),
+ Op.getOperand(0)),
+ 0);
+ }
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
+TargetLowering::AtomicExpansionKind
+VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ // We have TS1AM implementation for i8/i16/i32/i64, so use it.
+ if (AI->getOperation() == AtomicRMWInst::Xchg) {
+ return AtomicExpansionKind::None;
+ }
+ // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
+
+ // Otherwise, expand it using a compare-and-exchange instruction so that the
+ // __sync_fetch_and_* functions are not called.
+ return AtomicExpansionKind::CmpXChg;
+}
+
+static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
+ SDValue &Bits) {
+ SDLoc DL(Op);
+ AtomicSDNode *N = cast<AtomicSDNode>(Op);
+ SDValue Ptr = N->getOperand(1);
+ SDValue Val = N->getOperand(2);
+ EVT PtrVT = Ptr.getValueType();
+ bool Byte = N->getMemoryVT() == MVT::i8;
+ // Remainder = AND Ptr, 3
+ // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
+ // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
+ // Bits = Remainder << 3
+ // NewVal = Val << Bits
+ SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
+ SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
+ SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
+ : DAG.getConstant(3, DL, MVT::i32);
+ Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
+ Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
+ return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
+}
+
+static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
+ SDValue Bits) {
+ SDLoc DL(Op);
+ EVT VT = Data.getValueType();
+ bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
+ // NewData = Data >> Bits
+ // Result = NewData & 0xff ; If Byte is true (1 byte)
+ // Result = NewData & 0xffff ; If Byte is false (2 bytes)
+
+ SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
+ return DAG.getNode(ISD::AND, DL, VT,
+ {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
+}
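A worked example of the shift arithmetic above, for an i16 swap at an address whose low two bits are 0b10 (numbers chosen purely for illustration):

// Ptr       = ...2 (low two bits 0b10)
// Remainder = Ptr & 3        = 2
// Flag      = 3 << Remainder = 0b1100 ; 2-byte swap flag for TS1AM
// Bits      = Remainder << 3 = 16
// NewVal    = Val << 16               ; value moved into bytes 2..3 of the word
// Aligned   = Ptr & -4                ; 4-byte aligned address given to TS1AM
// ...after TS1AM returns Data:
// Result    = (Data >> 16) & 0xffff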
+
+SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ AtomicSDNode *N = cast<AtomicSDNode>(Op);
+
+ if (N->getMemoryVT() == MVT::i8) {
+ // For i8, use "ts1am"
+ // Input:
+ // ATOMIC_SWAP Ptr, Val, Order
+ //
+ // Output:
+ // Remainder = AND Ptr, 3
+ // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
+ // Bits = Remainder << 3
+ // NewVal = Val << Bits
+ //
+ // Aligned = AND Ptr, -4
+ // Data = TS1AM Aligned, Flag, NewVal
+ //
+ // NewData = Data >> Bits
+ // Result = NewData & 0xff ; 1 byte result
+ SDValue Flag;
+ SDValue Bits;
+ SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
+
+ SDValue Ptr = N->getOperand(1);
+ SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
+ {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
+ SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
+ DAG.getVTList(Op.getNode()->getValueType(0),
+ Op.getNode()->getValueType(1)),
+ {N->getChain(), Aligned, Flag, NewVal},
+ N->getMemOperand());
+
+ SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
+ SDValue Chain = TS1AM.getValue(1);
+ return DAG.getMergeValues({Result, Chain}, DL);
+ }
+ if (N->getMemoryVT() == MVT::i16) {
+ // For i16, use "ts1am"
+ SDValue Flag;
+ SDValue Bits;
+ SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
+
+ SDValue Ptr = N->getOperand(1);
+ SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
+ {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
+ SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
+ DAG.getVTList(Op.getNode()->getValueType(0),
+ Op.getNode()->getValueType(1)),
+ {N->getChain(), Aligned, Flag, NewVal},
+ N->getMemOperand());
+
+ SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
+ SDValue Chain = TS1AM.getValue(1);
+ return DAG.getMergeValues({Result, Chain}, DL);
+ }
+ // Otherwise, let llvm legalize it.
+ return Op;
+}
+
+SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
-SDValue VETargetLowering::LowerBlockAddress(SDValue Op,
+SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ return makeAddress(Op, DAG);
+}
+
+SDValue VETargetLowering::lowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
SDValue
-VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
+VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
SelectionDAG &DAG) const {
- SDLoc dl(Op);
+ SDLoc DL(Op);
// Generate the following code:
// t1: ch,glue = callseq_start t0, 0, 0
@@ -799,13 +1226,13 @@ VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
DAG.getMachineFunction(), CallingConv::C);
- Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl);
+ Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
- Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true),
- DAG.getIntPtrConstant(0, dl, true),
- Chain.getValue(1), dl);
- Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1));
+ Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, DL, true),
+ DAG.getIntPtrConstant(0, DL, true),
+ Chain.getValue(1), DL);
+ Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
// GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
@@ -820,17 +1247,133 @@ VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
return Chain;
}
-SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
+SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
// The current implementation of nld (2.26) doesn't allow local exec model
// code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
// generate the general dynamic model code sequence.
//
// *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
- return LowerToTLSGeneralDynamicModel(Op, DAG);
+ return lowerToTLSGeneralDynamicModel(Op, DAG);
+}
+
+SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ return makeAddress(Op, DAG);
+}
+
+// Lower a f128 load into two f64 loads.
+static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
+ assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
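+ // Cap the alignment at 8 bytes since each half is loaded as a separate f64.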
+ unsigned Alignment = LdNode->getAlign().value();
+ if (Alignment > 8)
+ Alignment = 8;
+
+ SDValue Lo64 =
+ DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
+ LdNode->getPointerInfo(), Alignment,
+ LdNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ EVT AddrVT = LdNode->getBasePtr().getValueType();
+ SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
+ DAG.getConstant(8, DL, AddrVT));
+ SDValue Hi64 =
+ DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
+ LdNode->getPointerInfo(), Alignment,
+ LdNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+
+ SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
+ SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
+
+ // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
+ SDNode *InFP128 =
+ DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
+ InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
+ SDValue(InFP128, 0), Hi64, SubRegEven);
+ InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
+ SDValue(InFP128, 0), Lo64, SubRegOdd);
+ SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
+ SDValue(Hi64.getNode(), 1)};
+ SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
+ return DAG.getMergeValues(Ops, DL);
}
-SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
+
+ SDValue BasePtr = LdNode->getBasePtr();
+ if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
+ // Do not expand load instruction with frame index here because of
+ // dependency problems. We expand it later in eliminateFrameIndex().
+ return Op;
+ }
+
+ EVT MemVT = LdNode->getMemoryVT();
+ if (MemVT == MVT::f128)
+ return lowerLoadF128(Op, DAG);
+
+ return Op;
+}
+
+// Lower a f128 store into two f64 stores.
+static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
+ assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
+
+ SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
+ SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
+
+ SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
+ StNode->getValue(), SubRegEven);
+ SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
+ StNode->getValue(), SubRegOdd);
+
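+ // As in lowerLoadF128, cap the alignment at 8 bytes since each half is
+ // stored as a separate f64.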
+ unsigned Alignment = StNode->getAlign().value();
+ if (Alignment > 8)
+ Alignment = 8;
+
+ // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
+ SDValue OutChains[2];
+ OutChains[0] =
+ DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
+ StNode->getBasePtr(), MachinePointerInfo(), Alignment,
+ StNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ EVT AddrVT = StNode->getBasePtr().getValueType();
+ SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
+ DAG.getConstant(8, DL, AddrVT));
+ OutChains[1] =
+ DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
+ MachinePointerInfo(), Alignment,
+ StNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+}
+
+SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
+ assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
+
+ SDValue BasePtr = StNode->getBasePtr();
+ if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
+ // Do not expand store instruction with frame index here because of
+ // dependency problems. We expand it later in eliminateFrameIndex().
+ return Op;
+ }
+
+ EVT MemVT = StNode->getMemoryVT();
+ if (MemVT == MVT::f128)
+ return lowerStoreF128(Op, DAG);
+
+ // Otherwise, ask llvm to expand it.
+ return SDValue();
+}
+
+SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -849,7 +1392,7 @@ SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV));
}
-SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
EVT VT = Node->getValueType(0);
SDValue InChain = Node->getOperand(0);
@@ -862,7 +1405,19 @@ SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = VAList.getValue(1);
SDValue NextPtr;
- if (VT == MVT::f32) {
+ if (VT == MVT::f128) {
+ // VE f128 values must be stored with 16-byte alignment. We don't know
+ // the actual alignment of VAList, so we align it dynamically.
+ int Align = 16;
+ VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Align - 1, DL, PtrVT));
+ VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
+ DAG.getConstant(-Align, DL, PtrVT));
+ // Increment the pointer, VAList, by 16 to the next vaarg.
+ NextPtr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
+ } else if (VT == MVT::f32) {
// float --> need special handling like below.
// 0 4
// +------+------+
@@ -955,22 +1510,1325 @@ SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops, DL);
}
+SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
+ Op.getOperand(0));
+}
+
+static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
+ const VETargetLowering &TLI,
+ const VESubtarget *Subtarget) {
+ SDLoc DL(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
+
+ unsigned Depth = Op.getConstantOperandVal(0);
+ const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned FrameReg = RegInfo->getFrameRegister(MF);
+ SDValue FrameAddr =
+ DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
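+ // For a non-zero depth, walk up the frame chain: each iteration loads the
+ // previous frame address from offset 0 of the current frame.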
+ while (Depth--)
+ FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
+ FrameAddr, MachinePointerInfo());
+ return FrameAddr;
+}
+
+static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
+ const VETargetLowering &TLI,
+ const VESubtarget *Subtarget) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
+
+ if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
+
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
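+ // The return address is loaded from offset 8 of the frame address computed
+ // above.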
+ SDValue Offset = DAG.getConstant(8, DL, VT);
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
+ MachinePointerInfo());
+}
+
+SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: // Don't custom lower most intrinsics.
+ return SDValue();
+ case Intrinsic::eh_sjlj_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT VT = Op.getSimpleValueType();
+ const VETargetMachine *TM =
+ static_cast<const VETargetMachine *>(&DAG.getTarget());
+
+ // Create GCC_except_tableXX string. The real symbol for that will be
+ // generated in EHStreamer::emitExceptionTable() later. So, we just
+ // borrow its name here.
+ TM->getStrList()->push_back(std::string(
+ (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
+ SDValue Addr =
+ DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
+ if (isPositionIndependent()) {
+ Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
+ VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
+ SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
+ }
+ return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+ }
+ }
+}
+
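+// If \p N is a BUILD_VECTOR with exactly one non-undef operand, return true
+// and set \p UniqueIdx to the index of that operand.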
+static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
+ if (!isa<BuildVectorSDNode>(N))
+ return false;
+ const auto *BVN = cast<BuildVectorSDNode>(N);
+
+ // Find first non-undef insertion.
+ unsigned Idx;
+ for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
+ auto ElemV = BVN->getOperand(Idx);
+ if (!ElemV->isUndef())
+ break;
+ }
+ // Catch the (hypothetical) all-undef case.
+ if (Idx == BVN->getNumOperands())
+ return false;
+ // Remember insertion.
+ UniqueIdx = Idx++;
+ // Verify that all other insertions are undef.
+ for (; Idx < BVN->getNumOperands(); ++Idx) {
+ auto ElemV = BVN->getOperand(Idx);
+ if (!ElemV->isUndef())
+ return false;
+ }
+ return true;
+}
+
+static SDValue getSplatValue(SDNode *N) {
+ if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
+ return BuildVec->getSplatValue();
+ }
+ return SDValue();
+}
+
+SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ unsigned NumEls = Op.getValueType().getVectorNumElements();
+ MVT ElemVT = Op.getSimpleValueType().getVectorElementType();
+
+ // If there is just one element, expand to INSERT_VECTOR_ELT.
+ unsigned UniqueIdx;
+ if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
+ SDValue AccuV = DAG.getUNDEF(Op.getValueType());
+ auto ElemV = Op->getOperand(UniqueIdx);
+ SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64);
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV,
+ ElemV, IdxV);
+ }
+
+ // Else emit a broadcast.
+ if (SDValue ScalarV = getSplatValue(Op.getNode())) {
+ // lower to VEC_BROADCAST
+ MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
+
+ auto AVL = DAG.getConstant(NumEls, DL, MVT::i32);
+ return DAG.getNode(VEISD::VEC_BROADCAST, DL, LegalResVT, Op.getOperand(0),
+ AVL);
+ }
+
+ // Expand
+ return SDValue();
+}
+
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
+ unsigned Opcode = Op.getOpcode();
+ if (ISD::isVPOpcode(Opcode))
+ return lowerToVVP(Op, DAG);
+
+ switch (Opcode) {
default:
llvm_unreachable("Should not custom lower this!");
+ case ISD::ATOMIC_FENCE:
+ return lowerATOMIC_FENCE(Op, DAG);
+ case ISD::ATOMIC_SWAP:
+ return lowerATOMIC_SWAP(Op, DAG);
case ISD::BlockAddress:
- return LowerBlockAddress(Op, DAG);
+ return lowerBlockAddress(Op, DAG);
+ case ISD::ConstantPool:
+ return lowerConstantPool(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return lowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP:
+ return lowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP:
+ return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_SETUP_DISPATCH:
+ return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
+ case ISD::FRAMEADDR:
+ return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG);
+ return lowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
- return LowerGlobalTLSAddress(Op, DAG);
+ return lowerGlobalTLSAddress(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::JumpTable:
+ return lowerJumpTable(Op, DAG);
+ case ISD::LOAD:
+ return lowerLOAD(Op, DAG);
+ case ISD::RETURNADDR:
+ return lowerRETURNADDR(Op, DAG, *this, Subtarget);
+ case ISD::BUILD_VECTOR:
+ return lowerBUILD_VECTOR(Op, DAG);
+ case ISD::STORE:
+ return lowerSTORE(Op, DAG);
case ISD::VASTART:
- return LowerVASTART(Op, DAG);
+ return lowerVASTART(Op, DAG);
case ISD::VAARG:
- return LowerVAARG(Op, DAG);
+ return lowerVAARG(Op, DAG);
+
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+
+#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
+#include "VVPNodes.def"
+ return lowerToVVP(Op, DAG);
}
}
/// } Custom Lower
+
+void VETargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ case ISD::ATOMIC_SWAP:
+ // Let LLVM expand atomic swap instruction through LowerOperation.
+ return;
+ default:
+ LLVM_DEBUG(N->dumpr(&DAG));
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ }
+}
+
+/// JumpTable for VE.
+///
+/// VE cannot generate a relocatable symbol in a jump table. VE cannot
+/// generate expressions using symbols in both the text segment and the data
+/// segment like below.
+/// .4byte .LBB0_2-.LJTI0_0
+/// So, we generate an offset from the top of the function like below as
+/// a custom label.
+/// .4byte .LBB0_2-<function name>
+
+unsigned VETargetLowering::getJumpTableEncoding() const {
+ // Use custom label for PIC.
+ if (isPositionIndependent())
+ return MachineJumpTableInfo::EK_Custom32;
+
+ // Otherwise, use the normal jump table encoding heuristics.
+ return TargetLowering::getJumpTableEncoding();
+}
+
+const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
+ const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
+ unsigned Uid, MCContext &Ctx) const {
+ assert(isPositionIndependent());
+
+ // Generate custom label for PIC like below.
+ // .4bytes .LBB0_2-<function name>
+ const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
+ const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
+ return MCBinaryExpr::createSub(Value, Base, Ctx);
+}
+
+SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ assert(isPositionIndependent());
+ SDLoc DL(Table);
+ Function *Function = &DAG.getMachineFunction().getFunction();
+ assert(Function != nullptr);
+ auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
+
+ // In the jump table, we have the following values in PIC mode.
+ // .4bytes .LBB0_2-<function name>
+ // We need to add this value and the address of this function to generate
+ // the .LBB0_2 label correctly under PIC mode. So, we want to generate the
+ // following instructions:
+ // lea %reg, fun@gotoff_lo
+ // and %reg, %reg, (32)0
+ // lea.sl %reg, fun@gotoff_hi(%reg, %got)
+ // In order to do so, we need to generate a correctly marked DAG node using
+ // makeHiLoPair.
+ SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
+ SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
+ VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
+ SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
+ return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
+}
+
+Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock *TargetBB,
+ const DebugLoc &DL) const {
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const VEInstrInfo *TII = Subtarget->getInstrInfo();
+
+ const TargetRegisterClass *RC = &VE::I64RegClass;
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
+ Register Result = MRI.createVirtualRegister(RC);
+
+ if (isPositionIndependent()) {
+ // Create following instructions for local linkage PIC code.
+ // lea %Tmp1, TargetBB@gotoff_lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
+ BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
+ BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
+ .addReg(VE::SX15)
+ .addReg(Tmp2, getKillRegState(true))
+ .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
+ } else {
+ // Create following instructions for non-PIC code.
+ // lea %Tmp1, TargetBB@lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %Result, TargetBB@hi(%Tmp2)
+ BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
+ BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
+ .addReg(Tmp2, getKillRegState(true))
+ .addImm(0)
+ .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
+ }
+ return Result;
+}
+
+Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ StringRef Symbol, const DebugLoc &DL,
+ bool IsLocal = false,
+ bool IsCall = false) const {
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const VEInstrInfo *TII = Subtarget->getInstrInfo();
+
+ const TargetRegisterClass *RC = &VE::I64RegClass;
+ Register Result = MRI.createVirtualRegister(RC);
+
+ if (isPositionIndependent()) {
+ if (IsCall && !IsLocal) {
+ // Create following instructions for non-local linkage PIC code function
+ // calls. These instructions use IC and the magic number -24, so we expand
+ // them in VEAsmPrinter.cpp from the GETFUNPLT pseudo instruction.
+ // lea %Reg, Symbol@plt_lo(-24)
+ // and %Reg, %Reg, (32)0
+ // sic %s16
+ // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
+ BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
+ .addExternalSymbol("abort");
+ } else if (IsLocal) {
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
+ // Create following instructions for local linkage PIC code.
+ // lea %Tmp1, Symbol@gotoff_lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
+ BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
+ BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
+ .addReg(VE::SX15)
+ .addReg(Tmp2, getKillRegState(true))
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
+ } else {
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
+ // Create following instructions for non-local linkage PIC code.
+ // lea %Tmp1, Symbol@got_lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
+ // ld %Result, 0(%Tmp3)
+ Register Tmp3 = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
+ BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
+ .addReg(VE::SX15)
+ .addReg(Tmp2, getKillRegState(true))
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
+ BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
+ .addReg(Tmp3, getKillRegState(true))
+ .addImm(0)
+ .addImm(0);
+ }
+ } else {
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
+ // Create following instructions for non-PIC code.
+ // lea %Tmp1, Symbol@lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %Result, Symbol@hi(%Tmp2)
+ BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
+ BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
+ .addReg(Tmp2, getKillRegState(true))
+ .addImm(0)
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
+ }
+ return Result;
+}
+
+void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB,
+ int FI, int Offset) const {
+ DebugLoc DL = MI.getDebugLoc();
+ const VEInstrInfo *TII = Subtarget->getInstrInfo();
+
+ Register LabelReg =
+ prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
+
+ // Store the address of DispatchBB into the given jmpbuf[1], which holds the
+ // next IC referenced by longjmp (throw) later.
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+ addFrameReference(MIB, FI, Offset); // jmpbuf[1]
+ MIB.addReg(LabelReg, getKillRegState(true));
+}
+
+MachineBasicBlock *
+VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = ++MBB->getIterator();
+
+ // Memory Reference.
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
+ Register BufReg = MI.getOperand(1).getReg();
+
+ Register DstReg;
+
+ DstReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
+ (void)TRI;
+ Register MainDestReg = MRI.createVirtualRegister(RC);
+ Register RestoreDestReg = MRI.createVirtualRegister(RC);
+
+ // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
+ // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
+ //
+ // ThisMBB:
+ // buf[3] = %s17 iff %s17 is used as BP
+ // buf[1] = RestoreMBB as IC after longjmp
+ // # SjLjSetup RestoreMBB
+ //
+ // MainMBB:
+ // v_main = 0
+ //
+ // SinkMBB:
+ // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
+ // ...
+ //
+ // RestoreMBB:
+ // %s17 = buf[3] iff %s17 is used as BP
+ // v_restore = 1
+ // goto SinkMBB
+
+ MachineBasicBlock *ThisMBB = MBB;
+ MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, MainMBB);
+ MF->insert(I, SinkMBB);
+ MF->push_back(RestoreMBB);
+ RestoreMBB->setHasAddressTaken();
+
+ // Transfer the remainder of BB and its successor edges to SinkMBB.
+ SinkMBB->splice(SinkMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // ThisMBB:
+ Register LabelReg =
+ prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
+
+ // Store BP in buf[3] iff this function is using BP.
+ const VEFrameLowering *TFI = Subtarget->getFrameLowering();
+ if (TFI->hasBP(*MF)) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+ MIB.addReg(BufReg);
+ MIB.addImm(0);
+ MIB.addImm(24);
+ MIB.addReg(VE::SX17);
+ MIB.setMemRefs(MMOs);
+ }
+
+ // Store IP in buf[1].
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+ MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
+ MIB.addImm(0);
+ MIB.addImm(8);
+ MIB.addReg(LabelReg, getKillRegState(true));
+ MIB.setMemRefs(MMOs);
+
+ // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
+
+ // Insert setup.
+ MIB =
+ BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
+
+ const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ MIB.addRegMask(RegInfo->getNoPreservedMask());
+ ThisMBB->addSuccessor(MainMBB);
+ ThisMBB->addSuccessor(RestoreMBB);
+
+ // MainMBB:
+ BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+ MainMBB->addSuccessor(SinkMBB);
+
+ // SinkMBB:
+ BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
+ .addReg(MainDestReg)
+ .addMBB(MainMBB)
+ .addReg(RestoreDestReg)
+ .addMBB(RestoreMBB);
+
+ // RestoreMBB:
+ // Restore BP from buf[3] iff this function is using BP. The address of
+ // buf is in SX10.
+ // FIXME: Better to not use SX10 here
+ if (TFI->hasBP(*MF)) {
+ MachineInstrBuilder MIB =
+ BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
+ MIB.addReg(VE::SX10);
+ MIB.addImm(0);
+ MIB.addImm(24);
+ MIB.setMemRefs(MMOs);
+ }
+ BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1);
+ BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
+ RestoreMBB->addSuccessor(SinkMBB);
+
+ MI.eraseFromParent();
+ return SinkMBB;
+}
+
+MachineBasicBlock *
+VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference.
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
+ Register BufReg = MI.getOperand(0).getReg();
+
+ Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
+ // Since FP is only updated here but NOT referenced, it's treated as GPR.
+ Register FP = VE::SX9;
+ Register SP = VE::SX11;
+
+ MachineInstrBuilder MIB;
+
+ MachineBasicBlock *ThisMBB = MBB;
+
+ // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
+ //
+ // ThisMBB:
+ // %fp = load buf[0]
+ // %jmp = load buf[1]
+ // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
+ // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
+ // jmp %jmp
+
+ // Reload FP.
+ MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
+ MIB.addReg(BufReg);
+ MIB.addImm(0);
+ MIB.addImm(0);
+ MIB.setMemRefs(MMOs);
+
+ // Reload IP.
+ MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
+ MIB.addReg(BufReg);
+ MIB.addImm(0);
+ MIB.addImm(8);
+ MIB.setMemRefs(MMOs);
+
+ // Copy BufReg to SX10 for later use in setjmp.
+ // FIXME: Better to not use SX10 here
+ BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
+ .addReg(BufReg)
+ .addImm(0);
+
+ // Reload SP.
+ MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
+ MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
+ MIB.addImm(0);
+ MIB.addImm(16);
+ MIB.setMemRefs(MMOs);
+
+ // Jump.
+ BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
+ .addReg(Tmp, getKillRegState(true))
+ .addImm(0);
+
+ MI.eraseFromParent();
+ return ThisMBB;
+}
+
+MachineBasicBlock *
+VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = BB->getParent();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const VEInstrInfo *TII = Subtarget->getInstrInfo();
+ int FI = MFI.getFunctionContextIndex();
+
+ // Get a mapping of the call site numbers to all of the landing pads they're
+ // associated with.
+ DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
+ unsigned MaxCSNum = 0;
+ for (auto &MBB : *MF) {
+ if (!MBB.isEHPad())
+ continue;
+
+ MCSymbol *Sym = nullptr;
+ for (const auto &MI : MBB) {
+ if (MI.isDebugInstr())
+ continue;
+
+ assert(MI.isEHLabel() && "expected EH_LABEL");
+ Sym = MI.getOperand(0).getMCSymbol();
+ break;
+ }
+
+ if (!MF->hasCallSiteLandingPad(Sym))
+ continue;
+
+ for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
+ CallSiteNumToLPad[CSI].push_back(&MBB);
+ MaxCSNum = std::max(MaxCSNum, CSI);
+ }
+ }
+
+ // Get an ordered list of the machine basic blocks for the jump table.
+ std::vector<MachineBasicBlock *> LPadList;
+ SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
+ LPadList.reserve(CallSiteNumToLPad.size());
+
+ for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
+ for (auto &LP : CallSiteNumToLPad[CSI]) {
+ LPadList.push_back(LP);
+ InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
+ }
+ }
+
+ assert(!LPadList.empty() &&
+ "No landing pad destinations for the dispatch jump table!");
+
+ // The %fn_context is allocated like below (from --print-after=sjljehprepare):
+ // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
+ //
+ // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
+ // First `i64` is callsite, so callsite is FI+8.
+ static const int OffsetIC = 72;
+ static const int OffsetCS = 8;
+
+ // Create the MBBs for the dispatch code like following:
+ //
+ // ThisMBB:
+ // Prepare DispatchBB address and store it to buf[1].
+ // ...
+ //
+ // DispatchBB:
+ // %s15 = GETGOT iff isPositionIndependent
+ // %callsite = load callsite
+ // brgt.l.t #size of callsites, %callsite, DispContBB
+ //
+ // TrapBB:
+ // Call abort.
+ //
+ // DispContBB:
+ // %breg = address of jump table
+ // %pc = load and calculate next pc from %breg and %callsite
+ // jmp %pc
+
+ // Shove the dispatch's address into the return slot in the function context.
+ MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
+ DispatchBB->setIsEHPad(true);
+
+ // TrapBB will cause a trap like `assert(0)`.
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ DispatchBB->addSuccessor(TrapBB);
+
+ MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
+ DispatchBB->addSuccessor(DispContBB);
+
+ // Insert MBBs.
+ MF->push_back(DispatchBB);
+ MF->push_back(DispContBB);
+ MF->push_back(TrapBB);
+
+ // Insert code to call abort in the TrapBB.
+ Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
+ /* Local */ false, /* Call */ true);
+ BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
+ .addReg(Abort, getKillRegState(true))
+ .addImm(0)
+ .addImm(0);
+
+ // Insert code into the entry block that creates and registers the function
+ // context.
+ setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
+
+ // Create the jump table and associated information
+ unsigned JTE = getJumpTableEncoding();
+ MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
+ unsigned MJTI = JTI->createJumpTableIndex(LPadList);
+
+ const VERegisterInfo &RI = TII->getRegisterInfo();
+ // Add a register mask with no preserved registers. This results in all
+ // registers being marked as clobbered.
+ BuildMI(DispatchBB, DL, TII->get(VE::NOP))
+ .addRegMask(RI.getNoPreservedMask());
+
+ if (isPositionIndependent()) {
+ // Force generating GETGOT, since the current implementation doesn't store
+ // the GOT register.
+ BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
+ }
+
+ // IReg is used as an index in a memory operand and therefore can't be SP
+ const TargetRegisterClass *RC = &VE::I64RegClass;
+ Register IReg = MRI.createVirtualRegister(RC);
+ addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
+ OffsetCS);
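+ // Guard against out-of-range callsite values by branching to TrapBB. Use
+ // the immediate form of the compare-and-branch when the landing-pad count
+ // is below 64; otherwise materialize the count in a register first.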
+ if (LPadList.size() < 64) {
+ BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
+ .addImm(VECC::CC_ILE)
+ .addImm(LPadList.size())
+ .addReg(IReg)
+ .addMBB(TrapBB);
+ } else {
+ assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
+ Register TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(LPadList.size());
+ BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
+ .addImm(VECC::CC_ILE)
+ .addReg(TmpReg, getKillRegState(true))
+ .addReg(IReg)
+ .addMBB(TrapBB);
+ }
+
+ Register BReg = MRI.createVirtualRegister(RC);
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
+
+ if (isPositionIndependent()) {
+ // Create following instructions for local linkage PIC code.
+ // lea %Tmp1, .LJTI0_0@gotoff_lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
+ BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
+ BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
+ .addReg(VE::SX15)
+ .addReg(Tmp2, getKillRegState(true))
+ .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
+ } else {
+ // Create following instructions for non-PIC code.
+ // lea %Tmp1, .LJTI0_0@lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
+ BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
+ BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
+ .addReg(Tmp2, getKillRegState(true))
+ .addImm(0)
+ .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
+ }
+
+ switch (JTE) {
+ case MachineJumpTableInfo::EK_BlockAddress: {
+ // Generate simple block address code for no-PIC model.
+ // sll %Tmp1, %IReg, 3
+ // lds %TReg, 0(%Tmp1, %BReg)
+ // bcfla %TReg
+
+ Register TReg = MRI.createVirtualRegister(RC);
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+
+ BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
+ .addReg(IReg, getKillRegState(true))
+ .addImm(3);
+ BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
+ .addReg(BReg, getKillRegState(true))
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(0);
+ BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
+ .addReg(TReg, getKillRegState(true))
+ .addImm(0);
+ break;
+ }
+ case MachineJumpTableInfo::EK_Custom32: {
+ // Generate block address code using differences from the function pointer
+ // for PIC model.
+ // sll %Tmp1, %IReg, 2
+ // ldl.zx %OReg, 0(%Tmp1, %BReg)
+ // Prepare function address in BReg2.
+ // adds.l %TReg, %BReg2, %OReg
+ // bcfla %TReg
+
+ assert(isPositionIndependent());
+ Register OReg = MRI.createVirtualRegister(RC);
+ Register TReg = MRI.createVirtualRegister(RC);
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+
+ BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
+ .addReg(IReg, getKillRegState(true))
+ .addImm(2);
+ BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
+ .addReg(BReg, getKillRegState(true))
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(0);
+ Register BReg2 =
+ prepareSymbol(*DispContBB, DispContBB->end(),
+ DispContBB->getParent()->getName(), DL, /* Local */ true);
+ BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
+ .addReg(OReg, getKillRegState(true))
+ .addReg(BReg2, getKillRegState(true));
+ BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
+ .addReg(TReg, getKillRegState(true))
+ .addImm(0);
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected jump table encoding");
+ }
+
+ // Add the jump table entries as successors to the MBB.
+ SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
+ for (auto &LP : LPadList)
+ if (SeenMBBs.insert(LP).second)
+ DispContBB->addSuccessor(LP);
+
+ // N.B. the order the invoke BBs are processed in doesn't matter here.
+ SmallVector<MachineBasicBlock *, 64> MBBLPads;
+ const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
+ for (MachineBasicBlock *MBB : InvokeBBs) {
+ // Remove the landing pad successor from the invoke block and replace it
+ // with the new dispatch block.
+ // Keep a copy of Successors since it's modified inside the loop.
+ SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
+ MBB->succ_rend());
+ // FIXME: Avoid quadratic complexity.
+ for (auto MBBS : Successors) {
+ if (MBBS->isEHPad()) {
+ MBB->removeSuccessor(MBBS);
+ MBBLPads.push_back(MBBS);
+ }
+ }
+
+ MBB->addSuccessor(DispatchBB);
+
+ // Find the invoke call and mark all of the callee-saved registers as
+ // 'implicit defined' so that they're spilled. This prevents code from
+ // moving instructions to before the EH block, where they will never be
+ // executed.
+ for (auto &II : reverse(*MBB)) {
+ if (!II.isCall())
+ continue;
+
+ DenseMap<Register, bool> DefRegs;
+ for (auto &MOp : II.operands())
+ if (MOp.isReg())
+ DefRegs[MOp.getReg()] = true;
+
+ MachineInstrBuilder MIB(*MF, &II);
+ for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
+ Register Reg = SavedRegs[RI];
+ if (!DefRegs[Reg])
+ MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
+ }
+
+ break;
+ }
+ }
+
+ // Mark all former landing pads as non-landing pads. The dispatch is the only
+ // landing pad now.
+ for (auto &LP : MBBLPads)
+ LP->setIsEHPad(false);
+
+ // The instruction is gone now.
+ MI.eraseFromParent();
+ return BB;
+}
+
+MachineBasicBlock *
+VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown Custom Instruction!");
+ case VE::EH_SjLj_LongJmp:
+ return emitEHSjLjLongJmp(MI, BB);
+ case VE::EH_SjLj_SetJmp:
+ return emitEHSjLjSetJmp(MI, BB);
+ case VE::EH_SjLj_Setup_Dispatch:
+ return emitSjLjDispatchBlock(MI, BB);
+ }
+}
+
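+// Return true if \p User, a use of the truncate node \p N, treats the value
+// as a genuine i32, so that combineTRUNCATE below may replace the truncate
+// with an inexpensive EXTRACT_SUBREG.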
+static bool isI32Insn(const SDNode *User, const SDNode *N) {
+ switch (User->getOpcode()) {
+ default:
+ return false;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SETCC:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::BSWAP:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::BR_CC:
+ case ISD::BITCAST:
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ return true;
+ case ISD::SRL:
+ if (N->getOperand(0).getOpcode() != ISD::SRL)
+ return true;
+ // (srl (trunc (srl ...))) may be optimized by combining srl, so
+ // don't optimize the trunc now.
+ return false;
+ case ISD::SELECT_CC:
+ if (User->getOperand(2).getNode() != N &&
+ User->getOperand(3).getNode() != N)
+ return true;
+ LLVM_FALLTHROUGH;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SELECT:
+ case ISD::CopyToReg:
+ // Check all uses of selections, bit operations, and copies. If all of them
+ // are safe, optimize the truncate to an extract_subreg.
+ for (SDNode::use_iterator UI = User->use_begin(), UE = User->use_end();
+ UI != UE; ++UI) {
+ switch ((*UI)->getOpcode()) {
+ default:
+ // If the use is an instruction which treats the source operand as i32,
+ // it is safe to avoid truncate here.
+ if (isI32Insn(*UI, N))
+ continue;
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ // Special optimizations for the combination of ext and trunc.
+ // (ext ... (select ... (trunc ...))) is safe to avoid the truncate here
+ // since this truncate instruction clears the higher 32 bits, which are
+ // filled by one of the ext instructions later.
+ assert(N->getValueType(0) == MVT::i32 &&
+ "find truncate to not i32 integer");
+ if (User->getOpcode() == ISD::SELECT_CC ||
+ User->getOpcode() == ISD::SELECT)
+ continue;
+ break;
+ }
+ }
+ return false;
+ }
+ return true;
+ }
+}
+
+// Optimize TRUNCATE in DAG combining. Optimizing it in custom lowering is
+// sometimes too early, and optimizing it in DAG pattern matching in
+// VEInstrInfo.td is sometimes too late. So, we do it here.
+SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert(N->getOpcode() == ISD::TRUNCATE &&
+ "Should be called with a TRUNCATE node");
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // We prefer to do this when all types are legal.
+ if (!DCI.isAfterLegalizeDAG())
+ return SDValue();
+
+ // For now, skip combining TRUNCATE if its operand might be a constant.
+ if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
+ isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
+ isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
+ return SDValue();
+
+ // Check all uses of this TRUNCATE.
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
+ ++UI) {
+ SDNode *User = *UI;
+
+ // Make sure that we're not going to replace TRUNCATE for non-i32
+ // instructions.
+ //
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (isI32Insn(User, N))
+ continue;
+
+ return SDValue();
+ }
+
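+ // All uses handle the operand as i32, so it is safe to replace this
+ // TRUNCATE with an inexpensive EXTRACT_SUBREG of the i64 operand.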
+ SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
+ return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
+ N->getOperand(0), SubI32),
+ 0);
+}
+
+SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::TRUNCATE:
+ return combineTRUNCATE(N, DCI);
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// VE Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+VETargetLowering::ConstraintType
+VETargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ break;
+ case 'v': // vector registers
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint,
+ MVT VT) const {
+ const TargetRegisterClass *RC = nullptr;
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ case 'r':
+ RC = &VE::I64RegClass;
+ break;
+ case 'v':
+ RC = &VE::V64RegClass;
+ break;
+ }
+ return std::make_pair(0U, RC);
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// VE Target Optimization Support
+//===----------------------------------------------------------------------===//
+
+unsigned VETargetLowering::getMinimumJumpTableEntries() const {
+ // Specify 8 for PIC model to relieve the impact of PIC load instructions.
+ if (isJumpTableRelative())
+ return 8;
+
+ return TargetLowering::getMinimumJumpTableEntries();
+}
+
+bool VETargetLowering::hasAndNot(SDValue Y) const {
+ EVT VT = Y.getValueType();
+
+ // VE doesn't have a vector and-not instruction.
+ if (VT.isVector())
+ return false;
+
+ // VE allows different immediate values for X and Y where ~X & Y.
+ // Only simm7 works for X, and only mimm works for Y on VE. However, this
+ // function is used to check whether an immediate value is OK for an and-not
+ // instruction as both X and Y. Generating an additional instruction to
+ // retrieve an immediate value is no good since the purpose of this
+ // function is to convert a series of 3 instructions to another series of
+ // 3 instructions with better parallelism. Therefore, we return false
+ // for all immediate values now.
+ // FIXME: Change hasAndNot function to have two operands to make it work
+ // correctly with Aurora VE.
+ if (isa<ConstantSDNode>(Y))
+ return false;
+
+ // It's ok for generic registers.
+ return true;
+}
+
+/// \returns the VVP_* SDNode opcode corresponding to \p Opcode.
+static Optional<unsigned> getVVPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \
+ case ISD::VPOPC: \
+ return VEISD::VVPNAME;
+#define ADD_VVP_OP(VVPNAME, SDNAME) \
+ case VEISD::VVPNAME: \
+ case ISD::SDNAME: \
+ return VEISD::VVPNAME;
+#include "VVPNodes.def"
+ }
+ return None;
+}
+
+SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
+ // Can we represent this as a VVP node?
+ const unsigned Opcode = Op->getOpcode();
+ auto VVPOpcodeOpt = getVVPOpcode(Opcode);
+ if (!VVPOpcodeOpt.hasValue())
+ return SDValue();
+ unsigned VVPOpcode = VVPOpcodeOpt.getValue();
+ const bool FromVP = ISD::isVPOpcode(Opcode);
+
+ // The representative and legalized vector type of this operation.
+ SDLoc DL(Op);
+ MVT MaskVT = MVT::v256i1; // TODO: packed mode.
+ EVT OpVecVT = Op.getValueType();
+ EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
+
+ SDValue AVL;
+ SDValue Mask;
+
+ if (FromVP) {
+ // All upstream VP SDNodes always have a mask and avl.
+ auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue();
+ auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue();
+ Mask = Op->getOperand(MaskIdx);
+ AVL = Op->getOperand(AVLIdx);
+
+ } else {
+ // Materialize the VL parameter.
+ AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
+ SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
+ Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
+ ConstTrue); // emit a VEISD::VEC_BROADCAST here.
+ }
+
+ // Categories we are interested in.
+ bool IsBinaryOp = false;
+
+ switch (VVPOpcode) {
+#define ADD_BINARY_VVP_OP(VVPNAME, ...) \
+ case VEISD::VVPNAME: \
+ IsBinaryOp = true; \
+ break;
+#include "VVPNodes.def"
+ }
+
+ if (IsBinaryOp) {
+ assert(LegalVecVT.isSimple());
+ return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0),
+ Op->getOperand(1), Mask, AVL);
+ }
+ llvm_unreachable("lowerToVVP called for unexpected SDNode.");
+}
+
+SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
+ MVT VT = Op.getOperand(0).getSimpleValueType();
+
+ // Special treatment for packed V64 types.
+ assert(VT == MVT::v512i32 || VT == MVT::v512f32);
+ // Example of the generated code:
+ // %packed_v = extractelt %vr, %idx / 2
+ // %v = %packed_v >> (%idx % 2 * 32)
+ // %res = %v & 0xffffffff
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ SDLoc DL(Op);
+ SDValue Result = Op;
+ if (0 /* Idx->isConstant() */) {
+ // TODO: optimized implementation using constant values
+ } else {
+ SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
+ SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
+ SDValue PackedElt =
+ SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
+ SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
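+ // The shift amount is ((Idx & 1) ^ 1) << 5: an even index selects the upper
+ // 32 bits of the packed element (shift right by 32), an odd index selects
+ // the lower 32 bits (shift by 0).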
+ SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
+ SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
+ Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
+ PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
+ SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
+ PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
+ SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
+ Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::i32, PackedElt, SubI32),
+ 0);
+
+ if (Op.getSimpleValueType() == MVT::f32) {
+ Result = DAG.getBitcast(MVT::f32, Result);
+ } else {
+ assert(Op.getSimpleValueType() == MVT::i32);
+ }
+ }
+ return Result;
+}
+
+SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
+ MVT VT = Op.getOperand(0).getSimpleValueType();
+
+ // Special treatment for packed V64 types.
+ assert(VT == MVT::v512i32 || VT == MVT::v512f32);
+ // The v512i32 and v512f32 types start from the upper bits (0..31). These
+ // "upper bits" require `val << 32` from the C implementation's point of view.
+ //
+ // Example of the generated code:
+ // %packed_elt = extractelt %vr, (%idx >> 1)
+ // %shift = ((%idx & 1) ^ 1) << 5
+ // %packed_elt &= 0xffffffff00000000 >> shift
+ // %packed_elt |= (zext %val) << shift
+ // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
+
+ SDLoc DL(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ if (Idx.getSimpleValueType() == MVT::i32)
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
+ if (Val.getSimpleValueType() == MVT::f32)
+ Val = DAG.getBitcast(MVT::i32, Val);
+ assert(Val.getSimpleValueType() == MVT::i32);
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+
+ SDValue Result = Op;
+ if (0 /* Idx->isConstant()*/) {
+ // TODO: optimized implementation using constant values
+ } else {
+ SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
+ SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
+ SDValue PackedElt =
+ SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
+ SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
+ SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
+ SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
+ Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
+ SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
+ Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
+ PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
+ Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
+ PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
+ Result =
+ SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
+ {HalfIdx, PackedElt, Vec}),
+ 0);
+ }
+ return Result;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
index 4633220efaa1..a6e1bf396035 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
@@ -24,23 +24,36 @@ namespace VEISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- Hi,
- Lo, // Hi/Lo operations, typically on a global address.
-
- GETFUNPLT, // load function address through %plt insturction
- GETTLSADDR, // load address for TLS access
- GETSTACKTOP, // retrieve address of stack top (first address of
- // locals and temporaries)
-
- CALL, // A call instruction.
- RET_FLAG, // Return with a flag operand.
- GLOBAL_BASE_REG, // Global base reg for PIC.
+ CALL, // A call instruction.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
+ GETFUNPLT, // Load function address through %plt instruction.
+ GETTLSADDR, // Load address for TLS access.
+ GETSTACKTOP, // Retrieve address of stack top (first address of
+ // locals and temporaries).
+ GLOBAL_BASE_REG, // Global base reg for PIC.
+ Hi, // Hi/Lo operations, typically on a global address.
+ Lo, // Hi/Lo operations, typically on a global address.
+ MEMBARRIER, // Compiler barrier only; generate a no-op.
+ RET_FLAG, // Return with a flag operand.
+ TS1AM, // A TS1AM instruction used for 1/2-byte swaps.
+ VEC_BROADCAST, // A vector broadcast instruction.
+ // 0: scalar value, 1: VL
+
+// VVP_* nodes.
+#define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME,
+#include "VVPNodes.def"
};
}
class VETargetLowering : public TargetLowering {
const VESubtarget *Subtarget;
+ void initRegisterClasses();
+ void initSPUActions();
+ void initVPUActions();
+
public:
VETargetLowering(const TargetMachine &TM, const VESubtarget &STI);
@@ -74,23 +87,98 @@ public:
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
SelectionDAG &DAG) const override;
+ /// Helper functions for atomic operations.
+ bool shouldInsertFencesForAtomic(const Instruction *I) const override {
+ // VE uses release consistency, so we need a fence for each atomic operation.
+ return true;
+ }
+ Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
+ Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
/// Custom Lower {
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
-
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
+ unsigned getJumpTableEncoding() const override;
+ const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned Uid,
+ MCContext &Ctx) const override;
+ SDValue getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const override;
+ // VE doesn't need getPICJumpTableRelocBaseExpr since it is only used for
+ // EK_LabelDifference32.
+
+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
/// } Custom Lower
+ /// Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
+ /// Custom Inserter {
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *MBB) const override;
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *emitSjLjDispatchBlock(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
+ void setupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI,
+ int Offset) const;
+ // Set up a basic block address.
+ Register prepareMBB(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ MachineBasicBlock *TargetBB, const DebugLoc &DL) const;
+ // Prepare function/variable address.
+ Register prepareSymbol(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ StringRef Symbol, const DebugLoc &DL, bool IsLocal,
+ bool IsCall) const;
+ /// } Custom Inserter
+
+ /// VVP Lowering {
+ SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
+ /// } VVP Lowering
+
+ /// Custom DAGCombine {
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
+ SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
+ /// } Custom DAGCombine
+
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const;
SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
/// Returns true if the target allows unaligned memory accesses of the
@@ -99,10 +187,32 @@ public:
MachineMemOperand::Flags Flags,
bool *Fast) const override;
- // Block s/udiv lowering for now
- bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; }
+ /// Inline Assembly {
+
+ ConstraintType getConstraintType(StringRef Constraint) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
+
+ /// } Inline Assembly
+ /// Target Optimization {
+
+ // Return the lower limit for the number of entries in a jump table.
+ unsigned getMinimumJumpTableEntries() const override;
+
+ // SX-Aurora VE's s/udiv is 5-9 times slower than multiply.
+ bool isIntDivCheap(EVT, AttributeList) const override { return false; }
+ // VE doesn't have rem.
+ bool hasStandaloneRem(EVT) const override { return false; }
+ // VE LDZ instruction returns 64 if the input is zero.
+ bool isCheapToSpeculateCtlz() const override { return true; }
+ // VE LDZ instruction is fast.
+ bool isCtlzFast() const override { return true; }
+ // VE has NND instruction.
bool hasAndNot(SDValue Y) const override;
+
+ /// } Target Optimization
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h
new file mode 100644
index 000000000000..1b0e07546931
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h
@@ -0,0 +1,41 @@
+//===-- VEInstrBuilder.h - Aides for building VE insts ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H
+#define LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference initially uses the FrameIndex as its base register until it is
+/// resolved. This allows a constant offset to be specified as well.
+///
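+/// A minimal usage sketch (hypothetical spill of SrcReg to frame index FI,
+/// assuming the three-operand VE::STrii store form):
+///   addFrameReference(BuildMI(MBB, I, DL, TII.get(VE::STrii)), FI)
+///       .addReg(SrcReg);
+/// which expands to .addFrameIndex(FI).addImm(0).addImm(0) ahead of the
+/// trailing source-register operand.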
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool ThreeOp = true) {
+ if (ThreeOp)
+ return MIB.addFrameIndex(FI).addImm(0).addImm(Offset);
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
index 0c02411ff916..f43c9755f1b9 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
@@ -35,6 +35,25 @@ class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern>
let AsmString = asmstr;
let Pattern = pattern;
+ bits<1> VE_Vector = 0;
+ bits<1> VE_VLInUse = 0;
+ bits<3> VE_VLIndex = 0;
+ bits<1> VE_VLWithMask = 0;
+
+ /// These fields correspond to the fields in VEInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ ///
+ /// VLIndex is the index of VL register in MI's operands. The HW instruction
+ /// doesn't have that field, but we add it in MI for ease of optimization.
+ /// For example, the index of VL of (VST $sy, $sz, $sx, $vl) is 3 (counting
+ /// from 0), and the index of VL of (VST $sy, $sz, $sx, $vm, $vl) is 4. We
+ /// define vector instructions hierarchically, so we use VE_VLIndex, which is
+ /// determined by the type of instruction, and VE_VLWithMask, which tells
+ /// whether the instruction uses a mask or not.
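+ ///
+ /// For example (following the VST case above), an unmasked VST has
+ /// VE_VLIndex = 3 and VE_VLWithMask = 0, so TSFlags{4-2} holds 3, the operand
+ /// index of VL; the masked form sets VE_VLWithMask = 1, giving 4. GET_VLINDEX
+ /// in VEInstrInfo.h recovers this index.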
+ let TSFlags{0} = VE_Vector;
+ let TSFlags{1} = VE_VLInUse;
+ let TSFlags{4-2} = !add(VE_VLIndex, VE_VLWithMask);
+
let DecoderNamespace = "VE";
field bits<64> SoftFail = 0;
}
@@ -179,12 +198,82 @@ class RRFENCE<bits<8>opVal, dag outs, dag ins, string asmstr,
//-----------------------------------------------------------------------------
// Section 5.6 RVM Type
+//
+// RVM type is for vector transfer instructions.
//-----------------------------------------------------------------------------
+class RVM<bits<8>opVal, dag outs, dag ins, string asmstr,
+ list<dag> pattern = []>
+ : InstVE<outs, ins, asmstr, pattern> {
+ bits<1> cx = 0;
+ bits<1> vc = 0;
+ bits<1> cs = 0;
+ bits<4> m = 0;
+ bits<1> cy = 1;
+ bits<7> sy;
+ bits<1> cz = 1;
+ bits<7> sz;
+ bits<8> vx;
+ bits<8> vy = 0;
+ bits<7> sw = 0;
+ let op = opVal;
+ let Inst{55} = cx;
+ let Inst{54} = vc;
+ let Inst{53} = cs;
+ let Inst{52} = 0;
+ let Inst{51-48} = m;
+ let Inst{47} = cy;
+ let Inst{46-40} = sy;
+ let Inst{39} = cz;
+ let Inst{38-32} = sz;
+ let Inst{31-24} = vx;
+ let Inst{23-16} = vy;
+ let Inst{15-8} = 0;
+ let Inst{7} = 0;
+ let Inst{6-0} = sw;
+
+ let VE_Vector = 1;
+}
+
//-----------------------------------------------------------------------------
// Section 5.7 RV Type
+//
+// RV type is for vector instructions.
//-----------------------------------------------------------------------------
+class RV<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []>
+ : InstVE<outs, ins, asmstr, pattern> {
+ bits<1> cx = 0;
+ bits<1> cx2 = 0;
+ bits<1> cs = 0;
+ bits<1> cs2 = 0;
+ bits<4> m = 0;
+ bits<1> cy = 1;
+ bits<7> sy;
+ bits<1> cz = 0;
+ bits<7> sz = 0;
+ bits<8> vx = 0;
+ bits<8> vy = 0;
+ bits<8> vz = 0;
+ bits<8> vw = 0;
+ let op = opVal;
+ let Inst{55} = cx;
+ let Inst{54} = cx2;
+ let Inst{53} = cs;
+ let Inst{52} = cs2;
+ let Inst{51-48} = m;
+ let Inst{47} = cy;
+ let Inst{46-40} = sy;
+ let Inst{39} = cz;
+ let Inst{38-32} = sz;
+ let Inst{31-24} = vx;
+ let Inst{23-16} = vy;
+ let Inst{15-8} = vz;
+ let Inst{7-0} = vw;
+
+ let VE_Vector = 1;
+}
+
// Pseudo instructions.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstVE<outs, ins, asmstr, pattern> {
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
index 86b2ac2078b1..9770052ff913 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -92,38 +92,46 @@ static VECC::CondCode GetOppositeBranchCondition(VECC::CondCode CC) {
llvm_unreachable("Invalid cond code");
}
-// Treat br.l [BRCF AT] as unconditional branch
+// Treat "branch relative always" long instructions as unconditional branches,
+// e.g. br.l and br.l.t.
static bool isUncondBranchOpcode(int Opc) {
- return Opc == VE::BRCFLa || Opc == VE::BRCFWa ||
- Opc == VE::BRCFLa_nt || Opc == VE::BRCFWa_nt ||
- Opc == VE::BRCFLa_t || Opc == VE::BRCFWa_t ||
- Opc == VE::BRCFDa || Opc == VE::BRCFSa ||
- Opc == VE::BRCFDa_nt || Opc == VE::BRCFSa_nt ||
- Opc == VE::BRCFDa_t || Opc == VE::BRCFSa_t;
+ using namespace llvm::VE;
+
+#define BRKIND(NAME) (Opc == NAME##a || Opc == NAME##a_nt || Opc == NAME##a_t)
+ // VE has other branch relative always instructions for word/double/float,
+ // but we use only long branches in our lowering. So, sanity check it here.
+ assert(!BRKIND(BRCFW) && !BRKIND(BRCFD) && !BRKIND(BRCFS) &&
+ "Branch relative word/double/float always instructions should not be "
+ "used!");
+ return BRKIND(BRCFL);
+#undef BRKIND
}
+// Treat "branch relative on condition" instructions as conditional branches,
+// e.g. brgt.l.t and brle.s.nt.
static bool isCondBranchOpcode(int Opc) {
- return Opc == VE::BRCFLrr || Opc == VE::BRCFLir ||
- Opc == VE::BRCFLrr_nt || Opc == VE::BRCFLir_nt ||
- Opc == VE::BRCFLrr_t || Opc == VE::BRCFLir_t ||
- Opc == VE::BRCFWrr || Opc == VE::BRCFWir ||
- Opc == VE::BRCFWrr_nt || Opc == VE::BRCFWir_nt ||
- Opc == VE::BRCFWrr_t || Opc == VE::BRCFWir_t ||
- Opc == VE::BRCFDrr || Opc == VE::BRCFDir ||
- Opc == VE::BRCFDrr_nt || Opc == VE::BRCFDir_nt ||
- Opc == VE::BRCFDrr_t || Opc == VE::BRCFDir_t ||
- Opc == VE::BRCFSrr || Opc == VE::BRCFSir ||
- Opc == VE::BRCFSrr_nt || Opc == VE::BRCFSir_nt ||
- Opc == VE::BRCFSrr_t || Opc == VE::BRCFSir_t;
+ using namespace llvm::VE;
+
+#define BRKIND(NAME) \
+ (Opc == NAME##rr || Opc == NAME##rr_nt || Opc == NAME##rr_t || \
+ Opc == NAME##ir || Opc == NAME##ir_nt || Opc == NAME##ir_t)
+ return BRKIND(BRCFL) || BRKIND(BRCFW) || BRKIND(BRCFD) || BRKIND(BRCFS);
+#undef BRKIND
}
+// Treat "branch always" long instructions as indirect branches,
+// e.g. b.l and b.l.t.
static bool isIndirectBranchOpcode(int Opc) {
- return Opc == VE::BCFLari || Opc == VE::BCFLari ||
- Opc == VE::BCFLari_nt || Opc == VE::BCFLari_nt ||
- Opc == VE::BCFLari_t || Opc == VE::BCFLari_t ||
- Opc == VE::BCFLari || Opc == VE::BCFLari ||
- Opc == VE::BCFLari_nt || Opc == VE::BCFLari_nt ||
- Opc == VE::BCFLari_t || Opc == VE::BCFLari_t;
+ using namespace llvm::VE;
+
+#define BRKIND(NAME) \
+ (Opc == NAME##ari || Opc == NAME##ari_nt || Opc == NAME##ari_t)
+ // VE has other branch always instructions for word/double/float, but
+ // we use only long branches in our lowering. So, sanity check it here.
+ assert(!BRKIND(BCFW) && !BRKIND(BCFD) && !BRKIND(BCFS) &&
+ "Branch word/double/float always instructions should not be used!");
+ return BRKIND(BCFL);
+#undef BRKIND
}
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
@@ -311,11 +319,43 @@ bool VEInstrInfo::reverseBranchCondition(
}
static bool IsAliasOfSX(Register Reg) {
- return VE::I8RegClass.contains(Reg) || VE::I16RegClass.contains(Reg) ||
- VE::I32RegClass.contains(Reg) || VE::I64RegClass.contains(Reg) ||
+ return VE::I32RegClass.contains(Reg) || VE::I64RegClass.contains(Reg) ||
VE::F32RegClass.contains(Reg);
}
+static void copyPhysSubRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
+ const MCInstrDesc &MCID, unsigned int NumSubRegs,
+ const unsigned *SubRegIdx,
+ const TargetRegisterInfo *TRI) {
+ MachineInstr *MovMI = nullptr;
+
+ for (unsigned Idx = 0; Idx != NumSubRegs; ++Idx) {
+ Register SubDest = TRI->getSubReg(DestReg, SubRegIdx[Idx]);
+ Register SubSrc = TRI->getSubReg(SrcReg, SubRegIdx[Idx]);
+ assert(SubDest && SubSrc && "Bad sub-register");
+
+ if (MCID.getOpcode() == VE::ORri) {
+ // generate "ORri, dest, src, 0" instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, I, DL, MCID, SubDest).addReg(SubSrc).addImm(0);
+ MovMI = MIB.getInstr();
+ } else if (MCID.getOpcode() == VE::ANDMmm) {
+ // generate "ANDM, dest, vm0, src" instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, I, DL, MCID, SubDest).addReg(VE::VM0).addReg(SubSrc);
+ MovMI = MIB.getInstr();
+ } else {
+ llvm_unreachable("Unexpected reg-to-reg copy instruction");
+ }
+ }
+ // Add implicit super-register defs and kills to the last MovMI.
+ MovMI->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ MovMI->addRegisterKilled(SrcReg, TRI, true);
+}
+
void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
MCRegister DestReg, MCRegister SrcReg,
@@ -325,6 +365,41 @@ void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(VE::ORri), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0);
+ } else if (VE::V64RegClass.contains(DestReg, SrcReg)) {
+ // Generate the following instructions:
+ // %sw16 = LEAzii 0, 0, 256
+ // VORmvl %dest, (0)1, %src, %sw16
+ // TODO: reuse a register if vl is already assigned to a register
+ // FIXME: it would be better to scavenge a register here instead of
+ // reserving SX16 all of the time.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ Register TmpReg = VE::SX16;
+ Register SubTmp = TRI->getSubReg(TmpReg, VE::sub_i32);
+ BuildMI(MBB, I, DL, get(VE::LEAzii), TmpReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(256);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(VE::VORmvl), DestReg)
+ .addImm(M1(0)) // Represent (0)1.
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SubTmp, getKillRegState(true));
+ MIB.getInstr()->addRegisterKilled(TmpReg, TRI, true);
+ } else if (VE::VMRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(VE::ANDMmm), DestReg)
+ .addReg(VE::VM0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (VE::VM512RegClass.contains(DestReg, SrcReg)) {
+ // Use two instructions.
+ const unsigned SubRegIdx[] = {VE::sub_vm_even, VE::sub_vm_odd};
+ unsigned int NumSubRegs = 2;
+ copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ANDMmm),
+ NumSubRegs, SubRegIdx, &getRegisterInfo());
+ } else if (VE::F128RegClass.contains(DestReg, SrcReg)) {
+ // Use two instructions.
+ const unsigned SubRegIdx[] = {VE::sub_even, VE::sub_odd};
+ unsigned int NumSubRegs = 2;
+ copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ORri),
+ NumSubRegs, SubRegIdx, &getRegisterInfo());
} else {
const TargetRegisterInfo *TRI = &getRegisterInfo();
dbgs() << "Impossible reg-to-reg copy from " << printReg(SrcReg, TRI)
@@ -342,7 +417,8 @@ unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
if (MI.getOpcode() == VE::LDrii || // I64
MI.getOpcode() == VE::LDLSXrii || // I32
- MI.getOpcode() == VE::LDUrii // F32
+ MI.getOpcode() == VE::LDUrii || // F32
+ MI.getOpcode() == VE::LDQrii // F128 (pseudo)
) {
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0 && MI.getOperand(3).isImm() &&
@@ -363,7 +439,8 @@ unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
if (MI.getOpcode() == VE::STrii || // I64
MI.getOpcode() == VE::STLrii || // I32
- MI.getOpcode() == VE::STUrii // F32
+ MI.getOpcode() == VE::STUrii || // F32
+ MI.getOpcode() == VE::STQrii // F128 (pseudo)
) {
if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
MI.getOperand(1).getImm() == 0 && MI.getOperand(2).isImm() &&
@@ -412,6 +489,13 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addImm(0)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO);
+ } else if (VE::F128RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(VE::STQrii))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO);
} else
report_fatal_error("Can't store this register to stack slot");
}
@@ -449,10 +533,194 @@ void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addImm(0)
.addImm(0)
.addMemOperand(MMO);
+ } else if (VE::F128RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(VE::LDQrii), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addMemOperand(MMO);
} else
report_fatal_error("Can't load this register from stack slot");
}
+bool VEInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ Register Reg, MachineRegisterInfo *MRI) const {
+ LLVM_DEBUG(dbgs() << "FoldImmediate\n");
+
+ LLVM_DEBUG(dbgs() << "checking DefMI\n");
+ int64_t ImmVal;
+ switch (DefMI.getOpcode()) {
+ default:
+ return false;
+ case VE::ORim:
+ // General move small immediate instruction on VE.
+ LLVM_DEBUG(dbgs() << "checking ORim\n");
+ LLVM_DEBUG(DefMI.dump());
+ // FIXME: We may need to support FPImm too.
+ assert(DefMI.getOperand(1).isImm());
+ assert(DefMI.getOperand(2).isImm());
+ ImmVal =
+ DefMI.getOperand(1).getImm() + mimm2Val(DefMI.getOperand(2).getImm());
+ LLVM_DEBUG(dbgs() << "ImmVal is " << ImmVal << "\n");
+ break;
+ case VE::LEAzii:
+ // General move immediate instruction on VE.
+ LLVM_DEBUG(dbgs() << "checking LEAzii\n");
+ LLVM_DEBUG(DefMI.dump());
+ // FIXME: We may need to support FPImm too.
+ assert(DefMI.getOperand(2).isImm());
+ if (!DefMI.getOperand(3).isImm())
+ // LEAzii may refer to a label.
+ return false;
+ ImmVal = DefMI.getOperand(2).getImm() + DefMI.getOperand(3).getImm();
+ LLVM_DEBUG(dbgs() << "ImmVal is " << ImmVal << "\n");
+ break;
+ }
+
+ // Try to fold like below:
+ // %1:i64 = ORim 0, 0(1)
+ // %2:i64 = CMPSLrr %0, %1
+ // To
+ // %2:i64 = CMPSLrm %0, 0(1)
+ //
+ // Another example:
+ // %1:i64 = ORim 6, 0(1)
+ // %2:i64 = CMPSLrr %1, %0
+ // To
+ // %2:i64 = CMPSLir 6, %0
+ //
+ // Support commutable instructions like below:
+ // %1:i64 = ORim 6, 0(1)
+ // %2:i64 = ADDSLrr %1, %0
+ // To
+ // %2:i64 = ADDSLri %0, 6
+ //
+ // FIXME: Need to support i32. The current implementation requires
+ // EXTRACT_SUBREG, so the input has a COPY like the following, which
+ // prevents folding:
+ // %1:i64 = ORim 6, 0(1)
+ // %2:i32 = COPY %1.sub_i32
+ // %3:i32 = ADDSWSXrr %0, %2
+ // FIXME: Need to support shift, cmov, and more instructions.
+ // FIXME: Need to support lvl too, but LVLGen runs after peephole-opt.
+
+ LLVM_DEBUG(dbgs() << "checking UseMI\n");
+ LLVM_DEBUG(UseMI.dump());
+ unsigned NewUseOpcSImm7;
+ unsigned NewUseOpcMImm;
+ enum InstType {
+ rr2ri_rm, // rr -> ri or rm, commutable
+ rr2ir_rm, // rr -> ir or rm
+ } InstType;
+
+ using namespace llvm::VE;
+#define INSTRKIND(NAME) \
+ case NAME##rr: \
+ NewUseOpcSImm7 = NAME##ri; \
+ NewUseOpcMImm = NAME##rm; \
+ InstType = rr2ri_rm; \
+ break
+#define NCINSTRKIND(NAME) \
+ case NAME##rr: \
+ NewUseOpcSImm7 = NAME##ir; \
+ NewUseOpcMImm = NAME##rm; \
+ InstType = rr2ir_rm; \
+ break
+
+ switch (UseMI.getOpcode()) {
+ default:
+ return false;
+
+ INSTRKIND(ADDUL);
+ INSTRKIND(ADDSWSX);
+ INSTRKIND(ADDSWZX);
+ INSTRKIND(ADDSL);
+ NCINSTRKIND(SUBUL);
+ NCINSTRKIND(SUBSWSX);
+ NCINSTRKIND(SUBSWZX);
+ NCINSTRKIND(SUBSL);
+ INSTRKIND(MULUL);
+ INSTRKIND(MULSWSX);
+ INSTRKIND(MULSWZX);
+ INSTRKIND(MULSL);
+ NCINSTRKIND(DIVUL);
+ NCINSTRKIND(DIVSWSX);
+ NCINSTRKIND(DIVSWZX);
+ NCINSTRKIND(DIVSL);
+ NCINSTRKIND(CMPUL);
+ NCINSTRKIND(CMPSWSX);
+ NCINSTRKIND(CMPSWZX);
+ NCINSTRKIND(CMPSL);
+ INSTRKIND(MAXSWSX);
+ INSTRKIND(MAXSWZX);
+ INSTRKIND(MAXSL);
+ INSTRKIND(MINSWSX);
+ INSTRKIND(MINSWZX);
+ INSTRKIND(MINSL);
+ INSTRKIND(AND);
+ INSTRKIND(OR);
+ INSTRKIND(XOR);
+ INSTRKIND(EQV);
+ NCINSTRKIND(NND);
+ NCINSTRKIND(MRG);
+ }
+
+#undef INSTRKIND
+
+ unsigned NewUseOpc;
+ unsigned UseIdx;
+ bool Commute = false;
+ LLVM_DEBUG(dbgs() << "checking UseMI operands\n");
+ switch (InstType) {
+ case rr2ri_rm:
+ UseIdx = 2;
+ if (UseMI.getOperand(1).getReg() == Reg) {
+ Commute = true;
+ } else {
+ assert(UseMI.getOperand(2).getReg() == Reg);
+ }
+ if (isInt<7>(ImmVal)) {
+ // This ImmVal fits in the SImm7 slot, so change UseOpc to an instruction
+ // that holds a simm7 slot.
+ NewUseOpc = NewUseOpcSImm7;
+ } else if (isMImmVal(ImmVal)) {
+ // Similarly, change UseOpc to an instruction that holds a mimm slot.
+ NewUseOpc = NewUseOpcMImm;
+ ImmVal = val2MImm(ImmVal);
+ } else
+ return false;
+ break;
+ case rr2ir_rm:
+ if (UseMI.getOperand(1).getReg() == Reg) {
+ // Check whether the immediate value matches the UseMI instruction.
+ if (!isInt<7>(ImmVal))
+ return false;
+ NewUseOpc = NewUseOpcSImm7;
+ UseIdx = 1;
+ } else {
+ assert(UseMI.getOperand(2).getReg() == Reg);
+ // Check whether the immediate value matches the UseMI instruction.
+ if (!isMImmVal(ImmVal))
+ return false;
+ NewUseOpc = NewUseOpcMImm;
+ ImmVal = val2MImm(ImmVal);
+ UseIdx = 2;
+ }
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << "modifying UseMI\n");
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ UseMI.setDesc(get(NewUseOpc));
+ if (Commute) {
+ UseMI.getOperand(1).setReg(UseMI.getOperand(UseIdx).getReg());
+ }
+ UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
+ if (DeleteDef)
+ DefMI.eraseFromParent();
+
+ return true;
+}
+
Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
VEMachineFunctionInfo *VEFI = MF->getInfo<VEMachineFunctionInfo>();
Register GlobalBaseReg = VEFI->getGlobalBaseReg();
@@ -472,6 +740,106 @@ Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
return GlobalBaseReg;
}
+static Register getVM512Upper(Register reg) {
+ return (reg - VE::VMP0) * 2 + VE::VM0;
+}
+
+static Register getVM512Lower(Register reg) { return getVM512Upper(reg) + 1; }
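+// For example, assuming the VM and VMP register enums are laid out
+// contiguously, VMP1 maps to VM2 (upper half) and VM3 (lower half).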
+
+// Expand pseudo logical vector instructions for VM512 registers.
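+// For example, (ANDMyy %vmp1, %vmp2, %vmp3) becomes two VM-sized operations,
+// roughly:
+//   ANDMmm %vm2, %vm4, %vm6   // upper halves
+//   ANDMmm %vm3, %vm5, %vm7   // lower halves
+// (register numbers assume the contiguous mapping noted above).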
+static void expandPseudoLogM(MachineInstr &MI, const MCInstrDesc &MCID) {
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ Register VMXu = getVM512Upper(MI.getOperand(0).getReg());
+ Register VMXl = getVM512Lower(MI.getOperand(0).getReg());
+ Register VMYu = getVM512Upper(MI.getOperand(1).getReg());
+ Register VMYl = getVM512Lower(MI.getOperand(1).getReg());
+
+ switch (MI.getOpcode()) {
+ default: {
+ Register VMZu = getVM512Upper(MI.getOperand(2).getReg());
+ Register VMZl = getVM512Lower(MI.getOperand(2).getReg());
+ BuildMI(*MBB, MI, DL, MCID).addDef(VMXu).addUse(VMYu).addUse(VMZu);
+ BuildMI(*MBB, MI, DL, MCID).addDef(VMXl).addUse(VMYl).addUse(VMZl);
+ break;
+ }
+ case VE::NEGMy:
+ BuildMI(*MBB, MI, DL, MCID).addDef(VMXu).addUse(VMYu);
+ BuildMI(*MBB, MI, DL, MCID).addDef(VMXl).addUse(VMYl);
+ break;
+ }
+ MI.eraseFromParent();
+}
+
+static void addOperandsForVFMK(MachineInstrBuilder &MIB, MachineInstr &MI,
+ bool Upper) {
+ // VM512
+ MIB.addReg(Upper ? getVM512Upper(MI.getOperand(0).getReg())
+ : getVM512Lower(MI.getOperand(0).getReg()));
+
+ switch (MI.getNumExplicitOperands()) {
+ default:
+ report_fatal_error("unexpected number of operands for pvfmk");
+ case 2: // _Ml: VM512, VL
+ // VL
+ MIB.addReg(MI.getOperand(1).getReg());
+ break;
+ case 4: // _Mvl: VM512, CC, VR, VL
+ // CC
+ MIB.addImm(MI.getOperand(1).getImm());
+ // VR
+ MIB.addReg(MI.getOperand(2).getReg());
+ // VL
+ MIB.addReg(MI.getOperand(3).getReg());
+ break;
+ case 5: // _MvMl: VM512, CC, VR, VM512, VL
+ // CC
+ MIB.addImm(MI.getOperand(1).getImm());
+ // VR
+ MIB.addReg(MI.getOperand(2).getReg());
+ // VM512
+ MIB.addReg(Upper ? getVM512Upper(MI.getOperand(3).getReg())
+ : getVM512Lower(MI.getOperand(3).getReg()));
+ // VL
+ MIB.addReg(MI.getOperand(4).getReg());
+ break;
+ }
+}
+
+static void expandPseudoVFMK(const TargetInstrInfo &TI, MachineInstr &MI) {
+ // Replace a pseudo VFMK with pvfmk.w.up and pvfmk.w.lo, or with
+ // pvfmk.s.up and pvfmk.s.lo.
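+ //
+ // For example, (VFMKWyvl %vmp1, $cc, %v0, %vl) becomes roughly:
+ //   PVFMKWUPvl %vm2, $cc, %v0, %vl
+ //   PVFMKWLOvl %vm3, $cc, %v0, %vl
+ // (register numbers assume the contiguous VM/VMP mapping noted above).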
+
+ static std::map<unsigned, std::pair<unsigned, unsigned>> VFMKMap = {
+ {VE::VFMKyal, {VE::VFMKLal, VE::VFMKLal}},
+ {VE::VFMKynal, {VE::VFMKLnal, VE::VFMKLnal}},
+ {VE::VFMKWyvl, {VE::PVFMKWUPvl, VE::PVFMKWLOvl}},
+ {VE::VFMKWyvyl, {VE::PVFMKWUPvml, VE::PVFMKWLOvml}},
+ {VE::VFMKSyvl, {VE::PVFMKSUPvl, VE::PVFMKSLOvl}},
+ {VE::VFMKSyvyl, {VE::PVFMKSUPvml, VE::PVFMKSLOvml}},
+ };
+
+ unsigned Opcode = MI.getOpcode();
+
+ auto Found = VFMKMap.find(Opcode);
+ if (Found == VFMKMap.end())
+ report_fatal_error("unexpected opcode for pseudo vfmk");
+
+ unsigned OpcodeUpper = (*Found).second.first;
+ unsigned OpcodeLower = (*Found).second.second;
+
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineInstrBuilder Bu = BuildMI(*MBB, MI, DL, TI.get(OpcodeUpper));
+ addOperandsForVFMK(Bu, MI, /* Upper */ true);
+ MachineInstrBuilder Bl = BuildMI(*MBB, MI, DL, TI.get(OpcodeLower));
+ addOperandsForVFMK(Bl, MI, /* Upper */ false);
+
+ MI.eraseFromParent();
+}
+
bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
switch (MI.getOpcode()) {
case VE::EXTEND_STACK: {
@@ -484,6 +852,110 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case VE::GETSTACKTOP: {
return expandGetStackTopPseudo(MI);
}
+
+ case VE::ANDMyy:
+ expandPseudoLogM(MI, get(VE::ANDMmm));
+ return true;
+ case VE::ORMyy:
+ expandPseudoLogM(MI, get(VE::ORMmm));
+ return true;
+ case VE::XORMyy:
+ expandPseudoLogM(MI, get(VE::XORMmm));
+ return true;
+ case VE::EQVMyy:
+ expandPseudoLogM(MI, get(VE::EQVMmm));
+ return true;
+ case VE::NNDMyy:
+ expandPseudoLogM(MI, get(VE::NNDMmm));
+ return true;
+ case VE::NEGMy:
+ expandPseudoLogM(MI, get(VE::NEGMm));
+ return true;
+
+ case VE::LVMyir:
+ case VE::LVMyim:
+ case VE::LVMyir_y:
+ case VE::LVMyim_y: {
+ Register VMXu = getVM512Upper(MI.getOperand(0).getReg());
+ Register VMXl = getVM512Lower(MI.getOperand(0).getReg());
+ int64_t Imm = MI.getOperand(1).getImm();
+ bool IsSrcReg =
+ MI.getOpcode() == VE::LVMyir || MI.getOpcode() == VE::LVMyir_y;
+ Register Src = IsSrcReg ? MI.getOperand(2).getReg() : VE::NoRegister;
+ int64_t MImm = IsSrcReg ? 0 : MI.getOperand(2).getImm();
+ bool KillSrc = IsSrcReg ? MI.getOperand(2).isKill() : false;
+ Register VMX = VMXl;
+ if (Imm >= 4) {
+ VMX = VMXu;
+ Imm -= 4;
+ }
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ switch (MI.getOpcode()) {
+ case VE::LVMyir:
+ BuildMI(*MBB, MI, DL, get(VE::LVMir))
+ .addDef(VMX)
+ .addImm(Imm)
+ .addReg(Src, getKillRegState(KillSrc));
+ break;
+ case VE::LVMyim:
+ BuildMI(*MBB, MI, DL, get(VE::LVMim))
+ .addDef(VMX)
+ .addImm(Imm)
+ .addImm(MImm);
+ break;
+ case VE::LVMyir_y:
+ assert(MI.getOperand(0).getReg() == MI.getOperand(3).getReg() &&
+ "LVMyir_y has different register in 3rd operand");
+ BuildMI(*MBB, MI, DL, get(VE::LVMir_m))
+ .addDef(VMX)
+ .addImm(Imm)
+ .addReg(Src, getKillRegState(KillSrc))
+ .addReg(VMX);
+ break;
+ case VE::LVMyim_y:
+ assert(MI.getOperand(0).getReg() == MI.getOperand(3).getReg() &&
+ "LVMyim_y has different register in 3rd operand");
+ BuildMI(*MBB, MI, DL, get(VE::LVMim_m))
+ .addDef(VMX)
+ .addImm(Imm)
+ .addImm(MImm)
+ .addReg(VMX);
+ break;
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+ case VE::SVMyi: {
+ Register Dest = MI.getOperand(0).getReg();
+ Register VMZu = getVM512Upper(MI.getOperand(1).getReg());
+ Register VMZl = getVM512Lower(MI.getOperand(1).getReg());
+ bool KillSrc = MI.getOperand(1).isKill();
+ int64_t Imm = MI.getOperand(2).getImm();
+ Register VMZ = VMZl;
+ if (Imm >= 4) {
+ VMZ = VMZu;
+ Imm -= 4;
+ }
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, DL, get(VE::SVMmi), Dest).addReg(VMZ).addImm(Imm);
+ MachineInstr *Inst = MIB.getInstr();
+ MI.eraseFromParent();
+ if (KillSrc) {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ Inst->addRegisterKilled(MI.getOperand(1).getReg(), TRI, true);
+ }
+ return true;
+ }
+ case VE::VFMKyal:
+ case VE::VFMKynal:
+ case VE::VFMKWyvl:
+ case VE::VFMKWyvyl:
+ case VE::VFMKSyvl:
+ case VE::VFMKSyvyl:
+ expandPseudoVFMK(*this, MI);
}
return false;
}
@@ -586,8 +1058,8 @@ bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const VEFrameLowering &TFL = *STI.getFrameLowering();
- // The VE ABI requires a reserved 176 bytes area at the top
- // of stack as described in VESubtarget.cpp. So, we adjust it here.
+ // The VE ABI requires a reserved area at the top of stack as described
+ // in VEFrameLowering.cpp. So, we adjust it here.
unsigned NumBytes = STI.getAdjustedFrameSize(0);
// Also adds the size of parameter area.
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
index 7b6662df1d60..ed1f49182150 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
@@ -23,6 +23,31 @@ namespace llvm {
class VESubtarget;
+/// VEII - This namespace holds all of the Aurora VE target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// VEInstrFormats.td.
+namespace VEII {
+enum {
+ // Aurora VE Instruction Flags. These flags describe the characteristics of
+ // the Aurora VE instructions for vector handling.
+
+ /// VE_Vector - This instruction is a vector instruction.
+ VE_Vector = 0x1,
+
+ /// VE_VLInUse - This instruction has a vector length register in its
+ /// operands.
+ VE_VLInUse = 0x2,
+
+ /// VE_VLMask/Shift - This is a bitmask that selects the operand index where
+ /// an instruction holds its vector length information (0 to 6, 7 means
+ /// undef).
+ VE_VLShift = 2,
+ VE_VLMask = 0x07 << VE_VLShift,
+};
+
+#define HAS_VLINDEX(TSF) ((TSF)&VEII::VE_VLInUse)
+#define GET_VLINDEX(TSF) \
+ (HAS_VLINDEX(TSF) ? (int)(((TSF)&VEII::VE_VLMask) >> VEII::VE_VLShift) : -1)
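+
+// A minimal usage sketch (hypothetical caller): to locate the VL operand of a
+// MachineInstr MI, query its target-specific flags, e.g.
+//   int VLIdx = GET_VLINDEX(MI.getDesc().TSFlags); // -1 for scalar insts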
+} // end namespace VEII
+
class VEInstrInfo : public VEGenInstrInfo {
const VERegisterInfo RI;
virtual void anchor();
@@ -75,6 +100,13 @@ public:
const TargetRegisterInfo *TRI) const override;
/// } Stack Spill & Reload
+ /// Optimization {
+
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
+ MachineRegisterInfo *MRI) const override;
+
+ /// } Optimization
+
Register getGlobalBaseReg(MachineFunction *MF) const;
// Lower pseudo instructions after register allocation.
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
index 8500f8ef1292..b6862cf7b30d 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
@@ -48,7 +48,7 @@ def LO7 : SDNodeXForm<imm, [{
SDLoc(N), MVT::i32);
}]>;
def MIMM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(convMImmVal(getImmVal(N)),
+ return CurDAG->getTargetConstant(val2MImm(getImmVal(N)),
SDLoc(N), MVT::i32);
}]>;
def LO32 : SDNodeXForm<imm, [{
@@ -66,7 +66,7 @@ def LO7FP : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(SignExtend32(Val, 7), SDLoc(N), MVT::i32);
}]>;
def MIMMFP : SDNodeXForm<fpimm, [{
- return CurDAG->getTargetConstant(convMImmVal(getFpImmVal(N)),
+ return CurDAG->getTargetConstant(val2MImm(getFpImmVal(N)),
SDLoc(N), MVT::i32);
}]>;
def LOFP32 : SDNodeXForm<fpimm, [{
@@ -157,6 +157,15 @@ def uimm3 : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = UImm3AsmOperand;
}
+// uimm4 - Generic immediate value.
+def UImm4AsmOperand : AsmOperandClass {
+ let Name = "UImm4";
+}
+def uimm4 : Operand<i32>, PatLeaf<(imm), [{
+ return isUInt<4>(N->getZExtValue()); }], ULO7> {
+ let ParserMatchClass = UImm4AsmOperand;
+}
+
// uimm6 - Generic immediate value.
def UImm6AsmOperand : AsmOperandClass {
let Name = "UImm6";
@@ -196,6 +205,12 @@ def mimm : Operand<i32>, PatLeaf<(imm), [{
let PrintMethod = "printMImmOperand";
}
+// zerofp - Generic fp immediate zero value.
+def zerofp : Operand<i32>, PatLeaf<(fpimm), [{
+ return getFpImmVal(N) == 0; }]> {
+ let ParserMatchClass = ZeroAsmOperand;
+}
+
// simm7fp - Generic fp immediate value.
def simm7fp : Operand<i32>, PatLeaf<(fpimm), [{
return isInt<7>(getFpImmVal(N));
@@ -230,6 +245,7 @@ def fplomsbzero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0x80000000)
== 0; }]>;
def fplozero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0xffffffff)
== 0; }]>;
+def nonzero : PatLeaf<(imm), [{ return N->getSExtValue() != 0; }]>;
def CCSIOp : PatLeaf<(cond), [{
switch (N->get()) {
@@ -430,6 +446,17 @@ def retflag : SDNode<"VEISD::RET_FLAG", SDTNone,
def getGOT : Operand<iPTR>;
+def VEeh_sjlj_setjmp: SDNode<"VEISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def VEeh_sjlj_longjmp: SDNode<"VEISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def VEeh_sjlj_setup_dispatch: SDNode<"VEISD::EH_SJLJ_SETUP_DISPATCH",
+ SDTypeProfile<0, 0, []>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
// GETFUNPLT for PIC
def GetFunPLT : SDNode<"VEISD::GETFUNPLT", SDTIntUnaryOp>;
@@ -442,6 +469,16 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
+// MEMBARRIER
+def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+// TS1AM
+def SDT_TS1AM : SDTypeProfile<1, 3, [SDTCisSameAs<0, 3>, SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>, SDTCisInt<3>]>;
+def ts1am : SDNode<"VEISD::TS1AM", SDT_TS1AM,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// VE Flag Conditions
@@ -497,7 +534,8 @@ multiclass RRbm<string opcStr, bits<8>opc,
RegisterClass RCo, ValueType Tyo,
RegisterClass RCi, ValueType Tyi,
SDPatternOperator OpNode = null_frag,
- Operand immOp = simm7, Operand mOp = mimm> {
+ Operand immOp = simm7, Operand mOp = mimm,
+ bit MoveImm = 0> {
def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz),
!strconcat(opcStr, " $sx, $sy, $sz"),
[(set Tyo:$sx, (OpNode Tyi:$sy, Tyi:$sz))]>;
@@ -514,7 +552,12 @@ multiclass RRbm<string opcStr, bits<8>opc,
let cy = 0, cz = 0 in
def im : RR<opc, (outs RCo:$sx), (ins immOp:$sy, mOp:$sz),
!strconcat(opcStr, " $sx, $sy, $sz"),
- [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]>;
+ [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]> {
+ // VE uses ORim as a move immediate instruction, so declare it here.
+ // An instruction declared as MoveImm will be optimized in FoldImmediate
+ // later.
+ let isMoveImm = MoveImm;
+ }
}
// Multiclass for non-commutative RR type instructions
@@ -546,8 +589,8 @@ multiclass RRNCbm<string opcStr, bits<8>opc,
multiclass RRm<string opcStr, bits<8>opc,
RegisterClass RC, ValueType Ty,
SDPatternOperator OpNode = null_frag,
- Operand immOp = simm7, Operand mOp = mimm> :
- RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>;
+ Operand immOp = simm7, Operand mOp = mimm, bit MoveImm = 0> :
+ RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp, MoveImm>;
// Generic RR multiclass for non-commutative instructions with 2 arguments.
// e.g. SUBUL, SUBUW, SUBSWSX, and etc.
@@ -775,10 +818,10 @@ multiclass BCbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond,
let bpf = 0 /* NONE */ in
def "" : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
!strconcat(opcStr, " ", cmpStr, "$addr")>;
- let bpf = 2 /* NOT TaKEN */ in
+ let bpf = 2 /* NOT TAKEN */ in
def _nt : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
!strconcat(opcStr, ".nt ", cmpStr, "$addr")>;
- let bpf = 3 /* TaKEN */ in
+ let bpf = 3 /* TAKEN */ in
def _t : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
!strconcat(opcStr, ".t ", cmpStr, "$addr")>;
}
@@ -807,18 +850,25 @@ multiclass BCRbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond> {
let bpf = 0 /* NONE */ in
def "" : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
!strconcat(opcStr, " ", cmpStr, "$imm32")>;
- let bpf = 2 /* NOT TaKEN */ in
+ let bpf = 2 /* NOT TAKEN */ in
def _nt : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
!strconcat(opcStr, ".nt ", cmpStr, "$imm32")>;
- let bpf = 3 /* TaKEN */ in
+ let bpf = 3 /* TAKEN */ in
def _t : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
!strconcat(opcStr, ".t ", cmpStr, "$imm32")>;
}
multiclass BCRm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc,
- RegisterClass RC, Operand immOp> {
+ RegisterClass RC, Operand immOp, Operand zeroOp> {
defm rr : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy, RC:$sz)>;
let cy = 0 in
- defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy, RC:$sz)>;
+ defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy,
+ RC:$sz)>;
+ let cz = 0 in
+ defm rz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy,
+ zeroOp:$sz)>;
+ let cy = 0, cz = 0 in
+ defm iz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy,
+ zeroOp:$sz)>;
let cy = 0, sy = 0, cz = 0, sz = 0, cf = 15 /* AT */, isBarrier = 1 in
defm a : BCRbpfm<opcStrAt, "", opc, (ins)>;
let cy = 0, sy = 0, cz = 0, sz = 0, cf = 0 /* AF */ in
@@ -898,7 +948,7 @@ multiclass SHMm<string opcStr, bits<8> opc, RegisterClass RC> {
//-----------------------------------------------------------------------------
// Multiclass for generic RM instructions
-multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> {
+multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC, bit MoveImm = 0> {
def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr),
!strconcat(opcStr, " $dest, $addr"), []>;
let cy = 0 in
@@ -909,36 +959,27 @@ multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> {
!strconcat(opcStr, " $dest, $addr"), []>;
let cy = 0, cz = 0 in
def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr),
- !strconcat(opcStr, " $dest, $addr"), []>;
+ !strconcat(opcStr, " $dest, $addr"), []> {
+ // VE uses LEAzii and LEASLzii as move immediate instructions, so declare
+ // them here. An instruction declared as MoveImm will be optimized in
+ // FoldImmediate later.
+ let isMoveImm = MoveImm;
+ }
}
// Section 8.2.1 - LEA
-let cx = 0, DecoderMethod = "DecodeLoadI64" in
-defm LEA : RMm<"lea", 0x06, I64>;
-let cx = 1, DecoderMethod = "DecodeLoadI64" in
-defm LEASL : RMm<"lea.sl", 0x06, I64>;
-let cx = 0, DecoderMethod = "DecodeLoadI32", isCodeGenOnly = 1 in
-defm LEA32 : RMm<"lea", 0x06, I32>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1,
+ DecoderMethod = "DecodeLoadI64" in {
+ let cx = 0 in defm LEA : RMm<"lea", 0x06, I64, /* MoveImm */ 1>;
+ let cx = 1 in defm LEASL : RMm<"lea.sl", 0x06, I64, /* MoveImm */ 1>;
+}
+// LEA basic patterns.
+// These need to be defined here to prioritize LEA over ADX.
def : Pat<(iPTR ADDRrri:$addr), (LEArri MEMrri:$addr)>;
def : Pat<(iPTR ADDRrii:$addr), (LEArii MEMrii:$addr)>;
def : Pat<(add I64:$base, simm32:$disp), (LEArii $base, 0, (LO32 $disp))>;
def : Pat<(add I64:$base, lozero:$disp), (LEASLrii $base, 0, (HI32 $disp))>;
-def : Pat<(add I32:$base, simm32:$disp),
- (LEA32rii (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $base, sub_i32), 0,
- (LO32 $disp))>;
-
-def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
- [(add (add node:$base, node:$idx), node:$disp),
- (add (add node:$base, node:$disp), node:$idx)]>;
-def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
- (LEArii $base, (LO7 $idx), (LO32 $disp))>;
-def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
- (LEArri $base, $idx, (LO32 $disp))>;
-def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
- (LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
-def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
- (LEASLrri $base, $idx, (HI32 $disp))>;
// Multiclass for load instructions.
let mayLoad = 1, hasSideEffects = 0 in
@@ -991,6 +1032,13 @@ defm LD1BSX : LOADm<"ld1b.sx", 0x05, I32, i32, sextloadi8>;
let cx = 1, DecoderMethod = "DecodeLoadI32" in
defm LD1BZX : LOADm<"ld1b.zx", 0x05, I32, i32, zextloadi8>;
+// LDQ pseudo instructions
+let mayLoad = 1, hasSideEffects = 0 in {
+ def LDQrii : Pseudo<(outs F128:$dest), (ins MEMrii:$addr),
+ "# pseudo ldq $dest, $addr",
+ [(set f128:$dest, (load ADDRrii:$addr))]>;
+}
+
// Multiclass for store instructions.
let mayStore = 1 in
multiclass STOREm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty,
@@ -1036,6 +1084,13 @@ defm ST2B : STOREm<"st2b", 0x14, I32, i32, truncstorei16>;
let DecoderMethod = "DecodeStoreI32" in
defm ST1B : STOREm<"st1b", 0x15, I32, i32, truncstorei8>;
+// STQ pseudo instructions
+let mayStore = 1, hasSideEffects = 0 in {
+ def STQrii : Pseudo<(outs), (ins MEMrii:$addr, F128:$sx),
+ "# pseudo stq $sx, $addr",
+ [(store f128:$sx, ADDRrii:$addr)]>;
+}
+
// Section 8.2.12 - DLDS
let DecoderMethod = "DecodeLoadI64" in
defm DLD : LOADm<"dld", 0x09, I64, i64, load>;
@@ -1074,9 +1129,9 @@ defm ATMAM : RRCASm<"atmam", 0x53, I64, i64, uimm0to2>;
// Section 8.2.20 - CAS (Compare and Swap)
let DecoderMethod = "DecodeCASI64" in
-defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7>;
+defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7, atomic_cmp_swap_64>;
let DecoderMethod = "DecodeCASI32", cx = 1 in
-defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7>;
+defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7, atomic_cmp_swap_32>;
//-----------------------------------------------------------------------------
// Section 8.3 - Transfer Control Instructions
@@ -1106,6 +1161,8 @@ def SVOB : RR<0x30, (outs), (ins), "svob">;
// Section 8.4 - Fixed-point Operation Instructions
//-----------------------------------------------------------------------------
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
// Section 8.4.1 - ADD (Add)
defm ADDUL : RRm<"addu.l", 0x48, I64, i64>;
let cx = 1 in defm ADDUW : RRm<"addu.w", 0x48, I32, i32>;
@@ -1128,6 +1185,8 @@ let cx = 1 in defm SUBSWZX : RRNCm<"subs.w.zx", 0x5A, I32, i32>;
// Section 8.4.6 - SBX (Subtract)
defm SUBSL : RRNCm<"subs.l", 0x5B, I64, i64, sub>;
+} // isReMaterializable, isAsCheapAsAMove
+
// Section 8.4.7 - MPY (Multiply)
defm MULUL : RRm<"mulu.l", 0x49, I64, i64>;
let cx = 1 in defm MULUW : RRm<"mulu.w", 0x49, I32, i32>;
@@ -1153,6 +1212,8 @@ let cx = 1 in defm DIVSWZX : RRNCm<"divs.w.zx", 0x7B, I32, i32>;
// Section 8.4.13 - DVX (Divide)
defm DIVSL : RRNCm<"divs.l", 0x7F, I64, i64, sdiv>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
// Section 8.4.14 - CMP (Compare)
defm CMPUL : RRNCm<"cmpu.l", 0x55, I64, i64>;
let cx = 1 in defm CMPUW : RRNCm<"cmpu.w", 0x55, I32, i32>;
@@ -1175,45 +1236,66 @@ let cx = 1, cw = 1 in defm MINSWZX : RRm<"mins.w.zx", 0x78, I32, i32>;
defm MAXSL : RRm<"maxs.l", 0x68, I64, i64>;
let cw = 1 in defm MINSL : RRm<"mins.l", 0x68, I64, i64>;
+} // isReMaterializable, isAsCheapAsAMove
+
//-----------------------------------------------------------------------------
// Section 8.5 - Logical Operation Instructions
//-----------------------------------------------------------------------------
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
// Section 8.5.1 - AND (AND)
defm AND : RRm<"and", 0x44, I64, i64, and>;
-let isCodeGenOnly = 1 in defm AND32 : RRm<"and", 0x44, I32, i32, and>;
// Section 8.5.2 - OR (OR)
-defm OR : RRm<"or", 0x45, I64, i64, or>;
-let isCodeGenOnly = 1 in defm OR32 : RRm<"or", 0x45, I32, i32, or>;
+defm OR : RRm<"or", 0x45, I64, i64, or, simm7, mimm, /* MoveImm */ 1>;
// Section 8.5.3 - XOR (Exclusive OR)
defm XOR : RRm<"xor", 0x46, I64, i64, xor>;
-let isCodeGenOnly = 1 in defm XOR32 : RRm<"xor", 0x46, I32, i32, xor>;
// Section 8.5.4 - EQV (Equivalence)
defm EQV : RRm<"eqv", 0x47, I64, i64>;
+} // isReMaterializable, isAsCheapAsAMove
+
// Section 8.5.5 - NND (Negate AND)
def and_not : PatFrags<(ops node:$x, node:$y),
[(and (not node:$x), node:$y)]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>;
// Section 8.5.6 - MRG (Merge)
defm MRG : RRMRGm<"mrg", 0x56, I64, i64>;
// Section 8.5.7 - LDZ (Leading Zero Count)
-defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz>;
+def ctlz_pat : PatFrags<(ops node:$src),
+ [(ctlz node:$src),
+ (ctlz_zero_undef node:$src)]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz_pat>;
// Section 8.5.8 - PCNT (Population Count)
defm PCNT : RRI1m<"pcnt", 0x38, I64, i64, ctpop>;
// Section 8.5.9 - BRV (Bit Reverse)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm BRV : RRI1m<"brv", 0x39, I64, i64, bitreverse>;
// Section 8.5.10 - BSWP (Byte Swap)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm BSWP : RRSWPm<"bswp", 0x2B, I64, i64>;
+def : Pat<(i64 (bswap i64:$src)),
+ (BSWPri $src, 0)>;
+def : Pat<(i64 (bswap (i64 mimm:$src))),
+ (BSWPmi (MIMM $src), 0)>;
+def : Pat<(i32 (bswap i32:$src)),
+ (EXTRACT_SUBREG
+ (BSWPri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub_i32), 1),
+ sub_i32)>;
+def : Pat<(i32 (bswap (i32 mimm:$src))),
+ (EXTRACT_SUBREG (BSWPmi (MIMM $src), 1), sub_i32)>;
+
// Section 8.5.11 - CMOV (Conditional Move)
let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>;
let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32>;
@@ -1229,17 +1311,21 @@ def : MnemonicAlias<"cmov.s", "cmov.s.at">;
//-----------------------------------------------------------------------------
// Section 8.6.1 - SLL (Shift Left Logical)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm SLL : RRIm<"sll", 0x65, I64, i64, shl>;
// Section 8.6.2 - SLD (Shift Left Double)
defm SLD : RRILDm<"sld", 0x64, I64, i64>;
// Section 8.6.3 - SRL (Shift Right Logical)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm SRL : RRIm<"srl", 0x75, I64, i64, srl>;
// Section 8.6.4 - SRD (Shift Right Double)
defm SRD : RRIRDm<"srd", 0x74, I64, i64>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
// Section 8.6.5 - SLA (Shift Left Arithmetic)
defm SLAWSX : RRIm<"sla.w.sx", 0x66, I32, i32, shl>;
let cx = 1 in defm SLAWZX : RRIm<"sla.w.zx", 0x66, I32, i32>;
@@ -1254,6 +1340,8 @@ let cx = 1 in defm SRAWZX : RRIm<"sra.w.zx", 0x76, I32, i32>;
// Section 8.6.8 - SRAX (Shift Right Arithmetic)
defm SRAL : RRIm<"sra.l", 0x77, I64, i64, sra>;
+} // isReMaterializable, isAsCheapAsAMove
+
def : Pat<(i32 (srl i32:$src, (i32 simm7:$val))),
(EXTRACT_SUBREG (SRLri (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
$src, sub_i32), !add(32, 64)), imm:$val), sub_i32)>;
@@ -1302,13 +1390,13 @@ let cw = 1, cx = 1 in
defm FMINS : RRFm<"fmin.s", 0x3E, F32, f32, fminnum, simm7fp, mimmfp32>;
// Section 8.7.7 - FAQ (Floating Add Quadruple)
-defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128>;
+defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128, fadd>;
// Section 8.7.8 - FSQ (Floating Subtract Quadruple)
-defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128>;
+defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128, fsub>;
// Section 8.7.9 - FMQ (Floating Multiply Quadruple)
-defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128>;
+defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128, fmul>;
// Section 8.7.10 - FCQ (Floating Compare Quadruple)
defm FCMPQ : RRNCbm<"fcmp.q", 0x7D, I64, f64, F128, f128, null_frag, simm7fp,
@@ -1339,17 +1427,17 @@ defm CVTDL : CVTm<"cvt.d.l", 0x5F, I64, f64, I64, i64, sint_to_fp>;
// Section 8.7.15 - CVS (Convert to Single-format)
defm CVTSD : CVTm<"cvt.s.d", 0x1F, F32, f32, I64, f64, fpround>;
let cx = 1 in
-defm CVTSQ : CVTm<"cvt.s.q", 0x1F, F32, f32, F128, f128>;
+defm CVTSQ : CVTm<"cvt.s.q", 0x1F, F32, f32, F128, f128, fpround>;
// Section 8.7.16 - CVD (Convert to Double-format)
defm CVTDS : CVTm<"cvt.d.s", 0x0F, I64, f64, F32, f32, fpextend>;
let cx = 1 in
-defm CVTDQ : CVTm<"cvt.d.q", 0x0F, I64, f64, F128, f128>;
+defm CVTDQ : CVTm<"cvt.d.q", 0x0F, I64, f64, F128, f128, fpround>;
// Section 8.7.17 - CVQ (Convert to Quadruple-format)
-defm CVTQD : CVTm<"cvt.q.d", 0x2D, F128, f128, I64, f64>;
+defm CVTQD : CVTm<"cvt.q.d", 0x2D, F128, f128, I64, f64, fpextend>;
let cx = 1 in
-defm CVTQS : CVTm<"cvt.q.s", 0x2D, F128, f128, F32, f32>;
+defm CVTQS : CVTm<"cvt.q.s", 0x2D, F128, f128, F32, f32, fpextend>;
//-----------------------------------------------------------------------------
// Section 8.8 - Branch instructions
@@ -1378,13 +1466,13 @@ defm BCFS : BCm<"b${cond}.s", "b.s", "baf.s", 0x1C, F32, simm7fp>;
// Section 8.8.4 - BCR (Branch on Condition Relative)
let cx = 0, cx2 = 0 in
-defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7>;
+defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7, zero>;
let cx = 1, cx2 = 0 in
-defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7>;
+defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7, zero>;
let cx = 0, cx2 = 1 in
-defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp>;
+defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp, zerofp>;
let cx = 1, cx2 = 1 in
-defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp>;
+defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp, zerofp>;
// Section 8.8.5 - BSIC (Branch and Save IC)
let isCall = 1, hasSideEffects = 0, DecoderMethod = "DecodeCall" in
@@ -1481,11 +1569,23 @@ defm SHMB : SHMm<"shm.b", 0x31, I64>;
// Pattern Matchings
//===----------------------------------------------------------------------===//
+// Basic casts between registers. These are often used in ISel patterns, so
+// define them as OutPatFrags.
+def i2l : OutPatFrag<(ops node:$exp),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_i32)>;
+def l2i : OutPatFrag<(ops node:$exp),
+ (EXTRACT_SUBREG $exp, sub_i32)>;
+def f2l : OutPatFrag<(ops node:$exp),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_f32)>;
+def l2f : OutPatFrag<(ops node:$exp),
+ (EXTRACT_SUBREG $exp, sub_f32)>;
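+// For example, (i2l $val) widens an i32 value into the low 32 bits of a fresh
+// i64 register (the upper bits are undefined), and (l2i $val) extracts the low
+// 32-bit half again; the atomic load patterns below use i2l to wrap 32-bit
+// results into i64.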
+
// Small immediates.
-def : Pat<(i32 simm7:$val), (OR32im (LO7 $val), 0)>;
+def : Pat<(i32 simm7:$val), (EXTRACT_SUBREG (ORim (LO7 $val), 0), sub_i32)>;
def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>;
// Medium immediates.
-def : Pat<(i32 simm32:$val), (LEA32zii 0, 0, (LO32 $val))>;
+def : Pat<(i32 simm32:$val),
+ (EXTRACT_SUBREG (LEAzii 0, 0, (LO32 $val)), sub_i32)>;
def : Pat<(i64 simm32:$val), (LEAzii 0, 0, (LO32 $val))>;
def : Pat<(i64 uimm32:$val), (ANDrm (LEAzii 0, 0, (LO32 $val)), !add(32, 64))>;
// Arbitrary immediates.
@@ -1497,6 +1597,54 @@ def : Pat<(i64 imm:$val),
(LEASLrii (ANDrm (LEAzii 0, 0, (LO32 imm:$val)), !add(32, 64)), 0,
(HI32 imm:$val))>;
+// LEA patterns
+def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
+ [(add (add node:$base, node:$idx), node:$disp),
+ (add (add node:$base, node:$disp), node:$idx),
+ (add node:$base, (add $idx, $disp))]>;
+def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
+ (LEArii $base, (LO7 $idx), (LO32 $disp))>;
+def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
+ (LEArri $base, $idx, (LO32 $disp))>;
+def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
+ (LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
+def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
+ (LEASLrri $base, $idx, (HI32 $disp))>;
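+// For example, (add (add I64:$base, 8), 1024) matches lea_add with $idx = 8
+// and $disp = 1024 and is selected as (LEArii $base, 8, 1024).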
+
+// Address calculation patterns and optimizations
+//
+// Generate one of the following instruction sequences:
+// 1. LEA %reg, label@LO32
+// AND %reg, %reg, (32)0
+// 2. LEASL %reg, label@HI32
+// 3. (LEA %reg, label@LO32)
+// (AND %reg, %reg, (32)0)
+// LEASL %reg, label@HI32(, %reg)
+// 4. (LEA %reg, label@LO32)
+// (AND %reg, %reg, (32)0)
+// LEASL %reg, label@HI32(%reg, %got)
+//
+def velo_only : OutPatFrag<(ops node:$lo),
+ (ANDrm (LEAzii 0, 0, $lo), !add(32, 64))>;
+def vehi_only : OutPatFrag<(ops node:$hi),
+ (LEASLzii 0, 0, $hi)>;
+def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo),
+ (LEASLrii $lo, 0, $hi)>;
+def vehi_lo_imm : OutPatFrag<(ops node:$hi, node:$lo, node:$idx),
+ (LEASLrii $lo, $idx, $hi)>;
+def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo),
+ (LEASLrri $base, $lo, $hi)>;
+foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr",
+ "tglobaltlsaddr", "tjumptable" ] in {
+ def : Pat<(VElo !cast<SDNode>(type):$lo), (velo_only $lo)>;
+ def : Pat<(VEhi !cast<SDNode>(type):$hi), (vehi_only $hi)>;
+ def : Pat<(add (VEhi !cast<SDNode>(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>;
+ def : Pat<(add (add (VEhi !cast<SDNode>(type):$hi), I64:$lo), simm7:$val),
+ (vehi_lo_imm $hi, $lo, (LO7 $val))>;
+ def : Pat<(add I64:$base, (add (VEhi !cast<SDNode>(type):$hi), I64:$lo)),
+ (vehi_baselo $base, $hi, $lo)>;
+}
+
// floating point
def : Pat<(f32 fpimm:$val),
(EXTRACT_SUBREG (LEASLzii 0, 0, (HIFP32 $val)), sub_f32)>;
@@ -1526,8 +1674,8 @@ def : Pat<(sext_inreg I64:$src, i8),
(SRALri (SLLri $src, 56), 56)>;
def : Pat<(sext_inreg (i32 (trunc i64:$src)), i8),
(EXTRACT_SUBREG (SRALri (SLLri $src, 56), 56), sub_i32)>;
-def : Pat<(and (trunc i64:$src), 0xff),
- (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(56, 64))>;
+def : Pat<(i32 (and (trunc i64:$src), 0xff)),
+ (EXTRACT_SUBREG (ANDrm $src, !add(56, 64)), sub_i32)>;
// Cast to i16
def : Pat<(sext_inreg I32:$src, i16),
@@ -1536,28 +1684,34 @@ def : Pat<(sext_inreg I64:$src, i16),
(SRALri (SLLri $src, 48), 48)>;
def : Pat<(sext_inreg (i32 (trunc i64:$src)), i16),
(EXTRACT_SUBREG (SRALri (SLLri $src, 48), 48), sub_i32)>;
-def : Pat<(and (trunc i64:$src), 0xffff),
- (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(48, 64))>;
+def : Pat<(i32 (and (trunc i64:$src), 0xffff)),
+ (EXTRACT_SUBREG (ANDrm $src, !add(48, 64)), sub_i32)>;
// Cast to i32
def : Pat<(i32 (trunc i64:$src)),
- (ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0)>;
-def : Pat<(i32 (fp_to_sint I64:$reg)), (CVTWDSXr RD_RZ, $reg)>;
-def : Pat<(i32 (fp_to_sint F32:$reg)), (CVTWSSXr RD_RZ, $reg)>;
+ (EXTRACT_SUBREG (ANDrm $src, !add(32, 64)), sub_i32)>;
+def : Pat<(i32 (fp_to_sint f32:$src)), (CVTWSSXr RD_RZ, $src)>;
+def : Pat<(i32 (fp_to_sint f64:$src)), (CVTWDSXr RD_RZ, $src)>;
+def : Pat<(i32 (fp_to_sint f128:$src)), (CVTWDSXr RD_RZ, (CVTDQr $src))>;
// Cast to i64
-def : Pat<(sext_inreg I64:$src, i32),
+def : Pat<(sext_inreg i64:$src, i32),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
(ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0), sub_i32)>;
-def : Pat<(i64 (sext i32:$sy)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm $sy, 0), sub_i32)>;
-def : Pat<(i64 (zext i32:$sy)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWZXrm $sy, 0), sub_i32)>;
-def : Pat<(i64 (fp_to_sint f32:$sy)), (CVTLDr RD_RZ, (CVTDSr $sy))>;
-def : Pat<(i64 (fp_to_sint I64:$reg)), (CVTLDr RD_RZ, $reg)>;
+def : Pat<(i64 (sext i32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm $src, 0), sub_i32)>;
+def : Pat<(i64 (zext i32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWZXrm $src, 0), sub_i32)>;
+def : Pat<(i64 (fp_to_sint f32:$src)), (CVTLDr RD_RZ, (CVTDSr $src))>;
+def : Pat<(i64 (fp_to_sint f64:$src)), (CVTLDr RD_RZ, $src)>;
+def : Pat<(i64 (fp_to_sint f128:$src)), (CVTLDr RD_RZ, (CVTDQr $src))>;
// Cast to f32
-def : Pat<(f32 (sint_to_fp i64:$sy)), (CVTSDr (CVTDLr i64:$sy))>;
+def : Pat<(f32 (sint_to_fp i64:$src)), (CVTSDr (CVTDLr i64:$src))>;
+
+// Cast to f128
+def : Pat<(f128 (sint_to_fp i32:$src)), (CVTQDr (CVTDWr $src))>;
+def : Pat<(f128 (sint_to_fp i64:$src)), (CVTQDr (CVTDLr $src))>;
def : Pat<(i64 (anyext i32:$sy)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, sub_i32)>;
@@ -1625,29 +1779,150 @@ defm : TRUNC64m<truncstorei8, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
defm : TRUNC64m<truncstorei16, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
defm : TRUNC64m<truncstorei32, STLrri, STLrii, STLzri, ST1Bzii>;
-// Address calculation and its optimization
-def : Pat<(VEhi tglobaladdr:$in), (LEASLzii 0, 0, tglobaladdr:$in)>;
-def : Pat<(VElo tglobaladdr:$in),
- (ANDrm (LEAzii 0, 0, tglobaladdr:$in), !add(32, 64))>;
-def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
- (LEASLrii (ANDrm (LEAzii 0, 0, tglobaladdr:$in2), !add(32, 64)), 0,
- (tglobaladdr:$in1))>;
-
-// GlobalTLS address calculation and its optimization
-def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzii 0, 0, tglobaltlsaddr:$in)>;
-def : Pat<(VElo tglobaltlsaddr:$in),
- (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in), !add(32, 64))>;
-def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)),
- (LEASLrii (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in2), !add(32, 64)), 0,
- (tglobaltlsaddr:$in1))>;
-
-// Address calculation and its optimization
-def : Pat<(VEhi texternalsym:$in), (LEASLzii 0, 0, texternalsym:$in)>;
-def : Pat<(VElo texternalsym:$in),
- (ANDrm (LEAzii 0, 0, texternalsym:$in), !add(32, 64))>;
-def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)),
- (LEASLrii (ANDrm (LEAzii 0, 0, texternalsym:$in2), !add(32, 64)), 0,
- (texternalsym:$in1))>;
+// Atomic loads
+multiclass ATMLDm<SDPatternOperator from,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(from ADDRrri:$addr), (torri MEMrri:$addr)>;
+ def : Pat<(from ADDRrii:$addr), (torii MEMrii:$addr)>;
+ def : Pat<(from ADDRzri:$addr), (tozri MEMzri:$addr)>;
+ def : Pat<(from ADDRzii:$addr), (tozii MEMzii:$addr)>;
+}
+defm : ATMLDm<atomic_load_8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>;
+defm : ATMLDm<atomic_load_16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
+defm : ATMLDm<atomic_load_32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>;
+defm : ATMLDm<atomic_load_64, LDrri, LDrii, LDzri, LDzii>;
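As a hedged illustration of what the ATMLDm patterns above match, plain C++ atomic loads of each width (illustrative functions, not from the patch):

#include <atomic>
#include <cstdint>

// atomic_load_8/16/32/64 map to the zero-extending loads LD1BZX/LD2BZX/LDLZX/LD.
uint8_t  ld8 (std::atomic<uint8_t>  &a) { return a.load(); }
uint16_t ld16(std::atomic<uint16_t> &a) { return a.load(); }
uint32_t ld32(std::atomic<uint32_t> &a) { return a.load(); }
uint64_t ld64(std::atomic<uint64_t> &a) { return a.load(); }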
+
+// Optimized atomic loads with sext
+multiclass SXATMLDm<SDPatternOperator from, Operand TY,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRrri:$addr))), TY)),
+ (i2l (torri MEMrri:$addr))>;
+ def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRrii:$addr))), TY)),
+ (i2l (torii MEMrii:$addr))>;
+ def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRzri:$addr))), TY)),
+ (i2l (tozri MEMzri:$addr))>;
+ def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRzii:$addr))), TY)),
+ (i2l (tozii MEMzii:$addr))>;
+}
+multiclass SXATMLD32m<SDPatternOperator from,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(i64 (sext (from ADDRrri:$addr))),
+ (i2l (torri MEMrri:$addr))>;
+ def : Pat<(i64 (sext (from ADDRrii:$addr))),
+ (i2l (torii MEMrii:$addr))>;
+ def : Pat<(i64 (sext (from ADDRzri:$addr))),
+ (i2l (tozri MEMzri:$addr))>;
+ def : Pat<(i64 (sext (from ADDRzii:$addr))),
+ (i2l (tozii MEMzii:$addr))>;
+}
+defm : SXATMLDm<atomic_load_8, i8, LD1BSXrri, LD1BSXrii, LD1BSXzri, LD1BSXzii>;
+defm : SXATMLDm<atomic_load_16, i16, LD2BSXrri, LD2BSXrii, LD2BSXzri,
+ LD2BSXzii>;
+defm : SXATMLD32m<atomic_load_32, LDLSXrri, LDLSXrii, LDLSXzri, LDLSXzii>;
+
+// Optimized atomic loads with zext
+multiclass ZXATMLDm<SDPatternOperator from, Operand VAL,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(i64 (and (anyext (from ADDRrri:$addr)), VAL)),
+ (i2l (torri MEMrri:$addr))>;
+ def : Pat<(i64 (and (anyext (from ADDRrii:$addr)), VAL)),
+ (i2l (torii MEMrii:$addr))>;
+ def : Pat<(i64 (and (anyext (from ADDRzri:$addr)), VAL)),
+ (i2l (tozri MEMzri:$addr))>;
+ def : Pat<(i64 (and (anyext (from ADDRzii:$addr)), VAL)),
+ (i2l (tozii MEMzii:$addr))>;
+}
+multiclass ZXATMLD32m<SDPatternOperator from, Operand VAL,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(i64 (zext (from ADDRrri:$addr))),
+ (i2l (torri MEMrri:$addr))>;
+ def : Pat<(i64 (zext (from ADDRrii:$addr))),
+ (i2l (torii MEMrii:$addr))>;
+ def : Pat<(i64 (zext (from ADDRzri:$addr))),
+ (i2l (tozri MEMzri:$addr))>;
+ def : Pat<(i64 (zext (from ADDRzii:$addr))),
+ (i2l (tozii MEMzii:$addr))>;
+}
+defm : ZXATMLDm<atomic_load_8, 0xFF, LD1BZXrri, LD1BZXrii, LD1BZXzri,
+ LD1BZXzii>;
+defm : ZXATMLDm<atomic_load_16, 0xFFFF, LD2BZXrri, LD2BZXrii, LD2BZXzri,
+ LD2BZXzii>;
+defm : ZXATMLD32m<atomic_load_32, 0xFFFFFFFF, LDLZXrri, LDLZXrii, LDLZXzri,
+ LDLZXzii>;
+
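A sketch of the source shapes the SXATMLD*/ZXATMLD* classes above are intended to catch: an atomic load whose result is immediately sign- or zero-extended to 64 bits, so the extension folds into the load (illustrative only):

#include <atomic>
#include <cstdint>

// The extension is folded into a single LD1BSX/LD2BZX/LDLSX/LDLZX-style load.
int64_t  sx8 (std::atomic<int8_t>   &a) { return (int64_t)a.load(); }
int64_t  sx32(std::atomic<int32_t>  &a) { return (int64_t)a.load(); }
uint64_t zx16(std::atomic<uint16_t> &a) { return (uint64_t)a.load(); }
uint64_t zx32(std::atomic<uint32_t> &a) { return (uint64_t)a.load(); }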
+// Atomic stores
+multiclass ATMSTm<SDPatternOperator from, ValueType ty,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(from ADDRrri:$addr, ty:$src), (torri MEMrri:$addr, $src)>;
+ def : Pat<(from ADDRrii:$addr, ty:$src), (torii MEMrii:$addr, $src)>;
+ def : Pat<(from ADDRzri:$addr, ty:$src), (tozri MEMzri:$addr, $src)>;
+ def : Pat<(from ADDRzii:$addr, ty:$src), (tozii MEMzii:$addr, $src)>;
+}
+defm : ATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
+defm : ATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
+defm : ATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>;
+defm : ATMSTm<atomic_store_64, i64, STrri, STrii, STzri, STzii>;
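Correspondingly, a minimal sketch of the stores the ATMSTm patterns cover (illustrative only):

#include <atomic>
#include <cstdint>

// atomic_store_8/16/32/64 map to ST1B/ST2B/STL/ST.
void st8 (std::atomic<uint8_t>  &a, uint8_t  v) { a.store(v); }
void st16(std::atomic<uint16_t> &a, uint16_t v) { a.store(v); }
void st32(std::atomic<uint32_t> &a, uint32_t v) { a.store(v); }
void st64(std::atomic<uint64_t> &a, uint64_t v) { a.store(v); }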
+
+// Optimized atomic stores with truncate
+multiclass TRATMSTm<SDPatternOperator from,
+ ValueType ty,
+ SDPatternOperator torri,
+ SDPatternOperator torii,
+ SDPatternOperator tozri,
+ SDPatternOperator tozii> {
+ def : Pat<(from ADDRrri:$addr, (i32 (trunc i64:$src))),
+ (torri MEMrri:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from ADDRrii:$addr, (i32 (trunc i64:$src))),
+ (torii MEMrii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from ADDRzri:$addr, (i32 (trunc i64:$src))),
+ (tozri MEMzri:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from ADDRzii:$addr, (i32 (trunc i64:$src))),
+ (tozii MEMzii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+}
+defm : TRATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
+defm : TRATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
+defm : TRATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>;
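The TRATMSTm variants fold the truncation of a 64-bit value into the narrow store itself; a hedged example:

#include <atomic>
#include <cstdint>

// The i64 -> i32/i16/i8 trunc becomes an EXTRACT_SUBREG feeding the narrow store.
void st_trunc8 (std::atomic<uint8_t>  &a, uint64_t v) { a.store((uint8_t)v);  }
void st_trunc32(std::atomic<uint32_t> &a, uint64_t v) { a.store((uint32_t)v); }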
+
+// Atomic swaps
+def : Pat<(i32 (ts1am i64:$src, i32:$flag, i32:$new)),
+ (TS1AMWrir $src, 0, $flag, $new)>;
+def : Pat<(i32 (atomic_swap_32 ADDRri:$src, i32:$new)),
+ (TS1AMWrii MEMriRRM:$src, 15, $new)>;
+def : Pat<(i64 (atomic_swap_64 ADDRri:$src, i64:$new)),
+ (TS1AMLrir MEMriRRM:$src, (LEAzii 0, 0, 255), i64:$new)>;
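For the swap patterns above, a brief illustrative example (function names are not from the patch):

#include <atomic>
#include <cstdint>

// atomic_swap_32/64 select the TS1AM instructions.
uint32_t swap32(std::atomic<uint32_t> &a, uint32_t v) { return a.exchange(v); }
uint64_t swap64(std::atomic<uint64_t> &a, uint64_t v) { return a.exchange(v); }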
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling patterns
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp : Pseudo<(outs), (ins I64:$buf),
+ "# EH_SJLJ_LONGJMP",
+ [(VEeh_sjlj_longjmp I64:$buf)]>;
+
+ def EH_SjLj_SetJmp : Pseudo<(outs I32:$dst), (ins I64:$buf),
+ "# EH_SJLJ_SETJMP",
+ [(set I32:$dst, (VEeh_sjlj_setjmp I64:$buf))]>;
+
+ def EH_SjLj_Setup_Dispatch : Pseudo<(outs), (ins), "# EH_SJLJ_SETUP_DISPATCH",
+ [(VEeh_sjlj_setup_dispatch)]>;
+}
+
+let isTerminator = 1, isBranch = 1, isCodeGenOnly = 1 in
+ def EH_SjLj_Setup : Pseudo<(outs), (ins brtarget32:$dst),
+ "# EH_SJLJ_SETUP $dst">;
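These pseudos back setjmp/longjmp-style exception handling; as a hedged sketch, the clang/GCC builtins below are the usual front end for the EH_SJLJ_SETJMP and EH_SJLJ_LONGJMP nodes matched here (whether VE enables SjLj EH by default is not stated in this patch):

// __builtin_setjmp/__builtin_longjmp use a five-pointer buffer and lower to the
// EH_SJLJ_SETJMP / EH_SJLJ_LONGJMP SelectionDAG nodes.
static void *jmp_buf_[5];

int try_point() { return __builtin_setjmp(jmp_buf_); }
[[noreturn]] void unwind() { __builtin_longjmp(jmp_buf_, 1); }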
+
+//===----------------------------------------------------------------------===//
+// Branch related patterns
+//===----------------------------------------------------------------------===//
// Branches
def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>;
@@ -1681,6 +1956,8 @@ multiclass BRCCFm<ValueType ty, SDPatternOperator BrOpNode1,
}
defm : BRCCFm<f32, BRCFSrr, BRCFSir>;
defm : BRCCFm<f64, BRCFDrr, BRCFDir>;
+def : Pat<(brcc cond:$cond, f128:$l, f128:$r, bb:$addr),
+ (BRCFDir (fcond2cc $cond), 0, (FCMPQrr $r, $l), bb:$addr)>;
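The added f128 pattern routes a quad-precision branch-on-compare through FCMPQrr and the double-precision branch form; a small assumed example (taking `long double` as f128 on VE):

// A conditional branch guarded by an f128 comparison.
int classify(long double a, long double b) {
  if (a < b)   // brcc on f128: FCMPQrr feeds BRCFDir per the pattern above
    return -1;
  return 1;
}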
//===----------------------------------------------------------------------===//
// Pseudo Instructions
@@ -1737,53 +2014,42 @@ let Uses = [SX11], hasSideEffects = 1 in
def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
"# GET STACK TOP",
[(set iPTR:$dst, (GetStackTop))]>;
+
+// MEMBARRIER
+let hasSideEffects = 1 in
+def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)] >;
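For the MEMBARRIER pseudo, a hedged example of the kind of source construct that reaches the MemBarrier node (whether every fence strength maps to it is an assumption here):

#include <atomic>

// A sequentially consistent fence.
void full_fence() { std::atomic_thread_fence(std::memory_order_seq_cst); }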
+
+//===----------------------------------------------------------------------===//
+// Other patterns
+//===----------------------------------------------------------------------===//
+
// SETCC pattern matches
//
// CMP %tmp, lhs, rhs ; compare lhs and rhs
// or %res, 0, (0)1 ; initialize by 0
// CMOV %res, (63)0, %tmp ; set 1 if %tmp is true
-def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrm (icond2cc $cond),
- (CMPSLrr i64:$LHS, i64:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
-
-def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrm (icond2cc $cond),
- (CMPULrr i64:$LHS, i64:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
-
-def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrm (icond2cc $cond),
- (CMPSWSXrr i32:$LHS, i32:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
-
-def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrm (icond2cc $cond),
- (CMPUWrr i32:$LHS, i32:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
-
-def : Pat<(i32 (setcc f64:$LHS, f64:$RHS, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVDrm (fcond2cc $cond),
- (FCMPDrr f64:$LHS, f64:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
-
-def : Pat<(i32 (setcc f32:$LHS, f32:$RHS, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVSrm (fcond2cc $cond),
- (FCMPSrr f32:$LHS, f32:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
+class setccrr<Instruction INSN> :
+ OutPatFrag<(ops node:$cond, node:$comp),
+ (EXTRACT_SUBREG
+ (INSN $cond, $comp,
+ !add(63, 64), // means (63)0 == 1
+ (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc i32:$l, i32:$r, CCSIOp:$cond)),
+ (setccrr<CMOVWrm> (icond2cc $cond), (CMPSWSXrr $l, $r))>;
+def : Pat<(i32 (setcc i32:$l, i32:$r, CCUIOp:$cond)),
+ (setccrr<CMOVWrm> (icond2cc $cond), (CMPUWrr $l, $r))>;
+def : Pat<(i32 (setcc i64:$l, i64:$r, CCSIOp:$cond)),
+ (setccrr<CMOVLrm> (icond2cc $cond), (CMPSLrr $l, $r))>;
+def : Pat<(i32 (setcc i64:$l, i64:$r, CCUIOp:$cond)),
+ (setccrr<CMOVLrm> (icond2cc $cond), (CMPULrr $l, $r))>;
+def : Pat<(i32 (setcc f32:$l, f32:$r, cond:$cond)),
+ (setccrr<CMOVSrm> (fcond2cc $cond), (FCMPSrr $l, $r))>;
+def : Pat<(i32 (setcc f64:$l, f64:$r, cond:$cond)),
+ (setccrr<CMOVDrm> (fcond2cc $cond), (FCMPDrr $l, $r))>;
+def : Pat<(i32 (setcc f128:$l, f128:$r, cond:$cond)),
+ (setccrr<CMOVDrm> (fcond2cc $cond), (FCMPQrr $l, $r))>;
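As the comment above describes, an i32 truth value is produced by a compare, an OR that zeroes the result, and a conditional move of the (63)0 immediate (== 1). A minimal sketch of the source that generates these setcc nodes (assuming `long double` is f128 on VE):

#include <cstdint>

int32_t less_s64(int64_t a, int64_t b)          { return a < b; }  // CMPSLrr + CMOVLrm
int32_t less_f128(long double a, long double b) { return a < b; }  // FCMPQrr + CMOVDrm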
// Special SELECTCC pattern matches
// Use min/max for better performance.
@@ -1824,152 +2090,171 @@ def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLE)),
def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLE)),
(MINSWSXrr $LHS, $RHS)>;
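As noted above, a select of the compared operands themselves folds to min/max; a small example of what the MINSWSXrr pattern matches:

#include <cstdint>

// selectcc(a, b, a, b, SETLE) on i32 becomes MINSWSXrr.
int32_t min_i32(int32_t a, int32_t b) { return a <= b ? a : b; }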
+// Helper classes to simplify the construction of cmov patterns.
+//
+// These hide the INSERT_SUBREG/EXTRACT_SUBREG boilerplate used in the patterns below.
+
+class cmovrr<Instruction INSN> :
+ OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f),
+ (INSN $cond, $comp, $t, $f)>;
+class cmovrm<Instruction INSN, SDNodeXForm MOP = MIMM> :
+ OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f),
+ (INSN $cond, $comp, (MOP $t), $f)>;
+class cmov32rr<Instruction INSN, SubRegIndex sub_oty> :
+ OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f),
+ (EXTRACT_SUBREG
+ (INSN $cond, $comp,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_oty),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_oty)),
+ sub_oty)>;
+class cmov32rm<Instruction INSN, SubRegIndex sub_oty, SDNodeXForm MOP = MIMM> :
+ OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f),
+ (EXTRACT_SUBREG
+ (INSN $cond, $comp,
+ (MOP $t),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_oty)),
+ sub_oty)>;
+class cmov128rr<Instruction INSN> :
+ OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
+ (INSN $cond, $comp,
+ (EXTRACT_SUBREG $t, sub_odd),
+ (EXTRACT_SUBREG $f, sub_odd)), sub_odd),
+ (INSN $cond, $comp,
+ (EXTRACT_SUBREG $t, sub_even),
+ (EXTRACT_SUBREG $f, sub_even)), sub_even)>;
+
// Generic SELECTCC pattern matches
//
// CMP %tmp, %l, %r ; compare %l and %r
// or %res, %f, (0)1 ; initialize by %f
// CMOV %res, %t, %tmp ; set %t if %tmp is true
-// selectcc for i64 result
-def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)),
- (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
-def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)),
- (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
-def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)),
- (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
-def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)),
- (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
-def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)),
- (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
-def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)),
- (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
-
-// selectcc for i32 result
def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrr (icond2cc $cond),
- (CMPSWSXrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVWrr, sub_i32> (icond2cc $cond), (CMPSWSXrr $l, $r),
+ $t, $f)>;
def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrr (icond2cc $cond),
- (CMPUWrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVWrr, sub_i32> (icond2cc $cond), (CMPUWrr $l, $r),
+ $t, $f)>;
def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrr (icond2cc $cond),
- (CMPSLrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVLrr, sub_i32> (icond2cc $cond), (CMPSLrr $l, $r),
+ $t, $f)>;
def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrr (icond2cc $cond),
- (CMPULrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVLrr, sub_i32> (icond2cc $cond), (CMPULrr $l, $r),
+ $t, $f)>;
def : Pat<(i32 (selectcc f32:$l, f32:$r, i32:$t, i32:$f, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVSrr (fcond2cc $cond),
- (FCMPSrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVSrr, sub_i32> (fcond2cc $cond), (FCMPSrr $l, $r),
+ $t, $f)>;
def : Pat<(i32 (selectcc f64:$l, f64:$r, i32:$t, i32:$f, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVDrr (fcond2cc $cond),
- (FCMPDrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVDrr, sub_i32> (fcond2cc $cond), (FCMPDrr $l, $r),
+ $t, $f)>;
+def : Pat<(i32 (selectcc f128:$l, f128:$r, i32:$t, i32:$f, cond:$cond)),
+ (cmov32rr<CMOVDrr, sub_i32> (fcond2cc $cond), (FCMPQrr $l, $r),
+ $t, $f)>;
-// selectcc for f64 result
-def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)),
- (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
-def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)),
- (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
-def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)),
- (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
-def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)),
- (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
-def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)),
- (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
-def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)),
- (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)),
+ (cmovrr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)),
+ (cmovrr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)),
+ (cmovrr<CMOVLrr> (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)),
+ (cmovrr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)),
+ (cmovrr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)),
+ (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc f128:$l, f128:$r, i64:$t, i64:$f, cond:$cond)),
+ (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>;
-// selectcc for f32 result
def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrr (icond2cc $cond),
- (CMPSWSXrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVWrr, sub_f32> (icond2cc $cond), (CMPSWSXrr $l, $r),
+ $t, $f)>;
def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrr (icond2cc $cond),
- (CMPUWrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVWrr, sub_f32> (icond2cc $cond), (CMPUWrr $l, $r),
+ $t, $f)>;
def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrr (icond2cc $cond),
- (CMPSLrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVLrr, sub_f32> (icond2cc $cond), (CMPSLrr $l, $r),
+ $t, $f)>;
def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrr (icond2cc $cond),
- (CMPULrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVLrr, sub_f32> (icond2cc $cond), (CMPULrr $l, $r),
+ $t, $f)>;
def : Pat<(f32 (selectcc f32:$l, f32:$r, f32:$t, f32:$f, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVSrr (fcond2cc $cond),
- (FCMPSrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVSrr, sub_f32> (fcond2cc $cond), (FCMPSrr $l, $r),
+ $t, $f)>;
def : Pat<(f32 (selectcc f64:$l, f64:$r, f32:$t, f32:$f, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVDrr (fcond2cc $cond),
- (FCMPDrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVDrr, sub_f32> (fcond2cc $cond), (FCMPDrr $l, $r),
+ $t, $f)>;
+def : Pat<(f32 (selectcc f128:$l, f128:$r, f32:$t, f32:$f, cond:$cond)),
+ (cmov32rr<CMOVDrr, sub_f32> (fcond2cc $cond), (FCMPQrr $l, $r),
+ $t, $f)>;
+
+def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)),
+ (cmovrr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)),
+ (cmovrr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)),
+ (cmovrr<CMOVLrr> (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)),
+ (cmovrr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)),
+ (cmovrr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)),
+ (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc f128:$l, f128:$r, f64:$t, f64:$f, cond:$cond)),
+ (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>;
+
+def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCSIOp:$cond)),
+ (cmov128rr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCUIOp:$cond)),
+ (cmov128rr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCSIOp:$cond)),
+ (cmov128rr<CMOVLrr> (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCUIOp:$cond)),
+ (cmov128rr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc f32:$l, f32:$r, f128:$t, f128:$f, cond:$cond)),
+ (cmov128rr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc f64:$l, f64:$r, f128:$t, f128:$f, cond:$cond)),
+ (cmov128rr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc f128:$l, f128:$r, f128:$t, f128:$f, cond:$cond)),
+ (cmov128rr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>;
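Taken together, these selectcc patterns cover a conditional whose condition is itself a comparison. A hedged example for the f128-result case (if the compiler chooses a conditional move rather than a branch):

// Compare two doubles, then choose between two f128 values:
// FCMPDrr feeds one CMOVDrr per 64-bit half of the f128 result (cmov128rr).
long double pick(double a, double b, long double t, long double f) {
  return a < b ? t : f;
}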
// Generic SELECT pattern matches
// Use cmov.w for all cases since %pred holds i32.
//
// CMOV.w.ne %res, %tval, %tmp ; set tval if %tmp is true
+def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)),
+ (cmov32rr<CMOVWrr, sub_i32> CC_INE, $pred, $t, $f)>;
+def : Pat<(i32 (select i32:$pred, (i32 mimm:$t), i32:$f)),
+ (cmov32rm<CMOVWrm, sub_i32> CC_INE, $pred, $t, $f)>;
+def : Pat<(i32 (select i32:$pred, i32:$t, (i32 mimm:$f))),
+ (cmov32rm<CMOVWrm, sub_i32> CC_IEQ, $pred, $f, $t)>;
+
def : Pat<(i64 (select i32:$pred, i64:$t, i64:$f)),
- (CMOVWrr CC_INE, $pred, $t, $f)>;
+ (cmovrr<CMOVWrr> CC_INE, $pred, $t, $f)>;
+def : Pat<(i64 (select i32:$pred, (i64 mimm:$t), i64:$f)),
+ (cmovrm<CMOVWrm, MIMM> CC_INE, $pred, $t, $f)>;
+def : Pat<(i64 (select i32:$pred, i64:$t, (i64 mimm:$f))),
+ (cmovrm<CMOVWrm, MIMM> CC_IEQ, $pred, $f, $t)>;
-def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)),
- (EXTRACT_SUBREG
- (CMOVWrr CC_INE, $pred,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)),
+ (cmov32rr<CMOVWrr, sub_f32> CC_INE, $pred, $t, $f)>;
+def : Pat<(f32 (select i32:$pred, (f32 mimmfp:$t), f32:$f)),
+ (cmov32rm<CMOVWrm, sub_f32, MIMMFP> CC_INE, $pred, $t, $f)>;
+def : Pat<(f32 (select i32:$pred, f32:$t, (f32 mimmfp:$f))),
+ (cmov32rm<CMOVWrm, sub_f32, MIMMFP> CC_IEQ, $pred, $f, $t)>;
def : Pat<(f64 (select i32:$pred, f64:$t, f64:$f)),
- (CMOVWrr CC_INE, $pred, $t, $f)>;
+ (cmovrr<CMOVWrr> CC_INE, $pred, $t, $f)>;
+def : Pat<(f64 (select i32:$pred, (f64 mimmfp:$t), f64:$f)),
+ (cmovrm<CMOVWrm, MIMMFP> CC_INE, $pred, $t, $f)>;
+def : Pat<(f64 (select i32:$pred, f64:$t, (f64 mimmfp:$f))),
+ (cmovrm<CMOVWrm, MIMMFP> CC_IEQ, $pred, $f, $t)>;
-def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)),
- (EXTRACT_SUBREG
- (CMOVWrr CC_INE, $pred,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+def : Pat<(f128 (select i32:$pred, f128:$t, f128:$f)),
+ (cmov128rr<CMOVWrr> CC_INE, $pred, $t, $f)>;
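For the plain select patterns, where the i32 predicate is already materialized, a short illustrative example:

#include <cstdint>

// CMOV.W with CC_INE on the existing predicate; mimm operands can fold
// into the CMOVWrm forms above.
int32_t sel_i32(int32_t pred, int32_t t, int32_t f) { return pred ? t : f; }
double  sel_f64(int32_t pred, double t, double f)   { return pred ? t : f; }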
// bitconvert
def : Pat<(f64 (bitconvert i64:$src)), (COPY_TO_REGCLASS $src, I64)>;
@@ -1982,24 +2267,48 @@ def : Pat<(f32 (bitconvert i32:$op)),
(EXTRACT_SUBREG (SLLri (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
$op, sub_i32), 32), sub_f32)>;
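The bitconvert patterns reinterpret bits without conversion: i64 <-> f64 is a plain register-class copy, while f32 <-> i32 is repositioned within the 64-bit register by a shift. A hedged sketch, written with memcpy rather than C++20 std::bit_cast:

#include <cstdint>
#include <cstring>

double   i64_as_f64(uint64_t x) { double d;   std::memcpy(&d, &x, sizeof d); return d; }
uint32_t f32_as_u32(float f)    { uint32_t u; std::memcpy(&u, &f, sizeof u); return u; }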
-// Bits operations pattern matchings.
-def : Pat<(i32 (ctpop i32:$src)),
- (EXTRACT_SUBREG (PCNTr (ANDrm (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), $src, sub_i32), !add(32, 64))), sub_i32)>;
-def : Pat<(i32 (ctlz i32:$src)),
- (EXTRACT_SUBREG (LDZr (SLLri (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>;
-def : Pat<(i64 (bswap i64:$src)),
- (BSWPri $src, 0)>;
-def : Pat<(i32 (bswap i32:$src)),
- (EXTRACT_SUBREG (BSWPri (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), sub_i32)>;
+// Optimize code sequence A, generated for `(unsigned char)c << 5`, into sequence B:
+// A) sla.w.sx %s0, %s0, 5
+// lea %s1, 224 ; 0xE0
+// and %s0, %s0, %s1
+// B) sla.w.sx %s0, %s0, 5
+// and %s0, %s0, (56)0
+
+def : Pat<(i32 (and i32:$val, 0xff)),
+ (EXTRACT_SUBREG
+ (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $val, sub_i32),
+ !add(56, 64)), sub_i32)>;
+def : Pat<(i32 (and i32:$val, 0xffff)),
+ (EXTRACT_SUBREG
+ (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $val, sub_i32),
+ !add(48, 64)), sub_i32)>;
+def : Pat<(i64 (and i64:$val, 0xffffffff)),
+ (ANDrm $val, !add(32, 64))>;
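With these patterns, masking by 0xff, 0xffff, or 0xffffffff is matched directly to an ANDrm with an M immediate (the (56)0 form in sequence B) rather than going through a separately materialized constant such as the `lea %s1, 224` in sequence A. A small illustrative example:

#include <cstdint>

// Masks representable as M immediates: (56)0, (48)0, (32)0.
uint32_t low_byte(uint32_t v) { return v & 0xffu; }
uint32_t low_half(uint32_t v) { return v & 0xffffu; }
uint64_t low_word(uint64_t v) { return v & 0xffffffffu; }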
+
+//===----------------------------------------------------------------------===//
+// Vector Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+// Custom intermediate ISDs.
+class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
+def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
+ [SDTCisVec<0>, IsVLVT<2>]>>;
+
+// Whether this is an all-true mask (assuming undef-bits above VL are all-true).
+def true_mask : PatLeaf<
+ (vec_broadcast (i32 nonzero), (i32 srcvalue))>;
+// Match any broadcast (ignoring VL).
+def any_broadcast : PatFrag<(ops node:$sx),
+ (vec_broadcast node:$sx, (i32 srcvalue))>;
+
+// Vector instructions.
+include "VEInstrVec.td"
+
+// The VE VL intrinsic patterns (vevlintrin).
+include "VEInstrIntrinsicVL.td"
-// Several special pattern matches to optimize code
+// Patterns and intermediate SD nodes (VEC_*).
+include "VEInstrPatternsVec.td"
-def : Pat<(i32 (and i32:$lhs, 0xff)),
- (AND32rm $lhs, !add(56, 64))>;
-def : Pat<(i32 (and i32:$lhs, 0xffff)),
- (AND32rm $lhs, !add(48, 64))>;
-def : Pat<(i32 (and i32:$lhs, 0xffffffff)),
- (AND32rm $lhs, !add(32, 64))>;
+// Patterns and intermediate SD nodes (VVP_*).
+include "VVPInstrPatternsVec.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
new file mode 100644
index 000000000000..9ec10838db05
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
@@ -0,0 +1,1604 @@
+def : Pat<(int_ve_vl_vld_vssl i64:$sy, i64:$sz, i32:$vl), (VLDrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld_vssl simm7:$I, i64:$sz, i32:$vl), (VLDirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu_vssl i64:$sy, i64:$sz, i32:$vl), (VLDUrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDUrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu_vssl simm7:$I, i64:$sz, i32:$vl), (VLDUirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDUirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldunc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDUNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldunc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDUNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldunc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDUNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldunc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDUNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLSXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLSXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLSXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLSXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLZXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLZXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLZXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLZXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2d_vssl i64:$sy, i64:$sz, i32:$vl), (VLD2Drrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2d_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLD2Drrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2d_vssl simm7:$I, i64:$sz, i32:$vl), (VLD2Dirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2d_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLD2Dirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2dnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLD2DNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2dnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLD2DNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2dnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLD2DNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2dnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLD2DNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2d_vssl i64:$sy, i64:$sz, i32:$vl), (VLDU2Drrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2d_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2Drrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2d_vssl simm7:$I, i64:$sz, i32:$vl), (VLDU2Dirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2d_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2Dirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDU2DNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2DNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDU2DNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2DNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DSXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DSXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DSXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DSXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DZXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DZXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vst_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstunc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstunc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstunc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstunc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pfchv_ssl i64:$sy, i64:$sz, i32:$vl), (PFCHVrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_pfchv_ssl simm7:$I, i64:$sz, i32:$vl), (PFCHVirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_pfchvnc_ssl i64:$sy, i64:$sz, i32:$vl), (PFCHVNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_pfchvnc_ssl simm7:$I, i64:$sz, i32:$vl), (PFCHVNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_lvm_mmss v256i1:$ptm, uimm6:$N, i64:$sz), (LVMir_m (ULO7 $N), i64:$sz, v256i1:$ptm)>;
+def : Pat<(int_ve_vl_lvm_MMss v512i1:$ptm, uimm6:$N, i64:$sz), (LVMyir_y (ULO7 $N), i64:$sz, v512i1:$ptm)>;
+def : Pat<(int_ve_vl_svm_sms v256i1:$vmz, uimm6:$N), (SVMmi v256i1:$vmz, (ULO7 $N))>;
+def : Pat<(int_ve_vl_svm_sMs v512i1:$vmz, uimm6:$N), (SVMyi v512i1:$vmz, (ULO7 $N))>;
+def : Pat<(int_ve_vl_vbrdd_vsl f64:$sy, i32:$vl), (VBRDrl f64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vbrdd_vsvl f64:$sy, v256f64:$pt, i32:$vl), (VBRDrl_v f64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdd_vsmvl f64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDrml_v f64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdl_vsl i64:$sy, i32:$vl), (VBRDrl i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vbrdl_vsvl i64:$sy, v256f64:$pt, i32:$vl), (VBRDrl_v i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdl_vsmvl i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDrml_v i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdl_vsl simm7:$I, i32:$vl), (VBRDil (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vbrdl_vsvl simm7:$I, v256f64:$pt, i32:$vl), (VBRDil_v (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdl_vsmvl simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDiml_v (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrds_vsl f32:$sy, i32:$vl), (VBRDUrl f32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vbrds_vsvl f32:$sy, v256f64:$pt, i32:$vl), (VBRDUrl_v f32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrds_vsmvl f32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDUrml_v f32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdw_vsl i32:$sy, i32:$vl), (VBRDLrl i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vbrdw_vsvl i32:$sy, v256f64:$pt, i32:$vl), (VBRDLrl_v i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdw_vsmvl i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDLrml_v i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdw_vsl simm7:$I, i32:$vl), (VBRDLil (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vbrdw_vsvl simm7:$I, v256f64:$pt, i32:$vl), (VBRDLil_v (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdw_vsmvl simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDLiml_v (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvbrd_vsl i64:$sy, i32:$vl), (PVBRDrl i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvbrd_vsvl i64:$sy, v256f64:$pt, i32:$vl), (PVBRDrl_v i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvbrd_vsMvl i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVBRDrml_v i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmv_vsvl uimm7:$N, v256f64:$vz, i32:$vl), (VMVivl (ULO7 $N), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmv_vsvvl uimm7:$N, v256f64:$vz, v256f64:$pt, i32:$vl), (VMVivl_v (ULO7 $N), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmv_vsvmvl uimm7:$N, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMVivml_v (ULO7 $N), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VADDULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDULivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vadduw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDUWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vadduw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDUWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vadduw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvaddu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVADDUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvaddu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDUvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvaddu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVADDUrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvaddu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvaddu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvaddu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvadds_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVADDSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvadds_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvadds_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVADDSrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvadds_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvadds_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvadds_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VADDSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VSUBULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBULivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBUWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBUWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVSUBUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsubu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBUvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVSUBUrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsubu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVSUBSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsubs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubs_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVSUBSrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsubs_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubs_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubs_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VSUBSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMULULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULULivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmuluw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULUWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmuluw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULUWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmuluw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMULSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulslw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSLWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulslw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulslw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULSLWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulslw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulslw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSLWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulslw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VDIVULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVULivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVUWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVUWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvsl v256f64:$vy, i64:$sy, i32:$vl), (VDIVULvrl v256f64:$vy, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivul_vvsvl v256f64:$vy, i64:$sy, v256f64:$pt, i32:$vl), (VDIVULvrl_v v256f64:$vy, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVULvil v256f64:$vy, (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vdivul_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVULvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvsmvl v256f64:$vy, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULvrml_v v256f64:$vy, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVUWvrl v256f64:$vy, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivuw_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVUWvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVUWvil v256f64:$vy, (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vdivuw_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVUWvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVSWSXvrl v256f64:$vy, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVSWSXvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSWSXvil v256f64:$vy, (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSWSXvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVSWZXvrl v256f64:$vy, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVSWZXvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSWZXvil v256f64:$vy, (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSWZXvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VDIVSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvsl v256f64:$vy, i64:$sy, i32:$vl), (VDIVSLvrl v256f64:$vy, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivsl_vvsvl v256f64:$vy, i64:$sy, v256f64:$pt, i32:$vl), (VDIVSLvrl_v v256f64:$vy, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSLvil v256f64:$vy, (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vdivsl_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSLvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvsmvl v256f64:$vy, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLvrml_v v256f64:$vy, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VCMPULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPULivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPUWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPUWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmpu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVCMPUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcmpu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPUvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmpu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVCMPUrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcmpu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmpu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmpu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmps_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVCMPSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcmps_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmps_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVCMPSrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcmps_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmps_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmps_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VCMPSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMAXSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMAXSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmaxs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVMAXSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvmaxs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMAXSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmaxs_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVMAXSrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvmaxs_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMAXSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmaxs_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMAXSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmaxs_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMAXSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMINSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMINSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmins_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVMINSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvmins_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMINSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmins_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVMINSrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvmins_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMINSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmins_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMINSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmins_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMINSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMAXSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMINSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vand_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VANDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vand_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VANDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vand_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VANDrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vand_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VANDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vand_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VANDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vand_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VANDrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvand_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVANDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvand_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVANDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvand_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVANDrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvand_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVANDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvand_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVANDvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvand_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVANDrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VORrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vor_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VORvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vor_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VORrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVORrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvor_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVORvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvor_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVORrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vxor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VXORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vxor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VXORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vxor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VXORrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vxor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VXORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vxor_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VXORvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vxor_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VXORrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvxor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVXORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvxor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVXORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvxor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVXORrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvxor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVXORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvxor_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVXORvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvxor_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVXORrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_veqv_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VEQVvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_veqv_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VEQVvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_veqv_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VEQVrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_veqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_veqv_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEQVvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_veqv_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEQVrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pveqv_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVEQVvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pveqv_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pveqv_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVEQVrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pveqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pveqv_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pveqv_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vseq_vl i32:$vl), (VSEQl i32:$vl)>;
+def : Pat<(int_ve_vl_vseq_vvl v256f64:$pt, i32:$vl), (VSEQl_v i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvseqlo_vl i32:$vl), (PVSEQLOl i32:$vl)>;
+def : Pat<(int_ve_vl_pvseqlo_vvl v256f64:$pt, i32:$vl), (PVSEQLOl_v i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsequp_vl i32:$vl), (PVSEQUPl i32:$vl)>;
+def : Pat<(int_ve_vl_pvsequp_vvl v256f64:$pt, i32:$vl), (PVSEQUPl_v i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvseq_vl i32:$vl), (PVSEQl i32:$vl)>;
+def : Pat<(int_ve_vl_pvseq_vvl v256f64:$pt, i32:$vl), (PVSEQl_v i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsll_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSLLvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsll_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSLLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLLvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vsll_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLLvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsll_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSLLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsll_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSLLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsll_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSLLvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsll_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSLLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsll_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLLvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsll_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLLvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrl_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSRLvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrl_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSRLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRLvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vsrl_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRLvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsrl_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSRLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsrl_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSRLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsrl_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSRLvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsrl_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSRLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsrl_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRLvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsrl_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRLvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLAWSXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslawsx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLAWSXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSLAWSXvrl v256f64:$vz, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslawsx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSLAWSXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLAWSXvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vslawsx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLAWSXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLAWZXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslawzx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLAWZXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSLAWZXvrl v256f64:$vz, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslawzx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSLAWZXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLAWZXvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vslawzx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLAWZXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsla_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSLAvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsla_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSLAvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsla_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSLAvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsla_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSLAvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsla_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLAvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsla_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLAvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLALvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslal_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLALvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSLALvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslal_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSLALvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLALvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vslal_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLALvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRAWSXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawsx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRAWSXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSRAWSXvrl v256f64:$vz, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSRAWSXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRAWSXvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRAWSXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRAWZXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawzx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRAWZXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSRAWZXvrl v256f64:$vz, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSRAWZXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRAWZXvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRAWZXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsra_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSRAvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsra_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSRAvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsra_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSRAvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsra_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSRAvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsra_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRAvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsra_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRAvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRALvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsral_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRALvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSRALvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsral_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSRALvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRALvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vsral_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRALvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsfa_vvssl v256f64:$vz, i64:$sy, i64:$sz, i32:$vl), (VSFAvrrl v256f64:$vz, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsfa_vvssvl v256f64:$vz, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VSFAvrrl_v v256f64:$vz, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsfa_vvssl v256f64:$vz, simm7:$I, i64:$sz, i32:$vl), (VSFAvirl v256f64:$vz, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsfa_vvssvl v256f64:$vz, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VSFAvirl_v v256f64:$vz, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsfa_vvssmvl v256f64:$vz, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSFAvrrml_v v256f64:$vz, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsfa_vvssmvl v256f64:$vz, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSFAvirml_v v256f64:$vz, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFADDDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfaddd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFADDDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfaddd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFADDSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfadds_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFADDSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfadds_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFADDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFADDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFADDrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfadd_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFADDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFADDvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFADDrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFSUBDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFSUBDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFSUBSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFSUBSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFSUBvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfsub_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFSUBvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFSUBrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfsub_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFSUBrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFSUBvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFSUBrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMULDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuld_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMULDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuld_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMULSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuls_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMULSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuls_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMULvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMULrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFDIVDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFDIVDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFDIVSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFDIVSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsqrtd_vvl v256f64:$vy, i32:$vl), (VFSQRTDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsqrtd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFSQRTDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsqrts_vvl v256f64:$vy, i32:$vl), (VFSQRTSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsqrts_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFSQRTSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFCMPDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmpd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFCMPDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmpd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFCMPSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmps_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFCMPSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmps_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFCMPvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfcmp_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFCMPvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFCMPrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfcmp_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFCMPrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFCMPvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFCMPrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMAXDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMAXDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMAXSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMAXSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMAXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmax_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMAXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMAXrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmax_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMAXrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMAXvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMAXrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMINDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmind_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMINDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmind_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMINSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmins_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMINSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmins_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMINvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmin_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMINvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMINrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmin_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMINrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMINvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMINrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmadd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmadd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFMADDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmadd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmads_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmads_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFMADSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmads_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMADvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmad_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMADrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmad_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFMADvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmad_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFMSBDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbs_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbs_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFMSBSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbs_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMSBvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmsb_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMSBrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmsb_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFMSBvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmsb_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmadd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmadd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFNMADDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmadd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmads_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmads_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFNMADSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmads_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMADvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmad_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMADrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmad_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFNMADvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmad_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFNMSBDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbs_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFNMSBSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMSBvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMSBrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmsb_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFNMSBvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrcpd_vvl v256f64:$vy, i32:$vl), (VRCPDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrcpd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRCPDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrcps_vvl v256f64:$vy, i32:$vl), (VRCPSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrcps_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRCPSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvrcp_vvl v256f64:$vy, i32:$vl), (PVRCPvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvrcp_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRCPvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrtd_vvl v256f64:$vy, i32:$vl), (VRSQRTDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrtd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrts_vvl v256f64:$vy, i32:$vl), (VRSQRTSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrts_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvrsqrt_vvl v256f64:$vy, i32:$vl), (PVRSQRTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvrsqrt_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRSQRTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrtdnex_vvl v256f64:$vy, i32:$vl), (VRSQRTDNEXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrtdnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTDNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrtsnex_vvl v256f64:$vy, i32:$vl), (VRSQRTSNEXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrtsnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTSNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvrsqrtnex_vvl v256f64:$vy, i32:$vl), (PVRSQRTNEXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvrsqrtnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRSQRTNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsx_vvl v256f64:$vy, i32:$vl), (VCVTWDSXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDSXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDSXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsxrz_vvl v256f64:$vy, i32:$vl), (VCVTWDSXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdsxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDSXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDSXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzx_vvl v256f64:$vy, i32:$vl), (VCVTWDZXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDZXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDZXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzxrz_vvl v256f64:$vy, i32:$vl), (VCVTWDZXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdzxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDZXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDZXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssx_vvl v256f64:$vy, i32:$vl), (VCVTWSSXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwssx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSSXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSSXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssxrz_vvl v256f64:$vy, i32:$vl), (VCVTWSSXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwssxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSSXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSSXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszx_vvl v256f64:$vy, i32:$vl), (VCVTWSZXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwszx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSZXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSZXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszxrz_vvl v256f64:$vy, i32:$vl), (VCVTWSZXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwszxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSZXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSZXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtws_vvl v256f64:$vy, i32:$vl), (PVCVTWSvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcvtws_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTWSvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtws_vvMvl v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCVTWSvml_v RD_NONE, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtwsrz_vvl v256f64:$vy, i32:$vl), (PVCVTWSvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcvtwsrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTWSvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtwsrz_vvMvl v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCVTWSvml_v RD_RZ, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtld_vvl v256f64:$vy, i32:$vl), (VCVTLDvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtld_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTLDvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtld_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTLDvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtldrz_vvl v256f64:$vy, i32:$vl), (VCVTLDvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtldrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTLDvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtldrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTLDvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtdw_vvl v256f64:$vy, i32:$vl), (VCVTDWvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtdw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtsw_vvl v256f64:$vy, i32:$vl), (VCVTSWvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtsw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTSWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtsw_vvl v256f64:$vy, i32:$vl), (PVCVTSWvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcvtsw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTSWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtdl_vvl v256f64:$vy, i32:$vl), (VCVTDLvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtdl_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDLvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtds_vvl v256f64:$vy, i32:$vl), (VCVTDSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtds_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtsd_vvl v256f64:$vy, i32:$vl), (VCVTSDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtsd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTSDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrg_vvvml v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGvvml v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrg_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrg_vsvml i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGrvml i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrg_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrg_vsvml simm7:$I, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGivml (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrg_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrgw_vvvMl v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl), (VMRGWvvml v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrgw_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (VMRGWvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vshf_vvvsl v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl), (VSHFvvrl v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vshf_vvvsvl v256f64:$vy, v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSHFvvrl_v v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vshf_vvvsl v256f64:$vy, v256f64:$vz, uimm6:$N, i32:$vl), (VSHFvvil v256f64:$vy, v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vshf_vvvsvl v256f64:$vy, v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSHFvvil_v v256f64:$vy, v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcp_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vex_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEXvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmklat_ml i32:$vl), (VFMKLal i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklaf_ml i32:$vl), (VFMKLnal i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkat_Ml i32:$vl), (VFMKyal i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkaf_Ml i32:$vl), (VFMKynal i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgt_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IG, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkllt_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IL, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkllt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklne_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_INE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkleq_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IEQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkleq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklge_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IGE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklle_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_ILE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnum_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgtnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklltnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkleqnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkleqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkllenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkllenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgt_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IG, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwlt_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IL, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwlt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwne_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_INE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkweq_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IEQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkweq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwge_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IGE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwle_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_ILE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnum_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgtnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwltnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkweqnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkweqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwlenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwlenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogt_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IG, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgt_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IG, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlolt_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IL, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuplt_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IL, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlolt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuplt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlone_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_INE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupne_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_INE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlone_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloeq_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IEQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupeq_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IEQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloge_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IGE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupge_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IGE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlole_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_ILE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuple_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_ILE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlole_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuple_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonum_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnum_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogtnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgtnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloltnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupltnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlolenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuplenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlolenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuplenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgt_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IG, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IG, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlt_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IL, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IL, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwne_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_INE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwne_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_INE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkweq_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IEQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkweq_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IEQ, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwge_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IGE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwge_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IGE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwle_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_ILE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwle_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_ILE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnum_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnum_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NUM, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgtnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgtnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_GNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwltnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwltnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_LNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkweqnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkweqnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_EQNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_GENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_LENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgt_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_G, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdlt_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_L, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdlt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdne_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdeq_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_EQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdge_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdle_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnum_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgtnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdltnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdeqnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdlenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdlenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgt_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_G, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkslt_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_L, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkslt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksne_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkseq_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_EQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkseq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksge_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_GE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksle_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnum_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgtnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksltnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkseqnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkseqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkslenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkslenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogt_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_G, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgt_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_G, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslolt_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_L, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuplt_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_L, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslolt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuplt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslone_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupne_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslone_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloeq_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_EQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupeq_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_EQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloge_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupge_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslole_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuple_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslole_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuple_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonum_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnum_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogtnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgtnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloltnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupltnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslolenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuplenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslolenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuplenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgt_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_G, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_G, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslt_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_L, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_L, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksne_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksne_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkseq_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_EQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkseq_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_EQ, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksge_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksge_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksle_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksle_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnum_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnum_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NUM, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgtnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgtnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksltnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksltnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkseqnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkseqnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_EQNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsumwsx_vvl v256f64:$vy, i32:$vl), (VSUMWSXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsumwsx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWSXvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsumwzx_vvl v256f64:$vy, i32:$vl), (VSUMWZXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsumwzx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWZXvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsuml_vvl v256f64:$vy, i32:$vl), (VSUMLvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsuml_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMLvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsumd_vvl v256f64:$vy, i32:$vl), (VFSUMDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsumd_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMDvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsums_vvl v256f64:$vy, i32:$vl), (VFSUMSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsums_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMSvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxswfstsx_vvl v256f64:$vy, i32:$vl), (VRMAXSWFSTSXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxswfstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWFSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrmaxswlstsx_vvl v256f64:$vy, i32:$vl), (VRMAXSWLSTSXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxswlstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWLSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrmaxswfstzx_vvl v256f64:$vy, i32:$vl), (VRMAXSWFSTZXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxswfstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWFSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrmaxswlstzx_vvl v256f64:$vy, i32:$vl), (VRMAXSWLSTZXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxswlstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWLSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminswfstsx_vvl v256f64:$vy, i32:$vl), (VRMINSWFSTSXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminswfstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWFSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminswlstsx_vvl v256f64:$vy, i32:$vl), (VRMINSWLSTSXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminswlstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWLSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminswfstzx_vvl v256f64:$vy, i32:$vl), (VRMINSWFSTZXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminswfstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWFSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminswlstzx_vvl v256f64:$vy, i32:$vl), (VRMINSWLSTZXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminswlstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWLSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrmaxslfst_vvl v256f64:$vy, i32:$vl), (VRMAXSLFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxslfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSLFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrmaxsllst_vvl v256f64:$vy, i32:$vl), (VRMAXSLLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxsllst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSLLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminslfst_vvl v256f64:$vy, i32:$vl), (VRMINSLFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminslfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSLFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminsllst_vvl v256f64:$vy, i32:$vl), (VRMINSLLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminsllst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSLLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmaxdfst_vvl v256f64:$vy, i32:$vl), (VFRMAXDFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmaxdfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXDFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmaxdlst_vvl v256f64:$vy, i32:$vl), (VFRMAXDLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmaxdlst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXDLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmaxsfst_vvl v256f64:$vy, i32:$vl), (VFRMAXSFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmaxsfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXSFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmaxslst_vvl v256f64:$vy, i32:$vl), (VFRMAXSLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmaxslst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXSLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmindfst_vvl v256f64:$vy, i32:$vl), (VFRMINDFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmindfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINDFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmindlst_vvl v256f64:$vy, i32:$vl), (VFRMINDLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmindlst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINDLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrminsfst_vvl v256f64:$vy, i32:$vl), (VFRMINSFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrminsfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINSFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrminslst_vvl v256f64:$vy, i32:$vl), (VFRMINSLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrminslst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINSLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrand_vvl v256f64:$vy, i32:$vl), (VRANDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrand_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRANDvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vror_vvl v256f64:$vy, i32:$vl), (VRORvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vror_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRORvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vrxor_vvl v256f64:$vy, i32:$vl), (VRXORvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrxor_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRXORvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTUvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTUvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTUvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTUvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTUvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTUvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTUvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTUvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTUvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTUvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTUvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTUvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTUNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTUNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTUNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTUNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTUNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTUNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTUNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTUNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTUNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTUNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTUNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTUNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLSXvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLSXvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLSXvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLSXvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLSXNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLSXNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLSXNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLSXNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLZXvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLZXvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLZXvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLZXvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLZXNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLZXNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLZXNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLZXNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_andm_mmm v256i1:$vmy, v256i1:$vmz), (ANDMmm v256i1:$vmy, v256i1:$vmz)>;
+def : Pat<(int_ve_vl_andm_MMM v512i1:$vmy, v512i1:$vmz), (ANDMyy v512i1:$vmy, v512i1:$vmz)>;
+def : Pat<(int_ve_vl_orm_mmm v256i1:$vmy, v256i1:$vmz), (ORMmm v256i1:$vmy, v256i1:$vmz)>;
+def : Pat<(int_ve_vl_orm_MMM v512i1:$vmy, v512i1:$vmz), (ORMyy v512i1:$vmy, v512i1:$vmz)>;
+def : Pat<(int_ve_vl_xorm_mmm v256i1:$vmy, v256i1:$vmz), (XORMmm v256i1:$vmy, v256i1:$vmz)>;
+def : Pat<(int_ve_vl_xorm_MMM v512i1:$vmy, v512i1:$vmz), (XORMyy v512i1:$vmy, v512i1:$vmz)>;
+def : Pat<(int_ve_vl_eqvm_mmm v256i1:$vmy, v256i1:$vmz), (EQVMmm v256i1:$vmy, v256i1:$vmz)>;
+def : Pat<(int_ve_vl_eqvm_MMM v512i1:$vmy, v512i1:$vmz), (EQVMyy v512i1:$vmy, v512i1:$vmz)>;
+def : Pat<(int_ve_vl_nndm_mmm v256i1:$vmy, v256i1:$vmz), (NNDMmm v256i1:$vmy, v256i1:$vmz)>;
+def : Pat<(int_ve_vl_nndm_MMM v512i1:$vmy, v512i1:$vmz), (NNDMyy v512i1:$vmy, v512i1:$vmz)>;
+def : Pat<(int_ve_vl_negm_mm v256i1:$vmy), (NEGMm v256i1:$vmy)>;
+def : Pat<(int_ve_vl_negm_MM v512i1:$vmy), (NEGMy v512i1:$vmy)>;
+def : Pat<(int_ve_vl_pcvm_sml v256i1:$vmy, i32:$vl), (PCVMml v256i1:$vmy, i32:$vl)>;
+def : Pat<(int_ve_vl_lzvm_sml v256i1:$vmy, i32:$vl), (LZVMml v256i1:$vmy, i32:$vl)>;
+def : Pat<(int_ve_vl_tovm_sml v256i1:$vmy, i32:$vl), (TOVMml v256i1:$vmy, i32:$vl)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td
new file mode 100644
index 000000000000..69ea133ceed0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td
@@ -0,0 +1,64 @@
+/// Pattern matching for VEL intrinsic instructions.
+
+/// Intrinsic patterns written by hand.
+
+// SVOB pattern.
+def : Pat<(int_ve_vl_svob), (SVOB)>;
+
+// Pack patterns.
+def : Pat<(i64 (int_ve_vl_pack_f32p ADDRrii:$addr0, ADDRrii:$addr1)),
+ (ORrr (f2l (LDUrii MEMrii:$addr0)),
+ (i2l (LDLZXrii MEMrii:$addr1)))>;
+
+def : Pat<(i64 (int_ve_vl_pack_f32a ADDRrii:$addr)),
+ (MULULrr
+ (i2l (LDLZXrii MEMrii:$addr)),
+ (LEASLrii (ANDrm (LEAzii 0, 0, (LO32 (i64 0x0000000100000001))),
+ !add(32, 64)), 0,
+ (HI32 (i64 0x0000000100000001))))>;
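+// Illustrative note on the two pack patterns above: int_ve_vl_pack_f32p ORs
+// together two 32-bit loads, one performed with LDUrii/f2l and one with
+// LDLZXrii/i2l, so the two values end up in opposite halves of the 64-bit
+// result.  int_ve_vl_pack_f32a replicates a single 32-bit load into both
+// halves by multiplying with 0x0000000100000001 (materialized by the
+// LEA/AND/LEASL sequence).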
+
+// The extract/insert patterns.
+def : Pat<(v256i1 (int_ve_vl_extract_vm512u v512i1:$vm)),
+ (EXTRACT_SUBREG v512i1:$vm, sub_vm_even)>;
+
+def : Pat<(v256i1 (int_ve_vl_extract_vm512l v512i1:$vm)),
+ (EXTRACT_SUBREG v512i1:$vm, sub_vm_odd)>;
+
+def : Pat<(v512i1 (int_ve_vl_insert_vm512u v512i1:$vmx, v256i1:$vmy)),
+ (INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_even)>;
+
+def : Pat<(v512i1 (int_ve_vl_insert_vm512l v512i1:$vmx, v256i1:$vmy)),
+ (INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_odd)>;
+
+// VMRG patterns.
+def : Pat<(int_ve_vl_vmrgw_vsvMl i32:$sy, v256f64:$vz, v512i1:$vm, i32:$vl),
+ (VMRGWrvml (i2l i32:$sy), v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrgw_vsvMvl i32:$sy, v256f64:$vz, v512i1:$vm,
+ v256f64:$pt, i32:$vl),
+ (VMRGWrvml_v (i2l i32:$sy), v256f64:$vz, v512i1:$vm, i32:$vl,
+ v256f64:$pt)>;
+
+// VMV patterns.
+def : Pat<(int_ve_vl_vmv_vsvl i32:$sy, v256f64:$vz, i32:$vl),
+ (VMVrvl (i2l i32:$sy), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmv_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl),
+ (VMVrvl_v (i2l i32:$sy), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmv_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt,
+ i32:$vl),
+ (VMVrvml_v (i2l i32:$sy), v256f64:$vz, v256i1:$vm, i32:$vl,
+ v256f64:$pt)>;
+
+// LSV patterns.
+def : Pat<(int_ve_vl_lsv_vvss v256f64:$pt, i32:$sy, i64:$sz),
+ (LSVrr_v (i2l i32:$sy), i64:$sz, v256f64:$pt)>;
+
+// LVS patterns.
+def : Pat<(int_ve_vl_lvsl_svs v256f64:$vx, i32:$sy),
+ (LVSvr v256f64:$vx, (i2l i32:$sy))>;
+def : Pat<(int_ve_vl_lvsd_svs v256f64:$vx, i32:$sy),
+ (LVSvr v256f64:$vx, (i2l i32:$sy))>;
+def : Pat<(int_ve_vl_lvss_svs v256f64:$vx, i32:$sy),
+ (l2f (LVSvr v256f64:$vx, (i2l i32:$sy)))>;
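+// Note: lvsl and lvsd both return the selected 64-bit element unchanged
+// (LVSvr); lvss additionally extracts the f32 part of that element with the
+// l2f cast.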
+
+/// Intrinsic patterns automatically generated.
+include "VEInstrIntrinsicVL.gen.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td
new file mode 100644
index 000000000000..0084876f9f1b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -0,0 +1,91 @@
+//===-- VEInstrPatternsVec.td - VEC_-type SDNodes and isel for VE Target --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the VEC_* prefixed intermediate SDNodes and their
+// isel patterns.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp,
+ SDNodeXForm ImmCast, SDNodeXForm SuperRegCast> {
+ // VBRDil
+ def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)),
+ (VBRDil (ImmCast $sy), i32:$vl)>;
+
+ // VBRDrl
+ def : Pat<(v32 (vec_broadcast s32:$sy, i32:$vl)),
+ (VBRDrl (SuperRegCast $sy), i32:$vl)>;
+}
+
+multiclass vbrd_elem64<ValueType v64, ValueType s64,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+ // VBRDil
+ def : Pat<(v64 (vec_broadcast (s64 ImmOp:$sy), i32:$vl)),
+ (VBRDil (ImmCast $sy), i32:$vl)>;
+
+ // VBRDrl
+ def : Pat<(v64 (vec_broadcast s64:$sy, i32:$vl)),
+ (VBRDrl s64:$sy, i32:$vl)>;
+}
+
+multiclass extract_insert_elem32<ValueType v32, ValueType s32,
+ SDNodeXForm SubRegCast,
+ SDNodeXForm SuperRegCast> {
+ // LVSvi
+ def: Pat<(s32 (extractelt v32:$vec, uimm7:$idx)),
+ (SubRegCast (LVSvi v32:$vec, (ULO7 $idx)))>;
+ // LVSvr
+ def: Pat<(s32 (extractelt v32:$vec, i64:$idx)),
+ (SubRegCast (LVSvr v32:$vec, $idx))>;
+
+ // LSVir
+ def: Pat<(v32 (insertelt v32:$vec, s32:$val, uimm7:$idx)),
+ (LSVir_v (ULO7 $idx), (SuperRegCast $val), $vec)>;
+ // LSVrr
+ def: Pat<(v32 (insertelt v32:$vec, s32:$val, i64:$idx)),
+ (LSVrr_v $idx, (SuperRegCast $val), $vec)>;
+}
+
+multiclass extract_insert_elem64<ValueType v64, ValueType s64> {
+ // LVSvi
+ def: Pat<(s64 (extractelt v64:$vec, uimm7:$idx)),
+ (LVSvi v64:$vec, (ULO7 $idx))>;
+ // LVSvr
+ def: Pat<(s64 (extractelt v64:$vec, i64:$idx)),
+ (LVSvr v64:$vec, $idx)>;
+
+ // LSVir
+ def: Pat<(v64 (insertelt v64:$vec, s64:$val, uimm7:$idx)),
+ (LSVir_v (ULO7 $idx), $val, $vec)>;
+ // LSVrr
+ def: Pat<(v64 (insertelt v64:$vec, s64:$val, i64:$idx)),
+ (LSVrr_v $idx, $val, $vec)>;
+}
+
+multiclass patterns_elem32<ValueType v32, ValueType s32,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast,
+ SDNodeXForm SubRegCast, SDNodeXForm SuperRegCast> {
+ defm : vbrd_elem32<v32, s32, ImmOp, ImmCast, SuperRegCast>;
+ defm : extract_insert_elem32<v32, s32, SubRegCast, SuperRegCast>;
+}
+
+multiclass patterns_elem64<ValueType v64, ValueType s64,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+ defm : vbrd_elem64<v64, s64, ImmOp, ImmCast>;
+ defm : extract_insert_elem64<v64, s64>;
+}
+
+defm : patterns_elem32<v256i32, i32, simm7, LO7, l2i, i2l>;
+defm : patterns_elem32<v256f32, f32, simm7fp, LO7FP, l2f, f2l>;
+
+defm : patterns_elem64<v256i64, i64, simm7, LO7>;
+defm : patterns_elem64<v256f64, f64, simm7fp, LO7FP>;
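+
+// Note: the elem32 instantiations go through sub/super-register casts
+// (l2i/i2l and l2f/f2l) because the underlying VBRD/LVS/LSV instructions
+// selected here operate on 64-bit registers, while the elem64 instantiations
+// need no casts.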
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td
new file mode 100644
index 000000000000..4a8476f7288a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td
@@ -0,0 +1,1510 @@
+//===----------------------------------------------------------------------===//
+// Vector Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions for VM512 modifications
+//===----------------------------------------------------------------------===//
+
+// LVM/SVM instructions using VM512
+let hasSideEffects = 0, isCodeGenOnly = 1 in {
+ let Constraints = "$vx = $vd", DisableEncoding = "$vd" in {
+ def LVMyir_y : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, I64:$sz, VM512:$vd),
+ "# pseudo LVM $vx, $sy, $sz, $vd">;
+ def LVMyim_y : Pseudo<(outs VM512:$vx),
+ (ins uimm3:$sy, mimm:$sz, VM512:$vd),
+ "# pseudo LVM $vx, $sy, $sz, $vd">;
+ }
+ def LVMyir : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, I64:$sz),
+ "# pseudo LVM $vx, $sy, $sz">;
+ def LVMyim : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, mimm:$sz),
+ "# pseudo LVM $vx, $sy, $sz">;
+ def SVMyi : Pseudo<(outs I64:$sx), (ins VM512:$vz, uimm3:$sy),
+ "# pseudo SVM $sx, $vz, $sy">;
+}
+
+// VFMK/VFMKW/VFMKS instructions using VM512
+let hasSideEffects = 0, isCodeGenOnly = 1, DisableEncoding = "$vl" in {
+ def VFMKyal : Pseudo<(outs VM512:$vmx), (ins I32:$vl),
+ "# pseudo-vfmk.at $vmx">;
+ def VFMKynal : Pseudo<(outs VM512:$vmx), (ins I32:$vl),
+ "# pseudo-vfmk.af $vmx">;
+ def VFMKWyvl : Pseudo<(outs VM512:$vmx),
+ (ins CCOp:$cf, V64:$vz, I32:$vl),
+ "# pseudo-vfmk.w.$cf $vmx, $vz">;
+ def VFMKWyvyl : Pseudo<(outs VM512:$vmx),
+ (ins CCOp:$cf, V64:$vz, VM512:$vm, I32:$vl),
+ "# pseudo-vfmk.w.$cf $vmx, $vz, $vm">;
+ def VFMKSyvl : Pseudo<(outs VM512:$vmx),
+ (ins CCOp:$cf, V64:$vz, I32:$vl),
+ "# pseudo-vfmk.s.$cf $vmx, $vz">;
+ def VFMKSyvyl : Pseudo<(outs VM512:$vmx),
+ (ins CCOp:$cf, V64:$vz, VM512:$vm, I32:$vl),
+ "# pseudo-vfmk.s.$cf $vmx, $vz, $vm">;
+}
+
+// ANDM/ORM/XORM/EQVM/NNDM/NEGM instructions using VM512
+let hasSideEffects = 0, isCodeGenOnly = 1 in {
+ def ANDMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz),
+ "# andm $vmx, $vmy, $vmz">;
+ def ORMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz),
+ "# orm $vmx, $vmy, $vmz">;
+ def XORMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz),
+ "# xorm $vmx, $vmy, $vmz">;
+ def EQVMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz),
+ "# eqvm $vmx, $vmy, $vmz">;
+ def NNDMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz),
+ "# nndm $vmx, $vmy, $vmz">;
+ def NEGMy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy),
+ "# negm $vmx, $vmy">;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//
+// Define all vector instructions described in the SX-Aurora TSUBASA
+// Architecture Guide here. For the mnemonics, we use those defined in the
+// Vector Engine Assembly Language Reference Manual.
+//
+// Some instructions can update existing data through an instruction sequence
+// like the following.
+//
+// lea %s0, 256
+// lea %s1, 128
+// lvl %s0
+// vbrd %v0, 2 # v0 = { 2, 2, 2, ..., 2, 2, 2 }
+// lvl %s1
+// vbrd %v0, 3 # v0 = { 3, 3, 3, ..., 3, 2, 2, 2, ..., 2, 2, 2 }
+//
+// In order to represent the above with virtual registers, we define
+// instructions with an additional base register operand and a `_v` suffix in
+// the mnemonic.
+//
+// lea t0, 256
+// lea t1, 128
+//   lvl t0
+// vbrd tv0, 2
+// lvl t1
+// vbrd_v tv1, 2, tv0
+//
+// We also have some instructions that use the VL register with a pseudo VL
+// value, marked with the following suffixes in the mnemonic.
+//
+// l: have an additional I32 register to represent the VL value.
+// L: have an additional VL register to represent the VL value.
+//===----------------------------------------------------------------------===//
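+
+// As an illustration of the naming scheme above, the VLD multiclasses below
+// expand into records such as VLDrr (no explicit VL operand), VLDrrl (extra
+// I32 VL operand), VLDrrL (extra VLS VL operand), and VLDrr_v / VLDrrl_v /
+// VLDrrL_v (the same forms with an additional $base operand), plus the
+// corresponding VLDNC* (vld.nc) variants.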
+
+//-----------------------------------------------------------------------------
+// Section 8.9 - Vector Load/Store and Move Instructions
+//-----------------------------------------------------------------------------
+
+// Multiclass for VLD instructions
+let mayLoad = 1, hasSideEffects = 0, Uses = [VL] in
+multiclass VLDbm<string opcStr, bits<8>opc, RegisterClass RC, dag dag_in,
+ string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RVM<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx, $sy, $sz")>;
+ let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base",
+ isCodeGenOnly = 1 in
+ def _v : RVM<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx, $sy, $sz")>;
+}
+multiclass VLDlm<string opcStr, bits<8>opc, RegisterClass RC, dag dag_in> {
+ defm "" : VLDbm<opcStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : VLDbm<opcStr, opc, RC, !con(dag_in, (ins I32:$vl)), "$vl,">;
+ defm L : VLDbm<opcStr, opc, RC, !con(dag_in, (ins VLS:$vl)), "$vl,">;
+ }
+}
+let VE_VLIndex = 3 in
+multiclass VLDtgm<string opcStr, bits<8>opc, RegisterClass RC> {
+ defm rr : VLDlm<opcStr, opc, RC, (ins I64:$sy, I64:$sz)>;
+ let cy = 0 in
+ defm ir : VLDlm<opcStr, opc, RC, (ins simm7:$sy, I64:$sz)>;
+ let cz = 0 in
+ defm rz : VLDlm<opcStr, opc, RC, (ins I64:$sy, zero:$sz)>;
+ let cy = 0, cz = 0 in
+ defm iz : VLDlm<opcStr, opc, RC, (ins simm7:$sy, zero:$sz)>;
+}
+multiclass VLDm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vc = 1 in defm "" : VLDtgm<opcStr, opc, RC>;
+ let vc = 0 in defm NC : VLDtgm<opcStr#".nc", opc, RC>;
+}
+
+// Section 8.9.1 - VLD (Vector Load)
+defm VLD : VLDm<"vld", 0x81, V64>;
+
+// Section 8.9.2 - VLDU (Vector Load Upper)
+defm VLDU : VLDm<"vldu", 0x82, V64>;
+
+// Section 8.9.3 - VLDL (Vector Load Lower)
+defm VLDLSX : VLDm<"vldl.sx", 0x83, V64>;
+let cx = 1 in defm VLDLZX : VLDm<"vldl.zx", 0x83, V64>;
+
+// Section 8.9.4 - VLD2D (Vector Load 2D)
+defm VLD2D : VLDm<"vld2d", 0xc1, V64>;
+
+// Section 8.9.5 - VLDU2D (Vector Load Upper 2D)
+defm VLDU2D : VLDm<"vldu2d", 0xc2, V64>;
+
+// Section 8.9.6 - VLDL2D (Vector Load Lower 2D)
+defm VLDL2DSX : VLDm<"vldl2d.sx", 0xc3, V64>;
+let cx = 1 in defm VLDL2DZX : VLDm<"vldl2d.zx", 0xc3, V64>;
+
+// Multiclass for VST instructions
+let mayStore = 1, hasSideEffects = 0, Uses = [VL] in
+multiclass VSTbm<string opcStr, string argStr, bits<8>opc, dag dag_in> {
+ def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>;
+ let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)),
+ !strconcat(opcStr, argStr)>;
+ def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)),
+ !strconcat(opcStr, argStr)>;
+ }
+}
+multiclass VSTmm<string opcStr, bits<8>opc, dag dag_in> {
+ defm "" : VSTbm<opcStr, " $vx, $sy, $sz", opc, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : VSTbm<opcStr, " $vx, $sy, $sz, $m", opc, !con(dag_in, (ins VM:$m))>;
+}
+let VE_VLIndex = 3 in
+multiclass VSTtgm<string opcStr, bits<8>opc, RegisterClass RC> {
+ defm rrv : VSTmm<opcStr, opc, (ins I64:$sy, I64:$sz, RC:$vx)>;
+ let cy = 0 in
+ defm irv : VSTmm<opcStr, opc, (ins simm7:$sy, I64:$sz, RC:$vx)>;
+ let cz = 0 in
+ defm rzv : VSTmm<opcStr, opc, (ins I64:$sy, zero:$sz, RC:$vx)>;
+ let cy = 0, cz = 0 in
+ defm izv : VSTmm<opcStr, opc, (ins simm7:$sy, zero:$sz, RC:$vx)>;
+}
+multiclass VSTm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vc = 1, cx = 0 in defm "" : VSTtgm<opcStr, opc, RC>;
+ let vc = 0, cx = 0 in defm NC : VSTtgm<opcStr#".nc", opc, RC>;
+ let vc = 1, cx = 1 in defm OT : VSTtgm<opcStr#".ot", opc, RC>;
+ let vc = 0, cx = 1 in defm NCOT : VSTtgm<opcStr#".nc.ot", opc, RC>;
+}
+
+// Section 8.9.7 - VST (Vector Store)
+defm VST : VSTm<"vst", 0x91, V64>;
+
+// Section 8.9.8 - VSTU (Vector Store Upper)
+defm VSTU : VSTm<"vstu", 0x92, V64>;
+
+// Section 8.9.9 - VSTL (Vector Store Lower)
+defm VSTL : VSTm<"vstl", 0x93, V64>;
+
+// Section 8.9.10 - VST2D (Vector Store 2D)
+defm VST2D : VSTm<"vst2d", 0xd1, V64>;
+
+// Section 8.9.11 - VSTU2D (Vector Store Upper 2D)
+defm VSTU2D : VSTm<"vstu2d", 0xd2, V64>;
+
+// Section 8.9.12 - VSTL2D (Vector Store Lower 2D)
+defm VSTL2D : VSTm<"vstl2d", 0xd3, V64>;
+
+// Multiclass for VGT instructions
+let mayLoad = 1, hasSideEffects = 0, Uses = [VL] in
+multiclass VGTbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in, string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RVM<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx, ", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base",
+ isCodeGenOnly = 1 in
+ def _v : RVM<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx, ", argStr)>;
+}
+multiclass VGTlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : VGTbm<opcStr, argStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : VGTbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)),
+ "$vl,">;
+ defm L : VGTbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)),
+ "$vl,">;
+ }
+}
+multiclass VGTmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : VGTlm<opcStr, argStr, opc, RC, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : VGTlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins VM:$m))>;
+}
+let VE_VLIndex = 4 in
+multiclass VGTlhm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm rr : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC,
+ !con(dag_in, (ins I64:$sy, I64:$sz))>;
+ let cy = 0 in
+ defm ir : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC,
+ !con(dag_in, (ins simm7:$sy, I64:$sz))>;
+ let cz = 0 in
+ defm rz : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC,
+ !con(dag_in, (ins I64:$sy, zero:$sz))>;
+ let cy = 0, cz = 0 in
+ defm iz : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC,
+ !con(dag_in, (ins simm7:$sy, zero:$sz))>;
+}
+multiclass VGTtgm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vy = ? in defm v : VGTlhm<opcStr, "$vy", opc, RC, (ins V64:$vy)>;
+ let cs = 1, sw = ? in defm s : VGTlhm<opcStr, "$sw", opc, RC, (ins I64:$sw)>;
+}
+multiclass VGTm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vc = 1 in defm "" : VGTtgm<opcStr, opc, RC>;
+ let vc = 0 in defm NC : VGTtgm<opcStr#".nc", opc, RC>;
+}
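+// Note: VGTtgm above provides two addressing forms: the v variants take a
+// vector of addresses in $vy, while the s variants (cs = 1) use a scalar
+// $sw instead; VGTmm adds the optional mask operand ($m).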
+
+// Section 8.9.13 - VGT (Vector Gather)
+defm VGT : VGTm<"vgt", 0xa1, V64>;
+
+// Section 8.9.14 - VGTU (Vector Gather Upper)
+defm VGTU : VGTm<"vgtu", 0xa2, V64>;
+
+// Section 8.9.15 - VGTL (Vector Gather Lower)
+defm VGTLSX : VGTm<"vgtl.sx", 0xa3, V64>;
+let cx = 1 in defm VGTLZX : VGTm<"vgtl.zx", 0xa3, V64>;
+def : MnemonicAlias<"vgtl", "vgtl.zx">;
+def : MnemonicAlias<"vgtl.nc", "vgtl.zx.nc">;
+
+// Multiclass for VSC instructions
+let mayStore = 1, hasSideEffects = 0, Uses = [VL] in
+multiclass VSCbm<string opcStr, string argStr, bits<8>opc, dag dag_in> {
+ def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>;
+ let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)),
+ !strconcat(opcStr, argStr)>;
+ def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)),
+ !strconcat(opcStr, argStr)>;
+ }
+}
+multiclass VSCmm<string opcStr, string argStr, bits<8>opc, dag dag_in> {
+ defm "" : VSCbm<opcStr, argStr, opc, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : VSCbm<opcStr, argStr#", $m", opc, !con(dag_in, (ins VM:$m))>;
+}
+let VE_VLIndex = 4 in
+multiclass VSClhm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm rrv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc,
+ !con(dag_in, (ins I64:$sy, I64:$sz, RC:$vx))>;
+ let cy = 0 in
+ defm irv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc,
+ !con(dag_in, (ins simm7:$sy, I64:$sz, RC:$vx))>;
+ let cz = 0 in
+ defm rzv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc,
+ !con(dag_in, (ins I64:$sy, zero:$sz, RC:$vx))>;
+ let cy = 0, cz = 0 in
+ defm izv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc,
+ !con(dag_in, (ins simm7:$sy, zero:$sz, RC:$vx))>;
+}
+multiclass VSCtgm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vy = ? in defm v : VSClhm<opcStr, "$vy", opc, RC, (ins V64:$vy)>;
+ let cs = 1, sw = ? in defm s : VSClhm<opcStr, "$sw", opc, RC, (ins I64:$sw)>;
+}
+multiclass VSCm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vc = 1, cx = 0 in defm "" : VSCtgm<opcStr, opc, RC>;
+ let vc = 0, cx = 0 in defm NC : VSCtgm<opcStr#".nc", opc, RC>;
+ let vc = 1, cx = 1 in defm OT : VSCtgm<opcStr#".ot", opc, RC>;
+ let vc = 0, cx = 1 in defm NCOT : VSCtgm<opcStr#".nc.ot", opc, RC>;
+}
+
+// Section 8.9.16 - VSC (Vector Scatter)
+defm VSC : VSCm<"vsc", 0xb1, V64>;
+
+// Section 8.9.17 - VSCU (Vector Scatter Upper)
+defm VSCU : VSCm<"vscu", 0xb2, V64>;
+
+// Section 8.9.18 - VSCL (Vector Scatter Lower)
+defm VSCL : VSCm<"vscl", 0xb3, V64>;
+
+// Section 8.9.19 - PFCHV (Prefetch Vector)
+let Uses = [VL] in
+multiclass PFCHVbm<string opcStr, string argStr, bits<8>opc, dag dag_in> {
+ def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>;
+ let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)),
+ !strconcat(opcStr, argStr)>;
+ def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)),
+ !strconcat(opcStr, argStr)>;
+ }
+}
+let VE_VLIndex = 2 in
+multiclass PFCHVm<string opcStr, bits<8>opc> {
+ defm rr : PFCHVbm<opcStr, " $sy, $sz", opc, (ins I64:$sy, I64:$sz)>;
+ let cy = 0 in
+ defm ir : PFCHVbm<opcStr, " $sy, $sz", opc, (ins simm7:$sy, I64:$sz)>;
+ let cz = 0 in
+ defm rz : PFCHVbm<opcStr, " $sy, $sz", opc, (ins I64:$sy, zero:$sz)>;
+ let cy = 0, cz = 0 in
+ defm iz : PFCHVbm<opcStr, " $sy, $sz", opc, (ins simm7:$sy, zero:$sz)>;
+}
+let vc = 1, vx = 0 in defm PFCHV : PFCHVm<"pfchv", 0x80>;
+let vc = 0, vx = 0 in defm PFCHVNC : PFCHVm<"pfchv.nc", 0x80>;
+
+// Section 8.9.20 - LSV (Load S to V)
+let sx = 0, vx = ?, hasSideEffects = 0 in
+multiclass LSVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ def "" : RR<opc, (outs RC:$vx), dag_in, !strconcat(opcStr, " ${vx}", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = "$base",
+ isCodeGenOnly = 1 in
+ def _v : RR<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " ${vx}", argStr)>;
+}
+multiclass LSVm<string opcStr, bits<8>opc, RegisterClass RC> {
+ defm rr : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins I64:$sy, I64:$sz)>;
+ let cy = 0 in
+ defm ir : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins uimm7:$sy, I64:$sz)>;
+ let cz = 0 in
+ defm rm : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins I64:$sy, mimm:$sz)>;
+ let cy = 0, cz = 0 in
+ defm im : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins uimm7:$sy, mimm:$sz)>;
+}
+defm LSV : LSVm<"lsv", 0x8e, V64>;
+
+// Section 8.9.21 - LVS (Load V to S)
+let cz = 0, sz = 0, vx = ?, hasSideEffects = 0 in
+multiclass LVSm<string opcStr, bits<8>opc, RegisterClass RC> {
+ def vr : RR<opc, (outs I64:$sx), (ins RC:$vx, I64:$sy),
+ opcStr#" $sx, ${vx}(${sy})">;
+ let cy = 0 in
+ def vi : RR<opc, (outs I64:$sx), (ins RC:$vx, uimm7:$sy),
+ opcStr#" $sx, ${vx}(${sy})">;
+}
+defm LVS : LVSm<"lvs", 0x9e, V64>;
+
+// Section 8.9.22 - LVM (Load VM)
+let sx = 0, vx = ?, hasSideEffects = 0 in
+multiclass LVMbm<string opcStr, string argStr, bits<8>opc, RegisterClass RCM,
+ dag dag_in> {
+ def "" : RR<opc, (outs RCM:$vx), dag_in,
+ !strconcat(opcStr, " $vx, ", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = "$base",
+ isCodeGenOnly = 1 in {
+ def _m : RR<opc, (outs RCM:$vx), !con(dag_in, (ins RCM:$base)),
+ !strconcat(opcStr, " $vx, ", argStr)>;
+ }
+}
+multiclass LVMom<string opcStr, bits<8>opc, RegisterClass RCM> {
+ defm rr : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins I64:$sy, I64:$sz)>;
+ let cy = 0 in
+ defm ir : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins uimm2:$sy, I64:$sz)>;
+ let cz = 0 in
+ defm rm : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins I64:$sy, mimm:$sz)>;
+ let cy = 0, cz = 0 in
+ defm im : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins uimm2:$sy, mimm:$sz)>;
+}
+multiclass LVMm<string opcStr, bits<8>opc, RegisterClass RCM> {
+ defm "" : LVMom<opcStr, opc, RCM>;
+}
+defm LVM : LVMm<"lvm", 0xb7, VM>;
+
+// Section 8.9.23 - SVM (Save VM)
+let cz = 0, sz = 0, vz = ?, hasSideEffects = 0 in
+multiclass SVMm<string opcStr, bits<8>opc, RegisterClass RCM> {
+ def mr : RR<opc, (outs I64:$sx), (ins RCM:$vz, I64:$sy),
+ opcStr#" $sx, $vz, $sy">;
+ let cy = 0 in
+ def mi : RR<opc, (outs I64:$sx), (ins RCM:$vz, uimm2:$sy),
+ opcStr#" $sx, $vz, $sy">;
+}
+defm SVM : SVMm<"svm", 0xa7, VM>;
+
+// Section 8.9.24 - VBRD (Vector Broadcast)
+let vx = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass VBRDbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in, string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RV<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx, ", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base",
+ isCodeGenOnly = 1 in
+ def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx, ", argStr)>;
+}
+multiclass VBRDlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : VBRDbm<opcStr, argStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : VBRDbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)),
+ "$vl,">;
+ defm L : VBRDbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)),
+ "$vl,">;
+ }
+}
+multiclass VBRDmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM, dag dag_in> {
+ defm "" : VBRDlm<opcStr, argStr, opc, RC, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : VBRDlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins RCM:$m))>;
+}
+let VE_VLIndex = 2 in
+multiclass VBRDm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC,
+ RegisterClass RCM> {
+ defm r : VBRDmm<opcStr, "$sy", opc, VRC, RCM, (ins RC:$sy)>;
+ let cy = 0 in
+ defm i : VBRDmm<opcStr, "$sy", opc, VRC, RCM, (ins simm7:$sy)>;
+}
+let cx = 0, cx2 = 0 in
+defm VBRD : VBRDm<"vbrd", 0x8c, V64, I64, VM>;
+let cx = 0, cx2 = 1 in
+defm VBRDL : VBRDm<"vbrdl", 0x8c, V64, I32, VM>;
+let cx = 1, cx2 = 0 in
+defm VBRDU : VBRDm<"vbrdu", 0x8c, V64, F32, VM>;
+let cx = 1, cx2 = 1 in
+defm PVBRD : VBRDm<"pvbrd", 0x8c, V64, I64, VM512>;
+
+// Section 8.9.25 - VMV (Vector Move)
+let vx = ?, vz = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass VMVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in, string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RV<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx, ", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base",
+ isCodeGenOnly = 1 in
+ def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx, ", argStr)>;
+}
+multiclass VMVlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : VMVbm<opcStr, argStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : VMVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)),
+ "$vl,">;
+ defm L : VMVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)),
+ "$vl,">;
+ }
+}
+multiclass VMVmm<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM, dag dag_in> {
+ defm "" : VMVlm<opcStr, "$sy, $vz", opc, RC, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : VMVlm<opcStr, "$sy, $vz, $m", opc, RC, !con(dag_in, (ins RCM:$m))>;
+}
+let VE_VLIndex = 3 in
+multiclass VMVm<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ defm rv : VMVmm<opcStr, opc, RC, RCM, (ins I64:$sy, RC:$vz)>;
+ let cy = 0 in
+ defm iv : VMVmm<opcStr, opc, RC, RCM, (ins uimm7:$sy, RC:$vz)>;
+}
+defm VMV : VMVm<"vmv", 0x9c, V64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.10 - Vector Fixed-Point Arithmetic Instructions
+//-----------------------------------------------------------------------------
+
+// Multiclass for generic vector calculation
+let vx = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass RVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in, string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RV<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base",
+ isCodeGenOnly = 1 in
+ def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx", argStr)>;
+}
+multiclass RVlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : RVbm<opcStr, argStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : RVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)),
+ "$vl,">;
+ defm L : RVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)),
+ "$vl,">;
+ }
+}
+multiclass RVmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM, dag dag_in> {
+ defm "" : RVlm<opcStr, argStr, opc, RC, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : RVlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins RCM:$m))>;
+}
+// Generic RV multiclass with 2 arguments.
+// e.g. VADD, VSUB, VMPY, etc.
+let VE_VLIndex = 3 in
+multiclass RVm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC,
+ RegisterClass RCM, Operand SIMM = simm7> {
+ let cy = 0, sy = 0, vy = ?, vz = ? in
+ defm vv : RVmm<opcStr, ", $vy, $vz", opc, VRC, RCM, (ins VRC:$vy, VRC:$vz)>;
+ let cs = 1, vz = ? in
+ defm rv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins RC:$sy, VRC:$vz)>;
+ let cs = 1, cy = 0, vz = ? in
+ defm iv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins SIMM:$sy, VRC:$vz)>;
+}
+// Special RV multiclass with 2 arguments using cs2.
+// e.g. VDIV, VDVS, and VDVX.
+let VE_VLIndex = 3 in
+multiclass RVDIVm<string opcStr, bits<8>opc, RegisterClass VRC,
+ RegisterClass RC, RegisterClass RCM, Operand SIMM = simm7> {
+ let cy = 0, sy = 0, vy = ?, vz = ? in
+ defm vv : RVmm<opcStr, ", $vy, $vz", opc, VRC, RCM, (ins VRC:$vy, VRC:$vz)>;
+ let cs2 = 1, vy = ? in
+ defm vr : RVmm<opcStr, ", $vy, $sy", opc, VRC, RCM, (ins VRC:$vy, RC:$sy)>;
+ let cs2 = 1, cy = 0, vy = ? in
+ defm vi : RVmm<opcStr, ", $vy, $sy", opc, VRC, RCM, (ins VRC:$vy, SIMM:$sy)>;
+ let cs = 1, vz = ? in
+ defm rv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins RC:$sy, VRC:$vz)>;
+ let cs = 1, cy = 0, vz = ? in
+ defm iv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins SIMM:$sy, VRC:$vz)>;
+}
+// Generic RV multiclass with 2 arguments for logical operations.
+// e.g. VAND, VOR, VXOR, etc.
+let VE_VLIndex = 3 in
+multiclass RVLm<string opcStr, bits<8>opc, RegisterClass ScaRC,
+ RegisterClass RC, RegisterClass RCM> {
+ let cy = 0, sy = 0, vy = ?, vz = ? in
+ defm vv : RVmm<opcStr, ", $vy, $vz", opc, RC, RCM, (ins RC:$vy, RC:$vz)>;
+ let cs = 1, vz = ? in
+ defm rv : RVmm<opcStr, ", $sy, $vz", opc, RC, RCM, (ins ScaRC:$sy, RC:$vz)>;
+ let cs = 1, cy = 0, vz = ? in
+ defm mv : RVmm<opcStr, ", $sy, $vz", opc, RC, RCM, (ins mimm:$sy, RC:$vz)>;
+}
+// Generic RV multiclass with 1 argument.
+// e.g. VLDZ, VPCNT, and VBRV.
+let VE_VLIndex = 2 in
+multiclass RV1m<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let cy = 0, sy = 0, vz = ? in
+ defm v : RVmm<opcStr, ", $vz", opc, RC, RCM, (ins RC:$vz)>;
+}
+// Generic RV multiclass with no argument.
+// e.g. VSEQ.
+let VE_VLIndex = 1 in
+multiclass RV0m<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let cy = 0, sy = 0 in
+ defm "" : RVmm<opcStr, "", opc, RC, RCM, (ins)>;
+}
+// Generic RV multiclass with 2 arguments for shift operations.
+// e.g. VSLL, VSRL, VSLA, etc.
+let VE_VLIndex = 3 in
+multiclass RVSm<string opcStr, bits<8>opc, RegisterClass ScaRC,
+ RegisterClass RC, RegisterClass RCM> {
+ let cy = 0, sy = 0, vy = ?, vz = ? in
+ defm vv : RVmm<opcStr, ", $vz, $vy", opc, RC, RCM, (ins RC:$vz, RC:$vy)>;
+ let cs = 1, vz = ? in
+ defm vr : RVmm<opcStr, ", $vz, $sy", opc, RC, RCM, (ins RC:$vz, ScaRC:$sy)>;
+ let cs = 1, cy = 0, vz = ? in
+ defm vi : RVmm<opcStr, ", $vz, $sy", opc, RC, RCM, (ins RC:$vz, uimm7:$sy)>;
+}
+// Generic RV multiclass with 3 arguments for shift operations.
+// e.g. VSLD and VSRD.
+let VE_VLIndex = 4 in
+multiclass RVSDm<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let vy = ?, vz = ? in
+ defm vvr : RVmm<opcStr, ", ($vy, ${vz}), $sy", opc, RC, RCM,
+ (ins RC:$vy, RC:$vz, I64:$sy)>;
+ let cy = 0, vy = ?, vz = ? in
+ defm vvi : RVmm<opcStr, ", ($vy, ${vz}), $sy", opc, RC, RCM,
+ (ins RC:$vy, RC:$vz, uimm7:$sy)>;
+}
+// Special RV multiclass with 3 arguments.
+// e.g. VSFA
+let VE_VLIndex = 4 in
+multiclass RVSAm<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let cz = 1, sz = ?, vz = ? in
+ defm vrr : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM,
+ (ins RC:$vz, I64:$sy, I64:$sz)>;
+ let cz = 0, sz = ?, vz = ? in
+ defm vrm : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM,
+ (ins RC:$vz, I64:$sy, mimm:$sz)>;
+ let cy = 0, cz = 1, sz = ?, vz = ? in
+ defm vir : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM,
+ (ins RC:$vz, uimm3:$sy, I64:$sz)>;
+ let cy = 0, cz = 0, sz = ?, vz = ? in
+ defm vim : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM,
+ (ins RC:$vz, uimm3:$sy, mimm:$sz)>;
+}
+// Generic RV multiclass with 1 argument using the vy field.
+// e.g. VFSQRT, VRCP, and VRSQRT.
+let VE_VLIndex = 2 in
+multiclass RVF1m<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let cy = 0, sy = 0, vy = ? in
+ defm v : RVmm<opcStr, ", $vy", opc, RC, RCM, (ins RC:$vy)>;
+}
+// Special RV multiclass with 3 arguments using cs2.
+// e.g. VFMAD, VFMSB, VFNMAD, etc.
+let VE_VLIndex = 4 in
+multiclass RVMm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC,
+ RegisterClass RCM, Operand SIMM = simm7> {
+ let cy = 0, sy = 0, vy = ?, vz = ?, vw = ? in
+ defm vvv : RVmm<opcStr, ", $vy, $vz, $vw", opc, VRC, RCM,
+ (ins VRC:$vy, VRC:$vz, VRC:$vw)>;
+ let cs2 = 1, vy = ?, vw = ? in
+ defm vrv : RVmm<opcStr, ", $vy, $sy, $vw", opc, VRC, RCM,
+ (ins VRC:$vy, RC:$sy, VRC:$vw)>;
+ let cs2 = 1, cy = 0, vy = ?, vw = ? in
+ defm viv : RVmm<opcStr, ", $vy, $sy, $vw", opc, VRC, RCM,
+ (ins VRC:$vy, SIMM:$sy, VRC:$vw)>;
+ let cs = 1, vz = ?, vw = ? in
+ defm rvv : RVmm<opcStr, ", $sy, $vz, $vw", opc, VRC, RCM,
+ (ins RC:$sy, VRC:$vz, VRC:$vw)>;
+ let cs = 1, cy = 0, vz = ?, vw = ? in
+ defm ivv : RVmm<opcStr, ", $sy, $vz, $vw", opc, VRC, RCM,
+ (ins SIMM:$sy, VRC:$vz, VRC:$vw)>;
+}
+// Special RV multiclass with 2 arguments for floating point conversions.
+// e.g. VFIX and VFIXX
+let hasSideEffects = 0, VE_VLIndex = 3 in
+multiclass RVFIXm<string opcStr, bits<8> opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let cy = 0, sy = 0, vy = ?, vz = ? in
+ defm v : RVmm<opcStr#"$vz", ", $vy", opc, RC, RCM, (ins RDOp:$vz, RC:$vy)>;
+}
+// Multiclass for generic iterative vector calculation
+let vx = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass RVIbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in, string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RV<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx", argStr)>;
+ let isCodeGenOnly = 1, Constraints = "$vx = $base", DisableEncoding = disEnc#"$base" in
+ def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx", argStr)>;
+}
+multiclass RVIlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : RVIbm<opcStr, argStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : RVIbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)),
+ "$vl,">;
+ defm L : RVIbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)),
+ "$vl,">;
+ }
+}
+// Generic RV multiclass for iterative operations with 2 arguments.
+// e.g. VFIA, VFIS, and VFIM
+let VE_VLIndex = 3 in
+multiclass RVI2m<string opcStr, bits<8>opc, RegisterClass VRC,
+ RegisterClass RC> {
+ let vy = ? in
+ defm vr : RVIlm<opcStr, ", $vy, $sy", opc, VRC, (ins VRC:$vy, RC:$sy)>;
+ let cy = 0, vy = ? in
+ defm vi : RVIlm<opcStr, ", $vy, $sy", opc, VRC, (ins VRC:$vy, simm7fp:$sy)>;
+}
+// Generic RV multiclass for iterative operations with 3 arguments.
+// e.g. VFIAM, VFISM, VFIMA, etc.
+let VE_VLIndex = 4 in
+multiclass RVI3m<string opcStr, bits<8>opc, RegisterClass VRC,
+ RegisterClass RC> {
+ let vy = ?, vz = ? in
+ defm vvr : RVIlm<opcStr, ", $vy, $vz, $sy", opc, VRC,
+ (ins VRC:$vy, VRC:$vz, RC:$sy)>;
+ let cy = 0, vy = ?, vz = ? in
+ defm vvi : RVIlm<opcStr, ", $vy, $vz, $sy", opc, VRC,
+ (ins VRC:$vy, VRC:$vz, simm7fp:$sy)>;
+}
+// Special RV multiclass with 3 arguments for VSHF.
+// e.g. VSHF
+let vy = ?, vz = ?, VE_VLIndex = 4 in
+multiclass RVSHFm<string opcStr, bits<8>opc, RegisterClass RC,
+ Operand SIMM = uimm4> {
+ defm vvr : RVlm<opcStr, ", $vy, $vz, $sy", opc, RC,
+ (ins RC:$vy, RC:$vz, I64:$sy)>;
+ let cy = 0 in defm vvi : RVlm<opcStr, ", $vy, $vz, $sy", opc, RC,
+ (ins RC:$vy, RC:$vz, SIMM:$sy)>;
+}
+// Multiclass for generic mask calculation
+let vx = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass RVMKbm<string opcStr, string argStr, bits<8>opc, dag dag_out,
+ dag dag_in> {
+ def "" : RV<opc, dag_out, dag_in, !strconcat(opcStr, argStr)>;
+ let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ def l : RV<opc, dag_out, !con(dag_in, (ins I32:$vl)),
+ !strconcat(opcStr, argStr)>;
+ def L : RV<opc, dag_out, !con(dag_in, (ins VLS:$vl)),
+ !strconcat(opcStr, argStr)>;
+ }
+}
+multiclass RVMKlm<string opcStr, string argStr, bits<8>opc, RegisterClass RCM,
+ dag dag_in> {
+ defm "" : RVMKbm<opcStr, " $vx"#argStr, opc, (outs RCM:$vx), dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : RVMKbm<opcStr, " $vx"#argStr#", $m", opc, (outs RCM:$vx),
+ !con(dag_in, (ins RCM:$m))>;
+}
+// Generic RV multiclass for mask calculation with a condition.
+// e.g. VFMK, VFMS, and VFMF
+let cy = 0, sy = 0 in
+multiclass RVMKom<string opcStr, bits<8> opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let vy = ?, vz = ?, VE_VLIndex = 3 in
+ defm v : RVMKlm<opcStr#"$vy", ", $vz", opc, RCM, (ins CCOp:$vy, RC:$vz)>;
+ let vy = 15 /* AT */, VE_VLIndex = 1 in
+ defm a : RVMKlm<opcStr#"at", "", opc, RCM, (ins)>;
+ let vy = 0 /* AF */, VE_VLIndex = 1 in
+ defm na : RVMKlm<opcStr#"af", "", opc, RCM, (ins)>;
+}
+multiclass RVMKm<string opcStr, bits<8> opc, RegisterClass RC,
+ RegisterClass RCM> {
+ defm "" : RVMKom<opcStr, opc, RC, RCM>;
+}
+// Generic RV multiclass for mask calculation with 2 arguments.
+// e.g. ANDM, ORM, XORM, etc.
+let cy = 0, sy = 0, vx = ?, vy = ?, vz = ?, hasSideEffects = 0 in
+multiclass RVM2m<string opcStr, bits<8> opc, RegisterClass RCM> {
+ def mm : RV<opc, (outs RCM:$vx), (ins RCM:$vy, RCM:$vz),
+ !strconcat(opcStr, " $vx, $vy, $vz")>;
+}
+// Generic RV multiclass for mask calculation with 1 argument.
+// e.g. NEGM
+let cy = 0, sy = 0, vx = ?, vy = ?, hasSideEffects = 0 in
+multiclass RVM1m<string opcStr, bits<8> opc, RegisterClass RCM> {
+ def m : RV<opc, (outs RCM:$vx), (ins RCM:$vy),
+ !strconcat(opcStr, " $vx, $vy")>;
+}
+// Generic RV multiclass for mask calculation with 1 argument.
+// e.g. PCVM, LZVM, and TOVM
+let cy = 0, sy = 0, vy = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass RVMSbm<string opcStr, string argStr, bits<8>opc, dag dag_in> {
+ def "" : RV<opc, (outs I64:$sx), dag_in,
+ !strconcat(opcStr, " $sx,", argStr)> {
+ bits<7> sx;
+ let Inst{54-48} = sx;
+ }
+ let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ def l : RV<opc, (outs I64:$sx), !con(dag_in, (ins I32:$vl)),
+ !strconcat(opcStr, " $sx,", argStr)> {
+ bits<7> sx;
+ let Inst{54-48} = sx;
+ }
+ def L : RV<opc, (outs I64:$sx), !con(dag_in, (ins VLS:$vl)),
+ !strconcat(opcStr, " $sx,", argStr)> {
+ bits<7> sx;
+ let Inst{54-48} = sx;
+ }
+ }
+}
+let VE_VLIndex = 2 in
+multiclass RVMSm<string opcStr, bits<8> opc, RegisterClass RCM> {
+ defm m : RVMSbm<opcStr, " $vy", opc, (ins RCM:$vy)>;
+}
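+// Note: PCVM, LZVM, and TOVM return a scalar, so RVMSbm above declares a
+// bits<7> sx field and places it in Inst{54-48} explicitly; the plain RV
+// format does not encode a scalar destination there by default.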
+
+// Section 8.10.1 - VADD (Vector Add)
+let cx = 0, cx2 = 0 in
+defm VADDUL : RVm<"vaddu.l", 0xc8, V64, I64, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVADDULO : RVm<"pvaddu.lo", 0xc8, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VADDUW : RVm<"vaddu.w", 0xc8, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVADDUUP : RVm<"pvaddu.up", 0xc8, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVADDU : RVm<"pvaddu", 0xc8, V64, I64, VM512>;
+def : MnemonicAlias<"vaddu.w", "pvaddu.lo">;
+
+// Section 8.10.2 - VADS (Vector Add Single)
+let cx = 0, cx2 = 0 in
+defm VADDSWSX : RVm<"vadds.w.sx", 0xca, V64, I32, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVADDSLO : RVm<"pvadds.lo", 0xca, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VADDSWZX : RVm<"vadds.w.zx", 0xca, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVADDSUP : RVm<"pvadds.up", 0xca, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVADDS : RVm<"pvadds", 0xca, V64, I64, VM512>;
+def : MnemonicAlias<"pvadds.lo.sx", "vadds.w.sx">;
+def : MnemonicAlias<"vadds.w.zx", "pvadds.lo">;
+def : MnemonicAlias<"vadds.w", "pvadds.lo">;
+def : MnemonicAlias<"pvadds.lo.zx", "pvadds.lo">;
+
+// Section 8.10.3 - VADX (Vector Add)
+defm VADDSL : RVm<"vadds.l", 0x8b, V64, I64, VM>;
+
+// Section 8.10.4 - VSUB (Vector Subtract)
+let cx = 0, cx2 = 0 in
+defm VSUBUL : RVm<"vsubu.l", 0xd8, V64, I64, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVSUBULO : RVm<"pvsubu.lo", 0xd8, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VSUBUW : RVm<"vsubu.w", 0xd8, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVSUBUUP : RVm<"pvsubu.up", 0xd8, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVSUBU : RVm<"pvsubu", 0xd8, V64, I64, VM512>;
+def : MnemonicAlias<"vsubu.w", "pvsubu.lo">;
+
+// Section 8.10.5 - VSBS (Vector Subtract Single)
+let cx = 0, cx2 = 0 in
+defm VSUBSWSX : RVm<"vsubs.w.sx", 0xda, V64, I32, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVSUBSLO : RVm<"pvsubs.lo", 0xda, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VSUBSWZX : RVm<"vsubs.w.zx", 0xda, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVSUBSUP : RVm<"pvsubs.up", 0xda, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVSUBS : RVm<"pvsubs", 0xda, V64, I64, VM512>;
+def : MnemonicAlias<"pvsubs.lo.sx", "vsubs.w.sx">;
+def : MnemonicAlias<"vsubs.w.zx", "pvsubs.lo">;
+def : MnemonicAlias<"vsubs.w", "pvsubs.lo">;
+def : MnemonicAlias<"pvsubs.lo.zx", "pvsubs.lo">;
+
+// Section 8.10.6 - VSBX (Vector Subtract)
+defm VSUBSL : RVm<"vsubs.l", 0x9b, V64, I64, VM>;
+
+// Section 8.10.7 - VMPY (Vector Multiply)
+let cx2 = 0 in
+defm VMULUL : RVm<"vmulu.l", 0xc9, V64, I64, VM>;
+let cx2 = 1 in
+defm VMULUW : RVm<"vmulu.w", 0xc9, V64, I32, VM>;
+
+// Section 8.10.8 - VMPS (Vector Multiply Single)
+let cx2 = 0 in
+defm VMULSWSX : RVm<"vmuls.w.sx", 0xcb, V64, I32, VM>;
+let cx2 = 1 in
+defm VMULSWZX : RVm<"vmuls.w.zx", 0xcb, V64, I32, VM>;
+def : MnemonicAlias<"vmuls.w", "vmuls.w.zx">;
+
+// Section 8.10.9 - VMPX (Vector Multiply)
+defm VMULSL : RVm<"vmuls.l", 0xdb, V64, I64, VM>;
+
+// Section 8.10.10 - VMPD (Vector Multiply)
+defm VMULSLW : RVm<"vmuls.l.w", 0xd9, V64, I32, VM>;
+
+// Section 8.10.11 - VDIV (Vector Divide)
+let cx2 = 0 in
+defm VDIVUL : RVDIVm<"vdivu.l", 0xe9, V64, I64, VM>;
+let cx2 = 1 in
+defm VDIVUW : RVDIVm<"vdivu.w", 0xe9, V64, I32, VM>;
+
+// Section 8.10.12 - VDVS (Vector Divide Single)
+let cx2 = 0 in
+defm VDIVSWSX : RVDIVm<"vdivs.w.sx", 0xeb, V64, I32, VM>;
+let cx2 = 1 in
+defm VDIVSWZX : RVDIVm<"vdivs.w.zx", 0xeb, V64, I32, VM>;
+def : MnemonicAlias<"vdivs.w", "vdivs.w.zx">;
+
+// Section 8.10.13 - VDVX (Vector Divide)
+defm VDIVSL : RVDIVm<"vdivs.l", 0xfb, V64, I64, VM>;
+
+// Section 8.10.14 - VCMP (Vector Compare)
+let cx = 0, cx2 = 0 in
+defm VCMPUL : RVm<"vcmpu.l", 0xb9, V64, I64, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVCMPULO : RVm<"pvcmpu.lo", 0xb9, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VCMPUW : RVm<"vcmpu.w", 0xb9, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVCMPUUP : RVm<"pvcmpu.up", 0xb9, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVCMPU : RVm<"pvcmpu", 0xb9, V64, I64, VM512>;
+def : MnemonicAlias<"vcmpu.w", "pvcmpu.lo">;
+
+// Section 8.10.15 - VCPS (Vector Compare Single)
+let cx = 0, cx2 = 0 in
+defm VCMPSWSX : RVm<"vcmps.w.sx", 0xfa, V64, I32, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVCMPSLO : RVm<"pvcmps.lo", 0xfa, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VCMPSWZX : RVm<"vcmps.w.zx", 0xfa, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVCMPSUP : RVm<"pvcmps.up", 0xfa, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVCMPS : RVm<"pvcmps", 0xfa, V64, I64, VM512>;
+def : MnemonicAlias<"pvcmps.lo.sx", "vcmps.w.sx">;
+def : MnemonicAlias<"vcmps.w.zx", "pvcmps.lo">;
+def : MnemonicAlias<"vcmps.w", "pvcmps.lo">;
+def : MnemonicAlias<"pvcmps.lo.zx", "pvcmps.lo">;
+
+// Section 8.10.16 - VCPX (Vector Compare)
+defm VCMPSL : RVm<"vcmps.l", 0xba, V64, I64, VM>;
+
+// Section 8.10.17 - VCMS (Vector Compare and Select Maximum/Minimum Single)
+let cx = 0, cx2 = 0 in
+defm VMAXSWSX : RVm<"vmaxs.w.sx", 0x8a, V64, I32, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVMAXSLO : RVm<"pvmaxs.lo", 0x8a, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VMAXSWZX : RVm<"vmaxs.w.zx", 0x8a, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVMAXSUP : RVm<"pvmaxs.up", 0x8a, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVMAXS : RVm<"pvmaxs", 0x8a, V64, I64, VM512>;
+let cs2 = 1 in {
+ let cx = 0, cx2 = 0 in
+ defm VMINSWSX : RVm<"vmins.w.sx", 0x8a, V64, I32, VM>;
+ let cx = 0, cx2 = 1 in {
+ defm PVMINSLO : RVm<"pvmins.lo", 0x8a, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VMINSWZX : RVm<"vmins.w.zx", 0x8a, V64, I32, VM>;
+ }
+ let cx = 1, cx2 = 0 in
+ defm PVMINSUP : RVm<"pvmins.up", 0x8a, V64, I64, VM>;
+ let cx = 1, cx2 = 1 in
+ defm PVMINS : RVm<"pvmins", 0x8a, V64, I64, VM512>;
+}
+def : MnemonicAlias<"pvmaxs.lo.sx", "vmaxs.w.sx">;
+def : MnemonicAlias<"vmaxs.w.zx", "pvmaxs.lo">;
+def : MnemonicAlias<"vmaxs.w", "pvmaxs.lo">;
+def : MnemonicAlias<"pvmaxs.lo.zx", "pvmaxs.lo">;
+def : MnemonicAlias<"pvmins.lo.sx", "vmins.w.sx">;
+def : MnemonicAlias<"vmins.w.zx", "pvmins.lo">;
+def : MnemonicAlias<"vmins.w", "pvmins.lo">;
+def : MnemonicAlias<"pvmins.lo.zx", "pvmins.lo">;
+
+// Section 8.10.18 - VCMX (Vector Compare and Select Maximum/Minimum)
+defm VMAXSL : RVm<"vmaxs.l", 0x9a, V64, I64, VM>;
+let cs2 = 1 in
+defm VMINSL : RVm<"vmins.l", 0x9a, V64, I64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.11 - Vector Logical Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.11.1 - VAND (Vector And)
+let cx = 0, cx2 = 0 in defm VAND : RVLm<"vand", 0xc4, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVANDLO : RVLm<"pvand.lo", 0xc4, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVANDUP : RVLm<"pvand.up", 0xc4, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVAND : RVLm<"pvand", 0xc4, I64, V64, VM512>;
+
+// Section 8.11.2 - VOR (Vector Or)
+let cx = 0, cx2 = 0 in defm VOR : RVLm<"vor", 0xc5, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVORLO : RVLm<"pvor.lo", 0xc5, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVORUP : RVLm<"pvor.up", 0xc5, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVOR : RVLm<"pvor", 0xc5, I64, V64, VM512>;
+
+// Section 8.11.3 - VXOR (Vector Exclusive Or)
+let cx = 0, cx2 = 0 in defm VXOR : RVLm<"vxor", 0xc6, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVXORLO : RVLm<"pvxor.lo", 0xc6, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVXORUP : RVLm<"pvxor.up", 0xc6, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVXOR : RVLm<"pvxor", 0xc6, I64, V64, VM512>;
+
+// Section 8.11.4 - VEQV (Vector Equivalence)
+let cx = 0, cx2 = 0 in defm VEQV : RVLm<"veqv", 0xc7, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVEQVLO : RVLm<"pveqv.lo", 0xc7, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVEQVUP : RVLm<"pveqv.up", 0xc7, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVEQV : RVLm<"pveqv", 0xc7, I64, V64, VM512>;
+
+// Section 8.11.5 - VLDZ (Vector Leading Zero Count)
+let cx = 0, cx2 = 0 in defm VLDZ : RV1m<"vldz", 0xe7, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVLDZLO : RV1m<"pvldz.lo", 0xe7, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVLDZUP : RV1m<"pvldz.up", 0xe7, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVLDZ : RV1m<"pvldz", 0xe7, V64, VM512>;
+
+// Section 8.11.6 - VPCNT (Vector Population Count)
+let cx = 0, cx2 = 0 in defm VPCNT : RV1m<"vpcnt", 0xac, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVPCNTLO : RV1m<"pvpcnt.lo", 0xac, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVPCNTUP : RV1m<"pvpcnt.up", 0xac, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVPCNT : RV1m<"pvpcnt", 0xac, V64, VM512>;
+
+// Section 8.11.7 - VBRV (Vector Bit Reverse)
+let cx = 0, cx2 = 0 in defm VBRV : RV1m<"vbrv", 0xf7, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVBRVLO : RV1m<"pvbrv.lo", 0xf7, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVBRVUP : RV1m<"pvbrv.up", 0xf7, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVBRV : RV1m<"pvbrv", 0xf7, V64, VM512>;
+
+// Section 8.11.8 - VSEQ (Vector Sequential Number)
+let cx = 0, cx2 = 0 in defm VSEQ : RV0m<"vseq", 0x99, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVSEQLO : RV0m<"pvseq.lo", 0x99, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVSEQUP : RV0m<"pvseq.up", 0x99, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVSEQ : RV0m<"pvseq", 0x99, V64, VM512>;
+
+//-----------------------------------------------------------------------------
+// Section 8.12 - Vector Shift Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.12.1 - VSLL (Vector Shift Left Logical)
+let cx = 0, cx2 = 0 in defm VSLL : RVSm<"vsll", 0xe5, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVSLLLO : RVSm<"pvsll.lo", 0xe5, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVSLLUP : RVSm<"pvsll.up", 0xe5, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVSLL : RVSm<"pvsll", 0xe5, I64, V64, VM512>;
+
+// Section 8.12.2 - VSLD (Vector Shift Left Double)
+defm VSLD : RVSDm<"vsld", 0xe4, V64, VM>;
+
+// Section 8.12.3 - VSRL (Vector Shift Right Logical)
+let cx = 0, cx2 = 0 in defm VSRL : RVSm<"vsrl", 0xf5, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVSRLLO : RVSm<"pvsrl.lo", 0xf5, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVSRLUP : RVSm<"pvsrl.up", 0xf5, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVSRL : RVSm<"pvsrl", 0xf5, I64, V64, VM512>;
+
+// Section 8.12.4 - VSRD (Vector Shift Right Double)
+defm VSRD : RVSDm<"vsrd", 0xf4, V64, VM>;
+
+// Section 8.12.5 - VSLA (Vector Shift Left Arithmetic)
+let cx = 0, cx2 = 0 in defm VSLAWSX : RVSm<"vsla.w.sx", 0xe6, I32, V64, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVSLALO : RVSm<"pvsla.lo", 0xe6, I32, V64, VM>;
+ let isCodeGenOnly = 1 in defm VSLAWZX : RVSm<"vsla.w.zx", 0xe6, I32, V64, VM>;
+}
+let cx = 1, cx2 = 0 in defm PVSLAUP : RVSm<"pvsla.up", 0xe6, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVSLA : RVSm<"pvsla", 0xe6, I64, V64, VM512>;
+def : MnemonicAlias<"pvsla.lo.sx", "vsla.w.sx">;
+def : MnemonicAlias<"vsla.w.zx", "pvsla.lo">;
+def : MnemonicAlias<"vsla.w", "pvsla.lo">;
+def : MnemonicAlias<"pvsla.lo.zx", "pvsla.lo">;
+
+// Section 8.12.6 - VSLAX (Vector Shift Left Arithmetic)
+defm VSLAL : RVSm<"vsla.l", 0xd4, I64, V64, VM>;
+
+// Section 8.12.7 - VSRA (Vector Shift Right Arithmetic)
+let cx = 0, cx2 = 0 in defm VSRAWSX : RVSm<"vsra.w.sx", 0xf6, I32, V64, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVSRALO : RVSm<"pvsra.lo", 0xf6, I32, V64, VM>;
+ let isCodeGenOnly = 1 in defm VSRAWZX : RVSm<"vsra.w.zx", 0xf6, I32, V64, VM>;
+}
+let cx = 1, cx2 = 0 in defm PVSRAUP : RVSm<"pvsra.up", 0xf6, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVSRA : RVSm<"pvsra", 0xf6, I64, V64, VM512>;
+def : MnemonicAlias<"pvsra.lo.sx", "vsra.w.sx">;
+def : MnemonicAlias<"vsra.w.zx", "pvsra.lo">;
+def : MnemonicAlias<"vsra.w", "pvsra.lo">;
+def : MnemonicAlias<"pvsra.lo.zx", "pvsra.lo">;
+
+// Section 8.12.8 - VSRAX (Vector Shift Right Arithmetic)
+defm VSRAL : RVSm<"vsra.l", 0xd5, I64, V64, VM>;
+
+// Section 8.12.9 - VSFA (Vector Shift Left and Add)
+defm VSFA : RVSAm<"vsfa", 0xd7, V64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.13 - Vector Floating-Point Arithmetic Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.13.1 - VFAD (Vector Floating Add)
+let cx = 0, cx2 = 0 in
+defm VFADDD : RVm<"vfadd.d", 0xcc, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFADDLO : RVm<"pvfadd.lo", 0xcc, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFADDUP : RVm<"pvfadd.up", 0xcc, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFADDS : RVm<"vfadd.s", 0xcc, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFADD : RVm<"pvfadd", 0xcc, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfadd.s", "pvfadd.up">;
+
+// Section 8.13.2 - VFSB (Vector Floating Subtract)
+let cx = 0, cx2 = 0 in
+defm VFSUBD : RVm<"vfsub.d", 0xdc, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFSUBLO : RVm<"pvfsub.lo", 0xdc, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFSUBUP : RVm<"pvfsub.up", 0xdc, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFSUBS : RVm<"vfsub.s", 0xdc, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFSUB : RVm<"pvfsub", 0xdc, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfsub.s", "pvfsub.up">;
+
+// Section 8.13.3 - VFMP (Vector Floating Multiply)
+let cx = 0, cx2 = 0 in
+defm VFMULD : RVm<"vfmul.d", 0xcd, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFMULLO : RVm<"pvfmul.lo", 0xcd, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFMULUP : RVm<"pvfmul.up", 0xcd, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFMULS : RVm<"vfmul.s", 0xcd, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFMUL : RVm<"pvfmul", 0xcd, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfmul.s", "pvfmul.up">;
+
+// Section 8.13.4 - VFDV (Vector Floating Divide)
+defm VFDIVD : RVDIVm<"vfdiv.d", 0xdd, V64, I64, VM, simm7fp>;
+let cx = 1 in
+defm VFDIVS : RVDIVm<"vfdiv.s", 0xdd, V64, F32, VM, simm7fp>;
+
+// Section 8.13.5 - VFSQRT (Vector Floating Square Root)
+defm VFSQRTD : RVF1m<"vfsqrt.d", 0xed, V64, VM>;
+let cx = 1 in
+defm VFSQRTS : RVF1m<"vfsqrt.s", 0xed, V64, VM>;
+
+// Section 8.13.6 - VFCP (Vector Floating Compare)
+let cx = 0, cx2 = 0 in
+defm VFCMPD : RVm<"vfcmp.d", 0xfc, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFCMPLO : RVm<"pvfcmp.lo", 0xfc, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFCMPUP : RVm<"pvfcmp.up", 0xfc, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFCMPS : RVm<"vfcmp.s", 0xfc, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFCMP : RVm<"pvfcmp", 0xfc, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfcmp.s", "pvfcmp.up">;
+
+// Section 8.13.7 - VFCM (Vector Floating Compare and Select Maximum/Minimum)
+let cx = 0, cx2 = 0 in
+defm VFMAXD : RVm<"vfmax.d", 0xbd, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFMAXLO : RVm<"pvfmax.lo", 0xbd, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFMAXUP : RVm<"pvfmax.up", 0xbd, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFMAXS : RVm<"vfmax.s", 0xbd, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFMAX : RVm<"pvfmax", 0xbd, V64, I64, VM512, simm7fp>;
+let cs2 = 1 in {
+ let cx = 0, cx2 = 0 in
+ defm VFMIND : RVm<"vfmin.d", 0xbd, V64, I64, VM, simm7fp>;
+ let cx = 0, cx2 = 1 in
+ defm PVFMINLO : RVm<"pvfmin.lo", 0xbd, V64, I64, VM, simm7fp>;
+ let cx = 1, cx2 = 0 in {
+ defm PVFMINUP : RVm<"pvfmin.up", 0xbd, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFMINS : RVm<"vfmin.s", 0xbd, V64, F32, VM, simm7fp>;
+ }
+ let cx = 1, cx2 = 1 in
+ defm PVFMIN : RVm<"pvfmin", 0xbd, V64, I64, VM512, simm7fp>;
+}
+def : MnemonicAlias<"vfmax.s", "pvfmax.up">;
+def : MnemonicAlias<"vfmin.s", "pvfmin.up">;
+
+// Section 8.13.8 - VFMAD (Vector Floating Fused Multiply Add)
+let cx = 0, cx2 = 0 in
+defm VFMADD : RVMm<"vfmad.d", 0xe2, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFMADLO : RVMm<"pvfmad.lo", 0xe2, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFMADUP : RVMm<"pvfmad.up", 0xe2, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFMADS : RVMm<"vfmad.s", 0xe2, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFMAD : RVMm<"pvfmad", 0xe2, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfmad.s", "pvfmad.up">;
+
+// Section 8.13.9 - VFMSB (Vector Floating Fused Multiply Subtract)
+let cx = 0, cx2 = 0 in
+defm VFMSBD : RVMm<"vfmsb.d", 0xf2, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFMSBLO : RVMm<"pvfmsb.lo", 0xf2, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFMSBUP : RVMm<"pvfmsb.up", 0xf2, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFMSBS : RVMm<"vfmsb.s", 0xf2, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFMSB : RVMm<"pvfmsb", 0xf2, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfmsb.s", "pvfmsb.up">;
+
+// Section 8.13.10 - VFNMAD (Vector Floating Fused Negative Multiply Add)
+let cx = 0, cx2 = 0 in
+defm VFNMADD : RVMm<"vfnmad.d", 0xe3, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFNMADLO : RVMm<"pvfnmad.lo", 0xe3, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFNMADUP : RVMm<"pvfnmad.up", 0xe3, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFNMADS : RVMm<"vfnmad.s", 0xe3, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFNMAD : RVMm<"pvfnmad", 0xe3, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfnmad.s", "pvfnmad.up">;
+
+// Section 8.13.11 - VFNMSB (Vector Floating Fused Negative Multiply Subtract)
+let cx = 0, cx2 = 0 in
+defm VFNMSBD : RVMm<"vfnmsb.d", 0xf3, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFNMSBLO : RVMm<"pvfnmsb.lo", 0xf3, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFNMSBUP : RVMm<"pvfnmsb.up", 0xf3, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFNMSBS : RVMm<"vfnmsb.s", 0xf3, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFNMSB : RVMm<"pvfnmsb", 0xf3, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfnmsb.s", "pvfnmsb.up">;
+
+// Section 8.13.12 - VRCP (Vector Floating Reciprocal)
+let cx = 0, cx2 = 0 in defm VRCPD : RVF1m<"vrcp.d", 0xe1, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVRCPLO : RVF1m<"pvrcp.lo", 0xe1, V64, VM>;
+let cx = 1, cx2 = 0 in {
+ defm PVRCPUP : RVF1m<"pvrcp.up", 0xe1, V64, VM>;
+ let isCodeGenOnly = 1 in defm VRCPS : RVF1m<"vrcp.s", 0xe1, V64, VM>;
+}
+let cx = 1, cx2 = 1 in defm PVRCP : RVF1m<"pvrcp", 0xe1, V64, VM512>;
+def : MnemonicAlias<"vrcp.s", "pvrcp.up">;
+
+// Section 8.13.13 - VRSQRT (Vector Floating Reciprocal Square Root)
+let cx = 0, cx2 = 0 in defm VRSQRTD : RVF1m<"vrsqrt.d", 0xf1, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVRSQRTLO : RVF1m<"pvrsqrt.lo", 0xf1, V64, VM>;
+let cx = 1, cx2 = 0 in {
+ defm PVRSQRTUP : RVF1m<"pvrsqrt.up", 0xf1, V64, VM>;
+ let isCodeGenOnly = 1 in
+ defm VRSQRTS : RVF1m<"vrsqrt.s", 0xf1, V64, VM>;
+}
+let cx = 1, cx2 = 1 in
+defm PVRSQRT : RVF1m<"pvrsqrt", 0xf1, V64, VM512>;
+let cs2 = 1 in {
+ let cx = 0, cx2 = 0 in
+ defm VRSQRTDNEX : RVF1m<"vrsqrt.d.nex", 0xf1, V64, VM>;
+ let cx = 0, cx2 = 1 in
+ defm PVRSQRTLONEX : RVF1m<"pvrsqrt.lo.nex", 0xf1, V64, VM>;
+ let cx = 1, cx2 = 0 in {
+ defm PVRSQRTUPNEX : RVF1m<"pvrsqrt.up.nex", 0xf1, V64, VM>;
+ let isCodeGenOnly = 1 in
+ defm VRSQRTSNEX : RVF1m<"vrsqrt.s.nex", 0xf1, V64, VM>;
+ }
+ let cx = 1, cx2 = 1 in
+ defm PVRSQRTNEX : RVF1m<"pvrsqrt.nex", 0xf1, V64, VM512>;
+}
+def : MnemonicAlias<"vrsqrt.s", "pvrsqrt.up">;
+def : MnemonicAlias<"vrsqrt.s.nex", "pvrsqrt.up.nex">;
+
+// Section 8.13.14 - VFIX (Vector Convert to Fixed Point)
+let cx = 0, cx2 = 0, cs2 = 0 in
+defm VCVTWDSX : RVFIXm<"vcvt.w.d.sx", 0xe8, V64, VM>;
+let cx = 0, cx2 = 1, cs2 = 0 in
+defm VCVTWDZX : RVFIXm<"vcvt.w.d.zx", 0xe8, V64, VM>;
+let cx = 1, cx2 = 0, cs2 = 0 in
+defm VCVTWSSX : RVFIXm<"vcvt.w.s.sx", 0xe8, V64, VM>;
+let cx = 1, cx2 = 1, cs2 = 0 in
+defm VCVTWSZX : RVFIXm<"vcvt.w.s.zx", 0xe8, V64, VM>;
+let cx = 0, cx2 = 1, cs2 = 1 in
+defm PVCVTWSLO : RVFIXm<"pvcvt.w.s.lo", 0xe8, V64, VM>;
+let cx = 1, cx2 = 0, cs2 = 1 in
+defm PVCVTWSUP : RVFIXm<"pvcvt.w.s.up", 0xe8, V64, VM>;
+let cx = 1, cx2 = 1, cs2 = 1 in
+defm PVCVTWS : RVFIXm<"pvcvt.w.s", 0xe8, V64, VM512>;
+
+// Section 8.13.15 - VFIXX (Vector Convert to Fixed Point)
+defm VCVTLD : RVFIXm<"vcvt.l.d", 0xa8, V64, VM>;
+
+// Section 8.13.16 - VFLT (Vector Convert to Floating Point)
+let cx = 0, cx2 = 0, cs2 = 0 in
+defm VCVTDW : RVF1m<"vcvt.d.w", 0xf8, V64, VM>;
+let cx = 1, cx2 = 0, cs2 = 0 in
+defm VCVTSW : RVF1m<"vcvt.s.w", 0xf8, V64, VM>;
+let cx = 0, cx2 = 1, cs2 = 1 in
+defm PVCVTSWLO : RVF1m<"pvcvt.s.w.lo", 0xf8, V64, VM>;
+let cx = 1, cx2 = 0, cs2 = 1 in
+defm PVCVTSWUP : RVF1m<"pvcvt.s.w.up", 0xf8, V64, VM>;
+let cx = 1, cx2 = 1, cs2 = 1 in
+defm PVCVTSW : RVF1m<"pvcvt.s.w", 0xf8, V64, VM512>;
+
+// Section 8.13.17 - VFLTX (Vector Convert to Floating Point)
+defm VCVTDL : RVF1m<"vcvt.d.l", 0xb8, V64, VM>;
+
+// Section 8.13.18 - VCVS (Vector Convert to Single-format)
+defm VCVTSD : RVF1m<"vcvt.s.d", 0x9f, V64, VM>;
+
+// Section 8.13.19 - VCVD (Vector Convert to Double-format)
+defm VCVTDS : RVF1m<"vcvt.d.s", 0x8f, V64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.14 - Vector Reduction Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.14.1 - VSUMS (Vector Sum Single)
+defm VSUMWSX : RVF1m<"vsum.w.sx", 0xea, V64, VM>;
+let cx2 = 1 in defm VSUMWZX : RVF1m<"vsum.w.zx", 0xea, V64, VM>;
+
+// Section 8.14.2 - VSUMX (Vector Sum)
+defm VSUML : RVF1m<"vsum.l", 0xaa, V64, VM>;
+
+// Section 8.14.3 - VFSUM (Vector Floating Sum)
+defm VFSUMD : RVF1m<"vfsum.d", 0xec, V64, VM>;
+let cx = 1 in defm VFSUMS : RVF1m<"vfsum.s", 0xec, V64, VM>;
+
+// Section 8.14.4 - VMAXS (Vector Maximum/Minimum Single)
+let cx2 = 0 in defm VRMAXSWFSTSX : RVF1m<"vrmaxs.w.fst.sx", 0xbb, V64, VM>;
+let cx2 = 1 in defm VRMAXSWFSTZX : RVF1m<"vrmaxs.w.fst.zx", 0xbb, V64, VM>;
+let cs = 1 in {
+ let cx2 = 0 in
+ defm VRMAXSWLSTSX : RVF1m<"vrmaxs.w.lst.sx", 0xbb, V64, VM>;
+ let cx2 = 1 in
+ defm VRMAXSWLSTZX : RVF1m<"vrmaxs.w.lst.zx", 0xbb, V64, VM>;
+}
+let cs2 = 1 in {
+ let cx2 = 0 in
+ defm VRMINSWFSTSX : RVF1m<"vrmins.w.fst.sx", 0xbb, V64, VM>;
+ let cx2 = 1 in
+ defm VRMINSWFSTZX : RVF1m<"vrmins.w.fst.zx", 0xbb, V64, VM>;
+ let cs = 1 in {
+ let cx2 = 0 in
+ defm VRMINSWLSTSX : RVF1m<"vrmins.w.lst.sx", 0xbb, V64, VM>;
+ let cx2 = 1 in
+ defm VRMINSWLSTZX : RVF1m<"vrmins.w.lst.zx", 0xbb, V64, VM>;
+ }
+}
+
+// Section 8.14.5 - VMAXX (Vector Maximum/Minimum)
+let cs = 0 in defm VRMAXSLFST : RVF1m<"vrmaxs.l.fst", 0xab, V64, VM>;
+let cs = 1 in defm VRMAXSLLST : RVF1m<"vrmaxs.l.lst", 0xab, V64, VM>;
+let cs2 = 1 in {
+ let cs = 0 in defm VRMINSLFST : RVF1m<"vrmins.l.fst", 0xab, V64, VM>;
+ let cs = 1 in defm VRMINSLLST : RVF1m<"vrmins.l.lst", 0xab, V64, VM>;
+}
+
+// Section 8.14.6 - VFMAX (Vector Floating Maximum/Minimum)
+let cs = 0 in defm VFRMAXDFST : RVF1m<"vfrmax.d.fst", 0xad, V64, VM>;
+let cs = 1 in defm VFRMAXDLST : RVF1m<"vfrmax.d.lst", 0xad, V64, VM>;
+let cs2 = 1 in {
+ let cs = 0 in defm VFRMINDFST : RVF1m<"vfrmin.d.fst", 0xad, V64, VM>;
+ let cs = 1 in defm VFRMINDLST : RVF1m<"vfrmin.d.lst", 0xad, V64, VM>;
+}
+let cx = 1 in {
+ let cs = 0 in defm VFRMAXSFST : RVF1m<"vfrmax.s.fst", 0xad, V64, VM>;
+ let cs = 1 in defm VFRMAXSLST : RVF1m<"vfrmax.s.lst", 0xad, V64, VM>;
+ let cs2 = 1 in {
+ let cs = 0 in defm VFRMINSFST : RVF1m<"vfrmin.s.fst", 0xad, V64, VM>;
+ let cs = 1 in defm VFRMINSLST : RVF1m<"vfrmin.s.lst", 0xad, V64, VM>;
+ }
+}
+
+// Section 8.14.7 - VRAND (Vector Reduction And)
+defm VRAND : RVF1m<"vrand", 0x88, V64, VM>;
+
+// Section 8.14.8 - VROR (Vector Reduction Or)
+defm VROR : RVF1m<"vror", 0x98, V64, VM>;
+
+// Section 8.14.9 - VRXOR (Vector Reduction Exclusive Or)
+defm VRXOR : RVF1m<"vrxor", 0x89, V64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.15 - Vector Iterative Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.15.1 - VFIA (Vector Floating Iteration Add)
+let cx = 0 in defm VFIAD : RVI2m<"vfia.d", 0xce, V64, I64>;
+let cx = 1 in defm VFIAS : RVI2m<"vfia.s", 0xce, V64, F32>;
+
+// Section 8.15.2 - VFIS (Vector Floating Iteration Subtract)
+let cx = 0 in defm VFISD : RVI2m<"vfis.d", 0xde, V64, I64>;
+let cx = 1 in defm VFISS : RVI2m<"vfis.s", 0xde, V64, F32>;
+
+// Section 8.15.3 - VFIM (Vector Floating Iteration Multiply)
+let cx = 0 in defm VFIMD : RVI2m<"vfim.d", 0xcf, V64, I64>;
+let cx = 1 in defm VFIMS : RVI2m<"vfim.s", 0xcf, V64, F32>;
+
+// Section 8.15.4 - VFIAM (Vector Floating Iteration Add and Multiply)
+let cx = 0 in defm VFIAMD : RVI3m<"vfiam.d", 0xee, V64, I64>;
+let cx = 1 in defm VFIAMS : RVI3m<"vfiam.s", 0xee, V64, F32>;
+
+// Section 8.15.5 - VFISM (Vector Floating Iteration Subtract and Multiply)
+let cx = 0 in defm VFISMD : RVI3m<"vfism.d", 0xfe, V64, I64>;
+let cx = 1 in defm VFISMS : RVI3m<"vfism.s", 0xfe, V64, F32>;
+
+// Section 8.15.6 - VFIMA (Vector Floating Iteration Multiply and Add)
+let cx = 0 in defm VFIMAD : RVI3m<"vfima.d", 0xef, V64, I64>;
+let cx = 1 in defm VFIMAS : RVI3m<"vfima.s", 0xef, V64, F32>;
+
+// Section 8.15.7 - VFIMS (Vector Floating Iteration Multiply and Subtract)
+let cx = 0 in defm VFIMSD : RVI3m<"vfims.d", 0xff, V64, I64>;
+let cx = 1 in defm VFIMSS : RVI3m<"vfims.s", 0xff, V64, F32>;
+
+//-----------------------------------------------------------------------------
+// Section 8.16 - Vector Merger Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.16.1 - VMRG (Vector Merge)
+let cx = 0 in defm VMRG : RVm<"vmrg", 0xd6, V64, I64, VM>;
+// FIXME: vmrg.w should be named pvmrg, but we follow the assembly manual.
+let cx = 1 in defm VMRGW : RVm<"vmrg.w", 0xd6, V64, I64, VM512>;
+def : MnemonicAlias<"vmrg.l", "vmrg">;
+
+// Section 8.16.2 - VSHF (Vector Shuffle)
+defm VSHF : RVSHFm<"vshf", 0xbc, V64>;
+
+// Section 8.16.3 - VCP (Vector Compress)
+defm VCP : RV1m<"vcp", 0x8d, V64, VM>;
+
+// Section 8.16.4 - VEX (Vector Expand)
+defm VEX : RV1m<"vex", 0x9d, V64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.17 - Vector Mask Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.17.1 - VFMK (Vector Form Mask)
+defm VFMKL : RVMKm<"vfmk.l.", 0xb4, V64, VM>;
+def : MnemonicAlias<"vfmk.l", "vfmk.l.at">;
+
+// Section 8.17.2 - VFMS (Vector Form Mask Single)
+defm VFMKW : RVMKm<"vfmk.w.", 0xb5, V64, VM>;
+let isCodeGenOnly = 1 in defm PVFMKWLO : RVMKm<"vfmk.w.", 0xb5, V64, VM>;
+let cx = 1 in defm PVFMKWUP : RVMKm<"pvfmk.w.up.", 0xb5, V64, VM>;
+def : MnemonicAlias<"vfmk.w", "vfmk.w.at">;
+def : MnemonicAlias<"pvfmk.w.up", "pvfmk.w.up.at">;
+def : MnemonicAlias<"pvfmk.w.lo", "vfmk.w.at">;
+foreach CC = [ "af", "gt", "lt", "ne", "eq", "ge", "le", "at" ] in {
+ def : MnemonicAlias<"pvfmk.w.lo."#CC, "vfmk.w."#CC>;
+}
+
+// Section 8.17.3 - VFMF (Vector Form Mask Floating Point)
+defm VFMKD : RVMKm<"vfmk.d.", 0xb6, V64, VM>;
+let cx2 = 1 in defm PVFMKSLO : RVMKm<"pvfmk.s.lo.", 0xb6, V64, VM>;
+let cx = 1 in {
+ defm PVFMKSUP : RVMKm<"pvfmk.s.up.", 0xb6, V64, VM>;
+ let isCodeGenOnly = 1 in defm VFMKS : RVMKm<"vfmk.s.", 0xb6, V64, VM>;
+}
+def : MnemonicAlias<"vfmk.d", "vfmk.d.at">;
+def : MnemonicAlias<"pvfmk.s.lo", "pvfmk.s.lo.at">;
+def : MnemonicAlias<"pvfmk.s.up", "pvfmk.s.up.at">;
+def : MnemonicAlias<"vfmk.s", "pvfmk.s.up.at">;
+foreach CC = [ "af", "gt", "lt", "ne", "eq", "ge", "le", "at", "num", "nan",
+ "gtnan", "ltnan", "nenan", "eqnan", "genan", "lenan" ] in {
+ def : MnemonicAlias<"vfmk.s."#CC, "pvfmk.s.up."#CC>;
+}
+
+// Section 8.17.4 - ANDM (And VM)
+defm ANDM : RVM2m<"andm", 0x84, VM>;
+
+// Section 8.17.5 - ORM (Or VM)
+defm ORM : RVM2m<"orm", 0x85, VM>;
+
+// Section 8.17.6 - XORM (Exclusive Or VM)
+defm XORM : RVM2m<"xorm", 0x86, VM>;
+
+// Section 8.17.7 - EQVM (Equivalence VM)
+defm EQVM : RVM2m<"eqvm", 0x87, VM>;
+
+// Section 8.17.8 - NNDM (Negate And VM)
+defm NNDM : RVM2m<"nndm", 0x94, VM>;
+
+// Section 8.17.9 - NEGM (Negate VM)
+defm NEGM : RVM1m<"negm", 0x95, VM>;
+
+// Section 8.17.10 - PCVM (Population Count of VM)
+defm PCVM : RVMSm<"pcvm", 0xa4, VM>;
+
+// Section 8.17.11 - LZVM (Leading Zero of VM)
+defm LZVM : RVMSm<"lzvm", 0xa5, VM>;
+
+// Section 8.17.12 - TOVM (Trailing One of VM)
+defm TOVM : RVMSm<"tovm", 0xa6, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.18 - Vector Control Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.18.1 - LVL (Load VL)
+let sx = 0, cz = 0, sz = 0, hasSideEffects = 0, Defs = [VL] in {
+ def LVLr : RR<0xbf, (outs), (ins I64:$sy), "lvl $sy">;
+ let cy = 0 in def LVLi : RR<0xbf, (outs), (ins simm7:$sy), "lvl $sy">;
+}
+
+// Section 8.18.2 - SVL (Save VL)
+let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0, Uses = [VL] in
+def SVL : RR<0x2f, (outs I64:$sx), (ins), "svl $sx">;
+
+// Section 8.18.3 - SMVL (Save Maximum Vector Length)
+let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in
+def SMVL : RR<0x2e, (outs I64:$sx), (ins), "smvl $sx">;
+
+// Section 8.18.4 - LVIX (Load Vector Data Index)
+let sx = 0, cz = 0, sz = 0, hasSideEffects = 0, Defs = [VIX] in {
+ def LVIXr : RR<0xaf, (outs), (ins I64:$sy), "lvix $sy">;
+ let cy = 0 in def LVIXi : RR<0xaf, (outs), (ins uimm6:$sy), "lvix $sy">;
+}
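
The MnemonicAlias records above only affect the assembler: an aliased spelling is rewritten to its canonical mnemonic before instruction matching. As a rough, self-contained illustration of that behaviour (a plain C++ sketch, not LLVM's generated matcher), a handful of the alias pairs defined above can be modelled as a simple lookup table:

// Standalone sketch of mnemonic-alias resolution; the helper names are
// hypothetical, and the alias pairs are copied from the MnemonicAlias
// records above.
#include <iostream>
#include <map>
#include <string>

static const std::map<std::string, std::string> Aliases = {
    {"vaddu.w", "pvaddu.lo"}, {"vadds.w", "pvadds.lo"},
    {"vsubu.w", "pvsubu.lo"}, {"vmuls.w", "vmuls.w.zx"},
    {"vdivs.w", "vdivs.w.zx"}, {"vmrg.l", "vmrg"},
};

// Rewrite an aliased mnemonic to its canonical spelling; unknown names
// pass through unchanged.
static std::string canonicalize(const std::string &Mnemonic) {
  auto It = Aliases.find(Mnemonic);
  return It == Aliases.end() ? Mnemonic : It->second;
}

int main() {
  std::cout << canonicalize("vadds.w") << "\n"; // pvadds.lo
  std::cout << canonicalize("vadds.l") << "\n"; // unchanged
}
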
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
index 9815610510e1..bc5577ce4f97 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
@@ -51,6 +51,11 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
break;
return MCOperand::createReg(MO.getReg());
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(
+ MI, MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP);
+ case MachineOperand::MO_ConstantPoolIndex:
+ return LowerSymbolOperand(MI, MO, AP.GetCPISymbol(MO.getIndex()), AP);
case MachineOperand::MO_ExternalSymbol:
return LowerSymbolOperand(
MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
@@ -58,7 +63,8 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP);
case MachineOperand::MO_Immediate:
return MCOperand::createImm(MO.getImm());
-
+ case MachineOperand::MO_JumpTableIndex:
+ return LowerSymbolOperand(MI, MO, AP.GetJTISymbol(MO.getIndex()), AP);
case MachineOperand::MO_MachineBasicBlock:
return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP);
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
index 5783a8df69d2..d175ad26c742 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -35,6 +36,8 @@ VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {}
const MCPhysReg *
VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
switch (MF->getFunction().getCallingConv()) {
+ case CallingConv::Fast:
+ // Being explicit (same as standard CC).
default:
return CSR_SaveList;
case CallingConv::PreserveAll:
@@ -45,6 +48,8 @@ VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
switch (CC) {
+ case CallingConv::Fast:
+ // Being explicit (same as standard CC).
default:
return CSR_RegMask;
case CallingConv::PreserveAll:
@@ -82,10 +87,22 @@ BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const {
++ItAlias)
Reserved.set(*ItAlias);
+ // Reserve constant registers.
+ Reserved.set(VE::VM0);
+ Reserved.set(VE::VMP0);
+
return Reserved;
}
-bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { return false; }
+bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
+ switch (PhysReg) {
+ case VE::VM0:
+ case VE::VMP0:
+ return true;
+ default:
+ return false;
+ }
+}
const TargetRegisterClass *
VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
@@ -93,6 +110,29 @@ VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
return &VE::I64RegClass;
}
+static unsigned offsetToDisp(MachineInstr &MI) {
+ // Default offset in instruction's operands (reg+reg+imm).
+ unsigned OffDisp = 2;
+
+#define RRCAS_multi_cases(NAME) NAME##rir : case NAME##rii
+
+ {
+ using namespace llvm::VE;
+ switch (MI.getOpcode()) {
+ case RRCAS_multi_cases(TS1AML):
+ case RRCAS_multi_cases(TS1AMW):
+ case RRCAS_multi_cases(CASL):
+ case RRCAS_multi_cases(CASW):
+ // These instructions use AS format (reg+imm).
+ OffDisp = 1;
+ break;
+ }
+ }
+#undef RRCAS_multi_cases
+
+ return OffDisp;
+}
+
static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
MachineInstr &MI, const DebugLoc &dl,
unsigned FIOperandNum, int Offset, Register FrameReg) {
@@ -100,7 +140,7 @@ static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
// VE has 32 bit offset field, so no need to expand a target instruction.
// Directly encode it.
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
- MI.getOperand(FIOperandNum + 2).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum + offsetToDisp(MI)).ChangeToImmediate(Offset);
}
void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
@@ -116,9 +156,41 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register FrameReg;
int Offset;
- Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg);
-
- Offset += MI.getOperand(FIOperandNum + 2).getImm();
+ Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed();
+
+ Offset += MI.getOperand(FIOperandNum + offsetToDisp(MI)).getImm();
+
+ if (MI.getOpcode() == VE::STQrii) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ Register SrcReg = MI.getOperand(3).getReg();
+ Register SrcHiReg = getSubReg(SrcReg, VE::sub_even);
+ Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd);
+ // VE stores HiReg to 8(addr) and LoReg to 0(addr)
+ MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STrii))
+ .addReg(FrameReg)
+ .addImm(0)
+ .addImm(0)
+ .addReg(SrcLoReg);
+ replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
+ MI.setDesc(TII.get(VE::STrii));
+ MI.getOperand(3).setReg(SrcHiReg);
+ Offset += 8;
+ } else if (MI.getOpcode() == VE::LDQrii) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ Register DestReg = MI.getOperand(0).getReg();
+ Register DestHiReg = getSubReg(DestReg, VE::sub_even);
+ Register DestLoReg = getSubReg(DestReg, VE::sub_odd);
+ // VE loads HiReg from 8(addr) and LoReg from 0(addr)
+ MachineInstr *StMI =
+ BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDrii), DestLoReg)
+ .addReg(FrameReg)
+ .addImm(0)
+ .addImm(0);
+ replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
+ MI.setDesc(TII.get(VE::LDrii));
+ MI.getOperand(0).setReg(DestHiReg);
+ Offset += 8;
+ }
replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
}
@@ -126,26 +198,3 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return VE::SX9;
}
-
-// VE has no architectural need for stack realignment support,
-// except that LLVM unfortunately currently implements overaligned
-// stack objects by depending upon stack realignment support.
-// If that ever changes, this can probably be deleted.
-bool VERegisterInfo::canRealignStack(const MachineFunction &MF) const {
- if (!TargetRegisterInfo::canRealignStack(MF))
- return false;
-
- // VE always has a fixed frame pointer register, so don't need to
- // worry about needing to reserve it. [even if we don't have a frame
- // pointer for our frame, it still cannot be used for other things,
- // or register window traps will be SADNESS.]
-
- // If there's a reserved call frame, we can use VE to access locals.
- if (getFrameLowering(MF)->hasReservedCallFrame(MF))
- return true;
-
- // Otherwise, we'd need a base pointer, but those aren't implemented
- // for VE at the moment.
-
- return false;
-}
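
The STQrii/LDQrii handling added to eliminateFrameIndex above splits one 128-bit access into two 64-bit accesses: the low half goes at the original offset and the high half 8 bytes above it, as the in-code comments note. A minimal standalone sketch of that address arithmetic (toy types, not LLVM's MachineInstr API):

// Toy model of the STQrii split: a 128-bit register pair becomes two
// 64-bit stores, low half at `Offset`, high half at `Offset + 8`.
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

struct RegPair {
  uint64_t Hi; // sub_even half
  uint64_t Lo; // sub_odd half
};

// Returns the (offset, value) pairs a 128-bit store decomposes into.
static std::vector<std::pair<int64_t, uint64_t>> splitStore(int64_t Offset,
                                                            RegPair Src) {
  return {{Offset, Src.Lo}, {Offset + 8, Src.Hi}};
}

int main() {
  for (const auto &Acc : splitStore(176, {0x1111, 0x2222}))
    std::cout << "store 0x" << std::hex << Acc.second << " at fp+"
              << std::dec << Acc.first << "\n";
}
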
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
index 9a32da16bea6..334fb965a986 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
@@ -40,8 +40,6 @@ public:
RegScavenger *RS = nullptr) const override;
Register getFrameRegister(const MachineFunction &MF) const override;
-
- bool canRealignStack(const MachineFunction &MF) const override;
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
index 29708d35c730..70ff104b65b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
@@ -26,13 +26,33 @@ class VEMiscReg<bits<6> enc, string n>: Register<n> {
let Namespace = "VE";
}
+class VEVecReg<bits<8> enc, string n, list<Register> subregs = [],
+ list<string> altNames = [], list<Register> aliases = []>
+ : Register<n, altNames> {
+ let HWEncoding{15-8} = 0;
+ let HWEncoding{7-0} = enc;
+ let Namespace = "VE";
+ let SubRegs = subregs;
+ let Aliases = aliases;
+}
+
+class VEMaskReg<bits<4> enc, string n, list<Register> subregs = [],
+ list<string> altNames = [], list<Register> aliases = []>
+ : Register<n, altNames> {
+ let HWEncoding{15-4} = 0;
+ let HWEncoding{3-0} = enc;
+ let Namespace = "VE";
+ let SubRegs = subregs;
+ let Aliases = aliases;
+}
+
let Namespace = "VE" in {
- def sub_i8 : SubRegIndex<8, 56>; // Low 8 bit (56..63)
- def sub_i16 : SubRegIndex<16, 48>; // Low 16 bit (48..63)
def sub_i32 : SubRegIndex<32, 32>; // Low 32 bit (32..63)
def sub_f32 : SubRegIndex<32>; // High 32 bit (0..31)
def sub_even : SubRegIndex<64>; // High 64 bit (0..63)
def sub_odd : SubRegIndex<64, 64>; // Low 64 bit (64..127)
+ def sub_vm_even : SubRegIndex<256>; // High 256 bit (0..255)
+ def sub_vm_odd : SubRegIndex<256, 256>; // Low 256 bit (256..511)
def AsmName : RegAltNameIndex;
}
@@ -66,26 +86,23 @@ def MISC : RegisterClass<"VE", [i64], 64,
def IC : VEMiscReg<62, "ic">;
//-----------------------------------------------------------------------------
-// Gneric Registers
+// Vector Length Register
//-----------------------------------------------------------------------------
-let RegAltNameIndices = [AsmName] in {
+def VL : VEMiscReg<63, "vl">;
-// Generic integer registers - 8 bits wide
-foreach I = 0-63 in
- def SB#I : VEReg<I, "sb"#I, [], ["s"#I]>, DwarfRegNum<[I]>;
+// Register classes.
+def VLS : RegisterClass<"VE", [i32], 64, (add VL)>;
-// Generic integer registers - 16 bits wide
-let SubRegIndices = [sub_i8] in
-foreach I = 0-63 in
- def SH#I : VEReg<I, "sh"#I, [!cast<VEReg>("SB"#I)], ["s"#I]>,
- DwarfRegNum<[I]>;
+//-----------------------------------------------------------------------------
+// Generic Registers
+//-----------------------------------------------------------------------------
+
+let RegAltNameIndices = [AsmName] in {
// Generic integer registers - 32 bits wide
-let SubRegIndices = [sub_i16] in
foreach I = 0-63 in
- def SW#I : VEReg<I, "sw"#I, [!cast<VEReg>("SH"#I)], ["s"#I]>,
- DwarfRegNum<[I]>;
+ def SW#I : VEReg<I, "sw"#I, [], ["s"#I]>, DwarfRegNum<[I]>;
// Generic floating point registers - 32 bits wide
// NOTE: Mark SF#I as alias of SW#I temporary to avoid register allocation
@@ -95,10 +112,21 @@ foreach I = 0-63 in
DwarfRegNum<[I]>;
// Generic integer registers - 64 bits wide
-let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in
-foreach I = 0-63 in
- def SX#I : VEReg<I, "s"#I, [!cast<VEReg>("SW"#I), !cast<VEReg>("SF"#I)],
- ["s"#I]>, DwarfRegNum<[I]>;
+let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in {
+  // Several registers have specific names, so add those names as aliases.
+ def SX8 : VEReg<8, "s8", [SW8, SF8], ["s8", "sl"]>, DwarfRegNum<[8]>;
+ def SX9 : VEReg<9, "s9", [SW9, SF9], ["s9", "fp"]>, DwarfRegNum<[9]>;
+ def SX10 : VEReg<10, "s10", [SW10, SF10], ["s10", "lr"]>, DwarfRegNum<[10]>;
+ def SX11 : VEReg<11, "s11", [SW11, SF11], ["s11", "sp"]>, DwarfRegNum<[11]>;
+ def SX14 : VEReg<14, "s14", [SW14, SF14], ["s14", "tp"]>, DwarfRegNum<[14]>;
+ def SX15 : VEReg<15, "s15", [SW15, SF15], ["s15", "got"]>, DwarfRegNum<[15]>;
+ def SX16 : VEReg<16, "s16", [SW16, SF16], ["s16", "plt"]>, DwarfRegNum<[16]>;
+
+ // Other generic registers.
+ foreach I = { 0-7, 12-13, 17-63 } in
+ def SX#I : VEReg<I, "s"#I, [!cast<VEReg>("SW"#I), !cast<VEReg>("SF"#I)],
+ ["s"#I]>, DwarfRegNum<[I]>;
+}
// Aliases of the S* registers used to hold 128-bit for values (long doubles).
// Following foreach represents something like:
@@ -112,20 +140,31 @@ foreach I = 0-31 in
!cast<VEReg>("SX"#!add(!shl(I,1),1))],
["s"#!shl(I,1)]>;
+// Vector registers - 64 bits wide, 256 elements
+foreach I = 0-63 in
+ def V#I : VEVecReg<I, "v"#I, [], ["v"#I]>, DwarfRegNum<[!add(64,I)]>;
+
+// Vector Index Register
+def VIX : VEVecReg<255, "vix", [], ["vix"]>;
+
+// Vector mask registers - 256 bits wide
+foreach I = 0-15 in
+ def VM#I : VEMaskReg<I, "vm"#I, [], ["vm"#I]>, DwarfRegNum<[!add(128,I)]>;
+
+// Aliases of VM registers, pairing two VMs for use by packed instructions
+let SubRegIndices = [sub_vm_even, sub_vm_odd], CoveredBySubRegs = 1 in
+foreach I = 0-7 in
+ def VMP#I : VEMaskReg<!shl(I,1), "vmp"#I,
+ [!cast<VEMaskReg>("VM"#!shl(I,1)),
+ !cast<VEMaskReg>("VM"#!add(!shl(I,1),1))],
+ ["vm"#!shl(I,1)]>;
+
} // RegAltNameIndices = [AsmName]
// Register classes.
//
// The register order is defined in terms of the preferred
// allocation order.
-def I8 : RegisterClass<"VE", [i8], 8,
- (add (sequence "SB%u", 0, 7),
- (sequence "SB%u", 34, 63),
- (sequence "SB%u", 8, 33))>;
-def I16 : RegisterClass<"VE", [i16], 16,
- (add (sequence "SH%u", 0, 7),
- (sequence "SH%u", 34, 63),
- (sequence "SH%u", 8, 33))>;
def I32 : RegisterClass<"VE", [i32], 32,
(add (sequence "SW%u", 0, 7),
(sequence "SW%u", 34, 63),
@@ -142,3 +181,14 @@ def F128 : RegisterClass<"VE", [f128], 128,
(add (sequence "Q%u", 0, 3),
(sequence "Q%u", 17, 31),
(sequence "Q%u", 4, 16))>;
+
+def V64 : RegisterClass<"VE",
+ [v256f64, // default type for vector registers
+ v512i32, v512f32,
+ v256i64, v256i32, v256f32, /* v256f64, */], 64,
+ (add (sequence "V%u", 0, 63),
+ VIX)>;
+
+// vm0 is reserved for always true
+def VM : RegisterClass<"VE", [v256i1], 64, (sequence "VM%u", 0, 15)>;
+def VM512 : RegisterClass<"VE", [v512i1], 64, (sequence "VMP%u", 0, 7)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
index a0b78d95e3cf..daa6cfb8aa84 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
@@ -27,73 +27,35 @@ void VESubtarget::anchor() {}
VESubtarget &VESubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
+ // Default feature settings
+ EnableVPU = false;
+
// Determine default and user specified characteristics
std::string CPUName = std::string(CPU);
if (CPUName.empty())
- CPUName = "ve";
+ CPUName = "generic";
// Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
+ ParseSubtargetFeatures(CPUName, /*TuneCPU=*/CPU, FS);
return *this;
}
VESubtarget::VESubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : VEGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+ : VEGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
FrameLowering(*this) {}
-int VESubtarget::getAdjustedFrameSize(int frameSize) const {
-
- // VE stack frame:
- //
- // +----------------------------------------+
- // | Locals and temporaries |
- // +----------------------------------------+
- // | Parameter area for callee |
- // 176(fp) | |
- // +----------------------------------------+
- // | Register save area (RSA) for callee |
- // | |
- // 16(fp) | 20 * 8 bytes |
- // +----------------------------------------+
- // 8(fp) | Return address |
- // +----------------------------------------+
- // 0(fp) | Frame pointer of caller |
- // --------+----------------------------------------+--------
- // | Locals and temporaries for callee |
- // +----------------------------------------+
- // | Parameter area for callee of callee |
- // +----------------------------------------+
- // 16(sp) | RSA for callee of callee |
- // +----------------------------------------+
- // 8(sp) | Return address |
- // +----------------------------------------+
- // 0(sp) | Frame pointer of callee |
- // +----------------------------------------+
-
- // RSA frame:
- // +----------------------------------------------+
- // 168(fp) | %s33 |
- // +----------------------------------------------+
- // | %s19...%s32 |
- // +----------------------------------------------+
- // 48(fp) | %s18 |
- // +----------------------------------------------+
- // 40(fp) | Linkage area register (%s17) |
- // +----------------------------------------------+
- // 32(fp) | Procedure linkage table register (%plt=%s16) |
- // +----------------------------------------------+
- // 24(fp) | Global offset table register (%got=%s15) |
- // +----------------------------------------------+
- // 16(fp) | Thread pointer register (%tp=%s14) |
- // +----------------------------------------------+
+uint64_t VESubtarget::getAdjustedFrameSize(uint64_t FrameSize) const {
+  // Calculate the adjusted frame size by adding the size of the RSA frame,
+  // return address, and frame pointer, as described in VEFrameLowering.cpp.
+ const VEFrameLowering *TFL = getFrameLowering();
- frameSize += 176; // for RSA, RA, and FP
- frameSize = alignTo(frameSize, 16); // requires 16 bytes alignment
+ FrameSize += getRsaSize();
+ FrameSize = alignTo(FrameSize, TFL->getStackAlign());
- return frameSize;
+ return FrameSize;
}
bool VESubtarget::enableMachineScheduler() const { return true; }
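
For reference, the adjustment performed by getAdjustedFrameSize above boils down to adding the 176-byte area for the RSA, return address, and frame pointer, then rounding up to the 16-byte stack alignment (both constants are visible in the removed comment block). A minimal sketch, assuming exactly those two constants:

// Minimal sketch of the frame-size adjustment shown above; the real code
// queries VEFrameLowering for the stack alignment instead of hard-coding 16.
#include <cstdint>
#include <iostream>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

static uint64_t adjustedFrameSize(uint64_t FrameSize) {
  FrameSize += 176;              // RSA + return address + frame pointer
  return alignTo(FrameSize, 16); // VE stack alignment
}

int main() {
  std::cout << adjustedFrameSize(40) << "\n"; // 224
}
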
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
index f3a2c206162e..213aca2ea3f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
@@ -32,6 +32,13 @@ class VESubtarget : public VEGenSubtargetInfo {
Triple TargetTriple;
virtual void anchor();
+ /// Features {
+
+ // Emit VPU instructions
+ bool EnableVPU;
+
+ /// } Features
+
VEInstrInfo InstrInfo;
VETargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
@@ -55,15 +62,21 @@ public:
bool enableMachineScheduler() const override;
+ bool enableVPU() const { return EnableVPU; }
+
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
VESubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
/// Given a actual stack size as determined by FrameInfo, this function
- /// returns adjusted framesize which includes space for register window
- /// spills and arguments.
- int getAdjustedFrameSize(int stackSize) const;
+  /// returns the adjusted frame size, which includes space for the RSA,
+  /// return address, and frame pointer.
+ uint64_t getAdjustedFrameSize(uint64_t FrameSize) const;
+
+ /// Get the size of RSA, return address, and frame pointer as described
+ /// in VEFrameLowering.cpp.
+  unsigned getRsaSize() const { return 176; }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
};
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
index 08b55eebbc98..414ae09431c0 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
#define DEBUG_TYPE "ve"
-extern "C" void LLVMInitializeVETarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETarget() {
// Register the target.
RegisterTargetMachine<VETargetMachine> X(getTheVETarget());
}
@@ -44,13 +44,24 @@ static std::string computeDataLayout(const Triple &T) {
// Stack alignment is 128 bits
Ret += "-S128";
+ // Vector alignments are 64 bits
+  // Need to define all of them; otherwise each vector type would default
+  // to an alignment equal to its own size.
+ Ret += "-v64:64:64"; // for v2f32
+ Ret += "-v128:64:64";
+ Ret += "-v256:64:64";
+ Ret += "-v512:64:64";
+ Ret += "-v1024:64:64";
+ Ret += "-v2048:64:64";
+ Ret += "-v4096:64:64";
+ Ret += "-v8192:64:64";
+ Ret += "-v16384:64:64"; // for v256f64
+
return Ret;
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
+ return RM.getValueOr(Reloc::Static);
}
class VEELFTargetObjectFile : public TargetLoweringObjectFileELF {
@@ -96,7 +107,9 @@ public:
return getTM<VETargetMachine>();
}
+ void addIRPasses() override;
bool addInstSelector() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -104,7 +117,18 @@ TargetPassConfig *VETargetMachine::createPassConfig(PassManagerBase &PM) {
return new VEPassConfig(*this, PM);
}
+void VEPassConfig::addIRPasses() {
+ // VE requires atomic expand pass.
+ addPass(createAtomicExpandPass());
+ TargetPassConfig::addIRPasses();
+}
+
bool VEPassConfig::addInstSelector() {
addPass(createVEISelDag(getVETargetMachine()));
return false;
}
+
+void VEPassConfig::addPreEmitPass() {
+ // LVLGen should be called after scheduling and register allocation
+ addPass(createLVLGenPass());
+}
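
The vector alignment entries appended in computeDataLayout above follow a single pattern: every vector width from v64 up to v16384 gets 64-bit alignment. A small sketch that generates the same "-v64:64:64" ... "-v16384:64:64" fragment programmatically (illustrative only; the in-tree code spells the entries out explicitly):

// Produce the vector-alignment portion of the VE data layout string.
#include <iostream>
#include <string>

static std::string vectorAlignments() {
  std::string Ret;
  for (unsigned Bits = 64; Bits <= 16384; Bits *= 2)
    Ret += "-v" + std::to_string(Bits) + ":64:64"; // all vectors 64-bit aligned
  return Ret;
}

int main() { std::cout << vectorAlignments() << "\n"; }
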
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h
index c267c4d9a578..68af66597485 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h
@@ -33,16 +33,35 @@ class VETTIImpl : public BasicTTIImplBase<VETTIImpl> {
const VESubtarget *getST() const { return ST; }
const VETargetLowering *getTLI() const { return TLI; }
+ bool enableVPU() const { return getST()->enableVPU(); }
+
public:
explicit VETTIImpl(const VETargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- unsigned getNumberOfRegisters(unsigned ClassID) const { return 64; }
+ unsigned getNumberOfRegisters(unsigned ClassID) const {
+ bool VectorRegs = (ClassID == 1);
+ if (VectorRegs) {
+ // TODO report vregs once vector isel is stable.
+ return 0;
+ }
+
+ return 64;
+ }
- unsigned getRegisterBitWidth(bool Vector) const { return 64; }
+ unsigned getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ // TODO report vregs once vector isel is stable.
+ return 0;
+ }
+ return 64;
+ }
- unsigned getMinVectorRegisterBitWidth() const { return 64; }
+ unsigned getMinVectorRegisterBitWidth() const {
+ // TODO report vregs once vector isel is stable.
+ return 0;
+ }
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td
new file mode 100644
index 000000000000..2c88d5099a7b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -0,0 +1,46 @@
+//===-------------- VVPInstrInfo.td - VVP_* SDNode patterns ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the VE Vector Predicated SDNodes (VVP SDNodes). VVP
+// SDNodes are an intermediate isel layer between the vector SDNodes emitted by
+// LLVM and the actual VE vector instructions. For example:
+//
+// ADD(x,y) --> VVP_ADD(x,y,mask,evl) --> VADDSWSXrvml(x,y,mask,evl)
+// ^ ^ ^
+// The standard The VVP layer SDNode. The VE vector instruction.
+// SDNode.
+//
+// TODO explain how VVP nodes relate to VP SDNodes once VP ISel is upstream.
+//===----------------------------------------------------------------------===//
+
+// Binary Operators {
+
+// BinaryOp(x,y,mask,vl)
+def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<0>,
+ SDTCisSameNumEltsAs<0, 3>,
+ IsVLVT<4>
+]>;
+
+// Binary operator commutative pattern.
+class vvp_commutative<SDNode RootOp> :
+ PatFrags<
+ (ops node:$lhs, node:$rhs, node:$mask, node:$vlen),
+ [(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen),
+ (RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>;
+
+// VVP node definitions.
+def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>;
+def c_vvp_add : vvp_commutative<vvp_add>;
+
+def vvp_and : SDNode<"VEISD::VVP_AND", SDTIntBinOpVVP>;
+def c_vvp_and : vvp_commutative<vvp_and>;
+
+// } Binary Operators
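
The header comment above describes VVP_* nodes as an intermediate layer: a generic SDNode gains an explicit mask and vector length (evl/AVL) before being matched to a VE vector instruction. The following is a conceptual, self-contained model of that flow in plain C++ (toy types; the mnemonics come from the VADDSWSX and VAND definitions earlier in this patch, and the all-true mask and AVL of 256 are assumptions made only for illustration):

// Conceptual model (not LLVM code) of the three layers described above.
#include <cstdint>
#include <iostream>
#include <string>

struct GenericNode { std::string Op; };                        // e.g. "ADD"
struct VVPNode { std::string Op; bool AllTrueMask; uint32_t Avl; };

// Wrap a generic node into its VVP form; assume a full mask and an AVL of
// 256 elements (the VE vector length used throughout this diff).
static VVPNode toVVP(const GenericNode &N) {
  return {"VVP_" + N.Op, /*AllTrueMask=*/true, /*Avl=*/256};
}

// Pick a mnemonic for the VVP node (names taken from the VADDSWSX/VAND
// instruction definitions earlier in this patch).
static std::string select(const VVPNode &N) {
  if (N.Op == "VVP_ADD") return "vadds.w.sx";
  if (N.Op == "VVP_AND") return "vand";
  return "<unhandled>";
}

int main() {
  VVPNode N = toVVP({"ADD"});
  std::cout << N.Op << " (avl=" << N.Avl << ") -> " << select(N) << "\n";
}
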
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td
new file mode 100644
index 000000000000..7003fb387670
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -0,0 +1,71 @@
+//===----------- VVPInstrPatternsVec.td - VVP_* SDNode patterns -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes how VVP_* SDNodes are lowered to machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// VVP SDNode definitions.
+//
+//===----------------------------------------------------------------------===//
+include "VVPInstrInfo.td"
+
+multiclass VectorBinaryArith<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+ string OpBaseName,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+ // No mask.
+ def : Pat<(OpNode
+ (any_broadcast ScalarVT:$sx),
+ DataVT:$vy, (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvl")
+ ScalarVT:$sx, $vy, $avl)>;
+ def : Pat<(OpNode DataVT:$vx, DataVT:$vy, (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvl")
+ $vx, $vy, $avl)>;
+
+ // Mask.
+ def : Pat<(OpNode
+ (any_broadcast ScalarVT:$sx),
+ DataVT:$vy, MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvml")
+ ScalarVT:$sx, $vy, $mask, $avl)>;
+ def : Pat<(OpNode DataVT:$vx, DataVT:$vy, MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvml")
+ $vx, $vy, $mask, $avl)>;
+
+ // TODO We do not specify patterns for the immediate variants here. There
+ // will be an immediate folding pass that takes care of switching to the
+ // immediate variant where applicable.
+
+ // TODO Fold vvp_select into passthru.
+}
+
+// Expand both the 64-bit and 32-bit variants (256 elements).
+multiclass VectorBinaryArith_ShortLong<
+ SDPatternOperator OpNode,
+ ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+ ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+ defm : VectorBinaryArith<OpNode,
+ LongScalarVT, LongDataVT, v256i1,
+ LongOpBaseName, simm7, LO7>;
+ defm : VectorBinaryArith<OpNode,
+ ShortScalarVT, ShortDataVT, v256i1,
+ ShortOpBaseName, simm7, LO7>;
+}
+
+
+defm : VectorBinaryArith_ShortLong<c_vvp_add,
+ i64, v256i64, "VADDSL",
+ i32, v256i32, "VADDSWSX">;
+defm : VectorBinaryArith_ShortLong<c_vvp_and,
+ i64, v256i64, "VAND",
+ i32, v256i32, "PVANDLO">;
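
The pattern multiclass above picks the instruction variant purely by suffix: 'r' or 'v' for a broadcast-scalar versus a vector left operand, an optional 'm' when a real mask operand is present, and a trailing 'l' for the explicit vector length. A tiny sketch of that naming scheme (the suffix strings match those used in the patterns above; the helper itself is hypothetical):

// Build the instruction-variant suffix used by the patterns above,
// e.g. VADDSWSX + "rvml" for (broadcast scalar, vector, mask, vl).
#include <iostream>
#include <string>

static std::string variantSuffix(bool ScalarLHS, bool Masked) {
  std::string Suffix = ScalarLHS ? "rv" : "vv"; // operand kinds
  if (Masked)
    Suffix += "m";                              // mask operand present
  return Suffix + "l";                          // explicit vector length
}

int main() {
  std::cout << "VADDSWSX" << variantSuffix(false, false) << "\n"; // VADDSWSXvvl
  std::cout << "VADDSWSX" << variantSuffix(true, true) << "\n";   // VADDSWSXrvml
}
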
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def b/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def
new file mode 100644
index 000000000000..a68402e9ea10
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def
@@ -0,0 +1,41 @@
+//===-- VVPNodes.def - Lists & properties of VE Vector Predication Nodes --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all VVP_* SDNodes and their properties
+//
+//===----------------------------------------------------------------------===//
+
+/// HANDLE_VP_TO_VVP(VPOPC, VVPOPC)
+/// \p VPOPC is the VP_* SDNode opcode.
+/// \p VVPOPC is the VVP_* SDNode opcode.
+#ifndef HANDLE_VP_TO_VVP
+#define HANDLE_VP_TO_VVP(VPOPC, VVPOPC)
+#endif
+
+/// ADD_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPNAME is a VVP SDNode operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
+#ifndef ADD_VVP_OP
+#define ADD_VVP_OP(X, Y)
+#endif
+
+/// ADD_BINARY_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPNAME is a VVP binary operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
+#ifndef ADD_BINARY_VVP_OP
+#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y) HANDLE_VP_TO_VVP(VP_##Y, X)
+#endif
+
+// Integer arithmetic.
+ADD_BINARY_VVP_OP(VVP_ADD,ADD)
+
+ADD_BINARY_VVP_OP(VVP_AND,AND)
+
+#undef HANDLE_VP_TO_VVP
+#undef ADD_BINARY_VVP_OP
+#undef ADD_VVP_OP
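The doc comments above describe the X-macro protocol of this .def file: a consumer defines whichever of HANDLE_VP_TO_VVP, ADD_VVP_OP, and ADD_BINARY_VVP_OP it cares about, includes the file, and every entry expands in place. A minimal sketch of such a consumer, assuming only an opcode list is wanted (the enum name below is hypothetical and not taken from the patch):

    // Expand VVPNodes.def into enumerators; ADD_BINARY_VVP_OP entries funnel
    // into ADD_VVP_OP, and the unused HANDLE_VP_TO_VVP defaults to a no-op.
    enum HypotheticalVVPOpcode {
    #define ADD_VVP_OP(VVP_NAME, SD_NAME) VVP_NAME,
    #include "VVPNodes.def"
      HYPOTHETICAL_VVP_OPCODE_END
    };

With the two entries above this yields VVP_ADD and VVP_AND, and since the .def file #undefs its macros itself, the consumer has nothing to clean up.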
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index e29d85d7588d..60ac3248b9e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -35,10 +35,12 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-asm-parser"
+static const char *getSubtargetFeatureName(uint64_t Val);
+
namespace {
/// WebAssemblyOperand - Instances of this class represent the operands in a
-/// parsed WASM machine instruction.
+/// parsed Wasm machine instruction.
struct WebAssemblyOperand : public MCParsedAsmOperand {
enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
@@ -158,6 +160,24 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
}
};
+static MCSymbolWasm *GetOrCreateFunctionTableSymbol(MCContext &Ctx,
+ const StringRef &Name) {
+ // FIXME: Duplicates functionality from
+ // MC/WasmObjectWriter::recordRelocation, as well as WebAssemblyCodegen's
+ // WebAssembly:getOrCreateFunctionTableSymbol.
+ MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
+ if (Sym) {
+ if (!Sym->isFunctionTable())
+ Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
+ } else {
+ Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
+ Sym->setFunctionTable();
+ // The default function table is synthesized by the linker.
+ Sym->setUndefined();
+ }
+ return Sym;
+}
+
class WebAssemblyAsmParser final : public MCTargetAsmParser {
MCAsmParser &Parser;
MCAsmLexer &Lexer;
@@ -320,8 +340,8 @@ public:
Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
Type == "f64x2")
return wasm::ValType::V128;
- if (Type == "exnref")
- return wasm::ValType::EXNREF;
+ if (Type == "funcref")
+ return wasm::ValType::FUNCREF;
if (Type == "externref")
return wasm::ValType::EXTERNREF;
return Optional<wasm::ValType>();
@@ -335,7 +355,8 @@ public:
.Case("f32", WebAssembly::BlockType::F32)
.Case("f64", WebAssembly::BlockType::F64)
.Case("v128", WebAssembly::BlockType::V128)
- .Case("exnref", WebAssembly::BlockType::Exnref)
+ .Case("funcref", WebAssembly::BlockType::Funcref)
+ .Case("externref", WebAssembly::BlockType::Externref)
.Case("void", WebAssembly::BlockType::Void)
.Default(WebAssembly::BlockType::Invalid);
}
@@ -403,7 +424,8 @@ public:
bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
// FIXME: there is probably a cleaner way to do this.
auto IsLoadStore = InstName.find(".load") != StringRef::npos ||
- InstName.find(".store") != StringRef::npos;
+ InstName.find(".store") != StringRef::npos ||
+ InstName.find("prefetch") != StringRef::npos;
auto IsAtomic = InstName.find("atomic.") != StringRef::npos;
if (IsLoadStore || IsAtomic) {
// Parse load/store operands of the form: offset:p2align=align
@@ -417,6 +439,12 @@ public:
return error("Expected integer constant");
parseSingleInteger(false, Operands);
} else {
+ // v128.{load,store}{8,16,32,64}_lane has both a memarg and a lane
+ // index. We need to avoid parsing an extra alignment operand for the
+ // lane index.
+ auto IsLoadStoreLane = InstName.find("_lane") != StringRef::npos;
+ if (IsLoadStoreLane && Operands.size() == 4)
+ return false;
// Alignment not specified (or atomics, must use default alignment).
// We can't just call WebAssembly::GetDefaultP2Align since we don't have
// an opcode until after the assembly matcher, so set a default to fix
@@ -430,6 +458,13 @@ public:
return false;
}
+ WebAssembly::HeapType parseHeapType(StringRef Id) {
+ return StringSwitch<WebAssembly::HeapType>(Id)
+ .Case("extern", WebAssembly::HeapType::Externref)
+ .Case("func", WebAssembly::HeapType::Funcref)
+ .Default(WebAssembly::HeapType::Invalid);
+ }
+
void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
WebAssembly::BlockType BT) {
Operands.push_back(std::make_unique<WebAssemblyOperand>(
@@ -472,6 +507,7 @@ public:
// proper nesting.
bool ExpectBlockType = false;
bool ExpectFuncType = false;
+ bool ExpectHeapType = false;
if (Name == "block") {
push(Block);
ExpectBlockType = true;
@@ -511,6 +547,17 @@ public:
return true;
} else if (Name == "call_indirect" || Name == "return_call_indirect") {
ExpectFuncType = true;
+ // Ensure that the object file has a __indirect_function_table import, as
+ // we call_indirect against it.
+ auto &Ctx = getStreamer().getContext();
+ MCSymbolWasm *Sym =
+ GetOrCreateFunctionTableSymbol(Ctx, "__indirect_function_table");
+ // Until call_indirect emits TABLE_NUMBER relocs against this symbol, mark
+ // it as NO_STRIP so as to ensure that the indirect function table makes
+ // it to linked output.
+ Sym->setNoStrip();
+ } else if (Name == "ref.null") {
+ ExpectHeapType = true;
}
if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) {
@@ -552,6 +599,15 @@ public:
return error("Unknown block type: ", Id);
addBlockTypeOperand(Operands, NameLoc, BT);
Parser.Lex();
+ } else if (ExpectHeapType) {
+ auto HeapType = parseHeapType(Id.getString());
+ if (HeapType == WebAssembly::HeapType::Invalid) {
+ return error("Expected a heap type: ", Id);
+ }
+ Operands.push_back(std::make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Integer, Id.getLoc(), Id.getEndLoc(),
+ WebAssemblyOperand::IntOp{static_cast<int64_t>(HeapType)}));
+ Parser.Lex();
} else {
// Assume this identifier is a label.
const MCExpr *Val;
@@ -687,16 +743,52 @@ public:
auto Type = parseType(TypeName);
if (!Type)
return error("Unknown type in .globaltype directive: ", TypeTok);
+ // Optional mutable modifier. Default to mutable for historical reasons.
+ // Ideally we would have gone with immutable as the default and used `mut`
+ // as the modifier to match the `.wat` format.
+ bool Mutable = true;
+ if (isNext(AsmToken::Comma)) {
+ TypeTok = Lexer.getTok();
+ auto Id = expectIdent();
+ if (Id == "immutable")
+ Mutable = false;
+ else
+ // Should we also allow `mutable` and `mut` here for clarity?
+ return error("Unknown type in .globaltype modifier: ", TypeTok);
+ }
// Now set this symbol with the correct type.
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
WasmSym->setGlobalType(
- wasm::WasmGlobalType{uint8_t(Type.getValue()), true});
+ wasm::WasmGlobalType{uint8_t(Type.getValue()), Mutable});
// And emit the directive again.
TOut.emitGlobalType(WasmSym);
return expect(AsmToken::EndOfStatement, "EOL");
}
+ if (DirectiveID.getString() == ".tabletype") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ if (expect(AsmToken::Comma, ","))
+ return true;
+ auto TypeTok = Lexer.getTok();
+ auto TypeName = expectIdent();
+ if (TypeName.empty())
+ return true;
+ auto Type = parseType(TypeName);
+ if (!Type)
+ return error("Unknown type in .tabletype directive: ", TypeTok);
+
+ // Now that we have the name and table type, we can actually create the
+ // symbol
+ auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
+ WasmSym->setTableType(Type.getValue());
+ TOut.emitTableType(WasmSym);
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
if (DirectiveID.getString() == ".functype") {
// This code has to send things to the streamer similar to
// WebAssemblyAsmPrinter::EmitFunctionBodyStart.
@@ -836,8 +928,9 @@ public:
bool MatchingInlineAsm) override {
MCInst Inst;
Inst.setLoc(IDLoc);
- unsigned MatchResult =
- MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+ FeatureBitset MissingFeatures;
+ unsigned MatchResult = MatchInstructionImpl(
+ Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm);
switch (MatchResult) {
case Match_Success: {
ensureLocals(Out);
@@ -866,9 +959,16 @@ public:
}
return false;
}
- case Match_MissingFeature:
- return Parser.Error(
- IDLoc, "instruction requires a WASM feature not currently enabled");
+ case Match_MissingFeature: {
+ assert(MissingFeatures.count() > 0 && "Expected missing features");
+ SmallString<128> Message;
+ raw_svector_ostream OS(Message);
+ OS << "instruction requires:";
+ for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)
+ if (MissingFeatures.test(i))
+ OS << ' ' << getSubtargetFeatureName(i);
+ return Parser.Error(IDLoc, Message);
+ }
case Match_MnemonicFail:
return Parser.Error(IDLoc, "invalid instruction");
case Match_NearMisses:
@@ -896,12 +996,27 @@ public:
auto SymName = Symbol->getName();
if (SymName.startswith(".L"))
return; // Local Symbol.
+
// Only create a new text section if we're already in one.
+ // TODO: If the user explicitly creates a new function section, we ignore
+ // its name when we create this one. It would be nice to honor their
+ // choice, while still ensuring that we create one if they forget.
+ // (that requires coordination with WasmAsmParser::parseSectionDirective)
auto CWS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
if (!CWS || !CWS->getKind().isText())
return;
auto SecName = ".text." + SymName;
- auto WS = getContext().getWasmSection(SecName, SectionKind::getText());
+
+ auto *Group = CWS->getGroup();
+ // If the current section is a COMDAT, also set the flag on the symbol.
+ // TODO: Currently the only place that the symbols' comdat flag matters is
+ // for importing comdat functions. But there's no way to specify that in
+ // assembly currently.
+ if (Group)
+ cast<MCSymbolWasm>(Symbol)->setComdat(true);
+ auto *WS =
+ getContext().getWasmSection(SecName, SectionKind::getText(), Group,
+ MCContext::GenericSectionID, nullptr);
getStreamer().SwitchSection(WS);
// Also generate DWARF for this section if requested.
if (getContext().getGenDwarfForAssembly())
@@ -932,5 +1047,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyAsmParser() {
}
#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
#define GET_MATCHER_IMPLEMENTATION
#include "WebAssemblyGenAsmMatcher.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 42fa6d58fffd..1b7cc093f7ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -198,6 +198,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
case WebAssembly::OPERAND_LOCAL:
case WebAssembly::OPERAND_GLOBAL:
case WebAssembly::OPERAND_FUNCTION32:
+ case WebAssembly::OPERAND_TABLE:
case WebAssembly::OPERAND_OFFSET32:
case WebAssembly::OPERAND_OFFSET64:
case WebAssembly::OPERAND_P2ALIGN:
@@ -240,6 +241,28 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
}
break;
}
+ // heap_type operands, for e.g. ref.null:
+ case WebAssembly::OPERAND_HEAPTYPE: {
+ int64_t Val;
+ uint64_t PrevSize = Size;
+ if (!nextLEB(Val, Bytes, Size, true))
+ return MCDisassembler::Fail;
+ if (Val < 0 && Size == PrevSize + 1) {
+ // The HeapType encoding is like BlockType, in that encodings that
+ // decode as negative values indicate ValTypes. In practice we expect
+ // either wasm::ValType::EXTERNREF or wasm::ValType::FUNCREF here.
+ //
+ // The positive SLEB values are reserved for future expansion and are
+ // expected to be type indices in the typed function references
+ // proposal, and should disassemble as MCSymbolRefExpr as in BlockType
+ // above.
+ MI.addOperand(MCOperand::createImm(Val & 0x7f));
+ } else {
+ MI.addOperand(
+ MCOperand::createImm(int64_t(WebAssembly::HeapType::Invalid)));
+ }
+ break;
+ }
// FP operands.
case WebAssembly::OPERAND_F32IMM: {
if (!parseImmediate<float>(MI, Size, Bytes))
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 8ecd7c53621d..d88311197c1a 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -59,11 +59,6 @@ public:
return false;
}
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- return false;
- }
-
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index f60b5fcd14ec..fb8b0c364f30 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -94,19 +94,18 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
case WebAssembly::LOOP_S:
printAnnotation(OS, "label" + utostr(ControlFlowCounter) + ':');
ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, true));
- break;
+ return;
case WebAssembly::BLOCK:
case WebAssembly::BLOCK_S:
ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
- break;
+ return;
case WebAssembly::TRY:
case WebAssembly::TRY_S:
- ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
- EHPadStack.push_back(EHPadStackCounter++);
- LastSeenEHInst = TRY;
- break;
+ ControlFlowStack.push_back(std::make_pair(ControlFlowCounter, false));
+ EHPadStack.push_back(ControlFlowCounter++);
+ return;
case WebAssembly::END_LOOP:
case WebAssembly::END_LOOP_S:
@@ -115,7 +114,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
} else {
ControlFlowStack.pop_back();
}
- break;
+ return;
case WebAssembly::END_BLOCK:
case WebAssembly::END_BLOCK_S:
@@ -125,7 +124,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
printAnnotation(
OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
}
- break;
+ return;
case WebAssembly::END_TRY:
case WebAssembly::END_TRY_S:
@@ -134,60 +133,60 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
} else {
printAnnotation(
OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
- LastSeenEHInst = END_TRY;
}
- break;
+ return;
case WebAssembly::CATCH:
case WebAssembly::CATCH_S:
+ case WebAssembly::CATCH_ALL:
+ case WebAssembly::CATCH_ALL_S:
if (EHPadStack.empty()) {
printAnnotation(OS, "try-catch mismatch!");
} else {
printAnnotation(OS, "catch" + utostr(EHPadStack.pop_back_val()) + ':');
}
- break;
- }
-
- // Annotate any control flow label references.
+ return;
- // rethrow instruction does not take any depth argument and rethrows to the
- // nearest enclosing catch scope, if any. If there's no enclosing catch
- // scope, it throws up to the caller.
- if (Opc == WebAssembly::RETHROW || Opc == WebAssembly::RETHROW_S) {
+ case WebAssembly::RETHROW:
+ case WebAssembly::RETHROW_S:
+ // 'rethrow' rethrows to the nearest enclosing catch scope, if any. If
+ // there's no enclosing catch scope, it throws up to the caller.
if (EHPadStack.empty()) {
printAnnotation(OS, "to caller");
} else {
printAnnotation(OS, "down to catch" + utostr(EHPadStack.back()));
}
+ return;
+ }
- } else {
- unsigned NumFixedOperands = Desc.NumOperands;
- SmallSet<uint64_t, 8> Printed;
- for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I) {
- // See if this operand denotes a basic block target.
- if (I < NumFixedOperands) {
- // A non-variable_ops operand, check its type.
- if (Desc.OpInfo[I].OperandType != WebAssembly::OPERAND_BASIC_BLOCK)
- continue;
- } else {
- // A variable_ops operand, which currently can be immediates (used in
- // br_table) which are basic block targets, or for call instructions
- // when using -wasm-keep-registers (in which case they are registers,
- // and should not be processed).
- if (!MI->getOperand(I).isImm())
- continue;
- }
- uint64_t Depth = MI->getOperand(I).getImm();
- if (!Printed.insert(Depth).second)
+ // Annotate any control flow label references.
+
+ unsigned NumFixedOperands = Desc.NumOperands;
+ SmallSet<uint64_t, 8> Printed;
+ for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I) {
+ // See if this operand denotes a basic block target.
+ if (I < NumFixedOperands) {
+ // A non-variable_ops operand, check its type.
+ if (Desc.OpInfo[I].OperandType != WebAssembly::OPERAND_BASIC_BLOCK)
continue;
- if (Depth >= ControlFlowStack.size()) {
- printAnnotation(OS, "Invalid depth argument!");
- } else {
- const auto &Pair = ControlFlowStack.rbegin()[Depth];
- printAnnotation(OS, utostr(Depth) + ": " +
- (Pair.second ? "up" : "down") + " to label" +
- utostr(Pair.first));
- }
+ } else {
+ // A variable_ops operand, which currently can be immediates (used in
+ // br_table) which are basic block targets, or for call instructions
+ // when using -wasm-keep-registers (in which case they are registers,
+ // and should not be processed).
+ if (!MI->getOperand(I).isImm())
+ continue;
+ }
+ uint64_t Depth = MI->getOperand(I).getImm();
+ if (!Printed.insert(Depth).second)
+ continue;
+ if (Depth >= ControlFlowStack.size()) {
+ printAnnotation(OS, "Invalid depth argument!");
+ } else {
+ const auto &Pair = ControlFlowStack.rbegin()[Depth];
+ printAnnotation(OS, utostr(Depth) + ": " +
+ (Pair.second ? "up" : "down") + " to label" +
+ utostr(Pair.first));
}
}
}
@@ -302,6 +301,29 @@ void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
}
}
+void WebAssemblyInstPrinter::printWebAssemblyHeapTypeOperand(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm()) {
+ switch (Op.getImm()) {
+ case long(wasm::ValType::EXTERNREF):
+ O << "extern";
+ break;
+ case long(wasm::ValType::FUNCREF):
+ O << "func";
+ break;
+ default:
+ O << "unsupported_heap_type_value";
+ break;
+ }
+ } else {
+ // Typed function references and other subtypes of funcref and externref
+ // currently unimplemented.
+ O << "unsupported_heap_type_operand";
+ }
+}
+
// We have various enums representing a subset of these types, use this
// function to convert any of them to text.
const char *WebAssembly::anyTypeToString(unsigned Ty) {
@@ -318,10 +340,10 @@ const char *WebAssembly::anyTypeToString(unsigned Ty) {
return "v128";
case wasm::WASM_TYPE_FUNCREF:
return "funcref";
+ case wasm::WASM_TYPE_EXTERNREF:
+ return "externref";
case wasm::WASM_TYPE_FUNC:
return "func";
- case wasm::WASM_TYPE_EXNREF:
- return "exnref";
case wasm::WASM_TYPE_NORESULT:
return "void";
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
index 1387a1928b3f..2ed6d562acff 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
@@ -25,13 +25,9 @@ class MCSubtargetInfo;
class WebAssemblyInstPrinter final : public MCInstPrinter {
uint64_t ControlFlowCounter = 0;
- uint64_t EHPadStackCounter = 0;
SmallVector<std::pair<uint64_t, bool>, 4> ControlFlowStack;
SmallVector<uint64_t, 4> EHPadStack;
- enum EHInstKind { TRY, CATCH, END_TRY };
- EHInstKind LastSeenEHInst = END_TRY;
-
public:
WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI);
@@ -48,8 +44,11 @@ public:
raw_ostream &O);
void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O);
+ void printWebAssemblyHeapTypeOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O);
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index dfed3451e45b..55bf5d14fdac 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -62,12 +62,16 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
uint64_t Start = OS.tell();
uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
- if (Binary <= UINT8_MAX) {
+ if (Binary < (1 << 8)) {
OS << uint8_t(Binary);
- } else {
- assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet");
+ } else if (Binary < (1 << 16)) {
OS << uint8_t(Binary >> 8);
encodeULEB128(uint8_t(Binary), OS);
+ } else if (Binary < (1 << 24)) {
+ OS << uint8_t(Binary >> 16);
+ encodeULEB128(uint16_t(Binary), OS);
+ } else {
+ llvm_unreachable("Very large (prefix + 3 byte) opcodes not supported");
}
// For br_table instructions, encode the size of the table. In the MCInst,
@@ -102,6 +106,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
encodeSLEB128(int64_t(MO.getImm()), OS);
break;
case WebAssembly::OPERAND_SIGNATURE:
+ case WebAssembly::OPERAND_HEAPTYPE:
OS << uint8_t(MO.getImm());
break;
case WebAssembly::OPERAND_VEC_I8IMM:
@@ -151,6 +156,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
PaddedSize = 10;
break;
case WebAssembly::OPERAND_FUNCTION32:
+ case WebAssembly::OPERAND_TABLE:
case WebAssembly::OPERAND_OFFSET32:
case WebAssembly::OPERAND_SIGNATURE:
case WebAssembly::OPERAND_TYPEINDEX:
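The widened opcode handling above follows one scheme: opcodes that fit in a single byte are emitted directly, and anything larger is split into a one-byte prefix followed by the ULEB128-encoded low bits. A standalone sketch of that split, using plain standard-library types rather than the MC streamer (an illustration, not the patch's code):

    #include <cstdint>
    #include <vector>

    // ULEB128: 7 payload bits per byte, high bit set on all but the last byte.
    static void encodeULEB128(uint64_t Val, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Val & 0x7f;
        Val >>= 7;
        if (Val)
          Byte |= 0x80;
        Out.push_back(Byte);
      } while (Val);
    }

    // Split an opcode the same way encodeInstruction does above.
    static void encodeOpcode(uint64_t Binary, std::vector<uint8_t> &Out) {
      if (Binary < (1 << 8)) {
        Out.push_back(uint8_t(Binary));        // single-byte opcode
      } else if (Binary < (1 << 16)) {
        Out.push_back(uint8_t(Binary >> 8));   // prefix byte
        encodeULEB128(uint8_t(Binary), Out);   // low byte as ULEB128
      } else if (Binary < (1 << 24)) {
        Out.push_back(uint8_t(Binary >> 16));  // prefix byte
        encodeULEB128(uint16_t(Binary), Out);  // low two bytes as ULEB128
      }
      // Anything wider is rejected in the patch (llvm_unreachable).
    }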
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 027e5408c633..064e613cfc8e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -76,7 +76,7 @@ static MCAsmBackend *createAsmBackend(const Target & /*T*/,
static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU,
StringRef FS) {
- return createWebAssemblyMCSubtargetInfoImpl(TT, CPU, FS);
+ return createWebAssemblyMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCTargetStreamer *
@@ -147,8 +147,10 @@ wasm::ValType WebAssembly::toValType(const MVT &Ty) {
case MVT::v4f32:
case MVT::v2f64:
return wasm::ValType::V128;
- case MVT::exnref:
- return wasm::ValType::EXNREF;
+ case MVT::funcref:
+ return wasm::ValType::FUNCREF;
+ case MVT::externref:
+ return wasm::ValType::EXTERNREF;
default:
llvm_unreachable("unexpected type");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 02b310628ee1..5b77b8495adf 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -76,6 +76,10 @@ enum OperandType {
OPERAND_EVENT,
/// A list of branch targets for br_list.
OPERAND_BRLIST,
+ /// 32-bit unsigned table number.
+ OPERAND_TABLE,
+ /// heap type immediate for ref.null.
+ OPERAND_HEAPTYPE,
};
} // end namespace WebAssembly
@@ -97,6 +101,11 @@ enum TOF {
MO_MEMORY_BASE_REL,
// On a symbol operand this indicates that the immediate is the symbol
+  // address relative to the __tls_base wasm global.
+ // Only applicable to data symbols.
+ MO_TLS_BASE_REL,
+
+ // On a symbol operand this indicates that the immediate is the symbol
// address relative the __table_base wasm global.
// Only applicable to function symbols.
MO_TABLE_BASE_REL,
@@ -129,7 +138,8 @@ enum class BlockType : unsigned {
F32 = unsigned(wasm::ValType::F32),
F64 = unsigned(wasm::ValType::F64),
V128 = unsigned(wasm::ValType::V128),
- Exnref = unsigned(wasm::ValType::EXNREF),
+ Externref = unsigned(wasm::ValType::EXTERNREF),
+ Funcref = unsigned(wasm::ValType::FUNCREF),
// Multivalue blocks (and other non-void blocks) are only emitted when the
// blocks will never be exited and are at the ends of functions (see
// WebAssemblyCFGStackify::fixEndsAtEndOfFunction). They also are never made
@@ -138,6 +148,13 @@ enum class BlockType : unsigned {
Multivalue = 0xffff,
};
+/// Used as immediate MachineOperands for heap types, e.g. for ref.null.
+enum class HeapType : unsigned {
+ Invalid = 0x00,
+ Externref = unsigned(wasm::ValType::EXTERNREF),
+ Funcref = unsigned(wasm::ValType::FUNCREF),
+};
+
/// Instruction opcodes emitted via means other than CodeGen.
static const unsigned Nop = 0x01;
static const unsigned End = 0x0b;
@@ -176,8 +193,12 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
WASM_LOAD_STORE(ATOMIC_RMW8_U_XCHG_I64)
WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I32)
WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I64)
- WASM_LOAD_STORE(LOAD_SPLAT_v8x16)
- return 0;
+ WASM_LOAD_STORE(LOAD8_SPLAT)
+ WASM_LOAD_STORE(LOAD_LANE_I8x16)
+ WASM_LOAD_STORE(STORE_LANE_I8x16)
+ WASM_LOAD_STORE(PREFETCH_T)
+ WASM_LOAD_STORE(PREFETCH_NT)
+ return 0;
WASM_LOAD_STORE(LOAD16_S_I32)
WASM_LOAD_STORE(LOAD16_U_I32)
WASM_LOAD_STORE(LOAD16_S_I64)
@@ -202,8 +223,10 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
WASM_LOAD_STORE(ATOMIC_RMW16_U_XCHG_I64)
WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I32)
WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I64)
- WASM_LOAD_STORE(LOAD_SPLAT_v16x8)
- return 1;
+ WASM_LOAD_STORE(LOAD16_SPLAT)
+ WASM_LOAD_STORE(LOAD_LANE_I16x8)
+ WASM_LOAD_STORE(STORE_LANE_I16x8)
+ return 1;
WASM_LOAD_STORE(LOAD_I32)
WASM_LOAD_STORE(LOAD_F32)
WASM_LOAD_STORE(STORE_I32)
@@ -229,10 +252,13 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
WASM_LOAD_STORE(ATOMIC_RMW32_U_XCHG_I64)
WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I32)
WASM_LOAD_STORE(ATOMIC_RMW32_U_CMPXCHG_I64)
- WASM_LOAD_STORE(ATOMIC_NOTIFY)
- WASM_LOAD_STORE(ATOMIC_WAIT_I32)
- WASM_LOAD_STORE(LOAD_SPLAT_v32x4)
- return 2;
+ WASM_LOAD_STORE(MEMORY_ATOMIC_NOTIFY)
+ WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT32)
+ WASM_LOAD_STORE(LOAD32_SPLAT)
+ WASM_LOAD_STORE(LOAD_ZERO_I32x4)
+ WASM_LOAD_STORE(LOAD_LANE_I32x4)
+ WASM_LOAD_STORE(STORE_LANE_I32x4)
+ return 2;
WASM_LOAD_STORE(LOAD_I64)
WASM_LOAD_STORE(LOAD_F64)
WASM_LOAD_STORE(STORE_I64)
@@ -246,15 +272,18 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
WASM_LOAD_STORE(ATOMIC_RMW_XOR_I64)
WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I64)
WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I64)
- WASM_LOAD_STORE(ATOMIC_WAIT_I64)
- WASM_LOAD_STORE(LOAD_SPLAT_v64x2)
- WASM_LOAD_STORE(LOAD_EXTEND_S_v8i16)
- WASM_LOAD_STORE(LOAD_EXTEND_U_v8i16)
- WASM_LOAD_STORE(LOAD_EXTEND_S_v4i32)
- WASM_LOAD_STORE(LOAD_EXTEND_U_v4i32)
- WASM_LOAD_STORE(LOAD_EXTEND_S_v2i64)
- WASM_LOAD_STORE(LOAD_EXTEND_U_v2i64)
- return 3;
+ WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT64)
+ WASM_LOAD_STORE(LOAD64_SPLAT)
+ WASM_LOAD_STORE(LOAD_EXTEND_S_I16x8)
+ WASM_LOAD_STORE(LOAD_EXTEND_U_I16x8)
+ WASM_LOAD_STORE(LOAD_EXTEND_S_I32x4)
+ WASM_LOAD_STORE(LOAD_EXTEND_U_I32x4)
+ WASM_LOAD_STORE(LOAD_EXTEND_S_I64x2)
+ WASM_LOAD_STORE(LOAD_EXTEND_U_I64x2)
+ WASM_LOAD_STORE(LOAD_ZERO_I64x2)
+ WASM_LOAD_STORE(LOAD_LANE_I64x2)
+ WASM_LOAD_STORE(STORE_LANE_I64x2)
+ return 3;
WASM_LOAD_STORE(LOAD_V128)
WASM_LOAD_STORE(STORE_V128)
return 4;
@@ -294,8 +323,10 @@ inline bool isArgument(unsigned Opc) {
case WebAssembly::ARGUMENT_v4f32_S:
case WebAssembly::ARGUMENT_v2f64:
case WebAssembly::ARGUMENT_v2f64_S:
- case WebAssembly::ARGUMENT_exnref:
- case WebAssembly::ARGUMENT_exnref_S:
+ case WebAssembly::ARGUMENT_funcref:
+ case WebAssembly::ARGUMENT_funcref_S:
+ case WebAssembly::ARGUMENT_externref:
+ case WebAssembly::ARGUMENT_externref_S:
return true;
default:
return false;
@@ -314,8 +345,10 @@ inline bool isCopy(unsigned Opc) {
case WebAssembly::COPY_F64_S:
case WebAssembly::COPY_V128:
case WebAssembly::COPY_V128_S:
- case WebAssembly::COPY_EXNREF:
- case WebAssembly::COPY_EXNREF_S:
+ case WebAssembly::COPY_FUNCREF:
+ case WebAssembly::COPY_FUNCREF_S:
+ case WebAssembly::COPY_EXTERNREF:
+ case WebAssembly::COPY_EXTERNREF_S:
return true;
default:
return false;
@@ -334,8 +367,10 @@ inline bool isTee(unsigned Opc) {
case WebAssembly::TEE_F64_S:
case WebAssembly::TEE_V128:
case WebAssembly::TEE_V128_S:
- case WebAssembly::TEE_EXNREF:
- case WebAssembly::TEE_EXNREF_S:
+ case WebAssembly::TEE_FUNCREF:
+ case WebAssembly::TEE_FUNCREF_S:
+ case WebAssembly::TEE_EXTERNREF:
+ case WebAssembly::TEE_EXTERNREF_S:
return true;
default:
return false;
@@ -398,6 +433,18 @@ inline bool isMarker(unsigned Opc) {
}
}
+inline bool isCatch(unsigned Opc) {
+ switch (Opc) {
+ case WebAssembly::CATCH:
+ case WebAssembly::CATCH_S:
+ case WebAssembly::CATCH_ALL:
+ case WebAssembly::CATCH_ALL_S:
+ return true;
+ default:
+ return false;
+ }
+}
+
} // end namespace WebAssembly
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index e954eeaebb14..652d7a00a63c 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -71,8 +71,17 @@ void WebAssemblyTargetAsmStreamer::emitGlobalType(const MCSymbolWasm *Sym) {
assert(Sym->isGlobal());
OS << "\t.globaltype\t" << Sym->getName() << ", "
<< WebAssembly::typeToString(
- static_cast<wasm::ValType>(Sym->getGlobalType().Type))
- << '\n';
+ static_cast<wasm::ValType>(Sym->getGlobalType().Type));
+ if (!Sym->getGlobalType().Mutable)
+ OS << ", immutable";
+ OS << '\n';
+}
+
+void WebAssemblyTargetAsmStreamer::emitTableType(const MCSymbolWasm *Sym) {
+ assert(Sym->isTable());
+ OS << "\t.tabletype\t" << Sym->getName() << ", "
+ << WebAssembly::typeToString(Sym->getTableType());
+ OS << '\n';
}
void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) {
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index d6fba05c9986..75c9fb4e289d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -39,6 +39,8 @@ public:
virtual void emitIndIdx(const MCExpr *Value) = 0;
/// .globaltype
virtual void emitGlobalType(const MCSymbolWasm *Sym) = 0;
+ /// .tabletype
+ virtual void emitTableType(const MCSymbolWasm *Sym) = 0;
/// .eventtype
virtual void emitEventType(const MCSymbolWasm *Sym) = 0;
/// .import_module
@@ -67,6 +69,7 @@ public:
void emitFunctionType(const MCSymbolWasm *Sym) override;
void emitIndIdx(const MCExpr *Value) override;
void emitGlobalType(const MCSymbolWasm *Sym) override;
+ void emitTableType(const MCSymbolWasm *Sym) override;
void emitEventType(const MCSymbolWasm *Sym) override;
void emitImportModule(const MCSymbolWasm *Sym, StringRef ImportModule) override;
void emitImportName(const MCSymbolWasm *Sym, StringRef ImportName) override;
@@ -83,6 +86,7 @@ public:
void emitFunctionType(const MCSymbolWasm *Sym) override {}
void emitIndIdx(const MCExpr *Value) override;
void emitGlobalType(const MCSymbolWasm *Sym) override {}
+ void emitTableType(const MCSymbolWasm *Sym) override {}
void emitEventType(const MCSymbolWasm *Sym) override {}
void emitImportModule(const MCSymbolWasm *Sym,
StringRef ImportModule) override {}
@@ -103,6 +107,7 @@ public:
void emitFunctionType(const MCSymbolWasm *) override {}
void emitIndIdx(const MCExpr *) override {}
void emitGlobalType(const MCSymbolWasm *) override {}
+ void emitTableType(const MCSymbolWasm *) override {}
void emitEventType(const MCSymbolWasm *) override {}
void emitImportModule(const MCSymbolWasm *, StringRef) override {}
void emitImportName(const MCSymbolWasm *, StringRef) override {}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 779e921c1d94..aa7e2311d240 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -76,6 +76,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
case MCSymbolRefExpr::VK_WASM_TBREL:
assert(SymA.isFunction());
return wasm::R_WASM_TABLE_INDEX_REL_SLEB;
+ case MCSymbolRefExpr::VK_WASM_TLSREL:
+ return wasm::R_WASM_MEMORY_ADDR_TLS_SLEB;
case MCSymbolRefExpr::VK_WASM_MBREL:
assert(SymA.isData());
return is64Bit() ? wasm::R_WASM_MEMORY_ADDR_REL_SLEB64
@@ -92,7 +94,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
return wasm::R_WASM_TABLE_INDEX_SLEB;
return wasm::R_WASM_MEMORY_ADDR_SLEB;
case WebAssembly::fixup_sleb128_i64:
- assert(SymA.isData());
+ if (SymA.isFunction())
+ return wasm::R_WASM_TABLE_INDEX_SLEB64;
return wasm::R_WASM_MEMORY_ADDR_SLEB64;
case WebAssembly::fixup_uleb128_i32:
if (SymA.isGlobal())
@@ -101,6 +104,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
return wasm::R_WASM_FUNCTION_INDEX_LEB;
if (SymA.isEvent())
return wasm::R_WASM_EVENT_INDEX_LEB;
+ if (SymA.isTable())
+ return wasm::R_WASM_TABLE_NUMBER_LEB;
return wasm::R_WASM_MEMORY_ADDR_LEB;
case WebAssembly::fixup_uleb128_i64:
assert(SymA.isData());
@@ -119,6 +124,17 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
}
return wasm::R_WASM_MEMORY_ADDR_I32;
case FK_Data_8:
+ if (SymA.isFunction())
+ return wasm::R_WASM_TABLE_INDEX_I64;
+ if (SymA.isGlobal())
+ llvm_unreachable("unimplemented R_WASM_GLOBAL_INDEX_I64");
+ if (auto Section = static_cast<const MCSectionWasm *>(
+ getFixupSection(Fixup.getValue()))) {
+ if (Section->getKind().isText())
+ return wasm::R_WASM_FUNCTION_OFFSET_I64;
+ else if (!Section->isWasmData())
+ llvm_unreachable("unimplemented R_WASM_SECTION_OFFSET_I64");
+ }
assert(SymA.isData());
return wasm::R_WASM_MEMORY_ADDR_I64;
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 96fa13d30729..7f1c4bb40a4c 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -49,6 +49,8 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
extern cl::opt<bool> WasmKeepRegisters;
+extern cl::opt<bool> EnableEmException;
+extern cl::opt<bool> EnableEmSjLj;
//===----------------------------------------------------------------------===//
// Helpers.
@@ -81,10 +83,92 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() {
return static_cast<WebAssemblyTargetStreamer *>(TS);
}
+// Emscripten exception handling helpers
+//
+// This converts invoke names generated by LowerEmscriptenEHSjLj to real names
+// that are expected by JavaScript glue code. The invoke names generated by
+// Emscripten JS glue code are based on their argument and return types; for
+// example, for a function that takes an i32 and returns nothing, it is
+// 'invoke_vi'. But the invoke names generated by the LowerEmscriptenEHSjLj pass
+// contain a mangled string derived from the IR types, for example,
+// "__invoke_void_%struct.mystruct*_int", because final wasm types are not
+// available in the IR pass. So we convert those names to the form that
+// Emscripten JS code expects.
+//
+// Refer to LowerEmscriptenEHSjLj pass for more details.
+
+// Returns true if the given function name is an invoke name generated by
+// LowerEmscriptenEHSjLj pass.
+static bool isEmscriptenInvokeName(StringRef Name) {
+ if (Name.front() == '"' && Name.back() == '"')
+ Name = Name.substr(1, Name.size() - 2);
+ return Name.startswith("__invoke_");
+}
+
+// Returns a character that represents the given wasm value type in invoke
+// signatures.
+static char getInvokeSig(wasm::ValType VT) {
+ switch (VT) {
+ case wasm::ValType::I32:
+ return 'i';
+ case wasm::ValType::I64:
+ return 'j';
+ case wasm::ValType::F32:
+ return 'f';
+ case wasm::ValType::F64:
+ return 'd';
+ case wasm::ValType::V128:
+ return 'V';
+ case wasm::ValType::FUNCREF:
+ return 'F';
+ case wasm::ValType::EXTERNREF:
+ return 'X';
+ }
+ llvm_unreachable("Unhandled wasm::ValType enum");
+}
+
+// Given the wasm signature, generate the invoke name in the format JS glue code
+// expects.
+static std::string getEmscriptenInvokeSymbolName(wasm::WasmSignature *Sig) {
+ assert(Sig->Returns.size() <= 1);
+ std::string Ret = "invoke_";
+ if (!Sig->Returns.empty())
+ for (auto VT : Sig->Returns)
+ Ret += getInvokeSig(VT);
+ else
+ Ret += 'v';
+ // Invokes' first argument is a pointer to the original function, so skip it
+ for (unsigned I = 1, E = Sig->Params.size(); I < E; I++)
+ Ret += getInvokeSig(Sig->Params[I]);
+ return Ret;
+}
+
//===----------------------------------------------------------------------===//
// WebAssemblyAsmPrinter Implementation.
//===----------------------------------------------------------------------===//
+MCSymbolWasm *WebAssemblyAsmPrinter::getMCSymbolForFunction(
+ const Function *F, bool EnableEmEH, wasm::WasmSignature *Sig,
+ bool &InvokeDetected) {
+ MCSymbolWasm *WasmSym = nullptr;
+ if (EnableEmEH && isEmscriptenInvokeName(F->getName())) {
+ assert(Sig);
+ InvokeDetected = true;
+ if (Sig->Returns.size() > 1) {
+ std::string Msg =
+ "Emscripten EH/SjLj does not support multivalue returns: " +
+ std::string(F->getName()) + ": " +
+ WebAssembly::signatureToString(Sig);
+ report_fatal_error(Msg);
+ }
+ WasmSym = cast<MCSymbolWasm>(
+ GetExternalSymbolSymbol(getEmscriptenInvokeSymbolName(Sig)));
+ } else {
+ WasmSym = cast<MCSymbolWasm>(getSymbol(F));
+ }
+ return WasmSym;
+}
+
void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
for (auto &It : OutContext.getSymbols()) {
// Emit a .globaltype and .eventtype declaration.
@@ -95,6 +179,7 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
getTargetStreamer()->emitEventType(Sym);
}
+ DenseSet<MCSymbol *> InvokeSymbols;
for (const auto &F : M) {
if (F.isIntrinsic())
continue;
@@ -104,31 +189,46 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
SmallVector<MVT, 4> Results;
SmallVector<MVT, 4> Params;
computeSignatureVTs(F.getFunctionType(), &F, F, TM, Params, Results);
- auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
+ // At this point these MCSymbols may or may not have been created already
+ // and thus also contain a signature, but we need to get the signature
+ // anyway here in case it is an invoke that has not yet been created. We
+ // will discard it later if it turns out not to be necessary.
+ auto Signature = signatureFromMVTs(Results, Params);
+ bool InvokeDetected = false;
+ auto *Sym = getMCSymbolForFunction(&F, EnableEmException || EnableEmSjLj,
+ Signature.get(), InvokeDetected);
+
+ // Multiple functions can be mapped to the same invoke symbol. For
+ // example, two IR functions '__invoke_void_i8*' and '__invoke_void_i32'
+ // are both mapped to '__invoke_vi'. We keep them in a set once we emit an
+ // Emscripten EH symbol so we don't emit the same symbol twice.
+ if (InvokeDetected && !InvokeSymbols.insert(Sym).second)
+ continue;
+
Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
if (!Sym->getSignature()) {
- auto Signature = signatureFromMVTs(Results, Params);
Sym->setSignature(Signature.get());
addSignature(std::move(Signature));
+ } else {
+ // This symbol has already been created and had a signature. Discard it.
+ Signature.reset();
}
- // FIXME: this was originally intended for post-linking and was only used
- // for imports that were only called indirectly (i.e. s2wasm could not
- // infer the type from a call). With object files it applies to all
- // imports. so fix the names and the tests, or rethink how import
- // delcarations work in asm files.
+
getTargetStreamer()->emitFunctionType(Sym);
- if (TM.getTargetTriple().isOSBinFormatWasm() &&
- F.hasFnAttribute("wasm-import-module")) {
+ if (F.hasFnAttribute("wasm-import-module")) {
StringRef Name =
F.getFnAttribute("wasm-import-module").getValueAsString();
Sym->setImportModule(storeName(Name));
getTargetStreamer()->emitImportModule(Sym, Name);
}
- if (TM.getTargetTriple().isOSBinFormatWasm() &&
- F.hasFnAttribute("wasm-import-name")) {
+ if (F.hasFnAttribute("wasm-import-name")) {
+ // If this is a converted Emscripten EH/SjLj symbol, we shouldn't use
+ // the original function name but the converted symbol name.
StringRef Name =
- F.getFnAttribute("wasm-import-name").getValueAsString();
+ InvokeDetected
+ ? Sym->getName()
+ : F.getFnAttribute("wasm-import-name").getValueAsString();
Sym->setImportName(storeName(Name));
getTargetStreamer()->emitImportName(Sym, Name);
}
@@ -304,7 +404,6 @@ void WebAssemblyAsmPrinter::emitFunctionBodyStart() {
addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
- // FIXME: clean up how params and results are emitted (use signatures)
getTargetStreamer()->emitFunctionType(WasmSym);
// Emit the function index.
@@ -362,14 +461,6 @@ void WebAssemblyAsmPrinter::emitInstruction(const MachineInstr *MI) {
// This is a compiler barrier that prevents instruction reordering during
// backend compilation, and should not be emitted.
break;
- case WebAssembly::EXTRACT_EXCEPTION_I32:
- case WebAssembly::EXTRACT_EXCEPTION_I32_S:
- // These are pseudo instructions that simulates popping values from stack.
- // We print these only when we have -wasm-keep-registers on for assembly
- // readability.
- if (!WasmKeepRegisters)
- break;
- LLVM_FALLTHROUGH;
default: {
WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
MCInst TmpInst;
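The invoke-name mangling described in the comments earlier in this file maps a wasm signature to the short names Emscripten's JS glue expects: a return-type character first ('v' when void), then one character per parameter, skipping the leading pointer to the original callee. A self-contained sketch of that mapping with simplified types (an illustration; the in-tree logic is getInvokeSig and getEmscriptenInvokeSymbolName above):

    #include <string>
    #include <vector>

    enum class SigTy { I32, I64, F32, F64 };

    static char sigChar(SigTy T) {
      switch (T) {
      case SigTy::I32: return 'i';
      case SigTy::I64: return 'j';
      case SigTy::F32: return 'f';
      case SigTy::F64: return 'd';
      }
      return '?';
    }

    // Return type first ('v' when there is none), then the parameters,
    // skipping the leading function-pointer parameter of the invoke.
    static std::string invokeName(bool HasReturn, SigTy Ret,
                                  const std::vector<SigTy> &Params) {
      std::string Name = "invoke_";
      Name += HasReturn ? sigChar(Ret) : 'v';
      for (size_t I = 1; I < Params.size(); ++I)
        Name += sigChar(Params[I]);
      return Name;
    }

For example, invokeName(false, SigTy::I32, {SigTy::I32 /*callee*/, SigTy::I32}) yields "invoke_vi", matching the i32-argument, void-return example in the comment block.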
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
index d9281568638d..7a6a3247a19f 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -77,6 +77,9 @@ public:
MVT getRegType(unsigned RegNo) const;
std::string regToString(const MachineOperand &MO);
WebAssemblyTargetStreamer *getTargetStreamer();
+ MCSymbolWasm *getMCSymbolForFunction(const Function *F, bool EnableEmEH,
+ wasm::WasmSignature *Sig,
+ bool &InvokeDetected);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index 8442b49e25f4..eb3e9b91d40d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -19,6 +19,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyExceptionInfo.h"
+#include "WebAssemblySortRegion.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
#include "llvm/ADT/PriorityQueue.h"
@@ -31,6 +32,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+using WebAssembly::SortRegion;
+using WebAssembly::SortRegionInfo;
#define DEBUG_TYPE "wasm-cfg-sort"
@@ -44,78 +47,6 @@ static cl::opt<bool> WasmDisableEHPadSort(
namespace {
-// Wrapper for loops and exceptions
-class Region {
-public:
- virtual ~Region() = default;
- virtual MachineBasicBlock *getHeader() const = 0;
- virtual bool contains(const MachineBasicBlock *MBB) const = 0;
- virtual unsigned getNumBlocks() const = 0;
- using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
- virtual iterator_range<block_iterator> blocks() const = 0;
- virtual bool isLoop() const = 0;
-};
-
-template <typename T> class ConcreteRegion : public Region {
- const T *Region;
-
-public:
- ConcreteRegion(const T *Region) : Region(Region) {}
- MachineBasicBlock *getHeader() const override { return Region->getHeader(); }
- bool contains(const MachineBasicBlock *MBB) const override {
- return Region->contains(MBB);
- }
- unsigned getNumBlocks() const override { return Region->getNumBlocks(); }
- iterator_range<block_iterator> blocks() const override {
- return Region->blocks();
- }
- bool isLoop() const override { return false; }
-};
-
-template <> bool ConcreteRegion<MachineLoop>::isLoop() const { return true; }
-
-// This class has information of nested Regions; this is analogous to what
-// LoopInfo is for loops.
-class RegionInfo {
- const MachineLoopInfo &MLI;
- const WebAssemblyExceptionInfo &WEI;
- DenseMap<const MachineLoop *, std::unique_ptr<Region>> LoopMap;
- DenseMap<const WebAssemblyException *, std::unique_ptr<Region>> ExceptionMap;
-
-public:
- RegionInfo(const MachineLoopInfo &MLI, const WebAssemblyExceptionInfo &WEI)
- : MLI(MLI), WEI(WEI) {}
-
- // Returns a smallest loop or exception that contains MBB
- const Region *getRegionFor(const MachineBasicBlock *MBB) {
- const auto *ML = MLI.getLoopFor(MBB);
- const auto *WE = WEI.getExceptionFor(MBB);
- if (!ML && !WE)
- return nullptr;
- // We determine subregion relationship by domination of their headers, i.e.,
- // if region A's header dominates region B's header, B is a subregion of A.
- // WebAssemblyException contains BBs in all its subregions (loops or
- // exceptions), but MachineLoop may not, because MachineLoop does not contain
- // BBs that don't have a path to its header even if they are dominated by
- // its header. So here we should use WE->contains(ML->getHeader()), but not
- // ML->contains(WE->getHeader()).
- if ((ML && !WE) || (ML && WE && WE->contains(ML->getHeader()))) {
- // If the smallest region containing MBB is a loop
- if (LoopMap.count(ML))
- return LoopMap[ML].get();
- LoopMap[ML] = std::make_unique<ConcreteRegion<MachineLoop>>(ML);
- return LoopMap[ML].get();
- } else {
- // If the smallest region containing MBB is an exception
- if (ExceptionMap.count(WE))
- return ExceptionMap[WE].get();
- ExceptionMap[WE] =
- std::make_unique<ConcreteRegion<WebAssemblyException>>(WE);
- return ExceptionMap[WE].get();
- }
- }
-};
-
class WebAssemblyCFGSort final : public MachineFunctionPass {
StringRef getPassName() const override { return "WebAssembly CFG Sort"; }
@@ -236,14 +167,14 @@ struct CompareBlockNumbersBackwards {
/// Bookkeeping for a region to help ensure that we don't mix blocks not
/// dominated by the its header among its blocks.
struct Entry {
- const Region *TheRegion;
+ const SortRegion *TheRegion;
unsigned NumBlocksLeft;
/// List of blocks not dominated by Loop's header that are deferred until
/// after all of Loop's blocks have been seen.
std::vector<MachineBasicBlock *> Deferred;
- explicit Entry(const class Region *R)
+ explicit Entry(const SortRegion *R)
: TheRegion(R), NumBlocksLeft(R->getNumBlocks()) {}
};
} // end anonymous namespace
@@ -287,10 +218,10 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
CompareBlockNumbersBackwards>
Ready;
- RegionInfo RI(MLI, WEI);
+ SortRegionInfo SRI(MLI, WEI);
SmallVector<Entry, 4> Entries;
for (MachineBasicBlock *MBB = &MF.front();;) {
- const Region *R = RI.getRegionFor(MBB);
+ const SortRegion *R = SRI.getRegionFor(MBB);
if (R) {
// If MBB is a region header, add it to the active region list. We can't
// put any blocks that it doesn't dominate until we see the end of the
@@ -373,7 +304,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
MF.RenumberBlocks();
#ifndef NDEBUG
- SmallSetVector<const Region *, 8> OnStack;
+ SmallSetVector<const SortRegion *, 8> OnStack;
// Insert a sentinel representing the degenerate loop that starts at the
// function entry block and includes the entire function as a "loop" that
@@ -382,7 +313,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
for (auto &MBB : MF) {
assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
- const Region *Region = RI.getRegionFor(&MBB);
+ const SortRegion *Region = SRI.getRegionFor(&MBB);
if (Region && &MBB == Region->getHeader()) {
// Region header.
@@ -408,10 +339,10 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
for (auto Pred : MBB.predecessors())
assert(Pred->getNumber() < MBB.getNumber() &&
"Non-loop-header predecessors should be topologically sorted");
- assert(OnStack.count(RI.getRegionFor(&MBB)) &&
+ assert(OnStack.count(SRI.getRegionFor(&MBB)) &&
"Blocks must be nested in their regions");
}
- while (OnStack.size() > 1 && &MBB == WebAssembly::getBottom(OnStack.back()))
+ while (OnStack.size() > 1 && &MBB == SRI.getBottom(OnStack.back()))
OnStack.pop_back();
}
assert(OnStack.pop_back_val() == nullptr &&
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 8cbfc98e8197..a8e0c3efea0e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -24,6 +24,7 @@
#include "WebAssembly.h"
#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySortRegion.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
#include "llvm/ADT/Statistic.h"
@@ -33,6 +34,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+using WebAssembly::SortRegionInfo;
#define DEBUG_TYPE "wasm-cfg-stackify"
@@ -55,6 +57,11 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass {
// which holds the beginning of the scope. This will allow us to quickly skip
// over scoped regions when walking blocks.
SmallVector<MachineBasicBlock *, 8> ScopeTops;
+ void updateScopeTops(MachineBasicBlock *Begin, MachineBasicBlock *End) {
+ int EndNo = End->getNumber();
+ if (!ScopeTops[EndNo] || ScopeTops[EndNo]->getNumber() > Begin->getNumber())
+ ScopeTops[EndNo] = Begin;
+ }
// Placing markers.
void placeMarkers(MachineFunction &MF);
@@ -133,10 +140,10 @@ static bool explicitlyBranchesTo(MachineBasicBlock *Pred,
// contains instructions that should go before the marker, and AfterSet contains
// ones that should go after the marker. In this function, AfterSet is only
// used for sanity checking.
+template <typename Container>
static MachineBasicBlock::iterator
-getEarliestInsertPos(MachineBasicBlock *MBB,
- const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
- const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
+getEarliestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
+ const Container &AfterSet) {
auto InsertPos = MBB->end();
while (InsertPos != MBB->begin()) {
if (BeforeSet.count(&*std::prev(InsertPos))) {
@@ -157,10 +164,10 @@ getEarliestInsertPos(MachineBasicBlock *MBB,
// contains instructions that should go before the marker, and AfterSet contains
// ones that should go after the marker. In this function, BeforeSet is only
// used for sanity checking.
+template <typename Container>
static MachineBasicBlock::iterator
-getLatestInsertPos(MachineBasicBlock *MBB,
- const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
- const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
+getLatestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
+ const Container &AfterSet) {
auto InsertPos = MBB->begin();
while (InsertPos != MBB->end()) {
if (AfterSet.count(&*InsertPos)) {
@@ -219,20 +226,12 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
// which reduces overall stack height.
MachineBasicBlock *Header = nullptr;
bool IsBranchedTo = false;
- bool IsBrOnExn = false;
- MachineInstr *BrOnExn = nullptr;
int MBBNumber = MBB.getNumber();
for (MachineBasicBlock *Pred : MBB.predecessors()) {
if (Pred->getNumber() < MBBNumber) {
Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
- if (explicitlyBranchesTo(Pred, &MBB)) {
+ if (explicitlyBranchesTo(Pred, &MBB))
IsBranchedTo = true;
- if (Pred->getFirstTerminator()->getOpcode() == WebAssembly::BR_ON_EXN) {
- IsBrOnExn = true;
- assert(!BrOnExn && "There should be only one br_on_exn per block");
- BrOnExn = &*Pred->getFirstTerminator();
- }
- }
}
}
if (!Header)
@@ -317,22 +316,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
}
// Add the BLOCK.
-
- // 'br_on_exn' extracts exnref object and pushes variable number of values
- // depending on its tag. For C++ exception, its a single i32 value, and the
- // generated code will be in the form of:
- // block i32
- // br_on_exn 0, $__cpp_exception
- // rethrow
- // end_block
WebAssembly::BlockType ReturnType = WebAssembly::BlockType::Void;
- if (IsBrOnExn) {
- const char *TagName = BrOnExn->getOperand(1).getSymbolName();
- if (std::strcmp(TagName, "__cpp_exception") != 0)
- llvm_unreachable("Only C++ exception is supported");
- ReturnType = WebAssembly::BlockType::I32;
- }
-
auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet);
MachineInstr *Begin =
BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
@@ -372,16 +356,15 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
registerScope(Begin, End);
// Track the farthest-spanning scope that ends at this point.
- int Number = MBB.getNumber();
- if (!ScopeTops[Number] ||
- ScopeTops[Number]->getNumber() > Header->getNumber())
- ScopeTops[Number] = Header;
+ updateScopeTops(Header, &MBB);
}
/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header).
void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
MachineFunction &MF = *MBB.getParent();
const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
+ SortRegionInfo SRI(MLI, WEI);
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
MachineLoop *Loop = MLI.getLoopFor(&MBB);
@@ -390,7 +373,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
// The operand of a LOOP is the first block after the loop. If the loop is the
// bottom of the function, insert a dummy block at the end.
- MachineBasicBlock *Bottom = WebAssembly::getBottom(Loop);
+ MachineBasicBlock *Bottom = SRI.getBottom(Loop);
auto Iter = std::next(Bottom->getIterator());
if (Iter == MF.end()) {
getAppendixBlock(MF);
@@ -441,8 +424,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
assert((!ScopeTops[AfterLoop->getNumber()] ||
ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
"With block sorting the outermost loop for a block should be first.");
- if (!ScopeTops[AfterLoop->getNumber()])
- ScopeTops[AfterLoop->getNumber()] = &MBB;
+ updateScopeTops(&MBB, AfterLoop);
}
void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
@@ -450,7 +432,9 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
MachineFunction &MF = *MBB.getParent();
auto &MDT = getAnalysis<MachineDominatorTree>();
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
+ SortRegionInfo SRI(MLI, WEI);
const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
// Compute the nearest common dominator of all unwind predecessors
@@ -470,7 +454,7 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
// end.
WebAssemblyException *WE = WEI.getExceptionFor(&MBB);
assert(WE);
- MachineBasicBlock *Bottom = WebAssembly::getBottom(WE);
+ MachineBasicBlock *Bottom = SRI.getBottom(WE);
auto Iter = std::next(Bottom->getIterator());
if (Iter == MF.end()) {
@@ -639,11 +623,8 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
// catch |
// end_block --|
// end_try
- for (int Number : {Cont->getNumber(), MBB.getNumber()}) {
- if (!ScopeTops[Number] ||
- ScopeTops[Number]->getNumber() > Header->getNumber())
- ScopeTops[Number] = Header;
- }
+ for (auto *End : {&MBB, Cont})
+ updateScopeTops(Header, End);
}
void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
@@ -656,11 +637,32 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
// try
// ...
// br bb2 <- Not necessary
- // bb1:
+ // bb1 (ehpad):
+ // catch
+ // ...
+ // bb2: <- Continuation BB
+ // end
+ //
+  // A more involved case: when the BB containing 'end' is itself another EH
+ // pad, the Cont (= continuation) BB is that EH pad's 'end' BB. For example,
+ // bb0:
+ // try
+ // try
+ // ...
+ // br bb3 <- Not necessary
+ // bb1 (ehpad):
+ // catch
+ // bb2 (ehpad):
+ // end
// catch
// ...
- // bb2:
+ // bb3: <- Continuation BB
// end
+ //
+  // When the EH pad at hand is bb1, its matching end_try is in bb2. But bb2
+  // is itself an EH pad, so bb0's continuation BB becomes bb3, and the
+  // 'br bb3' in the example above can be deleted. This is why the 'while'
+  // loop below keeps following matching end_trys until 'Cont' is no longer
+  // an EH pad.
for (auto &MBB : MF) {
if (!MBB.isEHPad())
continue;
@@ -668,7 +670,14 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
MachineBasicBlock *EHPadLayoutPred = MBB.getPrevNode();
- MachineBasicBlock *Cont = BeginToEnd[EHPadToTry[&MBB]]->getParent();
+
+ MachineBasicBlock *Cont = &MBB;
+ while (Cont->isEHPad()) {
+ MachineInstr *Try = EHPadToTry[Cont];
+ MachineInstr *EndTry = BeginToEnd[Try];
+ Cont = EndTry->getParent();
+ }
+
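
// Illustrative, self-contained sketch (not part of the LLVM sources) of the
// continuation-block search above, over a simplified block model. The types
// and maps below are hypothetical stand-ins for MachineBasicBlock and the
// EHPadToTry/BeginToEnd maps used by the pass.
#include <cassert>
#include <map>

struct Block {
  int Number;
  bool IsEHPad;
};

// For each EH pad, the block that holds the END_TRY matching its TRY.
using EndTryBlockMap = std::map<const Block *, const Block *>;

// Follow matching end_try markers until the block reached is not an EH pad.
static const Block *findContinuation(const Block *EHPad,
                                     const EndTryBlockMap &EndOf) {
  const Block *Cont = EHPad;
  while (Cont->IsEHPad)
    Cont = EndOf.at(Cont); // block containing the matching end_try
  return Cont;
}

int main() {
  // Mirrors the example above: bb1 and bb2 are EH pads, bb3 is the real
  // continuation of the outer try.
  Block BB1{1, true}, BB2{2, true}, BB3{3, false};
  EndTryBlockMap EndOf{{&BB1, &BB2}, {&BB2, &BB3}};
  assert(findContinuation(&BB1, EndOf)->Number == 3);
  return 0;
}
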
bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond);
// This condition means either
     // 1. This BB ends with a single unconditional branch whose destination is
@@ -745,18 +754,26 @@ static unsigned getCopyOpcode(const TargetRegisterClass *RC) {
return WebAssembly::COPY_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::COPY_V128;
- if (RC == &WebAssembly::EXNREFRegClass)
- return WebAssembly::COPY_EXNREF;
+ if (RC == &WebAssembly::FUNCREFRegClass)
+ return WebAssembly::COPY_FUNCREF;
+ if (RC == &WebAssembly::EXTERNREFRegClass)
+ return WebAssembly::COPY_EXTERNREF;
llvm_unreachable("Unexpected register class");
}
// When MBB is split into MBB and Split, we should unstackify defs in MBB that
// have their uses in Split.
-static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
- MachineBasicBlock &Split,
- WebAssemblyFunctionInfo &MFI,
- MachineRegisterInfo &MRI,
- const WebAssemblyInstrInfo &TII) {
+// FIXME This function will be used when fixing unwind mismatches, but the old
+// version of that function was removed for the moment and the new version has
+// not yet been added. So 'LLVM_ATTRIBUTE_UNUSED' is added to suppress the
+// warning. Remove the attribute after the new functionality is added.
+LLVM_ATTRIBUTE_UNUSED static void
+unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, MachineBasicBlock &Split) {
+ MachineFunction &MF = *MBB.getParent();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ auto &MRI = MF.getRegInfo();
+
for (auto &MI : Split) {
for (auto &MO : MI.explicit_uses()) {
if (!MO.isReg() || Register::isPhysicalRegister(MO.getReg()))
@@ -810,525 +827,8 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
}
bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // Linearizing the control flow by placing TRY / END_TRY markers can create
- // mismatches in unwind destinations. There are two kinds of mismatches we
- // try to solve here.
-
- // 1. When an instruction may throw, but the EH pad it will unwind to can be
- // different from the original CFG.
- //
- // Example: we have the following CFG:
- // bb0:
- // call @foo (if it throws, unwind to bb2)
- // bb1:
- // call @bar (if it throws, unwind to bb3)
- // bb2 (ehpad):
- // catch
- // ...
- // bb3 (ehpad)
- // catch
- // handler body
- //
- // And the CFG is sorted in this order. Then after placing TRY markers, it
- // will look like: (BB markers are omitted)
- // try $label1
- // try
- // call @foo
- // call @bar (if it throws, unwind to bb3)
- // catch <- ehpad (bb2)
- // ...
- // end_try
- // catch <- ehpad (bb3)
- // handler body
- // end_try
- //
- // Now if bar() throws, it is going to end up ip in bb2, not bb3, where it
- // is supposed to end up. We solve this problem by
- // a. Split the target unwind EH pad (here bb3) so that the handler body is
- // right after 'end_try', which means we extract the handler body out of
- // the catch block. We do this because this handler body should be
- // somewhere branch-eable from the inner scope.
- // b. Wrap the call that has an incorrect unwind destination ('call @bar'
- // here) with a nested try/catch/end_try scope, and within the new catch
- // block, branches to the handler body.
- // c. Place a branch after the newly inserted nested end_try so it can bypass
- // the handler body, which is now outside of a catch block.
- //
- // The result will like as follows. (new: a) means this instruction is newly
- // created in the process of doing 'a' above.
- //
- // block $label0 (new: placeBlockMarker)
- // try $label1
- // try
- // call @foo
- // try (new: b)
- // call @bar
- // catch (new: b)
- // local.set n / drop (new: b)
- // br $label1 (new: b)
- // end_try (new: b)
- // catch <- ehpad (bb2)
- // end_try
- // br $label0 (new: c)
- // catch <- ehpad (bb3)
- // end_try (hoisted: a)
- // handler body
- // end_block (new: placeBlockMarker)
- //
- // Note that the new wrapping block/end_block will be generated later in
- // placeBlockMarker.
- //
- // TODO Currently local.set and local.gets are generated to move exnref value
- // created by catches. That's because we don't support yielding values from a
- // block in LLVM machine IR yet, even though it is supported by wasm. Delete
- // unnecessary local.get/local.sets once yielding values from a block is
- // supported. The full EH spec requires multi-value support to do this, but
- // for C++ we don't yet need it because we only throw a single i32.
- //
- // ---
- // 2. The same as 1, but in this case an instruction unwinds to a caller
- // function and not another EH pad.
- //
- // Example: we have the following CFG:
- // bb0:
- // call @foo (if it throws, unwind to bb2)
- // bb1:
- // call @bar (if it throws, unwind to caller)
- // bb2 (ehpad):
- // catch
- // ...
- //
- // And the CFG is sorted in this order. Then after placing TRY markers, it
- // will look like:
- // try
- // call @foo
- // call @bar (if it throws, unwind to caller)
- // catch <- ehpad (bb2)
- // ...
- // end_try
- //
- // Now if bar() throws, it is going to end up ip in bb2, when it is supposed
- // throw up to the caller.
- // We solve this problem by
- // a. Create a new 'appendix' BB at the end of the function and put a single
- // 'rethrow' instruction (+ local.get) in there.
- // b. Wrap the call that has an incorrect unwind destination ('call @bar'
- // here) with a nested try/catch/end_try scope, and within the new catch
- // block, branches to the new appendix block.
- //
- // block $label0 (new: placeBlockMarker)
- // try
- // call @foo
- // try (new: b)
- // call @bar
- // catch (new: b)
- // local.set n (new: b)
- // br $label0 (new: b)
- // end_try (new: b)
- // catch <- ehpad (bb2)
- // ...
- // end_try
- // ...
- // end_block (new: placeBlockMarker)
- // local.get n (new: a) <- appendix block
- // rethrow (new: a)
- //
- // In case there are multiple calls in a BB that may throw to the caller, they
- // can be wrapped together in one nested try scope. (In 1, this couldn't
- // happen, because may-throwing instruction there had an unwind destination,
- // i.e., it was an invoke before, and there could be only one invoke within a
- // BB.)
-
- SmallVector<const MachineBasicBlock *, 8> EHPadStack;
- // Range of intructions to be wrapped in a new nested try/catch
- using TryRange = std::pair<MachineInstr *, MachineInstr *>;
- // In original CFG, <unwind destination BB, a vector of try ranges>
- DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> UnwindDestToTryRanges;
- // In new CFG, <destination to branch to, a vector of try ranges>
- DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> BrDestToTryRanges;
- // In new CFG, <destination to branch to, register containing exnref>
- DenseMap<MachineBasicBlock *, unsigned> BrDestToExnReg;
-
- // Destinations for branches that will be newly added, for which a new
- // BLOCK/END_BLOCK markers are necessary.
- SmallVector<MachineBasicBlock *, 8> BrDests;
-
- // Gather possibly throwing calls (i.e., previously invokes) whose current
- // unwind destination is not the same as the original CFG.
- for (auto &MBB : reverse(MF)) {
- bool SeenThrowableInstInBB = false;
- for (auto &MI : reverse(MBB)) {
- if (MI.getOpcode() == WebAssembly::TRY)
- EHPadStack.pop_back();
- else if (MI.getOpcode() == WebAssembly::CATCH)
- EHPadStack.push_back(MI.getParent());
-
- // In this loop we only gather calls that have an EH pad to unwind. So
- // there will be at most 1 such call (= invoke) in a BB, so after we've
- // seen one, we can skip the rest of BB. Also if MBB has no EH pad
- // successor or MI does not throw, this is not an invoke.
- if (SeenThrowableInstInBB || !MBB.hasEHPadSuccessor() ||
- !WebAssembly::mayThrow(MI))
- continue;
- SeenThrowableInstInBB = true;
-
- // If the EH pad on the stack top is where this instruction should unwind
- // next, we're good.
- MachineBasicBlock *UnwindDest = nullptr;
- for (auto *Succ : MBB.successors()) {
- if (Succ->isEHPad()) {
- UnwindDest = Succ;
- break;
- }
- }
- if (EHPadStack.back() == UnwindDest)
- continue;
-
- // If not, record the range.
- UnwindDestToTryRanges[UnwindDest].push_back(TryRange(&MI, &MI));
- }
- }
-
- assert(EHPadStack.empty());
-
- // Gather possibly throwing calls that are supposed to unwind up to the caller
- // if they throw, but currently unwind to an incorrect destination. Unlike the
- // loop above, there can be multiple calls within a BB that unwind to the
- // caller, which we should group together in a range.
- bool NeedAppendixBlock = false;
- for (auto &MBB : reverse(MF)) {
- MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr; // inclusive
- for (auto &MI : reverse(MBB)) {
- if (MI.getOpcode() == WebAssembly::TRY)
- EHPadStack.pop_back();
- else if (MI.getOpcode() == WebAssembly::CATCH)
- EHPadStack.push_back(MI.getParent());
-
- // If MBB has an EH pad successor, this inst does not unwind to caller.
- if (MBB.hasEHPadSuccessor())
- continue;
-
- // We wrap up the current range when we see a marker even if we haven't
- // finished a BB.
- if (RangeEnd && WebAssembly::isMarker(MI.getOpcode())) {
- NeedAppendixBlock = true;
- // Record the range. nullptr here means the unwind destination is the
- // caller.
- UnwindDestToTryRanges[nullptr].push_back(
- TryRange(RangeBegin, RangeEnd));
- RangeBegin = RangeEnd = nullptr; // Reset range pointers
- }
-
- // If EHPadStack is empty, that means it is correctly unwind to caller if
- // it throws, so we're good. If MI does not throw, we're good too.
- if (EHPadStack.empty() || !WebAssembly::mayThrow(MI))
- continue;
-
- // We found an instruction that unwinds to the caller but currently has an
- // incorrect unwind destination. Create a new range or increment the
- // currently existing range.
- if (!RangeEnd)
- RangeBegin = RangeEnd = &MI;
- else
- RangeBegin = &MI;
- }
-
- if (RangeEnd) {
- NeedAppendixBlock = true;
- // Record the range. nullptr here means the unwind destination is the
- // caller.
- UnwindDestToTryRanges[nullptr].push_back(TryRange(RangeBegin, RangeEnd));
- RangeBegin = RangeEnd = nullptr; // Reset range pointers
- }
- }
-
- assert(EHPadStack.empty());
- // We don't have any unwind destination mismatches to resolve.
- if (UnwindDestToTryRanges.empty())
- return false;
-
- // If we found instructions that should unwind to the caller but currently
- // have incorrect unwind destination, we create an appendix block at the end
- // of the function with a local.get and a rethrow instruction.
- if (NeedAppendixBlock) {
- auto *AppendixBB = getAppendixBlock(MF);
- Register ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
- BuildMI(AppendixBB, DebugLoc(), TII.get(WebAssembly::RETHROW))
- .addReg(ExnReg);
- // These instruction ranges should branch to this appendix BB.
- for (auto Range : UnwindDestToTryRanges[nullptr])
- BrDestToTryRanges[AppendixBB].push_back(Range);
- BrDestToExnReg[AppendixBB] = ExnReg;
- }
-
- // We loop through unwind destination EH pads that are targeted from some
- // inner scopes. Because these EH pads are destination of more than one scope
- // now, we split them so that the handler body is after 'end_try'.
- // - Before
- // ehpad:
- // catch
- // local.set n / drop
- // handler body
- // ...
- // cont:
- // end_try
- //
- // - After
- // ehpad:
- // catch
- // local.set n / drop
- // brdest: (new)
- // end_try (hoisted from 'cont' BB)
- // handler body (taken from 'ehpad')
- // ...
- // cont:
- for (auto &P : UnwindDestToTryRanges) {
- NumUnwindMismatches += P.second.size();
-
- // This means the destination is the appendix BB, which was separately
- // handled above.
- if (!P.first)
- continue;
-
- MachineBasicBlock *EHPad = P.first;
-
- // Find 'catch' and 'local.set' or 'drop' instruction that follows the
- // 'catch'. If -wasm-disable-explicit-locals is not set, 'catch' should be
- // always followed by either 'local.set' or a 'drop', because 'br_on_exn' is
- // generated after 'catch' in LateEHPrepare and we don't support blocks
- // taking values yet.
- MachineInstr *Catch = nullptr;
- unsigned ExnReg = 0;
- for (auto &MI : *EHPad) {
- switch (MI.getOpcode()) {
- case WebAssembly::CATCH:
- Catch = &MI;
- ExnReg = Catch->getOperand(0).getReg();
- break;
- }
- }
- assert(Catch && "EH pad does not have a catch");
- assert(ExnReg != 0 && "Invalid register");
-
- auto SplitPos = std::next(Catch->getIterator());
-
- // Create a new BB that's gonna be the destination for branches from the
- // inner mismatched scope.
- MachineInstr *BeginTry = EHPadToTry[EHPad];
- MachineInstr *EndTry = BeginToEnd[BeginTry];
- MachineBasicBlock *Cont = EndTry->getParent();
- auto *BrDest = MF.CreateMachineBasicBlock();
- MF.insert(std::next(EHPad->getIterator()), BrDest);
- // Hoist up the existing 'end_try'.
- BrDest->insert(BrDest->end(), EndTry->removeFromParent());
- // Take out the handler body from EH pad to the new branch destination BB.
- BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end());
- unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI, TII);
- // Fix predecessor-successor relationship.
- BrDest->transferSuccessors(EHPad);
- EHPad->addSuccessor(BrDest);
-
- // All try ranges that were supposed to unwind to this EH pad now have to
- // branch to this new branch dest BB.
- for (auto Range : UnwindDestToTryRanges[EHPad])
- BrDestToTryRanges[BrDest].push_back(Range);
- BrDestToExnReg[BrDest] = ExnReg;
-
- // In case we fall through to the continuation BB after the catch block, we
- // now have to add a branch to it.
- // - Before
- // try
- // ...
- // (falls through to 'cont')
- // catch
- // handler body
- // end
- // <-- cont
- //
- // - After
- // try
- // ...
- // br %cont (new)
- // catch
- // end
- // handler body
- // <-- cont
- MachineBasicBlock *EHPadLayoutPred = &*std::prev(EHPad->getIterator());
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
- SmallVector<MachineOperand, 4> Cond;
- bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond);
- if (Analyzable && !TBB && !FBB) {
- DebugLoc DL = EHPadLayoutPred->empty()
- ? DebugLoc()
- : EHPadLayoutPred->rbegin()->getDebugLoc();
- BuildMI(EHPadLayoutPred, DL, TII.get(WebAssembly::BR)).addMBB(Cont);
- BrDests.push_back(Cont);
- }
- }
-
- // For possibly throwing calls whose unwind destinations are currently
- // incorrect because of CFG linearization, we wrap them with a nested
- // try/catch/end_try, and within the new catch block, we branch to the correct
- // handler.
- // - Before
- // mbb:
- // call @foo <- Unwind destination mismatch!
- // ehpad:
- // ...
- //
- // - After
- // mbb:
- // try (new)
- // call @foo
- // nested-ehpad: (new)
- // catch (new)
- // local.set n / drop (new)
- // br %brdest (new)
- // nested-end: (new)
- // end_try (new)
- // ehpad:
- // ...
- for (auto &P : BrDestToTryRanges) {
- MachineBasicBlock *BrDest = P.first;
- auto &TryRanges = P.second;
- unsigned ExnReg = BrDestToExnReg[BrDest];
-
- for (auto Range : TryRanges) {
- MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr;
- std::tie(RangeBegin, RangeEnd) = Range;
- auto *MBB = RangeBegin->getParent();
- // Store the first function call from this range, because RangeBegin can
- // be moved to point EH_LABEL before the call
- MachineInstr *RangeBeginCall = RangeBegin;
-
- // Include possible EH_LABELs in the range
- if (RangeBegin->getIterator() != MBB->begin() &&
- std::prev(RangeBegin->getIterator())->isEHLabel())
- RangeBegin = &*std::prev(RangeBegin->getIterator());
- if (std::next(RangeEnd->getIterator()) != MBB->end() &&
- std::next(RangeEnd->getIterator())->isEHLabel())
- RangeEnd = &*std::next(RangeEnd->getIterator());
-
- MachineBasicBlock *EHPad = nullptr;
- for (auto *Succ : MBB->successors()) {
- if (Succ->isEHPad()) {
- EHPad = Succ;
- break;
- }
- }
-
- // Local expression tree before the first call of this range should go
- // after the nested TRY.
- SmallPtrSet<const MachineInstr *, 4> AfterSet;
- AfterSet.insert(RangeBegin);
- AfterSet.insert(RangeBeginCall);
- for (auto I = MachineBasicBlock::iterator(RangeBeginCall),
- E = MBB->begin();
- I != E; --I) {
- if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
- continue;
- if (WebAssembly::isChild(*std::prev(I), MFI))
- AfterSet.insert(&*std::prev(I));
- else
- break;
- }
-
- // Create the nested try instruction.
- auto InsertPos = getLatestInsertPos(
- MBB, SmallPtrSet<const MachineInstr *, 4>(), AfterSet);
- MachineInstr *NestedTry =
- BuildMI(*MBB, InsertPos, RangeBegin->getDebugLoc(),
- TII.get(WebAssembly::TRY))
- .addImm(int64_t(WebAssembly::BlockType::Void));
-
- // Create the nested EH pad and fill instructions in.
- MachineBasicBlock *NestedEHPad = MF.CreateMachineBasicBlock();
- MF.insert(std::next(MBB->getIterator()), NestedEHPad);
- NestedEHPad->setIsEHPad();
- NestedEHPad->setIsEHScopeEntry();
- BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), TII.get(WebAssembly::CATCH),
- ExnReg);
- BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), TII.get(WebAssembly::BR))
- .addMBB(BrDest);
-
- // Create the nested continuation BB and end_try instruction.
- MachineBasicBlock *NestedCont = MF.CreateMachineBasicBlock();
- MF.insert(std::next(NestedEHPad->getIterator()), NestedCont);
- MachineInstr *NestedEndTry =
- BuildMI(*NestedCont, NestedCont->begin(), RangeEnd->getDebugLoc(),
- TII.get(WebAssembly::END_TRY));
- // In case MBB has more instructions after the try range, move them to the
- // new nested continuation BB.
- NestedCont->splice(NestedCont->end(), MBB,
- std::next(RangeEnd->getIterator()), MBB->end());
- unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI, TII);
- registerTryScope(NestedTry, NestedEndTry, NestedEHPad);
-
- // Fix predecessor-successor relationship.
- NestedCont->transferSuccessors(MBB);
- if (EHPad) {
- NestedCont->removeSuccessor(EHPad);
- // If EHPad does not have any predecessors left after removing
- // NextedCont predecessor, remove its successor too, because this EHPad
- // is not reachable from the entry BB anyway. We can't remove EHPad BB
- // itself because it can contain 'catch' or 'end', which are necessary
- // for keeping try-catch-end structure.
- if (EHPad->pred_empty())
- EHPad->removeSuccessor(BrDest);
- }
- MBB->addSuccessor(NestedEHPad);
- MBB->addSuccessor(NestedCont);
- NestedEHPad->addSuccessor(BrDest);
- }
- }
-
- // Renumber BBs and recalculate ScopeTop info because new BBs might have been
- // created and inserted above.
- MF.RenumberBlocks();
- ScopeTops.clear();
- ScopeTops.resize(MF.getNumBlockIDs());
- for (auto &MBB : reverse(MF)) {
- for (auto &MI : reverse(MBB)) {
- if (ScopeTops[MBB.getNumber()])
- break;
- switch (MI.getOpcode()) {
- case WebAssembly::END_BLOCK:
- case WebAssembly::END_LOOP:
- case WebAssembly::END_TRY:
- ScopeTops[MBB.getNumber()] = EndToBegin[&MI]->getParent();
- break;
- case WebAssembly::CATCH:
- ScopeTops[MBB.getNumber()] = EHPadToTry[&MBB]->getParent();
- break;
- }
- }
- }
-
- // Recompute the dominator tree.
- getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
-
- // Place block markers for newly added branches, if necessary.
-
- // If we've created an appendix BB and a branch to it, place a block/end_block
- // marker for that. For some new branches, those branch destination BBs start
- // with a hoisted end_try marker, so we don't need a new marker there.
- if (AppendixBB)
- BrDests.push_back(AppendixBB);
-
- llvm::sort(BrDests,
- [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
- auto ANum = A->getNumber();
- auto BNum = B->getNumber();
- return ANum < BNum;
- });
- for (auto *Dest : BrDests)
- placeBlockMarker(*Dest);
-
- return true;
+ // TODO Implement this
+ return false;
}
static unsigned
@@ -1365,22 +865,44 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
: WebAssembly::BlockType(
WebAssembly::toValType(MFI.getResults().front()));
- for (MachineBasicBlock &MBB : reverse(MF)) {
- for (MachineInstr &MI : reverse(MBB)) {
+ SmallVector<MachineBasicBlock::reverse_iterator, 4> Worklist;
+ Worklist.push_back(MF.rbegin()->rbegin());
+
+ auto Process = [&](MachineBasicBlock::reverse_iterator It) {
+ auto *MBB = It->getParent();
+ while (It != MBB->rend()) {
+ MachineInstr &MI = *It++;
if (MI.isPosition() || MI.isDebugInstr())
continue;
switch (MI.getOpcode()) {
+ case WebAssembly::END_TRY: {
+ // If a 'try''s return type is fixed, both its try body and catch body
+ // should satisfy the return type, so we need to search 'end'
+ // instructions before its corresponding 'catch' too.
+ auto *EHPad = TryToEHPad.lookup(EndToBegin[&MI]);
+ assert(EHPad);
+ auto NextIt =
+ std::next(WebAssembly::findCatch(EHPad)->getReverseIterator());
+ if (NextIt != EHPad->rend())
+ Worklist.push_back(NextIt);
+ LLVM_FALLTHROUGH;
+ }
case WebAssembly::END_BLOCK:
case WebAssembly::END_LOOP:
- case WebAssembly::END_TRY:
EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType));
continue;
default:
- // Something other than an `end`. We're done.
+ // Something other than an `end`. We're done for this BB.
return;
}
}
- }
+ // We've reached the beginning of a BB. Continue the search in the previous
+ // BB.
+ Worklist.push_back(MBB->getPrevNode()->rbegin());
+ };
+
+ while (!Worklist.empty())
+ Process(Worklist.pop_back_val());
}
// WebAssembly functions end with an end instruction, as if the function body
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
index 159fb4c00ddc..78191ae758fe 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
@@ -20,7 +20,19 @@ using namespace llvm;
WebAssemblyDebugValueManager::WebAssemblyDebugValueManager(
MachineInstr *Instr) {
- Instr->collectDebugValues(DbgValues);
+ // This code differs from MachineInstr::collectDebugValues in that it scans
+ // the whole BB, not just contiguous DBG_VALUEs.
+ if (!Instr->getOperand(0).isReg())
+ return;
+
+ MachineBasicBlock::iterator DI = *Instr;
+ ++DI;
+ for (MachineBasicBlock::iterator DE = Instr->getParent()->end(); DI != DE;
+ ++DI) {
+ if (DI->isDebugValue() &&
+ DI->getDebugOperandForReg(Instr->getOperand(0).getReg()))
+ DbgValues.push_back(&*DI);
+ }
}
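
// Illustrative, self-contained sketch (not part of the LLVM sources) of the
// "scan the rest of the block" DBG_VALUE collection above, over a simplified
// instruction model; the real constructor walks MachineBasicBlock iterators
// and checks getDebugOperandForReg instead.
#include <cassert>
#include <cstddef>
#include <vector>

struct Inst {
  bool IsDebugValue; // true for DBG_VALUE-like instructions
  int Reg;           // register defined (normal inst) or referenced (debug)
};

// Collect every debug value after the defining instruction, up to the end of
// the block, that refers to the defined register -- even when unrelated
// instructions sit in between.
static std::vector<const Inst *>
collectDebugValues(const std::vector<Inst> &Block, std::size_t DefIdx) {
  std::vector<const Inst *> DbgValues;
  int Reg = Block[DefIdx].Reg;
  for (std::size_t I = DefIdx + 1; I < Block.size(); ++I)
    if (Block[I].IsDebugValue && Block[I].Reg == Reg)
      DbgValues.push_back(&Block[I]);
  return DbgValues;
}

int main() {
  //                       def %5      dbg %5      other def   dbg %5
  std::vector<Inst> Block{{false, 5}, {true, 5}, {false, 7}, {true, 5}};
  // Both DBG_VALUEs are found despite the unrelated instruction in between,
  // which a contiguous-only scan would have stopped at.
  assert(collectDebugValues(Block, 0).size() == 2);
  return 0;
}
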
void WebAssemblyDebugValueManager::move(MachineInstr *Insert) {
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 55925bcbe771..ac94e9e80d01 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -96,8 +96,10 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) {
return WebAssembly::DROP_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::DROP_V128;
- if (RC == &WebAssembly::EXNREFRegClass)
- return WebAssembly::DROP_EXNREF;
+ if (RC == &WebAssembly::FUNCREFRegClass)
+ return WebAssembly::DROP_FUNCREF;
+ if (RC == &WebAssembly::EXTERNREFRegClass)
+ return WebAssembly::DROP_EXTERNREF;
llvm_unreachable("Unexpected register class");
}
@@ -113,8 +115,10 @@ static unsigned getLocalGetOpcode(const TargetRegisterClass *RC) {
return WebAssembly::LOCAL_GET_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::LOCAL_GET_V128;
- if (RC == &WebAssembly::EXNREFRegClass)
- return WebAssembly::LOCAL_GET_EXNREF;
+ if (RC == &WebAssembly::FUNCREFRegClass)
+ return WebAssembly::LOCAL_GET_FUNCREF;
+ if (RC == &WebAssembly::EXTERNREFRegClass)
+ return WebAssembly::LOCAL_GET_EXTERNREF;
llvm_unreachable("Unexpected register class");
}
@@ -130,8 +134,10 @@ static unsigned getLocalSetOpcode(const TargetRegisterClass *RC) {
return WebAssembly::LOCAL_SET_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::LOCAL_SET_V128;
- if (RC == &WebAssembly::EXNREFRegClass)
- return WebAssembly::LOCAL_SET_EXNREF;
+ if (RC == &WebAssembly::FUNCREFRegClass)
+ return WebAssembly::LOCAL_SET_FUNCREF;
+ if (RC == &WebAssembly::EXTERNREFRegClass)
+ return WebAssembly::LOCAL_SET_EXTERNREF;
llvm_unreachable("Unexpected register class");
}
@@ -147,8 +153,10 @@ static unsigned getLocalTeeOpcode(const TargetRegisterClass *RC) {
return WebAssembly::LOCAL_TEE_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::LOCAL_TEE_V128;
- if (RC == &WebAssembly::EXNREFRegClass)
- return WebAssembly::LOCAL_TEE_EXNREF;
+ if (RC == &WebAssembly::FUNCREFRegClass)
+ return WebAssembly::LOCAL_TEE_FUNCREF;
+ if (RC == &WebAssembly::EXTERNREFRegClass)
+ return WebAssembly::LOCAL_TEE_EXTERNREF;
llvm_unreachable("Unexpected register class");
}
@@ -164,8 +172,10 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
return MVT::f64;
if (RC == &WebAssembly::V128RegClass)
return MVT::v16i8;
- if (RC == &WebAssembly::EXNREFRegClass)
- return MVT::exnref;
+ if (RC == &WebAssembly::FUNCREFRegClass)
+ return MVT::funcref;
+ if (RC == &WebAssembly::EXTERNREFRegClass)
+ return MVT::externref;
llvm_unreachable("unrecognized register class");
}
@@ -221,6 +231,10 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
auto Local = static_cast<unsigned>(MI.getOperand(1).getImm());
Reg2Local[Reg] = Local;
checkFrameBase(MFI, Local, Reg);
+
+ // Update debug value to point to the local before removing.
+ WebAssemblyDebugValueManager(&MI).replaceWithLocal(Local);
+
MI.eraseFromParent();
Changed = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index c2a0d3e01740..82b032267d55 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -20,12 +20,14 @@
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -128,7 +130,8 @@ private:
case MVT::i64:
case MVT::f32:
case MVT::f64:
- case MVT::exnref:
+ case MVT::funcref:
+ case MVT::externref:
return VT;
case MVT::f16:
return MVT::f32;
@@ -704,9 +707,13 @@ bool WebAssemblyFastISel::fastLowerArguments() {
Opc = WebAssembly::ARGUMENT_v2f64;
RC = &WebAssembly::V128RegClass;
break;
- case MVT::exnref:
- Opc = WebAssembly::ARGUMENT_exnref;
- RC = &WebAssembly::EXNREFRegClass;
+ case MVT::funcref:
+ Opc = WebAssembly::ARGUMENT_funcref;
+ RC = &WebAssembly::FUNCREFRegClass;
+ break;
+ case MVT::externref:
+ Opc = WebAssembly::ARGUMENT_externref;
+ RC = &WebAssembly::EXTERNREFRegClass;
break;
default:
return false;
@@ -806,8 +813,11 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
case MVT::v2f64:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
- case MVT::exnref:
- ResultReg = createResultReg(&WebAssembly::EXNREFRegClass);
+ case MVT::funcref:
+ ResultReg = createResultReg(&WebAssembly::FUNCREFRegClass);
+ break;
+ case MVT::externref:
+ ResultReg = createResultReg(&WebAssembly::EXTERNREFRegClass);
break;
default:
return false;
@@ -862,6 +872,15 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
// Add placeholders for the type index and immediate flags
MIB.addImm(0);
MIB.addImm(0);
+
+ // Ensure that the object file has a __indirect_function_table import, as we
+ // call_indirect against it.
+ MCSymbolWasm *Sym = WebAssembly::getOrCreateFunctionTableSymbol(
+ MF->getMMI().getContext(), "__indirect_function_table");
+ // Until call_indirect emits TABLE_NUMBER relocs against this symbol, mark
+ // it as NO_STRIP so as to ensure that the indirect function table makes it
+ // to linked output.
+ Sym->setNoStrip();
}
for (unsigned ArgReg : Args)
@@ -916,9 +935,13 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
Opc = WebAssembly::SELECT_F64;
RC = &WebAssembly::F64RegClass;
break;
- case MVT::exnref:
- Opc = WebAssembly::SELECT_EXNREF;
- RC = &WebAssembly::EXNREFRegClass;
+ case MVT::funcref:
+ Opc = WebAssembly::SELECT_FUNCREF;
+ RC = &WebAssembly::FUNCREFRegClass;
+ break;
+ case MVT::externref:
+ Opc = WebAssembly::SELECT_EXTERNREF;
+ RC = &WebAssembly::EXTERNREFRegClass;
break;
default:
return false;
@@ -1321,7 +1344,8 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
- case MVT::exnref:
+ case MVT::funcref:
+ case MVT::externref:
break;
default:
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
index 7f805b34b499..52aa3534c78e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
@@ -41,13 +41,51 @@ public:
char WebAssemblyFixBrTableDefaults::ID = 0;
+// Target independent selection DAG assumes that it is OK to use PointerTy
+// as the index for a "switch", whereas Wasm so far only has a 32-bit br_table.
+// See e.g. SelectionDAGBuilder::visitJumpTableHeader
+// We have a 64-bit br_table in the tablegen defs as a result, which does get
+// selected, and thus we get incorrect truncates/extensions happening on
+// wasm64. Here we fix that.
+void fixBrTableIndex(MachineInstr &MI, MachineBasicBlock *MBB,
+ MachineFunction &MF) {
+ // Only happens on wasm64.
+ auto &WST = MF.getSubtarget<WebAssemblySubtarget>();
+ if (!WST.hasAddr64())
+ return;
+
+ assert(MI.getDesc().getOpcode() == WebAssembly::BR_TABLE_I64 &&
+ "64-bit br_table pseudo instruction expected");
+
+ // Find extension op, if any. It sits in the previous BB before the branch.
+ auto ExtMI = MF.getRegInfo().getVRegDef(MI.getOperand(0).getReg());
+ if (ExtMI->getOpcode() == WebAssembly::I64_EXTEND_U_I32) {
+    // The value is unnecessarily extended from 32 to 64 bits; remove the
+    // extension.
+ assert(MI.getOperand(0).getReg() == ExtMI->getOperand(0).getReg());
+ MI.getOperand(0).setReg(ExtMI->getOperand(1).getReg());
+ ExtMI->eraseFromParent();
+ } else {
+ // Incoming 64-bit value that needs to be truncated.
+ Register Reg32 =
+ MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
+ BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(),
+ WST.getInstrInfo()->get(WebAssembly::I32_WRAP_I64), Reg32)
+ .addReg(MI.getOperand(0).getReg());
+ MI.getOperand(0).setReg(Reg32);
+ }
+
+ // We now have a 32-bit operand in all cases, so change the instruction
+ // accordingly.
+ MI.setDesc(WST.getInstrInfo()->get(WebAssembly::BR_TABLE_I32));
+}
+
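
// Illustrative, self-contained sketch (not part of the LLVM sources) of the
// index normalization decision made by fixBrTableIndex above, modeled on
// plain integers; normalizeBrTableIndex is a hypothetical helper, not a real
// API.
#include <cassert>
#include <cstdint>
#include <optional>

// If the 64-bit index is known to be a zero-extended 32-bit value, reuse that
// value directly (the redundant extension gets erased); otherwise wrap the
// 64-bit value to its low 32 bits, which is what I32_WRAP_I64 does.
static std::uint32_t
normalizeBrTableIndex(std::uint64_t Index64,
                      std::optional<std::uint32_t> ZeroExtendedFrom) {
  if (ZeroExtendedFrom)
    return *ZeroExtendedFrom;
  return static_cast<std::uint32_t>(Index64);
}

int main() {
  // Index produced by zero-extending the 32-bit value 42: reuse 42 directly.
  assert(normalizeBrTableIndex(42, std::uint32_t{42}) == 42);
  // A genuinely 64-bit index is wrapped to its low 32 bits.
  assert(normalizeBrTableIndex(0x100000002AULL, std::nullopt) == 0x2A);
  return 0;
}
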
// `MI` is a br_table instruction with a dummy default target argument. This
// function finds and adds the default target argument and removes any redundant
// range check preceding the br_table. Returns the MBB that the br_table is
// moved into so it can be removed from further consideration, or nullptr if the
// br_table cannot be optimized.
-MachineBasicBlock *fixBrTable(MachineInstr &MI, MachineBasicBlock *MBB,
- MachineFunction &MF) {
+MachineBasicBlock *fixBrTableDefault(MachineInstr &MI, MachineBasicBlock *MBB,
+ MachineFunction &MF) {
// Get the header block, which contains the redundant range check.
assert(MBB->pred_size() == 1 && "Expected a single guard predecessor");
auto *HeaderMBB = *MBB->pred_begin();
@@ -125,7 +163,8 @@ bool WebAssemblyFixBrTableDefaults::runOnMachineFunction(MachineFunction &MF) {
MBBSet.erase(MBB);
for (auto &MI : *MBB) {
if (WebAssembly::isBrTable(MI)) {
- auto *Fixed = fixBrTable(MI, MBB, MF);
+ fixBrTableIndex(MI, MBB, MF);
+ auto *Fixed = fixBrTableDefault(MI, MBB, MF);
if (Fixed != nullptr) {
MBBSet.erase(Fixed);
Changed = true;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index dee1c4e28149..d75afdcefb7d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -29,7 +29,12 @@ HANDLE_NODETYPE(SWIZZLE)
HANDLE_NODETYPE(VEC_SHL)
HANDLE_NODETYPE(VEC_SHR_S)
HANDLE_NODETYPE(VEC_SHR_U)
+HANDLE_NODETYPE(WIDEN_LOW_S)
+HANDLE_NODETYPE(WIDEN_LOW_U)
+HANDLE_NODETYPE(WIDEN_HIGH_S)
+HANDLE_NODETYPE(WIDEN_HIGH_U)
HANDLE_NODETYPE(THROW)
+HANDLE_NODETYPE(CATCH)
HANDLE_NODETYPE(MEMORY_COPY)
HANDLE_NODETYPE(MEMORY_FILL)
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index d1a696f854f8..b9154b09fbbc 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -80,9 +80,6 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
auto GlobalGetIns = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
: WebAssembly::GLOBAL_GET_I32;
- auto ConstIns =
- PtrVT == MVT::i64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
- auto AddIns = PtrVT == MVT::i64 ? WebAssembly::ADD_I64 : WebAssembly::ADD_I32;
// Few custom selection stuff.
SDLoc DL(Node);
@@ -126,41 +123,6 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
return;
}
- case ISD::GlobalTLSAddress: {
- const auto *GA = cast<GlobalAddressSDNode>(Node);
-
- if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
- report_fatal_error("cannot use thread-local storage without bulk memory",
- false);
-
- // Currently Emscripten does not support dynamic linking with threads.
- // Therefore, if we have thread-local storage, only the local-exec model
- // is possible.
- // TODO: remove this and implement proper TLS models once Emscripten
- // supports dynamic linking with threads.
- if (GA->getGlobal()->getThreadLocalMode() !=
- GlobalValue::LocalExecTLSModel &&
- !Subtarget->getTargetTriple().isOSEmscripten()) {
- report_fatal_error("only -ftls-model=local-exec is supported for now on "
- "non-Emscripten OSes: variable " +
- GA->getGlobal()->getName(),
- false);
- }
-
- SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT);
- SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress(
- GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0);
-
- MachineSDNode *TLSBase =
- CurDAG->getMachineNode(GlobalGetIns, DL, PtrVT, TLSBaseSym);
- MachineSDNode *TLSOffset =
- CurDAG->getMachineNode(ConstIns, DL, PtrVT, TLSOffsetSym);
- MachineSDNode *TLSAddress = CurDAG->getMachineNode(
- AddIns, DL, PtrVT, SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
- ReplaceNode(Node, TLSAddress);
- return;
- }
-
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
switch (IntNo) {
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 925636c82321..e348bba2b04c 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -16,6 +16,7 @@
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -68,6 +70,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
computeRegisterProperties(Subtarget->getRegisterInfo());
setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
setOperationAction(ISD::JumpTable, MVTPtr, Custom);
setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
@@ -123,6 +126,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Hoist bitcasts out of shuffles
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ // Combine extends of extract_subvectors into widening ops
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+
// Support saturating add for i8x16 and i16x8
for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
for (auto T : {MVT::v16i8, MVT::v8i16})
@@ -156,11 +163,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// There is no i8x16.mul instruction
setOperationAction(ISD::MUL, MVT::v16i8, Expand);
- // There are no vector select instructions
- for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT})
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
- MVT::v2f64})
- setOperationAction(Op, T, Expand);
+ // There is no vector conditional select instruction
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
+ MVT::v2f64})
+ setOperationAction(ISD::SELECT_CC, T, Expand);
// Expand integer operations supported for scalars but not SIMD
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
@@ -247,6 +253,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
}
+ // And some truncating stores are legal as well
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
}
// Don't do anything clever with build_pairs
@@ -258,6 +267,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Exception handling intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setMaxAtomicSizeInBitsSupported(64);
@@ -268,7 +278,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
// Define the emscripten name for return address helper.
- // TODO: when implementing other WASM backends, make this generic or only do
+ // TODO: when implementing other Wasm backends, make this generic or only do
// this on emscripten depending on what they end up doing.
setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");
@@ -442,6 +452,19 @@ static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults,
const MCInstrDesc &MCID = TII.get(CallOp);
MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
+ // See if we must truncate the function pointer.
+ // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
+ // as 64-bit for uniformity with other pointer types.
+ if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) {
+ Register Reg32 =
+ MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
+ auto &FnPtr = CallParams.getOperand(0);
+ BuildMI(*BB, CallResults.getIterator(), DL,
+ TII.get(WebAssembly::I32_WRAP_I64), Reg32)
+ .addReg(FnPtr.getReg());
+ FnPtr.setReg(Reg32);
+ }
+
// Move the function pointer to the end of the arguments for indirect calls
if (IsIndirect) {
auto FnPtr = CallParams.getOperand(0);
@@ -456,6 +479,15 @@ static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults,
if (IsIndirect) {
MIB.addImm(0);
MIB.addImm(0);
+
+ // Ensure that the object file has a __indirect_function_table import, as we
+ // call_indirect against it.
+ MCSymbolWasm *Sym = WebAssembly::getOrCreateFunctionTableSymbol(
+ MF.getContext(), "__indirect_function_table");
+ // Until call_indirect emits TABLE_NUMBER relocs against this symbol, mark
+ // it as NO_STRIP so as to ensure that the indirect function table makes it
+ // to linked output.
+ Sym->setNoStrip();
}
for (auto Use : CallParams.uses())
@@ -542,6 +574,16 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
if (VT.getSizeInBits() <= 64)
return std::make_pair(0U, &WebAssembly::I64RegClass);
}
+ if (VT.isFloatingPoint() && !VT.isVector()) {
+ switch (VT.getSizeInBits()) {
+ case 32:
+ return std::make_pair(0U, &WebAssembly::F32RegClass);
+ case 64:
+ return std::make_pair(0U, &WebAssembly::F64RegClass);
+ default:
+ break;
+ }
+ }
break;
default:
break;
@@ -626,7 +668,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
- case Intrinsic::wasm_atomic_notify:
+ case Intrinsic::wasm_memory_atomic_notify:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
@@ -640,7 +682,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// consistent. The same applies for wasm_atomic_wait intrinsics too.
Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
return true;
- case Intrinsic::wasm_atomic_wait_i32:
+ case Intrinsic::wasm_memory_atomic_wait32:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
@@ -648,7 +690,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = Align(4);
Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
return true;
- case Intrinsic::wasm_atomic_wait_i64:
+ case Intrinsic::wasm_memory_atomic_wait64:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(0);
@@ -656,6 +698,75 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = Align(8);
Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
return true;
+ case Intrinsic::wasm_load32_zero:
+ case Intrinsic::wasm_load64_zero:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = Info.memVT == MVT::i32 ? Align(4) : Align(8);
+ Info.flags = MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::wasm_load8_lane:
+ case Intrinsic::wasm_load16_lane:
+ case Intrinsic::wasm_load32_lane:
+ case Intrinsic::wasm_load64_lane:
+ case Intrinsic::wasm_store8_lane:
+ case Intrinsic::wasm_store16_lane:
+ case Intrinsic::wasm_store32_lane:
+ case Intrinsic::wasm_store64_lane: {
+ MVT MemVT;
+ Align MemAlign;
+ switch (Intrinsic) {
+ case Intrinsic::wasm_load8_lane:
+ case Intrinsic::wasm_store8_lane:
+ MemVT = MVT::i8;
+ MemAlign = Align(1);
+ break;
+ case Intrinsic::wasm_load16_lane:
+ case Intrinsic::wasm_store16_lane:
+ MemVT = MVT::i16;
+ MemAlign = Align(2);
+ break;
+ case Intrinsic::wasm_load32_lane:
+ case Intrinsic::wasm_store32_lane:
+ MemVT = MVT::i32;
+ MemAlign = Align(4);
+ break;
+ case Intrinsic::wasm_load64_lane:
+ case Intrinsic::wasm_store64_lane:
+ MemVT = MVT::i64;
+ MemAlign = Align(8);
+ break;
+ default:
+ llvm_unreachable("unexpected intrinsic");
+ }
+ if (Intrinsic == Intrinsic::wasm_load8_lane ||
+ Intrinsic == Intrinsic::wasm_load16_lane ||
+ Intrinsic == Intrinsic::wasm_load32_lane ||
+ Intrinsic == Intrinsic::wasm_load64_lane) {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.flags = MachineMemOperand::MOLoad;
+ } else {
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.flags = MachineMemOperand::MOStore;
+ }
+ Info.ptrVal = I.getArgOperand(0);
+ Info.memVT = MemVT;
+ Info.offset = 0;
+ Info.align = MemAlign;
+ return true;
+ }
+ case Intrinsic::wasm_prefetch_t:
+ case Intrinsic::wasm_prefetch_nt: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::i8;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = Align(1);
+ Info.flags = MachineMemOperand::MOLoad;
+ return true;
+ }
default:
return false;
}
@@ -866,8 +977,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
/*isSS=*/false);
unsigned ValNo = 0;
SmallVector<SDValue, 8> Chains;
- for (SDValue Arg :
- make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+ for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
assert(ArgLocs[ValNo].getValNo() == ValNo &&
"ArgLocs should remain in order and only hold varargs args");
unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
@@ -876,7 +986,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.getConstant(Offset, DL, PtrVT));
Chains.push_back(
DAG.getStore(Chain, DL, Arg, Add,
- MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
+ MachinePointerInfo::getFixedStack(MF, FI, Offset)));
}
if (!Chains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
@@ -1091,6 +1201,8 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
return LowerFrameIndex(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ return LowerGlobalTLSAddress(Op, DAG);
case ISD::ExternalSymbol:
return LowerExternalSymbol(Op, DAG);
case ISD::JumpTable:
@@ -1199,6 +1311,49 @@ SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}
+SDValue
+WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ const auto *GA = cast<GlobalAddressSDNode>(Op);
+ MVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
+ report_fatal_error("cannot use thread-local storage without bulk memory",
+ false);
+
+ const GlobalValue *GV = GA->getGlobal();
+
+ // Currently Emscripten does not support dynamic linking with threads.
+ // Therefore, if we have thread-local storage, only the local-exec model
+ // is possible.
+ // TODO: remove this and implement proper TLS models once Emscripten
+ // supports dynamic linking with threads.
+ if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel &&
+ !Subtarget->getTargetTriple().isOSEmscripten()) {
+ report_fatal_error("only -ftls-model=local-exec is supported for now on "
+ "non-Emscripten OSes: variable " +
+ GV->getName(),
+ false);
+ }
+
+ auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
+ : WebAssembly::GLOBAL_GET_I32;
+ const char *BaseName = MF.createExternalSymbolName("__tls_base");
+
+ SDValue BaseAddr(
+ DAG.getMachineNode(GlobalGet, DL, PtrVT,
+ DAG.getTargetExternalSymbol(BaseName, PtrVT)),
+ 0);
+
+ SDValue TLSOffset = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
+ SDValue SymAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, TLSOffset);
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
+}
+
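
// Illustrative, self-contained sketch (not part of the LLVM sources) of the
// local-exec addressing scheme produced by LowerGlobalTLSAddress above: a
// thread-local address is the thread's __tls_base global plus the variable's
// TLS-relative offset (MO_TLS_BASE_REL). The concrete numbers below are made
// up for illustration.
#include <cassert>
#include <cstdint>

// global.get __tls_base ; const var@TLSREL ; add
static std::uint64_t localExecTLSAddress(std::uint64_t TLSBase,
                                         std::uint64_t VarTLSOffset) {
  return TLSBase + VarTLSOffset;
}

int main() {
  // Suppose this thread's TLS block starts at 0x10000 and the variable lives
  // 16 bytes into it.
  assert(localExecTLSAddress(0x10000, 16) == 0x10010);
  return 0;
}
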
SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -1303,7 +1458,22 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
MFI->getVarargBufferVreg(), PtrVT);
return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
- MachinePointerInfo(SV), 0);
+ MachinePointerInfo(SV));
+}
+
+static SDValue getCppExceptionSymNode(SDValue Op, unsigned TagIndex,
+ SelectionDAG &DAG) {
+ // We only support C++ exceptions for now
+ int Tag =
+ cast<ConstantSDNode>(Op.getOperand(TagIndex).getNode())->getZExtValue();
+ if (Tag != WebAssembly::CPP_EXCEPTION)
+ llvm_unreachable("Invalid tag: We only support C++ exceptions for now");
+ auto &MF = DAG.getMachineFunction();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ const char *SymName = MF.createExternalSymbolName("__cpp_exception");
+ return DAG.getNode(WebAssemblyISD::Wrapper, SDLoc(Op), PtrVT,
+ DAG.getTargetExternalSymbol(SymName, PtrVT));
}
SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
@@ -1339,15 +1509,7 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
}
case Intrinsic::wasm_throw: {
- // We only support C++ exceptions for now
- int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
- if (Tag != CPP_EXCEPTION)
- llvm_unreachable("Invalid tag!");
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
- const char *SymName = MF.createExternalSymbolName("__cpp_exception");
- SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
- DAG.getTargetExternalSymbol(SymName, PtrVT));
+ SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
return DAG.getNode(WebAssemblyISD::THROW, DL,
MVT::Other, // outchain type
{
@@ -1357,6 +1519,19 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
});
}
+ case Intrinsic::wasm_catch: {
+ SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
+ return DAG.getNode(WebAssemblyISD::CATCH, DL,
+ {
+ MVT::i32, // outchain type
+ MVT::Other // return value
+ },
+ {
+ Op.getOperand(0), // inchain
+ SymNode // exception symbol
+ });
+ }
+
case Intrinsic::wasm_shuffle: {
// Drop in-chain and replace undefs, but otherwise pass through unchanged
SDValue Ops[18];
@@ -1474,8 +1649,8 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SmallVector<SwizzleEntry, 16> SwizzleCounts;
auto AddCount = [](auto &Counts, const auto &Val) {
- auto CountIt = std::find_if(Counts.begin(), Counts.end(),
- [&Val](auto E) { return E.first == Val; });
+ auto CountIt =
+ llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
if (CountIt == Counts.end()) {
Counts.emplace_back(Val, 1);
} else {
@@ -1537,6 +1712,7 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
};
} else if (NumConstantLanes >= NumSplatLanes &&
Subtarget->hasUnimplementedSIMD128()) {
+ // If we support v128.const, emit it directly
SmallVector<SDValue, 16> ConstLanes;
for (const SDValue &Lane : Op->op_values()) {
if (IsConstant(Lane)) {
@@ -1548,11 +1724,59 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
}
Result = DAG.getBuildVector(VecT, DL, ConstLanes);
- IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+ IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
return IsConstant(Lane);
};
- }
- if (!Result) {
+ } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) {
+ // Otherwise, if this is an integer vector, pack the lane values together so
+ // we can construct the 128-bit constant from a pair of i64s using a splat
+ // followed by at most one i64x2.replace_lane. Also keep track of the lanes
+ // that actually matter so we can avoid the replace_lane in more cases.
+ std::array<uint64_t, 2> I64s{{0, 0}};
+ std::array<uint64_t, 2> ConstLaneMasks{{0, 0}};
+ size_t LaneBits = 128 / Lanes;
+ size_t HalfLanes = Lanes / 2;
+ for (size_t I = 0; I < Lanes; ++I) {
+ const SDValue &Lane = Op.getOperand(I);
+ if (IsConstant(Lane)) {
+ // How much we need to shift Val to position it in an i64
+ auto Shift = LaneBits * (I % HalfLanes);
+ auto Mask = maskTrailingOnes<uint64_t>(LaneBits);
+ auto Val = cast<ConstantSDNode>(Lane.getNode())->getZExtValue() & Mask;
+ I64s[I / HalfLanes] |= Val << Shift;
+ ConstLaneMasks[I / HalfLanes] |= Mask << Shift;
+ }
+ }
+ // Check whether all constant lanes in the second half of the vector are
+ // equivalent in the first half or vice versa to determine whether splatting
+ // either side will be sufficient to materialize the constant. As a special
+ // case, if the first and second halves have no constant lanes in common, we
+ // can just combine them.
+ bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1];
+ bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0];
+ bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0;
+
+ uint64_t Splatted;
+ if (SecondHalfSufficient) {
+ Splatted = I64s[1];
+ } else if (CombinedSufficient) {
+ Splatted = I64s[0] | I64s[1];
+ } else {
+ Splatted = I64s[0];
+ }
+
+ Result = DAG.getSplatBuildVector(MVT::v2i64, DL,
+ DAG.getConstant(Splatted, DL, MVT::i64));
+ if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) {
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result,
+ DAG.getConstant(I64s[1], DL, MVT::i64),
+ DAG.getConstant(1, DL, MVT::i32));
+ }
+ Result = DAG.getBitcast(VecT, Result);
+ IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
+ return IsConstant(Lane);
+ };
+ } else {
// Use a splat, but possibly a load_splat
LoadSDNode *SplattedLoad;
if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
@@ -1565,11 +1789,14 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
} else {
Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
}
- IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+ IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
return Lane == SplatValue;
};
}
+ assert(Result);
+ assert(IsLaneConstructed);
+
// Add replace_lane instructions for any unhandled values
for (size_t I = 0; I < Lanes; ++I) {
const SDValue &Lane = Op->getOperand(I);
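
// Illustrative, self-contained sketch (not part of the LLVM sources) of the
// constant-lane packing used above for integer BUILD_VECTORs: constant lanes
// are OR'ed into two 64-bit halves together with a mask of which bits are
// constant, and the halves are compared to decide whether a single i64x2
// splat already covers every constant lane or one replace_lane is still
// needed. packConstantLanes and PackedConst are hypothetical names for this
// simplified model.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

struct PackedConst {
  std::array<std::uint64_t, 2> Halves; // packed constant lane bits per half
  std::uint64_t Splatted;              // value splatted into both halves
  bool NeedsReplaceLane;               // true if half 1 must be patched after
};

static PackedConst
packConstantLanes(const std::vector<std::optional<std::uint64_t>> &Lanes) {
  std::size_t NumLanes = Lanes.size();
  std::size_t LaneBits = 128 / NumLanes;
  std::size_t HalfLanes = NumLanes / 2;
  std::uint64_t Mask =
      LaneBits == 64 ? ~0ULL : ((std::uint64_t{1} << LaneBits) - 1);
  std::array<std::uint64_t, 2> I64s{{0, 0}}, ConstMasks{{0, 0}};
  for (std::size_t I = 0; I < NumLanes; ++I) {
    if (!Lanes[I])
      continue; // non-constant lane; a replace_lane fills it in later anyway
    std::uint64_t Shift = LaneBits * (I % HalfLanes);
    I64s[I / HalfLanes] |= (*Lanes[I] & Mask) << Shift;
    ConstMasks[I / HalfLanes] |= Mask << Shift;
  }
  bool FirstSufficient = (I64s[0] & ConstMasks[1]) == I64s[1];
  bool SecondSufficient = (I64s[1] & ConstMasks[0]) == I64s[0];
  bool CombinedSufficient = (ConstMasks[0] & ConstMasks[1]) == 0;
  std::uint64_t Splatted = SecondSufficient     ? I64s[1]
                           : CombinedSufficient ? (I64s[0] | I64s[1])
                                                : I64s[0];
  return {I64s, Splatted,
          !FirstSufficient && !SecondSufficient && !CombinedSufficient};
}

int main() {
  // v4i32 with constant lanes {1, 2, 1, 2}: both halves pack to the same i64,
  // so a single splat suffices and no replace_lane is needed.
  PackedConst P = packConstantLanes({1, 2, 1, 2});
  assert(P.Splatted == ((std::uint64_t{2} << 32) | 1));
  assert(!P.NeedsReplaceLane);
  return 0;
}
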
@@ -1730,6 +1957,49 @@ performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return DAG.getBitcast(DstType, NewShuffle);
}
+static SDValue performVectorWidenCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ auto &DAG = DCI.DAG;
+ assert(N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND);
+
+ // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
+ // possible before the extract_subvector can be expanded.
+ auto Extract = N->getOperand(0);
+ if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+ auto Source = Extract.getOperand(0);
+ auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+ if (IndexNode == nullptr)
+ return SDValue();
+ auto Index = IndexNode->getZExtValue();
+
+ // Only v8i8 and v4i16 extracts can be widened, and only if the extracted
+ // subvector is the low or high half of its source.
+ EVT ResVT = N->getValueType(0);
+ if (ResVT == MVT::v8i16) {
+ if (Extract.getValueType() != MVT::v8i8 ||
+ Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
+ return SDValue();
+ } else if (ResVT == MVT::v4i32) {
+ if (Extract.getValueType() != MVT::v4i16 ||
+ Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
+ return SDValue();
+ } else {
+ return SDValue();
+ }
+
+ bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
+ bool IsLow = Index == 0;
+
+ unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::WIDEN_LOW_S
+ : WebAssemblyISD::WIDEN_HIGH_S)
+ : (IsLow ? WebAssemblyISD::WIDEN_LOW_U
+ : WebAssemblyISD::WIDEN_HIGH_U);
+
+ return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+}
+
SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -1738,5 +2008,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
case ISD::VECTOR_SHUFFLE:
return performVECTOR_SHUFFLECombine(N, DCI);
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return performVectorWidenCombine(N, DCI);
}
}
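As a rough illustration of what the new sign/zero-extend combine above selects, here is a standalone C++ model of widening the low or high half of a 16 x i8 vector to 8 x i16 (array-based, not LLVM code; the function name is made up).

#include <array>
#include <cstdint>
#include <cstdio>

// Widen eight i8 lanes (low or high half of a 16-lane vector) to i16 with
// either sign or zero extension, mirroring widen_{low,high}_{s,u}.
static std::array<int16_t, 8> widenI8x16(const std::array<int8_t, 16> &Src,
                                         bool High, bool Signed) {
  std::array<int16_t, 8> Out{};
  for (size_t I = 0; I < 8; ++I) {
    int8_t Lane = Src[(High ? 8 : 0) + I];
    Out[I] = Signed ? static_cast<int16_t>(Lane)
                    : static_cast<int16_t>(static_cast<uint8_t>(Lane));
  }
  return Out;
}

int main() {
  std::array<int8_t, 16> V{};
  for (int I = 0; I < 16; ++I)
    V[I] = static_cast<int8_t>(I - 8); // lanes -8 .. 7
  auto LowS = widenI8x16(V, /*High=*/false, /*Signed=*/true);
  auto LowU = widenI8x16(V, /*High=*/false, /*Signed=*/false);
  std::printf("widen_low_s lane0 = %d, widen_low_u lane0 = %d\n",
              LowS[0], LowU[0]); // -8 and 248
}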
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index b8e612377529..c8a052d01199 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -106,6 +106,7 @@ private:
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index 256b77e33db9..22103b0bfb38 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -33,112 +33,117 @@ multiclass ATOMIC_NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "",
//===----------------------------------------------------------------------===//
let hasSideEffects = 1 in {
-defm ATOMIC_NOTIFY_A32 :
+defm MEMORY_ATOMIC_NOTIFY_A32 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
- "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
- "atomic.notify \t${off}${p2align}", 0x00, "false">;
-defm ATOMIC_NOTIFY_A64 :
+ "memory.atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
+ "memory.atomic.notify \t${off}${p2align}", 0x00, "false">;
+defm MEMORY_ATOMIC_NOTIFY_A64 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$count),
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
- "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
- "atomic.notify \t${off}${p2align}", 0x00, "true">;
+ "memory.atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
+ "memory.atomic.notify \t${off}${p2align}", 0x00, "true">;
let mayLoad = 1 in {
-defm ATOMIC_WAIT_I32_A32 :
+defm MEMORY_ATOMIC_WAIT32_A32 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp,
I64:$timeout),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
- "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "i32.atomic.wait \t${off}${p2align}", 0x01, "false">;
-defm ATOMIC_WAIT_I32_A64 :
+ "memory.atomic.wait32 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "memory.atomic.wait32 \t${off}${p2align}", 0x01, "false">;
+defm MEMORY_ATOMIC_WAIT32_A64 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$exp,
I64:$timeout),
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
- "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "i32.atomic.wait \t${off}${p2align}", 0x01, "true">;
-defm ATOMIC_WAIT_I64_A32 :
+ "memory.atomic.wait32 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "memory.atomic.wait32 \t${off}${p2align}", 0x01, "true">;
+defm MEMORY_ATOMIC_WAIT64_A32 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp,
I64:$timeout),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
- "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "i64.atomic.wait \t${off}${p2align}", 0x02, "false">;
-defm ATOMIC_WAIT_I64_A64 :
+ "memory.atomic.wait64 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "memory.atomic.wait64 \t${off}${p2align}", 0x02, "false">;
+defm MEMORY_ATOMIC_WAIT64_A64 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr, I64:$exp,
I64:$timeout),
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
- "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "i64.atomic.wait \t${off}${p2align}", 0x02, "true">;
+ "memory.atomic.wait64 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "memory.atomic.wait64 \t${off}${p2align}", 0x02, "true">;
} // mayLoad = 1
} // hasSideEffects = 1
-let Predicates = [HasAtomics] in {
// Select notifys with no constant offset.
def NotifyPatNoOffset_A32 :
- Pat<(i32 (int_wasm_atomic_notify I32:$addr, I32:$count)),
- (ATOMIC_NOTIFY_A32 0, 0, I32:$addr, I32:$count)>,
- Requires<[HasAddr32]>;
+ Pat<(i32 (int_wasm_memory_atomic_notify I32:$addr, I32:$count)),
+ (MEMORY_ATOMIC_NOTIFY_A32 0, 0, I32:$addr, I32:$count)>,
+ Requires<[HasAddr32, HasAtomics]>;
def NotifyPatNoOffset_A64 :
- Pat<(i32 (int_wasm_atomic_notify I64:$addr, I32:$count)),
- (ATOMIC_NOTIFY_A64 0, 0, I64:$addr, I32:$count)>,
- Requires<[HasAddr64]>;
+ Pat<(i32 (int_wasm_memory_atomic_notify I64:$addr, I32:$count)),
+ (MEMORY_ATOMIC_NOTIFY_A64 0, 0, I64:$addr, I32:$count)>,
+ Requires<[HasAddr64, HasAtomics]>;
// Select notifys with a constant offset.
// Pattern with address + immediate offset
multiclass NotifyPatImmOff<PatFrag operand, string inst> {
- def : Pat<(i32 (int_wasm_atomic_notify (operand I32:$addr, imm:$off),
+ def : Pat<(i32 (int_wasm_memory_atomic_notify (operand I32:$addr, imm:$off),
I32:$count)),
(!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, I32:$count)>,
- Requires<[HasAddr32]>;
- def : Pat<(i32 (int_wasm_atomic_notify (operand I64:$addr, imm:$off),
+ Requires<[HasAddr32, HasAtomics]>;
+ def : Pat<(i32 (int_wasm_memory_atomic_notify (operand I64:$addr, imm:$off),
I32:$count)),
(!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, I32:$count)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
-defm : NotifyPatImmOff<regPlusImm, "ATOMIC_NOTIFY">;
-defm : NotifyPatImmOff<or_is_add, "ATOMIC_NOTIFY">;
+defm : NotifyPatImmOff<regPlusImm, "MEMORY_ATOMIC_NOTIFY">;
+defm : NotifyPatImmOff<or_is_add, "MEMORY_ATOMIC_NOTIFY">;
// Select notifys with just a constant offset.
def NotifyPatOffsetOnly_A32 :
- Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)),
- (ATOMIC_NOTIFY_A32 0, imm:$off, (CONST_I32 0), I32:$count)>,
- Requires<[HasAddr32]>;
+ Pat<(i32 (int_wasm_memory_atomic_notify imm:$off, I32:$count)),
+ (MEMORY_ATOMIC_NOTIFY_A32 0, imm:$off, (CONST_I32 0), I32:$count)>,
+ Requires<[HasAddr32, HasAtomics]>;
def NotifyPatOffsetOnly_A64 :
- Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)),
- (ATOMIC_NOTIFY_A64 0, imm:$off, (CONST_I64 0), I32:$count)>,
- Requires<[HasAddr64]>;
+ Pat<(i32 (int_wasm_memory_atomic_notify imm:$off, I32:$count)),
+ (MEMORY_ATOMIC_NOTIFY_A64 0, imm:$off, (CONST_I64 0), I32:$count)>,
+ Requires<[HasAddr64, HasAtomics]>;
def NotifyPatGlobalAddrOffOnly_A32 :
- Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
- I32:$count)),
- (ATOMIC_NOTIFY_A32 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>,
- Requires<[HasAddr32]>;
+ Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
+ I32:$count)),
+ (MEMORY_ATOMIC_NOTIFY_A32 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)
+ >,
+ Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
def NotifyPatGlobalAddrOffOnly_A64 :
- Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
- I32:$count)),
- (ATOMIC_NOTIFY_A64 0, tglobaladdr:$off, (CONST_I64 0), I32:$count)>,
- Requires<[HasAddr64]>;
+ Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
+ I32:$count)),
+ (MEMORY_ATOMIC_NOTIFY_A64 0, tglobaladdr:$off, (CONST_I64 0), I32:$count)
+ >,
+ Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
// Select waits with no constant offset.
multiclass WaitPatNoOffset<ValueType ty, Intrinsic kind,
string inst> {
def : Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
(!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$exp, I64:$timeout)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(i32 (kind I64:$addr, ty:$exp, I64:$timeout)),
(!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$exp, I64:$timeout)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
-defm : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">;
-defm : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">;
-defm : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">;
-defm : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">;
+defm : WaitPatNoOffset<i32, int_wasm_memory_atomic_wait32,
+ "MEMORY_ATOMIC_WAIT32">;
+defm : WaitPatNoOffset<i64, int_wasm_memory_atomic_wait64,
+ "MEMORY_ATOMIC_WAIT64">;
+defm : WaitPatNoOffset<i32, int_wasm_memory_atomic_wait32,
+ "MEMORY_ATOMIC_WAIT32">;
+defm : WaitPatNoOffset<i64, int_wasm_memory_atomic_wait64,
+ "MEMORY_ATOMIC_WAIT64">;
// Select waits with a constant offset.
@@ -148,52 +153,53 @@ multiclass WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand,
def : Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
(!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$exp,
I64:$timeout)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(i32 (kind (operand I64:$addr, imm:$off), ty:$exp, I64:$timeout)),
(!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$exp,
I64:$timeout)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
-defm : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm,
- "ATOMIC_WAIT_I32">;
-defm : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add,
- "ATOMIC_WAIT_I32">;
-defm : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm,
- "ATOMIC_WAIT_I64">;
-defm : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add,
- "ATOMIC_WAIT_I64">;
-
-// Select wait_i32, "ATOMIC_WAIT_I32s with just a constant offset.
+defm : WaitPatImmOff<i32, int_wasm_memory_atomic_wait32, regPlusImm,
+ "MEMORY_ATOMIC_WAIT32">;
+defm : WaitPatImmOff<i32, int_wasm_memory_atomic_wait32, or_is_add,
+ "MEMORY_ATOMIC_WAIT32">;
+defm : WaitPatImmOff<i64, int_wasm_memory_atomic_wait64, regPlusImm,
+ "MEMORY_ATOMIC_WAIT64">;
+defm : WaitPatImmOff<i64, int_wasm_memory_atomic_wait64, or_is_add,
+ "MEMORY_ATOMIC_WAIT64">;
+
+// Select waits with just a constant offset.
multiclass WaitPatOffsetOnly<ValueType ty, Intrinsic kind, string inst> {
def : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
(!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$exp,
I64:$timeout)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
(!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$exp,
I64:$timeout)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
-defm : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">;
-defm : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">;
+defm : WaitPatOffsetOnly<i32, int_wasm_memory_atomic_wait32,
+ "MEMORY_ATOMIC_WAIT32">;
+defm : WaitPatOffsetOnly<i64, int_wasm_memory_atomic_wait64,
+ "MEMORY_ATOMIC_WAIT64">;
multiclass WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, string inst> {
def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp,
I64:$timeout)),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp,
I64:$timeout)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp,
I64:$timeout)),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp,
I64:$timeout)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
}
-defm : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32,
- "ATOMIC_WAIT_I32">;
-defm : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64,
- "ATOMIC_WAIT_I64">;
-} // Predicates = [HasAtomics]
+defm : WaitPatGlobalAddrOffOnly<i32, int_wasm_memory_atomic_wait32,
+ "MEMORY_ATOMIC_WAIT32">;
+defm : WaitPatGlobalAddrOffOnly<i64, int_wasm_memory_atomic_wait64,
+ "MEMORY_ATOMIC_WAIT64">;
//===----------------------------------------------------------------------===//
// Atomic fences
@@ -221,7 +227,6 @@ defm ATOMIC_LOAD_I32 : AtomicLoad<I32, "i32.atomic.load", 0x10>;
defm ATOMIC_LOAD_I64 : AtomicLoad<I64, "i64.atomic.load", 0x11>;
// Select loads with no constant offset.
-let Predicates = [HasAtomics] in {
defm : LoadPatNoOffset<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
defm : LoadPatNoOffset<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
@@ -240,7 +245,6 @@ defm : LoadPatOffsetOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
defm : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
-} // Predicates = [HasAtomics]
// Extending loads. Note that there are only zero-extending atomic loads, no
// sign-extending loads.
@@ -285,7 +289,6 @@ def sext_aload_8_64 :
def sext_aload_16_64 :
PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>;
-let Predicates = [HasAtomics] in {
// Select zero-extending loads with no constant offset.
defm : LoadPatNoOffset<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
defm : LoadPatNoOffset<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
@@ -344,7 +347,6 @@ defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-} // Predicates = [HasAtomics]
//===----------------------------------------------------------------------===//
// Atomic stores
@@ -363,16 +365,15 @@ defm ATOMIC_STORE_I64 : AtomicStore<I64, "i64.atomic.store", 0x18>;
// store: (store $val, $ptr)
// atomic_store: (store $ptr, $val)
-let Predicates = [HasAtomics] in {
// Select stores with no constant offset.
multiclass AStorePatNoOffset<ValueType ty, PatFrag kind, string inst> {
def : Pat<(kind I32:$addr, ty:$val),
(!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$val)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(kind I64:$addr, ty:$val),
(!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$val)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
defm : AStorePatNoOffset<i32, atomic_store_32, "ATOMIC_STORE_I32">;
defm : AStorePatNoOffset<i64, atomic_store_64, "ATOMIC_STORE_I64">;
@@ -384,10 +385,10 @@ multiclass AStorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
string inst> {
def : Pat<(kind (operand I32:$addr, imm:$off), ty:$val),
(!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$val)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(kind (operand I64:$addr, imm:$off), ty:$val),
(!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$val)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
defm : AStorePatImmOff<i32, atomic_store_32, regPlusImm, "ATOMIC_STORE_I32">;
defm : AStorePatImmOff<i64, atomic_store_64, regPlusImm, "ATOMIC_STORE_I64">;
@@ -396,10 +397,10 @@ defm : AStorePatImmOff<i64, atomic_store_64, regPlusImm, "ATOMIC_STORE_I64">;
multiclass AStorePatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
def : Pat<(kind imm:$off, ty:$val),
(!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$val)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(kind imm:$off, ty:$val),
(!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$val)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
defm : AStorePatOffsetOnly<i32, atomic_store_32, "ATOMIC_STORE_I32">;
defm : AStorePatOffsetOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">;
@@ -407,15 +408,14 @@ defm : AStorePatOffsetOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">;
multiclass AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
}
defm : AStorePatGlobalAddrOffOnly<i32, atomic_store_32, "ATOMIC_STORE_I32">;
defm : AStorePatGlobalAddrOffOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">;
-} // Predicates = [HasAtomics]
// Truncating stores.
defm ATOMIC_STORE8_I32 : AtomicStore<I32, "i32.atomic.store8", 0x19>;
@@ -436,7 +436,6 @@ def trunc_astore_8_64 : trunc_astore_64<atomic_store_8>;
def trunc_astore_16_64 : trunc_astore_64<atomic_store_16>;
def trunc_astore_32_64 : trunc_astore_64<atomic_store_32>;
-let Predicates = [HasAtomics] in {
// Truncating stores with no constant offset
defm : AStorePatNoOffset<i32, atomic_store_8, "ATOMIC_STORE8_I32">;
@@ -474,7 +473,6 @@ defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_8_64, "ATOMIC_STORE8_I64">;
defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_16_64, "ATOMIC_STORE16_I64">;
defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, "ATOMIC_STORE32_I64">;
-} // Predicates = [HasAtomics]
//===----------------------------------------------------------------------===//
// Atomic binary read-modify-writes
@@ -580,10 +578,10 @@ defm ATOMIC_RMW32_U_XCHG_I64 :
multiclass BinRMWPatNoOffset<ValueType ty, PatFrag kind, string inst> {
def : Pat<(ty (kind I32:$addr, ty:$val)),
(!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$val)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(ty (kind I64:$addr, ty:$val)),
(!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$val)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
// Select binary RMWs with a constant offset.
@@ -593,29 +591,29 @@ multiclass BinRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
string inst> {
def : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)),
(!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$val)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(ty (kind (operand I64:$addr, imm:$off), ty:$val)),
(!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$val)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
// Select binary RMWs with just a constant offset.
multiclass BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
def : Pat<(ty (kind imm:$off, ty:$val)),
(!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$val)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(ty (kind imm:$off, ty:$val)),
(!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$val)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
multiclass BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> {
def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
}
// Patterns for various addressing modes.
@@ -636,7 +634,6 @@ multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, string inst_32,
defm : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
}
-let Predicates = [HasAtomics] in {
defm : BinRMWPattern<atomic_load_add_32, atomic_load_add_64,
"ATOMIC_RMW_ADD_I32", "ATOMIC_RMW_ADD_I64">;
defm : BinRMWPattern<atomic_load_sub_32, atomic_load_sub_64,
@@ -649,7 +646,6 @@ defm : BinRMWPattern<atomic_load_xor_32, atomic_load_xor_64,
"ATOMIC_RMW_XOR_I32", "ATOMIC_RMW_XOR_I64">;
defm : BinRMWPattern<atomic_swap_32, atomic_swap_64,
"ATOMIC_RMW_XCHG_I32", "ATOMIC_RMW_XCHG_I64">;
-} // Predicates = [HasAtomics]
// Truncating & zero-extending binary RMW patterns.
// These are combined patterns of truncating store patterns and zero-extending
@@ -752,7 +748,6 @@ multiclass BinRMWTruncExtPattern<
defm : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
}
-let Predicates = [HasAtomics] in {
defm : BinRMWTruncExtPattern<
atomic_load_add_8, atomic_load_add_16, atomic_load_add_32, atomic_load_add_64,
"ATOMIC_RMW8_U_ADD_I32", "ATOMIC_RMW16_U_ADD_I32",
@@ -778,7 +773,6 @@ defm : BinRMWTruncExtPattern<
"ATOMIC_RMW8_U_XCHG_I32", "ATOMIC_RMW16_U_XCHG_I32",
"ATOMIC_RMW8_U_XCHG_I64", "ATOMIC_RMW16_U_XCHG_I64",
"ATOMIC_RMW32_U_XCHG_I64">;
-} // Predicates = [HasAtomics]
//===----------------------------------------------------------------------===//
// Atomic ternary read-modify-writes
@@ -827,10 +821,10 @@ defm ATOMIC_RMW32_U_CMPXCHG_I64 :
multiclass TerRMWPatNoOffset<ValueType ty, PatFrag kind, string inst> {
def : Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)),
(!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$exp, ty:$new)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(ty (kind I64:$addr, ty:$exp, ty:$new)),
(!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$exp, ty:$new)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
// Select ternary RMWs with a constant offset.
@@ -840,10 +834,10 @@ multiclass TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
string inst> {
def : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)),
(!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics]>;
def : Pat<(ty (kind (operand I64:$addr, imm:$off), ty:$exp, ty:$new)),
(!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$exp, ty:$new)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics]>;
}
// Select ternary RMWs with just a constant offset.
@@ -860,11 +854,11 @@ multiclass TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp,
ty:$new)>,
- Requires<[HasAddr32]>;
+ Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp,
ty:$new)>,
- Requires<[HasAddr64]>;
+ Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
}
// Patterns for various addressing modes.
@@ -885,7 +879,6 @@ multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, string inst_32,
defm : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
}
-let Predicates = [HasAtomics] in
defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
"ATOMIC_RMW_CMPXCHG_I32", "ATOMIC_RMW_CMPXCHG_I64">;
@@ -994,7 +987,6 @@ multiclass TerRMWTruncExtPattern<
defm : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
}
-let Predicates = [HasAtomics] in
defm : TerRMWTruncExtPattern<
atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
"ATOMIC_RMW8_U_CMPXCHG_I32", "ATOMIC_RMW16_U_CMPXCHG_I32",
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
index 3e9ef6fbc7ea..7aeae54d95a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
@@ -39,7 +39,7 @@ let mayStore = 1, hasSideEffects = 1 in
defm MEMORY_INIT_A#B :
BULK_I<(outs),
(ins i32imm_op:$seg, i32imm_op:$idx, rc:$dest,
- rc:$offset, rc:$size),
+ I32:$offset, I32:$size),
(outs), (ins i32imm_op:$seg, i32imm_op:$idx),
[],
"memory.init\t$seg, $idx, $dest, $offset, $size",
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 171dd9a67beb..702560bea100 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -103,7 +103,7 @@ defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>;
} // isReturn = 1
-let isTrap = 1 in
+let IsCanonical = 1, isTrap = 1 in
defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>;
} // isTerminator = 1
@@ -131,14 +131,11 @@ defm THROW : I<(outs), (ins event_op:$tag, variable_ops),
(outs), (ins event_op:$tag),
[(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag))],
"throw \t$tag", "throw \t$tag", 0x08>;
-defm RETHROW : I<(outs), (ins EXNREF:$exn), (outs), (ins), [],
- "rethrow \t$exn", "rethrow", 0x09>;
-// Pseudo instruction to be the lowering target of int_wasm_rethrow_in_catch
-// intrinsic. Will be converted to the real rethrow instruction later.
-let isPseudo = 1 in
-defm RETHROW_IN_CATCH : NRI<(outs), (ins), [(int_wasm_rethrow_in_catch)],
- "rethrow_in_catch", 0>;
+defm RETHROW : NRI<(outs), (ins i32imm:$depth), [], "rethrow \t$depth", 0x09>;
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+// For C++ support, we only rethrow the latest exception, thus always setting
+// the depth to 0.
+def : Pat<(int_wasm_rethrow), (RETHROW 0)>;
// Region within which an exception is caught: try / end_try
let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
@@ -146,26 +143,18 @@ defm TRY : NRI<(outs), (ins Signature:$sig), [], "try \t$sig", 0x06>;
defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
-// Catching an exception: catch / extract_exception
-let hasCtrlDep = 1, hasSideEffects = 1 in
-defm CATCH : I<(outs EXNREF:$dst), (ins), (outs), (ins), [],
- "catch \t$dst", "catch", 0x07>;
-
-// Querying / extracing exception: br_on_exn
-// br_on_exn queries an exnref to see if it matches the corresponding exception
-// tag index. If true it branches to the given label and pushes the
-// corresponding argument values of the exception onto the stack.
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in
-defm BR_ON_EXN : I<(outs), (ins bb_op:$dst, event_op:$tag, EXNREF:$exn),
- (outs), (ins bb_op:$dst, event_op:$tag), [],
- "br_on_exn \t$dst, $tag, $exn", "br_on_exn \t$dst, $tag",
- 0x0a>;
-// This is a pseudo instruction that simulates popping a value from stack, which
-// has been pushed by br_on_exn
-let isCodeGenOnly = 1, hasSideEffects = 1 in
-defm EXTRACT_EXCEPTION_I32 : NRI<(outs I32:$dst), (ins),
- [(set I32:$dst, (int_wasm_extract_exception))],
- "extract_exception\t$dst">;
+// Catching an exception: catch / catch_all
+let hasCtrlDep = 1, hasSideEffects = 1 in {
+// Currently 'catch' can only extract an i32, which is sufficient for C++
+// support, but according to the spec 'catch' can extract any number of values
+// based on the event type.
+defm CATCH : I<(outs I32:$dst), (ins event_op:$tag),
+ (outs), (ins event_op:$tag),
+ [(set I32:$dst,
+ (WebAssemblycatch (WebAssemblywrapper texternalsym:$tag)))],
+ "catch \t$dst, $tag", "catch \t$tag", 0x07>;
+defm CATCH_ALL : NRI<(outs), (ins), [], "catch_all", 0x05>;
+}
// Pseudo instructions: cleanupret / catchret
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 6fe1fd2b5c5a..db2ad05b4cdf 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -76,8 +76,10 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
CopyOpcode = WebAssembly::COPY_F64;
else if (RC == &WebAssembly::V128RegClass)
CopyOpcode = WebAssembly::COPY_V128;
- else if (RC == &WebAssembly::EXNREFRegClass)
- CopyOpcode = WebAssembly::COPY_EXNREF;
+ else if (RC == &WebAssembly::FUNCREFRegClass)
+ CopyOpcode = WebAssembly::COPY_FUNCREF;
+ else if (RC == &WebAssembly::EXTERNREFRegClass)
+ CopyOpcode = WebAssembly::COPY_EXTERNREF;
else
llvm_unreachable("Unexpected register class");
@@ -139,14 +141,6 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
else
FBB = MI.getOperand(0).getMBB();
break;
- case WebAssembly::BR_ON_EXN:
- if (HaveCond)
- return true;
- Cond.push_back(MachineOperand::CreateImm(true));
- Cond.push_back(MI.getOperand(2));
- TBB = MI.getOperand(0).getMBB();
- HaveCond = true;
- break;
}
if (MI.isBarrier())
break;
@@ -192,24 +186,10 @@ unsigned WebAssemblyInstrInfo::insertBranch(
assert(Cond.size() == 2 && "Expected a flag and a successor block");
- MachineFunction &MF = *MBB.getParent();
- auto &MRI = MF.getRegInfo();
- bool IsBrOnExn = Cond[1].isReg() && MRI.getRegClass(Cond[1].getReg()) ==
- &WebAssembly::EXNREFRegClass;
-
- if (Cond[0].getImm()) {
- if (IsBrOnExn) {
- const char *CPPExnSymbol = MF.createExternalSymbolName("__cpp_exception");
- BuildMI(&MBB, DL, get(WebAssembly::BR_ON_EXN))
- .addMBB(TBB)
- .addExternalSymbol(CPPExnSymbol)
- .add(Cond[1]);
- } else
- BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]);
- } else {
- assert(!IsBrOnExn && "br_on_exn does not have a reversed condition");
+ if (Cond[0].getImm())
+ BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]);
+ else
BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)).addMBB(TBB).add(Cond[1]);
- }
if (!FBB)
return 1;
@@ -220,14 +200,6 @@ unsigned WebAssemblyInstrInfo::insertBranch(
bool WebAssemblyInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Expected a flag and a condition expression");
-
- // br_on_exn's condition cannot be reversed
- MachineFunction &MF = *Cond[1].getParent()->getParent()->getParent();
- auto &MRI = MF.getRegInfo();
- if (Cond[1].isReg() &&
- MRI.getRegClass(Cond[1].getReg()) == &WebAssembly::EXNREFRegClass)
- return true;
-
Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm());
return false;
}
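With br_on_exn removed, the branch analysis above reduces to a single boolean flag that picks BR_IF or BR_UNLESS and can always be negated; a trivial standalone sketch (the enum and helpers are illustrative, not LLVM's types).

#include <cstdio>

enum class WasmBranch { BrIf, BrUnless };

// Cond[0] in the code above is an immediate boolean flag; true selects br_if,
// false selects br_unless.
static WasmBranch selectBranch(bool CondFlag) {
  return CondFlag ? WasmBranch::BrIf : WasmBranch::BrUnless;
}

// reverseBranchCondition now always succeeds (returns false), simply by
// negating the flag.
static bool reverseCondition(bool &CondFlag) {
  CondFlag = !CondFlag;
  return false;
}

int main() {
  bool Flag = true;
  std::printf("%s\n",
              selectBranch(Flag) == WasmBranch::BrIf ? "br_if" : "br_unless");
  reverseCondition(Flag);
  std::printf("%s\n",
              selectBranch(Flag) == WasmBranch::BrIf ? "br_if" : "br_unless");
}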
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 085910f01ee6..2f5a64a87a59 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -74,8 +74,6 @@ def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>,
SDTCisVT<1, iPTR>]>;
def SDT_WebAssemblyCallSeqEnd :
SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
-def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>;
def SDT_WebAssemblyBrTable : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
@@ -83,7 +81,8 @@ def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
def SDT_WebAssemblyWrapperPIC : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyThrow : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyThrow : SDTypeProfile<0, -1, []>;
+def SDT_WebAssemblyCatch : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Nodes.
@@ -109,6 +108,8 @@ def WebAssemblywrapperPIC : SDNode<"WebAssemblyISD::WrapperPIC",
SDT_WebAssemblyWrapperPIC>;
def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow,
[SDNPHasChain, SDNPVariadic]>;
+def WebAssemblycatch : SDNode<"WebAssemblyISD::CATCH", SDT_WebAssemblyCatch,
+ [SDNPHasChain, SDNPSideEffect]>;
//===----------------------------------------------------------------------===//
// WebAssembly-specific Operands.
@@ -163,6 +164,9 @@ def vec_i64imm_op : Operand<i64>;
let OperandType = "OPERAND_FUNCTION32" in
def function32_op : Operand<i32>;
+let OperandType = "OPERAND_TABLE" in
+def table32_op : Operand<i32>;
+
let OperandType = "OPERAND_OFFSET32" in
def offset32_op : Operand<i32>;
@@ -184,6 +188,11 @@ def Signature : Operand<i32> {
let PrintMethod = "printWebAssemblySignatureOperand";
}
+let OperandType = "OPERAND_HEAPTYPE" in
+def HeapType : Operand<i32> {
+ let PrintMethod = "printWebAssemblyHeapTypeOperand";
+}
+
let OperandType = "OPERAND_TYPEINDEX" in
def TypeIndex : Operand<i32>;
@@ -236,7 +245,8 @@ defm "": ARGUMENT<I32, i32>;
defm "": ARGUMENT<I64, i64>;
defm "": ARGUMENT<F32, f32>;
defm "": ARGUMENT<F64, f64>;
-defm "": ARGUMENT<EXNREF, exnref>;
+defm "": ARGUMENT<FUNCREF, funcref>;
+defm "": ARGUMENT<EXTERNREF, externref>;
// local.get and local.set are not generated by instruction selection; they
// are implied by virtual register uses and defs.
@@ -306,7 +316,8 @@ defm "" : LOCAL<I64>;
defm "" : LOCAL<F32>;
defm "" : LOCAL<F64>;
defm "" : LOCAL<V128>, Requires<[HasSIMD128]>;
-defm "" : LOCAL<EXNREF>, Requires<[HasExceptionHandling]>;
+defm "" : LOCAL<FUNCREF>, Requires<[HasReferenceTypes]>;
+defm "" : LOCAL<EXTERNREF>, Requires<[HasReferenceTypes]>;
let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
defm CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm),
@@ -333,16 +344,25 @@ def : Pat<(i64 (WebAssemblywrapper tglobaladdr:$addr)),
(CONST_I64 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr64]>;
def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
- (GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>;
+ (GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr32]>;
def : Pat<(i32 (WebAssemblywrapperPIC tglobaladdr:$addr)),
- (CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>;
+ (CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr32]>;
+def : Pat<(i64 (WebAssemblywrapperPIC tglobaladdr:$addr)),
+ (CONST_I64 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr64]>;
+
+def : Pat<(i32 (WebAssemblywrapper tglobaltlsaddr:$addr)),
+ (CONST_I32 tglobaltlsaddr:$addr)>, Requires<[HasAddr32]>;
+def : Pat<(i64 (WebAssemblywrapper tglobaltlsaddr:$addr)),
+ (CONST_I64 tglobaltlsaddr:$addr)>, Requires<[HasAddr64]>;
def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
- (GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC]>;
+ (GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC, HasAddr32]>;
def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
- (CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC]>;
+ (CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC, HasAddr32]>;
+def : Pat<(i64 (WebAssemblywrapper texternalsym:$addr)),
+ (CONST_I64 texternalsym:$addr)>, Requires<[IsNotPIC, HasAddr64]>;
def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>;
def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>;
@@ -361,3 +381,4 @@ include "WebAssemblyInstrAtomics.td"
include "WebAssemblyInstrSIMD.td"
include "WebAssemblyInstrRef.td"
include "WebAssemblyInstrBulkMemory.td"
+include "WebAssemblyInstrTable.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index b3c63cc1f884..48b934457267 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -70,7 +70,7 @@ defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b, []>;
multiclass LoadPatNoOffset<ValueType ty, PatFrag kind, string inst> {
def : Pat<(ty (kind I32:$addr)), (!cast<NI>(inst # "_A32") 0, 0, I32:$addr)>,
Requires<[HasAddr32]>;
- def : Pat<(ty (kind I64:$addr)), (!cast<NI>(inst # "_A64") 0, 0, I64:$addr)>,
+ def : Pat<(ty (kind (i64 I64:$addr))), (!cast<NI>(inst # "_A64") 0, 0, I64:$addr)>,
Requires<[HasAddr64]>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
index 14d723750f07..7f324fc11210 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
@@ -11,15 +11,29 @@
///
//===----------------------------------------------------------------------===//
-defm SELECT_EXNREF : I<(outs EXNREF:$dst),
- (ins EXNREF:$lhs, EXNREF:$rhs, I32:$cond),
- (outs), (ins),
- [(set EXNREF:$dst,
- (select I32:$cond, EXNREF:$lhs, EXNREF:$rhs))],
- "exnref.select\t$dst, $lhs, $rhs, $cond",
- "exnref.select", 0x1b>;
+multiclass REF_I<WebAssemblyRegClass reg, ValueType vt> {
+ defm REF_NULL_#reg : I<(outs reg:$res), (ins HeapType:$heaptype),
+ (outs), (ins HeapType:$heaptype),
+ [],
+ "ref.null\t$res, $heaptype",
+ "ref.null\t$heaptype",
+ 0xd0>,
+ Requires<[HasReferenceTypes]>;
+ defm SELECT_#reg: I<(outs reg:$dst), (ins reg:$lhs, reg:$rhs, I32:$cond),
+ (outs), (ins),
+ [(set reg:$dst,
+ (select I32:$cond, reg:$lhs, reg:$rhs))],
+ vt#".select\t$dst, $lhs, $rhs, $cond",
+ vt#".select", 0x1b>,
+ Requires<[HasReferenceTypes]>;
+}
-def : Pat<(select (i32 (setne I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs),
- (SELECT_EXNREF EXNREF:$lhs, EXNREF:$rhs, I32:$cond)>;
-def : Pat<(select (i32 (seteq I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs),
- (SELECT_EXNREF EXNREF:$rhs, EXNREF:$lhs, I32:$cond)>;
+defm "" : REF_I<FUNCREF, funcref>;
+defm "" : REF_I<EXTERNREF, externref>;
+
+foreach reg = [FUNCREF, EXTERNREF] in {
+def : Pat<(select (i32 (setne I32:$cond, 0)), reg:$lhs, reg:$rhs),
+ (!cast<Instruction>("SELECT_"#reg) reg:$lhs, reg:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), reg:$lhs, reg:$rhs),
+ (!cast<Instruction>("SELECT_"#reg) reg:$rhs, reg:$lhs, I32:$cond)>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 4f3da2f35c61..9f3d0f4ab2c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -16,7 +16,9 @@ multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
list<dag> pattern_r, string asmstr_r = "",
string asmstr_s = "", bits<32> simdop = -1> {
defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
- !or(0xfd00, !and(0xff, simdop))>,
+ !if(!ge(simdop, 0x100),
+ !or(0xfd0000, !and(0xffff, simdop)),
+ !or(0xfd00, !and(0xff, simdop)))>,
Requires<[HasSIMD128]>;
}
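The !if in SIMD_I above packs the 0xfd SIMD prefix together with the opcode, switching to a wider field once the opcode no longer fits in one byte; an equivalent standalone C++ computation (the function name is illustrative).

#include <cstdint>
#include <cstdio>

static uint32_t packSimdOpcode(uint32_t SimdOp) {
  // Opcodes below 0x100 keep the one-byte encoding after the 0xfd prefix;
  // larger opcodes are packed into a two-byte field, as in the TableGen !if.
  return SimdOp >= 0x100 ? (0xfd0000u | (SimdOp & 0xffffu))
                         : (0xfd00u | (SimdOp & 0xffu));
}

int main() {
  std::printf("%#x %#x\n", packSimdOpcode(0x0e), packSimdOpcode(0x101));
  // prints 0xfd0e 0xfd0101
}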
@@ -35,6 +37,99 @@ def ImmI#SIZE : ImmLeaf<i32,
foreach SIZE = [2, 4, 8, 16, 32] in
def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
+// Create vector with identical lanes: splat
+def splat2 : PatFrag<(ops node:$x), (build_vector $x, $x)>;
+def splat4 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x)>;
+def splat8 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x,
+ $x, $x, $x, $x)>;
+def splat16 : PatFrag<(ops node:$x),
+ (build_vector $x, $x, $x, $x, $x, $x, $x, $x,
+ $x, $x, $x, $x, $x, $x, $x, $x)>;
+
+class Vec {
+ ValueType vt;
+ ValueType int_vt;
+ ValueType lane_vt;
+ WebAssemblyRegClass lane_rc;
+ int lane_bits;
+ ImmLeaf lane_idx;
+ PatFrag splat;
+ string prefix;
+ Vec split;
+}
+
+def I8x16 : Vec {
+ let vt = v16i8;
+ let int_vt = vt;
+ let lane_vt = i32;
+ let lane_rc = I32;
+ let lane_bits = 8;
+ let lane_idx = LaneIdx16;
+ let splat = splat16;
+ let prefix = "i8x16";
+}
+
+def I16x8 : Vec {
+ let vt = v8i16;
+ let int_vt = vt;
+ let lane_vt = i32;
+ let lane_rc = I32;
+ let lane_bits = 16;
+ let lane_idx = LaneIdx8;
+ let splat = splat8;
+ let prefix = "i16x8";
+ let split = I8x16;
+}
+
+def I32x4 : Vec {
+ let vt = v4i32;
+ let int_vt = vt;
+ let lane_vt = i32;
+ let lane_rc = I32;
+ let lane_bits = 32;
+ let lane_idx = LaneIdx4;
+ let splat = splat4;
+ let prefix = "i32x4";
+ let split = I16x8;
+}
+
+def I64x2 : Vec {
+ let vt = v2i64;
+ let int_vt = vt;
+ let lane_vt = i64;
+ let lane_rc = I64;
+ let lane_bits = 64;
+ let lane_idx = LaneIdx2;
+ let splat = splat2;
+ let prefix = "i64x2";
+ let split = I32x4;
+}
+
+def F32x4 : Vec {
+ let vt = v4f32;
+ let int_vt = v4i32;
+ let lane_vt = f32;
+ let lane_rc = F32;
+ let lane_bits = 32;
+ let lane_idx = LaneIdx4;
+ let splat = splat4;
+ let prefix = "f32x4";
+}
+
+def F64x2 : Vec {
+ let vt = v2f64;
+ let int_vt = v2i64;
+ let lane_vt = f64;
+ let lane_rc = F64;
+ let lane_bits = 64;
+ let lane_idx = LaneIdx2;
+ let splat = splat2;
+ let prefix = "f64x2";
+}
+
+defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2];
+defvar IntVecs = [I8x16, I16x8, I32x4, I64x2];
+
//===----------------------------------------------------------------------===//
// Load and store
//===----------------------------------------------------------------------===//
@@ -53,116 +148,186 @@ defm LOAD_V128_A64 :
"v128.load\t$off$p2align", 0>;
}
-// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
-foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
-defm : LoadPatNoOffset<vec_t, load, "LOAD_V128">;
-defm : LoadPatImmOff<vec_t, load, regPlusImm, "LOAD_V128">;
-defm : LoadPatImmOff<vec_t, load, or_is_add, "LOAD_V128">;
-defm : LoadPatOffsetOnly<vec_t, load, "LOAD_V128">;
-defm : LoadPatGlobalAddrOffOnly<vec_t, load, "LOAD_V128">;
+// Def load patterns from WebAssemblyInstrMemory.td for vector types
+foreach vec = AllVecs in {
+defm : LoadPatNoOffset<vec.vt, load, "LOAD_V128">;
+defm : LoadPatImmOff<vec.vt, load, regPlusImm, "LOAD_V128">;
+defm : LoadPatImmOff<vec.vt, load, or_is_add, "LOAD_V128">;
+defm : LoadPatOffsetOnly<vec.vt, load, "LOAD_V128">;
+defm : LoadPatGlobalAddrOffOnly<vec.vt, load, "LOAD_V128">;
}
-// vNxM.load_splat
-multiclass SIMDLoadSplat<string vec, bits<32> simdop> {
+// v128.loadX_splat
+multiclass SIMDLoadSplat<int size, bits<32> simdop> {
let mayLoad = 1, UseNamedOperandTable = 1 in {
- defm LOAD_SPLAT_#vec#_A32 :
+ defm LOAD#size#_SPLAT_A32 :
SIMD_I<(outs V128:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs),
(ins P2Align:$p2align, offset32_op:$off), [],
- vec#".load_splat\t$dst, ${off}(${addr})$p2align",
- vec#".load_splat\t$off$p2align", simdop>;
- defm LOAD_SPLAT_#vec#_A64 :
+ "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align",
+ "v128.load"#size#"_splat\t$off$p2align", simdop>;
+ defm LOAD#size#_SPLAT_A64 :
SIMD_I<(outs V128:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr),
(outs),
(ins P2Align:$p2align, offset64_op:$off), [],
- vec#".load_splat\t$dst, ${off}(${addr})$p2align",
- vec#".load_splat\t$off$p2align", simdop>;
+ "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align",
+ "v128.load"#size#"_splat\t$off$p2align", simdop>;
}
}
-defm "" : SIMDLoadSplat<"v8x16", 7>;
-defm "" : SIMDLoadSplat<"v16x8", 8>;
-defm "" : SIMDLoadSplat<"v32x4", 9>;
-defm "" : SIMDLoadSplat<"v64x2", 10>;
+defm "" : SIMDLoadSplat<8, 7>;
+defm "" : SIMDLoadSplat<16, 8>;
+defm "" : SIMDLoadSplat<32, 9>;
+defm "" : SIMDLoadSplat<64, 10>;
def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>;
-foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"],
- ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in {
-defm : LoadPatNoOffset<!cast<ValueType>(args[0]),
- load_splat,
- "LOAD_SPLAT_"#args[1]>;
-defm : LoadPatImmOff<!cast<ValueType>(args[0]),
- load_splat,
- regPlusImm,
- "LOAD_SPLAT_"#args[1]>;
-defm : LoadPatImmOff<!cast<ValueType>(args[0]),
- load_splat,
- or_is_add,
- "LOAD_SPLAT_"#args[1]>;
-defm : LoadPatOffsetOnly<!cast<ValueType>(args[0]),
- load_splat,
- "LOAD_SPLAT_"#args[1]>;
-defm : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]),
- load_splat,
- "LOAD_SPLAT_"#args[1]>;
+foreach vec = AllVecs in {
+defvar inst = "LOAD"#vec.lane_bits#"_SPLAT";
+defm : LoadPatNoOffset<vec.vt, load_splat, inst>;
+defm : LoadPatImmOff<vec.vt, load_splat, regPlusImm, inst>;
+defm : LoadPatImmOff<vec.vt, load_splat, or_is_add, inst>;
+defm : LoadPatOffsetOnly<vec.vt, load_splat, inst>;
+defm : LoadPatGlobalAddrOffOnly<vec.vt, load_splat, inst>;
}
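A standalone C++ model of the v128.loadN_splat semantics selected by the patterns above (the array return type and function name are illustrative): one scalar is loaded from memory and broadcast to every lane.

#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>

static std::array<uint32_t, 4> load32Splat(const void *Addr) {
  uint32_t Val;
  std::memcpy(&Val, Addr, sizeof(Val)); // single 32-bit load
  return {Val, Val, Val, Val};          // broadcast into all four lanes
}

int main() {
  uint32_t Mem = 0xdeadbeef;
  auto V = load32Splat(&Mem);
  std::printf("%#x %#x\n", V[0], V[3]); // both 0xdeadbeef
}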
// Load and extend
-multiclass SIMDLoadExtend<ValueType vec_t, string name, bits<32> simdop> {
+multiclass SIMDLoadExtend<Vec vec, string loadPat, bits<32> simdop> {
+ defvar signed = vec.prefix#".load"#loadPat#"_s";
+ defvar unsigned = vec.prefix#".load"#loadPat#"_u";
let mayLoad = 1, UseNamedOperandTable = 1 in {
- defm LOAD_EXTEND_S_#vec_t#_A32 :
+ defm LOAD_EXTEND_S_#vec#_A32 :
SIMD_I<(outs V128:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
- name#"_s\t$dst, ${off}(${addr})$p2align",
- name#"_s\t$off$p2align", simdop>;
- defm LOAD_EXTEND_U_#vec_t#_A32 :
+ signed#"\t$dst, ${off}(${addr})$p2align",
+ signed#"\t$off$p2align", simdop>;
+ defm LOAD_EXTEND_U_#vec#_A32 :
SIMD_I<(outs V128:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
- name#"_u\t$dst, ${off}(${addr})$p2align",
- name#"_u\t$off$p2align", !add(simdop, 1)>;
- defm LOAD_EXTEND_S_#vec_t#_A64 :
+ unsigned#"\t$dst, ${off}(${addr})$p2align",
+ unsigned#"\t$off$p2align", !add(simdop, 1)>;
+ defm LOAD_EXTEND_S_#vec#_A64 :
SIMD_I<(outs V128:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr),
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
- name#"_s\t$dst, ${off}(${addr})$p2align",
- name#"_s\t$off$p2align", simdop>;
- defm LOAD_EXTEND_U_#vec_t#_A64 :
+ signed#"\t$dst, ${off}(${addr})$p2align",
+ signed#"\t$off$p2align", simdop>;
+ defm LOAD_EXTEND_U_#vec#_A64 :
SIMD_I<(outs V128:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr),
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
- name#"_u\t$dst, ${off}(${addr})$p2align",
- name#"_u\t$off$p2align", !add(simdop, 1)>;
+ unsigned#"\t$dst, ${off}(${addr})$p2align",
+ unsigned#"\t$off$p2align", !add(simdop, 1)>;
}
}
-defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 1>;
-defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 3>;
-defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 5>;
+defm "" : SIMDLoadExtend<I16x8, "8x8", 1>;
+defm "" : SIMDLoadExtend<I32x4, "16x4", 3>;
+defm "" : SIMDLoadExtend<I64x2, "32x2", 5>;
+
+foreach vec = [I16x8, I32x4, I64x2] in
+foreach exts = [["sextloadvi", "_S"],
+ ["zextloadvi", "_U"],
+ ["extloadvi", "_U"]] in {
+defvar loadpat = !cast<PatFrag>(exts[0]#vec.split.lane_bits);
+defvar inst = "LOAD_EXTEND"#exts[1]#"_"#vec;
+defm : LoadPatNoOffset<vec.vt, loadpat, inst>;
+defm : LoadPatImmOff<vec.vt, loadpat, regPlusImm, inst>;
+defm : LoadPatImmOff<vec.vt, loadpat, or_is_add, inst>;
+defm : LoadPatOffsetOnly<vec.vt, loadpat, inst>;
+defm : LoadPatGlobalAddrOffOnly<vec.vt, loadpat, inst>;
+}
+
+// Load lane into zero vector
+multiclass SIMDLoadZero<Vec vec, bits<32> simdop> {
+ defvar name = "v128.load"#vec.lane_bits#"_zero";
+ let mayLoad = 1, UseNamedOperandTable = 1 in {
+ defm LOAD_ZERO_#vec#_A32 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ name#"\t$dst, ${off}(${addr})$p2align",
+ name#"\t$off$p2align", simdop>;
+ defm LOAD_ZERO_#vec#_A64 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ name#"\t$dst, ${off}(${addr})$p2align",
+ name#"\t$off$p2align", simdop>;
+ } // mayLoad = 1, UseNamedOperandTable = 1
+}
+
+// TODO: Also support v4f32 and v2f64 once the instructions are merged
+// to the proposal
+defm "" : SIMDLoadZero<I32x4, 252>;
+defm "" : SIMDLoadZero<I64x2, 253>;
+
+foreach vec = [I32x4, I64x2] in {
+defvar loadpat = !cast<Intrinsic>("int_wasm_load"#vec.lane_bits#"_zero");
+defvar inst = "LOAD_ZERO_"#vec;
+defm : LoadPatNoOffset<vec.vt, loadpat, inst>;
+defm : LoadPatImmOff<vec.vt, loadpat, regPlusImm, inst>;
+defm : LoadPatImmOff<vec.vt, loadpat, or_is_add, inst>;
+defm : LoadPatOffsetOnly<vec.vt, loadpat, inst>;
+defm : LoadPatGlobalAddrOffOnly<vec.vt, loadpat, inst>;
+}
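Similarly, a standalone C++ model of v128.load32_zero as matched above (function name is illustrative): the loaded scalar lands in lane 0 and the remaining lanes are cleared.

#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>

static std::array<uint32_t, 4> load32Zero(const void *Addr) {
  std::array<uint32_t, 4> Out{}; // lanes 1-3 stay zero
  std::memcpy(&Out[0], Addr, sizeof(uint32_t));
  return Out;
}

int main() {
  uint32_t Mem = 0x12345678;
  auto V = load32Zero(&Mem);
  std::printf("%#x %#x\n", V[0], V[1]); // 0x12345678 0
}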
+
+// Load lane
+multiclass SIMDLoadLane<Vec vec, bits<32> simdop> {
+ defvar name = "v128.load"#vec.lane_bits#"_lane";
+ let mayLoad = 1, UseNamedOperandTable = 1 in {
+ defm LOAD_LANE_#vec#_A32 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
+ I32:$addr, V128:$vec),
+ (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
+ [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx",
+ name#"\t$off$p2align, $idx", simdop>;
+ defm LOAD_LANE_#vec#_A64 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
+ I64:$addr, V128:$vec),
+ (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
+ [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx",
+ name#"\t$off$p2align, $idx", simdop>;
+ } // mayLoad = 1, UseNamedOperandTable = 1
+}
-foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in
-foreach exts = [["sextloadv", "_S"],
- ["zextloadv", "_U"],
- ["extloadv", "_U"]] in {
-defm : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]),
- "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
-defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm,
- "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
-defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add,
- "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
-defm : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
- "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
-defm : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
- "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
+// TODO: Also support v4f32 and v2f64 once the instructions are merged
+// to the proposal
+defm "" : SIMDLoadLane<I8x16, 88>;
+defm "" : SIMDLoadLane<I16x8, 89>;
+defm "" : SIMDLoadLane<I32x4, 90>;
+defm "" : SIMDLoadLane<I64x2, 91>;
+
+// Select loads with no constant offset.
+multiclass LoadLanePatNoOffset<Vec vec, PatFrag kind> {
+ defvar load_lane_a32 = !cast<NI>("LOAD_LANE_"#vec#"_A32");
+ defvar load_lane_a64 = !cast<NI>("LOAD_LANE_"#vec#"_A64");
+ def : Pat<(vec.vt (kind (i32 I32:$addr),
+ (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))),
+ (load_lane_a32 0, 0, imm:$idx, $addr, $vec)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(vec.vt (kind (i64 I64:$addr),
+ (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))),
+ (load_lane_a64 0, 0, imm:$idx, $addr, $vec)>,
+ Requires<[HasAddr64]>;
}
+defm : LoadLanePatNoOffset<I8x16, int_wasm_load8_lane>;
+defm : LoadLanePatNoOffset<I16x8, int_wasm_load16_lane>;
+defm : LoadLanePatNoOffset<I32x4, int_wasm_load32_lane>;
+defm : LoadLanePatNoOffset<I64x2, int_wasm_load64_lane>;
+
+// TODO: Also support the other load patterns for load_lane once the instructions
+// are merged to the proposal.
// Store: v128.store
let mayStore = 1, UseNamedOperandTable = 1 in {
@@ -177,30 +342,77 @@ defm STORE_V128_A64 :
"v128.store\t${off}(${addr})$p2align, $vec",
"v128.store\t$off$p2align", 11>;
}
-foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
-// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
-defm : StorePatNoOffset<vec_t, store, "STORE_V128">;
-defm : StorePatImmOff<vec_t, store, regPlusImm, "STORE_V128">;
-defm : StorePatImmOff<vec_t, store, or_is_add, "STORE_V128">;
-defm : StorePatOffsetOnly<vec_t, store, "STORE_V128">;
-defm : StorePatGlobalAddrOffOnly<vec_t, store, "STORE_V128">;
+
+// Def store patterns from WebAssemblyInstrMemory.td for vector types
+foreach vec = AllVecs in {
+defm : StorePatNoOffset<vec.vt, store, "STORE_V128">;
+defm : StorePatImmOff<vec.vt, store, regPlusImm, "STORE_V128">;
+defm : StorePatImmOff<vec.vt, store, or_is_add, "STORE_V128">;
+defm : StorePatOffsetOnly<vec.vt, store, "STORE_V128">;
+defm : StorePatGlobalAddrOffOnly<vec.vt, store, "STORE_V128">;
+}
+
+// Store lane
+multiclass SIMDStoreLane<Vec vec, bits<32> simdop> {
+ defvar name = "v128.store"#vec.lane_bits#"_lane";
+ let mayStore = 1, UseNamedOperandTable = 1 in {
+ defm STORE_LANE_#vec#_A32 :
+ SIMD_I<(outs),
+ (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
+ I32:$addr, V128:$vec),
+ (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
+ [], name#"\t${off}(${addr})$p2align, $vec, $idx",
+ name#"\t$off$p2align, $idx", simdop>;
+ defm STORE_LANE_#vec#_A64 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
+ I64:$addr, V128:$vec),
+ (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
+ [], name#"\t${off}(${addr})$p2align, $vec, $idx",
+ name#"\t$off$p2align, $idx", simdop>;
+ } // mayStore = 1, UseNamedOperandTable = 1
+}
+
+// TODO: Also support v4f32 and v2f64 once the instructions are merged
+// to the proposal
+defm "" : SIMDStoreLane<I8x16, 92>;
+defm "" : SIMDStoreLane<I16x8, 93>;
+defm "" : SIMDStoreLane<I32x4, 94>;
+defm "" : SIMDStoreLane<I64x2, 95>;
+
+// Select stores with no constant offset.
+multiclass StoreLanePatNoOffset<Vec vec, PatFrag kind> {
+ def : Pat<(kind (i32 I32:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)),
+ (!cast<NI>("STORE_LANE_"#vec#"_A32") 0, 0, imm:$idx, $addr, $vec)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(kind (i64 I64:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)),
+ (!cast<NI>("STORE_LANE_"#vec#"_A64") 0, 0, imm:$idx, $addr, $vec)>,
+ Requires<[HasAddr64]>;
}
+defm : StoreLanePatNoOffset<I8x16, int_wasm_store8_lane>;
+defm : StoreLanePatNoOffset<I16x8, int_wasm_store16_lane>;
+defm : StoreLanePatNoOffset<I32x4, int_wasm_store32_lane>;
+defm : StoreLanePatNoOffset<I64x2, int_wasm_store64_lane>;
+
+// TODO: Also support the other store patterns for store_lane once the
+// instructions are merged to the proposal.
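A standalone C++ sketch of the lane memory operations selected above (plain byte arrays stand in for v128; names are illustrative): v128.load8_lane replaces one byte of an existing vector with a loaded byte, and v128.store8_lane writes one selected byte back to memory.

#include <array>
#include <cstdint>
#include <cstdio>

using V128Bytes = std::array<uint8_t, 16>;

static V128Bytes load8Lane(const uint8_t *Addr, V128Bytes Vec, unsigned Idx) {
  Vec[Idx] = *Addr; // only the selected lane changes
  return Vec;
}

static void store8Lane(uint8_t *Addr, const V128Bytes &Vec, unsigned Idx) {
  *Addr = Vec[Idx]; // write just the selected lane
}

int main() {
  V128Bytes Vec{};
  uint8_t Mem = 42;
  Vec = load8Lane(&Mem, Vec, 3);
  uint8_t Out = 0;
  store8Lane(&Out, Vec, 3);
  std::printf("%u %u\n", unsigned(Vec[3]), unsigned(Out)); // 42 42
}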
+
//===----------------------------------------------------------------------===//
// Constructing SIMD values
//===----------------------------------------------------------------------===//
// Constant: v128.const
-multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
+multiclass ConstVec<Vec vec, dag ops, dag pat, string args> {
let isMoveImm = 1, isReMaterializable = 1,
Predicates = [HasUnimplementedSIMD128] in
- defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
- [(set V128:$dst, (vec_t pat))],
- "v128.const\t$dst, "#args,
- "v128.const\t"#args, 12>;
+ defm CONST_V128_#vec : SIMD_I<(outs V128:$dst), ops, (outs), ops,
+ [(set V128:$dst, (vec.vt pat))],
+ "v128.const\t$dst, "#args,
+ "v128.const\t"#args, 12>;
}
-defm "" : ConstVec<v16i8,
+defm "" : ConstVec<I8x16,
(ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
vec_i8imm_op:$i2, vec_i8imm_op:$i3,
vec_i8imm_op:$i4, vec_i8imm_op:$i5,
@@ -215,7 +427,7 @@ defm "" : ConstVec<v16i8,
ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
!strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
"$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
-defm "" : ConstVec<v8i16,
+defm "" : ConstVec<I16x8,
(ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
vec_i16imm_op:$i2, vec_i16imm_op:$i3,
vec_i16imm_op:$i4, vec_i16imm_op:$i5,
@@ -225,23 +437,23 @@ defm "" : ConstVec<v8i16,
ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
"$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
let IsCanonical = 1 in
-defm "" : ConstVec<v4i32,
+defm "" : ConstVec<I32x4,
(ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
vec_i32imm_op:$i2, vec_i32imm_op:$i3),
(build_vector (i32 imm:$i0), (i32 imm:$i1),
(i32 imm:$i2), (i32 imm:$i3)),
"$i0, $i1, $i2, $i3">;
-defm "" : ConstVec<v2i64,
+defm "" : ConstVec<I64x2,
(ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
(build_vector (i64 imm:$i0), (i64 imm:$i1)),
"$i0, $i1">;
-defm "" : ConstVec<v4f32,
+defm "" : ConstVec<F32x4,
(ins f32imm_op:$i0, f32imm_op:$i1,
f32imm_op:$i2, f32imm_op:$i3),
(build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
(f32 fpimm:$i2), (f32 fpimm:$i3)),
"$i0, $i1, $i2, $i3">;
-defm "" : ConstVec<v2f64,
+defm "" : ConstVec<F64x2,
(ins f64imm_op:$i0, f64imm_op:$i1),
(build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
"$i0, $i1">;
@@ -269,10 +481,10 @@ defm SHUFFLE :
vec_i8imm_op:$mC, vec_i8imm_op:$mD,
vec_i8imm_op:$mE, vec_i8imm_op:$mF),
[],
- "v8x16.shuffle\t$dst, $x, $y, "#
+ "i8x16.shuffle\t$dst, $x, $y, "#
"$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
"$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
- "v8x16.shuffle\t"#
+ "i8x16.shuffle\t"#
"$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
"$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
13>;
@@ -280,8 +492,8 @@ defm SHUFFLE :
// Shuffles after custom lowering
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
-foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
-def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
+foreach vec = AllVecs in {
+def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y),
(i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
(i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
(i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
@@ -290,178 +502,150 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
(i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
(i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
(i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
- (vec_t (SHUFFLE (vec_t V128:$x), (vec_t V128:$y),
- (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
- (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
- (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
- (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
- (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
- (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
- (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
- (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
+ (SHUFFLE $x, $y,
+ imm:$m0, imm:$m1, imm:$m2, imm:$m3,
+ imm:$m4, imm:$m5, imm:$m6, imm:$m7,
+ imm:$m8, imm:$m9, imm:$mA, imm:$mB,
+ imm:$mC, imm:$mD, imm:$mE, imm:$mF)>;
}
-// Swizzle lanes: v8x16.swizzle
+// Swizzle lanes: i8x16.swizzle
def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
defm SWIZZLE :
SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
[(set (v16i8 V128:$dst),
(wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
- "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>;
+ "i8x16.swizzle\t$dst, $src, $mask", "i8x16.swizzle", 14>;
def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
- (SWIZZLE V128:$src, V128:$mask)>;
+ (SWIZZLE $src, $mask)>;
+
+multiclass Splat<Vec vec, bits<32> simdop> {
+ defm SPLAT_#vec : SIMD_I<(outs V128:$dst), (ins vec.lane_rc:$x),
+ (outs), (ins),
+ [(set (vec.vt V128:$dst),
+ (vec.splat vec.lane_rc:$x))],
+ vec.prefix#".splat\t$dst, $x", vec.prefix#".splat",
+ simdop>;
+}
-// Create vector with identical lanes: splat
-def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
-def splat4 : PatFrag<(ops node:$x), (build_vector
- node:$x, node:$x, node:$x, node:$x)>;
-def splat8 : PatFrag<(ops node:$x), (build_vector
- node:$x, node:$x, node:$x, node:$x,
- node:$x, node:$x, node:$x, node:$x)>;
-def splat16 : PatFrag<(ops node:$x), (build_vector
- node:$x, node:$x, node:$x, node:$x,
- node:$x, node:$x, node:$x, node:$x,
- node:$x, node:$x, node:$x, node:$x,
- node:$x, node:$x, node:$x, node:$x)>;
-
-multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
- PatFrag splat_pat, bits<32> simdop> {
- defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins),
- [(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
- vec#".splat\t$dst, $x", vec#".splat", simdop>;
-}
-
-defm "" : Splat<v16i8, "i8x16", I32, splat16, 15>;
-defm "" : Splat<v8i16, "i16x8", I32, splat8, 16>;
-defm "" : Splat<v4i32, "i32x4", I32, splat4, 17>;
-defm "" : Splat<v2i64, "i64x2", I64, splat2, 18>;
-defm "" : Splat<v4f32, "f32x4", F32, splat4, 19>;
-defm "" : Splat<v2f64, "f64x2", F64, splat2, 20>;
+defm "" : Splat<I8x16, 15>;
+defm "" : Splat<I16x8, 16>;
+defm "" : Splat<I32x4, 17>;
+defm "" : Splat<I64x2, 18>;
+defm "" : Splat<F32x4, 19>;
+defm "" : Splat<F64x2, 20>;
// scalar_to_vector leaves high lanes undefined, so can be a splat
-class ScalarSplatPat<ValueType vec_t, ValueType lane_t,
- WebAssemblyRegClass reg_t> :
- Pat<(vec_t (scalar_to_vector (lane_t reg_t:$x))),
- (!cast<Instruction>("SPLAT_"#vec_t) reg_t:$x)>;
-
-def : ScalarSplatPat<v16i8, i32, I32>;
-def : ScalarSplatPat<v8i16, i32, I32>;
-def : ScalarSplatPat<v4i32, i32, I32>;
-def : ScalarSplatPat<v2i64, i64, I64>;
-def : ScalarSplatPat<v4f32, f32, F32>;
-def : ScalarSplatPat<v2f64, f64, F64>;
+foreach vec = AllVecs in
+def : Pat<(vec.vt (scalar_to_vector (vec.lane_vt vec.lane_rc:$x))),
+ (!cast<Instruction>("SPLAT_"#vec) $x)>;
//===----------------------------------------------------------------------===//
// Accessing lanes
//===----------------------------------------------------------------------===//
// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
-multiclass ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
- bits<32> simdop, string suffix = ""> {
- defm EXTRACT_LANE_#vec_t#suffix :
- SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
+multiclass ExtractLane<Vec vec, bits<32> simdop, string suffix = ""> {
+ defm EXTRACT_LANE_#vec#suffix :
+ SIMD_I<(outs vec.lane_rc:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
(outs), (ins vec_i8imm_op:$idx), [],
- vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
- vec#".extract_lane"#suffix#"\t$idx", simdop>;
+ vec.prefix#".extract_lane"#suffix#"\t$dst, $vec, $idx",
+ vec.prefix#".extract_lane"#suffix#"\t$idx", simdop>;
}
-defm "" : ExtractLane<v16i8, "i8x16", I32, 21, "_s">;
-defm "" : ExtractLane<v16i8, "i8x16", I32, 22, "_u">;
-defm "" : ExtractLane<v8i16, "i16x8", I32, 24, "_s">;
-defm "" : ExtractLane<v8i16, "i16x8", I32, 25, "_u">;
-defm "" : ExtractLane<v4i32, "i32x4", I32, 27>;
-defm "" : ExtractLane<v2i64, "i64x2", I64, 29>;
-defm "" : ExtractLane<v4f32, "f32x4", F32, 31>;
-defm "" : ExtractLane<v2f64, "f64x2", F64, 33>;
+defm "" : ExtractLane<I8x16, 21, "_s">;
+defm "" : ExtractLane<I8x16, 22, "_u">;
+defm "" : ExtractLane<I16x8, 24, "_s">;
+defm "" : ExtractLane<I16x8, 25, "_u">;
+defm "" : ExtractLane<I32x4, 27>;
+defm "" : ExtractLane<I64x2, 29>;
+defm "" : ExtractLane<F32x4, 31>;
+defm "" : ExtractLane<F64x2, 33>;
def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
- (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
+ (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>;
def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
- (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
+ (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>;
def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
- (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>;
+ (EXTRACT_LANE_I32x4 $vec, imm:$idx)>;
def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
- (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>;
+ (EXTRACT_LANE_F32x4 $vec, imm:$idx)>;
def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
- (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>;
+ (EXTRACT_LANE_I64x2 $vec, imm:$idx)>;
def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
- (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>;
+ (EXTRACT_LANE_F64x2 $vec, imm:$idx)>;
def : Pat<
(sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
- (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>;
+ (EXTRACT_LANE_I8x16_s $vec, imm:$idx)>;
def : Pat<
(and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
- (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
+ (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>;
def : Pat<
(sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
- (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>;
+ (EXTRACT_LANE_I16x8_s $vec, imm:$idx)>;
def : Pat<
(and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
- (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
+ (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>;
// Replace lane value: replace_lane
-multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
- WebAssemblyRegClass reg_t, ValueType lane_t,
- bits<32> simdop> {
- defm REPLACE_LANE_#vec_t :
- SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x),
- (outs), (ins vec_i8imm_op:$idx),
- [(set V128:$dst, (vector_insert
- (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))],
- vec#".replace_lane\t$dst, $vec, $idx, $x",
- vec#".replace_lane\t$idx", simdop>;
-}
-
-defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 23>;
-defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 26>;
-defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 28>;
-defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 30>;
-defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 32>;
-defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 34>;
+multiclass ReplaceLane<Vec vec, bits<32> simdop> {
+ defm REPLACE_LANE_#vec :
+ SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, vec.lane_rc:$x),
+ (outs), (ins vec_i8imm_op:$idx),
+ [(set V128:$dst, (vector_insert
+ (vec.vt V128:$vec),
+ (vec.lane_vt vec.lane_rc:$x),
+ (i32 vec.lane_idx:$idx)))],
+ vec.prefix#".replace_lane\t$dst, $vec, $idx, $x",
+ vec.prefix#".replace_lane\t$idx", simdop>;
+}
+
+defm "" : ReplaceLane<I8x16, 23>;
+defm "" : ReplaceLane<I16x8, 26>;
+defm "" : ReplaceLane<I32x4, 28>;
+defm "" : ReplaceLane<I64x2, 30>;
+defm "" : ReplaceLane<F32x4, 32>;
+defm "" : ReplaceLane<F64x2, 34>;
// Lower undef lane indices to zero
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
- (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>;
+ (REPLACE_LANE_I8x16 $vec, 0, $x)>;
def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
- (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>;
+ (REPLACE_LANE_I16x8 $vec, 0, $x)>;
def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
- (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>;
+ (REPLACE_LANE_I32x4 $vec, 0, $x)>;
def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
- (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>;
+ (REPLACE_LANE_I64x2 $vec, 0, $x)>;
def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
- (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>;
+ (REPLACE_LANE_F32x4 $vec, 0, $x)>;
def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
- (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;
+ (REPLACE_LANE_F64x2 $vec, 0, $x)>;
//===----------------------------------------------------------------------===//
// Comparisons
//===----------------------------------------------------------------------===//
-multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec,
- string name, CondCode cond, bits<32> simdop> {
- defm _#vec_t :
+multiclass SIMDCondition<Vec vec, string name, CondCode cond, bits<32> simdop> {
+ defm _#vec :
SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
- [(set (out_t V128:$dst),
- (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond)
- )],
- vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, simdop>;
+ [(set (vec.int_vt V128:$dst),
+ (setcc (vec.vt V128:$lhs), (vec.vt V128:$rhs), cond))],
+ vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
+ vec.prefix#"."#name, simdop>;
}
multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
- defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>;
- defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond,
- !add(baseInst, 10)>;
- defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond,
- !add(baseInst, 20)>;
+ defm "" : SIMDCondition<I8x16, name, cond, baseInst>;
+ defm "" : SIMDCondition<I16x8, name, cond, !add(baseInst, 10)>;
+ defm "" : SIMDCondition<I32x4, name, cond, !add(baseInst, 20)>;
}
multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
- defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>;
- defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond,
- !add(baseInst, 6)>;
+ defm "" : SIMDCondition<F32x4, name, cond, baseInst>;
+ defm "" : SIMDCondition<F64x2, name, cond, !add(baseInst, 6)>;
}
// Equality: eq
@@ -499,108 +683,157 @@ defm GE : SIMDConditionFP<"ge", SETOGE, 70>;
// Lower float comparisons that don't care about NaN to standard WebAssembly
// float comparisons. These instructions are generated with nnan and in the
// target-independent expansion of unordered comparisons and ordered ne.
-foreach nodes = [[seteq, EQ_v4f32], [setne, NE_v4f32], [setlt, LT_v4f32],
- [setgt, GT_v4f32], [setle, LE_v4f32], [setge, GE_v4f32]] in
+foreach nodes = [[seteq, EQ_F32x4], [setne, NE_F32x4], [setlt, LT_F32x4],
+ [setgt, GT_F32x4], [setle, LE_F32x4], [setge, GE_F32x4]] in
def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
- (v4i32 (nodes[1] (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
+ (nodes[1] $lhs, $rhs)>;
-foreach nodes = [[seteq, EQ_v2f64], [setne, NE_v2f64], [setlt, LT_v2f64],
- [setgt, GT_v2f64], [setle, LE_v2f64], [setge, GE_v2f64]] in
+foreach nodes = [[seteq, EQ_F64x2], [setne, NE_F64x2], [setlt, LT_F64x2],
+ [setgt, GT_F64x2], [setle, LE_F64x2], [setge, GE_F64x2]] in
def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
- (v2i64 (nodes[1] (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+ (nodes[1] $lhs, $rhs)>;
+
+// Prototype i64x2.eq
+defm EQ_v2i64 :
+ SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
+ [(set (v2i64 V128:$dst),
+ (int_wasm_eq (v2i64 V128:$lhs), (v2i64 V128:$rhs)))],
+ "i64x2.eq\t$dst, $lhs, $rhs", "i64x2.eq", 192>;
//===----------------------------------------------------------------------===//
// Bitwise operations
//===----------------------------------------------------------------------===//
-multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name,
- bits<32> simdop> {
- defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (vec_t V128:$dst),
- (node (vec_t V128:$lhs), (vec_t V128:$rhs))
- )],
- vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
- simdop>;
+multiclass SIMDBinary<Vec vec, SDNode node, string name, bits<32> simdop> {
+ defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ (outs), (ins),
+ [(set (vec.vt V128:$dst),
+ (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))],
+ vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
+ vec.prefix#"."#name, simdop>;
}
-multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> {
- defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>;
- defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>;
- defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>;
- defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>;
+multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop, bit commutable = false> {
+ let isCommutable = commutable in
+ defm "" : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ (outs), (ins), [],
+ "v128."#name#"\t$dst, $lhs, $rhs", "v128."#name, simdop>;
+ foreach vec = IntVecs in
+ def : Pat<(node (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
+ (!cast<NI>(NAME) $lhs, $rhs)>;
}
-multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name,
- bits<32> simdop> {
- defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
- [(set (vec_t V128:$dst),
- (vec_t (node (vec_t V128:$vec)))
- )],
- vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
+multiclass SIMDUnary<Vec vec, SDNode node, string name, bits<32> simdop> {
+ defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins),
+ [(set (vec.vt V128:$dst),
+ (vec.vt (node (vec.vt V128:$v))))],
+ vec.prefix#"."#name#"\t$dst, $v",
+ vec.prefix#"."#name, simdop>;
}
// Bitwise logic: v128.not
-foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
-defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 77>;
+defm NOT : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins), [],
+ "v128.not\t$dst, $v", "v128.not", 77>;
+foreach vec = IntVecs in
+def : Pat<(vnot (vec.vt V128:$v)), (NOT $v)>;
// Bitwise logic: v128.and / v128.or / v128.xor
-let isCommutable = 1 in {
-defm AND : SIMDBitwise<and, "and", 78>;
-defm OR : SIMDBitwise<or, "or", 80>;
-defm XOR : SIMDBitwise<xor, "xor", 81>;
-} // isCommutable = 1
+defm AND : SIMDBitwise<and, "and", 78, true>;
+defm OR : SIMDBitwise<or, "or", 80, true>;
+defm XOR : SIMDBitwise<xor, "xor", 81, true>;
// Bitwise logic: v128.andnot
def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>;
defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>;
// Bitwise select: v128.bitselect
-foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
- defm BITSELECT_#vec_t :
- SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
- [(set (vec_t V128:$dst),
- (vec_t (int_wasm_bitselect
- (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c)
- ))
- )],
- "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>;
+defm BITSELECT :
+ SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), [],
+ "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>;
+
+foreach vec = AllVecs in
+def : Pat<(vec.vt (int_wasm_bitselect
+ (vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))),
+ (BITSELECT $v1, $v2, $c)>;
// Bitselect is equivalent to (c & v1) | (~c & v2)
-foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
- def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)),
- (and (vnot V128:$c), (vec_t V128:$v2)))),
- (!cast<Instruction>("BITSELECT_"#vec_t)
- V128:$v1, V128:$v2, V128:$c)>;
+foreach vec = IntVecs in
+def : Pat<(vec.vt (or (and (vec.vt V128:$c), (vec.vt V128:$v1)),
+ (and (vnot V128:$c), (vec.vt V128:$v2)))),
+ (BITSELECT $v1, $v2, $c)>;
+
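As a cross-check, the (c & v1) | (~c & v2) equivalence used by the pattern above can be modeled in a few lines of scalar C++ (an illustrative sketch only; the helper name is made up):

#include <cstdint>

// Scalar model of v128.bitselect on one 64-bit chunk: each result bit comes
// from v1 where the control bit is 1 and from v2 where it is 0.
uint64_t bitselect64(uint64_t v1, uint64_t v2, uint64_t c) {
  return (c & v1) | (~c & v2);
}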
+// Also implement vselect in terms of bitselect
+foreach vec = AllVecs in
+def : Pat<(vec.vt (vselect
+ (vec.int_vt V128:$c), (vec.vt V128:$v1), (vec.vt V128:$v2))),
+ (BITSELECT $v1, $v2, $c)>;
+
+// MVP select on v128 values
+defm SELECT_V128 :
+ I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), (outs), (ins), [],
+ "v128.select\t$dst, $lhs, $rhs, $cond", "v128.select", 0x1b>;
+
+foreach vec = AllVecs in {
+def : Pat<(select I32:$cond, (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
+ (SELECT_V128 $lhs, $rhs, $cond)>;
+
+// ISD::SELECT requires its operand to conform to getBooleanContents, but
+// WebAssembly's select interprets any non-zero value as true, so we can fold
+// a setne with 0 into a select.
+def : Pat<(select
+ (i32 (setne I32:$cond, 0)), (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
+ (SELECT_V128 $lhs, $rhs, $cond)>;
+
+// And again, this time with seteq instead of setne and the arms reversed.
+def : Pat<(select
+ (i32 (seteq I32:$cond, 0)), (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
+ (SELECT_V128 $rhs, $lhs, $cond)>;
+} // foreach vec
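The two folds above can be sanity-checked with a scalar sketch (illustrative C++ only; the function names are made up): under the any-non-zero-is-true semantics of select, comparing the condition against zero first changes nothing, and with seteq the arms simply swap.

#include <cstdint>

// select with WebAssembly's "any non-zero value is true" semantics.
int32_t sel(int32_t cond, int32_t a, int32_t b) { return cond != 0 ? a : b; }

// (select (setne cond, 0), a, b) gives the same result as (select cond, a, b).
int32_t sel_setne(int32_t cond, int32_t a, int32_t b) { return sel(cond != 0, a, b); }

// (select (seteq cond, 0), a, b) gives the same result as (select cond, b, a).
int32_t sel_seteq(int32_t cond, int32_t a, int32_t b) { return sel(cond == 0, a, b); }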
+
+// Sign select
+multiclass SIMDSignSelect<Vec vec, bits<32> simdop> {
+ defm SIGNSELECT_#vec :
+ SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst),
+ (vec.vt (int_wasm_signselect
+ (vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))))],
+ vec.prefix#".signselect\t$dst, $v1, $v2, $c",
+ vec.prefix#".signselect", simdop>;
+}
+
+defm : SIMDSignSelect<I8x16, 125>;
+defm : SIMDSignSelect<I16x8, 126>;
+defm : SIMDSignSelect<I32x4, 127>;
+defm : SIMDSignSelect<I64x2, 148>;
//===----------------------------------------------------------------------===//
// Integer unary arithmetic
//===----------------------------------------------------------------------===//
multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> {
- defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>;
- defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
- defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
- defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
+ defm "" : SIMDUnary<I8x16, node, name, baseInst>;
+ defm "" : SIMDUnary<I16x8, node, name, !add(baseInst, 32)>;
+ defm "" : SIMDUnary<I32x4, node, name, !add(baseInst, 64)>;
+ defm "" : SIMDUnary<I64x2, node, name, !add(baseInst, 96)>;
}
-multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name,
- bits<32> simdop> {
- defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
- [(set I32:$dst, (i32 (op (vec_t V128:$vec))))],
- vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
+multiclass SIMDReduceVec<Vec vec, SDNode op, string name, bits<32> simdop> {
+ defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
+ [(set I32:$dst, (i32 (op (vec.vt V128:$vec))))],
+ vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name,
+ simdop>;
}
multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> {
- defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>;
- defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 32)>;
- defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 64)>;
- defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 96)>;
+ defm "" : SIMDReduceVec<I8x16, op, name, baseInst>;
+ defm "" : SIMDReduceVec<I16x8, op, name, !add(baseInst, 32)>;
+ defm "" : SIMDReduceVec<I32x4, op, name, !add(baseInst, 64)>;
+ defm "" : SIMDReduceVec<I64x2, op, name, !add(baseInst, 96)>;
}
// Integer vector negation
-def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
+def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, $in)>;
// Integer absolute value: abs
defm ABS : SIMDUnaryInt<abs, "abs", 96>;
@@ -614,64 +847,56 @@ defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 98>;
// All lanes true: all_true
defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 99>;
+// Population count: popcnt
+defm POPCNT : SIMDUnary<I8x16, int_wasm_popcnt, "popcnt", 124>;
+
// Reductions already return 0 or 1, so 'and 1', 'setne 0', and 'seteq 1'
// can be folded out
foreach reduction =
[["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in
-foreach ty = [v16i8, v8i16, v4i32, v2i64] in {
-def : Pat<(i32 (and
- (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
- (i32 1)
- )),
- (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
-def : Pat<(i32 (setne
- (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
- (i32 0)
- )),
- (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
-def : Pat<(i32 (seteq
- (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
- (i32 1)
- )),
- (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
-}
-
-multiclass SIMDBitmask<ValueType vec_t, string vec, bits<32> simdop> {
- defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
- [(set I32:$dst,
- (i32 (int_wasm_bitmask (vec_t V128:$vec)))
- )],
- vec#".bitmask\t$dst, $vec", vec#".bitmask", simdop>;
-}
-
-defm BITMASK : SIMDBitmask<v16i8, "i8x16", 100>;
-defm BITMASK : SIMDBitmask<v8i16, "i16x8", 132>;
-defm BITMASK : SIMDBitmask<v4i32, "i32x4", 164>;
+foreach vec = IntVecs in {
+defvar intrinsic = !cast<Intrinsic>(reduction[0]);
+defvar inst = !cast<NI>(reduction[1]#"_"#vec);
+def : Pat<(i32 (and (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
+def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>;
+def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
+}
+
+multiclass SIMDBitmask<Vec vec, bits<32> simdop> {
+ defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
+ [(set I32:$dst,
+ (i32 (int_wasm_bitmask (vec.vt V128:$vec))))],
+ vec.prefix#".bitmask\t$dst, $vec", vec.prefix#".bitmask",
+ simdop>;
+}
+
+defm BITMASK : SIMDBitmask<I8x16, 100>;
+defm BITMASK : SIMDBitmask<I16x8, 132>;
+defm BITMASK : SIMDBitmask<I32x4, 164>;
+defm BITMASK : SIMDBitmask<I64x2, 196>;
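A scalar sketch of the bitmask operation (illustrative C++ only; the helper name is made up): bit i of the i32 result is the top (sign) bit of lane i.

#include <cstdint>
#include <cstddef>

// i8x16.bitmask: gather the sign bit of each of the 16 byte lanes into the
// low 16 bits of an i32.
int32_t bitmask_i8x16(const int8_t lanes[16]) {
  int32_t mask = 0;
  for (size_t i = 0; i < 16; ++i)
    if (lanes[i] < 0)
      mask |= 1 << i;
  return mask;
}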
//===----------------------------------------------------------------------===//
// Bit shifts
//===----------------------------------------------------------------------===//
-multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, string name,
- bits<32> simdop> {
- defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
- (outs), (ins),
- [(set (vec_t V128:$dst), (node V128:$vec, I32:$x))],
- vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>;
+multiclass SIMDShift<Vec vec, SDNode node, string name, bits<32> simdop> {
+ defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x), (outs), (ins),
+ [(set (vec.vt V128:$dst), (node V128:$vec, I32:$x))],
+ vec.prefix#"."#name#"\t$dst, $vec, $x",
+ vec.prefix#"."#name, simdop>;
}
multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
- defm "" : SIMDShift<v16i8, "i8x16", node, name, baseInst>;
- defm "" : SIMDShift<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
- defm "" : SIMDShift<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
- defm "" : SIMDShift<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
+ defm "" : SIMDShift<I8x16, node, name, baseInst>;
+ defm "" : SIMDShift<I16x8, node, name, !add(baseInst, 32)>;
+ defm "" : SIMDShift<I32x4, node, name, !add(baseInst, 64)>;
+ defm "" : SIMDShift<I64x2, node, name, !add(baseInst, 96)>;
}
// WebAssembly SIMD shifts are nonstandard in that the shift amount is
// an i32 rather than a vector, so they need custom nodes.
-def wasm_shift_t : SDTypeProfile<1, 2,
- [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]
->;
+def wasm_shift_t :
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;
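For context, a scalar sketch of the lane semantics these custom nodes model (illustrative C++ only; the helper name is made up): a single i32 amount is shared by all lanes, and the WebAssembly spec takes it modulo the lane width.

#include <cstdint>
#include <cstddef>

// i32x4.shl: every lane is shifted by the same scalar amount, mod 32.
void shl_i32x4(uint32_t lanes[4], uint32_t amount) {
  for (size_t i = 0; i < 4; ++i)
    lanes[i] <<= (amount & 31u);
}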
@@ -688,24 +913,24 @@ defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>;
//===----------------------------------------------------------------------===//
multiclass SIMDBinaryIntNoI8x16<SDNode node, string name, bits<32> baseInst> {
- defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
- defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
- defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
+ defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>;
+ defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>;
+ defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>;
}
multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> {
- defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>;
- defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
+ defm "" : SIMDBinary<I8x16, node, name, baseInst>;
+ defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>;
}
multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> {
defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
- defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
+ defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>;
}
multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> {
defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
- defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
+ defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>;
}
// Integer addition: add / add_saturate_s / add_saturate_u
@@ -736,38 +961,74 @@ defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>;
// Integer unsigned rounding average: avgr_u
let isCommutable = 1 in {
-defm AVGR_U : SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 123>;
-defm AVGR_U : SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 155>;
+defm AVGR_U : SIMDBinary<I8x16, int_wasm_avgr_unsigned, "avgr_u", 123>;
+defm AVGR_U : SIMDBinary<I16x8, int_wasm_avgr_unsigned, "avgr_u", 155>;
}
-def add_nuw : PatFrag<(ops node:$lhs, node:$rhs),
- (add node:$lhs, node:$rhs),
+def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), (add $lhs, $rhs),
"return N->getFlags().hasNoUnsignedWrap();">;
-foreach nodes = [[v16i8, splat16], [v8i16, splat8]] in
+foreach vec = [I8x16, I16x8] in {
+defvar inst = !cast<NI>("AVGR_U_"#vec);
def : Pat<(wasm_shr_u
(add_nuw
- (add_nuw (nodes[0] V128:$lhs), (nodes[0] V128:$rhs)),
- (nodes[1] (i32 1))
- ),
- (i32 1)
- ),
- (!cast<NI>("AVGR_U_"#nodes[0]) V128:$lhs, V128:$rhs)>;
+ (add_nuw (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
+ (vec.splat (i32 1))),
+ (i32 1)),
+ (inst $lhs, $rhs)>;
+}
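For reference, the lane operation this pattern recognizes is the unsigned rounding average, sketched here in scalar C++ (illustrative only; the helper name is made up). Computing the sum in a wider type is what the add_nuw requirement guarantees on the vector side.

#include <cstdint>

// i8x16.avgr_u on a single lane: (a + b + 1) >> 1 with no intermediate wrap.
uint8_t avgr_u_lane(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((static_cast<uint16_t>(a) + b + 1u) >> 1);
}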
// Widening dot product: i32x4.dot_i16x8_s
let isCommutable = 1 in
defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
[(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
"i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
- 180>;
+ 186>;
+
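A scalar sketch of i32x4.dot_i16x8_s (illustrative C++ only; the helper name is made up): each output lane sums the two signed 16x16->32 products of the corresponding pair of input lanes.

#include <cstdint>
#include <cstddef>

// out[i] = a[2i]*b[2i] + a[2i+1]*b[2i+1], with sign-extension to 32 bits
// before multiplying.
void dot_i16x8_s(const int16_t a[8], const int16_t b[8], int32_t out[4]) {
  for (size_t i = 0; i < 4; ++i)
    out[i] = static_cast<int32_t>(a[2 * i]) * b[2 * i] +
             static_cast<int32_t>(a[2 * i + 1]) * b[2 * i + 1];
}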
+// Extending multiplication: extmul_{low,high}_P
+multiclass SIMDExtBinary<Vec vec, SDNode node, string name, bits<32> simdop> {
+ defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ (outs), (ins),
+ [(set (vec.vt V128:$dst), (node
+ (vec.split.vt V128:$lhs),(vec.split.vt V128:$rhs)))],
+ vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
+ vec.prefix#"."#name, simdop>;
+}
+
+defm EXTMUL_LOW_S :
+ SIMDExtBinary<I16x8, int_wasm_extmul_low_signed, "extmul_low_i8x16_s", 154>;
+defm EXTMUL_HIGH_S :
+ SIMDExtBinary<I16x8, int_wasm_extmul_high_signed, "extmul_high_i8x16_s", 157>;
+defm EXTMUL_LOW_U :
+ SIMDExtBinary<I16x8, int_wasm_extmul_low_unsigned, "extmul_low_i8x16_u", 158>;
+defm EXTMUL_HIGH_U :
+ SIMDExtBinary<I16x8, int_wasm_extmul_high_unsigned, "extmul_high_i8x16_u", 159>;
+
+defm EXTMUL_LOW_S :
+ SIMDExtBinary<I32x4, int_wasm_extmul_low_signed, "extmul_low_i16x8_s", 187>;
+defm EXTMUL_HIGH_S :
+ SIMDExtBinary<I32x4, int_wasm_extmul_high_signed, "extmul_high_i16x8_s", 189>;
+defm EXTMUL_LOW_U :
+ SIMDExtBinary<I32x4, int_wasm_extmul_low_unsigned, "extmul_low_i16x8_u", 190>;
+defm EXTMUL_HIGH_U :
+ SIMDExtBinary<I32x4, int_wasm_extmul_high_unsigned, "extmul_high_i16x8_u", 191>;
+
+defm EXTMUL_LOW_S :
+ SIMDExtBinary<I64x2, int_wasm_extmul_low_signed, "extmul_low_i32x4_s", 210>;
+defm EXTMUL_HIGH_S :
+ SIMDExtBinary<I64x2, int_wasm_extmul_high_signed, "extmul_high_i32x4_s", 211>;
+defm EXTMUL_LOW_U :
+ SIMDExtBinary<I64x2, int_wasm_extmul_low_unsigned, "extmul_low_i32x4_u", 214>;
+defm EXTMUL_HIGH_U :
+ SIMDExtBinary<I64x2, int_wasm_extmul_high_unsigned, "extmul_high_i32x4_u", 215>;
//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
//===----------------------------------------------------------------------===//
multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
- defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>;
- defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
+ defm "" : SIMDUnary<F32x4, node, name, baseInst>;
+ defm "" : SIMDUnary<F64x2, node, name, !add(baseInst, 12)>;
}
// Absolute value: abs
@@ -780,22 +1041,22 @@ defm NEG : SIMDUnaryFP<fneg, "neg", 225>;
defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;
// Rounding: ceil, floor, trunc, nearest
-defm CEIL : SIMDUnary<v4f32, "f32x4", int_wasm_ceil, "ceil", 216>;
-defm FLOOR : SIMDUnary<v4f32, "f32x4", int_wasm_floor, "floor", 217>;
-defm TRUNC: SIMDUnary<v4f32, "f32x4", int_wasm_trunc, "trunc", 218>;
-defm NEAREST: SIMDUnary<v4f32, "f32x4", int_wasm_nearest, "nearest", 219>;
-defm CEIL : SIMDUnary<v2f64, "f64x2", int_wasm_ceil, "ceil", 220>;
-defm FLOOR : SIMDUnary<v2f64, "f64x2", int_wasm_floor, "floor", 221>;
-defm TRUNC: SIMDUnary<v2f64, "f64x2", int_wasm_trunc, "trunc", 222>;
-defm NEAREST: SIMDUnary<v2f64, "f64x2", int_wasm_nearest, "nearest", 223>;
+defm CEIL : SIMDUnary<F32x4, int_wasm_ceil, "ceil", 216>;
+defm FLOOR : SIMDUnary<F32x4, int_wasm_floor, "floor", 217>;
+defm TRUNC: SIMDUnary<F32x4, int_wasm_trunc, "trunc", 218>;
+defm NEAREST: SIMDUnary<F32x4, int_wasm_nearest, "nearest", 219>;
+defm CEIL : SIMDUnary<F64x2, int_wasm_ceil, "ceil", 220>;
+defm FLOOR : SIMDUnary<F64x2, int_wasm_floor, "floor", 221>;
+defm TRUNC: SIMDUnary<F64x2, int_wasm_trunc, "trunc", 222>;
+defm NEAREST: SIMDUnary<F64x2, int_wasm_nearest, "nearest", 223>;
//===----------------------------------------------------------------------===//
// Floating-point binary arithmetic
//===----------------------------------------------------------------------===//
multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
- defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
- defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
+ defm "" : SIMDBinary<F32x4, node, name, baseInst>;
+ defm "" : SIMDBinary<F64x2, node, name, !add(baseInst, 12)>;
}
// Addition: add
@@ -828,63 +1089,151 @@ defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>;
// Conversions
//===----------------------------------------------------------------------===//
-multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op,
- string name, bits<32> simdop> {
- defm op#_#vec_t#_#arg_t :
+multiclass SIMDConvert<Vec vec, Vec arg, SDNode op, string name,
+ bits<32> simdop> {
+ defm op#_#vec :
SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
- [(set (vec_t V128:$dst), (vec_t (op (arg_t V128:$vec))))],
- name#"\t$dst, $vec", name, simdop>;
+ [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
+ vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>;
}
// Floating point to integer with saturation: trunc_sat
-defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 248>;
-defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 249>;
+defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>;
+defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>;
// Integer to floating point: convert
-defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 250>;
-defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 251>;
+defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>;
+defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>;
+
+// Lower llvm.wasm.trunc.saturate.* to saturating instructions
+def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
+ (fp_to_sint_I32x4 $src)>;
+def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
+ (fp_to_uint_I32x4 $src)>;
// Widening operations
-multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg,
- bits<32> baseInst> {
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_signed,
- vec#".widen_low_"#arg#"_s", baseInst>;
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_signed,
- vec#".widen_high_"#arg#"_s", !add(baseInst, 1)>;
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_unsigned,
- vec#".widen_low_"#arg#"_u", !add(baseInst, 2)>;
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_unsigned,
- vec#".widen_high_"#arg#"_u", !add(baseInst, 3)>;
+def widen_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+def widen_low_s : SDNode<"WebAssemblyISD::WIDEN_LOW_S", widen_t>;
+def widen_high_s : SDNode<"WebAssemblyISD::WIDEN_HIGH_S", widen_t>;
+def widen_low_u : SDNode<"WebAssemblyISD::WIDEN_LOW_U", widen_t>;
+def widen_high_u : SDNode<"WebAssemblyISD::WIDEN_HIGH_U", widen_t>;
+
+// TODO: refactor this to be uniform for i64x2 if the numbering is not changed.
+multiclass SIMDWiden<Vec vec, bits<32> baseInst> {
+ defm "" : SIMDConvert<vec, vec.split, widen_low_s,
+ "widen_low_"#vec.split.prefix#"_s", baseInst>;
+ defm "" : SIMDConvert<vec, vec.split, widen_high_s,
+ "widen_high_"#vec.split.prefix#"_s", !add(baseInst, 1)>;
+ defm "" : SIMDConvert<vec, vec.split, widen_low_u,
+ "widen_low_"#vec.split.prefix#"_u", !add(baseInst, 2)>;
+ defm "" : SIMDConvert<vec, vec.split, widen_high_u,
+ "widen_high_"#vec.split.prefix#"_u", !add(baseInst, 3)>;
}
-defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 135>;
-defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 167>;
+defm "" : SIMDWiden<I16x8, 135>;
+defm "" : SIMDWiden<I32x4, 167>;
+
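A scalar sketch of what these widening conversions compute (illustrative C++ only; the helper name is made up): the low (or high) half of the narrower lanes, each sign- or zero-extended one step.

#include <cstdint>
#include <cstddef>

// i16x8.widen_low_i8x16_s: lanes 0..7 of the input, sign-extended to 16 bits.
// The "_high" form uses lanes 8..15; the "_u" forms zero-extend instead.
void widen_low_i8x16_s(const int8_t in[16], int16_t out[8]) {
  for (size_t i = 0; i < 8; ++i)
    out[i] = static_cast<int16_t>(in[i]);
}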
+defm "" : SIMDConvert<I64x2, I32x4, int_wasm_widen_low_signed,
+ "widen_low_i32x4_s", 199>;
+defm "" : SIMDConvert<I64x2, I32x4, int_wasm_widen_high_signed,
+ "widen_high_i32x4_s", 200>;
+defm "" : SIMDConvert<I64x2, I32x4, int_wasm_widen_low_unsigned,
+ "widen_low_i32x4_u", 201>;
+defm "" : SIMDConvert<I64x2, I32x4, int_wasm_widen_high_unsigned,
+ "widen_high_i32x4_u", 202>;
// Narrowing operations
-multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg,
- bits<32> baseInst> {
- defm NARROW_S_#vec_t :
+multiclass SIMDNarrow<Vec vec, bits<32> baseInst> {
+ defvar name = vec.split.prefix#".narrow_"#vec.prefix;
+ defm NARROW_S_#vec.split :
SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
- [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_signed
- (arg_t V128:$low), (arg_t V128:$high))))],
- vec#".narrow_"#arg#"_s\t$dst, $low, $high", vec#".narrow_"#arg#"_s",
- baseInst>;
- defm NARROW_U_#vec_t :
+ [(set (vec.split.vt V128:$dst), (vec.split.vt (int_wasm_narrow_signed
+ (vec.vt V128:$low), (vec.vt V128:$high))))],
+ name#"_s\t$dst, $low, $high", name#"_s", baseInst>;
+ defm NARROW_U_#vec.split :
SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
- [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_unsigned
- (arg_t V128:$low), (arg_t V128:$high))))],
- vec#".narrow_"#arg#"_u\t$dst, $low, $high", vec#".narrow_"#arg#"_u",
- !add(baseInst, 1)>;
+ [(set (vec.split.vt V128:$dst), (vec.split.vt (int_wasm_narrow_unsigned
+ (vec.vt V128:$low), (vec.vt V128:$high))))],
+ name#"_u\t$dst, $low, $high", name#"_u", !add(baseInst, 1)>;
}
-defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 101>;
-defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 133>;
+defm "" : SIMDNarrow<I16x8, 101>;
+defm "" : SIMDNarrow<I32x4, 133>;
+
+// Use narrowing operations for truncating stores. Since the narrowing
+// operations are saturating instead of truncating, we need to mask
+// the stored values first.
+// TODO: Use consts instead of splats
+def store_v8i8_trunc_v8i16 :
+ OutPatFrag<(ops node:$val),
+ (EXTRACT_LANE_I64x2
+ (NARROW_U_I8x16
+ (AND (SPLAT_I32x4 (CONST_I32 0x00ff00ff)), node:$val),
+ $val), // Unused input
+ 0)>;
+
+def store_v4i16_trunc_v4i32 :
+ OutPatFrag<(ops node:$val),
+ (EXTRACT_LANE_I64x2
+ (NARROW_U_I16x8
+ (AND (SPLAT_I32x4 (CONST_I32 0x0000ffff)), node:$val),
+ $val), // Unused input
+ 0)>;
+
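A scalar sketch of why the masking above works (illustrative C++ only; the helper names are made up): i8x16.narrow_i16x8_u saturates rather than truncates, so clearing the high byte of each 16-bit lane first makes the saturating narrow coincide with a plain truncation.

#include <cstdint>

// Unsigned saturating narrow of one 16-bit lane to 8 bits.
uint8_t narrow_u(uint16_t x) { return x > 0xff ? 0xff : static_cast<uint8_t>(x); }

// With the 0x00ff mask applied first, the result equals the truncated low byte
// for every possible input.
uint8_t trunc_lane(uint16_t x) { return narrow_u(x & 0x00ffu); }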
+// Store patterns adapted from WebAssemblyInstrMemory.td
+multiclass NarrowingStorePatNoOffset<Vec vec, OutPatFrag out> {
+ defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
+ def : Pat<(node vec.vt:$val, I32:$addr),
+ (STORE_I64_A32 0, 0, $addr, (out $val))>,
+ Requires<[HasAddr32]>;
+ def : Pat<(node vec.vt:$val, I64:$addr),
+ (STORE_I64_A64 0, 0, $addr, (out $val))>,
+ Requires<[HasAddr64]>;
+}
-// Lower llvm.wasm.trunc.saturate.* to saturating instructions
-def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
- (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
-def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
- (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
+defm : NarrowingStorePatNoOffset<I16x8, store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatNoOffset<I32x4, store_v4i16_trunc_v4i32>;
+
+multiclass NarrowingStorePatImmOff<Vec vec, PatFrag operand, OutPatFrag out> {
+ defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
+ def : Pat<(node vec.vt:$val, (operand I32:$addr, imm:$off)),
+ (STORE_I64_A32 0, imm:$off, $addr, (out $val))>,
+ Requires<[HasAddr32]>;
+ def : Pat<(node vec.vt:$val, (operand I64:$addr, imm:$off)),
+ (STORE_I64_A64 0, imm:$off, $addr, (out $val))>,
+ Requires<[HasAddr64]>;
+}
+
+defm : NarrowingStorePatImmOff<I16x8, regPlusImm, store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatImmOff<I32x4, regPlusImm, store_v4i16_trunc_v4i32>;
+defm : NarrowingStorePatImmOff<I16x8, or_is_add, store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatImmOff<I32x4, or_is_add, store_v4i16_trunc_v4i32>;
+
+multiclass NarrowingStorePatOffsetOnly<Vec vec, OutPatFrag out> {
+ defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
+ def : Pat<(node vec.vt:$val, imm:$off),
+ (STORE_I64_A32 0, imm:$off, (CONST_I32 0), (out $val))>,
+ Requires<[HasAddr32]>;
+ def : Pat<(node vec.vt:$val, imm:$off),
+ (STORE_I64_A64 0, imm:$off, (CONST_I64 0), (out $val))>,
+ Requires<[HasAddr64]>;
+}
+
+defm : NarrowingStorePatOffsetOnly<I16x8, store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatOffsetOnly<I32x4, store_v4i16_trunc_v4i32>;
+
+multiclass NarrowingStorePatGlobalAddrOffOnly<Vec vec, OutPatFrag out> {
+ defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
+ def : Pat<(node vec.vt:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE_I64_A32 0, tglobaladdr:$off, (CONST_I32 0), (out $val))>,
+ Requires<[IsNotPIC, HasAddr32]>;
+ def : Pat<(node vec.vt:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE_I64_A64 0, tglobaladdr:$off, (CONST_I64 0), (out $val))>,
+ Requires<[IsNotPIC, HasAddr64]>;
+}
+
+defm : NarrowingStorePatGlobalAddrOffOnly<I16x8, store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatGlobalAddrOffOnly<I32x4, store_v4i16_trunc_v4i32>;
// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
@@ -897,24 +1246,96 @@ foreach t2 = !foldl(
) in
def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;
+// Extended pairwise addition
+defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_signed,
+ "extadd_pairwise_i8x16_s", 0xc2>;
+defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_unsigned,
+ "extadd_pairwise_i8x16_u", 0xc3>;
+defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed,
+ "extadd_pairwise_i16x8_s", 0xa5>;
+defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned,
+ "extadd_pairwise_i16x8_u", 0xa6>;
+
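A scalar sketch of the pairwise extending add (illustrative C++ only; the helper name is made up): adjacent narrow lanes are extended and summed into one wider lane.

#include <cstdint>
#include <cstddef>

// i16x8.extadd_pairwise_i8x16_s: out[i] = sext(in[2i]) + sext(in[2i+1]).
// The "_u" form zero-extends instead.
void extadd_pairwise_i8x16_s(const int8_t in[16], int16_t out[8]) {
  for (size_t i = 0; i < 8; ++i)
    out[i] = static_cast<int16_t>(in[2 * i]) + static_cast<int16_t>(in[2 * i + 1]);
}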
+
+// Prototype f64x2 conversions
+defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_signed,
+ "convert_low_i32x4_s", 0x53>;
+defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_unsigned,
+ "convert_low_i32x4_u", 0x54>;
+defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_saturate_zero_signed,
+ "trunc_sat_zero_f64x2_s", 0x55>;
+defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_saturate_zero_unsigned,
+ "trunc_sat_zero_f64x2_u", 0x56>;
+defm "" : SIMDConvert<F32x4, F64x2, int_wasm_demote_zero,
+ "demote_zero_f64x2", 0x57>;
+defm "" : SIMDConvert<F64x2, F32x4, int_wasm_promote_low,
+ "promote_low_f32x4", 0x69>;
+
//===----------------------------------------------------------------------===//
// Quasi-Fused Multiply-Add and Subtract (QFMA/QFMS)
//===----------------------------------------------------------------------===//
-multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> {
- defm QFMA_#vec_t :
+multiclass SIMDQFM<Vec vec, bits<32> simdopA, bits<32> simdopS> {
+ defm QFMA_#vec :
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
(outs), (ins),
- [(set (vec_t V128:$dst),
- (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
- vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>;
- defm QFMS_#vec_t :
+ [(set (vec.vt V128:$dst), (int_wasm_qfma
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".qfma\t$dst, $a, $b, $c", vec.prefix#".qfma", simdopA>;
+ defm QFMS_#vec :
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
(outs), (ins),
- [(set (vec_t V128:$dst),
- (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
- vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>;
+ [(set (vec.vt V128:$dst), (int_wasm_qfms
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".qfms\t$dst, $a, $b, $c", vec.prefix#".qfms", simdopS>;
}
-defm "" : SIMDQFM<v4f32, "f32x4", 252>;
-defm "" : SIMDQFM<v2f64, "f64x2", 254>;
+defm "" : SIMDQFM<F32x4, 180, 212>;
+defm "" : SIMDQFM<F64x2, 254, 255>;
+
+//===----------------------------------------------------------------------===//
+// Saturating Rounding Q-Format Multiplication
+//===----------------------------------------------------------------------===//
+
+defm Q15MULR_SAT_S :
+ SIMDBinary<I16x8, int_wasm_q15mulr_saturate_signed, "q15mulr_sat_s", 156>;
+
+//===----------------------------------------------------------------------===//
+// Experimental prefetch instructions: prefetch.t, prefetch.nt
+//===----------------------------------------------------------------------===//
+
+let mayLoad = true, UseNamedOperandTable = true in {
+defm PREFETCH_T_A32 :
+ SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "prefetch.t\t${off}(${addr})$p2align",
+ "prefetch.t\t$off$p2align", 0xc5>;
+defm PREFETCH_T_A64 :
+ SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ "prefetch.t\t${off}(${addr})$p2align",
+ "prefetch.t\t$off$p2align", 0xc5>;
+defm PREFETCH_NT_A32 :
+ SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "prefetch.nt\t${off}(${addr})$p2align",
+ "prefetch.nt\t$off$p2align", 0xc6>;
+defm PREFETCH_NT_A64 :
+ SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ "prefetch.nt\t${off}(${addr})$p2align",
+ "prefetch.nt\t$off$p2align", 0xc6>;
+} // mayLoad, UseNamedOperandTable
+
+multiclass PrefetchPatNoOffset<PatFrag kind, string inst> {
+ def : Pat<(kind I32:$addr), (!cast<NI>(inst # "_A32") 0, 0, $addr)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(kind I64:$addr), (!cast<NI>(inst # "_A64") 0, 0, $addr)>,
+ Requires<[HasAddr64]>;
+}
+
+foreach inst = [["PREFETCH_T", "int_wasm_prefetch_t"],
+ ["PREFETCH_NT", "int_wasm_prefetch_nt"]] in {
+defvar node = !cast<Intrinsic>(inst[1]);
+defm : PrefetchPatNoOffset<node, inst[0]>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
new file mode 100644
index 000000000000..97638c3494ae
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
@@ -0,0 +1,64 @@
+// WebAssemblyInstrTable.td - WebAssembly Table codegen support -*- tablegen -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// WebAssembly Table operand code-gen constructs.
+/// Instructions that handle tables.
+//===----------------------------------------------------------------------===//
+
+
+multiclass TABLE<WebAssemblyRegClass rt> {
+ defm TABLE_GET_#rt : I<(outs rt:$res), (ins table32_op:$table),
+ (outs), (ins table32_op:$table),
+ [],
+ "table.get\t$res, $table",
+ "table.get\t$table",
+ 0x25>;
+
+ defm TABLE_SET_#rt : I<(outs), (ins table32_op:$table, rt:$val, I32:$i),
+ (outs), (ins table32_op:$table),
+ [],
+ "table.set\t$table, $val, $i",
+ "table.set\t$table",
+ 0x26>;
+
+ defm TABLE_GROW_#rt : I<(outs I32:$sz), (ins table32_op:$table, I32:$n, rt:$val),
+ (outs), (ins table32_op:$table),
+ [],
+ "table.grow\t$sz, $table, $n, $val",
+ "table.grow\t$table",
+ 0xfc0f>;
+
+ defm TABLE_FILL_#rt : I<(outs), (ins table32_op:$table, I32:$n, rt:$val, I32:$i),
+ (outs), (ins table32_op:$table),
+ [],
+ "table.fill\t$table, $n, $val, $i",
+ "table.fill\t$table",
+ 0xfc11>;
+
+}
+
+defm "" : TABLE<FUNCREF>, Requires<[HasReferenceTypes]>;
+defm "" : TABLE<EXTERNREF>, Requires<[HasReferenceTypes]>;
+
+defm TABLE_SIZE : I<(outs I32:$sz), (ins table32_op:$table),
+ (outs), (ins table32_op:$table),
+ [],
+ "table.size\t$sz, $table",
+ "table.size\t$table",
+ 0xfc10>,
+ Requires<[HasReferenceTypes]>;
+
+
+defm TABLE_COPY : I<(outs), (ins table32_op:$table1, table32_op:$table2, I32:$n, I32:$s, I32:$d),
+ (outs), (ins table32_op:$table1, table32_op:$table2),
+ [],
+ "table.copy\t$table1, $table2, $n, $s, $d",
+ "table.copy\t$table1, $table2",
+ 0xfc0e>,
+ Requires<[HasReferenceTypes]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index 346938daf1aa..e07dae65fc4a 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -15,7 +15,7 @@
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -32,15 +32,17 @@ class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override;
+ bool removeUnreachableEHPads(MachineFunction &MF);
void recordCatchRetBBs(MachineFunction &MF);
- bool addCatches(MachineFunction &MF);
+ bool hoistCatches(MachineFunction &MF);
+ bool addCatchAlls(MachineFunction &MF);
bool replaceFuncletReturns(MachineFunction &MF);
bool removeUnnecessaryUnreachables(MachineFunction &MF);
- bool addExceptionExtraction(MachineFunction &MF);
+ bool ensureSingleBBTermPads(MachineFunction &MF);
bool restoreStackPointer(MachineFunction &MF);
MachineBasicBlock *getMatchingEHPad(MachineInstr *MI);
- SmallSet<MachineBasicBlock *, 8> CatchRetBBs;
+ SmallPtrSet<MachineBasicBlock *, 8> CatchRetBBs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -94,15 +96,18 @@ WebAssemblyLateEHPrepare::getMatchingEHPad(MachineInstr *MI) {
template <typename Container>
static void eraseDeadBBsAndChildren(const Container &MBBs) {
SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end());
+ SmallPtrSet<MachineBasicBlock *, 8> Deleted;
while (!WL.empty()) {
MachineBasicBlock *MBB = WL.pop_back_val();
- if (!MBB->pred_empty())
+ if (Deleted.count(MBB) || !MBB->pred_empty())
continue;
- SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(),
- MBB->succ_end());
+ SmallVector<MachineBasicBlock *, 4> Succs(MBB->successors());
WL.append(MBB->succ_begin(), MBB->succ_end());
for (auto *Succ : Succs)
MBB->removeSuccessor(Succ);
+ // To prevent deleting the same BB multiple times, which can happen when
+ // 'MBBs' contains both a parent and a child.
+ Deleted.insert(MBB);
MBB->eraseFromParent();
}
}
@@ -118,21 +123,33 @@ bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
if (MF.getFunction().hasPersonalityFn()) {
+ Changed |= removeUnreachableEHPads(MF);
recordCatchRetBBs(MF);
- Changed |= addCatches(MF);
+ Changed |= hoistCatches(MF);
+ Changed |= addCatchAlls(MF);
Changed |= replaceFuncletReturns(MF);
+ Changed |= ensureSingleBBTermPads(MF);
}
Changed |= removeUnnecessaryUnreachables(MF);
- if (MF.getFunction().hasPersonalityFn()) {
- Changed |= addExceptionExtraction(MF);
+ if (MF.getFunction().hasPersonalityFn())
Changed |= restoreStackPointer(MF);
- }
return Changed;
}
-// Record which BB ends with 'CATCHRET' instruction, because this will be
-// replaced with BRs later. This set of 'CATCHRET' BBs is necessary in
-// 'getMatchingEHPad' function.
+// Remove unreachable EH pads and their children. If they remain, CFG
+// stackification can be tricky.
+bool WebAssemblyLateEHPrepare::removeUnreachableEHPads(MachineFunction &MF) {
+ SmallVector<MachineBasicBlock *, 4> ToDelete;
+ for (auto &MBB : MF)
+ if (MBB.isEHPad() && MBB.pred_empty())
+ ToDelete.push_back(&MBB);
+ eraseDeadBBsAndChildren(ToDelete);
+ return !ToDelete.empty();
+}
+
+// Record which BBs end with a catchret instruction, because these will be
+// replaced with 'br's later. This set of catchret BBs is needed by the
+// 'getMatchingEHPad' function.
void WebAssemblyLateEHPrepare::recordCatchRetBBs(MachineFunction &MF) {
CatchRetBBs.clear();
for (auto &MBB : MF) {
@@ -145,25 +162,69 @@ void WebAssemblyLateEHPrepare::recordCatchRetBBs(MachineFunction &MF) {
}
}
-// Add catch instruction to beginning of catchpads and cleanuppads.
-bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) {
+// Hoist catch instructions to the beginning of their matching EH pad BBs in
+// cases where:
+// (1) the catch instruction is not the first instruction in the EH pad.
+// ehpad:
+// some_other_instruction
+// ...
+// %exn = catch 0
+// (2) the catch instruction is in a non-EH pad BB. For example:
+// ehpad:
+// br bb0
+// bb0:
+// %exn = catch 0
+bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
+ bool Changed = false;
+ SmallVector<MachineInstr *, 16> Catches;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ if (WebAssembly::isCatch(MI.getOpcode()))
+ Catches.push_back(&MI);
+
+ for (auto *Catch : Catches) {
+ MachineBasicBlock *EHPad = getMatchingEHPad(Catch);
+ assert(EHPad && "No matching EH pad for catch");
+ auto InsertPos = EHPad->begin();
+ // Skip EH_LABELs in the beginning of an EH pad if present. We don't use
+ // these labels at the moment, but other targets also seem to have an
+ // EH_LABEL instruction in the beginning of an EH pad.
+ while (InsertPos != EHPad->end() && InsertPos->isEHLabel())
+ InsertPos++;
+ if (InsertPos == Catch)
+ continue;
+ Changed = true;
+ EHPad->insert(InsertPos, Catch->removeFromParent());
+ }
+ return Changed;
+}
+
+// Add catch_all to the beginning of cleanup pads.
+bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) {
bool Changed = false;
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
+
for (auto &MBB : MF) {
- if (MBB.isEHPad()) {
+ if (!MBB.isEHPad())
+ continue;
+ auto InsertPos = MBB.begin();
+ // Skip EH_LABELs in the beginning of an EH pad if present.
+ while (InsertPos != MBB.end() && InsertPos->isEHLabel())
+ InsertPos++;
+ // This runs after hoistCatches(), so we assume that if there is a catch,
+ // it should be the first non-EH-label instruction in an EH pad.
+ if (InsertPos == MBB.end() ||
+ !WebAssembly::isCatch(InsertPos->getOpcode())) {
Changed = true;
- auto InsertPos = MBB.begin();
- if (InsertPos->isEHLabel()) // EH pad starts with an EH label
- ++InsertPos;
- Register DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
- BuildMI(MBB, InsertPos, MBB.begin()->getDebugLoc(),
- TII.get(WebAssembly::CATCH), DstReg);
+ BuildMI(MBB, InsertPos, InsertPos->getDebugLoc(),
+ TII.get(WebAssembly::CATCH_ALL));
}
}
return Changed;
}
+// Replace pseudo-instructions catchret and cleanupret with br and rethrow
+// respectively.
bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
bool Changed = false;
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -185,17 +246,11 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
Changed = true;
break;
}
- case WebAssembly::CLEANUPRET:
- case WebAssembly::RETHROW_IN_CATCH: {
- // Replace a cleanupret/rethrow_in_catch with a rethrow
- auto *EHPad = getMatchingEHPad(TI);
- auto CatchPos = EHPad->begin();
- if (CatchPos->isEHLabel()) // EH pad starts with an EH label
- ++CatchPos;
- MachineInstr *Catch = &*CatchPos;
- Register ExnReg = Catch->getOperand(0).getReg();
+ case WebAssembly::CLEANUPRET: {
+ // Replace a cleanupret with a rethrow. For C++ support, currently
+ // rethrow's immediate argument is always 0 (= the latest exception).
BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
- .addReg(ExnReg);
+ .addImm(0);
TI->eraseFromParent();
Changed = true;
break;
@@ -205,6 +260,7 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
return Changed;
}
+// Remove unnecessary unreachables after a throw or rethrow.
bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
MachineFunction &MF) {
bool Changed = false;
@@ -220,8 +276,7 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
// because throw itself is a terminator, and also delete successors if
// any.
MBB.erase(std::next(MI.getIterator()), MBB.end());
- SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
- MBB.succ_end());
+ SmallVector<MachineBasicBlock *, 8> Succs(MBB.successors());
for (auto *Succ : Succs)
if (!Succ->isEHPad())
MBB.removeSuccessor(Succ);
@@ -232,154 +287,78 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
return Changed;
}
-// Wasm uses 'br_on_exn' instruction to check the tag of an exception. It takes
-// exnref type object returned by 'catch', and branches to the destination if it
-// matches a given tag. We currently use __cpp_exception symbol to represent the
-// tag for all C++ exceptions.
+// Clang-generated terminate pads are a single-BB EH pad in the form of
+// termpad:
+// %exn = catch $__cpp_exception
+// call @__clang_call_terminate(%exn)
+// unreachable
+// (There can be local.sets and local.gets before the call if we didn't run
+// RegStackify.)
+// But code transformations can change or add more control flow, so the call
+// to the __clang_call_terminate() function may not be in the original EH pad
+// anymore.
+// This ensures every terminate pad is a single BB in the form illustrated
+// above.
//
-// block $l (result i32)
-// ...
-// ;; exnref $e is on the stack at this point
-// br_on_exn $l $e ;; branch to $l with $e's arguments
-// ...
-// end
-// ;; Here we expect the extracted values are on top of the wasm value stack
-// ... Handle exception using values ...
-//
-// br_on_exn takes an exnref object and branches if it matches the given tag.
-// There can be multiple br_on_exn instructions if we want to match for another
-// tag, but for now we only test for __cpp_exception tag, and if it does not
-// match, i.e., it is a foreign exception, we rethrow it.
-//
-// In the destination BB that's the target of br_on_exn, extracted exception
-// values (in C++'s case a single i32, which represents an exception pointer)
-// are placed on top of the wasm stack. Because we can't model wasm stack in
-// LLVM instruction, we use 'extract_exception' pseudo instruction to retrieve
-// it. The pseudo instruction will be deleted later.
-bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) {
+// This is preparation work for the HandleEHTerminatePads pass later, which
+// duplicates terminate pads both for 'catch' and 'catch_all'. Refer to
+// WebAssemblyHandleEHTerminatePads.cpp for details.
+bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- auto *EHInfo = MF.getWasmEHFuncInfo();
- SmallVector<MachineInstr *, 16> ExtractInstrs;
- SmallVector<MachineInstr *, 8> ToDelete;
- for (auto &MBB : MF) {
- for (auto &MI : MBB) {
- if (MI.getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) {
- if (MI.getOperand(0).isDead())
- ToDelete.push_back(&MI);
- else
- ExtractInstrs.push_back(&MI);
- }
- }
- }
- bool Changed = !ToDelete.empty() || !ExtractInstrs.empty();
- for (auto *MI : ToDelete)
- MI->eraseFromParent();
- if (ExtractInstrs.empty())
- return Changed;
-
- // Find terminate pads.
- SmallSet<MachineBasicBlock *, 8> TerminatePads;
+
+ // Find calls to __clang_call_terminate()
+ SmallVector<MachineInstr *, 8> ClangCallTerminateCalls;
+ SmallPtrSet<MachineBasicBlock *, 8> TermPads;
for (auto &MBB : MF) {
for (auto &MI : MBB) {
if (MI.isCall()) {
const MachineOperand &CalleeOp = MI.getOperand(0);
if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() ==
- WebAssembly::ClangCallTerminateFn)
- TerminatePads.insert(getMatchingEHPad(&MI));
+ WebAssembly::ClangCallTerminateFn) {
+ MachineBasicBlock *EHPad = getMatchingEHPad(&MI);
+ assert(EHPad && "No matching EH pad for __clang_call_terminate");
+ // In case a __clang_call_terminate call has been duplicated by a code
+ // transformation, so that one terminate pad contains multiple
+ // __clang_call_terminate calls, we only count one of them.
+ if (TermPads.insert(EHPad).second)
+ ClangCallTerminateCalls.push_back(&MI);
+ }
}
}
}
- for (auto *Extract : ExtractInstrs) {
- MachineBasicBlock *EHPad = getMatchingEHPad(Extract);
- assert(EHPad && "No matching EH pad for extract_exception");
- auto CatchPos = EHPad->begin();
- if (CatchPos->isEHLabel()) // EH pad starts with an EH label
- ++CatchPos;
- MachineInstr *Catch = &*CatchPos;
-
- if (Catch->getNextNode() != Extract)
- EHPad->insert(Catch->getNextNode(), Extract->removeFromParent());
-
- // - Before:
- // ehpad:
- // %exnref:exnref = catch
- // %exn:i32 = extract_exception
- // ... use exn ...
- //
- // - After:
- // ehpad:
- // %exnref:exnref = catch
- // br_on_exn %thenbb, $__cpp_exception, %exnref
- // br %elsebb
- // elsebb:
- // rethrow
- // thenbb:
- // %exn:i32 = extract_exception
- // ... use exn ...
- Register ExnReg = Catch->getOperand(0).getReg();
- auto *ThenMBB = MF.CreateMachineBasicBlock();
- auto *ElseMBB = MF.CreateMachineBasicBlock();
- MF.insert(std::next(MachineFunction::iterator(EHPad)), ElseMBB);
- MF.insert(std::next(MachineFunction::iterator(ElseMBB)), ThenMBB);
- ThenMBB->splice(ThenMBB->end(), EHPad, Extract, EHPad->end());
- ThenMBB->transferSuccessors(EHPad);
- EHPad->addSuccessor(ThenMBB);
- EHPad->addSuccessor(ElseMBB);
-
- DebugLoc DL = Extract->getDebugLoc();
- const char *CPPExnSymbol = MF.createExternalSymbolName("__cpp_exception");
- BuildMI(EHPad, DL, TII.get(WebAssembly::BR_ON_EXN))
- .addMBB(ThenMBB)
- .addExternalSymbol(CPPExnSymbol)
- .addReg(ExnReg);
- BuildMI(EHPad, DL, TII.get(WebAssembly::BR)).addMBB(ElseMBB);
-
- // When this is a terminate pad with __clang_call_terminate() call, we don't
- // rethrow it anymore and call __clang_call_terminate() with a nullptr
- // argument, which will call std::terminate().
- //
- // - Before:
- // ehpad:
- // %exnref:exnref = catch
- // %exn:i32 = extract_exception
- // call @__clang_call_terminate(%exn)
- // unreachable
- //
- // - After:
- // ehpad:
- // %exnref:exnref = catch
- // br_on_exn %thenbb, $__cpp_exception, %exnref
- // br %elsebb
- // elsebb:
- // call @__clang_call_terminate(0)
- // unreachable
- // thenbb:
- // %exn:i32 = extract_exception
- // call @__clang_call_terminate(%exn)
- // unreachable
- if (TerminatePads.count(EHPad)) {
- Function *ClangCallTerminateFn =
- MF.getFunction().getParent()->getFunction(
- WebAssembly::ClangCallTerminateFn);
- assert(ClangCallTerminateFn &&
- "There is no __clang_call_terminate() function");
- Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
- BuildMI(ElseMBB, DL, TII.get(WebAssembly::CONST_I32), Reg).addImm(0);
- BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL))
- .addGlobalAddress(ClangCallTerminateFn)
- .addReg(Reg);
- BuildMI(ElseMBB, DL, TII.get(WebAssembly::UNREACHABLE));
-
- } else {
- BuildMI(ElseMBB, DL, TII.get(WebAssembly::RETHROW)).addReg(ExnReg);
- if (EHInfo->hasEHPadUnwindDest(EHPad))
- ElseMBB->addSuccessor(EHInfo->getEHPadUnwindDest(EHPad));
- }
- }
+ bool Changed = false;
+ for (auto *Call : ClangCallTerminateCalls) {
+ MachineBasicBlock *EHPad = getMatchingEHPad(Call);
+ assert(EHPad && "No matching EH pad for __clang_call_terminate");
+
+ // If it is already the form we want, skip it
+ if (Call->getParent() == EHPad &&
+ Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE)
+ continue;
- return true;
+ // In case the __clang_call_terminate() call is not in its matching EH pad,
+ // move the call to the end of the EH pad and add an unreachable instruction
+ // after that. Delete all successors and their children if any, because here
+ // the program terminates.
+ Changed = true;
+ // This runs after hoistCatches(), so the catch instruction should be at the top
+ MachineInstr *Catch = WebAssembly::findCatch(EHPad);
+ assert(Catch && "EH pad does not have a catch instruction");
+ // Takes the result register of the catch instruction as argument. There may
+ // have been some other local.set/local.gets in between, but at this point
+ // we don't care.
+ Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
+ auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
+ EHPad->insert(InsertPos, Call->removeFromParent());
+ BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
+ TII.get(WebAssembly::UNREACHABLE));
+ EHPad->erase(InsertPos, EHPad->end());
+ SmallVector<MachineBasicBlock *, 8> Succs(EHPad->successors());
+ for (auto *Succ : Succs)
+ EHPad->removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(Succs);
+ }
+ return Changed;
}
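Note (illustrative, not part of the patch): the single-BB terminate pad shape that ensureSingleBBTermPads() establishes (catch first, then the __clang_call_terminate call, then unreachable) can be stated as a small checker. A minimal sketch assuming the backend headers used by the surrounding files; the helper name isSingleBBTermPad is hypothetical, and WebAssembly::findCatch() is the utility added to WebAssemblyUtilities.cpp later in this change.

#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // opcode enum (assumed path)
#include "WebAssemblyUtilities.h"                 // WebAssembly::findCatch
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical checker for the post-pass invariant described above.
static bool isSingleBBTermPad(llvm::MachineBasicBlock &MBB) {
  using namespace llvm;
  if (!MBB.isEHPad())
    return false;
  MachineInstr *Catch = WebAssembly::findCatch(&MBB);
  if (!Catch)
    return false;
  MachineInstr *Call = Catch->getNextNode();  // the __clang_call_terminate call
  if (!Call || !Call->isCall())
    return false;
  MachineInstr *Term = Call->getNextNode();   // must be the trailing unreachable
  return Term && Term->getOpcode() == WebAssembly::UNREACHABLE &&
         Term->getNextNode() == nullptr;
}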
// After the stack is unwound due to a thrown exception, the __stack_pointer
@@ -406,7 +385,7 @@ bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) {
auto InsertPos = MBB.begin();
if (InsertPos->isEHLabel()) // EH pad starts with an EH label
++InsertPos;
- if (InsertPos->getOpcode() == WebAssembly::CATCH)
+ if (WebAssembly::isCatch(InsertPos->getOpcode()))
++InsertPos;
FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB,
InsertPos, MBB.begin()->getDebugLoc());
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 5fce4a600510..ff6404c30971 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -140,8 +140,7 @@
/// 1) Lower
/// longjmp(buf, value)
/// into
-/// emscripten_longjmp_jmpbuf(buf, value)
-/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later.
+/// emscripten_longjmp(buf, value)
///
/// In case calls to setjmp() exists
///
@@ -196,19 +195,16 @@
/// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to
/// each setjmp callsite. Label 0 means this longjmp buffer does not
/// correspond to one of the setjmp callsites in this function, so in this
-/// case we just chain the longjmp to the caller. (Here we call
-/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf.
-/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while
-/// emscripten_longjmp takes an int. Both of them will eventually be lowered
-/// to emscripten_longjmp in s2wasm, but here we need two signatures - we
-/// can't translate an int value to a jmp_buf.)
-/// Label -1 means no longjmp occurred. Otherwise we jump to the right
-/// post-setjmp BB based on the label.
+/// case we just chain the longjmp to the caller. Label -1 means no longjmp
+/// occurred. Otherwise we jump to the right post-setjmp BB based on the
+/// label.
///
///===----------------------------------------------------------------------===//
#include "WebAssembly.h"
+#include "WebAssemblyTargetMachine.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@@ -239,7 +235,6 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
Function *ResumeF = nullptr;
Function *EHTypeIDF = nullptr;
Function *EmLongjmpF = nullptr;
- Function *EmLongjmpJmpbufF = nullptr;
Function *SaveSetjmpF = nullptr;
Function *TestSetjmpF = nullptr;
@@ -314,13 +309,23 @@ static bool canThrow(const Value *V) {
// Get a global variable with the given name. If it doesn't exist, declare it,
// which will generate an import and assume that it will exist at link time.
static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB,
+ WebAssemblyTargetMachine &TM,
const char *Name) {
-
- auto *GV =
- dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, IRB.getInt32Ty()));
+ auto Int32Ty = IRB.getInt32Ty();
+ auto *GV = dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, Int32Ty));
if (!GV)
report_fatal_error(Twine("unable to create global: ") + Name);
+ // If the target supports TLS, make this variable thread-local. We can't just
+ // unconditionally make it thread-local and depend on
+ // CoalesceFeaturesAndStripAtomics to downgrade it, because stripping TLS has
+ // the side effect of disallowing the object from being linked into a
+ // shared-memory module, which we don't want to be responsible for.
+ auto *Subtarget = TM.getSubtargetImpl();
+ auto TLS = Subtarget->hasAtomics() && Subtarget->hasBulkMemory()
+ ? GlobalValue::LocalExecTLSModel
+ : GlobalValue::NotThreadLocal;
+ GV->setThreadLocalMode(TLS);
return GV;
}
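A standalone sketch of the same decision, not from the patch: declare an i32 global and make it local-exec TLS only when the target is known to support threads. The boolean ThreadsSupported stands in for the hasAtomics()/hasBulkMemory() subtarget query used above, and makeI32Global is a hypothetical name.

#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"

static llvm::GlobalVariable *
makeI32Global(llvm::Module &M, llvm::StringRef Name, bool ThreadsSupported) {
  auto *Ty = llvm::Type::getInt32Ty(M.getContext());
  auto *GV = llvm::dyn_cast<llvm::GlobalVariable>(M.getOrInsertGlobal(Name, Ty));
  if (!GV)
    return nullptr; // the name is already taken by a differently-typed entity
  GV->setThreadLocalMode(ThreadsSupported
                             ? llvm::GlobalValue::LocalExecTLSModel
                             : llvm::GlobalValue::NotThreadLocal);
  return GV;
}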
@@ -338,7 +343,7 @@ static std::string getSignature(FunctionType *FTy) {
if (FTy->isVarArg())
OS << "_...";
Sig = OS.str();
- Sig.erase(remove_if(Sig, isSpace), Sig.end());
+ erase_if(Sig, isSpace);
// When s2wasm parses .s file, a comma means the end of an argument. So a
// mangled function name can contain any character but a comma.
std::replace(Sig.begin(), Sig.end(), ',', '.');
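The cleanup above (drop whitespace, then turn commas into dots so s2wasm does not read them as argument separators) is easy to verify in isolation. A standard-library-only sketch with a made-up input string, using std::remove_if in place of llvm::erase_if:

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

int main() {
  std::string Sig = "i32_func_f32, f64_...";
  Sig.erase(std::remove_if(Sig.begin(), Sig.end(),
                           [](unsigned char C) { return std::isspace(C); }),
            Sig.end());
  std::replace(Sig.begin(), Sig.end(), ',', '.');
  std::cout << Sig << "\n"; // prints: i32_func_f32.f64_...
}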
@@ -630,6 +635,40 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
}
}
+// Replace uses of longjmp with emscripten_longjmp. emscripten_longjmp takes
+// arguments of type {i32, i32} and longjmp takes {jmp_buf*, i32}, so we need a
+// ptrtoint instruction here to make the types match. jmp_buf* will eventually be
+// lowered to i32 in the wasm backend.
+static void replaceLongjmpWithEmscriptenLongjmp(Function *LongjmpF,
+ Function *EmLongjmpF) {
+ SmallVector<CallInst *, 8> ToErase;
+ LLVMContext &C = LongjmpF->getParent()->getContext();
+ IRBuilder<> IRB(C);
+
+ // For calls to longjmp, replace it with emscripten_longjmp and cast its first
+ // argument (jmp_buf*) to int
+ for (User *U : LongjmpF->users()) {
+ auto *CI = dyn_cast<CallInst>(U);
+ if (CI && CI->getCalledFunction() == LongjmpF) {
+ IRB.SetInsertPoint(CI);
+ Value *Jmpbuf =
+ IRB.CreatePtrToInt(CI->getArgOperand(0), IRB.getInt32Ty(), "jmpbuf");
+ IRB.CreateCall(EmLongjmpF, {Jmpbuf, CI->getArgOperand(1)});
+ ToErase.push_back(CI);
+ }
+ }
+ for (auto *I : ToErase)
+ I->eraseFromParent();
+
+ // If we have any remaining uses of longjmp's function pointer, replace it
+ // with (int(*)(jmp_buf*, int))emscripten_longjmp.
+ if (!LongjmpF->uses().empty()) {
+ Value *EmLongjmp =
+ IRB.CreateBitCast(EmLongjmpF, LongjmpF->getType(), "em_longjmp");
+ LongjmpF->replaceAllUsesWith(EmLongjmp);
+ }
+}
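For a source-level view, the rewrite above amounts to the following sketch (illustrative only; emscripten_longjmp is declared with the void(i32, i32) signature registered in runOnModule below, and lowered_longjmp is a made-up name):

#include <cstdint>

// Assumed JS-side primitive, matching the FunctionType registered below.
extern "C" void emscripten_longjmp(uint32_t Jmpbuf, uint32_t Value);

// On wasm32 the jmp_buf pointer is narrowed to i32, which is exactly what the
// ptrtoint above does; the value argument passes through unchanged.
inline void lowered_longjmp(void *Jmpbuf, int Value) {
  emscripten_longjmp(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(Jmpbuf)),
                     static_cast<uint32_t>(Value));
}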
+
bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n");
@@ -642,11 +681,19 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty();
bool DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed);
+ if ((EnableEH || DoSjLj) &&
+ Triple(M.getTargetTriple()).getArch() == Triple::wasm64)
+ report_fatal_error("Emscripten EH/SjLj is not supported with wasm64 yet");
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ assert(TPC && "Expected a TargetPassConfig");
+ auto &TM = TPC->getTM<WebAssemblyTargetMachine>();
+
// Declare (or get) global variables __THREW__, __threwValue, and
// getTempRet0/setTempRet0 function which are used in common for both
// exception handling and setjmp/longjmp handling
- ThrewGV = getGlobalVariableI32(M, IRB, "__THREW__");
- ThrewValueGV = getGlobalVariableI32(M, IRB, "__threwValue");
+ ThrewGV = getGlobalVariableI32(M, IRB, TM, "__THREW__");
+ ThrewValueGV = getGlobalVariableI32(M, IRB, TM, "__threwValue");
GetTempRet0Func = getEmscriptenFunction(
FunctionType::get(IRB.getInt32Ty(), false), "getTempRet0", &M);
SetTempRet0Func = getEmscriptenFunction(
@@ -680,22 +727,21 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
if (DoSjLj) {
Changed = true; // We have setjmp or longjmp somewhere
- if (LongjmpF) {
- // Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is
- // defined in JS code
- EmLongjmpJmpbufF = getEmscriptenFunction(LongjmpF->getFunctionType(),
- "emscripten_longjmp_jmpbuf", &M);
- LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF);
- }
+ // Register emscripten_longjmp function
+ FunctionType *FTy = FunctionType::get(
+ IRB.getVoidTy(), {IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
+ EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M);
+
+ if (LongjmpF)
+ replaceLongjmpWithEmscriptenLongjmp(LongjmpF, EmLongjmpF);
if (SetjmpF) {
// Register saveSetjmp function
FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
- FunctionType *FTy =
- FunctionType::get(Type::getInt32PtrTy(C),
- {SetjmpFTy->getParamType(0), IRB.getInt32Ty(),
- Type::getInt32PtrTy(C), IRB.getInt32Ty()},
- false);
+ FTy = FunctionType::get(Type::getInt32PtrTy(C),
+ {SetjmpFTy->getParamType(0), IRB.getInt32Ty(),
+ Type::getInt32PtrTy(C), IRB.getInt32Ty()},
+ false);
SaveSetjmpF = getEmscriptenFunction(FTy, "saveSetjmp", &M);
// Register testSetjmp function
@@ -704,10 +750,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
{IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}, false);
TestSetjmpF = getEmscriptenFunction(FTy, "testSetjmp", &M);
- FTy = FunctionType::get(IRB.getVoidTy(),
- {IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
- EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M);
-
// Only traverse functions that uses setjmp in order not to insert
// unnecessary prep / cleanup code in every function
SmallPtrSet<Function *, 8> SetjmpUsers;
@@ -769,7 +811,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
} else {
// This can't throw, and we don't need this invoke, just replace it with a
// call+branch
- SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end());
+ SmallVector<Value *, 16> Args(II->args());
CallInst *NewCall =
IRB.CreateCall(II->getFunctionType(), II->getCalledOperand(), Args);
NewCall->takeName(II);
@@ -843,16 +885,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
SmallVector<Value *, 16> FMCArgs;
for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) {
Constant *Clause = LPI->getClause(I);
- // As a temporary workaround for the lack of aggregate varargs support
- // in the interface between JS and wasm, break out filter operands into
- // their component elements.
- if (LPI->isFilter(I)) {
- auto *ATy = cast<ArrayType>(Clause->getType());
- for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) {
- Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter");
- FMCArgs.push_back(EV);
- }
- } else
+ // TODO Handle filters (= exception specifications).
+ // https://bugs.llvm.org/show_bug.cgi?id=50396
+ if (LPI->isCatch(I))
FMCArgs.push_back(Clause);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 304dca2ebfe4..86d59ef807ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -38,29 +38,34 @@ cl::opt<bool>
" instruction output for test purposes only."),
cl::init(false));
+extern cl::opt<bool> EnableEmException;
+extern cl::opt<bool> EnableEmSjLj;
+
static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI);
MCSymbol *
WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const GlobalValue *Global = MO.getGlobal();
- auto *WasmSym = cast<MCSymbolWasm>(Printer.getSymbol(Global));
-
- if (const auto *FuncTy = dyn_cast<FunctionType>(Global->getValueType())) {
- const MachineFunction &MF = *MO.getParent()->getParent()->getParent();
- const TargetMachine &TM = MF.getTarget();
- const Function &CurrentFunc = MF.getFunction();
-
- SmallVector<MVT, 1> ResultMVTs;
- SmallVector<MVT, 4> ParamMVTs;
- const auto *const F = dyn_cast<Function>(Global);
- computeSignatureVTs(FuncTy, F, CurrentFunc, TM, ParamMVTs, ResultMVTs);
-
- auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs);
- WasmSym->setSignature(Signature.get());
- Printer.addSignature(std::move(Signature));
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
- }
-
+ if (!isa<Function>(Global))
+ return cast<MCSymbolWasm>(Printer.getSymbol(Global));
+
+ const auto *FuncTy = cast<FunctionType>(Global->getValueType());
+ const MachineFunction &MF = *MO.getParent()->getParent()->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ const Function &CurrentFunc = MF.getFunction();
+
+ SmallVector<MVT, 1> ResultMVTs;
+ SmallVector<MVT, 4> ParamMVTs;
+ const auto *const F = dyn_cast<Function>(Global);
+ computeSignatureVTs(FuncTy, F, CurrentFunc, TM, ParamMVTs, ResultMVTs);
+ auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs);
+
+ bool InvokeDetected = false;
+ auto *WasmSym = Printer.getMCSymbolForFunction(
+ F, EnableEmException || EnableEmSjLj, Signature.get(), InvokeDetected);
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
return WasmSym;
}
@@ -134,6 +139,9 @@ MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
case WebAssemblyII::MO_MEMORY_BASE_REL:
Kind = MCSymbolRefExpr::VK_WASM_MBREL;
break;
+ case WebAssemblyII::MO_TLS_BASE_REL:
+ Kind = MCSymbolRefExpr::VK_WASM_TLSREL;
+ break;
case WebAssemblyII::MO_TABLE_BASE_REL:
Kind = MCSymbolRefExpr::VK_WASM_TBREL;
break;
@@ -266,6 +274,11 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
SmallVector<wasm::ValType, 4>());
break;
}
+ } else if (Info.OperandType == WebAssembly::OPERAND_HEAPTYPE) {
+ assert(static_cast<WebAssembly::HeapType>(MO.getImm()) !=
+ WebAssembly::HeapType::Invalid);
+ // With typed function references, this will need a case for type
+ // index operands. Otherwise, fall through.
}
}
MCOp = MCOperand::createImm(MO.getImm());
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index a2da0ea849e0..6bfed1a7195c 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -97,7 +97,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
// values through live-range splitting and stackification, it will have to
// do.
MF.getInfo<WebAssemblyFunctionInfo>()->setFrameBaseVreg(
- SplitLIs.back()->reg);
+ SplitLIs.back()->reg());
}
SplitLIs.clear();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index a587c9d23d2b..ba1c4b7233f2 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -111,8 +111,11 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
case WebAssembly::V128RegClassID:
CopyLocalOpc = WebAssembly::COPY_V128;
break;
- case WebAssembly::EXNREFRegClassID:
- CopyLocalOpc = WebAssembly::COPY_EXNREF;
+ case WebAssembly::FUNCREFRegClassID:
+ CopyLocalOpc = WebAssembly::COPY_FUNCREF;
+ break;
+ case WebAssembly::EXTERNREFRegClassID:
+ CopyLocalOpc = WebAssembly::COPY_EXTERNREF;
break;
default:
llvm_unreachable("Unexpected register class for return operand");
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index 20fe2b2b7bfc..fe127dec8aed 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -106,8 +106,8 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
continue;
LiveInterval *LI = &Liveness->getInterval(VReg);
- assert(LI->weight == 0.0f);
- LI->weight = computeWeight(MRI, MBFI, VReg);
+ assert(LI->weight() == 0.0f);
+ LI->setWeight(computeWeight(MRI, MBFI, VReg));
LLVM_DEBUG(LI->dump());
SortedIntervals.push_back(LI);
}
@@ -118,10 +118,10 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
// TODO: Investigate more intelligent sorting heuristics. For starters, we
// should try to coalesce adjacent live intervals before non-adjacent ones.
llvm::sort(SortedIntervals, [MRI](LiveInterval *LHS, LiveInterval *RHS) {
- if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
- return MRI->isLiveIn(LHS->reg);
- if (LHS->weight != RHS->weight)
- return LHS->weight > RHS->weight;
+ if (MRI->isLiveIn(LHS->reg()) != MRI->isLiveIn(RHS->reg()))
+ return MRI->isLiveIn(LHS->reg());
+ if (LHS->weight() != RHS->weight())
+ return LHS->weight() > RHS->weight();
if (LHS->empty() || RHS->empty())
return !LHS->empty() && RHS->empty();
return *LHS < *RHS;
@@ -135,14 +135,14 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) {
LiveInterval *LI = SortedIntervals[I];
- unsigned Old = LI->reg;
+ unsigned Old = LI->reg();
size_t Color = I;
const TargetRegisterClass *RC = MRI->getRegClass(Old);
// Check if it's possible to reuse any of the used colors.
if (!MRI->isLiveIn(Old))
for (unsigned C : UsedColors.set_bits()) {
- if (MRI->getRegClass(SortedIntervals[C]->reg) != RC)
+ if (MRI->getRegClass(SortedIntervals[C]->reg()) != RC)
continue;
for (LiveInterval *OtherLI : Assignments[C])
if (!OtherLI->empty() && OtherLI->overlaps(*LI))
@@ -152,7 +152,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
continue_outer:;
}
- unsigned New = SortedIntervals[Color]->reg;
+ unsigned New = SortedIntervals[Color]->reg();
SlotMapping[I] = New;
Changed |= Old != New;
UsedColors.set(Color);
@@ -160,7 +160,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
// If we reassigned the stack pointer, update the debug frame base info.
if (Old != New && MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Old)
MFI.setFrameBaseVreg(New);
- LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg)
+ LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg())
<< " to vreg" << Register::virtReg2Index(New) << "\n");
}
if (!Changed)
@@ -168,7 +168,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
// Rewrite register operands.
for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) {
- unsigned Old = SortedIntervals[I]->reg;
+ unsigned Old = SortedIntervals[I]->reg();
unsigned New = SlotMapping[I];
if (Old != New)
MRI->replaceRegWith(Old, New);
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 1d4e2e3a8f9e..d474b9a2c1ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -123,7 +123,7 @@ static void convertImplicitDefToConstZero(MachineInstr *MI,
} else if (RegClass == &WebAssembly::V128RegClass) {
// TODO: Replace this with v128.const 0 once that is supported in V8
Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
- MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32));
+ MI->setDesc(TII->get(WebAssembly::SPLAT_I32x4));
MI->addOperand(MachineOperand::CreateReg(TempReg, false));
MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(WebAssembly::CONST_I32), TempReg)
@@ -342,7 +342,7 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
// instruction in which the current value is used, we cannot
// stackify. Stackifying in this case would require that def moving below the
// current def in the stack, which cannot be achieved, even with locals.
- for (const auto &SubsequentDef : drop_begin(DefI->defs(), 1)) {
+ for (const auto &SubsequentDef : drop_begin(DefI->defs())) {
for (const auto &PriorUse : UseI->uses()) {
if (&PriorUse == Use)
break;
@@ -359,10 +359,9 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
if (NextI == Insert)
return true;
- // 'catch' and 'extract_exception' should be the first instruction of a BB and
- // cannot move.
- if (DefI->getOpcode() == WebAssembly::CATCH ||
- DefI->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32)
+ // 'catch' and 'catch_all' should be the first instruction of a BB and cannot
+ // move.
+ if (WebAssembly::isCatch(DefI->getOpcode()))
return false;
// Check for register dependencies.
@@ -595,7 +594,7 @@ static MachineInstr *rematerializeCheapDef(
if (IsDead) {
LLVM_DEBUG(dbgs() << " - Deleting original\n");
SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
- LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
+ LIS.removePhysRegDefAt(MCRegister::from(WebAssembly::ARGUMENTS), Idx);
LIS.removeInterval(Reg);
LIS.RemoveMachineInstrFromMaps(Def);
Def.eraseFromParent();
@@ -693,7 +692,7 @@ class TreeWalkerState {
public:
explicit TreeWalkerState(MachineInstr *Insert) {
const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
- if (Range.begin() != Range.end())
+ if (!Range.empty())
Worklist.push_back(reverse(Range));
}
@@ -702,11 +701,10 @@ public:
MachineOperand &pop() {
RangeTy &Range = Worklist.back();
MachineOperand &Op = *Range.begin();
- Range = drop_begin(Range, 1);
- if (Range.begin() == Range.end())
+ Range = drop_begin(Range);
+ if (Range.empty())
Worklist.pop_back();
- assert((Worklist.empty() ||
- Worklist.back().begin() != Worklist.back().end()) &&
+ assert((Worklist.empty() || !Worklist.back().empty()) &&
"Empty ranges shouldn't remain in the worklist");
return Op;
}
@@ -714,7 +712,7 @@ public:
/// Push Instr's operands onto the stack to be visited.
void pushOperands(MachineInstr *Instr) {
const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
- if (Range.begin() != Range.end())
+ if (!Range.empty())
Worklist.push_back(reverse(Range));
}
@@ -733,7 +731,7 @@ public:
if (Worklist.empty())
return false;
const RangeTy &Range = Worklist.back();
- return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
+ return !Range.empty() && Range.begin()->getParent() == Instr;
}
/// Test whether the given register is present on the stack, indicating an
@@ -865,24 +863,6 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
if (WebAssembly::isArgument(DefI->getOpcode()))
continue;
- // Currently catch's return value register cannot be stackified, because
- // the wasm LLVM backend currently does not support live-in values
- // entering blocks, which is a part of multi-value proposal.
- //
- // Once we support live-in values of wasm blocks, this can be:
- // catch ; push exnref value onto stack
- // block exnref -> i32
- // br_on_exn $__cpp_exception ; pop the exnref value
- // end_block
- //
- // But because we don't support it yet, the catch instruction's dst
- // register should be assigned to a local to be propagated across
- // 'block' boundary now.
- //
- // TODO: Fix this once we support the multivalue blocks
- if (DefI->getOpcode() == WebAssembly::CATCH)
- continue;
-
MachineOperand *Def = DefI->findRegisterDefOperand(Reg);
assert(Def != nullptr);
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 6d3d6c723277..ba2936b492a9 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -43,7 +43,8 @@ def F64_0 : WebAssemblyReg<"%f64.0">;
def V128_0: WebAssemblyReg<"%v128">;
-def EXNREF_0 : WebAssemblyReg<"%exnref.0">;
+def FUNCREF_0 : WebAssemblyReg<"%funcref.0">;
+def EXTERNREF_0 : WebAssemblyReg<"%externref.0">;
// The value stack "register". This is an opaque entity which serves to order
// uses and defs that must remain in LIFO order.
@@ -64,4 +65,5 @@ def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
def V128 : WebAssemblyRegClass<[v4f32, v2f64, v2i64, v4i32, v16i8, v8i16], 128,
(add V128_0)>;
-def EXNREF : WebAssemblyRegClass<[exnref], 0, (add EXNREF_0)>;
+def FUNCREF : WebAssemblyRegClass<[funcref], 0, (add FUNCREF_0)>;
+def EXTERNREF : WebAssemblyRegClass<[externref], 0, (add EXTERNREF_0)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp
new file mode 100644
index 000000000000..cd84e68aed14
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp
@@ -0,0 +1,78 @@
+#include "WebAssemblySortRegion.h"
+#include "WebAssemblyExceptionInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+
+using namespace llvm;
+using namespace WebAssembly;
+
+namespace llvm {
+namespace WebAssembly {
+template <>
+bool ConcreteSortRegion<MachineLoop>::isLoop() const {
+ return true;
+}
+} // end namespace WebAssembly
+} // end namespace llvm
+
+const SortRegion *SortRegionInfo::getRegionFor(const MachineBasicBlock *MBB) {
+ const auto *ML = MLI.getLoopFor(MBB);
+ const auto *WE = WEI.getExceptionFor(MBB);
+ if (!ML && !WE)
+ return nullptr;
+ // We determine subregion relationship by domination of their headers, i.e.,
+ // if region A's header dominates region B's header, B is a subregion of A.
+ // WebAssemblyException contains BBs in all its subregions (loops or
+ // exceptions), but MachineLoop may not, because MachineLoop does not
+ // contain BBs that don't have a path to its header even if they are
+ // dominated by its header. So here we should use
+ // WE->contains(ML->getHeader()), but not ML->contains(WE->getHeader()).
+ if ((ML && !WE) || (ML && WE && WE->contains(ML->getHeader()))) {
+ // If the smallest region containing MBB is a loop
+ if (LoopMap.count(ML))
+ return LoopMap[ML].get();
+ LoopMap[ML] = std::make_unique<ConcreteSortRegion<MachineLoop>>(ML);
+ return LoopMap[ML].get();
+ } else {
+ // If the smallest region containing MBB is an exception
+ if (ExceptionMap.count(WE))
+ return ExceptionMap[WE].get();
+ ExceptionMap[WE] =
+ std::make_unique<ConcreteSortRegion<WebAssemblyException>>(WE);
+ return ExceptionMap[WE].get();
+ }
+}
+
+MachineBasicBlock *SortRegionInfo::getBottom(const SortRegion *R) {
+ if (R->isLoop())
+ return getBottom(MLI.getLoopFor(R->getHeader()));
+ else
+ return getBottom(WEI.getExceptionFor(R->getHeader()));
+}
+
+MachineBasicBlock *SortRegionInfo::getBottom(const MachineLoop *ML) {
+ MachineBasicBlock *Bottom = ML->getHeader();
+ for (MachineBasicBlock *MBB : ML->blocks()) {
+ if (MBB->getNumber() > Bottom->getNumber())
+ Bottom = MBB;
+ // MachineLoop does not contain all BBs dominated by its header. BBs that
+ // don't have a path back to the loop header aren't included. But for the
+ // purpose of CFG sorting and stackification, we need a bottom BB among all
+ // BBs that are dominated by the loop header. So we check if there is any
+ // WebAssemblyException contained in this loop, and compute the bottommost
+ // BB among them all.
+ if (MBB->isEHPad()) {
+ MachineBasicBlock *ExBottom = getBottom(WEI.getExceptionFor(MBB));
+ if (ExBottom->getNumber() > Bottom->getNumber())
+ Bottom = ExBottom;
+ }
+ }
+ return Bottom;
+}
+
+MachineBasicBlock *SortRegionInfo::getBottom(const WebAssemblyException *WE) {
+ MachineBasicBlock *Bottom = WE->getHeader();
+ for (MachineBasicBlock *MBB : WE->blocks())
+ if (MBB->getNumber() > Bottom->getNumber())
+ Bottom = MBB;
+ return Bottom;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h
new file mode 100644
index 000000000000..e92bf1764185
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h
@@ -0,0 +1,91 @@
+//===-- WebAssemblySortRegion.h - WebAssembly Sort SortRegion ----*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements regions used in CFGSort and CFGStackify.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSORTREGION_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSORTREGION_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator_range.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineLoop;
+class MachineLoopInfo;
+class WebAssemblyException;
+class WebAssemblyExceptionInfo;
+
+namespace WebAssembly {
+
+// Wrapper for loops and exceptions
+class SortRegion {
+public:
+ virtual ~SortRegion() = default;
+ virtual MachineBasicBlock *getHeader() const = 0;
+ virtual bool contains(const MachineBasicBlock *MBB) const = 0;
+ virtual unsigned getNumBlocks() const = 0;
+ using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
+ virtual iterator_range<block_iterator> blocks() const = 0;
+ virtual bool isLoop() const = 0;
+};
+
+template <typename T> class ConcreteSortRegion : public SortRegion {
+ const T *Unit;
+
+public:
+ ConcreteSortRegion(const T *Unit) : Unit(Unit) {}
+ MachineBasicBlock *getHeader() const override { return Unit->getHeader(); }
+ bool contains(const MachineBasicBlock *MBB) const override {
+ return Unit->contains(MBB);
+ }
+ unsigned getNumBlocks() const override { return Unit->getNumBlocks(); }
+ iterator_range<block_iterator> blocks() const override {
+ return Unit->blocks();
+ }
+ bool isLoop() const override { return false; }
+};
+
+// This class holds information about nested SortRegions; it is analogous to what
+// LoopInfo is for loops.
+class SortRegionInfo {
+ friend class ConcreteSortRegion<MachineLoopInfo>;
+ friend class ConcreteSortRegion<WebAssemblyException>;
+
+ const MachineLoopInfo &MLI;
+ const WebAssemblyExceptionInfo &WEI;
+ DenseMap<const MachineLoop *, std::unique_ptr<SortRegion>> LoopMap;
+ DenseMap<const WebAssemblyException *, std::unique_ptr<SortRegion>>
+ ExceptionMap;
+
+public:
+ SortRegionInfo(const MachineLoopInfo &MLI,
+ const WebAssemblyExceptionInfo &WEI)
+ : MLI(MLI), WEI(WEI) {}
+
+ // Returns the smallest loop or exception that contains MBB
+ const SortRegion *getRegionFor(const MachineBasicBlock *MBB);
+
+ // Return the "bottom" block among all blocks dominated by the region
+ // (MachineLoop or WebAssemblyException) header. This works even when the
+ // entity is discontiguous.
+ MachineBasicBlock *getBottom(const SortRegion *R);
+ MachineBasicBlock *getBottom(const MachineLoop *ML);
+ MachineBasicBlock *getBottom(const WebAssemblyException *WE);
+};
+
+} // end namespace WebAssembly
+
+} // end namespace llvm
+
+#endif
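A short usage sketch, not part of the patch, showing how a CFG-sorting pass is expected to consume this interface; regionBottomNumber is a hypothetical helper and only the declarations above are assumed.

#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblySortRegion.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

// Returns the number of the bottom block of the smallest region containing
// MBB, or -1 if MBB sits in no loop and no exception.
static int regionBottomNumber(const MachineBasicBlock *MBB,
                              const MachineLoopInfo &MLI,
                              const WebAssemblyExceptionInfo &WEI) {
  WebAssembly::SortRegionInfo SRI(MLI, WEI);
  const WebAssembly::SortRegion *R = SRI.getRegionFor(MBB);
  if (!R)
    return -1;
  return SRI.getBottom(R)->getNumber();
}

In a real pass a single SortRegionInfo would live for the whole function so that its LoopMap and ExceptionMap caches are reused across queries.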
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index cacf5ab078a0..7943e1ecc8e1 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -33,7 +33,7 @@ WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU,
if (CPU.empty())
CPU = "generic";
- ParseSubtargetFeatures(CPU, FS);
+ ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
return *this;
}
@@ -41,9 +41,10 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
const std::string &CPU,
const std::string &FS,
const TargetMachine &TM)
- : WebAssemblyGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
- FrameLowering(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- TSInfo(), TLInfo(TM, *this) {}
+ : WebAssemblyGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ TargetTriple(TT), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TSInfo(),
+ TLInfo(TM, *this) {}
bool WebAssemblySubtarget::enableAtomicExpand() const {
// If atomics are disabled, atomic ops are lowered instead of expanded
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 8b95a3ddb837..a1c872ef2135 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -105,7 +105,7 @@ public:
/// Parses features string setting specified subtarget options. Definition of
/// function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 7bf655c925a4..135055a43afc 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -34,13 +34,13 @@ using namespace llvm;
#define DEBUG_TYPE "wasm"
// Emscripten's asm.js-style exception handling
-static cl::opt<bool> EnableEmException(
+cl::opt<bool> EnableEmException(
"enable-emscripten-cxx-exceptions",
cl::desc("WebAssembly Emscripten-style exception handling"),
cl::init(false));
// Emscripten's asm.js-style setjmp/longjmp handling
-static cl::opt<bool> EnableEmSjLj(
+cl::opt<bool> EnableEmSjLj(
"enable-emscripten-sjlj",
cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
cl::init(false));
@@ -145,6 +145,11 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor.
+const WebAssemblySubtarget *WebAssemblyTargetMachine::getSubtargetImpl() const {
+ return getSubtargetImpl(std::string(getTargetCPU()),
+ std::string(getTargetFeatureString()));
+}
+
const WebAssemblySubtarget *
WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU,
std::string FS) const {
@@ -160,12 +165,10 @@ WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
@@ -193,6 +196,7 @@ public:
FeatureBitset Features = coalesceFeatures(M);
std::string FeatureStr = getFeatureString(Features);
+ WasmTM->setTargetFeatureString(FeatureStr);
for (auto &F : M)
replaceFeatures(F, FeatureStr);
@@ -273,10 +277,9 @@ private:
bool stripThreadLocals(Module &M) {
bool Stripped = false;
for (auto &GV : M.globals()) {
- if (GV.getThreadLocalMode() !=
- GlobalValue::ThreadLocalMode::NotThreadLocal) {
+ if (GV.isThreadLocal()) {
Stripped = true;
- GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal);
+ GV.setThreadLocal(false);
}
}
return Stripped;
@@ -323,10 +326,10 @@ public:
void addPreEmitPass() override;
// No reg alloc
- bool addRegAssignmentFast() override { return false; }
+ bool addRegAssignAndRewriteFast() override { return false; }
// No reg alloc
- bool addRegAssignmentOptimized() override { return false; }
+ bool addRegAssignAndRewriteOptimized() override { return false; }
};
} // end anonymous namespace
@@ -350,7 +353,7 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
//===----------------------------------------------------------------------===//
void WebAssemblyPassConfig::addIRPasses() {
- // Runs LowerAtomicPass if necessary
+ // Lower atomics and TLS if necessary
addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine()));
// This is a no-op if atomics are not used in the module
@@ -443,7 +446,8 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// Do various transformations for exception handling.
// Every CFG-changing optimizations should come before this.
- addPass(createWebAssemblyLateEHPrepare());
+ if (TM->Options.ExceptionModel == ExceptionHandling::Wasm)
+ addPass(createWebAssemblyLateEHPrepare());
// Now that we have a prologue and epilogue and all frame indices are
// rewritten, eliminate SP and FP. This allows them to be stackified,
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
index dd5b39773313..29e968bfe8eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -33,6 +33,7 @@ public:
~WebAssemblyTargetMachine() override;
+ const WebAssemblySubtarget *getSubtargetImpl() const;
const WebAssemblySubtarget *getSubtargetImpl(std::string CPU,
std::string FS) const;
const WebAssemblySubtarget *
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 28703a2787e0..be1cfbaef3e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -84,3 +84,21 @@ unsigned WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return Cost;
}
+
+bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ // Allow inlining only when the Callee has a subset of the Caller's
+ // features. In principle, we should be able to inline regardless of any
+ // features because WebAssembly supports features at module granularity, not
+ // function granularity, but without this restriction it would be possible for
+ // a module to "forget" about features if all the functions that used them
+ // were inlined.
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+ return (CallerBits & CalleeBits) == CalleeBits;
+}
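The containment test above is an ordinary bitwise subset check. A tiny standalone illustration with std::bitset standing in for FeatureBitset and hypothetical feature indices:

#include <bitset>
#include <cassert>

int main() {
  // Hypothetical indices: 0 = simd128, 1 = atomics, 2 = bulk-memory.
  std::bitset<3> Caller("011");  // caller built with simd128 + atomics
  std::bitset<3> CalleeA("001"); // callee needs only simd128
  std::bitset<3> CalleeB("100"); // callee needs bulk-memory

  auto IsSubset = [](const std::bitset<3> &CallerBits,
                     const std::bitset<3> &CalleeBits) {
    return (CallerBits & CalleeBits) == CalleeBits; // same test as above
  };

  assert(IsSubset(Caller, CalleeA));  // inlining allowed
  assert(!IsSubset(Caller, CalleeB)); // refused: caller lacks bulk-memory
  return 0;
}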
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 79588a9f5669..41e358c159b4 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -67,6 +67,9 @@ public:
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
/// @}
+
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index bc2bb4fd6935..f8fb57d8a461 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -15,6 +15,7 @@
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/MC/MCContext.h"
using namespace llvm;
const char *const WebAssembly::ClangCallTerminateFn = "__clang_call_terminate";
@@ -96,3 +97,35 @@ const MachineOperand &WebAssembly::getCalleeOp(const MachineInstr &MI) {
llvm_unreachable("Not a call instruction");
}
}
+
+MCSymbolWasm *
+WebAssembly::getOrCreateFunctionTableSymbol(MCContext &Ctx,
+ const StringRef &Name) {
+ // FIXME: Duplicates functionality from
+ // MC/WasmObjectWriter::recordRelocation.
+ MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
+ if (Sym) {
+ if (!Sym->isFunctionTable())
+ Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
+ } else {
+ Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
+ Sym->setFunctionTable();
+ // The default function table is synthesized by the linker.
+ Sym->setUndefined();
+ }
+ return Sym;
+}
+
+// Find a catch instruction from an EH pad.
+MachineInstr *WebAssembly::findCatch(MachineBasicBlock *EHPad) {
+ assert(EHPad->isEHPad());
+ auto Pos = EHPad->begin();
+ // Skip any label or debug instructions. Also skip 'end' marker instructions
+ // that may exist after marker placement in CFGStackify.
+ while (Pos != EHPad->end() &&
+ (Pos->isLabel() || Pos->isDebugInstr() || isMarker(Pos->getOpcode())))
+ Pos++;
+ if (Pos != EHPad->end() && WebAssembly::isCatch(Pos->getOpcode()))
+ return &*Pos;
+ return nullptr;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
index 4f0ed43a2481..41ad7869cf46 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
@@ -15,10 +15,14 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H
-#include "llvm/CodeGen/MachineBasicBlock.h"
-
namespace llvm {
+class MachineBasicBlock;
+class MachineInstr;
+class MachineOperand;
+class MCContext;
+class MCSymbolWasm;
+class StringRef;
class WebAssemblyFunctionInfo;
namespace WebAssembly {
@@ -33,21 +37,19 @@ extern const char *const CxaRethrowFn;
extern const char *const StdTerminateFn;
extern const char *const PersonalityWrapperFn;
-/// Return the "bottom" block of an entity, which can be either a MachineLoop or
-/// WebAssemblyException. This differs from MachineLoop::getBottomBlock in that
-/// it works even if the entity is discontiguous.
-template <typename T> MachineBasicBlock *getBottom(const T *Unit) {
- MachineBasicBlock *Bottom = Unit->getHeader();
- for (MachineBasicBlock *MBB : Unit->blocks())
- if (MBB->getNumber() > Bottom->getNumber())
- Bottom = MBB;
- return Bottom;
-}
-
/// Returns the operand number of a callee, assuming the argument is a call
/// instruction.
const MachineOperand &getCalleeOp(const MachineInstr &MI);
+/// Returns the symbol for the wasm funcref function table with the given name,
+/// creating it as an undefined table symbol if it does not exist yet.
+MCSymbolWasm *getOrCreateFunctionTableSymbol(MCContext &Ctx,
+ const StringRef &Name);
+
+/// Find a catch instruction from an EH pad. Returns null if no catch
+/// instruction found or the catch is in an invalid location.
+MachineInstr *findCatch(MachineBasicBlock *EHPad);
+
} // end namespace WebAssembly
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a3014b2aba92..9d9a20183f0f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -32,6 +32,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -56,37 +57,53 @@ static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
namespace {
static const char OpPrecedence[] = {
- 0, // IC_OR
- 1, // IC_XOR
- 2, // IC_AND
- 3, // IC_LSHIFT
- 3, // IC_RSHIFT
- 4, // IC_PLUS
- 4, // IC_MINUS
- 5, // IC_MULTIPLY
- 5, // IC_DIVIDE
- 5, // IC_MOD
- 6, // IC_NOT
- 7, // IC_NEG
- 8, // IC_RPAREN
- 9, // IC_LPAREN
- 0, // IC_IMM
- 0 // IC_REGISTER
+ 0, // IC_OR
+ 1, // IC_XOR
+ 2, // IC_AND
+ 4, // IC_LSHIFT
+ 4, // IC_RSHIFT
+ 5, // IC_PLUS
+ 5, // IC_MINUS
+ 6, // IC_MULTIPLY
+ 6, // IC_DIVIDE
+ 6, // IC_MOD
+ 7, // IC_NOT
+ 8, // IC_NEG
+ 9, // IC_RPAREN
+ 10, // IC_LPAREN
+ 0, // IC_IMM
+ 0, // IC_REGISTER
+ 3, // IC_EQ
+ 3, // IC_NE
+ 3, // IC_LT
+ 3, // IC_LE
+ 3, // IC_GT
+ 3 // IC_GE
};
class X86AsmParser : public MCTargetAsmParser {
ParseInstructionInfo *InstInfo;
bool Code16GCC;
+ unsigned ForcedDataPrefix = 0;
enum VEXEncoding {
VEXEncoding_Default,
VEXEncoding_VEX,
+ VEXEncoding_VEX2,
VEXEncoding_VEX3,
VEXEncoding_EVEX,
};
VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;
+ enum DispEncoding {
+ DispEncoding_Default,
+ DispEncoding_Disp8,
+ DispEncoding_Disp32,
+ };
+
+ DispEncoding ForcedDispEncoding = DispEncoding_Default;
+
private:
SMLoc consumeToken() {
MCAsmParser &Parser = getParser();
@@ -132,7 +149,13 @@ private:
IC_RPAREN,
IC_LPAREN,
IC_IMM,
- IC_REGISTER
+ IC_REGISTER,
+ IC_EQ,
+ IC_NE,
+ IC_LT,
+ IC_LE,
+ IC_GT,
+ IC_GE
};
enum IntelOperatorKind {
@@ -142,12 +165,19 @@ private:
IOK_TYPE,
};
+ enum MasmOperatorKind {
+ MOK_INVALID = 0,
+ MOK_LENGTHOF,
+ MOK_SIZEOF,
+ MOK_TYPE,
+ };
+
class InfixCalculator {
typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
SmallVector<ICToken, 4> PostfixStack;
- bool isUnaryOperator(const InfixCalculatorTok Op) {
+ bool isUnaryOperator(InfixCalculatorTok Op) const {
return Op == IC_NEG || Op == IC_NOT;
}
@@ -314,6 +344,44 @@ private:
Val = Op1.second >> Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
+ case IC_EQ:
+ assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Equals operation with an immediate and a register!");
+ Val = (Op1.second == Op2.second) ? -1 : 0;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
+ case IC_NE:
+ assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Not-equals operation with an immediate and a register!");
+ Val = (Op1.second != Op2.second) ? -1 : 0;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
+ case IC_LT:
+ assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Less-than operation with an immediate and a register!");
+ Val = (Op1.second < Op2.second) ? -1 : 0;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
+ case IC_LE:
+ assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Less-than-or-equal operation with an immediate and a "
+ "register!");
+ Val = (Op1.second <= Op2.second) ? -1 : 0;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
+ case IC_GT:
+ assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Greater-than operation with an immediate and a register!");
+ Val = (Op1.second > Op2.second) ? -1 : 0;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
+ case IC_GE:
+ assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Greater-than-or-equal operation with an immediate and a "
+ "register!");
+ Val = (Op1.second >= Op2.second) ? -1 : 0;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
}
}
}
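Two properties of the new comparison operators, shown as a small worked example: per the MASM convention implemented above they evaluate to -1 for true and 0 for false, and at precedence 3 in OpPrecedence they bind more loosely than PLUS/MINUS (precedence 5), so 2 + 3 EQ 5 groups as (2 + 3) EQ 5.

#include <cstdint>
#include <iostream>

int main() {
  int64_t LHS = 2 + 3;                   // PLUS binds tighter than EQ
  int64_t Eq = (LHS == 5) ? -1 : 0;      // "2 + 3 EQ 5" -> -1 (true)
  int64_t Gt = (LHS > 7) ? -1 : 0;       // "2 + 3 GT 7" ->  0 (false)
  std::cout << Eq << " " << Gt << "\n";  // prints: -1 0
  return 0;
}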
@@ -327,6 +395,12 @@ private:
IES_OR,
IES_XOR,
IES_AND,
+ IES_EQ,
+ IES_NE,
+ IES_LT,
+ IES_LE,
+ IES_GT,
+ IES_GE,
IES_LSHIFT,
IES_RSHIFT,
IES_PLUS,
@@ -359,7 +433,7 @@ private:
bool MemExpr;
bool OffsetOperator;
SMLoc OffsetOperatorLoc;
- StringRef CurType;
+ AsmTypeInfo CurType;
bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
if (Sym) {
@@ -378,22 +452,25 @@ private:
MemExpr(false), OffsetOperator(false) {}
void addImm(int64_t imm) { Imm += imm; }
- short getBracCount() { return BracCount; }
- bool isMemExpr() { return MemExpr; }
- bool isOffsetOperator() { return OffsetOperator; }
- SMLoc getOffsetLoc() { return OffsetOperatorLoc; }
- unsigned getBaseReg() { return BaseReg; }
- unsigned getIndexReg() { return IndexReg; }
- unsigned getScale() { return Scale; }
- const MCExpr *getSym() { return Sym; }
- StringRef getSymName() { return SymName; }
- StringRef getType() { return CurType; }
+ short getBracCount() const { return BracCount; }
+ bool isMemExpr() const { return MemExpr; }
+ bool isOffsetOperator() const { return OffsetOperator; }
+ SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
+ unsigned getBaseReg() const { return BaseReg; }
+ unsigned getIndexReg() const { return IndexReg; }
+ unsigned getScale() const { return Scale; }
+ const MCExpr *getSym() const { return Sym; }
+ StringRef getSymName() const { return SymName; }
+ StringRef getType() const { return CurType.Name; }
+ unsigned getSize() const { return CurType.Size; }
+ unsigned getElementSize() const { return CurType.ElementSize; }
+ unsigned getLength() const { return CurType.Length; }
int64_t getImm() { return Imm + IC.execute(); }
- bool isValidEndState() {
+ bool isValidEndState() const {
return State == IES_RBRAC || State == IES_INTEGER;
}
- bool hadError() { return State == IES_ERROR; }
- InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; }
+ bool hadError() const { return State == IES_ERROR; }
+ const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
void onOr() {
IntelExprState CurrState = State;
@@ -440,6 +517,96 @@ private:
}
PrevState = CurrState;
}
+ void onEq() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ case IES_REGISTER:
+ State = IES_EQ;
+ IC.pushOperator(IC_EQ);
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onNE() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ case IES_REGISTER:
+ State = IES_NE;
+ IC.pushOperator(IC_NE);
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onLT() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ case IES_REGISTER:
+ State = IES_LT;
+ IC.pushOperator(IC_LT);
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onLE() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ case IES_REGISTER:
+ State = IES_LE;
+ IC.pushOperator(IC_LE);
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onGT() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ case IES_REGISTER:
+ State = IES_GT;
+ IC.pushOperator(IC_GT);
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onGE() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ case IES_REGISTER:
+ State = IES_GE;
+ IC.pushOperator(IC_GE);
+ break;
+ }
+ PrevState = CurrState;
+ }
void onLShift() {
IntelExprState CurrState = State;
switch (State) {
@@ -510,6 +677,12 @@ private:
case IES_OR:
case IES_XOR:
case IES_AND:
+ case IES_EQ:
+ case IES_NE:
+ case IES_LT:
+ case IES_LE:
+ case IES_GT:
+ case IES_GE:
case IES_LSHIFT:
case IES_RSHIFT:
case IES_PLUS:
@@ -565,6 +738,12 @@ private:
case IES_OR:
case IES_XOR:
case IES_AND:
+ case IES_EQ:
+ case IES_NE:
+ case IES_LT:
+ case IES_LE:
+ case IES_GT:
+ case IES_GE:
case IES_LSHIFT:
case IES_RSHIFT:
case IES_PLUS:
@@ -620,7 +799,8 @@ private:
}
bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
const InlineAsmIdentifierInfo &IDInfo,
- bool ParsingMSInlineAsm, StringRef &ErrMsg) {
+ const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
+ StringRef &ErrMsg) {
// InlineAsm: Treat an enum value as an integer
if (ParsingMSInlineAsm)
if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
@@ -639,6 +819,7 @@ private:
case IES_NOT:
case IES_INIT:
case IES_LBRAC:
+ case IES_LPAREN:
if (setSymRef(SymRef, SymRefName, ErrMsg))
return true;
MemExpr = true;
@@ -646,6 +827,7 @@ private:
IC.pushOperand(IC_IMM);
if (ParsingMSInlineAsm)
Info = IDInfo;
+ setTypeInfo(Type);
break;
}
return false;
@@ -662,6 +844,12 @@ private:
case IES_OR:
case IES_XOR:
case IES_AND:
+ case IES_EQ:
+ case IES_NE:
+ case IES_LT:
+ case IES_LE:
+ case IES_GT:
+ case IES_GE:
case IES_LSHIFT:
case IES_RSHIFT:
case IES_DIVIDE:
@@ -744,6 +932,8 @@ private:
case IES_RPAREN:
State = IES_PLUS;
IC.pushOperator(IC_PLUS);
+ CurType.Length = 1;
+ CurType.Size = CurType.ElementSize;
break;
case IES_INIT:
case IES_CAST:
@@ -796,6 +986,12 @@ private:
case IES_OR:
case IES_XOR:
case IES_AND:
+ case IES_EQ:
+ case IES_NE:
+ case IES_LT:
+ case IES_LE:
+ case IES_GT:
+ case IES_GE:
case IES_LSHIFT:
case IES_RSHIFT:
case IES_MULTIPLY:
@@ -827,8 +1023,8 @@ private:
}
}
bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
- const InlineAsmIdentifierInfo &IDInfo, bool ParsingMSInlineAsm,
- StringRef &ErrMsg) {
+ const InlineAsmIdentifierInfo &IDInfo,
+ bool ParsingMSInlineAsm, StringRef &ErrMsg) {
PrevState = State;
switch (State) {
default:
@@ -852,19 +1048,19 @@ private:
}
return false;
}
- void onCast(StringRef Type) {
+ void onCast(AsmTypeInfo Info) {
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_LPAREN:
- setType(Type);
+ setTypeInfo(Info);
State = IES_CAST;
break;
}
}
- void setType(StringRef Type) { CurType = Type; }
+ void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
};
bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
@@ -878,11 +1074,6 @@ private:
return Parser.Error(L, Msg, Range);
}
- std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
- Error(Loc, Msg, R);
- return nullptr;
- }
-
bool MatchRegisterByName(unsigned &RegNo, StringRef RegName, SMLoc StartLoc,
SMLoc EndLoc);
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
@@ -898,17 +1089,21 @@ private:
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
OperandVector &FinalOperands);
- std::unique_ptr<X86Operand> ParseOperand();
- std::unique_ptr<X86Operand> ParseATTOperand();
- std::unique_ptr<X86Operand> ParseIntelOperand();
+ bool ParseOperand(OperandVector &Operands);
+ bool ParseATTOperand(OperandVector &Operands);
+ bool ParseIntelOperand(OperandVector &Operands);
bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
InlineAsmIdentifierInfo &Info, SMLoc &End);
bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
- std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start);
+ unsigned IdentifyMasmOperator(StringRef Name);
+ bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
+ bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
bool &ParseError, SMLoc &End);
+ bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
+ bool &ParseError, SMLoc &End);
void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
SMLoc End);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
@@ -917,20 +1112,21 @@ private:
bool IsUnevaluatedOperand, SMLoc &End,
bool IsParsingOffsetOperator = false);
- std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
- const MCExpr *&Disp,
- const SMLoc &StartLoc,
- SMLoc &EndLoc);
+ bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
+ SMLoc EndLoc, OperandVector &Operands);
X86::CondCode ParseConditionCode(StringRef CCode);
bool ParseIntelMemoryOperandSize(unsigned &Size);
- std::unique_ptr<X86Operand>
- CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
- unsigned IndexReg, unsigned Scale, SMLoc Start,
- SMLoc End, unsigned Size, StringRef Identifier,
- const InlineAsmIdentifierInfo &Info);
-
+ bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale, SMLoc Start, SMLoc End,
+ unsigned Size, StringRef Identifier,
+ const InlineAsmIdentifierInfo &Info,
+ OperandVector &Operands);
+
+ bool parseDirectiveArch();
+ bool parseDirectiveNops(SMLoc L);
bool parseDirectiveEven(SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
@@ -942,7 +1138,6 @@ private:
bool parseDirectiveFPOStackAlign(SMLoc L);
bool parseDirectiveFPOEndPrologue(SMLoc L);
bool parseDirectiveFPOEndProc(SMLoc L);
- bool parseDirectiveFPOData(SMLoc L);
/// SEH directives.
bool parseSEHRegisterNumber(unsigned RegClassID, unsigned &RegNo);
@@ -992,8 +1187,7 @@ private:
/// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
/// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
/// return false if no parsing errors occurred, true otherwise.
- bool HandleAVX512Operand(OperandVector &Operands,
- const MCParsedAsmOperand &Op);
+ bool HandleAVX512Operand(OperandVector &Operands);
bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
@@ -1188,8 +1382,6 @@ bool X86AsmParser::MatchRegisterByName(unsigned &RegNo, StringRef RegName,
// FIXME: This should be done using Requires<Not64BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
// checked.
- // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
- // REX prefix.
if (RegNo == X86::RIZ || RegNo == X86::RIP ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
@@ -1524,16 +1716,17 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
return false;
}
-std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
+bool X86AsmParser::ParseOperand(OperandVector &Operands) {
if (isParsingIntelSyntax())
- return ParseIntelOperand();
- return ParseATTOperand();
+ return ParseIntelOperand(Operands);
+
+ return ParseATTOperand(Operands);
}
-std::unique_ptr<X86Operand> X86AsmParser::CreateMemForMSInlineAsm(
+bool X86AsmParser::CreateMemForMSInlineAsm(
unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
- const InlineAsmIdentifierInfo &Info) {
+ const InlineAsmIdentifierInfo &Info, OperandVector &Operands) {
// If we found a decl other than a VarDecl, then assume it is a FuncDecl or
// some other label reference.
if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
@@ -1545,8 +1738,10 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForMSInlineAsm(
}
// Create an absolute memory reference in order to match against
// instructions taking a PC relative operand.
- return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
- Identifier, Info.Label.Decl);
+ Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
+ End, Size, Identifier,
+ Info.Label.Decl));
+ return false;
}
// We either have a direct symbol reference, or an offset from a symbol. The
// parser always puts the symbol on the LHS, so look there for size
@@ -1563,17 +1758,19 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForMSInlineAsm(
  // It is common for MS InlineAsm to use a global variable and one or two
  // registers in a memory expression, even though it is not accessible via rip/eip.
if (IsGlobalLV && (BaseReg || IndexReg)) {
- return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
+ Operands.push_back(
+ X86Operand::CreateMem(getPointerWidth(), Disp, Start, End));
+ return false;
+ }
// Otherwise, we set the base register to a non-zero value
// if we don't know the actual value at this time. This is necessary to
// get the matching correct in some cases.
- } else {
- BaseReg = BaseReg ? BaseReg : 1;
- return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
- IndexReg, Scale, Start, End, Size,
- /*DefaultBaseReg=*/X86::RIP, Identifier, Decl,
- FrontendSize);
- }
+ BaseReg = BaseReg ? BaseReg : 1;
+ Operands.push_back(X86Operand::CreateMem(
+ getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
+ Size,
+ /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
+ return false;
}
// Some binary bitwise operators have a named synonymous
@@ -1582,8 +1779,10 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForMSInlineAsm(
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
IntelExprStateMachine &SM,
bool &ParseError, SMLoc &End) {
- // A named operator should be either lower or upper case, but not a mix
- if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
+ // A named operator should be either lower or upper case, but not a mix...
+ // except in MASM, which uses full case-insensitivity.
+ if (Name.compare(Name.lower()) && Name.compare(Name.upper()) &&
+ !getParser().isParsingMasm())
return false;
if (Name.equals_lower("not")) {
SM.onNot();
@@ -1619,15 +1818,39 @@ bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
End = consumeToken();
return true;
}
+bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
+ IntelExprStateMachine &SM,
+ bool &ParseError, SMLoc &End) {
+ if (Name.equals_lower("eq")) {
+ SM.onEq();
+ } else if (Name.equals_lower("ne")) {
+ SM.onNE();
+ } else if (Name.equals_lower("lt")) {
+ SM.onLT();
+ } else if (Name.equals_lower("le")) {
+ SM.onLE();
+ } else if (Name.equals_lower("gt")) {
+ SM.onGT();
+ } else if (Name.equals_lower("ge")) {
+ SM.onGE();
+ } else {
+ return false;
+ }
+ End = consumeToken();
+ return true;
+}
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
StringRef ErrMsg;
AsmToken::TokenKind PrevTK = AsmToken::Error;
bool Done = false;
while (!Done) {
+ // Get a fresh reference on each loop iteration in case the previous
+ // iteration moved the token storage during UnLex().
+ const AsmToken &Tok = Parser.getTok();
+
bool UpdateLocLex = true;
AsmToken::TokenKind TK = getLexer().getKind();
@@ -1636,6 +1859,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if ((Done = SM.isValidEndState()))
break;
return Error(Tok.getLoc(), "unknown token in expression");
+ case AsmToken::Error:
+ return Error(getLexer().getErrLoc(), getLexer().getErr());
+ break;
case AsmToken::EndOfStatement:
Done = true;
break;
@@ -1645,18 +1871,73 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if (ParseIntelDotOperator(SM, End))
return true;
break;
+ case AsmToken::Dot:
+ if (!Parser.isParsingMasm()) {
+ if ((Done = SM.isValidEndState()))
+ break;
+ return Error(Tok.getLoc(), "unknown token in expression");
+ }
+ // MASM allows spaces around the dot operator (e.g., "var . x")
+ Lex();
+ UpdateLocLex = false;
+ if (ParseIntelDotOperator(SM, End))
+ return true;
+ break;
+ case AsmToken::Dollar:
+ if (!Parser.isParsingMasm()) {
+ if ((Done = SM.isValidEndState()))
+ break;
+ return Error(Tok.getLoc(), "unknown token in expression");
+ }
+ LLVM_FALLTHROUGH;
+ case AsmToken::String: {
+ if (Parser.isParsingMasm()) {
+ // MASM parsers handle strings in expressions as constants.
+ SMLoc ValueLoc = Tok.getLoc();
+ int64_t Res;
+ const MCExpr *Val;
+ if (Parser.parsePrimaryExpr(Val, End, nullptr))
+ return true;
+ UpdateLocLex = false;
+ if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
+ return Error(ValueLoc, "expected absolute value");
+ if (SM.onInteger(Res, ErrMsg))
+ return Error(ValueLoc, ErrMsg);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ }
case AsmToken::At:
- case AsmToken::String:
case AsmToken::Identifier: {
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
UpdateLocLex = false;
+ if (Parser.isParsingMasm()) {
+ size_t DotOffset = Identifier.find_first_of('.');
+ if (DotOffset != StringRef::npos) {
+ consumeToken();
+ StringRef LHS = Identifier.slice(0, DotOffset);
+ StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
+ StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
+ if (!RHS.empty()) {
+ getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
+ }
+ getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
+ if (!LHS.empty()) {
+ getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
+ }
+ break;
+ }
+ }
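When MASM input reaches this point with an identifier that contains a dot (e.g. "var.field"), the code above splits it at the first dot and pushes the pieces back onto the lexer in reverse order, so subsequent reads see identifier, dot, identifier in source order. A standalone sketch of that splitting, with a plain vector standing in for the lexer's UnLex stack (names here are illustrative):

#include <string>
#include <string_view>
#include <vector>

// Split "ident.field" the way the parser does: LHS, '.', RHS, pushed back in
// reverse so that back() is consumed first. Only the first dot is split;
// any remaining dots stay in the RHS piece for later passes.
std::vector<std::string> splitDottedIdentifier(std::string_view Ident) {
  std::vector<std::string> UnlexStack; // back() is re-read first
  size_t DotOffset = Ident.find('.');
  if (DotOffset == std::string_view::npos)
    return {std::string(Ident)};
  std::string_view LHS = Ident.substr(0, DotOffset);
  std::string_view RHS = Ident.substr(DotOffset + 1);
  if (!RHS.empty())
    UnlexStack.emplace_back(RHS);
  UnlexStack.emplace_back(".");
  if (!LHS.empty())
    UnlexStack.emplace_back(LHS);
  return UnlexStack; // e.g. "var.field" -> {"field", ".", "var"}
}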
// (MASM only) <TYPE> PTR operator
if (Parser.isParsingMasm()) {
const AsmToken &NextTok = getLexer().peekTok();
if (NextTok.is(AsmToken::Identifier) &&
NextTok.getIdentifier().equals_lower("ptr")) {
- SM.onCast(Identifier);
+ AsmTypeInfo Info;
+ if (Parser.lookUpType(Identifier, Info))
+ return Error(Tok.getLoc(), "unknown type");
+ SM.onCast(Info);
// Eat type and PTR.
consumeToken();
End = consumeToken();
@@ -1681,16 +1962,15 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if (SM.onRegister(Reg, ErrMsg))
return Error(IdentLoc, ErrMsg);
- StringRef Type;
- unsigned Offset = 0;
+ AsmFieldInfo Info;
SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
- if (Parser.lookUpField(Field, Type, Offset))
+ if (Parser.lookUpField(Field, Info))
return Error(FieldStartLoc, "unknown offset");
else if (SM.onPlus(ErrMsg))
return Error(getTok().getLoc(), ErrMsg);
- else if (SM.onInteger(Offset, ErrMsg))
+ else if (SM.onInteger(Info.Offset, ErrMsg))
return Error(IdentLoc, ErrMsg);
- SM.setType(Type);
+ SM.setTypeInfo(Info.Type);
End = consumeToken();
break;
@@ -1704,8 +1984,15 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return true;
break;
}
+ if (Parser.isParsingMasm() &&
+ ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
+ if (ParseError)
+ return true;
+ break;
+ }
// Symbol reference, when parsing assembly content
InlineAsmIdentifierInfo Info;
+ AsmFieldInfo FieldInfo;
const MCExpr *Val;
if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
// MS Dot Operator expression
@@ -1722,8 +2009,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
if (SM.onInteger(Val, ErrMsg))
return Error(IdentLoc, ErrMsg);
- } else
+ } else {
return true;
+ }
break;
}
// MS InlineAsm identifier
@@ -1732,13 +2020,49 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return Error(IdentLoc, "expected identifier");
if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
return true;
- else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
+ else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
+ true, ErrMsg))
return Error(IdentLoc, ErrMsg);
break;
}
- if (getParser().parsePrimaryExpr(Val, End)) {
+ if (Parser.isParsingMasm()) {
+ if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
+ int64_t Val;
+ if (ParseMasmOperator(OpKind, Val))
+ return true;
+ if (SM.onInteger(Val, ErrMsg))
+ return Error(IdentLoc, ErrMsg);
+ break;
+ }
+ if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
+ // Field offset immediate; <TYPE>.<field specification>
+ Lex(); // eat type
+ bool EndDot = parseOptionalToken(AsmToken::Dot);
+ while (EndDot || (getTok().is(AsmToken::Identifier) &&
+ getTok().getString().startswith("."))) {
+ getParser().parseIdentifier(Identifier);
+ if (!EndDot)
+ Identifier.consume_front(".");
+ EndDot = Identifier.consume_back(".");
+ if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
+ FieldInfo)) {
+ SMLoc IDEnd =
+ SMLoc::getFromPointer(Identifier.data() + Identifier.size());
+ return Error(IdentLoc, "Unable to lookup field reference!",
+ SMRange(IdentLoc, IDEnd));
+ }
+ if (!EndDot)
+ EndDot = parseOptionalToken(AsmToken::Dot);
+ }
+ if (SM.onInteger(FieldInfo.Offset, ErrMsg))
+ return Error(IdentLoc, ErrMsg);
+ break;
+ }
+ }
+ if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
return Error(Tok.getLoc(), "Unexpected identifier!");
- } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
+ } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
+ false, ErrMsg)) {
return Error(IdentLoc, ErrMsg);
}
break;
@@ -1761,8 +2085,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return Error(Loc, "invalid reference to undefined symbol");
StringRef Identifier = Sym->getName();
InlineAsmIdentifierInfo Info;
- if (SM.onIdentifierExpr(Val, Identifier, Info, isParsingMSInlineAsm(),
- ErrMsg))
+ AsmTypeInfo Type;
+ if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
+ isParsingMSInlineAsm(), ErrMsg))
return Error(Loc, ErrMsg);
End = consumeToken();
} else {
@@ -1904,14 +2229,13 @@ bool X86AsmParser::ParseIntelInlineAsmIdentifier(
}
//ParseRoundingModeOp - Parse AVX-512 rounding mode operand
-std::unique_ptr<X86Operand>
-X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
+bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
// Eat "{" and mark the current place.
const SMLoc consumedToken = consumeToken();
if (Tok.isNot(AsmToken::Identifier))
- return ErrorOperand(Tok.getLoc(), "Expected an identifier after {");
+ return Error(Tok.getLoc(), "Expected an identifier after {");
if (Tok.getIdentifier().startswith("r")){
int rndMode = StringSwitch<int>(Tok.getIdentifier())
.Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
@@ -1920,67 +2244,76 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
.Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
.Default(-1);
if (-1 == rndMode)
- return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
+ return Error(Tok.getLoc(), "Invalid rounding mode.");
Parser.Lex(); // Eat "r*" of r*-sae
if (!getLexer().is(AsmToken::Minus))
- return ErrorOperand(Tok.getLoc(), "Expected - at this point");
+ return Error(Tok.getLoc(), "Expected - at this point");
Parser.Lex(); // Eat "-"
Parser.Lex(); // Eat the sae
if (!getLexer().is(AsmToken::RCurly))
- return ErrorOperand(Tok.getLoc(), "Expected } at this point");
+ return Error(Tok.getLoc(), "Expected } at this point");
SMLoc End = Tok.getEndLoc();
Parser.Lex(); // Eat "}"
const MCExpr *RndModeOp =
MCConstantExpr::create(rndMode, Parser.getContext());
- return X86Operand::CreateImm(RndModeOp, Start, End);
+ Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
+ return false;
}
if(Tok.getIdentifier().equals("sae")){
Parser.Lex(); // Eat the sae
if (!getLexer().is(AsmToken::RCurly))
- return ErrorOperand(Tok.getLoc(), "Expected } at this point");
+ return Error(Tok.getLoc(), "Expected } at this point");
Parser.Lex(); // Eat "}"
- return X86Operand::CreateToken("{sae}", consumedToken);
+ Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
+ return false;
}
- return ErrorOperand(Tok.getLoc(), "unknown token in expression");
+ return Error(Tok.getLoc(), "unknown token in expression");
}
/// Parse the '.' operator.
bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
SMLoc &End) {
const AsmToken &Tok = getTok();
- StringRef Type;
- unsigned Offset = 0;
+ AsmFieldInfo Info;
// Drop the optional '.'.
StringRef DotDispStr = Tok.getString();
if (DotDispStr.startswith("."))
DotDispStr = DotDispStr.drop_front(1);
+ StringRef TrailingDot;
// .Imm gets lexed as a real.
if (Tok.is(AsmToken::Real)) {
APInt DotDisp;
DotDispStr.getAsInteger(10, DotDisp);
- Offset = DotDisp.getZExtValue();
+ Info.Offset = DotDisp.getZExtValue();
} else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
Tok.is(AsmToken::Identifier)) {
+ if (DotDispStr.endswith(".")) {
+ TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
+ DotDispStr = DotDispStr.drop_back(1);
+ }
const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
const StringRef Base = BaseMember.first, Member = BaseMember.second;
- if (getParser().lookUpField(SM.getType(), DotDispStr, Type, Offset) &&
- getParser().lookUpField(SM.getSymName(), DotDispStr, Type, Offset) &&
- getParser().lookUpField(DotDispStr, Type, Offset) &&
+ if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
+ getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
+ getParser().lookUpField(DotDispStr, Info) &&
(!SemaCallback ||
- SemaCallback->LookupInlineAsmField(Base, Member, Offset)))
+ SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
return Error(Tok.getLoc(), "Unable to lookup field reference!");
- } else
+ } else {
return Error(Tok.getLoc(), "Unexpected token type!");
+ }
// Eat the DotExpression and update End
End = SMLoc::getFromPointer(DotDispStr.data());
const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
while (Tok.getLoc().getPointer() < DotExprEndLoc)
Lex();
- SM.addImm(Offset);
- SM.setType(Type);
+ if (!TrailingDot.empty())
+ getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
+ SM.addImm(Info.Offset);
+ SM.setTypeInfo(Info.Type);
return false;
}
@@ -1995,7 +2328,7 @@ bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
if (!isParsingMSInlineAsm()) {
if ((getTok().isNot(AsmToken::Identifier) &&
getTok().isNot(AsmToken::String)) ||
- getParser().parsePrimaryExpr(Val, End))
+ getParser().parsePrimaryExpr(Val, End, nullptr))
return Error(Start, "unexpected token!");
} else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
return Error(Start, "unable to lookup expression");
@@ -2031,7 +2364,7 @@ unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
SMLoc Start = Tok.getLoc(), End;
StringRef Identifier = Tok.getString();
if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
- /*Unevaluated=*/true, End))
+ /*IsUnevaluatedOperand=*/true, End))
return 0;
if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
@@ -2050,6 +2383,73 @@ unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
return CVal;
}
+// Query a candidate string for being a MASM operator. Report back its kind,
+// or MOK_INVALID if it does not evaluate to a known one.
+unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
+ return StringSwitch<unsigned>(Name.lower())
+ .Case("type", MOK_TYPE)
+ .Cases("size", "sizeof", MOK_SIZEOF)
+ .Cases("length", "lengthof", MOK_LENGTHOF)
+ .Default(MOK_INVALID);
+}
+
+/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
+/// returns the number of elements in an array. It returns the value 1 for
+/// non-array variables. The SIZEOF operator returns the size of a type or
+/// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
+/// The TYPE operator returns the size of a variable. If the variable is an
+/// array, TYPE returns the size of a single element.
+bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
+ MCAsmParser &Parser = getParser();
+ SMLoc OpLoc = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat operator.
+
+ Val = 0;
+ if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
+ // Check for SIZEOF(<type>) and TYPE(<type>).
+ bool InParens = Parser.getTok().is(AsmToken::LParen);
+ const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
+ AsmTypeInfo Type;
+ if (IDTok.is(AsmToken::Identifier) &&
+ !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
+ Val = Type.Size;
+
+ // Eat tokens.
+ if (InParens)
+ parseToken(AsmToken::LParen);
+ parseToken(AsmToken::Identifier);
+ if (InParens)
+ parseToken(AsmToken::RParen);
+ }
+ }
+
+ if (!Val) {
+ IntelExprStateMachine SM;
+ SMLoc End, Start = Parser.getTok().getLoc();
+ if (ParseIntelExpression(SM, End))
+ return true;
+
+ switch (OpKind) {
+ default:
+ llvm_unreachable("Unexpected operand kind!");
+ case MOK_SIZEOF:
+ Val = SM.getSize();
+ break;
+ case MOK_LENGTHOF:
+ Val = SM.getLength();
+ break;
+ case MOK_TYPE:
+ Val = SM.getElementSize();
+ break;
+ }
+
+ if (!Val)
+ return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
+ }
+
+ return false;
+}
+
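As the doc comment above states, SIZEOF is the product of LENGTHOF and TYPE, and the TYPE of an array is the size of a single element. A small worked example for a hypothetical MASM definition such as "arr DWORD 10 DUP (?)", assuming the usual 4-byte DWORD:

#include <cassert>

// Illustrates the operator relationship only; the names and values are
// illustrative, not taken from the patch.
int main() {
  unsigned ElementSize = 4;             // TYPE arr     (size of one DWORD)
  unsigned Length = 10;                 // LENGTHOF arr (element count)
  unsigned Size = ElementSize * Length; // SIZEOF arr
  assert(Size == 40);
  return 0;
}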
bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
Size = StringSwitch<unsigned>(getTok().getString())
.Cases("BYTE", "byte", 8)
@@ -2076,7 +2476,7 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
return false;
}
-std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
+bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
@@ -2084,28 +2484,31 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
// Parse optional Size directive.
unsigned Size;
if (ParseIntelMemoryOperandSize(Size))
- return nullptr;
+ return true;
bool PtrInOperand = bool(Size);
Start = Tok.getLoc();
// Rounding mode operand.
if (getLexer().is(AsmToken::LCurly))
- return ParseRoundingModeOp(Start);
+ return ParseRoundingModeOp(Start, Operands);
// Register operand.
unsigned RegNo = 0;
if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
if (RegNo == X86::RIP)
- return ErrorOperand(Start, "rip can only be used as a base register");
+ return Error(Start, "rip can only be used as a base register");
// A Register followed by ':' is considered a segment override
- if (Tok.isNot(AsmToken::Colon))
- return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
- ErrorOperand(Start, "expected memory operand after 'ptr', "
+ if (Tok.isNot(AsmToken::Colon)) {
+ if (PtrInOperand)
+ return Error(Start, "expected memory operand after 'ptr', "
"found register operand instead");
+ Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
+ return false;
+ }
    // An alleged segment override. Check if we have a valid segment register
if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
- return ErrorOperand(Start, "invalid segment register");
+ return Error(Start, "invalid segment register");
// Eat ':' and update Start location
Start = Lex().getLoc();
}
@@ -2113,7 +2516,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
// Immediates and Memory
IntelExprStateMachine SM;
if (ParseIntelExpression(SM, End))
- return nullptr;
+ return true;
if (isParsingMSInlineAsm())
RewriteIntelExpression(SM, Start, Tok.getLoc());
@@ -2130,22 +2533,27 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
// and we are parsing a segment override
if (!SM.isMemExpr() && !RegNo) {
if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
- const InlineAsmIdentifierInfo Info = SM.getIdentifierInfo();
+ const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
// Disp includes the address of a variable; make sure this is recorded
// for later handling.
- return X86Operand::CreateImm(Disp, Start, End, SM.getSymName(),
- Info.Var.Decl, Info.Var.IsGlobalLV);
+ Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
+ SM.getSymName(), Info.Var.Decl,
+ Info.Var.IsGlobalLV));
+ return false;
}
}
- return X86Operand::CreateImm(Disp, Start, End);
+ Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
+ return false;
}
StringRef ErrMsg;
unsigned BaseReg = SM.getBaseReg();
unsigned IndexReg = SM.getIndexReg();
unsigned Scale = SM.getScale();
+ if (!PtrInOperand)
+ Size = SM.getElementSize() << 3;
if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
(IndexReg == X86::ESP || IndexReg == X86::RSP))
@@ -2164,7 +2572,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
if (Scale != 0 &&
X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
- return ErrorOperand(Start, "16-bit addresses cannot have a scale");
+ return Error(Start, "16-bit addresses cannot have a scale");
// If there was no explicit scale specified, change it to 1.
if (Scale == 0)
@@ -2180,26 +2588,33 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
if ((BaseReg || IndexReg) &&
CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
ErrMsg))
- return ErrorOperand(Start, ErrMsg);
+ return Error(Start, ErrMsg);
if (isParsingMSInlineAsm())
return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
End, Size, SM.getSymName(),
- SM.getIdentifierInfo());
+ SM.getIdentifierInfo(), Operands);
// When parsing x64 MS-style assembly, all memory operands default to
// RIP-relative when interpreted as non-absolute references.
- if (Parser.isParsingMasm() && is64BitMode())
- return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp, BaseReg,
- IndexReg, Scale, Start, End, Size,
- /*DefaultBaseReg=*/X86::RIP);
-
- if (!(BaseReg || IndexReg || RegNo))
- return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
- return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
- BaseReg, IndexReg, Scale, Start, End, Size);
+ if (Parser.isParsingMasm() && is64BitMode()) {
+ Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
+ BaseReg, IndexReg, Scale, Start,
+ End, Size,
+ /*DefaultBaseReg=*/X86::RIP));
+ return false;
+ }
+
+ if ((BaseReg || IndexReg || RegNo))
+ Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
+ BaseReg, IndexReg, Scale, Start,
+ End, Size));
+ else
+ Operands.push_back(
+ X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size));
+ return false;
}
-std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
+bool X86AsmParser::ParseATTOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
case AsmToken::Dollar: {
@@ -2214,12 +2629,13 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
"expected immediate expression") ||
getParser().parseExpression(Val, End) ||
check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
- return nullptr;
- return X86Operand::CreateImm(Val, Start, End);
+ return true;
+ Operands.push_back(X86Operand::CreateImm(Val, Start, End));
+ return false;
}
case AsmToken::LCurly: {
SMLoc Start = Parser.getTok().getLoc();
- return ParseRoundingModeOp(Start);
+ return ParseRoundingModeOp(Start, Operands);
}
default: {
// This a memory operand or a register. We have some parsing complications
@@ -2233,7 +2649,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
if (getLexer().isNot(AsmToken::LParen)) {
// No '(' so this is either a displacement expression or a register.
if (Parser.parseExpression(Expr, EndLoc))
- return nullptr;
+ return true;
if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
// Segment Register. Reset Expr and copy value to register.
Expr = nullptr;
@@ -2241,21 +2657,27 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
// Sanity check register.
if (Reg == X86::EIZ || Reg == X86::RIZ)
- return ErrorOperand(
+ return Error(
Loc, "%eiz and %riz can only be used as index registers",
SMRange(Loc, EndLoc));
if (Reg == X86::RIP)
- return ErrorOperand(Loc, "%rip can only be used as a base register",
- SMRange(Loc, EndLoc));
+ return Error(Loc, "%rip can only be used as a base register",
+ SMRange(Loc, EndLoc));
// Return register that are not segment prefixes immediately.
- if (!Parser.parseOptionalToken(AsmToken::Colon))
- return X86Operand::CreateReg(Reg, Loc, EndLoc);
+ if (!Parser.parseOptionalToken(AsmToken::Colon)) {
+ Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
+ return false;
+ }
if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
- return ErrorOperand(Loc, "invalid segment register");
+ return Error(Loc, "invalid segment register");
+ // Accept a '*' absolute memory reference after the segment. Place it
+ // before the full memory operand.
+ if (getLexer().is(AsmToken::Star))
+ Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
}
}
// This is a Memory operand.
- return ParseMemOperand(Reg, Expr, Loc, EndLoc);
+ return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
}
}
}
@@ -2305,8 +2727,7 @@ bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
}
// true on failure, false otherwise
-bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
- const MCParsedAsmOperand &Op) {
+bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
if (getLexer().is(AsmToken::LCurly)) {
// Eat "{" and mark the current place.
@@ -2316,21 +2737,26 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
// Parse memory broadcasting ({1to<NUM>}).
if (getLexer().getTok().getIntVal() != 1)
return TokError("Expected 1to<NUM> at this point");
- Parser.Lex(); // Eat "1" of 1to8
- if (!getLexer().is(AsmToken::Identifier) ||
- !getLexer().getTok().getIdentifier().startswith("to"))
+ StringRef Prefix = getLexer().getTok().getString();
+ Parser.Lex(); // Eat first token of 1to8
+ if (!getLexer().is(AsmToken::Identifier))
return TokError("Expected 1to<NUM> at this point");
// Recognize only reasonable suffixes.
+ SmallVector<char, 5> BroadcastVector;
+ StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
+ .toStringRef(BroadcastVector);
+ if (!BroadcastString.startswith("1to"))
+ return TokError("Expected 1to<NUM> at this point");
const char *BroadcastPrimitive =
- StringSwitch<const char*>(getLexer().getTok().getIdentifier())
- .Case("to2", "{1to2}")
- .Case("to4", "{1to4}")
- .Case("to8", "{1to8}")
- .Case("to16", "{1to16}")
- .Default(nullptr);
+ StringSwitch<const char *>(BroadcastString)
+ .Case("1to2", "{1to2}")
+ .Case("1to4", "{1to4}")
+ .Case("1to8", "{1to8}")
+ .Case("1to16", "{1to16}")
+ .Default(nullptr);
if (!BroadcastPrimitive)
return TokError("Invalid memory broadcast primitive.");
- Parser.Lex(); // Eat "toN" of 1toN
+ Parser.Lex(); // Eat trailing token of 1toN
if (!getLexer().is(AsmToken::RCurly))
return TokError("Expected } at this point");
Parser.Lex(); // Eat "}"
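Previously the broadcast suffix was assumed to lex as the integer "1" followed by an identifier beginning with "to"; the code above instead concatenates the spelling of the first token with the following identifier and validates the combined "1toN" string. A standalone sketch of that recombination, with std::string and std::map standing in for Twine and StringSwitch:

#include <map>
#include <string>

// Join the spelling of the first token ("1") with the following identifier
// ("to8"), then accept only the known {1toN} broadcast primitives.
const char *matchBroadcast(const std::string &Prefix, const std::string &Ident) {
  static const std::map<std::string, const char *> Known = {
      {"1to2", "{1to2}"}, {"1to4", "{1to4}"},
      {"1to8", "{1to8}"}, {"1to16", "{1to16}"}};
  std::string Combined = Prefix + Ident;   // e.g. "1" + "to8"
  if (Combined.rfind("1to", 0) != 0)
    return nullptr;                        // not a broadcast primitive
  auto It = Known.find(Combined);
  return It == Known.end() ? nullptr : It->second;
}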
@@ -2390,10 +2816,9 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
/// has already been parsed if present. disp may be provided as well.
-std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
- const MCExpr *&Disp,
- const SMLoc &StartLoc,
- SMLoc &EndLoc) {
+bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
+ SMLoc StartLoc, SMLoc EndLoc,
+ OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc Loc;
// Based on the initial passed values, we may be in any of these cases, we are
@@ -2455,7 +2880,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
// Parse immediate if we're not at a mem operand yet.
if (!isAtMemOperand()) {
if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
- return nullptr;
+ return true;
assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
} else {
// Disp is implicitly zero if we haven't parsed it yet.
@@ -2468,9 +2893,12 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
if (!parseOptionalToken(AsmToken::LParen)) {
if (SegReg == 0)
- return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
- return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
- StartLoc, EndLoc);
+ Operands.push_back(
+ X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
+ else
+ Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
+ 0, 0, 1, StartLoc, EndLoc));
+ return false;
}
// If we reached here, then eat the '(' and Process
@@ -2484,14 +2912,13 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
if (Parser.parseExpression(E, EndLoc) ||
check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
- return nullptr;
+ return true;
// Sanity check register.
BaseReg = cast<X86MCExpr>(E)->getRegNo();
if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
- return ErrorOperand(BaseLoc,
- "eiz and riz can only be used as index registers",
- SMRange(BaseLoc, EndLoc));
+ return Error(BaseLoc, "eiz and riz can only be used as index registers",
+ SMRange(BaseLoc, EndLoc));
}
if (parseOptionalToken(AsmToken::Comma)) {
@@ -2503,14 +2930,14 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
// "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
if (getLexer().isNot(AsmToken::RParen)) {
if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
- return nullptr;
+ return true;
if (!isa<X86MCExpr>(E)) {
// We've parsed an unexpected Scale Value instead of an index
// register. Interpret it as an absolute.
int64_t ScaleVal;
if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
- return ErrorOperand(Loc, "expected absolute expression");
+ return Error(Loc, "expected absolute expression");
if (ScaleVal != 1)
Warning(Loc, "scale factor without index register is ignored");
Scale = 1;
@@ -2518,10 +2945,10 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
IndexReg = cast<X86MCExpr>(E)->getRegNo();
if (BaseReg == X86::RIP)
- return ErrorOperand(
- Loc, "%rip as base register can not have an index register");
+ return Error(Loc,
+ "%rip as base register can not have an index register");
if (IndexReg == X86::RIP)
- return ErrorOperand(Loc, "%rip is not allowed as an index register");
+ return Error(Loc, "%rip is not allowed as an index register");
if (parseOptionalToken(AsmToken::Comma)) {
// Parse the scale amount:
@@ -2532,15 +2959,14 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
int64_t ScaleVal;
if (Parser.parseTokenLoc(Loc) ||
Parser.parseAbsoluteExpression(ScaleVal))
- return ErrorOperand(Loc, "expected scale expression");
+ return Error(Loc, "expected scale expression");
Scale = (unsigned)ScaleVal;
// Validate the scale amount.
if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
Scale != 1)
- return ErrorOperand(Loc,
- "scale factor in 16-bit address must be 1");
+ return Error(Loc, "scale factor in 16-bit address must be 1");
if (checkScale(Scale, ErrMsg))
- return ErrorOperand(Loc, ErrMsg);
+ return Error(Loc, ErrMsg);
}
}
}
@@ -2549,23 +2975,30 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
- return nullptr;
+ return true;
  // This is to support the otherwise illegal operand (%dx), found in various
  // unofficial manual examples (e.g. "out[s]?[bwl]? %al, (%dx)"), which must
  // be supported. Mark such DX variants separately; fix only in special cases.
if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
- isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
- return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
+ isa<MCConstantExpr>(Disp) &&
+ cast<MCConstantExpr>(Disp)->getValue() == 0) {
+ Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
+ return false;
+ }
if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
ErrMsg))
- return ErrorOperand(BaseLoc, ErrMsg);
+ return Error(BaseLoc, ErrMsg);
if (SegReg || BaseReg || IndexReg)
- return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
- IndexReg, Scale, StartLoc, EndLoc);
- return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
+ Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
+ BaseReg, IndexReg, Scale, StartLoc,
+ EndLoc));
+ else
+ Operands.push_back(
+ X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
+ return false;
}
// Parse either a standard primary expression or a register.
@@ -2582,7 +3015,7 @@ bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
Res = X86MCExpr::create(RegNo, Parser.getContext());
return false;
}
- return Parser.parsePrimaryExpr(Res, EndLoc);
+ return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
}
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -2592,6 +3025,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Reset the forced VEX encoding.
ForcedVEXEncoding = VEXEncoding_Default;
+ ForcedDispEncoding = DispEncoding_Default;
// Parse pseudo prefixes.
while (1) {
@@ -2604,12 +3038,18 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return Error(Parser.getTok().getLoc(), "Expected '}'");
Parser.Lex(); // Eat curly.
- if (Prefix == "vex" || Prefix == "vex2")
+ if (Prefix == "vex")
ForcedVEXEncoding = VEXEncoding_VEX;
+ else if (Prefix == "vex2")
+ ForcedVEXEncoding = VEXEncoding_VEX2;
else if (Prefix == "vex3")
ForcedVEXEncoding = VEXEncoding_VEX3;
else if (Prefix == "evex")
ForcedVEXEncoding = VEXEncoding_EVEX;
+ else if (Prefix == "disp8")
+ ForcedDispEncoding = DispEncoding_Disp8;
+ else if (Prefix == "disp32")
+ ForcedDispEncoding = DispEncoding_Disp32;
else
return Error(NameLoc, "unknown prefix");
@@ -2626,10 +3066,36 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
continue;
}
+ // Parse MASM style pseudo prefixes.
+ if (isParsingMSInlineAsm()) {
+ if (Name.equals_lower("vex"))
+ ForcedVEXEncoding = VEXEncoding_VEX;
+ else if (Name.equals_lower("vex2"))
+ ForcedVEXEncoding = VEXEncoding_VEX2;
+ else if (Name.equals_lower("vex3"))
+ ForcedVEXEncoding = VEXEncoding_VEX3;
+ else if (Name.equals_lower("evex"))
+ ForcedVEXEncoding = VEXEncoding_EVEX;
+ if (ForcedVEXEncoding != VEXEncoding_Default) {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(Parser.getTok().getLoc(), "Expected identifier");
+        // FIXME: The mnemonic won't match correctly if it's not in lower case.
+ Name = Parser.getTok().getString();
+ NameLoc = Parser.getTok().getLoc();
+ Parser.Lex();
+ }
+ }
break;
}
+ // Support the suffix syntax for overriding displacement size as well.
+ if (Name.consume_back(".d32")) {
+ ForcedDispEncoding = DispEncoding_Disp32;
+ } else if (Name.consume_back(".d8")) {
+ ForcedDispEncoding = DispEncoding_Disp8;
+ }
+
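With this change the displacement size can be forced either by the "{disp8}"/"{disp32}" pseudo prefixes handled earlier or by a ".d8"/".d32" suffix on the mnemonic, which is stripped with consume_back. A standalone sketch of the suffix stripping (plain std::string approximates StringRef::consume_back; the mnemonics in the comments are illustrative):

#include <string>

enum class DispEncoding { Default, Disp8, Disp32 };

// Strip a recognized displacement-size suffix from the mnemonic, if present,
// and report which encoding was requested. ".d32" is checked before ".d8"
// just as in the parser above.
DispEncoding stripDispSuffix(std::string &Name) {
  auto consumeBack = [&](const std::string &Suffix) {
    if (Name.size() >= Suffix.size() &&
        Name.compare(Name.size() - Suffix.size(), Suffix.size(), Suffix) == 0) {
      Name.erase(Name.size() - Suffix.size());
      return true;
    }
    return false;
  };
  if (consumeBack(".d32"))
    return DispEncoding::Disp32; // e.g. "jmp.d32" -> "jmp"
  if (consumeBack(".d8"))
    return DispEncoding::Disp8;  // e.g. "addl.d8" -> "addl" (illustrative)
  return DispEncoding::Default;
}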
StringRef PatchedName = Name;
// Hack to skip "short" following Jcc.
@@ -2797,11 +3263,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// repz repnz <insn> ; GAS errors for the use of two similar prefixes
// lock addq %rax, %rbx ; Destination operand must be of memory type
// xacquire <insn> ; xacquire must be accompanied by 'lock'
- bool isPrefix = StringSwitch<bool>(Name)
- .Cases("rex64", "data32", "data16", true)
- .Cases("xacquire", "xrelease", true)
- .Cases("acquire", "release", isParsingIntelSyntax())
- .Default(false);
+ bool IsPrefix =
+ StringSwitch<bool>(Name)
+ .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
+ .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
+ .Cases("xacquire", "xrelease", true)
+ .Cases("acquire", "release", isParsingIntelSyntax())
+ .Default(false);
auto isLockRepeatNtPrefix = [](StringRef N) {
return StringSwitch<bool>(N)
@@ -2856,6 +3324,22 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return Error(NameLoc, "'data32' is not supported in 64-bit mode");
// Hack to 'data16' for the table lookup.
PatchedName = "data16";
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ StringRef Next = Parser.getTok().getString();
+ getLexer().Lex();
+ // data32 effectively changes the instruction suffix.
+ // TODO Generalize.
+ if (Next == "callw")
+ Next = "calll";
+ if (Next == "ljmpw")
+ Next = "ljmpl";
+
+ Name = Next;
+ PatchedName = Name;
+ ForcedDataPrefix = X86::Mode32Bit;
+ IsPrefix = false;
+ }
}
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
@@ -2871,20 +3355,18 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
// just want to parse the "lock" as the first instruction and the "incl" as
// the next one.
- if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
+ if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
// Parse '*' modifier.
if (getLexer().is(AsmToken::Star))
Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
// Read the operands.
while(1) {
- if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
- Operands.push_back(std::move(Op));
- if (HandleAVX512Operand(Operands, *Operands.back()))
- return true;
- } else {
- return true;
- }
+ if (ParseOperand(Operands))
+ return true;
+ if (HandleAVX512Operand(Operands))
+ return true;
+
// check for comma and eat it
if (getLexer().is(AsmToken::Comma))
Parser.Lex();
@@ -2910,7 +3392,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Consume the EndOfStatement or the prefix separator Slash
if (getLexer().is(AsmToken::EndOfStatement) ||
- (isPrefix && getLexer().is(AsmToken::Slash)))
+ (IsPrefix && getLexer().is(AsmToken::Slash)))
Parser.Lex();
else if (CurlyAsEndOfStatement)
// Add an actual EndOfStatement before the curly brace
@@ -3064,39 +3546,6 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return HadVerifyError;
}
- // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
- // "shift <op>".
- if ((Name.startswith("shr") || Name.startswith("sar") ||
- Name.startswith("shl") || Name.startswith("sal") ||
- Name.startswith("rcl") || Name.startswith("rcr") ||
- Name.startswith("rol") || Name.startswith("ror")) &&
- Operands.size() == 3) {
- if (isParsingIntelSyntax()) {
- // Intel syntax
- X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
- if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
- cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
- Operands.pop_back();
- } else {
- X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
- if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
- cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
- Operands.erase(Operands.begin() + 1);
- }
- }
-
- // Transforms "int $3" into "int3" as a size optimization. We can't write an
- // instalias with an immediate operand yet.
- if (Name == "int" && Operands.size() == 2) {
- X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
- if (Op1.isImm())
- if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
- if (CE->getValue() == 3) {
- Operands.erase(Operands.begin() + 1);
- static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
- }
- }
-
// Transforms "xlat mem8" into "xlatb"
if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
@@ -3118,6 +3567,26 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
switch (Inst.getOpcode()) {
default: return false;
+ case X86::JMP_1:
+ // {disp32} forces a larger displacement as if the instruction was relaxed.
+ // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
+ // This matches GNU assembler.
+ if (ForcedDispEncoding == DispEncoding_Disp32) {
+ Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
+ return true;
+ }
+
+ return false;
+ case X86::JCC_1:
+ // {disp32} forces a larger displacement as if the instruction was relaxed.
+ // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
+ // This matches GNU assembler.
+ if (ForcedDispEncoding == DispEncoding_Disp32) {
+ Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
+ return true;
+ }
+
+ return false;
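Both cases above implement the same promotion: under a forced {disp32} encoding, the one-byte-displacement jump opcodes are replaced by their relaxed forms, except that 16-bit mode selects the 16-bit-displacement form to match the GNU assembler (JMP_2/JCC_2 are the 16-bit forms, JMP_4/JCC_4 the 32-bit forms). A tiny sketch of that selection with a stand-in enum:

enum class JmpForm { Disp8, Disp16, Disp32 };

// Pick the relaxed form requested by {disp32}; the "larger" form in 16-bit
// mode still uses a 16-bit displacement.
JmpForm relaxForDisp32(bool Is16BitMode) {
  return Is16BitMode ? JmpForm::Disp16 : JmpForm::Disp32;
}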
case X86::VMOVZPQILo2PQIrr:
case X86::VMOVAPDrr:
case X86::VMOVAPDYrr:
@@ -3176,6 +3645,122 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
Inst.setOpcode(NewOpc);
return true;
}
+ case X86::RCR8ri: case X86::RCR16ri: case X86::RCR32ri: case X86::RCR64ri:
+ case X86::RCL8ri: case X86::RCL16ri: case X86::RCL32ri: case X86::RCL64ri:
+ case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
+ case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
+ case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
+ case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
+ case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri: {
+ // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
+ // FIXME: It would be great if we could just do this with an InstAlias.
+ if (!Inst.getOperand(2).isImm() || Inst.getOperand(2).getImm() != 1)
+ return false;
+
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::RCR8ri: NewOpc = X86::RCR8r1; break;
+ case X86::RCR16ri: NewOpc = X86::RCR16r1; break;
+ case X86::RCR32ri: NewOpc = X86::RCR32r1; break;
+ case X86::RCR64ri: NewOpc = X86::RCR64r1; break;
+ case X86::RCL8ri: NewOpc = X86::RCL8r1; break;
+ case X86::RCL16ri: NewOpc = X86::RCL16r1; break;
+ case X86::RCL32ri: NewOpc = X86::RCL32r1; break;
+ case X86::RCL64ri: NewOpc = X86::RCL64r1; break;
+ case X86::ROR8ri: NewOpc = X86::ROR8r1; break;
+ case X86::ROR16ri: NewOpc = X86::ROR16r1; break;
+ case X86::ROR32ri: NewOpc = X86::ROR32r1; break;
+ case X86::ROR64ri: NewOpc = X86::ROR64r1; break;
+ case X86::ROL8ri: NewOpc = X86::ROL8r1; break;
+ case X86::ROL16ri: NewOpc = X86::ROL16r1; break;
+ case X86::ROL32ri: NewOpc = X86::ROL32r1; break;
+ case X86::ROL64ri: NewOpc = X86::ROL64r1; break;
+ case X86::SAR8ri: NewOpc = X86::SAR8r1; break;
+ case X86::SAR16ri: NewOpc = X86::SAR16r1; break;
+ case X86::SAR32ri: NewOpc = X86::SAR32r1; break;
+ case X86::SAR64ri: NewOpc = X86::SAR64r1; break;
+ case X86::SHR8ri: NewOpc = X86::SHR8r1; break;
+ case X86::SHR16ri: NewOpc = X86::SHR16r1; break;
+ case X86::SHR32ri: NewOpc = X86::SHR32r1; break;
+ case X86::SHR64ri: NewOpc = X86::SHR64r1; break;
+ case X86::SHL8ri: NewOpc = X86::SHL8r1; break;
+ case X86::SHL16ri: NewOpc = X86::SHL16r1; break;
+ case X86::SHL32ri: NewOpc = X86::SHL32r1; break;
+ case X86::SHL64ri: NewOpc = X86::SHL64r1; break;
+ }
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::RCR8mi: case X86::RCR16mi: case X86::RCR32mi: case X86::RCR64mi:
+ case X86::RCL8mi: case X86::RCL16mi: case X86::RCL32mi: case X86::RCL64mi:
+ case X86::ROR8mi: case X86::ROR16mi: case X86::ROR32mi: case X86::ROR64mi:
+ case X86::ROL8mi: case X86::ROL16mi: case X86::ROL32mi: case X86::ROL64mi:
+ case X86::SAR8mi: case X86::SAR16mi: case X86::SAR32mi: case X86::SAR64mi:
+ case X86::SHR8mi: case X86::SHR16mi: case X86::SHR32mi: case X86::SHR64mi:
+ case X86::SHL8mi: case X86::SHL16mi: case X86::SHL32mi: case X86::SHL64mi: {
+ // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
+ // FIXME: It would be great if we could just do this with an InstAlias.
+ if (!Inst.getOperand(X86::AddrNumOperands).isImm() ||
+ Inst.getOperand(X86::AddrNumOperands).getImm() != 1)
+ return false;
+
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::RCR8mi: NewOpc = X86::RCR8m1; break;
+ case X86::RCR16mi: NewOpc = X86::RCR16m1; break;
+ case X86::RCR32mi: NewOpc = X86::RCR32m1; break;
+ case X86::RCR64mi: NewOpc = X86::RCR64m1; break;
+ case X86::RCL8mi: NewOpc = X86::RCL8m1; break;
+ case X86::RCL16mi: NewOpc = X86::RCL16m1; break;
+ case X86::RCL32mi: NewOpc = X86::RCL32m1; break;
+ case X86::RCL64mi: NewOpc = X86::RCL64m1; break;
+ case X86::ROR8mi: NewOpc = X86::ROR8m1; break;
+ case X86::ROR16mi: NewOpc = X86::ROR16m1; break;
+ case X86::ROR32mi: NewOpc = X86::ROR32m1; break;
+ case X86::ROR64mi: NewOpc = X86::ROR64m1; break;
+ case X86::ROL8mi: NewOpc = X86::ROL8m1; break;
+ case X86::ROL16mi: NewOpc = X86::ROL16m1; break;
+ case X86::ROL32mi: NewOpc = X86::ROL32m1; break;
+ case X86::ROL64mi: NewOpc = X86::ROL64m1; break;
+ case X86::SAR8mi: NewOpc = X86::SAR8m1; break;
+ case X86::SAR16mi: NewOpc = X86::SAR16m1; break;
+ case X86::SAR32mi: NewOpc = X86::SAR32m1; break;
+ case X86::SAR64mi: NewOpc = X86::SAR64m1; break;
+ case X86::SHR8mi: NewOpc = X86::SHR8m1; break;
+ case X86::SHR16mi: NewOpc = X86::SHR16m1; break;
+ case X86::SHR32mi: NewOpc = X86::SHR32m1; break;
+ case X86::SHR64mi: NewOpc = X86::SHR64m1; break;
+ case X86::SHL8mi: NewOpc = X86::SHL8m1; break;
+ case X86::SHL16mi: NewOpc = X86::SHL16m1; break;
+ case X86::SHL32mi: NewOpc = X86::SHL32m1; break;
+ case X86::SHL64mi: NewOpc = X86::SHL64m1; break;
+ }
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ for (int i = 0; i != X86::AddrNumOperands; ++i)
+ TmpInst.addOperand(Inst.getOperand(i));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::INT: {
+ // Transforms "int $3" into "int3" as a size optimization. We can't write an
+ // instalias with an immediate operand yet.
+ if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::INT3);
+ Inst = TmpInst;
+ return true;
+ }
}
}
@@ -3275,6 +3860,33 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
}
}
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+ // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
+ // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
+ if ((MCID.TSFlags & X86II::EncodingMask) == 0) {
+ MCPhysReg HReg = X86::NoRegister;
+ bool UsesRex = MCID.TSFlags & X86II::REX_W;
+ unsigned NumOps = Inst.getNumOperands();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ const MCOperand &MO = Inst.getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
+ HReg = Reg;
+ if (X86II::isX86_64NonExtLowByteReg(Reg) ||
+ X86II::isX86_64ExtendedReg(Reg))
+ UsesRex = true;
+ }
+
+ if (UsesRex && HReg != X86::NoRegister) {
+ StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
+ return Error(Ops[0]->getStartLoc(),
+ "can't encode '" + RegName + "' in an instruction requiring "
+ "REX prefix");
+ }
+ }
+
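The new check above rejects legacy-encoded instructions that mix one of the high-byte registers AH/BH/CH/DH with anything that forces a REX prefix (REX.W, the uniform byte registers SPL/BPL/SIL/DIL, or the extended registers R8-R15 and their sub-registers), since a REX-prefixed instruction cannot address the high-byte registers. A standalone sketch of the same conflict test, reducing register classification to name lists for illustration:

#include <cctype>
#include <string>
#include <vector>

// Returns true when the operand list cannot be encoded: a high-byte register
// is present together with something that requires a REX prefix. The real
// code inspects MCInstrDesc flags and register enums instead of names.
bool mixesHighByteWithRex(const std::vector<std::string> &Regs, bool UsesRexW) {
  bool HasHighByte = false;
  bool NeedsRex = UsesRexW;
  for (const std::string &R : Regs) {
    if (R == "ah" || R == "bh" || R == "ch" || R == "dh")
      HasHighByte = true;
    // Uniform byte registers and r8..r15 (any width) force a REX prefix.
    if (R == "spl" || R == "bpl" || R == "sil" || R == "dil" ||
        (R.size() > 1 && R[0] == 'r' &&
         std::isdigit(static_cast<unsigned char>(R[1]))))
      NeedsRex = true;
  }
  return HasHighByte && NeedsRex; // e.g. {"ah", "sil"} cannot be encoded together
}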
return false;
}
@@ -3468,10 +4080,18 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_Unsupported;
if ((ForcedVEXEncoding == VEXEncoding_VEX ||
+ ForcedVEXEncoding == VEXEncoding_VEX2 ||
ForcedVEXEncoding == VEXEncoding_VEX3) &&
(MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
return Match_Unsupported;
+  // These instructions are only available with a {vex}, {vex2} or {vex3} prefix.
+ if (MCID.TSFlags & X86II::ExplicitVEXPrefix &&
+ (ForcedVEXEncoding != VEXEncoding_VEX &&
+ ForcedVEXEncoding != VEXEncoding_VEX2 &&
+ ForcedVEXEncoding != VEXEncoding_VEX3))
+ return Match_Unsupported;
+
// These instructions match ambiguously with their VEX encoded counterparts
// and appear first in the matching table. Reject them unless we're forcing
// EVEX encoding.
@@ -3510,19 +4130,39 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
MCInst Inst;
- // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
- // encoder.
- if (ForcedVEXEncoding == VEXEncoding_VEX3)
+ // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
+ // encoder and printer.
+ if (ForcedVEXEncoding == VEXEncoding_VEX)
+ Prefixes |= X86::IP_USE_VEX;
+ else if (ForcedVEXEncoding == VEXEncoding_VEX2)
+ Prefixes |= X86::IP_USE_VEX2;
+ else if (ForcedVEXEncoding == VEXEncoding_VEX3)
Prefixes |= X86::IP_USE_VEX3;
+ else if (ForcedVEXEncoding == VEXEncoding_EVEX)
+ Prefixes |= X86::IP_USE_EVEX;
+
+ // Set encoded flags for {disp8} and {disp32}.
+ if (ForcedDispEncoding == DispEncoding_Disp8)
+ Prefixes |= X86::IP_USE_DISP8;
+ else if (ForcedDispEncoding == DispEncoding_Disp32)
+ Prefixes |= X86::IP_USE_DISP32;
if (Prefixes)
Inst.setFlags(Prefixes);
+ // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
+ // when matching the instruction.
+ if (ForcedDataPrefix == X86::Mode32Bit)
+ SwitchMode(X86::Mode32Bit);
// First, try a direct match.
FeatureBitset MissingFeatures;
unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
MissingFeatures, MatchingInlineAsm,
isParsingIntelSyntax());
+ if (ForcedDataPrefix == X86::Mode32Bit) {
+ SwitchMode(X86::Mode16Bit);
+ ForcedDataPrefix = 0;
+ }
switch (OriginalError) {
default: llvm_unreachable("Unexpected match result!");
case Match_Success:
@@ -3631,6 +4271,15 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
unsigned NumSuccessfulMatches =
std::count(std::begin(Match), std::end(Match), Match_Success);
if (NumSuccessfulMatches == 1) {
+ if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
+ return true;
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other.
+ if (!MatchingInlineAsm)
+ while (processInstruction(Inst, Operands))
+ ;
+
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
emitInstruction(Inst, Operands, Out);
@@ -3744,10 +4393,22 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
MCInst Inst;
- // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
- // encoder.
- if (ForcedVEXEncoding == VEXEncoding_VEX3)
+ // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
+ // encoder and printer.
+ if (ForcedVEXEncoding == VEXEncoding_VEX)
+ Prefixes |= X86::IP_USE_VEX;
+ else if (ForcedVEXEncoding == VEXEncoding_VEX2)
+ Prefixes |= X86::IP_USE_VEX2;
+ else if (ForcedVEXEncoding == VEXEncoding_VEX3)
Prefixes |= X86::IP_USE_VEX3;
+ else if (ForcedVEXEncoding == VEXEncoding_EVEX)
+ Prefixes |= X86::IP_USE_EVEX;
+
+ // Set encoded flags for {disp8} and {disp32}.
+ if (ForcedDispEncoding == DispEncoding_Disp8)
+ Prefixes |= X86::IP_USE_DISP8;
+ else if (ForcedDispEncoding == DispEncoding_Disp32)
+ Prefixes |= X86::IP_USE_DISP32;
if (Prefixes)
Inst.setFlags(Prefixes);
@@ -3942,6 +4603,8 @@ bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
MCAsmParser &Parser = getParser();
StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal.startswith(".arch"))
+ return parseDirectiveArch();
if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
@@ -3966,7 +4629,9 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
"a '%' prefix in .intel_syntax");
}
return false;
- } else if (IDVal == ".even")
+ } else if (IDVal == ".nops")
+ return parseDirectiveNops(DirectiveID.getLoc());
+ else if (IDVal == ".even")
return parseDirectiveEven(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_proc")
return parseDirectiveFPOProc(DirectiveID.getLoc());
@@ -3982,20 +4647,67 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_endproc")
return parseDirectiveFPOEndProc(DirectiveID.getLoc());
- else if (IDVal == ".seh_pushreg")
+ else if (IDVal == ".seh_pushreg" ||
+ (Parser.isParsingMasm() && IDVal.equals_lower(".pushreg")))
return parseDirectiveSEHPushReg(DirectiveID.getLoc());
- else if (IDVal == ".seh_setframe")
+ else if (IDVal == ".seh_setframe" ||
+ (Parser.isParsingMasm() && IDVal.equals_lower(".setframe")))
return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
- else if (IDVal == ".seh_savereg")
+ else if (IDVal == ".seh_savereg" ||
+ (Parser.isParsingMasm() && IDVal.equals_lower(".savereg")))
return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
- else if (IDVal == ".seh_savexmm")
+ else if (IDVal == ".seh_savexmm" ||
+ (Parser.isParsingMasm() && IDVal.equals_lower(".savexmm128")))
return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
- else if (IDVal == ".seh_pushframe")
+ else if (IDVal == ".seh_pushframe" ||
+ (Parser.isParsingMasm() && IDVal.equals_lower(".pushframe")))
return parseDirectiveSEHPushFrame(DirectiveID.getLoc());
return true;
}
+bool X86AsmParser::parseDirectiveArch() {
+ // Ignore .arch for now.
+ getParser().parseStringToEndOfStatement();
+ return false;
+}
+
+/// parseDirectiveNops
+/// ::= .nops size[, control]
+bool X86AsmParser::parseDirectiveNops(SMLoc L) {
+ int64_t NumBytes = 0, Control = 0;
+ SMLoc NumBytesLoc, ControlLoc;
+ const MCSubtargetInfo STI = getSTI();
+ NumBytesLoc = getTok().getLoc();
+ if (getParser().checkForValidSection() ||
+ getParser().parseAbsoluteExpression(NumBytes))
+ return true;
+
+ if (parseOptionalToken(AsmToken::Comma)) {
+ ControlLoc = getTok().getLoc();
+ if (getParser().parseAbsoluteExpression(Control))
+ return true;
+ }
+ if (getParser().parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.nops' directive"))
+ return true;
+
+ if (NumBytes <= 0) {
+ Error(NumBytesLoc, "'.nops' directive with non-positive size");
+ return false;
+ }
+
+ if (Control < 0) {
+ Error(ControlLoc, "'.nops' directive with negative NOP size");
+ return false;
+ }
+
+  // Emit nops.
+ getParser().getStreamer().emitNops(NumBytes, Control, L);
+
+ return false;
+}
+
/// parseDirectiveEven
/// ::= .even
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
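The validateInstruction() hunk above enforces an x86 encoding rule: a
legacy-encoded instruction cannot use a high byte register (AH/BH/CH/DH)
together with anything that forces a REX prefix, because under a REX prefix
those encodings select SPL/BPL/SIL/DIL instead. The following standalone C++
sketch restates that rule outside the MC layer; the helper names and the
string register spellings are illustrative stand-ins, not LLVM's
MCPhysReg/X86II API.

// Standalone sketch of the high-byte-register vs. REX conflict rule enforced
// by the new validateInstruction() check: a legacy-encoded instruction cannot
// use AH/BH/CH/DH together with anything that requires a REX prefix (REX.W,
// SPL/BPL/SIL/DIL, or an r8..r15 register).
#include <cctype>
#include <optional>
#include <string>
#include <vector>

static bool isHighByteReg(const std::string &R) {
  return R == "ah" || R == "bh" || R == "ch" || R == "dh";
}

static bool forcesRexPrefix(const std::string &R) {
  // SPL/BPL/SIL/DIL and the r8..r15 family are only encodable with REX.
  return R == "spl" || R == "bpl" || R == "sil" || R == "dil" ||
         (R.size() >= 2 && R[0] == 'r' && std::isdigit((unsigned char)R[1]));
}

// Returns a diagnostic if the register mix is unencodable, otherwise nullopt.
std::optional<std::string>
checkRexHighByteMix(bool UsesRexW, const std::vector<std::string> &Regs) {
  bool UsesRex = UsesRexW; // REX.W in the opcode already forces a REX prefix.
  std::string HReg;
  for (const std::string &R : Regs) {
    if (isHighByteReg(R))
      HReg = R;
    if (forcesRexPrefix(R))
      UsesRex = true;
  }
  if (UsesRex && !HReg.empty())
    return "can't encode '" + HReg + "' in an instruction requiring REX prefix";
  return std::nullopt;
}

For example, checkRexHighByteMix(false, {"ah", "r8b"}) yields the same
diagnostic text the parser emits, while {"ah", "bl"} is accepted.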
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index a7fa1eb9a5ee..4e6d8e8e1a54 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -492,6 +492,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
insn->addressSize = (insn->hasAdSize ? 4 : 8);
insn->displacementSize = 4;
insn->immediateSize = 4;
+ insn->hasOpSize = false;
} else {
insn->registerSize = (insn->hasOpSize ? 2 : 4);
insn->addressSize = (insn->hasAdSize ? 4 : 8);
@@ -1662,9 +1663,9 @@ namespace X86 {
sib = 504,
sib64 = 505
};
-}
+} // namespace X86
-}
+} // namespace llvm
static bool translateInstruction(MCInst &target,
InternalInstruction &source,
@@ -1689,7 +1690,7 @@ private:
DisassemblerMode fMode;
};
-}
+} // namespace
X86GenericDisassembler::X86GenericDisassembler(
const MCSubtargetInfo &STI,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index 0134b4efce72..c685d7e0db81 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -16,6 +16,7 @@
#include "X86InstComments.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
@@ -384,6 +385,16 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
raw_ostream &O) {
+ // Do not print the exact form of the memory operand if it references a known
+ // binary object.
+ if (SymbolizeOperands && MIA) {
+ uint64_t Target;
+ if (MIA->evaluateBranch(*MI, 0, 0, Target))
+ return;
+ if (MIA->evaluateMemoryOperandAddress(*MI, 0, 0))
+ return;
+ }
+
const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg);
const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg);
const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
index 51ddae61d251..f7a850571260 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
@@ -36,6 +36,7 @@ public:
raw_ostream &O);
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &OS);
static const char *getRegisterName(unsigned RegNo);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index bf3b6bcb5463..95012a148d83 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -109,7 +109,7 @@ cl::opt<unsigned> X86PadMaxPrefixSize(
cl::desc("Maximum number of prefixes to use for padding"));
cl::opt<bool> X86PadForAlign(
- "x86-pad-for-align", cl::init(true), cl::Hidden,
+ "x86-pad-for-align", cl::init(false), cl::Hidden,
cl::desc("Pad previous instructions to implement align directives"));
cl::opt<bool> X86PadForBranchAlign(
@@ -207,6 +207,8 @@ public:
void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
+ unsigned getMaximumNopSize() const override;
+
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
} // end anonymous namespace
@@ -955,6 +957,9 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm,
if (!X86PadForAlign && !X86PadForBranchAlign)
return;
+  // The processed regions are delimited by LabeledFragments. -g may have more
+ // MCSymbols and therefore different relaxation results. X86PadForAlign is
+ // disabled by default to eliminate the -g vs non -g difference.
DenseSet<MCFragment *> LabeledFragments;
for (const MCSymbol &S : Asm.symbols())
LabeledFragments.insert(S.getFragment(false));
@@ -1067,6 +1072,21 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm,
}
}
+unsigned X86AsmBackend::getMaximumNopSize() const {
+ if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit))
+ return 1;
+ if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
+ return 7;
+ if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
+ return 15;
+ if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
+ return 11;
+ // FIXME: handle 32-bit mode
+  // 15 bytes is the longest single NOP instruction, but 10 bytes is
+ // commonly the longest that can be efficiently decoded.
+ return 10;
+}
+
/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
@@ -1094,23 +1114,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
"\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
};
- // This CPU doesn't support long nops. If needed add more.
- // FIXME: We could generated something better than plain 0x90.
- if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) {
- for (uint64_t i = 0; i < Count; ++i)
- OS << '\x90';
- return true;
- }
-
- // 15-bytes is the longest single NOP instruction, but 10-bytes is
- // commonly the longest that can be efficiently decoded.
- uint64_t MaxNopLength = 10;
- if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
- MaxNopLength = 7;
- else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
- MaxNopLength = 15;
- else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
- MaxNopLength = 11;
+ uint64_t MaxNopLength = (uint64_t)getMaximumNopSize();
// Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
// length.
@@ -1237,7 +1241,7 @@ namespace CU {
UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};
-} // end CU namespace
+} // namespace CU
class DarwinX86AsmBackend : public X86AsmBackend {
const MCRegisterInfo &MRI;
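The writeNopData() change above moves the per-CPU size selection into the new
getMaximumNopSize() hook and keeps only the padding loop: emit NOPs of the
maximum supported length until fewer than that many bytes remain, then emit
one final NOP for the remainder. A simplified sketch of that loop is shown
below; emitNopOfLength is a placeholder callback standing in for the backend's
table of canned NOP encodings.

// Sketch of the padding loop writeNopData() implements after the refactor:
// emit the largest NOP the target can decode efficiently until fewer than
// MaxNopLength bytes remain, then emit one final NOP for the remainder.
#include <algorithm>
#include <cstdint>
#include <functional>

void writeNopPadding(uint64_t Count, uint64_t MaxNopLength,
                     const std::function<void(uint64_t)> &emitNopOfLength) {
  while (Count != 0) {
    uint64_t ThisNop = std::min(Count, MaxNopLength);
    emitNopOfLength(ThisNop); // one NOP instruction of ThisNop bytes
    Count -= ThisNop;
  }
}

With the default 10-byte maximum returned by getMaximumNopSize(), padding
23 bytes would emit two 10-byte NOPs followed by a 3-byte NOP.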
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 79f07d3c7792..4db1bfc25177 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -55,13 +55,18 @@ namespace X86 {
/// The constants to describe instr prefixes if there are
enum IPREFIXES {
IP_NO_PREFIX = 0,
- IP_HAS_OP_SIZE = 1,
- IP_HAS_AD_SIZE = 2,
- IP_HAS_REPEAT_NE = 4,
- IP_HAS_REPEAT = 8,
- IP_HAS_LOCK = 16,
- IP_HAS_NOTRACK = 32,
- IP_USE_VEX3 = 64,
+ IP_HAS_OP_SIZE = 1U << 0,
+ IP_HAS_AD_SIZE = 1U << 1,
+ IP_HAS_REPEAT_NE = 1U << 2,
+ IP_HAS_REPEAT = 1U << 3,
+ IP_HAS_LOCK = 1U << 4,
+ IP_HAS_NOTRACK = 1U << 5,
+ IP_USE_VEX = 1U << 6,
+ IP_USE_VEX2 = 1U << 7,
+ IP_USE_VEX3 = 1U << 8,
+ IP_USE_EVEX = 1U << 9,
+ IP_USE_DISP8 = 1U << 10,
+ IP_USE_DISP32 = 1U << 11,
};
enum OperandType : unsigned {
@@ -947,7 +952,11 @@ namespace X86II {
// NOTRACK prefix
NoTrackShift = EVEX_RCShift + 1,
- NOTRACK = 1ULL << NoTrackShift
+ NOTRACK = 1ULL << NoTrackShift,
+
+ // Force VEX encoding
+ ExplicitVEXShift = NoTrackShift + 1,
+ ExplicitVEXPrefix = 1ULL << ExplicitVEXShift
};
/// \returns true if the instruction with given opcode is a prefix.
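The IPREFIXES rewrite above spells every prefix marker as an explicit
single-bit mask, so the assembler can OR several pseudo-prefix requests (for
example {evex} together with {disp8}) into one MCInst flags word and the
printer and encoder can test each bit on its own. A small self-contained
illustration of that bit-flag pattern follows; the values restate the new
IP_USE_* entries locally rather than including the header.

// Illustration of the bit-flag pattern used by the reworked X86::IPREFIXES
// enum: each pseudo-prefix is a distinct bit, so several of them can be OR'ed
// into a single flags word and tested independently.
#include <cstdio>

enum PrefixFlag : unsigned {
  UseVex = 1u << 6,
  UseVex2 = 1u << 7,
  UseVex3 = 1u << 8,
  UseEvex = 1u << 9,
  UseDisp8 = 1u << 10,
  UseDisp32 = 1u << 11,
};

int main() {
  unsigned Flags = 0;
  Flags |= UseEvex;  // the user wrote {evex}
  Flags |= UseDisp8; // the user also wrote {disp8}
  if (Flags & UseEvex)
    std::puts("{evex}");
  if (Flags & UseDisp8)
    std::puts("{disp8}");
  return 0;
}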
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 292dd17e2f51..fa937d381613 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -94,6 +94,12 @@ static void checkIs32(MCContext &Ctx, SMLoc Loc, X86_64RelType Type) {
"32 bit reloc applied to a field with a different size");
}
+static void checkIs64(MCContext &Ctx, SMLoc Loc, X86_64RelType Type) {
+ if (Type != RT64_64)
+ Ctx.reportError(Loc,
+ "64 bit reloc applied to a field with a different size");
+}
+
static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
MCSymbolRefExpr::VariantKind Modifier,
X86_64RelType Type, bool IsPCRel,
@@ -212,6 +218,9 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
return ELF::R_X86_64_REX_GOTPCRELX;
}
llvm_unreachable("unexpected relocation type!");
+ case MCSymbolRefExpr::VK_X86_PLTOFF:
+ checkIs64(Ctx, Loc, Type);
+ return ELF::R_X86_64_PLTOFF64;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index 33d70fdb1214..d8dbbbbf2779 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -295,6 +295,10 @@ void X86InstPrinterCommon::printRoundingControl(const MCInst *MI, unsigned Op,
/// \see MCInstPrinter::printInst
void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address,
unsigned OpNo, raw_ostream &O) {
+  // Do not print the numeric target address when symbolizing.
+ if (SymbolizeOperands)
+ return;
+
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm()) {
if (PrintBranchImmAsAddress) {
@@ -342,6 +346,21 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) {
O << "\trepne\t";
else if (Flags & X86::IP_HAS_REPEAT)
O << "\trep\t";
+
+ // These all require a pseudo prefix
+ if ((Flags & X86::IP_USE_VEX) || (TSFlags & X86II::ExplicitVEXPrefix))
+ O << "\t{vex}";
+ else if (Flags & X86::IP_USE_VEX2)
+ O << "\t{vex2}";
+ else if (Flags & X86::IP_USE_VEX3)
+ O << "\t{vex3}";
+ else if (Flags & X86::IP_USE_EVEX)
+ O << "\t{evex}";
+
+ if (Flags & X86::IP_USE_DISP8)
+ O << "\t{disp8}";
+ else if (Flags & X86::IP_USE_DISP32)
+ O << "\t{disp32}";
}
void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index d1eb4d09851d..d5b205ad9a63 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -16,6 +16,7 @@
#include "X86InstComments.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -342,6 +343,15 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
raw_ostream &O) {
+ // Do not print the exact form of the memory operand if it references a known
+ // binary object.
+ if (SymbolizeOperands && MIA) {
+ uint64_t Target;
+ if (MIA->evaluateBranch(*MI, 0, 0, Target))
+ return;
+ if (MIA->evaluateMemoryOperandAddress(*MI, 0, 0))
+ return;
+ }
const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
index 82baf611df03..aa4d0545ea46 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
@@ -37,6 +37,7 @@ public:
raw_ostream &O);
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 7dea0760a831..260253a5302d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -93,7 +93,8 @@ private:
bool emitOpcodePrefix(int MemOperand, const MCInst &MI,
const MCSubtargetInfo &STI, raw_ostream &OS) const;
- bool emitREXPrefix(int MemOperand, const MCInst &MI, raw_ostream &OS) const;
+ bool emitREXPrefix(int MemOperand, const MCInst &MI,
+ const MCSubtargetInfo &STI, raw_ostream &OS) const;
};
} // end anonymous namespace
@@ -113,33 +114,28 @@ static void emitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) {
}
}
-/// \returns true if this signed displacement fits in a 8-bit sign-extended
-/// field.
-static bool isDisp8(int Value) { return Value == (int8_t)Value; }
-
-/// \returns true if this signed displacement fits in a 8-bit compressed
-/// dispacement field.
-static bool isCDisp8(uint64_t TSFlags, int Value, int &CValue) {
- assert(((TSFlags & X86II::EncodingMask) == X86II::EVEX) &&
- "Compressed 8-bit displacement is only valid for EVEX inst.");
+/// Determine if this immediate can fit in a disp8 or a compressed disp8 for
+/// EVEX instructions. \p ImmOffset will be set to the value to pass to the
+/// ImmOffset parameter of emitImmediate.
+static bool isDispOrCDisp8(uint64_t TSFlags, int Value, int &ImmOffset) {
+ bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
- unsigned CD8_Scale =
+ int CD8_Scale =
(TSFlags & X86II::CD8_Scale_Mask) >> X86II::CD8_Scale_Shift;
- if (CD8_Scale == 0) {
- CValue = Value;
- return isDisp8(Value);
- }
+ if (!HasEVEX || CD8_Scale == 0)
+ return isInt<8>(Value);
+
+ assert(isPowerOf2_32(CD8_Scale) && "Unexpected CD8 scale!");
+ if (Value & (CD8_Scale - 1)) // Unaligned offset
+ return false;
- unsigned Mask = CD8_Scale - 1;
- assert((CD8_Scale & Mask) == 0 && "Invalid memory object size.");
- if (Value & Mask) // Unaligned offset
+ int CDisp8 = Value / CD8_Scale;
+ if (!isInt<8>(CDisp8))
return false;
- Value /= (int)CD8_Scale;
- bool Ret = (Value == (int8_t)Value);
- if (Ret)
- CValue = Value;
- return Ret;
+ // ImmOffset will be added to Value in emitImmediate leaving just CDisp8.
+ ImmOffset = CDisp8 - Value;
+ return true;
}
/// \returns the appropriate fixup kind to use for an immediate in an
@@ -164,17 +160,18 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
/// \returns true if the specified instruction has a 16-bit memory operand.
static bool is16BitMemOperand(const MCInst &MI, unsigned Op,
const MCSubtargetInfo &STI) {
- const MCOperand &BaseReg = MI.getOperand(Op + X86::AddrBaseReg);
- const MCOperand &IndexReg = MI.getOperand(Op + X86::AddrIndexReg);
- const MCOperand &Disp = MI.getOperand(Op + X86::AddrDisp);
+ const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &Index = MI.getOperand(Op + X86::AddrIndexReg);
+
+ unsigned BaseReg = Base.getReg();
+ unsigned IndexReg = Index.getReg();
- if (STI.hasFeature(X86::Mode16Bit) && BaseReg.getReg() == 0 && Disp.isImm() &&
- Disp.getImm() < 0x10000)
+ if (STI.hasFeature(X86::Mode16Bit) && BaseReg == 0 && IndexReg == 0)
return true;
- if ((BaseReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+ if ((BaseReg != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) ||
+ (IndexReg != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)))
return true;
return false;
}
@@ -390,7 +387,6 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
const MCOperand &Scale = MI.getOperand(Op + X86::AddrScaleAmt);
const MCOperand &IndexReg = MI.getOperand(Op + X86::AddrIndexReg);
unsigned BaseReg = Base.getReg();
- bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
// Handle %rip relative addressing.
if (BaseReg == X86::RIP ||
@@ -402,16 +398,33 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
emitByte(modRMByte(0, RegOpcodeField, 5), OS);
unsigned Opcode = MI.getOpcode();
- // movq loads are handled with a special relocation form which allows the
- // linker to eliminate some loads for GOT references which end up in the
- // same linkage unit.
- unsigned FixupKind = [=]() {
+ unsigned FixupKind = [&]() {
+ // Enable relaxed relocation only for a MCSymbolRefExpr. We cannot use a
+ // relaxed relocation if an offset is present (e.g. x@GOTPCREL+4).
+ if (!(Disp.isExpr() && isa<MCSymbolRefExpr>(Disp.getExpr())))
+ return X86::reloc_riprel_4byte;
+
+ // Certain loads for GOT references can be relocated against the symbol
+ // directly if the symbol ends up in the same linkage unit.
switch (Opcode) {
default:
return X86::reloc_riprel_4byte;
case X86::MOV64rm:
+      // movq loads are a subset of reloc_riprel_4byte_relax_rex. This is a
+ // special case because COFF and Mach-O don't support ELF's more
+ // flexible R_X86_64_REX_GOTPCRELX relaxation.
assert(HasREX);
return X86::reloc_riprel_4byte_movq_load;
+ case X86::ADC32rm:
+ case X86::ADD32rm:
+ case X86::AND32rm:
+ case X86::CMP32rm:
+ case X86::MOV32rm:
+ case X86::OR32rm:
+ case X86::SBB32rm:
+ case X86::SUB32rm:
+ case X86::TEST32mr:
+ case X86::XOR32rm:
case X86::CALL64m:
case X86::JMP64m:
case X86::TAILJMPm64:
@@ -484,7 +497,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
RMfield = (IndexReg16 & 1) | ((7 - RMfield) << 1);
}
- if (Disp.isImm() && isDisp8(Disp.getImm())) {
+ if (Disp.isImm() && isInt<8>(Disp.getImm())) {
if (Disp.getImm() == 0 && RMfield != 6) {
// There is no displacement; just the register.
emitByte(modRMByte(0, RegOpcodeField, RMfield), OS);
@@ -498,6 +511,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// This is the [REG]+disp16 case.
emitByte(modRMByte(2, RegOpcodeField, RMfield), OS);
} else {
+ assert(IndexReg.getReg() == 0 && "Unexpected index register!");
// There is no BaseReg; this is the plain [disp16] case.
emitByte(modRMByte(0, RegOpcodeField, 6), OS);
}
@@ -507,12 +521,18 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
return;
}
- // Determine whether a SIB byte is needed.
- // If no BaseReg, issue a RIP relative instruction only if the MCE can
- // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
- // 2-7) and absolute references.
+ // Check for presence of {disp8} or {disp32} pseudo prefixes.
+ bool UseDisp8 = MI.getFlags() & X86::IP_USE_DISP8;
+ bool UseDisp32 = MI.getFlags() & X86::IP_USE_DISP32;
- if ( // The SIB byte must be used if there is an index register.
+ // We only allow no displacement if no pseudo prefix is present.
+ bool AllowNoDisp = !UseDisp8 && !UseDisp32;
+ // Disp8 is allowed unless the {disp32} prefix is present.
+ bool AllowDisp8 = !UseDisp32;
+
+ // Determine whether a SIB byte is needed.
+ if (// The SIB byte must be used if there is an index register or the
+ // encoding requires a SIB byte.
!ForceSIB && IndexReg.getReg() == 0 &&
// The SIB byte must be used if the base is ESP/RSP/R12, all of which
// encode to an R/M value of 4, which indicates that a SIB byte is
@@ -528,12 +548,12 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
return;
}
- // If the base is not EBP/ESP and there is no displacement, use simple
- // indirect register encoding, this handles addresses like [EAX]. The
- // encoding for [EBP] with no displacement means [disp32] so we handle it
- // by emitting a displacement of 0 below.
+ // If the base is not EBP/ESP/R12/R13 and there is no displacement, use
+ // simple indirect register encoding, this handles addresses like [EAX].
+    // The encoding for [EBP] or [R13] with no displacement means [disp32] so we
+ // handle it by emitting a displacement of 0 later.
if (BaseRegNo != N86::EBP) {
- if (Disp.isImm() && Disp.getImm() == 0) {
+ if (Disp.isImm() && Disp.getImm() == 0 && AllowNoDisp) {
emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), OS);
return;
}
@@ -552,24 +572,22 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
}
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
- if (Disp.isImm()) {
- if (!HasEVEX && isDisp8(Disp.getImm())) {
- emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), OS);
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups);
- return;
- }
- // Try EVEX compressed 8-bit displacement first; if failed, fall back to
- // 32-bit displacement.
- int CDisp8 = 0;
- if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) {
+  // This includes a compressed disp8 for EVEX instructions that support it.
+ // This also handles the 0 displacement for [EBP] or [R13]. We can't use
+ // disp8 if the {disp32} pseudo prefix is present.
+ if (Disp.isImm() && AllowDisp8) {
+ int ImmOffset = 0;
+ if (isDispOrCDisp8(TSFlags, Disp.getImm(), ImmOffset)) {
emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), OS);
emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups,
- CDisp8 - Disp.getImm());
+ ImmOffset);
return;
}
}
- // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
+ // Otherwise, emit the most general non-SIB encoding: [REG+disp32].
+ // Displacement may be 0 for [EBP] or [R13] case if {disp32} pseudo prefix
+ // prevented using disp8 above.
emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), OS);
unsigned Opcode = MI.getOpcode();
unsigned FixupKind = Opcode == X86::MOV32rm ? X86::reloc_signed_4byte_relax
@@ -585,64 +603,47 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
bool ForceDisp32 = false;
bool ForceDisp8 = false;
- int CDisp8 = 0;
int ImmOffset = 0;
if (BaseReg == 0) {
// If there is no base register, we emit the special case SIB byte with
// MOD=0, BASE=5, to JUST get the index, scale, and displacement.
+ BaseRegNo = 5;
emitByte(modRMByte(0, RegOpcodeField, 4), OS);
ForceDisp32 = true;
- } else if (!Disp.isImm()) {
- // Emit the normal disp32 encoding.
- emitByte(modRMByte(2, RegOpcodeField, 4), OS);
- ForceDisp32 = true;
- } else if (Disp.getImm() == 0 &&
- // Base reg can't be anything that ends up with '5' as the base
- // reg, it is the magic [*] nomenclature that indicates no base.
+ } else if (Disp.isImm() && Disp.getImm() == 0 && AllowNoDisp &&
+ // Base reg can't be EBP/RBP/R13 as that would end up with '5' as
+ // the base field, but that is the magic [*] nomenclature that
+ // indicates no base when mod=0. For these cases we'll emit a 0
+ // displacement instead.
BaseRegNo != N86::EBP) {
// Emit no displacement ModR/M byte
emitByte(modRMByte(0, RegOpcodeField, 4), OS);
- } else if (!HasEVEX && isDisp8(Disp.getImm())) {
- // Emit the disp8 encoding.
- emitByte(modRMByte(1, RegOpcodeField, 4), OS);
- ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
- } else if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) {
- // Emit the disp8 encoding.
+ } else if (Disp.isImm() && AllowDisp8 &&
+ isDispOrCDisp8(TSFlags, Disp.getImm(), ImmOffset)) {
+ // Displacement fits in a byte or matches an EVEX compressed disp8, use
+    // disp8 encoding. This also handles an EBP/R13 base with 0 displacement,
+    // unless the {disp32} pseudo prefix was used.
emitByte(modRMByte(1, RegOpcodeField, 4), OS);
- ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
- ImmOffset = CDisp8 - Disp.getImm();
+ ForceDisp8 = true;
} else {
- // Emit the normal disp32 encoding.
+ // Otherwise, emit the normal disp32 encoding.
emitByte(modRMByte(2, RegOpcodeField, 4), OS);
+ ForceDisp32 = true;
}
// Calculate what the SS field value should be...
static const unsigned SSTable[] = {~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3};
unsigned SS = SSTable[Scale.getImm()];
- if (BaseReg == 0) {
- // Handle the SIB byte for the case where there is no base, see Intel
- // Manual 2A, table 2-7. The displacement has already been output.
- unsigned IndexRegNo;
- if (IndexReg.getReg())
- IndexRegNo = getX86RegNum(IndexReg);
- else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
- IndexRegNo = 4;
- emitSIBByte(SS, IndexRegNo, 5, OS);
- } else {
- unsigned IndexRegNo;
- if (IndexReg.getReg())
- IndexRegNo = getX86RegNum(IndexReg);
- else
- IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
- emitSIBByte(SS, IndexRegNo, getX86RegNum(Base), OS);
- }
+ unsigned IndexRegNo = IndexReg.getReg() ? getX86RegNum(IndexReg) : 4;
+
+ emitSIBByte(SS, IndexRegNo, BaseRegNo, OS);
// Do we need to output a displacement?
if (ForceDisp8)
emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups,
ImmOffset);
- else if (ForceDisp32 || Disp.getImm() != 0)
+ else if (ForceDisp32)
emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
StartByte, OS, Fixups);
}
@@ -1200,6 +1201,7 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
///
/// \returns true if REX prefix is used, otherwise returns false.
bool X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
+ const MCSubtargetInfo &STI,
raw_ostream &OS) const {
uint8_t REX = [&, MemOperand]() {
uint8_t REX = 0;
@@ -1220,15 +1222,28 @@ bool X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
// If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
for (unsigned i = CurOp; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
- UsesHighByteReg = true;
- if (X86II::isX86_64NonExtLowByteReg(Reg))
- // FIXME: The caller of determineREXPrefix slaps this prefix onto
- // anything that returns non-zero.
- REX |= 0x40; // REX fixed encoding prefix
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH ||
+ Reg == X86::DH)
+ UsesHighByteReg = true;
+ if (X86II::isX86_64NonExtLowByteReg(Reg))
+ // FIXME: The caller of determineREXPrefix slaps this prefix onto
+ // anything that returns non-zero.
+ REX |= 0x40; // REX fixed encoding prefix
+ } else if (MO.isExpr() &&
+ STI.getTargetTriple().getEnvironment() == Triple::GNUX32) {
+ // GOTTPOFF and TLSDESC relocations require a REX prefix to allow
+ // linker optimizations: even if the instructions we see may not require
+ // any prefix, they may be replaced by instructions that do. This is
+ // handled as a special case here so that it also works for hand-written
+ // assembly without the user needing to write REX, as with GNU as.
+ const auto *Ref = dyn_cast<MCSymbolRefExpr>(MO.getExpr());
+ if (Ref && (Ref->getKind() == MCSymbolRefExpr::VK_GOTTPOFF ||
+ Ref->getKind() == MCSymbolRefExpr::VK_TLSDESC)) {
+ REX |= 0x40; // REX fixed encoding prefix
+ }
+ }
}
switch (TSFlags & X86II::FormMask) {
@@ -1351,7 +1366,7 @@ bool X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI,
assert((STI.hasFeature(X86::Mode64Bit) || !(TSFlags & X86II::REX_W)) &&
"REX.W requires 64bit mode.");
bool HasREX = STI.hasFeature(X86::Mode64Bit)
- ? emitREXPrefix(MemOperand, MI, OS)
+ ? emitREXPrefix(MemOperand, MI, STI, OS)
: false;
// 0x0F escape code must be emitted just before the opcode.
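The isDispOrCDisp8() helper introduced above merges the old disp8 and EVEX
compressed-disp8 checks: without an EVEX CD8 scale the displacement only has
to fit in a signed byte, while a scaled EVEX displacement must be a multiple
of the scale and its quotient must fit in a signed byte, with ImmOffset
recording the correction that emitImmediate later adds back. A standalone
sketch of the same computation is given below, with isInt8 standing in for
llvm::isInt<8> and CD8Scale assumed to be extracted from TSFlags by the
caller.

// Standalone sketch of the combined disp8 / compressed disp8 test. ImmOffset
// is the value later added to the displacement by emitImmediate so that the
// byte actually written is the compressed quotient.
#include <cstdint>

static bool isInt8(int64_t V) { return V >= -128 && V <= 127; }

bool fitsDispOrCDisp8(bool HasEVEX, int CD8Scale, int Value, int &ImmOffset) {
  if (!HasEVEX || CD8Scale == 0)
    return isInt8(Value); // plain disp8: just a signed 8-bit check

  if (Value % CD8Scale != 0) // unaligned offsets cannot be compressed
    return false;

  int CDisp8 = Value / CD8Scale;
  if (!isInt8(CDisp8))
    return false;

  ImmOffset = CDisp8 - Value; // emitImmediate adds this back to Value
  return true;
}

For example, with CD8Scale = 64 a displacement of 4096 compresses to the byte
64 and ImmOffset becomes 64 - 4096, so the value emitImmediate writes is the
compressed quotient.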
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 81110ba666e9..5cf8d77519d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -44,8 +44,10 @@ using namespace llvm;
std::string X86_MC::ParseX86Triple(const Triple &TT) {
std::string FS;
- if (TT.getArch() == Triple::x86_64)
- FS = "+64bit-mode,-32bit-mode,-16bit-mode";
+ // SSE2 should default to enabled in 64-bit mode, but can be turned off
+ // explicitly.
+ if (TT.isArch64Bit())
+ FS = "+64bit-mode,-32bit-mode,-16bit-mode,+sse2";
else if (TT.getEnvironment() != Triple::CODE16)
FS = "-64bit-mode,+32bit-mode,-16bit-mode";
else
@@ -290,11 +292,10 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
if (!FS.empty())
ArchFS = (Twine(ArchFS) + "," + FS).str();
- std::string CPUName = std::string(CPU);
- if (CPUName.empty())
- CPUName = "generic";
+ if (CPU.empty())
+ CPU = "generic";
- return createX86MCSubtargetInfoImpl(TT, CPUName, ArchFS);
+ return createX86MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS);
}
static MCInstrInfo *createX86MCInstrInfo() {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index e8c72be1d9b6..35604cd3ec0a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -85,11 +85,11 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T,
/// Implements X86-only directives for assembly emission.
MCTargetStreamer *createX86AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
- MCInstPrinter *InstPrint,
- bool isVerboseAsm);
+ MCInstPrinter *InstPrinter,
+ bool IsVerboseAsm);
/// Implements X86-only directives for object files.
-MCTargetStreamer *createX86ObjectTargetStreamer(MCStreamer &OS,
+MCTargetStreamer *createX86ObjectTargetStreamer(MCStreamer &S,
const MCSubtargetInfo &STI);
/// Construct an X86 Windows COFF machine code streamer which will generate
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index b67a7508fe72..b98e58d653db 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -68,7 +68,7 @@ public:
FixedValue);
}
};
-}
+} // namespace
static bool isFixupKindRIPRel(unsigned Kind) {
return Kind == X86::reloc_riprel_4byte ||
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
index 62c1c399a606..201b22d6232d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
@@ -568,4 +568,4 @@ void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
}
}
-} // llvm namespace
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 3bebcc24fd3a..c29211246123 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -26,6 +26,7 @@ public:
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {}
void EmitWinEHHandlerData(SMLoc Loc) override;
+ void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override;
void EmitWindowsUnwindTables() override;
void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override;
void finishImpl() override;
@@ -37,7 +38,11 @@ void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
// We have to emit the unwind info now, because this directive
// actually switches to the .xdata section.
if (WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo())
- EHStreamer.EmitUnwindInfo(*this, CurFrame);
+ EHStreamer.EmitUnwindInfo(*this, CurFrame, /* HandlerData = */ true);
+}
+
+void X86WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
+ EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false);
}
void X86WinCOFFStreamer::EmitWindowsUnwindTables() {
@@ -58,7 +63,7 @@ void X86WinCOFFStreamer::finishImpl() {
MCWinCOFFStreamer::finishImpl();
}
-}
+} // namespace
MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C,
std::unique_ptr<MCAsmBackend> &&AB,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.h b/contrib/llvm-project/llvm/lib/Target/X86/X86.h
index 91ba4e3d091e..e17b9ba5500b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.h
@@ -67,9 +67,6 @@ FunctionPass *createX86OptimizeLEAs();
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
FunctionPass *createX86FixupSetCC();
-/// Return a pass that folds conditional branch jumps.
-FunctionPass *createX86CondBrFolding();
-
/// Return a pass that avoids creating store forward block issues in the hardware.
FunctionPass *createX86AvoidStoreForwardingBlocks();
@@ -79,6 +76,10 @@ FunctionPass *createX86FlagsCopyLoweringPass();
/// Return a pass that expands WinAlloca pseudo-instructions.
FunctionPass *createX86WinAllocaExpander();
+FunctionPass *createX86TileConfigPass();
+
+FunctionPass *createX86PreTileConfigPass();
+
/// Return a pass that inserts int3 at the end of the function if it ends with a
/// CALL instruction. The pass does the same for each funclet as well. This
/// ensures that the open interval of function start and end PCs contains all
@@ -154,7 +155,6 @@ void initializeX86AvoidSFBPassPass(PassRegistry &);
void initializeX86AvoidTrailingCallPassPass(PassRegistry &);
void initializeX86CallFrameOptimizationPass(PassRegistry &);
void initializeX86CmovConverterPassPass(PassRegistry &);
-void initializeX86CondBrFoldingPassPass(PassRegistry &);
void initializeX86DomainReassignmentPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
void initializeX86ExpandPseudoPass(PassRegistry &);
@@ -166,6 +166,9 @@ void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86PartialReductionPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);
+void initializeX86PreTileConfigPass(PassRegistry &);
+void initializeX86TileConfigPass(PassRegistry &);
+void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &);
namespace X86AS {
enum : unsigned {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
index dc1ff72add49..c492d686c52e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
@@ -171,6 +171,9 @@ def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
"Enable AVX-512 Vector Neural Network Instructions",
[FeatureAVX512]>;
+def FeatureAVXVNNI : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
+ "Support AVX_VNNI encoding",
+ [FeatureAVX2]>;
def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
"Support bfloat16 floating point",
[FeatureBWI]>;
@@ -234,8 +237,8 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
-def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
- "Support LAHF and SAHF instructions">;
+def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
+ "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
"Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
@@ -244,11 +247,6 @@ def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
"Enable Cache Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
"Support ptwrite instruction">;
-// FIXME: This feature is deprecated in 10.0 and should not be used for
-// anything, but removing it would break IR files that may contain it in a
-// target-feature attribute.
-def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false",
- "Deprecated. Support MPX instructions">;
def FeatureAMXTILE : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
"Support AMX-TILE instructions">;
def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
@@ -284,10 +282,20 @@ def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
"Has ENQCMD instructions">;
+def FeatureKL : SubtargetFeature<"kl", "HasKL", "true",
+ "Support Key Locker kl Instructions",
+ [FeatureSSE2]>;
+def FeatureWIDEKL : SubtargetFeature<"widekl", "HasWIDEKL", "true",
+ "Support Key Locker wide Instructions",
+ [FeatureKL]>;
+def FeatureHRESET : SubtargetFeature<"hreset", "HasHRESET", "true",
+ "Has hreset instruction">;
def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
"Has serialize instruction">;
def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
"Support TSXLDTRK instructions">;
+def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
+ "Has UINTR Instructions">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
@@ -377,6 +385,12 @@ def FeatureERMSB
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
+// Icelake and newer processors have Fast Short REP MOV.
+def FeatureFSRM
+ : SubtargetFeature<
+ "fsrm", "HasFSRM", "true",
+ "REP MOVSB of short lengths is faster">;
+
// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
def FeatureBranchFusion
@@ -504,12 +518,6 @@ def FeatureUseGLMDivSqrtCosts
: SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
"Use Goldmont specific floating point div/sqrt costs">;
-// Merge branches using three-way conditional code.
-def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
- "ThreewayBranchProfitable", "true",
- "Merge branches to a three-way "
- "conditional branch">;
-
// Enable use of alias analysis during code generation.
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
@@ -557,59 +565,59 @@ include "X86SchedSkylakeServer.td"
//===----------------------------------------------------------------------===//
def ProcessorFeatures {
+ // x86-64 and x86-64-v[234]
+ list<SubtargetFeature> X86_64V1Features = [
+ FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
+ FeatureFXSR, FeatureNOPL, Feature64Bit
+ ];
+ list<SubtargetFeature> X86_64V2Features = !listconcat(
+ X86_64V1Features,
+ [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]);
+ list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
+ FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
+ FeatureMOVBE, FeatureXSAVE
+ ]);
+ list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
+ FeatureBWI,
+ FeatureCDI,
+ FeatureDQI,
+ FeatureVLX,
+ ]);
+
// Nehalem
- list<SubtargetFeature> NHMInheritableFeatures = [FeatureX87,
- FeatureCMPXCHG8B,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSE42,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeaturePOPCNT,
- FeatureLAHFSAHF,
- FeatureMacroFusion,
- FeatureInsertVZEROUPPER];
- list<SubtargetFeature> NHMSpecificFeatures = [];
- list<SubtargetFeature> NHMFeatures =
- !listconcat(NHMInheritableFeatures, NHMSpecificFeatures);
+ list<SubtargetFeature> NHMFeatures = X86_64V2Features;
+ list<SubtargetFeature> NHMTuning = [FeatureMacroFusion,
+ FeatureInsertVZEROUPPER];
// Westmere
list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
- list<SubtargetFeature> WSMSpecificFeatures = [];
- list<SubtargetFeature> WSMInheritableFeatures =
- !listconcat(NHMInheritableFeatures, WSMAdditionalFeatures);
+ list<SubtargetFeature> WSMTuning = NHMTuning;
list<SubtargetFeature> WSMFeatures =
- !listconcat(WSMInheritableFeatures, WSMSpecificFeatures);
+ !listconcat(NHMFeatures, WSMAdditionalFeatures);
// Sandybridge
list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
- FeatureSlowDivide64,
FeatureXSAVE,
- FeatureXSAVEOPT,
- FeatureSlow3OpsLEA,
- FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate,
- FeatureMergeToThreeWayBranch,
- FeatureFast15ByteNOP];
- list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32,
- FeaturePOPCNTFalseDeps];
- list<SubtargetFeature> SNBInheritableFeatures =
- !listconcat(WSMInheritableFeatures, SNBAdditionalFeatures);
+ FeatureXSAVEOPT];
+ list<SubtargetFeature> SNBTuning = [FeatureMacroFusion,
+ FeatureSlow3OpsLEA,
+ FeatureSlowDivide64,
+ FeatureSlowUAMem32,
+ FeatureFastScalarFSQRT,
+ FeatureFastSHLDRotate,
+ FeatureFast15ByteNOP,
+ FeaturePOPCNTFalseDeps,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> SNBFeatures =
- !listconcat(SNBInheritableFeatures, SNBSpecificFeatures);
+ !listconcat(WSMFeatures, SNBAdditionalFeatures);
// Ivybridge
list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase];
- list<SubtargetFeature> IVBSpecificFeatures = [FeatureSlowUAMem32,
- FeaturePOPCNTFalseDeps];
- list<SubtargetFeature> IVBInheritableFeatures =
- !listconcat(SNBInheritableFeatures, IVBAdditionalFeatures);
+ list<SubtargetFeature> IVBTuning = SNBTuning;
list<SubtargetFeature> IVBFeatures =
- !listconcat(IVBInheritableFeatures, IVBSpecificFeatures);
+ !listconcat(SNBFeatures, IVBAdditionalFeatures);
// Haswell
list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
@@ -619,77 +627,89 @@ def ProcessorFeatures {
FeatureFMA,
FeatureINVPCID,
FeatureLZCNT,
- FeatureMOVBE,
- FeatureFastVariableShuffle];
- list<SubtargetFeature> HSWSpecificFeatures = [FeaturePOPCNTFalseDeps,
- FeatureLZCNTFalseDeps];
- list<SubtargetFeature> HSWInheritableFeatures =
- !listconcat(IVBInheritableFeatures, HSWAdditionalFeatures);
+ FeatureMOVBE];
+ list<SubtargetFeature> HSWTuning = [FeatureMacroFusion,
+ FeatureSlow3OpsLEA,
+ FeatureSlowDivide64,
+ FeatureFastScalarFSQRT,
+ FeatureFastSHLDRotate,
+ FeatureFast15ByteNOP,
+ FeatureFastVariableShuffle,
+ FeaturePOPCNTFalseDeps,
+ FeatureLZCNTFalseDeps,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> HSWFeatures =
- !listconcat(HSWInheritableFeatures, HSWSpecificFeatures);
+ !listconcat(IVBFeatures, HSWAdditionalFeatures);
// Broadwell
list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
FeatureRDSEED,
FeaturePRFCHW];
- list<SubtargetFeature> BDWSpecificFeatures = [FeaturePOPCNTFalseDeps,
- FeatureLZCNTFalseDeps];
- list<SubtargetFeature> BDWInheritableFeatures =
- !listconcat(HSWInheritableFeatures, BDWAdditionalFeatures);
+ list<SubtargetFeature> BDWTuning = HSWTuning;
list<SubtargetFeature> BDWFeatures =
- !listconcat(BDWInheritableFeatures, BDWSpecificFeatures);
+ !listconcat(HSWFeatures, BDWAdditionalFeatures);
// Skylake
list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT,
- FeatureFastVectorFSQRT];
- list<SubtargetFeature> SKLSpecificFeatures = [FeatureHasFastGather,
- FeaturePOPCNTFalseDeps,
- FeatureSGX];
- list<SubtargetFeature> SKLInheritableFeatures =
- !listconcat(BDWInheritableFeatures, SKLAdditionalFeatures);
+ FeatureSGX];
+ list<SubtargetFeature> SKLTuning = [FeatureHasFastGather,
+ FeatureMacroFusion,
+ FeatureSlow3OpsLEA,
+ FeatureSlowDivide64,
+ FeatureFastScalarFSQRT,
+ FeatureFastVectorFSQRT,
+ FeatureFastSHLDRotate,
+ FeatureFast15ByteNOP,
+ FeatureFastVariableShuffle,
+ FeaturePOPCNTFalseDeps,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> SKLFeatures =
- !listconcat(SKLInheritableFeatures, SKLSpecificFeatures);
+ !listconcat(BDWFeatures, SKLAdditionalFeatures);
// Skylake-AVX512
- list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAVX512,
- FeaturePrefer256Bit,
+ list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
+ FeatureXSAVEC,
+ FeatureXSAVES,
+ FeatureCLFLUSHOPT,
+ FeatureAVX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
FeatureVLX,
FeaturePKU,
FeatureCLWB];
- list<SubtargetFeature> SKXSpecificFeatures = [FeatureHasFastGather,
- FeaturePOPCNTFalseDeps];
- list<SubtargetFeature> SKXInheritableFeatures =
- !listconcat(SKLInheritableFeatures, SKXAdditionalFeatures);
+ list<SubtargetFeature> SKXTuning = [FeatureHasFastGather,
+ FeatureMacroFusion,
+ FeatureSlow3OpsLEA,
+ FeatureSlowDivide64,
+ FeatureFastScalarFSQRT,
+ FeatureFastVectorFSQRT,
+ FeatureFastSHLDRotate,
+ FeatureFast15ByteNOP,
+ FeatureFastVariableShuffle,
+ FeaturePrefer256Bit,
+ FeaturePOPCNTFalseDeps,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> SKXFeatures =
- !listconcat(SKXInheritableFeatures, SKXSpecificFeatures);
+ !listconcat(BDWFeatures, SKXAdditionalFeatures);
// Cascadelake
list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
- list<SubtargetFeature> CLXSpecificFeatures = [FeatureHasFastGather,
- FeaturePOPCNTFalseDeps];
- list<SubtargetFeature> CLXInheritableFeatures =
- !listconcat(SKXInheritableFeatures, CLXAdditionalFeatures);
+ list<SubtargetFeature> CLXTuning = SKXTuning;
list<SubtargetFeature> CLXFeatures =
- !listconcat(CLXInheritableFeatures, CLXSpecificFeatures);
+ !listconcat(SKXFeatures, CLXAdditionalFeatures);
// Cooperlake
list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
- list<SubtargetFeature> CPXSpecificFeatures = [FeatureHasFastGather,
- FeaturePOPCNTFalseDeps];
- list<SubtargetFeature> CPXInheritableFeatures =
- !listconcat(CLXInheritableFeatures, CPXAdditionalFeatures);
+ list<SubtargetFeature> CPXTuning = SKXTuning;
list<SubtargetFeature> CPXFeatures =
- !listconcat(CPXInheritableFeatures, CPXSpecificFeatures);
+ !listconcat(CLXFeatures, CPXAdditionalFeatures);
// Cannonlake
list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
- FeaturePrefer256Bit,
FeatureCDI,
FeatureDQI,
FeatureBWI,
@@ -697,13 +717,20 @@ def ProcessorFeatures {
FeaturePKU,
FeatureVBMI,
FeatureIFMA,
- FeatureSHA,
- FeatureSGX];
- list<SubtargetFeature> CNLSpecificFeatures = [FeatureHasFastGather];
- list<SubtargetFeature> CNLInheritableFeatures =
- !listconcat(SKLInheritableFeatures, CNLAdditionalFeatures);
+ FeatureSHA];
+ list<SubtargetFeature> CNLTuning = [FeatureHasFastGather,
+ FeatureMacroFusion,
+ FeatureSlow3OpsLEA,
+ FeatureSlowDivide64,
+ FeatureFastScalarFSQRT,
+ FeatureFastVectorFSQRT,
+ FeatureFastSHLDRotate,
+ FeatureFast15ByteNOP,
+ FeatureFastVariableShuffle,
+ FeaturePrefer256Bit,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> CNLFeatures =
- !listconcat(CNLInheritableFeatures, CNLSpecificFeatures);
+ !listconcat(SKLFeatures, CNLAdditionalFeatures);
// Icelake
list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
@@ -714,72 +741,99 @@ def ProcessorFeatures {
FeatureVPOPCNTDQ,
FeatureGFNI,
FeatureCLWB,
- FeatureRDPID];
- list<SubtargetFeature> ICLSpecificFeatures = [FeatureHasFastGather];
- list<SubtargetFeature> ICLInheritableFeatures =
- !listconcat(CNLInheritableFeatures, ICLAdditionalFeatures);
+ FeatureRDPID,
+ FeatureFSRM];
+ list<SubtargetFeature> ICLTuning = CNLTuning;
list<SubtargetFeature> ICLFeatures =
- !listconcat(ICLInheritableFeatures, ICLSpecificFeatures);
+ !listconcat(CNLFeatures, ICLAdditionalFeatures);
// Icelake Server
- list<SubtargetFeature> ICXSpecificFeatures = [FeaturePCONFIG,
- FeatureWBNOINVD,
- FeatureHasFastGather];
+ list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
+ FeatureWBNOINVD];
+ list<SubtargetFeature> ICXTuning = CNLTuning;
list<SubtargetFeature> ICXFeatures =
- !listconcat(ICLInheritableFeatures, ICXSpecificFeatures);
+ !listconcat(ICLFeatures, ICXAdditionalFeatures);
//Tigerlake
list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureSHSTK];
- list<SubtargetFeature> TGLSpecificFeatures = [FeatureHasFastGather];
- list<SubtargetFeature> TGLInheritableFeatures =
- !listconcat(TGLAdditionalFeatures ,TGLSpecificFeatures);
+ list<SubtargetFeature> TGLTuning = CNLTuning;
list<SubtargetFeature> TGLFeatures =
- !listconcat(ICLFeatures, TGLInheritableFeatures );
+ !listconcat(ICLFeatures, TGLAdditionalFeatures );
+
+ //Sapphirerapids
+ list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
+ FeatureAMXINT8,
+ FeatureAMXBF16,
+ FeatureBF16,
+ FeatureSERIALIZE,
+ FeatureCLDEMOTE,
+ FeatureWAITPKG,
+ FeaturePTWRITE,
+ FeatureAVXVNNI,
+ FeatureTSXLDTRK,
+ FeatureENQCMD,
+ FeatureSHSTK,
+ FeatureVP2INTERSECT,
+ FeatureMOVDIRI,
+ FeatureMOVDIR64B,
+ FeatureUINTR];
+ list<SubtargetFeature> SPRTuning = ICXTuning;
+ list<SubtargetFeature> SPRFeatures =
+ !listconcat(ICXFeatures, SPRAdditionalFeatures);
+
+ // Alderlake
+ list<SubtargetFeature> ADLAdditionalFeatures = [FeatureAVXVNNI,
+ FeatureCLDEMOTE,
+ FeatureHRESET,
+ FeaturePTWRITE,
+ FeatureSERIALIZE,
+ FeatureWAITPKG];
+ list<SubtargetFeature> ADLTuning = SKLTuning;
+ list<SubtargetFeature> ADLFeatures =
+ !listconcat(SKLFeatures, ADLAdditionalFeatures);
// Atom
- list<SubtargetFeature> AtomInheritableFeatures = [FeatureX87,
- FeatureCMPXCHG8B,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSSE3,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureMOVBE,
- FeatureSlowTwoMemOps,
- FeatureLAHFSAHF,
- FeatureInsertVZEROUPPER];
- list<SubtargetFeature> AtomSpecificFeatures = [ProcIntelAtom,
- FeatureSlowUAMem16,
- FeatureLEAForSP,
- FeatureSlowDivide32,
- FeatureSlowDivide64,
- FeatureLEAUsesAG,
- FeaturePadShortFunctions];
- list<SubtargetFeature> AtomFeatures =
- !listconcat(AtomInheritableFeatures, AtomSpecificFeatures);
+ list<SubtargetFeature> AtomFeatures = [FeatureX87,
+ FeatureCMPXCHG8B,
+ FeatureCMOV,
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureFXSR,
+ FeatureNOPL,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeatureLAHFSAHF];
+ list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
+ FeatureSlowUAMem16,
+ FeatureLEAForSP,
+ FeatureSlowDivide32,
+ FeatureSlowDivide64,
+ FeatureSlowTwoMemOps,
+ FeatureLEAUsesAG,
+ FeaturePadShortFunctions,
+ FeatureInsertVZEROUPPER];
// Silvermont
list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
FeaturePOPCNT,
FeaturePCLMUL,
FeaturePRFCHW,
- FeatureSlowLEA,
- FeatureSlowIncDec,
FeatureRDRAND];
- list<SubtargetFeature> SLMSpecificFeatures = [ProcIntelSLM,
- FeatureSlowDivide64,
- FeatureSlowPMULLD,
- FeatureFast7ByteNOP,
- FeaturePOPCNTFalseDeps];
- list<SubtargetFeature> SLMInheritableFeatures =
- !listconcat(AtomInheritableFeatures, SLMAdditionalFeatures);
+ list<SubtargetFeature> SLMTuning = [ProcIntelSLM,
+ FeatureSlowTwoMemOps,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeatureSlowDivide64,
+ FeatureSlowPMULLD,
+ FeatureFast7ByteNOP,
+ FeaturePOPCNTFalseDeps,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> SLMFeatures =
- !listconcat(SLMInheritableFeatures, SLMSpecificFeatures);
+ !listconcat(AtomFeatures, SLMAdditionalFeatures);
// Goldmont
list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
@@ -791,31 +845,33 @@ def ProcessorFeatures {
FeatureXSAVES,
FeatureCLFLUSHOPT,
FeatureFSGSBase];
- list<SubtargetFeature> GLMSpecificFeatures = [FeatureUseGLMDivSqrtCosts,
- FeaturePOPCNTFalseDeps];
- list<SubtargetFeature> GLMInheritableFeatures =
- !listconcat(SLMInheritableFeatures, GLMAdditionalFeatures);
+ list<SubtargetFeature> GLMTuning = [FeatureUseGLMDivSqrtCosts,
+ FeatureSlowTwoMemOps,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeaturePOPCNTFalseDeps,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> GLMFeatures =
- !listconcat(GLMInheritableFeatures, GLMSpecificFeatures);
+ !listconcat(SLMFeatures, GLMAdditionalFeatures);
// Goldmont Plus
list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
FeatureRDPID,
FeatureSGX];
- list<SubtargetFeature> GLPSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
- list<SubtargetFeature> GLPInheritableFeatures =
- !listconcat(GLMInheritableFeatures, GLPAdditionalFeatures);
+ list<SubtargetFeature> GLPTuning = [FeatureUseGLMDivSqrtCosts,
+ FeatureSlowTwoMemOps,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> GLPFeatures =
- !listconcat(GLPInheritableFeatures, GLPSpecificFeatures);
+ !listconcat(GLMFeatures, GLPAdditionalFeatures);
// Tremont
list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
FeatureGFNI];
- list<SubtargetFeature> TRMSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
- list<SubtargetFeature> TRMInheritableFeatures =
- !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures);
+ list<SubtargetFeature> TRMTuning = GLPTuning;
list<SubtargetFeature> TRMFeatures =
- !listconcat(TRMInheritableFeatures, TRMSpecificFeatures);
+ !listconcat(GLPFeatures, TRMAdditionalFeatures);
// Knights Landing
list<SubtargetFeature> KNLFeatures = [FeatureX87,
@@ -827,13 +883,10 @@ def ProcessorFeatures {
Feature64Bit,
FeatureCMPXCHG16B,
FeaturePOPCNT,
- FeatureSlowDivide64,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureLAHFSAHF,
- FeatureSlow3OpsLEA,
- FeatureSlowIncDec,
FeatureAES,
FeatureRDRAND,
FeatureF16C,
@@ -850,56 +903,56 @@ def ProcessorFeatures {
FeatureBMI,
FeatureBMI2,
FeatureFMA,
- FeaturePRFCHW,
- FeaturePreferMaskRegisters,
- FeatureSlowTwoMemOps,
- FeatureHasFastGather,
- FeatureSlowPMADDWD];
+ FeaturePRFCHW];
+ list<SubtargetFeature> KNLTuning = [FeatureSlowDivide64,
+ FeatureSlow3OpsLEA,
+ FeatureSlowIncDec,
+ FeatureSlowTwoMemOps,
+ FeaturePreferMaskRegisters,
+ FeatureHasFastGather,
+ FeatureSlowPMADDWD];
// TODO Add AVX5124FMAPS/AVX5124VNNIW features
list<SubtargetFeature> KNMFeatures =
!listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
// Barcelona
- list<SubtargetFeature> BarcelonaInheritableFeatures = [FeatureX87,
- FeatureCMPXCHG8B,
- FeatureSSE4A,
- Feature3DNowA,
- FeatureFXSR,
- FeatureNOPL,
- FeatureCMPXCHG16B,
- FeaturePRFCHW,
- FeatureLZCNT,
- FeaturePOPCNT,
- FeatureSlowSHLD,
- FeatureLAHFSAHF,
- FeatureCMOV,
- Feature64Bit,
- FeatureFastScalarShiftMasks,
- FeatureInsertVZEROUPPER];
- list<SubtargetFeature> BarcelonaFeatures = BarcelonaInheritableFeatures;
+ list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
+ FeatureCMPXCHG8B,
+ FeatureSSE4A,
+ Feature3DNowA,
+ FeatureFXSR,
+ FeatureNOPL,
+ FeatureCMPXCHG16B,
+ FeaturePRFCHW,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureLAHFSAHF,
+ FeatureCMOV,
+ Feature64Bit];
+ list<SubtargetFeature> BarcelonaTuning = [FeatureFastScalarShiftMasks,
+ FeatureSlowSHLD,
+ FeatureInsertVZEROUPPER];
// Bobcat
- list<SubtargetFeature> BtVer1InheritableFeatures = [FeatureX87,
- FeatureCMPXCHG8B,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSSE3,
- FeatureSSE4A,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeaturePRFCHW,
- FeatureLZCNT,
- FeaturePOPCNT,
- FeatureSlowSHLD,
- FeatureLAHFSAHF,
- FeatureFast15ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureFastVectorShiftMasks];
- list<SubtargetFeature> BtVer1SpecificFeatures = [FeatureInsertVZEROUPPER];
- list<SubtargetFeature> BtVer1Features =
- !listconcat(BtVer1InheritableFeatures, BtVer1SpecificFeatures);
+ list<SubtargetFeature> BtVer1Features = [FeatureX87,
+ FeatureCMPXCHG8B,
+ FeatureCMOV,
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureSSE4A,
+ FeatureFXSR,
+ FeatureNOPL,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeaturePRFCHW,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureLAHFSAHF];
+ list<SubtargetFeature> BtVer1Tuning = [FeatureFast15ByteNOP,
+ FeatureFastScalarShiftMasks,
+ FeatureFastVectorShiftMasks,
+ FeatureSlowSHLD,
+ FeatureInsertVZEROUPPER];
// Jaguar
list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
@@ -910,38 +963,39 @@ def ProcessorFeatures {
FeatureMOVBE,
FeatureXSAVE,
FeatureXSAVEOPT];
- list<SubtargetFeature> BtVer2SpecificFeatures = [FeatureFastLZCNT,
- FeatureFastBEXTR,
- FeatureFastHorizontalOps];
- list<SubtargetFeature> BtVer2InheritableFeatures =
- !listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures);
+ list<SubtargetFeature> BtVer2Tuning = [FeatureFastLZCNT,
+ FeatureFastBEXTR,
+ FeatureFastHorizontalOps,
+ FeatureFast15ByteNOP,
+ FeatureFastScalarShiftMasks,
+ FeatureFastVectorShiftMasks,
+ FeatureSlowSHLD];
list<SubtargetFeature> BtVer2Features =
- !listconcat(BtVer2InheritableFeatures, BtVer2SpecificFeatures);
+ !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
// Bulldozer
- list<SubtargetFeature> BdVer1InheritableFeatures = [FeatureX87,
- FeatureCMPXCHG8B,
- FeatureCMOV,
- FeatureXOP,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureAES,
- FeaturePRFCHW,
- FeaturePCLMUL,
- FeatureMMX,
- FeatureFXSR,
- FeatureNOPL,
- FeatureLZCNT,
- FeaturePOPCNT,
- FeatureXSAVE,
- FeatureLWP,
- FeatureSlowSHLD,
- FeatureLAHFSAHF,
- FeatureFast11ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureBranchFusion,
- FeatureInsertVZEROUPPER];
- list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;
+ list<SubtargetFeature> BdVer1Features = [FeatureX87,
+ FeatureCMPXCHG8B,
+ FeatureCMOV,
+ FeatureXOP,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureFXSR,
+ FeatureNOPL,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureLWP,
+ FeatureLAHFSAHF];
+ list<SubtargetFeature> BdVer1Tuning = [FeatureSlowSHLD,
+ FeatureFast11ByteNOP,
+ FeatureFastScalarShiftMasks,
+ FeatureBranchFusion,
+ FeatureInsertVZEROUPPER];
// PileDriver
list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
@@ -949,16 +1003,16 @@ def ProcessorFeatures {
FeatureTBM,
FeatureFMA,
FeatureFastBEXTR];
- list<SubtargetFeature> BdVer2InheritableFeatures =
- !listconcat(BdVer1InheritableFeatures, BdVer2AdditionalFeatures);
- list<SubtargetFeature> BdVer2Features = BdVer2InheritableFeatures;
+ list<SubtargetFeature> BdVer2Tuning = BdVer1Tuning;
+ list<SubtargetFeature> BdVer2Features =
+ !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
// Steamroller
list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
FeatureFSGSBase];
- list<SubtargetFeature> BdVer3InheritableFeatures =
- !listconcat(BdVer2InheritableFeatures, BdVer3AdditionalFeatures);
- list<SubtargetFeature> BdVer3Features = BdVer3InheritableFeatures;
+ list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
+ list<SubtargetFeature> BdVer3Features =
+ !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
// Excavator
list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
@@ -966,9 +1020,9 @@ def ProcessorFeatures {
FeatureMOVBE,
FeatureRDRAND,
FeatureMWAITX];
- list<SubtargetFeature> BdVer4InheritableFeatures =
- !listconcat(BdVer3InheritableFeatures, BdVer4AdditionalFeatures);
- list<SubtargetFeature> BdVer4Features = BdVer4InheritableFeatures;
+ list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
+ list<SubtargetFeature> BdVer4Features =
+ !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
// AMD Zen Processors common ISAs
@@ -987,13 +1041,8 @@ def ProcessorFeatures {
FeatureFSGSBase,
FeatureFXSR,
FeatureNOPL,
- FeatureFastLZCNT,
FeatureLAHFSAHF,
FeatureLZCNT,
- FeatureFastBEXTR,
- FeatureFast15ByteNOP,
- FeatureBranchFusion,
- FeatureFastScalarShiftMasks,
FeatureMMX,
FeatureMOVBE,
FeatureMWAITX,
@@ -1004,56 +1053,85 @@ def ProcessorFeatures {
FeatureRDSEED,
FeatureSHA,
FeatureSSE4A,
- FeatureSlowSHLD,
- FeatureInsertVZEROUPPER,
FeatureX87,
FeatureXSAVE,
FeatureXSAVEC,
FeatureXSAVEOPT,
FeatureXSAVES];
+ list<SubtargetFeature> ZNTuning = [FeatureFastLZCNT,
+ FeatureFastBEXTR,
+ FeatureFast15ByteNOP,
+ FeatureBranchFusion,
+ FeatureFastScalarShiftMasks,
+ FeatureSlowSHLD,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
FeatureRDPID,
FeatureWBNOINVD];
+ list<SubtargetFeature> ZN2Tuning = ZNTuning;
list<SubtargetFeature> ZN2Features =
!listconcat(ZNFeatures, ZN2AdditionalFeatures);
+ list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
+ FeatureINVPCID,
+ FeaturePKU,
+ FeatureVAES,
+ FeatureVPCLMULQDQ];
+ list<SubtargetFeature> ZN3Tuning = ZNTuning;
+ list<SubtargetFeature> ZN3Features =
+ !listconcat(ZN2Features, ZN3AdditionalFeatures);
}
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
-class Proc<string Name, list<SubtargetFeature> Features>
- : ProcessorModel<Name, GenericModel, Features>;
+class Proc<string Name, list<SubtargetFeature> Features,
+ list<SubtargetFeature> TuneFeatures>
+ : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
+
+class ProcModel<string Name, SchedMachineModel Model,
+ list<SubtargetFeature> Features,
+ list<SubtargetFeature> TuneFeatures>
+ : ProcessorModel<Name, Model, Features, TuneFeatures>;
// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
-def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16,
- FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
-def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER]>;
-def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER]>;
-def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16,
- FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
-def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16,
- FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
-def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16,
- FeatureCMPXCHG8B, FeatureMMX,
- FeatureInsertVZEROUPPER]>;
-
-def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureCMOV, FeatureInsertVZEROUPPER]>;
-def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureCMOV, FeatureNOPL, FeatureInsertVZEROUPPER]>;
-
-def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureMMX, FeatureCMOV, FeatureFXSR,
- FeatureNOPL, FeatureInsertVZEROUPPER]>;
+// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
+// constructor checks that any CPU used in 64-bit mode has Feature64Bit enabled.
+// It has no effect on code generation.
+def : ProcModel<"generic", SandyBridgeModel,
+ [FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
+ [FeatureSlow3OpsLEA,
+ FeatureSlowDivide64,
+ FeatureSlowIncDec,
+ FeatureMacroFusion,
+ FeatureInsertVZEROUPPER]>;
+
+def : Proc<"i386", [FeatureX87],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : Proc<"i486", [FeatureX87],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : Proc<"i586", [FeatureX87, FeatureCMPXCHG8B],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : Proc<"pentium", [FeatureX87, FeatureCMPXCHG8B],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : Proc<"pentium-mmx", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+
+def : Proc<"i686", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : Proc<"pentiumpro", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
+ FeatureNOPL],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+
+def : Proc<"pentium2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
+ FeatureFXSR, FeatureNOPL],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
foreach P = ["pentium3", "pentium3m"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,FeatureMMX,
- FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV,
- FeatureInsertVZEROUPPER]>;
+ def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
+ FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -1066,35 +1144,35 @@ foreach P = ["pentium3", "pentium3m"] in {
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.
-def : ProcessorModel<"pentium-m", GenericPostRAModel,
- [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
- FeatureCMOV, FeatureInsertVZEROUPPER]>;
+def : ProcModel<"pentium-m", GenericPostRAModel,
+ [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
+ FeatureFXSR, FeatureNOPL, FeatureCMOV],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
foreach P = ["pentium4", "pentium4m"] in {
- def : ProcessorModel<P, GenericPostRAModel,
- [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
- FeatureCMOV, FeatureInsertVZEROUPPER]>;
+ def : ProcModel<P, GenericPostRAModel,
+ [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
+ FeatureFXSR, FeatureNOPL, FeatureCMOV],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}
// Intel Quark.
-def : Proc<"lakemont", [FeatureInsertVZEROUPPER]>;
+def : Proc<"lakemont", [FeatureCMPXCHG8B],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
// Intel Core Duo.
-def : ProcessorModel<"yonah", SandyBridgeModel,
- [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
- FeatureCMOV, FeatureInsertVZEROUPPER]>;
+def : ProcModel<"yonah", SandyBridgeModel,
+ [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
+ FeatureFXSR, FeatureNOPL, FeatureCMOV],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
// NetBurst.
-def : ProcessorModel<"prescott", GenericPostRAModel,
- [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
- FeatureCMOV, FeatureInsertVZEROUPPER]>;
-def : ProcessorModel<"nocona", GenericPostRAModel, [
+def : ProcModel<"prescott", GenericPostRAModel,
+ [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
+ FeatureFXSR, FeatureNOPL, FeatureCMOV],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : ProcModel<"nocona", GenericPostRAModel, [
FeatureX87,
- FeatureSlowUAMem16,
FeatureCMPXCHG8B,
FeatureCMOV,
FeatureMMX,
@@ -1103,13 +1181,15 @@ def : ProcessorModel<"nocona", GenericPostRAModel, [
FeatureNOPL,
Feature64Bit,
FeatureCMPXCHG16B,
+],
+[
+ FeatureSlowUAMem16,
FeatureInsertVZEROUPPER
]>;
// Intel Core 2 Solo/Duo.
-def : ProcessorModel<"core2", SandyBridgeModel, [
+def : ProcModel<"core2", SandyBridgeModel, [
FeatureX87,
- FeatureSlowUAMem16,
FeatureCMPXCHG8B,
FeatureCMOV,
FeatureMMX,
@@ -1118,13 +1198,15 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureNOPL,
Feature64Bit,
FeatureCMPXCHG16B,
- FeatureLAHFSAHF,
+ FeatureLAHFSAHF
+],
+[
FeatureMacroFusion,
+ FeatureSlowUAMem16,
FeatureInsertVZEROUPPER
]>;
-def : ProcessorModel<"penryn", SandyBridgeModel, [
+def : ProcModel<"penryn", SandyBridgeModel, [
FeatureX87,
- FeatureSlowUAMem16,
FeatureCMPXCHG8B,
FeatureCMOV,
FeatureMMX,
@@ -1133,140 +1215,171 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureNOPL,
Feature64Bit,
FeatureCMPXCHG16B,
- FeatureLAHFSAHF,
+ FeatureLAHFSAHF
+],
+[
FeatureMacroFusion,
+ FeatureSlowUAMem16,
FeatureInsertVZEROUPPER
]>;
// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
- def : ProcessorModel<P, AtomModel, ProcessorFeatures.AtomFeatures>;
+ def : ProcModel<P, AtomModel, ProcessorFeatures.AtomFeatures,
+ ProcessorFeatures.AtomTuning>;
}
foreach P = ["silvermont", "slm"] in {
- def : ProcessorModel<P, SLMModel, ProcessorFeatures.SLMFeatures>;
+ def : ProcModel<P, SLMModel, ProcessorFeatures.SLMFeatures,
+ ProcessorFeatures.SLMTuning>;
}
-def : ProcessorModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures>;
-def : ProcessorModel<"goldmont-plus", SLMModel, ProcessorFeatures.GLPFeatures>;
-def : ProcessorModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures>;
+def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures,
+ ProcessorFeatures.GLMTuning>;
+def : ProcModel<"goldmont-plus", SLMModel, ProcessorFeatures.GLPFeatures,
+ ProcessorFeatures.GLPTuning>;
+def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
+ ProcessorFeatures.TRMTuning>;
// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7"] in {
- def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures>;
+ def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures,
+ ProcessorFeatures.NHMTuning>;
}
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : ProcessorModel<"westmere", SandyBridgeModel,
- ProcessorFeatures.WSMFeatures>;
+def : ProcModel<"westmere", SandyBridgeModel, ProcessorFeatures.WSMFeatures,
+ ProcessorFeatures.WSMTuning>;
foreach P = ["sandybridge", "corei7-avx"] in {
- def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures>;
+ def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
+ ProcessorFeatures.SNBTuning>;
}
foreach P = ["ivybridge", "core-avx-i"] in {
- def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures>;
+ def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
+ ProcessorFeatures.IVBTuning>;
}
foreach P = ["haswell", "core-avx2"] in {
- def : ProcessorModel<P, HaswellModel, ProcessorFeatures.HSWFeatures>;
+ def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
+ ProcessorFeatures.HSWTuning>;
}
-def : ProcessorModel<"broadwell", BroadwellModel,
- ProcessorFeatures.BDWFeatures>;
+def : ProcModel<"broadwell", BroadwellModel, ProcessorFeatures.BDWFeatures,
+ ProcessorFeatures.BDWTuning>;
-def : ProcessorModel<"skylake", SkylakeClientModel,
- ProcessorFeatures.SKLFeatures>;
+def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
+ ProcessorFeatures.SKLTuning>;
// FIXME: define KNL scheduler model
-def : ProcessorModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures>;
-def : ProcessorModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures>;
+def : ProcModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures,
+ ProcessorFeatures.KNLTuning>;
+def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
+ ProcessorFeatures.KNLTuning>;
foreach P = ["skylake-avx512", "skx"] in {
- def : ProcessorModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures>;
+ def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
+ ProcessorFeatures.SKXTuning>;
}
-def : ProcessorModel<"cascadelake", SkylakeServerModel,
- ProcessorFeatures.CLXFeatures>;
-def : ProcessorModel<"cooperlake", SkylakeServerModel,
- ProcessorFeatures.CPXFeatures>;
-def : ProcessorModel<"cannonlake", SkylakeServerModel,
- ProcessorFeatures.CNLFeatures>;
-def : ProcessorModel<"icelake-client", SkylakeServerModel,
- ProcessorFeatures.ICLFeatures>;
-def : ProcessorModel<"icelake-server", SkylakeServerModel,
- ProcessorFeatures.ICXFeatures>;
-def : ProcessorModel<"tigerlake", SkylakeServerModel,
- ProcessorFeatures.TGLFeatures>;
+def : ProcModel<"cascadelake", SkylakeServerModel,
+ ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>;
+def : ProcModel<"cooperlake", SkylakeServerModel,
+ ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
+def : ProcModel<"cannonlake", SkylakeServerModel,
+ ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
+def : ProcModel<"icelake-client", SkylakeServerModel,
+ ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
+def : ProcModel<"icelake-server", SkylakeServerModel,
+ ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
+def : ProcModel<"tigerlake", SkylakeServerModel,
+ ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
+def : ProcModel<"sapphirerapids", SkylakeServerModel,
+ ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
+def : ProcModel<"alderlake", SkylakeClientModel,
+ ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
// AMD CPUs.
-def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureMMX, FeatureInsertVZEROUPPER]>;
-def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- Feature3DNow, FeatureInsertVZEROUPPER]>;
-def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- Feature3DNow, FeatureInsertVZEROUPPER]>;
+def : Proc<"k6", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : Proc<"k6-2", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : Proc<"k6-3", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
foreach P = ["athlon", "athlon-tbird"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
- Feature3DNowA, FeatureNOPL, FeatureSlowSHLD,
- FeatureInsertVZEROUPPER]>;
+ def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, Feature3DNowA,
+ FeatureNOPL],
+ [FeatureSlowSHLD, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
- FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL,
- FeatureSlowSHLD, FeatureInsertVZEROUPPER]>;
+ def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
+ FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
+ [FeatureSlowSHLD, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL,
- Feature64Bit, FeatureSlowSHLD, FeatureCMOV,
- FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER]>;
+ def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
+ [FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureSlowUAMem16,
+ FeatureInsertVZEROUPPER]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
- Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
- FeatureSlowSHLD, FeatureCMOV, Feature64Bit,
- FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER]>;
+ def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA,
+ FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
+ Feature64Bit],
+ [FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureSlowUAMem16,
+ FeatureInsertVZEROUPPER]>;
}
foreach P = ["amdfam10", "barcelona"] in {
- def : Proc<P, ProcessorFeatures.BarcelonaFeatures>;
+ def : Proc<P, ProcessorFeatures.BarcelonaFeatures,
+ ProcessorFeatures.BarcelonaTuning>;
}
// Bobcat
-def : Proc<"btver1", ProcessorFeatures.BtVer1Features>;
+def : Proc<"btver1", ProcessorFeatures.BtVer1Features,
+ ProcessorFeatures.BtVer1Tuning>;
// Jaguar
-def : ProcessorModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features>;
+def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features,
+ ProcessorFeatures.BtVer2Tuning>;
// Bulldozer
-def : ProcessorModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features>;
+def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
+ ProcessorFeatures.BdVer1Tuning>;
// Piledriver
-def : ProcessorModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features>;
+def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
+ ProcessorFeatures.BdVer2Tuning>;
// Steamroller
-def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>;
+def : Proc<"bdver3", ProcessorFeatures.BdVer3Features,
+ ProcessorFeatures.BdVer3Tuning>;
// Excavator
-def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>;
-
-def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>;
-def : ProcessorModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features>;
-
-def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- Feature3DNowA, FeatureInsertVZEROUPPER]>;
-
-def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureInsertVZEROUPPER]>;
-def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow,
- FeatureInsertVZEROUPPER]>;
-def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow,
- FeatureInsertVZEROUPPER]>;
-def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureMMX, FeatureSSE1, FeatureFXSR,
- FeatureCMOV, FeatureInsertVZEROUPPER]>;
+def : Proc<"bdver4", ProcessorFeatures.BdVer4Features,
+ ProcessorFeatures.BdVer4Tuning>;
+
+def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
+ ProcessorFeatures.ZNTuning>;
+def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
+ ProcessorFeatures.ZN2Tuning>;
+def : ProcModel<"znver3", Znver2Model, ProcessorFeatures.ZN3Features,
+ ProcessorFeatures.ZN3Tuning>;
+
+def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+
+def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : Proc<"winchip2", [FeatureX87, Feature3DNow],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : Proc<"c3", [FeatureX87, Feature3DNow],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
+ FeatureSSE1, FeatureFXSR, FeatureCMOV],
+ [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -1278,15 +1391,8 @@ def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
-def : ProcessorModel<"x86-64", SandyBridgeModel, [
- FeatureX87,
- FeatureCMPXCHG8B,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSE2,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
+def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
+[
FeatureSlow3OpsLEA,
FeatureSlowDivide64,
FeatureSlowIncDec,
@@ -1294,6 +1400,16 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [
FeatureInsertVZEROUPPER
]>;
+// x86-64 micro-architecture levels.
+def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
+ ProcessorFeatures.SNBTuning>;
+// Close to Haswell.
+def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
+ ProcessorFeatures.HSWTuning>;
+// Close to the AVX-512 level implemented by Xeon Scalable Processors.
+def : ProcModel<"x86-64-v4", HaswellModel, ProcessorFeatures.X86_64V4Features,
+ ProcessorFeatures.SKXTuning>;
+
//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//
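[Illustrative aside, not part of the upstream patch.] The X86.td changes above split every CPU definition into an ISA feature list ("...Features") and a separate tuning list ("...Tuning"), and the new Proc/ProcModel helpers pass both lists to ProcessorModel; the x86-64-v2/v3/v4 levels then reuse existing tuning sets (SNBTuning, HSWTuning, SKXTuning) because they name ISA baselines rather than one particular core. As a rough sketch of why the two axes are kept apart, here is a minimal, self-contained C++ analogue; CpuProfile and makeEffectiveProfile are invented for this illustration and are not LLVM APIs.

    // Minimal analogue of the Features/Tuning split introduced above.
    // None of these types exist in LLVM; they only illustrate that ISA
    // features (which instructions may be emitted) and tuning flags
    // (how to choose among legal instructions) are independent axes.
    #include <iostream>
    #include <set>
    #include <string>

    struct CpuProfile {
      std::set<std::string> IsaFeatures;  // e.g. "sse4.2", "avx2"
      std::set<std::string> TuneFlags;    // e.g. "slow-unaligned-mem-16"
    };

    // Combine "-march=March" with "-mtune=Tune": instruction legality
    // comes from March, scheduling heuristics come from Tune.
    CpuProfile makeEffectiveProfile(const CpuProfile &March,
                                    const CpuProfile &Tune) {
      return {March.IsaFeatures, Tune.TuneFlags};
    }

    int main() {
      CpuProfile I386 = {{"x87"}, {"slow-unaligned-mem-16"}};
      CpuProfile Skylake = {{"x87", "sse4.2", "avx2"}, {"macro-fusion"}};

      // -march=skylake -mtune=i386: AVX2 stays legal, but the old tuning
      // heuristics apply. If tuning flags were mixed into the ISA list,
      // this combination could not be expressed.
      CpuProfile Eff = makeEffectiveProfile(Skylake, I386);
      std::cout << "avx2 legal: " << Eff.IsaFeatures.count("avx2") << "\n";
      std::cout << "slow-unaligned tuning: "
                << Eff.TuneFlags.count("slow-unaligned-mem-16") << "\n";
    }

This is the same separation that, as far as I understand, lets the frontend treat -march and -mtune independently for the CPU names defined above.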
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
index aa03217d155d..2d434bda5530 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -404,7 +404,7 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
O << ']';
}
-static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
+static bool printAsmMRegister(const X86AsmPrinter &P, const MachineOperand &MO,
char Mode, raw_ostream &O) {
Register Reg = MO.getReg();
bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT;
@@ -446,9 +446,9 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
return false;
}
-static bool printAsmVRegister(X86AsmPrinter &P, const MachineOperand &MO,
- char Mode, raw_ostream &O) {
- unsigned Reg = MO.getReg();
+static bool printAsmVRegister(const MachineOperand &MO, char Mode,
+ raw_ostream &O) {
+ Register Reg = MO.getReg();
bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT;
unsigned Index;
@@ -560,7 +560,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
case 't': // Print V8SFmode register
case 'g': // Print V16SFmode register
if (MO.isReg())
- return printAsmVRegister(*this, MO, ExtraCode[0], O);
+ return printAsmVRegister(MO, ExtraCode[0], O);
PrintOperand(MI, OpNo, O);
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
index eb485fa2ecef..a3b74c8ee387 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -134,9 +134,9 @@ public:
}
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- const char *ExtraCode, raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- const char *ExtraCode, raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &O) override;
bool doInitialization(Module &M) override {
SMShadowTracker.reset(0);
@@ -145,7 +145,7 @@ public:
return AsmPrinter::doInitialization(M);
}
- bool runOnMachineFunction(MachineFunction &F) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
void emitFunctionBodyStart() override;
void emitFunctionBodyEnd() override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index 9f1fece1b9dd..fdc65acffe3d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -154,7 +154,7 @@ static bool isPotentialBlockedMemCpyLd(unsigned Opcode) {
return isXMMLoadOpcode(Opcode) || isYMMLoadOpcode(Opcode);
}
-static bool isPotentialBlockedMemCpyPair(int LdOpcode, int StOpcode) {
+static bool isPotentialBlockedMemCpyPair(unsigned LdOpcode, unsigned StOpcode) {
switch (LdOpcode) {
case X86::MOVUPSrm:
case X86::MOVAPSrm:
@@ -206,7 +206,7 @@ static bool isPotentialBlockedMemCpyPair(int LdOpcode, int StOpcode) {
}
}
-static bool isPotentialBlockingStoreInst(int Opcode, int LoadOpcode) {
+static bool isPotentialBlockingStoreInst(unsigned Opcode, unsigned LoadOpcode) {
bool PBlock = false;
PBlock |= Opcode == X86::MOV64mr || Opcode == X86::MOV64mi32 ||
Opcode == X86::MOV32mr || Opcode == X86::MOV32mi ||
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index caa1f7952475..fae4e688c8b4 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -105,7 +105,7 @@ private:
void adjustCallSequence(MachineFunction &MF, const CallContext &Context);
MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
- unsigned Reg);
+ Register Reg);
enum InstClassification { Convert, Skip, Exit };
@@ -202,7 +202,7 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
Align StackAlign = TFL->getStackAlign();
int64_t Advantage = 0;
- for (auto CC : CallSeqVector) {
+ for (const auto &CC : CallSeqVector) {
// Call sites where no parameters are passed on the stack
// do not affect the cost, since there needs to be no
// stack adjustment.
@@ -265,7 +265,7 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
if (!isProfitable(MF, CallSeqVector))
return false;
- for (auto CC : CallSeqVector) {
+ for (const auto &CC : CallSeqVector) {
if (CC.UsePush) {
adjustCallSequence(MF, CC);
Changed = true;
@@ -288,13 +288,13 @@ X86CallFrameOptimization::classifyInstruction(
case X86::AND16mi8:
case X86::AND32mi8:
case X86::AND64mi8: {
- MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
+ const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
return ImmOp.getImm() == 0 ? Convert : Exit;
}
case X86::OR16mi8:
case X86::OR32mi8:
case X86::OR64mi8: {
- MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
+ const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
return ImmOp.getImm() == -1 ? Convert : Exit;
}
case X86::MOV32mi:
@@ -336,7 +336,7 @@ X86CallFrameOptimization::classifyInstruction(
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (!Register::isPhysicalRegister(Reg))
+ if (!Reg.isPhysical())
continue;
if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
return Exit;
@@ -454,7 +454,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
UsedRegs.insert(Reg);
}
}
@@ -506,7 +506,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
// replace uses.
for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
- MachineOperand PushOp = Store->getOperand(X86::AddrNumOperands);
+ const MachineOperand &PushOp = Store->getOperand(X86::AddrNumOperands);
MachineBasicBlock::iterator Push = nullptr;
unsigned PushOpcode;
switch (Store->getOpcode()) {
@@ -563,8 +563,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
unsigned NumOps = DefMov->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
Push->addOperand(DefMov->getOperand(i));
- Push->cloneMergedMemRefs(MF, {&*DefMov, &*Store});
-
+ Push->cloneMergedMemRefs(MF, {DefMov, &*Store});
DefMov->eraseFromParent();
} else {
PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
@@ -600,7 +599,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
}
MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
- MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
+ MachineBasicBlock::iterator FrameSetup, Register Reg) {
// Do an extremely restricted form of load folding.
// ISel will often create patterns like:
// movl 4(%edi), %eax
@@ -611,7 +610,7 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
// movl %eax, (%esp)
// call
// Get rid of those with prejudice.
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return nullptr;
// Make sure this is the only use of Reg.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp
index 319dc9470604..53f57565d56e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -95,15 +95,14 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
namespace {
-struct OutgoingValueHandler : public CallLowering::ValueHandler {
- OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
- : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
+struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
+ X86OutgoingValueHandler(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, MachineInstrBuilder &MIB,
+ CCAssignFn *AssignFn)
+ : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
DL(MIRBuilder.getMF().getDataLayout()),
STI(MIRBuilder.getMF().getSubtarget<X86Subtarget>()) {}
- bool isIncomingArgumentHandler() const override { return false; }
-
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
LLT p0 = LLT::pointer(0, DL.getPointerSizeInBits(0));
@@ -135,9 +134,10 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
unsigned ValSize = VA.getValVT().getSizeInBits();
unsigned LocSize = VA.getLocVT().getSizeInBits();
if (PhysRegSize > ValSize && LocSize == ValSize) {
- assert((PhysRegSize == 128 || PhysRegSize == 80) && "We expect that to be 128 bit");
- auto MIB = MIRBuilder.buildAnyExt(LLT::scalar(PhysRegSize), ValVReg);
- ExtReg = MIB.getReg(0);
+ assert((PhysRegSize == 128 || PhysRegSize == 80) &&
+ "We expect that to be 128 bit");
+ ExtReg =
+ MIRBuilder.buildAnyExt(LLT::scalar(PhysRegSize), ValVReg).getReg(0);
} else
ExtReg = extendRegister(ValVReg, VA);
@@ -149,9 +149,9 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
MachineFunction &MF = MIRBuilder.getMF();
Register ExtReg = extendRegister(ValVReg, VA);
- auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore,
- VA.getLocVT().getStoreSize(),
- inferAlignFromPtrInfo(MF, MPO));
+ auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore,
+ VA.getLocVT().getStoreSize(),
+ inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
@@ -184,9 +184,9 @@ protected:
} // end anonymous namespace
-bool X86CallLowering::lowerReturn(
- MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs) const {
+bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+ const Value *Val, ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const {
assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
"Return value without a vreg");
auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
@@ -195,7 +195,7 @@ bool X86CallLowering::lowerReturn(
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
- auto &DL = MF.getDataLayout();
+ const DataLayout &DL = MF.getDataLayout();
LLVMContext &Ctx = Val->getType()->getContext();
const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
@@ -215,7 +215,7 @@ bool X86CallLowering::lowerReturn(
return false;
}
- OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
+ X86OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
}
@@ -226,14 +226,12 @@ bool X86CallLowering::lowerReturn(
namespace {
-struct IncomingValueHandler : public CallLowering::ValueHandler {
- IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- CCAssignFn *AssignFn)
- : ValueHandler(MIRBuilder, MRI, AssignFn),
+struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
+ X86IncomingValueHandler(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, CCAssignFn *AssignFn)
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn),
DL(MIRBuilder.getMF().getDataLayout()) {}
- bool isIncomingArgumentHandler() const override { return true; }
-
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
auto &MFI = MIRBuilder.getMF().getFrameInfo();
@@ -248,7 +246,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
- auto MMO = MF.getMachineMemOperand(
+ auto *MMO = MF.getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
@@ -298,10 +296,10 @@ protected:
const DataLayout &DL;
};
-struct FormalArgHandler : public IncomingValueHandler {
+struct FormalArgHandler : public X86IncomingValueHandler {
FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
- : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
+ : X86IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
@@ -309,10 +307,10 @@ struct FormalArgHandler : public IncomingValueHandler {
}
};
-struct CallReturnHandler : public IncomingValueHandler {
+struct CallReturnHandler : public X86IncomingValueHandler {
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn, MachineInstrBuilder &MIB)
- : IncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+ : X86IncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
@@ -324,9 +322,10 @@ protected:
} // end anonymous namespace
-bool X86CallLowering::lowerFormalArguments(
- MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const {
+bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
if (F.arg_empty())
return true;
@@ -340,8 +339,7 @@ bool X86CallLowering::lowerFormalArguments(
SmallVector<ArgInfo, 8> SplitArgs;
unsigned Idx = 0;
- for (auto &Arg : F.args()) {
-
+ for (const auto &Arg : F.args()) {
// TODO: handle not simple cases.
if (Arg.hasAttribute(Attribute::ByVal) ||
Arg.hasAttribute(Attribute::InReg) ||
@@ -380,10 +378,10 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
- auto &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getParent()->getDataLayout();
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
- auto TRI = STI.getRegisterInfo();
+ const X86RegisterInfo *TRI = STI.getRegisterInfo();
// Handle only Linux C, X86_64_SysV calling conventions for now.
if (!STI.isTargetLinux() || !(Info.CallConv == CallingConv::C ||
@@ -421,7 +419,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
}
// Do the actual argument marshalling.
- OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, CC_X86);
+ X86OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, CC_X86);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h
index b5ea7782896b..9390122d7647 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h
@@ -29,10 +29,12 @@ public:
X86CallLowering(const X86TargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs) const override;
+ ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp
index c899db60e016..c80a5d5bb332 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -330,5 +330,15 @@ static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
+static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ if (LocVT != MVT::i64) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::ZExt;
+ }
+ return false;
+}
+
// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
index 802e694999b6..3735fab818ce 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
@@ -336,6 +336,9 @@ def RetCC_X86_64_C : CallingConv<[
// MMX vector types are always returned in XMM0.
CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
+ // Pointers are always returned in full 64-bit registers.
+ CCIfPtr<CCCustom<"CC_X86_64_Pointer">>,
+
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R12]>>>,
CCDelegateTo<RetCC_X86Common>
@@ -518,6 +521,9 @@ def CC_X86_64_C : CallingConv<[
CCIfCC<"CallingConv::Swift",
CCIfSRet<CCIfType<[i64], CCAssignToReg<[RAX]>>>>,
+ // Pointers are always passed in full 64-bit registers.
+ CCIfPtr<CCCustom<"CC_X86_64_Pointer">>,
+
// The first 6 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
@@ -1096,7 +1102,7 @@ def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)
// All GPRs - except r11
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
- R8, R9, R10, RSP)>;
+ R8, R9, R10)>;
// All registers - except r11
def CSR_64_RT_AllRegs : CalleeSavedRegs<(add CSR_64_RT_MostRegs,
@@ -1154,17 +1160,16 @@ def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RSI, R14, R15,
def CSR_64_HHVM : CalleeSavedRegs<(add R12)>;
// Register calling convention preserves few GPR and XMM8-15
-def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP, ESP)>;
+def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
def CSR_32_RegCall : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE,
(sequence "XMM%u", 4, 7))>;
def CSR_Win32_CFGuard_Check_NoSSE : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE, ECX)>;
def CSR_Win32_CFGuard_Check : CalleeSavedRegs<(add CSR_32_RegCall, ECX)>;
-def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
(sequence "R%u", 10, 15))>;
def CSR_Win64_RegCall : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,
(sequence "XMM%u", 8, 15))>;
-def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
(sequence "R%u", 12, 15))>;
def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
(sequence "XMM%u", 8, 15))>;
-
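[Illustrative aside, not part of the upstream patch.] The CC_X86_64_Pointer hook added above, together with the CCIfPtr<CCCustom<"CC_X86_64_Pointer">> entries in X86CallingConv.td, forces pointer arguments and return values into full 64-bit registers by widening any narrower location type to i64 and marking it zero-extended. This presumably matters for configurations where the pointer type itself is 32-bit (an ILP32-on-x86-64 style target); the diff does not restate the upstream motivation, so treat that reading as an inference. The self-contained sketch below mirrors the decision with made-up enum and struct names rather than LLVM's MVT/CCValAssign types.

    // Sketch of the pointer-promotion rule implemented by
    // CC_X86_64_Pointer above. The types here are stand-ins, not LLVM's.
    #include <iostream>

    enum class ValueType { i32, i64 };
    enum class LocInfo { Full, ZExt };

    struct Assignment {
      ValueType LocVT;
      LocInfo Info;
    };

    // Pointers are always carried in full 64-bit registers: if the
    // location type is narrower than i64, widen it and mark it as
    // zero-extended.
    Assignment assignPointer(ValueType PtrVT) {
      if (PtrVT != ValueType::i64)
        return {ValueType::i64, LocInfo::ZExt};
      return {PtrVT, LocInfo::Full};
    }

    int main() {
      // A 32-bit pointer ends up zero-extended into a 64-bit register
      // such as RDI or RAX.
      Assignment A = assignPointer(ValueType::i32);
      std::cout << (A.Info == LocInfo::ZExt ? "zext to i64\n" : "as-is\n");
    }
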
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp
index fe5cb3ae2bf6..a2de0dc08292 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -439,7 +439,7 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates(
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
- auto &RDM = RegDefMaps[Register::isVirtualRegister(Reg)];
+ auto &RDM = RegDefMaps[Reg.isVirtual()];
if (MachineInstr *DefMI = RDM.lookup(Reg)) {
OperandToDefMap[&MO] = DefMI;
DepthInfo Info = DepthMap.lookup(DefMI);
@@ -459,7 +459,7 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates(
if (!MO.isReg() || !MO.isDef())
continue;
Register Reg = MO.getReg();
- RegDefMaps[Register::isVirtualRegister(Reg)][Reg] = &MI;
+ RegDefMaps[Reg.isVirtual()][Reg] = &MI;
}
unsigned Latency = TSchedModel.computeInstrLatency(&MI);
@@ -537,7 +537,7 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates(
// This is another conservative check to avoid converting CMOV instruction
// used with tree-search like algorithm, where the branch is unpredicted.
auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
- if (UIs.begin() != UIs.end() && ++UIs.begin() == UIs.end()) {
+ if (!UIs.empty() && ++UIs.begin() == UIs.end()) {
unsigned Op = UIs.begin()->getOpcode();
if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
WorthOpGroup = false;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CondBrFolding.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CondBrFolding.cpp
deleted file mode 100644
index 7ede94664bf6..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CondBrFolding.cpp
+++ /dev/null
@@ -1,579 +0,0 @@
-//===---- X86CondBrFolding.cpp - optimize conditional branches ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This file defines a pass that optimizes condition branches on x86 by taking
-// advantage of the three-way conditional code generated by compare
-// instructions.
-// Currently, it tries to hoisting EQ and NE conditional branch to a dominant
-// conditional branch condition where the same EQ/NE conditional code is
-// computed. An example:
-// bb_0:
-// cmp %0, 19
-// jg bb_1
-// jmp bb_2
-// bb_1:
-// cmp %0, 40
-// jg bb_3
-// jmp bb_4
-// bb_4:
-// cmp %0, 20
-// je bb_5
-// jmp bb_6
-// Here we could combine the two compares in bb_0 and bb_4 and have the
-// following code:
-// bb_0:
-// cmp %0, 20
-// jg bb_1
-// jl bb_2
-// jmp bb_5
-// bb_1:
-// cmp %0, 40
-// jg bb_3
-// jmp bb_6
-// For the case of %0 == 20 (bb_5), we eliminate two jumps, and the control
-// height for bb_6 is also reduced. bb_4 is gone after the optimization.
-//
-// There are plenty of this code patterns, especially from the switch case
-// lowing where we generate compare of "pivot-1" for the inner nodes in the
-// binary search tree.
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrInfo.h"
-#include "X86Subtarget.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/BranchProbability.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "x86-condbr-folding"
-
-STATISTIC(NumFixedCondBrs, "Number of x86 condbr folded");
-
-namespace {
-class X86CondBrFoldingPass : public MachineFunctionPass {
-public:
- X86CondBrFoldingPass() : MachineFunctionPass(ID) { }
- StringRef getPassName() const override { return "X86 CondBr Folding"; }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineBranchProbabilityInfo>();
- }
-
-public:
- static char ID;
-};
-} // namespace
-
-char X86CondBrFoldingPass::ID = 0;
-INITIALIZE_PASS(X86CondBrFoldingPass, "X86CondBrFolding", "X86CondBrFolding", false, false)
-
-FunctionPass *llvm::createX86CondBrFolding() {
- return new X86CondBrFoldingPass();
-}
-
-namespace {
-// A class the stores the auxiliary information for each MBB.
-struct TargetMBBInfo {
- MachineBasicBlock *TBB;
- MachineBasicBlock *FBB;
- MachineInstr *BrInstr;
- MachineInstr *CmpInstr;
- X86::CondCode BranchCode;
- unsigned SrcReg;
- int CmpValue;
- bool Modified;
- bool CmpBrOnly;
-};
-
-// A class that optimizes the conditional branch by hoisting and merge CondCode.
-class X86CondBrFolding {
-public:
- X86CondBrFolding(const X86InstrInfo *TII,
- const MachineBranchProbabilityInfo *MBPI,
- MachineFunction &MF)
- : TII(TII), MBPI(MBPI), MF(MF) {}
- bool optimize();
-
-private:
- const X86InstrInfo *TII;
- const MachineBranchProbabilityInfo *MBPI;
- MachineFunction &MF;
- std::vector<std::unique_ptr<TargetMBBInfo>> MBBInfos;
- SmallVector<MachineBasicBlock *, 4> RemoveList;
-
- void optimizeCondBr(MachineBasicBlock &MBB,
- SmallVectorImpl<MachineBasicBlock *> &BranchPath);
- void replaceBrDest(MachineBasicBlock *MBB, MachineBasicBlock *OrigDest,
- MachineBasicBlock *NewDest);
- void fixupModifiedCond(MachineBasicBlock *MBB);
- std::unique_ptr<TargetMBBInfo> analyzeMBB(MachineBasicBlock &MBB);
- static bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- int &CmpValue);
- bool findPath(MachineBasicBlock *MBB,
- SmallVectorImpl<MachineBasicBlock *> &BranchPath);
- TargetMBBInfo *getMBBInfo(MachineBasicBlock *MBB) const {
- return MBBInfos[MBB->getNumber()].get();
- }
-};
-} // namespace
-
-// Find a valid path that we can reuse the CondCode.
-// The resulted path (if return true) is stored in BranchPath.
-// Return value:
-// false: is no valid path is found.
-// true: a valid path is found and the targetBB can be reached.
-bool X86CondBrFolding::findPath(
- MachineBasicBlock *MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
- TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
- assert(MBBInfo && "Expecting a candidate MBB");
- int CmpValue = MBBInfo->CmpValue;
-
- MachineBasicBlock *PredMBB = *MBB->pred_begin();
- MachineBasicBlock *SaveMBB = MBB;
- while (PredMBB) {
- TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
- if (!PredMBBInfo || PredMBBInfo->SrcReg != MBBInfo->SrcReg)
- return false;
-
- assert(SaveMBB == PredMBBInfo->TBB || SaveMBB == PredMBBInfo->FBB);
- bool IsFalseBranch = (SaveMBB == PredMBBInfo->FBB);
-
- X86::CondCode CC = PredMBBInfo->BranchCode;
- assert(CC == X86::COND_L || CC == X86::COND_G || CC == X86::COND_E);
- int PredCmpValue = PredMBBInfo->CmpValue;
- bool ValueCmpTrue = ((CmpValue < PredCmpValue && CC == X86::COND_L) ||
- (CmpValue > PredCmpValue && CC == X86::COND_G) ||
- (CmpValue == PredCmpValue && CC == X86::COND_E));
- // Check if both the result of value compare and the branch target match.
- if (!(ValueCmpTrue ^ IsFalseBranch)) {
- LLVM_DEBUG(dbgs() << "Dead BB detected!\n");
- return false;
- }
-
- BranchPath.push_back(PredMBB);
- // These are the conditions on which we could combine the compares.
- if ((CmpValue == PredCmpValue) ||
- (CmpValue == PredCmpValue - 1 && CC == X86::COND_L) ||
- (CmpValue == PredCmpValue + 1 && CC == X86::COND_G))
- return true;
-
- // If PredMBB has more than on preds, or not a pure cmp and br, we bailout.
- if (PredMBB->pred_size() != 1 || !PredMBBInfo->CmpBrOnly)
- return false;
-
- SaveMBB = PredMBB;
- PredMBB = *PredMBB->pred_begin();
- }
- return false;
-}
-
-// Fix up any PHI node in the successor of MBB.
-static void fixPHIsInSucc(MachineBasicBlock *MBB, MachineBasicBlock *OldMBB,
- MachineBasicBlock *NewMBB) {
- if (NewMBB == OldMBB)
- return;
- for (auto MI = MBB->instr_begin(), ME = MBB->instr_end();
- MI != ME && MI->isPHI(); ++MI)
- for (unsigned i = 2, e = MI->getNumOperands() + 1; i != e; i += 2) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.getMBB() == OldMBB)
- MO.setMBB(NewMBB);
- }
-}
-
-// Utility function to set branch probability for edge MBB->SuccMBB.
-static inline bool setBranchProb(MachineBasicBlock *MBB,
- MachineBasicBlock *SuccMBB,
- BranchProbability Prob) {
- auto MBBI = std::find(MBB->succ_begin(), MBB->succ_end(), SuccMBB);
- if (MBBI == MBB->succ_end())
- return false;
- MBB->setSuccProbability(MBBI, Prob);
- return true;
-}
-
-// Utility function to find the unconditional br instruction in MBB.
-static inline MachineBasicBlock::iterator
-findUncondBrI(MachineBasicBlock *MBB) {
- return std::find_if(MBB->begin(), MBB->end(), [](MachineInstr &MI) -> bool {
- return MI.getOpcode() == X86::JMP_1;
- });
-}
-
-// Replace MBB's original successor, OrigDest, with NewDest.
-// Also update the MBBInfo for MBB.
-void X86CondBrFolding::replaceBrDest(MachineBasicBlock *MBB,
- MachineBasicBlock *OrigDest,
- MachineBasicBlock *NewDest) {
- TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
- MachineInstr *BrMI;
- if (MBBInfo->TBB == OrigDest) {
- BrMI = MBBInfo->BrInstr;
- MachineInstrBuilder MIB =
- BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(X86::JCC_1))
- .addMBB(NewDest).addImm(MBBInfo->BranchCode);
- MBBInfo->TBB = NewDest;
- MBBInfo->BrInstr = MIB.getInstr();
- } else { // Should be the unconditional jump stmt.
- MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
- BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
- .addMBB(NewDest);
- MBBInfo->FBB = NewDest;
- BrMI = &*UncondBrI;
- }
- fixPHIsInSucc(NewDest, OrigDest, MBB);
- BrMI->eraseFromParent();
- MBB->addSuccessor(NewDest);
- setBranchProb(MBB, NewDest, MBPI->getEdgeProbability(MBB, OrigDest));
- MBB->removeSuccessor(OrigDest);
-}
-
-// Change the CondCode and BrInstr according to MBBInfo.
-void X86CondBrFolding::fixupModifiedCond(MachineBasicBlock *MBB) {
- TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
- if (!MBBInfo->Modified)
- return;
-
- MachineInstr *BrMI = MBBInfo->BrInstr;
- X86::CondCode CC = MBBInfo->BranchCode;
- MachineInstrBuilder MIB = BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI),
- TII->get(X86::JCC_1))
- .addMBB(MBBInfo->TBB).addImm(CC);
- BrMI->eraseFromParent();
- MBBInfo->BrInstr = MIB.getInstr();
-
- MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
- BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
- .addMBB(MBBInfo->FBB);
- MBB->erase(UncondBrI);
- MBBInfo->Modified = false;
-}
-
-//
-// Apply the transformation:
-// RootMBB -1-> ... PredMBB -3-> MBB -5-> TargetMBB
-// \-2-> \-4-> \-6-> FalseMBB
-// ==>
-// RootMBB -1-> ... PredMBB -7-> FalseMBB
-// TargetMBB <-8-/ \-2-> \-4->
-//
-// Note that PredMBB and RootMBB could be the same.
-// And in the case of dead TargetMBB, we will not have TargetMBB and edge 8.
-//
-// There are some special handling where the RootMBB is COND_E in which case
-// we directly short-cycle the brinstr.
-//
-void X86CondBrFolding::optimizeCondBr(
- MachineBasicBlock &MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
-
- X86::CondCode CC;
- TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
- assert(MBBInfo && "Expecting a candidate MBB");
- MachineBasicBlock *TargetMBB = MBBInfo->TBB;
- BranchProbability TargetProb = MBPI->getEdgeProbability(&MBB, MBBInfo->TBB);
-
- // Forward the jump from MBB's predecessor to MBB's false target.
- MachineBasicBlock *PredMBB = BranchPath.front();
- TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
- assert(PredMBBInfo && "Expecting a candidate MBB");
- if (PredMBBInfo->Modified)
- fixupModifiedCond(PredMBB);
- CC = PredMBBInfo->BranchCode;
- // Don't do this if depth of BranchPath is 1 and PredMBB is of COND_E.
- // We will short-cycle directly for this case.
- if (!(CC == X86::COND_E && BranchPath.size() == 1))
- replaceBrDest(PredMBB, &MBB, MBBInfo->FBB);
-
- MachineBasicBlock *RootMBB = BranchPath.back();
- TargetMBBInfo *RootMBBInfo = getMBBInfo(RootMBB);
- assert(RootMBBInfo && "Expecting a candidate MBB");
- if (RootMBBInfo->Modified)
- fixupModifiedCond(RootMBB);
- CC = RootMBBInfo->BranchCode;
-
- if (CC != X86::COND_E) {
- MachineBasicBlock::iterator UncondBrI = findUncondBrI(RootMBB);
- // RootMBB: Cond jump to the original not-taken MBB.
- X86::CondCode NewCC;
- switch (CC) {
- case X86::COND_L:
- NewCC = X86::COND_G;
- break;
- case X86::COND_G:
- NewCC = X86::COND_L;
- break;
- default:
- llvm_unreachable("unexpected conditional code.");
- }
- BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
- TII->get(X86::JCC_1))
- .addMBB(RootMBBInfo->FBB).addImm(NewCC);
-
- // RootMBB: Jump to TargetMBB
- BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
- TII->get(X86::JMP_1))
- .addMBB(TargetMBB);
- RootMBB->addSuccessor(TargetMBB);
- fixPHIsInSucc(TargetMBB, &MBB, RootMBB);
- RootMBB->erase(UncondBrI);
- } else {
- replaceBrDest(RootMBB, RootMBBInfo->TBB, TargetMBB);
- }
-
- // Fix RootMBB's CmpValue to match MBB's CmpValue for TargetMBB. Don't set
- // the immediate directly; move MBB's compare instruction here instead, as
- // the opcode might be different.
- if (RootMBBInfo->CmpValue != MBBInfo->CmpValue) {
- MachineInstr *NewCmp = MBBInfo->CmpInstr;
- NewCmp->removeFromParent();
- RootMBB->insert(RootMBBInfo->CmpInstr, NewCmp);
- RootMBBInfo->CmpInstr->eraseFromParent();
- }
-
- // Fix branch Probabilities.
- auto fixBranchProb = [&](MachineBasicBlock *NextMBB) {
- BranchProbability Prob;
- for (auto &I : BranchPath) {
- MachineBasicBlock *ThisMBB = I;
- if (!ThisMBB->hasSuccessorProbabilities() ||
- !ThisMBB->isSuccessor(NextMBB))
- break;
- Prob = MBPI->getEdgeProbability(ThisMBB, NextMBB);
- if (Prob.isUnknown())
- break;
- TargetProb = Prob * TargetProb;
- Prob = Prob - TargetProb;
- setBranchProb(ThisMBB, NextMBB, Prob);
- if (ThisMBB == RootMBB) {
- setBranchProb(ThisMBB, TargetMBB, TargetProb);
- }
- ThisMBB->normalizeSuccProbs();
- if (ThisMBB == RootMBB)
- break;
- NextMBB = ThisMBB;
- }
- return true;
- };
- if (CC != X86::COND_E && !TargetProb.isUnknown())
- fixBranchProb(MBBInfo->FBB);
-
- if (CC != X86::COND_E)
- RemoveList.push_back(&MBB);
-
- // Invalidate MBBInfo just in case.
- MBBInfos[MBB.getNumber()] = nullptr;
- MBBInfos[RootMBB->getNumber()] = nullptr;
-
- LLVM_DEBUG(dbgs() << "After optimization:\nRootMBB is: " << *RootMBB << "\n");
- if (BranchPath.size() > 1)
- LLVM_DEBUG(dbgs() << "PredMBB is: " << *(BranchPath[0]) << "\n");
-}
-
-// Driver function for optimization: find the valid candidate and apply
-// the transformation.
-bool X86CondBrFolding::optimize() {
- bool Changed = false;
- LLVM_DEBUG(dbgs() << "***** X86CondBr Folding on Function: " << MF.getName()
- << " *****\n");
- // Setup data structures.
- MBBInfos.resize(MF.getNumBlockIDs());
- for (auto &MBB : MF)
- MBBInfos[MBB.getNumber()] = analyzeMBB(MBB);
-
- for (auto &MBB : MF) {
- TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
- if (!MBBInfo || !MBBInfo->CmpBrOnly)
- continue;
- if (MBB.pred_size() != 1)
- continue;
- LLVM_DEBUG(dbgs() << "Work on MBB." << MBB.getNumber()
- << " CmpValue: " << MBBInfo->CmpValue << "\n");
- SmallVector<MachineBasicBlock *, 4> BranchPath;
- if (!findPath(&MBB, BranchPath))
- continue;
-
-#ifndef NDEBUG
- LLVM_DEBUG(dbgs() << "Found one path (len=" << BranchPath.size() << "):\n");
- int Index = 1;
- LLVM_DEBUG(dbgs() << "Target MBB is: " << MBB << "\n");
- for (auto I = BranchPath.rbegin(); I != BranchPath.rend(); ++I, ++Index) {
- MachineBasicBlock *PMBB = *I;
- TargetMBBInfo *PMBBInfo = getMBBInfo(PMBB);
- LLVM_DEBUG(dbgs() << "Path MBB (" << Index << " of " << BranchPath.size()
- << ") is " << *PMBB);
- LLVM_DEBUG(dbgs() << "CC=" << PMBBInfo->BranchCode
- << " Val=" << PMBBInfo->CmpValue
- << " CmpBrOnly=" << PMBBInfo->CmpBrOnly << "\n\n");
- }
-#endif
- optimizeCondBr(MBB, BranchPath);
- Changed = true;
- }
- NumFixedCondBrs += RemoveList.size();
- for (auto MBBI : RemoveList) {
- while (!MBBI->succ_empty())
- MBBI->removeSuccessor(MBBI->succ_end() - 1);
-
- MBBI->eraseFromParent();
- }
-
- return Changed;
-}
-
-// Analyze instructions that generate CondCode and extract information.
-bool X86CondBrFolding::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- int &CmpValue) {
- unsigned SrcRegIndex = 0;
- unsigned ValueIndex = 0;
- switch (MI.getOpcode()) {
- // TODO: handle test instructions.
- default:
- return false;
- case X86::CMP64ri32:
- case X86::CMP64ri8:
- case X86::CMP32ri:
- case X86::CMP32ri8:
- case X86::CMP16ri:
- case X86::CMP16ri8:
- case X86::CMP8ri:
- SrcRegIndex = 0;
- ValueIndex = 1;
- break;
- case X86::SUB64ri32:
- case X86::SUB64ri8:
- case X86::SUB32ri:
- case X86::SUB32ri8:
- case X86::SUB16ri:
- case X86::SUB16ri8:
- case X86::SUB8ri:
- SrcRegIndex = 1;
- ValueIndex = 2;
- break;
- }
- SrcReg = MI.getOperand(SrcRegIndex).getReg();
- if (!MI.getOperand(ValueIndex).isImm())
- return false;
- CmpValue = MI.getOperand(ValueIndex).getImm();
- return true;
-}
-
-// Analyze a candidate MBB and extract all the information needed.
-// A valid candidate will have two successors.
-// It also should end with the sequence
-// Cmp_instr,
-// CondBr,
-// UnCondBr.
-// Return TargetMBBInfo if MBB is a valid candidate and nullptr otherwise.
-std::unique_ptr<TargetMBBInfo>
-X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) {
- MachineBasicBlock *TBB;
- MachineBasicBlock *FBB;
- MachineInstr *BrInstr;
- MachineInstr *CmpInstr;
- X86::CondCode CC;
- unsigned SrcReg;
- int CmpValue;
- bool Modified;
- bool CmpBrOnly;
-
- if (MBB.succ_size() != 2)
- return nullptr;
-
- CmpBrOnly = true;
- FBB = TBB = nullptr;
- CmpInstr = nullptr;
- MachineBasicBlock::iterator I = MBB.end();
- while (I != MBB.begin()) {
- --I;
- if (I->isDebugValue())
- continue;
- if (I->getOpcode() == X86::JMP_1) {
- if (FBB)
- return nullptr;
- FBB = I->getOperand(0).getMBB();
- continue;
- }
- if (I->isBranch()) {
- if (TBB)
- return nullptr;
- CC = X86::getCondFromBranch(*I);
- switch (CC) {
- default:
- return nullptr;
- case X86::COND_E:
- case X86::COND_L:
- case X86::COND_G:
- case X86::COND_NE:
- case X86::COND_LE:
- case X86::COND_GE:
- break;
- }
- TBB = I->getOperand(0).getMBB();
- BrInstr = &*I;
- continue;
- }
- if (analyzeCompare(*I, SrcReg, CmpValue)) {
- if (CmpInstr)
- return nullptr;
- CmpInstr = &*I;
- continue;
- }
- CmpBrOnly = false;
- break;
- }
-
- if (!TBB || !FBB || !CmpInstr)
- return nullptr;
-
- // Simplify CondCode. Note this is only to simplify the findPath logic
- // and will not change the instruction here.
- switch (CC) {
- case X86::COND_NE:
- CC = X86::COND_E;
- std::swap(TBB, FBB);
- Modified = true;
- break;
- case X86::COND_LE:
- if (CmpValue == INT_MAX)
- return nullptr;
- CC = X86::COND_L;
- CmpValue += 1;
- Modified = true;
- break;
- case X86::COND_GE:
- if (CmpValue == INT_MIN)
- return nullptr;
- CC = X86::COND_G;
- CmpValue -= 1;
- Modified = true;
- break;
- default:
- Modified = false;
- break;
- }
- return std::make_unique<TargetMBBInfo>(TargetMBBInfo{
- TBB, FBB, BrInstr, CmpInstr, CC, SrcReg, CmpValue, Modified, CmpBrOnly});
-}
-
-bool X86CondBrFoldingPass::runOnMachineFunction(MachineFunction &MF) {
- const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- if (!ST.threewayBranchProfitable())
- return false;
- const X86InstrInfo *TII = ST.getInstrInfo();
- const MachineBranchProbabilityInfo *MBPI =
- &getAnalysis<MachineBranchProbabilityInfo>();
-
- X86CondBrFolding CondBr(TII, MBPI, MF);
- return CondBr.optimize();
-}
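The CFG diagram in the removed optimizeCondBr above is easier to picture with a source-level analogue. The sketch below is plain, hand-written C++ with made-up names (chained, folded); it is not the pass's MIR output, only an illustration of the shape the pass matched, a chain of branches comparing the same value against related constants, and, roughly, the control flow it produced by wiring the root block straight to the chain's final targets.

#include <cstdio>
#include <initializer_list>

// Shape the removed pass recognized: RootMBB compares x against a constant,
// and the fall-through block (MBB) re-compares the same register against a
// related constant before branching to TargetMBB / FalseMBB.
static const char *chained(int x) {
  if (x < 10)            // RootMBB: JL
    return "less";
  if (x == 10)           // MBB: JE (reached only when x >= 10)
    return "equal";      // TargetMBB
  return "greater";      // FalseMBB
}

// Hand-written approximation of the folded control flow: the root block
// resolves all three outcomes itself, so the middle block's re-compare
// disappears (edge 8 in the diagram runs from the root to "equal").
static const char *folded(int x) {
  if (x > 10)
    return "greater";
  if (x < 10)
    return "less";
  return "equal";
}

int main() {
  for (int x : {5, 10, 15})
    std::printf("%d -> %s / %s\n", x, chained(x), folded(x));
  return 0;
}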
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 488ee51f1d89..a2ae6345c006 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -141,7 +141,7 @@ public:
return false;
// It's illegal to replace an instruction that implicitly defines a register
// with an instruction that doesn't, unless that register is dead.
- for (auto &MO : MI->implicit_operands())
+ for (const auto &MO : MI->implicit_operands())
if (MO.isReg() && MO.isDef() && !MO.isDead() &&
!TII->get(DstOpcode).hasImplicitDefOfPhysReg(MO.getReg()))
return false;
@@ -180,7 +180,7 @@ public:
MachineRegisterInfo *MRI) const override {
assert(isLegal(MI, TII) && "Cannot convert instruction");
MachineBasicBlock *MBB = MI->getParent();
- auto &DL = MI->getDebugLoc();
+ const DebugLoc &DL = MI->getDebugLoc();
Register Reg = MRI->createVirtualRegister(
TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(),
@@ -220,14 +220,12 @@ public:
// Don't allow copies to/from GR8/GR16 physical registers.
// FIXME: Is there some better way to support this?
Register DstReg = MI->getOperand(0).getReg();
- if (Register::isPhysicalRegister(DstReg) &&
- (X86::GR8RegClass.contains(DstReg) ||
- X86::GR16RegClass.contains(DstReg)))
+ if (DstReg.isPhysical() && (X86::GR8RegClass.contains(DstReg) ||
+ X86::GR16RegClass.contains(DstReg)))
return false;
Register SrcReg = MI->getOperand(1).getReg();
- if (Register::isPhysicalRegister(SrcReg) &&
- (X86::GR8RegClass.contains(SrcReg) ||
- X86::GR16RegClass.contains(SrcReg)))
+ if (SrcReg.isPhysical() && (X86::GR8RegClass.contains(SrcReg) ||
+ X86::GR16RegClass.contains(SrcReg)))
return false;
return true;
@@ -237,7 +235,7 @@ public:
MachineRegisterInfo *MRI) const override {
assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY");
- for (auto &MO : MI->operands()) {
+ for (const auto &MO : MI->operands()) {
// Physical registers will not be converted. Assume that converting the
// COPY to the destination domain will eventually result in an actual
// instruction.
@@ -300,7 +298,7 @@ typedef DenseMap<InstrConverterBaseKeyTy, std::unique_ptr<InstrConverterBase>>
class Closure {
private:
/// Virtual registers in the closure.
- DenseSet<unsigned> Edges;
+ DenseSet<Register> Edges;
/// Instructions in the closure.
SmallVector<MachineInstr *, 8> Instrs;
@@ -332,11 +330,9 @@ public:
bool empty() const { return Edges.empty(); }
- bool insertEdge(unsigned Reg) {
- return Edges.insert(Reg).second;
- }
+ bool insertEdge(Register Reg) { return Edges.insert(Reg).second; }
- using const_edge_iterator = DenseSet<unsigned>::const_iterator;
+ using const_edge_iterator = DenseSet<Register>::const_iterator;
iterator_range<const_edge_iterator> edges() const {
return iterator_range<const_edge_iterator>(Edges.begin(), Edges.end());
}
@@ -352,7 +348,7 @@ public:
LLVM_DUMP_METHOD void dump(const MachineRegisterInfo *MRI) const {
dbgs() << "Registers: ";
bool First = true;
- for (unsigned Reg : Edges) {
+ for (Register Reg : Edges) {
if (!First)
dbgs() << ", ";
First = false;
@@ -407,10 +403,10 @@ private:
void initConverters();
/// Starting from \p Reg, expand the closure as much as possible.
- void buildClosure(Closure &, unsigned Reg);
+ void buildClosure(Closure &, Register Reg);
/// Enqueue \p Reg to be considered for addition to the closure.
- void visitRegister(Closure &, unsigned Reg, RegDomain &Domain,
+ void visitRegister(Closure &, Register Reg, RegDomain &Domain,
SmallVectorImpl<unsigned> &Worklist);
/// Reassign the closure to \p Domain.
@@ -430,13 +426,13 @@ char X86DomainReassignment::ID = 0;
} // End anonymous namespace.
-void X86DomainReassignment::visitRegister(Closure &C, unsigned Reg,
+void X86DomainReassignment::visitRegister(Closure &C, Register Reg,
RegDomain &Domain,
SmallVectorImpl<unsigned> &Worklist) {
if (EnclosedEdges.count(Reg))
return;
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return;
if (!MRI->hasOneDef(Reg))
@@ -507,7 +503,7 @@ void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const {
// Iterate all registers in the closure, replace them with registers in the
// destination domain.
- for (unsigned Reg : C.edges()) {
+ for (Register Reg : C.edges()) {
MRI->setRegClass(Reg, getDstRC(MRI->getRegClass(Reg), Domain));
for (auto &MO : MRI->use_operands(Reg)) {
if (MO.isReg())
@@ -517,13 +513,13 @@ void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const {
}
}
- for (auto MI : ToErase)
+ for (auto *MI : ToErase)
MI->eraseFromParent();
}
/// \returns true when \p Reg is used as part of an address calculation in \p
/// MI.
-static bool usedAsAddr(const MachineInstr &MI, unsigned Reg,
+static bool usedAsAddr(const MachineInstr &MI, Register Reg,
const TargetInstrInfo *TII) {
if (!MI.mayLoadOrStore())
return false;
@@ -537,14 +533,14 @@ static bool usedAsAddr(const MachineInstr &MI, unsigned Reg,
for (unsigned MemOpIdx = MemOpStart,
MemOpEnd = MemOpStart + X86::AddrNumOperands;
MemOpIdx < MemOpEnd; ++MemOpIdx) {
- auto &Op = MI.getOperand(MemOpIdx);
+ const MachineOperand &Op = MI.getOperand(MemOpIdx);
if (Op.isReg() && Op.getReg() == Reg)
return true;
}
return false;
}
-void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) {
+void X86DomainReassignment::buildClosure(Closure &C, Register Reg) {
SmallVector<unsigned, 4> Worklist;
RegDomain Domain = NoDomain;
visitRegister(C, Reg, Domain, Worklist);
@@ -594,7 +590,7 @@ void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) {
continue;
Register DefReg = DefOp.getReg();
- if (!Register::isVirtualRegister(DefReg)) {
+ if (!DefReg.isVirtual()) {
C.setAllIllegal();
continue;
}
@@ -753,7 +749,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
// Go over all virtual registers and calculate a closure.
unsigned ClosureID = 0;
for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) {
- unsigned Reg = Register::index2VirtReg(Idx);
+ Register Reg = Register::index2VirtReg(Idx);
// Only the GPR domain is currently supported as a source domain.
if (!isGPR(MRI->getRegClass(Reg)))
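Most of the churn in X86DomainReassignment.cpp above is the mechanical move from bare unsigned register numbers to the Register wrapper, which carries the virtual/physical distinction in the value itself. The stand-alone sketch below only models that idea (RegHandle is an invented name, not LLVM's class): a typed handle whose top bit marks virtual registers, which is why isVirtual()/isPhysical() reads better than passing unsigned around. It makes no claim about the real class beyond that.

#include <cstdint>

// Simplified stand-in for a register handle; real code should use
// llvm::Register. The discriminator lives in the value: the top bit marks a
// virtual register, anything else nonzero is treated as physical here.
class RegHandle {
  uint32_t Id = 0;

public:
  static constexpr uint32_t VirtualBit = 1u << 31;

  constexpr RegHandle() = default;
  constexpr explicit RegHandle(uint32_t V) : Id(V) {}

  static constexpr RegHandle virt(uint32_t Index) {
    return RegHandle(Index | VirtualBit);
  }
  static constexpr RegHandle phys(uint32_t Encoding) {
    return RegHandle(Encoding);
  }

  constexpr bool isVirtual() const { return (Id & VirtualBit) != 0; }
  constexpr bool isPhysical() const { return Id != 0 && !isVirtual(); }
  constexpr uint32_t id() const { return Id; }
};

int main() {
  constexpr RegHandle V = RegHandle::virt(5);
  constexpr RegHandle P = RegHandle::phys(3);
  static_assert(V.isVirtual() && !V.isPhysical(), "virtual handle");
  static_assert(P.isPhysical() && !P.isVirtual(), "physical handle");
  return 0;
}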
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
index 540ad98b6d54..97f843fa24eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -85,6 +85,8 @@ public:
private:
/// Machine instruction info used throughout the class.
const X86InstrInfo *TII = nullptr;
+
+ const X86Subtarget *ST = nullptr;
};
} // end anonymous namespace
@@ -94,8 +96,8 @@ char EvexToVexInstPass::ID = 0;
bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
- const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- if (!ST.hasAVX512())
+ ST = &MF.getSubtarget<X86Subtarget>();
+ if (!ST->hasAVX512())
return false;
bool Changed = false;
@@ -144,10 +146,29 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
}
// Do any custom cleanup needed to finalize the conversion.
-static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
+static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
+ const X86Subtarget *ST) {
(void)NewOpc;
unsigned Opc = MI.getOpcode();
switch (Opc) {
+ case X86::VPDPBUSDSZ256m:
+ case X86::VPDPBUSDSZ256r:
+ case X86::VPDPBUSDSZ128m:
+ case X86::VPDPBUSDSZ128r:
+ case X86::VPDPBUSDZ256m:
+ case X86::VPDPBUSDZ256r:
+ case X86::VPDPBUSDZ128m:
+ case X86::VPDPBUSDZ128r:
+ case X86::VPDPWSSDSZ256m:
+ case X86::VPDPWSSDSZ256r:
+ case X86::VPDPWSSDSZ128m:
+ case X86::VPDPWSSDSZ128r:
+ case X86::VPDPWSSDZ256m:
+ case X86::VPDPWSSDZ256r:
+ case X86::VPDPWSSDZ128m:
+ case X86::VPDPWSSDZ128r:
+ // These can only be VEX-converted if AVXVNNI is enabled.
+ return ST->hasAVXVNNI();
case X86::VALIGNDZ128rri:
case X86::VALIGNDZ128rmi:
case X86::VALIGNQZ128rri:
@@ -250,7 +271,7 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
(Desc.TSFlags & X86II::VEX_L) ? makeArrayRef(X86EvexToVex256CompressTable)
: makeArrayRef(X86EvexToVex128CompressTable);
- auto I = llvm::lower_bound(Table, MI.getOpcode());
+ const auto *I = llvm::lower_bound(Table, MI.getOpcode());
if (I == Table.end() || I->EvexOpcode != MI.getOpcode())
return false;
@@ -259,7 +280,7 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
if (usesExtendedRegister(MI))
return false;
- if (!performCustomAdjustments(MI, NewOpc))
+ if (!performCustomAdjustments(MI, NewOpc, ST))
return false;
MI.setDesc(TII->get(NewOpc));
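The new X86Subtarget parameter exists for one visible reason: the VNNI instructions have both an AVX512-VNNI (EVEX) and an AVX-VNNI (VEX) encoding, so compression is only legal when the AVXVNNI feature is present. Below is a hypothetical distillation of that gate; mayCompressToVex and its parameters are invented for illustration, while the real checks are usesExtendedRegister plus the switch in performCustomAdjustments.

#include <cassert>

// Invented summary of the EVEX->VEX compression gate: refuse when the
// instruction touches registers only EVEX can encode, and require the
// AVX-VNNI feature before compressing a VNNI opcode.
static bool mayCompressToVex(bool UsesExtendedRegs, bool IsVnniOpcode,
                             bool HasAvxVnni) {
  if (UsesExtendedRegs)          // xmm16-31 / zmm registers need EVEX
    return false;
  if (IsVnniOpcode && !HasAvxVnni)
    return false;                // no VEX encoding on this subtarget
  return true;
}

int main() {
  assert(!mayCompressToVex(false, /*IsVnniOpcode=*/true, /*HasAvxVnni=*/false));
  assert(mayCompressToVex(false, /*IsVnniOpcode=*/true, /*HasAvxVnni=*/true));
  assert(!mayCompressToVex(true, /*IsVnniOpcode=*/false, /*HasAvxVnni=*/true));
  return 0;
}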
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index c47ef4708e91..15af0fb2e888 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -334,32 +334,28 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MBB.erase(MBBI);
return true;
}
- case X86::LCMPXCHG8B_SAVE_EBX:
case X86::LCMPXCHG16B_SAVE_RBX: {
// Perform the following transformation.
// SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
// =>
- // [E|R]BX = InArg
+ // RBX = InArg
// actualcmpxchg Addr
- // [E|R]BX = SaveRbx
+ // RBX = SaveRbx
const MachineOperand &InArg = MBBI->getOperand(6);
Register SaveRbx = MBBI->getOperand(7).getReg();
- unsigned ActualInArg =
- Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX;
// Copy the input argument of the pseudo into the argument of the
// actual instruction.
- TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, InArg.getReg(),
- InArg.isKill());
+ // NOTE: We don't copy the kill flag since the input might be the same reg
+ // as one of the other operands of LCMPXCHG16B.
+ TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
// Create the actual instruction.
- unsigned ActualOpc =
- Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::LCMPXCHG8B : X86::LCMPXCHG16B;
- MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(ActualOpc));
+ MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
// Copy the operands related to the address.
for (unsigned Idx = 1; Idx < 6; ++Idx)
NewInstr->addOperand(MBBI->getOperand(Idx));
// Finally, restore the value of RBX.
- TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, SaveRbx,
+ TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
/*SrcIsKill*/ true);
// Delete the pseudo.
@@ -442,9 +438,68 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MBB.erase(MBBI);
return true;
}
+ case X86::MWAITX_SAVE_RBX: {
+ // Perform the following transformation.
+ // SaveRbx = pseudomwaitx InArg, SaveRbx
+ // =>
+ // [E|R]BX = InArg
+ // actualmwaitx
+ // [E|R]BX = SaveRbx
+ const MachineOperand &InArg = MBBI->getOperand(1);
+ // Copy the input argument of the pseudo into the argument of the
+ // actual instruction.
+ TII->copyPhysReg(MBB, MBBI, DL, X86::EBX, InArg.getReg(), InArg.isKill());
+ // Create the actual instruction.
+ BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr));
+ // Finally, restore the value of RBX.
+ Register SaveRbx = MBBI->getOperand(2).getReg();
+ TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true);
+ // Delete the pseudo.
+ MBBI->eraseFromParent();
+ return true;
+ }
case TargetOpcode::ICALL_BRANCH_FUNNEL:
ExpandICallBranchFunnel(&MBB, MBBI);
return true;
+ case X86::PLDTILECFG: {
+ MI.RemoveOperand(0);
+ MI.setDesc(TII->get(X86::LDTILECFG));
+ return true;
+ }
+ case X86::PSTTILECFG: {
+ MI.RemoveOperand(MI.getNumOperands() - 1); // Remove $tmmcfg
+ MI.setDesc(TII->get(X86::STTILECFG));
+ return true;
+ }
+ case X86::PTILELOADDV: {
+ MI.RemoveOperand(8); // Remove $tmmcfg
+ for (unsigned i = 2; i > 0; --i)
+ MI.RemoveOperand(i);
+ MI.setDesc(TII->get(X86::TILELOADD));
+ return true;
+ }
+ case X86::PTDPBSSDV: {
+ MI.RemoveOperand(7); // Remove $tmmcfg
+ MI.untieRegOperand(4);
+ for (unsigned i = 3; i > 0; --i)
+ MI.RemoveOperand(i);
+ MI.setDesc(TII->get(X86::TDPBSSD));
+ MI.tieOperands(0, 1);
+ return true;
+ }
+ case X86::PTILESTOREDV: {
+ MI.RemoveOperand(8); // Remove $tmmcfg
+ for (int i = 1; i >= 0; --i)
+ MI.RemoveOperand(i);
+ MI.setDesc(TII->get(X86::TILESTORED));
+ return true;
+ }
+ case X86::PTILEZEROV: {
+ for (int i = 3; i > 0; --i) // Remove row, col, $tmmcfg
+ MI.RemoveOperand(i);
+ MI.setDesc(TII->get(X86::TILEZERO));
+ return true;
+ }
}
llvm_unreachable("Previous switch has a fallthrough?");
}
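Context for the LCMPXCHG16B_SAVE_RBX hunk above: CMPXCHG16B implicitly takes its desired value from RBX:RCX, but RBX may be reserved (for example as the base pointer), so the pseudo moves the input into RBX only around the instruction and restores it afterwards. The snippet below is merely one way to provoke that lowering from C++ (Pair is a made-up type); whether an inline LOCK CMPXCHG16B is actually emitted depends on the compiler, -mcx16, and the standard library, which may call into libatomic instead.

#include <atomic>
#include <cstdint>
#include <cstdio>

struct alignas(16) Pair {
  uint64_t Lo;
  uint64_t Hi;
};

int main() {
  std::atomic<Pair> A{{1, 2}};
  Pair Expected{1, 2};
  Pair Desired{3, 4};
  // With -mcx16 this typically lowers to LOCK CMPXCHG16B, whose desired value
  // is implicitly taken from RBX:RCX; that is the operand the SAVE_RBX pseudo
  // above swaps in and out when RBX is otherwise reserved.
  bool Ok = A.compare_exchange_strong(Expected, Desired);
  Pair Now = A.load();
  std::printf("%d %llu %llu\n", Ok ? 1 : 0, (unsigned long long)Now.Lo,
              (unsigned long long)Now.Hi);
  return 0;
}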
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
index b305940139c0..a1a16a19f5e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
@@ -284,6 +284,14 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
return false;
}
+ // Make sure no potentially eflags clobbering phi moves can be inserted in
+ // between.
+ auto HasPhis = [](const BasicBlock *Succ) {
+ return !llvm::empty(Succ->phis());
+ };
+ if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
+ return false;
+
CC = TmpCC;
return true;
}
@@ -779,14 +787,14 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
if (TLI.getPointerTy(DL) == MVT::i64) {
Opc = X86::MOV64rm;
RC = &X86::GR64RegClass;
-
- if (Subtarget->isPICStyleRIPRel())
- StubAM.Base.Reg = X86::RIP;
} else {
Opc = X86::MOV32rm;
RC = &X86::GR32RegClass;
}
+ if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL)
+ StubAM.Base.Reg = X86::RIP;
+
LoadReg = createResultReg(RC);
MachineInstrBuilder LoadMI =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
@@ -1082,13 +1090,35 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
// If all else fails, try to materialize the value in a register.
if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ auto GetCallRegForValue = [this](const Value *V) {
+ Register Reg = getRegForValue(V);
+
+ // In 64-bit mode, we need a 64-bit register even if pointers are 32 bits.
+ if (Reg && Subtarget->isTarget64BitILP32()) {
+ Register CopyReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32rr),
+ CopyReg)
+ .addReg(Reg);
+
+ Register ExtReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg)
+ .addImm(0)
+ .addReg(CopyReg)
+ .addImm(X86::sub_32bit);
+ Reg = ExtReg;
+ }
+
+ return Reg;
+ };
+
if (AM.Base.Reg == 0) {
- AM.Base.Reg = getRegForValue(V);
+ AM.Base.Reg = GetCallRegForValue(V);
return AM.Base.Reg != 0;
}
if (AM.IndexReg == 0) {
assert(AM.Scale == 1 && "Scale with no index!");
- AM.IndexReg = getRegForValue(V);
+ AM.IndexReg = GetCallRegForValue(V);
return AM.IndexReg != 0;
}
}
@@ -1231,13 +1261,15 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (SrcVT == MVT::i1) {
if (Outs[0].Flags.isSExt())
return false;
- SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
+ // TODO
+ SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*Op0IsKill=*/false);
SrcVT = MVT::i8;
}
unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
ISD::SIGN_EXTEND;
- SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
- SrcReg, /*TODO: Kill=*/false);
+ // TODO
+ SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, SrcReg,
+ /*Op0IsKill=*/false);
}
// Make the copy.
@@ -1431,8 +1463,8 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
ResultReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
ResultReg);
- ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
- X86::sub_8bit);
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg,
+ /*Op0IsKill=*/true, X86::sub_8bit);
if (!ResultReg)
return false;
break;
@@ -1555,11 +1587,11 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
Result32).addReg(ResultReg);
- ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
- X86::sub_16bit);
+ ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32,
+ /*Op0IsKill=*/true, X86::sub_16bit);
} else if (DstVT != MVT::i8) {
ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
- ResultReg, /*Kill=*/true);
+ ResultReg, /*Op0IsKill=*/true);
if (ResultReg == 0)
return false;
}
@@ -1601,11 +1633,11 @@ bool X86FastISel::X86SelectSExt(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
Result32).addReg(ResultReg);
- ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
- X86::sub_16bit);
+ ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32,
+ /*Op0IsKill=*/true, X86::sub_16bit);
} else if (DstVT != MVT::i8) {
ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
- ResultReg, /*Kill=*/true);
+ ResultReg, /*Op0IsKill=*/true);
if (ResultReg == 0)
return false;
}
@@ -1757,7 +1789,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), OpReg)
.addReg(KOpReg);
- OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
+ OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Op0IsKill=*/true,
X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
@@ -1989,7 +2021,7 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
// Now reference the 8-bit subreg of the result.
ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
- /*Kill=*/true, X86::sub_8bit);
+ /*Op0IsKill=*/true, X86::sub_8bit);
}
// Copy the result out of the physreg if we haven't already.
if (!ResultReg) {
@@ -2103,7 +2135,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CondReg)
.addReg(KCondReg, getKillRegState(CondIsKill));
- CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+ CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Op0IsKill=*/true,
X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
@@ -2257,12 +2289,12 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
const TargetRegisterClass *VR128 = &X86::VR128RegClass;
Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
- LHSReg, LHSIsKill);
- Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
- RHSReg, RHSIsKill);
- Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
- AndReg, /*IsKill=*/true);
+ Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg,
+ /*Op0IsKill=*/false, LHSReg, LHSIsKill);
+ Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg,
+ /*Op0IsKill=*/true, RHSReg, RHSIsKill);
+ Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*Op0IsKill=*/true,
+ AndReg, /*Op1IsKill=*/true);
ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
@@ -2321,7 +2353,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CondReg)
.addReg(KCondReg, getKillRegState(CondIsKill));
- CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+ CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Op0IsKill=*/true,
X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
@@ -2578,7 +2610,7 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
unsigned Reg;
bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
- RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
+ RV &= X86FastEmitStore(VT, Reg, /*ValIsKill=*/true, DestAM);
assert(RV && "Failed to emit load or store??");
unsigned Size = VT.getSizeInBits()/8;
@@ -2642,15 +2674,15 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
// Explicitly zero-extend the input to 32-bit.
InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg,
- /*Kill=*/false);
+ /*Op0IsKill=*/false);
// The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
- InputReg, /*Kill=*/true);
+ InputReg, /*Op0IsKill=*/true);
unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
: X86::VCVTPH2PSrr;
- InputReg = fastEmitInst_r(Opc, RC, InputReg, /*Kill=*/true);
+ InputReg = fastEmitInst_r(Opc, RC, InputReg, /*Op0IsKill=*/true);
// The result value is in the lower 32-bits of ResultReg.
// Emit an explicit copy from register class VR128 to register class FR32.
@@ -2706,10 +2738,9 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// movq (%rax), %rax
// movq (%rax), %rax
// ...
- unsigned DestReg;
unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
while (Depth--) {
- DestReg = createResultReg(RC);
+ Register DestReg = createResultReg(RC);
addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), DestReg), SrcReg);
SrcReg = DestReg;
@@ -2879,8 +2910,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
const Value *RHS = II->getArgOperand(1);
// Canonicalize immediate to the RHS.
- if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
- isCommutativeIntrinsic(II))
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
std::swap(LHS, RHS);
unsigned BaseOpc, CondCode;
@@ -3693,10 +3723,10 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
default: llvm_unreachable("Unexpected value type");
case MVT::i1:
case MVT::i8:
- return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+ return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Op0IsKill=*/true,
X86::sub_8bit);
case MVT::i16:
- return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
+ return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Op0IsKill=*/true,
X86::sub_16bit);
case MVT::i32:
return SrcReg;
@@ -3793,7 +3823,7 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
.addConstantPoolIndex(CPI, 0, OpFlag);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
- addDirectMem(MIB, AddrReg);
+ addRegReg(MIB, AddrReg, false, PICBase, false);
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getConstantPool(*FuncInfo.MF),
MachineMemOperand::MOLoad, DL.getPointerSize(), Alignment);
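One FastISel fix above (the HasPhis bail-out in foldX86XALUIntrinsic) protects the pattern where an overflow intrinsic feeds a branch and nothing may clobber EFLAGS in between. At the source level that pattern usually comes from the GCC/Clang __builtin_add_overflow builtin, as in the illustration below (classify is a made-up name); this shows the input pattern only, not the fix.

#include <cstdio>

// __builtin_add_overflow is the usual C/C++ source of the add-with-overflow
// intrinsic; branching on its result is the shape that FastISel tries to
// fold into a single ADD plus a conditional jump on the overflow flag.
static const char *classify(int A, int B) {
  int Sum;
  if (__builtin_add_overflow(A, B, &Sum))
    return "overflow";
  return "ok";
}

int main() {
  std::printf("%s\n", classify(2000000000, 2000000000));
  std::printf("%s\n", classify(1, 2));
  return 0;
}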
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index 78de041329e2..f8d822aebc5b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -187,8 +187,7 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
/// If so, return that super register in \p SuperDestReg.
bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
Register &SuperDestReg) const {
- auto *TRI = &TII->getRegisterInfo();
-
+ const X86RegisterInfo *TRI = &TII->getRegisterInfo();
Register OrigDestReg = OrigMI->getOperand(0).getReg();
SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32);
@@ -320,7 +319,7 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
// This is only correct if we access the same subregister index: otherwise,
// we could try to replace "movb %ah, %al" with "movl %eax, %eax".
- auto *TRI = &TII->getRegisterInfo();
+ const X86RegisterInfo *TRI = &TII->getRegisterInfo();
if (TRI->getSubRegIndex(NewSrcReg, OldSrc.getReg()) !=
TRI->getSubRegIndex(NewDestReg, OldDest.getReg()))
return nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 424279038921..0054d5818a96 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -376,7 +376,8 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
- !TII->isSafeToClobberEFLAGS(MBB, I))
+ MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
+ MachineBasicBlock::LQR_Dead)
return false;
Register DestReg = MI.getOperand(0).getReg();
@@ -449,6 +450,7 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
} else
return false;
+ MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
MBB.erase(I);
I = NewMI;
return true;
@@ -484,6 +486,7 @@ void FixupLEAPass::seekLEAFixup(MachineOperand &p,
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
// now to replace with an equivalent LEA...
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
+ MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
MBB.erase(MBI);
MachineBasicBlock::iterator J =
static_cast<MachineBasicBlock::iterator>(NewMI);
@@ -505,7 +508,8 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
if (Segment.getReg() != 0 || !Offset.isImm() ||
- !TII->isSafeToClobberEFLAGS(MBB, I))
+ MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
+ MachineBasicBlock::LQR_Dead)
return;
const Register DstR = Dst.getReg();
const Register SrcR1 = Base.getReg();
@@ -536,6 +540,7 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
LLVM_DEBUG(NewMI->dump(););
}
if (NewMI) {
+ MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
MBB.erase(I);
I = NewMI;
}
@@ -555,7 +560,8 @@ void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
- !TII->isSafeToClobberEFLAGS(MBB, MI) ||
+ MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
+ MachineBasicBlock::LQR_Dead ||
Segment.getReg() != X86::NoRegister)
return;
@@ -641,6 +647,7 @@ void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
}
}
+ MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
MBB.erase(I);
I = NewMI;
return;
@@ -666,6 +673,7 @@ void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
.add(Index);
LLVM_DEBUG(NewMI->dump(););
+ MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
MBB.erase(I);
I = NewMI;
return;
@@ -688,6 +696,7 @@ void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
.add(Base);
LLVM_DEBUG(NewMI->dump(););
+ MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
MBB.erase(I);
I = NewMI;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp
index 09668d7c5468..269f8ce6bd7a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp
@@ -97,28 +97,31 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
if (FlagsDefMI->readsRegister(X86::EFLAGS))
continue;
- ++NumSubstZexts;
- Changed = true;
-
// On 32-bit, we need to be careful to force an ABCD register.
const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit()
? &X86::GR32RegClass
: &X86::GR32_ABCDRegClass;
- Register ZeroReg = MRI->createVirtualRegister(RC);
- Register InsertReg = MRI->createVirtualRegister(RC);
+ if (!MRI->constrainRegClass(ZExt->getOperand(0).getReg(), RC)) {
+ // If we cannot constrain the register, we would need an additional copy
+ // and are better off keeping the MOVZX32rr8 we have now.
+ continue;
+ }
+
+ ++NumSubstZexts;
+ Changed = true;
// Initialize a register with 0. This must go before the eflags def
+ Register ZeroReg = MRI->createVirtualRegister(RC);
BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
ZeroReg);
// X86 setcc only takes an output GR8, so fake a GR32 input by inserting
// the setcc result into the low byte of the zeroed register.
BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
- TII->get(X86::INSERT_SUBREG), InsertReg)
+ TII->get(X86::INSERT_SUBREG), ZExt->getOperand(0).getReg())
.addReg(ZeroReg)
.addReg(MI.getOperand(0).getReg())
.addImm(X86::sub_8bit);
- MRI->replaceRegWith(ZExt->getOperand(0).getReg(), InsertReg);
ToErase.push_back(ZExt);
}
}
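For reference, the shape X86FixupSetCC rewrites is a boolean comparison zero-extended to 32 bits: naively CMP, SETcc into a byte register, then MOVZX. The pass zeroes the full register ahead of the flags-producing instruction and inserts the SETcc byte into it, and the change above now constrains and reuses the existing virtual register rather than minting new ones. A minimal source-level reproducer (isLess is just an example function), nothing more:

#include <cstdio>

// Returns 0 or 1; on x86 this is the cmp/setcc/zero-extend shape that the
// pass rewrites into "zero the 32-bit register, then set its low byte".
static int isLess(int A, int B) { return A < B; }

int main() {
  std::printf("%d %d\n", isLess(1, 2), isLess(3, 2));
  return 0;
}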
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index 831695dabcd8..d43fd807a5a7 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -97,7 +97,7 @@ private:
CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
MachineBasicBlock::iterator CopyDefI);
- unsigned promoteCondToReg(MachineBasicBlock &MBB,
+ Register promoteCondToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator TestPos,
DebugLoc TestLoc, X86::CondCode Cond);
std::pair<unsigned, bool>
@@ -739,8 +739,7 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
X86::CondCode Cond = X86::getCondFromSETCC(MI);
if (Cond != X86::COND_INVALID && !MI.mayStore() &&
- MI.getOperand(0).isReg() &&
- Register::isVirtualRegister(MI.getOperand(0).getReg())) {
+ MI.getOperand(0).isReg() && MI.getOperand(0).getReg().isVirtual()) {
assert(MI.getOperand(0).isDef() &&
"A non-storing SETcc should always define a register!");
CondRegs[Cond] = MI.getOperand(0).getReg();
@@ -754,7 +753,7 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
return CondRegs;
}
-unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
+Register X86FlagsCopyLoweringPass::promoteCondToReg(
MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
DebugLoc TestLoc, X86::CondCode Cond) {
Register Reg = MRI->createVirtualRegister(PromoteRC);
@@ -982,5 +981,4 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
MIB.setMemRefs(SetCCI.memoperands());
SetCCI.eraseFromParent();
- return;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
index db6b68659493..866f11364004 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
@@ -148,60 +149,6 @@ static unsigned getLEArOpcode(bool IsLP64) {
return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
-/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
-/// when it reaches the "return" instruction. We can then pop a stack object
-/// to this register without worry about clobbering it.
-static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const X86RegisterInfo *TRI,
- bool Is64Bit) {
- const MachineFunction *MF = MBB.getParent();
- if (MF->callsEHReturn())
- return 0;
-
- const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF);
-
- if (MBBI == MBB.end())
- return 0;
-
- switch (MBBI->getOpcode()) {
- default: return 0;
- case TargetOpcode::PATCHABLE_RET:
- case X86::RET:
- case X86::RETL:
- case X86::RETQ:
- case X86::RETIL:
- case X86::RETIQ:
- case X86::TCRETURNdi:
- case X86::TCRETURNri:
- case X86::TCRETURNmi:
- case X86::TCRETURNdi64:
- case X86::TCRETURNri64:
- case X86::TCRETURNmi64:
- case X86::EH_RETURN:
- case X86::EH_RETURN64: {
- SmallSet<uint16_t, 8> Uses;
- for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MBBI->getOperand(i);
- if (!MO.isReg() || MO.isDef())
- continue;
- Register Reg = MO.getReg();
- if (!Reg)
- continue;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Uses.insert(*AI);
- }
-
- for (auto CS : AvailableRegs)
- if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP &&
- CS != X86::ESP)
- return CS;
- }
- }
-
- return 0;
-}
-
static bool isEAXLiveIn(MachineBasicBlock &MBB) {
for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
unsigned Reg = RegMask.PhysReg;
@@ -288,7 +235,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
if (isSub && !isEAXLiveIn(MBB))
Reg = Rax;
else
- Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);
unsigned MovRIOpc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
unsigned AddSubRROpc =
@@ -345,7 +292,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
// need to find a dead register when using pop.
unsigned Reg = isSub
? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
- : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ : TRI->findDeadCallerSavedReg(MBB, MBBI);
if (Reg) {
unsigned Opc = isSub
? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
@@ -490,9 +437,9 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
}
const MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const unsigned FramePtr = TRI->getFrameRegister(MF);
- const unsigned MachineFramePtr =
- STI.isTarget64BitILP32() ? unsigned(getX86SubSuperRegister(FramePtr, 64))
+ const Register FramePtr = TRI->getFrameRegister(MF);
+ const Register MachineFramePtr =
+ STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
: FramePtr;
unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
// Offset = space for return address + size of the frame pointer itself.
@@ -1743,7 +1690,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
assert(Personality == EHPersonality::MSVC_CXX);
Register FrameReg;
int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
- int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg);
+ int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
// ESP is the first field, so no extra displacement is needed.
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
false, EHRegOffset)
@@ -1764,8 +1711,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (IsWin64Prologue && IsFunclet)
Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
else
- Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg) +
- SEHFrameOffset;
+ Offset =
+ getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
+ SEHFrameOffset;
HasWinCFI = true;
assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
@@ -1837,7 +1785,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
Register UsedReg;
int Offset =
- getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
+ getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
+ .getFixed();
assert(UsedReg == BasePtr);
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
.addReg(FramePtr)
@@ -1915,7 +1864,8 @@ X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
Register SPReg;
int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
- /*IgnoreSPUpdates*/ true);
+ /*IgnoreSPUpdates*/ true)
+ .getFixed();
assert(Offset >= 0 && SPReg == TRI->getStackRegister());
return static_cast<unsigned>(Offset);
}
@@ -1970,7 +1920,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
const bool Is64BitILP32 = STI.isTarget64BitILP32();
Register FramePtr = TRI->getFrameRegister(MF);
- unsigned MachineFramePtr =
+ Register MachineFramePtr =
Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
@@ -2141,10 +2091,16 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
}
}
+
+ // Emit tilerelease for AMX kernel.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (!MRI.reg_nodbg_empty(X86::TMMCFG))
+ BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
}
-int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const {
+StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
bool IsFixed = MFI.isFixedObjectIndex(FI);
@@ -2191,7 +2147,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
if (FI && FI == X86FI->getFAIndex())
- return -SEHFrameOffset;
+ return StackOffset::getFixed(-SEHFrameOffset);
// FPDelta is the offset from the "traditional" FP location of the old base
// pointer followed by return address and the location required by the
@@ -2207,23 +2163,23 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
if (FI < 0) {
// Skip the saved EBP.
- return Offset + SlotSize + FPDelta;
+ return StackOffset::getFixed(Offset + SlotSize + FPDelta);
} else {
assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
- return Offset + StackSize;
+ return StackOffset::getFixed(Offset + StackSize);
}
} else if (TRI->needsStackRealignment(MF)) {
if (FI < 0) {
// Skip the saved EBP.
- return Offset + SlotSize + FPDelta;
+ return StackOffset::getFixed(Offset + SlotSize + FPDelta);
} else {
assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
- return Offset + StackSize;
+ return StackOffset::getFixed(Offset + StackSize);
}
// FIXME: Support tail calls
} else {
if (!HasFP)
- return Offset + StackSize;
+ return StackOffset::getFixed(Offset + StackSize);
// Skip the saved EBP.
Offset += SlotSize;
@@ -2234,7 +2190,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
Offset -= TailCallReturnAddrDelta;
}
- return Offset + FPDelta;
+ return StackOffset::getFixed(Offset + FPDelta);
}
int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
@@ -2245,24 +2201,27 @@ int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
const auto it = WinEHXMMSlotInfo.find(FI);
if (it == WinEHXMMSlotInfo.end())
- return getFrameIndexReference(MF, FI, FrameReg);
+ return getFrameIndexReference(MF, FI, FrameReg).getFixed();
FrameReg = TRI->getStackRegister();
return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
it->second;
}
-int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF,
- int FI, Register &FrameReg,
- int Adjustment) const {
+StackOffset
+X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
+ Register &FrameReg,
+ int Adjustment) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
FrameReg = TRI->getStackRegister();
- return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment;
+ return StackOffset::getFixed(MFI.getObjectOffset(FI) -
+ getOffsetOfLocalArea() + Adjustment);
}
-int X86FrameLowering::getFrameIndexReferencePreferSP(
- const MachineFunction &MF, int FI, Register &FrameReg,
- bool IgnoreSPUpdates) const {
+StackOffset
+X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
+ int FI, Register &FrameReg,
+ bool IgnoreSPUpdates) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
// Does not include any dynamic realign.
@@ -2939,8 +2898,8 @@ static unsigned getHiPELiteral(
// non-meta instructions between MBBI and MBB.end().
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator MBBI) {
- return std::all_of(
- MBB.succ_begin(), MBB.succ_end(),
+ return llvm::all_of(
+ MBB.successors(),
[](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
return MI.isMetaInstruction();
@@ -3101,7 +3060,6 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,
int Offset) const {
-
if (Offset <= 0)
return false;
@@ -3124,14 +3082,13 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
unsigned Regs[2];
unsigned FoundRegs = 0;
- auto &MRI = MBB.getParent()->getRegInfo();
- auto RegMask = Prev->getOperand(1);
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const MachineOperand &RegMask = Prev->getOperand(1);
auto &RegClass =
Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
// Try to find up to NumPops free registers.
for (auto Candidate : RegClass) {
-
// Poor man's liveness:
// Since we're immediately after a call, any register that is clobbered
// by the call and not defined by it can be considered dead.
@@ -3312,10 +3269,14 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
// If we may need to emit frameless compact unwind information, give
// up as this is currently broken: PR25614.
- return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) &&
- // The lowering of segmented stack and HiPE only support entry blocks
- // as prologue blocks: PR26107.
- // This limitation may be lifted if we fix:
+ bool CompactUnwind =
+ MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() !=
+ nullptr;
+ return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
+ !CompactUnwind) &&
+ // The lowering of segmented stack and HiPE only support entry
+ // blocks as prologue blocks: PR26107. This limitation may be
+ // lifted if we fix:
// - adjustForSegmentedStacks
// - adjustForHiPEPrologue
MF.getFunction().getCallingConv() != CallingConv::HiPE &&
@@ -3350,7 +3311,7 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
}
Register UsedReg;
- int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg);
+ int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
int EndOffset = -EHRegOffset - EHRegSize;
FuncInfo.EHRegNodeEndOffset = EndOffset;
@@ -3373,7 +3334,8 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
// MOV32rm SavedEBPOffset(%esi), %ebp
assert(X86FI->getHasSEHFramePtrSave());
int Offset =
- getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
+ getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
+ .getFixed();
assert(UsedReg == BasePtr);
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
UsedReg, true, Offset)
@@ -3418,7 +3380,7 @@ struct X86FrameSortingObject {
// at the end of our list.
struct X86FrameSortingComparator {
inline bool operator()(const X86FrameSortingObject &A,
- const X86FrameSortingObject &B) {
+ const X86FrameSortingObject &B) const {
uint64_t DensityAScaled, DensityBScaled;
// For consistency in our comparison, all invalid objects are placed
@@ -3554,13 +3516,21 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
// emitPrologue if it gets called and emits CFI.
MF.setHasWinCFI(false);
+ // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
+ // aligned. The format doesn't support misaligned stack adjustments.
+ if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
+ MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
+
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
- const Function &F = MF.getFunction();
- if (!STI.is64Bit() || !MF.hasEHFunclets() ||
- classifyEHPersonality(F.getPersonalityFn()) != EHPersonality::MSVC_CXX)
- return;
+ if (STI.is64Bit() && MF.hasEHFunclets() &&
+ classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
+ EHPersonality::MSVC_CXX) {
+ adjustFrameForMsvcCxxEh(MF);
+ }
+}
+void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
// Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
// relative to RSP after the prologue. Find the offset of the last fixed
// object, so that we can allocate a slot immediately following it. If there
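The int-to-StackOffset migration in this file tracks the target-independent interface, where a frame offset may now also carry a scalable component (for targets with scalable vector stacks); x86 only produces plain byte offsets, hence the .getFixed() at each former call site. The stand-alone model below assumes nothing beyond that two-part structure, and FrameOffset is an invented name, not LLVM's StackOffset.

#include <cassert>
#include <cstdint>

// Minimal stand-in for the StackOffset idea: a frame offset with a fixed byte
// part and a part counted in multiples of the scalable vector length.
struct FrameOffset {
  int64_t Fixed = 0;
  int64_t Scalable = 0;

  static FrameOffset getFixed(int64_t F) { return {F, 0}; }
  static FrameOffset get(int64_t F, int64_t S) { return {F, S}; }

  int64_t getFixed() const { return Fixed; }
  int64_t getScalable() const { return Scalable; }
};

int main() {
  FrameOffset Off = FrameOffset::getFixed(-32);
  assert(Off.getFixed() == -32 && Off.getScalable() == 0);
  (void)Off;
  return 0;
}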
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
index bb2e83205e71..26e80811af2e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -102,16 +103,17 @@ public:
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
bool needsFrameIndexResolution(const MachineFunction &MF) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
Register &SPReg) const;
- int getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
- Register &SPReg, int Adjustment) const;
- int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
- Register &FrameReg,
- bool IgnoreSPUpdates) const override;
+ StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
+ Register &SPReg, int Adjustment) const;
+ StackOffset
+ getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
+ Register &FrameReg,
+ bool IgnoreSPUpdates) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -222,12 +224,7 @@ private:
const DebugLoc &DL, uint64_t Offset,
uint64_t Align) const;
- /// Emit a stub to later inline the target stack probe.
- MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- bool InProlog) const;
+ void adjustFrameForMsvcCxxEh(MachineFunction &MF) const;
/// Aligns the stack pointer by ANDing it with -MaxAlign.
void BuildStackAlignAND(MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4768c5aa543d..1df9a0d1700f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -17,6 +17,7 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/ConstantRange.h"
@@ -44,6 +45,8 @@ static cl::opt<bool> EnablePromoteAnyextLoad(
"x86-promote-anyext-load", cl::init(true),
cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden);
+extern cl::opt<bool> IndirectBranchTracking;
+
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//
@@ -204,7 +207,8 @@ namespace {
void Select(SDNode *N) override;
bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
- bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+ bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
+ bool AllowSegmentRegForX32 = false);
bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
bool matchAddress(SDValue N, X86ISelAddressMode &AM);
bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
@@ -499,6 +503,8 @@ namespace {
bool tryShiftAmountMod(SDNode *N);
bool tryShrinkShlLogicImm(SDNode *N);
bool tryVPTERNLOG(SDNode *N);
+ bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentBC,
+ SDValue A, SDValue B, SDValue C, uint8_t Imm);
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
bool tryMatchBitSelect(SDNode *N);
@@ -521,9 +527,9 @@ namespace {
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
unsigned Opcode = N->getOpcode();
- if (Opcode == X86ISD::CMPM || Opcode == X86ISD::STRICT_CMPM ||
- Opcode == ISD::SETCC || Opcode == X86ISD::CMPM_SAE ||
- Opcode == X86ISD::VFPCLASS) {
+ if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM ||
+ Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC ||
+ Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) {
// We can get 256-bit 8 element types here without VLX being enabled. When
// this happens we will use 512-bit operations and the mask will not be
// zero extended.
@@ -795,12 +801,69 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
return false;
}
+static bool isEndbrImm64(uint64_t Imm) {
+// There may be additional prefix bytes between 0xF3 and 0x0F1EFA,
+// e.g. 0xF3660F1EFA or 0xF3670F1EFA.
+ if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
+ return false;
+
+ uint8_t OptionalPrefixBytes [] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
+ 0x65, 0x66, 0x67, 0xf0, 0xf2};
+ int i = 24; // the low 24 bits (0x0F1EFA) have already matched
+ while (i < 64) {
+ uint8_t Byte = (Imm >> i) & 0xFF;
+ if (Byte == 0xF3)
+ return true;
+ if (!llvm::is_contained(OptionalPrefixBytes, Byte))
+ return false;
+ i += 8;
+ }
+
+ return false;
+}
+
void X86DAGToDAGISel::PreprocessISelDAG() {
bool MadeChange = false;
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
+ // This is for CET enhancement.
+ //
+ // ENDBR32 and ENDBR64 have specific opcodes:
+ // ENDBR32: F3 0F 1E FB
+ // ENDBR64: F3 0F 1E FA
+ // We do not want attackers to find unintended ENDBR32/64 opcode
+ // matches in the binary.
+ // Here is an example:
+ // If the compiler had to generate asm for the following code:
+ //   a = 0xF30F1EFA
+ // it could, for example, generate:
+ //   mov dword ptr [a], 0xF30F1EFA
+ // In such a case, the binary would include a gadget that starts with
+ // a fake ENDBR64 opcode. Therefore, we split such a constant into
+ // multiple operations so that it does not appear in the binary.
+ if (N->getOpcode() == ISD::Constant) {
+ MVT VT = N->getSimpleValueType(0);
+ int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
+ int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;
+ if (Imm == EndbrImm || isEndbrImm64(Imm)) {
+ // Check that the cf-protection-branch is enabled.
+ Metadata *CFProtectionBranch =
+ MF->getMMI().getModule()->getModuleFlag("cf-protection-branch");
+ if (CFProtectionBranch || IndirectBranchTracking) {
+ SDLoc dl(N);
+ SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true);
+ Complement = CurDAG->getNOT(dl, Complement, VT);
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement);
+ ++I;
+ MadeChange = true;
+ continue;
+ }
+ }
+ }
+
// If this is a target specific AND node with no flag usages, turn it back
// into ISD::AND to enable test instruction matching.
if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
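As a minimal standalone sketch of the check performed above (not the in-tree code; names are illustrative): scan the upper bytes of a 64-bit immediate for an embedded ENDBR64 pattern, allowing the same optional prefix bytes, and hide a matching constant by materializing its complement and re-inverting it.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Returns true if the low bytes of Imm encode ENDBR64 (F3 0F 1E FA),
// possibly with extra legacy prefix bytes before the final 0xF3.
static bool looksLikeEndbr64(uint64_t Imm) {
  if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
    return false;
  static const uint8_t OptionalPrefix[] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
                                           0x65, 0x66, 0x67, 0xf0, 0xf2};
  for (int Shift = 24; Shift < 64; Shift += 8) {
    uint8_t Byte = (Imm >> Shift) & 0xFF;
    if (Byte == 0xF3)
      return true;
    if (std::find(std::begin(OptionalPrefix), std::end(OptionalPrefix),
                  Byte) == std::end(OptionalPrefix))
      return false;
  }
  return false;
}

int main() {
  uint64_t Imm = 0xF30F1EFA;   // would embed a fake ENDBR64 gadget
  uint64_t Hidden = ~Imm;      // emit the complement instead...
  uint64_t Rebuilt = ~Hidden;  // ...and re-invert it at run time
  std::printf("%d %llx\n", looksLikeEndbr64(Imm),
              (unsigned long long)Rebuilt); // prints: 1 f30f1efa
  return 0;
}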
@@ -1005,6 +1068,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
case ISD::STRICT_FFLOOR:
case ISD::FTRUNC:
case ISD::STRICT_FTRUNC:
+ case ISD::FROUNDEVEN:
+ case ISD::STRICT_FROUNDEVEN:
case ISD::FNEARBYINT:
case ISD::STRICT_FNEARBYINT:
case ISD::FRINT:
@@ -1020,6 +1085,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
case ISD::FFLOOR: Imm = 0x9; break;
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: Imm = 0xB; break;
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::FROUNDEVEN: Imm = 0x8; break;
case ISD::STRICT_FNEARBYINT:
case ISD::FNEARBYINT: Imm = 0xC; break;
case ISD::STRICT_FRINT:
@@ -1032,11 +1099,11 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
{N->getValueType(0), MVT::Other},
{N->getOperand(0), N->getOperand(1),
- CurDAG->getTargetConstant(Imm, dl, MVT::i8)});
+ CurDAG->getTargetConstant(Imm, dl, MVT::i32)});
else
Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
N->getOperand(0),
- CurDAG->getTargetConstant(Imm, dl, MVT::i8));
+ CurDAG->getTargetConstant(Imm, dl, MVT::i32));
--I;
CurDAG->ReplaceAllUsesWith(N, Res.getNode());
++I;
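The immediates in the switch above follow the SSE4.1/AVX rounding-control encoding: bit 3 suppresses the precision exception, bit 2 selects the MXCSR mode, and bits 1:0 pick the static rounding mode. A hedged user-level illustration of the same values via the _mm_round_ps intrinsic (not part of this patch):

#include <smmintrin.h>  // SSE4.1

__m128 round_examples(__m128 x) {
  __m128 f = _mm_round_ps(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);     // 0x9: floor
  __m128 t = _mm_round_ps(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);        // 0xB: trunc
  __m128 e = _mm_round_ps(x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); // 0x8: roundeven
  __m128 n = _mm_round_ps(x, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC);  // 0xC: nearbyint
  return _mm_add_ps(_mm_add_ps(f, t), _mm_add_ps(e, n));
}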
@@ -1547,20 +1614,26 @@ bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
}
-bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
+ bool AllowSegmentRegForX32) {
SDValue Address = N->getOperand(1);
// load gs:0 -> GS segment register.
// load fs:0 -> FS segment register.
//
- // This optimization is valid because the GNU TLS model defines that
- // gs:0 (or fs:0 on X86-64) contains its own address.
+ // This optimization is generally valid because the GNU TLS model defines that
+ // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode
+ // with 32-bit registers, as we get in ILP32 mode, those registers are first
+ // zero-extended to 64 bits and then added to the base address, which gives
+ // unwanted results when the register holds a negative value.
// For more information see http://people.redhat.com/drepper/tls.pdf
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) {
if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
!IndirectTlsSegRefs &&
(Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
- Subtarget->isTargetFuchsia()))
+ Subtarget->isTargetFuchsia())) {
+ if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
+ return true;
switch (N->getPointerInfo().getAddrSpace()) {
case X86AS::GS:
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
@@ -1571,6 +1644,8 @@ bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
// Address space X86AS::SS is not handled here, because it is not used to
// address TLS areas.
}
+ }
+ }
return true;
}
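For reference, a sketch of the property this fold relies on, for x86-64 Linux/glibc where %fs:0 stores the address of the thread control block itself; the inline asm here is illustrative, not the in-tree lowering:

#include <cstdint>
#include <cstdio>

int main() {
  // On x86-64 Linux/glibc, a single segment-relative load from %fs:0
  // yields the thread pointer, because the TCB's first field is its own
  // address.
  uintptr_t tp;
  __asm__("movq %%fs:0, %0" : "=r"(tp));
  std::printf("thread pointer: %#lx\n", (unsigned long)tp);
  return 0;
}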
@@ -1654,6 +1729,21 @@ bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
if (matchAddressRecursively(N, AM, 0))
return true;
+ // Post-processing: Make a second attempt to fold a load, if we now know
+ // that there will not be any other register. This is only performed for
+ // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded
+ // any foldable load the first time.
+ if (Subtarget->isTarget64BitILP32() &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) {
+ SDValue Save_Base_Reg = AM.Base_Reg;
+ if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
+ AM.Base_Reg = SDValue();
+ if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true))
+ AM.Base_Reg = Save_Base_Reg;
+ }
+ }
+
// Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
// a smaller encoding and avoids a scaled-index.
if (AM.Scale == 2 &&
@@ -2628,12 +2718,12 @@ bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
AM.Disp += GA->getOffset();
AM.SymbolFlags = GA->getTargetFlags();
- MVT VT = N.getSimpleValueType();
- if (VT == MVT::i32) {
+ if (Subtarget->is32Bit()) {
AM.Scale = 1;
AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
}
+ MVT VT = N.getSimpleValueType();
getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -2723,7 +2813,10 @@ bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
return false;
Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
- return CR && CR->getSignedMin().sge(-1ull << Width) &&
+ if (!CR)
+ return Width == 32 && TM.getCodeModel() == CodeModel::Small;
+
+ return CR->getSignedMin().sge(-1ull << Width) &&
CR->getSignedMax().slt(1ull << Width);
}
@@ -3117,7 +3210,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
// ADD/SUB with 1/-1 can use inc/dec if the carry flag isn't used.
if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
- unsigned NewOpc =
+ unsigned NewOpc =
((Opc == X86ISD::ADD) == IsOne)
? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
: SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
@@ -3373,7 +3466,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
// Match the shift amount as: (bitwidth - y). It should go away, too.
if (ShiftAmt.getOpcode() != ISD::SUB)
return false;
- auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
+ auto *V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
if (!V0 || V0->getZExtValue() != Bitwidth)
return false;
NBits = ShiftAmt.getOperand(1);
@@ -3926,6 +4019,129 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
return true;
}
+bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
+ SDNode *ParentBC, SDValue A, SDValue B,
+ SDValue C, uint8_t Imm) {
+ assert(A.isOperandOf(ParentA));
+ assert(B.isOperandOf(ParentBC));
+ assert(C.isOperandOf(ParentBC));
+
+ auto tryFoldLoadOrBCast =
+ [this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp, SDValue &Segment) {
+ if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
+ return true;
+
+ // Not a load, check for broadcast which may be behind a bitcast.
+ if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
+ P = L.getNode();
+ L = L.getOperand(0);
+ }
+
+ if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
+ return false;
+
+ // Only 32 and 64 bit broadcasts are supported.
+ auto *MemIntr = cast<MemIntrinsicSDNode>(L);
+ unsigned Size = MemIntr->getMemoryVT().getSizeInBits();
+ if (Size != 32 && Size != 64)
+ return false;
+
+ return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
+ };
+
+ bool FoldedLoad = false;
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (tryFoldLoadOrBCast(Root, ParentBC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ FoldedLoad = true;
+ } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
+ Tmp4)) {
+ FoldedLoad = true;
+ std::swap(A, C);
+ // Swap bits 1/4 and 3/6.
+ uint8_t OldImm = Imm;
+ Imm = OldImm & 0xa5;
+ if (OldImm & 0x02) Imm |= 0x10;
+ if (OldImm & 0x10) Imm |= 0x02;
+ if (OldImm & 0x08) Imm |= 0x40;
+ if (OldImm & 0x40) Imm |= 0x08;
+ } else if (tryFoldLoadOrBCast(Root, ParentBC, B, Tmp0, Tmp1, Tmp2, Tmp3,
+ Tmp4)) {
+ FoldedLoad = true;
+ std::swap(B, C);
+ // Swap bits 1/2 and 5/6.
+ uint8_t OldImm = Imm;
+ Imm = OldImm & 0x99;
+ if (OldImm & 0x02) Imm |= 0x04;
+ if (OldImm & 0x04) Imm |= 0x02;
+ if (OldImm & 0x20) Imm |= 0x40;
+ if (OldImm & 0x40) Imm |= 0x20;
+ }
+
+ SDLoc DL(Root);
+
+ SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8);
+
+ MVT NVT = Root->getSimpleValueType(0);
+
+ MachineSDNode *MNode;
+ if (FoldedLoad) {
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
+
+ unsigned Opc;
+ if (C.getOpcode() == X86ISD::VBROADCAST_LOAD) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(C);
+ unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
+ assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!");
+
+ bool UseD = EltSize == 32;
+ if (NVT.is128BitVector())
+ Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi;
+ else if (NVT.is256BitVector())
+ Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi;
+ else if (NVT.is512BitVector())
+ Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi;
+ else
+ llvm_unreachable("Unexpected vector size!");
+ } else {
+ bool UseD = NVT.getVectorElementType() == MVT::i32;
+ if (NVT.is128BitVector())
+ Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi;
+ else if (NVT.is256BitVector())
+ Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi;
+ else if (NVT.is512BitVector())
+ Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi;
+ else
+ llvm_unreachable("Unexpected vector size!");
+ }
+
+ SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(0)};
+ MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
+
+ // Update the chain.
+ ReplaceUses(C.getValue(1), SDValue(MNode, 1));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(C)->getMemOperand()});
+ } else {
+ bool UseD = NVT.getVectorElementType() == MVT::i32;
+ unsigned Opc;
+ if (NVT.is128BitVector())
+ Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri;
+ else if (NVT.is256BitVector())
+ Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri;
+ else if (NVT.is512BitVector())
+ Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri;
+ else
+ llvm_unreachable("Unexpected vector size!");
+
+ MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm});
+ }
+
+ ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0));
+ CurDAG->RemoveDeadNode(Root);
+ return true;
+}
+
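The masked bit swaps above come from permuting the truth-table index of the VPTERNLOG immediate (index bit 2 selects A, bit 1 selects B, bit 0 selects C). A standalone sketch, assuming that convention, which recomputes an immediate after exchanging two operands and cross-checks the swap pairs used in matchVPTERNLOG:

#include <cassert>
#include <cstdint>

// Recompute a ternary-logic immediate after swapping two of the three
// operands. Operand positions: 2 = A, 1 = B, 0 = C (truth-table index bits).
static uint8_t swapTernlogOperands(uint8_t Imm, unsigned OpX, unsigned OpY) {
  uint8_t New = 0;
  for (unsigned Idx = 0; Idx < 8; ++Idx) {
    unsigned BitX = (Idx >> OpX) & 1, BitY = (Idx >> OpY) & 1;
    unsigned Swapped = Idx & ~((1u << OpX) | (1u << OpY));
    Swapped |= (BitX << OpY) | (BitY << OpX);
    if (Imm & (1u << Swapped))
      New |= 1u << Idx;
  }
  return New;
}

int main() {
  assert(swapTernlogOperands(0x02, 2, 0) == 0x10); // A<->C: bit 1 -> bit 4
  assert(swapTernlogOperands(0x08, 2, 0) == 0x40); // A<->C: bit 3 -> bit 6
  assert(swapTernlogOperands(0x20, 1, 0) == 0x40); // B<->C: bit 5 -> bit 6
  return 0;
}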
// Try to match two logic ops to a VPTERNLOG.
// FIXME: Handle inverted inputs?
// FIXME: Handle more complex patterns that use an operand more than once?
@@ -3941,68 +4157,65 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
return false;
- unsigned Opc1 = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- auto isLogicOp = [](unsigned Opc) {
- return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
- Opc == X86ISD::ANDNP;
+ auto getFoldableLogicOp = [](SDValue Op) {
+ // Peek through single use bitcast.
+ if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
+ Op = Op.getOperand(0);
+
+ if (!Op.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Op.getOpcode();
+ if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
+ Opc == X86ISD::ANDNP)
+ return Op;
+
+ return SDValue();
};
- SDValue A, B, C;
- unsigned Opc2;
- if (isLogicOp(N1.getOpcode()) && N1.hasOneUse()) {
- Opc2 = N1.getOpcode();
+ SDValue A, FoldableOp;
+ if ((FoldableOp = getFoldableLogicOp(N1))) {
A = N0;
- B = N1.getOperand(0);
- C = N1.getOperand(1);
- } else if (isLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
- Opc2 = N0.getOpcode();
+ } else if ((FoldableOp = getFoldableLogicOp(N0))) {
A = N1;
- B = N0.getOperand(0);
- C = N0.getOperand(1);
} else
return false;
- uint64_t Imm;
- switch (Opc1) {
+ SDValue B = FoldableOp.getOperand(0);
+ SDValue C = FoldableOp.getOperand(1);
+
+ // We can build the appropriate control immediate by performing the logic
+ // operation we're matching using these constants for A, B, and C.
+ const uint8_t TernlogMagicA = 0xf0;
+ const uint8_t TernlogMagicB = 0xcc;
+ const uint8_t TernlogMagicC = 0xaa;
+
+ uint8_t Imm;
+ switch (FoldableOp.getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
- case ISD::AND:
- switch (Opc2) {
- default: llvm_unreachable("Unexpected opcode!");
- case ISD::AND: Imm = 0x80; break;
- case ISD::OR: Imm = 0xe0; break;
- case ISD::XOR: Imm = 0x60; break;
- case X86ISD::ANDNP: Imm = 0x20; break;
- }
- break;
- case ISD::OR:
- switch (Opc2) {
- default: llvm_unreachable("Unexpected opcode!");
- case ISD::AND: Imm = 0xf8; break;
- case ISD::OR: Imm = 0xfe; break;
- case ISD::XOR: Imm = 0xf6; break;
- case X86ISD::ANDNP: Imm = 0xf2; break;
- }
- break;
- case ISD::XOR:
- switch (Opc2) {
- default: llvm_unreachable("Unexpected opcode!");
- case ISD::AND: Imm = 0x78; break;
- case ISD::OR: Imm = 0x1e; break;
- case ISD::XOR: Imm = 0x96; break;
- case X86ISD::ANDNP: Imm = 0xd2; break;
- }
+ case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break;
+ case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break;
+ case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break;
+ case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break;
+ }
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case X86ISD::ANDNP:
+ if (A == N0)
+ Imm &= ~TernlogMagicA;
+ else
+ Imm = ~(Imm) & TernlogMagicA;
break;
+ case ISD::AND: Imm &= TernlogMagicA; break;
+ case ISD::OR: Imm |= TernlogMagicA; break;
+ case ISD::XOR: Imm ^= TernlogMagicA; break;
}
- SDLoc DL(N);
- SDValue New = CurDAG->getNode(X86ISD::VPTERNLOG, DL, NVT, A, B, C,
- CurDAG->getTargetConstant(Imm, DL, MVT::i8));
- ReplaceNode(N, New.getNode());
- SelectCode(New.getNode());
- return true;
+ return matchVPTERNLOG(N, N, FoldableOp.getNode(), A, B, C, Imm);
}
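The "magic" constants encode each operand's column of the 3-input truth table (A = 0xf0, B = 0xcc, C = 0xaa), so evaluating the matched logic ops on the constants themselves produces the VPTERNLOG immediate. A standalone sketch of that idea, covering only the case where A is the non-inverted outer operand (the ANDNP-outer cases in the patch adjust the A column the same way), with values cross-checked against the table this hunk removes:

#include <cassert>
#include <cstdint>

static const uint8_t MagicA = 0xf0, MagicB = 0xcc, MagicC = 0xaa;

enum class BoolOp { And, Or, Xor, AndNot }; // AndNot(x, y) = ~x & y

static uint8_t apply(BoolOp Op, uint8_t X, uint8_t Y) {
  switch (Op) {
  case BoolOp::And:    return X & Y;
  case BoolOp::Or:     return X | Y;
  case BoolOp::Xor:    return X ^ Y;
  case BoolOp::AndNot: return ~X & Y;
  }
  return 0;
}

// Immediate for Outer(A, Inner(B, C)).
static uint8_t ternlogImm(BoolOp Outer, BoolOp Inner) {
  return apply(Outer, MagicA, apply(Inner, MagicB, MagicC));
}

int main() {
  assert(ternlogImm(BoolOp::Or, BoolOp::And) == 0xf8);     // or(a, and(b, c))
  assert(ternlogImm(BoolOp::Xor, BoolOp::Xor) == 0x96);    // xor(a, xor(b, c))
  assert(ternlogImm(BoolOp::And, BoolOp::AndNot) == 0x20); // and(a, andn(b, c))
  return 0;
}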
/// If the high bits of an 'and' operand are known zero, try setting the
@@ -4069,6 +4282,7 @@ bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
// A negative mask allows a smaller encoding. Create a new 'and' node.
SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
+ insertDAGNode(*CurDAG, SDValue(And, 0), NewMask);
SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
ReplaceNode(And, NewAnd.getNode());
SelectCode(NewAnd.getNode());
@@ -4102,15 +4316,15 @@ VPTESTM_CASE(v16i16, WZ256##SUFFIX) \
VPTESTM_CASE(v64i8, BZ##SUFFIX) \
VPTESTM_CASE(v32i16, WZ##SUFFIX)
- if (FoldedLoad) {
+ if (FoldedBCast) {
switch (TestVT.SimpleTy) {
- VPTESTM_FULL_CASES(rm)
+ VPTESTM_BROADCAST_CASES(rmb)
}
}
- if (FoldedBCast) {
+ if (FoldedLoad) {
switch (TestVT.SimpleTy) {
- VPTESTM_BROADCAST_CASES(rmb)
+ VPTESTM_FULL_CASES(rm)
}
}
@@ -4169,79 +4383,56 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
}
}
- // Without VLX we need to widen the load.
+ // Without VLX we need to widen the operation.
bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
- // We can only fold loads if the sources are unique.
- bool CanFoldLoads = Src0 != Src1;
+ auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L,
+ SDValue &Base, SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ // If we need to widen, we can't fold the load.
+ if (!Widen)
+ if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
+ return true;
- // Try to fold loads unless we need to widen.
- bool FoldedLoad = false;
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
- if (!Widen && CanFoldLoads) {
- Load = Src1;
- FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
- Tmp4);
- if (!FoldedLoad) {
- // And is computative.
- Load = Src0;
- FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
- Tmp3, Tmp4);
- if (FoldedLoad)
- std::swap(Src0, Src1);
- }
- }
+ // If we didn't fold a load, try to match broadcast. No widening limitation
+ // for this. But only 32 and 64 bit types are supported.
+ if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64)
+ return false;
- auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
// Look through single use bitcasts.
- if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse()) {
- Parent = Src.getNode();
- Src = Src.getOperand(0);
+ if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
+ P = L.getNode();
+ L = L.getOperand(0);
}
- if (Src.getOpcode() == X86ISD::VBROADCAST_LOAD && Src.hasOneUse()) {
- auto *MemIntr = cast<MemIntrinsicSDNode>(Src);
- if (MemIntr->getMemoryVT().getSizeInBits() == CmpSVT.getSizeInBits())
- return Src;
- }
+ if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
+ return false;
- return SDValue();
+ auto *MemIntr = cast<MemIntrinsicSDNode>(L);
+ if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits())
+ return false;
+
+ return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
};
- // If we didn't fold a load, try to match broadcast. No widening limitation
- // for this. But only 32 and 64 bit types are supported.
- bool FoldedBCast = false;
- if (!FoldedLoad && CanFoldLoads &&
- (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
- SDNode *ParentNode = N0.getNode();
- if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
- FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0,
- Tmp1, Tmp2, Tmp3, Tmp4);
- }
+ // We can only fold loads if the sources are unique.
+ bool CanFoldLoads = Src0 != Src1;
- // Try the other operand.
- if (!FoldedBCast) {
- SDNode *ParentNode = N0.getNode();
- if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
- FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0,
- Tmp1, Tmp2, Tmp3, Tmp4);
- if (FoldedBCast)
- std::swap(Src0, Src1);
- }
+ bool FoldedLoad = false;
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (CanFoldLoads) {
+ FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2,
+ Tmp3, Tmp4);
+ if (!FoldedLoad) {
+ // And is commutative.
+ FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1,
+ Tmp2, Tmp3, Tmp4);
+ if (FoldedLoad)
+ std::swap(Src0, Src1);
}
}
- auto getMaskRC = [](MVT MaskVT) {
- switch (MaskVT.SimpleTy) {
- default: llvm_unreachable("Unexpected VT!");
- case MVT::v2i1: return X86::VK2RegClassID;
- case MVT::v4i1: return X86::VK4RegClassID;
- case MVT::v8i1: return X86::VK8RegClassID;
- case MVT::v16i1: return X86::VK16RegClassID;
- case MVT::v32i1: return X86::VK32RegClassID;
- case MVT::v64i1: return X86::VK64RegClassID;
- }
- };
+ bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD;
bool IsMasked = InMask.getNode() != nullptr;
@@ -4260,13 +4451,12 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
CmpVT), 0);
Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
- assert(!FoldedLoad && "Shouldn't have folded the load");
if (!FoldedBCast)
Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
if (IsMasked) {
// Widen the mask.
- unsigned RegClass = getMaskRC(MaskVT);
+ unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID();
SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
dl, MaskVT, InMask, RC), 0);
@@ -4278,23 +4468,23 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
IsMasked);
MachineSDNode *CNode;
- if (FoldedLoad || FoldedBCast) {
+ if (FoldedLoad) {
SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
if (IsMasked) {
SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
- Load.getOperand(0) };
+ Src1.getOperand(0) };
CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
} else {
SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
- Load.getOperand(0) };
+ Src1.getOperand(0) };
CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
}
// Update the chain.
- ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
+ ReplaceUses(Src1.getValue(1), SDValue(CNode, 1));
// Record the mem-refs
- CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Load)->getMemOperand()});
+ CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()});
} else {
if (IsMasked)
CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
@@ -4304,7 +4494,7 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
// If we widened, we need to shrink the mask VT.
if (Widen) {
- unsigned RegClass = getMaskRC(ResVT);
+ unsigned RegClass = TLI->getRegClassFor(ResVT)->getID();
SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
dl, ResVT, SDValue(CNode, 0), RC);
@@ -4360,8 +4550,9 @@ bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
ReplaceNode(N, Ternlog.getNode());
- SelectCode(Ternlog.getNode());
- return true;
+
+ return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(),
+ A, B, C, 0xCA);
}
void X86DAGToDAGISel::Select(SDNode *Node) {
@@ -4377,6 +4568,95 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
default: break;
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = Node->getConstantOperandVal(1);
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_encodekey128:
+ case Intrinsic::x86_encodekey256: {
+ if (!Subtarget->hasKL())
+ break;
+
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case Intrinsic::x86_encodekey128: Opcode = X86::ENCODEKEY128; break;
+ case Intrinsic::x86_encodekey256: Opcode = X86::ENCODEKEY256; break;
+ }
+
+ SDValue Chain = Node->getOperand(0);
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3),
+ SDValue());
+ if (Opcode == X86::ENCODEKEY256)
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4),
+ Chain.getValue(1));
+
+ MachineSDNode *Res = CurDAG->getMachineNode(
+ Opcode, dl, Node->getVTList(),
+ {Node->getOperand(2), Chain, Chain.getValue(1)});
+ ReplaceNode(Node, Res);
+ return;
+ }
+ case Intrinsic::x86_tileloadd64_internal: {
+ if (!Subtarget->hasAMXTILE())
+ break;
+ unsigned Opc = X86::PTILELOADDV;
+ // _tile_loadd_internal(row, col, buf, STRIDE)
+ SDValue Base = Node->getOperand(4);
+ SDValue Scale = getI8Imm(1, dl);
+ SDValue Index = Node->getOperand(5);
+ SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue Segment = CurDAG->getRegister(0, MVT::i16);
+ SDValue CFG = CurDAG->getRegister(0, MVT::Untyped);
+ SDValue Chain = Node->getOperand(0);
+ MachineSDNode *CNode;
+ SDValue Ops[] = {Node->getOperand(2),
+ Node->getOperand(3),
+ Base,
+ Scale,
+ Index,
+ Disp,
+ Segment,
+ CFG,
+ Chain};
+ CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
+ ReplaceNode(Node, CNode);
+ return;
+ }
+ case Intrinsic::x86_tdpbssd_internal: {
+ if (!Subtarget->hasAMXTILE())
+ break;
+ SDValue Chain = Node->getOperand(0);
+ unsigned Opc = X86::PTDPBSSDV;
+ SDValue CFG = CurDAG->getRegister(0, MVT::Untyped);
+ SDValue Ops[] = {Node->getOperand(2),
+ Node->getOperand(3),
+ Node->getOperand(4),
+ Node->getOperand(5),
+ Node->getOperand(6),
+ Node->getOperand(7),
+ CFG,
+ Chain};
+ MachineSDNode *CNode =
+ CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
+ ReplaceNode(Node, CNode);
+ return;
+ }
+ case Intrinsic::x86_tilezero_internal: {
+ if (!Subtarget->hasAMXTILE())
+ break;
+ unsigned Opc = X86::PTILEZEROV;
+ SDValue Chain = Node->getOperand(0);
+ SDValue CFG = CurDAG->getRegister(0, MVT::Untyped);
+ SDValue Ops[] = {Node->getOperand(2), Node->getOperand(3), CFG, Chain};
+ MachineSDNode *CNode =
+ CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
+ ReplaceNode(Node, CNode);
+ return;
+ }
+ }
+ break;
+ }
case ISD::INTRINSIC_VOID: {
unsigned IntNo = Node->getConstantOperandVal(1);
switch (IntNo) {
@@ -4431,6 +4711,31 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
+ case Intrinsic::x86_tilestored64_internal: {
+ unsigned Opc = X86::PTILESTOREDV;
+ // _tile_stored_internal(row, col, buf, STRIDE, c)
+ SDValue Base = Node->getOperand(4);
+ SDValue Scale = getI8Imm(1, dl);
+ SDValue Index = Node->getOperand(5);
+ SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue Segment = CurDAG->getRegister(0, MVT::i16);
+ SDValue CFG = CurDAG->getRegister(0, MVT::Untyped);
+ SDValue Chain = Node->getOperand(0);
+ MachineSDNode *CNode;
+ SDValue Ops[] = {Node->getOperand(2),
+ Node->getOperand(3),
+ Base,
+ Scale,
+ Index,
+ Disp,
+ Segment,
+ Node->getOperand(6),
+ CFG,
+ Chain};
+ CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
+ ReplaceNode(Node, CNode);
+ return;
+ }
case Intrinsic::x86_tileloadd64:
case Intrinsic::x86_tileloaddt164:
case Intrinsic::x86_tilestored64: {
@@ -4511,6 +4816,19 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
break;
+ case X86ISD::VPTERNLOG: {
+ uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue();
+ if (matchVPTERNLOG(Node, Node, Node, Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2), Imm))
+ return;
+ break;
+ }
+
+ case X86ISD::ANDNP:
+ if (tryVPTERNLOG(Node))
+ return;
+ break;
+
case ISD::AND:
if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
// Try to form a masked VPTESTM. Operands can be in either order.
@@ -5609,6 +5927,62 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
CurDAG->RemoveDeadNode(Node);
return;
}
+ case X86ISD::AESENCWIDE128KL:
+ case X86ISD::AESDECWIDE128KL:
+ case X86ISD::AESENCWIDE256KL:
+ case X86ISD::AESDECWIDE256KL: {
+ if (!Subtarget->hasWIDEKL())
+ break;
+
+ unsigned Opcode;
+ switch (Node->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case X86ISD::AESENCWIDE128KL:
+ Opcode = X86::AESENCWIDE128KL;
+ break;
+ case X86ISD::AESDECWIDE128KL:
+ Opcode = X86::AESDECWIDE128KL;
+ break;
+ case X86ISD::AESENCWIDE256KL:
+ Opcode = X86::AESENCWIDE256KL;
+ break;
+ case X86ISD::AESDECWIDE256KL:
+ Opcode = X86::AESDECWIDE256KL;
+ break;
+ }
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Addr = Node->getOperand(1);
+
+ SDValue Base, Scale, Index, Disp, Segment;
+ if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment))
+ break;
+
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2),
+ SDValue());
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3),
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4),
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5),
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6),
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7),
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8),
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9),
+ Chain.getValue(1));
+
+ MachineSDNode *Res = CurDAG->getMachineNode(
+ Opcode, dl, Node->getVTList(),
+ {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)});
+ CurDAG->setNodeMemRefs(Res, cast<MemSDNode>(Node)->getMemOperand());
+ ReplaceNode(Node, Res);
+ return;
+ }
}
SelectCode(Node);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index 56690c3c555b..1e2407c7e7f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -76,6 +77,14 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
" of the loop header PC will be 0)."),
cl::Hidden);
+static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
+ "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
+ cl::desc(
+ "Sets the preferable loop alignment for experiments (as log2 bytes) "
+ "for innermost loops only. If specified, this option overrides "
+ "alignment set by x86-experimental-pref-loop-alignment."),
+ cl::Hidden);
+
static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -135,19 +144,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addBypassSlowDiv(64, 32);
}
- if (Subtarget.isTargetWindowsMSVC() ||
- Subtarget.isTargetWindowsItanium()) {
- // Setup Windows compiler runtime calls.
- setLibcallName(RTLIB::SDIV_I64, "_alldiv");
- setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
- setLibcallName(RTLIB::SREM_I64, "_allrem");
- setLibcallName(RTLIB::UREM_I64, "_aullrem");
- setLibcallName(RTLIB::MUL_I64, "_allmul");
- setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
- setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
- setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
- setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
- setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
+ // Setup Windows compiler runtime calls.
+ if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
+ { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
+ { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
+ { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
+ { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
}
if (Subtarget.getTargetTriple().isOSMSVCRT()) {
@@ -193,8 +207,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasCMov()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
setOperationAction(ISD::ABS , MVT::i32 , Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
- setOperationAction(ISD::ABS , MVT::i64 , Custom);
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
@@ -278,6 +293,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
+ if (Subtarget.hasSSE2()) {
+ // Custom lowering for saturating float to int conversions.
+ // We handle promotion to larger result types manually.
+ for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
+ }
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
+ }
+ }
+
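For context, the saturating conversions being custom-lowered here clamp out-of-range inputs to the destination type's limits and map NaN to zero, which is the documented semantics of llvm.fptosi.sat/llvm.fptoui.sat. A scalar reference sketch of that behavior, independent of the X86 lowering itself:

#include <cassert>
#include <cmath>
#include <cstdint>

// Reference semantics for a saturating float -> int32 conversion.
static int32_t fptosi_sat32(double X) {
  if (std::isnan(X))
    return 0;
  if (X <= (double)INT32_MIN)
    return INT32_MIN;
  if (X >= (double)INT32_MAX)
    return INT32_MAX;
  return (int32_t)X; // in range: ordinary truncation toward zero
}

int main() {
  assert(fptosi_sat32(1e12) == INT32_MAX);
  assert(fptosi_sat32(-1e12) == INT32_MIN);
  assert(fptosi_sat32(NAN) == 0);
  assert(fptosi_sat32(-3.7) == -3);
  return 0;
}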
// Handle address space casts between mixed sized pointers.
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
@@ -384,6 +412,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::f80, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
+ setOperationAction(ISD::PARITY, MVT::i8, Custom);
if (Subtarget.hasPOPCNT()) {
setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
} else {
@@ -394,6 +423,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
else
setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
+
+ setOperationAction(ISD::PARITY, MVT::i16, Custom);
+ setOperationAction(ISD::PARITY, MVT::i32, Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::PARITY, MVT::i64, Custom);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
@@ -487,6 +521,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+ setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -915,9 +950,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
- setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
- setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
@@ -1081,6 +1114,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
+ setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
setOperationAction(ISD::FROUND, RoundedTy, Custom);
}
@@ -1094,6 +1129,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
+ setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
+
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1134,6 +1171,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
+ if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
+ setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
+ }
+
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
@@ -1175,6 +1216,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
setOperationAction(ISD::FROUND, VT, Custom);
@@ -1302,6 +1345,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
@@ -1560,6 +1607,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
setOperationAction(ISD::FROUND, VT, Custom);
}
@@ -1688,10 +1737,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasVBMI2()) {
- for (auto VT : { MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
+ for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
+ MVT::v16i16, MVT::v8i32, MVT::v4i64,
+ MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
+
+ setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
+ setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
+ setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
+ setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
}
}// useAVX512Regs
@@ -1858,20 +1914,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
- if (Subtarget.hasVBMI2()) {
- // TODO: Make these legal even without VLX?
- for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
- }
- }
-
setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
}
+ if (Subtarget.hasAMXTILE()) {
+ addRegisterClass(MVT::x86amx, &X86::TILERegClass);
+ }
+
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -1901,6 +1952,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ADDCARRY, VT, Custom);
setOperationAction(ISD::SUBCARRY, VT, Custom);
setOperationAction(ISD::SETCCCARRY, VT, Custom);
+ setOperationAction(ISD::SADDO_CARRY, VT, Custom);
+ setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
}
if (!Subtarget.is64Bit()) {
@@ -1923,8 +1976,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i128, Custom);
setOperationAction(ISD::SREM, MVT::i128, Custom);
setOperationAction(ISD::UREM, MVT::i128, Custom);
- setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
- setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
}
// On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
@@ -2456,13 +2507,23 @@ Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
// <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
return SegmentOffset(IRB, 0x10, getAddressSpace());
} else {
+ unsigned AddressSpace = getAddressSpace();
+ // Some users may customize the base register and offset via the
+ // -stack-protector-guard-reg and -stack-protector-guard-offset options.
+ unsigned Offset = getTargetMachine().Options.StackProtectorGuardOffset;
+ // If -stack-protector-guard-offset was not set:
// %fs:0x28, unless we're using a Kernel code model, in which case
// it's %gs:0x28. gs:0x14 on i386.
- unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
- return SegmentOffset(IRB, Offset, getAddressSpace());
+ if (Offset == (unsigned)-1)
+ Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
+
+ const auto &GuardReg = getTargetMachine().Options.StackProtectorGuardReg;
+ if (GuardReg == "fs")
+ AddressSpace = X86AS::FS;
+ else if (GuardReg == "gs")
+ AddressSpace = X86AS::GS;
+ return SegmentOffset(IRB, Offset, AddressSpace);
}
}
-
return TargetLowering::getIRStackGuard(IRB);
}
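When no guard register or offset override is supplied, the code above falls back to the traditional canary slot. A hedged user-level sketch of what that default resolves to on x86-64 Linux/glibc (%fs:0x28; i386 uses %gs:0x14); the inline asm is illustrative only:

#include <cstdint>
#include <cstdio>

int main() {
  // Default x86-64 stack protector slot: %fs:0x28.
  uintptr_t canary;
  __asm__("movq %%fs:0x28, %0" : "=r"(canary));
  std::printf("stack guard value: %#lx\n", (unsigned long)canary);
  return 0;
}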
@@ -2484,8 +2545,13 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const {
}
return;
}
+
+ auto GuardMode = getTargetMachine().Options.StackProtectorGuard;
+
// glibc, bionic, and Fuchsia have a special slot for the stack guard.
- if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
+ if ((GuardMode == llvm::StackProtectorGuards::TLS ||
+ GuardMode == llvm::StackProtectorGuards::None)
+ && hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
return;
TargetLowering::insertSSPDeclarations(M);
}
@@ -2531,17 +2597,6 @@ Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
return TargetLowering::getSafeStackPointerLocation(IRB);
}
-bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
- unsigned DestAS) const {
- assert(SrcAS != DestAS && "Expected different address spaces!");
-
- const TargetMachine &TM = getTargetMachine();
- if (TM.getPointerSize(SrcAS) != TM.getPointerSize(DestAS))
- return false;
-
- return SrcAS < 256 && DestAS < 256;
-}
-
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -3046,8 +3101,9 @@ SDValue X86TargetLowering::LowerCallResult(
// This truncation won't change the value.
DAG.getIntPtrConstant(1, dl));
- if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
+ if (VA.isExtInLoc()) {
if (VA.getValVT().isVector() &&
+ VA.getValVT().getScalarType() == MVT::i1 &&
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
// promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
@@ -3115,7 +3171,7 @@ argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SDValue Chain, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
+ SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
return DAG.getMemcpy(
Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
@@ -3364,8 +3420,8 @@ private:
void forwardMustTailParameters(SDValue &Chain);
- bool is64Bit() { return Subtarget.is64Bit(); }
- bool isWin64() { return Subtarget.isCallingConvWin64(CallConv); }
+ bool is64Bit() const { return Subtarget.is64Bit(); }
+ bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
X86MachineFunctionInfo *FuncInfo;
const SDLoc &DL;
@@ -3476,11 +3532,10 @@ void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
SaveXMMOps.push_back(Chain);
SaveXMMOps.push_back(ALVal);
SaveXMMOps.push_back(
- DAG.getIntPtrConstant(FuncInfo->getRegSaveFrameIndex(), DL));
+ DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
SaveXMMOps.push_back(
- DAG.getIntPtrConstant(FuncInfo->getVarArgsFPOffset(), DL));
- SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
- LiveXMMRegs.end());
+ DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
+ llvm::append_range(SaveXMMOps, LiveXMMRegs);
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
MVT::Other, SaveXMMOps));
}
@@ -3754,7 +3809,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
// same, so the size of funclets' (mostly empty) frames is dictated by
// how far this slot is from the bottom (since they allocate just enough
// space to accommodate holding this slot at the correct offset).
- int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSS=*/false);
+ int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
EHInfo->PSPSymFrameIdx = PSPSymFI;
}
}
@@ -3861,6 +3916,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CB);
bool HasNoCfCheck =
(CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
+ bool IsIndirectCall = (CI && CI->isIndirectCall());
const Module *M = MF.getMMI().getModule();
Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
@@ -4100,9 +4156,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Subtarget.isPICStyleGOT()) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
+ // GOT pointer (except regcall).
if (!isTailCall) {
- RegsToPass.push_back(std::make_pair(
+ // An indirect call with the RegCall calling convention may use up all
+ // the general registers, so it is not suitable to reserve EBX for the
+ // GOT address; just let the register allocator handle it.
+ if (CallConv != CallingConv::X86_RegCall)
+ RegsToPass.push_back(std::make_pair(
Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
getPointerTy(DAG.getDataLayout()))));
} else {
@@ -4269,7 +4329,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(Callee);
if (isTailCall)
- Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
@@ -4343,7 +4403,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
return Ret;
}
- if (HasNoCfCheck && IsCFProtectionSupported) {
+ if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
} else {
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
@@ -4462,7 +4522,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!Register::isVirtualRegister(VR))
+ if (!VR.isVirtual())
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
@@ -4514,7 +4574,8 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
return false;
- if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
+ if (VA.getLocVT().getFixedSizeInBits() >
+ Arg.getValueSizeInBits().getFixedSize()) {
// If the argument location is wider than the argument type, check that any
// extension flags match.
if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
@@ -5022,13 +5083,47 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
+ Info.flags = MachineMemOperand::MONone;
+ Info.offset = 0;
const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
- if (!IntrData)
+ if (!IntrData) {
+ switch (Intrinsic) {
+ case Intrinsic::x86_aesenc128kl:
+ case Intrinsic::x86_aesdec128kl:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
+ Info.align = Align(1);
+ Info.flags |= MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::x86_aesenc256kl:
+ case Intrinsic::x86_aesdec256kl:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
+ Info.align = Align(1);
+ Info.flags |= MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::x86_aesencwide128kl:
+ case Intrinsic::x86_aesdecwide128kl:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
+ Info.align = Align(1);
+ Info.flags |= MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::x86_aesencwide256kl:
+ case Intrinsic::x86_aesdecwide256kl:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
+ Info.align = Align(1);
+ Info.flags |= MachineMemOperand::MOLoad;
+ return true;
+ }
return false;
-
- Info.flags = MachineMemOperand::MONone;
- Info.offset = 0;
+ }
switch (IntrData->Type) {
case TRUNCATE_TO_MEM_VI8:
@@ -5098,7 +5193,7 @@ bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
-
+
// "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
// relocation target a movq or addq instruction: don't let the load shrink.
SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
@@ -5271,6 +5366,7 @@ bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
// width.
if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
return false;
+
return true;
}
@@ -5414,6 +5510,14 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
return ((Val == SM_SentinelUndef) || (Val == CmpVal));
}
+/// Return true if every element in Mask is the undef sentinel value or equal to
+/// the specified value..
+static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
+ return llvm::all_of(Mask, [CmpVal](int M) {
+ return (M == SM_SentinelUndef) || (M == CmpVal);
+ });
+}
+
/// Val is either the undef or zero sentinel value.
static bool isUndefOrZero(int Val) {
return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
@@ -5820,7 +5924,7 @@ static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl) {
- assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&
+ assert(Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type");
SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
@@ -6184,6 +6288,22 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
return DAG.getBitcast(VT, Vec);
}
+// Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
+static unsigned getOpcode_EXTEND(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::ANY_EXTEND:
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return ISD::ANY_EXTEND;
+ case ISD::ZERO_EXTEND:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return ISD::ZERO_EXTEND;
+ case ISD::SIGN_EXTEND:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return ISD::SIGN_EXTEND;
+ }
+ llvm_unreachable("Unknown opcode");
+}
+
// Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode.
static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) {
switch (Opcode) {
@@ -6200,8 +6320,8 @@ static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) {
llvm_unreachable("Unknown opcode");
}
-static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue In, SelectionDAG &DAG) {
+static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue In, SelectionDAG &DAG) {
EVT InVT = In.getValueType();
assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
assert((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
@@ -6253,8 +6373,10 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG, bool OneUse = false) {
return SDValue();
}
-void llvm::createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
+void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask,
bool Lo, bool Unary) {
+ assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 &&
+ "Illegal vector type to unpack");
assert(Mask.empty() && "Expected an empty shuffle mask vector");
int NumElts = VT.getVectorNumElements();
int NumEltsInLane = 128 / VT.getScalarSizeInBits();
@@ -6283,7 +6405,7 @@ void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
}
/// Returns a vector_shuffle node for an unpackl operation.
-static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
+static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
SDValue V1, SDValue V2) {
SmallVector<int, 8> Mask;
createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
@@ -6291,7 +6413,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
}
/// Returns a vector_shuffle node for an unpackh operation.
-static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
+static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
SDValue V1, SDValue V2) {
SmallVector<int, 8> Mask;
createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
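A standalone sketch of the masks these helpers build: per 128-bit lane, interleave the low (or high) halves of the two inputs, with the second input's elements offset by the element count. The loop body of createUnpackShuffleMask is not shown in this hunk, so the index math below is an assumption about the standard unpack pattern; the printed masks are the usual unpcklps/unpckhps orderings in any case.

#include <cstdio>
#include <vector>

// Assumed mirror of the unpack index math: ScalarBits is the element width,
// TotalBits a multiple of 128.
static std::vector<int> unpackMask(unsigned TotalBits, unsigned ScalarBits,
                                   bool Lo, bool Unary) {
  unsigned NumElts = TotalBits / ScalarBits;
  unsigned EltsPerLane = 128 / ScalarBits;
  std::vector<int> Mask;
  for (unsigned i = 0; i != NumElts; ++i) {
    unsigned LaneStart = (i / EltsPerLane) * EltsPerLane;
    int Pos = (i % EltsPerLane) / 2 + LaneStart;
    Pos += (Unary ? 0 : NumElts * (i % 2)); // odd slots come from the 2nd input
    Pos += (Lo ? 0 : EltsPerLane / 2);      // high-half variant
    Mask.push_back(Pos);
  }
  return Mask;
}

int main() {
  for (int M : unpackMask(128, 32, /*Lo=*/true, /*Unary=*/false))
    std::printf("%d ", M); // 0 4 1 5
  std::printf("\n");
  for (int M : unpackMask(256, 32, /*Lo=*/false, /*Unary=*/false))
    std::printf("%d ", M); // 2 10 3 11 6 14 7 15
  std::printf("\n");
  return 0;
}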
@@ -6538,15 +6660,30 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
// Extract constant bits from a subvector broadcast.
- if (Op.getOpcode() == X86ISD::SUBV_BROADCAST) {
- SmallVector<APInt, 16> SubEltBits;
- if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
- UndefElts, SubEltBits, AllowWholeUndefs,
- AllowPartialUndefs)) {
- UndefElts = APInt::getSplat(NumElts, UndefElts);
- while (EltBits.size() < NumElts)
- EltBits.append(SubEltBits.begin(), SubEltBits.end());
- return true;
+ if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
+ SDValue Ptr = MemIntr->getBasePtr();
+ if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) {
+ Type *CstTy = Cst->getType();
+ unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
+ if (!CstTy->isVectorTy() || (SizeInBits % CstSizeInBits) != 0)
+ return false;
+ unsigned SubEltSizeInBits = CstTy->getScalarSizeInBits();
+ unsigned NumSubElts = CstSizeInBits / SubEltSizeInBits;
+ unsigned NumSubVecs = SizeInBits / CstSizeInBits;
+ APInt UndefSubElts(NumSubElts, 0);
+ SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs,
+ APInt(SubEltSizeInBits, 0));
+ for (unsigned i = 0; i != NumSubElts; ++i) {
+ if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i],
+ UndefSubElts, i))
+ return false;
+ for (unsigned j = 1; j != NumSubVecs; ++j)
+ SubEltBits[i + (j * NumSubElts)] = SubEltBits[i];
+ }
+ UndefSubElts = APInt::getSplat(NumSubVecs * UndefSubElts.getBitWidth(),
+ UndefSubElts);
+ return CastBitData(UndefSubElts, SubEltBits);
}
}
@@ -6567,23 +6704,26 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
// Insert constant bits from a base and sub vector sources.
if (Op.getOpcode() == ISD::INSERT_SUBVECTOR) {
- // TODO - support insert_subvector through bitcasts.
- if (EltSizeInBits != VT.getScalarSizeInBits())
- return false;
+ // If the bitcast is to larger elements we might lose track of undefs, so
+ // don't allow any, to be safe.
+ unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
+ bool AllowUndefs = EltSizeInBits >= SrcEltSizeInBits;
- APInt UndefSubElts;
- SmallVector<APInt, 32> EltSubBits;
- if (getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
+ APInt UndefSrcElts, UndefSubElts;
+ SmallVector<APInt, 32> EltSrcBits, EltSubBits;
+ if (getTargetConstantBitsFromNode(Op.getOperand(1), SrcEltSizeInBits,
UndefSubElts, EltSubBits,
- AllowWholeUndefs, AllowPartialUndefs) &&
- getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
- UndefElts, EltBits, AllowWholeUndefs,
- AllowPartialUndefs)) {
+ AllowWholeUndefs && AllowUndefs,
+ AllowPartialUndefs && AllowUndefs) &&
+ getTargetConstantBitsFromNode(Op.getOperand(0), SrcEltSizeInBits,
+ UndefSrcElts, EltSrcBits,
+ AllowWholeUndefs && AllowUndefs,
+ AllowPartialUndefs && AllowUndefs)) {
unsigned BaseIdx = Op.getConstantOperandVal(2);
- UndefElts.insertBits(UndefSubElts, BaseIdx);
+ UndefSrcElts.insertBits(UndefSubElts, BaseIdx);
for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
- EltBits[BaseIdx + i] = EltSubBits[i];
- return true;
+ EltSrcBits[BaseIdx + i] = EltSubBits[i];
+ return CastBitData(UndefSrcElts, EltSrcBits);
}
}
@@ -6696,7 +6836,7 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
return false;
// Insert the extracted elements into the mask.
- for (APInt Elt : EltBits)
+ for (const APInt &Elt : EltBits)
RawMask.push_back(Elt.getZExtValue());
return true;
@@ -7375,44 +7515,10 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
return true;
}
case ISD::OR: {
- // Inspect each operand at the byte level. We can merge these into a
- // blend shuffle mask if for each byte at least one is masked out (zero).
- KnownBits Known0 =
- DAG.computeKnownBits(N.getOperand(0), DemandedElts, Depth + 1);
- KnownBits Known1 =
- DAG.computeKnownBits(N.getOperand(1), DemandedElts, Depth + 1);
- if (Known0.One.isNullValue() && Known1.One.isNullValue()) {
- bool IsByteMask = true;
- APInt ZeroMask = APInt::getNullValue(NumBytesPerElt);
- APInt SelectMask = APInt::getNullValue(NumBytesPerElt);
- for (unsigned i = 0; i != NumBytesPerElt && IsByteMask; ++i) {
- unsigned LHS = Known0.Zero.extractBits(8, i * 8).getZExtValue();
- unsigned RHS = Known1.Zero.extractBits(8, i * 8).getZExtValue();
- if (LHS == 255 && RHS == 0)
- SelectMask.setBit(i);
- else if (LHS == 255 && RHS == 255)
- ZeroMask.setBit(i);
- else if (!(LHS == 0 && RHS == 255))
- IsByteMask = false;
- }
- if (IsByteMask) {
- for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt) {
- for (unsigned j = 0; j != NumBytesPerElt; ++j) {
- unsigned Ofs = (SelectMask[j] ? NumSizeInBytes : 0);
- int Idx = (ZeroMask[j] ? (int)SM_SentinelZero : (i + j + Ofs));
- Mask.push_back(Idx);
- }
- }
- Ops.push_back(N.getOperand(0));
- Ops.push_back(N.getOperand(1));
- return true;
- }
- }
-
// Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other
// is a valid shuffle index.
- SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
- SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
+ SDValue N0 = peekThroughBitcasts(N.getOperand(0));
+ SDValue N1 = peekThroughBitcasts(N.getOperand(1));
if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
return false;
SmallVector<int, 64> SrcMask0, SrcMask1;
@@ -7423,34 +7529,24 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
true))
return false;
- // Shuffle inputs must be the same size as the result.
- if (llvm::any_of(SrcInputs0, [VT](SDValue Op) {
- return VT.getSizeInBits() != Op.getValueSizeInBits();
- }))
- return false;
- if (llvm::any_of(SrcInputs1, [VT](SDValue Op) {
- return VT.getSizeInBits() != Op.getValueSizeInBits();
- }))
- return false;
-
size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
SmallVector<int, 64> Mask0, Mask1;
narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
- for (size_t i = 0; i != MaskSize; ++i) {
+ for (int i = 0; i != (int)MaskSize; ++i) {
if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
Mask.push_back(SM_SentinelUndef);
else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
Mask.push_back(SM_SentinelZero);
else if (Mask1[i] == SM_SentinelZero)
- Mask.push_back(Mask0[i]);
+ Mask.push_back(i);
else if (Mask0[i] == SM_SentinelZero)
- Mask.push_back(Mask1[i] + (int)(MaskSize * SrcInputs0.size()));
+ Mask.push_back(i + MaskSize);
else
return false;
}
- Ops.append(SrcInputs0.begin(), SrcInputs0.end());
- Ops.append(SrcInputs1.begin(), SrcInputs1.end());
+ Ops.push_back(N0);
+ Ops.push_back(N1);
return true;
}
case ISD::INSERT_SUBVECTOR: {
@@ -7482,7 +7578,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
// Subvector shuffle inputs must not be larger than the subvector.
if (llvm::any_of(SubInputs, [SubVT](SDValue SubInput) {
- return SubVT.getSizeInBits() < SubInput.getValueSizeInBits();
+ return SubVT.getFixedSizeInBits() <
+ SubInput.getValueSizeInBits().getFixedSize();
}))
return false;
@@ -7503,8 +7600,11 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
}
Ops.push_back(Src);
Ops.append(SubInputs.begin(), SubInputs.end());
- for (int i = 0; i != (int)NumElts; ++i)
- Mask.push_back(i);
+ if (ISD::isBuildVectorAllZeros(Src.getNode()))
+ Mask.append(NumElts, SM_SentinelZero);
+ else
+ for (int i = 0; i != (int)NumElts; ++i)
+ Mask.push_back(i);
for (int i = 0; i != (int)NumSubElts; ++i) {
int M = SubMask[i];
if (0 <= M) {
@@ -7605,19 +7705,33 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
APInt EltsLHS, EltsRHS;
getPackDemandedElts(VT, DemandedElts, EltsLHS, EltsRHS);
- // If we know input saturation won't happen we can treat this
- // as a truncation shuffle.
+ // If we know input saturation won't happen (or we don't care about
+ // particular lanes), we can treat this as a truncation shuffle.
+ bool Offset0 = false, Offset1 = false;
if (Opcode == X86ISD::PACKSS) {
- if ((!N0.isUndef() &&
+ if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) ||
- (!N1.isUndef() &&
+ (!(N1.isUndef() || EltsRHS.isNullValue()) &&
DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt))
return false;
+ // We can't easily fold ASHR into a shuffle, but if it was feeding a
+ // PACKSS then it was likely being used for sign-extension ahead of a
+ // truncation, so just peek through and adjust the mask accordingly.
+ if (N0.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N0.getNode()) &&
+ N0.getConstantOperandAPInt(1) == NumBitsPerElt) {
+ Offset0 = true;
+ N0 = N0.getOperand(0);
+ }
+ if (N1.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N1.getNode()) &&
+ N1.getConstantOperandAPInt(1) == NumBitsPerElt) {
+ Offset1 = true;
+ N1 = N1.getOperand(0);
+ }
} else {
APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
- if ((!N0.isUndef() &&
+ if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
!DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) ||
- (!N1.isUndef() &&
+ (!(N1.isUndef() || EltsRHS.isNullValue()) &&
!DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1)))
return false;
}
@@ -7629,6 +7743,13 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
Ops.push_back(N1);
createPackShuffleMask(VT, Mask, IsUnary);
+
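+ // If we peeked through a VSRAI above, the shuffle now reads the pre-shift
+ // operands, so select the upper half of each wide source element by
+ // bumping the affected mask indices by one.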
+ if (Offset0 || Offset1) {
+ for (int &M : Mask)
+ if ((Offset0 && isInRange(M, 0, NumElts)) ||
+ (Offset1 && isInRange(M, NumElts, 2 * NumElts)))
+ ++M;
+ }
return true;
}
case X86ISD::VTRUNC: {
@@ -7916,7 +8037,7 @@ static SDValue getShuffleScalarElt(SDValue Op, unsigned Index,
}
// Use PINSRB/PINSRW/PINSRD to create a build vector.
-static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
+static SDValue LowerBuildVectorAsInsert(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -7931,7 +8052,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
bool First = true;
for (unsigned i = 0; i < NumElts; ++i) {
- bool IsNonZero = (NonZeros & (1 << i)) != 0;
+ bool IsNonZero = NonZeroMask[i];
if (!IsNonZero)
continue;
@@ -7958,7 +8079,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
}
/// Custom lower build_vector of v16i8.
-static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
+static SDValue LowerBuildVectorv16i8(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -7967,7 +8088,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
// SSE4.1 - use PINSRB to insert each byte directly.
if (Subtarget.hasSSE41())
- return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
+ return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
Subtarget);
SDLoc dl(Op);
@@ -7975,8 +8096,8 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
for (unsigned i = 0; i < 16; i += 2) {
- bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
- bool NextIsNonZero = (NonZeros & (1 << (i + 1))) != 0;
+ bool ThisIsNonZero = NonZeroMask[i];
+ bool NextIsNonZero = NonZeroMask[i + 1];
if (!ThisIsNonZero && !NextIsNonZero)
continue;
@@ -8024,7 +8145,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
}
/// Custom lower build_vector of v8i16.
-static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
+static SDValue LowerBuildVectorv8i16(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -8032,7 +8153,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
return SDValue();
// Use PINSRW to insert each byte directly.
- return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
+ return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
Subtarget);
}
@@ -8352,8 +8473,6 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// Handle Special Cases - all undef or undef/zero.
if (UndefMask.countPopulation() == NumElems)
return DAG.getUNDEF(VT);
-
- // FIXME: Should we return this as a BUILD_VECTOR instead?
if ((ZeroMask.countPopulation() + UndefMask.countPopulation()) == NumElems)
return VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT);
@@ -8368,7 +8487,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
assert(LDBase && "Did not find base load for merging consecutive loads");
unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
unsigned BaseSizeInBytes = BaseSizeInBits / 8;
- int LoadSizeInBits = (1 + LastLoadedElt - FirstLoadedElt) * BaseSizeInBits;
+ int NumLoadedElts = (1 + LastLoadedElt - FirstLoadedElt);
+ int LoadSizeInBits = NumLoadedElts * BaseSizeInBits;
assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");
// TODO: Support offsetting the base load.
@@ -8430,7 +8550,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// base pointer. If the vector contains zeros, then attempt to shuffle those
// elements.
if (FirstLoadedElt == 0 &&
- (LastLoadedElt == (int)(NumElems - 1) || IsDereferenceable) &&
+ (NumLoadedElts == (int)NumElems || IsDereferenceable) &&
(IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
return SDValue();
@@ -8518,6 +8638,11 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
if (!Subtarget.hasAVX2() && ScalarSize < 32)
continue;
+ // Don't attempt a 1:N subvector broadcast - it should be caught by
+ // combineConcatVectorOps, otherwise it will cause infinite loops.
+ if (RepeatSize > ScalarSize && SubElems == 1)
+ continue;
+
bool Match = true;
SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(EltBaseVT));
for (unsigned i = 0; i != NumElems && Match; ++i) {
@@ -8549,9 +8674,14 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
if (TLI.isTypeLegal(BroadcastVT)) {
if (SDValue RepeatLoad = EltsFromConsecutiveLoads(
RepeatVT, RepeatedLoads, DL, DAG, Subtarget, isAfterLegalize)) {
- unsigned Opcode = RepeatSize > ScalarSize ? X86ISD::SUBV_BROADCAST
- : X86ISD::VBROADCAST;
- SDValue Broadcast = DAG.getNode(Opcode, DL, BroadcastVT, RepeatLoad);
+ SDValue Broadcast = RepeatLoad;
+ if (RepeatSize > ScalarSize) {
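+ // The repeated value is wider than the scalar type: build the subvector
+ // broadcast by concatenating the repeated load with itself until it
+ // fills the full vector width.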
+ while (Broadcast.getValueSizeInBits() < VT.getSizeInBits())
+ Broadcast = concatSubVectors(Broadcast, Broadcast, DAG, DL);
+ } else {
+ Broadcast =
+ DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, RepeatLoad);
+ }
return DAG.getBitcast(VT, Broadcast);
}
}
@@ -8622,43 +8752,6 @@ static bool isFoldableUseOfShuffle(SDNode *N) {
return false;
}
-// Check if the current node of build vector is a zero extended vector.
-// // If so, return the value extended.
-// // For example: (0,0,0,a,0,0,0,a,0,0,0,a,0,0,0,a) returns a.
-// // NumElt - return the number of zero extended identical values.
-// // EltType - return the type of the value include the zero extend.
-static SDValue isSplatZeroExtended(const BuildVectorSDNode *Op,
- unsigned &NumElt, MVT &EltType) {
- SDValue ExtValue = Op->getOperand(0);
- unsigned NumElts = Op->getNumOperands();
- unsigned Delta = NumElts;
-
- for (unsigned i = 1; i < NumElts; i++) {
- if (Op->getOperand(i) == ExtValue) {
- Delta = i;
- break;
- }
- if (!(Op->getOperand(i).isUndef() || isNullConstant(Op->getOperand(i))))
- return SDValue();
- }
- if (!isPowerOf2_32(Delta) || Delta == 1)
- return SDValue();
-
- for (unsigned i = Delta; i < NumElts; i++) {
- if (i % Delta == 0) {
- if (Op->getOperand(i) != ExtValue)
- return SDValue();
- } else if (!(isNullConstant(Op->getOperand(i)) ||
- Op->getOperand(i).isUndef()))
- return SDValue();
- }
- unsigned EltSize = Op->getSimpleValueType(0).getScalarSizeInBits();
- unsigned ExtVTSize = EltSize * Delta;
- EltType = MVT::getIntegerVT(ExtVTSize);
- NumElt = NumElts / Delta;
- return ExtValue;
-}
-
/// Attempt to use the vbroadcast instruction to generate a splat value
/// from a splat BUILD_VECTOR which uses:
/// a. A single scalar load, or a constant.
@@ -8676,13 +8769,21 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
return SDValue();
MVT VT = BVOp->getSimpleValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
SDLoc dl(BVOp);
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Unsupported vector type for broadcast.");
+ // See if the build vector is a repeating sequence of scalars (inc. splat).
+ SDValue Ld;
BitVector UndefElements;
- SDValue Ld = BVOp->getSplatValue(&UndefElements);
+ SmallVector<SDValue, 16> Sequence;
+ if (BVOp->getRepeatedSequence(Sequence, &UndefElements)) {
+ assert((NumElts % Sequence.size()) == 0 && "Sequence doesn't fit.");
+ if (Sequence.size() == 1)
+ Ld = Sequence[0];
+ }
// Attempt to use VBROADCASTM
// From this pattern:
@@ -8690,30 +8791,38 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
// b. t1 = (build_vector t0 t0)
//
// Create (VBROADCASTM v2i1 X)
- if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) {
- MVT EltType = VT.getScalarType();
- unsigned NumElts = VT.getVectorNumElements();
- SDValue BOperand;
- SDValue ZeroExtended = isSplatZeroExtended(BVOp, NumElts, EltType);
- if ((ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) ||
- (Ld && Ld.getOpcode() == ISD::ZERO_EXTEND &&
- Ld.getOperand(0).getOpcode() == ISD::BITCAST)) {
- if (ZeroExtended)
- BOperand = ZeroExtended.getOperand(0);
- else
- BOperand = Ld.getOperand(0).getOperand(0);
+ if (!Sequence.empty() && Subtarget.hasCDI()) {
+ // If not a splat, are the upper sequence values zeroable?
+ unsigned SeqLen = Sequence.size();
+ bool UpperZeroOrUndef =
+ SeqLen == 1 ||
+ llvm::all_of(makeArrayRef(Sequence).drop_front(), [](SDValue V) {
+ return !V || V.isUndef() || isNullConstant(V);
+ });
+ SDValue Op0 = Sequence[0];
+ if (UpperZeroOrUndef && ((Op0.getOpcode() == ISD::BITCAST) ||
+ (Op0.getOpcode() == ISD::ZERO_EXTEND &&
+ Op0.getOperand(0).getOpcode() == ISD::BITCAST))) {
+ SDValue BOperand = Op0.getOpcode() == ISD::BITCAST
+ ? Op0.getOperand(0)
+ : Op0.getOperand(0).getOperand(0);
MVT MaskVT = BOperand.getSimpleValueType();
- if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || // for broadcastmb2q
+ MVT EltType = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
+ if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || // for broadcastmb2q
(EltType == MVT::i32 && MaskVT == MVT::v16i1)) { // for broadcastmw2d
- SDValue Brdcst =
- DAG.getNode(X86ISD::VBROADCASTM, dl,
- MVT::getVectorVT(EltType, NumElts), BOperand);
- return DAG.getBitcast(VT, Brdcst);
+ MVT BcstVT = MVT::getVectorVT(EltType, NumElts / SeqLen);
+ if (!VT.is512BitVector() && !Subtarget.hasVLX()) {
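+ // Without VLX, VBROADCASTM is only available at 512 bits - broadcast at
+ // the wider type and extract the required subvector afterwards.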
+ unsigned Scale = 512 / VT.getSizeInBits();
+ BcstVT = MVT::getVectorVT(EltType, Scale * (NumElts / SeqLen));
+ }
+ SDValue Bcst = DAG.getNode(X86ISD::VBROADCASTM, dl, BcstVT, BOperand);
+ if (BcstVT.getSizeInBits() != VT.getSizeInBits())
+ Bcst = extractSubVector(Bcst, 0, DAG, dl, VT.getSizeInBits());
+ return DAG.getBitcast(VT, Bcst);
}
}
}
- unsigned NumElts = VT.getVectorNumElements();
unsigned NumUndefElts = UndefElements.count();
if (!Ld || (NumElts - NumUndefElts) <= 1) {
APInt SplatValue, Undef;
@@ -8755,18 +8864,19 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
}
if (SplatBitSize > 64) {
// Load the vector of constants and broadcast it.
- MVT CVT = VT.getScalarType();
Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
*Ctx);
SDValue VCP = DAG.getConstantPool(VecC, PVT);
unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
+ MVT VVT = MVT::getVectorVT(VT.getScalarType(), NumElm);
Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign();
- Ld = DAG.getLoad(
- MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- Alignment);
- SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
- return DAG.getBitcast(VT, Brdcst);
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = {DAG.getEntryNode(), VCP};
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+ return DAG.getMemIntrinsicNode(
+ X86ISD::SUBV_BROADCAST_LOAD, dl, Tys, Ops, VVT, MPI, Alignment,
+ MachineMemOperand::MOLoad);
}
}
}
@@ -8787,6 +8897,8 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
(Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
bool IsLoad = ISD::isNormalLoad(Ld.getNode());
+ // TODO: Handle broadcasts of non-constant sequences.
+
// Make sure that all of the users of a non-constant load are from the
// BUILD_VECTOR node.
// FIXME: Is the use count needed for non-constant, non-load case?
@@ -10120,45 +10232,69 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
return VectorConstant;
- BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
- if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
- return AddSub;
- if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
- return HorizontalOp;
- if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
- return Broadcast;
- if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG))
- return BitOp;
-
unsigned EVTBits = EltVT.getSizeInBits();
-
- unsigned NumZero = 0;
- unsigned NumNonZero = 0;
- uint64_t NonZeros = 0;
+ APInt UndefMask = APInt::getNullValue(NumElems);
+ APInt ZeroMask = APInt::getNullValue(NumElems);
+ APInt NonZeroMask = APInt::getNullValue(NumElems);
bool IsAllConstants = true;
SmallSet<SDValue, 8> Values;
unsigned NumConstants = NumElems;
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Op.getOperand(i);
- if (Elt.isUndef())
+ if (Elt.isUndef()) {
+ UndefMask.setBit(i);
continue;
+ }
Values.insert(Elt);
if (!isa<ConstantSDNode>(Elt) && !isa<ConstantFPSDNode>(Elt)) {
IsAllConstants = false;
NumConstants--;
}
- if (X86::isZeroNode(Elt))
- NumZero++;
- else {
- assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range.
- NonZeros |= ((uint64_t)1 << i);
- NumNonZero++;
+ if (X86::isZeroNode(Elt)) {
+ ZeroMask.setBit(i);
+ } else {
+ NonZeroMask.setBit(i);
}
}
- // All undef vector. Return an UNDEF. All zero vectors were handled above.
- if (NumNonZero == 0)
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ if (NonZeroMask == 0) {
+ assert(UndefMask.isAllOnesValue() && "Fully undef mask expected");
return DAG.getUNDEF(VT);
+ }
+
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
+
+ // If the upper elts of a ymm/zmm are undef/zero then we might be better off
+ // lowering to a smaller build vector and padding with undef/zero.
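+ // e.g. a v8i32 build vector whose upper 4 elements are all undef/zero can
+ // be built as a v4i32 build vector widened back out to v8i32.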
+ if ((VT.is256BitVector() || VT.is512BitVector()) &&
+ !isFoldableUseOfShuffle(BV)) {
+ unsigned UpperElems = NumElems / 2;
+ APInt UndefOrZeroMask = UndefMask | ZeroMask;
+ unsigned NumUpperUndefsOrZeros = UndefOrZeroMask.countLeadingOnes();
+ if (NumUpperUndefsOrZeros >= UpperElems) {
+ if (VT.is512BitVector() &&
+ NumUpperUndefsOrZeros >= (NumElems - (NumElems / 4)))
+ UpperElems = NumElems - (NumElems / 4);
+ bool UndefUpper = UndefMask.countLeadingOnes() >= UpperElems;
+ MVT LowerVT = MVT::getVectorVT(EltVT, NumElems - UpperElems);
+ SDValue NewBV =
+ DAG.getBuildVector(LowerVT, dl, Op->ops().drop_back(UpperElems));
+ return widenSubVector(VT, NewBV, !UndefUpper, Subtarget, DAG, dl);
+ }
+ }
+
+ if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
+ return AddSub;
+ if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
+ return HorizontalOp;
+ if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
+ return Broadcast;
+ if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG))
+ return BitOp;
+
+ unsigned NumZero = ZeroMask.countPopulation();
+ unsigned NumNonZero = NonZeroMask.countPopulation();
// If we are inserting one variable into a vector of non-zero constants, try
// to avoid loading each constant element as a scalar. Load the constants as a
@@ -10222,7 +10358,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) {
- unsigned Idx = countTrailingZeros(NonZeros);
+ unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue Item = Op.getOperand(Idx);
// If we have a constant or non-constant insertion into the low element of
@@ -10286,7 +10422,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
// Check if it's possible to issue this instead.
// shuffle (vload ptr)), undef, <1, 1, 1, 1>
- unsigned Idx = countTrailingZeros(NonZeros);
+ unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue Item = Op.getOperand(Idx);
if (Op.getNode()->isOnlyUserOf(Item.getNode()))
return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
@@ -10355,7 +10491,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (EVTBits == 64) {
if (NumNonZero == 1) {
// One half is zero or undef.
- unsigned Idx = countTrailingZeros(NonZeros);
+ unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
@@ -10365,12 +10501,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16)
- if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
+ if (SDValue V = LowerBuildVectorv16i8(Op, NonZeroMask, NumNonZero, NumZero,
DAG, Subtarget))
return V;
if (EVTBits == 16 && NumElems == 8)
- if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
+ if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero,
DAG, Subtarget))
return V;
@@ -10383,7 +10519,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (NumElems == 4 && NumZero > 0) {
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < 4; ++i) {
- bool isZero = !(NonZeros & (1ULL << i));
+ bool isZero = !NonZeroMask[i];
if (isZero)
Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
@@ -10391,7 +10527,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
for (unsigned i = 0; i < 2; ++i) {
- switch ((NonZeros >> (i*2)) & 0x3) {
+ switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) {
default: llvm_unreachable("Unexpected NonZero count");
case 0:
Ops[i] = Ops[i*2]; // Must be a zero vector.
@@ -10408,8 +10544,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
}
- bool Reverse1 = (NonZeros & 0x3) == 2;
- bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
+ bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2;
+ bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2;
int MaskVec[] = {
Reverse1 ? 1 : 0,
Reverse1 ? 0 : 1,
@@ -10681,6 +10817,35 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask);
}
+/// Test whether elements in each LaneSizeInBits lane in this shuffle mask come
+/// from multiple lanes - this is different to isLaneCrossingShuffleMask to
+/// better support 'repeated mask + lane permute' style shuffles.
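+/// e.g. for a 256-bit v8i32 shuffle, a mask starting <0, 4, ...> draws the
+/// first result lane from both 128-bit source lanes, so this returns true.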
+static bool isMultiLaneShuffleMask(unsigned LaneSizeInBits,
+ unsigned ScalarSizeInBits,
+ ArrayRef<int> Mask) {
+ assert(LaneSizeInBits && ScalarSizeInBits &&
+ (LaneSizeInBits % ScalarSizeInBits) == 0 &&
+ "Illegal shuffle lane size");
+ int NumElts = Mask.size();
+ int NumEltsPerLane = LaneSizeInBits / ScalarSizeInBits;
+ int NumLanes = NumElts / NumEltsPerLane;
+ if (NumLanes > 1) {
+ for (int i = 0; i != NumLanes; ++i) {
+ int SrcLane = -1;
+ for (int j = 0; j != NumEltsPerLane; ++j) {
+ int M = Mask[(i * NumEltsPerLane) + j];
+ if (M < 0)
+ continue;
+ int Lane = (M % NumElts) / NumEltsPerLane;
+ if (SrcLane >= 0 && SrcLane != Lane)
+ return true;
+ SrcLane = Lane;
+ }
+ }
+ }
+ return false;
+}
+
/// Test whether a shuffle mask is equivalent within each sub-lane.
///
/// This checks a shuffle mask to see if it is performing the same
@@ -10742,10 +10907,11 @@ is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
/// Test whether a target shuffle mask is equivalent within each sub-lane.
/// Unlike isRepeatedShuffleMask we must respect SM_SentinelZero.
-static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
+static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits,
+ unsigned EltSizeInBits,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
- int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
+ int LaneSize = LaneSizeInBits / EltSizeInBits;
RepeatedMask.assign(LaneSize, SM_SentinelUndef);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
@@ -10776,6 +10942,67 @@ static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
return true;
}
+/// Test whether a target shuffle mask is equivalent within each sub-lane.
+/// Unlike isRepeatedShuffleMask we must respect SM_SentinelZero.
+static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
+ ArrayRef<int> Mask,
+ SmallVectorImpl<int> &RepeatedMask) {
+ return isRepeatedTargetShuffleMask(LaneSizeInBits, VT.getScalarSizeInBits(),
+ Mask, RepeatedMask);
+}
+
+/// Checks whether the vector elements referenced by two shuffle masks are
+/// equivalent.
+static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp,
+ int Idx, int ExpectedIdx) {
+ assert(0 <= Idx && Idx < MaskSize && 0 <= ExpectedIdx &&
+ ExpectedIdx < MaskSize && "Out of range element index");
+ if (!Op || !ExpectedOp || Op.getOpcode() != ExpectedOp.getOpcode())
+ return false;
+
+ switch (Op.getOpcode()) {
+ case ISD::BUILD_VECTOR:
+ // If the values are build vectors, we can look through them to find
+ // equivalent inputs that make the shuffles equivalent.
+ // TODO: Handle MaskSize != Op.getNumOperands()?
+ if (MaskSize == (int)Op.getNumOperands() &&
+ MaskSize == (int)ExpectedOp.getNumOperands())
+ return Op.getOperand(Idx) == ExpectedOp.getOperand(ExpectedIdx);
+ break;
+ case X86ISD::VBROADCAST:
+ case X86ISD::VBROADCAST_LOAD:
+ // TODO: Handle MaskSize != Op.getValueType().getVectorNumElements()?
+ return (Op == ExpectedOp &&
+ (int)Op.getValueType().getVectorNumElements() == MaskSize);
+ case X86ISD::HADD:
+ case X86ISD::HSUB:
+ case X86ISD::FHADD:
+ case X86ISD::FHSUB:
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS:
+ // HOP(X,X) can refer to the elt from the lower/upper half of a lane.
+ // TODO: Handle MaskSize != NumElts?
+ // TODO: Handle HOP(X,Y) vs HOP(Y,X) equivalence cases.
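+ // e.g. for v8i16 HADD(X,X), result elements 0 and 4 both contain
+ // X[0]+X[1], so those indices are treated as equivalent.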
+ if (Op == ExpectedOp && Op.getOperand(0) == Op.getOperand(1)) {
+ MVT VT = Op.getSimpleValueType();
+ int NumElts = VT.getVectorNumElements();
+ if (MaskSize == NumElts) {
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumEltsPerLane = NumElts / NumLanes;
+ int NumHalfEltsPerLane = NumEltsPerLane / 2;
+ bool SameLane =
+ (Idx / NumEltsPerLane) == (ExpectedIdx / NumEltsPerLane);
+ bool SameElt =
+ (Idx % NumHalfEltsPerLane) == (ExpectedIdx % NumHalfEltsPerLane);
+ return SameLane && SameElt;
+ }
+ }
+ break;
+ }
+
+ return false;
+}
+
/// Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
///
@@ -10786,30 +11013,26 @@ static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
/// It returns true if the mask is exactly as wide as the argument list, and
/// each element of the mask is either -1 (signifying undef) or the value given
/// in the argument.
-static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
- ArrayRef<int> ExpectedMask) {
- if (Mask.size() != ExpectedMask.size())
- return false;
-
+static bool isShuffleEquivalent(ArrayRef<int> Mask, ArrayRef<int> ExpectedMask,
+ SDValue V1 = SDValue(),
+ SDValue V2 = SDValue()) {
int Size = Mask.size();
-
- // If the values are build vectors, we can look through them to find
- // equivalent inputs that make the shuffles equivalent.
- auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
- auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);
+ if (Size != (int)ExpectedMask.size())
+ return false;
for (int i = 0; i < Size; ++i) {
assert(Mask[i] >= -1 && "Out of bound mask element!");
- if (Mask[i] >= 0 && Mask[i] != ExpectedMask[i]) {
- auto *MaskBV = Mask[i] < Size ? BV1 : BV2;
- auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2;
- if (!MaskBV || !ExpectedBV ||
- MaskBV->getOperand(Mask[i] % Size) !=
- ExpectedBV->getOperand(ExpectedMask[i] % Size))
+ int MaskIdx = Mask[i];
+ int ExpectedIdx = ExpectedMask[i];
+ if (0 <= MaskIdx && MaskIdx != ExpectedIdx) {
+ SDValue MaskV = MaskIdx < Size ? V1 : V2;
+ SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
+ MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
+ ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
+ if (!IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx))
return false;
}
}
-
return true;
}
@@ -10822,7 +11045,7 @@ static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
///
/// SM_SentinelZero is accepted as a valid negative index but must match in
/// both.
-static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
+static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
ArrayRef<int> ExpectedMask,
SDValue V1 = SDValue(),
SDValue V2 = SDValue()) {
@@ -10836,22 +11059,23 @@ static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size))
return false;
- // If the values are build vectors, we can look through them to find
- // equivalent inputs that make the shuffles equivalent.
- auto *BV1 = dyn_cast_or_null<BuildVectorSDNode>(V1);
- auto *BV2 = dyn_cast_or_null<BuildVectorSDNode>(V2);
- BV1 = ((BV1 && Size != (int)BV1->getNumOperands()) ? nullptr : BV1);
- BV2 = ((BV2 && Size != (int)BV2->getNumOperands()) ? nullptr : BV2);
+ // Don't use V1/V2 if they're not the same size as the shuffle mask type.
+ if (V1 && V1.getValueSizeInBits() != VT.getSizeInBits())
+ V1 = SDValue();
+ if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits())
+ V2 = SDValue();
for (int i = 0; i < Size; ++i) {
- if (Mask[i] == SM_SentinelUndef || Mask[i] == ExpectedMask[i])
+ int MaskIdx = Mask[i];
+ int ExpectedIdx = ExpectedMask[i];
+ if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
continue;
- if (0 <= Mask[i] && 0 <= ExpectedMask[i]) {
- auto *MaskBV = Mask[i] < Size ? BV1 : BV2;
- auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2;
- if (MaskBV && ExpectedBV &&
- MaskBV->getOperand(Mask[i] % Size) ==
- ExpectedBV->getOperand(ExpectedMask[i] % Size))
+ if (0 <= MaskIdx && 0 <= ExpectedIdx) {
+ SDValue MaskV = MaskIdx < Size ? V1 : V2;
+ SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
+ MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
+ ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
+ if (IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx))
continue;
}
// TODO - handle SM_Sentinel equivalences.
@@ -10863,20 +11087,25 @@ static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
// Attempt to create a shuffle mask from a VSELECT condition mask.
static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
SDValue Cond) {
- if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
+ EVT CondVT = Cond.getValueType();
+ unsigned EltSizeInBits = CondVT.getScalarSizeInBits();
+ unsigned NumElts = CondVT.getVectorNumElements();
+
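+ // Gather the condition as constant bits so constant conditions behind
+ // bitcasts or constant pool loads are handled, not just BUILD_VECTORs.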
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits,
+ true, false))
return false;
- unsigned Size = Cond.getValueType().getVectorNumElements();
- Mask.resize(Size, SM_SentinelUndef);
+ Mask.resize(NumElts, SM_SentinelUndef);
- for (int i = 0; i != (int)Size; ++i) {
- SDValue CondElt = Cond.getOperand(i);
+ for (int i = 0; i != (int)NumElts; ++i) {
Mask[i] = i;
// Arbitrarily choose from the 2nd operand if the select condition element
// is undef.
// TODO: Can we do better by matching patterns such as even/odd?
- if (CondElt.isUndef() || isNullConstant(CondElt))
- Mask[i] += Size;
+ if (UndefElts[i] || EltBits[i].isNullValue())
+ Mask[i] += NumElts;
}
return true;
@@ -10894,8 +11123,8 @@ static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
SmallVector<int, 8> Unpckhwd;
createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false,
/* Unary = */ false);
- bool IsUnpackwdMask = (isTargetShuffleEquivalent(Mask, Unpcklwd) ||
- isTargetShuffleEquivalent(Mask, Unpckhwd));
+ bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd) ||
+ isTargetShuffleEquivalent(VT, Mask, Unpckhwd));
return IsUnpackwdMask;
}
@@ -10912,8 +11141,8 @@ static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) {
for (unsigned i = 0; i != 4; ++i) {
SmallVector<int, 16> UnpackMask;
createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2);
- if (isTargetShuffleEquivalent(Mask, UnpackMask) ||
- isTargetShuffleEquivalent(CommutedMask, UnpackMask))
+ if (isTargetShuffleEquivalent(VT, Mask, UnpackMask) ||
+ isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask))
return true;
}
return false;
@@ -10948,6 +11177,15 @@ static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");
+ // If the mask only uses one non-undef element, then fully 'splat' it to
+ // improve later broadcast matching.
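+ // e.g. mask <-1, 2, -1, -1> is encoded as if it were <2, 2, 2, 2> (0xAA).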
+ int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin();
+ assert(0 <= FirstIndex && FirstIndex < 4 && "All undef shuffle mask");
+
+ int FirstElt = Mask[FirstIndex];
+ if (all_of(Mask, [FirstElt](int M) { return M < 0 || M == FirstElt; }))
+ return (FirstElt << 6) | (FirstElt << 4) | (FirstElt << 2) | FirstElt;
+
unsigned Imm = 0;
Imm |= (Mask[0] < 0 ? 0 : Mask[0]) << 0;
Imm |= (Mask[1] < 0 ? 1 : Mask[1]) << 2;
@@ -11097,7 +11335,8 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
// Attempt to match the target mask against the unpack lo/hi mask patterns.
SmallVector<int, 64> Unpckl, Unpckh;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
- if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1,
+ (IsUnary ? V1 : V2))) {
UnpackOpcode = X86ISD::UNPCKL;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
@@ -11105,7 +11344,8 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
}
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
- if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1,
+ (IsUnary ? V1 : V2))) {
UnpackOpcode = X86ISD::UNPCKH;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
@@ -11143,14 +11383,14 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
// If a binary shuffle, commute and try again.
if (!IsUnary) {
ShuffleVectorSDNode::commuteMask(Unpckl);
- if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) {
UnpackOpcode = X86ISD::UNPCKL;
std::swap(V1, V2);
return true;
}
ShuffleVectorSDNode::commuteMask(Unpckh);
- if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) {
UnpackOpcode = X86ISD::UNPCKH;
std::swap(V1, V2);
return true;
@@ -11167,21 +11407,21 @@ static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT,
SelectionDAG &DAG) {
SmallVector<int, 8> Unpckl;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false);
- if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
+ if (isShuffleEquivalent(Mask, Unpckl, V1, V2))
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
SmallVector<int, 8> Unpckh;
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, /* Unary = */ false);
- if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
+ if (isShuffleEquivalent(Mask, Unpckh, V1, V2))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
// Commute and try again.
ShuffleVectorSDNode::commuteMask(Unpckl);
- if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
+ if (isShuffleEquivalent(Mask, Unpckl, V1, V2))
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1);
ShuffleVectorSDNode::commuteMask(Unpckh);
- if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
+ if (isShuffleEquivalent(Mask, Unpckh, V1, V2))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1);
return SDValue();
@@ -11197,9 +11437,9 @@ static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT,
createSplat2ShuffleMask(VT, Unpckh, /* Lo */ false);
unsigned UnpackOpcode;
- if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
+ if (isShuffleEquivalent(Mask, Unpckl, V1, V2))
UnpackOpcode = X86ISD::UNPCKL;
- else if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
+ else if (isShuffleEquivalent(Mask, Unpckh, V1, V2))
UnpackOpcode = X86ISD::UNPCKH;
else
return SDValue();
@@ -11215,7 +11455,6 @@ static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT,
// Check if the mask can be mapped to a TRUNCATE or VTRUNC, truncating the
// source into the lower elements and zeroing the upper elements.
-// TODO: Merge with matchShuffleAsVPMOV.
static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT,
ArrayRef<int> Mask, const APInt &Zeroable,
const X86Subtarget &Subtarget) {
@@ -11252,22 +11491,51 @@ static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT,
return false;
}
-static bool matchShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
- int Delta) {
- int Size = (int)Mask.size();
- int Split = Size / Delta;
- int TruncatedVectorStart = SwappedOps ? Size : 0;
+// Helper to create TRUNCATE/VTRUNC nodes, optionally with zero/undef upper
+// element padding to the final DstVT.
+static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG, bool ZeroUppers) {
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT DstSVT = DstVT.getScalarType();
+ unsigned NumDstElts = DstVT.getVectorNumElements();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned DstEltSizeInBits = DstVT.getScalarSizeInBits();
- // Match for mask starting with e.g.: <8, 10, 12, 14,... or <0, 2, 4, 6,...
- if (!isSequentialOrUndefInRange(Mask, 0, Split, TruncatedVectorStart, Delta))
- return false;
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
+ return SDValue();
- // The rest of the mask should not refer to the truncated vector's elements.
- if (isAnyInRange(Mask.slice(Split, Size - Split), TruncatedVectorStart,
- TruncatedVectorStart + Size))
- return false;
+ // Perform a direct ISD::TRUNCATE if possible.
+ if (NumSrcElts == NumDstElts)
+ return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
- return true;
+ if (NumSrcElts > NumDstElts) {
+ MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src);
+ return extractSubVector(Trunc, 0, DAG, DL, DstVT.getSizeInBits());
+ }
+
+ if ((NumSrcElts * DstEltSizeInBits) >= 128) {
+ MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src);
+ return widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL,
+ DstVT.getSizeInBits());
+ }
+
+ // Non-VLX targets must truncate from a 512-bit type, so we need to
+ // widen, truncate and then possibly extract the original subvector.
+ if (!Subtarget.hasVLX() && !SrcVT.is512BitVector()) {
+ SDValue NewSrc = widenSubVector(Src, ZeroUppers, Subtarget, DAG, DL, 512);
+ return getAVX512TruncNode(DL, DstVT, NewSrc, Subtarget, DAG, ZeroUppers);
+ }
+
+ // Fallback to a X86ISD::VTRUNC, padding if necessary.
+ MVT TruncVT = MVT::getVectorVT(DstSVT, 128 / DstEltSizeInBits);
+ SDValue Trunc = DAG.getNode(X86ISD::VTRUNC, DL, TruncVT, Src);
+ if (DstVT != TruncVT)
+ Trunc = widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL,
+ DstVT.getSizeInBits());
+ return Trunc;
}
// Try to lower trunc+vector_shuffle to a vpmovdb or a vpmovdw instruction.
@@ -11283,66 +11551,99 @@ static bool matchShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
// t51: v8i16 = vector_shuffle<0,2,4,6,12,13,14,15> t41, t21
// t18: v2i64 = bitcast t51
//
-// Without avx512vl, this is lowered to:
-//
-// vpmovqd %zmm0, %ymm0
-// vpshufb {{.*#+}} xmm0 =
-// xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
-//
-// But when avx512vl is available, one can just use a single vpmovdw
-// instruction.
-static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- if (VT != MVT::v16i8 && VT != MVT::v8i16)
+// One can just use a single vpmovdw instruction; without avx512vl we need to
+// use the zmm variant and extract the lower subvector, padding with zeroes.
+// TODO: Merge with lowerShuffleAsVTRUNC.
+static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert((VT == MVT::v16i8 || VT == MVT::v8i16) && "Unexpected VTRUNC type");
+ if (!Subtarget.hasAVX512())
return SDValue();
- if (Mask.size() != VT.getVectorNumElements())
- return SDValue();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned MaxScale = 64 / EltSizeInBits;
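+ // Try each truncation ratio: the low NumElts/Scale mask elements must form
+ // <0, Scale, 2*Scale, ...> and the remaining elements must be zeroable.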
+ for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
+ unsigned NumSrcElts = NumElts / Scale;
+ unsigned UpperElts = NumElts - NumSrcElts;
+ if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) ||
+ !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
+ continue;
- bool SwappedOps = false;
+ SDValue Src = V1;
+ if (!Src.hasOneUse())
+ return SDValue();
- if (!ISD::isBuildVectorAllZeros(V2.getNode())) {
- if (!ISD::isBuildVectorAllZeros(V1.getNode()))
+ Src = peekThroughOneUseBitcasts(Src);
+ if (Src.getOpcode() != ISD::TRUNCATE ||
+ Src.getScalarValueSizeInBits() != (EltSizeInBits * Scale))
return SDValue();
+ Src = Src.getOperand(0);
- std::swap(V1, V2);
- SwappedOps = true;
+ // VPMOVWB is only available with avx512bw.
+ MVT SrcVT = Src.getSimpleValueType();
+ if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 &&
+ !Subtarget.hasBWI())
+ return SDValue();
+
+ bool UndefUppers = isUndefInRange(Mask, NumSrcElts, UpperElts);
+ return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);
}
- // Look for:
- //
- // bitcast (truncate <8 x i32> %vec to <8 x i16>) to <16 x i8>
- // bitcast (truncate <4 x i64> %vec to <4 x i32>) to <8 x i16>
- //
- // and similar ones.
- if (V1.getOpcode() != ISD::BITCAST)
- return SDValue();
- if (V1.getOperand(0).getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+}
+
+// Attempt to match binary shuffle patterns as a truncate.
+static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unexpected VTRUNC type");
+ if (!Subtarget.hasAVX512())
return SDValue();
- SDValue Src = V1.getOperand(0).getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned MaxScale = 64 / EltSizeInBits;
+ for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
+ // TODO: Support non-BWI VPMOVWB truncations?
+ unsigned SrcEltBits = EltSizeInBits * Scale;
+ if (SrcEltBits < 32 && !Subtarget.hasBWI())
+ continue;
- // The vptrunc** instructions truncating 128 bit and 256 bit vectors
- // are only available with avx512vl.
- if (!SrcVT.is512BitVector() && !Subtarget.hasVLX())
- return SDValue();
+ // Match shuffle <0,Scale,2*Scale,..,undef_or_zero,undef_or_zero,...>
+ // Bail if the V2 elements are undef.
+ unsigned NumHalfSrcElts = NumElts / Scale;
+ unsigned NumSrcElts = 2 * NumHalfSrcElts;
+ if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) ||
+ isUndefInRange(Mask, NumHalfSrcElts, NumHalfSrcElts))
+ continue;
- // Down Convert Word to Byte is only available with avx512bw. The case with
- // 256-bit output doesn't contain a shuffle and is therefore not handled here.
- if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 &&
- !Subtarget.hasBWI())
- return SDValue();
+ // The elements beyond the truncation must be undef/zero.
+ unsigned UpperElts = NumElts - NumSrcElts;
+ if (UpperElts > 0 &&
+ !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
+ continue;
+ bool UndefUppers =
+ UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts);
- // The first half/quarter of the mask should refer to every second/fourth
- // element of the vector truncated and bitcasted.
- if (!matchShuffleAsVPMOV(Mask, SwappedOps, 2) &&
- !matchShuffleAsVPMOV(Mask, SwappedOps, 4))
- return SDValue();
+ // As we're using both sources, we need to concat them together
+ // and truncate from the double-sized src.
+ MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2);
+ SDValue Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
+
+ MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
+ MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
+ Src = DAG.getBitcast(SrcVT, Src);
+ return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);
+ }
- return DAG.getNode(X86ISD::VTRUNC, DL, VT, Src);
+ return SDValue();
}
/// Check whether a compaction lowering can be done by dropping even
@@ -11460,14 +11761,14 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
// Try binary shuffle.
SmallVector<int, 32> BinaryMask;
createPackShuffleMask(VT, BinaryMask, false, NumStages);
- if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2))
+ if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, V1, V2))
if (MatchPACK(V1, V2, PackVT))
return true;
// Try unary shuffle.
SmallVector<int, 32> UnaryMask;
createPackShuffleMask(VT, UnaryMask, true, NumStages);
- if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1))
+ if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, V1))
if (MatchPACK(V1, V1, PackVT))
return true;
}
@@ -12016,23 +12317,32 @@ static SDValue lowerShuffleAsByteRotateAndPermute(
/// This matches the extremely common pattern for handling combined
/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
/// operations. It will try to pick the best arrangement of shuffles and
-/// blends.
-static SDValue lowerShuffleAsDecomposedShuffleBlend(
+/// blends. For vXi8/vXi16 shuffles we may use unpack instead of blend.
+static SDValue lowerShuffleAsDecomposedShuffleMerge(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+ int NumElts = Mask.size();
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumEltsPerLane = NumElts / NumLanes;
+
// Shuffle the input elements into the desired positions in V1 and V2 and
- // blend them together.
- SmallVector<int, 32> V1Mask(Mask.size(), -1);
- SmallVector<int, 32> V2Mask(Mask.size(), -1);
- SmallVector<int, 32> BlendMask(Mask.size(), -1);
- for (int i = 0, Size = Mask.size(); i < Size; ++i)
- if (Mask[i] >= 0 && Mask[i] < Size) {
- V1Mask[i] = Mask[i];
- BlendMask[i] = i;
- } else if (Mask[i] >= Size) {
- V2Mask[i] = Mask[i] - Size;
- BlendMask[i] = i + Size;
+ // unpack/blend them together.
+ bool IsAlternating = true;
+ SmallVector<int, 32> V1Mask(NumElts, -1);
+ SmallVector<int, 32> V2Mask(NumElts, -1);
+ SmallVector<int, 32> FinalMask(NumElts, -1);
+ for (int i = 0; i < NumElts; ++i) {
+ int M = Mask[i];
+ if (M >= 0 && M < NumElts) {
+ V1Mask[i] = M;
+ FinalMask[i] = i;
+ IsAlternating &= (i & 1) == 0;
+ } else if (M >= NumElts) {
+ V2Mask[i] = M - NumElts;
+ FinalMask[i] = i + NumElts;
+ IsAlternating &= (i & 1) == 1;
}
+ }
// Try to lower with the simpler initial blend/unpack/rotate strategies unless
// one of the input shuffles would be a no-op. We prefer to shuffle inputs as
@@ -12056,9 +12366,30 @@ static SDValue lowerShuffleAsDecomposedShuffleBlend(
return BlendPerm;
}
+ // If the final mask is an alternating blend of vXi8/vXi16, convert to an
+ // UNPCKL(SHUFFLE, SHUFFLE) pattern.
+ // TODO: It doesn't have to be alternating - but each lane mustn't have more
+ // than half the elements coming from each source.
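+ // Each source's demanded elements are packed into the low half of every
+ // lane, then interleaved back together with an unpack-style final mask.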
+ if (IsAlternating && VT.getScalarSizeInBits() < 32) {
+ V1Mask.assign(NumElts, -1);
+ V2Mask.assign(NumElts, -1);
+ FinalMask.assign(NumElts, -1);
+ for (int i = 0; i != NumElts; i += NumEltsPerLane)
+ for (int j = 0; j != NumEltsPerLane; ++j) {
+ int M = Mask[i + j];
+ if (M >= 0 && M < NumElts) {
+ V1Mask[i + (j / 2)] = M;
+ FinalMask[i + j] = i + (j / 2);
+ } else if (M >= NumElts) {
+ V2Mask[i + (j / 2)] = M - NumElts;
+ FinalMask[i + j] = i + (j / 2) + NumElts;
+ }
+ }
+ }
+
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
- return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
+ return DAG.getVectorShuffle(VT, DL, V1, V2, FinalMask);
}
/// Try to lower a vector shuffle as a bit rotation.
@@ -12716,8 +13047,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
- InputV = getExtendInVec(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND, DL,
- ExtVT, InputV, DAG);
+ InputV = getEXTEND_VECTOR_INREG(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND,
+ DL, ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
@@ -13325,7 +13656,8 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
MVT SVT = VT.getScalarType();
unsigned Offset = BroadcastIdx * SVT.getStoreSize();
assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
- SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+ SDValue NewAddr =
+ DAG.getMemBasePlusOffset(BaseAddr, TypeSize::Fixed(Offset), DL);
// Directly form VBROADCAST_LOAD if we're using VBROADCAST opcode rather
// than MOVDDUP.
@@ -13498,7 +13830,7 @@ static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2,
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
// Attempt to match the insertps pattern.
- unsigned InsertPSMask;
+ unsigned InsertPSMask = 0;
if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
return SDValue();
@@ -13686,8 +14018,8 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use one of the special instruction patterns to handle two common
// blend patterns if a zero-blend above didn't work.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
- isShuffleEquivalent(V1, V2, Mask, {1, 3}))
+ if (isShuffleEquivalent(Mask, {0, 3}, V1, V2) ||
+ isShuffleEquivalent(Mask, {1, 3}, V1, V2))
if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
// We can either use a special instruction to load over the low double or
// to move just the low double.
@@ -13733,9 +14065,10 @@ static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// onward this has a single fast instruction with no scary immediates.
// We have to map the mask as it is actually a v4i32 shuffle instruction.
V1 = DAG.getBitcast(MVT::v4i32, V1);
- int WidenedMask[4] = {
- std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
- std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
+ int WidenedMask[4] = {Mask[0] < 0 ? -1 : (Mask[0] * 2),
+ Mask[0] < 0 ? -1 : ((Mask[0] * 2) + 1),
+ Mask[1] < 0 ? -1 : (Mask[1] * 2),
+ Mask[1] < 0 ? -1 : ((Mask[1] * 2) + 1)};
return DAG.getBitcast(
MVT::v2i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
@@ -13795,7 +14128,7 @@ static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2, Mask,
+ return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v2i64, V1, V2, Mask,
Subtarget, DAG);
// We implement this with SHUFPD which is pretty lame because it will likely
@@ -13889,6 +14222,12 @@ static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
NewMask[2] = Mask[2] < 4 ? 1 : 3;
NewMask[3] = Mask[2] < 4 ? 3 : 1;
}
+ } else if (NumV2Elements == 3) {
+ // Ideally canonicalizeShuffleMaskWithCommute should have caught this, but
+ // we can get here due to other paths (e.g. repeated mask matching) where
+ // we don't want to do another round of lowerVECTOR_SHUFFLE.
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ return lowerShuffleWithSHUFPS(DL, VT, NewMask, V2, V1, DAG);
}
return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
getV4X86ShuffleImm8ForMask(NewMask, DL, DAG));
@@ -13917,9 +14256,9 @@ static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Use even/odd duplicate instructions for masks that match their pattern.
if (Subtarget.hasSSE3()) {
- if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
+ if (isShuffleEquivalent(Mask, {0, 0, 2, 2}, V1, V2))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3}))
+ if (isShuffleEquivalent(Mask, {1, 1, 3, 3}, V1, V2))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
}
@@ -13933,9 +14272,9 @@ static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Use MOVLHPS/MOVHLPS to simulate unary shuffles. These are only valid
// in SSE1 because otherwise they are widened to v2f64 and never get here.
if (!Subtarget.hasSSE2()) {
- if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}))
+ if (isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1, V2))
return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 2, 3}))
+ if (isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1, V2))
return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V1, V1);
}
@@ -13977,9 +14316,9 @@ static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Use low/high mov instructions. These are only valid in SSE1 because
// otherwise they are widened to v2f64 and never get here.
if (!Subtarget.hasSSE2()) {
- if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
+ if (isShuffleEquivalent(Mask, {0, 1, 4, 5}, V1, V2))
return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
+ if (isShuffleEquivalent(Mask, {2, 3, 6, 7}, V1, V2))
return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
}
@@ -14027,9 +14366,9 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
- if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 1, 1}))
+ if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
Mask = UnpackLoMask;
- else if (isShuffleEquivalent(V1, V2, Mask, {2, 2, 3, 3}))
+ else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
Mask = UnpackHiMask;
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
@@ -14087,7 +14426,7 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, Mask,
+ return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i32, V1, V2, Mask,
Subtarget, DAG);
// Try to lower by permuting the inputs into an unpack instruction.
@@ -14696,6 +15035,11 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return ZExt;
+ // Try to lower using a truncation.
+ if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
+ return V;
+
int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
if (NumV2Inputs == 0) {
@@ -14776,6 +15120,11 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Subtarget))
return V;
+ // Try to lower using a truncation.
+ if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
+ return V;
+
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V2, Mask,
Subtarget, DAG))
@@ -14827,22 +15176,49 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
// We can always bit-blend if we have to so the fallback strategy is to
- // decompose into single-input permutes and blends.
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
+ // decompose into single-input permutes and blends/unpacks.
+ return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i16, V1, V2,
Mask, Subtarget, DAG);
}
+// Lowers a unary/binary shuffle as VPERMV/VPERMV3. For non-VLX targets,
+// sub-512-bit shuffles are padded to 512 bits for the shuffle and then
+// the active subvector is extracted.
static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
- MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
- MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+ ArrayRef<int> Mask, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT MaskVT = VT.changeTypeToInteger();
+ SDValue MaskNode;
+ MVT ShuffleVT = VT;
+ if (!VT.is512BitVector() && !Subtarget.hasVLX()) {
+ V1 = widenSubVector(V1, false, Subtarget, DAG, DL, 512);
+ V2 = widenSubVector(V2, false, Subtarget, DAG, DL, 512);
+ ShuffleVT = V1.getSimpleValueType();
+
+ // Adjust mask to correct indices for the second input.
+ int NumElts = VT.getVectorNumElements();
+ unsigned Scale = 512 / VT.getSizeInBits();
+ SmallVector<int, 32> AdjustedMask(Mask.begin(), Mask.end());
+ for (int &M : AdjustedMask)
+ if (NumElts <= M)
+ M += (Scale - 1) * NumElts;
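+ // e.g. widening a v16i8 shuffle to v64i8 gives Scale = 4, so an index of
+ // 16 (element 0 of V2) becomes 16 + 3 * 16 = 64, which is element 0 of the
+ // widened V2 operand in the combined VPERMV3 index space.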
+ MaskNode = getConstVector(AdjustedMask, MaskVT, DAG, DL, true);
+ MaskNode = widenSubVector(MaskNode, false, Subtarget, DAG, DL, 512);
+ } else {
+ MaskNode = getConstVector(Mask, MaskVT, DAG, DL, true);
+ }
- SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
+ SDValue Result;
if (V2.isUndef())
- return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+ Result = DAG.getNode(X86ISD::VPERMV, DL, ShuffleVT, MaskNode, V1);
+ else
+ Result = DAG.getNode(X86ISD::VPERMV3, DL, ShuffleVT, V1, MaskNode, V2);
- return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
+ if (VT != ShuffleVT)
+ Result = extractSubVector(Result, 0, DAG, DL, VT.getSizeInBits());
+
+ return Result;
}
/// Generic lowering of v16i8 shuffles.
@@ -14880,6 +15256,15 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return ZExt;
+ // Try to lower using a truncation.
+ if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
+ return V;
+
+ if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
+ return V;
+
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
@@ -15062,9 +15447,16 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Unpack;
- // If we have VBMI we can use one VPERM instead of multiple PSHUFBs.
- if (Subtarget.hasVBMI() && Subtarget.hasVLX())
- return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG);
+ // AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8).
+ if (Subtarget.hasVBMI())
+ return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, Subtarget,
+ DAG);
+
+ // If we have XOP we can use one VPPERM instead of multiple PSHUFBs.
+ if (Subtarget.hasXOP()) {
+ SDValue MaskNode = getConstVector(Mask, MVT::v16i8, DAG, DL, true);
+ return DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, V1, V2, MaskNode);
+ }
// Use PALIGNR+Permute if possible - permute might become PSHUFB but the
// PALIGNR will be cheaper than the second PSHUFB+OR.
@@ -15120,9 +15512,9 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Result;
}
- // Handle multi-input cases by blending single-input shuffles.
+ // Handle multi-input cases by blending/unpacking single-input shuffles.
if (NumV2Elements > 0)
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2, Mask,
+ return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v16i8, V1, V2, Mask,
Subtarget, DAG);
// The fallback path for single-input shuffles widens this into two v8i16
@@ -15302,7 +15694,7 @@ static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
}
/// Either split a vector in halves or decompose the shuffles and the
-/// blend.
+/// blend/unpack.
///
/// This is provided as a good fallback for many lowerings of non-single-input
/// shuffles with more than one 128-bit lane. In those cases, we want to select
@@ -15337,8 +15729,8 @@ static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
return true;
};
if (DoBothBroadcast())
- return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
- Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget,
+ DAG);
// If the inputs all stem from a single 128-bit lane of each input, then we
// split them rather than blending because the split will decompose to
@@ -15354,9 +15746,9 @@ static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
- // Otherwise, just fall back to decomposed shuffles and a blend. This requires
- // that the decomposed single-input shuffles don't end up here.
- return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, Subtarget,
+ // Otherwise, just fall back to decomposed shuffles and a blend/unpack. This
+ // requires that the decomposed single-input shuffles don't end up here.
+ return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget,
DAG);
}
@@ -15404,53 +15796,94 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits() / 128;
int NumEltsPerLane = NumElts / NumLanes;
+ bool CanUseSublanes = Subtarget.hasAVX2() && V2.isUndef();
+
+ /// Attempts to find a sublane permute with the given size
+ /// that gets all elements into their target lanes.
+ ///
+ /// If successful, returns the lowered shuffle (a cross-lane shuffle followed
+ /// by an in-lane shuffle). If unsuccessful, returns an empty SDValue.
+ auto getSublanePermute = [&](int NumSublanes) -> SDValue {
+ int NumSublanesPerLane = NumSublanes / NumLanes;
+ int NumEltsPerSublane = NumElts / NumSublanes;
+
+ SmallVector<int, 16> CrossLaneMask;
+ SmallVector<int, 16> InLaneMask(NumElts, SM_SentinelUndef);
+ // CrossLaneMask but one entry == one sublane.
+ SmallVector<int, 16> CrossLaneMaskLarge(NumSublanes, SM_SentinelUndef);
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
- SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
- SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
+ int SrcSublane = M / NumEltsPerSublane;
+ int DstLane = i / NumEltsPerLane;
- for (int i = 0; i != NumElts; ++i) {
- int M = Mask[i];
- if (M < 0)
- continue;
+ // We only need to get the elements into the right lane, not sublane.
+ // So search all sublanes that make up the destination lane.
+ bool Found = false;
+ int DstSubStart = DstLane * NumSublanesPerLane;
+ int DstSubEnd = DstSubStart + NumSublanesPerLane;
+ for (int DstSublane = DstSubStart; DstSublane < DstSubEnd; ++DstSublane) {
+ if (!isUndefOrEqual(CrossLaneMaskLarge[DstSublane], SrcSublane))
+ continue;
- // Ensure that each lane comes from a single source lane.
- int SrcLane = M / NumEltsPerLane;
- int DstLane = i / NumEltsPerLane;
- if (!isUndefOrEqual(SrcLaneMask[DstLane], SrcLane))
- return SDValue();
- SrcLaneMask[DstLane] = SrcLane;
+ Found = true;
+ CrossLaneMaskLarge[DstSublane] = SrcSublane;
+ int DstSublaneOffset = DstSublane * NumEltsPerSublane;
+ InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane;
+ break;
+ }
+ if (!Found)
+ return SDValue();
+ }
- PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
- }
+ // Fill CrossLaneMask using CrossLaneMaskLarge.
+ narrowShuffleMaskElts(NumEltsPerSublane, CrossLaneMaskLarge, CrossLaneMask);
- // Make sure we set all elements of the lane mask, to avoid undef propagation.
- SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
- for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
- int SrcLane = SrcLaneMask[DstLane];
- if (0 <= SrcLane)
- for (int j = 0; j != NumEltsPerLane; ++j) {
- LaneMask[(DstLane * NumEltsPerLane) + j] =
- (SrcLane * NumEltsPerLane) + j;
- }
- }
+ if (!CanUseSublanes) {
+ // If we're only shuffling a single lowest lane and the rest are identity
+ // then don't bother.
+ // TODO - isShuffleMaskInputInPlace could be extended to something like
+ // this.
+ int NumIdentityLanes = 0;
+ bool OnlyShuffleLowestLane = true;
+ for (int i = 0; i != NumLanes; ++i) {
+ int LaneOffset = i * NumEltsPerLane;
+ if (isSequentialOrUndefInRange(InLaneMask, LaneOffset, NumEltsPerLane,
+ i * NumEltsPerLane))
+ NumIdentityLanes++;
+ else if (CrossLaneMask[LaneOffset] != 0)
+ OnlyShuffleLowestLane = false;
+ }
+ if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
+ return SDValue();
+ }
- // If we're only shuffling a single lowest lane and the rest are identity
- // then don't bother.
- // TODO - isShuffleMaskInputInPlace could be extended to something like this.
- int NumIdentityLanes = 0;
- bool OnlyShuffleLowestLane = true;
- for (int i = 0; i != NumLanes; ++i) {
- if (isSequentialOrUndefInRange(PermMask, i * NumEltsPerLane, NumEltsPerLane,
- i * NumEltsPerLane))
- NumIdentityLanes++;
- else if (SrcLaneMask[i] != 0 && SrcLaneMask[i] != NumLanes)
- OnlyShuffleLowestLane = false;
- }
- if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
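+ // Lower as two shuffles: first move whole sublanes into the correct lanes
+ // (CrossLaneMask), then shuffle the elements within each lane (InLaneMask).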
+ SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask);
+ return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT),
+ InLaneMask);
+ };
+
+ // First attempt a solution with full lanes.
+ if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes))
+ return V;
+
+ // The rest of the solutions use sublanes.
+ if (!CanUseSublanes)
return SDValue();
- SDValue LanePermute = DAG.getVectorShuffle(VT, DL, V1, V2, LaneMask);
- return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask);
+ // Then attempt a solution with 64-bit sublanes (vpermq).
+ if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes * 2))
+ return V;
+
+ // If that doesn't work and we have fast variable shuffle,
+ // attempt 32-bit sublanes (vpermd).
+ if (!Subtarget.hasFastVariableShuffle())
+ return SDValue();
+
+ return getSublanePermute(/*NumSublanes=*/NumLanes * 4);
}
/// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one
@@ -15563,8 +15996,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
if (!IsLowZero && !IsHighZero) {
// Check for patterns which can be matched with a single insert of a 128-bit
// subvector.
- bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
- if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
+ bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1, V2);
+ if (OnlyUsesV1 || isShuffleEquivalent(Mask, {0, 1, 4, 5}, V1, V2)) {
// With AVX1, use vperm2f128 (below) to allow load folding. Otherwise,
// this will likely become vinsertf128 which can't fold a 256-bit memop.
@@ -16306,7 +16739,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Broadcast;
// Use low duplicate instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
+ if (isShuffleEquivalent(Mask, {0, 0, 2, 2}, V1, V2))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
@@ -16367,7 +16800,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have one input in place, then we can permute the other input and
// blend the result.
if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask,
+ return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG);
// Try to create an in-lane repeating shuffle mask and then shuffle the
@@ -16395,7 +16828,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have AVX2 then we always want to lower with a blend because at v4 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask,
+ return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG);
// Otherwise fall back on generic lowering.
@@ -16477,7 +16910,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have one input in place, then we can permute the other input and
// blend the result.
if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask,
+ return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG);
// Try to create an in-lane repeating shuffle mask and then shuffle the
@@ -16497,7 +16930,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Result;
// Otherwise fall back on generic blend lowering.
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask,
+ return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG);
}
@@ -16530,9 +16963,9 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
"Repeated masks must be half the mask width!");
// Use even/odd duplicate instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, RepeatedMask, {0, 0, 2, 2}))
+ if (isShuffleEquivalent(RepeatedMask, {0, 0, 2, 2}, V1, V2))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
- if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3}))
+ if (isShuffleEquivalent(RepeatedMask, {1, 1, 3, 3}, V1, V2))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);
if (V2.isUndef())
@@ -16586,14 +17019,13 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// since after split we get more efficient code using vpunpcklwd and
// vpunpckhwd instrs than vblend.
if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
- if (SDValue V = lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
- Subtarget, DAG))
- return V;
+ return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget,
+ DAG);
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2, Mask,
+ return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8f32, V1, V2, Mask,
Subtarget, DAG);
// Otherwise fall back on generic lowering.
@@ -16626,9 +17058,8 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// vpunpcklwd and vpunpckhwd instrs.
if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
!Subtarget.hasAVX512())
- if (SDValue V = lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask,
- Subtarget, DAG))
- return V;
+ return lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget,
+ DAG);
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@@ -16713,7 +17144,7 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Result;
// Otherwise fall back on generic blend lowering.
- return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2, Mask,
+ return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i32, V1, V2, Mask,
Subtarget, DAG);
}
@@ -16755,6 +17186,11 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Subtarget))
return V;
+ // Try to lower using a truncation.
+ if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i16, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
+ return V;
+
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@@ -16807,9 +17243,9 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return PSHUFB;
- // AVX512BWVL can lower to VPERMW.
- if (Subtarget.hasBWI() && Subtarget.hasVLX())
- return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG);
+ // AVX512BW can lower to VPERMW (non-VLX will pad to v32i16).
+ if (Subtarget.hasBWI())
+ return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, Subtarget, DAG);
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
@@ -16865,6 +17301,11 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Subtarget))
return V;
+ // Try to lower using a truncation.
+ if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v32i8, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
+ return V;
+
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@@ -16907,9 +17348,9 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return PSHUFB;
- // AVX512VBMIVL can lower to VPERMB.
- if (Subtarget.hasVBMI() && Subtarget.hasVLX())
- return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG);
+ // AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8).
+ if (Subtarget.hasVBMI())
+ return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, Subtarget, DAG);
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
@@ -17036,9 +17477,9 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
// Check for patterns which can be matched with a single insert of a 256-bit
// subvector.
- bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 0, 1, 2, 3});
+ bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3}, V1, V2);
if (OnlyUsesV1 ||
- isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 8, 9, 10, 11})) {
+ isShuffleEquivalent(Mask, {0, 1, 2, 3, 8, 9, 10, 11}, V1, V2)) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
SDValue SubVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2,
@@ -17123,7 +17564,7 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (V2.isUndef()) {
// Use low duplicate instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6}))
+ if (isShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1, V2))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v8f64, Mask)) {
@@ -17163,7 +17604,7 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return Blend;
- return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, Subtarget, DAG);
}
/// Handle lowering of 16-lane 32-bit floating point shuffles.
@@ -17182,9 +17623,9 @@ static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
// Use even/odd duplicate instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, RepeatedMask, {0, 0, 2, 2}))
+ if (isShuffleEquivalent(RepeatedMask, {0, 0, 2, 2}, V1, V2))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1);
- if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3}))
+ if (isShuffleEquivalent(RepeatedMask, {1, 1, 3, 3}, V1, V2))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1);
if (V2.isUndef())
@@ -17222,7 +17663,7 @@ static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
V1, V2, DAG, Subtarget))
return V;
- return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, Subtarget, DAG);
}
/// Handle lowering of 8-lane 64-bit integer shuffles.
@@ -17270,12 +17711,14 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Rotate;
// Try to use PALIGNR.
- if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask,
- Subtarget, DAG))
- return Rotate;
+ if (Subtarget.hasBWI())
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask,
+ Subtarget, DAG))
+ return Rotate;
if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Unpck;
+
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2,
DAG, Subtarget))
@@ -17285,7 +17728,7 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return Blend;
- return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, Subtarget, DAG);
}
/// Handle lowering of 16-lane 32-bit integer shuffles.
@@ -17362,7 +17805,7 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return Blend;
- return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, Subtarget, DAG);
}
/// Handle lowering of 32-lane 16-bit integer shuffles.
@@ -17425,7 +17868,7 @@ static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return PSHUFB;
- return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, Subtarget, DAG);
}
/// Handle lowering of 64-lane 8-bit integer shuffles.
@@ -17481,7 +17924,7 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// VBMI can use VPERMV/VPERMV3 byte shuffles.
if (Subtarget.hasVBMI())
- return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG);
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
@@ -17935,7 +18378,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
// Modify the new Mask to take all zeros from the all-zero vector.
// Choose indices that are blend-friendly.
bool UsedZeroVector = false;
- assert(find(WidenedMask, SM_SentinelZero) != WidenedMask.end() &&
+ assert(is_contained(WidenedMask, SM_SentinelZero) &&
"V2's non-undef elements are used?!");
for (int i = 0; i != NewNumElts; ++i)
if (WidenedMask[i] == SM_SentinelZero) {
@@ -17961,9 +18404,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
std::swap(V1, V2);
}
- if (SDValue V = lowerShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget))
- return V;
-
// For each vector width, delegate to a specialized lowering routine.
if (VT.is128BitVector())
return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
@@ -17991,9 +18431,11 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
// Only non-legal VSELECTs reach this lowering, convert those into generic
// shuffles and re-use the shuffle lowering path for blends.
- SmallVector<int, 32> Mask;
- if (createShuffleMaskFromVSELECT(Mask, Cond))
- return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask);
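+ // Only a condition built entirely from constants can be turned into a
+ // static shuffle mask.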
+ if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
+ SmallVector<int, 32> Mask;
+ if (createShuffleMaskFromVSELECT(Mask, Cond))
+ return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask);
+ }
return SDValue();
}
@@ -18107,7 +18549,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
- SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec, Idx);
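+ // PEXTRB takes its index as an i8 target constant, so rebuild it here.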
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec,
+ DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
}
@@ -18262,7 +18706,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
- SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec, Idx);
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec,
+ DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
}
@@ -18456,10 +18901,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
Opc = X86ISD::PINSRB;
}
- if (N1.getValueType() != MVT::i32)
- N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
- if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(IdxVal, dl);
+ assert(N1.getValueType() != MVT::i32 && "Unexpected VT");
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ N2 = DAG.getTargetConstant(IdxVal, dl, MVT::i8);
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
}
@@ -18707,9 +19151,12 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
if (GV) {
// Create a target global address if this is a global. If possible, fold the
// offset into the global address reference. Otherwise, ADD it on later.
+ // Suppress the folding if Offset is negative: movl foo-1, %eax is not
+ // allowed because if the address of foo is 0, the ELF R_X86_64_32
+ // relocation will compute to a negative value, which is invalid.
int64_t GlobalOffset = 0;
- if (OpFlags == X86II::MO_NO_FLAG &&
- X86::isOffsetSuitableForCodeModel(Offset, M)) {
+ if (OpFlags == X86II::MO_NO_FLAG && Offset >= 0 &&
+ X86::isOffsetSuitableForCodeModel(Offset, M, true)) {
std::swap(GlobalOffset, Offset);
}
Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GlobalOffset, OpFlags);
@@ -18796,7 +19243,7 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
}
-// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
@@ -18804,10 +19251,17 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
X86::RAX, X86II::MO_TLSGD);
}
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
+static SDValue
+LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
+ X86::EAX, X86II::MO_TLSGD);
+}
+
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
- SelectionDAG &DAG,
- const EVT PtrVT,
- bool is64Bit) {
+ SelectionDAG &DAG, const EVT PtrVT,
+ bool Is64Bit, bool Is64BitLP64) {
SDLoc dl(GA);
// Get the start address of the TLS block for this module.
@@ -18816,8 +19270,9 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
MFI->incNumLocalDynamicTLSAccesses();
SDValue Base;
- if (is64Bit) {
- Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
+ if (Is64Bit) {
+ unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
+ Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
X86II::MO_TLSLD, /*LocalDynamic=*/true);
} else {
SDValue InFlag;
@@ -18914,12 +19369,15 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
- if (Subtarget.is64Bit())
- return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.isTarget64BitLP64())
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
+ return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
+ }
return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
case TLSModel::LocalDynamic:
- return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT,
- Subtarget.is64Bit());
+ return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
+ Subtarget.isTarget64BitLP64());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
@@ -19019,7 +19477,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
else
IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo());
- auto &DL = DAG.getDataLayout();
+ const DataLayout &DL = DAG.getDataLayout();
SDValue Scale =
DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, MVT::i8);
IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale);
@@ -19112,15 +19570,29 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
if (IsFSHR)
std::swap(Op0, Op1);
+ // With AVX512 but not VLX, we need to widen to get a 512-bit result type.
+ if (!Subtarget.hasVLX() && !VT.is512BitVector()) {
+ Op0 = widenSubVector(Op0, false, Subtarget, DAG, DL, 512);
+ Op1 = widenSubVector(Op1, false, Subtarget, DAG, DL, 512);
+ }
+
+ SDValue Funnel;
APInt APIntShiftAmt;
+ MVT ResultVT = Op0.getSimpleValueType();
if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
- return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0,
- Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
+ Funnel =
+ DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, ResultVT, Op0,
+ Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
+ } else {
+ if (!Subtarget.hasVLX() && !VT.is512BitVector())
+ Amt = widenSubVector(Amt, false, Subtarget, DAG, DL, 512);
+ Funnel = DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL,
+ ResultVT, Op0, Op1, Amt);
}
-
- return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
- Op0, Op1, Amt);
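+ // If we widened the inputs above, extract the original-width result back
+ // out of the 512-bit node.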
+ if (!Subtarget.hasVLX() && !VT.is512BitVector())
+ Funnel = extractSubVector(Funnel, 0, DAG, DL, VT.getSizeInBits());
+ return Funnel;
}
assert(
(VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
@@ -19472,7 +19944,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
}
if (VT == MVT::f128)
- return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
+ return SDValue();
SDValue ValueToStore = Src;
if (SrcVT == MVT::i64 && Subtarget.hasSSE2() && !Subtarget.is64Bit())
@@ -19553,6 +20025,10 @@ static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
/// 64-bit unsigned integer to double expansion.
static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ // We can't use this algorithm for strict fp. It produces -0.0 instead of +0.0
+ // when converting 0 while rounding toward negative infinity. The caller will
+ // fall back to Expand when i64 is legal, or use FILD in 32-bit mode.
+ assert(!Op->isStrictFPOpcode() && "Expected non-strict uint_to_fp!");
// This algorithm is not obvious. Here it is what we're trying to output:
/*
movq %rax, %xmm0
@@ -19566,8 +20042,6 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
#endif
*/
- bool IsStrict = Op->isStrictFPOpcode();
- unsigned OpNo = IsStrict ? 1 : 0;
SDLoc dl(Op);
LLVMContext *Context = DAG.getContext();
@@ -19589,48 +20063,30 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
// Load the 64-bit value into an XMM register.
SDValue XR1 =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(OpNo));
- SDValue CLod0 =
- DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- /* Alignment = */ 16);
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0));
+ SDValue CLod0 = DAG.getLoad(
+ MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16));
SDValue Unpck1 =
getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0);
- SDValue CLod1 =
- DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- /* Alignment = */ 16);
+ SDValue CLod1 = DAG.getLoad(
+ MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16));
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
- SDValue Sub;
- SDValue Chain;
// TODO: Are there any fast-math-flags to propagate here?
- if (IsStrict) {
- Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
- {Op.getOperand(0), XR2F, CLod1});
- Chain = Sub.getValue(1);
- } else
- Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
- if (!IsStrict && Subtarget.hasSSE3() &&
+ if (Subtarget.hasSSE3() &&
shouldUseHorizontalOp(true, DAG, Subtarget)) {
- // FIXME: Do we need a STRICT version of FHADD?
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
- if (IsStrict) {
- Result = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v2f64, MVT::Other},
- {Chain, Shuffle, Sub});
- Chain = Result.getValue(1);
- } else
- Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
+ Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
DAG.getIntPtrConstant(0, dl));
- if (IsStrict)
- return DAG.getMergeValues({Result, Chain}, dl);
-
return Result;
}
@@ -19929,7 +20385,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
if (DstVT == MVT::f128)
- return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
+ return SDValue();
if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
@@ -19956,26 +20412,30 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
- if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
+ // The transform for i64->f64 isn't correct for 0 when rounding to negative
+ // infinity. It produces -0.0, so disable under strictfp.
+ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64 && !IsStrict)
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
if (SrcVT == MVT::i32 && X86ScalarSSEf64 && DstVT != MVT::f80)
return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
- if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
+ if (Subtarget.is64Bit() && SrcVT == MVT::i64 &&
+ (DstVT == MVT::f32 || DstVT == MVT::f64))
return SDValue();
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64, 8);
int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ Align SlotAlign(8);
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI);
if (SrcVT == MVT::i32) {
- SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
- SDValue Store1 =
- DAG.getStore(Chain, dl, Src, StackSlot, MPI, 8 /*Align*/);
+ SDValue OffsetSlot =
+ DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl);
+ SDValue Store1 = DAG.getStore(Chain, dl, Src, StackSlot, MPI, SlotAlign);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
- OffsetSlot, MPI.getWithOffset(4), 4);
+ OffsetSlot, MPI.getWithOffset(4), SlotAlign);
std::pair<SDValue, SDValue> Tmp =
- BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, Align(8), DAG);
+ BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, SlotAlign, DAG);
if (IsStrict)
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
@@ -19991,17 +20451,15 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
}
SDValue Store =
- DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Align(8));
+ DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, SlotAlign);
// For i64 source, we need to add the appropriate power of 2 if the input
- // was negative. This is the same as the optimization in
- // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
- // we must be careful to do the computation in x87 extended precision, not
- // in SSE. (The generic code can't know it's OK to do this, or how to.)
+ // was negative. We must be careful to do the computation in x87 extended
+ // precision, not in SSE.
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot };
SDValue Fild =
DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MPI,
- Align(8), MachineMemOperand::MOLoad);
+ SlotAlign, MachineMemOperand::MOLoad);
Chain = Fild.getValue(1);
@@ -20104,8 +20562,8 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// of a signed i64. Let Thresh be the FP equivalent of
// 0x8000000000000000ULL.
//
- // Adjust = (Value < Thresh) ? 0 : 0x80000000;
- // FltOfs = (Value < Thresh) ? 0 : 0x80000000;
+ // Adjust = (Value >= Thresh) ? 0x8000000000000000ULL : 0;
+ // FltOfs = (Value >= Thresh) ? Thresh : 0;
// FistSrc = (Value - FltOfs);
// Fist-to-mem64 FistSrc
// Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
@@ -20135,20 +20593,30 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
*DAG.getContext(), TheVT);
SDValue Cmp;
if (IsStrict) {
- Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT,
- Chain, /*IsSignaling*/ true);
+ Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE, Chain,
+ /*IsSignaling*/ true);
Chain = Cmp.getValue(1);
} else {
- Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT);
+ Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE);
}
- Adjust = DAG.getSelect(DL, MVT::i64, Cmp,
- DAG.getConstant(0, DL, MVT::i64),
- DAG.getConstant(APInt::getSignMask(64),
- DL, MVT::i64));
- SDValue FltOfs = DAG.getSelect(DL, TheVT, Cmp,
- DAG.getConstantFP(0.0, DL, TheVT),
- ThreshVal);
+ // Our preferred lowering of
+ //
+ // (Value >= Thresh) ? 0x8000000000000000ULL : 0
+ //
+ // is
+ //
+ // (Value >= Thresh) << 63
+ //
+ // but since we can get here after LegalOperations, DAGCombine might do the
+ // wrong thing if we create a select. So, directly create the preferred
+ // version.
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Cmp);
+ SDValue Const63 = DAG.getConstant(63, DL, MVT::i8);
+ Adjust = DAG.getNode(ISD::SHL, DL, MVT::i64, Zext, Const63);
+
+ SDValue FltOfs = DAG.getSelect(DL, TheVT, Cmp, ThreshVal,
+ DAG.getConstantFP(0.0, DL, TheVT));
if (IsStrict) {
Value = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other},
@@ -20607,30 +21075,29 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
+ In = DAG.getBitcast(MVT::v8i32, In);
+
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget.hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
- In = DAG.getBitcast(MVT::v8i32, In);
In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
DAG.getIntPtrConstant(0, DL));
}
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(0, DL));
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
- DAG.getIntPtrConstant(2, DL));
- OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
- OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(4, DL));
static const int ShufMask[] = {0, 2, 4, 6};
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
}
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
+ In = DAG.getBitcast(MVT::v32i8, In);
+
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget.hasInt256()) {
- In = DAG.getBitcast(MVT::v32i8, In);
-
// The PSHUFB mask:
static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1,
@@ -20639,21 +21106,17 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
In = DAG.getBitcast(MVT::v4i64, In);
- static const int ShufMask2[] = {0, 2, -1, -1};
- In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
- In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
- DAG.getIntPtrConstant(0, DL));
- return DAG.getBitcast(VT, In);
+ static const int ShufMask2[] = {0, 2, -1, -1};
+ In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16,
+ DAG.getBitcast(MVT::v16i16, In),
+ DAG.getIntPtrConstant(0, DL));
}
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In,
DAG.getIntPtrConstant(0, DL));
-
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
- DAG.getIntPtrConstant(4, DL));
-
- OpLo = DAG.getBitcast(MVT::v16i8, OpLo);
- OpHi = DAG.getBitcast(MVT::v16i8, OpHi);
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In,
+ DAG.getIntPtrConstant(16, DL));
// The PSHUFB mask:
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
@@ -20989,6 +21452,155 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
}
+SDValue
+X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
+ // This is based on the TargetLowering::expandFP_TO_INT_SAT implementation,
+ // but making use of X86 specifics to produce better instruction sequences.
+ SDNode *Node = Op.getNode();
+ bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
+ unsigned FpToIntOpcode = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
+ SDLoc dl(SDValue(Node, 0));
+ SDValue Src = Node->getOperand(0);
+
+ // There are three types involved here: SrcVT is the source floating point
+ // type, DstVT is the type of the result, and TmpVT is the result of the
+ // intermediate FP_TO_*INT operation we'll use (which may be a promotion of
+ // DstVT).
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+ EVT TmpVT = DstVT;
+
+ // This code is only for floats and doubles. Fall back to generic code for
+ // anything else.
+ if (!isScalarFPTypeInSSEReg(SrcVT))
+ return SDValue();
+
+ unsigned SatWidth = Node->getConstantOperandVal(1);
+ unsigned DstWidth = DstVT.getScalarSizeInBits();
+ unsigned TmpWidth = TmpVT.getScalarSizeInBits();
+ assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
+ "Expected saturation width smaller than result width");
+
+ // Promote result of FP_TO_*INT to at least 32 bits.
+ if (TmpWidth < 32) {
+ TmpVT = MVT::i32;
+ TmpWidth = 32;
+ }
+
+ // Promote conversions to unsigned 32-bit to 64-bit, because it will allow
+ // us to use a native signed conversion instead.
+ if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) {
+ TmpVT = MVT::i64;
+ TmpWidth = 64;
+ }
+
+ // If the saturation width is smaller than the size of the temporary result,
+ // we can always use signed conversion, which is native.
+ if (SatWidth < TmpWidth)
+ FpToIntOpcode = ISD::FP_TO_SINT;
+
+ // Determine minimum and maximum integer values and their corresponding
+ // floating-point values.
+ APInt MinInt, MaxInt;
+ if (IsSigned) {
+ MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
+ MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
+ } else {
+ MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
+ MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
+ }
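+ // e.g. for a signed i32 saturation, MinInt = 0x80000000 and
+ // MaxInt = 0x7fffffff, sign-extended to DstWidth if the result is wider.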
+
+ APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
+ APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
+
+ APFloat::opStatus MinStatus = MinFloat.convertFromAPInt(
+ MinInt, IsSigned, APFloat::rmTowardZero);
+ APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt(
+ MaxInt, IsSigned, APFloat::rmTowardZero);
+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact)
+ && !(MaxStatus & APFloat::opStatus::opInexact);
+
+ SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
+ SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
+
+ // If the integer bounds are exactly representable as floats, emit a
+ // min+max+fptoi sequence. Otherwise use comparisons and selects.
+ if (AreExactFloatBounds) {
+ if (DstVT != TmpVT) {
+ // Clamp by MinFloat from below. If Src is NaN, propagate NaN.
+ SDValue MinClamped = DAG.getNode(
+ X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src);
+ // Clamp by MaxFloat from above. If Src is NaN, propagate NaN.
+ SDValue BothClamped = DAG.getNode(
+ X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped);
+ // Convert clamped value to integer.
+ SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped);
+
+ // NaN will become INDVAL, with the top bit set and the rest zero.
+ // Truncation will discard the top bit, resulting in zero.
+ return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
+ }
+
+ // Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
+ SDValue MinClamped = DAG.getNode(
+ X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
+ // Clamp by MaxFloat from above. NaN cannot occur.
+ SDValue BothClamped = DAG.getNode(
+ X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
+ // Convert clamped value to integer.
+ SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
+
+ if (!IsSigned) {
+ // In the unsigned case we're done, because we mapped NaN to MinFloat,
+ // which is zero.
+ return FpToInt;
+ }
+
+ // Otherwise, select zero if Src is NaN.
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
+ return DAG.getSelectCC(
+ dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+ }
+
+ SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
+ SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
+
+ // Result of direct conversion, which may be selected away.
+ SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, Src);
+
+ if (DstVT != TmpVT) {
+ // NaN will become INDVAL, with the top bit set and the rest zero.
+ // Truncation will discard the top bit, resulting in zero.
+ FpToInt = DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
+ }
+
+ SDValue Select = FpToInt;
+ // For signed conversions where we saturate to the same size as the
+ // result type of the fptoi instructions, INDVAL coincides with integer
+ // minimum, so we don't need to explicitly check it.
+ if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) {
+ // If Src ULT MinFloat, select MinInt. In particular, this also selects
+ // MinInt if Src is NaN.
+ Select = DAG.getSelectCC(
+ dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT);
+ }
+
+ // If Src OGT MaxFloat, select MaxInt.
+ Select = DAG.getSelectCC(
+ dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT);
+
+ // In the unsigned case we are done, because we mapped NaN to MinInt, which
+ // is already zero. The promoted case was already handled above.
+ if (!IsSigned || DstVT != TmpVT) {
+ return Select;
+ }
+
+ // Otherwise, select 0 if Src is NaN.
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
+ return DAG.getSelectCC(
+ dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
+}
+
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
@@ -20997,10 +21609,8 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
MVT SVT = In.getSimpleValueType();
- if (VT == MVT::f128) {
- RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
- return LowerF128Call(Op, DAG, LC);
- }
+ if (VT == MVT::f128)
+ return SDValue();
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
@@ -21014,31 +21624,12 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
-
- MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
- MVT SVT = In.getSimpleValueType();
-
// It's legal except when f128 is involved
- if (SVT != MVT::f128)
+ if (In.getSimpleValueType() != MVT::f128)
return Op;
- RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, VT);
-
- // FP_ROUND node has a second operand indicating whether it is known to be
- // precise. That doesn't take part in the LibCall so we can't directly use
- // LowerF128Call.
-
- SDLoc dl(Op);
- SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
- MakeLibCallOptions CallOptions;
- std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, In, CallOptions,
- dl, Chain);
-
- if (IsStrict)
- return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
-
- return Tmp.first;
+ return SDValue();
}
static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -21403,8 +21994,7 @@ static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
if (M == SrcOpMap.end()) {
VT = Src.getValueType();
// Quit if not the same type.
- if (SrcOpMap.begin() != SrcOpMap.end() &&
- VT != SrcOpMap.begin()->first.getValueType())
+ if (!SrcOpMap.empty() && VT != SrcOpMap.begin()->first.getValueType())
return false;
unsigned NumElts = VT.getVectorNumElements();
APInt EltCount = APInt::getNullValue(NumElts);
@@ -21442,8 +22032,11 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
const X86Subtarget &Subtarget,
SelectionDAG &DAG, X86::CondCode &X86CC) {
EVT VT = V.getValueType();
- assert(Mask.getBitWidth() == VT.getScalarSizeInBits() &&
- "Element Mask vs Vector bitwidth mismatch");
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ if (Mask.getBitWidth() != ScalarSize) {
+ assert(ScalarSize == 1 && "Element Mask vs Vector bitwidth mismatch");
+ return SDValue();
+ }
assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE);
@@ -22347,7 +22940,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
- if (VT.getSizeInBits() > Op.getSimpleValueType().getSizeInBits()) {
+ if (VT.getFixedSizeInBits() >
+ Op.getSimpleValueType().getFixedSizeInBits()) {
// We emitted a compare with an XMM/YMM result. Finish converting to a
// mask register using a vptestm.
EVT CastVT = EVT(VT).changeVectorElementTypeToInteger();
@@ -22522,8 +23116,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
// Try to use SUBUS and PCMPEQ.
- if (SDValue V = LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))
- return V;
+ if (FlipSigns)
+ if (SDValue V =
+ LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))
+ return V;
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
@@ -23318,7 +23914,7 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
MVT SVT = VT.getVectorElementType();
MVT InSVT = InVT.getVectorElementType();
- assert(SVT.getSizeInBits() > InSVT.getSizeInBits());
+ assert(SVT.getFixedSizeInBits() > InSVT.getFixedSizeInBits());
if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
return SDValue();
@@ -23493,7 +24089,8 @@ static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
std::tie(Value0, Value1) = splitVector(StoredVal, DAG, DL);
unsigned HalfOffset = Value0.getValueType().getStoreSize();
SDValue Ptr0 = Store->getBasePtr();
- SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, HalfOffset, DL);
+ SDValue Ptr1 =
+ DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(HalfOffset), DL);
SDValue Ch0 =
DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
Store->getOriginalAlign(),
@@ -23528,7 +24125,8 @@ static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
SmallVector<SDValue, 4> Stores;
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Offset = i * ScalarSize;
- SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(), Offset, DL);
+ SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(),
+ TypeSize::Fixed(Offset), DL);
SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreSVT, StoredVal,
DAG.getIntPtrConstant(i, DL));
SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,
@@ -23549,17 +24147,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
if (StoredVal.getValueType().isVector() &&
StoredVal.getValueType().getVectorElementType() == MVT::i1) {
- assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
- "Unexpected VT");
+ unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
+ assert(NumElts <= 8 && "Unexpected VT");
assert(!St->isTruncatingStore() && "Expected non-truncating store");
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
"Expected AVX512F without AVX512DQI");
+ // We must pad with zeros to ensure we store zeroes to any unused bits.
StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
DAG.getUNDEF(MVT::v16i1), StoredVal,
DAG.getIntPtrConstant(0, dl));
StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
+ // Make sure we store zeros in the extra bits.
+ if (NumElts < 8)
+ StoredVal = DAG.getZeroExtendInReg(
+ StoredVal, dl, EVT::getIntegerVT(*DAG.getContext(), NumElts));
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getOriginalAlign(),
@@ -23815,7 +24418,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Result;
if (!Lower) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
@@ -23916,7 +24519,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MemOps.push_back(Store);
// Store fp_offset
- FIN = DAG.getMemBasePlusOffset(FIN, 4, DL);
+ FIN = DAG.getMemBasePlusOffset(FIN, TypeSize::Fixed(4), DL);
Store = DAG.getStore(
Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN,
@@ -23981,15 +24584,18 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
Subtarget.hasSSE1());
}
- // Insert VAARG_64 node into the DAG
- // VAARG_64 returns two values: Variable Argument Address, Chain
- SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32),
- DAG.getConstant(ArgMode, dl, MVT::i8),
- DAG.getConstant(Align, dl, MVT::i32)};
+ // Insert VAARG node into the DAG
+ // VAARG returns two values: Variable Argument Address, Chain
+ SDValue InstOps[] = {Chain, SrcPtr,
+ DAG.getTargetConstant(ArgSize, dl, MVT::i32),
+ DAG.getTargetConstant(ArgMode, dl, MVT::i8),
+ DAG.getTargetConstant(Align, dl, MVT::i32)};
SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other);
SDValue VAARG = DAG.getMemIntrinsicNode(
- X86ISD::VAARG_64, dl, VTs, InstOps, MVT::i64, MachinePointerInfo(SV),
- /*Align=*/None, MachineMemOperand::MOLoad | MachineMemOperand::MOStore);
+ Subtarget.isTarget64BitLP64() ? X86ISD::VAARG_64 : X86ISD::VAARG_X32, dl,
+ VTs, InstOps, MVT::i64, MachinePointerInfo(SV),
+ /*Alignment=*/None,
+ MachineMemOperand::MOLoad | MachineMemOperand::MOStore);
Chain = VAARG.getValue(1);
// Load the next argument and return it
@@ -24013,9 +24619,11 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
SDLoc DL(Op);
- return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24, DL),
- Align(8), /*isVolatile*/ false, false, false,
- MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+ return DAG.getMemcpy(
+ Chain, DL, DstPtr, SrcPtr,
+ DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 24 : 16, DL),
+ Align(Subtarget.isTarget64BitLP64() ? 8 : 4), /*isVolatile*/ false, false,
+ false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
// Helper to get immediate/variable SSE shift opcode from other shift opcodes.
@@ -24462,6 +25070,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
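+ // IMM8 intrinsic operands may arrive as wider constants; truncate them to
+ // the i8 target constant the ISD node expects.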
+ if (IntrData->Type == INTR_TYPE_3OP_IMM8 &&
+ Src3.getValueType() != MVT::i8) {
+ Src3 = DAG.getTargetConstant(
+ cast<ConstantSDNode>(Src3)->getZExtValue() & 0xff, dl, MVT::i8);
+ }
+
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -24480,9 +25094,18 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
{Src1, Src2, Src3});
}
- case INTR_TYPE_4OP:
- return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(4));
+ case INTR_TYPE_4OP_IMM8: {
+ assert(Op.getOperand(4)->getOpcode() == ISD::TargetConstant);
+ SDValue Src4 = Op.getOperand(4);
+ if (Src4.getValueType() != MVT::i8) {
+ Src4 = DAG.getTargetConstant(
+ cast<ConstantSDNode>(Src4)->getZExtValue() & 0xff, dl, MVT::i8);
+ }
+
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ Src4);
+ }
case INTR_TYPE_1OP_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
@@ -24715,20 +25338,21 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case CMP_MASK_CC: {
MVT MaskVT = Op.getSimpleValueType();
SDValue CC = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
if (IntrData->Opc1 != 0) {
- SDValue Sae = Op.getOperand(4);
+ SDValue Sae = Op.getOperand(5);
if (isRoundModeSAE(Sae))
return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2), CC, Sae);
+ Op.getOperand(2), CC, Mask, Sae);
if (!isRoundModeCurDirection(Sae))
return SDValue();
}
//default rounding mode
return DAG.getNode(IntrData->Opc0, dl, MaskVT,
- {Op.getOperand(1), Op.getOperand(2), CC});
+ {Op.getOperand(1), Op.getOperand(2), CC, Mask});
}
case CMP_MASK_SCALAR_CC: {
SDValue Src1 = Op.getOperand(1);
@@ -24883,12 +25507,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), Op.getOperand(2), RoundingMode);
}
case BEXTRI: {
- assert(IntrData->Opc0 == X86ISD::BEXTR && "Unexpected opcode");
+ assert(IntrData->Opc0 == X86ISD::BEXTRI && "Unexpected opcode");
- // The control is a TargetConstant, but we need to convert it to a
- // ConstantSDNode.
uint64_t Imm = Op.getConstantOperandVal(2);
- SDValue Control = DAG.getConstant(Imm, dl, Op.getValueType());
+ SDValue Control = DAG.getTargetConstant(Imm & 0xffff, dl,
+ Op.getValueType());
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Control);
}
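
The 0xffff mask reflects that only the low two control bytes of BEXTR/BEXTRI are meaningful: byte 0 is the start bit and byte 1 is the field length. A scalar sketch of that documented behaviour (the helper name bextr32 is illustrative, not part of the patch):

  #include <cstdint>

  uint32_t bextr32(uint32_t src, uint32_t control) {
    unsigned start = control & 0xff;       // control[7:0]  = start bit
    unsigned len = (control >> 8) & 0xff;  // control[15:8] = field length
    if (start >= 32 || len == 0)
      return 0;
    uint32_t field = (uint32_t)((uint64_t)src >> start);
    if (len < 32)
      field &= (1u << len) - 1;
    return field;
  }
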
@@ -25279,9 +25902,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// MMX register.
ShAmt = DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, ShAmt);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
- DAG.getConstant(NewIntrinsic, DL, MVT::i32),
+ DAG.getTargetConstant(NewIntrinsic, DL,
+ getPointerTy(DAG.getDataLayout())),
Op.getOperand(1), ShAmt);
-
}
}
}
@@ -25650,6 +26273,96 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
}
+ case Intrinsic::x86_aesenc128kl:
+ case Intrinsic::x86_aesdec128kl:
+ case Intrinsic::x86_aesenc256kl:
+ case Intrinsic::x86_aesdec256kl: {
+ SDLoc DL(Op);
+ SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::i32, MVT::Other);
+ SDValue Chain = Op.getOperand(0);
+ unsigned Opcode;
+
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case Intrinsic::x86_aesenc128kl:
+ Opcode = X86ISD::AESENC128KL;
+ break;
+ case Intrinsic::x86_aesdec128kl:
+ Opcode = X86ISD::AESDEC128KL;
+ break;
+ case Intrinsic::x86_aesenc256kl:
+ Opcode = X86ISD::AESENC256KL;
+ break;
+ case Intrinsic::x86_aesdec256kl:
+ Opcode = X86ISD::AESDEC256KL;
+ break;
+ }
+
+ MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+ MachineMemOperand *MMO = MemIntr->getMemOperand();
+ EVT MemVT = MemIntr->getMemoryVT();
+ SDValue Operation = DAG.getMemIntrinsicNode(
+ Opcode, DL, VTs, {Chain, Op.getOperand(2), Op.getOperand(3)}, MemVT,
+ MMO);
+ SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(1), DL, DAG);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
+ {ZF, Operation.getValue(0), Operation.getValue(2)});
+ }
+ case Intrinsic::x86_aesencwide128kl:
+ case Intrinsic::x86_aesdecwide128kl:
+ case Intrinsic::x86_aesencwide256kl:
+ case Intrinsic::x86_aesdecwide256kl: {
+ SDLoc DL(Op);
+ SDVTList VTs = DAG.getVTList(
+ {MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64,
+ MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other});
+ SDValue Chain = Op.getOperand(0);
+ unsigned Opcode;
+
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case Intrinsic::x86_aesencwide128kl:
+ Opcode = X86ISD::AESENCWIDE128KL;
+ break;
+ case Intrinsic::x86_aesdecwide128kl:
+ Opcode = X86ISD::AESDECWIDE128KL;
+ break;
+ case Intrinsic::x86_aesencwide256kl:
+ Opcode = X86ISD::AESENCWIDE256KL;
+ break;
+ case Intrinsic::x86_aesdecwide256kl:
+ Opcode = X86ISD::AESDECWIDE256KL;
+ break;
+ }
+
+ MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+ MachineMemOperand *MMO = MemIntr->getMemOperand();
+ EVT MemVT = MemIntr->getMemoryVT();
+ SDValue Operation = DAG.getMemIntrinsicNode(
+ Opcode, DL, VTs,
+ {Chain, Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
+ Op.getOperand(5), Op.getOperand(6), Op.getOperand(7),
+ Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)},
+ MemVT, MMO);
+ SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(0), DL, DAG);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
+ {ZF, Operation.getValue(1), Operation.getValue(2),
+ Operation.getValue(3), Operation.getValue(4),
+ Operation.getValue(5), Operation.getValue(6),
+ Operation.getValue(7), Operation.getValue(8),
+ Operation.getValue(9)});
+ }
+ case Intrinsic::x86_testui: {
+ SDLoc dl(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDValue Operation = DAG.getNode(X86ISD::TESTUI, dl, VTs, Chain);
+ SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
+ Operation.getValue(1));
+ }
}
return SDValue();
}
@@ -26020,9 +26733,8 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(2, dl, MVT::i64));
- OutChains[1] =
- DAG.getStore(Root, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 2),
- /* Alignment = */ 2);
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 2), Align(2));
// Load the 'nest' parameter value into R10.
// R10 is specified in X86CallingConv.td
@@ -26034,9 +26746,8 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(12, dl, MVT::i64));
- OutChains[3] =
- DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 12),
- /* Alignment = */ 2);
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12), Align(2));
// Jump to the nested function.
OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
@@ -26078,7 +26789,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
if (Attrs.hasAttribute(Idx, Attribute::InReg)) {
- auto &DL = DAG.getDataLayout();
+ const DataLayout &DL = DAG.getDataLayout();
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
}
@@ -26116,22 +26827,20 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(1, dl, MVT::i32));
- OutChains[1] =
- DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 1),
- /* Alignment = */ 1);
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 1), Align(1));
const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(5, dl, MVT::i32));
- OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8),
- Addr, MachinePointerInfo(TrmpAddr, 5),
- /* Alignment = */ 1);
+ OutChains[2] =
+ DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8), Addr,
+ MachinePointerInfo(TrmpAddr, 5), Align(1));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(6, dl, MVT::i32));
- OutChains[3] =
- DAG.getStore(Root, dl, Disp, Addr, MachinePointerInfo(TrmpAddr, 6),
- /* Alignment = */ 1);
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
+ MachinePointerInfo(TrmpAddr, 6), Align(1));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
@@ -26425,50 +27134,47 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
MVT VT = Op.getSimpleValueType();
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
+ SDLoc DL(Op);
+
if (VT.getScalarType() == MVT::i1) {
- SDLoc dl(Op);
switch (Opcode) {
default: llvm_unreachable("Expected saturated arithmetic opcode");
case ISD::UADDSAT:
case ISD::SADDSAT:
// *addsat i1 X, Y --> X | Y
- return DAG.getNode(ISD::OR, dl, VT, X, Y);
+ return DAG.getNode(ISD::OR, DL, VT, X, Y);
case ISD::USUBSAT:
case ISD::SSUBSAT:
// *subsat i1 X, Y --> X & ~Y
- return DAG.getNode(ISD::AND, dl, VT, X, DAG.getNOT(dl, Y, VT));
+ return DAG.getNode(ISD::AND, DL, VT, X, DAG.getNOT(DL, Y, VT));
}
}
- if (VT.is128BitVector()) {
- // Avoid the generic expansion with min/max if we don't have pminu*/pmaxu*.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), VT);
- SDLoc DL(Op);
- if (Opcode == ISD::UADDSAT && !TLI.isOperationLegal(ISD::UMIN, VT)) {
- // uaddsat X, Y --> (X >u (X + Y)) ? -1 : X + Y
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, X, Y);
- SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Add, ISD::SETUGT);
- return DAG.getSelect(DL, VT, Cmp, DAG.getAllOnesConstant(DL, VT), Add);
- }
- if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
- // usubsat X, Y --> (X >u Y) ? X - Y : 0
- SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
- SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
- return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
- }
- // Use default expansion.
- return SDValue();
+ if (VT == MVT::v32i16 || VT == MVT::v64i8 ||
+ (VT.is256BitVector() && !Subtarget.hasInt256())) {
+ assert(Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX vector integer operation");
+ return splitVectorIntBinary(Op, DAG);
}
- if (VT == MVT::v32i16 || VT == MVT::v64i8)
- return splitVectorIntBinary(Op, DAG);
+ // Avoid the generic expansion with min/max if we don't have pminu*/pmaxu*.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT SetCCResultType =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- assert(Op.getSimpleValueType().is256BitVector() &&
- Op.getSimpleValueType().isInteger() &&
- "Only handle AVX 256-bit vector integer operation");
- return splitVectorIntBinary(Op, DAG);
+ if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
+ // usubsat X, Y --> (X >u Y) ? X - Y : 0
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
+ SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
+ // TODO: Move this to DAGCombiner?
+ if (SetCCResultType == VT &&
+ DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
+ return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
+ return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
+ }
+
+ // Use default expansion.
+ return SDValue();
}
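
A scalar model of the usubsat path above shows why the select can collapse into an AND once the compare is known to produce an all-ones or all-zeros lane mask (usubsat8 is an illustrative name, not from the patch):

  #include <cstdint>

  uint8_t usubsat8(uint8_t x, uint8_t y) {
    uint8_t diff = x - y;                  // wraps when y > x, but then...
    uint8_t mask = (x > y) ? 0xFF : 0x00;  // ...the SETUGT lane is all zeros
    return diff & mask;                    // == (x > y) ? x - y : 0
  }
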
static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
@@ -26518,36 +27224,8 @@ static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
if (VT == MVT::v32i16 || VT == MVT::v64i8)
return splitVectorIntBinary(Op, DAG);
- SDLoc DL(Op);
- unsigned Opcode = Op.getOpcode();
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
-
- // For pre-SSE41, we can perform UMIN/UMAX v8i16 by flipping the signbit,
- // using the SMIN/SMAX instructions and flipping the signbit back.
- if (VT == MVT::v8i16) {
- assert((Opcode == ISD::UMIN || Opcode == ISD::UMAX) &&
- "Unexpected MIN/MAX opcode");
- SDValue Sign = DAG.getConstant(APInt::getSignedMinValue(16), DL, VT);
- N0 = DAG.getNode(ISD::XOR, DL, VT, N0, Sign);
- N1 = DAG.getNode(ISD::XOR, DL, VT, N1, Sign);
- Opcode = (Opcode == ISD::UMIN ? ISD::SMIN : ISD::SMAX);
- SDValue Result = DAG.getNode(Opcode, DL, VT, N0, N1);
- return DAG.getNode(ISD::XOR, DL, VT, Result, Sign);
- }
-
- // Else, expand to a compare/select.
- ISD::CondCode CC;
- switch (Opcode) {
- case ISD::SMIN: CC = ISD::CondCode::SETLT; break;
- case ISD::SMAX: CC = ISD::CondCode::SETGT; break;
- case ISD::UMIN: CC = ISD::CondCode::SETULT; break;
- case ISD::UMAX: CC = ISD::CondCode::SETUGT; break;
- default: llvm_unreachable("Unknown MINMAX opcode");
- }
-
- SDValue Cond = DAG.getSetCC(DL, VT, N0, N1, CC);
- return DAG.getSelect(DL, VT, Cond, N0, N1);
+ // Default to expand.
+ return SDValue();
}
static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
@@ -26903,8 +27581,6 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break;
case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break;
case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break;
- case ISD::SDIVREM: isSigned = true; LC = RTLIB::SDIVREM_I128; break;
- case ISD::UDIVREM: isSigned = false; LC = RTLIB::UDIVREM_I128; break;
}
SDLoc dl(Op);
@@ -26921,8 +27597,8 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
Entry.Node = StackPtr;
- InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr,
- MPI, /* Alignment = */ 16);
+ InChain =
+ DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MPI, Align(16));
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Ty = PointerType::get(ArgTy,0);
Entry.IsSExt = false;
@@ -27213,6 +27889,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
MVT VT = Amt.getSimpleValueType();
if (!(VT == MVT::v8i16 || VT == MVT::v4i32 ||
(Subtarget.hasInt256() && VT == MVT::v16i16) ||
+ (Subtarget.hasVBMI2() && VT == MVT::v32i16) ||
(!Subtarget.hasAVX512() && VT == MVT::v16i8)))
return SDValue();
@@ -27790,6 +28467,12 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}
+ // AVX512 VBMI2 vXi16 - lower to funnel shifts.
+ if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
+ unsigned FunnelOpc = (Opcode == ISD::ROTL ? ISD::FSHL : ISD::FSHR);
+ return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
+ }
+
assert((Opcode == ISD::ROTL) && "Only ROTL supported");
// XOP has 128-bit vector variable + immediate rotates.
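
The VBMI2 path works because rotl(x, r) is the funnel shift fshl(x, x, r), and rotr the matching fshr, which the VPSHLDV/VPSHRDV double-shift instructions provide for 16-bit lanes. A scalar equivalent (rotl16 is an illustrative helper, not from the patch):

  #include <cstdint>

  uint16_t rotl16(uint16_t x, unsigned r) {
    r &= 15;  // the rotate amount is taken modulo the element width
    // fshl(x, x, r): shift the concatenation x:x left by r, keep the top half.
    return (uint16_t)((x << r) | (x >> ((16 - r) & 15)));
  }
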
@@ -27816,7 +28499,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return splitVectorIntBinary(Op, DAG);
assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
- ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
+ ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8 ||
+ VT == MVT::v32i16) &&
Subtarget.hasAVX2())) &&
"Only vXi32/vXi16/vXi8 vector rotates supported");
@@ -28113,8 +28797,8 @@ bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
/// a) very likely accessed only by a single thread to minimize cache traffic,
/// and b) definitely dereferenceable. Returns the new Chain result.
static SDValue emitLockedStackOp(SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- SDValue Chain, SDLoc DL) {
+ const X86Subtarget &Subtarget, SDValue Chain,
+ const SDLoc &DL) {
// Implementation notes:
// 1) LOCK prefix creates a full read/write reordering barrier for memory
// operations issued by the current processor. As such, the location
@@ -28552,18 +29236,28 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
SDValue In = Op.getOperand(0);
SDLoc DL(Op);
+ assert(VT.getScalarType() == MVT::i8 &&
+ "Only byte vector BITREVERSE supported");
+
// Split v64i8 without BWI so that we can still use the PSHUFB lowering.
if (VT == MVT::v64i8 && !Subtarget.hasBWI())
return splitVectorIntUnary(Op, DAG);
- unsigned NumElts = VT.getVectorNumElements();
- assert(VT.getScalarType() == MVT::i8 &&
- "Only byte vector BITREVERSE supported");
-
// Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
- if (VT.is256BitVector() && !Subtarget.hasInt256())
+ if (VT == MVT::v32i8 && !Subtarget.hasInt256())
return splitVectorIntUnary(Op, DAG);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If we have GFNI, we can use GF2P8AFFINEQB to reverse the bits.
+ if (Subtarget.hasGFNI()) {
+ MVT MatrixVT = MVT::getVectorVT(MVT::i64, NumElts / 8);
+ SDValue Matrix = DAG.getConstant(0x8040201008040201ULL, DL, MatrixVT);
+ Matrix = DAG.getBitcast(VT, Matrix);
+ return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, In, Matrix,
+ DAG.getTargetConstant(0, DL, MVT::i8));
+ }
+
// Perform BITREVERSE using PSHUFB lookups. Each byte is split into
// two nibbles and a PSHUFB lookup to find the bitreverse of each
// 0-15 value (moved to the other nibble).
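
A scalar model of the PSHUFB strategy described above: reverse each nibble through a 16-entry table and swap the nibble positions. The GFNI path reaches the same per-byte result with one GF2P8AFFINEQB against the 0x8040201008040201 bit matrix (reverseByte and Rev4 are illustrative names, not from the patch):

  #include <cstdint>

  uint8_t reverseByte(uint8_t b) {
    // Bit reversal of each value 0..15, i.e. the contents of the PSHUFB table.
    static const uint8_t Rev4[16] = {0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
                                     0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF};
    return (uint8_t)((Rev4[b & 0xF] << 4) | Rev4[b >> 4]);
  }
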
@@ -28595,6 +29289,58 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
}
+static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ SDValue X = Op.getOperand(0);
+ MVT VT = Op.getSimpleValueType();
+
+ // Special case. If the input fits in 8-bits we can use a single 8-bit TEST.
+ if (VT == MVT::i8 ||
+ DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) {
+ X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
+ SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X,
+ DAG.getConstant(0, DL, MVT::i8));
+ // Copy the inverse of the parity flag into a register with setcc.
+ SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
+ // Extend to the original type.
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);
+ }
+
+ if (VT == MVT::i64) {
+ // Xor the high and low 32-bit halves together using a 32-bit operation.
+ SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i64, X,
+ DAG.getConstant(32, DL, MVT::i8)));
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+ X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi);
+ }
+
+ if (VT != MVT::i16) {
+ // Xor the high and low 16-bits together using a 32-bit operation.
+ SDValue Hi16 = DAG.getNode(ISD::SRL, DL, MVT::i32, X,
+ DAG.getConstant(16, DL, MVT::i8));
+ X = DAG.getNode(ISD::XOR, DL, MVT::i32, X, Hi16);
+ } else {
+ // If the input is 16-bits, we need to extend to use an i32 shift below.
+ X = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, X);
+ }
+
+ // Finally xor the low 2 bytes together and use an 8-bit flag-setting xor.
+ // This should allow an h-reg to be used to save a shift.
+ SDValue Hi = DAG.getNode(
+ ISD::TRUNCATE, DL, MVT::i8,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, X, DAG.getConstant(8, DL, MVT::i8)));
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
+ SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32);
+ SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1);
+
+ // Copy the inverse of the parity flag into a register with setcc.
+ SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
+ // Extend to the original type.
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);
+}
+
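
A scalar model of LowerPARITY's folding: xor the halves down to a single byte, whose parity then equals the parity of the whole value. On x86 the last step is free, since the 8-bit flag-setting xor updates PF and SETNP returns its inverse (parity64 is an illustrative helper, not from the patch):

  #include <cstdint>

  unsigned parity64(uint64_t x) {
    uint32_t v = (uint32_t)x ^ (uint32_t)(x >> 32);  // fold 64 -> 32 bits
    v ^= v >> 16;                                    // fold 32 -> 16 bits
    uint8_t b = (uint8_t)v ^ (uint8_t)(v >> 8);      // the flag-setting 8-bit xor
    b ^= b >> 4;                                     // software stand-in for PF:
    b ^= b >> 2;                                     // parity of the final byte
    b ^= b >> 1;
    return b & 1;                                    // 1 iff x has odd popcount
  }
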
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned NewOpc = 0;
@@ -28731,7 +29477,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
Chain =
DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr,
- MPI, /*Align*/ 0, MachineMemOperand::MOStore);
+ MPI, MaybeAlign(), MachineMemOperand::MOStore);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue LdOps[] = {Chain, StackPtr};
SDValue Value =
@@ -28771,6 +29517,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
MVT VT = N->getSimpleValueType(0);
+ unsigned Opc = Op.getOpcode();
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -28785,11 +29532,14 @@ static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
Carry, DAG.getAllOnesConstant(DL, CarryVT));
- unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB;
- SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0),
- Op.getOperand(1), Carry.getValue(1));
+ bool IsAdd = Opc == ISD::ADDCARRY || Opc == ISD::SADDO_CARRY;
+ SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs,
+ Op.getOperand(0), Op.getOperand(1),
+ Carry.getValue(1));
- SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG);
+ bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY;
+ SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B,
+ Sum.getValue(1), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
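
The only difference between the ADDCARRY/SUBCARRY and SADDO_CARRY/SSUBO_CARRY paths is which flag comes back: CF (unsigned wrap, read with SETB) versus OF (signed wrap, read with SETO). A 32-bit scalar model of the add case (names are illustrative, not from the patch):

  #include <cstdint>

  struct AddCarryResult {
    uint32_t Sum;
    bool Carry;     // what ADDCARRY reports (x86 CF)
    bool Overflow;  // what SADDO_CARRY reports (x86 OF)
  };

  AddCarryResult addWithCarry(uint32_t a, uint32_t b, bool carryIn) {
    uint64_t wide = (uint64_t)a + (uint64_t)b + (carryIn ? 1 : 0);
    uint32_t sum = (uint32_t)wide;
    bool carry = (wide >> 32) != 0;
    // Signed overflow: the inputs share a sign and the sum's sign differs.
    bool overflow = ((~(a ^ b) & (a ^ sum)) >> 31) != 0;
    return {sum, carry, overflow};
  }
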
@@ -29165,25 +29915,6 @@ SDValue X86TargetLowering::LowerGC_TRANSITION(SDValue Op,
return NOOP;
}
-SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
- RTLIB::Libcall Call) const {
-
- bool IsStrict = Op->isStrictFPOpcode();
- unsigned Offset = IsStrict ? 1 : 0;
- SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
-
- SDLoc dl(Op);
- SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
- MakeLibCallOptions CallOptions;
- std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, Call, MVT::f128, Ops,
- CallOptions, dl, Chain);
-
- if (IsStrict)
- return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
-
- return Tmp.first;
-}
-
// Custom split CVTPS2PH with wide types.
static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -29213,6 +29944,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget);
case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG);
+ case ISD::PARITY: return LowerPARITY(Op, Subtarget, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG);
@@ -29247,6 +29979,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG);
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::FP_ROUND:
@@ -29313,6 +30047,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
@@ -29338,35 +30074,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
-/// Places new result values for the node in Results (their number
-/// and types must exactly match those of the original return values of
-/// the node), or leaves Results empty, which indicates that the node is not
-/// to be custom lowered after all.
-void X86TargetLowering::LowerOperationWrapper(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
- SDValue Res = LowerOperation(SDValue(N, 0), DAG);
-
- if (!Res.getNode())
- return;
-
- // If the original node has one result, take the return value from
- // LowerOperation as is. It might not be result number 0.
- if (N->getNumValues() == 1) {
- Results.push_back(Res);
- return;
- }
-
- // If the original node has multiple results, then the return node should
- // have the same number of results.
- assert((N->getNumValues() == Res->getNumValues()) &&
- "Lowering returned the wrong number of results!");
-
- // Places new result values base on N result number.
- for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
- Results.push_back(Res.getValue(I));
-}
-
/// Replace a node with an illegal result type with a new node built out of
/// custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
@@ -29409,6 +30116,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Chain);
return;
}
+ case X86ISD::CVTPS2PH:
+ Results.push_back(LowerCVTPS2PH(SDValue(N, 0), DAG));
+ return;
case ISD::CTPOP: {
assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
// Use a v2i64 if possible.
@@ -29477,28 +30187,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res);
return;
}
- case ISD::ABS: {
- assert(N->getValueType(0) == MVT::i64 &&
- "Unexpected type (!= i64) on ABS.");
- MVT HalfT = MVT::i32;
- SDValue Lo, Hi, Tmp;
- SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
-
- Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
- DAG.getConstant(0, dl, HalfT));
- Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
- DAG.getConstant(1, dl, HalfT));
- Tmp = DAG.getNode(
- ISD::SRA, dl, HalfT, Hi,
- DAG.getShiftAmountConstant(HalfT.getSizeInBits() - 1, HalfT, dl));
- Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
- Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
- SDValue(Lo.getNode(), 1));
- Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
- Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi));
- return;
- }
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:
@@ -29539,10 +30227,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
- LLVM_FALLTHROUGH;
- }
- case ISD::SDIVREM:
- case ISD::UDIVREM: {
SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
Results.push_back(V);
return;
@@ -29676,7 +30360,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
assert(isTypeLegal(LoVT) && "Split VT not legal?");
- SDValue Lo = getExtendInVec(N->getOpcode(), dl, LoVT, In, DAG);
+ SDValue Lo = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, LoVT, In, DAG);
// We need to shift the input over by half the number of elements.
unsigned NumElts = InVT.getVectorNumElements();
@@ -29686,7 +30370,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
ShufMask[i] = i + HalfNumElts;
SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
- Hi = getExtendInVec(N->getOpcode(), dl, HiVT, Hi, DAG);
+ Hi = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, HiVT, Hi, DAG);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
Results.push_back(Res);
@@ -30037,46 +30721,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
swapInH =
DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX,
swapInH, cpInH.getValue(1));
- // If the current function needs the base pointer, RBX,
- // we shouldn't use cmpxchg directly.
- // Indeed the lowering of that instruction will clobber
- // that register and since RBX will be a reserved register
- // the register allocator will not make sure its value will
- // be properly saved and restored around this live-range.
- const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+ // In 64-bit mode we might need the base pointer in RBX, but we can't know
+ // until later. So we keep the RBX input in a vreg and use a custom
+ // inserter.
+ // If RBX is a reserved register (e.g. as the base pointer), the register
+ // allocator will not save and restore its value around this live range,
+ // so the custom inserter has to do that itself.
SDValue Result;
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- Register BasePtr = TRI->getBaseRegister();
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
- if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
- (BasePtr == X86::RBX || BasePtr == X86::EBX)) {
- // ISel prefers the LCMPXCHG64 variant.
- // If that assert breaks, that means it is not the case anymore,
- // and we need to teach LCMPXCHG8_SAVE_EBX_DAG how to save RBX,
- // not just EBX. This is a matter of accepting i64 input for that
- // pseudo, and restoring into the register of the right wide
- // in expand pseudo. Everything else should just work.
- assert(((Regs64bit == (BasePtr == X86::RBX)) || BasePtr == X86::EBX) &&
- "Saving only half of the RBX");
- unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_SAVE_RBX_DAG
- : X86ISD::LCMPXCHG8_SAVE_EBX_DAG;
- SDValue RBXSave = DAG.getCopyFromReg(swapInH.getValue(0), dl,
- Regs64bit ? X86::RBX : X86::EBX,
- HalfT, swapInH.getValue(1));
- SDValue Ops[] = {/*Chain*/ RBXSave.getValue(1), N->getOperand(1), swapInL,
- RBXSave,
- /*Glue*/ RBXSave.getValue(2)};
- Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
+ if (Regs64bit) {
+ SDValue Ops[] = {swapInH.getValue(0), N->getOperand(1), swapInL,
+ swapInH.getValue(1)};
+ Result =
+ DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG16_DAG, dl, Tys, Ops, T, MMO);
} else {
- unsigned Opcode =
- Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG;
- swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl,
- Regs64bit ? X86::RBX : X86::EBX, swapInL,
+ swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl, X86::EBX, swapInL,
swapInH.getValue(1));
SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1),
swapInL.getValue(1)};
- Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
+ Result =
+ DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, T, MMO);
}
+
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
HalfT, Result.getValue(1));
@@ -30321,8 +30989,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(COMI)
NODE_NAME_CASE(UCOMI)
NODE_NAME_CASE(CMPM)
+ NODE_NAME_CASE(CMPMM)
NODE_NAME_CASE(STRICT_CMPM)
- NODE_NAME_CASE(CMPM_SAE)
+ NODE_NAME_CASE(CMPMM_SAE)
NODE_NAME_CASE(SETCC)
NODE_NAME_CASE(SETCC_CARRY)
NODE_NAME_CASE(FSETCC)
@@ -30381,7 +31050,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(LCMPXCHG_DAG)
NODE_NAME_CASE(LCMPXCHG8_DAG)
NODE_NAME_CASE(LCMPXCHG16_DAG)
- NODE_NAME_CASE(LCMPXCHG8_SAVE_EBX_DAG)
NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
NODE_NAME_CASE(LADD)
NODE_NAME_CASE(LSUB)
@@ -30441,6 +31109,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(XOR)
NODE_NAME_CASE(AND)
NODE_NAME_CASE(BEXTR)
+ NODE_NAME_CASE(BEXTRI)
NODE_NAME_CASE(BZHI)
NODE_NAME_CASE(PDEP)
NODE_NAME_CASE(PEXT)
@@ -30478,7 +31147,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VBROADCAST)
NODE_NAME_CASE(VBROADCAST_LOAD)
NODE_NAME_CASE(VBROADCASTM)
- NODE_NAME_CASE(SUBV_BROADCAST)
+ NODE_NAME_CASE(SUBV_BROADCAST_LOAD)
NODE_NAME_CASE(VPERMILPV)
NODE_NAME_CASE(VPERMILPI)
NODE_NAME_CASE(VPERM2X128)
@@ -30500,6 +31169,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(DBPSADBW)
NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
NODE_NAME_CASE(VAARG_64)
+ NODE_NAME_CASE(VAARG_X32)
NODE_NAME_CASE(WIN_ALLOCA)
NODE_NAME_CASE(MEMBARRIER)
NODE_NAME_CASE(MFENCE)
@@ -30656,6 +31326,15 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ENQCMD)
NODE_NAME_CASE(ENQCMDS)
NODE_NAME_CASE(VP2INTERSECT)
+ NODE_NAME_CASE(AESENC128KL)
+ NODE_NAME_CASE(AESDEC128KL)
+ NODE_NAME_CASE(AESENC256KL)
+ NODE_NAME_CASE(AESDEC256KL)
+ NODE_NAME_CASE(AESENCWIDE128KL)
+ NODE_NAME_CASE(AESDECWIDE128KL)
+ NODE_NAME_CASE(AESENCWIDE256KL)
+ NODE_NAME_CASE(AESDECWIDE256KL)
+ NODE_NAME_CASE(TESTUI)
}
return nullptr;
#undef NODE_NAME_CASE
@@ -31001,7 +31680,7 @@ static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr,
/// Utility function to emit xbegin specifying the start of an RTM region.
static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
const TargetInstrInfo *TII) {
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
@@ -31080,11 +31759,9 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
return sinkMBB;
}
-
-
MachineBasicBlock *
-X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
+X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
// Emit va_arg instruction on X86-64.
// Operands to this pseudo-instruction:
@@ -31095,9 +31772,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// 8 ) Align : Alignment of type
// 9 ) EFLAGS (implicit-def)
- assert(MI.getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
- static_assert(X86::AddrNumOperands == 5,
- "VAARG_64 assumes 5 address operands");
+ assert(MI.getNumOperands() == 10 && "VAARG should have 10 operands!");
+ static_assert(X86::AddrNumOperands == 5, "VAARG assumes 5 address operands");
Register DestReg = MI.getOperand(0).getReg();
MachineOperand &Base = MI.getOperand(1);
@@ -31112,7 +31788,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
MachineFunction *MF = MBB->getParent();
// Memory Reference
- assert(MI.hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+ assert(MI.hasOneMemOperand() && "Expected VAARG to have one memoperand");
MachineMemOperand *OldMMO = MI.memoperands().front();
@@ -31125,9 +31801,10 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Machine Information
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+ const TargetRegisterClass *AddrRegClass =
+ getRegClassFor(getPointerTy(MBB->getParent()->getDataLayout()));
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
// struct va_list {
// i32 gp_offset
@@ -31236,25 +31913,35 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Read the reg_save_area address.
Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
- BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+ BuildMI(
+ offsetMBB, DL,
+ TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
+ RegSaveReg)
.add(Base)
.add(Scale)
.add(Index)
- .addDisp(Disp, 16)
+ .addDisp(Disp, Subtarget.isTarget64BitLP64() ? 16 : 12)
.add(Segment)
.setMemRefs(LoadOnlyMMO);
- // Zero-extend the offset
- Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
- BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
- .addImm(0)
- .addReg(OffsetReg)
- .addImm(X86::sub_32bit);
+ if (Subtarget.isTarget64BitLP64()) {
+ // Zero-extend the offset
+ Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addImm(X86::sub_32bit);
- // Add the offset to the reg_save_area to get the final address.
- BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
- .addReg(OffsetReg64)
- .addReg(RegSaveReg);
+ // Add the offset to the reg_save_area to get the final address.
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ .addReg(OffsetReg64)
+ .addReg(RegSaveReg);
+ } else {
+ // Add the offset to the reg_save_area to get the final address.
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD32rr), OffsetDestReg)
+ .addReg(OffsetReg)
+ .addReg(RegSaveReg);
+ }
// Compute the offset for the next argument
Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
@@ -31283,7 +31970,9 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Load the overflow_area address into a register.
Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
- BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+ BuildMI(overflowMBB, DL,
+ TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
+ OverflowAddrReg)
.add(Base)
.add(Scale)
.add(Index)
@@ -31298,11 +31987,17 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
Register TmpReg = MRI.createVirtualRegister(AddrRegClass);
// aligned_addr = (addr + (align-1)) & ~(align-1)
- BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+ BuildMI(
+ overflowMBB, DL,
+ TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
+ TmpReg)
.addReg(OverflowAddrReg)
.addImm(Alignment.value() - 1);
- BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+ BuildMI(
+ overflowMBB, DL,
+ TII->get(Subtarget.isTarget64BitLP64() ? X86::AND64ri32 : X86::AND32ri),
+ OverflowDestReg)
.addReg(TmpReg)
.addImm(~(uint64_t)(Alignment.value() - 1));
} else {
@@ -31313,12 +32008,16 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Compute the next overflow address after this argument.
// (the overflow address should be kept 8-byte aligned)
Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
- BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
- .addReg(OverflowDestReg)
- .addImm(ArgSizeA8);
+ BuildMI(
+ overflowMBB, DL,
+ TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
+ NextAddrReg)
+ .addReg(OverflowDestReg)
+ .addImm(ArgSizeA8);
// Store the new overflow address.
- BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+ BuildMI(overflowMBB, DL,
+ TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64mr : X86::MOV32mr))
.add(Base)
.add(Scale)
.add(Index)
@@ -31374,10 +32073,10 @@ MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
// Now add the instructions.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
Register CountReg = MI.getOperand(0).getReg();
- int64_t RegSaveFrameIndex = MI.getOperand(1).getImm();
+ int RegSaveFrameIndex = MI.getOperand(1).getImm();
int64_t VarArgsFPOffset = MI.getOperand(2).getImm();
if (!Subtarget.isCallingConvWin64(F->getFunction().getCallingConv())) {
@@ -31686,7 +32385,7 @@ MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
MachineBasicBlock *ThisMBB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -31841,7 +32540,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
const unsigned ProbeSize = getStackProbeSize(*MF);
@@ -31934,7 +32633,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
@@ -31969,7 +32668,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
const TargetRegisterClass *AddrRegClass =
getRegClassFor(getPointerTy(MF->getDataLayout()));
- unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+ Register mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
@@ -32069,7 +32768,7 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
MachineFunction *MF = BB->getParent();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
assert(!isAsynchronousEHPersonality(
classifyEHPersonality(MF->getFunction().getPersonalityFn())) &&
@@ -32107,7 +32806,7 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
// inside MC, therefore without the two markers shrink-wrapping
// may push the prologue/epilogue pass them.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
MachineFunction &MF = *BB->getParent();
// Emit CALLSEQ_START right before the instruction.
@@ -32136,7 +32835,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
// be in the normal return register.
MachineFunction *F = BB->getParent();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
assert(MI.getOperand(3).isGlobal() && "This should be a global");
@@ -32275,7 +32974,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
MachineBasicBlock *BB) const {
// Copy the virtual register into the R11 physical register and
// call the retpoline thunk.
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
Register CalleeVReg = MI.getOperand(0).getReg();
unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());
@@ -32337,7 +33036,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
/// \param [in] MBB The Machine Basic Block that will be modified.
void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -32380,7 +33079,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -32540,7 +33239,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -32721,7 +33420,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -32805,7 +33504,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB,
int FI) const {
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
@@ -32854,7 +33553,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *BB) const {
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
@@ -33084,7 +33783,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
auto TMMImmToTMMReg = [](unsigned Imm) {
assert (Imm < 8 && "Illegal tmm index");
@@ -33094,8 +33793,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
default: llvm_unreachable("Unexpected instr type to insert");
case X86::TLS_addr32:
case X86::TLS_addr64:
+ case X86::TLS_addrX32:
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
+ case X86::TLS_base_addrX32:
return EmitLoweredTLSAddr(MI, BB);
case X86::INDIRECT_THUNK_CALL32:
case X86::INDIRECT_THUNK_CALL64:
@@ -33251,7 +33952,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
case X86::VAARG_64:
- return EmitVAARG64WithCustomInserter(MI, BB);
+ case X86::VAARG_X32:
+ return EmitVAARGWithCustomInserter(MI, BB);
case X86::EH_SjLj_SetJmp32:
case X86::EH_SjLj_SetJmp64:
@@ -33274,10 +33976,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitPatchPoint(MI, BB);
case TargetOpcode::PATCHABLE_EVENT_CALL:
- return emitXRayCustomEvent(MI, BB);
-
case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
- return emitXRayTypedEvent(MI, BB);
+ return BB;
case X86::LCMPXCHG8B: {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -33332,14 +34032,75 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return BB;
}
- case X86::LCMPXCHG16B:
+ case X86::LCMPXCHG16B_NO_RBX: {
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ Register BasePtr = TRI->getBaseRegister();
+ if (TRI->hasBasePointer(*MF) &&
+ (BasePtr == X86::RBX || BasePtr == X86::EBX)) {
+ if (!BB->isLiveIn(BasePtr))
+ BB->addLiveIn(BasePtr);
+ // Save RBX into a virtual register.
+ Register SaveRBX =
+ MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
+ .addReg(X86::RBX);
+ Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);
+ for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
+ MIB.add(MI.getOperand(Idx));
+ MIB.add(MI.getOperand(X86::AddrNumOperands));
+ MIB.addReg(SaveRBX);
+ } else {
+ // Simple case, just copy the virtual register to RBX.
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX)
+ .add(MI.getOperand(X86::AddrNumOperands));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B));
+ for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
+ MIB.add(MI.getOperand(Idx));
+ }
+ MI.eraseFromParent();
return BB;
- case X86::LCMPXCHG8B_SAVE_EBX:
- case X86::LCMPXCHG16B_SAVE_RBX: {
- unsigned BasePtr =
- MI.getOpcode() == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX;
- if (!BB->isLiveIn(BasePtr))
- BB->addLiveIn(BasePtr);
+ }
+ case X86::MWAITX: {
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ Register BasePtr = TRI->getBaseRegister();
+ bool IsRBX = (BasePtr == X86::RBX || BasePtr == X86::EBX);
+ // If no need to save the base pointer, we generate MWAITXrrr,
+ // else we generate pseudo MWAITX_SAVE_RBX.
+ if (!IsRBX || !TRI->hasBasePointer(*MF)) {
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI.getOperand(0).getReg());
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
+ .addReg(MI.getOperand(1).getReg());
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EBX)
+ .addReg(MI.getOperand(2).getReg());
+ BuildMI(*BB, MI, DL, TII->get(X86::MWAITXrrr));
+ MI.eraseFromParent();
+ } else {
+ if (!BB->isLiveIn(BasePtr)) {
+ BB->addLiveIn(BasePtr);
+ }
+ // Parameters can be copied into ECX and EAX but not EBX yet.
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI.getOperand(0).getReg());
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
+ .addReg(MI.getOperand(1).getReg());
+ assert(Subtarget.is64Bit() && "Expected 64-bit mode!");
+ // Save RBX into a virtual register.
+ Register SaveRBX =
+ MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
+ .addReg(X86::RBX);
+ // Generate mwaitx pseudo.
+ Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
+ BuildMI(*BB, MI, DL, TII->get(X86::MWAITX_SAVE_RBX))
+ .addDef(Dst) // Destination tied in with SaveRBX.
+ .addReg(MI.getOperand(2).getReg()) // input value of EBX.
+ .addUse(SaveRBX); // Save of base pointer.
+ MI.eraseFromParent();
+ }
return BB;
}
case TargetOpcode::PREALLOCATED_SETUP: {
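
Both of the expansions above exist because LOCK CMPXCHG16B and MWAITX take one of their operands in RBX/EBX, the register that can also serve as the base pointer, so its old value has to be parked in a virtual register around the instruction. For comparison, a hand-written sketch of the same RBX constraint (GCC/Clang extended asm for x86-64 only; not code from the patch):

  #include <cstdint>

  // Returns true if *addr (16-byte aligned) still held expLo:expHi and was
  // replaced by newLo:newHi.  The "b" constraint forces newLo into RBX, which
  // is exactly why the pseudo above has to save RBX in a virtual register.
  bool cas128(volatile unsigned __int128 *addr, uint64_t &expLo, uint64_t &expHi,
              uint64_t newLo, uint64_t newHi) {
    unsigned char ok;
    asm volatile("lock cmpxchg16b %1\n\tsete %0"
                 : "=q"(ok), "+m"(*addr), "+a"(expLo), "+d"(expHi)
                 : "b"(newLo), "c"(newHi)
                 : "cc", "memory");
    return ok;
  }
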
@@ -33377,7 +34138,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTDPBUSD:
case X86::PTDPBUUD:
case X86::PTDPBF16PS: {
- const DebugLoc &DL = MI.getDebugLoc();
unsigned Opc;
switch (MI.getOpcode()) {
case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
@@ -33397,7 +34157,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return BB;
}
case X86::PTILEZERO: {
- const DebugLoc &DL = MI.getDebugLoc();
unsigned Imm = MI.getOperand(0).getImm();
BuildMI(*BB, MI, DL, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
MI.eraseFromParent(); // The pseudo is gone now.
@@ -33406,7 +34165,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILELOADD:
case X86::PTILELOADDT1:
case X86::PTILESTORED: {
- const DebugLoc &DL = MI.getDebugLoc();
unsigned Opc;
switch (MI.getOpcode()) {
case X86::PTILELOADD: Opc = X86::TILELOADD; break;
@@ -33607,13 +34365,11 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits Known2;
if (!!DemandedLHS) {
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedLHS, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
if (!!DemandedRHS) {
Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedRHS, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
if (Known.countMinLeadingZeros() < BitWidth)
@@ -33656,11 +34412,11 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
break;
}
- case X86ISD::BEXTR: {
+ case X86ISD::BEXTR:
+ case X86ISD::BEXTRI: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -33682,6 +34438,28 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
}
+ case X86ISD::PDEP: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // Zeros are retained from the mask operand. But not ones.
+ Known.One.clearAllBits();
+ // The result will have at least as many trailing zeros as the non-mask
+ // operand since bits can only map to the same or higher bit position.
+ Known.Zero.setLowBits(Known2.countMinTrailingZeros());
+ break;
+ }
+ case X86ISD::PEXT: {
+ Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ // The result has at least as many leading zeros as there are zero bits in the mask.
+ unsigned Count = Known.Zero.countPopulation();
+ Known.Zero = APInt::getHighBitsSet(BitWidth, Count);
+ Known.One.clearAllBits();
+ break;
+ }
+ case X86ISD::VTRUNC:
+ case X86ISD::VTRUNCS:
+ case X86ISD::VTRUNCUS:
case X86ISD::CVTSI2P:
case X86ISD::CVTUI2P:
case X86ISD::CVTP2SI:
@@ -33698,7 +34476,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case X86ISD::VMFPROUND:
case X86ISD::CVTPS2PH:
case X86ISD::MCVTPS2PH: {
- // Conversions - upper elements are known zero.
+ // Truncations/Conversions - upper elements are known zero.
EVT SrcVT = Op.getOperand(0).getValueType();
if (SrcVT.isVector()) {
unsigned NumSrcElts = SrcVT.getVectorNumElements();
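
Scalar models of PDEP and PEXT make the new known-bits rules above easy to check: PDEP can only populate positions where the mask is set, so the mask's trailing zeros survive, and PEXT packs at most popcount(mask) bits into the low end, so the high bits of its result are zero (pdep64/pext64 are illustrative helpers, not from the patch):

  #include <cstdint>

  uint64_t pdep64(uint64_t src, uint64_t mask) {
    uint64_t dst = 0;
    for (unsigned i = 0, k = 0; i < 64; ++i)
      if (mask & (1ULL << i)) {
        if (src & (1ULL << k))
          dst |= 1ULL << i;  // source bit k lands on mask bit i
        ++k;
      }
    return dst;
  }

  uint64_t pext64(uint64_t src, uint64_t mask) {
    uint64_t dst = 0;
    for (unsigned i = 0, k = 0; i < 64; ++i)
      if (mask & (1ULL << i)) {
        if (src & (1ULL << i))
          dst |= 1ULL << k;  // mask bit i is packed down to bit k
        ++k;
      }
    return dst;
  }
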
@@ -33776,8 +34554,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
continue;
KnownBits Known2 =
DAG.computeKnownBits(Ops[i], DemandedOps[i], Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
}
}
@@ -33956,11 +34733,18 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
// Match against a VZEXT_MOVL vXi32 zero-extending instruction.
- if (MaskEltSize == 32 && isUndefOrEqual(Mask[0], 0) &&
- isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) {
- Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
- return true;
+ if (MaskEltSize == 32 && Mask[0] == 0) {
+ if (isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) {
+ Shuffle = X86ISD::VZEXT_MOVL;
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+ return true;
+ }
+ if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
+ Shuffle = X86ISD::VZEXT_MOVL;
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+ return true;
+ }
}
// Match against a ANY/ZERO_EXTEND_VECTOR_INREG instruction.
@@ -34014,17 +34798,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// instructions are no slower than UNPCKLPD but has the option to
// fold the input operand into even an unaligned memory load.
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
- if (isTargetShuffleEquivalent(Mask, {0, 0})) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v2f64;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3})) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
@@ -34033,17 +34817,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (MaskVT.is256BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
- if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v4f64;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3, 5, 5, 7, 7})) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
@@ -34053,19 +34837,21 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (MaskVT.is512BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX512() &&
"AVX512 required for 512-bit vector shuffles");
- if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v8f64;
return true;
}
if (isTargetShuffleEquivalent(
- Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14})) {
+ MaskVT, Mask,
+ {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
}
if (isTargetShuffleEquivalent(
- Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15})) {
+ MaskVT, Mask,
+ {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
@@ -34147,7 +34933,10 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
// Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
- if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
+ if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16 &&
+ ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasBWI()))) {
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
@@ -34217,30 +35006,31 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
SelectionDAG &DAG, const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
bool IsUnary) {
+ unsigned NumMaskElts = Mask.size();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
- if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}) && AllowFloatDomain) {
V2 = V1;
V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1);
Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS;
SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}) && AllowFloatDomain) {
V2 = V1;
Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS;
SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() &&
- (AllowFloatDomain || !Subtarget.hasSSE41())) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}) &&
+ Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) {
std::swap(V1, V2);
Shuffle = X86ISD::MOVSD;
SrcVT = DstVT = MVT::v2f64;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) &&
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}) &&
(AllowFloatDomain || !Subtarget.hasSSE41())) {
Shuffle = X86ISD::MOVSS;
SrcVT = DstVT = MVT::v4f32;
@@ -34274,6 +35064,46 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
+ // Attempt to match against an OR if we're performing a blend shuffle and the
+ // non-blended source element is zero in each case.
+ if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
+ (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) {
+ bool IsBlend = true;
+ unsigned NumV1Elts = V1.getValueType().getVectorNumElements();
+ unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
+ unsigned Scale1 = NumV1Elts / NumMaskElts;
+ unsigned Scale2 = NumV2Elts / NumMaskElts;
+ APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts);
+ APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts);
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int M = Mask[i];
+ if (M == SM_SentinelUndef)
+ continue;
+ if (M == SM_SentinelZero) {
+ DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
+ DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
+ continue;
+ }
+ if (M == (int)i) {
+ DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
+ continue;
+ }
+ if (M == (int)(i + NumMaskElts)) {
+ DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
+ continue;
+ }
+ IsBlend = false;
+ break;
+ }
+ if (IsBlend &&
+ DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
+ DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
+ Shuffle = ISD::OR;
+ SrcVT = DstVT = MaskVT.changeTypeToInteger();
+ return true;
+ }
+ }
+
return false;
}
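// [Editorial sketch, not part of the upstream patch.] The OR match added above rests
// on a simple identity: if, for every lane, the source element that the blend does
// *not* select is known to be zero, then the blend is just a bitwise OR of the two
// inputs. A minimal standalone scalar model, ignoring the undef/zero sentinels and
// using hypothetical names (illustration only):
#include <cassert>
#include <cstdint>
#include <vector>
static bool blendLowersToOr(const std::vector<int> &Mask,
                            const std::vector<uint64_t> &A,
                            const std::vector<uint64_t> &B) {
  const int N = (int)Mask.size();
  for (int i = 0; i != N; ++i) {
    if (Mask[i] == i && B[i] != 0)     // A[i] selected: B[i] must be known zero.
      return false;
    if (Mask[i] == i + N && A[i] != 0) // B[i] selected: A[i] must be known zero.
      return false;
  }
  return true;                         // then blend(A, B, Mask)[i] == A[i] | B[i]
}
int main() {
  // Lane 0 takes A, lane 1 takes B; the unselected elements are zero, so OR works.
  assert(blendLowersToOr({0, 3}, {7, 0}, {0, 9}));
  assert((7 | 0) == 7 && (0 | 9) == 9);
}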
@@ -34462,6 +35292,16 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
"Unexpected number of shuffle inputs!");
+ MVT RootVT = Root.getSimpleValueType();
+ unsigned RootSizeInBits = RootVT.getSizeInBits();
+ unsigned NumRootElts = RootVT.getVectorNumElements();
+
+ // Canonicalize shuffle input op to the requested type.
+ // TODO: Support cases where Op is smaller than VT.
+ auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) {
+ return DAG.getBitcast(VT, Op);
+ };
+
// Find the inputs that enter the chain. Note that multiple uses are OK
// here, we're not going to remove the operands we find.
bool UnaryShuffle = (Inputs.size() == 1);
@@ -34471,10 +35311,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
MVT VT1 = V1.getSimpleValueType();
MVT VT2 = V2.getSimpleValueType();
- MVT RootVT = Root.getSimpleValueType();
- assert(VT1.getSizeInBits() == RootVT.getSizeInBits() &&
- VT2.getSizeInBits() == RootVT.getSizeInBits() &&
- "Vector size mismatch");
+ assert(VT1.getSizeInBits() == RootSizeInBits &&
+ VT2.getSizeInBits() == RootSizeInBits && "Vector size mismatch");
SDLoc DL(Root);
SDValue Res;
@@ -34482,12 +35320,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
unsigned NumBaseMaskElts = BaseMask.size();
if (NumBaseMaskElts == 1) {
assert(BaseMask[0] == 0 && "Invalid shuffle index found!");
- return DAG.getBitcast(RootVT, V1);
+ return CanonicalizeShuffleInput(RootVT, V1);
}
bool OptForSize = DAG.shouldOptForSize();
- unsigned RootSizeInBits = RootVT.getSizeInBits();
- unsigned NumRootElts = RootVT.getVectorNumElements();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
(RootVT.isFloatingPoint() && Depth >= 1) ||
@@ -34508,33 +35344,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// we can just use the broadcast directly. This works for smaller broadcast
// elements as well as they already repeat across each mask element
if (UnaryShuffle && isTargetShuffleSplat(V1) && !isAnyZero(BaseMask) &&
- (BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0) {
- return DAG.getBitcast(RootVT, V1);
- }
-
- // Attempt to match a subvector broadcast.
- // shuffle(insert_subvector(undef, sub, 0), undef, 0, 0, 0, 0)
- if (UnaryShuffle &&
- (BaseMaskEltSizeInBits == 128 || BaseMaskEltSizeInBits == 256)) {
- SmallVector<int, 64> BroadcastMask(NumBaseMaskElts, 0);
- if (isTargetShuffleEquivalent(BaseMask, BroadcastMask)) {
- SDValue Src = Inputs[0];
- if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
- Src.getOperand(0).isUndef() &&
- Src.getOperand(1).getValueSizeInBits() == BaseMaskEltSizeInBits &&
- MayFoldLoad(Src.getOperand(1)) && isNullConstant(Src.getOperand(2))) {
- return DAG.getBitcast(RootVT, DAG.getNode(X86ISD::SUBV_BROADCAST, DL,
- Src.getValueType(),
- Src.getOperand(1)));
- }
- }
+ (BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
+ V1.getValueSizeInBits() >= RootSizeInBits) {
+ return CanonicalizeShuffleInput(RootVT, V1);
}
// Handle 128/256-bit lane shuffles of 512-bit vectors.
if (RootVT.is512BitVector() &&
(NumBaseMaskElts == 2 || NumBaseMaskElts == 4)) {
- MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
-
// If the upper subvectors are zeroable, then an extract+insert is more
// optimal than using X86ISD::SHUF128. The insertion is free, even if it has
// to zero the upper subvectors.
@@ -34543,12 +35360,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return SDValue(); // Nothing to do!
assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) &&
"Unexpected lane shuffle");
- Res = DAG.getBitcast(ShuffleVT, V1);
- unsigned SubIdx = BaseMask[0] * (8 / NumBaseMaskElts);
+ Res = CanonicalizeShuffleInput(RootVT, V1);
+ unsigned SubIdx = BaseMask[0] * (NumRootElts / NumBaseMaskElts);
bool UseZero = isAnyZero(BaseMask);
Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits);
- Res = widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
- return DAG.getBitcast(RootVT, Res);
+ return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
}
// Narrow shuffle mask to v4x128.
@@ -34557,8 +35373,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, Mask);
// Try to lower to vshuf64x2/vshuf32x4.
- auto MatchSHUF128 = [](MVT ShuffleVT, const SDLoc &DL, ArrayRef<int> Mask,
- SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL, ArrayRef<int> Mask,
+ SDValue V1, SDValue V2, SelectionDAG &DAG) {
unsigned PermMask = 0;
// Ensure elements came from the same Op.
SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
@@ -34581,8 +35397,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT,
- DAG.getBitcast(ShuffleVT, Ops[0]),
- DAG.getBitcast(ShuffleVT, Ops[1]),
+ CanonicalizeShuffleInput(ShuffleVT, Ops[0]),
+ CanonicalizeShuffleInput(ShuffleVT, Ops[1]),
DAG.getTargetConstant(PermMask, DL, MVT::i8));
};
@@ -34597,6 +35413,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(Mask[1] < 0 || Mask[3] < 0 || Mask[1] == (Mask[3] % 2));
if (!isAnyZero(Mask) && !PreferPERMQ) {
+ if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
+ return SDValue(); // Nothing to do!
+ MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG))
return DAG.getBitcast(RootVT, V);
}
@@ -34604,8 +35423,6 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Handle 128-bit lane shuffles of 256-bit vectors.
if (RootVT.is256BitVector() && NumBaseMaskElts == 2) {
- MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
-
// If the upper half is zeroable, then an extract+insert is more optimal
// than using X86ISD::VPERM2X128. The insertion is free, even if it has to
// zero the upper half.
@@ -34613,11 +35430,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle");
- Res = DAG.getBitcast(ShuffleVT, V1);
- Res = extract128BitVector(Res, BaseMask[0] * 2, DAG, DL);
- Res = widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG,
- DL, 256);
- return DAG.getBitcast(RootVT, Res);
+ Res = CanonicalizeShuffleInput(RootVT, V1);
+ Res = extract128BitVector(Res, BaseMask[0] * (NumRootElts / 2), DAG, DL);
+ return widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG,
+ DL, 256);
}
if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
@@ -34632,12 +35448,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
unsigned PermMask = 0;
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
-
- Res = DAG.getBitcast(ShuffleVT, V1);
- Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
- DAG.getUNDEF(ShuffleVT),
- DAG.getTargetConstant(PermMask, DL, MVT::i8));
- return DAG.getBitcast(RootVT, Res);
+ return DAG.getNode(
+ X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1),
+ DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
@@ -34653,13 +35466,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
unsigned PermMask = 0;
PermMask |= ((BaseMask[0] & 3) << 0);
PermMask |= ((BaseMask[1] & 3) << 4);
-
- Res = DAG.getNode(
- X86ISD::VPERM2X128, DL, ShuffleVT,
- DAG.getBitcast(ShuffleVT, isInRange(BaseMask[0], 0, 2) ? V1 : V2),
- DAG.getBitcast(ShuffleVT, isInRange(BaseMask[1], 0, 2) ? V1 : V2),
- DAG.getTargetConstant(PermMask, DL, MVT::i8));
- return DAG.getBitcast(RootVT, Res);
+ SDValue LHS = isInRange(BaseMask[0], 0, 2) ? V1 : V2;
+ SDValue RHS = isInRange(BaseMask[1], 0, 2) ? V1 : V2;
+ return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT,
+ CanonicalizeShuffleInput(RootVT, LHS),
+ CanonicalizeShuffleInput(RootVT, RHS),
+ DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
}
}
@@ -34721,8 +35533,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if ((Subtarget.hasAVX2() ||
(Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) &&
(!IsMaskedShuffle || NumRootElts == NumMaskElts)) {
- SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
- if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
+ if (isUndefOrEqual(Mask, 0)) {
if (V1.getValueType() == MaskVT &&
V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
MayFoldLoad(V1.getOperand(0))) {
@@ -34735,7 +35546,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (Subtarget.hasAVX2()) {
if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
- Res = DAG.getBitcast(MaskVT, V1);
+ Res = CanonicalizeShuffleInput(MaskVT, V1);
Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
return DAG.getBitcast(RootVT, Res);
}
@@ -34750,7 +35561,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
+ Res = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
return DAG.getBitcast(RootVT, Res);
}
@@ -34762,7 +35573,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- Res = DAG.getBitcast(ShuffleVT, V1);
+ Res = CanonicalizeShuffleInput(ShuffleVT, V1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
@@ -34773,16 +35584,32 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// from a scalar.
// TODO: Handle other insertions here as well?
if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 &&
- MaskEltSizeInBits == 32 && Subtarget.hasSSE41() &&
- !isTargetShuffleEquivalent(Mask, {4, 1, 2, 3})) {
- SDValue SrcV1 = V1, SrcV2 = V2;
- if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask, DAG) &&
- SrcV2.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ Subtarget.hasSSE41() &&
+ !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3})) {
+ if (MaskEltSizeInBits == 32) {
+ SDValue SrcV1 = V1, SrcV2 = V2;
+ if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask,
+ DAG) &&
+ SrcV2.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS)
+ return SDValue(); // Nothing to do!
+ Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32,
+ CanonicalizeShuffleInput(MVT::v4f32, SrcV1),
+ CanonicalizeShuffleInput(MVT::v4f32, SrcV2),
+ DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
+ return DAG.getBitcast(RootVT, Res);
+ }
+ }
+ if (MaskEltSizeInBits == 64 &&
+ isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}) &&
+ V2.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ V2.getScalarValueSizeInBits() <= 32) {
if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS)
return SDValue(); // Nothing to do!
+ PermuteImm = (/*DstIdx*/2 << 4) | (/*SrcIdx*/0 << 0);
Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32,
- DAG.getBitcast(MVT::v4f32, SrcV1),
- DAG.getBitcast(MVT::v4f32, SrcV2),
+ CanonicalizeShuffleInput(MVT::v4f32, V1),
+ CanonicalizeShuffleInput(MVT::v4f32, V2),
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
@@ -34796,8 +35623,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
- NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
+ NewV1 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1);
+ NewV2 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV2);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
return DAG.getBitcast(RootVT, Res);
}
@@ -34810,8 +35637,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
- NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
+ NewV1 = CanonicalizeShuffleInput(ShuffleVT, NewV1);
+ NewV2 = CanonicalizeShuffleInput(ShuffleVT, NewV2);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
@@ -34828,7 +35655,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
Zeroable)) {
if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI)
return SDValue(); // Nothing to do!
- V1 = DAG.getBitcast(IntMaskVT, V1);
+ V1 = CanonicalizeShuffleInput(IntMaskVT, V1);
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
DAG.getTargetConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
@@ -34838,8 +35665,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI)
return SDValue(); // Nothing to do!
- V1 = DAG.getBitcast(IntMaskVT, V1);
- V2 = DAG.getBitcast(IntMaskVT, V2);
+ V1 = CanonicalizeShuffleInput(IntMaskVT, V1);
+ V2 = CanonicalizeShuffleInput(IntMaskVT, V2);
Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
DAG.getTargetConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
@@ -34858,7 +35685,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
IsTRUNCATE ? (unsigned)ISD::TRUNCATE : (unsigned)X86ISD::VTRUNC;
if (Depth == 0 && Root.getOpcode() == Opc)
return SDValue(); // Nothing to do!
- V1 = DAG.getBitcast(ShuffleSrcVT, V1);
+ V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
Res = DAG.getNode(Opc, DL, ShuffleVT, V1);
if (ShuffleVT.getSizeInBits() < RootSizeInBits)
Res = widenSubVector(Res, true, Subtarget, DAG, DL, RootSizeInBits);
@@ -34875,8 +35702,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return SDValue(); // Nothing to do!
ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2);
ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2);
- V1 = DAG.getBitcast(ShuffleSrcVT, V1);
- V2 = DAG.getBitcast(ShuffleSrcVT, V2);
+ V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
+ V2 = CanonicalizeShuffleInput(ShuffleSrcVT, V2);
ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2);
ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts);
Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShuffleSrcVT, V1, V2);
@@ -34893,49 +35720,56 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Depth threshold above which we can efficiently use variable mask shuffles.
int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2;
AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
+ // VPERMI2W/VPERMI2B are 3 uops on Skylake and Icelake so we require a
+ // higher depth before combining them.
+ bool AllowBWIVPERMV3 = (Depth >= 2 || HasVariableMask);
bool MaskContainsZeros = isAnyZero(Mask);
if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) {
// If we have a single input lane-crossing shuffle then lower to VPERMV.
- if (UnaryShuffle && AllowVariableMask && !MaskContainsZeros &&
- ((Subtarget.hasAVX2() &&
- (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
- (Subtarget.hasAVX512() &&
- (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
- MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
- (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
- (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
- (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
- (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
- SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
- Res = DAG.getBitcast(MaskVT, V1);
- Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res);
- return DAG.getBitcast(RootVT, Res);
+ if (UnaryShuffle && AllowVariableMask && !MaskContainsZeros) {
+ if (Subtarget.hasAVX2() &&
+ (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) {
+ SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
+ Res = CanonicalizeShuffleInput(MaskVT, V1);
+ Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res);
+ return DAG.getBitcast(RootVT, Res);
+ }
+ // AVX512 variants (non-VLX will pad to 512-bit shuffles).
+ if ((Subtarget.hasAVX512() &&
+ (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
+ MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
+ (Subtarget.hasBWI() &&
+ (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
+ (Subtarget.hasVBMI() &&
+ (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8))) {
+ V1 = CanonicalizeShuffleInput(MaskVT, V1);
+ V2 = DAG.getUNDEF(MaskVT);
+ Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG);
+ return DAG.getBitcast(RootVT, Res);
+ }
}
// Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero
- // vector as the second source.
+ // vector as the second source (non-VLX will pad to 512-bit shuffles).
if (UnaryShuffle && AllowVariableMask &&
((Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
+ MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
+ MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 ||
MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
- (Subtarget.hasVLX() &&
- (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
- MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
- (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
- (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
- (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
- (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
+ (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
+ (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
+ (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
+ (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
// Adjust shuffle mask - replace SM_SentinelZero with second source index.
for (unsigned i = 0; i != NumMaskElts; ++i)
if (Mask[i] == SM_SentinelZero)
Mask[i] = NumMaskElts + i;
-
- SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
- Res = DAG.getBitcast(MaskVT, V1);
- SDValue Zero = getZeroVector(MaskVT, Subtarget, DAG, DL);
- Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, Res, VPermMask, Zero);
+ V1 = CanonicalizeShuffleInput(MaskVT, V1);
+ V2 = getZeroVector(MaskVT, Subtarget, DAG, DL);
+ Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG);
return DAG.getBitcast(RootVT, Res);
}
@@ -34946,22 +35780,21 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
DAG, Subtarget))
return WideShuffle;
- // If we have a dual input lane-crossing shuffle then lower to VPERMV3.
+ // If we have a dual input lane-crossing shuffle then lower to VPERMV3,
+ // (non-VLX will pad to 512-bit shuffles).
if (AllowVariableMask && !MaskContainsZeros &&
((Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
- MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
- (Subtarget.hasVLX() &&
- (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
+ MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
+ MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 ||
MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
- (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
- (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
- (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
- (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
- SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
- V1 = DAG.getBitcast(MaskVT, V1);
- V2 = DAG.getBitcast(MaskVT, V2);
- Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
+ (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
+ (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
+ (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
+ (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
+ V1 = CanonicalizeShuffleInput(MaskVT, V1);
+ V2 = CanonicalizeShuffleInput(MaskVT, V2);
+ Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG);
return DAG.getBitcast(RootVT, Res);
}
return SDValue();
@@ -34987,7 +35820,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
EltBits[i] = AllOnes;
}
SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL);
- Res = DAG.getBitcast(MaskVT, V1);
+ Res = CanonicalizeShuffleInput(MaskVT, V1);
unsigned AndOpcode =
MaskVT.isFloatingPoint() ? unsigned(X86ISD::FAND) : unsigned(ISD::AND);
Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask);
@@ -35007,7 +35840,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
VPermIdx.push_back(Idx);
}
SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx);
- Res = DAG.getBitcast(MaskVT, V1);
+ Res = CanonicalizeShuffleInput(MaskVT, V1);
Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask);
return DAG.getBitcast(RootVT, Res);
}
@@ -35039,8 +35872,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index);
VPerm2Idx.push_back(Index);
}
- V1 = DAG.getBitcast(MaskVT, V1);
- V2 = DAG.getBitcast(MaskVT, V2);
+ V1 = CanonicalizeShuffleInput(MaskVT, V1);
+ V2 = CanonicalizeShuffleInput(MaskVT, V2);
SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
DAG.getTargetConstant(M2ZImm, DL, MVT::i8));
@@ -35074,7 +35907,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
- Res = DAG.getBitcast(ByteVT, V1);
+ Res = CanonicalizeShuffleInput(ByteVT, V1);
SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask);
Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp);
return DAG.getBitcast(RootVT, Res);
@@ -35104,8 +35937,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
MVT ByteVT = MVT::v16i8;
- V1 = DAG.getBitcast(ByteVT, V1);
- V2 = DAG.getBitcast(ByteVT, V2);
+ V1 = CanonicalizeShuffleInput(ByteVT, V1);
+ V2 = CanonicalizeShuffleInput(ByteVT, V2);
SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask);
Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp);
return DAG.getBitcast(RootVT, Res);
@@ -35118,25 +35951,22 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
DAG, Subtarget))
return WideShuffle;
- // If we have a dual input shuffle then lower to VPERMV3.
+ // If we have a dual input shuffle then lower to VPERMV3,
+ // (non-VLX will pad to 512-bit shuffles)
if (!UnaryShuffle && AllowVariableMask && !MaskContainsZeros &&
((Subtarget.hasAVX512() &&
- (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
- MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
- (Subtarget.hasVLX() &&
- (MaskVT == MVT::v2f64 || MaskVT == MVT::v2i64 || MaskVT == MVT::v4f64 ||
- MaskVT == MVT::v4i64 || MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 ||
- MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
- (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
- (Subtarget.hasBWI() && Subtarget.hasVLX() &&
- (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16)) ||
- (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
- (Subtarget.hasVBMI() && Subtarget.hasVLX() &&
- (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8)))) {
- SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
- V1 = DAG.getBitcast(MaskVT, V1);
- V2 = DAG.getBitcast(MaskVT, V2);
- Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
+ (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v8f64 ||
+ MaskVT == MVT::v2i64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8i64 ||
+ MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 ||
+ MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 ||
+ MaskVT == MVT::v16i32)) ||
+ (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
+ (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
+ (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
+ (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
+ V1 = CanonicalizeShuffleInput(MaskVT, V1);
+ V2 = CanonicalizeShuffleInput(MaskVT, V2);
+ Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG);
return DAG.getBitcast(RootVT, Res);
}
@@ -35161,12 +35991,16 @@ static SDValue combineX86ShuffleChainWithExtract(
if (NumInputs == 0)
return SDValue();
+ EVT RootVT = Root.getValueType();
+ unsigned RootSizeInBits = RootVT.getSizeInBits();
+ assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask");
+
SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
SmallVector<unsigned, 4> Offsets(NumInputs, 0);
// Peek through subvectors.
// TODO: Support inter-mixed EXTRACT_SUBVECTORs + BITCASTs?
- unsigned WideSizeInBits = WideInputs[0].getValueSizeInBits();
+ unsigned WideSizeInBits = RootSizeInBits;
for (unsigned i = 0; i != NumInputs; ++i) {
SDValue &Src = WideInputs[i];
unsigned &Offset = Offsets[i];
@@ -35189,8 +36023,6 @@ static SDValue combineX86ShuffleChainWithExtract(
if (llvm::all_of(Offsets, [](unsigned Offset) { return Offset == 0; }))
return SDValue();
- EVT RootVT = Root.getValueType();
- unsigned RootSizeInBits = RootVT.getSizeInBits();
unsigned Scale = WideSizeInBits / RootSizeInBits;
assert((WideSizeInBits % RootSizeInBits) == 0 &&
"Unexpected subvector extraction");
@@ -35250,6 +36082,149 @@ static SDValue combineX86ShuffleChainWithExtract(
return SDValue();
}
+// Canonicalize the combined shuffle mask chain with horizontal ops.
+// NOTE: This may update the Ops and Mask.
+static SDValue canonicalizeShuffleMaskWithHorizOp(
+ MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask,
+ unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (Mask.empty() || Ops.empty())
+ return SDValue();
+
+ SmallVector<SDValue> BC;
+ for (SDValue Op : Ops)
+ BC.push_back(peekThroughBitcasts(Op));
+
+ // All ops must be the same horizop + type.
+ SDValue BC0 = BC[0];
+ EVT VT0 = BC0.getValueType();
+ unsigned Opcode0 = BC0.getOpcode();
+ if (VT0.getSizeInBits() != RootSizeInBits || llvm::any_of(BC, [&](SDValue V) {
+ return V.getOpcode() != Opcode0 || V.getValueType() != VT0;
+ }))
+ return SDValue();
+
+ bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
+ Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
+ bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
+ if (!isHoriz && !isPack)
+ return SDValue();
+
+ int NumElts = VT0.getVectorNumElements();
+ int NumLanes = VT0.getSizeInBits() / 128;
+ int NumEltsPerLane = NumElts / NumLanes;
+ int NumHalfEltsPerLane = NumEltsPerLane / 2;
+
+ // See if we can remove the shuffle by resorting the HOP chain so that
+ // the HOP args are pre-shuffled.
+ // TODO: Generalize to any sized/depth chain.
+ // TODO: Add support for PACKSS/PACKUS.
+ if (isHoriz && NumEltsPerLane == 4 && VT0.is128BitVector() &&
+ shouldUseHorizontalOp(Ops.size() == 1, DAG, Subtarget)) {
+ SmallVector<int> ScaledMask;
+ if (scaleShuffleElements(Mask, 4, ScaledMask)) {
+ // Attempt to find a HOP(HOP(X,Y),HOP(Z,W)) source operand.
+ auto GetHOpSrc = [&](int M) {
+ if (M == SM_SentinelUndef)
+ return DAG.getUNDEF(VT0);
+ if (M == SM_SentinelZero)
+ return getZeroVector(VT0.getSimpleVT(), Subtarget, DAG, DL);
+ SDValue Src0 = BC[M / NumElts];
+ SDValue Src1 = Src0.getOperand((M % 4) >= 2);
+ if (Src1.getOpcode() == Opcode0 && Src0->isOnlyUserOf(Src1.getNode()))
+ return Src1.getOperand(M % 2);
+ return SDValue();
+ };
+ SDValue M0 = GetHOpSrc(ScaledMask[0]);
+ SDValue M1 = GetHOpSrc(ScaledMask[1]);
+ SDValue M2 = GetHOpSrc(ScaledMask[2]);
+ SDValue M3 = GetHOpSrc(ScaledMask[3]);
+ if (M0 && M1 && M2 && M3) {
+ SDValue LHS = DAG.getNode(Opcode0, DL, VT0, M0, M1);
+ SDValue RHS = DAG.getNode(Opcode0, DL, VT0, M2, M3);
+ return DAG.getNode(Opcode0, DL, VT0, LHS, RHS);
+ }
+ }
+ }
+
+ if (2 < Ops.size())
+ return SDValue();
+
+ SDValue BC1 = BC[BC.size() - 1];
+ if (Mask.size() == VT0.getVectorNumElements()) {
+ // Canonicalize binary shuffles of horizontal ops that use the
+ // same sources to an unary shuffle.
+ // TODO: Try to perform this fold even if the shuffle remains.
+ if (Ops.size() == 2) {
+ auto ContainsOps = [](SDValue HOp, SDValue Op) {
+ return Op == HOp.getOperand(0) || Op == HOp.getOperand(1);
+ };
+ // Commute if all BC0's ops are contained in BC1.
+ if (ContainsOps(BC1, BC0.getOperand(0)) &&
+ ContainsOps(BC1, BC0.getOperand(1))) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(Ops[0], Ops[1]);
+ std::swap(BC0, BC1);
+ }
+
+ // If BC1 can be represented by BC0, then convert to unary shuffle.
+ if (ContainsOps(BC0, BC1.getOperand(0)) &&
+ ContainsOps(BC0, BC1.getOperand(1))) {
+ for (int &M : Mask) {
+ if (M < NumElts) // BC0 element or UNDEF/Zero sentinel.
+ continue;
+ int SubLane = ((M % NumEltsPerLane) >= NumHalfEltsPerLane) ? 1 : 0;
+ M -= NumElts + (SubLane * NumHalfEltsPerLane);
+ if (BC1.getOperand(SubLane) != BC0.getOperand(0))
+ M += NumHalfEltsPerLane;
+ }
+ }
+ }
+
+ // Canonicalize unary horizontal ops to only refer to lower halves.
+ for (int i = 0; i != NumElts; ++i) {
+ int &M = Mask[i];
+ if (isUndefOrZero(M))
+ continue;
+ if (M < NumElts && BC0.getOperand(0) == BC0.getOperand(1) &&
+ (M % NumEltsPerLane) >= NumHalfEltsPerLane)
+ M -= NumHalfEltsPerLane;
+ if (NumElts <= M && BC1.getOperand(0) == BC1.getOperand(1) &&
+ (M % NumEltsPerLane) >= NumHalfEltsPerLane)
+ M -= NumHalfEltsPerLane;
+ }
+ }
+
+ // Combine binary shuffle of 2 similar 'Horizontal' instructions into a
+ // single instruction. Attempt to match a v2X64 repeating shuffle pattern that
+ // represents the LHS/RHS inputs for the lower/upper halves.
+ unsigned EltSizeInBits = RootSizeInBits / Mask.size();
+ SmallVector<int, 16> TargetMask128, WideMask128;
+ if (isRepeatedTargetShuffleMask(128, EltSizeInBits, Mask, TargetMask128) &&
+ scaleShuffleElements(TargetMask128, 2, WideMask128)) {
+ assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
+ bool SingleOp = (Ops.size() == 1);
+ if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
+ SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
+ SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
+ Lo = Lo.getOperand(WideMask128[0] & 1);
+ Hi = Hi.getOperand(WideMask128[1] & 1);
+ if (SingleOp) {
+ MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
+ SDValue Undef = DAG.getUNDEF(SrcVT);
+ SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
+ Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo);
+ Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi);
+ Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo);
+ Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi);
+ }
+ return DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
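// [Editorial sketch, not part of the upstream patch.] One reason the helper above can
// "canonicalize unary horizontal ops to only refer to lower halves": when both HOP
// operands are the same value, the upper half of each 128-bit lane repeats the lower
// half, so mask indices into the upper half can be remapped downward. A standalone
// model of a HADD-style pairwise add (hypothetical names, illustration only):
#include <array>
#include <cassert>
static std::array<int, 4> hadd4(const std::array<int, 4> &A,
                                const std::array<int, 4> &B) {
  return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
}
int main() {
  std::array<int, 4> X = {1, 2, 3, 4};
  std::array<int, 4> R = hadd4(X, X);   // hadd(X, X)
  assert(R[2] == R[0] && R[3] == R[1]); // upper half duplicates the lower half,
                                        // so indices 2/3 may be rewritten as 0/1
}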
// Attempt to constant fold all of the constant source ops.
// Returns true if the entire shuffle is folded to a constant.
// TODO: Extend this to merge multiple constant Ops and update the mask.
@@ -35341,6 +36316,14 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
return DAG.getBitcast(VT, CstOp);
}
+namespace llvm {
+ namespace X86 {
+ enum {
+ MaxShuffleCombineDepth = 8
+ };
+ }
+} // namespace llvm
+
/// Fully generic combining of x86 shuffle instructions.
///
/// This should be the last combine run over the x86 shuffle instructions. Once
@@ -35373,31 +36356,30 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
static SDValue combineX86ShufflesRecursively(
ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
- bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+ unsigned MaxDepth, bool HasVariableMask, bool AllowVariableMask,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget) {
assert(RootMask.size() > 0 &&
(RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
"Illegal shuffle root mask");
+ assert(Root.getSimpleValueType().isVector() &&
+ "Shuffles operate on vector types!");
+ unsigned RootSizeInBits = Root.getSimpleValueType().getSizeInBits();
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
- const unsigned MaxRecursionDepth = 8;
- if (Depth >= MaxRecursionDepth)
+ if (Depth >= MaxDepth)
return SDValue();
// Directly rip through bitcasts to find the underlying operand.
SDValue Op = SrcOps[SrcOpIndex];
Op = peekThroughOneUseBitcasts(Op);
- MVT VT = Op.getSimpleValueType();
- if (!VT.isVector())
- return SDValue(); // Bail if we hit a non-vector.
+ EVT VT = Op.getValueType();
+ if (!VT.isVector() || !VT.isSimple())
+ return SDValue(); // Bail if we hit a non-simple non-vector.
- assert(Root.getSimpleValueType().isVector() &&
- "Shuffles operate on vector types!");
- unsigned RootSizeInBits = Root.getSimpleValueType().getSizeInBits();
- assert(VT.getSizeInBits() == RootSizeInBits &&
- "Can only combine shuffles of the same vector register size.");
+ assert((RootSizeInBits % VT.getSizeInBits()) == 0 &&
+ "Can only combine shuffles upto size of the root op.");
// Extract target shuffle mask and resolve sentinels and inputs.
// TODO - determine Op's demanded elts from RootMask.
@@ -35410,17 +36392,32 @@ static SDValue combineX86ShufflesRecursively(
OpZero, DAG, Depth, false))
return SDValue();
- // Shuffle inputs must be the same size as the result, bail on any larger
- // inputs and widen any smaller inputs.
- if (llvm::any_of(OpInputs, [RootSizeInBits](SDValue Op) {
- return Op.getValueSizeInBits() > RootSizeInBits;
+ // Shuffle inputs must not be larger than the shuffle result.
+ // TODO: Relax this for single input faux shuffles (trunc/extract_subvector).
+ if (llvm::any_of(OpInputs, [VT](SDValue OpInput) {
+ return OpInput.getValueSizeInBits() > VT.getSizeInBits();
}))
return SDValue();
- for (SDValue &Op : OpInputs)
- if (Op.getValueSizeInBits() < RootSizeInBits)
- Op = widenSubVector(peekThroughOneUseBitcasts(Op), false, Subtarget, DAG,
- SDLoc(Op), RootSizeInBits);
+ // If the shuffle result was smaller than the root, we need to adjust the
+ // mask indices and pad the mask with undefs.
+ if (RootSizeInBits > VT.getSizeInBits()) {
+ unsigned NumSubVecs = RootSizeInBits / VT.getSizeInBits();
+ unsigned OpMaskSize = OpMask.size();
+ if (OpInputs.size() > 1) {
+ unsigned PaddedMaskSize = NumSubVecs * OpMaskSize;
+ for (int &M : OpMask) {
+ if (M < 0)
+ continue;
+ int EltIdx = M % OpMaskSize;
+ int OpIdx = M / OpMaskSize;
+ M = (PaddedMaskSize * OpIdx) + EltIdx;
+ }
+ }
+ OpZero = OpZero.zext(NumSubVecs * OpMaskSize);
+ OpUndef = OpUndef.zext(NumSubVecs * OpMaskSize);
+ OpMask.append((NumSubVecs - 1) * OpMaskSize, SM_SentinelUndef);
+ }
SmallVector<int, 64> Mask;
SmallVector<SDValue, 16> Ops;
@@ -35561,10 +36558,6 @@ static SDValue combineX86ShufflesRecursively(
// Handle the all undef/zero cases early.
if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; }))
return DAG.getUNDEF(Root.getValueType());
-
- // TODO - should we handle the mixed zero/undef case as well? Just returning
- // a zero mask will lose information on undef elements possibly reducing
- // future combine possibilities.
if (all_of(Mask, [](int Idx) { return Idx < 0; }))
return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG,
SDLoc(Root));
@@ -35584,7 +36577,7 @@ static SDValue combineX86ShufflesRecursively(
// shuffles to avoid constant pool bloat.
// Don't recurse if we already have more source ops than we can combine in
// the remaining recursion depth.
- if (Ops.size() < (MaxRecursionDepth - Depth)) {
+ if (Ops.size() < (MaxDepth - Depth)) {
for (int i = 0, e = Ops.size(); i < e; ++i) {
// For empty roots, we need to resolve zeroable elements before combining
// them with other shuffles.
@@ -35596,7 +36589,7 @@ static SDValue combineX86ShufflesRecursively(
SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
AllowVar = AllowVariableMask;
if (SDValue Res = combineX86ShufflesRecursively(
- Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1,
+ Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth,
HasVariableMask, AllowVar, DAG, Subtarget))
return Res;
}
@@ -35607,6 +36600,24 @@ static SDValue combineX86ShufflesRecursively(
Ops, Mask, Root, HasVariableMask, DAG, Subtarget))
return Cst;
+ // Canonicalize the combined shuffle mask chain with horizontal ops.
+ // NOTE: This will update the Ops and Mask.
+ if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
+ Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget))
+ return DAG.getBitcast(Root.getValueType(), HOp);
+
+ // Widen any subvector shuffle inputs we've collected.
+ if (any_of(Ops, [RootSizeInBits](SDValue Op) {
+ return Op.getValueSizeInBits() < RootSizeInBits;
+ })) {
+ for (SDValue &Op : Ops)
+ if (Op.getValueSizeInBits() < RootSizeInBits)
+ Op = widenSubVector(Op, false, Subtarget, DAG, SDLoc(Op),
+ RootSizeInBits);
+ // Reresolve - we might have repeated subvector sources.
+ resolveTargetShuffleInputsAndMask(Ops, Mask);
+ }
+
// We can only combine unary and binary shuffle mask cases.
if (Ops.size() <= 2) {
// Minor canonicalization of the accumulated shuffle mask to make it easier
@@ -35614,8 +36625,10 @@ static SDValue combineX86ShufflesRecursively(
// elements, and shrink them to the half-width mask. It does this in a loop
// so it will reduce the size of the mask to the minimal width mask which
// performs an equivalent shuffle.
- SmallVector<int, 64> WidenedMask;
- while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
+ while (Mask.size() > 1) {
+ SmallVector<int, 64> WidenedMask;
+ if (!canWidenShuffleElements(Mask, WidenedMask))
+ break;
Mask = std::move(WidenedMask);
}
@@ -35642,6 +36655,7 @@ static SDValue combineX86ShufflesRecursively(
static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 0,
+ X86::MaxShuffleCombineDepth,
/*HasVarMask*/ false,
/*AllowVarMask*/ true, DAG, Subtarget);
}
@@ -35875,6 +36889,61 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
return SDValue();
}
+/// Attempt to fold vpermf128(op(),op()) -> op(vpermf128(),vpermf128()).
+static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
+ SelectionDAG &DAG,
+ const SDLoc &DL) {
+ assert(V.getOpcode() == X86ISD::VPERM2X128 && "Unknown lane shuffle");
+
+ MVT VT = V.getSimpleValueType();
+ SDValue Src0 = peekThroughBitcasts(V.getOperand(0));
+ SDValue Src1 = peekThroughBitcasts(V.getOperand(1));
+ unsigned SrcOpc0 = Src0.getOpcode();
+ unsigned SrcOpc1 = Src1.getOpcode();
+ EVT SrcVT0 = Src0.getValueType();
+ EVT SrcVT1 = Src1.getValueType();
+
+ if (!Src1.isUndef() && (SrcVT0 != SrcVT1 || SrcOpc0 != SrcOpc1))
+ return SDValue();
+
+ switch (SrcOpc0) {
+ case X86ISD::MOVDDUP: {
+ SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0));
+ SDValue RHS =
+ DAG.getBitcast(VT, Src1.isUndef() ? Src1 : Src1.getOperand(0));
+ SDValue Res =
+ DAG.getNode(X86ISD::VPERM2X128, DL, VT, LHS, RHS, V.getOperand(2));
+ Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res));
+ return DAG.getBitcast(VT, Res);
+ }
+ case X86ISD::VPERMILPI:
+ // TODO: Handle v4f64 permutes with different low/high lane masks.
+ if (SrcVT0 == MVT::v4f64) {
+ uint64_t Mask = Src0.getConstantOperandVal(1);
+ if ((Mask & 0x3) != ((Mask >> 2) & 0x3))
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI:
+ case X86ISD::PSHUFD:
+ if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) {
+ SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0));
+ SDValue RHS =
+ DAG.getBitcast(VT, Src1.isUndef() ? Src1 : Src1.getOperand(0));
+ SDValue Res =
+ DAG.getNode(X86ISD::VPERM2X128, DL, VT, LHS, RHS, V.getOperand(2));
+ Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res),
+ Src0.getOperand(1));
+ return DAG.getBitcast(VT, Res);
+ }
+ break;
+ }
+
+ return SDValue();
+}
+
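// [Editorial sketch, not part of the upstream patch.] The fold above moves a lane
// shuffle through ops that treat every lane identically (e.g. a shift by the same
// immediate), since such ops commute with a 128-bit lane permute. A standalone model
// with two 4 x i32 "lanes" (hypothetical names, illustration only):
#include <array>
#include <cassert>
using Lane = std::array<unsigned, 4>; // one 128-bit lane as 4 x i32
using Vec256 = std::array<Lane, 2>;   // a 256-bit vector as two lanes
static Vec256 shiftRightAll(Vec256 V, unsigned S) {
  for (Lane &L : V)
    for (unsigned &E : L)
      E >>= S; // same per-element op applied uniformly to both lanes
  return V;
}
static Vec256 swapLanes(const Vec256 &V) { return {V[1], V[0]}; }
int main() {
  Vec256 V = {Lane{1, 2, 3, 4}, Lane{5, 6, 7, 8}};
  // A vperm2x128-style lane swap commutes with the lane-uniform op:
  assert(swapLanes(shiftRightAll(V, 1)) == shiftRightAll(swapLanes(V), 1));
}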
/// Try to combine x86 target specific shuffles.
static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -35884,59 +36953,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
SmallVector<int, 4> Mask;
unsigned Opcode = N.getOpcode();
- bool IsUnary;
- SmallVector<int, 64> TargetMask;
- SmallVector<SDValue, 2> TargetOps;
- if (isTargetShuffle(Opcode))
- getTargetShuffleMask(N.getNode(), VT, true, TargetOps, TargetMask, IsUnary);
-
- // Combine binary shuffle of 2 similar 'Horizontal' instructions into a
- // single instruction. Attempt to match a v2X64 repeating shuffle pattern that
- // represents the LHS/RHS inputs for the lower/upper halves.
- SmallVector<int, 16> TargetMask128;
- if (!TargetMask.empty() && 0 < TargetOps.size() && TargetOps.size() <= 2 &&
- isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128)) {
- SmallVector<int, 16> WidenedMask128 = TargetMask128;
- while (WidenedMask128.size() > 2) {
- SmallVector<int, 16> WidenedMask;
- if (!canWidenShuffleElements(WidenedMask128, WidenedMask))
- break;
- WidenedMask128 = std::move(WidenedMask);
- }
- if (WidenedMask128.size() == 2) {
- assert(isUndefOrZeroOrInRange(WidenedMask128, 0, 4) && "Illegal shuffle");
- SDValue BC0 = peekThroughBitcasts(TargetOps.front());
- SDValue BC1 = peekThroughBitcasts(TargetOps.back());
- EVT VT0 = BC0.getValueType();
- EVT VT1 = BC1.getValueType();
- unsigned Opcode0 = BC0.getOpcode();
- unsigned Opcode1 = BC1.getOpcode();
- bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
- Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
- if (Opcode0 == Opcode1 && VT0 == VT1 &&
- (isHoriz || Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS)) {
- bool SingleOp = (TargetOps.size() == 1);
- if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
- SDValue Lo = isInRange(WidenedMask128[0], 0, 2) ? BC0 : BC1;
- SDValue Hi = isInRange(WidenedMask128[1], 0, 2) ? BC0 : BC1;
- Lo = Lo.getOperand(WidenedMask128[0] & 1);
- Hi = Hi.getOperand(WidenedMask128[1] & 1);
- if (SingleOp) {
- MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
- SDValue Undef = DAG.getUNDEF(SrcVT);
- SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
- Lo = (WidenedMask128[0] == SM_SentinelZero ? Zero : Lo);
- Hi = (WidenedMask128[1] == SM_SentinelZero ? Zero : Hi);
- Lo = (WidenedMask128[0] == SM_SentinelUndef ? Undef : Lo);
- Hi = (WidenedMask128[1] == SM_SentinelUndef ? Undef : Hi);
- }
- SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
- return DAG.getBitcast(VT, Horiz);
- }
- }
- }
- }
-
if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
return R;
@@ -36000,6 +37016,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
DemandedMask[i] = i;
if (SDValue Res = combineX86ShufflesRecursively(
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
+ X86::MaxShuffleCombineDepth,
/*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
@@ -36029,7 +37046,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
for (SDNode *User : Src->uses())
if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
Src == User->getOperand(0) &&
- User->getValueSizeInBits(0) > VT.getSizeInBits()) {
+ User->getValueSizeInBits(0).getFixedSize() >
+ VT.getFixedSizeInBits()) {
return extractSubVector(SDValue(User, 0), 0, DAG, DL,
VT.getSizeInBits());
}
@@ -36115,7 +37133,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
LN->isSimple()) {
unsigned Offset = ShiftAmt / 8;
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ptr = DAG.getMemBasePlusOffset(LN->getBasePtr(), Offset, DL);
+ SDValue Ptr = DAG.getMemBasePlusOffset(LN->getBasePtr(),
+ TypeSize::Fixed(Offset), DL);
SDValue Ops[] = { LN->getChain(), Ptr };
SDValue BcastLd = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
@@ -36147,15 +37166,16 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
// vbroadcast(vector load X) -> vbroadcast_load
- if (SrcVT == MVT::v2f64 && Src.hasOneUse() &&
- ISD::isNormalLoad(Src.getNode())) {
+ if ((SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v2i64 ||
+ SrcVT == MVT::v4i32) &&
+ Src.hasOneUse() && ISD::isNormalLoad(Src.getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(Src);
// Unless the load is volatile or atomic.
if (LN->isSimple()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BcastLd = DAG.getMemIntrinsicNode(
- X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64,
+ X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SrcVT.getScalarType(),
LN->getPointerInfo(), LN->getOriginalAlign(),
LN->getMemOperand()->getFlags());
DCI.CombineTo(N.getNode(), BcastLd);
@@ -36242,6 +37262,27 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
}
+ // Pull subvector inserts into undef through VZEXT_MOVL by making it an
+ // insert into a zero vector. This helps get VZEXT_MOVL closer to
+ // scalar_to_vectors where 256/512 are canonicalized to an insert and a
+ // 128-bit scalar_to_vector. This reduces the number of isel patterns.
+ if (!DCI.isBeforeLegalizeOps() && N0.hasOneUse()) {
+ SDValue V = peekThroughOneUseBitcasts(N0);
+
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.getOperand(0).isUndef() &&
+ isNullConstant(V.getOperand(2))) {
+ SDValue In = V.getOperand(1);
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
+ In.getValueSizeInBits() /
+ VT.getScalarSizeInBits());
+ In = DAG.getBitcast(SubVT, In);
+ SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, SubVT, In);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ getZeroVector(VT, Subtarget, DAG, DL), Movl,
+ V.getOperand(2));
+ }
+ }
+
return SDValue();
}
case X86ISD::BLENDI: {
@@ -36283,32 +37324,51 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return SDValue();
}
case X86ISD::VPERM2X128: {
- // If both 128-bit values were inserted into high halves of 256-bit values,
- // the shuffle can be reduced to a concatenation of subvectors:
- // vperm2x128 (ins ?, X, C1), (ins ?, Y, C2), 0x31 --> concat X, Y
- // Note: We are only looking for the exact high/high shuffle mask because we
- // expect to fold other similar patterns before creating this opcode.
- SDValue Ins0 = peekThroughBitcasts(N.getOperand(0));
- SDValue Ins1 = peekThroughBitcasts(N.getOperand(1));
- unsigned Imm = N.getConstantOperandVal(2);
- if (!(Imm == 0x31 &&
- Ins0.getOpcode() == ISD::INSERT_SUBVECTOR &&
- Ins1.getOpcode() == ISD::INSERT_SUBVECTOR &&
- Ins0.getValueType() == Ins1.getValueType()))
- return SDValue();
+ // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (LHS.getOpcode() == ISD::BITCAST &&
+ (RHS.getOpcode() == ISD::BITCAST || RHS.isUndef())) {
+ EVT SrcVT = LHS.getOperand(0).getValueType();
+ if (RHS.isUndef() || SrcVT == RHS.getOperand(0).getValueType()) {
+ return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT,
+ DAG.getBitcast(SrcVT, LHS),
+ DAG.getBitcast(SrcVT, RHS),
+ N->getOperand(2)));
+ }
+ }
+
+ // Fold vperm2x128(op(),op()) -> op(vperm2x128(),vperm2x128()).
+ if (SDValue Res = canonicalizeLaneShuffleWithRepeatedOps(N, DAG, DL))
+ return Res;
- SDValue X = Ins0.getOperand(1);
- SDValue Y = Ins1.getOperand(1);
- unsigned C1 = Ins0.getConstantOperandVal(2);
- unsigned C2 = Ins1.getConstantOperandVal(2);
- MVT SrcVT = X.getSimpleValueType();
- unsigned SrcElts = SrcVT.getVectorNumElements();
- if (SrcVT != Y.getSimpleValueType() || SrcVT.getSizeInBits() != 128 ||
- C1 != SrcElts || C2 != SrcElts)
+ // Fold vperm2x128 subvector shuffle with an inner concat pattern.
+ // vperm2x128(concat(X,Y),concat(Z,W)) --> concat X,Y etc.
+ auto FindSubVector128 = [&](unsigned Idx) {
+ if (Idx > 3)
+ return SDValue();
+ SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1));
+ SmallVector<SDValue> SubOps;
+ if (collectConcatOps(Src.getNode(), SubOps) && SubOps.size() == 2)
+ return SubOps[Idx & 1];
+ unsigned NumElts = Src.getValueType().getVectorNumElements();
+ if ((Idx & 1) == 1 && Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Src.getOperand(1).getValueSizeInBits() == 128 &&
+ Src.getConstantOperandAPInt(2) == (NumElts / 2)) {
+ return Src.getOperand(1);
+ }
return SDValue();
-
- return DAG.getBitcast(VT, DAG.getNode(ISD::CONCAT_VECTORS, DL,
- Ins1.getValueType(), X, Y));
+ };
+ unsigned Imm = N.getConstantOperandVal(2);
+ if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
+ if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
+ MVT SubVT = VT.getHalfNumVectorElementsVT();
+ SubLo = DAG.getBitcast(SubVT, SubLo);
+ SubHi = DAG.getBitcast(SubVT, SubHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi);
+ }
+ }
+ return SDValue();
}
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
@@ -36751,10 +37811,12 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
/// Eliminate a redundant shuffle of a horizontal math op.
static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
+ // TODO: Can we use getTargetShuffleInputs instead?
unsigned Opcode = N->getOpcode();
if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
- if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
- return SDValue();
+ if (Opcode != X86ISD::UNPCKL && Opcode != X86ISD::UNPCKH)
+ if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
+ return SDValue();
// For a broadcast, peek through an extract element of index 0 to find the
// horizontal op: broadcast (ext_vec_elt HOp, 0)
@@ -36773,6 +37835,28 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
HOp.getOpcode() != X86ISD::HSUB && HOp.getOpcode() != X86ISD::FHSUB)
return SDValue();
+ // unpcklo(hop(x,y),hop(z,w)) -> permute(hop(x,z)).
+ // unpckhi(hop(x,y),hop(z,w)) -> permute(hop(y,w)).
+ // Don't fold if hop(x,y) == hop(z,w).
+ if (Opcode == X86ISD::UNPCKL || Opcode == X86ISD::UNPCKH) {
+ SDValue HOp2 = N->getOperand(1);
+ if (HOp.getOpcode() != HOp2.getOpcode() || VT.getScalarSizeInBits() != 32)
+ return SDValue();
+ if (HOp == HOp2)
+ return SDValue();
+ SDLoc DL(HOp);
+ unsigned LoHi = Opcode == X86ISD::UNPCKL ? 0 : 1;
+ SDValue Res = DAG.getNode(HOp.getOpcode(), DL, VT, HOp.getOperand(LoHi),
+ HOp2.getOperand(LoHi));
+ // Use SHUFPS for the permute so this will work on SSE3 targets, shuffle
+ // combining and domain handling will simplify this later on.
+ EVT ShuffleVT = VT.changeVectorElementType(MVT::f32);
+ Res = DAG.getBitcast(ShuffleVT, Res);
+ Res = DAG.getNode(X86ISD::SHUFP, DL, ShuffleVT, Res, Res,
+ getV4X86ShuffleImm8ForMask({0, 2, 1, 3}, DL, DAG));
+ return DAG.getBitcast(VT, Res);
+ }
+
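// [Editorial sketch, not part of the upstream patch.] A worked scalar check of the
// unpcklo(hop(x,y),hop(z,w)) --> permute(hop(x,z),{0,2,1,3}) identity used above,
// for the 4 x i32 case (hypothetical names, illustration only):
#include <array>
#include <cassert>
using V4 = std::array<int, 4>;
static V4 hadd(const V4 &A, const V4 &B) {
  return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
}
static V4 unpcklo(const V4 &A, const V4 &B) { return {A[0], B[0], A[1], B[1]}; }
static V4 permute0213(const V4 &A) { return {A[0], A[2], A[1], A[3]}; }
int main() {
  V4 X = {1, 2, 3, 4}, Y = {5, 6, 7, 8}, Z = {9, 10, 11, 12}, W = {13, 14, 15, 16};
  // Both sides evaluate to {3,19,7,23}: two hadds + unpack become one hadd + permute.
  assert(unpcklo(hadd(X, Y), hadd(Z, W)) == permute0213(hadd(X, Z)));
}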
// 128-bit horizontal math instructions are defined to operate on adjacent
// lanes of each operand as:
// v4X32: A[0] + A[1] , A[2] + A[3] , B[0] + B[1] , B[2] + B[3]
@@ -36805,6 +37889,8 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
// replicating low and high halves (and without changing the type/length of
// the vector), we don't need the shuffle.
if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
+ if (Opcode == X86ISD::VBROADCAST && !VT.is128BitVector())
+ return SDValue();
if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) {
// movddup (hadd X, X) --> hadd X, X
// broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
@@ -36817,19 +37903,20 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
// shuffle (hadd X, X), undef, [low half...high half] --> hadd X, X
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
+
// TODO: Other mask possibilities like {1,1} and {1,0} could be added here,
// but this should be tied to whatever horizontal op matching and shuffle
// canonicalization are producing.
if (HOp.getValueSizeInBits() == 128 &&
- (isTargetShuffleEquivalent(Mask, {0, 0}) ||
- isTargetShuffleEquivalent(Mask, {0, 1, 0, 1}) ||
- isTargetShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3})))
+ (isShuffleEquivalent(Mask, {0, 0}) ||
+ isShuffleEquivalent(Mask, {0, 1, 0, 1}) ||
+ isShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3})))
return updateHOp(HOp, DAG);
if (HOp.getValueSizeInBits() == 256 &&
- (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}) ||
- isTargetShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) ||
- isTargetShuffleEquivalent(
+ (isShuffleEquivalent(Mask, {0, 0, 2, 2}) ||
+ isShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) ||
+ isShuffleEquivalent(
Mask, {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11})))
return updateHOp(HOp, DAG);
@@ -36887,6 +37974,34 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG))
return HAddSub;
+
+ // Merge shuffles through binops if it's likely we'll be able to merge it
+ // with other shuffles (as long as they aren't splats).
+ // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
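+ //   -> bop(shuffle(shuffle(x,y),shuffle(a,b)),shuffle(shuffle(z,w),shuffle(c,d)))
+ // The shuffle commutes with any element-wise binop, and the new outer
+ // shuffles can then merge with the inner ones.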
+ // TODO: We might be able to move this to DAGCombiner::visitVECTOR_SHUFFLE.
+ if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N)) {
+ unsigned SrcOpcode = N->getOperand(0).getOpcode();
+ if (SrcOpcode == N->getOperand(1).getOpcode() && TLI.isBinOp(SrcOpcode) &&
+ N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+ N->isOnlyUserOf(N->getOperand(1).getNode())) {
+ SDValue Op00 = N->getOperand(0).getOperand(0);
+ SDValue Op10 = N->getOperand(1).getOperand(0);
+ SDValue Op01 = N->getOperand(0).getOperand(1);
+ SDValue Op11 = N->getOperand(1).getOperand(1);
+ auto *SVN00 = dyn_cast<ShuffleVectorSDNode>(Op00);
+ auto *SVN10 = dyn_cast<ShuffleVectorSDNode>(Op10);
+ auto *SVN01 = dyn_cast<ShuffleVectorSDNode>(Op01);
+ auto *SVN11 = dyn_cast<ShuffleVectorSDNode>(Op11);
+ if (((SVN00 && !SVN00->isSplat()) || (SVN10 && !SVN10->isSplat())) &&
+ ((SVN01 && !SVN01->isSplat()) || (SVN11 && !SVN11->isSplat()))) {
+ SDLoc DL(N);
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue LHS = DAG.getVectorShuffle(VT, DL, Op00, Op10, Mask);
+ SDValue RHS = DAG.getVectorShuffle(VT, DL, Op01, Op11, Mask);
+ return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
+ }
+ }
+ }
}
// Attempt to combine into a vector load/broadcast.
@@ -36920,32 +38035,11 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// TODO - merge this into combineX86ShufflesRecursively.
APInt KnownUndef, KnownZero;
APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
- if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
+ DCI))
return SDValue(N, 0);
}
- // Pull subvector inserts into undef through VZEXT_MOVL by making it an
- // insert into a zero vector. This helps get VZEXT_MOVL closer to
- // scalar_to_vectors where 256/512 are canonicalized to an insert and a
- // 128-bit scalar_to_vector. This reduces the number of isel patterns.
- if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps() &&
- N->getOperand(0).hasOneUse()) {
- SDValue V = peekThroughOneUseBitcasts(N->getOperand(0));
-
- if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
- V.getOperand(0).isUndef() && isNullConstant(V.getOperand(2))) {
- SDValue In = V.getOperand(1);
- MVT SubVT =
- MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
- In.getValueSizeInBits() / VT.getScalarSizeInBits());
- In = DAG.getBitcast(SubVT, In);
- SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, SubVT, In);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT,
- getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl),
- Movl, V.getOperand(2));
- }
- }
-
return SDValue();
}
@@ -37076,7 +38170,13 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO,
Depth + 1))
return true;
- // TODO convert SrcUndef to KnownUndef.
+
+ // Aggressively peek through ops to get at the demanded elts.
+ if (!DemandedElts.isAllOnesValue())
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
+ Src, DemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1)));
break;
}
case X86ISD::KSHIFTL: {
@@ -37265,7 +38365,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
if (!SrcVT.isVector())
- return false;
+ break;
// Don't bother broadcasting if we just need the 0'th element.
if (DemandedElts == 1) {
if (Src.getValueType() != VT)
@@ -37318,21 +38418,62 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
ExtSizeInBits = SizeInBits / 4;
switch (Opc) {
- // Subvector broadcast.
- case X86ISD::SUBV_BROADCAST: {
+ // Scalar broadcast.
+ case X86ISD::VBROADCAST: {
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
if (Src.getValueSizeInBits() > ExtSizeInBits)
Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
- else if (Src.getValueSizeInBits() < ExtSizeInBits) {
- MVT SrcSVT = Src.getSimpleValueType().getScalarType();
- MVT SrcVT =
- MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits());
- Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src);
- }
- return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
+ EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
+ ExtSizeInBits / VT.getScalarSizeInBits());
+ SDValue Bcst = TLO.DAG.getNode(X86ISD::VBROADCAST, DL, BcstVT, Src);
+ return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
TLO.DAG, DL, ExtSizeInBits));
}
+ case X86ISD::VBROADCAST_LOAD: {
+ SDLoc DL(Op);
+ auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
+ EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
+ ExtSizeInBits / VT.getScalarSizeInBits());
+ SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other);
+ SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
+ SDValue Bcst = TLO.DAG.getMemIntrinsicNode(
+ X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MemIntr->getMemoryVT(),
+ MemIntr->getMemOperand());
+ TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
+ Bcst.getValue(1));
+ return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
+ TLO.DAG, DL, ExtSizeInBits));
+ }
+ // Subvector broadcast.
+ case X86ISD::SUBV_BROADCAST_LOAD: {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
+ EVT MemVT = MemIntr->getMemoryVT();
+ if (ExtSizeInBits == MemVT.getStoreSizeInBits()) {
+ SDLoc DL(Op);
+ SDValue Ld =
+ TLO.DAG.getLoad(MemVT, DL, MemIntr->getChain(),
+ MemIntr->getBasePtr(), MemIntr->getMemOperand());
+ TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
+ Ld.getValue(1));
+ return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Ld, 0,
+ TLO.DAG, DL, ExtSizeInBits));
+ } else if ((ExtSizeInBits % MemVT.getStoreSizeInBits()) == 0) {
+ SDLoc DL(Op);
+ EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
+ ExtSizeInBits / VT.getScalarSizeInBits());
+ SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other);
+ SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
+ SDValue Bcst =
+ TLO.DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys,
+ Ops, MemVT, MemIntr->getMemOperand());
+ TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
+ Bcst.getValue(1));
+ return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
+ TLO.DAG, DL, ExtSizeInBits));
+ }
+ break;
+ }
// Byte shifts by immediate.
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
@@ -37384,7 +38525,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::BLENDI:
- // Saturated Packs.
+ // Integer ops.
+ case X86ISD::AVG:
case X86ISD::PACKSS:
case X86ISD::PACKUS:
// Horizontal Ops.
@@ -37477,16 +38619,22 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// If we don't demand all elements, then attempt to combine to a simpler
// shuffle.
- // TODO: Handle other depths, but first we need to handle the fact that
- // it might combine to the same shuffle.
- if (!DemandedElts.isAllOnesValue() && Depth == 0) {
+ // We need to convert the depth to something combineX86ShufflesRecursively
+ // can handle - so pretend it's at Depth == 0 again, and reduce the max depth
+ // to match. This prevents combineX86ShuffleChain from returning a
+ // combined shuffle that's the same as the original root, causing an
+ // infinite loop.
+ if (!DemandedElts.isAllOnesValue()) {
+ assert(Depth < X86::MaxShuffleCombineDepth && "Depth out of range");
+
SmallVector<int, 64> DemandedMask(NumElts, SM_SentinelUndef);
for (int i = 0; i != NumElts; ++i)
if (DemandedElts[i])
DemandedMask[i] = i;
SDValue NewShuffle = combineX86ShufflesRecursively(
- {Op}, 0, Op, DemandedMask, {}, Depth, /*HasVarMask*/ false,
+ {Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth,
+ /*HasVarMask*/ false,
/*AllowVarMask*/ true, TLO.DAG, Subtarget);
if (NewShuffle)
return TLO.CombineTo(Op, NewShuffle);
@@ -37589,7 +38737,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// Low bits known zero.
Known.Zero.setLowBits(ShAmt);
- break;
+ return false;
}
case X86ISD::VSRLI: {
unsigned ShAmt = Op.getConstantOperandVal(1);
@@ -37608,7 +38756,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// High bits known zero.
Known.Zero.setHighBits(ShAmt);
- break;
+ return false;
}
case X86ISD::VSRAI: {
SDValue Op0 = Op.getOperand(0);
@@ -37657,7 +38805,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// High bits are known one.
if (Known.One[BitWidth - ShAmt - 1])
Known.One.setHighBits(ShAmt);
- break;
+ return false;
}
case X86ISD::PEXTRB:
case X86ISD::PEXTRW: {
@@ -37723,8 +38871,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
return true;
KnownScl = KnownScl.trunc(VecVT.getScalarSizeInBits());
- Known.One = KnownVec.One & KnownScl.One;
- Known.Zero = KnownVec.Zero & KnownScl.Zero;
+ Known = KnownBits::commonBits(KnownVec, KnownScl);
return false;
}
break;
@@ -37804,34 +38951,83 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
return false;
}
- case X86ISD::BEXTR: {
+ case X86ISD::BEXTR:
+ case X86ISD::BEXTRI: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Only bottom 16-bits of the control bits are required.
if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
// NOTE: SimplifyDemandedBits won't do this for constants.
- const APInt &Val1 = Cst1->getAPIntValue();
- APInt MaskedVal1 = Val1 & 0xFFFF;
- if (MaskedVal1 != Val1) {
+ uint64_t Val1 = Cst1->getZExtValue();
+ uint64_t MaskedVal1 = Val1 & 0xFFFF;
+ if (Opc == X86ISD::BEXTR && MaskedVal1 != Val1) {
SDLoc DL(Op);
return TLO.CombineTo(
Op, TLO.DAG.getNode(X86ISD::BEXTR, DL, VT, Op0,
TLO.DAG.getConstant(MaskedVal1, DL, VT)));
}
+
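+ // The control operand encodes the start bit in bits [7:0] and the length in
+ // bits [15:8], e.g. a control of 0x0404 extracts the 4 bits [7:4].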
+ unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
+ unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);
+
+ // If the length is 0, the result is 0.
+ if (Length == 0) {
+ Known.setAllZero();
+ return false;
+ }
+
+ if ((Shift + Length) <= BitWidth) {
+ APInt DemandedMask = APInt::getBitsSet(BitWidth, Shift, Shift + Length);
+ if (SimplifyDemandedBits(Op0, DemandedMask, Known, TLO, Depth + 1))
+ return true;
+
+ Known = Known.extractBits(Length, Shift);
+ Known = Known.zextOrTrunc(BitWidth);
+ return false;
+ }
+ } else {
+ assert(Opc == X86ISD::BEXTR && "Unexpected opcode!");
+ KnownBits Known1;
+ APInt DemandedMask(APInt::getLowBitsSet(BitWidth, 16));
+ if (SimplifyDemandedBits(Op1, DemandedMask, Known1, TLO, Depth + 1))
+ return true;
+
+ // If the length is 0, replace with 0.
+ KnownBits LengthBits = Known1.extractBits(8, 8);
+ if (LengthBits.isZero())
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
}
- KnownBits Known1;
- APInt DemandedMask(APInt::getLowBitsSet(BitWidth, 16));
- if (SimplifyDemandedBits(Op1, DemandedMask, Known1, TLO, Depth + 1))
+ break;
+ }
+ case X86ISD::PDEP: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ unsigned DemandedBitsLZ = OriginalDemandedBits.countLeadingZeros();
+ APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
+
+ // If the demanded bits have leading zeroes, we don't demand those from the
+ // mask.
+ if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1))
return true;
- // If the length is 0, replace with 0.
- KnownBits LengthBits = Known1.extractBits(8, 8);
- if (LengthBits.isZero())
- return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ // The number of possible 1s in the mask determines the number of LSBs of
+ // operand 0 used. Undemanded bits from the mask don't matter so filter
+ // them before counting.
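+ // e.g. if the mask can have at most 3 set bits, only the low 3 bits of
+ // operand 0 can be deposited, so only those bits are demanded.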
+ KnownBits Known2;
+ uint64_t Count = (~Known.Zero & LoMask).countPopulation();
+ APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count));
+ if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1))
+ return true;
- break;
+ // Zeroes are retained from the mask, but not ones.
+ Known.One.clearAllBits();
+ // The result will have at least as many trailing zeros as the non-mask
+ // operand since bits can only map to the same or higher bit position.
+ Known.Zero.setLowBits(Known2.countMinTrailingZeros());
+ return false;
}
}
@@ -38242,6 +39438,8 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
// Convert build vector ops to MMX data in the bottom elements.
SmallVector<SDValue, 8> Ops;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
// Broadcast - use (PUNPCKL+)PSHUFW to broadcast single element.
if (Splat) {
if (Splat.isUndef())
@@ -38254,14 +39452,16 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
if (NumElts == 8)
Splat = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
- DAG.getConstant(Intrinsic::x86_mmx_punpcklbw, DL, MVT::i32), Splat,
- Splat);
+ DAG.getTargetConstant(Intrinsic::x86_mmx_punpcklbw, DL,
+ TLI.getPointerTy(DAG.getDataLayout())),
+ Splat, Splat);
// Use PSHUFW to repeat 16-bit elements.
unsigned ShufMask = (NumElts > 2 ? 0 : 0x44);
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
- DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32),
+ DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL,
+ TLI.getPointerTy(DAG.getDataLayout())),
Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8));
}
Ops.append(NumElts, Splat);
@@ -38277,7 +39477,8 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
(NumOps == 2 ? Intrinsic::x86_mmx_punpckldq
: (NumOps == 4 ? Intrinsic::x86_mmx_punpcklwd
: Intrinsic::x86_mmx_punpcklbw));
- SDValue Intrin = DAG.getConstant(IntrinOp, DL, MVT::i32);
+ SDValue Intrin = DAG.getTargetConstant(
+ IntrinOp, DL, TLI.getPointerTy(DAG.getDataLayout()));
for (unsigned i = 0; i != NumOps; i += 2)
Ops[i / 2] = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, Intrin,
Ops[i], Ops[i + 1]);
@@ -38291,7 +39492,7 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
// a vector/float/double that got truncated/extended/bitcast to/from a scalar
// integer. If so, replace the scalar ops with bool vector equivalents back down
// the chain.
-static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, SDLoc DL,
+static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -38344,6 +39545,10 @@ static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, SDLoc DL,
case ISD::SHL: {
// If we find a suitable source, a SHL becomes a KSHIFTL.
SDValue Src0 = V.getOperand(0);
+ if ((VT == MVT::v8i1 && !Subtarget.hasDQI()) ||
+ ((VT == MVT::v32i1 || VT == MVT::v64i1) && !Subtarget.hasBWI()))
+ break;
+
if (auto *Amt = dyn_cast<ConstantSDNode>(V.getOperand(1)))
if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget))
return DAG.getNode(
@@ -38686,8 +39891,8 @@ static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
// Attempt to replace an min/max v8i16/v16i8 horizontal reduction with
// PHMINPOSUW.
-static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// Bail without SSE41.
if (!Subtarget.hasSSE41())
return SDValue();
@@ -38760,9 +39965,8 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
}
// Attempt to replace an all_of/any_of/parity style horizontal reduction with a MOVMSK.
-static SDValue combineHorizontalPredicateResult(SDNode *Extract,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// Bail without SSE2.
if (!Subtarget.hasSSE2())
return SDValue();
@@ -38876,10 +40080,8 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32;
if (BinOp == ISD::XOR) {
- // parity -> (AND (CTPOP(MOVMSK X)), 1)
- SDValue Mask = DAG.getConstant(1, DL, CmpVT);
- SDValue Result = DAG.getNode(ISD::CTPOP, DL, CmpVT, Movmsk);
- Result = DAG.getNode(ISD::AND, DL, CmpVT, Result, Mask);
+ // parity -> (PARITY(MOVMSK X))
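+ // MOVMSK packs the bool lanes' sign bits into a scalar and ISD::PARITY
+ // xor-reduces those bits, replacing the old CTPOP+AND expansion.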
+ SDValue Result = DAG.getNode(ISD::PARITY, DL, CmpVT, Movmsk);
return DAG.getZExtOrTrunc(Result, DL, ExtractVT);
}
@@ -39067,10 +40269,12 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
// Handle extract(truncate(x)) for 0'th index.
// TODO: Treat this as a faux shuffle?
// TODO: When can we use this for general indices?
- if (ISD::TRUNCATE == Src.getOpcode() && SrcVT.is128BitVector() && IdxC == 0) {
+ if (ISD::TRUNCATE == Src.getOpcode() && IdxC == 0 &&
+ (SrcVT.getSizeInBits() % 128) == 0) {
Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl);
- Src = DAG.getBitcast(SrcVT, Src);
- return DAG.getNode(N->getOpcode(), dl, VT, Src, Idx);
+ MVT ExtractVT = MVT::getVectorVT(SrcSVT.getSimpleVT(), 128 / SrcEltBits);
+ return DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(ExtractVT, Src),
+ Idx);
}
// Resolve the target shuffle inputs and mask.
@@ -39146,7 +40350,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
unsigned OpCode = (SrcVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
SrcOp = DAG.getBitcast(SrcVT, SrcOp);
SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp,
- DAG.getIntPtrConstant(SrcIdx, dl));
+ DAG.getTargetConstant(SrcIdx, dl, MVT::i8));
return DAG.getZExtOrTrunc(ExtOp, dl, VT);
}
@@ -39253,8 +40457,8 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
/// Try to convert a vector reduction sequence composed of binops and shuffles
/// into horizontal ops.
-static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");
// We need at least SSE2 to anything here.
@@ -39262,8 +40466,8 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
return SDValue();
ISD::NodeType Opc;
- SDValue Rdx =
- DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD, ISD::FADD}, true);
+ SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc,
+ {ISD::ADD, ISD::MUL, ISD::FADD}, true);
if (!Rdx)
return SDValue();
@@ -39278,7 +40482,46 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
SDLoc DL(ExtElt);
- // vXi8 reduction - sub 128-bit vector.
+ // vXi8 mul reduction - promote to vXi16 mul reduction.
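+ // The unpacks interleave the bytes with undef, so each i16 lane holds one i8
+ // value in its low byte; the undef high bytes cannot affect the low 8 bits
+ // of the products, which is all the i8 result needs.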
+ if (Opc == ISD::MUL) {
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (VT != MVT::i8 || NumElts < 4 || !isPowerOf2_32(NumElts))
+ return SDValue();
+ if (VecVT.getSizeInBits() >= 128) {
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts / 2);
+ SDValue Lo = getUnpackl(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
+ SDValue Hi = getUnpackh(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
+ Lo = DAG.getBitcast(WideVT, Lo);
+ Hi = DAG.getBitcast(WideVT, Hi);
+ Rdx = DAG.getNode(Opc, DL, WideVT, Lo, Hi);
+ while (Rdx.getValueSizeInBits() > 128) {
+ std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
+ Rdx = DAG.getNode(Opc, DL, Lo.getValueType(), Lo, Hi);
+ }
+ } else {
+ if (VecVT == MVT::v4i8)
+ Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
+ DAG.getUNDEF(MVT::v4i8));
+ Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
+ DAG.getUNDEF(MVT::v8i8));
+ Rdx = getUnpackl(DAG, DL, MVT::v16i8, Rdx, DAG.getUNDEF(MVT::v16i8));
+ Rdx = DAG.getBitcast(MVT::v8i16, Rdx);
+ }
+ if (NumElts >= 8)
+ Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
+ DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
+ {4, 5, 6, 7, -1, -1, -1, -1}));
+ Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
+ DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
+ {2, 3, -1, -1, -1, -1, -1, -1}));
+ Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
+ DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
+ {1, -1, -1, -1, -1, -1, -1, -1}));
+ Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
+ }
+
+ // vXi8 add reduction - sub 128-bit vector.
if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
if (VecVT == MVT::v4i8) {
// Pad with zero.
@@ -39309,7 +40552,7 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
!isPowerOf2_32(VecVT.getVectorNumElements()))
return SDValue();
- // vXi8 reduction - sum lo/hi halves then use PSADBW.
+ // vXi8 add reduction - sum lo/hi halves then use PSADBW.
if (VT == MVT::i8) {
while (Rdx.getValueSizeInBits() > 128) {
SDValue Lo, Hi;
@@ -39415,7 +40658,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
}
// TODO - Remove this once we can handle the implicit zero-extension of
- // X86ISD::PEXTRW/X86ISD::PEXTRB in combineHorizontalPredicateResult and
+ // X86ISD::PEXTRW/X86ISD::PEXTRB in combinePredicateReduction and
// combineBasicSADPattern.
return SDValue();
}
@@ -39447,14 +40690,15 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return SAD;
// Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.
- if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))
+ if (SDValue Cmp = combinePredicateReduction(N, DAG, Subtarget))
return Cmp;
// Attempt to replace min/max v8i16/v16i8 reductions with PHMINPOSUW.
- if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
+ if (SDValue MinMax = combineMinMaxReduction(N, DAG, Subtarget))
return MinMax;
- if (SDValue V = combineReductionToHorizontal(N, DAG, Subtarget))
+ // Attempt to optimize ADD/FADD/MUL reductions with HADD, promotion etc.
+ if (SDValue V = combineArithReduction(N, DAG, Subtarget))
return V;
if (SDValue V = scalarizeExtEltFP(N, DAG))
@@ -39578,7 +40822,7 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
if (TValIsAllOnes && FValIsAllZeros)
return DAG.getBitcast(VT, Cond);
- if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT))
+ if (!TLI.isTypeLegal(CondVT))
return SDValue();
// vselect Cond, 111..., X -> or Cond, X
@@ -39901,6 +41145,36 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
}
+ // fold vselect(cond, pshufb(x), pshufb(y)) -> or (pshufb(x), pshufb(y))
+ // by forcing the unselected elements to zero.
+ // TODO: Can we handle more shuffles with this?
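+ // Setting a mask byte to 0x80 makes PSHUFB write zero in that lane, so each
+ // shuffle keeps only its selected lanes and the OR recombines them.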
+ if (N->getOpcode() == ISD::VSELECT && CondVT.isVector() &&
+ LHS.getOpcode() == X86ISD::PSHUFB && RHS.getOpcode() == X86ISD::PSHUFB &&
+ LHS.hasOneUse() && RHS.hasOneUse()) {
+ MVT SimpleVT = VT.getSimpleVT();
+ bool LHSUnary, RHSUnary;
+ SmallVector<SDValue, 1> LHSOps, RHSOps;
+ SmallVector<int, 64> LHSMask, RHSMask, CondMask;
+ if (createShuffleMaskFromVSELECT(CondMask, Cond) &&
+ getTargetShuffleMask(LHS.getNode(), SimpleVT, true, LHSOps, LHSMask,
+ LHSUnary) &&
+ getTargetShuffleMask(RHS.getNode(), SimpleVT, true, RHSOps, RHSMask,
+ RHSUnary)) {
+ int NumElts = VT.getVectorNumElements();
+ for (int i = 0; i != NumElts; ++i) {
+ if (CondMask[i] < NumElts)
+ RHSMask[i] = 0x80;
+ else
+ LHSMask[i] = 0x80;
+ }
+ LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0),
+ getConstVector(LHSMask, SimpleVT, DAG, DL, true));
+ RHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, RHS.getOperand(0),
+ getConstVector(RHSMask, SimpleVT, DAG, DL, true));
+ return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
+ }
+ }
+
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
@@ -40127,13 +41401,12 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineSelectOfTwoConstants(N, DAG))
return V;
- // Canonicalize max and min:
- // (x > y) ? x : y -> (x >= y) ? x : y
- // (x < y) ? x : y -> (x <= y) ? x : y
+ // Canonicalize min/max:
+ // (x > 0) ? x : 0 -> (x >= 0) ? x : 0
+ // (x < -1) ? x : -1 -> (x <= -1) ? x : -1
// This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates
- // the need for an extra compare
- // against zero. e.g.
- // (x - y) > 0 : (x - y) ? 0 -> (x - y) >= 0 : (x - y) ? 0
+ // the need for an extra compare against zero. e.g.
+ // (a - b) > 0 ? (a - b) : 0 -> (a - b) >= 0 ? (a - b) : 0
// subl %esi, %edi
// testl %edi, %edi
// movl $0, %eax
@@ -40142,142 +41415,27 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// xorl %eax, %eax
// subl %esi, $edi
// cmovsl %eax, %edi
+ //
+ // We can also canonicalize
+ // (x s> 1) ? x : 1 -> (x s>= 1) ? x : 1 -> (x s> 0) ? x : 1
+ // (x u> 1) ? x : 1 -> (x u>= 1) ? x : 1 -> (x != 0) ? x : 1
+ // This allows the use of a test instruction for the compare.
if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
Cond.hasOneUse() &&
- DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
- DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
- switch (CC) {
- default: break;
- case ISD::SETLT:
- case ISD::SETGT: {
- ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
+ if ((CC == ISD::SETGT && (isNullConstant(RHS) || isOneConstant(RHS))) ||
+ (CC == ISD::SETLT && isAllOnesConstant(RHS))) {
+ ISD::CondCode NewCC = CC == ISD::SETGT ? ISD::SETGE : ISD::SETLE;
Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(),
Cond.getOperand(0), Cond.getOperand(1), NewCC);
return DAG.getSelect(DL, VT, Cond, LHS, RHS);
}
- }
- }
-
- // Match VSELECTs into subs with unsigned saturation.
- if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
- // psubus is available in SSE2 for i8 and i16 vectors.
- Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 &&
- isPowerOf2_32(VT.getVectorNumElements()) &&
- (VT.getVectorElementType() == MVT::i8 ||
- VT.getVectorElementType() == MVT::i16)) {
- ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
-
- // Check if one of the arms of the VSELECT is a zero vector. If it's on the
- // left side invert the predicate to simplify logic below.
- SDValue Other;
- if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
- Other = RHS;
- CC = ISD::getSetCCInverse(CC, VT.getVectorElementType());
- } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
- Other = LHS;
- }
-
- if (Other.getNode() && Other->getNumOperands() == 2 &&
- Other->getOperand(0) == Cond.getOperand(0)) {
- SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
- SDValue CondRHS = Cond->getOperand(1);
-
- // Look for a general sub with unsigned saturation first.
- // x >= y ? x-y : 0 --> subus x, y
- // x > y ? x-y : 0 --> subus x, y
- if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
- Other->getOpcode() == ISD::SUB && OpRHS == CondRHS)
- return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
-
- if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
- if (isa<BuildVectorSDNode>(CondRHS)) {
- // If the RHS is a constant we have to reverse the const
- // canonicalization.
- // x > C-1 ? x+-C : 0 --> subus x, C
- auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
- return (!Op && !Cond) ||
- (Op && Cond &&
- Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
- };
- if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
- /*AllowUndefs*/ true)) {
- OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- OpRHS);
- return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
- }
-
- // Another special case: If C was a sign bit, the sub has been
- // canonicalized into a xor.
- // FIXME: Would it be better to use computeKnownBits to determine
- // whether it's safe to decanonicalize the xor?
- // x s< 0 ? x^C : 0 --> subus x, C
- if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
- if (CC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
- OpRHSConst->getAPIntValue().isSignMask()) {
- // Note that we have to rebuild the RHS constant here to ensure we
- // don't rely on particular values of undef lanes.
- OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
- return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
- }
- }
- }
- }
- }
- }
-
- // Match VSELECTs into add with unsigned saturation.
- if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
- // paddus is available in SSE2 for i8 and i16 vectors.
- Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 &&
- isPowerOf2_32(VT.getVectorNumElements()) &&
- (VT.getVectorElementType() == MVT::i8 ||
- VT.getVectorElementType() == MVT::i16)) {
- ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
-
- SDValue CondLHS = Cond->getOperand(0);
- SDValue CondRHS = Cond->getOperand(1);
-
- // Check if one of the arms of the VSELECT is vector with all bits set.
- // If it's on the left side invert the predicate to simplify logic below.
- SDValue Other;
- if (ISD::isBuildVectorAllOnes(LHS.getNode())) {
- Other = RHS;
- CC = ISD::getSetCCInverse(CC, VT.getVectorElementType());
- } else if (ISD::isBuildVectorAllOnes(RHS.getNode())) {
- Other = LHS;
- }
-
- if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
- SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
-
- // Canonicalize condition operands.
- if (CC == ISD::SETUGE) {
- std::swap(CondLHS, CondRHS);
- CC = ISD::SETULE;
- }
-
- // We can test against either of the addition operands.
- // x <= x+y ? x+y : ~0 --> addus x, y
- // x+y >= x ? x+y : ~0 --> addus x, y
- if (CC == ISD::SETULE && Other == CondRHS &&
- (OpLHS == CondLHS || OpRHS == CondLHS))
- return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
-
- if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
- CondLHS == OpLHS) {
- // If the RHS is a constant we have to reverse the const
- // canonicalization.
- // x > ~C ? x+C : ~0 --> addus x, C
- auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
- return Cond->getAPIntValue() == ~Op->getAPIntValue();
- };
- if (CC == ISD::SETULE &&
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
- return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
- }
+ if (CC == ISD::SETUGT && isOneConstant(RHS)) {
+ ISD::CondCode NewCC = ISD::SETUGE;
+ Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(),
+ Cond.getOperand(0), Cond.getOperand(1), NewCC);
+ return DAG.getSelect(DL, VT, Cond, LHS, RHS);
}
}
@@ -40308,10 +41466,18 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return V;
// select(~Cond, X, Y) -> select(Cond, Y, X)
- if (CondVT.getScalarType() != MVT::i1)
+ if (CondVT.getScalarType() != MVT::i1) {
if (SDValue CondNot = IsNOT(Cond, DAG))
return DAG.getNode(N->getOpcode(), DL, VT,
DAG.getBitcast(CondVT, CondNot), RHS, LHS);
+ // pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the signbit.
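+ // pcmpgt(X,-1) tests X >= 0 while pcmpgt(0,X) tests X < 0, so the select
+ // operands are swapped below to preserve the result.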
+ if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse() &&
+ ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) {
+ Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT,
+ DAG.getConstant(0, DL, CondVT), Cond.getOperand(0));
+ return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);
+ }
+ }
// Try to optimize vXi1 selects if both operands are either all constants or
// bitcasts from scalar integer type. In that case we can convert the operands
@@ -41928,73 +43094,115 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue combineVectorPackWithShuffle(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
- assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
- "Unexpected pack opcode");
+ assert((X86ISD::HADD == Opcode || X86ISD::FHADD == Opcode ||
+ X86ISD::HSUB == Opcode || X86ISD::FHSUB == Opcode ||
+ X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
+ "Unexpected hadd/hsub/pack opcode");
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- unsigned NumDstElts = VT.getVectorNumElements();
+ EVT SrcVT = N0.getValueType();
- // Attempt to fold PACK(LOSUBVECTOR(SHUFFLE(X)),HISUBVECTOR(SHUFFLE(X)))
- // to SHUFFLE(PACK(LOSUBVECTOR(X),HISUBVECTOR(X))), this is mainly for
+ // Attempt to fold HOP(LOSUBVECTOR(SHUFFLE(X)),HISUBVECTOR(SHUFFLE(X)))
+ // to SHUFFLE(HOP(LOSUBVECTOR(X),HISUBVECTOR(X))), this is mainly for
// truncation trees that help us avoid lane crossing shuffles.
// TODO: There's a lot more we can do for PACK/HADD style shuffle combines.
+ // TODO: We don't handle vXf64 shuffles yet.
if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N0.getConstantOperandAPInt(1) == 0 &&
- N1.getConstantOperandAPInt(1) == (NumDstElts / 2) &&
+ N1.getConstantOperandAPInt(1) == SrcVT.getVectorNumElements() &&
N0.getOperand(0) == N1.getOperand(0) && VT.is128BitVector() &&
- N0.getOperand(0).getValueType().is256BitVector()) {
+ N0.getOperand(0).getValueType().is256BitVector() &&
+ SrcVT.getScalarSizeInBits() <= 32) {
// TODO - support target/faux shuffles.
SDValue Vec = peekThroughBitcasts(N0.getOperand(0));
if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec)) {
- // To keep the PACK LHS/RHS coherency, we must be able to scale the unary
+ // To keep the HOP LHS/RHS coherency, we must be able to scale the unary
// shuffle to a vXi64 width - we can probably relax this in the future.
SmallVector<int, 4> ShuffleMask;
if (SVN->getOperand(1).isUndef() &&
scaleShuffleElements(SVN->getMask(), 4, ShuffleMask)) {
SDLoc DL(N);
SDValue Lo, Hi;
+ MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
std::tie(Lo, Hi) = DAG.SplitVector(SVN->getOperand(0), DL);
Lo = DAG.getBitcast(N0.getValueType(), Lo);
Hi = DAG.getBitcast(N1.getValueType(), Hi);
SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
- Res = DAG.getBitcast(MVT::v4i32, Res);
- Res = DAG.getVectorShuffle(MVT::v4i32, DL, Res, Res, ShuffleMask);
+ Res = DAG.getBitcast(ShufVT, Res);
+ Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ShuffleMask);
return DAG.getBitcast(VT, Res);
}
}
}
- // Attempt to fold PACK(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(PACK(X,Y)).
+ // Attempt to fold HOP(SHUFFLE(X),SHUFFLE(Y)) -> SHUFFLE(HOP(X,Y)).
+ // TODO: Merge with binary shuffle folds below.
+ if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) {
+ int PostShuffle[4] = {0, 1, 2, 3};
+
+ // If the op is a unary shuffle that can scale to v2x64,
+ // then we can perform this as a v4x32 post shuffle.
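+ // e.g. hadd(shuffle<2,3,0,1>(X), Y) -> shuffle<1,0,2,3>(hadd(X,Y)): each
+ // 64-bit half of an operand feeds one aligned 32-bit chunk of the result, so
+ // a v2x64 shuffle of an operand becomes a v4x32 permute of the result.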
+ auto AdjustOp = [&](SDValue V, int Offset) {
+ auto *SVN = dyn_cast<ShuffleVectorSDNode>(V);
+ SmallVector<int, 2> ScaledMask;
+ if (!SVN || !SVN->getOperand(1).isUndef() ||
+ !scaleShuffleElements(SVN->getMask(), 2, ScaledMask) ||
+ !N->isOnlyUserOf(V.getNode()))
+ return SDValue();
+ PostShuffle[Offset + 0] = ScaledMask[0] < 0 ? -1 : Offset + ScaledMask[0];
+ PostShuffle[Offset + 1] = ScaledMask[1] < 0 ? -1 : Offset + ScaledMask[1];
+ return SVN->getOperand(0);
+ };
+
+ SDValue Src0 = AdjustOp(N0, 0);
+ SDValue Src1 = AdjustOp(N1, 2);
+ if (Src0 || Src1) {
+ Src0 = Src0 ? Src0 : N0;
+ Src1 = Src1 ? Src1 : N1;
+ SDLoc DL(N);
+ MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
+ SDValue Res = DAG.getNode(Opcode, DL, VT, Src0, Src1);
+ Res = DAG.getBitcast(ShufVT, Res);
+ Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, PostShuffle);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+
+ // Attempt to fold HOP(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(HOP(X,Y)).
// TODO: Relax shuffle scaling to support sub-128-bit subvector shuffles.
- if (VT.is256BitVector()) {
- if (auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(N0)) {
- if (auto *SVN1 = dyn_cast<ShuffleVectorSDNode>(N1)) {
- SmallVector<int, 2> ShuffleMask0, ShuffleMask1;
- if (scaleShuffleElements(SVN0->getMask(), 2, ShuffleMask0) &&
- scaleShuffleElements(SVN1->getMask(), 2, ShuffleMask1)) {
- SDValue Op00 = SVN0->getOperand(0);
- SDValue Op01 = SVN0->getOperand(1);
- SDValue Op10 = SVN1->getOperand(0);
- SDValue Op11 = SVN1->getOperand(1);
- if ((Op00 == Op11) && (Op01 == Op10)) {
- std::swap(Op10, Op11);
- ShuffleVectorSDNode::commuteMask(ShuffleMask1);
- }
- if ((Op00 == Op10) && (Op01 == Op11)) {
- SmallVector<int, 4> ShuffleMask;
- ShuffleMask.append(ShuffleMask0.begin(), ShuffleMask0.end());
- ShuffleMask.append(ShuffleMask1.begin(), ShuffleMask1.end());
- SDLoc DL(N);
- SDValue Res = DAG.getNode(Opcode, DL, VT, Op00, Op01);
- Res = DAG.getBitcast(MVT::v4i64, Res);
- Res = DAG.getVectorShuffle(MVT::v4i64, DL, Res, Res, ShuffleMask);
- return DAG.getBitcast(VT, Res);
- }
+ if (VT.is256BitVector() && Subtarget.hasInt256()) {
+ SmallVector<int> Mask0, Mask1;
+ SmallVector<SDValue> Ops0, Ops1;
+ if (getTargetShuffleInputs(N0, Ops0, Mask0, DAG) && !isAnyZero(Mask0) &&
+ getTargetShuffleInputs(N1, Ops1, Mask1, DAG) && !isAnyZero(Mask1) &&
+ !Ops0.empty() && !Ops1.empty()) {
+ SDValue Op00 = Ops0.front(), Op01 = Ops0.back();
+ SDValue Op10 = Ops1.front(), Op11 = Ops1.back();
+ SmallVector<int, 2> ShuffleMask0, ShuffleMask1;
+ if (Op00.getValueType() == SrcVT && Op01.getValueType() == SrcVT &&
+ Op11.getValueType() == SrcVT && Op11.getValueType() == SrcVT &&
+ scaleShuffleElements(Mask0, 2, ShuffleMask0) &&
+ scaleShuffleElements(Mask1, 2, ShuffleMask1)) {
+ if ((Op00 == Op11) && (Op01 == Op10)) {
+ std::swap(Op10, Op11);
+ ShuffleVectorSDNode::commuteMask(ShuffleMask1);
+ }
+ if ((Op00 == Op10) && (Op01 == Op11)) {
+ SmallVector<int, 4> ShuffleMask;
+ ShuffleMask.append(ShuffleMask0.begin(), ShuffleMask0.end());
+ ShuffleMask.append(ShuffleMask1.begin(), ShuffleMask1.end());
+ SDLoc DL(N);
+ MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
+ SDValue Res = DAG.getNode(Opcode, DL, VT, Op00, Op01);
+ Res = DAG.getBitcast(ShufVT, Res);
+ Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ShuffleMask);
+ return DAG.getBitcast(VT, Res);
}
}
}
@@ -42077,7 +43285,7 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
}
// Try to fold PACK(SHUFFLE(),SHUFFLE()) -> SHUFFLE(PACK()).
- if (SDValue V = combineVectorPackWithShuffle(N, DAG))
+ if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
return V;
// Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
@@ -42099,6 +43307,28 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
}
}
+ // Try to fold PACK(EXTEND(X),EXTEND(Y)) -> CONCAT(X,Y) subvectors.
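+ // e.g. PACKSS(sext(v8i8 X), sext(v8i8 Y)) -> v16i8 concat(X,Y), since packing
+ // values that originated as i8 back down to i8 never saturates.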
+ if (VT.is128BitVector()) {
+ unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Src0, Src1;
+ if (N0.getOpcode() == ExtOpc &&
+ N0.getOperand(0).getValueType().is64BitVector() &&
+ N0.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
+ Src0 = N0.getOperand(0);
+ }
+ if (N1.getOpcode() == ExtOpc &&
+ N1.getOperand(0).getValueType().is64BitVector() &&
+ N1.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
+ Src1 = N1.getOperand(0);
+ }
+ if ((Src0 || N0.isUndef()) && (Src1 || N1.isUndef())) {
+ assert((Src0 || Src1) && "Found PACK(UNDEF,UNDEF)");
+ Src0 = Src0 ? Src0 : DAG.getUNDEF(Src1.getValueType());
+ Src1 = Src1 ? Src1 : DAG.getUNDEF(Src0.getValueType());
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1);
+ }
+ }
+
// Attempt to combine as shuffle.
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
@@ -42107,6 +43337,20 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ assert((X86ISD::HADD == N->getOpcode() || X86ISD::FHADD == N->getOpcode() ||
+ X86ISD::HSUB == N->getOpcode() || X86ISD::FHSUB == N->getOpcode()) &&
+ "Unexpected horizontal add/sub opcode");
+
+ // Try to fold HOP(SHUFFLE(),SHUFFLE()) -> SHUFFLE(HOP()).
+ if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
+ return V;
+
+ return SDValue();
+}
+
static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -42735,74 +43979,6 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
return SDValue();
}
-// Look for (and (ctpop X), 1) which is the IR form of __builtin_parity.
-// Turn it into series of XORs and a setnp.
-static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- EVT VT = N->getValueType(0);
-
- // We only support 64-bit and 32-bit. 64-bit requires special handling
- // unless the 64-bit popcnt instruction is legal.
- if (VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT))
- return SDValue();
-
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- // LHS needs to be a single use CTPOP.
- if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse())
- return SDValue();
-
- // RHS needs to be 1.
- if (!isOneConstant(N1))
- return SDValue();
-
- SDLoc DL(N);
- SDValue X = N0.getOperand(0);
-
- // If this is 64-bit, its always best to xor the two 32-bit pieces together
- // even if we have popcnt.
- if (VT == MVT::i64) {
- SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
- DAG.getNode(ISD::SRL, DL, VT, X,
- DAG.getConstant(32, DL, MVT::i8)));
- SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
- X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi);
- // Generate a 32-bit parity idiom. This will bring us back here if we need
- // to expand it too.
- SDValue Parity = DAG.getNode(ISD::AND, DL, MVT::i32,
- DAG.getNode(ISD::CTPOP, DL, MVT::i32, X),
- DAG.getConstant(1, DL, MVT::i32));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Parity);
- }
- assert(VT == MVT::i32 && "Unexpected VT!");
-
- // Xor the high and low 16-bits together using a 32-bit operation.
- SDValue Hi16 = DAG.getNode(ISD::SRL, DL, VT, X,
- DAG.getConstant(16, DL, MVT::i8));
- X = DAG.getNode(ISD::XOR, DL, VT, X, Hi16);
-
- // Finally xor the low 2 bytes together and use a 8-bit flag setting xor.
- // This should allow an h-reg to be used to save a shift.
- // FIXME: We only get an h-reg in 32-bit mode.
- SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
- DAG.getNode(ISD::SRL, DL, VT, X,
- DAG.getConstant(8, DL, MVT::i8)));
- SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
- SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32);
- SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1);
-
- // Copy the inverse of the parity flag into a register with setcc.
- SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
- // Zero extend to original type.
- return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), Setnp);
-}
-
-
// Look for (and (bitcast (vXi1 (concat_vectors (vYi1 setcc), undef,))), C)
// Where C is a mask containing the same number of bits as the setcc and
// where the setcc will freely 0 upper bits of k-register. We can replace the
@@ -42894,10 +44070,6 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
}
}
- // This must be done before legalization has expanded the ctpop.
- if (SDValue V = combineParity(N, DAG, Subtarget))
- return V;
-
// Match all-of bool scalar reductions into a bitcast/movmsk + cmp.
// TODO: Support multiple SrcOps.
if (VT == MVT::i1) {
@@ -42967,7 +44139,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (VT == SrcVecVT.getScalarType() &&
N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) &&
getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) &&
- llvm::all_of(EltBits, [](APInt M) {
+ llvm::all_of(EltBits, [](const APInt &M) {
return M.isNullValue() || M.isAllOnesValue();
})) {
unsigned NumElts = SrcVecVT.getVectorNumElements();
@@ -42986,6 +44158,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue Shuffle = combineX86ShufflesRecursively(
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
+ X86::MaxShuffleCombineDepth,
/*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
N->getOperand(0).getOperand(1));
@@ -43653,14 +44826,13 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
unsigned NumElems = VT.getVectorNumElements();
EVT ScalarVT = VT.getVectorElementType();
- if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
- NumElems >= 2 && isPowerOf2_32(NumElems)))
+ if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && NumElems >= 2))
return SDValue();
// InScalarVT is the intermediate type in AVG pattern and it should be greater
// than the original input type (i8/i16).
EVT InScalarVT = InVT.getVectorElementType();
- if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
+ if (InScalarVT.getFixedSizeInBits() <= ScalarVT.getFixedSizeInBits())
return SDValue();
if (!Subtarget.hasSSE2())
@@ -43688,8 +44860,8 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
};
// Check if each element of the vector is right-shifted by one.
- auto LHS = In.getOperand(0);
- auto RHS = In.getOperand(1);
+ SDValue LHS = In.getOperand(0);
+ SDValue RHS = In.getOperand(1);
if (!IsConstVectorInRange(RHS, 1, 1))
return SDValue();
if (LHS.getOpcode() != ISD::ADD)
@@ -43705,6 +44877,29 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
};
+ auto AVGSplitter = [&](SDValue Op0, SDValue Op1) {
+ // Pad to a power-of-2 vector, split+apply and extract the original vector.
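+ // e.g. a v6i8 average is widened to v8i8 with undef elements, lowered via
+ // X86ISD::AVG, and the original 6 elements are extracted from the result.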
+ unsigned NumElemsPow2 = PowerOf2Ceil(NumElems);
+ EVT Pow2VT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NumElemsPow2);
+ if (NumElemsPow2 != NumElems) {
+ SmallVector<SDValue, 32> Ops0(NumElemsPow2, DAG.getUNDEF(ScalarVT));
+ SmallVector<SDValue, 32> Ops1(NumElemsPow2, DAG.getUNDEF(ScalarVT));
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Idx = DAG.getIntPtrConstant(i, DL);
+ Ops0[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op0, Idx);
+ Ops1[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op1, Idx);
+ }
+ Op0 = DAG.getBuildVector(Pow2VT, DL, Ops0);
+ Op1 = DAG.getBuildVector(Pow2VT, DL, Ops1);
+ }
+ SDValue Res =
+ SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, {Op0, Op1}, AVGBuilder);
+ if (NumElemsPow2 == NumElems)
+ return Res;
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
+ };
+
// Take care of the case when one of the operands is a constant vector whose
// element is in the range [1, 256].
if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
@@ -43715,9 +44910,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
SDValue VecOnes = DAG.getConstant(1, DL, InVT);
Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
- return SplitOpsAndApply(DAG, Subtarget, DL, VT,
- { Operands[0].getOperand(0), Operands[1] },
- AVGBuilder);
+ return AVGSplitter(Operands[0].getOperand(0), Operands[1]);
}
// Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
@@ -43764,8 +44957,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
}
// The pattern is detected, emit X86ISD::AVG instruction(s).
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Operands[0], Operands[1]},
- AVGBuilder);
+ return AVGSplitter(Operands[0], Operands[1]);
}
return SDValue();
@@ -43798,7 +44990,8 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
unsigned HalfOffset = 16;
SDValue Ptr1 = Ld->getBasePtr();
- SDValue Ptr2 = DAG.getMemBasePlusOffset(Ptr1, HalfOffset, dl);
+ SDValue Ptr2 =
+ DAG.getMemBasePlusOffset(Ptr1, TypeSize::Fixed(HalfOffset), dl);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
NumElems / 2);
SDValue Load1 =
@@ -43832,6 +45025,29 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
}
}
+ // If we also broadcast this as a subvector to a wider type, then just extract
+ // the lowest subvector.
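+ // e.g. a 128-bit load whose pointer also feeds a 256-bit SUBV_BROADCAST_LOAD
+ // of the same memory can reuse the broadcast's low 128 bits instead.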
+ if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() &&
+ (RegVT.is128BitVector() || RegVT.is256BitVector())) {
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Chain = Ld->getChain();
+ for (SDNode *User : Ptr->uses()) {
+ if (User != N && User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
+ cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
+ cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
+ cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
+ MemVT.getSizeInBits() &&
+ !User->hasAnyUseOfValue(1) &&
+ User->getValueSizeInBits(0).getFixedSize() >
+ RegVT.getFixedSizeInBits()) {
+ SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
+ RegVT.getSizeInBits());
+ Extract = DAG.getBitcast(RegVT, Extract);
+ return DCI.CombineTo(N, Extract, SDValue(User, 1));
+ }
+ }
+ }
+
// Cast ptr32 and ptr64 pointers to the default address space before a load.
unsigned AddrSpace = Ld->getAddressSpace();
if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR ||
@@ -43873,7 +45089,7 @@ static int getOneTrueElt(SDValue V) {
auto *ConstNode = dyn_cast<ConstantSDNode>(Op);
if (!ConstNode)
return -1;
- if (ConstNode->getAPIntValue().isAllOnesValue()) {
+ if (ConstNode->getAPIntValue().countTrailingOnes() >= 1) {
// If we already found a one, this is too many.
if (TrueIndex >= 0)
return -1;
@@ -43889,7 +45105,8 @@ static int getOneTrueElt(SDValue V) {
/// scalar element, and the alignment for the scalar memory access.
static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
SelectionDAG &DAG, SDValue &Addr,
- SDValue &Index, unsigned &Alignment) {
+ SDValue &Index, Align &Alignment,
+ unsigned &Offset) {
int TrueMaskElt = getOneTrueElt(MaskedOp->getMask());
if (TrueMaskElt < 0)
return false;
@@ -43897,14 +45114,17 @@ static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
// Get the address of the one scalar element that is specified by the mask
// using the appropriate offset from the base pointer.
EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType();
+ Offset = 0;
Addr = MaskedOp->getBasePtr();
if (TrueMaskElt != 0) {
- unsigned Offset = TrueMaskElt * EltVT.getStoreSize();
- Addr = DAG.getMemBasePlusOffset(Addr, Offset, SDLoc(MaskedOp));
+ Offset = TrueMaskElt * EltVT.getStoreSize();
+ Addr = DAG.getMemBasePlusOffset(Addr, TypeSize::Fixed(Offset),
+ SDLoc(MaskedOp));
}
Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp));
- Alignment = MinAlign(MaskedOp->getAlignment(), EltVT.getStoreSize());
+ Alignment = commonAlignment(MaskedOp->getOriginalAlign(),
+ EltVT.getStoreSize());
return true;
}
@@ -43914,15 +45134,17 @@ static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
/// mask have already been optimized in IR, so we don't bother with those here.
static SDValue
reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
assert(ML->isUnindexed() && "Unexpected indexed masked load!");
// TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
// However, some target hooks may need to be added to know when the transform
// is profitable. Endianness would also have to be considered.
SDValue Addr, VecIndex;
- unsigned Alignment;
- if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment))
+ Align Alignment;
+ unsigned Offset;
+ if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment, Offset))
return SDValue();
// Load the one scalar element that is specified by the mask using the
@@ -43930,13 +45152,25 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
SDLoc DL(ML);
EVT VT = ML->getValueType(0);
EVT EltVT = VT.getVectorElementType();
+
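+ // On 32-bit targets a scalar i64 load is not legal and would be split, so
+ // load the element as f64 instead and bitcast the vector around the insert.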
+ EVT CastVT = VT;
+ if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
+ EltVT = MVT::f64;
+ CastVT =
+ EVT::getVectorVT(*DAG.getContext(), EltVT, VT.getVectorNumElements());
+ }
+
SDValue Load =
- DAG.getLoad(EltVT, DL, ML->getChain(), Addr, ML->getPointerInfo(),
+ DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
+ ML->getPointerInfo().getWithOffset(Offset),
Alignment, ML->getMemOperand()->getFlags());
+ SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru());
+
// Insert the loaded element into the appropriate place in the vector.
- SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
- ML->getPassThru(), Load, VecIndex);
+ SDValue Insert =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, CastVT, PassThru, Load, VecIndex);
+ Insert = DAG.getBitcast(VT, Insert);
return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
}
@@ -43999,7 +45233,8 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
- if (SDValue ScalarLoad = reduceMaskedLoadToScalarLoad(Mld, DAG, DCI))
+ if (SDValue ScalarLoad =
+ reduceMaskedLoadToScalarLoad(Mld, DAG, DCI, Subtarget))
return ScalarLoad;
// TODO: Do some AVX512 subsets benefit from this transform?
@@ -44036,25 +45271,35 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
/// Note: It is expected that the degenerate cases of an all-zeros or all-ones
/// mask have already been optimized in IR, so we don't bother with those here.
static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
// However, some target hooks may need to be added to know when the transform
// is profitable. Endianness would also have to be considered.
SDValue Addr, VecIndex;
- unsigned Alignment;
- if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment))
+ Align Alignment;
+ unsigned Offset;
+ if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment, Offset))
return SDValue();
// Extract the one scalar element that is actually being stored.
SDLoc DL(MS);
- EVT VT = MS->getValue().getValueType();
+ SDValue Value = MS->getValue();
+ EVT VT = Value.getValueType();
EVT EltVT = VT.getVectorElementType();
- SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
- MS->getValue(), VecIndex);
+ if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
+ EltVT = MVT::f64;
+ EVT CastVT =
+ EVT::getVectorVT(*DAG.getContext(), EltVT, VT.getVectorNumElements());
+ Value = DAG.getBitcast(CastVT, Value);
+ }
+ SDValue Extract =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Value, VecIndex);
// Store that element at the appropriate offset from the base pointer.
- return DAG.getStore(MS->getChain(), DL, Extract, Addr, MS->getPointerInfo(),
+ return DAG.getStore(MS->getChain(), DL, Extract, Addr,
+ MS->getPointerInfo().getWithOffset(Offset),
Alignment, MS->getMemOperand()->getFlags());
}
@@ -44072,7 +45317,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
if (Mst->isTruncatingStore())
return SDValue();
- if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
+ if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget))
return ScalarStore;
// If the mask value has been legalized to a non-boolean vector, try to
@@ -44133,17 +45378,21 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR &&
StoredVal.getOperand(0).getValueType() == MVT::i8) {
- return DAG.getStore(St->getChain(), dl, StoredVal.getOperand(0),
+ SDValue Val = StoredVal.getOperand(0);
+ // We must store zeros to the unused bits.
+ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
+ return DAG.getStore(St->getChain(), dl, Val,
St->getBasePtr(), St->getPointerInfo(),
St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
// Widen v2i1/v4i1 stores to v8i1.
- if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
+ if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
Subtarget.hasAVX512()) {
unsigned NumConcats = 8 / VT.getVectorNumElements();
- SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
+ // We must store zeros to the unused bits.
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
Ops[0] = StoredVal;
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
@@ -44165,7 +45414,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
Hi = combinevXi1ConstantToInteger(Hi, DAG);
SDValue Ptr0 = St->getBasePtr();
- SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 4, dl);
+ SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(4), dl);
SDValue Ch0 =
DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(),
@@ -44244,6 +45493,36 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
VT, St->getMemOperand(), DAG);
}
+ // Try to fold an extract_element(VTRUNC) pattern into a truncating store.
+ if (!St->isTruncatingStore() && StoredVal.hasOneUse()) {
+ auto IsExtractedElement = [](SDValue V) {
+ if (V.getOpcode() == ISD::TRUNCATE && V.getOperand(0).hasOneUse())
+ V = V.getOperand(0);
+ unsigned Opc = V.getOpcode();
+ if (Opc == ISD::EXTRACT_VECTOR_ELT || Opc == X86ISD::PEXTRW) {
+ if (V.getOperand(0).hasOneUse() && isNullConstant(V.getOperand(1)))
+ return V.getOperand(0);
+ }
+ return SDValue();
+ };
+ if (SDValue Extract = IsExtractedElement(StoredVal)) {
+ SDValue Trunc = peekThroughOneUseBitcasts(Extract);
+ if (Trunc.getOpcode() == X86ISD::VTRUNC) {
+ SDValue Src = Trunc.getOperand(0);
+ MVT DstVT = Trunc.getSimpleValueType();
+ MVT SrcVT = Src.getSimpleValueType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned NumTruncBits = DstVT.getScalarSizeInBits() * NumSrcElts;
+ MVT TruncVT = MVT::getVectorVT(DstVT.getScalarType(), NumSrcElts);
+ if (NumTruncBits == VT.getSizeInBits() &&
+ TLI.isTruncStoreLegal(SrcVT, TruncVT)) {
+ return DAG.getTruncStore(St->getChain(), dl, Src, St->getBasePtr(),
+ TruncVT, St->getMemOperand());
+ }
+ }
+ }
+ }
+
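// A minimal sketch of the pattern behind this fold (assumes AVX-512F):
// truncating 8 x i64 to 8 x i8 and storing just those 8 bytes may now select
// a single vpmovqb-to-memory instead of truncate + extract + scalar store.
#include <immintrin.h>
void store_truncated_bytes(long long *p, __m512i v) {
  __m128i t = _mm512_cvtepi64_epi8(v); // VTRUNC: 8 x i64 -> 8 x i8 (low bytes)
  *p = _mm_cvtsi128_si64(t);           // store the 8 truncated bytes as an i64
}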
// Optimize trunc store (of multiple scalars) to shuffle and store.
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
@@ -44386,8 +45665,9 @@ static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
- const X86Subtarget &Subtarget, bool IsCommutative,
+static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget,
+ bool IsCommutative,
SmallVectorImpl<int> &PostShuffleMask) {
// If either operand is undef, bail out. The binop should be simplified.
if (LHS.isUndef() || RHS.isUndef())
@@ -44481,12 +45761,6 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
RMask.push_back(i);
}
- // Avoid 128-bit lane crossing if pre-AVX2 and FP (integer will split).
- if (!Subtarget.hasAVX2() && VT.isFloatingPoint() &&
- (isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), LMask) ||
- isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), RMask)))
- return false;
-
// If A and B occur in reverse order in RHS, then canonicalize by commuting
// RHS operands and shuffle mask.
if (A != C) {
@@ -44541,23 +45815,39 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
}
}
- LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
- RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
+ SDValue NewLHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
+ SDValue NewRHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
bool IsIdentityPostShuffle =
isSequentialOrUndefInRange(PostShuffleMask, 0, NumElts, 0);
if (IsIdentityPostShuffle)
PostShuffleMask.clear();
+ // Avoid 128-bit multi-lane shuffles if pre-AVX2 and FP (integer will split).
+ if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() &&
+ isMultiLaneShuffleMask(128, VT.getScalarSizeInBits(), PostShuffleMask))
+ return false;
+
+ // If the source nodes are already used in HorizOps then always accept this.
+ // Shuffle folding should merge these back together.
+ bool FoundHorizLHS = llvm::any_of(NewLHS->uses(), [&](SDNode *User) {
+ return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
+ });
+ bool FoundHorizRHS = llvm::any_of(NewRHS->uses(), [&](SDNode *User) {
+ return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
+ });
+ bool ForceHorizOp = FoundHorizLHS && FoundHorizRHS;
+
// Assume a SingleSource HOP if we only shuffle one input and don't need to
// shuffle the result.
- if (!shouldUseHorizontalOp(LHS == RHS &&
+ if (!ForceHorizOp &&
+ !shouldUseHorizontalOp(NewLHS == NewRHS &&
(NumShuffles < 2 || !IsIdentityPostShuffle),
DAG, Subtarget))
return false;
- LHS = DAG.getBitcast(VT, LHS);
- RHS = DAG.getBitcast(VT, RHS);
+ LHS = DAG.getBitcast(VT, NewLHS);
+ RHS = DAG.getBitcast(VT, NewRHS);
return true;
}
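// Illustrative shape that isHorizontalBinOp matches (not from the patch):
// two lane-pairing shuffles feeding an add can be recognized as a horizontal
// add, equivalent to _mm_hadd_ps(a, b).
#include <immintrin.h>
__m128 hadd_like(__m128 a, __m128 b) {
  __m128 even = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); // a0 a2 b0 b2
  __m128 odd  = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); // a1 a3 b1 b3
  return _mm_add_ps(even, odd); // may select HADDPS
}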
@@ -44575,7 +45865,8 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
SmallVector<int, 8> PostShuffleMask;
if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
- isHorizontalBinOp(LHS, RHS, DAG, Subtarget, IsFadd, PostShuffleMask)) {
+ isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsFadd,
+ PostShuffleMask)) {
SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
if (!PostShuffleMask.empty())
HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
@@ -44583,8 +45874,6 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
return HorizBinOp;
}
- // NOTE: isHorizontalBinOp may have changed LHS/RHS variables.
-
return SDValue();
}
@@ -44722,7 +46011,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
EVT OutSVT = OutVT.getVectorElementType();
EVT InSVT = InVT.getVectorElementType();
- if (!((InSVT == MVT::i32 || InSVT == MVT::i64) &&
+ if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
(OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
NumElems >= 8))
return SDValue();
@@ -44784,8 +46073,13 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
// there's no harm in trying pack.
if (Subtarget.hasAVX512() &&
!(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
- InVT.is512BitVector()))
+ InVT.is512BitVector())) {
+ // PACK should still be worth it for 128-bit vectors if the sources were
+ // originally concatenated from subvectors.
+ SmallVector<SDValue> ConcatOps;
+ if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps))
return SDValue();
+ }
unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);
unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
@@ -44807,9 +46101,23 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
return SDValue();
- if (NumSignBits > (InSVT.getSizeInBits() - NumPackedSignBits))
+ unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits;
+ if (NumSignBits > MinSignBits)
return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);
+ // If we have a srl that only generates signbits that we will discard in
+ // the truncation then we can use PACKSS by converting the srl to a sra.
+ // SimplifyDemandedBits often relaxes sra to srl so we need to reverse it.
+ if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
+ if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
+ In, APInt::getAllOnesValue(VT.getVectorNumElements()))) {
+ if (*ShAmt == MinSignBits) {
+ SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
+ return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
+ Subtarget);
+ }
+ }
+
return SDValue();
}
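// Hedged example of the new srl handling: the truncation keeps only the low
// half of each element, so the logical shift can be rewritten as arithmetic,
// which makes PACKSSDW usable. Whether the vectorizer produces exactly this
// DAG depends on the target and flags.
#include <cstdint>
void narrow_high_halves(const int32_t *in, int16_t *out, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = (int16_t)((uint32_t)in[i] >> 16); // trunc(srl x, 16)
}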
@@ -46127,7 +47435,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- EVT InVT = N0.getValueType();
SDLoc DL(N);
// (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
@@ -46156,23 +47463,17 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
return V;
- if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR &&
- isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) {
- // Invert and sign-extend a boolean is the same as zero-extend and subtract
- // 1 because 0 becomes -1 and 1 becomes 0. The subtract is efficiently
- // lowered with an LEA or a DEC. This is the same as: select Bool, 0, -1.
- // sext (xor Bool, -1) --> sub (zext Bool), 1
- SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
- return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT));
- }
-
if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
return V;
- if (VT.isVector())
+ if (VT.isVector()) {
if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
return R;
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
+ }
+
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
@@ -46191,14 +47492,23 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
if (!TLI.isTypeLegal(VT))
return SDValue();
- EVT ScalarVT = VT.getScalarType();
- if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
- return SDValue();
-
SDValue A = N->getOperand(IsStrict ? 1 : 0);
SDValue B = N->getOperand(IsStrict ? 2 : 1);
SDValue C = N->getOperand(IsStrict ? 3 : 2);
+ // If the operation allows fast-math and the target does not support FMA,
+ // split this into mul+add to avoid libcall(s).
+ SDNodeFlags Flags = N->getFlags();
+ if (!IsStrict && Flags.hasAllowReassociation() &&
+ TLI.isOperationExpand(ISD::FMA, VT)) {
+ SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags);
+ return DAG.getNode(ISD::FADD, dl, VT, Fmul, C, Flags);
+ }
+
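// Sketch of the new expansion (assumptions: reassociation/fast-math enabled
// and a target without FMA units): the fma is split into fmul + fadd instead
// of emitting a libcall to fma().
#include <cmath>
float fma_without_hw(float a, float b, float c) {
  return std::fma(a, b, c); // with -ffast-math on non-FMA targets: mul + add
}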
+ EVT ScalarVT = VT.getScalarType();
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
+ return SDValue();
+
auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
bool LegalOperations = !DCI.isBeforeLegalizeOps();
@@ -46621,7 +47931,7 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
Src.getOperand(0).getScalarValueSizeInBits() == EltWidth)
return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0));
- // Fold movmsk(not(x)) -> not(movmsk) to improve folding of movmsk results
+ // Fold movmsk(not(x)) -> not(movmsk(x)) to improve folding of movmsk results
// with scalar comparisons.
if (SDValue NotSrc = IsNOT(Src, DAG)) {
SDLoc DL(N);
@@ -46632,6 +47942,17 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(NotMask, DL, VT));
}
+ // Fold movmsk(icmp_sgt(x,-1)) -> not(movmsk(x)) to improve folding of movmsk
+ // results with scalar comparisons.
+ if (Src.getOpcode() == X86ISD::PCMPGT &&
+ ISD::isBuildVectorAllOnes(Src.getOperand(1).getNode())) {
+ SDLoc DL(N);
+ APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts);
+ return DAG.getNode(ISD::XOR, DL, VT,
+ DAG.getNode(X86ISD::MOVMSK, DL, VT, Src.getOperand(0)),
+ DAG.getConstant(NotMask, DL, VT));
+ }
+
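// Minimal illustration of the new fold: the mask of "x > -1" per lane is the
// bitwise NOT of the sign-bit mask, so the vector compare can be dropped in
// favour of movmsk(x) ^ 0xffff, which then folds into scalar comparisons.
#include <immintrin.h>
int nonnegative_byte_mask(__m128i x) {
  __m128i nonneg = _mm_cmpgt_epi8(x, _mm_set1_epi8(-1));
  return _mm_movemask_epi8(nonneg); // may become pmovmskb + xor 0xffff
}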
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getAllOnesValue(NumBits));
@@ -46669,7 +47990,8 @@ static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS,
return DAG.getMaskedGather(Gather->getVTList(),
Gather->getMemoryVT(), DL, Ops,
Gather->getMemOperand(),
- Gather->getIndexType());
+ Gather->getIndexType(),
+ Gather->getExtensionType());
}
auto *Scatter = cast<MaskedScatterSDNode>(GorS);
SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(),
@@ -46677,7 +47999,8 @@ static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS,
return DAG.getMaskedScatter(Scatter->getVTList(),
Scatter->getMemoryVT(), DL,
Ops, Scatter->getMemOperand(),
- Scatter->getIndexType());
+ Scatter->getIndexType(),
+ Scatter->isTruncatingStore());
}
static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
@@ -47672,17 +48995,18 @@ static SDValue combineAddOrSubToHADDorHSUB(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
bool IsAdd = N->getOpcode() == ISD::ADD;
+ auto HorizOpcode = IsAdd ? X86ISD::HADD : X86ISD::HSUB;
assert((IsAdd || N->getOpcode() == ISD::SUB) && "Wrong opcode");
SmallVector<int, 8> PostShuffleMask;
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
Subtarget.hasSSSE3() &&
- isHorizontalBinOp(Op0, Op1, DAG, Subtarget, IsAdd, PostShuffleMask)) {
- auto HOpBuilder = [IsAdd](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(IsAdd ? X86ISD::HADD : X86ISD::HSUB, DL,
- Ops[0].getValueType(), Ops);
+ isHorizontalBinOp(HorizOpcode, Op0, Op1, DAG, Subtarget, IsAdd,
+ PostShuffleMask)) {
+ auto HOpBuilder = [HorizOpcode](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops);
};
SDValue HorizBinOp =
SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, HOpBuilder);
@@ -47747,12 +49071,11 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
if (!VT.isVector())
return SDValue();
- // PSUBUS is supported, starting from SSE2, but truncation for v8i32
- // is only worth it with SSSE3 (PSHUFB).
+ // PSUBUS is supported, starting from SSE2.
EVT EltVT = VT.getVectorElementType();
- if (!(Subtarget.hasSSE2() && (EltVT == MVT::i8 || EltVT == MVT::i16)) &&
- !(Subtarget.hasSSSE3() && (VT == MVT::v8i32 || VT == MVT::v8i64)) &&
- !(Subtarget.useBWIRegs() && (VT == MVT::v16i32)))
+ if (!(Subtarget.hasSSE2() &&
+ (EltVT == MVT::i8 || EltVT == MVT::i16 || VT == MVT::v8i32 ||
+ VT == MVT::v8i64 || VT == MVT::v16i32)))
return SDValue();
SDValue SubusLHS, SubusRHS;
@@ -47788,9 +49111,9 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
SDValue MinLHS = Op1.getOperand(0).getOperand(0);
SDValue MinRHS = Op1.getOperand(0).getOperand(1);
EVT TruncVT = Op1.getOperand(0).getValueType();
- if (!(Subtarget.hasSSSE3() && (TruncVT == MVT::v8i32 ||
- TruncVT == MVT::v8i64)) &&
- !(Subtarget.useBWIRegs() && (TruncVT == MVT::v16i32)))
+ if (!(Subtarget.hasSSE2() &&
+ (TruncVT == MVT::v8i32 || TruncVT == MVT::v8i64 ||
+ TruncVT == MVT::v16i32)))
return SDValue();
SDValue OpToSaturate;
if (MinLHS.getOpcode() == ISD::ZERO_EXTEND &&
@@ -47828,7 +49151,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
// values, or first 48 bits for 64 bit values.
KnownBits Known = DAG.computeKnownBits(SubusLHS);
unsigned NumZeros = Known.countMinLeadingZeros();
- if ((VT == MVT::v8i64 && NumZeros < 48) || NumZeros < 16)
+ if (NumZeros < (VT.getScalarSizeInBits() - 16))
return SDValue();
EVT ExtType = SubusLHS.getValueType();
@@ -47928,43 +49251,47 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
SDValue Op0 = Ops[0];
bool IsSplat = llvm::all_of(Ops, [&Op0](SDValue Op) { return Op == Op0; });
- // Fold subvector loads into one.
- // If needed, look through bitcasts to get to the load.
- if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
- bool Fast;
- const X86TargetLowering *TLI = Subtarget.getTargetLowering();
- if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- *FirstLd->getMemOperand(), &Fast) &&
- Fast) {
- if (SDValue Ld =
- EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))
- return Ld;
- }
- }
-
// Repeated subvectors.
- if (IsSplat) {
- // If this broadcast/subv_broadcast is inserted into both halves, use a
- // larger broadcast/subv_broadcast.
- if (Op0.getOpcode() == X86ISD::VBROADCAST ||
- Op0.getOpcode() == X86ISD::SUBV_BROADCAST)
+ if (IsSplat &&
+ (VT.is256BitVector() || (VT.is512BitVector() && Subtarget.hasAVX512()))) {
+ // If this broadcast is inserted into both halves, use a larger broadcast.
+ if (Op0.getOpcode() == X86ISD::VBROADCAST)
return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));
- // If this broadcast_load is inserted into both halves, use a larger
- // broadcast_load. Update other uses to use an extracted subvector.
- if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD) {
+ // If this scalar/subvector broadcast_load is inserted into both halves, use
+ // a larger broadcast_load. Update other uses to use an extracted subvector.
+ if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
+ Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op0);
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {MemIntr->getChain(), MemIntr->getBasePtr()};
- SDValue BcastLd = DAG.getMemIntrinsicNode(
- X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MemIntr->getMemoryVT(),
- MemIntr->getMemOperand());
+ SDValue BcastLd = DAG.getMemIntrinsicNode(Op0.getOpcode(), DL, Tys, Ops,
+ MemIntr->getMemoryVT(),
+ MemIntr->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(
Op0, extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
return BcastLd;
}
+ // If this is a simple subvector load repeated across multiple lanes, then
+ // broadcast the load. Update other uses to use an extracted subvector.
+ if (auto *Ld = dyn_cast<LoadSDNode>(Op0)) {
+ if (Ld->isSimple() && !Ld->isNonTemporal() &&
+ Ld->getExtensionType() == ISD::NON_EXTLOAD) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops,
+ Ld->getMemoryVT(), Ld->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(
+ Op0,
+ extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
+ return BcastLd;
+ }
+ }
+
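// Sketch of the new broadcast fold (illustrative): concatenating one 128-bit
// load into both halves of a 256-bit value can now select a single subvector
// broadcast load (e.g. vbroadcastf128) instead of a load plus insert.
#include <immintrin.h>
__m256 splat_subvector(const float *p) {
  __m128 lo = _mm_loadu_ps(p);
  return _mm256_set_m128(lo, lo); // may lower to a vbroadcastf128 load
}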
// concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
(Subtarget.hasAVX2() || MayFoldLoad(Op0.getOperand(0))))
@@ -48042,6 +49369,38 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
return DAG.getBitcast(VT, Res);
}
break;
+ case X86ISD::VPERMV3:
+ if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
+ MVT OpVT = Op0.getSimpleValueType();
+ int NumSrcElts = OpVT.getVectorNumElements();
+ SmallVector<int, 64> ConcatMask;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ bool IsUnary;
+ SmallVector<int, 64> SubMask;
+ SmallVector<SDValue, 2> SubOps;
+ if (!getTargetShuffleMask(Ops[i].getNode(), OpVT, false, SubOps,
+ SubMask, IsUnary))
+ break;
+ for (int M : SubMask) {
+ if (0 <= M) {
+ M += M < NumSrcElts ? 0 : NumSrcElts;
+ M += i * NumSrcElts;
+ }
+ ConcatMask.push_back(M);
+ }
+ }
+ if (ConcatMask.size() == (NumOps * NumSrcElts)) {
+ SDValue Src0 = concatSubVectors(Ops[0].getOperand(0),
+ Ops[1].getOperand(0), DAG, DL);
+ SDValue Src1 = concatSubVectors(Ops[0].getOperand(2),
+ Ops[1].getOperand(2), DAG, DL);
+ MVT IntMaskSVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
+ SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
+ return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
+ }
+ }
+ break;
case X86ISD::VSHLI:
case X86ISD::VSRAI:
case X86ISD::VSRLI:
@@ -48074,10 +49433,33 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
Op0.getOperand(1));
}
break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case X86ISD::ANDNP:
+ // TODO: Add 256-bit support.
+ if (!IsSplat && VT.is512BitVector()) {
+ SmallVector<SDValue, 2> LHS, RHS;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ LHS.push_back(Ops[i].getOperand(0));
+ RHS.push_back(Ops[i].getOperand(1));
+ }
+ MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
+ SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+ NumOps * SrcVT.getVectorNumElements());
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, LHS),
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, RHS));
+ }
+ break;
+ case X86ISD::HADD:
+ case X86ISD::HSUB:
+ case X86ISD::FHADD:
+ case X86ISD::FHSUB:
case X86ISD::PACKSS:
case X86ISD::PACKUS:
- if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
- Subtarget.hasInt256()) {
+ if (!IsSplat && VT.is256BitVector() &&
+ (VT.isFloatingPoint() || Subtarget.hasInt256())) {
SmallVector<SDValue, 2> LHS, RHS;
for (unsigned i = 0; i != NumOps; ++i) {
LHS.push_back(Ops[i].getOperand(0));
@@ -48112,6 +49494,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
}
+ // Fold subvector loads into one.
+ // If needed, look through bitcasts to get to the load.
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
+ bool Fast;
+ const X86TargetLowering *TLI = Subtarget.getTargetLowering();
+ if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *FirstLd->getMemOperand(), &Fast) &&
+ Fast) {
+ if (SDValue Ld =
+ EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))
+ return Ld;
+ }
+ }
+
return SDValue();
}
@@ -48183,7 +49579,8 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
SDValue Ins = SubVec.getOperand(0);
if (isNullConstant(Ins.getOperand(2)) &&
ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
- Ins.getOperand(1).getValueSizeInBits() <= SubVecVT.getSizeInBits())
+ Ins.getOperand(1).getValueSizeInBits().getFixedSize() <=
+ SubVecVT.getFixedSizeInBits())
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
getZeroVector(OpVT, Subtarget, DAG, dl),
Ins.getOperand(1), N->getOperand(2));
@@ -48336,12 +49733,14 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
unsigned IdxVal = N->getConstantOperandVal(1);
SDValue InVecBC = peekThroughBitcasts(InVec);
EVT InVecVT = InVec.getValueType();
+ unsigned SizeInBits = VT.getSizeInBits();
+ unsigned InSizeInBits = InVecVT.getSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (Subtarget.hasAVX() && !Subtarget.hasAVX2() &&
TLI.isTypeLegal(InVecVT) &&
- InVecVT.getSizeInBits() == 256 && InVecBC.getOpcode() == ISD::AND) {
- auto isConcatenatedNot = [] (SDValue V) {
+ InSizeInBits == 256 && InVecBC.getOpcode() == ISD::AND) {
+ auto isConcatenatedNot = [](SDValue V) {
V = peekThroughBitcasts(V);
if (!isBitwiseNot(V))
return false;
@@ -48384,53 +49783,32 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
InVec.getOpcode() == ISD::INSERT_SUBVECTOR && IdxVal == 0 &&
InVec.hasOneUse() && isNullConstant(InVec.getOperand(2)) &&
ISD::isBuildVectorAllZeros(InVec.getOperand(0).getNode()) &&
- InVec.getOperand(1).getValueSizeInBits() <= VT.getSizeInBits()) {
+ InVec.getOperand(1).getValueSizeInBits() <= SizeInBits) {
SDLoc DL(N);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
getZeroVector(VT, Subtarget, DAG, DL),
InVec.getOperand(1), InVec.getOperand(2));
}
- // If we're extracting from a broadcast then we're better off just
- // broadcasting to the smaller type directly, assuming this is the only use.
- // As its a broadcast we don't care about the extraction index.
- if (InVec.getOpcode() == X86ISD::VBROADCAST && InVec.hasOneUse() &&
- InVec.getOperand(0).getValueSizeInBits() <= VT.getSizeInBits())
- return DAG.getNode(X86ISD::VBROADCAST, SDLoc(N), VT, InVec.getOperand(0));
-
- if (InVec.getOpcode() == X86ISD::VBROADCAST_LOAD && InVec.hasOneUse()) {
- auto *MemIntr = cast<MemIntrinsicSDNode>(InVec);
- if (MemIntr->getMemoryVT().getSizeInBits() <= VT.getSizeInBits()) {
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
- SDValue BcastLd =
- DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops,
- MemIntr->getMemoryVT(),
- MemIntr->getMemOperand());
- DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
- return BcastLd;
- }
- }
-
// If we're extracting an upper subvector from a broadcast we should just
// extract the lowest subvector instead which should allow
// SimplifyDemandedVectorElts to do more simplifications.
if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST ||
InVec.getOpcode() == X86ISD::VBROADCAST_LOAD))
- return extractSubVector(InVec, 0, DAG, SDLoc(N), VT.getSizeInBits());
+ return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits);
- // If we're extracting a broadcasted subvector, just use the source.
- if (InVec.getOpcode() == X86ISD::SUBV_BROADCAST &&
- InVec.getOperand(0).getValueType() == VT)
- return InVec.getOperand(0);
+ // If we're extracting a broadcasted subvector, just use the lowest subvector.
+ if (IdxVal != 0 && InVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
+ cast<MemIntrinsicSDNode>(InVec)->getMemoryVT() == VT)
+ return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits);
// Attempt to extract from the source of a shuffle vector.
- if ((InVecVT.getSizeInBits() % VT.getSizeInBits()) == 0 &&
+ if ((InSizeInBits % SizeInBits) == 0 &&
(IdxVal % VT.getVectorNumElements()) == 0) {
SmallVector<int, 32> ShuffleMask;
SmallVector<int, 32> ScaledMask;
SmallVector<SDValue, 2> ShuffleInputs;
- unsigned NumSubVecs = InVecVT.getSizeInBits() / VT.getSizeInBits();
+ unsigned NumSubVecs = InSizeInBits / SizeInBits;
// Decode the shuffle mask and scale it so its shuffling subvectors.
if (getTargetShuffleInputs(InVecBC, ShuffleInputs, ShuffleMask, DAG) &&
scaleShuffleElements(ShuffleMask, NumSubVecs, ScaledMask)) {
@@ -48440,19 +49818,19 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
if (ScaledMask[SubVecIdx] == SM_SentinelZero)
return getZeroVector(VT, Subtarget, DAG, SDLoc(N));
SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs];
- if (Src.getValueSizeInBits() == InVecVT.getSizeInBits()) {
+ if (Src.getValueSizeInBits() == InSizeInBits) {
unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs;
unsigned SrcEltIdx = SrcSubVecIdx * VT.getVectorNumElements();
return extractSubVector(DAG.getBitcast(InVecVT, Src), SrcEltIdx, DAG,
- SDLoc(N), VT.getSizeInBits());
+ SDLoc(N), SizeInBits);
}
}
}
// If we're extracting the lowest subvector and we're the only user,
// we may be able to perform this with a smaller vector width.
+ unsigned InOpcode = InVec.getOpcode();
if (IdxVal == 0 && InVec.hasOneUse()) {
- unsigned InOpcode = InVec.getOpcode();
if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) {
// v2f64 CVTDQ2PD(v4i32).
if (InOpcode == ISD::SINT_TO_FP &&
@@ -48476,10 +49854,14 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
InOpcode == ISD::SIGN_EXTEND ||
InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) &&
- VT.is128BitVector() &&
- InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
+ (SizeInBits == 128 || SizeInBits == 256) &&
+ InVec.getOperand(0).getValueSizeInBits() >= SizeInBits) {
+ SDLoc DL(N);
+ SDValue Ext = InVec.getOperand(0);
+ if (Ext.getValueSizeInBits() > SizeInBits)
+ Ext = extractSubVector(Ext, 0, DAG, DL, SizeInBits);
unsigned ExtOp = getOpcode_EXTEND_VECTOR_INREG(InOpcode);
- return DAG.getNode(ExtOp, SDLoc(N), VT, InVec.getOperand(0));
+ return DAG.getNode(ExtOp, DL, VT, Ext);
}
if (InOpcode == ISD::VSELECT &&
InVec.getOperand(0).getValueType().is256BitVector() &&
@@ -48491,6 +49873,25 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128);
return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
}
+ if (InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() &&
+ (VT.is128BitVector() || VT.is256BitVector())) {
+ SDLoc DL(N);
+ SDValue InVecSrc = InVec.getOperand(0);
+ unsigned Scale = InVecSrc.getValueSizeInBits() / InSizeInBits;
+ SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits);
+ return DAG.getNode(InOpcode, DL, VT, Ext);
+ }
+ }
+
+ // Always split vXi64 logical shifts where we're extracting the upper 32-bits
+ // as this is very likely to fold into a shuffle/truncation.
+ if ((InOpcode == X86ISD::VSHLI || InOpcode == X86ISD::VSRLI) &&
+ InVecVT.getScalarSizeInBits() == 64 &&
+ InVec.getConstantOperandAPInt(1) == 32) {
+ SDLoc DL(N);
+ SDValue Ext =
+ extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits);
+ return DAG.getNode(InOpcode, DL, VT, Ext, InVec.getOperand(1));
}
return SDValue();
@@ -48574,7 +49975,7 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
// If the input is an extend_invec and the SimplifyDemandedBits call didn't
// convert it to any_extend_invec, due to the LegalOperations check, do the
// conversion directly to a vector shuffle manually. This exposes combine
- // opportunities missed by combineExtInVec not calling
+ // opportunities missed by combineEXTEND_VECTOR_INREG not calling
// combineX86ShufflesRecursively on SSE4.1 targets.
// FIXME: This is basically a hack around several other issues related to
// ANY_EXTEND_VECTOR_INREG.
@@ -48602,11 +50003,13 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue In = N->getOperand(0);
+ unsigned Opcode = N->getOpcode();
+ unsigned InOpcode = In.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Try to merge vector loads and extend_inreg to an extload.
@@ -48615,7 +50018,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
auto *Ld = cast<LoadSDNode>(In);
if (Ld->isSimple()) {
MVT SVT = In.getSimpleValueType().getVectorElementType();
- ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG
+ ISD::LoadExtType Ext = Opcode == ISD::SIGN_EXTEND_VECTOR_INREG
? ISD::SEXTLOAD
: ISD::ZEXTLOAD;
EVT MemVT =
@@ -48623,8 +50026,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
SDValue Load =
DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), MemVT,
- Ld->getOriginalAlign(),
+ Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
return Load;
@@ -48632,9 +50034,23 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
}
}
+ // Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X).
+ if (Opcode == InOpcode)
+ return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0));
+
+ // Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0))
+ // -> EXTEND_VECTOR_INREG(X).
+ // TODO: Handle non-zero subvector indices.
+ if (InOpcode == ISD::EXTRACT_SUBVECTOR && In.getConstantOperandVal(1) == 0 &&
+ In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) &&
+ In.getOperand(0).getOperand(0).getValueSizeInBits() ==
+ In.getValueSizeInBits())
+ return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0));
+
// Attempt to combine as a shuffle.
- // TODO: SSE41 support
- if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) {
+ // TODO: General ZERO_EXTEND_VECTOR_INREG support.
+ if (Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||
+ (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG && Subtarget.hasSSE41())) {
SDValue Op(N, 0);
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
@@ -48755,11 +50171,15 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::FP_EXTEND, dl, VT, Cvt);
}
-// Try to find a larger VBROADCAST_LOAD that we can extract from. Limit this to
-// cases where the loads have the same input chain and the output chains are
-// unused. This avoids any memory ordering issues.
-static SDValue combineVBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+// Try to find a larger VBROADCAST_LOAD/SUBV_BROADCAST_LOAD that we can extract
+// from. Limit this to cases where the loads have the same input chain and the
+// output chains are unused. This avoids any memory ordering issues.
+static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ assert((N->getOpcode() == X86ISD::VBROADCAST_LOAD ||
+ N->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) &&
+ "Unknown broadcast load type");
+
// Only do this if the chain result is unused.
if (N->hasAnyUseOfValue(1))
return SDValue();
@@ -48774,13 +50194,13 @@ static SDValue combineVBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
// Look at other users of our base pointer and try to find a wider broadcast.
// The input chain and the size of the memory VT must match.
for (SDNode *User : Ptr->uses())
- if (User != N && User->getOpcode() == X86ISD::VBROADCAST_LOAD &&
+ if (User != N && User->getOpcode() == N->getOpcode() &&
cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
MemVT.getSizeInBits() &&
!User->hasAnyUseOfValue(1) &&
- User->getValueSizeInBits(0) > VT.getSizeInBits()) {
+ User->getValueSizeInBits(0).getFixedSize() > VT.getFixedSizeInBits()) {
SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
VT.getSizeInBits());
Extract = DAG.getBitcast(VT, Extract);
@@ -48851,6 +50271,17 @@ static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ unsigned NumBits = N->getSimpleValueType(0).getSizeInBits();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+ APInt::getAllOnesValue(NumBits), DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -48887,7 +50318,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
- case X86ISD::BEXTR: return combineBEXTR(N, DAG, DCI, Subtarget);
+ case X86ISD::BEXTR:
+ case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
@@ -48932,13 +50364,17 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, DCI,
- Subtarget);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return combineEXTEND_VECTOR_INREG(N, DAG, DCI, Subtarget);
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::PACKSS:
case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
+ case X86ISD::HADD:
+ case X86ISD::HSUB:
+ case X86ISD::FHADD:
+ case X86ISD::FHSUB: return combineVectorHADDSUB(N, DAG, DCI, Subtarget);
case X86ISD::VSHL:
case X86ISD::VSRA:
case X86ISD::VSRL:
@@ -49015,8 +50451,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DAG, Subtarget);
case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget);
- case X86ISD::VBROADCAST_LOAD: return combineVBROADCAST_LOAD(N, DAG, DCI);
+ case X86ISD::VBROADCAST_LOAD:
+ case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI);
case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG);
+ case X86ISD::PDEP: return combinePDEP(N, DAG, DCI);
}
return SDValue();
@@ -49305,7 +50743,7 @@ static X86::CondCode parseConstraintCode(llvm::StringRef Constraint) {
.Case("{@ccnl}", X86::COND_GE)
.Case("{@ccnle}", X86::COND_G)
.Case("{@ccno}", X86::COND_NO)
- .Case("{@ccnp}", X86::COND_P)
+ .Case("{@ccnp}", X86::COND_NP)
.Case("{@ccns}", X86::COND_NS)
.Case("{@cco}", X86::COND_O)
.Case("{@ccp}", X86::COND_P)
@@ -49541,8 +50979,8 @@ LowerXConstraint(EVT ConstraintVT) const {
// Lower @cc targets via setcc.
SDValue X86TargetLowering::LowerAsmOutputForConstraint(
- SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
- SelectionDAG &DAG) const {
+ SDValue &Chain, SDValue &Flag, const SDLoc &DL,
+ const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
X86::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
if (Cond == X86::COND_INVALID)
return SDValue();
@@ -49978,30 +51416,35 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Not found as a standard register?
if (!Res.second) {
- // Map st(0) -> st(7) -> ST0
- if (Constraint.size() == 7 && Constraint[0] == '{' &&
- tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
- Constraint[3] == '(' &&
- (Constraint[4] >= '0' && Constraint[4] <= '7') &&
- Constraint[5] == ')' && Constraint[6] == '}') {
- // st(7) is not allocatable and thus not a member of RFP80. Return
- // singleton class in cases where we have a reference to it.
- if (Constraint[4] == '7')
- return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
- return std::make_pair(X86::FP0 + Constraint[4] - '0',
- &X86::RFP80RegClass);
- }
-
- // GCC allows "st(0)" to be called just plain "st".
- if (StringRef("{st}").equals_lower(Constraint))
- return std::make_pair(X86::FP0, &X86::RFP80RegClass);
+ // Only match x87 registers if the VT is one SelectionDAGBuilder can convert
+ // to/from f80.
+ if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) {
+ // Map st(0) -> st(7) -> ST0
+ if (Constraint.size() == 7 && Constraint[0] == '{' &&
+ tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
+ Constraint[3] == '(' &&
+ (Constraint[4] >= '0' && Constraint[4] <= '7') &&
+ Constraint[5] == ')' && Constraint[6] == '}') {
+ // st(7) is not allocatable and thus not a member of RFP80. Return
+ // singleton class in cases where we have a reference to it.
+ if (Constraint[4] == '7')
+ return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
+ return std::make_pair(X86::FP0 + Constraint[4] - '0',
+ &X86::RFP80RegClass);
+ }
+
+ // GCC allows "st(0)" to be called just plain "st".
+ if (StringRef("{st}").equals_lower(Constraint))
+ return std::make_pair(X86::FP0, &X86::RFP80RegClass);
+ }
// flags -> EFLAGS
if (StringRef("{flags}").equals_lower(Constraint))
return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);
// dirflag -> DF
- if (StringRef("{dirflag}").equals_lower(Constraint))
+ // Only allow for clobber.
+ if (StringRef("{dirflag}").equals_lower(Constraint) && VT == MVT::Other)
return std::make_pair(X86::DF, &X86::DFCCRRegClass);
// fpsr -> FPSW
@@ -50275,3 +51718,10 @@ X86TargetLowering::getStackProbeSize(MachineFunction &MF) const {
.getAsInteger(0, StackProbeSize);
return StackProbeSize;
}
+
+Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+ if (ML->isInnermost() &&
+ ExperimentalPrefInnermostLoopAlignment.getNumOccurrences())
+ return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
+ return TargetLowering::getPrefLoopAlignment();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
index 7f3dc90a2d73..76c83b7df9eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
@@ -384,8 +384,10 @@ namespace llvm {
/// Vector comparison generating mask bits for fp and
/// integer signed and unsigned data types.
CMPM,
- // Vector comparison with SAE for FP values
- CMPM_SAE,
+ // Vector mask comparison generating mask bits for FP values.
+ CMPMM,
+ // Vector mask comparison with SAE for FP values.
+ CMPMM_SAE,
// Arithmetic operations with FLAGS results.
ADD,
@@ -400,6 +402,7 @@ namespace llvm {
// Bit field extract.
BEXTR,
+ BEXTRI,
// Zero High Bits Starting with Specified Bit Position.
BZHI,
@@ -502,8 +505,6 @@ namespace llvm {
VBROADCAST,
// Broadcast mask to vector.
VBROADCASTM,
- // Broadcast subvector to vector.
- SUBV_BROADCAST,
/// SSE4A Extraction and Insertion.
EXTRQI,
@@ -708,6 +709,9 @@ namespace llvm {
// For avx512-vp2intersect
VP2INTERSECT,
+ // User level interrupts - testui
+ TESTUI,
+
/// X86 strict FP compare instructions.
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCMPS,
@@ -747,11 +751,13 @@ namespace llvm {
STRICT_CVTPS2PH,
STRICT_CVTPH2PS,
+ // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
+ // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
+
// Compare and swap.
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
LCMPXCHG16_DAG,
- LCMPXCHG8_SAVE_EBX_DAG,
LCMPXCHG16_SAVE_RBX_DAG,
/// LOCK-prefixed arithmetic read-modify-write instructions.
@@ -768,9 +774,12 @@ namespace llvm {
// extract_vector_elt, store.
VEXTRACT_STORE,
- // scalar broadcast from memory
+ // scalar broadcast from memory.
VBROADCAST_LOAD,
+ // subvector broadcast from memory.
+ SUBV_BROADCAST_LOAD,
+
// Store FP control word into i16 memory.
FNSTCW16m,
@@ -806,9 +815,10 @@ namespace llvm {
/// specifies the type to store as.
FST,
- /// This instruction grabs the address of the next argument
+ /// These instructions grab the address of the next argument
/// from a va_list. (reads and modifies the va_list in memory)
VAARG_64,
+ VAARG_X32,
// Vector truncating store with unsigned/signed saturation
VTRUNCSTOREUS,
@@ -821,6 +831,16 @@ namespace llvm {
MGATHER,
MSCATTER,
+ // Key locker nodes that produce flags.
+ AESENC128KL,
+ AESDEC128KL,
+ AESENC256KL,
+ AESDEC256KL,
+ AESENCWIDE128KL,
+ AESDECWIDE128KL,
+ AESENCWIDE256KL,
+ AESDECWIDE256KL,
+
// WARNING: Do not add anything at the end unless you want the node to
// have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be treated as target memory ops!
@@ -835,7 +855,7 @@ namespace llvm {
/// Returns true if the given offset can be
/// fit into the displacement field of the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
- bool hasSymbolicDisplacement = true);
+ bool hasSymbolicDisplacement);
/// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
@@ -907,14 +927,6 @@ namespace llvm {
///
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- /// Places new result values for the node in Results (their number
- /// and types must exactly match those of the original return values of
- /// the node), or leaves Results empty, which indicates that the node is not
- /// to be custom lowered after all.
- void LowerOperationWrapper(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const override;
-
/// Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
@@ -1116,7 +1128,8 @@ namespace llvm {
}
/// Handle Lowering flag assembly outputs.
- SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
+ SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+ const SDLoc &DL,
const AsmOperandInfo &Constraint,
SelectionDAG &DAG) const override;
@@ -1349,8 +1362,6 @@ namespace llvm {
Align Alignment,
SelectionDAG &DAG) const;
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
-
/// Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
@@ -1397,6 +1408,8 @@ namespace llvm {
SDValue Addr, SelectionDAG &DAG)
const override;
+ Align getPrefLoopAlignment(MachineLoop *ML) const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -1488,6 +1501,7 @@ namespace llvm {
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
@@ -1514,9 +1528,6 @@ namespace llvm {
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
- RTLIB::Libcall Call) const;
-
SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1572,8 +1583,7 @@ namespace llvm {
// Utility function to emit the low-level va_arg code for X86-64.
MachineBasicBlock *
- EmitVAARG64WithCustomInserter(MachineInstr &MI,
- MachineBasicBlock *MBB) const;
+ EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
/// Utility function to emit the xmm reg save portion of va_start.
MachineBasicBlock *
@@ -1689,7 +1699,7 @@ namespace llvm {
};
/// Generate unpacklo/unpackhi shuffle mask.
- void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
+ void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
bool Unary);
/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
index 1628f85da808..85410c54a4d2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
#define DEBUG_TYPE "x86-indirect-branch-tracking"
-static cl::opt<bool> IndirectBranchTracking(
+cl::opt<bool> IndirectBranchTracking(
"x86-indirect-branch-tracking", cl::init(false), cl::Hidden,
cl::desc("Enable X86 indirect branch tracking pass."));
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp
index 828887d96129..3d96d198b409 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -95,7 +95,6 @@ struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
MF.front().addLiveIn(X86::R11);
- return;
}
};
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
index 53925bbfd72f..004e6fa5ebf4 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -214,10 +214,10 @@ bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true);
MachineInstrBuilder MIB(MF, PFetch);
- assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 &&
- X86::AddrIndexReg == 2 && X86::AddrDisp == 3 &&
- X86::AddrSegmentReg == 4 &&
- "Unexpected change in X86 operand offset order.");
+ static_assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 &&
+ X86::AddrIndexReg == 2 && X86::AddrDisp == 3 &&
+ X86::AddrSegmentReg == 4,
+ "Unexpected change in X86 operand offset order.");
// This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc.
// FIXME(mtrofin): consider adding a:
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertWait.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertWait.cpp
index a82d98d88b30..56d2709f5937 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertWait.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertWait.cpp
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/Debug.h"
@@ -48,9 +47,6 @@ public:
StringRef getPassName() const override {
return "X86 insert wait instruction";
}
-
-private:
- const TargetInstrInfo *TII; // Machine instruction info.
};
} // namespace
@@ -119,7 +115,7 @@ bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
return false;
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- TII = ST.getInstrInfo();
+ const X86InstrInfo *TII = ST.getInstrInfo();
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
new file mode 100644
index 000000000000..c4150ed52854
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -0,0 +1,2017 @@
+//===-- X86InstCombineIntrinsic.cpp - X86 specific InstCombine pass -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// X86 target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetTransformInfo.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86tti"
+
+/// Return a constant boolean vector that has true elements in all positions
+/// where the input constant data vector has an element with the sign bit set.
+static Constant *getNegativeIsTrueBoolVec(Constant *V) {
+ VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
+ V = ConstantExpr::getBitCast(V, IntTy);
+ V = ConstantExpr::getICmp(CmpInst::ICMP_SGT, Constant::getNullValue(IntTy),
+ V);
+ return V;
+}
+
+/// Convert the x86 XMM integer vector mask to a vector of bools based on
+/// each element's most significant bit (the sign bit).
+static Value *getBoolVecFromMask(Value *Mask) {
+ // Fold Constant Mask.
+ if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
+ return getNegativeIsTrueBoolVec(ConstantMask);
+
+ // Mask was extended from a boolean vector.
+ Value *ExtMask;
+ if (PatternMatch::match(
+ Mask, PatternMatch::m_SExt(PatternMatch::m_Value(ExtMask))) &&
+ ExtMask->getType()->isIntOrIntVectorTy(1))
+ return ExtMask;
+
+ return nullptr;
+}
+
+// TODO: If the x86 backend knew how to convert a bool vector mask back to an
+// XMM register mask efficiently, we could transform all x86 masked intrinsics
+// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
+static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
+ Value *Ptr = II.getOperand(0);
+ Value *Mask = II.getOperand(1);
+ Constant *ZeroVec = Constant::getNullValue(II.getType());
+
+ // Zero Mask - masked load instruction creates a zero vector.
+ if (isa<ConstantAggregateZero>(Mask))
+ return IC.replaceInstUsesWith(II, ZeroVec);
+
+ // The mask is constant or extended from a bool vector. Convert this x86
+ // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
+ if (Value *BoolMask = getBoolVecFromMask(Mask)) {
+ // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
+ // the LLVM intrinsic definition for the pointer argument.
+ unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
+ PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
+ Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
+
+ // The pass-through vector for an x86 masked load is a zero vector.
+ CallInst *NewMaskedLoad =
+ IC.Builder.CreateMaskedLoad(PtrCast, Align(1), BoolMask, ZeroVec);
+ return IC.replaceInstUsesWith(II, NewMaskedLoad);
+ }
+
+ return nullptr;
+}
+
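// Illustrative caller for the transform above (assumes AVX2): the constant
// mask is converted to a bool vector, so the x86 maskload becomes
// llvm.masked.load with a zero pass-through and is visible to generic
// masked-load optimizations.
#include <immintrin.h>
__m128i load_low_two_lanes(const int *p) {
  __m128i mask = _mm_set_epi32(0, 0, -1, -1); // sign bit set in lanes 0 and 1
  return _mm_maskload_epi32(p, mask);
}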
+// TODO: If the x86 backend knew how to convert a bool vector mask back to an
+// XMM register mask efficiently, we could transform all x86 masked intrinsics
+// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
+static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
+ Value *Ptr = II.getOperand(0);
+ Value *Mask = II.getOperand(1);
+ Value *Vec = II.getOperand(2);
+
+ // Zero Mask - this masked store instruction does nothing.
+ if (isa<ConstantAggregateZero>(Mask)) {
+ IC.eraseInstFromFunction(II);
+ return true;
+ }
+
+ // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
+ // anything else at this level.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
+ return false;
+
+ // The mask is constant or extended from a bool vector. Convert this x86
+ // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
+ if (Value *BoolMask = getBoolVecFromMask(Mask)) {
+ unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
+ PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
+ Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
+
+ IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);
+
+ // 'Replace uses' doesn't work for stores. Erase the original masked store.
+ IC.eraseInstFromFunction(II);
+ return true;
+ }
+
+ return false;
+}
+
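// Illustrative zero-mask case for the function above: a masked store whose
// mask is all zero writes nothing, so the call can simply be erased.
#include <immintrin.h>
void dead_masked_store(float *p, __m128 v) {
  _mm_maskstore_ps(p, _mm_setzero_si128(), v); // removable by this combine
}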
+static Value *simplifyX86immShift(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ bool LogicalShift = false;
+ bool ShiftLeft = false;
+ bool IsImm = false;
+
+ switch (II.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx512_psrai_q_128:
+ case Intrinsic::x86_avx512_psrai_q_256:
+ case Intrinsic::x86_avx512_psrai_d_512:
+ case Intrinsic::x86_avx512_psrai_q_512:
+ case Intrinsic::x86_avx512_psrai_w_512:
+ IsImm = true;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx512_psra_q_128:
+ case Intrinsic::x86_avx512_psra_q_256:
+ case Intrinsic::x86_avx512_psra_d_512:
+ case Intrinsic::x86_avx512_psra_q_512:
+ case Intrinsic::x86_avx512_psra_w_512:
+ LogicalShift = false;
+ ShiftLeft = false;
+ break;
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx512_psrli_d_512:
+ case Intrinsic::x86_avx512_psrli_q_512:
+ case Intrinsic::x86_avx512_psrli_w_512:
+ IsImm = true;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx512_psrl_d_512:
+ case Intrinsic::x86_avx512_psrl_q_512:
+ case Intrinsic::x86_avx512_psrl_w_512:
+ LogicalShift = true;
+ ShiftLeft = false;
+ break;
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx512_pslli_d_512:
+ case Intrinsic::x86_avx512_pslli_q_512:
+ case Intrinsic::x86_avx512_pslli_w_512:
+ IsImm = true;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx512_psll_d_512:
+ case Intrinsic::x86_avx512_psll_q_512:
+ case Intrinsic::x86_avx512_psll_w_512:
+ LogicalShift = true;
+ ShiftLeft = true;
+ break;
+ }
+ assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
+
+ auto Vec = II.getArgOperand(0);
+ auto Amt = II.getArgOperand(1);
+ auto VT = cast<FixedVectorType>(Vec->getType());
+ auto SVT = VT->getElementType();
+ auto AmtVT = Amt->getType();
+ unsigned VWidth = VT->getNumElements();
+ unsigned BitWidth = SVT->getPrimitiveSizeInBits();
+
+ // If the shift amount is guaranteed to be in-range we can replace it with a
+ // generic shift. If it's guaranteed to be out of range, logical shifts combine
+ // to zero and arithmetic shifts are clamped to (BitWidth - 1).
+ if (IsImm) {
+ assert(AmtVT->isIntegerTy(32) && "Unexpected shift-by-immediate type");
+ KnownBits KnownAmtBits =
+ llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
+ if (KnownAmtBits.getMaxValue().ult(BitWidth)) {
+ Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
+ Amt = Builder.CreateVectorSplat(VWidth, Amt);
+ return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
+ : Builder.CreateLShr(Vec, Amt))
+ : Builder.CreateAShr(Vec, Amt));
+ }
+ if (KnownAmtBits.getMinValue().uge(BitWidth)) {
+ if (LogicalShift)
+ return ConstantAggregateZero::get(VT);
+ Amt = ConstantInt::get(SVT, BitWidth - 1);
+ return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
+ }
+ } else {
+ // Ensure the first element has an in-range value and the rest of the
+ // elements in the bottom 64 bits are zero.
+ assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
+ cast<VectorType>(AmtVT)->getElementType() == SVT &&
+ "Unexpected shift-by-scalar type");
+ unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
+ APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
+ APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
+ KnownBits KnownLowerBits = llvm::computeKnownBits(
+ Amt, DemandedLower, II.getModule()->getDataLayout());
+ KnownBits KnownUpperBits = llvm::computeKnownBits(
+ Amt, DemandedUpper, II.getModule()->getDataLayout());
+ if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
+ (DemandedUpper.isNullValue() || KnownUpperBits.isZero())) {
+ SmallVector<int, 16> ZeroSplat(VWidth, 0);
+ Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
+ return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
+ : Builder.CreateLShr(Vec, Amt))
+ : Builder.CreateAShr(Vec, Amt));
+ }
+ }
+
+ // Simplify if count is constant vector.
+ auto CDV = dyn_cast<ConstantDataVector>(Amt);
+ if (!CDV)
+ return nullptr;
+
+ // SSE2/AVX2 uses all of the first 64-bits of the 128-bit vector
+ // operand to compute the shift amount.
+ assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
+ cast<VectorType>(AmtVT)->getElementType() == SVT &&
+ "Unexpected shift-by-scalar type");
+
+ // Concatenate the sub-elements to create the 64-bit value.
+ APInt Count(64, 0);
+ for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
+ unsigned SubEltIdx = (NumSubElts - 1) - i;
+ auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
+ Count <<= BitWidth;
+ Count |= SubElt->getValue().zextOrTrunc(64);
+ }
+
+ // If shift-by-zero then just return the original value.
+ if (Count.isNullValue())
+ return Vec;
+
+ // Handle cases when Shift >= BitWidth.
+ if (Count.uge(BitWidth)) {
+ // If LogicalShift - just return zero.
+ if (LogicalShift)
+ return ConstantAggregateZero::get(VT);
+
+ // If ArithmeticShift - clamp Shift to (BitWidth - 1).
+ Count = APInt(64, BitWidth - 1);
+ }
+
+ // Get a constant vector of the same type as the first operand.
+ auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
+ auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
+
+ if (ShiftLeft)
+ return Builder.CreateShl(Vec, ShiftVec);
+
+ if (LogicalShift)
+ return Builder.CreateLShr(Vec, ShiftVec);
+
+ return Builder.CreateAShr(Vec, ShiftVec);
+}
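+
+// Illustrative sketch of the immediate-shift fold above (hypothetical values,
+// not taken verbatim from the patch): when the amount is known to be in range,
+//   %r = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 3)
+// becomes the generic
+//   %r = ashr <8 x i16> %v, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// while an amount known to be >= 16 folds logical shifts to zero and clamps
+// arithmetic shifts to a splat of 15.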
+
+// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
+// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
+// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
+static Value *simplifyX86varShift(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ bool LogicalShift = false;
+ bool ShiftLeft = false;
+
+ switch (II.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ case Intrinsic::x86_avx512_psrav_q_128:
+ case Intrinsic::x86_avx512_psrav_q_256:
+ case Intrinsic::x86_avx512_psrav_d_512:
+ case Intrinsic::x86_avx512_psrav_q_512:
+ case Intrinsic::x86_avx512_psrav_w_128:
+ case Intrinsic::x86_avx512_psrav_w_256:
+ case Intrinsic::x86_avx512_psrav_w_512:
+ LogicalShift = false;
+ ShiftLeft = false;
+ break;
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ case Intrinsic::x86_avx512_psrlv_d_512:
+ case Intrinsic::x86_avx512_psrlv_q_512:
+ case Intrinsic::x86_avx512_psrlv_w_128:
+ case Intrinsic::x86_avx512_psrlv_w_256:
+ case Intrinsic::x86_avx512_psrlv_w_512:
+ LogicalShift = true;
+ ShiftLeft = false;
+ break;
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ case Intrinsic::x86_avx512_psllv_d_512:
+ case Intrinsic::x86_avx512_psllv_q_512:
+ case Intrinsic::x86_avx512_psllv_w_128:
+ case Intrinsic::x86_avx512_psllv_w_256:
+ case Intrinsic::x86_avx512_psllv_w_512:
+ LogicalShift = true;
+ ShiftLeft = true;
+ break;
+ }
+ assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
+
+ auto Vec = II.getArgOperand(0);
+ auto Amt = II.getArgOperand(1);
+ auto VT = cast<FixedVectorType>(II.getType());
+ auto SVT = VT->getElementType();
+ int NumElts = VT->getNumElements();
+ int BitWidth = SVT->getIntegerBitWidth();
+
+ // If the shift amount is guaranteed to be in-range we can replace it with a
+ // generic shift.
+ APInt UpperBits =
+ APInt::getHighBitsSet(BitWidth, BitWidth - Log2_32(BitWidth));
+ if (llvm::MaskedValueIsZero(Amt, UpperBits,
+ II.getModule()->getDataLayout())) {
+ return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
+ : Builder.CreateLShr(Vec, Amt))
+ : Builder.CreateAShr(Vec, Amt));
+ }
+
+ // Simplify if all shift amounts are constant/undef.
+ auto *CShift = dyn_cast<Constant>(Amt);
+ if (!CShift)
+ return nullptr;
+
+ // Collect each element's shift amount.
+ // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
+ bool AnyOutOfRange = false;
+ SmallVector<int, 8> ShiftAmts;
+ for (int I = 0; I < NumElts; ++I) {
+ auto *CElt = CShift->getAggregateElement(I);
+ if (isa_and_nonnull<UndefValue>(CElt)) {
+ ShiftAmts.push_back(-1);
+ continue;
+ }
+
+ auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
+ if (!COp)
+ return nullptr;
+
+ // Handle out of range shifts.
+ // If LogicalShift - set to BitWidth (special case).
+ // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
+ APInt ShiftVal = COp->getValue();
+ if (ShiftVal.uge(BitWidth)) {
+ AnyOutOfRange = LogicalShift;
+ ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
+ continue;
+ }
+
+ ShiftAmts.push_back((int)ShiftVal.getZExtValue());
+ }
+
+ // If all elements are out of range or UNDEF, return a vector of zeros/undefs.
+ // ArithmeticShift should only hit this if they are all UNDEF.
+ auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
+ if (llvm::all_of(ShiftAmts, OutOfRange)) {
+ SmallVector<Constant *, 8> ConstantVec;
+ for (int Idx : ShiftAmts) {
+ if (Idx < 0) {
+ ConstantVec.push_back(UndefValue::get(SVT));
+ } else {
+ assert(LogicalShift && "Logical shift expected");
+ ConstantVec.push_back(ConstantInt::getNullValue(SVT));
+ }
+ }
+ return ConstantVector::get(ConstantVec);
+ }
+
+ // We can't handle only some out of range values with generic logical shifts.
+ if (AnyOutOfRange)
+ return nullptr;
+
+ // Build the shift amount constant vector.
+ SmallVector<Constant *, 8> ShiftVecAmts;
+ for (int Idx : ShiftAmts) {
+ if (Idx < 0)
+ ShiftVecAmts.push_back(UndefValue::get(SVT));
+ else
+ ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
+ }
+ auto ShiftVec = ConstantVector::get(ShiftVecAmts);
+
+ if (ShiftLeft)
+ return Builder.CreateShl(Vec, ShiftVec);
+
+ if (LogicalShift)
+ return Builder.CreateLShr(Vec, ShiftVec);
+
+ return Builder.CreateAShr(Vec, ShiftVec);
+}
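+
+// Illustrative sketch of the per-element shift fold above (hypothetical
+// values): with every lane provably in range,
+//   %r = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %x,
+//                                              <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+// becomes
+//   %r = lshr <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
+// A constant amount vector with only some lanes out of range is left alone for
+// the logical forms, since generic shifts would make those lanes poison rather
+// than zero.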
+
+static Value *simplifyX86pack(IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder, bool IsSigned) {
+ Value *Arg0 = II.getArgOperand(0);
+ Value *Arg1 = II.getArgOperand(1);
+ Type *ResTy = II.getType();
+
+ // Fast path: all-undef handling.
+ if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
+ return UndefValue::get(ResTy);
+
+ auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
+ unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
+ unsigned NumSrcElts = ArgTy->getNumElements();
+ assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
+ "Unexpected packing types");
+
+ unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
+ unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
+ unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
+ assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
+ "Unexpected packing types");
+
+ // Constant folding.
+ if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
+ return nullptr;
+
+ // Clamp Values - signed/unsigned both use signed clamp values, but they
+ // differ on the min/max values.
+ APInt MinValue, MaxValue;
+ if (IsSigned) {
+ // PACKSS: Truncate signed value with signed saturation.
+ // Source values less than dst minint are saturated to minint.
+ // Source values greater than dst maxint are saturated to maxint.
+ MinValue =
+ APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
+ MaxValue =
+ APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
+ } else {
+ // PACKUS: Truncate signed value with unsigned saturation.
+ // Source values less than zero are saturated to zero.
+ // Source values greater than dst maxuint are saturated to maxuint.
+ MinValue = APInt::getNullValue(SrcScalarSizeInBits);
+ MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
+ }
+
+ auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
+ auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
+ Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
+ Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
+ Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
+ Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
+
+ // Shuffle clamped args together at the lane level.
+ SmallVector<int, 32> PackMask;
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
+ PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
+ for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
+ PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
+ }
+ auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
+
+ // Truncate to dst size.
+ return Builder.CreateTrunc(Shuffle, ResTy);
+}
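+
+// Illustrative sketch of the pack fold above (hypothetical constants): because
+// the select/shuffle/trunc chain is only emitted for constant inputs, a call
+// such as
+//   @llvm.x86.sse2.packssdw.128(<4 x i32> <i32 70000, i32 -70000, i32 1, i32 2>,
+//                               <4 x i32> zeroinitializer)
+// constant-folds to an <8 x i16> result, with 70000 saturating to 32767 and
+// -70000 saturating to -32768 in the first two result elements.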
+
+static Value *simplifyX86movmsk(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Arg = II.getArgOperand(0);
+ Type *ResTy = II.getType();
+
+ // movmsk(undef) -> zero as we must ensure the upper bits are zero.
+ if (isa<UndefValue>(Arg))
+ return Constant::getNullValue(ResTy);
+
+ auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
+ // We can't easily peek through x86_mmx types.
+ if (!ArgTy)
+ return nullptr;
+
+ // Expand MOVMSK to compare/bitcast/zext:
+ // e.g. PMOVMSKB(v16i8 x):
+ // %cmp = icmp slt <16 x i8> %x, zeroinitializer
+ // %int = bitcast <16 x i1> %cmp to i16
+ // %res = zext i16 %int to i32
+ unsigned NumElts = ArgTy->getNumElements();
+ Type *IntegerVecTy = VectorType::getInteger(ArgTy);
+ Type *IntegerTy = Builder.getIntNTy(NumElts);
+
+ Value *Res = Builder.CreateBitCast(Arg, IntegerVecTy);
+ Res = Builder.CreateICmpSLT(Res, Constant::getNullValue(IntegerVecTy));
+ Res = Builder.CreateBitCast(Res, IntegerTy);
+ Res = Builder.CreateZExtOrTrunc(Res, ResTy);
+ return Res;
+}
+
+static Value *simplifyX86addcarry(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ Value *CarryIn = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+ Value *Op2 = II.getArgOperand(2);
+ Type *RetTy = II.getType();
+ Type *OpTy = Op1->getType();
+ assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
+ RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
+ "Unexpected types for x86 addcarry");
+
+ // If carry-in is zero, this is just an unsigned add with overflow.
+ if (match(CarryIn, PatternMatch::m_ZeroInt())) {
+ Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
+ {Op1, Op2});
+ // The types have to be adjusted to match the x86 call types.
+ Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
+ Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
+ Builder.getInt8Ty());
+ Value *Res = UndefValue::get(RetTy);
+ Res = Builder.CreateInsertValue(Res, UAddOV, 0);
+ return Builder.CreateInsertValue(Res, UAddResult, 1);
+ }
+
+ return nullptr;
+}
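+
+// Illustrative sketch of the addcarry fold above (hypothetical values): with a
+// zero carry-in,
+//   %r = call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %a, i32 %b)
+// is rebuilt from the generic overflow intrinsic:
+//   %u  = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+//   %s  = extractvalue { i32, i1 } %u, 0
+//   %o  = extractvalue { i32, i1 } %u, 1
+//   %ov = zext i1 %o to i8
+// with %ov placed in field 0 and %s in field 1 of the { i8, i32 } result.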
+
+static Value *simplifyX86insertps(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
+ if (!CInt)
+ return nullptr;
+
+ auto *VecTy = cast<FixedVectorType>(II.getType());
+ assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
+
+ // The immediate permute control byte looks like this:
+ // [3:0] - zero mask for each 32-bit lane
+ // [5:4] - select one 32-bit destination lane
+ // [7:6] - select one 32-bit source lane
+
+ uint8_t Imm = CInt->getZExtValue();
+ uint8_t ZMask = Imm & 0xf;
+ uint8_t DestLane = (Imm >> 4) & 0x3;
+ uint8_t SourceLane = (Imm >> 6) & 0x3;
+
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // If all zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (ZMask == 0xf)
+ return ZeroVector;
+
+ // Initialize by passing all of the first source bits through.
+ int ShuffleMask[4] = {0, 1, 2, 3};
+
+ // We may replace the second operand with the zero vector.
+ Value *V1 = II.getArgOperand(1);
+
+ if (ZMask) {
+ // If the zero mask is being used with a single input or the zero mask
+ // overrides the destination lane, this is a shuffle with the zero vector.
+ if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
+ (ZMask & (1 << DestLane))) {
+ V1 = ZeroVector;
+ // We may still move 32-bits of the first source vector from one lane
+ // to another.
+ ShuffleMask[DestLane] = SourceLane;
+ // The zero mask may override the previous insert operation.
+ for (unsigned i = 0; i < 4; ++i)
+ if ((ZMask >> i) & 0x1)
+ ShuffleMask[i] = i + 4;
+ } else {
+ // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
+ return nullptr;
+ }
+ } else {
+ // Replace the selected destination lane with the selected source lane.
+ ShuffleMask[DestLane] = SourceLane + 4;
+ }
+
+ return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
+}
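+
+// Illustrative sketch of the insertps fold above (hypothetical operands): with
+// an immediate of 0x50 (zero mask 0, destination lane 1, source lane 1),
+//   @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %b, i8 80)
+// becomes
+//   shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>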
+
+/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
+/// or conversion to a shuffle vector.
+static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
+ ConstantInt *CILength, ConstantInt *CIIndex,
+ InstCombiner::BuilderTy &Builder) {
+ auto LowConstantHighUndef = [&](uint64_t Val) {
+ Type *IntTy64 = Type::getInt64Ty(II.getContext());
+ Constant *Args[] = {ConstantInt::get(IntTy64, Val),
+ UndefValue::get(IntTy64)};
+ return ConstantVector::get(Args);
+ };
+
+ // See if we're dealing with constant values.
+ Constant *C0 = dyn_cast<Constant>(Op0);
+ ConstantInt *CI0 =
+ C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
+ : nullptr;
+
+ // Attempt to constant fold.
+ if (CILength && CIIndex) {
+ // From AMD documentation: "The bit index and field length are each six
+ // bits in length other bits of the field are ignored."
+ APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
+ APInt APLength = CILength->getValue().zextOrTrunc(6);
+
+ unsigned Index = APIndex.getZExtValue();
+
+ // From AMD documentation: "a value of zero in the field length is
+ // defined as length of 64".
+ unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
+
+ // From AMD documentation: "If the sum of the bit index + length field
+ // is greater than 64, the results are undefined".
+ unsigned End = Index + Length;
+
+ // Note that both field index and field length are 8-bit quantities.
+ // Since variables 'Index' and 'Length' are unsigned values
+ // obtained from zero-extending field index and field length
+ // respectively, their sum should never wrap around.
+ if (End > 64)
+ return UndefValue::get(II.getType());
+
+ // If we are extracting whole bytes, we can convert this to a shuffle.
+ // Lowering can recognize EXTRQI shuffle masks.
+ if ((Length % 8) == 0 && (Index % 8) == 0) {
+ // Convert bit indices to byte indices.
+ Length /= 8;
+ Index /= 8;
+
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ auto *ShufTy = FixedVectorType::get(IntTy8, 16);
+
+ SmallVector<int, 16> ShuffleMask;
+ for (int i = 0; i != (int)Length; ++i)
+ ShuffleMask.push_back(i + Index);
+ for (int i = Length; i != 8; ++i)
+ ShuffleMask.push_back(i + 16);
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(-1);
+
+ Value *SV = Builder.CreateShuffleVector(
+ Builder.CreateBitCast(Op0, ShufTy),
+ ConstantAggregateZero::get(ShufTy), ShuffleMask);
+ return Builder.CreateBitCast(SV, II.getType());
+ }
+
+ // Constant Fold - shift Index'th bit to lowest position and mask off
+ // Length bits.
+ if (CI0) {
+ APInt Elt = CI0->getValue();
+ Elt.lshrInPlace(Index);
+ Elt = Elt.zextOrTrunc(Length);
+ return LowConstantHighUndef(Elt.getZExtValue());
+ }
+
+ // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
+ Value *Args[] = {Op0, CILength, CIIndex};
+ Module *M = II.getModule();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
+ return Builder.CreateCall(F, Args);
+ }
+ }
+
+ // Constant Fold - extraction from zero is always {zero, undef}.
+ if (CI0 && CI0->isZero())
+ return LowConstantHighUndef(0);
+
+ return nullptr;
+}
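+
+// Illustrative sketch of the EXTRQ/EXTRQI fold above (hypothetical operands):
+// a byte-aligned extraction such as length 16 at bit index 16 is rewritten as
+// a byte shuffle of the <16 x i8> bitcast of the source: bytes 2 and 3 of the
+// source, then zero bytes for the rest of the low 64 bits, with the high
+// 64 bits left undef, followed by a bitcast back to <2 x i64>.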
+
+/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
+/// folding or conversion to a shuffle vector.
+static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
+ APInt APLength, APInt APIndex,
+ InstCombiner::BuilderTy &Builder) {
+ // From AMD documentation: "The bit index and field length are each six bits
+ // in length other bits of the field are ignored."
+ APIndex = APIndex.zextOrTrunc(6);
+ APLength = APLength.zextOrTrunc(6);
+
+ // Attempt to constant fold.
+ unsigned Index = APIndex.getZExtValue();
+
+ // From AMD documentation: "a value of zero in the field length is
+ // defined as length of 64".
+ unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
+
+ // From AMD documentation: "If the sum of the bit index + length field
+ // is greater than 64, the results are undefined".
+ unsigned End = Index + Length;
+
+ // Note that both field index and field length are 8-bit quantities.
+ // Since variables 'Index' and 'Length' are unsigned values
+ // obtained from zero-extending field index and field length
+ // respectively, their sum should never wrap around.
+ if (End > 64)
+ return UndefValue::get(II.getType());
+
+ // If we are inserting whole bytes, we can convert this to a shuffle.
+ // Lowering can recognize INSERTQI shuffle masks.
+ if ((Length % 8) == 0 && (Index % 8) == 0) {
+ // Convert bit indices to byte indices.
+ Length /= 8;
+ Index /= 8;
+
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ auto *ShufTy = FixedVectorType::get(IntTy8, 16);
+
+ SmallVector<int, 16> ShuffleMask;
+ for (int i = 0; i != (int)Index; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = 0; i != (int)Length; ++i)
+ ShuffleMask.push_back(i + 16);
+ for (int i = Index + Length; i != 8; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(-1);
+
+ Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
+ Builder.CreateBitCast(Op1, ShufTy),
+ ShuffleMask);
+ return Builder.CreateBitCast(SV, II.getType());
+ }
+
+ // See if we're dealing with constant values.
+ Constant *C0 = dyn_cast<Constant>(Op0);
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CI00 =
+ C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
+ : nullptr;
+ ConstantInt *CI10 =
+ C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
+ : nullptr;
+
+ // Constant Fold - insert bottom Length bits starting at the Index'th bit.
+ if (CI00 && CI10) {
+ APInt V00 = CI00->getValue();
+ APInt V10 = CI10->getValue();
+ APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
+ V00 = V00 & ~Mask;
+ V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
+ APInt Val = V00 | V10;
+ Type *IntTy64 = Type::getInt64Ty(II.getContext());
+ Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
+ UndefValue::get(IntTy64)};
+ return ConstantVector::get(Args);
+ }
+
+ // If we were an INSERTQ call, we'll save demanded elements if we convert to
+ // INSERTQI.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ Constant *CILength = ConstantInt::get(IntTy8, Length, false);
+ Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
+
+ Value *Args[] = {Op0, Op1, CILength, CIIndex};
+ Module *M = II.getModule();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
+ return Builder.CreateCall(F, Args);
+ }
+
+ return nullptr;
+}
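+
+// Illustrative sketch of the INSERTQ/INSERTQI fold above: a byte-aligned
+// insertion (length and index both multiples of 8) becomes a two-source byte
+// shuffle of the <16 x i8> bitcasts, and if the low elements of both operands
+// are constant integers the result folds directly to a {constant, undef}
+// <2 x i64> vector via the bit arithmetic above.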
+
+/// Attempt to convert pshufb* to shufflevector if the mask is constant.
+static Value *simplifyX86pshufb(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
+ if (!V)
+ return nullptr;
+
+ auto *VecTy = cast<FixedVectorType>(II.getType());
+ unsigned NumElts = VecTy->getNumElements();
+ assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
+ "Unexpected number of elements in shuffle mask!");
+
+ // Construct a shuffle mask from constant integers or UNDEFs.
+ int Indexes[64];
+
+ // Each byte in the shuffle control mask forms an index to permute the
+ // corresponding byte in the destination operand.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ Constant *COp = V->getAggregateElement(I);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return nullptr;
+
+ if (isa<UndefValue>(COp)) {
+ Indexes[I] = -1;
+ continue;
+ }
+
+ int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
+
+ // If the most significant bit (bit[7]) of each byte of the shuffle
+ // control mask is set, then zero is written in the result byte.
+ // The zero vector is in the right-hand side of the resulting
+ // shufflevector.
+
+ // The value of each index for the high 128-bit lane is the least
+ // significant 4 bits of the respective shuffle control byte.
+ Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
+ Indexes[I] = Index;
+ }
+
+ auto V1 = II.getArgOperand(0);
+ auto V2 = Constant::getNullValue(VecTy);
+ return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, NumElts));
+}
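+
+// Illustrative sketch of the pshufb fold above (hypothetical control bytes):
+// a control byte of 0x05 selects byte 5 of the same 16-byte lane of the first
+// operand, while a control byte with the sign bit set (e.g. 0x80) maps to an
+// index past the source, i.e. a zero element taken from the null vector used
+// as the second shufflevector operand.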
+
+/// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
+static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
+ if (!V)
+ return nullptr;
+
+ auto *VecTy = cast<FixedVectorType>(II.getType());
+ unsigned NumElts = VecTy->getNumElements();
+ bool IsPD = VecTy->getScalarType()->isDoubleTy();
+ unsigned NumLaneElts = IsPD ? 2 : 4;
+ assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
+
+ // Construct a shuffle mask from constant integers or UNDEFs.
+ int Indexes[16];
+
+ // The intrinsics only read one or two bits, clear the rest.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ Constant *COp = V->getAggregateElement(I);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return nullptr;
+
+ if (isa<UndefValue>(COp)) {
+ Indexes[I] = -1;
+ continue;
+ }
+
+ APInt Index = cast<ConstantInt>(COp)->getValue();
+ Index = Index.zextOrTrunc(32).getLoBits(2);
+
+ // The PD variants use bit 1 to select the per-lane element index, so
+ // shift down to convert to generic shuffle mask index.
+ if (IsPD)
+ Index.lshrInPlace(1);
+
+ // The _256 variants are a bit trickier since the mask bits always index
+ // into the corresponding 128-bit half. In order to convert to a generic
+ // shuffle, we have to make that explicit.
+ Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
+
+ Indexes[I] = Index.getZExtValue();
+ }
+
+ auto V1 = II.getArgOperand(0);
+ return Builder.CreateShuffleVector(V1, makeArrayRef(Indexes, NumElts));
+}
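+
+// Illustrative sketch of the vpermilvar fold above (hypothetical operands):
+//   @llvm.x86.avx.vpermilvar.ps(<4 x float> %x, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
+// becomes a single-source shufflevector of %x with mask <3, 2, 1, 0>, i.e. the
+// element order of %x reversed. For the 256-bit variants the per-lane indices
+// are offset into their own 128-bit half, as described above.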
+
+/// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
+static Value *simplifyX86vpermv(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ auto *V = dyn_cast<Constant>(II.getArgOperand(1));
+ if (!V)
+ return nullptr;
+
+ auto *VecTy = cast<FixedVectorType>(II.getType());
+ unsigned Size = VecTy->getNumElements();
+ assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
+ "Unexpected shuffle mask size");
+
+ // Construct a shuffle mask from constant integers or UNDEFs.
+ int Indexes[64];
+
+ for (unsigned I = 0; I < Size; ++I) {
+ Constant *COp = V->getAggregateElement(I);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return nullptr;
+
+ if (isa<UndefValue>(COp)) {
+ Indexes[I] = -1;
+ continue;
+ }
+
+ uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
+ Index &= Size - 1;
+ Indexes[I] = Index;
+ }
+
+ auto V1 = II.getArgOperand(0);
+ return Builder.CreateShuffleVector(V1, makeArrayRef(Indexes, Size));
+}
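+
+// Illustrative sketch of the vpermd/vpermps fold above (hypothetical
+// operands): with a constant index vector, e.g.
+//   @llvm.x86.avx2.permd(<8 x i32> %x, <8 x i32> <i32 9, i32 0, ..., i32 7>)
+// each index is masked to the element count (9 & 7 == 1), so the call becomes
+// a plain shufflevector of %x with the masked indices.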
+
+Optional<Instruction *>
+X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+ auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
+ unsigned DemandedWidth) {
+ APInt UndefElts(Width, 0);
+ APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
+ return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
+ };
+
+ Intrinsic::ID IID = II.getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::x86_bmi_bextr_32:
+ case Intrinsic::x86_bmi_bextr_64:
+ case Intrinsic::x86_tbm_bextri_u32:
+ case Intrinsic::x86_tbm_bextri_u64:
+ // If the RHS is a constant we can try some simplifications.
+ if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
+ uint64_t Shift = C->getZExtValue();
+ uint64_t Length = (Shift >> 8) & 0xff;
+ Shift &= 0xff;
+ unsigned BitWidth = II.getType()->getIntegerBitWidth();
+ // If the length is 0 or the shift is out of range, replace with zero.
+ if (Length == 0 || Shift >= BitWidth) {
+ return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
+ }
+ // If the LHS is also a constant, we can completely constant fold this.
+ if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
+ uint64_t Result = InC->getZExtValue() >> Shift;
+ if (Length > BitWidth)
+ Length = BitWidth;
+ Result &= maskTrailingOnes<uint64_t>(Length);
+ return IC.replaceInstUsesWith(II,
+ ConstantInt::get(II.getType(), Result));
+ }
+ // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
+ // are only masking bits that a shift already cleared?
+ }
+ break;
+
+ case Intrinsic::x86_bmi_bzhi_32:
+ case Intrinsic::x86_bmi_bzhi_64:
+ // If the RHS is a constant we can try some simplifications.
+ if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
+ uint64_t Index = C->getZExtValue() & 0xff;
+ unsigned BitWidth = II.getType()->getIntegerBitWidth();
+ if (Index >= BitWidth) {
+ return IC.replaceInstUsesWith(II, II.getArgOperand(0));
+ }
+ if (Index == 0) {
+ return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
+ }
+ // If the LHS is also a constant, we can completely constant fold this.
+ if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
+ uint64_t Result = InC->getZExtValue();
+ Result &= maskTrailingOnes<uint64_t>(Index);
+ return IC.replaceInstUsesWith(II,
+ ConstantInt::get(II.getType(), Result));
+ }
+ // TODO should we convert this to an AND if the RHS is constant?
+ }
+ break;
+ case Intrinsic::x86_bmi_pext_32:
+ case Intrinsic::x86_bmi_pext_64:
+ if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
+ if (MaskC->isNullValue()) {
+ return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
+ }
+ if (MaskC->isAllOnesValue()) {
+ return IC.replaceInstUsesWith(II, II.getArgOperand(0));
+ }
+
+ if (MaskC->getValue().isShiftedMask()) {
+ // Any single contiguous sequence of 1s anywhere in the mask simply
+ // describes a subset of the input bits shifted to the appropriate
+ // position. Replace with the straightforward IR.
+ unsigned ShiftAmount = MaskC->getValue().countTrailingZeros();
+ Value *Input = II.getArgOperand(0);
+ Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1));
+ Value *Shifted = IC.Builder.CreateLShr(Masked,
+ ConstantInt::get(II.getType(),
+ ShiftAmount));
+ return IC.replaceInstUsesWith(II, Shifted);
+ }
+
+ if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
+ uint64_t Src = SrcC->getZExtValue();
+ uint64_t Mask = MaskC->getZExtValue();
+ uint64_t Result = 0;
+ uint64_t BitToSet = 1;
+
+ while (Mask) {
+ // Isolate lowest set bit.
+ uint64_t BitToTest = Mask & -Mask;
+ if (BitToTest & Src)
+ Result |= BitToSet;
+
+ BitToSet <<= 1;
+ // Clear lowest set bit.
+ Mask &= Mask - 1;
+ }
+
+ return IC.replaceInstUsesWith(II,
+ ConstantInt::get(II.getType(), Result));
+ }
+ }
+ break;
+ case Intrinsic::x86_bmi_pdep_32:
+ case Intrinsic::x86_bmi_pdep_64:
+ if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
+ if (MaskC->isNullValue()) {
+ return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
+ }
+ if (MaskC->isAllOnesValue()) {
+ return IC.replaceInstUsesWith(II, II.getArgOperand(0));
+ }
+ if (MaskC->getValue().isShiftedMask()) {
+ // Any single contiguous sequence of 1s anywhere in the mask simply
+ // describes a subset of the input bits shifted to the appropriate
+ // position. Replace with the straightforward IR.
+ unsigned ShiftAmount = MaskC->getValue().countTrailingZeros();
+ Value *Input = II.getArgOperand(0);
+ Value *Shifted = IC.Builder.CreateShl(Input,
+ ConstantInt::get(II.getType(),
+ ShiftAmount));
+ Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1));
+ return IC.replaceInstUsesWith(II, Masked);
+ }
+
+ if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
+ uint64_t Src = SrcC->getZExtValue();
+ uint64_t Mask = MaskC->getZExtValue();
+ uint64_t Result = 0;
+ uint64_t BitToTest = 1;
+
+ while (Mask) {
+ // Isolate lowest set bit.
+ uint64_t BitToSet = Mask & -Mask;
+ if (BitToTest & Src)
+ Result |= BitToSet;
+
+ BitToTest <<= 1;
+ // Clear lowest set bit.
+ Mask &= Mask - 1;
+ }
+
+ return IC.replaceInstUsesWith(II,
+ ConstantInt::get(II.getType(), Result));
+ }
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ case Intrinsic::x86_avx512_vcvtss2si32:
+ case Intrinsic::x86_avx512_vcvtss2si64:
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2si32:
+ case Intrinsic::x86_avx512_vcvtsd2si64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ case Intrinsic::x86_avx512_cvttss2si:
+ case Intrinsic::x86_avx512_cvttss2si64:
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_cvttsd2si:
+ case Intrinsic::x86_avx512_cvttsd2si64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64: {
+ // These intrinsics only demand the 0th element of their input vectors. If
+ // we can simplify the input based on that, do so now.
+ Value *Arg = II.getArgOperand(0);
+ unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
+ return IC.replaceOperand(II, 0, V);
+ }
+ break;
+ }
+
+ case Intrinsic::x86_mmx_pmovmskb:
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_sse2_pmovmskb_128:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_avx2_pmovmskb:
+ if (Value *V = simplifyX86movmsk(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ case Intrinsic::x86_sse2_comige_sd:
+ case Intrinsic::x86_sse2_comigt_sd:
+ case Intrinsic::x86_sse2_comile_sd:
+ case Intrinsic::x86_sse2_comilt_sd:
+ case Intrinsic::x86_sse2_comineq_sd:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ case Intrinsic::x86_sse2_ucomineq_sd:
+ case Intrinsic::x86_avx512_vcomi_ss:
+ case Intrinsic::x86_avx512_vcomi_sd:
+ case Intrinsic::x86_avx512_mask_cmp_ss:
+ case Intrinsic::x86_avx512_mask_cmp_sd: {
+ // These intrinsics only demand the 0th element of their input vectors. If
+ // we can simplify the input based on that, do so now.
+ bool MadeChange = false;
+ Value *Arg0 = II.getArgOperand(0);
+ Value *Arg1 = II.getArgOperand(1);
+ unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
+ IC.replaceOperand(II, 0, V);
+ MadeChange = true;
+ }
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
+ IC.replaceOperand(II, 1, V);
+ MadeChange = true;
+ }
+ if (MadeChange) {
+ return &II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_avx512_add_ps_512:
+ case Intrinsic::x86_avx512_div_ps_512:
+ case Intrinsic::x86_avx512_mul_ps_512:
+ case Intrinsic::x86_avx512_sub_ps_512:
+ case Intrinsic::x86_avx512_add_pd_512:
+ case Intrinsic::x86_avx512_div_pd_512:
+ case Intrinsic::x86_avx512_mul_pd_512:
+ case Intrinsic::x86_avx512_sub_pd_512:
+ // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
+ // IR operations.
+ if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ if (R->getValue() == 4) {
+ Value *Arg0 = II.getArgOperand(0);
+ Value *Arg1 = II.getArgOperand(1);
+
+ Value *V;
+ switch (IID) {
+ default:
+ llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_avx512_add_ps_512:
+ case Intrinsic::x86_avx512_add_pd_512:
+ V = IC.Builder.CreateFAdd(Arg0, Arg1);
+ break;
+ case Intrinsic::x86_avx512_sub_ps_512:
+ case Intrinsic::x86_avx512_sub_pd_512:
+ V = IC.Builder.CreateFSub(Arg0, Arg1);
+ break;
+ case Intrinsic::x86_avx512_mul_ps_512:
+ case Intrinsic::x86_avx512_mul_pd_512:
+ V = IC.Builder.CreateFMul(Arg0, Arg1);
+ break;
+ case Intrinsic::x86_avx512_div_ps_512:
+ case Intrinsic::x86_avx512_div_pd_512:
+ V = IC.Builder.CreateFDiv(Arg0, Arg1);
+ break;
+ }
+
+ return IC.replaceInstUsesWith(II, V);
+ }
+ }
+ break;
+
+ case Intrinsic::x86_avx512_mask_add_ss_round:
+ case Intrinsic::x86_avx512_mask_div_ss_round:
+ case Intrinsic::x86_avx512_mask_mul_ss_round:
+ case Intrinsic::x86_avx512_mask_sub_ss_round:
+ case Intrinsic::x86_avx512_mask_add_sd_round:
+ case Intrinsic::x86_avx512_mask_div_sd_round:
+ case Intrinsic::x86_avx512_mask_mul_sd_round:
+ case Intrinsic::x86_avx512_mask_sub_sd_round:
+ // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
+ // IR operations.
+ if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
+ if (R->getValue() == 4) {
+ // Extract the elements as scalars.
+ Value *Arg0 = II.getArgOperand(0);
+ Value *Arg1 = II.getArgOperand(1);
+ Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
+ Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);
+
+ Value *V;
+ switch (IID) {
+ default:
+ llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_avx512_mask_add_ss_round:
+ case Intrinsic::x86_avx512_mask_add_sd_round:
+ V = IC.Builder.CreateFAdd(LHS, RHS);
+ break;
+ case Intrinsic::x86_avx512_mask_sub_ss_round:
+ case Intrinsic::x86_avx512_mask_sub_sd_round:
+ V = IC.Builder.CreateFSub(LHS, RHS);
+ break;
+ case Intrinsic::x86_avx512_mask_mul_ss_round:
+ case Intrinsic::x86_avx512_mask_mul_sd_round:
+ V = IC.Builder.CreateFMul(LHS, RHS);
+ break;
+ case Intrinsic::x86_avx512_mask_div_ss_round:
+ case Intrinsic::x86_avx512_mask_div_sd_round:
+ V = IC.Builder.CreateFDiv(LHS, RHS);
+ break;
+ }
+
+ // Handle the masking aspect of the intrinsic.
+ Value *Mask = II.getArgOperand(3);
+ auto *C = dyn_cast<ConstantInt>(Mask);
+ // We don't need a select if we know the mask bit is a 1.
+ if (!C || !C->getValue()[0]) {
+ // Cast the mask to an i1 vector and then extract the lowest element.
+ auto *MaskTy = FixedVectorType::get(
+ IC.Builder.getInt1Ty(),
+ cast<IntegerType>(Mask->getType())->getBitWidth());
+ Mask = IC.Builder.CreateBitCast(Mask, MaskTy);
+ Mask = IC.Builder.CreateExtractElement(Mask, (uint64_t)0);
+ // Extract the lowest element from the passthru operand.
+ Value *Passthru =
+ IC.Builder.CreateExtractElement(II.getArgOperand(2), (uint64_t)0);
+ V = IC.Builder.CreateSelect(Mask, V, Passthru);
+ }
+
+ // Insert the result back into the original argument 0.
+ V = IC.Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
+
+ return IC.replaceInstUsesWith(II, V);
+ }
+ }
+ break;
+
+ // Constant fold ashr( <A x Bi>, Ci ).
+ // Constant fold lshr( <A x Bi>, Ci ).
+ // Constant fold shl( <A x Bi>, Ci ).
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx512_psrai_q_128:
+ case Intrinsic::x86_avx512_psrai_q_256:
+ case Intrinsic::x86_avx512_psrai_d_512:
+ case Intrinsic::x86_avx512_psrai_q_512:
+ case Intrinsic::x86_avx512_psrai_w_512:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx512_psrli_d_512:
+ case Intrinsic::x86_avx512_psrli_q_512:
+ case Intrinsic::x86_avx512_psrli_w_512:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx512_pslli_d_512:
+ case Intrinsic::x86_avx512_pslli_q_512:
+ case Intrinsic::x86_avx512_pslli_w_512:
+ if (Value *V = simplifyX86immShift(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx512_psra_q_128:
+ case Intrinsic::x86_avx512_psra_q_256:
+ case Intrinsic::x86_avx512_psra_d_512:
+ case Intrinsic::x86_avx512_psra_q_512:
+ case Intrinsic::x86_avx512_psra_w_512:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx512_psrl_d_512:
+ case Intrinsic::x86_avx512_psrl_q_512:
+ case Intrinsic::x86_avx512_psrl_w_512:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx512_psll_d_512:
+ case Intrinsic::x86_avx512_psll_q_512:
+ case Intrinsic::x86_avx512_psll_w_512: {
+ if (Value *V = simplifyX86immShift(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+
+ // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
+ // operand to compute the shift amount.
+ Value *Arg1 = II.getArgOperand(1);
+ assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
+ "Unexpected packed shift size");
+ unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();
+
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
+ return IC.replaceOperand(II, 1, V);
+ }
+ break;
+ }
+
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ case Intrinsic::x86_avx512_psllv_d_512:
+ case Intrinsic::x86_avx512_psllv_q_512:
+ case Intrinsic::x86_avx512_psllv_w_128:
+ case Intrinsic::x86_avx512_psllv_w_256:
+ case Intrinsic::x86_avx512_psllv_w_512:
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ case Intrinsic::x86_avx512_psrav_q_128:
+ case Intrinsic::x86_avx512_psrav_q_256:
+ case Intrinsic::x86_avx512_psrav_d_512:
+ case Intrinsic::x86_avx512_psrav_q_512:
+ case Intrinsic::x86_avx512_psrav_w_128:
+ case Intrinsic::x86_avx512_psrav_w_256:
+ case Intrinsic::x86_avx512_psrav_w_512:
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ case Intrinsic::x86_avx512_psrlv_d_512:
+ case Intrinsic::x86_avx512_psrlv_q_512:
+ case Intrinsic::x86_avx512_psrlv_w_128:
+ case Intrinsic::x86_avx512_psrlv_w_256:
+ case Intrinsic::x86_avx512_psrlv_w_512:
+ if (Value *V = simplifyX86varShift(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+
+ case Intrinsic::x86_sse2_packssdw_128:
+ case Intrinsic::x86_sse2_packsswb_128:
+ case Intrinsic::x86_avx2_packssdw:
+ case Intrinsic::x86_avx2_packsswb:
+ case Intrinsic::x86_avx512_packssdw_512:
+ case Intrinsic::x86_avx512_packsswb_512:
+ if (Value *V = simplifyX86pack(II, IC.Builder, true)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+
+ case Intrinsic::x86_sse2_packuswb_128:
+ case Intrinsic::x86_sse41_packusdw:
+ case Intrinsic::x86_avx2_packusdw:
+ case Intrinsic::x86_avx2_packuswb:
+ case Intrinsic::x86_avx512_packusdw_512:
+ case Intrinsic::x86_avx512_packuswb_512:
+ if (Value *V = simplifyX86pack(II, IC.Builder, false)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+
+ case Intrinsic::x86_pclmulqdq:
+ case Intrinsic::x86_pclmulqdq_256:
+ case Intrinsic::x86_pclmulqdq_512: {
+ if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ unsigned Imm = C->getZExtValue();
+
+ bool MadeChange = false;
+ Value *Arg0 = II.getArgOperand(0);
+ Value *Arg1 = II.getArgOperand(1);
+ unsigned VWidth =
+ cast<FixedVectorType>(Arg0->getType())->getNumElements();
+
+ APInt UndefElts1(VWidth, 0);
+ APInt DemandedElts1 =
+ APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));
+ if (Value *V =
+ IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) {
+ IC.replaceOperand(II, 0, V);
+ MadeChange = true;
+ }
+
+ APInt UndefElts2(VWidth, 0);
+ APInt DemandedElts2 =
+ APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
+ if (Value *V =
+ IC.SimplifyDemandedVectorElts(Arg1, DemandedElts2, UndefElts2)) {
+ IC.replaceOperand(II, 1, V);
+ MadeChange = true;
+ }
+
+ // If either input's demanded elements are all undef, the result is zero.
+ if (DemandedElts1.isSubsetOf(UndefElts1) ||
+ DemandedElts2.isSubsetOf(UndefElts2)) {
+ return IC.replaceInstUsesWith(II,
+ ConstantAggregateZero::get(II.getType()));
+ }
+
+ if (MadeChange) {
+ return &II;
+ }
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse41_insertps:
+ if (Value *V = simplifyX86insertps(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+
+ case Intrinsic::x86_sse4a_extrq: {
+ Value *Op0 = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+ unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
+ unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
+ VWidth1 == 16 && "Unexpected operand sizes");
+
+ // See if we're dealing with constant values.
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CILength =
+ C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
+ : nullptr;
+ ConstantInt *CIIndex =
+ C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
+ : nullptr;
+
+ // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
+ if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+
+ // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
+ // operand and the lowest 16-bits of the second.
+ bool MadeChange = false;
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
+ IC.replaceOperand(II, 0, V);
+ MadeChange = true;
+ }
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
+ IC.replaceOperand(II, 1, V);
+ MadeChange = true;
+ }
+ if (MadeChange) {
+ return &II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse4a_extrqi: {
+ // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
+ // bits of the lower 64-bits. The upper 64-bits are undefined.
+ Value *Op0 = II.getArgOperand(0);
+ unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
+ "Unexpected operand size");
+
+ // See if we're dealing with constant values.
+ ConstantInt *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
+ ConstantInt *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2));
+
+ // Attempt to simplify to a constant or shuffle vector.
+ if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+
+ // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
+ // operand.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
+ return IC.replaceOperand(II, 0, V);
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse4a_insertq: {
+ Value *Op0 = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+ unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
+ cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
+ "Unexpected operand size");
+
+ // See if we're dealing with constant values.
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CI11 =
+ C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
+ : nullptr;
+
+ // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
+ if (CI11) {
+ const APInt &V11 = CI11->getValue();
+ APInt Len = V11.zextOrTrunc(6);
+ APInt Idx = V11.lshr(8).zextOrTrunc(6);
+ if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ }
+
+ // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
+ // operand.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
+ return IC.replaceOperand(II, 0, V);
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse4a_insertqi: {
+ // INSERTQI: Extract lowest Length bits from lower half of second source and
+ // insert over first source starting at Index bit. The upper 64-bits are
+ // undefined.
+ Value *Op0 = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+ unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
+ unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
+ VWidth1 == 2 && "Unexpected operand sizes");
+
+ // See if we're dealing with constant values.
+ ConstantInt *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
+ ConstantInt *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));
+
+ // Attempt to simplify to a constant or shuffle vector.
+ if (CILength && CIIndex) {
+ APInt Len = CILength->getValue().zextOrTrunc(6);
+ APInt Idx = CIIndex->getValue().zextOrTrunc(6);
+ if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ }
+
+ // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
+ // operands.
+ bool MadeChange = false;
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
+ IC.replaceOperand(II, 0, V);
+ MadeChange = true;
+ }
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
+ IC.replaceOperand(II, 1, V);
+ MadeChange = true;
+ }
+ if (MadeChange) {
+ return &II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse41_pblendvb:
+ case Intrinsic::x86_sse41_blendvps:
+ case Intrinsic::x86_sse41_blendvpd:
+ case Intrinsic::x86_avx_blendv_ps_256:
+ case Intrinsic::x86_avx_blendv_pd_256:
+ case Intrinsic::x86_avx2_pblendvb: {
+ // fold (blend A, A, Mask) -> A
+ Value *Op0 = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+ Value *Mask = II.getArgOperand(2);
+ if (Op0 == Op1) {
+ return IC.replaceInstUsesWith(II, Op0);
+ }
+
+ // Zero Mask - select 1st argument.
+ if (isa<ConstantAggregateZero>(Mask)) {
+ return IC.replaceInstUsesWith(II, Op0);
+ }
+
+ // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
+ if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
+ Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
+ return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
+ }
+
+ // Convert to a vector select if we can bypass casts and find a boolean
+ // vector condition value.
+ Value *BoolVec;
+ Mask = InstCombiner::peekThroughBitcast(Mask);
+ if (match(Mask, PatternMatch::m_SExt(PatternMatch::m_Value(BoolVec))) &&
+ BoolVec->getType()->isVectorTy() &&
+ BoolVec->getType()->getScalarSizeInBits() == 1) {
+ assert(Mask->getType()->getPrimitiveSizeInBits() ==
+ II.getType()->getPrimitiveSizeInBits() &&
+ "Not expecting mask and operands with different sizes");
+
+ unsigned NumMaskElts =
+ cast<FixedVectorType>(Mask->getType())->getNumElements();
+ unsigned NumOperandElts =
+ cast<FixedVectorType>(II.getType())->getNumElements();
+ if (NumMaskElts == NumOperandElts) {
+ return SelectInst::Create(BoolVec, Op1, Op0);
+ }
+
+ // If the mask has less elements than the operands, each mask bit maps to
+ // multiple elements of the operands. Bitcast back and forth.
+ if (NumMaskElts < NumOperandElts) {
+ Value *CastOp0 = IC.Builder.CreateBitCast(Op0, Mask->getType());
+ Value *CastOp1 = IC.Builder.CreateBitCast(Op1, Mask->getType());
+ Value *Sel = IC.Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
+ return new BitCastInst(Sel, II.getType());
+ }
+ }
+
+ break;
+ }
+
+ case Intrinsic::x86_ssse3_pshuf_b_128:
+ case Intrinsic::x86_avx2_pshuf_b:
+ case Intrinsic::x86_avx512_pshuf_b_512:
+ if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+
+ case Intrinsic::x86_avx_vpermilvar_ps:
+ case Intrinsic::x86_avx_vpermilvar_ps_256:
+ case Intrinsic::x86_avx512_vpermilvar_ps_512:
+ case Intrinsic::x86_avx_vpermilvar_pd:
+ case Intrinsic::x86_avx_vpermilvar_pd_256:
+ case Intrinsic::x86_avx512_vpermilvar_pd_512:
+ if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+
+ case Intrinsic::x86_avx2_permd:
+ case Intrinsic::x86_avx2_permps:
+ case Intrinsic::x86_avx512_permvar_df_256:
+ case Intrinsic::x86_avx512_permvar_df_512:
+ case Intrinsic::x86_avx512_permvar_di_256:
+ case Intrinsic::x86_avx512_permvar_di_512:
+ case Intrinsic::x86_avx512_permvar_hi_128:
+ case Intrinsic::x86_avx512_permvar_hi_256:
+ case Intrinsic::x86_avx512_permvar_hi_512:
+ case Intrinsic::x86_avx512_permvar_qi_128:
+ case Intrinsic::x86_avx512_permvar_qi_256:
+ case Intrinsic::x86_avx512_permvar_qi_512:
+ case Intrinsic::x86_avx512_permvar_sf_512:
+ case Intrinsic::x86_avx512_permvar_si_512:
+ if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+
+ case Intrinsic::x86_avx_maskload_ps:
+ case Intrinsic::x86_avx_maskload_pd:
+ case Intrinsic::x86_avx_maskload_ps_256:
+ case Intrinsic::x86_avx_maskload_pd_256:
+ case Intrinsic::x86_avx2_maskload_d:
+ case Intrinsic::x86_avx2_maskload_q:
+ case Intrinsic::x86_avx2_maskload_d_256:
+ case Intrinsic::x86_avx2_maskload_q_256:
+ if (Instruction *I = simplifyX86MaskedLoad(II, IC)) {
+ return I;
+ }
+ break;
+
+ case Intrinsic::x86_sse2_maskmov_dqu:
+ case Intrinsic::x86_avx_maskstore_ps:
+ case Intrinsic::x86_avx_maskstore_pd:
+ case Intrinsic::x86_avx_maskstore_ps_256:
+ case Intrinsic::x86_avx_maskstore_pd_256:
+ case Intrinsic::x86_avx2_maskstore_d:
+ case Intrinsic::x86_avx2_maskstore_q:
+ case Intrinsic::x86_avx2_maskstore_d_256:
+ case Intrinsic::x86_avx2_maskstore_q_256:
+ if (simplifyX86MaskedStore(II, IC)) {
+ return nullptr;
+ }
+ break;
+
+ case Intrinsic::x86_addcarry_32:
+ case Intrinsic::x86_addcarry_64:
+ if (Value *V = simplifyX86addcarry(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+
+ default:
+ break;
+ }
+ return None;
+}
+
+Optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) const {
+ switch (II.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::x86_mmx_pmovmskb:
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_sse2_pmovmskb_128:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_avx2_pmovmskb: {
+ // MOVMSK copies the vector elements' sign bits to the low bits
+ // and zeros the high bits.
+ unsigned ArgWidth;
+ if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
+ ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
+ } else {
+ auto Arg = II.getArgOperand(0);
+ auto ArgType = cast<FixedVectorType>(Arg->getType());
+ ArgWidth = ArgType->getNumElements();
+ }
+
+ // If we don't need any of the low bits then return zero;
+ // we know that DemandedMask is non-zero already.
+ APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
+ Type *VTy = II.getType();
+ if (DemandedElts.isNullValue()) {
+ return ConstantInt::getNullValue(VTy);
+ }
+
+ // We know that the upper bits are set to zero.
+ Known.Zero.setBitsFrom(ArgWidth);
+ KnownBitsComputed = true;
+ break;
+ }
+ }
+ return None;
+}
+
+Optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ simplifyAndSetOp) const {
+ unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
+ switch (II.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::x86_xop_vfrcz_ss:
+ case Intrinsic::x86_xop_vfrcz_sd:
+ // The instructions for these intrinsics are specified to zero the upper
+ // bits, not pass them through like other scalar intrinsics. So we shouldn't
+ // just use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
+ // Instead we should return a zero vector.
+ if (!DemandedElts[0]) {
+ IC.addToWorklist(&II);
+ return ConstantAggregateZero::get(II.getType());
+ }
+
+ // Only the lower element is used.
+ DemandedElts = 1;
+ simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
+
+ // Only the lower element is undefined. The high elements are zero.
+ UndefElts = UndefElts[0];
+ break;
+
+ // Unary scalar-as-vector operations that work column-wise.
+ case Intrinsic::x86_sse_rcp_ss:
+ case Intrinsic::x86_sse_rsqrt_ss:
+ simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (!DemandedElts[0]) {
+ IC.addToWorklist(&II);
+ return II.getArgOperand(0);
+ }
+ // TODO: If only the low element is demanded, lower SQRT to FSQRT (with
+ // rounding/exception checks).
+ break;
+
+ // Binary scalar-as-vector operations that work column-wise. The high
+ // elements come from operand 0. The low element is a function of both
+ // operands.
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse2_min_sd:
+ case Intrinsic::x86_sse2_max_sd:
+ case Intrinsic::x86_sse2_cmp_sd: {
+ simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (!DemandedElts[0]) {
+ IC.addToWorklist(&II);
+ return II.getArgOperand(0);
+ }
+
+ // Only lower element is used for operand 1.
+ DemandedElts = 1;
+ simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
+
+ // Lower element is undefined if both lower elements are undefined.
+ // Consider things like undef&0. The result is known zero, not undef.
+ if (!UndefElts2[0])
+ UndefElts.clearBit(0);
+
+ break;
+ }
+
+ // Binary scalar-as-vector operations that work column-wise. The high
+ // elements come from operand 0 and the low element comes from operand 1.
+ case Intrinsic::x86_sse41_round_ss:
+ case Intrinsic::x86_sse41_round_sd: {
+ // Don't use the low element of operand 0.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clearBit(0);
+ simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (!DemandedElts[0]) {
+ IC.addToWorklist(&II);
+ return II.getArgOperand(0);
+ }
+
+ // Only lower element is used for operand 1.
+ DemandedElts = 1;
+ simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
+
+ // Take the high undef elements from operand 0 and take the lower element
+ // from operand 1.
+ UndefElts.clearBit(0);
+ UndefElts |= UndefElts2[0];
+ break;
+ }
+
+ // Three input scalar-as-vector operations that work column-wise. The high
+ // elements come from operand 0 and the low element is a function of all
+ // three inputs.
+ case Intrinsic::x86_avx512_mask_add_ss_round:
+ case Intrinsic::x86_avx512_mask_div_ss_round:
+ case Intrinsic::x86_avx512_mask_mul_ss_round:
+ case Intrinsic::x86_avx512_mask_sub_ss_round:
+ case Intrinsic::x86_avx512_mask_max_ss_round:
+ case Intrinsic::x86_avx512_mask_min_ss_round:
+ case Intrinsic::x86_avx512_mask_add_sd_round:
+ case Intrinsic::x86_avx512_mask_div_sd_round:
+ case Intrinsic::x86_avx512_mask_mul_sd_round:
+ case Intrinsic::x86_avx512_mask_sub_sd_round:
+ case Intrinsic::x86_avx512_mask_max_sd_round:
+ case Intrinsic::x86_avx512_mask_min_sd_round:
+ simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (!DemandedElts[0]) {
+ IC.addToWorklist(&II);
+ return II.getArgOperand(0);
+ }
+
+    // Only lower element is used for operands 1 and 2.
+ DemandedElts = 1;
+ simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
+ simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
+
+ // Lower element is undefined if all three lower elements are undefined.
+ // Consider things like undef&0. The result is known zero, not undef.
+ if (!UndefElts2[0] || !UndefElts3[0])
+ UndefElts.clearBit(0);
+ break;
+
+ // TODO: Add fmaddsub support?
+ case Intrinsic::x86_sse3_addsub_pd:
+ case Intrinsic::x86_sse3_addsub_ps:
+ case Intrinsic::x86_avx_addsub_pd_256:
+ case Intrinsic::x86_avx_addsub_ps_256: {
+ // If none of the even or none of the odd lanes are required, turn this
+ // into a generic FP math instruction.
+ APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
+ APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
+ bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
+ bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
+ if (IsSubOnly || IsAddOnly) {
+ assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
+ IRBuilderBase::InsertPointGuard Guard(IC.Builder);
+ IC.Builder.SetInsertPoint(&II);
+ Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
+ return IC.Builder.CreateBinOp(
+ IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
+ }
+
+ simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
+ simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
+ UndefElts &= UndefElts2;
+ break;
+ }
+
+ case Intrinsic::x86_sse2_packssdw_128:
+ case Intrinsic::x86_sse2_packsswb_128:
+ case Intrinsic::x86_sse2_packuswb_128:
+ case Intrinsic::x86_sse41_packusdw:
+ case Intrinsic::x86_avx2_packssdw:
+ case Intrinsic::x86_avx2_packsswb:
+ case Intrinsic::x86_avx2_packusdw:
+ case Intrinsic::x86_avx2_packuswb:
+ case Intrinsic::x86_avx512_packssdw_512:
+ case Intrinsic::x86_avx512_packsswb_512:
+ case Intrinsic::x86_avx512_packusdw_512:
+ case Intrinsic::x86_avx512_packuswb_512: {
+ auto *Ty0 = II.getArgOperand(0)->getType();
+ unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
+ assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
+
+ unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
+ unsigned VWidthPerLane = VWidth / NumLanes;
+ unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
+
+ // Per lane, pack the elements of the first input and then the second.
+ // e.g.
+ // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
+ // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
+ for (int OpNum = 0; OpNum != 2; ++OpNum) {
+ APInt OpDemandedElts(InnerVWidth, 0);
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ unsigned LaneIdx = Lane * VWidthPerLane;
+ for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
+ unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
+ if (DemandedElts[Idx])
+ OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
+ }
+ }
+
+ // Demand elements from the operand.
+ APInt OpUndefElts(InnerVWidth, 0);
+ simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);
+
+ // Pack the operand's UNDEF elements, one lane at a time.
+ OpUndefElts = OpUndefElts.zext(VWidth);
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
+ LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
+ LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
+ UndefElts |= LaneElts;
+ }
+ }
+ break;
+ }
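// Illustration (not part of the patch): a standalone check of the
// result-element -> source-element mapping that the pack loop above encodes,
// shown for the 256-bit case v32i8 PACK(v16i16 X, v16i16 Y). Plain C++, no
// LLVM dependencies; the variable names simply mirror the code above.
#include <cstdio>

int main() {
  const unsigned VWidth = 32, InnerVWidth = 16, NumLanes = 2;
  const unsigned VWidthPerLane = VWidth / NumLanes;           // 16
  const unsigned InnerVWidthPerLane = InnerVWidth / NumLanes; // 8
  for (unsigned Idx = 0; Idx != VWidth; ++Idx) {
    unsigned Lane = Idx / VWidthPerLane;
    unsigned Off = Idx % VWidthPerLane;
    unsigned OpNum = Off / InnerVWidthPerLane; // 0 -> X, 1 -> Y
    unsigned Elt = Off % InnerVWidthPerLane;
    unsigned SrcElt = Lane * InnerVWidthPerLane + Elt;
    printf("result[%2u] = %c[%2u]\n", Idx, OpNum ? 'Y' : 'X', SrcElt);
  }
  // Output is (X[0..7],Y[0..7]),(X[8..15],Y[8..15]), matching the comment above.
  return 0;
}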
+
+ // PSHUFB
+ case Intrinsic::x86_ssse3_pshuf_b_128:
+ case Intrinsic::x86_avx2_pshuf_b:
+ case Intrinsic::x86_avx512_pshuf_b_512:
+ // PERMILVAR
+ case Intrinsic::x86_avx_vpermilvar_ps:
+ case Intrinsic::x86_avx_vpermilvar_ps_256:
+ case Intrinsic::x86_avx512_vpermilvar_ps_512:
+ case Intrinsic::x86_avx_vpermilvar_pd:
+ case Intrinsic::x86_avx_vpermilvar_pd_256:
+ case Intrinsic::x86_avx512_vpermilvar_pd_512:
+ // PERMV
+ case Intrinsic::x86_avx2_permd:
+ case Intrinsic::x86_avx2_permps: {
+ simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
+ break;
+ }
+
+  // SSE4A instructions leave the upper 64 bits of the 128-bit result
+ // in an undefined state.
+ case Intrinsic::x86_sse4a_extrq:
+ case Intrinsic::x86_sse4a_extrqi:
+ case Intrinsic::x86_sse4a_insertq:
+ case Intrinsic::x86_sse4a_insertqi:
+ UndefElts.setHighBits(VWidth / 2);
+ break;
+ }
+ return None;
+}
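For context on the addsub case in simplifyDemandedVectorEltsIntrinsic above: ADDSUBPD/ADDSUBPS subtract in the even lanes and add in the odd lanes, so when only even (or only odd) result lanes are demanded the intrinsic degenerates to a plain FSub (or FAdd). The following is a minimal standalone sketch of that mask test, not part of the change, using plain integers in place of APInt (binary literals assume C++14):

#include <cstdint>
#include <cstdio>

int main() {
  // 4-element addsub (e.g. <4 x float>): these masks mirror
  // APInt::getSplat(4, APInt(2, 0x1)) and APInt::getSplat(4, APInt(2, 0x2)).
  const uint32_t SubMask = 0b0101; // even lanes subtract
  const uint32_t AddMask = 0b1010; // odd lanes add
  uint32_t DemandedElts = 0b0100;  // only lane 2 of the result is demanded

  bool IsSubOnly = (DemandedElts & ~SubMask) == 0; // DemandedElts.isSubsetOf(SubMask)
  bool IsAddOnly = (DemandedElts & ~AddMask) == 0; // DemandedElts.isSubsetOf(AddMask)
  if (IsSubOnly)
    printf("fold the addsub to fsub\n"); // taken here: lane 2 is an even lane
  else if (IsAddOnly)
    printf("fold the addsub to fadd\n");
  else
    printf("keep the addsub intrinsic\n");
  return 0;
}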
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
index e26dd5050a23..e4f3290cab9f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
@@ -16,17 +16,21 @@
let Predicates = [HasAMXTILE, In64BitMode] in {
let SchedRW = [WriteSystem] in {
- let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
+ let hasSideEffects = 1,
+ Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
"ldtilecfg\t$src",
[(int_x86_ldtilecfg addr:$src)]>, VEX, T8PS;
+ let hasSideEffects = 1 in
def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
"sttilecfg\t$src",
[(int_x86_sttilecfg addr:$src)]>, VEX, T8PD;
+ let mayLoad = 1 in
def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
(ins sibmem:$src),
"tileloadd\t{$src, $dst|$dst, $src}", []>,
VEX, T8XD;
+ let mayLoad = 1 in
def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
(ins sibmem:$src),
"tileloaddt1\t{$src, $dst|$dst, $src}", []>,
@@ -34,6 +38,7 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
"tilerelease", [(int_x86_tilerelease)]>, VEX, T8PS;
+ let mayStore = 1 in
def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs),
(ins sibmem:$dst, TILE:$src),
"tilestored\t{$src, $dst|$dst, $src}", []>,
@@ -42,6 +47,25 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
"tilezero\t$dst", []>,
VEX, T8XD;
+  // Pseudo instruction for RA.
+ let hasSideEffects = 1, mayLoad = 1,
+ Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
+ def PLDTILECFG : PseudoI <(outs TILECFG:$cfg), (ins opaquemem:$src), []>;
+
+ let hasSideEffects = 1, mayStore = 1 in
+ def PSTTILECFG : PseudoI<(outs), (ins opaquemem:$dst, TILECFG:$cfg), []>;
+
+ def PTILELOADDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
+ GR16:$src2,
+ opaquemem:$src3,
+ TILECFG:$cfg), []>;
+ def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
+ GR16:$src2, opaquemem:$src3,
+ TILE:$src4, TILECFG:$cfg), []>;
+ def PTILEZEROV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
+ GR16:$src2,
+ TILECFG:$cfg), []>;
+
let usesCustomInserter = 1 in {
// Pseudo instructions, using immediates instead of tile registers.
// To be translated to the actual instructions in X86ISelLowering.cpp
@@ -50,7 +74,7 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
sibmem:$src2), []>;
def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
- [(int_x86_tilezero imm:$src)]>;
+ [(int_x86_tilezero timm:$src)]>;
}
} // SchedRW
} // HasAMXTILE
@@ -76,25 +100,31 @@ let Predicates = [HasAMXINT8, In64BitMode] in {
VEX_4V, T8PS;
}
+  // Pseudo instruction for RA.
+ let Constraints = "$src4 = $dst" in
+ def PTDPBSSDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
+ GR16:$src2, GR16:$src3, TILE:$src4,
+ TILE:$src5, TILE:$src6, TILECFG:$cfg), []>;
+
let usesCustomInserter = 1 in {
// Pseudo instructions, using immediates instead of tile registers.
// To be translated to the actual instructions in X86ISelLowering.cpp
def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
u8imm:$src2, u8imm:$src3),
- [(int_x86_tdpbssd imm:$src1,
- imm:$src2, imm:$src3)]>;
+ [(int_x86_tdpbssd timm:$src1,
+ timm:$src2, timm:$src3)]>;
def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
u8imm:$src2, u8imm:$src3),
- [(int_x86_tdpbsud imm:$src1,
- imm:$src2, imm:$src3)]>;
+ [(int_x86_tdpbsud timm:$src1,
+ timm:$src2, timm:$src3)]>;
def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
u8imm:$src2, u8imm:$src3),
- [(int_x86_tdpbusd imm:$src1,
- imm:$src2, imm:$src3)]>;
+ [(int_x86_tdpbusd timm:$src1,
+ timm:$src2, timm:$src3)]>;
def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
u8imm:$src2, u8imm:$src3),
- [(int_x86_tdpbuud imm:$src1,
- imm:$src2, imm:$src3)]>;
+ [(int_x86_tdpbuud timm:$src1,
+ timm:$src2, timm:$src3)]>;
}
}
} // HasAMXTILE
@@ -112,8 +142,8 @@ let Predicates = [HasAMXBF16, In64BitMode] in {
// To be translated to the actual instructions in X86ISelLowering.cpp
def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
u8imm:$src2, u8imm:$src3),
- [(int_x86_tdpbf16ps imm:$src1,
- imm:$src2, imm:$src3)]>;
+ [(int_x86_tdpbf16ps timm:$src1,
+ timm:$src2, timm:$src3)]>;
}
}
} // HasAMXTILE, HasAMXBF16
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index a3ad0b1c8dd6..19012797ae9a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1123,10 +1123,10 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
EXTRACT_get_vextract256_imm, [HasAVX512]>;
// vextractps - extract 32 bits from XMM
-def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
+def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
(ins VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
+ [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
@@ -1414,11 +1414,12 @@ defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
- X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
+ SDPatternOperator OpNode,
+ X86VectorVTInfo _Dst,
+ X86VectorVTInfo _Src> {
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
- (_Dst.VT (X86SubVBroadcast
- (_Src.VT (_Src.LdFrag addr:$src))))>,
+ (_Dst.VT (OpNode addr:$src))>,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
@@ -1427,13 +1428,14 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
- X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
+ SDPatternOperator OpNode,
+ X86VectorVTInfo _Dst,
+ X86VectorVTInfo _Src> {
let hasSideEffects = 0, mayLoad = 1 in
defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(null_frag),
- (_Dst.VT (X86SubVBroadcast
- (_Src.VT (_Src.LdFrag addr:$src))))>,
+ (_Dst.VT (OpNode addr:$src))>,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
@@ -1443,225 +1445,194 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
//
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
- v16i32_info, v4i32x_info>,
+ X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
- v16f32_info, v4f32x_info>,
+ X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
- v8i64_info, v4i64x_info>, VEX_W,
+ X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
- v8f64_info, v4f64x_info>, VEX_W,
+ X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasAVX512] in {
-def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
+def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTF64X4rm addr:$src)>;
-def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
+def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
+ (VBROADCASTF64X4rm addr:$src)>;
+def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
+ (VBROADCASTI64X4rm addr:$src)>;
+def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTI64X4rm addr:$src)>;
-def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
+def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTI64X4rm addr:$src)>;
-def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
+def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTI64X4rm addr:$src)>;
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
- (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
- (v4f64 VR256X:$src), 1)>;
-def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
- (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
- (v8f32 VR256X:$src), 1)>;
-def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
- (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
- (v4i64 VR256X:$src), 1)>;
-def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
- (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
- (v8i32 VR256X:$src), 1)>;
-def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
- (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
- (v16i16 VR256X:$src), 1)>;
-def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
- (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
- (v32i8 VR256X:$src), 1)>;
-
-def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
+def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4rm addr:$src)>;
-def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTF32X4rm addr:$src)>;
+def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTI32X4rm addr:$src)>;
+def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4rm addr:$src)>;
-def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
+def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4rm addr:$src)>;
-def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
+def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4rm addr:$src)>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
(v16f32 immAllZerosV)),
(VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
(VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
(v16i32 immAllZerosV)),
(VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
(VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
+ (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
(v8f64 immAllZerosV)),
(VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
+ (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
(VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
(v8i64 immAllZerosV)),
(VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
(VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
- v8i32x_info, v4i32x_info>,
+ X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
- v8f32x_info, v4f32x_info>,
+ X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
-def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
+def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4Z256rm addr:$src)>;
-def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTF32X4Z256rm addr:$src)>;
+def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTI32X4Z256rm addr:$src)>;
+def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4Z256rm addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
+def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4Z256rm addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
+def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
(v8f32 immAllZerosV)),
(VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
(VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
(v8i32 immAllZerosV)),
(VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
(VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
-
-
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
- (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (v2f64 VR128X:$src), 1)>;
-def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
- (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (v4f32 VR128X:$src), 1)>;
-def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
- (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (v2i64 VR128X:$src), 1)>;
-def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
- (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (v4i32 VR128X:$src), 1)>;
-def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
- (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (v8i16 VR128X:$src), 1)>;
-def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
- (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (v16i8 VR128X:$src), 1)>;
}
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
- v4i64x_info, v2i64x_info>, VEX_W1X,
+ X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
- v4f64x_info, v2f64x_info>, VEX_W1X,
+ X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
- (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
(v4f64 immAllZerosV)),
(VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
- (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
(VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
(v4i64 immAllZerosV)),
(VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
(VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
- v8i64_info, v2i64x_info>, VEX_W,
+ X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
- v16i32_info, v8i32x_info>,
+ X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
- v8f64_info, v2f64x_info>, VEX_W,
+ X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
- v16f32_info, v8f32x_info>,
+ X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
+ (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
(v16f32 immAllZerosV)),
(VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
+ (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
(VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
+ (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
(v16i32 immAllZerosV)),
(VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
+ (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
(VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
(v8f64 immAllZerosV)),
(VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
(VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
(v8i64 immAllZerosV)),
(VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
(VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -2494,10 +2465,6 @@ def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(X86cmpm node:$src1, node:$src2, node:$cc), [{
return N->hasOneUse();
}]>;
-def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
- (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
- return N->hasOneUse();
-}]>;
def X86cmpm_imm_commute : SDNodeXForm<timm, [{
uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
@@ -2564,19 +2531,71 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in {
(!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
+
+ // Patterns for mask intrinsics.
+ def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
+ (_.KVT immAllOnesV)),
+ (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
+
+ def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
+ _.RC:$src2, timm:$cc)>;
+
+ def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
+ (_.KVT immAllOnesV)),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
+
+ def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
+ _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
+ addr:$src2, timm:$cc)>;
+
+ def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
+ (_.KVT immAllOnesV)),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
+
+ def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
+ _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
+ addr:$src2, timm:$cc)>;
+
+ // Patterns for mask intrinsics with loads in other operand.
+ def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
+ (_.KVT immAllOnesV)),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
+ (X86cmpm_imm_commute timm:$cc))>;
+
+ def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
+ _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2,
+ (X86cmpm_imm_commute timm:$cc))>;
+
+ def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
+ (_.KVT immAllOnesV)),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
+ (X86cmpm_imm_commute timm:$cc))>;
+
+ def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
+ _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2,
+ (X86cmpm_imm_commute timm:$cc))>;
}
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
// comparison code form (VCMP[EQ/LT/LE/...]
let Uses = [MXCSR] in
- defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
- (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $cc",
- (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
- (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- timm:$cc)>,
+ [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
+ [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
EVEX_B, Sched<[sched]>;
}
@@ -2836,6 +2855,8 @@ def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
+def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
@@ -2871,9 +2892,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
// Load/store kreg
let Predicates = [HasDQI] in {
- def : Pat<(store VK1:$src, addr:$dst),
- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
-
def : Pat<(v1i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
def : Pat<(v2i1 (load addr:$src)),
@@ -2919,10 +2937,9 @@ let Predicates = [HasAVX512] in {
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
- (COPY_TO_REGCLASS
- (KMOVWkr (AND32ri8
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
- (i32 1))), VK16)>;
+ (KMOVWkr (AND32ri8
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
+ (i32 1)))>;
}
// Mask unary operation
@@ -6487,8 +6504,8 @@ multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512vl_f64_info, "PD">, VEX_W;
}
-defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd,
- X86Fmadd, X86FmaddRnd>;
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
+ fma, X86FmaddRnd>;
defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
@@ -6578,8 +6595,8 @@ multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512vl_f64_info, "PD">, VEX_W;
}
-defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd,
- X86Fmadd, X86FmaddRnd>;
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
+ fma, X86FmaddRnd>;
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
@@ -6670,8 +6687,8 @@ multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512vl_f64_info, "PD">, VEX_W;
}
-defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd,
- X86Fmadd, X86FmaddRnd>;
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
+ fma, X86FmaddRnd>;
defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
@@ -6773,7 +6790,7 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
}
}
-defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
@@ -6981,7 +6998,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
}
}
-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
+defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
"SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
"SS", X86Movss, v4f32x_info, fp32imm0>;
@@ -6990,7 +7007,7 @@ defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMA
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
"SS", X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
+defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
"SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
"SD", X86Movsd, v2f64x_info, fp64imm0>;
@@ -7523,7 +7540,7 @@ multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd,
X86FoldableSchedWrite sched,
X86VectorVTInfo _src, X86VectorVTInfo _dst> {
- let Predicates = [HasAVX512] in {
+ let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
@@ -7534,7 +7551,7 @@ multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeSAE,
X86FoldableSchedWrite sched,
X86VectorVTInfo _src, X86VectorVTInfo _dst> {
- let Predicates = [HasAVX512] in {
+ let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
EVEX_CD8<32, CD8VT1>, XS;
@@ -10433,39 +10450,6 @@ defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
-let Predicates = [HasAVX512] in {
-// Provide fallback in case the load node that is used in the broadcast
-// patterns above is used by additional users, which prevents the pattern
-// selection.
-def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
- (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- 0)>;
-def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
- (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- 0)>;
-
-def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
- (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- 0)>;
-def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
- (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- 0)>;
-
-def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
- (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- 0)>;
-
-def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
- (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
- 0)>;
-}
-
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
// NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
@@ -10895,7 +10879,7 @@ multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
def mr : AVX512Ii8<opc, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
+ [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
addr:$dst)]>,
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
@@ -10906,7 +10890,7 @@ multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
- (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
+ (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
EVEX, TAPD, Sched<[WriteVecExtract]>;
defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
@@ -10919,7 +10903,7 @@ multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
- (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
+ (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
EVEX, PD, Sched<[WriteVecExtract]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
@@ -10959,12 +10943,13 @@ defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _, PatFrag LdFrag> {
+ X86VectorVTInfo _, PatFrag LdFrag,
+ SDPatternOperator immoperator> {
def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
- (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
+ (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
@@ -10975,10 +10960,10 @@ multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
- (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
+ (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
Sched<[WriteVecInsert]>;
- defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
+ defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
}
}
@@ -10993,7 +10978,7 @@ multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
- _.ScalarLdFrag>, TAPD;
+ _.ScalarLdFrag, imm>, TAPD;
}
}
@@ -11205,17 +11190,6 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
- // Additional patterns for matching loads in other positions.
- def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
- (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (OpNode _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src2, (i8 timm:$src4))),
- (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
-
// Additional patterns for matching zero masking with loads in other
// positions.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
@@ -11264,17 +11238,6 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
- // Additional patterns for matching broadcasts in other positions.
- def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
- _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
- (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (OpNode _.RC:$src1,
- (_.BroadcastLdFrag addr:$src3),
- _.RC:$src2, (i8 timm:$src4))),
- (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
-
// Additional patterns for matching zero masking with broadcasts in other
// positions.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
@@ -11346,398 +11309,6 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
avx512vl_i64_info>, VEX_W;
-// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
-let Predicates = [HasVLX] in {
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
- timm:$src4)>;
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (loadv16i8 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
- VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
- timm:$src4)>;
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (loadv8i16 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
- VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v4i32 (X86vpternlog VR128X:$src1,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v2i64 (X86vpternlog VR128X:$src1,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
- timm:$src4)>;
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (loadv32i8 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
- VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
- timm:$src4)>;
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (loadv16i16 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
- VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v8i32 (X86vpternlog VR256X:$src1,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v4i64 (X86vpternlog VR256X:$src1,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-}
-
-let Predicates = [HasAVX512] in {
- def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
- timm:$src4)>;
- def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
- (loadv64i8 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
- VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v64i8 (X86vpternlog VR512:$src1,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v64i8 (X86vpternlog VR512:$src1,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
- timm:$src4)>;
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
- (loadv32i16 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
- VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i16 (X86vpternlog VR512:$src1,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i16 (X86vpternlog VR512:$src1,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i16 (X86vpternlog VR512:$src1,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i32 (X86vpternlog VR512:$src1,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v8i64 (X86vpternlog VR512:$src1,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-}
-
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
@@ -12281,11 +11852,6 @@ defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul
defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
-def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
- (X86vpmaddwd node:$lhs, node:$rhs), [{
- return N->hasOneUse();
-}]>;
-
// Patterns to match VPDPWSSD from existing instructions/intrinsics.
let Predicates = [HasVNNI] in {
def : Pat<(v16i32 (add VR512:$src1,
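An illustrative sketch (not taken from the patch; the helper name is ad hoc): the removed vpternlog byte/word patterns and the vnot comment above rely on the instruction's 8-bit immediate acting as a 3-input truth table, indexed per bit position by the three source bits, with the first source supplying the high index bit. Under that model, imm = 0x0F is 1 exactly when the first source bit is 0, so the result is its bitwise NOT regardless of the other sources; the VPTERNLOG321_imm8/VPTERNLOG132_imm8 helpers seen in the deleted patterns merely permute this truth table when the operands are reordered.

#include <cassert>
#include <cstdint>

// Per-bit model of VPTERNLOG: the three source bits form an index into the
// 8-bit immediate, and the selected immediate bit becomes the result bit.
static uint64_t ternlog(uint64_t a, uint64_t b, uint64_t c, uint8_t imm) {
  uint64_t r = 0;
  for (int i = 0; i < 64; ++i) {
    unsigned idx =
        (((a >> i) & 1) << 2) | (((b >> i) & 1) << 1) | ((c >> i) & 1);
    r |= (uint64_t)((imm >> idx) & 1) << i;
  }
  return r;
}

int main() {
  uint64_t a = 0x0123456789abcdefULL;
  uint64_t b = 0xdeadbeefcafef00dULL;
  uint64_t c = 0x55aa55aa55aa55aaULL;
  // imm 0x0F selects 1 exactly when the first source's bit is 0, so the
  // result is ~a no matter what b and c hold -- the property behind the
  // "value 15" vnot patterns described in the comment above.
  assert(ternlog(a, b, c, 0x0F) == ~a);
  return 0;
}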
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
index f7f22285bd15..e83e1e74ff52 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1182,6 +1182,15 @@ defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
X86sub_flag, sub, 0, 1, 0>;
}
+// Version of XOR8rr that uses GR8_NOREX. This is used by the handling of
+// __builtin_parity where the last step xors an h-register with an l-register.
+let isCodeGenOnly = 1, hasSideEffects = 0, Constraints = "$src1 = $dst",
+ Defs = [EFLAGS], isCommutable = 1 in
+def XOR8rr_NOREX : I<0x30, MRMDestReg, (outs GR8_NOREX:$dst),
+ (ins GR8_NOREX:$src1, GR8_NOREX:$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}", []>,
+ Sched<[WriteALU]>;
+
// Arithmetic.
defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
1, 0>;
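An illustrative sketch (plain C++, not LLVM code; assumes a GCC/Clang-style compiler for the builtins): XOR8rr_NOREX exists so the __builtin_parity pattern added to X86InstrCompiler.td further down can xor an h-register into the matching l-register, and h-registers cannot be encoded once a REX prefix is present, hence the GR8_NOREX class. The reduction that ends in that byte xor looks roughly like this:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// __builtin_parity(x) is folded down to 16 bits, then the high byte is xored
// into the low byte (AH ^ AL on x86); the parity of that single byte is the
// answer. The final byte xor is what the new XOR8rr_NOREX pattern matches.
static int parity32(uint32_t x) {
  x ^= x >> 16;                    // fold 32 -> 16 bits
  uint8_t lo = (uint8_t)x;         // l-register half
  uint8_t hi = (uint8_t)(x >> 8);  // h-register half
  uint8_t folded = (uint8_t)(lo ^ hi);
  return __builtin_popcount(folded) & 1;  // stands in for reading PF
}

int main() {
  for (uint32_t x : {0u, 1u, 0x80000000u, 0xdeadbeefu, 0x12345678u})
    assert(parity32(x) == __builtin_parity(x));
  return 0;
}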
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
index 4df93fb2ed60..dc6361aecc60 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -73,25 +73,32 @@ let usesCustomInserter = 1, Defs = [EFLAGS] in {
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
(outs),
(ins GR8:$al,
- i64imm:$regsavefi, i64imm:$offset,
+ i32imm:$regsavefi, i32imm:$offset,
variable_ops),
"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
[(X86vastart_save_xmm_regs GR8:$al,
- imm:$regsavefi,
- imm:$offset),
+ timm:$regsavefi,
+ timm:$offset),
(implicit EFLAGS)]>;
-// The VAARG_64 pseudo-instruction takes the address of the va_list,
-// and places the address of the next argument into a register.
-let Defs = [EFLAGS] in
+// The VAARG_64 and VAARG_X32 pseudo-instructions take the address of the
+// va_list, and place the address of the next argument into a register.
+let Defs = [EFLAGS] in {
def VAARG_64 : I<0, Pseudo,
(outs GR64:$dst),
(ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
"#VAARG_64 $dst, $ap, $size, $mode, $align",
[(set GR64:$dst,
- (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
- (implicit EFLAGS)]>;
-
+ (X86vaarg64 addr:$ap, timm:$size, timm:$mode, timm:$align)),
+ (implicit EFLAGS)]>, Requires<[In64BitMode, IsLP64]>;
+def VAARG_X32 : I<0, Pseudo,
+ (outs GR32:$dst),
+ (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+ "#VAARG_X32 $dst, $ap, $size, $mode, $align",
+ [(set GR32:$dst,
+ (X86vaargx32 addr:$ap, timm:$size, timm:$mode, timm:$align)),
+ (implicit EFLAGS)]>, Requires<[In64BitMode, NotLP64]>;
+}
// When using segmented stacks these are lowered into instructions which first
// check if the current stacklet has enough free memory. If it does, memory is
@@ -467,11 +474,19 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_addr64",
[(X86tlsaddr tls64addr:$sym)]>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode, IsLP64]>;
def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_base_addr64",
[(X86tlsbaseaddr tls64baseaddr:$sym)]>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode, IsLP64]>;
+def TLS_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_addrX32",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[In64BitMode, NotLP64]>;
+def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_base_addrX32",
+ [(X86tlsbaseaddr tls32baseaddr:$sym)]>,
+ Requires<[In64BitMode, NotLP64]>;
}
// Darwin TLS Support
@@ -809,15 +824,6 @@ let Predicates = [UseIncDec] in {
}
// Atomic compare and swap.
-multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
- SDPatternOperator frag, X86MemOperand x86memop> {
-let isCodeGenOnly = 1, usesCustomInserter = 1 in {
- def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
- !strconcat(mnemonic, "\t$ptr"),
- [(frag addr:$ptr)]>, TB, LOCK;
-}
-}
-
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag> {
let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
@@ -841,8 +847,19 @@ let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
- Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW] in {
-defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
+ Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW],
+ isCodeGenOnly = 1, usesCustomInserter = 1 in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
+ "cmpxchg8b\t$ptr",
+ [(X86cas8 addr:$ptr)]>, TB, LOCK;
+}
+
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
+ Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
+ isCodeGenOnly = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
+def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
+ "cmpxchg16b\t$ptr",
+ []>, TB, LOCK;
}
// This pseudo must be used when the frame uses RBX as
@@ -852,50 +869,64 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
// RBX that will happen when setting the arguments for the instruction.
//
// Unlike the actual related instruction, we mark that this one
-// defines EBX (instead of using EBX).
+// defines RBX (instead of using RBX).
// The rationale is that we will define RBX during the expansion of
-// the pseudo. The argument feeding EBX is ebx_input.
+// the pseudo. The argument feeding RBX is rbx_input.
//
-// The additional argument, $ebx_save, is a temporary register used to
+// The additional argument, $rbx_save, is a temporary register used to
// save the value of RBX across the actual instruction.
//
-// To make sure the register assigned to $ebx_save does not interfere with
+// To make sure the register assigned to $rbx_save does not interfere with
// the definition of the actual instruction, we use a definition $dst which
// is tied to $rbx_save. That way, the live-range of $rbx_save spans across
// the instruction and we are sure we will have a valid register to restore
// the value of RBX.
-let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],
- Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW],
- isCodeGenOnly = 1, isPseudo = 1, Constraints = "$ebx_save = $dst",
- usesCustomInserter = 1 in {
-def LCMPXCHG8B_SAVE_EBX :
- I<0, Pseudo, (outs GR32:$dst),
- (ins i64mem:$ptr, GR32:$ebx_input, GR32:$ebx_save),
- !strconcat("cmpxchg8b", "\t$ptr"),
- [(set GR32:$dst, (X86cas8save_ebx addr:$ptr, GR32:$ebx_input,
- GR32:$ebx_save))]>;
+let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
+ Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
+ isCodeGenOnly = 1, isPseudo = 1,
+ mayLoad = 1, mayStore = 1, hasSideEffects = 0,
+ Constraints = "$rbx_save = $dst" in {
+def LCMPXCHG16B_SAVE_RBX :
+ I<0, Pseudo, (outs GR64:$dst),
+ (ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save), "", []>;
}
+// Pseudo instruction that doesn't read/write RBX. Will be turned into either
+// LCMPXCHG16B_SAVE_RBX or LCMPXCHG16B via a custom inserter.
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RCX, RDX],
+ Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
+ isCodeGenOnly = 1, isPseudo = 1,
+ mayLoad = 1, mayStore = 1, hasSideEffects = 0,
+ usesCustomInserter = 1 in {
+def LCMPXCHG16B_NO_RBX :
+ I<0, Pseudo, (outs), (ins i128mem:$ptr, GR64:$rbx_input), "",
+ [(X86cas16 addr:$ptr, GR64:$rbx_input)]>;
+}
-let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
- Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW] in {
-defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
- X86cas16, i128mem>, REX_W;
+// This pseudo must be used when the frame uses RBX/EBX as
+// the base pointer.
+// cf. the comment for LCMPXCHG16B_SAVE_RBX.
+let Defs = [EBX], Uses = [ECX, EAX],
+ Predicates = [HasMWAITX], SchedRW = [WriteSystem],
+ isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst" in {
+def MWAITX_SAVE_RBX :
+ I<0, Pseudo, (outs GR64:$dst),
+ (ins GR32:$ebx_input, GR64:$rbx_save),
+ "mwaitx",
+ []>;
}
-// Same as LCMPXCHG8B_SAVE_RBX but for the 16 Bytes variant.
-let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
- Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
- isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",
+// Pseudo mwaitx instruction to use for custom insertion.
+let Predicates = [HasMWAITX], SchedRW = [WriteSystem],
+ isCodeGenOnly = 1, isPseudo = 1,
usesCustomInserter = 1 in {
-def LCMPXCHG16B_SAVE_RBX :
- I<0, Pseudo, (outs GR64:$dst),
- (ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save),
- !strconcat("cmpxchg16b", "\t$ptr"),
- [(set GR64:$dst, (X86cas16save_rbx addr:$ptr, GR64:$rbx_input,
- GR64:$rbx_save))]>;
+def MWAITX :
+ I<0, Pseudo, (outs), (ins GR32:$ecx, GR32:$eax, GR32:$ebx),
+ "mwaitx",
+ [(int_x86_mwaitx GR32:$ecx, GR32:$eax, GR32:$ebx)]>;
}
+
defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", X86cas>;
// Atomic exchange and add
@@ -1182,49 +1213,49 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
return true;
}]>;
-def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
- (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
+ (TCRETURNri ptr_rc_tailcall:$dst, timm:$off)>,
Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
- (TCRETURNmi addr:$dst, imm:$off)>,
+def : Pat<(X86tcret (load addr:$dst), timm:$off),
+ (TCRETURNmi addr:$dst, timm:$off)>,
Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>;
-def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
- (TCRETURNdi tglobaladdr:$dst, imm:$off)>,
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), timm:$off),
+ (TCRETURNdi tglobaladdr:$dst, timm:$off)>,
Requires<[NotLP64]>;
-def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
- (TCRETURNdi texternalsym:$dst, imm:$off)>,
+def : Pat<(X86tcret (i32 texternalsym:$dst), timm:$off),
+ (TCRETURNdi texternalsym:$dst, timm:$off)>,
Requires<[NotLP64]>;
-def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
- (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
+ (TCRETURNri64 ptr_rc_tailcall:$dst, timm:$off)>,
Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
-def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
- (TCRETURNmi64 addr:$dst, imm:$off)>,
+def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
+ (TCRETURNmi64 addr:$dst, timm:$off)>,
Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
-def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
- (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
+ (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>,
Requires<[In64BitMode, UseIndirectThunkCalls]>;
-def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
- (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
+ (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, timm:$off)>,
Requires<[Not64BitMode, UseIndirectThunkCalls]>;
-def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
- (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), timm:$off),
+ (TCRETURNdi64 tglobaladdr:$dst, timm:$off)>,
Requires<[IsLP64]>;
-def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
- (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+def : Pat<(X86tcret (i64 texternalsym:$dst), timm:$off),
+ (TCRETURNdi64 texternalsym:$dst, timm:$off)>,
Requires<[IsLP64]>;
// Normal calls, with various flavors of addresses.
@@ -1313,15 +1344,18 @@ def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>;
// Any instruction that defines a 32-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. Any other 32-bit operation will zero-extend
-// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
-// 32 bits, they're probably just qualifying a CopyFromReg.
+// be copying from a truncate. AssertSext/AssertZext/AssertAlign aren't saying
+// anything about the upper 32 bits, they're probably just qualifying a
+// CopyFromReg. FREEZE may be coming from a truncate. Any other 32-bit
+// operation will zero-extend up to 64 bits.
def def32 : PatLeaf<(i32 GR32:$src), [{
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
N->getOpcode() != ISD::CopyFromReg &&
N->getOpcode() != ISD::AssertSext &&
- N->getOpcode() != ISD::AssertZext;
+ N->getOpcode() != ISD::AssertZext &&
+ N->getOpcode() != ISD::AssertAlign &&
+ N->getOpcode() != ISD::FREEZE;
}]>;
// In the case of a 32-bit def that is known to implicitly zero-extend,
@@ -1698,6 +1732,16 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
(EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>,
Requires<[In64BitMode]>;
+// Special pattern to catch the last step of __builtin_parity handling. Our
+// goal is to use an xor of an h-register with the corresponding l-register.
+// The above patterns would handle this on non 64-bit targets, but for 64-bit
+// we need to be more careful. We're using a NOREX instruction here in case
+// register allocation fails to keep the two registers together. So we need to
+// make sure we can't accidentally mix R8-R15 with an h-register.
+def : Pat<(X86xor_flag (i8 (trunc GR32:$src)),
+ (i8 (trunc (srl_su GR32:$src, (i8 8))))),
+ (XOR8rr_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit),
+ (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
// (shl x, 1) ==> (add x, x)
// Note that if x is undef (immediate or otherwise), we could theoretically
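An illustrative sketch (standard C++, not LLVM code): the LCMPXCHG16B / LCMPXCHG16B_SAVE_RBX / LCMPXCHG16B_NO_RBX reshuffle above is about lowering a 16-byte compare-and-swap when RBX doubles as the frame's base pointer. At the source level this is the operation a 16-byte std::atomic relies on; whether it is genuinely lock-free depends on compiler flags (e.g. -mcx16) and the standard library (linking -latomic may be needed), so treat the printed lock_free value as informational.

#include <atomic>
#include <cstdio>

// A 16-byte, trivially copyable value: compare_exchange on it is what the
// cmpxchg16b-based lowering ultimately serves (or a library lock when the
// CPU/ABI does not provide the instruction).
struct Pair { long long a, b; };

int main() {
  std::atomic<Pair> p(Pair{1, 2});
  Pair expected{1, 2}, desired{3, 4};
  bool swapped = p.compare_exchange_strong(expected, desired);
  Pair now = p.load();
  std::printf("lock_free=%d swapped=%d a=%lld b=%lld\n",
              (int)p.is_lock_free(), (int)swapped, now.a, now.b);
  return 0;
}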
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td
index 4dbd6bb8cd7e..f9be3a783279 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td
@@ -123,7 +123,7 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
- loadv4f32, loadv8f32, X86any_Fmadd, v4f32, v8f32,
+ loadv4f32, loadv8f32, any_fma, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
loadv4f32, loadv8f32, X86any_Fmsub, v4f32, v8f32,
@@ -138,7 +138,7 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
- loadv2f64, loadv4f64, X86any_Fmadd, v2f64,
+ loadv2f64, loadv4f64, any_fma, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
loadv2f64, loadv4f64, X86any_Fmsub, v2f64,
@@ -319,7 +319,7 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
VR128, sdmem, sched>, VEX_W;
}
-defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86any_Fmadd,
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", any_fma,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86any_Fmsub,
SchedWriteFMA.Scl>, VEX_LIG;
@@ -372,12 +372,12 @@ multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
}
}
-defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma_patterns<any_fma, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86any_Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86any_Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86any_Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
-defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma_patterns<any_fma, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86any_Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86any_Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86any_Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
@@ -538,7 +538,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
- defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86any_Fmadd, loadf32,
+ defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, any_fma, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
@@ -555,7 +555,7 @@ let ExeDomain = SSEPackedSingle in {
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
// Packed Instructions
- defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86any_Fmadd, v4f32, v8f32,
+ defm VFMADDPS4 : fma4p<0x68, "vfmaddps", any_fma, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86any_Fmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
@@ -571,7 +571,7 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
- defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86any_Fmadd, loadf64,
+ defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, any_fma, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
@@ -588,7 +588,7 @@ let ExeDomain = SSEPackedDouble in {
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
// Packed Instructions
- defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86any_Fmadd, v2f64, v4f64,
+ defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", any_fma, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86any_Fmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
@@ -629,12 +629,12 @@ multiclass scalar_fma4_patterns<SDNode Op, string Name,
}
}
-defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;
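A short illustration (standard C++, not LLVM code): any_fma is the target-independent PatFrags covering both ISD::FMA and ISD::STRICT_FMA, which is what the removed X86-local X86any_Fmadd covered (see the X86InstrFragmentsSIMD.td hunk below), so the replacements above are mechanical. What makes fused multiply-add a distinct operation at all is the single rounding step:

#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  // Chosen so every intermediate value is exactly representable in double.
  double x = 1.0 + std::ldexp(1.0, -27);     // 1 + 2^-27
  double c = -(1.0 + std::ldexp(1.0, -26));  // -(1 + 2^-26)

  double fused = std::fma(x, x, c);  // one rounding: exactly 2^-54
  volatile double prod = x * x;      // forced separate rounding: 1 + 2^-26
  double split = prod + c;           // ... so the sum collapses to 0.0

  std::printf("fma = %g, mul-then-add = %g\n", fused, split);
  assert(fused == std::ldexp(1.0, -54) && split == 0.0);
  return 0;
}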
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
index 67dcb8d00ea5..961b4e590365 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -392,13 +392,13 @@ def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW, FPCW], mayLoad = 1 in {
-def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
-def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst">;
+def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src">;
+def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins anymem:$src), "frstor\t$src">;
}
let Defs = [FPSW, FPCW], Uses = [FPSW, FPCW], mayStore = 1 in {
-def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">;
-def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst">;
+def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst">;
+def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins anymem:$dst), "fnsave\t$dst">;
}
let Uses = [FPSW], mayStore = 1 in
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index e16382e956c5..17fe7f0bd310 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -300,11 +300,13 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
{ X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
{ X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
{ X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
+ { X86::MOV64toSDrr, X86::MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
{ X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
{ X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
{ X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE },
{ X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
@@ -357,6 +359,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
{ X86::VEXTRACTI64x4Zrr, X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE },
{ X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE },
{ X86::VEXTRACTPSrr, X86::VEXTRACTPSmr, TB_FOLDED_STORE },
+ { X86::VMOV64toSDZrr, X86::MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VMOV64toSDrr, X86::MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
@@ -367,6 +371,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
{ X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDI2SSZrr, X86::MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VMOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
@@ -3742,18 +3748,26 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmk, 0 },
{ X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmk, 0 },
{ X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmk, 0 },
+ { X86::VPDPBUSDSYrr, X86::VPDPBUSDSYrm, 0 },
{ X86::VPDPBUSDSZ128r, X86::VPDPBUSDSZ128m, 0 },
{ X86::VPDPBUSDSZ256r, X86::VPDPBUSDSZ256m, 0 },
{ X86::VPDPBUSDSZr, X86::VPDPBUSDSZm, 0 },
+ { X86::VPDPBUSDSrr, X86::VPDPBUSDSrm, 0 },
+ { X86::VPDPBUSDYrr, X86::VPDPBUSDYrm, 0 },
{ X86::VPDPBUSDZ128r, X86::VPDPBUSDZ128m, 0 },
{ X86::VPDPBUSDZ256r, X86::VPDPBUSDZ256m, 0 },
{ X86::VPDPBUSDZr, X86::VPDPBUSDZm, 0 },
+ { X86::VPDPBUSDrr, X86::VPDPBUSDrm, 0 },
+ { X86::VPDPWSSDSYrr, X86::VPDPWSSDSYrm, 0 },
{ X86::VPDPWSSDSZ128r, X86::VPDPWSSDSZ128m, 0 },
{ X86::VPDPWSSDSZ256r, X86::VPDPWSSDSZ256m, 0 },
{ X86::VPDPWSSDSZr, X86::VPDPWSSDSZm, 0 },
+ { X86::VPDPWSSDSrr, X86::VPDPWSSDSrm, 0 },
+ { X86::VPDPWSSDYrr, X86::VPDPWSSDYrm, 0 },
{ X86::VPDPWSSDZ128r, X86::VPDPWSSDZ128m, 0 },
{ X86::VPDPWSSDZ256r, X86::VPDPWSSDZ256m, 0 },
{ X86::VPDPWSSDZr, X86::VPDPWSSDZm, 0 },
+ { X86::VPDPWSSDrr, X86::VPDPWSSDrm, 0 },
{ X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
{ X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
{ X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
index d7752e656b55..686b19fc0a6c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
@@ -216,6 +216,7 @@ class T8XS : T8 { Prefix OpPrefix = XS; }
class TAPS : TA { Prefix OpPrefix = PS; }
class TAPD : TA { Prefix OpPrefix = PD; }
class TAXD : TA { Prefix OpPrefix = XD; }
+class TAXS : TA { Prefix OpPrefix = XS; }
class VEX { Encoding OpEnc = EncVEX; }
class VEX_W { bit HasVEX_W = 1; }
class VEX_WIG { bit IgnoresVEX_W = 1; }
@@ -263,6 +264,9 @@ class NotMemoryFoldable { bit isMemoryFoldable = 0; }
// Prevent EVEX->VEX conversion from considering this instruction.
class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }
+// Force the instruction to use VEX encoding.
+class ExplicitVEXPrefix { bit ExplicitVEXPrefix = 1; }
+
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
: Instruction {
@@ -347,6 +351,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit isMemoryFoldable = 1; // Is it allowed to memory fold/unfold this instruction?
bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion.
+ bit ExplicitVEXPrefix = 0; // Force the instruction to use VEX encoding.
// TSFlags layout should be kept in sync with X86BaseInfo.h.
let TSFlags{6-0} = FormBits;
@@ -375,6 +380,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{51-45} = CD8_Scale;
let TSFlags{52} = hasEVEX_RC;
let TSFlags{53} = hasNoTrackPrefix;
+ let TSFlags{54} = ExplicitVEXPrefix;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index f3f7d17d9b3c..777c5a158b4c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -87,16 +87,16 @@ def X86multishift : SDNode<"X86ISD::MULTISHIFT",
SDTCisSameAs<1,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v16i8>,
- SDTCisPtrTy<2>]>>;
+ SDTCisVT<2, i8>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v8i16>,
- SDTCisPtrTy<2>]>>;
+ SDTCisVT<2, i8>]>>;
def X86pinsrb : SDNode<"X86ISD::PINSRB",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+ SDTCisVT<2, i32>, SDTCisVT<3, i8>]>>;
def X86pinsrw : SDNode<"X86ISD::PINSRW",
SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+ SDTCisVT<2, i32>, SDTCisVT<3, i8>]>>;
def X86insertps : SDNode<"X86ISD::INSERTPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>;
@@ -109,6 +109,8 @@ def X86vextractst : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86VBroadcastld : SDNode<"X86ISD::VBROADCAST_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86SubVBroadcastld : SDNode<"X86ISD::SUBV_BROADCAST_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
@@ -207,16 +209,21 @@ def X86CmpMaskCC :
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;
+def X86MaskCmpMaskCC :
+ SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
+ SDTCisVec<1>, SDTCisSameAs<2, 1>,
+ SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>, SDTCisSameAs<4, 0>]>;
def X86CmpMaskCCScalar :
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+def X86cmpmm : SDNode<"X86ISD::CMPMM", X86MaskCmpMaskCC>;
def X86strict_cmpm : SDNode<"X86ISD::STRICT_CMPM", X86CmpMaskCC, [SDNPHasChain]>;
def X86any_cmpm : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_cmpm node:$src1, node:$src2, node:$src3),
(X86cmpm node:$src1, node:$src2, node:$src3)]>;
-def X86cmpmSAE : SDNode<"X86ISD::CMPM_SAE", X86CmpMaskCC>;
+def X86cmpmmSAE : SDNode<"X86ISD::CMPMM_SAE", X86MaskCmpMaskCC>;
def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>;
def X86cmpmsSAE : SDNode<"X86ISD::FSETCCM_SAE", X86CmpMaskCCScalar>;
@@ -488,10 +495,6 @@ def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS",
SDTypeProfile<1, 2, [SDTCisVT<0, v1i1>,
SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>;
-def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisSubVecOfVec<1, 0>]>, []>;
-
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
@@ -533,11 +536,6 @@ def X86fgetexpSAE : SDNode<"X86ISD::FGETEXP_SAE", SDTFPUnaryOp>;
def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>;
def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>;
-def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86strict_Fmadd : SDNode<"ISD::STRICT_FMA", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
-def X86any_Fmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
- [(X86strict_Fmadd node:$src1, node:$src2, node:$src3),
- (X86Fmadd node:$src1, node:$src2, node:$src3)]>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
@@ -963,6 +961,16 @@ def X86VBroadcastld64 : PatFrag<(ops node:$src),
return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
}]>;
+def X86SubVBroadcastld128 : PatFrag<(ops node:$src),
+ (X86SubVBroadcastld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 16;
+}]>;
+
+def X86SubVBroadcastld256 : PatFrag<(ops node:$src),
+ (X86SubVBroadcastld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 32;
+}]>;
+
// Scalar SSE intrinsic fragments to match several different types of loads.
// Used by scalar SSE intrinsic instructions which have 128 bit types, but
// only load a single element.
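An illustrative sketch (plain C++, not LLVM code): the new X86SubVBroadcastld128/256 fragments classify SUBV_BROADCAST_LOAD nodes by how many bytes they read from memory (16 or 32). Semantically the operation loads one subvector and repeats it across the destination, as e.g. vbroadcasti32x4 zmm, m128 does; a scalar model:

#include <cassert>
#include <cstring>

// Scalar model of a 128-bit subvector broadcast into a 512-bit vector: load
// 16 bytes once and repeat them four times across the destination.
static void subvBroadcastLoad128To512(const unsigned char *mem,
                                      unsigned char out[64]) {
  for (int i = 0; i < 64; i += 16)
    std::memcpy(out + i, mem, 16);
}

int main() {
  unsigned char lane[16], zmm[64];
  for (int i = 0; i < 16; ++i)
    lane[i] = (unsigned char)i;
  subvBroadcastLoad128To512(lane, zmm);
  assert(zmm[0] == 0 && zmm[17] == 1 && zmm[63] == 15);
  return 0;
}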
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
index 42c111173570..d9bab14f0c08 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -28,9 +28,9 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -947,9 +947,9 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
}
/// Return true if register is PIC base, i.e. defined by X86::MOVPC32r.
-static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) {
// Don't waste compile time scanning use-def chains of physregs.
- if (!Register::isVirtualRegister(BaseReg))
+ if (!BaseReg.isVirtual())
return false;
bool isPICBase = false;
for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
@@ -1127,7 +1127,8 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const {
bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
- if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
+ if (ClobbersEFLAGS && MBB.computeRegisterLiveness(&TRI, X86::EFLAGS, I) !=
+ MachineBasicBlock::LQR_Dead) {
// The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
// effects.
int Value;
@@ -1205,8 +1206,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
isKill = Src.isKill();
assert(!Src.isUndef() && "Undef op doesn't need optimization");
- if (Register::isVirtualRegister(NewSrc) &&
- !MF.getRegInfo().constrainRegClass(NewSrc, RC))
+ if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC))
return false;
return true;
@@ -1214,7 +1214,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// This is for an LEA64_32r and incoming registers are 32-bit. One way or
// another we need to add 64-bit registers to the final MI.
- if (Register::isPhysicalRegister(SrcReg)) {
+ if (SrcReg.isPhysical()) {
ImplicitOp = Src;
ImplicitOp.setImplicit();
@@ -1409,9 +1409,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
// LEA can't handle RSP.
- if (Register::isVirtualRegister(Src.getReg()) &&
- !MF.getRegInfo().constrainRegClass(Src.getReg(),
- &X86::GR64_NOSPRegClass))
+ if (Src.getReg().isVirtual() && !MF.getRegInfo().constrainRegClass(
+ Src.getReg(), &X86::GR64_NOSPRegClass))
return nullptr;
NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
@@ -2567,6 +2566,10 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VPTERNLOGQZ256rmbikz:
case X86::VPTERNLOGQZrmbikz:
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+ case X86::VPDPWSSDYrr:
+ case X86::VPDPWSSDrr:
+ case X86::VPDPWSSDSYrr:
+ case X86::VPDPWSSDSrr:
case X86::VPDPWSSDZ128r:
case X86::VPDPWSSDZ128rk:
case X86::VPDPWSSDZ128rkz:
@@ -3527,11 +3530,10 @@ X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
return None;
}
-static unsigned getLoadStoreRegOpcode(unsigned Reg,
+static unsigned getLoadStoreRegOpcode(Register Reg,
const TargetRegisterClass *RC,
- bool isStackAligned,
- const X86Subtarget &STI,
- bool load) {
+ bool IsStackAligned,
+ const X86Subtarget &STI, bool load) {
bool HasAVX = STI.hasAVX();
bool HasAVX512 = STI.hasAVX512();
bool HasVLX = STI.hasVLX();
@@ -3604,7 +3606,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
case 16: {
if (X86::VR128XRegClass.hasSubClassEq(RC)) {
// If stack is realigned we can use aligned stores.
- if (isStackAligned)
+ if (IsStackAligned)
return load ?
(HasVLX ? X86::VMOVAPSZ128rm :
HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX :
@@ -3636,7 +3638,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
case 32:
assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
// If stack is realigned we can use aligned stores.
- if (isStackAligned)
+ if (IsStackAligned)
return load ?
(HasVLX ? X86::VMOVAPSZ256rm :
HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX :
@@ -3655,13 +3657,80 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
case 64:
assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
- if (isStackAligned)
+ if (IsStackAligned)
return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
else
return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
}
}
+Optional<ExtAddrMode>
+X86InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
+ const TargetRegisterInfo *TRI) const {
+ const MCInstrDesc &Desc = MemI.getDesc();
+ int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
+ if (MemRefBegin < 0)
+ return None;
+
+ MemRefBegin += X86II::getOperandBias(Desc);
+
+ auto &BaseOp = MemI.getOperand(MemRefBegin + X86::AddrBaseReg);
+ if (!BaseOp.isReg()) // Can be an MO_FrameIndex
+ return None;
+
+ const MachineOperand &DispMO = MemI.getOperand(MemRefBegin + X86::AddrDisp);
+ // Displacement can be symbolic
+ if (!DispMO.isImm())
+ return None;
+
+ ExtAddrMode AM;
+ AM.BaseReg = BaseOp.getReg();
+ AM.ScaledReg = MemI.getOperand(MemRefBegin + X86::AddrIndexReg).getReg();
+ AM.Scale = MemI.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm();
+ AM.Displacement = DispMO.getImm();
+ return AM;
+}
+
+bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
+ const Register Reg,
+ int64_t &ImmVal) const {
+ if (MI.getOpcode() != X86::MOV32ri && MI.getOpcode() != X86::MOV64ri)
+ return false;
+ // Mov Src can be a global address.
+ if (!MI.getOperand(1).isImm() || MI.getOperand(0).getReg() != Reg)
+ return false;
+ ImmVal = MI.getOperand(1).getImm();
+ return true;
+}
+
+bool X86InstrInfo::preservesZeroValueInReg(
+ const MachineInstr *MI, const Register NullValueReg,
+ const TargetRegisterInfo *TRI) const {
+ if (!MI->modifiesRegister(NullValueReg, TRI))
+ return true;
+ switch (MI->getOpcode()) {
+ // Shift right/left of a null unto itself is still a null, i.e. rax = shl rax
+ // X.
+ case X86::SHR64ri:
+ case X86::SHR32ri:
+ case X86::SHL64ri:
+ case X86::SHL32ri:
+ assert(MI->getOperand(0).isDef() && MI->getOperand(1).isUse() &&
+ "expected for shift opcode!");
+ return MI->getOperand(0).getReg() == NullValueReg &&
+ MI->getOperand(1).getReg() == NullValueReg;
+ // Zero extend of a sub-reg of NullValueReg into itself does not change the
+ // null value.
+ case X86::MOV32rr:
+ return llvm::all_of(MI->operands(), [&](const MachineOperand &MO) {
+ return TRI->isSubRegisterEq(NullValueReg, MO.getReg());
+ });
+ default:
+ return false;
+ }
+ llvm_unreachable("Should be handled above!");
+}
+
bool X86InstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -3706,19 +3775,17 @@ bool X86InstrInfo::getMemOperandsWithOffsetWidth(
return true;
}
-static unsigned getStoreRegOpcode(unsigned SrcReg,
+static unsigned getStoreRegOpcode(Register SrcReg,
const TargetRegisterClass *RC,
- bool isStackAligned,
+ bool IsStackAligned,
const X86Subtarget &STI) {
- return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false);
+ return getLoadStoreRegOpcode(SrcReg, RC, IsStackAligned, STI, false);
}
-
-static unsigned getLoadRegOpcode(unsigned DestReg,
+static unsigned getLoadRegOpcode(Register DestReg,
const TargetRegisterClass *RC,
- bool isStackAligned,
- const X86Subtarget &STI) {
- return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true);
+ bool IsStackAligned, const X86Subtarget &STI) {
+ return getLoadStoreRegOpcode(DestReg, RC, IsStackAligned, STI, true);
}
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -3729,13 +3796,31 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFunction &MF = *MBB.getParent();
assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
"Stack slot too small for store");
- unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
- bool isAligned =
- (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
- RI.canRealignStack(MF);
- unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
- addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
- .addReg(SrcReg, getKillRegState(isKill));
+ if (RC->getID() == X86::TILERegClassID) {
+ unsigned Opc = X86::TILESTORED;
+ // tilestored %tmm, (%sp, %idx)
+ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+ Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+ BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
+ MachineInstr *NewMI =
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+ MachineOperand &MO = NewMI->getOperand(2);
+ MO.setReg(VirtReg);
+ MO.setIsKill(true);
+ } else if (RC->getID() == X86::TILECFGRegClassID) {
+ unsigned Opc = X86::PSTTILECFG;
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+ } else {
+ unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
+ bool isAligned =
+ (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+ }
}
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -3743,13 +3828,32 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Register DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- const MachineFunction &MF = *MBB.getParent();
- unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
- bool isAligned =
- (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
- RI.canRealignStack(MF);
- unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
- addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx);
+ if (RC->getID() == X86::TILERegClassID) {
+ unsigned Opc = X86::TILELOADD;
+ // tileloadd (%sp, %idx), %tmm
+ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+ Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+ MachineInstr *NewMI =
+ BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
+ NewMI = addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
+ FrameIdx);
+ MachineOperand &MO = NewMI->getOperand(3);
+ MO.setReg(VirtReg);
+ MO.setIsKill(true);
+ } else if (RC->getID() == X86::TILECFGRegClassID) {
+ unsigned Opc = X86::PLDTILECFG;
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
+ FrameIdx);
+ } else {
+ const MachineFunction &MF = *MBB.getParent();
+ unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
+ bool isAligned =
+ (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
+ FrameIdx);
+ }
}
bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
@@ -4312,7 +4416,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
/// instructions in-between do not load or store, and have no side effects.
MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
const MachineRegisterInfo *MRI,
- unsigned &FoldAsLoadDefReg,
+ Register &FoldAsLoadDefReg,
MachineInstr *&DefMI) const {
// Check whether we can move DefMI here.
DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
@@ -4375,8 +4479,8 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
/// %k4 = K_SET1
/// to:
/// %k4 = KXNORrr %k0, %k0
-static bool Expand2AddrKreg(MachineInstrBuilder &MIB,
- const MCInstrDesc &Desc, unsigned Reg) {
+static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc,
+ Register Reg) {
assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
MIB->setDesc(Desc);
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
@@ -4822,7 +4926,7 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
// If MI is marked as reading Reg, the partial register update is wanted.
const MachineOperand &MO = MI.getOperand(0);
Register Reg = MO.getReg();
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
if (MO.readsReg() || MI.readsVirtualRegister(Reg))
return 0;
} else {
@@ -5120,18 +5224,12 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
/// Like getPartialRegUpdateClearance, this makes a strong assumption that the
/// high bits that are passed-through are not live.
unsigned
-X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
+X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const {
- for (unsigned i = MI.getNumExplicitDefs(), e = MI.getNumExplicitOperands();
- i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.isUndef() &&
- Register::isPhysicalRegister(MO.getReg()) &&
- hasUndefRegUpdate(MI.getOpcode(), i)) {
- OpNum = i;
- return UndefRegClearance;
- }
- }
+ const MachineOperand &MO = MI.getOperand(OpNum);
+ if (Register::isPhysicalRegister(MO.getReg()) &&
+ hasUndefRegUpdate(MI.getOpcode(), OpNum))
+ return UndefRegClearance;
return 0;
}
@@ -5213,7 +5311,7 @@ static void updateOperandRegConstraints(MachineFunction &MF,
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
auto *NewRC = MRI.constrainRegClass(
@@ -5464,6 +5562,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
if (I != nullptr) {
unsigned Opcode = I->DstOp;
+ bool FoldedLoad =
+ isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_LOAD) || OpNum > 0;
+ bool FoldedStore =
+ isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_STORE);
MaybeAlign MinAlign =
decodeMaybeAlign((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT);
if (MinAlign && Alignment < *MinAlign)
@@ -5474,20 +5576,25 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum,
&RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
- if (Size < RCSize) {
- // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
- // Check if it's safe to fold the load. If the size of the object is
- // narrower than the load width, then it's not.
- if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
- return nullptr;
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
+ if (FoldedLoad && Size < RCSize) {
// If this is a 64-bit load, but the spill slot is 32, then we can do
// a 32-bit load which is implicitly zero-extended. This likely is
// due to live interval analysis remat'ing a load from stack slot.
+ if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+ return nullptr;
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
return nullptr;
Opcode = X86::MOV32rm;
NarrowToMOV32rm = true;
}
+ // For stores, make sure the size of the object is equal to the size of
+ // the store. If the object is larger, the extra bits would be garbage. If
+ // the object is smaller we might overwrite another object or fault.
+ if (FoldedStore && Size != RCSize)
+ return nullptr;
}
if (isTwoAddrFold)
@@ -5500,7 +5607,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// value and zero-extend the top bits. Change the destination register
// to a 32-bit one.
Register DstReg = NewMI->getOperand(0).getReg();
- if (Register::isPhysicalRegister(DstReg))
+ if (DstReg.isPhysical())
NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
else
NewMI->getOperand(0).setSubReg(X86::sub_32bit);
@@ -6357,7 +6464,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
}
if (Load)
BeforeOps.push_back(SDValue(Load, 0));
- BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end());
+ llvm::append_range(BeforeOps, AfterOps);
// Change CMP32ri r, 0 back to TEST32rr r, r, etc.
switch (Opc) {
default: break;
@@ -6675,6 +6782,18 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
return true;
}
+bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+
+ // ENDBR instructions should not be scheduled around.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32)
+ return true;
+
+ return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
+}
+
bool X86InstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
@@ -6705,7 +6824,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
"X86-64 PIC uses RIP relative addressing");
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
- unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+ Register GlobalBaseReg = X86FI->getGlobalBaseReg();
if (GlobalBaseReg != 0)
return GlobalBaseReg;
@@ -8261,7 +8380,7 @@ describeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg,
// If the described register is a sub-register of the destination register,
// then pick out the source register's corresponding sub-register.
if (unsigned SubRegIdx = TRI->getSubRegIndex(DestReg, DescribedReg)) {
- unsigned SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx);
+ Register SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx);
return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
}
@@ -8525,7 +8644,7 @@ namespace {
return false;
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+ Register GlobalBaseReg = X86FI->getGlobalBaseReg();
// If we didn't need a GlobalBaseReg, don't insert code.
if (GlobalBaseReg == 0)
@@ -8538,7 +8657,7 @@ namespace {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
const X86InstrInfo *TII = STI.getInstrInfo();
- unsigned PC;
+ Register PC;
if (STI.isPICStyleGOT())
PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
else
@@ -8608,7 +8727,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char CGBR::ID = 0;
FunctionPass*
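An illustrative sketch (plain C++, not LLVM code; the struct below is a plain-value stand-in, not the real TargetInstrInfo::ExtAddrMode): the getAddrModeFromMemoryOp override added earlier in this file decomposes an x86 memory operand into BaseReg, ScaledReg, Scale and Displacement, and the address it denotes is simply base + scale*index + displacement:

#include <cassert>
#include <cstdint>

// Plain-value stand-in for the ExtAddrMode the hook fills in: the register
// fields are replaced by the values those registers would hold at runtime.
struct AddrMode {
  uint64_t Base;
  uint64_t Index;
  int64_t Scale;
  int64_t Disp;
};

static uint64_t effectiveAddress(const AddrMode &AM) {
  return AM.Base + (uint64_t)(AM.Scale * (int64_t)AM.Index) + (uint64_t)AM.Disp;
}

int main() {
  // e.g. mov rax, qword ptr [rbx + 8*rcx + 0x10] with rbx = 0x1000, rcx = 3
  AddrMode AM{0x1000, 3, 8, 0x10};
  assert(effectiveAddress(AM) == 0x1000 + 8 * 3 + 0x10);
  return 0;
}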
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
index 89f2ff118c37..d7d2370c6f67 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
@@ -317,6 +317,17 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
+ Optional<ExtAddrMode>
+ getAddrModeFromMemoryOp(const MachineInstr &MemI,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg,
+ int64_t &ImmVal) const override;
+
+ bool preservesZeroValueInReg(const MachineInstr *MI,
+ const Register NullValueReg,
+ const TargetRegisterInfo *TRI) const override;
+
bool getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt,
SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
@@ -409,6 +420,13 @@ public:
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1,
int64_t &Offset2) const override;
+ /// isSchedulingBoundary - Overrides the isSchedulingBoundary from
+ /// Codegen/TargetInstrInfo.cpp to make it capable of identifying ENDBR
+ /// instructions and prevent them from being re-scheduled.
+ bool isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
/// should be scheduled together. On some targets if two loads are loading from
@@ -430,16 +448,6 @@ public:
/// instruction that defines the specified register class.
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
- /// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction tha
- /// would clobber the EFLAGS condition register. Note the result may be
- /// conservative. If it cannot definitely determine the safety after visiting
- /// a few instructions in each direction it assumes it's not safe.
- bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- return MBB.computeRegisterLiveness(&RI, X86::EFLAGS, I, 4) ==
- MachineBasicBlock::LQR_Dead;
- }
-
/// True if MI has a condition code def, e.g. EFLAGS, that is
/// not marked dead.
bool hasLiveCondCodeDef(MachineInstr &MI) const;
@@ -462,7 +470,7 @@ public:
unsigned
getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const override;
- unsigned getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
+ unsigned getUndefRegClearance(const MachineInstr &MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const override;
void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const override;
@@ -517,7 +525,7 @@ public:
/// the machine instruction generated due to folding.
MachineInstr *optimizeLoadInstr(MachineInstr &MI,
const MachineRegisterInfo *MRI,
- unsigned &FoldAsLoadDefReg,
+ Register &FoldAsLoadDefReg,
MachineInstr *&DefMI) const override;
std::pair<unsigned, unsigned>
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
index 3ea0ae8a8840..b006d1d9aa3a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
@@ -69,13 +69,8 @@ def SDTX86wrpkru : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
SDTCisVT<2, i8>]>;
-def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-def SDTX86caspairSaveEbx8 : SDTypeProfile<1, 3,
- [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
-def SDTX86caspairSaveRbx16 : SDTypeProfile<1, 3,
- [SDTCisVT<0, i64>, SDTCisPtrTy<1>,
- SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
+def SDTX86cas8pair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86cas16pair : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i64>]>;
def SDTLockBinaryArithWithFlags : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisPtrTy<1>,
@@ -99,11 +94,11 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
SDTCisVT<1, iPTR>,
SDTCisVT<2, iPTR>]>;
-def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, i32>,
- SDTCisVT<3, i8>,
- SDTCisVT<4, i32>]>;
+def SDT_X86VAARG : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i8>,
+ SDTCisVT<4, i32>]>;
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
@@ -132,6 +127,11 @@ def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
def SDT_X86ENQCMD : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisPtrTy<1>, SDTCisSameAs<1, 2>]>;
+def SDT_X86AESENCDECKL : SDTypeProfile<2, 2, [SDTCisVT<0, v2i64>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, v2i64>,
+ SDTCisPtrTy<3>]>;
+
def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
[SDNPHasChain,SDNPSideEffect]>;
def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
@@ -169,20 +169,12 @@ def X86wrpkru : SDNode<"X86ISD::WRPKRU", SDTX86wrpkru,
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
-def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair,
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8pair,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
-def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
+def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86cas16pair,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
-def X86cas8save_ebx : SDNode<"X86ISD::LCMPXCHG8_SAVE_EBX_DAG",
- SDTX86caspairSaveEbx8,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
- SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def X86cas16save_rbx : SDNode<"X86ISD::LCMPXCHG16_SAVE_RBX_DAG",
- SDTX86caspairSaveRbx16,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
- SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -194,7 +186,11 @@ def X86vastart_save_xmm_regs :
SDT_X86VASTART_SAVE_XMM_REGS,
[SDNPHasChain, SDNPVariadic]>;
def X86vaarg64 :
- SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+ SDNode<"X86ISD::VAARG_64", SDT_X86VAARG,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86vaargx32 :
+ SDNode<"X86ISD::VAARG_X32", SDT_X86VAARG,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
SDNPMemOperand]>;
def X86callseq_start :
@@ -284,6 +280,7 @@ def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
SDNPMemOperand]>;
def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
+def X86bextri : SDNode<"X86ISD::BEXTRI", SDTIntBinOp>;
def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>;
@@ -323,6 +320,22 @@ def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD,
[SDNPHasChain, SDNPSideEffect]>;
def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD,
[SDNPHasChain, SDNPSideEffect]>;
+def X86testui : SDNode<"X86ISD::TESTUI",
+ SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86aesenc128kl : SDNode<"X86ISD::AESENC128KL", SDT_X86AESENCDECKL,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86aesdec128kl : SDNode<"X86ISD::AESDEC128KL", SDT_X86AESENCDECKL,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86aesenc256kl : SDNode<"X86ISD::AESENC256KL", SDT_X86AESENCDECKL,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86aesdec256kl : SDNode<"X86ISD::AESDEC256KL", SDT_X86AESENCDECKL,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
@@ -901,6 +914,8 @@ def PKU : Predicate<"Subtarget->hasPKU()">;
def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">;
+def HasAVXVNNI : Predicate <"Subtarget->hasAVXVNNI()">;
+def NoVLX_Or_NoVNNI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVNNI()">;
def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
@@ -964,11 +979,15 @@ def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">;
def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">;
+def HasKL : Predicate<"Subtarget->hasKL()">;
+def HasWIDEKL : Predicate<"Subtarget->hasWIDEKL()">;
+def HasHRESET : Predicate<"Subtarget->hasHRESET()">;
def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
def HasTSXLDTRK : Predicate<"Subtarget->hasTSXLDTRK()">;
def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">;
def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">;
def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
+def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
@@ -1016,6 +1035,7 @@ def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
+def HasFSRM : Predicate<"Subtarget->hasFSRM()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
@@ -1053,6 +1073,7 @@ def i16immSExt8 : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
def i64immSExt8 : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
+def i64timmSExt32 : TImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
def i16relocImmSExt8 : PatLeaf<(i16 relocImm), [{
return isSExtAbsoluteSymbolRef(8, N);
@@ -2658,11 +2679,11 @@ let Predicates = [HasBMI2] in {
//
let Predicates = [HasTBM], Defs = [EFLAGS] in {
-multiclass tbm_ternary_imm<bits<8> opc, RegisterClass RC, string OpcodeStr,
- X86MemOperand x86memop, PatFrag ld_frag,
- SDNode OpNode, Operand immtype,
- SDPatternOperator immoperator,
- X86FoldableSchedWrite Sched> {
+multiclass tbm_bextri<bits<8> opc, RegisterClass RC, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ SDNode OpNode, Operand immtype,
+ SDPatternOperator immoperator,
+ X86FoldableSchedWrite Sched> {
def ri : Ii32<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, immtype:$cntl),
!strconcat(OpcodeStr,
"\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
@@ -2676,12 +2697,12 @@ multiclass tbm_ternary_imm<bits<8> opc, RegisterClass RC, string OpcodeStr,
XOP, XOPA, Sched<[Sched.Folded]>;
}
-defm BEXTRI32 : tbm_ternary_imm<0x10, GR32, "bextr{l}", i32mem, loadi32,
- X86bextr, i32imm, imm, WriteBEXTR>;
+defm BEXTRI32 : tbm_bextri<0x10, GR32, "bextr{l}", i32mem, loadi32,
+ X86bextri, i32imm, timm, WriteBEXTR>;
let ImmT = Imm32S in
-defm BEXTRI64 : tbm_ternary_imm<0x10, GR64, "bextr{q}", i64mem, loadi64,
- X86bextr, i64i32imm,
- i64immSExt32, WriteBEXTR>, VEX_W;
+defm BEXTRI64 : tbm_bextri<0x10, GR64, "bextr{q}", i64mem, loadi64,
+ X86bextri, i64i32imm,
+ i64timmSExt32, WriteBEXTR>, VEX_W;
multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
RegisterClass RC, string OpcodeStr,
@@ -2787,8 +2808,7 @@ let SchedRW = [ WriteSystem ] in {
let Uses = [ ECX, EAX, EBX ] in {
def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx",
- [(int_x86_mwaitx ECX, EAX, EBX)]>,
- TB, Requires<[ HasMWAITX ]>;
+ []>, TB, Requires<[ HasMWAITX ]>;
}
} // SchedRW
@@ -2905,6 +2925,41 @@ def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
+// INVLPGB Instruction
+// OPCODE 0F 01 FE
+//
+let SchedRW = [WriteSystem] in {
+ let Uses = [EAX, EDX] in
+ def INVLPGB32 : I<0x01, MRM_FE, (outs), (ins),
+ "invlpgb}", []>,
+ PS, Requires<[Not64BitMode]>;
+ let Uses = [RAX, EDX] in
+ def INVLPGB64 : I<0x01, MRM_FE, (outs), (ins),
+ "invlpgb", []>,
+ PS, Requires<[In64BitMode]>;
+} // SchedRW
+
+def : InstAlias<"invlpgb\t{%eax, %edx|eax, edx}", (INVLPGB32)>, Requires<[Not64BitMode]>;
+def : InstAlias<"invlpgb\t{%rax, %edx|rax, edx}", (INVLPGB64)>, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// TLBSYNC Instruction
+// OPCODE 0F 01 FF
+//
+let SchedRW = [WriteSystem] in {
+ def TLBSYNC : I<0x01, MRM_FF, (outs), (ins),
+ "tlbsync", []>,
+ PS, Requires<[]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// HRESET Instruction
+//
+let Uses = [EAX], SchedRW = [WriteSystem] in
+ def HRESET : Ii8<0xF0, MRM_C0, (outs), (ins i32u8imm:$imm), "hreset\t$imm", []>,
+ Requires<[HasHRESET]>, TAXS;
+
+//===----------------------------------------------------------------------===//
// SERIALIZE Instruction
//
def SERIALIZE : I<0x01, MRM_E8, (outs), (ins), "serialize",
@@ -2922,6 +2977,25 @@ let Predicates = [HasTSXLDTRK] in {
}
//===----------------------------------------------------------------------===//
+// UINTR Instructions
+//
+let Predicates = [HasUINTR, In64BitMode] in {
+ def UIRET : I<0x01, MRM_EC, (outs), (ins), "uiret",
+ []>, XS;
+ def CLUI : I<0x01, MRM_EE, (outs), (ins), "clui",
+ [(int_x86_clui)]>, XS;
+ def STUI : I<0x01, MRM_EF, (outs), (ins), "stui",
+ [(int_x86_stui)]>, XS;
+
+ def SENDUIPI : I<0xC7, MRM6r, (outs), (ins GR64:$arg), "senduipi\t$arg",
+ [(int_x86_senduipi GR64:$arg)]>, XS;
+
+ let Defs = [EFLAGS] in
+ def TESTUI : I<0x01, MRM_ED, (outs), (ins), "testui",
+ [(set EFLAGS, (X86testui))]>, XS;
+}
+
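The UIRET/CLUI/STUI/TESTUI/SENDUIPI definitions above back the user-interrupt builtins. A hedged usage sketch, assuming the uintrintrin.h wrappers (_clui, _stui, _testui) that ship with compilers supporting -muintr; the header name and availability are assumptions, not part of this patch:

#include <x86gprintrin.h>   // pulls in uintrintrin.h when UINTR is enabled

void toggle_user_interrupts(void) {
  _clui();                        // CLUI: clear UIF, block user interrupts
  _stui();                        // STUI: set UIF, allow them again
  unsigned char uif = _testui();  // TESTUI: read UIF (materialized via CF)
  (void)uif;
}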
+//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate TBM instructions.
//===----------------------------------------------------------------------===//
@@ -3080,10 +3154,16 @@ include "X86InstrMPX.td"
include "X86InstrVMX.td"
include "X86InstrSVM.td"
+include "X86InstrSNP.td"
include "X86InstrTSX.td"
include "X86InstrSGX.td"
+include "X86InstrTDX.td"
+
+// Key Locker instructions
+include "X86InstrKL.td"
+
// AMX instructions
include "X86InstrAMX.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td
new file mode 100644
index 000000000000..b91e563a15f3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td
@@ -0,0 +1,86 @@
+//===---------------------------*-tablegen-*-------------------------------===//
+//===------------- X86InstrKL.td - KL Instruction Set Extension -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel key locker
+// instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Key Locker instructions
+
+let SchedRW = [WriteSystem], Predicates = [HasKL] in {
+ let Uses = [XMM0, EAX], Defs = [EFLAGS] in {
+ def LOADIWKEY : I<0xDC, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "loadiwkey\t{$src2, $src1|$src1, $src2}",
+ [(int_x86_loadiwkey XMM0, VR128:$src1, VR128:$src2, EAX)]>, T8XS,
+ NotMemoryFoldable;
+ }
+
+ let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6, EFLAGS] in {
+ def ENCODEKEY128 : I<0xFA, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "encodekey128\t{$src, $dst|$dst, $src}", []>, T8XS,
+ NotMemoryFoldable;
+ }
+
+ let Uses = [XMM0, XMM1], Defs = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, EFLAGS] in {
+ def ENCODEKEY256 : I<0xFB, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "encodekey256\t{$src, $dst|$dst, $src}", []>, T8XS,
+ NotMemoryFoldable;
+ }
+
+ let Constraints = "$src1 = $dst",
+ Defs = [EFLAGS] in {
+ def AESENC128KL : I<0xDC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
+ "aesenc128kl\t{$src2, $src1|$src1, $src2}",
+ [(set VR128:$dst, EFLAGS,
+ (X86aesenc128kl VR128:$src1, addr:$src2))]>, T8XS,
+ NotMemoryFoldable;
+
+ def AESDEC128KL : I<0xDD, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
+ "aesdec128kl\t{$src2, $src1|$src1, $src2}",
+ [(set VR128:$dst, EFLAGS,
+ (X86aesdec128kl VR128:$src1, addr:$src2))]>, T8XS,
+ NotMemoryFoldable;
+
+ def AESENC256KL : I<0xDE, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
+ "aesenc256kl\t{$src2, $src1|$src1, $src2}",
+ [(set VR128:$dst, EFLAGS,
+ (X86aesenc256kl VR128:$src1, addr:$src2))]>, T8XS,
+ NotMemoryFoldable;
+
+ def AESDEC256KL : I<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
+ "aesdec256kl\t{$src2, $src1|$src1, $src2}",
+ [(set VR128:$dst, EFLAGS,
+ (X86aesdec256kl VR128:$src1, addr:$src2))]>, T8XS,
+ NotMemoryFoldable;
+ }
+
+} // SchedRW, Predicates
+
+let SchedRW = [WriteSystem], Predicates = [HasWIDEKL] in {
+ let Uses = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7],
+ Defs = [EFLAGS, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7],
+ mayLoad = 1 in {
+ def AESENCWIDE128KL : I<0xD8, MRM0m, (outs), (ins opaquemem:$src),
+ "aesencwide128kl\t$src", []>, T8XS,
+ NotMemoryFoldable;
+ def AESDECWIDE128KL : I<0xD8, MRM1m, (outs), (ins opaquemem:$src),
+ "aesdecwide128kl\t$src", []>, T8XS,
+ NotMemoryFoldable;
+ def AESENCWIDE256KL : I<0xD8, MRM2m, (outs), (ins opaquemem:$src),
+ "aesencwide256kl\t$src", []>, T8XS,
+ NotMemoryFoldable;
+ def AESDECWIDE256KL : I<0xD8, MRM3m, (outs), (ins opaquemem:$src),
+ "aesdecwide256kl\t$src", []>, T8XS,
+ NotMemoryFoldable;
+ }
+
+} // SchedRW, Predicates
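For orientation, the AESENC*KL/AESDEC*KL definitions above are what the Key Locker C intrinsics lower to. A hedged sketch, assuming the keylockerintrin.h wrappers reachable through immintrin.h when building with -mkl; the intrinsic name and return convention are assumptions, not part of this file:

#include <immintrin.h>

// Encrypt one block in place using a 128-bit key handle.
// The return value reflects the handle-validity flag (EFLAGS.ZF) that
// AESENC128KL sets; check it before trusting *block.
unsigned char encrypt_block(__m128i *block, const void *handle) {
  return _mm_aesenc128kl_u8(block, *block, handle);
}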
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
index 49940204c25a..bb3e6df3bf3e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
@@ -472,6 +472,7 @@ defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb,
defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b,
SchedWriteVarShuffle.MMX>;
+let Predicates = [HasMMX, HasSSE1] in {
def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -485,6 +486,7 @@ def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
timm:$src2))]>,
Sched<[SchedWriteShuffle.MMX.Folded]>;
+}
// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td
new file mode 100644
index 000000000000..de59f3fe2750
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td
@@ -0,0 +1,47 @@
+//===-- X86InstrSNP.td - SNP Instruction Set Extension -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the AMD Secure Nested
+// Paging (SNP) instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SNP instructions
+
+let SchedRW = [WriteSystem] in {
+// F3 0F 01 FF
+let Uses = [RAX] in
+def PSMASH: I<0x01, MRM_FF, (outs), (ins), "psmash", []>, XS,
+ Requires<[In64BitMode]>;
+
+// F2 0F 01 FF
+let Uses = [RAX] in
+def PVALIDATE64: I<0x01, MRM_FF, (outs), (ins), "pvalidate",[]>,
+ XD, Requires<[In64BitMode]>;
+
+let Uses = [EAX] in
+def PVALIDATE32: I<0x01, MRM_FF, (outs), (ins), "pvalidate",[]>,
+ XD, Requires<[Not64BitMode]>;
+
+// F2 0F 01 FE
+let Uses = [RAX] in
+def RMPUPDATE: I<0x01, MRM_FE, (outs), (ins), "rmpupdate", []>, XD,
+ Requires<[In64BitMode]>;
+
+// F3 0F 01 FE
+let Uses = [RAX] in
+def RMPADJUST: I<0x01, MRM_FE, (outs), (ins), "rmpadjust", []>, XS,
+ Requires<[In64BitMode]>;
+} // SchedRW
+
+def : InstAlias<"psmash\t{%rax|rax}", (PSMASH)>, Requires<[In64BitMode]>;
+def : InstAlias<"pvalidate\t{%rax|rax}", (PVALIDATE64)>, Requires<[In64BitMode]>;
+def : InstAlias<"pvalidate\t{%eax|eax}", (PVALIDATE32)>, Requires<[Not64BitMode]>;
+def : InstAlias<"rmpupdate\t{%rax|rax}", (RMPUPDATE)>, Requires<[In64BitMode]>;
+def : InstAlias<"rmpadjust\t{%rax|rax}", (RMPADJUST)>, Requires<[In64BitMode]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
index c3c9f22381f8..a185a2007b72 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1242,7 +1242,8 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
/// SSE 2 Only
// Convert scalar double to scalar single
-let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX] in {
+let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX],
+ ExeDomain = SSEPackedSingle in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1260,7 +1261,7 @@ def : Pat<(f32 (any_fpround FR64:$src)),
(VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
Requires<[UseAVX]>;
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (any_fpround FR64:$src))]>,
@@ -1272,7 +1273,7 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
}
-let Uses = [MXCSR], mayRaiseFPException = 1 in {
+let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in {
def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1306,7 +1307,7 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1326,7 +1327,7 @@ def : Pat<(f64 (any_fpextend FR32:$src)),
def : Pat<(any_fpextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (any_fpextend FR32:$src))]>,
@@ -1338,7 +1339,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
} // isCodeGenOnly = 1
-let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
+let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
+ ExeDomain = SSEPackedSingle in {
def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -3776,7 +3778,7 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
VEX_4V, VEX_WIG;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
@@ -3792,7 +3794,7 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -3928,7 +3930,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
+ (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
def rm : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
@@ -3938,7 +3940,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))]>,
+ timm:$src3))]>,
Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
@@ -3948,13 +3950,13 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>,
+ timm:$src2))]>,
PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>;
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>,
+ timm:$src2))]>,
Sched<[WriteVecExtract]>;
// Insert
@@ -4754,7 +4756,7 @@ let isCommutable = 0 in {
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
@@ -4800,7 +4802,7 @@ let isCommutable = 0 in {
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
@@ -5151,14 +5153,14 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
- imm:$src2))]>,
+ timm:$src2))]>,
Sched<[WriteVecExtract]>;
let hasSideEffects = 0, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))),
+ [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), timm:$src2))),
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
@@ -5182,7 +5184,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
(ins i16mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))),
+ [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))),
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
@@ -5272,7 +5274,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
+ (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i8mem:$src2, u8imm:$src3),
@@ -5281,7 +5283,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), imm:$src3))]>,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>,
Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
@@ -6501,7 +6503,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
+ defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ;
}
@@ -6519,7 +6521,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
+ defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">;
}
@@ -6537,7 +6539,7 @@ multiclass SS42AI_pcmpistri<string asm> {
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
+ defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
}
@@ -6555,7 +6557,7 @@ multiclass SS42AI_pcmpestri<string asm> {
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
+ defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
}
@@ -7014,22 +7016,19 @@ def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;
let Predicates = [HasAVX, NoVLX] in {
-def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
+def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))),
+def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
-}
-
// NOTE: We're using FP instructions here, but execution domain fixing can
// convert to integer when profitable.
-let Predicates = [HasAVX, NoVLX] in {
-def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
+def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
+def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
+def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
}
@@ -7165,6 +7164,68 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
WriteFMaskMove64, WriteFMaskMove64Y>;
//===----------------------------------------------------------------------===//
+// AVX_VNNI
+//===----------------------------------------------------------------------===//
+let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst" in
+multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit IsCommutable> {
+ let isCommutable = IsCommutable in
+ def rr : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
+ VR128:$src2, VR128:$src3)))]>,
+ VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+
+ def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i128mem:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
+ (loadv4i32 addr:$src3))))]>,
+ VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+
+ let isCommutable = IsCommutable in
+ def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
+ VR256:$src2, VR256:$src3)))]>,
+ VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
+
+ def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i256mem:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
+ (loadv8i32 addr:$src3))))]>,
+ VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
+}
+
+defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>, ExplicitVEXPrefix;
+defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>, ExplicitVEXPrefix;
+defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>, ExplicitVEXPrefix;
+defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>, ExplicitVEXPrefix;
+
+def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86vpmaddwd node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
+ def : Pat<(v8i32 (add VR256:$src1,
+ (X86vpmaddwd_su VR256:$src2, VR256:$src3))),
+ (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(v8i32 (add VR256:$src1,
+ (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))),
+ (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(v4i32 (add VR128:$src1,
+ (X86vpmaddwd_su VR128:$src2, VR128:$src3))),
+ (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (add VR128:$src1,
+ (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))),
+ (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
+}
+
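The X86vpmaddwd_su patterns above fold a single-use vpmaddwd feeding an integer add into vpdpwssd. In source terms, that is the classic widening dot-product reduction; a hedged scalar illustration (hypothetical function, and whether the vectorizer actually forms vpmaddwd + vpaddd here depends on the cost model):

// Each acc lane absorbs two 16x16->32 products: vpmaddwd + vpaddd,
// which the patterns above turn into a single vpdpwssd under AVX-VNNI.
void dpwssd_like(int *acc, const short *a, const short *b, int n) {
  for (int j = 0; j < n; ++j)
    acc[j] += (int)a[2 * j] * (int)b[2 * j] +
              (int)a[2 * j + 1] * (int)b[2 * j + 1];
}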
+//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//
@@ -7226,16 +7287,12 @@ let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
- "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
- (i8 timm:$src3))))]>, VEX_4V, VEX_L,
- Sched<[WriteFShuffle256]>;
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
- "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
- (i8 timm:$src3)))]>, VEX_4V, VEX_L,
- Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}
// Immediate transform to help with commuting.
@@ -7243,23 +7300,27 @@ def Perm2XCommuteImm : SDNodeXForm<timm, [{
return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
}]>;
+multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
+ def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
+ def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
+ // Pattern with load in other operand.
+ def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
+ (Perm2XCommuteImm timm:$imm))>;
+}
+
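A side note on Perm2XCommuteImm used above: the vperm2f128/vperm2i128 immediate selects a source and half per 4-bit nibble, and bit 1 of each nibble picks source 1 vs. source 2, so commuting the operands is just an XOR with 0x22. A minimal sketch of the same transform in C++ (hypothetical helper; the in-tree version is the SDNodeXForm above):

#include <cstdint>

// Flip which source each 128-bit destination half reads from.
inline uint8_t commutePerm2x128Imm(uint8_t Imm) {
  return Imm ^ 0x22;  // toggles bit 1 of the low and high selector nibbles
}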
let Predicates = [HasAVX] in {
-// Pattern with load in other operand.
-def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2),
- VR256:$src1, (i8 timm:$imm))),
- (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>;
+ defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
+ defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
}
let Predicates = [HasAVX1Only] in {
-def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, timm:$imm)>;
-def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
- (loadv4i64 addr:$src2), (i8 timm:$imm))),
- (VPERM2F128rm VR256:$src1, addr:$src2, timm:$imm)>;
-// Pattern with load in other operand.
-def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
- VR256:$src1, (i8 timm:$imm))),
- (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>;
+ defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
+ defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
+ defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
+ defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
}
//===----------------------------------------------------------------------===//
@@ -7628,27 +7689,24 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
WriteFShuffle256, f256mem>, VEX_W;
//===----------------------------------------------------------------------===//
-// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
+// VPERM2I128 - Permute Integer Vector Values in 128-bit chunks
//
let isCommutable = 1 in
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
- "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
- (i8 timm:$src3))))]>, Sched<[WriteShuffle256]>,
- VEX_4V, VEX_L;
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
- "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
- (i8 timm:$src3)))]>,
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
-let Predicates = [HasAVX2] in
-def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
- VR256:$src1, (i8 timm:$imm))),
- (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>;
-
+let Predicates = [HasAVX2] in {
+ defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>;
+ defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>;
+ defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
+ defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>;
+}
//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
@@ -7768,37 +7826,6 @@ let Predicates = [HasAVX2] in {
}
//===----------------------------------------------------------------------===//
-// SubVector Broadcasts
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-
-let Predicates = [HasAVX, NoVLX] in {
-def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128:$src))),
- (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
- (v2f64 VR128:$src), 1)>;
-def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128:$src))),
- (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
- (v4f32 VR128:$src), 1)>;
-}
-
-// NOTE: We're using FP instructions here, but execution domain fixing can
-// convert to integer when profitable.
-let Predicates = [HasAVX, NoVLX] in {
-def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
- (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
- (v2i64 VR128:$src), 1)>;
-def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
- (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
- (v4i32 VR128:$src), 1)>;
-def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
- (VINSERTF128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
- (v8i16 VR128:$src), 1)>;
-def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
- (VINSERTF128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
- (v16i8 VR128:$src), 1)>;
-}
-
-//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSVM.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSVM.td
index 82c8e74156b2..d8f70b016c7b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSVM.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSVM.td
@@ -26,37 +26,47 @@ def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB;
// 0F 01 DE
let Uses = [EAX] in
-def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|eax}", []>, TB;
+def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit", []>, TB;
// 0F 01 D8
let Uses = [EAX] in
-def VMRUN32 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%eax|eax}", []>, TB,
+def VMRUN32 : I<0x01, MRM_D8, (outs), (ins), "vmrun", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
-def VMRUN64 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%rax|rax}", []>, TB,
+def VMRUN64 : I<0x01, MRM_D8, (outs), (ins), "vmrun", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DA
let Uses = [EAX] in
-def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%eax|eax}", []>, TB,
+def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins), "vmload", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
-def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%rax|rax}", []>, TB,
+def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins), "vmload", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DB
let Uses = [EAX] in
-def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%eax|eax}", []>, TB,
+def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins), "vmsave", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
-def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%rax|rax}", []>, TB,
+def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins), "vmsave", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DF
let Uses = [EAX, ECX] in
def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins),
- "invlpga\t{%eax, %ecx|eax, ecx}", []>, TB, Requires<[Not64BitMode]>;
+ "invlpga", []>, TB, Requires<[Not64BitMode]>;
let Uses = [RAX, ECX] in
def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
- "invlpga\t{%rax, %ecx|rax, ecx}", []>, TB, Requires<[In64BitMode]>;
+ "invlpga", []>, TB, Requires<[In64BitMode]>;
} // SchedRW
+
+def : InstAlias<"skinit\t{%eax|eax}", (SKINIT), 0>;
+def : InstAlias<"vmrun\t{%eax|eax}", (VMRUN32), 0>, Requires<[Not64BitMode]>;
+def : InstAlias<"vmrun\t{%rax|rax}", (VMRUN64), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"vmload\t{%eax|eax}", (VMLOAD32), 0>, Requires<[Not64BitMode]>;
+def : InstAlias<"vmload\t{%rax|rax}", (VMLOAD64), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"vmsave\t{%eax|eax}", (VMSAVE32), 0>, Requires<[Not64BitMode]>;
+def : InstAlias<"vmsave\t{%rax|rax}", (VMSAVE64), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"invlpga\t{%eax, %ecx|eax, ecx}", (INVLPGA32), 0>, Requires<[Not64BitMode]>;
+def : InstAlias<"invlpga\t{%rax, %ecx|rax, ecx}", (INVLPGA64), 0>, Requires<[In64BitMode]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
index 13659b5c456e..eb8740896e5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
@@ -49,6 +49,7 @@ let Uses = [EFLAGS] in
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>;
} // SchedRW
+def UBSAN_UD1 : PseudoI<(outs), (ins i32imm:$kind), [(ubsantrap (i32 timm:$kind))]>;
// The long form of "int $3" turns into int3 as a size optimization.
// FIXME: This doesn't work because InstAlias can't match immediate constants.
//def : InstAlias<"int\t$3", (INT3)>;
@@ -171,6 +172,17 @@ def GS_PREFIX : I<0x65, PrefixByte, (outs), (ins), "gs", []>;
} // SchedRW
//===----------------------------------------------------------------------===//
+// Address-size override prefixes.
+//
+
+let SchedRW = [WriteNop] in {
+def ADDR16_PREFIX : I<0x67, PrefixByte, (outs), (ins), "addr16", []>,
+ Requires<[In32BitMode]>;
+def ADDR32_PREFIX : I<0x67, PrefixByte, (outs), (ins), "addr32", []>,
+ Requires<[In64BitMode]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
// Moves to and from segment registers.
//
@@ -447,7 +459,7 @@ let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in
// Cache instructions
let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
-def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, TB;
+def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, PS;
// wbnoinvd is like wbinvd, except without invalidation
// encoding: like wbinvd + an 0xF3 prefix
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td
new file mode 100644
index 000000000000..8d7cd6082095
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td
@@ -0,0 +1,39 @@
+//===- X86InstrTDX.td - TDX Instruction Set Extension -*- tablegen -*===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel TDX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TDX instructions
+
+// 64-bit only instructions
+let SchedRW = [WriteSystem], Predicates = [In64BitMode] in {
+// SEAMCALL - Call to SEAM VMX-root Operation Module
+def SEAMCALL : I<0x01, MRM_CF, (outs), (ins),
+ "seamcall", []>, PD;
+
+// SEAMRET - Return to Legacy VMX-root Operation
+def SEAMRET : I<0x01, MRM_CD, (outs), (ins),
+ "seamret", []>, PD;
+
+// SEAMOPS - SEAM Operations
+def SEAMOPS : I<0x01, MRM_CE, (outs), (ins),
+ "seamops", []>, PD;
+
+} // SchedRW
+
+// common instructions
+let SchedRW = [WriteSystem] in {
+// TDCALL - Call SEAM Module Functions
+def TDCALL : I<0x01, MRM_CC, (outs), (ins),
+ "tdcall", []>, PD;
+
+} // SchedRW
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 60fb4d2ef4bf..ff531713037c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -214,8 +214,8 @@ static unsigned getSubRegIndex(const TargetRegisterClass *RC) {
return SubIdx;
}
-static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) {
- assert(Register::isPhysicalRegister(Reg));
+static const TargetRegisterClass *getRegClassFromGRPhysReg(Register Reg) {
+ assert(Reg.isPhysical());
if (X86::GR64RegClass.contains(Reg))
return &X86::GR64RegClass;
if (X86::GR32RegClass.contains(Reg))
@@ -239,7 +239,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
- if (Register::isPhysicalRegister(DstReg)) {
+ if (DstReg.isPhysical()) {
assert(I.isCopy() && "Generic operators do not allow physical registers");
if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
@@ -266,12 +266,12 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
return true;
}
- assert((!Register::isPhysicalRegister(SrcReg) || I.isCopy()) &&
+ assert((!SrcReg.isPhysical() || I.isCopy()) &&
"No phys reg on generic operators");
assert((DstSize == SrcSize ||
// Copies are a mean to setup initial types, the number of
// bits may not exactly match.
- (Register::isPhysicalRegister(SrcReg) &&
+ (SrcReg.isPhysical() &&
DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
"Copy with different width?!");
@@ -280,7 +280,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
if (SrcRegBank.getID() == X86::GPRRegBankID &&
DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
- Register::isPhysicalRegister(SrcReg)) {
+ SrcReg.isPhysical()) {
// Change the physical register to perform the truncate.
const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);
@@ -479,7 +479,7 @@ static void X86SelectAddress(const MachineInstr &I,
"unsupported type.");
if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
- if (auto COff = getConstantVRegVal(I.getOperand(2).getReg(), MRI)) {
+ if (auto COff = getConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
int64_t Imm = *COff;
if (isInt<32>(Imm)) { // Check for displacement overflow.
AM.Disp = static_cast<int32_t>(Imm);
@@ -780,69 +780,18 @@ bool X86InstructionSelector::selectZext(MachineInstr &I,
const LLT DstTy = MRI.getType(DstReg);
const LLT SrcTy = MRI.getType(SrcReg);
+ assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(16)) &&
+ "8=>16 Zext is handled by tablegen");
assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) &&
"8=>32 Zext is handled by tablegen");
assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) &&
"16=>32 Zext is handled by tablegen");
-
- const static struct ZextEntry {
- LLT SrcTy;
- LLT DstTy;
- unsigned MovOp;
- bool NeedSubregToReg;
- } OpTable[] = {
- {LLT::scalar(8), LLT::scalar(16), X86::MOVZX16rr8, false}, // i8 => i16
- {LLT::scalar(8), LLT::scalar(64), X86::MOVZX32rr8, true}, // i8 => i64
- {LLT::scalar(16), LLT::scalar(64), X86::MOVZX32rr16, true}, // i16 => i64
- {LLT::scalar(32), LLT::scalar(64), 0, true} // i32 => i64
- };
-
- auto ZextEntryIt =
- std::find_if(std::begin(OpTable), std::end(OpTable),
- [SrcTy, DstTy](const ZextEntry &El) {
- return El.DstTy == DstTy && El.SrcTy == SrcTy;
- });
-
- // Here we try to select Zext into a MOVZ and/or SUBREG_TO_REG instruction.
- if (ZextEntryIt != std::end(OpTable)) {
- const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
- const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
- const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
- const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);
-
- if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
- !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
- << " operand\n");
- return false;
- }
-
- unsigned TransitRegTo = DstReg;
- unsigned TransitRegFrom = SrcReg;
- if (ZextEntryIt->MovOp) {
- // If we select Zext into MOVZ + SUBREG_TO_REG, we need to have
- // a transit register in between: create it here.
- if (ZextEntryIt->NeedSubregToReg) {
- TransitRegFrom = MRI.createVirtualRegister(
- getRegClass(LLT::scalar(32), DstReg, MRI));
- TransitRegTo = TransitRegFrom;
- }
-
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ZextEntryIt->MovOp))
- .addDef(TransitRegTo)
- .addReg(SrcReg);
- }
- if (ZextEntryIt->NeedSubregToReg) {
- BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(TargetOpcode::SUBREG_TO_REG))
- .addDef(DstReg)
- .addImm(0)
- .addReg(TransitRegFrom)
- .addImm(X86::sub_32bit);
- }
- I.eraseFromParent();
- return true;
- }
+ assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(64)) &&
+ "8=>64 Zext is handled by tablegen");
+ assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(64)) &&
+ "16=>64 Zext is handled by tablegen");
+ assert(!(SrcTy == LLT::scalar(32) && DstTy == LLT::scalar(64)) &&
+ "32=>64 Zext is handled by tablegen");
if (SrcTy != LLT::scalar(1))
return false;
@@ -859,12 +808,17 @@ bool X86InstructionSelector::selectZext(MachineInstr &I,
else
return false;
- unsigned DefReg = SrcReg;
+ Register DefReg = SrcReg;
if (DstTy != LLT::scalar(8)) {
+ Register ImpDefReg =
+ MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg);
+
DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(TargetOpcode::SUBREG_TO_REG), DefReg)
- .addImm(0)
+ TII.get(TargetOpcode::INSERT_SUBREG), DefReg)
+ .addReg(ImpDefReg)
.addReg(SrcReg)
.addImm(X86::sub_8bit);
}
@@ -1605,10 +1559,9 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
}}, // i64
};
- auto OpEntryIt = std::find_if(std::begin(OpTable), std::end(OpTable),
- [RegTy](const DivRemEntry &El) {
- return El.SizeInBits == RegTy.getSizeInBits();
- });
+ auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const DivRemEntry &El) {
+ return El.SizeInBits == RegTy.getSizeInBits();
+ });
if (OpEntryIt == std::end(OpTable))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InterleavedAccess.cpp
index a19e12766e10..95655dd4723b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -44,8 +44,8 @@ namespace {
/// E.g. A group of interleaving access loads (Factor = 2; accessing every
/// other element)
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
-/// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
-/// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
+/// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
+/// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
class X86InterleavedAccessGroup {
/// Reference to the wide-load instruction of an interleaved access
/// group.
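As a source-level counterpart to the IR example in the comment above, a hedged sketch of the stride-2 access pattern this pass targets (hypothetical function, not taken from the patch): each iteration reads one even and one odd element, which the vectorizer expresses as one wide load plus the two shuffles shown.

void split_even_odd(int *even, int *odd, const int *in, int n) {
  for (int i = 0; i < n; ++i) {
    even[i] = in[2 * i];      // becomes the <0, 2, 4, 6> shuffle
    odd[i]  = in[2 * i + 1];  // becomes the <1, 3, 5, 7> shuffle
  }
}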
@@ -211,7 +211,7 @@ void X86InterleavedAccessGroup::decompose(
VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
}
// Generate N loads of T type.
- assert(VecBaseTy->getPrimitiveSizeInBits().isByteSized() &&
+ assert(VecBaseTy->getPrimitiveSizeInBits().isKnownMultipleOf(8) &&
"VecBaseTy's size must be a multiple of 8");
const Align FirstAlignment = LI->getAlign();
const Align SubsequentAlignment = commonAlignment(
@@ -295,8 +295,7 @@ static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
if (VecElems == 16) {
for (unsigned i = 0; i < Stride; i++)
- TransposedMatrix[i] = Builder.CreateShuffleVector(
- Vec[i], UndefValue::get(Vec[i]->getType()), VPShuf);
+ TransposedMatrix[i] = Builder.CreateShuffleVector(Vec[i], VPShuf);
return;
}
@@ -577,8 +576,7 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
// Vec[2]= b5 b6 b7 c5 c6 c7 a6 a7
for (int i = 0; i < 3; i++)
- Vec[i] = Builder.CreateShuffleVector(
- Vec[i], UndefValue::get(Vec[0]->getType()), VPShuf);
+ Vec[i] = Builder.CreateShuffleVector(Vec[i], VPShuf);
// TempVector[0]= a6 a7 a0 a1 a2 b0 b1 b2
// TempVector[1]= c0 c1 c2 c3 c4 a3 a4 a5
@@ -600,10 +598,8 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
// TransposedMatrix[1]= b0 b1 b2 b3 b4 b5 b6 b7
// TransposedMatrix[2]= c0 c1 c2 c3 c4 c5 c6 c7
- Value *TempVec = Builder.CreateShuffleVector(
- Vec[1], UndefValue::get(Vec[1]->getType()), VPAlign3);
- TransposedMatrix[0] = Builder.CreateShuffleVector(
- Vec[0], UndefValue::get(Vec[1]->getType()), VPAlign2);
+ Value *TempVec = Builder.CreateShuffleVector(Vec[1], VPAlign3);
+ TransposedMatrix[0] = Builder.CreateShuffleVector(Vec[0], VPAlign2);
TransposedMatrix[1] = VecElems == 8 ? Vec[2] : TempVec;
TransposedMatrix[2] = VecElems == 8 ? TempVec : Vec[2];
}
@@ -660,10 +656,8 @@ void X86InterleavedAccessGroup::interleave8bitStride3(
// Vec[1]= c5 c6 c7 c0 c1 c2 c3 c4
// Vec[2]= b0 b1 b2 b3 b4 b5 b6 b7
- Vec[0] = Builder.CreateShuffleVector(
- InVec[0], UndefValue::get(InVec[0]->getType()), VPAlign2);
- Vec[1] = Builder.CreateShuffleVector(
- InVec[1], UndefValue::get(InVec[1]->getType()), VPAlign3);
+ Vec[0] = Builder.CreateShuffleVector(InVec[0], VPAlign2);
+ Vec[1] = Builder.CreateShuffleVector(InVec[1], VPAlign3);
Vec[2] = InVec[2];
// Vec[0]= a6 a7 a0 a1 a2 b0 b1 b2
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 1c10c07abeee..72ab3e9cf78d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -22,7 +22,7 @@ namespace llvm {
enum IntrinsicType : uint16_t {
CVTNEPS2BF16_MASK,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
- INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
+ INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP_IMM8,
INTR_TYPE_3OP_IMM8,
CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV, BEXTRI,
CVTPD2PS_MASK,
@@ -417,12 +417,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
- X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_SAE),
- X86_INTRINSIC_DATA(avx512_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_SAE),
X86_INTRINSIC_DATA(avx512_conflict_d_128, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
X86_INTRINSIC_DATA(avx512_conflict_d_256, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
X86_INTRINSIC_DATA(avx512_conflict_d_512, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
@@ -464,6 +458,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FADDS, X86ISD::FADDS_RND),
X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK,
X86ISD::FADDS, X86ISD::FADDS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPMM, X86ISD::CMPMM_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPMM, X86ISD::CMPMM_SAE),
X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC,
X86ISD::FSETCCM, X86ISD::FSETCCM_SAE),
X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC,
@@ -882,12 +882,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
- X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_pternlog_q_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_pternlog_q_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_pternlog_q_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP_IMM8, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP_IMM8, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP_IMM8, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_q_128, INTR_TYPE_4OP_IMM8, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_q_256, INTR_TYPE_4OP_IMM8, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_q_512, INTR_TYPE_4OP_IMM8, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
@@ -1098,7 +1098,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_round_sd, ROUNDS, X86ISD::VRNDSCALES, 0),
X86_INTRINSIC_DATA(sse41_round_ss, ROUNDS, X86ISD::VRNDSCALES, 0),
X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
- X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
+ X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP_IMM8, X86ISD::INSERTQI, 0),
X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
@@ -1108,8 +1108,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(subborrow_32, ADX, X86ISD::SBB, X86ISD::SUB),
X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB),
- X86_INTRINSIC_DATA(tbm_bextri_u32, BEXTRI, X86ISD::BEXTR, 0),
- X86_INTRINSIC_DATA(tbm_bextri_u64, BEXTRI, X86ISD::BEXTR, 0),
+ X86_INTRINSIC_DATA(tbm_bextri_u32, BEXTRI, X86ISD::BEXTRI, 0),
+ X86_INTRINSIC_DATA(tbm_bextri_u64, BEXTRI, X86ISD::BEXTRI, 0),
X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
@@ -1132,10 +1132,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP,
X86ISD::GF2P8MULB, 0),
- X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
- X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
- X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
- X86_INTRINSIC_DATA(xop_vpermil2ps_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
+ X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP_IMM8, X86ISD::VPERMIL2, 0),
+ X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP_IMM8, X86ISD::VPERMIL2, 0),
+ X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP_IMM8, X86ISD::VPERMIL2, 0),
+ X86_INTRINSIC_DATA(xop_vpermil2ps_256, INTR_TYPE_4OP_IMM8, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),
X86_INTRINSIC_DATA(xop_vpshab, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
X86_INTRINSIC_DATA(xop_vpshad, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index 84f560f2f9ee..1b371ac2a108 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -70,6 +70,11 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
setLegalizerInfoAVX512DQ();
setLegalizerInfoAVX512BW();
+ getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
+ .scalarize(0)
+ .minScalar(0, LLT::scalar(32))
+ .libcall();
+
setLegalizeScalarToDifferentSizeStrategy(G_PHI, 0, widen_1);
for (unsigned BinOp : {G_SUB, G_MUL, G_AND, G_OR, G_XOR})
setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, widen_1);
@@ -81,25 +86,14 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
setLegalizeScalarToDifferentSizeStrategy(
G_CONSTANT, 0, widenToLargerTypesAndNarrowToLargest);
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
+
computeTables();
verify(*STI.getInstrInfo());
}
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
- MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
- switch (MI.getIntrinsicID()) {
- case Intrinsic::memcpy:
- case Intrinsic::memset:
- case Intrinsic::memmove:
- if (createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI) ==
- LegalizerHelper::UnableToLegalize)
- return false;
- MI.eraseFromParent();
- return true;
- default:
- break;
- }
return true;
}
@@ -161,6 +155,11 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
.legalFor({{s8, s8}, {s16, s8}, {s32, s8}})
.clampScalar(0, s8, s32)
.clampScalar(1, s8, s8);
+
+ // Comparison
+ getActionDefinitionsBuilder(G_ICMP)
+ .legalForCartesianProduct({s8}, {s8, s16, s32, p0})
+ .clampScalar(0, s8, s8);
}
// Control-flow
@@ -179,12 +178,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
setAction({G_ANYEXT, s128}, Legal);
getActionDefinitionsBuilder(G_SEXT_INREG).lower();
- // Comparison
- setAction({G_ICMP, s1}, Legal);
-
- for (auto Ty : {s8, s16, s32, p0})
- setAction({G_ICMP, 1, Ty}, Legal);
-
// Merge/Unmerge
for (const auto &Ty : {s16, s32, s64}) {
setAction({G_MERGE_VALUES, Ty}, Legal);
@@ -253,7 +246,9 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
.widenScalarToNextPow2(1);
// Comparison
- setAction({G_ICMP, 1, s64}, Legal);
+ getActionDefinitionsBuilder(G_ICMP)
+ .legalForCartesianProduct({s8}, {s8, s16, s32, s64, p0})
+ .clampScalar(0, s8, s8);
getActionDefinitionsBuilder(G_FCMP)
.legalForCartesianProduct({s8}, {s32, s64})
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index 50f8b3477acc..810fee052b5a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -42,6 +42,7 @@
#include "X86TargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -104,9 +105,9 @@ static cl::opt<bool> EmitDotVerify(
cl::init(false), cl::Hidden);
static llvm::sys::DynamicLibrary OptimizeDL;
-typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
- unsigned int *edges, int *edge_values,
- int *cut_edges /* out */, unsigned int edges_size);
+typedef int (*OptimizeCutT)(unsigned int *Nodes, unsigned int NodesSize,
+ unsigned int *Edges, int *EdgeValues,
+ int *CutEdges /* out */, unsigned int EdgesSize);
static OptimizeCutT OptimizeCut = nullptr;
namespace {
@@ -148,9 +149,10 @@ public:
private:
using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>;
+ using Edge = MachineGadgetGraph::Edge;
+ using Node = MachineGadgetGraph::Node;
using EdgeSet = MachineGadgetGraph::EdgeSet;
using NodeSet = MachineGadgetGraph::NodeSet;
- using Gadget = std::pair<MachineInstr *, MachineInstr *>;
const X86Subtarget *STI;
const TargetInstrInfo *TII;
@@ -162,15 +164,13 @@ private:
const MachineDominanceFrontier &MDF) const;
int hardenLoadsWithPlugin(MachineFunction &MF,
std::unique_ptr<MachineGadgetGraph> Graph) const;
- int hardenLoadsWithGreedyHeuristic(
- MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const;
+ int hardenLoadsWithHeuristic(MachineFunction &MF,
+ std::unique_ptr<MachineGadgetGraph> Graph) const;
int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G,
EdgeSet &ElimEdges /* in, out */,
NodeSet &ElimNodes /* in, out */) const;
std::unique_ptr<MachineGadgetGraph>
trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const;
- void findAndCutEdges(MachineGadgetGraph &G,
- EdgeSet &CutEdges /* out */) const;
int insertFences(MachineFunction &MF, MachineGadgetGraph &G,
EdgeSet &CutEdges /* in, out */) const;
bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const;
@@ -198,7 +198,7 @@ struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
using ChildIteratorType = typename Traits::ChildIteratorType;
using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType;
- DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+ DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
std::string getNodeLabel(NodeRef Node, GraphType *) {
if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel)
@@ -243,7 +243,7 @@ void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
AU.setPreservesCFG();
}
-static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF,
+static void writeGadgetGraph(raw_ostream &OS, MachineFunction &MF,
MachineGadgetGraph *G) {
WriteGraph(OS, G, /*ShortNames*/ false,
"Speculative gadgets for \"" + MF.getName() + "\" function");
@@ -279,7 +279,7 @@ bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
return false; // didn't find any gadgets
if (EmitDotVerify) {
- WriteGadgetGraph(outs(), MF, Graph.get());
+ writeGadgetGraph(outs(), MF, Graph.get());
return false;
}
@@ -292,7 +292,7 @@ bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
raw_fd_ostream FileOut(FileName, FileError);
if (FileError)
errs() << FileError.message();
- WriteGadgetGraph(FileOut, MF, Graph.get());
+ writeGadgetGraph(FileOut, MF, Graph.get());
FileOut.close();
LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n");
if (EmitDotOnly)
@@ -313,7 +313,7 @@ bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
}
FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph));
} else { // Use the default greedy heuristic
- FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph));
+ FencesInserted = hardenLoadsWithHeuristic(MF, std::move(Graph));
}
if (FencesInserted > 0)
@@ -367,7 +367,7 @@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
// Use RDF to find all the uses of `Def`
rdf::NodeSet Uses;
- RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG));
+ RegisterRef DefReg = Def.Addr->getRegRef(DFG);
for (auto UseID : L.getAllReachedUses(DefReg, Def)) {
auto Use = DFG.addr<UseNode *>(UseID);
if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node
@@ -540,17 +540,17 @@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
// Returns the number of remaining gadget edges that could not be eliminated
int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
- MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */,
- MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const {
+ MachineGadgetGraph &G, EdgeSet &ElimEdges /* in, out */,
+ NodeSet &ElimNodes /* in, out */) const {
if (G.NumFences > 0) {
// Eliminate fences and CFG edges that ingress and egress the fence, as
// they are trivially mitigated.
- for (const auto &E : G.edges()) {
- const MachineGadgetGraph::Node *Dest = E.getDest();
+ for (const Edge &E : G.edges()) {
+ const Node *Dest = E.getDest();
if (isFence(Dest->getValue())) {
ElimNodes.insert(*Dest);
ElimEdges.insert(E);
- for (const auto &DE : Dest->edges())
+ for (const Edge &DE : Dest->edges())
ElimEdges.insert(DE);
}
}
@@ -558,29 +558,28 @@ int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
// Find and eliminate gadget edges that have been mitigated.
int MitigatedGadgets = 0, RemainingGadgets = 0;
- MachineGadgetGraph::NodeSet ReachableNodes{G};
- for (const auto &RootN : G.nodes()) {
+ NodeSet ReachableNodes{G};
+ for (const Node &RootN : G.nodes()) {
if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge))
continue; // skip this node if it isn't a gadget source
// Find all of the nodes that are CFG-reachable from RootN using DFS
ReachableNodes.clear();
- std::function<void(const MachineGadgetGraph::Node *, bool)>
- FindReachableNodes =
- [&](const MachineGadgetGraph::Node *N, bool FirstNode) {
- if (!FirstNode)
- ReachableNodes.insert(*N);
- for (const auto &E : N->edges()) {
- const MachineGadgetGraph::Node *Dest = E.getDest();
- if (MachineGadgetGraph::isCFGEdge(E) &&
- !ElimEdges.contains(E) && !ReachableNodes.contains(*Dest))
- FindReachableNodes(Dest, false);
- }
- };
+ std::function<void(const Node *, bool)> FindReachableNodes =
+ [&](const Node *N, bool FirstNode) {
+ if (!FirstNode)
+ ReachableNodes.insert(*N);
+ for (const Edge &E : N->edges()) {
+ const Node *Dest = E.getDest();
+ if (MachineGadgetGraph::isCFGEdge(E) && !ElimEdges.contains(E) &&
+ !ReachableNodes.contains(*Dest))
+ FindReachableNodes(Dest, false);
+ }
+ };
FindReachableNodes(&RootN, true);
// Any gadget whose sink is unreachable has been mitigated
- for (const auto &E : RootN.edges()) {
+ for (const Edge &E : RootN.edges()) {
if (MachineGadgetGraph::isGadgetEdge(E)) {
if (ReachableNodes.contains(*E.getDest())) {
// This gadget's sink is reachable
@@ -598,8 +597,8 @@ int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
std::unique_ptr<MachineGadgetGraph>
X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges(
std::unique_ptr<MachineGadgetGraph> Graph) const {
- MachineGadgetGraph::NodeSet ElimNodes{*Graph};
- MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
+ NodeSet ElimNodes{*Graph};
+ EdgeSet ElimEdges{*Graph};
int RemainingGadgets =
elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes);
if (ElimEdges.empty() && ElimNodes.empty()) {
@@ -630,11 +629,11 @@ int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin(
auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size());
auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size());
auto EdgeValues = std::make_unique<int[]>(Graph->edges_size());
- for (const auto &N : Graph->nodes()) {
+ for (const Node &N : Graph->nodes()) {
Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin());
}
Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node
- for (const auto &E : Graph->edges()) {
+ for (const Edge &E : Graph->edges()) {
Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest());
EdgeValues[Graph->getEdgeIndex(E)] = E.getValue();
}
@@ -651,74 +650,67 @@ int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin(
LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
- Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph},
- CutEdges);
+ Graph = GraphBuilder::trim(*Graph, NodeSet{*Graph}, CutEdges);
} while (true);
return FencesInserted;
}
-int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic(
+int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithHeuristic(
MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
- LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
- Graph = trimMitigatedEdges(std::move(Graph));
- LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
+ // If `MF` does not have any fences, then no gadgets would have been
+ // mitigated at this point.
+ if (Graph->NumFences > 0) {
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
+ Graph = trimMitigatedEdges(std::move(Graph));
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
+ }
+
if (Graph->NumGadgets == 0)
return 0;
LLVM_DEBUG(dbgs() << "Cutting edges...\n");
- MachineGadgetGraph::NodeSet ElimNodes{*Graph}, GadgetSinks{*Graph};
- MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph};
- auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) {
- return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
- MachineGadgetGraph::isCFGEdge(E);
- };
- auto IsGadgetEdge = [&ElimEdges,
- &CutEdges](const MachineGadgetGraph::Edge &E) {
- return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
- MachineGadgetGraph::isGadgetEdge(E);
- };
-
- // FIXME: this is O(E^2), we could probably do better.
- do {
- // Find the cheapest CFG edge that will eliminate a gadget (by being
- // egress from a SOURCE node or ingress to a SINK node), and cut it.
- const MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
-
- // First, collect all gadget source and sink nodes.
- MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph};
- for (const auto &N : Graph->nodes()) {
- if (ElimNodes.contains(N))
+ EdgeSet CutEdges{*Graph};
+
+ // Begin by collecting all ingress CFG edges for each node
+ DenseMap<const Node *, SmallVector<const Edge *, 2>> IngressEdgeMap;
+ for (const Edge &E : Graph->edges())
+ if (MachineGadgetGraph::isCFGEdge(E))
+ IngressEdgeMap[E.getDest()].push_back(&E);
+
+ // For each gadget edge, make cuts that guarantee the gadget will be
+ // mitigated. A computationally efficient way to achieve this is to either:
+ // (a) cut all egress CFG edges from the gadget source, or
+ // (b) cut all ingress CFG edges to the gadget sink.
+ //
+ // Moreover, the algorithm tries not to make a cut into a loop by preferring
+ // to make a (b)-type cut if the gadget source resides at a greater loop depth
+ // than the gadget sink, or an (a)-type cut otherwise.
+ for (const Node &N : Graph->nodes()) {
+ for (const Edge &E : N.edges()) {
+ if (!MachineGadgetGraph::isGadgetEdge(E))
continue;
- for (const auto &E : N.edges()) {
- if (IsGadgetEdge(E)) {
- GadgetSources.insert(N);
- GadgetSinks.insert(*E.getDest());
- }
- }
- }
- // Next, look for the cheapest CFG edge which, when cut, is guaranteed to
- // mitigate at least one gadget by either:
- // (a) being egress from a gadget source, or
- // (b) being ingress to a gadget sink.
- for (const auto &N : Graph->nodes()) {
- if (ElimNodes.contains(N))
- continue;
- for (const auto &E : N.edges()) {
- if (IsCFGEdge(E)) {
- if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) {
- if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue())
- CheapestSoFar = &E;
- }
- }
- }
+ SmallVector<const Edge *, 2> EgressEdges;
+ SmallVector<const Edge *, 2> &IngressEdges = IngressEdgeMap[E.getDest()];
+ for (const Edge &EgressEdge : N.edges())
+ if (MachineGadgetGraph::isCFGEdge(EgressEdge))
+ EgressEdges.push_back(&EgressEdge);
+
+ int EgressCutCost = 0, IngressCutCost = 0;
+ for (const Edge *EgressEdge : EgressEdges)
+ if (!CutEdges.contains(*EgressEdge))
+ EgressCutCost += EgressEdge->getValue();
+ for (const Edge *IngressEdge : IngressEdges)
+ if (!CutEdges.contains(*IngressEdge))
+ IngressCutCost += IngressEdge->getValue();
+
+ auto &EdgesToCut =
+ IngressCutCost < EgressCutCost ? IngressEdges : EgressEdges;
+ for (const Edge *E : EdgesToCut)
+ CutEdges.insert(*E);
}
-
- assert(CheapestSoFar && "Failed to cut an edge");
- CutEdges.insert(*CheapestSoFar);
- ElimEdges.insert(*CheapestSoFar);
- } while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes));
+ }
LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
@@ -734,8 +726,8 @@ int X86LoadValueInjectionLoadHardeningPass::insertFences(
MachineFunction &MF, MachineGadgetGraph &G,
EdgeSet &CutEdges /* in, out */) const {
int FencesInserted = 0;
- for (const auto &N : G.nodes()) {
- for (const auto &E : N.edges()) {
+ for (const Node &N : G.nodes()) {
+ for (const Edge &E : N.edges()) {
if (CutEdges.contains(E)) {
MachineInstr *MI = N.getValue(), *Prev;
MachineBasicBlock *MBB; // Insert an LFENCE in this MBB
@@ -751,7 +743,7 @@ int X86LoadValueInjectionLoadHardeningPass::insertFences(
Prev = MI->getPrevNode();
// Remove all egress CFG edges from this branch because the inserted
// LFENCE prevents gadgets from crossing the branch.
- for (const auto &E : N.edges()) {
+ for (const Edge &E : N.edges()) {
if (MachineGadgetGraph::isCFGEdge(E))
CutEdges.insert(E);
}
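
The heuristic above replaces the old O(E^2) greedy search: for each gadget edge it simply cuts whichever set is cheaper, all egress CFG edges of the gadget source or all ingress CFG edges of the gadget sink. A minimal standalone sketch of that cost comparison, using a toy graph in place of LLVM's MachineGadgetGraph (the Edge/Node structs and field names below are invented for illustration only):

    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    // Toy stand-ins for the gadget-graph machinery; not LLVM types.
    struct Edge { int Dest; int Value; bool IsCFG; };
    struct Node { std::vector<Edge> Edges; };

    // For one gadget edge Src -> Sink, cut whichever side is cheaper:
    // all uncut egress CFG edges of Src, or all uncut ingress CFG edges of Sink.
    static void cutForGadget(const std::vector<Node> &G, int Src, int Sink,
                             std::set<std::pair<int, int>> &CutEdges) {
      int EgressCost = 0, IngressCost = 0;
      std::vector<std::pair<int, int>> Egress, Ingress;
      for (const Edge &E : G[Src].Edges)
        if (E.IsCFG) {
          Egress.push_back({Src, E.Dest});
          if (!CutEdges.count({Src, E.Dest}))
            EgressCost += E.Value;
        }
      for (int N = 0; N < (int)G.size(); ++N)
        for (const Edge &E : G[N].Edges)
          if (E.IsCFG && E.Dest == Sink) {
            Ingress.push_back({N, Sink});
            if (!CutEdges.count({N, Sink}))
              IngressCost += E.Value;
          }
      const auto &ToCut = IngressCost < EgressCost ? Ingress : Egress;
      for (const auto &E : ToCut)
        CutEdges.insert(E);
    }

    int main() {
      // Node 0 is a gadget source, node 2 a gadget sink, with CFG edges 0->1->2.
      std::vector<Node> G(3);
      G[0].Edges = {{1, /*Value=*/5, /*IsCFG=*/true}};
      G[1].Edges = {{2, 1, true}};
      std::set<std::pair<int, int>> Cut;
      cutForGadget(G, /*Src=*/0, /*Sink=*/2, Cut);
      std::printf("cut %zu edge(s)\n", Cut.size()); // the single ingress edge 1->2
    }

In the real pass the edge values already encode loop depth, which is how the "prefer not to cut into a loop" preference described in the comment falls out of the plain cost comparison.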
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
index 6e1134a25950..7b6276c1d87e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -72,62 +72,39 @@ bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
++NumFunctionsConsidered;
const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
const X86InstrInfo *TII = Subtarget->getInstrInfo();
- unsigned ClobberReg = X86::NoRegister;
- std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s;
- UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer
- UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer
- UnclobberableGR64s.set(X86::RAX); // used for function return
- UnclobberableGR64s.set(X86::RDX); // used for function return
-
- // We can clobber any register allowed by the function's calling convention.
- for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR)
- UnclobberableGR64s.set(Reg);
- for (auto &Reg : X86::GR64RegClass) {
- if (!UnclobberableGR64s.test(Reg)) {
- ClobberReg = Reg;
- break;
- }
- }
-
- if (ClobberReg != X86::NoRegister) {
- LLVM_DEBUG(dbgs() << "Selected register "
- << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg)
- << " to clobber\n");
- } else {
- LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n");
- }
bool Modified = false;
for (auto &MBB : MF) {
- if (MBB.empty())
- continue;
-
- MachineInstr &MI = MBB.back();
- if (MI.getOpcode() != X86::RETQ)
- continue;
-
- if (ClobberReg != X86::NoRegister) {
- MBB.erase_instr(&MI);
- BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r))
- .addReg(ClobberReg, RegState::Define)
- .setMIFlag(MachineInstr::FrameDestroy);
- BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE));
- BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r))
- .addReg(ClobberReg);
- } else {
- // In case there is no available scratch register, we can still read from
- // RSP to assert that RSP points to a valid page. The write to RSP is
- // also helpful because it verifies that the stack's write permissions
- // are intact.
- MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
- addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
- X86::RSP, false, 0)
- .addImm(0)
- ->addRegisterDead(X86::EFLAGS, TRI);
+ for (auto MBBI = MBB.begin(); MBBI != MBB.end(); ++MBBI) {
+ if (MBBI->getOpcode() != X86::RETQ)
+ continue;
+
+ unsigned ClobberReg = TRI->findDeadCallerSavedReg(MBB, MBBI);
+ if (ClobberReg != X86::NoRegister) {
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(X86::POP64r))
+ .addReg(ClobberReg, RegState::Define)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(X86::JMP64r))
+ .addReg(ClobberReg);
+ MBB.erase(MBBI);
+ } else {
+ // In case there is no available scratch register, we can still read
+ // from RSP to assert that RSP points to a valid page. The write to RSP
+ // is also helpful because it verifies that the stack's write
+ // permissions are intact.
+ MachineInstr *Fence =
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(X86::LFENCE));
+ addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
+ X86::RSP, false, 0)
+ .addImm(0)
+ ->addRegisterDead(X86::EFLAGS, TRI);
+ }
+
+ ++NumFences;
+ Modified = true;
+ break;
}
-
- ++NumFences;
- Modified = true;
}
if (Modified)
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXType.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXType.cpp
new file mode 100644
index 000000000000..85166decd8cd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -0,0 +1,351 @@
+//===- X86LowerAMXType.cpp - ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Pass to transform <256 x i32> load/store
+/// <256 x i32> is bitcast to x86_amx on X86, and the AMX instruction set only
+/// provides simple operations on x86_amx. Basic elementwise operations are
+/// not supported by AMX. Since x86_amx is bitcast from vector <256 x i32> and
+/// only AMX intrinsics can operate on the type, we need to transform
+/// <256 x i32> load/store instructions into AMX load/store. If the bitcast
+/// cannot be combined with the load/store, we transform the bitcast into an
+/// AMX load/store plus a <256 x i32> store/load.
+//
+//===----------------------------------------------------------------------===//
+//
+#include "X86.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "lower-amx-type"
+
+static AllocaInst *CreateAllocaInst(IRBuilder<> &Builder, BasicBlock *BB) {
+ Function &F = *BB->getParent();
+ Module *M = BB->getModule();
+ const DataLayout &DL = M->getDataLayout();
+
+ Type *V256I32Ty = VectorType::get(Builder.getInt32Ty(), 256, false);
+ LLVMContext &Ctx = Builder.getContext();
+ auto AllocaAlignment = DL.getPrefTypeAlign(Type::getX86_AMXTy(Ctx));
+ unsigned AllocaAS = DL.getAllocaAddrSpace();
+ AllocaInst *AllocaRes =
+ new AllocaInst(V256I32Ty, AllocaAS, "", &F.getEntryBlock().front());
+ AllocaRes->setAlignment(AllocaAlignment);
+ return AllocaRes;
+}
+
+static std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo) {
+ Value *Row = nullptr, *Col = nullptr;
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Expect amx intrinsics");
+ case Intrinsic::x86_tileloadd64_internal:
+ case Intrinsic::x86_tilestored64_internal: {
+ Row = II->getArgOperand(0);
+ Col = II->getArgOperand(1);
+ break;
+ }
+ // a * b + c
+ // The shape depends on which operand.
+ case Intrinsic::x86_tdpbssd_internal: {
+ switch (OpNo) {
+ case 3:
+ Row = II->getArgOperand(0);
+ Col = II->getArgOperand(1);
+ break;
+ case 4:
+ Row = II->getArgOperand(0);
+ Col = II->getArgOperand(2);
+ break;
+ case 5:
+ Row = II->getArgOperand(2);
+ Col = II->getArgOperand(1);
+ break;
+ }
+ break;
+ }
+ }
+
+ return std::make_pair(Row, Col);
+}
+
+// %src = load <256 x i32>, <256 x i32>* %addr, align 64
+// %2 = bitcast <256 x i32> %src to x86_amx
+// -->
+// %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
+// i8* %addr, i64 %stride64)
+static void combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast) {
+ Value *Row = nullptr, *Col = nullptr;
+ Use &U = *(Bitcast->use_begin());
+ unsigned OpNo = U.getOperandNo();
+ auto *II = cast<IntrinsicInst>(U.getUser());
+ std::tie(Row, Col) = getShape(II, OpNo);
+ IRBuilder<> Builder(Bitcast);
+ // Use the maximum column as the stride.
+ Value *Stride = Builder.getInt64(64);
+ Value *I8Ptr =
+ Builder.CreateBitCast(LD->getOperand(0), Builder.getInt8PtrTy());
+ std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
+
+ Value *NewInst =
+ Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, None, Args);
+ Bitcast->replaceAllUsesWith(NewInst);
+}
+
+// %src = call x86_amx @llvm.x86.tileloadd64.internal(%row, %col, %addr,
+// %stride);
+// %13 = bitcast x86_amx %src to <256 x i32>
+// store <256 x i32> %13, <256 x i32>* %addr, align 64
+// -->
+// call void @llvm.x86.tilestored64.internal(%row, %col, %addr,
+// %stride64, %13)
+static void combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST) {
+
+ Value *Tile = Bitcast->getOperand(0);
+ auto *II = cast<IntrinsicInst>(Tile);
+ // The tile is the output of an AMX intrinsic. The first operand of the
+ // intrinsic is the row and the second operand is the column.
+ Value *Row = II->getOperand(0);
+ Value *Col = II->getOperand(1);
+ IRBuilder<> Builder(ST);
+ // Use the maximum column as the stride. It must be the same as the
+ // load stride.
+ Value *Stride = Builder.getInt64(64);
+ Value *I8Ptr =
+ Builder.CreateBitCast(ST->getOperand(1), Builder.getInt8PtrTy());
+ std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
+ Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args);
+ if (Bitcast->hasOneUse())
+ return;
+ // %13 = bitcast x86_amx %src to <256 x i32>
+ // store <256 x i32> %13, <256 x i32>* %addr, align 64
+ // %add = <256 x i32> %13, <256 x i32> %src2
+ // -->
+ // %13 = bitcast x86_amx %src to <256 x i32>
+ // call void @llvm.x86.tilestored64.internal(%row, %col, %addr,
+ // %stride64, %13)
+ // %14 = load <256 x i32>, %addr
+ // %add = <256 x i32> %14, <256 x i32> %src2
+ Value *Vec = Builder.CreateLoad(Bitcast->getType(), ST->getOperand(1));
+ Bitcast->replaceAllUsesWith(Vec);
+}
+
+// Transform the bitcast into <store, load> instructions.
+static bool transformBitcast(BitCastInst *Bitcast) {
+ IRBuilder<> Builder(Bitcast);
+ AllocaInst *AllocaAddr;
+ Value *I8Ptr, *Stride;
+ auto *Src = Bitcast->getOperand(0);
+
+ auto Prepare = [&]() {
+ AllocaAddr = CreateAllocaInst(Builder, Bitcast->getParent());
+ I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getInt8PtrTy());
+ Stride = Builder.getInt64(64);
+ };
+
+ if (Bitcast->getType()->isX86_AMXTy()) {
+ // %2 = bitcast <256 x i32> %src to x86_amx
+ // -->
+ // %addr = alloca <256 x i32>, align 64
+ // store <256 x i32> %src, <256 x i32>* %addr, align 64
+ // %addr2 = bitcast <256 x i32>* to i8*
+ // %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
+ // i8* %addr2,
+ // i64 64)
+ Use &U = *(Bitcast->use_begin());
+ unsigned OpNo = U.getOperandNo();
+ auto *II = dyn_cast<IntrinsicInst>(U.getUser());
+ if (!II)
+ return false; // May be a bitcast from x86_amx to <256 x i32>.
+ Prepare();
+ Builder.CreateStore(Src, AllocaAddr);
+ // TODO: we can pick a constant operand for the shape.
+ Value *Row = nullptr, *Col = nullptr;
+ std::tie(Row, Col) = getShape(II, OpNo);
+ std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
+ Value *NewInst = Builder.CreateIntrinsic(
+ Intrinsic::x86_tileloadd64_internal, None, Args);
+ Bitcast->replaceAllUsesWith(NewInst);
+ } else {
+ // %2 = bitcast x86_amx %src to <256 x i32>
+ // -->
+ // %addr = alloca <256 x i32>, align 64
+ // %addr2 = bitcast <256 x i32>* to i8*
+ // call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col,
+ // i8* %addr2, i64 %stride)
+ // %2 = load <256 x i32>, <256 x i32>* %addr, align 64
+ auto *II = dyn_cast<IntrinsicInst>(Src);
+ if (!II)
+ return false; // May be a bitcast from <256 x i32> to x86_amx.
+ Prepare();
+ Value *Row = II->getOperand(0);
+ Value *Col = II->getOperand(1);
+ std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Src};
+ Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args);
+ Value *NewInst = Builder.CreateLoad(Bitcast->getType(), AllocaAddr);
+ Bitcast->replaceAllUsesWith(NewInst);
+ }
+
+ return true;
+}
+
+namespace {
+class X86LowerAMXType {
+ Function &Func;
+
+public:
+ X86LowerAMXType(Function &F) : Func(F) {}
+ bool visit();
+};
+
+bool X86LowerAMXType::visit() {
+ SmallVector<Instruction *, 8> DeadInsts;
+
+ for (BasicBlock *BB : post_order(&Func)) {
+ for (BasicBlock::reverse_iterator II = BB->rbegin(), IE = BB->rend();
+ II != IE;) {
+ Instruction &Inst = *II++;
+ auto *Bitcast = dyn_cast<BitCastInst>(&Inst);
+ if (!Bitcast)
+ continue;
+
+ Value *Src = Bitcast->getOperand(0);
+ if (Bitcast->getType()->isX86_AMXTy()) {
+ if (Bitcast->user_empty()) {
+ DeadInsts.push_back(Bitcast);
+ continue;
+ }
+ LoadInst *LD = dyn_cast<LoadInst>(Src);
+ if (!LD) {
+ if (transformBitcast(Bitcast))
+ DeadInsts.push_back(Bitcast);
+ continue;
+ }
+ // If the load has multiple users, duplicate the vector load.
+ // %src = load <256 x i32>, <256 x i32>* %addr, align 64
+ // %2 = bitcast <256 x i32> %src to x86_amx
+ // %add = add <256 x i32> %src, <256 x i32> %src2
+ // -->
+ // %src = load <256 x i32>, <256 x i32>* %addr, align 64
+ // %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
+ // i8* %addr, i64 %stride64)
+ // %add = add <256 x i32> %src, <256 x i32> %src2
+
+ // If the load has a single user, the load will be eliminated in DAG ISel.
+ // %src = load <256 x i32>, <256 x i32>* %addr, align 64
+ // %2 = bitcast <256 x i32> %src to x86_amx
+ // -->
+ // %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
+ // i8* %addr, i64 %stride64)
+ combineLoadBitcast(LD, Bitcast);
+ DeadInsts.push_back(Bitcast);
+ if (LD->hasOneUse())
+ DeadInsts.push_back(LD);
+ } else if (Src->getType()->isX86_AMXTy()) {
+ if (Bitcast->user_empty()) {
+ DeadInsts.push_back(Bitcast);
+ continue;
+ }
+ StoreInst *ST = nullptr;
+ for (auto UI = Bitcast->use_begin(), UE = Bitcast->use_end();
+ UI != UE;) {
+ Value *I = (UI++)->getUser();
+ ST = dyn_cast<StoreInst>(I);
+ if (ST)
+ break;
+ }
+ if (!ST) {
+ if (transformBitcast(Bitcast))
+ DeadInsts.push_back(Bitcast);
+ continue;
+ }
+ // If the bitcast (%13) has one use, combine the bitcast and store into an AMX store.
+ // %src = call x86_amx @llvm.x86.tileloadd64.internal(%row, %col, %addr,
+ // %stride);
+ // %13 = bitcast x86_amx %src to <256 x i32>
+ // store <256 x i32> %13, <256 x i32>* %addr, align 64
+ // -->
+ // call void @llvm.x86.tilestored64.internal(%row, %col, %addr,
+ // %stride64, %13)
+ //
+ // If the bitcast (%13) has multiple uses, transform as below.
+ // %13 = bitcast x86_amx %src to <256 x i32>
+ // store <256 x i32> %13, <256 x i32>* %addr, align 64
+ // %add = <256 x i32> %13, <256 x i32> %src2
+ // -->
+ // %13 = bitcast x86_amx %src to <256 x i32>
+ // call void @llvm.x86.tilestored64.internal(%row, %col, %addr,
+ // %stride64, %13)
+ // %14 = load <256 x i32>, %addr
+ // %add = <256 x i32> %14, <256 x i32> %src2
+ //
+ combineBitcastStore(Bitcast, ST);
+ // Delete user first.
+ DeadInsts.push_back(ST);
+ DeadInsts.push_back(Bitcast);
+ }
+ }
+ }
+
+ bool C = !DeadInsts.empty();
+
+ for (auto *Inst : DeadInsts)
+ Inst->eraseFromParent();
+
+ return C;
+}
+} // anonymous namespace
+
+namespace {
+
+class X86LowerAMXTypeLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ X86LowerAMXTypeLegacyPass() : FunctionPass(ID) {
+ initializeX86LowerAMXTypeLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ X86LowerAMXType LAT(F);
+ bool C = LAT.visit();
+ return C;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+};
+
+} // anonymous namespace
+
+static const char PassName[] = "Lower AMX type for load/store";
+char X86LowerAMXTypeLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
+ false)
+INITIALIZE_PASS_END(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
+ false)
+
+FunctionPass *llvm::createX86LowerAMXTypePass() {
+ return new X86LowerAMXTypeLegacyPass();
+}
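
For reference, the rewriting logic in X86LowerAMXType::visit() reduces to a small decision tree over each bitcast that touches x86_amx. Below is a compact standalone model of that decision; the enum and parameter names are invented, and real IR classes are replaced by booleans, so treat it as a sketch rather than the pass itself:

    #include <cstdio>

    // Toy model of the cases handled by the pass; names are illustrative only.
    enum class Rewrite {
      VectorLoadToTileLoad,   // load <256 x i32> + bitcast  -> tileloadd64
      TileStoreFromBitcast,   // bitcast + store <256 x i32> -> tilestored64
      SpillThroughStackSlot,  // no foldable load/store      -> alloca + load/store
      DeadBitcast             // bitcast has no users        -> just erase it
    };

    static Rewrite classify(bool DstIsAMX, bool HasUsers, bool FedByVectorLoad,
                            bool FeedsVectorStore) {
      if (!HasUsers)
        return Rewrite::DeadBitcast;
      if (DstIsAMX) // <256 x i32> -> x86_amx
        return FedByVectorLoad ? Rewrite::VectorLoadToTileLoad
                               : Rewrite::SpillThroughStackSlot;
      // x86_amx -> <256 x i32>
      return FeedsVectorStore ? Rewrite::TileStoreFromBitcast
                              : Rewrite::SpillThroughStackSlot;
    }

    int main() {
      // A bitcast to x86_amx whose operand is a plain vector load.
      Rewrite R = classify(/*DstIsAMX=*/true, /*HasUsers=*/true,
                           /*FedByVectorLoad=*/true, /*FeedsVectorStore=*/false);
      std::printf("%d\n", static_cast<int>(R)); // 0 == VectorLoadToTileLoad
    }

The spill-through-stack case corresponds to transformBitcast() above, which materializes an alloca and round-trips the value through tileloadd64/tilestored64 when the bitcast cannot be folded into an adjacent load or store.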
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
index 9ce2a4637e2e..89fa3ae3a3f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -977,20 +977,24 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
const MachineInstr &MI) {
NoAutoPaddingScope NoPadScope(*OutStreamer);
- bool Is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
- MI.getOpcode() == X86::TLS_base_addr64;
+ bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
+ MI.getOpcode() != X86::TLS_base_addr32;
+ bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
+ MI.getOpcode() == X86::TLS_base_addr64;
MCContext &Ctx = OutStreamer->getContext();
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
case X86::TLS_addr32:
case X86::TLS_addr64:
+ case X86::TLS_addrX32:
SRVK = MCSymbolRefExpr::VK_TLSGD;
break;
case X86::TLS_base_addr32:
SRVK = MCSymbolRefExpr::VK_TLSLDM;
break;
case X86::TLS_base_addr64:
+ case X86::TLS_base_addrX32:
SRVK = MCSymbolRefExpr::VK_TLSLD;
break;
default:
@@ -1010,7 +1014,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
if (Is64Bits) {
bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
- if (NeedsPadding)
+ if (NeedsPadding && Is64BitsLP64)
EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
.addReg(X86::RDI)
@@ -1079,29 +1083,30 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
}
}
-/// Return the longest nop which can be efficiently decoded for the given
-/// target cpu. 15-bytes is the longest single NOP instruction, but some
-/// platforms can't decode the longest forms efficiently.
-static unsigned maxLongNopLength(const X86Subtarget *Subtarget) {
- if (Subtarget->getFeatureBits()[X86::ProcIntelSLM])
- return 7;
- if (Subtarget->getFeatureBits()[X86::FeatureFast15ByteNOP])
- return 15;
- if (Subtarget->getFeatureBits()[X86::FeatureFast11ByteNOP])
- return 11;
- if (Subtarget->getFeatureBits()[X86::FeatureNOPL] || Subtarget->is64Bit())
- return 10;
- if (Subtarget->is32Bit())
- return 2;
- return 1;
-}
-
/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
const X86Subtarget *Subtarget) {
+ // Determine the longest nop which can be efficiently decoded for the given
+ // target CPU. 15 bytes is the longest single NOP instruction, but some
+ // platforms can't decode the longest forms efficiently.
+ unsigned MaxNopLength = 1;
+ if (Subtarget->is64Bit()) {
+ // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
+ // IndexReg/BaseReg below need to be updated.
+ if (Subtarget->hasFeature(X86::FeatureFast7ByteNOP))
+ MaxNopLength = 7;
+ else if (Subtarget->hasFeature(X86::FeatureFast15ByteNOP))
+ MaxNopLength = 15;
+ else if (Subtarget->hasFeature(X86::FeatureFast11ByteNOP))
+ MaxNopLength = 11;
+ else
+ MaxNopLength = 10;
+ } else if (Subtarget->is32Bit())
+ MaxNopLength = 2;
+
// Cap a single nop emission at the profitable value for the target
- NumBytes = std::min(NumBytes, maxLongNopLength(Subtarget));
+ NumBytes = std::min(NumBytes, MaxNopLength);
unsigned NopSize;
unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
@@ -1329,7 +1334,7 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
MCInst MCI;
MCI.setOpcode(Opcode);
- for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end()))
+ for (auto &MO : drop_begin(MI.operands(), 2))
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
MCI.addOperand(MaybeOperand.getValue());
@@ -1705,7 +1710,7 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
unsigned OpCode = MI.getOperand(0).getImm();
MCInst Ret;
Ret.setOpcode(OpCode);
- for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
+ for (auto &MO : drop_begin(MI.operands()))
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
Ret.addOperand(MaybeOperand.getValue());
OutStreamer->emitInstruction(Ret, getSubtargetInfo());
@@ -1744,7 +1749,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
// Before emitting the instruction, add a comment to indicate that this is
// indeed a tail call.
OutStreamer->AddComment("TAILCALL");
- for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
+ for (auto &MO : drop_begin(MI.operands()))
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
TC.addOperand(MaybeOperand.getValue());
OutStreamer->emitInstruction(TC, getSubtargetInfo());
@@ -1779,10 +1784,7 @@ static const Constant *getConstantFromPool(const MachineInstr &MI,
if (ConstantEntry.isMachineConstantPoolEntry())
return nullptr;
- const Constant *C = ConstantEntry.Val.ConstVal;
- assert((!C || ConstantEntry.getType() == C->getType()) &&
- "Expected a constant of the same type!");
- return C;
+ return ConstantEntry.Val.ConstVal;
}
static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
@@ -2444,8 +2446,10 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
case X86::TLS_addr32:
case X86::TLS_addr64:
+ case X86::TLS_addrX32:
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
+ case X86::TLS_base_addrX32:
return LowerTlsAddr(MCInstLowering, *MI);
case X86::MOVPC32r: {
@@ -2594,6 +2598,15 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
}
return;
}
+ case X86::UBSAN_UD1:
+ EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
+ .addReg(X86::EAX)
+ .addReg(X86::EAX)
+ .addImm(1)
+ .addReg(X86::NoRegister)
+ .addImm(MI->getOperand(0).getImm())
+ .addReg(X86::NoRegister));
+ return;
}
MCInst TmpInst;
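
The emitNop() change above inlines the old maxLongNopLength() helper and keys the cap on is64Bit()/is32Bit() plus the fast-NOP subtarget features. A minimal standalone sketch of that selection, with the subtarget queries reduced to plain booleans (the parameter names are invented):

    #include <algorithm>
    #include <cstdio>

    // Mirror of the MaxNopLength selection in emitNop(); the flags stand in
    // for Subtarget->is64Bit()/is32Bit()/hasFeature(...) queries.
    static unsigned maxNopLength(bool Is64Bit, bool Is32Bit, bool Fast7,
                                 bool Fast15, bool Fast11) {
      unsigned Max = 1;
      if (Is64Bit) {
        if (Fast7)
          Max = 7;
        else if (Fast15)
          Max = 15;
        else if (Fast11)
          Max = 11;
        else
          Max = 10;
      } else if (Is32Bit)
        Max = 2;
      return Max;
    }

    int main() {
      // A generic 64-bit target without any fast-NOP feature caps out at 10 bytes.
      unsigned NumBytes =
          std::min(42u, maxNopLength(true, false, false, false, false));
      std::printf("emit at most %u-byte nop\n", NumBytes); // 10
    }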
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PartialReduction.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86PartialReduction.cpp
index 8784a3df1773..babd923e7496 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86PartialReduction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PartialReduction.cpp
@@ -392,8 +392,7 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
break;
// Push incoming values to the worklist.
- for (Value *InV : PN->incoming_values())
- Worklist.push_back(InV);
+ append_range(Worklist, PN->incoming_values());
continue;
}
@@ -402,8 +401,7 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
if (BO->getOpcode() == Instruction::Add) {
// Simple case. Single use, just push its operands to the worklist.
if (BO->hasNUses(BO == Root ? 2 : 1)) {
- for (Value *Op : BO->operands())
- Worklist.push_back(Op);
+ append_range(Worklist, BO->operands());
continue;
}
@@ -426,8 +424,7 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
continue;
// The phi forms a loop with this Add, push its operands.
- for (Value *Op : BO->operands())
- Worklist.push_back(Op);
+ append_range(Worklist, BO->operands());
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp
new file mode 100644
index 000000000000..05ee6c6c8384
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -0,0 +1,265 @@
+//===-- X86PreTileConfig.cpp - Tile Register Configure --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Pass to pre-configure the shape of AMX registers
+/// AMX registers need to be configured before use. The shape of an AMX
+/// register is encoded in the 1st and 2nd machine operands of AMX pseudo
+/// instructions. The pldtilecfg instruction configures the tile registers and
+/// should dominate all AMX instructions. It produces a virtual cfg register
+/// that is used by every AMX instruction.
+/// This pass finds the common dominator of all AMX instructions and inserts
+/// the pldtilecfg instruction there. In addition, the cfg register that
+/// pldtilecfg produces is appended as the last operand of each AMX
+/// instruction. This scheme models the def-use relationship between the AMX
+/// config instruction and the other AMX instructions. Below is an example.
+///
+/// ----B1----
+/// / \
+/// / \
+/// B2 B3
+/// %1:tile = PTILELOADDV %2:tile = PTILELOADDV
+///
+/// is transformed to
+///
+/// B1
+/// %25:tilecfg = PLDTILECFG
+/// / \
+/// / \
+/// %1:tile = PTILELOADDV %25 %2:tile = PTILELOADDV %25
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TileShapeInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "tile-pre-config"
+
+namespace {
+
+class X86PreTileConfig : public MachineFunctionPass {
+ // context
+ MachineFunction *MF = nullptr;
+ const X86Subtarget *ST = nullptr;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+
+ MachineInstr *getTileConfigPoint();
+
+public:
+ X86PreTileConfig() : MachineFunctionPass(ID) {}
+
+ /// Return the pass name.
+ StringRef getPassName() const override {
+ return "Tile Register Pre-configure";
+ }
+
+ /// X86PreTileConfig analysis usage.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Insert the tile configuration instruction.
+ bool runOnMachineFunction(MachineFunction &mf) override;
+
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char X86PreTileConfig::ID = 0;
+
+INITIALIZE_PASS_BEGIN(X86PreTileConfig, "tilepreconfig",
+ "Tile Register Configure", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig",
+ "Tile Register Configure", false, false)
+
+void X86PreTileConfig::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static Register buildConfigMI(MachineBasicBlock::iterator MI, int FrameIdx,
+ const TargetInstrInfo *TII,
+ MachineRegisterInfo *MRI,
+ const X86Subtarget *ST) {
+ auto *MBB = MI->getParent();
+
+ // FIXME: AMX should assume AVX512 enabled.
+ if (ST->hasAVX512()) {
+ // Zero stack slot.
+ Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
+ BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VPXORDZrr), Zmm)
+ .addReg(Zmm, RegState::Undef)
+ .addReg(Zmm, RegState::Undef);
+ addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSZmr)),
+ FrameIdx)
+ .addReg(Zmm);
+ }
+
+ // Build the pseudo ldtilecfg instruction.
+ Register VReg = MRI->createVirtualRegister(&X86::TILECFGRegClass);
+
+ addFrameReference(
+ BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::PLDTILECFG), VReg), FrameIdx);
+
+ return VReg;
+}
+
+static ShapeT getShape(const MachineInstr &MI, MachineRegisterInfo *MRI) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected machine instruction on tile");
+ case X86::PTILELOADDV:
+ case X86::PTDPBSSDV:
+ case X86::PTILEZEROV:
+ MachineOperand &MO1 = const_cast<MachineOperand &>(MI.getOperand(1));
+ MachineOperand &MO2 = const_cast<MachineOperand &>(MI.getOperand(2));
+ ShapeT Shape(&MO1, &MO2, MRI);
+ return Shape;
+ }
+}
+
+MachineInstr *X86PreTileConfig::getTileConfigPoint() {
+ DenseMap<Register, ShapeT> PhysShapeInfo;
+ MachineBasicBlock *MBB = nullptr;
+ DenseSet<const MachineInstr *> MIs;
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register VirtReg = Register::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(VirtReg))
+ continue;
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ if (RC.getID() != X86::TILERegClassID)
+ continue;
+
+ // Find the common dominator of all MIs that define tile registers.
+ for (const MachineOperand &MO : MRI->def_operands(VirtReg)) {
+ if (MO.isUndef())
+ continue;
+ const auto *MI = MO.getParent();
+ // A PHI or IMPLICIT_DEF instruction.
+ // There must be an input tile before the PHI instruction.
+ if (MI->isTransient())
+ continue;
+ if (!MBB)
+ MBB = const_cast<MachineBasicBlock *>(MI->getParent());
+ MBB = DomTree->findNearestCommonDominator(
+ MBB, const_cast<MachineBasicBlock *>(MI->getParent()));
+
+ // Collect the instructions that define shape.
+ ShapeT Shape = getShape(*MI, MRI);
+ std::array<MachineOperand *, 2> ShapeMOs = {Shape.getRow(),
+ Shape.getCol()};
+ for (auto *ShapeMO : ShapeMOs) {
+ Register ShapeReg = ShapeMO->getReg();
+ for (const MachineOperand &MO : MRI->def_operands(ShapeReg)) {
+ const auto *ShapeMI = MO.getParent();
+ MIs.insert(ShapeMI);
+ }
+ }
+ }
+ }
+ if (!MBB)
+ return nullptr;
+ // This pass runs before PHI elimination, so the function is
+ // still in SSA form.
+ assert(MRI->isSSA() && "Not SSA form in pre-tile config");
+ // Shape def should dominate tile config MBB.
+ // def s s1 s2
+ // / \ \ /
+ // / \ \ /
+ // conf s3=phi(s1,s2)
+ // |
+ // c
+ //
+ for (const auto *MI : MIs) {
+ const MachineBasicBlock *ShapeMBB = MI->getParent();
+ if (DomTree->dominates(ShapeMBB, MBB))
+ continue;
+ if (MI->isMoveImmediate())
+ continue;
+ report_fatal_error(MF->getName() + ": Failed to config tile register, "
+ "please define the shape earlier");
+ }
+
+ // ldtilecfg should be inserted after the MI that defines the shape.
+ MachineBasicBlock::reverse_instr_iterator I, E;
+ for (I = MBB->instr_rbegin(), E = MBB->instr_rend(); I != E; ++I) {
+ auto *MI = &*I;
+ if (MIs.count(MI) && (!MI->isMoveImmediate()))
+ break;
+ }
+ MachineBasicBlock::iterator MII;
+ if (I == E)
+ MII = MBB->getFirstNonPHI();
+ else {
+ MII = MachineBasicBlock::iterator(&*I);
+ MII++;
+ }
+ return &*MII;
+}
+
+static void addTileCFGUse(MachineFunction &MF, Register CFG) {
+ for (MachineBasicBlock &MBB : MF) {
+
+ // Traverse the basic block.
+ for (MachineInstr &MI : MBB) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ break;
+ case X86::PTILELOADDV:
+ case X86::PTILESTOREDV:
+ case X86::PTDPBSSDV:
+ case X86::PTILEZEROV:
+ unsigned NumOperands = MI.getNumOperands();
+ MI.RemoveOperand(NumOperands - 1);
+ MI.addOperand(MF, MachineOperand::CreateReg(CFG, false));
+ break;
+ }
+ }
+ }
+}
+
+bool X86PreTileConfig::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ ST = &mf.getSubtarget<X86Subtarget>();
+ TRI = ST->getRegisterInfo();
+ TII = mf.getSubtarget().getInstrInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+
+ MachineInstr *MI = getTileConfigPoint();
+ if (!MI)
+ return false;
+ unsigned Size = ST->getTileConfigSize();
+ Align Alignment = ST->getTileConfigAlignment();
+ int SS = mf.getFrameInfo().CreateStackObject(Size, Alignment, false);
+ Register CFG = buildConfigMI(MI, SS, TII, MRI, ST);
+ addTileCFGUse(mf, CFG);
+ return true;
+}
+
+FunctionPass *llvm::createX86PreTileConfigPass() {
+ return new X86PreTileConfig();
+}
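
getTileConfigPoint() in the new pass essentially folds findNearestCommonDominator() over every block that defines a tile register and places ldtilecfg there. A self-contained sketch of that fold, with MachineDominatorTree replaced by a toy parent/depth representation (all names below are illustrative, not LLVM APIs):

    #include <cstdio>
    #include <vector>

    // Immediate-dominator tree as parent links; node 0 is the entry block.
    struct DomTree {
      std::vector<int> Parent, Depth;
      int nearestCommonDominator(int A, int B) const {
        while (Depth[A] > Depth[B]) A = Parent[A];
        while (Depth[B] > Depth[A]) B = Parent[B];
        while (A != B) { A = Parent[A]; B = Parent[B]; }
        return A;
      }
    };

    // Fold the nearest common dominator over every block that defines a tile
    // register, as getTileConfigPoint() does with MachineDominatorTree.
    static int tileConfigBlock(const DomTree &DT,
                               const std::vector<int> &DefBlocks) {
      if (DefBlocks.empty())
        return -1; // no AMX instructions: nothing to configure
      int MBB = DefBlocks[0];
      for (size_t I = 1; I < DefBlocks.size(); ++I)
        MBB = DT.nearestCommonDominator(MBB, DefBlocks[I]);
      return MBB;
    }

    int main() {
      //        0
      //       / \
      //      1   2      tile defs in blocks 1 and 2 -> config goes in block 0
      DomTree DT;
      DT.Parent = {0, 0, 0};
      DT.Depth  = {0, 1, 1};
      std::printf("ldtilecfg block: %d\n", tileConfigBlock(DT, {1, 2})); // 0
    }

The real pass then walks backwards through the chosen block so that the configuration lands after the last instruction that defines a shape operand, as shown above.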
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
index f456728cf47b..d90b4e7bdc7e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -18,6 +18,8 @@
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -664,13 +666,6 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
return true;
}
-bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
- Register Reg, int &FrameIdx) const {
- // Since X86 defines assignCalleeSavedSpillSlots which always return true
- // this function neither used nor tested.
- llvm_unreachable("Unused function on X86. Otherwise need a test case.");
-}
-
// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should be really trying first to entirely eliminate
@@ -731,11 +726,12 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert((!needsStackRealignment(MF) ||
MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
"Return instruction can only reference SP relative frame objects");
- FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0);
+ FIOffset =
+ TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
} else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
} else {
- FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr);
+ FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
}
// LOCAL_ESCAPE uses a single offset, with no register. It only works in the
@@ -790,6 +786,55 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
+unsigned X86RegisterInfo::findDeadCallerSavedReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
+ const MachineFunction *MF = MBB.getParent();
+ if (MF->callsEHReturn())
+ return 0;
+
+ const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);
+
+ if (MBBI == MBB.end())
+ return 0;
+
+ switch (MBBI->getOpcode()) {
+ default:
+ return 0;
+ case TargetOpcode::PATCHABLE_RET:
+ case X86::RET:
+ case X86::RETL:
+ case X86::RETQ:
+ case X86::RETIL:
+ case X86::RETIQ:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ SmallSet<uint16_t, 8> Uses;
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBI->getOperand(I);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
+ }
+
+ for (auto CS : AvailableRegs)
+ if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
+ return CS;
+ }
+ }
+
+ return 0;
+}
+
Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const X86FrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? FramePtr : StackPtr;
@@ -812,3 +857,79 @@ X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
StackReg = getX86SubSuperRegister(StackReg, 32);
return StackReg;
}
+
+static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
+ const MachineRegisterInfo *MRI) {
+ if (VRM->hasShape(VirtReg))
+ return VRM->getShape(VirtReg);
+
+ const MachineOperand &Def = *MRI->def_begin(VirtReg);
+ MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
+ unsigned OpCode = MI->getOpcode();
+ switch (OpCode) {
+ default:
+ llvm_unreachable("Unexpected machine instruction on tile register!");
+ break;
+ // We only collect the tile shape that is defined.
+ case X86::PTILELOADDV:
+ case X86::PTDPBSSDV:
+ case X86::PTILEZEROV:
+ MachineOperand &MO1 = MI->getOperand(1);
+ MachineOperand &MO2 = MI->getOperand(2);
+ ShapeT Shape(&MO1, &MO2, MRI);
+ VRM->assignVirt2Shape(VirtReg, Shape);
+ return Shape;
+ }
+}
+
+bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const {
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
+ VirtReg, Order, Hints, MF, VRM, Matrix);
+
+ if (RC.getID() != X86::TILERegClassID)
+ return BaseImplRetVal;
+
+ ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
+ auto AddHint = [&](MCPhysReg PhysReg) {
+ Register VReg = Matrix->getOneVReg(PhysReg);
+ if (VReg == MCRegister::NoRegister) { // Not allocated yet
+ Hints.push_back(PhysReg);
+ return;
+ }
+ ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
+ if (PhysShape == VirtShape)
+ Hints.push_back(PhysReg);
+ };
+
+ SmallSet<MCPhysReg, 4> CopyHints;
+ CopyHints.insert(Hints.begin(), Hints.end());
+ Hints.clear();
+ for (auto Hint : CopyHints) {
+ if (RC.contains(Hint) && !MRI->isReserved(Hint))
+ AddHint(Hint);
+ }
+ for (MCPhysReg PhysReg : Order) {
+ if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
+ !MRI->isReserved(PhysReg))
+ AddHint(PhysReg);
+ }
+
+#define DEBUG_TYPE "tile-hint"
+ LLVM_DEBUG({
+ dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
+ for (auto Hint : Hints) {
+ dbgs() << "tmm" << Hint << ",";
+ }
+ dbgs() << "\n";
+ });
+#undef DEBUG_TYPE
+
+ return true;
+}
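
The new getRegAllocationHints() override keeps a tile-register hint only if the physical register is still unassigned or already holds a virtual register of the same shape. A minimal standalone model of that filter; Shape, the assignment map, and the register numbering are invented stand-ins for ShapeT, VirtRegMap, and LiveRegMatrix:

    #include <cstdio>
    #include <map>
    #include <vector>

    struct Shape {
      int Row, Col;
      bool operator==(const Shape &O) const {
        return Row == O.Row && Col == O.Col;
      }
    };

    // Keep a candidate tile register if it is free, or if the vreg already
    // living in it has the same shape as the vreg we are allocating.
    static std::vector<int>
    filterTileHints(const std::vector<int> &Order, const Shape &VirtShape,
                    const std::map<int, Shape> &Assigned /*physreg -> shape*/) {
      std::vector<int> Hints;
      for (int PhysReg : Order) {
        auto It = Assigned.find(PhysReg);
        if (It == Assigned.end() || It->second == VirtShape)
          Hints.push_back(PhysReg);
      }
      return Hints;
    }

    int main() {
      // tmm0 holds a 16x16 tile, tmm1 is free; we are allocating a 16x64 tile.
      std::map<int, Shape> Assigned = {{0, {16, 16}}};
      std::vector<int> Hints = filterTileHints({0, 1}, {16, 64}, Assigned);
      for (int H : Hints)
        std::printf("hint tmm%d\n", H); // only tmm1
    }

Grouping tiles of equal shape onto the same physical register keeps the eventual ldtilecfg configuration small, which is the design motivation behind the shape-aware hints.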
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h
index 3435c0a10b04..7fd10ddd1a15 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -121,13 +121,16 @@ public:
bool canRealignStack(const MachineFunction &MF) const override;
- bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
- int &FrameIdx) const override;
-
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
+ /// findDeadCallerSavedReg - Return a caller-saved register that isn't live
+ /// when execution reaches the "return" instruction. We can then pop a stack
+ /// object into this register without worrying about clobbering it.
+ unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) const;
+
// Debug information queries.
Register getFrameRegister(const MachineFunction &MF) const override;
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const;
@@ -141,6 +144,11 @@ public:
Register getFramePtr() const { return FramePtr; }
// FIXME: Move to FrameInfo
unsigned getSlotSize() const { return SlotSize; }
+
+ bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF, const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const override;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
index 8de5b94bbffa..75cbd4e1cff1 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -265,6 +265,9 @@ let SubRegIndices = [sub_ymm] in {
}
}
+// Tile config registers.
+def TMMCFG: X86Reg<"tmmcfg", 0>;
+
// Tile "registers".
def TMM0: X86Reg<"tmm0", 0>;
def TMM1: X86Reg<"tmm1", 1>;
@@ -633,6 +636,11 @@ def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
// Tiles
-let isAllocatable = 0 in
-def TILE : RegisterClass<"X86", [untyped], 0,
+let CopyCost = -1 in // Don't allow copying of tile registers
+def TILE : RegisterClass<"X86", [x86amx], 8192,
(sequence "TMM%u", 0, 7)> {let Size = 8192;}
+def TILECFG : RegisterClass<"X86", [untyped], 512, (add TMMCFG)> {
+ let CopyCost = -1; // Don't allow copying of tile config registers.
+ let isAllocatable = 1;
+ let Size = 512;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index ce8d1d464da9..e76908ef4bc4 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
#define DEBUG_TYPE "x86-selectiondag-info"
+static cl::opt<bool>
+ UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
+ cl::desc("Use fast short rep mov in memcpy lowering"));
+
bool X86SelectionDAGInfo::isBaseRegConflictPossible(
SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
// We cannot use TRI->hasBasePointer() until *after* we select all basic
@@ -306,6 +310,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
const X86Subtarget &Subtarget =
DAG.getMachineFunction().getSubtarget<X86Subtarget>();
+ // If enabled and available, use fast short rep mov.
+ if (UseFSRMForMemcpy && Subtarget.hasFSRM())
+ return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
+
/// Handle constant sizes.
if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
return emitConstantSizeRepmov(
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index de528299654c..14a3fea240e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -293,55 +293,4 @@ void DecodeVPPERMMask(const Constant *C, unsigned Width,
}
}
-void DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width,
- SmallVectorImpl<int> &ShuffleMask) {
- assert((Width == 128 || Width == 256 || Width == 512) &&
- C->getType()->getPrimitiveSizeInBits() >= Width &&
- "Unexpected vector size.");
- assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
- "Unexpected vector element size.");
-
- // The shuffle mask requires elements the same size as the target.
- APInt UndefElts;
- SmallVector<uint64_t, 64> RawMask;
- if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
- return;
-
- unsigned NumElts = Width / ElSize;
-
- for (unsigned i = 0; i != NumElts; ++i) {
- if (UndefElts[i]) {
- ShuffleMask.push_back(SM_SentinelUndef);
- continue;
- }
- int Index = RawMask[i] & (NumElts - 1);
- ShuffleMask.push_back(Index);
- }
-}
-
-void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width,
- SmallVectorImpl<int> &ShuffleMask) {
- assert((Width == 128 || Width == 256 || Width == 512) &&
- C->getType()->getPrimitiveSizeInBits() >= Width &&
- "Unexpected vector size.");
- assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
- "Unexpected vector element size.");
-
- // The shuffle mask requires elements the same size as the target.
- APInt UndefElts;
- SmallVector<uint64_t, 64> RawMask;
- if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
- return;
-
- unsigned NumElts = Width / ElSize;
-
- for (unsigned i = 0; i != NumElts; ++i) {
- if (UndefElts[i]) {
- ShuffleMask.push_back(SM_SentinelUndef);
- continue;
- }
- int Index = RawMask[i] & (NumElts*2 - 1);
- ShuffleMask.push_back(Index);
- }
-}
-} // llvm namespace
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
index 51229a69a626..77236f6aac9f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
@@ -38,14 +38,6 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
void DecodeVPPERMMask(const Constant *C, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
-/// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width,
- SmallVectorImpl<int> &ShuffleMask);
-
-/// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width,
- SmallVectorImpl<int> &ShuffleMask);
-
} // llvm namespace
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
index 7e91c37367d2..d57871130b0c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
@@ -161,6 +161,7 @@ bool X86SpeculativeExecutionSideEffectSuppression::runOnMachineFunction(
// This branch requires adding an LFENCE.
if (!PrevInstIsLFENCE) {
+ assert(FirstTerminator && "Unknown terminator instruction");
BuildMI(MBB, FirstTerminator, DebugLoc(), TII->get(X86::LFENCE));
NumLFENCEsInserted++;
Modified = true;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index fe5b9a05f811..aa73d4bce65a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -184,7 +184,7 @@ private:
MachineBasicBlock::iterator InsertPt, DebugLoc Loc);
void restoreEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
- unsigned OFReg);
+ Register Reg);
void mergePredStateIntoSP(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
@@ -200,8 +200,8 @@ private:
MachineInstr *
sinkPostLoadHardenedInst(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
- bool canHardenRegister(unsigned Reg);
- unsigned hardenValueInRegister(unsigned Reg, MachineBasicBlock &MBB,
+ bool canHardenRegister(Register Reg);
+ unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt,
DebugLoc Loc);
unsigned hardenPostLoad(MachineInstr &MI);
@@ -1520,7 +1520,7 @@ unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
/// reliably lower.
void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
- unsigned Reg) {
+ Register Reg) {
BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
++NumInstsInserted;
}
@@ -1842,8 +1842,7 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
// just bail. Also check that its register class is one of the ones we
// can harden.
Register UseDefReg = UseMI.getOperand(0).getReg();
- if (!Register::isVirtualRegister(UseDefReg) ||
- !canHardenRegister(UseDefReg))
+ if (!UseDefReg.isVirtual() || !canHardenRegister(UseDefReg))
return {};
SingleUseMI = &UseMI;
@@ -1865,7 +1864,7 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
return MI;
}
-bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) {
+bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
auto *RC = MRI->getRegClass(Reg);
int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
if (RegBytes > 8)
@@ -1909,10 +1908,10 @@ bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) {
/// The new, hardened virtual register is returned. It will have the same
/// register class as `Reg`.
unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
- unsigned Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
+ Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
DebugLoc Loc) {
assert(canHardenRegister(Reg) && "Cannot harden this register!");
- assert(Register::isVirtualRegister(Reg) && "Cannot harden a physical register!");
+ assert(Reg.isVirtual() && "Cannot harden a physical register!");
auto *RC = MRI->getRegClass(Reg);
int Bytes = TRI->getRegSizeInBits(*RC) / 8;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
index 975cbabb30fd..c95213c3539d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -166,6 +166,10 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
}
+ // 32-bit ELF references GlobalAddress directly in static relocation model.
+ // We cannot use MO_GOT because EBX may not be set up.
+ if (TM.getRelocationModel() == Reloc::Static)
+ return X86II::MO_NO_FLAG;
return X86II::MO_GOT;
}
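
A hedged illustration of the new early return's intent (not taken from the patch): in 32-bit, non-PIC, statically relocated code there is no GOT base set up in EBX, so a global reference like the one below is expected to be addressed directly rather than through a @GOT entry.

// Illustrative only: built for i386 with the static relocation model.
extern int ExternalCounter;

int readCounter() { return ExternalCounter; } // direct absolute reference expected
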
@@ -202,6 +206,9 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
(!F && M.getRtLibUseGOT())) &&
is64Bit())
return X86II::MO_GOTPCREL;
+ // Reference ExternalSymbol directly in static relocation model.
+ if (!is64Bit() && !GV && TM.getRelocationModel() == Reloc::Static)
+ return X86II::MO_NO_FLAG;
return X86II::MO_PLT;
}
@@ -227,39 +234,22 @@ bool X86Subtarget::isLegalToCallImmediateAddr() const {
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
}
-void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
- std::string CPUName = std::string(CPU);
- if (CPUName.empty())
- CPUName = "generic";
-
- std::string FullFS = std::string(FS);
- if (In64BitMode) {
- // SSE2 should default to enabled in 64-bit mode, but can be turned off
- // explicitly.
- if (!FullFS.empty())
- FullFS = "+sse2," + FullFS;
- else
- FullFS = "+sse2";
-
- // If no CPU was specified, enable 64bit feature to satisy later check.
- if (CPUName == "generic") {
- if (!FullFS.empty())
- FullFS = "+64bit," + FullFS;
- else
- FullFS = "+64bit";
- }
- }
+void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
+ StringRef FS) {
+ if (CPU.empty())
+ CPU = "generic";
- // LAHF/SAHF are always supported in non-64-bit mode.
- if (!In64BitMode) {
- if (!FullFS.empty())
- FullFS = "+sahf," + FullFS;
- else
- FullFS = "+sahf";
- }
+ if (TuneCPU.empty())
+ TuneCPU = "i586"; // FIXME: "generic" is more modern than llc tests expect.
+
+ std::string FullFS = X86_MC::ParseX86Triple(TargetTriple);
+ assert(!FullFS.empty() && "Failed to parse X86 triple");
+
+ if (!FS.empty())
+ FullFS = (Twine(FullFS) + "," + FS).str();
// Parse features string and set the CPU.
- ParseSubtargetFeatures(CPUName, FullFS);
+ ParseSubtargetFeatures(CPU, TuneCPU, FullFS);
// All CPUs that implement SSE4.2 or SSE4A support unaligned accesses of
// 16-bytes and under that are reasonably fast. These features were
@@ -268,17 +258,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (hasSSE42() || hasSSE4A())
IsUAMem16Slow = false;
- // It's important to keep the MCSubtargetInfo feature bits in sync with
- // target data structure which is shared with MC code emitter, etc.
- if (In64BitMode)
- ToggleFeature(X86::Mode64Bit);
- else if (In32BitMode)
- ToggleFeature(X86::Mode32Bit);
- else if (In16BitMode)
- ToggleFeature(X86::Mode16Bit);
- else
- llvm_unreachable("Not 16-bit, 32-bit or 64-bit mode!");
-
LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel << ", 64bit "
<< HasX86_64 << "\n");
@@ -286,25 +265,15 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
report_fatal_error("64-bit code requested on a subtarget that doesn't "
"support it!");
- // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
- // 32 and 64 bit) and for all 64-bit targets.
+ // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and for all
+ // 64-bit targets. On Solaris (32-bit), stack alignment is 4 bytes
+ // following the i386 psABI, while on Illumos it is always 16 bytes.
if (StackAlignOverride)
stackAlignment = *StackAlignOverride;
- else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
- isTargetKFreeBSD() || In64BitMode)
+ else if (isTargetDarwin() || isTargetLinux() || isTargetKFreeBSD() ||
+ In64BitMode)
stackAlignment = Align(16);
- // Some CPUs have more overhead for gather. The specified overhead is relative
- // to the Load operation. "2" is the number provided by Intel architects. This
- // parameter is used for cost estimation of Gather Op and comparison with
- // other alternatives.
- // TODO: Remove the explicit hasAVX512()?, That would mean we would only
- // enable gather with a -march.
- if (hasAVX512() || (hasAVX2() && hasFastGather()))
- GatherOverhead = 2;
- if (hasAVX512())
- ScatterOverhead = 2;
-
// Consume the vector width attribute or apply any target specific limit.
if (PreferVectorWidthOverride)
PreferVectorWidth = PreferVectorWidthOverride;
@@ -315,27 +284,24 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
}
X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef TuneCPU,
StringRef FS) {
- initSubtargetFeatures(CPU, FS);
+ initSubtargetFeatures(CPU, TuneCPU, FS);
return *this;
}
-X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
- const X86TargetMachine &TM,
+X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
+ StringRef FS, const X86TargetMachine &TM,
MaybeAlign StackAlignOverride,
unsigned PreferVectorWidthOverride,
unsigned RequiredVectorWidth)
- : X86GenSubtargetInfo(TT, CPU, FS), PICStyle(PICStyles::Style::None),
- TM(TM), TargetTriple(TT), StackAlignOverride(StackAlignOverride),
+ : X86GenSubtargetInfo(TT, CPU, TuneCPU, FS),
+ PICStyle(PICStyles::Style::None), TM(TM), TargetTriple(TT),
+ StackAlignOverride(StackAlignOverride),
PreferVectorWidthOverride(PreferVectorWidthOverride),
RequiredVectorWidth(RequiredVectorWidth),
- In64BitMode(TargetTriple.getArch() == Triple::x86_64),
- In32BitMode(TargetTriple.getArch() == Triple::x86 &&
- TargetTriple.getEnvironment() != Triple::CODE16),
- In16BitMode(TargetTriple.getArch() == Triple::x86 &&
- TargetTriple.getEnvironment() == Triple::CODE16),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(*this, getStackAlignment()) {
+ InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)),
+ TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
if (!isPositionIndependent())
setPICStyle(PICStyles::Style::None);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
index de45d357e3c2..fa2622333d60 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
@@ -50,7 +50,6 @@ enum class Style {
} // end namespace PICStyles
class X86Subtarget final : public X86GenSubtargetInfo {
-public:
// NOTE: Do not add anything new to this list. Coarse, CPU name based flags
// are not a good idea. We should be migrating away from these.
enum X86ProcFamilyEnum {
@@ -59,7 +58,6 @@ public:
IntelSLM
};
-protected:
enum X86SSEEnum {
NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
};
@@ -191,8 +189,8 @@ protected:
/// Processor has RDSEED instructions.
bool HasRDSEED = false;
- /// Processor has LAHF/SAHF instructions.
- bool HasLAHFSAHF = false;
+ /// Processor has LAHF/SAHF instructions in 64-bit mode.
+ bool HasLAHFSAHF64 = false;
/// Processor has MONITORX/MWAITX instructions.
bool HasMWAITX = false;
@@ -304,6 +302,9 @@ protected:
/// True if the processor has enhanced REP MOVSB/STOSB.
bool HasERMSB = false;
+ /// True if the processor has fast short REP MOV.
+ bool HasFSRM = false;
+
/// True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions = false;
@@ -354,6 +355,9 @@ protected:
/// Processor has AVX-512 Vector Neural Network Instructions
bool HasVNNI = false;
+ /// Processor has AVX Vector Neural Network Instructions
+ bool HasAVXVNNI = false;
+
/// Processor has AVX-512 bfloat16 floating-point extensions
bool HasBF16 = false;
@@ -366,9 +370,6 @@ protected:
/// Processor has AVX-512 vp2intersect instructions
bool HasVP2INTERSECT = false;
- /// Deprecated flag for MPX instructions.
- bool DeprecatedHasMPX = false;
-
/// Processor supports CET SHSTK - Control-Flow Enforcement Technology
/// using Shadow Stack
bool HasSHSTK = false;
@@ -397,6 +398,15 @@ protected:
/// Processor supports PCONFIG instruction
bool HasPCONFIG = false;
+ /// Processor supports Key Locker instructions
+ bool HasKL = false;
+
+ /// Processor supports Key Locker wide instructions
+ bool HasWIDEKL = false;
+
+ /// Processor supports HRESET instruction
+ bool HasHRESET = false;
+
/// Processor supports SERIALIZE instruction
bool HasSERIALIZE = false;
@@ -408,6 +418,9 @@ protected:
bool HasAMXBF16 = false;
bool HasAMXINT8 = false;
+ /// Processor supports User Level Interrupt instructions
+ bool HasUINTR = false;
+
/// Processor has a single uop BEXTR implementation.
bool HasFastBEXTR = false;
@@ -459,6 +472,8 @@ protected:
/// entry to the function and which must be maintained by every function.
Align stackAlignment = Align(4);
+ Align TileConfigAlignment = Align(4);
+
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
///
// FIXME: this is a known good value for Yonah. How about others?
@@ -473,9 +488,6 @@ protected:
/// Indicates target prefers AVX512 mask registers.
bool PreferMaskRegisters = false;
- /// Threeway branch is profitable in this subtarget.
- bool ThreewayBranchProfitable = false;
-
/// Use Goldmont specific floating point div/sqrt costs.
bool UseGLMDivSqrtCosts = false;
@@ -503,17 +515,13 @@ private:
unsigned RequiredVectorWidth;
/// True if compiling for 64-bit, false for 16-bit or 32-bit.
- bool In64BitMode;
+ bool In64BitMode = false;
/// True if compiling for 32-bit, false for 16-bit or 64-bit.
- bool In32BitMode;
+ bool In32BitMode = false;
/// True if compiling for 16-bit, false for 32-bit or 64-bit.
- bool In16BitMode;
-
- /// Contains the Overhead of gather\scatter instructions
- int GatherOverhead = 1024;
- int ScatterOverhead = 1024;
+ bool In16BitMode = false;
X86SelectionDAGInfo TSInfo;
// Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
@@ -526,7 +534,7 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
unsigned PreferVectorWidthOverride,
unsigned RequiredVectorWidth);
@@ -549,6 +557,9 @@ public:
return &getInstrInfo()->getRegisterInfo();
}
+ unsigned getTileConfigSize() const { return 64; }
+ Align getTileConfigAlignment() const { return TileConfigAlignment; }
+
/// Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
@@ -560,7 +571,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
/// Methods used by Global ISel
const CallLowering *getCallLowering() const override;
@@ -571,8 +582,10 @@ public:
private:
/// Initialize the full set of dependencies so we can use an initializer
/// list for X86Subtarget.
- X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
- void initSubtargetFeatures(StringRef CPU, StringRef FS);
+ X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
+ StringRef TuneCPU,
+ StringRef FS);
+ void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
public:
/// Is this x86_64? (disregarding specific ABI / programming model)
@@ -671,7 +684,7 @@ public:
return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
}
bool hasRDSEED() const { return HasRDSEED; }
- bool hasLAHFSAHF() const { return HasLAHFSAHF; }
+ bool hasLAHFSAHF() const { return HasLAHFSAHF64 || !is64Bit(); }
bool hasMWAITX() const { return HasMWAITX; }
bool hasCLZERO() const { return HasCLZERO; }
bool hasCLDEMOTE() const { return HasCLDEMOTE; }
@@ -683,8 +696,6 @@ public:
bool isPMADDWDSlow() const { return IsPMADDWDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
- int getGatherOverhead() const { return GatherOverhead; }
- int getScatterOverhead() const { return ScatterOverhead; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); }
bool useLeaForSP() const { return UseLeaForSP; }
@@ -706,6 +717,7 @@ public:
bool hasMacroFusion() const { return HasMacroFusion; }
bool hasBranchFusion() const { return HasBranchFusion; }
bool hasERMSB() const { return HasERMSB; }
+ bool hasFSRM() const { return HasFSRM; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
@@ -734,15 +746,19 @@ public:
bool hasWAITPKG() const { return HasWAITPKG; }
bool hasPCONFIG() const { return HasPCONFIG; }
bool hasSGX() const { return HasSGX; }
- bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
bool hasINVPCID() const { return HasINVPCID; }
bool hasENQCMD() const { return HasENQCMD; }
+ bool hasKL() const { return HasKL; }
+ bool hasWIDEKL() const { return HasWIDEKL; }
+ bool hasHRESET() const { return HasHRESET; }
bool hasSERIALIZE() const { return HasSERIALIZE; }
bool hasTSXLDTRK() const { return HasTSXLDTRK; }
+ bool hasUINTR() const { return HasUINTR; }
bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
bool useRetpolineIndirectBranches() const {
return UseRetpolineIndirectBranches;
}
+ bool hasAVXVNNI() const { return HasAVXVNNI; }
bool hasAMXTILE() const { return HasAMXTILE; }
bool hasAMXBF16() const { return HasAMXBF16; }
bool hasAMXINT8() const { return HasAMXINT8; }
@@ -792,8 +808,6 @@ public:
bool isXRaySupported() const override { return is64Bit(); }
- X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
-
/// TODO: to be removed later and replaced with suitable properties
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
index 7344116e14af..c8f76c210a3f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -56,17 +56,13 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
-static cl::opt<bool> EnableCondBrFoldingPass("x86-condbr-folding",
- cl::desc("Enable the conditional branch "
- "folding pass"),
- cl::init(false), cl::Hidden);
-
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86TargetMachine> X(getTheX86_32Target());
RegisterTargetMachine<X86TargetMachine> Y(getTheX86_64Target());
PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeX86LowerAMXTypeLegacyPassPass(PR);
initializeGlobalISel(PR);
initializeWinEHStatePassPass(PR);
initializeFixupBWInstPassPass(PR);
@@ -76,6 +72,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86FixupSetCCPassPass(PR);
initializeX86CallFrameOptimizationPass(PR);
initializeX86CmovConverterPassPass(PR);
+ initializeX86TileConfigPass(PR);
initializeX86ExpandPseudoPass(PR);
initializeX86ExecutionDomainFixPass(PR);
initializeX86DomainReassignmentPass(PR);
@@ -84,11 +81,11 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86SpeculativeExecutionSideEffectSuppressionPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
- initializeX86CondBrFoldingPassPass(PR);
initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
initializeX86LoadValueInjectionRetHardeningPassPass(PR);
initializeX86OptimizeLEAPassPass(PR);
initializeX86PartialReductionPass(PR);
+ initializePseudoProbeInserterPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -239,43 +236,30 @@ X86TargetMachine::~X86TargetMachine() = default;
const X86Subtarget *
X86TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute TuneAttr = F.getFnAttribute("tune-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- StringRef CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString()
- : (StringRef)TargetCPU;
- StringRef FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString()
- : (StringRef)TargetFS;
+ StringRef CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString() : (StringRef)TargetCPU;
+ StringRef TuneCPU =
+ TuneAttr.isValid() ? TuneAttr.getValueAsString() : (StringRef)CPU;
+ StringRef FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString() : (StringRef)TargetFS;
SmallString<512> Key;
- Key.reserve(CPU.size() + FS.size());
- Key += CPU;
- Key += FS;
-
- // FIXME: This is related to the code below to reset the target options,
- // we need to know whether or not the soft float flag is set on the
- // function before we can generate a subtarget. We also need to use
- // it as a key for the subtarget since that can be the only difference
- // between two functions.
- bool SoftFloat =
- F.getFnAttribute("use-soft-float").getValueAsString() == "true";
- // If the soft float attribute is set on the function turn on the soft float
- // subtarget feature.
- if (SoftFloat)
- Key += FS.empty() ? "+soft-float" : ",+soft-float";
-
- // Keep track of the key width after all features are added so we can extract
- // the feature string out later.
- unsigned CPUFSWidth = Key.size();
+ // The additions here are ordered so that the strings that are certainly
+ // short are added first, so we don't exceed the SmallString's inline size.
+ // We append the much longer FS string at the end so that we heap allocate
+ // at most once.
// Extract prefer-vector-width attribute.
unsigned PreferVectorWidthOverride = 0;
- if (F.hasFnAttribute("prefer-vector-width")) {
- StringRef Val = F.getFnAttribute("prefer-vector-width").getValueAsString();
+ Attribute PreferVecWidthAttr = F.getFnAttribute("prefer-vector-width");
+ if (PreferVecWidthAttr.isValid()) {
+ StringRef Val = PreferVecWidthAttr.getValueAsString();
unsigned Width;
if (!Val.getAsInteger(0, Width)) {
- Key += ",prefer-vector-width=";
+ Key += "prefer-vector-width=";
Key += Val;
PreferVectorWidthOverride = Width;
}
@@ -283,21 +267,44 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
// Extract min-legal-vector-width attribute.
unsigned RequiredVectorWidth = UINT32_MAX;
- if (F.hasFnAttribute("min-legal-vector-width")) {
- StringRef Val =
- F.getFnAttribute("min-legal-vector-width").getValueAsString();
+ Attribute MinLegalVecWidthAttr = F.getFnAttribute("min-legal-vector-width");
+ if (MinLegalVecWidthAttr.isValid()) {
+ StringRef Val = MinLegalVecWidthAttr.getValueAsString();
unsigned Width;
if (!Val.getAsInteger(0, Width)) {
- Key += ",min-legal-vector-width=";
+ Key += "min-legal-vector-width=";
Key += Val;
RequiredVectorWidth = Width;
}
}
- // Extracted here so that we make sure there is backing for the StringRef. If
- // we assigned earlier, its possible the SmallString reallocated leaving a
- // dangling StringRef.
- FS = Key.slice(CPU.size(), CPUFSWidth);
+ // Add CPU to the Key.
+ Key += CPU;
+
+ // Add tune CPU to the Key.
+ Key += "tune=";
+ Key += TuneCPU;
+
+ // Keep track of the start of the feature portion of the string.
+ unsigned FSStart = Key.size();
+
+ // FIXME: This is related to the code below to reset the target options,
+ // we need to know whether or not the soft float flag is set on the
+ // function before we can generate a subtarget. We also need to use
+ // it as a key for the subtarget since that can be the only difference
+ // between two functions.
+ bool SoftFloat =
+ F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+ // If the soft float attribute is set on the function turn on the soft float
+ // subtarget feature.
+ if (SoftFloat)
+ Key += FS.empty() ? "+soft-float" : "+soft-float,";
+
+ Key += FS;
+
+ // We may have added +soft-float to the features so move the StringRef to
+ // point to the full string in the Key.
+ FS = Key.substr(FSStart);
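+ // Taking the slice only after every append matters: the SmallString may
+ // reallocate as it grows, and a StringRef taken any earlier could be left
+ // dangling. This is why the old CPUFSWidth slicing was dropped.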
auto &I = SubtargetMap[Key];
if (!I) {
@@ -306,13 +313,21 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = std::make_unique<X86Subtarget>(
- TargetTriple, CPU, FS, *this,
+ TargetTriple, CPU, TuneCPU, FS, *this,
MaybeAlign(Options.StackAlignmentOverride), PreferVectorWidthOverride,
RequiredVectorWidth);
}
return I.get();
}
+bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ assert(SrcAS != DestAS && "Expected different address spaces!");
+ if (getPointerSize(SrcAS) != getPointerSize(DestAS))
+ return false;
+ return SrcAS < 256 && DestAS < 256;
+}
+
//===----------------------------------------------------------------------===//
// X86 TTI query.
//===----------------------------------------------------------------------===//
@@ -366,6 +381,7 @@ public:
void addPreEmitPass() override;
void addPreEmitPass2() override;
void addPreSched2() override;
+ bool addPreRewrite() override;
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
@@ -394,6 +410,7 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
void X86PassConfig::addIRPasses() {
addPass(createAtomicExpandPass());
+ addPass(createX86LowerAMXTypePass());
TargetPassConfig::addIRPasses();
@@ -432,7 +449,7 @@ bool X86PassConfig::addInstSelector() {
}
bool X86PassConfig::addIRTranslator() {
- addPass(new IRTranslator());
+ addPass(new IRTranslator(getOptLevel()));
return false;
}
@@ -452,8 +469,6 @@ bool X86PassConfig::addGlobalInstructionSelect() {
}
bool X86PassConfig::addILPOpts() {
- if (EnableCondBrFoldingPass)
- addPass(createX86CondBrFolding());
addPass(&EarlyIfConverterID);
if (EnableMachineCombinerPass)
addPass(&MachineCombinerID);
@@ -481,7 +496,12 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86SpeculativeLoadHardeningPass());
addPass(createX86FlagsCopyLoweringPass());
addPass(createX86WinAllocaExpander());
+
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(createX86PreTileConfigPass());
+ }
}
+
void X86PassConfig::addMachineSSAOptimization() {
addPass(createX86DomainReassignmentPass());
TargetPassConfig::addMachineSSAOptimization();
@@ -554,6 +574,11 @@ void X86PassConfig::addPreEmitPass2() {
addPass(createX86LoadValueInjectionRetHardeningPass());
}
+bool X86PassConfig::addPreRewrite() {
+ addPass(createX86TileConfigPass());
+ return true;
+}
+
std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
return getStandardCSEConfigForOpt(TM->getOptLevel());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h
index 8d98474a39c0..69d7e48b8977 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h
@@ -54,6 +54,8 @@ public:
}
bool isJIT() const { return IsJIT; }
+
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
};
} // end namespace llvm
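
For context on the new X86TargetMachine::isNoopAddrSpaceCast above (a sketch, not from the patch): x86 reserves address spaces 256 and up for segment-relative addressing (256 = gs, 257 = fs, 258 = ss in the usual LLVM/Clang mapping), so only casts between ordinary address spaces of equal pointer width are treated as no-ops. A standalone mirror of the predicate:

// Sketch: same pointer width and both address spaces below 256 => no-op.
constexpr bool isNoopAddrSpaceCastSketch(unsigned SrcAS, unsigned DestAS,
                                         unsigned SrcPtrBytes,
                                         unsigned DestPtrBytes) {
  return SrcPtrBytes == DestPtrBytes && SrcAS < 256 && DestAS < 256;
}

static_assert(isNoopAddrSpaceCastSketch(0, 1, 8, 8), "flat to flat is a no-op");
static_assert(!isNoopAddrSpaceCastSketch(0, 256, 8, 8), "gs space is not");
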
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp
index 2b48baccc01f..b88ad5a478f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -7,16 +7,8 @@
//===----------------------------------------------------------------------===//
#include "X86TargetObjectFile.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionCOFF.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h
index acea772eb036..f4bf52c83771 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -36,7 +36,7 @@ namespace llvm {
MCStreamer &Streamer) const override;
};
- /// This implemenatation is used for X86 ELF targets that don't
+ /// This implementation is used for X86 ELF targets that don't
/// have a further specialization.
class X86ELFTargetObjectFile : public TargetLoweringObjectFileELF {
public:
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index cc18e55656ef..71455237fb61 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -232,16 +232,16 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
bool Op2Signed = false;
unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed);
- bool signedMode = Op1Signed | Op2Signed;
+ bool SignedMode = Op1Signed || Op2Signed;
unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
if (OpMinSize <= 7)
return LT.first * 3; // pmullw/sext
- if (!signedMode && OpMinSize <= 8)
+ if (!SignedMode && OpMinSize <= 8)
return LT.first * 3; // pmullw/zext
if (OpMinSize <= 15)
return LT.first * 5; // pmullw/pmulhw/pshuf
- if (!signedMode && OpMinSize <= 16)
+ if (!SignedMode && OpMinSize <= 16)
return LT.first * 5; // pmullw/pmulhw/pshuf
}
@@ -321,6 +321,11 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::SHL, MVT::v64i8, 4 }, // psllw + pand.
{ ISD::SRL, MVT::v64i8, 4 }, // psrlw + pand.
{ ISD::SRA, MVT::v64i8, 8 }, // psrlw, pand, pxor, psubb.
+
+ { ISD::SDIV, MVT::v16i32, 6 }, // pmuludq sequence
+ { ISD::SREM, MVT::v16i32, 8 }, // pmuludq+mul+sub sequence
+ { ISD::UDIV, MVT::v16i32, 5 }, // pmuludq sequence
+ { ISD::UREM, MVT::v16i32, 7 }, // pmuludq+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -336,6 +341,11 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
+
+ { ISD::SDIV, MVT::v8i32, 6 }, // pmuludq sequence
+ { ISD::SREM, MVT::v8i32, 8 }, // pmuludq+mul+sub sequence
+ { ISD::UDIV, MVT::v8i32, 5 }, // pmuludq sequence
+ { ISD::UREM, MVT::v8i32, 7 }, // pmuludq+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -353,6 +363,15 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split.
{ ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split.
{ ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split.
+
+ { ISD::SDIV, MVT::v8i32, 12+2 }, // 2*pmuludq sequence + split.
+ { ISD::SREM, MVT::v8i32, 16+2 }, // 2*pmuludq+mul+sub sequence + split.
+ { ISD::SDIV, MVT::v4i32, 6 }, // pmuludq sequence
+ { ISD::SREM, MVT::v4i32, 8 }, // pmuludq+mul+sub sequence
+ { ISD::UDIV, MVT::v8i32, 10+2 }, // 2*pmuludq sequence + split.
+ { ISD::UREM, MVT::v8i32, 14+2 }, // 2*pmuludq+mul+sub sequence + split.
+ { ISD::UDIV, MVT::v4i32, 5 }, // pmuludq sequence
+ { ISD::UREM, MVT::v4i32, 7 }, // pmuludq+mul+sub sequence
};
// XOP has faster vXi8 shifts.
@@ -1109,6 +1128,9 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
{TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // vpermt2w
{TTI::SK_PermuteTwoSrc, MVT::v8i16, 2}, // vpermt2w
{TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
+
+ {TTI::SK_Select, MVT::v32i16, 1}, // vblendmw
+ {TTI::SK_Select, MVT::v64i8, 1}, // vblendmb
};
if (ST->hasBWI())
@@ -1162,6 +1184,13 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
{TTI::SK_PermuteSingleSrc, MVT::v64i8, 14},
{TTI::SK_PermuteTwoSrc, MVT::v32i16, 42},
{TTI::SK_PermuteTwoSrc, MVT::v64i8, 42},
+
+ {TTI::SK_Select, MVT::v32i16, 1}, // vpternlogq
+ {TTI::SK_Select, MVT::v64i8, 1}, // vpternlogq
+ {TTI::SK_Select, MVT::v8f64, 1}, // vblendmpd
+ {TTI::SK_Select, MVT::v16f32, 1}, // vblendmps
+ {TTI::SK_Select, MVT::v8i64, 1}, // vblendmq
+ {TTI::SK_Select, MVT::v16i32, 1}, // vblendmd
};
if (ST->hasAVX512())
@@ -1367,6 +1396,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
}
int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -1988,7 +2018,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// The function getSimpleVT only handles simple value types.
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind));
+ return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind));
MVT SimpleSrcTy = SrcTy.getSimpleVT();
MVT SimpleDstTy = DstTy.getSimpleVT();
@@ -2049,15 +2079,18 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return AdjustCost(Entry->Cost);
}
- return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I));
+ return AdjustCost(
+ BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}
int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+ I);
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -2241,7 +2274,7 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
return LT.first * (ExtraCost + Entry->Cost);
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
@@ -2255,6 +2288,9 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
// CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll
// CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll
// CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll
+
+ // TODO: Overflow intrinsics (*ADDO, *SUBO, *MULO) with vector types are not
+ // specialized in these tables yet.
static const CostTblEntry AVX512CDCostTbl[] = {
{ ISD::CTLZ, MVT::v8i64, 1 },
{ ISD::CTLZ, MVT::v16i32, 1 },
@@ -2270,6 +2306,8 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::CTLZ, MVT::v16i8, 4 },
};
static const CostTblEntry AVX512BWCostTbl[] = {
+ { ISD::ABS, MVT::v32i16, 1 },
+ { ISD::ABS, MVT::v64i8, 1 },
{ ISD::BITREVERSE, MVT::v8i64, 5 },
{ ISD::BITREVERSE, MVT::v16i32, 5 },
{ ISD::BITREVERSE, MVT::v32i16, 5 },
@@ -2288,14 +2326,28 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::CTTZ, MVT::v64i8, 9 },
{ ISD::SADDSAT, MVT::v32i16, 1 },
{ ISD::SADDSAT, MVT::v64i8, 1 },
+ { ISD::SMAX, MVT::v32i16, 1 },
+ { ISD::SMAX, MVT::v64i8, 1 },
+ { ISD::SMIN, MVT::v32i16, 1 },
+ { ISD::SMIN, MVT::v64i8, 1 },
{ ISD::SSUBSAT, MVT::v32i16, 1 },
{ ISD::SSUBSAT, MVT::v64i8, 1 },
{ ISD::UADDSAT, MVT::v32i16, 1 },
{ ISD::UADDSAT, MVT::v64i8, 1 },
+ { ISD::UMAX, MVT::v32i16, 1 },
+ { ISD::UMAX, MVT::v64i8, 1 },
+ { ISD::UMIN, MVT::v32i16, 1 },
+ { ISD::UMIN, MVT::v64i8, 1 },
{ ISD::USUBSAT, MVT::v32i16, 1 },
{ ISD::USUBSAT, MVT::v64i8, 1 },
};
static const CostTblEntry AVX512CostTbl[] = {
+ { ISD::ABS, MVT::v8i64, 1 },
+ { ISD::ABS, MVT::v16i32, 1 },
+ { ISD::ABS, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::ABS, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::ABS, MVT::v4i64, 1 },
+ { ISD::ABS, MVT::v2i64, 1 },
{ ISD::BITREVERSE, MVT::v8i64, 36 },
{ ISD::BITREVERSE, MVT::v16i32, 24 },
{ ISD::BITREVERSE, MVT::v32i16, 10 },
@@ -2312,6 +2364,30 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::CTTZ, MVT::v16i32, 28 },
{ ISD::CTTZ, MVT::v32i16, 24 },
{ ISD::CTTZ, MVT::v64i8, 18 },
+ { ISD::SMAX, MVT::v8i64, 1 },
+ { ISD::SMAX, MVT::v16i32, 1 },
+ { ISD::SMAX, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::SMAX, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SMAX, MVT::v4i64, 1 },
+ { ISD::SMAX, MVT::v2i64, 1 },
+ { ISD::SMIN, MVT::v8i64, 1 },
+ { ISD::SMIN, MVT::v16i32, 1 },
+ { ISD::SMIN, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::SMIN, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SMIN, MVT::v4i64, 1 },
+ { ISD::SMIN, MVT::v2i64, 1 },
+ { ISD::UMAX, MVT::v8i64, 1 },
+ { ISD::UMAX, MVT::v16i32, 1 },
+ { ISD::UMAX, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::UMAX, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::UMAX, MVT::v4i64, 1 },
+ { ISD::UMAX, MVT::v2i64, 1 },
+ { ISD::UMIN, MVT::v8i64, 1 },
+ { ISD::UMIN, MVT::v16i32, 1 },
+ { ISD::UMIN, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::UMIN, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::UMIN, MVT::v4i64, 1 },
+ { ISD::UMIN, MVT::v2i64, 1 },
{ ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
{ ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
{ ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
@@ -2352,6 +2428,10 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::BITREVERSE, MVT::i8, 3 }
};
static const CostTblEntry AVX2CostTbl[] = {
+ { ISD::ABS, MVT::v4i64, 2 }, // VBLENDVPD(X,VPSUBQ(0,X),X)
+ { ISD::ABS, MVT::v8i32, 1 },
+ { ISD::ABS, MVT::v16i16, 1 },
+ { ISD::ABS, MVT::v32i8, 1 },
{ ISD::BITREVERSE, MVT::v4i64, 5 },
{ ISD::BITREVERSE, MVT::v8i32, 5 },
{ ISD::BITREVERSE, MVT::v16i16, 5 },
@@ -2373,14 +2453,28 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::CTTZ, MVT::v32i8, 9 },
{ ISD::SADDSAT, MVT::v16i16, 1 },
{ ISD::SADDSAT, MVT::v32i8, 1 },
+ { ISD::SMAX, MVT::v8i32, 1 },
+ { ISD::SMAX, MVT::v16i16, 1 },
+ { ISD::SMAX, MVT::v32i8, 1 },
+ { ISD::SMIN, MVT::v8i32, 1 },
+ { ISD::SMIN, MVT::v16i16, 1 },
+ { ISD::SMIN, MVT::v32i8, 1 },
{ ISD::SSUBSAT, MVT::v16i16, 1 },
{ ISD::SSUBSAT, MVT::v32i8, 1 },
{ ISD::UADDSAT, MVT::v16i16, 1 },
{ ISD::UADDSAT, MVT::v32i8, 1 },
{ ISD::UADDSAT, MVT::v8i32, 3 }, // not + pminud + paddd
+ { ISD::UMAX, MVT::v8i32, 1 },
+ { ISD::UMAX, MVT::v16i16, 1 },
+ { ISD::UMAX, MVT::v32i8, 1 },
+ { ISD::UMIN, MVT::v8i32, 1 },
+ { ISD::UMIN, MVT::v16i16, 1 },
+ { ISD::UMIN, MVT::v32i8, 1 },
{ ISD::USUBSAT, MVT::v16i16, 1 },
{ ISD::USUBSAT, MVT::v32i8, 1 },
{ ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd
+ { ISD::FMAXNUM, MVT::v8f32, 3 }, // MAXPS + CMPUNORDPS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v4f64, 3 }, // MAXPD + CMPUNORDPD + BLENDVPD
{ ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
@@ -2389,6 +2483,10 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::FSQRT, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
};
static const CostTblEntry AVX1CostTbl[] = {
+ { ISD::ABS, MVT::v4i64, 5 }, // VBLENDVPD(X,VPSUBQ(0,X),X)
+ { ISD::ABS, MVT::v8i32, 3 },
+ { ISD::ABS, MVT::v16i16, 3 },
+ { ISD::ABS, MVT::v32i8, 3 },
{ ISD::BITREVERSE, MVT::v4i64, 12 }, // 2 x 128-bit Op + extract/insert
{ ISD::BITREVERSE, MVT::v8i32, 12 }, // 2 x 128-bit Op + extract/insert
{ ISD::BITREVERSE, MVT::v16i16, 12 }, // 2 x 128-bit Op + extract/insert
@@ -2410,20 +2508,32 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
{ ISD::SADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::SADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMAX, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMAX, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMAX, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMIN, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMIN, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMIN, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::SSUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::UADDSAT, MVT::v8i32, 8 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMAX, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMAX, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMAX, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMIN, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMIN, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMIN, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
- { ISD::FMAXNUM, MVT::f32, 3 },
- { ISD::FMAXNUM, MVT::v4f32, 3 },
- { ISD::FMAXNUM, MVT::v8f32, 5 },
- { ISD::FMAXNUM, MVT::f64, 3 },
- { ISD::FMAXNUM, MVT::v2f64, 3 },
- { ISD::FMAXNUM, MVT::v4f64, 5 },
+ { ISD::FMAXNUM, MVT::f32, 3 }, // MAXSS + CMPUNORDSS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v4f32, 3 }, // MAXPS + CMPUNORDPS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v8f32, 5 }, // MAXPS + CMPUNORDPS + BLENDVPS + ?
+ { ISD::FMAXNUM, MVT::f64, 3 }, // MAXSD + CMPUNORDSD + BLENDVPD
+ { ISD::FMAXNUM, MVT::v2f64, 3 }, // MAXPD + CMPUNORDPD + BLENDVPD
+ { ISD::FMAXNUM, MVT::v4f64, 5 }, // MAXPD + CMPUNORDPD + BLENDVPD + ?
{ ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
@@ -2449,7 +2559,21 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};
+ static const CostTblEntry SSE41CostTbl[] = {
+ { ISD::ABS, MVT::v2i64, 2 }, // BLENDVPD(X,PSUBQ(0,X),X)
+ { ISD::SMAX, MVT::v4i32, 1 },
+ { ISD::SMAX, MVT::v16i8, 1 },
+ { ISD::SMIN, MVT::v4i32, 1 },
+ { ISD::SMIN, MVT::v16i8, 1 },
+ { ISD::UMAX, MVT::v4i32, 1 },
+ { ISD::UMAX, MVT::v8i16, 1 },
+ { ISD::UMIN, MVT::v4i32, 1 },
+ { ISD::UMIN, MVT::v8i16, 1 },
+ };
static const CostTblEntry SSSE3CostTbl[] = {
+ { ISD::ABS, MVT::v4i32, 1 },
+ { ISD::ABS, MVT::v8i16, 1 },
+ { ISD::ABS, MVT::v16i8, 1 },
{ ISD::BITREVERSE, MVT::v2i64, 5 },
{ ISD::BITREVERSE, MVT::v4i32, 5 },
{ ISD::BITREVERSE, MVT::v8i16, 5 },
@@ -2471,6 +2595,10 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::CTTZ, MVT::v16i8, 9 }
};
static const CostTblEntry SSE2CostTbl[] = {
+ { ISD::ABS, MVT::v2i64, 4 },
+ { ISD::ABS, MVT::v4i32, 3 },
+ { ISD::ABS, MVT::v8i16, 2 },
+ { ISD::ABS, MVT::v16i8, 2 },
{ ISD::BITREVERSE, MVT::v2i64, 29 },
{ ISD::BITREVERSE, MVT::v4i32, 27 },
{ ISD::BITREVERSE, MVT::v8i16, 27 },
@@ -2492,10 +2620,16 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::CTTZ, MVT::v16i8, 13 },
{ ISD::SADDSAT, MVT::v8i16, 1 },
{ ISD::SADDSAT, MVT::v16i8, 1 },
+ { ISD::SMAX, MVT::v8i16, 1 },
+ { ISD::SMIN, MVT::v8i16, 1 },
{ ISD::SSUBSAT, MVT::v8i16, 1 },
{ ISD::SSUBSAT, MVT::v16i8, 1 },
{ ISD::UADDSAT, MVT::v8i16, 1 },
{ ISD::UADDSAT, MVT::v16i8, 1 },
+ { ISD::UMAX, MVT::v8i16, 2 },
+ { ISD::UMAX, MVT::v16i8, 1 },
+ { ISD::UMIN, MVT::v8i16, 2 },
+ { ISD::UMIN, MVT::v16i8, 1 },
{ ISD::USUBSAT, MVT::v8i16, 1 },
{ ISD::USUBSAT, MVT::v16i8, 1 },
{ ISD::FMAXNUM, MVT::f64, 4 },
@@ -2534,14 +2668,18 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::CTPOP, MVT::i8, 1 },
};
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
+ { ISD::ABS, MVT::i64, 2 }, // SUB+CMOV
{ ISD::BITREVERSE, MVT::i64, 14 },
{ ISD::CTLZ, MVT::i64, 4 }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTTZ, MVT::i64, 3 }, // TEST+BSF+CMOV/BRANCH
{ ISD::CTPOP, MVT::i64, 10 },
{ ISD::SADDO, MVT::i64, 1 },
{ ISD::UADDO, MVT::i64, 1 },
+ { ISD::UMULO, MVT::i64, 2 }, // mulq + seto
};
static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
+ { ISD::ABS, MVT::i32, 2 }, // SUB+CMOV
+ { ISD::ABS, MVT::i16, 2 }, // SUB+CMOV
{ ISD::BITREVERSE, MVT::i32, 14 },
{ ISD::BITREVERSE, MVT::i16, 14 },
{ ISD::BITREVERSE, MVT::i8, 11 },
@@ -2560,6 +2698,9 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
{ ISD::UADDO, MVT::i32, 1 },
{ ISD::UADDO, MVT::i16, 1 },
{ ISD::UADDO, MVT::i8, 1 },
+ { ISD::UMULO, MVT::i32, 2 }, // mul + seto
+ { ISD::UMULO, MVT::i16, 2 },
+ { ISD::UMULO, MVT::i8, 2 },
};
Type *RetTy = ICA.getReturnType();
@@ -2569,6 +2710,9 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
switch (IID) {
default:
break;
+ case Intrinsic::abs:
+ ISD = ISD::ABS;
+ break;
case Intrinsic::bitreverse:
ISD = ISD::BITREVERSE;
break;
@@ -2592,12 +2736,24 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
case Intrinsic::sadd_sat:
ISD = ISD::SADDSAT;
break;
+ case Intrinsic::smax:
+ ISD = ISD::SMAX;
+ break;
+ case Intrinsic::smin:
+ ISD = ISD::SMIN;
+ break;
case Intrinsic::ssub_sat:
ISD = ISD::SSUBSAT;
break;
case Intrinsic::uadd_sat:
ISD = ISD::UADDSAT;
break;
+ case Intrinsic::umax:
+ ISD = ISD::UMAX;
+ break;
+ case Intrinsic::umin:
+ ISD = ISD::UMIN;
+ break;
case Intrinsic::usub_sat:
ISD = ISD::USUBSAT;
break;
@@ -2616,6 +2772,12 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
ISD = ISD::UADDO;
OpTy = RetTy->getContainedType(0);
break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ // SMULO has the same costs, so don't duplicate.
+ ISD = ISD::UMULO;
+ OpTy = RetTy->getContainedType(0);
+ break;
}
if (ISD != ISD::DELETED_NODE) {
@@ -2624,89 +2786,121 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
MVT MTy = LT.second;
// Attempt to lookup cost.
+ if (ISD == ISD::BITREVERSE && ST->hasGFNI() && ST->hasSSSE3() &&
+ MTy.isVector()) {
+ // With PSHUFB the code is very similar for all types. If we have integer
+ // byte operations, we just need a GF2P8AFFINEQB for vXi8. For other types
+ // we also need a PSHUFB.
+ unsigned Cost = MTy.getVectorElementType() == MVT::i8 ? 1 : 2;
+
+ // Without byte operations, we need twice as many GF2P8AFFINEQB and PSHUFB
+ // instructions. We also need an extract and an insert.
+ if (!(MTy.is128BitVector() || (ST->hasAVX2() && MTy.is256BitVector()) ||
+ (ST->hasBWI() && MTy.is512BitVector())))
+ Cost = Cost * 2 + 2;
+
+ return LT.first * Cost;
+ }
+
+ auto adjustTableCost = [](const CostTblEntry &Entry, int LegalizationCost,
+ FastMathFlags FMF) {
+ // If there are no NaNs to deal with, then these are reduced to a
+ // single MIN** or MAX** instruction instead of the MIN/CMP/SELECT that we
+ // assume is used in the non-fast case.
+ if (Entry.ISD == ISD::FMAXNUM || Entry.ISD == ISD::FMINNUM) {
+ if (FMF.noNaNs())
+ return LegalizationCost * 1;
+ }
+ return LegalizationCost * (int)Entry.Cost;
+ };
+
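+ // Worked example (illustrative): on AVX1 a v4f32 llvm.maxnum has a table
+ // cost of 3 (MAXPS + CMPUNORDPS + BLENDVPS). If the call carries the nnan
+ // fast-math flag, adjustTableCost returns LT.first * 1 (a single MAXPS)
+ // instead of LT.first * 3.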
if (ST->useGLMDivSqrtCosts())
if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->isSLM())
if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasCDI())
if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasBWI())
if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasAVX512())
if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasXOP())
if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasSSE42())
if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
+
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasSSSE3())
if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasSSE1())
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (ST->hasBMI()) {
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (const auto *Entry = CostTableLookup(BMI32CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
}
if (ST->hasLZCNT()) {
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
}
if (ST->hasPOPCNT()) {
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
}
// TODO - add BMI (TZCNT) scalar handling
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
}
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
@@ -2714,9 +2908,6 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
int X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
- if (CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getIntrinsicInstrCost(ICA, CostKind);
-
if (ICA.isTypeBasedOnly())
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
@@ -2928,8 +3119,32 @@ unsigned X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
Cost +=
BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false);
} else {
- unsigned NumSubVecs = LT.second.getSizeInBits() / 128;
- Cost += (PowerOf2Ceil(NumSubVecs) - 1) * LT.first;
+ // In each 128-lane, if at least one index is demanded but not all
+ // indices are demanded and this 128-lane is not the first 128-lane of
+ // the legalized vector, then this 128-lane needs an extracti128; if a
+ // 128-lane has at least one demanded index, this 128-lane needs an
+ // inserti128.
+
+ // The following cases help build a better understanding. Assume we
+ // insert several elements into a v8i32 vector on AVX2:
+ // Case#1: inserting into the 1st index needs vpinsrd + inserti128.
+ // Case#2: inserting into the 5th index needs extracti128 + vpinsrd +
+ // inserti128.
+ // Case#3: inserting into indices 4,5,6,7 needs 4*vpinsrd + inserti128.
+ unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * LT.first;
+ unsigned NumElts = LT.second.getVectorNumElements() * LT.first;
+ APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts);
+ unsigned Scale = NumElts / Num128Lanes;
+ // We iterate over each 128-lane and check whether it needs an
+ // extracti128/inserti128.
+ for (unsigned I = 0; I < NumElts; I += Scale) {
+ APInt Mask = WidenedDemandedElts.getBitsSet(NumElts, I, I + Scale);
+ APInt MaskedDE = Mask & WidenedDemandedElts;
+ unsigned Population = MaskedDE.countPopulation();
+ Cost += (Population > 0 && Population != Scale &&
+ I % LT.second.getVectorNumElements() != 0);
+ Cost += Population > 0;
+ }
Cost += DemandedElts.countPopulation();
// For vXf32 cases, insertion into the 0'th index in each v4f32
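To make the loop above concrete, take Case#2 from the comment (v8i32 on AVX2, only element 5 demanded, so LT.first == 1, Num128Lanes == 2, Scale == 4): lane 0 has population 0 and adds nothing; lane 1 has population 1, which is neither 0 nor Scale and does not start the legalized vector, so it pays both an extracti128 and an inserti128; the trailing countPopulation() adds one vpinsrd, for a total of 3. A standalone sketch of the same accounting with a plain bitmask instead of APInt (illustrative only):

#include <cstdint>

// Mirrors the per-128-lane accounting above: Demanded is a bitmask of the
// demanded elements, EltsPerLane is the number of elements per 128-bit lane,
// EltsPerLegalVec is the number of elements per legalized register.
unsigned laneInsertCost(unsigned NumElts, unsigned EltsPerLane,
                        unsigned EltsPerLegalVec, uint32_t Demanded) {
  unsigned Cost = 0;
  for (unsigned I = 0; I < NumElts; I += EltsPerLane) {
    uint32_t LaneMask = ((1u << EltsPerLane) - 1) << I;
    unsigned Pop = __builtin_popcount(Demanded & LaneMask);
    // Partially demanded lane that does not start a legalized register:
    // needs an extracti128.
    Cost += (Pop > 0 && Pop != EltsPerLane && I % EltsPerLegalVec != 0);
    // Any demanded element in the lane: needs an inserti128.
    Cost += (Pop > 0);
  }
  return Cost + __builtin_popcount(Demanded); // one vpinsrd/vpinsrq per element
}
// laneInsertCost(8, 4, 8, 1u << 1) == 2 (Case#1),
// laneInsertCost(8, 4, 8, 1u << 5) == 3 (Case#2),
// laneInsertCost(8, 4, 8, 0xF0u)   == 5 (Case#3).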
@@ -2973,11 +3188,10 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
const Instruction *I) {
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput) {
- if (isa_and_nonnull<StoreInst>(I)) {
- Value *Ptr = I->getOperand(1);
+ if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
// Store instruction with index and scale costs 2 Uops.
// Check the preceding GEP to identify non-const indices.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
if (!all_of(GEP->indices(), [](Value *V) { return isa<Constant>(V); }))
return TTI::TCC_Basic * 2;
}
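A concrete instance of the check above, with a hypothetical function for illustration: the store below goes through a getelementptr whose index is not a constant, so under a non-throughput cost kind it is charged 2 * TCC_Basic rather than 1.

// Store with base + variable-index addressing; the preceding GEP has a
// non-constant index, so the size/latency cost above becomes 2 uops.
void set_elem(int *A, long I, int V) { A[I] = V; }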
@@ -3056,7 +3270,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
getScalarizationOverhead(MaskTy, DemandedElts, false, true);
int ScalarCompareCost = getCmpSelInstrCost(
Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr,
- CostKind);
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
int ValueSplitCost =
@@ -3477,8 +3691,10 @@ int X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
// Otherwise fall back to cmp+select.
- return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
- getCmpSelInstrCost(Instruction::Select, Ty, CondTy, CostKind);
+ return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CmpInst::BAD_ICMP_PREDICATE,
+ CostKind) +
+ getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
}
int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
@@ -3707,8 +3923,10 @@ int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
return std::max(1, Cost);
}
-int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind) {
+int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3848,7 +4066,28 @@ X86TTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
return CostKind == TTI::TCK_RecipThroughput ? 0 : 1;
}
-// Return an average cost of Gather / Scatter instruction, maybe improved later
+int X86TTIImpl::getGatherOverhead() const {
+ // Some CPUs have more overhead for gather. The specified overhead is relative
+ // to the Load operation. "2" is the number provided by Intel architects. This
+ // parameter is used for cost estimation of Gather Op and comparison with
+ // other alternatives.
+ // TODO: Remove the explicit hasAVX512()? That would mean we would only
+ // enable gather with an explicit -march.
+ if (ST->hasAVX512() || (ST->hasAVX2() && ST->hasFastGather()))
+ return 2;
+
+ return 1024;
+}
+
+int X86TTIImpl::getScatterOverhead() const {
+ if (ST->hasAVX512())
+ return 2;
+
+ return 1024;
+}
+
+// Return an average cost of Gather / Scatter instruction, maybe improved later.
+// FIXME: Add TargetCostKind support.
int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, const Value *Ptr,
Align Alignment, unsigned AddressSpace) {
@@ -3906,8 +4145,8 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, const Value *Ptr,
// The gather / scatter cost is given by Intel architects. It is a rough
// number since we are looking at one instruction in a time.
const int GSOverhead = (Opcode == Instruction::Load)
- ? ST->getGatherOverhead()
- : ST->getScatterOverhead();
+ ? getGatherOverhead()
+ : getScatterOverhead();
return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
MaybeAlign(Alignment), AddressSpace,
TTI::TCK_RecipThroughput);
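As a worked reading of the formula above (the per-element scalar load cost of 1 is an assumption): a v8f32 gather on an AVX-512 target gets GSOverhead = 2 from getGatherOverhead(), giving roughly 2 + 8 * 1 = 10 reciprocal-throughput units, whereas a target without AVX-512 or fast AVX2 gathers gets 1024 + 8 * 1, which effectively steers the vectorizer away from emitting a gather at all.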
@@ -3921,6 +4160,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, const Value *Ptr,
/// Alignment - Alignment for one element.
/// AddressSpace - pointer[s] address space.
///
+/// FIXME: Add TargetCostKind support.
int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
bool VariableMask, Align Alignment,
unsigned AddressSpace) {
@@ -3934,9 +4174,9 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
MaskUnpackCost =
getScalarizationOverhead(MaskTy, DemandedElts, false, true);
- int ScalarCompareCost =
- getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()),
- nullptr, CostKind);
+ int ScalarCompareCost = getCmpSelInstrCost(
+ Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), nullptr,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
}
@@ -3967,9 +4207,15 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
-
- if (CostKind != TTI::TCK_RecipThroughput)
- return 1;
+ if (CostKind != TTI::TCK_RecipThroughput) {
+ if ((Opcode == Instruction::Load &&
+ isLegalMaskedGather(SrcVTy, Align(Alignment))) ||
+ (Opcode == Instruction::Store &&
+ isLegalMaskedScatter(SrcVTy, Align(Alignment))))
+ return 1;
+ return BaseT::getGatherScatterOpCost(Opcode, SrcVTy, Ptr, VariableMask,
+ Alignment, CostKind, I);
+ }
assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
@@ -4129,7 +4375,7 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {
// scalarize it.
if (auto *DataVTy = dyn_cast<FixedVectorType>(DataTy)) {
unsigned NumElts = DataVTy->getNumElements();
- if (NumElts == 1 || !isPowerOf2_32(NumElts))
+ if (NumElts == 1)
return false;
}
Type *ScalarTy = DataTy->getScalarType();
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
index d462e1f96ca2..17570f1c04a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -22,6 +22,8 @@
namespace llvm {
+class InstCombiner;
+
class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
typedef BasicTTIImplBase<X86TTIImpl> BaseT;
typedef TargetTransformInfo TTI;
@@ -60,7 +62,6 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
X86::FeatureLZCNTFalseDeps,
X86::FeatureBranchFusion,
X86::FeatureMacroFusion,
- X86::FeatureMergeToThreeWayBranch,
X86::FeaturePadShortFunctions,
X86::FeaturePOPCNTFalseDeps,
X86::FeatureSSEUnalignedMem,
@@ -129,9 +130,10 @@ public:
int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::TargetCostKind CostKind,
+ TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
@@ -151,6 +153,18 @@ public:
int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
const SCEV *Ptr);
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const;
+ Optional<Value *>
+ simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+ APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) const;
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const;
+
unsigned getAtomicMemIntrinsicMaxElementSize() const;
int getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
@@ -190,8 +204,9 @@ public:
unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
- int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty, TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TTI::TargetCostKind CostKind);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
@@ -230,6 +245,9 @@ private:
int getGSVectorCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
Align Alignment, unsigned AddressSpace);
+ int getGatherOverhead() const;
+ int getScatterOverhead() const;
+
/// @}
};
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TileConfig.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TileConfig.cpp
new file mode 100644
index 000000000000..ef010bcd38b7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TileConfig.cpp
@@ -0,0 +1,248 @@
+//===-- X86TileConfig.cpp - Tile Register Configure ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Pass to configure the shape of AMX physical registers.
+/// AMX registers need to be configured before use. The X86PreTileConfig pass
+/// inserts the pldtilecfg instruction, but at that point the shape of each
+/// physical tile register is not yet known, because register allocation has
+/// not been done. This pass runs after the register allocation pass. It
+/// collects the shape information of each physical tile register and stores
+/// the shapes in the stack slot that is allocated for loading the
+/// configuration into the tile config register.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TileShapeInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "tile-config"
+
+namespace {
+
+class X86TileConfig : public MachineFunctionPass {
+ // context
+ MachineFunction *MF = nullptr;
+ const X86Subtarget *ST = nullptr;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ VirtRegMap *VRM = nullptr;
+ LiveIntervals *LIS = nullptr;
+
+ MachineInstr *getTileConfigPoint();
+ void tileConfig();
+
+public:
+ X86TileConfig() : MachineFunctionPass(ID) {}
+
+ /// Return the pass name.
+ StringRef getPassName() const override { return "Tile Register Configure"; }
+
+ /// X86TileConfig analysis usage.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Perform register allocation.
+ bool runOnMachineFunction(MachineFunction &mf) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char X86TileConfig::ID = 0;
+
+INITIALIZE_PASS_BEGIN(X86TileConfig, "tileconfig", "Tile Register Configure",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(X86TileConfig, "tileconfig", "Tile Register Configure",
+ false, false)
+
+void X86TileConfig::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<VirtRegMap>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static unsigned getTilePhysRegIndex(Register PhysReg) {
+ assert((PhysReg >= X86::TMM0 && PhysReg <= X86::TMM7) &&
+ "Tile register number is invalid");
+ return (PhysReg - X86::TMM0);
+}
+
+static MachineInstr *
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ Register SrcReg, unsigned BitSize, int FrameIdx, int Offset,
+ const TargetInstrInfo *TII, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+
+ unsigned SubIdx = (BitSize == 8) ? X86::sub_8bit : X86::sub_16bit;
+ unsigned Opc = (BitSize == 8) ? X86::MOV8mr : X86::MOV16mr;
+ if (BitSize == TRI->getRegSizeInBits(*RC))
+ SubIdx = 0;
+ MachineInstr *NewMI =
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), TII->get(Opc)), FrameIdx,
+ Offset)
+ .addReg(SrcReg, 0, SubIdx);
+ return NewMI;
+}
+
+static MachineInstr *storeImmToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ int64_t Imm, unsigned BitSize,
+ int FrameIdx, int Offset,
+ const TargetInstrInfo *TII) {
+ unsigned Opc = (BitSize == 8) ? X86::MOV8mi : X86::MOV16mi;
+ return addFrameReference(BuildMI(MBB, MI, DebugLoc(), TII->get(Opc)),
+ FrameIdx, Offset)
+ .addImm(Imm);
+}
+
+MachineInstr *X86TileConfig::getTileConfigPoint() {
+ for (MachineBasicBlock &MBB : *MF) {
+
+ // Traverse the basic block.
+ for (MachineInstr &MI : MBB)
+ // Refer to X86PreTileConfig.cpp.
+ // We only support one tile config for now.
+ if (MI.getOpcode() == X86::PLDTILECFG)
+ return &MI;
+ }
+
+ return nullptr;
+}
+
+void X86TileConfig::tileConfig() {
+ MachineInstr *MI = getTileConfigPoint();
+ if (!MI)
+ return;
+ MachineBasicBlock *MBB = MI->getParent();
+ int SS = MI->getOperand(1).getIndex();
+ BitVector PhysRegs(TRI->getNumRegs());
+
+ // Fill in the palette first.
+ auto *NewMI = storeImmToStackSlot(*MBB, *MI, 1, 8, SS, 0, TII);
+ LIS->InsertMachineInstrInMaps(*NewMI);
+ // Fill in the shape of each tile physical register.
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register VirtReg = Register::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(VirtReg))
+ continue;
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ if (RC.getID() != X86::TILERegClassID)
+ continue;
+ Register PhysReg = VRM->getPhys(VirtReg);
+ if (PhysRegs.test(PhysReg))
+ continue;
+ PhysRegs.set(PhysReg);
+ ShapeT Shape = VRM->getShape(VirtReg);
+ Register RowReg = Shape.getRow()->getReg();
+ Register ColReg = Shape.getCol()->getReg();
+
+ // Here is the data format for the tile config.
+ // 0 palette
+ // 1 start_row
+ // 2-15 reserved, must be zero
+ // 16-17 tile0.colsb Tile 0 bytes per row.
+ // 18-19 tile1.colsb Tile 1 bytes per row.
+ // 20-21 tile2.colsb Tile 2 bytes per row.
+ // ... (sequence continues)
+ // 30-31 tile7.colsb Tile 7 bytes per row.
+ // 32-47 reserved, must be zero
+ // 48 tile0.rows Tile 0 rows.
+ // 49 tile1.rows Tile 1 rows.
+ // 50 tile2.rows Tile 2 rows.
+ // ... (sequence continues)
+ // 55 tile7.rows Tile 7 rows.
+ // 56-63 reserved, must be zero
+ unsigned Index = getTilePhysRegIndex(PhysReg);
+ int RowOffset = 48 + Index;
+ int ColOffset = 16 + Index * 2;
+
+ unsigned BitSize = 8;
+ for (const auto &Pair : {std::make_pair(RowReg, RowOffset),
+ std::make_pair(ColReg, ColOffset)}) {
+ int64_t Imm;
+ int ImmCount = 0;
+ // All defs must have the same value; otherwise the MIs are invalid.
+ // An immediate is preferred.
+ for (const MachineOperand &MO : MRI->def_operands(Pair.first)) {
+ const auto *Inst = MO.getParent();
+ if (Inst->isMoveImmediate()) {
+ ImmCount++;
+ Imm = Inst->getOperand(1).getImm();
+ break;
+ }
+ }
+ auto StoreConfig = [&](int Offset) {
+ MachineInstr *NewMI = nullptr;
+ if (ImmCount)
+ NewMI = storeImmToStackSlot(*MBB, *MI, Imm, BitSize, SS, Offset, TII);
+ else {
+ const TargetRegisterClass *RC = MRI->getRegClass(Pair.first);
+ NewMI = storeRegToStackSlot(*MBB, *MI, Pair.first, BitSize, SS,
+ Offset, TII, RC, TRI);
+ }
+ SlotIndex SIdx = LIS->InsertMachineInstrInMaps(*NewMI);
+ if (!ImmCount) {
+ // Extend the live interval.
+ SmallVector<SlotIndex, 8> EndPoints = {SIdx.getRegSlot()};
+ LiveInterval &Int = LIS->getInterval(Pair.first);
+ LIS->extendToIndices(Int, EndPoints);
+ }
+ };
+ StoreConfig(Pair.second);
+ BitSize += 8;
+ }
+ }
+}
+
+bool X86TileConfig::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ ST = &mf.getSubtarget<X86Subtarget>();
+ TRI = ST->getRegisterInfo();
+ TII = mf.getSubtarget().getInstrInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ VRM = &getAnalysis<VirtRegMap>();
+ LIS = &getAnalysis<LiveIntervals>();
+
+ if (VRM->isShapeMapEmpty())
+ return false;
+
+ tileConfig();
+ return true;
+}
+
+FunctionPass *llvm::createX86TileConfigPass() { return new X86TileConfig(); }
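The 64-byte configuration image that tileConfig() fills in (palette at offset 0, 16-bit colsb entries starting at offset 16, 8-bit rows entries starting at offset 48) can be summarized in a standalone sketch; the struct and helper names below are illustrative, not part of the pass:

#include <cstdint>

// Illustrative 64-byte AMX tile configuration image, matching the offsets
// stored by the pass above.
struct TileConfigImage {
  uint8_t Bytes[64] = {};

  void setPalette(uint8_t P) { Bytes[0] = P; }    // offset 0
  void setColsB(unsigned Tile, uint16_t ColsB) {  // offsets 16..31, 2 bytes/tile
    Bytes[16 + Tile * 2] = uint8_t(ColsB);
    Bytes[16 + Tile * 2 + 1] = uint8_t(ColsB >> 8);
  }
  void setRows(unsigned Tile, uint8_t Rows) {     // offsets 48..55, 1 byte/tile
    Bytes[48 + Tile] = Rows;
  }
};

// E.g. describing tmm2 as 16 rows of 64 bytes:
//   TileConfigImage Img; Img.setPalette(1); Img.setColsB(2, 64); Img.setRows(2, 16);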
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
index 8627bbbf18d2..8d8bd5e6b326 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -109,7 +109,7 @@ private:
/// The linked list node subobject inside of RegNode.
Value *Link = nullptr;
};
-}
+} // namespace
FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); }
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
index c7868bf4cf8e..0ea47106434c 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
@@ -27,6 +27,7 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 4de252548961..b44984ff6b4c 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -51,7 +51,7 @@ static MCRegisterInfo *createXCoreMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *
createXCoreMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- return createXCoreMCSubtargetInfoImpl(TT, CPU, FS);
+ return createXCoreMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index c32653137a10..db3dd7fb1438 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -444,16 +443,15 @@ SDValue XCoreTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
if (LD->getAlignment() == 2) {
- SDValue Low =
- DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr,
- LD->getPointerInfo(), MVT::i16,
- /* Alignment = */ 2, LD->getMemOperand()->getFlags());
+ SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr,
+ LD->getPointerInfo(), MVT::i16, Align(2),
+ LD->getMemOperand()->getFlags());
SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(2, DL, MVT::i32));
SDValue High =
DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain, HighAddr,
LD->getPointerInfo().getWithOffset(2), MVT::i16,
- /* Alignment = */ 2, LD->getMemOperand()->getFlags());
+ Align(2), LD->getMemOperand()->getFlags());
SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
DAG.getConstant(16, DL, MVT::i32));
SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
@@ -503,14 +501,14 @@ SDValue XCoreTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Low = Value;
SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
DAG.getConstant(16, dl, MVT::i32));
- SDValue StoreLow = DAG.getTruncStore(
- Chain, dl, Low, BasePtr, ST->getPointerInfo(), MVT::i16,
- /* Alignment = */ 2, ST->getMemOperand()->getFlags());
+ SDValue StoreLow =
+ DAG.getTruncStore(Chain, dl, Low, BasePtr, ST->getPointerInfo(),
+ MVT::i16, Align(2), ST->getMemOperand()->getFlags());
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
DAG.getConstant(2, dl, MVT::i32));
SDValue StoreHigh = DAG.getTruncStore(
Chain, dl, High, HighAddr, ST->getPointerInfo().getWithOffset(2),
- MVT::i16, /* Alignment = */ 2, ST->getMemOperand()->getFlags());
+ MVT::i16, Align(2), ST->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 83fc16ed98fc..6528154ab0e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/IntrinsicsXCore.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
+#include "llvm/IR/ReplaceConstant.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -74,61 +75,10 @@ createLoweredInitializer(ArrayType *NewType, Constant *OriginalInitializer) {
return ConstantArray::get(NewType, Elements);
}
-static Instruction *
-createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
- IRBuilder<NoFolder> Builder(Instr);
- unsigned OpCode = CE->getOpcode();
- switch (OpCode) {
- case Instruction::GetElementPtr: {
- SmallVector<Value *,4> CEOpVec(CE->op_begin(), CE->op_end());
- ArrayRef<Value *> CEOps(CEOpVec);
- return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(
- cast<GEPOperator>(CE)->getSourceElementType(), CEOps[0],
- CEOps.slice(1)));
- }
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- return dyn_cast<Instruction>(
- Builder.CreateBinOp((Instruction::BinaryOps)OpCode,
- CE->getOperand(0), CE->getOperand(1),
- CE->getName()));
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- return dyn_cast<Instruction>(
- Builder.CreateCast((Instruction::CastOps)OpCode,
- CE->getOperand(0), CE->getType(),
- CE->getName()));
- default:
- llvm_unreachable("Unhandled constant expression!\n");
- }
-}
static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
do {
- SmallVector<WeakTrackingVH, 8> WUsers(CE->user_begin(), CE->user_end());
+ SmallVector<WeakTrackingVH, 8> WUsers(CE->users());
llvm::sort(WUsers);
WUsers.erase(std::unique(WUsers.begin(), WUsers.end()), WUsers.end());
while (!WUsers.empty())
@@ -201,7 +151,7 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
GV->isExternallyInitialized());
// Update uses.
- SmallVector<User *, 16> Users(GV->user_begin(), GV->user_end());
+ SmallVector<User *, 16> Users(GV->users());
for (unsigned I = 0, E = Users.size(); I != E; ++I) {
User *U = Users[I];
Instruction *Inst = cast<Instruction>(U);
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSubtarget.cpp
index ffeb0862c945..4b29751c7d06 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -26,5 +26,5 @@ void XCoreSubtarget::anchor() { }
XCoreSubtarget::XCoreSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : XCoreGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this),
- TLInfo(TM, *this), TSInfo() {}
+ : XCoreGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(),
+ FrameLowering(*this), TLInfo(TM, *this), TSInfo() {}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSubtarget.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSubtarget.h
index 68139da9d1d0..d3979b275beb 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSubtarget.h
@@ -44,7 +44,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
const XCoreInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const XCoreFrameLowering *getFrameLowering() const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 1eea1e37c253..046cd6b5db7d 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -26,9 +26,7 @@
using namespace llvm;
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
+ return RM.getValueOr(Reloc::Static);
}
static CodeModel::Model
diff --git a/contrib/llvm-project/llvm/lib/Testing/Support/Annotations.cpp b/contrib/llvm-project/llvm/lib/Testing/Support/Annotations.cpp
index 09c572011d36..24607bd4c7d8 100644
--- a/contrib/llvm-project/llvm/lib/Testing/Support/Annotations.cpp
+++ b/contrib/llvm-project/llvm/lib/Testing/Support/Annotations.cpp
@@ -72,8 +72,10 @@ size_t Annotations::point(llvm::StringRef Name) const {
}
std::vector<size_t> Annotations::points(llvm::StringRef Name) const {
- auto P = Points.lookup(Name);
- return {P.begin(), P.end()};
+ auto I = Points.find(Name);
+ if (I == Points.end())
+ return {};
+ return {I->getValue().begin(), I->getValue().end()};
}
Annotations::Range Annotations::range(llvm::StringRef Name) const {
@@ -85,8 +87,10 @@ Annotations::Range Annotations::range(llvm::StringRef Name) const {
std::vector<Annotations::Range>
Annotations::ranges(llvm::StringRef Name) const {
- auto R = Ranges.lookup(Name);
- return {R.begin(), R.end()};
+ auto I = Ranges.find(Name);
+ if (I == Ranges.end())
+ return {};
+ return {I->getValue().begin(), I->getValue().end()};
}
llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &O,
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/MachO/InterfaceFile.cpp b/contrib/llvm-project/llvm/lib/TextAPI/MachO/InterfaceFile.cpp
index 64d2c3e865ab..cfc1c584d496 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/MachO/InterfaceFile.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/MachO/InterfaceFile.cpp
@@ -69,7 +69,6 @@ void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) {
}
ParentUmbrellas.emplace(Iter, Target_, std::string(Parent));
- return;
}
void InterfaceFile::addUUID(const Target &Target_, StringRef UUID) {
@@ -83,7 +82,6 @@ void InterfaceFile::addUUID(const Target &Target_, StringRef UUID) {
}
UUIDs.emplace(Iter, Target_, std::string(UUID));
- return;
}
void InterfaceFile::addUUID(const Target &Target, uint8_t UUID[16]) {
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/MachO/Platform.cpp b/contrib/llvm-project/llvm/lib/TextAPI/MachO/Platform.cpp
index 588ec9a4d83b..f454c1cb6b16 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/MachO/Platform.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/MachO/Platform.cpp
@@ -49,7 +49,7 @@ PlatformKind mapToPlatformKind(const Triple &Target) {
case Triple::WatchOS:
return Target.isSimulatorEnvironment() ? PlatformKind::watchOSSimulator
: PlatformKind::watchOS;
- // TODO: add bridgeOS once in llvm::Triple
+ // TODO: add bridgeOS & driverKit once in llvm::Triple
}
llvm_unreachable("Unknown Target Triple");
}
@@ -83,6 +83,8 @@ StringRef getPlatformName(PlatformKind Platform) {
return "tvOS Simulator";
case PlatformKind::watchOSSimulator:
return "watchOS Simulator";
+ case PlatformKind::driverKit:
+ return "DriverKit";
}
llvm_unreachable("Unknown llvm.MachO.PlatformKind enum");
}
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/MachO/Target.cpp b/contrib/llvm-project/llvm/lib/TextAPI/MachO/Target.cpp
index aee8ef421425..6f8d9bb4e19a 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/MachO/Target.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/MachO/Target.cpp
@@ -33,6 +33,7 @@ Expected<Target> Target::create(StringRef TargetValue) {
.Case("ios-simulator", PlatformKind::iOSSimulator)
.Case("tvos-simulator", PlatformKind::tvOSSimulator)
.Case("watchos-simulator", PlatformKind::watchOSSimulator)
+ .Case("driverkit", PlatformKind::driverKit)
.Default(PlatformKind::unknown);
if (Platform == PlatformKind::unknown) {
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/MachO/TextStub.cpp b/contrib/llvm-project/llvm/lib/TextAPI/MachO/TextStub.cpp
index 141f897fb564..1d6352b2e126 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/MachO/TextStub.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/MachO/TextStub.cpp
@@ -407,6 +407,9 @@ template <> struct ScalarTraits<Target> {
case PlatformKind::watchOSSimulator:
OS << "watchos-simulator";
break;
+ case PlatformKind::driverKit:
+ OS << "driverkit";
+ break;
}
}
@@ -518,13 +521,12 @@ template <> struct MappingTraits<const InterfaceFile *> {
break;
}
}
- llvm::sort(Section.Symbols.begin(), Section.Symbols.end());
- llvm::sort(Section.Classes.begin(), Section.Classes.end());
- llvm::sort(Section.ClassEHs.begin(), Section.ClassEHs.end());
- llvm::sort(Section.IVars.begin(), Section.IVars.end());
- llvm::sort(Section.WeakDefSymbols.begin(),
- Section.WeakDefSymbols.end());
- llvm::sort(Section.TLVSymbols.begin(), Section.TLVSymbols.end());
+ llvm::sort(Section.Symbols);
+ llvm::sort(Section.Classes);
+ llvm::sort(Section.ClassEHs);
+ llvm::sort(Section.IVars);
+ llvm::sort(Section.WeakDefSymbols);
+ llvm::sort(Section.TLVSymbols);
Exports.emplace_back(std::move(Section));
}
@@ -576,12 +578,11 @@ template <> struct MappingTraits<const InterfaceFile *> {
break;
}
}
- llvm::sort(Section.Symbols.begin(), Section.Symbols.end());
- llvm::sort(Section.Classes.begin(), Section.Classes.end());
- llvm::sort(Section.ClassEHs.begin(), Section.ClassEHs.end());
- llvm::sort(Section.IVars.begin(), Section.IVars.end());
- llvm::sort(Section.WeakRefSymbols.begin(),
- Section.WeakRefSymbols.end());
+ llvm::sort(Section.Symbols);
+ llvm::sort(Section.Classes);
+ llvm::sort(Section.ClassEHs);
+ llvm::sort(Section.IVars);
+ llvm::sort(Section.WeakRefSymbols);
Undefineds.emplace_back(std::move(Section));
}
}
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/MachO/TextStubCommon.cpp b/contrib/llvm-project/llvm/lib/TextAPI/MachO/TextStubCommon.cpp
index 4a82df6beac1..0d3614b0a24c 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/MachO/TextStubCommon.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/MachO/TextStubCommon.cpp
@@ -84,6 +84,9 @@ void ScalarTraits<PlatformSet>::output(const PlatformSet &Values, void *IO,
case PlatformKind::macCatalyst:
OS << "iosmac";
break;
+ case PlatformKind::driverKit:
+ OS << "driverkit";
+ break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index cd39428b9c38..f3904b921e60 100644
--- a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -139,35 +139,28 @@ static void doList(opt::InputArgList& Args) {
fatalOpenError(std::move(Err), B->getBufferIdentifier());
}
-static COFF::MachineTypes getCOFFFileMachine(MemoryBufferRef MB) {
+static Expected<COFF::MachineTypes> getCOFFFileMachine(MemoryBufferRef MB) {
std::error_code EC;
auto Obj = object::COFFObjectFile::create(MB);
- if (!Obj) {
- llvm::errs() << MB.getBufferIdentifier()
- << ": failed to open: " << Obj.takeError() << '\n';
- exit(1);
- }
+ if (!Obj)
+ return Obj.takeError();
uint16_t Machine = (*Obj)->getMachine();
if (Machine != COFF::IMAGE_FILE_MACHINE_I386 &&
Machine != COFF::IMAGE_FILE_MACHINE_AMD64 &&
Machine != COFF::IMAGE_FILE_MACHINE_ARMNT &&
Machine != COFF::IMAGE_FILE_MACHINE_ARM64) {
- llvm::errs() << MB.getBufferIdentifier() << ": unknown machine: " << Machine
- << '\n';
- exit(1);
+ return createStringError(inconvertibleErrorCode(),
+ "unknown machine: " + std::to_string(Machine));
}
return static_cast<COFF::MachineTypes>(Machine);
}
-static COFF::MachineTypes getBitcodeFileMachine(MemoryBufferRef MB) {
+static Expected<COFF::MachineTypes> getBitcodeFileMachine(MemoryBufferRef MB) {
Expected<std::string> TripleStr = getBitcodeTargetTriple(MB);
- if (!TripleStr) {
- llvm::errs() << MB.getBufferIdentifier()
- << ": failed to get target triple from bitcode\n";
- exit(1);
- }
+ if (!TripleStr)
+ return TripleStr.takeError();
switch (Triple(*TripleStr).getArch()) {
case Triple::x86:
@@ -179,9 +172,8 @@ static COFF::MachineTypes getBitcodeFileMachine(MemoryBufferRef MB) {
case Triple::aarch64:
return COFF::IMAGE_FILE_MACHINE_ARM64;
default:
- llvm::errs() << MB.getBufferIdentifier()
- << ": unknown arch in target triple " << *TripleStr << '\n';
- exit(1);
+ return createStringError(inconvertibleErrorCode(),
+ "unknown arch in target triple: " + *TripleStr);
}
}
@@ -201,7 +193,7 @@ static void appendFile(std::vector<NewArchiveMember> &Members,
// If a user attempts to add an archive to another archive, llvm-lib doesn't
// handle the first archive file as a single file. Instead, it extracts all
- // members from the archive and add them to the second archive. This beahvior
+ // members from the archive and add them to the second archive. This behavior
// is for compatibility with Microsoft's lib command.
if (Magic == file_magic::archive) {
Error Err = Error::success();
@@ -233,9 +225,17 @@ static void appendFile(std::vector<NewArchiveMember> &Members,
// in writeArchive() which needs to support many tools, can't assume the
// input is COFF, and doesn't have a good way to report errors.
if (Magic == file_magic::coff_object || Magic == file_magic::bitcode) {
- COFF::MachineTypes FileMachine = (Magic == file_magic::coff_object)
- ? getCOFFFileMachine(MB)
- : getBitcodeFileMachine(MB);
+ Expected<COFF::MachineTypes> MaybeFileMachine =
+ (Magic == file_magic::coff_object) ? getCOFFFileMachine(MB)
+ : getBitcodeFileMachine(MB);
+ if (!MaybeFileMachine) {
+ handleAllErrors(MaybeFileMachine.takeError(), [&](const ErrorInfoBase &EIB) {
+ llvm::errs() << MB.getBufferIdentifier() << ": " << EIB.message()
+ << "\n";
+ });
+ exit(1);
+ }
+ COFF::MachineTypes FileMachine = *MaybeFileMachine;
// FIXME: Once lld-link rejects multiple resource .obj files:
// Call convertResToCOFF() on .res files and add the resulting
diff --git a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index d315c7f13ac2..a7ae10d156d5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -21,8 +21,10 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -38,6 +40,8 @@ using namespace PatternMatch;
STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
STATISTIC(NumGuardedRotates,
"Number of guarded rotates transformed into funnel shifts");
+STATISTIC(NumGuardedFunnelShifts,
+ "Number of guarded funnel shifts transformed into funnel shifts");
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
namespace {
@@ -66,96 +70,127 @@ public:
};
} // namespace
-/// Match a pattern for a bitwise rotate operation that partially guards
-/// against undefined behavior by branching around the rotation when the shift
-/// amount is 0.
-static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+/// Match a pattern for a bitwise funnel/rotate operation that partially guards
+/// against undefined behavior by branching around the funnel-shift/rotation
+/// when the shift amount is 0.
+static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
return false;
// As with the one-use checks below, this is not strictly necessary, but we
// are being cautious to avoid potential perf regressions on targets that
- // do not actually have a rotate instruction (where the funnel shift would be
- // expanded back into math/shift/logic ops).
+ // do not actually have a funnel/rotate instruction (where the funnel shift
+ // would be expanded back into math/shift/logic ops).
if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
return false;
- // Match V to funnel shift left/right and capture the source operand and
- // shift amount in X and Y.
- auto matchRotate = [](Value *V, Value *&X, Value *&Y) {
- Value *L0, *L1, *R0, *R1;
+ // Match V to funnel shift left/right and capture the source operands and
+ // shift amount.
+ auto matchFunnelShift = [](Value *V, Value *&ShVal0, Value *&ShVal1,
+ Value *&ShAmt) {
+ Value *SubAmt;
unsigned Width = V->getType()->getScalarSizeInBits();
- auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1));
-
- // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y))
- auto RotL = m_OneUse(
- m_c_Or(m_Shl(m_Value(L0), m_Value(L1)), m_LShr(m_Value(R0), Sub)));
- if (RotL.match(V) && L0 == R0 && L1 == R1) {
- X = L0;
- Y = L1;
- return Intrinsic::fshl;
+
+ // fshl(ShVal0, ShVal1, ShAmt)
+ // == (ShVal0 << ShAmt) | (ShVal1 >> (Width - ShAmt))
+ if (match(V, m_OneUse(m_c_Or(
+ m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
+ m_LShr(m_Value(ShVal1),
+ m_Sub(m_SpecificInt(Width), m_Value(SubAmt))))))) {
+ if (ShAmt == SubAmt) // TODO: Use m_Specific
+ return Intrinsic::fshl;
}
- // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y))
- auto RotR = m_OneUse(
- m_c_Or(m_LShr(m_Value(L0), m_Value(L1)), m_Shl(m_Value(R0), Sub)));
- if (RotR.match(V) && L0 == R0 && L1 == R1) {
- X = L0;
- Y = L1;
- return Intrinsic::fshr;
+ // fshr(ShVal0, ShVal1, ShAmt)
+ // == (ShVal0 >> ShAmt) | (ShVal1 << (Width - ShAmt))
+ if (match(V,
+ m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
+ m_Value(SubAmt))),
+ m_LShr(m_Value(ShVal1), m_Value(ShAmt)))))) {
+ if (ShAmt == SubAmt) // TODO: Use m_Specific
+ return Intrinsic::fshr;
}
return Intrinsic::not_intrinsic;
};
- // One phi operand must be a rotate operation, and the other phi operand must
- // be the source value of that rotate operation:
- // phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ]
+ // One phi operand must be a funnel/rotate operation, and the other phi
+ // operand must be the source value of that funnel/rotate operation:
+ // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
+ // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
+ // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
PHINode &Phi = cast<PHINode>(I);
+ unsigned FunnelOp = 0, GuardOp = 1;
Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
- Value *RotSrc, *RotAmt;
- Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt);
- if (IID == Intrinsic::not_intrinsic || RotSrc != P1) {
- IID = matchRotate(P1, RotSrc, RotAmt);
- if (IID == Intrinsic::not_intrinsic || RotSrc != P0)
+ Value *ShVal0, *ShVal1, *ShAmt;
+ Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
+ if (IID == Intrinsic::not_intrinsic ||
+ (IID == Intrinsic::fshl && ShVal0 != P1) ||
+ (IID == Intrinsic::fshr && ShVal1 != P1)) {
+ IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
+ if (IID == Intrinsic::not_intrinsic ||
+ (IID == Intrinsic::fshl && ShVal0 != P0) ||
+ (IID == Intrinsic::fshr && ShVal1 != P0))
return false;
assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
"Pattern must match funnel shift left or right");
+ std::swap(FunnelOp, GuardOp);
}
// The incoming block with our source operand must be the "guard" block.
- // That must contain a cmp+branch to avoid the rotate when the shift amount
- // is equal to 0. The other incoming block is the block with the rotate.
- BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
- BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
+ // That must contain a cmp+branch to avoid the funnel/rotate when the shift
+ // amount is equal to 0. The other incoming block is the block with the
+ // funnel/rotate.
+ BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
+ BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
Instruction *TermI = GuardBB->getTerminator();
+
+ // Ensure that the shift values dominate each block.
+ if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI))
+ return false;
+
ICmpInst::Predicate Pred;
BasicBlock *PhiBB = Phi.getParent();
- if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()),
- m_SpecificBB(PhiBB), m_SpecificBB(RotBB))))
+ if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(ShAmt), m_ZeroInt()),
+ m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB))))
return false;
if (Pred != CmpInst::ICMP_EQ)
return false;
+ IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+
+ if (ShVal0 == ShVal1)
+ ++NumGuardedRotates;
+ else
+ ++NumGuardedFunnelShifts;
+
+ // If this is not a rotate then the select was blocking poison from the
+ // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.
+ bool IsFshl = IID == Intrinsic::fshl;
+ if (ShVal0 != ShVal1) {
+ if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1))
+ ShVal1 = Builder.CreateFreeze(ShVal1);
+ else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0))
+ ShVal0 = Builder.CreateFreeze(ShVal0);
+ }
+
// We matched a variation of this IR pattern:
// GuardBB:
- // %cmp = icmp eq i32 %RotAmt, 0
- // br i1 %cmp, label %PhiBB, label %RotBB
- // RotBB:
- // %sub = sub i32 32, %RotAmt
- // %shr = lshr i32 %X, %sub
- // %shl = shl i32 %X, %RotAmt
- // %rot = or i32 %shr, %shl
+ // %cmp = icmp eq i32 %ShAmt, 0
+ // br i1 %cmp, label %PhiBB, label %FunnelBB
+ // FunnelBB:
+ // %sub = sub i32 32, %ShAmt
+ // %shr = lshr i32 %ShVal1, %sub
+ // %shl = shl i32 %ShVal0, %ShAmt
+ // %fsh = or i32 %shr, %shl
// br label %PhiBB
// PhiBB:
- // %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ]
+ // %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
// -->
- // llvm.fshl.i32(i32 %X, i32 %RotAmt)
- IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+ // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
- Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt}));
- ++NumGuardedRotates;
+ Phi.replaceAllUsesWith(Builder.CreateCall(F, {ShVal0, ShVal1, ShAmt}));
return true;
}
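For reference, the guarded double-shift idiom below is the kind of source that lowers to the IR pattern in the comment above; after this fold the branch and the or-of-shifts can collapse into a single llvm.fshl.i32 call (the functions are only illustrations):

#include <cstdint>

// Guarded funnel shift: the branch avoids the undefined 32-bit shift when
// n == 0; the transform above rewrites the resulting phi into llvm.fshl.
uint32_t funnel_left(uint32_t hi, uint32_t lo, uint32_t n) {
  if (n == 0)
    return hi;                           // the "guard" block
  return (hi << n) | (lo >> (32 - n));   // the "funnel" block
}

// The rotate special case (hi == lo) is still counted separately, via
// NumGuardedRotates rather than NumGuardedFunnelShifts.
uint32_t rotate_left(uint32_t x, uint32_t n) {
  return funnel_left(x, x, n);
}

When the two shifted values differ, the rewrite also freezes the value the guard used to keep out of the result, since a plain funnel shift no longer blocks poison from the shift-by-zero operand.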
@@ -202,8 +237,8 @@ static bool matchAndOrChain(Value *V, MaskOps &MOps) {
// We need a shift-right or a bare value representing a compare of bit 0 of
// the original source operand.
Value *Candidate;
- uint64_t BitIndex = 0;
- if (!match(V, m_LShr(m_Value(Candidate), m_ConstantInt(BitIndex))))
+ const APInt *BitIndex = nullptr;
+ if (!match(V, m_LShr(m_Value(Candidate), m_APInt(BitIndex))))
Candidate = V;
// Initialize result source operand.
@@ -211,11 +246,11 @@ static bool matchAndOrChain(Value *V, MaskOps &MOps) {
MOps.Root = Candidate;
// The shift constant is out-of-range? This code hasn't been simplified.
- if (BitIndex >= MOps.Mask.getBitWidth())
+ if (BitIndex && BitIndex->uge(MOps.Mask.getBitWidth()))
return false;
// Fill in the mask bit derived from the shift constant.
- MOps.Mask.setBit(BitIndex);
+ MOps.Mask.setBit(BitIndex ? BitIndex->getZExtValue() : 0);
return MOps.Root == Candidate;
}
@@ -344,7 +379,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
// iteratively in this loop rather than waiting until the end.
for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
MadeChange |= foldAnyOrAllBitsSet(I);
- MadeChange |= foldGuardedRotateToFunnelShift(I);
+ MadeChange |= foldGuardedFunnelShift(I, DT);
MadeChange |= tryToRecognizePopCount(I);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index 5cd40c66227f..16b82219e8ca 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -31,8 +31,8 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
using namespace llvm;
@@ -130,8 +130,7 @@ bool TruncInstCombine::buildTruncExpressionDag() {
case Instruction::Select: {
SmallVector<Value *, 2> Operands;
getRelevantOperands(I, Operands);
- for (Value *Operand : Operands)
- Worklist.push_back(Operand);
+ append_range(Worklist, Operands);
break;
}
default:
@@ -289,10 +288,8 @@ Type *TruncInstCombine::getBestTruncatedType() {
/// version of \p Ty, otherwise return \p Ty.
static Type *getReducedType(Value *V, Type *Ty) {
assert(Ty && !Ty->isVectorTy() && "Expect Scalar Type");
- if (auto *VTy = dyn_cast<VectorType>(V->getType())) {
- // FIXME: should this handle scalable vectors?
- return FixedVectorType::get(Ty, VTy->getNumElements());
- }
+ if (auto *VTy = dyn_cast<VectorType>(V->getType()))
+ return VectorType::get(Ty, VTy->getElementCount());
return Ty;
}
@@ -344,7 +341,7 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
// 1. Update Old-TruncInst -> New-TruncInst.
// 2. Remove Old-TruncInst (if New node is not TruncInst).
// 3. Add New-TruncInst (if Old node was not TruncInst).
- auto Entry = find(Worklist, I);
+ auto *Entry = find(Worklist, I);
if (Entry != Worklist.end()) {
if (auto *NewCI = dyn_cast<TruncInst>(Res))
*Entry = NewCI;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index 233eae37c497..298149f8b546 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -74,6 +74,7 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
case Intrinsic::coro_id:
case Intrinsic::coro_id_retcon:
case Intrinsic::coro_id_retcon_once:
+ case Intrinsic::coro_id_async:
II->replaceAllUsesWith(ConstantTokenNone::get(Context));
break;
case Intrinsic::coro_subfn_addr:
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index 242e6c3f6b23..5e5e513cdfda 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -149,6 +149,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
bool Changed = false;
CoroIdInst *CoroId = nullptr;
SmallVector<CoroFreeInst *, 4> CoroFrees;
+ bool HasCoroSuspend = false;
for (auto IB = inst_begin(F), IE = inst_end(F); IB != IE;) {
Instruction &I = *IB++;
if (auto *CB = dyn_cast<CallBase>(&I)) {
@@ -163,11 +164,13 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
// pass expects that there is at most one final suspend point.
if (cast<CoroSuspendInst>(&I)->isFinal())
CB->setCannotDuplicate();
+ HasCoroSuspend = true;
break;
+ case Intrinsic::coro_end_async:
case Intrinsic::coro_end:
// Make sure that fallthrough coro.end is not duplicated as CoroSplit
// pass expects that there is at most one fallthrough coro.end.
- if (cast<CoroEndInst>(&I)->isFallthrough())
+ if (cast<AnyCoroEndInst>(&I)->isFallthrough())
CB->setCannotDuplicate();
break;
case Intrinsic::coro_noop:
@@ -187,6 +190,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
break;
case Intrinsic::coro_id_retcon:
case Intrinsic::coro_id_retcon_once:
+ case Intrinsic::coro_id_async:
F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
break;
case Intrinsic::coro_resume:
@@ -211,15 +215,23 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
if (CoroId)
for (CoroFreeInst *CF : CoroFrees)
CF->setArgOperand(0, CoroId);
+ // Coroutine suspention could potentially lead to any argument modified
+ // outside of the function, hence arguments should not have noalias
+ // attributes.
+ if (HasCoroSuspend)
+ for (Argument &A : F.args())
+ if (A.hasNoAliasAttr())
+ A.removeAttr(Attribute::NoAlias);
return Changed;
}
static bool declaresCoroEarlyIntrinsics(const Module &M) {
return coro::declaresIntrinsics(
M, {"llvm.coro.id", "llvm.coro.id.retcon", "llvm.coro.id.retcon.once",
- "llvm.coro.destroy", "llvm.coro.done", "llvm.coro.end",
- "llvm.coro.noop", "llvm.coro.free", "llvm.coro.promise",
- "llvm.coro.resume", "llvm.coro.suspend"});
+ "llvm.coro.id.async", "llvm.coro.destroy", "llvm.coro.done",
+ "llvm.coro.end", "llvm.coro.end.async", "llvm.coro.noop",
+ "llvm.coro.free", "llvm.coro.promise", "llvm.coro.resume",
+ "llvm.coro.suspend"});
}
PreservedAnalyses CoroEarlyPass::run(Function &F, FunctionAnalysisManager &) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index 9d364b3097c1..07a183cfc66b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -83,14 +83,8 @@ static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
Function &F = *Frame->getFunction();
for (Instruction &I : instructions(F))
if (auto *Call = dyn_cast<CallInst>(&I))
- if (Call->isTailCall() && operandReferences(Call, Frame, AA)) {
- // FIXME: If we ever hit this check. Evaluate whether it is more
- // appropriate to retain musttail and allow the code to compile.
- if (Call->isMustTailCall())
- report_fatal_error("Call referring to the coroutine frame cannot be "
- "marked as musttail");
+ if (Call->isTailCall() && operandReferences(Call, Frame, AA))
Call->setTailCall(false);
- }
}
// Given a resume function @f.resume(%f.frame* %frame), returns the size
@@ -252,7 +246,20 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
// If size of the set is the same as total number of coro.begin, that means we
// found a coro.free or coro.destroy referencing each coro.begin, so we can
// perform heap elision.
- return ReferencedCoroBegins.size() == CoroBegins.size();
+ if (ReferencedCoroBegins.size() != CoroBegins.size())
+ return false;
+
+ // If any call in the function is a musttail call, it usually won't work
+ // because we cannot drop the tailcall attribute, and a tail call will reuse
+ // the entire stack where we are going to put the new frame. In theory a more
+ // precise analysis can be done to check whether the new frame aliases with
+ // the call; however, it's challenging to do so before the elision actually
+ // happens.
+ for (BasicBlock &BB : *F)
+ if (BB.getTerminatingMustTailCall())
+ return false;
+
+ return true;
}
void Lowerer::collectPostSplitCoroIds(Function *F) {
@@ -366,7 +373,7 @@ static bool replaceDevirtTrigger(Function &F) {
}
static bool declaresCoroElideIntrinsics(Module &M) {
- return coro::declaresIntrinsics(M, {"llvm.coro.id"});
+ return coro::declaresIntrinsics(M, {"llvm.coro.id", "llvm.coro.id.async"});
}
PreservedAnalyses CoroElidePass::run(Function &F, FunctionAnalysisManager &AM) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index f55501a05d85..e1e0d50979dc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -20,16 +20,18 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/PtrUseVisitor.h"
+#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Support/OptimizedStructLayout.h"
+#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -41,6 +43,13 @@ using namespace llvm;
// "coro-frame", which results in leaner debug spew.
#define DEBUG_TYPE "coro-suspend-crossing"
+static cl::opt<bool> EnableReuseStorageInFrame(
+ "reuse-storage-in-coroutine-frame", cl::Hidden,
+ cl::desc(
+ "Enable the optimization which would reuse the storage in the coroutine \
+ frame for allocas whose liferanges are not overlapped, for testing purposes"),
+ llvm::cl::init(false));
+
enum { SmallVectorThreshold = 32 };
// Provides two way mapping between the blocks and numbers.
@@ -126,10 +135,10 @@ struct SuspendCrossingInfo {
BasicBlock *UseBB = I->getParent();
- // As a special case, treat uses by an llvm.coro.suspend.retcon
- // as if they were uses in the suspend's single predecessor: the
- // uses conceptually occur before the suspend.
- if (isa<CoroSuspendRetconInst>(I)) {
+ // As a special case, treat uses by an llvm.coro.suspend.retcon or an
+ // llvm.coro.suspend.async as if they were uses in the suspend's single
+ // predecessor: the uses conceptually occur before the suspend.
+ if (isa<CoroSuspendRetconInst>(I) || isa<CoroSuspendAsyncInst>(I)) {
UseBB = UseBB->getSinglePredecessor();
assert(UseBB && "should have split coro.suspend into its own block");
}
@@ -283,71 +292,96 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
#undef DEBUG_TYPE // "coro-suspend-crossing"
#define DEBUG_TYPE "coro-frame"
-// We build up the list of spills for every case where a use is separated
-// from the definition by a suspend point.
-
-static const unsigned InvalidFieldIndex = ~0U;
-
namespace {
-class Spill {
- Value *Def = nullptr;
- Instruction *User = nullptr;
- unsigned FieldNo = InvalidFieldIndex;
-
-public:
- Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {}
-
- Value *def() const { return Def; }
- Instruction *user() const { return User; }
- BasicBlock *userBlock() const { return User->getParent(); }
+class FrameTypeBuilder;
+// Mapping from the to-be-spilled value to all the users that need reload.
+using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>;
+struct AllocaInfo {
+ AllocaInst *Alloca;
+ DenseMap<Instruction *, llvm::Optional<APInt>> Aliases;
+ bool MayWriteBeforeCoroBegin;
+ AllocaInfo(AllocaInst *Alloca,
+ DenseMap<Instruction *, llvm::Optional<APInt>> Aliases,
+ bool MayWriteBeforeCoroBegin)
+ : Alloca(Alloca), Aliases(std::move(Aliases)),
+ MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
+};
+struct FrameDataInfo {
+ // All the values (that are not allocas) that need to be spilled to the
+ // frame.
+ SpillInfo Spills;
+ // Allocas contains all values defined as allocas that need to live in the
+ // frame.
+ SmallVector<AllocaInfo, 8> Allocas;
+
+ SmallVector<Value *, 8> getAllDefs() const {
+ SmallVector<Value *, 8> Defs;
+ for (const auto &P : Spills)
+ Defs.push_back(P.first);
+ for (const auto &A : Allocas)
+ Defs.push_back(A.Alloca);
+ return Defs;
+ }
- // Note that field index is stored in the first SpillEntry for a particular
- // definition. Subsequent mentions of a defintion do not have fieldNo
- // assigned. This works out fine as the users of Spills capture the info about
- // the definition the first time they encounter it. Consider refactoring
- // SpillInfo into two arrays to normalize the spill representation.
- unsigned fieldIndex() const {
- assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field");
- return FieldNo;
+ uint32_t getFieldIndex(Value *V) const {
+ auto Itr = FieldIndexMap.find(V);
+ assert(Itr != FieldIndexMap.end() &&
+ "Value does not have a frame field index");
+ return Itr->second;
}
- void setFieldIndex(unsigned FieldNumber) {
- assert(FieldNo == InvalidFieldIndex && "Reassigning field number");
- FieldNo = FieldNumber;
+
+ void setFieldIndex(Value *V, uint32_t Index) {
+ assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
+ "Cannot set the index for the same field twice.");
+ FieldIndexMap[V] = Index;
}
+
+ // Remap the index of every field in the frame, using the final layout index.
+ void updateLayoutIndex(FrameTypeBuilder &B);
+
+private:
+ // LayoutIndexUpdateStarted is used to avoid updating the index of any field
+ // twice by mistake.
+ bool LayoutIndexUpdateStarted = false;
+ // Map from values to their slot indexes in the frame. They are first set to
+ // their original insertion field index; after the frame is built, they are
+ // updated to the final layout index.
+ DenseMap<Value *, uint32_t> FieldIndexMap;
};
} // namespace
-// Note that there may be more than one record with the same value of Def in
-// the SpillInfo vector.
-using SpillInfo = SmallVector<Spill, 8>;
-
#ifndef NDEBUG
-static void dump(StringRef Title, SpillInfo const &Spills) {
+static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
dbgs() << "------------- " << Title << "--------------\n";
- Value *CurrentValue = nullptr;
- for (auto const &E : Spills) {
- if (CurrentValue != E.def()) {
- CurrentValue = E.def();
- CurrentValue->dump();
- }
+ for (const auto &E : Spills) {
+ E.first->dump();
dbgs() << " user: ";
- E.user()->dump();
+ for (auto *I : E.second)
+ I->dump();
+ }
+}
+
+static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) {
+ dbgs() << "------------- Allocas --------------\n";
+ for (const auto &A : Allocas) {
+ A.Alloca->dump();
}
}
#endif
namespace {
+using FieldIDType = size_t;
// We cannot rely solely on natural alignment of a type when building a
// coroutine frame and if the alignment specified on the Alloca instruction
// differs from the natural alignment of the alloca type we will need to insert
// padding.
class FrameTypeBuilder {
+private:
struct Field {
uint64_t Size;
uint64_t Offset;
- Spill *ForSpill;
Type *Ty;
- unsigned FieldIndex;
+ FieldIDType LayoutFieldIndex;
Align Alignment;
Align TyAlignment;
};
@@ -363,19 +397,12 @@ class FrameTypeBuilder {
public:
FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
- : DL(DL), Context(Context) {}
-
- class FieldId {
- size_t Value;
- explicit FieldId(size_t Value) : Value(Value) {}
-
- friend class FrameTypeBuilder;
- };
+ : DL(DL), Context(Context) {}
/// Add a field to this structure for the storage of an `alloca`
/// instruction.
- FieldId addFieldForAlloca(AllocaInst *AI, Spill *ForSpill = nullptr,
- bool IsHeader = false) {
+ LLVM_NODISCARD FieldIDType addFieldForAlloca(AllocaInst *AI,
+ bool IsHeader = false) {
Type *Ty = AI->getAllocatedType();
// Make an array type if this is a static array allocation.
@@ -386,13 +413,42 @@ public:
report_fatal_error("Coroutines cannot handle non static allocas yet");
}
- return addField(Ty, AI->getAlign(), ForSpill, IsHeader);
+ return addField(Ty, AI->getAlign(), IsHeader);
}
+ /// We want to put allocas whose lifetime ranges do not overlap
+ /// into one slot of the coroutine frame.
+ /// Consider the example at https://bugs.llvm.org/show_bug.cgi?id=45566
+ ///
+ /// cppcoro::task<void> alternative_paths(bool cond) {
+ /// if (cond) {
+ /// big_structure a;
+ /// process(a);
+ /// co_await something();
+ /// } else {
+ /// big_structure b;
+ /// process2(b);
+ /// co_await something();
+ /// }
+ /// }
+ ///
+ /// We want to put variable a and variable b in the same slot to
+ /// reduce the size of the coroutine frame.
+ ///
+ /// This function uses the StackLifetime algorithm to partition the
+ /// AllocaInsts into non-overlapping sets, so that the allocas in each
+ /// non-overlapping set can share the same slot in the coroutine frame. A
+ /// field is then added for each set, using the largest type in the set as
+ /// the field type.
+ ///
+ /// Side effect: because we sort the allocas, the order of the allocas in the
+ /// frame may differ from their order in the source code.
+ void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
+ coro::Shape &Shape);
+
/// Add a field to this structure.
- FieldId addField(Type *Ty, MaybeAlign FieldAlignment,
- Spill *ForSpill = nullptr,
- bool IsHeader = false) {
+ LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
+ bool IsHeader = false) {
assert(!IsFinished && "adding fields to a finished builder");
assert(Ty && "must provide a type for a field");
@@ -415,9 +471,8 @@ public:
Offset = OptimizedStructLayoutField::FlexibleOffset;
}
- Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0,
- *FieldAlignment, TyAlignment});
- return FieldId(Fields.size() - 1);
+ Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment});
+ return Fields.size() - 1;
}
/// Finish the layout and set the body on the given type.
@@ -433,13 +488,162 @@ public:
return StructAlign;
}
- unsigned getFieldIndex(FieldId Id) const {
+ FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
assert(IsFinished && "not yet finished!");
- return Fields[Id.Value].FieldIndex;
+ return Fields[Id].LayoutFieldIndex;
}
};
} // namespace
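FrameTypeBuilder is now used in two phases: fields are registered first (the returned FieldIDType is only an insertion-order handle), then finish() runs the optimized struct layout, and only afterwards do getLayoutFieldIndex() calls yield the final slot numbers. A minimal sketch of that flow as it would look inside this file; the function and variable names are hypothetical, not part of the patch:

static void exampleFrameLayout(llvm::LLVMContext &Ctx,
                               const llvm::DataLayout &DL, llvm::Type *FnPtrTy,
                               llvm::AllocaInst *AI) {
  FrameTypeBuilder B(Ctx, DL);
  // Phase 1: register fields; the ids are stable handles, not final indexes.
  FieldIDType ResumeId = B.addField(FnPtrTy, llvm::None, /*IsHeader=*/true);
  FieldIDType AllocaId = B.addFieldForAlloca(AI);
  // Phase 2: finalize the layout on a named struct type.
  auto *FrameTy = llvm::StructType::create(Ctx, "example.Frame");
  B.finish(FrameTy);
  // Phase 3: translate the handles into final layout indexes, which is
  // exactly what FrameDataInfo::updateLayoutIndex() below does per field.
  FieldIDType ResumeSlot = B.getLayoutFieldIndex(ResumeId);
  FieldIDType AllocaSlot = B.getLayoutFieldIndex(AllocaId);
  (void)ResumeSlot;
  (void)AllocaSlot;
}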
+void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
+ auto Updater = [&](Value *I) {
+ setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I)));
+ };
+ LayoutIndexUpdateStarted = true;
+ for (auto &S : Spills)
+ Updater(S.first);
+ for (const auto &A : Allocas)
+ Updater(A.Alloca);
+ LayoutIndexUpdateStarted = false;
+}
+
+void FrameTypeBuilder::addFieldForAllocas(const Function &F,
+ FrameDataInfo &FrameData,
+ coro::Shape &Shape) {
+ DenseMap<AllocaInst *, unsigned int> AllocaIndex;
+ using AllocaSetType = SmallVector<AllocaInst *, 4>;
+ SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
+
+ // We need to add fields for the allocas at the end of this function.
+ // However, this function has multiple exits, so we use this helper to avoid
+ // redundant code.
+ struct RTTIHelper {
+ std::function<void()> func;
+ RTTIHelper(std::function<void()> &&func) : func(func) {}
+ ~RTTIHelper() { func(); }
+ } Helper([&]() {
+ for (auto AllocaList : NonOverlapedAllocas) {
+ auto *LargestAI = *AllocaList.begin();
+ FieldIDType Id = addFieldForAlloca(LargestAI);
+ for (auto *Alloca : AllocaList)
+ FrameData.setFieldIndex(Alloca, Id);
+ }
+ });
+
+ if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) {
+ for (const auto &A : FrameData.Allocas) {
+ AllocaInst *Alloca = A.Alloca;
+ AllocaIndex[Alloca] = NonOverlapedAllocas.size();
+ NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
+ }
+ return;
+ }
+
+ // Because there are paths from the lifetime.start to coro.end
+ // for each alloca, the live ranges of all allocas overlap
+ // in the blocks that contain coro.end and their successor blocks.
+ // So we choose to skip these blocks when we calculate the live range
+ // for each alloca. This should be reasonable, since there shouldn't be uses
+ // in these blocks and the coroutine frame shouldn't be used outside the
+ // coroutine body.
+ //
+ // Note that the user of coro.suspend may not be a SwitchInst. However, this
+ // case seems too complex to handle. And it is harmless to skip these
+ // patterns, since doing so merely prevents the allocas from sharing the same
+ // slot.
+ DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
+ for (auto CoroSuspendInst : Shape.CoroSuspends) {
+ for (auto U : CoroSuspendInst->users()) {
+ if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
+ auto *SWI = const_cast<SwitchInst *>(ConstSWI);
+ DefaultSuspendDest[SWI] = SWI->getDefaultDest();
+ SWI->setDefaultDest(SWI->getSuccessor(1));
+ }
+ }
+ }
+
+ auto ExtractAllocas = [&]() {
+ AllocaSetType Allocas;
+ Allocas.reserve(FrameData.Allocas.size());
+ for (const auto &A : FrameData.Allocas)
+ Allocas.push_back(A.Alloca);
+ return Allocas;
+ };
+ StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
+ StackLifetime::LivenessType::May);
+ StackLifetimeAnalyzer.run();
+ auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
+ return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
+ StackLifetimeAnalyzer.getLiveRange(AI2));
+ };
+ auto GetAllocaSize = [&](const AllocaInfo &A) {
+ Optional<TypeSize> RetSize = A.Alloca->getAllocationSizeInBits(DL);
+ assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
+ assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
+ return RetSize->getFixedSize();
+ };
+ // Put larger allocas first, so that larger allocas have higher priority
+ // to be merged, which can potentially save more space. This also keeps each
+ // AllocaSet ordered, so the largest Alloca in an AllocaSet is easy to
+ // retrieve.
+ sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
+ return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
+ });
+ for (const auto &A : FrameData.Allocas) {
+ AllocaInst *Alloca = A.Alloca;
+ bool Merged = false;
+ // Try to find an existing NonOverlappedAllocaSet whose live ranges do not
+ // interfere with this Alloca. If one is found, insert the alloca into that
+ // NonOverlappedAllocaSet.
+ for (auto &AllocaSet : NonOverlapedAllocas) {
+ assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
+ bool NoInference = none_of(AllocaSet, [&](auto Iter) {
+ return IsAllocaInferenre(Alloca, Iter);
+ });
+ // If the alignment of A is a multiple of the alignment of B, then the
+ // address of A also satisfies the alignment requirement of B.
+ //
+ // There may be other, more fine-grained strategies for handling alignment
+ // information during the merging process, but they seem hard to implement
+ // and of little benefit.
+ bool Alignable = [&]() -> bool {
+ auto *LargestAlloca = *AllocaSet.begin();
+ return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
+ 0;
+ }();
+ bool CouldMerge = NoInference && Alignable;
+ if (!CouldMerge)
+ continue;
+ AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()];
+ AllocaSet.push_back(Alloca);
+ Merged = true;
+ break;
+ }
+ if (!Merged) {
+ AllocaIndex[Alloca] = NonOverlapedAllocas.size();
+ NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
+ }
+ }
+ // Restore the default target destination for each switch statement we
+ // modified earlier.
+ for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
+ SwitchInst *SWI = SwitchAndDefaultDest.first;
+ BasicBlock *DestBB = SwitchAndDefaultDest.second;
+ SWI->setDefaultDest(DestBB);
+ }
+ // This debug output tells us which allocas are merged into one slot.
+ LLVM_DEBUG(for (auto &AllocaSet
+ : NonOverlapedAllocas) {
+ if (AllocaSet.size() > 1) {
+ dbgs() << "In Function:" << F.getName() << "\n";
+ dbgs() << "Find Union Set "
+ << "\n";
+ dbgs() << "\tAllocas are \n";
+ for (auto Alloca : AllocaSet)
+ dbgs() << "\t\t" << *Alloca << "\n";
+ }
+ });
+}
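The interference test above boils down to one StackLifetime query per pair of allocas. A self-contained sketch of that query, assuming the caller already has the function and the complete alloca list (the helper name is hypothetical; the real code builds the analysis once and reuses it for all pairs):

#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

// True if A and B may be live at the same time, i.e. they must not share a
// coroutine-frame slot.
static bool mayOverlap(const llvm::Function &F,
                       llvm::ArrayRef<const llvm::AllocaInst *> AllAllocas,
                       const llvm::AllocaInst *A, const llvm::AllocaInst *B) {
  llvm::StackLifetime SL(F, AllAllocas, llvm::StackLifetime::LivenessType::May);
  SL.run();
  return SL.getLiveRange(A).overlaps(SL.getLiveRange(B));
}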
+
void FrameTypeBuilder::finish(StructType *Ty) {
assert(!IsFinished && "already finished!");
@@ -491,13 +695,8 @@ void FrameTypeBuilder::finish(StructType *Ty) {
Offset - LastOffset));
}
- // Record the layout information into both the Field and the
- // original Spill, if there is one.
F.Offset = Offset;
- F.FieldIndex = FieldTypes.size();
- if (F.ForSpill) {
- F.ForSpill->setFieldIndex(F.FieldIndex);
- }
+ F.LayoutFieldIndex = FieldTypes.size();
FieldTypes.push_back(F.Ty);
LastOffset = Offset + F.Size;
@@ -509,8 +708,8 @@ void FrameTypeBuilder::finish(StructType *Ty) {
// Check that the IR layout matches the offsets we expect.
auto Layout = DL.getStructLayout(Ty);
for (auto &F : Fields) {
- assert(Ty->getElementType(F.FieldIndex) == F.Ty);
- assert(Layout->getElementOffset(F.FieldIndex) == F.Offset);
+ assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
+ assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
}
#endif
@@ -526,7 +725,7 @@ void FrameTypeBuilder::finish(StructType *Ty) {
// ... spills ...
// };
static StructType *buildFrameType(Function &F, coro::Shape &Shape,
- SpillInfo &Spills) {
+ FrameDataInfo &FrameData) {
LLVMContext &C = F.getContext();
const DataLayout &DL = F.getParent()->getDataLayout();
StructType *FrameTy = [&] {
@@ -538,8 +737,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
FrameTypeBuilder B(C, DL);
AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
- Optional<FrameTypeBuilder::FieldId> PromiseFieldId;
- Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId;
+ Optional<FieldIDType> SwitchIndexFieldId;
if (Shape.ABI == coro::ABI::Switch) {
auto *FramePtrTy = FrameTy->getPointerTo();
@@ -549,14 +747,15 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
// Add header fields for the resume and destroy functions.
// We can rely on these being perfectly packed.
- B.addField(FnPtrTy, None, nullptr, /*header*/ true);
- B.addField(FnPtrTy, None, nullptr, /*header*/ true);
+ (void)B.addField(FnPtrTy, None, /*header*/ true);
+ (void)B.addField(FnPtrTy, None, /*header*/ true);
- // Add a header field for the promise if there is one.
- if (PromiseAlloca) {
- PromiseFieldId =
- B.addFieldForAlloca(PromiseAlloca, nullptr, /*header*/ true);
- }
+ // PromiseAlloca field needs to be explicitly added here because it's
+ // a header field with a fixed offset based on its alignment. Hence it
+ // needs special handling and cannot be added to FrameData.Allocas.
+ if (PromiseAlloca)
+ FrameData.setFieldIndex(
+ PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));
// Add a field to store the suspend index. This doesn't need to
// be in the header.
@@ -568,40 +767,40 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
}
- Value *CurrentDef = nullptr;
-
+ // Because multiple allocas may share the same field slot,
+ // we add fields for all the allocas in one go here.
+ B.addFieldForAllocas(F, FrameData, Shape);
+ // Add PromiseAlloca to Allocas list so that
+ // 1. updateLayoutIndex could update its index after
+ // `performOptimizedStructLayout`
+ // 2. it is processed in insertSpills.
+ if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
+ // We assume that the promise alloca won't be modified before
+ // CoroBegin and that no alias will be created before CoroBegin.
+ FrameData.Allocas.emplace_back(
+ PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{}, false);
// Create an entry for every spilled value.
- for (auto &S : Spills) {
- // We can have multiple entries in Spills for a single value, but
- // they should form a contiguous run. Ignore all but the first.
- if (CurrentDef == S.def())
- continue;
-
- CurrentDef = S.def();
-
- assert(CurrentDef != PromiseAlloca &&
- "recorded spill use of promise alloca?");
-
- if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
- B.addFieldForAlloca(AI, &S);
- } else {
- Type *Ty = CurrentDef->getType();
- B.addField(Ty, None, &S);
- }
+ for (auto &S : FrameData.Spills) {
+ Type *FieldType = S.first->getType();
+ // For byval arguments, we need to store the pointed-to value in the frame
+ // instead of the pointer itself.
+ if (const Argument *A = dyn_cast<Argument>(S.first))
+ if (A->hasByValAttr())
+ FieldType = FieldType->getPointerElementType();
+ FieldIDType Id = B.addField(FieldType, None);
+ FrameData.setFieldIndex(S.first, Id);
}
B.finish(FrameTy);
+ FrameData.updateLayoutIndex(B);
Shape.FrameAlign = B.getStructAlign();
Shape.FrameSize = B.getStructSize();
switch (Shape.ABI) {
- // In the switch ABI, remember the field indices for the promise and
- // switch-index fields.
case coro::ABI::Switch:
+ // In the switch ABI, remember the switch-index field.
Shape.SwitchLowering.IndexField =
- B.getFieldIndex(*SwitchIndexFieldId);
- Shape.SwitchLowering.PromiseField =
- (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0);
+ B.getLayoutFieldIndex(*SwitchIndexFieldId);
// Also round the frame size up to a multiple of its alignment, as is
// generally expected in C/C++.
@@ -617,65 +816,246 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
B.getStructAlign() <= Id->getStorageAlignment());
break;
}
+ case coro::ABI::Async: {
+ Shape.AsyncLowering.FrameOffset =
+ alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
+ // Also make the final context size a multiple of the context alignment to
+ // make allocation easier for allocators.
+ Shape.AsyncLowering.ContextSize =
+ alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
+ Shape.AsyncLowering.getContextAlignment());
+ if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
+ report_fatal_error(
+ "The alignment requirment of frame variables cannot be higher than "
+ "the alignment of the async function context");
+ }
+ break;
+ }
}
return FrameTy;
}
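For the new coro::ABI::Async case above, the frame is placed right after the async context header and the whole context is padded out to its own alignment. A small self-contained sketch of the two alignTo() steps, with hypothetical example numbers in the comments:

#include "llvm/Support/Alignment.h"
#include <cstdint>

// E.g. HeaderSize = 24, FrameSize = 40, FrameAlign = CtxAlign = 16:
//   FrameOffset = alignTo(24, 16)      = 32
//   ContextSize = alignTo(32 + 40, 16) = 80
static uint64_t asyncContextSize(uint64_t HeaderSize, uint64_t FrameSize,
                                 llvm::Align FrameAlign, llvm::Align CtxAlign) {
  uint64_t FrameOffset = llvm::alignTo(HeaderSize, FrameAlign);
  return llvm::alignTo(FrameOffset + FrameSize, CtxAlign);
}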
-// We use a pointer use visitor to discover if there are any writes into an
-// alloca that dominates CoroBegin. If that is the case, insertSpills will copy
-// the value from the alloca into the coroutine frame spill slot corresponding
-// to that alloca.
+// We use a pointer use visitor to track how an alloca is being used.
+// The goal is to be able to answer the following three questions:
+// 1. Should this alloca be allocated on the frame instead?
+// 2. Could the content of the alloca be modified prior to CoroBegin, which
+// would require copying the data from the alloca into the frame after
+// CoroBegin?
+// 3. Are there any aliases created for this alloca prior to CoroBegin but
+// used after CoroBegin? In that case, we will need to recreate the aliases
+// after CoroBegin based off the frame.
+// To answer question 1, we track two things:
+// a. The list of all BasicBlocks that use this alloca or any of its aliases.
+// In the end, we check if any two of these basic blocks cross suspension
+// points. If so, this alloca must be put on the frame.
+// b. Whether the alloca or any alias of the alloca escapes at some point,
+// either by storing the address somewhere or by passing the address to a
+// function call that might capture it. If it ever escapes, this alloca must
+// conservatively be put on the frame.
+// To answer question 2, we track the variable MayWriteBeforeCoroBegin.
+// Whenever a potential write happens, either through a store instruction, a
+// function call or any of the memory intrinsics, we check whether this
+// instruction is prior to CoroBegin. To answer question 3, we track the
+// offsets of all aliases created for the alloca prior to CoroBegin but used
+// after CoroBegin. llvm::Optional is used to represent the case when the
+// offset is unknown (e.g. when a PHINode takes in different offset values).
+// We cannot handle unknown offsets and will assert. This is the remaining
+// known limitation; an ideal solution would likely require a significant
+// redesign.
namespace {
struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
using Base = PtrUseVisitor<AllocaUseVisitor>;
AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
- const CoroBeginInst &CB)
- : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}
+ const CoroBeginInst &CB, const SuspendCrossingInfo &Checker)
+ : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {}
- // We are only interested in uses that dominate coro.begin.
void visit(Instruction &I) {
- if (DT.dominates(&I, &CoroBegin))
- Base::visit(I);
+ UserBBs.insert(I.getParent());
+ Base::visit(I);
+ // If the pointer is escaped prior to CoroBegin, we have to assume it would
+ // be written into before CoroBegin as well.
+ if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) {
+ MayWriteBeforeCoroBegin = true;
+ }
}
// We need to provide this overload as PtrUseVisitor uses a pointer based
// visiting function.
void visit(Instruction *I) { return visit(*I); }
- void visitLoadInst(LoadInst &) {} // Good. Nothing to do.
+ void visitPHINode(PHINode &I) {
+ enqueueUsers(I);
+ handleAlias(I);
+ }
+
+ void visitSelectInst(SelectInst &I) {
+ enqueueUsers(I);
+ handleAlias(I);
+ }
+
+ void visitStoreInst(StoreInst &SI) {
+ // Regardless of whether the alias of the alloca is the value operand or the
+ // pointer operand, we need to assume the alloca may be written to.
+ handleMayWrite(SI);
+
+ if (SI.getValueOperand() != U->get())
+ return;
+
+ // We are storing the pointer into a memory location, potentially escaping.
+ // As an optimization, we try to detect simple cases where it doesn't
+ // actually escape, for example:
+ // %ptr = alloca ..
+ // %addr = alloca ..
+ // store %ptr, %addr
+ // %x = load %addr
+ // ..
+ // If %addr is only used by loading from it, we can simply treat %x as
+ // another alias of %ptr, without considering %ptr escaped.
+ auto IsSimpleStoreThenLoad = [&]() {
+ auto *AI = dyn_cast<AllocaInst>(SI.getPointerOperand());
+ // If the memory location we are storing to is not an alloca, it
+ // could be an alias of some other memory locations, which is difficult
+ // to analyze.
+ if (!AI)
+ return false;
+ // StoreAliases contains aliases of the memory location stored into.
+ SmallVector<Instruction *, 4> StoreAliases = {AI};
+ while (!StoreAliases.empty()) {
+ Instruction *I = StoreAliases.pop_back_val();
+ for (User *U : I->users()) {
+ // If we are loading from the memory location, we are creating an
+ // alias of the original pointer.
+ if (auto *LI = dyn_cast<LoadInst>(U)) {
+ enqueueUsers(*LI);
+ handleAlias(*LI);
+ continue;
+ }
+ // If we are overwriting the memory location, the pointer certainly
+ // won't escape.
+ if (auto *S = dyn_cast<StoreInst>(U))
+ if (S->getPointerOperand() == I)
+ continue;
+ if (auto *II = dyn_cast<IntrinsicInst>(U))
+ if (II->isLifetimeStartOrEnd())
+ continue;
+ // BitCastInst creates aliases of the memory location being stored
+ // into.
+ if (auto *BI = dyn_cast<BitCastInst>(U)) {
+ StoreAliases.push_back(BI);
+ continue;
+ }
+ return false;
+ }
+ }
+
+ return true;
+ };
+
+ if (!IsSimpleStoreThenLoad())
+ PI.setEscaped(&SI);
+ }
- // If the use is an operand, the pointer escaped and anything can write into
- // that memory. If the use is the pointer, we are definitely writing into the
- // alloca and therefore we need to copy.
- void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); }
+ // All mem intrinsics modify the data.
+ void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); }
- // Any other instruction that is not filtered out by PtrUseVisitor, will
- // result in the copy.
- void visitInstruction(Instruction &I) { PI.setAborted(&I); }
+ void visitBitCastInst(BitCastInst &BC) {
+ Base::visitBitCastInst(BC);
+ handleAlias(BC);
+ }
+
+ void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+ Base::visitAddrSpaceCastInst(ASC);
+ handleAlias(ASC);
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ // The base visitor will adjust Offset accordingly.
+ Base::visitGetElementPtrInst(GEPI);
+ handleAlias(GEPI);
+ }
+
+ void visitCallBase(CallBase &CB) {
+ for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op)
+ if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
+ PI.setEscaped(&CB);
+ handleMayWrite(CB);
+ }
+
+ bool getShouldLiveOnFrame() const {
+ if (!ShouldLiveOnFrame)
+ ShouldLiveOnFrame = computeShouldLiveOnFrame();
+ return ShouldLiveOnFrame.getValue();
+ }
+
+ bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; }
+
+ DenseMap<Instruction *, llvm::Optional<APInt>> getAliasesCopy() const {
+ assert(getShouldLiveOnFrame() && "This method should only be called if the "
+ "alloca needs to live on the frame.");
+ for (const auto &P : AliasOffetMap)
+ if (!P.second)
+ report_fatal_error("Unable to handle an alias with unknown offset "
+ "created before CoroBegin.");
+ return AliasOffetMap;
+ }
private:
const DominatorTree &DT;
const CoroBeginInst &CoroBegin;
-};
-} // namespace
-static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT,
- const CoroBeginInst &CB) {
- const DataLayout &DL = A.getModule()->getDataLayout();
- AllocaUseVisitor Visitor(DL, DT, CB);
- auto PtrI = Visitor.visitPtr(A);
- if (PtrI.isEscaped() || PtrI.isAborted()) {
- auto *PointerEscapingInstr = PtrI.getEscapingInst()
- ? PtrI.getEscapingInst()
- : PtrI.getAbortingInst();
- if (PointerEscapingInstr) {
- LLVM_DEBUG(
- dbgs() << "AllocaInst copy was triggered by instruction: "
- << *PointerEscapingInstr << "\n");
+ const SuspendCrossingInfo &Checker;
+ // All aliases of the original AllocaInst that are created before CoroBegin
+ // and used after CoroBegin. Each entry contains the instruction and its
+ // offset into the original Alloca. They need to be recreated after CoroBegin
+ // off the frame.
+ DenseMap<Instruction *, llvm::Optional<APInt>> AliasOffetMap{};
+ SmallPtrSet<BasicBlock *, 2> UserBBs{};
+ bool MayWriteBeforeCoroBegin{false};
+
+ mutable llvm::Optional<bool> ShouldLiveOnFrame{};
+
+ bool computeShouldLiveOnFrame() const {
+ if (PI.isEscaped())
+ return true;
+
+ for (auto *BB1 : UserBBs)
+ for (auto *BB2 : UserBBs)
+ if (Checker.hasPathCrossingSuspendPoint(BB1, BB2))
+ return true;
+
+ return false;
+ }
+
+ void handleMayWrite(const Instruction &I) {
+ if (!DT.dominates(&CoroBegin, &I))
+ MayWriteBeforeCoroBegin = true;
+ }
+
+ bool usedAfterCoroBegin(Instruction &I) {
+ for (auto &U : I.uses())
+ if (DT.dominates(&CoroBegin, U))
+ return true;
+ return false;
+ }
+
+ void handleAlias(Instruction &I) {
+ // We track all aliases created prior to CoroBegin but used after.
+ // These aliases may need to be recreated after CoroBegin if the alloca
+ // needs to live on the frame.
+ if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I))
+ return;
+
+ if (!IsOffsetKnown) {
+ AliasOffetMap[&I].reset();
+ } else {
+ auto Itr = AliasOffetMap.find(&I);
+ if (Itr == AliasOffetMap.end()) {
+ AliasOffetMap[&I] = Offset;
+ } else if (Itr->second.hasValue() && Itr->second.getValue() != Offset) {
+ // If we have seen two different possible values for this alias, we set
+ // it to empty.
+ AliasOffetMap[&I].reset();
+ }
}
- return true;
}
- return false;
-}
+};
+} // namespace
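The visitor is driven through the inherited PtrUseVisitor::visitPtr() and then queried for the three answers described in the comment above. A sketch of the assumed driving pattern; the real call site is in collectFrameAllocas, of which only the beginning appears in this patch hunk, and the helper name here is hypothetical:

static void classifyAlloca(llvm::AllocaInst *AI, const llvm::DominatorTree &DT,
                           const llvm::CoroBeginInst &CB,
                           const SuspendCrossingInfo &Checker,
                           llvm::SmallVectorImpl<AllocaInfo> &Allocas) {
  AllocaUseVisitor Visitor(AI->getModule()->getDataLayout(), DT, CB, Checker);
  Visitor.visitPtr(*AI);               // walk all direct and aliased uses
  if (!Visitor.getShouldLiveOnFrame()) // question 1: it can stay on the stack
    return;
  Allocas.emplace_back(AI, Visitor.getAliasesCopy(),          // question 3
                       Visitor.getMayWriteBeforeCoroBegin()); // question 2
}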
// We need to make room to insert a spill after initial PHIs, but before
// catchswitch instruction. Placing it before violates the requirement that
@@ -720,7 +1100,8 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
// whatever
//
//
-static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
+static Instruction *insertSpills(const FrameDataInfo &FrameData,
+ coro::Shape &Shape) {
auto *CB = Shape.CoroBegin;
LLVMContext &C = CB->getContext();
IRBuilder<> Builder(CB->getNextNode());
@@ -729,32 +1110,13 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
auto *FramePtr =
cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
DominatorTree DT(*CB->getFunction());
-
- Value *CurrentValue = nullptr;
- BasicBlock *CurrentBlock = nullptr;
- Value *CurrentReload = nullptr;
-
- // Proper field number will be read from field definition.
- unsigned Index = InvalidFieldIndex;
-
- // We need to keep track of any allocas that need "spilling"
- // since they will live in the coroutine frame now, all access to them
- // need to be changed, not just the access across suspend points
- // we remember allocas and their indices to be handled once we processed
- // all the spills.
- SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
-
- // Promise alloca (if present) doesn't show in the spills and has a
- // special field number.
- if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
- assert(Shape.ABI == coro::ABI::Switch);
- Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField());
- }
+ SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
// Create a GEP with the given index into the coroutine frame for the original
// value Orig. Appends an extra 0 index for array-allocas, preserving the
// original type.
- auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
+ auto GetFramePointer = [&](Value *Orig) -> Value * {
+ FieldIDType Index = FrameData.getFieldIndex(Orig);
SmallVector<Value *, 3> Indices = {
ConstantInt::get(Type::getInt32Ty(C), 0),
ConstantInt::get(Type::getInt32Ty(C), Index),
@@ -771,147 +1133,160 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
}
}
- return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
- };
-
- // Create a load instruction to reload the spilled value from the coroutine
- // frame. Populates the Value pointer reference provided with the frame GEP.
- auto CreateReload = [&](Instruction *InsertBefore, Value *&G) {
- assert(Index != InvalidFieldIndex && "accessing unassigned field number");
- Builder.SetInsertPoint(InsertBefore);
-
- G = GetFramePointer(Index, CurrentValue);
- G->setName(CurrentValue->getName() + Twine(".reload.addr"));
-
- return isa<AllocaInst>(CurrentValue)
- ? G
- : Builder.CreateLoad(FrameTy->getElementType(Index), G,
- CurrentValue->getName() + Twine(".reload"));
+ auto GEP = cast<GetElementPtrInst>(
+ Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
+ if (isa<AllocaInst>(Orig)) {
+ // If the type of the GEP is not equal to the type of the AllocaInst, it
+ // implies that the AllocaInst may share a frame slot with another
+ // AllocaInst, so we cast the GEP to the type of the AllocaInst here to
+ // reuse the frame storage.
+ //
+ // Note: if we change the strategy for dealing with alignment, we need to
+ // refine this cast.
+ if (GEP->getResultElementType() != Orig->getType())
+ return Builder.CreateBitCast(GEP, Orig->getType(),
+ Orig->getName() + Twine(".cast"));
+ }
+ return GEP;
};
- Value *GEP = nullptr, *CurrentGEP = nullptr;
- for (auto const &E : Spills) {
- // If we have not seen the value, generate a spill.
- if (CurrentValue != E.def()) {
- CurrentValue = E.def();
- CurrentBlock = nullptr;
- CurrentReload = nullptr;
-
- Index = E.fieldIndex();
-
- if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
- // Spilled AllocaInst will be replaced with GEP from the coroutine frame
- // there is no spill required.
- Allocas.emplace_back(AI, Index);
- if (!AI->isStaticAlloca())
- report_fatal_error("Coroutines cannot handle non static allocas yet");
+ for (auto const &E : FrameData.Spills) {
+ Value *Def = E.first;
+ // Create a store instruction storing the value into the
+ // coroutine frame.
+ Instruction *InsertPt = nullptr;
+ bool NeedToCopyArgPtrValue = false;
+ if (auto *Arg = dyn_cast<Argument>(Def)) {
+ // For arguments, we will place the store instruction right after
+ // the coroutine frame pointer instruction, i.e. bitcast of
+ // coro.begin from i8* to %f.frame*.
+ InsertPt = FramePtr->getNextNode();
+
+ // If we're spilling an Argument, make sure we clear 'nocapture'
+ // from the coroutine function.
+ Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
+
+ if (Arg->hasByValAttr())
+ NeedToCopyArgPtrValue = true;
+
+ } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
+ // Don't spill immediately after a suspend; splitting assumes
+ // that the suspend will be followed by a branch.
+ InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
+ } else {
+ auto *I = cast<Instruction>(Def);
+ if (!DT.dominates(CB, I)) {
+ // If it is not dominated by CoroBegin, then the spill should be
+ // inserted immediately after the coroutine frame pointer is computed.
+ InsertPt = FramePtr->getNextNode();
+ } else if (auto *II = dyn_cast<InvokeInst>(I)) {
+ // If we are spilling the result of the invoke instruction, split
+ // the normal edge and insert the spill in the new block.
+ auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
+ InsertPt = NewBB->getTerminator();
+ } else if (isa<PHINode>(I)) {
+ // Skip the PHINodes and EH pad instructions.
+ BasicBlock *DefBlock = I->getParent();
+ if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
+ InsertPt = splitBeforeCatchSwitch(CSI);
+ else
+ InsertPt = &*DefBlock->getFirstInsertionPt();
} else {
- // Otherwise, create a store instruction storing the value into the
- // coroutine frame.
-
- Instruction *InsertPt = nullptr;
- if (auto Arg = dyn_cast<Argument>(CurrentValue)) {
- // For arguments, we will place the store instruction right after
- // the coroutine frame pointer instruction, i.e. bitcast of
- // coro.begin from i8* to %f.frame*.
- InsertPt = FramePtr->getNextNode();
-
- // If we're spilling an Argument, make sure we clear 'nocapture'
- // from the coroutine function.
- Arg->getParent()->removeParamAttr(Arg->getArgNo(),
- Attribute::NoCapture);
-
- } else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) {
- // If we are spilling the result of the invoke instruction, split the
- // normal edge and insert the spill in the new block.
- auto NewBB = SplitEdge(II->getParent(), II->getNormalDest());
- InsertPt = NewBB->getTerminator();
- } else if (isa<PHINode>(CurrentValue)) {
- // Skip the PHINodes and EH pads instructions.
- BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent();
- if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
- InsertPt = splitBeforeCatchSwitch(CSI);
- else
- InsertPt = &*DefBlock->getFirstInsertionPt();
- } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) {
- // Don't spill immediately after a suspend; splitting assumes
- // that the suspend will be followed by a branch.
- InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
- } else {
- auto *I = cast<Instruction>(E.def());
- assert(!I->isTerminator() && "unexpected terminator");
- // For all other values, the spill is placed immediately after
- // the definition.
- if (DT.dominates(CB, I)) {
- InsertPt = I->getNextNode();
- } else {
- // Unless, it is not dominated by CoroBegin, then it will be
- // inserted immediately after CoroFrame is computed.
- InsertPt = FramePtr->getNextNode();
- }
- }
-
- Builder.SetInsertPoint(InsertPt);
- auto *G = Builder.CreateConstInBoundsGEP2_32(
- FrameTy, FramePtr, 0, Index,
- CurrentValue->getName() + Twine(".spill.addr"));
- Builder.CreateStore(CurrentValue, G);
+ assert(!I->isTerminator() && "unexpected terminator");
+ // For all other values, the spill is placed immediately after
+ // the definition.
+ InsertPt = I->getNextNode();
}
}
- // If we have not seen the use block, generate a reload in it.
- if (CurrentBlock != E.userBlock()) {
- CurrentBlock = E.userBlock();
- CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt(), GEP);
+ auto Index = FrameData.getFieldIndex(Def);
+ Builder.SetInsertPoint(InsertPt);
+ auto *G = Builder.CreateConstInBoundsGEP2_32(
+ FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
+ if (NeedToCopyArgPtrValue) {
+ // For byval arguments, we need to store the pointed-to value in the frame
+ // instead of the pointer itself.
+ auto *Value =
+ Builder.CreateLoad(Def->getType()->getPointerElementType(), Def);
+ Builder.CreateStore(Value, G);
+ } else {
+ Builder.CreateStore(Def, G);
}
- // If we have a single edge PHINode, remove it and replace it with a reload
- // from the coroutine frame. (We already took care of multi edge PHINodes
- // by rewriting them in the rewritePHIs function).
- if (auto *PN = dyn_cast<PHINode>(E.user())) {
- assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
- "values in the PHINode");
- PN->replaceAllUsesWith(CurrentReload);
- PN->eraseFromParent();
- continue;
- }
+ BasicBlock *CurrentBlock = nullptr;
+ Value *CurrentReload = nullptr;
+ for (auto *U : E.second) {
+ // If we have not seen the use block yet, compute the frame GEP there and
+ // reload the spilled value from the coroutine frame (for byval arguments
+ // the GEP itself serves as the reload).
+ if (CurrentBlock != U->getParent()) {
+ CurrentBlock = U->getParent();
+ Builder.SetInsertPoint(&*CurrentBlock->getFirstInsertionPt());
+
+ auto *GEP = GetFramePointer(E.first);
+ GEP->setName(E.first->getName() + Twine(".reload.addr"));
+ if (NeedToCopyArgPtrValue)
+ CurrentReload = GEP;
+ else
+ CurrentReload = Builder.CreateLoad(
+ FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
+ E.first->getName() + Twine(".reload"));
+
+ TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
+ for (DbgDeclareInst *DDI : DIs) {
+ bool AllowUnresolved = false;
+ // This dbg.declare is preserved for all coro-split function
+ // fragments. It will be unreachable in the main function, and
+ // processed by coro::salvageDebugInfo() by CoroCloner.
+ DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
+ .insertDeclare(CurrentReload, DDI->getVariable(),
+ DDI->getExpression(), DDI->getDebugLoc(),
+ &*Builder.GetInsertPoint());
+ // This dbg.declare is for the main function entry point. It
+ // will be deleted in all coro-split functions.
+ coro::salvageDebugInfo(DbgPtrAllocaCache, DDI);
+ }
+ }
- // If we have not seen this GEP instruction, migrate any dbg.declare from
- // the alloca to it.
- if (CurrentGEP != GEP) {
- CurrentGEP = GEP;
- TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(CurrentValue);
- if (!DIs.empty())
- DIBuilder(*CurrentBlock->getParent()->getParent(),
- /*AllowUnresolved*/ false)
- .insertDeclare(CurrentGEP, DIs.front()->getVariable(),
- DIs.front()->getExpression(),
- DIs.front()->getDebugLoc(), DIs.front());
- }
+ // If we have a single edge PHINode, remove it and replace it with a
+ // reload from the coroutine frame. (We already took care of multi edge
+ // PHINodes by rewriting them in the rewritePHIs function).
+ if (auto *PN = dyn_cast<PHINode>(U)) {
+ assert(PN->getNumIncomingValues() == 1 &&
+ "unexpected number of incoming "
+ "values in the PHINode");
+ PN->replaceAllUsesWith(CurrentReload);
+ PN->eraseFromParent();
+ continue;
+ }
- // Replace all uses of CurrentValue in the current instruction with reload.
- E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
+ // Replace all uses of CurrentValue in the current instruction with
+ // reload.
+ U->replaceUsesOfWith(Def, CurrentReload);
+ }
}
BasicBlock *FramePtrBB = FramePtr->getParent();
auto SpillBlock =
- FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
+ FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
Shape.AllocaSpillBlock = SpillBlock;
// retcon and retcon.once lowering assumes all uses have been sunk.
- if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) {
+ if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+ Shape.ABI == coro::ABI::Async) {
// If we found any allocas, replace all of their remaining uses with Geps.
Builder.SetInsertPoint(&SpillBlock->front());
- for (auto &P : Allocas) {
- auto *G = GetFramePointer(P.second, P.first);
+ for (const auto &P : FrameData.Allocas) {
+ AllocaInst *Alloca = P.Alloca;
+ auto *G = GetFramePointer(Alloca);
// We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
// here, as we are changing location of the instruction.
- G->takeName(P.first);
- P.first->replaceAllUsesWith(G);
- P.first->eraseFromParent();
+ G->takeName(Alloca);
+ Alloca->replaceAllUsesWith(G);
+ Alloca->eraseFromParent();
}
return FramePtr;
}
@@ -921,49 +1296,63 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
// we also delete the original dbg.declare and replace other uses with undef.
// Note: We cannot replace the alloca with GEP instructions indiscriminately,
// as some of the uses may not be dominated by CoroBegin.
- bool MightNeedToCopy = false;
Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
SmallVector<Instruction *, 4> UsersToUpdate;
- for (auto &P : Allocas) {
- AllocaInst *const A = P.first;
-
- for (auto *DI : FindDbgDeclareUses(A))
- DI->eraseFromParent();
- replaceDbgUsesWithUndef(A);
-
+ for (const auto &A : FrameData.Allocas) {
+ AllocaInst *Alloca = A.Alloca;
UsersToUpdate.clear();
- for (User *U : A->users()) {
+ for (User *U : Alloca->users()) {
auto *I = cast<Instruction>(U);
if (DT.dominates(CB, I))
UsersToUpdate.push_back(I);
- else
- MightNeedToCopy = true;
- }
- if (!UsersToUpdate.empty()) {
- auto *G = GetFramePointer(P.second, A);
- G->takeName(A);
- for (Instruction *I : UsersToUpdate)
- I->replaceUsesOfWith(A, G);
}
- }
- // If we discovered such uses not dominated by CoroBegin, see if any of them
- // preceed coro begin and have instructions that can modify the
- // value of the alloca and therefore would require a copying the value into
- // the spill slot in the coroutine frame.
- if (MightNeedToCopy) {
- Builder.SetInsertPoint(FramePtr->getNextNode());
-
- for (auto &P : Allocas) {
- AllocaInst *const A = P.first;
- if (mightWriteIntoAllocaPtr(*A, DT, *CB)) {
- if (A->isArrayAllocation())
- report_fatal_error(
- "Coroutines cannot handle copying of array allocas yet");
+ if (UsersToUpdate.empty())
+ continue;
+ auto *G = GetFramePointer(Alloca);
+ G->setName(Alloca->getName() + Twine(".reload.addr"));
+
+ SmallPtrSet<BasicBlock *, 4> SeenDbgBBs;
+ TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Alloca);
+ if (!DIs.empty())
+ DIBuilder(*Alloca->getModule(),
+ /*AllowUnresolved*/ false)
+ .insertDeclare(G, DIs.front()->getVariable(),
+ DIs.front()->getExpression(),
+ DIs.front()->getDebugLoc(), DIs.front());
+ for (auto *DI : FindDbgDeclareUses(Alloca))
+ DI->eraseFromParent();
+ replaceDbgUsesWithUndef(Alloca);
- auto *G = GetFramePointer(P.second, A);
- auto *Value = Builder.CreateLoad(A->getAllocatedType(), A);
- Builder.CreateStore(Value, G);
- }
+ for (Instruction *I : UsersToUpdate)
+ I->replaceUsesOfWith(Alloca, G);
+ }
+ Builder.SetInsertPoint(FramePtr->getNextNode());
+ for (const auto &A : FrameData.Allocas) {
+ AllocaInst *Alloca = A.Alloca;
+ if (A.MayWriteBeforeCoroBegin) {
+ // MayWriteBeforeCoroBegin means the alloca may be modified before
+ // CoroBegin, so its current value must be copied into the frame slot.
+ if (Alloca->isArrayAllocation())
+ report_fatal_error(
+ "Coroutines cannot handle copying of array allocas yet");
+
+ auto *G = GetFramePointer(Alloca);
+ auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca);
+ Builder.CreateStore(Value, G);
+ }
+ // For each alias of Alloca created before CoroBegin but used after
+ // CoroBegin, recreate it after CoroBegin by applying the recorded offset
+ // to the pointer in the frame.
+ for (const auto &Alias : A.Aliases) {
+ auto *FramePtr = GetFramePointer(Alloca);
+ auto *FramePtrRaw =
+ Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
+ auto *AliasPtr = Builder.CreateGEP(
+ FramePtrRaw,
+ ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue()));
+ auto *AliasPtrTyped =
+ Builder.CreateBitCast(AliasPtr, Alias.first->getType());
+ Alias.first->replaceUsesWithIf(
+ AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });
}
}
return FramePtr;
@@ -984,15 +1373,14 @@ static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
// block.
static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
- BasicBlock *NewPred,
- PHINode *LandingPadReplacement) {
+ BasicBlock *NewPred, PHINode *Until = nullptr) {
unsigned BBIdx = 0;
for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
// We manually update the LandingPadReplacement PHINode and it is the last
// PHI Node. So, if we find it, we are done.
- if (LandingPadReplacement == PN)
+ if (Until == PN)
break;
// Reuse the previous value of BBIdx if it lines up. In cases where we
@@ -1041,6 +1429,101 @@ static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
return NewBB;
}
+// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
+// PHI in InsertedBB.
+static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
+ BasicBlock *InsertedBB,
+ BasicBlock *PredBB,
+ PHINode *UntilPHI = nullptr) {
+ auto *PN = cast<PHINode>(&SuccBB->front());
+ do {
+ int Index = PN->getBasicBlockIndex(InsertedBB);
+ Value *V = PN->getIncomingValue(Index);
+ PHINode *InputV = PHINode::Create(
+ V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName(),
+ &InsertedBB->front());
+ InputV->addIncoming(V, PredBB);
+ PN->setIncomingValue(Index, InputV);
+ PN = dyn_cast<PHINode>(PN->getNextNode());
+ } while (PN != UntilPHI);
+}
+
+// Rewrites the PHI Nodes in a cleanuppad.
+static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
+ CleanupPadInst *CleanupPad) {
+ // For every incoming edge to a CleanupPad we will create a new block holding
+ // all incoming values in single-value PHI nodes. We will then create another
+ // block to act as a dispatcher (as all unwind edges for related EH blocks
+ // must be the same).
+ //
+ // cleanuppad:
+ // %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
+ // %3 = cleanuppad within none []
+ //
+ // It will create:
+ //
+ // cleanuppad.corodispatch
+ // %2 = phi i8[0, %catchswitch], [1, %catch.1]
+ // %3 = cleanuppad within none []
+ // switch i8 %2, label %unreachable
+ // [i8 0, label %cleanuppad.from.catchswitch
+ // i8 1, label %cleanuppad.from.catch.1]
+ // cleanuppad.from.catchswitch:
+ // %4 = phi i32 [%0, %catchswitch]
+ // br %label cleanuppad
+ // cleanuppad.from.catch.1:
+ // %6 = phi i32 [%1, %catch.1]
+ // br %label cleanuppad
+ // cleanuppad:
+ // %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
+ // [%6, %cleanuppad.from.catch.1]
+
+ // Unreachable BB, in case switching on an invalid value in the dispatcher.
+ auto *UnreachBB = BasicBlock::Create(
+ CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
+ IRBuilder<> Builder(UnreachBB);
+ Builder.CreateUnreachable();
+
+ // Create a new cleanuppad which will be the dispatcher.
+ auto *NewCleanupPadBB =
+ BasicBlock::Create(CleanupPadBB->getContext(),
+ CleanupPadBB->getName() + Twine(".corodispatch"),
+ CleanupPadBB->getParent(), CleanupPadBB);
+ Builder.SetInsertPoint(NewCleanupPadBB);
+ auto *SwitchType = Builder.getInt8Ty();
+ auto *SetDispatchValuePN =
+ Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
+ CleanupPad->removeFromParent();
+ CleanupPad->insertAfter(SetDispatchValuePN);
+ auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
+ pred_size(CleanupPadBB));
+
+ int SwitchIndex = 0;
+ SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB));
+ for (BasicBlock *Pred : Preds) {
+ // Create a new cleanuppad and move the PHI values to there.
+ auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
+ CleanupPadBB->getName() +
+ Twine(".from.") + Pred->getName(),
+ CleanupPadBB->getParent(), CleanupPadBB);
+ updatePhiNodes(CleanupPadBB, Pred, CaseBB);
+ CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
+ Pred->getName());
+ Builder.SetInsertPoint(CaseBB);
+ Builder.CreateBr(CleanupPadBB);
+ movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);
+
+ // Update this Pred to the new unwind point.
+ setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);
+
+ // Setup the switch in the dispatcher.
+ auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
+ SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
+ SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
+ SwitchIndex++;
+ }
+}
+
static void rewritePHIs(BasicBlock &BB) {
// For every incoming edge we will create a block holding all
// incoming values in a single PHI nodes.
@@ -1063,6 +1546,24 @@ static void rewritePHIs(BasicBlock &BB) {
// TODO: Simplify PHINodes in the basic block to remove duplicate
// predecessors.
+ // Special case for CleanupPad: all EH blocks must have the same unwind edge
+ // so we need to create an additional "dispatcher" block.
+ if (auto *CleanupPad =
+ dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
+ SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
+ for (BasicBlock *Pred : Preds) {
+ if (CatchSwitchInst *CS =
+ dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
+ // CleanupPad with a CatchSwitch predecessor: therefore this is an
+ // unwind destination that needs to be handled specially.
+ assert(CS->getUnwindDest() == &BB);
+ (void)CS;
+ rewritePHIsForCleanupPad(&BB, CleanupPad);
+ return;
+ }
+ }
+ }
+
LandingPadInst *LandingPad = nullptr;
PHINode *ReplPHI = nullptr;
if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
@@ -1076,22 +1577,14 @@ static void rewritePHIs(BasicBlock &BB) {
// ehAwareSplitEdge cloned it in the transition blocks.
}
- SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
+ SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
for (BasicBlock *Pred : Preds) {
auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
- auto *PN = cast<PHINode>(&BB.front());
- do {
- int Index = PN->getBasicBlockIndex(IncomingBB);
- Value *V = PN->getIncomingValue(Index);
- PHINode *InputV = PHINode::Create(
- V->getType(), 1, V->getName() + Twine(".") + BB.getName(),
- &IncomingBB->front());
- InputV->addIncoming(V, Pred);
- PN->setIncomingValue(Index, InputV);
- PN = dyn_cast<PHINode>(PN->getNextNode());
- } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced
- // the landing pad.
+
+ // Stop the moving of values at ReplPHI, as this is either null or the PHI
+ // that replaced the landing pad.
+ movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
}
if (LandingPad) {
@@ -1130,39 +1623,32 @@ static bool isCoroutineStructureIntrinsic(Instruction &I) {
// For every use of the value that is across suspend point, recreate that value
// after a suspend point.
static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
- SpillInfo const &Spills) {
- BasicBlock *CurrentBlock = nullptr;
- Instruction *CurrentMaterialization = nullptr;
- Instruction *CurrentDef = nullptr;
-
- for (auto const &E : Spills) {
- // If it is a new definition, update CurrentXXX variables.
- if (CurrentDef != E.def()) {
- CurrentDef = cast<Instruction>(E.def());
- CurrentBlock = nullptr;
- CurrentMaterialization = nullptr;
- }
-
- // If we have not seen this block, materialize the value.
- if (CurrentBlock != E.userBlock()) {
- CurrentBlock = E.userBlock();
- CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
- CurrentMaterialization->setName(CurrentDef->getName());
- CurrentMaterialization->insertBefore(
- &*CurrentBlock->getFirstInsertionPt());
- }
-
- if (auto *PN = dyn_cast<PHINode>(E.user())) {
- assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
- "values in the PHINode");
- PN->replaceAllUsesWith(CurrentMaterialization);
- PN->eraseFromParent();
- continue;
+ const SpillInfo &Spills) {
+ for (const auto &E : Spills) {
+ Value *Def = E.first;
+ BasicBlock *CurrentBlock = nullptr;
+ Instruction *CurrentMaterialization = nullptr;
+ for (Instruction *U : E.second) {
+ // If we have not seen this block, materialize the value.
+ if (CurrentBlock != U->getParent()) {
+ CurrentBlock = U->getParent();
+ CurrentMaterialization = cast<Instruction>(Def)->clone();
+ CurrentMaterialization->setName(Def->getName());
+ CurrentMaterialization->insertBefore(
+ &*CurrentBlock->getFirstInsertionPt());
+ }
+ if (auto *PN = dyn_cast<PHINode>(U)) {
+ assert(PN->getNumIncomingValues() == 1 &&
+ "unexpected number of incoming "
+ "values in the PHINode");
+ PN->replaceAllUsesWith(CurrentMaterialization);
+ PN->eraseFromParent();
+ continue;
+ }
+ // Replace all uses of Def in the current instruction with the
+ // CurrentMaterialization for the block.
+ U->replaceUsesOfWith(Def, CurrentMaterialization);
}
-
- // Replace all uses of CurrentDef in the current instruction with the
- // CurrentMaterialization for the block.
- E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
}
}
@@ -1501,7 +1987,8 @@ static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
/// retcon and retcon.once conventions assume that all spill uses can be sunk
/// after the coro.begin intrinsic.
-static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills,
+static void sinkSpillUsesAfterCoroBegin(Function &F,
+ const FrameDataInfo &FrameData,
CoroBeginInst *CoroBegin) {
DominatorTree Dom(F);
@@ -1509,9 +1996,8 @@ static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills,
SmallVector<Instruction *, 32> Worklist;
// Collect all users that precede coro.begin.
- for (auto const &Entry : Spills) {
- auto *SpillDef = Entry.def();
- for (User *U : SpillDef->users()) {
+ for (auto *Def : FrameData.getAllDefs()) {
+ for (User *U : Def->users()) {
auto Inst = cast<Instruction>(U);
if (Inst->getParent() != CoroBegin->getParent() ||
Dom.dominates(CoroBegin, Inst))
@@ -1522,8 +2008,7 @@ static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills,
}
// Recursively collect users before coro.begin.
while (!Worklist.empty()) {
- auto *Def = Worklist.back();
- Worklist.pop_back();
+ auto *Def = Worklist.pop_back_val();
for (User *U : Def->users()) {
auto Inst = cast<Instruction>(U);
if (Dom.dominates(CoroBegin, Inst))
@@ -1535,17 +2020,14 @@ static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills,
// Sort by dominance.
SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
- std::sort(InsertionList.begin(), InsertionList.end(),
- [&Dom](Instruction *A, Instruction *B) -> bool {
- // If a dominates b it should preceed (<) b.
- return Dom.dominates(A, B);
- });
+ llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool {
+ // If a dominates b, it should precede (<) b.
+ return Dom.dominates(A, B);
+ });
Instruction *InsertPt = CoroBegin->getNextNode();
for (Instruction *Inst : InsertionList)
Inst->moveBefore(InsertPt);
-
- return;
}
/// For each local variable that all of its user are only used inside one of
@@ -1567,56 +2049,186 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
}
for (Instruction &I : instructions(F)) {
- if (!isa<AllocaInst>(&I))
+ AllocaInst* AI = dyn_cast<AllocaInst>(&I);
+ if (!AI)
continue;
for (BasicBlock *DomBB : DomSet) {
bool Valid = true;
- SmallVector<Instruction *, 1> BCInsts;
+ SmallVector<Instruction *, 1> Lifetimes;
- auto isUsedByLifetimeStart = [&](Instruction *I) {
- if (isa<BitCastInst>(I) && I->hasOneUse())
- if (auto *IT = dyn_cast<IntrinsicInst>(I->user_back()))
- return IT->getIntrinsicID() == Intrinsic::lifetime_start;
+ auto isLifetimeStart = [](Instruction* I) {
+ if (auto* II = dyn_cast<IntrinsicInst>(I))
+ return II->getIntrinsicID() == Intrinsic::lifetime_start;
return false;
};
- for (User *U : I.users()) {
+ auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
+ if (isLifetimeStart(U)) {
+ Lifetimes.push_back(U);
+ return true;
+ }
+ if (!U->hasOneUse() || U->stripPointerCasts() != AI)
+ return false;
+ if (isLifetimeStart(U->user_back())) {
+ Lifetimes.push_back(U->user_back());
+ return true;
+ }
+ return false;
+ };
+
+ for (User *U : AI->users()) {
Instruction *UI = cast<Instruction>(U);
// For all users except lifetime.start markers, if they are all
// dominated by one of the basic blocks and do not cross
// suspend points as well, then there is no need to spill the
// instruction.
if (!DT.dominates(DomBB, UI->getParent()) ||
- Checker.isDefinitionAcrossSuspend(DomBB, U)) {
- // Skip bitcast used by lifetime.start markers.
- if (isUsedByLifetimeStart(UI)) {
- BCInsts.push_back(UI);
+ Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
+ // Skip lifetime.start, GEP and bitcast used by lifetime.start
+ // markers.
+ if (collectLifetimeStart(UI, AI))
continue;
- }
Valid = false;
break;
}
}
 // Sink lifetime.start markers to the dominating block when they are
 // only used outside the region.
- if (Valid && BCInsts.size() != 0) {
- auto *NewBitcast = BCInsts[0]->clone();
- auto *NewLifetime = cast<Instruction>(BCInsts[0]->user_back())->clone();
- NewLifetime->replaceUsesOfWith(BCInsts[0], NewBitcast);
- NewBitcast->insertBefore(DomBB->getTerminator());
+ if (Valid && Lifetimes.size() != 0) {
+ // May be AI itself, when the type of AI is i8*
+ auto *NewBitCast = [&](AllocaInst *AI) -> Value* {
+ if (isa<AllocaInst>(Lifetimes[0]->getOperand(1)))
+ return AI;
+ auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext());
+ return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "",
+ DomBB->getTerminator());
+ }(AI);
+
+ auto *NewLifetime = Lifetimes[0]->clone();
+ NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast);
NewLifetime->insertBefore(DomBB->getTerminator());
 // All the outside lifetime.start markers are no longer necessary.
- for (Instruction *S : BCInsts) {
- S->user_back()->eraseFromParent();
- }
+ for (Instruction *S : Lifetimes)
+ S->eraseFromParent();
+
break;
}
}
}
}
+static void collectFrameAllocas(Function &F, coro::Shape &Shape,
+ const SuspendCrossingInfo &Checker,
+ SmallVectorImpl<AllocaInfo> &Allocas) {
+ // Collect lifetime.start info for each alloca.
+ using LifetimeStart = SmallPtrSet<Instruction *, 2>;
+ llvm::DenseMap<AllocaInst *, std::unique_ptr<LifetimeStart>> LifetimeMap;
+ for (Instruction &I : instructions(F)) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
+ continue;
+
+ if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) {
+ if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) {
+
+ if (LifetimeMap.find(AI) == LifetimeMap.end())
+ LifetimeMap[AI] = std::make_unique<LifetimeStart>();
+ LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst);
+ }
+ }
+ }
+
+ for (Instruction &I : instructions(F)) {
+ auto *AI = dyn_cast<AllocaInst>(&I);
+ if (!AI)
+ continue;
+ // The PromiseAlloca will be specially handled since it needs to be in a
+ // fixed position in the frame.
+ if (AI == Shape.SwitchLowering.PromiseAlloca) {
+ continue;
+ }
+ bool ShouldLiveOnFrame = false;
+ auto Iter = LifetimeMap.find(AI);
+ if (Iter != LifetimeMap.end()) {
+ // Check against lifetime.start if the instruction has the info.
+ for (User *U : I.users()) {
+ for (auto *S : *Iter->second)
+ if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U)))
+ break;
+ if (ShouldLiveOnFrame)
+ break;
+ }
+ if (!ShouldLiveOnFrame)
+ continue;
+ }
+ // At this point, either ShouldLiveOnFrame is true or we didn't have
+ // lifetime information. We will need to rely on more precise pointer
+ // tracking.
+ DominatorTree DT(F);
+ AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,
+ *Shape.CoroBegin, Checker};
+ Visitor.visitPtr(*AI);
+ if (!Visitor.getShouldLiveOnFrame())
+ continue;
+ Allocas.emplace_back(AI, Visitor.getAliasesCopy(),
+ Visitor.getMayWriteBeforeCoroBegin());
+ }
+}
+
+void coro::salvageDebugInfo(
+ SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
+ DbgDeclareInst *DDI, bool LoadFromFramePtr) {
+ Function *F = DDI->getFunction();
+ IRBuilder<> Builder(F->getContext());
+ auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
+ while (isa<IntrinsicInst>(InsertPt))
+ ++InsertPt;
+ Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
+ DIExpression *Expr = DDI->getExpression();
+ // Follow the pointer arithmetic all the way to the incoming
+ // function argument and convert it into a DIExpression.
+ Value *Storage = DDI->getAddress();
+ while (Storage) {
+ if (auto *LdInst = dyn_cast<LoadInst>(Storage)) {
+ Storage = LdInst->getOperand(0);
+ } else if (auto *StInst = dyn_cast<StoreInst>(Storage)) {
+ Storage = StInst->getOperand(0);
+ } else if (auto *GEPInst = dyn_cast<GetElementPtrInst>(Storage)) {
+ Expr = llvm::salvageDebugInfoImpl(*GEPInst, Expr,
+ /*WithStackValue=*/false);
+ Storage = GEPInst->getOperand(0);
+ } else if (auto *BCInst = dyn_cast<llvm::BitCastInst>(Storage))
+ Storage = BCInst->getOperand(0);
+ else
+ break;
+ }
+ // Store a pointer to the coroutine frame object in an alloca so it
+ // is available throughout the function when producing unoptimized
+ // code. Extending the lifetime this way is correct because the
+ // variable has been declared by a dbg.declare intrinsic.
+ if (auto Arg = dyn_cast_or_null<llvm::Argument>(Storage)) {
+ auto &Cached = DbgPtrAllocaCache[Storage];
+ if (!Cached) {
+ Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
+ Arg->getName() + ".debug");
+ Builder.CreateStore(Storage, Cached);
+ }
+ Storage = Cached;
+ }
+ // The FramePtr object adds one extra layer of indirection that
+ // needs to be unwrapped.
+ if (LoadFromFramePtr)
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+ auto &VMContext = DDI->getFunction()->getContext();
+ DDI->setOperand(
+ 0, MetadataAsValue::get(VMContext, ValueAsMetadata::get(Storage)));
+ DDI->setOperand(2, MetadataAsValue::get(VMContext, Expr));
+ if (auto *InsertPt = dyn_cast_or_null<Instruction>(Storage))
+ DDI->moveAfter(InsertPt);
+}
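Editorial aside (not part of the patch): coro::salvageDebugInfo above is meant to be driven once per cloned function with a shared alloca cache. A minimal sketch of the call pattern, mirroring CoroCloner::salvageDebugInfo further down in this diff; Worklist is assumed to already hold the function's dbg.declare intrinsics:

    // Assumed: Worklist was filled by scanning the cloned function for
    // DbgDeclareInst; the cache maps each frame pointer to its debug alloca.
    SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
    for (DbgDeclareInst *DDI : Worklist) {
      // Heuristic used later in this patch: an address that is no longer a
      // plain alloca has been rewritten to point into the coroutine frame.
      bool LoadFromFramePtr = !isa<AllocaInst>(DDI->getAddress());
      coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, LoadFromFramePtr);
    }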
+
void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
eliminateSwiftError(F, Shape);
@@ -1635,9 +2247,26 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
}
// Put CoroEnds into their own blocks.
- for (CoroEndInst *CE : Shape.CoroEnds)
+ for (AnyCoroEndInst *CE : Shape.CoroEnds) {
splitAround(CE, "CoroEnd");
+ // Emit the musttail call function in a new block before the CoroEnd.
+ // We do this here so that the right suspend crossing info is computed for
+ // the uses of the musttail call. (Arguments to the coro.end instruction
+ // would otherwise be ignored.)
+ if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
+ auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
+ if (!MustTailCallFn)
+ continue;
+ IRBuilder<> Builder(AsyncEnd);
+ SmallVector<Value *, 8> Args(AsyncEnd->args());
+ auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
+ auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn,
+ Arguments, Builder);
+ splitAround(Call, "MustTailCall.Before.CoroEnd");
+ }
+ }
+
// Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
 // never have its definition separated from the PHI by the suspend point.
rewritePHIs(F);
@@ -1646,51 +2275,40 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
SuspendCrossingInfo Checker(F, Shape);
IRBuilder<> Builder(F.getContext());
- SpillInfo Spills;
+ FrameDataInfo FrameData;
SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
SmallVector<Instruction*, 4> DeadInstructions;
- for (int Repeat = 0; Repeat < 4; ++Repeat) {
- // See if there are materializable instructions across suspend points.
- for (Instruction &I : instructions(F))
- if (materializable(I))
- for (User *U : I.users())
- if (Checker.isDefinitionAcrossSuspend(I, U))
- Spills.emplace_back(&I, U);
-
- if (Spills.empty())
- break;
+ {
+ SpillInfo Spills;
+ for (int Repeat = 0; Repeat < 4; ++Repeat) {
+ // See if there are materializable instructions across suspend points.
+ for (Instruction &I : instructions(F))
+ if (materializable(I))
+ for (User *U : I.users())
+ if (Checker.isDefinitionAcrossSuspend(I, U))
+ Spills[&I].push_back(cast<Instruction>(U));
+
+ if (Spills.empty())
+ break;
- // Rewrite materializable instructions to be materialized at the use point.
- LLVM_DEBUG(dump("Materializations", Spills));
- rewriteMaterializableInstructions(Builder, Spills);
- Spills.clear();
+ // Rewrite materializable instructions to be materialized at the use
+ // point.
+ LLVM_DEBUG(dumpSpills("Materializations", Spills));
+ rewriteMaterializableInstructions(Builder, Spills);
+ Spills.clear();
+ }
}
sinkLifetimeStartMarkers(F, Shape, Checker);
- // Collect lifetime.start info for each alloca.
- using LifetimeStart = SmallPtrSet<Instruction *, 2>;
- llvm::DenseMap<Instruction *, std::unique_ptr<LifetimeStart>> LifetimeMap;
- for (Instruction &I : instructions(F)) {
- auto *II = dyn_cast<IntrinsicInst>(&I);
- if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
- continue;
-
- if (auto *OpInst = dyn_cast<BitCastInst>(I.getOperand(1)))
- if (auto *AI = dyn_cast<AllocaInst>(OpInst->getOperand(0))) {
-
- if (LifetimeMap.find(AI) == LifetimeMap.end())
- LifetimeMap[AI] = std::make_unique<LifetimeStart>();
-
- LifetimeMap[AI]->insert(OpInst);
- }
- }
+ collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
+ LLVM_DEBUG(dumpAllocas(FrameData.Allocas));
// Collect the spills for arguments and other not-materializable values.
for (Argument &A : F.args())
for (User *U : A.users())
if (Checker.isDefinitionAcrossSuspend(A, U))
- Spills.emplace_back(&A, U);
+ FrameData.Spills[&A].push_back(cast<Instruction>(U));
for (Instruction &I : instructions(F)) {
// Values returned from coroutine structure intrinsics should not be part
@@ -1721,43 +2339,33 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
for (User *U : Alloc->users()) {
if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
- Spills.emplace_back(Alloc, U);
+ FrameData.Spills[Alloc].push_back(cast<Instruction>(U));
}
continue;
}
// Ignore alloca.get; we process this as part of coro.alloca.alloc.
- if (isa<CoroAllocaGetInst>(I)) {
+ if (isa<CoroAllocaGetInst>(I))
continue;
- }
- auto Iter = LifetimeMap.find(&I);
- for (User *U : I.users()) {
- bool NeedSpill = false;
-
- // Check against lifetime.start if the instruction has the info.
- if (Iter != LifetimeMap.end())
- for (auto *S : *Iter->second) {
- if ((NeedSpill = Checker.isDefinitionAcrossSuspend(*S, U)))
- break;
- }
- else
- NeedSpill = Checker.isDefinitionAcrossSuspend(I, U);
+ if (isa<AllocaInst>(I))
+ continue;
- if (NeedSpill) {
+ for (User *U : I.users())
+ if (Checker.isDefinitionAcrossSuspend(I, U)) {
// We cannot spill a token.
if (I.getType()->isTokenTy())
report_fatal_error(
"token definition is separated from the use by a suspend point");
- Spills.emplace_back(&I, U);
+ FrameData.Spills[&I].push_back(cast<Instruction>(U));
}
- }
}
- LLVM_DEBUG(dump("Spills", Spills));
- if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce)
- sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
- Shape.FrameTy = buildFrameType(F, Shape, Spills);
- Shape.FramePtr = insertSpills(Spills, Shape);
+ LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
+ if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+ Shape.ABI == coro::ABI::Async)
+ sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
+ Shape.FrameTy = buildFrameType(F, Shape, FrameData);
+ Shape.FramePtr = insertSpills(FrameData, Shape);
lowerLocalAllocas(LocalAllocas, DeadInstructions);
for (auto I : DeadInstructions)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
index 320137526db8..9fa2fd12f80b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -99,9 +99,9 @@ public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
auto ID = I->getIntrinsicID();
- return ID == Intrinsic::coro_id ||
- ID == Intrinsic::coro_id_retcon ||
- ID == Intrinsic::coro_id_retcon_once;
+ return ID == Intrinsic::coro_id || ID == Intrinsic::coro_id_retcon ||
+ ID == Intrinsic::coro_id_retcon_once ||
+ ID == Intrinsic::coro_id_async;
}
static bool classof(const Value *V) {
@@ -273,6 +273,109 @@ public:
}
};
+/// This represents the llvm.coro.id.async instruction.
+class LLVM_LIBRARY_VISIBILITY CoroIdAsyncInst : public AnyCoroIdInst {
+ enum { SizeArg, AlignArg, StorageArg, AsyncFuncPtrArg };
+
+public:
+ void checkWellFormed() const;
+
+ /// The initial async function context size, the fields of which are reserved
+ /// for use by the frontend. The frame will be allocated as a tail of this
+ /// context.
+ uint64_t getStorageSize() const {
+ return cast<ConstantInt>(getArgOperand(SizeArg))->getZExtValue();
+ }
+
+ /// The alignment of the initial async function context.
+ Align getStorageAlignment() const {
+ return cast<ConstantInt>(getArgOperand(AlignArg))->getAlignValue();
+ }
+
+ /// The async context parameter.
+ Value *getStorage() const {
+ return getParent()->getParent()->getArg(getStorageArgumentIndex());
+ }
+
+ unsigned getStorageArgumentIndex() const {
+ auto *Arg = cast<ConstantInt>(getArgOperand(StorageArg));
+ return Arg->getZExtValue();
+ }
+
+ /// Return the async function pointer address. This should be the address of
+ /// an async function pointer struct for the current async function.
+ /// struct async_function_pointer {
+ /// uint32_t context_size;
+ /// uint32_t relative_async_function_pointer;
+ /// };
+ GlobalVariable *getAsyncFunctionPointer() const {
+ return cast<GlobalVariable>(
+ getArgOperand(AsyncFuncPtrArg)->stripPointerCasts());
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ auto ID = I->getIntrinsicID();
+ return ID == Intrinsic::coro_id_async;
+ }
+
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
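Editorial aside (not part of the patch): the comment above already spells out the layout that getAsyncFunctionPointer() expects behind its operand. As a plain C++ sketch, with illustrative field names only (this is not an LLVM type):

    // Assumed layout of the global returned by getAsyncFunctionPointer().
    struct async_function_pointer {
      uint32_t context_size;                    // initial async context size
      uint32_t relative_async_function_pointer; // relative pointer to the fn
    };

CoroSplit later rewrites the context_size field once the full frame layout is known (see updateAsyncFuncPointerContextSize in CoroSplit.cpp below).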
+
+/// This represents the llvm.coro.context.alloc instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAsyncContextAllocInst : public IntrinsicInst {
+ enum { AsyncFuncPtrArg };
+
+public:
+ GlobalVariable *getAsyncFunctionPointer() const {
+ return cast<GlobalVariable>(
+ getArgOperand(AsyncFuncPtrArg)->stripPointerCasts());
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_async_context_alloc;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.context.dealloc instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAsyncContextDeallocInst
+ : public IntrinsicInst {
+ enum { AsyncContextArg };
+
+public:
+ Value *getAsyncContext() const {
+ return getArgOperand(AsyncContextArg)->stripPointerCasts();
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_async_context_dealloc;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.async.resume instruction.
+/// During lowering this is replaced by the resume function of a suspend point
+/// (the continuation function).
+class LLVM_LIBRARY_VISIBILITY CoroAsyncResumeInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_async_resume;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
/// This represents the llvm.coro.frame instruction.
class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst {
public:
@@ -366,6 +469,7 @@ public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_suspend ||
+ I->getIntrinsicID() == Intrinsic::coro_suspend_async ||
I->getIntrinsicID() == Intrinsic::coro_suspend_retcon;
}
static bool classof(const Value *V) {
@@ -405,6 +509,37 @@ inline CoroSaveInst *AnyCoroSuspendInst::getCoroSave() const {
return nullptr;
}
+/// This represents the llvm.coro.suspend.async instruction.
+class LLVM_LIBRARY_VISIBILITY CoroSuspendAsyncInst : public AnyCoroSuspendInst {
+ enum { ResumeFunctionArg, AsyncContextProjectionArg, MustTailCallFuncArg };
+
+public:
+ void checkWellFormed() const;
+
+ Function *getAsyncContextProjectionFunction() const {
+ return cast<Function>(
+ getArgOperand(AsyncContextProjectionArg)->stripPointerCasts());
+ }
+
+ CoroAsyncResumeInst *getResumeFunction() const {
+ return cast<CoroAsyncResumeInst>(
+ getArgOperand(ResumeFunctionArg)->stripPointerCasts());
+ }
+
+ Function *getMustTailCallFunction() const {
+ return cast<Function>(
+ getArgOperand(MustTailCallFuncArg)->stripPointerCasts());
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_suspend_async;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
/// This represents the llvm.coro.suspend.retcon instruction.
class LLVM_LIBRARY_VISIBILITY CoroSuspendRetconInst : public AnyCoroSuspendInst {
public:
@@ -442,8 +577,7 @@ public:
}
};
-/// This represents the llvm.coro.end instruction.
-class LLVM_LIBRARY_VISIBILITY CoroEndInst : public IntrinsicInst {
+class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst {
enum { FrameArg, UnwindArg };
public:
@@ -454,6 +588,19 @@ public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
+ auto ID = I->getIntrinsicID();
+ return ID == Intrinsic::coro_end || ID == Intrinsic::coro_end_async;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.end instruction.
+class LLVM_LIBRARY_VISIBILITY CoroEndInst : public AnyCoroEndInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_end;
}
static bool classof(const Value *V) {
@@ -461,6 +608,30 @@ public:
}
};
+/// This represents the llvm.coro.end instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAsyncEndInst : public AnyCoroEndInst {
+ enum { FrameArg, UnwindArg, MustTailCallFuncArg };
+
+public:
+ void checkWellFormed() const;
+
+ Function *getMustTailCallFunction() const {
+ if (getNumArgOperands() < 3)
+ return nullptr;
+
+ return cast<Function>(
+ getArgOperand(MustTailCallFuncArg)->stripPointerCasts());
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_end_async;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
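Editorial aside (not part of the patch): the must-tail callee on llvm.coro.end.async is optional, so getMustTailCallFunction() returning nullptr simply means there is nothing extra to emit. A sketch of the consuming pattern, mirroring buildCoroutineFrame and replaceCoroEndAsync elsewhere in this diff; End and Builder are assumed to be in scope:

    // Sketch only: emit the optional must-tail call attached to a
    // coro.end.async, forwarding everything after the three fixed operands.
    if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(End))
      if (Function *MustTailFn = AsyncEnd->getMustTailCallFunction()) {
        SmallVector<Value *, 8> Args(AsyncEnd->args());
        auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
        coro::createMustTailCall(AsyncEnd->getDebugLoc(), MustTailFn,
                                 Arguments, Builder);
      }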
+
/// This represents the llvm.coro.alloca.alloc instruction.
class LLVM_LIBRARY_VISIBILITY CoroAllocaAllocInst : public IntrinsicInst {
enum { SizeArg, AlignArg };
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
index bd76e93c9124..6c0e52f24542 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -34,10 +34,12 @@ void initializeCoroCleanupLegacyPass(PassRegistry &);
// CoroElide pass that triggers a restart of the pipeline by CGPassManager.
// When CoroSplit pass sees the same coroutine the second time, it splits it up,
// adds coroutine subfunctions to the SCC to be processed by IPO pipeline.
-
+// Async lowering similarly triggers a restart of the pipeline after it has
+// split the coroutine.
#define CORO_PRESPLIT_ATTR "coroutine.presplit"
#define UNPREPARED_FOR_SPLIT "0"
#define PREPARED_FOR_SPLIT "1"
+#define ASYNC_RESTART_AFTER_SPLIT "2"
#define CORO_DEVIRT_TRIGGER_FN "coro.devirt.trigger"
@@ -45,11 +47,14 @@ namespace coro {
bool declaresIntrinsics(const Module &M,
const std::initializer_list<StringRef>);
-void replaceAllCoroAllocs(CoroBeginInst *CB, bool Replacement);
-void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement);
void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
void updateCallGraph(Function &Caller, ArrayRef<Function *> Funcs,
CallGraph &CG, CallGraphSCC &SCC);
+/// Recover a dbg.declare prepared by the frontend and emit an alloca
+/// holding a pointer to the coroutine frame.
+void salvageDebugInfo(
+ SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
+ DbgDeclareInst *DDI, bool LoadFromCoroFrame = false);
// Keeps data and helper functions for lowering coroutine intrinsics.
struct LowererBase {
@@ -81,13 +86,18 @@ enum class ABI {
/// suspend at most once during its execution, and the return value of
/// the continuation is void.
RetconOnce,
+
+ /// The "async continuation" lowering, where each suspend point creates a
+ /// single continuation function. The continuation function is available as an
+ /// intrinsic.
+ Async,
};
// Holds structural Coroutine Intrinsics for a particular function and other
// values used during CoroSplit pass.
struct LLVM_LIBRARY_VISIBILITY Shape {
CoroBeginInst *CoroBegin;
- SmallVector<CoroEndInst *, 4> CoroEnds;
+ SmallVector<AnyCoroEndInst *, 4> CoroEnds;
SmallVector<CoroSizeInst *, 2> CoroSizes;
SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
SmallVector<CallInst*, 2> SwiftErrorOps;
@@ -115,12 +125,13 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
Instruction *FramePtr;
BasicBlock *AllocaSpillBlock;
+ bool ReuseFrameSlot;
+
struct SwitchLoweringStorage {
SwitchInst *ResumeSwitch;
AllocaInst *PromiseAlloca;
BasicBlock *ResumeEntryBlock;
unsigned IndexField;
- unsigned PromiseField;
bool HasFinalSuspend;
};
@@ -132,9 +143,23 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
bool IsFrameInlineInStorage;
};
+ struct AsyncLoweringStorage {
+ FunctionType *AsyncFuncTy;
+ Value *Context;
+ unsigned ContextArgNo;
+ uint64_t ContextHeaderSize;
+ uint64_t ContextAlignment;
+ uint64_t FrameOffset; // Start of the frame.
+ uint64_t ContextSize; // Includes frame size.
+ GlobalVariable *AsyncFuncPointer;
+
+ Align getContextAlignment() const { return Align(ContextAlignment); }
+ };
+
union {
SwitchLoweringStorage SwitchLowering;
RetconLoweringStorage RetconLowering;
+ AsyncLoweringStorage AsyncLowering;
};
CoroIdInst *getSwitchCoroId() const {
@@ -148,6 +173,11 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
return cast<AnyCoroIdRetconInst>(CoroBegin->getId());
}
+ CoroIdAsyncInst *getAsyncCoroId() const {
+ assert(ABI == coro::ABI::Async);
+ return cast<CoroIdAsyncInst>(CoroBegin->getId());
+ }
+
unsigned getSwitchIndexField() const {
assert(ABI == coro::ABI::Switch);
assert(FrameTy && "frame type not assigned");
@@ -177,7 +207,10 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
case coro::ABI::Retcon:
case coro::ABI::RetconOnce:
return RetconLowering.ResumePrototype->getFunctionType();
+ case coro::ABI::Async:
+ return AsyncLowering.AsyncFuncTy;
}
+
llvm_unreachable("Unknown coro::ABI enum");
}
@@ -211,6 +244,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
case coro::ABI::Retcon:
case coro::ABI::RetconOnce:
return RetconLowering.ResumePrototype->getCallingConv();
+ case coro::ABI::Async:
+ return CallingConv::Swift;
}
llvm_unreachable("Unknown coro::ABI enum");
}
@@ -220,12 +255,6 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
return SwitchLowering.PromiseAlloca;
return nullptr;
}
- unsigned getPromiseField() const {
- assert(ABI == coro::ABI::Switch);
- assert(FrameTy && "frame type not assigned");
- assert(SwitchLowering.PromiseAlloca && "no promise alloca");
- return SwitchLowering.PromiseField;
- }
/// Allocate memory according to the rules of the active lowering.
///
@@ -238,12 +267,16 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const;
Shape() = default;
- explicit Shape(Function &F) { buildFrom(F); }
+ explicit Shape(Function &F, bool ReuseFrameSlot = false)
+ : ReuseFrameSlot(ReuseFrameSlot) {
+ buildFrom(F);
+ }
void buildFrom(Function &F);
};
void buildCoroutineFrame(Function &F, Shape &Shape);
-
+CallInst *createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
+ ArrayRef<Value *> Arguments, IRBuilder<> &);
} // End namespace coro.
} // End namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 9c4392e7999b..c4d7db9153e2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -90,7 +91,11 @@ public:
/// An individual continuation function.
Continuation,
+
+ /// An async resume function.
+ Async,
};
+
private:
Function &OrigF;
Function *NewF;
@@ -100,9 +105,9 @@ private:
ValueToValueMapTy VMap;
IRBuilder<> Builder;
Value *NewFramePtr = nullptr;
- Value *SwiftErrorSlot = nullptr;
- /// The active suspend instruction; meaningful only for continuation ABIs.
+ /// The active suspend instruction; meaningful only for continuation and async
+ /// ABIs.
AnyCoroSuspendInst *ActiveSuspend = nullptr;
public:
@@ -117,11 +122,11 @@ public:
/// Create a cloner for a continuation lowering.
CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
- : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
- FKind(Kind::Continuation), Builder(OrigF.getContext()),
- ActiveSuspend(ActiveSuspend) {
+ : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
+ FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
+ Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) {
assert(Shape.ABI == coro::ABI::Retcon ||
- Shape.ABI == coro::ABI::RetconOnce);
+ Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
assert(NewF && "need existing function for continuation");
assert(ActiveSuspend && "need active suspend point for continuation");
}
@@ -136,6 +141,7 @@ public:
private:
bool isSwitchDestroyFunction() {
switch (FKind) {
+ case Kind::Async:
case Kind::Continuation:
case Kind::SwitchResume:
return false;
@@ -146,15 +152,14 @@ private:
llvm_unreachable("Unknown CoroCloner::Kind enum");
}
- void createDeclaration();
void replaceEntryBlock();
Value *deriveNewFramePointer();
- void replaceRetconSuspendUses();
+ void replaceRetconOrAsyncSuspendUses();
void replaceCoroSuspends();
void replaceCoroEnds();
void replaceSwiftErrorOps();
+ void salvageDebugInfo();
void handleFinalSuspend();
- void maybeFreeContinuationStorage();
};
} // end anonymous namespace
@@ -170,8 +175,53 @@ static void maybeFreeRetconStorage(IRBuilder<> &Builder,
Shape.emitDealloc(Builder, FramePtr, CG);
}
+/// Replace an llvm.coro.end.async.
+/// Will inline the musttail call if there is one.
+/// \returns true if cleanup of the coro.end block is needed, false otherwise.
+static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
+ IRBuilder<> Builder(End);
+
+ auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End);
+ if (!EndAsync) {
+ Builder.CreateRetVoid();
+ return true /*needs cleanup of coro.end block*/;
+ }
+
+ auto *MustTailCallFunc = EndAsync->getMustTailCallFunction();
+ if (!MustTailCallFunc) {
+ Builder.CreateRetVoid();
+ return true /*needs cleanup of coro.end block*/;
+ }
+
+ // Move the must tail call from the predecessor block into the end block.
+ auto *CoroEndBlock = End->getParent();
+ auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
+ assert(MustTailCallFuncBlock && "Must have a single predecessor block");
+ auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
+ auto *MustTailCall = cast<CallInst>(&*std::prev(It));
+ CoroEndBlock->getInstList().splice(
+ End->getIterator(), MustTailCallFuncBlock->getInstList(), MustTailCall);
+
+ // Insert the return instruction.
+ Builder.SetInsertPoint(End);
+ Builder.CreateRetVoid();
+ InlineFunctionInfo FnInfo;
+
+ // Remove the rest of the block, by splitting it into an unreachable block.
+ auto *BB = End->getParent();
+ BB->splitBasicBlock(End);
+ BB->getTerminator()->eraseFromParent();
+
+ auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
+ assert(InlineRes.isSuccess() && "Expected inlining to succeed");
+ (void)InlineRes;
+
+ // We have cleaned up the coro.end block above.
+ return false;
+}
+
/// Replace a non-unwind call to llvm.coro.end.
-static void replaceFallthroughCoroEnd(CoroEndInst *End,
+static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
const coro::Shape &Shape, Value *FramePtr,
bool InResume, CallGraph *CG) {
// Start inserting right before the coro.end.
@@ -188,6 +238,14 @@ static void replaceFallthroughCoroEnd(CoroEndInst *End,
Builder.CreateRetVoid();
break;
+ // In async lowering this returns.
+ case coro::ABI::Async: {
+ bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
+ if (!CoroEndBlockNeedsCleanup)
+ return;
+ break;
+ }
+
// In unique continuation lowering, the continuations always return void.
// But we may have implicitly allocated storage.
case coro::ABI::RetconOnce:
@@ -221,8 +279,9 @@ static void replaceFallthroughCoroEnd(CoroEndInst *End,
}
/// Replace an unwind call to llvm.coro.end.
-static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
- Value *FramePtr, bool InResume, CallGraph *CG){
+static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
+ Value *FramePtr, bool InResume,
+ CallGraph *CG) {
IRBuilder<> Builder(End);
switch (Shape.ABI) {
@@ -231,7 +290,9 @@ static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
if (!InResume)
return;
break;
-
+ // In async lowering this does nothing.
+ case coro::ABI::Async:
+ break;
// In continuation-lowering, this frees the continuation storage.
case coro::ABI::Retcon:
case coro::ABI::RetconOnce:
@@ -248,7 +309,7 @@ static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
}
}
-static void replaceCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
+static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
Value *FramePtr, bool InResume, CallGraph *CG) {
if (End->isUnwind())
replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
@@ -403,20 +464,24 @@ static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
OrigF.getName() + Suffix);
NewF->addParamAttr(0, Attribute::NonNull);
- NewF->addParamAttr(0, Attribute::NoAlias);
+
+ // For the async lowering ABI we can't guarantee that the context argument is
+ // not accessed via a different pointer that is not based on the argument.
+ if (Shape.ABI != coro::ABI::Async)
+ NewF->addParamAttr(0, Attribute::NoAlias);
M->getFunctionList().insert(InsertBefore, NewF);
return NewF;
}
-/// Replace uses of the active llvm.coro.suspend.retcon call with the
+/// Replace uses of the active llvm.coro.suspend.retcon/async call with the
/// arguments to the continuation function.
///
/// This assumes that the builder has a meaningful insertion point.
-void CoroCloner::replaceRetconSuspendUses() {
- assert(Shape.ABI == coro::ABI::Retcon ||
- Shape.ABI == coro::ABI::RetconOnce);
+void CoroCloner::replaceRetconOrAsyncSuspendUses() {
+ assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+ Shape.ABI == coro::ABI::Async);
auto NewS = VMap[ActiveSuspend];
if (NewS->use_empty()) return;
@@ -424,7 +489,11 @@ void CoroCloner::replaceRetconSuspendUses() {
// Copy out all the continuation arguments after the buffer pointer into
// an easily-indexed data structure for convenience.
SmallVector<Value*, 8> Args;
- for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I)
+ // The async ABI includes all arguments -- including the first argument.
+ bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
+ for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()),
+ E = NewF->arg_end();
+ I != E; ++I)
Args.push_back(&*I);
// If the suspend returns a single scalar value, we can just do a simple
@@ -470,6 +539,10 @@ void CoroCloner::replaceCoroSuspends() {
SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
break;
+ // In async lowering there are no uses of the result.
+ case coro::ABI::Async:
+ return;
+
// In returned-continuation lowering, the arguments from earlier
// continuations are theoretically arbitrary, and they should have been
// spilled.
@@ -489,10 +562,10 @@ void CoroCloner::replaceCoroSuspends() {
}
void CoroCloner::replaceCoroEnds() {
- for (CoroEndInst *CE : Shape.CoroEnds) {
+ for (AnyCoroEndInst *CE : Shape.CoroEnds) {
// We use a null call graph because there's no call graph node for
// the cloned function yet. We'll just be rebuilding that later.
- auto NewCE = cast<CoroEndInst>(VMap[CE]);
+ auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]);
replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
}
}
@@ -559,6 +632,39 @@ void CoroCloner::replaceSwiftErrorOps() {
::replaceSwiftErrorOps(*NewF, Shape, &VMap);
}
+void CoroCloner::salvageDebugInfo() {
+ SmallVector<DbgDeclareInst *, 8> Worklist;
+ SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
+ for (auto &BB : *NewF)
+ for (auto &I : BB)
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(&I))
+ Worklist.push_back(DDI);
+ for (DbgDeclareInst *DDI : Worklist) {
+ // This is a heuristic that detects declares left by CoroFrame.
+ bool LoadFromFramePtr = !isa<AllocaInst>(DDI->getAddress());
+ coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, LoadFromFramePtr);
+ }
+ // Remove all salvaged dbg.declare intrinsics that became
+ // either unreachable or stale due to the CoroSplit transformation.
+ auto IsUnreachableBlock = [&](BasicBlock *BB) {
+ return BB->hasNPredecessors(0) && BB != &NewF->getEntryBlock();
+ };
+ for (DbgDeclareInst *DDI : Worklist) {
+ if (IsUnreachableBlock(DDI->getParent()))
+ DDI->eraseFromParent();
+ else if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ // Count all non-debuginfo uses in reachable blocks.
+ unsigned Uses = 0;
+ for (auto *User : DDI->getAddress()->users())
+ if (auto *I = dyn_cast<Instruction>(User))
+ if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
+ ++Uses;
+ if (!Uses)
+ DDI->eraseFromParent();
+ }
+ }
+}
+
void CoroCloner::replaceEntryBlock() {
// In the original function, the AllocaSpillBlock is a block immediately
// following the allocation of the frame object which defines GEPs for
@@ -581,15 +687,6 @@ void CoroCloner::replaceEntryBlock() {
Builder.CreateUnreachable();
BranchToEntry->eraseFromParent();
- // Move any allocas into Entry that weren't moved into the frame.
- for (auto IT = OldEntry->begin(), End = OldEntry->end(); IT != End;) {
- Instruction &I = *IT++;
- if (!isa<AllocaInst>(&I) || I.use_empty())
- continue;
-
- I.moveBefore(*Entry, Entry->getFirstInsertionPt());
- }
-
// Branch from the entry to the appropriate place.
Builder.SetInsertPoint(Entry);
switch (Shape.ABI) {
@@ -601,19 +698,37 @@ void CoroCloner::replaceEntryBlock() {
Builder.CreateBr(SwitchBB);
break;
}
-
+ case coro::ABI::Async:
case coro::ABI::Retcon:
case coro::ABI::RetconOnce: {
// In continuation ABIs, we want to branch to immediately after the
// active suspend point. Earlier phases will have put the suspend in its
// own basic block, so just thread our jump directly to its successor.
- auto MappedCS = cast<CoroSuspendRetconInst>(VMap[ActiveSuspend]);
+ assert((Shape.ABI == coro::ABI::Async &&
+ isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
+ ((Shape.ABI == coro::ABI::Retcon ||
+ Shape.ABI == coro::ABI::RetconOnce) &&
+ isa<CoroSuspendRetconInst>(ActiveSuspend)));
+ auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
auto Branch = cast<BranchInst>(MappedCS->getNextNode());
assert(Branch->isUnconditional());
Builder.CreateBr(Branch->getSuccessor(0));
break;
}
}
+
+ // Any alloca that's still being used but not reachable from the new entry
+ // needs to be moved to the new entry.
+ Function *F = OldEntry->getParent();
+ DominatorTree DT{*F};
+ for (auto IT = inst_begin(F), End = inst_end(F); IT != End;) {
+ Instruction &I = *IT++;
+ if (!isa<AllocaInst>(&I) || I.use_empty())
+ continue;
+ if (DT.isReachableFromEntry(I.getParent()))
+ continue;
+ I.moveBefore(*Entry, Entry->getFirstInsertionPt());
+ }
}
/// Derive the value of the new frame pointer.
@@ -624,7 +739,36 @@ Value *CoroCloner::deriveNewFramePointer() {
// In switch-lowering, the argument is the frame pointer.
case coro::ABI::Switch:
return &*NewF->arg_begin();
-
+ // In async-lowering, one of the arguments is an async context as determined
+ // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of
+ // the resume function from the async context projection function associated
+ // with the active suspend. The frame is located as a tail to the async
+ // context header.
+ case coro::ABI::Async: {
+ auto *CalleeContext = NewF->getArg(Shape.AsyncLowering.ContextArgNo);
+ auto *FramePtrTy = Shape.FrameTy->getPointerTo();
+ auto *ProjectionFunc = cast<CoroSuspendAsyncInst>(ActiveSuspend)
+ ->getAsyncContextProjectionFunction();
+ auto DbgLoc =
+ cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
+ // Calling i8* (i8*)
+ auto *CallerContext = Builder.CreateCall(
+ cast<FunctionType>(ProjectionFunc->getType()->getPointerElementType()),
+ ProjectionFunc, CalleeContext);
+ CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
+ CallerContext->setDebugLoc(DbgLoc);
+ // The frame is located after the async_context header.
+ auto &Context = Builder.getContext();
+ auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
+ Type::getInt8Ty(Context), CallerContext,
+ Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr");
+ // Inline the projection function.
+ InlineFunctionInfo InlineInfo;
+ auto InlineRes = InlineFunction(*CallerContext, InlineInfo);
+ assert(InlineRes.isSuccess());
+ (void)InlineRes;
+ return Builder.CreateBitCast(FramePtrAddr, FramePtrTy);
+ }
// In continuation-lowering, the argument is the opaque storage.
case coro::ABI::Retcon:
case coro::ABI::RetconOnce: {
@@ -707,7 +851,8 @@ void CoroCloner::create() {
addFramePointerAttrs(NewAttrs, Context, 0,
Shape.FrameSize, Shape.FrameAlign);
break;
-
+ case coro::ABI::Async:
+ break;
case coro::ABI::Retcon:
case coro::ABI::RetconOnce:
// If we have a continuation prototype, just use its attributes,
@@ -737,6 +882,12 @@ void CoroCloner::create() {
// so we want to leave any returns in place.
case coro::ABI::Retcon:
break;
+ // Async lowering will insert musttail calls at all suspend points, each
+ // followed by a return.
+ // Don't change returns to unreachable because that will trip up the verifier.
+ // These returns should be unreachable from the clone.
+ case coro::ABI::Async:
+ break;
}
NewF->setAttributes(NewAttrs);
@@ -767,14 +918,14 @@ void CoroCloner::create() {
if (Shape.SwitchLowering.HasFinalSuspend)
handleFinalSuspend();
break;
-
+ case coro::ABI::Async:
case coro::ABI::Retcon:
case coro::ABI::RetconOnce:
// Replace uses of the active suspend with the corresponding
// continuation-function arguments.
assert(ActiveSuspend != nullptr &&
"no active suspend when lowering a continuation-style coroutine");
- replaceRetconSuspendUses();
+ replaceRetconOrAsyncSuspendUses();
break;
}
@@ -787,6 +938,9 @@ void CoroCloner::create() {
// Remove coro.end intrinsics.
replaceCoroEnds();
+ // Salvage debug info that points into the coroutine frame.
+ salvageDebugInfo();
+
// Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
// to suppress deallocation code.
if (Shape.ABI == coro::ABI::Switch)
@@ -811,7 +965,25 @@ static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) {
}
}
+static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
+ assert(Shape.ABI == coro::ABI::Async);
+
+ auto *FuncPtrStruct = cast<ConstantStruct>(
+ Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
+ auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0);
+ auto *OrigContextSize = FuncPtrStruct->getOperand(1);
+ auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(),
+ Shape.AsyncLowering.ContextSize);
+ auto *NewFuncPtrStruct = ConstantStruct::get(
+ FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize);
+
+ Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
+}
+
static void replaceFrameSize(coro::Shape &Shape) {
+ if (Shape.ABI == coro::ABI::Async)
+ updateAsyncFuncPointerContextSize(Shape);
+
if (Shape.CoroSizes.empty())
return;
@@ -1075,7 +1247,7 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) {
}
break;
}
-
+ case coro::ABI::Async:
case coro::ABI::Retcon:
case coro::ABI::RetconOnce:
CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
@@ -1271,6 +1443,133 @@ static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
setCoroInfo(F, Shape, Clones);
}
+static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
+ Value *Continuation) {
+ auto *ResumeIntrinsic = Suspend->getResumeFunction();
+ auto &Context = Suspend->getParent()->getParent()->getContext();
+ auto *Int8PtrTy = Type::getInt8PtrTy(Context);
+
+ IRBuilder<> Builder(ResumeIntrinsic);
+ auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
+ ResumeIntrinsic->replaceAllUsesWith(Val);
+ ResumeIntrinsic->eraseFromParent();
+ Suspend->setOperand(0, UndefValue::get(Int8PtrTy));
+}
+
+/// Coerce the arguments in \p FnArgs according to \p FnTy into \p CallArgs.
+static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
+ ArrayRef<Value *> FnArgs,
+ SmallVectorImpl<Value *> &CallArgs) {
+ size_t ArgIdx = 0;
+ for (auto paramTy : FnTy->params()) {
+ assert(ArgIdx < FnArgs.size());
+ if (paramTy != FnArgs[ArgIdx]->getType())
+ CallArgs.push_back(
+ Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy));
+ else
+ CallArgs.push_back(FnArgs[ArgIdx]);
+ ++ArgIdx;
+ }
+}
+
+CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
+ ArrayRef<Value *> Arguments,
+ IRBuilder<> &Builder) {
+ auto *FnTy =
+ cast<FunctionType>(MustTailCallFn->getType()->getPointerElementType());
+ // Coerce the arguments: LLVM optimizations seem to ignore the types in
+ // vararg functions and throw away casts in optimized mode.
+ SmallVector<Value *, 8> CallArgs;
+ coerceArguments(Builder, FnTy, Arguments, CallArgs);
+
+ auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs);
+ TailCall->setTailCallKind(CallInst::TCK_MustTail);
+ TailCall->setDebugLoc(Loc);
+ TailCall->setCallingConv(MustTailCallFn->getCallingConv());
+ return TailCall;
+}
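Editorial aside (not part of the patch): a usage sketch of createMustTailCall, mirroring the call site in splitAsyncCoroutine below; Suspend (a CoroSuspendAsyncInst) and Builder are assumed to be in scope. The three leading intrinsic-only operands are dropped, the musttail call must be immediately followed by a ret, and the thunk is then inlined away:

    SmallVector<Value *, 8> Args(Suspend->args());
    auto FnArgs = ArrayRef<Value *>(Args).drop_front(3);
    auto *TailCall = coro::createMustTailCall(Suspend->getDebugLoc(),
                                              Suspend->getMustTailCallFunction(),
                                              FnArgs, Builder);
    Builder.CreateRetVoid();        // the musttail call must precede a return
    InlineFunctionInfo FnInfo;
    auto InlineRes = InlineFunction(*TailCall, FnInfo);
    (void)InlineRes;                // asserted to succeed in the patch itself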
+
+static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
+ SmallVectorImpl<Function *> &Clones) {
+ assert(Shape.ABI == coro::ABI::Async);
+ assert(Clones.empty());
+ // Reset various things that the optimizer might have decided it
+ // "knows" about the coroutine function due to not seeing a return.
+ F.removeFnAttr(Attribute::NoReturn);
+ F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+
+ auto &Context = F.getContext();
+ auto *Int8PtrTy = Type::getInt8PtrTy(Context);
+
+ auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
+ IRBuilder<> Builder(Id);
+
+ auto *FramePtr = Id->getStorage();
+ FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy);
+ FramePtr = Builder.CreateConstInBoundsGEP1_32(
+ Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset,
+ "async.ctx.frameptr");
+
+ // Map all uses of llvm.coro.begin to the allocated frame pointer.
+ {
+ // Make sure we don't invalidate Shape.FramePtr.
+ TrackingVH<Instruction> Handle(Shape.FramePtr);
+ Shape.CoroBegin->replaceAllUsesWith(FramePtr);
+ Shape.FramePtr = Handle.getValPtr();
+ }
+
+ // Create all the functions in order after the main function.
+ auto NextF = std::next(F.getIterator());
+
+ // Create a continuation function for each of the suspend points.
+ Clones.reserve(Shape.CoroSuspends.size());
+ for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
+ auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[Idx]);
+
+ // Create the clone declaration.
+ auto *Continuation =
+ createCloneDeclaration(F, Shape, ".resume." + Twine(Idx), NextF);
+ Clones.push_back(Continuation);
+
+ // Insert a branch to a new return block immediately before the suspend
+ // point.
+ auto *SuspendBB = Suspend->getParent();
+ auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
+ auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());
+
+ // Place it before the first suspend.
+ auto *ReturnBB =
+ BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
+ Branch->setSuccessor(0, ReturnBB);
+
+ IRBuilder<> Builder(ReturnBB);
+
+ // Insert the call to the tail call function and inline it.
+ auto *Fn = Suspend->getMustTailCallFunction();
+ SmallVector<Value *, 8> Args(Suspend->args());
+ auto FnArgs = ArrayRef<Value *>(Args).drop_front(3);
+ auto *TailCall =
+ coro::createMustTailCall(Suspend->getDebugLoc(), Fn, FnArgs, Builder);
+ Builder.CreateRetVoid();
+ InlineFunctionInfo FnInfo;
+ auto InlineRes = InlineFunction(*TailCall, FnInfo);
+ assert(InlineRes.isSuccess() && "Expected inlining to succeed");
+ (void)InlineRes;
+
+ // Replace the llvm.coro.async.resume intrinsic call.
+ replaceAsyncResumeFunction(Suspend, Continuation);
+ }
+
+ assert(Clones.size() == Shape.CoroSuspends.size());
+ for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
+ auto *Suspend = Shape.CoroSuspends[Idx];
+ auto *Clone = Clones[Idx];
+
+ CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create();
+ }
+}
+
static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
SmallVectorImpl<Function *> &Clones) {
assert(Shape.ABI == coro::ABI::Retcon ||
@@ -1416,14 +1715,15 @@ namespace {
}
static coro::Shape splitCoroutine(Function &F,
- SmallVectorImpl<Function *> &Clones) {
+ SmallVectorImpl<Function *> &Clones,
+ bool ReuseFrameSlot) {
PrettyStackTraceFunction prettyStackTrace(F);
 // The suspend-crossing algorithm in buildCoroutineFrame gets tripped
// up by uses in unreachable blocks, so remove them as a first pass.
removeUnreachableBlocks(F);
- coro::Shape Shape(F);
+ coro::Shape Shape(F, ReuseFrameSlot);
if (!Shape.CoroBegin)
return Shape;
@@ -1440,6 +1740,9 @@ static coro::Shape splitCoroutine(Function &F,
case coro::ABI::Switch:
splitSwitchCoroutine(F, Shape, Clones);
break;
+ case coro::ABI::Async:
+ splitAsyncCoroutine(F, Shape, Clones);
+ break;
case coro::ABI::Retcon:
case coro::ABI::RetconOnce:
splitRetconCoroutine(F, Shape, Clones);
@@ -1476,40 +1779,46 @@ static void updateCallGraphAfterCoroutineSplit(
if (!Shape.CoroBegin)
return;
- for (llvm::CoroEndInst *End : Shape.CoroEnds) {
+ for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
auto &Context = End->getContext();
End->replaceAllUsesWith(ConstantInt::getFalse(Context));
End->eraseFromParent();
}
- postSplitCleanup(N.getFunction());
+ if (!Clones.empty()) {
+ switch (Shape.ABI) {
+ case coro::ABI::Switch:
+ // Each clone in the Switch lowering is independent of the other clones.
+ // Let the LazyCallGraph know about each one separately.
+ for (Function *Clone : Clones)
+ CG.addSplitFunction(N.getFunction(), *Clone);
+ break;
+ case coro::ABI::Async:
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce:
+ // Each clone in the Async/Retcon lowering references each of the other
+ // clones. Let the LazyCallGraph know about all of them at once.
+ CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
+ break;
+ }
- // To insert the newly created coroutine funclets 'f.resume', 'f.destroy', and
- // 'f.cleanup' into the same SCC as the coroutine 'f' they were outlined from,
- // we make use of the CallGraphUpdater class, which can modify the internal
- // state of the LazyCallGraph.
- for (Function *Clone : Clones)
- CG.addNewFunctionIntoRefSCC(*Clone, C.getOuterRefSCC());
-
- // We've inserted instructions into coroutine 'f' that reference the three new
- // coroutine funclets. We must now update the call graph so that reference
- // edges between 'f' and its funclets are added to it. LazyCallGraph only
- // allows CGSCC passes to insert "trivial" reference edges. We've ensured
- // above, by inserting the funclets into the same SCC as the corutine, that
- // the edges are trivial.
- //
- // N.B.: If we didn't update the call graph here, a CGSCCToFunctionPassAdaptor
- // later in this CGSCC pass pipeline may be run, triggering a call graph
- // update of its own. Function passes run by the adaptor are not permitted to
- // add new edges of any kind to the graph, and the new edges inserted by this
- // pass would be misattributed to that unrelated function pass.
- updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
+ // Let the CGSCC infra handle the changes to the original function.
+ updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
+ }
+
+ // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
+ // to the split functions.
+ postSplitCleanup(N.getFunction());
+ updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
}
// When we see the coroutine the first time, we insert an indirect call to a
 // devirt trigger function and mark the coroutine as now ready for
// split.
-static void prepareForSplit(Function &F, CallGraph &CG) {
+// Async lowering uses this after it has split the function to restart the
+// pipeline.
+static void prepareForSplit(Function &F, CallGraph &CG,
+ bool MarkForAsyncRestart = false) {
Module &M = *F.getParent();
LLVMContext &Context = F.getContext();
#ifndef NDEBUG
@@ -1517,7 +1826,9 @@ static void prepareForSplit(Function &F, CallGraph &CG) {
assert(DevirtFn && "coro.devirt.trigger function not found");
#endif
- F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
+ F.addFnAttr(CORO_PRESPLIT_ATTR, MarkForAsyncRestart
+ ? ASYNC_RESTART_AFTER_SPLIT
+ : PREPARED_FOR_SPLIT);
// Insert an indirect call sequence that will be devirtualized by CoroElide
// pass:
@@ -1525,7 +1836,9 @@ static void prepareForSplit(Function &F, CallGraph &CG) {
// %1 = bitcast i8* %0 to void(i8*)*
// call void %1(i8* null)
coro::LowererBase Lowerer(M);
- Instruction *InsertPt = F.getEntryBlock().getTerminator();
+ Instruction *InsertPt =
+ MarkForAsyncRestart ? F.getEntryBlock().getFirstNonPHIOrDbgOrLifetime()
+ : F.getEntryBlock().getTerminator();
auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
auto *DevirtFnAddr =
Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
@@ -1564,6 +1877,42 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
}
/// Replace a call to llvm.coro.prepare.retcon.
+static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
+ LazyCallGraph::SCC &C) {
+ auto CastFn = Prepare->getArgOperand(0); // as an i8*
+ auto Fn = CastFn->stripPointerCasts(); // as its original type
+
+ // Attempt to peephole this pattern:
+ // %0 = bitcast [[TYPE]] @some_function to i8*
+ // %1 = call @llvm.coro.prepare.retcon(i8* %0)
+ // %2 = bitcast %1 to [[TYPE]]
+ // ==>
+ // %2 = @some_function
+ for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); UI != UE;) {
+ // Look for bitcasts back to the original function type.
+ auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
+ if (!Cast || Cast->getType() != Fn->getType())
+ continue;
+
+ // Replace and remove the cast.
+ Cast->replaceAllUsesWith(Fn);
+ Cast->eraseFromParent();
+ }
+
+ // Replace any remaining uses with the function as an i8*.
+ // This can never directly be a callee, so we don't need to update CG.
+ Prepare->replaceAllUsesWith(CastFn);
+ Prepare->eraseFromParent();
+
+ // Kill dead bitcasts.
+ while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
+ if (!Cast->use_empty())
+ break;
+ CastFn = Cast->getOperand(0);
+ Cast->eraseFromParent();
+ }
+}
+/// Replace a call to llvm.coro.prepare.retcon.
static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
auto CastFn = Prepare->getArgOperand(0); // as an i8*
auto Fn = CastFn->stripPointerCasts(); // as its original type
@@ -1618,6 +1967,19 @@ static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
}
}
+static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
+ LazyCallGraph::SCC &C) {
+ bool Changed = false;
+ for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); PI != PE;) {
+ // Intrinsics can only be used in calls.
+ auto *Prepare = cast<CallInst>((PI++)->getUser());
+ replacePrepare(Prepare, CG, C);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
/// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent
/// IPO from operating on calls to a retcon coroutine before it's been
/// split. This is only safe to do after we've split all retcon
@@ -1638,8 +2000,17 @@ static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
}
static bool declaresCoroSplitIntrinsics(const Module &M) {
- return coro::declaresIntrinsics(
- M, {"llvm.coro.begin", "llvm.coro.prepare.retcon"});
+ return coro::declaresIntrinsics(M, {"llvm.coro.begin",
+ "llvm.coro.prepare.retcon",
+ "llvm.coro.prepare.async"});
+}
+
+static void addPrepareFunction(const Module &M,
+ SmallVectorImpl<Function *> &Fns,
+ StringRef Name) {
+ auto *PrepareFn = M.getFunction(Name);
+ if (PrepareFn && !PrepareFn->use_empty())
+ Fns.push_back(PrepareFn);
}
PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
@@ -1655,10 +2026,10 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
if (!declaresCoroSplitIntrinsics(M))
return PreservedAnalyses::all();
- // Check for uses of llvm.coro.prepare.retcon.
- const auto *PrepareFn = M.getFunction("llvm.coro.prepare.retcon");
- if (PrepareFn && PrepareFn->use_empty())
- PrepareFn = nullptr;
+ // Check for uses of llvm.coro.prepare.retcon/async.
+ SmallVector<Function *, 2> PrepareFns;
+ addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
+ addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");
// Find coroutines for processing.
SmallVector<LazyCallGraph::Node *, 4> Coroutines;
@@ -1666,12 +2037,14 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
if (N.getFunction().hasFnAttribute(CORO_PRESPLIT_ATTR))
Coroutines.push_back(&N);
- if (Coroutines.empty() && !PrepareFn)
+ if (Coroutines.empty() && PrepareFns.empty())
return PreservedAnalyses::all();
- if (Coroutines.empty())
- llvm_unreachable("new pass manager cannot yet handle "
- "'llvm.coro.prepare.retcon'");
+ if (Coroutines.empty()) {
+ for (auto *PrepareFn : PrepareFns) {
+ replaceAllPrepares(PrepareFn, CG, C);
+ }
+ }
// Split all the coroutines.
for (LazyCallGraph::Node *N : Coroutines) {
@@ -1681,17 +2054,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
<< "' state: " << Value << "\n");
if (Value == UNPREPARED_FOR_SPLIT) {
- // Enqueue a second iteration of the CGSCC pipeline.
- // N.B.:
- // The CoroSplitLegacy pass "triggers" a restart of the CGSCC pass
- // pipeline by inserting an indirect function call that the
- // CoroElideLegacy pass then replaces with a direct function call. The
- // legacy CGSCC pipeline's implicit behavior was as if wrapped in the new
- // pass manager abstraction DevirtSCCRepeatedPass.
- //
- // This pass does not need to "trigger" another run of the pipeline.
- // Instead, it simply enqueues the same RefSCC onto the pipeline's
- // worklist.
+ // Enqueue a second iteration of the CGSCC pipeline on this SCC.
UR.CWorklist.insert(&C);
F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
continue;
@@ -1699,13 +2062,23 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
F.removeFnAttr(CORO_PRESPLIT_ATTR);
SmallVector<Function *, 4> Clones;
- const coro::Shape Shape = splitCoroutine(F, Clones);
+ const coro::Shape Shape = splitCoroutine(F, Clones, ReuseFrameSlot);
updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
+
+ if ((Shape.ABI == coro::ABI::Async || Shape.ABI == coro::ABI::Retcon ||
+ Shape.ABI == coro::ABI::RetconOnce) &&
+ !Shape.CoroSuspends.empty()) {
+ // Run the CGSCC pipeline on the newly split functions.
+ // All clones will be in the same RefSCC, so choose a random clone.
+ UR.RCWorklist.insert(CG.lookupRefSCC(CG.get(*Clones[0])));
+ }
}
- if (PrepareFn)
- llvm_unreachable("new pass manager cannot yet handle "
- "'llvm.coro.prepare.retcon'");
+ if (!PrepareFns.empty()) {
+ for (auto *PrepareFn : PrepareFns) {
+ replaceAllPrepares(PrepareFn, CG, C);
+ }
+ }
return PreservedAnalyses::none();
}
@@ -1723,11 +2096,13 @@ namespace {
struct CoroSplitLegacy : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- CoroSplitLegacy() : CallGraphSCCPass(ID) {
+ CoroSplitLegacy(bool ReuseFrameSlot = false)
+ : CallGraphSCCPass(ID), ReuseFrameSlot(ReuseFrameSlot) {
initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry());
}
bool Run = false;
+ bool ReuseFrameSlot;
// A coroutine is identified by the presence of coro.begin intrinsic, if
// we don't have any, this pass has nothing to do.
@@ -1741,10 +2116,10 @@ struct CoroSplitLegacy : public CallGraphSCCPass {
return false;
// Check for uses of llvm.coro.prepare.retcon.
- auto PrepareFn =
- SCC.getCallGraph().getModule().getFunction("llvm.coro.prepare.retcon");
- if (PrepareFn && PrepareFn->use_empty())
- PrepareFn = nullptr;
+ SmallVector<Function *, 2> PrepareFns;
+ auto &M = SCC.getCallGraph().getModule();
+ addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
+ addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");
// Find coroutines for processing.
SmallVector<Function *, 4> Coroutines;
@@ -1753,13 +2128,17 @@ struct CoroSplitLegacy : public CallGraphSCCPass {
if (F->hasFnAttribute(CORO_PRESPLIT_ATTR))
Coroutines.push_back(F);
- if (Coroutines.empty() && !PrepareFn)
+ if (Coroutines.empty() && PrepareFns.empty())
return false;
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- if (Coroutines.empty())
- return replaceAllPrepares(PrepareFn, CG);
+ if (Coroutines.empty()) {
+ bool Changed = false;
+ for (auto *PrepareFn : PrepareFns)
+ Changed |= replaceAllPrepares(PrepareFn, CG);
+ return Changed;
+ }
createDevirtTriggerFunc(CG, SCC);
@@ -1769,6 +2148,12 @@ struct CoroSplitLegacy : public CallGraphSCCPass {
StringRef Value = Attr.getValueAsString();
LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName()
<< "' state: " << Value << "\n");
+ // Async lowering marks coroutines to trigger a restart of the pipeline
+ // after it has split them.
+ if (Value == ASYNC_RESTART_AFTER_SPLIT) {
+ F->removeFnAttr(CORO_PRESPLIT_ATTR);
+ continue;
+ }
if (Value == UNPREPARED_FOR_SPLIT) {
prepareForSplit(*F, CG);
continue;
@@ -1776,11 +2161,17 @@ struct CoroSplitLegacy : public CallGraphSCCPass {
F->removeFnAttr(CORO_PRESPLIT_ATTR);
SmallVector<Function *, 4> Clones;
- const coro::Shape Shape = splitCoroutine(*F, Clones);
+ const coro::Shape Shape = splitCoroutine(*F, Clones, ReuseFrameSlot);
updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC);
+ if (Shape.ABI == coro::ABI::Async) {
+ // Restart SCC passes.
+ // Mark function for CoroElide pass. It will devirtualize causing a
+ // restart of the SCC pipeline.
+ prepareForSplit(*F, CG, true /*MarkForAsyncRestart*/);
+ }
}
- if (PrepareFn)
+ for (auto *PrepareFn : PrepareFns)
replaceAllPrepares(PrepareFn, CG);
return true;
@@ -1807,4 +2198,6 @@ INITIALIZE_PASS_END(
"Split coroutine into a set of functions driving its state machine", false,
false)
-Pass *llvm::createCoroSplitLegacyPass() { return new CoroSplitLegacy(); }
+Pass *llvm::createCoroSplitLegacyPass(bool ReuseFrameSlot) {
+ return new CoroSplitLegacy(ReuseFrameSlot);
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 87c3a8b0d0cf..6699a5c46313 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -69,7 +69,7 @@ static void addCoroutineScalarOptimizerPasses(const PassManagerBuilder &Builder,
static void addCoroutineSCCPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
- PM.add(createCoroSplitLegacyPass());
+ PM.add(createCoroSplitLegacyPass(Builder.OptLevel != 0));
}
static void addCoroutineOptimizerLastPasses(const PassManagerBuilder &Builder,
@@ -124,17 +124,23 @@ static bool isCoroutineIntrinsicName(StringRef Name) {
// NOTE: Must be sorted!
static const char *const CoroIntrinsics[] = {
"llvm.coro.alloc",
+ "llvm.coro.async.context.alloc",
+ "llvm.coro.async.context.dealloc",
+ "llvm.coro.async.store_resume",
"llvm.coro.begin",
"llvm.coro.destroy",
"llvm.coro.done",
"llvm.coro.end",
+ "llvm.coro.end.async",
"llvm.coro.frame",
"llvm.coro.free",
"llvm.coro.id",
+ "llvm.coro.id.async",
"llvm.coro.id.retcon",
"llvm.coro.id.retcon.once",
"llvm.coro.noop",
"llvm.coro.param",
+ "llvm.coro.prepare.async",
"llvm.coro.prepare.retcon",
"llvm.coro.promise",
"llvm.coro.resume",
@@ -142,6 +148,7 @@ static bool isCoroutineIntrinsicName(StringRef Name) {
"llvm.coro.size",
"llvm.coro.subfn.addr",
"llvm.coro.suspend",
+ "llvm.coro.suspend.async",
"llvm.coro.suspend.retcon",
};
return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1;
@@ -269,6 +276,12 @@ void coro::Shape::buildFrom(Function &F) {
if (II->use_empty())
UnusedCoroSaves.push_back(cast<CoroSaveInst>(II));
break;
+ case Intrinsic::coro_suspend_async: {
+ auto *Suspend = cast<CoroSuspendAsyncInst>(II);
+ Suspend->checkWellFormed();
+ CoroSuspends.push_back(Suspend);
+ break;
+ }
case Intrinsic::coro_suspend_retcon: {
auto Suspend = cast<CoroSuspendRetconInst>(II);
CoroSuspends.push_back(Suspend);
@@ -304,11 +317,16 @@ void coro::Shape::buildFrom(Function &F) {
CoroBegin = CB;
break;
}
+ case Intrinsic::coro_end_async:
case Intrinsic::coro_end:
- CoroEnds.push_back(cast<CoroEndInst>(II));
- if (CoroEnds.back()->isFallthrough()) {
+ CoroEnds.push_back(cast<AnyCoroEndInst>(II));
+ if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(II)) {
+ AsyncEnd->checkWellFormed();
+ }
+ if (CoroEnds.back()->isFallthrough() && isa<CoroEndInst>(II)) {
// Make sure that the fallthrough coro.end is the first element in the
// CoroEnds vector.
+ // Note: I don't think this is necessary anymore.
if (CoroEnds.size() > 1) {
if (CoroEnds.front()->isFallthrough())
report_fatal_error(
@@ -341,7 +359,7 @@ void coro::Shape::buildFrom(Function &F) {
}
// Replace all coro.ends with unreachable instruction.
- for (CoroEndInst *CE : CoroEnds)
+ for (AnyCoroEndInst *CE : CoroEnds)
changeToUnreachable(CE, /*UseLLVMTrap=*/false);
return;
@@ -371,7 +389,23 @@ void coro::Shape::buildFrom(Function &F) {
}
break;
}
-
+ case Intrinsic::coro_id_async: {
+ auto *AsyncId = cast<CoroIdAsyncInst>(Id);
+ AsyncId->checkWellFormed();
+ this->ABI = coro::ABI::Async;
+ this->AsyncLowering.Context = AsyncId->getStorage();
+ this->AsyncLowering.ContextArgNo = AsyncId->getStorageArgumentIndex();
+ this->AsyncLowering.ContextHeaderSize = AsyncId->getStorageSize();
+ this->AsyncLowering.ContextAlignment =
+ AsyncId->getStorageAlignment().value();
+ this->AsyncLowering.AsyncFuncPointer = AsyncId->getAsyncFunctionPointer();
+ auto &Context = F.getContext();
+ auto *Int8PtrTy = Type::getInt8PtrTy(Context);
+ auto *VoidTy = Type::getVoidTy(Context);
+ this->AsyncLowering.AsyncFuncTy =
+ FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy, Int8PtrTy}, false);
+ break;
+ };
case Intrinsic::coro_id_retcon:
case Intrinsic::coro_id_retcon_once: {
auto ContinuationId = cast<AnyCoroIdRetconInst>(Id);
@@ -512,6 +546,8 @@ Value *coro::Shape::emitAlloc(IRBuilder<> &Builder, Value *Size,
addCallToCallGraph(CG, Call, Alloc);
return Call;
}
+ case coro::ABI::Async:
+ llvm_unreachable("can't allocate memory in coro async-lowering");
}
llvm_unreachable("Unknown coro::ABI enum");
}
@@ -532,6 +568,8 @@ void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr,
addCallToCallGraph(CG, Call, Dealloc);
return;
}
+ case coro::ABI::Async:
+ llvm_unreachable("can't allocate memory in coro async-lowering");
}
llvm_unreachable("Unknown coro::ABI enum");
}
@@ -633,6 +671,67 @@ void AnyCoroIdRetconInst::checkWellFormed() const {
checkWFDealloc(this, getArgOperand(DeallocArg));
}
+static void checkAsyncFuncPointer(const Instruction *I, Value *V) {
+ auto *AsyncFuncPtrAddr = dyn_cast<GlobalVariable>(V->stripPointerCasts());
+ if (!AsyncFuncPtrAddr)
+ fail(I, "llvm.coro.id.async async function pointer not a global", V);
+
+ auto *StructTy =
+ cast<StructType>(AsyncFuncPtrAddr->getType()->getPointerElementType());
+ if (StructTy->isOpaque() || !StructTy->isPacked() ||
+ StructTy->getNumElements() != 2 ||
+ !StructTy->getElementType(0)->isIntegerTy(32) ||
+ !StructTy->getElementType(1)->isIntegerTy(32))
+ fail(I,
+ "llvm.coro.id.async async function pointer argument's type is not "
+ "<{i32, i32}>",
+ V);
+}
+
+void CoroIdAsyncInst::checkWellFormed() const {
+ checkConstantInt(this, getArgOperand(SizeArg),
+ "size argument to coro.id.async must be constant");
+ checkConstantInt(this, getArgOperand(AlignArg),
+ "alignment argument to coro.id.async must be constant");
+ checkConstantInt(this, getArgOperand(StorageArg),
+ "storage argument offset to coro.id.async must be constant");
+ checkAsyncFuncPointer(this, getArgOperand(AsyncFuncPtrArg));
+}
+
+static void checkAsyncContextProjectFunction(const Instruction *I,
+ Function *F) {
+ auto *FunTy = cast<FunctionType>(F->getType()->getPointerElementType());
+ if (!FunTy->getReturnType()->isPointerTy() ||
+ !FunTy->getReturnType()->getPointerElementType()->isIntegerTy(8))
+ fail(I,
+ "llvm.coro.suspend.async resume function projection function must "
+ "return an i8* type",
+ F);
+ if (FunTy->getNumParams() != 1 || !FunTy->getParamType(0)->isPointerTy() ||
+ !FunTy->getParamType(0)->getPointerElementType()->isIntegerTy(8))
+ fail(I,
+ "llvm.coro.suspend.async resume function projection function must "
+ "take one i8* type as parameter",
+ F);
+}
+
+void CoroSuspendAsyncInst::checkWellFormed() const {
+ checkAsyncContextProjectFunction(this, getAsyncContextProjectionFunction());
+}
+
+void CoroAsyncEndInst::checkWellFormed() const {
+ auto *MustTailCallFunc = getMustTailCallFunction();
+ if (!MustTailCallFunc)
+ return;
+ auto *FnTy =
+ cast<FunctionType>(MustTailCallFunc->getType()->getPointerElementType());
+ if (FnTy->getNumParams() != (getNumArgOperands() - 3))
+ fail(this,
+ "llvm.coro.end.async must tail call function argument type must "
+ "match the tail arguments",
+ MustTailCallFunc);
+}
+
void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCoroEarlyLegacyPass());
}
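As an aside on the checkAsyncFuncPointer() verifier added above: it only accepts an async function pointer that is a global whose type is the packed struct <{ i32, i32 }>. The following stand-alone C++ sketch, which is not part of the patch (the helper name and the zero initializer are invented for illustration), builds a global that would pass that check:

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

// Illustration only: create a packed <{ i32, i32 }> global of the shape the
// well-formedness check expects. Name and field values are arbitrary.
static llvm::GlobalVariable *makeAsyncFuncPointer(llvm::Module &M) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  llvm::StructType *STy =
      llvm::StructType::get(Ctx, {I32, I32}, /*isPacked=*/true);
  llvm::Constant *Init = llvm::ConstantStruct::get(
      STy, {llvm::ConstantInt::get(I32, 0), llvm::ConstantInt::get(I32, 0)});
  return new llvm::GlobalVariable(M, STy, /*isConstant=*/true,
                                  llvm::GlobalValue::InternalLinkage, Init,
                                  "my_async_func_pointer");
}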
diff --git a/contrib/llvm-project/llvm/lib/Transforms/HelloNew/HelloWorld.cpp b/contrib/llvm-project/llvm/lib/Transforms/HelloNew/HelloWorld.cpp
new file mode 100644
index 000000000000..dea94f8a8f62
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/HelloNew/HelloWorld.cpp
@@ -0,0 +1,17 @@
+//===-- HelloWorld.cpp - Example Transformations --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/HelloNew/HelloWorld.h"
+
+using namespace llvm;
+
+PreservedAnalyses HelloWorldPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ errs() << F.getName() << "\n";
+ return PreservedAnalyses::all();
+}
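For context, the pass above only needs a small header; a minimal sketch of what llvm/Transforms/HelloNew/HelloWorld.h is assumed to declare (a new-pass-manager function pass whose run() matches the definition above) looks like this:

#ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
#define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H

#include "llvm/IR/PassManager.h"

namespace llvm {

// New-pass-manager function pass; prints each visited function's name.
class HelloWorldPass : public PassInfoMixin<HelloWorldPass> {
public:
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};

} // namespace llvm

#endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H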
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 53f9512f86f3..532599b42e0d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -13,8 +13,10 @@
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -39,12 +41,19 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
- InlineFunctionInfo IFI(/*cg=*/nullptr, GetAssumptionCache);
+ auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
SmallSetVector<CallBase *, 16> Calls;
bool Changed = false;
SmallVector<Function *, 16> InlinedFunctions;
- for (Function &F : M)
+ for (Function &F : M) {
+ // When a callee coroutine function is inlined into a caller coroutine
+ // function before the coro-split pass, the coro-early pass cannot handle
+ // it well. So we don't inline a coroutine function that has not been
+ // split yet.
+ if (F.isPresplitCoroutine())
+ continue;
+
if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) &&
isInlineViable(F).isSuccess()) {
Calls.clear();
@@ -54,18 +63,41 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
if (CB->getCalledFunction() == &F)
Calls.insert(CB);
- for (CallBase *CB : Calls)
- // FIXME: We really shouldn't be able to fail to inline at this point!
- // We should do something to log or check the inline failures here.
- Changed |=
- InlineFunction(*CB, IFI, /*CalleeAAR=*/nullptr, InsertLifetime)
- .isSuccess();
+ for (CallBase *CB : Calls) {
+ Function *Caller = CB->getCaller();
+ OptimizationRemarkEmitter ORE(Caller);
+ auto OIC = shouldInline(
+ *CB,
+ [&](CallBase &CB) {
+ return InlineCost::getAlways("always inline attribute");
+ },
+ ORE);
+ assert(OIC);
+ emitInlinedInto(ORE, CB->getDebugLoc(), CB->getParent(), F, *Caller,
+ *OIC, false, DEBUG_TYPE);
+
+ InlineFunctionInfo IFI(
+ /*cg=*/nullptr, GetAssumptionCache, &PSI,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(F));
+
+ InlineResult Res = InlineFunction(
+ *CB, IFI, &FAM.getResult<AAManager>(F), InsertLifetime);
+ assert(Res.isSuccess() && "unexpected failure to inline");
+ (void)Res;
+
+ // Merge the attributes based on the inlining.
+ AttributeFuncs::mergeAttributesForInlining(*Caller, F);
+
+ Changed = true;
+ }
// Remember to try and delete this function afterward. This both avoids
// re-walking the rest of the module and avoids dealing with any iterator
// invalidation issues while deleting functions.
InlinedFunctions.push_back(&F);
}
+ }
// Remove any live functions.
erase_if(InlinedFunctions, [&](Function *F) {
@@ -158,6 +190,13 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) {
if (!Callee)
return InlineCost::getNever("indirect call");
+ // When a callee coroutine function is inlined into a caller coroutine
+ // function before the coro-split pass, the coro-early pass cannot handle
+ // it well. So we don't inline a coroutine function that has not been
+ // split yet.
+ if (Callee->isPresplitCoroutine())
+ return InlineCost::getNever("unsplit coroutine call");
+
// FIXME: We shouldn't even get here for declarations.
if (Callee->isDeclaration())
return InlineCost::getNever("no definition");
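For reference, the functions this legacy pass (and the new-PM AlwaysInlinerPass above) inlines are those carrying Attribute::AlwaysInline, which Clang emits for source such as the following hypothetical example:

// Illustration only; names are invented. Clang lowers always_inline to the
// 'alwaysinline' IR attribute that the pass checks before inlining.
__attribute__((always_inline)) static inline int twice(int X) { return 2 * X; }

int user(int X) { return twice(X) + 1; }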
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Annotation2Metadata.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Annotation2Metadata.cpp
new file mode 100644
index 000000000000..5ca4e24df8fc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Annotation2Metadata.cpp
@@ -0,0 +1,106 @@
+//===-- Annotation2Metadata.cpp - Add !annotation metadata. ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Add !annotation metadata for entries in @llvm.global.annotations, generated
+// using __attribute__((annotate("_name"))) on functions in Clang.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/Annotation2Metadata.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "annotation2metadata"
+
+static bool convertAnnotation2Metadata(Module &M) {
+ // Only add !annotation metadata if the corresponding remarks pass is also
+ // enabled.
+ if (!OptimizationRemarkEmitter::allowExtraAnalysis(M.getContext(),
+ "annotation-remarks"))
+ return false;
+
+ auto *Annotations = M.getGlobalVariable("llvm.global.annotations");
+ auto *C = dyn_cast_or_null<Constant>(Annotations);
+ if (!C || C->getNumOperands() != 1)
+ return false;
+
+ C = cast<Constant>(C->getOperand(0));
+
+ // Iterate over all entries in C and attach !annotation metadata to suitable
+ // entries.
+ for (auto &Op : C->operands()) {
+ // Look at the operands to check if we can use the entry to generate
+ // !annotation metadata.
+ auto *OpC = dyn_cast<ConstantStruct>(&Op);
+ if (!OpC || OpC->getNumOperands() != 4)
+ continue;
+ auto *StrGEP = dyn_cast<ConstantExpr>(OpC->getOperand(1));
+ if (!StrGEP || StrGEP->getNumOperands() < 2)
+ continue;
+ auto *StrC = dyn_cast<GlobalValue>(StrGEP->getOperand(0));
+ if (!StrC)
+ continue;
+ auto *StrData = dyn_cast<ConstantDataSequential>(StrC->getOperand(0));
+ if (!StrData)
+ continue;
+ // Look through bitcast.
+ auto *Bitcast = dyn_cast<ConstantExpr>(OpC->getOperand(0));
+ if (!Bitcast || Bitcast->getOpcode() != Instruction::BitCast)
+ continue;
+ auto *Fn = dyn_cast<Function>(Bitcast->getOperand(0));
+ if (!Fn)
+ continue;
+
+ // Add annotation to all instructions in the function.
+ for (auto &I : instructions(Fn))
+ I.addAnnotationMetadata(StrData->getAsCString());
+ }
+ return true;
+}
+
+namespace {
+struct Annotation2MetadataLegacy : public ModulePass {
+ static char ID;
+
+ Annotation2MetadataLegacy() : ModulePass(ID) {
+ initializeAnnotation2MetadataLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override { return convertAnnotation2Metadata(M); }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
+} // end anonymous namespace
+
+char Annotation2MetadataLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(Annotation2MetadataLegacy, DEBUG_TYPE,
+ "Annotation2Metadata", false, false)
+INITIALIZE_PASS_END(Annotation2MetadataLegacy, DEBUG_TYPE,
+ "Annotation2Metadata", false, false)
+
+ModulePass *llvm::createAnnotation2MetadataLegacyPass() {
+ return new Annotation2MetadataLegacy();
+}
+
+PreservedAnalyses Annotation2MetadataPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ convertAnnotation2Metadata(M);
+ return PreservedAnalyses::all();
+}
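To illustrate the input convertAnnotation2Metadata() walks: Clang populates @llvm.global.annotations from the annotate attribute, so a function like the hypothetical one below ends up with !annotation metadata on each of its instructions once the corresponding "annotation-remarks" analysis is enabled:

// Illustration only; the function and annotation string are invented.
__attribute__((annotate("my_marker")))
int annotated_example(int X) {
  return X + 1;
}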
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index ad0d7eb51507..7998a1ae5c6e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -33,11 +33,11 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
@@ -142,7 +142,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Simple byval argument? Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
- Params.insert(Params.end(), STy->element_begin(), STy->element_end());
+ llvm::append_range(Params, STy->elements());
ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
AttributeSet());
++NumByValArgsPromoted;
@@ -153,10 +153,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
++NumArgumentsDead;
-
- // There may be remaining metadata uses of the argument for things like
- // llvm.dbg.value. Replace them with undef.
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
} else {
// Okay, this is being promoted. This means that the only uses are loads
// or GEPs which are only used by loads
@@ -164,13 +160,19 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// In this table, we will track which indices are loaded from the argument
// (where direct loads are tracked as no indices).
ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- for (User *U : I->users()) {
+ for (User *U : make_early_inc_range(I->users())) {
Instruction *UI = cast<Instruction>(U);
Type *SrcTy;
if (LoadInst *L = dyn_cast<LoadInst>(UI))
SrcTy = L->getType();
else
SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
+ // Skip dead GEPs and remove them.
+ if (isa<GetElementPtrInst>(UI) && UI->use_empty()) {
+ UI->eraseFromParent();
+ continue;
+ }
+
IndicesVector Indices;
Indices.reserve(UI->getNumOperands() - 1);
// Since loads will only have a single operand, and GEPs only a single
@@ -218,9 +220,11 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
F->getName());
NF->copyAttributesFrom(F);
+ NF->copyMetadata(F, 0);
- // Patch the pointer to LLVM function in debug info descriptor.
- NF->setSubprogram(F->getSubprogram());
+ // The new function will have the !dbg metadata copied from the original
+ // function. The original function may not be deleted, and dbg metadata need
+ // to be unique so we need to drop it.
F->setSubprogram(nullptr);
LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
@@ -414,6 +418,11 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
continue;
}
+ // There potentially are metadata uses for things like llvm.dbg.value.
+ // Replace them with undef, after handling the other regular uses.
+ auto RauwUndefMetadata = make_scope_exit(
+ [&]() { I->replaceAllUsesWith(UndefValue::get(I->getType())); });
+
if (I->use_empty())
continue;
@@ -433,6 +442,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
<< "' in function '" << F->getName() << "'\n");
} else {
GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
+ assert(!GEP->use_empty() &&
+ "GEPs without uses should be cleaned up already");
IndicesVector Operands;
Operands.reserve(GEP->getNumIndices());
for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
@@ -465,7 +476,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
GEP->eraseFromParent();
}
}
-
// Increment I2 past all of the arguments added for this promoted pointer.
std::advance(I2, ArgIndices.size());
}
@@ -672,11 +682,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
if (GEP->use_empty()) {
// Dead GEP's cause trouble later. Just remove them if we run into
// them.
- GEP->eraseFromParent();
- // TODO: This runs the above loop over and over again for dead GEPs
- // Couldn't we just do increment the UI iterator earlier and erase the
- // use?
- return isSafeToPromoteArgument(Arg, ByValTy, AAR, MaxElements);
+ continue;
}
if (!UpdateBaseTy(GEP->getSourceElementType()))
@@ -816,14 +822,12 @@ static bool canPaddingBeAccessed(Argument *arg) {
// Scan through the uses recursively to make sure the pointer is always used
// sanely.
- SmallVector<Value *, 16> WorkList;
- WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
+ SmallVector<Value *, 16> WorkList(arg->users());
while (!WorkList.empty()) {
- Value *V = WorkList.back();
- WorkList.pop_back();
+ Value *V = WorkList.pop_back_val();
if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
if (PtrValues.insert(V).second)
- WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
+ llvm::append_range(WorkList, V->users());
} else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
Stores.push_back(Store);
} else if (!isa<LoadInst>(V)) {
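One of the ArgumentPromotion changes above defers the replace-with-undef of an argument's metadata uses behind a llvm::make_scope_exit guard, so it runs only after the regular uses have been rewritten. A minimal stand-alone sketch of that idiom (output strings invented):

#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/raw_ostream.h"

static void scopeExitDemo() {
  // The lambda runs when Guard is destroyed at the end of this scope, which
  // is how RauwUndefMetadata is sequenced after the other argument rewrites.
  auto Guard = llvm::make_scope_exit([] { llvm::errs() << "runs last\n"; });
  llvm::errs() << "runs first\n";
}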
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
index f96dac5f3515..03ad45135001 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -15,31 +15,47 @@
#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
+#include <string>
using namespace llvm;
#define DEBUG_TYPE "attributor"
+DEBUG_COUNTER(ManifestDBGCounter, "attributor-manifest",
+ "Determine what attributes are manifested in the IR");
+
STATISTIC(NumFnDeleted, "Number of function deleted");
STATISTIC(NumFnWithExactDefinition,
"Number of functions with exact definitions");
STATISTIC(NumFnWithoutExactDefinition,
"Number of functions without exact definitions");
-STATISTIC(NumFnShallowWrapperCreated, "Number of shallow wrappers created");
+STATISTIC(NumFnShallowWrappersCreated, "Number of shallow wrappers created");
STATISTIC(NumAttributesTimedOut,
"Number of abstract attributes timed out before fixpoint");
STATISTIC(NumAttributesValidFixpoint,
@@ -61,6 +77,14 @@ static cl::opt<unsigned>
MaxFixpointIterations("attributor-max-iterations", cl::Hidden,
cl::desc("Maximal number of fixpoint iterations."),
cl::init(32));
+
+static cl::opt<unsigned, true> MaxInitializationChainLengthX(
+ "attributor-max-initialization-chain-length", cl::Hidden,
+ cl::desc(
+ "Maximal number of chained initializations (to avoid stack overflows)"),
+ cl::location(MaxInitializationChainLength), cl::init(1024));
+unsigned llvm::MaxInitializationChainLength;
+
static cl::opt<bool> VerifyMaxFixpointIterations(
"attributor-max-iterations-verify", cl::Hidden,
cl::desc("Verify that max-iterations is a tight bound for a fixpoint"),
@@ -79,20 +103,52 @@ static cl::opt<bool>
"wrappers for non-exact definitions."),
cl::init(false));
+static cl::opt<bool>
+ AllowDeepWrapper("attributor-allow-deep-wrappers", cl::Hidden,
+ cl::desc("Allow the Attributor to use IP information "
+ "derived from non-exact functions via cloning"),
+ cl::init(false));
+
+// These options can only used for debug builds.
+#ifndef NDEBUG
static cl::list<std::string>
SeedAllowList("attributor-seed-allow-list", cl::Hidden,
- cl::desc("Comma seperated list of attrbute names that are "
+ cl::desc("Comma seperated list of attribute names that are "
"allowed to be seeded."),
cl::ZeroOrMore, cl::CommaSeparated);
+static cl::list<std::string> FunctionSeedAllowList(
+ "attributor-function-seed-allow-list", cl::Hidden,
+ cl::desc("Comma seperated list of function names that are "
+ "allowed to be seeded."),
+ cl::ZeroOrMore, cl::CommaSeparated);
+#endif
+
+static cl::opt<bool>
+ DumpDepGraph("attributor-dump-dep-graph", cl::Hidden,
+ cl::desc("Dump the dependency graph to dot files."),
+ cl::init(false));
+
+static cl::opt<std::string> DepGraphDotFileNamePrefix(
+ "attributor-depgraph-dot-filename-prefix", cl::Hidden,
+ cl::desc("The prefix used for the CallGraph dot file names."));
+
+static cl::opt<bool> ViewDepGraph("attributor-view-dep-graph", cl::Hidden,
+ cl::desc("View the dependency graph."),
+ cl::init(false));
+
+static cl::opt<bool> PrintDependencies("attributor-print-dep", cl::Hidden,
+ cl::desc("Print attribute dependencies"),
+ cl::init(false));
+
/// Logic operators for the change status enum class.
///
///{
-ChangeStatus llvm::operator|(ChangeStatus l, ChangeStatus r) {
- return l == ChangeStatus::CHANGED ? l : r;
+ChangeStatus llvm::operator|(ChangeStatus L, ChangeStatus R) {
+ return L == ChangeStatus::CHANGED ? L : R;
}
-ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) {
- return l == ChangeStatus::UNCHANGED ? l : r;
+ChangeStatus llvm::operator&(ChangeStatus L, ChangeStatus R) {
+ return L == ChangeStatus::UNCHANGED ? L : R;
}
///}
@@ -145,7 +201,7 @@ Argument *IRPosition::getAssociatedArgument() const {
// Not an Argument and no argument number means this is not a call site
// argument, thus we cannot find a callback argument to return.
- int ArgNo = getArgNo();
+ int ArgNo = getCallSiteArgNo();
if (ArgNo < 0)
return nullptr;
@@ -273,6 +329,13 @@ const IRPosition
SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
IRPositions.emplace_back(IRP);
+ // Helper to determine if operand bundles on a call site are benign or
+ // potentially problematic. We handle only llvm.assume for now.
+ auto CanIgnoreOperandBundles = [](const CallBase &CB) {
+ return (isa<IntrinsicInst>(CB) &&
+ cast<IntrinsicInst>(CB).getIntrinsicID() == Intrinsic::assume);
+ };
+
const auto *CB = dyn_cast<CallBase>(&IRP.getAnchorValue());
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
@@ -287,7 +350,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
- if (!CB->hasOperandBundles())
+ if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB))
if (const Function *Callee = CB->getCalledFunction())
IRPositions.emplace_back(IRPosition::function(*Callee));
return;
@@ -295,7 +358,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
- if (!CB->hasOperandBundles()) {
+ if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
if (const Function *Callee = CB->getCalledFunction()) {
IRPositions.emplace_back(IRPosition::returned(*Callee));
IRPositions.emplace_back(IRPosition::function(*Callee));
@@ -312,16 +375,16 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
IRPositions.emplace_back(IRPosition::callsite_function(*CB));
return;
case IRPosition::IRP_CALL_SITE_ARGUMENT: {
- int ArgNo = IRP.getArgNo();
- assert(CB && ArgNo >= 0 && "Expected call site!");
+ assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
- if (!CB->hasOperandBundles()) {
+ if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
const Function *Callee = CB->getCalledFunction();
- if (Callee && Callee->arg_size() > unsigned(ArgNo))
- IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo)));
- if (Callee)
+ if (Callee) {
+ if (Argument *Arg = IRP.getAssociatedArgument())
+ IRPositions.emplace_back(IRPosition::argument(*Arg));
IRPositions.emplace_back(IRPosition::function(*Callee));
+ }
}
IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue()));
return;
@@ -459,7 +522,7 @@ void IRPosition::verify() {
"Expected call base argument operand for a 'call site argument' "
"position");
assert(cast<CallBase>(U->getUser())->getArgOperandNo(U) ==
- unsigned(getArgNo()) &&
+ unsigned(getCallSiteArgNo()) &&
"Argument number mismatch!");
assert(U->get() == &getAssociatedValue() && "Associated value mismatch!");
return;
@@ -498,8 +561,10 @@ Attributor::getAssumedConstant(const Value &V, const AbstractAttribute &AA,
Attributor::~Attributor() {
// The abstract attributes are allocated via the BumpPtrAllocator Allocator,
// thus we cannot delete them. We can, and want to, destruct them though.
- for (AbstractAttribute *AA : AllAbstractAttributes)
+ for (auto &DepAA : DG.SyntheticRoot.Deps) {
+ AbstractAttribute *AA = cast<AbstractAttribute>(DepAA.getPointer());
AA->~AbstractAttribute();
+ }
}
bool Attributor::isAssumedDead(const AbstractAttribute &AA,
@@ -864,13 +929,15 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
// TODO: use the function scope once we have call site AAReturnedValues.
const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
- const auto &LivenessAA =
- getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false);
+ const auto *LivenessAA =
+ CheckBBLivenessOnly ? nullptr
+ : &(getAAFor<AAIsDead>(QueryingAA, QueryIRP,
+ /* TrackDependence */ false));
auto &OpcodeInstMap =
InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
- &LivenessAA, Opcodes, CheckBBLivenessOnly))
+ LivenessAA, Opcodes, CheckBBLivenessOnly))
return false;
return true;
@@ -903,8 +970,9 @@ bool Attributor::checkForAllReadWriteInstructions(
}
void Attributor::runTillFixpoint() {
+ TimeTraceScope TimeScope("Attributor::runTillFixpoint");
LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized "
- << AllAbstractAttributes.size()
+ << DG.SyntheticRoot.Deps.size()
<< " abstract attributes.\n");
// Now that all abstract attributes are collected and initialized we start
@@ -914,11 +982,11 @@ void Attributor::runTillFixpoint() {
SmallVector<AbstractAttribute *, 32> ChangedAAs;
SetVector<AbstractAttribute *> Worklist, InvalidAAs;
- Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end());
+ Worklist.insert(DG.SyntheticRoot.begin(), DG.SyntheticRoot.end());
do {
// Remember the size to determine new attributes.
- size_t NumAAs = AllAbstractAttributes.size();
+ size_t NumAAs = DG.SyntheticRoot.Deps.size();
LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter
<< ", Worklist size: " << Worklist.size() << "\n");
@@ -935,7 +1003,7 @@ void Attributor::runTillFixpoint() {
while (!InvalidAA->Deps.empty()) {
const auto &Dep = InvalidAA->Deps.back();
InvalidAA->Deps.pop_back();
- AbstractAttribute *DepAA = Dep.getPointer();
+ AbstractAttribute *DepAA = cast<AbstractAttribute>(Dep.getPointer());
if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) {
Worklist.insert(DepAA);
continue;
@@ -953,7 +1021,8 @@ void Attributor::runTillFixpoint() {
// changed to the work list.
for (AbstractAttribute *ChangedAA : ChangedAAs)
while (!ChangedAA->Deps.empty()) {
- Worklist.insert(ChangedAA->Deps.back().getPointer());
+ Worklist.insert(
+ cast<AbstractAttribute>(ChangedAA->Deps.back().getPointer()));
ChangedAA->Deps.pop_back();
}
@@ -981,8 +1050,8 @@ void Attributor::runTillFixpoint() {
// Add attributes to the changed set if they have been created in the last
// iteration.
- ChangedAAs.append(AllAbstractAttributes.begin() + NumAAs,
- AllAbstractAttributes.end());
+ ChangedAAs.append(DG.SyntheticRoot.begin() + NumAAs,
+ DG.SyntheticRoot.end());
// Reset the work list and repopulate with the changed abstract attributes.
// Note that dependent ones are added above.
@@ -1015,7 +1084,8 @@ void Attributor::runTillFixpoint() {
}
while (!ChangedAA->Deps.empty()) {
- ChangedAAs.push_back(ChangedAA->Deps.back().getPointer());
+ ChangedAAs.push_back(
+ cast<AbstractAttribute>(ChangedAA->Deps.back().getPointer()));
ChangedAA->Deps.pop_back();
}
}
@@ -1037,12 +1107,14 @@ void Attributor::runTillFixpoint() {
}
ChangeStatus Attributor::manifestAttributes() {
- size_t NumFinalAAs = AllAbstractAttributes.size();
+ TimeTraceScope TimeScope("Attributor::manifestAttributes");
+ size_t NumFinalAAs = DG.SyntheticRoot.Deps.size();
unsigned NumManifested = 0;
unsigned NumAtFixpoint = 0;
ChangeStatus ManifestChange = ChangeStatus::UNCHANGED;
- for (AbstractAttribute *AA : AllAbstractAttributes) {
+ for (auto &DepAA : DG.SyntheticRoot.Deps) {
+ AbstractAttribute *AA = cast<AbstractAttribute>(DepAA.getPointer());
AbstractState &State = AA->getState();
// If there is not already a fixpoint reached, we can now take the
@@ -1059,6 +1131,10 @@ ChangeStatus Attributor::manifestAttributes() {
// Skip dead code.
if (isAssumedDead(*AA, nullptr, /* CheckBBLivenessOnly */ true))
continue;
+ // Check the manifest debug counter that allows skipping the manifestation
+ // of AAs.
+ if (!DebugCounter::shouldExecute(ManifestDBGCounter))
+ continue;
// Manifest the state and record if we changed the IR.
ChangeStatus LocalChange = AA->manifest(*this);
if (LocalChange == ChangeStatus::CHANGED && AreStatisticsEnabled())
@@ -1082,11 +1158,14 @@ ChangeStatus Attributor::manifestAttributes() {
NumAttributesValidFixpoint += NumAtFixpoint;
(void)NumFinalAAs;
- if (NumFinalAAs != AllAbstractAttributes.size()) {
- for (unsigned u = NumFinalAAs; u < AllAbstractAttributes.size(); ++u)
- errs() << "Unexpected abstract attribute: " << *AllAbstractAttributes[u]
+ if (NumFinalAAs != DG.SyntheticRoot.Deps.size()) {
+ for (unsigned u = NumFinalAAs; u < DG.SyntheticRoot.Deps.size(); ++u)
+ errs() << "Unexpected abstract attribute: "
+ << cast<AbstractAttribute>(DG.SyntheticRoot.Deps[u].getPointer())
<< " :: "
- << AllAbstractAttributes[u]->getIRPosition().getAssociatedValue()
+ << cast<AbstractAttribute>(DG.SyntheticRoot.Deps[u].getPointer())
+ ->getIRPosition()
+ .getAssociatedValue()
<< "\n";
llvm_unreachable("Expected the final number of abstract attributes to "
"remain unchanged!");
@@ -1094,7 +1173,50 @@ ChangeStatus Attributor::manifestAttributes() {
return ManifestChange;
}
+void Attributor::identifyDeadInternalFunctions() {
+ // Identify dead internal functions and delete them. This happens outside
+ // the other fixpoint analysis as we might treat potentially dead functions
+ // as live to lower the number of iterations. If they happen to be dead, the
+ // below fixpoint loop will identify and eliminate them.
+ SmallVector<Function *, 8> InternalFns;
+ for (Function *F : Functions)
+ if (F->hasLocalLinkage())
+ InternalFns.push_back(F);
+
+ SmallPtrSet<Function *, 8> LiveInternalFns;
+ bool FoundLiveInternal = true;
+ while (FoundLiveInternal) {
+ FoundLiveInternal = false;
+ for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) {
+ Function *F = InternalFns[u];
+ if (!F)
+ continue;
+
+ bool AllCallSitesKnown;
+ if (checkForAllCallSites(
+ [&](AbstractCallSite ACS) {
+ Function *Callee = ACS.getInstruction()->getFunction();
+ return ToBeDeletedFunctions.count(Callee) ||
+ (Functions.count(Callee) && Callee->hasLocalLinkage() &&
+ !LiveInternalFns.count(Callee));
+ },
+ *F, true, nullptr, AllCallSitesKnown)) {
+ continue;
+ }
+
+ LiveInternalFns.insert(F);
+ InternalFns[u] = nullptr;
+ FoundLiveInternal = true;
+ }
+ }
+
+ for (unsigned u = 0, e = InternalFns.size(); u < e; ++u)
+ if (Function *F = InternalFns[u])
+ ToBeDeletedFunctions.insert(F);
+}
+
ChangeStatus Attributor::cleanupIR() {
+ TimeTraceScope TimeScope("Attributor::cleanupIR");
// Delete stuff at the end to avoid invalid references and a nice order.
LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least "
<< ToBeDeletedFunctions.size() << " functions and "
@@ -1205,50 +1327,45 @@ ChangeStatus Attributor::cleanupIR() {
DetatchDeadBlocks(ToBeDeletedBBs, nullptr);
}
- // Identify dead internal functions and delete them. This happens outside
- // the other fixpoint analysis as we might treat potentially dead functions
- // as live to lower the number of iterations. If they happen to be dead, the
- // below fixpoint loop will identify and eliminate them.
- SmallVector<Function *, 8> InternalFns;
- for (Function *F : Functions)
- if (F->hasLocalLinkage())
- InternalFns.push_back(F);
-
- bool FoundDeadFn = true;
- while (FoundDeadFn) {
- FoundDeadFn = false;
- for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) {
- Function *F = InternalFns[u];
- if (!F)
- continue;
-
- bool AllCallSitesKnown;
- if (!checkForAllCallSites(
- [this](AbstractCallSite ACS) {
- return ToBeDeletedFunctions.count(
- ACS.getInstruction()->getFunction());
- },
- *F, true, nullptr, AllCallSitesKnown))
- continue;
-
- ToBeDeletedFunctions.insert(F);
- InternalFns[u] = nullptr;
- FoundDeadFn = true;
- }
- }
+ identifyDeadInternalFunctions();
// Rewrite the functions as requested during manifest.
ChangeStatus ManifestChange = rewriteFunctionSignatures(CGModifiedFunctions);
for (Function *Fn : CGModifiedFunctions)
- CGUpdater.reanalyzeFunction(*Fn);
+ if (!ToBeDeletedFunctions.count(Fn))
+ CGUpdater.reanalyzeFunction(*Fn);
- for (Function *Fn : ToBeDeletedFunctions)
+ for (Function *Fn : ToBeDeletedFunctions) {
+ if (!Functions.count(Fn))
+ continue;
CGUpdater.removeFunction(*Fn);
+ }
+
+ if (!ToBeChangedUses.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!ToBeChangedToUnreachableInsts.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!ToBeDeletedFunctions.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!ToBeDeletedBlocks.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!ToBeDeletedInsts.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!InvokeWithDeadSuccessor.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!DeadInsts.empty())
+ ManifestChange = ChangeStatus::CHANGED;
NumFnDeleted += ToBeDeletedFunctions.size();
- LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << NumFnDeleted
+ LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << ToBeDeletedFunctions.size()
<< " functions after manifest.\n");
#ifdef EXPENSIVE_CHECKS
@@ -1263,14 +1380,37 @@ ChangeStatus Attributor::cleanupIR() {
}
ChangeStatus Attributor::run() {
- SeedingPeriod = false;
+ TimeTraceScope TimeScope("Attributor::run");
+
+ Phase = AttributorPhase::UPDATE;
runTillFixpoint();
+
+ // dump graphs on demand
+ if (DumpDepGraph)
+ DG.dumpGraph();
+
+ if (ViewDepGraph)
+ DG.viewGraph();
+
+ if (PrintDependencies)
+ DG.print();
+
+ Phase = AttributorPhase::MANIFEST;
ChangeStatus ManifestChange = manifestAttributes();
+
+ Phase = AttributorPhase::CLEANUP;
ChangeStatus CleanupChange = cleanupIR();
+
return ManifestChange | CleanupChange;
}
ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
+ TimeTraceScope TimeScope(
+ AA.getName() + std::to_string(AA.getIRPosition().getPositionKind()) +
+ "::updateAA");
+ assert(Phase == AttributorPhase::UPDATE &&
+ "We can update AA only in the update stage!");
+
// Use a new dependence vector for this update.
DependenceVector DV;
DependenceStack.push_back(&DV);
@@ -1298,23 +1438,7 @@ ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
return CS;
}
-/// Create a shallow wrapper for \p F such that \p F has internal linkage
-/// afterwards. It also sets the original \p F 's name to anonymous
-///
-/// A wrapper is a function with the same type (and attributes) as \p F
-/// that will only call \p F and return the result, if any.
-///
-/// Assuming the declaration of looks like:
-/// rty F(aty0 arg0, ..., atyN argN);
-///
-/// The wrapper will then look as follows:
-/// rty wrapper(aty0 arg0, ..., atyN argN) {
-/// return F(arg0, ..., argN);
-/// }
-///
-static void createShallowWrapper(Function &F) {
- assert(AllowShallowWrappers &&
- "Cannot create a wrapper if it is not allowed!");
+void Attributor::createShallowWrapper(Function &F) {
assert(!F.isDeclaration() && "Cannot create a wrapper around a declaration!");
Module &M = *F.getParent();
@@ -1347,7 +1471,7 @@ static void createShallowWrapper(Function &F) {
BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", Wrapper);
SmallVector<Value *, 8> Args;
- auto FArgIt = F.arg_begin();
+ Argument *FArgIt = F.arg_begin();
for (Argument &Arg : Wrapper->args()) {
Args.push_back(&Arg);
Arg.setName((FArgIt++)->getName());
@@ -1358,7 +1482,57 @@ static void createShallowWrapper(Function &F) {
CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
ReturnInst::Create(Ctx, CI->getType()->isVoidTy() ? nullptr : CI, EntryBB);
- NumFnShallowWrapperCreated++;
+ NumFnShallowWrappersCreated++;
+}
+
+/// Make another copy of the function \p F such that the copied version has
+/// internal linkage afterwards and can be analysed. Then we replace all uses
+/// of the original function with the copied one.
+///
+/// Only non-exactly defined functions that have `linkonce_odr` or `weak_odr`
+/// linkage can be internalized because these linkages guarantee that other
+/// definitions with the same name have the same semantics as this one
+///
+static Function *internalizeFunction(Function &F) {
+ assert(AllowDeepWrapper && "Cannot create a copy if not allowed.");
+ assert(!F.isDeclaration() && !F.hasExactDefinition() &&
+ !GlobalValue::isInterposableLinkage(F.getLinkage()) &&
+ "Trying to internalize function which cannot be internalized.");
+
+ Module &M = *F.getParent();
+ FunctionType *FnTy = F.getFunctionType();
+
+ // create a copy of the current function
+ Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
+ F.getName() + ".internalized");
+ ValueToValueMapTy VMap;
+ auto *NewFArgIt = Copied->arg_begin();
+ for (auto &Arg : F.args()) {
+ auto ArgName = Arg.getName();
+ NewFArgIt->setName(ArgName);
+ VMap[&Arg] = &(*NewFArgIt++);
+ }
+ SmallVector<ReturnInst *, 8> Returns;
+
+ // Copy the body of the original function to the new one
+ CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns);
+
+ // Set the linkage and visibility late as CloneFunctionInto has some implicit
+ // requirements.
+ Copied->setVisibility(GlobalValue::DefaultVisibility);
+ Copied->setLinkage(GlobalValue::PrivateLinkage);
+
+ // Copy metadata
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ F.getAllMetadata(MDs);
+ for (auto MDIt : MDs)
+ Copied->addMetadata(MDIt.first, *MDIt.second);
+
+ M.getFunctionList().insert(F.getIterator(), Copied);
+ F.replaceAllUsesWith(Copied);
+ Copied->setDSOLocal(true);
+
+ return Copied;
}
bool Attributor::isValidFunctionSignatureRewrite(
@@ -1461,9 +1635,17 @@ bool Attributor::registerFunctionSignatureRewrite(
}
bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) {
- if (SeedAllowList.size() == 0)
- return true;
- return std::count(SeedAllowList.begin(), SeedAllowList.end(), AA.getName());
+ bool Result = true;
+#ifndef NDEBUG
+ if (SeedAllowList.size() != 0)
+ Result =
+ std::count(SeedAllowList.begin(), SeedAllowList.end(), AA.getName());
+ Function *Fn = AA.getAnchorScope();
+ if (FunctionSeedAllowList.size() != 0 && Fn)
+ Result &= std::count(FunctionSeedAllowList.begin(),
+ FunctionSeedAllowList.end(), Fn->getName());
+#endif
+ return Result;
}
ChangeStatus Attributor::rewriteFunctionSignatures(
@@ -1474,7 +1656,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
Function *OldFn = It.getFirst();
// Deleted functions do not require rewrites.
- if (ToBeDeletedFunctions.count(OldFn))
+ if (!Functions.count(OldFn) || ToBeDeletedFunctions.count(OldFn))
continue;
const SmallVectorImpl<std::unique_ptr<ArgumentReplacementInfo>> &ARIs =
@@ -1617,8 +1799,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
assert(Success && "Assumed call site replacement to succeed!");
// Rewire the arguments.
- auto OldFnArgIt = OldFn->arg_begin();
- auto NewFnArgIt = NewFn->arg_begin();
+ Argument *OldFnArgIt = OldFn->arg_begin();
+ Argument *NewFnArgIt = NewFn->arg_begin();
for (unsigned OldArgNum = 0; OldArgNum < ARIs.size();
++OldArgNum, ++OldFnArgIt) {
if (const std::unique_ptr<ArgumentReplacementInfo> &ARI =
@@ -1727,6 +1909,10 @@ void InformationCache::initializeInformationCache(const Function &CF,
InlineableFunctions.insert(&F);
}
+AAResults *InformationCache::getAAResultsForFunction(const Function &F) {
+ return AG.getAnalysis<AAManager>(F);
+}
+
InformationCache::FunctionInfo::~FunctionInfo() {
// The instruction vectors are allocated using a BumpPtrAllocator, we need to
// manually destroy them.
@@ -1827,6 +2013,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function might be simplified.
getOrCreateAAFor<AAValueSimplify>(RetPos);
+ // Every returned value might be marked noundef.
+ getOrCreateAAFor<AANoUndef>(RetPos);
+
if (ReturnType->isPointerTy()) {
// Every function with pointer return type might be marked align.
@@ -1853,6 +2042,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every argument might be dead.
getOrCreateAAFor<AAIsDead>(ArgPos);
+ // Every argument might be marked noundef.
+ getOrCreateAAFor<AANoUndef>(ArgPos);
+
if (Arg.getType()->isPointerTy()) {
// Every argument with pointer type might be marked nonnull.
getOrCreateAAFor<AANonNull>(ArgPos);
@@ -1920,6 +2112,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Call site argument might be simplified.
getOrCreateAAFor<AAValueSimplify>(CBArgPos);
+ // Every call site argument might be marked "noundef".
+ getOrCreateAAFor<AANoUndef>(CBArgPos);
+
if (!CB.getArgOperand(I)->getType()->isPointerTy())
continue;
@@ -2005,7 +2200,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) {
raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) {
const Value &AV = Pos.getAssociatedValue();
return OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " ["
- << Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}";
+ << Pos.getAnchorValue().getName() << "@" << Pos.getCallSiteArgNo()
+ << "]}";
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerRangeState &S) {
@@ -2027,9 +2223,48 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) {
return OS;
}
+raw_ostream &llvm::operator<<(raw_ostream &OS,
+ const PotentialConstantIntValuesState &S) {
+ OS << "set-state(< {";
+ if (!S.isValidState())
+ OS << "full-set";
+ else {
+ for (auto &it : S.getAssumedSet())
+ OS << it << ", ";
+ if (S.undefIsContained())
+ OS << "undef ";
+ }
+ OS << "} >)";
+
+ return OS;
+}
+
void AbstractAttribute::print(raw_ostream &OS) const {
- OS << "[P: " << getIRPosition() << "][" << getAsStr() << "][S: " << getState()
- << "]";
+ OS << "[";
+ OS << getName();
+ OS << "] for CtxI ";
+
+ if (auto *I = getCtxI()) {
+ OS << "'";
+ I->print(OS);
+ OS << "'";
+ } else
+ OS << "<<null inst>>";
+
+ OS << " at position " << getIRPosition() << " with state " << getAsStr()
+ << '\n';
+}
+
+void AbstractAttribute::printWithDeps(raw_ostream &OS) const {
+ print(OS);
+
+ for (const auto &DepAA : Deps) {
+ auto *AA = DepAA.getPointer();
+ OS << " updates ";
+ AA->print(OS);
+ }
+
+ OS << '\n';
}
///}
@@ -2055,7 +2290,31 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
if (AllowShallowWrappers)
for (Function *F : Functions)
if (!A.isFunctionIPOAmendable(*F))
- createShallowWrapper(*F);
+ Attributor::createShallowWrapper(*F);
+
+ // Internalize non-exact functions
+ // TODO: for now we eagerly internalize functions without calculating the
+ // cost; we need a cost interface to determine whether internalizing
+ // a function is "beneficial".
+ if (AllowDeepWrapper) {
+ unsigned FunSize = Functions.size();
+ for (unsigned u = 0; u < FunSize; u++) {
+ Function *F = Functions[u];
+ if (!F->isDeclaration() && !F->isDefinitionExact() && F->getNumUses() &&
+ !GlobalValue::isInterposableLinkage(F->getLinkage())) {
+ Function *NewF = internalizeFunction(*F);
+ Functions.insert(NewF);
+
+ // Update call graph
+ CGUpdater.replaceFunctionWith(*F, *NewF);
+ for (const Use &U : NewF->uses())
+ if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) {
+ auto *CallerF = CB->getCaller();
+ CGUpdater.reanalyzeFunction(*CallerF);
+ }
+ }
+ }
+ }
for (Function *F : Functions) {
if (F->hasExactDefinition())
@@ -2064,8 +2323,8 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
NumFnWithoutExactDefinition++;
// We look at internal functions only on-demand but if any use is not a
- // direct call or outside the current set of analyzed functions, we have to
- // do it eagerly.
+ // direct call or outside the current set of analyzed functions, we have
+ // to do it eagerly.
if (F->hasLocalLinkage()) {
if (llvm::all_of(F->uses(), [&Functions](const Use &U) {
const auto *CB = dyn_cast<CallBase>(U.getUser());
@@ -2081,11 +2340,41 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
}
ChangeStatus Changed = A.run();
+
LLVM_DEBUG(dbgs() << "[Attributor] Done with " << Functions.size()
<< " functions, result: " << Changed << ".\n");
return Changed == ChangeStatus::CHANGED;
}
+void AADepGraph::viewGraph() { llvm::ViewGraph(this, "Dependency Graph"); }
+
+void AADepGraph::dumpGraph() {
+ static std::atomic<int> CallTimes;
+ std::string Prefix;
+
+ if (!DepGraphDotFileNamePrefix.empty())
+ Prefix = DepGraphDotFileNamePrefix;
+ else
+ Prefix = "dep_graph";
+ std::string Filename =
+ Prefix + "_" + std::to_string(CallTimes.load()) + ".dot";
+
+ outs() << "Dependency graph dump to " << Filename << ".\n";
+
+ std::error_code EC;
+
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
+ if (!EC)
+ llvm::WriteGraph(File, this);
+
+ CallTimes++;
+}
+
+void AADepGraph::print() {
+ for (auto DepAA : SyntheticRoot.Deps)
+ cast<AbstractAttribute>(DepAA.getPointer())->printWithDeps(outs());
+}
+
PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
@@ -2127,11 +2416,58 @@ PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C,
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater)) {
// FIXME: Think about passes we will preserve and add them here.
- return PreservedAnalyses::none();
+ PreservedAnalyses PA;
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ return PA;
}
return PreservedAnalyses::all();
}
+namespace llvm {
+
+template <> struct GraphTraits<AADepGraphNode *> {
+ using NodeRef = AADepGraphNode *;
+ using DepTy = PointerIntPair<AADepGraphNode *, 1>;
+ using EdgeRef = PointerIntPair<AADepGraphNode *, 1>;
+
+ static NodeRef getEntryNode(AADepGraphNode *DGN) { return DGN; }
+ static NodeRef DepGetVal(DepTy &DT) { return DT.getPointer(); }
+
+ using ChildIteratorType =
+ mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
+ using ChildEdgeIteratorType = TinyPtrVector<DepTy>::iterator;
+
+ static ChildIteratorType child_begin(NodeRef N) { return N->child_begin(); }
+
+ static ChildIteratorType child_end(NodeRef N) { return N->child_end(); }
+};
+
+template <>
+struct GraphTraits<AADepGraph *> : public GraphTraits<AADepGraphNode *> {
+ static NodeRef getEntryNode(AADepGraph *DG) { return DG->GetEntryNode(); }
+
+ using nodes_iterator =
+ mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
+
+ static nodes_iterator nodes_begin(AADepGraph *DG) { return DG->begin(); }
+
+ static nodes_iterator nodes_end(AADepGraph *DG) { return DG->end(); }
+};
+
+template <> struct DOTGraphTraits<AADepGraph *> : public DefaultDOTGraphTraits {
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getNodeLabel(const AADepGraphNode *Node,
+ const AADepGraph *DG) {
+ std::string AAString;
+ raw_string_ostream O(AAString);
+ Node->print(O);
+ return AAString;
+ }
+};
+
+} // end namespace llvm
+
namespace {
struct AttributorLegacyPass : public ModulePass {
@@ -2163,7 +2499,6 @@ struct AttributorLegacyPass : public ModulePass {
};
struct AttributorCGSCCLegacyPass : public CallGraphSCCPass {
- CallGraphUpdater CGUpdater;
static char ID;
AttributorCGSCCLegacyPass() : CallGraphSCCPass(ID) {
@@ -2185,6 +2520,7 @@ struct AttributorCGSCCLegacyPass : public CallGraphSCCPass {
AnalysisGetter AG;
CallGraph &CG = const_cast<CallGraph &>(SCC.getCallGraph());
+ CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, SCC);
Module &M = *Functions.back()->getParent();
BumpPtrAllocator Allocator;
@@ -2192,8 +2528,6 @@ struct AttributorCGSCCLegacyPass : public CallGraphSCCPass {
return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater);
}
- bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
-
void getAnalysisUsage(AnalysisUsage &AU) const override {
// FIXME: Think about passes we will preserve and add them here.
AU.addRequired<TargetLibraryInfoWrapperPass>();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 7e9fd61eeb41..d6127a8df628 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -13,15 +13,20 @@
#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/Support/CommandLine.h"
@@ -42,6 +47,16 @@ static cl::opt<bool> ManifestInternal(
static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128),
cl::Hidden);
+template <>
+unsigned llvm::PotentialConstantIntValuesState::MaxPotentialValues = 0;
+
+static cl::opt<unsigned, true> MaxPotentialValues(
+ "attributor-max-potential-values", cl::Hidden,
+ cl::desc("Maximum number of potential values to be "
+ "tracked for each position."),
+ cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
+ cl::init(7));
+
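+// Example, illustrative only: because of the cl::location binding above, the
+// parsed value is written straight into
+// PotentialConstantIntValuesState::MaxPotentialValues, so an invocation such
+// as
+//   opt -passes=attributor -attributor-max-potential-values=3 in.ll -S
+// caps the number of constant values tracked per IR position at three.
+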
STATISTIC(NumAAs, "Number of abstract attributes created");
// Some helper macros to deal with statistics tracking.
@@ -117,6 +132,8 @@ PIPE_OPERATOR(AAMemoryLocation)
PIPE_OPERATOR(AAValueConstantRange)
PIPE_OPERATOR(AAPrivatizablePtr)
PIPE_OPERATOR(AAUndefinedBehavior)
+PIPE_OPERATOR(AAPotentialValues)
+PIPE_OPERATOR(AANoUndef)
#undef PIPE_OPERATOR
} // namespace llvm
@@ -435,7 +452,7 @@ static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA,
const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos);
LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
<< " @ " << RVPos << "\n");
- const StateType &AAS = static_cast<const StateType &>(AA.getState());
+ const StateType &AAS = AA.getState();
if (T.hasValue())
*T &= AAS;
else
@@ -485,7 +502,7 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
Optional<StateType> T;
// The argument number which is also the call site argument number.
- unsigned ArgNo = QueryingAA.getIRPosition().getArgNo();
+ unsigned ArgNo = QueryingAA.getIRPosition().getCallSiteArgNo();
auto CallSiteCheck = [&](AbstractCallSite ACS) {
const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
@@ -497,7 +514,7 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos);
LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
<< " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
- const StateType &AAS = static_cast<const StateType &>(AA.getState());
+ const StateType &AAS = AA.getState();
if (T.hasValue())
*T &= AAS;
else
@@ -554,8 +571,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
IRPosition FnPos = IRPosition::returned(*AssociatedFunction);
const AAType &AA = A.getAAFor<AAType>(*this, FnPos);
- return clampStateAndIndicateChange(
- S, static_cast<const StateType &>(AA.getState()));
+ return clampStateAndIndicateChange(S, AA.getState());
}
};
@@ -722,7 +738,7 @@ struct AANoUnwindCallSite final : AANoUnwindImpl {
void initialize(Attributor &A) override {
AANoUnwindImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F)
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -735,9 +751,7 @@ struct AANoUnwindCallSite final : AANoUnwindImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AANoUnwind::StateType &>(FnAA.getState()));
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -783,7 +797,7 @@ public:
ReturnedValues.clear();
Function *F = getAssociatedFunction();
- if (!F) {
+ if (!F || F->isDeclaration()) {
indicatePessimisticFixpoint();
return;
}
@@ -1052,9 +1066,10 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
// map, NewRVsMap.
decltype(ReturnedValues) NewRVsMap;
- auto HandleReturnValue = [&](Value *RV, SmallSetVector<ReturnInst *, 4> &RIs) {
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *RV
- << " by #" << RIs.size() << " RIs\n");
+ auto HandleReturnValue = [&](Value *RV,
+ SmallSetVector<ReturnInst *, 4> &RIs) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *RV << " by #"
+ << RIs.size() << " RIs\n");
CallBase *CB = dyn_cast<CallBase>(RV);
if (!CB || UnresolvedCalls.count(CB))
return;
@@ -1128,11 +1143,13 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
RVState RVS({NewRVsMap, Unused, RetValAAIt.second});
VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS, CB);
continue;
- } else if (isa<CallBase>(RetVal)) {
+ }
+ if (isa<CallBase>(RetVal)) {
// Call sites are resolved by the callee attribute over time, no need to
// do anything for us.
continue;
- } else if (isa<Constant>(RetVal)) {
+ }
+ if (isa<Constant>(RetVal)) {
// Constants are valid everywhere, we can simply take them.
NewRVsMap[RetVal].insert(RIs.begin(), RIs.end());
continue;
@@ -1373,7 +1390,7 @@ struct AANoSyncCallSite final : AANoSyncImpl {
void initialize(Attributor &A) override {
AANoSyncImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F)
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -1386,8 +1403,7 @@ struct AANoSyncCallSite final : AANoSyncImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AANoSync::StateType &>(FnAA.getState()));
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -1439,7 +1455,7 @@ struct AANoFreeCallSite final : AANoFreeImpl {
void initialize(Attributor &A) override {
AANoFreeImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F)
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -1452,8 +1468,7 @@ struct AANoFreeCallSite final : AANoFreeImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AANoFree::StateType &>(FnAA.getState()));
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -1535,8 +1550,7 @@ struct AANoFreeCallSiteArgument final : AANoFreeFloating {
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AANoFree::StateType &>(ArgAA.getState()));
+ return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -1672,21 +1686,33 @@ struct AANonNullImpl : AANonNull {
Value &V = getAssociatedValue();
if (!NullIsDefined &&
hasAttr({Attribute::NonNull, Attribute::Dereferenceable},
- /* IgnoreSubsumingPositions */ false, &A))
+ /* IgnoreSubsumingPositions */ false, &A)) {
indicateOptimisticFixpoint();
- else if (isa<ConstantPointerNull>(V))
+ return;
+ }
+
+ if (isa<ConstantPointerNull>(V)) {
indicatePessimisticFixpoint();
- else
- AANonNull::initialize(A);
+ return;
+ }
+
+ AANonNull::initialize(A);
bool CanBeNull = true;
- if (V.getPointerDereferenceableBytes(A.getDataLayout(), CanBeNull))
- if (!CanBeNull)
+ if (V.getPointerDereferenceableBytes(A.getDataLayout(), CanBeNull)) {
+ if (!CanBeNull) {
indicateOptimisticFixpoint();
+ return;
+ }
+ }
- if (!getState().isAtFixpoint())
- if (Instruction *CtxI = getCtxI())
- followUsesInMBEC(*this, A, getState(), *CtxI);
+ if (isa<GlobalValue>(&getAssociatedValue())) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ if (Instruction *CtxI = getCtxI())
+ followUsesInMBEC(*this, A, getState(), *CtxI);
}
/// See followUsesInMBEC
@@ -1717,13 +1743,6 @@ struct AANonNullFloating : public AANonNullImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- if (!NullIsDefined) {
- const auto &DerefAA =
- A.getAAFor<AADereferenceable>(*this, getIRPosition());
- if (DerefAA.getAssumedDereferenceableBytes())
- return ChangeStatus::UNCHANGED;
- }
-
const DataLayout &DL = A.getDataLayout();
DominatorTree *DT = nullptr;
@@ -1742,8 +1761,7 @@ struct AANonNullFloating : public AANonNullImpl {
T.indicatePessimisticFixpoint();
} else {
// Use abstract attribute information.
- const AANonNull::StateType &NS =
- static_cast<const AANonNull::StateType &>(AA.getState());
+ const AANonNull::StateType &NS = AA.getState();
T ^= NS;
}
return T.isValidState();
@@ -1763,9 +1781,14 @@ struct AANonNullFloating : public AANonNullImpl {
/// NonNull attribute for function return value.
struct AANonNullReturned final
- : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl> {
+ : AAReturnedFromReturnedValues<AANonNull, AANonNull> {
AANonNullReturned(const IRPosition &IRP, Attributor &A)
- : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl>(IRP, A) {}
+ : AAReturnedFromReturnedValues<AANonNull, AANonNull>(IRP, A) {}
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nonnull" : "may-null";
+ }
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
@@ -1879,7 +1902,7 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
void initialize(Attributor &A) override {
AANoRecurseImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F)
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -1892,9 +1915,7 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AANoRecurse::StateType &>(FnAA.getState()));
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -1979,6 +2000,98 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
return true;
};
+ auto InspectCallSiteForUB = [&](Instruction &I) {
+ // Check whether a call site always causes UB or not.
+
+ // Skip instructions that are already saved.
+ if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
+ return true;
+
+ // Check nonnull and noundef argument attribute violations for each
+ // call site.
+ CallBase &CB = cast<CallBase>(I);
+ Function *Callee = CB.getCalledFunction();
+ if (!Callee)
+ return true;
+ for (unsigned idx = 0; idx < CB.getNumArgOperands(); idx++) {
+ // If the current argument is known to be simplified to a null pointer and
+ // the corresponding argument position is known to have the nonnull
+ // attribute, the argument is poison. Furthermore, if the argument is
+ // poison and the position is known to have the noundef attribute, this
+ // call site is considered UB.
+ if (idx >= Callee->arg_size())
+ break;
+ Value *ArgVal = CB.getArgOperand(idx);
+ if (!ArgVal)
+ continue;
+ // Here, we handle three cases.
+ // (1) Not having a value means it is dead. (We can replace the value
+ // with undef.)
+ // (2) Simplified to undef. The argument violates the noundef attribute.
+ // (3) Simplified to a null pointer where the position is known to be
+ // nonnull. The argument is a poison value and violates the noundef
+ // attribute.
+ IRPosition CalleeArgumentIRP = IRPosition::callsite_argument(CB, idx);
+ auto &NoUndefAA = A.getAAFor<AANoUndef>(*this, CalleeArgumentIRP,
+ /* TrackDependence */ false);
+ if (!NoUndefAA.isKnownNoUndef())
+ continue;
+ auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>(
+ *this, IRPosition::value(*ArgVal), /* TrackDependence */ false);
+ if (!ValueSimplifyAA.isKnown())
+ continue;
+ Optional<Value *> SimplifiedVal =
+ ValueSimplifyAA.getAssumedSimplifiedValue(A);
+ if (!SimplifiedVal.hasValue() ||
+ isa<UndefValue>(*SimplifiedVal.getValue())) {
+ KnownUBInsts.insert(&I);
+ continue;
+ }
+ if (!ArgVal->getType()->isPointerTy() ||
+ !isa<ConstantPointerNull>(*SimplifiedVal.getValue()))
+ continue;
+ auto &NonNullAA = A.getAAFor<AANonNull>(*this, CalleeArgumentIRP,
+ /* TrackDependence */ false);
+ if (NonNullAA.isKnownNonNull())
+ KnownUBInsts.insert(&I);
+ }
+ return true;
+ };
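+ // Illustrative IR, assumed example: in
+ //   call void @f(i32* nonnull noundef null)
+ // the argument simplifies to a null pointer at a position known to be both
+ // nonnull and noundef, so the call is recorded in KnownUBInsts.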
+
+ auto InspectReturnInstForUB =
+ [&](Value &V, const SmallSetVector<ReturnInst *, 4> RetInsts) {
+ // Check if a return instruction always causes UB or not.
+ // Note: It is guaranteed that the returned position of the anchor
+ // scope has noundef attribute when this is called.
+ // We also ensure the return position is not "assumed dead"
+ // because the returned value was then potentially simplified to
+ // `undef` in AAReturnedValues without removing the `noundef`
+ // attribute yet.
+
+ // When the returned position has the noundef attribute, UB occurs in the
+ // following cases.
+ // (1) Returned value is known to be undef.
+ // (2) The value is known to be a null pointer and the returned
+ // position has nonnull attribute (because the returned value is
+ // poison).
+ bool FoundUB = false;
+ if (isa<UndefValue>(V)) {
+ FoundUB = true;
+ } else {
+ if (isa<ConstantPointerNull>(V)) {
+ auto &NonNullAA = A.getAAFor<AANonNull>(
+ *this, IRPosition::returned(*getAnchorScope()),
+ /* TrackDependence */ false);
+ if (NonNullAA.isKnownNonNull())
+ FoundUB = true;
+ }
+ }
+
+ if (FoundUB)
+ for (ReturnInst *RI : RetInsts)
+ KnownUBInsts.insert(RI);
+ return true;
+ };
+
A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
{Instruction::Load, Instruction::Store,
Instruction::AtomicCmpXchg,
@@ -1986,6 +2099,22 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
/* CheckBBLivenessOnly */ true);
A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br},
/* CheckBBLivenessOnly */ true);
+ A.checkForAllCallLikeInstructions(InspectCallSiteForUB, *this);
+
+ // If the returned position of the anchor scope has the noundef attribute,
+ // check all return instructions.
+ if (!getAnchorScope()->getReturnType()->isVoidTy()) {
+ const IRPosition &ReturnIRP = IRPosition::returned(*getAnchorScope());
+ if (!A.isAssumedDead(ReturnIRP, this, nullptr)) {
+ auto &RetPosNoUndefAA =
+ A.getAAFor<AANoUndef>(*this, ReturnIRP,
+ /* TrackDependence */ false);
+ if (RetPosNoUndefAA.isKnownNoUndef())
+ A.checkForAllReturnedValuesAndReturnInsts(InspectReturnInstForUB,
+ *this);
+ }
+ }
+
if (NoUBPrevSize != AssumedNoUBInsts.size() ||
UBPrevSize != KnownUBInsts.size())
return ChangeStatus::CHANGED;
@@ -2153,7 +2282,7 @@ struct AAWillReturnImpl : public AAWillReturn {
AAWillReturn::initialize(A);
Function *F = getAnchorScope();
- if (!F || !A.isFunctionIPOAmendable(*F) || mayContainUnboundedCycle(*F, A))
+ if (!F || F->isDeclaration() || mayContainUnboundedCycle(*F, A))
indicatePessimisticFixpoint();
}
@@ -2197,9 +2326,9 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- AAWillReturnImpl::initialize(A);
+ AAWillReturn::initialize(A);
Function *F = getAssociatedFunction();
- if (!F)
+ if (!F || !A.isFunctionIPOAmendable(*F))
indicatePessimisticFixpoint();
}
@@ -2212,9 +2341,7 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AAWillReturn::StateType &>(FnAA.getState()));
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -2374,7 +2501,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
void initialize(Attributor &A) override {
// See callsite argument attribute and callee argument attribute.
const auto &CB = cast<CallBase>(getAnchorValue());
- if (CB.paramHasAttr(getArgNo(), Attribute::NoAlias))
+ if (CB.paramHasAttr(getCallSiteArgNo(), Attribute::NoAlias))
indicateOptimisticFixpoint();
Value &Val = getAssociatedValue();
if (isa<ConstantPointerNull>(Val) &&
@@ -2389,7 +2516,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
const AAMemoryBehavior &MemBehaviorAA,
const CallBase &CB, unsigned OtherArgNo) {
// We do not need to worry about aliasing with the underlying IRP.
- if (this->getArgNo() == (int)OtherArgNo)
+ if (this->getCalleeArgNo() == (int)OtherArgNo)
return false;
// If it is not a pointer or pointer vector we do not alias.
@@ -2451,6 +2578,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
A.recordDependence(NoAliasAA, *this, DepClassTy::OPTIONAL);
const IRPosition &VIRP = IRPosition::value(getAssociatedValue());
+ const Function *ScopeFn = VIRP.getAnchorScope();
auto &NoCaptureAA =
A.getAAFor<AANoCapture>(*this, VIRP, /* TrackDependence */ false);
// Check whether the value is captured in the scope using AANoCapture.
@@ -2459,16 +2587,18 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
auto UsePred = [&](const Use &U, bool &Follow) -> bool {
Instruction *UserI = cast<Instruction>(U.getUser());
- // If user if curr instr and only use.
- if (UserI == getCtxI() && UserI->hasOneUse())
+ // If UserI is the current instruction and there is a single potential use
+ // of the value in UserI, we allow the use.
+ // TODO: We should inspect the operands and allow those that cannot alias
+ // with the value.
+ if (UserI == getCtxI() && UserI->getNumOperands() == 1)
return true;
- const Function *ScopeFn = VIRP.getAnchorScope();
if (ScopeFn) {
const auto &ReachabilityAA =
A.getAAFor<AAReachability>(*this, IRPosition::function(*ScopeFn));
- if (!ReachabilityAA.isAssumedReachable(UserI, getCtxI()))
+ if (!ReachabilityAA.isAssumedReachable(A, *UserI, *getCtxI()))
return true;
if (auto *CB = dyn_cast<CallBase>(UserI)) {
@@ -2554,6 +2684,14 @@ struct AANoAliasReturned final : AANoAliasImpl {
AANoAliasReturned(const IRPosition &IRP, Attributor &A)
: AANoAliasImpl(IRP, A) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoAliasImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || F->isDeclaration())
+ indicatePessimisticFixpoint();
+ }
+
/// See AbstractAttribute::updateImpl(...).
virtual ChangeStatus updateImpl(Attributor &A) override {
@@ -2595,7 +2733,7 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
void initialize(Attributor &A) override {
AANoAliasImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F)
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -2608,8 +2746,7 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::returned(*F);
auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AANoAlias::StateType &>(FnAA.getState()));
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -2799,14 +2936,13 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AAIsDead::StateType &>(ArgAA.getState()));
+ return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
CallBase &CB = cast<CallBase>(getAnchorValue());
- Use &U = CB.getArgOperandUse(getArgNo());
+ Use &U = CB.getArgOperandUse(getCallSiteArgNo());
assert(!isa<UndefValue>(U.get()) &&
"Expected undef values to be filtered out!");
UndefValue &UV = *UndefValue::get(U->getType());
@@ -2921,8 +3057,14 @@ struct AAIsDeadFunction : public AAIsDead {
void initialize(Attributor &A) override {
const Function *F = getAnchorScope();
if (F && !F->isDeclaration()) {
- ToBeExploredFrom.insert(&F->getEntryBlock().front());
- assumeLive(A, F->getEntryBlock());
+ // We only want to compute liveness once. If the function is not part of
+ // the SCC, skip it.
+ if (A.isRunOn(*const_cast<Function *>(F))) {
+ ToBeExploredFrom.insert(&F->getEntryBlock().front());
+ assumeLive(A, F->getEntryBlock());
+ } else {
+ indicatePessimisticFixpoint();
+ }
}
}
@@ -2985,6 +3127,10 @@ struct AAIsDeadFunction : public AAIsDead {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
+ bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
+ return !AssumedLiveEdges.count(std::make_pair(From, To));
+ }
+
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
@@ -3062,6 +3208,9 @@ struct AAIsDeadFunction : public AAIsDead {
/// Collection of instructions that are known to not transfer control.
SmallSetVector<const Instruction *, 8> KnownDeadEnds;
+ /// Collection of all assumed live edges
+ DenseSet<std::pair<const BasicBlock *, const BasicBlock *>> AssumedLiveEdges;
+
/// Collection of all assumed live BasicBlocks.
DenseSet<const BasicBlock *> AssumedLiveBlocks;
};
@@ -3177,18 +3326,23 @@ ChangeStatus AAIsDeadFunction::updateImpl(Attributor &A) {
const Instruction *I = Worklist.pop_back_val();
LLVM_DEBUG(dbgs() << "[AAIsDead] Exploration inst: " << *I << "\n");
+ // Fast forward for uninteresting instructions. We could look for UB here
+ // though.
+ while (!I->isTerminator() && !isa<CallBase>(I)) {
+ Change = ChangeStatus::CHANGED;
+ I = I->getNextNode();
+ }
+
AliveSuccessors.clear();
bool UsedAssumedInformation = false;
switch (I->getOpcode()) {
// TODO: look for (assumed) UB to backwards propagate "deadness".
default:
- if (I->isTerminator()) {
- for (const BasicBlock *SuccBB : successors(I->getParent()))
- AliveSuccessors.push_back(&SuccBB->front());
- } else {
- AliveSuccessors.push_back(I->getNextNode());
- }
+ assert(I->isTerminator() &&
+ "Expected non-terminators to be handled already!");
+ for (const BasicBlock *SuccBB : successors(I->getParent()))
+ AliveSuccessors.push_back(&SuccBB->front());
break;
case Instruction::Call:
UsedAssumedInformation = identifyAliveSuccessors(A, cast<CallInst>(*I),
@@ -3227,6 +3381,9 @@ ChangeStatus AAIsDeadFunction::updateImpl(Attributor &A) {
"Non-terminator expected to have a single successor!");
Worklist.push_back(AliveSuccessor);
} else {
+ // Record the assumed live edge.
+ AssumedLiveEdges.insert(
+ std::make_pair(I->getParent(), AliveSuccessor->getParent()));
if (assumeLive(A, *AliveSuccessor->getParent()))
Worklist.push_back(AliveSuccessor);
}
@@ -3342,7 +3499,6 @@ struct AADereferenceableImpl : AADereferenceable {
State.addAccessedBytes(Offset, Size);
}
}
- return;
}
/// See followUsesInMBEC
@@ -3420,12 +3576,11 @@ struct AADereferenceableFloating : AADereferenceableImpl {
DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull);
T.GlobalState.indicatePessimisticFixpoint();
} else {
- const DerefState &DS = static_cast<const DerefState &>(AA.getState());
+ const DerefState &DS = AA.getState();
DerefBytes = DS.DerefBytesState.getAssumed();
T.GlobalState &= DS.GlobalState;
}
-
// For now we do not try to "increase" dereferenceability due to negative
// indices as we first have to come up with code to deal with loops and
// for overflows of the dereferenceable bytes.
@@ -3697,14 +3852,27 @@ struct AAAlignFloating : AAAlignImpl {
AAAlign::StateType &T, bool Stripped) -> bool {
const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V));
if (!Stripped && this == &AA) {
+ int64_t Offset;
+ unsigned Alignment = 1;
+ if (const Value *Base =
+ GetPointerBaseWithConstantOffset(&V, Offset, DL)) {
+ Align PA = Base->getPointerAlignment(DL);
+ // BasePointerAddr + Offset = Alignment * Q for some integer Q.
+ // So we can say that the maximum power of two which is a divisor of
+ // gcd(Offset, Alignment) is an alignment.
+
+ uint32_t gcd = greatestCommonDivisor(uint32_t(abs((int32_t)Offset)),
+ uint32_t(PA.value()));
+ Alignment = llvm::PowerOf2Floor(gcd);
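+ // Worked example, illustrative: a base with 16-byte alignment and
+ // Offset == 20 gives gcd(20, 16) == 4 and PowerOf2Floor(4) == 4, so the
+ // offset pointer is still known to be at least 4-byte aligned.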
+ } else {
+ Alignment = V.getPointerAlignment(DL).value();
+ }
// Use only IR information if we did not strip anything.
- Align PA = V.getPointerAlignment(DL);
- T.takeKnownMaximum(PA.value());
+ T.takeKnownMaximum(Alignment);
T.indicatePessimisticFixpoint();
} else {
// Use abstract attribute information.
- const AAAlign::StateType &DS =
- static_cast<const AAAlign::StateType &>(AA.getState());
+ const AAAlign::StateType &DS = AA.getState();
T ^= DS;
}
return T.isValidState();
@@ -3727,8 +3895,16 @@ struct AAAlignFloating : AAAlignImpl {
/// Align attribute for function return value.
struct AAAlignReturned final
: AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> {
- AAAlignReturned(const IRPosition &IRP, Attributor &A)
- : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>(IRP, A) {}
+ using Base = AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>;
+ AAAlignReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Base::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || F->isDeclaration())
+ indicatePessimisticFixpoint();
+ }
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) }
@@ -3802,7 +3978,7 @@ struct AAAlignCallSiteReturned final
void initialize(Attributor &A) override {
Base::initialize(A);
Function *F = getAssociatedFunction();
- if (!F)
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -3818,7 +3994,7 @@ struct AANoReturnImpl : public AANoReturn {
void initialize(Attributor &A) override {
AANoReturn::initialize(A);
Function *F = getAssociatedFunction();
- if (!F)
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -3850,6 +4026,17 @@ struct AANoReturnCallSite final : AANoReturnImpl {
AANoReturnCallSite(const IRPosition &IRP, Attributor &A)
: AANoReturnImpl(IRP, A) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoReturnImpl::initialize(A);
+ if (Function *F = getAssociatedFunction()) {
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
+ if (!FnAA.isAssumedNoReturn())
+ indicatePessimisticFixpoint();
+ }
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -3859,9 +4046,7 @@ struct AANoReturnCallSite final : AANoReturnImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AANoReturn::StateType &>(FnAA.getState()));
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -3894,7 +4079,8 @@ struct AANoCaptureImpl : public AANoCapture {
return;
}
- const Function *F = getArgNo() >= 0 ? getAssociatedFunction() : AnchorScope;
+ const Function *F =
+ isArgumentPosition() ? getAssociatedFunction() : AnchorScope;
// Check what state the associated function can actually capture.
if (F)
@@ -3913,7 +4099,7 @@ struct AANoCaptureImpl : public AANoCapture {
if (!isAssumedNoCaptureMaybeReturned())
return;
- if (getArgNo() >= 0) {
+ if (isArgumentPosition()) {
if (isAssumedNoCapture())
Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture));
else if (ManifestInternal)
@@ -3949,7 +4135,7 @@ struct AANoCaptureImpl : public AANoCapture {
State.addKnownBits(NOT_CAPTURED_IN_RET);
// Check existing "returned" attributes.
- int ArgNo = IRP.getArgNo();
+ int ArgNo = IRP.getCalleeArgNo();
if (F.doesNotThrow() && ArgNo >= 0) {
for (unsigned u = 0, e = F.arg_size(); u < e; ++u)
if (F.hasParamAttribute(u, Attribute::Returned)) {
@@ -4125,13 +4311,13 @@ private:
ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
const IRPosition &IRP = getIRPosition();
- const Value *V =
- getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue();
+ const Value *V = isArgumentPosition() ? IRP.getAssociatedArgument()
+ : &IRP.getAssociatedValue();
if (!V)
return indicatePessimisticFixpoint();
const Function *F =
- getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+ isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
assert(F && "Expected a function!");
const IRPosition &FnPos = IRPosition::function(*F);
const auto &IsDeadAA =
@@ -4248,9 +4434,7 @@ struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AANoCapture::StateType &>(ArgAA.getState()));
+ return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -4366,24 +4550,35 @@ struct AAValueSimplifyImpl : AAValueSimplify {
return true;
}
- bool askSimplifiedValueForAAValueConstantRange(Attributor &A) {
+ /// Returns true if a candidate was found, false otherwise.
+ template <typename AAType> bool askSimplifiedValueFor(Attributor &A) {
if (!getAssociatedValue().getType()->isIntegerTy())
return false;
- const auto &ValueConstantRangeAA =
- A.getAAFor<AAValueConstantRange>(*this, getIRPosition());
+ const auto &AA =
+ A.getAAFor<AAType>(*this, getIRPosition(), /* TrackDependence */ false);
- Optional<ConstantInt *> COpt =
- ValueConstantRangeAA.getAssumedConstantInt(A);
- if (COpt.hasValue()) {
- if (auto *C = COpt.getValue())
- SimplifiedAssociatedValue = C;
- else
- return false;
- } else {
+ Optional<ConstantInt *> COpt = AA.getAssumedConstantInt(A);
+
+ if (!COpt.hasValue()) {
SimplifiedAssociatedValue = llvm::None;
+ A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
+ return true;
}
- return true;
+ if (auto *C = COpt.getValue()) {
+ SimplifiedAssociatedValue = C;
+ A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
+ return true;
+ }
+ return false;
+ }
+
+ bool askSimplifiedValueForOtherAAs(Attributor &A) {
+ if (askSimplifiedValueFor<AAValueConstantRange>(A))
+ return true;
+ if (askSimplifiedValueFor<AAPotentialValues>(A))
+ return true;
+ return false;
}
/// See AbstractAttribute::manifest(...).
@@ -4468,7 +4663,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
auto PredForCallSite = [&](AbstractCallSite ACS) {
const IRPosition &ACSArgPos =
- IRPosition::callsite_argument(ACS, getArgNo());
+ IRPosition::callsite_argument(ACS, getCallSiteArgNo());
// Check if a corresponding argument was found or if it is not
// associated (which can happen for callback calls).
if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
@@ -4490,7 +4685,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
bool AllCallSitesKnown;
if (!A.checkForAllCallSites(PredForCallSite, *this, true,
AllCallSitesKnown))
- if (!askSimplifiedValueForAAValueConstantRange(A))
+ if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
// If a candidate was found in this update, return CHANGED.
@@ -4518,7 +4713,7 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl {
};
if (!A.checkForAllReturnedValues(PredForReturned, *this))
- if (!askSimplifiedValueForAAValueConstantRange(A))
+ if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
// If a candidate was found in this update, return CHANGED.
@@ -4587,10 +4782,76 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
indicatePessimisticFixpoint();
}
+ /// Check if \p ICmp is an equality comparison (==/!=) with at least one
+ /// nullptr. If so, try to simplify it using AANonNull on the other operand.
+ /// Return true if successful, in that case SimplifiedAssociatedValue will be
+ /// updated and \p Changed is set appropriately.
+ bool checkForNullPtrCompare(Attributor &A, ICmpInst *ICmp,
+ ChangeStatus &Changed) {
+ if (!ICmp)
+ return false;
+ if (!ICmp->isEquality())
+ return false;
+
+ // This is a comparison with == or !=. We check for nullptr now.
+ bool Op0IsNull = isa<ConstantPointerNull>(ICmp->getOperand(0));
+ bool Op1IsNull = isa<ConstantPointerNull>(ICmp->getOperand(1));
+ if (!Op0IsNull && !Op1IsNull)
+ return false;
+
+ LLVMContext &Ctx = ICmp->getContext();
+ // Check for `nullptr ==/!= nullptr` first:
+ if (Op0IsNull && Op1IsNull) {
+ Value *NewVal = ConstantInt::get(
+ Type::getInt1Ty(Ctx), ICmp->getPredicate() == CmpInst::ICMP_EQ);
+ assert(!SimplifiedAssociatedValue.hasValue() &&
+ "Did not expect non-fixed value for constant comparison");
+ SimplifiedAssociatedValue = NewVal;
+ indicateOptimisticFixpoint();
+ Changed = ChangeStatus::CHANGED;
+ return true;
+ }
+
+ // What is left is the nullptr ==/!= non-nullptr case. We will use
+ // AANonNull on the non-nullptr operand; if we can assume it is non-null,
+ // we can conclude the result of the comparison.
+ assert((Op0IsNull || Op1IsNull) &&
+ "Expected nullptr versus non-nullptr comparison at this point");
+
+ // The index is the operand that we assume is not null.
+ unsigned PtrIdx = Op0IsNull;
+ auto &PtrNonNullAA = A.getAAFor<AANonNull>(
+ *this, IRPosition::value(*ICmp->getOperand(PtrIdx)));
+ if (!PtrNonNullAA.isAssumedNonNull())
+ return false;
+
+ // The new value depends on the predicate, true for != and false for ==.
+ Value *NewVal = ConstantInt::get(Type::getInt1Ty(Ctx),
+ ICmp->getPredicate() == CmpInst::ICMP_NE);
+
+ assert((!SimplifiedAssociatedValue.hasValue() ||
+ SimplifiedAssociatedValue == NewVal) &&
+ "Did not expect to change value for zero-comparison");
+
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+ SimplifiedAssociatedValue = NewVal;
+
+ if (PtrNonNullAA.isKnownNonNull())
+ indicateOptimisticFixpoint();
+
+ Changed = HasValueBefore ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+ return true;
+ }
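+
+ // Illustrative IR, assumed example: for
+ //   %c = icmp eq i8* %p, null
+ // with AANonNull assuming %p is non-null, %c simplifies to i1 false (and
+ // the icmp ne form to i1 true); the value is fixed once the non-null fact
+ // is known rather than merely assumed.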
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+ ChangeStatus Changed;
+ if (checkForNullPtrCompare(A, dyn_cast<ICmpInst>(&getAnchorValue()),
+ Changed))
+ return Changed;
+
auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &,
bool Stripped) -> bool {
auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V));
@@ -4608,7 +4869,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
if (!genericValueTraversal<AAValueSimplify, bool>(
A, getIRPosition(), *this, Dummy, VisitValueCB, getCtxI(),
/* UseValueSimplify */ false))
- if (!askSimplifiedValueForAAValueConstantRange(A))
+ if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
// If a candidate was found in this update, return CHANGED.
@@ -4683,7 +4944,8 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())
: UndefValue::get(V.getType());
if (C) {
- Use &U = cast<CallBase>(&getAnchorValue())->getArgOperandUse(getArgNo());
+ Use &U = cast<CallBase>(&getAnchorValue())
+ ->getArgOperandUse(getCallSiteArgNo());
// We can replace the AssociatedValue with the constant.
if (&V != C && V.getType() == C->getType()) {
if (A.changeUseAfterManifest(U, *C))
@@ -5002,7 +5264,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
return getAssociatedValue().getType()->getPointerElementType();
Optional<Type *> Ty;
- unsigned ArgNo = getIRPosition().getArgNo();
+ unsigned ArgNo = getIRPosition().getCallSiteArgNo();
// Make sure the associated call site argument has the same type at all call
// sites and it is an allocation we know is safe to privatize, for now that
@@ -5265,8 +5527,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
}
} else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
- Type *PointeePtrTy = PrivArrayType->getElementType()->getPointerTo();
- uint64_t PointeeTySize = DL.getTypeStoreSize(PointeePtrTy);
+ Type *PointeeTy = PrivArrayType->getElementType();
+ Type *PointeePtrTy = PointeeTy->getPointerTo();
+ uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
Value *Ptr =
constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL);
@@ -5312,7 +5575,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
Value *Ptr =
constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL);
- LoadInst *L = new LoadInst(PointeePtrTy, Ptr, "", IP);
+ LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
L->setAlignment(Alignment);
ReplacementValues.push_back(L);
}
@@ -5356,10 +5619,14 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Function &ReplacementFn, Function::arg_iterator ArgIt) {
BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
Instruction *IP = &*EntryBB.getFirstInsertionPt();
- auto *AI = new AllocaInst(PrivatizableType.getValue(), 0,
- Arg->getName() + ".priv", IP);
+ Instruction *AI = new AllocaInst(PrivatizableType.getValue(), 0,
+ Arg->getName() + ".priv", IP);
createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
ArgIt->getArgNo(), *IP);
+
+ if (AI->getType() != Arg->getType())
+ AI =
+ BitCastInst::CreateBitOrPointerCast(AI, Arg->getType(), "", IP);
Arg->replaceAllUsesWith(AI);
for (CallInst *CI : TailCalls)
@@ -5418,8 +5685,7 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
/// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
Optional<Type *> identifyPrivatizableType(Attributor &A) override {
- Value *Obj =
- GetUnderlyingObject(&getAssociatedValue(), A.getInfoCache().getDL());
+ Value *Obj = getUnderlyingObject(&getAssociatedValue());
if (!Obj) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n");
return nullptr;
@@ -5539,7 +5805,7 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
void initialize(Attributor &A) override {
intersectAssumedBits(BEST_STATE);
getKnownStateFromValue(getIRPosition(), getState());
- IRAttribute::initialize(A);
+ AAMemoryBehavior::initialize(A);
}
/// Return the memory behavior information encoded in the IR for \p IRP.
@@ -5634,9 +5900,7 @@ struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAMemoryBehaviorImpl::initialize(A);
- // Initialize the use vector with all direct uses of the associated value.
- for (const Use &U : getAssociatedValue().uses())
- Uses.insert(&U);
+ addUsesOf(A, getAssociatedValue());
}
/// See AbstractAttribute::updateImpl(...).
@@ -5662,8 +5926,14 @@ private:
void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI);
protected:
+ /// Add the uses of \p V to the `Uses` set we look at during the update step.
+ void addUsesOf(Attributor &A, const Value &V);
+
/// Container for (transitive) uses of the associated argument.
- SetVector<const Use *> Uses;
+ SmallVector<const Use *, 8> Uses;
+
+ /// Set to remember the uses we already traversed.
+ SmallPtrSet<const Use *, 8> Visited;
};
/// Memory behavior attribute for function argument.
@@ -5688,9 +5958,7 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
if (!Arg || !A.isFunctionIPOAmendable(*(Arg->getParent()))) {
indicatePessimisticFixpoint();
} else {
- // Initialize the use vector with all direct uses of the associated value.
- for (const Use &U : Arg->uses())
- Uses.insert(&U);
+ addUsesOf(A, *Arg);
}
}
@@ -5725,14 +5993,21 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- if (Argument *Arg = getAssociatedArgument()) {
- if (Arg->hasByValAttr()) {
- addKnownBits(NO_WRITES);
- removeKnownBits(NO_READS);
- removeAssumedBits(NO_READS);
- }
+ // If we don't have an associated attribute this is either a variadic call
+ // or an indirect call, either way, nothing to do here.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+ if (Arg->hasByValAttr()) {
+ addKnownBits(NO_WRITES);
+ removeKnownBits(NO_READS);
+ removeAssumedBits(NO_READS);
}
AAMemoryBehaviorArgument::initialize(A);
+ if (getAssociatedFunction()->isDeclaration())
+ indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
@@ -5744,9 +6019,7 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
Argument *Arg = getAssociatedArgument();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AAMemoryBehavior::StateType &>(ArgAA.getState()));
+ return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -5765,6 +6038,14 @@ struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating {
AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AAMemoryBehaviorFloating(IRP, A) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAMemoryBehaviorImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || F->isDeclaration())
+ indicatePessimisticFixpoint();
+ }
+
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
// We do not annotate returned values.
@@ -5814,10 +6095,8 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
void initialize(Attributor &A) override {
AAMemoryBehaviorImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || !A.isFunctionIPOAmendable(*F)) {
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
- return;
- }
}
/// See AbstractAttribute::updateImpl(...).
@@ -5829,9 +6108,7 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AAMemoryBehavior::StateType &>(FnAA.getState()));
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -5933,8 +6210,7 @@ ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
// Check if the users of UserI should also be visited.
if (followUsersOfUseIn(A, U, UserI))
- for (const Use &UserIUse : UserI->uses())
- Uses.insert(&UserIUse);
+ addUsesOf(A, *UserI);
// If UserI might touch memory we analyze the use in detail.
if (UserI->mayReadOrWriteMemory())
@@ -5945,6 +6221,28 @@ ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
: ChangeStatus::UNCHANGED;
}
+void AAMemoryBehaviorFloating::addUsesOf(Attributor &A, const Value &V) {
+ SmallVector<const Use *, 8> WL;
+ for (const Use &U : V.uses())
+ WL.push_back(&U);
+
+ while (!WL.empty()) {
+ const Use *U = WL.pop_back_val();
+ if (!Visited.insert(U).second)
+ continue;
+
+ const Instruction *UserI = cast<Instruction>(U->getUser());
+ if (UserI->mayReadOrWriteMemory()) {
+ Uses.push_back(U);
+ continue;
+ }
+ if (!followUsersOfUseIn(A, U, UserI))
+ continue;
+ for (const Use &UU : UserI->uses())
+ WL.push_back(&UU);
+ }
+}
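+
+// Example, illustrative: for an argument %p used by a GEP whose result is
+// loaded, the GEP use itself is not recorded (a GEP does not touch memory);
+// assuming followUsersOfUseIn follows the GEP, its uses are queued and the
+// load's use of the GEP result is recorded in Uses for the update step.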
+
bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U,
const Instruction *UserI) {
// The loaded value is unrelated to the pointer argument, no need to
@@ -6096,7 +6394,7 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
void initialize(Attributor &A) override {
intersectAssumedBits(BEST_STATE);
getKnownStateFromValue(A, getIRPosition(), getState());
- IRAttribute::initialize(A);
+ AAMemoryLocation::initialize(A);
}
/// Return the memory behavior information encoded in the IR for \p IRP.
@@ -6259,6 +6557,13 @@ protected:
using AccessSet = SmallSet<AccessInfo, 2, AccessInfo>;
AccessSet *AccessKind2Accesses[llvm::CTLog2<VALID_STATE>()];
+ /// Categorize the pointer arguments of \p CB that might access memory in
+ /// \p AccessedLocs and update the state and access map accordingly.
+ void
+ categorizeArgumentPointerLocations(Attributor &A, CallBase &CB,
+ AAMemoryLocation::StateType &AccessedLocs,
+ bool &Changed);
+
/// Return the kind(s) of location that may be accessed by \p V.
AAMemoryLocation::MemoryLocationsKind
categorizeAccessedLocations(Attributor &A, Instruction &I, bool &Changed);
@@ -6324,6 +6629,7 @@ void AAMemoryLocationImpl::categorizePtrValue(
auto VisitValueCB = [&](Value &V, const Instruction *,
AAMemoryLocation::StateType &T,
bool Stripped) -> bool {
+ // TODO: recognize the TBAA used for constant accesses.
MemoryLocationsKind MLK = NO_LOCATIONS;
assert(!isa<GEPOperator>(V) && "GEPs should have been stripped.");
if (isa<UndefValue>(V))
@@ -6334,6 +6640,13 @@ void AAMemoryLocationImpl::categorizePtrValue(
else
MLK = NO_ARGUMENT_MEM;
} else if (auto *GV = dyn_cast<GlobalValue>(&V)) {
+ // Reading constant memory is not treated as a read "effect" by the
+ // function attr pass, so we won't treat it as one either. Constants
+ // identified by TBAA are similar. (We know we do not write it because it
+ // is constant.)
+ if (auto *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isConstant())
+ return true;
+
if (GV->hasLocalLinkage())
MLK = NO_GLOBAL_INTERNAL_MEM;
else
@@ -6380,6 +6693,30 @@ void AAMemoryLocationImpl::categorizePtrValue(
}
}
+void AAMemoryLocationImpl::categorizeArgumentPointerLocations(
+ Attributor &A, CallBase &CB, AAMemoryLocation::StateType &AccessedLocs,
+ bool &Changed) {
+ for (unsigned ArgNo = 0, E = CB.getNumArgOperands(); ArgNo < E; ++ArgNo) {
+
+ // Skip non-pointer arguments.
+ const Value *ArgOp = CB.getArgOperand(ArgNo);
+ if (!ArgOp->getType()->isPtrOrPtrVectorTy())
+ continue;
+
+ // Skip readnone arguments.
+ const IRPosition &ArgOpIRP = IRPosition::callsite_argument(CB, ArgNo);
+ const auto &ArgOpMemLocationAA = A.getAAFor<AAMemoryBehavior>(
+ *this, ArgOpIRP, /* TrackDependence */ true, DepClassTy::OPTIONAL);
+
+ if (ArgOpMemLocationAA.isAssumedReadNone())
+ continue;
+
+ // Categorize potentially accessed pointer arguments as if there was an
+ // access instruction with them as pointer.
+ categorizePtrValue(A, CB, *ArgOp, AccessedLocs, Changed);
+ }
+}
+
AAMemoryLocation::MemoryLocationsKind
AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
bool &Changed) {
@@ -6441,28 +6778,8 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
// Now handle argument memory if it might be accessed.
bool HasArgAccesses = ((~CBAssumedNotAccessedLocs) & NO_ARGUMENT_MEM);
- if (HasArgAccesses) {
- for (unsigned ArgNo = 0, E = CB->getNumArgOperands(); ArgNo < E;
- ++ArgNo) {
-
- // Skip non-pointer arguments.
- const Value *ArgOp = CB->getArgOperand(ArgNo);
- if (!ArgOp->getType()->isPtrOrPtrVectorTy())
- continue;
-
- // Skip readnone arguments.
- const IRPosition &ArgOpIRP = IRPosition::callsite_argument(*CB, ArgNo);
- const auto &ArgOpMemLocationAA = A.getAAFor<AAMemoryBehavior>(
- *this, ArgOpIRP, /* TrackDependence */ true, DepClassTy::OPTIONAL);
-
- if (ArgOpMemLocationAA.isAssumedReadNone())
- continue;
-
- // Categorize potentially accessed pointer arguments as if there was an
- // access instruction with them as pointer.
- categorizePtrValue(A, I, *ArgOp, AccessedLocs, Changed);
- }
- }
+ if (HasArgAccesses)
+ categorizeArgumentPointerLocations(A, *CB, AccessedLocs, Changed);
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Accessed state after argument handling: "
@@ -6514,7 +6831,9 @@ struct AAMemoryLocationFunction final : public AAMemoryLocationImpl {
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Accessed locations for " << I
<< ": " << getMemoryLocationsAsStr(MLK) << "\n");
removeAssumedBits(inverseLocation(MLK, false, false));
- return true;
+ // Stop once only the valid bit is set in the *not assumed location*,
+ // i.e., once we no longer exclude any memory locations in the state.
+ return getAssumedNotAccessedLocation() != VALID_STATE;
};
if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
@@ -6546,10 +6865,8 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
void initialize(Attributor &A) override {
AAMemoryLocationImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || !A.isFunctionIPOAmendable(*F)) {
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
- return;
- }
}
/// See AbstractAttribute::updateImpl(...).
@@ -6655,7 +6972,6 @@ struct AAValueConstantRangeImpl : AAValueConstantRange {
if (!LVI || !CtxI)
return getWorstState(getBitWidth());
return LVI->getConstantRange(&getAssociatedValue(),
- const_cast<BasicBlock *>(CtxI->getParent()),
const_cast<Instruction *>(CtxI));
}
@@ -6759,10 +7075,13 @@ struct AAValueConstantRangeImpl : AAValueConstantRange {
auto &V = getAssociatedValue();
if (!AssumedConstantRange.isEmptySet() &&
!AssumedConstantRange.isSingleElement()) {
- if (Instruction *I = dyn_cast<Instruction>(&V))
+ if (Instruction *I = dyn_cast<Instruction>(&V)) {
+ assert(I == getCtxI() && "Should not annotate an instruction which is "
+ "not the context instruction");
if (isa<CallInst>(I) || isa<LoadInst>(I))
if (setRangeMetadataIfisBetterRange(I, AssumedConstantRange))
Changed = ChangeStatus::CHANGED;
+ }
}
return Changed;
@@ -6831,6 +7150,9 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
return;
}
+ if (isa<CallBase>(&V))
+ return;
+
if (isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<CastInst>(&V))
return;
// If it is a load instruction with range metadata, use it.
@@ -7068,11 +7390,641 @@ struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating {
AAValueConstantRangeCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AAValueConstantRangeFloating(IRP, A) {}
+ /// See AbstractAttribute::manifest()
+ ChangeStatus manifest(Attributor &A) override {
+ return ChangeStatus::UNCHANGED;
+ }
+
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CSARG_ATTR(value_range)
}
};
+
+/// ------------------ Potential Values Attribute -------------------------
+
+struct AAPotentialValuesImpl : AAPotentialValues {
+ using StateType = PotentialConstantIntValuesState;
+
+ AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValues(IRP, A) {}
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ std::string Str;
+ llvm::raw_string_ostream OS(Str);
+ OS << getState();
+ return OS.str();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+};
+
+struct AAPotentialValuesArgument final
+ : AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
+ PotentialConstantIntValuesState> {
+ using Base =
+ AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
+ PotentialConstantIntValuesState>;
+ AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ if (!getAnchorScope() || getAnchorScope()->isDeclaration()) {
+ indicatePessimisticFixpoint();
+ } else {
+ Base::initialize(A);
+ }
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesReturned
+ : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> {
+ using Base =
+ AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>;
+ AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesFloating : AAPotentialValuesImpl {
+ AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ Value &V = getAssociatedValue();
+
+ if (auto *C = dyn_cast<ConstantInt>(&V)) {
+ unionAssumed(C->getValue());
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ if (isa<UndefValue>(&V)) {
+ unionAssumedWithUndef();
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ if (isa<BinaryOperator>(&V) || isa<ICmpInst>(&V) || isa<CastInst>(&V))
+ return;
+
+ if (isa<SelectInst>(V) || isa<PHINode>(V))
+ return;
+
+ indicatePessimisticFixpoint();
+
+ LLVM_DEBUG(dbgs() << "[AAPotentialValues] We give up: "
+ << getAssociatedValue() << "\n");
+ }
+
+ static bool calculateICmpInst(const ICmpInst *ICI, const APInt &LHS,
+ const APInt &RHS) {
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ switch (Pred) {
+ case ICmpInst::ICMP_UGT:
+ return LHS.ugt(RHS);
+ case ICmpInst::ICMP_SGT:
+ return LHS.sgt(RHS);
+ case ICmpInst::ICMP_EQ:
+ return LHS.eq(RHS);
+ case ICmpInst::ICMP_UGE:
+ return LHS.uge(RHS);
+ case ICmpInst::ICMP_SGE:
+ return LHS.sge(RHS);
+ case ICmpInst::ICMP_ULT:
+ return LHS.ult(RHS);
+ case ICmpInst::ICMP_SLT:
+ return LHS.slt(RHS);
+ case ICmpInst::ICMP_NE:
+ return LHS.ne(RHS);
+ case ICmpInst::ICMP_ULE:
+ return LHS.ule(RHS);
+ case ICmpInst::ICMP_SLE:
+ return LHS.sle(RHS);
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ }
+ }
+
+ static APInt calculateCastInst(const CastInst *CI, const APInt &Src,
+ uint32_t ResultBitWidth) {
+ Instruction::CastOps CastOp = CI->getOpcode();
+ switch (CastOp) {
+ default:
+ llvm_unreachable("unsupported or not integer cast");
+ case Instruction::Trunc:
+ return Src.trunc(ResultBitWidth);
+ case Instruction::SExt:
+ return Src.sext(ResultBitWidth);
+ case Instruction::ZExt:
+ return Src.zext(ResultBitWidth);
+ case Instruction::BitCast:
+ return Src;
+ }
+ }
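+
+ // Worked example, illustrative: truncating the i32 constant 300 to i8 via
+ // calculateCastInst yields 44 (300 mod 256); sext of i8 -1 to i32 yields
+ // -1 and zext of i8 -1 yields 255.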
+
+ static APInt calculateBinaryOperator(const BinaryOperator *BinOp,
+ const APInt &LHS, const APInt &RHS,
+ bool &SkipOperation, bool &Unsupported) {
+ Instruction::BinaryOps BinOpcode = BinOp->getOpcode();
+ // Unsupported is set to true when the binary operator is not supported.
+ // SkipOperation is set to true when UB occurs with the given operand pair
+ // (LHS, RHS).
+ // TODO: we should look at nsw and nuw keywords to handle operations
+ // that create poison or undef value.
+ switch (BinOpcode) {
+ default:
+ Unsupported = true;
+ return LHS;
+ case Instruction::Add:
+ return LHS + RHS;
+ case Instruction::Sub:
+ return LHS - RHS;
+ case Instruction::Mul:
+ return LHS * RHS;
+ case Instruction::UDiv:
+ if (RHS.isNullValue()) {
+ SkipOperation = true;
+ return LHS;
+ }
+ return LHS.udiv(RHS);
+ case Instruction::SDiv:
+ if (RHS.isNullValue()) {
+ SkipOperation = true;
+ return LHS;
+ }
+ return LHS.sdiv(RHS);
+ case Instruction::URem:
+ if (RHS.isNullValue()) {
+ SkipOperation = true;
+ return LHS;
+ }
+ return LHS.urem(RHS);
+ case Instruction::SRem:
+ if (RHS.isNullValue()) {
+ SkipOperation = true;
+ return LHS;
+ }
+ return LHS.srem(RHS);
+ case Instruction::Shl:
+ return LHS.shl(RHS);
+ case Instruction::LShr:
+ return LHS.lshr(RHS);
+ case Instruction::AShr:
+ return LHS.ashr(RHS);
+ case Instruction::And:
+ return LHS & RHS;
+ case Instruction::Or:
+ return LHS | RHS;
+ case Instruction::Xor:
+ return LHS ^ RHS;
+ }
+ }
+
+ bool calculateBinaryOperatorAndTakeUnion(const BinaryOperator *BinOp,
+ const APInt &LHS, const APInt &RHS) {
+ bool SkipOperation = false;
+ bool Unsupported = false;
+ APInt Result =
+ calculateBinaryOperator(BinOp, LHS, RHS, SkipOperation, Unsupported);
+ if (Unsupported)
+ return false;
+ // If SkipOperation is true, we can ignore this operand pair (L, R).
+ if (!SkipOperation)
+ unionAssumed(Result);
+ return isValidState();
+ }
+
+ ChangeStatus updateWithICmpInst(Attributor &A, ICmpInst *ICI) {
+ auto AssumedBefore = getAssumed();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return indicatePessimisticFixpoint();
+
+ auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
+ if (!LHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
+ if (!RHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
+ const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
+
+ // TODO: make use of undef flag to limit potential values aggressively.
+ bool MaybeTrue = false, MaybeFalse = false;
+ const APInt Zero(RHS->getType()->getIntegerBitWidth(), 0);
+ if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) {
+ // The result of any comparison between undefs can be soundly replaced
+ // with undef.
+ unionAssumedWithUndef();
+ } else if (LHSAA.undefIsContained()) {
+ bool MaybeTrue = false, MaybeFalse = false;
+ for (const APInt &R : RHSAAPVS) {
+ bool CmpResult = calculateICmpInst(ICI, Zero, R);
+ MaybeTrue |= CmpResult;
+ MaybeFalse |= !CmpResult;
+ if (MaybeTrue & MaybeFalse)
+ return indicatePessimisticFixpoint();
+ }
+ } else if (RHSAA.undefIsContained()) {
+ for (const APInt &L : LHSAAPVS) {
+ bool CmpResult = calculateICmpInst(ICI, L, Zero);
+ MaybeTrue |= CmpResult;
+ MaybeFalse |= !CmpResult;
+ if (MaybeTrue & MaybeFalse)
+ return indicatePessimisticFixpoint();
+ }
+ } else {
+ for (const APInt &L : LHSAAPVS) {
+ for (const APInt &R : RHSAAPVS) {
+ bool CmpResult = calculateICmpInst(ICI, L, R);
+ MaybeTrue |= CmpResult;
+ MaybeFalse |= !CmpResult;
+ if (MaybeTrue & MaybeFalse)
+ return indicatePessimisticFixpoint();
+ }
+ }
+ }
+ if (MaybeTrue)
+ unionAssumed(APInt(/* numBits */ 1, /* val */ 1));
+ if (MaybeFalse)
+ unionAssumed(APInt(/* numBits */ 1, /* val */ 0));
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ ChangeStatus updateWithSelectInst(Attributor &A, SelectInst *SI) {
+ auto AssumedBefore = getAssumed();
+ Value *LHS = SI->getTrueValue();
+ Value *RHS = SI->getFalseValue();
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return indicatePessimisticFixpoint();
+
+ // TODO: Use assumed simplified condition value
+ auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
+ if (!LHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
+ if (!RHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ if (LHSAA.undefIsContained() && RHSAA.undefIsContained())
+ // select i1 *, undef , undef => undef
+ unionAssumedWithUndef();
+ else {
+ unionAssumed(LHSAA);
+ unionAssumed(RHSAA);
+ }
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ ChangeStatus updateWithCastInst(Attributor &A, CastInst *CI) {
+ auto AssumedBefore = getAssumed();
+ if (!CI->isIntegerCast())
+ return indicatePessimisticFixpoint();
+ assert(CI->getNumOperands() == 1 && "Expected cast to be unary!");
+ uint32_t ResultBitWidth = CI->getDestTy()->getIntegerBitWidth();
+ Value *Src = CI->getOperand(0);
+ auto &SrcAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*Src));
+ if (!SrcAA.isValidState())
+ return indicatePessimisticFixpoint();
+ const DenseSet<APInt> &SrcAAPVS = SrcAA.getAssumedSet();
+ if (SrcAA.undefIsContained())
+ unionAssumedWithUndef();
+ else {
+ for (const APInt &S : SrcAAPVS) {
+ APInt T = calculateCastInst(CI, S, ResultBitWidth);
+ unionAssumed(T);
+ }
+ }
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ ChangeStatus updateWithBinaryOperator(Attributor &A, BinaryOperator *BinOp) {
+ auto AssumedBefore = getAssumed();
+ Value *LHS = BinOp->getOperand(0);
+ Value *RHS = BinOp->getOperand(1);
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return indicatePessimisticFixpoint();
+
+ auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
+ if (!LHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
+ if (!RHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
+ const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
+ const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0);
+
+ // TODO: make use of undef flag to limit potential values aggressively.
+ if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) {
+ if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, Zero))
+ return indicatePessimisticFixpoint();
+ } else if (LHSAA.undefIsContained()) {
+ for (const APInt &R : RHSAAPVS) {
+ if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, R))
+ return indicatePessimisticFixpoint();
+ }
+ } else if (RHSAA.undefIsContained()) {
+ for (const APInt &L : LHSAAPVS) {
+ if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, Zero))
+ return indicatePessimisticFixpoint();
+ }
+ } else {
+ for (const APInt &L : LHSAAPVS) {
+ for (const APInt &R : RHSAAPVS) {
+ if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, R))
+ return indicatePessimisticFixpoint();
+ }
+ }
+ }
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ ChangeStatus updateWithPHINode(Attributor &A, PHINode *PHI) {
+ auto AssumedBefore = getAssumed();
+ for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
+ Value *IncomingValue = PHI->getIncomingValue(u);
+ auto &PotentialValuesAA = A.getAAFor<AAPotentialValues>(
+ *this, IRPosition::value(*IncomingValue));
+ if (!PotentialValuesAA.isValidState())
+ return indicatePessimisticFixpoint();
+ if (PotentialValuesAA.undefIsContained())
+ unionAssumedWithUndef();
+ else
+ unionAssumed(PotentialValuesAA.getAssumed());
+ }
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ Value &V = getAssociatedValue();
+ Instruction *I = dyn_cast<Instruction>(&V);
+
+ if (auto *ICI = dyn_cast<ICmpInst>(I))
+ return updateWithICmpInst(A, ICI);
+
+ if (auto *SI = dyn_cast<SelectInst>(I))
+ return updateWithSelectInst(A, SI);
+
+ if (auto *CI = dyn_cast<CastInst>(I))
+ return updateWithCastInst(A, CI);
+
+ if (auto *BinOp = dyn_cast<BinaryOperator>(I))
+ return updateWithBinaryOperator(A, BinOp);
+
+ if (auto *PHI = dyn_cast<PHINode>(I))
+ return updateWithPHINode(A, PHI);
+
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesFunction : AAPotentialValuesImpl {
+ AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will "
+ "not be called");
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FN_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesCallSite : AAPotentialValuesFunction {
+ AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesFunction(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CS_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesCallSiteReturned
+ : AACallSiteReturnedFromReturned<AAPotentialValues, AAPotentialValuesImpl> {
+ AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AACallSiteReturnedFromReturned<AAPotentialValues,
+ AAPotentialValuesImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
+ AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesFloating(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ Value &V = getAssociatedValue();
+
+ if (auto *C = dyn_cast<ConstantInt>(&V)) {
+ unionAssumed(C->getValue());
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ if (isa<UndefValue>(&V)) {
+ unionAssumedWithUndef();
+ indicateOptimisticFixpoint();
+ return;
+ }
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ Value &V = getAssociatedValue();
+ auto AssumedBefore = getAssumed();
+ auto &AA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(V));
+ const auto &S = AA.getAssumed();
+ unionAssumed(S);
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(potential_values)
+ }
+};
+
+/// ------------------------ NoUndef Attribute ---------------------------------
+struct AANoUndefImpl : AANoUndef {
+ AANoUndefImpl(const IRPosition &IRP, Attributor &A) : AANoUndef(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (getIRPosition().hasAttr({Attribute::NoUndef})) {
+ indicateOptimisticFixpoint();
+ return;
+ }
+ Value &V = getAssociatedValue();
+ if (isa<UndefValue>(V))
+ indicatePessimisticFixpoint();
+ else if (isa<FreezeInst>(V))
+ indicateOptimisticFixpoint();
+ else if (getPositionKind() != IRPosition::IRP_RETURNED &&
+ isGuaranteedNotToBeUndefOrPoison(&V))
+ indicateOptimisticFixpoint();
+ else
+ AANoUndef::initialize(A);
+ }
+
+ /// See followUsesInMBEC
+ bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
+ AANoUndef::StateType &State) {
+ const Value *UseV = U->get();
+ const DominatorTree *DT = nullptr;
+ AssumptionCache *AC = nullptr;
+ InformationCache &InfoCache = A.getInfoCache();
+ if (Function *F = getAnchorScope()) {
+ DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F);
+ AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F);
+ }
+ State.setKnown(isGuaranteedNotToBeUndefOrPoison(UseV, AC, I, DT));
+ bool TrackUse = false;
+ // Track use for instructions which must produce undef or poison bits when
+ // at least one operand contains such bits.
+ if (isa<CastInst>(*I) || isa<GetElementPtrInst>(*I))
+ TrackUse = true;
+ return TrackUse;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "noundef" : "may-undef-or-poison";
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ // We don't manifest noundef attribute for dead positions because the
+ // associated values with dead positions would be replaced with undef
+ // values.
+ if (A.isAssumedDead(getIRPosition(), nullptr, nullptr))
+ return ChangeStatus::UNCHANGED;
+ // A position whose simplified value does not have any value is
+ // considered to be dead. We don't manifest noundef in such positions for
+ // the same reason above.
+ auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>(
+ *this, getIRPosition(), /* TrackDependence */ false);
+ if (!ValueSimplifyAA.getAssumedSimplifiedValue(A).hasValue())
+ return ChangeStatus::UNCHANGED;
+ return AANoUndef::manifest(A);
+ }
+};
+
+struct AANoUndefFloating : public AANoUndefImpl {
+ AANoUndefFloating(const IRPosition &IRP, Attributor &A)
+ : AANoUndefImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoUndefImpl::initialize(A);
+ if (!getState().isAtFixpoint())
+ if (Instruction *CtxI = getCtxI())
+ followUsesInMBEC(*this, A, getState(), *CtxI);
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
+ AANoUndef::StateType &T, bool Stripped) -> bool {
+ const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ T.indicatePessimisticFixpoint();
+ } else {
+ const AANoUndef::StateType &S =
+ static_cast<const AANoUndef::StateType &>(AA.getState());
+ T ^= S;
+ }
+ return T.isValidState();
+ };
+
+ StateType T;
+ if (!genericValueTraversal<AANoUndef, StateType>(
+ A, getIRPosition(), *this, T, VisitValueCB, getCtxI()))
+ return indicatePessimisticFixpoint();
+
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) }
+};
+
+struct AANoUndefReturned final
+ : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl> {
+ AANoUndefReturned(const IRPosition &IRP, Attributor &A)
+ : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) }
+};
+
+struct AANoUndefArgument final
+ : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl> {
+ AANoUndefArgument(const IRPosition &IRP, Attributor &A)
+ : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noundef) }
+};
+
+struct AANoUndefCallSiteArgument final : AANoUndefFloating {
+ AANoUndefCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AANoUndefFloating(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noundef) }
+};
+
+struct AANoUndefCallSiteReturned final
+ : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl> {
+ AANoUndefCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noundef) }
+};
} // namespace
const char AAReturnedValues::ID = 0;
@@ -7096,6 +8048,8 @@ const char AAPrivatizablePtr::ID = 0;
const char AAMemoryBehavior::ID = 0;
const char AAMemoryLocation::ID = 0;
const char AAValueConstantRange::ID = 0;
+const char AAPotentialValues::ID = 0;
+const char AANoUndef::ID = 0;
// Macro magic to create the static generator function for attributes that
// follow the naming scheme.
@@ -7205,6 +8159,8 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
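
The hunk above introduces AAPotentialValues, which tracks a small set of candidate constant integers per IR position and evaluates comparisons and binary operators over the cross product of the operand sets, giving up (pessimistic fixpoint) as soon as both outcomes become possible. A minimal standalone sketch of that cross-product idea, with plain int64_t standing in for APInt and evaluateCmpOverSets as an illustrative name rather than an LLVM API:

    // Sketch only: models the "union over all operand pairs, bail out when
    // imprecise" pattern; it is not the Attributor code itself.
    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <optional>
    #include <set>

    // nullopt means "both true and false are possible", i.e. give up.
    using CmpSummary = std::optional<bool>;

    static CmpSummary
    evaluateCmpOverSets(const std::set<int64_t> &LHS, const std::set<int64_t> &RHS,
                        const std::function<bool(int64_t, int64_t)> &Pred) {
      bool MaybeTrue = false, MaybeFalse = false;
      for (int64_t L : LHS)
        for (int64_t R : RHS) {
          bool Res = Pred(L, R);
          MaybeTrue |= Res;
          MaybeFalse |= !Res;
          if (MaybeTrue && MaybeFalse)
            return std::nullopt; // Imprecise: mirrors indicatePessimisticFixpoint().
        }
      if (MaybeTrue)
        return true;
      if (MaybeFalse)
        return false;
      return std::nullopt; // Empty sets: nothing is known.
    }

    int main() {
      // %x is assumed to be one of {1, 2, 3} and %y one of {5, 9}.
      std::set<int64_t> X = {1, 2, 3}, Y = {5, 9};
      CmpSummary SLT = evaluateCmpOverSets(
          X, Y, [](int64_t L, int64_t R) { return L < R; }); // icmp slt
      std::cout << (SLT ? (*SLT ? "always true" : "always false") : "unknown")
                << "\n"; // prints "always true"
      return 0;
    }

The same early-exit shape appears in updateWithICmpInst and updateWithBinaryOperator above: once the assumed set can no longer stay small and precise, the attribute gives up rather than enumerate everything.
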
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/BlockExtractor.cpp
index 1d1300c6cd1d..c6e222a096eb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/BlockExtractor.cpp
@@ -11,10 +11,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/BlockExtractor.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -38,13 +40,10 @@ cl::opt<bool> BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
cl::desc("Erase the existing functions"),
cl::Hidden);
namespace {
-class BlockExtractor : public ModulePass {
- SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks;
- bool EraseFunctions;
- /// Map a function name to groups of blocks.
- SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
- BlocksByName;
-
+class BlockExtractor {
+public:
+ BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {}
+ bool runOnModule(Module &M);
void init(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
&GroupsOfBlocksToExtract) {
for (const SmallVectorImpl<BasicBlock *> &GroupOfBlocks :
@@ -57,11 +56,26 @@ class BlockExtractor : public ModulePass {
loadFile();
}
+private:
+ SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks;
+ bool EraseFunctions;
+ /// Map a function name to groups of blocks.
+ SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
+ BlocksByName;
+
+ void loadFile();
+ void splitLandingPadPreds(Function &F);
+};
+
+class BlockExtractorLegacyPass : public ModulePass {
+ BlockExtractor BE;
+ bool runOnModule(Module &M) override;
+
public:
static char ID;
- BlockExtractor(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
- bool EraseFunctions)
- : ModulePass(ID), EraseFunctions(EraseFunctions) {
+ BlockExtractorLegacyPass(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
+ bool EraseFunctions)
+ : ModulePass(ID), BE(EraseFunctions) {
// We want one group per element of the input list.
SmallVector<SmallVector<BasicBlock *, 16>, 4> MassagedGroupsOfBlocks;
for (BasicBlock *BB : BlocksToExtract) {
@@ -69,39 +83,38 @@ public:
NewGroup.push_back(BB);
MassagedGroupsOfBlocks.push_back(NewGroup);
}
- init(MassagedGroupsOfBlocks);
+ BE.init(MassagedGroupsOfBlocks);
}
- BlockExtractor(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
- &GroupsOfBlocksToExtract,
- bool EraseFunctions)
- : ModulePass(ID), EraseFunctions(EraseFunctions) {
- init(GroupsOfBlocksToExtract);
+ BlockExtractorLegacyPass(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+ &GroupsOfBlocksToExtract,
+ bool EraseFunctions)
+ : ModulePass(ID), BE(EraseFunctions) {
+ BE.init(GroupsOfBlocksToExtract);
}
- BlockExtractor() : BlockExtractor(SmallVector<BasicBlock *, 0>(), false) {}
- bool runOnModule(Module &M) override;
-
-private:
- void loadFile();
- void splitLandingPadPreds(Function &F);
+ BlockExtractorLegacyPass()
+ : BlockExtractorLegacyPass(SmallVector<BasicBlock *, 0>(), false) {}
};
+
} // end anonymous namespace
-char BlockExtractor::ID = 0;
-INITIALIZE_PASS(BlockExtractor, "extract-blocks",
+char BlockExtractorLegacyPass::ID = 0;
+INITIALIZE_PASS(BlockExtractorLegacyPass, "extract-blocks",
"Extract basic blocks from module", false, false)
-ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractor(); }
+ModulePass *llvm::createBlockExtractorPass() {
+ return new BlockExtractorLegacyPass();
+}
ModulePass *llvm::createBlockExtractorPass(
const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) {
- return new BlockExtractor(BlocksToExtract, EraseFunctions);
+ return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions);
}
ModulePass *llvm::createBlockExtractorPass(
const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
&GroupsOfBlocksToExtract,
bool EraseFunctions) {
- return new BlockExtractor(GroupsOfBlocksToExtract, EraseFunctions);
+ return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions);
}
/// Gets all of the blocks specified in the input file.
@@ -233,3 +246,15 @@ bool BlockExtractor::runOnModule(Module &M) {
return Changed;
}
+
+bool BlockExtractorLegacyPass::runOnModule(Module &M) {
+ return BE.runOnModule(M);
+}
+
+PreservedAnalyses BlockExtractorPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ BlockExtractor BE(false);
+ BE.init(SmallVector<SmallVector<BasicBlock *, 16>, 0>());
+ return BE.runOnModule(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
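
BlockExtractor follows the usual pattern for porting a legacy pass to the new pass manager: the logic moves into a plain implementation class, the legacy ModulePass becomes a thin wrapper around it, and a new-PM pass reuses the same class. A bare-bones sketch of that shape, with made-up names (HelloImpl, HelloLegacyPass, HelloPass) and the INITIALIZE_PASS registration boilerplate omitted:

    #include "llvm/IR/Module.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Pass.h"

    namespace {
    // All of the real work lives here so both pass managers can share it.
    class HelloImpl {
    public:
      bool runOnModule(llvm::Module &M) { return false; } // placeholder: changes nothing
    };

    // Legacy pass manager wrapper (INITIALIZE_PASS(...) registration omitted).
    class HelloLegacyPass : public llvm::ModulePass {
      HelloImpl Impl;

    public:
      static char ID;
      HelloLegacyPass() : llvm::ModulePass(ID) {}
      bool runOnModule(llvm::Module &M) override { return Impl.runOnModule(M); }
    };
    char HelloLegacyPass::ID = 0;
    } // namespace

    // New pass manager wrapper: report whether analyses must be invalidated.
    class HelloPass : public llvm::PassInfoMixin<HelloPass> {
    public:
      llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &) {
        return HelloImpl().runOnModule(M) ? llvm::PreservedAnalyses::none()
                                          : llvm::PreservedAnalyses::all();
      }
    };

BlockExtractorPass::run above does exactly this, mapping the boolean result of BlockExtractor::runOnModule onto PreservedAnalyses::none()/all().
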
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 67f1438b9b6a..8e81f4bad4af 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -95,6 +95,8 @@ isUnmergeableGlobal(GlobalVariable *GV,
// Only process constants with initializers in the default address space.
return !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch thread-local variables.
+ GV->isThreadLocal() ||
// Don't touch values marked with attribute(used).
UsedGlobals.count(GV);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index f2588938d964..0b763e423fe0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -289,7 +289,7 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
for (Argument &Arg : Fn.args()) {
if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() &&
- !Arg.hasPassPointeeByValueAttr()) {
+ !Arg.hasPassPointeeByValueCopyAttr()) {
if (Arg.isUsedByMetadata()) {
Arg.replaceAllUsesWith(UndefValue::get(Arg.getType()));
Changed = true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 2cb184e8d4f4..1a8bb225a626 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -26,6 +26,13 @@ static cl::list<std::string>
"example -force-attribute=foo:noinline. This "
"option can be specified multiple times."));
+static cl::list<std::string> ForceRemoveAttributes(
+ "force-remove-attribute", cl::Hidden,
+ cl::desc("Remove an attribute from a function. This should be a "
+ "pair of 'function-name:attribute-name', for "
+ "example -force-remove-attribute=foo:noinline. This "
+ "option can be specified multiple times."));
+
static Attribute::AttrKind parseAttrKind(StringRef Kind) {
return StringSwitch<Attribute::AttrKind>(Kind)
.Case("alwaysinline", Attribute::AlwaysInline)
@@ -70,31 +77,49 @@ static Attribute::AttrKind parseAttrKind(StringRef Kind) {
}
/// If F has any forced attributes given on the command line, add them.
-static void addForcedAttributes(Function &F) {
- for (auto &S : ForceAttributes) {
+/// If F has any forced remove attributes given on the command line, remove
+/// them. When both force and force-remove are given to a function, the latter
+/// takes precedence.
+static void forceAttributes(Function &F) {
+ auto ParseFunctionAndAttr = [&](StringRef S) {
+ auto Kind = Attribute::None;
auto KV = StringRef(S).split(':');
if (KV.first != F.getName())
- continue;
-
- auto Kind = parseAttrKind(KV.second);
+ return Kind;
+ Kind = parseAttrKind(KV.second);
if (Kind == Attribute::None) {
LLVM_DEBUG(dbgs() << "ForcedAttribute: " << KV.second
<< " unknown or not handled!\n");
- continue;
}
- if (F.hasFnAttribute(Kind))
+ return Kind;
+ };
+
+ for (auto &S : ForceAttributes) {
+ auto Kind = ParseFunctionAndAttr(S);
+ if (Kind == Attribute::None || F.hasFnAttribute(Kind))
continue;
F.addFnAttr(Kind);
}
+
+ for (auto &S : ForceRemoveAttributes) {
+ auto Kind = ParseFunctionAndAttr(S);
+ if (Kind == Attribute::None || !F.hasFnAttribute(Kind))
+ continue;
+ F.removeFnAttr(Kind);
+ }
+}
+
+static bool hasForceAttributes() {
+ return !ForceAttributes.empty() || !ForceRemoveAttributes.empty();
}
PreservedAnalyses ForceFunctionAttrsPass::run(Module &M,
ModuleAnalysisManager &) {
- if (ForceAttributes.empty())
+ if (!hasForceAttributes())
return PreservedAnalyses::all();
for (Function &F : M.functions())
- addForcedAttributes(F);
+ forceAttributes(F);
// Just conservatively invalidate analyses, this isn't likely to be important.
return PreservedAnalyses::none();
@@ -109,11 +134,11 @@ struct ForceFunctionAttrsLegacyPass : public ModulePass {
}
bool runOnModule(Module &M) override {
- if (ForceAttributes.empty())
+ if (!hasForceAttributes())
return false;
for (Function &F : M.functions())
- addForcedAttributes(F);
+ forceAttributes(F);
// Conservatively assume we changed something.
return true;
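
The new -force-remove-attribute list mirrors -force-attribute and, per the comment above, removal wins when both name the same function and attribute. A tiny standalone model of that precedence (the strings and the set here are illustrative, not IR):

    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      std::set<std::string> FnAttrs = {"noinline"};        // attributes already on @foo
      std::vector<std::string> ForceAdd = {"cold"};        // -force-attribute=foo:cold
      std::vector<std::string> ForceRemove = {"noinline"}; // -force-remove-attribute=foo:noinline

      for (const std::string &A : ForceAdd)
        FnAttrs.insert(A); // additions first ...
      for (const std::string &A : ForceRemove)
        FnAttrs.erase(A);  // ... removals afterwards, so they take precedence

      for (const std::string &A : FnAttrs)
        std::cout << A << "\n"; // prints only "cold"
      return 0;
    }

In the pass itself the same ordering falls out of processing ForceAttributes before ForceRemoveAttributes inside forceAttributes().
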
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 4baeaa6e1630..6730824e860a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -13,15 +13,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/FunctionAttrs.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
@@ -63,7 +64,7 @@
using namespace llvm;
-#define DEBUG_TYPE "functionattrs"
+#define DEBUG_TYPE "function-attrs"
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
@@ -77,6 +78,7 @@ STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
+STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
static cl::opt<bool> EnableNonnullArgPropagation(
"enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
@@ -147,6 +149,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
if (isNoModRef(MRI))
continue;
+ // A pseudo probe call shouldn't change any function attribute since it
+ // doesn't translate to a real instruction. It comes with a memory access
+ // tag to prevent itself being removed by optimizations and not block
+ // other instructions being optimized.
+ if (isa<PseudoProbeInst>(I))
+ continue;
+
if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
// The call could access any memory. If that includes writes, note it.
if (isModSet(MRI))
@@ -166,7 +175,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
AAMDNodes AAInfo;
I->getAAMetadata(AAInfo);
- MemoryLocation Loc(Arg, LocationSize::unknown(), AAInfo);
+ MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, AAInfo);
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
@@ -281,16 +290,18 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
MadeChange = true;
// Clear out any existing attributes.
- F->removeFnAttr(Attribute::ReadOnly);
- F->removeFnAttr(Attribute::ReadNone);
- F->removeFnAttr(Attribute::WriteOnly);
+ AttrBuilder AttrsToRemove;
+ AttrsToRemove.addAttribute(Attribute::ReadOnly);
+ AttrsToRemove.addAttribute(Attribute::ReadNone);
+ AttrsToRemove.addAttribute(Attribute::WriteOnly);
if (!WritesMemory && !ReadsMemory) {
// Clear out any "access range attributes" if readnone was deduced.
- F->removeFnAttr(Attribute::ArgMemOnly);
- F->removeFnAttr(Attribute::InaccessibleMemOnly);
- F->removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+ AttrsToRemove.addAttribute(Attribute::ArgMemOnly);
+ AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
+ AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
}
+ F->removeAttributes(AttributeList::FunctionIndex, AttrsToRemove);
// Add in the new attribute.
if (WritesMemory && !ReadsMemory)
@@ -639,7 +650,7 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
if (auto *CB = dyn_cast<CallBase>(&I)) {
if (auto *CalledFunc = CB->getCalledFunction()) {
for (auto &CSArg : CalledFunc->args()) {
- if (!CSArg.hasNonNullAttr())
+ if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false))
continue;
// If the non-null callsite argument operand is an argument to 'F'
@@ -1216,6 +1227,11 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
return Changed;
}
+struct SCCNodesResult {
+ SCCNodeSet SCCNodes;
+ bool HasUnknownCall;
+};
+
} // end anonymous namespace
/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
@@ -1237,7 +1253,7 @@ static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
// I is a may-throw call to a function inside our SCC. This doesn't
// invalidate our current working assumption that the SCC is no-throw; we
// just have to scan that other function.
- if (SCCNodes.count(Callee) > 0)
+ if (SCCNodes.contains(Callee))
return false;
}
}
@@ -1257,21 +1273,16 @@ static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
if (Callee->doesNotFreeMemory())
return false;
- if (SCCNodes.count(Callee) > 0)
+ if (SCCNodes.contains(Callee))
return false;
return true;
}
-/// Infer attributes from all functions in the SCC by scanning every
-/// instruction for compliance to the attribute assumptions. Currently it
-/// does:
-/// - removal of Convergent attribute
-/// - addition of NoUnwind attribute
+/// Attempt to remove convergent function attribute when possible.
///
/// Returns true if any changes to function attributes were made.
-static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
-
+static bool inferConvergent(const SCCNodeSet &SCCNodes) {
AttributeInferer AI;
// Request to remove the convergent attribute from all functions in the SCC
@@ -1293,6 +1304,18 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
F.setNotConvergent();
},
/* RequiresExactDefinition= */ false});
+ // Perform all the requested attribute inference actions.
+ return AI.run(SCCNodes);
+}
+
+/// Infer attributes from all functions in the SCC by scanning every
+/// instruction for compliance to the attribute assumptions. Currently it
+/// does:
+/// - addition of NoUnwind attribute
+///
+/// Returns true if any changes to function attributes were made.
+static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
+ AttributeInferer AI;
if (!DisableNoUnwindInference)
// Request to infer nounwind attribute for all the functions in the SCC if
@@ -1343,14 +1366,6 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
return AI.run(SCCNodes);
}
-static bool setDoesNotRecurse(Function &F) {
- if (F.doesNotRecurse())
- return false;
- F.setDoesNotRecurse();
- ++NumNoRecurse;
- return true;
-}
-
static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
// Try and identify functions that do not recurse.
@@ -1377,30 +1392,139 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
// Every call was to a non-recursive function other than this function, and
// we have no indirect recursion as the SCC size is one. This function cannot
// recurse.
- return setDoesNotRecurse(*F);
+ F->setDoesNotRecurse();
+ ++NumNoRecurse;
+ return true;
+}
+
+static bool instructionDoesNotReturn(Instruction &I) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ Function *Callee = CB->getCalledFunction();
+ return Callee && Callee->doesNotReturn();
+ }
+ return false;
+}
+
+// A basic block can only return if it terminates with a ReturnInst and does not
+// contain calls to noreturn functions.
+static bool basicBlockCanReturn(BasicBlock &BB) {
+ if (!isa<ReturnInst>(BB.getTerminator()))
+ return false;
+ return none_of(BB, instructionDoesNotReturn);
+}
+
+// Set the noreturn function attribute if possible.
+static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
+ bool Changed = false;
+
+ for (Function *F : SCCNodes) {
+ if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
+ F->doesNotReturn())
+ continue;
+
+ // The function can return if any basic blocks can return.
+ // FIXME: this doesn't handle recursion or unreachable blocks.
+ if (none_of(*F, basicBlockCanReturn)) {
+ F->setDoesNotReturn();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+static bool functionWillReturn(const Function &F) {
+ // Must-progress function without side-effects must return.
+ if (F.mustProgress() && F.onlyReadsMemory())
+ return true;
+
+ // Can only analyze functions with a definition.
+ if (F.isDeclaration())
+ return false;
+
+ // Functions with loops require more sophisticated analysis, as the loop
+ // may be infinite. For now, don't try to handle them.
+ SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
+ FindFunctionBackedges(F, Backedges);
+ if (!Backedges.empty())
+ return false;
+
+ // If there are no loops, then the function is willreturn if all calls in
+ // it are willreturn.
+ return all_of(instructions(F), [](const Instruction &I) {
+ return I.willReturn();
+ });
+}
+
+// Set the willreturn function attribute if possible.
+static bool addWillReturn(const SCCNodeSet &SCCNodes) {
+ bool Changed = false;
+
+ for (Function *F : SCCNodes) {
+ if (!F || F->willReturn() || !functionWillReturn(*F))
+ continue;
+
+ F->setWillReturn();
+ NumWillReturn++;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
+ SCCNodesResult Res;
+ Res.HasUnknownCall = false;
+ for (Function *F : Functions) {
+ if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
+ // Treat any function we're trying not to optimize as if it were an
+ // indirect call and omit it from the node set used below.
+ Res.HasUnknownCall = true;
+ continue;
+ }
+ // Track whether any functions in this SCC have an unknown call edge.
+ // Note: if this is ever a performance hit, we can common it with
+ // subsequent routines which also do scans over the instructions of the
+ // function.
+ if (!Res.HasUnknownCall) {
+ for (Instruction &I : instructions(*F)) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (!CB->getCalledFunction()) {
+ Res.HasUnknownCall = true;
+ break;
+ }
+ }
+ }
+ }
+ Res.SCCNodes.insert(F);
+ }
+ return Res;
}
template <typename AARGetterT>
-static bool deriveAttrsInPostOrder(SCCNodeSet &SCCNodes,
- AARGetterT &&AARGetter,
- bool HasUnknownCall) {
+static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
+ AARGetterT &&AARGetter) {
+ SCCNodesResult Nodes = createSCCNodeSet(Functions);
bool Changed = false;
// Bail if the SCC only contains optnone functions.
- if (SCCNodes.empty())
+ if (Nodes.SCCNodes.empty())
return Changed;
- Changed |= addArgumentReturnedAttrs(SCCNodes);
- Changed |= addReadAttrs(SCCNodes, AARGetter);
- Changed |= addArgumentAttrs(SCCNodes);
+ Changed |= addArgumentReturnedAttrs(Nodes.SCCNodes);
+ Changed |= addReadAttrs(Nodes.SCCNodes, AARGetter);
+ Changed |= addArgumentAttrs(Nodes.SCCNodes);
+ Changed |= inferConvergent(Nodes.SCCNodes);
+ Changed |= addNoReturnAttrs(Nodes.SCCNodes);
+ Changed |= addWillReturn(Nodes.SCCNodes);
// If we have no external nodes participating in the SCC, we can deduce some
// more precise attributes as well.
- if (!HasUnknownCall) {
- Changed |= addNoAliasAttrs(SCCNodes);
- Changed |= addNonNullAttrs(SCCNodes);
- Changed |= inferAttrsFromFunctionBodies(SCCNodes);
- Changed |= addNoRecurseAttrs(SCCNodes);
+ if (!Nodes.HasUnknownCall) {
+ Changed |= addNoAliasAttrs(Nodes.SCCNodes);
+ Changed |= addNonNullAttrs(Nodes.SCCNodes);
+ Changed |= inferAttrsFromFunctionBodies(Nodes.SCCNodes);
+ Changed |= addNoRecurseAttrs(Nodes.SCCNodes);
}
return Changed;
@@ -1419,35 +1543,12 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
return FAM.getResult<AAManager>(F);
};
- // Fill SCCNodes with the elements of the SCC. Also track whether there are
- // any external or opt-none nodes that will prevent us from optimizing any
- // part of the SCC.
- SCCNodeSet SCCNodes;
- bool HasUnknownCall = false;
+ SmallVector<Function *, 8> Functions;
for (LazyCallGraph::Node &N : C) {
- Function &F = N.getFunction();
- if (F.hasOptNone() || F.hasFnAttribute(Attribute::Naked)) {
- // Treat any function we're trying not to optimize as if it were an
- // indirect call and omit it from the node set used below.
- HasUnknownCall = true;
- continue;
- }
- // Track whether any functions in this SCC have an unknown call edge.
- // Note: if this is ever a performance hit, we can common it with
- // subsequent routines which also do scans over the instructions of the
- // function.
- if (!HasUnknownCall)
- for (Instruction &I : instructions(F))
- if (auto *CB = dyn_cast<CallBase>(&I))
- if (!CB->getCalledFunction()) {
- HasUnknownCall = true;
- break;
- }
-
- SCCNodes.insert(&F);
+ Functions.push_back(&N.getFunction());
}
- if (deriveAttrsInPostOrder(SCCNodes, AARGetter, HasUnknownCall))
+ if (deriveAttrsInPostOrder(Functions, AARGetter))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -1477,11 +1578,11 @@ struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
} // end anonymous namespace
char PostOrderFunctionAttrsLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "functionattrs",
+INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
"Deduce function attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "functionattrs",
+INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
"Deduce function attributes", false, false)
Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
@@ -1490,26 +1591,12 @@ Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
-
- // Fill SCCNodes with the elements of the SCC. Used for quickly looking up
- // whether a given CallGraphNode is in this SCC. Also track whether there are
- // any external or opt-none nodes that will prevent us from optimizing any
- // part of the SCC.
- SCCNodeSet SCCNodes;
- bool ExternalNode = false;
+ SmallVector<Function *, 8> Functions;
for (CallGraphNode *I : SCC) {
- Function *F = I->getFunction();
- if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
- // External node or function we're trying not to optimize - we both avoid
- // transform them and avoid leveraging information they provide.
- ExternalNode = true;
- continue;
- }
-
- SCCNodes.insert(F);
+ Functions.push_back(I->getFunction());
}
- return deriveAttrsInPostOrder(SCCNodes, AARGetter, ExternalNode);
+ return deriveAttrsInPostOrder(Functions, AARGetter);
}
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
@@ -1542,11 +1629,13 @@ struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs",
- "Deduce function attributes in RPO", false, false)
+INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass,
+ "rpo-function-attrs", "Deduce function attributes in RPO",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs",
- "Deduce function attributes in RPO", false, false)
+INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass,
+ "rpo-function-attrs", "Deduce function attributes in RPO",
+ false, false)
Pass *llvm::createReversePostOrderFunctionAttrsPass() {
return new ReversePostOrderFunctionAttrsLegacyPass();
@@ -1578,7 +1667,9 @@ static bool addNoRecurseAttrsTopDown(Function &F) {
if (!CB || !CB->getParent()->getParent()->doesNotRecurse())
return false;
}
- return setDoesNotRecurse(F);
+ F.setDoesNotRecurse();
+ ++NumNoRecurse;
+ return true;
}
static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
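
FunctionAttrs now also infers noreturn: a block can return only if its terminator is a ReturnInst and it contains no call to a known-noreturn callee, and a function whose blocks all fail that test is marked noreturn. A toy model of that reasoning (ToyBlock and friends are made up; as the FIXME above notes, recursion and unreachable blocks are not yet handled):

    #include <iostream>
    #include <string>
    #include <vector>

    struct ToyBlock {
      bool EndsInReturn;                        // terminator is a return
      std::vector<std::string> NoReturnCallees; // calls to known-noreturn functions
    };

    static bool blockCanReturn(const ToyBlock &BB) {
      return BB.EndsInReturn && BB.NoReturnCallees.empty();
    }

    static bool functionIsNoReturn(const std::vector<ToyBlock> &Blocks) {
      for (const ToyBlock &BB : Blocks)
        if (blockCanReturn(BB))
          return false; // at least one path reaches a return
      return true;      // every path ends in abort/exit/unreachable
    }

    int main() {
      // if (cond) abort(); else exit(1);  -- neither path returns.
      std::vector<ToyBlock> F = {{false, {"abort"}}, {false, {"exit"}}};
      std::cout << (functionIsNoReturn(F) ? "noreturn" : "may return") << "\n";
      return 0;
    }

The willreturn inference added alongside it is structural in the same spirit: with no backedges and only willreturn calls (or mustProgress plus onlyReadsMemory), the function must eventually return.
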
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 468bf19f2e48..18343030bc6a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -124,14 +124,8 @@ static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
cl::desc("Compute dead symbols"));
static cl::opt<bool> EnableImportMetadata(
- "enable-import-metadata", cl::init(
-#if !defined(NDEBUG)
- true /*Enabled with asserts.*/
-#else
- false
-#endif
- ),
- cl::Hidden, cl::desc("Enable import metadata like 'thinlto_src_module'"));
+ "enable-import-metadata", cl::init(false), cl::Hidden,
+ cl::desc("Enable import metadata like 'thinlto_src_module'"));
/// Summary file to use for function importing when using -function-import from
/// the command line.
@@ -261,8 +255,8 @@ selectCallee(const ModuleSummaryIndex &Index,
namespace {
-using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */,
- GlobalValue::GUID>;
+using EdgeInfo =
+ std::tuple<const GlobalValueSummary *, unsigned /* Threshold */>;
} // anonymous namespace
@@ -282,8 +276,9 @@ updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) {
}
static void computeImportForReferencedGlobals(
- const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
+ const GlobalValueSummary &Summary, const ModuleSummaryIndex &Index,
const GVSummaryMapTy &DefinedGVSummaries,
+ SmallVectorImpl<EdgeInfo> &Worklist,
FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists) {
for (auto &VI : Summary.refs()) {
@@ -321,6 +316,11 @@ static void computeImportForReferencedGlobals(
// which is more efficient than adding them here.
if (ExportLists)
(*ExportLists)[RefSummary->modulePath()].insert(VI);
+
+ // If variable is not writeonly we attempt to recursively analyze
+ // its references in order to import referenced constants.
+ if (!Index.isWriteOnly(cast<GlobalVarSummary>(RefSummary.get())))
+ Worklist.emplace_back(RefSummary.get(), 0);
break;
}
}
@@ -360,7 +360,7 @@ static void computeImportForFunction(
StringMap<FunctionImporter::ExportSetTy> *ExportLists,
FunctionImporter::ImportThresholdsTy &ImportThresholds) {
computeImportForReferencedGlobals(Summary, Index, DefinedGVSummaries,
- ImportList, ExportLists);
+ Worklist, ImportList, ExportLists);
static int ImportCount = 0;
for (auto &Edge : Summary.calls()) {
ValueInfo VI = Edge.first;
@@ -508,7 +508,7 @@ static void computeImportForFunction(
ImportCount++;
// Insert the newly imported function to the worklist.
- Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold, VI.getGUID());
+ Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
}
}
@@ -549,13 +549,17 @@ static void ComputeImportForModule(
// Process the newly imported functions and add callees to the worklist.
while (!Worklist.empty()) {
- auto FuncInfo = Worklist.pop_back_val();
- auto *Summary = std::get<0>(FuncInfo);
- auto Threshold = std::get<1>(FuncInfo);
-
- computeImportForFunction(*Summary, Index, Threshold, DefinedGVSummaries,
- Worklist, ImportList, ExportLists,
- ImportThresholds);
+ auto GVInfo = Worklist.pop_back_val();
+ auto *Summary = std::get<0>(GVInfo);
+ auto Threshold = std::get<1>(GVInfo);
+
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary))
+ computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
+ Worklist, ImportList, ExportLists,
+ ImportThresholds);
+ else
+ computeImportForReferencedGlobals(*Summary, Index, DefinedGVSummaries,
+ Worklist, ImportList, ExportLists);
}
// Print stats about functions considered but rejected for importing
@@ -884,6 +888,7 @@ void llvm::computeDeadSymbols(
while (!Worklist.empty()) {
auto VI = Worklist.pop_back_val();
for (auto &Summary : VI.getSummaryList()) {
+ Summary->setLive(true);
if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
// If this is an alias, visit the aliasee VI to ensure that all copies
// are marked live and it is added to the worklist for further
@@ -891,8 +896,6 @@ void llvm::computeDeadSymbols(
visit(AS->getAliaseeVI(), true);
continue;
}
-
- Summary->setLive(true);
for (auto Ref : Summary->refs())
visit(Ref, false);
if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
@@ -1311,7 +1314,7 @@ static bool doImportingForModule(Module &M) {
// Next we need to promote to global scope and rename any local values that
// are potentially exported to other modules.
- if (renameModuleForThinLTO(M, *Index, /*clearDSOOnDeclarations=*/false,
+ if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
/*GlobalsToImport=*/nullptr)) {
errs() << "Error renaming module\n";
return false;
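
The ThinLTO importer's worklist entries are widened from FunctionSummary to GlobalValueSummary so that non-writeonly variables keep the traversal going and the constants they reference get imported too. A simplified standalone sketch of that recursive walk, using toy summary types rather than the real index (deduplication, thresholds and cycle handling are omitted):

    #include <iostream>
    #include <string>
    #include <vector>

    struct ToySummary {
      std::string Name;
      bool IsFunction;
      bool IsWriteOnly;                      // only meaningful for variables
      std::vector<const ToySummary *> Refs;  // referenced globals
      std::vector<const ToySummary *> Calls; // callees (functions only)
    };

    static void computeImports(const ToySummary &Root,
                               std::vector<const ToySummary *> &ImportList) {
      std::vector<const ToySummary *> Worklist = {&Root};
      while (!Worklist.empty()) {
        const ToySummary *S = Worklist.back();
        Worklist.pop_back();
        for (const ToySummary *Ref : S->Refs) {
          ImportList.push_back(Ref);
          // Follow read-only variables so constants they reference are imported too.
          if (!Ref->IsFunction && !Ref->IsWriteOnly)
            Worklist.push_back(Ref);
        }
        if (S->IsFunction)
          for (const ToySummary *Callee : S->Calls) {
            ImportList.push_back(Callee);
            Worklist.push_back(Callee);
          }
      }
    }

    int main() {
      ToySummary Const{"c_const", false, false, {}, {}};
      ToySummary Table{"v_table", false, false, {&Const}, {}};
      ToySummary Fn{"f", true, false, {&Table}, {}};
      std::vector<const ToySummary *> Imports;
      computeImports(Fn, Imports);
      for (const ToySummary *S : Imports)
        std::cout << S->Name << "\n"; // prints v_table, then c_const
      return 0;
    }
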
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 9524d9a36204..223a05e8ea02 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -268,6 +268,7 @@ CleanupPointerRootUsers(GlobalVariable *GV,
I = J;
} while (true);
I->eraseFromParent();
+ Changed = true;
}
}
@@ -285,7 +286,7 @@ static bool CleanupConstantGlobalUsers(
// we delete a constant array, we may also be holding pointer to one of its
// elements (or an element of one of its elements if we're dealing with an
// array of arrays) in the worklist.
- SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end());
+ SmallVector<WeakTrackingVH, 8> WorkList(V->users());
while (!WorkList.empty()) {
Value *UV = WorkList.pop_back_val();
if (!UV)
@@ -1879,7 +1880,8 @@ static bool isPointerValueDeadOnEntryToFunction(
// and the number of bits loaded in L is less than or equal to
// the number of bits stored in S.
return DT.dominates(S, L) &&
- DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
+ DL.getTypeStoreSize(LTy).getFixedSize() <=
+ DL.getTypeStoreSize(STy).getFixedSize();
}))
return false;
}
@@ -1931,8 +1933,7 @@ static void makeAllConstantUsesInstructions(Constant *C) {
SmallVector<Value*,4> UUsers;
for (auto *U : Users) {
UUsers.clear();
- for (auto *UU : U->users())
- UUsers.push_back(UU);
+ append_range(UUsers, U->users());
for (auto *UU : UUsers) {
Instruction *UI = cast<Instruction>(UU);
Instruction *NewU = U->getAsInstruction();
@@ -1989,12 +1990,13 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
return true;
}
+ bool Changed = false;
+
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.IsLoaded) {
LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
- bool Changed;
if (isLeakCheckerRoot(GV)) {
// Delete any constant stores to the global.
Changed = CleanupPointerRootUsers(GV, GetTLI);
@@ -2020,11 +2022,14 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
// Don't actually mark a global constant if it's atomic because atomic loads
// are implemented by a trivial cmpxchg in some edge-cases and that usually
// requires write access to the variable even if it's not actually changed.
- if (GS.Ordering == AtomicOrdering::NotAtomic)
+ if (GS.Ordering == AtomicOrdering::NotAtomic) {
+ assert(!GV->isConstant() && "Expected a non-constant global");
GV->setConstant(true);
+ Changed = true;
+ }
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+ Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -2084,7 +2089,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
}
}
- return false;
+ return Changed;
}
/// Analyze the specified global variable and optimize it if possible. If we
@@ -2219,8 +2224,7 @@ isValidCandidateForColdCC(Function &F,
BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
if (!isColdCallSite(CB, CallerBFI))
return false;
- auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc);
- if (It == AllCallsCold.end())
+ if (!llvm::is_contained(AllCallsCold, CallerFunc))
return false;
}
return true;
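
Beyond the Changed-flag bookkeeping, several GlobalOpt hunks simply swap manual loops and std::find patterns for STLExtras helpers; append_range and is_contained exist in llvm/ADT/STLExtras.h and wrap the obvious std algorithms. For reference, a small sketch:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    void demo() {
      llvm::SmallVector<int, 8> A = {1, 2, 3};
      llvm::SmallVector<int, 8> B;
      llvm::append_range(B, A);               // replaces a manual push_back loop
      bool HasTwo = llvm::is_contained(B, 2); // replaces std::find(...) != end()
      (void)HasTwo;
    }
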
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index d0bd0166534a..aa708ee520b1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -29,7 +29,6 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
@@ -68,7 +67,9 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
+#include <limits>
#include <cassert>
+#include <string>
#define DEBUG_TYPE "hotcoldsplit"
@@ -77,14 +78,29 @@ STATISTIC(NumColdRegionsOutlined, "Number of cold regions outlined.");
using namespace llvm;
-static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
- cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableStaticAnalysis("hot-cold-static-analysis",
+ cl::init(true), cl::Hidden);
static cl::opt<int>
SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden,
cl::desc("Base penalty for splitting cold code (as a "
"multiple of TCC_Basic)"));
+static cl::opt<bool> EnableColdSection(
+ "enable-cold-section", cl::init(false), cl::Hidden,
+ cl::desc("Enable placement of extracted cold functions"
+ " into a separate section after hot-cold splitting."));
+
+static cl::opt<std::string>
+ ColdSectionName("hotcoldsplit-cold-section-name", cl::init("__llvm_cold"),
+ cl::Hidden,
+ cl::desc("Name for the section containing cold functions "
+ "extracted by hot-cold splitting."));
+
+static cl::opt<int> MaxParametersForSplit(
+ "hotcoldsplit-max-params", cl::init(4), cl::Hidden,
+ cl::desc("Maximum number of parameters for a split function"));
+
namespace {
// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
// this function unless you modify the MBB version as well.
@@ -221,11 +237,11 @@ bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
}
/// Get the benefit score of outlining \p Region.
-static int getOutliningBenefit(ArrayRef<BasicBlock *> Region,
- TargetTransformInfo &TTI) {
+static InstructionCost getOutliningBenefit(ArrayRef<BasicBlock *> Region,
+ TargetTransformInfo &TTI) {
// Sum up the code size costs of non-terminator instructions. Tight coupling
// with \ref getOutliningPenalty is needed to model the costs of terminators.
- int Benefit = 0;
+ InstructionCost Benefit = 0;
for (BasicBlock *BB : Region)
for (Instruction &I : BB->instructionsWithoutDebug())
if (&I != BB->getTerminator())
@@ -246,18 +262,6 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
if (SplittingThreshold <= 0)
return Penalty;
- // The typical code size cost for materializing an argument for the outlined
- // call.
- LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n");
- const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic;
- Penalty += CostForArgMaterialization * NumInputs;
-
- // The typical code size cost for an output alloca, its associated store, and
- // its associated reload.
- LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n");
- const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
- Penalty += CostForRegionOutput * NumOutputs;
-
// Find the number of distinct exit blocks for the region. Use a conservative
// check to determine whether control returns from the region.
bool NoBlocksReturn = true;
@@ -271,13 +275,55 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
}
for (BasicBlock *SuccBB : successors(BB)) {
- if (find(Region, SuccBB) == Region.end()) {
+ if (!is_contained(Region, SuccBB)) {
NoBlocksReturn = false;
SuccsOutsideRegion.insert(SuccBB);
}
}
}
+ // Count the number of phis in exit blocks with >= 2 incoming values from the
+ // outlining region. These phis are split (\ref severSplitPHINodesOfExits),
+ // and new outputs are created to supply the split phis. CodeExtractor can't
+ // report these new outputs until extraction begins, but it's important to
+ // factor the cost of the outputs into the cost calculation.
+ unsigned NumSplitExitPhis = 0;
+ for (BasicBlock *ExitBB : SuccsOutsideRegion) {
+ for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ int NumIncomingVals = 0;
+ for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
+ if (find(Region, PN.getIncomingBlock(i)) != Region.end()) {
+ ++NumIncomingVals;
+ if (NumIncomingVals > 1) {
+ ++NumSplitExitPhis;
+ break;
+ }
+ }
+ }
+ }
+
+ // Apply a penalty for calling the split function. Factor in the cost of
+ // materializing all of the parameters.
+ int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
+ int NumParams = NumInputs + NumOutputsAndSplitPhis;
+ if (NumParams > MaxParametersForSplit) {
+ LLVM_DEBUG(dbgs() << NumInputs << " inputs and " << NumOutputsAndSplitPhis
+ << " outputs exceeds parameter limit ("
+ << MaxParametersForSplit << ")\n");
+ return std::numeric_limits<int>::max();
+ }
+ const int CostForArgMaterialization = 2 * TargetTransformInfo::TCC_Basic;
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumParams << " params\n");
+ Penalty += CostForArgMaterialization * NumParams;
+
+ // Apply the typical code size cost for an output alloca and its associated
+ // reload in the caller. Also penalize the associated store in the callee.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputsAndSplitPhis
+ << " outputs/split phis\n");
+ const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
+ Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
+
// Apply a `noreturn` bonus.
if (NoBlocksReturn) {
LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size()
@@ -287,7 +333,7 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
// Apply a penalty for having more than one successor outside of the region.
// This penalty accounts for the switch needed in the caller.
- if (!SuccsOutsideRegion.empty()) {
+ if (SuccsOutsideRegion.size() > 1) {
LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size()
<< " non-region successors\n");
Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic;
@@ -312,12 +358,12 @@ Function *HotColdSplitting::extractColdRegion(
// splitting.
SetVector<Value *> Inputs, Outputs, Sinks;
CE.findInputsOutputs(Inputs, Outputs, Sinks);
- int OutliningBenefit = getOutliningBenefit(Region, TTI);
+ InstructionCost OutliningBenefit = getOutliningBenefit(Region, TTI);
int OutliningPenalty =
getOutliningPenalty(Region, Inputs.size(), Outputs.size());
LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
<< ", penalty = " << OutliningPenalty << "\n");
- if (OutliningBenefit <= OutliningPenalty)
+ if (!OutliningBenefit.isValid() || OutliningBenefit <= OutliningPenalty)
return nullptr;
Function *OrigF = Region[0]->getParent();
@@ -331,8 +377,12 @@ Function *HotColdSplitting::extractColdRegion(
}
CI->setIsNoInline();
- if (OrigF->hasSection())
- OutF->setSection(OrigF->getSection());
+ if (EnableColdSection)
+ OutF->setSection(ColdSectionName);
+ else {
+ if (OrigF->hasSection())
+ OutF->setSection(OrigF->getSection());
+ }
markFunctionCold(*OutF, BFI != nullptr);
@@ -575,7 +625,7 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
continue;
bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
- (EnableStaticAnalyis && unlikelyExecuted(*BB));
+ (EnableStaticAnalysis && unlikelyExecuted(*BB));
if (!Cold)
continue;
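
The rewritten outlining penalty counts split exit phis as extra outputs, caps the number of parameters at -hotcoldsplit-max-params (4 by default), and charges 2 * TCC_Basic per parameter plus 3 * TCC_Basic per output. A back-of-the-envelope sketch of that arithmetic for a hypothetical region with two inputs, one output and one split phi (the initial penalty term is not shown in this hunk, so it is taken as zero here; TCC_Basic is 1):

    #include <iostream>
    #include <limits>

    int main() {
      const int TCCBasic = 1;  // TargetTransformInfo::TCC_Basic
      const int MaxParams = 4; // -hotcoldsplit-max-params default
      int NumInputs = 2, NumOutputs = 1, NumSplitExitPhis = 1;

      int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis; // 2
      int NumParams = NumInputs + NumOutputsAndSplitPhis;         // 4

      if (NumParams > MaxParams) { // too many parameters: never profitable
        std::cout << "penalty = " << std::numeric_limits<int>::max() << "\n";
        return 0;
      }

      int Penalty = 0;                                  // base term omitted (assumption)
      Penalty += 2 * TCCBasic * NumParams;              // argument materialization: 8
      Penalty += 3 * TCCBasic * NumOutputsAndSplitPhis; // alloca + store + reload: 6
      std::cout << "penalty = " << Penalty << "\n";     // prints "penalty = 14"
      return 0;
    }

Pushing the region past the parameter cap makes the penalty effectively infinite, which is how the pass now refuses splits that would need an unwieldy call signature.
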
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
deleted file mode 100644
index 8d05a72d68da..000000000000
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ /dev/null
@@ -1,308 +0,0 @@
-//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass implements an _extremely_ simple interprocedural constant
-// propagation pass. It could certainly be improved in many different ways,
-// like using a worklist. This pass makes arguments dead, but does not remove
-// them. The existing dead argument elimination pass should be run after this
-// to clean up the mess.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/AbstractCallSite.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/IPO.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "ipconstprop"
-
-STATISTIC(NumArgumentsProped, "Number of args turned into constants");
-STATISTIC(NumReturnValProped, "Number of return values turned into constants");
-
-namespace {
- /// IPCP - The interprocedural constant propagation pass
- ///
- struct IPCP : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- IPCP() : ModulePass(ID) {
- initializeIPCPPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
- };
-}
-
-/// PropagateConstantsIntoArguments - Look at all uses of the specified
-/// function. If all uses are direct call sites, and all pass a particular
-/// constant in for an argument, propagate that constant in as the argument.
-///
-static bool PropagateConstantsIntoArguments(Function &F) {
- if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit.
-
- // For each argument, keep track of its constant value and whether it is a
- // constant or not. The bool is driven to true when found to be non-constant.
- SmallVector<PointerIntPair<Constant *, 1, bool>, 16> ArgumentConstants;
- ArgumentConstants.resize(F.arg_size());
-
- unsigned NumNonconstant = 0;
- for (Use &U : F.uses()) {
- User *UR = U.getUser();
- // Ignore blockaddress uses.
- if (isa<BlockAddress>(UR)) continue;
-
- // If no abstract call site was created we did not understand the use, bail.
- AbstractCallSite ACS(&U);
- if (!ACS)
- return false;
-
- // Mismatched argument count is undefined behavior. Simply bail out to avoid
- // handling of such situations below (avoiding asserts/crashes).
- unsigned NumActualArgs = ACS.getNumArgOperands();
- if (F.isVarArg() ? ArgumentConstants.size() > NumActualArgs
- : ArgumentConstants.size() != NumActualArgs)
- return false;
-
- // Check out all of the potentially constant arguments. Note that we don't
- // inspect varargs here.
- Function::arg_iterator Arg = F.arg_begin();
- for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++Arg) {
-
- // If this argument is known non-constant, ignore it.
- if (ArgumentConstants[i].getInt())
- continue;
-
- Value *V = ACS.getCallArgOperand(i);
- Constant *C = dyn_cast_or_null<Constant>(V);
-
- // Mismatched argument type is undefined behavior. Simply bail out to avoid
- // handling of such situations below (avoiding asserts/crashes).
- if (C && Arg->getType() != C->getType())
- return false;
-
- // We can only propagate thread independent values through callbacks.
- // This is different to direct/indirect call sites because for them we
- // know the thread executing the caller and callee is the same. For
- // callbacks this is not guaranteed, thus a thread dependent value could
- // be different for the caller and callee, making it invalid to propagate.
- if (C && ACS.isCallbackCall() && C->isThreadDependent()) {
- // Argument became non-constant. If all arguments are non-constant now,
- // give up on this function.
- if (++NumNonconstant == ArgumentConstants.size())
- return false;
-
- ArgumentConstants[i].setInt(true);
- continue;
- }
-
- if (C && ArgumentConstants[i].getPointer() == nullptr) {
- ArgumentConstants[i].setPointer(C); // First constant seen.
- } else if (C && ArgumentConstants[i].getPointer() == C) {
- // Still the constant value we think it is.
- } else if (V == &*Arg) {
- // Ignore recursive calls passing argument down.
- } else {
- // Argument became non-constant. If all arguments are non-constant now,
- // give up on this function.
- if (++NumNonconstant == ArgumentConstants.size())
- return false;
- ArgumentConstants[i].setInt(true);
- }
- }
- }
-
- // If we got to this point, there is a constant argument!
- assert(NumNonconstant != ArgumentConstants.size());
- bool MadeChange = false;
- Function::arg_iterator AI = F.arg_begin();
- for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
- // Do we have a constant argument?
- if (ArgumentConstants[i].getInt() || AI->use_empty() ||
- (AI->hasByValAttr() && !F.onlyReadsMemory()))
- continue;
-
- Value *V = ArgumentConstants[i].getPointer();
- if (!V) V = UndefValue::get(AI->getType());
- AI->replaceAllUsesWith(V);
- ++NumArgumentsProped;
- MadeChange = true;
- }
- return MadeChange;
-}
-
-
-// Check to see if this function returns one or more constants. If so, replace
-// all callers that use those return values with the constant value. This will
-// leave in the actual return values and instructions, but deadargelim will
-// clean that up.
-//
-// Additionally if a function always returns one of its arguments directly,
-// callers will be updated to use the value they pass in directly instead of
-// using the return value.
-static bool PropagateConstantReturn(Function &F) {
- if (F.getReturnType()->isVoidTy())
- return false; // No return value.
-
- // We can infer and propagate the return value only when we know that the
- // definition we'll get at link time is *exactly* the definition we see now.
- // For more details, see GlobalValue::mayBeDerefined.
- if (!F.isDefinitionExact())
- return false;
-
- // Don't touch naked functions. The may contain asm returning
- // value we don't see, so we may end up interprocedurally propagating
- // the return value incorrectly.
- if (F.hasFnAttribute(Attribute::Naked))
- return false;
-
- // Check to see if this function returns a constant.
- SmallVector<Value *,4> RetVals;
- StructType *STy = dyn_cast<StructType>(F.getReturnType());
- if (STy)
- for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
- RetVals.push_back(UndefValue::get(STy->getElementType(i)));
- else
- RetVals.push_back(UndefValue::get(F.getReturnType()));
-
- unsigned NumNonConstant = 0;
- for (BasicBlock &BB : F)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
- for (unsigned i = 0, e = RetVals.size(); i != e; ++i) {
- // Already found conflicting return values?
- Value *RV = RetVals[i];
- if (!RV)
- continue;
-
- // Find the returned value
- Value *V;
- if (!STy)
- V = RI->getOperand(0);
- else
- V = FindInsertedValue(RI->getOperand(0), i);
-
- if (V) {
- // Ignore undefs, we can change them into anything
- if (isa<UndefValue>(V))
- continue;
-
- // Try to see if all the rets return the same constant or argument.
- if (isa<Constant>(V) || isa<Argument>(V)) {
- if (isa<UndefValue>(RV)) {
- // No value found yet? Try the current one.
- RetVals[i] = V;
- continue;
- }
- // Returning the same value? Good.
- if (RV == V)
- continue;
- }
- }
- // Different or no known return value? Don't propagate this return
- // value.
- RetVals[i] = nullptr;
- // All values non-constant? Stop looking.
- if (++NumNonConstant == RetVals.size())
- return false;
- }
- }
-
- // If we got here, the function returns at least one constant value. Loop
- // over all users, replacing any uses of the return value with the returned
- // constant.
- bool MadeChange = false;
- for (Use &U : F.uses()) {
- CallBase *CB = dyn_cast<CallBase>(U.getUser());
-
- // Not a call instruction or a call instruction that's not calling F
- // directly?
- if (!CB || !CB->isCallee(&U))
- continue;
-
- // Call result not used?
- if (CB->use_empty())
- continue;
-
- MadeChange = true;
-
- if (!STy) {
- Value* New = RetVals[0];
- if (Argument *A = dyn_cast<Argument>(New))
- // Was an argument returned? Then find the corresponding argument in
- // the call instruction and use that.
- New = CB->getArgOperand(A->getArgNo());
- CB->replaceAllUsesWith(New);
- continue;
- }
-
- for (auto I = CB->user_begin(), E = CB->user_end(); I != E;) {
- Instruction *Ins = cast<Instruction>(*I);
-
- // Increment now, so we can remove the use
- ++I;
-
- // Find the index of the retval to replace with
- int index = -1;
- if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins))
- if (EV->getNumIndices() == 1)
- index = *EV->idx_begin();
-
- // If this use uses a specific return value, and we have a replacement,
- // replace it.
- if (index != -1) {
- Value *New = RetVals[index];
- if (New) {
- if (Argument *A = dyn_cast<Argument>(New))
- // Was an argument returned? Then find the corresponding argument in
- // the call instruction and use that.
- New = CB->getArgOperand(A->getArgNo());
- Ins->replaceAllUsesWith(New);
- Ins->eraseFromParent();
- }
- }
- }
- }
-
- if (MadeChange) ++NumReturnValProped;
- return MadeChange;
-}
-
-char IPCP::ID = 0;
-INITIALIZE_PASS(IPCP, "ipconstprop",
- "Interprocedural constant propagation", false, false)
-
-ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
-
-bool IPCP::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
- bool Changed = false;
- bool LocalChange = true;
-
- // FIXME: instead of using smart algorithms, we just iterate until we stop
- // making changes.
- while (LocalChange) {
- LocalChange = false;
- for (Function &F : M)
- if (!F.isDeclaration()) {
- // Delete any klingons.
- F.removeDeadConstantUsers();
- if (F.hasLocalLinkage())
- LocalChange |= PropagateConstantsIntoArguments(F);
- Changed |= PropagateConstantReturn(F);
- }
- Changed |= LocalChange;
- }
- return Changed;
-}
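
The deleted pass decides, for each formal argument, whether every understood call site passes the same constant; one disagreement (or a thread-dependent constant through a callback) marks the argument non-constant for good. A minimal standalone sketch of that per-argument bookkeeping, using plain C++ containers and hypothetical names rather than LLVM's PointerIntPair, could look like this:

#include <cstddef>
#include <optional>
#include <vector>

struct ArgState {
  std::optional<int> Candidate; // constant seen so far (int stands in for Constant*)
  bool NonConstant = false;     // set once call sites disagree
};

// Fold one call site's actual arguments into the per-argument state.
// Assumes ActualArgs has at least Args.size() entries, like a well-formed call.
static void observeCallSite(std::vector<ArgState> &Args,
                            const std::vector<std::optional<int>> &ActualArgs) {
  for (std::size_t I = 0; I < Args.size(); ++I) {
    if (Args[I].NonConstant)
      continue;                          // already given up on this argument
    if (!ActualArgs[I]) {                // this call site passes a non-constant
      Args[I].NonConstant = true;
      continue;
    }
    if (!Args[I].Candidate)
      Args[I].Candidate = ActualArgs[I]; // first constant seen
    else if (*Args[I].Candidate != *ActualArgs[I])
      Args[I].NonConstant = true;        // conflicting constants
  }
}

After all call sites are folded in, any argument that still has a candidate and is not flagged non-constant can have its uses replaced by that constant, which is what the loop over ArgumentConstants above does.
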
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp
index d37b9236380d..f4c12dd7f4cd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp
@@ -25,6 +25,7 @@ using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
initializeOpenMPOptLegacyPassPass(Registry);
initializeArgPromotionPass(Registry);
+ initializeAnnotation2MetadataLegacyPass(Registry);
initializeCalledValuePropagationLegacyPassPass(Registry);
initializeConstantMergeLegacyPassPass(Registry);
initializeCrossDSOCFIPass(Registry);
@@ -35,13 +36,13 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeGlobalOptLegacyPassPass(Registry);
initializeGlobalSplitPass(Registry);
initializeHotColdSplittingLegacyPassPass(Registry);
- initializeIPCPPass(Registry);
+ initializeIROutlinerLegacyPassPass(Registry);
initializeAlwaysInlinerLegacyPassPass(Registry);
initializeSimpleInlinerPass(Registry);
initializeInferFunctionAttrsLegacyPassPass(Registry);
initializeInternalizeLegacyPassPass(Registry);
- initializeLoopExtractorPass(Registry);
- initializeBlockExtractorPass(Registry);
+ initializeLoopExtractorLegacyPassPass(Registry);
+ initializeBlockExtractorLegacyPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
initializeLowerTypeTestsPass(Registry);
initializeMergeFunctionsLegacyPassPass(Registry);
@@ -104,10 +105,6 @@ void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createGlobalOptimizerPass());
}
-void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createIPConstantPropagationPass());
-}
-
void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPruneEHPass());
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
new file mode 100644
index 000000000000..4b6a4f3d8fc4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -0,0 +1,1764 @@
+//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation for the IROutliner which is used by the IROutliner Pass.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/IROutliner.h"
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO.h"
+#include <map>
+#include <set>
+#include <vector>
+
+#define DEBUG_TYPE "iroutliner"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+// Set to true if the user wants the ir outliner to run on linkonceodr linkage
+// functions. This is false by default because the linker can dedupe linkonceodr
+// functions and the outliner is confined to a single module (modulo LTO). It
+// should, however, be the default behavior in LTO.
+static cl::opt<bool> EnableLinkOnceODRIROutlining(
+ "enable-linkonceodr-ir-outlining", cl::Hidden,
+ cl::desc("Enable the IR outliner on linkonceodr functions"),
+ cl::init(false));
+
+// This is a debug option to test small pieces of code to ensure that outlining
+// works correctly.
+static cl::opt<bool> NoCostModel(
+ "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
+ cl::desc("Debug option to outline greedily, without restriction that "
+ "calculated benefit outweighs cost"));
+
+/// The OutlinableGroup holds all the overarching information for outlining
+/// a set of regions that are structurally similar to one another, such as the
+/// types of the overall function, the output blocks, the sets of stores needed
+/// and a list of the different regions. This information is used in the
+/// deduplication of extracted regions with the same structure.
+struct OutlinableGroup {
+ /// The sections that could be outlined
+ std::vector<OutlinableRegion *> Regions;
+
+ /// The argument types for the function created as the overall function to
+ /// replace the extracted function for each region.
+ std::vector<Type *> ArgumentTypes;
+ /// The FunctionType for the overall function.
+ FunctionType *OutlinedFunctionType = nullptr;
+ /// The Function for the collective overall function.
+ Function *OutlinedFunction = nullptr;
+
+ /// Flag for whether we should not consider this group of OutlinableRegions
+ /// for extraction.
+ bool IgnoreGroup = false;
+
+ /// The return block for the overall function.
+ BasicBlock *EndBB = nullptr;
+
+ /// A set containing the different GVN store sets needed. Each array contains
+ /// a sorted list of the different values that need to be stored into output
+ /// registers.
+ DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
+
+ /// Flag for whether the \ref ArgumentTypes have been defined after the
+ /// extraction of the first region.
+ bool InputTypesSet = false;
+
+ /// The number of input values in \ref ArgumentTypes. Anything after this
+ /// index in ArgumentTypes is an output argument.
+ unsigned NumAggregateInputs = 0;
+
+ /// The number of instructions that will be outlined by extracting \ref
+ /// Regions.
+ InstructionCost Benefit = 0;
+ /// The number of added instructions needed for the outlining of the \ref
+ /// Regions.
+ InstructionCost Cost = 0;
+
+ /// The argument that needs to be marked with the swifterr attribute. If not
+ /// needed, there is no value.
+ Optional<unsigned> SwiftErrorArgument;
+
+ /// For the \ref Regions, we look at every Value. If it is a constant,
+ /// we check whether it is the same in each Region.
+ ///
+ /// \param [in,out] NotSame contains the global value numbers where the
+ /// constant is not always the same, and must be passed in as an argument.
+ void findSameConstants(DenseSet<unsigned> &NotSame);
+
+ /// For the regions, look at each set of GVN stores needed and account for
+ /// each combination. Add an argument to the argument types if there is
+ /// more than one combination.
+ ///
+ /// \param [in] M - The module we are outlining from.
+ void collectGVNStoreSets(Module &M);
+};
+
+/// Move the contents of \p SourceBB to the end of \p TargetBB.
+/// \param SourceBB - the BasicBlock to pull Instructions from.
+/// \param TargetBB - the BasicBlock to put the Instructions into.
+static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
+ BasicBlock::iterator BBCurr, BBEnd, BBNext;
+ for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd;
+ BBCurr = BBNext) {
+ BBNext = std::next(BBCurr);
+ BBCurr->moveBefore(TargetBB, TargetBB.end());
+ }
+}
+
+void OutlinableRegion::splitCandidate() {
+ assert(!CandidateSplit && "Candidate already split!");
+
+ Instruction *StartInst = (*Candidate->begin()).Inst;
+ Instruction *EndInst = (*Candidate->end()).Inst;
+ assert(StartInst && EndInst && "Expected a start and end instruction?");
+ StartBB = StartInst->getParent();
+ PrevBB = StartBB;
+
+ // The basic block gets split like so:
+ // block: block:
+ // inst1 inst1
+ // inst2 inst2
+ // region1 br block_to_outline
+ // region2 block_to_outline:
+ // region3 -> region1
+ // region4 region2
+ // inst3 region3
+ // inst4 region4
+ // br block_after_outline
+ // block_after_outline:
+ // inst3
+ // inst4
+
+ std::string OriginalName = PrevBB->getName().str();
+
+ StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
+
+ // This is the case for the inner block since we do not have to include
+ // multiple blocks.
+ EndBB = StartBB;
+ FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
+
+ CandidateSplit = true;
+}
+
+void OutlinableRegion::reattachCandidate() {
+ assert(CandidateSplit && "Candidate is not split!");
+
+ // The basic block gets reattached like so:
+ // block: block:
+ // inst1 inst1
+ // inst2 inst2
+ // br block_to_outline region1
+ // block_to_outline: -> region2
+ // region1 region3
+ // region2 region4
+ // region3 inst3
+ // region4 inst4
+ // br block_after_outline
+ // block_after_outline:
+ // inst3
+ // inst4
+ assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
+ assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
+
+ // StartBB should only have one predecessor since we put an unconditional
+ // branch at the end of PrevBB when we split the BasicBlock.
+ PrevBB = StartBB->getSinglePredecessor();
+ assert(PrevBB != nullptr &&
+ "No Predecessor for the region start basic block!");
+
+ assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
+ assert(EndBB->getTerminator() && "Terminator removed from EndBB!");
+ PrevBB->getTerminator()->eraseFromParent();
+ EndBB->getTerminator()->eraseFromParent();
+
+ moveBBContents(*StartBB, *PrevBB);
+
+ BasicBlock *PlacementBB = PrevBB;
+ if (StartBB != EndBB)
+ PlacementBB = EndBB;
+ moveBBContents(*FollowBB, *PlacementBB);
+
+ PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
+ PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
+ StartBB->eraseFromParent();
+ FollowBB->eraseFromParent();
+
+ // Make sure to save changes back to the StartBB.
+ StartBB = PrevBB;
+ EndBB = nullptr;
+ PrevBB = nullptr;
+ FollowBB = nullptr;
+
+ CandidateSplit = false;
+}
+
+/// Find whether \p V matches the Constants previously found for the \p GVN.
+///
+/// \param V - The value to check for consistency.
+/// \param GVN - The global value number assigned to \p V.
+/// \param GVNToConstant - The mapping of global value number to Constants.
+/// \returns true if the Value matches the Constant mapped to \p GVN, and false
+/// if \p V is a Constant but does not match.
+/// \returns None if \p V is not a Constant.
+static Optional<bool>
+constantMatches(Value *V, unsigned GVN,
+ DenseMap<unsigned, Constant *> &GVNToConstant) {
+ // See if we have a constant.
+ Constant *CST = dyn_cast<Constant>(V);
+ if (!CST)
+ return None;
+
+ // Holds a mapping from a global value number to a Constant.
+ DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
+ bool Inserted;
+
+
+ // If we have a constant, try to make a new entry in the GVNToConstant.
+ std::tie(GVNToConstantIt, Inserted) =
+ GVNToConstant.insert(std::make_pair(GVN, CST));
+ // If it was found and is not equal, it is not the same. We do not
+ // handle this case yet, and exit early.
+ if (Inserted || (GVNToConstantIt->second == CST))
+ return true;
+
+ return false;
+}
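
constantMatches above is effectively a tri-state insert-and-compare keyed by the global value number: None means the value is not a constant at all, true means it is the first constant seen for that GVN or matches the recorded one, and false means a different constant was already recorded. A standalone sketch of the same pattern, with hypothetical names and std::map/std::optional standing in for the LLVM types:

#include <map>
#include <optional>
#include <string>

static std::optional<bool>
sketchConstantMatches(unsigned GVN, const std::string *MaybeConstant,
                      std::map<unsigned, std::string> &GVNToConstant) {
  if (!MaybeConstant)
    return std::nullopt;                 // not a constant at all
  // Record the constant on first sight; otherwise compare with the record.
  auto [It, Inserted] = GVNToConstant.emplace(GVN, *MaybeConstant);
  return Inserted || It->second == *MaybeConstant;
}
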
+
+InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
+ InstructionCost Benefit = 0;
+
+ // Estimate the benefit of outlining a specific section of the program. We
+ // mostly delegate this task to the TargetTransformInfo so that if the target
+ // has specific changes, we can have a more accurate estimate.
+
+ // However, getInstructionCost delegates the code size calculation for
+ // arithmetic instructions to getArithmeticInstrCost in
+ // include/Analysis/TargetTransformImpl.h, where it always estimates the code
+ // size of a division or remainder instruction to be 4, and of everything else
+ // to be 1. This is not an accurate representation of the division
+ // instruction for targets that have a native division instruction. To be
+ // overly conservative, we only add 1 to the number of instructions for each
+ // division or remainder instruction.
+ for (Instruction &I : *StartBB) {
+ switch (I.getOpcode()) {
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ Benefit += 1;
+ break;
+ default:
+ Benefit += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+ break;
+ }
+ }
+
+ return Benefit;
+}
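
As a worked illustration of the adjustment above (illustrative numbers only, since actual TTI code-size costs are target-dependent): a region containing three adds and one sdiv is credited a benefit of roughly 3 x 1 + 1 = 4 code-size units here, whereas taking the default arithmetic cost of 4 for the division would have reported 3 + 4 = 7 and overstated the savings on targets with a native divide instruction.
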
+
+/// Find whether \p Region matches the global value numbering to Constant
+/// mapping found so far.
+///
+/// \param Region - The OutlinableRegion we are checking for constants
+/// \param GVNToConstant - The mapping of global value number to Constants.
+/// \param NotSame - The set of global value numbers that do not have the same
+/// constant in each region.
+/// \returns true if all Constants are the same in every use of a Constant in \p
+/// Region and false if not
+static bool
+collectRegionsConstants(OutlinableRegion &Region,
+ DenseMap<unsigned, Constant *> &GVNToConstant,
+ DenseSet<unsigned> &NotSame) {
+ bool ConstantsTheSame = true;
+
+ IRSimilarityCandidate &C = *Region.Candidate;
+ for (IRInstructionData &ID : C) {
+
+ // Iterate over the operands in an instruction. If the global value number,
+ // assigned by the IRSimilarityCandidate, has been seen before, we check
+ // whether the number has already been found not to be the same value in each
+ // instance.
+ for (Value *V : ID.OperVals) {
+ Optional<unsigned> GVNOpt = C.getGVN(V);
+ assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
+ unsigned GVN = GVNOpt.getValue();
+
+ // Check if this global value has been found to not be the same already.
+ if (NotSame.contains(GVN)) {
+ if (isa<Constant>(V))
+ ConstantsTheSame = false;
+ continue;
+ }
+
+ // If it has been the same so far, we check whether the associated
+ // Constant value matches the previous instances of the same global
+ // value number. If the global value does not map to a Constant, it is
+ // considered to not be the same value.
+ Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
+ if (ConstantMatches.hasValue()) {
+ if (ConstantMatches.getValue())
+ continue;
+ else
+ ConstantsTheSame = false;
+ }
+
+ // While this value is a register now, it might not have been previously;
+ // make sure we don't already have a constant mapped to this global value
+ // number.
+ if (GVNToConstant.find(GVN) != GVNToConstant.end())
+ ConstantsTheSame = false;
+
+ NotSame.insert(GVN);
+ }
+ }
+
+ return ConstantsTheSame;
+}
+
+void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
+ DenseMap<unsigned, Constant *> GVNToConstant;
+
+ for (OutlinableRegion *Region : Regions)
+ collectRegionsConstants(*Region, GVNToConstant, NotSame);
+}
+
+void OutlinableGroup::collectGVNStoreSets(Module &M) {
+ for (OutlinableRegion *OS : Regions)
+ OutputGVNCombinations.insert(OS->GVNStores);
+
+ // We are adding an extracted argument to decide which output path to use in
+ // the basic block. It is used in a switch statement and only needs to be an
+ // integer.
+ if (OutputGVNCombinations.size() > 1)
+ ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
+}
+
+Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
+ unsigned FunctionNameSuffix) {
+ assert(!Group.OutlinedFunction && "Function is already defined!");
+
+ Group.OutlinedFunctionType = FunctionType::get(
+ Type::getVoidTy(M.getContext()), Group.ArgumentTypes, false);
+
+ // These functions will only be called from within the same module, so
+ // we can set an internal linkage.
+ Group.OutlinedFunction = Function::Create(
+ Group.OutlinedFunctionType, GlobalValue::InternalLinkage,
+ "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
+
+ // Transfer the swifterr attribute to the correct function parameter.
+ if (Group.SwiftErrorArgument.hasValue())
+ Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
+ Attribute::SwiftError);
+
+ Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
+ Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
+
+ return Group.OutlinedFunction;
+}
+
+/// Move each BasicBlock in \p Old to \p New.
+///
+/// \param [in] Old - the function to move the basic blocks from.
+/// \param [in] New - The function to move the basic blocks to.
+/// \returns the first return block for the function in New.
+static BasicBlock *moveFunctionData(Function &Old, Function &New) {
+ Function::iterator CurrBB, NextBB, FinalBB;
+ BasicBlock *NewEnd = nullptr;
+ std::vector<Instruction *> DebugInsts;
+ for (CurrBB = Old.begin(), FinalBB = Old.end(); CurrBB != FinalBB;
+ CurrBB = NextBB) {
+ NextBB = std::next(CurrBB);
+ CurrBB->removeFromParent();
+ CurrBB->insertInto(&New);
+ Instruction *I = CurrBB->getTerminator();
+ if (isa<ReturnInst>(I))
+ NewEnd = &(*CurrBB);
+ }
+
+ assert(NewEnd && "No return instruction for new function?");
+ return NewEnd;
+}
+
+/// Find the constants that will need to be lifted into arguments
+/// as they are not the same in each instance of the region.
+///
+/// \param [in] C - The IRSimilarityCandidate containing the region we are
+/// analyzing.
+/// \param [in] NotSame - The set of global value numbers that do not have a
+/// single Constant across all OutlinableRegions similar to \p C.
+/// \param [out] Inputs - The list containing the global value numbers of the
+/// arguments needed for the region of code.
+static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
+ std::vector<unsigned> &Inputs) {
+ DenseSet<unsigned> Seen;
+ // Iterate over the instructions, and find what constants will need to be
+ // extracted into arguments.
+ for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
+ IDIt != EndIDIt; IDIt++) {
+ for (Value *V : (*IDIt).OperVals) {
+ // Since these are stored before any outlining, they will be in the
+ // global value numbering.
+ unsigned GVN = C.getGVN(V).getValue();
+ if (isa<Constant>(V))
+ if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
+ Inputs.push_back(GVN);
+ Seen.insert(GVN);
+ }
+ }
+ }
+}
+
+/// Find the GVN for the inputs that have been found by the CodeExtractor.
+///
+/// \param [in] C - The IRSimilarityCandidate containing the region we are
+/// analyzing.
+/// \param [in] CurrentInputs - The set of inputs found by the
+/// CodeExtractor.
+/// \param [out] EndInputNumbers - The global value numbers for the extracted
+/// arguments.
+/// \param [in] OutputMappings - The mapping of values that have been replaced
+/// by a new output value.
+static void mapInputsToGVNs(IRSimilarityCandidate &C,
+ SetVector<Value *> &CurrentInputs,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ std::vector<unsigned> &EndInputNumbers) {
+ // Get the Global Value Number for each input. We check if the Value has been
+ // replaced by a different value at output, and use the original value before
+ // replacement.
+ for (Value *Input : CurrentInputs) {
+ assert(Input && "Have a nullptr as an input");
+ if (OutputMappings.find(Input) != OutputMappings.end())
+ Input = OutputMappings.find(Input)->second;
+ assert(C.getGVN(Input).hasValue() &&
+ "Could not find a numbering for the given input");
+ EndInputNumbers.push_back(C.getGVN(Input).getValue());
+ }
+}
+
+/// Find the original value for the \p ArgInput values if any one of them was
+/// replaced during a previous extraction.
+///
+/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
+/// \param [in] OutputMappings - The mapping of values that have been replaced
+/// by a new output value.
+/// \param [out] RemappedArgInputs - The remapped values according to
+/// \p OutputMappings that will be extracted.
+static void
+remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ SetVector<Value *> &RemappedArgInputs) {
+ // Get the global value number for each input that will be extracted as an
+ // argument by the code extractor, remapping if needed for reloaded values.
+ for (Value *Input : ArgInputs) {
+ if (OutputMappings.find(Input) != OutputMappings.end())
+ Input = OutputMappings.find(Input)->second;
+ RemappedArgInputs.insert(Input);
+ }
+}
+
+/// Find the input GVNs and the output values for a region of Instructions.
+/// Using the code extractor, we collect the inputs to the extracted function.
+///
+/// This function may mark \p Region as needing to be ignored; callers should
+/// check whether the region is to be ignored after calling it.
+///
+/// \param [in,out] Region - The region of code to be analyzed.
+/// \param [out] InputGVNs - The global value numbers for the extracted
+/// arguments.
+/// \param [in] NotSame - The global value numbers in the region that do not
+/// have the same constant value in the regions structurally similar to
+/// \p Region.
+/// \param [in] OutputMappings - The mapping of values that have been replaced
+/// by a new output value after extraction.
+/// \param [out] ArgInputs - The values of the inputs to the extracted function.
+/// \param [out] Outputs - The set of values extracted by the CodeExtractor
+/// as outputs.
+static void getCodeExtractorArguments(
+ OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
+ DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
+ SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
+ IRSimilarityCandidate &C = *Region.Candidate;
+
+ // OverallInputs are the inputs to the region found by the CodeExtractor.
+ // SinkCands and HoistCands are used by the CodeExtractor to find sunken
+ // allocas of values whose lifetimes are contained completely within the
+ // outlined region. PremappedInputs are the arguments found by the
+ // CodeExtractor, with conditions such as sunken allocas removed, but they
+ // may still need to be remapped due to the extracted output values replacing
+ // the original values. We use DummyOutputs for this first run of finding
+ // inputs and outputs since the outputs could change during findAllocas;
+ // the correct set of extracted outputs will be in the final Outputs ValueSet.
+ SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
+ DummyOutputs;
+
+ // Use the code extractor to get the inputs and outputs, without sunken
+ // allocas or removing llvm.assumes.
+ CodeExtractor *CE = Region.CE;
+ CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
+ assert(Region.StartBB && "Region must have a start BasicBlock!");
+ Function *OrigF = Region.StartBB->getParent();
+ CodeExtractorAnalysisCache CEAC(*OrigF);
+ BasicBlock *Dummy = nullptr;
+
+ // The region may be ineligible due to VarArgs in the parent function. In this
+ // case we ignore the region.
+ if (!CE->isEligible()) {
+ Region.IgnoreRegion = true;
+ return;
+ }
+
+ // Find if any values are going to be sunk into the function when extracted
+ CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
+ CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
+
+ // TODO: Support regions with sunken allocas: values whose lifetimes are
+ // contained completely within the outlined region. These are not guaranteed
+ // to be the same in every region, so we must elevate them all to arguments
+ // when they appear. If these values are not equal, it means there is some
+ // Input in OverallInputs that was removed for ArgInputs.
+ if (OverallInputs.size() != PremappedInputs.size()) {
+ Region.IgnoreRegion = true;
+ return;
+ }
+
+ findConstants(C, NotSame, InputGVNs);
+
+ mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
+
+ remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
+ ArgInputs);
+
+ // Sort the GVNs: since we now have constants included in the \ref InputGVNs,
+ // we need to make sure they are in a deterministic order.
+ stable_sort(InputGVNs);
+}
+
+/// Look over the inputs and map each input argument to an argument in the
+/// overall function for the OutlinableRegions. This creates a way to replace
+/// the arguments of the extracted function with the arguments of the new
+/// overall function.
+///
+/// \param [in,out] Region - The region of code to be analyzed.
+/// \param [in] InputGVNs - The global value numbering of the input values
+/// collected.
+/// \param [in] ArgInputs - The values of the arguments to the extracted
+/// function.
+static void
+findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
+ std::vector<unsigned> &InputGVNs,
+ SetVector<Value *> &ArgInputs) {
+
+ IRSimilarityCandidate &C = *Region.Candidate;
+ OutlinableGroup &Group = *Region.Parent;
+
+ // This counts the argument number in the overall function.
+ unsigned TypeIndex = 0;
+
+ // This counts the argument number in the extracted function.
+ unsigned OriginalIndex = 0;
+
+ // Find the mapping of the extracted arguments to the arguments for the
+ // overall function. Since there may be extra arguments in the overall
+ // function to account for the extracted constants, we have two different
+ // counters as we find extracted arguments, and as we come across overall
+ // arguments.
+ for (unsigned InputVal : InputGVNs) {
+ Optional<Value *> InputOpt = C.fromGVN(InputVal);
+ assert(InputOpt.hasValue() && "Global value number not found?");
+ Value *Input = InputOpt.getValue();
+
+ if (!Group.InputTypesSet) {
+ Group.ArgumentTypes.push_back(Input->getType());
+ // If the input value has a swifterr attribute, make sure to mark the
+ // argument in the overall function.
+ if (Input->isSwiftError()) {
+ assert(
+ !Group.SwiftErrorArgument.hasValue() &&
+ "Argument already marked with swifterr for this OutlinableGroup!");
+ Group.SwiftErrorArgument = TypeIndex;
+ }
+ }
+
+ // Check if we have a constant. If we do, add it to the overall argument
+ // number to Constant map for the region, and continue to the next input.
+ if (Constant *CST = dyn_cast<Constant>(Input)) {
+ Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
+ TypeIndex++;
+ continue;
+ }
+
+ // It is not a constant, so we create the mapping from the extracted argument
+ // list to the overall argument list.
+ assert(ArgInputs.count(Input) && "Input cannot be found!");
+
+ Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
+ Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
+ OriginalIndex++;
+ TypeIndex++;
+ }
+
+ // If the function type definitions for the OutlinableGroup holding the region
+ // have not been set, set the length of the inputs here. We should have the
+ // same inputs for all of the different regions contained in the
+ // OutlinableGroup since they are all structurally similar to one another.
+ if (!Group.InputTypesSet) {
+ Group.NumAggregateInputs = TypeIndex;
+ Group.InputTypesSet = true;
+ }
+
+ Region.NumExtractedInputs = OriginalIndex;
+}
+
+/// Create a mapping of the output arguments for the \p Region to the output
+/// arguments of the overall outlined function.
+///
+/// \param [in,out] Region - The region of code to be analyzed.
+/// \param [in] Outputs - The values found by the code extractor.
+static void
+findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
+ ArrayRef<Value *> Outputs) {
+ OutlinableGroup &Group = *Region.Parent;
+ IRSimilarityCandidate &C = *Region.Candidate;
+
+ // This counts the argument number in the extracted function.
+ unsigned OriginalIndex = Region.NumExtractedInputs;
+
+ // This counts the argument number in the overall function.
+ unsigned TypeIndex = Group.NumAggregateInputs;
+ bool TypeFound;
+ DenseSet<unsigned> AggArgsUsed;
+
+ // Iterate over the output types and identify if there is an aggregate pointer
+ // type whose base type matches the current output type. If there is, we mark
+ // that we will use this output register for this value. If not, we add
+ // another type to the overall argument type list. We also store the GVNs used
+ // for stores to identify which values will need to be moved into a special
+ // block that holds the stores to the output registers.
+ for (Value *Output : Outputs) {
+ TypeFound = false;
+ // We can do this since it is a result value, and will have a number
+ // that is necessarily the same. BUT if, in the future, the instructions
+ // do not have to be in the same order but only functionally the same, we
+ // will have to use a different scheme, as one-to-one correspondence is not
+ // guaranteed.
+ unsigned GlobalValue = C.getGVN(Output).getValue();
+ unsigned ArgumentSize = Group.ArgumentTypes.size();
+
+ for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
+ if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
+ continue;
+
+ if (AggArgsUsed.contains(Jdx))
+ continue;
+
+ TypeFound = true;
+ AggArgsUsed.insert(Jdx);
+ Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
+ Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
+ Region.GVNStores.push_back(GlobalValue);
+ break;
+ }
+
+ // We were unable to find an unused type in the output type set that matches
+ // the output, so we add a pointer type to the argument types of the overall
+ // function to handle this output and create a mapping to it.
+ if (!TypeFound) {
+ Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
+ AggArgsUsed.insert(Group.ArgumentTypes.size() - 1);
+ Region.ExtractedArgToAgg.insert(
+ std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1));
+ Region.AggArgToExtracted.insert(
+ std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex));
+ Region.GVNStores.push_back(GlobalValue);
+ }
+
+ stable_sort(Region.GVNStores);
+ OriginalIndex++;
+ TypeIndex++;
+ }
+}
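
The matching loop above boils down to "reuse the first unused aggregate output argument of the right pointer type, otherwise append a new one and use that". A standalone sketch of that slot assignment, with hypothetical names and strings standing in for LLVM pointer types:

#include <set>
#include <string>
#include <vector>

// Returns the aggregate-argument index assigned to an output of the given
// pointer type, reusing an unused matching slot when possible.
static unsigned assignOutputSlot(std::vector<std::string> &ArgTypes,
                                 std::set<unsigned> &UsedSlots,
                                 const std::string &OutputPtrType,
                                 unsigned FirstOutputIdx) {
  for (unsigned Idx = FirstOutputIdx; Idx < ArgTypes.size(); ++Idx)
    if (!UsedSlots.count(Idx) && ArgTypes[Idx] == OutputPtrType) {
      UsedSlots.insert(Idx);
      return Idx;                        // reuse an existing output argument
    }
  ArgTypes.push_back(OutputPtrType);     // no match: append a new output argument
  UsedSlots.insert(ArgTypes.size() - 1);
  return ArgTypes.size() - 1;
}
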
+
+void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
+ DenseSet<unsigned> &NotSame) {
+ std::vector<unsigned> Inputs;
+ SetVector<Value *> ArgInputs, Outputs;
+
+ getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
+ Outputs);
+
+ if (Region.IgnoreRegion)
+ return;
+
+ // Map the inputs found by the CodeExtractor to the arguments found for
+ // the overall function.
+ findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs);
+
+ // Map the outputs found by the CodeExtractor to the arguments found for
+ // the overall function.
+ findExtractedOutputToOverallOutputMapping(Region, Outputs.getArrayRef());
+}
+
+/// Replace the extracted function in the Region with a call to the overall
+/// function constructed from the deduplicated similar regions, replacing and
+/// remapping the values passed to the extracted function as arguments to the
+/// new arguments of the overall function.
+///
+/// \param [in] M - The module to outline from.
+/// \param [in] Region - The regions of extracted code to be replaced with a new
+/// function.
+/// \returns a call instruction with the replaced function.
+CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
+ std::vector<Value *> NewCallArgs;
+ DenseMap<unsigned, unsigned>::iterator ArgPair;
+
+ OutlinableGroup &Group = *Region.Parent;
+ CallInst *Call = Region.Call;
+ assert(Call && "Call to replace is nullptr?");
+ Function *AggFunc = Group.OutlinedFunction;
+ assert(AggFunc && "Function to replace with is nullptr?");
+
+ // If the argument lists are the same size, there are no values that need to
+ // be turned into arguments and no different output registers to handle. We
+ // can simply replace the called function in this case.
+ if (AggFunc->arg_size() == Call->arg_size()) {
+ LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
+ << *AggFunc << " with same number of arguments\n");
+ Call->setCalledFunction(AggFunc);
+ return Call;
+ }
+
+ // We have a different number of arguments than the new function, so
+ // we need to use our previous mappings of extracted argument to overall
+ // function argument, and of constant to overall function argument, to create
+ // the new argument list.
+ for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
+
+ if (AggArgIdx == AggFunc->arg_size() - 1 &&
+ Group.OutputGVNCombinations.size() > 1) {
+ // If we are on the last argument, and we need to differentiate between
+ // output blocks, add an integer to the argument list to determine
+ // what block to take
+ LLVM_DEBUG(dbgs() << "Set switch block argument to "
+ << Region.OutputBlockNum << "\n");
+ NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
+ Region.OutputBlockNum));
+ continue;
+ }
+
+ ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
+ if (ArgPair != Region.AggArgToExtracted.end()) {
+ Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
+ // If we found the mapping from the extracted function to the overall
+ // function, we simply add it to the argument list. We use the same value;
+ // it just needs to honor the new order of arguments.
+ LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
+ << *ArgumentValue << "\n");
+ NewCallArgs.push_back(ArgumentValue);
+ continue;
+ }
+
+ // If it is a constant, we simply add it to the argument list as a value.
+ if (Region.AggArgToConstant.find(AggArgIdx) !=
+ Region.AggArgToConstant.end()) {
+ Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
+ LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
+ << *CST << "\n");
+ NewCallArgs.push_back(CST);
+ continue;
+ }
+
+ // Add a nullptr value if the argument is not found in the extracted
+ // function. If we cannot find a value, it means it is not in use
+ // for the region, so we should not pass anything to it.
+ LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
+ NewCallArgs.push_back(ConstantPointerNull::get(
+ static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
+ }
+
+ LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
+ << *AggFunc << " with new set of arguments\n");
+ // Create the new call instruction and erase the old one.
+ Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
+ Call);
+
+ // It is possible that the call to the outlined function is either the first
+ // instruction in the new block, the last instruction, or both. If either
+ // of these is the case, we need to make sure that we replace the instruction
+ // in the IRInstructionData struct with the new call.
+ CallInst *OldCall = Region.Call;
+ if (Region.NewFront->Inst == OldCall)
+ Region.NewFront->Inst = Call;
+ if (Region.NewBack->Inst == OldCall)
+ Region.NewBack->Inst = Call;
+
+ // Transfer any debug information.
+ Call->setDebugLoc(Region.Call->getDebugLoc());
+
+ // Remove the old instruction.
+ OldCall->eraseFromParent();
+ Region.Call = Call;
+
+ // Make sure that the argument in the new function has the SwiftError
+ // argument.
+ if (Group.SwiftErrorArgument.hasValue())
+ Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
+ Attribute::SwiftError);
+
+ return Call;
+}
+
+/// Within an extracted function, replace the argument uses of the extracted
+/// region with the arguments of the function for an OutlinableGroup.
+///
+/// \param [in] Region - The region of extracted code to be changed.
+/// \param [in,out] OutputBB - The BasicBlock for the output stores for this
+/// region.
+static void replaceArgumentUses(OutlinableRegion &Region,
+ BasicBlock *OutputBB) {
+ OutlinableGroup &Group = *Region.Parent;
+ assert(Region.ExtractedFunction && "Region has no extracted function?");
+
+ for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
+ ArgIdx++) {
+ assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
+ Region.ExtractedArgToAgg.end() &&
+ "No mapping from extracted to outlined?");
+ unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
+ Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
+ Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
+ // The argument is an input, so we can simply replace it with the overall
+ // argument value
+ if (ArgIdx < Region.NumExtractedInputs) {
+ LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
+ << *Region.ExtractedFunction << " with " << *AggArg
+ << " in function " << *Group.OutlinedFunction << "\n");
+ Arg->replaceAllUsesWith(AggArg);
+ continue;
+ }
+
+ // If we are replacing an output, we place the store value in its own
+ // block inside the overall function before replacing the use of the output
+ // in the function.
+ assert(Arg->hasOneUse() && "Output argument can only have one use");
+ User *InstAsUser = Arg->user_back();
+ assert(InstAsUser && "User is nullptr!");
+
+ Instruction *I = cast<Instruction>(InstAsUser);
+ I->setDebugLoc(DebugLoc());
+ LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
+ << *OutputBB << "\n");
+
+ I->moveBefore(*OutputBB, OutputBB->end());
+
+ LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
+ << *Region.ExtractedFunction << " with " << *AggArg
+ << " in function " << *Group.OutlinedFunction << "\n");
+ Arg->replaceAllUsesWith(AggArg);
+ }
+}
+
+/// Within an extracted function, replace the constants that need to be lifted
+/// into arguments with the actual argument.
+///
+/// \param Region [in] - The region of extracted code to be changed.
+void replaceConstants(OutlinableRegion &Region) {
+ OutlinableGroup &Group = *Region.Parent;
+ // Iterate over the constants that need to be elevated into arguments
+ for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
+ unsigned AggArgIdx = Const.first;
+ Function *OutlinedFunction = Group.OutlinedFunction;
+ assert(OutlinedFunction && "Overall Function is not defined?");
+ Constant *CST = Const.second;
+ Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
+ // Identify the argument it will be elevated to, and replace instances of
+ // that constant in the function.
+
+ // TODO: If in the future constants do not have one global value number,
+ // i.e. a constant 1 could be mapped to several values, this check will
+ // have to be more strict. It cannot be using only replaceUsesWithIf.
+
+ LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
+ << " in function " << *OutlinedFunction << " with "
+ << *Arg << "\n");
+ CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
+ if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
+ return I->getFunction() == OutlinedFunction;
+ return false;
+ });
+ }
+}
+
+/// For the given function, find all the instructions that are not debug or
+/// lifetime instructions, and return them as a vector. Exclude any blocks in
+/// \p ExcludeBlocks.
+///
+/// \param [in] F - The function we collect the instructions from.
+/// \param [in] ExcludeBlocks - BasicBlocks to ignore.
+/// \returns the list of instructions extracted.
+static std::vector<Instruction *>
+collectRelevantInstructions(Function &F,
+ DenseSet<BasicBlock *> &ExcludeBlocks) {
+ std::vector<Instruction *> RelevantInstructions;
+
+ for (BasicBlock &BB : F) {
+ if (ExcludeBlocks.contains(&BB))
+ continue;
+
+ for (Instruction &Inst : BB) {
+ if (Inst.isLifetimeStartOrEnd())
+ continue;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ RelevantInstructions.push_back(&Inst);
+ }
+ }
+
+ return RelevantInstructions;
+}
+
+/// It is possible that there is a basic block that already performs the same
+/// stores. This returns the number of an existing duplicate block, if one
+/// exists.
+///
+/// \param OutputBB [in] the block we are looking for a duplicate of.
+/// \param OutputStoreBBs [in] The existing output blocks.
+/// \returns an optional value with the number of the matching output block.
+Optional<unsigned>
+findDuplicateOutputBlock(BasicBlock *OutputBB,
+ ArrayRef<BasicBlock *> OutputStoreBBs) {
+
+ bool WrongInst = false;
+ bool WrongSize = false;
+ unsigned MatchingNum = 0;
+ for (BasicBlock *CompBB : OutputStoreBBs) {
+ WrongInst = false;
+ if (CompBB->size() - 1 != OutputBB->size()) {
+ WrongSize = true;
+ MatchingNum++;
+ continue;
+ }
+
+ WrongSize = false;
+ BasicBlock::iterator NIt = OutputBB->begin();
+ for (Instruction &I : *CompBB) {
+ if (isa<BranchInst>(&I))
+ continue;
+
+ if (!I.isIdenticalTo(&(*NIt))) {
+ WrongInst = true;
+ break;
+ }
+
+ NIt++;
+ }
+ if (!WrongInst && !WrongSize)
+ return MatchingNum;
+
+ MatchingNum++;
+ }
+
+ return None;
+}
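
Conceptually, findDuplicateOutputBlock compares the candidate store block against each previously recorded one (which carries one extra branch terminator) and reports the index of the first exact match. A simplified standalone sketch, with vectors of ints standing in for the instruction sequences:

#include <optional>
#include <vector>

static std::optional<unsigned>
sketchFindDuplicate(const std::vector<int> &CandidateStores,
                    const std::vector<std::vector<int>> &ExistingStoreBlocks) {
  for (unsigned Num = 0; Num < ExistingStoreBlocks.size(); ++Num)
    if (ExistingStoreBlocks[Num] == CandidateStores)
      return Num;                        // duplicate found: reuse block Num
  return std::nullopt;                   // no duplicate: caller keeps the new block
}
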
+
+/// For the outlined section, move the needed StoreInsts for the output
+/// registers into their own block. Then, determine if there is a duplicate
+/// output block already created.
+///
+/// \param [in] OG - The OutlinableGroup of regions to be outlined.
+/// \param [in] Region - The OutlinableRegion that is being analyzed.
+/// \param [in,out] OutputBB - the block that stores for this region will be
+/// placed in.
+/// \param [in] EndBB - the final block of the extracted function.
+/// \param [in] OutputMappings - The mapping of values that have
+/// been replaced by a new output value.
+/// \param [in,out] OutputStoreBBs - The existing output blocks.
+static void
+alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
+ BasicBlock *OutputBB, BasicBlock *EndBB,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ std::vector<BasicBlock *> &OutputStoreBBs) {
+ DenseSet<unsigned> ValuesToFind(Region.GVNStores.begin(),
+ Region.GVNStores.end());
+
+ // We iterate over the instructions in the extracted function, and find the
+ // global value number of the instructions. If we find a value that should
+ // be contained in a store, we replace the uses of the value with the value
+ // from the overall function, so that the store is storing the correct
+ // value from the overall function.
+ DenseSet<BasicBlock *> ExcludeBBs(OutputStoreBBs.begin(),
+ OutputStoreBBs.end());
+ ExcludeBBs.insert(OutputBB);
+ std::vector<Instruction *> ExtractedFunctionInsts =
+ collectRelevantInstructions(*(Region.ExtractedFunction), ExcludeBBs);
+ std::vector<Instruction *> OverallFunctionInsts =
+ collectRelevantInstructions(*OG.OutlinedFunction, ExcludeBBs);
+
+ assert(ExtractedFunctionInsts.size() == OverallFunctionInsts.size() &&
+ "Number of relevant instructions not equal!");
+
+ unsigned NumInstructions = ExtractedFunctionInsts.size();
+ for (unsigned Idx = 0; Idx < NumInstructions; Idx++) {
+ Value *V = ExtractedFunctionInsts[Idx];
+
+ if (OutputMappings.find(V) != OutputMappings.end())
+ V = OutputMappings.find(V)->second;
+ Optional<unsigned> GVN = Region.Candidate->getGVN(V);
+
+ // If we have found one of the stored values for output, replace the value
+ // with the corresponding one from the overall function.
+ if (GVN.hasValue() && ValuesToFind.erase(GVN.getValue())) {
+ V->replaceAllUsesWith(OverallFunctionInsts[Idx]);
+ if (ValuesToFind.size() == 0)
+ break;
+ }
+
+ if (ValuesToFind.size() == 0)
+ break;
+ }
+
+ assert(ValuesToFind.size() == 0 && "Not all store values were handled!");
+
+ // If the size of the block is 0, then there are no stores, and we do not
+ // need to save this block.
+ if (OutputBB->size() == 0) {
+ Region.OutputBlockNum = -1;
+ OutputBB->eraseFromParent();
+ return;
+ }
+
+ // Determine if there is a duplicate block.
+ Optional<unsigned> MatchingBB =
+ findDuplicateOutputBlock(OutputBB, OutputStoreBBs);
+
+ // If there is, we remove the new output block. If there is not,
+ // we add it to our list of output blocks.
+ if (MatchingBB.hasValue()) {
+ LLVM_DEBUG(dbgs() << "Set output block for region in function"
+ << Region.ExtractedFunction << " to "
+ << MatchingBB.getValue());
+
+ Region.OutputBlockNum = MatchingBB.getValue();
+ OutputBB->eraseFromParent();
+ return;
+ }
+
+ Region.OutputBlockNum = OutputStoreBBs.size();
+
+ LLVM_DEBUG(dbgs() << "Create output block for region in"
+ << Region.ExtractedFunction << " to "
+ << *OutputBB);
+ OutputStoreBBs.push_back(OutputBB);
+ BranchInst::Create(EndBB, OutputBB);
+}
+
+/// Create the switch statement for the outlined function to differentiate
+/// between all the output blocks.
+///
+/// If there is only one output block, its stores are instead merged into the
+/// end block of the outlined function.
+/// \param [in] M - The module we are outlining from.
+/// \param [in] OG - The group of regions to be outlined.
+/// \param [in] EndBB - The final block of the extracted function.
+/// \param [in,out] OutputStoreBBs - The existing output blocks.
+void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
+ ArrayRef<BasicBlock *> OutputStoreBBs) {
+ // We only need the switch statement if there is more than one store
+ // combination.
+ if (OG.OutputGVNCombinations.size() > 1) {
+ Function *AggFunc = OG.OutlinedFunction;
+ // Create a final block
+ BasicBlock *ReturnBlock =
+ BasicBlock::Create(M.getContext(), "final_block", AggFunc);
+ Instruction *Term = EndBB->getTerminator();
+ Term->moveBefore(*ReturnBlock, ReturnBlock->end());
+ // Put the switch statement in the old end basic block for the function with
+ // a fall through to the new return block
+ LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
+ << OutputStoreBBs.size() << "\n");
+ SwitchInst *SwitchI =
+ SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
+ ReturnBlock, OutputStoreBBs.size(), EndBB);
+
+ unsigned Idx = 0;
+ for (BasicBlock *BB : OutputStoreBBs) {
+ SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
+ BB);
+ Term = BB->getTerminator();
+ Term->setSuccessor(0, ReturnBlock);
+ Idx++;
+ }
+ return;
+ }
+
+ // If stores are needed, move them from the output block to the end
+ // block to save on branching instructions.
+ if (OutputStoreBBs.size() == 1) {
+ LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
+ << *OG.OutlinedFunction << "\n");
+ BasicBlock *OutputBlock = OutputStoreBBs[0];
+ Instruction *Term = OutputBlock->getTerminator();
+ Term->eraseFromParent();
+ Term = EndBB->getTerminator();
+ moveBBContents(*OutputBlock, *EndBB);
+ Term->moveBefore(*EndBB, EndBB->end());
+ OutputBlock->eraseFromParent();
+ }
+}
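
Putting the pieces together: once a group has more than one output combination, the deduplicated function conceptually takes the shape sketched below. This is hypothetical pseudo-source for readability, not IR produced by the pass, but the trailing i32 selector argument and the outlined_ir_func_ naming follow the code above.

// Hypothetical shape of the generated function; the real output is LLVM IR.
void outlined_ir_func_0(int In0, int In1, int *Out0, int OutputBlockNum) {
  int Result = In0 + In1;     // the deduplicated region body
  switch (OutputBlockNum) {   // added by createSwitchStatement
  case 0:                     // output_block_0: stores for the first variant
    *Out0 = Result;
    break;
  case 1:                     // output_block_1: stores for the second variant
    /* ... */
    break;
  }
  // final_block: the single shared return
}
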
+
+/// Fill the new function that will serve as the replacement function for all of
+/// the extracted regions of a certain structure from the first region in the
+/// list of regions. Replace this first region's extracted function with the
+/// new overall function.
+///
+/// \param [in] M - The module we are outlining from.
+/// \param [in] CurrentGroup - The group of regions to be outlined.
+/// \param [in,out] OutputStoreBBs - The output blocks for each different
+/// set of stores needed for the different functions.
+/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
+/// once outlining is complete.
+static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup,
+ std::vector<BasicBlock *> &OutputStoreBBs,
+ std::vector<Function *> &FuncsToRemove) {
+ OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
+
+ // Move first extracted function's instructions into new function.
+ LLVM_DEBUG(dbgs() << "Move instructions from "
+ << *CurrentOS->ExtractedFunction << " to instruction "
+ << *CurrentGroup.OutlinedFunction << "\n");
+
+ CurrentGroup.EndBB = moveFunctionData(*CurrentOS->ExtractedFunction,
+ *CurrentGroup.OutlinedFunction);
+
+ // Transfer the attributes from the function to the new function.
+ for (Attribute A :
+ CurrentOS->ExtractedFunction->getAttributes().getFnAttributes())
+ CurrentGroup.OutlinedFunction->addFnAttr(A);
+
+ // Create an output block for the first extracted function.
+ BasicBlock *NewBB = BasicBlock::Create(
+ M.getContext(), Twine("output_block_") + Twine(static_cast<unsigned>(0)),
+ CurrentGroup.OutlinedFunction);
+ CurrentOS->OutputBlockNum = 0;
+
+ replaceArgumentUses(*CurrentOS, NewBB);
+ replaceConstants(*CurrentOS);
+
+ // If the new basic block has no new stores, we can erase it from the module.
+ // If it does, we create a branch instruction from the new block to the end
+ // basic block.
+ if (NewBB->size() == 0) {
+ CurrentOS->OutputBlockNum = -1;
+ NewBB->eraseFromParent();
+ } else {
+ BranchInst::Create(CurrentGroup.EndBB, NewBB);
+ OutputStoreBBs.push_back(NewBB);
+ }
+
+ // Replace the call to the extracted function with the outlined function.
+ CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
+
+ // We only delete the extracted functions at the end since we may need to
+ // reference instructions contained in them for mapping purposes.
+ FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
+}
+
+void IROutliner::deduplicateExtractedSections(
+ Module &M, OutlinableGroup &CurrentGroup,
+ std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
+ createFunction(M, CurrentGroup, OutlinedFunctionNum);
+
+ std::vector<BasicBlock *> OutputStoreBBs;
+
+ OutlinableRegion *CurrentOS;
+
+ fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
+
+ for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
+ CurrentOS = CurrentGroup.Regions[Idx];
+ AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
+ *CurrentOS->ExtractedFunction);
+
+ // Create a new BasicBlock to hold the needed store instructions.
+ BasicBlock *NewBB = BasicBlock::Create(
+ M.getContext(), "output_block_" + std::to_string(Idx),
+ CurrentGroup.OutlinedFunction);
+ replaceArgumentUses(*CurrentOS, NewBB);
+
+ alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
+ CurrentGroup.EndBB, OutputMappings,
+ OutputStoreBBs);
+
+ CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
+ FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
+ }
+
+ // Create a switch statement to handle the different output schemes.
+ createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs);
+
+ OutlinedFunctionNum++;
+}
+
+void IROutliner::pruneIncompatibleRegions(
+ std::vector<IRSimilarityCandidate> &CandidateVec,
+ OutlinableGroup &CurrentGroup) {
+ bool PreviouslyOutlined;
+
+ // Sort from beginning to end, so the IRSimilarityCandidates are in order.
+ stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
+ const IRSimilarityCandidate &RHS) {
+ return LHS.getStartIdx() < RHS.getStartIdx();
+ });
+
+ unsigned CurrentEndIdx = 0;
+ for (IRSimilarityCandidate &IRSC : CandidateVec) {
+ PreviouslyOutlined = false;
+ unsigned StartIdx = IRSC.getStartIdx();
+ unsigned EndIdx = IRSC.getEndIdx();
+
+ for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+ if (Outlined.contains(Idx)) {
+ PreviouslyOutlined = true;
+ break;
+ }
+
+ if (PreviouslyOutlined)
+ continue;
+
+ // TODO: If in the future we can outline across BasicBlocks, we will need to
+ // check all BasicBlocks contained in the region.
+ if (IRSC.getStartBB()->hasAddressTaken())
+ continue;
+
+ if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
+ !OutlineFromLinkODRs)
+ continue;
+
+ // Greedily prune out any regions that will overlap with already chosen
+ // regions.
+ if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
+ continue;
+
+ bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
+ // We check if there is a discrepancy between the InstructionDataList
+ // and the actual next instruction in the module. If there is, it means
+ // that an extra instruction was added, likely by the CodeExtractor.
+
+ // Since we do not have any similarity data about this particular
+ // instruction, we cannot confidently outline it, and must discard this
+ // candidate.
+ if (std::next(ID.getIterator())->Inst !=
+ ID.Inst->getNextNonDebugInstruction())
+ return true;
+ return !this->InstructionClassifier.visit(ID.Inst);
+ });
+
+ if (BadInst)
+ continue;
+
+ OutlinableRegion *OS = new (RegionAllocator.Allocate())
+ OutlinableRegion(IRSC, CurrentGroup);
+ CurrentGroup.Regions.push_back(OS);
+
+ CurrentEndIdx = EndIdx;
+ }
+}
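
The greedy pruning in pruneIncompatibleRegions reduces, once candidates are sorted by start index, to keeping a candidate only when it begins after the end of the last accepted one. A small self-contained C++ sketch of that interval selection (the candidate indices are invented for illustration; the real code also rejects previously outlined or otherwise illegal candidates):

// Plain-C++ sketch of the greedy, non-overlapping candidate selection.
#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  // (StartIdx, EndIdx) of similarity candidates in instruction-number space.
  std::vector<std::pair<unsigned, unsigned>> Candidates = {
      {5, 9}, {0, 3}, {2, 6}, {10, 12}};
  // Lexicographic sort orders by start index first, as in the pass.
  std::stable_sort(Candidates.begin(), Candidates.end());

  unsigned CurrentEndIdx = 0;
  std::vector<std::pair<unsigned, unsigned>> Kept;
  for (auto &[Start, End] : Candidates) {
    if (CurrentEndIdx != 0 && Start <= CurrentEndIdx)
      continue;                      // overlaps a region already chosen
    Kept.push_back({Start, End});
    CurrentEndIdx = End;
  }

  for (auto &[S, E] : Kept)
    std::cout << "[" << S << "," << E << "] "; // prints "[0,3] [5,9] [10,12]"
  std::cout << "\n";
  return 0;
}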
+
+InstructionCost
+IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
+ InstructionCost RegionBenefit = 0;
+ for (OutlinableRegion *Region : CurrentGroup.Regions) {
+ TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
+ // We add the number of instructions in the region to the benefit as an
+ // estimate as to how much will be removed.
+ RegionBenefit += Region->getBenefit(TTI);
+ LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
+ << " saved instructions to overall benefit.\n");
+ }
+
+ return RegionBenefit;
+}
+
+InstructionCost
+IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
+ InstructionCost OverallCost = 0;
+ for (OutlinableRegion *Region : CurrentGroup.Regions) {
+ TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
+
+ // Each output incurs a load after the call, so we add that to the cost.
+ for (unsigned OutputGVN : Region->GVNStores) {
+ Optional<Value *> OV = Region->Candidate->fromGVN(OutputGVN);
+ assert(OV.hasValue() && "Could not find value for GVN?");
+ Value *V = OV.getValue();
+ InstructionCost LoadCost =
+ TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
+ TargetTransformInfo::TCK_CodeSize);
+
+ LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
+ << " instructions to cost for output of type "
+ << *V->getType() << "\n");
+ OverallCost += LoadCost;
+ }
+ }
+
+ return OverallCost;
+}
+
+/// Find the extra instructions needed to handle any output values for the
+/// region.
+///
+/// \param [in] M - The Module to outline from.
+/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
+/// \param [in] TTI - The TargetTransformInfo used to collect information for
+/// new instruction costs.
+/// \returns the additional cost to handle the outputs.
+static InstructionCost findCostForOutputBlocks(Module &M,
+ OutlinableGroup &CurrentGroup,
+ TargetTransformInfo &TTI) {
+ InstructionCost OutputCost = 0;
+
+ for (const ArrayRef<unsigned> &OutputUse :
+ CurrentGroup.OutputGVNCombinations) {
+ IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
+ for (unsigned GVN : OutputUse) {
+ Optional<Value *> OV = Candidate.fromGVN(GVN);
+ assert(OV.hasValue() && "Could not find value for GVN?");
+ Value *V = OV.getValue();
+ InstructionCost StoreCost =
+ TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
+ TargetTransformInfo::TCK_CodeSize);
+
+ // An instruction cost is added for each store set that needs to occur for
+ // various output combinations inside the function, plus a branch to
+ // return to the exit block.
+ LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
+ << " instructions to cost for output of type "
+ << *V->getType() << "\n");
+ OutputCost += StoreCost;
+ }
+
+ InstructionCost BranchCost =
+ TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
+ LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
+ << " a branch instruction\n");
+ OutputCost += BranchCost;
+ }
+
+ // If there is more than one output scheme, we must have a comparison and
+ // branch for each different item in the switch statement.
+ if (CurrentGroup.OutputGVNCombinations.size() > 1) {
+ InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
+ Instruction::ICmp, Type::getInt32Ty(M.getContext()),
+ Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE,
+ TargetTransformInfo::TCK_CodeSize);
+ InstructionCost BranchCost =
+ TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
+
+ unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
+ InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
+
+ LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
+ << " instructions for each switch case for each different"
+ << " output path in a function\n");
+ OutputCost += TotalCost;
+ }
+
+ return OutputCost;
+}
+
+void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
+ InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
+ CurrentGroup.Benefit += RegionBenefit;
+ LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
+
+ InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
+ CurrentGroup.Cost += OutputReloadCost;
+ LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+ InstructionCost AverageRegionBenefit =
+ RegionBenefit / CurrentGroup.Regions.size();
+ unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
+ unsigned NumRegions = CurrentGroup.Regions.size();
+ TargetTransformInfo &TTI =
+ getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
+
+ // We add the cost of one region once, to account for the instructions that
+ // make up the body of the newly created function.
+ LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
+ << " instructions to cost for body of new function.\n");
+ CurrentGroup.Cost += AverageRegionBenefit;
+ LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+ // For each argument, we must add an instruction for loading the argument
+ // out of the register and into a value inside of the newly outlined function.
+ LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
+ << " instructions to cost for each argument in the new"
+ << " function.\n");
+ CurrentGroup.Cost +=
+ OverallArgumentNum * TargetTransformInfo::TCC_Basic;
+ LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+ // Each argument needs to either be loaded into a register or onto the stack.
+ // Some arguments will only be passed on the stack once the argument
+ // registers are filled.
+ LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
+ << " instructions to cost for each argument in the new"
+ << " function " << NumRegions << " times for the "
+ << "needed argument handling at the call site.\n");
+ CurrentGroup.Cost +=
+ 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
+ LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+ CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
+ LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+}
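
findCostBenefit boils down to simple arithmetic: the benefit is roughly the instructions removed across all regions, while the cost covers one retained copy of the body, per-argument handling in the new function, and per-call-site argument setup (the output-block term from findCostForOutputBlocks is omitted below for brevity). A back-of-the-envelope C++ sketch of that comparison, with invented constants that do not reflect real TTI costs:

// Plain-C++ sketch of the cost/benefit decision made by the outliner.
#include <cstdio>

int main() {
  unsigned NumRegions = 3;        // similar regions found
  unsigned InstrsPerRegion = 20;  // instructions removed per region
  unsigned NumArgs = 4;           // arguments of the aggregate function

  unsigned Benefit = NumRegions * InstrsPerRegion;
  unsigned Cost = InstrsPerRegion             // body of the new function
                  + NumArgs                   // argument handling in the callee
                  + 2 * NumArgs * NumRegions; // argument setup per call site

  std::printf("benefit=%u cost=%u -> %s\n", Benefit, Cost,
              Cost >= Benefit ? "skip outlining" : "outline");
  return 0;
}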
+
+void IROutliner::updateOutputMapping(OutlinableRegion &Region,
+ ArrayRef<Value *> Outputs,
+ LoadInst *LI) {
+ // For any load instruction following the call, check its pointer operand.
+ Value *Operand = LI->getPointerOperand();
+ Optional<unsigned> OutputIdx = None;
+ // Find whether the operand is an output register.
+ for (unsigned ArgIdx = Region.NumExtractedInputs;
+ ArgIdx < Region.Call->arg_size(); ArgIdx++) {
+ if (Operand == Region.Call->getArgOperand(ArgIdx)) {
+ OutputIdx = ArgIdx - Region.NumExtractedInputs;
+ break;
+ }
+ }
+
+ // If we found an output register, record a mapping from the new value to
+ // the original value.
+ if (!OutputIdx.hasValue())
+ return;
+
+ if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
+ OutputMappings.end()) {
+ LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
+ << *Outputs[OutputIdx.getValue()] << "\n");
+ OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
+ } else {
+ Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
+ LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
+ << *Outputs[OutputIdx.getValue()] << "\n");
+ OutputMappings.insert(std::make_pair(LI, Orig));
+ }
+}
+
+bool IROutliner::extractSection(OutlinableRegion &Region) {
+ SetVector<Value *> ArgInputs, Outputs, SinkCands;
+ Region.CE->findInputsOutputs(ArgInputs, Outputs, SinkCands);
+
+ assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
+ assert(Region.FollowBB && "FollowBB for the OutlinableRegion is nullptr!");
+ Function *OrigF = Region.StartBB->getParent();
+ CodeExtractorAnalysisCache CEAC(*OrigF);
+ Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC);
+
+ // If the extraction was successful, find the BasicBlock, and reassign the
+ // OutlinableRegion blocks
+ if (!Region.ExtractedFunction) {
+ LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
+ << "\n");
+ Region.reattachCandidate();
+ return false;
+ }
+
+ BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor();
+ Region.StartBB = RewrittenBB;
+ Region.EndBB = RewrittenBB;
+
+ // The sequence of outlinable regions has now changed. We must fix the
+ // IRInstructionDataList for consistency. Although they may not be illegal
+ // instructions, they should not be compared with anything else as they
+ // should not be outlined in this round. So marking these as illegal is
+ // allowed.
+ IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
+ Instruction *BeginRewritten = &*RewrittenBB->begin();
+ Instruction *EndRewritten = &*RewrittenBB->begin();
+ Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
+ *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
+ Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
+ *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
+
+ // Insert the first IRInstructionData of the new region in front of the
+ // first IRInstructionData of the IRSimilarityCandidate.
+ IDL->insert(Region.Candidate->begin(), *Region.NewFront);
+ // Insert the last IRInstructionData of the new region after the
+ // last IRInstructionData of the IRSimilarityCandidate.
+ IDL->insert(Region.Candidate->end(), *Region.NewBack);
+ // Remove the IRInstructionData from the IRSimilarityCandidate.
+ IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
+
+ assert(RewrittenBB != nullptr &&
+ "Could not find a predecessor after extraction!");
+
+ // Iterate over the new set of instructions to find the new call
+ // instruction.
+ for (Instruction &I : *RewrittenBB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ if (Region.ExtractedFunction == CI->getCalledFunction())
+ Region.Call = CI;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
+ updateOutputMapping(Region, Outputs.getArrayRef(), LI);
+ Region.reattachCandidate();
+ return true;
+}
+
+unsigned IROutliner::doOutline(Module &M) {
+ // Find the possible similarity sections.
+ IRSimilarityIdentifier &Identifier = getIRSI(M);
+ SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
+
+ // Sort them by size of extracted sections
+ unsigned OutlinedFunctionNum = 0;
+ // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
+ // to sort them by the potential number of instructions to be outlined
+ if (SimilarityCandidates.size() > 1)
+ llvm::stable_sort(SimilarityCandidates,
+ [](const std::vector<IRSimilarityCandidate> &LHS,
+ const std::vector<IRSimilarityCandidate> &RHS) {
+ return LHS[0].getLength() * LHS.size() >
+ RHS[0].getLength() * RHS.size();
+ });
+
+ DenseSet<unsigned> NotSame;
+ std::vector<Function *> FuncsToRemove;
+ // Iterate over the possible sets of similarity.
+ for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
+ OutlinableGroup CurrentGroup;
+
+ // Remove entries that were previously outlined
+ pruneIncompatibleRegions(CandidateVec, CurrentGroup);
+
+ // We pruned the number of regions to 0 or 1, meaning that it is not worth
+ // trying to outline since there is no compatible similar instance of this
+ // code.
+ if (CurrentGroup.Regions.size() < 2)
+ continue;
+
+ // Determine if there are any values that are the same constant throughout
+ // each section in the set.
+ NotSame.clear();
+ CurrentGroup.findSameConstants(NotSame);
+
+ if (CurrentGroup.IgnoreGroup)
+ continue;
+
+ // Create a CodeExtractor for each outlinable region. Identify inputs and
+ // outputs for each section using the code extractor and create the argument
+ // types for the Aggregate Outlining Function.
+ std::vector<OutlinableRegion *> OutlinedRegions;
+ for (OutlinableRegion *OS : CurrentGroup.Regions) {
+ // Break the outlinable region out of its parent BasicBlock into its own
+ // BasicBlocks (see function implementation).
+ OS->splitCandidate();
+ std::vector<BasicBlock *> BE = {OS->StartBB};
+ OS->CE = new (ExtractorAllocator.Allocate())
+ CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
+ false, "outlined");
+ findAddInputsOutputs(M, *OS, NotSame);
+ if (!OS->IgnoreRegion)
+ OutlinedRegions.push_back(OS);
+ else
+ OS->reattachCandidate();
+ }
+
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+
+ if (CurrentGroup.Regions.empty())
+ continue;
+
+ CurrentGroup.collectGVNStoreSets(M);
+
+ if (CostModel)
+ findCostBenefit(M, CurrentGroup);
+
+ // If we are adhering to the cost model and the cost outweighs the benefit,
+ // reattach all the candidates.
+ if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
+ for (OutlinableRegion *OS : CurrentGroup.Regions)
+ OS->reattachCandidate();
+ OptimizationRemarkEmitter &ORE = getORE(
+ *CurrentGroup.Regions[0]->Candidate->getFunction());
+ ORE.emit([&]() {
+ IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
+ OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
+ C->frontInstruction());
+ R << "did not outline "
+ << ore::NV(std::to_string(CurrentGroup.Regions.size()))
+ << " regions due to estimated increase of "
+ << ore::NV("InstructionIncrease",
+ CurrentGroup.Cost - CurrentGroup.Benefit)
+ << " instructions at locations ";
+ interleave(
+ CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
+ [&R](OutlinableRegion *Region) {
+ R << ore::NV(
+ "DebugLoc",
+ Region->Candidate->frontInstruction()->getDebugLoc());
+ },
+ [&R]() { R << " "; });
+ return R;
+ });
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
+ << " and benefit " << CurrentGroup.Benefit << "\n");
+
+ // Create functions out of all the sections, and mark them as outlined.
+ OutlinedRegions.clear();
+ for (OutlinableRegion *OS : CurrentGroup.Regions) {
+ bool FunctionOutlined = extractSection(*OS);
+ if (FunctionOutlined) {
+ unsigned StartIdx = OS->Candidate->getStartIdx();
+ unsigned EndIdx = OS->Candidate->getEndIdx();
+ for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+ Outlined.insert(Idx);
+
+ OutlinedRegions.push_back(OS);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
+ << " with benefit " << CurrentGroup.Benefit
+ << " and cost " << CurrentGroup.Cost << "\n");
+
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+
+ if (CurrentGroup.Regions.empty())
+ continue;
+
+ OptimizationRemarkEmitter &ORE =
+ getORE(*CurrentGroup.Regions[0]->Call->getFunction());
+ ORE.emit([&]() {
+ IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
+ OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
+ R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
+ << " regions with decrease of "
+ << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
+ << " instructions at locations ";
+ interleave(
+ CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
+ [&R](OutlinableRegion *Region) {
+ R << ore::NV("DebugLoc",
+ Region->Candidate->frontInstruction()->getDebugLoc());
+ },
+ [&R]() { R << " "; });
+ return R;
+ });
+
+ deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
+ OutlinedFunctionNum);
+ }
+
+ for (Function *F : FuncsToRemove)
+ F->eraseFromParent();
+
+ return OutlinedFunctionNum;
+}
+
+bool IROutliner::run(Module &M) {
+ CostModel = !NoCostModel;
+ OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
+
+ return doOutline(M) > 0;
+}
+
+// Pass Manager Boilerplate
+class IROutlinerLegacyPass : public ModulePass {
+public:
+ static char ID;
+ IROutlinerLegacyPass() : ModulePass(ID) {
+ initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<IRSimilarityIdentifierWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override;
+};
+
+bool IROutlinerLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ std::unique_ptr<OptimizationRemarkEmitter> ORE;
+ auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
+ ORE.reset(new OptimizationRemarkEmitter(&F));
+ return *ORE.get();
+ };
+
+ auto GTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+
+ auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
+ return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
+ };
+
+ return IROutliner(GTTI, GIRSI, GORE).run(M);
+}
+
+PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ std::function<TargetTransformInfo &(Function &)> GTTI =
+ [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
+ [&AM](Module &M) -> IRSimilarityIdentifier & {
+ return AM.getResult<IRSimilarityAnalysis>(M);
+ };
+
+ std::unique_ptr<OptimizationRemarkEmitter> ORE;
+ std::function<OptimizationRemarkEmitter &(Function &)> GORE =
+ [&ORE](Function &F) -> OptimizationRemarkEmitter & {
+ ORE.reset(new OptimizationRemarkEmitter(&F));
+ return *ORE.get();
+ };
+
+ if (IROutliner(GTTI, GIRSI, GORE).run(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+char IROutlinerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
+ false)
+
+ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
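
The boilerplate at the end of IROutliner.cpp, like the LoopExtractor rewrite further down, follows a pattern worth calling out: the transformation core only consumes std::function/function_ref getters for its analyses, so the legacy pass and the new pass manager wrapper can both drive it from their own infrastructure. A reduced, standalone C++ sketch of that layering (the types are stand-ins, not LLVM classes):

// Plain-C++ sketch of a transform parameterized by analysis getters.
#include <functional>
#include <iostream>
#include <string>

struct Analysis { std::string Name; };   // stand-in for TTI/ORE/IRSI results
struct Function_ { std::string Name; };  // stand-in for llvm::Function

struct OutlinerCore {
  std::function<Analysis &(Function_ &)> GetAnalysis;
  bool run(Function_ &F) {
    std::cout << "running on " << F.Name << " with "
              << GetAnalysis(F).Name << "\n";
    return true;
  }
};

int main() {
  Analysis TTI{"TargetTransformInfo"};
  // A legacy pass and a new-PM pass would each build this lambda from their
  // own analysis manager; here it simply captures a local object.
  OutlinerCore Core{[&](Function_ &) -> Analysis & { return TTI; }};
  Function_ F{"foo"};
  Core.run(F);
  return 0;
}

In the patch, GTTI, GIRSI, and GORE play the role of GetAnalysis, constructed once from getAnalysis<...>() in the legacy pass and once from the analysis managers in IROutlinerPass::run.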
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
index 7d2260f4c169..e91b6c9b1d26 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -37,6 +36,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -60,7 +60,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
@@ -91,24 +90,14 @@ static cl::opt<bool>
DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
cl::init(false), cl::Hidden);
-namespace {
+extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
-enum class InlinerFunctionImportStatsOpts {
- No = 0,
- Basic = 1,
- Verbose = 2,
-};
-
-} // end anonymous namespace
-
-static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
- "inliner-function-import-stats",
- cl::init(InlinerFunctionImportStatsOpts::No),
- cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
- "basic statistics"),
- clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
- "printing of statistics for each inlined function")),
- cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
+static cl::opt<std::string> CGSCCInlineReplayFile(
+ "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
+ cl::desc(
+ "Optimization remarks file containing inline remarks to be replayed "
+ "by inlining from cgscc inline remarks."),
+ cl::Hidden);
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
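
The -cgscc-inline-replay option introduced above is consumed further down in InlinerPass::getAdvisor: when it names a remarks file, the default advisor is wrapped in a ReplayInlineAdvisor whose recorded decisions take precedence, with everything else falling back to the wrapped advisor. A simplified decorator-style C++ sketch of that layering (class names and the call-site key format are invented; the real ReplayInlineAdvisor parses optimization remarks):

// Plain-C++ sketch of a replay advisor wrapping a default advisor.
#include <iostream>
#include <memory>
#include <set>
#include <string>

struct Advisor {
  virtual ~Advisor() = default;
  virtual bool shouldInline(const std::string &CallSite) = 0;
};

struct DefaultAdvisor : Advisor {
  bool shouldInline(const std::string &) override { return false; }
};

struct ReplayAdvisor : Advisor {
  std::unique_ptr<Advisor> Underlying;   // fallback for unrecorded call sites
  std::set<std::string> RecordedInlines; // parsed from the replay file
  ReplayAdvisor(std::unique_ptr<Advisor> U, std::set<std::string> R)
      : Underlying(std::move(U)), RecordedInlines(std::move(R)) {}
  bool shouldInline(const std::string &CallSite) override {
    if (RecordedInlines.count(CallSite))
      return true;                       // replay the recorded decision
    return Underlying->shouldInline(CallSite);
  }
};

int main() {
  std::unique_ptr<Advisor> A = std::make_unique<DefaultAdvisor>();
  A = std::make_unique<ReplayAdvisor>(std::move(A),
                                      std::set<std::string>{"foo->bar"});
  std::cout << A->shouldInline("foo->bar") << " "
            << A->shouldInline("foo->baz") << "\n"; // prints "1 0"
  return 0;
}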
@@ -648,17 +637,12 @@ bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
return true;
}
-InlinerPass::~InlinerPass() {
- if (ImportedFunctionsStats) {
- assert(InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No);
- ImportedFunctionsStats->dump(InlinerFunctionImportStats ==
- InlinerFunctionImportStatsOpts::Verbose);
- }
-}
-
InlineAdvisor &
InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
FunctionAnalysisManager &FAM, Module &M) {
+ if (OwnedAdvisor)
+ return *OwnedAdvisor;
+
auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
if (!IAA) {
// It should still be possible to run the inliner as a stand-alone SCC pass,
@@ -669,8 +653,16 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
// duration of the inliner pass, and thus the lifetime of the owned advisor.
// The one we would get from the MAM can be invalidated as a result of the
// inliner's activity.
- OwnedDefaultAdvisor.emplace(FAM, getInlineParams());
- return *OwnedDefaultAdvisor;
+ OwnedAdvisor =
+ std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
+
+ if (!CGSCCInlineReplayFile.empty())
+ OwnedAdvisor = std::make_unique<ReplayInlineAdvisor>(
+ M, FAM, M.getContext(), std::move(OwnedAdvisor),
+ CGSCCInlineReplayFile,
+ /*EmitRemarks=*/true);
+
+ return *OwnedAdvisor;
}
assert(IAA->getAdvisor() &&
"Expected a present InlineAdvisorAnalysis also have an "
@@ -698,20 +690,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); });
- if (!ImportedFunctionsStats &&
- InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
- ImportedFunctionsStats =
- std::make_unique<ImportedFunctionsInliningStatistics>();
- ImportedFunctionsStats->setModuleInfo(M);
- }
-
// We use a single common worklist for calls across the entire SCC. We
// process these in-order and append new calls introduced during inlining to
// the end.
//
// Note that this particular order of processing is actually critical to
// avoid very bad behaviors. Consider *highly connected* call graphs where
- // each function contains a small amonut of code and a couple of calls to
+ // each function contains a small amount of code and a couple of calls to
// other functions. Because the LLVM inliner is fundamentally a bottom-up
// inliner, it can handle gracefully the fact that these all appear to be
// reasonable inlining candidates as it will flatten things until they become
@@ -761,9 +746,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (Calls.empty())
return PreservedAnalyses::all();
- // Capture updatable variables for the current SCC and RefSCC.
+ // Capture updatable variable for the current SCC.
auto *C = &InitialC;
- auto *RC = &C->getOuterRefSCC();
// When inlining a callee produces new call sites, we want to keep track of
// the fact that they were inlined from the callee. This allows us to avoid
@@ -791,12 +775,6 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- if (!Calls[I].first->getCalledFunction()->hasFnAttribute(
- Attribute::AlwaysInline) &&
- F.hasOptNone()) {
- setInlineRemark(*Calls[I].first, "optnone attribute");
- continue;
- }
LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
@@ -834,7 +812,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
- auto Advice = Advisor.getAdvice(*CB);
+ auto Advice = Advisor.getAdvice(*CB, OnlyMandatory);
// Check whether we want to inline this callsite.
if (!Advice->isInliningRecommended()) {
Advice->recordUnattemptedInlining();
@@ -848,7 +826,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
&FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
- InlineResult IR = InlineFunction(*CB, IFI);
+ InlineResult IR =
+ InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
if (!IR.isSuccess()) {
Advice->recordUnsuccessfulInlining(IR);
continue;
@@ -879,9 +858,6 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
}
- if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
- ImportedFunctionsStats->recordInline(F, Callee);
-
// Merge the attributes based on the inlining.
AttributeFuncs::mergeAttributesForInlining(F, Callee);
@@ -906,7 +882,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// Note that after this point, it is an error to do anything other
// than use the callee's address or delete it.
Callee.dropAllReferences();
- assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
+ assert(!is_contained(DeadFunctions, &Callee) &&
"Cannot put cause a function to become dead twice!");
DeadFunctions.push_back(&Callee);
CalleeWasDeleted = true;
@@ -926,20 +902,6 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
Changed = true;
- // Add all the inlined callees' edges as ref edges to the caller. These are
- // by definition trivial edges as we always have *some* transitive ref edge
- // chain. While in some cases these edges are direct calls inside the
- // callee, they have to be modeled in the inliner as reference edges as
- // there may be a reference edge anywhere along the chain from the current
- // caller to the callee that causes the whole thing to appear like
- // a (transitive) reference edge that will require promotion to a call edge
- // below.
- for (Function *InlinedCallee : InlinedCallees) {
- LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
- for (LazyCallGraph::Edge &E : *CalleeN)
- RC->insertTrivialRefEdge(N, E.getNode());
- }
-
// At this point, since we have made changes we have at least removed
// a call instruction. However, in the process we do some incremental
// simplification of the surrounding code. This simplification can
@@ -952,9 +914,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// as we're going to mutate this particular function we want to make sure
// the proxy is in place to forward any invalidation events.
LazyCallGraph::SCC *OldC = C;
- C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR, FAM);
+ C = &updateCGAndAnalysisManagerForCGSCCPass(CG, *C, N, AM, UR, FAM);
LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
- RC = &C->getOuterRefSCC();
// If this causes an SCC to split apart into multiple smaller SCCs, there
// is a subtle risk we need to prepare for. Other transformations may
@@ -1033,6 +994,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
bool Debugging,
+ bool MandatoryFirst,
InliningAdvisorMode Mode,
unsigned MaxDevirtIterations)
: Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations),
@@ -1042,13 +1004,15 @@ ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
// into the callers so that our optimizations can reflect that.
// For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
// because it makes profile annotation in the backend inaccurate.
+ if (MandatoryFirst)
+ PM.addPass(InlinerPass(/*OnlyMandatory*/ true));
PM.addPass(InlinerPass());
}
PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
ModuleAnalysisManager &MAM) {
auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
- if (!IAA.tryCreate(Params, Mode)) {
+ if (!IAA.tryCreate(Params, Mode, CGSCCInlineReplayFile)) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index f7f5b4cf6704..a497c0390bce 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -13,12 +13,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -36,51 +38,71 @@ using namespace llvm;
STATISTIC(NumExtracted, "Number of loops extracted");
namespace {
- struct LoopExtractor : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
+struct LoopExtractorLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
- // The number of natural loops to extract from the program into functions.
- unsigned NumLoops;
+ unsigned NumLoops;
- explicit LoopExtractor(unsigned numLoops = ~0)
- : ModulePass(ID), NumLoops(numLoops) {
- initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
- bool runOnFunction(Function &F);
+ explicit LoopExtractorLegacyPass(unsigned NumLoops = ~0)
+ : ModulePass(ID), NumLoops(NumLoops) {
+ initializeLoopExtractorLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
- bool extractLoops(Loop::iterator From, Loop::iterator To, LoopInfo &LI,
- DominatorTree &DT);
- bool extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT);
+ bool runOnModule(Module &M) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(BreakCriticalEdgesID);
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addUsedIfAvailable<AssumptionCacheTracker>();
- }
- };
-}
-
-char LoopExtractor::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addUsedIfAvailable<AssumptionCacheTracker>();
+ }
+};
+
+struct LoopExtractor {
+ explicit LoopExtractor(
+ unsigned NumLoops,
+ function_ref<DominatorTree &(Function &)> LookupDomTree,
+ function_ref<LoopInfo &(Function &)> LookupLoopInfo,
+ function_ref<AssumptionCache *(Function &)> LookupAssumptionCache)
+ : NumLoops(NumLoops), LookupDomTree(LookupDomTree),
+ LookupLoopInfo(LookupLoopInfo),
+ LookupAssumptionCache(LookupAssumptionCache) {}
+ bool runOnModule(Module &M);
+
+private:
+ // The number of natural loops to extract from the program into functions.
+ unsigned NumLoops;
+
+ function_ref<DominatorTree &(Function &)> LookupDomTree;
+ function_ref<LoopInfo &(Function &)> LookupLoopInfo;
+ function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
+
+ bool runOnFunction(Function &F);
+
+ bool extractLoops(Loop::iterator From, Loop::iterator To, LoopInfo &LI,
+ DominatorTree &DT);
+ bool extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT);
+};
+} // namespace
+
+char LoopExtractorLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopExtractorLegacyPass, "loop-extract",
"Extract loops into new functions", false, false)
INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
+INITIALIZE_PASS_END(LoopExtractorLegacyPass, "loop-extract",
"Extract loops into new functions", false, false)
namespace {
/// SingleLoopExtractor - For bugpoint.
- struct SingleLoopExtractor : public LoopExtractor {
- static char ID; // Pass identification, replacement for typeid
- SingleLoopExtractor() : LoopExtractor(1) {}
- };
+struct SingleLoopExtractor : public LoopExtractorLegacyPass {
+ static char ID; // Pass identification, replacement for typeid
+ SingleLoopExtractor() : LoopExtractorLegacyPass(1) {}
+};
} // End anonymous namespace
char SingleLoopExtractor::ID = 0;
@@ -90,12 +112,30 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
// createLoopExtractorPass - This pass extracts all natural loops from the
// program into a function if it can.
//
-Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
+Pass *llvm::createLoopExtractorPass() { return new LoopExtractorLegacyPass(); }
-bool LoopExtractor::runOnModule(Module &M) {
+bool LoopExtractorLegacyPass::runOnModule(Module &M) {
if (skipModule(M))
return false;
+ bool Changed = false;
+ auto LookupDomTree = [this](Function &F) -> DominatorTree & {
+ return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ };
+ auto LookupLoopInfo = [this, &Changed](Function &F) -> LoopInfo & {
+ return this->getAnalysis<LoopInfoWrapperPass>(F, &Changed).getLoopInfo();
+ };
+ auto LookupACT = [this](Function &F) -> AssumptionCache * {
+ if (auto *ACT = this->getAnalysisIfAvailable<AssumptionCacheTracker>())
+ return ACT->lookupAssumptionCache(F);
+ return nullptr;
+ };
+ return LoopExtractor(NumLoops, LookupDomTree, LookupLoopInfo, LookupACT)
+ .runOnModule(M) ||
+ Changed;
+}
+
+bool LoopExtractor::runOnModule(Module &M) {
if (M.empty())
return false;
@@ -132,13 +172,13 @@ bool LoopExtractor::runOnFunction(Function &F) {
return false;
bool Changed = false;
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>(F, &Changed).getLoopInfo();
+ LoopInfo &LI = LookupLoopInfo(F);
// If there are no loops in the function.
if (LI.empty())
return Changed;
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ DominatorTree &DT = LookupDomTree(F);
// If there is more than one top-level loop in this function, extract all of
// the loops.
@@ -203,10 +243,8 @@ bool LoopExtractor::extractLoops(Loop::iterator From, Loop::iterator To,
bool LoopExtractor::extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT) {
assert(NumLoops != 0);
- AssumptionCache *AC = nullptr;
Function &Func = *L->getHeader()->getParent();
- if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
- AC = ACT->lookupAssumptionCache(Func);
+ AssumptionCache *AC = LookupAssumptionCache(Func);
CodeExtractorAnalysisCache CEAC(Func);
CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
if (Extractor.extractCodeRegion(CEAC)) {
@@ -224,3 +262,24 @@ bool LoopExtractor::extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT) {
Pass *llvm::createSingleLoopExtractorPass() {
return new SingleLoopExtractor();
}
+
+PreservedAnalyses LoopExtractorPass::run(Module &M, ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+ auto LookupLoopInfo = [&FAM](Function &F) -> LoopInfo & {
+ return FAM.getResult<LoopAnalysis>(F);
+ };
+ auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
+ return FAM.getCachedResult<AssumptionAnalysis>(F);
+ };
+ if (!LoopExtractor(NumLoops, LookupDomTree, LookupLoopInfo,
+ LookupAssumptionCache)
+ .runOnModule(M))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 8eef7e3e7e99..8bd3036f1fc3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -198,7 +198,7 @@ void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
// indices from the old fragment in this fragment do not insert any more
// indices.
std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
- Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
+ llvm::append_range(Fragment, OldFragment);
OldFragment.clear();
}
}
@@ -1205,6 +1205,7 @@ void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
static const unsigned kX86JumpTableEntrySize = 8;
static const unsigned kARMJumpTableEntrySize = 4;
+static const unsigned kARMBTIJumpTableEntrySize = 8;
unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
switch (Arch) {
@@ -1213,7 +1214,12 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
return kX86JumpTableEntrySize;
case Triple::arm:
case Triple::thumb:
+ return kARMJumpTableEntrySize;
case Triple::aarch64:
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("branch-target-enforcement")))
+ if (BTE->getZExtValue())
+ return kARMBTIJumpTableEntrySize;
return kARMJumpTableEntrySize;
default:
report_fatal_error("Unsupported architecture for jump tables");
@@ -1232,7 +1238,13 @@ void LowerTypeTestsModule::createJumpTableEntry(
if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) {
AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n";
AsmOS << "int3\nint3\nint3\n";
- } else if (JumpTableArch == Triple::arm || JumpTableArch == Triple::aarch64) {
+ } else if (JumpTableArch == Triple::arm) {
+ AsmOS << "b $" << ArgIndex << "\n";
+ } else if (JumpTableArch == Triple::aarch64) {
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ Dest->getParent()->getModuleFlag("branch-target-enforcement")))
+ if (BTE->getZExtValue())
+ AsmOS << "bti c\n";
AsmOS << "b $" << ArgIndex << "\n";
} else if (JumpTableArch == Triple::thumb) {
AsmOS << "b.w $" << ArgIndex << "\n";
@@ -1326,7 +1338,7 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) {
Attribute TFAttr = F->getFnAttribute("target-features");
- if (!TFAttr.hasAttribute(Attribute::None)) {
+ if (TFAttr.isValid()) {
SmallVector<StringRef, 6> Features;
TFAttr.getValueAsString().split(Features, ',');
for (StringRef Feature : Features) {
@@ -1394,6 +1406,10 @@ void LowerTypeTestsModule::createJumpTable(
// by Clang for -march=armv7.
F->addFnAttr("target-cpu", "cortex-a8");
}
+ if (JumpTableArch == Triple::aarch64) {
+ F->addFnAttr("branch-target-enforcement", "false");
+ F->addFnAttr("sign-return-address", "none");
+ }
// Make sure we don't emit .eh_frame for this function.
F->addFnAttr(Attribute::NoUnwind);
@@ -2239,9 +2255,13 @@ bool LowerTypeTestsModule::lower() {
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
- bool Changed =
- LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
- .lower();
+ bool Changed;
+ if (UseCommandLine)
+ Changed = LowerTypeTestsModule::runForTesting(M);
+ else
+ Changed =
+ LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
+ .lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();
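
The LowerTypeTests.cpp hunks above teach the aarch64 jump table about branch target identification: when the branch-target-enforcement module flag is set, each entry gains a leading "bti c" landing pad, which is why the entry size doubles from 4 to 8 bytes. A tiny standalone C++ sketch of that entry selection (string building only, not the real createJumpTableEntry):

// Plain-C++ sketch of the conditional aarch64 jump-table entry text.
#include <iostream>
#include <string>

std::string aarch64JumpTableEntry(bool BranchTargetEnforcement) {
  std::string Asm;
  if (BranchTargetEnforcement)
    Asm += "bti c\n";  // landing pad required for indirect branches
  Asm += "b $0\n";     // tail-branch to the real function
  return Asm;
}

int main() {
  std::cout << "without BTI:\n" << aarch64JumpTableEntry(false)
            << "with BTI:\n" << aarch64JumpTableEntry(true);
  return 0;
}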
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 8cc19515f3db..ec5d86b72a1f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -725,8 +725,10 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
if (MergeFunctionsPDI) {
DISubprogram *DIS = G->getSubprogram();
if (DIS) {
- DebugLoc CIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS);
- DebugLoc RIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS);
+ DebugLoc CIDbgLoc =
+ DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
+ DebugLoc RIDbgLoc =
+ DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
CI->setDebugLoc(CIDbgLoc);
RI->setDebugLoc(RIDbgLoc);
} else {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index f664a2417374..a5ba6edb9a00 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -19,13 +19,16 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;
using namespace omp;
@@ -37,11 +40,22 @@ static cl::opt<bool> DisableOpenMPOptimizations(
cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
cl::init(false));
+static cl::opt<bool> EnableParallelRegionMerging(
+ "openmp-opt-enable-merging", cl::ZeroOrMore,
+ cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
+ cl::init(false));
+
static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
cl::Hidden);
static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
cl::init(false), cl::Hidden);
+static cl::opt<bool> HideMemoryTransferLatency(
+ "openmp-hide-memory-transfer-latency",
+ cl::desc("[WIP] Tries to hide the latency of host to device memory"
+ " transfers"),
+ cl::Hidden, cl::init(false));
+
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -55,70 +69,13 @@ STATISTIC(NumOpenMPTargetRegionKernels,
STATISTIC(
NumOpenMPParallelRegionsReplacedInGPUStateMachine,
"Number of OpenMP parallel regions replaced with ID in GPU state machines");
+STATISTIC(NumOpenMPParallelRegionsMerged,
+ "Number of OpenMP parallel regions merged");
#if !defined(NDEBUG)
static constexpr auto TAG = "[" DEBUG_TYPE "]";
#endif
-/// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is
-/// true, constant expression users are not given to \p CB but their uses are
-/// traversed transitively.
-template <typename CBTy>
-static void foreachUse(Function &F, CBTy CB,
- bool LookThroughConstantExprUses = true) {
- SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses()));
-
- for (unsigned idx = 0; idx < Worklist.size(); ++idx) {
- Use &U = *Worklist[idx];
-
- // Allow use in constant bitcasts and simply look through them.
- if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) {
- for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses())
- Worklist.push_back(&CEU);
- continue;
- }
-
- CB(U);
- }
-}
-
-/// Helper struct to store tracked ICV values at specif instructions.
-struct ICVValue {
- Instruction *Inst;
- Value *TrackedValue;
-
- ICVValue(Instruction *I, Value *Val) : Inst(I), TrackedValue(Val) {}
-};
-
-namespace llvm {
-
-// Provide DenseMapInfo for ICVValue
-template <> struct DenseMapInfo<ICVValue> {
- using InstInfo = DenseMapInfo<Instruction *>;
- using ValueInfo = DenseMapInfo<Value *>;
-
- static inline ICVValue getEmptyKey() {
- return ICVValue(InstInfo::getEmptyKey(), ValueInfo::getEmptyKey());
- };
-
- static inline ICVValue getTombstoneKey() {
- return ICVValue(InstInfo::getTombstoneKey(), ValueInfo::getTombstoneKey());
- };
-
- static unsigned getHashValue(const ICVValue &ICVVal) {
- return detail::combineHashValue(
- InstInfo::getHashValue(ICVVal.Inst),
- ValueInfo::getHashValue(ICVVal.TrackedValue));
- }
-
- static bool isEqual(const ICVValue &LHS, const ICVValue &RHS) {
- return InstInfo::isEqual(LHS.Inst, RHS.Inst) &&
- ValueInfo::isEqual(LHS.TrackedValue, RHS.TrackedValue);
- }
-};
-
-} // end namespace llvm
-
namespace {
struct AAICVTracker;
@@ -131,7 +88,6 @@ struct OMPInformationCache : public InformationCache {
SmallPtrSetImpl<Kernel> &Kernels)
: InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
Kernels(Kernels) {
- initializeModuleSlice(CGSCC);
OMPBuilder.initialize();
initializeRuntimeFunctions();
@@ -258,46 +214,6 @@ struct OMPInformationCache : public InformationCache {
DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
};
- /// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains
- /// (a subset of) all functions that we can look at during this SCC traversal.
- /// This includes functions (transitively) called from the SCC and the
- /// (transitive) callers of SCC functions. We also can look at a function if
- /// there is a "reference edge", i.a., if the function somehow uses (!=calls)
- /// a function in the SCC or a caller of a function in the SCC.
- void initializeModuleSlice(SetVector<Function *> &SCC) {
- ModuleSlice.insert(SCC.begin(), SCC.end());
-
- SmallPtrSet<Function *, 16> Seen;
- SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end());
- while (!Worklist.empty()) {
- Function *F = Worklist.pop_back_val();
- ModuleSlice.insert(F);
-
- for (Instruction &I : instructions(*F))
- if (auto *CB = dyn_cast<CallBase>(&I))
- if (Function *Callee = CB->getCalledFunction())
- if (Seen.insert(Callee).second)
- Worklist.push_back(Callee);
- }
-
- Seen.clear();
- Worklist.append(SCC.begin(), SCC.end());
- while (!Worklist.empty()) {
- Function *F = Worklist.pop_back_val();
- ModuleSlice.insert(F);
-
- // Traverse all transitive uses.
- foreachUse(*F, [&](Use &U) {
- if (auto *UsrI = dyn_cast<Instruction>(U.getUser()))
- if (Seen.insert(UsrI->getFunction()).second)
- Worklist.push_back(UsrI->getFunction());
- });
- }
- }
-
- /// The slice of the module we are allowed to look at.
- SmallPtrSet<Function *, 8> ModuleSlice;
-
/// An OpenMP-IR-Builder instance
OpenMPIRBuilder OMPBuilder;
@@ -402,13 +318,17 @@ struct OMPInformationCache : public InformationCache {
return NumUses;
}
+ // Helper function to recollect uses of a runtime function.
+ void recollectUsesForFunction(RuntimeFunction RTF) {
+ auto &RFI = RFIs[RTF];
+ RFI.clearUsesMap();
+ collectUses(RFI, /*CollectStats*/ false);
+ }
+
// Helper function to recollect uses of all runtime functions.
void recollectUses() {
- for (int Idx = 0; Idx < RFIs.size(); ++Idx) {
- auto &RFI = RFIs[static_cast<RuntimeFunction>(Idx)];
- RFI.clearUsesMap();
- collectUses(RFI, /*CollectStats*/ false);
- }
+ for (int Idx = 0; Idx < RFIs.size(); ++Idx)
+ recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
}
/// Helper to initialize all runtime function information for those defined
@@ -472,6 +392,91 @@ struct OMPInformationCache : public InformationCache {
SmallPtrSetImpl<Kernel> &Kernels;
};
+/// Used to map the values physically (in the IR) stored in an offload
+/// array, to a vector in memory.
+struct OffloadArray {
+ /// Physical array (in the IR).
+ AllocaInst *Array = nullptr;
+ /// Mapped values.
+ SmallVector<Value *, 8> StoredValues;
+ /// Last stores made in the offload array.
+ SmallVector<StoreInst *, 8> LastAccesses;
+
+ OffloadArray() = default;
+
+ /// Initializes the OffloadArray with the values stored in \p Array before
+ /// instruction \p Before is reached. Returns false if the initialization
+ /// fails.
+ /// This MUST be used immediately after the construction of the object.
+ bool initialize(AllocaInst &Array, Instruction &Before) {
+ if (!Array.getAllocatedType()->isArrayTy())
+ return false;
+
+ if (!getValues(Array, Before))
+ return false;
+
+ this->Array = &Array;
+ return true;
+ }
+
+ static const unsigned DeviceIDArgNum = 1;
+ static const unsigned BasePtrsArgNum = 3;
+ static const unsigned PtrsArgNum = 4;
+ static const unsigned SizesArgNum = 5;
+
+private:
+ /// Traverses the BasicBlock where \p Array is, collecting the stores made to
+ /// \p Array, leaving StoredValues with the values stored before the
+ /// instruction \p Before is reached.
+ bool getValues(AllocaInst &Array, Instruction &Before) {
+ // Initialize container.
+ const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
+ StoredValues.assign(NumValues, nullptr);
+ LastAccesses.assign(NumValues, nullptr);
+
+ // TODO: This assumes the instruction \p Before is in the same
+ // BasicBlock as Array. Make it general, for any control flow graph.
+ BasicBlock *BB = Array.getParent();
+ if (BB != Before.getParent())
+ return false;
+
+ const DataLayout &DL = Array.getModule()->getDataLayout();
+ const unsigned int PointerSize = DL.getPointerSize();
+
+ for (Instruction &I : *BB) {
+ if (&I == &Before)
+ break;
+
+ if (!isa<StoreInst>(&I))
+ continue;
+
+ auto *S = cast<StoreInst>(&I);
+ int64_t Offset = -1;
+ auto *Dst =
+ GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
+ if (Dst == &Array) {
+ int64_t Idx = Offset / PointerSize;
+ StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
+ LastAccesses[Idx] = S;
+ }
+ }
+
+ return isFilled();
+ }
+
+ /// Returns true if all values in StoredValues and
+ /// LastAccesses are not nullptrs.
+ bool isFilled() {
+ const unsigned NumValues = StoredValues.size();
+ for (unsigned I = 0; I < NumValues; ++I) {
+ if (!StoredValues[I] || !LastAccesses[I])
+ return false;
+ }
+
+ return true;
+ }
+};
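
OffloadArray above recovers, from the IR, the values last stored into each slot of an offload array before a given instruction. Conceptually it is a single forward walk over one basic block in which later stores overwrite earlier ones; the standalone C++ sketch below models that with plain integers (the (index, value) pairs are invented, and -1 stands in for the "Before" instruction):

// Plain-C++ sketch of the "last store before a point wins" scan.
#include <array>
#include <iostream>
#include <optional>
#include <utility>
#include <vector>

int main() {
  // Each pair is (array index, stored value); -1 marks the "Before" point.
  std::vector<std::pair<int, int>> Block = {{0, 10}, {1, 20}, {0, 30}, {-1, 0},
                                            {1, 99}};
  std::array<std::optional<int>, 2> StoredValues; // one slot per array element

  for (auto &[Idx, Val] : Block) {
    if (Idx < 0)
      break;                     // reached the instruction we analyze before
    StoredValues[Idx] = Val;     // later stores overwrite earlier ones
  }

  bool Filled = StoredValues[0] && StoredValues[1]; // isFilled() analogue
  std::cout << *StoredValues[0] << " " << *StoredValues[1] << " "
            << (Filled ? "initialized" : "failed") << "\n"; // 30 20 initialized
  return 0;
}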
+
struct OpenMPOpt {
using OptimizationRemarkGetter =
@@ -483,6 +488,12 @@ struct OpenMPOpt {
: M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
+ /// Check if any remarks are enabled for openmp-opt
+ bool remarksEnabled() {
+ auto &Ctx = M.getContext();
+ return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
+ }
+
/// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
bool run() {
if (SCC.empty())
@@ -506,8 +517,18 @@ struct OpenMPOpt {
// Recollect uses, in case Attributor deleted any.
OMPInfoCache.recollectUses();
- Changed |= deduplicateRuntimeCalls();
Changed |= deleteParallelRegions();
+ if (HideMemoryTransferLatency)
+ Changed |= hideMemTransfersLatency();
+ if (remarksEnabled())
+ analysisGlobalization();
+ Changed |= deduplicateRuntimeCalls();
+ if (EnableParallelRegionMerging) {
+ if (mergeParallelRegions()) {
+ deduplicateRuntimeCalls();
+ Changed = true;
+ }
+ }
return Changed;
}
@@ -515,7 +536,8 @@ struct OpenMPOpt {
/// Print initial ICV values for testing.
/// FIXME: This should be done from the Attributor once it is added.
void printICVs() const {
- InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel};
+ InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
+ ICV_proc_bind};
for (Function *F : OMPInfoCache.ModuleSlice) {
for (auto ICV : ICVs) {
@@ -571,6 +593,394 @@ struct OpenMPOpt {
}
private:
+ /// Merge parallel regions when it is safe.
+ bool mergeParallelRegions() {
+ const unsigned CallbackCalleeOperand = 2;
+ const unsigned CallbackFirstArgOperand = 3;
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+
+ // Check if there are any __kmpc_fork_call calls to merge.
+ OMPInformationCache::RuntimeFunctionInfo &RFI =
+ OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
+
+ if (!RFI.Declaration)
+ return false;
+
+ // Unmergable calls that prevent merging a parallel region.
+ OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
+ OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
+ OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
+ };
+
+ bool Changed = false;
+ LoopInfo *LI = nullptr;
+ DominatorTree *DT = nullptr;
+
+ SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
+
+ BasicBlock *StartBB = nullptr, *EndBB = nullptr;
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ BasicBlock *CGStartBB = CodeGenIP.getBlock();
+ BasicBlock *CGEndBB =
+ SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
+ assert(StartBB != nullptr && "StartBB should not be null");
+ CGStartBB->getTerminator()->setSuccessor(0, StartBB);
+ assert(EndBB != nullptr && "EndBB should not be null");
+ EndBB->getTerminator()->setSuccessor(0, CGEndBB);
+ };
+
+ auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
+ Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
+ ReplacementValue = &Inner;
+ return CodeGenIP;
+ };
+
+ auto FiniCB = [&](InsertPointTy CodeGenIP) {};
+
+ /// Create a sequential execution region within a merged parallel region,
+ /// encapsulated in a master construct with a barrier for synchronization.
+ auto CreateSequentialRegion = [&](Function *OuterFn,
+ BasicBlock *OuterPredBB,
+ Instruction *SeqStartI,
+ Instruction *SeqEndI) {
+ // Isolate the instructions of the sequential region to a separate
+ // block.
+ BasicBlock *ParentBB = SeqStartI->getParent();
+ BasicBlock *SeqEndBB =
+ SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
+ BasicBlock *SeqAfterBB =
+ SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
+ BasicBlock *SeqStartBB =
+ SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
+
+ assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
+ "Expected a different CFG");
+ const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
+ ParentBB->getTerminator()->eraseFromParent();
+
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ BasicBlock *CGStartBB = CodeGenIP.getBlock();
+ BasicBlock *CGEndBB =
+ SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
+ assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
+ CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
+ assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
+ SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
+ };
+ auto FiniCB = [&](InsertPointTy CodeGenIP) {};
+
+ // Find outputs from the sequential region to outside users and
+ // broadcast their values to them.
+ for (Instruction &I : *SeqStartBB) {
+ SmallPtrSet<Instruction *, 4> OutsideUsers;
+ for (User *Usr : I.users()) {
+ Instruction &UsrI = *cast<Instruction>(Usr);
+ // Ignore outputs to lifetime intrinsics; code extraction for the merged
+ // parallel region will fix them.
+ if (UsrI.isLifetimeStartOrEnd())
+ continue;
+
+ if (UsrI.getParent() != SeqStartBB)
+ OutsideUsers.insert(&UsrI);
+ }
+
+ if (OutsideUsers.empty())
+ continue;
+
+ // Emit an alloca in the outer region to store the broadcasted
+ // value.
+ const DataLayout &DL = M.getDataLayout();
+ AllocaInst *AllocaI = new AllocaInst(
+ I.getType(), DL.getAllocaAddrSpace(), nullptr,
+ I.getName() + ".seq.output.alloc", &OuterFn->front().front());
+
+ // Emit a store instruction in the sequential BB to update the
+ // value.
+ new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
+
+ // Emit a load instruction and replace the use of the output value
+ // with it.
+ for (Instruction *UsrI : OutsideUsers) {
+ LoadInst *LoadI = new LoadInst(I.getType(), AllocaI,
+ I.getName() + ".seq.output.load", UsrI);
+ UsrI->replaceUsesOfWith(&I, LoadI);
+ }
+ }
+
+ OpenMPIRBuilder::LocationDescription Loc(
+ InsertPointTy(ParentBB, ParentBB->end()), DL);
+ InsertPointTy SeqAfterIP =
+ OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
+
+ OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
+
+ BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
+
+ LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
+ << "\n");
+ };
+
+ // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
+ // contained in BB and only separated by instructions that can be
+ // redundantly executed in parallel. The block BB is split before the first
+ // call (in MergableCIs) and after the last, so that the entire region we
+ // merge into a single parallel region is contained in a single basic block
+ // without any other instructions. We use the OpenMPIRBuilder to outline
+ // that block and call the resulting function via __kmpc_fork_call.
+ auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
+ // TODO: Change the interface to allow single CIs expanded, e.g, to
+ // include an outer loop.
+ assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
+
+ auto Remark = [&](OptimizationRemark OR) {
+ OR << "Parallel region at "
+ << ore::NV("OpenMPParallelMergeFront",
+ MergableCIs.front()->getDebugLoc())
+ << " merged with parallel regions at ";
+ for (auto *CI : llvm::drop_begin(MergableCIs)) {
+ OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
+ if (CI != MergableCIs.back())
+ OR << ", ";
+ }
+ return OR;
+ };
+
+ emitRemark<OptimizationRemark>(MergableCIs.front(),
+ "OpenMPParallelRegionMerging", Remark);
+
+ Function *OriginalFn = BB->getParent();
+ LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
+ << " parallel regions in " << OriginalFn->getName()
+ << "\n");
+
+ // Isolate the calls to merge in a separate block.
+ EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
+ BasicBlock *AfterBB =
+ SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
+ StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
+ "omp.par.merged");
+
+ assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
+ const DebugLoc DL = BB->getTerminator()->getDebugLoc();
+ BB->getTerminator()->eraseFromParent();
+
+ // Create sequential regions for sequential instructions that are
+ // in-between mergable parallel regions.
+ for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
+ It != End; ++It) {
+ Instruction *ForkCI = *It;
+ Instruction *NextForkCI = *(It + 1);
+
+ // Continue if there are no in-between instructions.
+ if (ForkCI->getNextNode() == NextForkCI)
+ continue;
+
+ CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
+ NextForkCI->getPrevNode());
+ }
+
+ OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
+ DL);
+ IRBuilder<>::InsertPoint AllocaIP(
+ &OriginalFn->getEntryBlock(),
+ OriginalFn->getEntryBlock().getFirstInsertionPt());
+ // Create the merged parallel region with default proc binding, to
+ // avoid overriding binding settings, and without explicit cancellation.
+ InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
+ Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
+ OMP_PROC_BIND_default, /* IsCancellable */ false);
+ BranchInst::Create(AfterBB, AfterIP.getBlock());
+
+ // Perform the actual outlining.
+ OMPInfoCache.OMPBuilder.finalize(/* AllowExtractorSinking */ true);
+
+ Function *OutlinedFn = MergableCIs.front()->getCaller();
+
+ // Replace the __kmpc_fork_call calls with direct calls to the outlined
+ // callbacks.
+ SmallVector<Value *, 8> Args;
+ for (auto *CI : MergableCIs) {
+ Value *Callee =
+ CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
+ FunctionType *FT =
+ cast<FunctionType>(Callee->getType()->getPointerElementType());
+ Args.clear();
+ Args.push_back(OutlinedFn->getArg(0));
+ Args.push_back(OutlinedFn->getArg(1));
+ for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
+ U < E; ++U)
+ Args.push_back(CI->getArgOperand(U));
+
+ CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
+ if (CI->getDebugLoc())
+ NewCI->setDebugLoc(CI->getDebugLoc());
+
+ // Forward parameter attributes from the callback to the callee.
+ for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
+ U < E; ++U)
+ for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
+ NewCI->addParamAttr(
+ U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
+
+ // Emit an explicit barrier to replace the implicit fork-join barrier.
+ if (CI != MergableCIs.back()) {
+ // TODO: Remove barrier if the merged parallel region includes the
+ // 'nowait' clause.
+ OMPInfoCache.OMPBuilder.createBarrier(
+ InsertPointTy(NewCI->getParent(),
+ NewCI->getNextNode()->getIterator()),
+ OMPD_parallel);
+ }
+
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Parallel region at "
+ << ore::NV("OpenMPParallelMerge", CI->getDebugLoc())
+ << " merged with "
+ << ore::NV("OpenMPParallelMergeFront",
+ MergableCIs.front()->getDebugLoc());
+ };
+ if (CI != MergableCIs.front())
+ emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionMerging",
+ Remark);
+
+ CI->eraseFromParent();
+ }
+
+ assert(OutlinedFn != OriginalFn && "Outlining failed");
+ CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
+ CGUpdater.reanalyzeFunction(*OriginalFn);
+
+ NumOpenMPParallelRegionsMerged += MergableCIs.size();
+
+ return true;
+ };
+
+ // Helper function that identifies sequences of
+ // __kmpc_fork_call uses in a basic block.
+ auto DetectPRsCB = [&](Use &U, Function &F) {
+ CallInst *CI = getCallIfRegularCall(U, &RFI);
+ BB2PRMap[CI->getParent()].insert(CI);
+
+ return false;
+ };
+
+ BB2PRMap.clear();
+ RFI.foreachUse(SCC, DetectPRsCB);
+ SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
+ // Find mergable parallel regions within a basic block that are
+ // safe to merge, that is, any in-between instructions can safely
+ // execute in parallel after merging.
+ // TODO: support merging across basic-blocks.
+ for (auto &It : BB2PRMap) {
+ auto &CIs = It.getSecond();
+ if (CIs.size() < 2)
+ continue;
+
+ BasicBlock *BB = It.getFirst();
+ SmallVector<CallInst *, 4> MergableCIs;
+
+ /// Returns true if the instruction is mergable, false otherwise.
+ /// A terminator instruction is unmergable by definition since merging
+ /// works within a BB. Instructions before the mergable region are
+ /// mergable if they are not calls to OpenMP runtime functions that may
+ /// set different execution parameters for subsequent parallel regions.
+ /// Instructions in-between parallel regions are mergable if they are not
+ /// calls to any non-intrinsic function since that may call a non-mergable
+ /// OpenMP runtime function.
+ auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
+ // We do not merge across BBs, hence return false (unmergable) if the
+ // instruction is a terminator.
+ if (I.isTerminator())
+ return false;
+
+ if (!isa<CallInst>(&I))
+ return true;
+
+ CallInst *CI = cast<CallInst>(&I);
+ if (IsBeforeMergableRegion) {
+ Function *CalledFunction = CI->getCalledFunction();
+ if (!CalledFunction)
+ return false;
+ // Return false (unmergable) if the call before the parallel
+ // region is to a compiler-generated runtime function that sets an
+ // explicit affinity (proc_bind) or number of threads (num_threads).
+ // Those settings may be incompatible with the following parallel regions.
+ // TODO: ICV tracking to detect compatibility.
+ for (const auto &RFI : UnmergableCallsInfo) {
+ if (CalledFunction == RFI.Declaration)
+ return false;
+ }
+ } else {
+ // Return false (unmergable) if there is a call instruction
+ // in-between parallel regions when it is not an intrinsic. It
+ // may call an unmergable OpenMP runtime function in its callpath.
+ // TODO: Keep track of possible OpenMP calls in the callpath.
+ if (!isa<IntrinsicInst>(CI))
+ return false;
+ }
+
+ return true;
+ };
+ // Find maximal number of parallel region CIs that are safe to merge.
+ for (auto It = BB->begin(), End = BB->end(); It != End;) {
+ Instruction &I = *It;
+ ++It;
+
+ if (CIs.count(&I)) {
+ MergableCIs.push_back(cast<CallInst>(&I));
+ continue;
+ }
+
+ // Continue expanding if the instruction is mergable.
+ if (IsMergable(I, MergableCIs.empty()))
+ continue;
+
+ // Forward the instruction iterator to skip the next parallel region
+ // since there is an unmergable instruction which can affect it.
+ for (; It != End; ++It) {
+ Instruction &SkipI = *It;
+ if (CIs.count(&SkipI)) {
+ LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
+ << " due to " << I << "\n");
+ ++It;
+ break;
+ }
+ }
+
+ // Store mergable regions found.
+ if (MergableCIs.size() > 1) {
+ MergableCIsVector.push_back(MergableCIs);
+ LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
+ << " parallel regions in block " << BB->getName()
+ << " of function " << BB->getParent()->getName()
+ << "\n";);
+ }
+
+ MergableCIs.clear();
+ }
+
+ if (!MergableCIsVector.empty()) {
+ Changed = true;
+
+ for (auto &MergableCIs : MergableCIsVector)
+ Merge(MergableCIs, BB);
+ }
+ }
+
+ if (Changed) {
+ /// Re-collect uses for fork calls, emitted barrier calls, and
+ /// any emitted master/end_master calls.
+ OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
+ OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
+ OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
+ OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
+ }
+
+ return Changed;
+ }
+
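For orientation, a hedged source-level sketch of what mergeParallelRegions does (illustrative only; work1, work2 and setup are hypothetical helpers, not names from the patch): two adjacent parallel regions separated by sequential code become a single fork/join, the implicit join of the first region is replaced by an explicit barrier, and the in-between code is wrapped in a master construct followed by another barrier, mirroring CreateSequentialRegion above.

  void work1(int *a, int n); // hypothetical parallel work
  void work2(int *a, int n); // hypothetical parallel work
  void setup(int *a);        // hypothetical sequential work

  // Before merging: two __kmpc_fork_call sites in the same basic block.
  void before(int *a, int n) {
  #pragma omp parallel
    work1(a, n);
    setup(a); // sequential in-between code
  #pragma omp parallel
    work2(a, n);
  }

  // Conceptually after merging: one parallel region, a single fork/join.
  void after(int *a, int n) {
  #pragma omp parallel
    {
      work1(a, n);
  #pragma omp barrier // replaces the first region's implicit join
  #pragma omp master
      setup(a);
  #pragma omp barrier // emitted after the sequential (master) region
      work2(a, n);
    }
  }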
/// Try to delete parallel regions if possible.
bool deleteParallelRegions() {
const unsigned CallbackCalleeOperand = 2;
@@ -648,8 +1058,8 @@ private:
for (Function *F : SCC) {
for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
- deduplicateRuntimeCalls(*F,
- OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
+ Changed |= deduplicateRuntimeCalls(
+ *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
// __kmpc_global_thread_num is special as we can replace it with an
// argument in enough cases to make it worth trying.
@@ -666,6 +1076,223 @@ private:
return Changed;
}
+ /// Tries to hide the latency of runtime calls that involve host to
+ /// device memory transfers by splitting them into their "issue" and "wait"
+ /// versions. The "issue" is moved upwards as much as possible. The "wait" is
+ /// moved downwards as much as possible. The "issue" starts the memory transfer
+ /// asynchronously, returning a handle. The "wait" waits on the returned
+ /// handle for the memory transfer to finish.
+ bool hideMemTransfersLatency() {
+ auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
+ bool Changed = false;
+ auto SplitMemTransfers = [&](Use &U, Function &Decl) {
+ auto *RTCall = getCallIfRegularCall(U, &RFI);
+ if (!RTCall)
+ return false;
+
+ OffloadArray OffloadArrays[3];
+ if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
+ return false;
+
+ LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
+
+ // TODO: Check if can be moved upwards.
+ bool WasSplit = false;
+ Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
+ if (WaitMovementPoint)
+ WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
+
+ Changed |= WasSplit;
+ return WasSplit;
+ };
+ RFI.foreachUse(SCC, SplitMemTransfers);
+
+ return Changed;
+ }
+
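A hedged sketch of the rewrite at the runtime-call level (argument lists abridged; the _issue/_wait entry points and the handle correspond to what splitTargetDataBeginRTC below emits, but this is an illustration rather than the exact IR):

  // Before: one synchronous call that returns only once the copy is done.
  //   __tgt_target_data_begin_mapper(ident, device_id, num_args,
  //                                  base_ptrs, ptrs, sizes, ...);
  //
  // After: an asynchronous issue into a stack-allocated handle, with the wait
  // sunk to the point chosen by canBeMovedDownwards.
  //   __tgt_async_info handle;                       // alloca in the entry block
  //   __tgt_target_data_begin_mapper_issue(ident, device_id, num_args,
  //                                        base_ptrs, ptrs, sizes, ..., &handle);
  //   /* instructions with no side effects and no memory reads */
  //   __tgt_target_data_begin_mapper_wait(device_id, &handle);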
+ void analysisGlobalization() {
+ RuntimeFunction GlobalizationRuntimeIDs[] = {
+ OMPRTL___kmpc_data_sharing_coalesced_push_stack,
+ OMPRTL___kmpc_data_sharing_push_stack};
+
+ for (const auto GlobalizationCallID : GlobalizationRuntimeIDs) {
+ auto &RFI = OMPInfoCache.RFIs[GlobalizationCallID];
+
+ auto CheckGlobalization = [&](Use &U, Function &Decl) {
+ if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
+ auto Remark = [&](OptimizationRemarkAnalysis ORA) {
+ return ORA
+ << "Found thread data sharing on the GPU. "
+ << "Expect degraded performance due to data globalization.";
+ };
+ emitRemark<OptimizationRemarkAnalysis>(CI, "OpenMPGlobalization",
+ Remark);
+ }
+
+ return false;
+ };
+
+ RFI.foreachUse(SCC, CheckGlobalization);
+ }
+ }
+
+ /// Maps the values stored in the offload arrays passed as arguments to
+ /// \p RuntimeCall into the offload arrays in \p OAs.
+ bool getValuesInOffloadArrays(CallInst &RuntimeCall,
+ MutableArrayRef<OffloadArray> OAs) {
+ assert(OAs.size() == 3 && "Need space for three offload arrays!");
+
+ // A runtime call that involves memory offloading looks something like:
+ // call void @__tgt_target_data_begin_mapper(arg0, arg1,
+ // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
+ // ...)
+ // So, the idea is to access the allocas that allocate space for these
+ // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
+ // Therefore:
+ // i8** %offload_baseptrs.
+ Value *BasePtrsArg =
+ RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
+ // i8** %offload_ptrs.
+ Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
+ // i8** %offload_sizes.
+ Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
+
+ // Get values stored in **offload_baseptrs.
+ auto *V = getUnderlyingObject(BasePtrsArg);
+ if (!isa<AllocaInst>(V))
+ return false;
+ auto *BasePtrsArray = cast<AllocaInst>(V);
+ if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
+ return false;
+
+ // Get values stored in **offload_ptrs.
+ V = getUnderlyingObject(PtrsArg);
+ if (!isa<AllocaInst>(V))
+ return false;
+ auto *PtrsArray = cast<AllocaInst>(V);
+ if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
+ return false;
+
+ // Get values stored in **offload_sizes.
+ V = getUnderlyingObject(SizesArg);
+ // If it's a [constant] global array don't analyze it.
+ if (isa<GlobalValue>(V))
+ return isa<Constant>(V);
+ if (!isa<AllocaInst>(V))
+ return false;
+
+ auto *SizesArray = cast<AllocaInst>(V);
+ if (!OAs[2].initialize(*SizesArray, RuntimeCall))
+ return false;
+
+ return true;
+ }
+
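As a hedged illustration of the IR pattern this walks back through (the value names mimic typical clang codegen and are assumed, not taken from the patch):

  //   %.offload_baseptrs = alloca [1 x i8*]
  //   %.offload_ptrs     = alloca [1 x i8*]
  //   %.offload_sizes    = alloca [1 x i64]
  //   ; ...getelementptrs and stores fill one slot per mapped variable...
  //   call void @__tgt_target_data_begin_mapper(..., i8** %bp, i8** %p,
  //                                             i64* %sz, ...)
  // getUnderlyingObject() on each pointer argument recovers the alloca, and
  // OffloadArray::initialize() records the last value stored to every slot
  // before the runtime call; these are the values printed by
  // dumpValuesInOffloadArrays() below.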
+ /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
+ /// For now this is a way to test that the function getValuesInOffloadArrays
+ /// is working properly.
+ /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
+ void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
+ assert(OAs.size() == 3 && "There are three offload arrays to debug!");
+
+ LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
+ std::string ValuesStr;
+ raw_string_ostream Printer(ValuesStr);
+ std::string Separator = " --- ";
+
+ for (auto *BP : OAs[0].StoredValues) {
+ BP->print(Printer);
+ Printer << Separator;
+ }
+ LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
+ ValuesStr.clear();
+
+ for (auto *P : OAs[1].StoredValues) {
+ P->print(Printer);
+ Printer << Separator;
+ }
+ LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
+ ValuesStr.clear();
+
+ for (auto *S : OAs[2].StoredValues) {
+ S->print(Printer);
+ Printer << Separator;
+ }
+ LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
+ }
+
+ /// Returns the instruction where the "wait" counterpart of \p RuntimeCall can
+ /// be moved. Returns nullptr if the movement is not possible, or not worth it.
+ Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
+ // FIXME: This traverses only the BasicBlock where RuntimeCall is.
+ // Make it traverse the CFG.
+
+ Instruction *CurrentI = &RuntimeCall;
+ bool IsWorthIt = false;
+ while ((CurrentI = CurrentI->getNextNode())) {
+
+ // TODO: Once we detect the regions to be offloaded we should use the
+ // alias analysis manager to check if CurrentI may modify one of
+ // the offloaded regions.
+ if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
+ if (IsWorthIt)
+ return CurrentI;
+
+ return nullptr;
+ }
+
+ // FIXME: For now, moving the wait over anything without side effects
+ // is considered worth it.
+ IsWorthIt = true;
+ }
+
+ // Return end of BasicBlock.
+ return RuntimeCall.getParent()->getTerminator();
+ }
+
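A hedged example of how far the wait can be sunk within one basic block under the current heuristic (hypothetical IR): the scan stops at the first instruction that may read or write memory, and a movement point is reported only if at least one instruction was skipped.

  //   call void @__tgt_target_data_begin_mapper(...) ; the call being split
  //   %i   = add i32 %x, 1                           ; no side effects: skipped
  //   %cmp = icmp slt i32 %i, %n                     ; no side effects: skipped
  //   %v   = load i32, i32* %p                       ; reads memory: wait goes here
  // If the instruction right after the mapper call already touches memory,
  // nullptr is returned and the call is left unsplit; if the end of the block is
  // reached, the block terminator is used as the movement point.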
+ /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
+ bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
+ Instruction &WaitMovementPoint) {
+ // Create a stack-allocated handle (__tgt_async_info) at the beginning of
+ // the function. It stores information about the async transfer so we can
+ // wait on it later.
+ auto &IRBuilder = OMPInfoCache.OMPBuilder;
+ auto *F = RuntimeCall.getCaller();
+ Instruction *FirstInst = &(F->getEntryBlock().front());
+ AllocaInst *Handle = new AllocaInst(
+ IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
+
+ // Add "issue" runtime call declaration:
+ // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
+ // i8**, i8**, i64*, i64*)
+ FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___tgt_target_data_begin_mapper_issue);
+
+ // Replace the RuntimeCall call site with its asynchronous version.
+ SmallVector<Value *, 16> Args;
+ for (auto &Arg : RuntimeCall.args())
+ Args.push_back(Arg.get());
+ Args.push_back(Handle);
+
+ CallInst *IssueCallsite =
+ CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
+ RuntimeCall.eraseFromParent();
+
+ // Add "wait" runtime call declaration:
+ // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
+ FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___tgt_target_data_begin_mapper_wait);
+
+ Value *WaitParams[2] = {
+ IssueCallsite->getArgOperand(
+ OffloadArray::DeviceIDArgNum), // device_id.
+ Handle // handle to wait on.
+ };
+ CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
+
+ return true;
+ }
+
static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
bool GlobalOnly, bool &SingleChoice) {
if (CurrentIdent == NextIdent)
@@ -951,11 +1578,28 @@ private:
/// Populate the Attributor with abstract attribute opportunities in the
/// function.
void registerAAs() {
- for (Function *F : SCC) {
- if (F->isDeclaration())
- continue;
+ if (SCC.empty())
+ return;
+
+ // Create CallSite AA for all Getters.
+ for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
+ auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
+
+ auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
+
+ auto CreateAA = [&](Use &U, Function &Caller) {
+ CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
+ if (!CI)
+ return false;
+
+ auto &CB = cast<CallBase>(*CI);
+
+ IRPosition CBPos = IRPosition::callsite_function(CB);
+ A.getOrCreateAAFor<AAICVTracker>(CBPos);
+ return false;
+ };
- A.getOrCreateAAFor<AAICVTracker>(IRPosition::function(*F));
+ GetterRFI.foreachUse(SCC, CreateAA);
}
}
};
@@ -979,8 +1623,16 @@ Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
}
CachedKernel = nullptr;
- if (!F.hasLocalLinkage())
+ if (!F.hasLocalLinkage()) {
+
+ // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "[OMP100] Potentially unknown OpenMP target region caller";
+ };
+ emitRemarkOnFunction(&F, "OMP100", Remark);
+
return nullptr;
+ }
}
auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
@@ -1006,7 +1658,7 @@ Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
// TODO: In the future we want to track more than just a unique kernel.
SmallPtrSet<Kernel, 2> PotentialKernels;
- foreachUse(F, [&](const Use &U) {
+ OMPInformationCache::foreachUse(F, [&](const Use &U) {
PotentialKernels.insert(GetUniqueKernelForUse(U));
});
@@ -1037,7 +1689,7 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
unsigned NumDirectCalls = 0;
SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
- foreachUse(*F, [&](Use &U) {
+ OMPInformationCache::foreachUse(*F, [&](Use &U) {
if (auto *CB = dyn_cast<CallBase>(U.getUser()))
if (CB->isCallee(&U)) {
++NumDirectCalls;
@@ -1157,6 +1809,12 @@ struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+ void initialize(Attributor &A) override {
+ Function *F = getAnchorScope();
+ if (!F || !A.isFunctionIPOAmendable(*F))
+ indicatePessimisticFixpoint();
+ }
+
/// Returns true if value is assumed to be tracked.
bool isAssumedTracked() const { return getAssumed(); }
@@ -1167,8 +1825,21 @@ struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
/// Return the value with which \p I can be replaced for specific \p ICV.
- virtual Value *getReplacementValue(InternalControlVar ICV,
- const Instruction *I, Attributor &A) = 0;
+ virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
+ const Instruction *I,
+ Attributor &A) const {
+ return None;
+ }
+
+ /// Return an assumed unique ICV value if a single candidate is found. If
+ /// there cannot be one, return nullptr. If it is not clear yet, return
+ /// None (Optional::NoneType).
+ virtual Optional<Value *>
+ getUniqueReplacementValue(InternalControlVar ICV) const = 0;
+
+ // Currently only nthreads is being tracked.
+ // This array will only grow over time.
+ InternalControlVar TrackableICVs[1] = {ICV_nthreads};
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAICVTracker"; }
@@ -1189,57 +1860,20 @@ struct AAICVTrackerFunction : public AAICVTracker {
: AAICVTracker(IRP, A) {}
// FIXME: come up with better string.
- const std::string getAsStr() const override { return "ICVTracker"; }
+ const std::string getAsStr() const override { return "ICVTrackerFunction"; }
// FIXME: come up with some stats.
void trackStatistics() const override {}
- /// TODO: decide whether to deduplicate here, or use current
- /// deduplicateRuntimeCalls function.
+ /// We don't manifest anything for this AA.
ChangeStatus manifest(Attributor &A) override {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
-
- for (InternalControlVar &ICV : TrackableICVs)
- if (deduplicateICVGetters(ICV, A))
- Changed = ChangeStatus::CHANGED;
-
- return Changed;
- }
-
- bool deduplicateICVGetters(InternalControlVar &ICV, Attributor &A) {
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
- auto &ICVInfo = OMPInfoCache.ICVs[ICV];
- auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
-
- bool Changed = false;
-
- auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
- CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
- Instruction *UserI = cast<Instruction>(U.getUser());
- Value *ReplVal = getReplacementValue(ICV, UserI, A);
-
- if (!ReplVal || !CI)
- return false;
-
- A.removeCallSite(CI);
- CI->replaceAllUsesWith(ReplVal);
- CI->eraseFromParent();
- Changed = true;
- return true;
- };
-
- GetterRFI.foreachUse(ReplaceAndDeleteCB, getAnchorScope());
- return Changed;
+ return ChangeStatus::UNCHANGED;
}
// Map of ICV to their values at specific program point.
- EnumeratedArray<SmallSetVector<ICVValue, 4>, InternalControlVar,
+ EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
InternalControlVar::ICV___last>
- ICVValuesMap;
-
- // Currently only nthreads is being tracked.
- // this array will only grow with time.
- InternalControlVar TrackableICVs[1] = {ICV_nthreads};
+ ICVReplacementValuesMap;
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
@@ -1251,6 +1885,7 @@ struct AAICVTrackerFunction : public AAICVTracker {
for (InternalControlVar ICV : TrackableICVs) {
auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
+ auto &ValuesMap = ICVReplacementValuesMap[ICV];
auto TrackValues = [&](Use &U, Function &) {
CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
if (!CI)
@@ -1258,51 +1893,342 @@ struct AAICVTrackerFunction : public AAICVTracker {
// FIXME: handle setters with more than one argument.
/// Track new value.
- if (ICVValuesMap[ICV].insert(ICVValue(CI, CI->getArgOperand(0))))
+ if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
HasChanged = ChangeStatus::CHANGED;
return false;
};
+ auto CallCheck = [&](Instruction &I) {
+ Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
+ if (ReplVal.hasValue() &&
+ ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
+ HasChanged = ChangeStatus::CHANGED;
+
+ return true;
+ };
+
+ // Track all changes of an ICV.
SetterRFI.foreachUse(TrackValues, F);
+
+ A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
+ /* CheckBBLivenessOnly */ true);
+
+ /// TODO: Figure out a way to avoid adding an entry to
+ /// ICVReplacementValuesMap.
+ Instruction *Entry = &F->getEntryBlock().front();
+ if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
+ ValuesMap.insert(std::make_pair(Entry, nullptr));
}
return HasChanged;
}
- /// Return the value with which \p I can be replaced for specific \p ICV.
- Value *getReplacementValue(InternalControlVar ICV, const Instruction *I,
- Attributor &A) override {
- const BasicBlock *CurrBB = I->getParent();
+ /// Helper to check if \p I is a call and get the value for it if it is
+ /// unique.
+ Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
+ InternalControlVar &ICV) const {
+
+ const auto *CB = dyn_cast<CallBase>(I);
+ if (!CB || CB->hasFnAttr("no_openmp") ||
+ CB->hasFnAttr("no_openmp_routines"))
+ return None;
- auto &ValuesSet = ICVValuesMap[ICV];
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
+ auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
+ Function *CalledFunction = CB->getCalledFunction();
- for (const auto &ICVVal : ValuesSet) {
- if (CurrBB == ICVVal.Inst->getParent()) {
- if (!ICVVal.Inst->comesBefore(I))
- continue;
+ // Indirect call, assume ICV changes.
+ if (CalledFunction == nullptr)
+ return nullptr;
+ if (CalledFunction == GetterRFI.Declaration)
+ return None;
+ if (CalledFunction == SetterRFI.Declaration) {
+ if (ICVReplacementValuesMap[ICV].count(I))
+ return ICVReplacementValuesMap[ICV].lookup(I);
- // both instructions are in the same BB and at \p I we know the ICV
- // value.
- while (I != ICVVal.Inst) {
- // we don't yet know if a call might update an ICV.
- // TODO: check callsite AA for value.
- if (const auto *CB = dyn_cast<CallBase>(I))
- if (CB->getCalledFunction() != GetterRFI.Declaration)
+ return nullptr;
+ }
+
+ // Since we don't know, assume it changes the ICV.
+ if (CalledFunction->isDeclaration())
+ return nullptr;
+
+ const auto &ICVTrackingAA =
+ A.getAAFor<AAICVTracker>(*this, IRPosition::callsite_returned(*CB));
+
+ if (ICVTrackingAA.isAssumedTracked())
+ return ICVTrackingAA.getUniqueReplacementValue(ICV);
+
+ // If we don't know, assume it changes.
+ return nullptr;
+ }
+
+ // We don't check unique value for a function, so return None.
+ Optional<Value *>
+ getUniqueReplacementValue(InternalControlVar ICV) const override {
+ return None;
+ }
+
+ /// Return the value with which \p I can be replaced for specific \p ICV.
+ Optional<Value *> getReplacementValue(InternalControlVar ICV,
+ const Instruction *I,
+ Attributor &A) const override {
+ const auto &ValuesMap = ICVReplacementValuesMap[ICV];
+ if (ValuesMap.count(I))
+ return ValuesMap.lookup(I);
+
+ SmallVector<const Instruction *, 16> Worklist;
+ SmallPtrSet<const Instruction *, 16> Visited;
+ Worklist.push_back(I);
+
+ Optional<Value *> ReplVal;
+
+ while (!Worklist.empty()) {
+ const Instruction *CurrInst = Worklist.pop_back_val();
+ if (!Visited.insert(CurrInst).second)
+ continue;
+
+ const BasicBlock *CurrBB = CurrInst->getParent();
+
+ // Go up and look for all potential setters/calls that might change the
+ // ICV.
+ while ((CurrInst = CurrInst->getPrevNode())) {
+ if (ValuesMap.count(CurrInst)) {
+ Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
+ // Unknown value, track new.
+ if (!ReplVal.hasValue()) {
+ ReplVal = NewReplVal;
+ break;
+ }
+
+ // If we found a conflicting value, we cannot know the ICV value anymore.
+ if (NewReplVal.hasValue())
+ if (ReplVal != NewReplVal)
return nullptr;
- I = I->getPrevNode();
+ break;
}
- // No call in between, return the value.
- return ICVVal.TrackedValue;
+ Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
+ if (!NewReplVal.hasValue())
+ continue;
+
+ // Unknown value, track new.
+ if (!ReplVal.hasValue()) {
+ ReplVal = NewReplVal;
+ break;
+ }
+
+ // We found a conflicting value, so we cannot know the ICV value anymore.
+ if (ReplVal != NewReplVal)
+ return nullptr;
}
+
+ // If we are in the same BB and we have a value, we are done.
+ if (CurrBB == I->getParent() && ReplVal.hasValue())
+ return ReplVal;
+
+ // Go through all predecessors and add terminators for analysis.
+ for (const BasicBlock *Pred : predecessors(CurrBB))
+ if (const Instruction *Terminator = Pred->getTerminator())
+ Worklist.push_back(Terminator);
}
- // No value was tracked.
- return nullptr;
+ return ReplVal;
+ }
+};
+
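To make the cooperation of the ICV-tracking AAs concrete, a hedged example (the caller is hypothetical; omp_set_num_threads and omp_get_max_threads are the setter/getter runtime calls tracked for the nthreads ICV):

  // void caller() {
  //   omp_set_num_threads(4);        // setter: AAICVTrackerFunction records 4
  //   int n = omp_get_max_threads(); // getter call site, no calls in between
  //   use(n);
  // }
  // registerAAs() creates a call-site AA (AAICVTrackerCallSite, defined further
  // down) for the getter; its updateImpl() queries
  // AAICVTrackerFunction::getReplacementValue(), which walks backwards from the
  // getter and finds the unique tracked value 4, and the call-site AA's
  // manifest() then folds the getter to 4 and deletes the call.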
+struct AAICVTrackerFunctionReturned : AAICVTracker {
+ AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
+ : AAICVTracker(IRP, A) {}
+
+ // FIXME: come up with better string.
+ const std::string getAsStr() const override {
+ return "ICVTrackerFunctionReturned";
+ }
+
+ // FIXME: come up with some stats.
+ void trackStatistics() const override {}
+
+ /// We don't manifest anything for this AA.
+ ChangeStatus manifest(Attributor &A) override {
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // Map of ICV to their values at specific program point.
+ EnumeratedArray<Optional<Value *>, InternalControlVar,
+ InternalControlVar::ICV___last>
+ ICVReplacementValuesMap;
+
+ /// Return the unique replacement value of \p ICV at the function's returns, if known.
+ Optional<Value *>
+ getUniqueReplacementValue(InternalControlVar ICV) const override {
+ return ICVReplacementValuesMap[ICV];
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+ *this, IRPosition::function(*getAnchorScope()));
+
+ if (!ICVTrackingAA.isAssumedTracked())
+ return indicatePessimisticFixpoint();
+
+ for (InternalControlVar ICV : TrackableICVs) {
+ Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
+ Optional<Value *> UniqueICVValue;
+
+ auto CheckReturnInst = [&](Instruction &I) {
+ Optional<Value *> NewReplVal =
+ ICVTrackingAA.getReplacementValue(ICV, &I, A);
+
+ // If we found a second ICV value there is no unique returned value.
+ if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
+ return false;
+
+ UniqueICVValue = NewReplVal;
+
+ return true;
+ };
+
+ if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
+ /* CheckBBLivenessOnly */ true))
+ UniqueICVValue = nullptr;
+
+ if (UniqueICVValue == ReplVal)
+ continue;
+
+ ReplVal = UniqueICVValue;
+ Changed = ChangeStatus::CHANGED;
+ }
+
+ return Changed;
+ }
+};
+
+struct AAICVTrackerCallSite : AAICVTracker {
+ AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
+ : AAICVTracker(IRP, A) {}
+
+ void initialize(Attributor &A) override {
+ Function *F = getAnchorScope();
+ if (!F || !A.isFunctionIPOAmendable(*F))
+ indicatePessimisticFixpoint();
+
+ // We only initialize this AA for getters, so we need to know which ICV it
+ // gets.
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ for (InternalControlVar ICV : TrackableICVs) {
+ auto ICVInfo = OMPInfoCache.ICVs[ICV];
+ auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
+ if (Getter.Declaration == getAssociatedFunction()) {
+ AssociatedICV = ICVInfo.Kind;
+ return;
+ }
+ }
+
+ /// Unknown ICV.
+ indicatePessimisticFixpoint();
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ if (!ReplVal.hasValue() || !ReplVal.getValue())
+ return ChangeStatus::UNCHANGED;
+
+ A.changeValueAfterManifest(*getCtxI(), **ReplVal);
+ A.deleteAfterManifest(*getCtxI());
+
+ return ChangeStatus::CHANGED;
+ }
+
+ // FIXME: come up with better string.
+ const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
+
+ // FIXME: come up with some stats.
+ void trackStatistics() const override {}
+
+ InternalControlVar AssociatedICV;
+ Optional<Value *> ReplVal;
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+ *this, IRPosition::function(*getAnchorScope()));
+
+ // We don't have any information, so we assume it changes the ICV.
+ if (!ICVTrackingAA.isAssumedTracked())
+ return indicatePessimisticFixpoint();
+
+ Optional<Value *> NewReplVal =
+ ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
+
+ if (ReplVal == NewReplVal)
+ return ChangeStatus::UNCHANGED;
+
+ ReplVal = NewReplVal;
+ return ChangeStatus::CHANGED;
+ }
+
+ // Return the value with which the associated value can be replaced for the
+ // specific \p ICV.
+ Optional<Value *>
+ getUniqueReplacementValue(InternalControlVar ICV) const override {
+ return ReplVal;
+ }
+};
+
+struct AAICVTrackerCallSiteReturned : AAICVTracker {
+ AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAICVTracker(IRP, A) {}
+
+ // FIXME: come up with better string.
+ const std::string getAsStr() const override {
+ return "ICVTrackerCallSiteReturned";
+ }
+
+ // FIXME: come up with some stats.
+ void trackStatistics() const override {}
+
+ /// We don't manifest anything for this AA.
+ ChangeStatus manifest(Attributor &A) override {
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // Map of ICV to their values at specific program point.
+ EnumeratedArray<Optional<Value *>, InternalControlVar,
+ InternalControlVar::ICV___last>
+ ICVReplacementValuesMap;
+
+ /// Return the value with which the associated value can be replaced for the
+ /// specific \p ICV.
+ Optional<Value *>
+ getUniqueReplacementValue(InternalControlVar ICV) const override {
+ return ICVReplacementValuesMap[ICV];
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+ *this, IRPosition::returned(*getAssociatedFunction()));
+
+ // We don't have any information, so we assume it changes the ICV.
+ if (!ICVTrackingAA.isAssumedTracked())
+ return indicatePessimisticFixpoint();
+
+ for (InternalControlVar ICV : TrackableICVs) {
+ Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
+ Optional<Value *> NewReplVal =
+ ICVTrackingAA.getUniqueReplacementValue(ICV);
+
+ if (ReplVal == NewReplVal)
+ continue;
+
+ ReplVal = NewReplVal;
+ Changed = ChangeStatus::CHANGED;
+ }
+ return Changed;
}
};
} // namespace
@@ -1316,11 +2242,17 @@ AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ llvm_unreachable("ICVTracker can only be created for function position!");
case IRPosition::IRP_RETURNED:
+ AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
+ break;
case IRPosition::IRP_CALL_SITE_RETURNED:
- case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
+ break;
case IRPosition::IRP_CALL_SITE:
- llvm_unreachable("ICVTracker can only be created for function position!");
+ AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
+ break;
case IRPosition::IRP_FUNCTION:
AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
break;
@@ -1339,10 +2271,21 @@ PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
return PreservedAnalyses::all();
SmallVector<Function *, 16> SCC;
- for (LazyCallGraph::Node &N : C)
- SCC.push_back(&N.getFunction());
+ // If there are kernels in the module, we have to run on all SCC's.
+ bool SCCIsInteresting = !OMPInModule.getKernels().empty();
+ for (LazyCallGraph::Node &N : C) {
+ Function *Fn = &N.getFunction();
+ SCC.push_back(Fn);
+
+ // Do we already know that the SCC contains kernels,
+ // or that OpenMP functions are called from this SCC?
+ if (SCCIsInteresting)
+ continue;
+ // If not, let's check that.
+ SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
+ }
- if (SCC.empty())
+ if (!SCCIsInteresting || SCC.empty())
return PreservedAnalyses::all();
FunctionAnalysisManager &FAM =
@@ -1364,7 +2307,6 @@ PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
Attributor A(Functions, InfoCache, CGUpdater);
- // TODO: Compute the module slice we are allowed to look at.
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run();
if (Changed)
@@ -1401,12 +2343,23 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass {
return false;
SmallVector<Function *, 16> SCC;
- for (CallGraphNode *CGN : CGSCC)
- if (Function *Fn = CGN->getFunction())
- if (!Fn->isDeclaration())
- SCC.push_back(Fn);
+ // If there are kernels in the module, we have to run on all SCC's.
+ bool SCCIsInteresting = !OMPInModule.getKernels().empty();
+ for (CallGraphNode *CGN : CGSCC) {
+ Function *Fn = CGN->getFunction();
+ if (!Fn || Fn->isDeclaration())
+ continue;
+ SCC.push_back(Fn);
- if (SCC.empty())
+ // Do we already know that the SCC contains kernels,
+ // or that OpenMP functions are called from this SCC?
+ if (SCCIsInteresting)
+ continue;
+ // If not, let's check that.
+ SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
+ }
+
+ if (!SCCIsInteresting || SCC.empty())
return false;
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
@@ -1430,7 +2383,6 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass {
Attributor A(Functions, InfoCache, CGUpdater);
- // TODO: Compute the module slice we are allowed to look at.
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
return OMPOpt.run();
}
@@ -1468,13 +2420,19 @@ bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
if (OMPInModule.isKnown())
return OMPInModule;
+ auto RecordFunctionsContainingUsesOf = [&](Function *F) {
+ for (User *U : F->users())
+ if (auto *I = dyn_cast<Instruction>(U))
+ OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
+ };
+
// MSVC doesn't like long if-else chains for some reason and instead just
// issues an error. Work around it.
do {
#define OMP_RTL(_Enum, _Name, ...) \
- if (M.getFunction(_Name)) { \
+ if (Function *F = M.getFunction(_Name)) { \
+ RecordFunctionsContainingUsesOf(F); \
OMPInModule = true; \
- break; \
}
#include "llvm/Frontend/OpenMP/OMPKinds.def"
} while (false);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 5d863f1330a4..2bbf4bf110ae 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -97,13 +97,6 @@ static cl::opt<bool>
MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
cl::desc("Mark outline function calls with ColdCC"));
-#ifndef NDEBUG
-// Command line option to debug partial-inlining. The default is none:
-static cl::opt<bool> TracePartialInlining("trace-partial-inlining",
- cl::init(false), cl::Hidden,
- cl::desc("Trace partial inlining."));
-#endif
-
// This is an option used by testing:
static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
cl::init(false), cl::ZeroOrMore,
@@ -159,7 +152,7 @@ struct FunctionOutliningInfo {
// Returns the number of blocks to be inlined including all blocks
// in Entries and one return block.
- unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
+ unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
// A set of blocks including the function entry that guard
// the region to be outlined.
@@ -215,7 +208,7 @@ struct PartialInlinerImpl {
// function (only if we partially inlined early returns) as there is a
// possibility to further "peel" early return statements that were left in the
// outline function due to code size.
- std::pair<bool, Function *> unswitchFunction(Function *F);
+ std::pair<bool, Function *> unswitchFunction(Function &F);
// This class speculatively clones the function to be partially inlined.
// At the end of partial inlining, the remaining callsites to the cloned
@@ -226,16 +219,19 @@ struct PartialInlinerImpl {
// multi-region outlining.
FunctionCloner(Function *F, FunctionOutliningInfo *OI,
OptimizationRemarkEmitter &ORE,
- function_ref<AssumptionCache *(Function &)> LookupAC);
+ function_ref<AssumptionCache *(Function &)> LookupAC,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI);
FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
OptimizationRemarkEmitter &ORE,
- function_ref<AssumptionCache *(Function &)> LookupAC);
+ function_ref<AssumptionCache *(Function &)> LookupAC,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI);
+
~FunctionCloner();
// Prepare for function outlining: making sure there is only
// one incoming edge from the extracted/outlined region to
// the return block.
- void NormalizeReturnBlock();
+ void normalizeReturnBlock() const;
// Do function outlining for cold regions.
bool doMultiRegionFunctionOutlining();
@@ -266,6 +262,7 @@ struct PartialInlinerImpl {
std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
OptimizationRemarkEmitter &ORE;
function_ref<AssumptionCache *(Function &)> LookupAC;
+ function_ref<TargetTransformInfo &(Function &)> GetTTI;
};
private:
@@ -281,13 +278,14 @@ private:
// The result is no larger than 1 and is represented using BP.
// (Note that the outlined region's 'head' block can only have incoming
// edges from the guarding entry blocks).
- BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
+ BranchProbability
+ getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
// Return true if the callee of CB should be partially inlined with
// profit.
bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
BlockFrequency WeightedOutliningRcost,
- OptimizationRemarkEmitter &ORE);
+ OptimizationRemarkEmitter &ORE) const;
// Try to inline DuplicateFunction (cloned from F with call to
// the OutlinedFunction into its callers. Return true
@@ -296,10 +294,11 @@ private:
// Compute the mapping from each use site of DuplicateFunction to the enclosing
// BB's profile count.
- void computeCallsiteToProfCountMap(Function *DuplicateFunction,
- DenseMap<User *, uint64_t> &SiteCountMap);
+ void
+ computeCallsiteToProfCountMap(Function *DuplicateFunction,
+ DenseMap<User *, uint64_t> &SiteCountMap) const;
- bool IsLimitReached() {
+ bool isLimitReached() const {
return (MaxNumPartialInlining != -1 &&
NumPartialInlining >= MaxNumPartialInlining);
}
@@ -311,12 +310,12 @@ private:
return nullptr;
}
- static CallBase *getOneCallSiteTo(Function *F) {
- User *User = *F->user_begin();
+ static CallBase *getOneCallSiteTo(Function &F) {
+ User *User = *F.user_begin();
return getSupportedCallBase(User);
}
- std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
+ std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
CallBase *CB = getOneCallSiteTo(F);
DebugLoc DLoc = CB->getDebugLoc();
BasicBlock *Block = CB->getParent();
@@ -329,16 +328,19 @@ private:
// outlined function itself;
// - The second value is the estimated size of the new call sequence in
// basic block Cloner.OutliningCallBB;
- std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
+ std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner) const;
// Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
// approximate both the size and runtime cost (Note that in the current
// inline cost analysis, there is no clear distinction there either).
- static int computeBBInlineCost(BasicBlock *BB);
+ static int computeBBInlineCost(BasicBlock *BB, TargetTransformInfo *TTI);
+
+ std::unique_ptr<FunctionOutliningInfo>
+ computeOutliningInfo(Function &F) const;
- std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
std::unique_ptr<FunctionOutliningMultiRegionInfo>
- computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE);
+ computeOutliningColdRegionsInfo(Function &F,
+ OptimizationRemarkEmitter &ORE) const;
};
struct PartialInlinerLegacyPass : public ModulePass {
@@ -390,20 +392,20 @@ struct PartialInlinerLegacyPass : public ModulePass {
} // end anonymous namespace
std::unique_ptr<FunctionOutliningMultiRegionInfo>
-PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
- OptimizationRemarkEmitter &ORE) {
- BasicBlock *EntryBlock = &F->front();
+PartialInlinerImpl::computeOutliningColdRegionsInfo(
+ Function &F, OptimizationRemarkEmitter &ORE) const {
+ BasicBlock *EntryBlock = &F.front();
- DominatorTree DT(*F);
+ DominatorTree DT(F);
LoopInfo LI(DT);
- BranchProbabilityInfo BPI(*F, LI);
+ BranchProbabilityInfo BPI(F, LI);
std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
BlockFrequencyInfo *BFI;
if (!GetBFI) {
- ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
+ ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
BFI = ScopedBFI.get();
} else
- BFI = &(GetBFI(*F));
+ BFI = &(GetBFI(F));
// Return if we don't have profiling information.
if (!PSI.hasInstrumentationProfile())
@@ -412,11 +414,6 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
std::make_unique<FunctionOutliningMultiRegionInfo>();
- auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
- BasicBlock *Dom = BlockList.front();
- return BlockList.size() > 1 && Dom->hasNPredecessors(1);
- };
-
auto IsSingleExit =
[&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
BasicBlock *ExitBlock = nullptr;
@@ -432,8 +429,9 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
<< " has more than one region exit edge.";
});
return nullptr;
- } else
- ExitBlock = Block;
+ }
+
+ ExitBlock = Block;
}
}
}
@@ -448,14 +446,14 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
// Use the same computeBBInlineCost function to compute the cost savings of
// the outlining the candidate region.
+ TargetTransformInfo *FTTI = &GetTTI(F);
int OverallFunctionCost = 0;
- for (auto &BB : *F)
- OverallFunctionCost += computeBBInlineCost(&BB);
+ for (auto &BB : F)
+ OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
+
+ LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
+ << "\n";);
-#ifndef NDEBUG
- if (TracePartialInlining)
- dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n";
-#endif
int MinOutlineRegionCost =
static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
BranchProbability MinBranchProbability(
@@ -467,6 +465,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
DenseMap<BasicBlock *, bool> VisitedMap;
DFS.push_back(CurrEntry);
VisitedMap[CurrEntry] = true;
+
// Use Depth First Search on the basic blocks to find CFG edges that are
// considered cold.
// Cold regions considered must also have its inline cost compared to the
@@ -474,88 +473,98 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
// if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
// more.
while (!DFS.empty()) {
- auto *thisBB = DFS.back();
+ auto *ThisBB = DFS.back();
DFS.pop_back();
// Only consider regions with predecessor blocks that are considered
// not-cold (default: part of the top 99.99% of all block counters)
// AND greater than our minimum block execution count (default: 100).
- if (PSI.isColdBlock(thisBB, BFI) ||
- BBProfileCount(thisBB) < MinBlockCounterExecution)
+ if (PSI.isColdBlock(ThisBB, BFI) ||
+ BBProfileCount(ThisBB) < MinBlockCounterExecution)
continue;
- for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
+ for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
if (VisitedMap[*SI])
continue;
VisitedMap[*SI] = true;
DFS.push_back(*SI);
// If branch isn't cold, we skip to the next one.
- BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI);
+ BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
if (SuccProb > MinBranchProbability)
continue;
-#ifndef NDEBUG
- if (TracePartialInlining) {
- dbgs() << "Found cold edge: " << thisBB->getName() << "->"
- << (*SI)->getName() << "\nBranch Probability = " << SuccProb
- << "\n";
- }
-#endif
+
+ LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
+ << SI->getName()
+ << "\nBranch Probability = " << SuccProb << "\n";);
+
SmallVector<BasicBlock *, 8> DominateVector;
DT.getDescendants(*SI, DominateVector);
+ assert(!DominateVector.empty() &&
+ "SI should be reachable and have at least itself as descendant");
+
// We can only outline single entry regions (for now).
- if (!IsSingleEntry(DominateVector))
+ if (!DominateVector.front()->hasNPredecessors(1)) {
+ LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
+ << " doesn't have a single predecessor in the "
+ "dominator tree\n";);
continue;
+ }
+
BasicBlock *ExitBlock = nullptr;
// We can only outline single exit regions (for now).
- if (!(ExitBlock = IsSingleExit(DominateVector)))
+ if (!(ExitBlock = IsSingleExit(DominateVector))) {
+ LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
+ << " doesn't have a unique successor\n";);
continue;
+ }
+
int OutlineRegionCost = 0;
for (auto *BB : DominateVector)
- OutlineRegionCost += computeBBInlineCost(BB);
+ OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
-#ifndef NDEBUG
- if (TracePartialInlining)
- dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n";
-#endif
+ LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
+ << "\n";);
- if (OutlineRegionCost < MinOutlineRegionCost) {
+ if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
ORE.emit([&]() {
return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
&SI->front())
- << ore::NV("Callee", F) << " inline cost-savings smaller than "
+ << ore::NV("Callee", &F)
+ << " inline cost-savings smaller than "
<< ore::NV("Cost", MinOutlineRegionCost);
});
+
+ LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
+ << MinOutlineRegionCost << "\n";);
continue;
}
+
// For now, ignore blocks that belong to a SISE region that is a
// candidate for outlining. In the future, we may want to look
// at inner regions because the outer region may have live-exit
// variables.
for (auto *BB : DominateVector)
VisitedMap[BB] = true;
+
// ReturnBlock here means the block after the outline call
BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
- // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
OutliningInfo->ORI.push_back(RegInfo);
-#ifndef NDEBUG
- if (TracePartialInlining) {
- dbgs() << "Found Cold Candidate starting at block: "
- << DominateVector.front()->getName() << "\n";
- }
-#endif
+ LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
+ << DominateVector.front()->getName() << "\n";);
ColdCandidateFound = true;
NumColdRegionsFound++;
}
}
+
if (ColdCandidateFound)
return OutliningInfo;
- else
- return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
+
+ return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
}
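As a hedged illustration of the kind of region this search selects (hypothetical code, not from the patch): with instrumentation profile data present, a rarely taken single-entry/single-exit path is outlined so the hot remainder of the function becomes cheap enough to inline into callers.

  void log_error(int); // hypothetical cold helper

  int process(int x) {
    if (x < 0) {        // edge found cold by the profile-driven DFS
      log_error(x);     // single-entry/single-exit cold region, gets outlined
      return -1;
    }
    return x * 2;       // hot path, left for the partial inliner
  }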
std::unique_ptr<FunctionOutliningInfo>
-PartialInlinerImpl::computeOutliningInfo(Function *F) {
- BasicBlock *EntryBlock = &F->front();
+PartialInlinerImpl::computeOutliningInfo(Function &F) const {
+ BasicBlock *EntryBlock = &F.front();
BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
if (!BR || BR->isUnconditional())
return std::unique_ptr<FunctionOutliningInfo>();
@@ -598,7 +607,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
// The number of blocks to be inlined has already reached
// the limit. When MaxNumInlineBlocks is set to 0 or 1, this
// disables partial inlining for the function.
- if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
+ if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
break;
if (succ_size(CurrEntry) != 2)
@@ -618,8 +627,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
break;
}
- BasicBlock *CommSucc;
- BasicBlock *OtherSucc;
+ BasicBlock *CommSucc, *OtherSucc;
std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
if (!CommSucc)
@@ -635,7 +643,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
// Do a sanity check of the entries: there should not
// be any successors (not in the entry set) other than
// {ReturnBlock, NonReturnBlock}
- assert(OutliningInfo->Entries[0] == &F->front() &&
+ assert(OutliningInfo->Entries[0] == &F.front() &&
"Function Entry must be the first in Entries vector");
DenseSet<BasicBlock *> Entries;
for (BasicBlock *E : OutliningInfo->Entries)
@@ -644,7 +652,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
// Returns true if BB has a predecessor which is not
// in the Entries set.
auto HasNonEntryPred = [Entries](BasicBlock *BB) {
- for (auto Pred : predecessors(BB)) {
+ for (auto *Pred : predecessors(BB)) {
if (!Entries.count(Pred))
return true;
}
@@ -653,7 +661,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
auto CheckAndNormalizeCandidate =
[Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
for (BasicBlock *E : OutliningInfo->Entries) {
- for (auto Succ : successors(E)) {
+ for (auto *Succ : successors(E)) {
if (Entries.count(Succ))
continue;
if (Succ == OutliningInfo->ReturnBlock)
@@ -673,7 +681,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
// Now grow the candidate's inlining region further by
// peeling off dominating blocks from the outlining region:
- while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
+ while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
BasicBlock *Cand = OutliningInfo->NonReturnBlock;
if (succ_size(Cand) != 2)
break;
@@ -703,11 +711,11 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
}
// Check if there is PGO data or user annotated branch data:
-static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
- if (F->hasProfileData())
+static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
+ if (F.hasProfileData())
return true;
// Now check if any of the entry block has MD_prof data:
- for (auto *E : OI->Entries) {
+ for (auto *E : OI.Entries) {
BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
if (!BR || BR->isUnconditional())
continue;
@@ -718,8 +726,8 @@ static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
return false;
}
-BranchProbability
-PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
+BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
+ FunctionCloner &Cloner) const {
BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
auto EntryFreq =
Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
@@ -728,13 +736,13 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
// FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
// we outlined any regions, so we may encounter situations where the
// OutliningCallFreq is *slightly* bigger than the EntryFreq.
- if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
+ if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
OutliningCallFreq = EntryFreq;
- }
+
auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
- if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
+ if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get()))
return OutlineRegionRelFreq;
// When profile data is not available, we need to be conservative in
@@ -760,7 +768,7 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
bool PartialInlinerImpl::shouldPartialInline(
CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
- OptimizationRemarkEmitter &ORE) {
+ OptimizationRemarkEmitter &ORE) const {
using namespace ore;
Function *Callee = CB.getCalledFunction();
@@ -843,7 +851,8 @@ bool PartialInlinerImpl::shouldPartialInline(
// TODO: Ideally we should share Inliner's InlineCost Analysis code.
// For now use a simplified version. The returned 'InlineCost' will be used
 // to estimate the size cost as well as the runtime cost of the BB.
-int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
+int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
+ TargetTransformInfo *TTI) {
int InlineCost = 0;
const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -866,6 +875,21 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
if (I.isLifetimeStartOrEnd())
continue;
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ Intrinsic::ID IID = II->getIntrinsicID();
+ SmallVector<Type *, 4> Tys;
+ FastMathFlags FMF;
+ for (Value *Val : II->args())
+ Tys.push_back(Val->getType());
+
+ if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+ FMF = FPMO->getFastMathFlags();
+
+ IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
+ InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
+ continue;
+ }
+
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
InlineCost += getCallsiteCost(*CI, DL);
continue;
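// The hunk above prices intrinsic calls through TTI::getIntrinsicInstrCost
// with TCK_SizeAndLatency instead of a generic per-instruction constant. A
// toy, non-LLVM sketch of the same accumulation pattern (Inst, costOf and
// the fallback constant are placeholders, not LLVM's types or values):
#include <numeric>
#include <vector>

struct Inst { bool IsIntrinsic = false; int IntrinsicCost = 0; };

static int costOf(const Inst &I) {
  const int InstrCost = 5; // placeholder fallback cost per instruction
  return I.IsIntrinsic ? I.IntrinsicCost : InstrCost;
}

static int blockCost(const std::vector<Inst> &Block) {
  // Sum the per-instruction costs over the block, as computeBBInlineCost does.
  return std::accumulate(Block.begin(), Block.end(), 0,
                         [](int Sum, const Inst &I) { return Sum + costOf(I); });
}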
@@ -886,18 +910,20 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
}
std::tuple<int, int>
-PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
+PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
for (auto FuncBBPair : Cloner.OutlinedFunctions) {
Function *OutlinedFunc = FuncBBPair.first;
BasicBlock* OutliningCallBB = FuncBBPair.second;
// Now compute the cost of the call sequence to the outlined function
// 'OutlinedFunction' in BB 'OutliningCallBB':
- OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
+ auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
+ OutliningFuncCallCost +=
+ computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
// Now compute the cost of the extracted/outlined function itself:
for (BasicBlock &BB : *OutlinedFunc)
- OutlinedFunctionCost += computeBBInlineCost(&BB);
+ OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
}
assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region");
@@ -921,7 +947,7 @@ PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
// after the function is partially inlined into the callsite.
void PartialInlinerImpl::computeCallsiteToProfCountMap(
Function *DuplicateFunction,
- DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
+ DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
std::vector<User *> Users(DuplicateFunction->user_begin(),
DuplicateFunction->user_end());
Function *CurrentCaller = nullptr;
@@ -962,8 +988,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
PartialInlinerImpl::FunctionCloner::FunctionCloner(
Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
- function_ref<AssumptionCache *(Function &)> LookupAC)
- : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
+ function_ref<AssumptionCache *(Function &)> LookupAC,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI)
+ : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
ClonedOI = std::make_unique<FunctionOutliningInfo>();
// Clone the function, so that we can hack away on it.
@@ -972,9 +999,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
- for (BasicBlock *BB : OI->Entries) {
+ for (BasicBlock *BB : OI->Entries)
ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
- }
+
for (BasicBlock *E : OI->ReturnBlockPreds) {
BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
ClonedOI->ReturnBlockPreds.push_back(NewE);
@@ -987,8 +1014,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
PartialInlinerImpl::FunctionCloner::FunctionCloner(
Function *F, FunctionOutliningMultiRegionInfo *OI,
OptimizationRemarkEmitter &ORE,
- function_ref<AssumptionCache *(Function &)> LookupAC)
- : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
+ function_ref<AssumptionCache *(Function &)> LookupAC,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI)
+ : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
// Clone the function, so that we can hack away on it.
@@ -1000,9 +1028,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
OI->ORI) {
SmallVector<BasicBlock *, 8> Region;
- for (BasicBlock *BB : RegionInfo.Region) {
+ for (BasicBlock *BB : RegionInfo.Region)
Region.push_back(cast<BasicBlock>(VMap[BB]));
- }
+
BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
BasicBlock *NewReturnBlock = nullptr;
@@ -1017,8 +1045,8 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
F->replaceAllUsesWith(ClonedFunc);
}
-void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
- auto getFirstPHI = [](BasicBlock *BB) {
+void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
+ auto GetFirstPHI = [](BasicBlock *BB) {
BasicBlock::iterator I = BB->begin();
PHINode *FirstPhi = nullptr;
while (I != BB->end()) {
@@ -1044,7 +1072,7 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
// of which will go outside.
BasicBlock *PreReturn = ClonedOI->ReturnBlock;
// only split block when necessary:
- PHINode *FirstPhi = getFirstPHI(PreReturn);
+ PHINode *FirstPhi = GetFirstPHI(PreReturn);
unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
@@ -1092,17 +1120,16 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
for (auto *DP : DeadPhis)
DP->eraseFromParent();
- for (auto E : ClonedOI->ReturnBlockPreds) {
+ for (auto *E : ClonedOI->ReturnBlockPreds)
E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
- }
}
bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
- auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
+ auto ComputeRegionCost = [&](SmallVectorImpl<BasicBlock *> &Region) {
int Cost = 0;
for (BasicBlock* BB : Region)
- Cost += computeBBInlineCost(BB);
+ Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
return Cost;
};
@@ -1135,24 +1162,21 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
CE.findInputsOutputs(Inputs, Outputs, Sinks);
-#ifndef NDEBUG
- if (TracePartialInlining) {
+ LLVM_DEBUG({
dbgs() << "inputs: " << Inputs.size() << "\n";
dbgs() << "outputs: " << Outputs.size() << "\n";
for (Value *value : Inputs)
dbgs() << "value used in func: " << *value << "\n";
for (Value *output : Outputs)
dbgs() << "instr used in func: " << *output << "\n";
- }
-#endif
+ });
+
// Do not extract regions that have live exit variables.
if (Outputs.size() > 0 && !ForceLiveExit)
continue;
- Function *OutlinedFunc = CE.extractCodeRegion(CEAC);
-
- if (OutlinedFunc) {
- CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
+ if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
+ CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
BasicBlock *OutliningCallBB = OCS->getParent();
assert(OutliningCallBB->getParent() == ClonedFunc);
OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
@@ -1181,8 +1205,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
// (i.e. not to be extracted to the out of line function)
auto ToBeInlined = [&, this](BasicBlock *BB) {
return BB == ClonedOI->ReturnBlock ||
- (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
- ClonedOI->Entries.end());
+ llvm::is_contained(ClonedOI->Entries, BB);
};
assert(ClonedOI && "Expecting OutlineInfo for single region outline");
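// llvm::is_contained (llvm/ADT/STLExtras.h), used in the hunk above, is a
// readability wrapper over the std::find idiom it replaces. A rough
// stand-in, not LLVM's actual implementation:
#include <algorithm>
#include <iterator>

template <typename Range, typename T>
static bool containsSketch(const Range &R, const T &Value) {
  // True iff Value occurs anywhere in the range R.
  return std::find(std::begin(R), std::end(R), Value) != std::end(R);
}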
@@ -1197,9 +1220,10 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
// Gather up the blocks that we're going to extract.
std::vector<BasicBlock *> ToExtract;
+ auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
ToExtract.push_back(ClonedOI->NonReturnBlock);
- OutlinedRegionCost +=
- PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
+ OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
+ ClonedOI->NonReturnBlock, ClonedFuncTTI);
for (BasicBlock &BB : *ClonedFunc)
if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
ToExtract.push_back(&BB);
@@ -1207,7 +1231,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
// into the outlined function which may make the outlining
// overhead (the difference of the outlined function cost
// and OutliningRegionCost) look larger.
- OutlinedRegionCost += computeBBInlineCost(&BB);
+ OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
}
// Extract the body of the if.
@@ -1220,8 +1244,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
if (OutlinedFunc) {
BasicBlock *OutliningCallBB =
- PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
- ->getParent();
+ PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
assert(OutliningCallBB->getParent() == ClonedFunc);
OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
} else
@@ -1250,52 +1273,48 @@ PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
}
}
-std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
-
- if (F->hasAddressTaken())
+std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
+ if (F.hasAddressTaken())
return {false, nullptr};
// Let inliner handle it
- if (F->hasFnAttribute(Attribute::AlwaysInline))
+ if (F.hasFnAttribute(Attribute::AlwaysInline))
return {false, nullptr};
- if (F->hasFnAttribute(Attribute::NoInline))
+ if (F.hasFnAttribute(Attribute::NoInline))
return {false, nullptr};
- if (PSI.isFunctionEntryCold(F))
+ if (PSI.isFunctionEntryCold(&F))
return {false, nullptr};
- if (F->users().empty())
+ if (F.users().empty())
return {false, nullptr};
- OptimizationRemarkEmitter ORE(F);
+ OptimizationRemarkEmitter ORE(&F);
// Only try to outline cold regions if we have a profile summary, which
// implies we have profiling information.
- if (PSI.hasProfileSummary() && F->hasProfileData() &&
+ if (PSI.hasProfileSummary() && F.hasProfileData() &&
!DisableMultiRegionPartialInline) {
std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
computeOutliningColdRegionsInfo(F, ORE);
if (OMRI) {
- FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
+ FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
-#ifndef NDEBUG
- if (TracePartialInlining) {
+ LLVM_DEBUG({
dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
<< "\n";
- }
-#endif
+ });
+
bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
if (DidOutline) {
-#ifndef NDEBUG
- if (TracePartialInlining) {
+ LLVM_DEBUG({
dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
Cloner.ClonedFunc->print(dbgs());
dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
- }
-#endif
+ });
if (tryPartialInline(Cloner))
return {true, nullptr};
@@ -1310,17 +1329,15 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (!OI)
return {false, nullptr};
- FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
- Cloner.NormalizeReturnBlock();
+ FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
+ Cloner.normalizeReturnBlock();
Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
if (!OutlinedFunction)
return {false, nullptr};
- bool AnyInline = tryPartialInline(Cloner);
-
- if (AnyInline)
+ if (tryPartialInline(Cloner))
return {true, OutlinedFunction};
return {false, nullptr};
@@ -1338,9 +1355,9 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
// Only calculate RelativeToEntryFreq when we are doing single region
// outlining.
BranchProbability RelativeToEntryFreq;
- if (Cloner.ClonedOI) {
+ if (Cloner.ClonedOI)
RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
- } else
+ else
// RelativeToEntryFreq doesn't make sense when we have more than one
// outlined call because each call will have a different relative frequency
// to the entry block. We can consider using the average, but the
@@ -1358,7 +1375,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
DebugLoc DLoc;
BasicBlock *Block;
- std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
+ std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
OrigFuncORE.emit([&]() {
return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
DLoc, Block)
@@ -1389,7 +1406,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
for (User *User : Users) {
CallBase *CB = getSupportedCallBase(User);
- if (IsLimitReached())
+ if (isLimitReached())
continue;
OptimizationRemarkEmitter CallerORE(CB->getCaller());
@@ -1426,7 +1443,6 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
NumPartialInlined++;
else
NumColdOutlinePartialInlined++;
-
}
if (AnyInline) {
@@ -1439,7 +1455,6 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
<< "Partially inlined into at least one caller";
});
-
}
return AnyInline;
@@ -1473,7 +1488,7 @@ bool PartialInlinerImpl::run(Module &M) {
if (Recursive)
continue;
- std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
+ std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
if (Result.second)
Worklist.push_back(Result.second);
Changed |= Result.first;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index d73d42c52074..068328391dff 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -51,16 +51,16 @@
using namespace llvm;
-static cl::opt<bool>
- RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
- cl::ZeroOrMore, cl::desc("Run Partial inlining pass"));
+cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Run Partial inlining pass"));
static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
-static cl::opt<bool> ExtraVectorizerPasses(
+cl::opt<bool> ExtraVectorizerPasses(
"extra-vectorizer-passes", cl::init(false), cl::Hidden,
cl::desc("Run cleanup optimization passes after vectorization."));
@@ -68,29 +68,33 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
-static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
- cl::desc("Run the NewGVN pass"));
+cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
+ cl::desc("Run the NewGVN pass"));
// Experimental option to use CFL-AA
enum class CFLAAType { None, Steensgaard, Andersen, Both };
-static cl::opt<CFLAAType>
- UseCFLAA("use-cfl-aa", cl::init(CFLAAType::None), cl::Hidden,
+static cl::opt<::CFLAAType>
+ UseCFLAA("use-cfl-aa", cl::init(::CFLAAType::None), cl::Hidden,
cl::desc("Enable the new, experimental CFL alias analysis"),
- cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"),
- clEnumValN(CFLAAType::Steensgaard, "steens",
+ cl::values(clEnumValN(::CFLAAType::None, "none", "Disable CFL-AA"),
+ clEnumValN(::CFLAAType::Steensgaard, "steens",
"Enable unification-based CFL-AA"),
- clEnumValN(CFLAAType::Andersen, "anders",
+ clEnumValN(::CFLAAType::Andersen, "anders",
"Enable inclusion-based CFL-AA"),
- clEnumValN(CFLAAType::Both, "both",
+ clEnumValN(::CFLAAType::Both, "both",
"Enable both variants of CFL-AA")));
static cl::opt<bool> EnableLoopInterchange(
"enable-loopinterchange", cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental LoopInterchange Pass"));
-static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
- cl::init(false), cl::Hidden,
- cl::desc("Enable Unroll And Jam Pass"));
+cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable Unroll And Jam Pass"));
+
+cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable the LoopFlatten Pass"));
static cl::opt<bool>
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
@@ -103,22 +107,25 @@ static cl::opt<bool>
cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false),
cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass"));
+cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden,
+ cl::desc("Enable ir outliner pass"));
+
static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));
-static cl::opt<bool>
+cl::opt<bool>
DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
cl::desc("Disable pre-instrumentation inliner"));
-static cl::opt<int> PreInlineThreshold(
+cl::opt<int> PreInlineThreshold(
"preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
-static cl::opt<bool> EnableGVNHoist(
- "enable-gvn-hoist", cl::init(false), cl::ZeroOrMore,
- cl::desc("Enable the GVN hoisting pass (default = off)"));
+cl::opt<bool>
+ EnableGVNHoist("enable-gvn-hoist", cl::init(false), cl::ZeroOrMore,
+ cl::desc("Enable the GVN hoisting pass (default = off)"));
static cl::opt<bool>
DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
@@ -130,13 +137,13 @@ static cl::opt<bool> EnableSimpleLoopUnswitch(
cl::desc("Enable the simple loop unswitch pass. Also enables independent "
"cleanup passes integrated into the loop pass manager pipeline."));
-static cl::opt<bool> EnableGVNSink(
- "enable-gvn-sink", cl::init(false), cl::ZeroOrMore,
- cl::desc("Enable the GVN sinking pass (default = off)"));
+cl::opt<bool>
+ EnableGVNSink("enable-gvn-sink", cl::init(false), cl::ZeroOrMore,
+ cl::desc("Enable the GVN sinking pass (default = off)"));
// This option is used in simplifying testing SampleFDO optimizations for
// profile loading.
-static cl::opt<bool>
+cl::opt<bool>
EnableCHR("enable-chr", cl::init(true), cl::Hidden,
cl::desc("Enable control height reduction optimization (CHR)"));
@@ -149,9 +156,14 @@ cl::opt<bool> EnableOrderFileInstrumentation(
"enable-order-file-instrumentation", cl::init(false), cl::Hidden,
cl::desc("Enable order file instrumentation (default = off)"));
-static cl::opt<bool>
- EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
- cl::desc("Enable lowering of the matrix intrinsics"));
+cl::opt<bool> EnableMatrix(
+ "enable-matrix", cl::init(false), cl::Hidden,
+ cl::desc("Enable lowering of the matrix intrinsics"));
+
+cl::opt<bool> EnableConstraintElimination(
+ "enable-constraint-elimination", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Enable pass to eliminate conditions based on linear constraints."));
cl::opt<AttributorRunOption> AttributorRun(
"attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
@@ -264,13 +276,13 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
void PassManagerBuilder::addInitialAliasAnalysisPasses(
legacy::PassManagerBase &PM) const {
switch (UseCFLAA) {
- case CFLAAType::Steensgaard:
+ case ::CFLAAType::Steensgaard:
PM.add(createCFLSteensAAWrapperPass());
break;
- case CFLAAType::Andersen:
+ case ::CFLAAType::Andersen:
PM.add(createCFLAndersAAWrapperPass());
break;
- case CFLAAType::Both:
+ case ::CFLAAType::Both:
PM.add(createCFLSteensAAWrapperPass());
PM.add(createCFLAndersAAWrapperPass());
break;
@@ -294,6 +306,13 @@ void PassManagerBuilder::populateFunctionPassManager(
if (LibraryInfo)
FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
+ // The backends do not handle matrix intrinsics currently.
+ // Make sure they are also lowered in O0.
+ // FIXME: A lightweight version of the pass should run in the backend
+ // pipeline on demand.
+ if (EnableMatrix && OptLevel == 0)
+ FPM.add(createLowerMatrixIntrinsicsMinimalPass());
+
if (OptLevel == 0) return;
addInitialAliasAnalysisPasses(FPM);
@@ -314,19 +333,21 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
return;
// Perform the preinline and cleanup passes for O1 and above.
- // And avoid doing them if optimizing for size.
// We will not do this inline for context sensitive PGO (when IsCS is true).
- if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner &&
- PGOSampleUse.empty() && !IsCS) {
+ if (OptLevel > 0 && !DisablePreInliner && PGOSampleUse.empty() && !IsCS) {
// Create preinline pass. We construct an InlineParams object and specify
// the threshold here to avoid the command line options of the regular
// inliner to influence pre-inlining. The only fields of InlineParams we
// care about are DefaultThreshold and HintThreshold.
InlineParams IP;
IP.DefaultThreshold = PreInlineThreshold;
- // FIXME: The hint threshold has the same value used by the regular inliner.
- // This should probably be lowered after performance testing.
- IP.HintThreshold = 325;
+ // FIXME: The hint threshold has the same value used by the regular inliner
+ // when not optimizing for size. This should probably be lowered after
+ // performance testing.
+ // Use PreInlineThreshold for both -Os and -Oz. Not running the preinliner
+ // makes the instrumented binary unusably large. Even if PreInlineThreshold
+ // is not the correct threshold for -Oz, it is better than not running the
+ // preinliner.
+ IP.HintThreshold = SizeLevel > 0 ? PreInlineThreshold : 325;
MPM.add(createFunctionInliningPass(IP));
MPM.add(createSROAPass());
@@ -374,6 +395,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
}
}
+ if (EnableConstraintElimination)
+ MPM.add(createConstraintEliminationPass());
+
if (OptLevel > 1) {
// Speculative execution if the target has divergent branches; otherwise nop.
MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
@@ -409,7 +433,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLoopSimplifyCFGPass());
}
// Rotate Loop - disable header duplication at -Oz
- MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+ MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
// TODO: Investigate promotion cap for O1.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
if (EnableSimpleLoopUnswitch)
@@ -422,20 +446,27 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
// We resume loop passes creating a second loop pipeline here.
- MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
+ if (EnableLoopFlatten) {
+ MPM.add(createLoopFlattenPass()); // Flatten loops
+ MPM.add(createLoopSimplifyCFGPass());
+ }
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
addExtensionsToPM(EP_LateLoopOptimizations, MPM);
MPM.add(createLoopDeletionPass()); // Delete dead loops
if (EnableLoopInterchange)
MPM.add(createLoopInterchangePass()); // Interchange loops
- // Unroll small loops
+ // Unroll small loops and perform peeling.
MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.
+ // Break up allocas that may now be splittable after loop unrolling.
+ MPM.add(createSROAPass());
+
if (OptLevel > 1) {
MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
MPM.add(NewGVN ? createNewGVNPass()
@@ -444,6 +475,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
+ if (EnableConstraintElimination)
+ MPM.add(createConstraintEliminationPass());
+
// Delete dead bit computations (instcombine runs after to fold away the dead
// computations, and then ADCE will run later to exploit any new DCE
// opportunities that creates).
@@ -456,6 +490,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (OptLevel > 1) {
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
+ }
+ MPM.add(createAggressiveDCEPass()); // Delete dead instructions
+
+ // TODO: Investigate if this is too expensive at O1.
+ if (OptLevel > 1) {
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
}
@@ -465,8 +504,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (RerollLoops)
MPM.add(createLoopRerollPass());
- // TODO: Investigate if this is too expensive at O1.
- MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Clean up after everything.
MPM.add(createInstructionCombiningPass());
@@ -483,6 +520,8 @@ void PassManagerBuilder::populateModulePassManager(
// is handled separately, so just check this is not the ThinLTO post-link.
bool DefaultOrPreLinkPipeline = !PerformThinLTO;
+ MPM.add(createAnnotation2MetadataLegacyPass());
+
if (!PGOSampleUse.empty()) {
MPM.add(createPruneEHPass());
// In ThinLTO mode, when flattened profile is used, all the available
@@ -533,6 +572,8 @@ void PassManagerBuilder::populateModulePassManager(
// new unnamed globals.
MPM.add(createNameAnonGlobalPass());
}
+
+ MPM.add(createAnnotationRemarksLegacyPass());
return;
}
@@ -736,7 +777,7 @@ void PassManagerBuilder::populateModulePassManager(
 // Re-rotate loops in all our loop nests. These may have fallen out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form. Disable header duplication at -Oz.
- MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+ MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is
@@ -777,7 +818,14 @@ void PassManagerBuilder::populateModulePassManager(
// convert to more optimized IR using more aggressive simplify CFG options.
// The extra sinking transform can create larger basic blocks, so do this
// before SLP vectorization.
- MPM.add(createCFGSimplificationPass(1, true, true, false, true));
+ // FIXME: study whether hoisting and/or sinking of common instructions should
+ // be delayed until after SLP vectorizer.
+ MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
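// The call above replaces positional booleans with a SimplifyCFGOptions
// builder, so each knob is named at the call site. A minimal sketch of that
// fluent-setter pattern, using a hypothetical CFGOptsSketch type rather than
// LLVM's class:
struct CFGOptsSketch {
  bool ForwardSwitchCond = false, SwitchToLookup = false;
  bool CanonicalLoops = true, HoistCommon = false, SinkCommon = false;
  // Each setter returns *this so independent options can be chained.
  CFGOptsSketch &forwardSwitchCondToPhi(bool V) { ForwardSwitchCond = V; return *this; }
  CFGOptsSketch &convertSwitchToLookupTable(bool V) { SwitchToLookup = V; return *this; }
  CFGOptsSketch &needCanonicalLoops(bool V) { CanonicalLoops = V; return *this; }
  CFGOptsSketch &hoistCommonInsts(bool V) { HoistCommon = V; return *this; }
  CFGOptsSketch &sinkCommonInsts(bool V) { SinkCommon = V; return *this; }
};
// Usage mirrors the call above, e.g.
//   CFGOptsSketch().convertSwitchToLookupTable(true).sinkCommonInsts(true);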
if (SLPVectorize) {
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
@@ -835,6 +883,9 @@ void PassManagerBuilder::populateModulePassManager(
if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
MPM.add(createHotColdSplittingPass());
+ if (EnableIROutliner)
+ MPM.add(createIROutlinerPass());
+
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
@@ -866,6 +917,8 @@ void PassManagerBuilder::populateModulePassManager(
// Rename anon globals to be able to handle them in the summary
MPM.add(createNameAnonGlobalPass());
}
+
+ MPM.add(createAnnotationRemarksLegacyPass());
}
void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
@@ -984,7 +1037,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// The IPO passes may leave cruft around. Clean up after them.
PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
- PM.add(createJumpThreadingPass());
+ PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
// Break up allocas
PM.add(createSROAPass());
@@ -1000,23 +1053,29 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
- PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());
+ PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
// More loops are countable; try to optimize them.
+ if (EnableLoopFlatten)
+ PM.add(createLoopFlattenPass());
PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());
if (EnableLoopInterchange)
PM.add(createLoopInterchangePass());
- // Unroll small loops
+ if (EnableConstraintElimination)
+ PM.add(createConstraintEliminationPass());
+
+ // Unroll small loops and perform peeling.
PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
+ PM.add(createLoopDistributePass());
PM.add(createLoopVectorizePass(true, !LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
@@ -1028,7 +1087,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// we may have exposed more scalar opportunities. Run parts of the scalar
// optimizer again at this point.
PM.add(createInstructionCombiningPass()); // Initial cleanup
- PM.add(createCFGSimplificationPass()); // if-convert
+ PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert
+ .hoistCommonInsts(true)));
PM.add(createSCCPPass()); // Propagate exposed constants
PM.add(createInstructionCombiningPass()); // Clean up again
PM.add(createBitTrackingDCEPass());
@@ -1047,7 +1107,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
- PM.add(createJumpThreadingPass());
+ PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
}
void PassManagerBuilder::addLateLTOOptimizationPasses(
@@ -1058,7 +1118,8 @@ void PassManagerBuilder::addLateLTOOptimizationPasses(
PM.add(createHotColdSplittingPass());
// Delete basic blocks, which optimization passes may have killed.
- PM.add(createCFGSimplificationPass());
+ PM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().hoistCommonInsts(true)));
// Drop bodies of available externally objects to improve GlobalDCE.
PM.add(createEliminateAvailableExternallyPass());
@@ -1140,6 +1201,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM);
+ PM.add(createAnnotationRemarksLegacyPass());
+
if (VerifyOutput)
PM.add(createVerifierPass());
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PruneEH.cpp
index a16dc664db64..3f3b18771cd5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
@@ -27,8 +28,10 @@
#include "llvm/InitializePasses.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
+
using namespace llvm;
#define DEBUG_TYPE "prune-eh"
@@ -45,11 +48,10 @@ namespace {
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC) override;
-
};
}
-static bool SimplifyFunction(Function *F, CallGraph &CG);
-static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG);
+static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU);
+static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU);
char PruneEH::ID = 0;
INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
@@ -60,20 +62,17 @@ INITIALIZE_PASS_END(PruneEH, "prune-eh",
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
-static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
- SmallPtrSet<CallGraphNode *, 8> SCCNodes;
+static bool runImpl(CallGraphUpdater &CGU, SetVector<Function *> &Functions) {
+#ifndef NDEBUG
+ for (auto *F : Functions)
+ assert(F && "null Function");
+#endif
bool MadeChange = false;
- // Fill SCCNodes with the elements of the SCC. Used for quickly
- // looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphNode *I : SCC)
- SCCNodes.insert(I);
-
// First pass, scan all of the functions in the SCC, simplifying them
// according to what we know.
- for (CallGraphNode *I : SCC)
- if (Function *F = I->getFunction())
- MadeChange |= SimplifyFunction(F, CG);
+ for (Function *F : Functions)
+ MadeChange |= SimplifyFunction(F, CGU);
// Next, check to see if any callees might throw or if there are any external
// functions in this SCC: if so, we cannot prune any functions in this SCC.
@@ -83,13 +82,8 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
// obviously the SCC might throw.
//
bool SCCMightUnwind = false, SCCMightReturn = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
- (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
- Function *F = (*I)->getFunction();
- if (!F) {
- SCCMightUnwind = true;
- SCCMightReturn = true;
- } else if (!F->hasExactDefinition()) {
+ for (Function *F : Functions) {
+ if (!F->hasExactDefinition()) {
SCCMightUnwind |= !F->doesNotThrow();
SCCMightReturn |= !F->doesNotReturn();
} else {
@@ -125,10 +119,9 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
bool InstMightUnwind = true;
if (const auto *CI = dyn_cast<CallInst>(&I)) {
if (Function *Callee = CI->getCalledFunction()) {
- CallGraphNode *CalleeNode = CG[Callee];
// If the callee is outside our current SCC then we may throw
// because it might. If it is inside, do nothing.
- if (SCCNodes.count(CalleeNode) > 0)
+ if (Functions.contains(Callee))
InstMightUnwind = false;
}
}
@@ -140,18 +133,15 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
if (IA->hasSideEffects())
SCCMightReturn = true;
}
-
+ }
if (SCCMightUnwind && SCCMightReturn)
break;
- }
}
}
// If the SCC doesn't unwind or doesn't throw, note this fact.
if (!SCCMightUnwind || !SCCMightReturn)
- for (CallGraphNode *I : SCC) {
- Function *F = I->getFunction();
-
+ for (Function *F : Functions) {
if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) {
F->addFnAttr(Attribute::NoUnwind);
MadeChange = true;
@@ -163,30 +153,35 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
}
}
- for (CallGraphNode *I : SCC) {
+ for (Function *F : Functions) {
// Convert any invoke instructions to non-throwing functions in this node
// into call instructions with a branch. This makes the exception blocks
// dead.
- if (Function *F = I->getFunction())
- MadeChange |= SimplifyFunction(F, CG);
+ MadeChange |= SimplifyFunction(F, CGU);
}
return MadeChange;
}
-
bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
if (skipSCC(SCC))
return false;
+ SetVector<Function *> Functions;
+ for (auto &N : SCC) {
+ if (auto *F = N->getFunction())
+ Functions.insert(F);
+ }
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- return runImpl(SCC, CG);
+ CallGraphUpdater CGU;
+ CGU.initialize(CG, SCC);
+ return runImpl(CGU, Functions);
}
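// The refactor above keys the "is this callee in our SCC?" test on a
// SetVector<Function *>: insertion order is preserved for iteration while
// membership tests stay cheap. A miniature version of that container idea,
// far simpler than llvm::SetVector and only for illustration:
#include <unordered_set>
#include <vector>

template <typename T>
class MiniSetVector {
  std::vector<T> Order;        // iteration order (first insertion wins)
  std::unordered_set<T> Seen;  // fast membership test
public:
  bool insert(const T &V) {
    if (!Seen.insert(V).second)
      return false; // already present
    Order.push_back(V);
    return true;
  }
  bool contains(const T &V) const { return Seen.count(V) != 0; }
  auto begin() const { return Order.begin(); }
  auto end() const { return Order.end(); }
};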
// SimplifyFunction - Given information about callees, simplify the specified
// function if we have invokes to non-unwinding functions or code after calls to
// no-return functions.
-static bool SimplifyFunction(Function *F, CallGraph &CG) {
+static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU) {
bool MadeChange = false;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
@@ -196,7 +191,7 @@ static bool SimplifyFunction(Function *F, CallGraph &CG) {
// If the unwind block is now dead, nuke it.
if (pred_empty(UnwindBlock))
- DeleteBasicBlock(UnwindBlock, CG); // Delete the new BB.
+ DeleteBasicBlock(UnwindBlock, CGU); // Delete the new BB.
++NumRemoved;
MadeChange = true;
@@ -216,7 +211,7 @@ static bool SimplifyFunction(Function *F, CallGraph &CG) {
BB->getInstList().pop_back();
new UnreachableInst(BB->getContext(), &*BB);
- DeleteBasicBlock(New, CG); // Delete the new BB.
+ DeleteBasicBlock(New, CGU); // Delete the new BB.
MadeChange = true;
++NumUnreach;
break;
@@ -229,12 +224,11 @@ static bool SimplifyFunction(Function *F, CallGraph &CG) {
/// DeleteBasicBlock - remove the specified basic block from the program,
/// updating the callgraph to reflect any now-obsolete edges due to calls that
/// exist in the BB.
-static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) {
+static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU) {
assert(pred_empty(BB) && "BB is not dead!");
Instruction *TokenInst = nullptr;
- CallGraphNode *CGN = CG[BB->getParent()];
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
--I;
@@ -246,9 +240,9 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) {
if (auto *Call = dyn_cast<CallBase>(&*I)) {
const Function *Callee = Call->getCalledFunction();
if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
- CGN->removeCallEdgeFor(*Call);
+ CGU.removeCallSite(*Call);
else if (!Callee->isIntrinsic())
- CGN->removeCallEdgeFor(*Call);
+ CGU.removeCallSite(*Call);
}
if (!I->use_empty())
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
new file mode 100644
index 000000000000..158fa0771c3b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -0,0 +1,585 @@
+//===- SampleContextTracker.cpp - Context-sensitive Profile Tracker -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleContextTracker used by CSSPGO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/SampleContextTracker.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include <map>
+#include <queue>
+#include <vector>
+
+using namespace llvm;
+using namespace sampleprof;
+
+#define DEBUG_TYPE "sample-context-tracker"
+
+namespace llvm {
+
+ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
+ StringRef CalleeName) {
+ if (CalleeName.empty())
+ return getHottestChildContext(CallSite);
+
+ uint32_t Hash = nodeHash(CalleeName, CallSite);
+ auto It = AllChildContext.find(Hash);
+ if (It != AllChildContext.end())
+ return &It->second;
+ return nullptr;
+}
+
+ContextTrieNode *
+ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
+ // CSFDO-TODO: This could be slow; change AllChildContext so we can
+ // do a point lookup for a child node by call site alone.
+ // Retrieve the child node with max count for indirect call
+ ContextTrieNode *ChildNodeRet = nullptr;
+ uint64_t MaxCalleeSamples = 0;
+ for (auto &It : AllChildContext) {
+ ContextTrieNode &ChildNode = It.second;
+ if (ChildNode.CallSiteLoc != CallSite)
+ continue;
+ FunctionSamples *Samples = ChildNode.getFunctionSamples();
+ if (!Samples)
+ continue;
+ if (Samples->getTotalSamples() > MaxCalleeSamples) {
+ ChildNodeRet = &ChildNode;
+ MaxCalleeSamples = Samples->getTotalSamples();
+ }
+ }
+
+ return ChildNodeRet;
+}
+
+ContextTrieNode &ContextTrieNode::moveToChildContext(
+ const LineLocation &CallSite, ContextTrieNode &&NodeToMove,
+ StringRef ContextStrToRemove, bool DeleteNode) {
+ uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite);
+ assert(!AllChildContext.count(Hash) && "Node to remove must exist");
+ LineLocation OldCallSite = NodeToMove.CallSiteLoc;
+ ContextTrieNode &OldParentContext = *NodeToMove.getParentContext();
+ AllChildContext[Hash] = NodeToMove;
+ ContextTrieNode &NewNode = AllChildContext[Hash];
+ NewNode.CallSiteLoc = CallSite;
+
+ // Walk through the nodes in the moved subtree and update each
+ // FunctionSamples' context to reflect the context promotion.
+ // We also need to set the new parent link for all children.
+ std::queue<ContextTrieNode *> NodeToUpdate;
+ NewNode.setParentContext(this);
+ NodeToUpdate.push(&NewNode);
+
+ while (!NodeToUpdate.empty()) {
+ ContextTrieNode *Node = NodeToUpdate.front();
+ NodeToUpdate.pop();
+ FunctionSamples *FSamples = Node->getFunctionSamples();
+
+ if (FSamples) {
+ FSamples->getContext().promoteOnPath(ContextStrToRemove);
+ FSamples->getContext().setState(SyntheticContext);
+ LLVM_DEBUG(dbgs() << " Context promoted to: " << FSamples->getContext()
+ << "\n");
+ }
+
+ for (auto &It : Node->getAllChildContext()) {
+ ContextTrieNode *ChildNode = &It.second;
+ ChildNode->setParentContext(Node);
+ NodeToUpdate.push(ChildNode);
+ }
+ }
+
+ // Original context no longer needed, destroy if requested.
+ if (DeleteNode)
+ OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName());
+
+ return NewNode;
+}
+
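// The loop above walks the moved subtree breadth-first, updating each
// FunctionSamples' context and re-linking every child to its (possibly new)
// parent. A generic sketch of just the re-linking walk, assuming a
// hypothetical Node type exposing children() and setParent():
#include <queue>

template <typename Node>
void reparentSubtree(Node *NewRoot) {
  std::queue<Node *> Work;
  Work.push(NewRoot);
  while (!Work.empty()) {
    Node *N = Work.front();
    Work.pop();
    for (auto &Child : N->children()) { // re-link children to their parent
      Child.setParent(N);
      Work.push(&Child);
    }
  }
}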
+void ContextTrieNode::removeChildContext(const LineLocation &CallSite,
+ StringRef CalleeName) {
+ uint32_t Hash = nodeHash(CalleeName, CallSite);
+ // Note this essentially calls dtor and destroys that child context
+ AllChildContext.erase(Hash);
+}
+
+std::map<uint32_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() {
+ return AllChildContext;
+}
+
+const StringRef ContextTrieNode::getFuncName() const { return FuncName; }
+
+FunctionSamples *ContextTrieNode::getFunctionSamples() const {
+ return FuncSamples;
+}
+
+void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) {
+ FuncSamples = FSamples;
+}
+
+LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; }
+
+ContextTrieNode *ContextTrieNode::getParentContext() const {
+ return ParentContext;
+}
+
+void ContextTrieNode::setParentContext(ContextTrieNode *Parent) {
+ ParentContext = Parent;
+}
+
+void ContextTrieNode::dump() {
+ dbgs() << "Node: " << FuncName << "\n"
+ << " Callsite: " << CallSiteLoc << "\n"
+ << " Children:\n";
+
+ for (auto &It : AllChildContext) {
+ dbgs() << " Node: " << It.second.getFuncName() << "\n";
+ }
+}
+
+uint32_t ContextTrieNode::nodeHash(StringRef ChildName,
+ const LineLocation &Callsite) {
+ // We still use the child's name for the child hash, because the
+ // children of the root node do not have distinct line/discriminator
+ // values, so we rely on the name to differentiate them.
+ uint32_t NameHash = std::hash<std::string>{}(ChildName.str());
+ uint32_t LocId = (Callsite.LineOffset << 16) | Callsite.Discriminator;
+ return NameHash + (LocId << 5) + LocId;
+}
+
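// The hash above packs the call site's (LineOffset, Discriminator) into one
// 32-bit id and mixes it with a hash of the callee name. A standalone mirror
// of that combination (nodeHashSketch is a hypothetical name):
#include <cstdint>
#include <functional>
#include <string>

static uint32_t nodeHashSketch(const std::string &ChildName,
                               uint32_t LineOffset, uint32_t Discriminator) {
  uint32_t NameHash =
      static_cast<uint32_t>(std::hash<std::string>{}(ChildName));
  uint32_t LocId = (LineOffset << 16) | Discriminator;
  return NameHash + (LocId << 5) + LocId;
}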
+ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
+ const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) {
+ uint32_t Hash = nodeHash(CalleeName, CallSite);
+ auto It = AllChildContext.find(Hash);
+ if (It != AllChildContext.end()) {
+ assert(It->second.getFuncName() == CalleeName &&
+ "Hash collision for child context node");
+ return &It->second;
+ }
+
+ if (!AllowCreate)
+ return nullptr;
+
+ AllChildContext[Hash] = ContextTrieNode(this, CalleeName, nullptr, CallSite);
+ return &AllChildContext[Hash];
+}
+
+// Profile tracker that manages profiles and their associated contexts
+SampleContextTracker::SampleContextTracker(
+ StringMap<FunctionSamples> &Profiles) {
+ for (auto &FuncSample : Profiles) {
+ FunctionSamples *FSamples = &FuncSample.second;
+ SampleContext Context(FuncSample.first(), RawContext);
+ LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n");
+ if (!Context.isBaseContext())
+ FuncToCtxtProfileSet[Context.getNameWithoutContext()].insert(FSamples);
+ ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
+ assert(!NewNode->getFunctionSamples() &&
+ "New node can't have sample profile");
+ NewNode->setFunctionSamples(FSamples);
+ }
+}
+
+FunctionSamples *
+SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
+ StringRef CalleeName) {
+ LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n");
+ DILocation *DIL = Inst.getDebugLoc();
+ if (!DIL)
+ return nullptr;
+
+ // For indirect call, CalleeName will be empty, in which case the context
+ // profile for callee with largest total samples will be returned.
+ ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName);
+ if (CalleeContext) {
+ FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
+ LLVM_DEBUG(if (FSamples) {
+ dbgs() << " Callee context found: " << FSamples->getContext() << "\n";
+ });
+ return FSamples;
+ }
+
+ return nullptr;
+}
+
+std::vector<const FunctionSamples *>
+SampleContextTracker::getIndirectCalleeContextSamplesFor(
+ const DILocation *DIL) {
+ std::vector<const FunctionSamples *> R;
+ if (!DIL)
+ return R;
+
+ ContextTrieNode *CallerNode = getContextFor(DIL);
+ LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+ for (auto &It : CallerNode->getAllChildContext()) {
+ ContextTrieNode &ChildNode = It.second;
+ if (ChildNode.getCallSiteLoc() != CallSite)
+ continue;
+ if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples())
+ R.push_back(CalleeSamples);
+ }
+
+ return R;
+}
+
+FunctionSamples *
+SampleContextTracker::getContextSamplesFor(const DILocation *DIL) {
+ assert(DIL && "Expect non-null location");
+
+ ContextTrieNode *ContextNode = getContextFor(DIL);
+ if (!ContextNode)
+ return nullptr;
+
+ // We may have inlined callees during pre-LTO compilation, in which case
+ // we need to rely on the inline stack from !dbg to mark context profile
+ // as inlined, instead of `MarkContextSamplesInlined` during inlining.
+ // Sample profile loader walks through all instructions to get profile,
+ // which calls this function. So once that is done, all previously inlined
+ // context profile should be marked properly.
+ FunctionSamples *Samples = ContextNode->getFunctionSamples();
+ if (Samples && ContextNode->getParentContext() != &RootContext)
+ Samples->getContext().setState(InlinedContext);
+
+ return Samples;
+}
+
+FunctionSamples *
+SampleContextTracker::getContextSamplesFor(const SampleContext &Context) {
+ ContextTrieNode *Node = getContextFor(Context);
+ if (!Node)
+ return nullptr;
+
+ return Node->getFunctionSamples();
+}
+
+SampleContextTracker::ContextSamplesTy &
+SampleContextTracker::getAllContextSamplesFor(const Function &Func) {
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
+ return FuncToCtxtProfileSet[CanonName];
+}
+
+SampleContextTracker::ContextSamplesTy &
+SampleContextTracker::getAllContextSamplesFor(StringRef Name) {
+ return FuncToCtxtProfileSet[Name];
+}
+
+FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
+ bool MergeContext) {
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
+ return getBaseSamplesFor(CanonName, MergeContext);
+}
+
+FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
+ bool MergeContext) {
+ LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n");
+ // Base profile is top-level node (child of root node), so try to retrieve
+ // existing top-level node for given function first. If it exists, it could be
+ // that we've merged base profile before, or there's actually context-less
+ // profile from the input (e.g. due to unreliable stack walking).
+ ContextTrieNode *Node = getTopLevelContextNode(Name);
+ if (MergeContext) {
+ LLVM_DEBUG(dbgs() << " Merging context profile into base profile: " << Name
+ << "\n");
+
+ // We have profile for function under different contexts,
+ // create synthetic base profile and merge context profiles
+ // into base profile.
+ for (auto *CSamples : FuncToCtxtProfileSet[Name]) {
+ SampleContext &Context = CSamples->getContext();
+ ContextTrieNode *FromNode = getContextFor(Context);
+ if (FromNode == Node)
+ continue;
+
+ // Skip inlined context profile and also don't re-merge any context
+ if (Context.hasState(InlinedContext) || Context.hasState(MergedContext))
+ continue;
+
+ ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode);
+ assert((!Node || Node == &ToNode) && "Expect only one base profile");
+ Node = &ToNode;
+ }
+ }
+
+ // Still no profile even after merge/promotion (if allowed)
+ if (!Node)
+ return nullptr;
+
+ return Node->getFunctionSamples();
+}
+
+void SampleContextTracker::markContextSamplesInlined(
+ const FunctionSamples *InlinedSamples) {
+ assert(InlinedSamples && "Expect non-null inlined samples");
+ LLVM_DEBUG(dbgs() << "Marking context profile as inlined: "
+ << InlinedSamples->getContext() << "\n");
+ InlinedSamples->getContext().setState(InlinedContext);
+}
+
+void SampleContextTracker::promoteMergeContextSamplesTree(
+ const Instruction &Inst, StringRef CalleeName) {
+ LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n"
+ << Inst << "\n");
+ // Get the caller context for the call instruction; we don't use the callee
+ // name from the call because there can also be context from indirect calls.
+ DILocation *DIL = Inst.getDebugLoc();
+ ContextTrieNode *CallerNode = getContextFor(DIL);
+ if (!CallerNode)
+ return;
+
+ // Get the context that needs to be promoted
+ LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+ // For indirect call, CalleeName will be empty, in which case we need to
+ // promote all non-inlined child context profiles.
+ if (CalleeName.empty()) {
+ for (auto &It : CallerNode->getAllChildContext()) {
+ ContextTrieNode *NodeToPromo = &It.second;
+ if (CallSite != NodeToPromo->getCallSiteLoc())
+ continue;
+ FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples();
+ if (FromSamples && FromSamples->getContext().hasState(InlinedContext))
+ continue;
+ promoteMergeContextSamplesTree(*NodeToPromo);
+ }
+ return;
+ }
+
+ // Get the context for the given callee that needs to be promoted
+ ContextTrieNode *NodeToPromo =
+ CallerNode->getChildContext(CallSite, CalleeName);
+ if (!NodeToPromo)
+ return;
+
+ promoteMergeContextSamplesTree(*NodeToPromo);
+}
+
+ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
+ ContextTrieNode &NodeToPromo) {
+ // Promote the input node to be directly under root. This can happen
+ // when we decided to not inline a function under context represented
+ // by the input node. The promote and merge is then needed to reflect
+ // the context profile in the base (context-less) profile.
+ FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples();
+ assert(FromSamples && "Shouldn't promote a context without profile");
+ LLVM_DEBUG(dbgs() << " Found context tree root to promote: "
+ << FromSamples->getContext() << "\n");
+
+ assert(!FromSamples->getContext().hasState(InlinedContext) &&
+ "Shouldn't promote inlined context profile");
+ StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext();
+ return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
+ ContextStrToRemove);
+}
+
+void SampleContextTracker::dump() {
+ dbgs() << "Context Profile Tree:\n";
+ std::queue<ContextTrieNode *> NodeQueue;
+ NodeQueue.push(&RootContext);
+
+ while (!NodeQueue.empty()) {
+ ContextTrieNode *Node = NodeQueue.front();
+ NodeQueue.pop();
+ Node->dump();
+
+ for (auto &It : Node->getAllChildContext()) {
+ ContextTrieNode *ChildNode = &It.second;
+ NodeQueue.push(ChildNode);
+ }
+ }
+}
+
+ContextTrieNode *
+SampleContextTracker::getContextFor(const SampleContext &Context) {
+ return getOrCreateContextPath(Context, false);
+}
+
+ContextTrieNode *
+SampleContextTracker::getCalleeContextFor(const DILocation *DIL,
+ StringRef CalleeName) {
+ assert(DIL && "Expect non-null location");
+
+ ContextTrieNode *CallContext = getContextFor(DIL);
+ if (!CallContext)
+ return nullptr;
+
+ // When CalleeName is empty, the child context profile with max
+ // total samples will be returned.
+ return CallContext->getChildContext(
+ FunctionSamples::getCallSiteIdentifier(DIL), CalleeName);
+}
+
+ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
+ assert(DIL && "Expect non-null location");
+ SmallVector<std::pair<LineLocation, StringRef>, 10> S;
+
+ // Use C++ linkage name if possible.
+ const DILocation *PrevDIL = DIL;
+ for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
+ StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName();
+ if (Name.empty())
+ Name = PrevDIL->getScope()->getSubprogram()->getName();
+ S.push_back(
+ std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL),
+ PrevDIL->getScope()->getSubprogram()->getLinkageName()));
+ PrevDIL = DIL;
+ }
+
+ // Push the root node; note that a root node like main may only have
+ // a name, but not a linkage name.
+ StringRef RootName = PrevDIL->getScope()->getSubprogram()->getLinkageName();
+ if (RootName.empty())
+ RootName = PrevDIL->getScope()->getSubprogram()->getName();
+ S.push_back(std::make_pair(LineLocation(0, 0), RootName));
+
+ ContextTrieNode *ContextNode = &RootContext;
+ int I = S.size();
+ while (--I >= 0 && ContextNode) {
+ LineLocation &CallSite = S[I].first;
+ StringRef &CalleeName = S[I].second;
+ ContextNode = ContextNode->getChildContext(CallSite, CalleeName);
+ }
+
+ if (I < 0)
+ return ContextNode;
+
+ return nullptr;
+}
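// getContextFor above collects (call site, callee name) frames from the
// innermost debug location outward, then replays them outermost-first to
// descend the trie from the root. A toy version of that lookup, with a
// hypothetical ToyNode in place of ContextTrieNode:
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct ToyNode {
  std::map<std::pair<uint32_t, std::string>, ToyNode> Children;
  ToyNode *getChild(uint32_t Loc, const std::string &Name) {
    auto It = Children.find({Loc, Name});
    return It == Children.end() ? nullptr : &It->second;
  }
};

ToyNode *findContext(ToyNode &Root,
                     const std::vector<std::pair<uint32_t, std::string>> &Frames) {
  ToyNode *Node = &Root;
  // Frames are innermost-first, so walk them in reverse (outermost-first).
  for (auto It = Frames.rbegin(); Node && It != Frames.rend(); ++It)
    Node = Node->getChild(It->first, It->second);
  return Node; // nullptr if any frame is missing from the trie
}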
+
+ContextTrieNode *
+SampleContextTracker::getOrCreateContextPath(const SampleContext &Context,
+ bool AllowCreate) {
+ ContextTrieNode *ContextNode = &RootContext;
+ StringRef ContextRemain = Context;
+ StringRef ChildContext;
+ StringRef CalleeName;
+ LineLocation CallSiteLoc(0, 0);
+
+ while (ContextNode && !ContextRemain.empty()) {
+ auto ContextSplit = SampleContext::splitContextString(ContextRemain);
+ ChildContext = ContextSplit.first;
+ ContextRemain = ContextSplit.second;
+ LineLocation NextCallSiteLoc(0, 0);
+ SampleContext::decodeContextString(ChildContext, CalleeName,
+ NextCallSiteLoc);
+
+ // Create child node at parent line/disc location
+ if (AllowCreate) {
+ ContextNode =
+ ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeName);
+ } else {
+ ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeName);
+ }
+ CallSiteLoc = NextCallSiteLoc;
+ }
+
+ assert((!AllowCreate || ContextNode) &&
+ "Node must exist if creation is allowed");
+ return ContextNode;
+}
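For reference, the walk above consumes one calling-context frame per loop iteration. Below is a minimal standalone sketch of such a frame-by-frame split, assuming CSSPGO's " @ " frame separator; the real splitting is done by SampleContext::splitContextString in the code above.

    #include <iostream>
    #include <string>
    #include <vector>

    // Split a context string such as "main:3 @ foo:5 @ bar" into frames,
    // outermost caller first. Illustrative only; not part of the pass.
    static std::vector<std::string> splitContextFrames(const std::string &Ctx) {
      std::vector<std::string> Frames;
      std::string::size_type Start = 0;
      while (Start <= Ctx.size()) {
        std::string::size_type Sep = Ctx.find(" @ ", Start);
        if (Sep == std::string::npos) {
          Frames.push_back(Ctx.substr(Start));
          break;
        }
        Frames.push_back(Ctx.substr(Start, Sep - Start));
        Start = Sep + 3; // skip the " @ " separator
      }
      return Frames;
    }

    int main() {
      for (const std::string &F : splitContextFrames("main:3 @ foo:5 @ bar"))
        std::cout << F << "\n"; // prints "main:3", "foo:5", "bar"
    }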
+
+ContextTrieNode *SampleContextTracker::getTopLevelContextNode(StringRef FName) {
+ return RootContext.getChildContext(LineLocation(0, 0), FName);
+}
+
+ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) {
+ assert(!getTopLevelContextNode(FName) && "Node to add must not exist");
+ return *RootContext.getOrCreateChildContext(LineLocation(0, 0), FName);
+}
+
+void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
+ ContextTrieNode &ToNode,
+ StringRef ContextStrToRemove) {
+ FunctionSamples *FromSamples = FromNode.getFunctionSamples();
+ FunctionSamples *ToSamples = ToNode.getFunctionSamples();
+ if (FromSamples && ToSamples) {
+ // Merge/duplicate FromSamples into ToSamples
+ ToSamples->merge(*FromSamples);
+ ToSamples->getContext().setState(SyntheticContext);
+ FromSamples->getContext().setState(MergedContext);
+ } else if (FromSamples) {
+ // Transfer FromSamples from FromNode to ToNode
+ ToNode.setFunctionSamples(FromSamples);
+ FromSamples->getContext().setState(SyntheticContext);
+ FromSamples->getContext().promoteOnPath(ContextStrToRemove);
+ FromNode.setFunctionSamples(nullptr);
+ }
+}
+
+ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
+ ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent,
+ StringRef ContextStrToRemove) {
+ assert(!ContextStrToRemove.empty() && "Context to remove can't be empty");
+
+ // Ignore call site location if destination is top level under root
+ LineLocation NewCallSiteLoc = LineLocation(0, 0);
+ LineLocation OldCallSiteLoc = FromNode.getCallSiteLoc();
+ ContextTrieNode &FromNodeParent = *FromNode.getParentContext();
+ ContextTrieNode *ToNode = nullptr;
+ bool MoveToRoot = (&ToNodeParent == &RootContext);
+ if (!MoveToRoot) {
+ NewCallSiteLoc = OldCallSiteLoc;
+ }
+
+ // Locate destination node, create/move if not existing
+ ToNode = ToNodeParent.getChildContext(NewCallSiteLoc, FromNode.getFuncName());
+ if (!ToNode) {
+ // Do not delete node to move from its parent here because
+ // caller is iterating over children of that parent node.
+ ToNode = &ToNodeParent.moveToChildContext(
+ NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, false);
+ } else {
+ // Destination node exists, merge samples for the context tree
+ mergeContextNode(FromNode, *ToNode, ContextStrToRemove);
+ LLVM_DEBUG(dbgs() << " Context promoted and merged to: "
+ << ToNode->getFunctionSamples()->getContext() << "\n");
+
+ // Recursively promote and merge children
+ for (auto &It : FromNode.getAllChildContext()) {
+ ContextTrieNode &FromChildNode = It.second;
+ promoteMergeContextSamplesTree(FromChildNode, *ToNode,
+ ContextStrToRemove);
+ }
+
+ // Remove children once they're all merged
+ FromNode.getAllChildContext().clear();
+ }
+
+ // For root of subtree, remove itself from old parent too
+ if (MoveToRoot)
+ FromNodeParent.removeChildContext(OldCallSiteLoc, ToNode->getFuncName());
+
+ return *ToNode;
+}
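Promotion, as used above, conceptually strips the leading caller frames from a context. Below is a tiny hypothetical helper showing the effect on a context string, again assuming the " @ " separator; the pass itself relies on SampleContext::promoteOnPath.

    #include <iostream>
    #include <string>

    // Removing the caller prefix "main:3" from "main:3 @ foo:5 @ bar"
    // yields "foo:5 @ bar", i.e. foo becomes a top-level context.
    static std::string promoteContext(const std::string &Ctx,
                                      const std::string &Prefix) {
      if (Ctx.compare(0, Prefix.size(), Prefix) != 0)
        return Ctx; // prefix does not match, leave the context unchanged
      std::string::size_type Pos = Prefix.size();
      const std::string Sep = " @ ";
      if (Ctx.compare(Pos, Sep.size(), Sep) == 0)
        Pos += Sep.size();
      return Ctx.substr(Pos);
    }

    int main() {
      std::cout << promoteContext("main:3 @ foo:5 @ bar", "main:3") << "\n";
    }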
+
+// Replace call graph edges with dynamic call edges from the profile.
+void SampleContextTracker::addCallGraphEdges(CallGraph &CG,
+ StringMap<Function *> &SymbolMap) {
+ // Add profile call edges to the call graph.
+ std::queue<ContextTrieNode *> NodeQueue;
+ NodeQueue.push(&RootContext);
+ while (!NodeQueue.empty()) {
+ ContextTrieNode *Node = NodeQueue.front();
+ NodeQueue.pop();
+ Function *F = SymbolMap.lookup(Node->getFuncName());
+ for (auto &I : Node->getAllChildContext()) {
+ ContextTrieNode *ChildNode = &I.second;
+ NodeQueue.push(ChildNode);
+ if (F && !F->isDeclaration()) {
+ Function *Callee = SymbolMap.lookup(ChildNode->getFuncName());
+ if (Callee && !Callee->isDeclaration())
+ CG[F]->addCalledFunction(nullptr, CG[Callee]);
+ }
+ }
+ }
+}
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
index b6871e260532..a6a419bfe742 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -43,6 +44,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -75,10 +77,11 @@
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/SampleContextTracker.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/MisExpect.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -102,6 +105,19 @@ STATISTIC(NumCSInlined,
"Number of functions inlined with context sensitive profile");
STATISTIC(NumCSNotInlined,
"Number of functions not inlined with context sensitive profile");
+STATISTIC(NumMismatchedProfile,
+ "Number of functions with CFG mismatched profile");
+STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
+STATISTIC(NumDuplicatedInlinesite,
+ "Number of inlined callsites with a partial distribution factor");
+
+STATISTIC(NumCSInlinedHitMinLimit,
+ "Number of functions with FDO inline stopped due to min size limit");
+STATISTIC(NumCSInlinedHitMaxLimit,
+ "Number of functions with FDO inline stopped due to max size limit");
+STATISTIC(
+ NumCSInlinedHitGrowthLimit,
+ "Number of functions with FDO inline stopped due to growth size limit");
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
@@ -161,15 +177,64 @@ static cl::opt<bool> ProfileTopDownLoad(
"order of call graph during sample profile loading. It only "
"works for new pass manager. "));
+static cl::opt<bool> UseProfileIndirectCallEdges(
+ "use-profile-indirect-call-edges", cl::init(true), cl::Hidden,
+ cl::desc("Considering indirect call samples from profile when top-down "
+ "processing functions. Only CSSPGO is supported."));
+
+static cl::opt<bool> UseProfileTopDownOrder(
+ "use-profile-top-down-order", cl::init(false), cl::Hidden,
+ cl::desc("Process functions in one SCC in a top-down order "
+ "based on the input profile."));
+
static cl::opt<bool> ProfileSizeInline(
"sample-profile-inline-size", cl::Hidden, cl::init(false),
cl::desc("Inline cold call sites in profile loader if it's beneficial "
"for code size."));
+static cl::opt<int> ProfileInlineGrowthLimit(
+ "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
+ cl::desc("The size growth ratio limit for proirity-based sample profile "
+ "loader inlining."));
+
+static cl::opt<int> ProfileInlineLimitMin(
+ "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
+ cl::desc("The lower bound of size growth limit for "
+ "proirity-based sample profile loader inlining."));
+
+static cl::opt<int> ProfileInlineLimitMax(
+ "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
+ cl::desc("The upper bound of size growth limit for "
+ "proirity-based sample profile loader inlining."));
+
+static cl::opt<int> ProfileICPThreshold(
+ "sample-profile-icp-threshold", cl::Hidden, cl::init(5),
+ cl::desc(
+ "Relative hotness threshold for indirect "
+ "call promotion in proirity-based sample profile loader inlining."));
+
+static cl::opt<int> SampleHotCallSiteThreshold(
+ "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
+ cl::desc("Hot callsite threshold for proirity-based sample profile loader "
+ "inlining."));
+
+static cl::opt<bool> CallsitePrioritizedInline(
+ "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Use call site prioritized inlining for sample profile loader."
+ "Currently only CSSPGO is supported."));
+
static cl::opt<int> SampleColdCallSiteThreshold(
"sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
cl::desc("Threshold for inlining cold callsites"));
+static cl::opt<std::string> ProfileInlineReplayFile(
+ "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
+ cl::desc(
+ "Optimization remarks file containing inline remarks to be replayed "
+ "by inlining from sample profile loader."),
+ cl::Hidden);
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -301,6 +366,38 @@ private:
DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
};
+// Inline candidate used by iterative callsite prioritized inliner
+struct InlineCandidate {
+ CallBase *CallInstr;
+ const FunctionSamples *CalleeSamples;
+ // Prorated callsite count, which will be used to guide inlining. For example,
+ // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
+ // copies will get their own distribution factors and their prorated counts
+ // will be used to decide if they should be inlined independently.
+ uint64_t CallsiteCount;
+ // Call site distribution factor to prorate the profile samples for a
+ // duplicated callsite. Default value is 1.0.
+ float CallsiteDistribution;
+};
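The prorated count mentioned above is simply the callee entry count scaled by the callsite's distribution factor. A short standalone sketch with hypothetical numbers (two LTO-prelink copies carrying factors 0.25 and 0.75):

    #include <cstdint>
    #include <iostream>

    // Scale a callee entry count by one copy's distribution factor so each
    // duplicated callsite can be evaluated for inlining independently.
    static uint64_t proratedCount(uint64_t CalleeEntrySamples, float Factor) {
      return static_cast<uint64_t>(CalleeEntrySamples * Factor);
    }

    int main() {
      const uint64_t EntrySamples = 1000;
      std::cout << proratedCount(EntrySamples, 0.25f) << "\n"; // 250
      std::cout << proratedCount(EntrySamples, 0.75f) << "\n"; // 750
    }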
+
+// Inline candidate comparer using call site weight
+struct CandidateComparer {
+ bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
+ if (LHS.CallsiteCount != RHS.CallsiteCount)
+ return LHS.CallsiteCount < RHS.CallsiteCount;
+
+ // Tie breaker using GUID so we have stable/deterministic inlining order
+ assert(LHS.CalleeSamples && RHS.CalleeSamples &&
+ "Expect non-null FunctionSamples");
+ return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
+ RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
+ }
+};
+
+using CandidateQueue =
+ PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
+ CandidateComparer>;
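A self-contained illustration of how a comparer like the one above turns the queue into a max-heap keyed on callsite count, with a deterministic tie-break (hypothetical GUID values; std::priority_queue stands in for llvm::PriorityQueue):

    #include <cstdint>
    #include <iostream>
    #include <queue>
    #include <vector>

    struct Candidate {
      uint64_t CallsiteCount;
      uint64_t GUID; // tie-breaker for a deterministic order
    };

    struct Compare {
      bool operator()(const Candidate &L, const Candidate &R) const {
        if (L.CallsiteCount != R.CallsiteCount)
          return L.CallsiteCount < R.CallsiteCount; // hotter candidate on top
        return L.GUID < R.GUID;
      }
    };

    int main() {
      std::priority_queue<Candidate, std::vector<Candidate>, Compare> Q;
      Q.push({100, 1});
      Q.push({500, 2});
      Q.push({500, 3});
      while (!Q.empty()) { // pops 500:3, 500:2, 100:1
        std::cout << Q.top().CallsiteCount << ":" << Q.top().GUID << "\n";
        Q.pop();
      }
    }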
+
/// Sample profile pass.
///
/// This pass reads profile data from the file specified by
@@ -309,17 +406,16 @@ private:
class SampleProfileLoader {
public:
SampleProfileLoader(
- StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
+ StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
std::function<const TargetLibraryInfo &(Function &)> GetTLI)
: GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
CoverageTracker(*this), Filename(std::string(Name)),
- RemappingFilename(std::string(RemapName)),
- IsThinLTOPreLink(IsThinLTOPreLink) {}
+ RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
- bool doInitialization(Module &M);
+ bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI, CallGraph *CG);
@@ -332,15 +428,28 @@ protected:
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
+ ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- bool inlineCallInstruction(CallBase &CB);
+ // Attempt to promote indirect call and also inline the promoted call
+ bool tryPromoteAndInlineCandidate(
+ Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
+ uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
+ SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
+ bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
+ bool
+ tryInlineCandidate(InlineCandidate &Candidate,
+ SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
+ bool
+ inlineHotFunctionsWithPriority(Function &F,
+ DenseSet<GlobalValue::GUID> &InlinedGUIDs);
// Inline cold/small functions in addition to hot ones
bool shouldInlineColdCallee(CallBase &CallInst);
void emitOptimizationRemarksForInlineCandidates(
@@ -359,6 +468,8 @@ protected:
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
+ void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples);
+ void replaceCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
void clearFunctionData();
@@ -417,6 +528,9 @@ protected:
/// Profile reader object.
std::unique_ptr<SampleProfileReader> Reader;
+ /// Profile tracker for different context.
+ std::unique_ptr<SampleContextTracker> ContextTracker;
+
/// Samples collected for the body of this function.
FunctionSamples *Samples = nullptr;
@@ -429,11 +543,15 @@ protected:
/// Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid = false;
- /// Flag indicating if the pass is invoked in ThinLTO compile phase.
+ /// Flag indicating whether input profile is context-sensitive
+ bool ProfileIsCS = false;
+
+ /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
///
- /// In this phase, in annotation, we should not promote indirect calls.
- /// Instead, we will mark GUIDs that needs to be annotated to the function.
- bool IsThinLTOPreLink;
+ /// We need to know the LTO phase because for example in ThinLTOPrelink
+ /// phase, in annotation, we should not promote indirect calls. Instead,
+ /// we will mark GUIDs that need to be annotated to the function.
+ ThinOrFullLTOPhase LTOPhase;
/// Profile Summary Info computed from sample profile.
ProfileSummaryInfo *PSI = nullptr;
@@ -473,6 +591,12 @@ protected:
// overridden by -profile-sample-accurate or profile-sample-accurate
// attribute.
bool ProfAccForSymsInList;
+
+ // External inline advisor used to replay inline decision from remarks.
+ std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
+
+ // A pseudo probe helper to correlate the imported sample counts.
+ std::unique_ptr<PseudoProbeManager> ProbeManager;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -480,10 +604,11 @@ public:
// Class identification, replacement for typeinfo
static char ID;
- SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
- bool IsThinLTOPreLink = false)
+ SampleProfileLoaderLegacyPass(
+ StringRef Name = SampleProfileFile,
+ ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
: ModulePass(ID), SampleLoader(
- Name, SampleProfileRemappingFile, IsThinLTOPreLink,
+ Name, SampleProfileRemappingFile, LTOPhase,
[&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
},
@@ -705,6 +830,9 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
///
/// \returns the weight of \p Inst.
ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
+ if (FunctionSamples::ProfileIsProbeBased)
+ return getProbeWeight(Inst);
+
const DebugLoc &DLoc = Inst.getDebugLoc();
if (!DLoc)
return std::error_code();
@@ -723,9 +851,10 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// (findCalleeFunctionSamples returns non-empty result), but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
- if (auto *CB = dyn_cast<CallBase>(&Inst))
- if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
- return 0;
+ if (!ProfileIsCS)
+ if (const auto *CB = dyn_cast<CallBase>(&Inst))
+ if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
+ return 0;
const DILocation *DIL = DLoc;
uint32_t LineOffset = FunctionSamples::getOffset(DIL);
@@ -757,6 +886,51 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
return R;
}
+ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
+ assert(FunctionSamples::ProfileIsProbeBased &&
+ "Profile is not pseudo probe based");
+ Optional<PseudoProbe> Probe = extractProbe(Inst);
+ if (!Probe)
+ return std::error_code();
+
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (!FS)
+ return std::error_code();
+
+ // If a direct call/invoke instruction is inlined in profile
+ // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
+ // it means that the inlined callsite has no sample, thus the call
+ // instruction should have 0 count.
+ if (const auto *CB = dyn_cast<CallBase>(&Inst))
+ if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
+ return 0;
+
+ const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
+ if (R) {
+ uint64_t Samples = R.get() * Probe->Factor;
+ bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
+ if (FirstMark) {
+ ORE->emit([&]() {
+ OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
+ Remark << "Applied " << ore::NV("NumSamples", Samples);
+ Remark << " samples from profile (ProbeId=";
+ Remark << ore::NV("ProbeId", Probe->Id);
+ Remark << ", Factor=";
+ Remark << ore::NV("Factor", Probe->Factor);
+ Remark << ", OriginalSamples=";
+ Remark << ore::NV("OriginalSamples", R.get());
+ Remark << ")";
+ return Remark;
+ });
+ }
+ LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
+ << " - weight: " << R.get() << " - factor: "
+ << format("%0.2f", Probe->Factor) << ")\n");
+ return Samples;
+ }
+ return R;
+}
+
/// Compute the weight of a basic block.
///
/// The weight of basic block \p BB is the maximum weight of all the
@@ -820,17 +994,18 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
}
StringRef CalleeName;
- if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
- if (Function *Callee = CI->getCalledFunction())
- CalleeName = Callee->getName();
+ if (Function *Callee = Inst.getCalledFunction())
+ CalleeName = FunctionSamples::getCanonicalFnName(*Callee);
+
+ if (ProfileIsCS)
+ return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return nullptr;
- return FS->findFunctionSamplesAt(LineLocation(FunctionSamples::getOffset(DIL),
- DIL->getBaseDiscriminator()),
- CalleeName);
+ return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
+ CalleeName, Reader->getRemapper());
}
/// Returns a vector of FunctionSamples that are the indirect call targets
@@ -846,32 +1021,49 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
return R;
}
+ auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
+ assert(L && R && "Expect non-null FunctionSamples");
+ if (L->getEntrySamples() != R->getEntrySamples())
+ return L->getEntrySamples() > R->getEntrySamples();
+ return FunctionSamples::getGUID(L->getName()) <
+ FunctionSamples::getGUID(R->getName());
+ };
+
+ if (ProfileIsCS) {
+ auto CalleeSamples =
+ ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
+ if (CalleeSamples.empty())
+ return R;
+
+ // For CSSPGO, we only use the target context profile's entry count,
+ // as that already includes both inlined callees and non-inlined ones.
+ Sum = 0;
+ for (const auto *const FS : CalleeSamples) {
+ Sum += FS->getEntrySamples();
+ R.push_back(FS);
+ }
+ llvm::sort(R, FSCompare);
+ return R;
+ }
+
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return R;
- uint32_t LineOffset = FunctionSamples::getOffset(DIL);
- uint32_t Discriminator = DIL->getBaseDiscriminator();
-
- auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
+ auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+ auto T = FS->findCallTargetMapAt(CallSite);
Sum = 0;
if (T)
for (const auto &T_C : T.get())
Sum += T_C.second;
- if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(LineLocation(
- FunctionSamples::getOffset(DIL), DIL->getBaseDiscriminator()))) {
+ if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
if (M->empty())
return R;
for (const auto &NameFS : *M) {
Sum += NameFS.second.getEntrySamples();
R.push_back(&NameFS.second);
}
- llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
- if (L->getEntrySamples() != R->getEntrySamples())
- return L->getEntrySamples() > R->getEntrySamples();
- return FunctionSamples::getGUID(L->getName()) <
- FunctionSamples::getGUID(R->getName());
- });
+ llvm::sort(R, FSCompare);
}
return R;
}
@@ -887,42 +1079,85 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
+ if (FunctionSamples::ProfileIsProbeBased) {
+ Optional<PseudoProbe> Probe = extractProbe(Inst);
+ if (!Probe)
+ return nullptr;
+ }
+
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL)
return Samples;
auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
- if (it.second)
- it.first->second = Samples->findFunctionSamples(DIL);
+ if (it.second) {
+ if (ProfileIsCS)
+ it.first->second = ContextTracker->getContextSamplesFor(DIL);
+ else
+ it.first->second =
+ Samples->findFunctionSamples(DIL, Reader->getRemapper());
+ }
return it.first->second;
}
-bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
- Function *CalledFunction = CB.getCalledFunction();
- assert(CalledFunction);
- DebugLoc DLoc = CB.getDebugLoc();
- BasicBlock *BB = CB.getParent();
- InlineParams Params = getInlineParams();
- Params.ComputeFullInlineCost = true;
- // Checks if there is anything in the reachable portion of the callee at
- // this callsite that makes this inlining potentially illegal. Need to
- // set ComputeFullInlineCost, otherwise getInlineCost may return early
- // when cost exceeds threshold without checking all IRs in the callee.
- // The acutal cost does not matter because we only checks isNever() to
- // see if it is legal to inline the callsite.
- InlineCost Cost =
- getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI);
- if (Cost.isNever()) {
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
- << "incompatible inlining");
- return false;
- }
- InlineFunctionInfo IFI(nullptr, GetAC);
- if (InlineFunction(CB, IFI).isSuccess()) {
- // The call to InlineFunction erases I, so we can't pass it here.
- emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
- true, CSINLINE_DEBUG);
- return true;
+/// Attempt to promote indirect call and also inline the promoted call.
+///
+/// \param F Caller function.
+/// \param Candidate ICP and inline candidate.
+/// \param SumOrigin Original sum of target counts for the indirect call
+/// before any promotion.
+/// \param Sum Sum of target counts for indirect call.
+/// \param PromotedInsns Map to keep track of indirect calls already processed.
+/// \param InlinedCallSite Output vector for new call sites exposed after
+/// inlining.
+bool SampleProfileLoader::tryPromoteAndInlineCandidate(
+ Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
+ DenseSet<Instruction *> &PromotedInsns,
+ SmallVector<CallBase *, 8> *InlinedCallSite) {
+ const char *Reason = "Callee function not available";
+ // R->getValue() != &F is to prevent promoting a recursive call.
+ // If it is a recursive call, we do not inline it as it could bloat
+ // the code exponentially. There is a way to handle this better, e.g.
+ // clone the caller first, and inline the cloned caller if it is
+ // recursive. As llvm does not inline recursive calls, we will
+ // simply ignore it instead of handling it explicitly.
+ auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
+ if (R != SymbolMap.end() && R->getValue() &&
+ !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
+ R->getValue()->hasFnAttribute("use-sample-profile") &&
+ R->getValue() != &F &&
+ isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) {
+ auto *DI =
+ &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(),
+ Candidate.CallsiteCount, Sum, false, ORE);
+ if (DI) {
+ Sum -= Candidate.CallsiteCount;
+ // Prorate the indirect callsite distribution.
+ // Do not update the promoted direct callsite distribution at this
+ // point since the original distribution combined with the callee
+ // profile will be used to prorate callsites from the callee if
+ // inlined. Once not inlined, the direct callsite distribution should
+ // be prorated so that it will reflect the real callsite counts.
+ setProbeDistributionFactor(*Candidate.CallInstr,
+ Candidate.CallsiteDistribution * Sum /
+ SumOrigin);
+ PromotedInsns.insert(Candidate.CallInstr);
+ Candidate.CallInstr = DI;
+ if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
+ bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
+ if (!Inlined) {
+ // Prorate the direct callsite distribution so that it reflects real
+ // callsite counts.
+ setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
+ Candidate.CallsiteCount /
+ SumOrigin);
+ }
+ return Inlined;
+ }
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
+ << Candidate.CalleeSamples->getFuncName() << " because "
+ << Reason << "\n");
}
return false;
}
@@ -938,6 +1173,12 @@ bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
GetAC, GetTLI);
+ if (Cost.isNever())
+ return false;
+
+ if (Cost.isAlways())
+ return true;
+
return Cost.getCost() <= SampleColdCallSiteThreshold;
}
@@ -982,10 +1223,11 @@ bool SampleProfileLoader::inlineHotFunctions(
"ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled");
- DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites;
+ DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
bool Changed = false;
- while (true) {
- bool LocalChanged = false;
+ bool LocalChanged = true;
+ while (LocalChanged) {
+ LocalChanged = false;
SmallVector<CallBase *, 10> CIS;
for (auto &BB : F) {
bool Hot = false;
@@ -995,9 +1237,11 @@ bool SampleProfileLoader::inlineHotFunctions(
const FunctionSamples *FS = nullptr;
if (auto *CB = dyn_cast<CallBase>(&I)) {
if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) {
+ assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
+ "GUIDToFuncNameMap has to be populated");
AllCandidates.push_back(CB);
- if (FS->getEntrySamples() > 0)
- localNotInlinedCallSites.try_emplace(CB, FS);
+ if (FS->getEntrySamples() > 0 || ProfileIsCS)
+ LocalNotInlinedCallSites.try_emplace(CB, FS);
if (callsiteIsHot(FS, PSI))
Hot = true;
else if (shouldInlineColdCallee(*CB))
@@ -1005,7 +1249,7 @@ bool SampleProfileLoader::inlineHotFunctions(
}
}
}
- if (Hot) {
+ if (Hot || ExternalInlineAdvisor) {
CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
} else {
@@ -1015,6 +1259,11 @@ bool SampleProfileLoader::inlineHotFunctions(
}
for (CallBase *I : CIS) {
Function *CalledFunction = I->getCalledFunction();
+ InlineCandidate Candidate = {
+ I,
+ LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
+ : nullptr,
+ 0 /* dummy count */, 1.0 /* dummy distribution factor */};
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
@@ -1023,69 +1272,43 @@ bool SampleProfileLoader::inlineHotFunctions(
continue;
uint64_t Sum;
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
- if (IsThinLTOPreLink) {
+ uint64_t SumOrigin = Sum;
+ if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
PSI->getOrCompHotCountThreshold());
continue;
}
- auto CalleeFunctionName = FS->getFuncName();
- // If it is a recursive call, we do not inline it as it could bloat
- // the code exponentially. There is way to better handle this, e.g.
- // clone the caller first, and inline the cloned caller if it is
- // recursive. As llvm does not inline recursive calls, we will
- // simply ignore it instead of handling it explicitly.
- if (CalleeFunctionName == F.getName())
- continue;
-
if (!callsiteIsHot(FS, PSI))
continue;
- const char *Reason = "Callee function not available";
- auto R = SymbolMap.find(CalleeFunctionName);
- if (R != SymbolMap.end() && R->getValue() &&
- !R->getValue()->isDeclaration() &&
- R->getValue()->getSubprogram() &&
- R->getValue()->hasFnAttribute("use-sample-profile") &&
- isLegalToPromote(*I, R->getValue(), &Reason)) {
- uint64_t C = FS->getEntrySamples();
- auto &DI =
- pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE);
- Sum -= C;
- PromotedInsns.insert(I);
- // If profile mismatches, we should not attempt to inline DI.
- if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
- inlineCallInstruction(cast<CallBase>(DI))) {
- localNotInlinedCallSites.erase(I);
- LocalChanged = true;
- ++NumCSInlined;
- }
- } else {
- LLVM_DEBUG(dbgs()
- << "\nFailed to promote indirect call to "
- << CalleeFunctionName << " because " << Reason << "\n");
+ Candidate = {I, FS, FS->getEntrySamples(), 1.0};
+ if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+ PromotedInsns)) {
+ LocalNotInlinedCallSites.erase(I);
+ LocalChanged = true;
}
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
!CalledFunction->isDeclaration()) {
- if (inlineCallInstruction(*I)) {
- localNotInlinedCallSites.erase(I);
+ if (tryInlineCandidate(Candidate)) {
+ LocalNotInlinedCallSites.erase(I);
LocalChanged = true;
- ++NumCSInlined;
}
- } else if (IsThinLTOPreLink) {
+ } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions(
InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
}
}
- if (LocalChanged) {
- Changed = true;
- } else {
- break;
- }
+ Changed |= LocalChanged;
}
+ // For CS profile, profiles for not-inlined contexts will be merged when
+ // the base profile is being retrieved.
+ if (ProfileIsCS)
+ return Changed;
+
// Accumulate not inlined callsite information into notInlinedSamples
- for (const auto &Pair : localNotInlinedCallSites) {
+ for (const auto &Pair : LocalNotInlinedCallSites) {
CallBase *I = Pair.getFirst();
Function *Callee = I->getCalledFunction();
if (!Callee || Callee->isDeclaration())
@@ -1104,16 +1327,23 @@ bool SampleProfileLoader::inlineHotFunctions(
}
if (ProfileMergeInlinee) {
- // Use entry samples as head samples during the merge, as inlinees
- // don't have head samples.
- assert(FS->getHeadSamples() == 0 && "Expect 0 head sample for inlinee");
- const_cast<FunctionSamples *>(FS)->addHeadSamples(FS->getEntrySamples());
-
- // Note that we have to do the merge right after processing function.
- // This allows OutlineFS's profile to be used for annotation during
- // top-down processing of functions' annotation.
- FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
- OutlineFS->merge(*FS);
+ // A function call can be replicated by optimizations like callsite
+ // splitting or jump threading and the replicates end up sharing the
+ // sample nested callee profile instead of slicing the original inlinee's
+ // profile. We want to do the merge exactly once by filtering out callee
+ // profiles with a non-zero head sample count.
+ if (FS->getHeadSamples() == 0) {
+ // Use entry samples as head samples during the merge, as inlinees
+ // don't have head samples.
+ const_cast<FunctionSamples *>(FS)->addHeadSamples(
+ FS->getEntrySamples());
+
+ // Note that we have to do the merge right after processing function.
+ // This allows OutlineFS's profile to be used for annotation during
+ // top-down processing of functions' annotation.
+ FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
+ OutlineFS->merge(*FS);
+ }
} else {
auto pair =
notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
@@ -1123,6 +1353,266 @@ bool SampleProfileLoader::inlineHotFunctions(
return Changed;
}
+bool SampleProfileLoader::tryInlineCandidate(
+ InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
+
+ CallBase &CB = *Candidate.CallInstr;
+ Function *CalledFunction = CB.getCalledFunction();
+ assert(CalledFunction && "Expect a callee with definition");
+ DebugLoc DLoc = CB.getDebugLoc();
+ BasicBlock *BB = CB.getParent();
+
+ InlineCost Cost = shouldInlineCandidate(Candidate);
+ if (Cost.isNever()) {
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
+ << "incompatible inlining");
+ return false;
+ }
+
+ if (!Cost)
+ return false;
+
+ InlineFunctionInfo IFI(nullptr, GetAC);
+ if (InlineFunction(CB, IFI).isSuccess()) {
+ // The call to InlineFunction erases I, so we can't pass it here.
+ emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
+ true, CSINLINE_DEBUG);
+
+ // Now populate the list of newly exposed call sites.
+ if (InlinedCallSites) {
+ InlinedCallSites->clear();
+ for (auto &I : IFI.InlinedCallSites)
+ InlinedCallSites->push_back(I);
+ }
+
+ if (ProfileIsCS)
+ ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
+ ++NumCSInlined;
+
+ // Prorate inlined probes for a duplicated inlining callsite which probably
+ // has a distribution less than 100%. Samples for an inlinee should be
+ // distributed among the copies of the original callsite based on each
+ // callsite's distribution factor for counts accuracy. Note that an inlined
+ // probe may come with its own distribution factor if it has been duplicated
+ // in the inlinee body. The two factors are multiplied to reflect the
+ // aggregation of duplication.
+ if (Candidate.CallsiteDistribution < 1) {
+ for (auto &I : IFI.InlinedCallSites) {
+ if (Optional<PseudoProbe> Probe = extractProbe(*I))
+ setProbeDistributionFactor(*I, Probe->Factor *
+ Candidate.CallsiteDistribution);
+ }
+ NumDuplicatedInlinesite++;
+ }
+
+ return true;
+ }
+ return false;
+}
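The factor aggregation described in the comment above is a plain multiplication of the inlinee probe's own factor with the callsite's factor. A one-line standalone check with hypothetical factors:

    #include <iostream>

    int main() {
      // A probe already duplicated inside the inlinee (factor 0.5), inlined
      // through a callsite holding 40% of the original distribution, ends up
      // with an aggregate factor of 0.5 * 0.4 = 0.2.
      float ProbeFactor = 0.5f;
      float CallsiteDistribution = 0.4f;
      std::cout << ProbeFactor * CallsiteDistribution << "\n"; // 0.2
    }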
+
+bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
+ CallBase *CB) {
+ assert(CB && "Expect non-null call instruction");
+
+ if (isa<IntrinsicInst>(CB))
+ return false;
+
+ // Find the callee's profile. For indirect call, find hottest target profile.
+ const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
+ if (!CalleeSamples)
+ return false;
+
+ float Factor = 1.0;
+ if (Optional<PseudoProbe> Probe = extractProbe(*CB))
+ Factor = Probe->Factor;
+
+ uint64_t CallsiteCount = 0;
+ ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
+ if (Weight)
+ CallsiteCount = Weight.get();
+ if (CalleeSamples)
+ CallsiteCount = std::max(
+ CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
+
+ *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
+ return true;
+}
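Put differently, the callsite count computed above is the larger of the enclosing block's weight and the callee's prorated entry samples. A minimal sketch with hypothetical values:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    static uint64_t callsiteCount(uint64_t BlockWeight,
                                  uint64_t CalleeEntrySamples, float Factor) {
      // Take whichever signal claims more samples for the callsite.
      return std::max(BlockWeight,
                      static_cast<uint64_t>(CalleeEntrySamples * Factor));
    }

    int main() {
      std::cout << callsiteCount(120, 400, 0.25f) << "\n"; // max(120, 100) = 120
      std::cout << callsiteCount(120, 400, 1.0f) << "\n";  // max(120, 400) = 400
    }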
+
+InlineCost
+SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+ std::unique_ptr<InlineAdvice> Advice = nullptr;
+ if (ExternalInlineAdvisor) {
+ Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
+ return InlineCost::getNever("not previously inlined");
+ }
+ Advice->recordInlining();
+ return InlineCost::getAlways("previously inlined");
+ }
+
+ // Adjust the threshold based on call site hotness; only do this for the
+ // callsite-prioritized inliner because otherwise the cost-benefit check is
+ // done earlier.
+ int SampleThreshold = SampleColdCallSiteThreshold;
+ if (CallsitePrioritizedInline) {
+ if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
+ SampleThreshold = SampleHotCallSiteThreshold;
+ else if (!ProfileSizeInline)
+ return InlineCost::getNever("cold callsite");
+ }
+
+ Function *Callee = Candidate.CallInstr->getCalledFunction();
+ assert(Callee && "Expect a definition for inline candidate of direct call");
+
+ InlineParams Params = getInlineParams();
+ Params.ComputeFullInlineCost = true;
+ // Checks if there is anything in the reachable portion of the callee at
+ // this callsite that makes this inlining potentially illegal. Need to
+ // set ComputeFullInlineCost, otherwise getInlineCost may return early
+ // when cost exceeds threshold without checking all IRs in the callee.
+ // The actual cost does not matter because we only check isNever() to
+ // see if it is legal to inline the callsite.
+ InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
+ GetTTI(*Callee), GetAC, GetTLI);
+
+ // Honor always inline and never inline from call analyzer
+ if (Cost.isNever() || Cost.isAlways())
+ return Cost;
+
+ // For old FDO inliner, we inline the call site as long as cost is not
+ // "Never". The cost-benefit check is done earlier.
+ if (!CallsitePrioritizedInline) {
+ return InlineCost::get(Cost.getCost(), INT_MAX);
+ }
+
+ // Otherwise only use the cost from the call analyzer, but overwrite the
+ // threshold with the Sample PGO threshold.
+ return InlineCost::get(Cost.getCost(), SampleThreshold);
+}
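The threshold choice above can be read as a small decision function: a hot callsite gets the large hot-callsite threshold, a cold one is rejected unless size-based inlining of cold callees is enabled, otherwise the cold threshold applies. A sketch with hypothetical counts and the pass's default threshold values (3000 and 45):

    #include <cstdint>
    #include <iostream>

    // Returns the chosen threshold, or -1 to signal "do not inline".
    static int pickSampleThreshold(uint64_t CallsiteCount,
                                   uint64_t HotCountThreshold,
                                   bool SizeInlineColdCallees) {
      const int HotThreshold = 3000, ColdThreshold = 45;
      if (CallsiteCount > HotCountThreshold)
        return HotThreshold;
      return SizeInlineColdCallees ? ColdThreshold : -1;
    }

    int main() {
      std::cout << pickSampleThreshold(5000, 1000, false) << "\n"; // 3000
      std::cout << pickSampleThreshold(10, 1000, true) << "\n";    // 45
      std::cout << pickSampleThreshold(10, 1000, false) << "\n";   // -1
    }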
+
+bool SampleProfileLoader::inlineHotFunctionsWithPriority(
+ Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
+ DenseSet<Instruction *> PromotedInsns;
+ assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
+
+ // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+ // Profile symbol list is ignored when profile-sample-accurate is on.
+ assert((!ProfAccForSymsInList ||
+ (!ProfileSampleAccurate &&
+ !F.hasFnAttribute("profile-sample-accurate"))) &&
+ "ProfAccForSymsInList should be false when profile-sample-accurate "
+ "is enabled");
+
+ // Populating worklist with initial call sites from root inliner, along
+ // with call site weights.
+ CandidateQueue CQueue;
+ InlineCandidate NewCandidate;
+ for (auto &BB : F) {
+ for (auto &I : BB.getInstList()) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+ if (getInlineCandidate(&NewCandidate, CB))
+ CQueue.push(NewCandidate);
+ }
+ }
+
+ // Cap the size growth from profile guided inlining. This is needed even
+ // though cost of each inline candidate already accounts for callee size,
+ // because with top-down inlining, the caller can still grow significantly
+ // when a large number of smaller inlinees each pass the cost check.
+ assert(ProfileInlineLimitMax >= ProfileInlineLimitMin &&
+ "Max inline size limit should not be smaller than min inline size "
+ "limit.");
+ unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
+ SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
+ SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
+ if (ExternalInlineAdvisor)
+ SizeLimit = std::numeric_limits<unsigned>::max();
+
+ // Perform iterative BFS call site prioritized inlining
+ bool Changed = false;
+ while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
+ InlineCandidate Candidate = CQueue.top();
+ CQueue.pop();
+ CallBase *I = Candidate.CallInstr;
+ Function *CalledFunction = I->getCalledFunction();
+
+ if (CalledFunction == &F)
+ continue;
+ if (I->isIndirectCall()) {
+ if (PromotedInsns.count(I))
+ continue;
+ uint64_t Sum;
+ auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
+ uint64_t SumOrigin = Sum;
+ Sum *= Candidate.CallsiteDistribution;
+ for (const auto *FS : CalleeSamples) {
+ // TODO: Consider disabling pre-LTO ICP for MonoLTO as well
+ if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+ FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
+ PSI->getOrCompHotCountThreshold());
+ continue;
+ }
+ uint64_t EntryCountDistributed =
+ FS->getEntrySamples() * Candidate.CallsiteDistribution;
+ // In addition to the regular inline cost check, we also need to make sure
+ // ICP isn't introducing excessive speculative checks even if an individual
+ // target looks beneficial to promote and inline. That means we should
+ // only do ICP when there is a small number of dominant targets.
+ if (EntryCountDistributed < SumOrigin / ProfileICPThreshold)
+ break;
+ // TODO: Fix CallAnalyzer to handle all indirect calls.
+ // For indirect call, we don't run CallAnalyzer to get InlineCost
+ // before actual inlining. This is because we could see two different
+ // types from the same definition, which makes CallAnalyzer choke as
+ // it's expecting matching parameter type on both caller and callee
+ // side. See example from PR18962 for the triggering cases (the bug was
+ // fixed, but we generate different types).
+ if (!PSI->isHotCount(EntryCountDistributed))
+ break;
+ SmallVector<CallBase *, 8> InlinedCallSites;
+ // Attach function profile for promoted indirect callee, and update
+ // call site count for the promoted inline candidate too.
+ Candidate = {I, FS, EntryCountDistributed,
+ Candidate.CallsiteDistribution};
+ if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+ PromotedInsns, &InlinedCallSites)) {
+ for (auto *CB : InlinedCallSites) {
+ if (getInlineCandidate(&NewCandidate, CB))
+ CQueue.emplace(NewCandidate);
+ }
+ Changed = true;
+ }
+ }
+ } else if (CalledFunction && CalledFunction->getSubprogram() &&
+ !CalledFunction->isDeclaration()) {
+ SmallVector<CallBase *, 8> InlinedCallSites;
+ if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
+ for (auto *CB : InlinedCallSites) {
+ if (getInlineCandidate(&NewCandidate, CB))
+ CQueue.emplace(NewCandidate);
+ }
+ Changed = true;
+ }
+ } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+ findCalleeFunctionSamples(*I)->findInlinedFunctions(
+ InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
+ }
+ }
+
+ if (!CQueue.empty()) {
+ if (SizeLimit == (unsigned)ProfileInlineLimitMax)
+ ++NumCSInlinedHitMaxLimit;
+ else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
+ ++NumCSInlinedHitMinLimit;
+ else
+ ++NumCSInlinedHitGrowthLimit;
+ }
+
+ return Changed;
+}
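The size cap set near the top of this function is a clamp of the current function size times the growth ratio into the [min, max] range. A quick standalone check with the default option values (growth 12, min 100, max 10000):

    #include <algorithm>
    #include <iostream>

    static unsigned sizeLimit(unsigned InstCount, unsigned Growth,
                              unsigned LimitMin, unsigned LimitMax) {
      unsigned Limit = InstCount * Growth;
      Limit = std::min(Limit, LimitMax); // cap very large functions
      Limit = std::max(Limit, LimitMin); // give tiny functions some headroom
      return Limit;
    }

    int main() {
      std::cout << sizeLimit(5, 12, 100, 10000) << "\n";    // 60 -> raised to 100
      std::cout << sizeLimit(500, 12, 100, 10000) << "\n";  // 6000
      std::cout << sizeLimit(2000, 12, 100, 10000) << "\n"; // 24000 -> capped at 10000
    }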
+
/// Find equivalence classes for the given block.
///
/// This finds all the blocks that are guaranteed to execute the same
@@ -1538,15 +2028,21 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!DLoc)
continue;
const DILocation *DIL = DLoc;
- uint32_t LineOffset = FunctionSamples::getOffset(DIL);
- uint32_t Discriminator = DIL->getBaseDiscriminator();
-
const FunctionSamples *FS = findFunctionSamples(I);
if (!FS)
continue;
- auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
+ auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+ auto T = FS->findCallTargetMapAt(CallSite);
if (!T || T.get().empty())
continue;
+ // Prorate the callsite counts to reflect what is already done to the
+ // callsite, such as ICP or callsite cloning.
+ if (FunctionSamples::ProfileIsProbeBased) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+ if (Probe->Factor < 1)
+ T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
+ }
+ }
SmallVector<InstrProfValueData, 2> SortedCallTargets =
GetSortedValueDataFromCallTargets(T.get());
uint64_t Sum;
@@ -1598,8 +2094,6 @@ void SampleProfileLoader::propagateWeights(Function &F) {
}
}
- misexpect::verifyMisExpect(TI, Weights, TI->getContext());
-
uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
@@ -1710,14 +2204,28 @@ void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
bool SampleProfileLoader::emitAnnotations(Function &F) {
bool Changed = false;
- if (getFunctionLoc(F) == 0)
- return false;
+ if (FunctionSamples::ProfileIsProbeBased) {
+ if (!ProbeManager->profileIsValid(F, *Samples)) {
+ LLVM_DEBUG(
+ dbgs() << "Profile is invalid due to CFG mismatch for Function "
+ << F.getName());
+ ++NumMismatchedProfile;
+ return false;
+ }
+ ++NumMatchedProfile;
+ } else {
+ if (getFunctionLoc(F) == 0)
+ return false;
- LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
- << F.getName() << ": " << getFunctionLoc(F) << "\n");
+ LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
+ << F.getName() << ": " << getFunctionLoc(F) << "\n");
+ }
DenseSet<GlobalValue::GUID> InlinedGUIDs;
- Changed |= inlineHotFunctions(F, InlinedGUIDs);
+ if (ProfileIsCS && CallsitePrioritizedInline)
+ Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
+ else
+ Changed |= inlineHotFunctions(F, InlinedGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
@@ -1782,6 +2290,45 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
+// Add inlined profile call edges to the call graph.
+void SampleProfileLoader::addCallGraphEdges(CallGraph &CG,
+ const FunctionSamples &Samples) {
+ Function *Caller = SymbolMap.lookup(Samples.getFuncName());
+ if (!Caller || Caller->isDeclaration())
+ return;
+
+ // Skip non-inlined call edges, which are not important since top-down
+ // inlining for a non-CS profile is meant to get more precise profile
+ // matching, not to enable more inlining.
+
+ for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
+ for (const auto &InlinedSamples : CallsiteSamples.second) {
+ Function *Callee = SymbolMap.lookup(InlinedSamples.first);
+ if (Callee && !Callee->isDeclaration())
+ CG[Caller]->addCalledFunction(nullptr, CG[Callee]);
+ addCallGraphEdges(CG, InlinedSamples.second);
+ }
+ }
+}
+
+// Replace call graph edges with dynamic call edges from the profile.
+void SampleProfileLoader::replaceCallGraphEdges(
+ CallGraph &CG, StringMap<Function *> &SymbolMap) {
+ // Remove static call edges from the call graph except for the ones from the
+ // root which make the call graph connected.
+ for (const auto &Node : CG)
+ if (Node.second.get() != CG.getExternalCallingNode())
+ Node.second->removeAllCalledFunctions();
+
+ // Add profile call edges to the call graph.
+ if (ProfileIsCS) {
+ ContextTracker->addCallGraphEdges(CG, SymbolMap);
+ } else {
+ for (const auto &Samples : Reader->getProfiles())
+ addCallGraphEdges(CG, Samples.second);
+ }
+}
+
std::vector<Function *>
SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
std::vector<Function *> FunctionOrderList;
@@ -1804,24 +2351,105 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
}
assert(&CG->getModule() == &M);
+
+ // Add indirect call edges from profile to augment the static call graph.
+ // Functions will be processed in a top-down order defined by the static call
+ // graph. Adjusting the order by considering indirect call edges from the
+ // profile (which don't exist in the static call graph) can enable the
+ // inlining of indirect call targets by processing the caller before them.
+ // TODO: enable this for non-CS profile and fix the counts returning logic to
+ // have full support for indirect calls.
+ if (UseProfileIndirectCallEdges && ProfileIsCS) {
+ for (auto &Entry : *CG) {
+ const auto *F = Entry.first;
+ if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
+ continue;
+ auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName());
+ if (AllContexts.empty())
+ continue;
+
+ for (const auto &BB : *F) {
+ for (const auto &I : BB.getInstList()) {
+ const auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB || !CB->isIndirectCall())
+ continue;
+ const DebugLoc &DLoc = I.getDebugLoc();
+ if (!DLoc)
+ continue;
+ auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc);
+ for (FunctionSamples *Samples : AllContexts) {
+ if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) {
+ for (const auto &Target : CallTargets.get()) {
+ Function *Callee = SymbolMap.lookup(Target.first());
+ if (Callee && !Callee->isDeclaration())
+ Entry.second->addCalledFunction(nullptr, (*CG)[Callee]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Compute a top-down order from the profile, which is used to sort functions in
+ // one SCC later. The static processing order computed for an SCC may not
+ // reflect the call contexts in the context-sensitive profile, thus may cause
+ // potential inlining to be overlooked. The function order in one SCC is being
+ // adjusted to a top-down order based on the profile to favor more inlining.
+ DenseMap<Function *, uint64_t> ProfileOrderMap;
+ if (UseProfileTopDownOrder ||
+ (ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) {
+ // Create a static call graph. The call edges are not important since they
+ // will be replaced by dynamic edges from the profile.
+ CallGraph ProfileCG(M);
+ replaceCallGraphEdges(ProfileCG, SymbolMap);
+ scc_iterator<CallGraph *> CGI = scc_begin(&ProfileCG);
+ uint64_t I = 0;
+ while (!CGI.isAtEnd()) {
+ for (CallGraphNode *Node : *CGI) {
+ if (auto *F = Node->getFunction())
+ ProfileOrderMap[F] = ++I;
+ }
+ ++CGI;
+ }
+ }
+
scc_iterator<CallGraph *> CGI = scc_begin(CG);
while (!CGI.isAtEnd()) {
- for (CallGraphNode *node : *CGI) {
- auto F = node->getFunction();
+ uint64_t Start = FunctionOrderList.size();
+ for (CallGraphNode *Node : *CGI) {
+ auto *F = Node->getFunction();
if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
FunctionOrderList.push_back(F);
}
+
+ // Sort nodes in SCC based on the profile top-down order.
+ if (!ProfileOrderMap.empty()) {
+ std::stable_sort(FunctionOrderList.begin() + Start,
+ FunctionOrderList.end(),
+ [&ProfileOrderMap](Function *Left, Function *Right) {
+ return ProfileOrderMap[Left] < ProfileOrderMap[Right];
+ });
+ }
+
++CGI;
}
+ LLVM_DEBUG({
+ dbgs() << "Function processing order:\n";
+ for (auto F : reverse(FunctionOrderList)) {
+ dbgs() << F->getName() << "\n";
+ }
+ });
+
std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
return FunctionOrderList;
}
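The SCC reordering above boils down to a stable sort over a profile-derived rank: functions keep their static relative order unless the profile says one should be processed earlier. A minimal sketch with hypothetical names and ranks:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Rank derived from a profile-driven call graph walk; smaller means the
      // function sits higher in the dynamic call chain.
      std::map<std::string, uint64_t> ProfileOrder = {
          {"main", 1}, {"foo", 2}, {"bar", 3}};

      // Order produced by the static SCC iteration for one SCC.
      std::vector<std::string> SCC = {"bar", "main", "foo"};

      std::stable_sort(SCC.begin(), SCC.end(),
                       [&ProfileOrder](const std::string &L, const std::string &R) {
                         return ProfileOrder[L] < ProfileOrder[R];
                       });

      for (const std::string &Name : SCC)
        std::cout << Name << "\n"; // main, foo, bar
    }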
-bool SampleProfileLoader::doInitialization(Module &M) {
+bool SampleProfileLoader::doInitialization(Module &M,
+ FunctionAnalysisManager *FAM) {
auto &Ctx = M.getContext();
- std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader;
auto ReaderOrErr =
SampleProfileReader::create(Filename, Ctx, RemappingFilename);
if (std::error_code EC = ReaderOrErr.getError()) {
@@ -1830,8 +2458,14 @@ bool SampleProfileLoader::doInitialization(Module &M) {
return false;
}
Reader = std::move(ReaderOrErr.get());
+ Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
Reader->collectFuncsFrom(M);
- ProfileIsValid = (Reader->read() == sampleprof_error::success);
+ if (std::error_code EC = Reader->read()) {
+ std::string Msg = "profile reading failed: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+
PSL = Reader->getProfileSymbolList();
// While profile-sample-accurate is on, ignore symbol list.
@@ -1843,6 +2477,41 @@ bool SampleProfileLoader::doInitialization(Module &M) {
NamesInProfile.insert(NameTable->begin(), NameTable->end());
}
+ if (FAM && !ProfileInlineReplayFile.empty()) {
+ ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
+ M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile,
+ /*EmitRemarks=*/false);
+ if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
+ ExternalInlineAdvisor.reset();
+ }
+
+ // Apply tweaks if context-sensitive profile is available.
+ if (Reader->profileIsCS()) {
+ ProfileIsCS = true;
+ FunctionSamples::ProfileIsCS = true;
+
+ // Enable the priority-based inliner and size inlining by default for CSSPGO.
+ if (!ProfileSizeInline.getNumOccurrences())
+ ProfileSizeInline = true;
+ if (!CallsitePrioritizedInline.getNumOccurrences())
+ CallsitePrioritizedInline = true;
+
+ // Tracker for profiles under different contexts
+ ContextTracker =
+ std::make_unique<SampleContextTracker>(Reader->getProfiles());
+ }
+
+ // Load pseudo probe descriptors for probe-based function samples.
+ if (Reader->profileIsProbeBased()) {
+ ProbeManager = std::make_unique<PseudoProbeManager>(M);
+ if (!ProbeManager->moduleIsProbed(M)) {
+ const char *Msg =
+ "Pseudo-probe-based profile requires SampleProfileProbePass";
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+ }
+
return true;
}
@@ -1856,8 +2525,6 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI, CallGraph *CG) {
- if (!ProfileIsValid)
- return false;
GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
PSI = _PSI;
@@ -1870,6 +2537,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
for (const auto &I : Reader->getProfiles())
TotalCollectedSamples += I.second.getTotalSamples();
+ auto Remapper = Reader->getRemapper();
// Populate the symbol map.
for (const auto &N_F : M.getValueSymbolTable()) {
StringRef OrigName = N_F.getKey();
@@ -1887,6 +2555,15 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
// to nullptr to avoid confusion.
if (!r.second)
r.first->second = nullptr;
+ OrigName = NewName;
+ }
+ // Insert the remapped names into SymbolMap.
+ if (Remapper) {
+ if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
+ if (*MapName == OrigName)
+ continue;
+ SymbolMap.insert(std::make_pair(*MapName, F));
+ }
}
}
@@ -1898,9 +2575,10 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
}
// Account for cold calls not inlined....
- for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
- notInlinedCallInfo)
- updateProfileCallee(pair.first, pair.second.entryCount);
+ if (!ProfileIsCS)
+ for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
+ notInlinedCallInfo)
+ updateProfileCallee(pair.first, pair.second.entryCount);
return retval;
}
@@ -1915,7 +2593,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
-
+ LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
DILocation2SampleMap.clear();
// By default the entry count is initialized to -1, which will be treated
// conservatively by getEntryCount as the same as unknown (None). This is
@@ -1957,7 +2635,10 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
initialEntryCount = -1;
}
- F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
+ // Initialize entry count when the function has no existing entry
+ // count value.
+ if (!F.getEntryCount().hasValue())
+ F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
auto &FAM =
@@ -1968,7 +2649,12 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
ORE = OwnedORE.get();
}
- Samples = Reader->getSamplesFor(F);
+
+ if (ProfileIsCS)
+ Samples = ContextTracker->getBaseSamplesFor(F);
+ else
+ Samples = Reader->getSamplesFor(F);
+
if (Samples && !Samples->empty())
return emitAnnotations(F);
return false;
@@ -1993,9 +2679,9 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
: ProfileRemappingFileName,
- IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI);
+ LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
- if (!SampleLoader.doInitialization(M))
+ if (!SampleLoader.doInitialization(M, &FAM))
return PreservedAnalyses::all();
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
new file mode 100644
index 000000000000..a885c3ee4ded
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -0,0 +1,434 @@
+//===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleProfileProber transformation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <unordered_set>
+#include <vector>
+
+using namespace llvm;
+#define DEBUG_TYPE "sample-profile-probe"
+
+STATISTIC(ArtificialDbgLine,
+ "Number of probes that have an artificial debug line");
+
+static cl::opt<bool>
+ VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
+ cl::desc("Do pseudo probe verification"));
+
+static cl::list<std::string> VerifyPseudoProbeFuncList(
+ "verify-pseudo-probe-funcs", cl::Hidden,
+ cl::desc("The option to specify the name of the functions to verify."));
+
+static cl::opt<bool>
+ UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
+ cl::desc("Update pseudo probe distribution factor"));
+
+bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
+ // Skip function declaration.
+ if (F->isDeclaration())
+ return false;
+ // Skip functions that will not be emitted into the object file. The
+ // prevailing definition will be verified instead.
+ if (F->hasAvailableExternallyLinkage())
+ return false;
+ // Do a name matching.
+ static std::unordered_set<std::string> VerifyFuncNames(
+ VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
+ return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
+}
+
+void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+ if (VerifyPseudoProbe) {
+ PIC.registerAfterPassCallback(
+ [this](StringRef P, Any IR, const PreservedAnalyses &) {
+ this->runAfterPass(P, IR);
+ });
+ }
+}
+
+// Callback to run after each transformation for the new pass manager.
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
+ std::string Banner =
+ "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
+ dbgs() << Banner;
+ if (any_isa<const Module *>(IR))
+ runAfterPass(any_cast<const Module *>(IR));
+ else if (any_isa<const Function *>(IR))
+ runAfterPass(any_cast<const Function *>(IR));
+ else if (any_isa<const LazyCallGraph::SCC *>(IR))
+ runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
+ else if (any_isa<const Loop *>(IR))
+ runAfterPass(any_cast<const Loop *>(IR));
+ else
+ llvm_unreachable("Unknown IR unit");
+}
+
+void PseudoProbeVerifier::runAfterPass(const Module *M) {
+ for (const Function &F : *M)
+ runAfterPass(&F);
+}
+
+void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
+ for (const LazyCallGraph::Node &N : *C)
+ runAfterPass(&N.getFunction());
+}
+
+void PseudoProbeVerifier::runAfterPass(const Function *F) {
+ if (!shouldVerifyFunction(F))
+ return;
+ ProbeFactorMap ProbeFactors;
+ for (const auto &BB : *F)
+ collectProbeFactors(&BB, ProbeFactors);
+ verifyProbeFactors(F, ProbeFactors);
+}
+
+void PseudoProbeVerifier::runAfterPass(const Loop *L) {
+ const Function *F = L->getHeader()->getParent();
+ runAfterPass(F);
+}
+
+void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
+ ProbeFactorMap &ProbeFactors) {
+ for (const auto &I : *Block) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I))
+ ProbeFactors[Probe->Id] += Probe->Factor;
+ }
+}
+
+void PseudoProbeVerifier::verifyProbeFactors(
+ const Function *F, const ProbeFactorMap &ProbeFactors) {
+ bool BannerPrinted = false;
+ auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
+ for (const auto &I : ProbeFactors) {
+ float CurProbeFactor = I.second;
+ if (PrevProbeFactors.count(I.first)) {
+ float PrevProbeFactor = PrevProbeFactors[I.first];
+ if (std::abs(CurProbeFactor - PrevProbeFactor) >
+ DistributionFactorVariance) {
+ if (!BannerPrinted) {
+ dbgs() << "Function " << F->getName() << ":\n";
+ BannerPrinted = true;
+ }
+ dbgs() << "Probe " << I.first << "\tprevious factor "
+ << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
+ << format("%0.2f", CurProbeFactor) << "\n";
+ }
+ }
+
+ // Update the recorded factor for the next comparison.
+ PrevProbeFactors[I.first] = I.second;
+ }
+}
+
+PseudoProbeManager::PseudoProbeManager(const Module &M) {
+ if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+ for (const auto *Operand : FuncInfo->operands()) {
+ const auto *MD = cast<MDNode>(Operand);
+ auto GUID =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
+ auto Hash =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+ GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
+ }
+ }
+}
+
+const PseudoProbeDescriptor *
+PseudoProbeManager::getDesc(const Function &F) const {
+ auto I = GUIDToProbeDescMap.find(
+ Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
+ return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
+}
+
+bool PseudoProbeManager::moduleIsProbed(const Module &M) const {
+ return M.getNamedMetadata(PseudoProbeDescMetadataName);
+}
+
+bool PseudoProbeManager::profileIsValid(const Function &F,
+ const FunctionSamples &Samples) const {
+ const auto *Desc = getDesc(F);
+ if (!Desc) {
+ LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function " << F.getName()
+ << "\n");
+ return false;
+ } else {
+ if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
+ LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
+ << "\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+SampleProfileProber::SampleProfileProber(Function &Func,
+ const std::string &CurModuleUniqueId)
+ : F(&Func), CurModuleUniqueId(CurModuleUniqueId) {
+ BlockProbeIds.clear();
+ CallProbeIds.clear();
+ LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
+ computeProbeIdForBlocks();
+ computeProbeIdForCallsites();
+ computeCFGHash();
+}
+
+// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
+// value of each BB in the CFG. The higher 32 bits record the number of edges
+// preceded by the number of indirect calls.
+// This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
+void SampleProfileProber::computeCFGHash() {
+ std::vector<uint8_t> Indexes;
+ JamCRC JC;
+ for (auto &BB : *F) {
+ auto *TI = BB.getTerminator();
+ for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+ auto *Succ = TI->getSuccessor(I);
+ auto Index = getBlockId(Succ);
+ for (int J = 0; J < 4; J++)
+ Indexes.push_back((uint8_t)(Index >> (J * 8)));
+ }
+ }
+
+ JC.update(Indexes);
+
+ FunctionHash = (uint64_t)CallProbeIds.size() << 48 |
+ (uint64_t)Indexes.size() << 32 | JC.getCRC();
+ // Reserve bits 60-63 for other purposes.
+ FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+ assert(FunctionHash && "Function checksum should not be zero");
+ LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F->getName()
+ << ":\n"
+ << " CRC = " << JC.getCRC() << ", Edges = "
+ << Indexes.size() << ", ICSites = " << CallProbeIds.size()
+ << ", Hash = " << FunctionHash << "\n");
+}
+
+void SampleProfileProber::computeProbeIdForBlocks() {
+ for (auto &BB : *F) {
+ BlockProbeIds[&BB] = ++LastProbeId;
+ }
+}
+
+void SampleProfileProber::computeProbeIdForCallsites() {
+ for (auto &BB : *F) {
+ for (auto &I : BB) {
+ if (!isa<CallBase>(I))
+ continue;
+ if (isa<IntrinsicInst>(&I))
+ continue;
+ CallProbeIds[&I] = ++LastProbeId;
+ }
+ }
+}
+
+uint32_t SampleProfileProber::getBlockId(const BasicBlock *BB) const {
+ auto I = BlockProbeIds.find(const_cast<BasicBlock *>(BB));
+ return I == BlockProbeIds.end() ? 0 : I->second;
+}
+
+uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const {
+ auto Iter = CallProbeIds.find(const_cast<Instruction *>(Call));
+ return Iter == CallProbeIds.end() ? 0 : Iter->second;
+}
+
+void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
+ Module *M = F.getParent();
+ MDBuilder MDB(F.getContext());
+ // Compute a GUID without considering the function's linkage type. This is
+ // fine since the function name is the only key in the profile database.
+ uint64_t Guid = Function::getGUID(F.getName());
+
+ // Assign an artificial debug line to a probe that doesn't come with a real
+ // line. A probe not having a debug line will get an incomplete inline
+ // context. This will cause samples collected on the probe to be counted
+ // into the base profile instead of a context profile. The line number
+ // itself is not important though.
+ auto AssignDebugLoc = [&](Instruction *I) {
+ assert((isa<PseudoProbeInst>(I) || isa<CallBase>(I)) &&
+ "Expecting pseudo probe or call instructions");
+ if (!I->getDebugLoc()) {
+ if (auto *SP = F.getSubprogram()) {
+ auto DIL = DILocation::get(SP->getContext(), 0, 0, SP);
+ I->setDebugLoc(DIL);
+ ArtificialDbgLine++;
+ LLVM_DEBUG({
+ dbgs() << "\nIn Function " << F.getName()
+ << " Probe gets an artificial debug line\n";
+ I->dump();
+ });
+ }
+ }
+ };
+
+ // Probe basic blocks.
+ for (auto &I : BlockProbeIds) {
+ BasicBlock *BB = I.first;
+ uint32_t Index = I.second;
+ // Insert a probe before an instruction with a valid debug line number which
+ // will be assigned to the probe. The line number will be used later to
+ // model the inline context when the probe is inlined into other functions.
+ // Debug instructions, phi nodes and lifetime markers do not have a valid
+ // line number. Real instructions generated by optimizations may not come
+ // with a line number either.
+ auto HasValidDbgLine = [](Instruction *J) {
+ return !isa<PHINode>(J) && !isa<DbgInfoIntrinsic>(J) &&
+ !J->isLifetimeStartOrEnd() && J->getDebugLoc();
+ };
+
+ Instruction *J = &*BB->getFirstInsertionPt();
+ while (J != BB->getTerminator() && !HasValidDbgLine(J)) {
+ J = J->getNextNode();
+ }
+
+ IRBuilder<> Builder(J);
+ assert(Builder.GetInsertPoint() != BB->end() &&
+ "Cannot get the probing point");
+ Function *ProbeFn =
+ llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
+ Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
+ Builder.getInt32(0),
+ Builder.getInt64(PseudoProbeFullDistributionFactor)};
+ auto *Probe = Builder.CreateCall(ProbeFn, Args);
+ AssignDebugLoc(Probe);
+ }
+
+ // Probe both direct calls and indirect calls. Direct calls are probed so that
+ // their probe ID can be used as a call site identifier to represent a
+ // calling context.
+ for (auto &I : CallProbeIds) {
+ auto *Call = I.first;
+ uint32_t Index = I.second;
+ uint32_t Type = cast<CallBase>(Call)->getCalledFunction()
+ ? (uint32_t)PseudoProbeType::DirectCall
+ : (uint32_t)PseudoProbeType::IndirectCall;
+ AssignDebugLoc(Call);
+ // Leverage the 32-bit discriminator field of debug data to store the ID and
+ // type of a callsite probe. This gets rid of the dependency on plumbing a
+ // customized metadata through the codegen pipeline.
+ uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+ Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
+ if (auto DIL = Call->getDebugLoc()) {
+ DIL = DIL->cloneWithDiscriminator(V);
+ Call->setDebugLoc(DIL);
+ }
+ }
+
+ // Create module-level metadata that contains function info necessary to
+ // synthesize probe-based sample counts, which are
+ // - FunctionGUID
+ // - FunctionHash
+ // - FunctionName
+ auto Hash = getFunctionHash();
+ auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, &F);
+ auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName);
+ assert(NMD && "llvm.pseudo_probe_desc should be pre-created");
+ NMD->addOperand(MD);
+
+ // Preserve a comdat group to hold all probes materialized later. This
+ // ensures that when the function is considered dead and removed, the
+ // materialized probes are disposed of as well.
+ // Imported functions are defined in another module. They do not need
+ // the following handling since the same care will be taken for them in
+ // their original module. The pseudo probes inserted into an imported
+ // function above will naturally not be emitted since the imported function
+ // is free from object emission. However, they will be emitted together
+ // with the functions that the imported function is inlined into. We do not
+ // create a comdat group for an imported function since it would be useless
+ // anyway.
+ if (!F.isDeclarationForLinker()) {
+ if (TM) {
+ auto Triple = TM->getTargetTriple();
+ if (Triple.supportsCOMDAT() && TM->getFunctionSections()) {
+ GetOrCreateFunctionComdat(F, Triple, CurModuleUniqueId);
+ }
+ }
+ }
+}
+
+PreservedAnalyses SampleProfileProbePass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ auto ModuleId = getUniqueModuleId(&M);
+ // Create the pseudo probe desc metadata beforehand.
+ // Note that modules with only data but no functions still need this set up
+ // so that they will be known as probed later.
+ M.getOrInsertNamedMetadata(PseudoProbeDescMetadataName);
+
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ SampleProfileProber ProbeManager(F, ModuleId);
+ ProbeManager.instrumentOneFunc(F, TM);
+ }
+
+ return PreservedAnalyses::none();
+}
+
+void PseudoProbeUpdatePass::runOnFunction(Function &F,
+ FunctionAnalysisManager &FAM) {
+ BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ auto BBProfileCount = [&BFI](BasicBlock *BB) {
+ return BFI.getBlockProfileCount(BB)
+ ? BFI.getBlockProfileCount(BB).getValue()
+ : 0;
+ };
+
+ // Collect the sum of execution weight for each probe.
+ ProbeFactorMap ProbeFactors;
+ for (auto &Block : F) {
+ for (auto &I : Block) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I))
+ ProbeFactors[Probe->Id] += BBProfileCount(&Block);
+ }
+ }
+
+ // Fix up over-counted probes.
+ for (auto &Block : F) {
+ for (auto &I : Block) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+ float Sum = ProbeFactors[Probe->Id];
+ if (Sum != 0)
+ setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
+ }
+ }
+ }
+}
+
+PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (UpdatePseudoProbe) {
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ runOnFunction(F, FAM);
+ }
+ }
+ return PreservedAnalyses::none();
+}
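For reference, the 64-bit checksum built by SampleProfileProber::computeCFGHash() above packs three fields: the callsite-probe count, the size of the serialized successor-index buffer, and its CRC32, with the top four bits reserved. A minimal standalone sketch of that packing; the helper and parameter names below are illustrative, not part of the patch:

#include <cstdint>

// Hypothetical helper mirroring the packing in computeCFGHash().
uint64_t packFunctionHash(uint64_t NumCallsiteProbes, uint64_t NumIndexBytes,
                          uint32_t Crc32) {
  uint64_t Hash =
      (NumCallsiteProbes << 48) | (NumIndexBytes << 32) | (uint64_t)Crc32;
  // Bits 60-63 are reserved for other information, so clear them.
  return Hash & 0x0FFFFFFFFFFFFFFFULL;
}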
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 088091df770f..4fc71847a070 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -19,18 +19,21 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/StripSymbols.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
+
using namespace llvm;
namespace {
@@ -249,9 +252,7 @@ bool StripNonDebugSymbols::runOnModule(Module &M) {
return StripSymbolNames(M, true);
}
-bool StripDebugDeclare::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
+static bool stripDebugDeclareImpl(Module &M) {
Function *Declare = M.getFunction("llvm.dbg.declare");
std::vector<Constant*> DeadConstants;
@@ -289,17 +290,13 @@ bool StripDebugDeclare::runOnModule(Module &M) {
return true;
}
-/// Remove any debug info for global variables/functions in the given module for
-/// which said global variable/function no longer exists (i.e. is null).
-///
-/// Debugging information is encoded in llvm IR using metadata. This is designed
-/// such a way that debug info for symbols preserved even if symbols are
-/// optimized away by the optimizer. This special pass removes debug info for
-/// such symbols.
-bool StripDeadDebugInfo::runOnModule(Module &M) {
+bool StripDebugDeclare::runOnModule(Module &M) {
if (skipModule(M))
return false;
+ return stripDebugDeclareImpl(M);
+}
+static bool stripDeadDebugInfoImpl(Module &M) {
bool Changed = false;
LLVMContext &C = M.getContext();
@@ -380,3 +377,40 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
return Changed;
}
+
+/// Remove any debug info for global variables/functions in the given module for
+/// which said global variable/function no longer exists (i.e. is null).
+///
+/// Debugging information is encoded in llvm IR using metadata. This is designed
+/// such a way that debug info for symbols preserved even if symbols are
+/// optimized away by the optimizer. This special pass removes debug info for
+/// such symbols.
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+ return stripDeadDebugInfoImpl(M);
+}
+
+PreservedAnalyses StripSymbolsPass::run(Module &M, ModuleAnalysisManager &AM) {
+ StripDebugInfo(M);
+ StripSymbolNames(M, false);
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses StripNonDebugSymbolsPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ StripSymbolNames(M, true);
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses StripDebugDeclarePass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ stripDebugDeclareImpl(M);
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses StripDeadDebugInfoPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ stripDeadDebugInfoImpl(M);
+ return PreservedAnalyses::all();
+}
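The four run() wrappers added above share the same new-pass-manager shape: take the IR unit plus its analysis manager, do the work, and report which analyses survive. A minimal sketch of that shape with a hypothetical pass name, assuming the usual PassInfoMixin helper from PassManager.h; only the run() signature and the PreservedAnalyses return mirror the code above:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

// Hypothetical pass following the same pattern as the wrappers above.
struct ExampleStripPass : llvm::PassInfoMixin<ExampleStripPass> {
  llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &) {
    // Module-mutating work would go here (cf. stripDebugDeclareImpl above).
    // Returning none() tells the manager that cached analyses may be stale.
    return llvm::PreservedAnalyses::none();
  }
};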
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 87a18171787f..225b4fe95f67 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -14,6 +14,7 @@
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@@ -260,7 +261,7 @@ void splitAndWriteThinLTOBitcode(
if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
!F->arg_begin()->use_empty())
return;
- for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
+ for (auto &Arg : drop_begin(F->args())) {
auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
if (!ArgT || ArgT->getBitWidth() > 64)
return;
@@ -333,8 +334,7 @@ void splitAndWriteThinLTOBitcode(
Linkage = CFL_Declaration;
Elts.push_back(ConstantAsMetadata::get(
llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
- for (auto Type : Types)
- Elts.push_back(Type);
+ append_range(Elts, Types);
CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
}
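Both hunks above replace hand-rolled iterator ranges with ADT helpers. A small sketch of the two idioms, using made-up container names:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

void copySkippingFirst(const llvm::SmallVectorImpl<int> &In,
                       llvm::SmallVectorImpl<int> &Out) {
  // drop_begin(In) is the range from std::next(In.begin()) to In.end().
  for (int V : llvm::drop_begin(In))
    Out.push_back(V);
  // append_range(Out, In) replaces a manual push_back loop over In.
  llvm::append_range(Out, In);
}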
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 5a25f9857665..cf1ff405c493 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -59,7 +59,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
@@ -470,7 +470,7 @@ CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
auto *CBType = dyn_cast<IntegerType>(CB.getType());
if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
return CSInfo;
- for (auto &&Arg : make_range(CB.arg_begin() + 1, CB.arg_end())) {
+ for (auto &&Arg : drop_begin(CB.args())) {
auto *CI = dyn_cast<ConstantInt>(Arg);
if (!CI || CI->getBitWidth() > 64)
return CSInfo;
@@ -753,6 +753,11 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
return FAM.getResult<DominatorTreeAnalysis>(F);
};
+ if (UseCommandLine) {
+ if (DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+ }
if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
ImportSummary)
.run())
@@ -1025,6 +1030,10 @@ bool DevirtIndex::tryFindVirtualCallTargets(
void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
Constant *TheFn, bool &IsExported) {
+ // Don't devirtualize the function if we're told to skip it
+ // in -wholeprogramdevirt-skip.
+ if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
+ return;
auto Apply = [&](CallSiteInfo &CSInfo) {
for (auto &&VCallSite : CSInfo.CallSites) {
if (RemarksEnabled)
@@ -1258,7 +1267,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
// Jump tables are only profitable if the retpoline mitigation is enabled.
Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
- if (FSAttr.hasAttribute(Attribute::None) ||
+ if (!FSAttr.isValid() ||
!FSAttr.getValueAsString().contains("+retpoline"))
continue;
@@ -1270,8 +1279,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
// x86_64.
std::vector<Type *> NewArgs;
NewArgs.push_back(Int8PtrTy);
- for (Type *T : CB.getFunctionType()->params())
- NewArgs.push_back(T);
+ append_range(NewArgs, CB.getFunctionType()->params());
FunctionType *NewFT =
FunctionType::get(CB.getFunctionType()->getReturnType(), NewArgs,
CB.getFunctionType()->isVarArg());
@@ -1280,7 +1288,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
IRBuilder<> IRB(&CB);
std::vector<Value *> Args;
Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
- Args.insert(Args.end(), CB.arg_begin(), CB.arg_end());
+ llvm::append_range(Args, CB.args());
CallBase *NewCS = nullptr;
if (isa<CallInst>(CB))
@@ -2205,6 +2213,4 @@ void DevirtIndex::run() {
if (PrintSummaryDevirt)
for (const auto &DT : DevirtTargets)
errs() << "Devirtualized call to " << DT << "\n";
-
- return;
}
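The branch-funnel change above switches the string-attribute check from Attribute::None to Attribute::isValid(), since a missing "target-features" attribute yields an invalid Attribute object. A hedged sketch of that idiom in isolation; the function name is made up:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Returns true if the function's target-features string enables retpolines.
static bool hasRetpolineFeature(const llvm::Function &F) {
  llvm::Attribute FS = F.getFnAttribute("target-features");
  return FS.isValid() && FS.getValueAsString().contains("+retpoline");
}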
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index a7f5e0a7774d..bacb8689892a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <utility>
@@ -81,11 +82,11 @@ namespace {
private:
bool insaneIntVal(int V) { return V > 4 || V < -4; }
- APFloat *getFpValPtr()
- { return reinterpret_cast<APFloat *>(&FpValBuf.buffer[0]); }
+ APFloat *getFpValPtr() { return reinterpret_cast<APFloat *>(&FpValBuf); }
- const APFloat *getFpValPtr() const
- { return reinterpret_cast<const APFloat *>(&FpValBuf.buffer[0]); }
+ const APFloat *getFpValPtr() const {
+ return reinterpret_cast<const APFloat *>(&FpValBuf);
+ }
const APFloat &getFpVal() const {
assert(IsFp && BufHasFpVal && "Incorrect state");
@@ -860,7 +861,7 @@ static Instruction *foldNoWrapAdd(BinaryOperator &Add,
return nullptr;
}
-Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
+Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
Constant *Op1C;
if (!match(Op1, m_Constant(Op1C)))
@@ -886,15 +887,15 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
// zext(bool) + C -> bool ? C + 1 : C
if (match(Op0, m_ZExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return SelectInst::Create(X, AddOne(Op1C), Op1);
+ return SelectInst::Create(X, InstCombiner::AddOne(Op1C), Op1);
// sext(bool) + C -> bool ? C - 1 : C
if (match(Op0, m_SExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return SelectInst::Create(X, SubOne(Op1C), Op1);
+ return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1);
// ~X + C --> (C-1) - X
if (match(Op0, m_Not(m_Value(X))))
- return BinaryOperator::CreateSub(SubOne(Op1C), X);
+ return BinaryOperator::CreateSub(InstCombiner::SubOne(Op1C), X);
const APInt *C;
if (!match(Op1, m_APInt(C)))
@@ -923,6 +924,39 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C)
return CastInst::Create(Instruction::SExt, X, Ty);
+ if (match(Op0, m_Xor(m_Value(X), m_APInt(C2)))) {
+ // (X ^ signmask) + C --> (X + (signmask ^ C))
+ if (C2->isSignMask())
+ return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C2 ^ *C));
+
+ // If X has no high-bits set above an xor mask:
+ // add (xor X, LowMaskC), C --> sub (LowMaskC + C), X
+ if (C2->isMask()) {
+ KnownBits LHSKnown = computeKnownBits(X, 0, &Add);
+ if ((*C2 | LHSKnown.Zero).isAllOnesValue())
+ return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C2 + *C), X);
+ }
+
+ // Look for a math+logic pattern that corresponds to sext-in-register of a
+ // value with cleared high bits. Convert that into a pair of shifts:
+ // add (xor X, 0x80), 0xF..F80 --> (X << ShAmtC) >>s ShAmtC
+ // add (xor X, 0xF..F80), 0x80 --> (X << ShAmtC) >>s ShAmtC
+ if (Op0->hasOneUse() && *C2 == -(*C)) {
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ unsigned ShAmt = 0;
+ if (C->isPowerOf2())
+ ShAmt = BitWidth - C->logBase2() - 1;
+ else if (C2->isPowerOf2())
+ ShAmt = BitWidth - C2->logBase2() - 1;
+ if (ShAmt && MaskedValueIsZero(X, APInt::getHighBitsSet(BitWidth, ShAmt),
+ 0, &Add)) {
+ Constant *ShAmtC = ConstantInt::get(Ty, ShAmt);
+ Value *NewShl = Builder.CreateShl(X, ShAmtC, "sext");
+ return BinaryOperator::CreateAShr(NewShl, ShAmtC);
+ }
+ }
+ }
+
if (C->isOneValue() && Op0->hasOneUse()) {
// add (sext i1 X), 1 --> zext (not X)
// TODO: The smallest IR representation is (select X, 0, 1), and that would
@@ -943,6 +977,15 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
}
}
+ // If all bits affected by the add are included in a high-bit-mask, do the
+ // add before the mask op:
+ // (X & 0xFF00) + xx00 --> (X + xx00) & 0xFF00
+ if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C2)))) &&
+ C2->isNegative() && C2->isShiftedMask() && *C == (*C & *C2)) {
+ Value *NewAdd = Builder.CreateAdd(X, ConstantInt::get(Ty, *C));
+ return BinaryOperator::CreateAnd(NewAdd, ConstantInt::get(Ty, *C2));
+ }
+
return nullptr;
}
@@ -1021,7 +1064,7 @@ static bool MulWillOverflow(APInt &C0, APInt &C1, bool IsSigned) {
// Simplifies X % C0 + (( X / C0 ) % C1) * C0 to X % (C0 * C1), where (C0 * C1)
// does not overflow.
-Value *InstCombiner::SimplifyAddWithRemainder(BinaryOperator &I) {
+Value *InstCombinerImpl::SimplifyAddWithRemainder(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
Value *X, *MulOpV;
APInt C0, MulOpC;
@@ -1097,9 +1140,9 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
return nullptr;
}
-Instruction *
-InstCombiner::canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
- BinaryOperator &I) {
+Instruction *InstCombinerImpl::
+ canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
+ BinaryOperator &I) {
assert((I.getOpcode() == Instruction::Add ||
I.getOpcode() == Instruction::Or ||
I.getOpcode() == Instruction::Sub) &&
@@ -1198,7 +1241,44 @@ InstCombiner::canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
return TruncInst::CreateTruncOrBitCast(NewAShr, I.getType());
}
-Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
+/// This is a specialization of a more general transform from
+/// SimplifyUsingDistributiveLaws. If that code can be made to work optimally
+/// for multi-use cases or propagating nsw/nuw, then we would not need this.
+static Instruction *factorizeMathWithShlOps(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ // TODO: Also handle mul by doubling the shift amount?
+ assert((I.getOpcode() == Instruction::Add ||
+ I.getOpcode() == Instruction::Sub) &&
+ "Expected add/sub");
+ auto *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
+ auto *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
+ if (!Op0 || !Op1 || !(Op0->hasOneUse() || Op1->hasOneUse()))
+ return nullptr;
+
+ Value *X, *Y, *ShAmt;
+ if (!match(Op0, m_Shl(m_Value(X), m_Value(ShAmt))) ||
+ !match(Op1, m_Shl(m_Value(Y), m_Specific(ShAmt))))
+ return nullptr;
+
+ // No-wrap propagates only when all ops have no-wrap.
+ bool HasNSW = I.hasNoSignedWrap() && Op0->hasNoSignedWrap() &&
+ Op1->hasNoSignedWrap();
+ bool HasNUW = I.hasNoUnsignedWrap() && Op0->hasNoUnsignedWrap() &&
+ Op1->hasNoUnsignedWrap();
+
+ // add/sub (X << ShAmt), (Y << ShAmt) --> (add/sub X, Y) << ShAmt
+ Value *NewMath = Builder.CreateBinOp(I.getOpcode(), X, Y);
+ if (auto *NewI = dyn_cast<BinaryOperator>(NewMath)) {
+ NewI->setHasNoSignedWrap(HasNSW);
+ NewI->setHasNoUnsignedWrap(HasNUW);
+ }
+ auto *NewShl = BinaryOperator::CreateShl(NewMath, ShAmt);
+ NewShl->setHasNoSignedWrap(HasNSW);
+ NewShl->setHasNoUnsignedWrap(HasNUW);
+ return NewShl;
+}
+
+Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
SQ.getWithInstruction(&I)))
@@ -1214,59 +1294,17 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
+ if (Instruction *R = factorizeMathWithShlOps(I, Builder))
+ return R;
+
if (Instruction *X = foldAddWithConstant(I))
return X;
if (Instruction *X = foldNoWrapAdd(I, Builder))
return X;
- // FIXME: This should be moved into the above helper function to allow these
- // transforms for general constant or constant splat vectors.
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
Type *Ty = I.getType();
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr;
- if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
- unsigned TySizeBits = Ty->getScalarSizeInBits();
- const APInt &RHSVal = CI->getValue();
- unsigned ExtendAmt = 0;
- // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
- // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
- if (XorRHS->getValue() == -RHSVal) {
- if (RHSVal.isPowerOf2())
- ExtendAmt = TySizeBits - RHSVal.logBase2() - 1;
- else if (XorRHS->getValue().isPowerOf2())
- ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
- }
-
- if (ExtendAmt) {
- APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
- if (!MaskedValueIsZero(XorLHS, Mask, 0, &I))
- ExtendAmt = 0;
- }
-
- if (ExtendAmt) {
- Constant *ShAmt = ConstantInt::get(Ty, ExtendAmt);
- Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext");
- return BinaryOperator::CreateAShr(NewShl, ShAmt);
- }
-
- // If this is a xor that was canonicalized from a sub, turn it back into
- // a sub and fuse this add with it.
- if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
- KnownBits LHSKnown = computeKnownBits(XorLHS, 0, &I);
- if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue())
- return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
- XorLHS);
- }
- // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C,
- // transform them into (X + (signmask ^ C))
- if (XorRHS->getValue().isSignMask())
- return BinaryOperator::CreateAdd(XorLHS,
- ConstantExpr::getXor(XorRHS, CI));
- }
- }
-
if (Ty->isIntOrIntVectorTy(1))
return BinaryOperator::CreateXor(LHS, RHS);
@@ -1329,34 +1367,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
return BinaryOperator::CreateOr(LHS, RHS);
- // FIXME: We already did a check for ConstantInt RHS above this.
- // FIXME: Is this pattern covered by another fold? No regression tests fail on
- // removal.
- if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
- // (X & FF00) + xx00 -> (X+xx00) & FF00
- Value *X;
- ConstantInt *C2;
- if (LHS->hasOneUse() &&
- match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
- CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
- // See if all bits from the first bit set in the Add RHS up are included
- // in the mask. First, get the rightmost bit.
- const APInt &AddRHSV = CRHS->getValue();
-
- // Form a mask of all bits from the lowest bit added through the top.
- APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
-
- // See if the and mask includes all of these bits.
- APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
-
- if (AddRHSHighBits == AddRHSHighBitsAnd) {
- // Okay, the xform is safe. Insert the new add pronto.
- Value *NewAdd = Builder.CreateAdd(X, CRHS, LHS->getName());
- return BinaryOperator::CreateAnd(NewAdd, C2);
- }
- }
- }
-
// add (select X 0 (sub n A)) A --> select X A n
{
SelectInst *SI = dyn_cast<SelectInst>(LHS);
@@ -1424,6 +1434,14 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
return SatAdd;
+ // usub.sat(A, B) + B => umax(A, B)
+ if (match(&I, m_c_BinOp(
+ m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(m_Value(A), m_Value(B))),
+ m_Deferred(B)))) {
+ return replaceInstUsesWith(I,
+ Builder.CreateIntrinsic(Intrinsic::umax, {I.getType()}, {A, B}));
+ }
+
return Changed ? &I : nullptr;
}
@@ -1486,7 +1504,7 @@ static Instruction *factorizeFAddFSub(BinaryOperator &I,
: BinaryOperator::CreateFDivFMF(XY, Z, &I);
}
-Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
if (Value *V = SimplifyFAddInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
@@ -1600,49 +1618,33 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
/// Optimize pointer differences into the same array into a size. Consider:
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
-Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
- Type *Ty, bool IsNUW) {
+Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
+ Type *Ty, bool IsNUW) {
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
GEPOperator *GEP1 = nullptr, *GEP2 = nullptr;
+ if (!isa<GEPOperator>(LHS) && isa<GEPOperator>(RHS)) {
+ std::swap(LHS, RHS);
+ Swapped = true;
+ }
- // For now we require one side to be the base pointer "A" or a constant
- // GEP derived from it.
- if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
+ // Require at least one GEP with a common base pointer on both sides.
+ if (auto *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
// (gep X, ...) - X
if (LHSGEP->getOperand(0) == RHS) {
GEP1 = LHSGEP;
- Swapped = false;
- } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
+ } else if (auto *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
// (gep X, ...) - (gep X, ...)
if (LHSGEP->getOperand(0)->stripPointerCasts() ==
- RHSGEP->getOperand(0)->stripPointerCasts()) {
- GEP2 = RHSGEP;
+ RHSGEP->getOperand(0)->stripPointerCasts()) {
GEP1 = LHSGEP;
- Swapped = false;
- }
- }
- }
-
- if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
- // X - (gep X, ...)
- if (RHSGEP->getOperand(0) == LHS) {
- GEP1 = RHSGEP;
- Swapped = true;
- } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
- // (gep X, ...) - (gep X, ...)
- if (RHSGEP->getOperand(0)->stripPointerCasts() ==
- LHSGEP->getOperand(0)->stripPointerCasts()) {
- GEP2 = LHSGEP;
- GEP1 = RHSGEP;
- Swapped = true;
+ GEP2 = RHSGEP;
}
}
}
if (!GEP1)
- // No GEP found.
return nullptr;
if (GEP2) {
@@ -1670,19 +1672,18 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Value *Result = EmitGEPOffset(GEP1);
// If this is a single inbounds GEP and the original sub was nuw,
- // then the final multiplication is also nuw. We match an extra add zero
- // here, because that's what EmitGEPOffset() generates.
- Instruction *I;
- if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() &&
- match(Result, m_Add(m_Instruction(I), m_Zero())) &&
- I->getOpcode() == Instruction::Mul)
- I->setHasNoUnsignedWrap();
-
- // If we had a constant expression GEP on the other side offsetting the
- // pointer, subtract it from the offset we have.
+ // then the final multiplication is also nuw.
+ if (auto *I = dyn_cast<Instruction>(Result))
+ if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() &&
+ I->getOpcode() == Instruction::Mul)
+ I->setHasNoUnsignedWrap();
+
+ // If we have a 2nd GEP of the same base pointer, subtract the offsets.
+ // If both GEPs are inbounds, then the subtract does not have signed overflow.
if (GEP2) {
Value *Offset = EmitGEPOffset(GEP2);
- Result = Builder.CreateSub(Result, Offset);
+ Result = Builder.CreateSub(Result, Offset, "gepdiff", /* NUW */ false,
+ GEP1->isInBounds() && GEP2->isInBounds());
}
// If we have p - gep(p, ...) then we have to negate the result.
@@ -1692,7 +1693,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
return Builder.CreateIntCast(Result, Ty, true);
}
-Instruction *InstCombiner::visitSub(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (Value *V = SimplifySubInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
SQ.getWithInstruction(&I)))
@@ -1721,6 +1722,19 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return Res;
}
+ // Try this before Negator to preserve NSW flag.
+ if (Instruction *R = factorizeMathWithShlOps(I, Builder))
+ return R;
+
+ if (Constant *C = dyn_cast<Constant>(Op0)) {
+ Value *X;
+ Constant *C2;
+
+ // C-(X+C2) --> (C-C2)-X
+ if (match(Op1, m_Add(m_Value(X), m_Constant(C2))))
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
+ }
+
auto TryToNarrowDeduceFlags = [this, &I, &Op0, &Op1]() -> Instruction * {
if (Instruction *Ext = narrowMathIfNoOverflow(I))
return Ext;
@@ -1788,8 +1802,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
}
auto m_AddRdx = [](Value *&Vec) {
- return m_OneUse(
- m_Intrinsic<Intrinsic::experimental_vector_reduce_add>(m_Value(Vec)));
+ return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec)));
};
Value *V0, *V1;
if (match(Op0, m_AddRdx(V0)) && match(Op1, m_AddRdx(V1)) &&
@@ -1797,8 +1810,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Difference of sums is sum of differences:
// add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1)
Value *Sub = Builder.CreateSub(V0, V1);
- Value *Rdx = Builder.CreateIntrinsic(
- Intrinsic::experimental_vector_reduce_add, {Sub->getType()}, {Sub});
+ Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_add,
+ {Sub->getType()}, {Sub});
return replaceInstUsesWith(I, Rdx);
}
@@ -1806,14 +1819,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *X;
if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
// C - (zext bool) --> bool ? C - 1 : C
- return SelectInst::Create(X, SubOne(C), C);
+ return SelectInst::Create(X, InstCombiner::SubOne(C), C);
if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
// C - (sext bool) --> bool ? C + 1 : C
- return SelectInst::Create(X, AddOne(C), C);
+ return SelectInst::Create(X, InstCombiner::AddOne(C), C);
// C - ~X == X + (1+C)
if (match(Op1, m_Not(m_Value(X))))
- return BinaryOperator::CreateAdd(X, AddOne(C));
+ return BinaryOperator::CreateAdd(X, InstCombiner::AddOne(C));
// Try to fold constant sub into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
@@ -1828,12 +1841,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Constant *C2;
// C-(C2-X) --> X+(C-C2)
- if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))) && !isa<ConstantExpr>(C2))
+ if (match(Op1, m_Sub(m_ImmConstant(C2), m_Value(X))))
return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
-
- // C-(X+C2) --> (C-C2)-X
- if (match(Op1, m_Add(m_Value(X), m_Constant(C2))))
- return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
}
const APInt *Op0C;
@@ -1864,6 +1873,22 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateXor(A, B);
}
+ // (sub (add A, B) (or A, B)) --> (and A, B)
+ {
+ Value *A, *B;
+ if (match(Op0, m_Add(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateAnd(A, B);
+ }
+
+ // (sub (add A, B) (and A, B)) --> (or A, B)
+ {
+ Value *A, *B;
+ if (match(Op0, m_Add(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateOr(A, B);
+ }
+
// (sub (and A, B) (or A, B)) --> neg (xor A, B)
{
Value *A, *B;
@@ -2042,6 +2067,20 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return SelectInst::Create(Cmp, Neg, A);
}
+ // If we are subtracting a low-bit masked subset of some value from an add
+ // of that same value with no low bits changed, that is clearing some low bits
+ // of the sum:
+ // sub (X + AddC), (X & AndC) --> and (X + AddC), ~AndC
+ const APInt *AddC, *AndC;
+ if (match(Op0, m_Add(m_Value(X), m_APInt(AddC))) &&
+ match(Op1, m_And(m_Specific(X), m_APInt(AndC)))) {
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ unsigned Cttz = AddC->countTrailingZeros();
+ APInt HighMask(APInt::getHighBitsSet(BitWidth, BitWidth - Cttz));
+ if ((HighMask & *AndC).isNullValue())
+ return BinaryOperator::CreateAnd(Op0, ConstantInt::get(Ty, ~(*AndC)));
+ }
+
if (Instruction *V =
canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
return V;
@@ -2094,11 +2133,11 @@ static Instruction *hoistFNegAboveFMulFDiv(Instruction &I,
return nullptr;
}
-Instruction *InstCombiner::visitFNeg(UnaryOperator &I) {
+Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
Value *Op = I.getOperand(0);
if (Value *V = SimplifyFNegInst(Op, I.getFastMathFlags(),
- SQ.getWithInstruction(&I)))
+ getSimplifyQuery().getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
if (Instruction *X = foldFNegIntoConstant(I))
@@ -2117,10 +2156,10 @@ Instruction *InstCombiner::visitFNeg(UnaryOperator &I) {
return nullptr;
}
-Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
if (Value *V = SimplifyFSubInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
- SQ.getWithInstruction(&I)))
+ getSimplifyQuery().getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
if (Instruction *X = foldVectorBinop(I))
@@ -2175,7 +2214,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
// X - C --> X + (-C)
// But don't transform constant expressions because there's an inverse fold
// for X + (-Y) --> X - Y.
- if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1))
+ if (match(Op1, m_ImmConstant(C)))
return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I);
// X - (-Y) --> X + Y
@@ -2244,9 +2283,8 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
}
auto m_FaddRdx = [](Value *&Sum, Value *&Vec) {
- return m_OneUse(
- m_Intrinsic<Intrinsic::experimental_vector_reduce_v2_fadd>(
- m_Value(Sum), m_Value(Vec)));
+ return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(m_Value(Sum),
+ m_Value(Vec)));
};
Value *A0, *A1, *V0, *V1;
if (match(Op0, m_FaddRdx(A0, V0)) && match(Op1, m_FaddRdx(A1, V1)) &&
@@ -2254,9 +2292,8 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
// Difference of sums is sum of differences:
// add_rdx(A0, V0) - add_rdx(A1, V1) --> add_rdx(A0, V0 - V1) - A1
Value *Sub = Builder.CreateFSubFMF(V0, V1, &I);
- Value *Rdx = Builder.CreateIntrinsic(
- Intrinsic::experimental_vector_reduce_v2_fadd,
- {A0->getType(), Sub->getType()}, {A0, Sub}, &I);
+ Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_fadd,
+ {Sub->getType()}, {A0, Sub}, &I);
return BinaryOperator::CreateFSubFMF(Rdx, A1, &I);
}
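Several of the new integer folds above rest on small algebraic identities, e.g. (A + B) - (A | B) == A & B, (A + B) - (A & B) == A | B, and usub.sat(A, B) + B == umax(A, B). A standalone spot-check of those identities with plain unsigned arithmetic (not taken from the patch):

#include <algorithm>
#include <cassert>

int main() {
  for (unsigned A : {0u, 1u, 0x8Fu, 0xFFFFFFFFu})
    for (unsigned B : {0u, 3u, 0xF0u, 0x80000000u}) {
      assert(A + B - (A | B) == (A & B)); // sub (add A, B), (or A, B) --> and
      assert(A + B - (A & B) == (A | B)); // sub (add A, B), (and A, B) --> or
      unsigned Sat = A >= B ? A - B : 0;  // usub.sat semantics
      assert(Sat + B == std::max(A, B));  // usub.sat(A, B) + B --> umax(A, B)
    }
  return 0;
}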
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 1304d46fdef4..85a7abe211b3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -13,10 +13,12 @@
#include "InstCombineInternal.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/Local.h"
+
using namespace llvm;
using namespace PatternMatch;
@@ -112,57 +114,12 @@ static Value *SimplifyBSwap(BinaryOperator &I,
return Builder.CreateCall(F, BinOp);
}
-/// This handles expressions of the form ((val OP C1) & C2). Where
-/// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.
-Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
- ConstantInt *OpRHS,
- ConstantInt *AndRHS,
- BinaryOperator &TheAnd) {
- Value *X = Op->getOperand(0);
-
- switch (Op->getOpcode()) {
- default: break;
- case Instruction::Add:
- if (Op->hasOneUse()) {
- // Adding a one to a single bit bit-field should be turned into an XOR
- // of the bit. First thing to check is to see if this AND is with a
- // single bit constant.
- const APInt &AndRHSV = AndRHS->getValue();
-
- // If there is only one bit set.
- if (AndRHSV.isPowerOf2()) {
- // Ok, at this point, we know that we are masking the result of the
- // ADD down to exactly one bit. If the constant we are adding has
- // no bits set below this bit, then we can eliminate the ADD.
- const APInt& AddRHS = OpRHS->getValue();
-
- // Check to see if any bits below the one bit set in AndRHSV are set.
- if ((AddRHS & (AndRHSV - 1)).isNullValue()) {
- // If not, the only thing that can effect the output of the AND is
- // the bit specified by AndRHSV. If that bit is set, the effect of
- // the XOR is to toggle the bit. If it is clear, then the ADD has
- // no effect.
- if ((AddRHS & AndRHSV).isNullValue()) { // Bit is not set, noop
- return replaceOperand(TheAnd, 0, X);
- } else {
- // Pull the XOR out of the AND.
- Value *NewAnd = Builder.CreateAnd(X, AndRHS);
- NewAnd->takeName(Op);
- return BinaryOperator::CreateXor(NewAnd, AndRHS);
- }
- }
- }
- }
- break;
- }
- return nullptr;
-}
-
/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
/// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates
/// whether to treat V, Lo, and Hi as signed or not.
-Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
- bool isSigned, bool Inside) {
+Value *InstCombinerImpl::insertRangeTest(Value *V, const APInt &Lo,
+ const APInt &Hi, bool isSigned,
+ bool Inside) {
assert((isSigned ? Lo.slt(Hi) : Lo.ult(Hi)) &&
"Lo is not < Hi in range emission code!");
@@ -437,11 +394,10 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
/// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
/// and the right hand side is of type BMask_Mixed. For example,
/// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
-static Value * foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
- ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
- Value *A, Value *B, Value *C, Value *D, Value *E,
- ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
- llvm::InstCombiner::BuilderTy &Builder) {
+static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
+ ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C,
+ Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
+ InstCombiner::BuilderTy &Builder) {
// We are given the canonical form:
// (icmp ne (A & B), 0) & (icmp eq (A & D), E).
// where D & E == E.
@@ -452,17 +408,9 @@ static Value * foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
//
// We currently handle the case of B, C, D, E are constant.
//
- ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- if (!BCst)
- return nullptr;
- ConstantInt *CCst = dyn_cast<ConstantInt>(C);
- if (!CCst)
- return nullptr;
- ConstantInt *DCst = dyn_cast<ConstantInt>(D);
- if (!DCst)
- return nullptr;
- ConstantInt *ECst = dyn_cast<ConstantInt>(E);
- if (!ECst)
+ ConstantInt *BCst, *CCst, *DCst, *ECst;
+ if (!match(B, m_ConstantInt(BCst)) || !match(C, m_ConstantInt(CCst)) ||
+ !match(D, m_ConstantInt(DCst)) || !match(E, m_ConstantInt(ECst)))
return nullptr;
ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
@@ -568,11 +516,9 @@ static Value * foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
/// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side
/// aren't of the common mask pattern type.
static Value *foldLogOpOfMaskedICmpsAsymmetric(
- ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
- Value *A, Value *B, Value *C, Value *D, Value *E,
- ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
- unsigned LHSMask, unsigned RHSMask,
- llvm::InstCombiner::BuilderTy &Builder) {
+ ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C,
+ Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
+ unsigned LHSMask, unsigned RHSMask, InstCombiner::BuilderTy &Builder) {
assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
"Expected equality predicates for masked type of icmps.");
// Handle Mask_NotAllZeros-BMask_Mixed cases.
@@ -603,7 +549,7 @@ static Value *foldLogOpOfMaskedICmpsAsymmetric(
/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// into a single (icmp(A & X) ==/!= Y).
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
- llvm::InstCombiner::BuilderTy &Builder) {
+ InstCombiner::BuilderTy &Builder) {
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
Optional<std::pair<unsigned, unsigned>> MaskPair =
@@ -673,11 +619,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// Remaining cases assume at least that B and D are constant, and depend on
// their actual values. This isn't strictly necessary, just a "handle the
// easy cases for now" decision.
- ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- if (!BCst)
- return nullptr;
- ConstantInt *DCst = dyn_cast<ConstantInt>(D);
- if (!DCst)
+ ConstantInt *BCst, *DCst;
+ if (!match(B, m_ConstantInt(BCst)) || !match(D, m_ConstantInt(DCst)))
return nullptr;
if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) {
@@ -718,11 +661,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// We can't simply use C and E because we might actually handle
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D, having a single bit set.
- ConstantInt *CCst = dyn_cast<ConstantInt>(C);
- if (!CCst)
- return nullptr;
- ConstantInt *ECst = dyn_cast<ConstantInt>(E);
- if (!ECst)
+ ConstantInt *CCst, *ECst;
+ if (!match(C, m_ConstantInt(CCst)) || !match(E, m_ConstantInt(ECst)))
return nullptr;
if (PredL != NewCC)
CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst));
@@ -748,8 +688,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
/// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
/// If \p Inverted is true then the check is for the inverted range, e.g.
/// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
-Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
- bool Inverted) {
+Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
+ bool Inverted) {
// Check the lower range comparison, e.g. x >= 0
// InstCombine already ensured that if there is a constant it's on the RHS.
ConstantInt *RangeStart = dyn_cast<ConstantInt>(Cmp0->getOperand(1));
@@ -856,8 +796,9 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
-Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
- BinaryOperator &Logic) {
+Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
+ ICmpInst *RHS,
+ BinaryOperator &Logic) {
bool JoinedByAnd = Logic.getOpcode() == Instruction::And;
assert((JoinedByAnd || Logic.getOpcode() == Instruction::Or) &&
"Wrong opcode");
@@ -869,10 +810,8 @@ Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ)
return nullptr;
- // TODO support vector splats
- ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
- ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
- if (!LHSC || !RHSC || !LHSC->isZero() || !RHSC->isZero())
+ if (!match(LHS->getOperand(1), m_Zero()) ||
+ !match(RHS->getOperand(1), m_Zero()))
return nullptr;
Value *A, *B, *C, *D;
@@ -1184,8 +1123,8 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
}
/// Fold (icmp)&(icmp) if possible.
-Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
- BinaryOperator &And) {
+Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+ BinaryOperator &And) {
const SimplifyQuery Q = SQ.getWithInstruction(&And);
// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
@@ -1244,9 +1183,10 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
- ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
- ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
- if (!LHSC || !RHSC)
+
+ ConstantInt *LHSC, *RHSC;
+ if (!match(LHS->getOperand(1), m_ConstantInt(LHSC)) ||
+ !match(RHS->getOperand(1), m_ConstantInt(RHSC)))
return nullptr;
if (LHSC == RHSC && PredL == PredR) {
@@ -1404,7 +1344,8 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return nullptr;
}
-Value *InstCombiner::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
+Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
+ bool IsAnd) {
Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
@@ -1514,8 +1455,8 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I,
Value *A, *B;
if (match(I.getOperand(0), m_OneUse(m_Not(m_Value(A)))) &&
match(I.getOperand(1), m_OneUse(m_Not(m_Value(B)))) &&
- !isFreeToInvert(A, A->hasOneUse()) &&
- !isFreeToInvert(B, B->hasOneUse())) {
+ !InstCombiner::isFreeToInvert(A, A->hasOneUse()) &&
+ !InstCombiner::isFreeToInvert(B, B->hasOneUse())) {
Value *AndOr = Builder.CreateBinOp(Opcode, A, B, I.getName() + ".demorgan");
return BinaryOperator::CreateNot(AndOr);
}
@@ -1523,7 +1464,7 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I,
return nullptr;
}
-bool InstCombiner::shouldOptimizeCast(CastInst *CI) {
+bool InstCombinerImpl::shouldOptimizeCast(CastInst *CI) {
Value *CastSrc = CI->getOperand(0);
// Noop casts and casts of constants should be eliminated trivially.
@@ -1579,7 +1520,7 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
}
/// Fold {and,or,xor} (cast X), Y.
-Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
+Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
auto LogicOpc = I.getOpcode();
assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding");
@@ -1686,6 +1627,14 @@ static Instruction *foldOrToXor(BinaryOperator &I,
match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+ // Operand complexity canonicalization guarantees that the 'xor' is Op0.
+ // (A ^ B) | ~(A | B) --> ~(A & B)
+ // (A ^ B) | ~(B | A) --> ~(A & B)
+ if (Op0->hasOneUse() || Op1->hasOneUse())
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
+
// (A & ~B) | (~A & B) --> A ^ B
// (A & ~B) | (B & ~A) --> A ^ B
// (~B & A) | (~A & B) --> A ^ B
@@ -1700,32 +1649,13 @@ static Instruction *foldOrToXor(BinaryOperator &I,
/// Return true if a constant shift amount is always less than the specified
/// bit-width. If not, the shift could create poison in the narrower type.
static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) {
- if (auto *ScalarC = dyn_cast<ConstantInt>(C))
- return ScalarC->getZExtValue() < BitWidth;
-
- if (C->getType()->isVectorTy()) {
- // Check each element of a constant vector.
- unsigned NumElts = cast<VectorType>(C->getType())->getNumElements();
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *Elt = C->getAggregateElement(i);
- if (!Elt)
- return false;
- if (isa<UndefValue>(Elt))
- continue;
- auto *CI = dyn_cast<ConstantInt>(Elt);
- if (!CI || CI->getZExtValue() >= BitWidth)
- return false;
- }
- return true;
- }
-
- // The constant is a constant expression or unknown.
- return false;
+ APInt Threshold(C->getType()->getScalarSizeInBits(), BitWidth);
+ return match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
}
/// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and
/// a common zext operand: and (binop (zext X), C), (zext X).
-Instruction *InstCombiner::narrowMaskedBinOp(BinaryOperator &And) {
+Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) {
// This transform could also apply to {or, and, xor}, but there are better
// folds for those cases, so we don't expect those patterns here. AShr is not
// handled because it should always be transformed to LShr in this sequence.
@@ -1767,7 +1697,9 @@ Instruction *InstCombiner::narrowMaskedBinOp(BinaryOperator &And) {
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
-Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
+ Type *Ty = I.getType();
+
if (Value *V = SimplifyAndInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1795,21 +1727,22 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ Value *X, *Y;
+ if (match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X)))) &&
+ match(Op1, m_One())) {
+ // (1 << X) & 1 --> zext(X == 0)
+ // (1 >> X) & 1 --> zext(X == 0)
+ Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, 0));
+ return new ZExtInst(IsZero, Ty);
+ }
+
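// Illustrative sketch, not part of the patch: the scalar identity behind the
// fold hoisted above. For any in-range shift amount X (out-of-range shifts
// are poison in IR and undefined in C++):
//   (1 << X) & 1  ==  zext(X == 0)      (1 >> X) & 1  ==  zext(X == 0)
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 32; ++X) {
    uint32_t Expected = (X == 0) ? 1u : 0u; // zext(X == 0)
    assert(((1u << X) & 1u) == Expected);
    assert(((1u >> X) & 1u) == Expected);
  }
  return 0;
}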
const APInt *C;
if (match(Op1, m_APInt(C))) {
- Value *X, *Y;
- if (match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X)))) &&
- C->isOneValue()) {
- // (1 << X) & 1 --> zext(X == 0)
- // (1 >> X) & 1 --> zext(X == 0)
- Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(I.getType(), 0));
- return new ZExtInst(IsZero, I.getType());
- }
-
const APInt *XorC;
if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_APInt(XorC))))) {
// (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
- Constant *NewC = ConstantInt::get(I.getType(), *C & *XorC);
+ Constant *NewC = ConstantInt::get(Ty, *C & *XorC);
Value *And = Builder.CreateAnd(X, Op1);
And->takeName(Op0);
return BinaryOperator::CreateXor(And, NewC);
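// Illustrative sketch, not part of the patch: the fold above works because
// AND distributes over XOR bit-by-bit:
//   (X ^ C1) & C2  ==  (X & C2) ^ (C1 & C2)
// The constants below are arbitrary sample values.
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C1 = 0x5A, C2 = 0x3C;
  for (unsigned x = 0; x < 256; ++x) {
    uint8_t X = static_cast<uint8_t>(x);
    assert(uint8_t((X ^ C1) & C2) == uint8_t((X & C2) ^ (C1 & C2)));
  }
  return 0;
}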
@@ -1824,11 +1757,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// that aren't set in C2. Meaning we can replace (C1&C2) with C1 in
// above, but this feels safer.
APInt Together = *C & *OrC;
- Value *And = Builder.CreateAnd(X, ConstantInt::get(I.getType(),
- Together ^ *C));
+ Value *And = Builder.CreateAnd(X, ConstantInt::get(Ty, Together ^ *C));
And->takeName(Op0);
- return BinaryOperator::CreateOr(And, ConstantInt::get(I.getType(),
- Together));
+ return BinaryOperator::CreateOr(And, ConstantInt::get(Ty, Together));
}
// If the mask is only needed on one incoming arm, push the 'and' op up.
@@ -1849,27 +1780,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return BinaryOperator::Create(BinOp, NewLHS, Y);
}
}
+
+ unsigned Width = Ty->getScalarSizeInBits();
const APInt *ShiftC;
if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC)))))) {
- unsigned Width = I.getType()->getScalarSizeInBits();
if (*C == APInt::getLowBitsSet(Width, Width - ShiftC->getZExtValue())) {
// We are clearing high bits that were potentially set by sext+ashr:
// and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC
- Value *Sext = Builder.CreateSExt(X, I.getType());
- Constant *ShAmtC = ConstantInt::get(I.getType(), ShiftC->zext(Width));
+ Value *Sext = Builder.CreateSExt(X, Ty);
+ Constant *ShAmtC = ConstantInt::get(Ty, ShiftC->zext(Width));
return BinaryOperator::CreateLShr(Sext, ShAmtC);
}
}
+
+ const APInt *AddC;
+ if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) {
+ // If we add zeros to every bit below a mask, the add has no effect:
+ // (X + AddC) & LowMaskC --> X & LowMaskC
+ unsigned Ctlz = C->countLeadingZeros();
+ APInt LowMask(APInt::getLowBitsSet(Width, Width - Ctlz));
+ if ((*AddC & LowMask).isNullValue())
+ return BinaryOperator::CreateAnd(X, Op1);
+
+ // If we are masking the result of the add down to exactly one bit and
+ // the constant we are adding has no bits set below that bit, then the
+ // add is flipping a single bit. Example:
+ // (X + 4) & 4 --> (X & 4) ^ 4
+ if (Op0->hasOneUse() && C->isPowerOf2() && (*AddC & (*C - 1)) == 0) {
+ assert((*C & *AddC) != 0 && "Expected common bit");
+ Value *NewAnd = Builder.CreateAnd(X, Op1);
+ return BinaryOperator::CreateXor(NewAnd, Op1);
+ }
+ }
}
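// Illustrative sketch, not part of the patch: exhaustive 8-bit checks of the
// two 'add' folds introduced above (0x40 and 4 are arbitrary sample
// constants satisfying the preconditions):
//   1) all set bits of the addend lie above the mask, so the masked bits are
//      untouched:                     (X + 0x40) & 0x0F == X & 0x0F
//   2) power-of-2 mask, addend has no bits below it, so the add just flips
//      that one bit:                  (X + 4) & 4 == (X & 4) ^ 4
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    uint8_t X = static_cast<uint8_t>(x);
    assert(uint8_t((X + 0x40) & 0x0F) == uint8_t(X & 0x0F)); // fold 1
    assert(uint8_t((X + 4) & 4) == uint8_t((X & 4) ^ 4));    // fold 2
  }
  return 0;
}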
- if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
+ ConstantInt *AndRHS;
+ if (match(Op1, m_ConstantInt(AndRHS))) {
const APInt &AndRHSMask = AndRHS->getValue();
// Optimize a variety of ((val OP C1) & C2) combinations...
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
// ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth
// of X and OP behaves well when given trunc(C1) and X.
- // TODO: Do this for vectors by using m_APInt isntead of m_ConstantInt.
+ // TODO: Do this for vectors by using m_APInt instead of m_ConstantInt.
switch (Op0I->getOpcode()) {
default:
break;
@@ -1894,31 +1847,30 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
BinOp = Builder.CreateBinOp(Op0I->getOpcode(), TruncC1, X);
auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType());
auto *And = Builder.CreateAnd(BinOp, TruncC2);
- return new ZExtInst(And, I.getType());
+ return new ZExtInst(And, Ty);
}
}
}
-
- if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
- if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
- return Res;
}
+ }
- // If this is an integer truncation, and if the source is an 'and' with
- // immediate, transform it. This frequently occurs for bitfield accesses.
- {
- Value *X = nullptr; ConstantInt *YC = nullptr;
- if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
- // Change: and (trunc (and X, YC) to T), C2
- // into : and (trunc X to T), trunc(YC) & C2
- // This will fold the two constants together, which may allow
- // other simplifications.
- Value *NewCast = Builder.CreateTrunc(X, I.getType(), "and.shrunk");
- Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
- C3 = ConstantExpr::getAnd(C3, AndRHS);
- return BinaryOperator::CreateAnd(NewCast, C3);
- }
- }
+ if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))),
+ m_SignMask())) &&
+ match(Y, m_SpecificInt_ICMP(
+ ICmpInst::Predicate::ICMP_EQ,
+ APInt(Ty->getScalarSizeInBits(),
+ Ty->getScalarSizeInBits() -
+ X->getType()->getScalarSizeInBits())))) {
+ auto *SExt = Builder.CreateSExt(X, Ty, X->getName() + ".signext");
+ auto *SanitizedSignMask = cast<Constant>(Op1);
+ // We must be careful with the undef elements of the sign bit mask, however:
+ // the mask elt can be undef iff the shift amount for that lane was undef,
+ // otherwise we need to sanitize undef masks to zero.
+ SanitizedSignMask = Constant::replaceUndefsWith(
+ SanitizedSignMask, ConstantInt::getNullValue(Ty->getScalarType()));
+ SanitizedSignMask =
+ Constant::mergeUndefsWith(SanitizedSignMask, cast<Constant>(Y));
+ return BinaryOperator::CreateAnd(SExt, SanitizedSignMask);
}
if (Instruction *Z = narrowMaskedBinOp(I))
@@ -1939,6 +1891,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (match(Op0, m_OneUse(m_c_Xor(m_Specific(Op1), m_Value(B)))))
return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(B));
+ // A & ~(A ^ B) --> A & B
+ if (match(Op1, m_Not(m_c_Xor(m_Specific(Op0), m_Value(B)))))
+ return BinaryOperator::CreateAnd(Op0, B);
+ // ~(A ^ B) & A --> A & B
+ if (match(Op0, m_Not(m_c_Xor(m_Specific(Op1), m_Value(B)))))
+ return BinaryOperator::CreateAnd(Op1, B);
+
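// Illustrative sketch, not part of the patch: brute-force check of the new
// folds above, A & ~(A ^ B) == A & B (and the commuted form).
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = static_cast<uint8_t>(a), B = static_cast<uint8_t>(b);
      assert(uint8_t(A & uint8_t(~(A ^ B))) == uint8_t(A & B));
      assert(uint8_t(uint8_t(~(A ^ B)) & A) == uint8_t(A & B));
    }
  return 0;
}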
// (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
@@ -1977,7 +1936,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// TODO: Make this recursive; it's a little tricky because an arbitrary
// number of 'and' instructions might have to be created.
- Value *X, *Y;
if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
if (Value *Res = foldAndOfICmps(LHS, Cmp, I))
@@ -2011,29 +1969,30 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
Value *A;
if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, Op1, Constant::getNullValue(I.getType()));
+ return SelectInst::Create(A, Op1, Constant::getNullValue(Ty));
if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, Op0, Constant::getNullValue(I.getType()));
+ return SelectInst::Create(A, Op0, Constant::getNullValue(Ty));
// and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0.
- {
- Value *X, *Y;
- const APInt *ShAmt;
- Type *Ty = I.getType();
- if (match(&I, m_c_And(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)),
- m_APInt(ShAmt))),
- m_Deferred(X))) &&
- *ShAmt == Ty->getScalarSizeInBits() - 1) {
- Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
- return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty));
- }
+ if (match(&I, m_c_And(m_OneUse(m_AShr(
+ m_NSWSub(m_Value(Y), m_Value(X)),
+ m_SpecificInt(Ty->getScalarSizeInBits() - 1))),
+ m_Deferred(X)))) {
+ Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
+ return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty));
}
+ // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
+ if (sinkNotIntoOtherHandOfAndOrOr(I))
+ return &I;
+
return nullptr;
}
-Instruction *InstCombiner::matchBSwap(BinaryOperator &Or) {
+Instruction *InstCombinerImpl::matchBSwapOrBitReverse(BinaryOperator &Or,
+ bool MatchBSwaps,
+ bool MatchBitReversals) {
assert(Or.getOpcode() == Instruction::Or && "bswap requires an 'or'");
Value *Op0 = Or.getOperand(0), *Op1 = Or.getOperand(1);
@@ -2045,33 +2004,32 @@ Instruction *InstCombiner::matchBSwap(BinaryOperator &Or) {
Op1 = Ext->getOperand(0);
// (A | B) | C and A | (B | C) -> bswap if possible.
- bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) ||
- match(Op1, m_Or(m_Value(), m_Value()));
+ bool OrWithOrs = match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value()));
- // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
- bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
- match(Op1, m_LogicalShift(m_Value(), m_Value()));
+ // (A >> B) | C and (A << B) | C -> bswap if possible.
+ bool OrWithShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) ||
+ match(Op1, m_LogicalShift(m_Value(), m_Value()));
- // (A & B) | (C & D) -> bswap if possible.
- bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) &&
- match(Op1, m_And(m_Value(), m_Value()));
+ // (A & B) | C and A | (B & C) -> bswap if possible.
+ bool OrWithAnds = match(Op0, m_And(m_Value(), m_Value())) ||
+ match(Op1, m_And(m_Value(), m_Value()));
- // (A << B) | (C & D) -> bswap if possible.
- // The bigger pattern here is ((A & C1) << C2) | ((B >> C2) & C1), which is a
- // part of the bswap idiom for specific values of C1, C2 (e.g. C1 = 16711935,
- // C2 = 8 for i32).
- // This pattern can occur when the operands of the 'or' are not canonicalized
- // for some reason (not having only one use, for example).
- bool OrOfAndAndSh = (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
- match(Op1, m_And(m_Value(), m_Value()))) ||
- (match(Op0, m_And(m_Value(), m_Value())) &&
- match(Op1, m_LogicalShift(m_Value(), m_Value())));
+ // fshl(A,B,C) | D and A | fshl(B,C,D) -> bswap if possible.
+ // fshr(A,B,C) | D and A | fshr(B,C,D) -> bswap if possible.
+ bool OrWithFunnels = match(Op0, m_FShl(m_Value(), m_Value(), m_Value())) ||
+ match(Op0, m_FShr(m_Value(), m_Value(), m_Value())) ||
+                       match(Op1, m_FShl(m_Value(), m_Value(), m_Value())) ||
+                       match(Op1, m_FShr(m_Value(), m_Value(), m_Value()));
- if (!OrOfOrs && !OrOfShifts && !OrOfAnds && !OrOfAndAndSh)
+ // TODO: Do we need all these filtering checks or should we just rely on
+ // recognizeBSwapOrBitReverseIdiom + collectBitParts to reject them quickly?
+ if (!OrWithOrs && !OrWithShifts && !OrWithAnds && !OrWithFunnels)
return nullptr;
- SmallVector<Instruction*, 4> Insts;
- if (!recognizeBSwapOrBitReverseIdiom(&Or, true, false, Insts))
+ SmallVector<Instruction *, 4> Insts;
+ if (!recognizeBSwapOrBitReverseIdiom(&Or, MatchBSwaps, MatchBitReversals,
+ Insts))
return nullptr;
Instruction *LastInst = Insts.pop_back_val();
LastInst->removeFromParent();
@@ -2081,34 +2039,72 @@ Instruction *InstCombiner::matchBSwap(BinaryOperator &Or) {
return LastInst;
}
-/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
-static Instruction *matchRotate(Instruction &Or) {
+/// Match UB-safe variants of the funnel shift intrinsic.
+static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
// TODO: Can we reduce the code duplication between this and the related
// rotate matching code under visitSelect and visitTrunc?
unsigned Width = Or.getType()->getScalarSizeInBits();
- if (!isPowerOf2_32(Width))
- return nullptr;
- // First, find an or'd pair of opposite shifts with the same shifted operand:
- // or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
+ // First, find an or'd pair of opposite shifts:
+ // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
BinaryOperator *Or0, *Or1;
if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
!match(Or.getOperand(1), m_BinOp(Or1)))
return nullptr;
- Value *ShVal, *ShAmt0, *ShAmt1;
- if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
- !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
- return nullptr;
+ Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
+ if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
+ !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
+ Or0->getOpcode() == Or1->getOpcode())
+ return nullptr;
+
+ // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
+ if (Or0->getOpcode() == BinaryOperator::LShr) {
+ std::swap(Or0, Or1);
+ std::swap(ShVal0, ShVal1);
+ std::swap(ShAmt0, ShAmt1);
+ }
+ assert(Or0->getOpcode() == BinaryOperator::Shl &&
+ Or1->getOpcode() == BinaryOperator::LShr &&
+ "Illegal or(shift,shift) pair");
+
+ // Match the shift amount operands for a funnel shift pattern. This always
+ // matches a subtraction on the R operand.
+ auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
+ // Check for constant shift amounts that sum to the bitwidth.
+ const APInt *LI, *RI;
+ if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
+ if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
+ return ConstantInt::get(L->getType(), *LI);
+
+ Constant *LC, *RC;
+ if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
+ match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+ match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+ match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
+ return ConstantExpr::mergeUndefsWith(LC, RC);
+
+ // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
+ // We limit this to X < Width in case the backend re-expands the intrinsic,
+ // and has to reintroduce a shift modulo operation (InstCombine might remove
+ // it after this fold). This still doesn't guarantee that the final codegen
+ // will match this original pattern.
+ if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+ KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+ return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+ }
- BinaryOperator::BinaryOps ShiftOpcode0 = Or0->getOpcode();
- BinaryOperator::BinaryOps ShiftOpcode1 = Or1->getOpcode();
- if (ShiftOpcode0 == ShiftOpcode1)
- return nullptr;
+ // For non-constant cases, the following patterns currently only work for
+ // rotation patterns.
+ // TODO: Add general funnel-shift compatible patterns.
+ if (ShVal0 != ShVal1)
+ return nullptr;
+
+ // For non-constant cases we don't support non-pow2 shift masks.
+ // TODO: Is it worth matching urem as well?
+ if (!isPowerOf2_32(Width))
+ return nullptr;
- // Match the shift amount operands for a rotate pattern. This always matches
- // a subtraction on the R operand.
- auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
// The shift amount may be masked with negation:
// (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
Value *X;
@@ -2124,23 +2120,25 @@ static Instruction *matchRotate(Instruction &Or) {
m_SpecificInt(Mask))))
return L;
+ if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+ match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
+ return L;
+
return nullptr;
};
Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
- bool SubIsOnLHS = false;
+ bool IsFshl = true; // Sub on LSHR.
if (!ShAmt) {
ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
- SubIsOnLHS = true;
+ IsFshl = false; // Sub on SHL.
}
if (!ShAmt)
return nullptr;
- bool IsFshl = (!SubIsOnLHS && ShiftOpcode0 == BinaryOperator::Shl) ||
- (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
- return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt });
+ return IntrinsicInst::Create(F, {ShVal0, ShVal1, ShAmt});
}
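// Illustrative sketch, not part of the patch: the rotate special case that
// matchFunnelShift recognises, in scalar form. For constant shift amounts
// that are both in range and sum to the bit-width, the or-of-opposite-shifts
// is a rotate, which is what fshl(V, V, Amt) computes. rotl32 is an ad hoc
// reference helper written via a 64-bit widening so the check is not
// circular.
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, uint32_t A) {      // 0 <= A < 32
  uint64_t Wide = static_cast<uint64_t>(V) << A;      // bits carried past 31
  return static_cast<uint32_t>(Wide | (Wide >> 32));  // ...wrap back around
}

int main() {
  const uint32_t V = 0x12345678u;
  for (uint32_t A = 1; A < 32; ++A)                   // constant-amount form
    assert(((V << A) | (V >> (32 - A))) == rotl32(V, A));
  for (uint32_t X = 0; X < 64; ++X)                   // masked-negation form
    assert(((V << (X & 31)) | (V >> ((0u - X) & 31))) == rotl32(V, X & 31));
  return 0;
}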
/// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
@@ -2198,7 +2196,7 @@ static Instruction *matchOrConcat(Instruction &Or,
/// If all elements of two constant vectors are 0/-1 and inverses, return true.
static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
- unsigned NumElts = cast<VectorType>(C1->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(C1->getType())->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *EltC1 = C1->getAggregateElement(i);
Constant *EltC2 = C2->getAggregateElement(i);
@@ -2216,7 +2214,7 @@ static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
/// We have an expression of the form (A & C) | (B & D). If A is a scalar or
/// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
/// B, it can be used as the condition operand of a select instruction.
-Value *InstCombiner::getSelectCondition(Value *A, Value *B) {
+Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B) {
// Step 1: We may have peeked through bitcasts in the caller.
// Exit immediately if we don't have (vector) integer types.
Type *Ty = A->getType();
@@ -2273,8 +2271,8 @@ Value *InstCombiner::getSelectCondition(Value *A, Value *B) {
/// We have an expression of the form (A & C) | (B & D). Try to simplify this
/// to "A' ? C : D", where A' is a boolean or vector of booleans.
-Value *InstCombiner::matchSelectFromAndOr(Value *A, Value *C, Value *B,
- Value *D) {
+Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B,
+ Value *D) {
// The potential condition of the select may be bitcasted. In that case, look
// through its bitcast and the corresponding bitcast of the 'not' condition.
Type *OrigType = A->getType();
@@ -2294,8 +2292,8 @@ Value *InstCombiner::matchSelectFromAndOr(Value *A, Value *C, Value *B,
}
/// Fold (icmp)|(icmp) if possible.
-Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
- BinaryOperator &Or) {
+Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+ BinaryOperator &Or) {
const SimplifyQuery Q = SQ.getWithInstruction(&Or);
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
@@ -2304,9 +2302,10 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return V;
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
-
- ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
- ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
+ Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1);
+ auto *LHSC = dyn_cast<ConstantInt>(LHS1);
+ auto *RHSC = dyn_cast<ConstantInt>(RHS1);
// Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3)
// --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3)
@@ -2318,24 +2317,20 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// 3) C1 ^ C2 is one-bit mask.
// 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are one-bit mask.
// This implies all values in the two ranges differ by exactly one bit.
-
if ((PredL == ICmpInst::ICMP_ULT || PredL == ICmpInst::ICMP_ULE) &&
PredL == PredR && LHSC && RHSC && LHS->hasOneUse() && RHS->hasOneUse() &&
LHSC->getType() == RHSC->getType() &&
LHSC->getValue() == (RHSC->getValue())) {
- Value *LAdd = LHS->getOperand(0);
- Value *RAdd = RHS->getOperand(0);
-
- Value *LAddOpnd, *RAddOpnd;
+ Value *AddOpnd;
ConstantInt *LAddC, *RAddC;
- if (match(LAdd, m_Add(m_Value(LAddOpnd), m_ConstantInt(LAddC))) &&
- match(RAdd, m_Add(m_Value(RAddOpnd), m_ConstantInt(RAddC))) &&
+ if (match(LHS0, m_Add(m_Value(AddOpnd), m_ConstantInt(LAddC))) &&
+ match(RHS0, m_Add(m_Specific(AddOpnd), m_ConstantInt(RAddC))) &&
LAddC->getValue().ugt(LHSC->getValue()) &&
RAddC->getValue().ugt(LHSC->getValue())) {
APInt DiffC = LAddC->getValue() ^ RAddC->getValue();
- if (LAddOpnd == RAddOpnd && DiffC.isPowerOf2()) {
+ if (DiffC.isPowerOf2()) {
ConstantInt *MaxAddC = nullptr;
if (LAddC->getValue().ult(RAddC->getValue()))
MaxAddC = RAddC;
@@ -2355,7 +2350,7 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
RangeDiff.ugt(LHSC->getValue())) {
Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC);
- Value *NewAnd = Builder.CreateAnd(LAddOpnd, MaskC);
+ Value *NewAnd = Builder.CreateAnd(AddOpnd, MaskC);
Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC);
return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC);
}
@@ -2365,15 +2360,12 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (predicatesFoldable(PredL, PredR)) {
- if (LHS->getOperand(0) == RHS->getOperand(1) &&
- LHS->getOperand(1) == RHS->getOperand(0))
+ if (LHS0 == RHS1 && LHS1 == RHS0)
LHS->swapOperands();
- if (LHS->getOperand(0) == RHS->getOperand(0) &&
- LHS->getOperand(1) == RHS->getOperand(1)) {
- Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ if (LHS0 == RHS0 && LHS1 == RHS1) {
unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
bool IsSigned = LHS->isSigned() || RHS->isSigned();
- return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
+ return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder);
}
}
@@ -2382,31 +2374,30 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder))
return V;
- Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
if (LHS->hasOneUse() || RHS->hasOneUse()) {
// (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
// (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
Value *A = nullptr, *B = nullptr;
- if (PredL == ICmpInst::ICMP_EQ && LHSC && LHSC->isZero()) {
+ if (PredL == ICmpInst::ICMP_EQ && match(LHS1, m_Zero())) {
B = LHS0;
- if (PredR == ICmpInst::ICMP_ULT && LHS0 == RHS->getOperand(1))
+ if (PredR == ICmpInst::ICMP_ULT && LHS0 == RHS1)
A = RHS0;
else if (PredR == ICmpInst::ICMP_UGT && LHS0 == RHS0)
- A = RHS->getOperand(1);
+ A = RHS1;
}
// (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1)
// (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1)
- else if (PredR == ICmpInst::ICMP_EQ && RHSC && RHSC->isZero()) {
+ else if (PredR == ICmpInst::ICMP_EQ && match(RHS1, m_Zero())) {
B = RHS0;
- if (PredL == ICmpInst::ICMP_ULT && RHS0 == LHS->getOperand(1))
+ if (PredL == ICmpInst::ICMP_ULT && RHS0 == LHS1)
A = LHS0;
- else if (PredL == ICmpInst::ICMP_UGT && LHS0 == RHS0)
- A = LHS->getOperand(1);
+ else if (PredL == ICmpInst::ICMP_UGT && RHS0 == LHS0)
+ A = LHS1;
}
- if (A && B)
+ if (A && B && B->getType()->isIntOrIntVectorTy())
return Builder.CreateICmp(
ICmpInst::ICMP_UGE,
- Builder.CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A);
+ Builder.CreateAdd(B, Constant::getAllOnesValue(B->getType())), A);
}
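// Illustrative sketch, not part of the patch: the unsigned identity behind
// the fold above. With wrapping subtraction, B - 1 becomes the maximum value
// when B == 0, so the disjunction collapses into a single compare:
//   (B == 0) || (A <u B)   ==   (A <=u B - 1)
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = static_cast<uint8_t>(a), B = static_cast<uint8_t>(b);
      bool Lhs = (B == 0) || (A < B);
      bool Rhs = A <= uint8_t(B - 1u);   // wraps to 0xFF when B == 0
      assert(Lhs == Rhs);
    }
  return 0;
}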
if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, Or, Builder, Q))
@@ -2435,18 +2426,21 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder))
return X;
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ // TODO: Remove this when foldLogOpOfMaskedICmps can handle vectors.
+ if (PredL == ICmpInst::ICMP_NE && match(LHS1, m_Zero()) &&
+ PredR == ICmpInst::ICMP_NE && match(RHS1, m_Zero()) &&
+ LHS0->getType()->isIntOrIntVectorTy() &&
+ LHS0->getType() == RHS0->getType()) {
+ Value *NewOr = Builder.CreateOr(LHS0, RHS0);
+ return Builder.CreateICmp(PredL, NewOr,
+ Constant::getNullValue(NewOr->getType()));
+ }
+
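// Illustrative sketch, not part of the patch: the basic identity the
// (now type-generalised) fold above relies on:
//   (A != 0) || (B != 0)   ==   ((A | B) != 0)
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = static_cast<uint8_t>(a), B = static_cast<uint8_t>(b);
      assert(((A != 0) || (B != 0)) == ((A | B) != 0));
    }
  return 0;
}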
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
if (!LHSC || !RHSC)
return nullptr;
- if (LHSC == RHSC && PredL == PredR) {
- // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
- if (PredL == ICmpInst::ICMP_NE && LHSC->isZero()) {
- Value *NewOr = Builder.CreateOr(LHS0, RHS0);
- return Builder.CreateICmp(PredL, NewOr, LHSC);
- }
- }
-
// (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
// iff C2 + CA == C1.
if (PredL == ICmpInst::ICMP_ULT && PredR == ICmpInst::ICMP_EQ) {
@@ -2560,7 +2554,7 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
-Instruction *InstCombiner::visitOr(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyOrInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -2590,11 +2584,12 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
return FoldedLogic;
- if (Instruction *BSwap = matchBSwap(I))
+ if (Instruction *BSwap = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true,
+ /*MatchBitReversals*/ false))
return BSwap;
- if (Instruction *Rotate = matchRotate(I))
- return Rotate;
+ if (Instruction *Funnel = matchFunnelShift(I, *this))
+ return Funnel;
if (Instruction *Concat = matchOrConcat(I, Builder))
return replaceInstUsesWith(I, Concat);
@@ -2614,9 +2609,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *A, *B, *C, *D;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
- ConstantInt *C1 = dyn_cast<ConstantInt>(C);
- ConstantInt *C2 = dyn_cast<ConstantInt>(D);
- if (C1 && C2) { // (A & C1)|(B & C2)
+ // (A & C1)|(B & C2)
+ ConstantInt *C1, *C2;
+ if (match(C, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2))) {
Value *V1 = nullptr, *V2 = nullptr;
if ((C1->getValue() & C2->getValue()).isNullValue()) {
// ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
@@ -2807,7 +2802,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// ORs in the hopes that we'll be able to simplify it this way.
// (X|C) | V --> (X|V) | C
ConstantInt *CI;
- if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
+ if (Op0->hasOneUse() && !match(Op1, m_ConstantInt()) &&
match(Op0, m_Or(m_Value(A), m_ConstantInt(CI)))) {
Value *Inner = Builder.CreateOr(A, Op1);
Inner->takeName(Op0);
@@ -2828,18 +2823,17 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
- // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? -1 : X.
+ // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X.
{
Value *X, *Y;
- const APInt *ShAmt;
Type *Ty = I.getType();
- if (match(&I, m_c_Or(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)),
- m_APInt(ShAmt))),
- m_Deferred(X))) &&
- *ShAmt == Ty->getScalarSizeInBits() - 1) {
+ if (match(&I, m_c_Or(m_OneUse(m_AShr(
+ m_NSWSub(m_Value(Y), m_Value(X)),
+ m_SpecificInt(Ty->getScalarSizeInBits() - 1))),
+ m_Deferred(X)))) {
Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
- return SelectInst::Create(NewICmpInst, ConstantInt::getAllOnesValue(Ty),
- X);
+ Value *AllOnes = ConstantInt::getAllOnesValue(Ty);
+ return SelectInst::Create(NewICmpInst, AllOnes, X);
}
}
@@ -2873,6 +2867,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
+ // (~x) | y --> ~(x & (~y)) iff that gets rid of inversions
+ if (sinkNotIntoOtherHandOfAndOrOr(I))
+ return &I;
+
return nullptr;
}
@@ -2929,8 +2927,8 @@ static Instruction *foldXorToXor(BinaryOperator &I,
return nullptr;
}
-Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
- BinaryOperator &I) {
+Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+ BinaryOperator &I) {
assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS &&
I.getOperand(1) == RHS && "Should be 'xor' with these operands");
@@ -3088,9 +3086,9 @@ static Instruction *sinkNotIntoXor(BinaryOperator &I,
return nullptr;
// We only want to do the transform if it is free to do.
- if (isFreeToInvert(X, X->hasOneUse())) {
+ if (InstCombiner::isFreeToInvert(X, X->hasOneUse())) {
// Ok, good.
- } else if (isFreeToInvert(Y, Y->hasOneUse())) {
+ } else if (InstCombiner::isFreeToInvert(Y, Y->hasOneUse())) {
std::swap(X, Y);
} else
return nullptr;
@@ -3099,10 +3097,52 @@ static Instruction *sinkNotIntoXor(BinaryOperator &I,
return BinaryOperator::CreateXor(NotX, Y, I.getName() + ".demorgan");
}
+// Transform
+// z = (~x) &/| y
+// into:
+// z = ~(x |/& (~y))
+// iff y is free to invert and all uses of z can be freely updated.
+bool InstCombinerImpl::sinkNotIntoOtherHandOfAndOrOr(BinaryOperator &I) {
+ Instruction::BinaryOps NewOpc;
+ switch (I.getOpcode()) {
+ case Instruction::And:
+ NewOpc = Instruction::Or;
+ break;
+ case Instruction::Or:
+ NewOpc = Instruction::And;
+ break;
+ default:
+ return false;
+ };
+
+ Value *X, *Y;
+ if (!match(&I, m_c_BinOp(m_Not(m_Value(X)), m_Value(Y))))
+ return false;
+
+ // Will we be able to fold the `not` into Y eventually?
+ if (!InstCombiner::isFreeToInvert(Y, Y->hasOneUse()))
+ return false;
+
+ // And can our users be adapted?
+ if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
+ return false;
+
+ Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
+ Value *NewBinOp =
+ BinaryOperator::Create(NewOpc, X, NotY, I.getName() + ".not");
+ Builder.Insert(NewBinOp);
+ replaceInstUsesWith(I, NewBinOp);
+ // We can not just create an outer `not`, it will most likely be immediately
+ // folded back, reconstructing our initial pattern, and causing an
+ // infinite combine loop, so immediately manually fold it away.
+ freelyInvertAllUsersOf(NewBinOp);
+ return true;
+}
+
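// Illustrative sketch, not part of the patch: the De Morgan rewrites that
// sinkNotIntoOtherHandOfAndOrOr performs, checked exhaustively on 8-bit
// values:
//   (~X) & Y == ~(X | (~Y))          (~X) | Y == ~(X & (~Y))
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      uint8_t X = static_cast<uint8_t>(x), Y = static_cast<uint8_t>(y);
      assert(uint8_t(~X & Y) == uint8_t(~(X | uint8_t(~Y))));
      assert(uint8_t(~X | Y) == uint8_t(~(X & uint8_t(~Y))));
    }
  return 0;
}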
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
-Instruction *InstCombiner::visitXor(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -3129,6 +3169,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Type *Ty = I.getType();
// Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
// This it a special case in haveNoCommonBitsSet, but the computeKnownBits
@@ -3180,11 +3221,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
- // ~(X - Y) --> ~X + Y
- if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
- if (isa<Constant>(X) || NotVal->hasOneUse())
- return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
-
// ~(~X >>s Y) --> (X >>s Y)
if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
return BinaryOperator::CreateAShr(X, Y);
@@ -3200,7 +3236,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
match(C, m_Negative())) {
// We matched a negative constant, so propagating undef is unsafe.
// Clamp undef elements to -1.
- Type *EltTy = C->getType()->getScalarType();
+ Type *EltTy = Ty->getScalarType();
C = Constant::replaceUndefsWith(C, ConstantInt::getAllOnesValue(EltTy));
return BinaryOperator::CreateLShr(ConstantExpr::getNot(C), Y);
}
@@ -3210,14 +3246,25 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
match(C, m_NonNegative())) {
// We matched a non-negative constant, so propagating undef is unsafe.
// Clamp undef elements to 0.
- Type *EltTy = C->getType()->getScalarType();
+ Type *EltTy = Ty->getScalarType();
C = Constant::replaceUndefsWith(C, ConstantInt::getNullValue(EltTy));
return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y);
}
- // ~(X + C) --> -(C + 1) - X
- if (match(Op0, m_Add(m_Value(X), m_Constant(C))))
- return BinaryOperator::CreateSub(ConstantExpr::getNeg(AddOne(C)), X);
+ // ~(X + C) --> ~C - X
+ if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C))))
+ return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X);
+
+ // ~(X - Y) --> ~X + Y
+ // FIXME: is it really beneficial to sink the `not` here?
+ if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
+ if (isa<Constant>(X) || NotVal->hasOneUse())
+ return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
+
+ // ~(~X + Y) --> X - Y
+ if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y))))
+ return BinaryOperator::CreateWithCopiedFlags(Instruction::Sub, X, Y,
+ NotVal);
}
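// Illustrative sketch, not part of the patch: the two's-complement
// identities behind the reordered 'not' folds above, with all operations
// wrapping (0x2B is an arbitrary sample immediate):
//   ~(X + C) == ~C - X              ~(~X + Y) == X - Y
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 0x2B;
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      uint8_t X = static_cast<uint8_t>(x), Y = static_cast<uint8_t>(y);
      assert(uint8_t(~(X + C)) == uint8_t(uint8_t(~C) - X));
      assert(uint8_t(~(uint8_t(~X) + Y)) == uint8_t(X - Y));
    }
  return 0;
}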
// Use DeMorgan and reassociation to eliminate a 'not' op.
@@ -3248,52 +3295,56 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (match(Op1, m_APInt(RHSC))) {
Value *X;
const APInt *C;
- if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X)))) {
- // (C - X) ^ signmask -> (C + signmask - X)
- Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
- return BinaryOperator::CreateSub(NewC, X);
- }
- if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C)))) {
- // (X + C) ^ signmask -> (X + C + signmask)
- Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
- return BinaryOperator::CreateAdd(X, NewC);
- }
+ // (C - X) ^ signmaskC --> (C + signmaskC) - X
+ if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X))))
+ return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C + *RHSC), X);
- // (X|C1)^C2 -> X^(C1^C2) iff X&~C1 == 0
+ // (X + C) ^ signmaskC --> X + (C + signmaskC)
+ if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C))))
+ return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C + *RHSC));
+
+ // (X | C) ^ RHSC --> X ^ (C ^ RHSC) iff X & C == 0
if (match(Op0, m_Or(m_Value(X), m_APInt(C))) &&
- MaskedValueIsZero(X, *C, 0, &I)) {
- Constant *NewC = ConstantInt::get(I.getType(), *C ^ *RHSC);
- return BinaryOperator::CreateXor(X, NewC);
+ MaskedValueIsZero(X, *C, 0, &I))
+ return BinaryOperator::CreateXor(X, ConstantInt::get(Ty, *C ^ *RHSC));
+
+ // If RHSC is inverting the remaining bits of shifted X,
+ // canonicalize to a 'not' before the shift to help SCEV and codegen:
+ // (X << C) ^ RHSC --> ~X << C
+ if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_APInt(C)))) &&
+ *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).shl(*C)) {
+ Value *NotX = Builder.CreateNot(X);
+ return BinaryOperator::CreateShl(NotX, ConstantInt::get(Ty, *C));
+ }
+ // (X >>u C) ^ RHSC --> ~X >>u C
+ if (match(Op0, m_OneUse(m_LShr(m_Value(X), m_APInt(C)))) &&
+ *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).lshr(*C)) {
+ Value *NotX = Builder.CreateNot(X);
+ return BinaryOperator::CreateLShr(NotX, ConstantInt::get(Ty, *C));
}
+ // TODO: We could handle 'ashr' here as well. That would be matching
+ // a 'not' op and moving it before the shift. Doing that requires
+ // preventing the inverse fold in canShiftBinOpWithConstantRHS().
}
}
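// Illustrative sketch, not part of the patch: when the xor constant is
// exactly the mask of bits the shift can still produce, the xor acts as a
// 'not' of the shifted value, which the new canonicalization hoists before
// the shift. Checked on 8-bit values with logical shifts:
//   (X << C) ^ (0xFF << C)  ==  (~X) << C
//   (X >> C) ^ (0xFF >> C)  ==  (~X) >> C
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned C = 0; C < 8; ++C) {
      uint8_t X = static_cast<uint8_t>(x);
      assert(uint8_t((X << C) ^ uint8_t(0xFFu << C)) ==
             uint8_t(uint8_t(~X) << C));
      assert(uint8_t((X >> C) ^ uint8_t(0xFFu >> C)) ==
             uint8_t(uint8_t(~X) >> C));
    }
  return 0;
}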
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) {
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
- if (Op0I->getOpcode() == Instruction::LShr) {
- // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
- // E1 = "X ^ C1"
- BinaryOperator *E1;
- ConstantInt *C1;
- if (Op0I->hasOneUse() &&
- (E1 = dyn_cast<BinaryOperator>(Op0I->getOperand(0))) &&
- E1->getOpcode() == Instruction::Xor &&
- (C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) {
- // fold (C1 >> C2) ^ C3
- ConstantInt *C2 = Op0CI, *C3 = RHSC;
- APInt FoldConst = C1->getValue().lshr(C2->getValue());
- FoldConst ^= C3->getValue();
- // Prepare the two operands.
- Value *Opnd0 = Builder.CreateLShr(E1->getOperand(0), C2);
- Opnd0->takeName(Op0I);
- cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc());
- Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst);
-
- return BinaryOperator::CreateXor(Opnd0, FoldVal);
- }
- }
- }
+ // FIXME: This should not be limited to scalar (pull into APInt match above).
+ {
+ Value *X;
+ ConstantInt *C1, *C2, *C3;
+ // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+ if (match(Op1, m_ConstantInt(C3)) &&
+ match(Op0, m_LShr(m_Xor(m_Value(X), m_ConstantInt(C1)),
+ m_ConstantInt(C2))) &&
+ Op0->hasOneUse()) {
+ // fold (C1 >> C2) ^ C3
+ APInt FoldConst = C1->getValue().lshr(C2->getValue());
+ FoldConst ^= C3->getValue();
+ // Prepare the two operands.
+ auto *Opnd0 = cast<Instruction>(Builder.CreateLShr(X, C2));
+ Opnd0->takeName(cast<Instruction>(Op0));
+ Opnd0->setDebugLoc(I.getDebugLoc());
+ return BinaryOperator::CreateXor(Opnd0, ConstantInt::get(Ty, FoldConst));
}
}
@@ -3350,6 +3401,25 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
match(Op1, m_Not(m_Specific(A))))
return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
+ // (~A & B) ^ A --> A | B -- There are 4 commuted variants.
+ if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(A)), m_Value(B)), m_Deferred(A))))
+ return BinaryOperator::CreateOr(A, B);
+
+ // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants.
+ // TODO: Loosen one-use restriction if common operand is a constant.
+ Value *D;
+ if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B)))) &&
+ match(Op1, m_OneUse(m_Or(m_Value(C), m_Value(D))))) {
+ if (B == C || B == D)
+ std::swap(A, B);
+ if (A == C)
+ std::swap(C, D);
+ if (A == D) {
+ Value *NotA = Builder.CreateNot(A);
+ return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), NotA);
+ }
+ }
+
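// Illustrative sketch, not part of the patch: brute-force checks of the two
// xor folds added above (ranges kept small so the triple loop stays cheap;
// the identities hold bit-by-bit for any width):
//   (~A & B) ^ A        ==  A | B
//   (A | B) ^ (A | D)   ==  (B ^ D) & ~A
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 64; ++a)
    for (unsigned b = 0; b < 64; ++b)
      for (unsigned d = 0; d < 64; ++d) {
        uint8_t A = static_cast<uint8_t>(a), B = static_cast<uint8_t>(b),
                D = static_cast<uint8_t>(d);
        assert(uint8_t((uint8_t(~A) & B) ^ A) == uint8_t(A | B));
        assert(uint8_t((A | B) ^ (A | D)) ==
               uint8_t((B ^ D) & uint8_t(~A)));
      }
  return 0;
}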
if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (Value *V = foldXorOfICmps(LHS, RHS, I))
@@ -3367,7 +3437,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
std::swap(Op0, Op1);
const APInt *ShAmt;
- Type *Ty = I.getType();
if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
@@ -3426,19 +3495,30 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
- // Pull 'not' into operands of select if both operands are one-use compares.
+ // Pull 'not' into operands of select if both operands are one-use compares
+ // or one is one-use compare and the other one is a constant.
// Inverting the predicates eliminates the 'not' operation.
// Example:
- // not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?) -->
+ // not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?) -->
// select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
- // TODO: Canonicalize by hoisting 'not' into an arm of the select if only
- // 1 select operand is a cmp?
+ // not (select ?, (cmp TPred, ?, ?), true -->
+ // select ?, (cmp InvTPred, ?, ?), false
if (auto *Sel = dyn_cast<SelectInst>(Op0)) {
- auto *CmpT = dyn_cast<CmpInst>(Sel->getTrueValue());
- auto *CmpF = dyn_cast<CmpInst>(Sel->getFalseValue());
- if (CmpT && CmpF && CmpT->hasOneUse() && CmpF->hasOneUse()) {
- CmpT->setPredicate(CmpT->getInversePredicate());
- CmpF->setPredicate(CmpF->getInversePredicate());
+ Value *TV = Sel->getTrueValue();
+ Value *FV = Sel->getFalseValue();
+ auto *CmpT = dyn_cast<CmpInst>(TV);
+ auto *CmpF = dyn_cast<CmpInst>(FV);
+ bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(TV);
+ bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(FV);
+ if (InvertibleT && InvertibleF) {
+ if (CmpT)
+ CmpT->setPredicate(CmpT->getInversePredicate());
+ else
+ Sel->setTrueValue(ConstantExpr::getNot(cast<Constant>(TV)));
+ if (CmpF)
+ CmpF->setPredicate(CmpF->getInversePredicate());
+ else
+ Sel->setFalseValue(ConstantExpr::getNot(cast<Constant>(FV)));
return replaceInstUsesWith(I, Sel);
}
}
@@ -3447,5 +3527,15 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Instruction *NewXor = sinkNotIntoXor(I, Builder))
return NewXor;
+ // Otherwise, if all else failed, try to hoist the xor-by-constant:
+ // (X ^ C) ^ Y --> (X ^ Y) ^ C
+ // Just like we do in other places, we completely avoid the fold
+ // for constantexprs, at least to avoid endless combine loop.
+ if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_CombineAnd(m_Value(X),
+ m_Unless(m_ConstantExpr())),
+ m_ImmConstant(C1))),
+ m_Value(Y))))
+ return BinaryOperator::CreateXor(Builder.CreateXor(X, Y), C1);
+
return nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index ba1cf982229d..495493aab4b5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -9,8 +9,10 @@
// This file implements the visit functions for atomic rmw instructions.
//
//===----------------------------------------------------------------------===//
+
#include "InstCombineInternal.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
using namespace llvm;
@@ -30,7 +32,7 @@ bool isIdempotentRMW(AtomicRMWInst& RMWI) {
default:
return false;
};
-
+
auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
if(!C)
return false;
@@ -91,13 +93,13 @@ bool isSaturating(AtomicRMWInst& RMWI) {
return C->isMaxValue(false);
};
}
-}
+} // namespace
-Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+Instruction *InstCombinerImpl::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
// Volatile RMWs perform a load and a store, we cannot replace this by just a
// load or just a store. We chose not to canonicalize out of general paranoia
- // about user expectations around volatile.
+ // about user expectations around volatile.
if (RMWI.isVolatile())
return nullptr;
@@ -115,7 +117,7 @@ Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
"AtomicRMWs don't make sense with Unordered or NotAtomic");
// Any atomicrmw xchg with no uses can be converted to a atomic store if the
- // ordering is compatible.
+ // ordering is compatible.
if (RMWI.getOperation() == AtomicRMWInst::Xchg &&
RMWI.use_empty()) {
if (Ordering != AtomicOrdering::Release &&
@@ -127,14 +129,14 @@ Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
SI->setAlignment(DL.getABITypeAlign(RMWI.getType()));
return eraseInstFromFunction(RMWI);
}
-
+
if (!isIdempotentRMW(RMWI))
return nullptr;
// We chose to canonicalize all idempotent operations to an single
// operation code and constant. This makes it easier for the rest of the
// optimizer to match easily. The choices of or w/0 and fadd w/-0.0 are
- // arbitrary.
+ // arbitrary.
if (RMWI.getType()->isIntegerTy() &&
RMWI.getOperation() != AtomicRMWInst::Or) {
RMWI.setOperation(AtomicRMWInst::Or);
@@ -149,7 +151,7 @@ Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
if (Ordering != AtomicOrdering::Acquire &&
Ordering != AtomicOrdering::Monotonic)
return nullptr;
-
+
LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand(), "",
false, DL.getABITypeAlign(RMWI.getType()),
Ordering, RMWI.getSyncScopeID());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 836af6234ad5..5482b944e347 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
@@ -47,9 +48,6 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsHexagon.h"
-#include "llvm/IR/IntrinsicsNVPTX.h"
-#include "llvm/IR/IntrinsicsPowerPC.h"
-#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
@@ -68,6 +66,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
@@ -100,24 +99,7 @@ static Type *getPromotedType(Type *Ty) {
return Ty;
}
-/// Return a constant boolean vector that has true elements in all positions
-/// where the input constant data vector has an element with the sign bit set.
-static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
- SmallVector<Constant *, 32> BoolVec;
- IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
- for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
- Constant *Elt = V->getElementAsConstant(I);
- assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
- "Unexpected constant data vector element type");
- bool Sign = V->getElementType()->isIntegerTy()
- ? cast<ConstantInt>(Elt)->isNegative()
- : cast<ConstantFP>(Elt)->isNegative();
- BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
- }
- return ConstantVector::get(BoolVec);
-}
-
-Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
+Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
MaybeAlign CopyDstAlign = MI->getDestAlign();
if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
@@ -232,7 +214,7 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
return MI;
}
-Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
+Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
const Align KnownAlignment =
getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
MaybeAlign MemSetAlign = MI->getDestAlign();
@@ -292,820 +274,9 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
return nullptr;
}
-static Value *simplifyX86immShift(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- bool LogicalShift = false;
- bool ShiftLeft = false;
- bool IsImm = false;
-
- switch (II.getIntrinsicID()) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_sse2_psrai_d:
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx512_psrai_q_128:
- case Intrinsic::x86_avx512_psrai_q_256:
- case Intrinsic::x86_avx512_psrai_d_512:
- case Intrinsic::x86_avx512_psrai_q_512:
- case Intrinsic::x86_avx512_psrai_w_512:
- IsImm = true;
- LLVM_FALLTHROUGH;
- case Intrinsic::x86_sse2_psra_d:
- case Intrinsic::x86_sse2_psra_w:
- case Intrinsic::x86_avx2_psra_d:
- case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx512_psra_q_128:
- case Intrinsic::x86_avx512_psra_q_256:
- case Intrinsic::x86_avx512_psra_d_512:
- case Intrinsic::x86_avx512_psra_q_512:
- case Intrinsic::x86_avx512_psra_w_512:
- LogicalShift = false;
- ShiftLeft = false;
- break;
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w:
- case Intrinsic::x86_avx512_psrli_d_512:
- case Intrinsic::x86_avx512_psrli_q_512:
- case Intrinsic::x86_avx512_psrli_w_512:
- IsImm = true;
- LLVM_FALLTHROUGH;
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx512_psrl_d_512:
- case Intrinsic::x86_avx512_psrl_q_512:
- case Intrinsic::x86_avx512_psrl_w_512:
- LogicalShift = true;
- ShiftLeft = false;
- break;
- case Intrinsic::x86_sse2_pslli_d:
- case Intrinsic::x86_sse2_pslli_q:
- case Intrinsic::x86_sse2_pslli_w:
- case Intrinsic::x86_avx2_pslli_d:
- case Intrinsic::x86_avx2_pslli_q:
- case Intrinsic::x86_avx2_pslli_w:
- case Intrinsic::x86_avx512_pslli_d_512:
- case Intrinsic::x86_avx512_pslli_q_512:
- case Intrinsic::x86_avx512_pslli_w_512:
- IsImm = true;
- LLVM_FALLTHROUGH;
- case Intrinsic::x86_sse2_psll_d:
- case Intrinsic::x86_sse2_psll_q:
- case Intrinsic::x86_sse2_psll_w:
- case Intrinsic::x86_avx2_psll_d:
- case Intrinsic::x86_avx2_psll_q:
- case Intrinsic::x86_avx2_psll_w:
- case Intrinsic::x86_avx512_psll_d_512:
- case Intrinsic::x86_avx512_psll_q_512:
- case Intrinsic::x86_avx512_psll_w_512:
- LogicalShift = true;
- ShiftLeft = true;
- break;
- }
- assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
-
- auto Vec = II.getArgOperand(0);
- auto Amt = II.getArgOperand(1);
- auto VT = cast<VectorType>(Vec->getType());
- auto SVT = VT->getElementType();
- auto AmtVT = Amt->getType();
- unsigned VWidth = VT->getNumElements();
- unsigned BitWidth = SVT->getPrimitiveSizeInBits();
-
- // If the shift amount is guaranteed to be in-range we can replace it with a
- // generic shift. If its guaranteed to be out of range, logical shifts combine to
- // zero and arithmetic shifts are clamped to (BitWidth - 1).
- if (IsImm) {
- assert(AmtVT ->isIntegerTy(32) &&
- "Unexpected shift-by-immediate type");
- KnownBits KnownAmtBits =
- llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
- if (KnownAmtBits.getMaxValue().ult(BitWidth)) {
- Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
- Amt = Builder.CreateVectorSplat(VWidth, Amt);
- return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
- : Builder.CreateLShr(Vec, Amt))
- : Builder.CreateAShr(Vec, Amt));
- }
- if (KnownAmtBits.getMinValue().uge(BitWidth)) {
- if (LogicalShift)
- return ConstantAggregateZero::get(VT);
- Amt = ConstantInt::get(SVT, BitWidth - 1);
- return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
- }
- } else {
- // Ensure the first element has an in-range value and the rest of the
- // elements in the bottom 64 bits are zero.
- assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
- cast<VectorType>(AmtVT)->getElementType() == SVT &&
- "Unexpected shift-by-scalar type");
- unsigned NumAmtElts = cast<VectorType>(AmtVT)->getNumElements();
- APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
- APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
- KnownBits KnownLowerBits = llvm::computeKnownBits(
- Amt, DemandedLower, II.getModule()->getDataLayout());
- KnownBits KnownUpperBits = llvm::computeKnownBits(
- Amt, DemandedUpper, II.getModule()->getDataLayout());
- if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
- (DemandedUpper.isNullValue() || KnownUpperBits.isZero())) {
- SmallVector<int, 16> ZeroSplat(VWidth, 0);
- Amt = Builder.CreateShuffleVector(Amt, Amt, ZeroSplat);
- return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
- : Builder.CreateLShr(Vec, Amt))
- : Builder.CreateAShr(Vec, Amt));
- }
- }
-
- // Simplify if count is constant vector.
- auto CDV = dyn_cast<ConstantDataVector>(Amt);
- if (!CDV)
- return nullptr;
-
- // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
- // operand to compute the shift amount.
- assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
- cast<VectorType>(AmtVT)->getElementType() == SVT &&
- "Unexpected shift-by-scalar type");
-
- // Concatenate the sub-elements to create the 64-bit value.
- APInt Count(64, 0);
- for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
- unsigned SubEltIdx = (NumSubElts - 1) - i;
- auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
- Count <<= BitWidth;
- Count |= SubElt->getValue().zextOrTrunc(64);
- }
-
- // If shift-by-zero then just return the original value.
- if (Count.isNullValue())
- return Vec;
-
- // Handle cases when Shift >= BitWidth.
- if (Count.uge(BitWidth)) {
- // If LogicalShift - just return zero.
- if (LogicalShift)
- return ConstantAggregateZero::get(VT);
-
- // If ArithmeticShift - clamp Shift to (BitWidth - 1).
- Count = APInt(64, BitWidth - 1);
- }
-
- // Get a constant vector of the same type as the first operand.
- auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
- auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
-
- if (ShiftLeft)
- return Builder.CreateShl(Vec, ShiftVec);
-
- if (LogicalShift)
- return Builder.CreateLShr(Vec, ShiftVec);
-
- return Builder.CreateAShr(Vec, ShiftVec);
-}
-
-// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
-// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
-// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
-static Value *simplifyX86varShift(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- bool LogicalShift = false;
- bool ShiftLeft = false;
-
- switch (II.getIntrinsicID()) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_avx2_psrav_d:
- case Intrinsic::x86_avx2_psrav_d_256:
- case Intrinsic::x86_avx512_psrav_q_128:
- case Intrinsic::x86_avx512_psrav_q_256:
- case Intrinsic::x86_avx512_psrav_d_512:
- case Intrinsic::x86_avx512_psrav_q_512:
- case Intrinsic::x86_avx512_psrav_w_128:
- case Intrinsic::x86_avx512_psrav_w_256:
- case Intrinsic::x86_avx512_psrav_w_512:
- LogicalShift = false;
- ShiftLeft = false;
- break;
- case Intrinsic::x86_avx2_psrlv_d:
- case Intrinsic::x86_avx2_psrlv_d_256:
- case Intrinsic::x86_avx2_psrlv_q:
- case Intrinsic::x86_avx2_psrlv_q_256:
- case Intrinsic::x86_avx512_psrlv_d_512:
- case Intrinsic::x86_avx512_psrlv_q_512:
- case Intrinsic::x86_avx512_psrlv_w_128:
- case Intrinsic::x86_avx512_psrlv_w_256:
- case Intrinsic::x86_avx512_psrlv_w_512:
- LogicalShift = true;
- ShiftLeft = false;
- break;
- case Intrinsic::x86_avx2_psllv_d:
- case Intrinsic::x86_avx2_psllv_d_256:
- case Intrinsic::x86_avx2_psllv_q:
- case Intrinsic::x86_avx2_psllv_q_256:
- case Intrinsic::x86_avx512_psllv_d_512:
- case Intrinsic::x86_avx512_psllv_q_512:
- case Intrinsic::x86_avx512_psllv_w_128:
- case Intrinsic::x86_avx512_psllv_w_256:
- case Intrinsic::x86_avx512_psllv_w_512:
- LogicalShift = true;
- ShiftLeft = true;
- break;
- }
- assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
-
- auto Vec = II.getArgOperand(0);
- auto Amt = II.getArgOperand(1);
- auto VT = cast<VectorType>(II.getType());
- auto SVT = VT->getElementType();
- int NumElts = VT->getNumElements();
- int BitWidth = SVT->getIntegerBitWidth();
-
- // If the shift amount is guaranteed to be in-range we can replace it with a
- // generic shift.
- APInt UpperBits =
- APInt::getHighBitsSet(BitWidth, BitWidth - Log2_32(BitWidth));
- if (llvm::MaskedValueIsZero(Amt, UpperBits,
- II.getModule()->getDataLayout())) {
- return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
- : Builder.CreateLShr(Vec, Amt))
- : Builder.CreateAShr(Vec, Amt));
- }
-
- // Simplify if all shift amounts are constant/undef.
- auto *CShift = dyn_cast<Constant>(Amt);
- if (!CShift)
- return nullptr;
-
- // Collect each element's shift amount.
- // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
- bool AnyOutOfRange = false;
- SmallVector<int, 8> ShiftAmts;
- for (int I = 0; I < NumElts; ++I) {
- auto *CElt = CShift->getAggregateElement(I);
- if (CElt && isa<UndefValue>(CElt)) {
- ShiftAmts.push_back(-1);
- continue;
- }
-
- auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
- if (!COp)
- return nullptr;
-
- // Handle out of range shifts.
- // If LogicalShift - set to BitWidth (special case).
- // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
- APInt ShiftVal = COp->getValue();
- if (ShiftVal.uge(BitWidth)) {
- AnyOutOfRange = LogicalShift;
- ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
- continue;
- }
-
- ShiftAmts.push_back((int)ShiftVal.getZExtValue());
- }
-
- // If all elements out of range or UNDEF, return vector of zeros/undefs.
- // ArithmeticShift should only hit this if they are all UNDEF.
- auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
- if (llvm::all_of(ShiftAmts, OutOfRange)) {
- SmallVector<Constant *, 8> ConstantVec;
- for (int Idx : ShiftAmts) {
- if (Idx < 0) {
- ConstantVec.push_back(UndefValue::get(SVT));
- } else {
- assert(LogicalShift && "Logical shift expected");
- ConstantVec.push_back(ConstantInt::getNullValue(SVT));
- }
- }
- return ConstantVector::get(ConstantVec);
- }
-
- // We can't handle a mix of in-range and out-of-range shift amounts with
- // generic logical shifts.
- if (AnyOutOfRange)
- return nullptr;
-
- // Build the shift amount constant vector.
- SmallVector<Constant *, 8> ShiftVecAmts;
- for (int Idx : ShiftAmts) {
- if (Idx < 0)
- ShiftVecAmts.push_back(UndefValue::get(SVT));
- else
- ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
- }
- auto ShiftVec = ConstantVector::get(ShiftVecAmts);
-
- if (ShiftLeft)
- return Builder.CreateShl(Vec, ShiftVec);
-
- if (LogicalShift)
- return Builder.CreateLShr(Vec, ShiftVec);
-
- return Builder.CreateAShr(Vec, ShiftVec);
-}
-
-static Value *simplifyX86pack(IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder, bool IsSigned) {
- Value *Arg0 = II.getArgOperand(0);
- Value *Arg1 = II.getArgOperand(1);
- Type *ResTy = II.getType();
-
- // Fast all undef handling.
- if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
- return UndefValue::get(ResTy);
-
- auto *ArgTy = cast<VectorType>(Arg0->getType());
- unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
- unsigned NumSrcElts = ArgTy->getNumElements();
- assert(cast<VectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
- "Unexpected packing types");
-
- unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
- unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
- unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
- assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
- "Unexpected packing types");
-
- // Constant folding.
- if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
- return nullptr;
-
- // Clamp Values - signed/unsigned both use signed clamp values, but they
- // differ on the min/max values.
- APInt MinValue, MaxValue;
- if (IsSigned) {
- // PACKSS: Truncate signed value with signed saturation.
- // Source values less than dst minint are saturated to minint.
- // Source values greater than dst maxint are saturated to maxint.
- MinValue =
- APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
- MaxValue =
- APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
- } else {
- // PACKUS: Truncate signed value with unsigned saturation.
- // Source values less than zero are saturated to zero.
- // Source values greater than dst maxuint are saturated to maxuint.
- MinValue = APInt::getNullValue(SrcScalarSizeInBits);
- MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
- }
-
- auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
- auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
- Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
- Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
- Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
- Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
-
- // Shuffle clamped args together at the lane level.
- SmallVector<int, 32> PackMask;
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
- PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
- for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
- PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
- }
- auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
-
- // Truncate to dst size.
- return Builder.CreateTrunc(Shuffle, ResTy);
-}
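
As an aside, the clamp-then-truncate sequence built above has a simple scalar counterpart. The sketch below (hypothetical helper name, plain C++ only) models one i16 element being packed to i8 under PACKSS/PACKUS semantics.

#include <algorithm>
#include <cstdint>

// Clamp the wide element to the destination range (signed for PACKSS,
// unsigned for PACKUS); after the clamp, truncation to 8 bits is lossless.
static uint8_t packLaneModel(int16_t Src, bool IsSigned) {
  int16_t Lo = IsSigned ? INT8_MIN : 0;
  int16_t Hi = IsSigned ? INT8_MAX : (int16_t)UINT8_MAX;
  return (uint8_t)std::min(std::max(Src, Lo), Hi);
}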
-
-static Value *simplifyX86movmsk(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- Value *Arg = II.getArgOperand(0);
- Type *ResTy = II.getType();
-
- // movmsk(undef) -> zero as we must ensure the upper bits are zero.
- if (isa<UndefValue>(Arg))
- return Constant::getNullValue(ResTy);
-
- auto *ArgTy = dyn_cast<VectorType>(Arg->getType());
- // We can't easily peek through x86_mmx types.
- if (!ArgTy)
- return nullptr;
-
- // Expand MOVMSK to compare/bitcast/zext:
- // e.g. PMOVMSKB(v16i8 x):
- // %cmp = icmp slt <16 x i8> %x, zeroinitializer
- // %int = bitcast <16 x i1> %cmp to i16
- // %res = zext i16 %int to i32
- unsigned NumElts = ArgTy->getNumElements();
- Type *IntegerVecTy = VectorType::getInteger(ArgTy);
- Type *IntegerTy = Builder.getIntNTy(NumElts);
-
- Value *Res = Builder.CreateBitCast(Arg, IntegerVecTy);
- Res = Builder.CreateICmpSLT(Res, Constant::getNullValue(IntegerVecTy));
- Res = Builder.CreateBitCast(Res, IntegerTy);
- Res = Builder.CreateZExtOrTrunc(Res, ResTy);
- return Res;
-}
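
A quick scalar cross-check: the compare/bitcast/zext expansion above computes the same thing as this sketch of PMOVMSKB on 16 bytes (hypothetical helper name, plain C++ only).

#include <cstdint>

// Bit I of the result is the sign bit of element I; all higher result bits
// stay zero, which is also why movmsk(undef) folds to 0 rather than undef.
static uint32_t movmskModel(const int8_t (&Bytes)[16]) {
  uint32_t Mask = 0;
  for (unsigned I = 0; I != 16; ++I)
    if (Bytes[I] < 0)          // Matches the icmp slt against zero above.
      Mask |= 1u << I;
  return Mask;
}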
-
-static Value *simplifyX86addcarry(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- Value *CarryIn = II.getArgOperand(0);
- Value *Op1 = II.getArgOperand(1);
- Value *Op2 = II.getArgOperand(2);
- Type *RetTy = II.getType();
- Type *OpTy = Op1->getType();
- assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
- RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
- "Unexpected types for x86 addcarry");
-
- // If carry-in is zero, this is just an unsigned add with overflow.
- if (match(CarryIn, m_ZeroInt())) {
- Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
- { Op1, Op2 });
- // The types have to be adjusted to match the x86 call types.
- Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
- Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
- Builder.getInt8Ty());
- Value *Res = UndefValue::get(RetTy);
- Res = Builder.CreateInsertValue(Res, UAddOV, 0);
- return Builder.CreateInsertValue(Res, UAddResult, 1);
- }
-
- return nullptr;
-}
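
With a zero carry-in, addcarry degenerates to an unsigned add whose carry-out flags wraparound, which is exactly the uadd.with.overflow form emitted above. A scalar sketch (hypothetical helper name, plain C++ only):

#include <cstdint>

// Returns the carry-out (0 or 1) and writes the 64-bit sum; the carry-out is
// set iff the unsigned addition wrapped.
static uint8_t addcarryZeroModel(uint64_t A, uint64_t B, uint64_t &Sum) {
  Sum = A + B;
  return Sum < A ? 1 : 0;
}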
-
-static Value *simplifyX86insertps(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
- if (!CInt)
- return nullptr;
-
- VectorType *VecTy = cast<VectorType>(II.getType());
- assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
-
- // The immediate permute control byte looks like this:
- // [3:0] - zero mask for each 32-bit lane
- // [5:4] - select one 32-bit destination lane
- // [7:6] - select one 32-bit source lane
-
- uint8_t Imm = CInt->getZExtValue();
- uint8_t ZMask = Imm & 0xf;
- uint8_t DestLane = (Imm >> 4) & 0x3;
- uint8_t SourceLane = (Imm >> 6) & 0x3;
-
- ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
-
- // If all zero mask bits are set, this was just a weird way to
- // generate a zero vector.
- if (ZMask == 0xf)
- return ZeroVector;
-
- // Initialize by passing all of the first source bits through.
- int ShuffleMask[4] = {0, 1, 2, 3};
-
- // We may replace the second operand with the zero vector.
- Value *V1 = II.getArgOperand(1);
-
- if (ZMask) {
- // If the zero mask is being used with a single input or the zero mask
- // overrides the destination lane, this is a shuffle with the zero vector.
- if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
- (ZMask & (1 << DestLane))) {
- V1 = ZeroVector;
- // We may still move 32-bits of the first source vector from one lane
- // to another.
- ShuffleMask[DestLane] = SourceLane;
- // The zero mask may override the previous insert operation.
- for (unsigned i = 0; i < 4; ++i)
- if ((ZMask >> i) & 0x1)
- ShuffleMask[i] = i + 4;
- } else {
- // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
- return nullptr;
- }
- } else {
- // Replace the selected destination lane with the selected source lane.
- ShuffleMask[DestLane] = SourceLane + 4;
- }
-
- return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
-}
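
To make the immediate decoding above easier to follow, here is a scalar sketch of the whole INSERTPS operation (hypothetical helper name, plain C++ only).

#include <cstdint>

// Copy one 32-bit lane of Src into one lane of Dst, then zero every lane
// selected by the low four immediate bits.
static void insertpsModel(float Dst[4], const float Src[4], uint8_t Imm) {
  uint8_t ZMask = Imm & 0xf;
  uint8_t DestLane = (Imm >> 4) & 0x3;
  uint8_t SourceLane = (Imm >> 6) & 0x3;
  Dst[DestLane] = Src[SourceLane];
  for (unsigned I = 0; I != 4; ++I)
    if ((ZMask >> I) & 1)
      Dst[I] = 0.0f;
}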
-
-/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
-/// or conversion to a shuffle vector.
-static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
- ConstantInt *CILength, ConstantInt *CIIndex,
- InstCombiner::BuilderTy &Builder) {
- auto LowConstantHighUndef = [&](uint64_t Val) {
- Type *IntTy64 = Type::getInt64Ty(II.getContext());
- Constant *Args[] = {ConstantInt::get(IntTy64, Val),
- UndefValue::get(IntTy64)};
- return ConstantVector::get(Args);
- };
-
- // See if we're dealing with constant values.
- Constant *C0 = dyn_cast<Constant>(Op0);
- ConstantInt *CI0 =
- C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
- : nullptr;
-
- // Attempt to constant fold.
- if (CILength && CIIndex) {
- // From AMD documentation: "The bit index and field length are each six
- // bits in length; other bits of the field are ignored."
- APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
- APInt APLength = CILength->getValue().zextOrTrunc(6);
-
- unsigned Index = APIndex.getZExtValue();
-
- // From AMD documentation: "a value of zero in the field length is
- // defined as length of 64".
- unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
-
- // From AMD documentation: "If the sum of the bit index + length field
- // is greater than 64, the results are undefined".
- unsigned End = Index + Length;
-
- // Note that both field index and field length are 8-bit quantities.
- // Since variables 'Index' and 'Length' are unsigned values
- // obtained from zero-extending field index and field length
- // respectively, their sum should never wrap around.
- if (End > 64)
- return UndefValue::get(II.getType());
-
- // If we are inserting whole bytes, we can convert this to a shuffle.
- // Lowering can recognize EXTRQI shuffle masks.
- if ((Length % 8) == 0 && (Index % 8) == 0) {
- // Convert bit indices to byte indices.
- Length /= 8;
- Index /= 8;
-
- Type *IntTy8 = Type::getInt8Ty(II.getContext());
- auto *ShufTy = FixedVectorType::get(IntTy8, 16);
-
- SmallVector<int, 16> ShuffleMask;
- for (int i = 0; i != (int)Length; ++i)
- ShuffleMask.push_back(i + Index);
- for (int i = Length; i != 8; ++i)
- ShuffleMask.push_back(i + 16);
- for (int i = 8; i != 16; ++i)
- ShuffleMask.push_back(-1);
-
- Value *SV = Builder.CreateShuffleVector(
- Builder.CreateBitCast(Op0, ShufTy),
- ConstantAggregateZero::get(ShufTy), ShuffleMask);
- return Builder.CreateBitCast(SV, II.getType());
- }
-
- // Constant Fold - shift Index'th bit to lowest position and mask off
- // Length bits.
- if (CI0) {
- APInt Elt = CI0->getValue();
- Elt.lshrInPlace(Index);
- Elt = Elt.zextOrTrunc(Length);
- return LowConstantHighUndef(Elt.getZExtValue());
- }
-
- // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
- if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
- Value *Args[] = {Op0, CILength, CIIndex};
- Module *M = II.getModule();
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
- return Builder.CreateCall(F, Args);
- }
- }
-
- // Constant Fold - extraction from zero is always {zero, undef}.
- if (CI0 && CI0->isZero())
- return LowConstantHighUndef(0);
-
- return nullptr;
-}
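
The constant fold above implements the EXTRQ bitfield extract; a scalar sketch (hypothetical helper name, plain C++ only), leaving out the Index + Length > 64 case that the AMD manual declares undefined:

#include <cstdint>

// Shift the Index'th bit down to bit 0 and keep the low Length bits; a field
// length of zero means 64. The shift-by-64 case is special-cased to avoid UB.
static uint64_t extrqModel(uint64_t Src, unsigned Index, unsigned Length) {
  if (Length == 0)
    Length = 64;
  uint64_t Field = Src >> Index;
  return Length == 64 ? Field : Field & ((1ULL << Length) - 1);
}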
-
-/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
-/// folding or conversion to a shuffle vector.
-static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
- APInt APLength, APInt APIndex,
- InstCombiner::BuilderTy &Builder) {
- // From AMD documentation: "The bit index and field length are each six bits
- // in length; other bits of the field are ignored."
- APIndex = APIndex.zextOrTrunc(6);
- APLength = APLength.zextOrTrunc(6);
-
- // Attempt to constant fold.
- unsigned Index = APIndex.getZExtValue();
-
- // From AMD documentation: "a value of zero in the field length is
- // defined as length of 64".
- unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
-
- // From AMD documentation: "If the sum of the bit index + length field
- // is greater than 64, the results are undefined".
- unsigned End = Index + Length;
-
- // Note that both field index and field length are 8-bit quantities.
- // Since variables 'Index' and 'Length' are unsigned values
- // obtained from zero-extending field index and field length
- // respectively, their sum should never wrap around.
- if (End > 64)
- return UndefValue::get(II.getType());
-
- // If we are inserting whole bytes, we can convert this to a shuffle.
- // Lowering can recognize INSERTQI shuffle masks.
- if ((Length % 8) == 0 && (Index % 8) == 0) {
- // Convert bit indices to byte indices.
- Length /= 8;
- Index /= 8;
-
- Type *IntTy8 = Type::getInt8Ty(II.getContext());
- auto *ShufTy = FixedVectorType::get(IntTy8, 16);
-
- SmallVector<int, 16> ShuffleMask;
- for (int i = 0; i != (int)Index; ++i)
- ShuffleMask.push_back(i);
- for (int i = 0; i != (int)Length; ++i)
- ShuffleMask.push_back(i + 16);
- for (int i = Index + Length; i != 8; ++i)
- ShuffleMask.push_back(i);
- for (int i = 8; i != 16; ++i)
- ShuffleMask.push_back(-1);
-
- Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
- Builder.CreateBitCast(Op1, ShufTy),
- ShuffleMask);
- return Builder.CreateBitCast(SV, II.getType());
- }
-
- // See if we're dealing with constant values.
- Constant *C0 = dyn_cast<Constant>(Op0);
- Constant *C1 = dyn_cast<Constant>(Op1);
- ConstantInt *CI00 =
- C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
- : nullptr;
- ConstantInt *CI10 =
- C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
- : nullptr;
-
- // Constant Fold - insert bottom Length bits starting at the Index'th bit.
- if (CI00 && CI10) {
- APInt V00 = CI00->getValue();
- APInt V10 = CI10->getValue();
- APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
- V00 = V00 & ~Mask;
- V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
- APInt Val = V00 | V10;
- Type *IntTy64 = Type::getInt64Ty(II.getContext());
- Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
- UndefValue::get(IntTy64)};
- return ConstantVector::get(Args);
- }
-
- // If we were an INSERTQ call, we'll save demanded elements if we convert to
- // INSERTQI.
- if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
- Type *IntTy8 = Type::getInt8Ty(II.getContext());
- Constant *CILength = ConstantInt::get(IntTy8, Length, false);
- Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
-
- Value *Args[] = {Op0, Op1, CILength, CIIndex};
- Module *M = II.getModule();
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
- return Builder.CreateCall(F, Args);
- }
-
- return nullptr;
-}
-
-/// Attempt to convert pshufb* to shufflevector if the mask is constant.
-static Value *simplifyX86pshufb(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
- if (!V)
- return nullptr;
-
- auto *VecTy = cast<VectorType>(II.getType());
- unsigned NumElts = VecTy->getNumElements();
- assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
- "Unexpected number of elements in shuffle mask!");
-
- // Construct a shuffle mask from constant integers or UNDEFs.
- int Indexes[64];
-
- // Each byte in the shuffle control mask forms an index to permute the
- // corresponding byte in the destination operand.
- for (unsigned I = 0; I < NumElts; ++I) {
- Constant *COp = V->getAggregateElement(I);
- if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
- return nullptr;
-
- if (isa<UndefValue>(COp)) {
- Indexes[I] = -1;
- continue;
- }
-
- int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
-
- // If the most significant bit (bit[7]) of each byte of the shuffle
- // control mask is set, then zero is written in the result byte.
- // The zero vector is in the right-hand side of the resulting
- // shufflevector.
-
- // The value of each index for the high 128-bit lane is the least
- // significant 4 bits of the respective shuffle control byte.
- Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
- Indexes[I] = Index;
- }
-
- auto V1 = II.getArgOperand(0);
- auto V2 = Constant::getNullValue(VecTy);
- return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, NumElts));
-}
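
The mask construction above mirrors the per-lane PSHUFB rule. A scalar sketch of one 128-bit lane (hypothetical helper name, plain C++ only):

#include <cstdint>

// A control byte with its top bit set writes zero; otherwise its low four
// bits select a source byte from the same 128-bit lane.
static void pshufbLaneModel(uint8_t Dst[16], const uint8_t Src[16],
                            const uint8_t Ctrl[16]) {
  for (unsigned I = 0; I != 16; ++I)
    Dst[I] = (Ctrl[I] & 0x80) ? 0 : Src[Ctrl[I] & 0x0F];
}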
-
-/// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
-static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
- if (!V)
- return nullptr;
-
- auto *VecTy = cast<VectorType>(II.getType());
- unsigned NumElts = VecTy->getNumElements();
- bool IsPD = VecTy->getScalarType()->isDoubleTy();
- unsigned NumLaneElts = IsPD ? 2 : 4;
- assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
-
- // Construct a shuffle mask from constant integers or UNDEFs.
- int Indexes[16];
-
- // The intrinsics only read one or two bits, clear the rest.
- for (unsigned I = 0; I < NumElts; ++I) {
- Constant *COp = V->getAggregateElement(I);
- if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
- return nullptr;
-
- if (isa<UndefValue>(COp)) {
- Indexes[I] = -1;
- continue;
- }
-
- APInt Index = cast<ConstantInt>(COp)->getValue();
- Index = Index.zextOrTrunc(32).getLoBits(2);
-
- // The PD variants use bit 1 to select the per-lane element index, so
- // shift down to convert to a generic shuffle mask index.
- if (IsPD)
- Index.lshrInPlace(1);
-
- // The _256 variants are a bit trickier since the mask bits always index
- // into the corresponding 128-bit half. In order to convert to a generic
- // shuffle, we have to make that explicit.
- Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
-
- Indexes[I] = Index.getZExtValue();
- }
-
- auto V1 = II.getArgOperand(0);
- auto V2 = UndefValue::get(V1->getType());
- return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, NumElts));
-}
-
-/// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
-static Value *simplifyX86vpermv(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- auto *V = dyn_cast<Constant>(II.getArgOperand(1));
- if (!V)
- return nullptr;
-
- auto *VecTy = cast<VectorType>(II.getType());
- unsigned Size = VecTy->getNumElements();
- assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
- "Unexpected shuffle mask size");
-
- // Construct a shuffle mask from constant integers or UNDEFs.
- int Indexes[64];
-
- for (unsigned I = 0; I < Size; ++I) {
- Constant *COp = V->getAggregateElement(I);
- if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
- return nullptr;
-
- if (isa<UndefValue>(COp)) {
- Indexes[I] = -1;
- continue;
- }
-
- uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
- Index &= Size - 1;
- Indexes[I] = Index;
- }
-
- auto V1 = II.getArgOperand(0);
- auto V2 = UndefValue::get(VecTy);
- return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, Size));
-}
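
Because the index is masked with Size - 1 above, every lane stays in range, so VPERMD maps directly onto a generic shufflevector. A scalar sketch for the 8 x i32 form (hypothetical helper name, plain C++ only):

#include <cstdint>

// Only the low three bits of each mask element matter for 8 x i32.
static void vpermdModel(uint32_t Dst[8], const uint32_t Src[8],
                        const uint32_t Idx[8]) {
  for (unsigned I = 0; I != 8; ++I)
    Dst[I] = Src[Idx[I] & 7];
}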
-
// TODO, Obvious Missing Transforms:
// * Narrow width by halfs excluding zero/undef lanes
-Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) {
+Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
Value *LoadPtr = II.getArgOperand(0);
const Align Alignment =
cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
@@ -1118,9 +289,8 @@ Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) {
// If we can unconditionally load from this address, replace with a
// load/select idiom. TODO: use DT for context sensitive query
- if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment,
- II.getModule()->getDataLayout(), &II,
- nullptr)) {
+ if (isDereferenceablePointer(LoadPtr, II.getType(),
+ II.getModule()->getDataLayout(), &II, nullptr)) {
Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
@@ -1132,7 +302,7 @@ Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) {
// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Narrow width by halfs excluding zero/undef lanes
-Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) {
+Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
@@ -1148,11 +318,14 @@ Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) {
return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
}
+ if (isa<ScalableVectorType>(ConstMask->getType()))
+ return nullptr;
+
// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
APInt UndefElts(DemandedElts.getBitWidth(), 0);
- if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
- DemandedElts, UndefElts))
+ if (Value *V =
+ SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
return replaceOperand(II, 0, V);
return nullptr;
@@ -1165,7 +338,7 @@ Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) {
// * Narrow width by halfs excluding zero/undef lanes
// * Vector splat address w/known mask -> scalar load
// * Vector incrementing address -> vector masked load
-Instruction *InstCombiner::simplifyMaskedGather(IntrinsicInst &II) {
+Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
return nullptr;
}
@@ -1175,7 +348,7 @@ Instruction *InstCombiner::simplifyMaskedGather(IntrinsicInst &II) {
// * Narrow store width by halfs excluding zero/undef lanes
// * Vector splat address w/known mask -> scalar store
// * Vector incrementing address -> vector masked store
-Instruction *InstCombiner::simplifyMaskedScatter(IntrinsicInst &II) {
+Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
@@ -1184,14 +357,17 @@ Instruction *InstCombiner::simplifyMaskedScatter(IntrinsicInst &II) {
if (ConstMask->isNullValue())
return eraseInstFromFunction(II);
+ if (isa<ScalableVectorType>(ConstMask->getType()))
+ return nullptr;
+
// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
APInt UndefElts(DemandedElts.getBitWidth(), 0);
- if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
- DemandedElts, UndefElts))
+ if (Value *V =
+ SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
return replaceOperand(II, 0, V);
- if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1),
- DemandedElts, UndefElts))
+ if (Value *V =
+ SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
return replaceOperand(II, 1, V);
return nullptr;
@@ -1206,7 +382,7 @@ Instruction *InstCombiner::simplifyMaskedScatter(IntrinsicInst &II) {
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
- InstCombiner &IC) {
+ InstCombinerImpl &IC) {
auto *Arg = II.getArgOperand(0);
auto *StrippedArg = Arg->stripPointerCasts();
auto *StrippedInvariantGroupsArg = Arg->stripPointerCastsAndInvariantGroups();
@@ -1231,7 +407,7 @@ static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
return cast<Instruction>(Result);
}
-static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
+static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
assert((II.getIntrinsicID() == Intrinsic::cttz ||
II.getIntrinsicID() == Intrinsic::ctlz) &&
"Expected cttz or ctlz intrinsic");
@@ -1257,6 +433,9 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
if (SPF == SPF_ABS || SPF == SPF_NABS)
return IC.replaceOperand(II, 0, X);
+
+ if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
+ return IC.replaceOperand(II, 0, X);
}
KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
@@ -1301,7 +480,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
return nullptr;
}
-static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
+static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
assert(II.getIntrinsicID() == Intrinsic::ctpop &&
"Expected ctpop intrinsic");
Type *Ty = II.getType();
@@ -1356,107 +535,6 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
return nullptr;
}
-// TODO: If the x86 backend knew how to convert a bool vector mask back to an
-// XMM register mask efficiently, we could transform all x86 masked intrinsics
-// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
-static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
- Value *Ptr = II.getOperand(0);
- Value *Mask = II.getOperand(1);
- Constant *ZeroVec = Constant::getNullValue(II.getType());
-
- // Special case a zero mask since that's not a ConstantDataVector.
- // This masked load instruction creates a zero vector.
- if (isa<ConstantAggregateZero>(Mask))
- return IC.replaceInstUsesWith(II, ZeroVec);
-
- auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
- if (!ConstMask)
- return nullptr;
-
- // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
- // to allow target-independent optimizations.
-
- // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
- // the LLVM intrinsic definition for the pointer argument.
- unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
- PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
- Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
-
- // Second, convert the x86 XMM integer vector mask to a vector of bools based
- // on each element's most significant bit (the sign bit).
- Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
-
- // The pass-through vector for an x86 masked load is a zero vector.
- CallInst *NewMaskedLoad =
- IC.Builder.CreateMaskedLoad(PtrCast, Align(1), BoolMask, ZeroVec);
- return IC.replaceInstUsesWith(II, NewMaskedLoad);
-}
-
-// TODO: If the x86 backend knew how to convert a bool vector mask back to an
-// XMM register mask efficiently, we could transform all x86 masked intrinsics
-// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
-static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
- Value *Ptr = II.getOperand(0);
- Value *Mask = II.getOperand(1);
- Value *Vec = II.getOperand(2);
-
- // Special case a zero mask since that's not a ConstantDataVector:
- // this masked store instruction does nothing.
- if (isa<ConstantAggregateZero>(Mask)) {
- IC.eraseInstFromFunction(II);
- return true;
- }
-
- // The SSE2 version is too weird (e.g., unaligned but non-temporal) to do
- // anything else at this level.
- if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
- return false;
-
- auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
- if (!ConstMask)
- return false;
-
- // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
- // to allow target-independent optimizations.
-
- // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
- // the LLVM intrinsic definition for the pointer argument.
- unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
- PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
- Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
-
- // Second, convert the x86 XMM integer vector mask to a vector of bools based
- // on each element's most significant bit (the sign bit).
- Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
-
- IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);
-
- // 'Replace uses' doesn't work for stores. Erase the original masked store.
- IC.eraseInstFromFunction(II);
- return true;
-}
-
-// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
-//
-// A single NaN input is folded to minnum, so we rely on that folding for
-// handling NaNs.
-static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
- const APFloat &Src2) {
- APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
-
- APFloat::cmpResult Cmp0 = Max3.compare(Src0);
- assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
- if (Cmp0 == APFloat::cmpEqual)
- return maxnum(Src1, Src2);
-
- APFloat::cmpResult Cmp1 = Max3.compare(Src1);
- assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
- if (Cmp1 == APFloat::cmpEqual)
- return maxnum(Src0, Src2);
-
- return maxnum(Src0, Src1);
-}
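
For non-NaN inputs the selection above returns the median of the three operands. An equivalent scalar formulation (hypothetical helper name, plain C++ only):

#include <algorithm>

// Textbook median-of-three; agrees with the maxnum-based selection above
// whenever none of the inputs is a NaN.
static float med3Model(float A, float B, float C) {
  return std::max(std::min(A, B), std::min(std::max(A, B), C));
}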
-
/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
@@ -1468,7 +546,7 @@ static Value *simplifyNeonTbl1(const IntrinsicInst &II,
if (!C)
return nullptr;
- auto *VecTy = cast<VectorType>(II.getType());
+ auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
// Only perform this transformation for <8 x i8> vector types.
@@ -1495,28 +573,6 @@ static Value *simplifyNeonTbl1(const IntrinsicInst &II,
return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes));
}
-/// Convert a vector load intrinsic into a simple llvm load instruction.
-/// This is beneficial when the underlying object being addressed comes
-/// from a constant, since we get constant-folding for free.
-static Value *simplifyNeonVld1(const IntrinsicInst &II,
- unsigned MemAlign,
- InstCombiner::BuilderTy &Builder) {
- auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
-
- if (!IntrAlign)
- return nullptr;
-
- unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign ?
- MemAlign : IntrAlign->getLimitedValue();
-
- if (!isPowerOf2_32(Alignment))
- return nullptr;
-
- auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
- PointerType::get(II.getType(), 0));
- return Builder.CreateAlignedLoad(II.getType(), BCastInst, Align(Alignment));
-}
-
// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
@@ -1538,9 +594,9 @@ static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
// call @llvm.foo.end(i1 0)
// call @llvm.foo.end(i1 0) ; &I
-static bool removeTriviallyEmptyRange(
- IntrinsicInst &EndI, InstCombiner &IC,
- std::function<bool(const IntrinsicInst &)> IsStart) {
+static bool
+removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
+ std::function<bool(const IntrinsicInst &)> IsStart) {
// We start from the end intrinsic and scan backwards, so that InstCombine
// has already processed (and potentially removed) all the instructions
// before the end intrinsic.
@@ -1566,256 +622,7 @@ static bool removeTriviallyEmptyRange(
return false;
}
-// Convert NVVM intrinsics to target-generic LLVM code where possible.
-static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
- // Each NVVM intrinsic we can simplify can be replaced with one of:
- //
- // * an LLVM intrinsic,
- // * an LLVM cast operation,
- // * an LLVM binary operation, or
- // * ad-hoc LLVM IR for the particular operation.
-
- // Some transformations are only valid when the module's
- // flush-denormals-to-zero (ftz) setting is true/false, whereas other
- // transformations are valid regardless of the module's ftz setting.
- enum FtzRequirementTy {
- FTZ_Any, // Any ftz setting is ok.
- FTZ_MustBeOn, // Transformation is valid only if ftz is on.
- FTZ_MustBeOff, // Transformation is valid only if ftz is off.
- };
- // Classes of NVVM intrinsics that can't be replaced one-to-one with a
- // target-generic intrinsic, cast op, or binary op but that we can nonetheless
- // simplify.
- enum SpecialCase {
- SPC_Reciprocal,
- };
-
- // SimplifyAction is a poor-man's variant (plus an additional flag) that
- // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
- struct SimplifyAction {
- // Invariant: At most one of these Optionals has a value.
- Optional<Intrinsic::ID> IID;
- Optional<Instruction::CastOps> CastOp;
- Optional<Instruction::BinaryOps> BinaryOp;
- Optional<SpecialCase> Special;
-
- FtzRequirementTy FtzRequirement = FTZ_Any;
-
- SimplifyAction() = default;
-
- SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
- : IID(IID), FtzRequirement(FtzReq) {}
-
- // Cast operations don't have anything to do with FTZ, so we skip that
- // argument.
- SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
-
- SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
- : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
-
- SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
- : Special(Special), FtzRequirement(FtzReq) {}
- };
-
- // Try to generate a SimplifyAction describing how to replace our
- // IntrinsicInstr with target-generic LLVM IR.
- const SimplifyAction Action = [II]() -> SimplifyAction {
- switch (II->getIntrinsicID()) {
- // NVVM intrinsics that map directly to LLVM intrinsics.
- case Intrinsic::nvvm_ceil_d:
- return {Intrinsic::ceil, FTZ_Any};
- case Intrinsic::nvvm_ceil_f:
- return {Intrinsic::ceil, FTZ_MustBeOff};
- case Intrinsic::nvvm_ceil_ftz_f:
- return {Intrinsic::ceil, FTZ_MustBeOn};
- case Intrinsic::nvvm_fabs_d:
- return {Intrinsic::fabs, FTZ_Any};
- case Intrinsic::nvvm_fabs_f:
- return {Intrinsic::fabs, FTZ_MustBeOff};
- case Intrinsic::nvvm_fabs_ftz_f:
- return {Intrinsic::fabs, FTZ_MustBeOn};
- case Intrinsic::nvvm_floor_d:
- return {Intrinsic::floor, FTZ_Any};
- case Intrinsic::nvvm_floor_f:
- return {Intrinsic::floor, FTZ_MustBeOff};
- case Intrinsic::nvvm_floor_ftz_f:
- return {Intrinsic::floor, FTZ_MustBeOn};
- case Intrinsic::nvvm_fma_rn_d:
- return {Intrinsic::fma, FTZ_Any};
- case Intrinsic::nvvm_fma_rn_f:
- return {Intrinsic::fma, FTZ_MustBeOff};
- case Intrinsic::nvvm_fma_rn_ftz_f:
- return {Intrinsic::fma, FTZ_MustBeOn};
- case Intrinsic::nvvm_fmax_d:
- return {Intrinsic::maxnum, FTZ_Any};
- case Intrinsic::nvvm_fmax_f:
- return {Intrinsic::maxnum, FTZ_MustBeOff};
- case Intrinsic::nvvm_fmax_ftz_f:
- return {Intrinsic::maxnum, FTZ_MustBeOn};
- case Intrinsic::nvvm_fmin_d:
- return {Intrinsic::minnum, FTZ_Any};
- case Intrinsic::nvvm_fmin_f:
- return {Intrinsic::minnum, FTZ_MustBeOff};
- case Intrinsic::nvvm_fmin_ftz_f:
- return {Intrinsic::minnum, FTZ_MustBeOn};
- case Intrinsic::nvvm_round_d:
- return {Intrinsic::round, FTZ_Any};
- case Intrinsic::nvvm_round_f:
- return {Intrinsic::round, FTZ_MustBeOff};
- case Intrinsic::nvvm_round_ftz_f:
- return {Intrinsic::round, FTZ_MustBeOn};
- case Intrinsic::nvvm_sqrt_rn_d:
- return {Intrinsic::sqrt, FTZ_Any};
- case Intrinsic::nvvm_sqrt_f:
- // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
- // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
- // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
- // the versions with explicit ftz-ness.
- return {Intrinsic::sqrt, FTZ_Any};
- case Intrinsic::nvvm_sqrt_rn_f:
- return {Intrinsic::sqrt, FTZ_MustBeOff};
- case Intrinsic::nvvm_sqrt_rn_ftz_f:
- return {Intrinsic::sqrt, FTZ_MustBeOn};
- case Intrinsic::nvvm_trunc_d:
- return {Intrinsic::trunc, FTZ_Any};
- case Intrinsic::nvvm_trunc_f:
- return {Intrinsic::trunc, FTZ_MustBeOff};
- case Intrinsic::nvvm_trunc_ftz_f:
- return {Intrinsic::trunc, FTZ_MustBeOn};
-
- // NVVM intrinsics that map to LLVM cast operations.
- //
- // Note that llvm's target-generic conversion operators correspond to the rz
- // (round to zero) versions of the nvvm conversion intrinsics, even though
- // most everything else here uses the rn (round to nearest even) nvvm ops.
- case Intrinsic::nvvm_d2i_rz:
- case Intrinsic::nvvm_f2i_rz:
- case Intrinsic::nvvm_d2ll_rz:
- case Intrinsic::nvvm_f2ll_rz:
- return {Instruction::FPToSI};
- case Intrinsic::nvvm_d2ui_rz:
- case Intrinsic::nvvm_f2ui_rz:
- case Intrinsic::nvvm_d2ull_rz:
- case Intrinsic::nvvm_f2ull_rz:
- return {Instruction::FPToUI};
- case Intrinsic::nvvm_i2d_rz:
- case Intrinsic::nvvm_i2f_rz:
- case Intrinsic::nvvm_ll2d_rz:
- case Intrinsic::nvvm_ll2f_rz:
- return {Instruction::SIToFP};
- case Intrinsic::nvvm_ui2d_rz:
- case Intrinsic::nvvm_ui2f_rz:
- case Intrinsic::nvvm_ull2d_rz:
- case Intrinsic::nvvm_ull2f_rz:
- return {Instruction::UIToFP};
-
- // NVVM intrinsics that map to LLVM binary ops.
- case Intrinsic::nvvm_add_rn_d:
- return {Instruction::FAdd, FTZ_Any};
- case Intrinsic::nvvm_add_rn_f:
- return {Instruction::FAdd, FTZ_MustBeOff};
- case Intrinsic::nvvm_add_rn_ftz_f:
- return {Instruction::FAdd, FTZ_MustBeOn};
- case Intrinsic::nvvm_mul_rn_d:
- return {Instruction::FMul, FTZ_Any};
- case Intrinsic::nvvm_mul_rn_f:
- return {Instruction::FMul, FTZ_MustBeOff};
- case Intrinsic::nvvm_mul_rn_ftz_f:
- return {Instruction::FMul, FTZ_MustBeOn};
- case Intrinsic::nvvm_div_rn_d:
- return {Instruction::FDiv, FTZ_Any};
- case Intrinsic::nvvm_div_rn_f:
- return {Instruction::FDiv, FTZ_MustBeOff};
- case Intrinsic::nvvm_div_rn_ftz_f:
- return {Instruction::FDiv, FTZ_MustBeOn};
-
- // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
- // need special handling.
- //
- // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
- // as well.
- case Intrinsic::nvvm_rcp_rn_d:
- return {SPC_Reciprocal, FTZ_Any};
- case Intrinsic::nvvm_rcp_rn_f:
- return {SPC_Reciprocal, FTZ_MustBeOff};
- case Intrinsic::nvvm_rcp_rn_ftz_f:
- return {SPC_Reciprocal, FTZ_MustBeOn};
-
- // We do not currently simplify intrinsics that give an approximate answer.
- // These include:
- //
- // - nvvm_cos_approx_{f,ftz_f}
- // - nvvm_ex2_approx_{d,f,ftz_f}
- // - nvvm_lg2_approx_{d,f,ftz_f}
- // - nvvm_sin_approx_{f,ftz_f}
- // - nvvm_sqrt_approx_{f,ftz_f}
- // - nvvm_rsqrt_approx_{d,f,ftz_f}
- // - nvvm_div_approx_{ftz_d,ftz_f,f}
- // - nvvm_rcp_approx_ftz_d
- //
- // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
- // means that fastmath is enabled in the intrinsic. Unfortunately only
- // binary operators (currently) have a fastmath bit in SelectionDAG, so this
- // information gets lost and we can't select on it.
- //
- // TODO: div and rcp are lowered to a binary op, so these we could in theory
- // lower them to "fast fdiv".
-
- default:
- return {};
- }
- }();
-
- // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
- // can bail out now. (Notice that in the case that IID is not an NVVM
- // intrinsic, we don't have to look up any module metadata, as
- // FtzRequirementTy will be FTZ_Any.)
- if (Action.FtzRequirement != FTZ_Any) {
- StringRef Attr = II->getFunction()
- ->getFnAttribute("denormal-fp-math-f32")
- .getValueAsString();
- DenormalMode Mode = parseDenormalFPAttribute(Attr);
- bool FtzEnabled = Mode.Output != DenormalMode::IEEE;
-
- if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
- return nullptr;
- }
-
- // Simplify to target-generic intrinsic.
- if (Action.IID) {
- SmallVector<Value *, 4> Args(II->arg_operands());
- // All the target-generic intrinsics currently of interest to us have one
- // type argument, equal to that of the nvvm intrinsic's argument.
- Type *Tys[] = {II->getArgOperand(0)->getType()};
- return CallInst::Create(
- Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
- }
-
- // Simplify to target-generic binary op.
- if (Action.BinaryOp)
- return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
- II->getArgOperand(1), II->getName());
-
- // Simplify to target-generic cast op.
- if (Action.CastOp)
- return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
- II->getName());
-
- // All that's left are the special cases.
- if (!Action.Special)
- return nullptr;
-
- switch (*Action.Special) {
- case SPC_Reciprocal:
- // Simplify reciprocal.
- return BinaryOperator::Create(
- Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
- II->getArgOperand(0), II->getName());
- }
- llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
-}
-
-Instruction *InstCombiner::visitVAEndInst(VAEndInst &I) {
+Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
return I.getIntrinsicID() == Intrinsic::vastart ||
I.getIntrinsicID() == Intrinsic::vacopy;
@@ -1823,7 +630,7 @@ Instruction *InstCombiner::visitVAEndInst(VAEndInst &I) {
return nullptr;
}
-static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) {
+static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
assert(Call.getNumArgOperands() > 1 && "Need at least 2 args to swap");
Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
@@ -1834,20 +641,44 @@ static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) {
return nullptr;
}
-Instruction *InstCombiner::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
+/// Creates a result tuple for an overflow intrinsic \p II with a given
+/// \p Result and a constant \p Overflow value.
+static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
+ Constant *Overflow) {
+ Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
+ StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
+ return InsertValueInst::Create(Struct, Result, 0);
+}
+
+Instruction *
+InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
WithOverflowInst *WO = cast<WithOverflowInst>(II);
Value *OperationResult = nullptr;
Constant *OverflowResult = nullptr;
if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
WO->getRHS(), *WO, OperationResult, OverflowResult))
- return CreateOverflowTuple(WO, OperationResult, OverflowResult);
+ return createOverflowTuple(WO, OperationResult, OverflowResult);
return nullptr;
}
+static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
+ const DataLayout &DL, AssumptionCache *AC,
+ DominatorTree *DT) {
+ KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
+ if (Known.isNonNegative())
+ return false;
+ if (Known.isNegative())
+ return true;
+
+ return isImpliedByDomCondition(
+ ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
-Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Don't try to simplify calls without uses. It will not do anything useful,
// but will result in the following folds being skipped.
if (!CI.use_empty())
@@ -1953,31 +784,84 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
- if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
- return I;
-
- auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
- unsigned DemandedWidth) {
- APInt UndefElts(Width, 0);
- APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
- return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
- };
+ if (II->isCommutative()) {
+ if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
+ return NewCall;
+ }
Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
- default: break;
case Intrinsic::objectsize:
if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
return replaceInstUsesWith(CI, V);
return nullptr;
+ case Intrinsic::abs: {
+ Value *IIOperand = II->getArgOperand(0);
+ bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
+
+ // abs(-x) -> abs(x)
+ // TODO: Copy nsw if it was present on the neg?
+ Value *X;
+ if (match(IIOperand, m_Neg(m_Value(X))))
+ return replaceOperand(*II, 0, X);
+ if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
+ return replaceOperand(*II, 0, X);
+ if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
+ return replaceOperand(*II, 0, X);
+
+ if (Optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) {
+ // abs(x) -> x if x >= 0
+ if (!*Sign)
+ return replaceInstUsesWith(*II, IIOperand);
+
+ // abs(x) -> -x if x < 0
+ if (IntMinIsPoison)
+ return BinaryOperator::CreateNSWNeg(IIOperand);
+ return BinaryOperator::CreateNeg(IIOperand);
+ }
+
+ // abs (sext X) --> zext (abs X*)
+ // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
+ if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
+ Value *NarrowAbs =
+ Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
+ return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
+ }
+
+ break;
+ }
+ case Intrinsic::umax:
+ case Intrinsic::umin: {
+ Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
+ Value *X, *Y;
+ if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
+ (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
+ Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
+ return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
+ }
+ // If both operands of unsigned min/max are sign-extended, it is still ok
+ // to narrow the operation.
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::smax:
+ case Intrinsic::smin: {
+ Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
+ Value *X, *Y;
+ if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
+ (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
+ Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
+ return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
+ }
+ break;
+ }
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
Value *X = nullptr;
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
- unsigned C = X->getType()->getPrimitiveSizeInBits() -
- IIOperand->getType()->getPrimitiveSizeInBits();
+ unsigned C = X->getType()->getScalarSizeInBits() -
+ IIOperand->getType()->getScalarSizeInBits();
Value *CV = ConstantInt::get(X->getType(), C);
Value *V = Builder.CreateLShr(X, CV);
return new TruncInst(V, IIOperand->getType());
@@ -2002,15 +886,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// 0 and 1 are handled in instsimplify
-
// powi(x, -1) -> 1/x
if (Power->isMinusOne())
- return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
- II->getArgOperand(0));
+ return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
+ II->getArgOperand(0), II);
// powi(x, 2) -> x*x
if (Power->equalsInt(2))
- return BinaryOperator::CreateFMul(II->getArgOperand(0),
- II->getArgOperand(0));
+ return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
+ II->getArgOperand(0), II);
}
break;
@@ -2031,8 +914,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Type *Ty = II->getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
Constant *ShAmtC;
- if (match(II->getArgOperand(2), m_Constant(ShAmtC)) &&
- !isa<ConstantExpr>(ShAmtC) && !ShAmtC->containsConstantExpression()) {
+ if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC)) &&
+ !ShAmtC->containsConstantExpression()) {
// Canonicalize a shift amount constant operand to modulo the bit-width.
Constant *WidthC = ConstantInt::get(Ty, BitWidth);
Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
@@ -2092,8 +975,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow: {
- if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
- return I;
if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
return I;
@@ -2121,10 +1002,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
- if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
- return I;
- LLVM_FALLTHROUGH;
-
case Intrinsic::usub_with_overflow:
if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
return I;
@@ -2155,9 +1032,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::uadd_sat:
case Intrinsic::sadd_sat:
- if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
- return I;
- LLVM_FALLTHROUGH;
case Intrinsic::usub_sat:
case Intrinsic::ssub_sat: {
SaturatingInst *SI = cast<SaturatingInst>(II);
@@ -2238,8 +1112,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::maxnum:
case Intrinsic::minimum:
case Intrinsic::maximum: {
- if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
- return I;
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
Value *X, *Y;
@@ -2348,9 +1220,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
LLVM_FALLTHROUGH;
}
case Intrinsic::fma: {
- if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
- return I;
-
// fma fneg(x), fneg(y), z -> fma x, y, z
Value *Src0 = II->getArgOperand(0);
Value *Src1 = II->getArgOperand(1);
@@ -2390,40 +1259,52 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::copysign: {
- if (SignBitMustBeZero(II->getArgOperand(1), &TLI)) {
+ Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
+ if (SignBitMustBeZero(Sign, &TLI)) {
// If we know that the sign argument is positive, reduce to FABS:
- // copysign X, Pos --> fabs X
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
- II->getArgOperand(0), II);
+ // copysign Mag, +Sign --> fabs Mag
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
return replaceInstUsesWith(*II, Fabs);
}
// TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
const APFloat *C;
- if (match(II->getArgOperand(1), m_APFloat(C)) && C->isNegative()) {
+ if (match(Sign, m_APFloat(C)) && C->isNegative()) {
// If we know that the sign argument is negative, reduce to FNABS:
- // copysign X, Neg --> fneg (fabs X)
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
- II->getArgOperand(0), II);
+ // copysign Mag, -Sign --> fneg (fabs Mag)
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
}
// Propagate sign argument through nested calls:
- // copysign X, (copysign ?, SignArg) --> copysign X, SignArg
- Value *SignArg;
- if (match(II->getArgOperand(1),
- m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(SignArg))))
- return replaceOperand(*II, 1, SignArg);
+ // copysign Mag, (copysign ?, X) --> copysign Mag, X
+ Value *X;
+ if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
+ return replaceOperand(*II, 1, X);
+
+ // Peek through changes of magnitude's sign-bit. This call rewrites those:
+ // copysign (fabs X), Sign --> copysign X, Sign
+ // copysign (fneg X), Sign --> copysign X, Sign
+ if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
+ return replaceOperand(*II, 0, X);
break;
}
case Intrinsic::fabs: {
- Value *Cond;
- Constant *LHS, *RHS;
+ Value *Cond, *TVal, *FVal;
if (match(II->getArgOperand(0),
- m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
- CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
- CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
- return SelectInst::Create(Cond, Call0, Call1);
+ m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
+ // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
+ if (isa<Constant>(TVal) && isa<Constant>(FVal)) {
+ CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
+ CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
+ return SelectInst::Create(Cond, AbsT, AbsF);
+ }
+ // fabs (select Cond, -FVal, FVal) --> fabs FVal
+ if (match(TVal, m_FNeg(m_Specific(FVal))))
+ return replaceOperand(*II, 0, FVal);
+ // fabs (select Cond, TVal, -TVal) --> fabs TVal
+ if (match(FVal, m_FNeg(m_Specific(TVal))))
+ return replaceOperand(*II, 0, TVal);
}
LLVM_FALLTHROUGH;
@@ -2465,932 +1346,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
}
- case Intrinsic::ppc_altivec_lvx:
- case Intrinsic::ppc_altivec_lvxl:
- // Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), Align(16), DL, II, &AC,
- &DT) >= 16) {
- Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
- PointerType::getUnqual(II->getType()));
- return new LoadInst(II->getType(), Ptr, "", false, Align(16));
- }
- break;
- case Intrinsic::ppc_vsx_lxvw4x:
- case Intrinsic::ppc_vsx_lxvd2x: {
- // Turn PPC VSX loads into normal loads.
- Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
- PointerType::getUnqual(II->getType()));
- return new LoadInst(II->getType(), Ptr, Twine(""), false, Align(1));
- }
- case Intrinsic::ppc_altivec_stvx:
- case Intrinsic::ppc_altivec_stvxl:
- // Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), Align(16), DL, II, &AC,
- &DT) >= 16) {
- Type *OpPtrTy =
- PointerType::getUnqual(II->getArgOperand(0)->getType());
- Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
- return new StoreInst(II->getArgOperand(0), Ptr, false, Align(16));
- }
- break;
- case Intrinsic::ppc_vsx_stxvw4x:
- case Intrinsic::ppc_vsx_stxvd2x: {
- // Turn PPC VSX stores into normal stores.
- Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
- Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
- return new StoreInst(II->getArgOperand(0), Ptr, false, Align(1));
- }
- case Intrinsic::ppc_qpx_qvlfs:
- // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), Align(16), DL, II, &AC,
- &DT) >= 16) {
- Type *VTy =
- VectorType::get(Builder.getFloatTy(),
- cast<VectorType>(II->getType())->getElementCount());
- Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
- PointerType::getUnqual(VTy));
- Value *Load = Builder.CreateLoad(VTy, Ptr);
- return new FPExtInst(Load, II->getType());
- }
- break;
- case Intrinsic::ppc_qpx_qvlfd:
- // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), Align(32), DL, II, &AC,
- &DT) >= 32) {
- Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
- PointerType::getUnqual(II->getType()));
- return new LoadInst(II->getType(), Ptr, "", false, Align(32));
- }
- break;
- case Intrinsic::ppc_qpx_qvstfs:
- // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), Align(16), DL, II, &AC,
- &DT) >= 16) {
- Type *VTy = VectorType::get(
- Builder.getFloatTy(),
- cast<VectorType>(II->getArgOperand(0)->getType())->getElementCount());
- Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
- Type *OpPtrTy = PointerType::getUnqual(VTy);
- Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
- return new StoreInst(TOp, Ptr, false, Align(16));
- }
- break;
- case Intrinsic::ppc_qpx_qvstfd:
- // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), Align(32), DL, II, &AC,
- &DT) >= 32) {
- Type *OpPtrTy =
- PointerType::getUnqual(II->getArgOperand(0)->getType());
- Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
- return new StoreInst(II->getArgOperand(0), Ptr, false, Align(32));
- }
- break;
-
- case Intrinsic::x86_bmi_bextr_32:
- case Intrinsic::x86_bmi_bextr_64:
- case Intrinsic::x86_tbm_bextri_u32:
- case Intrinsic::x86_tbm_bextri_u64:
- // If the RHS is a constant we can try some simplifications.
- if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- uint64_t Shift = C->getZExtValue();
- uint64_t Length = (Shift >> 8) & 0xff;
- Shift &= 0xff;
- unsigned BitWidth = II->getType()->getIntegerBitWidth();
- // If the length is 0 or the shift is out of range, replace with zero.
- if (Length == 0 || Shift >= BitWidth)
- return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
- // If the LHS is also a constant, we can completely constant fold this.
- if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
- uint64_t Result = InC->getZExtValue() >> Shift;
- if (Length > BitWidth)
- Length = BitWidth;
- Result &= maskTrailingOnes<uint64_t>(Length);
- return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
- }
- // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
- // are only masking bits that a shift already cleared?
- }
- break;
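For reference, a self-contained sketch of the BEXTR constant fold performed above (illustrative only, not LLVM code; the helper name is invented): the control operand packs the start position in bits [7:0] and the field length in bits [15:8]; a zero length or out-of-range start yields zero, and the length is clamped to the operand width.

    #include <cstdint>

    uint64_t bextrFold(uint64_t Src, uint64_t Ctrl, unsigned BitWidth) {
      uint64_t Shift = Ctrl & 0xff;
      uint64_t Length = (Ctrl >> 8) & 0xff;
      if (Length == 0 || Shift >= BitWidth)
        return 0;                               // folds to constant zero
      if (Length > BitWidth)
        Length = BitWidth;                      // clamp, as maskTrailingOnes does
      uint64_t Mask = Length >= 64 ? ~0ULL : ((1ULL << Length) - 1);
      return (Src >> Shift) & Mask;
    }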
-
- case Intrinsic::x86_bmi_bzhi_32:
- case Intrinsic::x86_bmi_bzhi_64:
- // If the RHS is a constant we can try some simplifications.
- if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- uint64_t Index = C->getZExtValue() & 0xff;
- unsigned BitWidth = II->getType()->getIntegerBitWidth();
- if (Index >= BitWidth)
- return replaceInstUsesWith(CI, II->getArgOperand(0));
- if (Index == 0)
- return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
- // If the LHS is also a constant, we can completely constant fold this.
- if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
- uint64_t Result = InC->getZExtValue();
- Result &= maskTrailingOnes<uint64_t>(Index);
- return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
- }
- // TODO should we convert this to an AND if the RHS is constant?
- }
- break;
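Similarly, a minimal sketch of the BZHI fold above (illustrative only; helper name invented): bits at positions greater than or equal to the index byte are zeroed, an index of zero gives zero, and an index at or beyond the bit width leaves the source unchanged.

    #include <cstdint>

    uint64_t bzhiFold(uint64_t Src, uint64_t Ctrl, unsigned BitWidth) {
      uint64_t Index = Ctrl & 0xff;
      if (Index >= BitWidth)
        return Src;                             // whole source passes through
      if (Index == 0)
        return 0;
      return Src & ((1ULL << Index) - 1);       // keep the low Index bits
    }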
- case Intrinsic::x86_bmi_pext_32:
- case Intrinsic::x86_bmi_pext_64:
- if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- if (MaskC->isNullValue())
- return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
- if (MaskC->isAllOnesValue())
- return replaceInstUsesWith(CI, II->getArgOperand(0));
-
- if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
- uint64_t Src = SrcC->getZExtValue();
- uint64_t Mask = MaskC->getZExtValue();
- uint64_t Result = 0;
- uint64_t BitToSet = 1;
-
- while (Mask) {
- // Isolate lowest set bit.
- uint64_t BitToTest = Mask & -Mask;
- if (BitToTest & Src)
- Result |= BitToSet;
-
- BitToSet <<= 1;
- // Clear lowest set bit.
- Mask &= Mask - 1;
- }
-
- return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
- }
- }
- break;
- case Intrinsic::x86_bmi_pdep_32:
- case Intrinsic::x86_bmi_pdep_64:
- if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- if (MaskC->isNullValue())
- return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
- if (MaskC->isAllOnesValue())
- return replaceInstUsesWith(CI, II->getArgOperand(0));
-
- if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
- uint64_t Src = SrcC->getZExtValue();
- uint64_t Mask = MaskC->getZExtValue();
- uint64_t Result = 0;
- uint64_t BitToTest = 1;
-
- while (Mask) {
- // Isolate lowest set bit.
- uint64_t BitToSet = Mask & -Mask;
- if (BitToTest & Src)
- Result |= BitToSet;
-
- BitToTest <<= 1;
- // Clear lowest set bit.
- Mask &= Mask - 1;
- }
-
- return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
- }
- }
- break;
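The two loops above mirror the PEXT (parallel bit extract) and PDEP (parallel bit deposit) semantics. A quick standalone check of the same logic on concrete values (illustrative only; helper names invented):

    #include <cassert>
    #include <cstdint>

    static uint64_t pextEval(uint64_t Src, uint64_t Mask) {
      uint64_t Result = 0, BitToSet = 1;
      while (Mask) {
        uint64_t BitToTest = Mask & -Mask;      // isolate lowest set mask bit
        if (BitToTest & Src)
          Result |= BitToSet;
        BitToSet <<= 1;
        Mask &= Mask - 1;                       // clear lowest set mask bit
      }
      return Result;
    }

    static uint64_t pdepEval(uint64_t Src, uint64_t Mask) {
      uint64_t Result = 0, BitToTest = 1;
      while (Mask) {
        uint64_t BitToSet = Mask & -Mask;
        if (BitToTest & Src)
          Result |= BitToSet;
        BitToTest <<= 1;
        Mask &= Mask - 1;
      }
      return Result;
    }

    int main() {
      assert(pextEval(0b10110, 0b11010) == 0b101);    // gather bits 1, 3, 4
      assert(pdepEval(0b10110, 0b11010) == 0b11000);  // scatter into bits 1, 3, 4
    }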
-
- case Intrinsic::x86_sse_cvtss2si:
- case Intrinsic::x86_sse_cvtss2si64:
- case Intrinsic::x86_sse_cvttss2si:
- case Intrinsic::x86_sse_cvttss2si64:
- case Intrinsic::x86_sse2_cvtsd2si:
- case Intrinsic::x86_sse2_cvtsd2si64:
- case Intrinsic::x86_sse2_cvttsd2si:
- case Intrinsic::x86_sse2_cvttsd2si64:
- case Intrinsic::x86_avx512_vcvtss2si32:
- case Intrinsic::x86_avx512_vcvtss2si64:
- case Intrinsic::x86_avx512_vcvtss2usi32:
- case Intrinsic::x86_avx512_vcvtss2usi64:
- case Intrinsic::x86_avx512_vcvtsd2si32:
- case Intrinsic::x86_avx512_vcvtsd2si64:
- case Intrinsic::x86_avx512_vcvtsd2usi32:
- case Intrinsic::x86_avx512_vcvtsd2usi64:
- case Intrinsic::x86_avx512_cvttss2si:
- case Intrinsic::x86_avx512_cvttss2si64:
- case Intrinsic::x86_avx512_cvttss2usi:
- case Intrinsic::x86_avx512_cvttss2usi64:
- case Intrinsic::x86_avx512_cvttsd2si:
- case Intrinsic::x86_avx512_cvttsd2si64:
- case Intrinsic::x86_avx512_cvttsd2usi:
- case Intrinsic::x86_avx512_cvttsd2usi64: {
- // These intrinsics only demand the 0th element of their input vectors. If
- // we can simplify the input based on that, do so now.
- Value *Arg = II->getArgOperand(0);
- unsigned VWidth = cast<VectorType>(Arg->getType())->getNumElements();
- if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1))
- return replaceOperand(*II, 0, V);
- break;
- }
-
- case Intrinsic::x86_mmx_pmovmskb:
- case Intrinsic::x86_sse_movmsk_ps:
- case Intrinsic::x86_sse2_movmsk_pd:
- case Intrinsic::x86_sse2_pmovmskb_128:
- case Intrinsic::x86_avx_movmsk_pd_256:
- case Intrinsic::x86_avx_movmsk_ps_256:
- case Intrinsic::x86_avx2_pmovmskb:
- if (Value *V = simplifyX86movmsk(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_sse_comieq_ss:
- case Intrinsic::x86_sse_comige_ss:
- case Intrinsic::x86_sse_comigt_ss:
- case Intrinsic::x86_sse_comile_ss:
- case Intrinsic::x86_sse_comilt_ss:
- case Intrinsic::x86_sse_comineq_ss:
- case Intrinsic::x86_sse_ucomieq_ss:
- case Intrinsic::x86_sse_ucomige_ss:
- case Intrinsic::x86_sse_ucomigt_ss:
- case Intrinsic::x86_sse_ucomile_ss:
- case Intrinsic::x86_sse_ucomilt_ss:
- case Intrinsic::x86_sse_ucomineq_ss:
- case Intrinsic::x86_sse2_comieq_sd:
- case Intrinsic::x86_sse2_comige_sd:
- case Intrinsic::x86_sse2_comigt_sd:
- case Intrinsic::x86_sse2_comile_sd:
- case Intrinsic::x86_sse2_comilt_sd:
- case Intrinsic::x86_sse2_comineq_sd:
- case Intrinsic::x86_sse2_ucomieq_sd:
- case Intrinsic::x86_sse2_ucomige_sd:
- case Intrinsic::x86_sse2_ucomigt_sd:
- case Intrinsic::x86_sse2_ucomile_sd:
- case Intrinsic::x86_sse2_ucomilt_sd:
- case Intrinsic::x86_sse2_ucomineq_sd:
- case Intrinsic::x86_avx512_vcomi_ss:
- case Intrinsic::x86_avx512_vcomi_sd:
- case Intrinsic::x86_avx512_mask_cmp_ss:
- case Intrinsic::x86_avx512_mask_cmp_sd: {
- // These intrinsics only demand the 0th element of their input vectors. If
- // we can simplify the input based on that, do so now.
- bool MadeChange = false;
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
- unsigned VWidth = cast<VectorType>(Arg0->getType())->getNumElements();
- if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
- replaceOperand(*II, 0, V);
- MadeChange = true;
- }
- if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
- replaceOperand(*II, 1, V);
- MadeChange = true;
- }
- if (MadeChange)
- return II;
- break;
- }
- case Intrinsic::x86_avx512_cmp_pd_128:
- case Intrinsic::x86_avx512_cmp_pd_256:
- case Intrinsic::x86_avx512_cmp_pd_512:
- case Intrinsic::x86_avx512_cmp_ps_128:
- case Intrinsic::x86_avx512_cmp_ps_256:
- case Intrinsic::x86_avx512_cmp_ps_512: {
- // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
- bool Arg0IsZero = match(Arg0, m_PosZeroFP());
- if (Arg0IsZero)
- std::swap(Arg0, Arg1);
- Value *A, *B;
- // This fold requires only the NINF (no +/- infinity) flag, since
- // inf minus inf is NaN.
- // NSZ (no signed zeros) is not needed because zeros of either sign
- // compare equal for both compares.
- // NNAN is not needed because NaNs compare the same for both compares.
- // The compare intrinsic relies on the above assumptions and therefore
- // doesn't require additional flags.
- if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
- match(Arg1, m_PosZeroFP()) && isa<Instruction>(Arg0) &&
- cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
- if (Arg0IsZero)
- std::swap(A, B);
- replaceOperand(*II, 0, A);
- replaceOperand(*II, 1, B);
- return II;
- }
- break;
- }
-
- case Intrinsic::x86_avx512_add_ps_512:
- case Intrinsic::x86_avx512_div_ps_512:
- case Intrinsic::x86_avx512_mul_ps_512:
- case Intrinsic::x86_avx512_sub_ps_512:
- case Intrinsic::x86_avx512_add_pd_512:
- case Intrinsic::x86_avx512_div_pd_512:
- case Intrinsic::x86_avx512_mul_pd_512:
- case Intrinsic::x86_avx512_sub_pd_512:
- // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
- // IR operations.
- if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
- if (R->getValue() == 4) {
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
-
- Value *V;
- switch (IID) {
- default: llvm_unreachable("Case stmts out of sync!");
- case Intrinsic::x86_avx512_add_ps_512:
- case Intrinsic::x86_avx512_add_pd_512:
- V = Builder.CreateFAdd(Arg0, Arg1);
- break;
- case Intrinsic::x86_avx512_sub_ps_512:
- case Intrinsic::x86_avx512_sub_pd_512:
- V = Builder.CreateFSub(Arg0, Arg1);
- break;
- case Intrinsic::x86_avx512_mul_ps_512:
- case Intrinsic::x86_avx512_mul_pd_512:
- V = Builder.CreateFMul(Arg0, Arg1);
- break;
- case Intrinsic::x86_avx512_div_ps_512:
- case Intrinsic::x86_avx512_div_pd_512:
- V = Builder.CreateFDiv(Arg0, Arg1);
- break;
- }
-
- return replaceInstUsesWith(*II, V);
- }
- }
- break;
-
- case Intrinsic::x86_avx512_mask_add_ss_round:
- case Intrinsic::x86_avx512_mask_div_ss_round:
- case Intrinsic::x86_avx512_mask_mul_ss_round:
- case Intrinsic::x86_avx512_mask_sub_ss_round:
- case Intrinsic::x86_avx512_mask_add_sd_round:
- case Intrinsic::x86_avx512_mask_div_sd_round:
- case Intrinsic::x86_avx512_mask_mul_sd_round:
- case Intrinsic::x86_avx512_mask_sub_sd_round:
- // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
- // IR operations.
- if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
- if (R->getValue() == 4) {
- // Extract the element as scalars.
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
- Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
- Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
-
- Value *V;
- switch (IID) {
- default: llvm_unreachable("Case stmts out of sync!");
- case Intrinsic::x86_avx512_mask_add_ss_round:
- case Intrinsic::x86_avx512_mask_add_sd_round:
- V = Builder.CreateFAdd(LHS, RHS);
- break;
- case Intrinsic::x86_avx512_mask_sub_ss_round:
- case Intrinsic::x86_avx512_mask_sub_sd_round:
- V = Builder.CreateFSub(LHS, RHS);
- break;
- case Intrinsic::x86_avx512_mask_mul_ss_round:
- case Intrinsic::x86_avx512_mask_mul_sd_round:
- V = Builder.CreateFMul(LHS, RHS);
- break;
- case Intrinsic::x86_avx512_mask_div_ss_round:
- case Intrinsic::x86_avx512_mask_div_sd_round:
- V = Builder.CreateFDiv(LHS, RHS);
- break;
- }
-
- // Handle the masking aspect of the intrinsic.
- Value *Mask = II->getArgOperand(3);
- auto *C = dyn_cast<ConstantInt>(Mask);
- // We don't need a select if we know the mask bit is a 1.
- if (!C || !C->getValue()[0]) {
- // Cast the mask to an i1 vector and then extract the lowest element.
- auto *MaskTy = FixedVectorType::get(
- Builder.getInt1Ty(),
- cast<IntegerType>(Mask->getType())->getBitWidth());
- Mask = Builder.CreateBitCast(Mask, MaskTy);
- Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
- // Extract the lowest element from the passthru operand.
- Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
- (uint64_t)0);
- V = Builder.CreateSelect(Mask, V, Passthru);
- }
-
- // Insert the result back into the original argument 0.
- V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
-
- return replaceInstUsesWith(*II, V);
- }
- }
- break;
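The masking logic above follows the usual AVX-512 scalar semantics: lane 0 of the result is either the arithmetic result or the passthru lane, chosen by bit 0 of the mask, while the remaining lanes are copied from the first operand. A scalar model of one of these operations (illustrative only; helper name invented):

    #include <cstdint>

    void maskedAddSS(const float A[4], const float B[4], const float Passthru[4],
                     uint8_t Mask, float Out[4]) {
      for (int i = 1; i < 4; ++i)
        Out[i] = A[i];                          // upper lanes come from operand 0
      Out[0] = (Mask & 1) ? A[0] + B[0] : Passthru[0];
    }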
-
- // Constant fold ashr( <A x Bi>, Ci ).
- // Constant fold lshr( <A x Bi>, Ci ).
- // Constant fold shl( <A x Bi>, Ci ).
- case Intrinsic::x86_sse2_psrai_d:
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx512_psrai_q_128:
- case Intrinsic::x86_avx512_psrai_q_256:
- case Intrinsic::x86_avx512_psrai_d_512:
- case Intrinsic::x86_avx512_psrai_q_512:
- case Intrinsic::x86_avx512_psrai_w_512:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w:
- case Intrinsic::x86_avx512_psrli_d_512:
- case Intrinsic::x86_avx512_psrli_q_512:
- case Intrinsic::x86_avx512_psrli_w_512:
- case Intrinsic::x86_sse2_pslli_d:
- case Intrinsic::x86_sse2_pslli_q:
- case Intrinsic::x86_sse2_pslli_w:
- case Intrinsic::x86_avx2_pslli_d:
- case Intrinsic::x86_avx2_pslli_q:
- case Intrinsic::x86_avx2_pslli_w:
- case Intrinsic::x86_avx512_pslli_d_512:
- case Intrinsic::x86_avx512_pslli_q_512:
- case Intrinsic::x86_avx512_pslli_w_512:
- if (Value *V = simplifyX86immShift(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_sse2_psra_d:
- case Intrinsic::x86_sse2_psra_w:
- case Intrinsic::x86_avx2_psra_d:
- case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx512_psra_q_128:
- case Intrinsic::x86_avx512_psra_q_256:
- case Intrinsic::x86_avx512_psra_d_512:
- case Intrinsic::x86_avx512_psra_q_512:
- case Intrinsic::x86_avx512_psra_w_512:
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx512_psrl_d_512:
- case Intrinsic::x86_avx512_psrl_q_512:
- case Intrinsic::x86_avx512_psrl_w_512:
- case Intrinsic::x86_sse2_psll_d:
- case Intrinsic::x86_sse2_psll_q:
- case Intrinsic::x86_sse2_psll_w:
- case Intrinsic::x86_avx2_psll_d:
- case Intrinsic::x86_avx2_psll_q:
- case Intrinsic::x86_avx2_psll_w:
- case Intrinsic::x86_avx512_psll_d_512:
- case Intrinsic::x86_avx512_psll_q_512:
- case Intrinsic::x86_avx512_psll_w_512: {
- if (Value *V = simplifyX86immShift(*II, Builder))
- return replaceInstUsesWith(*II, V);
-
- // The SSE2/AVX2 shifts use only the first 64 bits of the 128-bit vector
- // operand to compute the shift amount.
- Value *Arg1 = II->getArgOperand(1);
- assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
- "Unexpected packed shift size");
- unsigned VWidth = cast<VectorType>(Arg1->getType())->getNumElements();
-
- if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2))
- return replaceOperand(*II, 1, V);
- break;
- }
-
- case Intrinsic::x86_avx2_psllv_d:
- case Intrinsic::x86_avx2_psllv_d_256:
- case Intrinsic::x86_avx2_psllv_q:
- case Intrinsic::x86_avx2_psllv_q_256:
- case Intrinsic::x86_avx512_psllv_d_512:
- case Intrinsic::x86_avx512_psllv_q_512:
- case Intrinsic::x86_avx512_psllv_w_128:
- case Intrinsic::x86_avx512_psllv_w_256:
- case Intrinsic::x86_avx512_psllv_w_512:
- case Intrinsic::x86_avx2_psrav_d:
- case Intrinsic::x86_avx2_psrav_d_256:
- case Intrinsic::x86_avx512_psrav_q_128:
- case Intrinsic::x86_avx512_psrav_q_256:
- case Intrinsic::x86_avx512_psrav_d_512:
- case Intrinsic::x86_avx512_psrav_q_512:
- case Intrinsic::x86_avx512_psrav_w_128:
- case Intrinsic::x86_avx512_psrav_w_256:
- case Intrinsic::x86_avx512_psrav_w_512:
- case Intrinsic::x86_avx2_psrlv_d:
- case Intrinsic::x86_avx2_psrlv_d_256:
- case Intrinsic::x86_avx2_psrlv_q:
- case Intrinsic::x86_avx2_psrlv_q_256:
- case Intrinsic::x86_avx512_psrlv_d_512:
- case Intrinsic::x86_avx512_psrlv_q_512:
- case Intrinsic::x86_avx512_psrlv_w_128:
- case Intrinsic::x86_avx512_psrlv_w_256:
- case Intrinsic::x86_avx512_psrlv_w_512:
- if (Value *V = simplifyX86varShift(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_sse2_packssdw_128:
- case Intrinsic::x86_sse2_packsswb_128:
- case Intrinsic::x86_avx2_packssdw:
- case Intrinsic::x86_avx2_packsswb:
- case Intrinsic::x86_avx512_packssdw_512:
- case Intrinsic::x86_avx512_packsswb_512:
- if (Value *V = simplifyX86pack(*II, Builder, true))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_sse2_packuswb_128:
- case Intrinsic::x86_sse41_packusdw:
- case Intrinsic::x86_avx2_packusdw:
- case Intrinsic::x86_avx2_packuswb:
- case Intrinsic::x86_avx512_packusdw_512:
- case Intrinsic::x86_avx512_packuswb_512:
- if (Value *V = simplifyX86pack(*II, Builder, false))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_pclmulqdq:
- case Intrinsic::x86_pclmulqdq_256:
- case Intrinsic::x86_pclmulqdq_512: {
- if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
- unsigned Imm = C->getZExtValue();
-
- bool MadeChange = false;
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
- unsigned VWidth = cast<VectorType>(Arg0->getType())->getNumElements();
-
- APInt UndefElts1(VWidth, 0);
- APInt DemandedElts1 = APInt::getSplat(VWidth,
- APInt(2, (Imm & 0x01) ? 2 : 1));
- if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts1,
- UndefElts1)) {
- replaceOperand(*II, 0, V);
- MadeChange = true;
- }
-
- APInt UndefElts2(VWidth, 0);
- APInt DemandedElts2 = APInt::getSplat(VWidth,
- APInt(2, (Imm & 0x10) ? 2 : 1));
- if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts2,
- UndefElts2)) {
- replaceOperand(*II, 1, V);
- MadeChange = true;
- }
-
- // If either input elements are undef, the result is zero.
- if (DemandedElts1.isSubsetOf(UndefElts1) ||
- DemandedElts2.isSubsetOf(UndefElts2))
- return replaceInstUsesWith(*II,
- ConstantAggregateZero::get(II->getType()));
-
- if (MadeChange)
- return II;
- }
- break;
- }
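The demanded-elements masks above encode which 64-bit lane of each 128-bit chunk PCLMULQDQ actually reads: bit 0 of the immediate selects the high or low quadword of the first operand, and bit 4 does the same for the second. A small helper expressing that mapping (illustrative only; helper name invented):

    // Returns 1 for the high quadword, 0 for the low quadword of each pair.
    unsigned pclmulDemandedLane(unsigned Imm, unsigned OperandIdx) {
      unsigned SelectBit = (OperandIdx == 0) ? 0x01 : 0x10;
      return (Imm & SelectBit) ? 1 : 0;
    }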
-
- case Intrinsic::x86_sse41_insertps:
- if (Value *V = simplifyX86insertps(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_sse4a_extrq: {
- Value *Op0 = II->getArgOperand(0);
- Value *Op1 = II->getArgOperand(1);
- unsigned VWidth0 = cast<VectorType>(Op0->getType())->getNumElements();
- unsigned VWidth1 = cast<VectorType>(Op1->getType())->getNumElements();
- assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
- Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
- VWidth1 == 16 && "Unexpected operand sizes");
-
- // See if we're dealing with constant values.
- Constant *C1 = dyn_cast<Constant>(Op1);
- ConstantInt *CILength =
- C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
- : nullptr;
- ConstantInt *CIIndex =
- C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
- : nullptr;
-
- // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
- if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
- return replaceInstUsesWith(*II, V);
-
- // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
- // operands and the lowest 16-bits of the second.
- bool MadeChange = false;
- if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
- replaceOperand(*II, 0, V);
- MadeChange = true;
- }
- if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
- replaceOperand(*II, 1, V);
- MadeChange = true;
- }
- if (MadeChange)
- return II;
- break;
- }
-
- case Intrinsic::x86_sse4a_extrqi: {
- // EXTRQI: Extract Length bits starting from Index. Zero-pad the remaining
- // bits of the lower 64 bits. The upper 64 bits are undefined.
- Value *Op0 = II->getArgOperand(0);
- unsigned VWidth = cast<VectorType>(Op0->getType())->getNumElements();
- assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
- "Unexpected operand size");
-
- // See if we're dealing with constant values.
- ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
- ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
-
- // Attempt to simplify to a constant or shuffle vector.
- if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
- return replaceInstUsesWith(*II, V);
-
- // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
- // operand.
- if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1))
- return replaceOperand(*II, 0, V);
- break;
- }
-
- case Intrinsic::x86_sse4a_insertq: {
- Value *Op0 = II->getArgOperand(0);
- Value *Op1 = II->getArgOperand(1);
- unsigned VWidth = cast<VectorType>(Op0->getType())->getNumElements();
- assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
- Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
- cast<VectorType>(Op1->getType())->getNumElements() == 2 &&
- "Unexpected operand size");
-
- // See if we're dealing with constant values.
- Constant *C1 = dyn_cast<Constant>(Op1);
- ConstantInt *CI11 =
- C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
- : nullptr;
-
- // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
- if (CI11) {
- const APInt &V11 = CI11->getValue();
- APInt Len = V11.zextOrTrunc(6);
- APInt Idx = V11.lshr(8).zextOrTrunc(6);
- if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
- return replaceInstUsesWith(*II, V);
- }
-
- // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
- // operand.
- if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1))
- return replaceOperand(*II, 0, V);
- break;
- }
-
- case Intrinsic::x86_sse4a_insertqi: {
- // INSERTQI: Extract lowest Length bits from lower half of second source and
- // insert over first source starting at Index bit. The upper 64 bits are
- // undefined.
- Value *Op0 = II->getArgOperand(0);
- Value *Op1 = II->getArgOperand(1);
- unsigned VWidth0 = cast<VectorType>(Op0->getType())->getNumElements();
- unsigned VWidth1 = cast<VectorType>(Op1->getType())->getNumElements();
- assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
- Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
- VWidth1 == 2 && "Unexpected operand sizes");
-
- // See if we're dealing with constant values.
- ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
- ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
-
- // Attempt to simplify to a constant or shuffle vector.
- if (CILength && CIIndex) {
- APInt Len = CILength->getValue().zextOrTrunc(6);
- APInt Idx = CIIndex->getValue().zextOrTrunc(6);
- if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
- return replaceInstUsesWith(*II, V);
- }
-
- // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
- // operands.
- bool MadeChange = false;
- if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
- replaceOperand(*II, 0, V);
- MadeChange = true;
- }
- if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
- replaceOperand(*II, 1, V);
- MadeChange = true;
- }
- if (MadeChange)
- return II;
- break;
- }
-
- case Intrinsic::x86_sse41_pblendvb:
- case Intrinsic::x86_sse41_blendvps:
- case Intrinsic::x86_sse41_blendvpd:
- case Intrinsic::x86_avx_blendv_ps_256:
- case Intrinsic::x86_avx_blendv_pd_256:
- case Intrinsic::x86_avx2_pblendvb: {
- // fold (blend A, A, Mask) -> A
- Value *Op0 = II->getArgOperand(0);
- Value *Op1 = II->getArgOperand(1);
- Value *Mask = II->getArgOperand(2);
- if (Op0 == Op1)
- return replaceInstUsesWith(CI, Op0);
-
- // Zero Mask - select 1st argument.
- if (isa<ConstantAggregateZero>(Mask))
- return replaceInstUsesWith(CI, Op0);
-
- // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
- if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
- Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
- return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
- }
-
- // Convert to a vector select if we can bypass casts and find a boolean
- // vector condition value.
- Value *BoolVec;
- Mask = peekThroughBitcast(Mask);
- if (match(Mask, m_SExt(m_Value(BoolVec))) &&
- BoolVec->getType()->isVectorTy() &&
- BoolVec->getType()->getScalarSizeInBits() == 1) {
- assert(Mask->getType()->getPrimitiveSizeInBits() ==
- II->getType()->getPrimitiveSizeInBits() &&
- "Not expecting mask and operands with different sizes");
-
- unsigned NumMaskElts =
- cast<VectorType>(Mask->getType())->getNumElements();
- unsigned NumOperandElts =
- cast<VectorType>(II->getType())->getNumElements();
- if (NumMaskElts == NumOperandElts)
- return SelectInst::Create(BoolVec, Op1, Op0);
-
- // If the mask has fewer elements than the operands, each mask bit maps to
- // multiple elements of the operands. Bitcast back and forth.
- if (NumMaskElts < NumOperandElts) {
- Value *CastOp0 = Builder.CreateBitCast(Op0, Mask->getType());
- Value *CastOp1 = Builder.CreateBitCast(Op1, Mask->getType());
- Value *Sel = Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
- return new BitCastInst(Sel, II->getType());
- }
- }
-
- break;
- }
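The blendv folds above rest on the lane-wise semantics of these instructions: each result lane is taken from the second operand when the top bit of the corresponding mask lane is set, otherwise from the first operand. A byte-wise scalar model (illustrative only; helper name invented):

    #include <cstdint>

    void pblendvbModel(const uint8_t *Op0, const uint8_t *Op1, const uint8_t *Mask,
                       uint8_t *Out, unsigned NumLanes) {
      for (unsigned i = 0; i < NumLanes; ++i)
        Out[i] = (Mask[i] & 0x80) ? Op1[i] : Op0[i];
    }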
-
- case Intrinsic::x86_ssse3_pshuf_b_128:
- case Intrinsic::x86_avx2_pshuf_b:
- case Intrinsic::x86_avx512_pshuf_b_512:
- if (Value *V = simplifyX86pshufb(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_avx_vpermilvar_ps:
- case Intrinsic::x86_avx_vpermilvar_ps_256:
- case Intrinsic::x86_avx512_vpermilvar_ps_512:
- case Intrinsic::x86_avx_vpermilvar_pd:
- case Intrinsic::x86_avx_vpermilvar_pd_256:
- case Intrinsic::x86_avx512_vpermilvar_pd_512:
- if (Value *V = simplifyX86vpermilvar(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_avx2_permd:
- case Intrinsic::x86_avx2_permps:
- case Intrinsic::x86_avx512_permvar_df_256:
- case Intrinsic::x86_avx512_permvar_df_512:
- case Intrinsic::x86_avx512_permvar_di_256:
- case Intrinsic::x86_avx512_permvar_di_512:
- case Intrinsic::x86_avx512_permvar_hi_128:
- case Intrinsic::x86_avx512_permvar_hi_256:
- case Intrinsic::x86_avx512_permvar_hi_512:
- case Intrinsic::x86_avx512_permvar_qi_128:
- case Intrinsic::x86_avx512_permvar_qi_256:
- case Intrinsic::x86_avx512_permvar_qi_512:
- case Intrinsic::x86_avx512_permvar_sf_512:
- case Intrinsic::x86_avx512_permvar_si_512:
- if (Value *V = simplifyX86vpermv(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_avx_maskload_ps:
- case Intrinsic::x86_avx_maskload_pd:
- case Intrinsic::x86_avx_maskload_ps_256:
- case Intrinsic::x86_avx_maskload_pd_256:
- case Intrinsic::x86_avx2_maskload_d:
- case Intrinsic::x86_avx2_maskload_q:
- case Intrinsic::x86_avx2_maskload_d_256:
- case Intrinsic::x86_avx2_maskload_q_256:
- if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
- return I;
- break;
-
- case Intrinsic::x86_sse2_maskmov_dqu:
- case Intrinsic::x86_avx_maskstore_ps:
- case Intrinsic::x86_avx_maskstore_pd:
- case Intrinsic::x86_avx_maskstore_ps_256:
- case Intrinsic::x86_avx_maskstore_pd_256:
- case Intrinsic::x86_avx2_maskstore_d:
- case Intrinsic::x86_avx2_maskstore_q:
- case Intrinsic::x86_avx2_maskstore_d_256:
- case Intrinsic::x86_avx2_maskstore_q_256:
- if (simplifyX86MaskedStore(*II, *this))
- return nullptr;
- break;
-
- case Intrinsic::x86_addcarry_32:
- case Intrinsic::x86_addcarry_64:
- if (Value *V = simplifyX86addcarry(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::ppc_altivec_vperm:
- // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
- // Note that ppc_altivec_vperm has a big-endian bias, so when creating
- // a vector shuffle for little endian, we must undo the transformation
- // performed on vec_perm in altivec.h. That is, we must complement
- // the permutation mask with respect to 31 and reverse the order of
- // V1 and V2.
- if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
- assert(cast<VectorType>(Mask->getType())->getNumElements() == 16 &&
- "Bad type for intrinsic!");
-
- // Check that all of the elements are integer constants or undefs.
- bool AllEltsOk = true;
- for (unsigned i = 0; i != 16; ++i) {
- Constant *Elt = Mask->getAggregateElement(i);
- if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
- AllEltsOk = false;
- break;
- }
- }
-
- if (AllEltsOk) {
- // Cast the input vectors to byte vectors.
- Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
- Mask->getType());
- Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
- Mask->getType());
- Value *Result = UndefValue::get(Op0->getType());
-
- // Only extract each element once.
- Value *ExtractedElts[32];
- memset(ExtractedElts, 0, sizeof(ExtractedElts));
-
- for (unsigned i = 0; i != 16; ++i) {
- if (isa<UndefValue>(Mask->getAggregateElement(i)))
- continue;
- unsigned Idx =
- cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
- Idx &= 31; // Match the hardware behavior.
- if (DL.isLittleEndian())
- Idx = 31 - Idx;
-
- if (!ExtractedElts[Idx]) {
- Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
- Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
- ExtractedElts[Idx] =
- Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
- Builder.getInt32(Idx&15));
- }
-
- // Insert this value into the result vector.
- Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
- Builder.getInt32(i));
- }
- return CastInst::Create(Instruction::BitCast, Result, CI.getType());
- }
- }
- break;
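A standalone scalar model of the vperm fold above (illustrative only; helper name invented): each mask byte selects one of the 32 concatenated source bytes, and on little-endian targets the index is complemented with respect to 31 while the two sources swap roles, undoing the adjustment vec_perm makes in altivec.h.

    #include <cstdint>

    void vpermModel(const uint8_t V1[16], const uint8_t V2[16],
                    const uint8_t Mask[16], bool LittleEndian, uint8_t Out[16]) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = Mask[i] & 31;            // match the hardware behavior
        if (LittleEndian)
          Idx = 31 - Idx;
        const uint8_t *First = LittleEndian ? V2 : V1;
        const uint8_t *Second = LittleEndian ? V1 : V2;
        Out[i] = (Idx < 16) ? First[Idx] : Second[Idx - 16];
      }
    }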
-
- case Intrinsic::arm_neon_vld1: {
- Align MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
- if (Value *V = simplifyNeonVld1(*II, MemAlign.value(), Builder))
- return replaceInstUsesWith(*II, V);
- break;
- }
-
- case Intrinsic::arm_neon_vld2:
- case Intrinsic::arm_neon_vld3:
- case Intrinsic::arm_neon_vld4:
- case Intrinsic::arm_neon_vld2lane:
- case Intrinsic::arm_neon_vld3lane:
- case Intrinsic::arm_neon_vld4lane:
- case Intrinsic::arm_neon_vst1:
- case Intrinsic::arm_neon_vst2:
- case Intrinsic::arm_neon_vst3:
- case Intrinsic::arm_neon_vst4:
- case Intrinsic::arm_neon_vst2lane:
- case Intrinsic::arm_neon_vst3lane:
- case Intrinsic::arm_neon_vst4lane: {
- Align MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
- unsigned AlignArg = II->getNumArgOperands() - 1;
- Value *AlignArgOp = II->getArgOperand(AlignArg);
- MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
- if (Align && *Align < MemAlign)
- return replaceOperand(*II, AlignArg,
- ConstantInt::get(Type::getInt32Ty(II->getContext()),
- MemAlign.value(), false));
- break;
- }
case Intrinsic::arm_neon_vtbl1:
case Intrinsic::aarch64_neon_tbl1:
@@ -3453,690 +1408,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
}
- case Intrinsic::arm_mve_pred_i2v: {
- Value *Arg = II->getArgOperand(0);
- Value *ArgArg;
- if (match(Arg, m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(m_Value(ArgArg))) &&
- II->getType() == ArgArg->getType())
- return replaceInstUsesWith(*II, ArgArg);
- Constant *XorMask;
- if (match(Arg,
- m_Xor(m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(m_Value(ArgArg)),
- m_Constant(XorMask))) &&
- II->getType() == ArgArg->getType()) {
- if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
- if (CI->getValue().trunc(16).isAllOnesValue()) {
- auto TrueVector = Builder.CreateVectorSplat(
- cast<VectorType>(II->getType())->getNumElements(),
- Builder.getTrue());
- return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
- }
- }
- }
- KnownBits ScalarKnown(32);
- if (SimplifyDemandedBits(II, 0, APInt::getLowBitsSet(32, 16),
- ScalarKnown, 0))
- return II;
- break;
- }
- case Intrinsic::arm_mve_pred_v2i: {
- Value *Arg = II->getArgOperand(0);
- Value *ArgArg;
- if (match(Arg, m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(m_Value(ArgArg))))
- return replaceInstUsesWith(*II, ArgArg);
- if (!II->getMetadata(LLVMContext::MD_range)) {
- Type *IntTy32 = Type::getInt32Ty(II->getContext());
- Metadata *M[] = {
- ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)),
- ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0xFFFF))
- };
- II->setMetadata(LLVMContext::MD_range, MDNode::get(II->getContext(), M));
- return II;
- }
- break;
- }
- case Intrinsic::arm_mve_vadc:
- case Intrinsic::arm_mve_vadc_predicated: {
- unsigned CarryOp =
- (II->getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
- assert(II->getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
- "Bad type for intrinsic!");
-
- KnownBits CarryKnown(32);
- if (SimplifyDemandedBits(II, CarryOp, APInt::getOneBitSet(32, 29),
- CarryKnown))
- return II;
- break;
- }
- case Intrinsic::amdgcn_rcp: {
- Value *Src = II->getArgOperand(0);
-
- // TODO: Move to ConstantFolding/InstSimplify?
- if (isa<UndefValue>(Src)) {
- Type *Ty = II->getType();
- auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
- return replaceInstUsesWith(CI, QNaN);
- }
-
- if (II->isStrictFP())
- break;
-
- if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
- const APFloat &ArgVal = C->getValueAPF();
- APFloat Val(ArgVal.getSemantics(), 1);
- Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
-
- // This is more precise than the instruction may give.
- //
- // TODO: The instruction always flushes denormal results (except for f16),
- // should this also?
- return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
- }
-
- break;
- }
- case Intrinsic::amdgcn_rsq: {
- Value *Src = II->getArgOperand(0);
-
- // TODO: Move to ConstantFolding/InstSimplify?
- if (isa<UndefValue>(Src)) {
- Type *Ty = II->getType();
- auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
- return replaceInstUsesWith(CI, QNaN);
- }
-
- break;
- }
- case Intrinsic::amdgcn_frexp_mant:
- case Intrinsic::amdgcn_frexp_exp: {
- Value *Src = II->getArgOperand(0);
- if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
- int Exp;
- APFloat Significand = frexp(C->getValueAPF(), Exp,
- APFloat::rmNearestTiesToEven);
-
- if (IID == Intrinsic::amdgcn_frexp_mant) {
- return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
- Significand));
- }
-
- // Match instruction special case behavior.
- if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
- Exp = 0;
-
- return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
- }
-
- if (isa<UndefValue>(Src))
- return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- break;
- }
- case Intrinsic::amdgcn_class: {
- enum {
- S_NAN = 1 << 0, // Signaling NaN
- Q_NAN = 1 << 1, // Quiet NaN
- N_INFINITY = 1 << 2, // Negative infinity
- N_NORMAL = 1 << 3, // Negative normal
- N_SUBNORMAL = 1 << 4, // Negative subnormal
- N_ZERO = 1 << 5, // Negative zero
- P_ZERO = 1 << 6, // Positive zero
- P_SUBNORMAL = 1 << 7, // Positive subnormal
- P_NORMAL = 1 << 8, // Positive normal
- P_INFINITY = 1 << 9 // Positive infinity
- };
-
- const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
- N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
-
- Value *Src0 = II->getArgOperand(0);
- Value *Src1 = II->getArgOperand(1);
- const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
- if (!CMask) {
- if (isa<UndefValue>(Src0))
- return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-
- if (isa<UndefValue>(Src1))
- return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
- break;
- }
-
- uint32_t Mask = CMask->getZExtValue();
-
- // If all tests are made, it doesn't matter what the value is.
- if ((Mask & FullMask) == FullMask)
- return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
-
- if ((Mask & FullMask) == 0)
- return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
-
- if (Mask == (S_NAN | Q_NAN)) {
- // Equivalent of isnan. Replace with standard fcmp.
- Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
- FCmp->takeName(II);
- return replaceInstUsesWith(*II, FCmp);
- }
-
- if (Mask == (N_ZERO | P_ZERO)) {
- // Equivalent of == 0.
- Value *FCmp = Builder.CreateFCmpOEQ(
- Src0, ConstantFP::get(Src0->getType(), 0.0));
-
- FCmp->takeName(II);
- return replaceInstUsesWith(*II, FCmp);
- }
-
- // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
- if (((Mask & S_NAN) || (Mask & Q_NAN)) && isKnownNeverNaN(Src0, &TLI))
- return replaceOperand(*II, 1, ConstantInt::get(Src1->getType(),
- Mask & ~(S_NAN | Q_NAN)));
-
- const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
- if (!CVal) {
- if (isa<UndefValue>(Src0))
- return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-
- // Clamp mask to used bits
- if ((Mask & FullMask) != Mask) {
- CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(),
- { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
- );
-
- NewCall->takeName(II);
- return replaceInstUsesWith(*II, NewCall);
- }
-
- break;
- }
-
- const APFloat &Val = CVal->getValueAPF();
-
- bool Result =
- ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
- ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
- ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
- ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
- ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
- ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
- ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
- ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
- ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
- ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
-
- return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
- }
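For a constant operand the code above evaluates the class test directly. A standalone model of that evaluation using the same bit assignments (illustrative only; it does not split signaling from quiet NaNs, which the real fold does via Val.isSignaling()):

    #include <cmath>
    #include <cstdint>

    bool amdgcnClassConst(double Val, uint32_t Mask) {
      bool Neg = std::signbit(Val);
      if (std::isnan(Val))
        return (Mask & (0x001 | 0x002)) != 0;          // S_NAN | Q_NAN
      if (std::isinf(Val))
        return (Mask & (Neg ? 0x004 : 0x200)) != 0;    // N_INFINITY : P_INFINITY
      if (Val == 0.0)
        return (Mask & (Neg ? 0x020 : 0x040)) != 0;    // N_ZERO : P_ZERO
      if (std::fpclassify(Val) == FP_SUBNORMAL)
        return (Mask & (Neg ? 0x010 : 0x080)) != 0;    // N_SUBNORMAL : P_SUBNORMAL
      return (Mask & (Neg ? 0x008 : 0x100)) != 0;      // N_NORMAL : P_NORMAL
    }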
- case Intrinsic::amdgcn_cvt_pkrtz: {
- Value *Src0 = II->getArgOperand(0);
- Value *Src1 = II->getArgOperand(1);
- if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
- if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
- const fltSemantics &HalfSem
- = II->getType()->getScalarType()->getFltSemantics();
- bool LosesInfo;
- APFloat Val0 = C0->getValueAPF();
- APFloat Val1 = C1->getValueAPF();
- Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
- Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
-
- Constant *Folded = ConstantVector::get({
- ConstantFP::get(II->getContext(), Val0),
- ConstantFP::get(II->getContext(), Val1) });
- return replaceInstUsesWith(*II, Folded);
- }
- }
-
- if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
- return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-
- break;
- }
- case Intrinsic::amdgcn_cvt_pknorm_i16:
- case Intrinsic::amdgcn_cvt_pknorm_u16:
- case Intrinsic::amdgcn_cvt_pk_i16:
- case Intrinsic::amdgcn_cvt_pk_u16: {
- Value *Src0 = II->getArgOperand(0);
- Value *Src1 = II->getArgOperand(1);
-
- if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
- return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-
- break;
- }
- case Intrinsic::amdgcn_ubfe:
- case Intrinsic::amdgcn_sbfe: {
- // Decompose simple cases into standard shifts.
- Value *Src = II->getArgOperand(0);
- if (isa<UndefValue>(Src))
- return replaceInstUsesWith(*II, Src);
-
- unsigned Width;
- Type *Ty = II->getType();
- unsigned IntSize = Ty->getIntegerBitWidth();
-
- ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
- if (CWidth) {
- Width = CWidth->getZExtValue();
- if ((Width & (IntSize - 1)) == 0)
- return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
-
- // Hardware ignores high bits, so remove those.
- if (Width >= IntSize)
- return replaceOperand(*II, 2, ConstantInt::get(CWidth->getType(),
- Width & (IntSize - 1)));
- }
-
- unsigned Offset;
- ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
- if (COffset) {
- Offset = COffset->getZExtValue();
- if (Offset >= IntSize)
- return replaceOperand(*II, 1, ConstantInt::get(COffset->getType(),
- Offset & (IntSize - 1)));
- }
-
- bool Signed = IID == Intrinsic::amdgcn_sbfe;
-
- if (!CWidth || !COffset)
- break;
-
- // The case of Width == 0 is handled above, which makes this transformation
- // safe. If Width == 0, then the ashr and lshr instructions would produce
- // poison values since the shift amount would equal the bit size.
- assert(Width != 0);
-
- // TODO: This allows folding to undef when the hardware has specific
- // behavior?
- if (Offset + Width < IntSize) {
- Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
- Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
- : Builder.CreateLShr(Shl, IntSize - Width);
- RightShift->takeName(II);
- return replaceInstUsesWith(*II, RightShift);
- }
-
- Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
- : Builder.CreateLShr(Src, Offset);
-
- RightShift->takeName(II);
- return replaceInstUsesWith(*II, RightShift);
- }
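The decomposition above reduces a constant-offset bitfield extract to two shifts. For the 32-bit unsigned case with 0 < Width and Offset + Width < 32, the equivalent scalar computation is (illustrative only; the signed variant uses an arithmetic right shift instead):

    #include <cstdint>

    uint32_t ubfe32(uint32_t Src, unsigned Offset, unsigned Width) {
      return (Src << (32 - Offset - Width)) >> (32 - Width);
    }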
- case Intrinsic::amdgcn_exp:
- case Intrinsic::amdgcn_exp_compr: {
- ConstantInt *En = cast<ConstantInt>(II->getArgOperand(1));
- unsigned EnBits = En->getZExtValue();
- if (EnBits == 0xf)
- break; // All inputs enabled.
-
- bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
- bool Changed = false;
- for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
- if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
- (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
- Value *Src = II->getArgOperand(I + 2);
- if (!isa<UndefValue>(Src)) {
- replaceOperand(*II, I + 2, UndefValue::get(Src->getType()));
- Changed = true;
- }
- }
- }
-
- if (Changed)
- return II;
-
- break;
- }
- case Intrinsic::amdgcn_fmed3: {
- // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
- // for the shader.
-
- Value *Src0 = II->getArgOperand(0);
- Value *Src1 = II->getArgOperand(1);
- Value *Src2 = II->getArgOperand(2);
-
- // Checking for NaN before canonicalization provides better fidelity when
- // mapping other operations onto fmed3 since the order of operands is
- // unchanged.
- CallInst *NewCall = nullptr;
- if (match(Src0, m_NaN()) || isa<UndefValue>(Src0)) {
- NewCall = Builder.CreateMinNum(Src1, Src2);
- } else if (match(Src1, m_NaN()) || isa<UndefValue>(Src1)) {
- NewCall = Builder.CreateMinNum(Src0, Src2);
- } else if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
- NewCall = Builder.CreateMaxNum(Src0, Src1);
- }
-
- if (NewCall) {
- NewCall->copyFastMathFlags(II);
- NewCall->takeName(II);
- return replaceInstUsesWith(*II, NewCall);
- }
-
- bool Swap = false;
- // Canonicalize constants to RHS operands.
- //
- // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
- if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
- std::swap(Src0, Src1);
- Swap = true;
- }
-
- if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
- std::swap(Src1, Src2);
- Swap = true;
- }
-
- if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
- std::swap(Src0, Src1);
- Swap = true;
- }
-
- if (Swap) {
- II->setArgOperand(0, Src0);
- II->setArgOperand(1, Src1);
- II->setArgOperand(2, Src2);
- return II;
- }
-
- if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
- if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
- if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
- APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
- C2->getValueAPF());
- return replaceInstUsesWith(*II,
- ConstantFP::get(Builder.getContext(), Result));
- }
- }
- }
-
- break;
- }
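With three finite constant operands, fmed3 is simply the median, which is what the fmed3AMDGCN fold above computes. A scalar sketch of that case (illustrative only; NaN inputs are already handled by the min/max paths earlier in this case):

    #include <algorithm>

    float fmed3Const(float A, float B, float C) {
      return std::max(std::min(A, B), std::min(std::max(A, B), C));
    }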
- case Intrinsic::amdgcn_icmp:
- case Intrinsic::amdgcn_fcmp: {
- const ConstantInt *CC = cast<ConstantInt>(II->getArgOperand(2));
- // Guard against invalid arguments.
- int64_t CCVal = CC->getZExtValue();
- bool IsInteger = IID == Intrinsic::amdgcn_icmp;
- if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
- CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
- (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
- CCVal > CmpInst::LAST_FCMP_PREDICATE)))
- break;
-
- Value *Src0 = II->getArgOperand(0);
- Value *Src1 = II->getArgOperand(1);
-
- if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
- if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
- Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
- if (CCmp->isNullValue()) {
- return replaceInstUsesWith(
- *II, ConstantExpr::getSExt(CCmp, II->getType()));
- }
-
- // The result of V_ICMP/V_FCMP assembly instructions (which this
- // intrinsic exposes) is one bit per thread, masked with the EXEC
- // register (which contains the bitmask of live threads). So a
- // comparison that always returns true is the same as a read of the
- // EXEC register.
- Function *NewF = Intrinsic::getDeclaration(
- II->getModule(), Intrinsic::read_register, II->getType());
- Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
- MDNode *MD = MDNode::get(II->getContext(), MDArgs);
- Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
- CallInst *NewCall = Builder.CreateCall(NewF, Args);
- NewCall->addAttribute(AttributeList::FunctionIndex,
- Attribute::Convergent);
- NewCall->takeName(II);
- return replaceInstUsesWith(*II, NewCall);
- }
-
- // Canonicalize constants to RHS.
- CmpInst::Predicate SwapPred
- = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
- II->setArgOperand(0, Src1);
- II->setArgOperand(1, Src0);
- II->setArgOperand(2, ConstantInt::get(CC->getType(),
- static_cast<int>(SwapPred)));
- return II;
- }
-
- if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
- break;
-
- // Canonicalize compare eq with true value to compare != 0
- // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
- // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
- // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
- // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
- Value *ExtSrc;
- if (CCVal == CmpInst::ICMP_EQ &&
- ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
- (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
- ExtSrc->getType()->isIntegerTy(1)) {
- replaceOperand(*II, 1, ConstantInt::getNullValue(Src1->getType()));
- replaceOperand(*II, 2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
- return II;
- }
-
- CmpInst::Predicate SrcPred;
- Value *SrcLHS;
- Value *SrcRHS;
-
- // Fold compare eq/ne with 0 from a compare result as the predicate to the
- // intrinsic. The typical use is a wave vote function in the library, which
- // will be fed from a user code condition compared with 0. Fold in the
- // redundant compare.
-
- // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
- // -> llvm.amdgcn.[if]cmp(a, b, pred)
- //
- // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
- // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
- if (match(Src1, m_Zero()) &&
- match(Src0,
- m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
- if (CCVal == CmpInst::ICMP_EQ)
- SrcPred = CmpInst::getInversePredicate(SrcPred);
-
- Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
- Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
-
- Type *Ty = SrcLHS->getType();
- if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
- // Promote to next legal integer type.
- unsigned Width = CmpType->getBitWidth();
- unsigned NewWidth = Width;
-
- // Don't do anything for i1 comparisons.
- if (Width == 1)
- break;
-
- if (Width <= 16)
- NewWidth = 16;
- else if (Width <= 32)
- NewWidth = 32;
- else if (Width <= 64)
- NewWidth = 64;
- else if (Width > 64)
- break; // Can't handle this.
-
- if (Width != NewWidth) {
- IntegerType *CmpTy = Builder.getIntNTy(NewWidth);
- if (CmpInst::isSigned(SrcPred)) {
- SrcLHS = Builder.CreateSExt(SrcLHS, CmpTy);
- SrcRHS = Builder.CreateSExt(SrcRHS, CmpTy);
- } else {
- SrcLHS = Builder.CreateZExt(SrcLHS, CmpTy);
- SrcRHS = Builder.CreateZExt(SrcRHS, CmpTy);
- }
- }
- } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
- break;
-
- Function *NewF =
- Intrinsic::getDeclaration(II->getModule(), NewIID,
- { II->getType(),
- SrcLHS->getType() });
- Value *Args[] = { SrcLHS, SrcRHS,
- ConstantInt::get(CC->getType(), SrcPred) };
- CallInst *NewCall = Builder.CreateCall(NewF, Args);
- NewCall->takeName(II);
- return replaceInstUsesWith(*II, NewCall);
- }
-
- break;
- }
- case Intrinsic::amdgcn_ballot: {
- if (auto *Src = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
- if (Src->isZero()) {
- // amdgcn.ballot(i1 0) is zero.
- return replaceInstUsesWith(*II, Constant::getNullValue(II->getType()));
- }
-
- if (Src->isOne()) {
- // amdgcn.ballot(i1 1) is exec.
- const char *RegName = "exec";
- if (II->getType()->isIntegerTy(32))
- RegName = "exec_lo";
- else if (!II->getType()->isIntegerTy(64))
- break;
-
- Function *NewF = Intrinsic::getDeclaration(
- II->getModule(), Intrinsic::read_register, II->getType());
- Metadata *MDArgs[] = {MDString::get(II->getContext(), RegName)};
- MDNode *MD = MDNode::get(II->getContext(), MDArgs);
- Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
- CallInst *NewCall = Builder.CreateCall(NewF, Args);
- NewCall->addAttribute(AttributeList::FunctionIndex,
- Attribute::Convergent);
- NewCall->takeName(II);
- return replaceInstUsesWith(*II, NewCall);
- }
- }
- break;
- }
- case Intrinsic::amdgcn_wqm_vote: {
- // wqm_vote is identity when the argument is constant.
- if (!isa<Constant>(II->getArgOperand(0)))
- break;
-
- return replaceInstUsesWith(*II, II->getArgOperand(0));
- }
- case Intrinsic::amdgcn_kill: {
- const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0));
- if (!C || !C->getZExtValue())
- break;
-
- // amdgcn.kill(i1 1) is a no-op
- return eraseInstFromFunction(CI);
- }
- case Intrinsic::amdgcn_update_dpp: {
- Value *Old = II->getArgOperand(0);
-
- auto BC = cast<ConstantInt>(II->getArgOperand(5));
- auto RM = cast<ConstantInt>(II->getArgOperand(3));
- auto BM = cast<ConstantInt>(II->getArgOperand(4));
- if (BC->isZeroValue() ||
- RM->getZExtValue() != 0xF ||
- BM->getZExtValue() != 0xF ||
- isa<UndefValue>(Old))
- break;
-
- // If bound_ctrl = 1 and row mask = bank mask = 0xf, we can omit the old value.
- return replaceOperand(*II, 0, UndefValue::get(Old->getType()));
- }
- case Intrinsic::amdgcn_permlane16:
- case Intrinsic::amdgcn_permlanex16: {
- // Discard vdst_in if it's not going to be read.
- Value *VDstIn = II->getArgOperand(0);
- if (isa<UndefValue>(VDstIn))
- break;
-
- ConstantInt *FetchInvalid = cast<ConstantInt>(II->getArgOperand(4));
- ConstantInt *BoundCtrl = cast<ConstantInt>(II->getArgOperand(5));
- if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
- break;
-
- return replaceOperand(*II, 0, UndefValue::get(VDstIn->getType()));
- }
- case Intrinsic::amdgcn_readfirstlane:
- case Intrinsic::amdgcn_readlane: {
- // A constant value is trivially uniform.
- if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
- return replaceInstUsesWith(*II, C);
-
- // The rest of these may not be safe if the exec may not be the same between
- // the def and use.
- Value *Src = II->getArgOperand(0);
- Instruction *SrcInst = dyn_cast<Instruction>(Src);
- if (SrcInst && SrcInst->getParent() != II->getParent())
- break;
-
- // readfirstlane (readfirstlane x) -> readfirstlane x
- // readlane (readfirstlane x), y -> readfirstlane x
- if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>()))
- return replaceInstUsesWith(*II, Src);
-
- if (IID == Intrinsic::amdgcn_readfirstlane) {
- // readfirstlane (readlane x, y) -> readlane x, y
- if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>()))
- return replaceInstUsesWith(*II, Src);
- } else {
- // readlane (readlane x, y), y -> readlane x, y
- if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
- m_Value(), m_Specific(II->getArgOperand(1)))))
- return replaceInstUsesWith(*II, Src);
- }
-
- break;
- }
- case Intrinsic::amdgcn_ldexp: {
- // FIXME: This doesn't introduce new instructions and belongs in
- // InstructionSimplify.
- Type *Ty = II->getType();
- Value *Op0 = II->getArgOperand(0);
- Value *Op1 = II->getArgOperand(1);
-
- // Folding undef to qnan is safe regardless of the FP mode.
- if (isa<UndefValue>(Op0)) {
- auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
- return replaceInstUsesWith(*II, QNaN);
- }
-
- const APFloat *C = nullptr;
- match(Op0, m_APFloat(C));
-
- // FIXME: Should flush denorms depending on FP mode, but that's ignored
- // everywhere else.
- //
- // These cases should be safe, even with strictfp.
- // ldexp(0.0, x) -> 0.0
- // ldexp(-0.0, x) -> -0.0
- // ldexp(inf, x) -> inf
- // ldexp(-inf, x) -> -inf
- if (C && (C->isZero() || C->isInfinity()))
- return replaceInstUsesWith(*II, Op0);
-
- // With strictfp, be more careful about possibly needing to flush denormals
- // or not, and snan behavior depends on ieee_mode.
- if (II->isStrictFP())
- break;
-
- if (C && C->isNaN()) {
- // FIXME: We just need to make the nan quiet here, but that's unavailable
- // on APFloat, only IEEEfloat
- auto *Quieted = ConstantFP::get(
- Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
- return replaceInstUsesWith(*II, Quieted);
- }
-
- // ldexp(x, 0) -> x
- // ldexp(x, undef) -> x
- if (isa<UndefValue>(Op1) || match(Op1, m_ZeroInt()))
- return replaceInstUsesWith(*II, Op0);
-
- break;
- }
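The ldexp identities applied above match the usual libm behavior; a quick standalone check (illustrative only):

    #include <cassert>
    #include <cmath>

    int main() {
      assert(std::ldexp(0.0, 7) == 0.0);              // ldexp(0.0, x) -> 0.0
      assert(std::isinf(std::ldexp(INFINITY, -3)));   // ldexp(inf, x) -> inf
      assert(std::ldexp(1.5, 0) == 1.5);              // ldexp(x, 0) -> x
    }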
case Intrinsic::hexagon_V6_vandvrt:
case Intrinsic::hexagon_V6_vandvrt_128B: {
// Simplify Q -> V -> Q conversion.
@@ -4220,11 +1491,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::assume: {
Value *IIOperand = II->getArgOperand(0);
+ SmallVector<OperandBundleDef, 4> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+ bool HasOpBundles = !OpBundles.empty();
// Remove an assume if it is followed by an identical assume.
// TODO: Do we need this? Unless there are conflicting assumptions, the
// computeKnownBits(IIOperand) below here eliminates redundant assumes.
Instruction *Next = II->getNextNonDebugInstruction();
- if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
+ if (HasOpBundles &&
+ match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))) &&
+ !cast<IntrinsicInst>(Next)->hasOperandBundles())
return eraseInstFromFunction(CI);
// Canonicalize assume(a && b) -> assume(a); assume(b);
@@ -4233,15 +1509,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
FunctionType *AssumeIntrinsicTy = II->getFunctionType();
Value *AssumeIntrinsic = II->getCalledOperand();
Value *A, *B;
- if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
- Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, II->getName());
+ if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
+ Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
+ II->getName());
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
return eraseInstFromFunction(*II);
}
// assume(!(a || b)) -> assume(!a); assume(!b);
- if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
+ if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
- Builder.CreateNot(A), II->getName());
+ Builder.CreateNot(A), OpBundles, II->getName());
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
Builder.CreateNot(B), II->getName());
return eraseInstFromFunction(*II);
@@ -4257,7 +1534,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
isValidAssumeForContext(II, LHS, &DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LHS->setMetadata(LLVMContext::MD_nonnull, MD);
- return eraseInstFromFunction(*II);
+ if (!HasOpBundles)
+ return eraseInstFromFunction(*II);
// TODO: apply nonnull return attributes to calls and invokes
// TODO: apply range metadata for range check patterns?
@@ -4275,59 +1553,104 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
AC.updateAffectedValues(II);
break;
}
- case Intrinsic::experimental_gc_relocate: {
- auto &GCR = *cast<GCRelocateInst>(II);
-
- // If we have two copies of the same pointer in the statepoint argument
- // list, canonicalize to one. This may let us common gc.relocates.
- if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
- GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
- auto *OpIntTy = GCR.getOperand(2)->getType();
- return replaceOperand(*II, 2,
- ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
- }
+ case Intrinsic::experimental_gc_statepoint: {
+ GCStatepointInst &GCSP = *cast<GCStatepointInst>(II);
+ SmallPtrSet<Value *, 32> LiveGcValues;
+ for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
+ GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
- // Translate facts known about a pointer before relocating into
- // facts about the relocate value, while being careful to
- // preserve relocation semantics.
- Value *DerivedPtr = GCR.getDerivedPtr();
+ // Remove the relocation if unused.
+ if (GCR.use_empty()) {
+ eraseInstFromFunction(GCR);
+ continue;
+ }
- // Remove the relocation if unused; note that this check is required
- // to prevent the cases below from looping forever.
- if (II->use_empty())
- return eraseInstFromFunction(*II);
+ Value *DerivedPtr = GCR.getDerivedPtr();
+ Value *BasePtr = GCR.getBasePtr();
- // Undef is undef, even after relocation.
- // TODO: provide a hook for this in GCStrategy. This is clearly legal for
- // most practical collectors, but there was discussion in the review thread
- // about whether it was legal for all possible collectors.
- if (isa<UndefValue>(DerivedPtr))
- // Use undef of gc_relocate's type to replace it.
- return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-
- if (auto *PT = dyn_cast<PointerType>(II->getType())) {
- // The relocation of null will be null for most any collector.
- // TODO: provide a hook for this in GCStrategy. There might be some
- // weird collector this property does not hold for.
- if (isa<ConstantPointerNull>(DerivedPtr))
- // Use null-pointer of gc_relocate's type to replace it.
- return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
-
- // isKnownNonNull -> nonnull attribute
- if (!II->hasRetAttr(Attribute::NonNull) &&
- isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT)) {
- II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
- return II;
+ // Undef is undef, even after relocation.
+ if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
+ replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
+ eraseInstFromFunction(GCR);
+ continue;
+ }
+
+ if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
+ // The relocation of null will be null for most any collector.
+ // TODO: provide a hook for this in GCStrategy. There might be some
+ // weird collector this property does not hold for.
+ if (isa<ConstantPointerNull>(DerivedPtr)) {
+ // Use null-pointer of gc_relocate's type to replace it.
+ replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
+ eraseInstFromFunction(GCR);
+ continue;
+ }
+
+ // isKnownNonNull -> nonnull attribute
+ if (!GCR.hasRetAttr(Attribute::NonNull) &&
+ isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT)) {
+ GCR.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ // We discovered a new fact, re-check users.

+ Worklist.pushUsersToWorkList(GCR);
+ }
+ }
+
+ // If we have two copies of the same pointer in the statepoint argument
+ // list, canonicalize to one. This may let us common gc.relocates.
+ if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
+ GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
+ auto *OpIntTy = GCR.getOperand(2)->getType();
+ GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
}
- }
- // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
- // Canonicalize on the type from the uses to the defs
+ // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
+ // Canonicalize on the type from the uses to the defs
- // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
+ // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
+ LiveGcValues.insert(BasePtr);
+ LiveGcValues.insert(DerivedPtr);
+ }
+ Optional<OperandBundleUse> Bundle =
+ GCSP.getOperandBundle(LLVMContext::OB_gc_live);
+ unsigned NumOfGCLives = LiveGcValues.size();
+ if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size())
+ break;
+ // We can reduce the size of the gc-live bundle.
+ DenseMap<Value *, unsigned> Val2Idx;
+ std::vector<Value *> NewLiveGc;
+ for (unsigned I = 0, E = Bundle->Inputs.size(); I < E; ++I) {
+ Value *V = Bundle->Inputs[I];
+ if (Val2Idx.count(V))
+ continue;
+ if (LiveGcValues.count(V)) {
+ Val2Idx[V] = NewLiveGc.size();
+ NewLiveGc.push_back(V);
+ } else
+ Val2Idx[V] = NumOfGCLives;
+ }
+ // Update all gc.relocates
+ for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
+ GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
+ Value *BasePtr = GCR.getBasePtr();
+ assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
+ "Missed live gc for base pointer");
+ auto *OpIntTy1 = GCR.getOperand(1)->getType();
+ GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
+ Value *DerivedPtr = GCR.getDerivedPtr();
+ assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
+ "Missed live gc for derived pointer");
+ auto *OpIntTy2 = GCR.getOperand(2)->getType();
+ GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
+ }
+ // Create new statepoint instruction.
+ OperandBundleDef NewBundle("gc-live", NewLiveGc);
+ if (isa<CallInst>(II))
+ return CallInst::CreateWithReplacedBundle(cast<CallInst>(II), NewBundle);
+ else
+ return InvokeInst::CreateWithReplacedBundle(cast<InvokeInst>(II),
+ NewBundle);
break;
}
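A rough standalone sketch of the bundle-compaction bookkeeping in the new statepoint handling above, using std containers and strings in place of LLVM values (the names and sample data are made up for illustration): each value that still has a live gc.relocate gets an index into the shrunken gc-live list, duplicates collapse to one slot, and dropped values map to a sentinel equal to the new list size.

#include <cassert>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

int main() {
  // Old "gc-live" bundle inputs (duplicates allowed) and the values still
  // referenced by some gc.relocate after the unused ones were erased.
  std::vector<std::string> OldLive = {"p", "q", "p", "r"};
  std::unordered_set<std::string> LiveGcValues = {"p", "r"};

  const unsigned NumOfGCLives = (unsigned)LiveGcValues.size(); // sentinel: "not kept"
  std::unordered_map<std::string, unsigned> Val2Idx;
  std::vector<std::string> NewLiveGc;

  for (const std::string &V : OldLive) {
    if (Val2Idx.count(V))
      continue;                        // already assigned; dedups the second "p"
    if (LiveGcValues.count(V)) {
      Val2Idx[V] = (unsigned)NewLiveGc.size();  // position in the compacted bundle
      NewLiveGc.push_back(V);
    } else {
      Val2Idx[V] = NumOfGCLives;       // dead entry; no relocate refers to it
    }
  }

  // A gc.relocate whose base/derived pointer was "r" is rewritten to use the
  // new index Val2Idx["r"] into the shrunken bundle.
  assert(NewLiveGc.size() == 2 && Val2Idx["r"] == 1 && Val2Idx["q"] == 2);
}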
-
case Intrinsic::experimental_guard: {
// Is this guard followed by another guard? We scan forward over a small
// fixed window of instructions to handle common cases with conditions
@@ -4360,12 +1683,114 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
}
+ case Intrinsic::experimental_vector_insert: {
+ Value *Vec = II->getArgOperand(0);
+ Value *SubVec = II->getArgOperand(1);
+ Value *Idx = II->getArgOperand(2);
+ auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
+
+ // Only canonicalize if the destination vector, Vec, and SubVec are all
+ // fixed vectors.
+ if (DstTy && VecTy && SubVecTy) {
+ unsigned DstNumElts = DstTy->getNumElements();
+ unsigned VecNumElts = VecTy->getNumElements();
+ unsigned SubVecNumElts = SubVecTy->getNumElements();
+ unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+
+ // The result of this call is undefined if IdxN is not a constant multiple
+ // of the SubVec's minimum vector length OR the insertion overruns Vec.
+ if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) {
+ replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
+ return eraseInstFromFunction(CI);
+ }
+
+ // An insert that entirely overwrites Vec with SubVec is a nop.
+ if (VecNumElts == SubVecNumElts) {
+ replaceInstUsesWith(CI, SubVec);
+ return eraseInstFromFunction(CI);
+ }
+
+ // Widen SubVec into a vector of the same width as Vec, since
+ // shufflevector requires the two input vectors to be the same width.
+ // Elements beyond the bounds of SubVec within the widened vector are
+ // undefined.
+ SmallVector<int, 8> WidenMask;
+ unsigned i;
+ for (i = 0; i != SubVecNumElts; ++i)
+ WidenMask.push_back(i);
+ for (; i != VecNumElts; ++i)
+ WidenMask.push_back(UndefMaskElem);
+
+ Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
+
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != IdxN; ++i)
+ Mask.push_back(i);
+ for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
+ Mask.push_back(i);
+ for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
+ Mask.push_back(i);
+
+ Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
+ replaceInstUsesWith(CI, Shuffle);
+ return eraseInstFromFunction(CI);
+ }
+ break;
+ }
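To make the two masks built above concrete, here is a self-contained sketch with made-up sizes (an 8-element destination, a 2-element subvector, insertion index 4). It models shufflevector on plain std::vector and checks that the widen-then-blend pair really performs the insertion; the second operand of the widening shuffle is irrelevant, so the subvector is reused for it.

#include <cassert>
#include <vector>

// Models shufflevector: mask element M picks from the concatenation of Lhs
// and Rhs; a negative mask element stands for undef (rendered as 0xdead).
static std::vector<int> shuffle(const std::vector<int> &Lhs,
                                const std::vector<int> &Rhs,
                                const std::vector<int> &Mask) {
  std::vector<int> R;
  for (int M : Mask)
    R.push_back(M < 0 ? 0xdead
                      : (M < (int)Lhs.size() ? Lhs[M] : Rhs[M - (int)Lhs.size()]));
  return R;
}

int main() {
  const unsigned DstNumElts = 8, SubVecNumElts = 2, IdxN = 4; // sample sizes
  std::vector<int> Vec = {10, 11, 12, 13, 14, 15, 16, 17};
  std::vector<int> SubVec = {90, 91};

  // Step 1: widen SubVec to Vec's length; the tail lanes are undef.
  std::vector<int> WidenMask;
  for (unsigned i = 0; i != SubVecNumElts; ++i) WidenMask.push_back(i);
  for (unsigned i = SubVecNumElts; i != DstNumElts; ++i) WidenMask.push_back(-1);
  std::vector<int> Widened = shuffle(SubVec, SubVec, WidenMask);

  // Step 2: blend. Lanes below IdxN and above the inserted slice come from
  // Vec (mask values < DstNumElts); the slice itself comes from the widened
  // subvector (mask values >= DstNumElts select the second shuffle operand).
  std::vector<int> Mask;
  for (unsigned i = 0; i != IdxN; ++i) Mask.push_back(i);
  for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i) Mask.push_back(i);
  for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i) Mask.push_back(i);

  std::vector<int> Res = shuffle(Vec, Widened, Mask);
  assert((Res == std::vector<int>{10, 11, 12, 13, 90, 91, 16, 17}));
}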
+ case Intrinsic::experimental_vector_extract: {
+ Value *Vec = II->getArgOperand(0);
+ Value *Idx = II->getArgOperand(1);
+
+ auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+
+ // Only canonicalize if the destination vector and Vec are fixed
+ // vectors.

+ if (DstTy && VecTy) {
+ unsigned DstNumElts = DstTy->getNumElements();
+ unsigned VecNumElts = VecTy->getNumElements();
+ unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+
+ // The result of this call is undefined if IdxN is not a constant multiple
+ // of the result type's minimum vector length OR the extraction overruns
+ // Vec.
+ if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) {
+ replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
+ return eraseInstFromFunction(CI);
+ }
+
+ // Extracting the entirety of Vec is a nop.
+ if (VecNumElts == DstNumElts) {
+ replaceInstUsesWith(CI, Vec);
+ return eraseInstFromFunction(CI);
+ }
+
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != DstNumElts; ++i)
+ Mask.push_back(IdxN + i);
+
+ Value *Shuffle =
+ Builder.CreateShuffleVector(Vec, UndefValue::get(VecTy), Mask);
+ replaceInstUsesWith(CI, Shuffle);
+ return eraseInstFromFunction(CI);
+ }
+ break;
+ }
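The extract case above reduces to a single-source shuffle whose mask simply enumerates the extracted slice; a short sketch with sample sizes (8-element source, 2-element result, index 4):

#include <cassert>
#include <vector>

int main() {
  const unsigned DstNumElts = 2, IdxN = 4;   // sample sizes
  std::vector<int> Vec = {10, 11, 12, 13, 14, 15, 16, 17};

  std::vector<int> Mask, Res;
  for (unsigned i = 0; i != DstNumElts; ++i) Mask.push_back(IdxN + i);
  for (int M : Mask) Res.push_back(Vec[M]);  // single-source shuffle

  assert((Res == std::vector<int>{14, 15})); // the slice starting at lane 4
}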
+ default: {
+ // Handle target specific intrinsics
+ Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
+ if (V.hasValue())
+ return V.getValue();
+ break;
+ }
}
return visitCallBase(*II);
}
// Fence instruction simplification
-Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
+Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
// Remove identical consecutive fences.
Instruction *Next = FI.getNextNonDebugInstruction();
if (auto *NFI = dyn_cast<FenceInst>(Next))
@@ -4375,12 +1800,12 @@ Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
}
// InvokeInst simplification
-Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
return visitCallBase(II);
}
// CallBrInst simplification
-Instruction *InstCombiner::visitCallBrInst(CallBrInst &CBI) {
+Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
return visitCallBase(CBI);
}
@@ -4420,7 +1845,7 @@ static bool isSafeToEliminateVarargsCast(const CallBase &Call,
return true;
}
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
+Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
@@ -4577,7 +2002,7 @@ static void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
}
/// Improvements for call, callbr and invoke instructions.
-Instruction *InstCombiner::visitCallBase(CallBase &Call) {
+Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
if (isAllocationFn(&Call, &TLI))
annotateAnyAllocSite(Call, &TLI);
@@ -4633,7 +2058,7 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) {
!CalleeF->isDeclaration()) {
Instruction *OldCall = &Call;
CreateNonTerminatorUnreachable(OldCall);
- // If OldCall does not return void then replaceAllUsesWith undef.
+ // If OldCall does not return void then replaceInstUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust themselves.
if (!OldCall->getType()->isVoidTy())
replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
@@ -4652,7 +2077,7 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) {
if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(Call.getFunction())) ||
isa<UndefValue>(Callee)) {
- // If Call does not return void then replaceAllUsesWith undef.
+ // If Call does not return void then replaceInstUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust themselves.
if (!Call.getType()->isVoidTy())
replaceInstUsesWith(Call, UndefValue::get(Call.getType()));
@@ -4728,7 +2153,7 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) {
/// If the callee is a constexpr cast of a function, attempt to move the cast to
/// the arguments of the call/callbr/invoke.
-bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
+bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
auto *Callee =
dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
if (!Callee)
@@ -4827,6 +2252,9 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
if (Call.isInAllocaArgument(i))
return false; // Cannot transform to and from inalloca.
+ if (CallerPAL.hasParamAttribute(i, Attribute::SwiftError))
+ return false;
+
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
@@ -5012,8 +2440,8 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
Instruction *
-InstCombiner::transformCallThroughTrampoline(CallBase &Call,
- IntrinsicInst &Tramp) {
+InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
+ IntrinsicInst &Tramp) {
Value *Callee = Call.getCalledOperand();
Type *CalleeTy = Callee->getType();
FunctionType *FTy = Call.getFunctionType();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3639edb5df4d..07e68c44416d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,10 +14,11 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <numeric>
using namespace llvm;
using namespace PatternMatch;
@@ -81,8 +82,8 @@ static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
/// If we find a cast of an allocation instruction, try to eliminate the cast by
/// moving the type information into the alloc.
-Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
- AllocaInst &AI) {
+Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
+ AllocaInst &AI) {
PointerType *PTy = cast<PointerType>(CI.getType());
IRBuilderBase::InsertPointGuard Guard(Builder);
@@ -93,6 +94,18 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
Type *CastElTy = PTy->getElementType();
if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
+ // This optimisation does not work for cases where the cast type
+ // is scalable and the allocated type is not. This is because we need to
+ // know how many times the casted type fits into the allocated type.
+ // For the opposite case, where the allocated type is scalable and the
+ // cast type is not, this leads to poor code quality due to the
+ // introduction of 'vscale' into the calculations. It seems better to
+ // bail out for this case too until we've done a proper cost-benefit
+ // analysis.
+ bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
+ bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
+ if (AllocIsScalable != CastIsScalable) return nullptr;
+
Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
if (CastElTyAlign < AllocElTyAlign) return nullptr;
@@ -102,14 +115,15 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// same, we open the door to infinite loops of various kinds.
if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
- uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy);
- uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy);
+ // The alloc and cast types should be either both fixed or both scalable.
+ uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize();
+ uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize();
if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
// If the allocation has multiple uses, only promote it if we're not
// shrinking the amount of memory being allocated.
- uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy);
- uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy);
+ uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize();
+ uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize();
if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
// See if we can satisfy the modulus by pulling a scale out of the array
@@ -124,6 +138,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
(AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr;
+ // We don't currently support arrays of scalable types.
+ assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));
+
unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
Value *Amt = nullptr;
if (Scale == 1) {
@@ -160,8 +177,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
-Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
- bool isSigned) {
+Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
+ bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V)) {
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// If we got a constantexpr back, try to simplify it with DL info.
@@ -229,8 +246,9 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
return InsertNewInstWith(Res, *I);
}
-Instruction::CastOps InstCombiner::isEliminableCastPair(const CastInst *CI1,
- const CastInst *CI2) {
+Instruction::CastOps
+InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
+ const CastInst *CI2) {
Type *SrcTy = CI1->getSrcTy();
Type *MidTy = CI1->getDestTy();
Type *DstTy = CI2->getDestTy();
@@ -257,7 +275,7 @@ Instruction::CastOps InstCombiner::isEliminableCastPair(const CastInst *CI1,
}
/// Implement the transforms common to all CastInst visitors.
-Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
+Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
// Try to eliminate a cast of a cast.
@@ -342,7 +360,7 @@ static bool canNotEvaluateInType(Value *V, Type *Ty) {
///
/// This function works on both vectors and scalars.
///
-static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
+static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
Instruction *CxtI) {
if (canAlwaysEvaluateInType(V, Ty))
return true;
@@ -459,7 +477,8 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
/// --->
/// extractelement <4 x i32> %X, 1
-static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC) {
+static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
+ InstCombinerImpl &IC) {
Value *TruncOp = Trunc.getOperand(0);
Type *DestType = Trunc.getType();
if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
@@ -496,9 +515,9 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC) {
return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}
-/// Rotate left/right may occur in a wider type than necessary because of type
-/// promotion rules. Try to narrow the inputs and convert to funnel shift.
-Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
+/// Funnel/Rotate left/right may occur in a wider type than necessary because of
+/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
+Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
assert((isa<VectorType>(Trunc.getSrcTy()) ||
shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
"Don't narrow to an illegal scalar type");
@@ -510,32 +529,43 @@ Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
if (!isPowerOf2_32(NarrowWidth))
return nullptr;
- // First, find an or'd pair of opposite shifts with the same shifted operand:
- // trunc (or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1))
- Value *Or0, *Or1;
- if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_Value(Or0), m_Value(Or1)))))
+ // First, find an or'd pair of opposite shifts:
+ // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
+ BinaryOperator *Or0, *Or1;
+ if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
return nullptr;
- Value *ShVal, *ShAmt0, *ShAmt1;
- if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
- !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
+ Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
+ if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
+ !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
+ Or0->getOpcode() == Or1->getOpcode())
return nullptr;
- auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
- auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
- if (ShiftOpcode0 == ShiftOpcode1)
- return nullptr;
+ // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
+ if (Or0->getOpcode() == BinaryOperator::LShr) {
+ std::swap(Or0, Or1);
+ std::swap(ShVal0, ShVal1);
+ std::swap(ShAmt0, ShAmt1);
+ }
+ assert(Or0->getOpcode() == BinaryOperator::Shl &&
+ Or1->getOpcode() == BinaryOperator::LShr &&
+ "Illegal or(shift,shift) pair");
- // Match the shift amount operands for a rotate pattern. This always matches
- // a subtraction on the R operand.
- auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+ // Match the shift amount operands for a funnel/rotate pattern. This always
+ // matches a subtraction on the R operand.
+ auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
// The shift amounts may add up to the narrow bit width:
- // (shl ShVal, L) | (lshr ShVal, Width - L)
+ // (shl ShVal0, L) | (lshr ShVal1, Width - L)
if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
return L;
+ // The following patterns currently only work for rotations.
+ // TODO: Add more general funnel-shift compatible patterns.
+ if (ShVal0 != ShVal1)
+ return nullptr;
+
// The shift amount may be masked with negation:
- // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+ // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
Value *X;
unsigned Mask = Width - 1;
if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
@@ -551,10 +581,10 @@ Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
};
Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
- bool SubIsOnLHS = false;
+ bool IsFshl = true; // Sub on LSHR.
if (!ShAmt) {
ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
- SubIsOnLHS = true;
+ IsFshl = false; // Sub on SHL.
}
if (!ShAmt)
return nullptr;
@@ -563,26 +593,28 @@ Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
// will be a zext, but it could also be the result of an 'and' or 'shift'.
unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
- if (!MaskedValueIsZero(ShVal, HiBitMask, 0, &Trunc))
+ if (!MaskedValueIsZero(ShVal0, HiBitMask, 0, &Trunc) ||
+ !MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
return nullptr;
// We have an unnecessarily wide rotate!
- // trunc (or (lshr ShVal, ShAmt), (shl ShVal, BitWidth - ShAmt))
+ // trunc (or (lshr ShVal0, ShAmt), (shl ShVal1, BitWidth - ShAmt))
// Narrow the inputs and convert to funnel shift intrinsic:
// llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
- Value *X = Builder.CreateTrunc(ShVal, DestTy);
- bool IsFshl = (!SubIsOnLHS && ShiftOpcode0 == BinaryOperator::Shl) ||
- (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
+ Value *X, *Y;
+ X = Y = Builder.CreateTrunc(ShVal0, DestTy);
+ if (ShVal0 != ShVal1)
+ Y = Builder.CreateTrunc(ShVal1, DestTy);
Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
- return IntrinsicInst::Create(F, { X, X, NarrowShAmt });
+ return IntrinsicInst::Create(F, {X, Y, NarrowShAmt});
}
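As a sanity check of the equivalence this function relies on, here is a small self-contained brute-force comparison for an 8-bit narrow type: the wide or-of-opposite-shifts pattern, with both shifted values known to have zero high bits as the MaskedValueIsZero checks above require, truncates to exactly the narrow funnel shift. The fshl8 helper below is only an illustrative model of llvm.fshl.i8.

#include <cassert>
#include <cstdint>

// Illustrative model of llvm.fshl.i8: concatenate X:Y, shift left by S mod 8,
// and keep the high 8 bits.
static uint8_t fshl8(uint8_t X, uint8_t Y, unsigned S) {
  S %= 8;
  return S ? (uint8_t)((X << S) | (Y >> (8 - S))) : X;
}

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      for (unsigned S = 0; S < 8; ++S) {
        // The matched pattern, performed in a wider (32-bit) type: the two
        // shifted values are zero-extended 8-bit values, the shift amounts
        // are S and 8 - S, and the or'd result is truncated back to 8 bits.
        uint32_t Wide0 = X, Wide1 = Y;
        uint8_t WidePattern = (uint8_t)((Wide0 << S) | (Wide1 >> (8 - S)));
        assert(WidePattern == fshl8((uint8_t)X, (uint8_t)Y, S));
        // With Wide0 == Wide1 this is the original rotate-only special case.
      }
}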
/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
/// TODO: Transforms for truncated shifts should be moved into here.
-Instruction *InstCombiner::narrowBinOp(TruncInst &Trunc) {
+Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
Type *SrcTy = Trunc.getSrcTy();
Type *DestTy = Trunc.getType();
if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
@@ -631,7 +663,7 @@ Instruction *InstCombiner::narrowBinOp(TruncInst &Trunc) {
default: break;
}
- if (Instruction *NarrowOr = narrowRotate(Trunc))
+ if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
return NarrowOr;
return nullptr;
@@ -687,7 +719,7 @@ static Instruction *shrinkInsertElt(CastInst &Trunc,
return nullptr;
}
-Instruction *InstCombiner::visitTrunc(TruncInst &Trunc) {
+Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
if (Instruction *Result = commonCastTransforms(Trunc))
return Result;
@@ -695,7 +727,6 @@ Instruction *InstCombiner::visitTrunc(TruncInst &Trunc) {
Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
unsigned DestWidth = DestTy->getScalarSizeInBits();
unsigned SrcWidth = SrcTy->getScalarSizeInBits();
- ConstantInt *Cst;
// Attempt to truncate the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
@@ -782,56 +813,60 @@ Instruction *InstCombiner::visitTrunc(TruncInst &Trunc) {
}
}
- // FIXME: Maybe combine the next two transforms to handle the no cast case
- // more efficiently. Support vector types. Cleanup code by using m_OneUse.
-
- // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
- Value *A = nullptr;
- if (Src->hasOneUse() &&
- match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
- // We have three types to worry about here, the type of A, the source of
- // the truncate (MidSize), and the destination of the truncate. We know that
- // ASize < MidSize and MidSize > ResultSize, but don't know the relation
- // between ASize and ResultSize.
- unsigned ASize = A->getType()->getPrimitiveSizeInBits();
-
- // If the shift amount is larger than the size of A, then the result is
- // known to be zero because all the input bits got shifted out.
- if (Cst->getZExtValue() >= ASize)
- return replaceInstUsesWith(Trunc, Constant::getNullValue(DestTy));
-
- // Since we're doing an lshr and a zero extend, and know that the shift
- // amount is smaller than ASize, it is always safe to do the shift in A's
- // type, then zero extend or truncate to the result.
- Value *Shift = Builder.CreateLShr(A, Cst->getZExtValue());
- Shift->takeName(Src);
- return CastInst::CreateIntegerCast(Shift, DestTy, false);
- }
-
- const APInt *C;
- if (match(Src, m_LShr(m_SExt(m_Value(A)), m_APInt(C)))) {
+ Value *A;
+ Constant *C;
+ if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
unsigned AWidth = A->getType()->getScalarSizeInBits();
unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
+ auto *OldSh = cast<Instruction>(Src);
+ bool IsExact = OldSh->isExact();
// If the shift is small enough, all zero bits created by the shift are
// removed by the trunc.
- if (C->getZExtValue() <= MaxShiftAmt) {
+ if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
+ APInt(SrcWidth, MaxShiftAmt)))) {
// trunc (lshr (sext A), C) --> ashr A, C
if (A->getType() == DestTy) {
- unsigned ShAmt = std::min((unsigned)C->getZExtValue(), DestWidth - 1);
- return BinaryOperator::CreateAShr(A, ConstantInt::get(DestTy, ShAmt));
+ Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
+ Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
+ ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
+ ShAmt = Constant::mergeUndefsWith(ShAmt, C);
+ return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
+ : BinaryOperator::CreateAShr(A, ShAmt);
}
// The types are mismatched, so create a cast after shifting:
// trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
if (Src->hasOneUse()) {
- unsigned ShAmt = std::min((unsigned)C->getZExtValue(), AWidth - 1);
- Value *Shift = Builder.CreateAShr(A, ShAmt);
+ Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
+ Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
+ ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
+ Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
return CastInst::CreateIntegerCast(Shift, DestTy, true);
}
}
// TODO: Mask high bits with 'and'.
}
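For the same-type branch of the sext/lshr fold above (A and the destination both i8, source i32, so MaxShiftAmt is 24), a brute-force check of the claimed equivalence; the casts assume the usual two's-complement conversions and arithmetic right shift on signed values, which C++20 guarantees:

#include <cassert>
#include <cstdint>

int main() {
  // trunc i8 (lshr (sext i8 %a to i32), C)  ==  ashr i8 %a, umin(C, 7)
  // whenever C <= 32 - 8, i.e. while the bits shifted into the low byte are
  // still copies of %a's sign bit.
  for (int A = -128; A <= 127; ++A)
    for (unsigned C = 0; C <= 24; ++C) {
      uint32_t Sext = (uint32_t)(int32_t)(int8_t)A;  // sext i8 -> i32
      int8_t Trunc = (int8_t)(uint8_t)(Sext >> C);   // lshr, then trunc to i8
      unsigned ShAmt = C < 7 ? C : 7;                // umin(C, DestWidth - 1)
      int8_t Ashr = (int8_t)((int8_t)A >> ShAmt);    // ashr i8
      assert(Trunc == Ashr);
    }
}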
+ // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
+ if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) {
+ unsigned MaxShiftAmt = SrcWidth - DestWidth;
+
+ // If the shift is small enough, all zero/sign bits created by the shift are
+ // removed by the trunc.
+ if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
+ APInt(SrcWidth, MaxShiftAmt)))) {
+ auto *OldShift = cast<Instruction>(Src);
+ bool IsExact = OldShift->isExact();
+ auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
+ ShAmt = Constant::mergeUndefsWith(ShAmt, C);
+ Value *Shift =
+ OldShift->getOpcode() == Instruction::AShr
+ ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
+ : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
+ return CastInst::CreateTruncOrBitCast(Shift, DestTy);
+ }
+ }
+
if (Instruction *I = narrowBinOp(Trunc))
return I;
@@ -841,20 +876,19 @@ Instruction *InstCombiner::visitTrunc(TruncInst &Trunc) {
if (Instruction *I = shrinkInsertElt(Trunc, Builder))
return I;
- if (Src->hasOneUse() && isa<IntegerType>(SrcTy) &&
- shouldChangeType(SrcTy, DestTy)) {
+ if (Src->hasOneUse() &&
+ (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
// Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
// dest type is native and cst < dest size.
- if (match(Src, m_Shl(m_Value(A), m_ConstantInt(Cst))) &&
+ if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
!match(A, m_Shr(m_Value(), m_Constant()))) {
// Skip shifts of shift by constants. It undoes a combine in
// FoldShiftByConstant and is the extend in reg pattern.
- if (Cst->getValue().ult(DestWidth)) {
+ APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
+ if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
-
- return BinaryOperator::Create(
- Instruction::Shl, NewTrunc,
- ConstantInt::get(DestTy, Cst->getValue().trunc(DestWidth)));
+ return BinaryOperator::Create(Instruction::Shl, NewTrunc,
+ ConstantExpr::getTrunc(C, DestTy));
}
}
}
@@ -871,21 +905,23 @@ Instruction *InstCombiner::visitTrunc(TruncInst &Trunc) {
// --->
// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
Value *VecOp;
+ ConstantInt *Cst;
if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
auto *VecOpTy = cast<VectorType>(VecOp->getType());
- unsigned VecNumElts = VecOpTy->getNumElements();
+ auto VecElts = VecOpTy->getElementCount();
// A badly fit destination size would result in an invalid cast.
if (SrcWidth % DestWidth == 0) {
uint64_t TruncRatio = SrcWidth / DestWidth;
- uint64_t BitCastNumElts = VecNumElts * TruncRatio;
+ uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
uint64_t VecOpIdx = Cst->getZExtValue();
uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
: VecOpIdx * TruncRatio;
assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
"overflow 32-bits");
- auto *BitCastTo = FixedVectorType::get(DestTy, BitCastNumElts);
+ auto *BitCastTo =
+ VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
}
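The index remapping above mirrors how a vector bitcast is laid out in memory; a quick host-side illustration for <4 x i64> viewed as <8 x i32> (the sample values are arbitrary, and the final assert assumes a little-endian host, matching the non-big-endian branch of the remap):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t X[4] = {0x1111111122222222ULL, 0x3333333344444444ULL,
                   0x5555555566666666ULL, 0x7777777788888888ULL};
  uint32_t Y[8];
  std::memcpy(Y, X, sizeof(X));          // the <4 x i64> -> <8 x i32> bitcast

  const unsigned TruncRatio = 64 / 32;   // SrcWidth / DestWidth
  for (unsigned I = 0; I != 4; ++I) {
    uint32_t Trunc = (uint32_t)X[I];     // trunc i32 (extractelement ..., I)
    unsigned NewIdx = I * TruncRatio;    // little-endian remap; big-endian
                                         // would use (I + 1) * TruncRatio - 1
    assert(Trunc == Y[NewIdx]);
  }
}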
@@ -894,8 +930,8 @@ Instruction *InstCombiner::visitTrunc(TruncInst &Trunc) {
return nullptr;
}
-Instruction *InstCombiner::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
- bool DoTransform) {
+Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
+ bool DoTransform) {
// If we are just checking for an icmp eq of a single bit and zext'ing it
// to an integer, then shift the bit to the appropriate place and then
// cast to integer to avoid the comparison.
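A concrete instance of the single-bit case described above, checked exhaustively over small values (the bit positions and value range are arbitrary): testing bit K and zero-extending the i1 result is the same as shifting the bit down and masking, with an extra xor for the eq form.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 4096; ++X)
    for (unsigned K = 0; K < 12; ++K) {
      uint32_t Bit = 1u << K;
      // zext i32 (icmp ne (and %x, Bit), 0)  ==  and (lshr %x, K), 1
      uint32_t ZextNe = (X & Bit) != 0 ? 1u : 0u;
      assert(ZextNe == ((X >> K) & 1u));
      // zext i32 (icmp eq (and %x, Bit), 0)  ==  xor (and (lshr %x, K), 1), 1
      uint32_t ZextEq = (X & Bit) == 0 ? 1u : 0u;
      assert(ZextEq == (((X >> K) & 1u) ^ 1u));
    }
}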
@@ -1031,7 +1067,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
- InstCombiner &IC, Instruction *CxtI) {
+ InstCombinerImpl &IC, Instruction *CxtI) {
BitsToClear = 0;
if (canAlwaysEvaluateInType(V, Ty))
return true;
@@ -1136,7 +1172,7 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
}
}
-Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
+Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
// If this zero extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this zext.
if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
@@ -1234,6 +1270,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
(transformZExtICmp(LHS, CI, false) ||
transformZExtICmp(RHS, CI, false))) {
// zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
@@ -1274,7 +1311,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
}
/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
-Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
+Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI,
+ Instruction &CI) {
Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
ICmpInst::Predicate Pred = ICI->getPredicate();
@@ -1410,7 +1448,7 @@ static bool canEvaluateSExtd(Value *V, Type *Ty) {
return false;
}
-Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
// If this sign extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this sext.
if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
@@ -1473,31 +1511,33 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// for a truncate. If the source and dest are the same type, eliminate the
// trunc and extend and just do shifts. For example, turn:
// %a = trunc i32 %i to i8
- // %b = shl i8 %a, 6
- // %c = ashr i8 %b, 6
+ // %b = shl i8 %a, C
+ // %c = ashr i8 %b, C
// %d = sext i8 %c to i32
// into:
- // %a = shl i32 %i, 30
- // %d = ashr i32 %a, 30
+ // %a = shl i32 %i, 32-(8-C)
+ // %d = ashr i32 %a, 32-(8-C)
Value *A = nullptr;
// TODO: Eventually this could be subsumed by EvaluateInDifferentType.
Constant *BA = nullptr, *CA = nullptr;
if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
m_Constant(CA))) &&
- BA == CA && A->getType() == CI.getType()) {
- unsigned MidSize = Src->getType()->getScalarSizeInBits();
- unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
- Constant *SizeDiff = ConstantInt::get(CA->getType(), SrcDstSize - MidSize);
- Constant *ShAmt = ConstantExpr::getAdd(CA, SizeDiff);
- Constant *ShAmtExt = ConstantExpr::getSExt(ShAmt, CI.getType());
- A = Builder.CreateShl(A, ShAmtExt, CI.getName());
- return BinaryOperator::CreateAShr(A, ShAmtExt);
+ BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
+ Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
+ Constant *NumLowbitsLeft = ConstantExpr::getSub(
+ ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
+ Constant *NewShAmt = ConstantExpr::getSub(
+ ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
+ NumLowbitsLeft);
+ NewShAmt =
+ Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
+ A = Builder.CreateShl(A, NewShAmt, CI.getName());
+ return BinaryOperator::CreateAShr(A, NewShAmt);
}
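The rewritten shift amount above is 32 - (8 - C) for the i8-inside-i32 example in the comment: both forms just sign-extend the low 8 - C bits of %i. A brute-force spot check over a range of inputs, assuming two's-complement wrapping and arithmetic right shift on signed values as in C++20:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t I = -100000; I <= 100000; I += 7)
    for (unsigned C = 0; C < 8; ++C) {
      // Original form: trunc to i8, shl/ashr by C in i8, sext back to i32.
      int8_t A = (int8_t)(uint8_t)(uint32_t)I;
      int8_t B = (int8_t)(uint8_t)((uint32_t)(uint8_t)A << C);
      int32_t D1 = (int8_t)(B >> C);
      // Rewritten form: shl/ashr by 32 - (8 - C) directly in i32.
      unsigned NewShAmt = 32 - (8 - C);
      int32_t D2 = (int32_t)((uint32_t)I << NewShAmt) >> NewShAmt;
      assert(D1 == D2);
    }
}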
return nullptr;
}
-
/// Return a Constant* for the specified floating-point constant if it fits
/// in the specified FP type without changing its value.
static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
@@ -1535,7 +1575,7 @@ static Type *shrinkFPConstantVector(Value *V) {
Type *MinType = nullptr;
- unsigned NumElts = CVVTy->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CVVTy)->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
if (!CFP)
@@ -1616,7 +1656,7 @@ static bool isKnownExactCastIntToFP(CastInst &I) {
return false;
}
-Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
+Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
if (Instruction *I = commonCastTransforms(FPT))
return I;
@@ -1800,7 +1840,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
return nullptr;
}
-Instruction *InstCombiner::visitFPExt(CastInst &FPExt) {
+Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
// If the source operand is a cast from integer to FP and known exact, then
// cast the integer operand directly to the destination type.
Type *Ty = FPExt.getType();
@@ -1818,7 +1858,7 @@ Instruction *InstCombiner::visitFPExt(CastInst &FPExt) {
/// This is safe if the intermediate type has enough bits in its mantissa to
/// accurately represent all values of X. For example, this won't work with
/// i64 -> float -> i64.
-Instruction *InstCombiner::foldItoFPtoI(CastInst &FI) {
+Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
return nullptr;
@@ -1858,29 +1898,29 @@ Instruction *InstCombiner::foldItoFPtoI(CastInst &FI) {
return replaceInstUsesWith(FI, X);
}
-Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
if (Instruction *I = foldItoFPtoI(FI))
return I;
return commonCastTransforms(FI);
}
-Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
if (Instruction *I = foldItoFPtoI(FI))
return I;
return commonCastTransforms(FI);
}
-Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
+Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
return commonCastTransforms(CI);
}
-Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
+Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
return commonCastTransforms(CI);
}
-Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
@@ -1903,7 +1943,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
}
/// Implement the transforms for cast of pointer (bitcast/ptrtoint)
-Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
@@ -1925,26 +1965,37 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return commonCastTransforms(CI);
}
-Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
+Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
-
+ Value *SrcOp = CI.getPointerOperand();
Type *Ty = CI.getType();
unsigned AS = CI.getPointerAddressSpace();
+ unsigned TySize = Ty->getScalarSizeInBits();
+ unsigned PtrSize = DL.getPointerSizeInBits(AS);
+ if (TySize != PtrSize) {
+ Type *IntPtrTy = DL.getIntPtrType(CI.getContext(), AS);
+ // Handle vectors of pointers.
+ if (auto *VecTy = dyn_cast<VectorType>(Ty))
+ IntPtrTy = VectorType::get(IntPtrTy, VecTy->getElementCount());
- if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
- return commonPointerCastTransforms(CI);
+ Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
+ return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
+ }
- Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
- if (auto *VTy = dyn_cast<VectorType>(Ty)) {
- // Handle vectors of pointers.
- // FIXME: what should happen for scalable vectors?
- PtrTy = FixedVectorType::get(PtrTy, VTy->getNumElements());
+ Value *Vec, *Scalar, *Index;
+ if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
+ m_Value(Scalar), m_Value(Index)))) &&
+ Vec->getType() == Ty) {
+ assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
+ // Convert the scalar to int followed by insert to eliminate one cast:
+ // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
+ Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
+ return InsertElementInst::Create(Vec, NewCast, Index);
}
- Value *P = Builder.CreatePtrToInt(CI.getOperand(0), PtrTy);
- return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
+ return commonPointerCastTransforms(CI);
}
/// This input value (which is known to have vector type) is being zero extended
@@ -1963,9 +2014,9 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
///
/// The source and destination vector types may have different element types.
-static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal,
- VectorType *DestTy,
- InstCombiner &IC) {
+static Instruction *
+optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
+ InstCombinerImpl &IC) {
// We can only do this optimization if the output is a multiple of the input
// element size, or the input is a multiple of the output element size.
// Convert the input type to have the same element type as the output.
@@ -1981,13 +2032,14 @@ static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal,
return nullptr;
SrcTy =
- FixedVectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+ FixedVectorType::get(DestTy->getElementType(),
+ cast<FixedVectorType>(SrcTy)->getNumElements());
InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
}
bool IsBigEndian = IC.getDataLayout().isBigEndian();
- unsigned SrcElts = SrcTy->getNumElements();
- unsigned DestElts = DestTy->getNumElements();
+ unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
+ unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
assert(SrcElts != DestElts && "Element counts should be different.");
@@ -2165,8 +2217,8 @@ static bool collectInsertionElements(Value *V, unsigned Shift,
///
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
- InstCombiner &IC) {
- VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ InstCombinerImpl &IC) {
+ auto *DestVecTy = cast<FixedVectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
@@ -2194,7 +2246,7 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
/// vectors better than bitcasts of scalars because vector registers are
/// usually not type-specific like scalar integer or scalar floating-point.
static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
- InstCombiner &IC) {
+ InstCombinerImpl &IC) {
// TODO: Create and use a pattern matcher for ExtractElementInst.
auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
if (!ExtElt || !ExtElt->hasOneUse())
@@ -2206,8 +2258,7 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
if (!VectorType::isValidElementType(DestType))
return nullptr;
- unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements();
- auto *NewVecType = FixedVectorType::get(DestType, NumElts);
+ auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType());
auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(),
NewVecType, "bc");
return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
@@ -2270,12 +2321,11 @@ static Instruction *foldBitCastSelect(BitCastInst &BitCast,
// A vector select must maintain the same number of elements in its operands.
Type *CondTy = Cond->getType();
Type *DestTy = BitCast.getType();
- if (auto *CondVTy = dyn_cast<VectorType>(CondTy)) {
- if (!DestTy->isVectorTy())
+ if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
+ if (!DestTy->isVectorTy() ||
+ CondVTy->getElementCount() !=
+ cast<VectorType>(DestTy)->getElementCount())
return nullptr;
- if (cast<VectorType>(DestTy)->getNumElements() != CondVTy->getNumElements())
- return nullptr;
- }
// FIXME: This transform is restricted from changing the select between
// scalars and vectors to avoid backend problems caused by creating
@@ -2320,7 +2370,8 @@ static bool hasStoreUsersOnly(CastInst &CI) {
///
/// All the related PHI nodes can be replaced by new PHI nodes with type A.
/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
-Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
+Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
+ PHINode *PN) {
// BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
if (hasStoreUsersOnly(CI))
return nullptr;
@@ -2450,10 +2501,7 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
Instruction *RetVal = nullptr;
for (auto *OldPN : OldPhiNodes) {
PHINode *NewPN = NewPNodes[OldPN];
- for (auto It = OldPN->user_begin(), End = OldPN->user_end(); It != End; ) {
- User *V = *It;
- // We may remove this user, advance to avoid iterator invalidation.
- ++It;
+ for (User *V : make_early_inc_range(OldPN->users())) {
if (auto *SI = dyn_cast<StoreInst>(V)) {
assert(SI->isSimple() && SI->getOperand(0) == OldPN);
Builder.SetInsertPoint(SI);
@@ -2473,7 +2521,7 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
if (BCI == &CI)
RetVal = I;
} else if (auto *PHI = dyn_cast<PHINode>(V)) {
- assert(OldPhiNodes.count(PHI) > 0);
+ assert(OldPhiNodes.contains(PHI));
(void) PHI;
} else {
llvm_unreachable("all uses should be handled");
@@ -2484,7 +2532,7 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
return RetVal;
}
-Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
+Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
// otherwise just apply the common ones.
Value *Src = CI.getOperand(0);
@@ -2608,12 +2656,11 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// a bitcast to a vector with the same # elts.
Value *ShufOp0 = Shuf->getOperand(0);
Value *ShufOp1 = Shuf->getOperand(1);
- unsigned NumShufElts = Shuf->getType()->getNumElements();
- unsigned NumSrcVecElts =
- cast<VectorType>(ShufOp0->getType())->getNumElements();
+ auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
+ auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
- cast<VectorType>(DestTy)->getNumElements() == NumShufElts &&
- NumShufElts == NumSrcVecElts) {
+ cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
+ ShufElts == SrcVecElts) {
BitCastInst *Tmp;
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
@@ -2636,8 +2683,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// TODO: We should match the related pattern for bitreverse.
if (DestTy->isIntegerTy() &&
DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
- SrcTy->getScalarSizeInBits() == 8 && NumShufElts % 2 == 0 &&
- Shuf->hasOneUse() && Shuf->isReverse()) {
+ SrcTy->getScalarSizeInBits() == 8 &&
+ ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() &&
+ Shuf->isReverse()) {
assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
assert(isa<UndefValue>(ShufOp1) && "Unexpected shuffle op");
Function *Bswap =
@@ -2666,7 +2714,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
return commonCastTransforms(CI);
}
-Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
+Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
// If the destination pointer element type is not the same as the source's
// first do a bitcast to the destination type, and then the addrspacecast.
// This allows the cast to be exposed to other transforms.
@@ -2677,11 +2725,9 @@ Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
Type *DestElemTy = DestTy->getElementType();
if (SrcTy->getElementType() != DestElemTy) {
Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace());
- if (VectorType *VT = dyn_cast<VectorType>(CI.getType())) {
- // Handle vectors of pointers.
- // FIXME: what should happen for scalable vectors?
- MidTy = FixedVectorType::get(MidTy, VT->getNumElements());
- }
+ // Handle vectors of pointers.
+ if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
+ MidTy = VectorType::get(MidTy, VT->getElementCount());
Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
return new AddrSpaceCastInst(NewBitCast, CI.getType());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index f1233b62445d..cd9a036179b6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
using namespace llvm;
using namespace PatternMatch;
@@ -95,45 +96,6 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
return false;
}
-/// Given a signed integer type and a set of known zero and one bits, compute
-/// the maximum and minimum values that could have the specified known zero and
-/// known one bits, returning them in Min/Max.
-/// TODO: Move to method on KnownBits struct?
-static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known,
- APInt &Min, APInt &Max) {
- assert(Known.getBitWidth() == Min.getBitWidth() &&
- Known.getBitWidth() == Max.getBitWidth() &&
- "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
- APInt UnknownBits = ~(Known.Zero|Known.One);
-
- // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
- // bit if it is unknown.
- Min = Known.One;
- Max = Known.One|UnknownBits;
-
- if (UnknownBits.isNegative()) { // Sign bit is unknown
- Min.setSignBit();
- Max.clearSignBit();
- }
-}
-
-/// Given an unsigned integer type and a set of known zero and one bits, compute
-/// the maximum and minimum values that could have the specified known zero and
-/// known one bits, returning them in Min/Max.
-/// TODO: Move to method on KnownBits struct?
-static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known,
- APInt &Min, APInt &Max) {
- assert(Known.getBitWidth() == Min.getBitWidth() &&
- Known.getBitWidth() == Max.getBitWidth() &&
- "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
- APInt UnknownBits = ~(Known.Zero|Known.One);
-
- // The minimum value is when the unknown bits are all zeros.
- Min = Known.One;
- // The maximum value is when the unknown bits are all ones.
- Max = Known.One|UnknownBits;
-}
-
/// This is called when we see this pattern:
/// cmp pred (load (gep GV, ...)), cmpcst
/// where GV is a global variable with a constant initializer. Try to simplify
@@ -142,10 +104,10 @@ static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known,
///
/// If AndCst is non-null, then the loaded value is masked with that constant
/// before doing the comparison. This handles cases like "A[i]&4 == 0".
-Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
- GlobalVariable *GV,
- CmpInst &ICI,
- ConstantInt *AndCst) {
+Instruction *
+InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+ GlobalVariable *GV, CmpInst &ICI,
+ ConstantInt *AndCst) {
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
@@ -313,7 +275,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
if (!GEP->isInBounds()) {
Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
- if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
+ if (Idx->getType()->getPrimitiveSizeInBits().getFixedSize() > PtrSize)
Idx = Builder.CreateTrunc(Idx, IntPtrTy);
}
@@ -422,7 +384,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
///
/// If we can't emit an optimized form for this expression, this returns null.
///
-static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
+static Value *evaluateGEPOffsetExpression(User *GEP, InstCombinerImpl &IC,
const DataLayout &DL) {
gep_type_iterator GTI = gep_type_begin(GEP);
@@ -486,7 +448,8 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
// Cast to intptrty in case a truncation occurs. If an extension is needed,
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
- if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
+ if (VariableIdx->getType()->getPrimitiveSizeInBits().getFixedSize() >
+ IntPtrWidth) {
VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy);
}
return VariableIdx;
@@ -539,7 +502,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
Value *V = WorkList.back();
- if (Explored.count(V) != 0) {
+ if (Explored.contains(V)) {
WorkList.pop_back();
continue;
}
@@ -551,7 +514,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
return false;
if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
- auto *CI = dyn_cast<CastInst>(V);
+ auto *CI = cast<CastInst>(V);
if (!CI->isNoopCast(DL))
return false;
@@ -841,9 +804,9 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
/// Fold comparisons between a GEP instruction and something else. At this point
/// we know that the GEP is on the LHS of the comparison.
-Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
- ICmpInst::Predicate Cond,
- Instruction &I) {
+Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond,
+ Instruction &I) {
// Don't transform signed compares of GEPs into index compares. Even if the
// GEP is inbounds, the final add of the base pointer can have signed overflow
// and would change the result of the icmp.
@@ -897,8 +860,8 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// For vectors, we apply the same reasoning on a per-lane basis.
auto *Base = GEPLHS->getPointerOperand();
if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) {
- int NumElts = cast<VectorType>(GEPLHS->getType())->getNumElements();
- Base = Builder.CreateVectorSplat(NumElts, Base);
+ auto EC = cast<VectorType>(GEPLHS->getType())->getElementCount();
+ Base = Builder.CreateVectorSplat(EC, Base);
}
return new ICmpInst(Cond, Base,
ConstantExpr::getPointerBitCastOrAddrSpaceCast(
@@ -941,8 +904,8 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
Type *LHSIndexTy = LOffset->getType();
Type *RHSIndexTy = ROffset->getType();
if (LHSIndexTy != RHSIndexTy) {
- if (LHSIndexTy->getPrimitiveSizeInBits() <
- RHSIndexTy->getPrimitiveSizeInBits()) {
+ if (LHSIndexTy->getPrimitiveSizeInBits().getFixedSize() <
+ RHSIndexTy->getPrimitiveSizeInBits().getFixedSize()) {
ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy);
} else
LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy);
@@ -1021,9 +984,9 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
}
-Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
- const AllocaInst *Alloca,
- const Value *Other) {
+Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI,
+ const AllocaInst *Alloca,
+ const Value *Other) {
assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
// It would be tempting to fold away comparisons between allocas and any
@@ -1099,8 +1062,8 @@ Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
}
/// Fold "icmp pred (X+C), X".
-Instruction *InstCombiner::foldICmpAddOpConst(Value *X, const APInt &C,
- ICmpInst::Predicate Pred) {
+Instruction *InstCombinerImpl::foldICmpAddOpConst(Value *X, const APInt &C,
+ ICmpInst::Predicate Pred) {
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
// operators.
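The non-equality claim above is just modular arithmetic: adding a nonzero constant in a fixed bit width never yields the same value back. A trivial exhaustive check at 8 bits, purely illustrative:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 1; C < 256; ++C)     // any C != 0
      assert((uint8_t)(X + C) != (uint8_t)X);
}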
@@ -1149,9 +1112,9 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, const APInt &C,
/// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
/// (icmp eq/ne A, Log2(AP2/AP1)) ->
/// (icmp eq/ne A, Log2(AP2) - Log2(AP1)).
-Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A,
- const APInt &AP1,
- const APInt &AP2) {
+Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A,
+ const APInt &AP1,
+ const APInt &AP2) {
assert(I.isEquality() && "Cannot fold icmp gt/lt");
auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
@@ -1208,9 +1171,9 @@ Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A,
/// Handle "(icmp eq/ne (shl AP2, A), AP1)" ->
/// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)).
-Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A,
- const APInt &AP1,
- const APInt &AP2) {
+Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A,
+ const APInt &AP1,
+ const APInt &AP2) {
assert(I.isEquality() && "Cannot fold icmp gt/lt");
auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
@@ -1254,7 +1217,7 @@ Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A,
///
static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
ConstantInt *CI2, ConstantInt *CI1,
- InstCombiner &IC) {
+ InstCombinerImpl &IC) {
// The transformation we're trying to do here is to transform this into an
// llvm.sadd.with.overflow. To do this, we have to replace the original add
// with a narrower add, and discard the add-with-constant that is part of the
@@ -1340,7 +1303,7 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
/// icmp eq/ne (urem/srem %x, %y), 0
/// iff %y is a power-of-two, we can replace this with a bit test:
/// icmp eq/ne (and %x, (add %y, -1)), 0
-Instruction *InstCombiner::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) {
+Instruction *InstCombinerImpl::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) {
// This fold is only valid for equality predicates.
if (!I.isEquality())
return nullptr;
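The function renamed in this hunk implements the fold spelled out in its doc comment: a remainder by a power of two is zero exactly when the low bits are zero, so icmp eq/ne (urem %x, %y), 0 becomes icmp eq/ne (and %x, (add %y, -1)), 0. A minimal standalone C++ check of that identity for the unsigned case (illustrative only, not the InstCombine code path):

#include <cassert>
#include <cstdint>

// For every power-of-two y, x % y == 0 exactly when (x & (y - 1)) == 0,
// i.e. when all bits below the single set bit of y are clear in x.
int main() {
  for (uint32_t y = 1; y <= 0x8000; y <<= 1)
    for (uint32_t x = 0; x <= 0xFFFF; ++x)
      assert((x % y == 0) == ((x & (y - 1)) == 0));
  return 0;
}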
@@ -1359,7 +1322,7 @@ Instruction *InstCombiner::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) {
/// Fold equality-comparison between zero and any (maybe truncated) right-shift
/// by one-less-than-bitwidth into a sign test on the original value.
-Instruction *InstCombiner::foldSignBitTest(ICmpInst &I) {
+Instruction *InstCombinerImpl::foldSignBitTest(ICmpInst &I) {
Instruction *Val;
ICmpInst::Predicate Pred;
if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero())))
@@ -1390,7 +1353,7 @@ Instruction *InstCombiner::foldSignBitTest(ICmpInst &I) {
}
// Handle icmp pred X, 0
-Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
+Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
CmpInst::Predicate Pred = Cmp.getPredicate();
if (!match(Cmp.getOperand(1), m_Zero()))
return nullptr;
@@ -1431,7 +1394,7 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
/// should be moved to some other helper and extended as noted below (it is also
/// possible that code has been made unnecessary - do we canonicalize IR to
/// overflow/saturating intrinsics or not?).
-Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
+Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
// Match the following pattern, which is a common idiom when writing
// overflow-safe integer arithmetic functions. The source performs an addition
// in wider type and explicitly checks for overflow using comparisons against
@@ -1477,7 +1440,7 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
}
/// Canonicalize icmp instructions based on dominating conditions.
-Instruction *InstCombiner::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
+Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
// This is a cheap/incomplete check for dominance - just match a single
// predecessor with a conditional branch.
BasicBlock *CmpBB = Cmp.getParent();
@@ -1547,9 +1510,9 @@ Instruction *InstCombiner::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
}
/// Fold icmp (trunc X, Y), C.
-Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
- TruncInst *Trunc,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
+ TruncInst *Trunc,
+ const APInt &C) {
ICmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Trunc->getOperand(0);
if (C.isOneValue() && C.getBitWidth() > 1) {
@@ -1580,9 +1543,9 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
}
/// Fold icmp (xor X, Y), C.
-Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
- BinaryOperator *Xor,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp,
+ BinaryOperator *Xor,
+ const APInt &C) {
Value *X = Xor->getOperand(0);
Value *Y = Xor->getOperand(1);
const APInt *XorC;
@@ -1612,15 +1575,13 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
if (Xor->hasOneUse()) {
// (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
if (!Cmp.isEquality() && XorC->isSignMask()) {
- Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
- : Cmp.getSignedPredicate();
+ Pred = Cmp.getFlippedSignednessPredicate();
return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
}
// (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
- Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
- : Cmp.getSignedPredicate();
+ Pred = Cmp.getFlippedSignednessPredicate();
Pred = Cmp.getSwappedPredicate(Pred);
return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
}
@@ -1649,8 +1610,10 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
}
/// Fold icmp (and (sh X, Y), C2), C1.
-Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
- const APInt &C1, const APInt &C2) {
+Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp,
+ BinaryOperator *And,
+ const APInt &C1,
+ const APInt &C2) {
BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0));
if (!Shift || !Shift->isShift())
return nullptr;
@@ -1733,9 +1696,9 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
}
/// Fold icmp (and X, C2), C1.
-Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
- BinaryOperator *And,
- const APInt &C1) {
+Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
+ BinaryOperator *And,
+ const APInt &C1) {
bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE;
// For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
@@ -1841,9 +1804,9 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
}
/// Fold icmp (and X, Y), C.
-Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
- BinaryOperator *And,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
+ BinaryOperator *And,
+ const APInt &C) {
if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
return I;
@@ -1883,7 +1846,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
if (auto *AndVTy = dyn_cast<VectorType>(And->getType()))
- NTy = FixedVectorType::get(NTy, AndVTy->getNumElements());
+ NTy = VectorType::get(NTy, AndVTy->getElementCount());
Value *Trunc = Builder.CreateTrunc(X, NTy);
auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE
: CmpInst::ICMP_SLT;
@@ -1895,8 +1858,9 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
}
/// Fold icmp (or X, Y), C.
-Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
+ BinaryOperator *Or,
+ const APInt &C) {
ICmpInst::Predicate Pred = Cmp.getPredicate();
if (C.isOneValue()) {
// icmp slt signum(V) 1 --> icmp slt V, 1
@@ -1960,9 +1924,9 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
}
/// Fold icmp (mul X, Y), C.
-Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp,
- BinaryOperator *Mul,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
+ BinaryOperator *Mul,
+ const APInt &C) {
const APInt *MulC;
if (!match(Mul->getOperand(1), m_APInt(MulC)))
return nullptr;
@@ -1977,6 +1941,21 @@ Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp,
Constant::getNullValue(Mul->getType()));
}
+ // If the multiply does not wrap, try to divide the compare constant by the
+ // multiplication factor.
+ if (Cmp.isEquality() && !MulC->isNullValue()) {
+ // (mul nsw X, MulC) == C --> X == C /s MulC
+ if (Mul->hasNoSignedWrap() && C.srem(*MulC).isNullValue()) {
+ Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC));
+ return new ICmpInst(Pred, Mul->getOperand(0), NewC);
+ }
+ // (mul nuw X, MulC) == C --> X == C /u MulC
+ if (Mul->hasNoUnsignedWrap() && C.urem(*MulC).isNullValue()) {
+ Constant *NewC = ConstantInt::get(Mul->getType(), C.udiv(*MulC));
+ return new ICmpInst(Pred, Mul->getOperand(0), NewC);
+ }
+ }
+
return nullptr;
}
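The block added above introduces a new equality fold for multiplies that carry a no-wrap flag: when MulC is nonzero and divides the compare constant exactly, (mul nsw X, MulC) == C becomes X == C /s MulC, with the analogous nuw/udiv variant. It also appears to subsume the (mul nsw X, C) eq/ne 0 special case that a later hunk removes from foldICmpBinOpEqualityWithConstant. A small sketch of why the no-wrap assumption matters, checked with ordinary 64-bit arithmetic and illustrative constants (not from the patch):

#include <cassert>
#include <cstdint>

// Without wrapping, multiplication by a nonzero m is injective, so for a
// compare constant c that m divides exactly: x * m == c  <=>  x == c / m.
int main() {
  const int64_t m = 12, c = 180;            // c % m == 0, quotient is 15
  for (int64_t x = -1000; x <= 1000; ++x)
    assert(((x * m) == c) == (x == c / m));
  return 0;
}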
@@ -2043,9 +2022,9 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
}
/// Fold icmp (shl X, Y), C.
-Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
- BinaryOperator *Shl,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
+ BinaryOperator *Shl,
+ const APInt &C) {
const APInt *ShiftVal;
if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal);
@@ -2173,7 +2152,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
DL.isLegalInteger(TypeBits - Amt)) {
Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
if (auto *ShVTy = dyn_cast<VectorType>(ShType))
- TruncTy = FixedVectorType::get(TruncTy, ShVTy->getNumElements());
+ TruncTy = VectorType::get(TruncTy, ShVTy->getElementCount());
Constant *NewC =
ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt));
return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC);
@@ -2183,9 +2162,9 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
}
/// Fold icmp ({al}shr X, Y), C.
-Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
- BinaryOperator *Shr,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
+ BinaryOperator *Shr,
+ const APInt &C) {
// An exact shr only shifts out zero bits, so:
// icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
Value *X = Shr->getOperand(0);
@@ -2231,6 +2210,21 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
(ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
}
+
+ // If the compare constant has significant bits above the lowest sign-bit,
+ // then convert an unsigned cmp to a test of the sign-bit:
+ // (ashr X, ShiftC) u> C --> X s< 0
+ // (ashr X, ShiftC) u< C --> X s> -1
+ if (C.getBitWidth() > 2 && C.getNumSignBits() <= ShAmtVal) {
+ if (Pred == CmpInst::ICMP_UGT) {
+ return new ICmpInst(CmpInst::ICMP_SLT, X,
+ ConstantInt::getNullValue(ShrTy));
+ }
+ if (Pred == CmpInst::ICMP_ULT) {
+ return new ICmpInst(CmpInst::ICMP_SGT, X,
+ ConstantInt::getAllOnesValue(ShrTy));
+ }
+ }
} else {
if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
// icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
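The new block above converts an unsigned compare of an ashr result into a plain sign test when the compare constant has no more than ShAmtVal significant sign bits. The intuition: after an arithmetic shift the result is either a small non-negative value (X >= 0) or, read as unsigned, a value in the top band near -1 (X < 0), and the precondition places C strictly between those two bands. An exhaustive 8-bit sanity check of both directions, as a sketch that assumes two's-complement arithmetic shift (guaranteed since C++20):

#include <cassert>
#include <cstdint>

// Leading identical bits of an 8-bit value, mirroring APInt::getNumSignBits.
static unsigned numSignBits(uint8_t c) {
  unsigned n = 1;
  for (int i = 6; i >= 0 && ((c >> i) & 1) == ((c >> 7) & 1); --i)
    ++n;
  return n;
}

int main() {
  for (unsigned s = 1; s <= 7; ++s)
    for (unsigned c = 0; c <= 0xFF; ++c) {
      if (numSignBits((uint8_t)c) > s)
        continue;                                // precondition of the fold
      for (int x = -128; x <= 127; ++x) {
        uint8_t shr = (uint8_t)((int8_t)x >> s); // arithmetic shift right
        assert((shr > (uint8_t)c) == (x < 0));   // (ashr X, s) u> C --> X s< 0
        assert((shr < (uint8_t)c) == (x > -1));  // (ashr X, s) u< C --> X s> -1
      }
    }
  return 0;
}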
@@ -2276,9 +2270,9 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
return nullptr;
}
-Instruction *InstCombiner::foldICmpSRemConstant(ICmpInst &Cmp,
- BinaryOperator *SRem,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
+ BinaryOperator *SRem,
+ const APInt &C) {
// Match an 'is positive' or 'is negative' comparison of remainder by a
// constant power-of-2 value:
// (X % pow2C) sgt/slt 0
@@ -2315,9 +2309,9 @@ Instruction *InstCombiner::foldICmpSRemConstant(ICmpInst &Cmp,
}
/// Fold icmp (udiv X, Y), C.
-Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp,
- BinaryOperator *UDiv,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp,
+ BinaryOperator *UDiv,
+ const APInt &C) {
const APInt *C2;
if (!match(UDiv->getOperand(0), m_APInt(C2)))
return nullptr;
@@ -2344,9 +2338,9 @@ Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp,
}
/// Fold icmp ({su}div X, Y), C.
-Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
- BinaryOperator *Div,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
+ BinaryOperator *Div,
+ const APInt &C) {
// Fold: icmp pred ([us]div X, C2), C -> range test
// Fold this div into the comparison, producing a range check.
// Determine, based on the divide type, what the range is being
@@ -2514,9 +2508,9 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
}
/// Fold icmp (sub X, Y), C.
-Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
- BinaryOperator *Sub,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
+ BinaryOperator *Sub,
+ const APInt &C) {
Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
ICmpInst::Predicate Pred = Cmp.getPredicate();
const APInt *C2;
@@ -2576,9 +2570,9 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
}
/// Fold icmp (add X, Y), C.
-Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
- BinaryOperator *Add,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
+ BinaryOperator *Add,
+ const APInt &C) {
Value *Y = Add->getOperand(1);
const APInt *C2;
if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
@@ -2642,10 +2636,10 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
return nullptr;
}
-bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
- Value *&RHS, ConstantInt *&Less,
- ConstantInt *&Equal,
- ConstantInt *&Greater) {
+bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
+ Value *&RHS, ConstantInt *&Less,
+ ConstantInt *&Equal,
+ ConstantInt *&Greater) {
// TODO: Generalize this to work with other comparison idioms or ensure
// they get canonicalized into this form.
@@ -2682,7 +2676,8 @@ bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
if (PredB == ICmpInst::ICMP_SGT && isa<Constant>(RHS2)) {
// x sgt C-1 <--> x sge C <--> not(x slt C)
auto FlippedStrictness =
- getFlippedStrictnessPredicateAndConstant(PredB, cast<Constant>(RHS2));
+ InstCombiner::getFlippedStrictnessPredicateAndConstant(
+ PredB, cast<Constant>(RHS2));
if (!FlippedStrictness)
return false;
assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check");
@@ -2694,9 +2689,9 @@ bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
return PredB == ICmpInst::ICMP_SLT && RHS == RHS2;
}
-Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
- SelectInst *Select,
- ConstantInt *C) {
+Instruction *InstCombinerImpl::foldICmpSelectConstant(ICmpInst &Cmp,
+ SelectInst *Select,
+ ConstantInt *C) {
assert(C && "Cmp RHS should be a constant int!");
// If we're testing a constant value against the result of a three way
@@ -2794,7 +2789,7 @@ static Instruction *foldICmpBitCast(ICmpInst &Cmp,
const APInt *C;
bool TrueIfSigned;
if (match(Op1, m_APInt(C)) && Bitcast->hasOneUse() &&
- isSignBitCheck(Pred, *C, TrueIfSigned)) {
+ InstCombiner::isSignBitCheck(Pred, *C, TrueIfSigned)) {
if (match(BCSrcOp, m_FPExt(m_Value(X))) ||
match(BCSrcOp, m_FPTrunc(m_Value(X)))) {
// (bitcast (fpext/fptrunc X)) to iX) < 0 --> (bitcast X to iY) < 0
@@ -2806,7 +2801,7 @@ static Instruction *foldICmpBitCast(ICmpInst &Cmp,
Type *NewType = Builder.getIntNTy(XType->getScalarSizeInBits());
if (auto *XVTy = dyn_cast<VectorType>(XType))
- NewType = FixedVectorType::get(NewType, XVTy->getNumElements());
+ NewType = VectorType::get(NewType, XVTy->getElementCount());
Value *NewBitcast = Builder.CreateBitCast(X, NewType);
if (TrueIfSigned)
return new ICmpInst(ICmpInst::ICMP_SLT, NewBitcast,
@@ -2870,7 +2865,7 @@ static Instruction *foldICmpBitCast(ICmpInst &Cmp,
/// Try to fold integer comparisons with a constant operand: icmp Pred X, C
/// where X is some kind of instruction.
-Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
+Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) {
const APInt *C;
if (!match(Cmp.getOperand(1), m_APInt(C)))
return nullptr;
@@ -2955,9 +2950,8 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
/// Fold an icmp equality instruction with binary operator LHS and constant RHS:
/// icmp eq/ne BO, C.
-Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
- BinaryOperator *BO,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
+ ICmpInst &Cmp, BinaryOperator *BO, const APInt &C) {
// TODO: Some of these folds could work with arbitrary constants, but this
// function is limited to scalar and vector splat constants.
if (!Cmp.isEquality())
@@ -3047,17 +3041,6 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
}
break;
}
- case Instruction::Mul:
- if (C.isNullValue() && BO->hasNoSignedWrap()) {
- const APInt *BOC;
- if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) {
- // The trivial case (mul X, 0) is handled by InstSimplify.
- // General case : (mul X, C) != 0 iff X != 0
- // (mul X, C) == 0 iff X == 0
- return new ICmpInst(Pred, BOp0, Constant::getNullValue(RHS->getType()));
- }
- }
- break;
case Instruction::UDiv:
if (C.isNullValue()) {
// (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
@@ -3072,12 +3055,19 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
}
/// Fold an equality icmp with LLVM intrinsic and constant operand.
-Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp,
- IntrinsicInst *II,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
+ ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) {
Type *Ty = II->getType();
unsigned BitWidth = C.getBitWidth();
switch (II->getIntrinsicID()) {
+ case Intrinsic::abs:
+ // abs(A) == 0 -> A == 0
+ // abs(A) == INT_MIN -> A == INT_MIN
+ if (C.isNullValue() || C.isMinSignedValue())
+ return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
+ ConstantInt::get(Ty, C));
+ break;
+
case Intrinsic::bswap:
// bswap(A) == C -> A == bswap(C)
return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
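The abs case added above relies on two facts: zero is the only value whose absolute value is zero, and the minimum signed value is the only value whose wrapping absolute value is the minimum signed value. A tiny exhaustive check over 8-bit two's-complement bit patterns (illustrative only; absWrap here models llvm.abs with the int-min-poison flag clear):

#include <cassert>
#include <cstdint>

// Wrapping absolute value on 8-bit patterns: 0x80 (INT8_MIN) maps to itself.
static uint8_t absWrap(uint8_t a) { return (a & 0x80) ? (uint8_t)(0 - a) : a; }

int main() {
  for (unsigned a = 0; a <= 0xFF; ++a) {
    assert((absWrap((uint8_t)a) == 0x00) == (a == 0x00)); // abs(A) == 0 -> A == 0
    assert((absWrap((uint8_t)a) == 0x80) == (a == 0x80)); // abs(A) == INT_MIN -> A == INT_MIN
  }
  return 0;
}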
@@ -3145,18 +3135,31 @@ Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp,
}
/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
-Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
- IntrinsicInst *II,
- const APInt &C) {
+Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
+ IntrinsicInst *II,
+ const APInt &C) {
if (Cmp.isEquality())
return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
Type *Ty = II->getType();
unsigned BitWidth = C.getBitWidth();
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
switch (II->getIntrinsicID()) {
+ case Intrinsic::ctpop: {
+ // (ctpop X > BitWidth - 1) --> X == -1
+ Value *X = II->getArgOperand(0);
+ if (C == BitWidth - 1 && Pred == ICmpInst::ICMP_UGT)
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, X,
+ ConstantInt::getAllOnesValue(Ty));
+ // (ctpop X < BitWidth) --> X != -1
+ if (C == BitWidth && Pred == ICmpInst::ICMP_ULT)
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, X,
+ ConstantInt::getAllOnesValue(Ty));
+ break;
+ }
case Intrinsic::ctlz: {
// ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
- if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+ if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
unsigned Num = C.getLimitedValue();
APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
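Among the changes in this hunk, the new ctpop case turns a population-count threshold into a compare against -1: only the all-ones value sets every bit, so ctpop(X) u> BitWidth-1 means X == -1 and ctpop(X) u< BitWidth means X != -1. A short standalone check for 8-bit values, with std::bitset standing in for ctpop:

#include <bitset>
#include <cassert>

int main() {
  // 0xFF is the only 8-bit value with a population count of 8.
  for (unsigned x = 0; x <= 0xFF; ++x) {
    unsigned pop = (unsigned)std::bitset<8>(x).count();
    assert((pop > 7) == (x == 0xFF)); // (ctpop X > BitWidth - 1) --> X == -1
    assert((pop < 8) == (x != 0xFF)); // (ctpop X < BitWidth)     --> X != -1
  }
  return 0;
}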
@@ -3164,8 +3167,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
}
// ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
- if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
- C.uge(1) && C.ule(BitWidth)) {
+ if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) {
unsigned Num = C.getLimitedValue();
APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
@@ -3179,7 +3181,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
return nullptr;
// cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
- if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+ if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
Builder.CreateAnd(II->getArgOperand(0), Mask),
@@ -3187,8 +3189,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
}
// cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
- if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
- C.uge(1) && C.ule(BitWidth)) {
+ if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) {
APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
Builder.CreateAnd(II->getArgOperand(0), Mask),
@@ -3204,7 +3205,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
}
/// Handle icmp with constant (but not simple integer constant) RHS.
-Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
+Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Constant *RHSC = dyn_cast<Constant>(Op1);
Instruction *LHSI = dyn_cast<Instruction>(Op0);
@@ -3383,8 +3384,8 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
// those elements by copying an existing, defined, and safe scalar constant.
Type *OpTy = M->getType();
auto *VecC = dyn_cast<Constant>(M);
- if (OpTy->isVectorTy() && VecC && VecC->containsUndefElement()) {
- auto *OpVTy = cast<VectorType>(OpTy);
+ auto *OpVTy = dyn_cast<FixedVectorType>(OpTy);
+ if (OpVTy && VecC && VecC->containsUndefOrPoisonElement()) {
Constant *SafeReplacementConstant = nullptr;
for (unsigned i = 0, e = OpVTy->getNumElements(); i != e; ++i) {
if (!isa<UndefValue>(VecC->getAggregateElement(i))) {
@@ -3650,7 +3651,7 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
/// @llvm.umul.with.overflow(x, y) plus extraction of overflow bit
/// Note that the comparison is commutative, while inverted (u>=, ==) predicate
/// will mean that we are looking for the opposite answer.
-Value *InstCombiner::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
+Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
ICmpInst::Predicate Pred;
Value *X, *Y;
Instruction *Mul;
@@ -3712,11 +3713,28 @@ Value *InstCombiner::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
return Res;
}
+static Instruction *foldICmpXNegX(ICmpInst &I) {
+ CmpInst::Predicate Pred;
+ Value *X;
+ if (!match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X))))
+ return nullptr;
+
+ if (ICmpInst::isSigned(Pred))
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ else if (ICmpInst::isUnsigned(Pred))
+ Pred = ICmpInst::getSignedPredicate(Pred);
+ // else for equality-comparisons just keep the predicate.
+
+ return ICmpInst::Create(Instruction::ICmp, Pred, X,
+ Constant::getNullValue(X->getType()), I.getName());
+}
+
/// Try to fold icmp (binop), X or icmp X, (binop).
/// TODO: A large part of this logic is duplicated in InstSimplify's
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
/// duplication.
-Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I, const SimplifyQuery &SQ) {
+Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
+ const SimplifyQuery &SQ) {
const SimplifyQuery Q = SQ.getWithInstruction(&I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
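The helper added above matches a compare between X and its no-signed-wrap negation and rewrites it as a compare of X against zero: signed predicates get swapped, unsigned predicates become their signed counterparts, and equality predicates are kept as-is. Two representative cases, checked exhaustively over 8-bit values other than INT8_MIN (excluded because the nsw negation would wrap there); this is a sketch of the reasoning, not the transform itself:

#include <cassert>
#include <cstdint>

int main() {
  for (int x = -127; x <= 127; ++x) {     // skip -128: "sub nsw 0, x" would wrap
    int neg = -x;
    // (-X) slt X  --> X sgt 0   (signed predicate is swapped)
    assert((neg < x) == (x > 0));
    // (-X) ult X  --> X slt 0   (unsigned predicate becomes signed)
    assert(((uint8_t)neg < (uint8_t)x) == (x < 0));
  }
  return 0;
}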
@@ -3726,6 +3744,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I, const SimplifyQuery &SQ) {
if (!BO0 && !BO1)
return nullptr;
+ if (Instruction *NewICmp = foldICmpXNegX(I))
+ return NewICmp;
+
const CmpInst::Predicate Pred = I.getPredicate();
Value *X;
@@ -3946,6 +3967,19 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I, const SimplifyQuery &SQ) {
ConstantExpr::getNeg(RHSC));
}
+ {
+ // Try to remove shared constant multiplier from equality comparison:
+ // X * C == Y * C (with no overflowing/aliasing) --> X == Y
+ Value *X, *Y;
+ const APInt *C;
+ if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 &&
+ match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality())
+ if (!C->countTrailingZeros() ||
+ (BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) ||
+ (BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap()))
+ return new ICmpInst(Pred, X, Y);
+ }
+
BinaryOperator *SRem = nullptr;
// icmp (srem X, Y), Y
if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1))
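The block added above cancels a shared constant multiplier from an equality compare, either when the constant is odd (no trailing zeros) or when both multiplies carry matching nsw/nuw flags; a later hunk in this function removes the old copy of the no-trailing-zeros case from the shl/mul masking code. The odd-constant leg works because an odd multiplier is invertible modulo 2^N, so the multiplication is a bijection even when it wraps. An exhaustive 8-bit illustration with an arbitrary odd constant:

#include <cassert>
#include <cstdint>

int main() {
  // 77 is odd, hence invertible mod 256: x*77 == y*77 (mod 256) <=> x == y.
  const uint8_t c = 77;
  for (unsigned x = 0; x <= 0xFF; ++x)
    for (unsigned y = 0; y <= 0xFF; ++y)
      assert(((uint8_t)(x * c) == (uint8_t)(y * c)) == (x == y));
  return 0;
}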
@@ -3990,15 +4024,13 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I, const SimplifyQuery &SQ) {
if (match(BO0->getOperand(1), m_APInt(C))) {
// icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
if (C->isSignMask()) {
- ICmpInst::Predicate NewPred =
- I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
+ ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate();
return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
}
// icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) {
- ICmpInst::Predicate NewPred =
- I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
+ ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate();
NewPred = I.getSwappedPredicate(NewPred);
return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
}
@@ -4022,10 +4054,6 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I, const SimplifyQuery &SQ) {
Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask);
return new ICmpInst(Pred, And1, And2);
}
- // If there are no trailing zeros in the multiplier, just eliminate
- // the multiplies (no masking is needed):
- // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y
- return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
}
break;
}
@@ -4170,7 +4198,7 @@ static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) {
return nullptr;
}
-Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
+Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
if (!I.isEquality())
return nullptr;
@@ -4438,7 +4466,7 @@ static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp,
}
/// Handle icmp (cast x), (cast or constant).
-Instruction *InstCombiner::foldICmpWithCastOp(ICmpInst &ICmp) {
+Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) {
auto *CastOp0 = dyn_cast<CastInst>(ICmp.getOperand(0));
if (!CastOp0)
return nullptr;
@@ -4493,9 +4521,10 @@ static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) {
}
}
-OverflowResult InstCombiner::computeOverflow(
- Instruction::BinaryOps BinaryOp, bool IsSigned,
- Value *LHS, Value *RHS, Instruction *CxtI) const {
+OverflowResult
+InstCombinerImpl::computeOverflow(Instruction::BinaryOps BinaryOp,
+ bool IsSigned, Value *LHS, Value *RHS,
+ Instruction *CxtI) const {
switch (BinaryOp) {
default:
llvm_unreachable("Unsupported binary op");
@@ -4517,9 +4546,11 @@ OverflowResult InstCombiner::computeOverflow(
}
}
-bool InstCombiner::OptimizeOverflowCheck(
- Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS,
- Instruction &OrigI, Value *&Result, Constant *&Overflow) {
+bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp,
+ bool IsSigned, Value *LHS,
+ Value *RHS, Instruction &OrigI,
+ Value *&Result,
+ Constant *&Overflow) {
if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
std::swap(LHS, RHS);
@@ -4529,9 +4560,13 @@ bool InstCombiner::OptimizeOverflowCheck(
// compare.
Builder.SetInsertPoint(&OrigI);
+ Type *OverflowTy = Type::getInt1Ty(LHS->getContext());
+ if (auto *LHSTy = dyn_cast<VectorType>(LHS->getType()))
+ OverflowTy = VectorType::get(OverflowTy, LHSTy->getElementCount());
+
if (isNeutralValue(BinaryOp, RHS)) {
Result = LHS;
- Overflow = Builder.getFalse();
+ Overflow = ConstantInt::getFalse(OverflowTy);
return true;
}
@@ -4542,12 +4577,12 @@ bool InstCombiner::OptimizeOverflowCheck(
case OverflowResult::AlwaysOverflowsHigh:
Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
Result->takeName(&OrigI);
- Overflow = Builder.getTrue();
+ Overflow = ConstantInt::getTrue(OverflowTy);
return true;
case OverflowResult::NeverOverflows:
Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
Result->takeName(&OrigI);
- Overflow = Builder.getFalse();
+ Overflow = ConstantInt::getFalse(OverflowTy);
if (auto *Inst = dyn_cast<Instruction>(Result)) {
if (IsSigned)
Inst->setHasNoSignedWrap();
@@ -4575,7 +4610,8 @@ bool InstCombiner::OptimizeOverflowCheck(
/// \returns Instruction which must replace the compare instruction, NULL if no
/// replacement required.
static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
- Value *OtherVal, InstCombiner &IC) {
+ Value *OtherVal,
+ InstCombinerImpl &IC) {
// Don't bother doing this transformation for pointers, don't do it for
// vectors.
if (!isa<IntegerType>(MulVal->getType()))
@@ -4723,15 +4759,14 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
Function *F = Intrinsic::getDeclaration(
I.getModule(), Intrinsic::umul_with_overflow, MulType);
CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
- IC.Worklist.push(MulInstr);
+ IC.addToWorklist(MulInstr);
// If there are uses of mul result other than the comparison, we know that
// they are truncation or binary AND. Change them to use result of
// mul.with.overflow and adjust properly mask/size.
if (MulVal->hasNUsesOrMore(2)) {
Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
- for (auto UI = MulVal->user_begin(), UE = MulVal->user_end(); UI != UE;) {
- User *U = *UI++;
+ for (User *U : make_early_inc_range(MulVal->users())) {
if (U == &I || U == OtherVal)
continue;
if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
@@ -4750,11 +4785,11 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
} else {
llvm_unreachable("Unexpected Binary operation");
}
- IC.Worklist.push(cast<Instruction>(U));
+ IC.addToWorklist(cast<Instruction>(U));
}
}
if (isa<Instruction>(OtherVal))
- IC.Worklist.push(cast<Instruction>(OtherVal));
+ IC.addToWorklist(cast<Instruction>(OtherVal));
// The original icmp gets replaced with the overflow value, maybe inverted
// depending on predicate.
@@ -4799,7 +4834,7 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
// If this is a normal comparison, it demands all bits. If it is a sign bit
// comparison, it only demands the sign bit.
bool UnusedBit;
- if (isSignBitCheck(I.getPredicate(), *RHS, UnusedBit))
+ if (InstCombiner::isSignBitCheck(I.getPredicate(), *RHS, UnusedBit))
return APInt::getSignMask(BitWidth);
switch (I.getPredicate()) {
@@ -4856,9 +4891,9 @@ static bool swapMayExposeCSEOpportunities(const Value *Op0, const Value *Op1) {
/// \return true when \p UI is the only use of \p DI in the parent block
/// and all other uses of \p DI are in blocks dominated by \p DB.
///
-bool InstCombiner::dominatesAllUses(const Instruction *DI,
- const Instruction *UI,
- const BasicBlock *DB) const {
+bool InstCombinerImpl::dominatesAllUses(const Instruction *DI,
+ const Instruction *UI,
+ const BasicBlock *DB) const {
assert(DI && UI && "Instruction not defined\n");
// Ignore incomplete definitions.
if (!DI->getParent())
@@ -4931,9 +4966,9 @@ static bool isChainSelectCmpBranch(const SelectInst *SI) {
/// major restriction since a NE compare should be 'normalized' to an equal
/// compare, which usually happens in the combiner and test case
/// select-cmp-br.ll checks for it.
-bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
- const ICmpInst *Icmp,
- const unsigned SIOpd) {
+bool InstCombinerImpl::replacedSelectWithOperand(SelectInst *SI,
+ const ICmpInst *Icmp,
+ const unsigned SIOpd) {
assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
@@ -4959,7 +4994,7 @@ bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
/// Try to fold the comparison based on range information we can get by checking
/// whether bits are known to be zero or one in the inputs.
-Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
+Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = Op0->getType();
ICmpInst::Predicate Pred = I.getPredicate();
@@ -4990,11 +5025,15 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
if (I.isSigned()) {
- computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
- computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
+ Op0Min = Op0Known.getSignedMinValue();
+ Op0Max = Op0Known.getSignedMaxValue();
+ Op1Min = Op1Known.getSignedMinValue();
+ Op1Max = Op1Known.getSignedMaxValue();
} else {
- computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
- computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
+ Op0Min = Op0Known.getMinValue();
+ Op0Max = Op0Known.getMaxValue();
+ Op1Min = Op1Known.getMinValue();
+ Op1Max = Op1Known.getMaxValue();
}
// If Min and Max are known to be the same, then SimplifyDemandedBits figured
@@ -5012,11 +5051,9 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
llvm_unreachable("Unknown icmp opcode!");
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_NE: {
- if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) {
- return Pred == CmpInst::ICMP_EQ
- ? replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()))
- : replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
- }
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return replaceInstUsesWith(
+ I, ConstantInt::getBool(I.getType(), Pred == CmpInst::ICMP_NE));
// If all bits are known zero except for one, then we know at most one bit
// is set. If the comparison is against zero, then this is a check to see if
@@ -5186,8 +5223,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
}
llvm::Optional<std::pair<CmpInst::Predicate, Constant *>>
-llvm::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
- Constant *C) {
+InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
+ Constant *C) {
assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
"Only for relational integer predicates.");
@@ -5209,8 +5246,8 @@ llvm::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
// Bail out if the constant can't be safely incremented/decremented.
if (!ConstantIsOk(CI))
return llvm::None;
- } else if (auto *VTy = dyn_cast<VectorType>(Type)) {
- unsigned NumElts = VTy->getNumElements();
+ } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) {
+ unsigned NumElts = FVTy->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = C->getAggregateElement(i);
if (!Elt)
@@ -5236,7 +5273,8 @@ llvm::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
// It may not be safe to change a compare predicate in the presence of
// undefined elements, so replace those elements with the first safe constant
// that we found.
- if (C->containsUndefElement()) {
+ // TODO: in case of poison, it is safe; let's replace undefs only.
+ if (C->containsUndefOrPoisonElement()) {
assert(SafeReplacementConstant && "Replacement constant not set");
C = Constant::replaceUndefsWith(C, SafeReplacementConstant);
}
@@ -5256,7 +5294,7 @@ llvm::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
ICmpInst::Predicate Pred = I.getPredicate();
if (ICmpInst::isEquality(Pred) || !ICmpInst::isIntPredicate(Pred) ||
- isCanonicalPredicate(Pred))
+ InstCombiner::isCanonicalPredicate(Pred))
return nullptr;
Value *Op0 = I.getOperand(0);
@@ -5265,7 +5303,8 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
if (!Op1C)
return nullptr;
- auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, Op1C);
+ auto FlippedStrictness =
+ InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, Op1C);
if (!FlippedStrictness)
return nullptr;
@@ -5274,14 +5313,14 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
/// If we have a comparison with a non-canonical predicate, if we can update
/// all the users, invert the predicate and adjust all the users.
-static CmpInst *canonicalizeICmpPredicate(CmpInst &I) {
+CmpInst *InstCombinerImpl::canonicalizeICmpPredicate(CmpInst &I) {
// Is the predicate already canonical?
CmpInst::Predicate Pred = I.getPredicate();
- if (isCanonicalPredicate(Pred))
+ if (InstCombiner::isCanonicalPredicate(Pred))
return nullptr;
// Can all users be adjusted to predicate inversion?
- if (!canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
+ if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
return nullptr;
// Ok, we can canonicalize comparison!
@@ -5289,26 +5328,8 @@ static CmpInst *canonicalizeICmpPredicate(CmpInst &I) {
I.setPredicate(CmpInst::getInversePredicate(Pred));
I.setName(I.getName() + ".not");
- // And now let's adjust every user.
- for (User *U : I.users()) {
- switch (cast<Instruction>(U)->getOpcode()) {
- case Instruction::Select: {
- auto *SI = cast<SelectInst>(U);
- SI->swapValues();
- SI->swapProfMetadata();
- break;
- }
- case Instruction::Br:
- cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
- break;
- case Instruction::Xor:
- U->replaceAllUsesWith(&I);
- break;
- default:
- llvm_unreachable("Got unexpected user - out of sync with "
- "canFreelyInvertAllUsersOf() ?");
- }
- }
+ // And, adapt users.
+ freelyInvertAllUsersOf(&I);
return &I;
}
@@ -5510,7 +5531,7 @@ static Instruction *foldICmpOfUAddOv(ICmpInst &I) {
return ExtractValueInst::Create(UAddOv, 1);
}
-Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
+Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -5634,10 +5655,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Try to optimize equality comparisons against alloca-based pointers.
if (Op0->getType()->isPointerTy() && I.isEquality()) {
assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
- if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
+ if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0)))
if (Instruction *New = foldAllocaCmp(I, Alloca, Op1))
return New;
- if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
+ if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1)))
if (Instruction *New = foldAllocaCmp(I, Alloca, Op0))
return New;
}
@@ -5748,8 +5769,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
/// Fold fcmp ([us]itofp x, cst) if possible.
-Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
- Constant *RHSC) {
+Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I,
+ Instruction *LHSI,
+ Constant *RHSC) {
if (!isa<ConstantFP>(RHSC)) return nullptr;
const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
@@ -6034,9 +6056,9 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
}
/// Optimize fabs(X) compared with zero.
-static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombiner &IC) {
+static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) {
Value *X;
- if (!match(I.getOperand(0), m_Intrinsic<Intrinsic::fabs>(m_Value(X))) ||
+ if (!match(I.getOperand(0), m_FAbs(m_Value(X))) ||
!match(I.getOperand(1), m_PosZeroFP()))
return nullptr;
@@ -6096,7 +6118,7 @@ static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombiner &IC) {
}
}
-Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
+Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
bool Changed = false;
/// Orders the operands of the compare so that they are listed from most
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index ca51f37af4d9..79e9d5c46c70 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -15,40 +15,32 @@
#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
-#include <cstdint>
#define DEBUG_TYPE "instcombine"
using namespace llvm::PatternMatch;
+// As a default, let's assume that we want to be aggressive,
+// and attempt to traverse with no limits in attempt to sink negation.
+static constexpr unsigned NegatorDefaultMaxDepth = ~0U;
+
+// Let's guesstimate that most often we will end up visiting/producing
+// fairly small number of new instructions.
+static constexpr unsigned NegatorMaxNodesSSO = 16;
+
namespace llvm {
class AAResults;
@@ -65,305 +57,26 @@ class ProfileSummaryInfo;
class TargetLibraryInfo;
class User;
-/// Assign a complexity or rank value to LLVM Values. This is used to reduce
-/// the amount of pattern matching needed for compares and commutative
-/// instructions. For example, if we have:
-/// icmp ugt X, Constant
-/// or
-/// xor (add X, Constant), cast Z
-///
-/// We do not have to consider the commuted variants of these patterns because
-/// canonicalization based on complexity guarantees the above ordering.
-///
-/// This routine maps IR values to various complexity ranks:
-/// 0 -> undef
-/// 1 -> Constants
-/// 2 -> Other non-instructions
-/// 3 -> Arguments
-/// 4 -> Cast and (f)neg/not instructions
-/// 5 -> Other instructions
-static inline unsigned getComplexity(Value *V) {
- if (isa<Instruction>(V)) {
- if (isa<CastInst>(V) || match(V, m_Neg(m_Value())) ||
- match(V, m_Not(m_Value())) || match(V, m_FNeg(m_Value())))
- return 4;
- return 5;
- }
- if (isa<Argument>(V))
- return 3;
- return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
-}
-
-/// Predicate canonicalization reduces the number of patterns that need to be
-/// matched by other transforms. For example, we may swap the operands of a
-/// conditional branch or select to create a compare with a canonical (inverted)
-/// predicate which is then more likely to be matched with other values.
-static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) {
- switch (Pred) {
- case CmpInst::ICMP_NE:
- case CmpInst::ICMP_ULE:
- case CmpInst::ICMP_SLE:
- case CmpInst::ICMP_UGE:
- case CmpInst::ICMP_SGE:
- // TODO: There are 16 FCMP predicates. Should others be (not) canonical?
- case CmpInst::FCMP_ONE:
- case CmpInst::FCMP_OLE:
- case CmpInst::FCMP_OGE:
- return false;
- default:
- return true;
- }
-}
-
-/// Given an exploded icmp instruction, return true if the comparison only
-/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the
-/// result of the comparison is true when the input value is signed.
-inline bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
- bool &TrueIfSigned) {
- switch (Pred) {
- case ICmpInst::ICMP_SLT: // True if LHS s< 0
- TrueIfSigned = true;
- return RHS.isNullValue();
- case ICmpInst::ICMP_SLE: // True if LHS s<= -1
- TrueIfSigned = true;
- return RHS.isAllOnesValue();
- case ICmpInst::ICMP_SGT: // True if LHS s> -1
- TrueIfSigned = false;
- return RHS.isAllOnesValue();
- case ICmpInst::ICMP_SGE: // True if LHS s>= 0
- TrueIfSigned = false;
- return RHS.isNullValue();
- case ICmpInst::ICMP_UGT:
- // True if LHS u> RHS and RHS == sign-bit-mask - 1
- TrueIfSigned = true;
- return RHS.isMaxSignedValue();
- case ICmpInst::ICMP_UGE:
- // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
- TrueIfSigned = true;
- return RHS.isMinSignedValue();
- case ICmpInst::ICMP_ULT:
- // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
- TrueIfSigned = false;
- return RHS.isMinSignedValue();
- case ICmpInst::ICMP_ULE:
- // True if LHS u<= RHS and RHS == sign-bit-mask - 1
- TrueIfSigned = false;
- return RHS.isMaxSignedValue();
- default:
- return false;
- }
-}
-
-llvm::Optional<std::pair<CmpInst::Predicate, Constant *>>
-getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, Constant *C);
-
-/// Return the source operand of a potentially bitcasted value while optionally
-/// checking if it has one use. If there is no bitcast or the one use check is
-/// not met, return the input value itself.
-static inline Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) {
- if (auto *BitCast = dyn_cast<BitCastInst>(V))
- if (!OneUseOnly || BitCast->hasOneUse())
- return BitCast->getOperand(0);
-
- // V is not a bitcast or V has more than one use and OneUseOnly is true.
- return V;
-}
-
-/// Add one to a Constant
-static inline Constant *AddOne(Constant *C) {
- return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
-}
-
-/// Subtract one from a Constant
-static inline Constant *SubOne(Constant *C) {
- return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
-}
-
-/// Return true if the specified value is free to invert (apply ~ to).
-/// This happens in cases where the ~ can be eliminated. If WillInvertAllUses
-/// is true, work under the assumption that the caller intends to remove all
-/// uses of V and only keep uses of ~V.
-///
-/// See also: canFreelyInvertAllUsersOf()
-static inline bool isFreeToInvert(Value *V, bool WillInvertAllUses) {
- // ~(~(X)) -> X.
- if (match(V, m_Not(m_Value())))
- return true;
-
- // Constants can be considered to be not'ed values.
- if (match(V, m_AnyIntegralConstant()))
- return true;
-
- // Compares can be inverted if all of their uses are being modified to use the
- // ~V.
- if (isa<CmpInst>(V))
- return WillInvertAllUses;
-
- // If `V` is of the form `A + Constant` then `-1 - V` can be folded into `(-1
- // - Constant) - A` if we are willing to invert all of the uses.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
- if (BO->getOpcode() == Instruction::Add ||
- BO->getOpcode() == Instruction::Sub)
- if (isa<Constant>(BO->getOperand(0)) || isa<Constant>(BO->getOperand(1)))
- return WillInvertAllUses;
-
- // Selects with invertible operands are freely invertible
- if (match(V, m_Select(m_Value(), m_Not(m_Value()), m_Not(m_Value()))))
- return WillInvertAllUses;
-
- return false;
-}
-
-/// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
-/// InstCombine's canonicalizeICmpPredicate() must be kept in sync with this fn.
-///
-/// See also: isFreeToInvert()
-static inline bool canFreelyInvertAllUsersOf(Value *V, Value *IgnoredUser) {
- // Look at every user of V.
- for (Use &U : V->uses()) {
- if (U.getUser() == IgnoredUser)
- continue; // Don't consider this user.
-
- auto *I = cast<Instruction>(U.getUser());
- switch (I->getOpcode()) {
- case Instruction::Select:
- if (U.getOperandNo() != 0) // Only if the value is used as select cond.
- return false;
- break;
- case Instruction::Br:
- assert(U.getOperandNo() == 0 && "Must be branching on that value.");
- break; // Free to invert by swapping true/false values/destinations.
- case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring it.
- if (!match(I, m_Not(m_Value())))
- return false; // Not a 'not'.
- break;
- default:
- return false; // Don't know, likely not freely invertible.
- }
- // So far all users were free to invert...
- }
- return true; // Can freely invert all users!
-}
-
-/// Some binary operators require special handling to avoid poison and undefined
-/// behavior. If a constant vector has undef elements, replace those undefs with
-/// identity constants if possible because those are always safe to execute.
-/// If no identity constant exists, replace undef with some other safe constant.
-static inline Constant *getSafeVectorConstantForBinop(
- BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant) {
- auto *InVTy = dyn_cast<VectorType>(In->getType());
- assert(InVTy && "Not expecting scalars here");
-
- Type *EltTy = InVTy->getElementType();
- auto *SafeC = ConstantExpr::getBinOpIdentity(Opcode, EltTy, IsRHSConstant);
- if (!SafeC) {
- // TODO: Should this be available as a constant utility function? It is
- // similar to getBinOpAbsorber().
- if (IsRHSConstant) {
- switch (Opcode) {
- case Instruction::SRem: // X % 1 = 0
- case Instruction::URem: // X %u 1 = 0
- SafeC = ConstantInt::get(EltTy, 1);
- break;
- case Instruction::FRem: // X % 1.0 (doesn't simplify, but it is safe)
- SafeC = ConstantFP::get(EltTy, 1.0);
- break;
- default:
- llvm_unreachable("Only rem opcodes have no identity constant for RHS");
- }
- } else {
- switch (Opcode) {
- case Instruction::Shl: // 0 << X = 0
- case Instruction::LShr: // 0 >>u X = 0
- case Instruction::AShr: // 0 >> X = 0
- case Instruction::SDiv: // 0 / X = 0
- case Instruction::UDiv: // 0 /u X = 0
- case Instruction::SRem: // 0 % X = 0
- case Instruction::URem: // 0 %u X = 0
- case Instruction::Sub: // 0 - X (doesn't simplify, but it is safe)
- case Instruction::FSub: // 0.0 - X (doesn't simplify, but it is safe)
- case Instruction::FDiv: // 0.0 / X (doesn't simplify, but it is safe)
- case Instruction::FRem: // 0.0 % X = 0
- SafeC = Constant::getNullValue(EltTy);
- break;
- default:
- llvm_unreachable("Expected to find identity constant for opcode");
- }
- }
- }
- assert(SafeC && "Must have safe constant for binop");
- unsigned NumElts = InVTy->getNumElements();
- SmallVector<Constant *, 16> Out(NumElts);
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *C = In->getAggregateElement(i);
- Out[i] = isa<UndefValue>(C) ? SafeC : C;
- }
- return ConstantVector::get(Out);
-}
-
-/// The core instruction combiner logic.
-///
-/// This class provides both the logic to recursively visit instructions and
-/// combine them.
-class LLVM_LIBRARY_VISIBILITY InstCombiner
- : public InstVisitor<InstCombiner, Instruction *> {
- // FIXME: These members shouldn't be public.
+class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
+ : public InstCombiner,
+ public InstVisitor<InstCombinerImpl, Instruction *> {
public:
- /// A worklist of the instructions that need to be simplified.
- InstCombineWorklist &Worklist;
-
- /// An IRBuilder that automatically inserts new instructions into the
- /// worklist.
- using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>;
- BuilderTy &Builder;
-
-private:
- // Mode in which we are running the combiner.
- const bool MinimizeSize;
-
- AAResults *AA;
-
- // Required analyses.
- AssumptionCache &AC;
- TargetLibraryInfo &TLI;
- DominatorTree &DT;
- const DataLayout &DL;
- const SimplifyQuery SQ;
- OptimizationRemarkEmitter &ORE;
- BlockFrequencyInfo *BFI;
- ProfileSummaryInfo *PSI;
+ InstCombinerImpl(InstCombineWorklist &Worklist, BuilderTy &Builder,
+ bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
+ TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
+ DominatorTree &DT, OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ const DataLayout &DL, LoopInfo *LI)
+ : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE,
+ BFI, PSI, DL, LI) {}
- // Optional analyses. When non-null, these can both be used to do better
- // combining and will be updated to reflect any changes.
- LoopInfo *LI;
-
- bool MadeIRChange = false;
-
-public:
- InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
- bool MinimizeSize, AAResults *AA,
- AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
- OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
- : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
- AA(AA), AC(AC), TLI(TLI), DT(DT),
- DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
+ virtual ~InstCombinerImpl() {}
/// Run the combiner over the entire worklist until it is empty.
///
/// \returns true if the IR is changed.
bool run();
- AssumptionCache &getAssumptionCache() const { return AC; }
-
- const DataLayout &getDataLayout() const { return DL; }
-
- DominatorTree &getDominatorTree() const { return DT; }
-
- LoopInfo *getLoopInfo() const { return LI; }
-
- TargetLibraryInfo &getTargetLibraryInfo() const { return TLI; }
-
// Visitation implementation - Implement instruction combining for different
// instruction types. The semantics are as follows:
// Return Value:
@@ -384,9 +97,7 @@ public:
Instruction *visitSRem(BinaryOperator &I);
Instruction *visitFRem(BinaryOperator &I);
bool simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I);
- Instruction *commonRemTransforms(BinaryOperator &I);
Instruction *commonIRemTransforms(BinaryOperator &I);
- Instruction *commonDivTransforms(BinaryOperator &I);
Instruction *commonIDivTransforms(BinaryOperator &I);
Instruction *visitUDiv(BinaryOperator &I);
Instruction *visitSDiv(BinaryOperator &I);
@@ -394,6 +105,7 @@ public:
Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted);
Instruction *visitAnd(BinaryOperator &I);
Instruction *visitOr(BinaryOperator &I);
+ bool sinkNotIntoOtherHandOfAndOrOr(BinaryOperator &I);
Instruction *visitXor(BinaryOperator &I);
Instruction *visitShl(BinaryOperator &I);
Value *reassociateShiftAmtsOfTwoSameDirectionShifts(
@@ -407,6 +119,7 @@ public:
Instruction *visitLShr(BinaryOperator &I);
Instruction *commonShiftTransforms(BinaryOperator &I);
Instruction *visitFCmpInst(FCmpInst &I);
+ CmpInst *canonicalizeICmpPredicate(CmpInst &I);
Instruction *visitICmpInst(ICmpInst &I);
Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I);
@@ -445,6 +158,9 @@ public:
Instruction *visitFenceInst(FenceInst &FI);
Instruction *visitSwitchInst(SwitchInst &SI);
Instruction *visitReturnInst(ReturnInst &RI);
+ Instruction *visitUnreachableInst(UnreachableInst &I);
+ Instruction *
+ foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI);
Instruction *visitInsertValueInst(InsertValueInst &IV);
Instruction *visitInsertElementInst(InsertElementInst &IE);
Instruction *visitExtractElementInst(ExtractElementInst &EI);
@@ -467,11 +183,6 @@ public:
bool replacedSelectWithOperand(SelectInst *SI, const ICmpInst *Icmp,
const unsigned SIOpd);
- /// Try to replace instruction \p I with value \p V which are pointers
- /// in different address space.
- /// \return true if successful.
- bool replacePointer(Instruction &I, Value *V);
-
LoadInst *combineLoadToNewType(LoadInst &LI, Type *NewTy,
const Twine &Suffix = "");
@@ -609,10 +320,12 @@ private:
Instruction *narrowBinOp(TruncInst &Trunc);
Instruction *narrowMaskedBinOp(BinaryOperator &And);
Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
- Instruction *narrowRotate(TruncInst &Trunc);
+ Instruction *narrowFunnelShift(TruncInst &Trunc);
Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
Instruction *matchSAddSubSat(SelectInst &MinMax1);
+ void freelyInvertAllUsersOf(Value *V);
+
/// Determine if a pair of casts can be replaced by a single cast.
///
/// \param CI1 The first of a pair of casts.
@@ -685,6 +398,7 @@ public:
<< " with " << *V << '\n');
I.replaceAllUsesWith(V);
+ MadeIRChange = true;
return &I;
}
@@ -726,7 +440,7 @@ public:
/// When dealing with an instruction that has side effects or produces a void
/// value, we can't rely on DCE to delete the instruction. Instead, visit
/// methods should return the value returned by this function.
- Instruction *eraseInstFromFunction(Instruction &I) {
+ Instruction *eraseInstFromFunction(Instruction &I) override {
LLVM_DEBUG(dbgs() << "IC: ERASE " << I << '\n');
assert(I.use_empty() && "Cannot erase instruction that is used!");
salvageDebugInfo(I);
@@ -808,10 +522,6 @@ public:
Instruction::BinaryOps BinaryOp, bool IsSigned,
Value *LHS, Value *RHS, Instruction *CxtI) const;
- /// Maximum size of array considered when transforming.
- uint64_t MaxArraySizeForCombine = 0;
-
-private:
/// Performs a few simplifications for operators which are associative
/// or commutative.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
@@ -857,7 +567,7 @@ private:
unsigned Depth, Instruction *CxtI);
bool SimplifyDemandedBits(Instruction *I, unsigned Op,
const APInt &DemandedMask, KnownBits &Known,
- unsigned Depth = 0);
+ unsigned Depth = 0) override;
/// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne
/// bits. It also tries to handle simplifications that can be done based on
@@ -877,13 +587,10 @@ private:
/// demanded bits.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
- Value *simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
- APInt DemandedElts,
- int DmaskIdx = -1);
-
- Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
- APInt &UndefElts, unsigned Depth = 0,
- bool AllowMultipleUsers = false);
+ virtual Value *
+ SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts,
+ unsigned Depth = 0,
+ bool AllowMultipleUsers = false) override;
/// Canonicalize the position of binops relative to shufflevector.
Instruction *foldVectorBinop(BinaryOperator &Inst);
@@ -907,16 +614,18 @@ private:
/// Try to rotate an operation below a PHI node, using PHI nodes for
/// its operands.
- Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
- Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
- Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
- Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
- Instruction *FoldPHIArgZextsIntoPHI(PHINode &PN);
+ Instruction *foldPHIArgOpIntoPHI(PHINode &PN);
+ Instruction *foldPHIArgBinOpIntoPHI(PHINode &PN);
+ Instruction *foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN);
+ Instruction *foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN);
+ Instruction *foldPHIArgGEPIntoPHI(PHINode &PN);
+ Instruction *foldPHIArgLoadIntoPHI(PHINode &PN);
+ Instruction *foldPHIArgZextsIntoPHI(PHINode &PN);
/// If an integer typed PHI has only one use which is an IntToPtr operation,
/// replace the PHI with an existing pointer typed PHI if it exists. Otherwise
/// insert a new pointer typed PHI and replace the original one.
- Instruction *FoldIntegerTypedPHI(PHINode &PN);
+ Instruction *foldIntegerTypedPHI(PHINode &PN);
/// Helper function for FoldPHIArgXIntoPHI() to set debug location for the
/// folded operation.
@@ -999,18 +708,18 @@ private:
Value *A, Value *B, Instruction &Outer,
SelectPatternFlavor SPF2, Value *C);
Instruction *foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
-
- Instruction *OptAndOp(BinaryOperator *Op, ConstantInt *OpRHS,
- ConstantInt *AndRHS, BinaryOperator &TheAnd);
+ Instruction *foldSelectValueEquivalence(SelectInst &SI, ICmpInst &ICI);
Value *insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
bool isSigned, bool Inside);
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
bool mergeStoreIntoSuccessor(StoreInst &SI);
- /// Given an 'or' instruction, check to see if it is part of a bswap idiom.
- /// If so, return the equivalent bswap intrinsic.
- Instruction *matchBSwap(BinaryOperator &Or);
+ /// Given an 'or' instruction, check to see if it is part of a
+ /// bswap/bitreverse idiom. If so, return the equivalent bswap/bitreverse
+ /// intrinsic.
+ Instruction *matchBSwapOrBitReverse(BinaryOperator &Or, bool MatchBSwaps,
+ bool MatchBitReversals);
Instruction *SimplifyAnyMemTransfer(AnyMemTransferInst *MI);
Instruction *SimplifyAnyMemSet(AnyMemSetInst *MI);
@@ -1023,18 +732,6 @@ private:
Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
};
-namespace {
-
-// As a default, let's assume that we want to be aggressive,
-// and attempt to traverse with no limits in attempt to sink negation.
-static constexpr unsigned NegatorDefaultMaxDepth = ~0U;
-
-// Let's guesstimate that most often we will end up visiting/producing
-// fairly small number of new instructions.
-static constexpr unsigned NegatorMaxNodesSSO = 16;
-
-} // namespace
-
class Negator final {
/// Top-to-bottom, def-to-use negated instruction tree we produced.
SmallVector<Instruction *, NegatorMaxNodesSSO> NewInstructions;
@@ -1061,6 +758,8 @@ class Negator final {
using Result = std::pair<ArrayRef<Instruction *> /*NewInstructions*/,
Value * /*NegatedRoot*/>;
+ std::array<Value *, 2> getSortedOperandsOfBinOp(Instruction *I);
+
LLVM_NODISCARD Value *visitImpl(Value *V, unsigned Depth);
LLVM_NODISCARD Value *negate(Value *V, unsigned Depth);
@@ -1078,7 +777,7 @@ public:
/// Attempt to negate \p Root. Returns nullptr if negation can't be performed,
/// otherwise returns negated value.
LLVM_NODISCARD static Value *Negate(bool LHSIsZero, Value *Root,
- InstCombiner &IC);
+ InstCombinerImpl &IC);
};
} // end namespace llvm
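
The matchBSwapOrBitReverse() declaration above widens the old bswap-only matcher to bit-reverse idioms as well. As a rough, standalone illustration (not part of the patch; the helper name bswapIdiom is invented for it), this is the shape of 'or'-rooted shift-and-mask pattern such a matcher is expected to recognize:

#include <cassert>
#include <cstdint>

// Hand-written 32-bit byte swap built from shifts, masks and 'or' --
// the kind of pattern an or-rooted bswap matcher looks for.
static uint32_t bswapIdiom(uint32_t x) {
  return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
         ((x << 8) & 0x00ff0000u) | (x << 24);
}

int main() {
  assert(bswapIdiom(0x11223344u) == 0x44332211u);
  return 0;
}
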
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index dad2f23120bd..c7b5f6f78069 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -166,7 +167,8 @@ static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
APInt(64, AllocaSize), DL);
}
-static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
+static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
+ AllocaInst &AI) {
// Check for array size of 1 (scalar allocation).
if (!AI.isArrayAllocation()) {
// i32 1 is the canonical array size for scalar allocations.
@@ -234,47 +236,45 @@ namespace {
// instruction.
class PointerReplacer {
public:
- PointerReplacer(InstCombiner &IC) : IC(IC) {}
+ PointerReplacer(InstCombinerImpl &IC) : IC(IC) {}
+
+ bool collectUsers(Instruction &I);
void replacePointer(Instruction &I, Value *V);
private:
- void findLoadAndReplace(Instruction &I);
void replace(Instruction *I);
Value *getReplacement(Value *I);
- SmallVector<Instruction *, 4> Path;
+ SmallSetVector<Instruction *, 4> Worklist;
MapVector<Value *, Value *> WorkMap;
- InstCombiner &IC;
+ InstCombinerImpl &IC;
};
} // end anonymous namespace
-void PointerReplacer::findLoadAndReplace(Instruction &I) {
+bool PointerReplacer::collectUsers(Instruction &I) {
for (auto U : I.users()) {
- auto *Inst = dyn_cast<Instruction>(&*U);
- if (!Inst)
- return;
- LLVM_DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
- if (isa<LoadInst>(Inst)) {
- for (auto P : Path)
- replace(P);
- replace(Inst);
+ Instruction *Inst = cast<Instruction>(&*U);
+ if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
+ if (Load->isVolatile())
+ return false;
+ Worklist.insert(Load);
} else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
- Path.push_back(Inst);
- findLoadAndReplace(*Inst);
- Path.pop_back();
+ Worklist.insert(Inst);
+ if (!collectUsers(*Inst))
+ return false;
+ } else if (isa<MemTransferInst>(Inst)) {
+ Worklist.insert(Inst);
} else {
- return;
+ LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
+ return false;
}
}
-}
-Value *PointerReplacer::getReplacement(Value *V) {
- auto Loc = WorkMap.find(V);
- if (Loc != WorkMap.end())
- return Loc->second;
- return nullptr;
+ return true;
}
+Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
+
void PointerReplacer::replace(Instruction *I) {
if (getReplacement(I))
return;
@@ -282,9 +282,12 @@ void PointerReplacer::replace(Instruction *I) {
if (auto *LT = dyn_cast<LoadInst>(I)) {
auto *V = getReplacement(LT->getPointerOperand());
assert(V && "Operand not replaced");
- auto *NewI = new LoadInst(I->getType(), V, "", false,
- IC.getDataLayout().getABITypeAlign(I->getType()));
+ auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
+ LT->getAlign(), LT->getOrdering(),
+ LT->getSyncScopeID());
NewI->takeName(LT);
+ copyMetadataForLoad(*NewI, *LT);
+
IC.InsertNewInstWith(NewI, *LT);
IC.replaceInstUsesWith(*LT, NewI);
WorkMap[LT] = NewI;
@@ -307,6 +310,28 @@ void PointerReplacer::replace(Instruction *I) {
IC.InsertNewInstWith(NewI, *BC);
NewI->takeName(BC);
WorkMap[BC] = NewI;
+ } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
+ auto *SrcV = getReplacement(MemCpy->getRawSource());
+ // The pointer may appear in the destination of a copy, but we don't want to
+ // replace it.
+ if (!SrcV) {
+ assert(getReplacement(MemCpy->getRawDest()) &&
+ "destination not in replace list");
+ return;
+ }
+
+ IC.Builder.SetInsertPoint(MemCpy);
+ auto *NewI = IC.Builder.CreateMemTransferInst(
+ MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(),
+ SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(),
+ MemCpy->isVolatile());
+ AAMDNodes AAMD;
+ MemCpy->getAAMetadata(AAMD);
+ if (AAMD)
+ NewI->setAAMetadata(AAMD);
+
+ IC.eraseInstFromFunction(*MemCpy);
+ WorkMap[MemCpy] = NewI;
} else {
llvm_unreachable("should never reach here");
}
@@ -320,10 +345,12 @@ void PointerReplacer::replacePointer(Instruction &I, Value *V) {
"Invalid usage");
#endif
WorkMap[&I] = V;
- findLoadAndReplace(I);
+
+ for (Instruction *Workitem : Worklist)
+ replace(Workitem);
}
-Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
if (auto *I = simplifyAllocaArraySize(*this, AI))
return I;
@@ -374,23 +401,21 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// read.
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
+ Value *TheSrc = Copy->getSource();
Align AllocaAlign = AI.getAlign();
Align SourceAlign = getOrEnforceKnownAlignment(
- Copy->getSource(), AllocaAlign, DL, &AI, &AC, &DT);
+ TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
if (AllocaAlign <= SourceAlign &&
- isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
+ isDereferenceableForAllocaSize(TheSrc, &AI, DL)) {
LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
- for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
- eraseInstFromFunction(*ToDelete[i]);
- Value *TheSrc = Copy->getSource();
- auto *SrcTy = TheSrc->getType();
- auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
- SrcTy->getPointerAddressSpace());
- Value *Cast =
- Builder.CreatePointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
- if (AI.getType()->getPointerAddressSpace() ==
- SrcTy->getPointerAddressSpace()) {
+ unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
+ auto *DestTy = PointerType::get(AI.getAllocatedType(), SrcAddrSpace);
+ if (AI.getType()->getAddressSpace() == SrcAddrSpace) {
+ for (Instruction *Delete : ToDelete)
+ eraseInstFromFunction(*Delete);
+
+ Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
Instruction *NewI = replaceInstUsesWith(AI, Cast);
eraseInstFromFunction(*Copy);
++NumGlobalCopies;
@@ -398,8 +423,14 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
}
PointerReplacer PtrReplacer(*this);
- PtrReplacer.replacePointer(AI, Cast);
- ++NumGlobalCopies;
+ if (PtrReplacer.collectUsers(AI)) {
+ for (Instruction *Delete : ToDelete)
+ eraseInstFromFunction(*Delete);
+
+ Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
+ PtrReplacer.replacePointer(AI, Cast);
+ ++NumGlobalCopies;
+ }
}
}
@@ -421,9 +452,9 @@ static bool isSupportedAtomicType(Type *Ty) {
/// that pointer type, load it, etc.
///
/// Note that this will create all of the instructions with whatever insert
-/// point the \c InstCombiner currently is using.
-LoadInst *InstCombiner::combineLoadToNewType(LoadInst &LI, Type *NewTy,
- const Twine &Suffix) {
+/// point the \c InstCombinerImpl currently is using.
+LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
+ const Twine &Suffix) {
assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
"can't fold an atomic load to requested type");
@@ -445,7 +476,8 @@ LoadInst *InstCombiner::combineLoadToNewType(LoadInst &LI, Type *NewTy,
/// Combine a store to a new type.
///
/// Returns the newly created store instruction.
-static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
+static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
+ Value *V) {
assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
"can't fold an atomic store of requested type");
@@ -485,6 +517,7 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
break;
case LLVMContext::MD_invariant_load:
case LLVMContext::MD_nonnull:
+ case LLVMContext::MD_noundef:
case LLVMContext::MD_range:
case LLVMContext::MD_align:
case LLVMContext::MD_dereferenceable:
@@ -502,7 +535,7 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) {
assert(V->getType()->isPointerTy() && "Expected pointer type.");
// Ignore possible ty* to ixx* bitcast.
- V = peekThroughBitcast(V);
+ V = InstCombiner::peekThroughBitcast(V);
// Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
// pattern.
CmpInst::Predicate Pred;
@@ -537,7 +570,8 @@ static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) {
/// or a volatile load. This is debatable, and might be reasonable to change
/// later. However, it is risky in case some backend or other part of LLVM is
/// relying on the exact type loaded to select appropriate atomic operations.
-static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
+static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
+ LoadInst &LI) {
// FIXME: We could probably with some care handle both volatile and ordered
// atomic loads here but it isn't clear that this is important.
if (!LI.isUnordered())
@@ -550,62 +584,38 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
if (LI.getPointerOperand()->isSwiftError())
return nullptr;
- Type *Ty = LI.getType();
const DataLayout &DL = IC.getDataLayout();
- // Try to canonicalize loads which are only ever stored to operate over
- // integers instead of any other type. We only do this when the loaded type
- // is sized and has a size exactly the same as its store size and the store
- // size is a legal integer type.
- // Do not perform canonicalization if minmax pattern is found (to avoid
- // infinite loop).
- Type *Dummy;
- if (!Ty->isIntegerTy() && Ty->isSized() && !isa<ScalableVectorType>(Ty) &&
- DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
- DL.typeSizeEqualsStoreSize(Ty) && !DL.isNonIntegralPointerType(Ty) &&
- !isMinMaxWithLoads(
- peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true),
- Dummy)) {
- if (all_of(LI.users(), [&LI](User *U) {
- auto *SI = dyn_cast<StoreInst>(U);
- return SI && SI->getPointerOperand() != &LI &&
- !SI->getPointerOperand()->isSwiftError();
- })) {
- LoadInst *NewLoad = IC.combineLoadToNewType(
- LI, Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
- // Replace all the stores with stores of the newly loaded value.
- for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
- auto *SI = cast<StoreInst>(*UI++);
- IC.Builder.SetInsertPoint(SI);
- combineStoreToNewValue(IC, *SI, NewLoad);
- IC.eraseInstFromFunction(*SI);
- }
- assert(LI.use_empty() && "Failed to remove all users of the load!");
- // Return the old load so the combiner can delete it safely.
- return &LI;
+ // Fold away bit casts of the loaded value by loading the desired type.
+ // Note that we should not do this for pointer<->integer casts,
+ // because that would result in type punning.
+ if (LI.hasOneUse()) {
+ // Don't transform when the type is x86_amx; doing so keeps the pass that
+ // lowers the x86_amx type happy.
+ if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
+ assert(!LI.getType()->isX86_AMXTy() &&
+ "load from x86_amx* should not happen!");
+ if (BC->getType()->isX86_AMXTy())
+ return nullptr;
}
- }
- // Fold away bit casts of the loaded value by loading the desired type.
- // We can do this for BitCastInsts as well as casts from and to pointer types,
- // as long as those are noops (i.e., the source or dest type have the same
- // bitwidth as the target's pointers).
- if (LI.hasOneUse())
if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
- if (CI->isNoopCast(DL))
+ if (CI->isNoopCast(DL) && LI.getType()->isPtrOrPtrVectorTy() ==
+ CI->getDestTy()->isPtrOrPtrVectorTy())
if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
LoadInst *NewLoad = IC.combineLoadToNewType(LI, CI->getDestTy());
CI->replaceAllUsesWith(NewLoad);
IC.eraseInstFromFunction(*CI);
return &LI;
}
+ }
// FIXME: We should also canonicalize loads of vectors when their elements are
// cast to other types.
return nullptr;
}
-static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
+static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
// FIXME: We could probably with some care handle both volatile and atomic
// stores here but it isn't clear that this is important.
if (!LI.isSimple())
@@ -743,8 +753,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
}
if (PHINode *PN = dyn_cast<PHINode>(P)) {
- for (Value *IncValue : PN->incoming_values())
- Worklist.push_back(IncValue);
+ append_range(Worklist, PN->incoming_values());
continue;
}
@@ -804,8 +813,9 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
// not zero. Currently, we only handle the first such index. Also, we could
// also search through non-zero constant indices if we kept track of the
// offsets those indices implied.
-static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
- Instruction *MemI, unsigned &Idx) {
+static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
+ GetElementPtrInst *GEPI, Instruction *MemI,
+ unsigned &Idx) {
if (GEPI->getNumOperands() < 2)
return false;
@@ -834,12 +844,17 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
return false;
SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
- Type *AllocTy =
- GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
+ Type *SourceElementType = GEPI->getSourceElementType();
+ // Size information about scalable vectors is not available, so we cannot
+ // deduce whether indexing at n is undefined behaviour or not. Bail out.
+ if (isa<ScalableVectorType>(SourceElementType))
+ return false;
+
+ Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
if (!AllocTy || !AllocTy->isSized())
return false;
const DataLayout &DL = IC.getDataLayout();
- uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
+ uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedSize();
// If there are more indices after the one we might replace with a zero, make
// sure they're all non-negative. If any of them are negative, the overall
@@ -874,7 +889,7 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
// access, but the object has only one element, we can assume that the index
// will always be zero. If we replace the GEP, return it.
template <typename T>
-static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
+static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
T &MemI) {
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
unsigned Idx;
@@ -916,7 +931,7 @@ static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
return false;
}
-Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
+Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
// Try to canonicalize the loaded type.
@@ -1033,7 +1048,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
/// and the layout of a <2 x double> is isomorphic to a [2 x double],
/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
/// Note that %U may contain non-undef values where %V1 has undef.
-static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
+static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) {
Value *U = nullptr;
while (auto *IV = dyn_cast<InsertValueInst>(V)) {
auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
@@ -1060,11 +1075,11 @@ static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
return nullptr;
}
if (auto *AT = dyn_cast<ArrayType>(VT)) {
- if (AT->getNumElements() != UT->getNumElements())
+ if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
return nullptr;
} else {
auto *ST = cast<StructType>(VT);
- if (ST->getNumElements() != UT->getNumElements())
+ if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
return nullptr;
for (const auto *EltT : ST->elements()) {
if (EltT != UT->getElementType())
@@ -1094,7 +1109,7 @@ static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
/// the caller must erase the store instruction. We have to let the caller erase
/// the store instruction as otherwise there is no way to signal whether it was
/// combined or not: IC.EraseInstFromFunction returns a null pointer.
-static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
+static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
// FIXME: We could probably with some care handle both volatile and ordered
// atomic stores here but it isn't clear that this is important.
if (!SI.isUnordered())
@@ -1108,7 +1123,13 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
// Fold away bit casts of the stored value by storing the original type.
if (auto *BC = dyn_cast<BitCastInst>(V)) {
+ assert(!BC->getType()->isX86_AMXTy() &&
+ "store to x86_amx* should not happen!");
V = BC->getOperand(0);
+ // Don't transform when the type is x86_amx; doing so keeps the pass that
+ // lowers the x86_amx type happy.
+ if (V->getType()->isX86_AMXTy())
+ return false;
if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
combineStoreToNewValue(IC, SI, V);
return true;
@@ -1126,7 +1147,7 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
return false;
}
-static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
+static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
// FIXME: We could probably with some care handle both volatile and atomic
// stores here but it isn't clear that this is important.
if (!SI.isSimple())
@@ -1266,7 +1287,7 @@ static bool equivalentAddressValues(Value *A, Value *B) {
/// Converts store (bitcast (load (bitcast (select ...)))) to
/// store (load (select ...)), where select is minmax:
/// select ((cmp load V1, load V2), V1, V2).
-static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
+static bool removeBitcastsFromLoadStoreOnMinMax(InstCombinerImpl &IC,
StoreInst &SI) {
// bitcast?
if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
@@ -1296,7 +1317,8 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
auto *SI = dyn_cast<StoreInst>(U);
return SI && SI->getPointerOperand() != LI &&
- peekThroughBitcast(SI->getPointerOperand()) != LoadAddr &&
+ InstCombiner::peekThroughBitcast(SI->getPointerOperand()) !=
+ LoadAddr &&
!SI->getPointerOperand()->isSwiftError();
}))
return false;
@@ -1314,7 +1336,7 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
return true;
}
-Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
+Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
Value *Val = SI.getOperand(0);
Value *Ptr = SI.getOperand(1);
@@ -1433,7 +1455,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
/// or:
/// *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
-bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) {
+bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
if (!SI.isUnordered())
return false; // This code has not been audited for volatile/ordered case.
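
The visitAllocaInst() changes above defer erasing the copy chain until either the same-address-space bitcast path or the PointerReplacer path is known to succeed. As a loose C-level analogue (purely illustrative; the names kTable and lookupWithCopy are made up), the underlying idea is that a never-written local copy of constant data can be read from its source directly:

#include <cstdio>
#include <cstring>

// A local buffer that is only initialized by a memcpy from constant memory
// and then read can be folded away; the reads go to the source instead.
static const int kTable[4] = {1, 2, 3, 4};

static int lookupWithCopy(int i) {
  int local[4];
  std::memcpy(local, kTable, sizeof(local)); // copy from constant memory
  return local[i];                           // only reads after the copy
}

int main() {
  std::printf("%d\n", lookupWithCopy(2)); // conceptually becomes kTable[2]
  return 0;
}
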
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2f1325e80d2f..4b485a0ad85e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include <cassert>
#include <cstddef>
@@ -46,7 +47,7 @@ using namespace PatternMatch;
/// The specific integer value is used in a context where it is known to be
/// non-zero. If this allows us to simplify the computation, do so and return
/// the new operand, otherwise return null.
-static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
+static Value *simplifyValueKnownNonZero(Value *V, InstCombinerImpl &IC,
Instruction &CxtI) {
// If V has multiple uses, then we would have to do more analysis to determine
// if this is safe. For example, the use could be in dynamically unreached
@@ -94,39 +95,6 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
return MadeChange ? V : nullptr;
}
-/// A helper routine of InstCombiner::visitMul().
-///
-/// If C is a scalar/fixed width vector of known powers of 2, then this
-/// function returns a new scalar/fixed width vector obtained from logBase2
-/// of C.
-/// Return a null pointer otherwise.
-static Constant *getLogBase2(Type *Ty, Constant *C) {
- const APInt *IVal;
- if (match(C, m_APInt(IVal)) && IVal->isPowerOf2())
- return ConstantInt::get(Ty, IVal->logBase2());
-
- // FIXME: We can extract pow of 2 of splat constant for scalable vectors.
- if (!isa<FixedVectorType>(Ty))
- return nullptr;
-
- SmallVector<Constant *, 4> Elts;
- for (unsigned I = 0, E = cast<FixedVectorType>(Ty)->getNumElements(); I != E;
- ++I) {
- Constant *Elt = C->getAggregateElement(I);
- if (!Elt)
- return nullptr;
- if (isa<UndefValue>(Elt)) {
- Elts.push_back(UndefValue::get(Ty->getScalarType()));
- continue;
- }
- if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2())
- return nullptr;
- Elts.push_back(ConstantInt::get(Ty->getScalarType(), IVal->logBase2()));
- }
-
- return ConstantVector::get(Elts);
-}
-
// TODO: This is a specific form of a much more general pattern.
// We could detect a select with any binop identity constant, or we
// could use SimplifyBinOp to see if either arm of the select reduces.
@@ -171,7 +139,7 @@ static Value *foldMulSelectToNegate(BinaryOperator &I,
return nullptr;
}
-Instruction *InstCombiner::visitMul(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
if (Value *V = SimplifyMulInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -185,8 +153,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
- // X * -1 == 0 - X
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ unsigned BitWidth = I.getType()->getScalarSizeInBits();
+
+ // X * -1 == 0 - X
if (match(Op1, m_AllOnes())) {
BinaryOperator *BO = BinaryOperator::CreateNeg(Op0, I.getName());
if (I.hasNoSignedWrap())
@@ -216,11 +186,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
// Replace X*(2^C) with X << C, where C is either a scalar or a vector.
- // Note that we need to sanitize undef multipliers to 1,
- // to avoid introducing poison.
- Constant *SafeC1 = Constant::replaceUndefsWith(
- C1, ConstantInt::get(C1->getType()->getScalarType(), 1));
- if (Constant *NewCst = getLogBase2(NewOp->getType(), SafeC1)) {
+ if (Constant *NewCst = ConstantExpr::getExactLogBase2(C1)) {
BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
if (I.hasNoUnsignedWrap())
@@ -236,29 +202,12 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n
- // (Y + const) * (-(2**n)) -> (-constY) * (2**n), for positive nonzero n
- // The "* (2**n)" thus becomes a potential shifting opportunity.
- {
- const APInt & Val = CI->getValue();
- const APInt &PosVal = Val.abs();
- if (Val.isNegative() && PosVal.isPowerOf2()) {
- Value *X = nullptr, *Y = nullptr;
- if (Op0->hasOneUse()) {
- ConstantInt *C1;
- Value *Sub = nullptr;
- if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
- Sub = Builder.CreateSub(X, Y, "suba");
- else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
- Sub = Builder.CreateSub(Builder.CreateNeg(C1), Y, "subc");
- if (Sub)
- return
- BinaryOperator::CreateMul(Sub,
- ConstantInt::get(Y->getType(), PosVal));
- }
- }
- }
+ if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) {
+ // Interpret X * (-1<<C) as (-X) * (1<<C) and try to sink the negation.
+ // The "* (1<<C)" thus becomes a potential shifting opportunity.
+ if (Value *NegOp0 = Negator::Negate(/*IsNegation*/ true, Op0, *this))
+ return BinaryOperator::CreateMul(
+ NegOp0, ConstantExpr::getNeg(cast<Constant>(Op1)), I.getName());
}
if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
@@ -288,6 +237,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
if (SPF == SPF_ABS || SPF == SPF_NABS)
return BinaryOperator::CreateMul(X, X);
+
+ if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
+ return BinaryOperator::CreateMul(X, X);
}
// -X * C --> X * -C
@@ -410,6 +362,19 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1)
return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op0);
+ // ((ashr X, 31) | 1) * X --> abs(X)
+ // X * ((ashr X, 31) | 1) --> abs(X)
+ if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X),
+ m_SpecificIntAllowUndef(BitWidth - 1)),
+ m_One()),
+ m_Deferred(X)))) {
+ Value *Abs = Builder.CreateBinaryIntrinsic(
+ Intrinsic::abs, X,
+ ConstantInt::getBool(I.getContext(), I.hasNoSignedWrap()));
+ Abs->takeName(&I);
+ return replaceInstUsesWith(I, Abs);
+ }
+
if (Instruction *Ext = narrowMathIfNoOverflow(I))
return Ext;
@@ -427,7 +392,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
-Instruction *InstCombiner::foldFPSignBitOps(BinaryOperator &I) {
+Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {
BinaryOperator::BinaryOps Opcode = I.getOpcode();
assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
"Expected fmul or fdiv");
@@ -442,13 +407,12 @@ Instruction *InstCombiner::foldFPSignBitOps(BinaryOperator &I) {
// fabs(X) * fabs(X) -> X * X
// fabs(X) / fabs(X) -> X / X
- if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))))
+ if (Op0 == Op1 && match(Op0, m_FAbs(m_Value(X))))
return BinaryOperator::CreateWithCopiedFlags(Opcode, X, X, &I);
// fabs(X) * fabs(Y) --> fabs(X * Y)
// fabs(X) / fabs(Y) --> fabs(X / Y)
- if (match(Op0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))) &&
- match(Op1, m_Intrinsic<Intrinsic::fabs>(m_Value(Y))) &&
+ if (match(Op0, m_FAbs(m_Value(X))) && match(Op1, m_FAbs(m_Value(Y))) &&
(Op0->hasOneUse() || Op1->hasOneUse())) {
IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
Builder.setFastMathFlags(I.getFastMathFlags());
@@ -461,7 +425,7 @@ Instruction *InstCombiner::foldFPSignBitOps(BinaryOperator &I) {
return nullptr;
}
-Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
if (Value *V = SimplifyFMulInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
@@ -557,6 +521,21 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return replaceInstUsesWith(I, Sqrt);
}
+ // The following transforms are done irrespective of the number of uses
+ // for the expression "1.0/sqrt(X)".
+ // 1) 1.0/sqrt(X) * X -> X/sqrt(X)
+ // 2) X * 1.0/sqrt(X) -> X/sqrt(X)
+ // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it
+ // has the necessary (reassoc) fast-math-flags.
+ if (I.hasNoSignedZeros() &&
+ match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
+ match(Y, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && Op1 == X)
+ return BinaryOperator::CreateFDivFMF(X, Y, &I);
+ if (I.hasNoSignedZeros() &&
+ match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
+ match(Y, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && Op0 == X)
+ return BinaryOperator::CreateFDivFMF(X, Y, &I);
+
// Like the similar transform in instsimplify, this requires 'nsz' because
// sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 &&
@@ -641,7 +620,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
/// Fold a divide or remainder with a select instruction divisor when one of the
/// select operands is zero. In that case, we can use the other select operand
/// because div/rem by zero is undefined.
-bool InstCombiner::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) {
+bool InstCombinerImpl::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) {
SelectInst *SI = dyn_cast<SelectInst>(I.getOperand(1));
if (!SI)
return false;
@@ -742,7 +721,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
/// instructions (udiv and sdiv). It is called by the visitors to those integer
/// division instructions.
/// Common integer divide transforms
-Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
+Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
bool IsSigned = I.getOpcode() == Instruction::SDiv;
Type *Ty = I.getType();
@@ -878,7 +857,7 @@ namespace {
using FoldUDivOperandCb = Instruction *(*)(Value *Op0, Value *Op1,
const BinaryOperator &I,
- InstCombiner &IC);
+ InstCombinerImpl &IC);
/// Used to maintain state for visitUDivOperand().
struct UDivFoldAction {
@@ -907,8 +886,9 @@ struct UDivFoldAction {
// X udiv 2^C -> X >> C
static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
- const BinaryOperator &I, InstCombiner &IC) {
- Constant *C1 = getLogBase2(Op0->getType(), cast<Constant>(Op1));
+ const BinaryOperator &I,
+ InstCombinerImpl &IC) {
+ Constant *C1 = ConstantExpr::getExactLogBase2(cast<Constant>(Op1));
if (!C1)
llvm_unreachable("Failed to constant fold udiv -> logbase2");
BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, C1);
@@ -920,7 +900,7 @@ static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
// X udiv (zext (C1 << N)), where C1 is "1<<C2" --> X >> (N+C2)
static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
- InstCombiner &IC) {
+ InstCombinerImpl &IC) {
Value *ShiftLeft;
if (!match(Op1, m_ZExt(m_Value(ShiftLeft))))
ShiftLeft = Op1;
@@ -929,7 +909,7 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
Value *N;
if (!match(ShiftLeft, m_Shl(m_Constant(CI), m_Value(N))))
llvm_unreachable("match should never fail here!");
- Constant *Log2Base = getLogBase2(N->getType(), CI);
+ Constant *Log2Base = ConstantExpr::getExactLogBase2(CI);
if (!Log2Base)
llvm_unreachable("getLogBase2 should never fail here!");
N = IC.Builder.CreateAdd(N, Log2Base);
@@ -948,6 +928,8 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
SmallVectorImpl<UDivFoldAction> &Actions,
unsigned Depth = 0) {
+ // FIXME: assert that Op1 isn't/doesn't contain undef.
+
// Check to see if this is an unsigned division with an exact power of 2,
// if so, convert to a right shift.
if (match(Op1, m_Power2())) {
@@ -967,6 +949,9 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
return 0;
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ // FIXME: missed optimization: if one of the hands of select is/contains
+ // undef, just directly pick the other one.
+ // FIXME: can both hands contain undef?
if (size_t LHSIdx =
visitUDivOperand(Op0, SI->getOperand(1), I, Actions, Depth))
if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions, Depth)) {
@@ -1014,7 +999,7 @@ static Instruction *narrowUDivURem(BinaryOperator &I,
return nullptr;
}
-Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) {
if (Value *V = SimplifyUDivInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1108,7 +1093,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return nullptr;
}
-Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
if (Value *V = SimplifySDivInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1121,6 +1106,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return Common;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Type *Ty = I.getType();
Value *X;
// sdiv Op0, -1 --> -Op0
// sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined)
@@ -1130,16 +1116,26 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// X / INT_MIN --> X == INT_MIN
if (match(Op1, m_SignMask()))
- return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType());
+ return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), Ty);
+
+ // sdiv exact X, 1<<C --> ashr exact X, C iff 1<<C is non-negative
+ // sdiv exact X, -1<<C --> -(ashr exact X, C)
+ if (I.isExact() && ((match(Op1, m_Power2()) && match(Op1, m_NonNegative())) ||
+ match(Op1, m_NegatedPower2()))) {
+ bool DivisorWasNegative = match(Op1, m_NegatedPower2());
+ if (DivisorWasNegative)
+ Op1 = ConstantExpr::getNeg(cast<Constant>(Op1));
+ auto *AShr = BinaryOperator::CreateExactAShr(
+ Op0, ConstantExpr::getExactLogBase2(cast<Constant>(Op1)), I.getName());
+ if (!DivisorWasNegative)
+ return AShr;
+ Builder.Insert(AShr);
+ AShr->setName(I.getName() + ".neg");
+ return BinaryOperator::CreateNeg(AShr, I.getName());
+ }
const APInt *Op1C;
if (match(Op1, m_APInt(Op1C))) {
- // sdiv exact X, C --> ashr exact X, log2(C)
- if (I.isExact() && Op1C->isNonNegative() && Op1C->isPowerOf2()) {
- Value *ShAmt = ConstantInt::get(Op1->getType(), Op1C->exactLogBase2());
- return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName());
- }
-
// If the dividend is sign-extended and the constant divisor is small enough
// to fit in the source type, shrink the division to the narrower type:
// (sext X) sdiv C --> sext (X sdiv C)
@@ -1154,7 +1150,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
Constant *NarrowDivisor =
ConstantExpr::getTrunc(cast<Constant>(Op1), Op0Src->getType());
Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor);
- return new SExtInst(NarrowOp, Op0->getType());
+ return new SExtInst(NarrowOp, Ty);
}
// -X / C --> X / -C (if the negation doesn't overflow).
@@ -1162,7 +1158,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// checking if all elements are not the min-signed-val.
if (!Op1C->isMinSignedValue() &&
match(Op0, m_NSWSub(m_Zero(), m_Value(X)))) {
- Constant *NegC = ConstantInt::get(I.getType(), -(*Op1C));
+ Constant *NegC = ConstantInt::get(Ty, -(*Op1C));
Instruction *BO = BinaryOperator::CreateSDiv(X, NegC);
BO->setIsExact(I.isExact());
return BO;
@@ -1175,9 +1171,19 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return BinaryOperator::CreateNSWNeg(
Builder.CreateSDiv(X, Y, I.getName(), I.isExact()));
+ // abs(X) / X --> X > -1 ? 1 : -1
+ // X / abs(X) --> X > -1 ? 1 : -1
+ if (match(&I, m_c_BinOp(
+ m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One())),
+ m_Deferred(X)))) {
+ Constant *NegOne = ConstantInt::getAllOnesValue(Ty);
+ Value *Cond = Builder.CreateICmpSGT(X, NegOne);
+ return SelectInst::Create(Cond, ConstantInt::get(Ty, 1), NegOne);
+ }
+
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a udiv.
- APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
+ APInt Mask(APInt::getSignMask(Ty->getScalarSizeInBits()));
if (MaskedValueIsZero(Op0, Mask, 0, &I)) {
if (MaskedValueIsZero(Op1, Mask, 0, &I)) {
// X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
@@ -1186,6 +1192,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return BO;
}
+ if (match(Op1, m_NegatedPower2())) {
+ // X sdiv (-(1 << C)) -> -(X sdiv (1 << C)) ->
+ // -> -(X udiv (1 << C)) -> -(X u>> C)
+ return BinaryOperator::CreateNeg(Builder.Insert(foldUDivPow2Cst(
+ Op0, ConstantExpr::getNeg(cast<Constant>(Op1)), I, *this)));
+ }
+
if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
@@ -1262,7 +1275,7 @@ static Instruction *foldFDivConstantDividend(BinaryOperator &I) {
return BinaryOperator::CreateFDivFMF(NewC, X, &I);
}
-Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
if (Value *V = SimplifyFDivInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
@@ -1354,10 +1367,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
// X / fabs(X) -> copysign(1.0, X)
// fabs(X) / X -> copysign(1.0, X)
if (I.hasNoNaNs() && I.hasNoInfs() &&
- (match(&I,
- m_FDiv(m_Value(X), m_Intrinsic<Intrinsic::fabs>(m_Deferred(X)))) ||
- match(&I, m_FDiv(m_Intrinsic<Intrinsic::fabs>(m_Value(X)),
- m_Deferred(X))))) {
+ (match(&I, m_FDiv(m_Value(X), m_FAbs(m_Deferred(X)))) ||
+ match(&I, m_FDiv(m_FAbs(m_Value(X)), m_Deferred(X))))) {
Value *V = Builder.CreateBinaryIntrinsic(
Intrinsic::copysign, ConstantFP::get(I.getType(), 1.0), X, &I);
return replaceInstUsesWith(I, V);
@@ -1369,7 +1380,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
/// instructions (urem and srem). It is called by the visitors to those integer
/// remainder instructions.
/// Common integer remainder transforms
-Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
+Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
@@ -1407,7 +1418,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
return nullptr;
}
-Instruction *InstCombiner::visitURem(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
if (Value *V = SimplifyURemInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1458,7 +1469,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
return nullptr;
}
-Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) {
if (Value *V = SimplifySRemInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1481,7 +1492,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
// -X srem Y --> -(X srem Y)
Value *X, *Y;
if (match(&I, m_SRem(m_OneUse(m_NSWSub(m_Zero(), m_Value(X))), m_Value(Y))))
- return BinaryOperator::CreateNSWNeg(Builder.CreateSRem(X, Y));
+ return BinaryOperator::CreateNSWNeg(Builder.CreateSRem(X, Y));
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a urem.
@@ -1495,7 +1506,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
// If it's a constant vector, flip any negative values positive.
if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) {
Constant *C = cast<Constant>(Op1);
- unsigned VWidth = cast<VectorType>(C->getType())->getNumElements();
+ unsigned VWidth = cast<FixedVectorType>(C->getType())->getNumElements();
bool hasNegative = false;
bool hasMissing = false;
@@ -1530,7 +1541,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
return nullptr;
}
-Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitFRem(BinaryOperator &I) {
if (Value *V = SimplifyFRemInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
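
Among the visitMul() changes above is the new fold ((ashr X, 31) | 1) * X --> abs(X). A small self-check of the arithmetic identity it relies on, written as plain C++ (illustrative only; it assumes '>>' on a negative int is an arithmetic shift, mirroring the IR-level ashr):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Spot-check of the identity behind the fold, for 32-bit values other
// than INT_MIN: ((x >> 31) | 1) * x == abs(x).
int main() {
  for (int32_t x : {-100, -7, -1, 0, 1, 7, 100}) {
    int32_t sign = (x >> 31) | 1; // -1 for negative x, +1 otherwise
    assert(sign * x == std::abs(x));
  }
  return 0;
}
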
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
index 3fe615ac5439..7718c8b0eedd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -42,6 +42,9 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include <cassert>
+#include <cstdint>
#include <functional>
#include <tuple>
#include <type_traits>
@@ -112,6 +115,19 @@ Negator::~Negator() {
}
#endif
+// Due to InstCombine's worklist management, there is no guarantee that each
+// instruction we encounter has already been visited by InstCombine. In
+// particular, that means we have to canonicalize constants to the RHS
+// ourselves, since that is sometimes helpful.
+std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
+ assert(I->getNumOperands() == 2 && "Only for binops!");
+ std::array<Value *, 2> Ops{I->getOperand(0), I->getOperand(1)};
+ if (I->isCommutative() && InstCombiner::getComplexity(I->getOperand(0)) <
+ InstCombiner::getComplexity(I->getOperand(1)))
+ std::swap(Ops[0], Ops[1]);
+ return Ops;
+}
+
// FIXME: can this be reworked into a worklist-based algorithm while preserving
// the depth-first, early bailout traversal?
LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
@@ -156,11 +172,13 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
// In some cases we can give the answer without further recursion.
switch (I->getOpcode()) {
- case Instruction::Add:
+ case Instruction::Add: {
+ std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I);
// `inc` is always negatible.
- if (match(I->getOperand(1), m_One()))
- return Builder.CreateNot(I->getOperand(0), I->getName() + ".neg");
+ if (match(Ops[1], m_One()))
+ return Builder.CreateNot(Ops[0], I->getName() + ".neg");
break;
+ }
case Instruction::Xor:
// `not` is always negatible.
if (match(I, m_Not(m_Value(X))))
@@ -181,6 +199,10 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
}
return BO;
}
+ // While we could negate exact arithmetic shift:
+ // ashr exact %x, C --> sdiv exact i8 %x, -1<<C
+ // iff C != 0 and C u< bitwidth(%x), we don't want to,
+ // because division is *THAT* much worse than a shift.
break;
}
case Instruction::SExt:
@@ -197,26 +219,28 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
break; // Other instructions require recursive reasoning.
}
+ if (I->getOpcode() == Instruction::Sub &&
+ (I->hasOneUse() || match(I->getOperand(0), m_ImmConstant()))) {
+ // `sub` is always negatible.
+ // However, only do this if either the old `sub` doesn't stick around or
+ // it was subtracting from a constant. Otherwise, this isn't profitable.
+ return Builder.CreateSub(I->getOperand(1), I->getOperand(0),
+ I->getName() + ".neg");
+ }
+
// Some other cases, while they still don't require recursion,
// are restricted to the one-use case.
if (!V->hasOneUse())
return nullptr;
switch (I->getOpcode()) {
- case Instruction::Sub:
- // `sub` is always negatible.
- // But if the old `sub` sticks around, even thought we don't increase
- // instruction count, this is a likely regression since we increased
- // live-range of *both* of the operands, which might lead to more spilling.
- return Builder.CreateSub(I->getOperand(1), I->getOperand(0),
- I->getName() + ".neg");
case Instruction::SDiv:
// `sdiv` is negatible if divisor is not undef/INT_MIN/1.
// While this is normally not behind a use-check,
// let's consider division to be special since it's costly.
if (auto *Op1C = dyn_cast<Constant>(I->getOperand(1))) {
- if (!Op1C->containsUndefElement() && Op1C->isNotMinSignedValue() &&
- Op1C->isNotOneValue()) {
+ if (!Op1C->containsUndefOrPoisonElement() &&
+ Op1C->isNotMinSignedValue() && Op1C->isNotOneValue()) {
Value *BO =
Builder.CreateSDiv(I->getOperand(0), ConstantExpr::getNeg(Op1C),
I->getName() + ".neg");
@@ -237,6 +261,13 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
}
switch (I->getOpcode()) {
+ case Instruction::Freeze: {
+ // `freeze` is negatible if its operand is negatible.
+ Value *NegOp = negate(I->getOperand(0), Depth + 1);
+ if (!NegOp) // Early return.
+ return nullptr;
+ return Builder.CreateFreeze(NegOp, I->getName() + ".neg");
+ }
case Instruction::PHI: {
// `phi` is negatible if all the incoming values are negatible.
auto *PHI = cast<PHINode>(I);
@@ -254,20 +285,16 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
return NegatedPHI;
}
case Instruction::Select: {
- {
- // `abs`/`nabs` is always negatible.
- Value *LHS, *RHS;
- SelectPatternFlavor SPF =
- matchSelectPattern(I, LHS, RHS, /*CastOp=*/nullptr, Depth).Flavor;
- if (SPF == SPF_ABS || SPF == SPF_NABS) {
- auto *NewSelect = cast<SelectInst>(I->clone());
- // Just swap the operands of the select.
- NewSelect->swapValues();
- // Don't swap prof metadata, we didn't change the branch behavior.
- NewSelect->setName(I->getName() + ".neg");
- Builder.Insert(NewSelect);
- return NewSelect;
- }
+ if (isKnownNegation(I->getOperand(1), I->getOperand(2))) {
+ // If one hand of the select is known to be the negation of the other
+ // hand, just swap the hands around.
+ auto *NewSelect = cast<SelectInst>(I->clone());
+ // Just swap the operands of the select.
+ NewSelect->swapValues();
+ // Don't swap prof metadata, we didn't change the branch behavior.
+ NewSelect->setName(I->getName() + ".neg");
+ Builder.Insert(NewSelect);
+ return NewSelect;
}
// `select` is negatible if both hands of `select` are negatible.
Value *NegOp1 = negate(I->getOperand(1), Depth + 1);
@@ -323,51 +350,81 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
}
case Instruction::Shl: {
// `shl` is negatible if the first operand is negatible.
- Value *NegOp0 = negate(I->getOperand(0), Depth + 1);
- if (!NegOp0) // Early return.
+ if (Value *NegOp0 = negate(I->getOperand(0), Depth + 1))
+ return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg");
+ // Otherwise, `shl %x, C` can be interpreted as `mul %x, 1<<C`.
+ auto *Op1C = dyn_cast<Constant>(I->getOperand(1));
+ if (!Op1C) // Early return.
return nullptr;
- return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg");
+ return Builder.CreateMul(
+ I->getOperand(0),
+ ConstantExpr::getShl(Constant::getAllOnesValue(Op1C->getType()), Op1C),
+ I->getName() + ".neg");
}
- case Instruction::Or:
+ case Instruction::Or: {
if (!haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL, &AC, I,
&DT))
return nullptr; // Don't know how to handle `or` in general.
+ std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I);
// `or`/`add` are interchangeable when operands have no common bits set.
// `inc` is always negatible.
- if (match(I->getOperand(1), m_One()))
- return Builder.CreateNot(I->getOperand(0), I->getName() + ".neg");
+ if (match(Ops[1], m_One()))
+ return Builder.CreateNot(Ops[0], I->getName() + ".neg");
// Else, just defer to Instruction::Add handling.
LLVM_FALLTHROUGH;
+ }
case Instruction::Add: {
// `add` is negatible if both of its operands are negatible.
- Value *NegOp0 = negate(I->getOperand(0), Depth + 1);
- if (!NegOp0) // Early return.
- return nullptr;
- Value *NegOp1 = negate(I->getOperand(1), Depth + 1);
- if (!NegOp1)
+ SmallVector<Value *, 2> NegatedOps, NonNegatedOps;
+ for (Value *Op : I->operands()) {
+ // Can we sink the negation into this operand?
+ if (Value *NegOp = negate(Op, Depth + 1)) {
+ NegatedOps.emplace_back(NegOp); // Successfully negated operand!
+ continue;
+ }
+ // Failed to sink negation into this operand. IFF we started from negation
+ // and we manage to sink negation into one operand, we can still do this.
+ if (!IsTrulyNegation)
+ return nullptr;
+ NonNegatedOps.emplace_back(Op); // Just record which operand that was.
+ }
+ assert((NegatedOps.size() + NonNegatedOps.size()) == 2 &&
+ "Internal consistency sanity check.");
+ // Did we manage to sink negation into both of the operands?
+ if (NegatedOps.size() == 2) // Then we get to keep the `add`!
+ return Builder.CreateAdd(NegatedOps[0], NegatedOps[1],
+ I->getName() + ".neg");
+ assert(IsTrulyNegation && "We should have early-exited then.");
+ // Completely failed to sink negation?
+ if (NonNegatedOps.size() == 2)
return nullptr;
- return Builder.CreateAdd(NegOp0, NegOp1, I->getName() + ".neg");
+ // 0-(a+b) --> (-a)-b
+ return Builder.CreateSub(NegatedOps[0], NonNegatedOps[0],
+ I->getName() + ".neg");
}
- case Instruction::Xor:
+ case Instruction::Xor: {
+ std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I);
// `xor` is negatible if one of its operands is invertible.
// FIXME: InstCombineInverter? But how to connect Inverter and Negator?
- if (auto *C = dyn_cast<Constant>(I->getOperand(1))) {
- Value *Xor = Builder.CreateXor(I->getOperand(0), ConstantExpr::getNot(C));
+ if (auto *C = dyn_cast<Constant>(Ops[1])) {
+ Value *Xor = Builder.CreateXor(Ops[0], ConstantExpr::getNot(C));
return Builder.CreateAdd(Xor, ConstantInt::get(Xor->getType(), 1),
I->getName() + ".neg");
}
return nullptr;
+ }
case Instruction::Mul: {
+ std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I);
// `mul` is negatible if one of its operands is negatible.
Value *NegatedOp, *OtherOp;
// First try the second operand, in case it's a constant it will be best to
// just invert it instead of sinking the `neg` deeper.
- if (Value *NegOp1 = negate(I->getOperand(1), Depth + 1)) {
+ if (Value *NegOp1 = negate(Ops[1], Depth + 1)) {
NegatedOp = NegOp1;
- OtherOp = I->getOperand(0);
- } else if (Value *NegOp0 = negate(I->getOperand(0), Depth + 1)) {
+ OtherOp = Ops[0];
+ } else if (Value *NegOp0 = negate(Ops[0], Depth + 1)) {
NegatedOp = NegOp0;
- OtherOp = I->getOperand(1);
+ OtherOp = Ops[1];
} else
// Can't negate either of them.
return nullptr;
@@ -430,7 +487,7 @@ LLVM_NODISCARD Optional<Negator::Result> Negator::run(Value *Root) {
}
LLVM_NODISCARD Value *Negator::Negate(bool LHSIsZero, Value *Root,
- InstCombiner &IC) {
+ InstCombinerImpl &IC) {
++NegatorTotalNegationsAttempted;
LLVM_DEBUG(dbgs() << "Negator: attempting to sink negation into " << *Root
<< "\n");
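
The reworked Instruction::Add case above now succeeds when only one operand is negatible, by emitting a subtraction instead. A quick check of the identity it relies on, 0 - (a + b) == (0 - a) - b, in plain C++ using unsigned (wrapping) arithmetic to mirror ordinary IR adds and subs (illustrative only):

#include <cassert>
#include <cstdint>

// Wrapping arithmetic check of the "0-(a+b) --> (-a)-b" rewrite.
int main() {
  uint32_t a = 0x12345678u, b = 0xdeadbeefu;
  assert(0u - (a + b) == (0u - a) - b);
  return 0;
}
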
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 2b2f2e1b9470..b211b0813611 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -13,11 +13,14 @@
#include "InstCombineInternal.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
+
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -27,10 +30,16 @@ static cl::opt<unsigned>
MaxNumPhis("instcombine-max-num-phis", cl::init(512),
cl::desc("Maximum number phis to handle in intptr/ptrint folding"));
+STATISTIC(NumPHIsOfInsertValues,
+ "Number of phi-of-insertvalue turned into insertvalue-of-phis");
+STATISTIC(NumPHIsOfExtractValues,
+ "Number of phi-of-extractvalue turned into extractvalue-of-phi");
+STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd");
+
/// The PHI arguments will be folded into a single operation with a PHI node
/// as input. The debug location of the single operation will be the merged
/// locations of the original PHI node arguments.
-void InstCombiner::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) {
+void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) {
auto *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
Inst->setDebugLoc(FirstInst->getDebugLoc());
// We do not expect a CallInst here, otherwise, N-way merging of DebugLoc
@@ -93,7 +102,7 @@ void InstCombiner::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) {
// ptr_val_inc = ...
// ...
//
-Instruction *InstCombiner::FoldIntegerTypedPHI(PHINode &PN) {
+Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
if (!PN.getType()->isIntegerTy())
return nullptr;
if (!PN.hasOneUse())
@@ -290,9 +299,86 @@ Instruction *InstCombiner::FoldIntegerTypedPHI(PHINode &PN) {
IntToPtr->getOperand(0)->getType());
}
+/// If we have something like phi [insertvalue(a,b,0), insertvalue(c,d,0)],
+/// turn this into a phi[a,c] and phi[b,d] and a single insertvalue.
+Instruction *
+InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) {
+ auto *FirstIVI = cast<InsertValueInst>(PN.getIncomingValue(0));
+
+ // Scan to see if all operands are `insertvalue`'s with the same indices,
+ // and all have a single use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ auto *I = dyn_cast<InsertValueInst>(PN.getIncomingValue(i));
+ if (!I || !I->hasOneUser() || I->getIndices() != FirstIVI->getIndices())
+ return nullptr;
+ }
+
+ // For each operand of an `insertvalue`
+ std::array<PHINode *, 2> NewOperands;
+ for (int OpIdx : {0, 1}) {
+ auto *&NewOperand = NewOperands[OpIdx];
+ // Create a new PHI node to receive the values the operand has in each
+ // incoming basic block.
+ NewOperand = PHINode::Create(
+ FirstIVI->getOperand(OpIdx)->getType(), PN.getNumIncomingValues(),
+ FirstIVI->getOperand(OpIdx)->getName() + ".pn");
+ // And populate each operand's PHI with said values.
+ for (auto Incoming : zip(PN.blocks(), PN.incoming_values()))
+ NewOperand->addIncoming(
+ cast<InsertValueInst>(std::get<1>(Incoming))->getOperand(OpIdx),
+ std::get<0>(Incoming));
+ InsertNewInstBefore(NewOperand, PN);
+ }
+
+ // And finally, create `insertvalue` over the newly-formed PHI nodes.
+ auto *NewIVI = InsertValueInst::Create(NewOperands[0], NewOperands[1],
+ FirstIVI->getIndices(), PN.getName());
+
+ PHIArgMergedDebugLoc(NewIVI, PN);
+ ++NumPHIsOfInsertValues;
+ return NewIVI;
+}
+
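+ [Illustrative LLVM IR sketch of the phi-of-insertvalue fold above; function, block and value names are invented, not part of the patch.]
define { i32, i32 } @phi_of_insertvalue(i1 %c, { i32, i32 } %a, i32 %b, { i32, i32 } %p, i32 %q) {
entry:
  br i1 %c, label %left, label %right
left:
  %iv.l = insertvalue { i32, i32 } %a, i32 %b, 0
  br label %merge
right:
  %iv.r = insertvalue { i32, i32 } %p, i32 %q, 0
  br label %merge
merge:
  ; before: phi of the two single-use insertvalue results
  %r = phi { i32, i32 } [ %iv.l, %left ], [ %iv.r, %right ]
  ret { i32, i32 } %r
}
; after the fold, %merge would roughly read:
;   %a.pn = phi { i32, i32 } [ %a, %left ], [ %p, %right ]
;   %b.pn = phi i32 [ %b, %left ], [ %q, %right ]
;   %r    = insertvalue { i32, i32 } %a.pn, i32 %b.pn, 0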
+/// If we have something like phi [extractvalue(a,0), extractvalue(b,0)],
+/// turn this into a phi[a,b] and a single extractvalue.
+Instruction *
+InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) {
+ auto *FirstEVI = cast<ExtractValueInst>(PN.getIncomingValue(0));
+
+ // Scan to see if all operands are `extractvalue`'s with the same indices,
+ // and all have a single use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ auto *I = dyn_cast<ExtractValueInst>(PN.getIncomingValue(i));
+ if (!I || !I->hasOneUser() || I->getIndices() != FirstEVI->getIndices() ||
+ I->getAggregateOperand()->getType() !=
+ FirstEVI->getAggregateOperand()->getType())
+ return nullptr;
+ }
+
+ // Create a new PHI node to receive the values the aggregate operand has
+ // in each incoming basic block.
+ auto *NewAggregateOperand = PHINode::Create(
+ FirstEVI->getAggregateOperand()->getType(), PN.getNumIncomingValues(),
+ FirstEVI->getAggregateOperand()->getName() + ".pn");
+ // And populate the PHI with said values.
+ for (auto Incoming : zip(PN.blocks(), PN.incoming_values()))
+ NewAggregateOperand->addIncoming(
+ cast<ExtractValueInst>(std::get<1>(Incoming))->getAggregateOperand(),
+ std::get<0>(Incoming));
+ InsertNewInstBefore(NewAggregateOperand, PN);
+
+ // And finally, create `extractvalue` over the newly-formed PHI nodes.
+ auto *NewEVI = ExtractValueInst::Create(NewAggregateOperand,
+ FirstEVI->getIndices(), PN.getName());
+
+ PHIArgMergedDebugLoc(NewEVI, PN);
+ ++NumPHIsOfExtractValues;
+ return NewEVI;
+}
+
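+ [The extractvalue counterpart, again as a hedged sketch with invented names: the aggregate operands merge through one phi and a single extractvalue remains.]
define i32 @phi_of_extractvalue(i1 %c, { i32, i32 } %a, { i32, i32 } %b) {
entry:
  br i1 %c, label %left, label %right
left:
  %ev.l = extractvalue { i32, i32 } %a, 0
  br label %merge
right:
  %ev.r = extractvalue { i32, i32 } %b, 0
  br label %merge
merge:
  %r = phi i32 [ %ev.l, %left ], [ %ev.r, %right ]
  ret i32 %r
}
; after the fold, %merge would roughly read:
;   %agg.pn = phi { i32, i32 } [ %a, %left ], [ %b, %right ]
;   %r      = extractvalue { i32, i32 } %agg.pn, 0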
/// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the
-/// adds all have a single use, turn this into a phi and a single binop.
-Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
+/// adds all have a single user, turn this into a phi and a single binop.
+Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
unsigned Opc = FirstInst->getOpcode();
@@ -302,10 +388,10 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
Type *LHSType = LHSVal->getType();
Type *RHSType = RHSVal->getType();
- // Scan to see if all operands are the same opcode, and all have one use.
+ // Scan to see if all operands are the same opcode, and all have one user.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
- if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
+ if (!I || I->getOpcode() != Opc || !I->hasOneUser() ||
// Verify type of the LHS matches so we don't fold cmp's of different
// types.
I->getOperand(0)->getType() != LHSType ||
@@ -385,7 +471,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
return NewBinOp;
}
-Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
+Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
@@ -401,11 +487,12 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
bool AllInBounds = true;
- // Scan to see if all operands are the same opcode, and all have one use.
+ // Scan to see if all operands are the same opcode, and all have one user.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
- if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
- GEP->getNumOperands() != FirstInst->getNumOperands())
+ GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ if (!GEP || !GEP->hasOneUser() || GEP->getType() != FirstInst->getType() ||
+ GEP->getNumOperands() != FirstInst->getNumOperands())
return nullptr;
AllInBounds &= GEP->isInBounds();
@@ -494,7 +581,6 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
return NewGEP;
}
-
/// Return true if we know that it is safe to sink the load out of the block
/// that defines it. This means that it must be obvious the value of the load is
/// not changed from the point of the load to the end of the block it is in.
@@ -506,8 +592,14 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end();
for (++BBI; BBI != E; ++BBI)
- if (BBI->mayWriteToMemory())
+ if (BBI->mayWriteToMemory()) {
+ // Calls that only access inaccessible memory do not block sinking the
+ // load.
+ if (auto *CB = dyn_cast<CallBase>(BBI))
+ if (CB->onlyAccessesInaccessibleMemory())
+ continue;
return false;
+ }
// Check for non-address taken alloca. If not address-taken already, it isn't
// profitable to do this xform.
@@ -540,7 +632,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
return true;
}
-Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
+Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
// FIXME: This is overconservative; this transform is allowed in some cases
@@ -573,7 +665,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
- if (!LI || !LI->hasOneUse())
+ if (!LI || !LI->hasOneUser())
return nullptr;
// We can't sink the load if the loaded value could be modified between
@@ -654,7 +746,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
/// TODO: This function could handle other cast types, but then it might
/// require special-casing a cast from the 'i1' type. See the comment in
/// FoldPHIArgOpIntoPHI() about pessimizing illegal integer types.
-Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
+Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) {
// We cannot create a new instruction after the PHI if the terminator is an
// EHPad because there is no valid insertion point.
if (Instruction *TI = Phi.getParent()->getTerminator())
@@ -686,8 +778,8 @@ Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
unsigned NumConsts = 0;
for (Value *V : Phi.incoming_values()) {
if (auto *Zext = dyn_cast<ZExtInst>(V)) {
- // All zexts must be identical and have one use.
- if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUse())
+ // All zexts must be identical and have one user.
+ if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUser())
return nullptr;
NewIncoming.push_back(Zext->getOperand(0));
NumZexts++;
@@ -728,7 +820,7 @@ Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
/// If all operands to a PHI node are the same "unary" operator and they all are
/// only used by the PHI, PHI together their inputs, and do the operation once,
/// to the result of the PHI.
-Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
// We cannot create a new instruction after the PHI if the terminator is an
// EHPad because there is no valid insertion point.
if (Instruction *TI = PN.getParent()->getTerminator())
@@ -738,9 +830,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
if (isa<GetElementPtrInst>(FirstInst))
- return FoldPHIArgGEPIntoPHI(PN);
+ return foldPHIArgGEPIntoPHI(PN);
if (isa<LoadInst>(FirstInst))
- return FoldPHIArgLoadIntoPHI(PN);
+ return foldPHIArgLoadIntoPHI(PN);
+ if (isa<InsertValueInst>(FirstInst))
+ return foldPHIArgInsertValueInstructionIntoPHI(PN);
+ if (isa<ExtractValueInst>(FirstInst))
+ return foldPHIArgExtractValueInstructionIntoPHI(PN);
// Scan the instruction, looking for input operations that can be folded away.
// If all input operands to the phi are the same instruction (e.g. a cast from
@@ -763,7 +859,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// otherwise call FoldPHIArgBinOpIntoPHI.
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
if (!ConstantOp)
- return FoldPHIArgBinOpIntoPHI(PN);
+ return foldPHIArgBinOpIntoPHI(PN);
} else {
return nullptr; // Cannot fold this operation.
}
@@ -771,7 +867,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
- if (!I || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ if (!I || !I->hasOneUser() || !I->isSameOperationAs(FirstInst))
return nullptr;
if (CastSrcTy) {
if (I->getOperand(0)->getType() != CastSrcTy)
@@ -923,7 +1019,7 @@ struct LoweredPHIRecord {
LoweredPHIRecord(PHINode *pn, unsigned Sh)
: PN(pn), Shift(Sh), Width(0) {}
};
-}
+} // namespace
namespace llvm {
template<>
@@ -944,7 +1040,7 @@ namespace llvm {
LHS.Width == RHS.Width;
}
};
-}
+} // namespace llvm
/// This is an integer PHI and we know that it has an illegal type: see if it is
@@ -955,7 +1051,7 @@ namespace llvm {
/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
/// inttoptr. We should produce new PHIs in the right type.
///
-Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHIUsers - Keep track of all of the truncated values extracted from a set
// of PHIs, along with their offset. These are the things we want to rewrite.
SmallVector<PHIUsageRecord, 16> PHIUsers;
@@ -1129,13 +1225,85 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
return replaceInstUsesWith(FirstPhi, Undef);
}
+static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
+ const DominatorTree &DT) {
+ // Simplify the following patterns:
+ //   if (cond)
+ //   /       \
+ //  ...      ...
+ //   \       /
+ //    phi [true] [false]
+ if (!PN.getType()->isIntegerTy(1))
+ return nullptr;
+
+ if (PN.getNumOperands() != 2)
+ return nullptr;
+
+ // Make sure all inputs are constants.
+ if (!all_of(PN.operands(), [](Value *V) { return isa<ConstantInt>(V); }))
+ return nullptr;
+
+ BasicBlock *BB = PN.getParent();
+ // Do not bother with unreachable instructions.
+ if (!DT.isReachableFromEntry(BB))
+ return nullptr;
+
+ // Same inputs.
+ if (PN.getOperand(0) == PN.getOperand(1))
+ return PN.getOperand(0);
+
+ BasicBlock *TruePred = nullptr, *FalsePred = nullptr;
+ for (auto *Pred : predecessors(BB)) {
+ auto *Input = cast<ConstantInt>(PN.getIncomingValueForBlock(Pred));
+ if (Input->isAllOnesValue())
+ TruePred = Pred;
+ else
+ FalsePred = Pred;
+ }
+ assert(TruePred && FalsePred && "Must be!");
+
+ // Check which edge of the dominator dominates the true input. If it is the
+ // false edge, we should invert the condition.
+ auto *IDom = DT.getNode(BB)->getIDom()->getBlock();
+ auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
+ if (!BI || BI->isUnconditional())
+ return nullptr;
+
+ // Check that the edges outgoing from the idom's terminator dominate the
+ // respective inputs of the Phi.
+ BasicBlockEdge TrueOutEdge(IDom, BI->getSuccessor(0));
+ BasicBlockEdge FalseOutEdge(IDom, BI->getSuccessor(1));
+
+ BasicBlockEdge TrueIncEdge(TruePred, BB);
+ BasicBlockEdge FalseIncEdge(FalsePred, BB);
+
+ auto *Cond = BI->getCondition();
+ if (DT.dominates(TrueOutEdge, TrueIncEdge) &&
+ DT.dominates(FalseOutEdge, FalseIncEdge))
+ // This Phi is actually equivalent to the branching condition of IDom.
+ return Cond;
+ else if (DT.dominates(TrueOutEdge, FalseIncEdge) &&
+ DT.dominates(FalseOutEdge, TrueIncEdge)) {
+ // This Phi is actually the opposite of the branching condition of IDom.
+ // We invert the condition, which will potentially open up some
+ // opportunities for sinking.
+ auto InsertPt = BB->getFirstInsertionPt();
+ if (InsertPt != BB->end()) {
+ Self.Builder.SetInsertPoint(&*InsertPt);
+ return Self.Builder.CreateNot(Cond);
+ }
+ }
+
+ return nullptr;
+}
+
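+ [A minimal sketch of the control-flow simplification above; names invented. An i1 phi whose constant inputs mirror the dominating conditional branch is replaced by the branch condition itself.]
define i1 @phi_of_bools(i32 %x) {
entry:
  %cond = icmp sgt i32 %x, 0
  br i1 %cond, label %t, label %f
t:
  br label %merge
f:
  br label %merge
merge:
  ; true arrives on the edge dominated by the true successor of %entry,
  ; false on the other edge, so the phi is equivalent to %cond.
  %r = phi i1 [ true, %t ], [ false, %f ]
  ret i1 %r
}
; If the constants arrived on the opposite edges, the phi would instead be
; replaced by a freshly created 'xor i1 %cond, true' at the start of %merge.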
// PHINode simplification
//
-Instruction *InstCombiner::visitPHINode(PHINode &PN) {
+Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
if (Value *V = SimplifyInstruction(&PN, SQ.getWithInstruction(&PN)))
return replaceInstUsesWith(PN, V);
- if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN))
+ if (Instruction *Result = foldPHIArgZextsIntoPHI(PN))
return Result;
// If all PHI operands are the same operation, pull them through the PHI,
@@ -1143,18 +1311,16 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (isa<Instruction>(PN.getIncomingValue(0)) &&
isa<Instruction>(PN.getIncomingValue(1)) &&
cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
- cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
- // FIXME: The hasOneUse check will fail for PHIs that use the value more
- // than themselves more than once.
- PN.getIncomingValue(0)->hasOneUse())
- if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
+ cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
+ PN.getIncomingValue(0)->hasOneUser())
+ if (Instruction *Result = foldPHIArgOpIntoPHI(PN))
return Result;
// If this is a trivial cycle in the PHI node graph, remove it. Basically, if
// this PHI only has a single use (a PHI), and if that PHI only has one use (a
// PHI)... break the cycle.
if (PN.hasOneUse()) {
- if (Instruction *Result = FoldIntegerTypedPHI(PN))
+ if (Instruction *Result = foldIntegerTypedPHI(PN))
return Result;
Instruction *PHIUser = cast<Instruction>(PN.user_back());
@@ -1267,6 +1433,21 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
}
}
+ // Is there an identical PHI node in this basic block?
+ for (PHINode &IdenticalPN : PN.getParent()->phis()) {
+ // Ignore the PHI node itself.
+ if (&IdenticalPN == &PN)
+ continue;
+ // Note that even though we've just canonicalized this PHI, due to the
+ // worklist visitation order, there are no guarantees that *every* PHI
+ // has been canonicalized, so we can't just compare operand ranges.
+ if (!PN.isIdenticalToWhenDefined(&IdenticalPN))
+ continue;
+ // Just use that PHI instead then.
+ ++NumPHICSEs;
+ return replaceInstUsesWith(PN, &IdenticalPN);
+ }
+
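+ [A small sketch of the PHI CSE step counted by NumPHICSEs; names invented.]
define i32 @phi_cse(i1 %c, i32 %a, i32 %b) {
entry:
  br i1 %c, label %left, label %right
left:
  br label %merge
right:
  br label %merge
merge:
  %p1 = phi i32 [ %a, %left ], [ %b, %right ]
  %p2 = phi i32 [ %a, %left ], [ %b, %right ]
  %s = add i32 %p1, %p2
  ret i32 %s
}
; The two phis are identical when defined, so one is replaced by the other
; and %s ends up adding the surviving phi to itself.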
// If this is an integer PHI and we know that it has an illegal type, see if
// it is only used by trunc or trunc(lshr) operations. If so, we split the
// PHI into the various pieces being extracted. This sort of thing is
@@ -1276,5 +1457,9 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
+ // Ultimately, try to replace this Phi with a dominating condition.
+ if (auto *V = SimplifyUsingControlFlow(*this, PN, DT))
+ return replaceInstUsesWith(PN, V);
+
return nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 1e43014e7d32..5f174aae09ec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -38,6 +38,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <utility>
@@ -46,6 +47,11 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
+/// FIXME: Enabled by default until the pattern is well supported.
+static cl::opt<bool> EnableUnsafeSelectTransform(
+ "instcombine-unsafe-select-transform", cl::init(true),
+ cl::desc("Enable poison-unsafe select to and/or transform"));
+
static Value *createMinMax(InstCombiner::BuilderTy &Builder,
SelectPatternFlavor SPF, Value *A, Value *B) {
CmpInst::Predicate Pred = getMinMaxPred(SPF);
@@ -57,7 +63,7 @@ static Value *createMinMax(InstCombiner::BuilderTy &Builder,
/// constant of a binop.
static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,
const TargetLibraryInfo &TLI,
- InstCombiner &IC) {
+ InstCombinerImpl &IC) {
// The select condition must be an equality compare with a constant operand.
Value *X;
Constant *C;
@@ -258,29 +264,9 @@ static unsigned getSelectFoldableOperands(BinaryOperator *I) {
}
}
-/// For the same transformation as the previous function, return the identity
-/// constant that goes into the select.
-static APInt getSelectFoldableConstant(BinaryOperator *I) {
- switch (I->getOpcode()) {
- default: llvm_unreachable("This cannot happen!");
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- return APInt::getNullValue(I->getType()->getScalarSizeInBits());
- case Instruction::And:
- return APInt::getAllOnesValue(I->getType()->getScalarSizeInBits());
- case Instruction::Mul:
- return APInt(I->getType()->getScalarSizeInBits(), 1);
- }
-}
-
/// We have (select c, TI, FI), and we know that TI and FI have the same opcode.
-Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
- Instruction *FI) {
+Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI) {
// Don't break up min/max patterns. The hasOneUse checks below prevent that
// for most cases, but vector min/max with bitcasts can be transformed. If the
// one-use restrictions are eased for other patterns, we still don't want to
@@ -302,10 +288,9 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
if (auto *CondVTy = dyn_cast<VectorType>(CondTy)) {
- if (!FIOpndTy->isVectorTy())
- return nullptr;
- if (CondVTy->getNumElements() !=
- cast<VectorType>(FIOpndTy)->getNumElements())
+ if (!FIOpndTy->isVectorTy() ||
+ CondVTy->getElementCount() !=
+ cast<VectorType>(FIOpndTy)->getElementCount())
return nullptr;
// TODO: If the backend knew how to deal with casts better, we could
@@ -418,8 +403,8 @@ static bool isSelect01(const APInt &C1I, const APInt &C2I) {
/// Try to fold the select into one of the operands to allow further
/// optimization.
-Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
- Value *FalseVal) {
+Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
+ Value *FalseVal) {
// See the comment above GetSelectFoldableOperands for a description of the
// transformation we are doing here.
if (auto *TVI = dyn_cast<BinaryOperator>(TrueVal)) {
@@ -433,14 +418,15 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
}
if (OpToFold) {
- APInt CI = getSelectFoldableConstant(TVI);
+ Constant *C = ConstantExpr::getBinOpIdentity(TVI->getOpcode(),
+ TVI->getType(), true);
Value *OOp = TVI->getOperand(2-OpToFold);
// Avoid creating select between 2 constants unless it's selecting
// between 0, 1 and -1.
const APInt *OOpC;
bool OOpIsAPInt = match(OOp, m_APInt(OOpC));
- if (!isa<Constant>(OOp) || (OOpIsAPInt && isSelect01(CI, *OOpC))) {
- Value *C = ConstantInt::get(OOp->getType(), CI);
+ if (!isa<Constant>(OOp) ||
+ (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) {
Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C);
NewSel->takeName(TVI);
BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(),
@@ -464,14 +450,15 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
}
if (OpToFold) {
- APInt CI = getSelectFoldableConstant(FVI);
+ Constant *C = ConstantExpr::getBinOpIdentity(FVI->getOpcode(),
+ FVI->getType(), true);
Value *OOp = FVI->getOperand(2-OpToFold);
// Avoid creating select between 2 constants unless it's selecting
// between 0, 1 and -1.
const APInt *OOpC;
bool OOpIsAPInt = match(OOp, m_APInt(OOpC));
- if (!isa<Constant>(OOp) || (OOpIsAPInt && isSelect01(CI, *OOpC))) {
- Value *C = ConstantInt::get(OOp->getType(), CI);
+ if (!isa<Constant>(OOp) ||
+ (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) {
Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp);
NewSel->takeName(FVI);
BinaryOperator *BO = BinaryOperator::Create(FVI->getOpcode(),
@@ -1026,9 +1013,9 @@ static bool adjustMinMax(SelectInst &Sel, ICmpInst &Cmp) {
/// select (icmp Pred X, C1), C2, X --> select (icmp Pred' X, C2), X, C2
/// Note: if C1 != C2, this will change the icmp constant to the existing
/// constant operand of the select.
-static Instruction *
-canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
- InstCombiner &IC) {
+static Instruction *canonicalizeMinMaxWithConstant(SelectInst &Sel,
+ ICmpInst &Cmp,
+ InstCombinerImpl &IC) {
if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1)))
return nullptr;
@@ -1065,89 +1052,29 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
return &Sel;
}
-/// There are many select variants for each of ABS/NABS.
-/// In matchSelectPattern(), there are different compare constants, compare
-/// predicates/operands and select operands.
-/// In isKnownNegation(), there are different formats of negated operands.
-/// Canonicalize all these variants to 1 pattern.
-/// This makes CSE more likely.
static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp,
- InstCombiner &IC) {
+ InstCombinerImpl &IC) {
if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1)))
return nullptr;
- // Choose a sign-bit check for the compare (likely simpler for codegen).
- // ABS: (X <s 0) ? -X : X
- // NABS: (X <s 0) ? X : -X
Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor;
if (SPF != SelectPatternFlavor::SPF_ABS &&
SPF != SelectPatternFlavor::SPF_NABS)
return nullptr;
- Value *TVal = Sel.getTrueValue();
- Value *FVal = Sel.getFalseValue();
- assert(isKnownNegation(TVal, FVal) &&
- "Unexpected result from matchSelectPattern");
-
- // The compare may use the negated abs()/nabs() operand, or it may use
- // negation in non-canonical form such as: sub A, B.
- bool CmpUsesNegatedOp = match(Cmp.getOperand(0), m_Neg(m_Specific(TVal))) ||
- match(Cmp.getOperand(0), m_Neg(m_Specific(FVal)));
+ // Note that NSW flag can only be propagated for normal, non-negated abs!
+ bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS &&
+ match(RHS, m_NSWNeg(m_Specific(LHS)));
+ Constant *IntMinIsPoisonC =
+ ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison);
+ Instruction *Abs =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC);
- bool CmpCanonicalized = !CmpUsesNegatedOp &&
- match(Cmp.getOperand(1), m_ZeroInt()) &&
- Cmp.getPredicate() == ICmpInst::ICMP_SLT;
- bool RHSCanonicalized = match(RHS, m_Neg(m_Specific(LHS)));
+ if (SPF == SelectPatternFlavor::SPF_NABS)
+ return BinaryOperator::CreateNeg(Abs); // Always without NSW flag!
- // Is this already canonical?
- if (CmpCanonicalized && RHSCanonicalized)
- return nullptr;
-
- // If RHS is not canonical but is used by other instructions, don't
- // canonicalize it and potentially increase the instruction count.
- if (!RHSCanonicalized)
- if (!(RHS->hasOneUse() || (RHS->hasNUses(2) && CmpUsesNegatedOp)))
- return nullptr;
-
- // Create the canonical compare: icmp slt LHS 0.
- if (!CmpCanonicalized) {
- Cmp.setPredicate(ICmpInst::ICMP_SLT);
- Cmp.setOperand(1, ConstantInt::getNullValue(Cmp.getOperand(0)->getType()));
- if (CmpUsesNegatedOp)
- Cmp.setOperand(0, LHS);
- }
-
- // Create the canonical RHS: RHS = sub (0, LHS).
- if (!RHSCanonicalized) {
- assert(RHS->hasOneUse() && "RHS use number is not right");
- RHS = IC.Builder.CreateNeg(LHS);
- if (TVal == LHS) {
- // Replace false value.
- IC.replaceOperand(Sel, 2, RHS);
- FVal = RHS;
- } else {
- // Replace true value.
- IC.replaceOperand(Sel, 1, RHS);
- TVal = RHS;
- }
- }
-
- // If the select operands do not change, we're done.
- if (SPF == SelectPatternFlavor::SPF_NABS) {
- if (TVal == LHS)
- return &Sel;
- assert(FVal == LHS && "Unexpected results from matchSelectPattern");
- } else {
- if (FVal == LHS)
- return &Sel;
- assert(TVal == LHS && "Unexpected results from matchSelectPattern");
- }
-
- // We are swapping the select operands, so swap the metadata too.
- Sel.swapValues();
- Sel.swapProfMetadata();
- return &Sel;
+ return IC.replaceInstUsesWith(Sel, Abs);
}
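[A sketch of what the rewritten canonicalization now produces; names invented. The abs/nabs select pattern becomes a call to the llvm.abs intrinsic, with a plain negation wrapped around it for the nabs flavor.]
define i32 @abs_select(i32 %x) {
  %neg = sub nsw i32 0, %x
  %cmp = icmp slt i32 %x, 0
  %abs = select i1 %cmp, i32 %neg, i32 %x
  ret i32 %abs
}
; becomes roughly:
;   %abs = call i32 @llvm.abs.i32(i32 %x, i1 true)
;   ret i32 %abs
; (the i1 argument is true only because the negation here is nsw; a NABS
;  select additionally gets a 'sub i32 0, %abs' wrapped around the call)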
/// If we have a select with an equality comparison, then we know the value in
@@ -1166,15 +1093,55 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp,
///
/// We can't replace %sel with %add unless we strip away the flags.
/// TODO: Wrapping flags could be preserved in some cases with better analysis.
-static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp,
- const SimplifyQuery &Q) {
- if (!Cmp.isEquality())
+Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
+ ICmpInst &Cmp) {
+ // Value equivalence substitution requires an all-or-nothing replacement.
+ // It does not make sense for a vector compare where each lane is chosen
+ // independently.
+ if (!Cmp.isEquality() || Cmp.getType()->isVectorTy())
return nullptr;
// Canonicalize the pattern to ICMP_EQ by swapping the select operands.
Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue();
- if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
+ bool Swapped = false;
+ if (Cmp.getPredicate() == ICmpInst::ICMP_NE) {
std::swap(TrueVal, FalseVal);
+ Swapped = true;
+ }
+
+ // In X == Y ? f(X) : Z, try to evaluate f(Y) and replace the operand.
+ // Make sure Y cannot be undef though, as we might pick different values for
+ // undef in the icmp and in f(Y). Additionally, take care to avoid replacing
+ // X == Y ? X : Z with X == Y ? Y : Z, as that would lead to an infinite
+ // replacement cycle.
+ Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);
+ if (TrueVal != CmpLHS &&
+ isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT)) {
+ if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ,
+ /* AllowRefinement */ true))
+ return replaceOperand(Sel, Swapped ? 2 : 1, V);
+
+ // Even if TrueVal does not simplify, we can directly replace a use of
+ // CmpLHS with CmpRHS, as long as the instruction is not used anywhere
+ // else and is safe to speculatively execute (we may end up executing it
+ // with different operands, which should not cause side-effects or trigger
+ // undefined behavior). Only do this if CmpRHS is a constant, as
+ // profitability is not clear for other cases.
+ // FIXME: The replacement could be performed recursively.
+ if (match(CmpRHS, m_ImmConstant()) && !match(CmpLHS, m_ImmConstant()))
+ if (auto *I = dyn_cast<Instruction>(TrueVal))
+ if (I->hasOneUse() && isSafeToSpeculativelyExecute(I))
+ for (Use &U : I->operands())
+ if (U == CmpLHS) {
+ replaceUse(U, CmpRHS);
+ return &Sel;
+ }
+ }
+ if (TrueVal != CmpRHS &&
+ isGuaranteedNotToBeUndefOrPoison(CmpLHS, SQ.AC, &Sel, &DT))
+ if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ,
+ /* AllowRefinement */ true))
+ return replaceOperand(Sel, Swapped ? 2 : 1, V);
auto *FalseInst = dyn_cast<Instruction>(FalseVal);
if (!FalseInst)
@@ -1183,7 +1150,7 @@ static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp,
// InstSimplify already performed this fold if it was possible subject to
// current poison-generating flags. Try the transform again with
// poison-generating flags temporarily dropped.
- bool WasNUW = false, WasNSW = false, WasExact = false;
+ bool WasNUW = false, WasNSW = false, WasExact = false, WasInBounds = false;
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(FalseVal)) {
WasNUW = OBO->hasNoUnsignedWrap();
WasNSW = OBO->hasNoSignedWrap();
@@ -1194,17 +1161,20 @@ static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp,
WasExact = PEO->isExact();
FalseInst->setIsExact(false);
}
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(FalseVal)) {
+ WasInBounds = GEP->isInBounds();
+ GEP->setIsInBounds(false);
+ }
// Try each equivalence substitution possibility.
// We have an 'EQ' comparison, so the select's false value will propagate.
// Example:
// (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1
- Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ,
/* AllowRefinement */ false) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q,
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ,
/* AllowRefinement */ false) == TrueVal) {
- return FalseVal;
+ return replaceInstUsesWith(Sel, FalseVal);
}
// Restore poison-generating flags if the transform did not apply.
@@ -1214,6 +1184,8 @@ static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp,
FalseInst->setHasNoSignedWrap();
if (WasExact)
FalseInst->setIsExact();
+ if (WasInBounds)
+ cast<GetElementPtrInst>(FalseInst)->setIsInBounds();
return nullptr;
}
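[A hedged sketch of the new equality substitution; names invented. In "X == Y ? f(X) : Z", the use of X in the true arm may be replaced by Y, since both are known equal on that edge.]
define i32 @select_value_equivalence(i32 %x, i32 %z) {
  %cmp = icmp eq i32 %x, 42
  %add = add i32 %x, 1
  %sel = select i1 %cmp, i32 %add, i32 %z
  ret i32 %sel
}
; With %x replaced by 42 in the true arm, %add simplifies to 43, so the
; select becomes:  select i1 %cmp, i32 43, i32 %z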
@@ -1264,7 +1236,7 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
APInt::getAllOnesValue(
C0->getType()->getScalarSizeInBits()))))
return nullptr; // Can't do, have all-ones element[s].
- C0 = AddOne(C0);
+ C0 = InstCombiner::AddOne(C0);
std::swap(X, Sel1);
break;
case ICmpInst::Predicate::ICMP_UGE:
@@ -1324,7 +1296,7 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
APInt::getSignedMaxValue(
C2->getType()->getScalarSizeInBits()))))
return nullptr; // Can't do, have signed max element[s].
- C2 = AddOne(C2);
+ C2 = InstCombiner::AddOne(C2);
LLVM_FALLTHROUGH;
case ICmpInst::Predicate::ICMP_SGE:
// Also non-canonical, but here we don't need to change C2,
@@ -1371,7 +1343,7 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
// and swap the hands of select.
static Instruction *
tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
- InstCombiner &IC) {
+ InstCombinerImpl &IC) {
ICmpInst::Predicate Pred;
Value *X;
Constant *C0;
@@ -1386,7 +1358,7 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
// If comparison predicate is non-canonical, then we certainly won't be able
// to make it canonical; canonicalizeCmpWithConstant() already tried.
- if (!isCanonicalPredicate(Pred))
+ if (!InstCombiner::isCanonicalPredicate(Pred))
return nullptr;
// If the [input] type of comparison and select type are different, lets abort
@@ -1414,7 +1386,8 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
return nullptr;
// Check the constant we'd have with flipped-strictness predicate.
- auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, C0);
+ auto FlippedStrictness =
+ InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, C0);
if (!FlippedStrictness)
return nullptr;
@@ -1437,10 +1410,10 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
}
/// Visit a SelectInst that has an ICmpInst as its first operand.
-Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
- ICmpInst *ICI) {
- if (Value *V = foldSelectValueEquivalence(SI, *ICI, SQ))
- return replaceInstUsesWith(SI, V);
+Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
+ ICmpInst *ICI) {
+ if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI))
+ return NewSel;
if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this))
return NewSel;
@@ -1590,11 +1563,11 @@ static bool canSelectOperandBeMappingIntoPredBlock(const Value *V,
/// We have an SPF (e.g. a min or max) of an SPF of the form:
/// SPF2(SPF1(A, B), C)
-Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
- SelectPatternFlavor SPF1,
- Value *A, Value *B,
- Instruction &Outer,
- SelectPatternFlavor SPF2, Value *C) {
+Instruction *InstCombinerImpl::foldSPFofSPF(Instruction *Inner,
+ SelectPatternFlavor SPF1, Value *A,
+ Value *B, Instruction &Outer,
+ SelectPatternFlavor SPF2,
+ Value *C) {
if (Outer.getType() != Inner->getType())
return nullptr;
@@ -1911,7 +1884,7 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
return CallInst::Create(F, {X, Y});
}
-Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) {
+Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) {
Constant *C;
if (!match(Sel.getTrueValue(), m_Constant(C)) &&
!match(Sel.getFalseValue(), m_Constant(C)))
@@ -1977,10 +1950,11 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) {
static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Constant *CondC;
- if (!CondVal->getType()->isVectorTy() || !match(CondVal, m_Constant(CondC)))
+ auto *CondValTy = dyn_cast<FixedVectorType>(CondVal->getType());
+ if (!CondValTy || !match(CondVal, m_Constant(CondC)))
return nullptr;
- unsigned NumElts = cast<VectorType>(CondVal->getType())->getNumElements();
+ unsigned NumElts = CondValTy->getNumElements();
SmallVector<int, 16> Mask;
Mask.reserve(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
@@ -2012,8 +1986,8 @@ static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) {
/// to a vector select by splatting the condition. A splat may get folded with
/// other operations in IR and having all operands of a select be vector types
/// is likely better for vector codegen.
-static Instruction *canonicalizeScalarSelectOfVecs(
- SelectInst &Sel, InstCombiner &IC) {
+static Instruction *canonicalizeScalarSelectOfVecs(SelectInst &Sel,
+ InstCombinerImpl &IC) {
auto *Ty = dyn_cast<VectorType>(Sel.getType());
if (!Ty)
return nullptr;
@@ -2026,8 +2000,8 @@ static Instruction *canonicalizeScalarSelectOfVecs(
// select (extelt V, Index), T, F --> select (splat V, Index), T, F
// Splatting the extracted condition reduces code (we could directly create a
// splat shuffle of the source vector to eliminate the intermediate step).
- unsigned NumElts = Ty->getNumElements();
- return IC.replaceOperand(Sel, 0, IC.Builder.CreateVectorSplat(NumElts, Cond));
+ return IC.replaceOperand(
+ Sel, 0, IC.Builder.CreateVectorSplat(Ty->getElementCount(), Cond));
}
/// Reuse bitcasted operands between a compare and select:
@@ -2183,7 +2157,7 @@ static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X,
}
/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
-Instruction *InstCombiner::matchSAddSubSat(SelectInst &MinMax1) {
+Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) {
Type *Ty = MinMax1.getType();
// We are looking for a tree of:
@@ -2304,34 +2278,42 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp);
}
-/// Try to reduce a rotate pattern that includes a compare and select into a
-/// funnel shift intrinsic. Example:
+/// Try to reduce a funnel/rotate pattern that includes a compare and select
+/// into a funnel shift intrinsic. Example:
/// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
/// --> call llvm.fshl.i32(a, a, b)
-static Instruction *foldSelectRotate(SelectInst &Sel) {
- // The false value of the select must be a rotate of the true value.
- Value *Or0, *Or1;
- if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_Value(Or0), m_Value(Or1)))))
+/// fshl32(a, b, c) --> (c == 0 ? a : ((b >> (32 - c)) | (a << c)))
+/// --> call llvm.fshl.i32(a, b, c)
+/// fshr32(a, b, c) --> (c == 0 ? b : ((a >> (32 - c)) | (b << c)))
+/// --> call llvm.fshr.i32(a, b, c)
+static Instruction *foldSelectFunnelShift(SelectInst &Sel,
+ InstCombiner::BuilderTy &Builder) {
+ // This must be a power-of-2 type for a bitmasking transform to be valid.
+ unsigned Width = Sel.getType()->getScalarSizeInBits();
+ if (!isPowerOf2_32(Width))
return nullptr;
- Value *TVal = Sel.getTrueValue();
- Value *SA0, *SA1;
- if (!match(Or0, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA0)))) ||
- !match(Or1, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA1)))))
+ BinaryOperator *Or0, *Or1;
+ if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
return nullptr;
- auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
- auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
- if (ShiftOpcode0 == ShiftOpcode1)
+ Value *SV0, *SV1, *SA0, *SA1;
+ if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(SV0),
+ m_ZExtOrSelf(m_Value(SA0))))) ||
+ !match(Or1, m_OneUse(m_LogicalShift(m_Value(SV1),
+ m_ZExtOrSelf(m_Value(SA1))))) ||
+ Or0->getOpcode() == Or1->getOpcode())
return nullptr;
- // We have one of these patterns so far:
- // select ?, TVal, (or (lshr TVal, SA0), (shl TVal, SA1))
- // select ?, TVal, (or (shl TVal, SA0), (lshr TVal, SA1))
- // This must be a power-of-2 rotate for a bitmasking transform to be valid.
- unsigned Width = Sel.getType()->getScalarSizeInBits();
- if (!isPowerOf2_32(Width))
- return nullptr;
+ // Canonicalize to or(shl(SV0, SA0), lshr(SV1, SA1)).
+ if (Or0->getOpcode() == BinaryOperator::LShr) {
+ std::swap(Or0, Or1);
+ std::swap(SV0, SV1);
+ std::swap(SA0, SA1);
+ }
+ assert(Or0->getOpcode() == BinaryOperator::Shl &&
+ Or1->getOpcode() == BinaryOperator::LShr &&
+ "Illegal or(shift,shift) pair");
// Check the shift amounts to see if they are an opposite pair.
Value *ShAmt;
@@ -2342,6 +2324,15 @@ static Instruction *foldSelectRotate(SelectInst &Sel) {
else
return nullptr;
+ // We should now have this pattern:
+ // select ?, TVal, (or (shl SV0, SA0), (lshr SV1, SA1))
+ // The false value of the select must be a funnel-shift of the true value:
+ // IsFShl -> TVal must be SV0 else TVal must be SV1.
+ bool IsFshl = (ShAmt == SA0);
+ Value *TVal = Sel.getTrueValue();
+ if ((IsFshl && TVal != SV0) || (!IsFshl && TVal != SV1))
+ return nullptr;
+
// Finally, see if the select is filtering out a shift-by-zero.
Value *Cond = Sel.getCondition();
ICmpInst::Predicate Pred;
@@ -2349,13 +2340,21 @@ static Instruction *foldSelectRotate(SelectInst &Sel) {
Pred != ICmpInst::ICMP_EQ)
return nullptr;
- // This is a rotate that avoids shift-by-bitwidth UB in a suboptimal way.
+ // If this is not a rotate then the select was blocking poison from the
+ // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.
+ if (SV0 != SV1) {
+ if (IsFshl && !llvm::isGuaranteedNotToBePoison(SV1))
+ SV1 = Builder.CreateFreeze(SV1);
+ else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(SV0))
+ SV0 = Builder.CreateFreeze(SV0);
+ }
+
+ // This is a funnel/rotate that avoids shift-by-bitwidth UB in a suboptimal way.
// Convert to funnel shift intrinsic.
- bool IsFshl = (ShAmt == SA0 && ShiftOpcode0 == BinaryOperator::Shl) ||
- (ShAmt == SA1 && ShiftOpcode1 == BinaryOperator::Shl);
Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
Function *F = Intrinsic::getDeclaration(Sel.getModule(), IID, Sel.getType());
- return IntrinsicInst::Create(F, { TVal, TVal, ShAmt });
+ ShAmt = Builder.CreateZExt(ShAmt, Sel.getType());
+ return IntrinsicInst::Create(F, { SV0, SV1, ShAmt });
}
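[A sketch of the generalized fold; names invented. A guarded funnel-shift pattern over two different values now also becomes an intrinsic call, with a freeze inserted for the value whose shift-by-zero the select used to filter out.]
define i32 @fshl_select(i32 %a, i32 %b, i32 %c) {
  %cz = icmp eq i32 %c, 0
  %sub = sub i32 32, %c
  %shr = lshr i32 %b, %sub
  %shl = shl i32 %a, %c
  %or = or i32 %shl, %shr
  %sel = select i1 %cz, i32 %a, i32 %or
  ret i32 %sel
}
; becomes roughly:
;   %b.fr = freeze i32 %b
;   %sel  = call i32 @llvm.fshl.i32(i32 %a, i32 %b.fr, i32 %c)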
static Instruction *foldSelectToCopysign(SelectInst &Sel,
@@ -2379,7 +2378,8 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel,
bool IsTrueIfSignSet;
ICmpInst::Predicate Pred;
if (!match(Cond, m_OneUse(m_ICmp(Pred, m_BitCast(m_Value(X)), m_APInt(C)))) ||
- !isSignBitCheck(Pred, *C, IsTrueIfSignSet) || X->getType() != SelType)
+ !InstCombiner::isSignBitCheck(Pred, *C, IsTrueIfSignSet) ||
+ X->getType() != SelType)
return nullptr;
// If needed, negate the value that will be the sign argument of the copysign:
@@ -2400,7 +2400,7 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel,
return CopySign;
}
-Instruction *InstCombiner::foldVectorSelect(SelectInst &Sel) {
+Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) {
auto *VecTy = dyn_cast<FixedVectorType>(Sel.getType());
if (!VecTy)
return nullptr;
@@ -2530,7 +2530,33 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,
return nullptr;
}
-Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
+static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) {
+ FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition());
+ if (!FI)
+ return nullptr;
+
+ Value *Cond = FI->getOperand(0);
+ Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue();
+
+ // select (freeze(x == y)), x, y --> y
+ // select (freeze(x != y)), x, y --> x
+ // The freeze should be only used by this select. Otherwise, remaining uses of
+ // the freeze can observe a contradictory value.
+ // c = freeze(x == y) ; Let's assume that y = poison & x = 42; c is 0 or 1
+ // a = select c, x, y ;
+ // f(a, c) ; f(poison, 1) cannot happen, but if a is folded
+ // ; to y, this can happen.
+ CmpInst::Predicate Pred;
+ if (FI->hasOneUse() &&
+ match(Cond, m_c_ICmp(Pred, m_Specific(TrueVal), m_Specific(FalseVal))) &&
+ (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)) {
+ return Pred == ICmpInst::ICMP_EQ ? FalseVal : TrueVal;
+ }
+
+ return nullptr;
+}
+
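+ [A short sketch of the frozen-compare fold; names invented. When the frozen equality compare is only used by the select, selecting between the compared values collapses to one of them.]
define i32 @select_frozen_eq(i32 %x, i32 %y) {
  %cmp = icmp eq i32 %x, %y
  %fr = freeze i1 %cmp
  %sel = select i1 %fr, i32 %x, i32 %y
  ret i32 %sel
}
; folds to 'ret i32 %y'; with 'icmp ne' it would fold to %x instead.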
+Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
Value *FalseVal = SI.getFalseValue();
@@ -2566,38 +2592,45 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (SelType->isIntOrIntVectorTy(1) &&
TrueVal->getType() == CondVal->getType()) {
- if (match(TrueVal, m_One())) {
+ if (match(TrueVal, m_One()) &&
+ (EnableUnsafeSelectTransform || impliesPoison(FalseVal, CondVal))) {
// Change: A = select B, true, C --> A = or B, C
return BinaryOperator::CreateOr(CondVal, FalseVal);
}
- if (match(TrueVal, m_Zero())) {
- // Change: A = select B, false, C --> A = and !B, C
- Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
- return BinaryOperator::CreateAnd(NotCond, FalseVal);
- }
- if (match(FalseVal, m_Zero())) {
+ if (match(FalseVal, m_Zero()) &&
+ (EnableUnsafeSelectTransform || impliesPoison(TrueVal, CondVal))) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
}
+
+ // select a, false, b -> select !a, b, false
+ if (match(TrueVal, m_Zero())) {
+ Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
+ return SelectInst::Create(NotCond, FalseVal,
+ ConstantInt::getFalse(SelType));
+ }
+ // select a, b, true -> select !a, true, b
if (match(FalseVal, m_One())) {
- // Change: A = select B, C, true --> A = or !B, C
Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
- return BinaryOperator::CreateOr(NotCond, TrueVal);
+ return SelectInst::Create(NotCond, ConstantInt::getTrue(SelType),
+ TrueVal);
}
- // select a, a, b -> a | b
- // select a, b, a -> a & b
+ // select a, a, b -> select a, true, b
if (CondVal == TrueVal)
- return BinaryOperator::CreateOr(CondVal, FalseVal);
+ return replaceOperand(SI, 1, ConstantInt::getTrue(SelType));
+ // select a, b, a -> select a, b, false
if (CondVal == FalseVal)
- return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ return replaceOperand(SI, 2, ConstantInt::getFalse(SelType));
- // select a, ~a, b -> (~a) & b
- // select a, b, ~a -> (~a) | b
+ // select a, !a, b -> select !a, b, false
if (match(TrueVal, m_Not(m_Specific(CondVal))))
- return BinaryOperator::CreateAnd(TrueVal, FalseVal);
+ return SelectInst::Create(TrueVal, FalseVal,
+ ConstantInt::getFalse(SelType));
+ // select a, b, !a -> select !a, true, b
if (match(FalseVal, m_Not(m_Specific(CondVal))))
- return BinaryOperator::CreateOr(TrueVal, FalseVal);
+ return SelectInst::Create(FalseVal, ConstantInt::getTrue(SelType),
+ TrueVal);
}
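[A sketch of the new i1-select canonicalizations above; names invented, and the or/and forms are gated on the poison-safety flag or the impliesPoison check.]
define i1 @select_true_arm(i1 %a, i1 %b) {
  %r = select i1 %a, i1 true, i1 %b     ; -> or i1 %a, %b (when safe)
  ret i1 %r
}
define i1 @select_false_arm(i1 %a, i1 %b) {
  %r = select i1 %a, i1 false, i1 %b    ; -> select i1 %not.a, i1 %b, i1 false
  ret i1 %r                             ;    where %not.a = xor i1 %a, true
}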
// Selecting between two integer or vector splat integer constants?
@@ -2606,7 +2639,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0>
// because that may need 3 instructions to splat the condition value:
// extend, insertelement, shufflevector.
- if (SelType->isIntOrIntVectorTy() &&
+ //
+ // Do not handle i1 TrueVal and FalseVal, as that would result in a
+ // zext/sext of i1 to i1.
+ if (SelType->isIntOrIntVectorTy() && !SelType->isIntOrIntVectorTy(1) &&
CondVal->getType()->isVectorTy() == SelType->isVectorTy()) {
// select C, 1, 0 -> zext C to int
if (match(TrueVal, m_One()) && match(FalseVal, m_Zero()))
@@ -2853,8 +2889,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return replaceOperand(SI, 1, TrueSI->getTrueValue());
}
// select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
- // We choose this as normal form to enable folding on the And and shortening
- // paths for the values (this helps GetUnderlyingObjects() for example).
+ // We choose this as normal form to enable folding on the And and
+ // shortening paths for the values (this helps getUnderlyingObjects() for
+ // example).
if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
Value *And = Builder.CreateAnd(CondVal, TrueSI->getCondition());
replaceOperand(SI, 0, And);
@@ -2937,7 +2974,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
Value *NotCond;
- if (match(CondVal, m_Not(m_Value(NotCond)))) {
+ if (match(CondVal, m_Not(m_Value(NotCond))) &&
+ !InstCombiner::shouldAvoidAbsorbingNotIntoSelect(SI)) {
replaceOperand(SI, 0, NotCond);
SI.swapValues();
SI.swapProfMetadata();
@@ -2971,8 +3009,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI, *this))
return Select;
- if (Instruction *Rot = foldSelectRotate(SI))
- return Rot;
+ if (Instruction *Funnel = foldSelectFunnelShift(SI, Builder))
+ return Funnel;
if (Instruction *Copysign = foldSelectToCopysign(SI, Builder))
return Copysign;
@@ -2980,5 +3018,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *PN = foldSelectToPhi(SI, DT, Builder))
return replaceInstUsesWith(SI, PN);
+ if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder))
+ return replaceInstUsesWith(SI, Fr);
+
return nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 0a842b4e1047..127bf8080959 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -15,11 +15,36 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
+bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1,
+ Value *ShAmt1) {
+ // We have two shift amounts from two different shifts. The types of those
+ // shift amounts may not match. If that's the case let's bailout now..
+ if (ShAmt0->getType() != ShAmt1->getType())
+ return false;
+
+ // As input, we have the following pattern:
+ // Sh0 (Sh1 X, Q), K
+ // We want to rewrite that as:
+ // Sh x, (Q+K) iff (Q+K) u< bitwidth(x)
+ // While we know that originally (Q+K) would not overflow
+ // (because 2 * (N-1) u<= iN -1), we have looked past extensions of
+ // shift amounts, so it may now overflow in a smaller bitwidth.
+ // To ensure that does not happen, we need to ensure that the total maximal
+ // shift amount is still representable in that smaller bit width.
+ unsigned MaximalPossibleTotalShiftAmount =
+ (Sh0->getType()->getScalarSizeInBits() - 1) +
+ (Sh1->getType()->getScalarSizeInBits() - 1);
+ APInt MaximalRepresentableShiftAmount =
+ APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
+ return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount);
+}
+
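+ [A sketch of the reassociation this helper guards, "Sh0 (Sh1 X, Q), K -> Sh X, (Q+K)"; names invented.]
define i32 @two_lshr(i32 %x) {
  %s1 = lshr i32 %x, 3
  %s2 = lshr i32 %s1, 4
  ret i32 %s2            ; -> lshr i32 %x, 7, since 3 + 4 u< 32
}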
// Given pattern:
// (x shiftopcode Q) shiftopcode K
// we should rewrite it as
@@ -31,7 +56,7 @@ using namespace PatternMatch;
//
// AnalyzeForSignBitExtraction indicates that we will only analyze whether this
// pattern has any 2 right-shifts that sum to 1 less than original bit width.
-Value *InstCombiner::reassociateShiftAmtsOfTwoSameDirectionShifts(
+Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
BinaryOperator *Sh0, const SimplifyQuery &SQ,
bool AnalyzeForSignBitExtraction) {
// Look for a shift of some instruction, ignore zext of shift amount if any.
@@ -56,26 +81,8 @@ Value *InstCombiner::reassociateShiftAmtsOfTwoSameDirectionShifts(
if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
return nullptr;
- // We have two shift amounts from two different shifts. The types of those
- // shift amounts may not match. If that's the case let's bailout now..
- if (ShAmt0->getType() != ShAmt1->getType())
- return nullptr;
-
- // As input, we have the following pattern:
- // Sh0 (Sh1 X, Q), K
- // We want to rewrite that as:
- // Sh x, (Q+K) iff (Q+K) u< bitwidth(x)
- // While we know that originally (Q+K) would not overflow
- // (because 2 * (N-1) u<= iN -1), we have looked past extensions of
- // shift amounts. so it may now overflow in smaller bitwidth.
- // To ensure that does not happen, we need to ensure that the total maximal
- // shift amount is still representable in that smaller bit width.
- unsigned MaximalPossibleTotalShiftAmount =
- (Sh0->getType()->getScalarSizeInBits() - 1) +
- (Sh1->getType()->getScalarSizeInBits() - 1);
- APInt MaximalRepresentableShiftAmount =
- APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
- if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
+ // Verify that it would be safe to try to add those two shift amounts.
+ if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1))
return nullptr;
// We are only looking for signbit extraction if we have two right shifts.
@@ -219,9 +226,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// Peek through an optional zext of the shift amount.
match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
- // We have two shift amounts from two different shifts. The types of those
- // shift amounts may not match. If that's the case let's bailout now.
- if (MaskShAmt->getType() != ShiftShAmt->getType())
+ // Verify that it would be safe to try to add those two shift amounts.
+ if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
+ MaskShAmt))
return nullptr;
// Can we simplify (MaskShAmt+ShiftShAmt) ?
@@ -251,9 +258,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// Peek through an optional zext of the shift amount.
match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
- // We have two shift amounts from two different shifts. The types of those
- // shift amounts may not match. If that's the case let's bailout now.
- if (MaskShAmt->getType() != ShiftShAmt->getType())
+ // Verify that it would be safe to try to add those two shift amounts.
+ if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
+ MaskShAmt))
return nullptr;
// Can we simplify (ShiftShAmt-MaskShAmt) ?
@@ -327,8 +334,8 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
if (!LogicInst || !LogicInst->isBitwiseLogicOp() || !LogicInst->hasOneUse())
return nullptr;
- const APInt *C0, *C1;
- if (!match(I.getOperand(1), m_APInt(C1)))
+ Constant *C0, *C1;
+ if (!match(I.getOperand(1), m_Constant(C1)))
return nullptr;
Instruction::BinaryOps ShiftOpcode = I.getOpcode();
@@ -339,10 +346,12 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
// TODO: Remove the one-use check if the other logic operand (Y) is constant.
Value *X, *Y;
auto matchFirstShift = [&](Value *V) {
- return !isa<ConstantExpr>(V) &&
- match(V, m_OneUse(m_Shift(m_Value(X), m_APInt(C0)))) &&
- cast<BinaryOperator>(V)->getOpcode() == ShiftOpcode &&
- (*C0 + *C1).ult(Ty->getScalarSizeInBits());
+ BinaryOperator *BO;
+ APInt Threshold(Ty->getScalarSizeInBits(), Ty->getScalarSizeInBits());
+ return match(V, m_BinOp(BO)) && BO->getOpcode() == ShiftOpcode &&
+ match(V, m_OneUse(m_Shift(m_Value(X), m_Constant(C0)))) &&
+ match(ConstantExpr::getAdd(C0, C1),
+ m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
};
// Logic ops are commutative, so check each operand for a match.
@@ -354,13 +363,13 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
return nullptr;
// shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
- Constant *ShiftSumC = ConstantInt::get(Ty, *C0 + *C1);
+ Constant *ShiftSumC = ConstantExpr::getAdd(C0, C1);
Value *NewShift1 = Builder.CreateBinOp(ShiftOpcode, X, ShiftSumC);
Value *NewShift2 = Builder.CreateBinOp(ShiftOpcode, Y, I.getOperand(1));
return BinaryOperator::Create(LogicInst->getOpcode(), NewShift1, NewShift2);
}
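[A sketch of the shift-of-shifted-logic fold, which after this change also accepts vector/constant-expression shift amounts; scalar case with invented names.]
define i32 @shl_of_shifted_and(i32 %x, i32 %y) {
  %s1 = shl i32 %x, 3
  %a = and i32 %s1, %y
  %s2 = shl i32 %a, 4
  ret i32 %s2
}
; becomes roughly:
;   %x.sh = shl i32 %x, 7
;   %y.sh = shl i32 %y, 4
;   %r    = and i32 %x.sh, %y.sh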
-Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
+Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
assert(Op0->getType() == Op1->getType());
@@ -399,15 +408,15 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
return BinaryOperator::Create(
I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), Op0, C), A);
- // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2.
+ // X shift (A srem C) -> X shift (A and (C - 1)) iff C is a power of 2.
// Because shifts by negative values (which could occur if A were negative)
// are undefined.
- const APInt *B;
- if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
+ if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Constant(C))) &&
+ match(C, m_Power2())) {
// FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
// demand the sign bit (and many others) here??
- Value *Rem = Builder.CreateAnd(A, ConstantInt::get(I.getType(), *B - 1),
- Op1->getName());
+ Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(I.getType(), 1));
+ Value *Rem = Builder.CreateAnd(A, Mask, Op1->getName());
return replaceOperand(I, 1, Rem);
}
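[A sketch of the srem-by-power-of-two shift-amount fold, now phrased over Constants so splat vectors are covered too; scalar form with invented names.]
define i32 @shl_by_srem(i32 %x, i32 %a) {
  %r = srem i32 %a, 8
  %s = shl i32 %x, %r
  ret i32 %s
}
; becomes roughly:
;   %r = and i32 %a, 7
;   %s = shl i32 %x, %r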
@@ -420,8 +429,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
/// Return true if we can simplify two logical (either left or right) shifts
/// that have constant shift amounts: OuterShift (InnerShift X, C1), C2.
static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl,
- Instruction *InnerShift, InstCombiner &IC,
- Instruction *CxtI) {
+ Instruction *InnerShift,
+ InstCombinerImpl &IC, Instruction *CxtI) {
assert(InnerShift->isLogicalShift() && "Unexpected instruction type");
// We need constant scalar or constant splat shifts.
@@ -472,7 +481,7 @@ static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl,
/// where the client will ask if E can be computed shifted right by 64-bits. If
/// this succeeds, getShiftedValue() will be called to produce the value.
static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
- InstCombiner &IC, Instruction *CxtI) {
+ InstCombinerImpl &IC, Instruction *CxtI) {
// We can always evaluate constants shifted.
if (isa<Constant>(V))
return true;
@@ -480,31 +489,6 @@ static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
- // If this is the opposite shift, we can directly reuse the input of the shift
- // if the needed bits are already zero in the input. This allows us to reuse
- // the value which means that we don't care if the shift has multiple uses.
- // TODO: Handle opposite shift by exact value.
- ConstantInt *CI = nullptr;
- if ((IsLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
- (!IsLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
- if (CI->getValue() == NumBits) {
- // TODO: Check that the input bits are already zero with MaskedValueIsZero
-#if 0
- // If this is a truncate of a logical shr, we can truncate it to a smaller
- // lshr iff we know that the bits we would otherwise be shifting in are
- // already zeros.
- uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
- uint32_t BitWidth = Ty->getScalarSizeInBits();
- if (MaskedValueIsZero(I->getOperand(0),
- APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
- CI->getLimitedValue(BitWidth) < BitWidth) {
- return CanEvaluateTruncated(I->getOperand(0), Ty);
- }
-#endif
-
- }
- }
-
// We can't mutate something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
@@ -608,7 +592,7 @@ static Value *foldShiftedShift(BinaryOperator *InnerShift, unsigned OuterShAmt,
/// When canEvaluateShifted() returns true for an expression, this function
/// inserts the new computation that produces the shifted value.
static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
- InstCombiner &IC, const DataLayout &DL) {
+ InstCombinerImpl &IC, const DataLayout &DL) {
// We can always evaluate constants shifted.
if (Constant *C = dyn_cast<Constant>(V)) {
if (isLeftShift)
@@ -618,7 +602,7 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
}
Instruction *I = cast<Instruction>(V);
- IC.Worklist.push(I);
+ IC.addToWorklist(I);
switch (I->getOpcode()) {
default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
@@ -666,14 +650,17 @@ static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift,
case Instruction::Add:
return Shift.getOpcode() == Instruction::Shl;
case Instruction::Or:
- case Instruction::Xor:
case Instruction::And:
return true;
+ case Instruction::Xor:
+ // Do not change a 'not' of logical shift because that would create a normal
+ // 'xor'. The 'not' is likely better for analysis, SCEV, and codegen.
+ return !(Shift.isLogicalShift() && match(BO, m_Not(m_Value())));
}
}
-Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
- BinaryOperator &I) {
+Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1,
+ BinaryOperator &I) {
bool isLeftShift = I.getOpcode() == Instruction::Shl;
const APInt *Op1C;
@@ -695,8 +682,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- unsigned TypeBits = Op0->getType()->getScalarSizeInBits();
-
+ Type *Ty = I.getType();
+ unsigned TypeBits = Ty->getScalarSizeInBits();
assert(!Op1C->uge(TypeBits) &&
"Shift over the type width should have been removed already");
@@ -704,18 +691,20 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
return FoldedShift;
// Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
- if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
- Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+ if (auto *TI = dyn_cast<TruncInst>(Op0)) {
// If 'shift2' is an ashr, we would have to get the sign bit into a funny
// place. Don't try to do this transformation in this case. Also, we
// require that the input operand is a shift-by-constant so that we have
// confidence that the shifts will get folded together. We could do this
// xform in more cases, but it is unlikely to be profitable.
- if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
- isa<ConstantInt>(TrOp->getOperand(1))) {
+ const APInt *TrShiftAmt;
+ if (I.isLogicalShift() &&
+ match(TI->getOperand(0), m_Shift(m_Value(), m_APInt(TrShiftAmt)))) {
+ auto *TrOp = cast<Instruction>(TI->getOperand(0));
+ Type *SrcTy = TrOp->getType();
+
// Okay, we'll do this xform. Make the shift of shift.
- Constant *ShAmt =
- ConstantExpr::getZExt(cast<Constant>(Op1), TrOp->getType());
+ Constant *ShAmt = ConstantExpr::getZExt(Op1, SrcTy);
// (shift2 (shift1 & 0x00FF), c2)
Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName());
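The trunc-of-shift fold being reworked in this hunk and the next performs the outer shift in the wide source type and then emulates the dropped truncation with a mask. For an i32 -> i8 truncate and logical shifts, the identity looks roughly like this standalone C++ sketch (arbitrary values; not the LLVM code itself):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xDEADBEEFu;
  unsigned c1 = 7, c2 = 3;
  uint32_t inner = x >> c1;                       // shift1 in the wide type
  // shl case:  trunc(inner) << c2 == trunc((inner << c2) & (0xFF << c2))
  assert((uint8_t)((uint8_t)inner << c2) ==
         (uint8_t)((inner << c2) & (0xFFu << c2)));
  // lshr case: trunc(inner) >> c2 == trunc((inner >> c2) & (0xFF >> c2))
  assert((uint8_t)((uint8_t)inner >> c2) ==
         (uint8_t)((inner >> c2) & (0xFFu >> c2)));
  return 0;
}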
@@ -723,36 +712,27 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
// part of the register be zeros. Emulate this by inserting an AND to
// clear the top bits as needed. This 'and' will usually be zapped by
// other xforms later if dead.
- unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
- unsigned DstSize = TI->getType()->getScalarSizeInBits();
- APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ Constant *MaskV =
+ ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcSize, TypeBits));
// The mask we constructed says what the trunc would do if occurring
// between the shifts. We want to know the effect *after* the second
// shift. We know that it is a logical shift by a constant, so adjust the
// mask as appropriate.
- if (I.getOpcode() == Instruction::Shl)
- MaskV <<= Op1C->getZExtValue();
- else {
- assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
- MaskV.lshrInPlace(Op1C->getZExtValue());
- }
-
+ MaskV = ConstantExpr::get(I.getOpcode(), MaskV, ShAmt);
// shift1 & 0x00FF
- Value *And = Builder.CreateAnd(NSh,
- ConstantInt::get(I.getContext(), MaskV),
- TI->getName());
-
+ Value *And = Builder.CreateAnd(NSh, MaskV, TI->getName());
// Return the value truncated to the interesting size.
- return new TruncInst(And, I.getType());
+ return new TruncInst(And, Ty);
}
}
if (Op0->hasOneUse()) {
if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
// Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
- Value *V1, *V2;
- ConstantInt *CC;
+ Value *V1;
+ const APInt *CC;
switch (Op0BO->getOpcode()) {
default: break;
case Instruction::Add:
@@ -770,25 +750,22 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), YS, V1,
Op0BO->getOperand(1)->getName());
unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
-
APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
- Constant *Mask = ConstantInt::get(I.getContext(), Bits);
- if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
- Mask = ConstantVector::getSplat(VT->getElementCount(), Mask);
+ Constant *Mask = ConstantInt::get(Ty, Bits);
return BinaryOperator::CreateAnd(X, Mask);
}
// Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
Value *Op0BOOp1 = Op0BO->getOperand(1);
if (isLeftShift && Op0BOOp1->hasOneUse() &&
- match(Op0BOOp1,
- m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
- m_ConstantInt(CC)))) {
- Value *YS = // (Y << C)
- Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+ match(Op0BOOp1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
+ m_APInt(CC)))) {
+ Value *YS = // (Y << C)
+ Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
// X & (CC << C)
- Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
- V1->getName()+".mask");
+ Value *XM = Builder.CreateAnd(
+ V1, ConstantExpr::getShl(ConstantInt::get(Ty, *CC), Op1),
+ V1->getName() + ".mask");
return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
}
LLVM_FALLTHROUGH;
@@ -805,25 +782,22 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), V1, YS,
Op0BO->getOperand(0)->getName());
unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
-
APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
- Constant *Mask = ConstantInt::get(I.getContext(), Bits);
- if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
- Mask = ConstantVector::getSplat(VT->getElementCount(), Mask);
+ Constant *Mask = ConstantInt::get(Ty, Bits);
return BinaryOperator::CreateAnd(X, Mask);
}
// Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
match(Op0BO->getOperand(0),
- m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
- m_ConstantInt(CC))) && V2 == Op1) {
+ m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
+ m_APInt(CC)))) {
Value *YS = // (Y << C)
- Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
// X & (CC << C)
- Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
- V1->getName()+".mask");
-
+ Value *XM = Builder.CreateAnd(
+ V1, ConstantExpr::getShl(ConstantInt::get(Ty, *CC), Op1),
+ V1->getName() + ".mask");
return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
}
@@ -831,7 +805,6 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
}
}
-
// If the operand is a bitwise operator with a constant RHS, and the
// shift is the only use, we can pull it out of the shift.
const APInt *Op0C;
@@ -915,7 +888,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
return nullptr;
}
-Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
const SimplifyQuery Q = SQ.getWithInstruction(&I);
if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
@@ -955,10 +928,9 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
}
- // FIXME: we do not yet transform non-exact shr's. The backend (DAGCombine)
- // needs a few fixes for the rotate pattern recognition first.
const APInt *ShOp1;
- if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1))))) {
+ if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1)))) &&
+ ShOp1->ult(BitWidth)) {
unsigned ShrAmt = ShOp1->getZExtValue();
if (ShrAmt < ShAmt) {
// If C1 < C2: (X >>?,exact C1) << C2 --> X << (C2 - C1)
@@ -978,7 +950,33 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
}
}
- if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1)))) {
+ if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_APInt(ShOp1)))) &&
+ ShOp1->ult(BitWidth)) {
+ unsigned ShrAmt = ShOp1->getZExtValue();
+ if (ShrAmt < ShAmt) {
+ // If C1 < C2: (X >>? C1) << C2 --> X << (C2 - C1) & (-1 << C2)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShrAmt);
+ auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
+ NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
+ Builder.Insert(NewShl);
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
+ return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
+ }
+ if (ShrAmt > ShAmt) {
+ // If C1 > C2: (X >>? C1) << C2 --> X >>? (C1 - C2) & (-1 << C2)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmt);
+ auto *OldShr = cast<BinaryOperator>(Op0);
+ auto *NewShr =
+ BinaryOperator::Create(OldShr->getOpcode(), X, ShiftDiff);
+ NewShr->setIsExact(OldShr->isExact());
+ Builder.Insert(NewShr);
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
+ return BinaryOperator::CreateAnd(NewShr, ConstantInt::get(Ty, Mask));
+ }
+ }
+
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1))) && ShOp1->ult(BitWidth)) {
unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
// Oversized shifts are simplified to zero in InstSimplify.
if (AmtSum < BitWidth)
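The newly added one-use shr-then-shl folds above use the same masking idea as the exact-shift case just before them: a single combined shift puts the surviving bits in the right place, and the -1 << C2 mask clears whatever the original pair would have cleared. A standalone C++ check of both directions for logical shifts (arbitrary values):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xF00DFACEu;
  {                               // C1 < C2
    unsigned C1 = 3, C2 = 9;
    assert(((X >> C1) << C2) == ((X << (C2 - C1)) & (~0u << C2)));
  }
  {                               // C1 > C2
    unsigned C1 = 9, C2 = 3;
    assert(((X >> C1) << C2) == ((X >> (C1 - C2)) & (~0u << C2)));
  }
  return 0;
}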
@@ -1037,7 +1035,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
return nullptr;
}
-Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1139,6 +1137,12 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
}
}
+ // lshr i32 (X -nsw Y), 31 --> zext (X < Y)
+ Value *Y;
+ if (ShAmt == BitWidth - 1 &&
+ match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
+ return new ZExtInst(Builder.CreateICmpSLT(X, Y), Ty);
+
if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) {
unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
// Oversized shifts are simplified to zero in InstSimplify.
@@ -1167,7 +1171,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
}
Instruction *
-InstCombiner::foldVariableSignZeroExtensionOfVariableHighBitExtract(
+InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract(
BinaryOperator &OldAShr) {
assert(OldAShr.getOpcode() == Instruction::AShr &&
"Must be called with arithmetic right-shift instruction only.");
@@ -1235,7 +1239,7 @@ InstCombiner::foldVariableSignZeroExtensionOfVariableHighBitExtract(
return TruncInst::CreateTruncOrBitCast(NewAShr, OldAShr.getType());
}
-Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1301,6 +1305,12 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
return new SExtInst(NewSh, Ty);
}
+ // ashr i32 (X -nsw Y), 31 --> sext (X < Y)
+ Value *Y;
+ if (ShAmt == BitWidth - 1 &&
+ match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
+ return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty);
+
// If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() &&
MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
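The matching lshr and ashr folds added above read the sign bit of a no-signed-wrap subtraction as a signed comparison, producing a zext or sext of X < Y respectively. A standalone C++ illustration for i32, with inputs chosen so that x - y does not overflow (the nsw precondition); the signed right shift is assumed to be arithmetic, as C++20 guarantees:

#include <cassert>
#include <cstdint>

int main() {
  int32_t x = -7, y = 1234;               // x - y does not overflow
  int32_t d = x - y;
  // lshr (x -nsw y), 31  -->  zext (x <s y)
  assert(((uint32_t)d >> 31) == (uint32_t)(x < y));
  // ashr (x -nsw y), 31  -->  sext (x <s y)
  assert((d >> 31) == (x < y ? -1 : 0));
  return 0;
}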
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index c7f2f4ec3ca1..16efe863779a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -12,29 +12,18 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
-#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "instcombine"
-namespace {
-
-struct AMDGPUImageDMaskIntrinsic {
- unsigned Intr;
-};
-
-#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
-#include "InstCombineTables.inc"
-
-} // end anonymous namespace
-
/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
@@ -63,7 +52,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
-bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
+bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
KnownBits Known(BitWidth);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
@@ -79,22 +68,20 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
-bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
- const APInt &DemandedMask,
- KnownBits &Known,
- unsigned Depth) {
+bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
+ const APInt &DemandedMask,
+ KnownBits &Known, unsigned Depth) {
Use &U = I->getOperandUse(OpNo);
Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
Depth, I);
if (!NewVal) return false;
if (Instruction* OpInst = dyn_cast<Instruction>(U))
salvageDebugInfo(*OpInst);
-
+
replaceUse(U, NewVal);
return true;
}
-
/// This function attempts to replace V with a simpler value based on the
/// demanded bits. When this function is called, it is known that only the bits
/// set in DemandedMask of the result of V are ever used downstream.
@@ -118,11 +105,12 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
/// operands based on the information about what bits are demanded. This returns
/// some other non-null value if it found out that V is equal to another value
/// in the context where the specified bits are demanded, but not for all users.
-Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
- KnownBits &Known, unsigned Depth,
- Instruction *CxtI) {
+Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ KnownBits &Known,
+ unsigned Depth,
+ Instruction *CxtI) {
assert(V != nullptr && "Null pointer of Value???");
- assert(Depth <= 6 && "Limit Search Depth");
+ assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = V->getType();
assert(
@@ -139,7 +127,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (DemandedMask.isNullValue()) // Not demanding any bits from V.
return UndefValue::get(VTy);
- if (Depth == 6) // Limit search depth.
+ if (Depth == MaxAnalysisRecursionDepth)
+ return nullptr;
+
+ if (isa<ScalableVectorType>(VTy))
return nullptr;
Instruction *I = dyn_cast<Instruction>(V);
@@ -268,35 +259,44 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return InsertNewInstWith(And, *I);
}
- // If the RHS is a constant, see if we can simplify it.
- // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
- if (ShrinkDemandedConstant(I, 1, DemandedMask))
- return I;
+ // If the RHS is a constant, see if we can change it. Don't alter a -1
+ // constant because that's a canonical 'not' op, and that is better for
+ // combining, SCEV, and codegen.
+ const APInt *C;
+ if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnesValue()) {
+ if ((*C | ~DemandedMask).isAllOnesValue()) {
+ // Force bits to 1 to create a 'not' op.
+ I->setOperand(1, ConstantInt::getAllOnesValue(VTy));
+ return I;
+ }
+ // If we can't turn this into a 'not', try to shrink the constant.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+ }
// If our LHS is an 'and' and if it has one use, and if any of the bits we
// are flipping are known to be set, then the xor is just resetting those
// bits to zero. We can just knock out bits from the 'and' and the 'xor',
// simplifying both of them.
- if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+ if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) {
+ ConstantInt *AndRHS, *XorRHS;
if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
- isa<ConstantInt>(I->getOperand(1)) &&
- isa<ConstantInt>(LHSInst->getOperand(1)) &&
+ match(I->getOperand(1), m_ConstantInt(XorRHS)) &&
+ match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) &&
(LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
- ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
- ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);
Constant *AndC =
- ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+ ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
InsertNewInstWith(NewAnd, *I);
Constant *XorC =
- ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+ ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
return InsertNewInstWith(NewXor, *I);
}
-
+ }
break;
}
case Instruction::Select: {
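The new xor handling above prefers to force the constant to -1 (a canonical 'not') whenever the constant already flips every demanded bit, and only shrinks the constant if that is impossible. The demanded-bits fact behind it, as a standalone C++ check (D is the demanded mask, C the xor constant; values are illustrative):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t D = 0x000000FFu;         // demanded bits
  const uint32_t C = 0x12345FFFu;         // satisfies (C | ~D) == ~0
  assert((C | ~D) == ~0u);
  const uint32_t vals[] = {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t x : vals)
    // Within the demanded bits, x ^ C behaves exactly like ~x, so the
    // constant can be replaced by all-ones to form a 'not'.
    assert(((x ^ C) & D) == (~x & D));
  return 0;
}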
@@ -339,16 +339,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// we can. This helps not break apart (or helps put back together)
// canonical patterns like min and max.
auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo,
- APInt DemandedMask) {
+ const APInt &DemandedMask) {
const APInt *SelC;
if (!match(I->getOperand(OpNo), m_APInt(SelC)))
return false;
// Get the constant out of the ICmp, if there is one.
+ // Only try this when exactly 1 operand is a constant (if both operands
+ // are constant, the icmp should eventually simplify). Otherwise, we may
+ // invert the transform that reduces set bits and infinite-loop.
+ Value *X;
const APInt *CmpC;
ICmpInst::Predicate Pred;
- if (!match(I->getOperand(0), m_c_ICmp(Pred, m_APInt(CmpC), m_Value())) ||
- CmpC->getBitWidth() != SelC->getBitWidth())
+ if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) ||
+ isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
return ShrinkDemandedConstant(I, OpNo, DemandedMask);
// If the constant is already the same as the ICmp, leave it as-is.
@@ -367,8 +371,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I;
// Only known if known in both the LHS and RHS.
- Known.One = RHSKnown.One & LHSKnown.One;
- Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
+ Known = KnownBits::commonBits(LHSKnown, RHSKnown);
break;
}
case Instruction::ZExt:
@@ -391,7 +394,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
if (VectorType *SrcVTy =
dyn_cast<VectorType>(I->getOperand(0)->getType())) {
- if (DstVTy->getNumElements() != SrcVTy->getNumElements())
+ if (cast<FixedVectorType>(DstVTy)->getNumElements() !=
+ cast<FixedVectorType>(SrcVTy)->getNumElements())
// Don't touch a bitcast between vectors of different element counts.
return nullptr;
} else
@@ -669,8 +673,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
break;
}
- case Instruction::SRem:
- if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ case Instruction::SRem: {
+ ConstantInt *Rem;
+ if (match(I->getOperand(1), m_ConstantInt(Rem))) {
// X % -1 demands all the bits because we don't want to introduce
// INT_MIN % -1 (== undef) by accident.
if (Rem->isMinusOne())
@@ -713,6 +718,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known.makeNonNegative();
}
break;
+ }
case Instruction::URem: {
KnownBits Known2(BitWidth);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
@@ -728,7 +734,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
bool KnownBitsComputed = false;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
- default: break;
case Intrinsic::bswap: {
// If the only bits demanded come from one byte of the bswap result,
// just shift the input byte into position to eliminate the bswap.
@@ -784,39 +789,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownBitsComputed = true;
break;
}
- case Intrinsic::x86_mmx_pmovmskb:
- case Intrinsic::x86_sse_movmsk_ps:
- case Intrinsic::x86_sse2_movmsk_pd:
- case Intrinsic::x86_sse2_pmovmskb_128:
- case Intrinsic::x86_avx_movmsk_ps_256:
- case Intrinsic::x86_avx_movmsk_pd_256:
- case Intrinsic::x86_avx2_pmovmskb: {
- // MOVMSK copies the vector elements' sign bits to the low bits
- // and zeros the high bits.
- unsigned ArgWidth;
- if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
- ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
- } else {
- auto Arg = II->getArgOperand(0);
- auto ArgType = cast<VectorType>(Arg->getType());
- ArgWidth = ArgType->getNumElements();
- }
-
- // If we don't need any of low bits then return zero,
- // we know that DemandedMask is non-zero already.
- APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
- if (DemandedElts.isNullValue())
- return ConstantInt::getNullValue(VTy);
-
- // We know that the upper bits are set to zero.
- Known.Zero.setBitsFrom(ArgWidth);
- KnownBitsComputed = true;
+ default: {
+ // Handle target specific intrinsics
+ Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
+ *II, DemandedMask, Known, KnownBitsComputed);
+ if (V.hasValue())
+ return V.getValue();
break;
}
- case Intrinsic::x86_sse42_crc32_64_64:
- Known.Zero.setBitsFrom(32);
- KnownBitsComputed = true;
- break;
}
}
@@ -836,11 +816,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
/// Helper routine of SimplifyDemandedUseBits. It computes Known
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
-Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
- const APInt &DemandedMask,
- KnownBits &Known,
- unsigned Depth,
- Instruction *CxtI) {
+Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
+ Instruction *I, const APInt &DemandedMask, KnownBits &Known, unsigned Depth,
+ Instruction *CxtI) {
unsigned BitWidth = DemandedMask.getBitWidth();
Type *ITy = I->getType();
@@ -925,6 +903,33 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
break;
}
+ case Instruction::AShr: {
+ // Compute the Known bits to simplify things downstream.
+ computeKnownBits(I, Known, Depth, CxtI);
+
+ // If this user is only demanding bits that we know, return the known
+ // constant.
+ if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
+ return Constant::getIntegerValue(ITy, Known.One);
+
+ // If the right shift operand 0 is a result of a left shift by the same
+ // amount, this is probably a zero/sign extension, which may be unnecessary,
+ // if we do not demand any of the new sign bits. So, return the original
+ // operand instead.
+ const APInt *ShiftRC;
+ const APInt *ShiftLC;
+ Value *X;
+ unsigned BitWidth = DemandedMask.getBitWidth();
+ if (match(I,
+ m_AShr(m_Shl(m_Value(X), m_APInt(ShiftLC)), m_APInt(ShiftRC))) &&
+ ShiftLC == ShiftRC &&
+ DemandedMask.isSubsetOf(APInt::getLowBitsSet(
+ BitWidth, BitWidth - ShiftRC->getZExtValue()))) {
+ return X;
+ }
+
+ break;
+ }
default:
// Compute the Known bits to simplify things downstream.
computeKnownBits(I, Known, Depth, CxtI);
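The new AShr case above lets a shl/ashr pair by the same amount (a sign-extension idiom) collapse to its input when none of the replicated sign bits are demanded, since the pair leaves the low BitWidth - C bits untouched. A standalone C++ sketch of that property (arithmetic right shift for signed values assumed, as C++20 defines):

#include <cassert>
#include <cstdint>

int main() {
  int32_t X = 0x12ABCDEF;                             // arbitrary value
  unsigned C = 8;                                     // matching shift amounts
  int32_t SExt = (int32_t)((uint32_t)X << C) >> C;    // ashr (shl X, C), C
  uint32_t LowMask = ~0u >> C;                        // demanded low bits
  // Only the high C bits (the fresh sign copies) can differ from X.
  assert(((uint32_t)SExt & LowMask) == ((uint32_t)X & LowMask));
  return 0;
}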
@@ -940,7 +945,6 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
return nullptr;
}
-
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
@@ -958,11 +962,9 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
-Value *
-InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
- Instruction *Shl, const APInt &ShlOp1,
- const APInt &DemandedMask,
- KnownBits &Known) {
+Value *InstCombinerImpl::simplifyShrShlDemandedBits(
+ Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
+ const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) {
if (!ShlOp1 || !ShrOp1)
return nullptr; // No-op.
@@ -1022,155 +1024,9 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
return nullptr;
}
-/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
-///
-/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
-/// struct returns.
-Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
- APInt DemandedElts,
- int DMaskIdx) {
-
- auto *IIVTy = cast<VectorType>(II->getType());
- unsigned VWidth = IIVTy->getNumElements();
- if (VWidth == 1)
- return nullptr;
-
- IRBuilderBase::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(II);
-
- // Assume the arguments are unchanged and later override them, if needed.
- SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end());
-
- if (DMaskIdx < 0) {
- // Buffer case.
-
- const unsigned ActiveBits = DemandedElts.getActiveBits();
- const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
-
- // Start assuming the prefix of elements is demanded, but possibly clear
- // some other bits if there are trailing zeros (unused components at front)
- // and update offset.
- DemandedElts = (1 << ActiveBits) - 1;
-
- if (UnusedComponentsAtFront > 0) {
- static const unsigned InvalidOffsetIdx = 0xf;
-
- unsigned OffsetIdx;
- switch (II->getIntrinsicID()) {
- case Intrinsic::amdgcn_raw_buffer_load:
- OffsetIdx = 1;
- break;
- case Intrinsic::amdgcn_s_buffer_load:
- // If resulting type is vec3, there is no point in trimming the
- // load with updated offset, as the vec3 would most likely be widened to
- // vec4 anyway during lowering.
- if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
- OffsetIdx = InvalidOffsetIdx;
- else
- OffsetIdx = 1;
- break;
- case Intrinsic::amdgcn_struct_buffer_load:
- OffsetIdx = 2;
- break;
- default:
- // TODO: handle tbuffer* intrinsics.
- OffsetIdx = InvalidOffsetIdx;
- break;
- }
-
- if (OffsetIdx != InvalidOffsetIdx) {
- // Clear demanded bits and update the offset.
- DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
- auto *Offset = II->getArgOperand(OffsetIdx);
- unsigned SingleComponentSizeInBits =
- getDataLayout().getTypeSizeInBits(II->getType()->getScalarType());
- unsigned OffsetAdd =
- UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
- auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
- Args[OffsetIdx] = Builder.CreateAdd(Offset, OffsetAddVal);
- }
- }
- } else {
- // Image case.
-
- ConstantInt *DMask = cast<ConstantInt>(II->getArgOperand(DMaskIdx));
- unsigned DMaskVal = DMask->getZExtValue() & 0xf;
-
- // Mask off values that are undefined because the dmask doesn't cover them
- DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
-
- unsigned NewDMaskVal = 0;
- unsigned OrigLoadIdx = 0;
- for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
- const unsigned Bit = 1 << SrcIdx;
- if (!!(DMaskVal & Bit)) {
- if (!!DemandedElts[OrigLoadIdx])
- NewDMaskVal |= Bit;
- OrigLoadIdx++;
- }
- }
-
- if (DMaskVal != NewDMaskVal)
- Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
- }
-
- unsigned NewNumElts = DemandedElts.countPopulation();
- if (!NewNumElts)
- return UndefValue::get(II->getType());
-
- // FIXME: Allow v3i16/v3f16 in buffer and image intrinsics when the types are
- // fully supported.
- if (II->getType()->getScalarSizeInBits() == 16 && NewNumElts == 3)
- return nullptr;
-
- if (NewNumElts >= VWidth && DemandedElts.isMask()) {
- if (DMaskIdx >= 0)
- II->setArgOperand(DMaskIdx, Args[DMaskIdx]);
- return nullptr;
- }
-
- // Validate function argument and return types, extracting overloaded types
- // along the way.
- SmallVector<Type *, 6> OverloadTys;
- if (!Intrinsic::getIntrinsicSignature(II->getCalledFunction(), OverloadTys))
- return nullptr;
-
- Module *M = II->getParent()->getParent()->getParent();
- Type *EltTy = IIVTy->getElementType();
- Type *NewTy =
- (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
-
- OverloadTys[0] = NewTy;
- Function *NewIntrin =
- Intrinsic::getDeclaration(M, II->getIntrinsicID(), OverloadTys);
-
- CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
- NewCall->takeName(II);
- NewCall->copyMetadata(*II);
-
- if (NewNumElts == 1) {
- return Builder.CreateInsertElement(UndefValue::get(II->getType()), NewCall,
- DemandedElts.countTrailingZeros());
- }
-
- SmallVector<int, 8> EltMask;
- unsigned NewLoadIdx = 0;
- for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
- if (!!DemandedElts[OrigLoadIdx])
- EltMask.push_back(NewLoadIdx++);
- else
- EltMask.push_back(NewNumElts);
- }
-
- Value *Shuffle =
- Builder.CreateShuffleVector(NewCall, UndefValue::get(NewTy), EltMask);
-
- return Shuffle;
-}
-
/// The specified value produces a vector with any number of elements.
-/// This method analyzes which elements of the operand are undef and returns
-/// that information in UndefElts.
+/// This method analyzes which elements of the operand are undef or poison and
+/// returns that information in UndefElts.
///
/// DemandedElts contains the set of elements that are actually used by the
/// caller, and by default (AllowMultipleUsers equals false) the value is
@@ -1181,10 +1037,11 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is
/// returned. This returns null if no change was made.
-Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
- APInt &UndefElts,
- unsigned Depth,
- bool AllowMultipleUsers) {
+Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
+ APInt DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth,
+ bool AllowMultipleUsers) {
// Cannot analyze scalable type. The number of vector elements is not a
// compile-time constant.
if (isa<ScalableVectorType>(V->getType()))
@@ -1195,14 +1052,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
if (isa<UndefValue>(V)) {
- // If the entire vector is undefined, just return this info.
+ // If the entire vector is undef or poison, just return this info.
UndefElts = EltMask;
return nullptr;
}
- if (DemandedElts.isNullValue()) { // If nothing is demanded, provide undef.
+ if (DemandedElts.isNullValue()) { // If nothing is demanded, provide poison.
UndefElts = EltMask;
- return UndefValue::get(V->getType());
+ return PoisonValue::get(V->getType());
}
UndefElts = 0;
@@ -1214,11 +1071,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
return nullptr;
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
- Constant *Undef = UndefValue::get(EltTy);
+ Constant *Poison = PoisonValue::get(EltTy);
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0; i != VWidth; ++i) {
- if (!DemandedElts[i]) { // If not demanded, set to undef.
- Elts.push_back(Undef);
+ if (!DemandedElts[i]) { // If not demanded, set to poison.
+ Elts.push_back(Poison);
UndefElts.setBit(i);
continue;
}
@@ -1226,12 +1083,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Constant *Elt = C->getAggregateElement(i);
if (!Elt) return nullptr;
- if (isa<UndefValue>(Elt)) { // Already undef.
- Elts.push_back(Undef);
+ Elts.push_back(Elt);
+ if (isa<UndefValue>(Elt)) // Already undef or poison.
UndefElts.setBit(i);
- } else { // Otherwise, defined.
- Elts.push_back(Elt);
- }
}
// If we changed the constant, return it.
@@ -1291,12 +1145,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
};
if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
break;
-
+
// Conservatively track the demanded elements back through any vector
// operands we may have. We know there must be at least one, or we
// wouldn't have a vector result to get here. Note that we intentionally
// merge the undef bits here since gepping with either an undef base or
- // index results in undef.
+ // index results in undef.
for (unsigned i = 0; i < I->getNumOperands(); i++) {
if (isa<UndefValue>(I->getOperand(i))) {
// If the entire vector is undefined, just return this info.
@@ -1330,6 +1184,19 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (IdxNo < VWidth)
PreInsertDemandedElts.clearBit(IdxNo);
+ // If we only demand the element that is being inserted and that element
+ // was extracted from the same index in another vector with the same type,
+ // replace this insert with that other vector.
+ // Note: This is attempted before the call to simplifyAndSetOp because that
+ // may change UndefElts to a value that does not match with Vec.
+ Value *Vec;
+ if (PreInsertDemandedElts == 0 &&
+ match(I->getOperand(1),
+ m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) &&
+ Vec->getType() == I->getType()) {
+ return Vec;
+ }
+
simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);
// If this is inserting an element that isn't demanded, remove this
@@ -1348,8 +1215,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
assert(Shuffle->getOperand(0)->getType() ==
Shuffle->getOperand(1)->getType() &&
"Expected shuffle operands to have same type");
- unsigned OpWidth =
- cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+ unsigned OpWidth = cast<FixedVectorType>(Shuffle->getOperand(0)->getType())
+ ->getNumElements();
// Handle trivial case of a splat. Only check the first element of LHS
// operand.
if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) &&
@@ -1450,7 +1317,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// this constant vector to single insertelement instruction.
// shufflevector V, C, <v1, v2, .., ci, .., vm> ->
// insertelement V, C[ci], ci-n
- if (OpWidth == Shuffle->getType()->getNumElements()) {
+ if (OpWidth ==
+ cast<FixedVectorType>(Shuffle->getType())->getNumElements()) {
Value *Op = nullptr;
Constant *Value = nullptr;
unsigned Idx = -1u;
@@ -1537,7 +1405,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Vector->vector casts only.
VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
if (!VTy) break;
- unsigned InVWidth = VTy->getNumElements();
+ unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements();
APInt InputDemandedElts(InVWidth, 0);
UndefElts2 = APInt(InVWidth, 0);
unsigned Ratio;
@@ -1620,227 +1488,19 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (II->getIntrinsicID() == Intrinsic::masked_gather)
simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);
-
+
// Output elements are undefined if the element from both sources are.
// TODO: can strengthen via mask as well.
UndefElts = UndefElts2 & UndefElts3;
break;
}
- case Intrinsic::x86_xop_vfrcz_ss:
- case Intrinsic::x86_xop_vfrcz_sd:
- // The instructions for these intrinsics are speced to zero upper bits not
- // pass them through like other scalar intrinsics. So we shouldn't just
- // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
- // Instead we should return a zero vector.
- if (!DemandedElts[0]) {
- Worklist.push(II);
- return ConstantAggregateZero::get(II->getType());
- }
-
- // Only the lower element is used.
- DemandedElts = 1;
- simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
-
- // Only the lower element is undefined. The high elements are zero.
- UndefElts = UndefElts[0];
- break;
-
- // Unary scalar-as-vector operations that work column-wise.
- case Intrinsic::x86_sse_rcp_ss:
- case Intrinsic::x86_sse_rsqrt_ss:
- simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
-
- // If lowest element of a scalar op isn't used then use Arg0.
- if (!DemandedElts[0]) {
- Worklist.push(II);
- return II->getArgOperand(0);
- }
- // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
- // checks).
- break;
-
- // Binary scalar-as-vector operations that work column-wise. The high
- // elements come from operand 0. The low element is a function of both
- // operands.
- case Intrinsic::x86_sse_min_ss:
- case Intrinsic::x86_sse_max_ss:
- case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse2_min_sd:
- case Intrinsic::x86_sse2_max_sd:
- case Intrinsic::x86_sse2_cmp_sd: {
- simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
-
- // If lowest element of a scalar op isn't used then use Arg0.
- if (!DemandedElts[0]) {
- Worklist.push(II);
- return II->getArgOperand(0);
- }
-
- // Only lower element is used for operand 1.
- DemandedElts = 1;
- simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
-
- // Lower element is undefined if both lower elements are undefined.
- // Consider things like undef&0. The result is known zero, not undef.
- if (!UndefElts2[0])
- UndefElts.clearBit(0);
-
- break;
- }
-
- // Binary scalar-as-vector operations that work column-wise. The high
- // elements come from operand 0 and the low element comes from operand 1.
- case Intrinsic::x86_sse41_round_ss:
- case Intrinsic::x86_sse41_round_sd: {
- // Don't use the low element of operand 0.
- APInt DemandedElts2 = DemandedElts;
- DemandedElts2.clearBit(0);
- simplifyAndSetOp(II, 0, DemandedElts2, UndefElts);
-
- // If lowest element of a scalar op isn't used then use Arg0.
- if (!DemandedElts[0]) {
- Worklist.push(II);
- return II->getArgOperand(0);
- }
-
- // Only lower element is used for operand 1.
- DemandedElts = 1;
- simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
-
- // Take the high undef elements from operand 0 and take the lower element
- // from operand 1.
- UndefElts.clearBit(0);
- UndefElts |= UndefElts2[0];
- break;
- }
-
- // Three input scalar-as-vector operations that work column-wise. The high
- // elements come from operand 0 and the low element is a function of all
- // three inputs.
- case Intrinsic::x86_avx512_mask_add_ss_round:
- case Intrinsic::x86_avx512_mask_div_ss_round:
- case Intrinsic::x86_avx512_mask_mul_ss_round:
- case Intrinsic::x86_avx512_mask_sub_ss_round:
- case Intrinsic::x86_avx512_mask_max_ss_round:
- case Intrinsic::x86_avx512_mask_min_ss_round:
- case Intrinsic::x86_avx512_mask_add_sd_round:
- case Intrinsic::x86_avx512_mask_div_sd_round:
- case Intrinsic::x86_avx512_mask_mul_sd_round:
- case Intrinsic::x86_avx512_mask_sub_sd_round:
- case Intrinsic::x86_avx512_mask_max_sd_round:
- case Intrinsic::x86_avx512_mask_min_sd_round:
- simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
-
- // If lowest element of a scalar op isn't used then use Arg0.
- if (!DemandedElts[0]) {
- Worklist.push(II);
- return II->getArgOperand(0);
- }
-
- // Only lower element is used for operand 1 and 2.
- DemandedElts = 1;
- simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
- simplifyAndSetOp(II, 2, DemandedElts, UndefElts3);
-
- // Lower element is undefined if all three lower elements are undefined.
- // Consider things like undef&0. The result is known zero, not undef.
- if (!UndefElts2[0] || !UndefElts3[0])
- UndefElts.clearBit(0);
-
- break;
-
- case Intrinsic::x86_sse2_packssdw_128:
- case Intrinsic::x86_sse2_packsswb_128:
- case Intrinsic::x86_sse2_packuswb_128:
- case Intrinsic::x86_sse41_packusdw:
- case Intrinsic::x86_avx2_packssdw:
- case Intrinsic::x86_avx2_packsswb:
- case Intrinsic::x86_avx2_packusdw:
- case Intrinsic::x86_avx2_packuswb:
- case Intrinsic::x86_avx512_packssdw_512:
- case Intrinsic::x86_avx512_packsswb_512:
- case Intrinsic::x86_avx512_packusdw_512:
- case Intrinsic::x86_avx512_packuswb_512: {
- auto *Ty0 = II->getArgOperand(0)->getType();
- unsigned InnerVWidth = cast<VectorType>(Ty0)->getNumElements();
- assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
-
- unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
- unsigned VWidthPerLane = VWidth / NumLanes;
- unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
-
- // Per lane, pack the elements of the first input and then the second.
- // e.g.
- // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
- // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
- for (int OpNum = 0; OpNum != 2; ++OpNum) {
- APInt OpDemandedElts(InnerVWidth, 0);
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- unsigned LaneIdx = Lane * VWidthPerLane;
- for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
- unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
- if (DemandedElts[Idx])
- OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
- }
- }
-
- // Demand elements from the operand.
- APInt OpUndefElts(InnerVWidth, 0);
- simplifyAndSetOp(II, OpNum, OpDemandedElts, OpUndefElts);
-
- // Pack the operand's UNDEF elements, one lane at a time.
- OpUndefElts = OpUndefElts.zext(VWidth);
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
- LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
- LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
- UndefElts |= LaneElts;
- }
- }
- break;
- }
-
- // PSHUFB
- case Intrinsic::x86_ssse3_pshuf_b_128:
- case Intrinsic::x86_avx2_pshuf_b:
- case Intrinsic::x86_avx512_pshuf_b_512:
- // PERMILVAR
- case Intrinsic::x86_avx_vpermilvar_ps:
- case Intrinsic::x86_avx_vpermilvar_ps_256:
- case Intrinsic::x86_avx512_vpermilvar_ps_512:
- case Intrinsic::x86_avx_vpermilvar_pd:
- case Intrinsic::x86_avx_vpermilvar_pd_256:
- case Intrinsic::x86_avx512_vpermilvar_pd_512:
- // PERMV
- case Intrinsic::x86_avx2_permd:
- case Intrinsic::x86_avx2_permps: {
- simplifyAndSetOp(II, 1, DemandedElts, UndefElts);
- break;
- }
-
- // SSE4A instructions leave the upper 64-bits of the 128-bit result
- // in an undefined state.
- case Intrinsic::x86_sse4a_extrq:
- case Intrinsic::x86_sse4a_extrqi:
- case Intrinsic::x86_sse4a_insertq:
- case Intrinsic::x86_sse4a_insertqi:
- UndefElts.setHighBits(VWidth / 2);
- break;
- case Intrinsic::amdgcn_buffer_load:
- case Intrinsic::amdgcn_buffer_load_format:
- case Intrinsic::amdgcn_raw_buffer_load:
- case Intrinsic::amdgcn_raw_buffer_load_format:
- case Intrinsic::amdgcn_raw_tbuffer_load:
- case Intrinsic::amdgcn_s_buffer_load:
- case Intrinsic::amdgcn_struct_buffer_load:
- case Intrinsic::amdgcn_struct_buffer_load_format:
- case Intrinsic::amdgcn_struct_tbuffer_load:
- case Intrinsic::amdgcn_tbuffer_load:
- return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
default: {
- if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
- return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
-
+ // Handle target specific intrinsics
+ Optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
+ *II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+ simplifyAndSetOp);
+ if (V.hasValue())
+ return V.getValue();
break;
}
} // switch on IntrinsicID
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index ff70347569ab..06f22cdfb63d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
@@ -35,6 +36,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -45,6 +47,10 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
+STATISTIC(NumAggregateReconstructionsSimplified,
+ "Number of aggregate reconstructions turned into reuse of the "
+ "original aggregate");
+
/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. IsConstantExtractIndex indicates whether we are extracting
/// one known element from a vector constant.
@@ -85,7 +91,8 @@ static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
-Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
+Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
+ PHINode *PN) {
SmallVector<Instruction *, 2> Extracts;
// The users we want the PHI to have are:
// 1) The EI ExtractElement (we already know this)
@@ -178,15 +185,19 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
// extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
auto *SrcTy = cast<VectorType>(X->getType());
Type *DestTy = Ext.getType();
- unsigned NumSrcElts = SrcTy->getNumElements();
- unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
+ ElementCount NumSrcElts = SrcTy->getElementCount();
+ ElementCount NumElts =
+ cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
if (NumSrcElts == NumElts)
if (Value *Elt = findScalarElement(X, ExtIndexC))
return new BitCastInst(Elt, DestTy);
+ assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
+ "Src and Dst must be the same sort of vector type");
+
// If the source elements are wider than the destination, try to shift and
// truncate a subset of scalar bits of an insert op.
- if (NumSrcElts < NumElts) {
+ if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
Value *Scalar;
uint64_t InsIndexC;
if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar),
@@ -197,7 +208,8 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
// into. Example: if we inserted element 1 of a <2 x i64> and we are
// extracting an i16 (narrowing ratio = 4), then this extract must be from 1
// of elements 4-7 of the bitcasted vector.
- unsigned NarrowingRatio = NumElts / NumSrcElts;
+ unsigned NarrowingRatio =
+ NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
if (ExtIndexC / NarrowingRatio != InsIndexC)
return nullptr;
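The narrowing-ratio test above is plain index arithmetic: when the wide vector is reinterpreted with a narrower element type, wide element InsIndexC covers narrow elements [InsIndexC * Ratio, InsIndexC * Ratio + Ratio - 1] (any endianness adjustment aside). A small standalone C++ sketch of that bookkeeping for the <2 x i64> / i16 example from the comment:

#include <cassert>

int main() {
  const unsigned NumSrcElts = 2, NumElts = 8;           // <2 x i64> as <8 x i16>
  const unsigned NarrowingRatio = NumElts / NumSrcElts; // 4
  const unsigned InsIndexC = 1;                         // wide element written
  for (unsigned ExtIndexC = 0; ExtIndexC < NumElts; ++ExtIndexC) {
    // The narrow extract can only observe the insert if it lands in the same
    // wide element, i.e. indexes 4..7 here.
    bool SameChunk = (ExtIndexC / NarrowingRatio == InsIndexC);
    assert(SameChunk == (ExtIndexC >= 4 && ExtIndexC <= 7));
  }
  return 0;
}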
@@ -259,7 +271,7 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
/// Find elements of V demanded by UserInstr.
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
- unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+ unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
// Conservatively assume that all elements are needed.
APInt UsedElts(APInt::getAllOnesValue(VWidth));
@@ -277,7 +289,7 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
case Instruction::ShuffleVector: {
ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
unsigned MaskNumElts =
- cast<VectorType>(UserInstr->getType())->getNumElements();
+ cast<FixedVectorType>(UserInstr->getType())->getNumElements();
UsedElts = APInt(VWidth, 0);
for (unsigned i = 0; i < MaskNumElts; i++) {
@@ -303,7 +315,7 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
static APInt findDemandedEltsByAllUsers(Value *V) {
- unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+ unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
APInt UnionUsedElts(VWidth, 0);
for (const Use &U : V->uses()) {
@@ -321,7 +333,7 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
return UnionUsedElts;
}
-Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
Value *SrcVec = EI.getVectorOperand();
Value *Index = EI.getIndexOperand();
if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
@@ -333,17 +345,17 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
auto *IndexC = dyn_cast<ConstantInt>(Index);
if (IndexC) {
ElementCount EC = EI.getVectorOperandType()->getElementCount();
- unsigned NumElts = EC.Min;
+ unsigned NumElts = EC.getKnownMinValue();
// InstSimplify should handle cases where the index is invalid.
// For fixed-length vector, it's invalid to extract out-of-range element.
- if (!EC.Scalable && IndexC->getValue().uge(NumElts))
+ if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
return nullptr;
// This instruction only demands the single element from the input vector.
// Skip for scalable type, the number of elements is unknown at
// compile-time.
- if (!EC.Scalable && NumElts != 1) {
+ if (!EC.isScalable() && NumElts != 1) {
// If the input vector has a single use, simplify it based on this use
// property.
if (SrcVec->hasOneUse()) {
@@ -460,7 +472,7 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
SmallVectorImpl<int> &Mask) {
assert(LHS->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, -1);
@@ -502,7 +514,7 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
unsigned ExtractedIdx =
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
unsigned NumLHSElts =
- cast<VectorType>(LHS->getType())->getNumElements();
+ cast<FixedVectorType>(LHS->getType())->getNumElements();
// This must be extracting from either LHS or RHS.
if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
@@ -531,9 +543,9 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// shufflevector to replace one or more insert/extract pairs.
static void replaceExtractElements(InsertElementInst *InsElt,
ExtractElementInst *ExtElt,
- InstCombiner &IC) {
- VectorType *InsVecType = InsElt->getType();
- VectorType *ExtVecType = ExtElt->getVectorOperandType();
+ InstCombinerImpl &IC) {
+ auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
+ auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
unsigned NumInsElts = InsVecType->getNumElements();
unsigned NumExtElts = ExtVecType->getNumElements();
@@ -614,7 +626,7 @@ using ShuffleOps = std::pair<Value *, Value *>;
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
Value *PermittedRHS,
- InstCombiner &IC) {
+ InstCombinerImpl &IC) {
assert(V->getType()->isVectorTy() && "Invalid shuffle!");
unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
@@ -661,7 +673,7 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
}
unsigned NumLHSElts =
- cast<VectorType>(RHS->getType())->getNumElements();
+ cast<FixedVectorType>(RHS->getType())->getNumElements();
Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
return std::make_pair(LR.first, RHS);
}
@@ -670,7 +682,8 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
// We've gone as far as we can: anything on the other side of the
// extractelement will already have been converted into a shuffle.
unsigned NumLHSElts =
- cast<VectorType>(EI->getOperand(0)->getType())->getNumElements();
+ cast<FixedVectorType>(EI->getOperand(0)->getType())
+ ->getNumElements();
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
return std::make_pair(EI->getOperand(0), PermittedRHS);
@@ -692,6 +705,285 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
return std::make_pair(V, nullptr);
}
+/// Look for chain of insertvalue's that fully define an aggregate, and trace
+/// back the values inserted, see if they are all were extractvalue'd from
+/// the same source aggregate from the exact same element indexes.
+/// If they were, just reuse the source aggregate.
+/// This potentially deals with PHI indirections.
+Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
+ InsertValueInst &OrigIVI) {
+ Type *AggTy = OrigIVI.getType();
+ unsigned NumAggElts;
+ switch (AggTy->getTypeID()) {
+ case Type::StructTyID:
+ NumAggElts = AggTy->getStructNumElements();
+ break;
+ case Type::ArrayTyID:
+ NumAggElts = AggTy->getArrayNumElements();
+ break;
+ default:
+ llvm_unreachable("Unhandled aggregate type?");
+ }
+
+ // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
+ // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
+ // FIXME: any interesting patterns to be caught with larger limit?
+ assert(NumAggElts > 0 && "Aggregate should have elements.");
+ if (NumAggElts > 2)
+ return nullptr;
+
+ static constexpr auto NotFound = None;
+ static constexpr auto FoundMismatch = nullptr;
+
+ // Try to find a value of each element of an aggregate.
+ // FIXME: deal with more complex, not one-dimensional, aggregate types
+ SmallVector<Optional<Value *>, 2> AggElts(NumAggElts, NotFound);
+
+ // Do we know values for each element of the aggregate?
+ auto KnowAllElts = [&AggElts]() {
+ return all_of(AggElts,
+ [](Optional<Value *> Elt) { return Elt != NotFound; });
+ };
+
+ int Depth = 0;
+
+ // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
+ // every element being overwritten twice, which should never happen.
+ static const int DepthLimit = 2 * NumAggElts;
+
+ // Recurse up the chain of `insertvalue` aggregate operands until either we've
+ // reconstructed the full initializer or we can't visit any more `insertvalue`s.
+ for (InsertValueInst *CurrIVI = &OrigIVI;
+ Depth < DepthLimit && CurrIVI && !KnowAllElts();
+ CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
+ ++Depth) {
+ Value *InsertedValue = CurrIVI->getInsertedValueOperand();
+ ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
+
+ // Don't bother with more than single-level aggregates.
+ if (Indices.size() != 1)
+ return nullptr; // FIXME: deal with more complex aggregates?
+
+ // We may have already recorded the value for this element of the aggregate.
+ // If we did, that means the CurrIVI will later be overwritten with the
+ // already-recorded value. But if not, let's record it!
+ Optional<Value *> &Elt = AggElts[Indices.front()];
+ Elt = Elt.getValueOr(InsertedValue);
+
+ // FIXME: should we handle chain-terminating undef base operand?
+ }
+
+ // Was that sufficient to deduce the full initializer for the aggregate?
+ if (!KnowAllElts())
+ return nullptr; // Give up then.
+
+ // We now want to find the source[s] of the aggregate elements we've found.
+ // By "source" we mean the original aggregate[s] from which
+ // the inserted elements were extracted. This may require PHI translation.
+
+ enum class AggregateDescription {
+ /// When analyzing the value that was inserted into an aggregate, we did
+ /// not manage to find a defining `extractvalue` instruction to analyze.
+ NotFound,
+ /// When analyzing the value that was inserted into an aggregate, we did
+ /// manage to find the defining `extractvalue` instruction[s], and everything
+ /// matched perfectly: aggregate type, element insertion/extraction index.
+ Found,
+ /// When analyzing the value that was inserted into an aggregate, we did
+ /// manage to find the defining `extractvalue` instruction, but there was
+ /// a mismatch: either the source type of the extraction didn't match the
+ /// aggregate type of the insertion,
+ /// or the extraction/insertion indices mismatched,
+ /// or different elements had different source aggregates.
+ FoundMismatch
+ };
+ auto Describe = [](Optional<Value *> SourceAggregate) {
+ if (SourceAggregate == NotFound)
+ return AggregateDescription::NotFound;
+ if (*SourceAggregate == FoundMismatch)
+ return AggregateDescription::FoundMismatch;
+ return AggregateDescription::Found;
+ };
+
+ // Given the value \p Elt that was being inserted into element \p EltIdx of an
+ // aggregate AggTy, see if \p Elt was originally defined by an
+ // appropriate extractvalue (same element index, same aggregate type).
+ // If found, return the source aggregate from which the extraction was made.
+ // If \p PredBB is provided, PHI-translate \p Elt first.
+ auto FindSourceAggregate =
+ [&](Value *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB,
+ Optional<BasicBlock *> PredBB) -> Optional<Value *> {
+ // For now(?), only deal with, at most, a single level of PHI indirection.
+ if (UseBB && PredBB)
+ Elt = Elt->DoPHITranslation(*UseBB, *PredBB);
+ // FIXME: deal with multiple levels of PHI indirection?
+
+ // Did we find an extraction?
+ auto *EVI = dyn_cast<ExtractValueInst>(Elt);
+ if (!EVI)
+ return NotFound;
+
+ Value *SourceAggregate = EVI->getAggregateOperand();
+
+ // Is the extraction from the same type into which the insertion was done?
+ if (SourceAggregate->getType() != AggTy)
+ return FoundMismatch;
+ // And the element index doesn't change between extraction and insertion?
+ if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
+ return FoundMismatch;
+
+ return SourceAggregate; // AggregateDescription::Found
+ };
+
+ // Given the elements AggElts that were used to construct the aggregate OrigIVI,
+ // see if we can find an appropriate source aggregate for each of the elements,
+ // and check that it is the same aggregate for every element. If so, return it.
+ auto FindCommonSourceAggregate =
+ [&](Optional<BasicBlock *> UseBB,
+ Optional<BasicBlock *> PredBB) -> Optional<Value *> {
+ Optional<Value *> SourceAggregate;
+
+ for (auto I : enumerate(AggElts)) {
+ assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
+ "We don't store nullptr in SourceAggregate!");
+ assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
+ (I.index() != 0) &&
+ "SourceAggregate should be valid after the the first element,");
+
+ // For this element, is there a plausible source aggregate?
+ // FIXME: we could special-case undef element, IFF we know that in the
+ // source aggregate said element isn't poison.
+ Optional<Value *> SourceAggregateForElement =
+ FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
+
+ // Okay, what have we found? Does that correlate with previous findings?
+
+ // Regardless of whether or not we have previously found a source
+ // aggregate for the preceding elements (if any), if we didn't find one for
+ // this element, pass through whatever we have just found.
+ if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
+ return SourceAggregateForElement;
+
+ // Okay, we have found a source aggregate for this element.
+ // Let's see what we already know from previous elements, if any.
+ switch (Describe(SourceAggregate)) {
+ case AggregateDescription::NotFound:
+ // This is apparently the first element that we have examined.
+ SourceAggregate = SourceAggregateForElement; // Record the aggregate!
+ continue; // Great, now look at next element.
+ case AggregateDescription::Found:
+ // We have previously already successfully examined other elements.
+ // Is this the same source aggregate we've found for other elements?
+ if (*SourceAggregateForElement != *SourceAggregate)
+ return FoundMismatch;
+ continue; // Still the same aggregate, look at next element.
+ case AggregateDescription::FoundMismatch:
+ llvm_unreachable("Can't happen. We would have early-exited then.");
+ };
+ }
+
+ assert(Describe(SourceAggregate) == AggregateDescription::Found &&
+ "Must be a valid Value");
+ return *SourceAggregate;
+ };
+
+ Optional<Value *> SourceAggregate;
+
+ // Can we find the source aggregate without looking at predecessors?
+ SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None);
+ if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
+ if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
+ return nullptr; // Conflicting source aggregates!
+ ++NumAggregateReconstructionsSimplified;
+ return replaceInstUsesWith(OrigIVI, *SourceAggregate);
+ }
+
+ // Okay, apparently we need to look at predecessors.
+
+ // We should be smart about picking the "use" basic block, which will be the
+ // merge point for the aggregate, where we'll insert the final PHI that will
+ // be used instead of OrigIVI. The basic block of OrigIVI is *not* the right
+ // choice. We should look at the blocks in which each of the AggElts is
+ // defined; they should all be defined in the same basic block.
+ BasicBlock *UseBB = nullptr;
+
+ for (const Optional<Value *> &Elt : AggElts) {
+ // If this element's value was not defined by an instruction, ignore it.
+ auto *I = dyn_cast<Instruction>(*Elt);
+ if (!I)
+ continue;
+ // Otherwise, in which basic block is this instruction located?
+ BasicBlock *BB = I->getParent();
+ // If it's the first instruction we've encountered, record the basic block.
+ if (!UseBB) {
+ UseBB = BB;
+ continue;
+ }
+ // Otherwise, this must be the same basic block we've seen previously.
+ if (UseBB != BB)
+ return nullptr;
+ }
+
+ // If *all* of the elements are basic-block-independent, meaning they are
+ // either function arguments or constant expressions, then if we couldn't handle
+ // them without predecessor-aware handling, we won't be able to handle them now.
+ if (!UseBB)
+ return nullptr;
+
+ // If we didn't manage to find the source aggregate without looking at
+ // predecessors, and there are no predecessors to look at, then we're done.
+ if (pred_empty(UseBB))
+ return nullptr;
+
+ // Arbitrary predecessor count limit.
+ static const int PredCountLimit = 64;
+
+ // Cache the (non-uniquified!) list of predecessors in a vector,
+ // checking the limit at the same time for efficiency.
+ SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
+ for (BasicBlock *Pred : predecessors(UseBB)) {
+ // Don't bother if there are too many predecessors.
+ if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
+ return nullptr;
+ Preds.emplace_back(Pred);
+ }
+
+ // For each predecessor, what is the source aggregate
+ // from which all the elements were originally extracted?
+ // Note that we want the map to have a stable iteration order!
+ SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
+ for (BasicBlock *Pred : Preds) {
+ std::pair<decltype(SourceAggregates)::iterator, bool> IV =
+ SourceAggregates.insert({Pred, nullptr});
+ // Did we already evaluate this predecessor?
+ if (!IV.second)
+ continue;
+
+ // When coming from predecessor Pred, all elements of the aggregate produced
+ // by OrigIVI should have been originally extracted from the same source
+ // aggregate. Is that so? Can we find said original aggregate?
+ SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
+ if (Describe(SourceAggregate) != AggregateDescription::Found)
+ return nullptr; // Give up.
+ IV.first->second = *SourceAggregate;
+ }
+
+ // All good! Now we just need to thread the source aggregates here.
+ // Note that we have to insert the new PHI here, ourselves, because we can't
+ // rely on InstCombinerImpl::run() inserting it into the right basic block.
+ // Note that the same block can be a predecessor more than once,
+ // and we need to preserve that invariant for the PHI node.
+ BuilderTy::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(UseBB->getFirstNonPHI());
+ auto *PHI =
+ Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
+ for (BasicBlock *Pred : Preds)
+ PHI->addIncoming(SourceAggregates[Pred], Pred);
+
+ ++NumAggregateReconstructionsSimplified;
+ return replaceInstUsesWith(OrigIVI, PHI);
+}
+
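Illustrative sketch (not part of the patch; function and value names are hypothetical) of the new aggregate-reuse fold on hand-written IR: an aggregate that is fully decomposed by extractvalue and immediately reassembled by insertvalue, with matching type and indices, is replaced by the original aggregate.
```
define { i8, i32 } @reuse_aggregate({ i8, i32 } %agg) {
  ; illustrative example, not taken from the patch's test suite
  %c0 = extractvalue { i8, i32 } %agg, 0
  %c1 = extractvalue { i8, i32 } %agg, 1
  %t0 = insertvalue { i8, i32 } undef, i8 %c0, 0
  %t1 = insertvalue { i8, i32 } %t0, i32 %c1, 1
  ; %t1 reconstructs %agg exactly, so the fold replaces all uses of %t1
  ; with %agg and the insertvalue/extractvalue chain becomes dead
  ret { i8, i32 } %t1
}
```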
/// Try to find redundant insertvalue instructions, like the following ones:
/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
@@ -699,7 +991,7 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
/// first one, making the first one redundant.
/// It should be transformed to:
/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
-Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
+Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
bool IsRedundant = false;
ArrayRef<unsigned int> FirstIndices = I.getIndices();
@@ -724,6 +1016,10 @@ Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
if (IsRedundant)
return replaceInstUsesWith(I, I.getOperand(0));
+
+ if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
+ return NewI;
+
return nullptr;
}
@@ -854,7 +1150,8 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
// For example:
// inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
// --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
- unsigned NumMaskElts = Shuf->getType()->getNumElements();
+ unsigned NumMaskElts =
+ cast<FixedVectorType>(Shuf->getType())->getNumElements();
SmallVector<int, 16> NewMask(NumMaskElts);
for (unsigned i = 0; i != NumMaskElts; ++i)
NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
@@ -892,7 +1189,8 @@ static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
// that same index value.
// For example:
// inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
- unsigned NumMaskElts = Shuf->getType()->getNumElements();
+ unsigned NumMaskElts =
+ cast<FixedVectorType>(Shuf->getType())->getNumElements();
SmallVector<int, 16> NewMask(NumMaskElts);
ArrayRef<int> OldMask = Shuf->getShuffleMask();
for (unsigned i = 0; i != NumMaskElts; ++i) {
@@ -1041,7 +1339,7 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
return nullptr;
}
-Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
Value *IdxOp = IE.getOperand(2);
@@ -1189,7 +1487,7 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
// Propagating an undefined shuffle mask element to integer div/rem is not
// allowed because those opcodes can create immediate undefined behavior
// from an undefined element in an operand.
- if (llvm::any_of(Mask, [](int M){ return M == -1; }))
+ if (llvm::is_contained(Mask, -1))
return false;
LLVM_FALLTHROUGH;
case Instruction::Add:
@@ -1222,7 +1520,7 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
// longer vector ops, but that may result in more expensive codegen.
Type *ITy = I->getType();
if (ITy->isVectorTy() &&
- Mask.size() > cast<VectorType>(ITy)->getNumElements())
+ Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
return false;
for (Value *Operand : I->operands()) {
if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
@@ -1380,7 +1678,8 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> NewOps;
bool NeedsRebuild =
- (Mask.size() != cast<VectorType>(I->getType())->getNumElements());
+ (Mask.size() !=
+ cast<FixedVectorType>(I->getType())->getNumElements());
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
Value *V;
// Recursively call evaluateInDifferentElementOrder on vector arguments
@@ -1435,7 +1734,7 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
ArrayRef<int> Mask) {
unsigned LHSElems =
- cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
+ cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
unsigned MaskElems = Mask.size();
unsigned BegIdx = Mask.front();
unsigned EndIdx = Mask.back();
@@ -1525,7 +1824,7 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
is_contained(Mask, UndefMaskElem) &&
(Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
if (MightCreatePoisonOrUB)
- NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
+ NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
// shuf (bop X, C), X, M --> bop X, C'
// shuf X, (bop X, C), M --> bop X, C'
@@ -1567,7 +1866,8 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
// For example:
// shuf (inselt undef, X, 2), undef, <2,2,undef>
// --> shuf (inselt undef, X, 0), undef, <0,0,undef>
- unsigned NumMaskElts = Shuf.getType()->getNumElements();
+ unsigned NumMaskElts =
+ cast<FixedVectorType>(Shuf.getType())->getNumElements();
SmallVector<int, 16> NewMask(NumMaskElts, 0);
for (unsigned i = 0; i != NumMaskElts; ++i)
if (Mask[i] == UndefMaskElem)
@@ -1585,7 +1885,7 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
// Canonicalize to choose from operand 0 first unless operand 1 is undefined.
// Commuting undef to operand 0 conflicts with another canonicalization.
- unsigned NumElts = Shuf.getType()->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
if (!isa<UndefValue>(Shuf.getOperand(1)) &&
Shuf.getMaskValue(0) >= (int)NumElts) {
// TODO: Can we assert that both operands of a shuffle-select are not undef
@@ -1652,7 +1952,8 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
is_contained(Mask, UndefMaskElem) &&
(Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
if (MightCreatePoisonOrUB)
- NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
+ NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
+ ConstantsAreOp1);
Value *V;
if (X == Y) {
@@ -1719,8 +2020,8 @@ static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
// and the source element type must be larger than the shuffle element type.
Type *SrcType = X->getType();
if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
- cast<VectorType>(SrcType)->getNumElements() !=
- cast<VectorType>(DestType)->getNumElements() ||
+ cast<FixedVectorType>(SrcType)->getNumElements() !=
+ cast<FixedVectorType>(DestType)->getNumElements() ||
SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
return nullptr;
@@ -1736,8 +2037,7 @@ static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
if (Mask[i] == UndefMaskElem)
continue;
uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
- assert(LSBIndex <= std::numeric_limits<int32_t>::max() &&
- "Overflowed 32-bits");
+ assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
if (Mask[i] != (int)LSBIndex)
return nullptr;
}
@@ -1764,19 +2064,19 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
// We need a narrow condition value. It must be extended with undef elements
// and have the same number of elements as this shuffle.
- unsigned NarrowNumElts = Shuf.getType()->getNumElements();
+ unsigned NarrowNumElts =
+ cast<FixedVectorType>(Shuf.getType())->getNumElements();
Value *NarrowCond;
if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
- cast<VectorType>(NarrowCond->getType())->getNumElements() !=
+ cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
NarrowNumElts ||
!cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
return nullptr;
// shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
// sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
- Value *Undef = UndefValue::get(X->getType());
- Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getShuffleMask());
- Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getShuffleMask());
+ Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
+ Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
}
@@ -1807,7 +2107,7 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
// new shuffle mask. Otherwise, copy the original mask element. Example:
// shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
// shuf X, Y, <C0, undef, C2, undef>
- unsigned NumElts = Shuf.getType()->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
SmallVector<int, 16> NewMask(NumElts);
assert(NumElts < Mask.size() &&
"Identity with extract must have less elements than its inputs");
@@ -1823,7 +2123,7 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
/// Try to replace a shuffle with an insertelement or try to replace a shuffle
/// operand with the operand of an insertelement.
static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
- InstCombiner &IC) {
+ InstCombinerImpl &IC) {
Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
SmallVector<int, 16> Mask;
Shuf.getShuffleMask(Mask);
@@ -1832,7 +2132,7 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
// TODO: This restriction could be removed if the insert has only one use
// (because the transform would require a new length-changing shuffle).
int NumElts = Mask.size();
- if (NumElts != (int)(cast<VectorType>(V0->getType())->getNumElements()))
+ if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements()))
return nullptr;
// This is a specialization of a fold in SimplifyDemandedVectorElts. We may
@@ -1844,7 +2144,7 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
uint64_t IdxC;
if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
// shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
- if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; }))
+ if (!is_contained(Mask, (int)IdxC))
return IC.replaceOperand(Shuf, 0, X);
}
if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
@@ -1852,7 +2152,7 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
// accesses to the 2nd vector input of the shuffle.
IdxC += NumElts;
// shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
- if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; }))
+ if (!is_contained(Mask, (int)IdxC))
return IC.replaceOperand(Shuf, 1, X);
}
@@ -1927,9 +2227,10 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
Value *X = Shuffle0->getOperand(0);
Value *Y = Shuffle1->getOperand(0);
if (X->getType() != Y->getType() ||
- !isPowerOf2_32(Shuf.getType()->getNumElements()) ||
- !isPowerOf2_32(Shuffle0->getType()->getNumElements()) ||
- !isPowerOf2_32(cast<VectorType>(X->getType())->getNumElements()) ||
+ !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
+ !isPowerOf2_32(
+ cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
+ !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
isa<UndefValue>(X) || isa<UndefValue>(Y))
return nullptr;
assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
@@ -1940,8 +2241,8 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
// operands directly by adjusting the shuffle mask to account for the narrower
// types:
// shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
- int NarrowElts = cast<VectorType>(X->getType())->getNumElements();
- int WideElts = Shuffle0->getType()->getNumElements();
+ int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
+ int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
ArrayRef<int> Mask = Shuf.getShuffleMask();
@@ -1974,7 +2275,7 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
return new ShuffleVectorInst(X, Y, NewMask);
}
-Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
@@ -1982,9 +2283,13 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
SVI.getType(), ShufQuery))
return replaceInstUsesWith(SVI, V);
+ // Bail out for scalable vectors
+ if (isa<ScalableVectorType>(LHS->getType()))
+ return nullptr;
+
// shuffle x, x, mask --> shuffle x, undef, mask'
- unsigned VWidth = SVI.getType()->getNumElements();
- unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
+ unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
+ unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
ArrayRef<int> Mask = SVI.getShuffleMask();
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
@@ -1998,7 +2303,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
X->getType()->isVectorTy() && VWidth == LHSWidth) {
// Try to create a scaled mask constant.
- auto *XType = cast<VectorType>(X->getType());
+ auto *XType = cast<FixedVectorType>(X->getType());
unsigned XNumElts = XType->getNumElements();
SmallVector<int, 16> ScaledMask;
if (XNumElts >= VWidth) {
@@ -2106,7 +2411,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isShuffleExtractingFromLHS(SVI, Mask)) {
Value *V = LHS;
unsigned MaskElems = Mask.size();
- VectorType *SrcTy = cast<VectorType>(V->getType());
+ auto *SrcTy = cast<FixedVectorType>(V->getType());
unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize();
unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
assert(SrcElemBitWidth && "vector elements must have a bitwidth");
@@ -2138,8 +2443,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
ShuffleMask[I] = Idx;
- V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
- ShuffleMask,
+ V = Builder.CreateShuffleVector(V, ShuffleMask,
SVI.getName() + ".extract");
BegIdx = 0;
}
@@ -2224,11 +2528,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (LHSShuffle) {
LHSOp0 = LHSShuffle->getOperand(0);
LHSOp1 = LHSShuffle->getOperand(1);
- LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
+ LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
}
if (RHSShuffle) {
RHSOp0 = RHSShuffle->getOperand(0);
- RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
+ RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
}
Value* newLHS = LHS;
Value* newRHS = RHS;
@@ -2331,17 +2635,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// If the result mask is equal to one of the original shuffle masks,
// or is a splat, do the replacement.
if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
- SmallVector<Constant*, 16> Elts;
- for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
- if (newMask[i] < 0) {
- Elts.push_back(UndefValue::get(Int32Ty));
- } else {
- Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
- }
- }
if (!newRHS)
newRHS = UndefValue::get(newLHS->getType());
- return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
+ return new ShuffleVectorInst(newLHS, newRHS, newMask);
}
return MadeChange ? &SVI : nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 17a5ec3f87fa..828fd49524ec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -59,6 +59,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
@@ -113,6 +114,9 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "instcombine"
+STATISTIC(NumWorklistIterations,
+ "Number of instruction combining iterations performed");
+
STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
@@ -123,8 +127,13 @@ STATISTIC(NumReassoc , "Number of reassociations");
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
"Controls which instructions are visited");
+// FIXME: these limits eventually should be as low as 2.
static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
+#ifndef NDEBUG
+static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
+#else
static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
+#endif
static cl::opt<bool>
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
@@ -155,7 +164,41 @@ MaxArraySize("instcombine-maxarray-size", cl::init(1024),
static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
cl::Hidden, cl::init(true));
-Value *InstCombiner::EmitGEPOffset(User *GEP) {
+Optional<Instruction *>
+InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
+ // Handle target specific intrinsics
+ if (II.getCalledFunction()->isTargetIntrinsic()) {
+ return TTI.instCombineIntrinsic(*this, II);
+ }
+ return None;
+}
+
+Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
+ IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) {
+ // Handle target specific intrinsics
+ if (II.getCalledFunction()->isTargetIntrinsic()) {
+ return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
+ KnownBitsComputed);
+ }
+ return None;
+}
+
+Optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
+ IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
+ APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) {
+ // Handle target specific intrinsics
+ if (II.getCalledFunction()->isTargetIntrinsic()) {
+ return TTI.simplifyDemandedVectorEltsIntrinsic(
+ *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+ SimplifyAndSetOp);
+ }
+ return None;
+}
+
+Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
return llvm::EmitGEPOffset(&Builder, DL, GEP);
}
@@ -168,8 +211,8 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
/// legal to convert to, in order to open up more combining opportunities.
/// NOTE: this treats i8, i16 and i32 specially, due to them being so common
/// from frontend languages.
-bool InstCombiner::shouldChangeType(unsigned FromWidth,
- unsigned ToWidth) const {
+bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
+ unsigned ToWidth) const {
bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
@@ -196,7 +239,7 @@ bool InstCombiner::shouldChangeType(unsigned FromWidth,
/// to a larger illegal type. i1 is always treated as a legal type because it is
/// a fundamental type in IR, and there are many specialized optimizations for
/// i1 types.
-bool InstCombiner::shouldChangeType(Type *From, Type *To) const {
+bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
// TODO: This could be extended to allow vectors. Datalayout changes might be
// needed to properly support that.
if (!From->isIntegerTy() || !To->isIntegerTy())
@@ -264,7 +307,8 @@ static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
/// cast to eliminate one of the associative operations:
/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
-static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombiner &IC) {
+static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
+ InstCombinerImpl &IC) {
auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
if (!Cast || !Cast->hasOneUse())
return false;
@@ -322,7 +366,7 @@ static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombiner &IC) {
/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
/// if C1 and C2 are constants.
-bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
+bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Instruction::BinaryOps Opcode = I.getOpcode();
bool Changed = false;
@@ -550,9 +594,10 @@ getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
-Value *InstCombiner::tryFactorization(BinaryOperator &I,
- Instruction::BinaryOps InnerOpcode,
- Value *A, Value *B, Value *C, Value *D) {
+Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
+ Instruction::BinaryOps InnerOpcode,
+ Value *A, Value *B, Value *C,
+ Value *D) {
assert(A && B && C && D && "All values must be provided");
Value *V = nullptr;
@@ -655,7 +700,7 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I,
/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
/// Returns the simplified value, or null if it didn't simplify.
-Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
+Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
@@ -698,8 +743,10 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
- Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
- Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I));
+ // Disable the use of undef because it's not safe to distribute undef.
+ auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
+ Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
+ Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
// Do "A op C" and "B op C" both simplify?
if (L && R) {
@@ -735,8 +782,10 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
- Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I));
- Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
+ // Disable the use of undef because it's not safe to distribute undef.
+ auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
+ Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
+ Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
// Do "A op B" and "A op C" both simplify?
if (L && R) {
@@ -769,8 +818,9 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
}
-Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
- Value *LHS, Value *RHS) {
+Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
+ Value *LHS,
+ Value *RHS) {
Value *A, *B, *C, *D, *E, *F;
bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
@@ -820,9 +870,33 @@ Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
return SI;
}
+/// Freely adapt every user of V as if V were changed to !V.
+/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
+void InstCombinerImpl::freelyInvertAllUsersOf(Value *I) {
+ for (User *U : I->users()) {
+ switch (cast<Instruction>(U)->getOpcode()) {
+ case Instruction::Select: {
+ auto *SI = cast<SelectInst>(U);
+ SI->swapValues();
+ SI->swapProfMetadata();
+ break;
+ }
+ case Instruction::Br:
+ cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
+ break;
+ case Instruction::Xor:
+ replaceInstUsesWith(cast<Instruction>(*U), I);
+ break;
+ default:
+ llvm_unreachable("Got unexpected user - out of sync with "
+ "canFreelyInvertAllUsersOf() ?");
+ }
+ }
+}
+
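A minimal sketch of what this helper does (illustrative IR, not from the patch; it assumes the caller has already verified via canFreelyInvertAllUsersOf() that these are the only users, and the xor case is not shown):
```
define i32 @invert_users(i32 %x, i32 %a, i32 %b) {
entry:
  %c = icmp eq i32 %x, 0              ; the caller rewrites this to: icmp ne i32 %x, 0
  %s = select i1 %c, i32 %a, i32 %b   ; adapted to: select i1 %c, i32 %b, i32 %a
  br i1 %c, label %t, label %f        ; adapted to: br i1 %c, label %f, label %t
t:
  ret i32 %s
f:
  ret i32 0
}
```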
/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
/// constant zero (which is the 'negate' form).
-Value *InstCombiner::dyn_castNegVal(Value *V) const {
+Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
Value *NegV;
if (match(V, m_Neg(m_Value(NegV))))
return NegV;
@@ -883,7 +957,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
return RI;
}
-Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
+Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op,
+ SelectInst *SI) {
// Don't modify shared select instructions.
if (!SI->hasOneUse())
return nullptr;
@@ -908,7 +983,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return nullptr;
// If vectors, verify that they have the same number of elements.
- if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
+ if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
return nullptr;
}
@@ -978,7 +1053,7 @@ static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
return RI;
}
-Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
+Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
unsigned NumPHIValues = PN->getNumIncomingValues();
if (NumPHIValues == 0)
return nullptr;
@@ -1004,7 +1079,9 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
BasicBlock *NonConstBB = nullptr;
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InVal = PN->getIncomingValue(i);
- if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
+ // If I is a freeze instruction, count undef as a non-constant.
+ if (match(InVal, m_ImmConstant()) &&
+ (!isa<FreezeInst>(I) || isGuaranteedNotToBeUndefOrPoison(InVal)))
continue;
if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
@@ -1029,9 +1106,11 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// operation in that block. However, if this is a critical edge, we would be
// inserting the computation on some other paths (e.g. inside a loop). Only
// do this if the pred block is unconditionally branching into the phi block.
+ // Also, make sure that the pred block is not dead code.
if (NonConstBB != nullptr) {
BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
- if (!BI || !BI->isUnconditional()) return nullptr;
+ if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonConstBB))
+ return nullptr;
}
// Okay, we can do the transformation: create the new PHI node.
@@ -1063,7 +1142,7 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// FalseVInPred versus TrueVInPred. When we have individual nonzero
// elements in the vector, we will incorrectly fold InC to
// `TrueVInPred`.
- if (InC && !isa<ConstantExpr>(InC) && isa<ConstantInt>(InC))
+ if (InC && isa<ConstantInt>(InC))
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
else {
// Generate the select in the same block as PN's current incoming block.
@@ -1097,6 +1176,15 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
Builder);
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
+ } else if (isa<FreezeInst>(&I)) {
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV;
+ if (NonConstBB == PN->getIncomingBlock(i))
+ InV = Builder.CreateFreeze(PN->getIncomingValue(i), "phi.fr");
+ else
+ InV = PN->getIncomingValue(i);
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
} else {
CastInst *CI = cast<CastInst>(&I);
Type *RetTy = CI->getType();
@@ -1111,8 +1199,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
}
}
- for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
- Instruction *User = cast<Instruction>(*UI++);
+ for (User *U : make_early_inc_range(PN->users())) {
+ Instruction *User = cast<Instruction>(U);
if (User == &I) continue;
replaceInstUsesWith(*User, NewPN);
eraseInstFromFunction(*User);
@@ -1120,7 +1208,7 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
return replaceInstUsesWith(I, NewPN);
}
-Instruction *InstCombiner::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
+Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
if (!isa<Constant>(I.getOperand(1)))
return nullptr;
@@ -1138,8 +1226,9 @@ Instruction *InstCombiner::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
/// is a sequence of GEP indices into the pointed type that will land us at the
/// specified offset. If so, fill them into NewIndices and return the resultant
/// element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
- SmallVectorImpl<Value *> &NewIndices) {
+Type *
+InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value *> &NewIndices) {
Type *Ty = PtrTy->getElementType();
if (!Ty->isSized())
return nullptr;
@@ -1208,7 +1297,7 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
/// Return a value X such that Val = X * Scale, or null if none.
/// If the multiplication is known not to overflow, then NoSignedWrap is set.
-Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
+Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
Scale.getBitWidth() && "Scale not compatible with value!");
@@ -1448,9 +1537,8 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
} while (true);
}
-Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
- // FIXME: some of this is likely fine for scalable vectors
- if (!isa<FixedVectorType>(Inst.getType()))
+Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
+ if (!isa<VectorType>(Inst.getType()))
return nullptr;
BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
@@ -1539,13 +1627,15 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
// intends to move shuffles closer to other shuffles and binops closer to
// other binops, so they can be folded. It may also enable demanded elements
// transforms.
- unsigned NumElts = cast<FixedVectorType>(Inst.getType())->getNumElements();
Constant *C;
- if (match(&Inst,
+ auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
+ if (InstVTy &&
+ match(&Inst,
m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
- m_Constant(C))) && !isa<ConstantExpr>(C) &&
- cast<FixedVectorType>(V1->getType())->getNumElements() <= NumElts) {
- assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
+ m_ImmConstant(C))) &&
+ cast<FixedVectorType>(V1->getType())->getNumElements() <=
+ InstVTy->getNumElements()) {
+ assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
"Shuffle should not change scalar type");
// Find constant NewC that has property:
@@ -1560,6 +1650,7 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
bool MayChange = true;
+ unsigned NumElts = InstVTy->getNumElements();
for (unsigned I = 0; I < NumElts; ++I) {
Constant *CElt = C->getAggregateElement(I);
if (ShMask[I] >= 0) {
@@ -1648,9 +1739,8 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
// values followed by a splat followed by the 2nd binary operation:
// bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
- UndefValue *Undef = UndefValue::get(Inst.getType());
SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
- Value *NewSplat = Builder.CreateShuffleVector(NewBO, Undef, NewMask);
+ Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
// Intersect FMF on both new binops. Other (poison-generating) flags are
@@ -1670,7 +1760,7 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
/// Try to narrow the width of a binop if at least 1 operand is an extend of
/// of a value. This requires a potentially expensive known bits check to make
/// sure the narrow op does not overflow.
-Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
+Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
// We need at least one extended operand.
Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
@@ -1750,7 +1840,7 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
// gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
// Propagate 'inbounds' and metadata from existing instructions.
// Note: using IRBuilder to create the constants for efficiency.
- SmallVector<Value *, 4> IndexC(GEP.idx_begin(), GEP.idx_end());
+ SmallVector<Value *, 4> IndexC(GEP.indices());
bool IsInBounds = GEP.isInBounds();
Value *NewTrueC = IsInBounds ? Builder.CreateInBoundsGEP(TrueC, IndexC)
: Builder.CreateGEP(TrueC, IndexC);
@@ -1759,8 +1849,8 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
}
-Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
- SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
+Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value *, 8> Ops(GEP.operands());
Type *GEPType = GEP.getType();
Type *GEPEltType = GEP.getSourceElementType();
bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
@@ -2130,7 +2220,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
if (CATy->getElementType() == StrippedPtrEltTy) {
// -> GEP i8* X, ...
- SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
+ SmallVector<Value *, 8> Idx(drop_begin(GEP.indices()));
GetElementPtrInst *Res = GetElementPtrInst::Create(
StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
@@ -2166,7 +2256,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// ->
// %0 = GEP [10 x i8] addrspace(1)* X, ...
// addrspacecast i8 addrspace(1)* %0 to i8*
- SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
+ SmallVector<Value *, 8> Idx(GEP.indices());
Value *NewGEP =
GEP.isInBounds()
? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
@@ -2308,15 +2398,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
const DataLayout &DL) {
- auto *VecVTy = cast<VectorType>(VecTy);
+ auto *VecVTy = cast<FixedVectorType>(VecTy);
return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
};
if (GEP.getNumOperands() == 3 &&
- ((GEPEltType->isArrayTy() && SrcEltType->isVectorTy() &&
+ ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
- (GEPEltType->isVectorTy() && SrcEltType->isArrayTy() &&
+ (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
// Create a new GEP here, as using `setOperand()` followed by
@@ -2511,7 +2601,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
return true;
}
-Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
+Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
// If we have a malloc call which is only used in any amount of comparisons to
// null and free calls, delete the calls and replace the comparisons with true
// or false as appropriate.
@@ -2526,10 +2616,10 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
// If we are removing an alloca with a dbg.declare, insert dbg.value calls
// before each store.
- TinyPtrVector<DbgVariableIntrinsic *> DIIs;
+ SmallVector<DbgVariableIntrinsic *, 8> DVIs;
std::unique_ptr<DIBuilder> DIB;
if (isa<AllocaInst>(MI)) {
- DIIs = FindDbgAddrUses(&MI);
+ findDbgUsers(DVIs, &MI);
DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
}
@@ -2563,8 +2653,9 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
ConstantInt::get(Type::getInt1Ty(C->getContext()),
C->isFalseWhenEqual()));
} else if (auto *SI = dyn_cast<StoreInst>(I)) {
- for (auto *DII : DIIs)
- ConvertDebugDeclareToDebugValue(DII, SI, *DIB);
+ for (auto *DVI : DVIs)
+ if (DVI->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
} else {
// Casts, GEP, or anything else: we're about to delete this instruction,
// so it can not have any valid uses.
@@ -2581,8 +2672,31 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
None, "", II->getParent());
}
- for (auto *DII : DIIs)
- eraseInstFromFunction(*DII);
+ // Remove debug intrinsics which describe the value contained within the
+ // alloca. In addition to removing dbg.{declare,addr} which simply point to
+ // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
+ //
+ // ```
+ // define void @foo(i32 %0) {
+ // %a = alloca i32 ; Deleted.
+ // store i32 %0, i32* %a
+ // dbg.value(i32 %0, "arg0") ; Not deleted.
+ // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
+ // call void @trivially_inlinable_no_op(i32* %a)
+ // ret void
+ // }
+ // ```
+ //
+ // This may not be required if we stop describing the contents of allocas
+ // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
+ // the LowerDbgDeclare utility.
+ //
+ // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
+ // "arg0" dbg.value may be stale after the call. However, failing to remove
+ // the DW_OP_deref dbg.value causes large gaps in location coverage.
+ for (auto *DVI : DVIs)
+ if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
+ DVI->eraseFromParent();
return eraseInstFromFunction(MI);
}
@@ -2670,7 +2784,7 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
return &FI;
}
-Instruction *InstCombiner::visitFree(CallInst &FI) {
+Instruction *InstCombinerImpl::visitFree(CallInst &FI) {
Value *Op = FI.getArgOperand(0);
// free undef -> unreachable.
@@ -2711,7 +2825,7 @@ static bool isMustTailCall(Value *V) {
return false;
}
-Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
+Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
if (RI.getNumOperands() == 0) // ret void
return nullptr;
@@ -2734,7 +2848,31 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
return nullptr;
}
-Instruction *InstCombiner::visitUnconditionalBranchInst(BranchInst &BI) {
+Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
+ // Try to remove the previous instruction if it must lead to unreachable.
+ // This includes instructions like stores and "llvm.assume" that may not get
+ // removed by simple dead code elimination.
+ Instruction *Prev = I.getPrevNonDebugInstruction();
+ if (Prev && !Prev->isEHPad() &&
+ isGuaranteedToTransferExecutionToSuccessor(Prev)) {
+ // Temporarily disable removal of volatile stores preceding unreachable,
+ // pending a potential LangRef change permitting volatile stores to trap.
+ // TODO: Either remove this code, or properly integrate the check into
+ // isGuaranteedToTransferExecutionToSuccessor().
+ if (auto *SI = dyn_cast<StoreInst>(Prev))
+ if (SI->isVolatile())
+ return nullptr;
+
+ // A value may still have uses before we process it here (for example, in
+ // another unreachable block), so convert those to undef.
+ replaceInstUsesWith(*Prev, UndefValue::get(Prev->getType()));
+ eraseInstFromFunction(*Prev);
+ return &I;
+ }
+ return nullptr;
+}
+
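For illustration only (hypothetical IR, not from the patch): a non-volatile store immediately preceding an unreachable is guaranteed to transfer execution to its successor, so the new visitor deletes it; a volatile store is kept, per the TODO above.
```
define void @store_then_unreachable(i32* %p) {
  ; illustrative example
  store i32 0, i32* %p   ; deleted by visitUnreachableInst
  unreachable
}
```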
+Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
assert(BI.isUnconditional() && "Only for unconditional branches.");
// If this store is the second-to-last instruction in the basic block
@@ -2763,7 +2901,7 @@ Instruction *InstCombiner::visitUnconditionalBranchInst(BranchInst &BI) {
return nullptr;
}
-Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
if (BI.isUnconditional())
return visitUnconditionalBranchInst(BI);
@@ -2799,7 +2937,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return nullptr;
}
-Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
+Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
Value *Cond = SI.getCondition();
Value *Op0;
ConstantInt *AddRHS;
@@ -2830,7 +2968,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
// Shrink the condition operand if the new type is smaller than the old type.
- // But do not shrink to a non-standard type, because backend can't generate
+ // But do not shrink to a non-standard type, because backend can't generate
// good code for that yet.
// TODO: We can make it aggressive again after fixing PR39569.
if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
@@ -2849,7 +2987,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
return nullptr;
}
-Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
Value *Agg = EV.getAggregateOperand();
if (!EV.hasIndices())
@@ -2994,10 +3132,11 @@ static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
case EHPersonality::GNU_CXX_SjLj:
case EHPersonality::GNU_ObjC:
case EHPersonality::MSVC_X86SEH:
- case EHPersonality::MSVC_Win64SEH:
+ case EHPersonality::MSVC_TableSEH:
case EHPersonality::MSVC_CXX:
case EHPersonality::CoreCLR:
case EHPersonality::Wasm_CXX:
+ case EHPersonality::XL_CXX:
return TypeInfo->isNullValue();
}
llvm_unreachable("invalid enum");
@@ -3010,7 +3149,7 @@ static bool shorter_filter(const Value *LHS, const Value *RHS) {
cast<ArrayType>(RHS->getType())->getNumElements();
}
-Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
+Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
// The logic here should be correct for any real-world personality function.
// However if that turns out not to be true, the offending logic can always
// be conditioned on the personality function, like the catch-all logic is.
@@ -3319,12 +3458,46 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return nullptr;
}
-Instruction *InstCombiner::visitFreeze(FreezeInst &I) {
+Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
Value *Op0 = I.getOperand(0);
if (Value *V = SimplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
+ // freeze (phi const, x) --> phi const, (freeze x)
+ if (auto *PN = dyn_cast<PHINode>(Op0)) {
+ if (Instruction *NV = foldOpIntoPhi(I, PN))
+ return NV;
+ }
+
+ if (match(Op0, m_Undef())) {
+ // If I is freeze(undef), see its uses and fold it to the best constant.
+ // - or: pick -1
+ // - select's condition: pick the value that leads to choosing a constant
+ // - other ops: pick 0
+ Constant *BestValue = nullptr;
+ Constant *NullValue = Constant::getNullValue(I.getType());
+ for (const auto *U : I.users()) {
+ Constant *C = NullValue;
+
+ if (match(U, m_Or(m_Value(), m_Value())))
+ C = Constant::getAllOnesValue(I.getType());
+ else if (const auto *SI = dyn_cast<SelectInst>(U)) {
+ if (SI->getCondition() == &I) {
+ APInt CondVal(1, isa<Constant>(SI->getFalseValue()) ? 0 : 1);
+ C = Constant::getIntegerValue(I.getType(), CondVal);
+ }
+ }
+
+ if (!BestValue)
+ BestValue = C;
+ else if (BestValue != C)
+ BestValue = NullValue;
+ }
+
+ return replaceInstUsesWith(I, BestValue);
+ }
+
return nullptr;
}
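A small illustrative example of the freeze(undef) handling (hypothetical IR, not from the patch): the only user is an 'or', so the freeze is folded to the all-ones constant, and the 'or' then simplifies away on a later visit.
```
define i32 @freeze_undef_or(i32 %x) {
  ; illustrative example
  %f = freeze i32 undef   ; folded to i32 -1 because its sole user is an 'or'
  %r = or i32 %x, %f      ; subsequently simplifies to -1
  ret i32 %r
}
```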
@@ -3430,7 +3603,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
return true;
}
-bool InstCombiner::run() {
+bool InstCombinerImpl::run() {
while (!Worklist.isEmpty()) {
// Walk deferred instructions in reverse order, and push them to the
// worklist, which means they'll end up popped from the worklist in-order.
@@ -3492,7 +3665,9 @@ bool InstCombiner::run() {
else
UserParent = UserInst->getParent();
- if (UserParent != BB) {
+ // Try sinking to another block. If that block is unreachable, then do
+ // not bother. SimplifyCFG should handle it.
+ if (UserParent != BB && DT.isReachableFromEntry(UserParent)) {
// See if the user is one of our successors that has only one
// predecessor, so that we don't have to split the critical edge.
bool ShouldSink = UserParent->getUniquePredecessor() == BB;
@@ -3526,7 +3701,8 @@ bool InstCombiner::run() {
// Now that we have an instruction, try combining it to simplify it.
Builder.SetInsertPoint(I);
- Builder.SetCurrentDebugLocation(I->getDebugLoc());
+ Builder.CollectMetadataToCopy(
+ I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
#ifndef NDEBUG
std::string OrigI;
@@ -3541,8 +3717,8 @@ bool InstCombiner::run() {
LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
- if (I->getDebugLoc())
- Result->setDebugLoc(I->getDebugLoc());
+ Result->copyMetadata(*I,
+ {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
@@ -3553,10 +3729,14 @@ bool InstCombiner::run() {
BasicBlock *InstParent = I->getParent();
BasicBlock::iterator InsertPos = I->getIterator();
- // If we replace a PHI with something that isn't a PHI, fix up the
- // insertion point.
- if (!isa<PHINode>(Result) && isa<PHINode>(InsertPos))
- InsertPos = InstParent->getFirstInsertionPt();
+ // Are we replacing a PHI with something that isn't a PHI, or vice versa?
+ if (isa<PHINode>(Result) != isa<PHINode>(I)) {
+ // We need to fix up the insertion point.
+ if (isa<PHINode>(I)) // PHI -> Non-PHI
+ InsertPos = InstParent->getFirstInsertionPt();
+ else // Non-PHI -> PHI
+ InsertPos = InstParent->getFirstNonPHI()->getIterator();
+ }
InstParent->getInstList().insert(InsertPos, Result);
@@ -3586,6 +3766,55 @@ bool InstCombiner::run() {
return MadeIRChange;
}
+// Track the scopes used by !alias.scope and !noalias. In a function, a
+// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
+// by both sets. If not, the declaration of the scope can be safely omitted.
+// The MDNode of the scope can be omitted as well for the instructions that are
+// part of this function. We do not do that at this point, as it might become
+// too time-consuming.
+class AliasScopeTracker {
+ SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
+ SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
+
+public:
+ void analyse(Instruction *I) {
+ // This seems to be faster than checking 'mayReadOrWriteMemory()'.
+ if (!I->hasMetadataOtherThanDebugLoc())
+ return;
+
+ auto Track = [](Metadata *ScopeList, auto &Container) {
+ const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
+ if (!MDScopeList || !Container.insert(MDScopeList).second)
+ return;
+ for (auto &MDOperand : MDScopeList->operands())
+ if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
+ Container.insert(MDScope);
+ };
+
+ Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
+ Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
+ }
+
+ bool isNoAliasScopeDeclDead(Instruction *Inst) {
+ NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
+ if (!Decl)
+ return false;
+
+ assert(Decl->use_empty() &&
+ "llvm.experimental.noalias.scope.decl in use ?");
+ const MDNode *MDSL = Decl->getScopeList();
+ assert(MDSL->getNumOperands() == 1 &&
+ "llvm.experimental.noalias.scope should refer to a single scope");
+ auto &MDOperand = MDSL->getOperand(0);
+ if (auto *MD = dyn_cast<MDNode>(MDOperand))
+ return !UsedAliasScopesAndLists.contains(MD) ||
+ !UsedNoAliasScopesAndLists.contains(MD);
+
+ // Not an MDNode? Throw it away.
+ return true;
+ }
+};
+
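
The tracker above is only useful together with a driver; a hedged sketch of the intended flow (it mirrors what prepareICWorklistFromFunction does later in this hunk; the free function is hypothetical and not part of the patch):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static void removeDeadNoAliasScopeDecls(Function &F) {
  AliasScopeTracker SeenAliasScopes;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      SeenAliasScopes.analyse(&I);   // record !alias.scope / !noalias uses
  for (BasicBlock &BB : F)
    for (Instruction &I : make_early_inc_range(BB))
      if (SeenAliasScopes.isNoAliasScopeDeclDead(&I))
        I.eraseFromParent();         // declared scope not used by both kinds
}
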
/// Populate the IC worklist from a function, by walking it in depth-first
/// order and adding all reachable code to the worklist.
///
@@ -3604,6 +3833,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
DenseMap<Constant *, Constant *> FoldedConstants;
+ AliasScopeTracker SeenAliasScopes;
do {
BasicBlock *BB = Worklist.pop_back_val();
@@ -3648,10 +3878,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
}
}
- // Skip processing debug intrinsics in InstCombine. Processing these call instructions
- // consumes non-trivial amount of time and provides no value for the optimization.
- if (!isa<DbgInfoIntrinsic>(Inst))
+ // Skip processing debug and pseudo intrinsics in InstCombine. Processing
+ // these call instructions consumes a non-trivial amount of time and
+ // provides no value for the optimization.
+ if (!Inst->isDebugOrPseudoInst()) {
InstrsForInstCombineWorklist.push_back(Inst);
+ SeenAliasScopes.analyse(Inst);
+ }
}
// Recursively visit successors. If this is a branch or switch on a
@@ -3671,8 +3904,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
}
}
- for (BasicBlock *SuccBB : successors(TI))
- Worklist.push_back(SuccBB);
+ append_range(Worklist, successors(TI));
} while (!Worklist.empty());
// Remove instructions inside unreachable blocks. This prevents the
@@ -3682,8 +3914,12 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
if (Visited.count(&BB))
continue;
- unsigned NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(&BB);
- MadeIRChange |= NumDeadInstInBB > 0;
+ unsigned NumDeadInstInBB;
+ unsigned NumDeadDbgInstInBB;
+ std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
+ removeAllNonTerminatorAndEHPadInstructions(&BB);
+
+ MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
NumDeadInst += NumDeadInstInBB;
}
@@ -3696,7 +3932,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
for (Instruction *Inst : reverse(InstrsForInstCombineWorklist)) {
// DCE instruction if trivially dead. As we iterate in reverse program
// order here, we will clean up whole chains of dead instructions.
- if (isInstructionTriviallyDead(Inst, TLI)) {
+ if (isInstructionTriviallyDead(Inst, TLI) ||
+ SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
++NumDeadInst;
LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
salvageDebugInfo(*Inst);
@@ -3713,8 +3950,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
static bool combineInstructionsOverFunction(
Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
- AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
- OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
+ DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
auto &DL = F.getParent()->getDataLayout();
MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
@@ -3738,6 +3975,7 @@ static bool combineInstructionsOverFunction(
// Iterate while there is work to do.
unsigned Iteration = 0;
while (true) {
+ ++NumWorklistIterations;
++Iteration;
if (Iteration > InfiniteLoopDetectionThreshold) {
@@ -3758,8 +3996,8 @@ static bool combineInstructionsOverFunction(
MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
- InstCombiner IC(Worklist, Builder, F.hasMinSize(), AA,
- AC, TLI, DT, ORE, BFI, PSI, DL, LI);
+ InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
+ ORE, BFI, PSI, DL, LI);
IC.MaxArraySizeForCombine = MaxArraySize;
if (!IC.run())
@@ -3782,6 +4020,7 @@ PreservedAnalyses InstCombinePass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
@@ -3792,8 +4031,8 @@ PreservedAnalyses InstCombinePass::run(Function &F,
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
- if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, BFI,
- PSI, MaxIterations, LI))
+ if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
+ BFI, PSI, MaxIterations, LI))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -3811,6 +4050,7 @@ void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -3829,6 +4069,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
@@ -3842,8 +4083,8 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
&getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
nullptr;
- return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, BFI,
- PSI, MaxIterations, LI);
+ return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
+ BFI, PSI, MaxIterations, LI);
}
char InstructionCombiningPass::ID = 0;
@@ -3862,6 +4103,7 @@ INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
"Combine redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
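
One practical consequence of the hunks above: instcombine now requires TargetTransformInfo. A hedged sketch of driving the legacy pass with a target-specific TTI (TM is assumed to be an already-configured TargetMachine, and the usual pass initialization is assumed to have run):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
using namespace llvm;

void runInstCombine(Module &M, TargetMachine *TM) {
  legacy::PassManager PM;
  // Provide target-aware TTI; without this the pass falls back to the
  // default (no-op) TTI implementation.
  PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
  PM.add(createInstructionCombiningPass());
  PM.run(M);
}
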
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 5ce21cc3616a..d393523e16e5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -105,6 +105,7 @@ static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52;
static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
+static const uint64_t kRISCV64_ShadowOffset64 = 0x20000000;
static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kFreeBSDKasan_ShadowOffset64 = 0xdffff7c000000000;
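
For context (not part of the patch): these constants feed ASan's shadow mapping Shadow = (Addr >> Scale) + Offset, with Scale normally 3, so one shadow byte covers eight application bytes. A minimal sketch:

#include <cstdint>

// Illustrative only: how a shadow offset such as the new kRISCV64 value above
// is consumed.
static uint64_t asanShadowAddress(uint64_t Addr, uint64_t Offset,
                                  unsigned Scale = 3) {
  return (Addr >> Scale) + Offset;
}
// e.g. riscv64 with this change: asanShadowAddress(Addr, 0x20000000ULL)
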
@@ -130,56 +131,48 @@ static const size_t kMaxStackMallocSize = 1 << 16; // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
-static const char *const kAsanModuleCtorName = "asan.module_ctor";
-static const char *const kAsanModuleDtorName = "asan.module_dtor";
+const char kAsanModuleCtorName[] = "asan.module_ctor";
+const char kAsanModuleDtorName[] = "asan.module_dtor";
static const uint64_t kAsanCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority for constructors.
static const uint64_t kAsanEmscriptenCtorAndDtorPriority = 50;
-static const char *const kAsanReportErrorTemplate = "__asan_report_";
-static const char *const kAsanRegisterGlobalsName = "__asan_register_globals";
-static const char *const kAsanUnregisterGlobalsName =
- "__asan_unregister_globals";
-static const char *const kAsanRegisterImageGlobalsName =
- "__asan_register_image_globals";
-static const char *const kAsanUnregisterImageGlobalsName =
- "__asan_unregister_image_globals";
-static const char *const kAsanRegisterElfGlobalsName =
- "__asan_register_elf_globals";
-static const char *const kAsanUnregisterElfGlobalsName =
- "__asan_unregister_elf_globals";
-static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
-static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *const kAsanInitName = "__asan_init";
-static const char *const kAsanVersionCheckNamePrefix =
- "__asan_version_mismatch_check_v";
-static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
-static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
-static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
+const char kAsanReportErrorTemplate[] = "__asan_report_";
+const char kAsanRegisterGlobalsName[] = "__asan_register_globals";
+const char kAsanUnregisterGlobalsName[] = "__asan_unregister_globals";
+const char kAsanRegisterImageGlobalsName[] = "__asan_register_image_globals";
+const char kAsanUnregisterImageGlobalsName[] =
+ "__asan_unregister_image_globals";
+const char kAsanRegisterElfGlobalsName[] = "__asan_register_elf_globals";
+const char kAsanUnregisterElfGlobalsName[] = "__asan_unregister_elf_globals";
+const char kAsanPoisonGlobalsName[] = "__asan_before_dynamic_init";
+const char kAsanUnpoisonGlobalsName[] = "__asan_after_dynamic_init";
+const char kAsanInitName[] = "__asan_init";
+const char kAsanVersionCheckNamePrefix[] = "__asan_version_mismatch_check_v";
+const char kAsanPtrCmp[] = "__sanitizer_ptr_cmp";
+const char kAsanPtrSub[] = "__sanitizer_ptr_sub";
+const char kAsanHandleNoReturnName[] = "__asan_handle_no_return";
static const int kMaxAsanStackMallocSizeClass = 10;
-static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
-static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
-static const char *const kAsanGenPrefix = "___asan_gen_";
-static const char *const kODRGenPrefix = "__odr_asan_gen_";
-static const char *const kSanCovGenPrefix = "__sancov_gen_";
-static const char *const kAsanSetShadowPrefix = "__asan_set_shadow_";
-static const char *const kAsanPoisonStackMemoryName =
- "__asan_poison_stack_memory";
-static const char *const kAsanUnpoisonStackMemoryName =
- "__asan_unpoison_stack_memory";
+const char kAsanStackMallocNameTemplate[] = "__asan_stack_malloc_";
+const char kAsanStackFreeNameTemplate[] = "__asan_stack_free_";
+const char kAsanGenPrefix[] = "___asan_gen_";
+const char kODRGenPrefix[] = "__odr_asan_gen_";
+const char kSanCovGenPrefix[] = "__sancov_gen_";
+const char kAsanSetShadowPrefix[] = "__asan_set_shadow_";
+const char kAsanPoisonStackMemoryName[] = "__asan_poison_stack_memory";
+const char kAsanUnpoisonStackMemoryName[] = "__asan_unpoison_stack_memory";
// ASan version script has __asan_* wildcard. Triple underscore prevents a
// linker (gold) warning about attempting to export a local symbol.
-static const char *const kAsanGlobalsRegisteredFlagName =
- "___asan_globals_registered";
+const char kAsanGlobalsRegisteredFlagName[] = "___asan_globals_registered";
-static const char *const kAsanOptionDetectUseAfterReturn =
+const char kAsanOptionDetectUseAfterReturn[] =
"__asan_option_detect_stack_use_after_return";
-static const char *const kAsanShadowMemoryDynamicAddress =
+const char kAsanShadowMemoryDynamicAddress[] =
"__asan_shadow_memory_dynamic_address";
-static const char *const kAsanAllocaPoison = "__asan_alloca_poison";
-static const char *const kAsanAllocasUnpoison = "__asan_allocas_unpoison";
+const char kAsanAllocaPoison[] = "__asan_alloca_poison";
+const char kAsanAllocasUnpoison[] = "__asan_allocas_unpoison";
// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
@@ -435,6 +428,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsKasan) {
bool IsAndroid = TargetTriple.isAndroid();
bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS();
+ bool IsMacOS = TargetTriple.isMacOSX();
bool IsFreeBSD = TargetTriple.isOSFreeBSD();
bool IsNetBSD = TargetTriple.isOSNetBSD();
bool IsPS4CPU = TargetTriple.isPS4CPU();
@@ -447,6 +441,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsMIPS64 = TargetTriple.isMIPS64();
bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb();
bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64;
+ bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64;
bool IsWindows = TargetTriple.isOSWindows();
bool IsFuchsia = TargetTriple.isOSFuchsia();
bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad;
@@ -514,8 +509,12 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
Mapping.Offset = kMIPS64_ShadowOffset64;
else if (IsIOS)
Mapping.Offset = kDynamicShadowSentinel;
+ else if (IsMacOS && IsAArch64)
+ Mapping.Offset = kDynamicShadowSentinel;
else if (IsAArch64)
Mapping.Offset = kAArch64_ShadowOffset64;
+ else if (IsRISCV64)
+ Mapping.Offset = kRISCV64_ShadowOffset64;
else
Mapping.Offset = kDefaultShadowOffset64;
}
@@ -534,6 +533,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
// we could OR the constant in a single instruction, but it's more
// efficient to load it once and use indexed addressing.
Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS4CPU &&
+ !IsRISCV64 &&
!(Mapping.Offset & (Mapping.Offset - 1)) &&
Mapping.Offset != kDynamicShadowSentinel;
bool IsAndroidWithIfuncSupport =
@@ -912,10 +912,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
AllocaInst *DynamicAllocaLayout = nullptr;
IntrinsicInst *LocalEscapeCall = nullptr;
- // Maps Value to an AllocaInst from which the Value is originated.
- using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>;
- AllocaForValueMapTy AllocaForValue;
-
bool HasInlineAsm = false;
bool HasReturnsTwiceCall = false;
@@ -969,8 +965,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void createDynamicAllocasInitStorage();
// ----------------------- Visitors.
- /// Collect all Ret instructions.
- void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); }
+ /// Collect all Ret instructions, or the musttail call instruction if it
+ /// precedes the return instruction.
+ void visitReturnInst(ReturnInst &RI) {
+ if (CallInst *CI = RI.getParent()->getTerminatingMustTailCall())
+ RetVec.push_back(CI);
+ else
+ RetVec.push_back(&RI);
+ }
/// Collect all Resume instructions.
void visitResumeInst(ResumeInst &RI) { RetVec.push_back(&RI); }
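
Why the musttail case above matters: the IR verifier requires a musttail call to be followed immediately by (an optional bitcast and) the ret, so stack unpoisoning has to be inserted before the call rather than before the return. Restated as a hypothetical helper, for illustration only:

static Instruction *stackCleanupInsertionPoint(ReturnInst &RI) {
  // Nothing may sit between a musttail call and its ret; instrument before
  // the call when one terminates the block.
  if (CallInst *CI = RI.getParent()->getTerminatingMustTailCall())
    return CI;
  return &RI;
}
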
@@ -1004,10 +1006,10 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// Unpoison dynamic allocas redzones.
void unpoisonDynamicAllocas() {
- for (auto &Ret : RetVec)
+ for (Instruction *Ret : RetVec)
unpoisonDynamicAllocasBeforeInst(Ret, DynamicAllocaLayout);
- for (auto &StackRestoreInst : StackRestoreVec)
+ for (Instruction *StackRestoreInst : StackRestoreVec)
unpoisonDynamicAllocasBeforeInst(StackRestoreInst,
StackRestoreInst->getOperand(0));
}
@@ -1066,8 +1068,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
!ConstantInt::isValueValidForType(IntptrTy, SizeValue))
return;
// Find alloca instruction that corresponds to llvm.lifetime argument.
- AllocaInst *AI =
- llvm::findAllocaForValue(II.getArgOperand(1), AllocaForValue);
+ // Currently we can only handle lifetime markers pointing to the
+ // beginning of the alloca.
+ AllocaInst *AI = findAllocaForValue(II.getArgOperand(1), true);
if (!AI) {
HasUntracedLifetimeIntrinsic = true;
return;
@@ -1562,7 +1565,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
if (ClOpt && ClOptGlobals) {
// If initialization order checking is disabled, a simple access to a
// dynamically initialized global is always valid.
- GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL));
+ GlobalVariable *G = dyn_cast<GlobalVariable>(getUnderlyingObject(Addr));
if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) {
NumOptimizedAccessesToGlobalVar++;
@@ -1572,7 +1575,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
if (ClOpt && ClOptStack) {
// A direct inbounds access to a stack variable is always valid.
- if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
+ if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) {
NumOptimizedAccessesToStackVar++;
return;
@@ -1874,6 +1877,14 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
return false;
}
+ // Do not instrument user-defined sections (with names resembling
+ // valid C identifiers)
+ if (TargetTriple.isOSBinFormatELF()) {
+ if (llvm::all_of(Section,
+ [](char c) { return llvm::isAlnum(c) || c == '_'; }))
+ return false;
+ }
+
// On COFF, if the section name contains '$', it is highly likely that the
// user is using section sorting to create an array of globals similar to
// the way initialization callbacks are registered in .init_array and
@@ -1958,9 +1969,10 @@ StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const {
case Triple::ELF: return "asan_globals";
case Triple::MachO: return "__DATA,__asan_globals,regular";
case Triple::Wasm:
+ case Triple::GOFF:
case Triple::XCOFF:
report_fatal_error(
- "ModuleAddressSanitizer not implemented for object file format.");
+ "ModuleAddressSanitizer not implemented for object file format");
case Triple::UnknownObjectFormat:
break;
}
@@ -2109,23 +2121,10 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF(
SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId);
}
- // This should never be called when there are no globals, by the logic that
- // computes the UniqueModuleId string, which is "" when there are no globals.
- // It's important that this path is only used when there are actually some
- // globals, because that means that there will certainly be a live
- // `asan_globals` input section at link time and thus `__start_asan_globals`
- // and `__stop_asan_globals` symbols will definitely be defined at link time.
- // This means there's no need for the references to them to be weak, which
- // enables better code generation because ExternalWeakLinkage implies
- // isInterposable() and thus requires GOT indirection for PIC. Since these
- // are known-defined hidden/dso_local symbols, direct PIC accesses without
- // dynamic relocation are always sufficient.
- assert(!MetadataGlobals.empty());
- assert(!UniqueModuleId.empty());
-
// Update llvm.compiler.used, adding the new metadata globals. This is
// needed so that during LTO these variables stay alive.
- appendToCompilerUsed(M, MetadataGlobals);
+ if (!MetadataGlobals.empty())
+ appendToCompilerUsed(M, MetadataGlobals);
// RegisteredFlag serves two purposes. First, we can pass it to dladdr()
// to look up the loaded image that contains it. Second, we can store in it
@@ -2138,18 +2137,15 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF(
ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
- // Create start and stop symbols. These are known to be defined by
- // the linker, see comment above.
- auto MakeStartStopGV = [&](const char *Prefix) {
- GlobalVariable *StartStop =
- new GlobalVariable(M, IntptrTy, false, GlobalVariable::ExternalLinkage,
- nullptr, Prefix + getGlobalMetadataSection());
- StartStop->setVisibility(GlobalVariable::HiddenVisibility);
- assert(StartStop->isImplicitDSOLocal());
- return StartStop;
- };
- GlobalVariable *StartELFMetadata = MakeStartStopGV("__start_");
- GlobalVariable *StopELFMetadata = MakeStartStopGV("__stop_");
+ // Create start and stop symbols.
+ GlobalVariable *StartELFMetadata = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
+ "__start_" + getGlobalMetadataSection());
+ StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
+ GlobalVariable *StopELFMetadata = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
+ "__stop_" + getGlobalMetadataSection());
+ StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
// Create a call to register the globals with the runtime.
IRB.CreateCall(AsanRegisterElfGlobals,
@@ -2353,12 +2349,9 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
NewGlobal->setSection("__TEXT,__asan_cstring,regular");
}
- // Transfer the debug info. The payload starts at offset zero so we can
- // copy the debug info over as is.
- SmallVector<DIGlobalVariableExpression *, 1> GVs;
- G->getDebugInfo(GVs);
- for (auto *GV : GVs)
- NewGlobal->addDebugInfo(GV);
+ // Transfer the debug info and type metadata. The payload starts at offset
+ // zero so we can copy the metadata over as is.
+ NewGlobal->copyMetadata(G, 0);
Value *Indices2[2];
Indices2[0] = IRB.getInt32(0);
@@ -3116,7 +3109,8 @@ void FunctionStackPoisoner::processStaticAllocas() {
int StackMallocIdx = -1;
DebugLoc EntryDebugLocation;
if (auto SP = F.getSubprogram())
- EntryDebugLocation = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ EntryDebugLocation =
+ DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP);
Instruction *InsBefore = AllocaVec[0];
IRBuilder<> IRB(InsBefore);
@@ -3324,7 +3318,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
SmallVector<uint8_t, 64> ShadowAfterReturn;
// (Un)poison the stack before all ret instructions.
- for (auto Ret : RetVec) {
+ for (Instruction *Ret : RetVec) {
IRBuilder<> IRBRet(Ret);
// Mark the current frame as retired.
IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic),
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CFGMST.h b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CFGMST.h
index 9addb5d1ba93..6580b6d7d73e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CFGMST.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CFGMST.h
@@ -20,7 +20,6 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Support/BranchProbability.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -30,9 +29,6 @@
#define DEBUG_TYPE "cfgmst"
using namespace llvm;
-static cl::opt<bool> PGOInstrumentEntry(
- "pgo-instrument-entry", cl::init(false), cl::Hidden,
- cl::desc("Force to instrument function entry basicblock."));
namespace llvm {
@@ -107,7 +103,7 @@ public:
const BasicBlock *Entry = &(F.getEntryBlock());
uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2);
// If we want to instrument the entry count, lower the weight to 0.
- if (PGOInstrumentEntry)
+ if (InstrumentFuncEntry)
EntryWeight = 0;
Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr,
*ExitOutgoing = nullptr, *ExitIncoming = nullptr;
@@ -282,14 +278,19 @@ public:
BranchProbabilityInfo *BPI;
BlockFrequencyInfo *BFI;
+ // Whether the function entry is always instrumented.
+ bool InstrumentFuncEntry;
+
public:
- CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr,
+ CFGMST(Function &Func, bool InstrumentFuncEntry_,
+ BranchProbabilityInfo *BPI_ = nullptr,
BlockFrequencyInfo *BFI_ = nullptr)
- : F(Func), BPI(BPI_), BFI(BFI_) {
+ : F(Func), BPI(BPI_), BFI(BFI_),
+ InstrumentFuncEntry(InstrumentFuncEntry_) {
buildEdges();
sortEdgesByWeight();
computeMinimumSpanningTree();
- if (PGOInstrumentEntry && (AllEdges.size() > 1))
+ if (AllEdges.size() > 1 && InstrumentFuncEntry)
std::iter_swap(std::move(AllEdges.begin()),
std::move(AllEdges.begin() + AllEdges.size() - 1));
}
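
With the cl::opt gone from this header, entry instrumentation becomes the caller's decision; a hedged usage sketch (Edge and BBInfo stand for whatever types the instrumentation client already instantiates):

// Illustrative only: the flag is now passed in, e.g. by the PGO
// instrumentation code from its own command-line option.
bool InstrumentEntry = /* caller's policy */ true;
CFGMST<Edge, BBInfo> MST(F, InstrumentEntry, BPI, BFI);
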
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index 0cc0d9b07387..9acd82c005e6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -53,7 +53,8 @@ static bool runCGProfilePass(
InstrProfSymtab Symtab;
auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
Function *CalledF, uint64_t NewCount) {
- if (!CalledF || !TTI.isLoweredToCall(CalledF))
+ if (!CalledF || !TTI.isLoweredToCall(CalledF) ||
+ CalledF->hasDLLImportStorageClass())
return;
uint64_t &Count = Counts[std::make_pair(F, CalledF)];
Count = SaturatingAdd(Count, NewCount);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index a99c58b74fb1..927c34180db9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -260,10 +260,9 @@ class CHRScope {
if (TailRegionSet.count(Parent))
return false;
- assert(llvm::find_if(RegInfos,
- [&Parent](const RegInfo &RI) {
- return Parent == RI.R;
- }) != RegInfos.end() &&
+ assert(llvm::any_of(
+ RegInfos,
+ [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
"Must be in head");
return true;
});
@@ -732,7 +731,7 @@ static Instruction* getBranchInsertPoint(RegInfo &RI) {
}
}
for (Instruction &I : *EntryBB) {
- if (EntryBlockSelectSet.count(&I) > 0) {
+ if (EntryBlockSelectSet.contains(&I)) {
assert(&I == HoistPoint &&
"HoistPoint must be the first one in Selects");
break;
@@ -950,10 +949,9 @@ void CHR::checkScopeHoistable(CHRScope *Scope) {
<< "Dropped select due to unhoistable branch";
});
}
- Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
- [EntryBB](SelectInst *SI) {
- return SI->getParent() == EntryBB;
- }), Selects.end());
+ llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
+ return SI->getParent() == EntryBB;
+ });
Unhoistables.clear();
InsertPoint = Branch;
}
@@ -1249,7 +1247,7 @@ SmallVector<CHRScope *, 8> CHR::splitScope(
SmallVector<CHRScope *, 8> SubSplits = splitScope(
Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
SplitUnhoistables);
- NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
+ llvm::append_range(NewSubs, SubSplits);
}
Split->Subs = NewSubs;
}
@@ -1306,17 +1304,17 @@ void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
for (RegInfo &RI : Scope->RegInfos) {
if (RI.HasBranch) {
Region *R = RI.R;
- if (TrueBiasedRegionsGlobal.count(R) > 0)
+ if (TrueBiasedRegionsGlobal.contains(R))
OutermostScope->TrueBiasedRegions.insert(R);
- else if (FalseBiasedRegionsGlobal.count(R) > 0)
+ else if (FalseBiasedRegionsGlobal.contains(R))
OutermostScope->FalseBiasedRegions.insert(R);
else
llvm_unreachable("Must be biased");
}
for (SelectInst *SI : RI.Selects) {
- if (TrueBiasedSelectsGlobal.count(SI) > 0)
+ if (TrueBiasedSelectsGlobal.contains(SI))
OutermostScope->TrueBiasedSelects.insert(SI);
- else if (FalseBiasedSelectsGlobal.count(SI) > 0)
+ else if (FalseBiasedSelectsGlobal.contains(SI))
OutermostScope->FalseBiasedSelects.insert(SI);
else
llvm_unreachable("Must be biased");
@@ -1399,8 +1397,8 @@ void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
DenseSet<Instruction *> HoistStops;
bool IsHoisted = false;
if (RI.HasBranch) {
- assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
- OutermostScope->FalseBiasedRegions.count(R) > 0) &&
+ assert((OutermostScope->TrueBiasedRegions.contains(R) ||
+ OutermostScope->FalseBiasedRegions.contains(R)) &&
"Must be truthy or falsy");
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
// Note checkHoistValue fills in HoistStops.
@@ -1412,8 +1410,8 @@ void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
IsHoisted = true;
}
for (SelectInst *SI : RI.Selects) {
- assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
- OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
+ assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
+ OutermostScope->FalseBiasedSelects.contains(SI)) &&
"Must be true or false biased");
// Note checkHoistValue fills in HoistStops.
DenseMap<Instruction *, bool> Visited;
@@ -1609,9 +1607,7 @@ static void insertTrivialPHIs(CHRScope *Scope,
// Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
// ExitBlock. Replace I with the new phi in UI unless UI is another
// phi at ExitBlock.
- unsigned PredCount = std::distance(pred_begin(ExitBlock),
- pred_end(ExitBlock));
- PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
+ PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "",
&ExitBlock->front());
for (BasicBlock *Pred : predecessors(ExitBlock)) {
PN->addIncoming(&I, Pred);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 284631900731..1b14b8d56994 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -46,6 +46,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -78,6 +79,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -104,10 +106,18 @@
using namespace llvm;
+// This must be consistent with ShadowWidthBits.
+static const Align kShadowTLSAlignment = Align(2);
+
+// The size of TLS variables. These constants must be kept in sync with the ones
+// in dfsan.cpp.
+static const unsigned kArgTLSSize = 800;
+static const unsigned kRetvalTLSSize = 800;
+
// External symbol to be used when generating the shadow address for
// architectures with multiple VMAs. Instead of using a constant integer
// the runtime will set the external mask based on the VMA range.
-static const char *const kDFSanExternShadowPtrMask = "__dfsan_shadow_ptr_mask";
+const char kDFSanExternShadowPtrMask[] = "__dfsan_shadow_ptr_mask";
// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct. For example,
@@ -167,8 +177,8 @@ static cl::opt<bool> ClDebugNonzeroLabels(
//
// If this flag is set to true, the user must provide definitions for the
// following callback functions:
-// void __dfsan_load_callback(dfsan_label Label);
-// void __dfsan_store_callback(dfsan_label Label);
+// void __dfsan_load_callback(dfsan_label Label, void* addr);
+// void __dfsan_store_callback(dfsan_label Label, void* addr);
// void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
// void __dfsan_cmp_callback(dfsan_label CombinedLabel);
static cl::opt<bool> ClEventCallbacks(
@@ -176,6 +186,21 @@ static cl::opt<bool> ClEventCallbacks(
cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
cl::Hidden, cl::init(false));
+// Use a distinct bit for each base label, enabling faster unions with less
+// instrumentation. Limits the max number of base labels to 16.
+static cl::opt<bool> ClFast16Labels(
+ "dfsan-fast-16-labels",
+ cl::desc("Use more efficient instrumentation, limiting the number of "
+ "labels to 16."),
+ cl::Hidden, cl::init(false));
+
+// Controls whether the pass tracks the control flow of select instructions.
+static cl::opt<bool> ClTrackSelectControlFlow(
+ "dfsan-track-select-control-flow",
+ cl::desc("Propagate labels from condition values of select instructions "
+ "to results."),
+ cl::Hidden, cl::init(true));
+
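
A short illustration of why the fast16 scheme is cheaper (conceptual C++ restatement, not code emitted by this pass): with one bit per base label a union is a plain OR, while the default scheme calls into the runtime to intern the pair.

#include <cstdint>
using dfsan_label = uint16_t;
extern "C" dfsan_label __dfsan_union(dfsan_label, dfsan_label); // runtime hook

dfsan_label unionFast16(dfsan_label A, dfsan_label B) {
  return A | B;               // what -dfsan-fast-16-labels reduces unions to
}
dfsan_label unionDefault(dfsan_label A, dfsan_label B) {
  return __dfsan_union(A, B); // runtime call in the default union scheme
}
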
static StringRef GetGlobalTypeString(const GlobalValue &G) {
// Types of GlobalVariables are always pointer types.
Type *GType = G.getValueType();
@@ -292,7 +317,7 @@ AttributeList TransformFunctionAttributes(
llvm::makeArrayRef(ArgumentAttributes));
}
-class DataFlowSanitizer : public ModulePass {
+class DataFlowSanitizer {
friend struct DFSanFunction;
friend class DFSanVisitor;
@@ -336,20 +361,16 @@ class DataFlowSanitizer : public ModulePass {
Module *Mod;
LLVMContext *Ctx;
- IntegerType *ShadowTy;
- PointerType *ShadowPtrTy;
+ Type *Int8Ptr;
+ /// The shadow type for all primitive types and vector types.
+ IntegerType *PrimitiveShadowTy;
+ PointerType *PrimitiveShadowPtrTy;
IntegerType *IntptrTy;
- ConstantInt *ZeroShadow;
+ ConstantInt *ZeroPrimitiveShadow;
ConstantInt *ShadowPtrMask;
ConstantInt *ShadowPtrMul;
Constant *ArgTLS;
Constant *RetvalTLS;
- void *(*GetArgTLSPtr)();
- void *(*GetRetvalTLSPtr)();
- FunctionType *GetArgTLSTy;
- FunctionType *GetRetvalTLSTy;
- Constant *GetArgTLS;
- Constant *GetRetvalTLS;
Constant *ExternalShadowMask;
FunctionType *DFSanUnionFnTy;
FunctionType *DFSanUnionLoadFnTy;
@@ -357,11 +378,13 @@ class DataFlowSanitizer : public ModulePass {
FunctionType *DFSanSetLabelFnTy;
FunctionType *DFSanNonzeroLabelFnTy;
FunctionType *DFSanVarargWrapperFnTy;
- FunctionType *DFSanLoadStoreCmpCallbackFnTy;
+ FunctionType *DFSanCmpCallbackFnTy;
+ FunctionType *DFSanLoadStoreCallbackFnTy;
FunctionType *DFSanMemTransferCallbackFnTy;
FunctionCallee DFSanUnionFn;
FunctionCallee DFSanCheckedUnionFn;
FunctionCallee DFSanUnionLoadFn;
+ FunctionCallee DFSanUnionLoadFast16LabelsFn;
FunctionCallee DFSanUnimplementedFn;
FunctionCallee DFSanSetLabelFn;
FunctionCallee DFSanNonzeroLabelFn;
@@ -392,15 +415,43 @@ class DataFlowSanitizer : public ModulePass {
void initializeCallbackFunctions(Module &M);
void initializeRuntimeFunctions(Module &M);
-public:
- static char ID;
+ bool init(Module &M);
+
+ /// Returns whether the pass tracks labels for struct fields and array
+ /// indices. This is only supported in fast16-labels mode with the TLS ABI.
+ bool shouldTrackFieldsAndIndices();
+
+ /// Returns a zero constant with the shadow type of OrigTy.
+ ///
+ /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2),...}
+ /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
+ /// getZeroShadow(other type) = i16(0)
+ ///
+ /// Note that a zero shadow is always i16(0) when shouldTrackFieldsAndIndices
+ /// returns false.
+ Constant *getZeroShadow(Type *OrigTy);
+ /// Returns a zero constant with the shadow type of V's type.
+ Constant *getZeroShadow(Value *V);
+
+ /// Checks if V is a zero shadow.
+ bool isZeroShadow(Value *V);
+
+ /// Returns the shadow type of OrigTy.
+ ///
+ /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
+ /// getShadowTy([n x T]) = [n x getShadowTy(T)]
+ /// getShadowTy(other type) = i16
+ ///
+ /// Note that a shadow type is always i16 when shouldTrackFieldsAndIndices
+ /// returns false.
+ Type *getShadowTy(Type *OrigTy);
+ /// Returns the shadow type of V's type.
+ Type *getShadowTy(Value *V);
- DataFlowSanitizer(
- const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
- void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
+public:
+ DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);
- bool doInitialization(Module &M) override;
- bool runOnModule(Module &M) override;
+ bool runImpl(Module &M);
};
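
A worked example of the shadow-type recursion documented above (fast16 TLS mode assumed; in any other mode everything stays the single primitive i16 shadow):

//   original type:      { i32, [4 x i8*], <2 x float> }
//   getShadowTy(...):   { i16, [4 x i16], i16 }   // vectors stay primitive
//   getZeroShadow(...): zeroinitializer of { i16, [4 x i16], i16 }
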
struct DFSanFunction {
@@ -409,8 +460,6 @@ struct DFSanFunction {
DominatorTree DT;
DataFlowSanitizer::InstrumentedABI IA;
bool IsNativeABI;
- Value *ArgTLSPtr = nullptr;
- Value *RetvalTLSPtr = nullptr;
AllocaInst *LabelReturnAlloca = nullptr;
DenseMap<Value *, Value *> ValShadowMap;
DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
@@ -419,12 +468,17 @@ struct DFSanFunction {
std::vector<Value *> NonZeroChecks;
bool AvoidNewBlocks;
- struct CachedCombinedShadow {
- BasicBlock *Block;
+ struct CachedShadow {
+ BasicBlock *Block; // The block where Shadow is defined.
Value *Shadow;
};
- DenseMap<std::pair<Value *, Value *>, CachedCombinedShadow>
- CachedCombinedShadows;
+ /// Maps a value to its latest shadow value in terms of the dominator tree.
+ DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
+ /// Maps a value to the latest collapsed shadow value it was converted to,
+ /// in terms of the dominator tree. When ClDebugNonzeroLabels is on, this
+ /// cache is used in a post-processing step where CFG blocks are split, so it
+ /// does not cache per BasicBlock like CachedShadows but relies on domination
+ /// between values.
+ DenseMap<Value *, Value *> CachedCollapsedShadows;
DenseMap<Value *, std::set<Value *>> ShadowElements;
DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
@@ -435,17 +489,56 @@ struct DFSanFunction {
AvoidNewBlocks = F->size() > 1000;
}
- Value *getArgTLSPtr();
- Value *getArgTLS(unsigned Index, Instruction *Pos);
- Value *getRetvalTLS();
+ /// Computes the shadow address for a given function argument.
+ ///
+ /// Shadow = ArgTLS+ArgOffset.
+ Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);
+
+ /// Computes the shadow address for a retval.
+ Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);
+
Value *getShadow(Value *V);
void setShadow(Instruction *I, Value *Shadow);
+ /// Generates IR to compute the union of the two given shadows, inserting it
+ /// before Pos. The combined value is with primitive type.
Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
+ /// Combines the shadow values of V1 and V2, then converts the combined value
+ /// with primitive type into a shadow value with the original type T.
+ Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
+ Instruction *Pos);
Value *combineOperandShadows(Instruction *Inst);
Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align,
Instruction *Pos);
- void storeShadow(Value *Addr, uint64_t Size, Align Alignment, Value *Shadow,
- Instruction *Pos);
+ void storePrimitiveShadow(Value *Addr, uint64_t Size, Align Alignment,
+ Value *PrimitiveShadow, Instruction *Pos);
+ /// Applies PrimitiveShadow to all primitive subtypes of T, returning
+ /// the expanded shadow value.
+ ///
+ /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
+ /// EFP([n x T], PS) = [n x EFP(T,PS)]
+ /// EFP(other types, PS) = PS
+ Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
+ Instruction *Pos);
+ /// Collapses Shadow into a single primitive shadow value, unioning all
+ /// primitive shadow values in the process. Returns the final primitive
+ /// shadow value.
+ ///
+ /// CTP({V1,V2,...}) = UNION(CTP(V1),CTP(V2),...)
+ /// CTP([V1,V2,...]) = UNION(CTP(V1),CTP(V2),...)
+ /// CTP(other type V) = V
+ Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos);
+
+private:
+ /// Collapses the shadow with aggregate type into a single primitive shadow
+ /// value.
+ template <class AggregateType>
+ Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
+ IRBuilder<> &IRB);
+
+ Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);
+
+ /// Returns the shadow value of an argument A.
+ Value *getShadowForTLSArgument(Argument *A);
};
class DFSanVisitor : public InstVisitor<DFSanVisitor> {
@@ -485,25 +578,10 @@ public:
} // end anonymous namespace
-char DataFlowSanitizer::ID;
-
-INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
- "DataFlowSanitizer: dynamic data flow analysis.", false, false)
-
-ModulePass *
-llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles,
- void *(*getArgTLS)(),
- void *(*getRetValTLS)()) {
- return new DataFlowSanitizer(ABIListFiles, getArgTLS, getRetValTLS);
-}
-
DataFlowSanitizer::DataFlowSanitizer(
- const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(),
- void *(*getRetValTLS)())
- : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) {
+ const std::vector<std::string> &ABIListFiles) {
std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
- AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(),
- ClABIListFiles.end());
+ llvm::append_range(AllABIListFiles, ClABIListFiles);
// FIXME: should we propagate vfs::FileSystem to this constructor?
ABIList.set(
SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
@@ -511,12 +589,12 @@ DataFlowSanitizer::DataFlowSanitizer(
FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
- ArgTypes.append(T->getNumParams(), ShadowTy);
+ ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
if (T->isVarArg())
- ArgTypes.push_back(ShadowPtrTy);
+ ArgTypes.push_back(PrimitiveShadowPtrTy);
Type *RetType = T->getReturnType();
if (!RetType->isVoidTy())
- RetType = StructType::get(RetType, ShadowTy);
+ RetType = StructType::get(RetType, PrimitiveShadowTy);
return FunctionType::get(RetType, ArgTypes, T->isVarArg());
}
@@ -525,10 +603,10 @@ FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
SmallVector<Type *, 4> ArgTypes;
ArgTypes.push_back(T->getPointerTo());
ArgTypes.append(T->param_begin(), T->param_end());
- ArgTypes.append(T->getNumParams(), ShadowTy);
+ ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
Type *RetType = T->getReturnType();
if (!RetType->isVoidTy())
- ArgTypes.push_back(ShadowPtrTy);
+ ArgTypes.push_back(PrimitiveShadowPtrTy);
return FunctionType::get(T->getReturnType(), ArgTypes, false);
}
@@ -554,18 +632,174 @@ TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
}
}
for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
- ArgTypes.push_back(ShadowTy);
+ ArgTypes.push_back(PrimitiveShadowTy);
if (T->isVarArg())
- ArgTypes.push_back(ShadowPtrTy);
+ ArgTypes.push_back(PrimitiveShadowPtrTy);
Type *RetType = T->getReturnType();
if (!RetType->isVoidTy())
- ArgTypes.push_back(ShadowPtrTy);
+ ArgTypes.push_back(PrimitiveShadowPtrTy);
return TransformedFunction(
T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
ArgumentIndexMapping);
}
-bool DataFlowSanitizer::doInitialization(Module &M) {
+bool DataFlowSanitizer::isZeroShadow(Value *V) {
+ if (!shouldTrackFieldsAndIndices())
+ return ZeroPrimitiveShadow == V;
+
+ Type *T = V->getType();
+ if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return CI->isZero();
+ return false;
+ }
+
+ return isa<ConstantAggregateZero>(V);
+}
+
+bool DataFlowSanitizer::shouldTrackFieldsAndIndices() {
+ return getInstrumentedABI() == DataFlowSanitizer::IA_TLS && ClFast16Labels;
+}
+
+Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
+ if (!shouldTrackFieldsAndIndices())
+ return ZeroPrimitiveShadow;
+
+ if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
+ return ZeroPrimitiveShadow;
+ Type *ShadowTy = getShadowTy(OrigTy);
+ return ConstantAggregateZero::get(ShadowTy);
+}
+
+Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
+ return getZeroShadow(V->getType());
+}
+
+static Value *expandFromPrimitiveShadowRecursive(
+ Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
+ Value *PrimitiveShadow, IRBuilder<> &IRB) {
+ if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
+ return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);
+
+ if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
+ for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
+ Indices.push_back(Idx);
+ Shadow = expandFromPrimitiveShadowRecursive(
+ Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
+ Indices.pop_back();
+ }
+ return Shadow;
+ }
+
+ if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
+ for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
+ Indices.push_back(Idx);
+ Shadow = expandFromPrimitiveShadowRecursive(
+ Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
+ Indices.pop_back();
+ }
+ return Shadow;
+ }
+ llvm_unreachable("Unexpected shadow type");
+}
+
+Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
+ Instruction *Pos) {
+ Type *ShadowTy = DFS.getShadowTy(T);
+
+ if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
+ return PrimitiveShadow;
+
+ if (DFS.isZeroShadow(PrimitiveShadow))
+ return DFS.getZeroShadow(ShadowTy);
+
+ IRBuilder<> IRB(Pos);
+ SmallVector<unsigned, 4> Indices;
+ Value *Shadow = UndefValue::get(ShadowTy);
+ Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
+ PrimitiveShadow, IRB);
+
+ // Caches the primitive shadow value that built the shadow value.
+ CachedCollapsedShadows[Shadow] = PrimitiveShadow;
+ return Shadow;
+}
+
+template <class AggregateType>
+Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
+ IRBuilder<> &IRB) {
+ if (!AT->getNumElements())
+ return DFS.ZeroPrimitiveShadow;
+
+ Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
+ Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);
+
+ for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
+ Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
+ Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
+ Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
+ }
+ return Aggregator;
+}
+
+Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
+ IRBuilder<> &IRB) {
+ Type *ShadowTy = Shadow->getType();
+ if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
+ return Shadow;
+ if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
+ return collapseAggregateShadow<>(AT, Shadow, IRB);
+ if (StructType *ST = dyn_cast<StructType>(ShadowTy))
+ return collapseAggregateShadow<>(ST, Shadow, IRB);
+ llvm_unreachable("Unexpected shadow type");
+}
+
+Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
+ Instruction *Pos) {
+ Type *ShadowTy = Shadow->getType();
+ if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
+ return Shadow;
+
+ assert(DFS.shouldTrackFieldsAndIndices());
+
+ // Checks if the cached collapsed shadow value dominates Pos.
+ Value *&CS = CachedCollapsedShadows[Shadow];
+ if (CS && DT.dominates(CS, Pos))
+ return CS;
+
+ IRBuilder<> IRB(Pos);
+ Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
+ // Caches the converted primitive shadow value.
+ CS = PrimitiveShadow;
+ return PrimitiveShadow;
+}
+
+Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
+ if (!shouldTrackFieldsAndIndices())
+ return PrimitiveShadowTy;
+
+ if (!OrigTy->isSized())
+ return PrimitiveShadowTy;
+ if (isa<IntegerType>(OrigTy))
+ return PrimitiveShadowTy;
+ if (isa<VectorType>(OrigTy))
+ return PrimitiveShadowTy;
+ if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
+ return ArrayType::get(getShadowTy(AT->getElementType()),
+ AT->getNumElements());
+ if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
+ SmallVector<Type *, 4> Elements;
+ for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
+ Elements.push_back(getShadowTy(ST->getElementType(I)));
+ return StructType::get(*Ctx, Elements);
+ }
+ return PrimitiveShadowTy;
+}
+
+Type *DataFlowSanitizer::getShadowTy(Value *V) {
+ return getShadowTy(V->getType());
+}
+
+bool DataFlowSanitizer::init(Module &M) {
Triple TargetTriple(M.getTargetTriple());
bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
bool IsMIPS64 = TargetTriple.isMIPS64();
@@ -576,10 +810,11 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
Mod = &M;
Ctx = &M.getContext();
- ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
- ShadowPtrTy = PointerType::getUnqual(ShadowTy);
+ Int8Ptr = Type::getInt8PtrTy(*Ctx);
+ PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
+ PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
IntptrTy = DL.getIntPtrType(*Ctx);
- ZeroShadow = ConstantInt::getSigned(ShadowTy, 0);
+ ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidthBytes);
if (IsX86_64)
ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
@@ -591,44 +826,34 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
else
report_fatal_error("unsupported triple");
- Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy };
+ Type *DFSanUnionArgs[2] = {PrimitiveShadowTy, PrimitiveShadowTy};
DFSanUnionFnTy =
- FunctionType::get(ShadowTy, DFSanUnionArgs, /*isVarArg=*/ false);
- Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy };
- DFSanUnionLoadFnTy =
- FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false);
+ FunctionType::get(PrimitiveShadowTy, DFSanUnionArgs, /*isVarArg=*/false);
+ Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
+ DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
+ /*isVarArg=*/false);
DFSanUnimplementedFnTy = FunctionType::get(
Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
- Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy };
+ Type *DFSanSetLabelArgs[3] = {PrimitiveShadowTy, Type::getInt8PtrTy(*Ctx),
+ IntptrTy};
DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
DFSanSetLabelArgs, /*isVarArg=*/false);
- DFSanNonzeroLabelFnTy = FunctionType::get(
- Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
+ DFSanNonzeroLabelFnTy =
+ FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
DFSanVarargWrapperFnTy = FunctionType::get(
Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
- DFSanLoadStoreCmpCallbackFnTy =
- FunctionType::get(Type::getVoidTy(*Ctx), ShadowTy, /*isVarArg=*/false);
- Type *DFSanMemTransferCallbackArgs[2] = {ShadowPtrTy, IntptrTy};
+ DFSanCmpCallbackFnTy =
+ FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
+ /*isVarArg=*/false);
+ Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
+ DFSanLoadStoreCallbackFnTy =
+ FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
+ /*isVarArg=*/false);
+ Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
DFSanMemTransferCallbackFnTy =
FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
/*isVarArg=*/false);
- if (GetArgTLSPtr) {
- Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
- ArgTLS = nullptr;
- GetArgTLSTy = FunctionType::get(PointerType::getUnqual(ArgTLSTy), false);
- GetArgTLS = ConstantExpr::getIntToPtr(
- ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
- PointerType::getUnqual(GetArgTLSTy));
- }
- if (GetRetvalTLSPtr) {
- RetvalTLS = nullptr;
- GetRetvalTLSTy = FunctionType::get(PointerType::getUnqual(ShadowTy), false);
- GetRetvalTLS = ConstantExpr::getIntToPtr(
- ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
- PointerType::getUnqual(GetRetvalTLSTy));
- }
-
ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
return true;
}
@@ -729,14 +954,21 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
else
RI = ReturnInst::Create(*Ctx, CI, BB);
+ // F is called by a wrapped custom function with primitive shadows. So
+ // its arguments and return value need conversion.
DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
- for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N)
- DFSF.ValShadowMap[&*ValAI] = &*ShadowAI;
+ for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) {
+ Value *Shadow =
+ DFSF.expandFromPrimitiveShadow(ValAI->getType(), &*ShadowAI, CI);
+ DFSF.ValShadowMap[&*ValAI] = Shadow;
+ }
DFSanVisitor(DFSF).visitCallInst(*CI);
- if (!FT->getReturnType()->isVoidTy())
- new StoreInst(DFSF.getShadow(RI->getReturnValue()),
- &*std::prev(F->arg_end()), RI);
+ if (!FT->getReturnType()->isVoidTy()) {
+ Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(
+ DFSF.getShadow(RI->getReturnValue()), RI);
+ new StoreInst(PrimitiveShadow, &*std::prev(F->arg_end()), RI);
+ }
}
return cast<Constant>(C.getCallee());
@@ -781,6 +1013,17 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
DFSanUnionLoadFn =
Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
}
+ {
+ AttributeList AL;
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::ReadOnly);
+ AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+ Attribute::ZExt);
+ DFSanUnionLoadFast16LabelsFn = Mod->getOrInsertFunction(
+ "__dfsan_union_load_fast16labels", DFSanUnionLoadFnTy, AL);
+ }
DFSanUnimplementedFn =
Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
{
@@ -798,16 +1041,18 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
// Initializes event callback functions and declare them in the module
void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback",
- DFSanLoadStoreCmpCallbackFnTy);
- DFSanStoreCallbackFn = Mod->getOrInsertFunction(
- "__dfsan_store_callback", DFSanLoadStoreCmpCallbackFnTy);
+ DFSanLoadStoreCallbackFnTy);
+ DFSanStoreCallbackFn = Mod->getOrInsertFunction("__dfsan_store_callback",
+ DFSanLoadStoreCallbackFnTy);
DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
"__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
- DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback",
- DFSanLoadStoreCmpCallbackFnTy);
+ DFSanCmpCallbackFn =
+ Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy);
}
-bool DataFlowSanitizer::runOnModule(Module &M) {
+bool DataFlowSanitizer::runImpl(Module &M) {
+ init(M);
+
if (ABIList.isIn(M, "skip"))
return false;
@@ -816,20 +1061,18 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
bool Changed = false;
- if (!GetArgTLSPtr) {
- Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
- ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
- if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS)) {
- Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
- G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
- }
+ Type *ArgTLSTy = ArrayType::get(Type::getInt64Ty(*Ctx), kArgTLSSize / 8);
+ ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS)) {
+ Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
+ G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
}
- if (!GetRetvalTLSPtr) {
- RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy);
- if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS)) {
- Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
- G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
- }
+ Type *RetvalTLSTy =
+ ArrayType::get(Type::getInt64Ty(*Ctx), kRetvalTLSSize / 8);
+ RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", RetvalTLSTy);
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS)) {
+ Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
+ G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
}
ExternalShadowMask =
@@ -845,6 +1088,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
&i != DFSanUnionFn.getCallee()->stripPointerCasts() &&
&i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() &&
&i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanUnionLoadFast16LabelsFn.getCallee()->stripPointerCasts() &&
&i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() &&
&i != DFSanSetLabelFn.getCallee()->stripPointerCasts() &&
&i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() &&
@@ -1044,7 +1288,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
Pos = Pos->getNextNode();
IRBuilder<> IRB(Pos);
- Value *Ne = IRB.CreateICmpNE(V, DFSF.DFS.ZeroShadow);
+ Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
+ Value *Ne =
+ IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
IRBuilder<> ThenIRB(BI);
@@ -1057,50 +1303,61 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
}
-Value *DFSanFunction::getArgTLSPtr() {
- if (ArgTLSPtr)
- return ArgTLSPtr;
- if (DFS.ArgTLS)
- return ArgTLSPtr = DFS.ArgTLS;
+Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
+ Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
+ if (ArgOffset)
+ Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
+ "_dfsarg");
+}
- IRBuilder<> IRB(&F->getEntryBlock().front());
- return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLSTy, DFS.GetArgTLS, {});
+Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
+ return IRB.CreatePointerCast(
+ DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
}
-Value *DFSanFunction::getRetvalTLS() {
- if (RetvalTLSPtr)
- return RetvalTLSPtr;
- if (DFS.RetvalTLS)
- return RetvalTLSPtr = DFS.RetvalTLS;
+Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
+ unsigned ArgOffset = 0;
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ for (auto &FArg : F->args()) {
+ if (!FArg.getType()->isSized()) {
+ if (A == &FArg)
+ break;
+ continue;
+ }
- IRBuilder<> IRB(&F->getEntryBlock().front());
- return RetvalTLSPtr =
- IRB.CreateCall(DFS.GetRetvalTLSTy, DFS.GetRetvalTLS, {});
-}
+ unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
+ if (A != &FArg) {
+ ArgOffset += alignTo(Size, kShadowTLSAlignment);
+ if (ArgOffset > kArgTLSSize)
+ break; // ArgTLS overflows, uses a zero shadow.
+ continue;
+ }
-Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
- IRBuilder<> IRB(Pos);
- return IRB.CreateConstGEP2_64(ArrayType::get(DFS.ShadowTy, 64),
- getArgTLSPtr(), 0, Idx);
+ if (ArgOffset + Size > kArgTLSSize)
+ break; // ArgTLS overflows, uses a zero shadow.
+
+ Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
+ IRBuilder<> IRB(ArgTLSPos);
+ Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
+ return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
+ kShadowTLSAlignment);
+ }
+
+ return DFS.getZeroShadow(A);
}
Value *DFSanFunction::getShadow(Value *V) {
if (!isa<Argument>(V) && !isa<Instruction>(V))
- return DFS.ZeroShadow;
+ return DFS.getZeroShadow(V);
Value *&Shadow = ValShadowMap[V];
if (!Shadow) {
if (Argument *A = dyn_cast<Argument>(V)) {
if (IsNativeABI)
- return DFS.ZeroShadow;
+ return DFS.getZeroShadow(V);
switch (IA) {
case DataFlowSanitizer::IA_TLS: {
- Value *ArgTLSPtr = getArgTLSPtr();
- Instruction *ArgTLSPos =
- DFS.ArgTLS ? &*F->getEntryBlock().begin()
- : cast<Instruction>(ArgTLSPtr)->getNextNode();
- IRBuilder<> IRB(ArgTLSPos);
- Shadow =
- IRB.CreateLoad(DFS.ShadowTy, getArgTLS(A->getArgNo(), ArgTLSPos));
+ Shadow = getShadowForTLSArgument(A);
break;
}
case DataFlowSanitizer::IA_Args: {
@@ -1109,13 +1366,13 @@ Value *DFSanFunction::getShadow(Value *V) {
while (ArgIdx--)
++i;
Shadow = &*i;
- assert(Shadow->getType() == DFS.ShadowTy);
+ assert(Shadow->getType() == DFS.PrimitiveShadowTy);
break;
}
}
NonZeroChecks.push_back(Shadow);
} else {
- Shadow = DFS.ZeroShadow;
+ Shadow = DFS.getZeroShadow(V);
}
}
return Shadow;
@@ -1123,7 +1380,8 @@ Value *DFSanFunction::getShadow(Value *V) {
void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
assert(!ValShadowMap.count(I));
- assert(Shadow->getType() == DFS.ShadowTy);
+ assert(DFS.shouldTrackFieldsAndIndices() ||
+ Shadow->getType() == DFS.PrimitiveShadowTy);
ValShadowMap[I] = Shadow;
}
@@ -1140,47 +1398,60 @@ Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy),
IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)),
ShadowPtrMul),
- ShadowPtrTy);
+ PrimitiveShadowPtrTy);
+}
+
+Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
+ Instruction *Pos) {
+ Value *PrimitiveValue = combineShadows(V1, V2, Pos);
+ return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
}
// Generates IR to compute the union of the two given shadows, inserting it
-// before Pos. Returns the computed union Value.
+// before Pos. The combined value has primitive shadow type.
Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
- if (V1 == DFS.ZeroShadow)
- return V2;
- if (V2 == DFS.ZeroShadow)
- return V1;
+ if (DFS.isZeroShadow(V1))
+ return collapseToPrimitiveShadow(V2, Pos);
+ if (DFS.isZeroShadow(V2))
+ return collapseToPrimitiveShadow(V1, Pos);
if (V1 == V2)
- return V1;
+ return collapseToPrimitiveShadow(V1, Pos);
auto V1Elems = ShadowElements.find(V1);
auto V2Elems = ShadowElements.find(V2);
if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
V2Elems->second.begin(), V2Elems->second.end())) {
- return V1;
+ return collapseToPrimitiveShadow(V1, Pos);
} else if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
V1Elems->second.begin(), V1Elems->second.end())) {
- return V2;
+ return collapseToPrimitiveShadow(V2, Pos);
}
} else if (V1Elems != ShadowElements.end()) {
if (V1Elems->second.count(V2))
- return V1;
+ return collapseToPrimitiveShadow(V1, Pos);
} else if (V2Elems != ShadowElements.end()) {
if (V2Elems->second.count(V1))
- return V2;
+ return collapseToPrimitiveShadow(V2, Pos);
}
auto Key = std::make_pair(V1, V2);
if (V1 > V2)
std::swap(Key.first, Key.second);
- CachedCombinedShadow &CCS = CachedCombinedShadows[Key];
+ CachedShadow &CCS = CachedShadows[Key];
if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
return CCS.Shadow;
+ // Convert the input shadows to shadows with primitive types.
+ Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
+ Value *PV2 = collapseToPrimitiveShadow(V2, Pos);
+
IRBuilder<> IRB(Pos);
- if (AvoidNewBlocks) {
- CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2});
+ if (ClFast16Labels) {
+ CCS.Block = Pos->getParent();
+ CCS.Shadow = IRB.CreateOr(PV1, PV2);
+ } else if (AvoidNewBlocks) {
+ CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {PV1, PV2});
Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
Call->addParamAttr(0, Attribute::ZExt);
Call->addParamAttr(1, Attribute::ZExt);
@@ -1189,19 +1460,20 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
CCS.Shadow = Call;
} else {
BasicBlock *Head = Pos->getParent();
- Value *Ne = IRB.CreateICmpNE(V1, V2);
+ Value *Ne = IRB.CreateICmpNE(PV1, PV2);
BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT));
IRBuilder<> ThenIRB(BI);
- CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2});
+ CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {PV1, PV2});
Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
Call->addParamAttr(0, Attribute::ZExt);
Call->addParamAttr(1, Attribute::ZExt);
BasicBlock *Tail = BI->getSuccessor(0);
- PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
+ PHINode *Phi =
+ PHINode::Create(DFS.PrimitiveShadowTy, 2, "", &Tail->front());
Phi->addIncoming(Call, Call->getParent());
- Phi->addIncoming(V1, Head);
+ Phi->addIncoming(PV1, Head);
CCS.Block = Tail;
CCS.Shadow = Phi;
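For orientation, a minimal standalone model of what the fast-labels branch above emits (a sketch under the assumption of DFSan's 16-bit shadow width, not code from the patch): with fast-16-labels mode each bit of the primitive shadow denotes one label, so the union of two shadows is a plain bitwise OR, with no runtime call and no extra basic block.

#include <cstdint>

// Illustrative model of the IR produced by IRB.CreateOr(PV1, PV2) above:
// treating labels as a 16-bit set, the union of two shadows is their OR.
static inline uint16_t unionFast16Labels(uint16_t ShadowA, uint16_t ShadowB) {
  return ShadowA | ShadowB;
}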
@@ -1228,13 +1500,13 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
// the computed union Value.
Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
if (Inst->getNumOperands() == 0)
- return DFS.ZeroShadow;
+ return DFS.getZeroShadow(Inst);
Value *Shadow = getShadow(Inst->getOperand(0));
for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) {
Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst);
}
- return Shadow;
+ return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst);
}
Value *DFSanVisitor::visitOperandShadowInst(Instruction &I) {
@@ -1244,20 +1516,21 @@ Value *DFSanVisitor::visitOperandShadowInst(Instruction &I) {
}
// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where
-// Addr has alignment Align, and take the union of each of those shadows.
+// Addr has alignment Align, and takes the union of each of those shadows. The
+// returned shadow always has primitive type.
Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
Instruction *Pos) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
const auto i = AllocaShadowMap.find(AI);
if (i != AllocaShadowMap.end()) {
IRBuilder<> IRB(Pos);
- return IRB.CreateLoad(DFS.ShadowTy, i->second);
+ return IRB.CreateLoad(DFS.PrimitiveShadowTy, i->second);
}
}
const llvm::Align ShadowAlign(Align * DFS.ShadowWidthBytes);
SmallVector<const Value *, 2> Objs;
- GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
+ getUnderlyingObjects(Addr, Objs);
bool AllConstants = true;
for (const Value *Obj : Objs) {
if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
@@ -1269,26 +1542,51 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
break;
}
if (AllConstants)
- return DFS.ZeroShadow;
+ return DFS.ZeroPrimitiveShadow;
Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
switch (Size) {
case 0:
- return DFS.ZeroShadow;
+ return DFS.ZeroPrimitiveShadow;
case 1: {
- LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos);
+ LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
LI->setAlignment(ShadowAlign);
return LI;
}
case 2: {
IRBuilder<> IRB(Pos);
- Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
+ Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
ConstantInt::get(DFS.IntptrTy, 1));
return combineShadows(
- IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr, ShadowAlign),
- IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr1, ShadowAlign), Pos);
+ IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign),
+ IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign),
+ Pos);
}
}
+
+ if (ClFast16Labels && Size % (64 / DFS.ShadowWidthBits) == 0) {
+ // First OR all the WideShadows, then OR individual shadows within the
+ // combined WideShadow. This is fewer instructions than ORing shadows
+ // individually.
+ IRBuilder<> IRB(Pos);
+ Value *WideAddr =
+ IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
+ Value *CombinedWideShadow =
+ IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
+ for (uint64_t Ofs = 64 / DFS.ShadowWidthBits; Ofs != Size;
+ Ofs += 64 / DFS.ShadowWidthBits) {
+ WideAddr = IRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
+ ConstantInt::get(DFS.IntptrTy, 1));
+ Value *NextWideShadow =
+ IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
+ CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
+ }
+ for (unsigned Width = 32; Width >= DFS.ShadowWidthBits; Width >>= 1) {
+ Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
+ CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
+ }
+ return IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy);
+ }
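A rough standalone model of the fast path just above (a sketch, not the pass itself; ShadowWidthBits = 16 is assumed for concreteness): the shadow bytes covering the load are read 64 bits at a time, the words are ORed together, and the combined word is folded down to one primitive shadow by ORing in right-shifted copies before the final truncation.

#include <cstddef>
#include <cstdint>

// Sketch of the OR-then-fold reduction used by the fast-16-labels load path.
// Words points at NumWords 64-bit chunks of shadow memory covering the load.
static uint16_t combineWideShadows(const uint64_t *Words, size_t NumWords,
                                   unsigned ShadowWidthBits = 16) {
  uint64_t Combined = Words[0];
  for (size_t I = 1; I < NumWords; ++I)
    Combined |= Words[I];                 // first OR all the wide shadows
  for (unsigned W = 32; W >= ShadowWidthBits; W >>= 1)
    Combined |= Combined >> W;            // then fold lanes within one word
  return static_cast<uint16_t>(Combined); // truncate to a primitive shadow
}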
if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidthBits) == 0) {
// Fast path for the common case where each byte has identical shadow: load
// shadow 64 bits at a time, fall out to a __dfsan_union_load call if any
@@ -1307,7 +1605,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
Value *WideShadow =
IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
- Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
+ Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.PrimitiveShadowTy);
Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidthBits);
Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidthBits);
Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow);
@@ -1348,15 +1646,18 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
LastBr->setSuccessor(0, Tail);
FallbackIRB.CreateBr(Tail);
- PHINode *Shadow = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
+ PHINode *Shadow =
+ PHINode::Create(DFS.PrimitiveShadowTy, 2, "", &Tail->front());
Shadow->addIncoming(FallbackCall, FallbackBB);
Shadow->addIncoming(TruncShadow, LastBr->getParent());
return Shadow;
}
IRBuilder<> IRB(Pos);
+ FunctionCallee &UnionLoadFn =
+ ClFast16Labels ? DFS.DFSanUnionLoadFast16LabelsFn : DFS.DFSanUnionLoadFn;
CallInst *FallbackCall = IRB.CreateCall(
- DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
+ UnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
return FallbackCall;
}
@@ -1365,34 +1666,39 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
auto &DL = LI.getModule()->getDataLayout();
uint64_t Size = DL.getTypeStoreSize(LI.getType());
if (Size == 0) {
- DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow);
+ DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
return;
}
Align Alignment = ClPreserveAlignment ? LI.getAlign() : Align(1);
- Value *Shadow =
+ Value *PrimitiveShadow =
DFSF.loadShadow(LI.getPointerOperand(), Size, Alignment.value(), &LI);
if (ClCombinePointerLabelsOnLoad) {
Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
- Shadow = DFSF.combineShadows(Shadow, PtrShadow, &LI);
+ PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, &LI);
}
- if (Shadow != DFSF.DFS.ZeroShadow)
- DFSF.NonZeroChecks.push_back(Shadow);
+ if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
+ DFSF.NonZeroChecks.push_back(PrimitiveShadow);
+ Value *Shadow =
+ DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, &LI);
DFSF.setShadow(&LI, Shadow);
if (ClEventCallbacks) {
IRBuilder<> IRB(&LI);
- IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, Shadow);
+ Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
+ IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
}
}
-void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, Align Alignment,
- Value *Shadow, Instruction *Pos) {
+void DFSanFunction::storePrimitiveShadow(Value *Addr, uint64_t Size,
+ Align Alignment,
+ Value *PrimitiveShadow,
+ Instruction *Pos) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
const auto i = AllocaShadowMap.find(AI);
if (i != AllocaShadowMap.end()) {
IRBuilder<> IRB(Pos);
- IRB.CreateStore(Shadow, i->second);
+ IRB.CreateStore(PrimitiveShadow, i->second);
return;
}
}
@@ -1400,7 +1706,7 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, Align Alignment,
const Align ShadowAlign(Alignment.value() * DFS.ShadowWidthBytes);
IRBuilder<> IRB(Pos);
Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
- if (Shadow == DFS.ZeroShadow) {
+ if (DFS.isZeroShadow(PrimitiveShadow)) {
IntegerType *ShadowTy =
IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
@@ -1413,11 +1719,13 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, Align Alignment,
const unsigned ShadowVecSize = 128 / DFS.ShadowWidthBits;
uint64_t Offset = 0;
if (Size >= ShadowVecSize) {
- auto *ShadowVecTy = FixedVectorType::get(DFS.ShadowTy, ShadowVecSize);
+ auto *ShadowVecTy =
+ FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
Value *ShadowVec = UndefValue::get(ShadowVecTy);
for (unsigned i = 0; i != ShadowVecSize; ++i) {
ShadowVec = IRB.CreateInsertElement(
- ShadowVec, Shadow, ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i));
+ ShadowVec, PrimitiveShadow,
+ ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i));
}
Value *ShadowVecAddr =
IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
@@ -1432,8 +1740,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, Align Alignment,
}
while (Size > 0) {
Value *CurShadowAddr =
- IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset);
- IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
+ IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
+ IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
--Size;
++Offset;
}
@@ -1448,14 +1756,19 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
const Align Alignment = ClPreserveAlignment ? SI.getAlign() : Align(1);
Value* Shadow = DFSF.getShadow(SI.getValueOperand());
+ Value *PrimitiveShadow;
if (ClCombinePointerLabelsOnStore) {
Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
- Shadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
+ PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
+ } else {
+ PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
}
- DFSF.storeShadow(SI.getPointerOperand(), Size, Alignment, Shadow, &SI);
+ DFSF.storePrimitiveShadow(SI.getPointerOperand(), Size, Alignment,
+ PrimitiveShadow, &SI);
if (ClEventCallbacks) {
IRBuilder<> IRB(&SI);
- IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, Shadow);
+ Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
+ IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8});
}
}
@@ -1494,11 +1807,29 @@ void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
}
void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
- visitOperandShadowInst(I);
+ if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
+ visitOperandShadowInst(I);
+ return;
+ }
+
+ IRBuilder<> IRB(&I);
+ Value *Agg = I.getAggregateOperand();
+ Value *AggShadow = DFSF.getShadow(Agg);
+ Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
+ DFSF.setShadow(&I, ResShadow);
}
void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
- visitOperandShadowInst(I);
+ if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
+ visitOperandShadowInst(I);
+ return;
+ }
+
+ IRBuilder<> IRB(&I);
+ Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
+ Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
+ Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
+ DFSF.setShadow(&I, Res);
}
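The two visitors above lean on the shadow-type mapping used elsewhere in this patch (getShadowTy, expandFromPrimitiveShadow): when shouldTrackFieldsAndIndices() holds, the shadow of an aggregate mirrors the aggregate's structure with every leaf replaced by the primitive shadow type, so an extractvalue or insertvalue on the value can be replayed on the shadow with the same index list. A loose C++ analogy, not taken from the patch (the 16-bit leaf width is an assumption):

#include <array>
#include <cstdint>

// Hypothetical analogy only: the shadow of a struct is a parallel struct of
// primitive shadows, and a field access on the value is mirrored by the same
// field access on the shadow.
struct ExampleValue  { int32_t A; std::array<int64_t, 2> B; };   // value type
struct ExampleShadow { uint16_t A; std::array<uint16_t, 2> B; }; // its shadow

static uint16_t shadowOfFieldB1(const ExampleShadow &S) {
  return S.B[1]; // mirrors extractvalue at indices {1, 1} on the value
}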
void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
@@ -1517,31 +1848,32 @@ void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
}
if (AllLoadsStores) {
IRBuilder<> IRB(&I);
- DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.ShadowTy);
+ DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
}
- DFSF.setShadow(&I, DFSF.DFS.ZeroShadow);
+ DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
}
void DFSanVisitor::visitSelectInst(SelectInst &I) {
Value *CondShadow = DFSF.getShadow(I.getCondition());
Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
+ Value *ShadowSel = nullptr;
if (isa<VectorType>(I.getCondition()->getType())) {
- DFSF.setShadow(
- &I,
- DFSF.combineShadows(
- CondShadow, DFSF.combineShadows(TrueShadow, FalseShadow, &I), &I));
+ ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
+ FalseShadow, &I);
} else {
- Value *ShadowSel;
if (TrueShadow == FalseShadow) {
ShadowSel = TrueShadow;
} else {
ShadowSel =
SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
}
- DFSF.setShadow(&I, DFSF.combineShadows(CondShadow, ShadowSel, &I));
}
+ DFSF.setShadow(&I, ClTrackSelectControlFlow
+ ? DFSF.combineShadowsThenConvert(
+ I.getType(), CondShadow, ShadowSel, &I)
+ : ShadowSel);
}
void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
@@ -1585,7 +1917,15 @@ void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
case DataFlowSanitizer::IA_TLS: {
Value *S = DFSF.getShadow(RI.getReturnValue());
IRBuilder<> IRB(&RI);
- IRB.CreateStore(S, DFSF.getRetvalTLS());
+ Type *RT = DFSF.F->getFunctionType()->getReturnType();
+ unsigned Size =
+ getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
+ if (Size <= kRetvalTLSSize) {
+ // If the size overflows, store nothing. At the call site, oversized return
+ // shadows are set to zero.
+ IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB),
+ kShadowTLSAlignment);
+ }
break;
}
case DataFlowSanitizer::IA_Args: {
@@ -1625,11 +1965,11 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
CB.setCalledFunction(F);
IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
IRB.CreateGlobalStringPtr(F->getName()));
- DFSF.setShadow(&CB, DFSF.DFS.ZeroShadow);
+ DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
return;
case DataFlowSanitizer::WK_Discard:
CB.setCalledFunction(F);
- DFSF.setShadow(&CB, DFSF.DFS.ZeroShadow);
+ DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
return;
case DataFlowSanitizer::WK_Functional:
CB.setCalledFunction(F);
@@ -1681,10 +2021,11 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
i = CB.arg_begin();
const unsigned ShadowArgStart = Args.size();
for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
- Args.push_back(DFSF.getShadow(*i));
+ Args.push_back(
+ DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*i), &CB));
if (FT->isVarArg()) {
- auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
+ auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
CB.arg_size() - FT->getNumParams());
auto *LabelVAAlloca = new AllocaInst(
LabelVATy, getDataLayout().getAllocaAddrSpace(),
@@ -1692,7 +2033,9 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
for (unsigned n = 0; i != CB.arg_end(); ++i, ++n) {
auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
- IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr);
+ IRB.CreateStore(
+ DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*i), &CB),
+ LabelVAPtr);
}
Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
@@ -1701,9 +2044,9 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
if (!FT->getReturnType()->isVoidTy()) {
if (!DFSF.LabelReturnAlloca) {
DFSF.LabelReturnAlloca =
- new AllocaInst(DFSF.DFS.ShadowTy,
- getDataLayout().getAllocaAddrSpace(),
- "labelreturn", &DFSF.F->getEntryBlock().front());
+ new AllocaInst(DFSF.DFS.PrimitiveShadowTy,
+ getDataLayout().getAllocaAddrSpace(),
+ "labelreturn", &DFSF.F->getEntryBlock().front());
}
Args.push_back(DFSF.LabelReturnAlloca);
}
@@ -1718,17 +2061,19 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
// Update the parameter attributes of the custom call instruction to
// zero extend the shadow parameters. This is required for targets
- // which consider ShadowTy an illegal type.
+ // which consider PrimitiveShadowTy an illegal type.
for (unsigned n = 0; n < FT->getNumParams(); n++) {
const unsigned ArgNo = ShadowArgStart + n;
- if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy)
+ if (CustomCI->getArgOperand(ArgNo)->getType() ==
+ DFSF.DFS.PrimitiveShadowTy)
CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
}
if (!FT->getReturnType()->isVoidTy()) {
- LoadInst *LabelLoad =
- IRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.LabelReturnAlloca);
- DFSF.setShadow(CustomCI, LabelLoad);
+ LoadInst *LabelLoad = IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy,
+ DFSF.LabelReturnAlloca);
+ DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow(
+ FT->getReturnType(), LabelLoad, &CB));
}
CI->replaceAllUsesWith(CustomCI);
@@ -1741,9 +2086,20 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
FunctionType *FT = CB.getFunctionType();
if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
- for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) {
- IRB.CreateStore(DFSF.getShadow(CB.getArgOperand(i)),
- DFSF.getArgTLS(i, &CB));
+ unsigned ArgOffset = 0;
+ const DataLayout &DL = getDataLayout();
+ for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
+ unsigned Size =
+ DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
+ // Stop storing if the accumulated argument size overflows. Inside the
+ // callee, arguments past the overflow point have zero shadow values.
+ if (ArgOffset + Size > kArgTLSSize)
+ break;
+ IRB.CreateAlignedStore(
+ DFSF.getShadow(CB.getArgOperand(I)),
+ DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
+ kShadowTLSAlignment);
+ ArgOffset += alignTo(Size, kShadowTLSAlignment);
}
}
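A standalone sketch of the offset bookkeeping used for __dfsan_arg_tls above (a model only; kShadowTLSAlignment and kArgTLSSize are constants defined earlier in the file and are passed in here as plain parameters rather than asserted values): each argument's shadow goes at the current offset, the offset then advances by the shadow size rounded up to the TLS alignment, and once the buffer would overflow this and every later argument falls back to a zero shadow.

#include <cstdint>
#include <vector>

// Illustrative model of laying argument shadows out in the arg TLS buffer.
// Returns the byte offset assigned to each argument, or UINT64_MAX for
// arguments whose shadow no longer fits (they read back as zero shadows).
static std::vector<uint64_t>
layoutArgShadows(const std::vector<uint64_t> &ShadowSizes, uint64_t TLSAlign,
                 uint64_t TLSSize) {
  auto alignTo = [](uint64_t V, uint64_t A) { return (V + A - 1) / A * A; };
  std::vector<uint64_t> Offsets;
  uint64_t Offset = 0;
  for (uint64_t Size : ShadowSizes) {
    if (Offset + Size > TLSSize) {
      // Overflow: this argument and all later ones get a zero shadow.
      Offsets.resize(ShadowSizes.size(), UINT64_MAX);
      break;
    }
    Offsets.push_back(Offset);
    Offset += alignTo(Size, TLSAlign);
  }
  return Offsets;
}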
@@ -1764,10 +2120,19 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
IRBuilder<> NextIRB(Next);
- LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.getRetvalTLS());
- DFSF.SkipInsts.insert(LI);
- DFSF.setShadow(&CB, LI);
- DFSF.NonZeroChecks.push_back(LI);
+ const DataLayout &DL = getDataLayout();
+ unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
+ if (Size > kRetvalTLSSize) {
+ // Set overflowed return shadow to be zero.
+ DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
+ } else {
+ LoadInst *LI = NextIRB.CreateAlignedLoad(
+ DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
+ kShadowTLSAlignment, "_dfsret");
+ DFSF.SkipInsts.insert(LI);
+ DFSF.setShadow(&CB, LI);
+ DFSF.NonZeroChecks.push_back(LI);
+ }
}
}
@@ -1789,7 +2154,8 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
if (FT->isVarArg()) {
unsigned VarArgSize = CB.arg_size() - FT->getNumParams();
- ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
+ ArrayType *VarArgArrayTy =
+ ArrayType::get(DFSF.DFS.PrimitiveShadowTy, VarArgSize);
AllocaInst *VarArgShadow =
new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
"", &DFSF.F->getEntryBlock().front());
@@ -1830,11 +2196,12 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
}
void DFSanVisitor::visitPHINode(PHINode &PN) {
+ Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
PHINode *ShadowPN =
- PHINode::Create(DFSF.DFS.ShadowTy, PN.getNumIncomingValues(), "", &PN);
+ PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN);
// Give the shadow phi node valid predecessors to fool SplitEdge into working.
- Value *UndefShadow = UndefValue::get(DFSF.DFS.ShadowTy);
+ Value *UndefShadow = UndefValue::get(ShadowTy);
for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e;
++i) {
ShadowPN->addIncoming(UndefShadow, *i);
@@ -1843,3 +2210,39 @@ void DFSanVisitor::visitPHINode(PHINode &PN) {
DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN));
DFSF.setShadow(&PN, ShadowPN);
}
+
+namespace {
+class DataFlowSanitizerLegacyPass : public ModulePass {
+private:
+ std::vector<std::string> ABIListFiles;
+
+public:
+ static char ID;
+
+ DataFlowSanitizerLegacyPass(
+ const std::vector<std::string> &ABIListFiles = std::vector<std::string>())
+ : ModulePass(ID), ABIListFiles(ABIListFiles) {}
+
+ bool runOnModule(Module &M) override {
+ return DataFlowSanitizer(ABIListFiles).runImpl(M);
+ }
+};
+} // namespace
+
+char DataFlowSanitizerLegacyPass::ID;
+
+INITIALIZE_PASS(DataFlowSanitizerLegacyPass, "dfsan",
+ "DataFlowSanitizer: dynamic data flow analysis.", false, false)
+
+ModulePass *llvm::createDataFlowSanitizerLegacyPassPass(
+ const std::vector<std::string> &ABIListFiles) {
+ return new DataFlowSanitizerLegacyPass(ABIListFiles);
+}
+
+PreservedAnalyses DataFlowSanitizerPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (DataFlowSanitizer(ABIListFiles).runImpl(M)) {
+ return PreservedAnalyses::none();
+ }
+ return PreservedAnalyses::all();
+}
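A hedged usage sketch for the two entry points added above; the include path and the DataFlowSanitizerPass constructor taking the ABI-list file names are assumptions modeled on the legacy wrapper shown in this hunk, not declarations visible here. Legacy-PM users go through createDataFlowSanitizerLegacyPassPass(), while new-PM users add the pass to a ModulePassManager:

#include <string>
#include <vector>
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h" // assumed path

// Sketch only: scheduling DFSan in a new-pass-manager module pipeline.
static void addDFSan(llvm::ModulePassManager &MPM,
                     const std::vector<std::string> &ABIListFiles) {
  MPM.addPass(llvm::DataFlowSanitizerPass(ABIListFiles));
}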
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index d8a965a90127..527644a69d91 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -13,13 +13,17 @@
//
//===----------------------------------------------------------------------===//
+#include "CFGMST.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
@@ -32,6 +36,7 @@
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CRC.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
@@ -52,6 +57,8 @@ namespace endian = llvm::support::endian;
#define DEBUG_TYPE "insert-gcov-profiling"
enum : uint32_t {
+ GCOV_ARC_ON_TREE = 1 << 0,
+
GCOV_TAG_FUNCTION = 0x01000000,
GCOV_TAG_BLOCKS = 0x01410000,
GCOV_TAG_ARCS = 0x01430000,
@@ -62,6 +69,9 @@ static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version",
cl::init("408*"), cl::Hidden,
cl::ValueRequired);
+static cl::opt<bool> AtomicCounter("gcov-atomic-counter", cl::Hidden,
+ cl::desc("Make counter updates atomic"));
+
// Returns the number of words which will be used to represent this string.
static unsigned wordsOfString(StringRef s) {
// Length + NUL-terminated string + 0~3 padding NULs.
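The helper whose comment closes this hunk encodes gcov's word-oriented string layout; its body lies outside the diff context, so the following is a reconstruction from the comment alone (an assumption, not the verbatim function): one 32-bit word for the length, then the NUL-terminated string padded with 0-3 extra NULs up to a word boundary.

#include <cstdint>
#include <string>

// Reconstructed from the comment above: 32-bit words needed to store a string
// in the gcno/gcda stream (one length word plus the padded payload).
static unsigned wordsOfStringModel(const std::string &S) {
  unsigned PayloadBytes = static_cast<unsigned>(S.size()) + 1; // trailing NUL
  unsigned PayloadWords = (PayloadBytes + 3) / 4;              // 0~3 pad NULs
  return 1 + PayloadWords;
}
// e.g. "main": 1 + (5 + 3) / 4 = 3 words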
@@ -73,6 +83,7 @@ GCOVOptions GCOVOptions::getDefault() {
Options.EmitNotes = true;
Options.EmitData = true;
Options.NoRedZone = false;
+ Options.Atomic = AtomicCounter;
if (DefaultGCOVVersion.size() != 4) {
llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
@@ -90,7 +101,8 @@ public:
GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
bool
- runOnModule(Module &M,
+ runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
+ function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
void write(uint32_t i) {
@@ -107,11 +119,14 @@ public:
private:
// Create the .gcno files for the Module based on DebugInfo.
- void emitProfileNotes();
+ bool
+ emitProfileNotes(NamedMDNode *CUNode, bool HasExecOrFork,
+ function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
+ function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
+ function_ref<const TargetLibraryInfo &(Function &F)> GetTLI);
- // Modify the program to track transitions along edges and call into the
- // profiling runtime to emit .gcda files when run.
- bool emitProfileArcs();
+ void emitGlobalConstructor(
+ SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
bool isFunctionInstrumented(const Function &F);
std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
@@ -130,7 +145,6 @@ private:
Function *
insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
- Function *insertFlush(Function *ResetF);
bool AddFlushBeforeForkAndExec();
@@ -150,6 +164,7 @@ private:
SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
std::vector<Regex> FilterRe;
std::vector<Regex> ExcludeRe;
+ DenseSet<const BasicBlock *> ExecBlocks;
StringMap<bool> InstrumentedFiles;
};
@@ -165,24 +180,68 @@ public:
StringRef getPassName() const override { return "GCOV Profiler"; }
bool runOnModule(Module &M) override {
- return Profiler.runOnModule(M, [this](Function &F) -> TargetLibraryInfo & {
- return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- });
+ auto GetBFI = [this](Function &F) {
+ return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+ };
+ auto GetBPI = [this](Function &F) {
+ return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
+ };
+ auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ return Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
private:
GCOVProfiler Profiler;
};
+
+struct BBInfo {
+ BBInfo *Group;
+ uint32_t Index;
+ uint32_t Rank = 0;
+
+ BBInfo(unsigned Index) : Group(this), Index(Index) {}
+ const std::string infoString() const {
+ return (Twine("Index=") + Twine(Index)).str();
+ }
+};
+
+struct Edge {
+ // This struct represents one CFG edge. Note the CFG can be a multi-graph,
+ // so there might be multiple edges with the same SrcBB and DestBB.
+ const BasicBlock *SrcBB;
+ const BasicBlock *DestBB;
+ uint64_t Weight;
+ BasicBlock *Place = nullptr;
+ uint32_t SrcNumber, DstNumber;
+ bool InMST = false;
+ bool Removed = false;
+ bool IsCritical = false;
+
+ Edge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
+ : SrcBB(Src), DestBB(Dest), Weight(W) {}
+
+ // Return the information string of an edge.
+ const std::string infoString() const {
+ return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
+ (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
+ .str();
+ }
+};
}
char GCOVProfilerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(
GCOVProfilerLegacyPass, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
GCOVProfilerLegacyPass, "insert-gcov-profiling",
@@ -267,8 +326,8 @@ namespace {
return LinesByFile.try_emplace(Filename, P, Filename).first->second;
}
- void addEdge(GCOVBlock &Successor) {
- OutEdges.push_back(&Successor);
+ void addEdge(GCOVBlock &Successor, uint32_t Flags) {
+ OutEdges.emplace_back(&Successor, Flags);
}
void writeOut() {
@@ -301,15 +360,16 @@ namespace {
assert(OutEdges.empty());
}
- private:
+ uint32_t Number;
+ SmallVector<std::pair<GCOVBlock *, uint32_t>, 4> OutEdges;
+
+ private:
friend class GCOVFunction;
GCOVBlock(GCOVProfiler *P, uint32_t Number)
: GCOVRecord(P), Number(Number) {}
- uint32_t Number;
StringMap<GCOVLines> LinesByFile;
- SmallVector<GCOVBlock *, 4> OutEdges;
};
// A function has a unique identifier, a checksum (we leave as zero) and a
@@ -320,16 +380,12 @@ namespace {
GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP,
unsigned EndLine, uint32_t Ident, int Version)
: GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident),
- Version(Version), ReturnBlock(P, 1) {
+ Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) {
LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
bool ExitBlockBeforeBody = Version >= 48;
- uint32_t i = 0;
- for (auto &BB : *F) {
- // Skip index 1 if it's assigned to the ReturnBlock.
- if (i == 1 && ExitBlockBeforeBody)
- ++i;
+ uint32_t i = ExitBlockBeforeBody ? 2 : 1;
+ for (BasicBlock &BB : *F)
Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++)));
- }
if (!ExitBlockBeforeBody)
ReturnBlock.Number = i;
@@ -340,26 +396,15 @@ namespace {
FuncChecksum = hash_value(FunctionNameAndLine);
}
- GCOVBlock &getBlock(BasicBlock *BB) {
- return Blocks.find(BB)->second;
+ GCOVBlock &getBlock(const BasicBlock *BB) {
+ return Blocks.find(const_cast<BasicBlock *>(BB))->second;
}
+ GCOVBlock &getEntryBlock() { return EntryBlock; }
GCOVBlock &getReturnBlock() {
return ReturnBlock;
}
- std::string getEdgeDestinations() {
- std::string EdgeDestinations;
- raw_string_ostream EDOS(EdgeDestinations);
- Function *F = Blocks.begin()->first->getParent();
- for (BasicBlock &I : *F) {
- GCOVBlock &Block = getBlock(&I);
- for (int i = 0, e = Block.OutEdges.size(); i != e; ++i)
- EDOS << Block.OutEdges[i]->Number;
- }
- return EdgeDestinations;
- }
-
uint32_t getFuncChecksum() const {
return FuncChecksum;
}
@@ -398,44 +443,52 @@ namespace {
// Emit count of blocks.
write(GCOV_TAG_BLOCKS);
if (Version < 80) {
- write(Blocks.size() + 1);
- for (int i = Blocks.size() + 1; i; --i)
+ write(Blocks.size() + 2);
+ for (int i = Blocks.size() + 2; i; --i)
write(0);
} else {
write(1);
- write(Blocks.size() + 1);
+ write(Blocks.size() + 2);
}
LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n");
// Emit edges between blocks.
- Function *F = Blocks.begin()->first->getParent();
- for (BasicBlock &I : *F) {
- GCOVBlock &Block = getBlock(&I);
+ const uint32_t Outgoing = EntryBlock.OutEdges.size();
+ if (Outgoing) {
+ write(GCOV_TAG_ARCS);
+ write(Outgoing * 2 + 1);
+ write(EntryBlock.Number);
+ for (const auto &E : EntryBlock.OutEdges) {
+ write(E.first->Number);
+ write(E.second);
+ }
+ }
+ for (auto &It : Blocks) {
+ const GCOVBlock &Block = It.second;
if (Block.OutEdges.empty()) continue;
write(GCOV_TAG_ARCS);
write(Block.OutEdges.size() * 2 + 1);
write(Block.Number);
- for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
- LLVM_DEBUG(dbgs() << Block.Number << " -> "
- << Block.OutEdges[i]->Number << "\n");
- write(Block.OutEdges[i]->Number);
- write(0); // no flags
+ for (const auto &E : Block.OutEdges) {
+ write(E.first->Number);
+ write(E.second);
}
}
// Emit lines for each block.
- for (BasicBlock &I : *F)
- getBlock(&I).writeOut();
+ for (auto &It : Blocks)
+ It.second.writeOut();
}
- private:
+ public:
const DISubprogram *SP;
unsigned EndLine;
uint32_t Ident;
uint32_t FuncChecksum;
int Version;
- DenseMap<BasicBlock *, GCOVBlock> Blocks;
+ MapVector<BasicBlock *, GCOVBlock> Blocks;
+ GCOVBlock EntryBlock;
GCOVBlock ReturnBlock;
};
}
@@ -549,20 +602,23 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
}
bool GCOVProfiler::runOnModule(
- Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
+ Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
+ function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
this->M = &M;
this->GetTLI = std::move(GetTLI);
Ctx = &M.getContext();
- bool Modified = AddFlushBeforeForkAndExec();
+ NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
+ if (!CUNode || (!Options.EmitNotes && !Options.EmitData))
+ return false;
+
+ bool HasExecOrFork = AddFlushBeforeForkAndExec();
FilterRe = createRegexesFromString(Options.Filter);
ExcludeRe = createRegexesFromString(Options.Exclude);
-
- if (Options.EmitNotes) emitProfileNotes();
- if (Options.EmitData)
- Modified |= emitProfileArcs();
- return Modified;
+ emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, this->GetTLI);
+ return true;
}
PreservedAnalyses GCOVProfilerPass::run(Module &M,
@@ -572,9 +628,17 @@ PreservedAnalyses GCOVProfilerPass::run(Module &M,
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- if (!Profiler.runOnModule(M, [&](Function &F) -> TargetLibraryInfo & {
- return FAM.getResult<TargetLibraryAnalysis>(F);
- }))
+ auto GetBFI = [&FAM](Function &F) {
+ return &FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+ auto GetBPI = [&FAM](Function &F) {
+ return &FAM.getResult<BranchProbabilityAnalysis>(F);
+ };
+ auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+
+ if (!Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -611,16 +675,6 @@ static bool isUsingScopeBasedEH(Function &F) {
return isScopedEHPersonality(Personality);
}
-static bool shouldKeepInEntry(BasicBlock::iterator It) {
- if (isa<AllocaInst>(*It)) return true;
- if (isa<DbgInfoIntrinsic>(*It)) return true;
- if (auto *II = dyn_cast<IntrinsicInst>(It)) {
- if (II->getIntrinsicID() == llvm::Intrinsic::localescape) return true;
- }
-
- return false;
-}
-
bool GCOVProfiler::AddFlushBeforeForkAndExec() {
SmallVector<CallInst *, 2> Forks;
SmallVector<CallInst *, 2> Execs;
@@ -690,6 +744,7 @@ bool GCOVProfiler::AddFlushBeforeForkAndExec() {
// dumped
FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy);
Builder.CreateCall(ResetF)->setDebugLoc(Loc);
+ ExecBlocks.insert(Parent);
Parent->splitBasicBlock(NextInst);
Parent->back().setDebugLoc(Loc);
}
@@ -697,10 +752,67 @@ bool GCOVProfiler::AddFlushBeforeForkAndExec() {
return !Forks.empty() || !Execs.empty();
}
-void GCOVProfiler::emitProfileNotes() {
- NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (!CU_Nodes) return;
+static BasicBlock *getInstrBB(CFGMST<Edge, BBInfo> &MST, Edge &E,
+ const DenseSet<const BasicBlock *> &ExecBlocks) {
+ if (E.InMST || E.Removed)
+ return nullptr;
+
+ BasicBlock *SrcBB = const_cast<BasicBlock *>(E.SrcBB);
+ BasicBlock *DestBB = const_cast<BasicBlock *>(E.DestBB);
+ // For a fake edge, instrument the real BB.
+ if (SrcBB == nullptr)
+ return DestBB;
+ if (DestBB == nullptr)
+ return SrcBB;
+
+ auto CanInstrument = [](BasicBlock *BB) -> BasicBlock * {
+ // There are basic blocks (such as catchswitch) that cannot be instrumented.
+ // If the returned first insertion point is the end of BB, skip this BB.
+ if (BB->getFirstInsertionPt() == BB->end())
+ return nullptr;
+ return BB;
+ };
+
+ // Instrument the SrcBB if it has a single successor,
+ // otherwise, the DestBB if this is not a critical edge.
+ Instruction *TI = SrcBB->getTerminator();
+ if (TI->getNumSuccessors() <= 1 && !ExecBlocks.count(SrcBB))
+ return CanInstrument(SrcBB);
+ if (!E.IsCritical)
+ return CanInstrument(DestBB);
+
+ // Some IndirectBr critical edges cannot be split by the previous
+ // SplitIndirectBrCriticalEdges call. Bail out.
+ const unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+ BasicBlock *InstrBB =
+ isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
+ if (!InstrBB)
+ return nullptr;
+
+ MST.addEdge(SrcBB, InstrBB, 0);
+ MST.addEdge(InstrBB, DestBB, 0).InMST = true;
+ E.Removed = true;
+
+ return CanInstrument(InstrBB);
+}
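Note the interplay with the spanning tree computed by CFGMST: getInstrBB returns an instrumentation block only for edges outside the tree, and on-tree arcs are later emitted with the GCOV_ARC_ON_TREE flag so gcov can recover their counts by flow conservation. As rough arithmetic that is not taken from the patch (the fake entry/exit edges shift the exact numbers): a connected CFG with V blocks and E edges needs only E - (V - 1) counters.

// Illustrative arithmetic only: counters required under spanning-tree
// instrumentation of a connected CFG.
static unsigned countersNeeded(unsigned NumBlocks, unsigned NumEdges) {
  return NumEdges - (NumBlocks - 1); // e.g. a diamond: 4 - (4 - 1) == 1
}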
+#ifndef NDEBUG
+static void dumpEdges(CFGMST<Edge, BBInfo> &MST, GCOVFunction &GF) {
+ size_t ID = 0;
+ for (auto &E : make_pointee_range(MST.AllEdges)) {
+ GCOVBlock &Src = E.SrcBB ? GF.getBlock(E.SrcBB) : GF.getEntryBlock();
+ GCOVBlock &Dst = E.DestBB ? GF.getBlock(E.DestBB) : GF.getReturnBlock();
+ dbgs() << " Edge " << ID++ << ": " << Src.Number << "->" << Dst.Number
+ << E.infoString() << "\n";
+ }
+}
+#endif
+
+bool GCOVProfiler::emitProfileNotes(
+ NamedMDNode *CUNode, bool HasExecOrFork,
+ function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
+ function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
+ function_ref<const TargetLibraryInfo &(Function &F)> GetTLI) {
int Version;
{
uint8_t c3 = Options.Version[0];
@@ -710,27 +822,20 @@ void GCOVProfiler::emitProfileNotes() {
: (c3 - '0') * 10 + c1 - '0';
}
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ bool EmitGCDA = Options.EmitData;
+ for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) {
// Each compile unit gets its own .gcno file. This means that whether we run
// this pass over the original .o's as they're produced, or run it after
// LTO, we'll generate the same .gcno files.
- auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
+ auto *CU = cast<DICompileUnit>(CUNode->getOperand(i));
// Skip module skeleton (and module) CUs.
if (CU->getDWOId())
continue;
- std::error_code EC;
- raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
- sys::fs::OF_None);
- if (EC) {
- Ctx->emitError(Twine("failed to open coverage notes file for writing: ") +
- EC.message());
- continue;
- }
-
- std::string EdgeDestinations;
+ std::vector<uint8_t> EdgeDestinations;
+ SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little
: support::endianness::big;
@@ -744,36 +849,82 @@ void GCOVProfiler::emitProfileNotes() {
// TODO: Functions using scope-based EH are currently not supported.
if (isUsingScopeBasedEH(F)) continue;
- // gcov expects every function to start with an entry block that has a
- // single successor, so split the entry block to make sure of that.
- BasicBlock &EntryBlock = F.getEntryBlock();
- BasicBlock::iterator It = EntryBlock.begin();
- while (shouldKeepInEntry(It))
- ++It;
- EntryBlock.splitBasicBlock(It);
+ // Add the function line number to the lines of the entry block
+ // to have a counter for the function definition.
+ uint32_t Line = SP->getLine();
+ auto Filename = getFilename(SP);
+
+ BranchProbabilityInfo *BPI = GetBPI(F);
+ BlockFrequencyInfo *BFI = GetBFI(F);
+ // Split indirectbr critical edges here before computing the MST rather
+ // than later in getInstrBB() to avoid invalidating it.
+ SplitIndirectBrCriticalEdges(F, BPI, BFI);
+
+ CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);
+
+ // getInstrBB can split basic blocks and push elements to AllEdges.
+ for (size_t I : llvm::seq<size_t>(0, MST.AllEdges.size())) {
+ auto &E = *MST.AllEdges[I];
+ // For now, disable spanning tree optimization when fork or exec* is
+ // used.
+ if (HasExecOrFork)
+ E.InMST = false;
+ E.Place = getInstrBB(MST, E, ExecBlocks);
+ }
+ // Basic blocks in F are finalized at this point.
+ BasicBlock &EntryBlock = F.getEntryBlock();
Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine,
FunctionIdent++, Version));
GCOVFunction &Func = *Funcs.back();
- // Add the function line number to the lines of the entry block
- // to have a counter for the function definition.
- uint32_t Line = SP->getLine();
- auto Filename = getFilename(SP);
+ // Some non-tree edges are IndirectBr which cannot be split. Ignore them
+ // as well.
+ llvm::erase_if(MST.AllEdges, [](std::unique_ptr<Edge> &E) {
+ return E->Removed || (!E->InMST && !E->Place);
+ });
+ const size_t Measured =
+ std::stable_partition(
+ MST.AllEdges.begin(), MST.AllEdges.end(),
+ [](std::unique_ptr<Edge> &E) { return E->Place; }) -
+ MST.AllEdges.begin();
+ for (size_t I : llvm::seq<size_t>(0, Measured)) {
+ Edge &E = *MST.AllEdges[I];
+ GCOVBlock &Src =
+ E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
+ GCOVBlock &Dst =
+ E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
+ E.SrcNumber = Src.Number;
+ E.DstNumber = Dst.Number;
+ }
+ std::stable_sort(
+ MST.AllEdges.begin(), MST.AllEdges.begin() + Measured,
+ [](const std::unique_ptr<Edge> &L, const std::unique_ptr<Edge> &R) {
+ return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber
+ : L->DstNumber < R->DstNumber;
+ });
+
+ for (const Edge &E : make_pointee_range(MST.AllEdges)) {
+ GCOVBlock &Src =
+ E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
+ GCOVBlock &Dst =
+ E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
+ Src.addEdge(Dst, E.Place ? 0 : uint32_t(GCOV_ARC_ON_TREE));
+ }
// Artificial functions such as global initializers
if (!SP->isArtificial())
Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
- for (auto &BB : F) {
- GCOVBlock &Block = Func.getBlock(&BB);
- Instruction *TI = BB.getTerminator();
- if (int successors = TI->getNumSuccessors()) {
- for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
- }
- } else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func.getReturnBlock());
+ LLVM_DEBUG(dumpEdges(MST, Func));
+
+ for (auto &GB : Func.Blocks) {
+ const BasicBlock &BB = *GB.first;
+ auto &Block = GB.second;
+ for (auto Succ : Block.OutEdges) {
+ uint32_t Idx = Succ.first->Number;
+ do EdgeDestinations.push_back(Idx & 255);
+ while ((Idx >>= 8) > 0);
}
for (auto &I : BB) {
@@ -799,149 +950,110 @@ void GCOVProfiler::emitProfileNotes() {
}
Line = 0;
}
- EdgeDestinations += Func.getEdgeDestinations();
+ if (EmitGCDA) {
+ DISubprogram *SP = F.getSubprogram();
+ ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Measured);
+ GlobalVariable *Counters = new GlobalVariable(
+ *M, CounterTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(CounterTy), "__llvm_gcov_ctr");
+ CountersBySP.emplace_back(Counters, SP);
+
+ for (size_t I : llvm::seq<size_t>(0, Measured)) {
+ const Edge &E = *MST.AllEdges[I];
+ IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt());
+ Value *V = Builder.CreateConstInBoundsGEP2_64(
+ Counters->getValueType(), Counters, 0, I);
+ if (Options.Atomic) {
+ Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1),
+ AtomicOrdering::Monotonic);
+ } else {
+ Value *Count =
+ Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr");
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+ Builder.CreateStore(Count, V);
+ }
+ }
+ }
}
char Tmp[4];
- os = &out;
- auto Stamp = static_cast<uint32_t>(hash_value(EdgeDestinations));
+ JamCRC JC;
+ JC.update(EdgeDestinations);
+ uint32_t Stamp = JC.getCRC();
FileChecksums.push_back(Stamp);
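For reference, a standalone restatement of how the stamp above is formed (a sketch of the two steps visible in this hunk, nothing more): every successor block number is appended to EdgeDestinations as a little-endian, variable-length byte sequence of at least one byte, and the JamCRC of that byte stream becomes the .gcno stamp.

#include <cstdint>
#include <vector>

// Appends one edge-destination block number using the same variable-length,
// little-endian encoding as the loop earlier in this hunk.
static void appendEdgeDest(std::vector<uint8_t> &EdgeDestinations, uint32_t Idx) {
  do
    EdgeDestinations.push_back(static_cast<uint8_t>(Idx & 255));
  while ((Idx >>= 8) > 0);
  // e.g. Idx == 0x1234 appends {0x34, 0x12}; Idx == 0 appends {0x00}.
}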
- if (Endian == support::endianness::big) {
- out.write("gcno", 4);
- out.write(Options.Version, 4);
- } else {
- out.write("oncg", 4);
- std::reverse_copy(Options.Version, Options.Version + 4, Tmp);
- out.write(Tmp, 4);
- }
- write(Stamp);
- if (Version >= 90)
- writeString(""); // unuseful current_working_directory
- if (Version >= 80)
- write(0); // unuseful has_unexecuted_blocks
-
- for (auto &Func : Funcs)
- Func->writeOut(Stamp);
-
- write(0);
- write(0);
- out.close();
- }
-}
-
-bool GCOVProfiler::emitProfileArcs() {
- NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (!CU_Nodes) return false;
- bool Result = false;
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
- for (auto &F : M->functions()) {
- DISubprogram *SP = F.getSubprogram();
- unsigned EndLine;
- if (!SP) continue;
- if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F))
+ if (Options.EmitNotes) {
+ std::error_code EC;
+ raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
+ sys::fs::OF_None);
+ if (EC) {
+ Ctx->emitError(
+ Twine("failed to open coverage notes file for writing: ") +
+ EC.message());
continue;
- // TODO: Functions using scope-based EH are currently not supported.
- if (isUsingScopeBasedEH(F)) continue;
-
- DenseMap<std::pair<BasicBlock *, BasicBlock *>, unsigned> EdgeToCounter;
- unsigned Edges = 0;
- for (auto &BB : F) {
- Instruction *TI = BB.getTerminator();
- if (isa<ReturnInst>(TI)) {
- EdgeToCounter[{&BB, nullptr}] = Edges++;
- } else {
- for (BasicBlock *Succ : successors(TI)) {
- EdgeToCounter[{&BB, Succ}] = Edges++;
- }
- }
}
+ os = &out;
+ if (Endian == support::endianness::big) {
+ out.write("gcno", 4);
+ out.write(Options.Version, 4);
+ } else {
+ out.write("oncg", 4);
+ std::reverse_copy(Options.Version, Options.Version + 4, Tmp);
+ out.write(Tmp, 4);
+ }
+ write(Stamp);
+ if (Version >= 90)
+ writeString(""); // unused current_working_directory
+ if (Version >= 80)
+ write(0); // unused has_unexecuted_blocks
- ArrayType *CounterTy =
- ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
- GlobalVariable *Counters =
- new GlobalVariable(*M, CounterTy, false,
- GlobalValue::InternalLinkage,
- Constant::getNullValue(CounterTy),
- "__llvm_gcov_ctr");
- CountersBySP.push_back(std::make_pair(Counters, SP));
-
- // If a BB has several predecessors, use a PHINode to select
- // the correct counter.
- for (auto &BB : F) {
- const unsigned EdgeCount =
- std::distance(pred_begin(&BB), pred_end(&BB));
- if (EdgeCount) {
- // The phi node must be at the begin of the BB.
- IRBuilder<> BuilderForPhi(&*BB.begin());
- Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
- PHINode *Phi = BuilderForPhi.CreatePHI(Int64PtrTy, EdgeCount);
- for (BasicBlock *Pred : predecessors(&BB)) {
- auto It = EdgeToCounter.find({Pred, &BB});
- assert(It != EdgeToCounter.end());
- const unsigned Edge = It->second;
- Value *EdgeCounter = BuilderForPhi.CreateConstInBoundsGEP2_64(
- Counters->getValueType(), Counters, 0, Edge);
- Phi->addIncoming(EdgeCounter, Pred);
- }
+ for (auto &Func : Funcs)
+ Func->writeOut(Stamp);
- // Skip phis, landingpads.
- IRBuilder<> Builder(&*BB.getFirstInsertionPt());
- Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Phi);
- Count = Builder.CreateAdd(Count, Builder.getInt64(1));
- Builder.CreateStore(Count, Phi);
-
- Instruction *TI = BB.getTerminator();
- if (isa<ReturnInst>(TI)) {
- auto It = EdgeToCounter.find({&BB, nullptr});
- assert(It != EdgeToCounter.end());
- const unsigned Edge = It->second;
- Value *Counter = Builder.CreateConstInBoundsGEP2_64(
- Counters->getValueType(), Counters, 0, Edge);
- Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Counter);
- Count = Builder.CreateAdd(Count, Builder.getInt64(1));
- Builder.CreateStore(Count, Counter);
- }
- }
- }
+ write(0);
+ write(0);
+ out.close();
}
- Function *WriteoutF = insertCounterWriteout(CountersBySP);
- Function *ResetF = insertReset(CountersBySP);
- Function *FlushF = insertFlush(ResetF);
-
- // Create a small bit of code that registers the "__llvm_gcov_writeout" to
- // be executed at exit and the "__llvm_gcov_flush" function to be executed
- // when "__gcov_flush" is called.
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__llvm_gcov_init", M);
- F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- F->setLinkage(GlobalValue::InternalLinkage);
- F->addFnAttr(Attribute::NoInline);
- if (Options.NoRedZone)
- F->addFnAttr(Attribute::NoRedZone);
-
- BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
- IRBuilder<> Builder(BB);
-
- FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Type *Params[] = {PointerType::get(FTy, 0), PointerType::get(FTy, 0),
- PointerType::get(FTy, 0)};
- FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
-
- // Initialize the environment and register the local writeout, flush and
- // reset functions.
- FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
- Builder.CreateCall(GCOVInit, {WriteoutF, FlushF, ResetF});
- Builder.CreateRetVoid();
-
- appendToGlobalCtors(*M, F, 0);
- Result = true;
+ if (EmitGCDA) {
+ emitGlobalConstructor(CountersBySP);
+ EmitGCDA = false;
+ }
}
+ return true;
+}
+
+void GCOVProfiler::emitGlobalConstructor(
+ SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
+ Function *WriteoutF = insertCounterWriteout(CountersBySP);
+ Function *ResetF = insertReset(CountersBySP);
+
+ // Create a small bit of code that registers the "__llvm_gcov_writeout" to
+ // be executed at exit and the "__llvm_gcov_flush" function to be executed
+ // when "__gcov_flush" is called.
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_init", M);
+ F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ F->addFnAttr(Attribute::NoRedZone);
- return Result;
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ IRBuilder<> Builder(BB);
+
+ FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ auto *PFTy = PointerType::get(FTy, 0);
+ FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false);
+
+ // Initialize the environment and register the local writeout, flush and
+ // reset functions.
+ FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+ Builder.CreateCall(GCOVInit, {WriteoutF, ResetF});
+ Builder.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
}
FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
@@ -1028,15 +1140,19 @@ Function *GCOVProfiler::insertCounterWriteout(
// Collect the relevant data into a large constant data structure that we can
// walk to write out everything.
StructType *StartFileCallArgsTy = StructType::create(
- {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()});
+ {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
+ "start_file_args_ty");
StructType *EmitFunctionCallArgsTy = StructType::create(
- {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()});
+ {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()},
+ "emit_function_args_ty");
StructType *EmitArcsCallArgsTy = StructType::create(
- {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()});
+ {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()},
+ "emit_arcs_args_ty");
StructType *FileInfoTy =
StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(),
EmitFunctionCallArgsTy->getPointerTo(),
- EmitArcsCallArgsTy->getPointerTo()});
+ EmitArcsCallArgsTy->getPointerTo()},
+ "file_info");
Constant *Zero32 = Builder.getInt32(0);
// Build an explicit array of two zeros for use in ConstantExpr GEP building.
@@ -1146,41 +1262,46 @@ Function *GCOVProfiler::insertCounterWriteout(
// The index into the files structure is our loop induction variable.
Builder.SetInsertPoint(FileLoopHeader);
- PHINode *IV =
- Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2);
+ PHINode *IV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
+ "file_idx");
IV->addIncoming(Builder.getInt32(0), BB);
auto *FileInfoPtr = Builder.CreateInBoundsGEP(
FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
auto *StartFileCallArgsPtr =
- Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0);
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0, "start_file_args");
auto *StartFileCall = Builder.CreateCall(
StartFile,
{Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
Builder.CreateStructGEP(StartFileCallArgsTy,
- StartFileCallArgsPtr, 0)),
+ StartFileCallArgsPtr, 0),
+ "filename"),
Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
Builder.CreateStructGEP(StartFileCallArgsTy,
- StartFileCallArgsPtr, 1)),
+ StartFileCallArgsPtr, 1),
+ "version"),
Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
Builder.CreateStructGEP(StartFileCallArgsTy,
- StartFileCallArgsPtr, 2))});
+ StartFileCallArgsPtr, 2),
+ "stamp")});
if (auto AK = TLI->getExtAttrForI32Param(false))
StartFileCall->addParamAttr(2, AK);
- auto *NumCounters =
- Builder.CreateLoad(FileInfoTy->getElementType(1),
- Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1));
+ auto *NumCounters = Builder.CreateLoad(
+ FileInfoTy->getElementType(1),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1), "num_ctrs");
auto *EmitFunctionCallArgsArray =
Builder.CreateLoad(FileInfoTy->getElementType(2),
- Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2));
- auto *EmitArcsCallArgsArray =
- Builder.CreateLoad(FileInfoTy->getElementType(3),
- Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3));
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2),
+ "emit_function_args");
+ auto *EmitArcsCallArgsArray = Builder.CreateLoad(
+ FileInfoTy->getElementType(3),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3), "emit_arcs_args");
auto *EnterCounterLoopCond =
Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);
Builder.SetInsertPoint(CounterLoopHeader);
- auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2);
+ auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
+ "ctr_idx");
JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
@@ -1188,14 +1309,16 @@ Function *GCOVProfiler::insertCounterWriteout(
EmitFunction,
{Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
Builder.CreateStructGEP(EmitFunctionCallArgsTy,
- EmitFunctionCallArgsPtr, 0)),
+ EmitFunctionCallArgsPtr, 0),
+ "ident"),
Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
Builder.CreateStructGEP(EmitFunctionCallArgsTy,
- EmitFunctionCallArgsPtr, 1)),
+ EmitFunctionCallArgsPtr, 1),
+                                              "func_checksum"),
Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
Builder.CreateStructGEP(EmitFunctionCallArgsTy,
- EmitFunctionCallArgsPtr,
- 2))});
+ EmitFunctionCallArgsPtr, 2),
+ "cfg_checksum")});
if (auto AK = TLI->getExtAttrForI32Param(false)) {
EmitFunctionCall->addParamAttr(0, AK);
EmitFunctionCall->addParamAttr(1, AK);
@@ -1207,10 +1330,12 @@ Function *GCOVProfiler::insertCounterWriteout(
EmitArcs,
{Builder.CreateLoad(
EmitArcsCallArgsTy->getElementType(0),
- Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0)),
- Builder.CreateLoad(EmitArcsCallArgsTy->getElementType(1),
- Builder.CreateStructGEP(EmitArcsCallArgsTy,
- EmitArcsCallArgsPtr, 1))});
+ Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0),
+ "num_counters"),
+ Builder.CreateLoad(
+ EmitArcsCallArgsTy->getElementType(1),
+ Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 1),
+ "counters")});
if (auto AK = TLI->getExtAttrForI32Param(false))
EmitArcsCall->addParamAttr(0, AK);
auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
@@ -1221,7 +1346,7 @@ Function *GCOVProfiler::insertCounterWriteout(
Builder.SetInsertPoint(FileLoopLatch);
Builder.CreateCall(SummaryInfo, {});
Builder.CreateCall(EndFile, {});
- auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1));
+ auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1), "next_file_idx");
auto *FileLoopCond =
Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size()));
Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB);
@@ -1266,36 +1391,3 @@ Function *GCOVProfiler::insertReset(
return ResetF;
}
-
-Function *GCOVProfiler::insertFlush(Function *ResetF) {
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *FlushF = M->getFunction("__llvm_gcov_flush");
- if (!FlushF)
- FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__llvm_gcov_flush", M);
- FlushF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- FlushF->addFnAttr(Attribute::NoInline);
- if (Options.NoRedZone)
- FlushF->addFnAttr(Attribute::NoRedZone);
-
- BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
-
- // Write out the current counters.
- Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
- assert(WriteoutF && "Need to create the writeout function first!");
-
- IRBuilder<> Builder(Entry);
- Builder.CreateCall(WriteoutF, {});
- Builder.CreateCall(ResetF, {});
-
- Type *RetTy = FlushF->getReturnType();
- if (RetTy->isVoidTy())
- Builder.CreateRetVoid();
- else if (RetTy->isIntegerTy())
- // Used if __llvm_gcov_flush was implicitly declared.
- Builder.CreateRet(ConstantInt::get(RetTy, 0));
- else
- report_fatal_error("invalid return type for __llvm_gcov_flush");
-
- return FlushF;
-}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 2e71d613714a..fedd9bfc977e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -55,13 +55,12 @@ using namespace llvm;
#define DEBUG_TYPE "hwasan"
-static const char *const kHwasanModuleCtorName = "hwasan.module_ctor";
-static const char *const kHwasanNoteName = "hwasan.note";
-static const char *const kHwasanInitName = "__hwasan_init";
-static const char *const kHwasanPersonalityThunkName =
- "__hwasan_personality_thunk";
+const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
+const char kHwasanNoteName[] = "hwasan.note";
+const char kHwasanInitName[] = "__hwasan_init";
+const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";
-static const char *const kHwasanShadowMemoryDynamicAddress =
+const char kHwasanShadowMemoryDynamicAddress[] =
"__hwasan_shadow_memory_dynamic_address";
// Access sizes are powers of two: 1, 2, 4, 8, 16.
@@ -203,14 +202,16 @@ public:
bool sanitizeFunction(Function &F);
void initializeModule();
+ void createHwasanCtorComdat();
void initializeCallbacks(Module &M);
+ Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);
+
Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
- Value *getDynamicShadowNonTls(IRBuilder<> &IRB);
+ Value *getShadowNonTls(IRBuilder<> &IRB);
void untagPointerOperand(Instruction *I, Value *Addr);
- Value *shadowBase();
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
@@ -280,9 +281,13 @@ private:
bool CompileKernel;
bool Recover;
+ bool OutlinedChecks;
bool UseShortGranules;
bool InstrumentLandingPads;
+ bool HasMatchAllTag = false;
+ uint8_t MatchAllTag = 0;
+
Function *HwasanCtorFunction;
FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
@@ -293,7 +298,7 @@ private:
Constant *ShadowGlobal;
- Value *LocalDynamicShadow = nullptr;
+ Value *ShadowBase = nullptr;
Value *StackBaseTag = nullptr;
GlobalValue *ThreadPtrGlobal = nullptr;
};
@@ -365,6 +370,106 @@ PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
return PreservedAnalyses::all();
}
+void HWAddressSanitizer::createHwasanCtorComdat() {
+ std::tie(HwasanCtorFunction, std::ignore) =
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kHwasanModuleCtorName, kHwasanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) {
+ Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
+ Ctor->setComdat(CtorComdat);
+ appendToGlobalCtors(M, Ctor, 0, Ctor);
+ });
+
+ // Create a note that contains pointers to the list of global
+ // descriptors. Adding a note to the output file will cause the linker to
+ // create a PT_NOTE program header pointing to the note that we can use to
+ // find the descriptor list starting from the program headers. A function
+ // provided by the runtime initializes the shadow memory for the globals by
+ // accessing the descriptor list via the note. The dynamic loader needs to
+ // call this function whenever a library is loaded.
+ //
+ // The reason why we use a note for this instead of a more conventional
+ // approach of having a global constructor pass a descriptor list pointer to
+ // the runtime is because of an order of initialization problem. With
+ // constructors we can encounter the following problematic scenario:
+ //
+ // 1) library A depends on library B and also interposes one of B's symbols
+ // 2) B's constructors are called before A's (as required for correctness)
+ // 3) during construction, B accesses one of its "own" globals (actually
+ // interposed by A) and triggers a HWASAN failure due to the initialization
+ // for A not having happened yet
+ //
+ // Even without interposition it is possible to run into similar situations in
+ // cases where two libraries mutually depend on each other.
+ //
+ // We only need one note per binary, so put everything for the note in a
+ // comdat. This needs to be a comdat with an .init_array section to prevent
+ // newer versions of lld from discarding the note.
+ //
+ // Create the note even if we aren't instrumenting globals. This ensures that
+ // binaries linked from object files with both instrumented and
+ // non-instrumented globals will end up with a note, even if a comdat from an
+ // object file with non-instrumented globals is selected. The note is harmless
+ // if the runtime doesn't support it, since it will just be ignored.
+ Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
+
+ Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
+ auto Start =
+ new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
+ nullptr, "__start_hwasan_globals");
+ Start->setVisibility(GlobalValue::HiddenVisibility);
+ Start->setDSOLocal(true);
+ auto Stop =
+ new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
+ nullptr, "__stop_hwasan_globals");
+ Stop->setVisibility(GlobalValue::HiddenVisibility);
+ Stop->setDSOLocal(true);
+
+ // Null-terminated so actually 8 bytes, which are required in order to align
+ // the note properly.
+ auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
+
+ auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
+ Int32Ty, Int32Ty);
+ auto *Note =
+ new GlobalVariable(M, NoteTy, /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
+ Note->setSection(".note.hwasan.globals");
+ Note->setComdat(NoteComdat);
+ Note->setAlignment(Align(4));
+ Note->setDSOLocal(true);
+
+ // The pointers in the note need to be relative so that the note ends up being
+ // placed in rodata, which is the standard location for notes.
+ auto CreateRelPtr = [&](Constant *Ptr) {
+ return ConstantExpr::getTrunc(
+ ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
+ ConstantExpr::getPtrToInt(Note, Int64Ty)),
+ Int32Ty);
+ };
+ Note->setInitializer(ConstantStruct::getAnon(
+ {ConstantInt::get(Int32Ty, 8), // n_namesz
+ ConstantInt::get(Int32Ty, 8), // n_descsz
+ ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
+ Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
+ appendToCompilerUsed(M, Note);
+
+ // Create a zero-length global in hwasan_globals so that the linker will
+ // always create start and stop symbols.
+ auto Dummy = new GlobalVariable(
+ M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
+ Dummy->setSection("hwasan_globals");
+ Dummy->setComdat(NoteComdat);
+ Dummy->setMetadata(LLVMContext::MD_associated,
+ MDNode::get(*C, ValueAsMetadata::get(Note)));
+ appendToCompilerUsed(M, Dummy);
+}
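// [Editorial sketch, not part of this patch.] Laid out in memory, the note built
// by createHwasanCtorComdat() above corresponds to the following plain struct
// (field values taken from the initializer; the runtime locates the note through
// the PT_NOTE program header and uses the two relative pointers to find the
// global descriptor list):
#include <cstdint>
struct HwasanGlobalsNote {
  uint32_t n_namesz;  // 8
  uint32_t n_descsz;  // 8
  uint32_t n_type;    // ELF::NT_LLVM_HWASAN_GLOBALS
  char name[8];       // "LLVM" padded with NULs to 8 bytes
  int32_t start_rel;  // __start_hwasan_globals minus the note's own address
  int32_t stop_rel;   // __stop_hwasan_globals minus the note's own address
};
static_assert(sizeof(HwasanGlobalsNote) == 28,
              "same size as the IR struct built above");
// End of sketch.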
+
/// Module-level initialization.
///
/// inserts a call to __hwasan_init to the module's constructor list.
@@ -393,6 +498,19 @@ void HWAddressSanitizer::initializeModule() {
UseShortGranules =
ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
+ OutlinedChecks =
+ TargetTriple.isAArch64() && TargetTriple.isOSBinFormatELF() &&
+ (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);
+
+ if (ClMatchAllTag.getNumOccurrences()) {
+ if (ClMatchAllTag != -1) {
+ HasMatchAllTag = true;
+ MatchAllTag = ClMatchAllTag & 0xFF;
+ }
+ } else if (CompileKernel) {
+ HasMatchAllTag = true;
+ MatchAllTag = 0xFF;
+ }
// If we don't have personality function support, fall back to landing pads.
InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
@@ -400,19 +518,7 @@ void HWAddressSanitizer::initializeModule() {
: !NewRuntime;
if (!CompileKernel) {
- std::tie(HwasanCtorFunction, std::ignore) =
- getOrCreateSanitizerCtorAndInitFunctions(
- M, kHwasanModuleCtorName, kHwasanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{},
- // This callback is invoked when the functions are created the first
- // time. Hook them into the global ctors list in that case:
- [&](Function *Ctor, FunctionCallee) {
- Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
- Ctor->setComdat(CtorComdat);
- appendToGlobalCtors(M, Ctor, 0, Ctor);
- });
-
+ createHwasanCtorComdat();
bool InstrumentGlobals =
ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
if (InstrumentGlobals)
@@ -483,20 +589,27 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
}
-Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
+Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
// An empty inline asm with input reg == output reg.
// An opaque no-op cast, basically.
- InlineAsm *Asm = InlineAsm::get(
- FunctionType::get(Int8PtrTy, {ShadowGlobal->getType()}, false),
- StringRef(""), StringRef("=r,0"),
- /*hasSideEffects=*/false);
- return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
+ // This prevents code bloat as a result of rematerializing trivial definitions
+ // such as constants or global addresses at every load and store.
+ InlineAsm *Asm =
+ InlineAsm::get(FunctionType::get(Int8PtrTy, {Val->getType()}, false),
+ StringRef(""), StringRef("=r,0"),
+ /*hasSideEffects=*/false);
+ return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
+}
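// [Editorial sketch, not part of this patch.] The empty "=r,0" inline asm built
// above is the IR-level form of a classic "launder through a register" trick:
// the value comes back unchanged, but the optimizer can no longer see that, so
// it keeps the shadow base in a register instead of rematerializing the constant
// or global address at every load and store. A rough source-level analogue
// (illustrative only; the pass emits the IR directly) is:
template <typename T> static T opaque_value(T v) {
  asm("" : "+r"(v)); // same register in and out, value untouched
  return v;
}
// End of sketch.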
+
+Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
+ return getOpaqueNoopCast(IRB, ShadowGlobal);
}
-Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
- // Generate code only when dynamic addressing is needed.
+Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
if (Mapping.Offset != kDynamicShadowSentinel)
- return nullptr;
+ return getOpaqueNoopCast(
+ IRB, ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntptrTy, Mapping.Offset), Int8PtrTy));
if (Mapping.InGlobal) {
return getDynamicShadowIfunc(IRB);
@@ -532,7 +645,7 @@ void HWAddressSanitizer::getInterestingMemoryOperands(
return;
// Do not instrument the load fetching the dynamic shadow address.
- if (LocalDynamicShadow == I)
+ if (ShadowBase == I)
return;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
@@ -596,37 +709,35 @@ void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
}
-Value *HWAddressSanitizer::shadowBase() {
- if (LocalDynamicShadow)
- return LocalDynamicShadow;
- return ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, Mapping.Offset),
- Int8PtrTy);
-}
-
Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
// Mem >> Scale
Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
if (Mapping.Offset == 0)
return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
// (Mem >> Scale) + Offset
- return IRB.CreateGEP(Int8Ty, shadowBase(), Shadow);
+ return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
}
void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore) {
- const int64_t AccessInfo = Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
+ const int64_t AccessInfo =
+ (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
+ (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
+ (MatchAllTag << HWASanAccessInfo::MatchAllShift) +
+ (Recover << HWASanAccessInfo::RecoverShift) +
+ (IsWrite << HWASanAccessInfo::IsWriteShift) +
+ (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
IRBuilder<> IRB(InsertBefore);
- if (!ClInlineAllChecks && TargetTriple.isAArch64() &&
- TargetTriple.isOSBinFormatELF() && !Recover) {
+ if (OutlinedChecks) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
IRB.CreateCall(Intrinsic::getDeclaration(
M, UseShortGranules
? Intrinsic::hwasan_check_memaccess_shortgranules
: Intrinsic::hwasan_check_memaccess),
- {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
+ {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
return;
}
@@ -638,11 +749,9 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
- int matchAllTag = ClMatchAllTag.getNumOccurrences() > 0 ?
- ClMatchAllTag : (CompileKernel ? 0xFF : -1);
- if (matchAllTag != -1) {
- Value *TagNotIgnored = IRB.CreateICmpNE(PtrTag,
- ConstantInt::get(PtrTag->getType(), matchAllTag));
+ if (HasMatchAllTag) {
+ Value *TagNotIgnored = IRB.CreateICmpNE(
+ PtrTag, ConstantInt::get(PtrTag->getType(), MatchAllTag));
TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
}
@@ -664,7 +773,8 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
MDBuilder(*C).createBranchWeights(1, 100000),
- nullptr, nullptr, CheckFailTerm->getParent());
+ (DomTreeUpdater *)nullptr, nullptr,
+ CheckFailTerm->getParent());
IRB.SetInsertPoint(CheckTerm);
Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
@@ -673,7 +783,8 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
MDBuilder(*C).createBranchWeights(1, 100000),
- nullptr, nullptr, CheckFailTerm->getParent());
+ (DomTreeUpdater *)nullptr, nullptr,
+ CheckFailTerm->getParent());
IRB.SetInsertPoint(CheckFailTerm);
InlineAsm *Asm;
@@ -682,7 +793,9 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
// The signal handler will find the data address in rdi.
Asm = InlineAsm::get(
FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
- "int3\nnopl " + itostr(0x40 + AccessInfo) + "(%rax)",
+ "int3\nnopl " +
+ itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
+ "(%rax)",
"{rdi}",
/*hasSideEffects=*/true);
break;
@@ -691,7 +804,8 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
// The signal handler will find the data address in x0.
Asm = InlineAsm::get(
FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
- "brk #" + itostr(0x900 + AccessInfo),
+ "brk #" +
+ itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
"{x0}",
/*hasSideEffects=*/true);
break;
@@ -914,12 +1028,12 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
if (!Mapping.InTls) {
- LocalDynamicShadow = getDynamicShadowNonTls(IRB);
+ ShadowBase = getShadowNonTls(IRB);
return;
}
if (!WithFrameRecord && TargetTriple.isAndroid()) {
- LocalDynamicShadow = getDynamicShadowIfunc(IRB);
+ ShadowBase = getDynamicShadowIfunc(IRB);
return;
}
@@ -980,12 +1094,12 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
// Get shadow base address by aligning RecordPtr up.
// Note: this is not correct if the pointer is already aligned.
// Runtime library will make sure this never happens.
- LocalDynamicShadow = IRB.CreateAdd(
+ ShadowBase = IRB.CreateAdd(
IRB.CreateOr(
ThreadLongMaybeUntagged,
ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
- LocalDynamicShadow = IRB.CreateIntToPtr(LocalDynamicShadow, Int8PtrTy);
+ ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy);
}
Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
@@ -1137,7 +1251,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
IntrinToInstrument.empty())
return Changed;
- assert(!LocalDynamicShadow);
+ assert(!ShadowBase);
Instruction *InsertPt = &*F.getEntryBlock().begin();
IRBuilder<> EntryIRB(InsertPt);
@@ -1217,7 +1331,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
}
- LocalDynamicShadow = nullptr;
+ ShadowBase = nullptr;
StackBaseTag = nullptr;
return true;
@@ -1300,85 +1414,6 @@ void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
}
void HWAddressSanitizer::instrumentGlobals() {
- // Start by creating a note that contains pointers to the list of global
- // descriptors. Adding a note to the output file will cause the linker to
- // create a PT_NOTE program header pointing to the note that we can use to
- // find the descriptor list starting from the program headers. A function
- // provided by the runtime initializes the shadow memory for the globals by
- // accessing the descriptor list via the note. The dynamic loader needs to
- // call this function whenever a library is loaded.
- //
- // The reason why we use a note for this instead of a more conventional
- // approach of having a global constructor pass a descriptor list pointer to
- // the runtime is because of an order of initialization problem. With
- // constructors we can encounter the following problematic scenario:
- //
- // 1) library A depends on library B and also interposes one of B's symbols
- // 2) B's constructors are called before A's (as required for correctness)
- // 3) during construction, B accesses one of its "own" globals (actually
- // interposed by A) and triggers a HWASAN failure due to the initialization
- // for A not having happened yet
- //
- // Even without interposition it is possible to run into similar situations in
- // cases where two libraries mutually depend on each other.
- //
- // We only need one note per binary, so put everything for the note in a
- // comdat. This need to be a comdat with an .init_array section to prevent
- // newer versions of lld from discarding the note.
- Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
-
- Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
- auto Start =
- new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
- nullptr, "__start_hwasan_globals");
- Start->setVisibility(GlobalValue::HiddenVisibility);
- Start->setDSOLocal(true);
- auto Stop =
- new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
- nullptr, "__stop_hwasan_globals");
- Stop->setVisibility(GlobalValue::HiddenVisibility);
- Stop->setDSOLocal(true);
-
- // Null-terminated so actually 8 bytes, which are required in order to align
- // the note properly.
- auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
-
- auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
- Int32Ty, Int32Ty);
- auto *Note =
- new GlobalVariable(M, NoteTy, /*isConstantGlobal=*/true,
- GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
- Note->setSection(".note.hwasan.globals");
- Note->setComdat(NoteComdat);
- Note->setAlignment(Align(4));
- Note->setDSOLocal(true);
-
- // The pointers in the note need to be relative so that the note ends up being
- // placed in rodata, which is the standard location for notes.
- auto CreateRelPtr = [&](Constant *Ptr) {
- return ConstantExpr::getTrunc(
- ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
- ConstantExpr::getPtrToInt(Note, Int64Ty)),
- Int32Ty);
- };
- Note->setInitializer(ConstantStruct::getAnon(
- {ConstantInt::get(Int32Ty, 8), // n_namesz
- ConstantInt::get(Int32Ty, 8), // n_descsz
- ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
- Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
- appendToCompilerUsed(M, Note);
-
- // Create a zero-length global in hwasan_globals so that the linker will
- // always create start and stop symbols.
- auto Dummy = new GlobalVariable(
- M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
- Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
- Dummy->setSection("hwasan_globals");
- Dummy->setComdat(NoteComdat);
- Dummy->setMetadata(LLVMContext::MD_associated,
- MDNode::get(*C, ValueAsMetadata::get(Note)));
- appendToCompilerUsed(M, Dummy);
-
std::vector<GlobalVariable *> Globals;
for (GlobalVariable &GV : M.globals()) {
if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index bcd4e2e8e33c..5b9557a9b328 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -263,8 +263,15 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
break;
}
+ // Don't promote if the symbol is not defined in the module. This avoids
+  // creating a reference to a symbol that doesn't exist in the module.
+ // This can happen when we compile with a sample profile collected from
+ // one binary but used for another, which may have profiled targets that
+ // aren't used in the new binary. We might have a declaration initially in
+ // the case where the symbol is globally dead in the binary and removed by
+ // ThinLTO.
Function *TargetFunction = Symtab->getFunction(Target);
- if (TargetFunction == nullptr) {
+ if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 7b03bbfcdfe4..9efc7d1ac500 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -57,21 +57,6 @@ using namespace llvm;
#define DEBUG_TYPE "instrprof"
-// The start and end values of precise value profile range for memory
-// intrinsic sizes
-cl::opt<std::string> MemOPSizeRange(
- "memop-size-range",
- cl::desc("Set the range of size in memory intrinsic calls to be profiled "
- "precisely, in a format of <start_val>:<end_val>"),
- cl::init(""));
-
-// The value that considered to be large value in memory intrinsic.
-cl::opt<unsigned> MemOPSizeLarge(
- "memop-size-large",
- cl::desc("Set large value thresthold in memory intrinsic size profiling. "
- "Value of 0 disables the large value profiling."),
- cl::init(8192));
-
namespace {
cl::opt<bool> DoHashBasedCounterSplit(
@@ -150,6 +135,10 @@ cl::opt<bool> IterativeCounterPromotion(
cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
cl::desc("Allow counter promotion across the whole loop nest."));
+cl::opt<bool> SkipRetExitBlock(
+ cl::ZeroOrMore, "skip-ret-exit-block", cl::init(true),
+ cl::desc("Suppress counter promotion if exit blocks contain ret."));
+
class InstrProfilingLegacyPass : public ModulePass {
InstrProfiling InstrProf;
@@ -272,6 +261,18 @@ public:
// Skip 'infinite' loops:
if (ExitBlocks.size() == 0)
return false;
+
+ // Skip if any of the ExitBlocks contains a ret instruction.
+  // This is to prevent dumping of incomplete profile -- if the
+  // loop is a long-running loop and dump is called in the middle
+ // of the loop, the result profile is incomplete.
+ // FIXME: add other heuristics to detect long running loops.
+ if (SkipRetExitBlock) {
+ for (auto BB : ExitBlocks)
+ if (isa<ReturnInst>(BB->getTerminator()))
+ return false;
+ }
+
unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
if (MaxProm == 0)
return false;
@@ -395,6 +396,15 @@ private:
BlockFrequencyInfo *BFI;
};
+enum class ValueProfilingCallType {
+  // Individual values are tracked. Currently used for indirect call target
+ // profiling.
+ Default,
+
+ // MemOp: the memop size value profiling.
+ MemOp
+};
+
} // end anonymous namespace
PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
@@ -529,8 +539,6 @@ bool InstrProfiling::run(
NamesSize = 0;
ProfileDataMap.clear();
UsedVars.clear();
- getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
- MemOPSizeRangeLast);
TT = Triple(M.getTargetTriple());
// Emit the runtime hook even if no counters are present.
@@ -579,9 +587,9 @@ bool InstrProfiling::run(
return true;
}
-static FunctionCallee
-getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI,
- bool IsRange = false) {
+static FunctionCallee getOrInsertValueProfilingCall(
+ Module &M, const TargetLibraryInfo &TLI,
+ ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
LLVMContext &Ctx = M.getContext();
auto *ReturnTy = Type::getVoidTy(M.getContext());
@@ -589,27 +597,19 @@ getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI,
if (auto AK = TLI.getExtAttrForI32Param(false))
AL = AL.addParamAttribute(M.getContext(), 2, AK);
- if (!IsRange) {
- Type *ParamTypes[] = {
-#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
-#include "llvm/ProfileData/InstrProfData.inc"
- };
- auto *ValueProfilingCallTy =
- FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
- return M.getOrInsertFunction(getInstrProfValueProfFuncName(),
- ValueProfilingCallTy, AL);
- } else {
- Type *RangeParamTypes[] = {
-#define VALUE_RANGE_PROF 1
+ assert((CallType == ValueProfilingCallType::Default ||
+ CallType == ValueProfilingCallType::MemOp) &&
+ "Must be Default or MemOp");
+ Type *ParamTypes[] = {
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
#include "llvm/ProfileData/InstrProfData.inc"
-#undef VALUE_RANGE_PROF
- };
- auto *ValueRangeProfilingCallTy =
- FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
- return M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
- ValueRangeProfilingCallTy, AL);
- }
+ };
+ auto *ValueProfilingCallTy =
+ FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
+ StringRef FuncName = CallType == ValueProfilingCallType::Default
+ ? getInstrProfValueProfFuncName()
+ : getInstrProfValueProfMemOpFuncName();
+ return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);
}
void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
@@ -638,8 +638,8 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
Index += It->second.NumValueSites[Kind];
IRBuilder<> Builder(Ind);
- bool IsRange = (Ind->getValueKind()->getZExtValue() ==
- llvm::InstrProfValueKind::IPVK_MemOPSize);
+ bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
+ llvm::InstrProfValueKind::IPVK_MemOPSize);
CallInst *Call = nullptr;
auto *TLI = &GetTLI(*Ind->getFunction());
@@ -649,22 +649,19 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
// WinEHPrepare pass.
SmallVector<OperandBundleDef, 1> OpBundles;
Ind->getOperandBundlesAsDefs(OpBundles);
- if (!IsRange) {
+ if (!IsMemOpSize) {
Value *Args[3] = {Ind->getTargetValue(),
Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
Builder.getInt32(Index)};
Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args,
OpBundles);
} else {
- Value *Args[6] = {
- Ind->getTargetValue(),
- Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
- Builder.getInt32(Index),
- Builder.getInt64(MemOPSizeRangeStart),
- Builder.getInt64(MemOPSizeRangeLast),
- Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
- Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true),
- Args, OpBundles);
+ Value *Args[3] = {Ind->getTargetValue(),
+ Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
+ Builder.getInt32(Index)};
+ Call = Builder.CreateCall(
+ getOrInsertValueProfilingCall(*M, *TLI, ValueProfilingCallType::MemOp),
+ Args, OpBundles);
}
if (auto AK = TLI->getExtAttrForI32Param(false))
Call->addParamAttr(2, AK);
@@ -831,9 +828,11 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
Visibility = GlobalValue::HiddenVisibility;
}
}
+ std::string DataVarName = getVarName(Inc, getInstrProfDataVarPrefix());
auto MaybeSetComdat = [=](GlobalVariable *GV) {
if (NeedComdat)
- GV->setComdat(M->getOrInsertComdat(GV->getName()));
+ GV->setComdat(M->getOrInsertComdat(TT.isOSBinFormatCOFF() ? GV->getName()
+ : DataVarName));
};
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
@@ -898,9 +897,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
};
- auto *Data = new GlobalVariable(*M, DataTy, false, Linkage,
- ConstantStruct::get(DataTy, DataVals),
- getVarName(Inc, getInstrProfDataVarPrefix()));
+ auto *Data =
+ new GlobalVariable(*M, DataTy, false, Linkage,
+ ConstantStruct::get(DataTy, DataVals), DataVarName);
Data->setVisibility(Visibility);
Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index ad238f1357c6..cfdf3cad97f7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -105,6 +105,8 @@ Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T,
void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeAddressSanitizerLegacyPassPass(Registry);
initializeModuleAddressSanitizerLegacyPassPass(Registry);
+ initializeMemProfilerLegacyPassPass(Registry);
+ initializeModuleMemProfilerLegacyPassPass(Registry);
initializeBoundsCheckingLegacyPassPass(Registry);
initializeControlHeightReductionLegacyPassPass(Registry);
initializeGCOVProfilerLegacyPassPass(Registry);
@@ -119,7 +121,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeHWAddressSanitizerLegacyPassPass(Registry);
initializeThreadSanitizerLegacyPassPass(Registry);
initializeModuleSanitizerCoverageLegacyPassPass(Registry);
- initializeDataFlowSanitizerPass(Registry);
+ initializeDataFlowSanitizerLegacyPassPass(Registry);
}
/// LLVMInitializeInstrumentation - C binding for
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
new file mode 100644
index 000000000000..0e6a404a9e0b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -0,0 +1,638 @@
+//===- MemProfiler.cpp - memory allocation and access profiler ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler. Memory accesses are instrumented
+// to increment the access count held in a shadow memory location, or
+// alternatively to call into the runtime. Memory intrinsic calls (memmove,
+// memcpy, memset) are changed to call the memory profiling runtime version
+// instead.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/MemProfiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "memprof"
+
+constexpr int LLVM_MEM_PROFILER_VERSION = 1;
+
+// Size of memory mapped to a single shadow location.
+constexpr uint64_t DefaultShadowGranularity = 64;
+
+// Scale from granularity down to shadow size.
+constexpr uint64_t DefaultShadowScale = 3;
+
+constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
+constexpr uint64_t MemProfCtorAndDtorPriority = 1;
+// On Emscripten, the system needs more than one priority for constructors.
+constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
+constexpr char MemProfInitName[] = "__memprof_init";
+constexpr char MemProfVersionCheckNamePrefix[] =
+ "__memprof_version_mismatch_check_v";
+
+constexpr char MemProfShadowMemoryDynamicAddress[] =
+ "__memprof_shadow_memory_dynamic_address";
+
+constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";
+
+// Command-line flags.
+
+static cl::opt<bool> ClInsertVersionCheck(
+ "memprof-guard-against-version-mismatch",
+ cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
+ cl::init(true));
+
+// This flag may need to be replaced with -f[no-]memprof-reads.
+static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
+ cl::desc("instrument read instructions"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+ ClInstrumentWrites("memprof-instrument-writes",
+ cl::desc("instrument write instructions"), cl::Hidden,
+ cl::init(true));
+
+static cl::opt<bool> ClInstrumentAtomics(
+ "memprof-instrument-atomics",
+ cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
+ cl::init(true));
+
+static cl::opt<bool> ClUseCalls(
+ "memprof-use-callbacks",
+ cl::desc("Use callbacks instead of inline instrumentation sequences."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<std::string>
+ ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
+ cl::desc("Prefix for memory access callbacks"),
+ cl::Hidden, cl::init("__memprof_"));
+
+// These flags allow changing the shadow mapping.
+// The shadow mapping looks like
+// Shadow = ((Mem & mask) >> scale) + offset
+
+static cl::opt<int> ClMappingScale("memprof-mapping-scale",
+ cl::desc("scale of memprof shadow mapping"),
+ cl::Hidden, cl::init(DefaultShadowScale));
+
+static cl::opt<int>
+ ClMappingGranularity("memprof-mapping-granularity",
+ cl::desc("granularity of memprof shadow mapping"),
+ cl::Hidden, cl::init(DefaultShadowGranularity));
+
+// Debug flags.
+
+static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
+ cl::init(0));
+
+static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
+ cl::desc("Debug func"));
+
+static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
+ cl::Hidden, cl::init(-1));
+
+static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
+ cl::Hidden, cl::init(-1));
+
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+
+namespace {
+
+/// This struct defines the shadow mapping using the rule:
+/// shadow = ((mem & mask) >> Scale) ADD DynamicShadowOffset.
+struct ShadowMapping {
+ ShadowMapping() {
+ Scale = ClMappingScale;
+ Granularity = ClMappingGranularity;
+ Mask = ~(Granularity - 1);
+ }
+
+ int Scale;
+ int Granularity;
+ uint64_t Mask; // Computed as ~(Granularity-1)
+};
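// [Editorial sketch, not part of this patch.] With the defaults above
// (Granularity = 64, Scale = 3, so Mask = ~63), each 64-byte granule of memory
// maps to one 8-byte shadow counter, mirroring MemProfiler::memToShadow():
#include <cstdint>
constexpr uint64_t exampleMemProfShadow(uint64_t Addr, uint64_t ShadowOffset) {
  constexpr uint64_t Granularity = 64;
  constexpr uint64_t Scale = 3;
  constexpr uint64_t Mask = ~(Granularity - 1);
  return ((Addr & Mask) >> Scale) + ShadowOffset;
}
// e.g. exampleMemProfShadow(0x1000, Off) == 0x200 + Off, and every access to
// addresses 0x1000..0x103f increments that same counter.
// End of sketch.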
+
+static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
+ return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority
+ : MemProfCtorAndDtorPriority;
+}
+
+struct InterestingMemoryAccess {
+ Value *Addr = nullptr;
+ bool IsWrite;
+ unsigned Alignment;
+ uint64_t TypeSize;
+ Value *MaybeMask = nullptr;
+};
+
+/// Instrument the code in module to profile memory accesses.
+class MemProfiler {
+public:
+ MemProfiler(Module &M) {
+ C = &(M.getContext());
+ LongSize = M.getDataLayout().getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ }
+
+ /// If it is an interesting memory access, populate information
+  /// about the access and return an InterestingMemoryAccess struct.
+ /// Otherwise return None.
+ Optional<InterestingMemoryAccess>
+ isInterestingMemoryAccess(Instruction *I) const;
+
+ void instrumentMop(Instruction *I, const DataLayout &DL,
+ InterestingMemoryAccess &Access);
+ void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
+ Value *Addr, uint32_t TypeSize, bool IsWrite);
+ void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
+ Instruction *I, Value *Addr,
+ unsigned Alignment, uint32_t TypeSize,
+ bool IsWrite);
+ void instrumentMemIntrinsic(MemIntrinsic *MI);
+ Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+ bool instrumentFunction(Function &F);
+ bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
+ bool insertDynamicShadowAtFunctionEntry(Function &F);
+
+private:
+ void initializeCallbacks(Module &M);
+
+ LLVMContext *C;
+ int LongSize;
+ Type *IntptrTy;
+ ShadowMapping Mapping;
+
+  // These arrays are indexed by AccessIsWrite.
+ FunctionCallee MemProfMemoryAccessCallback[2];
+ FunctionCallee MemProfMemoryAccessCallbackSized[2];
+
+ FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
+ Value *DynamicShadowOffset = nullptr;
+};
+
+class MemProfilerLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ explicit MemProfilerLegacyPass() : FunctionPass(ID) {
+ initializeMemProfilerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "MemProfilerFunctionPass"; }
+
+ bool runOnFunction(Function &F) override {
+ MemProfiler Profiler(*F.getParent());
+ return Profiler.instrumentFunction(F);
+ }
+};
+
+class ModuleMemProfiler {
+public:
+ ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }
+
+ bool instrumentModule(Module &);
+
+private:
+ Triple TargetTriple;
+ ShadowMapping Mapping;
+ Function *MemProfCtorFunction = nullptr;
+};
+
+class ModuleMemProfilerLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ explicit ModuleMemProfilerLegacyPass() : ModulePass(ID) {
+ initializeModuleMemProfilerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "ModuleMemProfiler"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {}
+
+ bool runOnModule(Module &M) override {
+ ModuleMemProfiler MemProfiler(M);
+ return MemProfiler.instrumentModule(M);
+ }
+};
+
+} // end anonymous namespace
+
+MemProfilerPass::MemProfilerPass() {}
+
+PreservedAnalyses MemProfilerPass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+ Module &M = *F.getParent();
+ MemProfiler Profiler(M);
+ if (Profiler.instrumentFunction(F))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+ModuleMemProfilerPass::ModuleMemProfilerPass() {}
+
+PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
+ AnalysisManager<Module> &AM) {
+ ModuleMemProfiler Profiler(M);
+ if (Profiler.instrumentModule(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+char MemProfilerLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MemProfilerLegacyPass, "memprof",
+ "MemProfiler: profile memory allocations and accesses.",
+ false, false)
+INITIALIZE_PASS_END(MemProfilerLegacyPass, "memprof",
+ "MemProfiler: profile memory allocations and accesses.",
+ false, false)
+
+FunctionPass *llvm::createMemProfilerFunctionPass() {
+ return new MemProfilerLegacyPass();
+}
+
+char ModuleMemProfilerLegacyPass::ID = 0;
+
+INITIALIZE_PASS(ModuleMemProfilerLegacyPass, "memprof-module",
+                "MemProfiler: profile memory allocations and accesses. "
+                "ModulePass",
+ false, false)
+
+ModulePass *llvm::createModuleMemProfilerLegacyPassPass() {
+ return new ModuleMemProfilerLegacyPass();
+}
+
+Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
+ // (Shadow & mask) >> scale
+ Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
+ Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
+ // (Shadow >> scale) | offset
+ assert(DynamicShadowOffset);
+ return IRB.CreateAdd(Shadow, DynamicShadowOffset);
+}
+
+// Instrument memset/memmove/memcpy
+void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ IRBuilder<> IRB(MI);
+ if (isa<MemTransferInst>(MI)) {
+ IRB.CreateCall(
+ isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ } else if (isa<MemSetInst>(MI)) {
+ IRB.CreateCall(
+ MemProfMemset,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ }
+ MI->eraseFromParent();
+}
+
+Optional<InterestingMemoryAccess>
+MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
+ // Do not instrument the load fetching the dynamic shadow address.
+ if (DynamicShadowOffset == I)
+ return None;
+
+ InterestingMemoryAccess Access;
+
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!ClInstrumentReads)
+ return None;
+ Access.IsWrite = false;
+ Access.TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
+ Access.Alignment = LI->getAlignment();
+ Access.Addr = LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!ClInstrumentWrites)
+ return None;
+ Access.IsWrite = true;
+ Access.TypeSize =
+ DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
+ Access.Alignment = SI->getAlignment();
+ Access.Addr = SI->getPointerOperand();
+ } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ if (!ClInstrumentAtomics)
+ return None;
+ Access.IsWrite = true;
+ Access.TypeSize =
+ DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
+ Access.Alignment = 0;
+ Access.Addr = RMW->getPointerOperand();
+ } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (!ClInstrumentAtomics)
+ return None;
+ Access.IsWrite = true;
+ Access.TypeSize =
+ DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
+ Access.Alignment = 0;
+ Access.Addr = XCHG->getPointerOperand();
+ } else if (auto *CI = dyn_cast<CallInst>(I)) {
+ auto *F = CI->getCalledFunction();
+ if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
+ F->getIntrinsicID() == Intrinsic::masked_store)) {
+ unsigned OpOffset = 0;
+ if (F->getIntrinsicID() == Intrinsic::masked_store) {
+ if (!ClInstrumentWrites)
+ return None;
+ // Masked store has an initial operand for the value.
+ OpOffset = 1;
+ Access.IsWrite = true;
+ } else {
+ if (!ClInstrumentReads)
+ return None;
+ Access.IsWrite = false;
+ }
+
+ auto *BasePtr = CI->getOperand(0 + OpOffset);
+ auto *Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+ Access.TypeSize = DL.getTypeStoreSizeInBits(Ty);
+ if (auto *AlignmentConstant =
+ dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+ Access.Alignment = (unsigned)AlignmentConstant->getZExtValue();
+ else
+ Access.Alignment = 1; // No alignment guarantees. We probably got Undef
+ Access.MaybeMask = CI->getOperand(2 + OpOffset);
+ Access.Addr = BasePtr;
+ }
+ }
+
+ if (!Access.Addr)
+ return None;
+
+  // Do not instrument accesses from different address spaces; we cannot deal
+ // with them.
+ Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
+ if (PtrTy->getPointerAddressSpace() != 0)
+ return None;
+
+ // Ignore swifterror addresses.
+ // swifterror memory addresses are mem2reg promoted by instruction
+ // selection. As such they cannot have regular uses like an instrumentation
+ // function and it makes no sense to track them as memory.
+ if (Access.Addr->isSwiftError())
+ return None;
+
+ return Access;
+}
+
+void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
+ Instruction *I, Value *Addr,
+ unsigned Alignment,
+ uint32_t TypeSize, bool IsWrite) {
+ auto *VTy = cast<FixedVectorType>(
+ cast<PointerType>(Addr->getType())->getElementType());
+ uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
+ unsigned Num = VTy->getNumElements();
+ auto *Zero = ConstantInt::get(IntptrTy, 0);
+ for (unsigned Idx = 0; Idx < Num; ++Idx) {
+ Value *InstrumentedAddress = nullptr;
+ Instruction *InsertBefore = I;
+ if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
+ // dyn_cast as we might get UndefValue
+ if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
+ if (Masked->isZero())
+ // Mask is constant false, so no instrumentation needed.
+ continue;
+        // If we have a true or undef value, fall through to instrumentAddress
+        // with InsertBefore == I.
+ }
+ } else {
+ IRBuilder<> IRB(I);
+ Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
+ Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+ InsertBefore = ThenTerm;
+ }
+
+ IRBuilder<> IRB(InsertBefore);
+ InstrumentedAddress =
+ IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+ instrumentAddress(I, InsertBefore, InstrumentedAddress, ElemTypeSize,
+ IsWrite);
+ }
+}
+
+void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
+ InterestingMemoryAccess &Access) {
+ if (Access.IsWrite)
+ NumInstrumentedWrites++;
+ else
+ NumInstrumentedReads++;
+
+ if (Access.MaybeMask) {
+ instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
+ Access.Alignment, Access.TypeSize,
+ Access.IsWrite);
+ } else {
+ // Since the access counts will be accumulated across the entire allocation,
+ // we only update the shadow access count for the first location and thus
+ // don't need to worry about alignment and type size.
+ instrumentAddress(I, I, Access.Addr, Access.TypeSize, Access.IsWrite);
+ }
+}
+
+void MemProfiler::instrumentAddress(Instruction *OrigIns,
+ Instruction *InsertBefore, Value *Addr,
+ uint32_t TypeSize, bool IsWrite) {
+ IRBuilder<> IRB(InsertBefore);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+
+ if (ClUseCalls) {
+ IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
+ return;
+ }
+
+ // Create an inline sequence to compute shadow location, and increment the
+ // value by one.
+ Type *ShadowTy = Type::getInt64Ty(*C);
+ Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+ Value *ShadowPtr = memToShadow(AddrLong, IRB);
+ Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
+ Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
+ Value *Inc = ConstantInt::get(Type::getInt64Ty(*C), 1);
+ ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
+ IRB.CreateStore(ShadowValue, ShadowAddr);
+}
+
+// Create the variable for the profile file name.
+void createProfileFileNameVar(Module &M) {
+ const MDString *MemProfFilename =
+ dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
+ if (!MemProfFilename)
+ return;
+ assert(!MemProfFilename->getString().empty() &&
+ "Unexpected MemProfProfileFilename metadata with empty string");
+ Constant *ProfileNameConst = ConstantDataArray::getString(
+ M.getContext(), MemProfFilename->getString(), true);
+ GlobalVariable *ProfileNameVar = new GlobalVariable(
+ M, ProfileNameConst->getType(), /*isConstant=*/true,
+ GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
+ Triple TT(M.getTargetTriple());
+ if (TT.supportsCOMDAT()) {
+ ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
+ ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
+ }
+}
+
+bool ModuleMemProfiler::instrumentModule(Module &M) {
+ // Create a module constructor.
+ std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
+ std::string VersionCheckName =
+ ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
+ : "";
+ std::tie(MemProfCtorFunction, std::ignore) =
+ createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
+ MemProfInitName, /*InitArgTypes=*/{},
+ /*InitArgs=*/{}, VersionCheckName);
+
+ const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
+ appendToGlobalCtors(M, MemProfCtorFunction, Priority);
+
+ createProfileFileNameVar(M);
+
+ return true;
+}
+
+void MemProfiler::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
+
+ for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+ const std::string TypeStr = AccessIsWrite ? "store" : "load";
+
+ SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
+ SmallVector<Type *, 2> Args1{1, IntptrTy};
+ MemProfMemoryAccessCallbackSized[AccessIsWrite] =
+ M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr + "N",
+ FunctionType::get(IRB.getVoidTy(), Args2, false));
+
+ MemProfMemoryAccessCallback[AccessIsWrite] =
+ M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr,
+ FunctionType::get(IRB.getVoidTy(), Args1, false));
+ }
+ MemProfMemmove = M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ MemProfMemset = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt32Ty(), IntptrTy);
+}
+
+bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
+ // For each NSObject descendant having a +load method, this method is invoked
+ // by the ObjC runtime before any of the static constructors is called.
+ // Therefore we need to instrument such methods with a call to __memprof_init
+ // at the beginning in order to initialize our runtime before any access to
+ // the shadow memory.
+ // We cannot just ignore these methods, because they may call other
+ // instrumented functions.
+ if (F.getName().find(" load]") != std::string::npos) {
+ FunctionCallee MemProfInitFunction =
+ declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
+ IRBuilder<> IRB(&F.front(), F.front().begin());
+ IRB.CreateCall(MemProfInitFunction, {});
+ return true;
+ }
+ return false;
+}
+
+bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
+ IRBuilder<> IRB(&F.front().front());
+ Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
+ MemProfShadowMemoryDynamicAddress, IntptrTy);
+ if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
+ cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
+ DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
+ return true;
+}
+
+bool MemProfiler::instrumentFunction(Function &F) {
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
+ return false;
+ if (ClDebugFunc == F.getName())
+ return false;
+ if (F.getName().startswith("__memprof_"))
+ return false;
+
+ bool FunctionModified = false;
+
+ // If needed, insert __memprof_init.
+ // This function needs to be called even if the function body is not
+ // instrumented.
+ if (maybeInsertMemProfInitAtFunctionEntry(F))
+ FunctionModified = true;
+
+ LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");
+
+ initializeCallbacks(*F.getParent());
+
+ FunctionModified |= insertDynamicShadowAtFunctionEntry(F);
+
+ SmallVector<Instruction *, 16> ToInstrument;
+
+ // Fill the set of memory operations to instrument.
+ for (auto &BB : F) {
+ for (auto &Inst : BB) {
+ if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
+ ToInstrument.push_back(&Inst);
+ }
+ }
+
+ int NumInstrumented = 0;
+ for (auto *Inst : ToInstrument) {
+ if (ClDebugMin < 0 || ClDebugMax < 0 ||
+ (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
+ Optional<InterestingMemoryAccess> Access =
+ isInterestingMemoryAccess(Inst);
+ if (Access)
+ instrumentMop(Inst, F.getParent()->getDataLayout(), *Access);
+ else
+ instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
+ }
+ NumInstrumented++;
+ }
+
+ if (NumInstrumented > 0)
+ FunctionModified = true;
+
+ LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
+ << F << "\n");
+
+ return FunctionModified;
+}
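
The MemProfiler constructor wiring above only needs to guarantee that __memprof_init (and, when ClInsertVersionCheck is set, a version-check hook named after LLVM_MEM_PROFILER_VERSION) runs before any instrumented code. A minimal standalone C++ sketch of that ordering, using stub hooks in place of the real memprof runtime (the stub names and output are illustrative only):

    #include <cstdio>

    // Illustrative stand-ins for the __memprof_* runtime entry points.
    static void memprof_init_stub() { std::puts("memprof runtime initialized"); }
    static void memprof_version_check_stub(int V) {
      std::printf("version check v%d passed\n", V);
    }

    namespace {
    // Models the module constructor appended to llvm.global_ctors: it runs
    // before main(), initializes the runtime, then performs the optional
    // version check controlled by ClInsertVersionCheck.
    struct MemProfModuleCtorModel {
      MemProfModuleCtorModel() {
        memprof_init_stub();
        memprof_version_check_stub(/*assumed LLVM_MEM_PROFILER_VERSION=*/1);
      }
    } RunsBeforeMain;
    } // namespace

    int main() { std::puts("user code runs after the profiler constructor"); }
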
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index fcf7f470b3e1..7a6874584d59 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -153,6 +153,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -337,8 +338,8 @@ static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
cl::desc("Define custom MSan OriginBase"),
cl::Hidden, cl::init(0));
-static const char *const kMsanModuleCtorName = "msan.module_ctor";
-static const char *const kMsanInitName = "__msan_init";
+const char kMsanModuleCtorName[] = "msan.module_ctor";
+const char kMsanInitName[] = "__msan_init";
namespace {
@@ -572,6 +573,9 @@ private:
/// uninitialized value and returns an updated origin id encoding this info.
FunctionCallee MsanChainOriginFn;
+ /// Run-time helper that paints an origin over a region.
+ FunctionCallee MsanSetOriginFn;
+
/// MSan runtime replacements for memmove, memcpy and memset.
FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
@@ -850,6 +854,9 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
// instrumentation.
MsanChainOriginFn = M.getOrInsertFunction(
"__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
+ MsanSetOriginFn =
+ M.getOrInsertFunction("__msan_set_origin", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty());
MemmoveFn = M.getOrInsertFunction(
"__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IntptrTy);
@@ -1049,7 +1056,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
ValueMap<Value*, Value*> ShadowMap, OriginMap;
std::unique_ptr<VarArgHelper> VAHelper;
const TargetLibraryInfo *TLI;
- BasicBlock *ActualFnStart;
+ Instruction *FnPrologueEnd;
// The following flags disable parts of MSan instrumentation based on
// exclusion list contents and command-line options.
@@ -1081,17 +1088,31 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
PoisonStack = SanitizeFunction && ClPoisonStack;
PoisonUndef = SanitizeFunction && ClPoisonUndef;
+ // In the presence of unreachable blocks, we may see Phi nodes with
+ // incoming nodes from such blocks. Since InstVisitor skips unreachable
+ // blocks, such nodes will not have any shadow value associated with them.
+ // It's easier to remove unreachable blocks than deal with missing shadow.
+ removeUnreachableBlocks(F);
+
MS.initializeCallbacks(*F.getParent());
- if (MS.CompileKernel)
- ActualFnStart = insertKmsanPrologue(F);
- else
- ActualFnStart = &F.getEntryBlock();
+ FnPrologueEnd = IRBuilder<>(F.getEntryBlock().getFirstNonPHI())
+ .CreateIntrinsic(Intrinsic::donothing, {}, {});
+
+ if (MS.CompileKernel) {
+ IRBuilder<> IRB(FnPrologueEnd);
+ insertKmsanPrologue(IRB);
+ }
LLVM_DEBUG(if (!InsertChecks) dbgs()
<< "MemorySanitizer is not inserting checks into '"
<< F.getName() << "'\n");
}
+ bool isInPrologue(Instruction &I) {
+ return I.getParent() == FnPrologueEnd->getParent() &&
+ (&I == FnPrologueEnd || I.comesBefore(FnPrologueEnd));
+ }
+
Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
if (MS.TrackOrigins <= 1) return V;
return IRB.CreateCall(MS.MsanChainOriginFn, V);
@@ -1143,37 +1164,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const DataLayout &DL = F.getParent()->getDataLayout();
const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
- if (Shadow->getType()->isAggregateType()) {
- paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
- OriginAlignment);
- } else {
- Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
- if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
- if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
- paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
- OriginAlignment);
- return;
- }
-
- unsigned TypeSizeInBits =
- DL.getTypeSizeInBits(ConvertedShadow->getType());
- unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
- if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
- FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
- Value *ConvertedShadow2 = IRB.CreateZExt(
- ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
- IRB.CreateCall(Fn, {ConvertedShadow2,
- IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- Origin});
- } else {
- Value *Cmp = IRB.CreateICmpNE(
- ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
- Instruction *CheckTerm = SplitBlockAndInsertIfThen(
- Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
- IRBuilder<> IRBNew(CheckTerm);
- paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
+ Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
+ if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
+ if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
+ paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
OriginAlignment);
- }
+ return;
+ }
+
+ unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
+ unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
+ if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
+ FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
+ Value *ConvertedShadow2 =
+ IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
+ IRB.CreateCall(Fn,
+ {ConvertedShadow2,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), Origin});
+ } else {
+ Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+ Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
+ IRBuilder<> IRBNew(CheckTerm);
+ paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
+ OriginAlignment);
}
}
@@ -1218,7 +1232,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
bool AsCall) {
IRBuilder<> IRB(OrigIns);
LLVM_DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
- Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+ Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
LLVM_DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
@@ -1240,8 +1254,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
? Origin
: (Value *)IRB.getInt32(0)});
} else {
- Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
- getCleanShadow(ConvertedShadow), "_mscmp");
+ Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
Cmp, OrigIns,
/* Unreachable */ !MS.Recover, MS.ColdCallWeights);
@@ -1262,10 +1275,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
LLVM_DEBUG(dbgs() << "DONE:\n" << F);
}
- BasicBlock *insertKmsanPrologue(Function &F) {
- BasicBlock *ret =
- SplitBlock(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHI());
- IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ // Create the KMSAN prologue at IRB's insertion point.
+ void insertKmsanPrologue(IRBuilder<> &IRB) {
Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
Constant *Zero = IRB.getInt32(0);
MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
@@ -1284,21 +1295,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MS.RetvalOriginTLS =
IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
{Zero, IRB.getInt32(6)}, "retval_origin");
- return ret;
}
/// Add MemorySanitizer instrumentation to a function.
bool runOnFunction() {
- // In the presence of unreachable blocks, we may see Phi nodes with
- // incoming nodes from such blocks. Since InstVisitor skips unreachable
- // blocks, such nodes will not have any shadow value associated with them.
- // It's easier to remove unreachable blocks than deal with missing shadow.
- removeUnreachableBlocks(F);
-
// Iterate all BBs in depth-first order and create shadow instructions
// for all instructions (where applicable).
// For PHI nodes we create dummy shadow PHIs which will be finalized later.
- for (BasicBlock *BB : depth_first(ActualFnStart))
+ for (BasicBlock *BB : depth_first(FnPrologueEnd->getParent()))
visit(*BB);
// Finalize PHI nodes.
@@ -1385,14 +1389,68 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return ty;
}
- /// Convert a shadow value to it's flattened variant.
- Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
+ /// Extract combined shadow of struct elements as a bool
+ Value *collapseStructShadow(StructType *Struct, Value *Shadow,
+ IRBuilder<> &IRB) {
+ Value *FalseVal = IRB.getIntN(/* width */ 1, /* value */ 0);
+ Value *Aggregator = FalseVal;
+
+ for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
+ // Combine by ORing together each element's bool shadow
+ Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
+ Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
+ Value *ShadowBool = convertToBool(ShadowInner, IRB);
+
+ if (Aggregator != FalseVal)
+ Aggregator = IRB.CreateOr(Aggregator, ShadowBool);
+ else
+ Aggregator = ShadowBool;
+ }
+
+ return Aggregator;
+ }
+
+ // Extract combined shadow of array elements
+ Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
+ IRBuilder<> &IRB) {
+ if (!Array->getNumElements())
+ return IRB.getIntN(/* width */ 1, /* value */ 0);
+
+ Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
+ Value *Aggregator = convertShadowToScalar(FirstItem, IRB);
+
+ for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
+ Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
+ Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
+ Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
+ }
+ return Aggregator;
+ }
+
+ /// Convert a shadow value to its flattened variant. The resulting
+ /// shadow may not necessarily have the same bit width as the input
+ /// value, but it will always be comparable to zero.
+ Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
+ if (StructType *Struct = dyn_cast<StructType>(V->getType()))
+ return collapseStructShadow(Struct, V, IRB);
+ if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
+ return collapseArrayShadow(Array, V, IRB);
Type *Ty = V->getType();
Type *NoVecTy = getShadowTyNoVec(Ty);
if (Ty == NoVecTy) return V;
return IRB.CreateBitCast(V, NoVecTy);
}
+ // Convert a scalar value to an i1 by comparing with 0
+ Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
+ Type *VTy = V->getType();
+ assert(VTy->isIntegerTy());
+ if (VTy->getIntegerBitWidth() == 1)
+ // Just converting a bool to a bool, so do nothing.
+ return V;
+ return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name);
+ }
+
/// Compute the integer shadow offset that corresponds to a given
/// application address.
///
@@ -1611,7 +1669,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (*ShadowPtr)
return *ShadowPtr;
Function *F = A->getParent();
- IRBuilder<> EntryIRB(ActualFnStart->getFirstNonPHI());
+ IRBuilder<> EntryIRB(FnPrologueEnd);
unsigned ArgOffset = 0;
const DataLayout &DL = F->getParent()->getDataLayout();
for (auto &FArg : F->args()) {
@@ -1679,6 +1737,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
} else {
setOrigin(A, getCleanOrigin());
}
+
+ break;
}
if (!FArgEagerCheck)
@@ -1726,8 +1786,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (!InsertChecks) return;
#ifndef NDEBUG
Type *ShadowTy = Shadow->getType();
- assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
- "Can only insert checks for integer and vector shadow types");
+ assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
+ isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
+ "Can only insert checks for integer, vector, and aggregate shadow "
+ "types");
#endif
InstrumentationList.push_back(
ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
@@ -1769,6 +1831,24 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
llvm_unreachable("Unknown ordering");
}
+ Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
+ constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
+ uint32_t OrderingTable[NumOrderings] = {};
+
+ OrderingTable[(int)AtomicOrderingCABI::relaxed] =
+ OrderingTable[(int)AtomicOrderingCABI::release] =
+ (int)AtomicOrderingCABI::release;
+ OrderingTable[(int)AtomicOrderingCABI::consume] =
+ OrderingTable[(int)AtomicOrderingCABI::acquire] =
+ OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
+ (int)AtomicOrderingCABI::acq_rel;
+ OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
+ (int)AtomicOrderingCABI::seq_cst;
+
+ return ConstantDataVector::get(IRB.getContext(),
+ makeArrayRef(OrderingTable, NumOrderings));
+ }
+
AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
switch (a) {
case AtomicOrdering::NotAtomic:
@@ -1786,11 +1866,33 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
llvm_unreachable("Unknown ordering");
}
+ Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
+ constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
+ uint32_t OrderingTable[NumOrderings] = {};
+
+ OrderingTable[(int)AtomicOrderingCABI::relaxed] =
+ OrderingTable[(int)AtomicOrderingCABI::acquire] =
+ OrderingTable[(int)AtomicOrderingCABI::consume] =
+ (int)AtomicOrderingCABI::acquire;
+ OrderingTable[(int)AtomicOrderingCABI::release] =
+ OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
+ (int)AtomicOrderingCABI::acq_rel;
+ OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
+ (int)AtomicOrderingCABI::seq_cst;
+
+ return ConstantDataVector::get(IRB.getContext(),
+ makeArrayRef(OrderingTable, NumOrderings));
+ }
+
// ------------------- Visitors.
using InstVisitor<MemorySanitizerVisitor>::visit;
void visit(Instruction &I) {
- if (!I.getMetadata("nosanitize"))
- InstVisitor<MemorySanitizerVisitor>::visit(I);
+ if (I.getMetadata("nosanitize"))
+ return;
+ // Don't want to visit if we're in the prologue
+ if (isInPrologue(I))
+ return;
+ InstVisitor<MemorySanitizerVisitor>::visit(I);
}
/// Instrument LoadInst
@@ -2046,7 +2148,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
// No point in adding something that might result in 0 origin value.
if (!ConstOrigin || !ConstOrigin->isNullValue()) {
- Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
+ Value *FlatShadow = MSV->convertShadowToScalar(OpShadow, IRB);
Value *Cond =
IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
@@ -2538,7 +2640,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return false;
unsigned NumArgOperands = I.getNumArgOperands();
-
for (unsigned i = 0; i < NumArgOperands; ++i) {
Type *Ty = I.getArgOperand(i)->getType();
if (Ty != RetTy)
@@ -2602,9 +2703,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleLifetimeStart(IntrinsicInst &I) {
if (!PoisonStack)
return;
- DenseMap<Value *, AllocaInst *> AllocaForValue;
- AllocaInst *AI =
- llvm::findAllocaForValue(I.getArgOperand(1), AllocaForValue);
+ AllocaInst *AI = llvm::findAllocaForValue(I.getArgOperand(1));
if (!AI)
InstrumentLifetimeStart = false;
LifetimeStartList.push_back(std::make_pair(&I, AI));
@@ -2635,14 +2734,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// We copy the shadow of \p CopyOp[NumUsedElements:] to \p
// Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
// return a fully initialized value.
- void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
+ void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements,
+ bool HasRoundingMode = false) {
IRBuilder<> IRB(&I);
Value *CopyOp, *ConvertOp;
- switch (I.getNumArgOperands()) {
- case 3:
- assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
- LLVM_FALLTHROUGH;
+ assert((!HasRoundingMode ||
+ isa<ConstantInt>(I.getArgOperand(I.getNumArgOperands() - 1))) &&
+ "Invalid rounding mode");
+
+ switch (I.getNumArgOperands() - HasRoundingMode) {
case 2:
CopyOp = I.getArgOperand(0);
ConvertOp = I.getArgOperand(1);
@@ -2898,7 +2999,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getOrigin(&I, 0));
}
- // Instrument experimental.vector.reduce.or intrinsic.
+ // Instrument vector.reduce.or intrinsic.
// Valid (non-poisoned) set bits in the operand pull low the
// corresponding shadow bits.
void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
@@ -2916,7 +3017,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getOrigin(&I, 0));
}
- // Instrument experimental.vector.reduce.or intrinsic.
+ // Instrument vector.reduce.and intrinsic.
// Valid (non-poisoned) unset bits in the operand pull down the
// corresponding shadow bits.
void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
@@ -3089,18 +3190,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// and then apply the usual shadow combining logic.
void handlePclmulIntrinsic(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
- Type *ShadowTy = getShadowTy(&I);
unsigned Width =
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
assert(isa<ConstantInt>(I.getArgOperand(2)) &&
"pclmul 3rd operand must be a constant");
unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
- Value *Shuf0 =
- IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy),
- getPclmulMask(Width, Imm & 0x01));
- Value *Shuf1 =
- IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy),
- getPclmulMask(Width, Imm & 0x10));
+ Value *Shuf0 = IRB.CreateShuffleVector(getShadow(&I, 0),
+ getPclmulMask(Width, Imm & 0x01));
+ Value *Shuf1 = IRB.CreateShuffleVector(getShadow(&I, 1),
+ getPclmulMask(Width, Imm & 0x10));
ShadowAndOriginCombiner SOC(this, IRB);
SOC.Add(Shuf0, getOrigin(&I, 0));
SOC.Add(Shuf1, getOrigin(&I, 1));
@@ -3133,8 +3231,24 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ // Instrument abs intrinsic.
+ // handleUnknownIntrinsic can't handle it because of the last
+ // is_int_min_poison argument which does not match the result type.
+ void handleAbsIntrinsic(IntrinsicInst &I) {
+ assert(I.getType()->isIntOrIntVectorTy());
+ assert(I.getArgOperand(0)->getType() == I.getType());
+
+ // FIXME: Handle is_int_min_poison.
+ IRBuilder<> IRB(&I);
+ setShadow(&I, getShadow(&I, 0));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
+ case Intrinsic::abs:
+ handleAbsIntrinsic(I);
+ break;
case Intrinsic::lifetime_start:
handleLifetimeStart(I);
break;
@@ -3151,15 +3265,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::masked_load:
handleMaskedLoad(I);
break;
- case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::vector_reduce_and:
handleVectorReduceAndIntrinsic(I);
break;
- case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::vector_reduce_or:
handleVectorReduceOrIntrinsic(I);
break;
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_mul:
handleVectorReduceIntrinsic(I);
break;
case Intrinsic::x86_sse_stmxcsr:
@@ -3179,6 +3293,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_avx512_cvtusi2ss:
case Intrinsic::x86_avx512_cvtusi642sd:
case Intrinsic::x86_avx512_cvtusi642ss:
+ handleVectorConvertIntrinsic(I, 1, true);
+ break;
case Intrinsic::x86_sse2_cvtsd2si64:
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2ss:
@@ -3404,6 +3520,63 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
+ void visitLibAtomicLoad(CallBase &CB) {
+ // Since we use getNextNode here, we can't have CB terminate the BB.
+ assert(isa<CallInst>(CB));
+
+ IRBuilder<> IRB(&CB);
+ Value *Size = CB.getArgOperand(0);
+ Value *SrcPtr = CB.getArgOperand(1);
+ Value *DstPtr = CB.getArgOperand(2);
+ Value *Ordering = CB.getArgOperand(3);
+ // Convert the call to have at least Acquire ordering to make sure
+ // the shadow operations aren't reordered before it.
+ Value *NewOrdering =
+ IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
+ CB.setArgOperand(3, NewOrdering);
+
+ IRBuilder<> NextIRB(CB.getNextNode());
+ NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());
+
+ Value *SrcShadowPtr, *SrcOriginPtr;
+ std::tie(SrcShadowPtr, SrcOriginPtr) =
+ getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
+ /*isStore*/ false);
+ Value *DstShadowPtr =
+ getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
+ /*isStore*/ true)
+ .first;
+
+ NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size);
+ if (MS.TrackOrigins) {
+ Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr,
+ kMinOriginAlignment);
+ Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB);
+ NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin});
+ }
+ }
+
+ void visitLibAtomicStore(CallBase &CB) {
+ IRBuilder<> IRB(&CB);
+ Value *Size = CB.getArgOperand(0);
+ Value *DstPtr = CB.getArgOperand(2);
+ Value *Ordering = CB.getArgOperand(3);
+ // Convert the call to have at least Release ordering to make sure
+ // the shadow operations aren't reordered after it.
+ Value *NewOrdering =
+ IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
+ CB.setArgOperand(3, NewOrdering);
+
+ Value *DstShadowPtr =
+ getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1),
+ /*isStore*/ true)
+ .first;
+
+ // Atomic store always paints clean shadow/origin. See file header.
+ IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size,
+ Align(1));
+ }
+
void visitCallBase(CallBase &CB) {
assert(!CB.getMetadata("nosanitize"));
if (CB.isInlineAsm()) {
@@ -3417,6 +3590,28 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
visitInstruction(CB);
return;
}
+ LibFunc LF;
+ if (TLI->getLibFunc(CB, LF)) {
+ // libatomic.a functions need to have special handling because there isn't
+ // a good way to intercept them or compile the library with
+ // instrumentation.
+ switch (LF) {
+ case LibFunc_atomic_load:
+ if (!isa<CallInst>(CB)) {
+ llvm::errs() << "MSAN -- cannot instrument invoke of libatomic load."
+ "Ignoring!\n";
+ break;
+ }
+ visitLibAtomicLoad(CB);
+ return;
+ case LibFunc_atomic_store:
+ visitLibAtomicStore(CB);
+ return;
+ default:
+ break;
+ }
+ }
+
if (auto *Call = dyn_cast<CallInst>(&CB)) {
assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
@@ -3424,20 +3619,27 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// will become a non-readonly function after it is instrumented by us. To
// prevent this code from being optimized out, mark that function
// non-readonly in advance.
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone)
+ .addAttribute(Attribute::WriteOnly)
+ .addAttribute(Attribute::ArgMemOnly)
+ .addAttribute(Attribute::Speculatable);
+
+ Call->removeAttributes(AttributeList::FunctionIndex, B);
if (Function *Func = Call->getCalledFunction()) {
- // Clear out readonly/readnone attributes.
- AttrBuilder B;
- B.addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::ReadNone)
- .addAttribute(Attribute::WriteOnly)
- .addAttribute(Attribute::ArgMemOnly)
- .addAttribute(Attribute::Speculatable);
Func->removeAttributes(AttributeList::FunctionIndex, B);
}
maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
}
IRBuilder<> IRB(&CB);
+ bool MayCheckCall = ClEagerChecks;
+ if (Function *Func = CB.getCalledFunction()) {
+ // __sanitizer_unaligned_{load,store} functions may be called by users
+ // and always expect shadows in the TLS. So don't check them.
+ MayCheckCall &= !Func->getName().startswith("__sanitizer_unaligned_");
+ }
unsigned ArgOffset = 0;
LLVM_DEBUG(dbgs() << " CallSite: " << CB << "\n");
@@ -3463,7 +3665,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
- bool EagerCheck = ClEagerChecks && !ByVal && NoUndef;
+ bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
if (EagerCheck) {
insertShadowCheck(A, &CB);
@@ -3503,7 +3705,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
(void)Store;
assert(Size != 0 && Store != nullptr);
LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n");
- ArgOffset += alignTo(Size, 8);
+ ArgOffset += alignTo(Size, kShadowTLSAlignment);
}
LLVM_DEBUG(dbgs() << " done with call args\n");
@@ -3519,7 +3721,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
return;
- if (ClEagerChecks && CB.hasRetAttr(Attribute::NoUndef)) {
+ if (MayCheckCall && CB.hasRetAttr(Attribute::NoUndef)) {
setShadow(&CB, getCleanShadow(&CB));
setOrigin(&CB, getCleanOrigin());
return;
@@ -3902,6 +4104,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getCleanOrigin());
}
+ void visitFreezeInst(FreezeInst &I) {
+ // Freeze always returns a fully defined value.
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
void visitInstruction(Instruction &I) {
// Everything else: stop propagating and check for poisoned shadow.
if (ClDumpStrictInstructions)
@@ -4125,7 +4333,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
if (!VAStartInstrumentationList.empty()) {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
- IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
+ IRBuilder<> IRB(MSV.FnPrologueEnd);
VAArgOverflowSize =
IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize =
@@ -4271,7 +4479,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
void finalizeInstrumentation() override {
assert(!VAArgSize && !VAArgTLSCopy &&
"finalizeInstrumentation called twice");
- IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
+ IRBuilder<> IRB(MSV.FnPrologueEnd);
VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
VAArgSize);
@@ -4464,7 +4672,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
if (!VAStartInstrumentationList.empty()) {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
- IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
+ IRBuilder<> IRB(MSV.FnPrologueEnd);
VAArgOverflowSize =
IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize =
@@ -4584,15 +4792,14 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
// For PowerPC, we need to deal with alignment of stack arguments -
// they are mostly aligned to 8 bytes, but vectors and i128 arrays
// are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
- // and QPX vectors are aligned to 32 bytes. For that reason, we
- // compute current offset from stack pointer (which is always properly
- // aligned), and offset for the first vararg, then subtract them.
+ // For that reason, we compute current offset from stack pointer (which is
+ // always properly aligned), and offset for the first vararg, then subtract
+ // them.
unsigned VAArgBase;
Triple TargetTriple(F.getParent()->getTargetTriple());
// Parameter save area starts at 48 bytes from frame pointer for ABIv1,
// and 32 bytes for ABIv2. This is usually determined by target
// endianness, but in theory could be overridden by function attribute.
- // For simplicity, we ignore it here (it'd only matter for QPX vectors).
if (TargetTriple.getArch() == Triple::ppc64)
VAArgBase = 48;
else
@@ -4710,7 +4917,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
void finalizeInstrumentation() override {
assert(!VAArgSize && !VAArgTLSCopy &&
"finalizeInstrumentation called twice");
- IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
+ IRBuilder<> IRB(MSV.FnPrologueEnd);
VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
VAArgSize);
@@ -5029,7 +5236,7 @@ struct VarArgSystemZHelper : public VarArgHelper {
if (!VAStartInstrumentationList.empty()) {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
- IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
+ IRBuilder<> IRB(MSV.FnPrologueEnd);
VAArgOverflowSize =
IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize =
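
The libatomic handling introduced above hinges on the two ordering-upgrade tables (makeAddAcquireOrderingTable / makeAddReleaseOrderingTable). A minimal sketch of just that table logic, assuming the C ABI numbering used by AtomicOrderingCABI (relaxed = 0 ... seq_cst = 5); this models the rule, not the pass:

    #include <array>
    #include <cstdio>

    enum CABI { relaxed = 0, consume, acquire, release, acq_rel, seq_cst };

    // __atomic_load orderings are raised to at least acquire so the shadow
    // copy inserted after the call cannot be reordered before the load.
    constexpr std::array<int, 6> AddAcquire = {acquire, acquire, acquire,
                                               acq_rel, acq_rel, seq_cst};

    // __atomic_store orderings are raised to at least release so the clean
    // shadow memset inserted before the call cannot be reordered after it.
    constexpr std::array<int, 6> AddRelease = {release, acq_rel, acq_rel,
                                               release, acq_rel, seq_cst};

    int main() {
      std::printf("load  relaxed -> %d (acquire = %d)\n", AddAcquire[relaxed], acquire);
      std::printf("store relaxed -> %d (release = %d)\n", AddRelease[relaxed], release);
    }
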
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index dcfc28887a48..be6c8c631001 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -110,7 +110,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/MisExpect.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -249,6 +248,38 @@ static cl::opt<bool>
"optimization remarks: -{Rpass|"
"pass-remarks}=pgo-instrumentation"));
+static cl::opt<bool> PGOInstrumentEntry(
+ "pgo-instrument-entry", cl::init(false), cl::Hidden,
+ cl::desc("Force to instrument function entry basicblock."));
+
+static cl::opt<bool>
+ PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
+ cl::desc("Fix function entry count in profile use."));
+
+static cl::opt<bool> PGOVerifyHotBFI(
+ "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
+ cl::desc("Print out the non-match BFI count if a hot raw profile count "
+ "becomes non-hot, or a cold raw profile count becomes hot. "
+ "The print is enabled under -Rpass-analysis=pgo, or "
+ "internal option -pass-remakrs-analysis=pgo."));
+
+static cl::opt<bool> PGOVerifyBFI(
+ "pgo-verify-bfi", cl::init(false), cl::Hidden,
+ cl::desc("Print out mismatched BFI counts after setting profile metadata "
+ "The print is enabled under -Rpass-analysis=pgo, or "
+ "internal option -pass-remakrs-analysis=pgo."));
+
+static cl::opt<unsigned> PGOVerifyBFIRatio(
+ "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden,
+ cl::desc("Set the threshold for pgo-verify-big -- only print out "
+ "mismatched BFI if the difference percentage is greater than "
+ "this value (in percentage)."));
+
+static cl::opt<unsigned> PGOVerifyBFICutoff(
+ "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden,
+ cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose "
+ "profile count value is below."));
+
// Command line option to turn on CFG dot dump after profile annotation.
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
extern cl::opt<PGOViewCountsType> PGOViewCounts;
@@ -257,6 +288,10 @@ extern cl::opt<PGOViewCountsType> PGOViewCounts;
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+static cl::opt<bool>
+ PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
+ cl::desc("Use the old CFG function hashing"));
+
// Return a string describing the branch condition that can be
// used in static branch probability heuristics:
static std::string getBranchCondString(Instruction *TI) {
@@ -425,7 +460,7 @@ public:
private:
bool runOnModule(Module &M) override {
createProfileFileNameVar(M, InstrProfileOutput);
- createIRLevelProfileFlagVar(M, true);
+ createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
return false;
}
std::string InstrProfileOutput;
@@ -572,9 +607,11 @@ public:
Function &Func, TargetLibraryInfo &TLI,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
- BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
+ BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
+ bool InstrumentFuncEntry = true)
: F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
- ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) {
+ ValueSites(IPVK_Last + 1), SIVisitor(Func),
+ MST(F, InstrumentFuncEntry, BPI, BFI) {
// This should be done before CFG hash computation.
SIVisitor.countSelects(Func);
ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
@@ -611,7 +648,8 @@ public:
} // end anonymous namespace
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
-// value of each BB in the CFG. The higher 32 bits record the number of edges.
+// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
+// of selects, indirect calls, mem ops and edges.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
std::vector<uint8_t> Indexes;
@@ -630,12 +668,31 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
}
JC.update(Indexes);
- // Hash format for context sensitive profile. Reserve 4 bits for other
- // information.
- FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
- (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
- //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
- (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+ JamCRC JCH;
+ if (PGOOldCFGHashing) {
+ // Hash format for context sensitive profile. Reserve 4 bits for other
+ // information.
+ FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
+ (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
+ //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
+ (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+ } else {
+ // The higher 32 bits.
+ auto updateJCH = [&JCH](uint64_t Num) {
+ uint8_t Data[8];
+ support::endian::write64le(Data, Num);
+ JCH.update(Data);
+ };
+ updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
+ updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
+ updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
+ updateJCH((uint64_t)MST.AllEdges.size());
+
+ // Hash format for context sensitive profile. Reserve 4 bits for other
+ // information.
+ FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
+ }
+
// Reserve bit 60-63 for other information purpose.
FunctionHash &= 0x0FFFFFFFFFFFFFFF;
if (IsCS)
@@ -644,8 +701,12 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
<< " CRC = " << JC.getCRC()
<< ", Selects = " << SIVisitor.getNumOfSelectInsts()
<< ", Edges = " << MST.AllEdges.size() << ", ICSites = "
- << ValueSites[IPVK_IndirectCallTarget].size()
- << ", Hash = " << FunctionHash << "\n";);
+ << ValueSites[IPVK_IndirectCallTarget].size());
+ if (!PGOOldCFGHashing) {
+ LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
+ << ", High32 CRC = " << JCH.getCRC());
+ }
+ LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
}
// Check if we can safely rename this Comdat function.
@@ -656,7 +717,7 @@ static bool canRenameComdat(
return false;
// FIXME: Current only handle those Comdat groups that only containing one
- // function and function aliases.
+ // function.
// (1) For a Comdat group containing multiple functions, we need to have a
// unique postfix based on the hashes for each function. There is a
// non-trivial code refactoring to do this efficiently.
@@ -664,8 +725,7 @@ static bool canRenameComdat(
// group including global vars.
Comdat *C = F.getComdat();
for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
- if (dyn_cast<GlobalAlias>(CM.second))
- continue;
+ assert(!isa<GlobalAlias>(CM.second));
Function *FM = dyn_cast<Function>(CM.second);
if (FM != &F)
return false;
@@ -705,18 +765,8 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) {
- // For aliases, change the name directly.
- assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
- std::string OrigGAName = GA->getName().str();
- GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
- GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA);
- continue;
- }
// Must be a function.
- Function *CF = dyn_cast<Function>(CM.second);
- assert(CF);
- CF->setComdat(NewComdat);
+ cast<Function>(CM.second)->setComdat(NewComdat);
}
}
@@ -781,8 +831,11 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
if (!E->IsCritical)
return canInstrument(DestBB);
+ // Some IndirectBr critical edges cannot be split by the previous
+ // SplitIndirectBrCriticalEdges call. Bail out.
unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
- BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+ BasicBlock *InstrBB =
+ isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
if (!InstrBB) {
LLVM_DEBUG(
dbgs() << "Fail to split critical edge: not instrument this edge.\n");
@@ -845,8 +898,8 @@ static void instrumentOneFunc(
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
- FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, TLI, ComdatMembers, true,
- BPI, BFI, IsCS);
+ FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
+ F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
unsigned NumCounters =
@@ -1004,13 +1057,15 @@ public:
PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
- ProfileSummaryInfo *PSI, bool IsCS)
+ ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
: F(Func), M(Modu), BFI(BFIin), PSI(PSI),
- FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS),
+ FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
+ InstrumentFuncEntry),
FreqAttr(FFA_Normal), IsCS(IsCS) {}
// Read counts for the instrumented BB from profile.
- bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
+ bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
+ bool &AllMinusOnes);
// Populate the counts for all BBs.
void populateCounters();
@@ -1121,11 +1176,18 @@ bool PGOUseFunc::setInstrumentedCounts(
if (NumCounters != CountFromProfile.size()) {
return false;
}
+ auto *FuncEntry = &*F.begin();
+
// Set the profile count to the Instrumented BBs.
uint32_t I = 0;
for (BasicBlock *InstrBB : InstrumentBBs) {
uint64_t CountValue = CountFromProfile[I++];
UseBBInfo &Info = getBBInfo(InstrBB);
+ // If we reach here, we know that we have some nonzero count
+ // values in this function. The entry count should not be 0.
+ // Fix it if necessary.
+ if (InstrBB == FuncEntry && CountValue == 0)
+ CountValue = 1;
Info.setBBInfoCount(CountValue);
}
ProfileCountSize = CountFromProfile.size();
@@ -1186,7 +1248,8 @@ void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
// Read the profile from ProfileFileName and assign the value to the
// instrumented BB and the edges. This function also updates ProgramMaxCount.
// Return true if the profile are successfully read, and false on errors.
-bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
+ bool &AllMinusOnes) {
auto &Ctx = M->getContext();
Expected<InstrProfRecord> Result =
PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
@@ -1229,10 +1292,13 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
+ AllMinusOnes = (CountFromProfile.size() > 0);
uint64_t ValueSum = 0;
for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
ValueSum += CountFromProfile[I];
+ if (CountFromProfile[I] != (uint64_t)-1)
+ AllMinusOnes = false;
}
AllZeros = (ValueSum == 0);
@@ -1316,7 +1382,6 @@ void PGOUseFunc::populateCounters() {
}
#endif
uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
- F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
uint64_t FuncMaxCount = FuncEntryCount;
for (auto &BB : F) {
auto BI = findBBInfo(&BB);
@@ -1324,6 +1389,11 @@ void PGOUseFunc::populateCounters() {
continue;
FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
}
+
+ // Fix the obviously inconsistent entry count.
+ if (FuncMaxCount > 0 && FuncEntryCount == 0)
+ FuncEntryCount = 1;
+ F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
markFunctionAttributes(FuncEntryCount, FuncMaxCount);
// Now annotate select instructions
@@ -1514,13 +1584,15 @@ static bool InstrumentAllFunctions(
// For the context-sensitve instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS)
- createIRLevelProfileFlagVar(M, /* IsCS */ false);
+ createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
for (auto &F : M) {
if (F.isDeclaration())
continue;
+ if (F.hasFnAttribute(llvm::Attribute::NoProfile))
+ continue;
auto &TLI = LookupTLI(F);
auto *BPI = LookupBPI(F);
auto *BFI = LookupBFI(F);
@@ -1532,7 +1604,7 @@ static bool InstrumentAllFunctions(
PreservedAnalyses
PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
createProfileFileNameVar(M, CSInstrName);
- createIRLevelProfileFlagVar(M, /* IsCS */ true);
+ createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
return PreservedAnalyses::all();
}
@@ -1571,6 +1643,129 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
return PreservedAnalyses::none();
}
+// Use the ratio between the sums of profile count values and BFI count values
+// to adjust the function entry count.
+static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
+ BranchProbabilityInfo &NBPI) {
+ Function &F = Func.getFunc();
+ BlockFrequencyInfo NBFI(F, NBPI, LI);
+#ifndef NDEBUG
+ auto BFIEntryCount = F.getEntryCount();
+ assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) &&
+ "Invalid BFI Entrycount");
+#endif
+ auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
+ auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
+ for (auto &BBI : F) {
+ uint64_t CountValue = 0;
+ uint64_t BFICountValue = 0;
+ if (!Func.findBBInfo(&BBI))
+ continue;
+ auto BFICount = NBFI.getBlockProfileCount(&BBI);
+ CountValue = Func.getBBInfo(&BBI).CountValue;
+ BFICountValue = BFICount.getValue();
+ SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
+ SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
+ }
+ if (SumCount.isZero())
+ return;
+
+ assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
+ "Incorrect sum of BFI counts");
+ if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
+ return;
+ double Scale = (SumCount / SumBFICount).convertToDouble();
+ if (Scale < 1.001 && Scale > 0.999)
+ return;
+
+ uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
+ uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
+ if (NewEntryCount == 0)
+ NewEntryCount = 1;
+ if (NewEntryCount != FuncEntryCount) {
+ F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
+ LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
+ << ", entry_count " << FuncEntryCount << " --> "
+ << NewEntryCount << "\n");
+ }
+}
+
+// Compare the profile count values with BFI count values, and print out
+// the non-matching ones.
+static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
+ BranchProbabilityInfo &NBPI,
+ uint64_t HotCountThreshold,
+ uint64_t ColdCountThreshold) {
+ Function &F = Func.getFunc();
+ BlockFrequencyInfo NBFI(F, NBPI, LI);
+ // bool PrintFunc = false;
+ bool HotBBOnly = PGOVerifyHotBFI;
+ std::string Msg;
+ OptimizationRemarkEmitter ORE(&F);
+
+ unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
+ for (auto &BBI : F) {
+ uint64_t CountValue = 0;
+ uint64_t BFICountValue = 0;
+
+ if (Func.getBBInfo(&BBI).CountValid)
+ CountValue = Func.getBBInfo(&BBI).CountValue;
+
+ BBNum++;
+ if (CountValue)
+ NonZeroBBNum++;
+ auto BFICount = NBFI.getBlockProfileCount(&BBI);
+ if (BFICount)
+ BFICountValue = BFICount.getValue();
+
+ if (HotBBOnly) {
+ bool rawIsHot = CountValue >= HotCountThreshold;
+ bool BFIIsHot = BFICountValue >= HotCountThreshold;
+ bool rawIsCold = CountValue <= ColdCountThreshold;
+ bool ShowCount = false;
+ if (rawIsHot && !BFIIsHot) {
+ Msg = "raw-Hot to BFI-nonHot";
+ ShowCount = true;
+ } else if (rawIsCold && BFIIsHot) {
+ Msg = "raw-Cold to BFI-Hot";
+ ShowCount = true;
+ }
+ if (!ShowCount)
+ continue;
+ } else {
+ if ((CountValue < PGOVerifyBFICutoff) &&
+ (BFICountValue < PGOVerifyBFICutoff))
+ continue;
+ uint64_t Diff = (BFICountValue >= CountValue)
+ ? BFICountValue - CountValue
+ : CountValue - BFICountValue;
+ if (Diff < CountValue / 100 * PGOVerifyBFIRatio)
+ continue;
+ }
+ BBMisMatchNum++;
+
+ ORE.emit([&]() {
+ OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify",
+ F.getSubprogram(), &BBI);
+ Remark << "BB " << ore::NV("Block", BBI.getName())
+ << " Count=" << ore::NV("Count", CountValue)
+ << " BFI_Count=" << ore::NV("Count", BFICountValue);
+ if (!Msg.empty())
+ Remark << " (" << Msg << ")";
+ return Remark;
+ });
+ }
+ if (BBMisMatchNum)
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
+ F.getSubprogram(), &F.getEntryBlock())
+ << "In Func " << ore::NV("Function", F.getName())
+ << ": Num_of_BB=" << ore::NV("Count", BBNum)
+ << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
+ << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
+ });
+}
+
static bool annotateAllFunctions(
Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
@@ -1619,6 +1814,12 @@ static bool annotateAllFunctions(
collectComdatMembers(M, ComdatMembers);
std::vector<Function *> HotFunctions;
std::vector<Function *> ColdFunctions;
+
+ // If the profile is marked as always instrumenting the entry BB, do the
+ // same. Note this can be overridden by the internal option in CFGMST.h.
+ bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
+ if (PGOInstrumentEntry.getNumOccurrences() > 0)
+ InstrumentFuncEntry = PGOInstrumentEntry;
for (auto &F : M) {
if (F.isDeclaration())
continue;
@@ -1628,9 +1829,15 @@ static bool annotateAllFunctions(
// Split indirectbr critical edges here before computing the MST rather than
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
- PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS);
+ PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
+ InstrumentFuncEntry);
+ // When AllMinusOnes is true, the profile for the function is
+ // unrepresentative and the function is actually hot. Set the entry
+ // count of the function to a multiple of the hot threshold and drop
+ // all its internal counters.
+ bool AllMinusOnes = false;
bool AllZeros = false;
- if (!Func.readCounters(PGOReader.get(), AllZeros))
+ if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
continue;
if (AllZeros) {
F.setEntryCount(ProfileCount(0, Function::PCT_Real));
@@ -1638,6 +1845,15 @@ static bool annotateAllFunctions(
ColdFunctions.push_back(&F);
continue;
}
+ const unsigned MultiplyFactor = 3;
+ if (AllMinusOnes) {
+ uint64_t HotThreshold = PSI->getHotCountThreshold();
+ if (HotThreshold)
+ F.setEntryCount(
+ ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
+ HotFunctions.push_back(&F);
+ continue;
+ }
Func.populateCounters();
Func.setBranchWeights();
Func.annotateValueSites();
@@ -1675,6 +1891,23 @@ static bool annotateAllFunctions(
Func.dumpInfo();
}
}
+
+ if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
+ LoopInfo LI{DominatorTree(F)};
+ BranchProbabilityInfo NBPI(F, LI);
+
+ // Fix func entry count.
+ if (PGOFixEntryCount)
+ fixFuncEntryCount(Func, LI, NBPI);
+
+ // Verify BlockFrequency information.
+ uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
+ if (PGOVerifyHotBFI) {
+ HotCountThreshold = PSI->getOrCompHotCountThreshold();
+ ColdCountThreshold = PSI->getOrCompColdCountThreshold();
+ }
+ verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
+ }
}
// Set function hotness attribute from the profile.
@@ -1687,6 +1920,17 @@ static bool annotateAllFunctions(
<< "\n");
}
for (auto &F : ColdFunctions) {
+ // Only set when there is no Attribute::Hot set by the user. For Hot
+ // attribute, user's annotation has the precedence over the profile.
+ if (F->hasFnAttribute(Attribute::Hot)) {
+ auto &Ctx = M.getContext();
+ std::string Msg = std::string("Function ") + F->getName().str() +
+ std::string(" is annotated as a hot function but"
+ " the profile is cold");
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
+ continue;
+ }
F->addFnAttr(Attribute::Cold);
LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
<< "\n");
@@ -1772,8 +2016,6 @@ void llvm::setProfMetadata(Module *M, Instruction *TI,
dbgs() << W << " ";
} dbgs() << "\n";);
- misexpect::verifyMisExpect(TI, Weights, TI->getContext());
-
TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
if (EmitBranchProbability) {
std::string BrCondStr = getBranchCondString(TI);
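
The fixFuncEntryCount() helper added above reduces to a rescaling rule: compare the sum of profile counts against the sum of BFI-reconstructed counts, and if they differ by more than 0.1%, scale the entry count by that ratio while never letting it drop to zero. A small arithmetic sketch with made-up numbers:

    #include <cstdint>
    #include <cstdio>

    // Rescale an entry count by SumProfile / SumBFI, mirroring the
    // (0.999, 1.001) dead zone and the floor of 1 in fixFuncEntryCount().
    static uint64_t rescaleEntryCount(uint64_t EntryCount, double SumProfile,
                                      double SumBFI) {
      if (SumProfile == 0.0 || SumBFI == 0.0)
        return EntryCount;
      double Scale = SumProfile / SumBFI;
      if (Scale < 1.001 && Scale > 0.999) // counts already agree closely enough
        return EntryCount;
      uint64_t NewCount = static_cast<uint64_t>(0.5 + EntryCount * Scale);
      return NewCount ? NewCount : 1;     // never publish a zero entry count
    }

    int main() {
      // Profile records 25% more events than BFI reconstructs: 100 -> 125.
      std::printf("%llu\n", (unsigned long long)rescaleEntryCount(100, 5000.0, 4000.0));
      // Inside the 0.1% dead zone: left untouched.
      std::printf("%llu\n", (unsigned long long)rescaleEntryCount(100, 4000.0, 4001.0));
    }
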
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 2b7b859891dc..55a93b6152dc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -38,6 +39,8 @@
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/ProfileData/InstrProf.h"
+#define INSTR_PROF_VALUE_PROF_MEMOP_API
+#include "llvm/ProfileData/InstrProfData.inc"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -89,17 +92,15 @@ static cl::opt<bool>
cl::desc("Scale the memop size counts using the basic "
" block count value"));
-// This option sets the rangge of precise profile memop sizes.
-extern cl::opt<std::string> MemOPSizeRange;
-
-// This option sets the value that groups large memop sizes
-extern cl::opt<unsigned> MemOPSizeLarge;
-
cl::opt<bool>
MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true),
cl::Hidden,
cl::desc("Size-specialize memcmp and bcmp calls"));
+static cl::opt<unsigned>
+ MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128),
+ cl::desc("Optimize the memop size <= this value"));
+
namespace {
class PGOMemOPSizeOptLegacyPass : public FunctionPass {
public:
@@ -224,9 +225,6 @@ public:
: Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {
ValueDataArray =
std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
- // Get the MemOPSize range information from option MemOPSizeRange,
- getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
- PreciseRangeLast);
}
bool isChanged() const { return Changed; }
void perform() {
@@ -256,7 +254,7 @@ public:
LibFunc Func;
if (TLI.getLibFunc(CI, Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- !dyn_cast<ConstantInt>(CI.getArgOperand(2))) {
+ !isa<ConstantInt>(CI.getArgOperand(2))) {
WorkList.push_back(MemOp(&CI));
}
}
@@ -269,26 +267,9 @@ private:
TargetLibraryInfo &TLI;
bool Changed;
std::vector<MemOp> WorkList;
- // Start of the previse range.
- int64_t PreciseRangeStart;
- // Last value of the previse range.
- int64_t PreciseRangeLast;
// The space to read the profile annotation.
std::unique_ptr<InstrProfValueData[]> ValueDataArray;
bool perform(MemOp MO);
-
- // This kind shows which group the value falls in. For PreciseValue, we have
- // the profile count for that value. LargeGroup groups the values that are in
- // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
- enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };
-
- MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
- if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
- return LargeGroup;
- if (Value == PreciseRangeLast + 1)
- return NonLargeGroup;
- return PreciseValue;
- }
};
static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
@@ -365,8 +346,7 @@ bool MemOPSizeOpt::perform(MemOp MO) {
if (MemOPScaleCount)
C = getScaledCount(C, ActualCount, SavedTotalCount);
- // Only care precise value here.
- if (getMemOPSizeKind(V) != PreciseValue)
+ if (!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize)
continue;
// ValueCounts are sorted on the count. Break at the first un-profitable
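
The gate that replaces getMemOPSizeKind() above keeps a profiled size only if the value profiler recorded it as an exact value (InstrProfIsSingleValRange) and it does not exceed -memop-value-prof-max-opt-size. The sketch below models that filter; isExactSizeBucket is a hypothetical stand-in for InstrProfIsSingleValRange, whose real bucketing lives in InstrProfData.inc and is not reproduced here:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in: treat only small sizes as exact buckets.
    static bool isExactSizeBucket(uint64_t V) { return V > 0 && V <= 8; }

    // Mirrors the "!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize"
    // continue-check in MemOPSizeOpt::perform().
    static std::vector<uint64_t>
    sizesToSpecialize(const std::vector<uint64_t> &Profiled, uint64_t MaxOptSize) {
      std::vector<uint64_t> Out;
      for (uint64_t V : Profiled)
        if (isExactSizeBucket(V) && V <= MaxOptSize)
          Out.push_back(V);
      return Out;
    }

    int main() {
      for (uint64_t V : sizesToSpecialize({1, 4, 7, 64, 300}, /*MaxOptSize=*/128))
        std::printf("specialize memop of size %llu\n", (unsigned long long)V);
    }
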
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
index 85e096112fca..fc5267261851 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -282,8 +282,10 @@ static bool rewrite(Function &F) {
// Note: There are many more sources of documented UB, but this pass only
// attempts to find UB triggered by propagation of poison.
- if (Value *Op = const_cast<Value*>(getGuaranteedNonPoisonOp(&I)))
- CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+ SmallPtrSet<const Value *, 4> NonPoisonOps;
+ getGuaranteedNonPoisonOps(&I, NonPoisonOps);
+ for (const Value *Op : NonPoisonOps)
+ CreateAssertNot(B, getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
if (LocalCheck)
if (auto *RI = dyn_cast<ReturnInst>(&I))
@@ -293,11 +295,11 @@ static bool rewrite(Function &F) {
}
SmallVector<Value*, 4> Checks;
- if (propagatesPoison(&I))
+ if (propagatesPoison(cast<Operator>(&I)))
for (Value *V : I.operands())
Checks.push_back(getPoisonFor(ValToPoison, V));
- if (canCreatePoison(&I))
+ if (canCreatePoison(cast<Operator>(&I)))
generateCreationChecks(I, Checks);
ValToPoison[&I] = buildOrChain(B, Checks);
}
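
Note: the PoisonChecking change above moves from a single guaranteed-non-poison operand to a set of them, asserting non-poison once per distinct operand. A small sketch of the SmallPtrSet pattern that loop relies on, with plain ints standing in for the Value* operands:

  #include "llvm/ADT/SmallPtrSet.h"
  #include <cstdio>

  int main() {
    int A = 0, B = 0;
    llvm::SmallPtrSet<const int *, 4> NonPoisonOps; // stand-in for the operand set
    NonPoisonOps.insert(&A);
    NonPoisonOps.insert(&B);
    NonPoisonOps.insert(&A);                        // duplicates collapse silently
    // One "assert not poison" per distinct operand, as in the rewritten loop.
    for (const int *Op : NonPoisonOps)
      std::printf("would CreateAssertNot for %p\n", static_cast<const void *>(Op));
    return 0;
  }
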
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index b6a9df57e431..2d4b07939463 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -45,49 +45,39 @@ using namespace llvm;
#define DEBUG_TYPE "sancov"
-static const char *const SanCovTracePCIndirName =
- "__sanitizer_cov_trace_pc_indir";
-static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc";
-static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1";
-static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2";
-static const char *const SanCovTraceCmp4 = "__sanitizer_cov_trace_cmp4";
-static const char *const SanCovTraceCmp8 = "__sanitizer_cov_trace_cmp8";
-static const char *const SanCovTraceConstCmp1 =
- "__sanitizer_cov_trace_const_cmp1";
-static const char *const SanCovTraceConstCmp2 =
- "__sanitizer_cov_trace_const_cmp2";
-static const char *const SanCovTraceConstCmp4 =
- "__sanitizer_cov_trace_const_cmp4";
-static const char *const SanCovTraceConstCmp8 =
- "__sanitizer_cov_trace_const_cmp8";
-static const char *const SanCovTraceDiv4 = "__sanitizer_cov_trace_div4";
-static const char *const SanCovTraceDiv8 = "__sanitizer_cov_trace_div8";
-static const char *const SanCovTraceGep = "__sanitizer_cov_trace_gep";
-static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch";
-static const char *const SanCovModuleCtorTracePcGuardName =
+const char SanCovTracePCIndirName[] = "__sanitizer_cov_trace_pc_indir";
+const char SanCovTracePCName[] = "__sanitizer_cov_trace_pc";
+const char SanCovTraceCmp1[] = "__sanitizer_cov_trace_cmp1";
+const char SanCovTraceCmp2[] = "__sanitizer_cov_trace_cmp2";
+const char SanCovTraceCmp4[] = "__sanitizer_cov_trace_cmp4";
+const char SanCovTraceCmp8[] = "__sanitizer_cov_trace_cmp8";
+const char SanCovTraceConstCmp1[] = "__sanitizer_cov_trace_const_cmp1";
+const char SanCovTraceConstCmp2[] = "__sanitizer_cov_trace_const_cmp2";
+const char SanCovTraceConstCmp4[] = "__sanitizer_cov_trace_const_cmp4";
+const char SanCovTraceConstCmp8[] = "__sanitizer_cov_trace_const_cmp8";
+const char SanCovTraceDiv4[] = "__sanitizer_cov_trace_div4";
+const char SanCovTraceDiv8[] = "__sanitizer_cov_trace_div8";
+const char SanCovTraceGep[] = "__sanitizer_cov_trace_gep";
+const char SanCovTraceSwitchName[] = "__sanitizer_cov_trace_switch";
+const char SanCovModuleCtorTracePcGuardName[] =
"sancov.module_ctor_trace_pc_guard";
-static const char *const SanCovModuleCtor8bitCountersName =
+const char SanCovModuleCtor8bitCountersName[] =
"sancov.module_ctor_8bit_counters";
-static const char *const SanCovModuleCtorBoolFlagName =
- "sancov.module_ctor_bool_flag";
+const char SanCovModuleCtorBoolFlagName[] = "sancov.module_ctor_bool_flag";
static const uint64_t SanCtorAndDtorPriority = 2;
-static const char *const SanCovTracePCGuardName =
- "__sanitizer_cov_trace_pc_guard";
-static const char *const SanCovTracePCGuardInitName =
- "__sanitizer_cov_trace_pc_guard_init";
-static const char *const SanCov8bitCountersInitName =
- "__sanitizer_cov_8bit_counters_init";
-static const char *const SanCovBoolFlagInitName =
- "__sanitizer_cov_bool_flag_init";
-static const char *const SanCovPCsInitName = "__sanitizer_cov_pcs_init";
+const char SanCovTracePCGuardName[] = "__sanitizer_cov_trace_pc_guard";
+const char SanCovTracePCGuardInitName[] = "__sanitizer_cov_trace_pc_guard_init";
+const char SanCov8bitCountersInitName[] = "__sanitizer_cov_8bit_counters_init";
+const char SanCovBoolFlagInitName[] = "__sanitizer_cov_bool_flag_init";
+const char SanCovPCsInitName[] = "__sanitizer_cov_pcs_init";
-static const char *const SanCovGuardsSectionName = "sancov_guards";
-static const char *const SanCovCountersSectionName = "sancov_cntrs";
-static const char *const SanCovBoolFlagSectionName = "sancov_bools";
-static const char *const SanCovPCsSectionName = "sancov_pcs";
+const char SanCovGuardsSectionName[] = "sancov_guards";
+const char SanCovCountersSectionName[] = "sancov_cntrs";
+const char SanCovBoolFlagSectionName[] = "sancov_bools";
+const char SanCovPCsSectionName[] = "sancov_pcs";
-static const char *const SanCovLowestStackName = "__sancov_lowest_stack";
+const char SanCovLowestStackName[] = "__sancov_lowest_stack";
static cl::opt<int> ClCoverageLevel(
"sanitizer-coverage-level",
@@ -338,25 +328,24 @@ PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M,
std::pair<Value *, Value *>
ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section,
Type *Ty) {
- GlobalVariable *SecStart =
- new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr,
- getSectionStart(Section));
+ GlobalVariable *SecStart = new GlobalVariable(
+ M, Ty->getPointerElementType(), false, GlobalVariable::ExternalLinkage,
+ nullptr, getSectionStart(Section));
SecStart->setVisibility(GlobalValue::HiddenVisibility);
- GlobalVariable *SecEnd =
- new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
- nullptr, getSectionEnd(Section));
+ GlobalVariable *SecEnd = new GlobalVariable(
+ M, Ty->getPointerElementType(), false, GlobalVariable::ExternalLinkage,
+ nullptr, getSectionEnd(Section));
SecEnd->setVisibility(GlobalValue::HiddenVisibility);
IRBuilder<> IRB(M.getContext());
- Value *SecEndPtr = IRB.CreatePointerCast(SecEnd, Ty);
if (!TargetTriple.isOSBinFormatCOFF())
- return std::make_pair(IRB.CreatePointerCast(SecStart, Ty), SecEndPtr);
+ return std::make_pair(SecStart, SecEnd);
// Account for the fact that on windows-msvc __start_* symbols actually
// point to a uint64_t before the start of the array.
auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr,
ConstantInt::get(IntptrTy, sizeof(uint64_t)));
- return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr);
+ return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEnd);
}
Function *ModuleSanitizerCoverage::CreateInitCallsForSections(
@@ -426,15 +415,13 @@ bool ModuleSanitizerCoverage::instrumentModule(
SanCovTracePCIndir =
M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy);
- // Make sure smaller parameters are zero-extended to i64 as required by the
- // x86_64 ABI.
+ // Make sure smaller parameters are zero-extended to i64 if required by the
+ // target ABI.
AttributeList SanCovTraceCmpZeroExtAL;
- if (TargetTriple.getArch() == Triple::x86_64) {
- SanCovTraceCmpZeroExtAL =
- SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 0, Attribute::ZExt);
- SanCovTraceCmpZeroExtAL =
- SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 1, Attribute::ZExt);
- }
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 0, Attribute::ZExt);
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 1, Attribute::ZExt);
SanCovTraceCmpFunction[0] =
M.getOrInsertFunction(SanCovTraceCmp1, SanCovTraceCmpZeroExtAL, VoidTy,
@@ -459,8 +446,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
{
AttributeList AL;
- if (TargetTriple.getArch() == Triple::x86_64)
- AL = AL.addParamAttribute(*C, 0, Attribute::ZExt);
+ AL = AL.addParamAttribute(*C, 0, Attribute::ZExt);
SanCovTraceDivFunction[0] =
M.getOrInsertFunction(SanCovTraceDiv4, AL, VoidTy, IRB.getInt32Ty());
}
@@ -523,29 +509,23 @@ bool ModuleSanitizerCoverage::instrumentModule(
// True if block has successors and it dominates all of them.
static bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) {
- if (succ_begin(BB) == succ_end(BB))
+ if (succ_empty(BB))
return false;
- for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) {
- if (!DT->dominates(BB, SUCC))
- return false;
- }
-
- return true;
+ return llvm::all_of(successors(BB), [&](const BasicBlock *SUCC) {
+ return DT->dominates(BB, SUCC);
+ });
}
// True if block has predecessors and it postdominates all of them.
static bool isFullPostDominator(const BasicBlock *BB,
const PostDominatorTree *PDT) {
- if (pred_begin(BB) == pred_end(BB))
+ if (pred_empty(BB))
return false;
- for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) {
- if (!PDT->dominates(BB, PRED))
- return false;
- }
-
- return true;
+ return llvm::all_of(predecessors(BB), [&](const BasicBlock *PRED) {
+ return PDT->dominates(BB, PRED);
+ });
}
static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
@@ -815,7 +795,7 @@ void ModuleSanitizerCoverage::InjectTraceForSwitch(
C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty);
Initializers.push_back(C);
}
- llvm::sort(Initializers.begin() + 2, Initializers.end(),
+ llvm::sort(drop_begin(Initializers, 2),
[](const Constant *A, const Constant *B) {
return cast<ConstantInt>(A)->getLimitedValue() <
cast<ConstantInt>(B)->getLimitedValue();
@@ -903,7 +883,7 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
DebugLoc EntryLoc;
if (IsEntryBB) {
if (auto SP = F.getSubprogram())
- EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ EntryLoc = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP);
// Keep static allocas and llvm.localescape calls in the entry block. Even
// if we aren't splitting the block, it's nice for allocas to be before
// calls.
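
Note: most of the SanitizerCoverage hunk above is a mechanical switch from "static const char *const Name = ..." to "const char Name[] = ...". A short sketch of the difference, assuming nothing beyond standard C++: a const variable at namespace scope already has internal linkage (so the explicit static can be dropped), the characters are the object itself rather than a pointer to a separate literal, and the length is visible to sizeof.

  #include <cstdio>

  // Old style: a pointer object that refers to a separately stored literal.
  static const char *const OldName = "__sanitizer_cov_trace_pc";
  // New style: the array is the string; const at namespace scope gives it
  // internal linkage without the 'static' keyword.
  const char NewName[] = "__sanitizer_cov_trace_pc";

  int main() {
    std::printf("sizeof pointer = %zu, sizeof array = %zu\n",
                sizeof(OldName), sizeof(NewName)); // pointer size vs string length + 1
    return 0;
  }
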
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index c911b37afac7..783878cf1ec0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -19,7 +19,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -52,30 +53,36 @@ using namespace llvm;
#define DEBUG_TYPE "tsan"
-static cl::opt<bool> ClInstrumentMemoryAccesses(
+static cl::opt<bool> ClInstrumentMemoryAccesses(
"tsan-instrument-memory-accesses", cl::init(true),
cl::desc("Instrument memory accesses"), cl::Hidden);
-static cl::opt<bool> ClInstrumentFuncEntryExit(
- "tsan-instrument-func-entry-exit", cl::init(true),
- cl::desc("Instrument function entry and exit"), cl::Hidden);
-static cl::opt<bool> ClHandleCxxExceptions(
+static cl::opt<bool>
+ ClInstrumentFuncEntryExit("tsan-instrument-func-entry-exit", cl::init(true),
+ cl::desc("Instrument function entry and exit"),
+ cl::Hidden);
+static cl::opt<bool> ClHandleCxxExceptions(
"tsan-handle-cxx-exceptions", cl::init(true),
cl::desc("Handle C++ exceptions (insert cleanup blocks for unwinding)"),
cl::Hidden);
-static cl::opt<bool> ClInstrumentAtomics(
- "tsan-instrument-atomics", cl::init(true),
- cl::desc("Instrument atomics"), cl::Hidden);
-static cl::opt<bool> ClInstrumentMemIntrinsics(
+static cl::opt<bool> ClInstrumentAtomics("tsan-instrument-atomics",
+ cl::init(true),
+ cl::desc("Instrument atomics"),
+ cl::Hidden);
+static cl::opt<bool> ClInstrumentMemIntrinsics(
"tsan-instrument-memintrinsics", cl::init(true),
cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
-static cl::opt<bool> ClDistinguishVolatile(
+static cl::opt<bool> ClDistinguishVolatile(
"tsan-distinguish-volatile", cl::init(false),
cl::desc("Emit special instrumentation for accesses to volatiles"),
cl::Hidden);
-static cl::opt<bool> ClInstrumentReadBeforeWrite(
+static cl::opt<bool> ClInstrumentReadBeforeWrite(
"tsan-instrument-read-before-write", cl::init(false),
cl::desc("Do not eliminate read instrumentation for read-before-writes"),
cl::Hidden);
+static cl::opt<bool> ClCompoundReadBeforeWrite(
+ "tsan-compound-read-before-write", cl::init(false),
+ cl::desc("Emit special compound instrumentation for reads-before-writes"),
+ cl::Hidden);
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
@@ -89,8 +96,8 @@ STATISTIC(NumOmittedReadsFromConstantGlobals,
STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads");
STATISTIC(NumOmittedNonCaptured, "Number of accesses ignored due to capturing");
-static const char *const kTsanModuleCtorName = "tsan.module_ctor";
-static const char *const kTsanInitName = "__tsan_init";
+const char kTsanModuleCtorName[] = "tsan.module_ctor";
+const char kTsanInitName[] = "__tsan_init";
namespace {
@@ -101,15 +108,37 @@ namespace {
/// ensures the __tsan_init function is in the list of global constructors for
/// the module.
struct ThreadSanitizer {
+ ThreadSanitizer() {
+ // Sanity check options and warn user.
+ if (ClInstrumentReadBeforeWrite && ClCompoundReadBeforeWrite) {
+ errs()
+ << "warning: Option -tsan-compound-read-before-write has no effect "
+ "when -tsan-instrument-read-before-write is set.\n";
+ }
+ }
+
bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
private:
+ // Internal Instruction wrapper that contains more information about the
+ // Instruction from prior analysis.
+ struct InstructionInfo {
+ // Instrumentation emitted for this instruction is for a compounded set of
+ // read and write operations in the same basic block.
+ static constexpr unsigned kCompoundRW = (1U << 0);
+
+ explicit InstructionInfo(Instruction *Inst) : Inst(Inst) {}
+
+ Instruction *Inst;
+ unsigned Flags = 0;
+ };
+
void initialize(Module &M);
- bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
+ bool instrumentLoadOrStore(const InstructionInfo &II, const DataLayout &DL);
bool instrumentAtomic(Instruction *I, const DataLayout &DL);
bool instrumentMemIntrinsic(Instruction *I);
void chooseInstructionsToInstrument(SmallVectorImpl<Instruction *> &Local,
- SmallVectorImpl<Instruction *> &All,
+ SmallVectorImpl<InstructionInfo> &All,
const DataLayout &DL);
bool addrPointsToConstantData(Value *Addr);
int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
@@ -130,6 +159,8 @@ private:
FunctionCallee TsanVolatileWrite[kNumberOfAccessSizes];
FunctionCallee TsanUnalignedVolatileRead[kNumberOfAccessSizes];
FunctionCallee TsanUnalignedVolatileWrite[kNumberOfAccessSizes];
+ FunctionCallee TsanCompoundRW[kNumberOfAccessSizes];
+ FunctionCallee TsanUnalignedCompoundRW[kNumberOfAccessSizes];
FunctionCallee TsanAtomicLoad[kNumberOfAccessSizes];
FunctionCallee TsanAtomicStore[kNumberOfAccessSizes];
FunctionCallee TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1]
@@ -268,6 +299,15 @@ void ThreadSanitizer::initialize(Module &M) {
TsanUnalignedVolatileWrite[i] = M.getOrInsertFunction(
UnalignedVolatileWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+ SmallString<64> CompoundRWName("__tsan_read_write" + ByteSizeStr);
+ TsanCompoundRW[i] = M.getOrInsertFunction(
+ CompoundRWName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+
+ SmallString<64> UnalignedCompoundRWName("__tsan_unaligned_read_write" +
+ ByteSizeStr);
+ TsanUnalignedCompoundRW[i] = M.getOrInsertFunction(
+ UnalignedCompoundRWName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
@@ -402,35 +442,43 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
// 'Local' is a vector of insns within the same BB (no calls between).
// 'All' is a vector of insns that will be instrumented.
void ThreadSanitizer::chooseInstructionsToInstrument(
- SmallVectorImpl<Instruction *> &Local, SmallVectorImpl<Instruction *> &All,
- const DataLayout &DL) {
- SmallPtrSet<Value*, 8> WriteTargets;
+ SmallVectorImpl<Instruction *> &Local,
+ SmallVectorImpl<InstructionInfo> &All, const DataLayout &DL) {
+ DenseMap<Value *, size_t> WriteTargets; // Map of addresses to index in All
// Iterate from the end.
for (Instruction *I : reverse(Local)) {
- if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
- Value *Addr = Store->getPointerOperand();
- if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr))
- continue;
- WriteTargets.insert(Addr);
- } else {
- LoadInst *Load = cast<LoadInst>(I);
- Value *Addr = Load->getPointerOperand();
- if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr))
- continue;
- if (!ClInstrumentReadBeforeWrite && WriteTargets.count(Addr)) {
- // We will write to this temp, so no reason to analyze the read.
- NumOmittedReadsBeforeWrite++;
- continue;
+ const bool IsWrite = isa<StoreInst>(*I);
+ Value *Addr = IsWrite ? cast<StoreInst>(I)->getPointerOperand()
+ : cast<LoadInst>(I)->getPointerOperand();
+
+ if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr))
+ continue;
+
+ if (!IsWrite) {
+ const auto WriteEntry = WriteTargets.find(Addr);
+ if (!ClInstrumentReadBeforeWrite && WriteEntry != WriteTargets.end()) {
+ auto &WI = All[WriteEntry->second];
+ // If we distinguish volatile accesses and if either the read or write
+ // is volatile, do not omit any instrumentation.
+ const bool AnyVolatile =
+ ClDistinguishVolatile && (cast<LoadInst>(I)->isVolatile() ||
+ cast<StoreInst>(WI.Inst)->isVolatile());
+ if (!AnyVolatile) {
+ // We will write to this temp, so no reason to analyze the read.
+ // Mark the write instruction as compound.
+ WI.Flags |= InstructionInfo::kCompoundRW;
+ NumOmittedReadsBeforeWrite++;
+ continue;
+ }
}
+
if (addrPointsToConstantData(Addr)) {
// Addr points to some constant data -- it can not race with any writes.
continue;
}
}
- Value *Addr = isa<StoreInst>(*I)
- ? cast<StoreInst>(I)->getPointerOperand()
- : cast<LoadInst>(I)->getPointerOperand();
- if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
+
+ if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
!PointerMayBeCaptured(Addr, true, true)) {
// The variable is addressable but not captured, so it cannot be
// referenced from a different thread and participate in a data race
@@ -438,7 +486,14 @@ void ThreadSanitizer::chooseInstructionsToInstrument(
NumOmittedNonCaptured++;
continue;
}
- All.push_back(I);
+
+ // Instrument this instruction.
+ All.emplace_back(I);
+ if (IsWrite) {
+ // For read-before-write and compound instrumentation we only need one
+ // write target, and we can override any previous entry if it exists.
+ WriteTargets[Addr] = All.size() - 1;
+ }
}
Local.clear();
}
@@ -479,7 +534,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
if (F.hasFnAttribute(Attribute::Naked))
return false;
initialize(*F.getParent());
- SmallVector<Instruction*, 8> AllLoadsAndStores;
+ SmallVector<InstructionInfo, 8> AllLoadsAndStores;
SmallVector<Instruction*, 8> LocalLoadsAndStores;
SmallVector<Instruction*, 8> AtomicAccesses;
SmallVector<Instruction*, 8> MemIntrinCalls;
@@ -514,8 +569,8 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
// Instrument memory accesses only if we want to report bugs in the function.
if (ClInstrumentMemoryAccesses && SanitizeFunction)
- for (auto Inst : AllLoadsAndStores) {
- Res |= instrumentLoadOrStore(Inst, DL);
+ for (const auto &II : AllLoadsAndStores) {
+ Res |= instrumentLoadOrStore(II, DL);
}
// Instrument atomic memory accesses in any case (they can be used to
@@ -553,13 +608,12 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
return Res;
}
-bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
+bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II,
const DataLayout &DL) {
- IRBuilder<> IRB(I);
- bool IsWrite = isa<StoreInst>(*I);
- Value *Addr = IsWrite
- ? cast<StoreInst>(I)->getPointerOperand()
- : cast<LoadInst>(I)->getPointerOperand();
+ IRBuilder<> IRB(II.Inst);
+ const bool IsWrite = isa<StoreInst>(*II.Inst);
+ Value *Addr = IsWrite ? cast<StoreInst>(II.Inst)->getPointerOperand()
+ : cast<LoadInst>(II.Inst)->getPointerOperand();
// swifterror memory addresses are mem2reg promoted by instruction selection.
// As such they cannot have regular uses like an instrumentation function and
@@ -570,9 +624,9 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
- if (IsWrite && isVtableAccess(I)) {
- LLVM_DEBUG(dbgs() << " VPTR : " << *I << "\n");
- Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
+ if (IsWrite && isVtableAccess(II.Inst)) {
+ LLVM_DEBUG(dbgs() << " VPTR : " << *II.Inst << "\n");
+ Value *StoredValue = cast<StoreInst>(II.Inst)->getValueOperand();
// StoredValue may be a vector type if we are storing several vptrs at once.
// In this case, just take the first element of the vector since this is
// enough to find vptr races.
@@ -588,36 +642,46 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
NumInstrumentedVtableWrites++;
return true;
}
- if (!IsWrite && isVtableAccess(I)) {
+ if (!IsWrite && isVtableAccess(II.Inst)) {
IRB.CreateCall(TsanVptrLoad,
IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
NumInstrumentedVtableReads++;
return true;
}
- const unsigned Alignment = IsWrite
- ? cast<StoreInst>(I)->getAlignment()
- : cast<LoadInst>(I)->getAlignment();
- const bool IsVolatile =
- ClDistinguishVolatile && (IsWrite ? cast<StoreInst>(I)->isVolatile()
- : cast<LoadInst>(I)->isVolatile());
+
+ const unsigned Alignment = IsWrite ? cast<StoreInst>(II.Inst)->getAlignment()
+ : cast<LoadInst>(II.Inst)->getAlignment();
+ const bool IsCompoundRW =
+ ClCompoundReadBeforeWrite && (II.Flags & InstructionInfo::kCompoundRW);
+ const bool IsVolatile = ClDistinguishVolatile &&
+ (IsWrite ? cast<StoreInst>(II.Inst)->isVolatile()
+ : cast<LoadInst>(II.Inst)->isVolatile());
+ assert((!IsVolatile || !IsCompoundRW) && "Compound volatile invalid!");
+
Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
FunctionCallee OnAccessFunc = nullptr;
if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) {
- if (IsVolatile)
+ if (IsCompoundRW)
+ OnAccessFunc = TsanCompoundRW[Idx];
+ else if (IsVolatile)
OnAccessFunc = IsWrite ? TsanVolatileWrite[Idx] : TsanVolatileRead[Idx];
else
OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
} else {
- if (IsVolatile)
+ if (IsCompoundRW)
+ OnAccessFunc = TsanUnalignedCompoundRW[Idx];
+ else if (IsVolatile)
OnAccessFunc = IsWrite ? TsanUnalignedVolatileWrite[Idx]
: TsanUnalignedVolatileRead[Idx];
else
OnAccessFunc = IsWrite ? TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx];
}
IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
- if (IsWrite) NumInstrumentedWrites++;
- else NumInstrumentedReads++;
+ if (IsCompoundRW || IsWrite)
+ NumInstrumentedWrites++;
+ if (IsCompoundRW || !IsWrite)
+ NumInstrumentedReads++;
return true;
}
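
Note: the ThreadSanitizer changes above teach chooseInstructionsToInstrument() to remember, per address, the index of the instrumented write, so that a read immediately preceding a write of the same location can be folded into one compound __tsan_read_write call (behind -tsan-compound-read-before-write). A toy model of that bookkeeping, with string literals standing in for address Values; this is an illustration of the data flow, not the pass itself:

  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/SmallVector.h"
  #include <cstdio>

  struct AccessInfo {
    const char *Addr;
    bool IsWrite;
    unsigned Flags = 0;                                // bit 0 ~ kCompoundRW
  };

  int main() {
    const char *X = "x";
    // Program order within one basic block: load x, then store x (e.g. "x += 1").
    AccessInfo Local[] = {{X, /*IsWrite=*/false}, {X, /*IsWrite=*/true}};
    llvm::SmallVector<AccessInfo, 8> All;              // accesses chosen for instrumentation
    llvm::DenseMap<const char *, size_t> WriteTargets; // address -> index into All
    for (int I = 1; I >= 0; --I) {                     // iterate from the end, as the pass does
      AccessInfo &A = Local[I];
      if (!A.IsWrite) {
        auto It = WriteTargets.find(A.Addr);
        if (It != WriteTargets.end()) {                // read-before-write of the same address
          All[It->second].Flags |= 1;                  // mark the write as compound
          continue;                                    // and skip separate read instrumentation
        }
      }
      All.push_back(A);
      if (A.IsWrite)
        WriteTargets[A.Addr] = All.size() - 1;
    }
    std::printf("%zu call(s) emitted, first flags = %u\n", All.size(), All[0].Flags);
    return 0;
  }
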
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
index cd4f636ff132..fb6216bb2177 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
@@ -11,10 +11,10 @@
//===----------------------------------------------------------------------===//
#include "ValueProfilePlugins.inc"
+#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/InitializePasses.h"
-
#include <cassert>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
index c3f549c2e7cc..584a60ab451e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
@@ -17,13 +17,16 @@
#define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
+#include <memory>
+#include <vector>
namespace llvm {
+class Function;
+class Instruction;
+class Value;
+
/// Utility analysis that determines what values are worth profiling.
/// The actual logic is inside the ValueProfileCollectorImpl, whose job is to
/// populate the Candidates vector.
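
Note: the header change above trades heavyweight includes for forward declarations, which is enough because the collector only refers to Function, Instruction and Value through pointers and references. A tiny sketch of the pattern, with a hypothetical CandidateScanner standing in for the collector:

  namespace llvm {
  class Function;                  // forward declaration: no Function.h needed here
  }

  struct CandidateScanner {        // hypothetical stand-in for ValueProfileCollector
    void scan(llvm::Function &F);  // the .cpp that defines this includes Function.h
  };

  int main() { return 0; }         // this translation unit compiles with no LLVM headers
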
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 46bc586fe688..7f7f2dc89b7e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -22,6 +22,7 @@
#include "DependencyAnalysis.h"
#include "ObjCARC.h"
#include "ProvenanceAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CFG.h"
using namespace llvm;
@@ -51,10 +52,8 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
if (AliasAnalysis::onlyReadsMemory(MRB))
return false;
if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
- const DataLayout &DL = Inst->getModule()->getDataLayout();
for (const Value *Op : Call->args()) {
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
- PA.related(Ptr, Op, DL))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -85,8 +84,6 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
if (Class == ARCInstKind::Call)
return false;
- const DataLayout &DL = Inst->getModule()->getDataLayout();
-
// Consider various instructions which may have pointer arguments which are
// not "uses".
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
@@ -99,26 +96,24 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
// For calls, just check the arguments (and not the callee operand).
for (auto OI = CS->arg_begin(), OE = CS->arg_end(); OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
- PA.related(Ptr, Op, DL))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Special-case stores, because we don't care about the stored value, just
// the store address.
- const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand(), DL);
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
// If we can't tell what the underlying object was, assume there is a
// dependence.
- return IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
- PA.related(Op, Ptr, DL);
+ return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr);
}
// Check each operand for a match.
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op, DL))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -214,15 +209,13 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
/// non-local dependencies on Arg.
///
/// TODO: Cache results?
-void
-llvm::objcarc::FindDependencies(DependenceKind Flavor,
- const Value *Arg,
- BasicBlock *StartBB, Instruction *StartInst,
- SmallPtrSetImpl<Instruction *> &DependingInsts,
- SmallPtrSetImpl<const BasicBlock *> &Visited,
- ProvenanceAnalysis &PA) {
+static bool findDependencies(DependenceKind Flavor, const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSetImpl<Instruction *> &DependingInsts,
+ ProvenanceAnalysis &PA) {
BasicBlock::iterator StartPos = StartInst->getIterator();
+ SmallPtrSet<const BasicBlock *, 4> Visited;
SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
Worklist.push_back(std::make_pair(StartBB, StartPos));
do {
@@ -235,15 +228,14 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
if (LocalStartPos == StartBBBegin) {
pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
if (PI == PE)
- // If we've reached the function entry, produce a null dependence.
- DependingInsts.insert(nullptr);
- else
- // Add the predecessors to the worklist.
- do {
- BasicBlock *PredBB = *PI;
- if (Visited.insert(PredBB).second)
- Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
- } while (++PI != PE);
+ // Return if we've reached the function entry.
+ return false;
+ // Add the predecessors to the worklist.
+ do {
+ BasicBlock *PredBB = *PI;
+ if (Visited.insert(PredBB).second)
+ Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+ } while (++PI != PE);
break;
}
@@ -262,9 +254,22 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
if (BB == StartBB)
continue;
for (const BasicBlock *Succ : successors(BB))
- if (Succ != StartBB && !Visited.count(Succ)) {
- DependingInsts.insert(reinterpret_cast<Instruction *>(-1));
- return;
- }
+ if (Succ != StartBB && !Visited.count(Succ))
+ return false;
}
+
+ return true;
+}
+
+llvm::Instruction *llvm::objcarc::findSingleDependency(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB,
+ Instruction *StartInst,
+ ProvenanceAnalysis &PA) {
+ SmallPtrSet<Instruction *, 4> DependingInsts;
+
+ if (!findDependencies(Flavor, Arg, StartBB, StartInst, DependingInsts, PA) ||
+ DependingInsts.size() != 1)
+ return nullptr;
+ return *DependingInsts.begin();
}
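
Note: the refactoring above folds the old FindDependencies interface into an internal findDependencies() plus a public findSingleDependency() that returns the dependency only when exactly one was found, and nullptr otherwise (including the "reached function entry" and "escapes past visited blocks" cases). A minimal sketch of that "exactly one or nothing" shape; singleOrNull is an illustrative name, not the real API:

  #include "llvm/ADT/SmallPtrSet.h"

  template <typename T>
  T *singleOrNull(const llvm::SmallPtrSetImpl<T *> &Set) {
    return Set.size() == 1 ? *Set.begin() : nullptr; // unique element, or nothing usable
  }

  int main() {
    int A = 0, B = 0;
    llvm::SmallPtrSet<int *, 4> One, Two;
    One.insert(&A);
    Two.insert(&A);
    Two.insert(&B);
    return (singleOrNull(One) == &A && singleOrNull(Two) == nullptr) ? 0 : 1;
  }
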
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
index ed89c8c8fc89..cf4c05ebe91c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -50,12 +50,12 @@ enum DependenceKind {
RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue.
};
-void FindDependencies(DependenceKind Flavor,
- const Value *Arg,
- BasicBlock *StartBB, Instruction *StartInst,
- SmallPtrSetImpl<Instruction *> &DependingInstructions,
- SmallPtrSetImpl<const BasicBlock *> &Visited,
- ProvenanceAnalysis &PA);
+/// Find dependent instructions. If there is exactly one dependent instruction,
+/// return it. Otherwise, return null.
+llvm::Instruction *findSingleDependency(DependenceKind Flavor, const Value *Arg,
+ BasicBlock *StartBB,
+ Instruction *StartInst,
+ ProvenanceAnalysis &PA);
bool
Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index f4da51650a7d..970136392fdd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -29,8 +29,8 @@ void llvm::initializeObjCARCOpts(PassRegistry &Registry) {
initializeObjCARCAAWrapperPassPass(Registry);
initializeObjCARCAPElimPass(Registry);
initializeObjCARCExpandPass(Registry);
- initializeObjCARCContractPass(Registry);
- initializeObjCARCOptPass(Registry);
+ initializeObjCARCContractLegacyPassPass(Registry);
+ initializeObjCARCOptLegacyPassPass(Registry);
initializePAEvalPass(Registry);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index b496842fcfc5..8227a8c6f75f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -23,9 +23,7 @@
#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
-#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/ObjCARC.h"
namespace llvm {
namespace objcarc {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index ac1db27f5e64..6a928f2c7ffb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -26,9 +26,11 @@
#include "ObjCARC.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/ObjCARC.h"
using namespace llvm;
using namespace llvm::objcarc;
@@ -36,39 +38,10 @@ using namespace llvm::objcarc;
#define DEBUG_TYPE "objc-arc-ap-elim"
namespace {
- /// Autorelease pool elimination.
- class ObjCARCAPElim : public ModulePass {
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnModule(Module &M) override;
-
- static bool MayAutorelease(const CallBase &CB, unsigned Depth = 0);
- static bool OptimizeBB(BasicBlock *BB);
-
- public:
- static char ID;
- ObjCARCAPElim() : ModulePass(ID) {
- initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char ObjCARCAPElim::ID = 0;
-INITIALIZE_PASS(ObjCARCAPElim,
- "objc-arc-apelim",
- "ObjC ARC autorelease pool elimination",
- false, false)
-
-Pass *llvm::createObjCARCAPElimPass() {
- return new ObjCARCAPElim();
-}
-
-void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
-}
/// Interprocedurally determine if calls made by the given call site can
/// possibly produce autoreleases.
-bool ObjCARCAPElim::MayAutorelease(const CallBase &CB, unsigned Depth) {
+bool MayAutorelease(const CallBase &CB, unsigned Depth = 0) {
if (const Function *Callee = CB.getCalledFunction()) {
if (!Callee->hasExactDefinition())
return true;
@@ -87,7 +60,7 @@ bool ObjCARCAPElim::MayAutorelease(const CallBase &CB, unsigned Depth) {
return true;
}
-bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
+bool OptimizeBB(BasicBlock *BB) {
bool Changed = false;
Instruction *Push = nullptr;
@@ -125,17 +98,13 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
return Changed;
}
-bool ObjCARCAPElim::runOnModule(Module &M) {
+bool runImpl(Module &M) {
if (!EnableARCOpts)
return false;
// If nothing in the Module uses ARC, don't do anything.
if (!ModuleHasARC(M))
return false;
-
- if (skipModule(M))
- return false;
-
// Find the llvm.global_ctors variable, as the first step in
// identifying the global constructors. In theory, unnecessary autorelease
// pools could occur anywhere, but in practice it's pretty rare. Global
@@ -175,3 +144,40 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
return Changed;
}
+
+/// Autorelease pool elimination.
+class ObjCARCAPElim : public ModulePass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnModule(Module &M) override;
+
+public:
+ static char ID;
+ ObjCARCAPElim() : ModulePass(ID) {
+ initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // namespace
+
+char ObjCARCAPElim::ID = 0;
+INITIALIZE_PASS(ObjCARCAPElim, "objc-arc-apelim",
+ "ObjC ARC autorelease pool elimination", false, false)
+
+Pass *llvm::createObjCARCAPElimPass() { return new ObjCARCAPElim(); }
+
+void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCAPElim::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+ return runImpl(M);
+}
+
+PreservedAnalyses ObjCARCAPElimPass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (!runImpl(M))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
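
Note: the ObjCARCAPElim rewrite above follows the standard recipe for porting a legacy pass to the new pass manager: the real work moves into a free runImpl(), the legacy ModulePass becomes a thin wrapper, and the new pass's run() reports PreservedAnalyses depending on whether anything changed. A self-contained sketch of the new-pass-manager half, with a hypothetical DemoPass and a no-op runImpl(); the driver boilerplate in main() is only what is needed to run a module pass outside of opt:

  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Passes/PassBuilder.h"
  using namespace llvm;

  static bool runImpl(Module &M) { (void)M; return false; } // placeholder transform

  struct DemoPass : PassInfoMixin<DemoPass> {
    PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
      if (!runImpl(M))
        return PreservedAnalyses::all();  // untouched IR preserves everything
      PreservedAnalyses PA;
      PA.preserveSet<CFGAnalyses>();      // changed IR, but the CFG survived
      return PA;
    }
  };

  int main() {
    LLVMContext Ctx;
    Module M("demo", Ctx);
    PassBuilder PB;
    LoopAnalysisManager LAM;
    FunctionAnalysisManager FAM;
    CGSCCAnalysisManager CGAM;
    ModuleAnalysisManager MAM;
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
    ModulePassManager MPM;
    MPM.addPass(DemoPass());
    MPM.run(M, MAM);
    return 0;
  }
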
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 7fd4857c4490..86d161116e8c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -30,15 +30,18 @@
#include "ObjCARC.h"
#include "ProvenanceAnalysis.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/ObjCARC.h"
using namespace llvm;
using namespace llvm::objcarc;
@@ -53,59 +56,63 @@ STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
//===----------------------------------------------------------------------===//
namespace {
- /// Late ARC optimizations
- ///
- /// These change the IR in a way that makes it difficult to be analyzed by
- /// ObjCARCOpt, so it's run late.
- class ObjCARCContract : public FunctionPass {
- bool Changed;
- AliasAnalysis *AA;
- DominatorTree *DT;
- ProvenanceAnalysis PA;
- ARCRuntimeEntryPoints EP;
-
- /// A flag indicating whether this optimization pass should run.
- bool Run;
-
- /// The inline asm string to insert between calls and RetainRV calls to make
- /// the optimization work on targets which need it.
- const MDString *RVInstMarker;
-
- /// The set of inserted objc_storeStrong calls. If at the end of walking the
- /// function we have found no alloca instructions, these calls can be marked
- /// "tail".
- SmallPtrSet<CallInst *, 8> StoreStrongCalls;
-
- /// Returns true if we eliminated Inst.
- bool tryToPeepholeInstruction(
- Function &F, Instruction *Inst, inst_iterator &Iter,
- SmallPtrSetImpl<Instruction *> &DepInsts,
- SmallPtrSetImpl<const BasicBlock *> &Visited,
- bool &TailOkForStoreStrong,
- const DenseMap<BasicBlock *, ColorVector> &BlockColors);
-
- bool optimizeRetainCall(Function &F, Instruction *Retain);
-
- bool
- contractAutorelease(Function &F, Instruction *Autorelease,
- ARCInstKind Class,
- SmallPtrSetImpl<Instruction *> &DependingInstructions,
- SmallPtrSetImpl<const BasicBlock *> &Visited);
-
- void tryToContractReleaseIntoStoreStrong(
- Instruction *Release, inst_iterator &Iter,
- const DenseMap<BasicBlock *, ColorVector> &BlockColors);
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
-
- public:
- static char ID;
- ObjCARCContract() : FunctionPass(ID) {
- initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
- }
- };
+/// Late ARC optimizations
+///
+/// These change the IR in a way that makes it difficult to be analyzed by
+/// ObjCARCOpt, so it's run late.
+
+class ObjCARCContract {
+ bool Changed;
+ AAResults *AA;
+ DominatorTree *DT;
+ ProvenanceAnalysis PA;
+ ARCRuntimeEntryPoints EP;
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// The inline asm string to insert between calls and RetainRV calls to make
+ /// the optimization work on targets which need it.
+ const MDString *RVInstMarker;
+
+ /// The set of inserted objc_storeStrong calls. If at the end of walking the
+ /// function we have found no alloca instructions, these calls can be marked
+ /// "tail".
+ SmallPtrSet<CallInst *, 8> StoreStrongCalls;
+
+ /// Returns true if we eliminated Inst.
+ bool tryToPeepholeInstruction(
+ Function &F, Instruction *Inst, inst_iterator &Iter,
+ bool &TailOkForStoreStrong,
+ const DenseMap<BasicBlock *, ColorVector> &BlockColors);
+
+ bool optimizeRetainCall(Function &F, Instruction *Retain);
+
+ bool contractAutorelease(Function &F, Instruction *Autorelease,
+ ARCInstKind Class);
+
+ void tryToContractReleaseIntoStoreStrong(
+ Instruction *Release, inst_iterator &Iter,
+ const DenseMap<BasicBlock *, ColorVector> &BlockColors);
+
+public:
+ bool init(Module &M);
+ bool run(Function &F, AAResults *AA, DominatorTree *DT);
+};
+
+class ObjCARCContractLegacyPass : public FunctionPass {
+ ObjCARCContract OCARCC;
+
+public:
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+ static char ID;
+ ObjCARCContractLegacyPass() : FunctionPass(ID) {
+ initializeObjCARCContractLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+};
}
//===----------------------------------------------------------------------===//
@@ -149,32 +156,17 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
}
/// Merge an autorelease with a retain into a fused call.
-bool ObjCARCContract::contractAutorelease(
- Function &F, Instruction *Autorelease, ARCInstKind Class,
- SmallPtrSetImpl<Instruction *> &DependingInstructions,
- SmallPtrSetImpl<const BasicBlock *> &Visited) {
+bool ObjCARCContract::contractAutorelease(Function &F, Instruction *Autorelease,
+ ARCInstKind Class) {
const Value *Arg = GetArgRCIdentityRoot(Autorelease);
// Check that there are no instructions between the retain and the autorelease
// (such as an autorelease_pop) which may change the count.
- CallInst *Retain = nullptr;
- if (Class == ARCInstKind::AutoreleaseRV)
- FindDependencies(RetainAutoreleaseRVDep, Arg,
- Autorelease->getParent(), Autorelease,
- DependingInstructions, Visited, PA);
- else
- FindDependencies(RetainAutoreleaseDep, Arg,
- Autorelease->getParent(), Autorelease,
- DependingInstructions, Visited, PA);
-
- Visited.clear();
- if (DependingInstructions.size() != 1) {
- DependingInstructions.clear();
- return false;
- }
-
- Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
- DependingInstructions.clear();
+ DependenceKind DK = Class == ARCInstKind::AutoreleaseRV
+ ? RetainAutoreleaseRVDep
+ : RetainAutoreleaseDep;
+ auto *Retain = dyn_cast_or_null<CallInst>(
+ findSingleDependency(DK, Arg, Autorelease->getParent(), Autorelease, PA));
if (!Retain || GetBasicARCInstKind(Retain) != ARCInstKind::Retain ||
GetArgRCIdentityRoot(Retain) != Arg)
@@ -204,7 +196,7 @@ bool ObjCARCContract::contractAutorelease(
static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
Instruction *Release,
ProvenanceAnalysis &PA,
- AliasAnalysis *AA) {
+ AAResults *AA) {
StoreInst *Store = nullptr;
bool SawRelease = false;
@@ -442,8 +434,7 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(
bool ObjCARCContract::tryToPeepholeInstruction(
Function &F, Instruction *Inst, inst_iterator &Iter,
- SmallPtrSetImpl<Instruction *> &DependingInsts,
- SmallPtrSetImpl<const BasicBlock *> &Visited, bool &TailOkForStoreStrongs,
+ bool &TailOkForStoreStrongs,
const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
// Only these library routines return their argument. In particular,
// objc_retainBlock does not necessarily return its argument.
@@ -454,7 +445,7 @@ bool ObjCARCContract::tryToPeepholeInstruction(
return false;
case ARCInstKind::Autorelease:
case ARCInstKind::AutoreleaseRV:
- return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
+ return contractAutorelease(F, Inst, Class);
case ARCInstKind::Retain:
// Attempt to convert retains to retainrvs if they are next to function
// calls.
@@ -485,7 +476,7 @@ bool ObjCARCContract::tryToPeepholeInstruction(
--BBI;
} while (IsNoopInstruction(&*BBI));
- if (&*BBI == GetArgRCIdentityRoot(Inst)) {
+ if (GetRCIdentityRoot(&*BBI) == GetArgRCIdentityRoot(Inst)) {
LLVM_DEBUG(dbgs() << "Adding inline asm marker for the return value "
"optimization.\n");
Changed = true;
@@ -542,7 +533,22 @@ bool ObjCARCContract::tryToPeepholeInstruction(
// Top Level Driver
//===----------------------------------------------------------------------===//
-bool ObjCARCContract::runOnFunction(Function &F) {
+bool ObjCARCContract::init(Module &M) {
+ // If nothing in the Module uses ARC, don't do anything.
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ EP.init(&M);
+
+ // Initialize RVInstMarker.
+ const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
+ RVInstMarker = dyn_cast_or_null<MDString>(M.getModuleFlag(MarkerKey));
+
+ return false;
+}
+
+bool ObjCARCContract::run(Function &F, AAResults *A, DominatorTree *D) {
if (!EnableARCOpts)
return false;
@@ -551,10 +557,9 @@ bool ObjCARCContract::runOnFunction(Function &F) {
return false;
Changed = false;
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
+ AA = A;
+ DT = D;
+ PA.setAA(A);
DenseMap<BasicBlock *, ColorVector> BlockColors;
if (F.hasPersonalityFn() &&
@@ -574,9 +579,6 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// For ObjC library calls which return their argument, replace uses of the
// argument with uses of the call return value, if it dominates the use. This
// reduces register pressure.
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
-
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
Instruction *Inst = &*I++;
@@ -584,8 +586,8 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// First try to peephole Inst. If there is nothing further we can do in
// terms of undoing objc-arc-expand, process the next inst.
- if (tryToPeepholeInstruction(F, Inst, I, DependingInstructions, Visited,
- TailOkForStoreStrongs, BlockColors))
+ if (tryToPeepholeInstruction(F, Inst, I, TailOkForStoreStrongs,
+ BlockColors))
continue;
// Otherwise, try to undo objc-arc-expand.
@@ -720,33 +722,45 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// Misc Pass Manager
//===----------------------------------------------------------------------===//
-char ObjCARCContract::ID = 0;
-INITIALIZE_PASS_BEGIN(ObjCARCContract, "objc-arc-contract",
+char ObjCARCContractLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContractLegacyPass, "objc-arc-contract",
"ObjC ARC contraction", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ObjCARCContract, "objc-arc-contract",
+INITIALIZE_PASS_END(ObjCARCContractLegacyPass, "objc-arc-contract",
"ObjC ARC contraction", false, false)
-void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+void ObjCARCContractLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
-Pass *llvm::createObjCARCContractPass() { return new ObjCARCContract(); }
-
-bool ObjCARCContract::doInitialization(Module &M) {
- // If nothing in the Module uses ARC, don't do anything.
- Run = ModuleHasARC(M);
- if (!Run)
- return false;
+Pass *llvm::createObjCARCContractPass() {
+ return new ObjCARCContractLegacyPass();
+}
- EP.init(&M);
+bool ObjCARCContractLegacyPass::doInitialization(Module &M) {
+ return OCARCC.init(M);
+}
- // Initialize RVInstMarker.
- const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
- RVInstMarker = dyn_cast_or_null<MDString>(M.getModuleFlag(MarkerKey));
+bool ObjCARCContractLegacyPass::runOnFunction(Function &F) {
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return OCARCC.run(F, AA, DT);
+}
- return false;
+PreservedAnalyses ObjCARCContractPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ ObjCARCContract OCAC;
+ OCAC.init(*F.getParent());
+
+ bool Changed = OCAC.run(F, &AM.getResult<AAManager>(F),
+ &AM.getResult<DominatorTreeAnalysis>(F));
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index f8d872a7c995..d2121dcebe91 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -34,57 +35,20 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/ObjCARC.h"
#define DEBUG_TYPE "objc-arc-expand"
-namespace llvm {
- class Module;
-}
-
using namespace llvm;
using namespace llvm::objcarc;
namespace {
- /// Early ARC transformations.
- class ObjCARCExpand : public FunctionPass {
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
-
- /// A flag indicating whether this optimization pass should run.
- bool Run;
-
- public:
- static char ID;
- ObjCARCExpand() : FunctionPass(ID) {
- initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char ObjCARCExpand::ID = 0;
-INITIALIZE_PASS(ObjCARCExpand,
- "objc-arc-expand", "ObjC ARC expansion", false, false)
-
-Pass *llvm::createObjCARCExpandPass() {
- return new ObjCARCExpand();
-}
-
-void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
-}
-
-bool ObjCARCExpand::doInitialization(Module &M) {
- Run = ModuleHasARC(M);
- return false;
-}
-
-bool ObjCARCExpand::runOnFunction(Function &F) {
+static bool runImpl(Function &F) {
if (!EnableARCOpts)
return false;
// If nothing in the Module uses ARC, don't do anything.
- if (!Run)
+ if (!ModuleHasARC(*F.getParent()))
return false;
bool Changed = false;
@@ -126,3 +90,37 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
return Changed;
}
+
+/// Early ARC transformations.
+class ObjCARCExpand : public FunctionPass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+
+public:
+ static char ID;
+ ObjCARCExpand() : FunctionPass(ID) {
+ initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // namespace
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand, "objc-arc-expand", "ObjC ARC expansion", false,
+ false)
+
+Pass *llvm::createObjCARCExpandPass() { return new ObjCARCExpand(); }
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCExpand::runOnFunction(Function &F) { return runImpl(F); }
+
+PreservedAnalyses ObjCARCExpandPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (!runImpl(F))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index cb1fa804fa11..1c447499519c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -65,6 +65,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/ObjCARC.h"
#include <cassert>
#include <iterator>
#include <utility>
@@ -480,123 +481,133 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
namespace {
/// The main ARC optimization pass.
- class ObjCARCOpt : public FunctionPass {
- bool Changed;
- ProvenanceAnalysis PA;
-
- /// A cache of references to runtime entry point constants.
- ARCRuntimeEntryPoints EP;
-
- /// A cache of MDKinds that can be passed into other functions to propagate
- /// MDKind identifiers.
- ARCMDKindCache MDKindCache;
-
- /// A flag indicating whether this optimization pass should run.
- bool Run;
-
- /// A flag indicating whether the optimization that removes or moves
- /// retain/release pairs should be performed.
- bool DisableRetainReleasePairing = false;
-
- /// Flags which determine whether each of the interesting runtime functions
- /// is in fact used in the current function.
- unsigned UsedInThisFunction;
-
- bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
- void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
- ARCInstKind &Class);
- void OptimizeIndividualCalls(Function &F);
-
- /// Optimize an individual call, optionally passing the
- /// GetArgRCIdentityRoot if it has already been computed.
- void OptimizeIndividualCallImpl(
- Function &F, DenseMap<BasicBlock *, ColorVector> &BlockColors,
- Instruction *Inst, ARCInstKind Class, const Value *Arg);
-
- /// Try to optimize an AutoreleaseRV with a RetainRV or ClaimRV. If the
- /// optimization occurs, returns true to indicate that the caller should
- /// assume the instructions are dead.
- bool OptimizeInlinedAutoreleaseRVCall(
- Function &F, DenseMap<BasicBlock *, ColorVector> &BlockColors,
- Instruction *Inst, const Value *&Arg, ARCInstKind Class,
- Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg);
-
- void CheckForCFGHazards(const BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- BBState &MyStates) const;
- bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB,
- BlotMapVector<Value *, RRInfo> &Retains,
- BBState &MyStates);
- bool VisitBottomUp(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- BlotMapVector<Value *, RRInfo> &Retains);
- bool VisitInstructionTopDown(Instruction *Inst,
- DenseMap<Value *, RRInfo> &Releases,
- BBState &MyStates);
- bool VisitTopDown(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- DenseMap<Value *, RRInfo> &Releases);
- bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates,
- BlotMapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases);
-
- void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
- BlotMapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts, Module *M);
-
- bool
- PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
- BlotMapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases, Module *M,
- Instruction * Retain,
- SmallVectorImpl<Instruction *> &DeadInsts,
- RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
- Value *Arg, bool KnownSafe,
- bool &AnyPairsCompletelyEliminated);
-
- bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
- BlotMapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases, Module *M);
-
- void OptimizeWeakCalls(Function &F);
-
- bool OptimizeSequences(Function &F);
-
- void OptimizeReturns(Function &F);
+class ObjCARCOpt {
+ bool Changed;
+ ProvenanceAnalysis PA;
+
+ /// A cache of references to runtime entry point constants.
+ ARCRuntimeEntryPoints EP;
+
+ /// A cache of MDKinds that can be passed into other functions to propagate
+ /// MDKind identifiers.
+ ARCMDKindCache MDKindCache;
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// A flag indicating whether the optimization that removes or moves
+ /// retain/release pairs should be performed.
+ bool DisableRetainReleasePairing = false;
+
+ /// Flags which determine whether each of the interesting runtime functions
+ /// is in fact used in the current function.
+ unsigned UsedInThisFunction;
+
+ bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+ void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
+ ARCInstKind &Class);
+ void OptimizeIndividualCalls(Function &F);
+
+ /// Optimize an individual call, optionally passing the
+ /// GetArgRCIdentityRoot if it has already been computed.
+ void OptimizeIndividualCallImpl(
+ Function &F, DenseMap<BasicBlock *, ColorVector> &BlockColors,
+ Instruction *Inst, ARCInstKind Class, const Value *Arg);
+
+ /// Try to optimize an AutoreleaseRV with a RetainRV or ClaimRV. If the
+ /// optimization occurs, returns true to indicate that the caller should
+ /// assume the instructions are dead.
+ bool OptimizeInlinedAutoreleaseRVCall(
+ Function &F, DenseMap<BasicBlock *, ColorVector> &BlockColors,
+ Instruction *Inst, const Value *&Arg, ARCInstKind Class,
+ Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg);
+
+ void CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const;
+ bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates);
+ bool VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains);
+ bool VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates);
+ bool VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases);
+ bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts, Module *M);
+
+ bool PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M,
+ Instruction *Retain,
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ Value *Arg, bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated);
+
+ bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M);
+
+ void OptimizeWeakCalls(Function &F);
+
+ bool OptimizeSequences(Function &F);
+
+ void OptimizeReturns(Function &F);
#ifndef NDEBUG
- void GatherStatistics(Function &F, bool AfterOptimization = false);
+ void GatherStatistics(Function &F, bool AfterOptimization = false);
#endif
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
- void releaseMemory() override;
-
public:
- static char ID;
-
- ObjCARCOpt() : FunctionPass(ID) {
- initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
- }
- };
+ void init(Module &M);
+ bool run(Function &F, AAResults &AA);
+ void releaseMemory();
+};
+
+/// The main ARC optimization pass.
+class ObjCARCOptLegacyPass : public FunctionPass {
+public:
+ ObjCARCOptLegacyPass() : FunctionPass(ID) {
+ initializeObjCARCOptLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool doInitialization(Module &M) override {
+ OCAO.init(M);
+ return false;
+ }
+ bool runOnFunction(Function &F) override {
+ return OCAO.run(F, getAnalysis<AAResultsWrapperPass>().getAAResults());
+ }
+ void releaseMemory() override { OCAO.releaseMemory(); }
+ static char ID;
+private:
+ ObjCARCOpt OCAO;
+};
} // end anonymous namespace
-char ObjCARCOpt::ID = 0;
+char ObjCARCOptLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ObjCARCOpt,
- "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_BEGIN(ObjCARCOptLegacyPass, "objc-arc", "ObjC ARC optimization",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass)
-INITIALIZE_PASS_END(ObjCARCOpt,
- "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_END(ObjCARCOptLegacyPass, "objc-arc", "ObjC ARC optimization",
+ false, false)
-Pass *llvm::createObjCARCOptPass() {
- return new ObjCARCOpt();
-}
+Pass *llvm::createObjCARCOptPass() { return new ObjCARCOptLegacyPass(); }
-void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+void ObjCARCOptLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<ObjCARCAAWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
// ARC optimization doesn't currently split critical edges.
@@ -664,7 +675,7 @@ bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall(
SmallVector<const Value *, 4> ArgUsers;
getEquivalentPHIs(*PN, ArgUsers);
- if (llvm::find(ArgUsers, AutoreleaseRVArg) == ArgUsers.end())
+ if (!llvm::is_contained(ArgUsers, AutoreleaseRVArg))
return false;
}
@@ -1114,8 +1125,7 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
if (!HasNull)
continue;
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
+ Instruction *DepInst = nullptr;
// Check that there is nothing that cares about the reference
// count between the call and the phi.
@@ -1127,13 +1137,13 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
case ARCInstKind::Release:
// These can't be moved across things that care about the retain
// count.
- FindDependencies(NeedsPositiveRetainCount, Arg, Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
+ DepInst = findSingleDependency(NeedsPositiveRetainCount, Arg,
+ Inst->getParent(), Inst, PA);
break;
case ARCInstKind::Autorelease:
// These can't be moved across autorelease pool scope boundaries.
- FindDependencies(AutoreleasePoolBoundary, Arg, Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
+ DepInst = findSingleDependency(AutoreleasePoolBoundary, Arg,
+ Inst->getParent(), Inst, PA);
break;
case ARCInstKind::ClaimRV:
case ARCInstKind::RetainRV:
@@ -1147,9 +1157,7 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
llvm_unreachable("Invalid dependence flavor");
}
- if (DependingInstructions.size() != 1)
- continue;
- if (*DependingInstructions.begin() != PN)
+ if (DepInst != PN)
continue;
Changed = true;
@@ -2223,25 +2231,21 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
/// Check if there is a dependent call earlier that does not have anything in
/// between the Retain and the call that can affect the reference count of their
/// shared pointer argument. Note that Retain need not be in BB.
-static bool
-HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
- SmallPtrSetImpl<Instruction *> &DepInsts,
- SmallPtrSetImpl<const BasicBlock *> &Visited,
- ProvenanceAnalysis &PA) {
- FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
- DepInsts, Visited, PA);
- if (DepInsts.size() != 1)
- return false;
-
- auto *Call = dyn_cast_or_null<CallInst>(*DepInsts.begin());
+static CallInst *HasSafePathToPredecessorCall(const Value *Arg,
+ Instruction *Retain,
+ ProvenanceAnalysis &PA) {
+ auto *Call = dyn_cast_or_null<CallInst>(findSingleDependency(
+ CanChangeRetainCount, Arg, Retain->getParent(), Retain, PA));
// Check that the pointer is the return value of the call.
if (!Call || Arg != Call)
- return false;
+ return nullptr;
// Check that the call is a regular call.
ARCInstKind Class = GetBasicARCInstKind(Call);
- return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call;
+ return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call
+ ? Call
+ : nullptr;
}
/// Find a dependent retain that precedes the given autorelease for which there
@@ -2250,15 +2254,9 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
static CallInst *
FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
Instruction *Autorelease,
- SmallPtrSetImpl<Instruction *> &DepInsts,
- SmallPtrSetImpl<const BasicBlock *> &Visited,
ProvenanceAnalysis &PA) {
- FindDependencies(CanChangeRetainCount, Arg,
- BB, Autorelease, DepInsts, Visited, PA);
- if (DepInsts.size() != 1)
- return nullptr;
-
- auto *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ auto *Retain = dyn_cast_or_null<CallInst>(
+ findSingleDependency(CanChangeRetainCount, Arg, BB, Autorelease, PA));
// Check that we found a retain with the same argument.
if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) ||
@@ -2275,15 +2273,11 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
static CallInst *
FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
ReturnInst *Ret,
- SmallPtrSetImpl<Instruction *> &DepInsts,
- SmallPtrSetImpl<const BasicBlock *> &V,
ProvenanceAnalysis &PA) {
- FindDependencies(NeedsPositiveRetainCount, Arg,
- BB, Ret, DepInsts, V, PA);
- if (DepInsts.size() != 1)
- return nullptr;
+ SmallPtrSet<Instruction *, 4> DepInsts;
+ auto *Autorelease = dyn_cast_or_null<CallInst>(
+ findSingleDependency(NeedsPositiveRetainCount, Arg, BB, Ret, PA));
- auto *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin());
if (!Autorelease)
return nullptr;
ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease);
@@ -2309,8 +2303,6 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
for (BasicBlock &BB: F) {
ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back());
if (!Ret)
@@ -2323,40 +2315,27 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
// Look for an ``autorelease'' instruction that is a predecessor of Ret and
// dependent on Arg such that there are no instructions dependent on Arg
// that need a positive ref count in between the autorelease and Ret.
- CallInst *Autorelease = FindPredecessorAutoreleaseWithSafePath(
- Arg, &BB, Ret, DependingInstructions, Visited, PA);
- DependingInstructions.clear();
- Visited.clear();
+ CallInst *Autorelease =
+ FindPredecessorAutoreleaseWithSafePath(Arg, &BB, Ret, PA);
if (!Autorelease)
continue;
CallInst *Retain = FindPredecessorRetainWithSafePath(
- Arg, Autorelease->getParent(), Autorelease, DependingInstructions,
- Visited, PA);
- DependingInstructions.clear();
- Visited.clear();
+ Arg, Autorelease->getParent(), Autorelease, PA);
if (!Retain)
continue;
// Check that there is nothing that can affect the reference count
// between the retain and the call. Note that Retain need not be in BB.
- bool HasSafePathToCall = HasSafePathToPredecessorCall(Arg, Retain,
- DependingInstructions,
- Visited, PA);
+ CallInst *Call = HasSafePathToPredecessorCall(Arg, Retain, PA);
// Don't remove retainRV/autoreleaseRV pairs if the call isn't a tail call.
- if (HasSafePathToCall &&
- GetBasicARCInstKind(Retain) == ARCInstKind::RetainRV &&
- GetBasicARCInstKind(Autorelease) == ARCInstKind::AutoreleaseRV &&
- !cast<CallInst>(*DependingInstructions.begin())->isTailCall())
- continue;
-
- DependingInstructions.clear();
- Visited.clear();
-
- if (!HasSafePathToCall)
+ if (!Call ||
+ (!Call->isTailCall() &&
+ GetBasicARCInstKind(Retain) == ARCInstKind::RetainRV &&
+ GetBasicARCInstKind(Autorelease) == ARCInstKind::AutoreleaseRV))
continue;
// If so, we can zap the retain and autorelease.
@@ -2393,14 +2372,14 @@ ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
}
#endif
-bool ObjCARCOpt::doInitialization(Module &M) {
+void ObjCARCOpt::init(Module &M) {
if (!EnableARCOpts)
- return false;
+ return;
// If nothing in the Module uses ARC, don't do anything.
Run = ModuleHasARC(M);
if (!Run)
- return false;
+ return;
// Intuitively, objc_retain and others are nocapture, however in practice
// they are not, because they return their argument value. And objc_release
@@ -2409,11 +2388,9 @@ bool ObjCARCOpt::doInitialization(Module &M) {
// Initialize our runtime entry point cache.
EP.init(&M);
-
- return false;
}
-bool ObjCARCOpt::runOnFunction(Function &F) {
+bool ObjCARCOpt::run(Function &F, AAResults &AA) {
if (!EnableARCOpts)
return false;
@@ -2427,7 +2404,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
<< " >>>"
"\n");
- PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
+ PA.setAA(&AA);
#ifndef NDEBUG
if (AreStatisticsEnabled()) {
@@ -2484,3 +2461,17 @@ void ObjCARCOpt::releaseMemory() {
/// @}
///
+
+PreservedAnalyses ObjCARCOptPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ ObjCARCOpt OCAO;
+ OCAO.init(*F.getParent());
+
+ bool Changed = OCAO.run(F, AM.getResult<AAManager>(F));
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}
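
The ObjCARCOpts.cpp changes above split the old FunctionPass into a plain ObjCARCOpt implementation class plus two thin adapters, one per pass manager. A minimal sketch of that pattern, using hypothetical names (WidgetOptImpl, WidgetOptLegacy, runWidgetOpt) rather than the real LLVM types:

// The shared implementation: owns the state, knows nothing about pass managers.
struct WidgetOptImpl {
  bool Enabled = true;
  void init() { /* cache module-level facts once */ }
  bool run() { return Enabled; /* report whether the IR changed */ }
  void releaseMemory() { /* drop per-function caches */ }
};

// Legacy-pass adapter: a long-lived object, so it forwards the lifecycle hooks.
struct WidgetOptLegacy {
  WidgetOptImpl Impl;
  bool doInitialization() { Impl.init(); return false; }
  bool runOnFunction() { return Impl.run(); }
  void releaseMemory() { Impl.releaseMemory(); }
};

// New-pass-manager adapter: a fresh implementation per invocation.
bool runWidgetOpt() {
  WidgetOptImpl Impl;
  Impl.init();
  bool Changed = Impl.run();
  Impl.releaseMemory();
  return Changed;
}
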
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index c6138edba95a..3d59b2edc55c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -40,22 +40,19 @@ using namespace llvm::objcarc;
bool ProvenanceAnalysis::relatedSelect(const SelectInst *A,
const Value *B) {
- const DataLayout &DL = A->getModule()->getDataLayout();
// If the values are Selects with the same condition, we can do a more precise
// check: just check for relations between the values on corresponding arms.
if (const SelectInst *SB = dyn_cast<SelectInst>(B))
if (A->getCondition() == SB->getCondition())
- return related(A->getTrueValue(), SB->getTrueValue(), DL) ||
- related(A->getFalseValue(), SB->getFalseValue(), DL);
+ return related(A->getTrueValue(), SB->getTrueValue()) ||
+ related(A->getFalseValue(), SB->getFalseValue());
// Check both arms of the Select node individually.
- return related(A->getTrueValue(), B, DL) ||
- related(A->getFalseValue(), B, DL);
+ return related(A->getTrueValue(), B) || related(A->getFalseValue(), B);
}
bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
const Value *B) {
- const DataLayout &DL = A->getModule()->getDataLayout();
// If the values are PHIs in the same block, we can do a more precise as well
// as efficient check: just check for relations between the values on
// corresponding edges.
@@ -63,7 +60,7 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
if (PNB->getParent() == A->getParent()) {
for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
if (related(A->getIncomingValue(i),
- PNB->getIncomingValueForBlock(A->getIncomingBlock(i)), DL))
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
return true;
return false;
}
@@ -71,7 +68,7 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
// Check each unique source of the PHI node against B.
SmallPtrSet<const Value *, 4> UniqueSrc;
for (Value *PV1 : A->incoming_values()) {
- if (UniqueSrc.insert(PV1).second && related(PV1, B, DL))
+ if (UniqueSrc.insert(PV1).second && related(PV1, B))
return true;
}
@@ -112,8 +109,7 @@ static bool IsStoredObjCPointer(const Value *P) {
return false;
}
-bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B,
- const DataLayout &DL) {
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
// Ask regular AliasAnalysis, for a first approximation.
switch (AA->alias(A, B)) {
case NoAlias:
@@ -160,10 +156,9 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B,
return true;
}
-bool ProvenanceAnalysis::related(const Value *A, const Value *B,
- const DataLayout &DL) {
- A = GetUnderlyingObjCPtrCached(A, DL, UnderlyingObjCPtrCache);
- B = GetUnderlyingObjCPtrCached(B, DL, UnderlyingObjCPtrCache);
+bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
+ A = GetUnderlyingObjCPtrCached(A, UnderlyingObjCPtrCache);
+ B = GetUnderlyingObjCPtrCached(B, UnderlyingObjCPtrCache);
// Quick check.
if (A == B)
@@ -178,7 +173,7 @@ bool ProvenanceAnalysis::related(const Value *A, const Value *B,
if (!Pair.second)
return Pair.first->second;
- bool Result = relatedCheck(A, B, DL);
+ bool Result = relatedCheck(A, B);
CachedResults[ValuePairTy(A, B)] = Result;
return Result;
}
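
ProvenanceAnalysis::related() above answers a symmetric, potentially expensive query and memoizes the result per pair of values. A minimal sketch of that caching scheme with hypothetical stand-ins (Node, expensiveCheck); ordering the pair first lets (A, B) and (B, A) share one cache entry:

#include <functional>
#include <map>
#include <utility>

struct Node {};

// The costly query we only want to evaluate once per unordered pair.
bool expensiveCheck(const Node *A, const Node *B) { return A == B; }

using PairKey = std::pair<const Node *, const Node *>;

bool relatedCached(const Node *A, const Node *B, std::map<PairKey, bool> &Cache) {
  if (A == B)
    return true;                              // quick check, as above
  if (std::less<const Node *>()(B, A))
    std::swap(A, B);                          // canonical order for the key
  auto It = Cache.find({A, B});
  if (It != Cache.end())
    return It->second;                        // cache hit
  bool Result = expensiveCheck(A, B);         // miss: compute and remember
  Cache[{A, B}] = Result;
  return Result;
}
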
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index 8fd842fd42d6..a63e356ce1fc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -26,12 +26,12 @@
#define LLVM_LIB_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/ValueHandle.h"
#include <utility>
namespace llvm {
+class AAResults;
class DataLayout;
class PHINode;
class SelectInst;
@@ -49,7 +49,7 @@ namespace objcarc {
/// not two pointers have the same provenance source and thus could
/// potentially be related.
class ProvenanceAnalysis {
- AliasAnalysis *AA;
+ AAResults *AA;
using ValuePairTy = std::pair<const Value *, const Value *>;
using CachedResultsTy = DenseMap<ValuePairTy, bool>;
@@ -58,7 +58,7 @@ class ProvenanceAnalysis {
DenseMap<const Value *, WeakTrackingVH> UnderlyingObjCPtrCache;
- bool relatedCheck(const Value *A, const Value *B, const DataLayout &DL);
+ bool relatedCheck(const Value *A, const Value *B);
bool relatedSelect(const SelectInst *A, const Value *B);
bool relatedPHI(const PHINode *A, const Value *B);
@@ -67,11 +67,11 @@ public:
ProvenanceAnalysis(const ProvenanceAnalysis &) = delete;
ProvenanceAnalysis &operator=(const ProvenanceAnalysis &) = delete;
- void setAA(AliasAnalysis *aa) { AA = aa; }
+ void setAA(AAResults *aa) { AA = aa; }
- AliasAnalysis *getAA() const { return AA; }
+ AAResults *getAA() const { return AA; }
- bool related(const Value *A, const Value *B, const DataLayout &DL);
+ bool related(const Value *A, const Value *B);
void clear() {
CachedResults.clear();
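
The header change above drops the AliasAnalysis.h include in favour of a forward declaration of AAResults, which is enough because ProvenanceAnalysis only stores and hands back a pointer. The same trick in a minimal, self-contained sketch (Expensive and Widget are hypothetical):

// widget.h -- a pointer member only needs the type's name, not its definition.
class Expensive;                 // forward declaration instead of #include

class Widget {
  Expensive *E = nullptr;
public:
  void setExpensive(Expensive *P) { E = P; }     // storing/passing pointers: fine
  Expensive *getExpensive() const { return E; }
};
// widget.cpp would include the full header only where Expensive's members are
// actually used, so every other includer of widget.h compiles less.
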
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index 99a2055aba94..6fdfe787d438 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -12,6 +12,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -66,7 +67,6 @@ bool PAEval::runOnFunction(Function &F) {
ProvenanceAnalysis PA;
PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
- const DataLayout &DL = F.getParent()->getDataLayout();
for (Value *V1 : Values) {
StringRef NameV1 = getName(V1);
@@ -75,7 +75,7 @@ bool PAEval::runOnFunction(Function &F) {
if (NameV1 >= NameV2)
continue;
errs() << NameV1 << " and " << NameV2;
- if (PA.related(V1, V2, DL))
+ if (PA.related(V1, V2))
errs() << " are related.\n";
else
errs() << " are not related.\n";
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/PtrState.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/PtrState.cpp
index 26dd416d6184..6071ec3e4d91 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/PtrState.cpp
@@ -232,7 +232,7 @@ bool BottomUpPtrState::HandlePotentialAlterRefCount(Instruction *Inst,
Sequence S = GetSeq();
// Check for possible releases.
- if (!CanAlterRefCount(Inst, Ptr, PA, Class))
+ if (!CanDecrementRefCount(Inst, Ptr, PA, Class))
return false;
LLVM_DEBUG(dbgs() << " CanAlterRefCount: Seq: " << S << "; "
@@ -383,7 +383,7 @@ bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst,
ARCInstKind Class) {
// Check for possible releases. Treat clang.arc.use as a releasing instruction
// to prevent sinking a retain past it.
- if (!CanAlterRefCount(Inst, Ptr, PA, Class) &&
+ if (!CanDecrementRefCount(Inst, Ptr, PA, Class) &&
Class != ARCInstKind::IntrinsicUser)
return false;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
index c3709b9afffb..ce4e5e575fbf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -325,7 +325,7 @@ void AggressiveDeadCodeElimination::initialize() {
bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
// TODO -- use llvm::isInstructionTriviallyDead
- if (I.isEHPad() || I.mayHaveSideEffects()) {
+ if (I.isEHPad() || I.mayHaveSideEffects() || !I.willReturn()) {
// Skip any value profile instrumentation calls if they are
// instrumenting constants.
if (isInstrumentsConstant(I))
@@ -643,7 +643,7 @@ void AggressiveDeadCodeElimination::computeReversePostOrder() {
SmallPtrSet<BasicBlock*, 16> Visited;
unsigned PostOrder = 0;
for (auto &BB : F) {
- if (succ_begin(&BB) != succ_end(&BB))
+ if (!succ_empty(&BB))
continue;
for (BasicBlock *Block : inverse_post_order_ext(&BB,Visited))
BlockInfo[Block].PostOrder = PostOrder++;
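
Two details in the ADCE hunks above: an instruction now also counts as always live when it may not return, and the no-successors test is spelled with a named helper instead of an iterator comparison. A minimal sketch of both rules over hypothetical Instr/Block types:

#include <vector>

struct Instr {
  bool IsEHPad = false;
  bool MayHaveSideEffects = false;
  bool WillReturn = true;        // conservatively false when unknown
};

// Even an unused call must stay if it might not return (it could loop forever
// or abort); deleting it would change observable behaviour.
bool isAlwaysLive(const Instr &I) {
  return I.IsEHPad || I.MayHaveSideEffects || !I.WillReturn;
}

struct Block { std::vector<Block *> Succs; };

// A named predicate reads better than comparing begin()/end() iterators.
bool succEmpty(const Block *B) { return B->Succs.empty(); }
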
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 5c008585869c..bccf94fc217f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -15,6 +15,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#define AA_NAME "alignment-from-assumptions"
#define DEBUG_TYPE AA_NAME
@@ -203,103 +204,33 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
}
bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
+ unsigned Idx,
Value *&AAPtr,
const SCEV *&AlignSCEV,
const SCEV *&OffSCEV) {
- // An alignment assume must be a statement about the least-significant
- // bits of the pointer being zero, possibly with some offset.
- ICmpInst *ICI = dyn_cast<ICmpInst>(I->getArgOperand(0));
- if (!ICI)
+ Type *Int64Ty = Type::getInt64Ty(I->getContext());
+ OperandBundleUse AlignOB = I->getOperandBundleAt(Idx);
+ if (AlignOB.getTagName() != "align")
return false;
-
- // This must be an expression of the form: x & m == 0.
- if (ICI->getPredicate() != ICmpInst::ICMP_EQ)
- return false;
-
- // Swap things around so that the RHS is 0.
- Value *CmpLHS = ICI->getOperand(0);
- Value *CmpRHS = ICI->getOperand(1);
- const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS);
- const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS);
- if (CmpLHSSCEV->isZero())
- std::swap(CmpLHS, CmpRHS);
- else if (!CmpRHSSCEV->isZero())
- return false;
-
- BinaryOperator *CmpBO = dyn_cast<BinaryOperator>(CmpLHS);
- if (!CmpBO || CmpBO->getOpcode() != Instruction::And)
- return false;
-
- // Swap things around so that the right operand of the and is a constant
- // (the mask); we cannot deal with variable masks.
- Value *AndLHS = CmpBO->getOperand(0);
- Value *AndRHS = CmpBO->getOperand(1);
- const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS);
- const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS);
- if (isa<SCEVConstant>(AndLHSSCEV)) {
- std::swap(AndLHS, AndRHS);
- std::swap(AndLHSSCEV, AndRHSSCEV);
- }
-
- const SCEVConstant *MaskSCEV = dyn_cast<SCEVConstant>(AndRHSSCEV);
- if (!MaskSCEV)
- return false;
-
- // The mask must have some trailing ones (otherwise the condition is
- // trivial and tells us nothing about the alignment of the left operand).
- unsigned TrailingOnes = MaskSCEV->getAPInt().countTrailingOnes();
- if (!TrailingOnes)
- return false;
-
- // Cap the alignment at the maximum with which LLVM can deal (and make sure
- // we don't overflow the shift).
- uint64_t Alignment;
- TrailingOnes = std::min(TrailingOnes,
- unsigned(sizeof(unsigned) * CHAR_BIT - 1));
- Alignment = std::min(1u << TrailingOnes, +Value::MaximumAlignment);
-
- Type *Int64Ty = Type::getInt64Ty(I->getParent()->getParent()->getContext());
- AlignSCEV = SE->getConstant(Int64Ty, Alignment);
-
- // The LHS might be a ptrtoint instruction, or it might be the pointer
- // with an offset.
- AAPtr = nullptr;
- OffSCEV = nullptr;
- if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
- AAPtr = PToI->getPointerOperand();
+ assert(AlignOB.Inputs.size() >= 2);
+ AAPtr = AlignOB.Inputs[0].get();
+ // TODO: Consider accumulating the offset to the base.
+ AAPtr = AAPtr->stripPointerCastsSameRepresentation();
+ AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get());
+ AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty);
+ if (AlignOB.Inputs.size() == 3)
+ OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get());
+ else
OffSCEV = SE->getZero(Int64Ty);
- } else if (const SCEVAddExpr* AndLHSAddSCEV =
- dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
- // Try to find the ptrtoint; subtract it and the rest is the offset.
- for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(),
- JE = AndLHSAddSCEV->op_end(); J != JE; ++J)
- if (const SCEVUnknown *OpUnk = dyn_cast<SCEVUnknown>(*J))
- if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(OpUnk->getValue())) {
- AAPtr = PToI->getPointerOperand();
- OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J);
- break;
- }
- }
-
- if (!AAPtr)
- return false;
-
- // Sign extend the offset to 64 bits (so that it is like all of the other
- // expressions).
- unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
- if (OffSCEVBits < 64)
- OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
- else if (OffSCEVBits > 64)
- return false;
-
- AAPtr = AAPtr->stripPointerCasts();
+ OffSCEV = SE->getTruncateOrZeroExtend(OffSCEV, Int64Ty);
return true;
}
-bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
+bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
+ unsigned Idx) {
Value *AAPtr;
const SCEV *AlignSCEV, *OffSCEV;
- if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV))
+ if (!extractAlignmentInfo(ACall, Idx, AAPtr, AlignSCEV, OffSCEV))
return false;
// Skip ConstantPointerNull and UndefValue. Assumptions on these shouldn't
@@ -317,13 +248,14 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
continue;
if (Instruction *K = dyn_cast<Instruction>(J))
- if (isValidAssumeForContext(ACall, K, DT))
WorkList.push_back(K);
}
while (!WorkList.empty()) {
Instruction *J = WorkList.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
+ if (!isValidAssumeForContext(ACall, J, DT))
+ continue;
Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
LI->getPointerOperand(), SE);
if (NewAlignment > LI->getAlign()) {
@@ -331,6 +263,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
++NumLoadAlignChanged;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
+ if (!isValidAssumeForContext(ACall, J, DT))
+ continue;
Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
SI->getPointerOperand(), SE);
if (NewAlignment > SI->getAlign()) {
@@ -338,6 +272,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
++NumStoreAlignChanged;
}
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
+ if (!isValidAssumeForContext(ACall, J, DT))
+ continue;
Align NewDestAlignment =
getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MI->getDest(), SE);
@@ -369,7 +305,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
Visited.insert(J);
for (User *UJ : J->users()) {
Instruction *K = cast<Instruction>(UJ);
- if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DT))
+ if (!Visited.count(K))
WorkList.push_back(K);
}
}
@@ -396,8 +332,11 @@ bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
bool Changed = false;
for (auto &AssumeVH : AC.assumptions())
- if (AssumeVH)
- Changed |= processAssumption(cast<CallInst>(AssumeVH));
+ if (AssumeVH) {
+ CallInst *Call = cast<CallInst>(AssumeVH);
+ for (unsigned Idx = 0; Idx < Call->getNumOperandBundles(); Idx++)
+ Changed |= processAssumption(Call, Idx);
+ }
return Changed;
}
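
The rewritten extractAlignmentInfo() above no longer pattern-matches an `(ptrtoint %p & mask) == 0` compare; it reads an "align" operand bundle on the assume, whose inputs are (pointer, alignment[, offset]) -- the IR looks roughly like `call void @llvm.assume(i1 true) ["align"(i8* %p, i64 32, i64 %off)]`. A minimal sketch of decoding that bundle shape, with a hypothetical Bundle struct standing in for llvm::OperandBundleUse:

#include <cstdint>
#include <string>
#include <vector>

struct Bundle {
  std::string Tag;
  std::vector<int64_t> Inputs;   // index 0: pointer id, 1: alignment, 2: optional offset
};

// Returns true and fills Align/Offset if the bundle encodes an alignment fact.
bool extractAlign(const Bundle &B, int64_t &Align, int64_t &Offset) {
  if (B.Tag != "align" || B.Inputs.size() < 2)
    return false;
  Align = B.Inputs[1];
  Offset = B.Inputs.size() >= 3 ? B.Inputs[2] : 0;   // offset defaults to zero
  return true;
}
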
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
new file mode 100644
index 000000000000..a02d88fe066f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
@@ -0,0 +1,90 @@
+//===-- AnnotationRemarks.cpp - Generate remarks for annotated instrs. ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generate remarks for instructions marked with !annotation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+using namespace llvm::ore;
+
+#define DEBUG_TYPE "annotation-remarks"
+#define REMARK_PASS DEBUG_TYPE
+
+static void runImpl(Function &F) {
+ if (!OptimizationRemarkEmitter::allowExtraAnalysis(F, REMARK_PASS))
+ return;
+
+ OptimizationRemarkEmitter ORE(&F);
+ // For now, just generate a summary of the annotated instructions.
+ MapVector<StringRef, unsigned> Mapping;
+ for (Instruction &I : instructions(F)) {
+ if (!I.hasMetadata(LLVMContext::MD_annotation))
+ continue;
+ for (const MDOperand &Op :
+ I.getMetadata(LLVMContext::MD_annotation)->operands()) {
+ auto Iter = Mapping.insert({cast<MDString>(Op.get())->getString(), 0});
+ Iter.first->second++;
+ }
+ }
+
+ Instruction *IP = &*F.begin()->begin();
+ for (const auto &KV : Mapping)
+ ORE.emit(OptimizationRemarkAnalysis(REMARK_PASS, "AnnotationSummary", IP)
+ << "Annotated " << NV("count", KV.second) << " instructions with "
+ << NV("type", KV.first));
+}
+
+namespace {
+
+struct AnnotationRemarksLegacy : public FunctionPass {
+ static char ID;
+
+ AnnotationRemarksLegacy() : FunctionPass(ID) {
+ initializeAnnotationRemarksLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ runImpl(F);
+ return false;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
+} // end anonymous namespace
+
+char AnnotationRemarksLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AnnotationRemarksLegacy, "annotation-remarks",
+ "Annotation Remarks", false, false)
+INITIALIZE_PASS_END(AnnotationRemarksLegacy, "annotation-remarks",
+ "Annotation Remarks", false, false)
+
+FunctionPass *llvm::createAnnotationRemarksLegacyPass() {
+ return new AnnotationRemarksLegacy();
+}
+
+PreservedAnalyses AnnotationRemarksPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ runImpl(F);
+ return PreservedAnalyses::all();
+}
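
runImpl() above only produces a summary: it counts how many instructions carry each !annotation string and emits one remark per string. A minimal sketch of that tally with plain containers (a std::map here, so unlike the MapVector above the output order is alphabetical; Inst is a hypothetical record):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Inst { std::vector<std::string> Annotations; };   // strings from !annotation

void summarize(const std::vector<Inst> &Insts) {
  std::map<std::string, unsigned> Counts;                // annotation -> count
  for (const Inst &I : Insts)
    for (const std::string &A : I.Annotations)
      ++Counts[A];
  for (const auto &KV : Counts)
    std::printf("Annotated %u instructions with %s\n", KV.second, KV.first.c_str());
}
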
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index b26bd1114bd4..2eb94b721d96 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -208,7 +208,7 @@ static bool canSplitCallSite(CallBase &CB, TargetTransformInfo &TTI) {
// instructions before the call is less than DuplicationThreshold. The
// instructions before the call will be duplicated in the split blocks and
// corresponding uses will be updated.
- unsigned Cost = 0;
+ InstructionCost Cost = 0;
for (auto &InstBeforeCall :
llvm::make_range(CallSiteBB->begin(), CB.getIterator())) {
Cost += TTI.getInstructionCost(&InstBeforeCall,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 7c14b69d658d..fdab74fc94c5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -366,9 +366,9 @@ void ConstantHoistingPass::collectConstantCandidates(
ConstInt->getValue(), ConstInt->getType(),
TargetTransformInfo::TCK_SizeAndLatency);
else
- Cost = TTI->getIntImmCostInst(Inst->getOpcode(), Idx, ConstInt->getValue(),
- ConstInt->getType(),
- TargetTransformInfo::TCK_SizeAndLatency);
+ Cost = TTI->getIntImmCostInst(
+ Inst->getOpcode(), Idx, ConstInt->getValue(), ConstInt->getType(),
+ TargetTransformInfo::TCK_SizeAndLatency, Inst);
// Ignore cheap integer constants.
if (Cost > TargetTransformInfo::TCC_Basic) {
@@ -418,8 +418,9 @@ void ConstantHoistingPass::collectConstantCandidates(
// usually lowered to a load from constant pool. Such operation is unlikely
// to be cheaper than compute it by <Base + Offset>, which can be lowered to
// an ADD instruction or folded into Load/Store instruction.
- int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy,
- TargetTransformInfo::TCK_SizeAndLatency);
+ int Cost =
+ TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy,
+ TargetTransformInfo::TCK_SizeAndLatency, Inst);
ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];
ConstCandMapType::iterator Itr;
bool Inserted;
@@ -950,7 +951,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
// base constant.
if (!ConstIntCandVec.empty())
findBaseConstants(nullptr);
- for (auto &MapEntry : ConstGEPCandMap)
+ for (const auto &MapEntry : ConstGEPCandMap)
if (!MapEntry.second.empty())
findBaseConstants(MapEntry.first);
@@ -959,7 +960,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
bool MadeChange = false;
if (!ConstIntInfoVec.empty())
MadeChange = emitBaseConstants(nullptr);
- for (auto MapEntry : ConstGEPInfoMap)
+ for (const auto &MapEntry : ConstGEPInfoMap)
if (!MapEntry.second.empty())
MadeChange |= emitBaseConstants(MapEntry.first);
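
The two loop changes above bind each map entry as `const auto &` instead of copying it; with SmallVector-valued entries the per-iteration copy is not free. A minimal sketch of the difference:

#include <map>
#include <string>
#include <vector>

using Cands = std::map<std::string, std::vector<int>>;

int total(const Cands &M) {
  int Sum = 0;
  // for (auto Entry : M)        // would copy every pair, vector included
  for (const auto &Entry : M) {  // binds a reference; nothing is copied
    for (int V : Entry.second)
      Sum += V;
  }
  return Sum;
}
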
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantProp.cpp
deleted file mode 100644
index 73bf1d521b1d..000000000000
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements constant propagation and merging:
-//
-// Specifically, this:
-// * Converts instructions like "add int 1, 2" into 3
-//
-// Notice that:
-// * This pass has a habit of making definitions be dead. It is a good idea
-// to run a DIE pass sometime after running this pass.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/DebugCounter.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "constprop"
-
-STATISTIC(NumInstKilled, "Number of instructions killed");
-DEBUG_COUNTER(CPCounter, "constprop-transform",
- "Controls which instructions are killed");
-
-namespace {
- struct ConstantPropagation : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- ConstantPropagation() : FunctionPass(ID) {
- initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
- };
-}
-
-char ConstantPropagation::ID = 0;
-INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
- "Simple constant propagation", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(ConstantPropagation, "constprop",
- "Simple constant propagation", false, false)
-
-FunctionPass *llvm::createConstantPropagationPass() {
- return new ConstantPropagation();
-}
-
-bool ConstantPropagation::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- // Initialize the worklist to all of the instructions ready to process...
- SmallPtrSet<Instruction *, 16> WorkList;
- // The SmallVector of WorkList ensures that we do iteration at stable order.
- // We use two containers rather than one SetVector, since remove is
- // linear-time, and we don't care enough to remove from Vec.
- SmallVector<Instruction *, 16> WorkListVec;
- for (Instruction &I : instructions(&F)) {
- WorkList.insert(&I);
- WorkListVec.push_back(&I);
- }
-
- bool Changed = false;
- const DataLayout &DL = F.getParent()->getDataLayout();
- TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
- while (!WorkList.empty()) {
- SmallVector<Instruction*, 16> NewWorkListVec;
- for (auto *I : WorkListVec) {
- WorkList.erase(I); // Remove element from the worklist...
-
- if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
- if (!DebugCounter::shouldExecute(CPCounter))
- continue;
-
- // Add all of the users of this instruction to the worklist, they might
- // be constant propagatable now...
- for (User *U : I->users()) {
- // If user not in the set, then add it to the vector.
- if (WorkList.insert(cast<Instruction>(U)).second)
- NewWorkListVec.push_back(cast<Instruction>(U));
- }
-
- // Replace all of the uses of a variable with uses of the constant.
- I->replaceAllUsesWith(C);
-
- if (isInstructionTriviallyDead(I, TLI)) {
- I->eraseFromParent();
- ++NumInstKilled;
- }
-
- // We made a change to the function...
- Changed = true;
- }
- }
- WorkListVec = std::move(NewWorkListVec);
- }
- return Changed;
-}
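
The deleted pass drove itself with two containers for one worklist: a set for constant-time membership tests plus a vector for a stable visitation order, re-queueing the users of every instruction it folded. A minimal sketch of that discipline with a hypothetical Item graph and fold() callback:

#include <unordered_set>
#include <utility>
#include <vector>

struct Item { std::vector<Item *> Users; };

// Hypothetical folding step; returns true when the item was simplified and its
// users should be revisited. A real implementation would rewrite it here.
bool fold(Item *) { return false; }

void propagate(std::vector<Item *> Seed) {
  std::unordered_set<Item *> Queued(Seed.begin(), Seed.end());
  std::vector<Item *> Order = std::move(Seed);        // stable iteration order
  while (!Order.empty()) {
    std::vector<Item *> Next;
    for (Item *I : Order) {
      Queued.erase(I);
      if (!fold(I))
        continue;
      for (Item *U : I->Users)
        if (Queued.insert(U).second)                  // enqueue each user once
          Next.push_back(U);
    }
    Order = std::move(Next);
  }
}
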
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
new file mode 100644
index 000000000000..3b8af6f21ce5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -0,0 +1,407 @@
+//===-- ConstraintElimination.cpp - Eliminate conds using constraints. ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Eliminate conditions based on constraints collected from dominating
+// conditions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/ConstraintElimination.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstraintSystem.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "constraint-elimination"
+
+STATISTIC(NumCondsRemoved, "Number of instructions removed");
+DEBUG_COUNTER(EliminatedCounter, "conds-eliminated",
+ "Controls which conditions are eliminated");
+
+static int64_t MaxConstraintValue = std::numeric_limits<int64_t>::max();
+
+// Decomposes \p V into a vector of pairs of the form { c, X }, each standing
+// for the product c * X; the sum of those products equals \p V. The first pair
+// holds the constant factor and has X == nullptr. If the expression cannot be
+// decomposed, an empty vector is returned.
+static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->isNegative() || CI->uge(MaxConstraintValue))
+ return {};
+ return {{CI->getSExtValue(), nullptr}};
+ }
+ auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ if (GEP && GEP->getNumOperands() == 2) {
+ if (isa<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1))) {
+ return {{cast<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1))
+ ->getSExtValue(),
+ nullptr},
+ {1, GEP->getPointerOperand()}};
+ }
+ Value *Op0;
+ ConstantInt *CI;
+ if (match(GEP->getOperand(GEP->getNumOperands() - 1),
+ m_NUWShl(m_Value(Op0), m_ConstantInt(CI))))
+ return {{0, nullptr},
+ {1, GEP->getPointerOperand()},
+ {std::pow(int64_t(2), CI->getSExtValue()), Op0}};
+ if (match(GEP->getOperand(GEP->getNumOperands() - 1),
+ m_ZExt(m_NUWShl(m_Value(Op0), m_ConstantInt(CI)))))
+ return {{0, nullptr},
+ {1, GEP->getPointerOperand()},
+ {std::pow(int64_t(2), CI->getSExtValue()), Op0}};
+
+ return {{0, nullptr},
+ {1, GEP->getPointerOperand()},
+ {1, GEP->getOperand(GEP->getNumOperands() - 1)}};
+ }
+
+ Value *Op0;
+ Value *Op1;
+ ConstantInt *CI;
+ if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI))))
+ return {{CI->getSExtValue(), nullptr}, {1, Op0}};
+ if (match(V, m_NUWAdd(m_Value(Op0), m_Value(Op1))))
+ return {{0, nullptr}, {1, Op0}, {1, Op1}};
+
+ if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))))
+ return {{-1 * CI->getSExtValue(), nullptr}, {1, Op0}};
+ if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1))))
+ return {{0, nullptr}, {1, Op0}, {1, Op1}};
+
+ return {{0, nullptr}, {1, V}};
+}
+
+/// Turn a condition \p CmpI into a constraint vector, using indices from \p
+/// Value2Index. If \p ShouldAdd is true, new indices are added for values not
+/// yet in \p Value2Index.
+static SmallVector<int64_t, 8>
+getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
+ DenseMap<Value *, unsigned> &Value2Index, bool ShouldAdd) {
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+
+ auto TryToGetIndex = [ShouldAdd,
+ &Value2Index](Value *V) -> Optional<unsigned> {
+ if (ShouldAdd) {
+ Value2Index.insert({V, Value2Index.size() + 1});
+ return Value2Index[V];
+ }
+ auto I = Value2Index.find(V);
+ if (I == Value2Index.end())
+ return None;
+ return I->second;
+ };
+
+ if (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE)
+ return getConstraint(CmpInst::getSwappedPredicate(Pred), Op1, Op0,
+ Value2Index, ShouldAdd);
+
+ // Only ULE and ULT predicates are supported at the moment.
+ if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT)
+ return {};
+
+ auto ADec = decompose(Op0);
+ auto BDec = decompose(Op1);
+ // Skip if decomposing either of the values failed.
+ if (ADec.empty() || BDec.empty())
+ return {};
+
+ // Skip trivial constraints without any variables.
+ if (ADec.size() == 1 && BDec.size() == 1)
+ return {};
+
+ Offset1 = ADec[0].first;
+ Offset2 = BDec[0].first;
+ Offset1 *= -1;
+
+ // Create iterator ranges that skip the constant-factor.
+ auto VariablesA = make_range(std::next(ADec.begin()), ADec.end());
+ auto VariablesB = make_range(std::next(BDec.begin()), BDec.end());
+
+ // Check if each referenced value in the constraint is already in the system
+ // or can be added (if ShouldAdd is true).
+ for (const auto &KV :
+ concat<std::pair<int64_t, Value *>>(VariablesA, VariablesB))
+ if (!TryToGetIndex(KV.second))
+ return {};
+
+ // Build result constraint, by first adding all coefficients from A and then
+ // subtracting all coefficients from B.
+ SmallVector<int64_t, 8> R(Value2Index.size() + 1, 0);
+ for (const auto &KV : VariablesA)
+ R[Value2Index[KV.second]] += KV.first;
+
+ for (const auto &KV : VariablesB)
+ R[Value2Index[KV.second]] -= KV.first;
+
+ R[0] = Offset1 + Offset2 + (Pred == CmpInst::ICMP_ULT ? -1 : 0);
+ return R;
+}
+
+static SmallVector<int64_t, 8>
+getConstraint(CmpInst *Cmp, DenseMap<Value *, unsigned> &Value2Index,
+ bool ShouldAdd) {
+ return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0),
+ Cmp->getOperand(1), Value2Index, ShouldAdd);
+}
+
+namespace {
+/// Represents either a condition that holds on entry to a block or a basic
+/// block, with their respective Dominator DFS in and out numbers.
+struct ConstraintOrBlock {
+ unsigned NumIn;
+ unsigned NumOut;
+ bool IsBlock;
+ bool Not;
+ union {
+ BasicBlock *BB;
+ CmpInst *Condition;
+ };
+
+ ConstraintOrBlock(DomTreeNode *DTN)
+ : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(true),
+ BB(DTN->getBlock()) {}
+ ConstraintOrBlock(DomTreeNode *DTN, CmpInst *Condition, bool Not)
+ : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(false),
+ Not(Not), Condition(Condition) {}
+};
+
+struct StackEntry {
+ unsigned NumIn;
+ unsigned NumOut;
+ CmpInst *Condition;
+ bool IsNot;
+
+ StackEntry(unsigned NumIn, unsigned NumOut, CmpInst *Condition, bool IsNot)
+ : NumIn(NumIn), NumOut(NumOut), Condition(Condition), IsNot(IsNot) {}
+};
+} // namespace
+
+static bool eliminateConstraints(Function &F, DominatorTree &DT) {
+ bool Changed = false;
+ DT.updateDFSNumbers();
+ ConstraintSystem CS;
+
+ SmallVector<ConstraintOrBlock, 64> WorkList;
+
+ // First, collect conditions implied by branches and blocks with their
+ // Dominator DFS in and out numbers.
+ for (BasicBlock &BB : F) {
+ if (!DT.getNode(&BB))
+ continue;
+ WorkList.emplace_back(DT.getNode(&BB));
+
+ auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
+ if (!Br || !Br->isConditional())
+ continue;
+
+ // If the condition is an OR of 2 compares and the false successor only has
+ // the current block as predecessor, queue both negated conditions for the
+ // false successor.
+ Value *Op0, *Op1;
+ if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) &&
+ match(Op0, m_Cmp()) && match(Op1, m_Cmp())) {
+ BasicBlock *FalseSuccessor = Br->getSuccessor(1);
+ if (FalseSuccessor->getSinglePredecessor()) {
+ WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op0),
+ true);
+ WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op1),
+ true);
+ }
+ continue;
+ }
+
+ // If the condition is an AND of 2 compares and the true successor only has
+ // the current block as predecessor, queue both conditions for the true
+ // successor.
+ if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) &&
+ match(Op0, m_Cmp()) && match(Op1, m_Cmp())) {
+ BasicBlock *TrueSuccessor = Br->getSuccessor(0);
+ if (TrueSuccessor->getSinglePredecessor()) {
+ WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op0),
+ false);
+ WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op1),
+ false);
+ }
+ continue;
+ }
+
+ auto *CmpI = dyn_cast<CmpInst>(Br->getCondition());
+ if (!CmpI)
+ continue;
+ if (Br->getSuccessor(0)->getSinglePredecessor())
+ WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false);
+ if (Br->getSuccessor(1)->getSinglePredecessor())
+ WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true);
+ }
+
+ // Next, sort worklist by dominance, so that dominating blocks and conditions
+ // come before blocks and conditions dominated by them. If a block and a
+ // condition have the same numbers, the condition comes before the block, as
+ // it holds on entry to the block.
+ sort(WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) {
+ return std::tie(A.NumIn, A.IsBlock) < std::tie(B.NumIn, B.IsBlock);
+ });
+
+ // Finally, process ordered worklist and eliminate implied conditions.
+ SmallVector<StackEntry, 16> DFSInStack;
+ DenseMap<Value *, unsigned> Value2Index;
+ for (ConstraintOrBlock &CB : WorkList) {
+ // First, pop entries from the stack that are out-of-scope for CB. Remove
+ // the corresponding entry from the constraint system.
+ while (!DFSInStack.empty()) {
+ auto &E = DFSInStack.back();
+ LLVM_DEBUG(dbgs() << "Top of stack : " << E.NumIn << " " << E.NumOut
+ << "\n");
+ LLVM_DEBUG(dbgs() << "CB: " << CB.NumIn << " " << CB.NumOut << "\n");
+ assert(E.NumIn <= CB.NumIn);
+ if (CB.NumOut <= E.NumOut)
+ break;
+ LLVM_DEBUG(dbgs() << "Removing " << *E.Condition << " " << E.IsNot
+ << "\n");
+ DFSInStack.pop_back();
+ CS.popLastConstraint();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Processing ";
+ if (CB.IsBlock)
+ dbgs() << *CB.BB;
+ else
+ dbgs() << *CB.Condition;
+ dbgs() << "\n";
+ });
+
+ // For a block, check if any CmpInsts become known based on the current set
+ // of constraints.
+ if (CB.IsBlock) {
+ for (Instruction &I : *CB.BB) {
+ auto *Cmp = dyn_cast<CmpInst>(&I);
+ if (!Cmp)
+ continue;
+ auto R = getConstraint(Cmp, Value2Index, false);
+ if (R.empty() || R.size() == 1)
+ continue;
+ if (CS.isConditionImplied(R)) {
+ if (!DebugCounter::shouldExecute(EliminatedCounter))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Condition " << *Cmp
+ << " implied by dominating constraints\n");
+ LLVM_DEBUG({
+ for (auto &E : reverse(DFSInStack))
+ dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
+ });
+ Cmp->replaceAllUsesWith(
+ ConstantInt::getTrue(F.getParent()->getContext()));
+ NumCondsRemoved++;
+ Changed = true;
+ }
+ if (CS.isConditionImplied(ConstraintSystem::negate(R))) {
+ if (!DebugCounter::shouldExecute(EliminatedCounter))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Condition !" << *Cmp
+ << " implied by dominating constraints\n");
+ LLVM_DEBUG({
+ for (auto &E : reverse(DFSInStack))
+ dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
+ });
+ Cmp->replaceAllUsesWith(
+ ConstantInt::getFalse(F.getParent()->getContext()));
+ NumCondsRemoved++;
+ Changed = true;
+ }
+ }
+ continue;
+ }
+
+ // Otherwise, add the condition to the system and stack, if we can transform
+ // it into a constraint.
+ auto R = getConstraint(CB.Condition, Value2Index, true);
+ if (R.empty())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n");
+ if (CB.Not)
+ R = ConstraintSystem::negate(R);
+
+ // If R has been added to the system, queue it for removal once it goes
+ // out-of-scope.
+ if (CS.addVariableRowFill(R))
+ DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not);
+ }
+
+ return Changed;
+}
+
+PreservedAnalyses ConstraintEliminationPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ if (!eliminateConstraints(F, DT))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<GlobalsAA>();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+namespace {
+
+class ConstraintElimination : public FunctionPass {
+public:
+ static char ID;
+
+ ConstraintElimination() : FunctionPass(ID) {
+ initializeConstraintEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return eliminateConstraints(F, DT);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+};
+
+} // end anonymous namespace
+
+char ConstraintElimination::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ConstraintElimination, "constraint-elimination",
+ "Constraint Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(ConstraintElimination, "constraint-elimination",
+ "Constraint Elimination", false, false)
+
+FunctionPass *llvm::createConstraintEliminationPass() {
+ return new ConstraintElimination();
+}
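
The worklist above is sorted by dominator-tree DFS numbers and conditions are popped once the walk leaves their subtree; the invariant this leans on is that A dominates B exactly when A's [in, out] interval encloses B's. A minimal sketch with a small hand-numbered tree (root(1,8), left(2,5) containing leaf(3,4), and right(6,7)):

struct DFSRange { unsigned In, Out; };

bool dominates(DFSRange A, DFSRange B) {
  return A.In <= B.In && B.Out <= A.Out;   // A's interval encloses B's
}

int main() {
  DFSRange Root{1, 8}, Left{2, 5}, Leaf{3, 4}, Right{6, 7};
  bool RootDomLeaf = dominates(Root, Leaf);    // true: 1 <= 3 and 4 <= 8
  bool RightDomLeaf = dominates(Right, Leaf);  // false: 6 <= 3 fails
  return (RootDomLeaf && !RightDomLeaf && dominates(Left, Leaf)) ? 0 : 1;
}
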
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index cd2f4ca36f3b..b671d68031a8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -58,8 +58,11 @@ STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
+STATISTIC(NumSDivSRemsNarrowed,
+ "Number of sdivs/srems whose width was decreased");
STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
-STATISTIC(NumUDivs, "Number of udivs whose width was decreased");
+STATISTIC(NumUDivURemsNarrowed,
+ "Number of udivs/urems whose width was decreased");
STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumSExt, "Number of sext converted to zext");
@@ -126,7 +129,7 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
if (S->getType()->isVectorTy()) return false;
if (isa<Constant>(S->getCondition())) return false;
- Constant *C = LVI->getConstant(S->getCondition(), S->getParent(), S);
+ Constant *C = LVI->getConstant(S->getCondition(), S);
if (!C) return false;
ConstantInt *CI = dyn_cast<ConstantInt>(C);
@@ -283,7 +286,7 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
if (isa<Constant>(Pointer)) return false;
- Constant *C = LVI->getConstant(Pointer, I->getParent(), I);
+ Constant *C = LVI->getConstant(Pointer, I);
if (!C) return false;
++NumMemAccess;
@@ -301,18 +304,9 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
if (!C)
return false;
- // As a policy choice, we choose not to waste compile time on anything where
- // the comparison is testing local values. While LVI can sometimes reason
- // about such cases, it's not its primary purpose. We do make sure to do
- // the block local query for uses from terminator instructions, but that's
- // handled in the code for each terminator. As an exception, we allow phi
- // nodes, for which LVI can thread the condition into predecessors.
- auto *I = dyn_cast<Instruction>(Op0);
- if (I && I->getParent() == Cmp->getParent() && !isa<PHINode>(I))
- return false;
-
LazyValueInfo::Tristate Result =
- LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp);
+ LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp,
+ /*UseBlockValue=*/true);
if (Result == LazyValueInfo::Unknown)
return false;
@@ -336,15 +330,6 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
Value *Cond = I->getCondition();
BasicBlock *BB = I->getParent();
- // If the condition was defined in same block as the switch then LazyValueInfo
- // currently won't say anything useful about it, though in theory it could.
- if (isa<Instruction>(Cond) && cast<Instruction>(Cond)->getParent() == BB)
- return false;
-
- // If the switch is unreachable then trying to improve it is a waste of time.
- pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
- if (PB == PE) return false;
-
// Analyse each switch case in turn.
bool Changed = false;
DenseMap<BasicBlock*, int> SuccessorsCount;
@@ -357,35 +342,9 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
-
- // Check to see if the switch condition is equal to/not equal to the case
- // value on every incoming edge, equal/not equal being the same each time.
- LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- // Is the switch condition equal to the case value?
- LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
- Cond, Case, *PI,
- BB, SI);
- // Give up on this case if nothing is known.
- if (Value == LazyValueInfo::Unknown) {
- State = LazyValueInfo::Unknown;
- break;
- }
-
- // If this was the first edge to be visited, record that all other edges
- // need to give the same result.
- if (PI == PB) {
- State = Value;
- continue;
- }
-
- // If this case is known to fire for some edges and known not to fire for
- // others then there is nothing we can do - give up.
- if (Value != State) {
- State = LazyValueInfo::Unknown;
- break;
- }
- }
+ LazyValueInfo::Tristate State =
+ LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
+ /* UseBlockValue */ true);
if (State == LazyValueInfo::False) {
// This case never fires - remove it.
@@ -429,10 +388,8 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
// See if we can prove that the given binary op intrinsic will not overflow.
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
- ConstantRange LRange = LVI->getConstantRange(
- BO->getLHS(), BO->getParent(), BO);
- ConstantRange RRange = LVI->getConstantRange(
- BO->getRHS(), BO->getParent(), BO);
+ ConstantRange LRange = LVI->getConstantRange(BO->getLHS(), BO);
+ ConstantRange RRange = LVI->getConstantRange(BO->getRHS(), BO);
ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
BO->getBinaryOp(), RRange, BO->getNoWrapKind());
return NWRegion.contains(LRange);
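
willNotOverflow() above checks that the entire value range of one operand fits inside the guaranteed no-wrap region derived from the other operand's range. The same idea for 8-bit unsigned addition, sketched with a hypothetical closed-interval Range type (LLVM's ConstantRange also handles wrapping intervals, which this toy does not):

#include <cstdint>

struct Range { uint32_t Lo, Hi; };   // closed interval of 8-bit values, Lo <= Hi

// The no-wrap region for "x + R": x may be at most 255 - R.Hi.
Range addNoWrapRegionU8(Range R) { return {0, 255 - R.Hi}; }

bool contains(Range Outer, Range Inner) {
  return Outer.Lo <= Inner.Lo && Inner.Hi <= Outer.Hi;
}

bool addNeverWrapsU8(Range L, Range R) {
  return contains(addNoWrapRegionU8(R), L);
}
// e.g. addNeverWrapsU8({0,100}, {0,100}) is true; addNeverWrapsU8({0,200}, {0,100}) is false.
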
@@ -532,8 +489,6 @@ static void processSaturatingInst(SaturatingInst *SI, LazyValueInfo *LVI) {
/// Infer nonnull attributes for the arguments at the specified callsite.
static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
- SmallVector<unsigned, 4> ArgNos;
- unsigned ArgNo = 0;
if (auto *WO = dyn_cast<WithOverflowInst>(&CB)) {
if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
@@ -549,6 +504,8 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
}
}
+ bool Changed = false;
+
// Deopt bundle operands are intended to capture state with minimal
// perturbance of the code otherwise. If we can find a constant value for
// any such operand and remove a use of the original value, that's
@@ -557,22 +514,22 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
// idiomatically, appear along rare conditional paths, it's reasonably likely
// we may have a conditional fact with which LVI can fold.
if (auto DeoptBundle = CB.getOperandBundle(LLVMContext::OB_deopt)) {
- bool Progress = false;
for (const Use &ConstU : DeoptBundle->Inputs) {
Use &U = const_cast<Use&>(ConstU);
Value *V = U.get();
if (V->getType()->isVectorTy()) continue;
if (isa<Constant>(V)) continue;
- Constant *C = LVI->getConstant(V, CB.getParent(), &CB);
+ Constant *C = LVI->getConstant(V, &CB);
if (!C) continue;
U.set(C);
- Progress = true;
+ Changed = true;
}
- if (Progress)
- return true;
}
+ SmallVector<unsigned, 4> ArgNos;
+ unsigned ArgNo = 0;
+
for (Value *V : CB.args()) {
PointerType *Type = dyn_cast<PointerType>(V->getType());
// Try to mark pointer typed parameters as non-null. We skip the
@@ -590,7 +547,7 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
assert(ArgNo == CB.arg_size() && "sanity check");
if (ArgNos.empty())
- return false;
+ return Changed;
AttributeList AS = CB.getAttributes();
LLVMContext &Ctx = CB.getContext();
@@ -601,13 +558,79 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
return true;
}
-static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) {
- Constant *Zero = ConstantInt::get(SDI->getType(), 0);
- for (Value *O : SDI->operands()) {
- auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, O, Zero, SDI);
- if (Result != LazyValueInfo::True)
- return false;
+static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
+ Constant *Zero = ConstantInt::get(V->getType(), 0);
+ auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, V, Zero, CxtI);
+ return Result == LazyValueInfo::True;
+}
+
+static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
+ Constant *Zero = ConstantInt::get(V->getType(), 0);
+ auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SLE, V, Zero, CxtI);
+ return Result == LazyValueInfo::True;
+}
+
+enum class Domain { NonNegative, NonPositive, Unknown };
+
+Domain getDomain(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
+ if (isNonNegative(V, LVI, CxtI))
+ return Domain::NonNegative;
+ if (isNonPositive(V, LVI, CxtI))
+ return Domain::NonPositive;
+ return Domain::Unknown;
+}
+
+/// Try to shrink a sdiv/srem's width down to the smallest power of two that's
+/// sufficient to contain its operands.
+static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
+ assert(Instr->getOpcode() == Instruction::SDiv ||
+ Instr->getOpcode() == Instruction::SRem);
+ if (Instr->getType()->isVectorTy())
+ return false;
+
+ // Find the smallest power of two bitwidth that's sufficient to hold Instr's
+ // operands.
+ unsigned OrigWidth = Instr->getType()->getIntegerBitWidth();
+
+ // What is the smallest bit width that can accommodate the entire value ranges
+ // of both of the operands?
+ std::array<Optional<ConstantRange>, 2> CRs;
+ unsigned MinSignedBits = 0;
+ for (auto I : zip(Instr->operands(), CRs)) {
+ std::get<1>(I) = LVI->getConstantRange(std::get<0>(I), Instr);
+ MinSignedBits = std::max(std::get<1>(I)->getMinSignedBits(), MinSignedBits);
}
+
+ // sdiv/srem is UB if divisor is -1 and dividend is INT_MIN, so unless we can
+ // prove that such a combination is impossible, we need to bump the bitwidth.
+ if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) &&
+ CRs[0]->contains(
+ APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth)))
+ ++MinSignedBits;
+
+ // Don't shrink below 8 bits wide.
+ unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MinSignedBits), 8);
+
+ // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
+ // two.
+ if (NewWidth >= OrigWidth)
+ return false;
+
+ ++NumSDivSRemsNarrowed;
+ IRBuilder<> B{Instr};
+ auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
+ auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
+ Instr->getName() + ".lhs.trunc");
+ auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
+ Instr->getName() + ".rhs.trunc");
+ auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
+ auto *Sext = B.CreateSExt(BO, Instr->getType(), Instr->getName() + ".sext");
+ if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
+ if (BinOp->getOpcode() == Instruction::SDiv)
+ BinOp->setIsExact(Instr->isExact());
+
+ Instr->replaceAllUsesWith(Sext);
+ Instr->eraseFromParent();
return true;
}
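// A standalone sketch (not from the patch) of the width computation performed
// by narrowSDivOrSRem above, with plain integers standing in for LVI's
// ConstantRange results; the helper name and sample inputs are illustrative.
#include <cassert>

// Smallest power-of-two width, at least 8, that holds both operands' signed
// ranges, bumped by one bit when the INT_MIN / -1 combination is possible.
static unsigned narrowedWidth(unsigned MinSignedBits, bool MayHitIntMinOverMinusOne) {
  if (MayHitIntMinOverMinusOne)
    ++MinSignedBits;
  unsigned Width = 8;
  while (Width < MinSignedBits)
    Width *= 2;
  return Width;
}

int main() {
  assert(narrowedWidth(7, false) == 8);    // a 64-bit sdiv can shrink to i8
  assert(narrowedWidth(9, false) == 16);   // rounds up to the next power of two
  assert(narrowedWidth(8, true) == 16);    // extra bit to dodge INT_MIN / -1 UB
}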
@@ -621,21 +644,23 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
// Find the smallest power of two bitwidth that's sufficient to hold Instr's
// operands.
- auto OrigWidth = Instr->getType()->getIntegerBitWidth();
- ConstantRange OperandRange(OrigWidth, /*isFullSet=*/false);
+
+ // What is the smallest bit width that can accommodate the entire value ranges
+ // of both of the operands?
+ unsigned MaxActiveBits = 0;
for (Value *Operand : Instr->operands()) {
- OperandRange = OperandRange.unionWith(
- LVI->getConstantRange(Operand, Instr->getParent()));
+ ConstantRange CR = LVI->getConstantRange(Operand, Instr);
+ MaxActiveBits = std::max(CR.getActiveBits(), MaxActiveBits);
}
// Don't shrink below 8 bits wide.
- unsigned NewWidth = std::max<unsigned>(
- PowerOf2Ceil(OperandRange.getUnsignedMax().getActiveBits()), 8);
+ unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8);
+
// NewWidth might be greater than OrigWidth if OrigWidth is not a power of
// two.
- if (NewWidth >= OrigWidth)
+ if (NewWidth >= Instr->getType()->getIntegerBitWidth())
return false;
- ++NumUDivs;
+ ++NumUDivURemsNarrowed;
IRBuilder<> B{Instr};
auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
@@ -654,52 +679,135 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
}
static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
- if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI))
+ assert(SDI->getOpcode() == Instruction::SRem);
+ if (SDI->getType()->isVectorTy())
return false;
+ struct Operand {
+ Value *V;
+ Domain D;
+ };
+ std::array<Operand, 2> Ops;
+
+ for (const auto I : zip(Ops, SDI->operands())) {
+ Operand &Op = std::get<0>(I);
+ Op.V = std::get<1>(I);
+ Op.D = getDomain(Op.V, LVI, SDI);
+ if (Op.D == Domain::Unknown)
+ return false;
+ }
+
+ // We know domains of both of the operands!
++NumSRems;
- auto *BO = BinaryOperator::CreateURem(SDI->getOperand(0), SDI->getOperand(1),
- SDI->getName(), SDI);
- BO->setDebugLoc(SDI->getDebugLoc());
- SDI->replaceAllUsesWith(BO);
+
+ // We need operands to be non-negative, so negate each one that isn't.
+ for (Operand &Op : Ops) {
+ if (Op.D == Domain::NonNegative)
+ continue;
+ auto *BO =
+ BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
+ Op.V = BO;
+ }
+
+ auto *URem =
+ BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
+ URem->setDebugLoc(SDI->getDebugLoc());
+
+ Value *Res = URem;
+
+ // If the dividend was non-positive, we need to negate the result.
+ if (Ops[0].D == Domain::NonPositive)
+ Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
+
+ SDI->replaceAllUsesWith(Res);
SDI->eraseFromParent();
- // Try to process our new urem.
- processUDivOrURem(BO, LVI);
+ // Try to simplify our new urem.
+ processUDivOrURem(URem, LVI);
return true;
}
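// The arithmetic identity that processSRem relies on, checked standalone on
// concrete values (not part of the patch): when the sign domain of each
// operand is known, srem can be computed as a urem on operands negated to be
// non-negative, with the result negated iff the dividend was non-positive.
#include <cassert>
#include <cstdint>

static int64_t sremViaUrem(int64_t A, int64_t B) {
  bool DividendNonPos = A <= 0;
  uint64_t UA = DividendNonPos ? uint64_t(-A) : uint64_t(A);
  uint64_t UB = B <= 0 ? uint64_t(-B) : uint64_t(B);
  uint64_t R = UA % UB;
  return DividendNonPos ? -int64_t(R) : int64_t(R);
}

int main() {
  assert(sremViaUrem(7, 3) == 7 % 3);       //  1
  assert(sremViaUrem(-7, 3) == -7 % 3);     // -1: result follows the dividend
  assert(sremViaUrem(7, -3) == 7 % -3);     //  1: divisor's sign is irrelevant
  assert(sremViaUrem(-7, -3) == -7 % -3);   // -1
}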
/// See if LazyValueInfo's ability to exploit edge conditions or range
-/// information is sufficient to prove the both operands of this SDiv are
-/// positive. If this is the case, replace the SDiv with a UDiv. Even for local
+/// information is sufficient to prove the signs of both operands of this SDiv.
+/// If this is the case, replace the SDiv with a UDiv. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
- if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI))
+ assert(SDI->getOpcode() == Instruction::SDiv);
+ if (SDI->getType()->isVectorTy())
return false;
+ struct Operand {
+ Value *V;
+ Domain D;
+ };
+ std::array<Operand, 2> Ops;
+
+ for (const auto I : zip(Ops, SDI->operands())) {
+ Operand &Op = std::get<0>(I);
+ Op.V = std::get<1>(I);
+ Op.D = getDomain(Op.V, LVI, SDI);
+ if (Op.D == Domain::Unknown)
+ return false;
+ }
+
+ // We know domains of both of the operands!
++NumSDivs;
- auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1),
- SDI->getName(), SDI);
- BO->setDebugLoc(SDI->getDebugLoc());
- BO->setIsExact(SDI->isExact());
- SDI->replaceAllUsesWith(BO);
+
+ // We need operands to be non-negative, so negate each one that isn't.
+ for (Operand &Op : Ops) {
+ if (Op.D == Domain::NonNegative)
+ continue;
+ auto *BO =
+ BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
+ Op.V = BO;
+ }
+
+ auto *UDiv =
+ BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
+ UDiv->setDebugLoc(SDI->getDebugLoc());
+ UDiv->setIsExact(SDI->isExact());
+
+ Value *Res = UDiv;
+
+ // If the operands had two different domains, we need to negate the result.
+ if (Ops[0].D != Ops[1].D)
+ Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
+
+ SDI->replaceAllUsesWith(Res);
SDI->eraseFromParent();
// Try to simplify our new udiv.
- processUDivOrURem(BO, LVI);
+ processUDivOrURem(UDiv, LVI);
return true;
}
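// The matching identity for processSDiv (a standalone check, not part of the
// patch): divide the magnitudes with udiv and negate the quotient iff the two
// operands' sign domains differ.
#include <cassert>
#include <cstdint>

static int64_t sdivViaUdiv(int64_t A, int64_t B) {
  bool NegA = A < 0, NegB = B < 0;
  uint64_t Q = (NegA ? uint64_t(-A) : uint64_t(A)) /
               (NegB ? uint64_t(-B) : uint64_t(B));
  return NegA != NegB ? -int64_t(Q) : int64_t(Q);
}

int main() {
  assert(sdivViaUdiv(7, 3) == 7 / 3);       //  2
  assert(sdivViaUdiv(-7, 3) == -7 / 3);     // -2: domains differ, negate
  assert(sdivViaUdiv(-7, -3) == -7 / -3);   //  2: domains match, keep
}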
+static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
+ assert(Instr->getOpcode() == Instruction::SDiv ||
+ Instr->getOpcode() == Instruction::SRem);
+ if (Instr->getType()->isVectorTy())
+ return false;
+
+ if (Instr->getOpcode() == Instruction::SDiv)
+ if (processSDiv(Instr, LVI))
+ return true;
+
+ if (Instr->getOpcode() == Instruction::SRem)
+ if (processSRem(Instr, LVI))
+ return true;
+
+ return narrowSDivOrSRem(Instr, LVI);
+}
+
static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
if (SDI->getType()->isVectorTy())
return false;
- Constant *Zero = ConstantInt::get(SDI->getType(), 0);
- if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, SDI->getOperand(0), Zero, SDI) !=
- LazyValueInfo::True)
+ if (!isNonNegative(SDI->getOperand(0), LVI, SDI))
return false;
++NumAShrs;
@@ -719,9 +827,7 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
Value *Base = SDI->getOperand(0);
- Constant *Zero = ConstantInt::get(Base->getType(), 0);
- if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, Base, Zero, SDI) !=
- LazyValueInfo::True)
+ if (!isNonNegative(Base, LVI, SDI))
return false;
++NumSExt;
@@ -748,14 +854,12 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
if (NSW && NUW)
return false;
- BasicBlock *BB = BinOp->getParent();
-
Instruction::BinaryOps Opcode = BinOp->getOpcode();
Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);
- ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp);
- ConstantRange RRange = LVI->getConstantRange(RHS, BB, BinOp);
+ ConstantRange LRange = LVI->getConstantRange(LHS, BinOp);
+ ConstantRange RRange = LVI->getConstantRange(RHS, BinOp);
bool Changed = false;
bool NewNUW = false, NewNSW = false;
@@ -783,7 +887,6 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) {
// Pattern match (and lhs, C) where C includes a superset of bits which might
// be set in lhs. This is a common truncation idiom created by instcombine.
- BasicBlock *BB = BinOp->getParent();
Value *LHS = BinOp->getOperand(0);
ConstantInt *RHS = dyn_cast<ConstantInt>(BinOp->getOperand(1));
if (!RHS || !RHS->getValue().isMask())
@@ -792,7 +895,7 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) {
// We can only replace the AND with LHS based on range info if the range does
// not include undef.
ConstantRange LRange =
- LVI->getConstantRange(LHS, BB, BinOp, /*UndefAllowed=*/false);
+ LVI->getConstantRange(LHS, BinOp, /*UndefAllowed=*/false);
if (!LRange.getUnsignedMax().ule(RHS->getValue()))
return false;
@@ -804,7 +907,7 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) {
static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
- if (Constant *C = LVI->getConstant(V, At->getParent(), At))
+ if (Constant *C = LVI->getConstant(V, At))
return C;
// TODO: The following really should be sunk inside LVI's core algorithm, or
@@ -858,10 +961,8 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
BBChanged |= processCallSite(cast<CallBase>(*II), LVI);
break;
case Instruction::SRem:
- BBChanged |= processSRem(cast<BinaryOperator>(II), LVI);
- break;
case Instruction::SDiv:
- BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI);
break;
case Instruction::UDiv:
case Instruction::URem:
@@ -929,11 +1030,19 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
bool Changed = runImpl(F, LVI, DT, getBestSimplifyQuery(AM, F));
- if (!Changed)
- return PreservedAnalyses::all();
PreservedAnalyses PA;
- PA.preserve<GlobalsAA>();
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LazyValueAnalysis>();
+ if (!Changed) {
+ PA = PreservedAnalyses::all();
+ } else {
+ PA.preserve<GlobalsAA>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LazyValueAnalysis>();
+ }
+
+ // Keeping LVI alive is expensive, both because it uses a lot of memory, and
+ // because invalidating values in LVI is expensive. While CVP does preserve
+ // LVI, we know that passes after JumpThreading+CVP will not need the result
+ // of this analysis, so we forcefully discard it early.
+ PA.abandon<LazyValueAnalysis>();
return PA;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp
index 28947482e303..d55adf7c2d12 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -32,57 +32,10 @@ using namespace llvm;
#define DEBUG_TYPE "dce"
-STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
STATISTIC(DCEEliminated, "Number of insts removed");
DEBUG_COUNTER(DCECounter, "dce-transform",
"Controls which instructions are eliminated");
-namespace {
- //===--------------------------------------------------------------------===//
- // DeadInstElimination pass implementation
- //
-struct DeadInstElimination : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- DeadInstElimination() : FunctionPass(ID) {
- initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
-
- bool Changed = false;
- for (auto &BB : F) {
- for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
- Instruction *Inst = &*DI++;
- if (isInstructionTriviallyDead(Inst, TLI)) {
- if (!DebugCounter::shouldExecute(DCECounter))
- continue;
- salvageDebugInfo(*Inst);
- Inst->eraseFromParent();
- Changed = true;
- ++DIEEliminated;
- }
- }
- }
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- }
-};
-}
-
-char DeadInstElimination::ID = 0;
-INITIALIZE_PASS(DeadInstElimination, "die",
- "Dead Instruction Elimination", false, false)
-
-Pass *llvm::createDeadInstEliminationPass() {
- return new DeadInstElimination();
-}
-
//===--------------------------------------------------------------------===//
// RedundantDbgInstElimination pass implementation
//
@@ -116,6 +69,18 @@ Pass *llvm::createRedundantDbgInstEliminationPass() {
return new RedundantDbgInstElimination();
}
+PreservedAnalyses
+RedundantDbgInstEliminationPass::run(Function &F, FunctionAnalysisManager &AM) {
+ bool Changed = false;
+ for (auto &BB : F)
+ Changed |= RemoveRedundantDbgInstrs(&BB);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
//===--------------------------------------------------------------------===//
// DeadCodeElimination pass implementation
//
@@ -178,7 +143,7 @@ static bool eliminateDeadCode(Function &F, TargetLibraryInfo *TLI) {
}
PreservedAnalyses DCEPass::run(Function &F, FunctionAnalysisManager &AM) {
- if (!eliminateDeadCode(F, AM.getCachedResult<TargetLibraryAnalysis>(F)))
+ if (!eliminateDeadCode(F, &AM.getResult<TargetLibraryAnalysis>(F)))
return PreservedAnalyses::all();
PreservedAnalyses PA;
@@ -197,13 +162,14 @@ struct DCELegacyPass : public FunctionPass {
if (skipFunction(F))
return false;
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+ TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
return eliminateDeadCode(F, TLI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesCFG();
}
};
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e58db03225ee..2979225c6016 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -84,10 +84,13 @@ STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther, "Number of other instrs removed");
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
STATISTIC(NumModifiedStores, "Number of stores modified");
-STATISTIC(NumNoopStores, "Number of noop stores deleted");
STATISTIC(NumCFGChecks, "Number of stores modified");
STATISTIC(NumCFGTries, "Number of stores modified");
STATISTIC(NumCFGSuccess, "Number of stores modified");
+STATISTIC(NumGetDomMemoryDefPassed,
+ "Number of times a valid candidate is returned from getDomMemoryDef");
+STATISTIC(NumDomMemDefChecks,
+ "Number iterations check for reads in getDomMemoryDef");
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
"Controls which MemoryDefs are eliminated.");
@@ -103,19 +106,42 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging",
cl::desc("Enable partial store merging in DSE"));
static cl::opt<bool>
- EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden,
+ EnableMemorySSA("enable-dse-memoryssa", cl::init(true), cl::Hidden,
cl::desc("Use the new MemorySSA-backed DSE."));
static cl::opt<unsigned>
- MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden,
+ MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
cl::desc("The number of memory instructions to scan for "
"dead store elimination (default = 100)"));
+static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
+ "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
+ cl::desc("The maximum number of steps while walking upwards to find "
+ "MemoryDefs that may be killed (default = 90)"));
+
+static cl::opt<unsigned> MemorySSAPartialStoreLimit(
+ "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
+ cl::desc("The maximum number candidates that only partially overwrite the "
+ "killing MemoryDef to consider"
+ " (default = 5)"));
static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
"dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
"other stores per basic block (default = 5000)"));
+static cl::opt<unsigned> MemorySSASameBBStepCost(
+ "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
+ cl::desc(
+ "The cost of a step in the same basic block as the killing MemoryDef"
+ "(default = 1)"));
+
+static cl::opt<unsigned>
+ MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
+ cl::Hidden,
+ cl::desc("The cost of a step in a different basic "
+ "block than the killing MemoryDef"
+ "(default = 5)"));
+
static cl::opt<unsigned> MemorySSAPathCheckLimit(
"dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
cl::desc("The maximum number of blocks to check when trying to prove that "
@@ -203,11 +229,13 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
case Intrinsic::memset:
case Intrinsic::memmove:
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_inline:
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
case Intrinsic::memset_element_unordered_atomic:
case Intrinsic::init_trampoline:
case Intrinsic::lifetime_end:
+ case Intrinsic::masked_store:
return true;
}
}
@@ -231,23 +259,23 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
/// Return a Location stored to by the specified instruction. If isRemovable
/// returns true, this function and getLocForRead completely describe the memory
/// operations for this instruction.
-static MemoryLocation getLocForWrite(Instruction *Inst) {
-
+static MemoryLocation getLocForWrite(Instruction *Inst,
+ const TargetLibraryInfo &TLI) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return MemoryLocation::get(SI);
- if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
- // memcpy/memmove/memset.
- MemoryLocation Loc = MemoryLocation::getForDest(MI);
- return Loc;
- }
+ // memcpy/memmove/memset.
+ if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst))
+ return MemoryLocation::getForDest(MI);
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
default:
return MemoryLocation(); // Unhandled intrinsic.
case Intrinsic::init_trampoline:
- return MemoryLocation(II->getArgOperand(0));
+ return MemoryLocation::getAfter(II->getArgOperand(0));
+ case Intrinsic::masked_store:
+ return MemoryLocation::getForArgument(II, 1, TLI);
case Intrinsic::lifetime_end: {
uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
return MemoryLocation(II->getArgOperand(1), Len);
@@ -257,7 +285,7 @@ static MemoryLocation getLocForWrite(Instruction *Inst) {
if (auto *CB = dyn_cast<CallBase>(Inst))
// All the supported TLI functions so far happen to have dest as their
// first argument.
- return MemoryLocation(CB->getArgOperand(0));
+ return MemoryLocation::getAfter(CB->getArgOperand(0));
return MemoryLocation();
}
@@ -294,11 +322,13 @@ static bool isRemovable(Instruction *I) {
case Intrinsic::memset:
case Intrinsic::memmove:
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_inline:
// Don't remove volatile memory intrinsics.
return !cast<MemIntrinsic>(II)->isVolatile();
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
case Intrinsic::memset_element_unordered_atomic:
+ case Intrinsic::masked_store:
return true;
}
}
@@ -344,9 +374,10 @@ static bool isShortenableAtTheBeginning(Instruction *I) {
}
/// Return the pointer that is being written to.
-static Value *getStoredPointerOperand(Instruction *I) {
+static Value *getStoredPointerOperand(Instruction *I,
+ const TargetLibraryInfo &TLI) {
//TODO: factor this to reuse getLocForWrite
- MemoryLocation Loc = getLocForWrite(I);
+ MemoryLocation Loc = getLocForWrite(I, TLI);
assert(Loc.Ptr &&
"unable to find pointer written for analyzable instruction?");
// TODO: most APIs don't expect const Value *
@@ -372,31 +403,59 @@ enum OverwriteResult {
OW_Complete,
OW_End,
OW_PartialEarlierWithFullLater,
+ OW_MaybePartial,
OW_Unknown
};
} // end anonymous namespace
-/// Return 'OW_Complete' if a store to the 'Later' location completely
-/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
-/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
-/// beginning of the 'Earlier' location is overwritten by 'Later'.
-/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
-/// overwritten by a latter (smaller) store which doesn't write outside the big
-/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
-static OverwriteResult isOverwrite(const MemoryLocation &Later,
- const MemoryLocation &Earlier,
- const DataLayout &DL,
- const TargetLibraryInfo &TLI,
- int64_t &EarlierOff, int64_t &LaterOff,
- Instruction *DepWrite,
- InstOverlapIntervalsTy &IOL,
- AliasAnalysis &AA,
- const Function *F) {
+/// Check if two instruction are masked stores that completely
+/// overwrite one another. More specifically, \p Later has to
+/// overwrite \p Earlier.
+template <typename AATy>
+static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
+ const Instruction *Earlier,
+ AATy &AA) {
+ const auto *IIL = dyn_cast<IntrinsicInst>(Later);
+ const auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
+ if (IIL == nullptr || IIE == nullptr)
+ return OW_Unknown;
+ if (IIL->getIntrinsicID() != Intrinsic::masked_store ||
+ IIE->getIntrinsicID() != Intrinsic::masked_store)
+ return OW_Unknown;
+ // Pointers.
+ Value *LP = IIL->getArgOperand(1)->stripPointerCasts();
+ Value *EP = IIE->getArgOperand(1)->stripPointerCasts();
+ if (LP != EP && !AA.isMustAlias(LP, EP))
+ return OW_Unknown;
+ // Masks.
+ // TODO: check that Later's mask is a superset of the Earlier's mask.
+ if (IIL->getArgOperand(3) != IIE->getArgOperand(3))
+ return OW_Unknown;
+ return OW_Complete;
+}
+
+/// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
+/// instruction) completely overwrites a store to the 'Earlier' location.
+/// (by \p EarlierI instruction).
+/// Return OW_MaybePartial if \p Later does not completely overwrite
+/// \p Earlier, but they both write to the same underlying object. In that
+/// case, use isPartialOverwrite to check if \p Later partially overwrites
+/// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
+template <typename AATy>
+static OverwriteResult
+isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
+ const MemoryLocation &Later, const MemoryLocation &Earlier,
+ const DataLayout &DL, const TargetLibraryInfo &TLI,
+ int64_t &EarlierOff, int64_t &LaterOff, AATy &AA,
+ const Function *F) {
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
- if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise())
- return OW_Unknown;
+ if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
+ // Masked stores have imprecise locations, but we can reason about them
+ // to some extent.
+ return isMaskedStoreOverwrite(LaterI, EarlierI, AA);
+ }
const uint64_t LaterSize = Later.Size.getValue();
const uint64_t EarlierSize = Earlier.Size.getValue();
@@ -415,8 +474,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval/inalloca argument). If so, then it clearly
// overwrites any other store to the same object.
- const Value *UO1 = GetUnderlyingObject(P1, DL),
- *UO2 = GetUnderlyingObject(P2, DL);
+ const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
@@ -441,26 +499,59 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
if (BP1 != BP2)
return OW_Unknown;
- // The later store completely overlaps the earlier store if:
- //
- // 1. Both start at the same offset and the later one's size is greater than
- // or equal to the earlier one's, or
- //
- // |--earlier--|
- // |-- later --|
- //
- // 2. The earlier store has an offset greater than the later offset, but which
- // still lies completely within the later store.
- //
- // |--earlier--|
- // |----- later ------|
+ // The later access completely overlaps the earlier store if and only if
+ // both start and end of the earlier one is "inside" the later one:
+ // |<->|--earlier--|<->|
+ // |-------later-------|
+ // Accesses may overlap if and only if start of one of them is "inside"
+ // another one:
+ // |<->|--earlier--|<----->|
+ // |-------later-------|
+ // OR
+ // |----- earlier -----|
+ // |<->|---later---|<----->|
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
- if (EarlierOff >= LaterOff &&
- LaterSize >= EarlierSize &&
- uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
- return OW_Complete;
+ // Check if the earlier access starts "not before" the later one.
+ if (EarlierOff >= LaterOff) {
+ // If the earlier access ends "not after" the later access then the earlier
+ // one is completely overwritten by the later one.
+ if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
+ return OW_Complete;
+ // If start of the earlier access is "before" end of the later access then
+ // accesses overlap.
+ else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
+ return OW_MaybePartial;
+ }
+ // If start of the later access is "before" end of the earlier access then
+ // accesses overlap.
+ else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
+ return OW_MaybePartial;
+ }
+
+ // Can reach here only if accesses are known not to overlap. There is no
+ // dedicated code to indicate no overlap so signal "unknown".
+ return OW_Unknown;
+}
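// A self-contained model (not from the patch) of the interval classification
// implemented above: offsets are signed, sizes unsigned, and every subtraction
// happens only after the ordering of the offsets is established, exactly as
// the comments in isOverwrite describe.
#include <cassert>
#include <cstdint>

enum class Overlap { Complete, MaybePartial, Unknown };

static Overlap classify(int64_t EarlierOff, uint64_t EarlierSize,
                        int64_t LaterOff, uint64_t LaterSize) {
  if (EarlierOff >= LaterOff) {
    if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
      return Overlap::Complete;        // earlier write lies fully inside later
    if (uint64_t(EarlierOff - LaterOff) < LaterSize)
      return Overlap::MaybePartial;    // earlier write starts inside later
  } else if (uint64_t(LaterOff - EarlierOff) < EarlierSize) {
    return Overlap::MaybePartial;      // later write starts inside earlier
  }
  return Overlap::Unknown;             // disjoint accesses: no dedicated result
}

int main() {
  assert(classify(4, 4, 0, 16) == Overlap::Complete);      // [4,8) inside [0,16)
  assert(classify(0, 16, 4, 4) == Overlap::MaybePartial);  // later inside earlier
  assert(classify(0, 4, 8, 4) == Overlap::Unknown);        // no overlap at all
}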
+
+/// Return 'OW_Complete' if a store to the 'Later' location completely
+/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
+/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
+/// beginning of the 'Earlier' location is overwritten by 'Later'.
+/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
+/// overwritten by a latter (smaller) store which doesn't write outside the big
+/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
+/// NOTE: This function must only be called if both \p Later and \p Earlier
+/// write to the same underlying object with valid \p EarlierOff and \p
+/// LaterOff.
+static OverwriteResult isPartialOverwrite(const MemoryLocation &Later,
+ const MemoryLocation &Earlier,
+ int64_t EarlierOff, int64_t LaterOff,
+ Instruction *DepWrite,
+ InstOverlapIntervalsTy &IOL) {
+ const uint64_t LaterSize = Later.Size.getValue();
+ const uint64_t EarlierSize = Earlier.Size.getValue();
// We may now overlap, although the overlap is not complete. There might also
// be other incomplete overlaps, and together, they might cover the complete
// earlier write.
@@ -627,11 +718,10 @@ static bool isPossibleSelfRead(Instruction *Inst,
/// modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first
/// instruction.
-static bool memoryIsNotModifiedBetween(Instruction *FirstI,
- Instruction *SecondI,
- AliasAnalysis *AA,
- const DataLayout &DL,
- DominatorTree *DT) {
+template <typename AATy>
+static bool
+memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, AATy &AA,
+ const DataLayout &DL, DominatorTree *DT) {
// Do a backwards scan through the CFG from SecondI to FirstI. Look for
// instructions which can modify the memory location accessed by SecondI.
//
@@ -680,7 +770,7 @@ static bool memoryIsNotModifiedBetween(Instruction *FirstI,
for (; BI != EI; ++BI) {
Instruction *I = &*BI;
if (I->mayWriteToMemory() && I != SecondI)
- if (isModSet(AA->getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
+ if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
return false;
}
if (B != FirstBB) {
@@ -736,10 +826,9 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
MapVector<Instruction *, bool> &ThrowableInst) {
bool MadeChange = false;
- MemoryLocation Loc = MemoryLocation(F->getOperand(0));
+ MemoryLocation Loc = MemoryLocation::getAfter(F->getOperand(0));
SmallVector<BasicBlock *, 16> Blocks;
Blocks.push_back(F->getParent());
- const DataLayout &DL = F->getModule()->getDataLayout();
while (!Blocks.empty()) {
BasicBlock *BB = Blocks.pop_back_val();
@@ -755,7 +844,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
break;
Value *DepPointer =
- GetUnderlyingObject(getStoredPointerOperand(Dependency), DL);
+ getUnderlyingObject(getStoredPointerOperand(Dependency, *TLI));
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
@@ -795,7 +884,7 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
const DataLayout &DL, AliasAnalysis *AA,
const TargetLibraryInfo *TLI,
const Function *F) {
- const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
+ const Value *UnderlyingPointer = getUnderlyingObject(LoadedLoc.Ptr);
// A constant can't be in the dead pointer set.
if (isa<Constant>(UnderlyingPointer))
@@ -848,7 +937,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
// Treat byval or inalloca arguments the same, stores to them are dead at the
// end of the function.
for (Argument &AI : BB.getParent()->args())
- if (AI.hasPassPointeeByValueAttr())
+ if (AI.hasPassPointeeByValueCopyAttr())
DeadStackObjects.insert(&AI);
const DataLayout &DL = BB.getModule()->getDataLayout();
@@ -861,7 +950,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<const Value *, 4> Pointers;
- GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);
+ getUnderlyingObjects(getStoredPointerOperand(&*BBI, *TLI), Pointers);
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
@@ -980,8 +1069,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
}
static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
- int64_t &EarlierSize, int64_t LaterOffset,
- int64_t LaterSize, bool IsOverwriteEnd) {
+ uint64_t &EarlierSize, int64_t LaterOffset,
+ uint64_t LaterSize, bool IsOverwriteEnd) {
// TODO: base this on the target vector size so that if the earlier
// store was too small to get vector writes anyway then it's likely
// a good idea to shorten it
@@ -1036,16 +1125,23 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
static bool tryToShortenEnd(Instruction *EarlierWrite,
OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, int64_t &EarlierSize) {
+ int64_t &EarlierStart, uint64_t &EarlierSize) {
if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
return false;
OverlapIntervalsTy::iterator OII = --IntervalMap.end();
int64_t LaterStart = OII->second;
- int64_t LaterSize = OII->first - LaterStart;
+ uint64_t LaterSize = OII->first - LaterStart;
+
+ assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
- if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
- LaterStart + LaterSize >= EarlierStart + EarlierSize) {
+ if (LaterStart > EarlierStart &&
+ // Note: "LaterStart - EarlierStart" is known to be positive due to
+ // preceding check.
+ (uint64_t)(LaterStart - EarlierStart) < EarlierSize &&
+ // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to
+ // be non negative due to preceding checks.
+ LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) {
if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
LaterSize, true)) {
IntervalMap.erase(OII);
@@ -1057,16 +1153,23 @@ static bool tryToShortenEnd(Instruction *EarlierWrite,
static bool tryToShortenBegin(Instruction *EarlierWrite,
OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, int64_t &EarlierSize) {
+ int64_t &EarlierStart, uint64_t &EarlierSize) {
if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
return false;
OverlapIntervalsTy::iterator OII = IntervalMap.begin();
int64_t LaterStart = OII->second;
- int64_t LaterSize = OII->first - LaterStart;
+ uint64_t LaterSize = OII->first - LaterStart;
- if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
- assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
+ assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+
+ if (LaterStart <= EarlierStart &&
+ // Note: "EarlierStart - LaterStart" is known to be non negative due to
+ // preceding check.
+ LaterSize > (uint64_t)(EarlierStart - LaterStart)) {
+ // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be
+ // positive due to preceding checks.
+ assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize &&
"Should have been handled as OW_Complete");
if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
LaterSize, false)) {
@@ -1077,18 +1180,18 @@ static bool tryToShortenBegin(Instruction *EarlierWrite,
return false;
}
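// Why the comparisons above are phrased as "size versus known-non-negative
// difference" (standalone illustration, not part of the patch): once the sizes
// become uint64_t, a naive "LaterStart + LaterSize > EarlierStart" silently
// promotes a negative offset to unsigned and gives the wrong answer.
#include <cassert>
#include <cstdint>

int main() {
  int64_t EarlierStart = 0, LaterStart = -8; // later write begins 8 bytes lower
  uint64_t LaterSize = 4;                    // it covers [-8, -4): never reaches 0

  // What the old expression amounts to after the implicit conversions:
  bool Naive = uint64_t(LaterStart) + LaterSize > uint64_t(EarlierStart);
  // The patched form: order the offsets first, then compare against the size.
  bool Careful = LaterStart <= EarlierStart &&
                 LaterSize > uint64_t(EarlierStart - LaterStart);

  assert(Naive);     // -8 wrapped to a huge unsigned value: bogus "overlap"
  assert(!Careful);  // correct: the two ranges do not meet
}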
-static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
- const DataLayout &DL,
- InstOverlapIntervalsTy &IOL) {
+static bool removePartiallyOverlappedStores(const DataLayout &DL,
+ InstOverlapIntervalsTy &IOL,
+ const TargetLibraryInfo &TLI) {
bool Changed = false;
for (auto OI : IOL) {
Instruction *EarlierWrite = OI.first;
- MemoryLocation Loc = getLocForWrite(EarlierWrite);
+ MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI);
assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
const Value *Ptr = Loc.Ptr->stripPointerCasts();
int64_t EarlierStart = 0;
- int64_t EarlierSize = int64_t(Loc.Size.getValue());
+ uint64_t EarlierSize = Loc.Size.getValue();
GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
OverlapIntervalsTy &IntervalMap = OI.second;
Changed |=
@@ -1118,7 +1221,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
isRemovable(SI) &&
- memoryIsNotModifiedBetween(DepLoad, SI, AA, DL, DT)) {
+ memoryIsNotModifiedBetween(DepLoad, SI, *AA, DL, DT)) {
LLVM_DEBUG(
dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
@@ -1134,10 +1237,10 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) {
Instruction *UnderlyingPointer =
- dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL));
+ dyn_cast<Instruction>(getUnderlyingObject(SI->getPointerOperand()));
if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
- memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, DL, DT)) {
+ memoryIsNotModifiedBetween(UnderlyingPointer, SI, *AA, DL, DT)) {
LLVM_DEBUG(
dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
<< *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
@@ -1150,11 +1253,10 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
return false;
}
-static Constant *
-tryToMergePartialOverlappingStores(StoreInst *Earlier, StoreInst *Later,
- int64_t InstWriteOffset,
- int64_t DepWriteOffset, const DataLayout &DL,
- AliasAnalysis *AA, DominatorTree *DT) {
+template <typename AATy>
+static Constant *tryToMergePartialOverlappingStores(
+ StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset,
+ int64_t DepWriteOffset, const DataLayout &DL, AATy &AA, DominatorTree *DT) {
if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) &&
@@ -1245,7 +1347,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
continue;
// Figure out what location is being stored to.
- MemoryLocation Loc = getLocForWrite(Inst);
+ MemoryLocation Loc = getLocForWrite(Inst, *TLI);
// If we didn't get a useful location, fail.
if (!Loc.Ptr)
@@ -1269,7 +1371,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
Instruction *DepWrite = InstDep.getInst();
if (!hasAnalyzableMemoryWrite(DepWrite, *TLI))
break;
- MemoryLocation DepLoc = getLocForWrite(DepWrite);
+ MemoryLocation DepLoc = getLocForWrite(DepWrite, *TLI);
// If we didn't get a useful location, or if it isn't a size, bail out.
if (!DepLoc.Ptr)
break;
@@ -1289,7 +1391,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// to it is dead along the unwind edge. Otherwise, we need to preserve
// the store.
if (LastThrowing && DepWrite->comesBefore(LastThrowing)) {
- const Value* Underlying = GetUnderlyingObject(DepLoc.Ptr, DL);
+ const Value *Underlying = getUnderlyingObject(DepLoc.Ptr);
bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying);
if (!IsStoreDeadOnUnwind) {
// We're looking for a call to an allocation function
@@ -1311,9 +1413,13 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset,
- InstWriteOffset, DepWrite, IOL, *AA,
+ OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI,
+ DepWriteOffset, InstWriteOffset, *AA,
BB.getParent());
+ if (OR == OW_MaybePartial)
+ OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset,
+ DepWrite, IOL);
+
if (OR == OW_Complete) {
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite
<< "\n KILLER: " << *Inst << '\n');
@@ -1334,8 +1440,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
"when partial-overwrite "
"tracking is enabled");
// The overwrite result is known, so these must be known, too.
- int64_t EarlierSize = DepLoc.Size.getValue();
- int64_t LaterSize = Loc.Size.getValue();
+ uint64_t EarlierSize = DepLoc.Size.getValue();
+ uint64_t LaterSize = Loc.Size.getValue();
bool IsOverwriteEnd = (OR == OW_End);
MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
InstWriteOffset, LaterSize, IsOverwriteEnd);
@@ -1344,7 +1450,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
auto *Earlier = dyn_cast<StoreInst>(DepWrite);
auto *Later = dyn_cast<StoreInst>(Inst);
if (Constant *C = tryToMergePartialOverlappingStores(
- Earlier, Later, InstWriteOffset, DepWriteOffset, DL, AA,
+ Earlier, Later, InstWriteOffset, DepWriteOffset, DL, *AA,
DT)) {
auto *SI = new StoreInst(
C, Earlier->getPointerOperand(), false, Earlier->getAlign(),
@@ -1391,7 +1497,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
}
if (EnablePartialOverwriteTracking)
- MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
+ MadeChange |= removePartiallyOverlappedStores(DL, IOL, *TLI);
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
@@ -1425,20 +1531,21 @@ namespace {
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
-// 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking
+// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking
// upwards.
-// 2. Check that there are no reads between DomAccess and the StartDef by
-// checking all uses starting at DomAccess and walking until we see StartDef.
-// 3. For each found DomDef, check that:
-// 1. There are no barrier instructions between DomDef and StartDef (like
+// 2. Check that there are no reads between EarlierAccess and the StartDef by
+// checking all uses starting at EarlierAccess and walking until we see
+// StartDef.
+// 3. For each found CurrentDef, check that:
+// 1. There are no barrier instructions between CurrentDef and StartDef (like
// throws or stores with ordering constraints).
-// 2. StartDef is executed whenever DomDef is executed.
-// 3. StartDef completely overwrites DomDef.
-// 4. Erase DomDef from the function and MemorySSA.
+// 2. StartDef is executed whenever CurrentDef is executed.
+// 3. StartDef completely overwrites CurrentDef.
+// 4. Erase CurrentDef from the function and MemorySSA.
-// Returns true if \p M is an intrisnic that does not read or write memory.
-bool isNoopIntrinsic(MemoryUseOrDef *M) {
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(M->getMemoryInst())) {
+// Returns true if \p I is an intrinsic that does not read or write memory.
+bool isNoopIntrinsic(Instruction *I) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
@@ -1481,7 +1588,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
return true;
// Skip intrinsics that do not really read or modify memory.
- if (isNoopIntrinsic(D))
+ if (isNoopIntrinsic(D->getMemoryInst()))
return true;
return false;
@@ -1490,10 +1597,21 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
struct DSEState {
Function &F;
AliasAnalysis &AA;
+
+ /// The single BatchAA instance that is used to cache AA queries. It will
+ /// not be invalidated over the whole run. This is safe, because:
+ /// 1. Only memory writes are removed, so the alias cache for memory
+ /// locations remains valid.
+ /// 2. No new instructions are added (only instructions removed), so cached
+ /// information for a deleted value cannot be accessed by a re-used new
+ /// value pointer.
+ BatchAAResults BatchAA;
+
MemorySSA &MSSA;
DominatorTree &DT;
PostDominatorTree &PDT;
const TargetLibraryInfo &TLI;
+ const DataLayout &DL;
// All MemoryDefs that potentially could kill other MemDefs.
SmallVector<MemoryDef *, 64> MemDefs;
@@ -1501,10 +1619,11 @@ struct DSEState {
SmallPtrSet<MemoryAccess *, 4> SkipStores;
// Keep track of all of the objects that are invisible to the caller before
// the function returns.
- SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
+ // SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
+ DenseMap<const Value *, bool> InvisibleToCallerBeforeRet;
// Keep track of all of the objects that are invisible to the caller after
// the function returns.
- SmallPtrSet<const Value *, 16> InvisibleToCallerAfterRet;
+ DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
// Keep track of blocks with throwing instructions not modeled in MemorySSA.
SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
// Post-order numbers for each basic block. Used to figure out if memory
@@ -1517,7 +1636,8 @@ struct DSEState {
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
- : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {}
+ : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
+ DL(F.getParent()->getDataLayout()) {}
static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
@@ -1537,42 +1657,54 @@ struct DSEState {
if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
(State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I)))
State.MemDefs.push_back(MD);
-
- // Track whether alloca and alloca-like objects are visible in the
- // caller before and after the function returns. Alloca objects are
- // invalid in the caller, so they are neither visible before or after
- // the function returns.
- if (isa<AllocaInst>(&I)) {
- State.InvisibleToCallerBeforeRet.insert(&I);
- State.InvisibleToCallerAfterRet.insert(&I);
- }
-
- // For alloca-like objects we need to check if they are captured before
- // the function returns and if the return might capture the object.
- if (isAllocLikeFn(&I, &TLI)) {
- bool CapturesBeforeRet = PointerMayBeCaptured(&I, false, true);
- if (!CapturesBeforeRet) {
- State.InvisibleToCallerBeforeRet.insert(&I);
- if (!PointerMayBeCaptured(&I, true, false))
- State.InvisibleToCallerAfterRet.insert(&I);
- }
- }
}
}
// Treat byval or inalloca arguments the same as Allocas, stores to them are
// dead at the end of the function.
for (Argument &AI : F.args())
- if (AI.hasPassPointeeByValueAttr()) {
+ if (AI.hasPassPointeeByValueCopyAttr()) {
// For byval, the caller doesn't know the address of the allocation.
if (AI.hasByValAttr())
- State.InvisibleToCallerBeforeRet.insert(&AI);
- State.InvisibleToCallerAfterRet.insert(&AI);
+ State.InvisibleToCallerBeforeRet.insert({&AI, true});
+ State.InvisibleToCallerAfterRet.insert({&AI, true});
}
return State;
}
+ bool isInvisibleToCallerAfterRet(const Value *V) {
+ if (isa<AllocaInst>(V))
+ return true;
+ auto I = InvisibleToCallerAfterRet.insert({V, false});
+ if (I.second) {
+ if (!isInvisibleToCallerBeforeRet(V)) {
+ I.first->second = false;
+ } else {
+ auto *Inst = dyn_cast<Instruction>(V);
+ if (Inst && isAllocLikeFn(Inst, &TLI))
+ I.first->second = !PointerMayBeCaptured(V, true, false);
+ }
+ }
+ return I.first->second;
+ }
+
+ bool isInvisibleToCallerBeforeRet(const Value *V) {
+ if (isa<AllocaInst>(V))
+ return true;
+ auto I = InvisibleToCallerBeforeRet.insert({V, false});
+ if (I.second) {
+ auto *Inst = dyn_cast<Instruction>(V);
+ if (Inst && isAllocLikeFn(Inst, &TLI))
+ // NOTE: This could be made more precise by PointerMayBeCapturedBefore
+ // with the killing MemoryDef. But we refrain from doing so for now to
+ // limit compile-time and this does not cause any changes to the number
+ // of stores removed on a large test set in practice.
+ I.first->second = !PointerMayBeCaptured(V, false, true);
+ }
+ return I.first->second;
+ }
+
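// The caching pattern used by the two helpers above, in miniature (not part of
// the patch): insert() reports whether the key was new, the expensive query
// runs only on that first lookup, and later calls return the stored answer.
// The map type and the dummy query below are stand-ins for DenseMap and the
// capture analysis.
#include <cassert>
#include <unordered_map>

struct LazyCache {
  std::unordered_map<const void *, bool> Cached;
  int ExpensiveCalls = 0;

  bool expensiveQuery(const void *) { ++ExpensiveCalls; return true; }

  bool get(const void *V) {
    auto I = Cached.insert({V, false});
    if (I.second)                       // key was not cached yet
      I.first->second = expensiveQuery(V);
    return I.first->second;
  }
};

int main() {
  LazyCache C;
  int Obj = 0;
  assert(C.get(&Obj) && C.get(&Obj)); // second call is served from the cache
  assert(C.ExpensiveCalls == 1);
}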
Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
if (!I->mayWriteToMemory())
return None;
@@ -1581,6 +1713,11 @@ struct DSEState {
return {MemoryLocation::getForDest(MTI)};
if (auto *CB = dyn_cast<CallBase>(I)) {
+ // If the functions may write to memory we do not know about, bail out.
+ if (!CB->onlyAccessesArgMemory() &&
+ !CB->onlyAccessesInaccessibleMemOrArgMem())
+ return None;
+
LibFunc LF;
if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
switch (LF) {
@@ -1588,19 +1725,29 @@ struct DSEState {
case LibFunc_strncpy:
case LibFunc_strcat:
case LibFunc_strncat:
- return {MemoryLocation(CB->getArgOperand(0))};
+ return {MemoryLocation::getAfter(CB->getArgOperand(0))};
default:
break;
}
}
+ switch (CB->getIntrinsicID()) {
+ case Intrinsic::init_trampoline:
+ return {MemoryLocation::getAfter(CB->getArgOperand(0))};
+ case Intrinsic::masked_store:
+ return {MemoryLocation::getForArgument(CB, 1, TLI)};
+ default:
+ break;
+ }
return None;
}
return MemoryLocation::getOrNone(I);
}
- /// Returns true if \p Use completely overwrites \p DefLoc.
- bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const {
+ /// Returns true if \p UseInst completely overwrites \p DefLoc
+ /// (stored by \p DefInst).
+ bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
+ Instruction *UseInst) {
// UseInst has a MemoryDef associated in MemorySSA. It's possible for a
// MemoryDef to not write to memory, e.g. a volatile load is modeled as a
// MemoryDef.
@@ -1612,14 +1759,10 @@ struct DSEState {
return false;
int64_t InstWriteOffset, DepWriteOffset;
- auto CC = getLocForWriteEx(UseInst);
- InstOverlapIntervalsTy IOL;
-
- const DataLayout &DL = F.getParent()->getDataLayout();
-
- return CC &&
- isOverwrite(*CC, DefLoc, DL, TLI, DepWriteOffset, InstWriteOffset,
- UseInst, IOL, AA, &F) == OW_Complete;
+ if (auto CC = getLocForWriteEx(UseInst))
+ return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset,
+ InstWriteOffset, BatchAA, &F) == OW_Complete;
+ return false;
}
/// Returns true if \p Def is not read before returning from the function.
@@ -1650,10 +1793,12 @@ struct DSEState {
}
MemoryAccess *UseAccess = WorkList[I];
- if (isa<MemoryPhi>(UseAccess)) {
- PushMemUses(UseAccess);
- continue;
- }
+ // Simply adding the users of MemoryPhi to the worklist is not enough,
+ // because we might miss read clobbers in different iterations of a loop,
+ // for example.
+ // TODO: Add support for phi translation to handle the loop case.
+ if (isa<MemoryPhi>(UseAccess))
+ return false;
// TODO: Checking for aliasing is expensive. Consider reducing the amount
// of times this is called and/or caching it.
@@ -1682,7 +1827,8 @@ struct DSEState {
if (auto *CB = dyn_cast<CallBase>(I)) {
if (isFreeCall(I, &TLI))
- return {std::make_pair(MemoryLocation(CB->getArgOperand(0)), true)};
+ return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)),
+ true)};
}
return None;
@@ -1696,9 +1842,10 @@ struct DSEState {
isFreeCall(I, &TLI);
}
- /// Returns true if \p MaybeTerm is a memory terminator for the same
- /// underlying object as \p DefLoc.
- bool isMemTerminator(MemoryLocation DefLoc, Instruction *MaybeTerm) const {
+ /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
+ /// instruction \p AccessI.
+ bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
+ Instruction *MaybeTerm) {
Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
getLocForTerminator(MaybeTerm);
@@ -1707,15 +1854,31 @@ struct DSEState {
// If the terminator is a free-like call, all accesses to the underlying
// object can be considered terminated.
+ if (getUnderlyingObject(Loc.Ptr) !=
+ getUnderlyingObject(MaybeTermLoc->first.Ptr))
+ return false;
+
+ auto TermLoc = MaybeTermLoc->first;
if (MaybeTermLoc->second) {
- DataLayout DL = MaybeTerm->getParent()->getModule()->getDataLayout();
- DefLoc = MemoryLocation(GetUnderlyingObject(DefLoc.Ptr, DL));
+ const Value *LocUO = getUnderlyingObject(Loc.Ptr);
+ return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
}
- return AA.isMustAlias(MaybeTermLoc->first, DefLoc);
+ int64_t InstWriteOffset, DepWriteOffset;
+ return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI,
+ DepWriteOffset, InstWriteOffset, BatchAA,
+ &F) == OW_Complete;
}
// Returns true if \p Use may read from \p DefLoc.
- bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const {
+ bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
+ if (isNoopIntrinsic(UseInst))
+ return false;
+
+ // Monotonic or weaker atomic stores can be re-ordered and do not need to be
+ // treated as read clobber.
+ if (auto SI = dyn_cast<StoreInst>(UseInst))
+ return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
+
if (!UseInst->mayReadFromMemory())
return false;
@@ -1723,88 +1886,246 @@ struct DSEState {
if (CB->onlyAccessesInaccessibleMemory())
return false;
- ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc);
- // If necessary, perform additional analysis.
- if (isRefSet(MR))
- MR = AA.callCapturesBefore(UseInst, DefLoc, &DT);
- return isRefSet(MR);
+ // NOTE: For calls, the number of stores removed could be slightly improved
+ // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that proved to
+ // be expensive compared to the benefits in practice. For now, avoid more
+ // expensive analysis to limit compile-time.
+ return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
+ }
+
+ /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
+ /// loop. In particular, this guarantees that it only references a single
+ /// MemoryLocation during execution of the containing function.
+ bool IsGuaranteedLoopInvariant(Value *Ptr) {
+ auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) {
+ Ptr = Ptr->stripPointerCasts();
+ if (auto *I = dyn_cast<Instruction>(Ptr)) {
+ if (isa<AllocaInst>(Ptr))
+ return true;
+
+ if (isAllocLikeFn(I, &TLI))
+ return true;
+
+ return false;
+ }
+ return true;
+ };
+
+ Ptr = Ptr->stripPointerCasts();
+ if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
+ GEP->hasAllConstantIndices();
+ }
+ return IsGuaranteedLoopInvariantBase(Ptr);
}
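// What "guaranteed loop invariant" means in source terms (illustrative only,
// not part of the patch): an address formed from a local object with constant
// offsets names the same location on every iteration, while an address with a
// loop-varying index does not, so only the former can stand in for a single
// MemoryLocation across the whole function.
#include <cassert>

struct S { int Field; int Arr[4]; };

int main() {
  S Local{};
  int *Invariant = &Local.Field;        // alloca + all-constant "GEP" indices
  for (int I = 0; I < 4; ++I) {
    assert(Invariant == &Local.Field);  // one location, every iteration
    Local.Arr[I] = I;                   // loop-varying index: four locations
  }
  assert(Local.Arr[3] == 3);
}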
- // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no
- // read access between them or on any other path to a function exit block if
- // \p DefLoc is not accessible after the function returns. If there is no such
- // MemoryDef, return None. The returned value may not (completely) overwrite
- // \p DefLoc. Currently we bail out when we encounter an aliasing MemoryUse
- // (read).
+ // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with
+ // no read access between them or on any other path to a function exit block
+ // if \p DefLoc is not accessible after the function returns. If there is no
+ // such MemoryDef, return None. The returned value may not (completely)
+ // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing
+ // MemoryUse (read).
Optional<MemoryAccess *>
- getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current,
- MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet,
- bool DefVisibleToCallerAfterRet, int &ScanLimit) const {
- MemoryAccess *DomAccess;
+ getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
+ const MemoryLocation &DefLoc, const Value *DefUO,
+ unsigned &ScanLimit, unsigned &WalkerStepLimit,
+ bool IsMemTerm, unsigned &PartialLimit) {
+ if (ScanLimit == 0 || WalkerStepLimit == 0) {
+ LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
+ return None;
+ }
+
+ MemoryAccess *Current = StartAccess;
+ Instruction *KillingI = KillingDef->getMemoryInst();
bool StepAgain;
- LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current
- << "\n");
- // Find the next clobbering Mod access for DefLoc, starting at Current.
+ LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
+
+ // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
+ Optional<MemoryLocation> CurrentLoc;
do {
StepAgain = false;
+ LLVM_DEBUG({
+ dbgs() << " visiting " << *Current;
+ if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
+ dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
+ << ")";
+ dbgs() << "\n";
+ });
+
// Reached TOP.
- if (MSSA.isLiveOnEntryDef(Current))
+ if (MSSA.isLiveOnEntryDef(Current)) {
+ LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
return None;
+ }
+
+ // Cost of a step. Accesses in the same block are more likely to be valid
+ // candidates for elimination, hence consider them cheaper.
+ unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
+ ? MemorySSASameBBStepCost
+ : MemorySSAOtherBBStepCost;
+ if (WalkerStepLimit <= StepCost) {
+ LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
+ return None;
+ }
+ WalkerStepLimit -= StepCost;
+ // Return for MemoryPhis. They cannot be eliminated directly and the
+ // caller is responsible for traversing them.
if (isa<MemoryPhi>(Current)) {
- DomAccess = Current;
- break;
+ LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
+ return Current;
+ }
+
+ // Below, check if CurrentDef is a valid candidate to be eliminated by
+ // KillingDef. If it is not, check the next candidate.
+ MemoryDef *CurrentDef = cast<MemoryDef>(Current);
+ Instruction *CurrentI = CurrentDef->getMemoryInst();
+
+ if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ continue;
}
- MemoryUseOrDef *CurrentUD = cast<MemoryUseOrDef>(Current);
- // Look for access that clobber DefLoc.
- DomAccess = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(CurrentUD,
- DefLoc);
- if (MSSA.isLiveOnEntryDef(DomAccess))
+
+ // Before we try to remove anything, check for any extra throwing
+ // instructions that block us from DSEing
+ if (mayThrowBetween(KillingI, CurrentI, DefUO)) {
+ LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
return None;
+ }
- if (isa<MemoryPhi>(DomAccess))
- break;
+ // Check for anything that looks like it will be a barrier to further
+ // removal
+ if (isDSEBarrier(DefUO, CurrentI)) {
+ LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
+ return None;
+ }
+
+ // If Current is known to be on a path that reads DefLoc or is a read
+ // clobber, bail out, as the path is not profitable. We skip this check
+ // for intrinsic calls, because the code knows how to handle memcpy
+ // intrinsics.
+ if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
+ return None;
+
+ // Quick check if there are direct uses that are read-clobbers.
+ if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
+ if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
+ return !MSSA.dominates(StartAccess, UseOrDef) &&
+ isReadClobber(DefLoc, UseOrDef->getMemoryInst());
+ return false;
+ })) {
+ LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
+ return None;
+ }
+
+ // If Current cannot be analyzed or is not removable, check the next
+ // candidate.
+ if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ continue;
+ }
+
+ // If Current does not have an analyzable write location, skip it
+ CurrentLoc = getLocForWriteEx(CurrentI);
+ if (!CurrentLoc) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ continue;
+ }
- // Check if we can skip DomDef for DSE.
- MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess);
- if (DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) {
+ // AliasAnalysis does not account for loops. Limit elimination to
+ // candidates for which we can guarantee they always store to the same
+ // memory location and not multiple locations in a loop.
+ if (Current->getBlock() != KillingDef->getBlock() &&
+ !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
StepAgain = true;
- Current = DomDef->getDefiningAccess();
+ Current = CurrentDef->getDefiningAccess();
+ WalkerStepLimit -= 1;
+ continue;
}
+ if (IsMemTerm) {
+ // If the killing def is a memory terminator (e.g. lifetime.end), check
+ // the next candidate if Current does not write the same
+ // underlying object as the terminator.
+ if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ }
+ continue;
+ } else {
+ int64_t InstWriteOffset, DepWriteOffset;
+ auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
+ DepWriteOffset, InstWriteOffset, BatchAA, &F);
+ // If Current does not write to the same object as KillingDef, check
+ // the next candidate.
+ if (OR == OW_Unknown) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ } else if (OR == OW_MaybePartial) {
+ // If KillingDef only partially overwrites Current, check the next
+ // candidate if the partial step limit is exceeded. This aggressively
+ // limits the number of candidates for partial store elimination,
+ // which are less likely to be removable in the end.
+ if (PartialLimit <= 1) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ WalkerStepLimit -= 1;
+ continue;
+ }
+ PartialLimit -= 1;
+ }
+ }
} while (StepAgain);
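The step costs above make same-block candidates cheaper to visit than candidates in other blocks. A minimal standalone sketch of that budgeting idea follows (hypothetical type, member names, and cost values; not the LLVM API):

  struct WalkBudget {
    unsigned Remaining;
    // Returns false when the walk should give up (the caller returns None).
    bool charge(bool SameBlock) {
      unsigned Cost = SameBlock ? 1 : 5;  // assumed relative costs
      if (Remaining <= Cost)
        return false;
      Remaining -= Cost;
      return true;
    }
  };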
// Accesses to objects accessible after the function returns can only be
// eliminated if the access is killed along all paths to the exit. Collect
// the blocks with killing (=completely overwriting MemoryDefs) and check if
- // they cover all paths from DomAccess to any function exit.
- SmallPtrSet<BasicBlock *, 16> KillingBlocks = {KillingDef->getBlock()};
- LLVM_DEBUG({
- dbgs() << " Checking for reads of " << *DomAccess;
- if (isa<MemoryDef>(DomAccess))
- dbgs() << " (" << *cast<MemoryDef>(DomAccess)->getMemoryInst() << ")\n";
- else
- dbgs() << ")\n";
- });
+ // they cover all paths from EarlierAccess to any function exit.
+ SmallPtrSet<Instruction *, 16> KillingDefs;
+ KillingDefs.insert(KillingDef->getMemoryInst());
+ MemoryAccess *EarlierAccess = Current;
+ Instruction *EarlierMemInst =
+ cast<MemoryDef>(EarlierAccess)->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " ("
+ << *EarlierMemInst << ")\n");
SmallSetVector<MemoryAccess *, 32> WorkList;
auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
for (Use &U : Acc->uses())
WorkList.insert(cast<MemoryAccess>(U.getUser()));
};
- PushMemUses(DomAccess);
-
- // Check if DomDef may be read.
+ PushMemUses(EarlierAccess);
+
+ // Optimistically collect all accesses for reads. If we do not find any
+ // read clobbers, add them to the cache.
+ SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
+ if (!EarlierMemInst->mayReadFromMemory())
+ KnownNoReads.insert(EarlierAccess);
+ // Check if EarlierDef may be read.
for (unsigned I = 0; I < WorkList.size(); I++) {
MemoryAccess *UseAccess = WorkList[I];
LLVM_DEBUG(dbgs() << " " << *UseAccess);
- if (--ScanLimit == 0) {
+ // Bail out if the number of accesses to check exceeds the scan limit.
+ if (ScanLimit < (WorkList.size() - I)) {
LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
return None;
}
+ --ScanLimit;
+ NumDomMemDefChecks++;
+ KnownNoReads.insert(UseAccess);
if (isa<MemoryPhi>(UseAccess)) {
+ if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
+ return DT.properlyDominates(KI->getParent(),
+ UseAccess->getBlock());
+ })) {
+ LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
+ continue;
+ }
LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
PushMemUses(UseAccess);
continue;
@@ -1813,29 +2134,45 @@ struct DSEState {
Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
- if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess))) {
- LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
- PushMemUses(UseAccess);
+ if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
+ return DT.dominates(KI, UseInst);
+ })) {
+ LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
continue;
}
// A memory terminator kills all preceding MemoryDefs and all succeeding
// MemoryAccesses. We do not have to check its users.
- if (isMemTerminator(DefLoc, UseInst))
+ if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
+ LLVM_DEBUG(
+ dbgs()
+ << " ... skipping, memterminator invalidates following accesses\n");
continue;
+ }
+
+ if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
+ LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
+ PushMemUses(UseAccess);
+ continue;
+ }
+
+ if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
+ LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
+ return None;
+ }
// Uses which may read the original MemoryDef mean we cannot eliminate the
// original MD. Stop walk.
- if (isReadClobber(DefLoc, UseInst)) {
+ if (isReadClobber(*CurrentLoc, UseInst)) {
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
return None;
}
- // For the KillingDef and DomAccess we only have to check if it reads the
- // memory location.
+ // For the KillingDef and EarlierAccess we only have to check if it reads
+ // the memory location.
// TODO: It would probably be better to check for self-reads before
// calling the function.
- if (KillingDef == UseAccess || DomAccess == UseAccess) {
+ if (KillingDef == UseAccess || EarlierAccess == UseAccess) {
LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
continue;
}
@@ -1844,22 +2181,23 @@ struct DSEState {
// the original location. Otherwise we have to check uses of *all*
// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
// miss cases like the following
- // 1 = Def(LoE) ; <----- DomDef stores [0,1]
+ // 1 = Def(LoE) ; <----- EarlierDef stores [0,1]
// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
// Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
// (The Use points to the *first* Def it may alias)
// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
// stores [0,1]
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
- if (isCompleteOverwrite(DefLoc, UseInst)) {
- if (DefVisibleToCallerAfterRet && UseAccess != DomAccess) {
+ if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
+ if (!isInvisibleToCallerAfterRet(DefUO) &&
+ UseAccess != EarlierAccess) {
BasicBlock *MaybeKillingBlock = UseInst->getParent();
if (PostOrderNumbers.find(MaybeKillingBlock)->second <
- PostOrderNumbers.find(DomAccess->getBlock())->second) {
+ PostOrderNumbers.find(EarlierAccess->getBlock())->second) {
- LLVM_DEBUG(dbgs() << " ... found killing block "
- << MaybeKillingBlock->getName() << "\n");
- KillingBlocks.insert(MaybeKillingBlock);
+ LLVM_DEBUG(dbgs()
+ << " ... found killing def " << *UseInst << "\n");
+ KillingDefs.insert(UseInst);
}
}
} else
@@ -1868,11 +2206,15 @@ struct DSEState {
}
// For accesses to locations visible after the function returns, make sure
- // that the location is killed (=overwritten) along all paths from DomAccess
- // to the exit.
- if (DefVisibleToCallerAfterRet) {
+ // that the location is killed (=overwritten) along all paths from
+ // EarlierAccess to the exit.
+ if (!isInvisibleToCallerAfterRet(DefUO)) {
+ SmallPtrSet<BasicBlock *, 16> KillingBlocks;
+ for (Instruction *KD : KillingDefs)
+ KillingBlocks.insert(KD->getParent());
assert(!KillingBlocks.empty() &&
"Expected at least a single killing block");
+
// Find the common post-dominator of all killing blocks.
BasicBlock *CommonPred = *KillingBlocks.begin();
for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end();
@@ -1883,23 +2225,19 @@ struct DSEState {
}
// If CommonPred is in the set of killing blocks, just check if it
- // post-dominates DomAccess.
+ // post-dominates EarlierAccess.
if (KillingBlocks.count(CommonPred)) {
- if (PDT.dominates(CommonPred, DomAccess->getBlock()))
- return {DomAccess};
+ if (PDT.dominates(CommonPred, EarlierAccess->getBlock()))
+ return {EarlierAccess};
return None;
}
- // If the common post-dominator does not post-dominate DomAccess, there
- // is a path from DomAccess to an exit not going through a killing block.
- if (PDT.dominates(CommonPred, DomAccess->getBlock())) {
+ // If the common post-dominator does not post-dominate EarlierAccess,
+ // there is a path from EarlierAccess to an exit not going through a
+ // killing block.
+ if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) {
SetVector<BasicBlock *> WorkList;
- // DomAccess's post-order number provides an upper bound of the blocks
- // on a path starting at DomAccess.
- unsigned UpperBound =
- PostOrderNumbers.find(DomAccess->getBlock())->second;
-
// If CommonPred is null, there are multiple exits from the function.
// They all have to be added to the worklist.
if (CommonPred)
@@ -1910,24 +2248,20 @@ struct DSEState {
NumCFGTries++;
// Check if all paths starting from an exit node go through one of the
- // killing blocks before reaching DomAccess.
+ // killing blocks before reaching EarlierAccess.
for (unsigned I = 0; I < WorkList.size(); I++) {
NumCFGChecks++;
BasicBlock *Current = WorkList[I];
if (KillingBlocks.count(Current))
continue;
- if (Current == DomAccess->getBlock())
+ if (Current == EarlierAccess->getBlock())
return None;
- // DomAccess is reachable from the entry, so we don't have to explore
- // unreachable blocks further.
+ // EarlierAccess is reachable from the entry, so we don't have to
+ // explore unreachable blocks further.
if (!DT.isReachableFromEntry(Current))
continue;
- unsigned CPO = PostOrderNumbers.find(Current)->second;
- // Current block is not on a path starting at DomAccess.
- if (CPO > UpperBound)
- continue;
for (BasicBlock *Pred : predecessors(Current))
WorkList.insert(Pred);
@@ -1935,13 +2269,14 @@ struct DSEState {
return None;
}
NumCFGSuccess++;
- return {DomAccess};
+ return {EarlierAccess};
}
return None;
}
- // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead.
- return {DomAccess};
+ // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
+ // potentially dead.
+ return {EarlierAccess};
}
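A hedged, source-level example of the all-exit-paths requirement enforced above (plain C++, not from the patch): a store to memory that stays visible after the function returns is only dead if every path to an exit overwrites it.

  int G;
  void allPathsKill(bool c) {
    G = 1;   // not dead: on the early-return path this is the final value of G
    if (c)
      return;
    G = 2;   // kills G = 1 only on the fall-through path
  }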
// Delete dead memory defs
@@ -1986,11 +2321,11 @@ struct DSEState {
// checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may
// throw are handled during the walk from one def to the next.
bool mayThrowBetween(Instruction *SI, Instruction *NI,
- const Value *SILocUnd) const {
+ const Value *SILocUnd) {
// First see if we can ignore it by using the fact that SI is an
// alloca/alloca like object that is not visible to the caller during
// execution of the function.
- if (SILocUnd && InvisibleToCallerBeforeRet.count(SILocUnd))
+ if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd))
return false;
if (SI->getParent() == NI->getParent())
@@ -2003,10 +2338,10 @@ struct DSEState {
// * A memory instruction that may throw and \p SI accesses a non-stack
// object.
// * Atomic stores stronger than monotonic.
- bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) const {
+ bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) {
// If NI may throw it acts as a barrier, unless the store is to an alloca or
// alloca-like object that does not escape.
- if (NI->mayThrow() && !InvisibleToCallerBeforeRet.count(SILocUnd))
+ if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd))
return true;
// If NI is an atomic load/store stronger than monotonic, do not try to
@@ -2016,6 +2351,11 @@ struct DSEState {
return isStrongerThanMonotonic(LI->getOrdering());
if (auto *SI = dyn_cast<StoreInst>(NI))
return isStrongerThanMonotonic(SI->getOrdering());
+ if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI))
+ return isStrongerThanMonotonic(ARMW->getOrdering());
+ if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI))
+ return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
+ isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
llvm_unreachable("other instructions should be skipped in MemorySSA");
}
return false;
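A hedged example of the atomic-ordering barrier cases added above (standalone C++, not from the patch): DSE conservatively keeps the earlier store when an atomic access stronger than monotonic sits between the two stores.

  #include <atomic>
  std::atomic<int> Flag;
  int Data;
  void atomicBarrier() {
    Data = 1;                                    // kept: the release store below
    Flag.store(1, std::memory_order_release);    // is treated as a DSE barrier
    Data = 2;
  }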
@@ -2024,40 +2364,37 @@ struct DSEState {
/// Eliminate writes to objects that are not visible in the caller and are not
/// accessed before returning from the function.
bool eliminateDeadWritesAtEndOfFunction() {
- const DataLayout &DL = F.getParent()->getDataLayout();
bool MadeChange = false;
LLVM_DEBUG(
dbgs()
<< "Trying to eliminate MemoryDefs at the end of the function\n");
for (int I = MemDefs.size() - 1; I >= 0; I--) {
MemoryDef *Def = MemDefs[I];
- if (SkipStores.find(Def) != SkipStores.end() ||
- !isRemovable(Def->getMemoryInst()))
+ if (SkipStores.contains(Def) || !isRemovable(Def->getMemoryInst()))
+ continue;
+
+ Instruction *DefI = Def->getMemoryInst();
+ SmallVector<const Value *, 4> Pointers;
+ auto DefLoc = getLocForWriteEx(DefI);
+ if (!DefLoc)
+ continue;
+
+ // NOTE: Currently eliminating writes at the end of a function is limited
+ // to MemoryDefs with a single underlying object, to save compile-time. In
+ // practice it appears the case with multiple underlying objects is very
+ // uncommon. If it turns out to be important, we can use
+ // getUnderlyingObjects here instead.
+ const Value *UO = getUnderlyingObject(DefLoc->Ptr);
+ if (!UO || !isInvisibleToCallerAfterRet(UO))
continue;
- // TODO: Consider doing the underlying object check first, if it is
- // beneficial compile-time wise.
if (isWriteAtEndOfFunction(Def)) {
- Instruction *DefI = Def->getMemoryInst();
// See through pointer-to-pointer bitcasts
- SmallVector<const Value *, 4> Pointers;
- GetUnderlyingObjects(getLocForWriteEx(DefI)->Ptr, Pointers, DL);
-
LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
"of the function\n");
- bool CanKill = true;
- for (const Value *Pointer : Pointers) {
- if (!InvisibleToCallerAfterRet.count(Pointer)) {
- CanKill = false;
- break;
- }
- }
-
- if (CanKill) {
- deleteDeadInstruction(DefI);
- ++NumFastStores;
- MadeChange = true;
- }
+ deleteDeadInstruction(DefI);
+ ++NumFastStores;
+ MadeChange = true;
}
}
return MadeChange;
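A hedged illustration of the end-of-function case handled here (plain C++): a write to an object that never escapes and is never read before returning can be deleted.

  int deadLocalWrite() {
    int buf[4];
    buf[0] = 42;  // dead: buf is invisible to the caller and never read
    return 0;
  }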
@@ -2065,17 +2402,53 @@ struct DSEState {
/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
- bool storeIsNoop(MemoryDef *Def, MemoryLocation DefLoc, const Value *DefUO) {
+ bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc,
+ const Value *DefUO) {
StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
if (!Store)
return false;
if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
if (LoadI->getPointerOperand() == Store->getOperand(1)) {
+ // Get the defining access for the load.
auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
- // If both accesses share the same defining access, no instructions
- // between them can modify the memory location.
- return LoadAccess == Def->getDefiningAccess();
+ // Fast path: the defining accesses are the same.
+ if (LoadAccess == Def->getDefiningAccess())
+ return true;
+
+ // Look through phi accesses. Recursively scan all phi accesses by
+ // adding them to a worklist. Bail when we run into a memory def that
+ // does not match LoadAccess.
+ SetVector<MemoryAccess *> ToCheck;
+ MemoryAccess *Current =
+ MSSA.getWalker()->getClobberingMemoryAccess(Def);
+ // We don't want to bail when we run into the store memory def. But,
+ // the phi access may point to it. So, pretend like we've already
+ // checked it.
+ ToCheck.insert(Def);
+ ToCheck.insert(Current);
+ // Start at current (1) to simulate already having checked Def.
+ for (unsigned I = 1; I < ToCheck.size(); ++I) {
+ Current = ToCheck[I];
+ if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
+ // Check all the operands.
+ for (auto &Use : PhiAccess->incoming_values())
+ ToCheck.insert(cast<MemoryAccess>(&Use));
+ continue;
+ }
+
+ // If we found a memory def, bail. This happens when we have an
+ // unrelated write in between an otherwise noop store.
+ assert(isa<MemoryDef>(Current) &&
+ "Only MemoryDefs should reach here.");
+ // TODO: Skip no alias MemoryDefs that have no aliasing reads.
+ // We are searching for the definition of the store's destination.
+ // So, if that is the same definition as the load, then this is a
+ // noop. Otherwise, fail.
+ if (LoadAccess != Current)
+ return false;
+ }
+ return true;
}
}
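A hedged example of the no-op store pattern recognized above, including the phi case walked through the worklist (plain C++): a value loaded and then stored back to the same address, possibly across a branch with no intervening write to that address, is a no-op.

  void noopStore(int *p, bool c) {
    int v = *p;
    int t = 0;
    if (c)
      t = 1;     // writes something unrelated to *p on one path
    *p = v;      // no-op: no path writes *p between the load and the store
    (void)t;
  }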
@@ -2099,7 +2472,6 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT,
const TargetLibraryInfo &TLI) {
- const DataLayout &DL = F.getParent()->getDataLayout();
bool MadeChange = false;
DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
@@ -2110,7 +2482,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
continue;
Instruction *SI = KillingDef->getMemoryInst();
- auto MaybeSILoc = State.getLocForWriteEx(SI);
+ Optional<MemoryLocation> MaybeSILoc;
if (State.isMemTerminatorInst(SI))
MaybeSILoc = State.getLocForTerminator(SI).map(
[](const std::pair<MemoryLocation, bool> &P) { return P.first; });
@@ -2124,38 +2496,23 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
}
MemoryLocation SILoc = *MaybeSILoc;
assert(SILoc.Ptr && "SILoc should not be null");
- const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL);
-
- // Check if the store is a no-op.
- if (isRemovable(SI) && State.storeIsNoop(KillingDef, SILoc, SILocUnd)) {
- LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n');
- State.deleteDeadInstruction(SI);
- NumNoopStores++;
- MadeChange = true;
- continue;
- }
-
- Instruction *DefObj =
- const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd));
- bool DefVisibleToCallerBeforeRet =
- !State.InvisibleToCallerBeforeRet.count(SILocUnd);
- bool DefVisibleToCallerAfterRet =
- !State.InvisibleToCallerAfterRet.count(SILocUnd);
- if (DefObj && isAllocLikeFn(DefObj, &TLI)) {
- if (DefVisibleToCallerBeforeRet)
- DefVisibleToCallerBeforeRet =
- PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT);
- }
+ const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr);
MemoryAccess *Current = KillingDef;
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
<< *KillingDef << " (" << *SI << ")\n");
- int ScanLimit = MemorySSAScanLimit;
+ unsigned ScanLimit = MemorySSAScanLimit;
+ unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
+ unsigned PartialLimit = MemorySSAPartialStoreLimit;
// Worklist of MemoryAccesses that may be killed by KillingDef.
SetVector<MemoryAccess *> ToCheck;
- ToCheck.insert(KillingDef->getDefiningAccess());
+ if (SILocUnd)
+ ToCheck.insert(KillingDef->getDefiningAccess());
+
+ bool Shortend = false;
+ bool IsMemTerm = State.isMemTerminatorInst(SI);
// Check if MemoryAccesses in the worklist are killed by KillingDef.
for (unsigned I = 0; I < ToCheck.size(); I++) {
Current = ToCheck[I];
@@ -2163,22 +2520,22 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
continue;
Optional<MemoryAccess *> Next = State.getDomMemoryDef(
- KillingDef, Current, SILoc, DefVisibleToCallerBeforeRet,
- DefVisibleToCallerAfterRet, ScanLimit);
+ KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
+ IsMemTerm, PartialLimit);
if (!Next) {
LLVM_DEBUG(dbgs() << " finished walk\n");
continue;
}
- MemoryAccess *DomAccess = *Next;
- LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess);
- if (isa<MemoryPhi>(DomAccess)) {
+ MemoryAccess *EarlierAccess = *Next;
+ LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess);
+ if (isa<MemoryPhi>(EarlierAccess)) {
LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
- for (Value *V : cast<MemoryPhi>(DomAccess)->incoming_values()) {
+ for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) {
MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
BasicBlock *IncomingBlock = IncomingAccess->getBlock();
- BasicBlock *PhiBlock = DomAccess->getBlock();
+ BasicBlock *PhiBlock = EarlierAccess->getBlock();
// We only consider incoming MemoryAccesses that come before the
// MemoryPhi. Otherwise we could discover candidates that do not
@@ -2189,44 +2546,20 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
}
continue;
}
- MemoryDef *NextDef = dyn_cast<MemoryDef>(DomAccess);
+ auto *NextDef = cast<MemoryDef>(EarlierAccess);
Instruction *NI = NextDef->getMemoryInst();
LLVM_DEBUG(dbgs() << " (" << *NI << ")\n");
-
- // Before we try to remove anything, check for any extra throwing
- // instructions that block us from DSEing
- if (State.mayThrowBetween(SI, NI, SILocUnd)) {
- LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
- break;
- }
-
- // Check for anything that looks like it will be a barrier to further
- // removal
- if (State.isDSEBarrier(SILocUnd, NI)) {
- LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
- continue;
- }
-
ToCheck.insert(NextDef->getDefiningAccess());
-
- if (!hasAnalyzableMemoryWrite(NI, TLI)) {
- LLVM_DEBUG(dbgs() << " ... skip, cannot analyze def\n");
- continue;
- }
-
- if (!isRemovable(NI)) {
- LLVM_DEBUG(dbgs() << " ... skip, cannot remove def\n");
- continue;
- }
+ NumGetDomMemoryDefPassed++;
if (!DebugCounter::shouldExecute(MemorySSACounter))
continue;
MemoryLocation NILoc = *State.getLocForWriteEx(NI);
- if (State.isMemTerminatorInst(SI)) {
- const Value *NIUnd = GetUnderlyingObject(NILoc.Ptr, DL);
- if (!SILocUnd || SILocUnd != NIUnd)
+ if (IsMemTerm) {
+ const Value *NIUnd = getUnderlyingObject(NILoc.Ptr);
+ if (SILocUnd != NIUnd)
continue;
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
<< "\n KILLER: " << *SI << '\n');
@@ -2236,32 +2569,43 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
} else {
// Check if NI overwrites SI.
int64_t InstWriteOffset, DepWriteOffset;
- auto Iter = State.IOLs.insert(
- std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
- NI->getParent(), InstOverlapIntervalsTy()));
- auto &IOL = Iter.first->second;
- OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset,
- InstWriteOffset, NI, IOL, AA, &F);
+ OverwriteResult OR =
+ isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset,
+ InstWriteOffset, State.BatchAA, &F);
+ if (OR == OW_MaybePartial) {
+ auto Iter = State.IOLs.insert(
+ std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
+ NI->getParent(), InstOverlapIntervalsTy()));
+ auto &IOL = Iter.first->second;
+ OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset,
+ NI, IOL);
+ }
if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
auto *Earlier = dyn_cast<StoreInst>(NI);
auto *Later = dyn_cast<StoreInst>(SI);
- if (Constant *Merged = tryToMergePartialOverlappingStores(
- Earlier, Later, InstWriteOffset, DepWriteOffset, DL, &AA,
- &DT)) {
-
- // Update stored value of earlier store to merged constant.
- Earlier->setOperand(0, Merged);
- ++NumModifiedStores;
- MadeChange = true;
-
- // Remove later store and remove any outstanding overlap intervals
- // for the updated store.
- State.deleteDeadInstruction(Later);
- auto I = State.IOLs.find(Earlier->getParent());
- if (I != State.IOLs.end())
- I->second.erase(Earlier);
- break;
+ // We are re-using tryToMergePartialOverlappingStores, which requires
+ // Earlier to dominate Later.
+ // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
+ if (Earlier && Later && DT.dominates(Earlier, Later)) {
+ if (Constant *Merged = tryToMergePartialOverlappingStores(
+ Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL,
+ State.BatchAA, &DT)) {
+
+ // Update stored value of earlier store to merged constant.
+ Earlier->setOperand(0, Merged);
+ ++NumModifiedStores;
+ MadeChange = true;
+
+ Shortend = true;
+ // Remove later store and remove any outstanding overlap intervals
+ // for the updated store.
+ State.deleteDeadInstruction(Later);
+ auto I = State.IOLs.find(Earlier->getParent());
+ if (I != State.IOLs.end())
+ I->second.erase(Earlier);
+ break;
+ }
}
}
@@ -2274,11 +2618,21 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
}
}
}
+
+ // Check if the store is a no-op.
+ if (!Shortend && isRemovable(SI) &&
+ State.storeIsNoop(KillingDef, SILoc, SILocUnd)) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n');
+ State.deleteDeadInstruction(SI);
+ NumRedundantStores++;
+ MadeChange = true;
+ continue;
+ }
}
if (EnablePartialOverwriteTracking)
for (auto &KV : State.IOLs)
- MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second);
+ MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
return MadeChange;
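A hedged, source-level sketch of the partial-overlap merging path above (plain C++; whether the front end emits a single wide store for the aggregate assignment is an assumption): when a later constant store lies entirely within an earlier constant store and the earlier store dominates the later one, the constants can be merged and the later store dropped.

  struct Pair { short Lo, Hi; };
  void mergePartial(Pair *P) {
    *P = Pair{0x1111, 0x2222};  // earlier, wider constant store
    P->Lo = 0x3333;             // later constant store contained in it
  }                             // merged result stores {0x3333, 0x2222}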
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
index d44a5979a8b2..3c6c444d6649 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -151,8 +151,8 @@ static DivRemWorklistTy getWorklist(Function &F) {
// rare than division.
for (auto &RemPair : RemMap) {
// Find the matching division instruction from the division map.
- Instruction *DivInst = DivMap[RemPair.first];
- if (!DivInst)
+ auto It = DivMap.find(RemPair.first);
+ if (It == DivMap.end())
continue;
// We have a matching pair of div/rem instructions.
@@ -160,7 +160,7 @@ static DivRemWorklistTy getWorklist(Function &F) {
Instruction *RemInst = RemPair.second;
// Place it in the worklist.
- Worklist.emplace_back(DivInst, RemInst);
+ Worklist.emplace_back(It->second, RemInst);
}
return Worklist;
@@ -315,14 +315,14 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// %rem = sub %x, %mul // %rem = undef - undef = undef
// If X is not frozen, %rem becomes undef after transformation.
// TODO: We need a undef-specific checking function in ValueTracking
- if (!isGuaranteedNotToBeUndefOrPoison(X, DivInst, &DT)) {
+ if (!isGuaranteedNotToBeUndefOrPoison(X, nullptr, DivInst, &DT)) {
auto *FrX = new FreezeInst(X, X->getName() + ".frozen", DivInst);
DivInst->setOperand(0, FrX);
Sub->setOperand(0, FrX);
}
// Same for Y. If X = 1 and Y = (undef | 1), %rem in src is either 1 or 0,
// but %rem in tgt can be one of many integer values.
- if (!isGuaranteedNotToBeUndefOrPoison(Y, DivInst, &DT)) {
+ if (!isGuaranteedNotToBeUndefOrPoison(Y, nullptr, DivInst, &DT)) {
auto *FrY = new FreezeInst(Y, Y->getName() + ".frozen", DivInst);
DivInst->setOperand(1, FrY);
Mul->setOperand(1, FrY);
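For context on the freeze calls above, a hedged sketch of the decomposition this pass performs (written at the source level; the pass itself works on IR): the remainder is rebuilt from the quotient, so an operand that might be undef or poison must be frozen first so that both of its uses observe a single value.

  int divRemSketch(int x, int y, int *rem) {
    int q = x / y;
    *rem = x - q * y;  // replaces x % y; x and y are frozen first in IR
    return q;          // if they may be undef or poison
  }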
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index ddfc8555b0a0..180a82917fa9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -154,33 +154,13 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
std::swap(A, B);
}
- // Match canonical forms of abs/nabs/min/max. We are not using ValueTracking's
+ // Match canonical forms of min/max. We are not using ValueTracking's
// more powerful matchSelectPattern() because it may rely on instruction flags
// such as "nsw". That would be incompatible with the current hashing
// mechanism that may remove flags to increase the likelihood of CSE.
- // These are the canonical forms of abs(X) and nabs(X) created by instcombine:
- // %N = sub i32 0, %X
- // %C = icmp slt i32 %X, 0
- // %ABS = select i1 %C, i32 %N, i32 %X
- //
- // %N = sub i32 0, %X
- // %C = icmp slt i32 %X, 0
- // %NABS = select i1 %C, i32 %X, i32 %N
Flavor = SPF_UNKNOWN;
CmpInst::Predicate Pred;
- if (match(Cond, m_ICmp(Pred, m_Specific(B), m_ZeroInt())) &&
- Pred == ICmpInst::ICMP_SLT && match(A, m_Neg(m_Specific(B)))) {
- // ABS: B < 0 ? -B : B
- Flavor = SPF_ABS;
- return true;
- }
- if (match(Cond, m_ICmp(Pred, m_Specific(A), m_ZeroInt())) &&
- Pred == ICmpInst::ICMP_SLT && match(B, m_Neg(m_Specific(A)))) {
- // NABS: A < 0 ? A : -A
- Flavor = SPF_NABS;
- return true;
- }
if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
// Check for commuted variants of min/max by swapping predicate.
@@ -196,6 +176,11 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
+ // Non-strict inequalities.
+ case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
+ case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
+ case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
+ case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
default: break;
}
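A hedged example of what the added non-strict cases enable (plain C++): both selects below compute a signed minimum, so they now map to the same flavor and may be treated as equivalent by EarlyCSE.

  int minFlavors(int x, int y) {
    int a = x <  y ? x : y;  // smin via slt
    int b = x <= y ? x : y;  // smin via sle; same value for every x, y
    return a + b;
  }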
@@ -234,7 +219,7 @@ static unsigned getHashValueImpl(SimpleValue Val) {
SelectPatternFlavor SPF;
Value *Cond, *A, *B;
if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
- // Hash min/max/abs (cmp + select) to allow for commuted operands.
+ // Hash min/max (cmp + select) to allow for commuted operands.
// Min/max may also have non-canonical compare predicate (eg, the compare for
// smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
// compare.
@@ -245,10 +230,6 @@ static unsigned getHashValueImpl(SimpleValue Val) {
std::swap(A, B);
return hash_combine(Inst->getOpcode(), SPF, A, B);
}
- if (SPF == SPF_ABS || SPF == SPF_NABS) {
- // ABS/NABS always puts the input in A and its negation in B.
- return hash_combine(Inst->getOpcode(), SPF, A, B);
- }
// Hash general selects to allow matching commuted true/false operands.
@@ -288,6 +269,17 @@ static unsigned getHashValueImpl(SimpleValue Val) {
isa<FreezeInst>(Inst)) &&
"Invalid/unknown instruction");
+ // Handle intrinsics with commutative operands.
+ // TODO: Extend this to handle intrinsics with >2 operands where the 1st
+ // 2 operands are commutative.
+ auto *II = dyn_cast<IntrinsicInst>(Inst);
+ if (II && II->isCommutative() && II->getNumArgOperands() == 2) {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ if (LHS > RHS)
+ std::swap(LHS, RHS);
+ return hash_combine(II->getOpcode(), LHS, RHS);
+ }
+
// Mix in the opcode.
return hash_combine(
Inst->getOpcode(),
@@ -340,7 +332,16 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
}
- // Min/max/abs can occur with commuted operands, non-canonical predicates,
+ // TODO: Extend this for >2 args by matching the trailing N-2 args.
+ auto *LII = dyn_cast<IntrinsicInst>(LHSI);
+ auto *RII = dyn_cast<IntrinsicInst>(RHSI);
+ if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
+ LII->isCommutative() && LII->getNumArgOperands() == 2) {
+ return LII->getArgOperand(0) == RII->getArgOperand(1) &&
+ LII->getArgOperand(1) == RII->getArgOperand(0);
+ }
+
+ // Min/max can occur with commuted operands, non-canonical predicates,
// and/or non-canonical operands.
// Selects can be non-trivially equivalent via inverted conditions and swaps.
SelectPatternFlavor LSPF, RSPF;
@@ -354,11 +355,6 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
return ((LHSA == RHSA && LHSB == RHSB) ||
(LHSA == RHSB && LHSB == RHSA));
- if (LSPF == SPF_ABS || LSPF == SPF_NABS) {
- // Abs results are placed in a defined order by matchSelectPattern.
- return LHSA == RHSA && LHSB == RHSB;
- }
-
// select Cond, A, B <--> select not(Cond), B, A
if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
return true;
@@ -376,7 +372,7 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
// This intentionally does NOT handle patterns with a double-negation in
// the sense of not + not, because doing so could result in values
// comparing
- // as equal that hash differently in the min/max/abs cases like:
+ // as equal that hash differently in the min/max cases like:
// select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
// ^ hashes as min ^ would not hash as min
// In the context of the EarlyCSE pass, however, such cases never reach
@@ -631,11 +627,11 @@ private:
StackNode &operator=(const StackNode &) = delete;
// Accessors.
- unsigned currentGeneration() { return CurrentGeneration; }
- unsigned childGeneration() { return ChildGeneration; }
+ unsigned currentGeneration() const { return CurrentGeneration; }
+ unsigned childGeneration() const { return ChildGeneration; }
void childGeneration(unsigned generation) { ChildGeneration = generation; }
DomTreeNode *node() { return Node; }
- DomTreeNode::const_iterator childIter() { return ChildIter; }
+ DomTreeNode::const_iterator childIter() const { return ChildIter; }
DomTreeNode *nextChild() {
DomTreeNode *child = *ChildIter;
@@ -643,8 +639,8 @@ private:
return child;
}
- DomTreeNode::const_iterator end() { return EndIter; }
- bool isProcessed() { return Processed; }
+ DomTreeNode::const_iterator end() const { return EndIter; }
+ bool isProcessed() const { return Processed; }
void process() { Processed = true; }
private:
@@ -663,29 +659,60 @@ private:
public:
ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
: Inst(Inst) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ IntrID = II->getIntrinsicID();
if (TTI.getTgtMemIntrinsic(II, Info))
- IsTargetMemInst = true;
+ return;
+ if (isHandledNonTargetIntrinsic(IntrID)) {
+ switch (IntrID) {
+ case Intrinsic::masked_load:
+ Info.PtrVal = Inst->getOperand(0);
+ Info.MatchingId = Intrinsic::masked_load;
+ Info.ReadMem = true;
+ Info.WriteMem = false;
+ Info.IsVolatile = false;
+ break;
+ case Intrinsic::masked_store:
+ Info.PtrVal = Inst->getOperand(1);
+ // Use the ID of masked load as the "matching id". This will
+ // prevent matching non-masked loads/stores with masked ones
+ // (which could be done), but at the moment, the code here
+ // does not support matching intrinsics with non-intrinsics,
+ // so keep the MatchingIds specific to masked instructions
+ // for now (TODO).
+ Info.MatchingId = Intrinsic::masked_load;
+ Info.ReadMem = false;
+ Info.WriteMem = true;
+ Info.IsVolatile = false;
+ break;
+ }
+ }
+ }
}
+ Instruction *get() { return Inst; }
+ const Instruction *get() const { return Inst; }
+
bool isLoad() const {
- if (IsTargetMemInst) return Info.ReadMem;
+ if (IntrID != 0)
+ return Info.ReadMem;
return isa<LoadInst>(Inst);
}
bool isStore() const {
- if (IsTargetMemInst) return Info.WriteMem;
+ if (IntrID != 0)
+ return Info.WriteMem;
return isa<StoreInst>(Inst);
}
bool isAtomic() const {
- if (IsTargetMemInst)
+ if (IntrID != 0)
return Info.Ordering != AtomicOrdering::NotAtomic;
return Inst->isAtomic();
}
bool isUnordered() const {
- if (IsTargetMemInst)
+ if (IntrID != 0)
return Info.isUnordered();
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
@@ -698,7 +725,7 @@ private:
}
bool isVolatile() const {
- if (IsTargetMemInst)
+ if (IntrID != 0)
return Info.IsVolatile;
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
@@ -716,11 +743,6 @@ private:
return false;
}
- bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
- return (getPointerOperand() == Inst.getPointerOperand() &&
- getMatchingId() == Inst.getMatchingId());
- }
-
bool isValid() const { return getPointerOperand() != nullptr; }
// For regular (non-intrinsic) loads/stores, this is set to -1. For
@@ -728,44 +750,83 @@ private:
// field in the MemIntrinsicInfo structure. That field contains
// non-negative values only.
int getMatchingId() const {
- if (IsTargetMemInst) return Info.MatchingId;
+ if (IntrID != 0)
+ return Info.MatchingId;
return -1;
}
Value *getPointerOperand() const {
- if (IsTargetMemInst) return Info.PtrVal;
+ if (IntrID != 0)
+ return Info.PtrVal;
return getLoadStorePointerOperand(Inst);
}
bool mayReadFromMemory() const {
- if (IsTargetMemInst) return Info.ReadMem;
+ if (IntrID != 0)
+ return Info.ReadMem;
return Inst->mayReadFromMemory();
}
bool mayWriteToMemory() const {
- if (IsTargetMemInst) return Info.WriteMem;
+ if (IntrID != 0)
+ return Info.WriteMem;
return Inst->mayWriteToMemory();
}
private:
- bool IsTargetMemInst = false;
+ Intrinsic::ID IntrID = 0;
MemIntrinsicInfo Info;
Instruction *Inst;
};
+ // This function is to prevent accidentally passing a non-target
+ // intrinsic ID to TargetTransformInfo.
+ static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_store:
+ return true;
+ }
+ return false;
+ }
+ static bool isHandledNonTargetIntrinsic(const Value *V) {
+ if (auto *II = dyn_cast<IntrinsicInst>(V))
+ return isHandledNonTargetIntrinsic(II->getIntrinsicID());
+ return false;
+ }
+
bool processNode(DomTreeNode *Node);
bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
const BasicBlock *BB, const BasicBlock *Pred);
+ Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
+ unsigned CurrentGeneration);
+
+ bool overridingStores(const ParseMemoryInst &Earlier,
+ const ParseMemoryInst &Later);
+
Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
if (auto *LI = dyn_cast<LoadInst>(Inst))
return LI;
if (auto *SI = dyn_cast<StoreInst>(Inst))
return SI->getValueOperand();
assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
- return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
- ExpectedType);
+ auto *II = cast<IntrinsicInst>(Inst);
+ if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
+ return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
+ return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
+ }
+
+ Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
+ Type *ExpectedType) const {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ return II;
+ case Intrinsic::masked_store:
+ return II->getOperand(0);
+ }
+ return nullptr;
}
/// Return true if the instruction is known to only operate on memory
@@ -775,6 +836,101 @@ private:
bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
Instruction *EarlierInst, Instruction *LaterInst);
+ bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
+ const IntrinsicInst *Later) {
+ auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
+ // Is Mask0 a submask of Mask1?
+ if (Mask0 == Mask1)
+ return true;
+ if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
+ return false;
+ auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
+ auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
+ if (!Vec0 || !Vec1)
+ return false;
+ assert(Vec0->getType() == Vec1->getType() &&
+ "Masks should have the same type");
+ for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
+ Constant *Elem0 = Vec0->getOperand(i);
+ Constant *Elem1 = Vec1->getOperand(i);
+ auto *Int0 = dyn_cast<ConstantInt>(Elem0);
+ if (Int0 && Int0->isZero())
+ continue;
+ auto *Int1 = dyn_cast<ConstantInt>(Elem1);
+ if (Int1 && !Int1->isZero())
+ continue;
+ if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
+ return false;
+ if (Elem0 == Elem1)
+ continue;
+ return false;
+ }
+ return true;
+ };
+ auto PtrOp = [](const IntrinsicInst *II) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return II->getOperand(0);
+ if (II->getIntrinsicID() == Intrinsic::masked_store)
+ return II->getOperand(1);
+ llvm_unreachable("Unexpected IntrinsicInst");
+ };
+ auto MaskOp = [](const IntrinsicInst *II) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return II->getOperand(2);
+ if (II->getIntrinsicID() == Intrinsic::masked_store)
+ return II->getOperand(3);
+ llvm_unreachable("Unexpected IntrinsicInst");
+ };
+ auto ThruOp = [](const IntrinsicInst *II) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return II->getOperand(3);
+ llvm_unreachable("Unexpected IntrinsicInst");
+ };
+
+ if (PtrOp(Earlier) != PtrOp(Later))
+ return false;
+
+ Intrinsic::ID IDE = Earlier->getIntrinsicID();
+ Intrinsic::ID IDL = Later->getIntrinsicID();
+ // We could really use specific intrinsic classes for masked loads
+ // and stores in IntrinsicInst.h.
+ if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
+ // Trying to replace later masked load with the earlier one.
+ // Check that the pointers are the same, and
+ // - masks and pass-throughs are the same, or
+ // - replacee's pass-through is "undef" and replacer's mask is a
+ // super-set of the replacee's mask.
+ if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
+ return true;
+ if (!isa<UndefValue>(ThruOp(Later)))
+ return false;
+ return IsSubmask(MaskOp(Later), MaskOp(Earlier));
+ }
+ if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
+ // Trying to replace a load of a stored value with the store's value.
+ // Check that the pointers are the same, and
+ // - load's mask is a subset of store's mask, and
+ // - load's pass-through is "undef".
+ if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
+ return false;
+ return isa<UndefValue>(ThruOp(Later));
+ }
+ if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
+ // Trying to remove a store of the loaded value.
+ // Check that the pointers are the same, and
+ // - store's mask is a subset of the load's mask.
+ return IsSubmask(MaskOp(Later), MaskOp(Earlier));
+ }
+ if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
+ // Trying to remove a dead store (earlier).
+ // Check that the pointers are the same,
+ // - the to-be-removed store's mask is a subset of the other store's
+ // mask.
+ return IsSubmask(MaskOp(Earlier), MaskOp(Later));
+ }
+ return false;
+ }
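A hedged standalone sketch of the submask relation that IsSubmask checks above (plain C++ over boolean lanes; the real code additionally has to handle undef lanes and constant vectors):

  #include <cstddef>
  #include <vector>
  // Every lane enabled in M0 must also be enabled in M1,
  // e.g. {1,1,0,0} is a submask of {1,1,1,0} but not vice versa.
  static bool isSubmaskSketch(const std::vector<bool> &M0,
                              const std::vector<bool> &M1) {
    if (M0.size() != M1.size())
      return false;
    for (std::size_t I = 0, E = M0.size(); I != E; ++I)
      if (M0[I] && !M1[I])
        return false;
    return true;
  }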
+
void removeMSSA(Instruction &Inst) {
if (!MSSA)
return;
@@ -877,9 +1033,14 @@ bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
auto *TorF = (BI->getSuccessor(0) == BB)
? ConstantInt::getTrue(BB->getContext())
: ConstantInt::getFalse(BB->getContext());
- auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
- if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
- return BOp->getOpcode() == Opcode;
+ auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
+ Value *&RHS) {
+ if (Opcode == Instruction::And &&
+ match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
+ return true;
+ else if (Opcode == Instruction::Or &&
+ match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
+ return true;
return false;
};
// If the condition is AND operation, we can propagate its operands into the
@@ -910,8 +1071,9 @@ bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
}
}
- if (MatchBinOp(Curr, PropagateOpcode))
- for (auto &Op : cast<BinaryOperator>(Curr)->operands())
+ Value *LHS, *RHS;
+ if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
+ for (auto &Op : { LHS, RHS })
if (Instruction *OPI = dyn_cast<Instruction>(Op))
if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
WorkList.push_back(OPI);
@@ -920,6 +1082,86 @@ bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
return MadeChanges;
}
+Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
+ unsigned CurrentGeneration) {
+ if (InVal.DefInst == nullptr)
+ return nullptr;
+ if (InVal.MatchingId != MemInst.getMatchingId())
+ return nullptr;
+ // We don't yet handle removing loads with ordering of any kind.
+ if (MemInst.isVolatile() || !MemInst.isUnordered())
+ return nullptr;
+ // We can't replace an atomic load with one which isn't also atomic.
+ if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
+ return nullptr;
+ // The value V returned from this function is used differently depending
+ // on whether MemInst is a load or a store. If it's a load, we will replace
+ // MemInst with V, if it's a store, we will check if V is the same as the
+ // available value.
+ bool MemInstMatching = !MemInst.isLoad();
+ Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
+ Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
+
+ // For stores check the result values before checking memory generation
+ // (otherwise isSameMemGeneration may crash).
+ Value *Result = MemInst.isStore()
+ ? getOrCreateResult(Matching, Other->getType())
+ : nullptr;
+ if (MemInst.isStore() && InVal.DefInst != Result)
+ return nullptr;
+
+ // Deal with non-target memory intrinsics.
+ bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
+ bool OtherNTI = isHandledNonTargetIntrinsic(Other);
+ if (OtherNTI != MatchingNTI)
+ return nullptr;
+ if (OtherNTI && MatchingNTI) {
+ if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
+ cast<IntrinsicInst>(MemInst.get())))
+ return nullptr;
+ }
+
+ if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
+ !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
+ MemInst.get()))
+ return nullptr;
+
+ if (!Result)
+ Result = getOrCreateResult(Matching, Other->getType());
+ return Result;
+}
+
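A hedged example of the load CSE that getMatchingValue drives (plain C++): the second load sees the same pointer, matching id, and memory generation, so it is replaced with the first.

  int loadCSE(int *p) {
    int a = *p;
    int b = *p;  // replaced with a when no intervening write is possible
    return a + b;
  }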
+bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
+ const ParseMemoryInst &Later) {
+ // Can we remove the Earlier store because of the Later store?
+
+ assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
+ "Violated invariant");
+ if (Earlier.getPointerOperand() != Later.getPointerOperand())
+ return false;
+ if (Earlier.getMatchingId() != Later.getMatchingId())
+ return false;
+ // At the moment, we don't remove ordered stores, but do remove
+ // unordered atomic stores. There's no special requirement (for
+ // unordered atomics) about removing atomic stores only in favor of
+ // other atomic stores since we were going to execute the non-atomic
+ // one anyway and the atomic one might never have become visible.
+ if (!Earlier.isUnordered() || !Later.isUnordered())
+ return false;
+
+ // Deal with non-target memory intrinsics.
+ bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
+ bool LNTI = isHandledNonTargetIntrinsic(Later.get());
+ if (ENTI && LNTI)
+ return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
+ cast<IntrinsicInst>(Later.get()));
+
+ // Because of the check above, at least one of them is false.
+ // For now disallow matching intrinsics with non-intrinsics,
+ // so assume that the stores match if neither is an intrinsic.
+ return ENTI == LNTI;
+}
+
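And a hedged example of the trivial DSE that overridingStores gates (plain C++): two unordered stores to the same location with no intervening load, so the earlier one is deleted.

  void trivialDSE(int *p) {
    *p = 1;  // dead: overwritten below before any possible read
    *p = 2;
  }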
bool EarlyCSE::processNode(DomTreeNode *Node) {
bool Changed = false;
BasicBlock *BB = Node->getBlock();
@@ -990,6 +1232,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // Likewise, noalias intrinsics don't actually write.
+ if (match(&Inst,
+ m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
+ LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
+ << '\n');
+ continue;
+ }
+
// Skip sideeffect intrinsics, for the same reason as assume intrinsics.
if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
@@ -1136,32 +1386,21 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// we can assume the current load loads the same value as the dominating
// load.
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
- if (InVal.DefInst != nullptr &&
- InVal.MatchingId == MemInst.getMatchingId() &&
- // We don't yet handle removing loads with ordering of any kind.
- !MemInst.isVolatile() && MemInst.isUnordered() &&
- // We can't replace an atomic load with one which isn't also atomic.
- InVal.IsAtomic >= MemInst.isAtomic() &&
- (isOperatingOnInvariantMemAt(&Inst, InVal.Generation) ||
- isSameMemGeneration(InVal.Generation, CurrentGeneration,
- InVal.DefInst, &Inst))) {
- Value *Op = getOrCreateResult(InVal.DefInst, Inst.getType());
- if (Op != nullptr) {
- LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
- << " to: " << *InVal.DefInst << '\n');
- if (!DebugCounter::shouldExecute(CSECounter)) {
- LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
- continue;
- }
- if (!Inst.use_empty())
- Inst.replaceAllUsesWith(Op);
- salvageKnowledge(&Inst, &AC);
- removeMSSA(Inst);
- Inst.eraseFromParent();
- Changed = true;
- ++NumCSELoad;
+ if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
+ LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
+ << " to: " << *InVal.DefInst << '\n');
+ if (!DebugCounter::shouldExecute(CSECounter)) {
+ LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
+ if (!Inst.use_empty())
+ Inst.replaceAllUsesWith(Op);
+ salvageKnowledge(&Inst, &AC);
+ removeMSSA(Inst);
+ Inst.eraseFromParent();
+ Changed = true;
+ ++NumCSELoad;
+ continue;
}
// Otherwise, remember that we have this instruction.
@@ -1231,13 +1470,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (MemInst.isValid() && MemInst.isStore()) {
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
if (InVal.DefInst &&
- InVal.DefInst == getOrCreateResult(&Inst, InVal.DefInst->getType()) &&
- InVal.MatchingId == MemInst.getMatchingId() &&
- // We don't yet handle removing stores with ordering of any kind.
- !MemInst.isVolatile() && MemInst.isUnordered() &&
- (isOperatingOnInvariantMemAt(&Inst, InVal.Generation) ||
- isSameMemGeneration(InVal.Generation, CurrentGeneration,
- InVal.DefInst, &Inst))) {
+ InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
// It is okay to have a LastStore to a different pointer here if MemorySSA
// tells us that the load and store are from the same memory generation.
// In that case, LastStore should keep its present value since we're
@@ -1272,17 +1505,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (MemInst.isValid() && MemInst.isStore()) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
- // At the moment, we don't remove ordered stores, but do remove
- // unordered atomic stores. There's no special requirement (for
- // unordered atomics) about removing atomic stores only in favor of
- // other atomic stores since we were going to execute the non-atomic
- // one anyway and the atomic one might never have become visible.
if (LastStore) {
- ParseMemoryInst LastStoreMemInst(LastStore, TTI);
- assert(LastStoreMemInst.isUnordered() &&
- !LastStoreMemInst.isVolatile() &&
- "Violated invariant");
- if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
+ if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
<< " due to: " << Inst << '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
@@ -1443,6 +1667,7 @@ public:
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
if (UseMemorySSA) {
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
}
@@ -1484,6 +1709,7 @@ INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
"Early CSE w/ MemorySSA", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 72512430b366..e54a270fb276 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -12,6 +12,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
index 83f4c402ed4d..b6d82685e884 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
index b16f8591b5a4..c6b6d75aefe8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -26,8 +26,8 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
@@ -36,6 +36,8 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -46,7 +48,6 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -98,21 +99,33 @@ STATISTIC(NumGVNSimpl, "Number of instructions simplified");
STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
+STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
+ "Number of blocks speculated as available in "
+ "IsValueFullyAvailableInBlock(), max");
+STATISTIC(MaxBBSpeculationCutoffReachedTimes,
+ "Number of times we we reached gvn-max-block-speculations cut-off "
+ "preventing further exploration");
+
static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
cl::init(true));
+static cl::opt<bool>
+GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
+ cl::init(true));
static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
-// Maximum allowed recursion depth.
-static cl::opt<uint32_t>
-MaxRecurseDepth("gvn-max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
- cl::desc("Max recurse depth in GVN (default = 1000)"));
-
static cl::opt<uint32_t> MaxNumDeps(
"gvn-max-num-deps", cl::Hidden, cl::init(100), cl::ZeroOrMore,
cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
+// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
+static cl::opt<uint32_t> MaxBBSpeculations(
+ "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::ZeroOrMore,
+ cl::desc("Max number of blocks we're willing to speculate on (and recurse "
+ "into) when deducing if a value is fully available or not in GVN "
+ "(default = 600)"));
+
struct llvm::GVN::Expression {
uint32_t opcode;
bool commutative = false;
@@ -282,9 +295,9 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
if (I->isCommutative()) {
// Ensure that commutative instructions that only differ by a permutation
// of their operands get the same value number by sorting the operand value
- // numbers. Since all commutative instructions have two operands it is more
+ // numbers. Since commutative operands are the 1st two operands it is more
// efficient to sort by hand rather than using, say, std::sort.
- assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
+ assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
e.commutative = true;
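
A self-contained sketch of the canonicalization idea above, using invented names (Expr, makeCommutativeKey) rather than GVN's own classes; swapping the first two operand value numbers into a fixed order is what makes permuted commutative operands produce the same key:

// commutative_key.cpp: illustrative sketch, not part of the upstream diff.
#include <cassert>
#include <tuple>
#include <utility>

// A toy expression key: opcode plus the value numbers of the first two
// operands. Ordering the first two operand numbers makes add(v1, v2) and
// add(v2, v1) produce identical keys.
using Expr = std::tuple<unsigned, unsigned, unsigned>;

Expr makeCommutativeKey(unsigned Opcode, unsigned Op0, unsigned Op1) {
  if (Op0 > Op1)
    std::swap(Op0, Op1); // Hand-rolled "sort" of exactly two elements.
  return {Opcode, Op0, Op1};
}

int main() {
  // The same commutative operation with permuted operands maps to one key.
  assert(makeCommutativeKey(13, 7, 3) == makeCommutativeKey(13, 3, 7));
  return 0;
}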
@@ -353,9 +366,7 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
OI != OE; ++OI)
e.varargs.push_back(lookupOrAdd(*OI));
- for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
+ append_range(e.varargs, EI->indices());
return e;
}
@@ -399,9 +410,12 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
}
if (local_dep.isDef()) {
- CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
+ // For masked load/store intrinsics, the local_dep may actually be
+ // a normal load or store instruction.
+ CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
- if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ if (!local_cdep ||
+ local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
@@ -626,6 +640,11 @@ bool GVN::isLoadInLoopPREEnabled() const {
return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE);
}
+bool GVN::isLoadPRESplitBackedgeEnabled() const {
+ return Options.AllowLoadPRESplitBackedge.getValueOr(
+ GVNEnableSplitBackedgeInLoadPRE);
+}
+
bool GVN::isMemDepEnabled() const {
return Options.AllowMemDep.getValueOr(GVNEnableMemDep);
}
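
The new isLoadPRESplitBackedgeEnabled() accessor follows the same shape as the existing ones: a per-instance option that falls back to a command-line default when left unset. A rough sketch of that fallback pattern, with invented names (Options, isFeatureEnabled) and std::optional standing in for llvm::Optional:

// option_fallback.cpp: illustrative sketch, not part of the upstream diff.
#include <cstdio>
#include <optional>

// Global default, standing in for a cl::opt flag such as
// -enable-split-backedge-in-load-pre.
static bool EnableFeatureByDefault = true;

struct Options {
  // Unset means "defer to the global default".
  std::optional<bool> AllowFeature;
};

bool isFeatureEnabled(const Options &O) {
  // Mirrors Optional::getValueOr: an explicit setting wins, else the flag.
  return O.AllowFeature.value_or(EnableFeatureByDefault);
}

int main() {
  Options Defaulted;        // falls back to the global default
  Options ForcedOff{false}; // explicit per-instance override
  std::printf("%d %d\n", isFeatureEnabled(Defaulted), isFeatureEnabled(ForcedOff));
  return 0;
}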
@@ -642,14 +661,18 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
auto *MemDep =
isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
+ auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE);
+ bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
+ MSSA ? &MSSA->getMSSA() : nullptr);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<GlobalsAA>();
PA.preserve<TargetLibraryAnalysis>();
+ if (MSSA)
+ PA.preserve<MemorySSAAnalysis>();
if (LI)
PA.preserve<LoopAnalysis>();
return PA;
@@ -667,6 +690,18 @@ LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
}
#endif
+enum class AvailabilityState : char {
+ /// We know the block *is not* fully available. This is a fixpoint.
+ Unavailable = 0,
+ /// We know the block *is* fully available. This is a fixpoint.
+ Available = 1,
+ /// We do not know whether the block is fully available or not,
+ /// but we are currently speculating that it will be.
+ /// If it would have turned out that the block was, in fact, not fully
+ /// available, this would have been cleaned up into an Unavailable.
+ SpeculativelyAvailable = 2,
+};
+
/// Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
@@ -675,76 +710,118 @@ LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
/// 1) we know the block *is* fully available.
/// 2) we do not know whether the block is fully available or not, but we are
/// currently speculating that it will be.
-/// 3) we are speculating for this block and have used that to speculate for
-/// other blocks.
-static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
- DenseMap<BasicBlock*, char> &FullyAvailableBlocks,
- uint32_t RecurseDepth) {
- if (RecurseDepth > MaxRecurseDepth)
- return false;
-
- // Optimistically assume that the block is fully available and check to see
- // if we already know about this block in one lookup.
- std::pair<DenseMap<BasicBlock*, char>::iterator, bool> IV =
- FullyAvailableBlocks.insert(std::make_pair(BB, 2));
-
- // If the entry already existed for this block, return the precomputed value.
- if (!IV.second) {
- // If this is a speculative "available" value, mark it as being used for
- // speculation of other blocks.
- if (IV.first->second == 2)
- IV.first->second = 3;
- return IV.first->second != 0;
- }
-
- // Otherwise, see if it is fully available in all predecessors.
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-
- // If this block has no predecessors, it isn't live-in here.
- if (PI == PE)
- goto SpeculationFailure;
+static bool IsValueFullyAvailableInBlock(
+ BasicBlock *BB,
+ DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
+ SmallVector<BasicBlock *, 32> Worklist;
+ Optional<BasicBlock *> UnavailableBB;
+
+ // The number of times we didn't find an entry for a block in a map and
+ // optimistically inserted an entry marking block as speculatively available.
+ unsigned NumNewNewSpeculativelyAvailableBBs = 0;
+
+#ifndef NDEBUG
+ SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
+ SmallVector<BasicBlock *, 32> AvailableBBs;
+#endif
- for (; PI != PE; ++PI)
- // If the value isn't fully available in one of our predecessors, then it
- // isn't fully available in this block either. Undo our previous
- // optimistic assumption and bail out.
- if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1))
- goto SpeculationFailure;
+ Worklist.emplace_back(BB);
+ while (!Worklist.empty()) {
+ BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
+ // Optimistically assume that the block is Speculatively Available and check
+ // to see if we already know about this block in one lookup.
+ std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
+ FullyAvailableBlocks.try_emplace(
+ CurrBB, AvailabilityState::SpeculativelyAvailable);
+ AvailabilityState &State = IV.first->second;
+
+ // Did the entry already exist for this block?
+ if (!IV.second) {
+ if (State == AvailabilityState::Unavailable) {
+ UnavailableBB = CurrBB;
+ break; // Backpropagate unavailability info.
+ }
- return true;
+#ifndef NDEBUG
+ AvailableBBs.emplace_back(CurrBB);
+#endif
+ continue; // Don't recurse further, but continue processing worklist.
+ }
-// If we get here, we found out that this is not, after
-// all, a fully-available block. We have a problem if we speculated on this and
-// used the speculation to mark other blocks as available.
-SpeculationFailure:
- char &BBVal = FullyAvailableBlocks[BB];
+ // No entry found for block.
+ ++NumNewNewSpeculativelyAvailableBBs;
+ bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
+
+ // If we have exhausted our budget, mark this block as unavailable.
+ // Also, if this block has no predecessors, the value isn't live-in here.
+ if (OutOfBudget || pred_empty(CurrBB)) {
+ MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
+ State = AvailabilityState::Unavailable;
+ UnavailableBB = CurrBB;
+ break; // Backpropagate unavailability info.
+ }
- // If we didn't speculate on this, just return with it set to false.
- if (BBVal == 2) {
- BBVal = 0;
- return false;
+ // Tentatively consider this block as speculatively available.
+#ifndef NDEBUG
+ NewSpeculativelyAvailableBBs.insert(CurrBB);
+#endif
+ // And further recurse into block's predecessors, in depth-first order!
+ Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
}
- // If we did speculate on this value, we could have blocks set to 1 that are
- // incorrect. Walk the (transitive) successors of this block and mark them as
- // 0 if set to one.
- SmallVector<BasicBlock*, 32> BBWorklist;
- BBWorklist.push_back(BB);
-
- do {
- BasicBlock *Entry = BBWorklist.pop_back_val();
- // Note that this sets blocks to 0 (unavailable) if they happen to not
- // already be in FullyAvailableBlocks. This is safe.
- char &EntryVal = FullyAvailableBlocks[Entry];
- if (EntryVal == 0) continue; // Already unavailable.
-
- // Mark as unavailable.
- EntryVal = 0;
+#if LLVM_ENABLE_STATS
+ IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
+ NumNewNewSpeculativelyAvailableBBs);
+#endif
- BBWorklist.append(succ_begin(Entry), succ_end(Entry));
- } while (!BBWorklist.empty());
+ // Helper: if the given block isn't marked as a fixpoint yet (the
+ // Unavailable and Available states are fixpoints), mark it with the given
+ // fixpoint state and enqueue its successors for further processing.
+ auto MarkAsFixpointAndEnqueueSuccessors =
+ [&](BasicBlock *BB, AvailabilityState FixpointState) {
+ auto It = FullyAvailableBlocks.find(BB);
+ if (It == FullyAvailableBlocks.end())
+ return; // Never queried this block, leave as-is.
+ switch (AvailabilityState &State = It->second) {
+ case AvailabilityState::Unavailable:
+ case AvailabilityState::Available:
+ return; // Don't backpropagate further, continue processing worklist.
+ case AvailabilityState::SpeculativelyAvailable: // Fix it!
+ State = FixpointState;
+#ifndef NDEBUG
+ assert(NewSpeculativelyAvailableBBs.erase(BB) &&
+ "Found a speculatively available successor leftover?");
+#endif
+ // Queue successors for further processing.
+ Worklist.append(succ_begin(BB), succ_end(BB));
+ return;
+ }
+ };
+
+ if (UnavailableBB) {
+ // Okay, we have encountered an unavailable block.
+ // Mark speculatively available blocks reachable from UnavailableBB as
+ // unavailable as well. Paths are terminated when they reach blocks not in
+ // FullyAvailableBlocks or they are not marked as speculatively available.
+ Worklist.clear();
+ Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
+ while (!Worklist.empty())
+ MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
+ AvailabilityState::Unavailable);
+ }
+
+#ifndef NDEBUG
+ Worklist.clear();
+ for (BasicBlock *AvailableBB : AvailableBBs)
+ Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
+ while (!Worklist.empty())
+ MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
+ AvailabilityState::Available);
+
+ assert(NewSpeculativelyAvailableBBs.empty() &&
+ "Must have fixed all the new speculatively available blocks.");
+#endif
- return false;
+ return !UnavailableBB;
}
/// Given a set of loads specified by ValuesPerBlock,
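
The rewrite above replaces bounded recursion with an explicit worklist over three states plus a speculation budget, then back-propagates a definitive answer over the blocks it guessed about. A simplified, self-contained sketch of that optimistic worklist scheme follows; Block, Avail and isFullyAvailable are invented for the illustration, and unlike the patch it conservatively downgrades every speculated block on failure:

// avail_sketch.cpp: illustrative only, not part of the upstream diff.
#include <cstdio>
#include <unordered_map>
#include <vector>

struct Block {
  std::vector<Block *> Preds;
};

enum class Avail : char { No = 0, Yes = 1, Speculative = 2 };

// Returns true if every backward path from BB reaches a block already marked
// Avail::Yes in States, visiting at most Budget previously unseen blocks.
static bool isFullyAvailable(Block *BB,
                             std::unordered_map<Block *, Avail> &States,
                             unsigned Budget) {
  std::vector<Block *> Worklist{BB};
  std::vector<Block *> Speculated;
  bool Failed = false;

  while (!Worklist.empty() && !Failed) {
    Block *Cur = Worklist.back();
    Worklist.pop_back();

    // Optimistically record the block as speculatively available.
    auto [It, Inserted] = States.try_emplace(Cur, Avail::Speculative);
    if (!Inserted) {
      if (It->second == Avail::No)
        Failed = true; // A known-unavailable block ends the query.
      continue;        // Yes or already speculative: nothing more to do here.
    }

    Speculated.push_back(Cur);
    // Out of budget, or an entry block with no predecessors: not available.
    if (Speculated.size() > Budget || Cur->Preds.empty()) {
      Failed = true;
      continue;
    }
    // Keep walking backwards through the predecessors, depth first.
    Worklist.insert(Worklist.end(), Cur->Preds.begin(), Cur->Preds.end());
  }

  if (Failed)
    for (Block *B : Speculated)
      States[B] = Avail::No; // Conservative fix-up of the optimistic guesses.
  return !Failed;
}

int main() {
  // Diamond CFG: Entry -> A, Entry -> B, A -> Join, B -> Join.
  Block Entry;
  Block A{{&Entry}}, B{{&Entry}};
  Block Join{{&A, &B}};

  std::unordered_map<Block *, Avail> States{{&A, Avail::Yes}, {&B, Avail::Yes}};
  std::printf("available via A and B: %d\n", isFullyAvailable(&Join, States, 100));

  States.clear();
  States[&A] = Avail::Yes; // B unmarked: the path through B reaches Entry.
  std::printf("available via A only: %d\n", isFullyAvailable(&Join, States, 100));
  return 0;
}

Keeping the three-state map across the whole query is what lets a late failure invalidate earlier optimistic guesses without rescanning the graph.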
@@ -963,7 +1040,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
- // different types if we have to. If the stored value is larger or equal to
+ // different types if we have to. If the stored value is convertible to
// the loaded value, we can reuse it.
if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(),
DL))
@@ -1062,7 +1139,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// backwards through predecessors if needed.
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
- bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
// Check that there is no implicit control flow instructions above our load in
// its block. If there is an instruction that doesn't always pass the
@@ -1079,8 +1155,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// because if the index is out of bounds we should deoptimize rather than
// access the array.
// Check that there is no guard in this block above our instruction.
- if (!IsSafeToSpeculativelyExecute && ICF->isDominatedByICFIFromSameBlock(LI))
- return false;
+ bool MustEnsureSafetyOfSpeculativeExecution =
+ ICF->isDominatedByICFIFromSameBlock(LI);
+
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1097,8 +1174,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
// Check that there is no implicit control flow in a block above.
- if (!IsSafeToSpeculativelyExecute && ICF->hasICF(TmpBB))
- return false;
+ MustEnsureSafetyOfSpeculativeExecution =
+ MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
}
assert(TmpBB);
@@ -1107,11 +1184,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check to see how many predecessors have the loaded value fully
// available.
MapVector<BasicBlock *, Value *> PredLoads;
- DenseMap<BasicBlock*, char> FullyAvailableBlocks;
+ DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
for (const AvailableValueInBlock &AV : ValuesPerBlock)
- FullyAvailableBlocks[AV.BB] = true;
+ FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
for (BasicBlock *UnavailableBB : UnavailableBlocks)
- FullyAvailableBlocks[UnavailableBB] = false;
+ FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
SmallVector<BasicBlock *, 4> CriticalEdgePred;
for (BasicBlock *Pred : predecessors(LoadBB)) {
@@ -1124,7 +1201,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
+ if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
continue;
}
@@ -1151,6 +1228,16 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
+ // Do not split backedge as it will break the canonical loop form.
+ if (!isLoadPRESplitBackedgeEnabled())
+ if (DT->dominates(LoadBB, Pred)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
CriticalEdgePred.push_back(Pred);
} else {
// Only add the predecessors that will not be split for now.
@@ -1170,6 +1257,17 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (NumUnavailablePreds != 1)
return false;
+ // Now we know where we will insert the load. We must ensure that it is safe
+ // to speculatively execute the load at that point.
+ if (MustEnsureSafetyOfSpeculativeExecution) {
+ if (CriticalEdgePred.size())
+ if (!isSafeToSpeculativelyExecute(LI, LoadBB->getFirstNonPHI(), DT))
+ return false;
+ for (auto &PL : PredLoads)
+ if (!isSafeToSpeculativelyExecute(LI, PL.first->getTerminator(), DT))
+ return false;
+ }
+
// Split critical edges, and update the unavailable predecessors accordingly.
for (BasicBlock *OrigPred : CriticalEdgePred) {
BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
@@ -1251,8 +1349,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Instructions that have been inserted in predecessor(s) to materialize
// the load address do not retain their original debug locations. Doing
// so could lead to confusing (but correct) source attributions.
- if (const DebugLoc &DL = I->getDebugLoc())
- I->setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
+ I->updateLocationAfterHoist();
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
@@ -1270,6 +1367,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
LI->getAlign(), LI->getOrdering(), LI->getSyncScopeID(),
UnavailablePred->getTerminator());
NewLoad->setDebugLoc(LI->getDebugLoc());
+ if (MSSAU) {
+ auto *MSSA = MSSAU->getMemorySSA();
+ // Get the defining access of the original load or use the load if it is a
+ // MemoryDef (e.g. because it is volatile). The inserted loads are
+ // guaranteed to load from the same definition.
+ auto *LIAcc = MSSA->getMemoryAccess(LI);
+ auto *DefiningAcc =
+ isa<MemoryDef>(LIAcc) ? LIAcc : LIAcc->getDefiningAccess();
+ auto *NewAccess = MSSAU->createMemoryAccessInBB(
+ NewLoad, DefiningAcc, NewLoad->getParent(),
+ MemorySSA::BeforeTerminator);
+ if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
+ MSSAU->insertDef(NewDef, /*RenameUses=*/true);
+ else
+ MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
+ }
// Transfer the old load's AA tags to the new load.
AAMDNodes Tags;
@@ -1357,13 +1470,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return false;
}
+ bool Changed = false;
// If this load follows a GEP, see if we can PRE the indices before analyzing.
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) {
for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
OE = GEP->idx_end();
OI != OE; ++OI)
if (Instruction *I = dyn_cast<Instruction>(OI->get()))
- performScalarPRE(I);
+ Changed |= performScalarPRE(I);
}
// Step 2: Analyze the availability of the load
@@ -1374,7 +1488,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If we have no predecessors that produce a known value for this load, exit
// early.
if (ValuesPerBlock.empty())
- return false;
+ return Changed;
// Step 3: Eliminate fully redundancy.
//
@@ -1406,12 +1520,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// Step 4: Eliminate partial redundancy.
if (!isPREEnabled() || !isLoadPREEnabled())
- return false;
+ return Changed;
if (!isLoadInLoopPREEnabled() && this->LI &&
this->LI->getLoopFor(LI->getParent()))
- return false;
+ return Changed;
- return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+ return Changed || PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
}
static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
@@ -1486,9 +1600,40 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
// Insert a new store to null instruction before the load to indicate that
// this code is not reachable. FIXME: We could insert unreachable
// instruction directly because we can modify the CFG.
- new StoreInst(UndefValue::get(Int8Ty),
- Constant::getNullValue(Int8Ty->getPointerTo()),
- IntrinsicI);
+ auto *NewS = new StoreInst(UndefValue::get(Int8Ty),
+ Constant::getNullValue(Int8Ty->getPointerTo()),
+ IntrinsicI);
+ if (MSSAU) {
+ const MemoryUseOrDef *FirstNonDom = nullptr;
+ const auto *AL =
+ MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());
+
+ // If there are accesses in the current basic block, find the first one
+ // that does not come before NewS. The new memory access is inserted
+ // after the found access or before the terminator if no such access is
+ // found.
+ if (AL) {
+ for (auto &Acc : *AL) {
+ if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
+ if (!Current->getMemoryInst()->comesBefore(NewS)) {
+ FirstNonDom = Current;
+ break;
+ }
+ }
+ }
+
+ // This added store is to null, so it will never be executed and we can
+ // just use the LiveOnEntry def as its defining access.
+ auto *NewDef =
+ FirstNonDom ? MSSAU->createMemoryAccessBefore(
+ NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
+ const_cast<MemoryUseOrDef *>(FirstNonDom))
+ : MSSAU->createMemoryAccessInBB(
+ NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
+ NewS->getParent(), MemorySSA::BeforeTerminator);
+
+ MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
+ }
}
if (isAssumeWithEmptyBundle(*IntrinsicI))
markInstructionForDeletion(IntrinsicI);
@@ -1516,6 +1661,11 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
// br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
ReplaceOperandsWithMap[V] = True;
+ // Similarly, after assume(!NotV) we know that NotV == false.
+ Value *NotV;
+ if (match(V, m_Not(m_Value(NotV))))
+ ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());
+
// If we find an equality fact, canonicalize all dominated uses in this block
// to one of the two values. We heuristically choose the "oldest" of the
// two where age is determined by value number. (Note that propagateEquality
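
The added m_Not case extends the existing rule: assume(V) lets dominated uses of V be rewritten to true, and when V is itself a negation the negated value can also be rewritten to false. A toy sketch of that bookkeeping, with invented types (Expr, recordAssume) rather than LLVM's PatternMatch helpers:

// assume_facts.cpp: illustrative sketch, not part of the upstream diff.
#include <cassert>
#include <map>
#include <string>

// A toy boolean expression: a named value that may be the negation of another.
struct Expr {
  std::string Name;              // the value itself
  const Expr *Negated = nullptr; // non-null if this node computes !Negated
};

// After assume(V) we record V == true; if V is !X we also learn X == false.
void recordAssume(const Expr &V, std::map<std::string, bool> &Known) {
  Known[V.Name] = true;
  if (V.Negated)
    Known[V.Negated->Name] = false;
}

int main() {
  Expr X{"x"};
  Expr NotX{"not_x", &X};
  std::map<std::string, bool> Known;
  recordAssume(NotX, Known); // assume(!x)
  assert(Known.at("not_x") == true);
  assert(Known.at("x") == false);
  return 0;
}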
@@ -1622,6 +1772,8 @@ bool GVN::processLoad(LoadInst *L) {
// Replace the load!
patchAndReplaceAllUsesWith(L, AvailableValue);
markInstructionForDeletion(L);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(L);
++NumGVNLoad;
reportLoadElim(L, AvailableValue, ORE);
// Tell MDA to rexamine the reused pointer since we might have more
@@ -1743,7 +1895,7 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
}
if (Exp.commutative) {
- assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
+ assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!");
if (Exp.varargs[0] > Exp.varargs[1]) {
std::swap(Exp.varargs[0], Exp.varargs[1]);
uint32_t Opcode = Exp.opcode >> 8;
@@ -1766,11 +1918,8 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
/// again.
void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
const BasicBlock &CurrBlock) {
- for (const BasicBlock *Pred : predecessors(&CurrBlock)) {
- auto FindRes = PhiTranslateTable.find({Num, Pred});
- if (FindRes != PhiTranslateTable.end())
- PhiTranslateTable.erase(FindRes);
- }
+ for (const BasicBlock *Pred : predecessors(&CurrBlock))
+ PhiTranslateTable.erase({Num, Pred});
}
// In order to find a leader for a given value number at a
@@ -1934,8 +2083,8 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
// If "A && B" is known true then both A and B are known true. If "A || B"
// is known false then both A and B are known false.
Value *A, *B;
- if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
- (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
+ if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
+ (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
Worklist.push_back(std::make_pair(A, RHS));
Worklist.push_back(std::make_pair(B, RHS));
continue;
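
Switching from m_And/m_Or to m_LogicalAnd/m_LogicalOr lets the same rule fire on the select-based spellings of the short-circuit operators as well as the plain bitwise i1 forms. The underlying boolean facts are easy to verify exhaustively; a small sketch with hypothetical helpers (logicalAnd, logicalOr):

// logical_facts.cpp: illustrative sketch, not part of the upstream diff.
#include <cassert>

// Select-based spellings of the short-circuit operators.
bool logicalAnd(bool C, bool X) { return C ? X : false; } // select C, X, false
bool logicalOr(bool C, bool X) { return C ? true : X; }   // select C, true, X

int main() {
  for (bool A : {false, true})
    for (bool B : {false, true}) {
      // The select forms agree with the plain boolean operators.
      assert(logicalAnd(A, B) == (A && B));
      assert(logicalOr(A, B) == (A || B));
      // If "A && B" is known true, both operands are known true.
      if (logicalAnd(A, B))
        assert(A && B);
      // If "A || B" is known false, both operands are known false.
      if (!logicalOr(A, B))
        assert(!A && !B);
    }
  return 0;
}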
@@ -2137,7 +2286,7 @@ bool GVN::processInstruction(Instruction *I) {
bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
const TargetLibraryInfo &RunTLI, AAResults &RunAA,
MemoryDependenceResults *RunMD, LoopInfo *LI,
- OptimizationRemarkEmitter *RunORE) {
+ OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
AC = &RunAC;
DT = &RunDT;
VN.setDomTree(DT);
@@ -2150,6 +2299,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
VN.setMemDep(MD);
ORE = RunORE;
InvalidBlockRPONumbers = true;
+ MemorySSAUpdater Updater(MSSA);
+ MSSAU = MSSA ? &Updater : nullptr;
bool Changed = false;
bool ShouldContinue = true;
@@ -2160,7 +2311,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
BasicBlock *BB = &*FI++;
- bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, nullptr, MD);
+ bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD);
if (removedBlock)
++NumGVNBlocks;
@@ -2196,6 +2347,9 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// iteration.
DeadBlocks.clear();
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
return Changed;
}
@@ -2236,6 +2390,8 @@ bool GVN::processBlock(BasicBlock *BB) {
salvageKnowledge(I, AC);
salvageDebugInfo(*I);
if (MD) MD->removeInstruction(I);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
LLVM_DEBUG(verifyRemoved(I));
ICF->removeInstruction(I);
I->eraseFromParent();
@@ -2323,10 +2479,14 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (isa<GetElementPtrInst>(CurInst))
return false;
- // We don't currently value number ANY inline asm calls.
- if (auto *CallB = dyn_cast<CallBase>(CurInst))
+ if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
+ // We don't currently value number ANY inline asm calls.
if (CallB->isInlineAsm())
return false;
+ // Don't do PRE on convergent calls.
+ if (CallB->isConvergent())
+ return false;
+ }
uint32_t ValNo = VN.lookup(CurInst);
@@ -2466,6 +2626,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
if (MD)
MD->removeInstruction(CurInst);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(CurInst);
LLVM_DEBUG(verifyRemoved(CurInst));
// FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
// some assertion failures.
@@ -2510,10 +2672,12 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
// possible.
BasicBlock *BB = SplitCriticalEdge(
Pred, Succ,
- CriticalEdgeSplittingOptions(DT, LI).unsetPreserveLoopSimplify());
- if (MD)
- MD->invalidateCachedPredecessors();
- InvalidBlockRPONumbers = true;
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
+ if (BB) {
+ if (MD)
+ MD->invalidateCachedPredecessors();
+ InvalidBlockRPONumbers = true;
+ }
return BB;
}
@@ -2522,14 +2686,20 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
bool GVN::splitCriticalEdges() {
if (toSplit.empty())
return false;
+
+ bool Changed = false;
do {
std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
- SplitCriticalEdge(Edge.first, Edge.second,
- CriticalEdgeSplittingOptions(DT, LI));
+ Changed |= SplitCriticalEdge(Edge.first, Edge.second,
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
+ nullptr;
} while (!toSplit.empty());
- if (MD) MD->invalidateCachedPredecessors();
- InvalidBlockRPONumbers = true;
- return true;
+ if (Changed) {
+ if (MD)
+ MD->invalidateCachedPredecessors();
+ InvalidBlockRPONumbers = true;
+ }
+ return Changed;
}
/// Executes one iteration of GVN
@@ -2633,13 +2803,12 @@ void GVN::addDeadBlock(BasicBlock *BB) {
// First, split the critical edges. This might also create additional blocks
// to preserve LoopSimplify form and adjust edges accordingly.
- SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
+ SmallVector<BasicBlock *, 4> Preds(predecessors(B));
for (BasicBlock *P : Preds) {
if (!DeadBlocks.count(P))
continue;
- if (llvm::any_of(successors(P),
- [B](BasicBlock *Succ) { return Succ == B; }) &&
+ if (llvm::is_contained(successors(P), B) &&
isCriticalEdge(P->getTerminator(), B)) {
if (BasicBlock *S = splitCriticalEdges(P, B))
DeadBlocks.insert(P = S);
@@ -2724,6 +2893,7 @@ public:
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
return Impl.runImpl(
F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
@@ -2733,7 +2903,8 @@ public:
? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
: nullptr,
LIWP ? &LIWP->getLoopInfo() : nullptr,
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
+ MSSAWP ? &MSSAWP->getMSSA() : nullptr);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -2744,12 +2915,12 @@ public:
if (Impl.isMemDepEnabled())
AU.addRequired<MemoryDependenceWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
-
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<TargetLibraryInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
private:
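
Taken together, the GVN.cpp changes follow one discipline: use MemorySSA only when it is already available, route every mutation through an updater when it is, and report it preserved so the cached analysis stays valid. A generic sketch of that optional-updater pattern, with invented names (Index, IndexUpdater, eraseValue):

// optional_updater.cpp: illustrative sketch, not part of the upstream diff.
#include <algorithm>
#include <cstdio>
#include <set>
#include <vector>

// A cached analysis over the data: here, simply the set of present values.
struct Index {
  std::set<int> Present;
};

// Incremental updater; plays the role MemorySSAUpdater plays for MemorySSA.
struct IndexUpdater {
  Index &Idx;
  void notifyErased(int V) { Idx.Present.erase(V); }
};

// The transform takes a possibly-null updater: when the analysis was never
// computed, it skips the bookkeeping instead of rebuilding anything.
void eraseValue(std::vector<int> &Data, int V, IndexUpdater *Updater) {
  Data.erase(std::remove(Data.begin(), Data.end(), V), Data.end());
  if (Updater) // keep the cached analysis in sync, if there is one
    Updater->notifyErased(V);
}

int main() {
  // When the analysis has been computed, thread an updater through.
  std::vector<int> Data{1, 2, 3};
  Index Idx{{1, 2, 3}};
  IndexUpdater U{Idx};
  eraseValue(Data, 2, &U);
  std::printf("index still tracks 2: %d\n", (int)Idx.Present.count(2)); // 0

  // When it has not been computed, pass nullptr and skip the bookkeeping.
  std::vector<int> Other{4, 5};
  eraseValue(Other, 4, nullptr);
  return 0;
}

Passing the updater as a possibly-null pointer keeps the transform identical whether or not the analysis was ever computed; only the bookkeeping is conditional.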
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 9c4cdf2feb56..8d0bd5674964 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -149,8 +149,8 @@ struct CHIArg {
// The instruction (VN) which uses the values flowing out of CHI.
Instruction *I;
- bool operator==(const CHIArg &A) { return VN == A.VN; }
- bool operator!=(const CHIArg &A) { return !(*this == A); }
+ bool operator==(const CHIArg &A) const { return VN == A.VN; }
+ bool operator!=(const CHIArg &A) const { return !(*this == A); }
};
using CHIIt = SmallVectorImpl<CHIArg>::iterator;
@@ -242,11 +242,14 @@ public:
};
static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
- static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group, LLVMContext::MD_access_group};
+ static const unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_range,
+ LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group,
+ LLVMContext::MD_access_group};
combineMetadata(ReplInst, I, KnownIDs, true);
}
@@ -260,43 +263,7 @@ public:
: DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA),
MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
- bool run(Function &F) {
- NumFuncArgs = F.arg_size();
- VN.setDomTree(DT);
- VN.setAliasAnalysis(AA);
- VN.setMemDep(MD);
- bool Res = false;
- // Perform DFS Numbering of instructions.
- unsigned BBI = 0;
- for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) {
- DFSNumber[BB] = ++BBI;
- unsigned I = 0;
- for (auto &Inst : *BB)
- DFSNumber[&Inst] = ++I;
- }
-
- int ChainLength = 0;
-
- // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
- while (true) {
- if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength)
- return Res;
-
- auto HoistStat = hoistExpressions(F);
- if (HoistStat.first + HoistStat.second == 0)
- return Res;
-
- if (HoistStat.second > 0)
- // To address a limitation of the current GVN, we need to rerun the
- // hoisting after we hoisted loads or stores in order to be able to
- // hoist all scalars dependent on the hoisted ld/st.
- VN.clear();
-
- Res = true;
- }
-
- return Res;
- }
+ bool run(Function &F);
// Copied from NewGVN.cpp
// This function provides global ranking of operations so that we can place
@@ -304,27 +271,7 @@ public:
// for a complete ordering, as constants all have the same rank. However,
// generally, we will simplify an operation with all constants so that it
// doesn't matter what order they appear in.
- unsigned int rank(const Value *V) const {
- // Prefer constants to undef to anything else
- // Undef is a constant, have to check it first.
- // Prefer smaller constants to constantexprs
- if (isa<ConstantExpr>(V))
- return 2;
- if (isa<UndefValue>(V))
- return 1;
- if (isa<Constant>(V))
- return 0;
- else if (auto *A = dyn_cast<Argument>(V))
- return 3 + A->getArgNo();
-
- // Need to shift the instruction DFS by number of arguments + 3 to account
- // for the constant and argument ranking above.
- auto Result = DFSNumber.lookup(V);
- if (Result > 0)
- return 4 + NumFuncArgs + Result;
- // Unreachable or something else, just return a really large number.
- return ~0;
- }
+ unsigned int rank(const Value *V) const;
private:
GVN::ValueTable VN;
@@ -344,33 +291,7 @@ private:
enum InsKind { Unknown, Scalar, Load, Store };
// Return true when there is exception handling in BB.
- bool hasEH(const BasicBlock *BB) {
- auto It = BBSideEffects.find(BB);
- if (It != BBSideEffects.end())
- return It->second;
-
- if (BB->isEHPad() || BB->hasAddressTaken()) {
- BBSideEffects[BB] = true;
- return true;
- }
-
- if (BB->getTerminator()->mayThrow()) {
- BBSideEffects[BB] = true;
- return true;
- }
-
- BBSideEffects[BB] = false;
- return false;
- }
-
- // Return true when a successor of BB dominates A.
- bool successorDominate(const BasicBlock *BB, const BasicBlock *A) {
- for (const BasicBlock *Succ : successors(BB))
- if (DT->dominates(Succ, A))
- return true;
-
- return false;
- }
+ bool hasEH(const BasicBlock *BB);
// Return true when I1 appears before I2 in the instructions of BB.
bool firstInBB(const Instruction *I1, const Instruction *I2) {
@@ -383,57 +304,10 @@ private:
// Return true when there are memory uses of Def in BB.
bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
- const BasicBlock *BB) {
- const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
- if (!Acc)
- return false;
-
- Instruction *OldPt = Def->getMemoryInst();
- const BasicBlock *OldBB = OldPt->getParent();
- const BasicBlock *NewBB = NewPt->getParent();
- bool ReachedNewPt = false;
-
- for (const MemoryAccess &MA : *Acc)
- if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) {
- Instruction *Insn = MU->getMemoryInst();
-
- // Do not check whether MU aliases Def when MU occurs after OldPt.
- if (BB == OldBB && firstInBB(OldPt, Insn))
- break;
-
- // Do not check whether MU aliases Def when MU occurs before NewPt.
- if (BB == NewBB) {
- if (!ReachedNewPt) {
- if (firstInBB(Insn, NewPt))
- continue;
- ReachedNewPt = true;
- }
- }
- if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA))
- return true;
- }
-
- return false;
- }
+ const BasicBlock *BB);
bool hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
- int &NBBsOnAllPaths) {
- // Stop walk once the limit is reached.
- if (NBBsOnAllPaths == 0)
- return true;
-
- // Impossible to hoist with exceptions on the path.
- if (hasEH(BB))
- return true;
-
- // No such instruction after HoistBarrier in a basic block was
- // selected for hoisting so instructions selected within basic block with
- // a hoist barrier can be hoisted.
- if ((BB != SrcBB) && HoistBarrier.count(BB))
- return true;
-
- return false;
- }
+ int &NBBsOnAllPaths);
// Return true when there is exception handling or there are loads of memory Def
// between Def and NewPt. This function is only called for stores: Def is
@@ -443,118 +317,19 @@ private:
// return true when the counter NBBsOnAllPaths reaches 0, except when it is
// initialized to -1 which is unlimited.
bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
- int &NBBsOnAllPaths) {
- const BasicBlock *NewBB = NewPt->getParent();
- const BasicBlock *OldBB = Def->getBlock();
- assert(DT->dominates(NewBB, OldBB) && "invalid path");
- assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) &&
- "def does not dominate new hoisting point");
-
- // Walk all basic blocks reachable in depth-first iteration on the inverse
- // CFG from OldBB to NewBB. These blocks are all the blocks that may be
- // executed between the execution of NewBB and OldBB. Hoisting an expression
- // from OldBB into NewBB has to be safe on all execution paths.
- for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
- const BasicBlock *BB = *I;
- if (BB == NewBB) {
- // Stop traversal when reaching HoistPt.
- I.skipChildren();
- continue;
- }
-
- if (hasEHhelper(BB, OldBB, NBBsOnAllPaths))
- return true;
-
- // Check that we do not move a store past loads.
- if (hasMemoryUse(NewPt, Def, BB))
- return true;
-
- // -1 is unlimited number of blocks on all paths.
- if (NBBsOnAllPaths != -1)
- --NBBsOnAllPaths;
-
- ++I;
- }
-
- return false;
- }
+ int &NBBsOnAllPaths);
// Return true when there is exception handling between HoistPt and BB.
// Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
// return true when the counter NBBsOnAllPaths reaches 0, except when it is
// initialized to -1 which is unlimited.
bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
- int &NBBsOnAllPaths) {
- assert(DT->dominates(HoistPt, SrcBB) && "Invalid path");
-
- // Walk all basic blocks reachable in depth-first iteration on
- // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
- // blocks that may be executed between the execution of NewHoistPt and
- // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
- // on all execution paths.
- for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) {
- const BasicBlock *BB = *I;
- if (BB == HoistPt) {
- // Stop traversal when reaching NewHoistPt.
- I.skipChildren();
- continue;
- }
-
- if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths))
- return true;
-
- // -1 is unlimited number of blocks on all paths.
- if (NBBsOnAllPaths != -1)
- --NBBsOnAllPaths;
-
- ++I;
- }
-
- return false;
- }
+ int &NBBsOnAllPaths);
// Return true when it is safe to hoist a memory load or store U from OldPt
// to NewPt.
bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
- MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) {
- // In place hoisting is safe.
- if (NewPt == OldPt)
- return true;
-
- const BasicBlock *NewBB = NewPt->getParent();
- const BasicBlock *OldBB = OldPt->getParent();
- const BasicBlock *UBB = U->getBlock();
-
- // Check for dependences on the Memory SSA.
- MemoryAccess *D = U->getDefiningAccess();
- BasicBlock *DBB = D->getBlock();
- if (DT->properlyDominates(NewBB, DBB))
- // Cannot move the load or store to NewBB above its definition in DBB.
- return false;
-
- if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
- if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
- if (!firstInBB(UD->getMemoryInst(), NewPt))
- // Cannot move the load or store to NewPt above its definition in D.
- return false;
-
- // Check for unsafe hoistings due to side effects.
- if (K == InsKind::Store) {
- if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths))
- return false;
- } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
- return false;
-
- if (UBB == NewBB) {
- if (DT->properlyDominates(DBB, NewBB))
- return true;
- assert(UBB == DBB);
- assert(MSSA->locallyDominates(D, U));
- }
-
- // No side effects: it is safe to hoist.
- return true;
- }
+ MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths);
// Return true when it is safe to hoist scalar instructions from all blocks in
// WL to HoistBB.
@@ -577,92 +352,21 @@ private:
// Returns the edge via which an instruction in BB will get the values from.
// Returns true when the values are flowing out to each edge.
- bool valueAnticipable(CHIArgs C, Instruction *TI) const {
- if (TI->getNumSuccessors() > (unsigned)size(C))
- return false; // Not enough args in this CHI.
-
- for (auto CHI : C) {
- BasicBlock *Dest = CHI.Dest;
- // Find if all the edges have values flowing out of BB.
- bool Found = llvm::any_of(
- successors(TI), [Dest](const BasicBlock *BB) { return BB == Dest; });
- if (!Found)
- return false;
- }
- return true;
- }
+ bool valueAnticipable(CHIArgs C, Instruction *TI) const;
// Check if it is safe to hoist values tracked by CHI in the range
// [Begin, End) and accumulate them in Safe.
void checkSafety(CHIArgs C, BasicBlock *BB, InsKind K,
- SmallVectorImpl<CHIArg> &Safe) {
- int NumBBsOnAllPaths = MaxNumberOfBBSInPath;
- for (auto CHI : C) {
- Instruction *Insn = CHI.I;
- if (!Insn) // No instruction was inserted in this CHI.
- continue;
- if (K == InsKind::Scalar) {
- if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths))
- Safe.push_back(CHI);
- } else {
- MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn);
- if (safeToHoistLdSt(BB->getTerminator(), Insn, UD, K, NumBBsOnAllPaths))
- Safe.push_back(CHI);
- }
- }
- }
+ SmallVectorImpl<CHIArg> &Safe);
using RenameStackType = DenseMap<VNType, SmallVector<Instruction *, 2>>;
// Push all the VNs corresponding to BB into RenameStack.
void fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
- RenameStackType &RenameStack) {
- auto it1 = ValueBBs.find(BB);
- if (it1 != ValueBBs.end()) {
- // Iterate in reverse order to keep lower ranked values on the top.
- for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
- // Get the value of instruction I
- LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
- RenameStack[VI.first].push_back(VI.second);
- }
- }
- }
+ RenameStackType &RenameStack);
void fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
- RenameStackType &RenameStack) {
- // For each *predecessor* (because Post-DOM) of BB check if it has a CHI
- for (auto Pred : predecessors(BB)) {
- auto P = CHIBBs.find(Pred);
- if (P == CHIBBs.end()) {
- continue;
- }
- LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName(););
- // A CHI is found (BB -> Pred is an edge in the CFG)
- // Pop the stack until Top(V) = Ve.
- auto &VCHI = P->second;
- for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) {
- CHIArg &C = *It;
- if (!C.Dest) {
- auto si = RenameStack.find(C.VN);
- // The Basic Block where CHI is must dominate the value we want to
- // track in a CHI. In the PDom walk, there can be values in the
- // stack which are not control dependent e.g., nested loop.
- if (si != RenameStack.end() && si->second.size() &&
- DT->properlyDominates(Pred, si->second.back()->getParent())) {
- C.Dest = BB; // Assign the edge
- C.I = si->second.pop_back_val(); // Assign the argument
- LLVM_DEBUG(dbgs()
- << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I
- << ", VN: " << C.VN.first << ", " << C.VN.second);
- }
- // Move to next CHI of a different value
- It = std::find_if(It, VCHI.end(),
- [It](CHIArg &A) { return A != *It; });
- } else
- ++It;
- }
- }
- }
+ RenameStackType &RenameStack);
// Walk the post-dominator tree top-down and use a stack for each value to
// store the last value you see. When you hit a CHI from a given edge, the
@@ -692,48 +396,7 @@ private:
// they form a list of anticipable values. OutValues contains CHIs
// corresponding to each basic block.
void findHoistableCandidates(OutValuesType &CHIBBs, InsKind K,
- HoistingPointList &HPL) {
- auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; };
-
- // CHIArgs now have the outgoing values, so check for anticipability and
- // accumulate hoistable candidates in HPL.
- for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) {
- BasicBlock *BB = A.first;
- SmallVectorImpl<CHIArg> &CHIs = A.second;
- // Vector of PHIs contains PHIs for different instructions.
- // Sort the args according to their VNs, such that identical
- // instructions are together.
- llvm::stable_sort(CHIs, cmpVN);
- auto TI = BB->getTerminator();
- auto B = CHIs.begin();
- // [PreIt, PHIIt) form a range of CHIs which have identical VNs.
- auto PHIIt = std::find_if(CHIs.begin(), CHIs.end(),
- [B](CHIArg &A) { return A != *B; });
- auto PrevIt = CHIs.begin();
- while (PrevIt != PHIIt) {
- // Collect values which satisfy safety checks.
- SmallVector<CHIArg, 2> Safe;
- // We check for safety first because there might be multiple values in
- // the same path, some of which are not safe to be hoisted, but overall
- // each edge has at least one value which can be hoisted, making the
- // value anticipable along that path.
- checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe);
-
- // List of safe values should be anticipable at TI.
- if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)) {
- HPL.push_back({BB, SmallVecInsn()});
- SmallVecInsn &V = HPL.back().second;
- for (auto B : Safe)
- V.push_back(B.I);
- }
-
- // Check other VNs
- PrevIt = PHIIt;
- PHIIt = std::find_if(PrevIt, CHIs.end(),
- [PrevIt](CHIArg &A) { return A != *PrevIt; });
- }
- }
- }
+ HoistingPointList &HPL);
// Compute insertion points for each values which can be fully anticipated at
// a dominator. HPL contains all such values.
@@ -791,14 +454,14 @@ private:
}
// Insert empty CHI node for this VN. This is used to factor out
// basic blocks where the ANTIC can potentially change.
- for (auto IDFB : IDFBlocks) {
+ CHIArg EmptyChi = {VN, nullptr, nullptr};
+ for (auto *IDFBB : IDFBlocks) {
for (unsigned i = 0; i < V.size(); ++i) {
- CHIArg C = {VN, nullptr, nullptr};
- // Ignore spurious PDFs.
- if (DT->properlyDominates(IDFB, V[i]->getParent())) {
- OutValue[IDFB].push_back(C);
- LLVM_DEBUG(dbgs() << "\nInsertion a CHI for BB: " << IDFB->getName()
- << ", for Insn: " << *V[i]);
+ // Ignore spurious PDFs.
+ if (DT->properlyDominates(IDFBB, V[i]->getParent())) {
+ OutValue[IDFBB].push_back(EmptyChi);
+ LLVM_DEBUG(dbgs() << "\nInserting a CHI for BB: "
+ << IDFBB->getName() << ", for Insn: " << *V[i]);
}
}
}
@@ -816,364 +479,754 @@ private:
// a load without hoisting its access function. So before hoisting any
// expression, make sure that all its operands are available at insert point.
bool allOperandsAvailable(const Instruction *I,
- const BasicBlock *HoistPt) const {
- for (const Use &Op : I->operands())
- if (const auto *Inst = dyn_cast<Instruction>(&Op))
- if (!DT->dominates(Inst->getParent(), HoistPt))
- return false;
-
- return true;
- }
+ const BasicBlock *HoistPt) const;
// Same as allOperandsAvailable with recursive check for GEP operands.
bool allGepOperandsAvailable(const Instruction *I,
- const BasicBlock *HoistPt) const {
- for (const Use &Op : I->operands())
- if (const auto *Inst = dyn_cast<Instruction>(&Op))
- if (!DT->dominates(Inst->getParent(), HoistPt)) {
- if (const GetElementPtrInst *GepOp =
- dyn_cast<GetElementPtrInst>(Inst)) {
- if (!allGepOperandsAvailable(GepOp, HoistPt))
- return false;
- // Gep is available if all operands of GepOp are available.
- } else {
- // Gep is not available if it has operands other than GEPs that are
- // defined in blocks not dominating HoistPt.
- return false;
- }
- }
- return true;
- }
+ const BasicBlock *HoistPt) const;
// Make all operands of the GEP available.
void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
const SmallVecInsn &InstructionsToHoist,
- Instruction *Gep) const {
- assert(allGepOperandsAvailable(Gep, HoistPt) &&
- "GEP operands not available");
-
- Instruction *ClonedGep = Gep->clone();
- for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
- // Check whether the operand is already available.
- if (DT->dominates(Op->getParent(), HoistPt))
- continue;
+ Instruction *Gep) const;
+
+ void updateAlignment(Instruction *I, Instruction *Repl);
+
+ // Remove all the instructions in Candidates and replace their usage with
+ // Repl. Returns the number of instructions removed.
+ unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl,
+ MemoryUseOrDef *NewMemAcc);
+
+ // Replace all Memory PHI usage with NewMemAcc.
+ void raMPHIuw(MemoryUseOrDef *NewMemAcc);
+
+ // Remove all other instructions and replace them with Repl.
+ unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl,
+ BasicBlock *DestBB, bool MoveAccess);
+
+ // In the case Repl is a load or a store, we make all their GEPs
+ // available: GEPs are not hoisted by default to avoid hoisting the address
+ // computations without the associated load or store.
+ bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+ const SmallVecInsn &InstructionsToHoist) const;
+
+ std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL);
+
+ // Hoist all expressions. Returns Number of scalars hoisted
+ // and number of non-scalars hoisted.
+ std::pair<unsigned, unsigned> hoistExpressions(Function &F);
+};
+
+class GVNHoistLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ GVNHoistLegacyPass() : FunctionPass(ID) {
+ initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+ auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+
+ GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA);
+ return G.run(F);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+
+bool GVNHoist::run(Function &F) {
+ NumFuncArgs = F.arg_size();
+ VN.setDomTree(DT);
+ VN.setAliasAnalysis(AA);
+ VN.setMemDep(MD);
+ bool Res = false;
+ // Perform DFS Numbering of instructions.
+ unsigned BBI = 0;
+ for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+ DFSNumber[BB] = ++BBI;
+ unsigned I = 0;
+ for (auto &Inst : *BB)
+ DFSNumber[&Inst] = ++I;
+ }
+
+ int ChainLength = 0;
+
+ // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
+ while (true) {
+ if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength)
+ return Res;
+
+ auto HoistStat = hoistExpressions(F);
+ if (HoistStat.first + HoistStat.second == 0)
+ return Res;
+
+ if (HoistStat.second > 0)
+ // To address a limitation of the current GVN, we need to rerun the
+ // hoisting after we hoisted loads or stores in order to be able to
+ // hoist all scalars dependent on the hoisted ld/st.
+ VN.clear();
+
+ Res = true;
+ }
+
+ return Res;
+}
+
+unsigned int GVNHoist::rank(const Value *V) const {
+ // Prefer constants to undef to anything else
+ // Undef is a constant, have to check it first.
+ // Prefer smaller constants to constantexprs
+ if (isa<ConstantExpr>(V))
+ return 2;
+ if (isa<UndefValue>(V))
+ return 1;
+ if (isa<Constant>(V))
+ return 0;
+ else if (auto *A = dyn_cast<Argument>(V))
+ return 3 + A->getArgNo();
+
+ // Need to shift the instruction DFS by number of arguments + 3 to account
+ // for the constant and argument ranking above.
+ auto Result = DFSNumber.lookup(V);
+ if (Result > 0)
+ return 4 + NumFuncArgs + Result;
+ // Unreachable or something else, just return a really large number.
+ return ~0;
+}
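
rank() encodes a total preference order: constants first, then undef, then constant expressions, then arguments by position, then instructions by DFS number, with lower values preferred. A toy sketch of the same tiering over made-up value kinds (ToyValue, rank):

// rank_order.cpp: illustrative sketch, not part of the upstream diff.
#include <cassert>

enum class Kind { Constant, Undef, ConstExpr, Argument, Instruction };

struct ToyValue {
  Kind K;
  unsigned ArgNo = 0;  // used when K == Argument
  unsigned DFSNum = 0; // used when K == Instruction
};

// Lower rank means preferred, mirroring the tiers in the function above.
unsigned rank(const ToyValue &V, unsigned NumFuncArgs) {
  switch (V.K) {
  case Kind::Constant:    return 0;
  case Kind::Undef:       return 1;
  case Kind::ConstExpr:   return 2;
  case Kind::Argument:    return 3 + V.ArgNo;
  case Kind::Instruction: return 4 + NumFuncArgs + V.DFSNum;
  }
  return ~0u; // unknown: rank it last
}

int main() {
  const unsigned NumFuncArgs = 2;
  ToyValue C{Kind::Constant}, U{Kind::Undef}, A{Kind::Argument, 1},
      I{Kind::Instruction, 0, 5};
  // Constants are preferred to undef, undef to arguments, and arguments to
  // instructions, matching the ordering described in the comments.
  assert(rank(C, NumFuncArgs) < rank(U, NumFuncArgs));
  assert(rank(U, NumFuncArgs) < rank(A, NumFuncArgs));
  assert(rank(A, NumFuncArgs) < rank(I, NumFuncArgs));
  return 0;
}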
+
+bool GVNHoist::hasEH(const BasicBlock *BB) {
+ auto It = BBSideEffects.find(BB);
+ if (It != BBSideEffects.end())
+ return It->second;
+
+ if (BB->isEHPad() || BB->hasAddressTaken()) {
+ BBSideEffects[BB] = true;
+ return true;
+ }
+
+ if (BB->getTerminator()->mayThrow()) {
+ BBSideEffects[BB] = true;
+ return true;
+ }
+
+ BBSideEffects[BB] = false;
+ return false;
+}
+
+bool GVNHoist::hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
+ const BasicBlock *BB) {
+ const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
+ if (!Acc)
+ return false;
+
+ Instruction *OldPt = Def->getMemoryInst();
+ const BasicBlock *OldBB = OldPt->getParent();
+ const BasicBlock *NewBB = NewPt->getParent();
+ bool ReachedNewPt = false;
- // As a GEP can refer to other GEPs, recursively make all the operands
- // of this GEP available at HoistPt.
- if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
- makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
+ for (const MemoryAccess &MA : *Acc)
+ if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) {
+ Instruction *Insn = MU->getMemoryInst();
+
+ // Do not check whether MU aliases Def when MU occurs after OldPt.
+ if (BB == OldBB && firstInBB(OldPt, Insn))
+ break;
+
+ // Do not check whether MU aliases Def when MU occurs before NewPt.
+ if (BB == NewBB) {
+ if (!ReachedNewPt) {
+ if (firstInBB(Insn, NewPt))
+ continue;
+ ReachedNewPt = true;
+ }
}
+ if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA))
+ return true;
+ }
+
+ return false;
+}
+
+bool GVNHoist::hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
+ int &NBBsOnAllPaths) {
+ // Stop walk once the limit is reached.
+ if (NBBsOnAllPaths == 0)
+ return true;
+
+ // Impossible to hoist with exceptions on the path.
+ if (hasEH(BB))
+ return true;
+
+ // No such instruction after HoistBarrier in a basic block was
+ // selected for hoisting so instructions selected within basic block with
+ // a hoist barrier can be hoisted.
+ if ((BB != SrcBB) && HoistBarrier.count(BB))
+ return true;
+
+ return false;
+}
- // Copy Gep and replace its uses in Repl with ClonedGep.
- ClonedGep->insertBefore(HoistPt->getTerminator());
-
- // Conservatively discard any optimization hints, they may differ on the
- // other paths.
- ClonedGep->dropUnknownNonDebugMetadata();
-
- // If we have optimization hints which agree with each other along different
- // paths, preserve them.
- for (const Instruction *OtherInst : InstructionsToHoist) {
- const GetElementPtrInst *OtherGep;
- if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
- OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
- else
- OtherGep = cast<GetElementPtrInst>(
- cast<StoreInst>(OtherInst)->getPointerOperand());
- ClonedGep->andIRFlags(OtherGep);
+bool GVNHoist::hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
+ int &NBBsOnAllPaths) {
+ const BasicBlock *NewBB = NewPt->getParent();
+ const BasicBlock *OldBB = Def->getBlock();
+ assert(DT->dominates(NewBB, OldBB) && "invalid path");
+ assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) &&
+ "def does not dominate new hoisting point");
+
+ // Walk all basic blocks reachable in depth-first iteration on the inverse
+ // CFG from OldBB to NewBB. These blocks are all the blocks that may be
+ // executed between the execution of NewBB and OldBB. Hoisting an expression
+ // from OldBB into NewBB has to be safe on all execution paths.
+ for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
+ const BasicBlock *BB = *I;
+ if (BB == NewBB) {
+ // Stop traversal when reaching HoistPt.
+ I.skipChildren();
+ continue;
}
- // Replace uses of Gep with ClonedGep in Repl.
- Repl->replaceUsesOfWith(Gep, ClonedGep);
+ if (hasEHhelper(BB, OldBB, NBBsOnAllPaths))
+ return true;
+
+ // Check that we do not move a store past loads.
+ if (hasMemoryUse(NewPt, Def, BB))
+ return true;
+
+ // -1 is unlimited number of blocks on all paths.
+ if (NBBsOnAllPaths != -1)
+ --NBBsOnAllPaths;
+
+ ++I;
}
- void updateAlignment(Instruction *I, Instruction *Repl) {
- if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
- ReplacementLoad->setAlignment(
- std::min(ReplacementLoad->getAlign(), cast<LoadInst>(I)->getAlign()));
- ++NumLoadsRemoved;
- } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
- ReplacementStore->setAlignment(std::min(ReplacementStore->getAlign(),
- cast<StoreInst>(I)->getAlign()));
- ++NumStoresRemoved;
- } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
- ReplacementAlloca->setAlignment(std::max(
- ReplacementAlloca->getAlign(), cast<AllocaInst>(I)->getAlign()));
- } else if (isa<CallInst>(Repl)) {
- ++NumCallsRemoved;
+ return false;
+}
+
+bool GVNHoist::hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
+ int &NBBsOnAllPaths) {
+ assert(DT->dominates(HoistPt, SrcBB) && "Invalid path");
+
+ // Walk all basic blocks reachable in depth-first iteration on
+ // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
+ // blocks that may be executed between the execution of NewHoistPt and
+ // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
+ // on all execution paths.
+ for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) {
+ const BasicBlock *BB = *I;
+ if (BB == HoistPt) {
+ // Stop traversal when reaching NewHoistPt.
+ I.skipChildren();
+ continue;
}
+
+ if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths))
+ return true;
+
+ // -1 is unlimited number of blocks on all paths.
+ if (NBBsOnAllPaths != -1)
+ --NBBsOnAllPaths;
+
+ ++I;
}
- // Remove all the instructions in Candidates and replace their usage with Repl.
- // Returns the number of instructions removed.
- unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl,
- MemoryUseOrDef *NewMemAcc) {
- unsigned NR = 0;
- for (Instruction *I : Candidates) {
- if (I != Repl) {
- ++NR;
- updateAlignment(I, Repl);
- if (NewMemAcc) {
- // Update the uses of the old MSSA access with NewMemAcc.
- MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
- OldMA->replaceAllUsesWith(NewMemAcc);
- MSSAUpdater->removeMemoryAccess(OldMA);
- }
+ return false;
+}
- Repl->andIRFlags(I);
- combineKnownMetadata(Repl, I);
- I->replaceAllUsesWith(Repl);
- // Also invalidate the Alias Analysis cache.
- MD->removeInstruction(I);
- I->eraseFromParent();
- }
+bool GVNHoist::safeToHoistLdSt(const Instruction *NewPt,
+ const Instruction *OldPt, MemoryUseOrDef *U,
+ GVNHoist::InsKind K, int &NBBsOnAllPaths) {
+ // In place hoisting is safe.
+ if (NewPt == OldPt)
+ return true;
+
+ const BasicBlock *NewBB = NewPt->getParent();
+ const BasicBlock *OldBB = OldPt->getParent();
+ const BasicBlock *UBB = U->getBlock();
+
+ // Check for dependences on the Memory SSA.
+ MemoryAccess *D = U->getDefiningAccess();
+ BasicBlock *DBB = D->getBlock();
+ if (DT->properlyDominates(NewBB, DBB))
+ // Cannot move the load or store to NewBB above its definition in DBB.
+ return false;
+
+ if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
+ if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
+ if (!firstInBB(UD->getMemoryInst(), NewPt))
+ // Cannot move the load or store to NewPt above its definition in D.
+ return false;
+
+ // Check for unsafe hoistings due to side effects.
+ if (K == InsKind::Store) {
+ if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths))
+ return false;
+ } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
+ return false;
+
+ if (UBB == NewBB) {
+ if (DT->properlyDominates(DBB, NewBB))
+ return true;
+ assert(UBB == DBB);
+ assert(MSSA->locallyDominates(D, U));
+ }
+
+ // No side effects: it is safe to hoist.
+ return true;
+}
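
safeToHoistLdSt needs an intra-block ordering test (firstInBB) to decide whether the defining access already sits above the prospective hoisting point. The pass answers such queries from DFS numbers assigned once per function instead of rescanning the block each time. A toy version of that numbering idea, keyed by instruction names rather than Instruction pointers, is sketched below; the map and lambda are stand-ins, not the pass's actual data structures.

#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

// Assign each instruction (here just a name) a number in program order,
// then answer "does A come before B in this block?" with two lookups.
int main() {
  std::vector<std::string> Block = {"gep", "load", "add", "store"};
  std::unordered_map<std::string, unsigned> DFSNumber;
  unsigned N = 0;
  for (const std::string &I : Block)
    DFSNumber[I] = N++;

  auto firstInBB = [&](const std::string &A, const std::string &B) {
    return DFSNumber.at(A) < DFSNumber.at(B);
  };

  assert(firstInBB("load", "store"));
  assert(!firstInBB("store", "gep"));
  return 0;
}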
+
+bool GVNHoist::valueAnticipable(CHIArgs C, Instruction *TI) const {
+ if (TI->getNumSuccessors() > (unsigned)size(C))
+ return false; // Not enough args in this CHI.
+
+ for (auto CHI : C) {
+ // Find if all the edges have values flowing out of BB.
+ if (!llvm::is_contained(successors(TI), CHI.Dest))
+ return false;
+ }
+ return true;
+}
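
valueAnticipable encodes the classic anticipability condition: the expression may be placed at the end of BB only if an occurrence of it is available along every outgoing edge. A simplified restatement in plain C++, with basic blocks reduced to integer ids, is shown below; this standalone valueAnticipable is a toy, not the member function above.

#include <algorithm>
#include <cstdio>
#include <vector>

// A value is anticipable at the end of a block only if every successor edge
// carries an occurrence of it, i.e. every successor appears as the Dest of
// some CHI argument.
static bool valueAnticipable(const std::vector<int> &Succs,
                             const std::vector<int> &CHIDests) {
  if (Succs.size() > CHIDests.size())
    return false; // Not enough CHI arguments to cover all edges.
  for (int S : Succs)
    if (std::find(CHIDests.begin(), CHIDests.end(), S) == CHIDests.end())
      return false;
  return true;
}

int main() {
  std::printf("%d\n", valueAnticipable({1, 2}, {1, 2}));    // 1: fully covered
  std::printf("%d\n", valueAnticipable({1, 2, 3}, {1, 2})); // 0: edge 3 uncovered
  return 0;
}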
+
+void GVNHoist::checkSafety(CHIArgs C, BasicBlock *BB, GVNHoist::InsKind K,
+ SmallVectorImpl<CHIArg> &Safe) {
+ int NumBBsOnAllPaths = MaxNumberOfBBSInPath;
+ for (auto CHI : C) {
+ Instruction *Insn = CHI.I;
+ if (!Insn) // No instruction was inserted in this CHI.
+ continue;
+ if (K == InsKind::Scalar) {
+ if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths))
+ Safe.push_back(CHI);
+ } else {
+ auto *T = BB->getTerminator();
+ if (MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn))
+ if (safeToHoistLdSt(T, Insn, UD, K, NumBBsOnAllPaths))
+ Safe.push_back(CHI);
}
- return NR;
}
+}
- // Replace all Memory PHI usage with NewMemAcc.
- void raMPHIuw(MemoryUseOrDef *NewMemAcc) {
- SmallPtrSet<MemoryPhi *, 4> UsePhis;
- for (User *U : NewMemAcc->users())
- if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
- UsePhis.insert(Phi);
-
- for (MemoryPhi *Phi : UsePhis) {
- auto In = Phi->incoming_values();
- if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
- Phi->replaceAllUsesWith(NewMemAcc);
- MSSAUpdater->removeMemoryAccess(Phi);
- }
+void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
+ GVNHoist::RenameStackType &RenameStack) {
+ auto it1 = ValueBBs.find(BB);
+ if (it1 != ValueBBs.end()) {
+ // Iterate in reverse order to keep lower ranked values on the top.
+ for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
+ // Get the value of instruction I
+ LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
+ RenameStack[VI.first].push_back(VI.second);
}
}
+}
- // Remove all other instructions and replace them with Repl.
- unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl,
- BasicBlock *DestBB, bool MoveAccess) {
- MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl);
- if (MoveAccess && NewMemAcc) {
- // The definition of this ld/st will not change: ld/st hoisting is
- // legal when the ld/st is not moved past its current definition.
- MSSAUpdater->moveToPlace(NewMemAcc, DestBB,
- MemorySSA::BeforeTerminator);
+void GVNHoist::fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
+ GVNHoist::RenameStackType &RenameStack) {
+ // For each *predecessor* of BB, check whether it has a CHI (predecessors
+ // act as join points here because the walk follows the post-dominator tree).
+ for (auto Pred : predecessors(BB)) {
+ auto P = CHIBBs.find(Pred);
+ if (P == CHIBBs.end()) {
+ continue;
}
+ LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName(););
+ // A CHI is found (BB -> Pred is an edge in the CFG)
+ // Pop the stack until Top(V) = Ve.
+ auto &VCHI = P->second;
+ for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) {
+ CHIArg &C = *It;
+ if (!C.Dest) {
+ auto si = RenameStack.find(C.VN);
+ // The basic block containing the CHI must dominate the value we want
+ // to track in that CHI. During the post-dominator walk, the stack can
+ // hold values that are not control dependent on BB, e.g. from a
+ // nested loop.
+ if (si != RenameStack.end() && si->second.size() &&
+ DT->properlyDominates(Pred, si->second.back()->getParent())) {
+ C.Dest = BB; // Assign the edge
+ C.I = si->second.pop_back_val(); // Assign the argument
+ LLVM_DEBUG(dbgs()
+ << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I
+ << ", VN: " << C.VN.first << ", " << C.VN.second);
+ }
+ // Move to next CHI of a different value
+ It = std::find_if(It, VCHI.end(), [It](CHIArg &A) { return A != *It; });
+ } else
+ ++It;
+ }
+ }
+}
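
fillRenameStack and fillChiArgs together implement the push/pop half of an SSA-style renaming walk over the post-dominator tree: occurrences are pushed per value number on the way down, and a CHI consumes the innermost dominating occurrence as the argument flowing along its edge. A compact sketch of that stack discipline, with strings standing in for instructions and integers for value numbers (all names are purely illustrative):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  using VN = int;
  std::map<VN, std::vector<std::string>> RenameStack;

  // Descend: push the occurrences of value number 42 found in two blocks.
  RenameStack[42].push_back("add.in.bb1");
  RenameStack[42].push_back("add.in.bb2");

  // At a CHI for value number 42, consume the top of the stack as the
  // incoming argument for the current edge.
  VN WantedVN = 42;
  auto It = RenameStack.find(WantedVN);
  if (It != RenameStack.end() && !It->second.empty()) {
    std::string Arg = It->second.back();
    It->second.pop_back();
    std::printf("CHI argument for VN %d: %s\n", WantedVN, Arg.c_str());
  }
  return 0;
}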
- // Replace all other instructions with Repl with memory access NewMemAcc.
- unsigned NR = rauw(Candidates, Repl, NewMemAcc);
+void GVNHoist::findHoistableCandidates(OutValuesType &CHIBBs,
+ GVNHoist::InsKind K,
+ HoistingPointList &HPL) {
+ auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; };
+
+ // CHIArgs now have the outgoing values, so check for anticipability and
+ // accumulate hoistable candidates in HPL.
+ for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) {
+ BasicBlock *BB = A.first;
+ SmallVectorImpl<CHIArg> &CHIs = A.second;
+ // Vector of PHIs contains PHIs for different instructions.
+ // Sort the args according to their VNs, such that identical
+ // instructions are together.
+ llvm::stable_sort(CHIs, cmpVN);
+ auto TI = BB->getTerminator();
+ auto B = CHIs.begin();
+ // [PreIt, PHIIt) form a range of CHIs which have identical VNs.
+ auto PHIIt = llvm::find_if(CHIs, [B](CHIArg &A) { return A != *B; });
+ auto PrevIt = CHIs.begin();
+ while (PrevIt != PHIIt) {
+ // Collect values which satisfy safety checks.
+ SmallVector<CHIArg, 2> Safe;
+ // Check safety first: a path may carry several values, and only some of
+ // them may be safe to hoist, but as long as each edge still has at least
+ // one hoistable value, the value remains anticipable along that path.
+ checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe);
+
+ // List of safe values should be anticipable at TI.
+ if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)) {
+ HPL.push_back({BB, SmallVecInsn()});
+ SmallVecInsn &V = HPL.back().second;
+ for (auto B : Safe)
+ V.push_back(B.I);
+ }
- // Remove MemorySSA phi nodes with the same arguments.
- if (NewMemAcc)
- raMPHIuw(NewMemAcc);
- return NR;
+ // Check other VNs
+ PrevIt = PHIIt;
+ PHIIt = std::find_if(PrevIt, CHIs.end(),
+ [PrevIt](CHIArg &A) { return A != *PrevIt; });
+ }
}
+}
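
findHoistableCandidates processes CHIs one value number at a time by stable-sorting them and then walking runs of equal keys with find_if, the [PrevIt, PHIIt) idiom in the loop above. The same grouping pattern in standalone C++, with integer keys standing in for value numbers and chars for CHI payloads:

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Group entries with equal keys after a stable sort, processing one run of
// identical keys at a time (the same [PrevIt, PHIIt) pattern used above).
int main() {
  std::vector<std::pair<int, char>> CHIs = {
      {7, 'a'}, {3, 'b'}, {7, 'c'}, {3, 'd'}, {9, 'e'}};
  std::stable_sort(CHIs.begin(), CHIs.end(),
                   [](const auto &A, const auto &B) { return A.first < B.first; });

  auto PrevIt = CHIs.begin();
  while (PrevIt != CHIs.end()) {
    // Find the first element whose key differs from *PrevIt.
    auto RunEnd = std::find_if(PrevIt, CHIs.end(), [PrevIt](const auto &A) {
      return A.first != PrevIt->first;
    });
    std::printf("key %d:", PrevIt->first);
    for (auto It = PrevIt; It != RunEnd; ++It)
      std::printf(" %c", It->second);
    std::printf("\n");
    PrevIt = RunEnd; // Move on to the next run of identical keys.
  }
  return 0;
}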
- // In the case Repl is a load or a store, we make all their GEPs
- // available: GEPs are not hoisted by default to avoid the address
- // computations to be hoisted without the associated load or store.
- bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
- const SmallVecInsn &InstructionsToHoist) const {
- // Check whether the GEP of a ld/st can be synthesized at HoistPt.
- GetElementPtrInst *Gep = nullptr;
- Instruction *Val = nullptr;
- if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
- Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
- } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
- Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
- Val = dyn_cast<Instruction>(St->getValueOperand());
- // Check that the stored value is available.
- if (Val) {
- if (isa<GetElementPtrInst>(Val)) {
- // Check whether we can compute the GEP at HoistPt.
- if (!allGepOperandsAvailable(Val, HoistPt))
+bool GVNHoist::allOperandsAvailable(const Instruction *I,
+ const BasicBlock *HoistPt) const {
+ for (const Use &Op : I->operands())
+ if (const auto *Inst = dyn_cast<Instruction>(&Op))
+ if (!DT->dominates(Inst->getParent(), HoistPt))
+ return false;
+
+ return true;
+}
+
+bool GVNHoist::allGepOperandsAvailable(const Instruction *I,
+ const BasicBlock *HoistPt) const {
+ for (const Use &Op : I->operands())
+ if (const auto *Inst = dyn_cast<Instruction>(&Op))
+ if (!DT->dominates(Inst->getParent(), HoistPt)) {
+ if (const GetElementPtrInst *GepOp =
+ dyn_cast<GetElementPtrInst>(Inst)) {
+ if (!allGepOperandsAvailable(GepOp, HoistPt))
return false;
- } else if (!DT->dominates(Val->getParent(), HoistPt))
+ // Gep is available if all operands of GepOp are available.
+ } else {
+ // Gep is not available if it has operands other than GEPs that are
+ // defined in blocks not dominating HoistPt.
return false;
+ }
}
- }
+ return true;
+}
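
allGepOperandsAvailable is deliberately more permissive than allOperandsAvailable: an operand that is itself a GEP does not have to dominate the hoisting point, because the GEP chain can be re-cloned there, as makeGepsAvailable does below. A toy recursive version of that availability rule follows; Node, IsGep and DirectlyAvailable are invented for the sketch.

#include <cstdio>
#include <memory>
#include <vector>

// Toy operand tree: a node is either directly available at the hoisting
// point, or it is a GEP whose availability is decided recursively from its
// operands (GEPs can be re-synthesized at the hoisting point, other
// instructions cannot).
struct Node {
  bool IsGep = false;
  bool DirectlyAvailable = false;
  std::vector<std::shared_ptr<Node>> Operands;
};

static bool allGepOperandsAvailable(const Node &N) {
  for (const auto &Op : N.Operands) {
    if (Op->DirectlyAvailable)
      continue;
    if (Op->IsGep && allGepOperandsAvailable(*Op))
      continue; // This GEP can itself be cloned at the hoisting point.
    return false;
  }
  return true;
}

int main() {
  auto Base = std::make_shared<Node>();
  Base->DirectlyAvailable = true;
  auto InnerGep = std::make_shared<Node>();
  InnerGep->IsGep = true;
  InnerGep->Operands = {Base};
  auto OuterGep = std::make_shared<Node>();
  OuterGep->IsGep = true;
  OuterGep->Operands = {InnerGep};
  std::printf("hoistable: %d\n", allGepOperandsAvailable(*OuterGep));
  return 0;
}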
- // Check whether we can compute the Gep at HoistPt.
- if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
- return false;
+void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+ const SmallVecInsn &InstructionsToHoist,
+ Instruction *Gep) const {
+ assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available");
- makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
+ Instruction *ClonedGep = Gep->clone();
+ for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
+ // Check whether the operand is already available.
+ if (DT->dominates(Op->getParent(), HoistPt))
+ continue;
- if (Val && isa<GetElementPtrInst>(Val))
- makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
+ // As a GEP can refer to other GEPs, recursively make all the operands
+ // of this GEP available at HoistPt.
+ if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
+ makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
+ }
- return true;
- }
+ // Copy Gep and replace its uses in Repl with ClonedGep.
+ ClonedGep->insertBefore(HoistPt->getTerminator());
- std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) {
- unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
- for (const HoistingPointInfo &HP : HPL) {
- // Find out whether we already have one of the instructions in HoistPt,
- // in which case we do not have to move it.
- BasicBlock *DestBB = HP.first;
- const SmallVecInsn &InstructionsToHoist = HP.second;
- Instruction *Repl = nullptr;
- for (Instruction *I : InstructionsToHoist)
- if (I->getParent() == DestBB)
- // If there are two instructions in HoistPt to be hoisted in place:
- // update Repl to be the first one, such that we can rename the uses
- // of the second based on the first.
- if (!Repl || firstInBB(I, Repl))
- Repl = I;
-
- // Keep track of whether we moved the instruction so we know whether we
- // should move the MemoryAccess.
- bool MoveAccess = true;
- if (Repl) {
- // Repl is already in HoistPt: it remains in place.
- assert(allOperandsAvailable(Repl, DestBB) &&
- "instruction depends on operands that are not available");
- MoveAccess = false;
- } else {
- // When we do not find Repl in HoistPt, select the first in the list
- // and move it to HoistPt.
- Repl = InstructionsToHoist.front();
-
- // We can move Repl in HoistPt only when all operands are available.
- // The order in which hoistings are done may influence the availability
- // of operands.
- if (!allOperandsAvailable(Repl, DestBB)) {
- // When HoistingGeps there is nothing more we can do to make the
- // operands available: just continue.
- if (HoistingGeps)
- continue;
+ // Conservatively discard any optimization hints; they may differ on the
+ // other paths.
+ ClonedGep->dropUnknownNonDebugMetadata();
- // When not HoistingGeps we need to copy the GEPs.
- if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist))
- continue;
- }
+ // If we have optimization hints which agree with each other along different
+ // paths, preserve them.
+ for (const Instruction *OtherInst : InstructionsToHoist) {
+ const GetElementPtrInst *OtherGep;
+ if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
+ OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
+ else
+ OtherGep = cast<GetElementPtrInst>(
+ cast<StoreInst>(OtherInst)->getPointerOperand());
+ ClonedGep->andIRFlags(OtherGep);
+ }
- // Move the instruction at the end of HoistPt.
- Instruction *Last = DestBB->getTerminator();
- MD->removeInstruction(Repl);
- Repl->moveBefore(Last);
+ // Replace uses of Gep with ClonedGep in Repl.
+ Repl->replaceUsesOfWith(Gep, ClonedGep);
+}
- DFSNumber[Repl] = DFSNumber[Last]++;
+void GVNHoist::updateAlignment(Instruction *I, Instruction *Repl) {
+ if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
+ ReplacementLoad->setAlignment(
+ std::min(ReplacementLoad->getAlign(), cast<LoadInst>(I)->getAlign()));
+ ++NumLoadsRemoved;
+ } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
+ ReplacementStore->setAlignment(
+ std::min(ReplacementStore->getAlign(), cast<StoreInst>(I)->getAlign()));
+ ++NumStoresRemoved;
+ } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
+ ReplacementAlloca->setAlignment(std::max(ReplacementAlloca->getAlign(),
+ cast<AllocaInst>(I)->getAlign()));
+ } else if (isa<CallInst>(Repl)) {
+ ++NumCallsRemoved;
+ }
+}
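
updateAlignment merges the attributes of the occurrences that are about to be folded into the surviving instruction: loads and stores may only keep the weakest (minimum) alignment seen among them, while an alloca that replaces several allocas must satisfy the strictest (maximum) request. A minimal numeric illustration of that rule:

#include <algorithm>
#include <cstdio>

// When several loads (or stores) collapse into one hoisted copy, the
// surviving access may only claim the weakest guarantee, so alignments are
// merged with std::min. For allocas the merged object must satisfy every
// requested alignment, so std::max is the right choice there.
int main() {
  unsigned ReplLoadAlign = 16, OtherLoadAlign = 4;
  unsigned MergedLoadAlign = std::min(ReplLoadAlign, OtherLoadAlign);

  unsigned ReplAllocaAlign = 8, OtherAllocaAlign = 32;
  unsigned MergedAllocaAlign = std::max(ReplAllocaAlign, OtherAllocaAlign);

  std::printf("load align %u, alloca align %u\n", MergedLoadAlign,
              MergedAllocaAlign);
  return 0;
}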
+
+unsigned GVNHoist::rauw(const SmallVecInsn &Candidates, Instruction *Repl,
+ MemoryUseOrDef *NewMemAcc) {
+ unsigned NR = 0;
+ for (Instruction *I : Candidates) {
+ if (I != Repl) {
+ ++NR;
+ updateAlignment(I, Repl);
+ if (NewMemAcc) {
+ // Update the uses of the old MSSA access with NewMemAcc.
+ MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
+ OldMA->replaceAllUsesWith(NewMemAcc);
+ MSSAUpdater->removeMemoryAccess(OldMA);
}
- NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess);
+ Repl->andIRFlags(I);
+ combineKnownMetadata(Repl, I);
+ I->replaceAllUsesWith(Repl);
+ // Also invalidate the Alias Analysis cache.
+ MD->removeInstruction(I);
+ I->eraseFromParent();
+ }
+ }
+ return NR;
+}
- if (isa<LoadInst>(Repl))
- ++NL;
- else if (isa<StoreInst>(Repl))
- ++NS;
- else if (isa<CallInst>(Repl))
- ++NC;
- else // Scalar
- ++NI;
+void GVNHoist::raMPHIuw(MemoryUseOrDef *NewMemAcc) {
+ SmallPtrSet<MemoryPhi *, 4> UsePhis;
+ for (User *U : NewMemAcc->users())
+ if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
+ UsePhis.insert(Phi);
+
+ for (MemoryPhi *Phi : UsePhis) {
+ auto In = Phi->incoming_values();
+ if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
+ Phi->replaceAllUsesWith(NewMemAcc);
+ MSSAUpdater->removeMemoryAccess(Phi);
}
+ }
+}
+
+unsigned GVNHoist::removeAndReplace(const SmallVecInsn &Candidates,
+ Instruction *Repl, BasicBlock *DestBB,
+ bool MoveAccess) {
+ MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl);
+ if (MoveAccess && NewMemAcc) {
+ // The definition of this ld/st will not change: ld/st hoisting is
+ // legal when the ld/st is not moved past its current definition.
+ MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::BeforeTerminator);
+ }
- if (MSSA && VerifyMemorySSA)
- MSSA->verifyMemorySSA();
+ // Replace all other instructions with Repl, rewiring their memory
+ // accesses to NewMemAcc.
+ unsigned NR = rauw(Candidates, Repl, NewMemAcc);
- NumHoisted += NL + NS + NC + NI;
- NumRemoved += NR;
- NumLoadsHoisted += NL;
- NumStoresHoisted += NS;
- NumCallsHoisted += NC;
- return {NI, NL + NC + NS};
+ // Remove MemorySSA phi nodes with the same arguments.
+ if (NewMemAcc)
+ raMPHIuw(NewMemAcc);
+ return NR;
+}
+
+bool GVNHoist::makeGepOperandsAvailable(
+ Instruction *Repl, BasicBlock *HoistPt,
+ const SmallVecInsn &InstructionsToHoist) const {
+ // Check whether the GEP of a ld/st can be synthesized at HoistPt.
+ GetElementPtrInst *Gep = nullptr;
+ Instruction *Val = nullptr;
+ if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
+ Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
+ } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
+ Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
+ Val = dyn_cast<Instruction>(St->getValueOperand());
+ // Check that the stored value is available.
+ if (Val) {
+ if (isa<GetElementPtrInst>(Val)) {
+ // Check whether we can compute the GEP at HoistPt.
+ if (!allGepOperandsAvailable(Val, HoistPt))
+ return false;
+ } else if (!DT->dominates(Val->getParent(), HoistPt))
+ return false;
+ }
}
- // Hoist all expressions. Returns Number of scalars hoisted
- // and number of non-scalars hoisted.
- std::pair<unsigned, unsigned> hoistExpressions(Function &F) {
- InsnInfo II;
- LoadInfo LI;
- StoreInfo SI;
- CallInfo CI;
- for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
- int InstructionNb = 0;
- for (Instruction &I1 : *BB) {
- // If I1 cannot guarantee progress, subsequent instructions
- // in BB cannot be hoisted anyways.
- if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) {
- HoistBarrier.insert(BB);
- break;
- }
- // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting
- // deeper may increase the register pressure and compilation time.
- if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
- break;
+ // Check whether we can compute the Gep at HoistPt.
+ if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
+ return false;
- // Do not value number terminator instructions.
- if (I1.isTerminator())
- break;
+ makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
- if (auto *Load = dyn_cast<LoadInst>(&I1))
- LI.insert(Load, VN);
- else if (auto *Store = dyn_cast<StoreInst>(&I1))
- SI.insert(Store, VN);
- else if (auto *Call = dyn_cast<CallInst>(&I1)) {
- if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) {
- if (isa<DbgInfoIntrinsic>(Intr) ||
- Intr->getIntrinsicID() == Intrinsic::assume ||
- Intr->getIntrinsicID() == Intrinsic::sideeffect)
- continue;
- }
- if (Call->mayHaveSideEffects())
- break;
-
- if (Call->isConvergent())
- break;
-
- CI.insert(Call, VN);
- } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
- // Do not hoist scalars past calls that may write to memory because
- // that could result in spills later. geps are handled separately.
- // TODO: We can relax this for targets like AArch64 as they have more
- // registers than X86.
- II.insert(&I1, VN);
+ if (Val && isa<GetElementPtrInst>(Val))
+ makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
+
+ return true;
+}
+
+std::pair<unsigned, unsigned> GVNHoist::hoist(HoistingPointList &HPL) {
+ unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
+ for (const HoistingPointInfo &HP : HPL) {
+ // Find out whether we already have one of the instructions in HoistPt,
+ // in which case we do not have to move it.
+ BasicBlock *DestBB = HP.first;
+ const SmallVecInsn &InstructionsToHoist = HP.second;
+ Instruction *Repl = nullptr;
+ for (Instruction *I : InstructionsToHoist)
+ if (I->getParent() == DestBB)
+ // If there are two instructions in HoistPt to be hoisted in place:
+ // update Repl to be the first one, such that we can rename the uses
+ // of the second based on the first.
+ if (!Repl || firstInBB(I, Repl))
+ Repl = I;
+
+ // Keep track of whether we moved the instruction so we know whether we
+ // should move the MemoryAccess.
+ bool MoveAccess = true;
+ if (Repl) {
+ // Repl is already in HoistPt: it remains in place.
+ assert(allOperandsAvailable(Repl, DestBB) &&
+ "instruction depends on operands that are not available");
+ MoveAccess = false;
+ } else {
+ // When we do not find Repl in HoistPt, select the first in the list
+ // and move it to HoistPt.
+ Repl = InstructionsToHoist.front();
+
+ // We can move Repl in HoistPt only when all operands are available.
+ // The order in which hoistings are done may influence the availability
+ // of operands.
+ if (!allOperandsAvailable(Repl, DestBB)) {
+ // When HoistingGeps there is nothing more we can do to make the
+ // operands available: just continue.
+ if (HoistingGeps)
+ continue;
+
+ // When not HoistingGeps we need to copy the GEPs.
+ if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist))
+ continue;
}
+
+ // Move the instruction to the end of HoistPt, just before its terminator.
+ Instruction *Last = DestBB->getTerminator();
+ MD->removeInstruction(Repl);
+ Repl->moveBefore(Last);
+
+ DFSNumber[Repl] = DFSNumber[Last]++;
}
- HoistingPointList HPL;
- computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
- computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
- computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
- computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
- computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
- computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
- return hoist(HPL);
+ NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess);
+
+ if (isa<LoadInst>(Repl))
+ ++NL;
+ else if (isa<StoreInst>(Repl))
+ ++NS;
+ else if (isa<CallInst>(Repl))
+ ++NC;
+ else // Scalar
+ ++NI;
}
-};
-class GVNHoistLegacyPass : public FunctionPass {
-public:
- static char ID;
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
- GVNHoistLegacyPass() : FunctionPass(ID) {
- initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
- }
+ NumHoisted += NL + NS + NC + NI;
+ NumRemoved += NR;
+ NumLoadsHoisted += NL;
+ NumStoresHoisted += NS;
+ NumCallsHoisted += NC;
+ return {NI, NL + NC + NS};
+}
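
The selection of Repl at the top of hoist follows a simple rule: if some occurrence already lives in the destination block, take the earliest such occurrence so the later ones can be renamed onto it and no move is needed; otherwise take the first candidate and move it. A standalone sketch of that choice over toy candidates (Cand, Block and Pos are invented fields):

#include <cstdio>
#include <string>
#include <vector>

// Pick the hoisting representative among several occurrences of the same
// value: prefer the earliest occurrence already in the destination block,
// otherwise fall back to the first candidate, which will have to be moved.
struct Cand {
  std::string Name;
  int Block;    // Parent block of this occurrence.
  unsigned Pos; // Position inside its block (lower = earlier).
};

int main() {
  int DestBB = 0;
  std::vector<Cand> Candidates = {
      {"add.bb2", 2, 0}, {"add.dest.late", 0, 7}, {"add.dest.early", 0, 3}};

  const Cand *Repl = nullptr;
  for (const Cand &C : Candidates)
    if (C.Block == DestBB && (!Repl || C.Pos < Repl->Pos))
      Repl = &C;
  bool MoveNeeded = (Repl == nullptr);
  if (!Repl)
    Repl = &Candidates.front();

  std::printf("representative: %s (move needed: %d)\n", Repl->Name.c_str(),
              MoveNeeded);
  return 0;
}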
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
- auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+std::pair<unsigned, unsigned> GVNHoist::hoistExpressions(Function &F) {
+ InsnInfo II;
+ LoadInfo LI;
+ StoreInfo SI;
+ CallInfo CI;
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+ int InstructionNb = 0;
+ for (Instruction &I1 : *BB) {
+ // If I1 cannot guarantee progress, subsequent instructions
+ // in BB cannot be hoisted anyway.
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) {
+ HoistBarrier.insert(BB);
+ break;
+ }
+ // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting
+ // deeper may increase the register pressure and compilation time.
+ if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
+ break;
+
+ // Do not value number terminator instructions.
+ if (I1.isTerminator())
+ break;
+
+ if (auto *Load = dyn_cast<LoadInst>(&I1))
+ LI.insert(Load, VN);
+ else if (auto *Store = dyn_cast<StoreInst>(&I1))
+ SI.insert(Store, VN);
+ else if (auto *Call = dyn_cast<CallInst>(&I1)) {
+ if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) {
+ if (isa<DbgInfoIntrinsic>(Intr) ||
+ Intr->getIntrinsicID() == Intrinsic::assume ||
+ Intr->getIntrinsicID() == Intrinsic::sideeffect)
+ continue;
+ }
+ if (Call->mayHaveSideEffects())
+ break;
- GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA);
- return G.run(F);
- }
+ if (Call->isConvergent())
+ break;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<MemoryDependenceWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
+ CI.insert(Call, VN);
+ } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
+ // Do not hoist scalars past calls that may write to memory because
+ // that could result in spills later. GEPs are handled separately.
+ // TODO: We can relax this for targets like AArch64 as they have more
+ // registers than X86.
+ II.insert(&I1, VN);
+ }
}
-};
+
+ HoistingPointList HPL;
+ computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
+ computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
+ computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
+ computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
+ computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
+ computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
+ return hoist(HPL);
+}
} // end namespace llvm
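
hoistExpressions only collects candidates from a prefix of each block: the scan stops at the first instruction that is not guaranteed to transfer execution to its successor (everything after it is fenced off by the hoist barrier) and is additionally capped by MaxDepthInBB. A reduced model of that per-block scan is sketched below; Insn and GuaranteesProgress are toy stand-ins for the isGuaranteedToTransferExecutionToSuccessor check.

#include <cstdio>
#include <vector>

// Collect hoisting candidates from one block: stop at the first instruction
// that may not transfer execution to its successor, and cap how deep into
// the block we look.
struct Insn {
  const char *Name;
  bool GuaranteesProgress; // e.g. false for a call that may throw or loop.
};

int main() {
  std::vector<Insn> Block = {{"gep", true},
                             {"load", true},
                             {"call.maythrow", false},
                             {"add", true}};
  const int MaxDepthInBB = 100; // -1 would mean "no limit".

  std::vector<const char *> Candidates;
  int Depth = 0;
  for (const Insn &I : Block) {
    if (!I.GuaranteesProgress)
      break; // Hoist barrier: nothing after this point is collected.
    if (MaxDepthInBB != -1 && Depth++ >= MaxDepthInBB)
      break;
    Candidates.push_back(I.Name);
  }

  for (const char *N : Candidates)
    std::printf("candidate: %s\n", N);
  return 0;
}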
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
index dfb4b7e038ba..aef927ab6558 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -158,8 +158,7 @@ public:
void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
for (auto II = Insts.begin(); II != Insts.end();) {
- if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
- Blocks.end()) {
+ if (!llvm::is_contained(Blocks, (*II)->getParent())) {
ActiveBlocks.remove((*II)->getParent());
II = Insts.erase(II);
} else {
@@ -277,8 +276,7 @@ public:
auto VI = Values.begin();
while (BI != Blocks.end()) {
assert(VI != Values.end());
- if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
- NewBlocks.end()) {
+ if (!llvm::is_contained(NewBlocks, *BI)) {
BI = Blocks.erase(BI);
VI = Values.erase(VI);
} else {
@@ -694,10 +692,8 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
ModelledPHI NewPHI(NewInsts, ActivePreds);
// Does sinking this instruction render previous PHIs redundant?
- if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
- NeededPHIs.erase(NewPHI);
+ if (NeededPHIs.erase(NewPHI))
RecomputePHIContents = true;
- }
if (RecomputePHIContents) {
// The needed PHIs have changed, so recompute the set of all needed
@@ -758,8 +754,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
Cand.NumMemoryInsts = MemoryInstNum;
Cand.NumBlocks = ActivePreds.size();
Cand.NumPHIs = NeededPHIs.size();
- for (auto *C : ActivePreds)
- Cand.Blocks.push_back(C);
+ append_range(Cand.Blocks, ActivePreds);
return Cand;
}
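
These GVNSink hunks, and the GuardWidening changes below, are mechanical cleanups that swap iterator-pair algorithms for LLVM's range helpers (llvm::is_contained, llvm::append_range, llvm::erase_if from llvm/ADT/STLExtras.h). The generic stand-ins below show the shape of these helpers in plain C++; isContained and appendRange are illustrative names, not the LLVM functions themselves.

#include <algorithm>
#include <cstdio>
#include <vector>

// Range-based replacement for the std::find(R.begin(), R.end(), V) != R.end()
// pattern removed in the hunk above.
template <typename Range, typename T>
bool isContained(const Range &R, const T &Value) {
  return std::find(std::begin(R), std::end(R), Value) != std::end(R);
}

// Range-based replacement for D.insert(D.end(), R.begin(), R.end()).
template <typename Dest, typename Range>
void appendRange(Dest &D, const Range &R) {
  D.insert(D.end(), std::begin(R), std::end(R));
}

int main() {
  std::vector<int> Blocks = {1, 4, 9};
  std::printf("contains 4: %d\n", isContained(Blocks, 4));

  std::vector<int> Out;
  appendRange(Out, Blocks);
  std::printf("out size: %zu\n", Out.size());
  return 0;
}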
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index a3eba27a4d90..61eb4ce0ed46 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -347,9 +347,8 @@ bool GuardWideningImpl::eliminateInstrViaWidening(
const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;
auto I = GuardsInCurBB.begin();
- auto E = Instr->getParent() == CurBB
- ? std::find(GuardsInCurBB.begin(), GuardsInCurBB.end(), Instr)
- : GuardsInCurBB.end();
+ auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
+ : GuardsInCurBB.end();
#ifndef NDEBUG
{
@@ -666,13 +665,12 @@ bool GuardWideningImpl::combineRangeChecks(
};
copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
- Checks.erase(remove_if(Checks, IsCurrentCheck), Checks.end());
+ erase_if(Checks, IsCurrentCheck);
assert(CurrentChecks.size() != 0 && "We know we have at least one!");
if (CurrentChecks.size() < 3) {
- RangeChecksOut.insert(RangeChecksOut.end(), CurrentChecks.begin(),
- CurrentChecks.end());
+ llvm::append_range(RangeChecksOut, CurrentChecks);
continue;
}
@@ -700,9 +698,7 @@ bool GuardWideningImpl::combineRangeChecks(
return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
};
- if (MaxDiff.isMinValue() ||
- !std::all_of(std::next(CurrentChecks.begin()), CurrentChecks.end(),
- OffsetOK))
+ if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
return false;
// We have a series of f+1 checks as:
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 0f36c3f772e6..ae1fff0fa844 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -131,6 +131,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
+static cl::opt<bool>
+AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
+ cl::desc("Allow widening of indvars to eliminate s/zext"));
+
namespace {
struct RewritePhi;
@@ -145,6 +149,7 @@ class IndVarSimplify {
std::unique_ptr<MemorySSAUpdater> MSSAU;
SmallVector<WeakTrackingVH, 16> DeadInsts;
+ bool WidenIndVars;
bool handleFloatingPointIV(Loop *L, PHINode *PH);
bool rewriteNonIntegerIVs(Loop *L);
@@ -167,8 +172,9 @@ class IndVarSimplify {
public:
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
const DataLayout &DL, TargetLibraryInfo *TLI,
- TargetTransformInfo *TTI, MemorySSA *MSSA)
- : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
+ TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
+ : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
+ WidenIndVars(WidenIndVars) {
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
@@ -178,57 +184,6 @@ public:
} // end anonymous namespace
-/// Determine the insertion point for this user. By default, insert immediately
-/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
-/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
-/// common dominator for the incoming blocks. A nullptr can be returned if no
-/// viable location is found: it may happen if User is a PHI and Def only comes
-/// to this PHI from unreachable blocks.
-static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
- DominatorTree *DT, LoopInfo *LI) {
- PHINode *PHI = dyn_cast<PHINode>(User);
- if (!PHI)
- return User;
-
- Instruction *InsertPt = nullptr;
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
- if (PHI->getIncomingValue(i) != Def)
- continue;
-
- BasicBlock *InsertBB = PHI->getIncomingBlock(i);
-
- if (!DT->isReachableFromEntry(InsertBB))
- continue;
-
- if (!InsertPt) {
- InsertPt = InsertBB->getTerminator();
- continue;
- }
- InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
- InsertPt = InsertBB->getTerminator();
- }
-
- // If we have skipped all inputs, it means that Def only comes to Phi from
- // unreachable blocks.
- if (!InsertPt)
- return nullptr;
-
- auto *DefI = dyn_cast<Instruction>(Def);
- if (!DefI)
- return InsertPt;
-
- assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
-
- auto *L = LI->getLoopFor(DefI->getParent());
- assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
-
- for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
- if (LI->getLoopFor(DTN->getBlock()) == L)
- return DTN->getBlock()->getTerminator();
-
- llvm_unreachable("DefI dominates InsertPt!");
-}
-
//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//
@@ -550,27 +505,11 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//
-namespace {
-
-// Collect information about induction variables that are used by sign/zero
-// extend operations. This information is recorded by CollectExtend and provides
-// the input to WidenIV.
-struct WideIVInfo {
- PHINode *NarrowIV = nullptr;
-
- // Widest integer type created [sz]ext
- Type *WidestNativeType = nullptr;
-
- // Was a sext user seen before a zext?
- bool IsSigned = false;
-};
-
-} // end anonymous namespace
-
/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
-static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
+static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
+ ScalarEvolution *SE,
const TargetTransformInfo *TTI) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
@@ -616,982 +555,6 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
-namespace {
-
-/// Record a link in the Narrow IV def-use chain along with the WideIV that
-/// computes the same value as the Narrow IV def. This avoids caching Use*
-/// pointers.
-struct NarrowIVDefUse {
- Instruction *NarrowDef = nullptr;
- Instruction *NarrowUse = nullptr;
- Instruction *WideDef = nullptr;
-
- // True if the narrow def is never negative. Tracking this information lets
- // us use a sign extension instead of a zero extension or vice versa, when
- // profitable and legal.
- bool NeverNegative = false;
-
- NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
- bool NeverNegative)
- : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
- NeverNegative(NeverNegative) {}
-};
-
-/// The goal of this transform is to remove sign and zero extends without
-/// creating any new induction variables. To do this, it creates a new phi of
-/// the wider type and redirects all users, either removing extends or inserting
-/// truncs whenever we stop propagating the type.
-class WidenIV {
- // Parameters
- PHINode *OrigPhi;
- Type *WideType;
-
- // Context
- LoopInfo *LI;
- Loop *L;
- ScalarEvolution *SE;
- DominatorTree *DT;
-
- // Does the module have any calls to the llvm.experimental.guard intrinsic
- // at all? If not we can avoid scanning instructions looking for guards.
- bool HasGuards;
-
- // Result
- PHINode *WidePhi = nullptr;
- Instruction *WideInc = nullptr;
- const SCEV *WideIncExpr = nullptr;
- SmallVectorImpl<WeakTrackingVH> &DeadInsts;
-
- SmallPtrSet<Instruction *,16> Widened;
- SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
-
- enum ExtendKind { ZeroExtended, SignExtended, Unknown };
-
- // A map tracking the kind of extension used to widen each narrow IV
- // and narrow IV user.
- // Key: pointer to a narrow IV or IV user.
- // Value: the kind of extension used to widen this Instruction.
- DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
-
- using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
-
- // A map with control-dependent ranges for post increment IV uses. The key is
- // a pair of IV def and a use of this def denoting the context. The value is
- // a ConstantRange representing possible values of the def at the given
- // context.
- DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
-
- Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
- Instruction *UseI) {
- DefUserPair Key(Def, UseI);
- auto It = PostIncRangeInfos.find(Key);
- return It == PostIncRangeInfos.end()
- ? Optional<ConstantRange>(None)
- : Optional<ConstantRange>(It->second);
- }
-
- void calculatePostIncRanges(PHINode *OrigPhi);
- void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
-
- void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
- DefUserPair Key(Def, UseI);
- auto It = PostIncRangeInfos.find(Key);
- if (It == PostIncRangeInfos.end())
- PostIncRangeInfos.insert({Key, R});
- else
- It->second = R.intersectWith(It->second);
- }
-
-public:
- WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
- DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
- bool HasGuards)
- : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
- L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
- HasGuards(HasGuards), DeadInsts(DI) {
- assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
- ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
- }
-
- PHINode *createWideIV(SCEVExpander &Rewriter);
-
-protected:
- Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
- Instruction *Use);
-
- Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
- Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
- const SCEVAddRecExpr *WideAR);
- Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
-
- ExtendKind getExtendKind(Instruction *I);
-
- using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
-
- WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
-
- WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
-
- const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
- unsigned OpCode) const;
-
- Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
-
- bool widenLoopCompare(NarrowIVDefUse DU);
- bool widenWithVariantUse(NarrowIVDefUse DU);
- void widenWithVariantUseCodegen(NarrowIVDefUse DU);
-
- void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
-};
-
-} // end anonymous namespace
-
-Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
- bool IsSigned, Instruction *Use) {
- // Set the debug location and conservative insertion point.
- IRBuilder<> Builder(Use);
- // Hoist the insertion point into loop preheaders as far as possible.
- for (const Loop *L = LI->getLoopFor(Use->getParent());
- L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
- L = L->getParentLoop())
- Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
-
- return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
- Builder.CreateZExt(NarrowOper, WideType);
-}
-
-/// Instantiate a wide operation to replace a narrow operation. This only needs
-/// to handle operations that can evaluation to SCEVAddRec. It can safely return
-/// 0 for any operation we decide not to clone.
-Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
- const SCEVAddRecExpr *WideAR) {
- unsigned Opcode = DU.NarrowUse->getOpcode();
- switch (Opcode) {
- default:
- return nullptr;
- case Instruction::Add:
- case Instruction::Mul:
- case Instruction::UDiv:
- case Instruction::Sub:
- return cloneArithmeticIVUser(DU, WideAR);
-
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- return cloneBitwiseIVUser(DU);
- }
-}
-
-Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
- Instruction *NarrowUse = DU.NarrowUse;
- Instruction *NarrowDef = DU.NarrowDef;
- Instruction *WideDef = DU.WideDef;
-
- LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
-
- // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
- // about the narrow operand yet so must insert a [sz]ext. It is probably loop
- // invariant and will be folded or hoisted. If it actually comes from a
- // widened IV, it should be removed during a future call to widenIVUse.
- bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
- Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
- ? WideDef
- : createExtendInst(NarrowUse->getOperand(0), WideType,
- IsSigned, NarrowUse);
- Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
- ? WideDef
- : createExtendInst(NarrowUse->getOperand(1), WideType,
- IsSigned, NarrowUse);
-
- auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
- auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
- NarrowBO->getName());
- IRBuilder<> Builder(NarrowUse);
- Builder.Insert(WideBO);
- WideBO->copyIRFlags(NarrowBO);
- return WideBO;
-}
-
-Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
- const SCEVAddRecExpr *WideAR) {
- Instruction *NarrowUse = DU.NarrowUse;
- Instruction *NarrowDef = DU.NarrowDef;
- Instruction *WideDef = DU.WideDef;
-
- LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
-
- unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
-
- // We're trying to find X such that
- //
- // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
- //
- // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
- // and check using SCEV if any of them are correct.
-
- // Returns true if extending NonIVNarrowDef according to `SignExt` is a
- // correct solution to X.
- auto GuessNonIVOperand = [&](bool SignExt) {
- const SCEV *WideLHS;
- const SCEV *WideRHS;
-
- auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
- if (SignExt)
- return SE->getSignExtendExpr(S, Ty);
- return SE->getZeroExtendExpr(S, Ty);
- };
-
- if (IVOpIdx == 0) {
- WideLHS = SE->getSCEV(WideDef);
- const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
- WideRHS = GetExtend(NarrowRHS, WideType);
- } else {
- const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
- WideLHS = GetExtend(NarrowLHS, WideType);
- WideRHS = SE->getSCEV(WideDef);
- }
-
- // WideUse is "WideDef `op.wide` X" as described in the comment.
- const SCEV *WideUse = nullptr;
-
- switch (NarrowUse->getOpcode()) {
- default:
- llvm_unreachable("No other possibility!");
-
- case Instruction::Add:
- WideUse = SE->getAddExpr(WideLHS, WideRHS);
- break;
-
- case Instruction::Mul:
- WideUse = SE->getMulExpr(WideLHS, WideRHS);
- break;
-
- case Instruction::UDiv:
- WideUse = SE->getUDivExpr(WideLHS, WideRHS);
- break;
-
- case Instruction::Sub:
- WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
- break;
- }
-
- return WideUse == WideAR;
- };
-
- bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
- if (!GuessNonIVOperand(SignExtend)) {
- SignExtend = !SignExtend;
- if (!GuessNonIVOperand(SignExtend))
- return nullptr;
- }
-
- Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
- ? WideDef
- : createExtendInst(NarrowUse->getOperand(0), WideType,
- SignExtend, NarrowUse);
- Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
- ? WideDef
- : createExtendInst(NarrowUse->getOperand(1), WideType,
- SignExtend, NarrowUse);
-
- auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
- auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
- NarrowBO->getName());
-
- IRBuilder<> Builder(NarrowUse);
- Builder.Insert(WideBO);
- WideBO->copyIRFlags(NarrowBO);
- return WideBO;
-}
-
-WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
- auto It = ExtendKindMap.find(I);
- assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
- return It->second;
-}
-
-const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
- unsigned OpCode) const {
- if (OpCode == Instruction::Add)
- return SE->getAddExpr(LHS, RHS);
- if (OpCode == Instruction::Sub)
- return SE->getMinusSCEV(LHS, RHS);
- if (OpCode == Instruction::Mul)
- return SE->getMulExpr(LHS, RHS);
-
- llvm_unreachable("Unsupported opcode.");
-}
-
-/// No-wrap operations can transfer sign extension of their result to their
-/// operands. Generate the SCEV value for the widened operation without
-/// actually modifying the IR yet. If the expression after extending the
-/// operands is an AddRec for this loop, return the AddRec and the kind of
-/// extension used.
-WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
- // Handle the common case of add<nsw/nuw>
- const unsigned OpCode = DU.NarrowUse->getOpcode();
- // Only Add/Sub/Mul instructions supported yet.
- if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
- OpCode != Instruction::Mul)
- return {nullptr, Unknown};
-
- // One operand (NarrowDef) has already been extended to WideDef. Now determine
- // if extending the other will lead to a recurrence.
- const unsigned ExtendOperIdx =
- DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
- assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
-
- const SCEV *ExtendOperExpr = nullptr;
- const OverflowingBinaryOperator *OBO =
- cast<OverflowingBinaryOperator>(DU.NarrowUse);
- ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
- if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
- ExtendOperExpr = SE->getSignExtendExpr(
- SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
- else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
- ExtendOperExpr = SE->getZeroExtendExpr(
- SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
- else
- return {nullptr, Unknown};
-
- // When creating this SCEV expr, don't apply the current operations NSW or NUW
- // flags. This instruction may be guarded by control flow that the no-wrap
- // behavior depends on. Non-control-equivalent instructions can be mapped to
- // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
- // semantics to those operations.
- const SCEV *lhs = SE->getSCEV(DU.WideDef);
- const SCEV *rhs = ExtendOperExpr;
-
- // Let's swap operands to the initial order for the case of non-commutative
- // operations, like SUB. See PR21014.
- if (ExtendOperIdx == 0)
- std::swap(lhs, rhs);
- const SCEVAddRecExpr *AddRec =
- dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
-
- if (!AddRec || AddRec->getLoop() != L)
- return {nullptr, Unknown};
-
- return {AddRec, ExtKind};
-}
-
-/// Is this instruction potentially interesting for further simplification after
-/// widening its type? In other words, can the extend be safely hoisted out of
-/// the loop with SCEV reducing the value to a recurrence on the same loop. If
-/// so, return the extended recurrence and the kind of extension used. Otherwise
-/// return {nullptr, Unknown}.
-WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
- if (!SE->isSCEVable(DU.NarrowUse->getType()))
- return {nullptr, Unknown};
-
- const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
- if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
- SE->getTypeSizeInBits(WideType)) {
- // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
- // index. So don't follow this use.
- return {nullptr, Unknown};
- }
-
- const SCEV *WideExpr;
- ExtendKind ExtKind;
- if (DU.NeverNegative) {
- WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
- if (isa<SCEVAddRecExpr>(WideExpr))
- ExtKind = SignExtended;
- else {
- WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
- ExtKind = ZeroExtended;
- }
- } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
- WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
- ExtKind = SignExtended;
- } else {
- WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
- ExtKind = ZeroExtended;
- }
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
- if (!AddRec || AddRec->getLoop() != L)
- return {nullptr, Unknown};
- return {AddRec, ExtKind};
-}
-
-/// This IV user cannot be widened. Replace this use of the original narrow IV
-/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
-static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
- auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
- if (!InsertPt)
- return;
- LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
- << *DU.NarrowUse << "\n");
- IRBuilder<> Builder(InsertPt);
- Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
- DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
-}
-
-/// If the narrow use is a compare instruction, then widen the compare
-// (and possibly the other operand). The extend operation is hoisted into the
-// loop preheader as far as possible.
-bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
- ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
- if (!Cmp)
- return false;
-
- // We can legally widen the comparison in the following two cases:
- //
- // - The signedness of the IV extension and comparison match
- //
- // - The narrow IV is always positive (and thus its sign extension is equal
- // to its zero extension). For instance, let's say we're zero extending
- // %narrow for the following use
- //
- // icmp slt i32 %narrow, %val ... (A)
- //
- // and %narrow is always positive. Then
- //
- // (A) == icmp slt i32 sext(%narrow), sext(%val)
- // == icmp slt i32 zext(%narrow), sext(%val)
- bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
- if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
- return false;
-
- Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
- unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
- unsigned IVWidth = SE->getTypeSizeInBits(WideType);
- assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
-
- // Widen the compare instruction.
- auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
- if (!InsertPt)
- return false;
- IRBuilder<> Builder(InsertPt);
- DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
-
- // Widen the other operand of the compare, if necessary.
- if (CastWidth < IVWidth) {
- Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
- DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
- }
- return true;
-}
-
-// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
-// will not work when:
-// 1) SCEV traces back to an instruction inside the loop that SCEV can not
-// expand, eg. add %indvar, (load %addr)
-// 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
-// While SCEV fails to avoid trunc, we can still try to use instruction
-// combining approach to prove trunc is not required. This can be further
-// extended with other instruction combining checks, but for now we handle the
-// following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
-//
-// Src:
-// %c = sub nsw %b, %indvar
-// %d = sext %c to i64
-// Dst:
-// %indvar.ext1 = sext %indvar to i64
-// %m = sext %b to i64
-// %d = sub nsw i64 %m, %indvar.ext1
-// Therefore, as long as the result of add/sub/mul is extended to wide type, no
-// trunc is required regardless of how %b is generated. This pattern is common
-// when calculating address in 64 bit architecture
-bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
- Instruction *NarrowUse = DU.NarrowUse;
- Instruction *NarrowDef = DU.NarrowDef;
- Instruction *WideDef = DU.WideDef;
-
- // Handle the common case of add<nsw/nuw>
- const unsigned OpCode = NarrowUse->getOpcode();
- // Only Add/Sub/Mul instructions are supported.
- if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
- OpCode != Instruction::Mul)
- return false;
-
- // The operand that is not defined by NarrowDef of DU. Let's call it the
- // other operand.
- unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
- assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
- "bad DU");
-
- const SCEV *ExtendOperExpr = nullptr;
- const OverflowingBinaryOperator *OBO =
- cast<OverflowingBinaryOperator>(NarrowUse);
- ExtendKind ExtKind = getExtendKind(NarrowDef);
- if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
- ExtendOperExpr = SE->getSignExtendExpr(
- SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
- else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
- ExtendOperExpr = SE->getZeroExtendExpr(
- SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
- else
- return false;
-
- // Verifying that Defining operand is an AddRec
- const SCEV *Op1 = SE->getSCEV(WideDef);
- const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
- if (!AddRecOp1 || AddRecOp1->getLoop() != L)
- return false;
- // Verifying that other operand is an Extend.
- if (ExtKind == SignExtended) {
- if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
- return false;
- } else {
- if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
- return false;
- }
-
- if (ExtKind == SignExtended) {
- for (Use &U : NarrowUse->uses()) {
- SExtInst *User = dyn_cast<SExtInst>(U.getUser());
- if (!User || User->getType() != WideType)
- return false;
- }
- } else { // ExtKind == ZeroExtended
- for (Use &U : NarrowUse->uses()) {
- ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
- if (!User || User->getType() != WideType)
- return false;
- }
- }
-
- return true;
-}
-
-/// Special Case for widening with loop variant (see
-/// WidenIV::widenWithVariant). This is the code generation part.
-void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) {
- Instruction *NarrowUse = DU.NarrowUse;
- Instruction *NarrowDef = DU.NarrowDef;
- Instruction *WideDef = DU.WideDef;
-
- ExtendKind ExtKind = getExtendKind(NarrowDef);
-
- LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
-
- // Generating a widening use instruction.
- Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
- ? WideDef
- : createExtendInst(NarrowUse->getOperand(0), WideType,
- ExtKind, NarrowUse);
- Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
- ? WideDef
- : createExtendInst(NarrowUse->getOperand(1), WideType,
- ExtKind, NarrowUse);
-
- auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
- auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
- NarrowBO->getName());
- IRBuilder<> Builder(NarrowUse);
- Builder.Insert(WideBO);
- WideBO->copyIRFlags(NarrowBO);
-
- assert(ExtKind != Unknown && "Unknown ExtKind not handled");
-
- ExtendKindMap[NarrowUse] = ExtKind;
-
- for (Use &U : NarrowUse->uses()) {
- Instruction *User = nullptr;
- if (ExtKind == SignExtended)
- User = dyn_cast<SExtInst>(U.getUser());
- else
- User = dyn_cast<ZExtInst>(U.getUser());
- if (User && User->getType() == WideType) {
- LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
- << *WideBO << "\n");
- ++NumElimExt;
- User->replaceAllUsesWith(WideBO);
- DeadInsts.emplace_back(User);
- }
- }
-}
-
-/// Determine whether an individual user of the narrow IV can be widened. If so,
-/// return the wide clone of the user.
-Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
- assert(ExtendKindMap.count(DU.NarrowDef) &&
- "Should already know the kind of extension used to widen NarrowDef");
-
- // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
- if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
- if (LI->getLoopFor(UsePhi->getParent()) != L) {
- // For LCSSA phis, sink the truncate outside the loop.
- // After SimplifyCFG most loop exit targets have a single predecessor.
- // Otherwise fall back to a truncate within the loop.
- if (UsePhi->getNumOperands() != 1)
- truncateIVUse(DU, DT, LI);
- else {
- // Widening the PHI requires us to insert a trunc. The logical place
- // for this trunc is in the same BB as the PHI. This is not possible if
- // the BB is terminated by a catchswitch.
- if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
- return nullptr;
-
- PHINode *WidePhi =
- PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
- UsePhi);
- WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
- IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
- Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
- UsePhi->replaceAllUsesWith(Trunc);
- DeadInsts.emplace_back(UsePhi);
- LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
- << *WidePhi << "\n");
- }
- return nullptr;
- }
- }
-
- // This narrow use can be widened by a sext if it's non-negative or its narrow
- // def was widened by a sext. Same for zext.
- auto canWidenBySExt = [&]() {
- return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
- };
- auto canWidenByZExt = [&]() {
- return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
- };
-
- // Our raison d'etre! Eliminate sign and zero extension.
- if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
- (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
- Value *NewDef = DU.WideDef;
- if (DU.NarrowUse->getType() != WideType) {
- unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
- unsigned IVWidth = SE->getTypeSizeInBits(WideType);
- if (CastWidth < IVWidth) {
- // The cast isn't as wide as the IV, so insert a Trunc.
- IRBuilder<> Builder(DU.NarrowUse);
- NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
- }
- else {
- // A wider extend was hidden behind a narrower one. This may induce
- // another round of IV widening in which the intermediate IV becomes
- // dead. It should be very rare.
- LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
- << " not wide enough to subsume " << *DU.NarrowUse
- << "\n");
- DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
- NewDef = DU.NarrowUse;
- }
- }
- if (NewDef != DU.NarrowUse) {
- LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
- << " replaced by " << *DU.WideDef << "\n");
- ++NumElimExt;
- DU.NarrowUse->replaceAllUsesWith(NewDef);
- DeadInsts.emplace_back(DU.NarrowUse);
- }
- // Now that the extend is gone, we want to expose its uses for potential
- // further simplification. We don't need to directly inform SimplifyIVUsers
- // of the new users, because their parent IV will be processed later as a
- // new loop phi. If we preserved IVUsers analysis, we would also want to
- // push the uses of WideDef here.
-
- // No further widening is needed. The deceased [sz]ext had done it for us.
- return nullptr;
- }
-
- // Does this user itself evaluate to a recurrence after widening?
- WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
- if (!WideAddRec.first)
- WideAddRec = getWideRecurrence(DU);
-
- assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
- if (!WideAddRec.first) {
- // If use is a loop condition, try to promote the condition instead of
- // truncating the IV first.
- if (widenLoopCompare(DU))
- return nullptr;
-
- // We are about to generate a truncate instruction that may hurt
- // performance because the scalar evolution expression computed earlier
- // in WideAddRec.first does not indicate a polynomial induction expression.
- // In that case, look at the operands of the use instruction to determine
- // if we can still widen the use instead of truncating its operand.
- if (widenWithVariantUse(DU)) {
- widenWithVariantUseCodegen(DU);
- return nullptr;
- }
-
- // This user does not evaluate to a recurrence after widening, so don't
- // follow it. Instead insert a Trunc to kill off the original use,
- // eventually isolating the original narrow IV so it can be removed.
- truncateIVUse(DU, DT, LI);
- return nullptr;
- }
- // Assume block terminators cannot evaluate to a recurrence. We can't
- // insert a Trunc after a terminator if there happens to be a critical edge.
- assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
- "SCEV is not expected to evaluate a block terminator");
-
- // Reuse the IV increment that SCEVExpander created as long as it dominates
- // NarrowUse.
- Instruction *WideUse = nullptr;
- if (WideAddRec.first == WideIncExpr &&
- Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
- WideUse = WideInc;
- else {
- WideUse = cloneIVUser(DU, WideAddRec.first);
- if (!WideUse)
- return nullptr;
- }
- // Evaluation of WideAddRec ensured that the narrow expression could be
- // extended outside the loop without overflow. This suggests that the wide use
- // evaluates to the same expression as the extended narrow use, but doesn't
- // absolutely guarantee it. Hence the following failsafe check. In rare cases
- // where it fails, we simply throw away the newly created wide use.
- if (WideAddRec.first != SE->getSCEV(WideUse)) {
- LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
- << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
- << "\n");
- DeadInsts.emplace_back(WideUse);
- return nullptr;
- }
-
- // if we reached this point then we are going to replace
- // DU.NarrowUse with WideUse. Reattach DbgValue then.
- replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
-
- ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
- // Returning WideUse pushes it on the worklist.
- return WideUse;
-}
-
-/// Add eligible users of NarrowDef to NarrowIVUsers.
-void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
- const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
- bool NonNegativeDef =
- SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
- SE->getConstant(NarrowSCEV->getType(), 0));
- for (User *U : NarrowDef->users()) {
- Instruction *NarrowUser = cast<Instruction>(U);
-
- // Handle data flow merges and bizarre phi cycles.
- if (!Widened.insert(NarrowUser).second)
- continue;
-
- bool NonNegativeUse = false;
- if (!NonNegativeDef) {
- // We might have control-dependent range information for this context.
- if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
- NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
- }
-
- NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
- NonNegativeDef || NonNegativeUse);
- }
-}
-
-/// Process a single induction variable. First use the SCEVExpander to create a
-/// wide induction variable that evaluates to the same recurrence as the
-/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
-/// def-use chain. After widenIVUse has processed all interesting IV users, the
-/// narrow IV will be isolated for removal by DeleteDeadPHIs.
-///
-/// It would be simpler to delete uses as they are processed, but we must avoid
-/// invalidating SCEV expressions.
-PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
- // Is this phi an induction variable?
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
- if (!AddRec)
- return nullptr;
-
- // Widen the induction variable expression.
- const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
- ? SE->getSignExtendExpr(AddRec, WideType)
- : SE->getZeroExtendExpr(AddRec, WideType);
-
- assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
- "Expect the new IV expression to preserve its type");
-
- // Can the IV be extended outside the loop without overflow?
- AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
- if (!AddRec || AddRec->getLoop() != L)
- return nullptr;
-
- // An AddRec must have loop-invariant operands. Since this AddRec is
- // materialized by a loop header phi, the expression cannot have any post-loop
- // operands, so they must dominate the loop header.
- assert(
- SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
- SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
- "Loop header phi recurrence inputs do not dominate the loop");
-
- // Iterate over IV uses (including transitive ones) looking for IV increments
- // of the form 'add nsw %iv, <const>'. For each increment and each use of
- // the increment, calculate control-dependent range information based on
- // dominating conditions inside of the loop (e.g. a range check inside of the
- // loop). Calculated ranges are stored in PostIncRangeInfos map.
- //
- // Control-dependent range information is later used to prove that a narrow
- // definition is not negative (see pushNarrowIVUsers). It's difficult to do
- // this on demand because when pushNarrowIVUsers needs this information some
- // of the dominating conditions might be already widened.
- if (UsePostIncrementRanges)
- calculatePostIncRanges(OrigPhi);
-
- // The rewriter provides a value for the desired IV expression. This may
- // either find an existing phi or materialize a new one. Either way, we
- // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
- // of the phi-SCC dominates the loop entry.
- Instruction *InsertPt = &L->getHeader()->front();
- WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
-
- // Remembering the WideIV increment generated by SCEVExpander allows
- // widenIVUse to reuse it when widening the narrow IV's increment. We don't
- // employ a general reuse mechanism because the call above is the only call to
- // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
- if (BasicBlock *LatchBlock = L->getLoopLatch()) {
- WideInc =
- cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
- WideIncExpr = SE->getSCEV(WideInc);
- // Propagate the debug location associated with the original loop increment
- // to the new (widened) increment.
- auto *OrigInc =
- cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
- WideInc->setDebugLoc(OrigInc->getDebugLoc());
- }
-
- LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
- ++NumWidened;
-
- // Traverse the def-use chain using a worklist starting at the original IV.
- assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
-
- Widened.insert(OrigPhi);
- pushNarrowIVUsers(OrigPhi, WidePhi);
-
- while (!NarrowIVUsers.empty()) {
- NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
-
- // Process a def-use edge. This may replace the use, so don't hold a
- // use_iterator across it.
- Instruction *WideUse = widenIVUse(DU, Rewriter);
-
- // Follow all def-use edges from the previous narrow use.
- if (WideUse)
- pushNarrowIVUsers(DU.NarrowUse, WideUse);
-
- // widenIVUse may have removed the def-use edge.
- if (DU.NarrowDef->use_empty())
- DeadInsts.emplace_back(DU.NarrowDef);
- }
-
- // Attach any debug information to the new PHI.
- replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
-
- return WidePhi;
-}
-
-/// Calculates control-dependent range for the given def at the given context
-/// by looking at dominating conditions inside of the loop
-void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
- Instruction *NarrowUser) {
- using namespace llvm::PatternMatch;
-
- Value *NarrowDefLHS;
- const APInt *NarrowDefRHS;
- if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
- m_APInt(NarrowDefRHS))) ||
- !NarrowDefRHS->isNonNegative())
- return;
-
- auto UpdateRangeFromCondition = [&] (Value *Condition,
- bool TrueDest) {
- CmpInst::Predicate Pred;
- Value *CmpRHS;
- if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
- m_Value(CmpRHS))))
- return;
-
- CmpInst::Predicate P =
- TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
-
- auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
- auto CmpConstrainedLHSRange =
- ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
- auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
- *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
-
- updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
- };
-
- auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
- if (!HasGuards)
- return;
-
- for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
- Ctx->getParent()->rend())) {
- Value *C = nullptr;
- if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
- UpdateRangeFromCondition(C, /*TrueDest=*/true);
- }
- };
-
- UpdateRangeFromGuards(NarrowUser);
-
- BasicBlock *NarrowUserBB = NarrowUser->getParent();
- // If NarrowUserBB is statically unreachable, asking dominator queries may
- // yield surprising results (e.g. the block may not have a dom tree node).
- if (!DT->isReachableFromEntry(NarrowUserBB))
- return;
-
- for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
- L->contains(DTB->getBlock());
- DTB = DTB->getIDom()) {
- auto *BB = DTB->getBlock();
- auto *TI = BB->getTerminator();
- UpdateRangeFromGuards(TI);
-
- auto *BI = dyn_cast<BranchInst>(TI);
- if (!BI || !BI->isConditional())
- continue;
-
- auto *TrueSuccessor = BI->getSuccessor(0);
- auto *FalseSuccessor = BI->getSuccessor(1);
-
- auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
- return BBE.isSingleEdge() &&
- DT->dominates(BBE, NarrowUser->getParent());
- };
-
- if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
- UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
-
- if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
- UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
- }
-}
-
-/// Calculates PostIncRangeInfos map for the given IV
-void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
- SmallPtrSet<Instruction *, 16> Visited;
- SmallVector<Instruction *, 6> Worklist;
- Worklist.push_back(OrigPhi);
- Visited.insert(OrigPhi);
-
- while (!Worklist.empty()) {
- Instruction *NarrowDef = Worklist.pop_back_val();
-
- for (Use &U : NarrowDef->uses()) {
- auto *NarrowUser = cast<Instruction>(U.getUser());
-
- // Don't go looking outside the current loop.
- auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
- if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
- continue;
-
- if (!Visited.insert(NarrowUser).second)
- continue;
-
- Worklist.push_back(NarrowUser);
-
- calculatePostIncRange(NarrowDef, NarrowUser);
- }
- }
-}
-
//===----------------------------------------------------------------------===//
// Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//
@@ -1668,9 +631,18 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
}
} while(!LoopPhis.empty());
+ // Continue if we disallowed widening.
+ if (!WidenIndVars)
+ continue;
+
for (; !WideIVs.empty(); WideIVs.pop_back()) {
- WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
- if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
+ unsigned ElimExt;
+ unsigned Widened;
+ if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
+ DT, DeadInsts, ElimExt, Widened,
+ HasGuards, UsePostIncrementRanges)) {
+ NumElimExt += ElimExt;
+ NumWidened += Widened;
Changed = true;
LoopPhis.push_back(WidePhi);
}
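
The hunk above switches simplifyAndExtend over to a standalone createWideIV entry point (the WidenIV implementation removed earlier in this diff) and threads the NumElimExt/NumWidened statistics through it. For orientation, the transform removes per-iteration extensions by promoting the induction variable itself; a minimal C++ sketch of the before/after shape it targets, illustrative only and not part of this patch:

  long sum_narrow(const long *a, int n) {
    long s = 0;
    for (int i = 0; i < n; ++i)   // 32-bit IV: the index is sign-extended
      s += a[(long)i];            // to 64 bits on every iteration
    return s;
  }

  long sum_wide(const long *a, int n) {
    long s = 0;
    for (long i = 0; i < n; ++i)  // IV widened to 64 bits: the sext is gone
      s += a[i];
    return s;
  }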
@@ -1813,7 +785,7 @@ static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
// If we can't analyze propagation through this instruction, just skip it
// and transitive users. Safe as false is a conservative result.
- if (!propagatesPoison(I) && I != Root)
+ if (!propagatesPoison(cast<Operator>(I)) && I != Root)
continue;
if (KnownPoison.insert(I).second)
@@ -2318,42 +1290,116 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
return MadeAnyChanges;
}
-/// Return a symbolic upper bound for the backedge taken count of the loop.
-/// This is more general than getConstantMaxBackedgeTakenCount as it returns
-/// an arbitrary expression as opposed to only constants.
-/// TODO: Move into the ScalarEvolution class.
-static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
- DominatorTree &DT, Loop *L) {
- SmallVector<BasicBlock*, 16> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
+static void replaceExitCond(BranchInst *BI, Value *NewCond,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ auto *OldCond = BI->getCondition();
+ BI->setCondition(NewCond);
+ if (OldCond->use_empty())
+ DeadInsts.emplace_back(OldCond);
+}
- // Form an expression for the maximum exit count possible for this loop. We
- // merge the max and exact information to approximate a version of
- // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
- SmallVector<const SCEV*, 4> ExitCounts;
- for (BasicBlock *ExitingBB : ExitingBlocks) {
- const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
- if (isa<SCEVCouldNotCompute>(ExitCount))
- ExitCount = SE.getExitCount(L, ExitingBB,
- ScalarEvolution::ConstantMaximum);
- if (!isa<SCEVCouldNotCompute>(ExitCount)) {
- assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
- "We should only have known counts for exiting blocks that "
- "dominate latch!");
- ExitCounts.push_back(ExitCount);
- }
+static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ auto *OldCond = BI->getCondition();
+ auto *NewCond =
+ ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
+ replaceExitCond(BI, NewCond, DeadInsts);
+}
+
+static void replaceWithInvariantCond(
+ const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
+ const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ Rewriter.setInsertPoint(BI);
+ auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
+ auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
+ bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ if (ExitIfTrue)
+ InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
+ IRBuilder<> Builder(BI);
+ auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
+ BI->getCondition()->getName());
+ replaceExitCond(BI, NewCond, DeadInsts);
+}
+
+static bool optimizeLoopExitWithUnknownExitCount(
+ const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
+ const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
+ ScalarEvolution *SE, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ ICmpInst::Predicate Pred;
+ Value *LHS, *RHS;
+ using namespace PatternMatch;
+ BasicBlock *TrueSucc, *FalseSucc;
+ if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
+ m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
+ return false;
+
+ assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
+ "Not a loop exit!");
+
+ // 'LHS pred RHS' should now mean that we stay in loop.
+ if (L->contains(FalseSucc))
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ // If we are proving loop exit, invert the predicate.
+ if (Inverted)
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
+ const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
+ // Can we prove it to be trivially true?
+ if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
+ foldExit(L, ExitingBB, Inverted, DeadInsts);
+ return true;
+ }
+ // Further logic works for non-inverted condition only.
+ if (Inverted)
+ return false;
+
+ auto *ARTy = LHSS->getType();
+ auto *MaxIterTy = MaxIter->getType();
+ // If possible, adjust types.
+ if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
+ MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
+ else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
+ const SCEV *MinusOne = SE->getMinusOne(ARTy);
+ auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
+ if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
+ MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
+ }
+
+ if (SkipLastIter) {
+ const SCEV *One = SE->getOne(MaxIter->getType());
+ MaxIter = SE->getMinusSCEV(MaxIter, One);
}
- if (ExitCounts.empty())
- return SE.getCouldNotCompute();
- return SE.getUMinFromMismatchedTypes(ExitCounts);
+
+ // Check if there is a loop-invariant predicate equivalent to our check.
+ auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
+ L, BI, MaxIter);
+ if (!LIP)
+ return false;
+
+ // Can we prove it to be trivially true?
+ if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
+ foldExit(L, ExitingBB, Inverted, DeadInsts);
+ else
+ replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
+ Rewriter, DeadInsts);
+
+ return true;
}
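
The new optimizeLoopExitWithUnknownExitCount handles exits whose exact count SCEV cannot compute, by asking whether the exit condition (or a loop-invariant equivalent of it) can be proven true or false for the iterations the loop actually runs, optionally skipping the last iteration. A rough C++ sketch of the kind of exit this can remove; illustrative only, and whether SCEV really fails to compute the count depends on the surrounding IR:

  unsigned sum_upto(const unsigned *a, unsigned n, unsigned limit) {
    unsigned s = 0;
    for (unsigned i = 0; i != n; ++i) {
      if (i > limit)   // if n <= limit + 1 is provable on entry, this exit
        break;         // is never taken and the branch can be folded
      s += a[i];
    }
    return s;
  }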
bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- // Remove all exits which aren't both rewriteable and analyzeable.
- auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
+ // Remove all exits which aren't both rewriteable and executed on every
+ // iteration.
+ llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
// If our exiting block exits multiple loops, we can only rewrite the
// innermost one. Otherwise, we're changing how many times the innermost
// loop runs before it exits.
@@ -2369,56 +1415,85 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (isa<Constant>(BI->getCondition()))
return true;
- const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
- if (isa<SCEVCouldNotCompute>(ExitCount))
+ // Likewise, the loop latch must be dominated by the exiting BB.
+ if (!DT->dominates(ExitingBB, L->getLoopLatch()))
return true;
+
return false;
});
- ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
if (ExitingBlocks.empty())
return false;
// Get a symbolic upper bound on the loop backedge taken count.
- const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
+ const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(MaxExitCount))
return false;
- // Visit our exit blocks in order of dominance. We know from the fact that
- // all exits (left) are analyzable that there must be a total dominance order
- // between them as each must dominate the latch. The visit order only
- // matters for the provably equal case.
- llvm::sort(ExitingBlocks,
- [&](BasicBlock *A, BasicBlock *B) {
+ // Visit our exit blocks in order of dominance. Because all exits must
+ // dominate the latch, there is a total dominance order between them.
+ llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
// std::sort sorts in ascending order, so we want the inverse of
// the normal dominance relation.
if (A == B) return false;
- if (DT->properlyDominates(A, B)) return true;
- if (DT->properlyDominates(B, A)) return false;
- llvm_unreachable("expected total dominance order!");
- });
+ if (DT->properlyDominates(A, B))
+ return true;
+ else {
+ assert(DT->properlyDominates(B, A) &&
+ "expected total dominance order!");
+ return false;
+ }
+ });
#ifndef NDEBUG
for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
}
#endif
- auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
- bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
- auto *OldCond = BI->getCondition();
- auto *NewCond = ConstantInt::get(OldCond->getType(),
- IsTaken ? ExitIfTrue : !ExitIfTrue);
- BI->setCondition(NewCond);
- if (OldCond->use_empty())
- DeadInsts.emplace_back(OldCond);
- };
-
bool Changed = false;
+ bool SkipLastIter = false;
SmallSet<const SCEV*, 8> DominatingExitCounts;
for (BasicBlock *ExitingBB : ExitingBlocks) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
- assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
+ if (isa<SCEVCouldNotCompute>(ExitCount)) {
+ // Okay, we do not know the exit count here. Can we at least prove that it
+ // will remain the same within iteration space?
+ auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
+ return optimizeLoopExitWithUnknownExitCount(
+ L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
+ Rewriter, DeadInsts);
+ };
+
+ // TODO: We might have proved that we can skip the last iteration for
+ // this check. In this case, we only want to check the condition on the
+ // pre-last iteration (MaxExitCount - 1). However, there is a nasty
+ // corner case:
+ //
+ // for (i = len; i != 0; i--) { ... check (i ult X) ... }
+ //
+ // If we could not prove that len != 0, then we also could not prove that
+ // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
+ // OptimizeCond will likely not prove anything for it, even if it could
+ // prove the same fact for len.
+ //
+ // As a temporary solution, we query both last and pre-last iterations in
+ // hope that we will be able to prove triviality for at least one of
+ // them. We can stop querying MaxExitCount for this case once SCEV
+ // understands that (MaxExitCount - 1) will not overflow here.
+ if (OptimizeCond(false, false) || OptimizeCond(true, false))
+ Changed = true;
+ else if (SkipLastIter)
+ if (OptimizeCond(false, true) || OptimizeCond(true, true))
+ Changed = true;
+ continue;
+ }
+
+ if (MaxExitCount == ExitCount)
+ // If the loop has more than 1 iteration, all further checks will be
+ // executed 1 iteration less.
+ SkipLastIter = true;
// If we know we'd exit on the first iteration, rewrite the exit to
// reflect this. This does not imply the loop must exit through this
@@ -2426,7 +1501,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// TODO: Given we know the backedge can't be taken, we should go ahead
// and break it. Or at least, kill all the header phis and simplify.
if (ExitCount->isZero()) {
- FoldExit(ExitingBB, true);
+ foldExit(L, ExitingBB, true, DeadInsts);
Changed = true;
continue;
}
@@ -2448,7 +1523,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// one?
if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
MaxExitCount, ExitCount)) {
- FoldExit(ExitingBB, false);
+ foldExit(L, ExitingBB, false, DeadInsts);
Changed = true;
continue;
}
@@ -2458,7 +1533,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// exiting iteration, but (from the visit order) strictly follows another
// which does the same and is thus dead.
if (!DominatingExitCounts.insert(ExitCount).second) {
- FoldExit(ExitingBB, false);
+ foldExit(L, ExitingBB, false, DeadInsts);
Changed = true;
continue;
}
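
The DominatingExitCounts set above implements the "provably dead later exit" case described in the comment: once an earlier (dominating) exit with the same count has been seen, a later exit with that count can never be the first one taken. An illustrative C++ shape, not part of the patch:

  void walk(unsigned n, unsigned k, void (*visit)(unsigned)) {
    for (unsigned i = 0; i < n; ++i) {
      if (i == k) return;   // dominating exit
      visit(i);
      if (i == k) return;   // same exit count, strictly later: provably dead
    }
  }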
@@ -2714,7 +1789,9 @@ bool IndVarSimplify::run(Loop *L) {
if (optimizeLoopExits(L, Rewriter)) {
Changed = true;
// Given we've changed exit counts, notify SCEV
- SE->forgetLoop(L);
+ // Some nested loops may share the same folded exit basic block,
+ // so we need to notify the topmost loop.
+ SE->forgetTopmostLoop(L);
}
// Try to form loop invariant tests for loop exits by changing how many
@@ -2791,11 +1868,15 @@ bool IndVarSimplify::run(Loop *L) {
// Now that we're done iterating through lists, clean up any instructions
// which are now dead.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ while (!DeadInsts.empty()) {
+ Value *V = DeadInsts.pop_back_val();
+
+ if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
+ Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
+ else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
Changed |=
RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
+ }
// The Rewriter may not be used from this point on.
@@ -2845,7 +1926,8 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
Function *F = L.getHeader()->getParent();
const DataLayout &DL = F->getParent()->getDataLayout();
- IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
+ IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
+ WidenIndVars && AllowIVWidening);
if (!IVS.run(&L))
return PreservedAnalyses::all();
@@ -2882,7 +1964,7 @@ struct IndVarSimplifyLegacyPass : public LoopPass {
if (MSSAAnalysis)
MSSA = &MSSAAnalysis->getMSSA();
- IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
+ IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
return IVS.run(L);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 30e4822b6769..6e09dec198c2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -52,6 +52,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -109,12 +110,12 @@ static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden,
static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
cl::init(false));
-static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal",
- cl::Hidden, cl::init(10));
-
static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
cl::Hidden, cl::init(false));
+static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
+ cl::Hidden, cl::init(10));
+
static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
cl::Hidden, cl::init(true));
@@ -145,7 +146,6 @@ class InductiveRangeCheck {
const SCEV *Step = nullptr;
const SCEV *End = nullptr;
Use *CheckUse = nullptr;
- bool IsSigned = true;
static bool parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE,
Value *&Index, Value *&Length,
@@ -160,7 +160,6 @@ public:
const SCEV *getBegin() const { return Begin; }
const SCEV *getStep() const { return Step; }
const SCEV *getEnd() const { return End; }
- bool isSigned() const { return IsSigned; }
void print(raw_ostream &OS) const {
OS << "InductiveRangeCheck:\n";
@@ -229,17 +228,27 @@ public:
SmallVectorImpl<InductiveRangeCheck> &Checks);
};
+struct LoopStructure;
+
class InductiveRangeCheckElimination {
ScalarEvolution &SE;
BranchProbabilityInfo *BPI;
DominatorTree &DT;
LoopInfo &LI;
+ using GetBFIFunc =
+ llvm::Optional<llvm::function_ref<llvm::BlockFrequencyInfo &()> >;
+ GetBFIFunc GetBFI;
+
+ // Returns true if it is profitable to do the transform, based on an estimate
+ // of the number of iterations.
+ bool isProfitableToTransform(const Loop &L, LoopStructure &LS);
+
public:
InductiveRangeCheckElimination(ScalarEvolution &SE,
BranchProbabilityInfo *BPI, DominatorTree &DT,
- LoopInfo &LI)
- : SE(SE), BPI(BPI), DT(DT), LI(LI) {}
+ LoopInfo &LI, GetBFIFunc GetBFI = None)
+ : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {}
bool run(Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop);
};
@@ -394,7 +403,6 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
IRC.Begin = IndexAddRec->getStart();
IRC.Step = IndexAddRec->getStepRecurrence(SE);
IRC.CheckUse = &ConditionUse;
- IRC.IsSigned = IsSigned;
Checks.push_back(IRC);
}
@@ -497,9 +505,8 @@ struct LoopStructure {
return Result;
}
- static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &,
- BranchProbabilityInfo *BPI,
- Loop &, const char *&);
+ static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &, Loop &,
+ const char *&);
};
/// This class is used to constrain loops to run within a given iteration space.
@@ -743,8 +750,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
}
Optional<LoopStructure>
-LoopStructure::parseLoopStructure(ScalarEvolution &SE,
- BranchProbabilityInfo *BPI, Loop &L,
+LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
const char *&FailureReason) {
if (!L.isLoopSimplifyForm()) {
FailureReason = "loop not in LoopSimplify form";
@@ -779,16 +785,6 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;
- BranchProbability ExitProbability =
- BPI ? BPI->getEdgeProbability(LatchBr->getParent(), LatchBrExitIdx)
- : BranchProbability::getZero();
-
- if (!SkipProfitabilityChecks &&
- ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) {
- FailureReason = "short running loop, not profitable";
- return None;
- }
-
ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
FailureReason = "latch terminator branch not conditional on integral icmp";
@@ -1772,14 +1768,25 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
- InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI);
+ // Get the BFI analysis result on demand. Note that modifying the CFG
+ // invalidates this analysis, so we must handle that.
+ auto getBFI = [&F, &AM ]()->BlockFrequencyInfo & {
+ return AM.getResult<BlockFrequencyAnalysis>(F);
+ };
+ InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI });
bool Changed = false;
+ {
+ bool CFGChanged = false;
+ for (const auto &L : LI) {
+ CFGChanged |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
+ /*PreserveLCSSA=*/false);
+ Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
+ }
+ Changed |= CFGChanged;
- for (const auto &L : LI) {
- Changed |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
- /*PreserveLCSSA=*/false);
- Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
+ if (CFGChanged && !SkipProfitabilityChecks)
+ AM.invalidate<BlockFrequencyAnalysis>(F);
}
SmallPriorityWorklist<Loop *, 4> Worklist;
@@ -1791,7 +1798,11 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
while (!Worklist.empty()) {
Loop *L = Worklist.pop_back_val();
- Changed |= IRCE.run(L, LPMAddNewLoop);
+ if (IRCE.run(L, LPMAddNewLoop)) {
+ Changed = true;
+ if (!SkipProfitabilityChecks)
+ AM.invalidate<BlockFrequencyAnalysis>(F);
+ }
}
if (!Changed)
@@ -1832,6 +1843,37 @@ bool IRCELegacyPass::runOnFunction(Function &F) {
return Changed;
}
+bool
+InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,
+ LoopStructure &LS) {
+ if (SkipProfitabilityChecks)
+ return true;
+ if (GetBFI.hasValue()) {
+ BlockFrequencyInfo &BFI = (*GetBFI)();
+ uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();
+ uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
+ if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
+ LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
+ << "the estimated number of iterations basing on "
+ "frequency info is " << (hFreq / phFreq) << "\n";);
+ return false;
+ }
+ return true;
+ }
+
+ if (!BPI)
+ return true;
+ BranchProbability ExitProbability =
+ BPI->getEdgeProbability(LS.Latch, LS.LatchBrExitIdx);
+ if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
+ LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
+ << "the exit probability is too big " << ExitProbability
+ << "\n";);
+ return false;
+ }
+ return true;
+}
+
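
The BFI-based part of isProfitableToTransform above estimates the trip count as header frequency divided by preheader frequency and bails out below MinRuntimeIterations (default 10). A standalone restatement of that check, with illustrative names and no LLVM dependencies:

  #include <cstdint>

  // Mirrors the frequency heuristic in isProfitableToTransform.
  bool profitableByFrequency(uint64_t headerFreq, uint64_t preheaderFreq,
                             unsigned minRuntimeIterations = 10) {
    if (headerFreq == 0 || preheaderFreq == 0)
      return true;  // no usable frequency data: do not block the transform
    return headerFreq / preheaderFreq >= minRuntimeIterations;
  }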
bool InductiveRangeCheckElimination::run(
Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop) {
if (L->getBlocks().size() >= LoopSizeCutoff) {
@@ -1871,13 +1913,15 @@ bool InductiveRangeCheckElimination::run(
const char *FailureReason = nullptr;
Optional<LoopStructure> MaybeLoopStructure =
- LoopStructure::parseLoopStructure(SE, BPI, *L, FailureReason);
+ LoopStructure::parseLoopStructure(SE, *L, FailureReason);
if (!MaybeLoopStructure.hasValue()) {
LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
<< FailureReason << "\n";);
return false;
}
LoopStructure LS = MaybeLoopStructure.getValue();
+ if (!isProfitableToTransform(*L, LS))
+ return false;
const SCEVAddRecExpr *IndVar =
cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep)));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index db9cc58bbfc4..332eb10ac16b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -88,6 +88,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -108,6 +109,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -146,13 +148,7 @@ namespace {
using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
-/// InferAddressSpaces
class InferAddressSpaces : public FunctionPass {
- const TargetTransformInfo *TTI = nullptr;
- const DataLayout *DL = nullptr;
-
- /// Target-specific address space whose uses should be replaced if
- /// possible.
unsigned FlatAddrSpace = 0;
public:
@@ -168,8 +164,16 @@ public:
}
bool runOnFunction(Function &F) override;
+};
+
+class InferAddressSpacesImpl {
+ const TargetTransformInfo *TTI = nullptr;
+ const DataLayout *DL = nullptr;
+
+ /// Target-specific address space whose uses should be replaced if
+ /// possible.
+ unsigned FlatAddrSpace = 0;
-private:
// Returns the new address space of V if updated; otherwise, returns None.
Optional<unsigned>
updateAddressSpace(const Value &V,
@@ -211,6 +215,11 @@ private:
const ValueToValueMapTy &ValueWithNewAddrSpace,
SmallVectorImpl<const Use *> *UndefUsesToFix) const;
unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
+
+public:
+ InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
+ : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
+ bool run(Function &F);
};
} // end anonymous namespace
@@ -286,7 +295,8 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
case Instruction::IntToPtr:
return isNoopPtrIntCastPair(Op, DL, TTI);
default:
- return false;
+ // That value is an address expression if it has an assumed address space.
+ return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
}
}
@@ -325,9 +335,9 @@ getPointerOperands(const Value &V, const DataLayout &DL,
}
}
-bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
- Value *OldV,
- Value *NewV) const {
+bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
+ Value *OldV,
+ Value *NewV) const {
Module *M = II->getParent()->getParent()->getParent();
switch (II->getIntrinsicID()) {
@@ -354,7 +364,7 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
}
}
-void InferAddressSpaces::collectRewritableIntrinsicOperands(
+void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
IntrinsicInst *II, PostorderStackTy &PostorderStack,
DenseSet<Value *> &Visited) const {
auto IID = II->getIntrinsicID();
@@ -379,7 +389,7 @@ void InferAddressSpaces::collectRewritableIntrinsicOperands(
// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
-void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
+void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
Value *V, PostorderStackTy &PostorderStack,
DenseSet<Value *> &Visited) const {
assert(V->getType()->isPointerTy());
@@ -394,8 +404,8 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
return;
}
- if (isAddressExpression(*V, *DL, TTI) &&
- V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
+ if (V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
+ isAddressExpression(*V, *DL, TTI)) {
if (Visited.insert(V).second) {
PostorderStack.emplace_back(V, false);
@@ -413,7 +423,7 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
// Returns all flat address expressions in function F. The elements are
// ordered in postorder.
std::vector<WeakTrackingVH>
-InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
+InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
// This function implements a non-recursive postorder traversal of a partial
// use-def graph of function F.
PostorderStackTy PostorderStack;
@@ -478,9 +488,12 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
}
// Otherwise, adds its operands to the stack and explores them.
PostorderStack.back().setInt(true);
- for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
- appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
- Visited);
+ // Skip values with an assumed address space.
+ if (TTI->getAssumedAddrSpace(TopVal) == UninitializedAddressSpace) {
+ for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
+ appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
+ Visited);
+ }
}
}
return Postorder;
@@ -520,7 +533,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
//
// This may also return nullptr in the case the instruction could not be
// rewritten.
-Value *InferAddressSpaces::cloneInstructionWithNewAddressSpace(
+Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
@@ -555,6 +568,16 @@ Value *InferAddressSpaces::cloneInstructionWithNewAddressSpace(
return nullptr;
}
+ unsigned AS = TTI->getAssumedAddrSpace(I);
+ if (AS != UninitializedAddressSpace) {
+ // For the assumed address space, insert an `addrspacecast` to make that
+ // explicit.
+ auto *NewPtrTy = I->getType()->getPointerElementType()->getPointerTo(AS);
+ auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
+ NewI->insertAfter(I);
+ return NewI;
+ }
+
// Computes the converted pointer operands.
SmallVector<Value *, 4> NewPointerOperands;
for (const Use &OperandUse : I->operands()) {
@@ -583,7 +606,7 @@ Value *InferAddressSpaces::cloneInstructionWithNewAddressSpace(
GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
GEP->getSourceElementType(), NewPointerOperands[0],
- SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
+ SmallVector<Value *, 4>(GEP->indices()));
NewGEP->setIsInBounds(GEP->isInBounds());
return NewGEP;
}
@@ -695,13 +718,13 @@ static Value *cloneConstantExprWithNewAddressSpace(
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
-Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
- Value *V, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace,
- SmallVectorImpl<const Use *> *UndefUsesToFix) const {
+Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
+ Value *V, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) const {
// All values in Postorder are flat address expressions.
- assert(isAddressExpression(*V, *DL, TTI) &&
- V->getType()->getPointerAddressSpace() == FlatAddrSpace);
+ assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
+ isAddressExpression(*V, *DL, TTI));
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
@@ -721,8 +744,8 @@ Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
// Defines the join operation on the address space lattice (see the file header
// comments).
-unsigned InferAddressSpaces::joinAddressSpaces(unsigned AS1,
- unsigned AS2) const {
+unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
+ unsigned AS2) const {
if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
return FlatAddrSpace;
@@ -735,11 +758,7 @@ unsigned InferAddressSpaces::joinAddressSpaces(unsigned AS1,
return (AS1 == AS2) ? AS1 : FlatAddrSpace;
}
-bool InferAddressSpaces::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+bool InferAddressSpacesImpl::run(Function &F) {
DL = &F.getParent()->getDataLayout();
if (AssumeDefaultIsFlatAddressSpace)
@@ -766,7 +785,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
-void InferAddressSpaces::inferAddressSpaces(
+void InferAddressSpacesImpl::inferAddressSpaces(
ArrayRef<WeakTrackingVH> Postorder,
ValueToAddrSpaceMapTy *InferredAddrSpace) const {
SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
@@ -810,7 +829,7 @@ void InferAddressSpaces::inferAddressSpaces(
}
}
-Optional<unsigned> InferAddressSpaces::updateAddressSpace(
+Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
assert(InferredAddrSpace.count(&V));
@@ -848,15 +867,24 @@ Optional<unsigned> InferAddressSpaces::updateAddressSpace(
else
NewAS = joinAddressSpaces(Src0AS, Src1AS);
} else {
- for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
- auto I = InferredAddrSpace.find(PtrOperand);
- unsigned OperandAS = I != InferredAddrSpace.end() ?
- I->second : PtrOperand->getType()->getPointerAddressSpace();
-
- // join(flat, *) = flat. So we can break if NewAS is already flat.
- NewAS = joinAddressSpaces(NewAS, OperandAS);
- if (NewAS == FlatAddrSpace)
- break;
+ unsigned AS = TTI->getAssumedAddrSpace(&V);
+ if (AS != UninitializedAddressSpace) {
+ // Use the assumed address space directly.
+ NewAS = AS;
+ } else {
+ // Otherwise, infer the address space from its pointer operands.
+ for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
+ auto I = InferredAddrSpace.find(PtrOperand);
+ unsigned OperandAS =
+ I != InferredAddrSpace.end()
+ ? I->second
+ : PtrOperand->getType()->getPointerAddressSpace();
+
+ // join(flat, *) = flat. So we can break if NewAS is already flat.
+ NewAS = joinAddressSpaces(NewAS, OperandAS);
+ if (NewAS == FlatAddrSpace)
+ break;
+ }
}
}
@@ -947,7 +975,8 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
// \p returns true if it is OK to change the address space of constant \p C with
// a ConstantExpr addrspacecast.
-bool InferAddressSpaces::isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const {
+bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
+ unsigned NewAS) const {
assert(NewAS != UninitializedAddressSpace);
unsigned SrcAS = C->getType()->getPointerAddressSpace();
@@ -986,7 +1015,7 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
return I;
}
-bool InferAddressSpaces::rewriteWithNewAddressSpaces(
+bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
@@ -997,6 +1026,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
SmallVector<const Use *, 32> UndefUsesToFix;
for (Value* V : Postorder) {
unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
+
+ // In some degenerate cases (e.g. invalid IR in unreachable code), we may
+ // not even infer the value to have its original address space.
+ if (NewAddrSpace == UninitializedAddressSpace)
+ continue;
+
if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
Value *New = cloneValueWithNewAddressSpace(
V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
@@ -1062,6 +1097,9 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
}
User *CurUser = U.getUser();
+ // Skip if the current user is the new value itself.
+ if (CurUser == NewV)
+ continue;
// Handle more complex cases like intrinsic that need to be remangled.
if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
@@ -1148,6 +1186,34 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
return true;
}
+bool InferAddressSpaces::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ return InferAddressSpacesImpl(
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
+ FlatAddrSpace)
+ .run(F);
+}
+
FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
return new InferAddressSpaces(AddressSpace);
}
+
+InferAddressSpacesPass::InferAddressSpacesPass()
+ : FlatAddrSpace(UninitializedAddressSpace) {}
+InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
+ : FlatAddrSpace(AddressSpace) {}
+
+PreservedAnalyses InferAddressSpacesPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed =
+ InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
+ .run(F);
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}
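
The InferAddressSpaces changes above follow the usual pattern for porting a legacy pass to the new pass manager: the transform body moves into an Impl class, and both entry points just construct it and call run(). A stripped-down sketch of that shape with purely illustrative names (no LLVM types, so it stays self-contained):

  // Generic sketch of the legacy-pass / new-PM split used above.
  struct TransformImpl {
    unsigned FlatAddrSpace;
    explicit TransformImpl(unsigned AS) : FlatAddrSpace(AS) {}
    bool run() { return false; }   // the real rewriting would happen here
  };

  struct LegacyPassEntry {         // plays the role of InferAddressSpaces
    unsigned FlatAddrSpace = 0;
    bool runOnFunction() { return TransformImpl(FlatAddrSpace).run(); }
  };

  struct NewPMPassEntry {          // plays the role of InferAddressSpacesPass
    unsigned FlatAddrSpace = 0;
    bool run() { return TransformImpl(FlatAddrSpace).run(); }
  };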
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
index e87b622ab19f..c11d2e4c1d6b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -20,8 +20,10 @@
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"
+
using namespace llvm;
#define DEBUG_TYPE "instsimplify"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 2f379b7f6160..10b08b4e2224 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -32,6 +32,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -104,6 +105,11 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
cl::Hidden);
+static cl::opt<bool> JumpThreadingFreezeSelectCond(
+ "jump-threading-freeze-select-cond",
+ cl::desc("Freeze the condition when unfolding select"), cl::init(false),
+ cl::Hidden);
+
static cl::opt<bool> ThreadAcrossLoopHeaders(
"jump-threading-across-loop-headers",
cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
@@ -133,7 +139,8 @@ namespace {
public:
static char ID; // Pass identification
- JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
+ JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
+ : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
@@ -147,6 +154,7 @@ namespace {
AU.addPreserved<LazyValueInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
void releaseMemory() override { Impl.releaseMemory(); }
@@ -166,11 +174,12 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
// Public interface to the Jump Threading pass
-FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
- return new JumpThreading(Threshold);
+FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
+ return new JumpThreading(InsertFr, Threshold);
}
-JumpThreadingPass::JumpThreadingPass(int T) {
+JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
+ InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}
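
The new InsertFreezeWhenUnfoldingSelect flag wired up here exists because turning a select into a conditional branch is not sound when the condition may be undef or poison: branching on poison is immediate undefined behavior, while the select merely yields an arbitrary or poison value. A schematic before/after, written as comments and not taken from this patch:

  // before unfolding:
  //   %v = select i1 %c, i32 %x, i32 %y
  //
  // after unfolding with freezing enabled:
  //   %c.fr = freeze i1 %c                       ; pins %c to one fixed value
  //   br i1 %c.fr, label %sel.true, label %sel.false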
@@ -304,6 +313,10 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
bool JumpThreading::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
+ auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ // Jump threading makes no sense for targets with divergent control flow.
+ if (TTI->hasBranchDivergence())
+ return false;
auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
@@ -328,6 +341,10 @@ bool JumpThreading::runOnFunction(Function &F) {
PreservedAnalyses JumpThreadingPass::run(Function &F,
FunctionAnalysisManager &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ // Jump threading makes no sense for targets with divergent control flow.
+ if (TTI.hasBranchDivergence())
+ return PreservedAnalyses::all();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
@@ -345,6 +362,11 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
+ if (PrintLVIAfterJumpThreading) {
+ dbgs() << "LVI for function '" << F.getName() << "':\n";
+ LVI.printLVI(F, DTU.getDomTree(), dbgs());
+ }
+
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
@@ -397,7 +419,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
Unreachable.insert(&BB);
if (!ThreadAcrossLoopHeaders)
- FindLoopHeaders(F);
+ findLoopHeaders(F);
bool EverChanged = false;
bool Changed;
@@ -406,7 +428,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
for (auto &BB : F) {
if (Unreachable.count(&BB))
continue;
- while (ProcessBlock(&BB)) // Thread all of the branches we can over BB.
+ while (processBlock(&BB)) // Thread all of the branches we can over BB.
Changed = true;
// Jump threading may have introduced redundant debug values into BB
@@ -421,7 +443,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
continue;
if (pred_empty(&BB)) {
- // When ProcessBlock makes BB unreachable it doesn't bother to fix up
+ // When processBlock makes BB unreachable it doesn't bother to fix up
// the instructions in it. We must remove BB to prevent invalid IR.
LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
<< "' with terminator: " << *BB.getTerminator()
@@ -433,7 +455,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
continue;
}
- // ProcessBlock doesn't thread BBs with unconditional TIs. However, if BB
+ // processBlock doesn't thread BBs with unconditional TIs. However, if BB
// is "almost empty", we attempt to merge BB with its sole successor.
auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
if (BI && BI->isUnconditional()) {
@@ -467,7 +489,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// at the end of block. RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
-static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) {
+static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
assert(Cond->getType() == ToVal->getType());
auto *BB = Cond->getParent();
// We can unconditionally replace all uses in non-local blocks (i.e. uses
@@ -531,10 +553,18 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
// Debugger intrinsics don't incur code size.
if (isa<DbgInfoIntrinsic>(I)) continue;
+ // Pseudo-probes don't incur code size.
+ if (isa<PseudoProbeInst>(I))
+ continue;
+
// If this is a pointer->pointer bitcast, it is free.
if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
+ // Freeze instruction is free, too.
+ if (isa<FreezeInst>(I))
+ continue;
+
// Bail out if this instruction gives back a token type, it is not possible
// to duplicate it if it is used outside this BB.
if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
@@ -562,7 +592,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
return Size > Bonus ? Size - Bonus : 0;
}
-/// FindLoopHeaders - We do not want jump threading to turn proper loop
+/// findLoopHeaders - We do not want jump threading to turn proper loop
/// structures into irreducible loops. Doing this breaks up the loop nesting
/// hierarchy and pessimizes later transformations. To prevent this from
/// happening, we first have to find the loop headers. Here we approximate this
@@ -576,7 +606,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
/// within the loop (forming a nested loop). This simple analysis is not rich
/// enough to track all of these properties and keep it up-to-date as the CFG
/// mutates, so we don't allow any of these transformations.
-void JumpThreadingPass::FindLoopHeaders(Function &F) {
+void JumpThreadingPass::findLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
@@ -603,13 +633,13 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
return dyn_cast<ConstantInt>(Val);
}
-/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
+/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
/// in any of our predecessors. If so, return the known list of (value, pred BB)
/// pairs in the result vector.
///
/// This returns true if there were any known values.
-bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
+bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
Value *V, BasicBlock *BB, PredValueInfo &Result,
ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
Instruction *CxtI) {
@@ -674,13 +704,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
return !Result.empty();
}
- // Handle Cast instructions. Only see through Cast when the source operand is
- // PHI or Cmp to save the compilation time.
+ // Handle Cast instructions.
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Source = CI->getOperand(0);
- if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
- return false;
- ComputeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
+ computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
RecursionSet, CxtI);
if (Result.empty())
return false;
@@ -692,6 +719,18 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
return true;
}
+ if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
+ Value *Source = FI->getOperand(0);
+ computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
+ RecursionSet, CxtI);
+
+ erase_if(Result, [](auto &Pair) {
+ return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
+ });
+
+ return !Result.empty();
+ }
+
// Handle some boolean conditions.
if (I->getType()->getPrimitiveSizeInBits() == 1) {
assert(Preference == WantInteger && "One-bit non-integer type?");
@@ -701,9 +740,9 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
I->getOpcode() == Instruction::And) {
PredValueInfoTy LHSVals, RHSVals;
- ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
+ computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
WantInteger, RecursionSet, CxtI);
- ComputeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals,
+ computeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals,
WantInteger, RecursionSet, CxtI);
if (LHSVals.empty() && RHSVals.empty())
@@ -739,7 +778,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
if (I->getOpcode() == Instruction::Xor &&
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
- ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
+ computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
WantInteger, RecursionSet, CxtI);
if (Result.empty())
return false;
@@ -757,7 +796,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
&& "A binary operator creating a block address?");
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
PredValueInfoTy LHSVals;
- ComputeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
+ computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
WantInteger, RecursionSet, CxtI);
// Try to use constant folding to simplify the binary operator.
@@ -891,7 +930,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
// Try to find a constant value for the LHS of a comparison,
// and evaluate it statically if we can.
PredValueInfoTy LHSVals;
- ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
+ computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
WantInteger, RecursionSet, CxtI);
for (const auto &LHSVal : LHSVals) {
@@ -912,7 +951,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
PredValueInfoTy Conds;
if ((TrueVal || FalseVal) &&
- ComputeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
+ computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
WantInteger, RecursionSet, CxtI)) {
for (auto &C : Conds) {
Constant *Cond = C.first;
@@ -940,7 +979,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
}
// If all else fails, see if LVI can figure out a constant value for us.
- Constant *CI = LVI->getConstant(V, BB, CxtI);
+ assert(CxtI->getParent() == BB && "CxtI should be in BB");
+ Constant *CI = LVI->getConstant(V, CxtI);
if (Constant *KC = getKnownConstant(CI, Preference)) {
for (BasicBlock *Pred : predecessors(BB))
Result.emplace_back(KC, Pred);
@@ -954,7 +994,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
///
/// Since we can pick an arbitrary destination, we pick the successor with the
/// fewest predecessors. This should reduce the in-degree of the others.
-static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
+static unsigned getBestDestForJumpOnUndef(BasicBlock *BB) {
Instruction *BBTerm = BB->getTerminator();
unsigned MinSucc = 0;
BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
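An aside, not part of the patch: getBestDestForJumpOnUndef implements a simple rule: since a branch on undef may legally go anywhere, prefer the successor with the fewest incoming edges. A minimal standalone C++ sketch of the same selection over a toy block representation (ToyBlock and the function name are hypothetical, not LLVM's):

    #include <cstddef>
    #include <vector>

    // Toy stand-in for a basic block: only the predecessor count matters here.
    struct ToyBlock {
      unsigned NumPreds = 0;
    };

    // Return the index of the successor with the fewest predecessors,
    // breaking ties in favor of the earliest successor.
    static std::size_t bestDestForJumpOnUndef(const std::vector<ToyBlock> &Succs) {
      std::size_t MinIdx = 0;
      for (std::size_t I = 1; I < Succs.size(); ++I)
        if (Succs[I].NumPreds < Succs[MinIdx].NumPreds)
          MinIdx = I;
      return MinIdx;
    }

With successors that have 3, 1 and 2 predecessors, the sketch returns index 1; folding the branch toward that block removes the edges to the busier successors and so reduces their in-degree, as the comment above explains.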
@@ -982,9 +1022,9 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) {
return !BA->use_empty();
}
-/// ProcessBlock - If there are any predecessors whose control can be threaded
+/// processBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
-bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
+bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// If the block is trivially dead, just return and let the caller nuke it.
// This simplifies other transformations.
if (DTU->isBBPendingDeletion(BB) ||
@@ -995,14 +1035,14 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
- if (MaybeMergeBasicBlockIntoOnlyPred(BB))
+ if (maybeMergeBasicBlockIntoOnlyPred(BB))
return true;
- if (TryToUnfoldSelectInCurrBB(BB))
+ if (tryToUnfoldSelectInCurrBB(BB))
return true;
// Look if we can propagate guards to predecessors.
- if (HasGuards && ProcessGuards(BB))
+ if (HasGuards && processGuards(BB))
return true;
// What kind of constant we're looking for.
@@ -1027,6 +1067,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
return false; // Must be an invoke or callbr.
}
+ // Keep track if we constant folded the condition in this invocation.
+ bool ConstantFolded = false;
+
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
@@ -1037,13 +1080,16 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (isInstructionTriviallyDead(I, TLI))
I->eraseFromParent();
Condition = SimpleVal;
+ ConstantFolded = true;
}
}
- // If the terminator is branching on an undef, we can pick any of the
- // successors to branch to. Let GetBestDestForJumpOnUndef decide.
- if (isa<UndefValue>(Condition)) {
- unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
+ // If the terminator is branching on an undef or freeze undef, we can pick any
+ // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
+ auto *FI = dyn_cast<FreezeInst>(Condition);
+ if (isa<UndefValue>(Condition) ||
+ (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
+ unsigned BestSucc = getBestDestForJumpOnUndef(BB);
std::vector<DominatorTree::UpdateType> Updates;
// Fold the branch/switch.
@@ -1061,6 +1107,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
DTU->applyUpdatesPermissive(Updates);
+ if (FI)
+ FI->eraseFromParent();
return true;
}
@@ -1073,6 +1121,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
<< '\n');
++NumFolds;
ConstantFoldTerminator(BB, true, nullptr, DTU);
+ if (HasProfileData)
+ BPI->eraseBlock(BB);
return true;
}
@@ -1081,9 +1131,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// All the rest of our checks depend on the condition being an instruction.
if (!CondInst) {
// FIXME: Unify this with code below.
- if (ProcessThreadableEdges(Condition, BB, Preference, Terminator))
+ if (processThreadableEdges(Condition, BB, Preference, Terminator))
return true;
- return false;
+ return ConstantFolded;
}
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
@@ -1124,22 +1174,24 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
auto *CI = Ret == LazyValueInfo::True ?
ConstantInt::getTrue(CondCmp->getType()) :
ConstantInt::getFalse(CondCmp->getType());
- ReplaceFoldableUses(CondCmp, CI);
+ replaceFoldableUses(CondCmp, CI);
}
DTU->applyUpdatesPermissive(
{{DominatorTree::Delete, BB, ToRemoveSucc}});
+ if (HasProfileData)
+ BPI->eraseBlock(BB);
return true;
}
// We did not manage to simplify this branch, try to see whether
// CondCmp depends on a known phi-select pattern.
- if (TryToUnfoldSelect(CondCmp, BB))
+ if (tryToUnfoldSelect(CondCmp, BB))
return true;
}
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
- if (TryToUnfoldSelect(SI, BB))
+ if (tryToUnfoldSelect(SI, BB))
return true;
// Check for some cases that are worth simplifying. Right now we want to look
@@ -1147,6 +1199,11 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// we see one, check to see if it's partially redundant. If so, insert a PHI
// which can then be used to thread the values.
Value *SimplifyValue = CondInst;
+
+ if (auto *FI = dyn_cast<FreezeInst>(SimplifyValue))
+ // Look into freeze's operand
+ SimplifyValue = FI->getOperand(0);
+
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
@@ -1154,7 +1211,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// TODO: There are other places where load PRE would be profitable, such as
// more complex comparisons.
if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
- if (SimplifyPartiallyRedundantLoad(LoadI))
+ if (simplifyPartiallyRedundantLoad(LoadI))
return true;
// Before threading, try to propagate profile data backwards:
@@ -1165,29 +1222,32 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// Handle a variety of cases where we are branching on something derived from
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
- if (ProcessThreadableEdges(CondInst, BB, Preference, Terminator))
+ if (processThreadableEdges(CondInst, BB, Preference, Terminator))
return true;
- // If this is an otherwise-unfoldable branch on a phi node in the current
- // block, see if we can simplify.
- if (PHINode *PN = dyn_cast<PHINode>(CondInst))
- if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
- return ProcessBranchOnPHI(PN);
+ // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
+ // the current block, see if we can simplify.
+ PHINode *PN = dyn_cast<PHINode>(
+ isa<FreezeInst>(CondInst) ? cast<FreezeInst>(CondInst)->getOperand(0)
+ : CondInst);
+
+ if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return processBranchOnPHI(PN);
// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
if (CondInst->getOpcode() == Instruction::Xor &&
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
- return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+ return processBranchOnXOR(cast<BinaryOperator>(CondInst));
// Search for a stronger dominating condition that can be used to simplify a
// conditional branch leaving BB.
- if (ProcessImpliedCondition(BB))
+ if (processImpliedCondition(BB))
return true;
return false;
}
-bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
+bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
@@ -1217,6 +1277,8 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
UncondBI->setDebugLoc(BI->getDebugLoc());
BI->eraseFromParent();
DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
+ if (HasProfileData)
+ BPI->eraseBlock(BB);
return true;
}
CurrentBB = CurrentPred;
@@ -1234,11 +1296,11 @@ static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
return false;
}
-/// SimplifyPartiallyRedundantLoad - If LoadI is an obviously partially
+/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
/// redundant load instruction, eliminate it by replacing it with a PHI node.
/// This is an important optimization that encourages jump threading, and needs
/// to be run interlaced with other jump threading tasks.
-bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
+bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// Don't hack volatile and ordered loads.
if (!LoadI->isUnordered()) return false;
@@ -1408,7 +1470,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
}
// Split them out to their own block.
- UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
+ UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
}
// If the value isn't available in all predecessors, then there will be
@@ -1472,11 +1534,11 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
return true;
}
-/// FindMostPopularDest - The specified list contains multiple possible
+/// findMostPopularDest - The specified list contains multiple possible
/// threadable destinations. Pick the one that occurs the most frequently in
/// the list.
static BasicBlock *
-FindMostPopularDest(BasicBlock *BB,
+findMostPopularDest(BasicBlock *BB,
const SmallVectorImpl<std::pair<BasicBlock *,
BasicBlock *>> &PredToDestList) {
assert(!PredToDestList.empty());
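For illustration only, not part of the diff: findMostPopularDest is a plain frequency count over the (predecessor, destination) pairs. A standalone C++ sketch of that counting step, using strings as stand-ins for blocks (names are illustrative):

    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Each entry pairs a predecessor with the destination it would branch to.
    using PredDest = std::pair<std::string, std::string>;

    // Return the destination named by the most predecessors; on a tie the
    // destination that reached the winning count first keeps the slot.
    static std::string mostPopularDest(const std::vector<PredDest> &PredToDestList) {
      std::map<std::string, unsigned> DestCount;
      std::string Best;
      unsigned BestCount = 0;
      for (const auto &PD : PredToDestList) {
        unsigned Count = ++DestCount[PD.second];
        if (Count > BestCount) {
          BestCount = Count;
          Best = PD.second;
        }
      }
      return Best;
    }

Picking the most common destination lets the largest group of predecessors be factored and redirected in a single threading step.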
@@ -1511,7 +1573,7 @@ FindMostPopularDest(BasicBlock *BB,
// Try to evaluate the value of V when the control flows from PredPredBB to
// BB->getSinglePredecessor() and then on to BB.
-Constant *JumpThreadingPass::EvaluateOnPredecessorEdge(BasicBlock *BB,
+Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
BasicBlock *PredPredBB,
Value *V) {
BasicBlock *PredBB = BB->getSinglePredecessor();
@@ -1538,9 +1600,9 @@ Constant *JumpThreadingPass::EvaluateOnPredecessorEdge(BasicBlock *BB,
if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
if (CondCmp->getParent() == BB) {
Constant *Op0 =
- EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
+ evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
Constant *Op1 =
- EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
+ evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
if (Op0 && Op1) {
return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
}
@@ -1551,7 +1613,7 @@ Constant *JumpThreadingPass::EvaluateOnPredecessorEdge(BasicBlock *BB,
return nullptr;
}
-bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
ConstantPreference Preference,
Instruction *CxtI) {
// If threading this would thread across a loop header, don't even try to
@@ -1560,15 +1622,15 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
return false;
PredValueInfoTy PredValues;
- if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
+ if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
CxtI)) {
// We don't have known values in predecessors. See if we can thread through
// BB and its sole predecessor.
- return MaybeThreadThroughTwoBasicBlocks(BB, Cond);
+ return maybethreadThroughTwoBasicBlocks(BB, Cond);
}
assert(!PredValues.empty() &&
- "ComputeValueKnownInPredecessors returned true with no values");
+ "computeValueKnownInPredecessors returned true with no values");
LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
for (const auto &PredValue : PredValues) {
@@ -1660,6 +1722,8 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BranchInst::Create(OnlyDest, Term);
Term->eraseFromParent();
DTU->applyUpdatesPermissive(Updates);
+ if (HasProfileData)
+ BPI->eraseBlock(BB);
// If the condition is now dead due to the removal of the old terminator,
// erase it.
@@ -1675,7 +1739,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// guard/assume.
else if (OnlyVal && OnlyVal != MultipleVal &&
CondInst->getParent() == BB)
- ReplaceFoldableUses(CondInst, OnlyVal);
+ replaceFoldableUses(CondInst, OnlyVal);
}
return true;
}
@@ -1688,18 +1752,18 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *MostPopularDest = OnlyDest;
if (MostPopularDest == MultipleDestSentinel) {
- // Remove any loop headers from the Dest list, ThreadEdge conservatively
+ // Remove any loop headers from the Dest list, threadEdge conservatively
// won't process them, but we might have other destinations that are eligible
// and that we still want to process.
erase_if(PredToDestList,
[&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
- return LoopHeaders.count(PredToDest.second) != 0;
+ return LoopHeaders.contains(PredToDest.second);
});
if (PredToDestList.empty())
return false;
- MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+ MostPopularDest = findMostPopularDest(BB, PredToDestList);
}
// Now that we know what the most popular destination is, factor all
@@ -1721,16 +1785,16 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// the destination that these predecessors should get to.
if (!MostPopularDest)
MostPopularDest = BB->getTerminator()->
- getSuccessor(GetBestDestForJumpOnUndef(BB));
+ getSuccessor(getBestDestForJumpOnUndef(BB));
// Ok, try to thread it!
- return TryThreadEdge(BB, PredsToFactor, MostPopularDest);
+ return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
}
-/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
-/// a PHI node in the current block. See if there are any simplifications we
-/// can do based on inputs to the phi node.
-bool JumpThreadingPass::ProcessBranchOnPHI(PHINode *PN) {
+/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
+/// a PHI node (or freeze PHI) in the current block. See if there are any
+/// simplifications we can do based on inputs to the phi node.
+bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
// TODO: We could make use of this to do it once for blocks with common PHI
@@ -1742,13 +1806,16 @@ bool JumpThreadingPass::ProcessBranchOnPHI(PHINode *PN) {
// *duplicate* the conditional branch into that block in order to further
// encourage jump threading and to eliminate cases where we have branch on a
// phi of an icmp (branch on icmp is much better).
+ // This is still beneficial when a frozen phi is used as the branch condition
+ // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
+ // to br(icmp(freeze ...)).
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PredBB = PN->getIncomingBlock(i);
if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
if (PredBr->isUnconditional()) {
PredBBs[0] = PredBB;
// Try to duplicate BB into PredBB.
- if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
+ if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
return true;
}
}
@@ -1756,10 +1823,10 @@ bool JumpThreadingPass::ProcessBranchOnPHI(PHINode *PN) {
return false;
}
-/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
+/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
-bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {
+bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
BasicBlock *BB = BO->getParent();
// If either the LHS or RHS of the xor is a constant, don't do this
@@ -1797,17 +1864,17 @@ bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {
PredValueInfoTy XorOpValues;
bool isLHS = true;
- if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
+ if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
WantInteger, BO)) {
assert(XorOpValues.empty());
- if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
+ if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
WantInteger, BO))
return false;
isLHS = false;
}
assert(!XorOpValues.empty() &&
- "ComputeValueKnownInPredecessors returned true with no values");
+ "computeValueKnownInPredecessors returned true with no values");
// Scan the information to see which is most popular: true or false. The
// predecessors can be of the set true, false, or undef.
@@ -1868,13 +1935,13 @@ bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {
return false;
// Try to duplicate BB into PredBB.
- return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+ return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}
-/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
+/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
-static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
+static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
BasicBlock *OldPred,
BasicBlock *NewPred,
DenseMap<Instruction*, Value*> &ValueMap) {
@@ -1895,7 +1962,7 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
}
/// Merge basic block BB into its sole predecessor if possible.
-bool JumpThreadingPass::MaybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
+bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
BasicBlock *SinglePred = BB->getSinglePredecessor();
if (!SinglePred)
return false;
@@ -1946,7 +2013,7 @@ bool JumpThreadingPass::MaybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
/// Update the SSA form. NewBB contains instructions that are copied from BB.
/// ValueMapping maps old values in BB to new ones in NewBB.
-void JumpThreadingPass::UpdateSSA(
+void JumpThreadingPass::updateSSA(
BasicBlock *BB, BasicBlock *NewBB,
DenseMap<Instruction *, Value *> &ValueMapping) {
// If there were values defined in BB that are used outside the block, then we
@@ -1992,7 +2059,7 @@ void JumpThreadingPass::UpdateSSA(
/// arguments that come from PredBB. Return the map from the variables in the
/// source basic block to the variables in the newly created basic block.
DenseMap<Instruction *, Value *>
-JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
+JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
BasicBlock::iterator BE, BasicBlock *NewBB,
BasicBlock *PredBB) {
// We are going to have to map operands from the source basic block to the new
@@ -2009,6 +2076,15 @@ JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
ValueMapping[PN] = NewPN;
}
+ // Clone noalias scope declarations in the threaded block. When threading a
+ // loop exit, we would otherwise end up with two identical scope declarations
+ // visible at the same time.
+ SmallVector<MDNode *> NoAliasScopes;
+ DenseMap<MDNode *, MDNode *> ClonedScopes;
+ LLVMContext &Context = PredBB->getContext();
+ identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
+ cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
+
// Clone the non-phi instructions of the source basic block into NewBB,
// keeping track of the mapping and using it to remap operands in the cloned
// instructions.
@@ -2017,6 +2093,7 @@ JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
ValueMapping[&*BI] = New;
+ adaptNoAliasScopes(New, ClonedScopes, Context);
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
@@ -2031,7 +2108,7 @@ JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
}
/// Attempt to thread through two successive basic blocks.
-bool JumpThreadingPass::MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB,
+bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
Value *Cond) {
// Consider:
//
@@ -2100,7 +2177,7 @@ bool JumpThreadingPass::MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB,
BasicBlock *OnePred = nullptr;
for (BasicBlock *P : predecessors(PredBB)) {
if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
- EvaluateOnPredecessorEdge(BB, P, Cond))) {
+ evaluateOnPredecessorEdge(BB, P, Cond))) {
if (CI->isZero()) {
ZeroCount++;
ZeroPred = P;
@@ -2131,7 +2208,7 @@ bool JumpThreadingPass::MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB,
}
// If threading this would thread across a loop header, don't thread the edge.
- // See the comments above FindLoopHeaders for justifications and caveats.
+ // See the comments above findLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
LLVM_DEBUG({
bool BBIsHeader = LoopHeaders.count(BB);
@@ -2164,11 +2241,11 @@ bool JumpThreadingPass::MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB,
}
// Now we are ready to duplicate PredBB.
- ThreadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
+ threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
return true;
}
-void JumpThreadingPass::ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
+void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
BasicBlock *PredBB,
BasicBlock *BB,
BasicBlock *SuccBB) {
@@ -2194,7 +2271,11 @@ void JumpThreadingPass::ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
// copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
// to account for entry from PredPredBB.
DenseMap<Instruction *, Value *> ValueMapping =
- CloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
+ cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
+
+ // Copy the edge probabilities from PredBB to NewBB.
+ if (HasProfileData)
+ BPI->copyEdgeProbabilities(PredBB, NewBB);
// Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
// This eliminates predecessors from PredPredBB, which requires us to simplify
@@ -2206,9 +2287,9 @@ void JumpThreadingPass::ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
PredPredTerm->setSuccessor(i, NewBB);
}
- AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
+ addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
ValueMapping);
- AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
+ addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
ValueMapping);
DTU->applyUpdatesPermissive(
@@ -2217,7 +2298,7 @@ void JumpThreadingPass::ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
{DominatorTree::Insert, PredPredBB, NewBB},
{DominatorTree::Delete, PredPredBB, PredBB}});
- UpdateSSA(PredBB, NewBB, ValueMapping);
+ updateSSA(PredBB, NewBB, ValueMapping);
// Clean up things like PHI nodes with single operands, dead instructions,
// etc.
@@ -2226,11 +2307,11 @@ void JumpThreadingPass::ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
SmallVector<BasicBlock *, 1> PredsToFactor;
PredsToFactor.push_back(NewBB);
- ThreadEdge(BB, PredsToFactor, SuccBB);
+ threadEdge(BB, PredsToFactor, SuccBB);
}
-/// TryThreadEdge - Thread an edge if it's safe and profitable to do so.
-bool JumpThreadingPass::TryThreadEdge(
+/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
+bool JumpThreadingPass::tryThreadEdge(
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
@@ -2241,7 +2322,7 @@ bool JumpThreadingPass::TryThreadEdge(
}
// If threading this would thread across a loop header, don't thread the edge.
- // See the comments above FindLoopHeaders for justifications and caveats.
+ // See the comments above findLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
LLVM_DEBUG({
bool BBIsHeader = LoopHeaders.count(BB);
@@ -2262,14 +2343,14 @@ bool JumpThreadingPass::TryThreadEdge(
return false;
}
- ThreadEdge(BB, PredBBs, SuccBB);
+ threadEdge(BB, PredBBs, SuccBB);
return true;
}
-/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// threadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB. Transform the IR to reflect this change.
-void JumpThreadingPass::ThreadEdge(BasicBlock *BB,
+void JumpThreadingPass::threadEdge(BasicBlock *BB,
const SmallVectorImpl<BasicBlock *> &PredBBs,
BasicBlock *SuccBB) {
assert(SuccBB != BB && "Don't create an infinite loop");
@@ -2284,7 +2365,7 @@ void JumpThreadingPass::ThreadEdge(BasicBlock *BB,
else {
LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
+ PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
}
// And finally, do it!
@@ -2308,7 +2389,7 @@ void JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// Copy all the instructions from BB to NewBB except the terminator.
DenseMap<Instruction *, Value *> ValueMapping =
- CloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
+ cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
@@ -2317,7 +2398,7 @@ void JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// PHI nodes for NewBB now.
- AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
+ addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
// Update the terminator of PredBB to jump to NewBB instead of BB. This
// eliminates predecessors from BB, which requires us to simplify any PHI
@@ -2334,7 +2415,7 @@ void JumpThreadingPass::ThreadEdge(BasicBlock *BB,
{DominatorTree::Insert, PredBB, NewBB},
{DominatorTree::Delete, PredBB, BB}});
- UpdateSSA(BB, NewBB, ValueMapping);
+ updateSSA(BB, NewBB, ValueMapping);
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
@@ -2342,7 +2423,7 @@ void JumpThreadingPass::ThreadEdge(BasicBlock *BB,
SimplifyInstructionsInBlock(NewBB, TLI);
// Update the edge weight from BB to SuccBB, which should be less than before.
- UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
+ updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
// Threaded an edge!
++NumThreads;
@@ -2351,7 +2432,7 @@ void JumpThreadingPass::ThreadEdge(BasicBlock *BB,
/// Create a new basic block that will be the predecessor of BB and successor of
/// all blocks in Preds. When profile data is available, update the frequency of
/// this new block.
-BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
+BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix) {
SmallVector<BasicBlock *, 2> NewBBs;
@@ -2412,7 +2493,7 @@ bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
/// Update the block frequency of BB and branch weight and the metadata on the
/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
/// Freq(PredBB->BB) / Freq(BB->SuccBB).
-void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
+void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
BasicBlock *BB,
BasicBlock *NewBB,
BasicBlock *SuccBB) {
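An illustrative aside, not part of the patch: the comment above states the rescaling rule directly. Once the PredBB->BB frequency is rerouted through the new block, the remaining BB->SuccBB weight is the old weight scaled by 1 - Freq(PredBB->BB) / Freq(BB->SuccBB). A standalone C++ sketch of that arithmetic on raw integers, ignoring overflow (the pass itself works with LLVM's frequency and probability types):

    #include <cstdint>

    // Scale the BB->SuccBB weight after Freq(PredBB->BB) no longer flows
    // through BB. Saturates at zero when the threaded edge carried everything.
    static uint64_t scaledEdgeWeight(uint64_t WeightBBToSucc,
                                     uint64_t FreqPredToBB,
                                     uint64_t FreqBBToSucc) {
      if (FreqBBToSucc == 0 || FreqPredToBB >= FreqBBToSucc)
        return 0;
      // WeightBBToSucc * (1 - FreqPredToBB / FreqBBToSucc), in integer math.
      return WeightBBToSucc - (WeightBBToSucc * FreqPredToBB) / FreqBBToSucc;
    }

For example, with a weight of 100, Freq(PredBB->BB) = 30 and Freq(BB->SuccBB) = 120, the remaining weight is 100 - 100 * 30 / 120 = 75.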
@@ -2504,18 +2585,18 @@ void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
}
}
-/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
+/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB do so now, this
/// improves the odds that the branch will be on an analyzable instruction like
/// a compare.
-bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
+bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
assert(!PredBBs.empty() && "Can't handle an empty set");
// If BB is a loop header, then duplicating this block outside the loop would
// cause us to transform this into an irreducible loop, don't do this.
- // See the comments above FindLoopHeaders for justifications and caveats.
+ // See the comments above findLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
<< "' into predecessor block '" << PredBBs[0]->getName()
@@ -2539,7 +2620,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
else {
LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
+ PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
}
Updates.push_back({DominatorTree::Delete, PredBB, BB});
@@ -2611,12 +2692,12 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
// Check to see if the targets of the branch had PHI nodes. If so, we need to
// add entries to the PHI nodes for branch from PredBB now.
BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
- AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
+ addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
ValueMapping);
- AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
+ addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
ValueMapping);
- UpdateSSA(BB, PredBB, ValueMapping);
+ updateSSA(BB, PredBB, ValueMapping);
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
@@ -2624,6 +2705,8 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
+ if (HasProfileData)
+ BPI->copyEdgeProbabilities(BB, PredBB);
DTU->applyUpdatesPermissive(Updates);
++NumDupes;
@@ -2635,7 +2718,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
// a PHI node in BB. SI has no other use.
// A new basic block, NewBB, is created and SI is converted to compare and
// conditional branch. SI is erased from parent.
-void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
+void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
SelectInst *SI, PHINode *SIUse,
unsigned Idx) {
// Expand the select.
@@ -2670,7 +2753,7 @@ void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
}
-bool JumpThreadingPass::TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
+bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
if (!CondPHI || CondPHI->getParent() != BB)
@@ -2682,7 +2765,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
// The second and third condition can be potentially relaxed. Currently
// the conditions help to simplify the code and allow us to reuse existing
- // code, developed for TryToUnfoldSelect(CmpInst *, BasicBlock *)
+ // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
continue;
@@ -2690,13 +2773,13 @@ bool JumpThreadingPass::TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
if (!PredTerm || !PredTerm->isUnconditional())
continue;
- UnfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
+ unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
return true;
}
return false;
}
-/// TryToUnfoldSelect - Look for blocks of the form
+/// tryToUnfoldSelect - Look for blocks of the form
/// bb1:
/// %a = select
/// br bb2
@@ -2708,7 +2791,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
///
/// And expand the select into a branch structure if one of its arms allows %c
/// to be folded. This later enables threading from bb1 over bb2.
-bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
+bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
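A hand-written aside, not part of the patch: the bb1/bb2 shape in the comment corresponds, at source level, to a value chosen by a condition in one block and compared only later. A hypothetical C++ fragment showing the shape before and after unfolding the select into a branch (illustration only, not compiler output):

    // Before: the selected value flows into a comparison in the next block,
    // so neither arm of the select is individually visible to jump threading.
    int beforeUnfold(bool c, int x, int y) {
      int a = c ? x : y;       // bb1: %a = select i1 %c, %x, %y
      return a == 42 ? 1 : 0;  // bb2: %cmp = icmp eq %a, 42; br %cmp
    }

    // After: each arm reaches the comparison along its own edge, so when an
    // arm lets the compare fold, bb2 can be threaded from that path.
    int afterUnfold(bool c, int x, int y) {
      int a;
      if (c)
        a = x;
      else
        a = y;
      return a == 42 ? 1 : 0;
    }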
@@ -2742,14 +2825,14 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
if ((LHSFolds != LazyValueInfo::Unknown ||
RHSFolds != LazyValueInfo::Unknown) &&
LHSFolds != RHSFolds) {
- UnfoldSelectInstr(Pred, BB, SI, CondLHS, I);
+ unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
return true;
}
}
return false;
}
-/// TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
+/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
/// same BB in the form
/// bb:
/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
@@ -2769,19 +2852,14 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
/// select if the associated PHI has at least one constant. If the unfolded
/// select is not jump-threaded, it will be folded again in the later
/// optimizations.
-bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
- // This transform can introduce a UB (a conditional branch that depends on a
- // poison value) that was not present in the original program. See
- // @TryToUnfoldSelectInCurrBB test in test/Transforms/JumpThreading/select.ll.
+bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
+ // This transform would reduce the quality of msan diagnostics.
// Disable this transform under MemorySanitizer.
- // FIXME: either delete it or replace with a valid transform. This issue is
- // not limited to MemorySanitizer (but has only been observed as an MSan false
- // positive in practice so far).
if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
return false;
// If threading this would thread across a loop header, don't thread the edge.
- // See the comments above FindLoopHeaders for justifications and caveats.
+ // See the comments above findLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB))
return false;
@@ -2824,8 +2902,12 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
if (!SI)
continue;
// Expand the select.
- Instruction *Term =
- SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
+ Value *Cond = SI->getCondition();
+ if (InsertFreezeWhenUnfoldingSelect &&
+ !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI,
+ &DTU->getDomTree()))
+ Cond = new FreezeInst(Cond, "cond.fr", SI);
+ Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
BasicBlock *SplitBB = SI->getParent();
BasicBlock *NewBB = Term->getParent();
PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
@@ -2869,7 +2951,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
/// And cond either implies condGuard or !condGuard. In this case all the
/// instructions before the guard can be duplicated in both branches, and the
/// guard is then threaded to one of them.
-bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
+bool JumpThreadingPass::processGuards(BasicBlock *BB) {
using namespace PatternMatch;
// We only want to deal with two predecessors.
@@ -2894,7 +2976,7 @@ bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
for (auto &I : *BB)
- if (isGuard(&I) && ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
+ if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
return true;
return false;
@@ -2903,7 +2985,7 @@ bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
/// Try to propagate the guard from BB which is the lower block of a diamond
/// to one of its branches, in case if diamond's condition implies guard's
/// condition.
-bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
+bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
BranchInst *BI) {
assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
assert(BI->isConditional() && "Unconditional branch has 2 successors?");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
index 1a22edaf8726..d2b4ba296f41 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -12,6 +12,13 @@
// safe. This pass also promotes must-aliased memory locations in the loop to
// live in registers, thus hoisting and sinking "invariant" loads and stores.
//
+// Hoisting operations out of loops is a canonicalization transform. It
+// enables and simplifies subsequent optimizations in the middle-end.
+// Rematerialization of hoisted instructions to reduce register pressure is the
+// responsibility of the back-end, which has more accurate information about
+// register pressure and also handles other optimizations than LICM that
+// increase live-ranges.
+//
// This pass uses alias analysis for two purposes:
//
// 1. Moving loop invariant loads and calls out of loops. If we can determine
@@ -35,10 +42,12 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -98,6 +107,11 @@ static cl::opt<bool> ControlFlowHoisting(
"licm-control-flow-hoisting", cl::Hidden, cl::init(false),
cl::desc("Enable control flow (and PHI) hoisting in LICM"));
+static cl::opt<unsigned> HoistSinkColdnessThreshold(
+ "licm-coldness-threshold", cl::Hidden, cl::init(4),
+ cl::desc("Relative coldness Threshold of hoisting/sinking destination "
+ "block for LICM to be considered beneficial"));
+
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
@@ -143,8 +157,9 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
+ BlockFrequencyInfo *BFI, const Loop *CurLoop,
+ ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
+ OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
@@ -155,8 +170,10 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
AliasSetTracker *CurAST, Loop *CurLoop,
AAResults *AA);
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop,
+ Loop *CurLoop, Instruction &I,
SinkAndHoistLICMFlags &Flags);
+static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
+ MemoryUse &MU);
static Instruction *cloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
@@ -171,8 +188,8 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest,
namespace {
struct LoopInvariantCodeMotion {
bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
- TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- ScalarEvolution *SE, MemorySSA *MSSA,
+ BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
+ TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE);
LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
@@ -204,23 +221,30 @@ struct LegacyLICMPass : public LoopPass {
if (skipLoop(L))
return false;
+ LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
+ << L->getHeader()->getNameOrAsOperand() << "\n");
+
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
MemorySSA *MSSA = EnableMSSALoopDependency
? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
: nullptr;
+ bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
+ BlockFrequencyInfo *BFI =
+ hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
+ : nullptr;
// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
- // pass. Function analyses need to be preserved across loop transformations
+ // pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
- return LICM.runOnLoop(L,
- &getAnalysis<AAResultsWrapperPass>().getAAResults(),
- &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
- &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
- *L->getHeader()->getParent()),
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent()),
- SE ? &SE->getSE() : nullptr, MSSA, &ORE);
+ return LICM.runOnLoop(
+ L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
+ &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
+ &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent()),
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent()),
+ SE ? &SE->getSE() : nullptr, MSSA, &ORE);
}
/// This transformation requires natural loop information & requires that
@@ -236,6 +260,9 @@ struct LegacyLICMPass : public LoopPass {
}
AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ AU.addPreserved<LazyBlockFrequencyInfoPass>();
+ AU.addPreserved<LazyBranchProbabilityInfoPass>();
}
private:
@@ -251,8 +278,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
- if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
- AR.MSSA, &ORE))
+ if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
+ &AR.SE, AR.MSSA, &ORE))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
@@ -272,6 +299,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)
@@ -281,13 +309,42 @@ Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
}
+llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
+ MemorySSA *MSSA)
+ : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
+ IsSink, L, MSSA) {}
+
+llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
+ unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
+ Loop *L, MemorySSA *MSSA)
+ : LicmMssaOptCap(LicmMssaOptCap),
+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+ IsSink(IsSink) {
+ assert(((L != nullptr) == (MSSA != nullptr)) &&
+ "Unexpected values for SinkAndHoistLICMFlags");
+ if (!MSSA)
+ return;
+
+ unsigned AccessCapCount = 0;
+ for (auto *BB : L->getBlocks())
+ if (const auto *Accesses = MSSA->getBlockAccesses(BB))
+ for (const auto &MA : *Accesses) {
+ (void)MA;
+ ++AccessCapCount;
+ if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
+ NoOfMemAccTooLarge = true;
+ return;
+ }
+ }
+}
+
/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.
bool LoopInvariantCodeMotion::runOnLoop(
Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
- TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
- MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) {
+ BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) {
bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@@ -300,31 +357,18 @@ bool LoopInvariantCodeMotion::runOnLoop(
std::unique_ptr<AliasSetTracker> CurAST;
std::unique_ptr<MemorySSAUpdater> MSSAU;
- bool NoOfMemAccTooLarge = false;
- unsigned LicmMssaOptCounter = 0;
+ std::unique_ptr<SinkAndHoistLICMFlags> Flags;
if (!MSSA) {
LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
CurAST = collectAliasInfoForLoop(L, LI, AA);
+ Flags = std::make_unique<SinkAndHoistLICMFlags>(
+ LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true);
} else {
LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
-
- unsigned AccessCapCount = 0;
- for (auto *BB : L->getBlocks()) {
- if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
- for (const auto &MA : *Accesses) {
- (void)MA;
- AccessCapCount++;
- if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
- NoOfMemAccTooLarge = true;
- break;
- }
- }
- }
- if (NoOfMemAccTooLarge)
- break;
- }
+ Flags = std::make_unique<SinkAndHoistLICMFlags>(
+ LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA);
}
// Get the preheader block to move instructions into...
@@ -343,17 +387,15 @@ bool LoopInvariantCodeMotion::runOnLoop(
// that we are guaranteed to see definitions before we see uses. This allows
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
- SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
- LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
- /*IsSink=*/true};
if (L->hasDedicatedExits())
- Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
- Flags.IsSink = false;
- if (Preheader)
Changed |=
- hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
- CurAST.get(), MSSAU.get(), SE, &SafetyInfo, Flags, ORE);
+ sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
+ CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
+ Flags->setIsSink(false);
+ if (Preheader)
+ Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
+ CurAST.get(), MSSAU.get(), SE, &SafetyInfo,
+ *Flags.get(), ORE);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -363,7 +405,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
- !NoOfMemAccTooLarge) {
+ !Flags->tooManyMemoryAccesses()) {
// Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -432,7 +474,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// specifically moving instructions across the loop boundary and so it is
// especially in need of sanity checking here.
assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
- assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
+ assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
"Parent loop not left in LCSSA form after LICM!");
if (MSSAU.get() && VerifyMemorySSA)
@@ -449,10 +491,10 @@ bool LoopInvariantCodeMotion::runOnLoop(
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
- DominatorTree *DT, TargetLibraryInfo *TLI,
- TargetTransformInfo *TTI, Loop *CurLoop,
- AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ICFLoopSafetyInfo *SafetyInfo,
+ DominatorTree *DT, BlockFrequencyInfo *BFI,
+ TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ Loop *CurLoop, AliasSetTracker *CurAST,
+ MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
@@ -501,7 +543,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
ORE)) {
- if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
+ if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
salvageDebugInfo(I);
@@ -585,7 +627,7 @@ public:
else if (!TrueDestSucc.empty()) {
Function *F = TrueDest->getParent();
auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
- auto It = std::find_if(F->begin(), F->end(), IsSucc);
+ auto It = llvm::find_if(*F, IsSucc);
assert(It != F->end() && "Could not find successor in function");
CommonSucc = &*It;
}
@@ -653,15 +695,15 @@ public:
return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
Pair.first->getSuccessor(1) == BB);
};
- auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
- HasBBAsSuccessor);
+ auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
// If not involved in a pending branch, hoist to preheader
BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
if (It == HoistableBranches.end()) {
- LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
- << " as hoist destination for " << BB->getName()
- << "\n");
+ LLVM_DEBUG(dbgs() << "LICM using "
+ << InitialPreheader->getNameOrAsOperand()
+ << " as hoist destination for "
+ << BB->getNameOrAsOperand() << "\n");
HoistDestinationMap[BB] = InitialPreheader;
return InitialPreheader;
}
@@ -746,13 +788,43 @@ public:
};
} // namespace
+// Hoisting/sinking an instruction out of a loop isn't always beneficial. It's
+// only worthwhile if the destination block is actually colder than the current
+// block.
+static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
+ OptimizationRemarkEmitter *ORE,
+ BlockFrequencyInfo *BFI) {
+ // Check block frequency only when runtime profile is available
+ // to avoid pathological cases. With static profile, lean towards
+ // hoisting because it helps canonicalize the loop for the vectorizer.
+ if (!DstBlock->getParent()->hasProfileData())
+ return true;
+
+ if (!HoistSinkColdnessThreshold || !BFI)
+ return true;
+
+ BasicBlock *SrcBlock = I.getParent();
+ if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold >
+ BFI->getBlockFreq(SrcBlock).getFrequency()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I)
+ << "failed to sink or hoist instruction because containing block "
+ "has lower frequency than destination block";
+ });
+ return false;
+ }
+
+ return true;
+}
+
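Not part of the patch, just a restatement: worthSinkOrHoistInst above reduces to one comparison against the licm-coldness-threshold option (default 4). The move is rejected only when the destination block is more than Threshold times hotter than the block the instruction currently lives in, and only when real profile data is present. A standalone C++ sketch of the gate on plain integers (function and parameter names are illustrative):

    #include <cstdint>

    // Allow the move unless the destination block is more than Threshold
    // times hotter than the source block.
    static bool worthMoving(uint64_t SrcFreq, uint64_t DstFreq,
                            unsigned Threshold = 4) {
      if (Threshold == 0)
        return true; // A zero threshold disables the check, as in the patch.
      return DstFreq / Threshold <= SrcFreq;
    }

worthMoving(100, 350) is true (350 / 4 = 87 <= 100), while worthMoving(100, 500) is false (500 / 4 = 125 > 100), matching the rejection condition DstFreq / Threshold > SrcFreq used above.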
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
- DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
+ DominatorTree *DT, BlockFrequencyInfo *BFI,
+ TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
@@ -803,13 +875,15 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// Try hoisting the instruction out to the preheader. We can only do
// this if all of the operands of the instruction are loop invariant and
- // if it is safe to hoist the instruction.
+ // if it is safe to hoist the instruction. We also check block frequency
+ // to make sure the instruction only gets hoisted into colder blocks.
// TODO: It may be safe to hoist if we are hoisting to a conditional block
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
ORE) &&
+ worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
isSafeToExecuteUnconditionally(
I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
@@ -908,7 +982,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
HoistPoint = Dominator->getTerminator();
}
LLVM_DEBUG(dbgs() << "LICM rehoisting to "
- << HoistPoint->getParent()->getName()
+ << HoistPoint->getParent()->getNameOrAsOperand()
<< ": " << *I << "\n");
moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
HoistPoint = I;
@@ -940,7 +1014,19 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
Loop *CurLoop) {
Value *Addr = LI->getOperand(0);
const DataLayout &DL = LI->getModule()->getDataLayout();
- const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
+ const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
+
+ // It is not currently possible for clang to generate an invariant.start
+ // intrinsic with scalable vector types because we don't support thread local
+ // sizeless types and we don't permit sizeless types in structs or classes.
+ // Furthermore, even if support is added for this in future the intrinsic
+ // itself is defined to have a size of -1 for variable sized objects. This
+ // makes it impossible to verify if the intrinsic envelops our region of
+ // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
+ // types would have a -1 parameter, but the former is clearly double the size
+ // of the latter.
+ if (LocSizeInBits.isScalable())
+ return false;
// if the type is i8 addrspace(x)*, we know this is the type of
// llvm.invariant.start operand
@@ -970,13 +1056,17 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
!II->use_empty())
continue;
- unsigned InvariantSizeInBits =
- cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
+ ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
+ // The intrinsic supports having a -1 argument for variable sized objects
+ // so we should check for that here.
+ if (InvariantSize->isNegative())
+ continue;
+ uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
// Confirm the invariant.start location size contains the load operand size
// in bits. Also, the invariant.start should dominate the load, and we
// should not hoist the load out of a loop that contains this dominating
// invariant.start.
- if (LocSizeInBits <= InvariantSizeInBits &&
+ if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
return true;
}
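// --- Illustrative sketch, not part of the patch above ---
// A minimal model of the two guards used here: a scalable load type or a -1
// invariant.start size makes the containment question unanswerable; otherwise
// it is a plain byte/bit comparison. All names below are hypothetical.
#include <cstdint>
#include <iostream>

static bool invariantCoversLoad(int64_t InvariantSizeBytes, uint64_t LoadSizeBits,
                                bool LoadSizeIsScalable) {
  if (LoadSizeIsScalable)     // scalable sizes cannot be compared statically
    return false;
  if (InvariantSizeBytes < 0) // -1 means a variable-sized (unknown) region
    return false;
  return LoadSizeBits <= uint64_t(InvariantSizeBytes) * 8;
}

int main() {
  std::cout << invariantCoversLoad(8, 64, false) << '\n';  // i64 load, 8-byte region: 1
  std::cout << invariantCoversLoad(-1, 64, false) << '\n'; // unknown region size: 0
  std::cout << invariantCoversLoad(32, 128, true) << '\n'; // scalable load type: 0
}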
@@ -1041,6 +1131,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
bool TargetExecutesOncePerLoop,
SinkAndHoistLICMFlags *Flags,
OptimizationRemarkEmitter *ORE) {
+ assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
+ "Either AliasSetTracker or MemorySSA should be initialized.");
+
// If we don't understand the instruction, bail early.
if (!isHoistableAndSinkableInst(I))
return false;
@@ -1074,7 +1167,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
CurLoop, AA);
else
Invalidated = pointerInvalidatedByLoopWithMSSA(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
// Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1095,6 +1188,13 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (CI->mayThrow())
return false;
+ // The convergent attribute is used on operations that involve inter-thread
+ // communication whose results are implicitly affected by the enclosing
+ // control flow. It is not safe to hoist or sink such operations across
+ // control flow.
+ if (CI->isConvergent())
+ return false;
+
using namespace PatternMatch;
if (match(CI, m_Intrinsic<Intrinsic::assume>()))
// Assumes don't actually alias anything or throw
@@ -1119,11 +1219,10 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
bool Invalidated;
if (CurAST)
Invalidated = pointerInvalidatedByLoop(
- MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()),
- CurAST, CurLoop, AA);
+ MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA);
else
Invalidated = pointerInvalidatedByLoopWithMSSA(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
*Flags);
if (Invalidated)
return false;
@@ -1183,12 +1282,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
} else { // MSSAU
if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
- // If there are more accesses than the Promotion cap, give up, we're not
- // walking a list that long.
- if (Flags->NoOfMemAccTooLarge)
- return false;
- // Check store only if there's still "quota" to check clobber.
- if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap)
+ // If there are more accesses than the Promotion cap or no "quota" to
+ // check clobber, then give up as we're not walking a list that long.
+ if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls())
return false;
// If there are interfering Uses (i.e. their defining access is in the
// loop), or ordered loads (stored as Defs!), don't move this store.
@@ -1208,7 +1304,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// Uses may point to an access outside the loop, as getClobbering
// checks the previous iteration when walking the backedge.
// FIXME: More precise: no Uses that alias SI.
- if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
+ if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU))
return false;
} else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
@@ -1227,9 +1323,8 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
}
}
}
-
auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
- Flags->LicmMssaOptCounter++;
+ Flags->incrementClobberingCalls();
// If there are no clobbering Defs in the loop, store is safe to hoist.
return MSSA->isLiveOnEntryDef(Source) ||
!CurLoop->contains(Source->getBlock());
@@ -1529,8 +1624,9 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
+ BlockFrequencyInfo *BFI, const Loop *CurLoop,
+ ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
+ OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@@ -1606,7 +1702,10 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
+ // First check whether I is worth sinking for each use; sink only when it is
+ // worthwhile across all uses.
SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
+ SmallVector<PHINode *, 8> ExitPNs;
for (auto *UI : Users) {
auto *User = cast<Instruction>(UI);
@@ -1616,6 +1715,15 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
PHINode *PN = cast<PHINode>(User);
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
+ if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) {
+ return Changed;
+ }
+
+ ExitPNs.push_back(PN);
+ }
+
+ for (auto *PN : ExitPNs) {
+
// The PHI must be trivially replaceable.
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
@@ -1633,8 +1741,8 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
OptimizationRemarkEmitter *ORE) {
- LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
- << "\n");
+ LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
+ << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
<< ore::NV("Inst", &I);
@@ -1658,10 +1766,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
// Move the new node to the destination block, before its terminator.
moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
- // Apply line 0 debug locations when we are moving instructions to different
- // basic blocks because we want to avoid jumpy line tables.
- if (const DebugLoc &DL = I.getDebugLoc())
- I.setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
+ I.updateLocationAfterHoist();
if (isa<LoadInst>(I))
++NumMovedLoads;
@@ -1707,7 +1812,7 @@ class LoopPromoter : public LoadAndStorePromoter {
SmallVectorImpl<Instruction *> &LoopInsertPts;
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
- AliasSetTracker &AST;
+ AliasSetTracker *AST;
MemorySSAUpdater *MSSAU;
LoopInfo &LI;
DebugLoc DL;
@@ -1737,7 +1842,7 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP,
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
- AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
+ AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
DebugLoc dl, int alignment, bool UnorderedAtomic,
const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
@@ -1794,11 +1899,13 @@ public:
void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
// Update alias analysis.
- AST.copyValue(LI, V);
+ if (AST)
+ AST->copyValue(LI, V);
}
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
- AST.deleteValue(I);
+ if (AST)
+ AST->deleteValue(I);
if (MSSAU)
MSSAU->removeMemoryAccess(I);
}
@@ -1844,7 +1951,7 @@ bool llvm::promoteLoopAccessesToScalars(
ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
- CurAST != nullptr && SafetyInfo != nullptr &&
+ SafetyInfo != nullptr &&
"Unexpected Input to promoteLoopAccessesToScalars");
Value *SomePtr = *PointerMustAliases.begin();
@@ -1909,7 +2016,7 @@ bool llvm::promoteLoopAccessesToScalars(
// we have to prove that the store is dead along the unwind edge. We do
// this by proving that the caller can't have a reference to the object
// after return and thus can't possibly load from the object.
- Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ Value *Object = getUnderlyingObject(SomePtr);
if (!isKnownNonEscaping(Object, TLI))
return false;
// Subtlety: Alloca's aren't visible to callers, but *are* potentially
@@ -2041,7 +2148,7 @@ bool llvm::promoteLoopAccessesToScalars(
if (IsKnownThreadLocalObject)
SafeToInsertStore = true;
else {
- Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ Value *Object = getUnderlyingObject(SomePtr);
SafeToInsertStore =
(isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
!PointerMayBeCaptured(Object, true, true);
@@ -2072,7 +2179,7 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
+ InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL,
Alignment.value(), SawUnorderedAtomic, AATags,
*SafetyInfo);
@@ -2187,18 +2294,18 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
return false;
}
-static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop,
- SinkAndHoistLICMFlags &Flags) {
+bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
+ Loop *CurLoop, Instruction &I,
+ SinkAndHoistLICMFlags &Flags) {
// For hoisting, use the walker to determine safety
- if (!Flags.IsSink) {
+ if (!Flags.getIsSink()) {
MemoryAccess *Source;
// See declaration of SetLicmMssaOptCap for usage details.
- if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
+ if (Flags.tooManyClobberingCalls())
Source = MU->getDefiningAccess();
else {
Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
- Flags.LicmMssaOptCounter++;
+ Flags.incrementClobberingCalls();
}
return !MSSA->isLiveOnEntryDef(Source) &&
CurLoop->contains(Source->getBlock());
@@ -2221,15 +2328,25 @@ static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
// FIXME: Increase precision: Safe to sink if Use post dominates the Def;
// needs PostDominatorTreeAnalysis.
// FIXME: More precise: no Defs that alias this Use.
- if (Flags.NoOfMemAccTooLarge)
+ if (Flags.tooManyMemoryAccesses())
return true;
for (auto *BB : CurLoop->getBlocks())
- if (auto *Accesses = MSSA->getBlockDefs(BB))
- for (const auto &MA : *Accesses)
- if (const auto *MD = dyn_cast<MemoryDef>(&MA))
- if (MU->getBlock() != MD->getBlock() ||
- !MSSA->locallyDominates(MD, MU))
- return true;
+ if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU))
+ return true;
+ // When sinking, the source block may not be part of the loop so check it.
+ if (!CurLoop->contains(&I))
+ return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU);
+
+ return false;
+}
+
+bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
+ MemoryUse &MU) {
+ if (const auto *Accesses = MSSA.getBlockDefs(&BB))
+ for (const auto &MA : *Accesses)
+ if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+ if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
+ return true;
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 687e14d6d7d2..45cdcb2f37dd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -271,7 +271,7 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
bool MadeChange = false;
// Only prefetch in the inner-most loop
- if (!L->empty())
+ if (!L->isInnermost())
return MadeChange;
SmallPtrSet<const Value *, 32> EphValues;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index be209d34be42..1266c93316fa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -26,6 +26,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+
using namespace llvm;
#define DEBUG_TYPE "loop-delete"
@@ -38,6 +39,14 @@ enum class LoopDeletionResult {
Deleted,
};
+static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
+ if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted)
+ return LoopDeletionResult::Deleted;
+ if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified)
+ return LoopDeletionResult::Modified;
+ return LoopDeletionResult::Unmodified;
+}
+
/// Determines if a loop is dead.
///
/// This assumes that we've already checked for unique exit and exiting blocks,
@@ -53,26 +62,28 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
// of the loop.
bool AllEntriesInvariant = true;
bool AllOutgoingValuesSame = true;
- for (PHINode &P : ExitBlock->phis()) {
- Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);
-
- // Make sure all exiting blocks produce the same incoming value for the exit
- // block. If there are different incoming values for different exiting
- // blocks, then it is impossible to statically determine which value should
- // be used.
- AllOutgoingValuesSame =
- all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
- return incoming == P.getIncomingValueForBlock(BB);
- });
-
- if (!AllOutgoingValuesSame)
- break;
-
- if (Instruction *I = dyn_cast<Instruction>(incoming))
- if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) {
- AllEntriesInvariant = false;
+ if (!L->hasNoExitBlocks()) {
+ for (PHINode &P : ExitBlock->phis()) {
+ Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);
+
+ // Make sure all exiting blocks produce the same incoming value for the exit
+ // block. If there are different incoming values for different exiting
+ // blocks, then it is impossible to statically determine which value
+ // should be used.
+ AllOutgoingValuesSame =
+ all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
+ return incoming == P.getIncomingValueForBlock(BB);
+ });
+
+ if (!AllOutgoingValuesSame)
break;
- }
+
+ if (Instruction *I = dyn_cast<Instruction>(incoming))
+ if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) {
+ AllEntriesInvariant = false;
+ break;
+ }
+ }
}
if (Changed)
@@ -85,7 +96,9 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
// This includes instructions that could write to memory, and loads that are
// marked volatile.
for (auto &I : L->blocks())
- if (any_of(*I, [](Instruction &I) { return I.mayHaveSideEffects(); }))
+ if (any_of(*I, [](Instruction &I) {
+ return I.mayHaveSideEffects() && !I.isDroppable();
+ }))
return false;
return true;
}
@@ -122,12 +135,33 @@ static bool isLoopNeverExecuted(Loop *L) {
return true;
}
+/// If we can prove the backedge is untaken, remove it. This destroys the
+/// loop, but leaves the (now trivially loop invariant) control flow and
+/// side effects (if any) in place.
+static LoopDeletionResult
+breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, MemorySSA *MSSA,
+ OptimizationRemarkEmitter &ORE) {
+ assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
+
+ if (!L->getLoopLatch())
+ return LoopDeletionResult::Unmodified;
+
+ auto *BTC = SE.getBackedgeTakenCount(L);
+ if (!BTC->isZero())
+ return LoopDeletionResult::Unmodified;
+
+ breakLoopBackedge(L, DT, SE, LI, MSSA);
+ return LoopDeletionResult::Deleted;
+}
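// --- Illustrative sketch, not part of the patch above ---
// A hypothetical source-level input for breakBackedgeIfNotTaken: the
// backedge-taken count of the loop is provably zero, so the backedge can be
// removed while the single execution of the body stays in place.
#include <iostream>

int once(int n) {
  int sum = 0;
  for (int i = 0; i < 1; ++i) // trip count 1 => the backedge is never taken
    sum += n;
  // After breakLoopBackedge this is equivalent to straight-line code:
  //   sum += n;
  return sum;
}

int main() { std::cout << once(5) << '\n'; } // prints 5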
+
/// Remove a loop if it is dead.
///
-/// A loop is considered dead if it does not impact the observable behavior of
-/// the program other than finite running time. This never removes a loop that
-/// might be infinite (unless it is never executed), as doing so could change
-/// the halting/non-halting nature of a program.
+/// A loop is considered dead either if it does not impact the observable
+/// behavior of the program other than finite running time, or if it is
+/// required to make progress by an attribute such as 'mustprogress' or
+/// 'llvm.loop.mustprogress' and does not make any. This may remove
+/// infinite loops that have been required to make progress.
///
/// This entire process relies pretty heavily on LoopSimplify form and LCSSA in
/// order to make various safety checks work.
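// --- Illustrative sketch, not part of the patch above ---
// A hypothetical source-level example of the mustprogress case described in
// the comment: assuming the frontend marks this function mustprogress (as
// C++'s forward-progress guarantee permits), the side-effect-free loop with
// an uncomputable trip count may now be deleted instead of being preserved as
// possibly infinite.
unsigned collatzIsDead(unsigned n) {
  unsigned x = n;
  while (x != 1)                        // termination is unknown to the compiler
    x = (x % 2 == 0) ? x / 2 : 3 * x + 1;
  // x is not used afterwards and the loop has no other observable effects,
  // so the whole loop is dead under the mustprogress assumption.
  return n;
}

int main() { return collatzIsDead(7) == 7 ? 0 : 1; }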
@@ -151,18 +185,15 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
<< "Deletion requires Loop with preheader and dedicated exits.\n");
return LoopDeletionResult::Unmodified;
}
- // We can't remove loops that contain subloops. If the subloops were dead,
- // they would already have been removed in earlier executions of this pass.
- if (L->begin() != L->end()) {
- LLVM_DEBUG(dbgs() << "Loop contains subloops.\n");
- return LoopDeletionResult::Unmodified;
- }
-
BasicBlock *ExitBlock = L->getUniqueExitBlock();
if (ExitBlock && isLoopNeverExecuted(L)) {
LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!");
+ // We need to forget the loop before setting the incoming values of the exit
+ // phis to undef, so we properly invalidate the SCEV expressions for those
+ // phis.
+ SE.forgetLoop(L);
// Set incoming value to undef for phi nodes in the exit block.
for (PHINode &P : ExitBlock->phis()) {
std::fill(P.incoming_values().begin(), P.incoming_values().end(),
@@ -183,12 +214,12 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
SmallVector<BasicBlock *, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- // We require that the loop only have a single exit block. Otherwise, we'd
- // be in the situation of needing to be able to solve statically which exit
- // block will be branched to, or trying to preserve the branching logic in
- // a loop invariant manner.
- if (!ExitBlock) {
- LLVM_DEBUG(dbgs() << "Deletion requires single exit block\n");
+ // We require that the loop has at most one exit block. Otherwise, we'd be in
+ // the situation of needing to be able to solve statically which exit block
+ // will be branched to, or trying to preserve the branching logic in a loop
+ // invariant manner.
+ if (!ExitBlock && !L->hasNoExitBlocks()) {
+ LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n");
return LoopDeletionResult::Unmodified;
}
// Finally, we have to check that the loop really is dead.
@@ -199,11 +230,13 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
: LoopDeletionResult::Unmodified;
}
- // Don't remove loops for which we can't solve the trip count.
- // They could be infinite, in which case we'd be changing program behavior.
+ // Don't remove loops for which we can't solve the trip count unless the loop
+ // was required to make progress but has been determined to be dead.
const SCEV *S = SE.getConstantMaxBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(S)) {
- LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount.\n");
+ if (isa<SCEVCouldNotCompute>(S) &&
+ !L->getHeader()->getParent()->mustProgress() && !hasMustProgress(L)) {
+ LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount and was "
+ "not required to make progress.\n");
return Changed ? LoopDeletionResult::Modified
: LoopDeletionResult::Unmodified;
}
@@ -232,6 +265,14 @@ PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE);
+
+ // If we can prove the backedge isn't taken, just break it and be done. This
+ // leaves the loop structure in place, which means it can handle dispatching
+ // to the right exit based on whatever loop invariant structure remains.
+ if (Result != LoopDeletionResult::Deleted)
+ Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
+ AR.MSSA, ORE));
+
if (Result == LoopDeletionResult::Unmodified)
return PreservedAnalyses::all();
@@ -291,6 +332,12 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopDeletionResult Result = deleteLoopIfDead(L, DT, SE, LI, MSSA, ORE);
+ // If we can prove the backedge isn't taken, just break it and be done. This
+ // leaves the loop structure in place, which means it can handle dispatching
+ // to the right exit based on whatever loop invariant structure remains.
+ if (Result != LoopDeletionResult::Deleted)
+ Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE));
+
if (Result == LoopDeletionResult::Deleted)
LPM.markLoopAsDeleted(*L);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 7867a5468891..1bd2529891b7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -33,7 +33,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
@@ -664,21 +663,23 @@ public:
/// Try to distribute an inner-most loop.
bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
- assert(L->empty() && "Only process inner loops.");
+ assert(L->isInnermost() && "Only process inner loops.");
LLVM_DEBUG(dbgs() << "\nLDist: In \""
<< L->getHeader()->getParent()->getName()
<< "\" checking " << *L << "\n");
+ // Having a single exit block implies there's also one exiting block.
if (!L->getExitBlock())
return fail("MultipleExitBlocks", "multiple exit blocks");
if (!L->isLoopSimplifyForm())
return fail("NotLoopSimplifyForm",
"loop is not in loop-simplify form");
+ if (!L->isRotatedForm())
+ return fail("NotBottomTested", "loop is not bottom tested");
BasicBlock *PH = L->getLoopPreheader();
- // LAA will check that we only have a single exiting block.
LAI = &GetLAA(*L);
// Currently, we only distribute to isolate the part of the loop with
@@ -814,9 +815,7 @@ public:
LLVM_DEBUG(dbgs() << "\nPointers:\n");
LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
- LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
- LVer.setAliasChecks(std::move(Checks));
- LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
+ LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
LVer.versionLoop(DefsUsedOutside);
LVer.annotateLoopWithNoAlias();
@@ -982,7 +981,7 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
for (Loop *TopLevelLoop : *LI)
for (Loop *L : depth_first(TopLevelLoop))
// We only handle inner-most loops.
- if (L->empty())
+ if (L->isInnermost())
Worklist.push_back(L);
// Now walk the identified inner loops.
@@ -1058,7 +1057,8 @@ PreservedAnalyses LoopDistributePass::run(Function &F,
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
new file mode 100644
index 000000000000..aaff68436c13
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -0,0 +1,728 @@
+//===- LoopFlatten.cpp - Loop flattening pass------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass flattens pairs of nested loops into a single loop.
+//
+// The intention is to optimise loop nests like this, which together access an
+// array linearly:
+// for (int i = 0; i < N; ++i)
+// for (int j = 0; j < M; ++j)
+// f(A[i*M+j]);
+// into one loop:
+// for (int i = 0; i < (N*M); ++i)
+// f(A[i]);
+//
+// It can also flatten loops where the induction variables are not used in the
+// loop. This is only worth doing if the induction variables are only used in an
+// expression like i*M+j. If they had any other uses, we would have to insert a
+// div/mod to reconstruct the original values, so this wouldn't be profitable.
+//
+// We also need to prove that N*M will not overflow.
+//
+//===----------------------------------------------------------------------===//
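// --- Illustrative sketch, not part of the patch above ---
// Source-level view of the div/mod cost mentioned in the header comment: if
// the individual induction variables were still needed, the flattened loop
// would have to reconstruct them from the linear index. All names here are
// hypothetical.
#include <cstdio>

void flattened(int *A, int N, int M) {
  for (int k = 0; k < N * M; ++k) {
    int i = k / M; // only needed if 'i' had uses besides i*M+j
    int j = k % M; // only needed if 'j' had uses besides i*M+j
    A[k] = i + j;  // stands in for f(A[i*M+j]) from the example above
  }
}

int main() {
  int A[6];
  flattened(A, 2, 3);
  for (int v : A)
    std::printf("%d ", v); // prints: 0 1 2 1 2 3
  std::printf("\n");
}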
+
+#include "llvm/Transforms/Scalar/LoopFlatten.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+
+#define DEBUG_TYPE "loop-flatten"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+static cl::opt<unsigned> RepeatedInstructionThreshold(
+ "loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
+ cl::desc("Limit on the cost of instructions that can be repeated due to "
+ "loop flattening"));
+
+static cl::opt<bool>
+ AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden,
+ cl::init(false),
+ cl::desc("Assume that the product of the two iteration "
+ "limits will never overflow"));
+
+static cl::opt<bool>
+ WidenIV("loop-flatten-widen-iv", cl::Hidden,
+ cl::init(true),
+ cl::desc("Widen the loop induction variables, if possible, so "
+ "overflow checks won't reject flattening"));
+
+struct FlattenInfo {
+ Loop *OuterLoop = nullptr;
+ Loop *InnerLoop = nullptr;
+ PHINode *InnerInductionPHI = nullptr;
+ PHINode *OuterInductionPHI = nullptr;
+ Value *InnerLimit = nullptr;
+ Value *OuterLimit = nullptr;
+ BinaryOperator *InnerIncrement = nullptr;
+ BinaryOperator *OuterIncrement = nullptr;
+ BranchInst *InnerBranch = nullptr;
+ BranchInst *OuterBranch = nullptr;
+ SmallPtrSet<Value *, 4> LinearIVUses;
+ SmallPtrSet<PHINode *, 4> InnerPHIsToTransform;
+
+ // Whether this holds the flatten info before or after widening.
+ bool Widened = false;
+
+ FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {};
+};
+
+// Finds the induction variable, increment and limit for a simple loop that we
+// can flatten.
+static bool findLoopComponents(
+ Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions,
+ PHINode *&InductionPHI, Value *&Limit, BinaryOperator *&Increment,
+ BranchInst *&BackBranch, ScalarEvolution *SE) {
+ LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n");
+
+ if (!L->isLoopSimplifyForm()) {
+ LLVM_DEBUG(dbgs() << "Loop is not in normal form\n");
+ return false;
+ }
+
+ // There must be exactly one exiting block, and it must be the same as the
+ // latch.
+ BasicBlock *Latch = L->getLoopLatch();
+ if (L->getExitingBlock() != Latch) {
+ LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n");
+ return false;
+ }
+ // Latch block must end in a conditional branch.
+ BackBranch = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!BackBranch || !BackBranch->isConditional()) {
+ LLVM_DEBUG(dbgs() << "Could not find back-branch\n");
+ return false;
+ }
+ IterationInstructions.insert(BackBranch);
+ LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump());
+ bool ContinueOnTrue = L->contains(BackBranch->getSuccessor(0));
+
+ // Find the induction PHI. If there is no induction PHI, we can't do the
+ // transformation. TODO: could other variables trigger this? Do we have to
+ // search for the best one?
+ InductionPHI = nullptr;
+ for (PHINode &PHI : L->getHeader()->phis()) {
+ InductionDescriptor ID;
+ if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) {
+ InductionPHI = &PHI;
+ LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump());
+ break;
+ }
+ }
+ if (!InductionPHI) {
+ LLVM_DEBUG(dbgs() << "Could not find induction PHI\n");
+ return false;
+ }
+
+ auto IsValidPredicate = [&](ICmpInst::Predicate Pred) {
+ if (ContinueOnTrue)
+ return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT;
+ else
+ return Pred == CmpInst::ICMP_EQ;
+ };
+
+ // Find Compare and make sure it is valid
+ ICmpInst *Compare = dyn_cast<ICmpInst>(BackBranch->getCondition());
+ if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) ||
+ Compare->hasNUsesOrMore(2)) {
+ LLVM_DEBUG(dbgs() << "Could not find valid comparison\n");
+ return false;
+ }
+ IterationInstructions.insert(Compare);
+ LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump());
+
+ // Find increment and limit from the compare
+ Increment = nullptr;
+ if (match(Compare->getOperand(0),
+ m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) {
+ Increment = dyn_cast<BinaryOperator>(Compare->getOperand(0));
+ Limit = Compare->getOperand(1);
+ } else if (Compare->getUnsignedPredicate() == CmpInst::ICMP_NE &&
+ match(Compare->getOperand(1),
+ m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) {
+ Increment = dyn_cast<BinaryOperator>(Compare->getOperand(1));
+ Limit = Compare->getOperand(0);
+ }
+ if (!Increment || Increment->hasNUsesOrMore(3)) {
+ LLVM_DEBUG(dbgs() << "Cound not find valid increment\n");
+ return false;
+ }
+ IterationInstructions.insert(Increment);
+ LLVM_DEBUG(dbgs() << "Found increment: "; Increment->dump());
+ LLVM_DEBUG(dbgs() << "Found limit: "; Limit->dump());
+
+ assert(InductionPHI->getNumIncomingValues() == 2);
+ assert(InductionPHI->getIncomingValueForBlock(Latch) == Increment &&
+ "PHI value is not increment inst");
+
+ auto *CI = dyn_cast<ConstantInt>(
+ InductionPHI->getIncomingValueForBlock(L->getLoopPreheader()));
+ if (!CI || !CI->isZero()) {
+ LLVM_DEBUG(dbgs() << "PHI value is not zero: "; CI->dump());
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
+ return true;
+}
+
+static bool checkPHIs(struct FlattenInfo &FI,
+ const TargetTransformInfo *TTI) {
+ // All PHIs in the inner and outer headers must either be:
+ // - The induction PHI, which we are going to rewrite as one induction in
+ // the new loop. This is already checked by findLoopComponents.
+ // - An outer header PHI with all incoming values from outside the loop.
+ // LoopSimplify guarantees we have a pre-header, so we don't need to
+ // worry about that here.
+ // - Pairs of PHIs in the inner and outer headers, which implement a
+ // loop-carried dependency that will still be valid in the new loop. To
+ // be valid, this variable must be modified only in the inner loop.
+
+ // The set of PHI nodes in the outer loop header that we know will still be
+ // valid after the transformation. These will not need to be modified (with
+ // the exception of the induction variable), but we do need to check that
+ // there are no unsafe PHI nodes.
+ SmallPtrSet<PHINode *, 4> SafeOuterPHIs;
+ SafeOuterPHIs.insert(FI.OuterInductionPHI);
+
+ // Check that all PHI nodes in the inner loop header match one of the valid
+ // patterns.
+ for (PHINode &InnerPHI : FI.InnerLoop->getHeader()->phis()) {
+ // The induction PHIs break these rules, and that's OK because we treat
+ // them specially when doing the transformation.
+ if (&InnerPHI == FI.InnerInductionPHI)
+ continue;
+
+ // Each inner loop PHI node must have two incoming values/blocks - one
+ // from the pre-header, and one from the latch.
+ assert(InnerPHI.getNumIncomingValues() == 2);
+ Value *PreHeaderValue =
+ InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopPreheader());
+ Value *LatchValue =
+ InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopLatch());
+
+ // The incoming value from the outer loop must be the PHI node in the
+ // outer loop header, with no modifications made in the top of the outer
+ // loop.
+ PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue);
+ if (!OuterPHI || OuterPHI->getParent() != FI.OuterLoop->getHeader()) {
+ LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n");
+ return false;
+ }
+
+ // The other incoming value must come from the inner loop, without any
+ // modifications in the tail end of the outer loop. We are in LCSSA form,
+ // so this will actually be a PHI in the inner loop's exit block, which
+ // only uses values from inside the inner loop.
+ PHINode *LCSSAPHI = dyn_cast<PHINode>(
+ OuterPHI->getIncomingValueForBlock(FI.OuterLoop->getLoopLatch()));
+ if (!LCSSAPHI) {
+ LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n");
+ return false;
+ }
+
+ // The value used by the LCSSA PHI must be the same one that the inner
+ // loop's PHI uses.
+ if (LCSSAPHI->hasConstantValue() != LatchValue) {
+ LLVM_DEBUG(
+ dbgs() << "LCSSA PHI incoming value does not match latch value\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "PHI pair is safe:\n");
+ LLVM_DEBUG(dbgs() << " Inner: "; InnerPHI.dump());
+ LLVM_DEBUG(dbgs() << " Outer: "; OuterPHI->dump());
+ SafeOuterPHIs.insert(OuterPHI);
+ FI.InnerPHIsToTransform.insert(&InnerPHI);
+ }
+
+ for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
+ if (!SafeOuterPHIs.count(&OuterPHI)) {
+ LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
+ return false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "checkPHIs: OK\n");
+ return true;
+}
+
+static bool
+checkOuterLoopInsts(struct FlattenInfo &FI,
+ SmallPtrSetImpl<Instruction *> &IterationInstructions,
+ const TargetTransformInfo *TTI) {
+ // Check for instructions in the outer but not inner loop. If any of these
+ // have side-effects then this transformation is not legal, and if there is
+ // a significant amount of code here which can't be optimised out then it's
+ // not profitable (as these instructions would get executed for each
+ // iteration of the inner loop).
+ unsigned RepeatedInstrCost = 0;
+ for (auto *B : FI.OuterLoop->getBlocks()) {
+ if (FI.InnerLoop->contains(B))
+ continue;
+
+ for (auto &I : *B) {
+ if (!isa<PHINode>(&I) && !I.isTerminator() &&
+ !isSafeToSpeculativelyExecute(&I)) {
+ LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have "
+ "side effects: ";
+ I.dump());
+ return false;
+ }
+ // The execution count of the outer loop's iteration instructions
+ // (increment, compare and branch) will be increased, but the
+ // equivalent instructions will be removed from the inner loop, so
+ // they make a net difference of zero.
+ if (IterationInstructions.count(&I))
+ continue;
+ // The unconditional branch to the inner loop's header will turn into
+ // a fall-through, so it adds no cost.
+ BranchInst *Br = dyn_cast<BranchInst>(&I);
+ if (Br && Br->isUnconditional() &&
+ Br->getSuccessor(0) == FI.InnerLoop->getHeader())
+ continue;
+ // Multiplies of the outer iteration variable and inner iteration
+ // count will be optimised out.
+ if (match(&I, m_c_Mul(m_Specific(FI.OuterInductionPHI),
+ m_Specific(FI.InnerLimit))))
+ continue;
+ int Cost = TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+ LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump());
+ RepeatedInstrCost += Cost;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: "
+ << RepeatedInstrCost << "\n");
+ // Bail out if flattening the loops would cause instructions in the outer
+ // loop but not in the inner loop to be executed extra times.
+ if (RepeatedInstrCost > RepeatedInstructionThreshold) {
+ LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: OK\n");
+ return true;
+}
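// --- Illustrative sketch, not part of the patch above ---
// Models the profitability accounting in checkOuterLoopInsts: the costs of
// outer-only instructions that would run once per inner iteration after
// flattening are summed and compared against the cost threshold (default 2
// via -loop-flatten-cost-threshold). The helper below is hypothetical.
#include <iostream>
#include <vector>

static bool repeatedCostAcceptable(const std::vector<unsigned> &Costs,
                                   unsigned Threshold) {
  unsigned Total = 0;
  for (unsigned C : Costs)
    Total += C;
  return Total <= Threshold; // mirrors: RepeatedInstrCost > Threshold -> bail
}

int main() {
  std::cout << repeatedCostAcceptable({1}, 2) << '\n';       // 1: flattening OK
  std::cout << repeatedCostAcceptable({1, 1, 1}, 2) << '\n'; // 0: too costly
}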
+
+static bool checkIVUsers(struct FlattenInfo &FI) {
+ // We require all uses of both induction variables to match this pattern:
+ //
+ // (OuterPHI * InnerLimit) + InnerPHI
+ //
+ // Any uses of the induction variables not matching that pattern would
+ // require a div/mod to reconstruct in the flattened loop, so the
+ // transformation wouldn't be profitable.
+
+ Value *InnerLimit = FI.InnerLimit;
+ if (FI.Widened &&
+ (isa<SExtInst>(InnerLimit) || isa<ZExtInst>(InnerLimit)))
+ InnerLimit = cast<Instruction>(InnerLimit)->getOperand(0);
+
+ // Check that all uses of the inner loop's induction variable match the
+ // expected pattern, recording the uses of the outer IV.
+ SmallPtrSet<Value *, 4> ValidOuterPHIUses;
+ for (User *U : FI.InnerInductionPHI->users()) {
+ if (U == FI.InnerIncrement)
+ continue;
+
+ // After widening the IVs, a trunc instruction might have been introduced, so
+ // look through truncs.
+ if (isa<TruncInst>(U)) {
+ if (!U->hasOneUse())
+ return false;
+ U = *U->user_begin();
+ }
+
+ LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
+
+ Value *MatchedMul;
+ Value *MatchedItCount;
+ bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI),
+ m_Value(MatchedMul))) &&
+ match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI),
+ m_Value(MatchedItCount)));
+
+ // Matches the same pattern as above, except it also looks for truncs
+ // on the phi, which can be the result of widening the induction variables.
+ bool IsAddTrunc = match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
+ m_Value(MatchedMul))) &&
+ match(MatchedMul,
+ m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
+ m_Value(MatchedItCount)));
+
+ if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerLimit) {
+ LLVM_DEBUG(dbgs() << "Use is optimisable\n");
+ ValidOuterPHIUses.insert(MatchedMul);
+ FI.LinearIVUses.insert(U);
+ } else {
+ LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
+ return false;
+ }
+ }
+
+ // Check that there are no uses of the outer IV other than the ones found
+ // as part of the pattern above.
+ for (User *U : FI.OuterInductionPHI->users()) {
+ if (U == FI.OuterIncrement)
+ continue;
+
+ auto IsValidOuterPHIUses = [&] (User *U) -> bool {
+ LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
+ if (!ValidOuterPHIUses.count(U)) {
+ LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Use is optimisable\n");
+ return true;
+ };
+
+ if (auto *V = dyn_cast<TruncInst>(U)) {
+ for (auto *K : V->users()) {
+ if (!IsValidOuterPHIUses(K))
+ return false;
+ }
+ continue;
+ }
+
+ if (!IsValidOuterPHIUses(U))
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n";
+ dbgs() << "Found " << FI.LinearIVUses.size()
+ << " value(s) that can be replaced:\n";
+ for (Value *V : FI.LinearIVUses) {
+ dbgs() << " ";
+ V->dump();
+ });
+ return true;
+}
+
+// Return an OverflowResult indicating whether overflow of the multiplication
+// of InnerLimit and OuterLimit can be assumed not to happen.
+static OverflowResult checkOverflow(struct FlattenInfo &FI,
+ DominatorTree *DT, AssumptionCache *AC) {
+ Function *F = FI.OuterLoop->getHeader()->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // For debugging/testing.
+ if (AssumeNoOverflow)
+ return OverflowResult::NeverOverflows;
+
+ // Check if the multiply could not overflow due to known ranges of the
+ // input values.
+ OverflowResult OR = computeOverflowForUnsignedMul(
+ FI.InnerLimit, FI.OuterLimit, DL, AC,
+ FI.OuterLoop->getLoopPreheader()->getTerminator(), DT);
+ if (OR != OverflowResult::MayOverflow)
+ return OR;
+
+ for (Value *V : FI.LinearIVUses) {
+ for (Value *U : V->users()) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // The IV is used as the operand of a GEP, and the IV is at least as
+ // wide as the address space of the GEP. In this case, the GEP would
+ // wrap around the address space before the IV increment wraps, which
+ // would be UB.
+ if (GEP->isInBounds() &&
+ V->getType()->getIntegerBitWidth() >=
+ DL.getPointerTypeSizeInBits(GEP->getType())) {
+ LLVM_DEBUG(
+ dbgs() << "use of linear IV would be UB if overflow occurred: ";
+ GEP->dump());
+ return OverflowResult::NeverOverflows;
+ }
+ }
+ }
+ }
+
+ return OverflowResult::MayOverflow;
+}
+
+static bool CanFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC, const TargetTransformInfo *TTI) {
+ SmallPtrSet<Instruction *, 8> IterationInstructions;
+ if (!findLoopComponents(FI.InnerLoop, IterationInstructions, FI.InnerInductionPHI,
+ FI.InnerLimit, FI.InnerIncrement, FI.InnerBranch, SE))
+ return false;
+ if (!findLoopComponents(FI.OuterLoop, IterationInstructions, FI.OuterInductionPHI,
+ FI.OuterLimit, FI.OuterIncrement, FI.OuterBranch, SE))
+ return false;
+
+ // Both of the loop limit values must be invariant in the outer loop
+ // (non-instructions are all inherently invariant).
+ if (!FI.OuterLoop->isLoopInvariant(FI.InnerLimit)) {
+ LLVM_DEBUG(dbgs() << "inner loop limit not invariant\n");
+ return false;
+ }
+ if (!FI.OuterLoop->isLoopInvariant(FI.OuterLimit)) {
+ LLVM_DEBUG(dbgs() << "outer loop limit not invariant\n");
+ return false;
+ }
+
+ if (!checkPHIs(FI, TTI))
+ return false;
+
+ // FIXME: it should be possible to handle different types correctly.
+ if (FI.InnerInductionPHI->getType() != FI.OuterInductionPHI->getType())
+ return false;
+
+ if (!checkOuterLoopInsts(FI, IterationInstructions, TTI))
+ return false;
+
+ // Find the values in the loop that can be replaced with the linearized
+ // induction variable, and check that there are no other uses of the inner
+ // or outer induction variable. If there were, we could still do this
+ // transformation, but we'd have to insert a div/mod to calculate the
+ // original IVs, so it wouldn't be profitable.
+ if (!checkIVUsers(FI))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "CanFlattenLoopPair: OK\n");
+ return true;
+}
+
+static bool DoFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC,
+ const TargetTransformInfo *TTI) {
+ Function *F = FI.OuterLoop->getHeader()->getParent();
+ LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
+ {
+ using namespace ore;
+ OptimizationRemark Remark(DEBUG_TYPE, "Flattened", FI.InnerLoop->getStartLoc(),
+ FI.InnerLoop->getHeader());
+ OptimizationRemarkEmitter ORE(F);
+ Remark << "Flattened into outer loop";
+ ORE.emit(Remark);
+ }
+
+ Value *NewTripCount =
+ BinaryOperator::CreateMul(FI.InnerLimit, FI.OuterLimit, "flatten.tripcount",
+ FI.OuterLoop->getLoopPreheader()->getTerminator());
+ LLVM_DEBUG(dbgs() << "Created new trip count in preheader: ";
+ NewTripCount->dump());
+
+ // Fix up PHI nodes that take values from the inner loop back-edge, which
+ // we are about to remove.
+ FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch());
+
+ // The old PHIs will be optimised away later, but for now we can't leave
+ // them in an invalid state, so we update them here too.
+ for (PHINode *PHI : FI.InnerPHIsToTransform)
+ PHI->removeIncomingValue(FI.InnerLoop->getLoopLatch());
+
+ // Modify the trip count of the outer loop to be the product of the two
+ // trip counts.
+ cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);
+
+ // Replace the inner loop backedge with an unconditional branch to the exit.
+ BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();
+ BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();
+ InnerExitingBlock->getTerminator()->eraseFromParent();
+ BranchInst::Create(InnerExitBlock, InnerExitingBlock);
+ DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
+
+ // Replace all uses of the polynomial calculated from the two induction
+ // variables with the one new one.
+ IRBuilder<> Builder(FI.OuterInductionPHI->getParent()->getTerminator());
+ for (Value *V : FI.LinearIVUses) {
+ Value *OuterValue = FI.OuterInductionPHI;
+ if (FI.Widened)
+ OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(),
+ "flatten.trunciv");
+
+ LLVM_DEBUG(dbgs() << "Replacing: "; V->dump();
+ dbgs() << "with: "; OuterValue->dump());
+ V->replaceAllUsesWith(OuterValue);
+ }
+
+ // Tell LoopInfo, SCEV and the pass manager that the inner loop has been
+ // deleted, and that any information they have about the outer loop is
+ // invalidated.
+ SE->forgetLoop(FI.OuterLoop);
+ SE->forgetLoop(FI.InnerLoop);
+ LI->erase(FI.InnerLoop);
+ return true;
+}
+
+static bool CanWidenIV(struct FlattenInfo &FI, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC, const TargetTransformInfo *TTI) {
+ if (!WidenIV) {
+ LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Try widening the IVs\n");
+ Module *M = FI.InnerLoop->getHeader()->getParent()->getParent();
+ auto &DL = M->getDataLayout();
+ auto *InnerType = FI.InnerInductionPHI->getType();
+ auto *OuterType = FI.OuterInductionPHI->getType();
+ unsigned MaxLegalSize = DL.getLargestLegalIntTypeSizeInBits();
+ auto *MaxLegalType = DL.getLargestLegalIntType(M->getContext());
+
+ // If both induction types are less than the maximum legal integer width,
+ // promote both to the widest type available so we know calculating
+ // (OuterLimit * InnerLimit) as the new trip count is safe.
+ if (InnerType != OuterType ||
+ InnerType->getScalarSizeInBits() >= MaxLegalSize ||
+ MaxLegalType->getScalarSizeInBits() < InnerType->getScalarSizeInBits() * 2) {
+ LLVM_DEBUG(dbgs() << "Can't widen the IV\n");
+ return false;
+ }
+
+ SCEVExpander Rewriter(*SE, DL, "loopflatten");
+ SmallVector<WideIVInfo, 2> WideIVs;
+ SmallVector<WeakTrackingVH, 4> DeadInsts;
+ WideIVs.push_back( {FI.InnerInductionPHI, MaxLegalType, false });
+ WideIVs.push_back( {FI.OuterInductionPHI, MaxLegalType, false });
+ unsigned ElimExt;
+ unsigned Widened;
+
+ for (unsigned i = 0; i < WideIVs.size(); i++) {
+ PHINode *WidePhi = createWideIV(WideIVs[i], LI, SE, Rewriter, DT, DeadInsts,
+ ElimExt, Widened, true /* HasGuards */,
+ true /* UsePostIncrementRanges */);
+ if (!WidePhi)
+ return false;
+ LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
+ LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIVs[i].NarrowIV->dump());
+ RecursivelyDeleteDeadPHINode(WideIVs[i].NarrowIV);
+ }
+ // After widening, rediscover all the loop components.
+ assert(Widened && "Widened IV expected");
+ FI.Widened = true;
+ return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+}
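// --- Illustrative sketch, not part of the patch above ---
// The arithmetic reason widening helps: once both IVs use a type at least
// twice as wide as the original, the product of the two limits cannot wrap,
// which is the fact the overflow check would otherwise have to prove. The
// limits below are hypothetical.
#include <cstdint>
#include <iostream>

int main() {
  uint32_t InnerLimit = 100000, OuterLimit = 100000;

  uint32_t Narrow = InnerLimit * OuterLimit;         // wraps in 32 bits
  uint64_t Wide = uint64_t(InnerLimit) * OuterLimit; // exact in 64 bits

  std::cout << Narrow << '\n'; // 1410065408 (wrapped modulo 2^32)
  std::cout << Wide << '\n';   // 10000000000
}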
+
+static bool FlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC,
+ const TargetTransformInfo *TTI) {
+ LLVM_DEBUG(
+ dbgs() << "Loop flattening running on outer loop "
+ << FI.OuterLoop->getHeader()->getName() << " and inner loop "
+ << FI.InnerLoop->getHeader()->getName() << " in "
+ << FI.OuterLoop->getHeader()->getParent()->getName() << "\n");
+
+ if (!CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI))
+ return false;
+
+ // Check if we can widen the induction variables to avoid overflow checks.
+ if (CanWidenIV(FI, DT, LI, SE, AC, TTI))
+ return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+
+ // Check if the new iteration variable might overflow. In this case, we
+ // need to version the loop, and select the original version at runtime if
+ // the iteration space is too large.
+ // TODO: We currently don't version the loop.
+ OverflowResult OR = checkOverflow(FI, DT, AC);
+ if (OR == OverflowResult::AlwaysOverflowsHigh ||
+ OR == OverflowResult::AlwaysOverflowsLow) {
+ LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n");
+ return false;
+ } else if (OR == OverflowResult::MayOverflow) {
+ LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
+ return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+}
+
+bool Flatten(DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC, TargetTransformInfo *TTI) {
+ bool Changed = false;
+ for (auto *InnerLoop : LI->getLoopsInPreorder()) {
+ auto *OuterLoop = InnerLoop->getParentLoop();
+ if (!OuterLoop)
+ continue;
+ struct FlattenInfo FI(OuterLoop, InnerLoop);
+ Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+ }
+ return Changed;
+}
+
+PreservedAnalyses LoopFlattenPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *LI = &AM.getResult<LoopAnalysis>(F);
+ auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto *AC = &AM.getResult<AssumptionAnalysis>(F);
+ auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
+
+ if (!Flatten(DT, LI, SE, AC, TTI))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+namespace {
+class LoopFlattenLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopFlattenLegacyPass() : FunctionPass(ID) {
+ initializeLoopFlattenLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Possibly flatten loop L into its child.
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ getLoopAnalysisUsage(AU);
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<AssumptionCacheTracker>();
+ }
+};
+} // namespace
+
+char LoopFlattenLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
+ false, false)
+
+FunctionPass *llvm::createLoopFlattenPass() { return new LoopFlattenLegacyPass(); }
+
+bool LoopFlattenLegacyPass::runOnFunction(Function &F) {
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
+ auto *TTI = &TTIP.getTTI(F);
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ return Flatten(DT, LI, SE, AC, TTI);
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 20edc8699d79..b5f8dfa9aafb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -46,6 +46,7 @@
#include "llvm/Transforms/Scalar/LoopFuse.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -53,6 +54,7 @@
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -64,6 +66,7 @@
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeMoverUtils.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
using namespace llvm;
@@ -114,6 +117,11 @@ static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
"Use all available analyses")),
cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore);
+static cl::opt<unsigned> FusionPeelMaxCount(
+ "loop-fusion-peel-max-count", cl::init(0), cl::Hidden,
+ cl::desc("Max number of iterations to be peeled from a loop, such that "
+ "fusion can take place"));
+
#ifndef NDEBUG
static cl::opt<bool>
VerboseFusionDebugging("loop-fusion-verbose-debug",
@@ -157,6 +165,12 @@ struct FusionCandidate {
bool Valid;
/// Guard branch of the loop, if it exists
BranchInst *GuardBranch;
+ /// Peeling parameters of the loop.
+ TTI::PeelingPreferences PP;
+ /// Whether this loop can be peeled.
+ bool AbleToPeel;
+ /// Whether this loop has been peeled.
+ bool Peeled;
/// Dominator and PostDominator trees are needed for the
/// FusionCandidateCompare function, required by FusionCandidateSet to
@@ -168,11 +182,13 @@ struct FusionCandidate {
OptimizationRemarkEmitter &ORE;
FusionCandidate(Loop *L, const DominatorTree *DT,
- const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
+ const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE,
+ TTI::PeelingPreferences PP)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
Latch(L->getLoopLatch()), L(L), Valid(true),
- GuardBranch(L->getLoopGuardBranch()), DT(DT), PDT(PDT), ORE(ORE) {
+ GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
+ Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
@@ -243,6 +259,17 @@ struct FusionCandidate {
return Preheader;
}
+ /// After peeling, the loop is modified quite a bit, so all of the blocks
+ /// need to be updated accordingly.
+ void updateAfterPeeling() {
+ Preheader = L->getLoopPreheader();
+ Header = L->getHeader();
+ ExitingBlock = L->getExitingBlock();
+ ExitBlock = L->getExitBlock();
+ Latch = L->getLoopLatch();
+ verify();
+ }
+
/// Given a guarded loop, get the successor of the guard that is not in the
/// loop.
///
@@ -254,6 +281,8 @@ struct FusionCandidate {
assert(GuardBranch && "Only valid on guarded loops.");
assert(GuardBranch->isConditional() &&
"Expecting guard to be a conditional branch.");
+ if (Peeled)
+ return GuardBranch->getSuccessor(1);
return (GuardBranch->getSuccessor(0) == Preheader)
? GuardBranch->getSuccessor(1)
: GuardBranch->getSuccessor(0);
@@ -515,13 +544,17 @@ private:
ScalarEvolution &SE;
PostDominatorTree &PDT;
OptimizationRemarkEmitter &ORE;
+ AssumptionCache &AC;
+
+ const TargetTransformInfo &TTI;
public:
LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
ScalarEvolution &SE, PostDominatorTree &PDT,
- OptimizationRemarkEmitter &ORE, const DataLayout &DL)
+ OptimizationRemarkEmitter &ORE, const DataLayout &DL,
+ AssumptionCache &AC, const TargetTransformInfo &TTI)
: LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
- DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE) {}
+ DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {}
/// This is the main entry point for loop fusion. It will traverse the
/// specified function and collect candidate loops to fuse, starting at the
@@ -606,7 +639,9 @@ private:
/// Flow Equivalent sets, sorted by dominance.
void collectFusionCandidates(const LoopVector &LV) {
for (Loop *L : LV) {
- FusionCandidate CurrCand(L, &DT, &PDT, ORE);
+ TTI::PeelingPreferences PP =
+ gatherPeelingPreferences(L, SE, TTI, None, None);
+ FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP);
if (!CurrCand.isEligibleForFusion(SE))
continue;
@@ -656,33 +691,133 @@ private:
/// Determine if two fusion candidates have the same trip count (i.e., they
/// execute the same number of iterations).
///
- /// Note that for now this method simply returns a boolean value because there
- /// are no mechanisms in loop fusion to handle different trip counts. In the
- /// future, this behaviour can be extended to adjust one of the loops to make
- /// the trip counts equal (e.g., loop peeling). When this is added, this
- /// interface may need to change to return more information than just a
- /// boolean value.
- bool identicalTripCounts(const FusionCandidate &FC0,
- const FusionCandidate &FC1) const {
+ /// This function returns a pair of values. The first is a boolean stating
+ /// whether or not the two candidates are known at compile time to have the
+ /// same trip count. The second is the difference between the two trip
+ /// counts. This information can be used later to determine whether or not
+ /// peeling can be performed on either one of the candidates.
+ std::pair<bool, Optional<unsigned>>
+ haveIdenticalTripCounts(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
+
const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
if (isa<SCEVCouldNotCompute>(TripCount0)) {
UncomputableTripCount++;
LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
- return false;
+ return {false, None};
}
const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
if (isa<SCEVCouldNotCompute>(TripCount1)) {
UncomputableTripCount++;
LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
- return false;
+ return {false, None};
}
+
LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
<< *TripCount1 << " are "
<< (TripCount0 == TripCount1 ? "identical" : "different")
<< "\n");
- return (TripCount0 == TripCount1);
+ if (TripCount0 == TripCount1)
+ return {true, 0};
+
+ LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, "
+ "determining the difference between trip counts\n");
+
+ // At this point the trip counts are known to differ. Peeling is only
+ // considered for loops with a single exit point and a constant trip count.
+ const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L);
+ const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L);
+
+ // If either trip count is zero, the corresponding loop does not have a
+ // single exit or a constant trip count.
+ if (TC0 == 0 || TC1 == 0) {
+ LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not "
+ "have a constant number of iterations. Peeling "
+ "is not beneficial\n");
+ return {false, None};
+ }
+
+ Optional<unsigned> Difference = None;
+ int Diff = TC0 - TC1;
+
+ if (Diff > 0)
+ Difference = Diff;
+ else {
+ LLVM_DEBUG(
+ dbgs() << "Difference is less than 0. FC1 (second loop) has more "
+ "iterations than the first one. Currently not supported\n");
+ }
+
+ LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference
+ << "\n");
+
+ return {false, Difference};
+ }
+
+ void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1,
+ unsigned PeelCount) {
+ assert(FC0.AbleToPeel && "Should be able to peel loop");
+
+ LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
+ << " iterations of the first loop. \n");
+
+ FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true);
+ if (FC0.Peeled) {
+ LLVM_DEBUG(dbgs() << "Done Peeling\n");
+
+#ifndef NDEBUG
+ auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);
+
+ assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 &&
+ "Loops should have identical trip counts after peeling");
+#endif
+
+ FC0.PP.PeelCount += PeelCount;
+
+ // Peeling does not update the PDT
+ PDT.recalculate(*FC0.Preheader->getParent());
+
+ FC0.updateAfterPeeling();
+
+ // In this case the iterations of the loop are constant, so the first
+ // loop will execute completely (will not jump from one of
+ // the peeled blocks to the second loop). Here we update the branch of
+ // each peeled block so that it branches to whichever successor is not
+ // the preheader of the second loop (for unguarded loops) or not the
+ // successor of the exit block of the first loop (for guarded loops).
+ // This update ensures that the entry block of the first loop dominates
+ // the entry block of the second loop.
+ BasicBlock *BB =
+ FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;
+ if (BB) {
+ SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
+ SmallVector<Instruction *, 8> WorkList;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (Pred != FC0.ExitBlock) {
+ WorkList.emplace_back(Pred->getTerminator());
+ TreeUpdates.emplace_back(
+ DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB));
+ }
+ }
+ // The predecessors cannot be modified inside the loop above, as doing so
+ // would invalidate the iterators.
+ for (Instruction *CurrentBranch: WorkList) {
+ BasicBlock *Succ = CurrentBranch->getSuccessor(0);
+ if (Succ == BB)
+ Succ = CurrentBranch->getSuccessor(1);
+ ReplaceInstWithInst(CurrentBranch, BranchInst::Create(Succ));
+ }
+
+ DTU.applyUpdates(TreeUpdates);
+ DTU.flush();
+ }
+ LLVM_DEBUG(
+ dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount
+ << " iterations from the first loop.\n"
+ "Both Loops have the same number of iterations now.\n");
+ }
}
/// Walk each set of control flow equivalent fusion candidates and attempt to
@@ -716,7 +851,32 @@ private:
FC0->verify();
FC1->verify();
- if (!identicalTripCounts(*FC0, *FC1)) {
+ // Check if the candidates have identical tripcounts (first value of
+ // pair), and if not check the difference in the tripcounts between
+ // the loops (second value of pair). The difference is only available
+ // (not None) when both loops iterate a constant number of times and
+ // have a single exit.
+ std::pair<bool, Optional<unsigned>> IdenticalTripCountRes =
+ haveIdenticalTripCounts(*FC0, *FC1);
+ bool SameTripCount = IdenticalTripCountRes.first;
+ Optional<unsigned> TCDifference = IdenticalTripCountRes.second;
+
+ // Here we are checking that FC0 (the first loop) can be peeled, and
+ // both loops have different tripcounts.
+ if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
+ if (*TCDifference > FusionPeelMaxCount) {
+ LLVM_DEBUG(dbgs()
+ << "Difference in loop trip counts: " << *TCDifference
+ << " is greater than maximum peel count specificed: "
+ << FusionPeelMaxCount << "\n");
+ } else {
+ // Treat the trip counts as matching, contingent on peeling being
+ // performed on the first loop and on all other fusion checks passing.
+ SameTripCount = true;
+ }
+ }
+
+ if (!SameTripCount) {
LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
"counts. Not fusing.\n");
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
@@ -734,7 +894,7 @@ private:
// Ensure that FC0 and FC1 have identical guards.
// If one (or both) are not guarded, this check is not necessary.
if (FC0->GuardBranch && FC1->GuardBranch &&
- !haveIdenticalGuards(*FC0, *FC1)) {
+ !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
"guards. Not Fusing.\n");
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
@@ -803,13 +963,23 @@ private:
LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
<< *FC1 << "\n");
+ FusionCandidate FC0Copy = *FC0;
+ // Peel the loop after determining that fusion is legal. The loops
+ // will still be safe to fuse after the peeling is performed.
+ bool Peel = TCDifference && *TCDifference > 0;
+ if (Peel)
+ peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
+
// Report fusion to the Optimization Remarks.
// Note this needs to be done *before* performFusion because
// performFusion will change the original loops, making it not
// possible to identify them after fusion is complete.
- reportLoopFusion<OptimizationRemark>(*FC0, *FC1, FuseCounter);
+ reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
+ FuseCounter);
- FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE);
+ FusionCandidate FusedCand(
+ performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE,
+ FC0Copy.PP);
FusedCand.verify();
assert(FusedCand.isEligibleForFusion(SE) &&
"Fused candidate should be eligible for fusion!");
@@ -1086,16 +1256,17 @@ private:
return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
}
- /// Simplify the condition of the latch branch of \p FC to true, when both of
- /// its successors are the same.
+ /// Modify the latch branch of FC to be unconditional since successors of the
+ /// branch are the same.
void simplifyLatchBranch(const FusionCandidate &FC) const {
BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator());
if (FCLatchBranch) {
assert(FCLatchBranch->isConditional() &&
FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) &&
"Expecting the two successors of FCLatchBranch to be the same");
- FCLatchBranch->setCondition(
- llvm::ConstantInt::getTrue(FCLatchBranch->getCondition()->getType()));
+ BranchInst *NewBranch =
+ BranchInst::Create(FCLatchBranch->getSuccessor(0));
+ ReplaceInstWithInst(FCLatchBranch, NewBranch);
}
}
@@ -1155,7 +1326,8 @@ private:
if (FC0.GuardBranch)
return fuseGuardedLoops(FC0, FC1);
- assert(FC1.Preheader == FC0.ExitBlock);
+ assert(FC1.Preheader ==
+ (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));
assert(FC1.Preheader->size() == 1 &&
FC1.Preheader->getSingleSuccessor() == FC1.Header);
@@ -1197,15 +1369,30 @@ private:
// to FC1.Header? I think this is basically what the three sequences are
// trying to accomplish; however, doing this directly in the CFG may mean
// the DT/PDT becomes invalid
- FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
- FC1.Header);
- TreeUpdates.emplace_back(DominatorTree::UpdateType(
- DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
- TreeUpdates.emplace_back(DominatorTree::UpdateType(
- DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
+ if (!FC0.Peeled) {
+ FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
+ FC1.Header);
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
+ } else {
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader));
+
+ // The exit block of the first loop is also no longer needed; remove it.
+ FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
+ FC1.Header);
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
+ FC0.ExitBlock->getTerminator()->eraseFromParent();
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
+ new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
+ }
// The pre-header of L1 is not necessary anymore.
- assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader));
+ assert(pred_empty(FC1.Preheader));
FC1.Preheader->getTerminator()->eraseFromParent();
new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
TreeUpdates.emplace_back(DominatorTree::UpdateType(
@@ -1246,7 +1433,7 @@ private:
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
- // Change the condition of FC0 latch branch to true, as both successors of
+ // Modify the latch branch of FC0 to be unconditional as both successors of
// the branch are the same.
simplifyLatchBranch(FC0);
@@ -1268,6 +1455,11 @@ private:
LI.removeBlock(FC1.Preheader);
DTU.deleteBB(FC1.Preheader);
+ if (FC0.Peeled) {
+ LI.removeBlock(FC0.ExitBlock);
+ DTU.deleteBB(FC0.ExitBlock);
+ }
+
DTU.flush();
// Is there a way to keep SE up-to-date so we don't need to forget the loops
@@ -1282,8 +1474,7 @@ private:
mergeLatch(FC0, FC1);
// Merge the loops.
- SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
- FC1.L->block_end());
+ SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
for (BasicBlock *BB : Blocks) {
FC0.L->addBlockEntry(BB);
FC1.L->removeBlockFromLoop(BB);
@@ -1291,7 +1482,7 @@ private:
continue;
LI.changeLoopFor(BB, FC0.L);
}
- while (!FC1.L->empty()) {
+ while (!FC1.L->isInnermost()) {
const auto &ChildLoopIt = FC1.L->begin();
Loop *ChildLoop = *ChildLoopIt;
FC1.L->removeChildLoop(ChildLoopIt);
@@ -1364,10 +1555,15 @@ private:
BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent();
BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
+ BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor();
// Move instructions from the exit block of FC0 to the beginning of the exit
- // block of FC1.
- moveInstructionsToTheBeginning(*FC0.ExitBlock, *FC1.ExitBlock, DT, PDT, DI);
+ // block of FC1 when the FC0 loop has not been peeled. If the FC0 loop has
+ // been peeled, instead move the instructions of the successor of the FC0
+ // exit block to the beginning of the exit block of FC1.
+ moveInstructionsToTheBeginning(
+ (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock,
+ DT, PDT, DI);
// Move instructions from the guard block of FC1 to the end of the guard
// block of FC0.
@@ -1387,8 +1583,9 @@ private:
// for FC1 (where FC1 guard would have gone if FC1 was not executed).
FC1NonLoopBlock->replacePhiUsesWith(FC1GuardBlock, FC0GuardBlock);
FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
- FC0.ExitBlock->getTerminator()->replaceUsesOfWith(FC1GuardBlock,
- FC1.Header);
+
+ BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;
+ BBToUpdate->getTerminator()->replaceUsesOfWith(FC1GuardBlock, FC1.Header);
// The guard of FC1 is not necessary anymore.
FC1.GuardBranch->eraseFromParent();
@@ -1403,9 +1600,18 @@ private:
TreeUpdates.emplace_back(DominatorTree::UpdateType(
DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
- assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) &&
+ if (FC0.Peeled) {
+ // Remove the block after the exit block of FC0.
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock));
+ FC0ExitBlockSuccessor->getTerminator()->eraseFromParent();
+ new UnreachableInst(FC0ExitBlockSuccessor->getContext(),
+ FC0ExitBlockSuccessor);
+ }
+
+ assert(pred_empty(FC1GuardBlock) &&
"Expecting guard block to have no predecessors");
- assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) &&
+ assert(succ_empty(FC1GuardBlock) &&
"Expecting guard block to have no successors");
// Remember the phi nodes originally in the header of FC0 in order to rewire
@@ -1459,14 +1665,13 @@ private:
// TODO: In the future, we can handle non-empty exit blocks by merging any
// instructions from FC0 exit block into FC1 exit block prior to removing
// the block.
- assert(pred_begin(FC0.ExitBlock) == pred_end(FC0.ExitBlock) &&
- "Expecting exit block to be empty");
+ assert(pred_empty(FC0.ExitBlock) && "Expecting exit block to be empty");
FC0.ExitBlock->getTerminator()->eraseFromParent();
new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
// Remove FC1 Preheader
// The pre-header of L1 is not necessary anymore.
- assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader));
+ assert(pred_empty(FC1.Preheader));
FC1.Preheader->getTerminator()->eraseFromParent();
new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
TreeUpdates.emplace_back(DominatorTree::UpdateType(
@@ -1509,7 +1714,7 @@ private:
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
- // Change the condition of FC0 latch branch to true, as both successors of
+ // Modify the latch branch of FC0 to be unconditional as both successors of
// the branch are the same.
simplifyLatchBranch(FC0);
@@ -1529,10 +1734,8 @@ private:
// All done
// Apply the updates to the Dominator Tree and cleanup.
- assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) &&
- "FC1GuardBlock has successors!!");
- assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) &&
- "FC1GuardBlock has predecessors!!");
+ assert(succ_empty(FC1GuardBlock) && "FC1GuardBlock has successors!!");
+ assert(pred_empty(FC1GuardBlock) && "FC1GuardBlock has predecessors!!");
// Update DT/PDT
DTU.applyUpdates(TreeUpdates);
@@ -1540,6 +1743,10 @@ private:
LI.removeBlock(FC1GuardBlock);
LI.removeBlock(FC1.Preheader);
LI.removeBlock(FC0.ExitBlock);
+ if (FC0.Peeled) {
+ LI.removeBlock(FC0ExitBlockSuccessor);
+ DTU.deleteBB(FC0ExitBlockSuccessor);
+ }
DTU.deleteBB(FC1GuardBlock);
DTU.deleteBB(FC1.Preheader);
DTU.deleteBB(FC0.ExitBlock);
@@ -1557,8 +1764,7 @@ private:
mergeLatch(FC0, FC1);
// Merge the loops.
- SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
- FC1.L->block_end());
+ SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
for (BasicBlock *BB : Blocks) {
FC0.L->addBlockEntry(BB);
FC1.L->removeBlockFromLoop(BB);
@@ -1566,7 +1772,7 @@ private:
continue;
LI.changeLoopFor(BB, FC0.L);
}
- while (!FC1.L->empty()) {
+ while (!FC1.L->isInnermost()) {
const auto &ChildLoopIt = FC1.L->begin();
Loop *ChildLoop = *ChildLoopIt;
FC1.L->removeChildLoop(ChildLoopIt);
@@ -1606,6 +1812,8 @@ struct LoopFuseLegacy : public FunctionPass {
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addRequired<DependenceAnalysisWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
@@ -1622,9 +1830,12 @@ struct LoopFuseLegacy : public FunctionPass {
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
const DataLayout &DL = F.getParent()->getDataLayout();
- LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
+
+ LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
return LF.fuseLoops(F);
}
};
@@ -1637,9 +1848,11 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
const DataLayout &DL = F.getParent()->getDataLayout();
- LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
+
+ LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
bool Changed = LF.fuseLoops(F);
if (!Changed)
return PreservedAnalyses::all();
@@ -1662,6 +1875,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }
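For context, this is roughly how the updated pass is scheduled through the new pass manager; a minimal sketch under the assumption of the usual PassBuilder setup, not part of the patch itself.

  #include "llvm/IR/PassManager.h"
  #include "llvm/Transforms/Scalar/LoopFuse.h"
  using namespace llvm;

  void addLoopFusion(FunctionPassManager &FPM) {
    // LoopFusePass is a function pass; the analyses it now depends on
    // (AssumptionAnalysis, TargetIRAnalysis) are fetched from the
    // FunctionAnalysisManager inside run(), as the hunk above shows.
    FPM.addPass(LoopFusePass());
  }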
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3cb4df12e9b0..8064c02e2b39 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -47,6 +47,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -79,6 +80,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -106,6 +108,32 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
+STATISTIC(
+ NumShiftUntilBitTest,
+ "Number of uncountable loops recognized as 'shift until bitttest' idiom");
+
+bool DisableLIRP::All;
+static cl::opt<bool, true>
+ DisableLIRPAll("disable-" DEBUG_TYPE "-all",
+ cl::desc("Options to disable Loop Idiom Recognize Pass."),
+ cl::location(DisableLIRP::All), cl::init(false),
+ cl::ReallyHidden);
+
+bool DisableLIRP::Memset;
+static cl::opt<bool, true>
+ DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
+ cl::desc("Proceed with loop idiom recognize pass, but do "
+ "not convert loop(s) to memset."),
+ cl::location(DisableLIRP::Memset), cl::init(false),
+ cl::ReallyHidden);
+
+bool DisableLIRP::Memcpy;
+static cl::opt<bool, true>
+ DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
+ cl::desc("Proceed with loop idiom recognize pass, but do "
+ "not convert loop(s) to memcpy."),
+ cl::location(DisableLIRP::Memcpy), cl::init(false),
+ cl::ReallyHidden);
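For reference, the loops these new knobs gate are the classic memset/memcpy idioms this file recognizes; a hand-written source-level illustration, not code from the patch.

  // Recognized as a memset: a strided store of a splattable value.
  void zero(char *p, unsigned n) {
    for (unsigned i = 0; i < n; ++i)
      p[i] = 0;            // becomes memset(p, 0, n) unless disabled
  }

  // Recognized as a memcpy: a load and a store with matching strides.
  void copy(char *d, const char *s, unsigned n) {
    for (unsigned i = 0; i < n; ++i)
      d[i] = s[i];         // becomes memcpy(d, s, n) unless disabled
  }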
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
@@ -204,6 +232,8 @@ private:
const DebugLoc &DL, bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop);
+ bool recognizeShiftUntilBitTest();
+
/// @}
};
@@ -217,6 +247,9 @@ public:
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (DisableLIRP::All)
+ return false;
+
if (skipLoop(L))
return false;
@@ -262,6 +295,9 @@ char LoopIdiomRecognizeLegacyPass::ID = 0;
PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
+ if (DisableLIRP::All)
+ return PreservedAnalyses::all();
+
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
@@ -426,11 +462,6 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
if (!SI->isUnordered())
return LegalStoreKind::None;
- // Don't convert stores of non-integral pointer types to memsets (which stores
- // integers).
- if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
- return LegalStoreKind::None;
-
// Avoid merging nontemporal stores.
if (SI->getMetadata(LLVMContext::MD_nontemporal))
return LegalStoreKind::None;
@@ -438,9 +469,17 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
+ // Don't convert stores of non-integral pointer types to memsets (which store
+ // integers).
+ if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
+ return LegalStoreKind::None;
+
// Reject stores that are so large that they overflow an unsigned.
- uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
- if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
+ // When storing out scalable vectors we bail out for now, since the code
+ // below currently only works for constant strides.
+ TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
+ if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) ||
+ (SizeInBits.getFixedSize() >> 32) != 0)
return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
@@ -469,13 +508,13 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// If we're allowed to form a memset, and the stored value would be
// acceptable for memset, use it.
- if (!UnorderedAtomic && HasMemset && SplatValue &&
+ if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
// Verify that the stored value is loop invariant. If not, we can't
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// It looks like we can use SplatValue.
return LegalStoreKind::Memset;
- } else if (!UnorderedAtomic && HasMemsetPattern &&
+ } else if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
// Don't create memset_pattern16s with address spaces.
StorePtr->getType()->getPointerAddressSpace() == 0 &&
(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
@@ -484,7 +523,7 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
}
// Otherwise, see if the store can be turned into a memcpy.
- if (HasMemcpy) {
+ if (HasMemcpy && !DisableLIRP::Memcpy) {
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
APInt Stride = getStoreStride(StoreEv);
@@ -539,12 +578,12 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
break;
case LegalStoreKind::Memset: {
// Find the base pointer.
- Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
+ Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
StoreRefsForMemset[Ptr].push_back(SI);
} break;
case LegalStoreKind::MemsetPattern: {
// Find the base pointer.
- Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
+ Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
StoreRefsForMemsetPattern[Ptr].push_back(SI);
} break;
case LegalStoreKind::Memcpy:
@@ -812,7 +851,7 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
- LocationSize AccessSize = LocationSize::unknown();
+ LocationSize AccessSize = LocationSize::afterPointer();
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
@@ -864,8 +903,8 @@ static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
// If we're going to need to zero extend the BE count, check if we can add
// one to it prior to zero extending without overflow. Provided this is safe,
// it allows better simplification of the +1.
- if (DL->getTypeSizeInBits(BECount->getType()) <
- DL->getTypeSizeInBits(IntPtr) &&
+ if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() <
+ DL->getTypeSizeInBits(IntPtr).getFixedSize() &&
SE->isLoopEntryGuardedByCond(
CurLoop, ICmpInst::ICMP_NE, BECount,
SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
@@ -908,10 +947,12 @@ bool LoopIdiomRecognize::processLoopStridedStore(
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
+ SCEVExpanderCleaner ExpCleaner(Expander, *DT);
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
+ bool Changed = false;
const SCEV *Start = Ev->getStart();
// Handle negative strided loops.
if (NegStride)
@@ -920,7 +961,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
if (!isSafeToExpand(Start, *SE))
- return false;
+ return Changed;
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
@@ -929,16 +970,22 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// base pointer and checking the region.
Value *BasePtr =
Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
+
+ // From here on out, conservatively report to the pass manager that we've
+ // changed the IR, even if we later clean up these added instructions. There
+ // may be structural differences e.g. in the order of use lists not accounted
+ // for in just a textual dump of the IR. This is written as a variable, even
+ // though statically all the places this dominates could be replaced with
+ // 'true', with the hope that anyone trying to be clever / "more precise" with
+ // the return value will read this comment, and leave them alone.
+ Changed = true;
+
if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
- StoreSize, *AA, Stores)) {
- Expander.clear();
- // If we generated new code for the base pointer, clean up.
- RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
- return false;
- }
+ StoreSize, *AA, Stores))
+ return Changed;
if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
- return false;
+ return Changed;
// Okay, everything looks good, insert the memset.
@@ -948,7 +995,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
if (!isSafeToExpand(NumBytesS, *SE))
- return false;
+ return Changed;
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
@@ -1007,32 +1054,10 @@ bool LoopIdiomRecognize::processLoopStridedStore(
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
++NumMemSet;
+ ExpCleaner.markResultUsed();
return true;
}
-class ExpandedValuesCleaner {
- SCEVExpander &Expander;
- TargetLibraryInfo *TLI;
- SmallVector<Value *, 4> ExpandedValues;
- bool Commit = false;
-
-public:
- ExpandedValuesCleaner(SCEVExpander &Expander, TargetLibraryInfo *TLI)
- : Expander(Expander), TLI(TLI) {}
-
- void add(Value *V) { ExpandedValues.push_back(V); }
-
- void commit() { Commit = true; }
-
- ~ExpandedValuesCleaner() {
- if (!Commit) {
- Expander.clear();
- for (auto *V : ExpandedValues)
- RecursivelyDeleteTriviallyDeadInstructions(V, TLI);
- }
- }
-};
-
/// If the stored value is a strided load in the same loop with the same stride
/// this may be transformable into a memcpy. This kicks in for stuff like
/// for (i) A[i] = B[i];
@@ -1063,8 +1088,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
- ExpandedValuesCleaner EVC(Expander, TLI);
+ SCEVExpanderCleaner ExpCleaner(Expander, *DT);
+ bool Changed = false;
const SCEV *StrStart = StoreEv->getStart();
unsigned StrAS = SI->getPointerAddressSpace();
Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
@@ -1081,13 +1107,21 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// checking everything.
Value *StoreBasePtr = Expander.expandCodeFor(
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
- EVC.add(StoreBasePtr);
+
+ // From here on out, conservatively report to the pass manager that we've
+ // changed the IR, even if we later clean up these added instructions. There
+ // may be structural differences e.g. in the order of use lists not accounted
+ // for in just a textual dump of the IR. This is written as a variable, even
+ // though statically all the places this dominates could be replaced with
+ // 'true', with the hope that anyone trying to be clever / "more precise" with
+ // the return value will read this comment, and leave them alone.
+ Changed = true;
SmallPtrSet<Instruction *, 1> Stores;
Stores.insert(SI);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSize, *AA, Stores))
- return false;
+ return Changed;
const SCEV *LdStart = LoadEv->getStart();
unsigned LdAS = LI->getPointerAddressSpace();
@@ -1100,14 +1134,13 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// mutated by the loop.
Value *LoadBasePtr = Expander.expandCodeFor(
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
- EVC.add(LoadBasePtr);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
StoreSize, *AA, Stores))
- return false;
+ return Changed;
if (avoidLIRForMultiBlockLoop())
- return false;
+ return Changed;
// Okay, everything is safe, we can transform this!
@@ -1116,7 +1149,6 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
- EVC.add(NumBytes);
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
@@ -1131,14 +1163,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const Align StoreAlign = SI->getAlign();
const Align LoadAlign = LI->getAlign();
if (StoreAlign < StoreSize || LoadAlign < StoreSize)
- return false;
+ return Changed;
// If the element.atomic memcpy is not lowered into explicit
// loads/stores later, then it will be lowered into an element-size
// specific lib call. If the lib call doesn't exist for our store size, then
// we shouldn't generate the memcpy.
if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
- return false;
+ return Changed;
// Create the call.
// Note that unordered atomic loads/stores are *required* by the spec to
@@ -1176,7 +1208,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
++NumMemCpy;
- EVC.commit();
+ ExpCleaner.markResultUsed();
return true;
}
@@ -1186,7 +1218,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
bool IsLoopMemset) {
if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
- if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) {
+ if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
<< " : LIR " << (IsMemset ? "Memset" : "Memcpy")
<< " avoided: multi-block top-level loop\n");
@@ -1203,7 +1235,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
<< "] Noncountable Loop %"
<< CurLoop->getHeader()->getName() << "\n");
- return recognizePopcount() || recognizeAndInsertFFS();
+ return recognizePopcount() || recognizeAndInsertFFS() ||
+ recognizeShiftUntilBitTest();
}
/// Check if the given conditional branch is based on the comparison between
@@ -1450,6 +1483,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
return false;
// step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
+ // or cnt.next = cnt + -1.
// TODO: We can skip the step. If loop trip count is known (CTLZ),
// then all uses of "cnt.next" could be optimized to the trip count
// plus "cnt0". Currently it is not optimized.
@@ -1463,7 +1497,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
continue;
ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
- if (!Inc || !Inc->isOne())
+ if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
continue;
PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
@@ -1692,11 +1726,13 @@ void LoopIdiomRecognize::transformLoopToCountable(
// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
IRBuilder<> Builder(PreheaderBr);
Builder.SetCurrentDebugLocation(DL);
- Value *FFS, *Count, *CountPrev, *NewCount, *InitXNext;
// Count = BitWidth - CTLZ(InitX);
+ // NewCount = Count;
// If there are uses of CntPhi create:
- // CountPrev = BitWidth - CTLZ(InitX >> 1);
+ // NewCount = BitWidth - CTLZ(InitX >> 1);
+ // Count = NewCount + 1;
+ Value *InitXNext;
if (IsCntPhiUsedOutsideLoop) {
if (DefX->getOpcode() == Instruction::AShr)
InitXNext =
@@ -1711,27 +1747,31 @@ void LoopIdiomRecognize::transformLoopToCountable(
llvm_unreachable("Unexpected opcode!");
} else
InitXNext = InitX;
- FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
- Count = Builder.CreateSub(
- ConstantInt::get(FFS->getType(),
- FFS->getType()->getIntegerBitWidth()),
+ Value *FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
+ Value *Count = Builder.CreateSub(
+ ConstantInt::get(FFS->getType(), FFS->getType()->getIntegerBitWidth()),
FFS);
+ Value *NewCount = Count;
if (IsCntPhiUsedOutsideLoop) {
- CountPrev = Count;
- Count = Builder.CreateAdd(
- CountPrev,
- ConstantInt::get(CountPrev->getType(), 1));
+ NewCount = Count;
+ Count = Builder.CreateAdd(Count, ConstantInt::get(Count->getType(), 1));
}
- NewCount = Builder.CreateZExtOrTrunc(
- IsCntPhiUsedOutsideLoop ? CountPrev : Count,
- cast<IntegerType>(CntInst->getType()));
+ NewCount = Builder.CreateZExtOrTrunc(NewCount,
+ cast<IntegerType>(CntInst->getType()));
- // If the counter's initial value is not zero, insert Add Inst.
Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
- ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
- if (!InitConst || !InitConst->isZero())
- NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
+ // If the counter was being incremented in the loop, add NewCount to the
+ // counter's initial value, but only if the initial value is not zero.
+ ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+ if (!InitConst || !InitConst->isZero())
+ NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ } else {
+ // If the count was being decremented in the loop, subtract NewCount from
+ // the counter's initial value.
+ NewCount = Builder.CreateSub(CntInitVal, NewCount);
+ }
// Step 2: Insert new IV and loop condition:
// loop:
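The operand check added above distinguishes counters that step by +1 from counters that step by -1. At the source level the two shapes look roughly like this; a hand-written sketch with illustrative names (x0 is the initial value of x), and the exact loop forms accepted are still constrained by detectShiftUntilZeroIdiom.

  // Incremented counter: final value = initial + #iterations, hence the
  // existing CreateAdd(NewCount, CntInitVal) path.
  unsigned count_up(unsigned x, unsigned init) {
    unsigned cnt = init;
    while (x) { x >>= 1; cnt += 1; }
    return cnt;   // roughly init + (bitwidth - ctlz(x0))
  }

  // Decremented counter: final value = initial - #iterations, hence the
  // new CreateSub(CntInitVal, NewCount) path.
  unsigned count_down(unsigned x, unsigned init) {
    unsigned cnt = init;
    while (x) { x >>= 1; cnt -= 1; }
    return cnt;   // roughly init - (bitwidth - ctlz(x0))
  }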
@@ -1879,3 +1919,343 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
}
+
+/// Match loop-invariant value.
+template <typename SubPattern_t> struct match_LoopInvariant {
+ SubPattern_t SubPattern;
+ const Loop *L;
+
+ match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
+ : SubPattern(SP), L(L) {}
+
+ template <typename ITy> bool match(ITy *V) {
+ return L->isLoopInvariant(V) && SubPattern.match(V);
+ }
+};
+
+/// Matches if the value is loop-invariant.
+template <typename Ty>
+inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
+ return match_LoopInvariant<Ty>(M, L);
+}
+
+/// Return true if the idiom is detected in the loop.
+///
+/// The core idiom we are trying to detect is:
+/// \code
+/// entry:
+/// <...>
+/// %bitmask = shl i32 1, %bitpos
+/// br label %loop
+///
+/// loop:
+/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
+/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
+/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
+/// %x.next = shl i32 %x.curr, 1
+/// <...>
+/// br i1 %x.curr.isbitunset, label %loop, label %end
+///
+/// end:
+/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
+/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
+/// <...>
+/// \endcode
+static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
+ Value *&BitMask, Value *&BitPos,
+ Value *&CurrX, Instruction *&NextX) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ " Performing shift-until-bittest idiom detection.\n");
+
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
+ return false;
+ }
+
+ BasicBlock *LoopHeaderBB = CurLoop->getHeader();
+ BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
+ assert(LoopPreheaderBB && "There is always a loop preheader.");
+
+ using namespace PatternMatch;
+
+ // Step 1: Check if the loop backedge is in desirable form.
+
+ ICmpInst::Predicate Pred;
+ Value *CmpLHS, *CmpRHS;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(LoopHeaderBB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
+ m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
+ return false;
+ }
+
+ // Step 2: Check if the backedge's condition is in desirable form.
+
+ auto MatchVariableBitMask = [&]() {
+ return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
+ match(CmpLHS,
+ m_c_And(m_Value(CurrX),
+ m_CombineAnd(
+ m_Value(BitMask),
+ m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
+ CurLoop))));
+ };
+ auto MatchConstantBitMask = [&]() {
+ return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
+ match(CmpLHS, m_And(m_Value(CurrX),
+ m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
+ (BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
+ };
+ auto MatchDecomposableConstantBitMask = [&]() {
+ APInt Mask;
+ return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) &&
+ ICmpInst::isEquality(Pred) && Mask.isPowerOf2() &&
+ (BitMask = ConstantInt::get(CurrX->getType(), Mask)) &&
+ (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2()));
+ };
+
+ if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
+ !MatchDecomposableConstantBitMask()) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
+ return false;
+ }
+
+ // Step 3: Check if the recurrence is in desirable form.
+ auto *CurrXPN = dyn_cast<PHINode>(CurrX);
+ if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
+ return false;
+ }
+
+ BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
+ NextX =
+ dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
+
+ if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
+ // FIXME: support right-shift?
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
+ return false;
+ }
+
+ // Step 4: Check if the backedge's destinations are in desirable form.
+
+ assert(ICmpInst::isEquality(Pred) &&
+ "Should only get equality predicates here.");
+
+ // cmp-br is commutative, so canonicalize to a single variant.
+ if (Pred != ICmpInst::Predicate::ICMP_EQ) {
+ Pred = ICmpInst::getInversePredicate(Pred);
+ std::swap(TrueBB, FalseBB);
+ }
+
+ // We expect to exit loop when comparison yields false,
+ // so when it yields true we should branch back to loop header.
+ if (TrueBB != LoopHeaderBB) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
+ return false;
+ }
+
+ // Okay, idiom checks out.
+ return true;
+}
+
+/// Look for the following loop:
+/// \code
+/// entry:
+/// <...>
+/// %bitmask = shl i32 1, %bitpos
+/// br label %loop
+///
+/// loop:
+/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
+/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
+/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
+/// %x.next = shl i32 %x.curr, 1
+/// <...>
+/// br i1 %x.curr.isbitunset, label %loop, label %end
+///
+/// end:
+/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
+/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
+/// <...>
+/// \endcode
+///
+/// And transform it into:
+/// \code
+/// entry:
+/// %bitmask = shl i32 1, %bitpos
+/// %lowbitmask = add i32 %bitmask, -1
+/// %mask = or i32 %lowbitmask, %bitmask
+/// %x.masked = and i32 %x, %mask
+/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
+/// i1 true)
+/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
+/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
+/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
+/// %tripcount = add i32 %backedgetakencount, 1
+/// %x.curr = shl i32 %x, %backedgetakencount
+/// %x.next = shl i32 %x, %tripcount
+/// br label %loop
+///
+/// loop:
+/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
+/// %loop.iv.next = add nuw i32 %loop.iv, 1
+/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
+/// <...>
+/// br i1 %loop.ivcheck, label %end, label %loop
+///
+/// end:
+/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
+/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
+/// <...>
+/// \endcode
+bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
+ bool MadeChange = false;
+
+ Value *X, *BitMask, *BitPos, *XCurr;
+ Instruction *XNext;
+ if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
+ XNext)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ " shift-until-bittest idiom detection failed.\n");
+ return MadeChange;
+ }
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n");
+
+ // Ok, it is the idiom we were looking for, we *could* transform this loop,
+ // but is it profitable to transform?
+
+ BasicBlock *LoopHeaderBB = CurLoop->getHeader();
+ BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
+ assert(LoopPreheaderBB && "There is always a loop preheader.");
+
+ BasicBlock *SuccessorBB = CurLoop->getExitBlock();
+ assert(LoopPreheaderBB && "There is only a single successor.");
+
+ IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
+ Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
+
+ Intrinsic::ID IntrID = Intrinsic::ctlz;
+ Type *Ty = X->getType();
+
+ TargetTransformInfo::TargetCostKind CostKind =
+ TargetTransformInfo::TCK_SizeAndLatency;
+
+ // The rewrite is considered to be unprofitable if and only if the
+ // intrinsic/shift we'll use are not cheap. Note that we are okay with *just*
+ // making the loop countable, even if nothing else changes.
+ IntrinsicCostAttributes Attrs(
+ IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()});
+ int Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
+ if (Cost > TargetTransformInfo::TCC_Basic) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ " Intrinsic is too costly, not beneficial\n");
+ return MadeChange;
+ }
+ if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
+ TargetTransformInfo::TCC_Basic) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n");
+ return MadeChange;
+ }
+
+ // Ok, transform appears worthwhile.
+ MadeChange = true;
+
+ // Step 1: Compute the loop trip count.
+
+ Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
+ BitPos->getName() + ".lowbitmask");
+ Value *Mask =
+ Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
+ Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
+ CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
+ IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()},
+ /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
+ Value *XMaskedNumActiveBits = Builder.CreateSub(
+ ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
+ XMasked->getName() + ".numactivebits");
+ Value *XMaskedLeadingOnePos =
+ Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
+ XMasked->getName() + ".leadingonepos");
+
+ Value *LoopBackedgeTakenCount = Builder.CreateSub(
+ BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount");
+ // We know loop's backedge-taken count, but what's loop's trip count?
+ // Note that while NUW is always safe, NSW is only safe for bit widths != 2.
+ Value *LoopTripCount =
+ Builder.CreateNUWAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
+ CurLoop->getName() + ".tripcount");
+
+ // Step 2: Compute the recurrence's final value without a loop.
+
+ // NewX is always safe to compute, because `LoopBackedgeTakenCount`
+ // will always be smaller than `bitwidth(X)`, i.e. we never get poison.
+ Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
+ NewX->takeName(XCurr);
+ if (auto *I = dyn_cast<Instruction>(NewX))
+ I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
+
+ Value *NewXNext;
+ // Rewriting XNext is more complicated, however, because `X << LoopTripCount`
+ // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
+ // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
+ // that isn't the case, we'll need to emit an alternative, safe IR.
+ if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
+ PatternMatch::match(
+ BitPos, PatternMatch::m_SpecificInt_ICMP(
+ ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(),
+ Ty->getScalarSizeInBits() - 1))))
+ NewXNext = Builder.CreateShl(X, LoopTripCount);
+ else {
+ // Otherwise, just additionally shift by one. It's the smallest solution;
+ // alternatively, we could check that NewX is INT_MIN (or BitPos is )
+ // and select 0 instead.
+ NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
+ }
+
+ NewXNext->takeName(XNext);
+ if (auto *I = dyn_cast<Instruction>(NewXNext))
+ I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
+
+ // Step 3: Adjust the successor basic block to receive the computed
+ // recurrence's final value instead of the recurrence itself.
+
+ XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
+ XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
+
+ // Step 4: Rewrite the loop into a countable form, with canonical IV.
+
+ // The new canonical induction variable.
+ Builder.SetInsertPoint(&LoopHeaderBB->front());
+ auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
+
+ // The induction itself.
+ // Note that while NUW is always safe, NSW is only safe for bit widths != 2.
+ Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
+ auto *IVNext = Builder.CreateNUWAdd(IV, ConstantInt::get(Ty, 1),
+ IV->getName() + ".next");
+
+ // The loop trip count check.
+ auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
+ CurLoop->getName() + ".ivcheck");
+ Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ LoopHeaderBB->getTerminator()->eraseFromParent();
+
+ // Populate the IV PHI.
+ IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
+ IV->addIncoming(IVNext, LoopHeaderBB);
+
+ // Step 5: Forget the "non-computable" trip-count SCEV associated with the
+ // loop. The loop would otherwise not be deleted even if it becomes empty.
+
+ SE->forgetLoop(CurLoop);
+
+ // Other passes will take care of actually deleting the loop if possible.
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n");
+
+ ++NumShiftUntilBitTest;
+ return MadeChange;
+}
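In C terms, the loop being recognized and the closed form the transform computes look roughly like this. A hand-written 32-bit sketch, not code from the patch; it assumes x has a set bit at or below bitpos so that the original loop terminates.

  unsigned shift_until_bit_set(unsigned x, unsigned bitpos) {
    unsigned bitmask = 1u << bitpos;
    while ((x & bitmask) == 0)   // uncountable as written
      x <<= 1;
    return x;                    // corresponds to %x.curr at the exit
  }

  unsigned shift_until_bit_set_closed(unsigned x, unsigned bitpos) {
    unsigned mask = (1u << bitpos) | ((1u << bitpos) - 1); // bits [0, bitpos]
    unsigned activebits = 32u - (unsigned)__builtin_clz(x & mask);
    unsigned backedges = bitpos - (activebits - 1);        // backedge-taken count
    return x << backedges;
  }

For example, x = 3 and bitpos = 3 runs the loop body twice and yields 12; the closed form computes mask = 15, activebits = 2, backedges = 2, and the same result.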
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 7787c0bccd4c..d9dbc0deb42a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopInterchange.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -27,6 +28,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -427,9 +429,7 @@ private:
const LoopInterchangeLegality &LIL;
};
-// Main LoopInterchange Pass.
-struct LoopInterchange : public LoopPass {
- static char ID;
+struct LoopInterchange {
ScalarEvolution *SE = nullptr;
LoopInfo *LI = nullptr;
DependenceInfo *DI = nullptr;
@@ -438,34 +438,21 @@ struct LoopInterchange : public LoopPass {
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
- LoopInterchange() : LoopPass(ID) {
- initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DependenceAnalysisWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
+ DominatorTree *DT, OptimizationRemarkEmitter *ORE)
+ : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
- getLoopAnalysisUsage(AU);
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L) || L->getParentLoop())
+ bool run(Loop *L) {
+ if (L->getParentLoop())
return false;
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
return processLoopList(populateWorklist(*L));
}
bool isComputableLoopNest(LoopVector LoopList) {
for (Loop *L : LoopList) {
const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
- if (ExitCountOuter == SE->getCouldNotCompute()) {
+ if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
return false;
}
@@ -624,6 +611,13 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
containsUnsafeInstructions(OuterLoopLatch))
return false;
+ // Also make sure the inner loop preheader does not contain any unsafe
+ // instructions. Note that all instructions in the preheader will be moved to
+ // the outer loop header when interchanging.
+ if (InnerLoopPreHeader != OuterLoopHeader &&
+ containsUnsafeInstructions(InnerLoopPreHeader))
+ return false;
+
LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
// We have a perfect loop nest.
return true;
@@ -667,6 +661,10 @@ static Value *followLCSSA(Value *SV) {
// Check V's users to see if it is involved in a reduction in L.
static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
+ // Reduction variables cannot be constants.
+ if (isa<Constant>(V))
+ return nullptr;
+
for (Value *User : V->users()) {
if (PHINode *PHI = dyn_cast<PHINode>(User)) {
if (PHI->getNumIncomingValues() == 1)
@@ -707,8 +705,7 @@ bool LoopInterchangeLegality::findInductionAndReductions(
Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
if (!InnerRedPhi ||
- !llvm::any_of(InnerRedPhi->incoming_values(),
- [&PHI](Value *V) { return V == &PHI; })) {
+ !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
LLVM_DEBUG(
dbgs()
<< "Failed to recognize PHI as an induction or reduction.\n");
@@ -1045,6 +1042,10 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
bool FoundInnerInduction = false;
bool FoundOuterInduction = false;
for (unsigned i = 0; i < NumOp; ++i) {
+ // Skip operands that are not SCEV-able.
+ if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
+ continue;
+
const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
if (!AR)
@@ -1189,7 +1190,7 @@ void LoopInterchangeTransform::restructureLoops(
removeChildLoop(NewInner, NewOuter);
LI->changeTopLevelLoop(NewInner, NewOuter);
}
- while (!NewOuter->empty())
+ while (!NewOuter->isInnermost())
NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
NewOuter->addChildLoop(NewInner);
@@ -1305,6 +1306,21 @@ bool LoopInterchangeTransform::transform() {
LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
}
+ // Instructions in the original inner loop preheader may depend on values
+ // defined in the outer loop header. Move them there, because the original
+ // inner loop preheader will become the entry into the interchanged loop nest.
+ // Currently we move all instructions and rely on LICM to move invariant
+ // instructions outside the loop nest.
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ if (InnerLoopPreHeader != OuterLoopHeader) {
+ SmallPtrSet<Instruction *, 4> NeedsMoving;
+ for (Instruction &I :
+ make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
+ std::prev(InnerLoopPreHeader->end()))))
+ I.moveBefore(OuterLoopHeader->getTerminator());
+ }
+
Transformed |= adjustLoopLinks();
if (!Transformed) {
LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
@@ -1521,8 +1537,7 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
// The outer loop header might or might not branch to the outer latch.
// We are guaranteed to branch to the inner loop preheader.
- if (std::find(succ_begin(OuterLoopHeaderBI), succ_end(OuterLoopHeaderBI),
- OuterLoopLatch) != succ_end(OuterLoopHeaderBI))
+ if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch))
updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates,
/*MustUpdateOnce=*/false);
updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
@@ -1569,9 +1584,9 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
// Now update the reduction PHIs in the inner and outer loop headers.
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
- for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
+ for (PHINode &PHI : drop_begin(InnerLoopHeader->phis()))
InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
- for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
+ for (PHINode &PHI : drop_begin(OuterLoopHeader->phis()))
OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
auto &OuterInnerReductions = LIL.getOuterInnerReductions();
@@ -1595,6 +1610,17 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
+ // Values defined in the outer loop header could be used in the inner loop
+ // latch. In that case, we need to create LCSSA phis for them, because after
+ // interchanging they will be defined in the new inner loop and used in the
+ // new outer loop.
+ IRBuilder<> Builder(OuterLoopHeader->getContext());
+ SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
+ for (Instruction &I :
+ make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
+ MayNeedLCSSAPhis.push_back(&I);
+ formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
+
return true;
}
@@ -1612,15 +1638,58 @@ bool LoopInterchangeTransform::adjustLoopLinks() {
return Changed;
}
-char LoopInterchange::ID = 0;
+/// Main LoopInterchange Pass.
+struct LoopInterchangeLegacyPass : public LoopPass {
+ static char ID;
+
+ LoopInterchangeLegacyPass() : LoopPass(ID) {
+ initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DependenceAnalysisWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+
+ getLoopAnalysisUsage(AU);
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L))
+ return false;
+
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+
+ return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
+ }
+};
-INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
+char LoopInterchangeLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
+INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
-Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }
+Pass *llvm::createLoopInterchangePass() {
+ return new LoopInterchangeLegacyPass();
+}
+
+PreservedAnalyses LoopInterchangePass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ Function &F = *L.getHeader()->getParent();
+
+ DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
+ OptimizationRemarkEmitter ORE(&F);
+ if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(&L))
+ return PreservedAnalyses::all();
+ return getLoopPassPreservedAnalyses();
+}
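// A standalone sketch, not part of the upstream patch: a minimal illustration
// of the llvm::is_contained and llvm::drop_begin idioms that the
// LoopInterchange hunks above switch to. It assumes only that the LLVM ADT
// headers are on the include path and uses plain integers instead of IR.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

int main() {
  llvm::SmallVector<int, 4> Values = {10, 20, 30, 40};
  // is_contained(Range, X) replaces hand-rolled std::find / any_of equality
  // scans such as the ones removed in findInductionAndReductions and
  // adjustLoopBranches.
  bool HasThirty = llvm::is_contained(Values, 30);
  // drop_begin(Range) now defaults to skipping one element, so the explicit
  // count used when walking the header PHIs is no longer needed.
  int SumOfTail = 0;
  for (int V : llvm::drop_begin(Values))
    SumOfTail += V; // 20 + 30 + 40
  return (HasThirty && SumOfTail == 90) ? 0 : 1;
}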
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 4412b3079461..058612149a94 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -27,7 +27,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
@@ -56,6 +55,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
@@ -308,8 +308,8 @@ public:
/// We need a check if one is a pointer for a candidate load and the other is
/// a pointer for a possibly intervening store.
bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
- const SmallPtrSet<Value *, 4> &PtrsWrittenOnFwdingPath,
- const std::set<Value *> &CandLoadPtrs) {
+ const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
+ const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
Value *Ptr1 =
LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue;
Value *Ptr2 =
@@ -384,11 +384,9 @@ public:
findPointersWrittenOnForwardingPath(Candidates);
// Collect the pointers of the candidate loads.
- // FIXME: SmallPtrSet does not work with std::inserter.
- std::set<Value *> CandLoadPtrs;
- transform(Candidates,
- std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
- std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
+ SmallPtrSet<Value *, 4> CandLoadPtrs;
+ for (const auto &Candidate : Candidates)
+ CandLoadPtrs.insert(Candidate.getLoadPtr());
const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
SmallVector<RuntimePointerCheck, 4> Checks;
@@ -488,7 +486,6 @@ public:
// Filter the candidates further.
SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
- unsigned NumForwarding = 0;
for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
LLVM_DEBUG(dbgs() << "Candidate " << Cand);
@@ -508,12 +505,17 @@ public:
if (!Cand.isDependenceDistanceOfOne(PSE, L))
continue;
- ++NumForwarding;
+ assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
+ "Loading from something other than indvar?");
+ assert(
+ isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
+ "Storing to something other than indvar?");
+
+ Candidates.push_back(Cand);
LLVM_DEBUG(
dbgs()
- << NumForwarding
+ << Candidates.size()
<< ". Valid store-to-load forwarding across the loop backedge\n");
- Candidates.push_back(Cand);
}
if (Candidates.empty())
return false;
@@ -561,10 +563,19 @@ public:
// Point of no-return, start the transformation. First, version the loop
// if necessary.
- LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
- LV.setAliasChecks(std::move(Checks));
- LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
+ LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
LV.versionLoop();
+
+ // After versioning, some of the candidates' pointers could stop being
+ // SCEVAddRecs. We need to filter them out.
+ auto NoLongerGoodCandidate = [this](
+ const StoreToLoadForwardingCandidate &Cand) {
+ return !isa<SCEVAddRecExpr>(
+ PSE.getSCEV(Cand.Load->getPointerOperand())) ||
+ !isa<SCEVAddRecExpr>(
+ PSE.getSCEV(Cand.Store->getPointerOperand()));
+ };
+ llvm::erase_if(Candidates, NoLongerGoodCandidate);
}
// Next, propagate the value stored by the store to the users of the load.
@@ -573,7 +584,7 @@ public:
"storeforward");
for (const auto &Cand : Candidates)
propagateStoredValueToLoadUsers(Cand, SEE);
- NumLoopLoadEliminted += NumForwarding;
+ NumLoopLoadEliminted += Candidates.size();
return true;
}
@@ -599,6 +610,7 @@ private:
static bool
eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ ScalarEvolution *SE, AssumptionCache *AC,
function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
// Build up a worklist of inner-loops to transform to avoid iterator
// invalidation.
@@ -607,15 +619,21 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
// which merely optimizes the use of loads in a loop.
SmallVector<Loop *, 8> Worklist;
+ bool Changed = false;
+
for (Loop *TopLevelLoop : LI)
- for (Loop *L : depth_first(TopLevelLoop))
+ for (Loop *L : depth_first(TopLevelLoop)) {
+ Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false);
// We only handle inner-most loops.
- if (L->empty())
+ if (L->isInnermost())
Worklist.push_back(L);
+ }
// Now walk the identified inner loops.
- bool Changed = false;
for (Loop *L : Worklist) {
+ // Match historical behavior
+ if (!L->isRotatedForm() || !L->getExitingBlock())
+ continue;
// The actual work is performed by LoadEliminationForLoop.
LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
Changed |= LEL.processLoop();
@@ -649,7 +667,7 @@ public:
// Process each loop nest in the function.
return eliminateLoadsAcrossLoops(
- F, LI, DT, BFI, PSI,
+ F, LI, DT, BFI, PSI, /*SE*/ nullptr, /*AC*/ nullptr,
[&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
}
@@ -706,8 +724,9 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
bool Changed = eliminateLoadsAcrossLoops(
- F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
+ F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & {
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, MSSA};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
});
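// A standalone sketch, not part of the upstream patch: it mirrors the
// container changes above, taking SmallPtrSetImpl<T *> & so callers can pick
// any inline size, and using llvm::erase_if to drop entries that stop
// qualifying (as the post-versioning candidate filtering does). Names are
// illustrative only.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

// Accepting SmallPtrSetImpl decouples this helper from the caller's "4".
static bool isCandidatePtr(const llvm::SmallPtrSetImpl<int *> &CandPtrs,
                           int *P) {
  return CandPtrs.count(P) != 0;
}

int main() {
  int A = 0, B = 1;
  llvm::SmallPtrSet<int *, 4> CandPtrs;
  CandPtrs.insert(&A);

  llvm::SmallVector<int *, 8> Worklist = {&A, &B};
  // erase_if performs the erase(remove_if(...)) dance in a single call.
  llvm::erase_if(Worklist, [&](int *P) { return !isCandidatePtr(CandPtrs, P); });
  return Worklist.size() == 1 ? 0 : 1;
}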
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 98889a9df116..3fe8e7259114 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -6,74 +6,113 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/TimeProfiler.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Support/TimeProfiler.h"
using namespace llvm;
-// Explicit template instantiations and specialization defininitions for core
-// template typedefs.
namespace llvm {
-template class PassManager<Loop, LoopAnalysisManager,
- LoopStandardAnalysisResults &, LPMUpdater &>;
/// Explicitly specialize the pass manager's run method to handle loop nest
/// structure updates.
-template <>
PreservedAnalyses
PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U) {
- PreservedAnalyses PA = PreservedAnalyses::all();
if (DebugLogging)
dbgs() << "Starting Loop pass manager run.\n";
+ // Runs loop-nest passes only when the current loop is a top-level one.
+ PreservedAnalyses PA = (L.isOutermost() && !LoopNestPasses.empty())
+ ? runWithLoopNestPasses(L, AM, AR, U)
+ : runWithoutLoopNestPasses(L, AM, AR, U);
+
+ // Invalidation for the current loop should be handled above, and other loop
+ // analysis results shouldn't be impacted by runs over this loop. Therefore,
+ // the remaining analysis results in the AnalysisManager are preserved. We
+ // mark this with a set so that we don't need to inspect each one
+ // individually.
+ // FIXME: This isn't correct! This loop and all nested loops' analyses should
+ // be preserved, but unrolling should invalidate the parent loop's analyses.
+ PA.preserveSet<AllAnalysesOn<Loop>>();
+
+ if (DebugLogging)
+ dbgs() << "Finished Loop pass manager run.\n";
+
+ return PA;
+}
+
+// Run both loop passes and loop-nest passes on top-level loop \p L.
+PreservedAnalyses
+LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ assert(L.isOutermost() &&
+ "Loop-nest passes should only run on top-level loops.");
+ PreservedAnalyses PA = PreservedAnalyses::all();
+
// Request PassInstrumentation from analysis manager, will use it to run
// instrumenting callbacks for the passes later.
PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(L, AR);
- for (auto &Pass : Passes) {
- // Check the PassInstrumentation's BeforePass callbacks before running the
- // pass, skip its execution completely if asked to (callback returns false).
- if (!PI.runBeforePass<Loop>(*Pass, L))
- continue;
- if (DebugLogging)
- dbgs() << "Running pass: " << Pass->name() << " on " << L;
+ unsigned LoopPassIndex = 0, LoopNestPassIndex = 0;
- PreservedAnalyses PassPA;
- {
- TimeTraceScope TimeScope(Pass->name(), L.getName());
- PassPA = Pass->run(L, AM, AR, U);
+ // `LoopNestPtr` points to the `LoopNest` object for the current top-level
+ // loop and `IsLoopNestPtrValid` indicates whether the pointer is still valid.
+ // The `LoopNest` object will have to be re-constructed if the pointer is
+ // invalid when encountering a loop-nest pass.
+ std::unique_ptr<LoopNest> LoopNestPtr;
+ bool IsLoopNestPtrValid = false;
+
+ for (size_t I = 0, E = IsLoopNestPass.size(); I != E; ++I) {
+ Optional<PreservedAnalyses> PassPA;
+ if (!IsLoopNestPass[I]) {
+ // The `I`-th pass is a loop pass.
+ auto &Pass = LoopPasses[LoopPassIndex++];
+ PassPA = runSinglePass(L, Pass, AM, AR, U, PI);
+ } else {
+ // The `I`-th pass is a loop-nest pass.
+ auto &Pass = LoopNestPasses[LoopNestPassIndex++];
+
+ // If the loop-nest object calculated before is no longer valid,
+ // re-calculate it here before running the loop-nest pass.
+ if (!IsLoopNestPtrValid) {
+ LoopNestPtr = LoopNest::getLoopNest(L, AR.SE);
+ IsLoopNestPtrValid = true;
+ }
+ PassPA = runSinglePass(*LoopNestPtr, Pass, AM, AR, U, PI);
}
- // do not pass deleted Loop into the instrumentation
- if (U.skipCurrentLoop())
- PI.runAfterPassInvalidated<Loop>(*Pass);
- else
- PI.runAfterPass<Loop>(*Pass, L);
+    // A `None` `PassPA` means the before-pass callbacks in
+    // `PassInstrumentation` returned false, so the pass did not run and the
+    // bookkeeping below can be skipped.
+ if (!PassPA)
+ continue;
// If the loop was deleted, abort the run and return to the outer walk.
if (U.skipCurrentLoop()) {
- PA.intersect(std::move(PassPA));
+ PA.intersect(std::move(*PassPA));
break;
}
-#ifndef NDEBUG
- // Verify the loop structure and LCSSA form before visiting the loop.
- L.verifyLoop();
- assert(L.isRecursivelyLCSSAForm(AR.DT, AR.LI) &&
- "Loops must remain in LCSSA form!");
-#endif
-
// Update the analysis manager as each pass runs and potentially
// invalidates analyses.
- AM.invalidate(L, PassPA);
+ AM.invalidate(L, *PassPA);
// Finally, we intersect the final preserved analyses to compute the
// aggregate preserved set for this pass manager.
- PA.intersect(std::move(PassPA));
+ PA.intersect(std::move(*PassPA));
+
+ // Check if the current pass preserved the loop-nest object or not.
+ IsLoopNestPtrValid &= PassPA->getChecker<LoopNestAnalysis>().preserved();
// FIXME: Historically, the pass managers all called the LLVM context's
// yield function here. We don't have a generic way to acquire the
@@ -81,21 +120,207 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
// in the new pass manager so it is currently omitted.
// ...getContext().yield();
}
+ return PA;
+}
- // Invalidation for the current loop should be handled above, and other loop
- // analysis results shouldn't be impacted by runs over this loop. Therefore,
- // the remaining analysis results in the AnalysisManager are preserved. We
- // mark this with a set so that we don't need to inspect each one
- // individually.
- // FIXME: This isn't correct! This loop and all nested loops' analyses should
- // be preserved, but unrolling should invalidate the parent loop's analyses.
- PA.preserveSet<AllAnalysesOn<Loop>>();
+// Run all loop passes on loop \p L. Loop-nest passes don't run, either
+// because \p L is not a top-level loop or because there are no loop-nest
+// passes in the pass manager at all.
+PreservedAnalyses
+LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ PreservedAnalyses PA = PreservedAnalyses::all();
- if (DebugLogging)
- dbgs() << "Finished Loop pass manager run.\n";
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(L, AR);
+ for (auto &Pass : LoopPasses) {
+ Optional<PreservedAnalyses> PassPA = runSinglePass(L, Pass, AM, AR, U, PI);
+
+    // A `None` `PassPA` means the before-pass callbacks in
+    // `PassInstrumentation` returned false, so the pass did not run and the
+    // bookkeeping below can be skipped.
+ if (!PassPA)
+ continue;
+
+ // If the loop was deleted, abort the run and return to the outer walk.
+ if (U.skipCurrentLoop()) {
+ PA.intersect(std::move(*PassPA));
+ break;
+ }
+ // Update the analysis manager as each pass runs and potentially
+ // invalidates analyses.
+ AM.invalidate(L, *PassPA);
+
+ // Finally, we intersect the final preserved analyses to compute the
+ // aggregate preserved set for this pass manager.
+ PA.intersect(std::move(*PassPA));
+
+ // FIXME: Historically, the pass managers all called the LLVM context's
+ // yield function here. We don't have a generic way to acquire the
+ // context and it isn't yet clear what the right pattern is for yielding
+ // in the new pass manager so it is currently omitted.
+ // ...getContext().yield();
+ }
return PA;
}
+} // namespace llvm
+
+PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ // Before we even compute any loop analyses, first run a miniature function
+ // pass pipeline to put loops into their canonical form. Note that we can
+ // directly build up function analyses after this as the function pass
+ // manager handles all the invalidation at that layer.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(F);
+
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // canonicalization pipeline.
+ if (PI.runBeforePass<Function>(LoopCanonicalizationFPM, F)) {
+ PA = LoopCanonicalizationFPM.run(F, AM);
+ PI.runAfterPass<Function>(LoopCanonicalizationFPM, F, PA);
+ }
+
+ // Get the loop structure for this function
+ LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+
+ // If there are no loops, there is nothing to do here.
+ if (LI.empty())
+ return PA;
+
+ // Get the analysis results needed by loop passes.
+ MemorySSA *MSSA =
+ UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr;
+ BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
+ ? (&AM.getResult<BlockFrequencyAnalysis>(F))
+ : nullptr;
+ LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
+ AM.getResult<AssumptionAnalysis>(F),
+ AM.getResult<DominatorTreeAnalysis>(F),
+ AM.getResult<LoopAnalysis>(F),
+ AM.getResult<ScalarEvolutionAnalysis>(F),
+ AM.getResult<TargetLibraryAnalysis>(F),
+ AM.getResult<TargetIRAnalysis>(F),
+ BFI,
+ MSSA};
+
+ // Setup the loop analysis manager from its proxy. It is important that
+ // this is only done when there are loops to process and we have built the
+ // LoopStandardAnalysisResults object. The loop analyses cached in this
+ // manager have access to those analysis results and so it must invalidate
+ // itself when they go away.
+ auto &LAMFP = AM.getResult<LoopAnalysisManagerFunctionProxy>(F);
+ if (UseMemorySSA)
+ LAMFP.markMSSAUsed();
+ LoopAnalysisManager &LAM = LAMFP.getManager();
+
+ // A postorder worklist of loops to process.
+ SmallPriorityWorklist<Loop *, 4> Worklist;
+
+ // Register the worklist and loop analysis manager so that loop passes can
+ // update them when they mutate the loop nest structure.
+ LPMUpdater Updater(Worklist, LAM, LoopNestMode);
+
+ // Add the loop nests in the reverse order of LoopInfo. See method
+ // declaration.
+ if (!LoopNestMode) {
+ appendLoopsToWorklist(LI, Worklist);
+ } else {
+ for (Loop *L : LI)
+ Worklist.insert(L);
+ }
+
+#ifndef NDEBUG
+ PI.pushBeforeNonSkippedPassCallback([&LAR, &LI](StringRef PassID, Any IR) {
+ if (isSpecialPass(PassID, {"PassManager"}))
+ return;
+ assert(any_isa<const Loop *>(IR) || any_isa<const LoopNest *>(IR));
+ const Loop *L = any_isa<const Loop *>(IR)
+ ? any_cast<const Loop *>(IR)
+ : &any_cast<const LoopNest *>(IR)->getOutermostLoop();
+ assert(L && "Loop should be valid for printing");
+
+ // Verify the loop structure and LCSSA form before visiting the loop.
+ L->verifyLoop();
+ assert(L->isRecursivelyLCSSAForm(LAR.DT, LI) &&
+ "Loops must remain in LCSSA form!");
+ });
+#endif
+
+ do {
+ Loop *L = Worklist.pop_back_val();
+ assert(!(LoopNestMode && L->getParentLoop()) &&
+ "L should be a top-level loop in loop-nest mode.");
+
+ // Reset the update structure for this loop.
+ Updater.CurrentL = L;
+ Updater.SkipCurrentLoop = false;
+
+#ifndef NDEBUG
+ // Save a parent loop pointer for asserts.
+ Updater.ParentL = L->getParentLoop();
+#endif
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, skip its execution completely if asked to (callback returns
+ // false).
+ if (!PI.runBeforePass<Loop>(*Pass, *L))
+ continue;
+
+ PreservedAnalyses PassPA;
+ {
+ TimeTraceScope TimeScope(Pass->name());
+ PassPA = Pass->run(*L, LAM, LAR, Updater);
+ }
+
+ // Do not pass deleted Loop into the instrumentation.
+ if (Updater.skipCurrentLoop())
+ PI.runAfterPassInvalidated<Loop>(*Pass, PassPA);
+ else
+ PI.runAfterPass<Loop>(*Pass, *L, PassPA);
+
+ // FIXME: We should verify the set of analyses relevant to Loop passes
+ // are preserved.
+
+ // If the loop hasn't been deleted, we need to handle invalidation here.
+ if (!Updater.skipCurrentLoop())
+ // We know that the loop pass couldn't have invalidated any other
+ // loop's analyses (that's the contract of a loop pass), so directly
+ // handle the loop analysis manager's invalidation here.
+ LAM.invalidate(*L, PassPA);
+
+ // Then intersect the preserved set so that invalidation of module
+ // analyses will eventually occur when the module pass completes.
+ PA.intersect(std::move(PassPA));
+ } while (!Worklist.empty());
+
+#ifndef NDEBUG
+ PI.popBeforeNonSkippedPassCallback();
+#endif
+
+ // By definition we preserve the proxy. We also preserve all analyses on
+ // Loops. This precludes *any* invalidation of loop analyses by the proxy,
+ // but that's OK because we've taken care to invalidate analyses in the
+ // loop analysis manager incrementally above.
+ PA.preserveSet<AllAnalysesOn<Loop>>();
+ PA.preserve<LoopAnalysisManagerFunctionProxy>();
+ // We also preserve the set of standard analyses.
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ if (UseBlockFrequencyInfo && F.hasProfileData())
+ PA.preserve<BlockFrequencyAnalysis>();
+ if (UseMemorySSA)
+ PA.preserve<MemorySSAAnalysis>();
+ // FIXME: What we really want to do here is preserve an AA category, but
+ // that concept doesn't exist yet.
+ PA.preserve<AAManager>();
+ PA.preserve<BasicAA>();
+ PA.preserve<GlobalsAA>();
+ PA.preserve<SCEVAA>();
+ return PA;
}
PrintLoopPass::PrintLoopPass() : OS(dbgs()) {}
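// A heavily reduced sketch, not part of the upstream patch, of the control
// flow introduced above: each pass run yields an Optional result, and None
// means the before-pass instrumentation callback vetoed the run, so there is
// nothing to invalidate. FakePreserved and FakePass are hypothetical stand-ins
// for PreservedAnalyses and the real pass types.
#include "llvm/ADT/Optional.h"
#include <functional>
#include <vector>

struct FakePreserved { bool All; };
using FakePass = std::function<FakePreserved()>;

static llvm::Optional<FakePreserved> runSinglePass(const FakePass &P,
                                                   bool BeforePassOK) {
  if (!BeforePassOK)
    return llvm::None; // skipped: the pass never ran
  return P();
}

int main() {
  std::vector<FakePass> Passes = {[] { return FakePreserved{true}; }};
  bool AllPreserved = true;
  for (const FakePass &P : Passes) {
    llvm::Optional<FakePreserved> PassPA =
        runSinglePass(P, /*BeforePassOK=*/true);
    if (!PassPA)
      continue; // mirrors the "if (!PassPA) continue;" above
    AllPreserved &= PassPA->All;
  }
  return AllPreserved ? 0 : 1;
}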
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index edde22d6708f..4f97641e2027 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -362,7 +362,7 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
// For the new PM, we also can't use BranchProbabilityInfo as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but BPI is not preserved, hence a newly built one is needed.
- BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI);
+ BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr);
LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, &BPI);
if (!LP.runOnLoop(&L))
return PreservedAnalyses::all();
@@ -439,8 +439,8 @@ static bool isSafeToTruncateWideIVType(const DataLayout &DL,
Type *RangeCheckType) {
if (!EnableIVTruncation)
return false;
- assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()) >
- DL.getTypeSizeInBits(RangeCheckType) &&
+ assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedSize() >
+ DL.getTypeSizeInBits(RangeCheckType).getFixedSize() &&
"Expected latch check IV type to be larger than range check operand "
"type!");
// The start and end values of the IV should be known. This is to guarantee
@@ -454,13 +454,13 @@ static bool isSafeToTruncateWideIVType(const DataLayout &DL,
// LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
// IV wraps around, and the truncation of the IV would lose the range of
// iterations between 2^32 and 2^64.
- bool Increasing;
- if (!SE.isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
+ if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred))
return false;
// The active bits should be less than the bits in the RangeCheckType. This
// guarantees that truncating the latch check to RangeCheckType is a safe
// operation.
- auto RangeCheckTypeBitSize = DL.getTypeSizeInBits(RangeCheckType);
+ auto RangeCheckTypeBitSize =
+ DL.getTypeSizeInBits(RangeCheckType).getFixedSize();
return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
}
@@ -477,7 +477,8 @@ static Optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL,
if (RangeCheckType == LatchType)
return LatchCheck;
// For now, bail out if latch type is narrower than range type.
- if (DL.getTypeSizeInBits(LatchType) < DL.getTypeSizeInBits(RangeCheckType))
+ if (DL.getTypeSizeInBits(LatchType).getFixedSize() <
+ DL.getTypeSizeInBits(RangeCheckType).getFixedSize())
return None;
if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))
return None;
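// A minimal example, not taken from the patch itself, of the TypeSize change
// above: DataLayout::getTypeSizeInBits returns a TypeSize, and fixed-width
// comparisons now spell out getFixedSize(). Only an LLVMContext and a default
// DataLayout string are assumed.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::DataLayout DL("");
  llvm::Type *RangeCheckTy = llvm::Type::getInt32Ty(Ctx);
  llvm::Type *LatchTy = llvm::Type::getInt64Ty(Ctx);
  // getFixedSize() asserts the size is not scalable, which matches the
  // predication code's assumption of fixed-width induction variables.
  bool LatchIsNarrower = DL.getTypeSizeInBits(LatchTy).getFixedSize() <
                         DL.getTypeSizeInBits(RangeCheckTy).getFixedSize();
  return LatchIsNarrower ? 1 : 0;
}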
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 3542d0a4ee73..65a6205f0302 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -50,6 +50,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopReroll.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -161,12 +162,12 @@ namespace {
IL_End
};
- class LoopReroll : public LoopPass {
+ class LoopRerollLegacyPass : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopReroll() : LoopPass(ID) {
- initializeLoopRerollPass(*PassRegistry::getPassRegistry());
+ LoopRerollLegacyPass() : LoopPass(ID) {
+ initializeLoopRerollLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -175,6 +176,15 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
getLoopAnalysisUsage(AU);
}
+ };
+
+ class LoopReroll {
+ public:
+ LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE,
+ TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA)
+ : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT),
+ PreserveLCSSA(PreserveLCSSA) {}
+ bool runOnLoop(Loop *L);
protected:
AliasAnalysis *AA;
@@ -484,16 +494,16 @@ namespace {
} // end anonymous namespace
-char LoopReroll::ID = 0;
+char LoopRerollLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
+INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
+INITIALIZE_PASS_END(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false,
+ false)
-Pass *llvm::createLoopRerollPass() {
- return new LoopReroll;
-}
+Pass *llvm::createLoopRerollPass() { return new LoopRerollLegacyPass; }
// Returns true if the provided instruction is used outside the given loop.
// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
@@ -1071,6 +1081,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
DenseSet<Instruction*> V;
collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
for (auto *I : V) {
+ if (I->mayHaveSideEffects()) {
+ LLVM_DEBUG(dbgs() << "LRR: Aborting - "
+ << "An instruction which does not belong to any root "
+ << "sets must not have side effects: " << *I);
+ return false;
+ }
Uses[I].set(IL_All);
}
@@ -1086,7 +1102,7 @@ LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
UsesTy::iterator *StartI) {
UsesTy::iterator I = StartI ? *StartI : In.begin();
while (I != In.end() && (I->second.test(Val) == 0 ||
- Exclude.count(I->first) != 0))
+ Exclude.contains(I->first)))
++I;
return I;
}
@@ -1644,18 +1660,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
return true;
}
-bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipLoop(L))
- return false;
-
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
- *L->getHeader()->getParent());
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
+bool LoopReroll::runOnLoop(Loop *L) {
BasicBlock *Header = L->getHeader();
LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %"
<< Header->getName() << " (" << L->getNumBlocks()
@@ -1704,3 +1709,26 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
}
+
+bool LoopRerollLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipLoop(L))
+ return false;
+
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent());
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ return LoopReroll(AA, LI, SE, TLI, DT, PreserveLCSSA).runOnLoop(L);
+}
+
+PreservedAnalyses LoopRerollPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L)
+ ? getLoopPassPreservedAnalyses()
+ : PreservedAnalyses::all();
+}
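// A hypothetical, dependency-free mirror, not part of the upstream patch, of
// the refactoring shape above: the transform lives in a plain implementation
// class, and both the legacy and the new pass-manager entry points just
// gather analyses and construct it. FakeLoop and FakeAnalyses are stand-ins.
struct FakeLoop {};
struct FakeAnalyses { /* AA, LI, SE, TLI, DT, PreserveLCSSA ... */ };

class RerollImpl {
  FakeAnalyses *A;

public:
  explicit RerollImpl(FakeAnalyses *A) : A(A) {}
  bool runOnLoop(FakeLoop *L) { (void)L; (void)A; return false; } // transform body
};

// Legacy PM: analyses come from getAnalysis<...>() wrappers.
static bool legacyRunOnLoop(FakeLoop *L, FakeAnalyses *A) {
  return RerollImpl(A).runOnLoop(L);
}

// New PM: analyses come from LoopStandardAnalysisResults.
static bool newPMRun(FakeLoop &L, FakeAnalyses &AR) {
  return RerollImpl(&AR).runOnLoop(&L);
}

int main() {
  FakeLoop L;
  FakeAnalyses A;
  return (legacyRunOnLoop(&L, &A) || newPMRun(L, A)) ? 1 : 0;
}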
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index f92566ba77ce..ad1cfc68ece0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -12,6 +12,7 @@
#include "llvm/Transforms/Scalar/LoopRotation.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
@@ -33,22 +34,35 @@ static cl::opt<unsigned> DefaultRotationThreshold(
"rotation-max-header-size", cl::init(16), cl::Hidden,
cl::desc("The default maximum header size for automatic loop rotation"));
-LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication)
- : EnableHeaderDuplication(EnableHeaderDuplication) {}
+static cl::opt<bool> PrepareForLTOOption(
+ "rotation-prepare-for-lto", cl::init(false), cl::Hidden,
+ cl::desc("Run loop-rotation in the prepare-for-lto stage. This option "
+ "should be used for testing only."));
+
+LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO)
+ : EnableHeaderDuplication(EnableHeaderDuplication),
+ PrepareForLTO(PrepareForLTO) {}
PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
- int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;
+ // Vectorization requires loop-rotation. Use default threshold for loops the
+ // user explicitly marked for vectorization, even when header duplication is
+ // disabled.
+ int Threshold = EnableHeaderDuplication ||
+ hasVectorizeTransformation(&L) == TM_ForcedByUser
+ ? DefaultRotationThreshold
+ : 0;
const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
Optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA)
MSSAU = MemorySSAUpdater(AR.MSSA);
- bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
- SQ, false, Threshold, false);
+ bool Changed =
+ LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, false,
+ Threshold, false, PrepareForLTO || PrepareForLTOOption);
if (!Changed)
return PreservedAnalyses::all();
@@ -66,10 +80,13 @@ namespace {
class LoopRotateLegacyPass : public LoopPass {
unsigned MaxHeaderSize;
+ bool PrepareForLTO;
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
+ LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1,
+ bool PrepareForLTO = false)
+ : LoopPass(ID), PrepareForLTO(PrepareForLTO) {
initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry());
if (SpecifiedMaxHeaderSize == -1)
MaxHeaderSize = DefaultRotationThreshold;
@@ -105,9 +122,17 @@ public:
if (MSSAA)
MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
}
+ // Vectorization requires loop-rotation. Use default threshold for loops the
+ // user explicitly marked for vectorization, even when header duplication is
+ // disabled.
+ int Threshold = hasVectorizeTransformation(L) == TM_ForcedByUser
+ ? DefaultRotationThreshold
+ : MaxHeaderSize;
+
return LoopRotation(L, LI, TTI, AC, &DT, &SE,
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
- false, MaxHeaderSize, false);
+ false, Threshold, false,
+ PrepareForLTO || PrepareForLTOOption);
}
};
} // end namespace
@@ -122,6 +147,6 @@ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
false)
-Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
- return new LoopRotateLegacyPass(MaxHeaderSize);
+Pass *llvm::createLoopRotatePass(int MaxHeaderSize, bool PrepareForLTO) {
+ return new LoopRotateLegacyPass(MaxHeaderSize, PrepareForLTO);
}
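// A compact sketch, not part of the patch, of the two knobs threaded through
// LoopRotation above: a testing-only cl::opt and the per-loop threshold
// choice that still rotates user-forced-vectorize loops even when header
// duplication is disabled. The flag name below is hypothetical.
#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<bool> DemoPrepareForLTO(
    "demo-rotation-prepare-for-lto", llvm::cl::init(false), llvm::cl::Hidden,
    llvm::cl::desc("Illustrative stand-in for rotation-prepare-for-lto"));

// With header duplication disabled, the default threshold is still used when
// the user explicitly requested vectorization for this loop.
static int pickThreshold(bool EnableHeaderDuplication, bool VectorizeForced,
                         int DefaultThreshold) {
  return (EnableHeaderDuplication || VectorizeForced) ? DefaultThreshold : 0;
}

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  bool PrepareForLTO = DemoPrepareForLTO;
  return (pickThreshold(false, true, 16) == 16 && !PrepareForLTO) ? 0 : 1;
}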
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 031e5b9c1d2c..cc6d11220807 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -16,7 +16,6 @@
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
@@ -366,15 +365,20 @@ private:
unsigned DummyIdx = 1;
for (BasicBlock *BB : DeadExitBlocks) {
- SmallVector<Instruction *, 4> DeadPhis;
+ // Eliminate all Phis and LandingPads from dead exits.
+ // TODO: Consider removing all instructions in this dead block.
+ SmallVector<Instruction *, 4> DeadInstructions;
for (auto &PN : BB->phis())
- DeadPhis.push_back(&PN);
+ DeadInstructions.push_back(&PN);
- // Eliminate all Phis from dead exits.
- for (Instruction *PN : DeadPhis) {
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
- PN->eraseFromParent();
+ if (auto *LandingPad = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+ DeadInstructions.emplace_back(LandingPad);
+
+ for (Instruction *I : DeadInstructions) {
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
}
+
assert(DummyIdx != 0 && "Too many dead exits!");
DummySwitch->addCase(Builder.getInt32(DummyIdx++), BB);
DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
@@ -410,9 +414,10 @@ private:
FixLCSSALoop = FixLCSSALoop->getParentLoop();
assert(FixLCSSALoop && "Should be a loop!");
// We need all DT updates to be done before forming LCSSA.
- DTU.applyUpdates(DTUpdates);
if (MSSAU)
- MSSAU->applyUpdates(DTUpdates, DT);
+ MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
+ else
+ DTU.applyUpdates(DTUpdates);
DTUpdates.clear();
formLCSSARecursively(*FixLCSSALoop, DT, &LI, &SE);
}
@@ -420,8 +425,7 @@ private:
if (MSSAU) {
// Clear all updates now. Facilitates deletes that follow.
- DTU.applyUpdates(DTUpdates);
- MSSAU->applyUpdates(DTUpdates, DT);
+ MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
DTUpdates.clear();
if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -447,7 +451,7 @@ private:
if (LI.isLoopHeader(BB)) {
assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!");
Loop *DL = LI.getLoopFor(BB);
- if (DL->getParentLoop()) {
+ if (!DL->isOutermost()) {
for (auto *PL = DL->getParentLoop(); PL; PL = PL->getParentLoop())
for (auto *BB : DL->getBlocks())
PL->removeBlockFromLoop(BB);
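// An abstract mirror, not LLVM's API and not part of the patch, of the update
// ordering above: all pending CFG edge updates are flushed through exactly one
// updater (the MemorySSA updater when MemorySSA is live, otherwise the DomTree
// updater), so the same update list is never applied twice.
#include <cassert>
#include <vector>

struct Update { int From, To; };

struct FakeDTU {
  int Applied = 0;
  void apply(const std::vector<Update> &U) { Applied += (int)U.size(); }
};

struct FakeMSSAU {
  FakeDTU *DT;
  int Applied;
  explicit FakeMSSAU(FakeDTU *DT) : DT(DT), Applied(0) {}
  // UpdateDT=true forwards the edges to the DomTree, so the caller must not
  // apply them again itself.
  void apply(const std::vector<Update> &U, bool UpdateDT) {
    Applied += (int)U.size();
    if (UpdateDT)
      DT->apply(U);
  }
};

static void flushUpdates(std::vector<Update> &Updates, FakeDTU &DTU,
                         FakeMSSAU *MSSAU) {
  if (MSSAU)
    MSSAU->apply(Updates, /*UpdateDT=*/true);
  else
    DTU.apply(Updates);
  Updates.clear();
}

int main() {
  FakeDTU DTU;
  FakeMSSAU MSSAU(&DTU);
  std::vector<Update> Updates = {{1, 2}, {2, 3}};
  flushUpdates(Updates, DTU, &MSSAU);
  assert(Updates.empty() && DTU.Applied == 2 && MSSAU.Applied == 2);
  return 0;
}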
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 1c03a4bf6c02..47698fdde69f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -39,6 +39,8 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
@@ -67,6 +69,14 @@ static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
"max-uses-for-sinking", cl::Hidden, cl::init(30),
cl::desc("Do not sink instructions that have too many uses."));
+static cl::opt<bool> EnableMSSAInLoopSink(
+ "enable-mssa-in-loop-sink", cl::Hidden, cl::init(true),
+ cl::desc("Enable MemorySSA for LoopSink in new pass manager"));
+
+static cl::opt<bool> EnableMSSAInLegacyLoopSink(
+ "enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false),
+ cl::desc("Enable MemorySSA for LoopSink in legacy pass manager"));
+
/// Return adjusted total frequency of \p BBs.
///
/// * If there is only one BB, sinking instruction will not introduce code
@@ -172,11 +182,10 @@ findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
// sinking is successful.
// \p LoopBlockNumber is used to sort the insertion blocks to ensure
// determinism.
-static bool sinkInstruction(Loop &L, Instruction &I,
- const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
- const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber,
- LoopInfo &LI, DominatorTree &DT,
- BlockFrequencyInfo &BFI) {
+static bool sinkInstruction(
+ Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
+ const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI,
+ DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) {
// Compute the set of blocks in loop L which contain a use of I.
SmallPtrSet<BasicBlock *, 2> BBs;
for (auto &U : I.uses()) {
@@ -213,8 +222,7 @@ static bool sinkInstruction(Loop &L, Instruction &I,
// of the loop block numbers as iterating the set doesn't give a useful
// order. No need to stable sort as the block numbers are a total ordering.
SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
- SortedBBsToSinkInto.insert(SortedBBsToSinkInto.begin(), BBsToSinkInto.begin(),
- BBsToSinkInto.end());
+ llvm::append_range(SortedBBsToSinkInto, BBsToSinkInto);
llvm::sort(SortedBBsToSinkInto, [&](BasicBlock *A, BasicBlock *B) {
return LoopBlockNumber.find(A)->second < LoopBlockNumber.find(B)->second;
});
@@ -230,6 +238,21 @@ static bool sinkInstruction(Loop &L, Instruction &I,
Instruction *IC = I.clone();
IC->setName(I.getName());
IC->insertBefore(&*N->getFirstInsertionPt());
+
+ if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ // Create a new MemoryAccess and let MemorySSA set its defining access.
+ MemoryAccess *NewMemAcc =
+ MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning);
+ if (NewMemAcc) {
+ if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
+ MSSAU->insertDef(MemDef, /*RenameUses=*/true);
+ else {
+ auto *MemUse = cast<MemoryUse>(NewMemAcc);
+ MSSAU->insertUse(MemUse, /*RenameUses=*/true);
+ }
+ }
+ }
+
// Replaces uses of I with IC in N
I.replaceUsesWithIf(IC, [N](Use &U) {
return cast<Instruction>(U.getUser())->getParent() == N;
@@ -244,6 +267,11 @@ static bool sinkInstruction(Loop &L, Instruction &I,
NumLoopSunk++;
I.moveBefore(&*MoveBB->getFirstInsertionPt());
+ if (MSSAU)
+ if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&I)))
+ MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning);
+
return true;
}
@@ -252,15 +280,14 @@ static bool sinkInstruction(Loop &L, Instruction &I,
static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
DominatorTree &DT,
BlockFrequencyInfo &BFI,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE,
+ AliasSetTracker *CurAST,
+ MemorySSA *MSSA) {
BasicBlock *Preheader = L.getLoopPreheader();
- if (!Preheader)
- return false;
+ assert(Preheader && "Expected loop to have preheader");
- // Enable LoopSink only when runtime profile is available.
- // With static profile, the sinking decision may be sub-optimal.
- if (!Preheader->getParent()->hasProfileData())
- return false;
+ assert(Preheader->getParent()->hasProfileData() &&
+ "Unexpected call when profile data unavailable.");
const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
// If there are no basic blocks with lower frequency than the preheader then
@@ -271,13 +298,15 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
}))
return false;
- bool Changed = false;
- AliasSetTracker CurAST(AA);
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ std::unique_ptr<SinkAndHoistLICMFlags> LICMFlags;
+ if (MSSA) {
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+ LICMFlags =
+ std::make_unique<SinkAndHoistLICMFlags>(/*IsSink=*/true, &L, MSSA);
+ }
- // Compute alias set.
- for (BasicBlock *BB : L.blocks())
- CurAST.add(*BB);
- CurAST.add(*Preheader);
+ bool Changed = false;
// Sort loop's basic blocks by frequency
SmallVector<BasicBlock *, 10> ColdLoopBBs;
@@ -300,9 +329,11 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// No need to check for instruction's operands are loop invariant.
assert(L.hasLoopInvariantOperands(I) &&
"Insts in a loop's preheader should have loop invariant operands!");
- if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false))
+ if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
+ LICMFlags.get()))
continue;
- if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
+ if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
+ MSSAU.get()))
Changed = true;
}
@@ -311,6 +342,13 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
return Changed;
}
+static void computeAliasSet(Loop &L, BasicBlock &Preheader,
+ AliasSetTracker &CurAST) {
+ for (BasicBlock *BB : L.blocks())
+ CurAST.add(*BB);
+ CurAST.add(Preheader);
+}
+
PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
// Nothing to do if there are no loops.
@@ -321,6 +359,10 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ MemorySSA *MSSA = EnableMSSAInLoopSink
+ ? &FAM.getResult<MemorySSAAnalysis>(F).getMSSA()
+ : nullptr;
+
// We want to do a postorder walk over the loops. Since loops are a tree this
// is equivalent to a reversed preorder walk and preorder is easy to compute
// without recursion. Since we reverse the preorder, we will visit siblings
@@ -332,11 +374,27 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
do {
Loop &L = *PreorderLoops.pop_back_val();
+ BasicBlock *Preheader = L.getLoopPreheader();
+ if (!Preheader)
+ continue;
+
+ // Enable LoopSink only when runtime profile is available.
+ // With static profile, the sinking decision may be sub-optimal.
+ if (!Preheader->getParent()->hasProfileData())
+ continue;
+
+ std::unique_ptr<AliasSetTracker> CurAST;
+ if (!EnableMSSAInLoopSink) {
+ CurAST = std::make_unique<AliasSetTracker>(AA);
+ computeAliasSet(L, *Preheader, *CurAST.get());
+ }
+
// Note that we don't pass SCEV here because it is only used to invalidate
// loops in SCEV and we don't preserve (or request) SCEV at all making that
// unnecessary.
Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI,
- /*ScalarEvolution*/ nullptr);
+ /*ScalarEvolution*/ nullptr,
+ CurAST.get(), MSSA);
} while (!PreorderLoops.empty());
if (!Changed)
@@ -344,6 +402,14 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
+
+ if (MSSA) {
+ PA.preserve<MemorySSAAnalysis>();
+
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
+
return PA;
}
@@ -358,19 +424,46 @@ struct LegacyLoopSinkPass : public LoopPass {
if (skipLoop(L))
return false;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ // Enable LoopSink only when runtime profile is available.
+ // With static profile, the sinking decision may be sub-optimal.
+ if (!Preheader->getParent()->hasProfileData())
+ return false;
+
+ AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- return sinkLoopInvariantInstructions(
- *L, getAnalysis<AAResultsWrapperPass>().getAAResults(),
- getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
+ std::unique_ptr<AliasSetTracker> CurAST;
+ MemorySSA *MSSA = nullptr;
+ if (EnableMSSAInLegacyLoopSink)
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ else {
+ CurAST = std::make_unique<AliasSetTracker>(AA);
+ computeAliasSet(*L, *Preheader, *CurAST.get());
+ }
+
+ bool Changed = sinkLoopInvariantInstructions(
+ *L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
- SE ? &SE->getSE() : nullptr);
+ SE ? &SE->getSE() : nullptr, CurAST.get(), MSSA);
+
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
+ return Changed;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
getLoopAnalysisUsage(AU);
+ if (EnableMSSAInLegacyLoopSink) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
}
};
}
@@ -380,6 +473,7 @@ INITIALIZE_PASS_BEGIN(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false,
false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false)
Pass *llvm::createLoopSinkPass() { return new LegacyLoopSinkPass(); }
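// A tiny example, separate from the patch, of llvm::append_range, which the
// sinking code above uses instead of insert(begin(), R.begin(), R.end()) when
// flattening the chosen sink blocks into a sortable vector. Plain int
// pointers stand in for BasicBlocks.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

int main() {
  int B0 = 0, B1 = 1, B2 = 2;
  llvm::SmallPtrSet<int *, 2> BBsToSinkInto;
  BBsToSinkInto.insert(&B2);
  BBsToSinkInto.insert(&B0);
  BBsToSinkInto.insert(&B1);

  llvm::SmallVector<int *, 4> Sorted;
  // append_range(C, R) is shorthand for C.insert(C.end(), R.begin(), R.end()).
  llvm::append_range(Sorted, BBsToSinkInto);
  // Set iteration order is not deterministic, so sort by a stable key, just
  // as the pass sorts by its LoopBlockNumber map.
  llvm::sort(Sorted, [](const int *A, const int *B) { return *A < *B; });
  return Sorted.size() == 3 ? 0 : 1;
}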
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index cf02ef1e83f3..5dec9b542076 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -75,11 +75,13 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
@@ -422,7 +424,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
// Handle a multiplication by -1 (negation) if it didn't fold.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
if (Mul->getOperand(0)->isAllOnesValue()) {
- SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
+ SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));
const SCEV *NewMul = SE.getMulExpr(Ops);
SmallVector<const SCEV *, 4> MyGood;
@@ -483,11 +485,10 @@ bool Formula::isCanonical(const Loop &L) const {
// If ScaledReg is not a recurrent expr, or it is but its loop is not current
// loop, meanwhile BaseRegs contains a recurrent expr reg related with current
// loop, we want to swap the reg in BaseRegs with ScaledReg.
- auto I =
- find_if(make_range(BaseRegs.begin(), BaseRegs.end()), [&](const SCEV *S) {
- return isa<const SCEVAddRecExpr>(S) &&
- (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
- });
+ auto I = find_if(BaseRegs, [&](const SCEV *S) {
+ return isa<const SCEVAddRecExpr>(S) &&
+ (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ });
return I == BaseRegs.end();
}
@@ -506,8 +507,7 @@ void Formula::canonicalize(const Loop &L) {
// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
if (!ScaledReg) {
- ScaledReg = BaseRegs.back();
- BaseRegs.pop_back();
+ ScaledReg = BaseRegs.pop_back_val();
Scale = 1;
}
@@ -516,11 +516,10 @@ void Formula::canonicalize(const Loop &L) {
// reg with ScaledReg.
const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
if (!SAR || SAR->getLoop() != &L) {
- auto I = find_if(make_range(BaseRegs.begin(), BaseRegs.end()),
- [&](const SCEV *S) {
- return isa<const SCEVAddRecExpr>(S) &&
- (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
- });
+ auto I = find_if(BaseRegs, [&](const SCEV *S) {
+ return isa<const SCEVAddRecExpr>(S) &&
+ (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ });
if (I != BaseRegs.end())
std::swap(ScaledReg, *I);
}
@@ -753,13 +752,13 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
return C->getValue()->getSExtValue();
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ SmallVector<const SCEV *, 8> NewOps(Add->operands());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ SmallVector<const SCEV *, 8> NewOps(AR->operands());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddRecExpr(NewOps, AR->getLoop(),
@@ -779,13 +778,13 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
return GV;
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ SmallVector<const SCEV *, 8> NewOps(Add->operands());
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
if (Result)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ SmallVector<const SCEV *, 8> NewOps(AR->operands());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
if (Result)
S = SE.getAddRecExpr(NewOps, AR->getLoop(),
@@ -935,6 +934,8 @@ static bool isHighCostExpansion(const SCEV *S,
case scSignExtend:
return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
Processed, SE);
+ default:
+ break;
}
if (!Processed.insert(S).second)
@@ -1210,7 +1211,7 @@ static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
return 0;
if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
return getSetupCost(S->getStart(), Depth - 1);
- if (auto S = dyn_cast<SCEVCastExpr>(Reg))
+ if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
return getSetupCost(S->getOperand(), Depth - 1);
if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
return std::accumulate(S->op_begin(), S->op_end(), 0,
@@ -2786,6 +2787,7 @@ static const SCEV *getExprBase(const SCEV *S) {
case scAddRecExpr:
return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
}
+ llvm_unreachable("Unknown SCEV kind!");
}
/// Return true if the chain increment is profitable to expand into a loop
@@ -2861,7 +2863,6 @@ static bool isProfitableChain(IVChain &Chain,
for (const IVInc &Inc : Chain) {
if (TTI.isProfitableLSRChainElement(Inc.UserInst))
return true;
-
if (Inc.IncExpr->isZero())
continue;
@@ -3401,7 +3402,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Worklist.append(N->op_begin(), N->op_end());
- else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
@@ -3834,10 +3835,14 @@ void LSRInstance::GenerateConstantOffsetsImpl(
F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
return;
- if (IsScaledReg)
+ if (IsScaledReg) {
F.ScaledReg = G;
- else
+ } else {
F.BaseRegs[Idx] = G;
+      // We may generate a non-canonical Formula if G is a recurrent expr reg
+      // related to the current loop while F.ScaledReg is not.
+ F.canonicalize(*L);
+ }
(void)InsertFormula(LU, LUIdx, F);
}
@@ -5378,10 +5383,11 @@ void LSRInstance::RewriteForPHI(
// Split the critical edge.
BasicBlock *NewBB = nullptr;
if (!Parent->isLandingPad()) {
- NewBB = SplitCriticalEdge(BB, Parent,
- CriticalEdgeSplittingOptions(&DT, &LI)
- .setMergeIdenticalEdges()
- .setKeepOneInputPHIs());
+ NewBB =
+ SplitCriticalEdge(BB, Parent,
+ CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
+ .setMergeIdenticalEdges()
+ .setKeepOneInputPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
@@ -5514,8 +5520,8 @@ void LSRInstance::ImplementSolution(
// we can remove them after we are done working.
SmallVector<WeakTrackingVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
- "lsr");
+ SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr",
+ false);
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
@@ -5614,13 +5620,19 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
if (IU.empty()) return;
// Skip nested loops until we can model them better with formulae.
- if (!L->empty()) {
+ if (!L->isInnermost()) {
LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
return;
}
// Start collecting data and preparing for the solver.
- CollectChains();
+ // If number of registers is not the major cost, we cannot benefit from the
+ // current profitable chain optimization which is based on number of
+ // registers.
+  // FIXME: add profitable chain optimization for other kinds of major cost,
+  // for example the number of instructions.
+ if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain)
+ CollectChains();
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
@@ -5760,6 +5772,63 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MemorySSAWrapperPass>();
}
+using EqualValues = SmallVector<std::tuple<WeakVH, int64_t, DIExpression *>, 4>;
+using EqualValuesMap = DenseMap<DbgValueInst *, EqualValues>;
+
+static void DbgGatherEqualValues(Loop *L, ScalarEvolution &SE,
+ EqualValuesMap &DbgValueToEqualSet) {
+ for (auto &B : L->getBlocks()) {
+ for (auto &I : *B) {
+ auto DVI = dyn_cast<DbgValueInst>(&I);
+ if (!DVI)
+ continue;
+ auto V = DVI->getVariableLocation();
+ if (!V || !SE.isSCEVable(V->getType()))
+ continue;
+ auto DbgValueSCEV = SE.getSCEV(V);
+ EqualValues EqSet;
+ for (PHINode &Phi : L->getHeader()->phis()) {
+ if (V->getType() != Phi.getType())
+ continue;
+ if (!SE.isSCEVable(Phi.getType()))
+ continue;
+ auto PhiSCEV = SE.getSCEV(&Phi);
+ Optional<APInt> Offset =
+ SE.computeConstantDifference(DbgValueSCEV, PhiSCEV);
+ if (Offset && Offset->getMinSignedBits() <= 64)
+ EqSet.emplace_back(std::make_tuple(
+ &Phi, Offset.getValue().getSExtValue(), DVI->getExpression()));
+ }
+ DbgValueToEqualSet[DVI] = std::move(EqSet);
+ }
+ }
+}
+
+static void DbgApplyEqualValues(EqualValuesMap &DbgValueToEqualSet) {
+ for (auto A : DbgValueToEqualSet) {
+ auto DVI = A.first;
+ // Only update those that are now undef.
+ if (!isa_and_nonnull<UndefValue>(DVI->getVariableLocation()))
+ continue;
+ for (auto EV : A.second) {
+ auto V = std::get<WeakVH>(EV);
+ if (!V)
+ continue;
+ auto DbgDIExpr = std::get<DIExpression *>(EV);
+ auto Offset = std::get<int64_t>(EV);
+ auto &Ctx = DVI->getContext();
+ DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
+ if (Offset) {
+ SmallVector<uint64_t, 8> Ops;
+ DIExpression::appendOffset(Ops, Offset);
+ DbgDIExpr = DIExpression::prependOpcodes(DbgDIExpr, Ops, true);
+ }
+ DVI->setOperand(2, MetadataAsValue::get(Ctx, DbgDIExpr));
+ break;
+ }
+ }
+}
+
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DominatorTree &DT, LoopInfo &LI,
const TargetTransformInfo &TTI,
@@ -5775,12 +5844,17 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
Changed |=
LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged();
+ // Debug preservation - before we start removing anything, create equivalence
+ // sets for the llvm.dbg.value intrinsics.
+ EqualValuesMap DbgValueToEqualSet;
+ DbgGatherEqualValues(L, SE, DbgValueToEqualSet);
+
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakTrackingVH, 16> DeadInsts;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- SCEVExpander Rewriter(SE, DL, "lsr");
+ SCEVExpander Rewriter(SE, DL, "lsr", false);
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
@@ -5792,6 +5866,9 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
}
}
+
+ DbgApplyEqualValues(DbgValueToEqualSet);
+
return Changed;
}
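The DbgGatherEqualValues/DbgApplyEqualValues helpers added above follow a gather-before, apply-after pattern: before LSR rewrites anything, record which loop-header PHIs differ from a dbg.value's location only by a constant (computed through SCEV), and afterwards, if that location has become undef, retarget the dbg.value at a surviving equivalent and fold the constant difference into its expression. A minimal standalone C++ sketch of the same pattern, using toy AffineValue/DebugRecord types instead of the LLVM classes:

#include <cstdint>
#include <vector>

// Toy affine value: BaseId identifies the underlying induction expression,
// Addend is a constant offset (a crude stand-in for a SCEV).
struct AffineValue {
  int BaseId;
  int64_t Addend;
  bool Dead = false; // set when the transformation deletes the value
};

struct DebugRecord {
  AffineValue *Location;  // stand-in for the dbg.value operand
  int64_t ExprOffset = 0; // stand-in for a DW_OP offset in the DIExpression
};

struct EqualValue { AffineValue *V; int64_t Offset; };

// Gather: for each debug record, find candidate values that differ from its
// current location only by a constant, and remember that constant.
std::vector<EqualValue> gatherEqualValues(const DebugRecord &DR,
                                          std::vector<AffineValue> &Candidates) {
  std::vector<EqualValue> EqSet;
  for (AffineValue &C : Candidates)
    if (C.BaseId == DR.Location->BaseId)
      EqSet.push_back({&C, DR.Location->Addend - C.Addend});
  return EqSet;
}

// Apply: if the original location was killed, switch to the first surviving
// equivalent and fold the recorded constant difference into the expression.
void applyEqualValues(DebugRecord &DR, const std::vector<EqualValue> &EqSet) {
  if (!DR.Location->Dead)
    return; // location still valid, nothing to do
  for (const EqualValue &EV : EqSet) {
    if (EV.V->Dead)
      continue;
    DR.Location = EV.V;
    DR.ExprOffset += EV.Offset;
    break;
  }
}

The real code keys the map on the DbgValueInst and stores WeakVH handles, so a deleted PHI simply reads back as null; the Dead flag above plays that role in the sketch.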
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 285cba6ee205..495906e1a763 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -41,6 +41,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
@@ -287,6 +288,13 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
None, None, None, None, None);
TargetTransformInfo::PeelingPreferences PP =
gatherPeelingPreferences(L, SE, TTI, None, None);
+
+ TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
+ if (EnableMode & TM_Disable)
+ return LoopUnrollResult::Unmodified;
+ if (EnableMode & TM_ForcedByUser)
+ UP.UnrollAndJam = true;
+
if (AllowUnrollAndJam.getNumOccurrences() > 0)
UP.UnrollAndJam = AllowUnrollAndJam;
if (UnrollAndJamThreshold.getNumOccurrences() > 0)
@@ -299,10 +307,6 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
<< L->getHeader()->getParent()->getName() << "] Loop %"
<< L->getHeader()->getName() << "\n");
- TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
- if (EnableMode & TM_Disable)
- return LoopUnrollResult::Unmodified;
-
// A loop with any unroll pragma (enabling/disabling/count/etc) is left for
// the unroller, so long as it does not explicitly have unroll_and_jam
// metadata. This means #pragma nounroll will disable unroll and jam as well
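The reordering above makes the loop metadata the first thing consulted: llvm.loop.unroll_and_jam metadata that disables the transform now bails out before any unrolling preferences are computed, a user-forced transform turns the UnrollAndJam preference on, and the command-line override is still applied afterwards. A small self-contained sketch of that ordering, with simplified flag and option types standing in for the LLVM ones (the enumerator values are illustrative, not LLVM's):

// Toy version of LLVM's TransformationMode flags: loop metadata can disable
// a transform outright or force it regardless of the cost model.
enum TransformationMode : unsigned {
  TM_Unspecified = 0,
  TM_Enable = 1u << 0,
  TM_Disable = 1u << 1,
  TM_ForcedByUser = 1u << 2,
};

struct UnrollAndJamOptions {
  bool UnrollAndJam = false;
};

// Consult the per-loop metadata before anything else, as the hunk above now
// does: an explicit disable wins early, a user-forced transform enables the
// option, and a command-line override is applied last.
bool shouldTryUnrollAndJam(unsigned Mode, UnrollAndJamOptions &Opts,
                           bool CmdLineSet, bool CmdLineValue) {
  if (Mode & TM_Disable)
    return false;
  if (Mode & TM_ForcedByUser)
    Opts.UnrollAndJam = true;
  if (CmdLineSet)
    Opts.UnrollAndJam = CmdLineValue;
  return true;
}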
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 87f40bb7ba85..1b974576a3cc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -56,6 +56,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
@@ -75,13 +76,19 @@ using namespace llvm;
cl::opt<bool> llvm::ForgetSCEVInLoopUnroll(
"forget-scev-loop-unroll", cl::init(false), cl::Hidden,
cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
- " the current top-most loop. This is somtimes preferred to reduce"
+ " the current top-most loop. This is sometimes preferred to reduce"
" compile time."));
static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::Hidden,
cl::desc("The cost threshold for loop unrolling"));
+static cl::opt<unsigned>
+ UnrollOptSizeThreshold(
+ "unroll-optsize-threshold", cl::init(0), cl::Hidden,
+ cl::desc("The cost threshold for loop unrolling when optimizing for "
+ "size"));
+
static cl::opt<unsigned> UnrollPartialThreshold(
"unroll-partial-threshold", cl::Hidden,
cl::desc("The cost threshold for partial loop unrolling"));
@@ -115,10 +122,6 @@ static cl::opt<unsigned> UnrollFullMaxCount(
cl::desc(
"Set the max unroll count for full unrolling, for testing purposes"));
-static cl::opt<unsigned> UnrollPeelCount(
- "unroll-peel-count", cl::Hidden,
- cl::desc("Set the unroll peeling count, for testing purposes"));
-
static cl::opt<bool>
UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
@@ -149,15 +152,6 @@ static cl::opt<unsigned> FlatLoopTripCountThreshold(
"threshold, the loop is considered as flat and will be less "
"aggressively unrolled."));
-static cl::opt<bool>
- UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
- cl::desc("Allows loops to be peeled when the dynamic "
- "trip count is known to be low."));
-
-static cl::opt<bool> UnrollAllowLoopNestsPeeling(
- "unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden,
- cl::desc("Allows loop nests to be peeled."));
-
static cl::opt<bool> UnrollUnrollRemainder(
"unroll-remainder", cl::Hidden,
cl::desc("Allow the loop remainder to be unrolled."));
@@ -200,9 +194,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Threshold =
OptLevel > 2 ? UnrollThresholdAggressive : UnrollThresholdDefault;
UP.MaxPercentThresholdBoost = 400;
- UP.OptSizeThreshold = 0;
+ UP.OptSizeThreshold = UnrollOptSizeThreshold;
UP.PartialThreshold = 150;
- UP.PartialOptSizeThreshold = 0;
+ UP.PartialOptSizeThreshold = UnrollOptSizeThreshold;
UP.Count = 0;
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = std::numeric_limits<unsigned>::max();
@@ -224,8 +218,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
// Apply size attributes
bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
- llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
- PGSOQueryType::IRPass);
+ // Let unroll hints / pragmas take precedence over PGSO.
+ (hasUnrollTransformation(L) != TM_ForcedByUser &&
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
+ PGSOQueryType::IRPass));
if (OptForSize) {
UP.Threshold = UP.OptSizeThreshold;
UP.PartialThreshold = UP.PartialOptSizeThreshold;
@@ -275,39 +271,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
return UP;
}
-TargetTransformInfo::PeelingPreferences
-llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
- const TargetTransformInfo &TTI,
- Optional<bool> UserAllowPeeling,
- Optional<bool> UserAllowProfileBasedPeeling) {
- TargetTransformInfo::PeelingPreferences PP;
-
- // Default values
- PP.PeelCount = 0;
- PP.AllowPeeling = true;
- PP.AllowLoopNestsPeeling = false;
- PP.PeelProfiledIterations = true;
-
- // Get Target Specifc Values
- TTI.getPeelingPreferences(L, SE, PP);
-
- // User Specified Values using cl::opt
- if (UnrollPeelCount.getNumOccurrences() > 0)
- PP.PeelCount = UnrollPeelCount;
- if (UnrollAllowPeeling.getNumOccurrences() > 0)
- PP.AllowPeeling = UnrollAllowPeeling;
- if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
- PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
-
- // User Specifed values provided by argument
- if (UserAllowPeeling.hasValue())
- PP.AllowPeeling = *UserAllowPeeling;
- if (UserAllowProfileBasedPeeling.hasValue())
- PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
-
- return PP;
-}
-
namespace {
/// A struct to densely store the state of an instruction after unrolling at
@@ -384,7 +347,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// Only analyze inner loops. We can't properly estimate cost of nested loops
// and we won't visit inner loops again anyway.
- if (!L->empty())
+ if (!L->isInnermost())
return None;
// Don't simulate loops with a big or unknown tripcount
@@ -426,6 +389,10 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
assert(CostWorklist.empty() && "Must start with an empty cost list");
assert(PHIUsedList.empty() && "Must start with an empty phi used list");
CostWorklist.push_back(&RootI);
+ TargetTransformInfo::TargetCostKind CostKind =
+ RootI.getFunction()->hasMinSize() ?
+ TargetTransformInfo::TCK_CodeSize :
+ TargetTransformInfo::TCK_SizeAndLatency;
for (;; --Iteration) {
do {
Instruction *I = CostWorklist.pop_back_val();
@@ -466,7 +433,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// First accumulate the cost of this instruction.
if (!Cost.IsFree) {
- UnrolledCost += TTI.getUserCost(I, TargetTransformInfo::TCK_CodeSize);
+ UnrolledCost += TTI.getUserCost(I, CostKind);
LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
<< Iteration << "): ");
LLVM_DEBUG(I->dump());
@@ -506,6 +473,9 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
+ TargetTransformInfo::TargetCostKind CostKind =
+ L->getHeader()->getParent()->hasMinSize() ?
+ TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency;
// Simulate execution of each iteration of the loop counting instructions,
// which would be simplified.
// Since the same load will take different values on different iterations,
@@ -559,7 +529,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// Track this instruction's expected baseline cost when executing the
// rolled loop form.
- RolledDynamicCost += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
+ RolledDynamicCost += TTI.getUserCost(&I, CostKind);
// Visit the instruction to analyze its loop cost after unrolling,
// and if the visitor returns true, mark the instruction as free after
@@ -881,7 +851,7 @@ bool llvm::computeUnrollCount(
}
// 4th priority is loop peeling.
- computePeelCount(L, LoopSize, UP, PP, TripCount, SE);
+ computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
@@ -1073,7 +1043,7 @@ static LoopUnrollResult tryToUnrollLoop(
return LoopUnrollResult::Unmodified;
}
- // When automtatic unrolling is disabled, do not unroll unless overridden for
+ // When automatic unrolling is disabled, do not unroll unless overridden for
// this loop.
if (OnlyWhenForced && !(TM & TM_Enable))
return LoopUnrollResult::Unmodified;
@@ -1087,7 +1057,7 @@ static LoopUnrollResult tryToUnrollLoop(
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedFullUnrollMaxCount);
TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
- L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling);
+ L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
// as threshold later on.
@@ -1135,7 +1105,7 @@ static LoopUnrollResult tryToUnrollLoop(
// If the loop contains a convergent operation, the prelude we'd add
// to do the first few instructions before we hit the unrolled loop
// is unsafe -- it adds a control-flow dependency to the convergent
- // operation. Therefore restrict remainder loop (try unrollig without).
+ // operation. Therefore restrict remainder loop (try unrolling without).
//
// TODO: This is quite conservative. In practice, convergent_op()
// is likely to be called unconditionally in the loop. In this
@@ -1331,7 +1301,7 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
bool ForgetAllSCEV) {
return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
- 0, 0, 0, 0);
+ 0, 0, 0, 1);
}
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -1359,7 +1329,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
OnlyWhenForced, ForgetSCEV, /*Count*/ None,
/*Threshold*/ None, /*AllowPartial*/ false,
/*Runtime*/ false, /*UpperBound*/ false,
- /*AllowPeeling*/ false,
+ /*AllowPeeling*/ true,
/*AllowProfileBasedPeeling*/ false,
/*FullUnrollMaxCount*/ None) !=
LoopUnrollResult::Unmodified;
@@ -1401,7 +1371,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
}
// Otherwise erase the loop from the list if it was in the old loops.
- return OldLoops.count(SibLoop) != 0;
+ return OldLoops.contains(SibLoop);
});
Updater.addSiblingLoops(SibLoops);
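Two of the changes above interact with code size: the unroll cost analysis now uses TCK_CodeSize only when the function is minsize (TCK_SizeAndLatency otherwise), and profile-guided size optimization no longer forces the opt-size thresholds when the user explicitly requested unrolling with a pragma, though an optsize attribute on the function still does. A compact C++ sketch of both decisions, with toy types in place of TargetTransformInfo and the real preferences struct (the numeric defaults are illustrative):

// Toy cost kinds mirroring the two TTI cost kinds used above.
enum class CostKind { CodeSize, SizeAndLatency };

struct UnrollPrefs {
  unsigned Threshold = 150;      // illustrative default
  unsigned OptSizeThreshold = 0; // now settable via -unroll-optsize-threshold
  unsigned PartialThreshold = 150;
  unsigned PartialOptSizeThreshold = 0;
};

// Prefer the pure size model only for minsize functions.
CostKind pickCostKind(bool FunctionHasMinSize) {
  return FunctionHasMinSize ? CostKind::CodeSize : CostKind::SizeAndLatency;
}

// Apply size attributes: an unroll pragma overrides PGSO, but not an
// explicit optsize attribute on the function.
void applySizeAttributes(UnrollPrefs &UP, bool HasOptSizeAttr,
                         bool PGSOWantsSize, bool UnrollForcedByPragma) {
  bool OptForSize = HasOptSizeAttr || (!UnrollForcedByPragma && PGSOWantsSize);
  if (OptForSize) {
    UP.Threshold = UP.OptSizeThreshold;
    UP.PartialThreshold = UP.PartialOptSizeThreshold;
  }
}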
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 645a89bbd0ff..822a786fc7c7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,6 +32,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -98,6 +99,12 @@ static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
+static cl::opt<unsigned>
+ MSSAThreshold("loop-unswitch-memoryssa-threshold",
+ cl::desc("Max number of memory uses to explore during "
+ "partial unswitching analysis"),
+ cl::init(100), cl::Hidden);
+
namespace {
class LUAnalysisCache {
@@ -184,6 +191,7 @@ namespace {
Loop *CurrentLoop = nullptr;
DominatorTree *DT = nullptr;
MemorySSA *MSSA = nullptr;
+ AAResults *AA = nullptr;
std::unique_ptr<MemorySSAUpdater> MSSAU;
BasicBlock *LoopHeader = nullptr;
BasicBlock *LoopPreheader = nullptr;
@@ -217,6 +225,10 @@ namespace {
/// loop preheaders be inserted into the CFG.
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // Lazy BFI and BPI are marked as preserved here so Loop Unswitching
+ // can remain part of the same loop pass as LICM
+ AU.addPreserved<LazyBlockFrequencyInfoPass>();
+ AU.addPreserved<LazyBranchProbabilityInfoPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
if (EnableMSSALoopDependency) {
@@ -244,19 +256,22 @@ namespace {
bool tryTrivialLoopUnswitch(bool &Changed);
bool unswitchIfProfitable(Value *LoopCond, Constant *Val,
- Instruction *TI = nullptr);
+ Instruction *TI = nullptr,
+ ArrayRef<Instruction *> ToDuplicate = {});
void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
BasicBlock *ExitBlock, Instruction *TI);
void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
- Instruction *TI);
+ Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate = {});
void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val, bool IsEqual);
- void emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
- BasicBlock *TrueDest,
- BasicBlock *FalseDest,
- BranchInst *OldBranch, Instruction *TI);
+ void
+ emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest, BasicBlock *FalseDest,
+ BranchInst *OldBranch, Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate = {});
void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L);
@@ -523,6 +538,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LPM = &LPMRef;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
if (EnableMSSALoopDependency) {
MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
@@ -624,6 +640,145 @@ static bool equalityPropUnSafe(Value &LoopCond) {
return false;
}
+/// Check if the loop header has a conditional branch that is not
+/// loop-invariant, because it involves load instructions. If all paths from
+/// either the true or false successor to the header or loop exits do not
+/// modify the memory feeding the condition, perform 'partial unswitching'. That
+/// is, duplicate the instructions feeding the condition in the pre-header. Then
+/// unswitch on the duplicated condition. The condition is now known in the
+/// unswitched version for the 'invariant' path through the original loop.
+///
+/// If the branch condition of the header is partially invariant, return a pair
+/// containing the instructions to duplicate and a boolean Constant to update
+/// the condition in the loops created for the true or false successors.
+static std::pair<SmallVector<Instruction *, 4>, Constant *>
+hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
+ SmallVector<Instruction *, 4> ToDuplicate;
+
+ auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
+ if (!TI || !TI->isConditional())
+ return {};
+
+ auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+ // The case with the condition outside the loop should already be handled
+ // earlier.
+ if (!CondI || !L->contains(CondI))
+ return {};
+
+ ToDuplicate.push_back(CondI);
+
+ SmallVector<Value *, 4> WorkList;
+ WorkList.append(CondI->op_begin(), CondI->op_end());
+
+ SmallVector<MemoryAccess *, 4> AccessesToCheck;
+ SmallVector<MemoryLocation, 4> AccessedLocs;
+ while (!WorkList.empty()) {
+ Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+ if (!I || !L->contains(I))
+ continue;
+
+ // TODO: support additional instructions.
+ if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+ return {};
+
+ // Do not duplicate volatile and atomic loads.
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (LI->isVolatile() || LI->isAtomic())
+ return {};
+
+ ToDuplicate.push_back(I);
+ if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
+ if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
+ // Queue the defining access for the alias checks below.
+ AccessesToCheck.push_back(MemUse->getDefiningAccess());
+ AccessedLocs.push_back(MemoryLocation::get(I));
+ } else {
+ // MemoryDefs may clobber the location or may be atomic memory
+ // operations. Bail out.
+ return {};
+ }
+ }
+ WorkList.append(I->op_begin(), I->op_end());
+ }
+
+ if (ToDuplicate.size() <= 1)
+ return {};
+
+ auto HasNoClobbersOnPath =
+ [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header,
+ SmallVector<MemoryAccess *, 4> AccessesToCheck) {
+ // First, collect all blocks in the loop that are on a patch from Succ
+ // to the header.
+ SmallVector<BasicBlock *, 4> WorkList;
+ WorkList.push_back(Succ);
+ WorkList.push_back(Header);
+ SmallPtrSet<BasicBlock *, 4> Seen;
+ Seen.insert(Header);
+ while (!WorkList.empty()) {
+ BasicBlock *Current = WorkList.pop_back_val();
+ if (!L->contains(Current))
+ continue;
+ const auto &SeenIns = Seen.insert(Current);
+ if (!SeenIns.second)
+ continue;
+
+ WorkList.append(succ_begin(Current), succ_end(Current));
+ }
+
+ // Require at least 2 blocks on a path through the loop. This skips
+ // paths that directly exit the loop.
+ if (Seen.size() < 2)
+ return false;
+
+ // Next, check if there are any MemoryDefs that are on the path through
+ // the loop (in the Seen set) and they may-alias any of the locations in
+ // AccessedLocs. If that is the case, they may modify the condition and
+ // partial unswitching is not possible.
+ SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+ while (!AccessesToCheck.empty()) {
+ MemoryAccess *Current = AccessesToCheck.pop_back_val();
+ auto SeenI = SeenAccesses.insert(Current);
+ if (!SeenI.second || !Seen.contains(Current->getBlock()))
+ continue;
+
+ // Bail out if we exceeded the threshold.
+ if (SeenAccesses.size() >= MSSAThreshold)
+ return false;
+
+ // MemoryUse are read-only accesses.
+ if (isa<MemoryUse>(Current))
+ continue;
+
+ // For a MemoryDef, check if it aliases any of the locations feeding
+ // the original condition.
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+ if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
+ return isModSet(
+ AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+ }))
+ return false;
+ }
+
+ for (Use &U : Current->uses())
+ AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+ }
+
+ return true;
+ };
+
+ // If we branch to the same successor, partial unswitching will not be
+ // beneficial.
+ if (TI->getSuccessor(0) == TI->getSuccessor(1))
+ return {};
+
+ if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck))
+ return {ToDuplicate, ConstantInt::getTrue(TI->getContext())};
+ if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck))
+ return {ToDuplicate, ConstantInt::getFalse(TI->getContext())};
+
+ return {};
+}
+
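The HasNoClobbersOnPath lambda above carries the legality argument for the new partial unswitching: collect every block of the loop reachable from one successor of the header branch, then require that no MemoryDef in those blocks may alias the locations feeding the condition. A simplified standalone sketch of that walk over a toy CFG, with a single boolean standing in for the MemorySSA and alias-analysis queries:

#include <set>
#include <vector>

struct Block {
  std::vector<Block *> Succs;
  bool InLoop = true;
  bool MayWriteConditionMemory = false; // stand-in for a clobbering MemoryDef
};

// Returns true if no block on any path from Succ back around to Header can
// modify the memory the header condition reads.
bool noClobbersOnPath(Block *Succ, Block *Header) {
  std::vector<Block *> Worklist{Succ, Header};
  std::set<Block *> Seen{Header}; // pre-seeded so the walk stops at the header
  while (!Worklist.empty()) {
    Block *Cur = Worklist.back();
    Worklist.pop_back();
    if (!Cur->InLoop)
      continue; // left the loop; this path no longer matters
    if (!Seen.insert(Cur).second)
      continue; // already visited
    Worklist.insert(Worklist.end(), Cur->Succs.begin(), Cur->Succs.end());
  }
  if (Seen.size() < 2)
    return false; // the branch exits the loop immediately, nothing to gain
  for (Block *B : Seen)
    if (B->MayWriteConditionMemory)
      return false; // a write on the path may change the condition
  return true;
}

If either successor passes this check, the duplicated condition is known in the corresponding unswitched copy of the loop, which is what the caller encodes as the returned ConstantInt.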
/// Do actual work and unswitch loop if possible and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
@@ -661,7 +816,7 @@ bool LoopUnswitch::processCurrentLoop() {
// FIXME: Use Function::hasOptSize().
if (OptimizeForSize ||
LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
- return false;
+ return Changed;
// Run through the instructions in the loop, keeping track of three things:
//
@@ -685,10 +840,10 @@ bool LoopUnswitch::processCurrentLoop() {
if (!CB)
continue;
if (CB->isConvergent())
- return false;
+ return Changed;
if (auto *II = dyn_cast<InvokeInst>(&I))
if (!II->getUnwindDest()->canSplitPredecessors())
- return false;
+ return Changed;
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::experimental_guard)
Guards.push_back(II);
@@ -823,6 +978,28 @@ bool LoopUnswitch::processCurrentLoop() {
}
}
}
+
+ // Check if there is a header condition that is invariant along the path from
+ // either the true or false successor to the header. This allows unswitching
+ // conditions depending on memory accesses, if there's a path not clobbering
+ // the memory locations. Check if this transform has been disabled using
+ // metadata, to avoid unswitching the same loop multiple times.
+ if (MSSA &&
+ !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
+ auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA);
+ if (!ToDuplicate.first.empty()) {
+ LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
+ << *ToDuplicate.first[0] << "\n");
+ ++NumBranches;
+ unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second,
+ CurrentLoop->getHeader()->getTerminator(),
+ ToDuplicate.first);
+
+ RedoLoop = false;
+ return true;
+ }
+ }
+
return Changed;
}
@@ -880,7 +1057,8 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
/// simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
- Instruction *TI) {
+ Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate) {
// Check to see if it would be profitable to unswitch current loop.
if (!BranchesInfo.costAllowsUnswitching()) {
LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
@@ -900,31 +1078,69 @@ bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
return false;
}
- unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI);
+ unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate);
return true;
}
/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
/// otherwise branch to FalseDest. Insert the code immediately before OldBranch
/// and remove (but not erase!) it from the function.
-void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
- BasicBlock *TrueDest,
- BasicBlock *FalseDest,
- BranchInst *OldBranch,
- Instruction *TI) {
+void LoopUnswitch::emitPreheaderBranchOnCondition(
+ Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest,
+ BranchInst *OldBranch, Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate) {
assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
assert(TrueDest != FalseDest && "Branch targets should be different");
+
// Insert a conditional branch on LIC to the two preheaders. The original
// code is the true version and the new code is the false version.
Value *BranchVal = LIC;
bool Swapped = false;
- if (!isa<ConstantInt>(Val) ||
- Val->getType() != Type::getInt1Ty(LIC->getContext()))
- BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
- else if (Val != ConstantInt::getTrue(Val->getContext())) {
- // We want to enter the new loop when the condition is true.
- std::swap(TrueDest, FalseDest);
- Swapped = true;
+
+ if (!ToDuplicate.empty()) {
+ ValueToValueMapTy Old2New;
+ for (Instruction *I : reverse(ToDuplicate)) {
+ auto *New = I->clone();
+ New->insertBefore(OldBranch);
+ RemapInstruction(New, Old2New,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ Old2New[I] = New;
+
+ if (MSSAU) {
+ MemorySSA *MSSA = MSSAU->getMemorySSA();
+ auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I));
+ if (!MemA)
+ continue;
+
+ Loop *L = LI->getLoopFor(I->getParent());
+ auto *DefiningAccess = MemA->getDefiningAccess();
+ // Get the first defining access before the loop.
+ while (L->contains(DefiningAccess->getBlock())) {
+ // If the defining access is a MemoryPhi, get the incoming
+ // value for the pre-header as defining access.
+ if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
+ DefiningAccess =
+ MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
+ } else {
+ DefiningAccess =
+ cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
+ }
+ }
+ MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
+ MemorySSA::BeforeTerminator);
+ }
+ }
+ BranchVal = Old2New[ToDuplicate[0]];
+ } else {
+
+ if (!isa<ConstantInt>(Val) ||
+ Val->getType() != Type::getInt1Ty(LIC->getContext()))
+ BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
+ else if (Val != ConstantInt::getTrue(Val->getContext())) {
+ // We want to enter the new loop when the condition is true.
+ std::swap(TrueDest, FalseDest);
+ Swapped = true;
+ }
}
// Old branch will be removed, so save its parent and successor to update the
@@ -955,10 +1171,11 @@ void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) {
Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc});
}
- DT->applyUpdates(Updates);
if (MSSAU)
- MSSAU->applyUpdates(Updates, *DT);
+ MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
+ else
+ DT->applyUpdates(Updates);
}
// If either edge is critical, split it. This helps preserve LoopSimplify
@@ -1207,8 +1424,9 @@ void LoopUnswitch::splitExitEdges(
/// We determined that the loop is profitable to unswitch when LIC equal Val.
/// Split it into loop versions and test the condition outside of either loop.
/// Return the loops created as Out1/Out2.
-void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
- Loop *L, Instruction *TI) {
+void LoopUnswitch::unswitchNontrivialCondition(
+ Value *LIC, Constant *Val, Loop *L, Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate) {
Function *F = LoopHeader->getParent();
LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
<< LoopHeader->getName() << " [" << L->getBlocks().size()
@@ -1233,7 +1451,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
LoopBlocks.push_back(NewPreheader);
// We want the loop to come after the preheader, but before the exit blocks.
- LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+ llvm::append_range(LoopBlocks, L->blocks());
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -1247,7 +1465,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
L->getUniqueExitBlocks(ExitBlocks);
// Add exit blocks to the loop blocks.
- LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
+ llvm::append_range(LoopBlocks, ExitBlocks);
// Next step, clone all of the basic blocks that make up the loop (including
// the loop preheader and exit blocks), keeping track of the mapping between
@@ -1340,7 +1558,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
// Emit the new branch that selects between the two versions of this loop.
emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
- TI);
+ TI, ToDuplicate);
if (MSSAU) {
// Update MemoryPhis in Exit blocks.
MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
@@ -1362,17 +1580,38 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
// iteration.
WeakTrackingVH LICHandle(LIC);
- // Now we rewrite the original code to know that the condition is true and the
- // new code to know that the condition is false.
- rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
-
- // It's possible that simplifying one loop could cause the other to be
- // changed to another value or a constant. If its a constant, don't simplify
- // it.
- if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
- LICHandle && !isa<Constant>(LICHandle))
- rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
- /*IsEqual=*/true);
+ if (ToDuplicate.empty()) {
+ // Now we rewrite the original code to know that the condition is true and
+ // the new code to know that the condition is false.
+ rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
+
+ // It's possible that simplifying one loop could cause the other to be
+ // changed to another value or a constant. If it's a constant, don't
+ // simplify it.
+ if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
+ LICHandle && !isa<Constant>(LICHandle))
+ rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
+ /*IsEqual=*/true);
+ } else {
+ // Partial unswitching. Update the condition in the right loop with the
+ // constant.
+ auto *CC = cast<ConstantInt>(Val);
+ if (CC->isOneValue()) {
+ rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val,
+ /*IsEqual=*/true);
+ } else
+ rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true);
+
+ // Mark the new loop as partially unswitched, to avoid unswitching on the
+ // same condition again.
+ auto &Context = NewLoop->getHeader()->getContext();
+ MDNode *DisableUnswitchMD = MDNode::get(
+ Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
+ MDNode *NewLoopID = makePostTransformationMetadata(
+ Context, L->getLoopID(), {"llvm.loop.unswitch.partial"},
+ {DisableUnswitchMD});
+ NewLoop->setLoopID(NewLoopID);
+ }
if (MSSA && VerifyMemorySSA)
MSSA->verifyMemorySSA();
@@ -1381,9 +1620,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
/// Remove all instances of I from the worklist vector specified.
static void removeFromWorklist(Instruction *I,
std::vector<Instruction *> &Worklist) {
-
- Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I),
- Worklist.end());
+ llvm::erase_value(Worklist, I);
}
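removeFromWorklist now delegates to llvm::erase_value, which packages the standard erase-remove idiom. For reference, a plain C++ equivalent of what the helper does:

#include <algorithm>
#include <vector>

// Remove every occurrence of V from Worklist (erase-remove idiom).
template <typename T>
void eraseValue(std::vector<T> &Worklist, const T &V) {
  Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), V),
                 Worklist.end());
}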
/// When we find that I really equals V, remove I from the
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 06b684ef1e70..2ff1e8480749 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -59,6 +59,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -114,17 +115,18 @@ static cl::opt<unsigned> LVLoopDepthThreshold(
namespace {
-struct LoopVersioningLICM : public LoopPass {
+struct LoopVersioningLICMLegacyPass : public LoopPass {
static char ID;
- LoopVersioningLICM()
- : LoopPass(ID), LoopDepthThreshold(LVLoopDepthThreshold),
- InvariantThreshold(LVInvarThreshold) {
- initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry());
+ LoopVersioningLICMLegacyPass() : LoopPass(ID) {
+ initializeLoopVersioningLICMLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ StringRef getPassName() const override { return "Loop Versioning for LICM"; }
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
@@ -138,13 +140,25 @@ struct LoopVersioningLICM : public LoopPass {
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
+};
- StringRef getPassName() const override { return "Loop Versioning for LICM"; }
+struct LoopVersioningLICM {
+ // We don't explicitly pass in LoopAccessInfo to the constructor since the
+ // loop versioning might return early due to instructions that are not safe
+ // for versioning. By passing the proxy instead, the construction of
+ // LoopAccessInfo will take place only when it's necessary.
+ LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE,
+ OptimizationRemarkEmitter *ORE,
+ function_ref<const LoopAccessInfo &(Loop *)> GetLAI)
+ : AA(AA), SE(SE), GetLAI(GetLAI),
+ LoopDepthThreshold(LVLoopDepthThreshold),
+ InvariantThreshold(LVInvarThreshold), ORE(ORE) {}
+
+ bool runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT);
void reset() {
AA = nullptr;
SE = nullptr;
- LAA = nullptr;
CurLoop = nullptr;
LoadAndStoreCounter = 0;
InvariantCounter = 0;
@@ -169,12 +183,12 @@ private:
// Current ScalarEvolution
ScalarEvolution *SE = nullptr;
- // Current LoopAccessAnalysis
- LoopAccessLegacyAnalysis *LAA = nullptr;
-
// Current Loop's LoopAccessInfo
const LoopAccessInfo *LAI = nullptr;
+ // Proxy for retrieving LoopAccessInfo.
+ function_ref<const LoopAccessInfo &(Loop *)> GetLAI;
+
// The current loop we are working on.
Loop *CurLoop = nullptr;
@@ -253,7 +267,7 @@ bool LoopVersioningLICM::legalLoopStructure() {
// We need to be able to compute the loop trip count in order
// to generate the bound checks.
const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop);
- if (ExitCount == SE->getCouldNotCompute()) {
+ if (isa<SCEVCouldNotCompute>(ExitCount)) {
LLVM_DEBUG(dbgs() << " loop does not has trip count\n");
return false;
}
@@ -400,8 +414,8 @@ bool LoopVersioningLICM::legalLoopInstructions() {
return false;
}
}
- // Get LoopAccessInfo from current loop.
- LAI = &LAA->getInfo(CurLoop);
+ // Get LoopAccessInfo from current loop via the proxy.
+ LAI = &GetLAI(CurLoop);
// Check LoopAccessInfo for need of runtime check.
if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
@@ -539,8 +553,8 @@ void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
MDBuilder MDB(I->getContext());
MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain");
StringRef Name = "LVAliasScope";
- SmallVector<Metadata *, 4> Scopes, NoAliases;
MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+ SmallVector<Metadata *, 4> Scopes{NewScope}, NoAliases{NewScope};
// Iterate over each instruction of loop.
// set no-alias for all load & store instructions.
for (auto *Block : CurLoop->getBlocks()) {
@@ -548,8 +562,6 @@ void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
// Only interested in instruction that may modify or read memory.
if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory())
continue;
- Scopes.push_back(NewScope);
- NoAliases.push_back(NewScope);
// Set no-alias for current instruction.
Inst.setMetadata(
LLVMContext::MD_noalias,
@@ -564,30 +576,38 @@ void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
}
}
-bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool LoopVersioningLICMLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipLoop(L))
+ return false;
+
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ OptimizationRemarkEmitter *ORE =
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ auto GetLAI = [&](Loop *L) -> const LoopAccessInfo & {
+ return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(L);
+ };
+
+ return LoopVersioningLICM(AA, SE, ORE, GetLAI).runOnLoop(L, LI, DT);
+}
+
+bool LoopVersioningLICM::runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT) {
// This will automatically release all resources hold by the current
// LoopVersioningLICM object.
AutoResetter Resetter(*this);
- if (skipLoop(L))
- return false;
-
// Do not do the transformation if disabled by metadata.
if (hasLICMVersioningTransformation(L) & TM_Disable)
return false;
- // Get Analysis information.
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
- ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- LAI = nullptr;
// Set Current Loop
CurLoop = L;
CurAST.reset(new AliasSetTracker(*AA));
// Loop over the body of this loop, construct AST.
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
for (auto *Block : L->getBlocks()) {
if (LI->getLoopFor(Block) == L) // Ignore blocks in subloop.
CurAST->add(*Block); // Incorporate the specified basic block
@@ -602,8 +622,8 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Do loop versioning.
// Create memcheck for memory accessed inside loop.
// Clone original loop, and set blocks properly.
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopVersioning LVer(*LAI, CurLoop, LI, DT, SE, true);
+ LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
+ CurLoop, LI, DT, SE);
LVer.versionLoop();
// Set Loop Versioning metaData for original loop.
addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData);
@@ -621,9 +641,9 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
}
-char LoopVersioningLICM::ID = 0;
+char LoopVersioningLICMLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopVersioningLICM, "loop-versioning-licm",
+INITIALIZE_PASS_BEGIN(LoopVersioningLICMLegacyPass, "loop-versioning-licm",
"Loop Versioning For LICM", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@@ -634,7 +654,31 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(LoopVersioningLICM, "loop-versioning-licm",
+INITIALIZE_PASS_END(LoopVersioningLICMLegacyPass, "loop-versioning-licm",
"Loop Versioning For LICM", false, false)
-Pass *llvm::createLoopVersioningLICMPass() { return new LoopVersioningLICM(); }
+Pass *llvm::createLoopVersioningLICMPass() {
+ return new LoopVersioningLICMLegacyPass();
+}
+
+namespace llvm {
+
+PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &LAR,
+ LPMUpdater &U) {
+ AliasAnalysis *AA = &LAR.AA;
+ ScalarEvolution *SE = &LAR.SE;
+ DominatorTree *DT = &LAR.DT;
+ LoopInfo *LI = &LAR.LI;
+ const Function *F = L.getHeader()->getParent();
+ OptimizationRemarkEmitter ORE(F);
+
+ auto GetLAI = [&](Loop *L) -> const LoopAccessInfo & {
+ return AM.getResult<LoopAccessAnalysis>(*L, LAR);
+ };
+
+ if (!LoopVersioningLICM(AA, SE, &ORE, GetLAI).runOnLoop(&L, LI, DT))
+ return PreservedAnalyses::all();
+ return getLoopPassPreservedAnalyses();
+}
+} // namespace llvm
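The refactoring above splits LoopVersioningLICM into a legacy wrapper, a new-PM entry point, and a shared implementation that receives LoopAccessInfo through a function_ref, so the expensive analysis is only computed after the cheap structural checks succeed. A small sketch of that lazy-analysis-via-callback shape, using std::function and toy analysis types rather than the LLVM classes:

#include <functional>
#include <iostream>

// Stand-ins for Loop and LoopAccessInfo.
struct Loop {};
struct LoopAccessInfo { int NumRuntimeChecks = 2; };

// Shared implementation: it does not take the analysis up front. It takes a
// callback and only invokes it once the cheap legality checks have passed,
// so the legacy pass and the new-PM wrapper can each plug in their own
// analysis lookup.
class VersioningImpl {
  std::function<const LoopAccessInfo &(Loop *)> GetLAI;

public:
  explicit VersioningImpl(std::function<const LoopAccessInfo &(Loop *)> GetLAI)
      : GetLAI(std::move(GetLAI)) {}

  bool run(Loop *L, bool CheapChecksPassed) {
    if (!CheapChecksPassed)
      return false; // never pay for the expensive analysis
    const LoopAccessInfo &LAI = GetLAI(L);
    return LAI.NumRuntimeChecks > 0; // versioning only pays off with checks
  }
};

int main() {
  Loop L;
  LoopAccessInfo Cached; // what an analysis manager would hand back
  VersioningImpl Impl([&](Loop *) -> const LoopAccessInfo & { return Cached; });
  std::cout << (Impl.run(&L, /*CheapChecksPassed=*/true) ? "version" : "skip")
            << "\n";
}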
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index fddf28c281fc..bb30c48127a0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -43,10 +43,10 @@ STATISTIC(ObjectSizeIntrinsicsHandled,
"Number of 'objectsize' intrinsic calls handled");
static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) {
- Value *Op = II->getOperand(0);
-
- return isa<Constant>(Op) ? ConstantInt::getTrue(II->getType())
- : ConstantInt::getFalse(II->getType());
+ if (auto *C = dyn_cast<Constant>(II->getOperand(0)))
+ if (C->isManifestConstant())
+ return ConstantInt::getTrue(II->getType());
+ return ConstantInt::getFalse(II->getType());
}
static bool replaceConditionalBranchesOnConstant(Instruction *II,
@@ -78,7 +78,7 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II,
Other->removePredecessor(Source);
BI->eraseFromParent();
BranchInst::Create(Target, Source);
- if (pred_begin(Other) == pred_end(Other))
+ if (pred_empty(Other))
HasDeadBlocks = true;
}
}
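The is.constant lowering above now answers true only for manifest constants, i.e. constants that fold to an actual value at compile time, rather than for anything that merely has Constant type (such as an expression involving a global's address, which is not known until link or load time). A rough standalone sketch of that distinction over a toy constant-expression node; the recursion mirrors the idea of Constant::isManifestConstant, not its exact implementation:

#include <vector>

// Toy constant-expression node.
struct ConstNode {
  bool RefersToGlobalAddress = false; // relocatable, not a compile-time value
  std::vector<const ConstNode *> Operands;
};

// A constant is "manifest" only if nothing in the expression tree depends on
// an address that is unknown until link or load time.
bool isManifestConstant(const ConstNode &C) {
  if (C.RefersToGlobalAddress)
    return false;
  for (const ConstNode *Op : C.Operands)
    if (!isManifestConstant(*Op))
      return false;
  return true;
}

// llvm.is.constant(x) then folds to true only for manifest constants.
bool lowerIsConstant(const ConstNode *MaybeConst) {
  return MaybeConst && isManifestConstant(*MaybeConst);
}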
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 0fe7dd9cfb39..da13075dfee2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -24,10 +24,8 @@
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/MisExpect.h"
using namespace llvm;
@@ -48,10 +46,10 @@ STATISTIC(ExpectIntrinsicsHandled,
// 'select' instructions. It may be worthwhile to hoist these values to some
// shared space, so they can be used directly by other passes.
-static cl::opt<uint32_t> LikelyBranchWeight(
+cl::opt<uint32_t> llvm::LikelyBranchWeight(
"likely-branch-weight", cl::Hidden, cl::init(2000),
cl::desc("Weight of the branch likely to be taken (default = 2000)"));
-static cl::opt<uint32_t> UnlikelyBranchWeight(
+cl::opt<uint32_t> llvm::UnlikelyBranchWeight(
"unlikely-branch-weight", cl::Hidden, cl::init(1),
cl::desc("Weight of the branch unlikely to be taken (default = 1)"));
@@ -102,13 +100,7 @@ static bool handleSwitchExpect(SwitchInst &SI) {
uint64_t Index = (Case == *SI.case_default()) ? 0 : Case.getCaseIndex() + 1;
Weights[Index] = LikelyBranchWeightVal;
- SI.setMetadata(LLVMContext::MD_misexpect,
- MDBuilder(CI->getContext())
- .createMisExpect(Index, LikelyBranchWeightVal,
- UnlikelyBranchWeightVal));
-
SI.setCondition(ArgValue);
- misexpect::checkFrontendInstrumentation(SI);
SI.setMetadata(LLVMContext::MD_prof,
MDBuilder(CI->getContext()).createBranchWeights(Weights));
@@ -317,7 +309,6 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
MDBuilder MDB(CI->getContext());
MDNode *Node;
- MDNode *ExpNode;
uint32_t LikelyBranchWeightVal, UnlikelyBranchWeightVal;
std::tie(LikelyBranchWeightVal, UnlikelyBranchWeightVal) =
@@ -327,24 +318,16 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
(Predicate == CmpInst::ICMP_EQ)) {
Node =
MDB.createBranchWeights(LikelyBranchWeightVal, UnlikelyBranchWeightVal);
- ExpNode =
- MDB.createMisExpect(0, LikelyBranchWeightVal, UnlikelyBranchWeightVal);
} else {
Node =
MDB.createBranchWeights(UnlikelyBranchWeightVal, LikelyBranchWeightVal);
- ExpNode =
- MDB.createMisExpect(1, LikelyBranchWeightVal, UnlikelyBranchWeightVal);
}
- BSI.setMetadata(LLVMContext::MD_misexpect, ExpNode);
-
if (CmpI)
CmpI->setOperand(0, ArgValue);
else
BSI.setCondition(ArgValue);
- misexpect::checkFrontendInstrumentation(BSI);
-
BSI.setMetadata(LLVMContext::MD_prof, Node);
return true;
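With the misexpect instrumentation removed, the expect lowering above still does its core job: map the expected value onto a likely/unlikely weight pair and attach it as branch_weights metadata. A toy sketch of that weight selection, using the 2000/1 defaults of the -likely-branch-weight and -unlikely-branch-weight options shown earlier in this file:

#include <cstdint>
#include <utility>

// Pick (true-weight, false-weight) for a branch whose condition came from
// __builtin_expect. The weights later become !prof branch_weights metadata.
std::pair<std::uint32_t, std::uint32_t>
expectBranchWeights(bool ConditionExpectedTrue,
                    std::uint32_t Likely = 2000, std::uint32_t Unlikely = 1) {
  return ConditionExpectedTrue ? std::make_pair(Likely, Unlikely)
                               : std::make_pair(Unlikely, Likely);
}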
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 90314b17b5e2..8e251ca940a3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -42,6 +42,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/MatrixUtils.h"
using namespace llvm;
using namespace PatternMatch;
@@ -61,6 +63,9 @@ static cl::opt<unsigned> TileSize(
"fuse-matrix-tile-size", cl::init(4), cl::Hidden,
cl::desc(
"Tile size for matrix instruction fusion using square-shaped tiles."));
+static cl::opt<bool> TileUseLoops("fuse-matrix-use-loops", cl::init(false),
+ cl::Hidden,
+ cl::desc("Generate loop nest for tiling."));
static cl::opt<bool> ForceFusion(
"force-fuse-matrix", cl::init(false), cl::Hidden,
cl::desc("Force matrix instruction fusion even if not profitable."));
@@ -182,10 +187,10 @@ class LowerMatrixIntrinsics {
Function &Func;
const DataLayout &DL;
const TargetTransformInfo &TTI;
- AliasAnalysis &AA;
- DominatorTree &DT;
- LoopInfo &LI;
- OptimizationRemarkEmitter &ORE;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ OptimizationRemarkEmitter *ORE;
/// Contains estimates of the number of operations (loads, stores, compute) required to lower a matrix operation.
struct OpInfoTy {
@@ -241,7 +246,7 @@ class LowerMatrixIntrinsics {
void setVector(unsigned i, Value *V) { Vectors[i] = V; }
- Type *getElementType() { return getVectorTy()->getElementType(); }
+ Type *getElementType() const { return getVectorTy()->getElementType(); }
unsigned getNumVectors() const {
if (isColumnMajor())
@@ -271,7 +276,7 @@ class LowerMatrixIntrinsics {
return getVectorTy();
}
- VectorType *getVectorTy() {
+ VectorType *getVectorTy() const {
return cast<VectorType>(Vectors[0]->getType());
}
@@ -329,9 +334,8 @@ class LowerMatrixIntrinsics {
Value *extractVector(unsigned I, unsigned J, unsigned NumElts,
IRBuilder<> &Builder) const {
Value *Vec = isColumnMajor() ? getColumn(J) : getRow(I);
- Value *Undef = UndefValue::get(Vec->getType());
return Builder.CreateShuffleVector(
- Vec, Undef, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
+ Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
"block");
}
};
@@ -393,8 +397,8 @@ class LowerMatrixIntrinsics {
public:
LowerMatrixIntrinsics(Function &F, TargetTransformInfo &TTI,
- AliasAnalysis &AA, DominatorTree &DT, LoopInfo &LI,
- OptimizationRemarkEmitter &ORE)
+ AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
+ OptimizationRemarkEmitter *ORE)
: Func(F), DL(F.getParent()->getDataLayout()), TTI(TTI), AA(AA), DT(DT),
LI(LI), ORE(ORE) {}
@@ -442,12 +446,11 @@ public:
// Otherwise split MatrixVal.
SmallVector<Value *, 16> SplitVecs;
- Value *Undef = UndefValue::get(VType);
for (unsigned MaskStart = 0;
MaskStart < cast<FixedVectorType>(VType)->getNumElements();
MaskStart += SI.getStride()) {
Value *V = Builder.CreateShuffleVector(
- MatrixVal, Undef, createSequentialMask(MaskStart, SI.getStride(), 0),
+ MatrixVal, createSequentialMask(MaskStart, SI.getStride(), 0),
"split");
SplitVecs.push_back(V);
}
@@ -485,6 +488,7 @@ public:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul: // Scalar multiply.
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::Mul:
case Instruction::Sub:
@@ -527,8 +531,7 @@ public:
// list.
LLVM_DEBUG(dbgs() << "Forward-propagate shapes:\n");
while (!WorkList.empty()) {
- Instruction *Inst = WorkList.back();
- WorkList.pop_back();
+ Instruction *Inst = WorkList.pop_back_val();
// New entry, set the value and insert operands
bool Propagate = false;
@@ -598,8 +601,7 @@ public:
// worklist.
LLVM_DEBUG(dbgs() << "Backward-propagate shapes:\n");
while (!WorkList.empty()) {
- Value *V = WorkList.back();
- WorkList.pop_back();
+ Value *V = WorkList.pop_back_val();
size_t BeforeProcessingV = WorkList.size();
if (!isa<Instruction>(V))
@@ -721,14 +723,18 @@ public:
Value *Op2;
if (auto *BinOp = dyn_cast<BinaryOperator>(Inst))
Changed |= VisitBinaryOperator(BinOp);
+ if (auto *UnOp = dyn_cast<UnaryOperator>(Inst))
+ Changed |= VisitUnaryOperator(UnOp);
if (match(Inst, m_Load(m_Value(Op1))))
Changed |= VisitLoad(cast<LoadInst>(Inst), Op1, Builder);
else if (match(Inst, m_Store(m_Value(Op1), m_Value(Op2))))
Changed |= VisitStore(cast<StoreInst>(Inst), Op1, Op2, Builder);
}
- RemarkGenerator RemarkGen(Inst2ColumnMatrix, ORE, Func);
- RemarkGen.emitRemarks();
+ if (ORE) {
+ RemarkGenerator RemarkGen(Inst2ColumnMatrix, *ORE, Func);
+ RemarkGen.emitRemarks();
+ }
for (Instruction *Inst : reverse(ToRemove))
Inst->eraseFromParent();
@@ -934,10 +940,8 @@ public:
unsigned NumElts = cast<FixedVectorType>(Col->getType())->getNumElements();
assert(NumElts >= BlockNumElts && "Too few elements for current block");
- Value *Undef = UndefValue::get(Block->getType());
Block = Builder.CreateShuffleVector(
- Block, Undef,
- createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts));
+ Block, createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts));
// If Col is 7 long and I is 2 and BlockNumElts is 2 the mask is: 0, 1, 7,
// 8, 4, 5, 6
@@ -1085,7 +1089,7 @@ public:
MemoryLocation StoreLoc = MemoryLocation::get(Store);
MemoryLocation LoadLoc = MemoryLocation::get(Load);
- AliasResult LdAliased = AA.alias(LoadLoc, StoreLoc);
+ AliasResult LdAliased = AA->alias(LoadLoc, StoreLoc);
// If we can statically determine noalias we're good.
if (!LdAliased)
@@ -1101,14 +1105,17 @@ public:
// as we adjust Check0 and Check1's branches.
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
for (BasicBlock *Succ : successors(Check0))
- DTUpdates.push_back({DT.Delete, Check0, Succ});
+ DTUpdates.push_back({DT->Delete, Check0, Succ});
- BasicBlock *Check1 = SplitBlock(MatMul->getParent(), MatMul, nullptr, &LI,
- nullptr, "alias_cont");
+ BasicBlock *Check1 =
+ SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
+ nullptr, "alias_cont");
BasicBlock *Copy =
- SplitBlock(MatMul->getParent(), MatMul, nullptr, &LI, nullptr, "copy");
- BasicBlock *Fusion = SplitBlock(MatMul->getParent(), MatMul, nullptr, &LI,
- nullptr, "no_alias");
+ SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
+ nullptr, "copy");
+ BasicBlock *Fusion =
+ SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
+ nullptr, "no_alias");
// Check if the loaded memory location begins before the end of the store
// location. If the condition holds, they might overlap, otherwise they are
@@ -1152,11 +1159,11 @@ public:
PHI->addIncoming(NewLd, Copy);
// Adjust DT.
- DTUpdates.push_back({DT.Insert, Check0, Check1});
- DTUpdates.push_back({DT.Insert, Check0, Fusion});
- DTUpdates.push_back({DT.Insert, Check1, Copy});
- DTUpdates.push_back({DT.Insert, Check1, Fusion});
- DT.applyUpdates(DTUpdates);
+ DTUpdates.push_back({DT->Insert, Check0, Check1});
+ DTUpdates.push_back({DT->Insert, Check0, Fusion});
+ DTUpdates.push_back({DT->Insert, Check1, Copy});
+ DTUpdates.push_back({DT->Insert, Check1, Fusion});
+ DT->applyUpdates(DTUpdates);
return PHI;
}
@@ -1202,6 +1209,63 @@ public:
return Res;
}
+ void createTiledLoops(CallInst *MatMul, Value *LPtr, ShapeInfo LShape,
+ Value *RPtr, ShapeInfo RShape, StoreInst *Store,
+ bool AllowContract) {
+ auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
+
+ // Create the main tiling loop nest.
+ TileInfo TI(LShape.NumRows, RShape.NumColumns, LShape.NumColumns, TileSize);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ Instruction *InsertI = cast<Instruction>(MatMul);
+ BasicBlock *Start = InsertI->getParent();
+ BasicBlock *End =
+ SplitBlock(InsertI->getParent(), InsertI, DT, LI, nullptr, "continue");
+ IRBuilder<> Builder(MatMul);
+ BasicBlock *InnerBody = TI.CreateTiledLoops(Start, End, Builder, DTU, *LI);
+
+ Type *TileVecTy =
+ FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize);
+ MatrixTy TileResult;
+ // Insert in the inner loop header.
+ Builder.SetInsertPoint(TI.InnerLoopHeader->getTerminator());
+ // Create PHI nodes for the result columns to accumulate across iterations.
+ SmallVector<PHINode *, 4> ColumnPhis;
+ for (unsigned I = 0; I < TileSize; I++) {
+ auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I));
+ Phi->addIncoming(ConstantAggregateZero::get(TileVecTy),
+ TI.RowLoopHeader->getSingleSuccessor());
+ TileResult.addVector(Phi);
+ ColumnPhis.push_back(Phi);
+ }
+
+ // Insert in the inner loop body, which computes
+ // Res += Load(CurrentRow, K) * Load(K, CurrentColumn)
+ Builder.SetInsertPoint(InnerBody->getTerminator());
+ // Load tiles of the operands.
+ MatrixTy A = loadMatrix(LPtr, {}, false, LShape, TI.CurrentRow, TI.CurrentK,
+ {TileSize, TileSize}, EltType, Builder);
+ MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol,
+ {TileSize, TileSize}, EltType, Builder);
+ emitMatrixMultiply(TileResult, A, B, AllowContract, Builder, true);
+ // Store result after the inner loop is done.
+ Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator());
+ storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(),
+ Store->isVolatile(), {LShape.NumRows, RShape.NumColumns},
+ TI.CurrentRow, TI.CurrentCol, EltType, Builder);
+
+ for (unsigned I = 0; I < TileResult.getNumVectors(); I++)
+ ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.InnerLoopLatch);
+
+ // Force unrolling of a few iterations of the inner loop, to make sure there
+ // is enough work per iteration.
+ // FIXME: The unroller should make this decision directly instead, but
+ // currently the cost-model is not up to the task.
+ unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize);
+ addStringMetadataToLoop(LI->getLoopFor(TI.InnerLoopHeader),
+ "llvm.loop.unroll.count", InnerLoopUnrollCount);
+ }
+
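createTiledLoops above emits an explicit loop nest over TileSize x TileSize blocks of the result, accumulates partial products across the shared dimension in PHI nodes, and stores each finished tile in the row-loop latch, with an unroll hint on the innermost loop. The scalar C++ sketch below models the same blocking scheme on plain nested vectors (the pass works on vector loads instead, and the loop-nest path additionally requires the dimensions to be multiples of the tile size):

#include <algorithm>
#include <cstddef>
#include <vector>

using Matrix = std::vector<std::vector<double>>; // dense row-major matrix

// Tiled multiply of an R x M matrix A with an M x C matrix B.
Matrix tiledMultiply(const Matrix &A, const Matrix &B, std::size_t TileSize) {
  std::size_t R = A.size(), M = B.size(), C = B[0].size();
  Matrix Res(R, std::vector<double>(C, 0.0));
  for (std::size_t J = 0; J < C; J += TileSize)
    for (std::size_t I = 0; I < R; I += TileSize)
      for (std::size_t K = 0; K < M; K += TileSize)
        // Multiply one block of A with one block of B and accumulate the
        // partial products into the matching block of the result.
        for (std::size_t j = J; j < std::min(J + TileSize, C); ++j)
          for (std::size_t i = I; i < std::min(I + TileSize, R); ++i)
            for (std::size_t k = K; k < std::min(K + TileSize, M); ++k)
              Res[i][j] += A[i][k] * B[k][j];
  return Res;
}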
void emitSIMDTiling(CallInst *MatMul, LoadInst *LoadOp0, LoadInst *LoadOp1,
StoreInst *Store,
SmallPtrSetImpl<Instruction *> &FusedInsts) {
@@ -1224,28 +1288,34 @@ public:
bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) &&
MatMul->hasAllowContract());
- IRBuilder<> Builder(Store);
- for (unsigned J = 0; J < C; J += TileSize)
- for (unsigned I = 0; I < R; I += TileSize) {
- const unsigned TileR = std::min(R - I, unsigned(TileSize));
- const unsigned TileC = std::min(C - J, unsigned(TileSize));
- MatrixTy Res = getZeroMatrix(EltType, TileR, TileC);
-
- for (unsigned K = 0; K < M; K += TileSize) {
- const unsigned TileM = std::min(M - K, unsigned(TileSize));
- MatrixTy A =
- loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(),
- LShape, Builder.getInt64(I), Builder.getInt64(K),
- {TileR, TileM}, EltType, Builder);
- MatrixTy B =
- loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(),
- RShape, Builder.getInt64(K), Builder.getInt64(J),
- {TileM, TileC}, EltType, Builder);
- emitMatrixMultiply(Res, A, B, AllowContract, Builder, true);
+ if (TileUseLoops && (R % TileSize == 0 && C % TileSize == 0))
+ createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store,
+ AllowContract);
+ else {
+ IRBuilder<> Builder(Store);
+ for (unsigned J = 0; J < C; J += TileSize)
+ for (unsigned I = 0; I < R; I += TileSize) {
+ const unsigned TileR = std::min(R - I, unsigned(TileSize));
+ const unsigned TileC = std::min(C - J, unsigned(TileSize));
+ MatrixTy Res = getZeroMatrix(EltType, TileR, TileC);
+
+ for (unsigned K = 0; K < M; K += TileSize) {
+ const unsigned TileM = std::min(M - K, unsigned(TileSize));
+ MatrixTy A =
+ loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(),
+ LShape, Builder.getInt64(I), Builder.getInt64(K),
+ {TileR, TileM}, EltType, Builder);
+ MatrixTy B =
+ loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(),
+ RShape, Builder.getInt64(K), Builder.getInt64(J),
+ {TileM, TileC}, EltType, Builder);
+ emitMatrixMultiply(Res, A, B, AllowContract, Builder, true);
+ }
+ storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M},
+ Builder.getInt64(I), Builder.getInt64(J), EltType,
+ Builder);
}
- storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M},
- Builder.getInt64(I), Builder.getInt64(J), EltType, Builder);
- }
+ }
// Mark eliminated instructions as fused and remove them.
FusedInsts.insert(Store);
@@ -1272,9 +1342,11 @@ public:
void LowerMatrixMultiplyFused(CallInst *MatMul,
SmallPtrSetImpl<Instruction *> &FusedInsts) {
if (!FuseMatrix || !MatMul->hasOneUse() ||
- MatrixLayout != MatrixLayoutTy::ColumnMajor)
+ MatrixLayout != MatrixLayoutTy::ColumnMajor || !DT)
return;
+ assert(AA && LI && "Analyses should be available");
+
auto *LoadOp0 = dyn_cast<LoadInst>(MatMul->getOperand(0));
auto *LoadOp1 = dyn_cast<LoadInst>(MatMul->getOperand(1));
auto *Store = dyn_cast<StoreInst>(*MatMul->user_begin());
@@ -1283,7 +1355,7 @@ public:
// we create invalid IR.
// FIXME: See if we can hoist the store address computation.
auto *AddrI = dyn_cast<Instruction>(Store->getOperand(1));
- if (AddrI && (!DT.dominates(AddrI, MatMul)))
+ if (AddrI && (!DT->dominates(AddrI, MatMul)))
return;
emitSIMDTiling(MatMul, LoadOp0, LoadOp1, Store, FusedInsts);
@@ -1300,6 +1372,8 @@ public:
const MatrixTy &Lhs = getMatrix(MatMul->getArgOperand(0), LShape, Builder);
const MatrixTy &Rhs = getMatrix(MatMul->getArgOperand(1), RShape, Builder);
+ assert(Lhs.getElementType() == Rhs.getElementType() &&
+ "Matrix multiply argument element types do not match.");
const unsigned R = LShape.NumRows;
const unsigned C = RShape.NumColumns;
@@ -1307,6 +1381,8 @@ public:
// Initialize the output
MatrixTy Result(R, C, EltType);
+ assert(Lhs.getElementType() == Result.getElementType() &&
+ "Matrix multiply result element type does not match arguments.");
bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) &&
MatMul->hasAllowContract());
@@ -1424,6 +1500,40 @@ public:
return true;
}
+ /// Lower unary operators, if shape information is available.
+ bool VisitUnaryOperator(UnaryOperator *Inst) {
+ auto I = ShapeMap.find(Inst);
+ if (I == ShapeMap.end())
+ return false;
+
+ Value *Op = Inst->getOperand(0);
+
+ IRBuilder<> Builder(Inst);
+ ShapeInfo &Shape = I->second;
+
+ MatrixTy Result;
+ MatrixTy M = getMatrix(Op, Shape, Builder);
+
+ // Helper to perform unary op on vectors.
+ auto BuildVectorOp = [&Builder, Inst](Value *Op) {
+ switch (Inst->getOpcode()) {
+ case Instruction::FNeg:
+ return Builder.CreateFNeg(Op);
+ default:
+ llvm_unreachable("Unsupported unary operator for matrix");
+ }
+ };
+
+ for (unsigned I = 0; I < Shape.getNumVectors(); ++I)
+ Result.addVector(BuildVectorOp(M.getVector(I)));
+
+ finalizeLowering(Inst,
+ Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
+ Result.getNumVectors()),
+ Builder);
+ return true;
+ }
+
/// Helper to linearize a matrix expression tree into a string. Currently
/// matrix expressions are linearized by starting at an expression leaf and
/// linearizing bottom up.
@@ -1488,7 +1598,7 @@ public:
if (Value *Ptr = getPointerOperand(V))
return getUnderlyingObjectThroughLoads(Ptr);
else if (V->getType()->isPointerTy())
- return GetUnderlyingObject(V, DL);
+ return getUnderlyingObject(V);
return V;
}
@@ -1524,7 +1634,7 @@ public:
write(StringRef(Intrinsic::getName(II->getIntrinsicID(), {}))
.drop_front(StringRef("llvm.matrix.").size()));
write(".");
- std::string Tmp = "";
+ std::string Tmp;
raw_string_ostream SS(Tmp);
switch (II->getIntrinsicID()) {
@@ -1737,7 +1847,6 @@ public:
for (Value *Op : cast<Instruction>(V)->operand_values())
collectSharedInfo(Leaf, Op, ExprsInSubprogram, Shared);
- return;
}
/// Calculate the number of exclusive and shared op counts for expression
@@ -1863,15 +1972,25 @@ public:
PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- auto &AA = AM.getResult<AAManager>(F);
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &LI = AM.getResult<LoopAnalysis>(F);
+ OptimizationRemarkEmitter *ORE = nullptr;
+ AAResults *AA = nullptr;
+ DominatorTree *DT = nullptr;
+ LoopInfo *LI = nullptr;
+
+ if (!Minimal) {
+ ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ AA = &AM.getResult<AAManager>(F);
+ DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ LI = &AM.getResult<LoopAnalysis>(F);
+ }
LowerMatrixIntrinsics LMT(F, TTI, AA, DT, LI, ORE);
if (LMT.Visit()) {
PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
+ if (!Minimal) {
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ }
return PA;
}
return PreservedAnalyses::all();
@@ -1894,7 +2013,7 @@ public:
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LowerMatrixIntrinsics LMT(F, TTI, AA, DT, LI, ORE);
+ LowerMatrixIntrinsics LMT(F, TTI, &AA, &DT, &LI, &ORE);
bool C = LMT.Visit();
return C;
}
@@ -1925,3 +2044,45 @@ INITIALIZE_PASS_END(LowerMatrixIntrinsicsLegacyPass, DEBUG_TYPE, pass_name,
Pass *llvm::createLowerMatrixIntrinsicsPass() {
return new LowerMatrixIntrinsicsLegacyPass();
}
+
+namespace {
+
+/// A lightweight version of the matrix lowering pass that only requires TTI.
+/// Advanced features like tiling, which require DT, AA or ORE, are disabled. This
+/// is used to lower matrix intrinsics if the main lowering pass is not run, for
+/// example with -O0.
+class LowerMatrixIntrinsicsMinimalLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ LowerMatrixIntrinsicsMinimalLegacyPass() : FunctionPass(ID) {
+ initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LowerMatrixIntrinsics LMT(F, TTI, nullptr, nullptr, nullptr, nullptr);
+ bool C = LMT.Visit();
+ return C;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+} // namespace
+
+static const char pass_name_minimal[] = "Lower the matrix intrinsics (minimal)";
+char LowerMatrixIntrinsicsMinimalLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsMinimalLegacyPass,
+ "lower-matrix-intrinsics-minimal", pass_name_minimal,
+ false, false)
+INITIALIZE_PASS_END(LowerMatrixIntrinsicsMinimalLegacyPass,
+ "lower-matrix-intrinsics-minimal", pass_name_minimal, false,
+ false)
+
+Pass *llvm::createLowerMatrixIntrinsicsMinimalPass() {
+ return new LowerMatrixIntrinsicsMinimalLegacyPass();
+}
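The minimal variant above illustrates the pattern this patch applies throughout the pass: the heavyweight analyses become nullable pointers and every feature that needs them is guarded, so the same lowering code serves both -O0 and the optimized pipelines. A small standalone sketch of that design, with invented names rather than the real LLVM classes:

#include <cassert>
#include <cstdio>

// Stand-ins for the optional analyses; only their presence matters here.
struct DomTreeStub {};
struct AliasInfoStub {};

struct MatrixLowererSketch {
  DomTreeStub *DT;     // may be null in the "minimal" configuration
  AliasInfoStub *AA;   // may be null in the "minimal" configuration

  MatrixLowererSketch(DomTreeStub *DTIn, AliasInfoStub *AAIn)
      : DT(DTIn), AA(AAIn) {}

  void run() {
    lowerIntrinsics();   // always possible; needs nothing beyond TTI
    if (DT && AA)        // fusion/tiling only when the analyses exist
      fuseAndTile();
  }

  void lowerIntrinsics() { std::puts("lowering matrix intrinsics"); }
  void fuseAndTile() {
    assert(DT && AA && "guarded by the caller");
    std::puts("fusing multiplies and tiling");
  }
};

int main() {
  MatrixLowererSketch Minimal(nullptr, nullptr);  // -O0 style: no analyses
  Minimal.run();
  DomTreeStub DT;
  AliasInfoStub AA;
  MatrixLowererSketch Full(&DT, &AA);             // full pipeline
  Full.run();
}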
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 4b4196edc12b..a4e695497f30 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -21,8 +21,11 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -64,10 +67,15 @@ using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
+static cl::opt<bool>
+ EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(false), cl::Hidden,
+ cl::desc("Use MemorySSA-backed MemCpyOpt."));
+
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
+STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
namespace {
@@ -271,11 +279,17 @@ private:
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<MemoryDependenceWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ if (!EnableMemorySSA)
+ AU.addRequired<MemoryDependenceWrapperPass>();
AU.addPreserved<MemoryDependenceWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ if (EnableMemorySSA)
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
@@ -297,6 +311,56 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
false, false)
+// Check that V is either not accessible by the caller, or unwinding cannot
+// occur between Start and End.
+static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
+ Instruction *End) {
+ assert(Start->getParent() == End->getParent() && "Must be in same block");
+ if (!Start->getFunction()->doesNotThrow() &&
+ !isa<AllocaInst>(getUnderlyingObject(V))) {
+ for (const Instruction &I :
+ make_range(Start->getIterator(), End->getIterator())) {
+ if (I.mayThrow())
+ return true;
+ }
+ }
+ return false;
+}
+
+void MemCpyOptPass::eraseInstruction(Instruction *I) {
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
+ if (MD)
+ MD->removeInstruction(I);
+ I->eraseFromParent();
+}
+
+// Check for mod or ref of Loc between Start and End, excluding both boundaries.
+// Start and End must be in the same block
+static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
+ const MemoryUseOrDef *Start,
+ const MemoryUseOrDef *End) {
+ assert(Start->getBlock() == End->getBlock() && "Only local supported");
+ for (const MemoryAccess &MA :
+ make_range(++Start->getIterator(), End->getIterator())) {
+ if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
+ Loc)))
+ return true;
+ }
+ return false;
+}
+
+// Check for mod of Loc between Start and End, excluding both boundaries.
+// Start and End can be in different blocks.
+static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
+ const MemoryUseOrDef *Start,
+ const MemoryUseOrDef *End) {
+ // TODO: Only walk until we hit Start.
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ End->getDefiningAccess(), Loc);
+ return !MSSA->dominates(Clobber, Start);
+}
+
/// When scanning forward over instructions, we look for some other patterns to
/// fold away. In particular, this looks for stores to neighboring locations of
/// memory. If it sees enough consecutive ones, it attempts to merge them
@@ -313,7 +377,27 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
MemsetRanges Ranges(DL);
BasicBlock::iterator BI(StartInst);
+
+ // Keeps track of the last memory use or def before the insertion point for
+ // the new memset. The new MemoryDef for the inserted memsets will be inserted
+ // after MemInsertPoint. It points to either LastMemDef or to the last user
+ // before the insertion point of the memset, if there are any such users.
+ MemoryUseOrDef *MemInsertPoint = nullptr;
+ // Keeps track of the last MemoryDef between StartInst and the insertion point
+ // for the new memset. This will become the defining access of the inserted
+ // memsets.
+ MemoryDef *LastMemDef = nullptr;
for (++BI; !BI->isTerminator(); ++BI) {
+ if (MSSAU) {
+ auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
+ if (CurrentAcc) {
+ MemInsertPoint = CurrentAcc;
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
+ LastMemDef = CurrentDef;
+ }
+ }
+
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
// don't even allow readonly here because we don't want something like:
@@ -327,8 +411,15 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// If this is a store, see if we can merge it in.
if (!NextStore->isSimple()) break;
+ Value *StoredVal = NextStore->getValueOperand();
+
+ // Don't convert stores of non-integral pointer types to memsets (which
+ // stores integers).
+ if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
+ break;
+
// Check to see if this stored value is of the same byte-splattable value.
- Value *StoredByte = isBytewiseValue(NextStore->getOperand(0), DL);
+ Value *StoredByte = isBytewiseValue(StoredVal, DL);
if (isa<UndefValue>(ByteVal) && StoredByte)
ByteVal = StoredByte;
if (ByteVal != StoredByte)
@@ -392,15 +483,27 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
: Range.TheStores) dbgs()
<< *SI << '\n';
dbgs() << "With: " << *AMemSet << '\n');
-
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
- // Zap all the stores.
- for (Instruction *SI : Range.TheStores) {
- MD->removeInstruction(SI);
- SI->eraseFromParent();
+ if (MSSAU) {
+ assert(LastMemDef && MemInsertPoint &&
+ "Both LastMemDef and MemInsertPoint need to be set");
+ auto *NewDef =
+ cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
+ ? MSSAU->createMemoryAccessBefore(
+ AMemSet, LastMemDef, MemInsertPoint)
+ : MSSAU->createMemoryAccessAfter(
+ AMemSet, LastMemDef, MemInsertPoint));
+ MSSAU->insertDef(NewDef, /*RenameUses=*/true);
+ LastMemDef = NewDef;
+ MemInsertPoint = NewDef;
}
+
+ // Zap all the stores.
+ for (Instruction *SI : Range.TheStores)
+ eraseInstruction(SI);
+
++NumMemSetInfer;
}
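At the source level, the merge performed above is the familiar one: a run of adjacent stores of the same byte value collapses into a single memset. A hypothetical before/after in C++ (names and sizes invented for the example):

#include <cstring>

// Before: four neighbouring byte stores of the same value.
void initBefore(unsigned char *P) {
  P[0] = 0;
  P[1] = 0;
  P[2] = 0;
  P[3] = 0;
}

// After tryMergingIntoMemset (conceptually): one memset replaces the run of
// stores; with -enable-memcpyopt-memoryssa the pass additionally threads a
// new MemoryDef for the memset into MemorySSA instead of updating MemDep.
void initAfter(unsigned char *P) {
  std::memset(P, 0, 4);
}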
@@ -411,11 +514,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// It will lift the store and its argument, plus anything that
// may alias with these.
// The method returns true if it was successful.
-static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
- const LoadInst *LI) {
+bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
// If the store alias this position, early bail out.
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (isModOrRefSet(AA.getModRefInfo(P, StoreLoc)))
+ if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
return false;
// Keep track of the arguments of all instruction we plan to lift
@@ -426,7 +528,7 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
Args.insert(Ptr);
// Instruction to lift before P.
- SmallVector<Instruction*, 8> ToLift;
+ SmallVector<Instruction *, 8> ToLift{SI};
// Memory locations of lifted instructions.
SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
@@ -439,19 +541,24 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
auto *C = &*I;
- bool MayAlias = isModOrRefSet(AA.getModRefInfo(C, None));
+ // Make sure hoisting does not perform a store that was not guaranteed to
+ // happen.
+ if (!isGuaranteedToTransferExecutionToSuccessor(C))
+ return false;
+
+ bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
bool NeedLift = false;
if (Args.erase(C))
NeedLift = true;
else if (MayAlias) {
- NeedLift = llvm::any_of(MemLocs, [C, &AA](const MemoryLocation &ML) {
- return isModOrRefSet(AA.getModRefInfo(C, ML));
+ NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
+ return isModOrRefSet(AA->getModRefInfo(C, ML));
});
if (!NeedLift)
- NeedLift = llvm::any_of(Calls, [C, &AA](const CallBase *Call) {
- return isModOrRefSet(AA.getModRefInfo(C, Call));
+ NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
+ return isModOrRefSet(AA->getModRefInfo(C, Call));
});
}
@@ -461,18 +568,18 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
if (MayAlias) {
// Since LI is implicitly moved downwards past the lifted instructions,
// none of them may modify its source.
- if (isModSet(AA.getModRefInfo(C, LoadLoc)))
+ if (isModSet(AA->getModRefInfo(C, LoadLoc)))
return false;
else if (const auto *Call = dyn_cast<CallBase>(C)) {
// If we can't lift this before P, it's game over.
- if (isModOrRefSet(AA.getModRefInfo(P, Call)))
+ if (isModOrRefSet(AA->getModRefInfo(P, Call)))
return false;
Calls.push_back(Call);
} else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
// If we can't lift this before P, it's game over.
auto ML = MemoryLocation::get(C);
- if (isModOrRefSet(AA.getModRefInfo(P, ML)))
+ if (isModOrRefSet(AA->getModRefInfo(P, ML)))
return false;
MemLocs.push_back(ML);
@@ -492,10 +599,40 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
}
}
- // We made it, we need to lift
+ // Find MSSA insertion point. Normally P will always have a corresponding
+ // memory access before which we can insert. However, with non-standard AA
+ // pipelines, there may be a mismatch between AA and MSSA, in which case we
+ // will scan for a memory access before P. In either case, we know for sure
+ // that at least the load will have a memory access.
+ // TODO: Simplify this once P will be determined by MSSA, in which case the
+ // discrepancy can no longer occur.
+ MemoryUseOrDef *MemInsertPoint = nullptr;
+ if (MSSAU) {
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
+ MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
+ } else {
+ const Instruction *ConstP = P;
+ for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
+ ++LI->getReverseIterator())) {
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ MemInsertPoint = MA;
+ break;
+ }
+ }
+ }
+ }
+
+ // We made it, we need to lift.
for (auto *I : llvm::reverse(ToLift)) {
LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
I->moveBefore(P);
+ if (MSSAU) {
+ assert(MemInsertPoint && "Must have found insert point");
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
+ MSSAU->moveAfter(MA, MemInsertPoint);
+ MemInsertPoint = MA;
+ }
+ }
}
return true;
@@ -515,23 +652,30 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
const DataLayout &DL = SI->getModule()->getDataLayout();
+ Value *StoredVal = SI->getValueOperand();
+
+ // Not all the transforms below are correct for non-integral pointers, bail
+ // until we've audited the individual pieces.
+ if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
+ return false;
+
// Load to store forwarding can be interpreted as memcpy.
- if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
auto *T = LI->getType();
if (T->isAggregateType()) {
- AliasAnalysis &AA = LookupAliasAnalysis();
MemoryLocation LoadLoc = MemoryLocation::get(LI);
// We use alias analysis to check if an instruction may store to
// the memory we load from in between the load and the store. If
// such an instruction is found, we try to promote there instead
// of at the store position.
+ // TODO: Can use MSSA for this.
Instruction *P = SI;
for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
- if (isModSet(AA.getModRefInfo(&I, LoadLoc))) {
+ if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
P = &I;
break;
}
@@ -542,7 +686,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// position if nothing alias the store memory after this and the store
// destination is not in the range.
if (P && P != SI) {
- if (!moveUp(AA, SI, P, LI))
+ if (!moveUp(SI, P, LI))
P = nullptr;
}
@@ -553,7 +697,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// memmove must be used to preserve semantic. If not, memcpy can
// be used.
bool UseMemMove = false;
- if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
+ if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc))
UseMemMove = true;
uint64_t Size = DL.getTypeStoreSize(T);
@@ -572,10 +716,16 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
<< *M << "\n");
- MD->removeInstruction(SI);
- SI->eraseFromParent();
- MD->removeInstruction(LI);
- LI->eraseFromParent();
+ if (MSSAU) {
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
+ auto *NewAccess =
+ MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(SI);
+ eraseInstruction(LI);
++NumMemCpyInstr;
// Make sure we do not invalidate the iterator.
@@ -587,44 +737,50 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
- MemDepResult ldep = MD->getDependency(LI);
CallInst *C = nullptr;
- if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
- C = dyn_cast<CallInst>(ldep.getInst());
+ if (EnableMemorySSA) {
+ if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+ MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
+ // The load must post-dominate the call. Limit to the same block for now.
+ // TODO: Support non-local call-slot optimization?
+ if (LoadClobber->getBlock() == SI->getParent())
+ C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
+ }
+ } else {
+ MemDepResult ldep = MD->getDependency(LI);
+ if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
+ C = dyn_cast<CallInst>(ldep.getInst());
+ }
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
- Value *CpyDest = SI->getPointerOperand()->stripPointerCasts();
- bool CpyDestIsLocal = isa<AllocaInst>(CpyDest);
- AliasAnalysis &AA = LookupAliasAnalysis();
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
- I != E; --I) {
- if (isModOrRefSet(AA.getModRefInfo(&*I, StoreLoc))) {
+ if (EnableMemorySSA) {
+ if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
+ MSSA->getMemoryAccess(SI)))
C = nullptr;
- break;
- }
- // The store to dest may never happen if an exception can be thrown
- // between the load and the store.
- if (I->mayThrow() && !CpyDestIsLocal) {
- C = nullptr;
- break;
+ } else {
+ for (BasicBlock::iterator I = --SI->getIterator(),
+ E = C->getIterator();
+ I != E; --I) {
+ if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
+ C = nullptr;
+ break;
+ }
}
}
}
if (C) {
bool changed = performCallSlotOptzn(
- LI, SI->getPointerOperand()->stripPointerCasts(),
+ LI, SI, SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
DL.getTypeStoreSize(SI->getOperand(0)->getType()),
commonAlignment(SI->getAlign(), LI->getAlign()), C);
if (changed) {
- MD->removeInstruction(SI);
- SI->eraseFromParent();
- MD->removeInstruction(LI);
- LI->eraseFromParent();
+ eraseInstruction(SI);
+ eraseInstruction(LI);
++NumMemCpyInstr;
return true;
}
@@ -658,8 +814,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
- MD->removeInstruction(SI);
- SI->eraseFromParent();
+ if (MSSAU) {
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)));
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(SI);
NumMemSetInfer++;
// Make sure we do not invalidate the iterator.
@@ -686,7 +849,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
-bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
+bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
+ Instruction *cpyStore, Value *cpyDest,
Value *cpySrc, uint64_t cpyLen,
Align cpyAlign, CallInst *C) {
// The general transformation to keep in mind is
@@ -717,7 +881,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
if (!srcArraySize)
return false;
- const DataLayout &DL = cpy->getModule()->getDataLayout();
+ const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
@@ -727,43 +891,26 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
- if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
- // The destination is an alloca. Check it is larger than srcSize.
- ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
- if (!destArraySize)
- return false;
-
- uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
- destArraySize->getZExtValue();
-
- if (destSize < srcSize)
- return false;
- } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
- // The store to dest may never happen if the call can throw.
- if (C->mayThrow())
- return false;
-
- if (A->getDereferenceableBytes() < srcSize) {
- // If the destination is an sret parameter then only accesses that are
- // outside of the returned struct type can trap.
- if (!A->hasStructRetAttr())
- return false;
-
- Type *StructTy = cast<PointerType>(A->getType())->getElementType();
- if (!StructTy->isSized()) {
- // The call may never return and hence the copy-instruction may never
- // be executed, and therefore it's not safe to say "the destination
- // has at least <cpyLen> bytes, as implied by the copy-instruction",
- return false;
- }
+ if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
+ DL, C, DT))
+ return false;
- uint64_t destSize = DL.getTypeAllocSize(StructTy);
- if (destSize < srcSize)
- return false;
- }
- } else {
+ // Make sure that nothing can observe cpyDest being written early. There are
+ // a number of cases to consider:
+ // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
+ // the transform.
+ // 2. C itself may not access cpyDest (prior to the transform). This is
+ // checked further below.
+ // 3. If cpyDest is accessible to the caller of this function (potentially
+ // captured and not based on an alloca), we need to ensure that we cannot
+ // unwind between C and cpyStore. This is checked here.
+ // 4. If cpyDest is potentially captured, there may be accesses to it from
+ // another thread. In this case, we need to check that cpyStore is
+ // guaranteed to be executed if C is. As it is a non-atomic access, it
+ // renders accesses from other threads undefined.
+ // TODO: This is currently not checked.
+ if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore))
return false;
- }
// Check that dest points to memory that is at least as aligned as src.
Align srcAlign = srcAlloca->getAlign();
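The checks above guard the call slot transformation itself, which at the source level amounts to letting the callee write the final destination instead of a temporary that is copied afterwards. A hedged sketch of the before/after shape, with hypothetical names:

#include <cstring>

struct Widget { char Bytes[64]; };

// Some callee that fills its out-parameter; assumed not to capture or read it.
void produce(Widget *Out);

// Before: the call writes a local temporary, which is then copied to Dest.
void beforeCallSlot(Widget *Dest) {
  Widget Tmp;
  produce(&Tmp);
  std::memcpy(Dest, &Tmp, sizeof(Widget));
}

// After performCallSlotOptzn (conceptually): the call writes Dest directly.
// This is only legal because Dest is dereferenceable for the copied size,
// nothing observes Dest between the call and the copy, and no unwind can make
// the early write visible.
void afterCallSlot(Widget *Dest) {
  produce(Dest);
}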
@@ -777,29 +924,26 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// guarantees that it holds only undefined values when passed in (so the final
// memcpy can be dropped), that it is not read or written between the call and
// the memcpy, and that writing beyond the end of it is undefined.
- SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
- srcAlloca->user_end());
+ SmallVector<User *, 8> srcUseList(srcAlloca->users());
while (!srcUseList.empty()) {
User *U = srcUseList.pop_back_val();
if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
- for (User *UU : U->users())
- srcUseList.push_back(UU);
+ append_range(srcUseList, U->users());
continue;
}
if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
if (!G->hasAllZeroIndices())
return false;
- for (User *UU : U->users())
- srcUseList.push_back(UU);
+ append_range(srcUseList, U->users());
continue;
}
if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
if (IT->isLifetimeStartOrEnd())
continue;
- if (U != C && U != cpy)
+ if (U != C && U != cpyLoad)
return false;
}
@@ -811,20 +955,24 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
- DominatorTree &DT = LookupDomTree();
- if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
- if (!DT.dominates(cpyDestInst, C))
+ if (!DT->dominates(cpyDest, C)) {
+ // Support moving a constant index GEP before the call.
+ auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
+ if (GEP && GEP->hasAllConstantIndices() &&
+ DT->dominates(GEP->getPointerOperand(), C))
+ GEP->moveBefore(C);
+ else
return false;
+ }
// In addition to knowing that the call does not access src in some
// unexpected manner, for example via a global, which we deduce from
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
- AliasAnalysis &AA = LookupAliasAnalysis();
- ModRefInfo MR = AA.getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
+ ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
// If necessary, perform additional analysis.
if (isModOrRefSet(MR))
- MR = AA.callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), &DT);
+ MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
if (isModOrRefSet(MR))
return false;
@@ -866,7 +1014,8 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
- MD->removeInstruction(C);
+ if (MD)
+ MD->removeInstruction(C);
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
@@ -875,12 +1024,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
LLVMContext::MD_noalias,
LLVMContext::MD_invariant_group,
LLVMContext::MD_access_group};
- combineMetadata(C, cpy, KnownIDs, true);
-
- // Remove the memcpy.
- MD->removeInstruction(cpy);
- ++NumMemCpyInstr;
+ combineMetadata(C, cpyLoad, KnownIDs, true);
+ ++NumCallSlot;
return true;
}
@@ -908,8 +1054,6 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
- AliasAnalysis &AA = LookupAliasAnalysis();
-
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
// memcpy(a <- b)
@@ -919,21 +1063,28 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// TODO: If the code between M and MDep is transparent to the destination "c",
// then we could still perform the xform by moving M up to the first memcpy.
- //
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- M->getIterator(), M->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
+ if (EnableMemorySSA) {
+ // TODO: It would be sufficient to check the MDep source up to the memcpy
+ // size of M, rather than MDep.
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
+ return false;
+ } else {
+ // NOTE: This is conservative; it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+ M->getIterator(), M->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+ }
// If the dest of the second might alias the source of the first, then the
// source and dest might overlap. We still want to eliminate the intermediate
// value, but we have to generate a memmove instead of memcpy.
bool UseMemMove = false;
- if (!AA.isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(MDep)))
+ if (!AA->isNoAlias(MemoryLocation::getForDest(M),
+ MemoryLocation::getForSource(MDep)))
UseMemMove = true;
// If all checks passed, then we can transform M.
@@ -943,18 +1094,25 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
IRBuilder<> Builder(M);
+ Instruction *NewM;
if (UseMemMove)
- Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
+ NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
+ MDep->getRawSource(), MDep->getSourceAlign(),
+ M->getLength(), M->isVolatile());
else
- Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
+ NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
+ MDep->getRawSource(), MDep->getSourceAlign(),
+ M->getLength(), M->isVolatile());
+
+ if (MSSAU) {
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
+ auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
// Remove the instruction we're replacing.
- MD->removeInstruction(M);
- M->eraseFromParent();
+ eraseInstruction(M);
++NumMemCpyInstr;
return true;
}
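In source terms, the memcpy-memcpy forwarding completed above lets the second copy read from the original source, provided the source is not written in between (the writtenBetween / MemDep checks earlier in this function). A conceptual before/after:

#include <cstring>

// Before: 'A' is only a staging buffer between the two copies.
void beforeForward(char *A, const char *B, char *C, unsigned N) {
  std::memcpy(A, B, N);
  std::memcpy(C, A, N);
}

// After processMemCpyMemCpyDependence (conceptually): the second copy reads
// straight from 'B'. If the destination 'C' may alias 'B', a memmove is
// emitted instead, and the now-possibly-dead first copy is left for DSE.
void afterForward(char *A, const char *B, char *C, unsigned N) {
  std::memcpy(A, B, N);
  std::memcpy(C, B, N);
}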
@@ -979,18 +1137,41 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
if (MemSet->getDest() != MemCpy->getDest())
return false;
- // Check that there are no other dependencies on the memset destination.
- MemDepResult DstDepInfo =
- MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
- MemCpy->getIterator(), MemCpy->getParent());
- if (DstDepInfo.getInst() != MemSet)
+ // Check that src and dst of the memcpy aren't the same. While memcpy
+ // operands cannot partially overlap, exact equality is allowed.
+ if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(),
+ LocationSize::precise(1)),
+ MemoryLocation(MemCpy->getDest(),
+ LocationSize::precise(1))))
return false;
+ if (EnableMemorySSA) {
+ // We know that dst up to src_size is not written. We now need to make sure
+ // that dst up to dst_size is not accessed. (If we did not move the memset,
+ // checking for reads would be sufficient.)
+ if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
+ MSSA->getMemoryAccess(MemSet),
+ MSSA->getMemoryAccess(MemCpy))) {
+ return false;
+ }
+ } else {
+ // We have already checked that dst up to src_size is not accessed. We
+ // need to make sure that there are no accesses up to dst_size either.
+ MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
+ MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(),
+ MemCpy->getParent());
+ if (DstDepInfo.getInst() != MemSet)
+ return false;
+ }
+
// Use the same i8* dest as the memcpy, killing the memset dest if different.
Value *Dest = MemCpy->getRawDest();
Value *DestSize = MemSet->getLength();
Value *SrcSize = MemCpy->getLength();
+ if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
+ return false;
+
// By default, create an unaligned memset.
unsigned Align = 1;
// If Dest is aligned, and SrcSize is constant, use the minimum alignment
@@ -1016,13 +1197,25 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
Value *MemsetLen = Builder.CreateSelect(
Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
- Builder.CreateMemSet(
+ Instruction *NewMemSet = Builder.CreateMemSet(
Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
SrcSize),
MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
- MD->removeInstruction(MemSet);
- MemSet->eraseFromParent();
+ if (MSSAU) {
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
+ "MemCpy must be a MemoryDef");
+ // The new memset is inserted after the memcpy, but it is known that its
+ // defining access is the memset about to be removed which immediately
+ // precedes the memcpy.
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessBefore(
+ NewMemSet, LastDef->getDefiningAccess(), LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(MemSet);
return true;
}
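For reference, the rewrite finished above turns a memset that is partially overwritten by a following memcpy into the memcpy plus a smaller memset covering only the tail. A conceptual before/after with illustrative constant sizes:

#include <cstring>

// Before: the first 16 bytes written by the memset are immediately overwritten.
void beforeTrim(char *Dst, const char *Src) {
  std::memset(Dst, 0, 64);
  std::memcpy(Dst, Src, 16);
}

// After processMemSetMemCpyDependence (conceptually): only the bytes the copy
// does not cover are still memset, and the memset runs after the copy. For
// non-constant sizes the pass emits the equivalent of
// "DestSize <= SrcSize ? 0 : DestSize - SrcSize" as the new memset length.
void afterTrim(char *Dst, const char *Src) {
  std::memcpy(Dst, Src, 16);
  std::memset(Dst + 16, 0, 64 - 16);
}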
@@ -1041,6 +1234,24 @@ static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
return false;
}
+static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
+ MemoryDef *Def, ConstantInt *Size) {
+ if (MSSA->isLiveOnEntryDef(Def))
+ return isa<AllocaInst>(getUnderlyingObject(V));
+
+ if (IntrinsicInst *II =
+ dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
+ if (AA->isMustAlias(V, II->getArgOperand(1)) &&
+ LTSize->getZExtValue() >= Size->getZExtValue())
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
@@ -1057,11 +1268,9 @@ static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
/// The \p MemCpy must have a Constant length.
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
MemSetInst *MemSet) {
- AliasAnalysis &AA = LookupAliasAnalysis();
-
// Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
// memcpying from the same address. Otherwise it is hard to reason about.
- if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
+ if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
return false;
// A known memset size is required.
@@ -1078,17 +1287,37 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
// interested in the bytes from MemSetSize..CopySize here, but as we can't
// easily represent this location, we use the full 0..CopySize range.
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
- if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
- CopySize = MemSetSize;
- else
+ bool CanReduceSize = false;
+ if (EnableMemorySSA) {
+ MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ MemSetAccess->getDefiningAccess(), MemCpyLoc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
+ CanReduceSize = true;
+ } else {
+ MemDepResult DepInfo = MD->getPointerDependencyFrom(
+ MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
+ if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+ CanReduceSize = true;
+ }
+
+ if (!CanReduceSize)
return false;
+ CopySize = MemSetSize;
}
IRBuilder<> Builder(MemCpy);
- Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), CopySize,
- MaybeAlign(MemCpy->getDestAlignment()));
+ Instruction *NewM =
+ Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
+ CopySize, MaybeAlign(MemCpy->getDestAlignment()));
+ if (MSSAU) {
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
return true;
}
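The optimization completed above rewrites a memcpy whose source bytes were just memset into a memset of the destination with the same byte value. In source form, with illustrative sizes (the copy size is only clamped to the memset size when the remaining source bytes are known to be undef):

#include <cstring>

// Before: Src holds a known byte pattern when the copy executes.
void beforeCpyToSet(char *Dst, char *Src) {
  std::memset(Src, 0x2A, 128);
  std::memcpy(Dst, Src, 32);
}

// After performMemCpyToMemSetOptzn (conceptually): the copy becomes a memset
// of the destination using the same fill byte.
void afterCpyToSet(char *Dst, char *Src) {
  std::memset(Src, 0x2A, 128);
  std::memset(Dst, 0x2A, 32);
}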
@@ -1104,8 +1333,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
// If the source and destination of the memcpy are the same, then zap it.
if (M->getSource() == M->getDest()) {
++BBI;
- MD->removeInstruction(M);
- M->eraseFromParent();
+ eraseInstruction(M);
return true;
}
@@ -1115,73 +1343,156 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
M->getModule()->getDataLayout())) {
IRBuilder<> Builder(M);
- Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
- MaybeAlign(M->getDestAlignment()), false);
- MD->removeInstruction(M);
- M->eraseFromParent();
+ Instruction *NewM =
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
+ MaybeAlign(M->getDestAlignment()), false);
+ if (MSSAU) {
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess =
+ MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(M);
++NumCpyToSet;
return true;
}
- MemDepResult DepInfo = MD->getDependency(M);
-
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- if (DepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
- if (processMemSetMemCpyDependence(M, MDep))
- return true;
+ if (EnableMemorySSA) {
+ MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
+ MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
+ MemoryLocation DestLoc = MemoryLocation::getForDest(M);
+ const MemoryAccess *DestClobber =
+ MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
+
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+ // The memcpy must post-dominate the memset, so limit this to the same basic
+ // block. A non-local generalization is likely not worthwhile.
+ if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
+ if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
+ if (DestClobber->getBlock() == M->getParent())
+ if (processMemSetMemCpyDependence(M, MDep))
+ return true;
+
+ // The optimizations after this point require the memcpy size.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (!CopySize) return false;
+
+ MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ AnyClobber, MemoryLocation::getForSource(M));
+
+ // There are four possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started
+ // its lifetime copies undefined data, and we can therefore eliminate
+ // the memcpy in favor of the data that was already at the destination.
+ // d) memcpy from a just-memset'd source can be turned into memset.
+ if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
+ if (Instruction *MI = MD->getMemoryInst()) {
+ if (auto *C = dyn_cast<CallInst>(MI)) {
+ // The memcpy must post-dom the call. Limit to the same block for now.
+ // Additionally, we need to ensure that there are no accesses to dest
+ // between the call and the memcpy. Accesses to src will be checked
+ // by performCallSlotOptzn().
+ // TODO: Support non-local call-slot optimization?
+ if (C->getParent() == M->getParent() &&
+ !accessedBetween(*AA, DestLoc, MD, MA)) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), Alignment, C)) {
+ LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+ << " call: " << *C << "\n"
+ << " memcpy: " << *M << "\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ if (auto *MDep = dyn_cast<MemCpyInst>(MI))
+ return processMemCpyMemCpyDependence(M, MDep);
+ if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
+ if (performMemCpyToMemSetOptzn(M, MDep)) {
+ LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
+ eraseInstruction(M);
+ ++NumCpyToSet;
+ return true;
+ }
+ }
+ }
- // The optimizations after this point require the memcpy size.
- ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (!CopySize) return false;
-
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started its
- // lifetime copies undefined data, and we can therefore eliminate the
- // memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (DepInfo.isClobber()) {
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment, C)) {
- MD->removeInstruction(M);
- M->eraseFromParent();
+ if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) {
+ LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
return true;
}
}
- }
+ } else {
+ MemDepResult DepInfo = MD->getDependency(M);
- MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
- MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
- SrcLoc, true, M->getIterator(), M->getParent());
-
- if (SrcDepInfo.isClobber()) {
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
- return processMemCpyMemCpyDependence(M, MDep);
- } else if (SrcDepInfo.isDef()) {
- if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
- MD->removeInstruction(M);
- M->eraseFromParent();
- ++NumMemCpyInstr;
- return true;
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+ if (DepInfo.isClobber())
+ if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
+ if (processMemSetMemCpyDependence(M, MDep))
+ return true;
+
+ // The optimizations after this point require the memcpy size.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (!CopySize) return false;
+
+ // There are four possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started
+ // its lifetime copies undefined data, and we can therefore eliminate
+ // the memcpy in favor of the data that was already at the destination.
+ // d) memcpy from a just-memset'd source can be turned into memset.
+ if (DepInfo.isClobber()) {
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), Alignment, C)) {
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
}
- }
- if (SrcDepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
- if (performMemCpyToMemSetOptzn(M, MDep)) {
- MD->removeInstruction(M);
- M->eraseFromParent();
- ++NumCpyToSet;
+ MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
+ SrcLoc, true, M->getIterator(), M->getParent());
+
+ if (SrcDepInfo.isClobber()) {
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep);
+ } else if (SrcDepInfo.isDef()) {
+ if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
return true;
}
+ }
+
+ if (SrcDepInfo.isClobber())
+ if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+ if (performMemCpyToMemSetOptzn(M, MDep)) {
+ eraseInstruction(M);
+ ++NumCpyToSet;
+ return true;
+ }
+ }
return false;
}
@@ -1189,14 +1500,12 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
- AliasAnalysis &AA = LookupAliasAnalysis();
-
if (!TLI->has(LibFunc_memmove))
return false;
// See if the pointers alias.
- if (!AA.isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(M)))
+ if (!AA->isNoAlias(MemoryLocation::getForDest(M),
+ MemoryLocation::getForSource(M)))
return false;
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
@@ -1209,9 +1518,13 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
Intrinsic::memcpy, ArgTys));
+ // For MemorySSA nothing really changes (except that memcpy may imply stricter
+ // aliasing guarantees).
+
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
- MD->removeInstruction(M);
+ if (MD)
+ MD->removeInstruction(M);
++NumMoveToCpy;
return true;
@@ -1224,16 +1537,25 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
Value *ByValArg = CB.getArgOperand(ArgNo);
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemoryLocation(ByValArg, LocationSize::precise(ByValSize)), true,
- CB.getIterator(), CB.getParent());
- if (!DepInfo.isClobber())
- return false;
+ MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
+ MemCpyInst *MDep = nullptr;
+ if (EnableMemorySSA) {
+ MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ CallAccess->getDefiningAccess(), Loc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
+ } else {
+ MemDepResult DepInfo = MD->getPointerDependencyFrom(
+ Loc, true, CB.getIterator(), CB.getParent());
+ if (!DepInfo.isClobber())
+ return false;
+ MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+ }
// If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
// a memcpy, see if we can byval from the source of the memcpy instead of the
// result.
- MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
if (!MDep || MDep->isVolatile() ||
ByValArg->stripPointerCasts() != MDep->getDest())
return false;
@@ -1250,12 +1572,10 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// If it is greater than the memcpy, then we check to see if we can force the
// source of the memcpy to the alignment we need. If we fail, we bail out.
- AssumptionCache &AC = LookupAssumptionCache();
- DominatorTree &DT = LookupDomTree();
MaybeAlign MemDepAlign = MDep->getSourceAlign();
if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
- getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, &AC,
- &DT) < *ByValAlign)
+ getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
+ DT) < *ByValAlign)
return false;
// The address space of the memcpy source must match the byval argument
@@ -1269,14 +1589,19 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// *b = 42;
// foo(*a)
// It would be invalid to transform the second memcpy into foo(*b).
- //
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep = MD->getPointerDependencyFrom(
- MemoryLocation::getForSource(MDep), false,
- CB.getIterator(), MDep->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
+ if (EnableMemorySSA) {
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
+ return false;
+ } else {
+ // NOTE: This is conservative; it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep = MD->getPointerDependencyFrom(
+ MemoryLocation::getForSource(MDep), false,
+ CB.getIterator(), MDep->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+ }
Value *TmpCast = MDep->getSource();
if (MDep->getSource()->getType() != ByValArg->getType()) {
@@ -1301,15 +1626,13 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
bool MemCpyOptPass::iterateOnFunction(Function &F) {
bool MadeChange = false;
- DominatorTree &DT = LookupDomTree();
-
// Walk all instruction in the function.
for (BasicBlock &BB : F) {
// Skip unreachable blocks. For example processStore assumes that an
// instruction in a BB can't be dominated by a later instruction in the
// same BB (which is a scenario that can happen for an unreachable BB that
// has itself as a predecessor).
- if (!DT.isReachableFromEntry(&BB))
+ if (!DT->isReachableFromEntry(&BB))
continue;
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
@@ -1345,43 +1668,43 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
}
PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+ auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F)
+ : AM.getCachedResult<MemoryDependenceAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
-
- auto LookupAliasAnalysis = [&]() -> AliasAnalysis & {
- return AM.getResult<AAManager>(F);
- };
- auto LookupAssumptionCache = [&]() -> AssumptionCache & {
- return AM.getResult<AssumptionAnalysis>(F);
- };
- auto LookupDomTree = [&]() -> DominatorTree & {
- return AM.getResult<DominatorTreeAnalysis>(F);
- };
-
- bool MadeChange = runImpl(F, &MD, &TLI, LookupAliasAnalysis,
- LookupAssumptionCache, LookupDomTree);
+ auto *AA = &AM.getResult<AAManager>(F);
+ auto *AC = &AM.getResult<AssumptionAnalysis>(F);
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F)
+ : AM.getCachedResult<MemorySSAAnalysis>(F);
+
+ bool MadeChange =
+ runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
if (!MadeChange)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
- PA.preserve<MemoryDependenceAnalysis>();
+ if (MD)
+ PA.preserve<MemoryDependenceAnalysis>();
+ if (MSSA)
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
-bool MemCpyOptPass::runImpl(
- Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_,
- std::function<AliasAnalysis &()> LookupAliasAnalysis_,
- std::function<AssumptionCache &()> LookupAssumptionCache_,
- std::function<DominatorTree &()> LookupDomTree_) {
+bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
+ TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
+ AssumptionCache *AC_, DominatorTree *DT_,
+ MemorySSA *MSSA_) {
bool MadeChange = false;
MD = MD_;
TLI = TLI_;
- LookupAliasAnalysis = std::move(LookupAliasAnalysis_);
- LookupAssumptionCache = std::move(LookupAssumptionCache_);
- LookupDomTree = std::move(LookupDomTree_);
-
+ AA = AA_;
+ AC = AC_;
+ DT = DT_;
+ MSSA = MSSA_;
+ MemorySSAUpdater MSSAU_(MSSA_);
+ MSSAU = MSSA_ ? &MSSAU_ : nullptr;
// If we don't have at least memset and memcpy, there is little point of doing
// anything here. These are required by a freestanding implementation, so if
// even they are disabled, there is no point in trying hard.
@@ -1394,6 +1717,9 @@ bool MemCpyOptPass::runImpl(
MadeChange = true;
}
+ if (MSSA_ && VerifyMemorySSA)
+ MSSA_->verifyMemorySSA();
+
MD = nullptr;
return MadeChange;
}
@@ -1403,19 +1729,17 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto *MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+ auto *MDWP = !EnableMemorySSA
+ ? &getAnalysis<MemoryDependenceWrapperPass>()
+ : getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
- auto LookupAliasAnalysis = [this]() -> AliasAnalysis & {
- return getAnalysis<AAResultsWrapperPass>().getAAResults();
- };
- auto LookupAssumptionCache = [this, &F]() -> AssumptionCache & {
- return getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- };
- auto LookupDomTree = [this]() -> DominatorTree & {
- return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- };
-
- return Impl.runImpl(F, MD, TLI, LookupAliasAnalysis, LookupAssumptionCache,
- LookupDomTree);
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *MSSAWP = EnableMemorySSA
+ ? &getAnalysis<MemorySSAWrapperPass>()
+ : getAnalysisIfAvailable<MemorySSAWrapperPass>();
+
+ return Impl.runImpl(F, MDWP ? &MDWP->getMemDep() : nullptr, TLI, AA, AC, DT,
+ MSSAWP ? &MSSAWP->getMSSA() : nullptr);
}
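
To make the analysis wiring in MemCpyOptPass::run and the legacy wrapper above easier to follow: the pass requires whichever analysis EnableMemorySSA selects and takes the other one only when it is already cached, so nothing extra is computed. Below is a small standalone C++ sketch of that pattern, not part of the patch; AnalysisCache and its methods are hypothetical stand-ins for the new pass manager's getResult/getCachedResult.

    #include <iostream>
    #include <map>
    #include <string>

    // Hypothetical stand-in for a FunctionAnalysisManager-style cache.
    struct AnalysisCache {
      std::map<std::string, int> Cached; // analysis name -> result
      // Compute-on-demand lookup (like getResult).
      int *getResult(const std::string &Name) {
        return &Cached.emplace(Name, 42).first->second;
      }
      // Cache-only lookup (like getCachedResult): never computes anything new.
      int *getCachedResult(const std::string &Name) {
        auto It = Cached.find(Name);
        return It == Cached.end() ? nullptr : &It->second;
      }
    };

    int main() {
      bool EnableMemorySSA = true;
      AnalysisCache AM;
      // The enabled analysis is required; the other is used only if some earlier
      // pass already computed it, mirroring the run() hunk above.
      int *MD = !EnableMemorySSA ? AM.getResult("MemDep")
                                 : AM.getCachedResult("MemDep");
      int *MSSA = EnableMemorySSA ? AM.getResult("MemorySSA")
                                  : AM.getCachedResult("MemorySSA");
      std::cout << (MD != nullptr) << ' ' << (MSSA != nullptr) << '\n'; // "0 1"
    }
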
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index ce1e142101b8..7f8b75ac8806 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -372,7 +372,7 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
} else {
// In this case, we expect a constant incoming value (the comparison is
// chained).
- const auto *const Const = dyn_cast<ConstantInt>(Val);
+ const auto *const Const = cast<ConstantInt>(Val);
LLVM_DEBUG(dbgs() << "const\n");
if (!Const->isZero()) return {};
LLVM_DEBUG(dbgs() << "false\n");
@@ -624,6 +624,17 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
Value *IsEqual = nullptr;
LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
<< BB->getName() << "\n");
+
+ // If there is one block that requires splitting, we do it now, i.e.
+ // just before we know we will collapse the chain. The instructions
+ // can be executed before any of the instructions in the chain.
+ const auto ToSplit = llvm::find_if(
+ Comparisons, [](const BCECmpBlock &B) { return B.RequireSplit; });
+ if (ToSplit != Comparisons.end()) {
+ LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
+ ToSplit->split(BB, AA);
+ }
+
if (Comparisons.size() == 1) {
LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
Value *const LhsLoad =
@@ -633,17 +644,6 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
// There are no blocks to merge, just do the comparison.
IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
} else {
- // If there is one block that requires splitting, we do it now, i.e.
- // just before we know we will collapse the chain. The instructions
- // can be executed before any of the instructions in the chain.
- const auto ToSplit =
- std::find_if(Comparisons.begin(), Comparisons.end(),
- [](const BCECmpBlock &B) { return B.RequireSplit; });
- if (ToSplit != Comparisons.end()) {
- LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
- ToSplit->split(BB, AA);
- }
-
const unsigned TotalSizeBits = std::accumulate(
Comparisons.begin(), Comparisons.end(), 0u,
[](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); });
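
The hoisted hunk above splits the one block flagged RequireSplit before the chain is collapsed, so the single-comparison path is covered too. A minimal standalone sketch of that range scan (CmpBlock is a made-up type, not the BCECmpBlock from the patch):

    #include <algorithm>
    #include <iostream>
    #include <vector>

    struct CmpBlock { bool RequireSplit; };

    int main() {
      std::vector<CmpBlock> Comparisons{{false}, {true}, {false}};
      // Locate the (at most one) block carrying non-comparison work and handle
      // it up front, independent of how many comparisons end up being merged.
      auto ToSplit = std::find_if(Comparisons.begin(), Comparisons.end(),
                                  [](const CmpBlock &B) { return B.RequireSplit; });
      if (ToSplit != Comparisons.end())
        std::cout << "split block at index "
                  << (ToSplit - Comparisons.begin()) << '\n'; // index 1
    }
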
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index 4e010f8704d0..32bb62129e8f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -213,54 +213,33 @@ bool NaryReassociatePass::runImpl(Function &F, AssumptionCache *AC_,
return Changed;
}
-// Explicitly list the instruction types NaryReassociate handles for now.
-static bool isPotentiallyNaryReassociable(Instruction *I) {
- switch (I->getOpcode()) {
- case Instruction::Add:
- case Instruction::GetElementPtr:
- case Instruction::Mul:
- return true;
- default:
- return false;
- }
-}
-
bool NaryReassociatePass::doOneIteration(Function &F) {
bool Changed = false;
SeenExprs.clear();
// Process the basic blocks in a depth first traversal of the dominator
// tree. This order ensures that all bases of a candidate are in Candidates
// when we process it.
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
for (const auto Node : depth_first(DT)) {
BasicBlock *BB = Node->getBlock();
for (auto I = BB->begin(); I != BB->end(); ++I) {
- if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(&*I)) {
- const SCEV *OldSCEV = SE->getSCEV(&*I);
- if (Instruction *NewI = tryReassociate(&*I)) {
- Changed = true;
- SE->forgetValue(&*I);
- I->replaceAllUsesWith(NewI);
- WeakVH NewIExist = NewI;
- // If SeenExprs/NewIExist contains I's WeakTrackingVH/WeakVH, that
- // entry will be replaced with nullptr if deleted.
- RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
- if (!NewIExist) {
- // Rare occation where the new instruction (NewI) have been removed,
- // probably due to parts of the input code was dead from the
- // beginning, reset the iterator and start over from the beginning
- I = BB->begin();
- continue;
- }
- I = NewI->getIterator();
- }
- // Add the rewritten instruction to SeenExprs; the original instruction
- // is deleted.
- const SCEV *NewSCEV = SE->getSCEV(&*I);
- SeenExprs[NewSCEV].push_back(WeakTrackingVH(&*I));
+ Instruction *OrigI = &*I;
+ const SCEV *OrigSCEV = nullptr;
+ if (Instruction *NewI = tryReassociate(OrigI, OrigSCEV)) {
+ Changed = true;
+ OrigI->replaceAllUsesWith(NewI);
+
+ // Add 'OrigI' to the list of dead instructions.
+ DeadInsts.push_back(WeakTrackingVH(OrigI));
+ // Add the rewritten instruction to SeenExprs; the original
+ // instruction is deleted.
+ const SCEV *NewSCEV = SE->getSCEV(NewI);
+ SeenExprs[NewSCEV].push_back(WeakTrackingVH(NewI));
+
// Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
// is equivalent to I. However, ScalarEvolution::getSCEV may
- // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
- // we reassociate
+ // weaken nsw causing NewSCEV not to equal OldSCEV. For example,
+ // suppose we reassociate
// I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4
// to
// NewI = &a[sext(i)] + sext(j).
@@ -274,32 +253,47 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
// equivalence, we add I to SeenExprs[OldSCEV] as well so that we can
// map both SCEV before and after tryReassociate(I) to I.
//
- // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
- if (NewSCEV != OldSCEV)
- SeenExprs[OldSCEV].push_back(WeakTrackingVH(&*I));
- }
+ // This improvement is exercised in @reassociate_gep_nsw in
+ // nary-gep.ll.
+ if (NewSCEV != OrigSCEV)
+ SeenExprs[OrigSCEV].push_back(WeakTrackingVH(NewI));
+ } else if (OrigSCEV)
+ SeenExprs[OrigSCEV].push_back(WeakTrackingVH(OrigI));
}
}
+ // Delete all dead instructions from 'DeadInsts'.
+ // Please note ScalarEvolution is updated along the way.
+ RecursivelyDeleteTriviallyDeadInstructionsPermissive(
+ DeadInsts, TLI, nullptr, [this](Value *V) { SE->forgetValue(V); });
+
return Changed;
}
-Instruction *NaryReassociatePass::tryReassociate(Instruction *I) {
+Instruction *NaryReassociatePass::tryReassociate(Instruction * I,
+ const SCEV *&OrigSCEV) {
+
+ if (!SE->isSCEVable(I->getType()))
+ return nullptr;
+
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Mul:
+ OrigSCEV = SE->getSCEV(I);
return tryReassociateBinaryOp(cast<BinaryOperator>(I));
case Instruction::GetElementPtr:
+ OrigSCEV = SE->getSCEV(I);
return tryReassociateGEP(cast<GetElementPtrInst>(I));
default:
- llvm_unreachable("should be filtered out by isPotentiallyNaryReassociable");
+ return nullptr;
}
+
+ llvm_unreachable("should not be reached");
+ return nullptr;
}
static bool isGEPFoldable(GetElementPtrInst *GEP,
const TargetTransformInfo *TTI) {
- SmallVector<const Value*, 4> Indices;
- for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
- Indices.push_back(*I);
+ SmallVector<const Value *, 4> Indices(GEP->indices());
return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
Indices) == TargetTransformInfo::TCC_Free;
}
@@ -375,8 +369,8 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
// Replace the I-th index with LHS.
IndexExprs[I] = SE->getSCEV(LHS);
if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
- DL->getTypeSizeInBits(LHS->getType()) <
- DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) {
+ DL->getTypeSizeInBits(LHS->getType()).getFixedSize() <
+ DL->getTypeSizeInBits(GEP->getOperand(I)->getType()).getFixedSize()) {
// Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
// zext if the source operand is proved non-negative. We should do that
// consistently so that CandidateExpr more likely appears before. See
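
The doOneIteration changes above stop deleting each replaced instruction on the spot; the originals are queued in DeadInsts and removed in one batch, with ScalarEvolution told to forget every value just before it goes away. A rough standalone sketch of that deferred-deletion-with-callback idea, using made-up stand-ins rather than LLVM types:

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for the ScalarEvolution bookkeeping.
    struct FakeSE {
      void forgetValue(const std::string &V) { std::cout << "forget " << V << '\n'; }
    };

    // Mirrors the shape of RecursivelyDeleteTriviallyDeadInstructionsPermissive:
    // the callback runs for each value right before it is dropped.
    static void deleteAllDead(std::vector<std::string> &DeadInsts,
                              const std::function<void(const std::string &)> &AboutToDelete) {
      for (const std::string &V : DeadInsts)
        AboutToDelete(V);
      DeadInsts.clear();
    }

    int main() {
      FakeSE SE;
      std::vector<std::string> DeadInsts{"%old.add", "%old.gep"};
      deleteAllDead(DeadInsts, [&SE](const std::string &V) { SE.forgetValue(V); });
    }
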
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 0ed1773373a7..281d47c8625f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -662,7 +662,8 @@ public:
const DataLayout &DL)
: F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), AC(AC), DL(DL),
PredInfo(std::make_unique<PredicateInfo>(F, *DT, *AC)),
- SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false) {}
+ SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false,
+ /*CanUseUndef=*/false) {}
bool runGVN();
@@ -800,12 +801,7 @@ private:
Value *findPHIOfOpsLeader(const Expression *, const Instruction *,
const BasicBlock *) const;
- // New instruction creation.
- void handleNewInstruction(Instruction *) {}
-
// Various instruction touch utilities
- template <typename Map, typename KeyType, typename Func>
- void for_each_found(Map &, const KeyType &, Func);
template <typename Map, typename KeyType>
void touchAndErase(Map &, const KeyType &);
void markUsersTouched(Value *);
@@ -834,7 +830,6 @@ private:
BasicBlock *getBlockForValue(Value *V) const;
void deleteExpression(const Expression *E) const;
MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
- MemoryAccess *getDefiningAccess(const MemoryAccess *) const;
MemoryPhi *getMemoryAccess(const BasicBlock *) const;
template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
@@ -1253,6 +1248,7 @@ const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) const {
const CallExpression *
NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const {
// FIXME: Add operand bundles for calls.
+ // FIXME: Allow commutative matching for intrinsics.
auto *E =
new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA);
setBasicExpressionInfo(CI, E);
@@ -1539,90 +1535,39 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
LLVM_DEBUG(dbgs() << "Found predicate info from instruction !\n");
- auto *PWC = dyn_cast<PredicateWithCondition>(PI);
- if (!PWC)
+ const Optional<PredicateConstraint> &Constraint = PI->getConstraint();
+ if (!Constraint)
return nullptr;
- auto *CopyOf = I->getOperand(0);
- auto *Cond = PWC->Condition;
-
- // If this a copy of the condition, it must be either true or false depending
- // on the predicate info type and edge.
- if (CopyOf == Cond) {
- // We should not need to add predicate users because the predicate info is
- // already a use of this operand.
- if (isa<PredicateAssume>(PI))
- return createConstantExpression(ConstantInt::getTrue(Cond->getType()));
- if (auto *PBranch = dyn_cast<PredicateBranch>(PI)) {
- if (PBranch->TrueEdge)
- return createConstantExpression(ConstantInt::getTrue(Cond->getType()));
- return createConstantExpression(ConstantInt::getFalse(Cond->getType()));
- }
- if (auto *PSwitch = dyn_cast<PredicateSwitch>(PI))
- return createConstantExpression(cast<Constant>(PSwitch->CaseValue));
- }
+ CmpInst::Predicate Predicate = Constraint->Predicate;
+ Value *CmpOp0 = I->getOperand(0);
+ Value *CmpOp1 = Constraint->OtherOp;
- // Not a copy of the condition, so see what the predicates tell us about this
- // value. First, though, we check to make sure the value is actually a copy
- // of one of the condition operands. It's possible, in certain cases, for it
- // to be a copy of a predicateinfo copy. In particular, if two branch
- // operations use the same condition, and one branch dominates the other, we
- // will end up with a copy of a copy. This is currently a small deficiency in
- // predicateinfo. What will end up happening here is that we will value
- // number both copies the same anyway.
-
- // Everything below relies on the condition being a comparison.
- auto *Cmp = dyn_cast<CmpInst>(Cond);
- if (!Cmp)
- return nullptr;
+ Value *FirstOp = lookupOperandLeader(CmpOp0);
+ Value *SecondOp = lookupOperandLeader(CmpOp1);
+ Value *AdditionallyUsedValue = CmpOp0;
- if (CopyOf != Cmp->getOperand(0) && CopyOf != Cmp->getOperand(1)) {
- LLVM_DEBUG(dbgs() << "Copy is not of any condition operands!\n");
- return nullptr;
- }
- Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0));
- Value *SecondOp = lookupOperandLeader(Cmp->getOperand(1));
- bool SwappedOps = false;
// Sort the ops.
if (shouldSwapOperands(FirstOp, SecondOp)) {
std::swap(FirstOp, SecondOp);
- SwappedOps = true;
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ AdditionallyUsedValue = CmpOp1;
}
- CmpInst::Predicate Predicate =
- SwappedOps ? Cmp->getSwappedPredicate() : Cmp->getPredicate();
-
- if (isa<PredicateAssume>(PI)) {
- // If we assume the operands are equal, then they are equal.
- if (Predicate == CmpInst::ICMP_EQ) {
- addPredicateUsers(PI, I);
- addAdditionalUsers(SwappedOps ? Cmp->getOperand(1) : Cmp->getOperand(0),
- I);
- return createVariableOrConstant(FirstOp);
- }
+
+ if (Predicate == CmpInst::ICMP_EQ) {
+ addPredicateUsers(PI, I);
+ addAdditionalUsers(AdditionallyUsedValue, I);
+ return createVariableOrConstant(FirstOp);
}
- if (const auto *PBranch = dyn_cast<PredicateBranch>(PI)) {
- // If we are *not* a copy of the comparison, we may equal to the other
- // operand when the predicate implies something about equality of
- // operations. In particular, if the comparison is true/false when the
- // operands are equal, and we are on the right edge, we know this operation
- // is equal to something.
- if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) ||
- (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) {
- addPredicateUsers(PI, I);
- addAdditionalUsers(SwappedOps ? Cmp->getOperand(1) : Cmp->getOperand(0),
- I);
- return createVariableOrConstant(FirstOp);
- }
- // Handle the special case of floating point.
- if (((PBranch->TrueEdge && Predicate == CmpInst::FCMP_OEQ) ||
- (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) &&
- isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) {
- addPredicateUsers(PI, I);
- addAdditionalUsers(SwappedOps ? Cmp->getOperand(1) : Cmp->getOperand(0),
- I);
- return createConstantExpression(cast<Constant>(FirstOp));
- }
+
+ // Handle the special case of floating point.
+ if (Predicate == CmpInst::FCMP_OEQ && isa<ConstantFP>(FirstOp) &&
+ !cast<ConstantFP>(FirstOp)->isZero()) {
+ addPredicateUsers(PI, I);
+ addAdditionalUsers(AdditionallyUsedValue, I);
+ return createConstantExpression(cast<Constant>(FirstOp));
}
+
return nullptr;
}
@@ -2044,16 +1989,6 @@ NewGVN::performSymbolicEvaluation(Value *V,
return E;
}
-// Look up a container in a map, and then call a function for each thing in the
-// found container.
-template <typename Map, typename KeyType, typename Func>
-void NewGVN::for_each_found(Map &M, const KeyType &Key, Func F) {
- const auto Result = M.find_as(Key);
- if (Result != M.end())
- for (typename Map::mapped_type::value_type Mapped : Result->second)
- F(Mapped);
-}
-
// Look up a container of values/instructions in a map, and touch all the
// instructions in the container. Then erase value from the map.
template <typename Map, typename KeyType>
@@ -2941,8 +2876,7 @@ void NewGVN::cleanupTables() {
}
while (!TempInst.empty()) {
- auto *I = TempInst.back();
- TempInst.pop_back();
+ auto *I = TempInst.pop_back_val();
I->deleteValue();
}
@@ -3437,10 +3371,9 @@ bool NewGVN::runGVN() {
for (auto &B : RPOT) {
auto *Node = DT->getNode(B);
if (Node->getNumChildren() > 1)
- llvm::sort(Node->begin(), Node->end(),
- [&](const DomTreeNode *A, const DomTreeNode *B) {
- return RPOOrdering[A] < RPOOrdering[B];
- });
+ llvm::sort(*Node, [&](const DomTreeNode *A, const DomTreeNode *B) {
+ return RPOOrdering[A] < RPOOrdering[B];
+ });
}
// Now a standard depth first ordering of the domtree is equivalent to RPO.
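
The rewritten performSymbolicPredicateInfoEvaluation above canonicalizes the constraint by swapping the compare operands and the predicate together, instead of tracking a SwappedOps flag. A standalone sketch of that swap-operands/swap-predicate normalization; the Pred enum and swappedPredicate helper are made-up stand-ins for CmpInst::Predicate and CmpInst::getSwappedPredicate:

    #include <iostream>
    #include <string>
    #include <utility>

    enum class Pred { SLT, SGT, EQ };

    // The predicate that yields the same result once the operands are exchanged.
    static Pred swappedPredicate(Pred P) {
      switch (P) {
      case Pred::SLT: return Pred::SGT;
      case Pred::SGT: return Pred::SLT;
      case Pred::EQ:  return Pred::EQ;
      }
      return P;
    }

    int main() {
      std::string FirstOp = "%b", SecondOp = "%a";
      Pred P = Pred::SLT;                   // constraint: %b < %a
      bool ShouldSwap = FirstOp > SecondOp; // stand-in for shouldSwapOperands()
      if (ShouldSwap) {
        std::swap(FirstOp, SecondOp);       // canonicalize the operand order...
        P = swappedPredicate(P);            // ...and keep the constraint equivalent
      }
      std::cout << FirstOp << (P == Pred::SGT ? " > " : " < ") << SecondOp << '\n';
    }
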
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 4553b23532f2..a110f7d5c241 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -243,7 +243,7 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
BasicBlock *Pred) {
// A conservative bound on the loop as a whole.
const SCEV *MaxTrips = SE->getConstantMaxBackedgeTakenCount(L);
- if (MaxTrips != SE->getCouldNotCompute() &&
+ if (!isa<SCEVCouldNotCompute>(MaxTrips) &&
SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(
CountedLoopTripWidth))
return true;
@@ -255,7 +255,7 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
// This returns an exact expression only. TODO: We really only need an
// upper bound here, but SE doesn't expose that.
const SCEV *MaxExec = SE->getExitCount(L, Pred);
- if (MaxExec != SE->getCouldNotCompute() &&
+ if (!isa<SCEVCouldNotCompute>(MaxExec) &&
SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(
CountedLoopTripWidth))
return true;
@@ -435,7 +435,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
return Cursor;
}
-static const char *const GCSafepointPollName = "gc.safepoint_poll";
+const char GCSafepointPollName[] = "gc.safepoint_poll";
static bool isGCSafepointPoll(Function &F) {
return F.getName().equals(GCSafepointPollName);
@@ -589,8 +589,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
for (Instruction *PollLocation : PollsNeeded) {
std::vector<CallBase *> RuntimeCalls;
InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
- ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
- RuntimeCalls.end());
+ llvm::append_range(ParsePointNeeded, RuntimeCalls);
}
return Modified;
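
The two hunks above replace a pointer comparison against the SE->getCouldNotCompute() sentinel with a kind test via isa<SCEVCouldNotCompute>, which reads the same but does not depend on the sentinel object. A tiny standalone sketch of that kind-test idea, with dynamic_cast and made-up types standing in for llvm::isa<> and the SCEV hierarchy:

    #include <iostream>

    struct SCEVNode { virtual ~SCEVNode() = default; };
    struct SCEVCouldNotComputeNode : SCEVNode {}; // stand-in for SCEVCouldNotCompute

    int main() {
      SCEVNode *MaxTrips = new SCEVCouldNotComputeNode();
      // Test the node's kind directly instead of comparing against a singleton.
      bool Unknown = dynamic_cast<SCEVCouldNotComputeNode *>(MaxTrips) != nullptr;
      std::cout << (Unknown ? "trip count unknown\n" : "trip count bounded\n");
      delete MaxTrips;
    }
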
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
index ba7f367267fe..dffeb7cc227b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -920,6 +920,100 @@ static Value *NegateValue(Value *V, Instruction *BI,
return NewNeg;
}
+// See if this `or` looks like a load widening reduction, i.e. that it
+// consists only of `or`/`shl`/`zext`/`load` nodes. Note that we do not
+// verify that the pattern is *really* a load widening reduction, i.e.
+// that it can actually be replaced with a widened load, only that it
+// mostly looks like one.
+static bool isLoadCombineCandidate(Instruction *Or) {
+ SmallVector<Instruction *, 8> Worklist;
+ SmallSet<Instruction *, 8> Visited;
+
+ auto Enqueue = [&](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ // Each node of an `or` reduction must be an instruction,
+ if (!I)
+ return false; // Node is certainly not part of an `or` load reduction.
+ // Only process instructions we have never processed before.
+ if (Visited.insert(I).second)
+ Worklist.emplace_back(I);
+ return true; // Will need to look at parent nodes.
+ };
+
+ if (!Enqueue(Or))
+ return false; // Not an `or` reduction pattern.
+
+ while (!Worklist.empty()) {
+ auto *I = Worklist.pop_back_val();
+
+ // Okay, which instruction is this node?
+ switch (I->getOpcode()) {
+ case Instruction::Or:
+ // Got an `or` node. That's fine, just recurse into its operands.
+ for (Value *Op : I->operands())
+ if (!Enqueue(Op))
+ return false; // Not an `or` reduction pattern.
+ continue;
+
+ case Instruction::Shl:
+ case Instruction::ZExt:
+ // `shl`/`zext` nodes are fine, just recurse into their base operand.
+ if (!Enqueue(I->getOperand(0)))
+ return false; // Not an `or` reduction pattern.
+ continue;
+
+ case Instruction::Load:
+ // Perfect, `load` node means we've reached an edge of the graph.
+ continue;
+
+ default: // Unknown node.
+ return false; // Not an `or` reduction pattern.
+ }
+ }
+
+ return true;
+}
+
+/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
+static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
+ // Don't bother to convert this unless one of the operands is a reassociable
+ // add, sub, or mul, or its only user is one of those.
+ // This is only a compile-time improvement; it is not needed for correctness!
+ auto isInteresting = [](Value *V) {
+ for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul})
+ if (isReassociableOp(V, Op))
+ return true;
+ return false;
+ };
+
+ if (any_of(Or->operands(), isInteresting))
+ return true;
+
+ Value *VB = Or->user_back();
+ if (Or->hasOneUse() && isInteresting(VB))
+ return true;
+
+ return false;
+}
+
+/// If we have (X|Y) and X and Y have no common bits set, transform this
+/// into (X+Y) to allow further arithmetic reassociation.
+static BinaryOperator *ConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
+ // Convert an or into an add.
+ BinaryOperator *New =
+ CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or);
+ New->setHasNoSignedWrap();
+ New->setHasNoUnsignedWrap();
+ New->takeName(Or);
+
+ // Everyone now refers to the add instruction.
+ Or->replaceAllUsesWith(New);
+ New->setDebugLoc(Or->getDebugLoc());
+
+ LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
+ return New;
+}
+
/// Return true if we should break up this subtract of X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
@@ -1034,8 +1128,7 @@ static Value *EmitAddTreeOfValues(Instruction *I,
SmallVectorImpl<WeakTrackingVH> &Ops) {
if (Ops.size() == 1) return Ops.back();
- Value *V1 = Ops.back();
- Ops.pop_back();
+ Value *V1 = Ops.pop_back_val();
Value *V2 = EmitAddTreeOfValues(I, Ops);
return CreateAdd(V2, V1, "reass.add", I, I);
}
@@ -1899,7 +1992,7 @@ Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
OrderedSet &Insts) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
- SmallVector<Value *, 4> Ops(I->op_begin(), I->op_end());
+ SmallVector<Value *, 4> Ops(I->operands());
ValueRankMap.erase(I);
Insts.remove(I);
RedoInsts.remove(I);
@@ -1916,7 +2009,7 @@ void ReassociatePass::EraseInst(Instruction *I) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
- SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ SmallVector<Value *, 8> Ops(I->operands());
// Erase the dead instruction.
ValueRankMap.erase(I);
RedoInsts.remove(I);
@@ -2116,6 +2209,19 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
if (I->getType()->isIntegerTy(1))
return;
+ // If this is a bitwise or instruction of operands
+ // with no common bits set, convert it to X+Y.
+ if (I->getOpcode() == Instruction::Or &&
+ ShouldConvertOrWithNoCommonBitsToAdd(I) && !isLoadCombineCandidate(I) &&
+ haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
+ I->getModule()->getDataLayout(), /*AC=*/nullptr, I,
+ /*DT=*/nullptr)) {
+ Instruction *NI = ConvertOrWithNoCommonBitsToAdd(I);
+ RedoInsts.insert(I);
+ MadeChange = true;
+ I = NI;
+ }
+
// If this is a subtract instruction which is not already in negate form,
// see if we can convert it to X+-Y.
if (I->getOpcode() == Instruction::Sub) {
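
The new or-to-add conversion above rests on a simple bit-level fact: when haveNoCommonBitsSet holds, every bit position carries at most one '1', so there are no carries and (X | Y) equals (X + Y), and the add cannot wrap, hence the nuw/nsw flags. A short standalone check of that identity with plain integers:

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    int main() {
      uint32_t X = 0xFFFF0000u, Y = 0x00001234u;
      assert((X & Y) == 0);       // the operands share no set bits
      assert((X | Y) == X + Y);   // so or and add produce the same value
      std::cout << std::hex << (X | Y) << '\n'; // ffff1234
    }
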
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 0716c1320982..a49b9ad3f62b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -15,17 +15,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/Reg2Mem.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
@@ -35,43 +41,17 @@ using namespace llvm;
STATISTIC(NumRegsDemoted, "Number of registers demoted");
STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
-namespace {
- struct RegToMem : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- RegToMem() : FunctionPass(ID) {
- initializeRegToMemPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(BreakCriticalEdgesID);
- AU.addPreservedID(BreakCriticalEdgesID);
- }
-
- bool valueEscapes(const Instruction *Inst) const {
- const BasicBlock *BB = Inst->getParent();
- for (const User *U : Inst->users()) {
- const Instruction *UI = cast<Instruction>(U);
- if (UI->getParent() != BB || isa<PHINode>(UI))
- return true;
- }
- return false;
- }
-
- bool runOnFunction(Function &F) override;
- };
+static bool valueEscapes(const Instruction &Inst) {
+ const BasicBlock *BB = Inst.getParent();
+ for (const User *U : Inst.users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != BB || isa<PHINode>(UI))
+ return true;
+ }
+ return false;
}
-char RegToMem::ID = 0;
-INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
-INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
- false, false)
-
-bool RegToMem::runOnFunction(Function &F) {
- if (F.isDeclaration() || skipFunction(F))
- return false;
-
+static bool runPass(Function &F) {
// Insert all new allocas into entry block.
BasicBlock *BBEntry = &F.getEntryBlock();
assert(pred_empty(BBEntry) &&
@@ -90,40 +70,72 @@ bool RegToMem::runOnFunction(Function &F) {
// Find the escaped instructions. But don't create stack slots for
// allocas in entry block.
std::list<Instruction*> WorkList;
- for (BasicBlock &ibb : F)
- for (BasicBlock::iterator iib = ibb.begin(), iie = ibb.end(); iib != iie;
- ++iib) {
- if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
- valueEscapes(&*iib)) {
- WorkList.push_front(&*iib);
- }
- }
+ for (Instruction &I : instructions(F))
+ if (!(isa<AllocaInst>(I) && I.getParent() == BBEntry) && valueEscapes(I))
+ WorkList.push_front(&I);
// Demote escaped instructions
NumRegsDemoted += WorkList.size();
- for (Instruction *ilb : WorkList)
- DemoteRegToStack(*ilb, false, AllocaInsertionPoint);
+ for (Instruction *I : WorkList)
+ DemoteRegToStack(*I, false, AllocaInsertionPoint);
WorkList.clear();
// Find all phi's
- for (BasicBlock &ibb : F)
- for (BasicBlock::iterator iib = ibb.begin(), iie = ibb.end(); iib != iie;
- ++iib)
- if (isa<PHINode>(iib))
- WorkList.push_front(&*iib);
+ for (BasicBlock &BB : F)
+ for (auto &Phi : BB.phis())
+ WorkList.push_front(&Phi);
// Demote phi nodes
NumPhisDemoted += WorkList.size();
- for (Instruction *ilb : WorkList)
- DemotePHIToStack(cast<PHINode>(ilb), AllocaInsertionPoint);
+ for (Instruction *I : WorkList)
+ DemotePHIToStack(cast<PHINode>(I), AllocaInsertionPoint);
return true;
}
+PreservedAnalyses RegToMemPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *LI = &AM.getResult<LoopAnalysis>(F);
+ unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
+ bool Changed = runPass(F);
+ if (N == 0 && !Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
+
+namespace {
+struct RegToMemLegacy : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ RegToMemLegacy() : FunctionPass(ID) {
+ initializeRegToMemLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addPreservedID(BreakCriticalEdgesID);
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (F.isDeclaration() || skipFunction(F))
+ return false;
+ return runPass(F);
+ }
+};
+} // namespace
+
+char RegToMemLegacy::ID = 0;
+INITIALIZE_PASS_BEGIN(RegToMemLegacy, "reg2mem",
+ "Demote all values to stack slots", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_END(RegToMemLegacy, "reg2mem",
+ "Demote all values to stack slots", false, false)
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
-char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
+char &llvm::DemoteRegisterToMemoryID = RegToMemLegacy::ID;
FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
- return new RegToMem();
+ return new RegToMemLegacy();
}
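
The Reg2Mem refactor above keeps the same escape test: a value gets a stack slot when some user sits in another block or is a PHI node. A minimal standalone sketch of that valueEscapes logic over a made-up MiniInst type:

    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical miniature instruction: it knows its block and its users.
    struct MiniInst {
      std::string Block;
      bool IsPhi = false;
      std::vector<const MiniInst *> Users;
    };

    // Same rule as valueEscapes() in the patch: escape if any user lives in a
    // different block or is a PHI.
    static bool valueEscapes(const MiniInst &I) {
      for (const MiniInst *U : I.Users)
        if (U->Block != I.Block || U->IsPhi)
          return true;
      return false;
    }

    int main() {
      MiniInst Use{"bb2"};
      MiniInst Def{"bb1", false, {&Use}};
      std::cout << (valueEscapes(Def) ? "demote to stack\n" : "keep in register\n");
    }
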
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index dc2ad14ae61e..b7830555bf73 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1487,7 +1487,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
uint32_t NumPatchBytes = 0;
uint32_t Flags = uint32_t(StatepointFlags::None);
- ArrayRef<Use> CallArgs(Call->arg_begin(), Call->arg_end());
+ SmallVector<Value *, 8> CallArgs(Call->args());
Optional<ArrayRef<Use>> DeoptArgs;
if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt))
DeoptArgs = Bundle->Inputs;
@@ -1520,7 +1520,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
Value *CallTarget = Call->getCalledOperand();
if (Function *F = dyn_cast<Function>(CallTarget)) {
- if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) {
+ auto IID = F->getIntrinsicID();
+ if (IID == Intrinsic::experimental_deoptimize) {
// Calls to llvm.experimental.deoptimize are lowered to calls to the
// __llvm_deoptimize symbol. We want to resolve this now, since the
// verifier does not allow taking the address of an intrinsic function.
@@ -1540,6 +1541,101 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
.getCallee();
IsDeoptimize = true;
+ } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
+ IID == Intrinsic::memmove_element_unordered_atomic) {
+ // Unordered atomic memcpy and memmove intrinsics which are not explicitly
+ // marked as "gc-leaf-function" should be lowered in a GC parseable way.
+ // Specifically, these calls should be lowered to the
+ // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
+ // Similarly to __llvm_deoptimize we want to resolve this now, since the
+ // verifier does not allow taking the address of an intrinsic function.
+ //
+ // Moreover we need to shuffle the arguments for the call in order to
+ // accommodate GC. The underlying source and destination objects might be
+ // relocated during the copy operation should a GC occur. To relocate the
+ // derived source and destination pointers the implementation of the
+ // intrinsic should know the corresponding base pointers.
+ //
+ // To make the base pointers available pass them explicitly as arguments:
+ // memcpy(dest_derived, source_derived, ...) =>
+ // memcpy(dest_base, dest_offset, source_base, source_offset, ...)
+ auto &Context = Call->getContext();
+ auto &DL = Call->getModule()->getDataLayout();
+ auto GetBaseAndOffset = [&](Value *Derived) {
+ assert(Result.PointerToBase.count(Derived));
+ unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
+ unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
+ Value *Base = Result.PointerToBase.find(Derived)->second;
+ Value *Base_int = Builder.CreatePtrToInt(
+ Base, Type::getIntNTy(Context, IntPtrSize));
+ Value *Derived_int = Builder.CreatePtrToInt(
+ Derived, Type::getIntNTy(Context, IntPtrSize));
+ return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
+ };
+
+ auto *Dest = CallArgs[0];
+ Value *DestBase, *DestOffset;
+ std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);
+
+ auto *Source = CallArgs[1];
+ Value *SourceBase, *SourceOffset;
+ std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);
+
+ auto *LengthInBytes = CallArgs[2];
+ auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);
+
+ CallArgs.clear();
+ CallArgs.push_back(DestBase);
+ CallArgs.push_back(DestOffset);
+ CallArgs.push_back(SourceBase);
+ CallArgs.push_back(SourceOffset);
+ CallArgs.push_back(LengthInBytes);
+
+ SmallVector<Type *, 8> DomainTy;
+ for (Value *Arg : CallArgs)
+ DomainTy.push_back(Arg->getType());
+ auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
+ /* isVarArg = */ false);
+
+ auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
+ uint64_t ElementSize = ElementSizeCI->getZExtValue();
+ if (IID == Intrinsic::memcpy_element_unordered_atomic) {
+ switch (ElementSize) {
+ case 1:
+ return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
+ case 2:
+ return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
+ case 4:
+ return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
+ case 8:
+ return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
+ case 16:
+ return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
+ default:
+ llvm_unreachable("unexpected element size!");
+ }
+ }
+ assert(IID == Intrinsic::memmove_element_unordered_atomic);
+ switch (ElementSize) {
+ case 1:
+ return "__llvm_memmove_element_unordered_atomic_safepoint_1";
+ case 2:
+ return "__llvm_memmove_element_unordered_atomic_safepoint_2";
+ case 4:
+ return "__llvm_memmove_element_unordered_atomic_safepoint_4";
+ case 8:
+ return "__llvm_memmove_element_unordered_atomic_safepoint_8";
+ case 16:
+ return "__llvm_memmove_element_unordered_atomic_safepoint_16";
+ default:
+ llvm_unreachable("unexpected element size!");
+ }
+ };
+
+ CallTarget =
+ F->getParent()
+ ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy)
+ .getCallee();
}
}
@@ -1940,8 +2036,7 @@ static void relocationViaAlloca(
/// tests in ways which make them less useful in testing fused safepoints.
template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
SmallSet<T, 8> Seen;
- Vec.erase(remove_if(Vec, [&](const T &V) { return !Seen.insert(V).second; }),
- Vec.end());
+ erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; });
}
/// Insert holders so that each Value is obviously live through the entire
@@ -2013,10 +2108,10 @@ static Value* findRematerializableChainToBasePointer(
// Helper function for the "rematerializeLiveValues". Compute cost of the use
// chain we are going to rematerialize.
-static unsigned
-chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
+static InstructionCost
+chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
TargetTransformInfo &TTI) {
- unsigned Cost = 0;
+ InstructionCost Cost = 0;
for (Instruction *Instr : Chain) {
if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
@@ -2025,8 +2120,8 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
Type *SrcTy = CI->getOperand(0)->getType();
Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
- TargetTransformInfo::TCK_SizeAndLatency,
- CI);
+ TTI::getCastContextHint(CI),
+ TargetTransformInfo::TCK_SizeAndLatency, CI);
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
// Cost of the address calculation
@@ -2123,7 +2218,7 @@ static void rematerializeLiveValues(CallBase *Call,
assert(Info.LiveSet.count(AlternateRootPhi));
}
// Compute cost of this chain
- unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
+ InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
// TODO: We can also account for cases when we will be able to remove some
// of the rematerialized values by later optimization passes. I.e if
// we rematerialized several intersecting chains. Or if original values
@@ -2404,8 +2499,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// That Value* no longer exists and we need to use the new gc_result.
// Thankfully, the live set is embedded in the statepoint (and updated), so
// we just grab that.
- Live.insert(Live.end(), Info.StatepointToken->gc_args_begin(),
- Info.StatepointToken->gc_args_end());
+ llvm::append_range(Live, Info.StatepointToken->gc_args());
#ifndef NDEBUG
// Do some basic sanity checks on our liveness results before performing
// relocation. Relocation can and will turn mistakes in liveness results
@@ -2581,8 +2675,27 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");
auto NeedsRewrite = [&TLI](Instruction &I) {
- if (const auto *Call = dyn_cast<CallBase>(&I))
- return !callsGCLeafFunction(Call, TLI) && !isa<GCStatepointInst>(Call);
+ if (const auto *Call = dyn_cast<CallBase>(&I)) {
+ if (isa<GCStatepointInst>(Call))
+ return false;
+ if (callsGCLeafFunction(Call, TLI))
+ return false;
+
+ // Normally it's up to the frontend to make sure that non-leaf calls also
+ // have proper deopt state if it is required. We make an exception for
+ // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics
+ // these are non-leaf by default. They might be generated by the optimizer
+ // which doesn't know how to produce a proper deopt state. So if we see a
+ // non-leaf memcpy/memmove without deopt state just treat it as a leaf
+ // copy and don't produce a statepoint.
+ if (!AllowStatepointWithNoDeoptInfo &&
+ !Call->getOperandBundle(LLVMContext::OB_deopt)) {
+ assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
+ "Don't expect any other calls here!");
+ return false;
+ }
+ return true;
+ }
return false;
};
@@ -2620,10 +2733,8 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// of liveness sets for no good reason. It may be harder to do this post
// insertion since relocations and base phis can confuse things.
for (BasicBlock &BB : F)
- if (BB.getUniquePredecessor()) {
- MadeChange = true;
- FoldSingleEntryPHINodes(&BB);
- }
+ if (BB.getUniquePredecessor())
+ MadeChange |= FoldSingleEntryPHINodes(&BB);
// Before we start introducing relocations, we want to tweak the IR a bit to
// avoid unfortunate code generation effects. The main example is that we
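
The unordered-atomic memcpy/memmove lowering above rewrites each derived pointer as a (base, offset) pair so the safepoint implementation can re-derive it after the collector moves the underlying object. A standalone arithmetic sketch of that base/offset round trip, with plain integers standing in for the ptrtoint values built by GetBaseAndOffset:

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    int main() {
      uint64_t DestBase = 0x1000, DestDerived = 0x1010;
      uint64_t DestOffset = DestDerived - DestBase; // the Builder.CreateSub step
      // ... the GC relocates the object; only the base pointer changes ...
      uint64_t RelocatedBase = 0x8000;
      uint64_t RelocatedDerived = RelocatedBase + DestOffset;
      // The offset survives relocation, so the derived pointer is recoverable.
      assert(RelocatedDerived - RelocatedBase == DestDerived - DestBase);
      std::cout << std::hex << RelocatedDerived << '\n'; // 8010
    }
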
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
index 5ebd3b71fe78..8feed9e9ebfe 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -33,6 +34,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -103,8 +105,7 @@ bool isConstant(const ValueLatticeElement &LV) {
// ValueLatticeElement::isOverdefined() and is intended to be used in the
// transition to ValueLatticeElement.
bool isOverdefined(const ValueLatticeElement &LV) {
- return LV.isOverdefined() ||
- (LV.isConstantRange() && !LV.getConstantRange().isSingleElement());
+ return !LV.isUnknownOrUndef() && !isConstant(LV);
}
//===----------------------------------------------------------------------===//
@@ -233,7 +234,7 @@ public:
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
TrackedMultipleRetVals.insert(
std::make_pair(std::make_pair(F, i), ValueLatticeElement()));
- } else
+ } else if (!F->getReturnType()->isVoidTy())
TrackedRetVals.insert(std::make_pair(F, ValueLatticeElement()));
}
@@ -275,7 +276,7 @@ public:
// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
// block to the 'To' basic block is currently feasible.
- bool isEdgeFeasible(BasicBlock *From, BasicBlock *To);
+ bool isEdgeFeasible(BasicBlock *From, BasicBlock *To) const;
std::vector<ValueLatticeElement> getStructLatticeValueFor(Value *V) const {
std::vector<ValueLatticeElement> StructValues;
@@ -541,9 +542,14 @@ private:
auto Iter = AdditionalUsers.find(I);
if (Iter != AdditionalUsers.end()) {
+ // Copy additional users before notifying them of changes, because new
+ // users may be added, potentially invalidating the iterator.
+ SmallVector<Instruction *, 2> ToNotify;
for (User *U : Iter->second)
if (auto *UI = dyn_cast<Instruction>(U))
- OperandChangedState(UI);
+ ToNotify.push_back(UI);
+ for (Instruction *UI : ToNotify)
+ OperandChangedState(UI);
}
}
void handleCallOverdefined(CallBase &CB);
@@ -648,17 +654,30 @@ void SCCPSolver::getFeasibleSuccessors(Instruction &TI,
Succs[0] = true;
return;
}
- ValueLatticeElement SCValue = getValueState(SI->getCondition());
- ConstantInt *CI = getConstantInt(SCValue);
+ const ValueLatticeElement &SCValue = getValueState(SI->getCondition());
+ if (ConstantInt *CI = getConstantInt(SCValue)) {
+ Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true;
+ return;
+ }
- if (!CI) { // Overdefined or unknown condition?
- // All destinations are executable!
- if (!SCValue.isUnknownOrUndef())
- Succs.assign(TI.getNumSuccessors(), true);
+ // TODO: Switch on undef is UB. Stop passing false once the rest of LLVM
+ // is ready.
+ if (SCValue.isConstantRange(/*UndefAllowed=*/false)) {
+ const ConstantRange &Range = SCValue.getConstantRange();
+ for (const auto &Case : SI->cases()) {
+ const APInt &CaseValue = Case.getCaseValue()->getValue();
+ if (Range.contains(CaseValue))
+ Succs[Case.getSuccessorIndex()] = true;
+ }
+
+ // TODO: Determine whether default case is reachable.
+ Succs[SI->case_default()->getSuccessorIndex()] = true;
return;
}
- Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true;
+ // Overdefined or unknown condition? All destinations are executable!
+ if (!SCValue.isUnknownOrUndef())
+ Succs.assign(TI.getNumSuccessors(), true);
return;
}
@@ -704,7 +723,7 @@ void SCCPSolver::getFeasibleSuccessors(Instruction &TI,
// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
// block to the 'To' basic block is currently feasible.
-bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
+bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
// Check if we've called markEdgeExecutable on the edge yet. (We could
// be more aggressive and try to consider edges which haven't been marked
// yet, but there isn't any need.)
@@ -829,6 +848,16 @@ void SCCPSolver::visitCastInst(CastInst &I) {
auto &LV = getValueState(&I);
ConstantRange OpRange = OpSt.getConstantRange();
Type *DestTy = I.getDestTy();
+ // Vectors where all elements have the same known constant range are treated
+ // as a single constant range in the lattice. When bitcasting such vectors,
+ // there is a mis-match between the width of the lattice value (single
+ // constant range) and the original operands (vector). Go to overdefined in
+ // that case.
+ if (I.getOpcode() == Instruction::BitCast &&
+ I.getOperand(0)->getType()->isVectorTy() &&
+ OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy))
+ return (void)markOverdefined(&I);
+
ConstantRange Res =
OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy));
mergeInValue(LV, &I, ValueLatticeElement::getRange(Res));
@@ -1109,7 +1138,9 @@ static ValueLatticeElement getValueFromMetadata(const Instruction *I) {
if (I->getType()->isIntegerTy())
return ValueLatticeElement::getRange(
getConstantRangeFromMetadata(*Ranges));
- // TODO: Also handle MD_nonnull.
+ if (I->hasMetadata(LLVMContext::MD_nonnull))
+ return ValueLatticeElement::getNot(
+ ConstantPointerNull::get(cast<PointerType>(I->getType())));
return ValueLatticeElement::getOverdefined();
}
@@ -1262,55 +1293,33 @@ void SCCPSolver::handleCallResult(CallBase &CB) {
auto *PI = getPredicateInfoFor(&CB);
assert(PI && "Missing predicate info for ssa.copy");
- CmpInst *Cmp;
- bool TrueEdge;
- if (auto *PBranch = dyn_cast<PredicateBranch>(PI)) {
- Cmp = dyn_cast<CmpInst>(PBranch->Condition);
- TrueEdge = PBranch->TrueEdge;
- } else if (auto *PAssume = dyn_cast<PredicateAssume>(PI)) {
- Cmp = dyn_cast<CmpInst>(PAssume->Condition);
- TrueEdge = true;
- } else {
+ const Optional<PredicateConstraint> &Constraint = PI->getConstraint();
+ if (!Constraint) {
mergeInValue(ValueState[&CB], &CB, CopyOfVal);
return;
}
- // Everything below relies on the condition being a comparison.
- if (!Cmp) {
- mergeInValue(ValueState[&CB], &CB, CopyOfVal);
- return;
- }
+ CmpInst::Predicate Pred = Constraint->Predicate;
+ Value *OtherOp = Constraint->OtherOp;
- Value *RenamedOp = PI->RenamedOp;
- Value *CmpOp0 = Cmp->getOperand(0);
- Value *CmpOp1 = Cmp->getOperand(1);
- // Bail out if neither of the operands matches RenamedOp.
- if (CmpOp0 != RenamedOp && CmpOp1 != RenamedOp) {
- mergeInValue(ValueState[&CB], &CB, getValueState(CopyOf));
+ // Wait until OtherOp is resolved.
+ if (getValueState(OtherOp).isUnknown()) {
+ addAdditionalUser(OtherOp, &CB);
return;
}
- auto Pred = Cmp->getPredicate();
- if (CmpOp1 == RenamedOp) {
- std::swap(CmpOp0, CmpOp1);
- Pred = Cmp->getSwappedPredicate();
- }
-
- // Wait until CmpOp1 is resolved.
- if (getValueState(CmpOp1).isUnknown()) {
- addAdditionalUser(CmpOp1, &CB);
- return;
- }
-
- // The code below relies on PredicateInfo only inserting copies for the
- // true branch when the branch condition is an AND and only inserting
- // copies for the false branch when the branch condition is an OR. This
- // ensures we can intersect the range from the condition with the range of
- // CopyOf.
- if (!TrueEdge)
- Pred = CmpInst::getInversePredicate(Pred);
-
- ValueLatticeElement CondVal = getValueState(CmpOp1);
+ // TODO: Actually flip MayIncludeUndef for the created range to false,
+ // once most places in the optimizer respect the rule that branches on
+ // undef/poison are UB. The reason why the new range cannot be undef
+ // is as follows:
+ // The new range is based on a branch condition. That guarantees that
+ // neither of the compare operands can be undef in the branch targets,
+ // unless we have conditions that are always true/false (e.g. icmp ule
+ // i32, %a, i32_max). For the latter, an overdefined/empty range will be
+ // inferred, but the branch will get folded accordingly anyway.
+ bool MayIncludeUndef = !isa<PredicateAssume>(PI);
+
+ ValueLatticeElement CondVal = getValueState(OtherOp);
ValueLatticeElement &IV = ValueState[&CB];
if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) {
auto ImposedCR =
@@ -1334,30 +1343,47 @@ void SCCPSolver::handleCallResult(CallBase &CB) {
if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement())
NewCR = CopyOfCR;
- addAdditionalUser(CmpOp1, &CB);
- // TODO: Actually filp MayIncludeUndef for the created range to false,
- // once most places in the optimizer respect the branches on
- // undef/poison are UB rule. The reason why the new range cannot be
- // undef is as follows below:
- // The new range is based on a branch condition. That guarantees that
- // neither of the compare operands can be undef in the branch targets,
- // unless we have conditions that are always true/false (e.g. icmp ule
- // i32, %a, i32_max). For the latter overdefined/empty range will be
- // inferred, but the branch will get folded accordingly anyways.
+ addAdditionalUser(OtherOp, &CB);
mergeInValue(
IV, &CB,
- ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef=*/true));
+ ValueLatticeElement::getRange(NewCR, MayIncludeUndef));
return;
} else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) {
// For non-integer values or integer constant expressions, only
// propagate equal constants.
- addAdditionalUser(CmpOp1, &CB);
+ addAdditionalUser(OtherOp, &CB);
mergeInValue(IV, &CB, CondVal);
return;
+ } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant() &&
+ !MayIncludeUndef) {
+ // Propagate inequalities.
+ addAdditionalUser(OtherOp, &CB);
+ mergeInValue(IV, &CB,
+ ValueLatticeElement::getNot(CondVal.getConstant()));
+ return;
}
return (void)mergeInValue(IV, &CB, CopyOfVal);
}
+
+ if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
+ // Compute result range for intrinsics supported by ConstantRange.
+ // Do this even if we don't know a range for all operands, as we may
+ // still know something about the result range, e.g. of abs(x).
+ SmallVector<ConstantRange, 2> OpRanges;
+ for (Value *Op : II->args()) {
+ const ValueLatticeElement &State = getValueState(Op);
+ if (State.isConstantRange())
+ OpRanges.push_back(State.getConstantRange());
+ else
+ OpRanges.push_back(
+ ConstantRange::getFull(Op->getType()->getScalarSizeInBits()));
+ }
+
+ ConstantRange Result =
+ ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges);
+ return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+ }
}
// The common case is that we aren't tracking the callee, either because we
@@ -1427,8 +1453,7 @@ void SCCPSolver::Solve() {
// Process the basic block work list.
while (!BBWorkList.empty()) {
- BasicBlock *BB = BBWorkList.back();
- BBWorkList.pop_back();
+ BasicBlock *BB = BBWorkList.pop_back_val();
LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
@@ -1456,6 +1481,7 @@ void SCCPSolver::Solve() {
/// This scan also checks for values that use undefs. It conservatively marks
/// them as overdefined.
bool SCCPSolver::ResolvedUndefsIn(Function &F) {
+ bool MadeChange = false;
for (BasicBlock &BB : F) {
if (!BBExecutable.count(&BB))
continue;
@@ -1481,8 +1507,10 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// more precise than this but it isn't worth bothering.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
ValueLatticeElement &LV = getStructValueState(&I, i);
- if (LV.isUnknownOrUndef())
+ if (LV.isUnknownOrUndef()) {
markOverdefined(LV, &I);
+ MadeChange = true;
+ }
}
continue;
}
@@ -1509,7 +1537,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
}
markOverdefined(&I);
- return true;
+ MadeChange = true;
}
// Check to see if we have a branch or switch on an undefined value. If so
@@ -1526,7 +1554,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (isa<UndefValue>(BI->getCondition())) {
BI->setCondition(ConstantInt::getFalse(BI->getContext()));
markEdgeExecutable(&BB, TI->getSuccessor(1));
- return true;
+ MadeChange = true;
+ continue;
}
// Otherwise, it is a branch on a symbolic value which is currently
@@ -1535,7 +1564,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// FIXME: Distinguish between dead code and an LLVM "undef" value.
BasicBlock *DefaultSuccessor = TI->getSuccessor(1);
if (markEdgeExecutable(&BB, DefaultSuccessor))
- return true;
+ MadeChange = true;
continue;
}
@@ -1554,7 +1583,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (isa<UndefValue>(IBR->getAddress())) {
IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0)));
markEdgeExecutable(&BB, IBR->getSuccessor(0));
- return true;
+ MadeChange = true;
+ continue;
}
// Otherwise, it is a branch on a symbolic value which is currently
@@ -1564,7 +1594,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// we can assume the branch has undefined behavior instead.
BasicBlock *DefaultSuccessor = IBR->getSuccessor(0);
if (markEdgeExecutable(&BB, DefaultSuccessor))
- return true;
+ MadeChange = true;
continue;
}
@@ -1579,7 +1609,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (isa<UndefValue>(SI->getCondition())) {
SI->setCondition(SI->case_begin()->getCaseValue());
markEdgeExecutable(&BB, SI->case_begin()->getCaseSuccessor());
- return true;
+ MadeChange = true;
+ continue;
}
// Otherwise, it is a branch on a symbolic value which is currently
@@ -1588,13 +1619,13 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// FIXME: Distinguish between dead code and an LLVM "undef" value.
BasicBlock *DefaultSuccessor = SI->case_begin()->getCaseSuccessor();
if (markEdgeExecutable(&BB, DefaultSuccessor))
- return true;
+ MadeChange = true;
continue;
}
}
- return false;
+ return MadeChange;
}
static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
@@ -1716,7 +1747,7 @@ static bool runSCCP(Function &F, const DataLayout &DL,
LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << BB);
++NumDeadBlocks;
- NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB);
+ NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB).first;
MadeChanges = true;
continue;
@@ -1839,39 +1870,68 @@ static void findReturnsToZap(Function &F,
}
}
-// Update the condition for terminators that are branching on indeterminate
-// values, forcing them to use a specific edge.
-static void forceIndeterminateEdge(Instruction* I, SCCPSolver &Solver) {
- BasicBlock *Dest = nullptr;
- Constant *C = nullptr;
- if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
- if (!isa<ConstantInt>(SI->getCondition())) {
- // Indeterminate switch; use first case value.
- Dest = SI->case_begin()->getCaseSuccessor();
- C = SI->case_begin()->getCaseValue();
- }
- } else if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- if (!isa<ConstantInt>(BI->getCondition())) {
- // Indeterminate branch; use false.
- Dest = BI->getSuccessor(1);
- C = ConstantInt::getFalse(BI->getContext());
+static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
+ DomTreeUpdater &DTU) {
+ SmallPtrSet<BasicBlock *, 8> FeasibleSuccessors;
+ bool HasNonFeasibleEdges = false;
+ for (BasicBlock *Succ : successors(BB)) {
+ if (Solver.isEdgeFeasible(BB, Succ))
+ FeasibleSuccessors.insert(Succ);
+ else
+ HasNonFeasibleEdges = true;
+ }
+
+ // All edges feasible, nothing to do.
+ if (!HasNonFeasibleEdges)
+ return false;
+
+ // SCCP can only determine non-feasible edges for br, switch and indirectbr.
+ Instruction *TI = BB->getTerminator();
+ assert((isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
+ isa<IndirectBrInst>(TI)) &&
+ "Terminator must be a br, switch or indirectbr");
+
+ if (FeasibleSuccessors.size() == 1) {
+ // Replace with an unconditional branch to the only feasible successor.
+ BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin();
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ bool HaveSeenOnlyFeasibleSuccessor = false;
+ for (BasicBlock *Succ : successors(BB)) {
+ if (Succ == OnlyFeasibleSuccessor && !HaveSeenOnlyFeasibleSuccessor) {
+ // Don't remove the edge to the only feasible successor the first time
+ // we see it. We still do need to remove any multi-edges to it though.
+ HaveSeenOnlyFeasibleSuccessor = true;
+ continue;
+ }
+
+ Succ->removePredecessor(BB);
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
- } else if (IndirectBrInst *IBR = dyn_cast<IndirectBrInst>(I)) {
- if (!isa<BlockAddress>(IBR->getAddress()->stripPointerCasts())) {
- // Indeterminate indirectbr; use successor 0.
- Dest = IBR->getSuccessor(0);
- C = BlockAddress::get(IBR->getSuccessor(0));
+
+ BranchInst::Create(OnlyFeasibleSuccessor, BB);
+ TI->eraseFromParent();
+ DTU.applyUpdatesPermissive(Updates);
+ } else if (FeasibleSuccessors.size() > 1) {
+ SwitchInstProfUpdateWrapper SI(*cast<SwitchInst>(TI));
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ for (auto CI = SI->case_begin(); CI != SI->case_end();) {
+ if (FeasibleSuccessors.contains(CI->getCaseSuccessor())) {
+ ++CI;
+ continue;
+ }
+
+ BasicBlock *Succ = CI->getCaseSuccessor();
+ Succ->removePredecessor(BB);
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ SI.removeCase(CI);
+ // Don't increment CI, as we removed a case.
}
- } else {
- llvm_unreachable("Unexpected terminator instruction");
- }
- if (C) {
- assert(Solver.isEdgeFeasible(I->getParent(), Dest) &&
- "Didn't find feasible edge?");
- (void)Dest;
- I->setOperand(0, C);
+ DTU.applyUpdatesPermissive(Updates);
+ } else {
+ llvm_unreachable("Must have at least one feasible successor");
}
+ return true;
}
bool llvm::runIPSCCP(
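To make the pruning logic above easier to follow outside of LLVM's CFG classes, here is a minimal standalone sketch of the same idea: keep exactly one edge to the lone feasible successor and delete every other edge, including duplicate multi-edges to that same block. The container choices and the edgesToDelete helper are illustrative only, not LLVM API.

// Toy analogue of the edge-pruning step: given the successor list of a block
// (possibly containing duplicates) and the set of feasible successors, return
// the edges that would be deleted.  Exactly one edge to the only feasible
// successor is kept; every other edge, including extra multi-edges to that
// same successor, is removed.
#include <iostream>
#include <set>
#include <string>
#include <vector>

static std::vector<std::string>
edgesToDelete(const std::vector<std::string> &Succs,
              const std::set<std::string> &Feasible) {
  std::vector<std::string> Deleted;
  if (Feasible.size() != 1)
    return Deleted; // This sketch only models the single-feasible-successor case.
  const std::string &Only = *Feasible.begin();
  bool SeenOnly = false;
  for (const std::string &S : Succs) {
    if (S == Only && !SeenOnly) {
      SeenOnly = true; // Keep the first edge to the surviving successor.
      continue;
    }
    Deleted.push_back(S); // Drop infeasible edges and duplicate edges.
  }
  return Deleted;
}

int main() {
  // A switch with successors {bb1, bb2, bb1, bb3} where only bb1 is feasible.
  std::vector<std::string> Succs = {"bb1", "bb2", "bb1", "bb3"};
  for (const std::string &S : edgesToDelete(Succs, {"bb1"}))
    std::cout << "delete edge to " << S << '\n'; // bb2, bb1 (dup), bb3
}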
@@ -1923,13 +1983,12 @@ bool llvm::runIPSCCP(
while (ResolvedUndefs) {
LLVM_DEBUG(dbgs() << "RESOLVING UNDEFS\n");
ResolvedUndefs = false;
- for (Function &F : M)
- if (Solver.ResolvedUndefsIn(F)) {
- // We run Solve() after we resolved an undef in a function, because
- // we might deduce a fact that eliminates an undef in another function.
- Solver.Solve();
+ for (Function &F : M) {
+ if (Solver.ResolvedUndefsIn(F))
ResolvedUndefs = true;
- }
+ }
+ if (ResolvedUndefs)
+ Solver.Solve();
}
bool MadeChanges = false;
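The restructured loop above batches the work: undefs are resolved in every function first and the solver reruns once per outer iteration, rather than after each function. A compact sketch of that fixed-point shape, where runToFixedPoint and its callables are hypothetical stand-ins for the SCCPSolver calls:

#include <functional>
#include <vector>

// Toy fixed-point driver mirroring the rewritten loop: resolve undefs in every
// function first, then run one global solve, and repeat until nothing changed.
// resolveUndefsIn/solve stand in for Solver.ResolvedUndefsIn(F)/Solver.Solve().
static void runToFixedPoint(const std::vector<int> &Functions,
                            const std::function<bool(int)> &resolveUndefsIn,
                            const std::function<void()> &solve) {
  bool Resolved = true;
  while (Resolved) {
    Resolved = false;
    for (int F : Functions)
      if (resolveUndefsIn(F))
        Resolved = true;
    if (Resolved)
      solve();
  }
}

int main() {
  int Budget = 3; // Pretend a few undefs remain to be resolved.
  runToFixedPoint({0, 1, 2},
                  [&](int) { return Budget-- > 0; },
                  [] { /* one global solve per outer iteration */ });
}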
@@ -1943,15 +2002,35 @@ bool llvm::runIPSCCP(
SmallVector<BasicBlock *, 512> BlocksToErase;
- if (Solver.isBlockExecutable(&F.front()))
- for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;
- ++AI) {
- if (!AI->use_empty() && tryToReplaceWithConstant(Solver, &*AI)) {
+ if (Solver.isBlockExecutable(&F.front())) {
+ bool ReplacedPointerArg = false;
+ for (Argument &Arg : F.args()) {
+ if (!Arg.use_empty() && tryToReplaceWithConstant(Solver, &Arg)) {
+ ReplacedPointerArg |= Arg.getType()->isPointerTy();
++IPNumArgsElimed;
- continue;
}
}
+ // If we replaced an argument, the argmemonly and
+ // inaccessiblemem_or_argmemonly attributes do not hold any longer. Remove
+ // them from both the function and callsites.
+ if (ReplacedPointerArg) {
+ AttrBuilder AttributesToRemove;
+ AttributesToRemove.addAttribute(Attribute::ArgMemOnly);
+ AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
+ F.removeAttributes(AttributeList::FunctionIndex, AttributesToRemove);
+
+ for (User *U : F.users()) {
+ auto *CB = dyn_cast<CallBase>(U);
+ if (!CB || CB->getCalledFunction() != &F)
+ continue;
+
+ CB->removeAttributes(AttributeList::FunctionIndex,
+ AttributesToRemove);
+ }
+ }
+ }
+
SmallPtrSet<Value *, 32> InsertedValues;
for (BasicBlock &BB : F) {
if (!Solver.isBlockExecutable(&BB)) {
@@ -1984,45 +2063,11 @@ bool llvm::runIPSCCP(
/*UseLLVMTrap=*/false,
/*PreserveLCSSA=*/false, &DTU);
- // Now that all instructions in the function are constant folded,
- // use ConstantFoldTerminator to get rid of in-edges, record DT updates and
- // delete dead BBs.
- for (BasicBlock *DeadBB : BlocksToErase) {
- // If there are any PHI nodes in this successor, drop entries for BB now.
- for (Value::user_iterator UI = DeadBB->user_begin(),
- UE = DeadBB->user_end();
- UI != UE;) {
- // Grab the user and then increment the iterator early, as the user
- // will be deleted. Step past all adjacent uses from the same user.
- auto *I = dyn_cast<Instruction>(*UI);
- do { ++UI; } while (UI != UE && *UI == I);
-
- // Ignore blockaddress users; BasicBlock's dtor will handle them.
- if (!I) continue;
-
- // If we have forced an edge for an indeterminate value, then force the
- // terminator to fold to that edge.
- forceIndeterminateEdge(I, Solver);
- BasicBlock *InstBB = I->getParent();
- bool Folded = ConstantFoldTerminator(InstBB,
- /*DeleteDeadConditions=*/false,
- /*TLI=*/nullptr, &DTU);
- assert(Folded &&
- "Expect TermInst on constantint or blockaddress to be folded");
- (void) Folded;
- // If we folded the terminator to an unconditional branch to another
- // dead block, replace it with Unreachable, to avoid trying to fold that
- // branch again.
- BranchInst *BI = cast<BranchInst>(InstBB->getTerminator());
- if (BI && BI->isUnconditional() &&
- !Solver.isBlockExecutable(BI->getSuccessor(0))) {
- InstBB->getTerminator()->eraseFromParent();
- new UnreachableInst(InstBB->getContext(), InstBB);
- }
- }
- // Mark dead BB for deletion.
+ for (BasicBlock &BB : F)
+ MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU);
+
+ for (BasicBlock *DeadBB : BlocksToErase)
DTU.deleteBB(DeadBB);
- }
for (BasicBlock &BB : F) {
for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
@@ -2054,9 +2099,47 @@ bool llvm::runIPSCCP(
for (const auto &I : Solver.getTrackedRetVals()) {
Function *F = I.first;
- if (isOverdefined(I.second) || F->getReturnType()->isVoidTy())
+ const ValueLatticeElement &ReturnValue = I.second;
+
+ // If there is a known constant range for the return value, add !range
+ // metadata to the function's call sites.
+ if (ReturnValue.isConstantRange() &&
+ !ReturnValue.getConstantRange().isSingleElement()) {
+ // Do not add range metadata if the return value may include undef.
+ if (ReturnValue.isConstantRangeIncludingUndef())
+ continue;
+
+ auto &CR = ReturnValue.getConstantRange();
+ for (User *User : F->users()) {
+ auto *CB = dyn_cast<CallBase>(User);
+ if (!CB || CB->getCalledFunction() != F)
+ continue;
+
+ // Limit to cases where the return value is guaranteed to be neither
+ // poison nor undef. Poison will be outside any range, and values outside
+ // of the specified range currently cause immediate undefined behavior.
+ if (!isGuaranteedNotToBeUndefOrPoison(CB, nullptr, CB))
+ continue;
+
+ // Do not touch existing metadata for now.
+ // TODO: We should be able to take the intersection of the existing
+ // metadata and the inferred range.
+ if (CB->getMetadata(LLVMContext::MD_range))
+ continue;
+
+ LLVMContext &Context = CB->getParent()->getContext();
+ Metadata *RangeMD[] = {
+ ConstantAsMetadata::get(ConstantInt::get(Context, CR.getLower())),
+ ConstantAsMetadata::get(ConstantInt::get(Context, CR.getUpper()))};
+ CB->setMetadata(LLVMContext::MD_range, MDNode::get(Context, RangeMD));
+ }
continue;
- findReturnsToZap(*F, ReturnsToZap, Solver);
+ }
+ if (F->getReturnType()->isVoidTy())
+ continue;
+ if (isConstant(ReturnValue) || ReturnValue.isUnknownOrUndef())
+ findReturnsToZap(*F, ReturnsToZap, Solver);
}
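The !range metadata built above records the half-open interval [Lower, Upper) taken from the inferred constant range. A small self-contained check of what that interval means for a reader of the metadata, assuming the simple non-wrapping case (LLVM's ConstantRange may also wrap, which this sketch ignores):

#include <cassert>
#include <cstdint>

// Toy check for the half-open [Lower, Upper) interval that !range metadata
// describes.  Real ConstantRange values may wrap around the integer type;
// this sketch only models the non-wrapping case.
static bool inNonWrappingRange(uint64_t V, uint64_t Lower, uint64_t Upper) {
  assert(Lower < Upper && "sketch assumes a non-wrapping, non-empty range");
  return V >= Lower && V < Upper;
}

int main() {
  // A call site annotated with !range !{i64 0, i64 10} may only return 0..9.
  assert(inNonWrappingRange(0, 0, 10));
  assert(inNonWrappingRange(9, 0, 10));
  assert(!inNonWrappingRange(10, 0, 10)); // Upper bound is exclusive.
}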
for (auto F : Solver.getMRVFunctionsTracked()) {
@@ -2068,9 +2151,27 @@ bool llvm::runIPSCCP(
}
// Zap all returns which we've identified as zap to change.
+ SmallSetVector<Function *, 8> FuncZappedReturn;
for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) {
Function *F = ReturnsToZap[i]->getParent()->getParent();
ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
+ // Record all functions that are zapped.
+ FuncZappedReturn.insert(F);
+ }
+
+ // Remove the returned attribute for zapped functions and the
+ // corresponding call sites.
+ for (Function *F : FuncZappedReturn) {
+ for (Argument &A : F->args())
+ F->removeParamAttr(A.getArgNo(), Attribute::Returned);
+ for (Use &U : F->uses()) {
+ // Skip over blockaddr users.
+ if (isa<BlockAddress>(U.getUser()))
+ continue;
+ CallBase *CB = cast<CallBase>(U.getUser());
+ for (Use &Arg : CB->args())
+ CB->removeParamAttr(CB->getArgOperandNo(&Arg), Attribute::Returned);
+ }
}
// If we inferred constant or undef values for global variables, we can
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
index 89f324deef9f..af510f1a84bf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -268,6 +268,11 @@ public:
/// Access the dead users for this alloca.
ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
+ /// Access Uses that should be dropped if the alloca is promotable.
+ ArrayRef<Use *> getDeadUsesIfPromotable() const {
+ return DeadUseIfPromotable;
+ }
+
/// Access the dead operands referring to this alloca.
///
/// These are operands which cannot actually be used to refer to the
@@ -322,6 +327,9 @@ private:
/// they come from outside of the allocated space.
SmallVector<Instruction *, 8> DeadUsers;
+ /// Uses which will become dead if we can promote the alloca.
+ SmallVector<Use *, 8> DeadUseIfPromotable;
+
/// Operands which will become dead if we rewrite the alloca.
///
/// These are operands that in their particular use can be replaced with
@@ -459,12 +467,8 @@ class AllocaSlices::partition_iterator
// Remove the uses which have ended in the prior partition. This
// cannot change the max split slice end because we just checked that
// the prior partition ended prior to that max.
- P.SplitTails.erase(llvm::remove_if(P.SplitTails,
- [&](Slice *S) {
- return S->endOffset() <=
- P.EndOffset;
- }),
- P.SplitTails.end());
+ llvm::erase_if(P.SplitTails,
+ [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
assert(llvm::any_of(P.SplitTails,
[&](Slice *S) {
return S->endOffset() == MaxSplitSliceEndOffset;
@@ -780,6 +784,9 @@ private:
LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&LI);
+ if (isa<ScalableVectorType>(LI.getType()))
+ return PI.setAborted(&LI);
+
uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize();
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
@@ -795,6 +802,9 @@ private:
SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&SI);
+ if (isa<ScalableVectorType>(ValOp->getType()))
+ return PI.setAborted(&SI);
+
uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();
// If this memory access can be shown to *statically* extend outside the
@@ -920,6 +930,11 @@ private:
// FIXME: What about debug intrinsics? This matches old behavior, but
// doesn't make sense.
void visitIntrinsicInst(IntrinsicInst &II) {
+ if (II.isDroppable()) {
+ AS.DeadUseIfPromotable.push_back(U);
+ return;
+ }
+
if (!IsOffsetKnown)
return PI.setAborted(&II);
@@ -1057,13 +1072,11 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
return;
}
- Slices.erase(
- llvm::remove_if(Slices, [](const Slice &S) { return S.isDead(); }),
- Slices.end());
+ llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
- std::stable_sort(Slices.begin(), Slices.end());
+ llvm::stable_sort(Slices);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1109,9 +1122,9 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
/// Walk the range of a partitioning looking for a common type to cover this
/// sequence of slices.
-static Type *findCommonType(AllocaSlices::const_iterator B,
- AllocaSlices::const_iterator E,
- uint64_t EndOffset) {
+static std::pair<Type *, IntegerType *>
+findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
+ uint64_t EndOffset) {
Type *Ty = nullptr;
bool TyIsCommon = true;
IntegerType *ITy = nullptr;
@@ -1155,7 +1168,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
Ty = UserTy;
}
- return TyIsCommon ? Ty : ITy;
+ return {TyIsCommon ? Ty : nullptr, ITy};
}
/// PHI instructions that use an alloca and are subsequently loaded can be
@@ -1379,7 +1392,8 @@ static void speculateSelectInstLoads(SelectInst &SI) {
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
- SmallVectorImpl<Value *> &Indices, Twine NamePrefix) {
+ SmallVectorImpl<Value *> &Indices,
+ const Twine &NamePrefix) {
if (Indices.empty())
return BasePtr;
@@ -1404,7 +1418,7 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
Value *BasePtr, Type *Ty, Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
- Twine NamePrefix) {
+ const Twine &NamePrefix) {
if (Ty == TargetTy)
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
@@ -1449,7 +1463,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
- Twine NamePrefix) {
+ const Twine &NamePrefix) {
if (Offset == 0)
return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices,
NamePrefix);
@@ -1524,7 +1538,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, APInt Offset, Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
- Twine NamePrefix) {
+ const Twine &NamePrefix) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1535,6 +1549,8 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
return nullptr; // We can't GEP through an unsized element.
+ if (isa<ScalableVectorType>(ElementTy))
+ return nullptr;
APInt ElementSize(Offset.getBitWidth(),
DL.getTypeAllocSize(ElementTy).getFixedSize());
if (ElementSize == 0)
@@ -1563,7 +1579,8 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
- APInt Offset, Type *PointerTy, Twine NamePrefix) {
+ APInt Offset, Type *PointerTy,
+ const Twine &NamePrefix) {
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1825,7 +1842,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (!II->isLifetimeStartOrEnd())
+ if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
} else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
@@ -1909,12 +1926,9 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// do that until all the backends are known to produce good code for all
// integer vector types.
if (!HaveCommonEltTy) {
- CandidateTys.erase(
- llvm::remove_if(CandidateTys,
- [](VectorType *VTy) {
- return !VTy->getElementType()->isIntegerTy();
- }),
- CandidateTys.end());
+ llvm::erase_if(CandidateTys, [](VectorType *VTy) {
+ return !VTy->getElementType()->isIntegerTy();
+ });
// If there were no integer vector types, give up.
if (CandidateTys.empty())
@@ -2058,7 +2072,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (!II->isLifetimeStartOrEnd())
+ if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
} else {
return false;
@@ -2099,8 +2113,7 @@ static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
// that we cover the alloca.
// FIXME: We shouldn't consider split slices that happen to start in the
// partition here...
- bool WholeAllocaOp =
- P.begin() != P.end() ? false : DL.isLegalInteger(SizeInBits);
+ bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
for (const Slice &S : P)
if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
@@ -2193,8 +2206,7 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
Mask.reserve(NumElements);
for (unsigned i = BeginIndex; i != EndIndex; ++i)
Mask.push_back(i);
- V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), Mask,
- Name + ".extract");
+ V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
return V;
}
@@ -2227,22 +2239,22 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
// use a shuffle vector to widen it with undef elements, and then
// a second shuffle vector to select between the loaded vector and the
// incoming vector.
- SmallVector<Constant *, 8> Mask;
+ SmallVector<int, 8> Mask;
Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
if (i >= BeginIndex && i < EndIndex)
- Mask.push_back(IRB.getInt32(i - BeginIndex));
+ Mask.push_back(i - BeginIndex);
else
- Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
- V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
- ConstantVector::get(Mask), Name + ".expand");
+ Mask.push_back(-1);
+ V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
- Mask.clear();
+ SmallVector<Constant *, 8> Mask2;
+ Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
- Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
+ Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
- V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
+ V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
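To see the two masks above in action, the following standalone sketch simulates the widen-with-undef shuffle (mask entry -1 standing for an undefined lane) followed by the lane-wise select blend, using plain integer vectors rather than IR values; the concrete lane numbers are made up for illustration.

#include <cassert>
#include <cstdio>
#include <vector>

// Simulate a single-source shuffle: mask entry i picks element i of the source
// vector, and -1 produces an "undef" lane (modelled here as 0).
static std::vector<int> shuffle(const std::vector<int> &Src,
                                const std::vector<int> &Mask) {
  std::vector<int> Out;
  for (int M : Mask)
    Out.push_back(M < 0 ? 0 : Src[static_cast<size_t>(M)]);
  return Out;
}

int main() {
  std::vector<int> Old = {10, 20, 30, 40};   // Current value of the wide vector.
  std::vector<int> V   = {7, 8};             // Narrower value being inserted.
  unsigned BeginIndex = 1, EndIndex = 3;     // Insert into lanes [1, 3).

  // First shuffle: widen V to Old's width, placing it at BeginIndex and
  // leaving the other lanes undef (-1 in the mask).
  std::vector<int> Mask;
  for (unsigned i = 0; i != Old.size(); ++i)
    Mask.push_back(i >= BeginIndex && i < EndIndex ? int(i - BeginIndex) : -1);
  std::vector<int> Expanded = shuffle(V, Mask);

  // Second step: lane-wise select between the expanded value and Old.
  std::vector<int> Blend;
  for (unsigned i = 0; i != Old.size(); ++i)
    Blend.push_back(i >= BeginIndex && i < EndIndex ? Expanded[i] : Old[i]);

  assert((Blend == std::vector<int>{10, 7, 8, 40}));
  std::printf("blend ok\n");
}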
@@ -2446,7 +2458,7 @@ private:
void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
- Pass.DeadInsts.insert(I);
+ Pass.DeadInsts.push_back(I);
}
Value *rewriteVectorizedLoadInst() {
@@ -2512,7 +2524,7 @@ private:
NewAI.getAlign(), LI.isVolatile(),
LI.getName());
if (AATags)
- NewLI->setAAMetadata(AATags);
+ NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
if (NewLI->isAtomic())
@@ -2551,7 +2563,7 @@ private:
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(), LI.isVolatile(), LI.getName());
if (AATags)
- NewLI->setAAMetadata(AATags);
+ NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
@@ -2586,7 +2598,7 @@ private:
LI.replaceAllUsesWith(V);
}
- Pass.DeadInsts.insert(&LI);
+ Pass.DeadInsts.push_back(&LI);
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
return !LI.isVolatile() && !IsPtrAdjusted;
@@ -2614,8 +2626,8 @@ private:
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
if (AATags)
- Store->setAAMetadata(AATags);
- Pass.DeadInsts.insert(&SI);
+ Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
@@ -2638,8 +2650,8 @@ private:
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- Store->setAAMetadata(AATags);
- Pass.DeadInsts.insert(&SI);
+ Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
@@ -2708,12 +2720,12 @@ private:
NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- NewSI->setAAMetadata(AATags);
+ NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (SI.isVolatile())
NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
if (NewSI->isAtomic())
NewSI->setAlignment(SI.getAlign());
- Pass.DeadInsts.insert(&SI);
+ Pass.DeadInsts.push_back(&SI);
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
@@ -2774,11 +2786,11 @@ private:
}
// Record this instruction for deletion.
- Pass.DeadInsts.insert(&II);
+ Pass.DeadInsts.push_back(&II);
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
-
+
const bool CanContinue = [&]() {
if (VecTy || IntTy)
return true;
@@ -2804,7 +2816,7 @@ private:
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
MaybeAlign(getSliceAlign()), II.isVolatile());
if (AATags)
- New->setAAMetadata(AATags);
+ New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
@@ -2873,7 +2885,7 @@ private:
StoreInst *New =
IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
if (AATags)
- New->setAAMetadata(AATags);
+ New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
}
@@ -2944,7 +2956,7 @@ private:
return false;
}
// Record this instruction for deletion.
- Pass.DeadInsts.insert(&II);
+ Pass.DeadInsts.push_back(&II);
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after rewriting this instruction.
@@ -2994,7 +3006,7 @@ private:
CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
Size, II.isVolatile());
if (AATags)
- New->setAAMetadata(AATags);
+ New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
@@ -3048,7 +3060,7 @@ private:
LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
II.isVolatile(), "copyload");
if (AATags)
- Load->setAAMetadata(AATags);
+ Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Src = Load;
}
@@ -3068,19 +3080,27 @@ private:
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
if (AATags)
- Store->setAAMetadata(AATags);
+ Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
}
bool visitIntrinsicInst(IntrinsicInst &II) {
- assert(II.isLifetimeStartOrEnd());
+ assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
+ "Unexpected intrinsic!");
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
- assert(II.getArgOperand(1) == OldPtr);
// Record this instruction for deletion.
- Pass.DeadInsts.insert(&II);
+ Pass.DeadInsts.push_back(&II);
+ if (II.isDroppable()) {
+ assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
+ // TODO: For now we forget assumed information; this can be improved.
+ OldPtr->dropDroppableUsesIn(II);
+ return true;
+ }
+
+ assert(II.getArgOperand(1) == OldPtr);
// Lifetime intrinsics are only promotable if they cover the whole alloca.
// Therefore, we drop lifetime intrinsics which don't cover the whole
// alloca.
@@ -3361,8 +3381,13 @@ private:
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
LoadInst *Load =
IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
- if (AATags)
- Load->setAAMetadata(AATags);
+
+ APInt Offset(
+ DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
+ if (AATags &&
+ GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
+ Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
}
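The shifted alias metadata above is only attached when the GEP's byte offset from the original aggregate folds to a constant. As a rough analogue of that accumulated offset, here is the same quantity computed with offsetof over invented struct types instead of GEPOperator::accumulateConstantOffset:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Toy analogue of accumulating a constant GEP offset: the byte offset of a
// nested member within an aggregate, which is the amount the alias metadata
// would be shifted by for a load/store of that member.  The structs are
// purely illustrative.
struct Inner { std::uint32_t a; std::uint64_t b; };
struct Outer { std::uint16_t tag; Inner in[2]; };

int main() {
  // Roughly the offset a GEP with indices {0, in, 1, b} would accumulate.
  std::size_t Shift =
      offsetof(Outer, in) + sizeof(Inner) * 1 + offsetof(Inner, b);
  std::printf("metadata shift = %zu bytes\n", Shift);
}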
@@ -3408,8 +3433,13 @@ private:
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
StoreInst *Store =
IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
- if (AATags)
- Store->setAAMetadata(AATags);
+
+ APInt Offset(
+ DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
+ if (AATags &&
+ GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
+ Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
}
};
@@ -3455,7 +3485,7 @@ private:
<< "\n " << GEPI);
IRBuilderTy Builder(&GEPI);
- SmallVector<Value *, 4> Index(GEPI.idx_begin(), GEPI.idx_end());
+ SmallVector<Value *, 4> Index(GEPI.indices());
bool IsInBounds = GEPI.isInBounds();
Value *True = Sel->getTrueValue();
@@ -3509,20 +3539,27 @@ private:
<< "\n " << GEPI
<< "\n to: ");
- SmallVector<Value *, 4> Index(GEPI.idx_begin(), GEPI.idx_end());
+ SmallVector<Value *, 4> Index(GEPI.indices());
bool IsInBounds = GEPI.isInBounds();
IRBuilderTy PHIBuilder(GEPI.getParent()->getFirstNonPHI());
PHINode *NewPN = PHIBuilder.CreatePHI(GEPI.getType(),
PHI->getNumIncomingValues(),
PHI->getName() + ".sroa.phi");
for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
- Instruction *In = cast<Instruction>(PHI->getIncomingValue(I));
+ BasicBlock *B = PHI->getIncomingBlock(I);
+ Value *NewVal = nullptr;
+ int Idx = NewPN->getBasicBlockIndex(B);
+ if (Idx >= 0) {
+ NewVal = NewPN->getIncomingValue(Idx);
+ } else {
+ Instruction *In = cast<Instruction>(PHI->getIncomingValue(I));
- IRBuilderTy B(In->getParent(), std::next(In->getIterator()));
- Value *NewVal = IsInBounds
- ? B.CreateInBoundsGEP(In, Index, In->getName() + ".sroa.gep")
- : B.CreateGEP(In, Index, In->getName() + ".sroa.gep");
- NewPN->addIncoming(NewVal, PHI->getIncomingBlock(I));
+ IRBuilderTy B(In->getParent(), std::next(In->getIterator()));
+ NewVal = IsInBounds
+ ? B.CreateInBoundsGEP(In, Index, In->getName() + ".sroa.gep")
+ : B.CreateGEP(In, Index, In->getName() + ".sroa.gep");
+ }
+ NewPN->addIncoming(NewVal, B);
}
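The reuse of an existing incoming value above reflects that a PHI may list the same predecessor block several times, and every such entry must carry the same value. A tiny sketch of that de-duplication with a plain map and made-up block names:

#include <cassert>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Toy analogue of the de-duplication: the first value computed for a block is
// reused for later repeats of that block in the incoming list.
int main() {
  std::vector<std::string> IncomingBlocks = {"bb1", "bb2", "bb1"};
  std::unordered_map<std::string, int> ValueForBlock;
  std::vector<std::pair<std::string, int>> NewIncoming;
  int NextFreshValue = 100; // Stands in for "build a new GEP in this block".

  for (const std::string &B : IncomingBlocks) {
    auto It = ValueForBlock.find(B);
    int V = It != ValueForBlock.end() ? It->second
                                      : (ValueForBlock[B] = NextFreshValue++);
    NewIncoming.emplace_back(B, V);
  }

  assert(NewIncoming[0].second == NewIncoming[2].second); // bb1 entries match.
  assert(NewIncoming[1].second != NewIncoming[0].second);
}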
Visited.erase(&GEPI);
@@ -3864,63 +3901,53 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// such loads and stores, we can only pre-split them if their splits exactly
// match relative to their starting offset. We have to verify this prior to
// any rewriting.
- Stores.erase(
- llvm::remove_if(Stores,
- [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
- // Lookup the load we are storing in our map of split
- // offsets.
- auto *LI = cast<LoadInst>(SI->getValueOperand());
- // If it was completely unsplittable, then we're done,
- // and this store can't be pre-split.
- if (UnsplittableLoads.count(LI))
- return true;
-
- auto LoadOffsetsI = SplitOffsetsMap.find(LI);
- if (LoadOffsetsI == SplitOffsetsMap.end())
- return false; // Unrelated loads are definitely safe.
- auto &LoadOffsets = LoadOffsetsI->second;
-
- // Now lookup the store's offsets.
- auto &StoreOffsets = SplitOffsetsMap[SI];
-
- // If the relative offsets of each split in the load and
- // store match exactly, then we can split them and we
- // don't need to remove them here.
- if (LoadOffsets.Splits == StoreOffsets.Splits)
- return false;
-
- LLVM_DEBUG(
- dbgs()
- << " Mismatched splits for load and store:\n"
- << " " << *LI << "\n"
- << " " << *SI << "\n");
-
- // We've found a store and load that we need to split
- // with mismatched relative splits. Just give up on them
- // and remove both instructions from our list of
- // candidates.
- UnsplittableLoads.insert(LI);
- return true;
- }),
- Stores.end());
+ llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
+ // Lookup the load we are storing in our map of split
+ // offsets.
+ auto *LI = cast<LoadInst>(SI->getValueOperand());
+ // If it was completely unsplittable, then we're done,
+ // and this store can't be pre-split.
+ if (UnsplittableLoads.count(LI))
+ return true;
+
+ auto LoadOffsetsI = SplitOffsetsMap.find(LI);
+ if (LoadOffsetsI == SplitOffsetsMap.end())
+ return false; // Unrelated loads are definitely safe.
+ auto &LoadOffsets = LoadOffsetsI->second;
+
+ // Now lookup the store's offsets.
+ auto &StoreOffsets = SplitOffsetsMap[SI];
+
+ // If the relative offsets of each split in the load and
+ // store match exactly, then we can split them and we
+ // don't need to remove them here.
+ if (LoadOffsets.Splits == StoreOffsets.Splits)
+ return false;
+
+ LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
+ << " " << *LI << "\n"
+ << " " << *SI << "\n");
+
+ // We've found a store and load that we need to split
+ // with mismatched relative splits. Just give up on them
+ // and remove both instructions from our list of
+ // candidates.
+ UnsplittableLoads.insert(LI);
+ return true;
+ });
// Now we have to go *back* through all the stores, because a later store may
// have caused an earlier store's load to become unsplittable and if it is
// unsplittable for the later store, then we can't rely on it being split in
// the earlier store either.
- Stores.erase(llvm::remove_if(Stores,
- [&UnsplittableLoads](StoreInst *SI) {
- auto *LI =
- cast<LoadInst>(SI->getValueOperand());
- return UnsplittableLoads.count(LI);
- }),
- Stores.end());
+ llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
+ auto *LI = cast<LoadInst>(SI->getValueOperand());
+ return UnsplittableLoads.count(LI);
+ });
// Once we've established all the loads that can't be split for some reason,
// filter any that made it into our list out.
- Loads.erase(llvm::remove_if(Loads,
- [&UnsplittableLoads](LoadInst *LI) {
- return UnsplittableLoads.count(LI);
- }),
- Loads.end());
+ llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
+ return UnsplittableLoads.count(LI);
+ });
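The rewrites above collapse the erase(remove_if(...), end()) pattern into a single llvm::erase_if call. The standard library gained the same convenience as std::erase_if in C++20; it is shown here on a plain std::vector as a stand-in for the SmallVector-based code:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Loads = {1, 2, 3, 4, 5, 6};
  auto IsUnsplittable = [](int L) { return L % 2 == 0; };

  // Classic erase-remove idiom, as the old code spelled it out.
  std::vector<int> A = Loads;
  A.erase(std::remove_if(A.begin(), A.end(), IsUnsplittable), A.end());

  // Single-call form; llvm::erase_if (and std::erase_if since C++20) wrap
  // exactly this pattern.
  std::vector<int> B = Loads;
  std::erase_if(B, IsUnsplittable);

  assert(A == B && (A == std::vector<int>{1, 3, 5}));
}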
// If no loads or stores are left, there is no pre-splitting to be done for
// this alloca.
@@ -4057,7 +4084,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
// Mark the original store as dead.
- DeadInsts.insert(SI);
+ DeadInsts.push_back(SI);
}
// Save the split loads if there are deferred stores among the users.
@@ -4065,7 +4092,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
// Mark the original load as dead and kill the original slice.
- DeadInsts.insert(LI);
+ DeadInsts.push_back(LI);
Offsets.S->kill();
}
@@ -4187,15 +4214,14 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// trivial CSE, including instcombine.
if (LI->hasOneUse()) {
assert(*LI->user_begin() == SI && "Single use isn't this store!");
- DeadInsts.insert(LI);
+ DeadInsts.push_back(LI);
}
- DeadInsts.insert(SI);
+ DeadInsts.push_back(SI);
Offsets.S->kill();
}
// Remove the killed slices that have been pre-split.
- AS.erase(llvm::remove_if(AS, [](const Slice &S) { return S.isDead(); }),
- AS.end());
+ llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
// Insert our new slices. This will sort and merge them into the sorted
// sequence.
@@ -4209,11 +4235,9 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// Finally, don't try to promote any allocas that now require re-splitting.
// They have already been added to the worklist above.
- PromotableAllocas.erase(
- llvm::remove_if(
- PromotableAllocas,
- [&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }),
- PromotableAllocas.end());
+ llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) {
+ return ResplitPromotableAllocas.count(AI);
+ });
return true;
}
@@ -4235,13 +4259,21 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// or an i8 array of an appropriate size.
Type *SliceTy = nullptr;
const DataLayout &DL = AI.getModule()->getDataLayout();
- if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
- if (DL.getTypeAllocSize(CommonUseTy).getFixedSize() >= P.size())
- SliceTy = CommonUseTy;
+ std::pair<Type *, IntegerType *> CommonUseTy =
+ findCommonType(P.begin(), P.end(), P.endOffset());
+ // Do all uses operate on the same type?
+ if (CommonUseTy.first)
+ if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size())
+ SliceTy = CommonUseTy.first;
+ // If not, can we find an appropriate subtype in the original allocated type?
if (!SliceTy)
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;
+ // If still not, can we use the largest bitwidth integer type used?
+ if (!SliceTy && CommonUseTy.second)
+ if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size())
+ SliceTy = CommonUseTy.second;
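The new fallback chain above tries, in order, the common use type, a subtype carved out of the original allocation, and finally the widest integer type seen among the uses. A compact sketch of that priority selection, with strings standing in for Type pointers and the size checks of the real code omitted:

#include <cstdio>
#include <string>

// Toy priority chain mirroring the slice-type selection: prefer the common use
// type, then a subtype of the original allocation that covers the partition,
// then the widest integer type any use touched.  Empty strings model null
// Type*; the real code additionally checks that the chosen type is big enough.
static std::string pickSliceType(const std::string &CommonUseTy,
                                 const std::string &TypePartitionTy,
                                 const std::string &WidestIntTy) {
  if (!CommonUseTy.empty())
    return CommonUseTy;        // All uses agree on one type.
  if (!TypePartitionTy.empty())
    return TypePartitionTy;    // A subtype of the alloca covers the partition.
  if (!WidestIntTy.empty())
    return WidestIntTy;        // Fall back to the largest integer use.
  return "i8 array";           // Otherwise an i8 array of the partition size.
}

int main() {
  std::printf("%s\n", pickSliceType("", "", "i64").c_str()); // prints i64
}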
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
DL.isLegalInteger(P.size() * 8))
@@ -4331,6 +4363,13 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
}
if (Promotable) {
+ for (Use *U : AS.getDeadUsesIfPromotable()) {
+ auto *OldInst = dyn_cast<Instruction>(U->get());
+ Value::dropDroppableUse(*U);
+ if (OldInst)
+ if (isInstructionTriviallyDead(OldInst))
+ DeadInsts.push_back(OldInst);
+ }
if (PHIUsers.empty() && SelectUsers.empty()) {
// Promote the alloca.
PromotableAllocas.push_back(NewAI);
@@ -4465,10 +4504,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
- if (!DbgDeclares.empty()) {
- auto *Var = DbgDeclares.front()->getVariable();
- auto *Expr = DbgDeclares.front()->getExpression();
- auto VarSize = Var->getSizeInBits();
+ for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) {
+ auto *Expr = DbgDeclare->getExpression();
DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
uint64_t AllocaSize =
DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedSize();
@@ -4499,6 +4536,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
// The alloca may be larger than the variable.
+ auto VarSize = DbgDeclare->getVariable()->getSizeInBits();
if (VarSize) {
if (Size > *VarSize)
Size = *VarSize;
@@ -4516,12 +4554,21 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
}
- // Remove any existing intrinsics describing the same alloca.
- for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
- OldDII->eraseFromParent();
+ // Remove any existing intrinsics on the new alloca describing
+ // the variable fragment.
+ for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) {
+ auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS,
+ const DbgVariableIntrinsic *RHS) {
+ return LHS->getVariable() == RHS->getVariable() &&
+ LHS->getDebugLoc()->getInlinedAt() ==
+ RHS->getDebugLoc()->getInlinedAt();
+ };
+ if (SameVariableFragment(OldDII, DbgDeclare))
+ OldDII->eraseFromParent();
+ }
- DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
- DbgDeclares.front()->getDebugLoc(), &AI);
+ DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr,
+ DbgDeclare->getDebugLoc(), &AI);
}
}
return Changed;
@@ -4538,7 +4585,7 @@ void SROA::clobberUse(Use &U) {
// minimal.
if (Instruction *OldI = dyn_cast<Instruction>(OldV))
if (isInstructionTriviallyDead(OldI)) {
- DeadInsts.insert(OldI);
+ DeadInsts.push_back(OldI);
}
}
@@ -4587,7 +4634,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType()));
// And mark it for deletion.
- DeadInsts.insert(DeadUser);
+ DeadInsts.push_back(DeadUser);
Changed = true;
}
for (Use *DeadOp : AS.getDeadOperands()) {
@@ -4625,7 +4672,8 @@ bool SROA::deleteDeadInstructions(
SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
bool Changed = false;
while (!DeadInsts.empty()) {
- Instruction *I = DeadInsts.pop_back_val();
+ Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+ if (!I) continue;
LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
// If the instruction is an alloca, find the possible dbg.declare connected
@@ -4644,7 +4692,7 @@ bool SROA::deleteDeadInstructions(
// Zero out the operand and see if it becomes trivially dead.
Operand = nullptr;
if (isInstructionTriviallyDead(U))
- DeadInsts.insert(U);
+ DeadInsts.push_back(U);
}
++NumDeleted;
@@ -4707,8 +4755,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
Worklist.remove_if(IsInSet);
PostPromotionWorklist.remove_if(IsInSet);
- PromotableAllocas.erase(llvm::remove_if(PromotableAllocas, IsInSet),
- PromotableAllocas.end());
+ llvm::erase_if(PromotableAllocas, IsInSet);
DeletedAllocas.clear();
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
index 9d088547b436..dba3dba24e25 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -34,14 +34,14 @@ using namespace llvm;
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCELegacyPassPass(Registry);
+ initializeAnnotationRemarksLegacyPass(Registry);
initializeBDCELegacyPassPass(Registry);
initializeAlignmentFromAssumptionsPass(Registry);
initializeCallSiteSplittingLegacyPassPass(Registry);
initializeConstantHoistingLegacyPassPass(Registry);
- initializeConstantPropagationPass(Registry);
+ initializeConstraintEliminationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
initializeDCELegacyPassPass(Registry);
- initializeDeadInstEliminationPass(Registry);
initializeDivRemPairsLegacyPassPass(Registry);
initializeScalarizerLegacyPassPass(Registry);
initializeDSELegacyPassPass(Registry);
@@ -67,22 +67,24 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopDeletionLegacyPassPass(Registry);
initializeLoopAccessLegacyAnalysisPass(Registry);
initializeLoopInstSimplifyLegacyPassPass(Registry);
- initializeLoopInterchangePass(Registry);
+ initializeLoopInterchangeLegacyPassPass(Registry);
+ initializeLoopFlattenLegacyPassPass(Registry);
initializeLoopPredicationLegacyPassPass(Registry);
initializeLoopRotateLegacyPassPass(Registry);
initializeLoopStrengthReducePass(Registry);
- initializeLoopRerollPass(Registry);
+ initializeLoopRerollLegacyPassPass(Registry);
initializeLoopUnrollPass(Registry);
initializeLoopUnrollAndJamPass(Registry);
initializeLoopUnswitchPass(Registry);
initializeWarnMissedTransformationsLegacyPass(Registry);
- initializeLoopVersioningLICMPass(Registry);
+ initializeLoopVersioningLICMLegacyPassPass(Registry);
initializeLoopIdiomRecognizeLegacyPassPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
initializeLowerConstantIntrinsicsPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeLowerGuardIntrinsicLegacyPassPass(Registry);
initializeLowerMatrixIntrinsicsLegacyPassPass(Registry);
+ initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(Registry);
initializeLowerWidenableConditionLegacyPassPass(Registry);
initializeMemCpyOptLegacyPassPass(Registry);
initializeMergeICmpsLegacyPassPass(Registry);
@@ -91,25 +93,26 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
initializeReassociateLegacyPassPass(Registry);
initializeRedundantDbgInstEliminationPass(Registry);
- initializeRegToMemPass(Registry);
+ initializeRegToMemLegacyPass(Registry);
initializeRewriteStatepointsForGCLegacyPassPass(Registry);
+ initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeSCCPLegacyPassPass(Registry);
initializeSROALegacyPassPass(Registry);
initializeCFGSimplifyPassPass(Registry);
- initializeStructurizeCFGPass(Registry);
+ initializeStructurizeCFGLegacyPassPass(Registry);
initializeSimpleLoopUnswitchLegacyPassPass(Registry);
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
- initializeSeparateConstOffsetFromGEPPass(Registry);
+ initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry);
initializeSpeculativeExecutionLegacyPassPass(Registry);
- initializeStraightLineStrengthReducePass(Registry);
+ initializeStraightLineStrengthReduceLegacyPassPass(Registry);
initializePlaceBackedgeSafepointsImplPass(Registry);
initializePlaceSafepointsPass(Registry);
initializeFloat2IntLegacyPassPass(Registry);
initializeLoopDistributeLegacyPass(Registry);
initializeLoopLoadEliminationPass(Registry);
initializeLoopSimplifyCFGLegacyPassPass(Registry);
- initializeLoopVersioningPassPass(Registry);
+ initializeLoopVersioningLegacyPassPass(Registry);
initializeEntryExitInstrumenterPass(Registry);
initializePostInlineEntryExitInstrumenterPass(Registry);
}
@@ -139,7 +142,7 @@ void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) {
}
void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCFGSimplificationPass(1, false, false, true));
+ unwrap(PM)->add(createCFGSimplificationPass());
}
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
@@ -166,6 +169,10 @@ void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createIndVarSimplifyPass());
}
+void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createInstSimplifyLegacyPass());
+}
+
void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createJumpThreadingPass());
}
@@ -182,6 +189,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopDeletionPass());
}
+void LLVMAddLoopFlattenPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopFlattenPass());
+}
+
void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopIdiomPass());
}
@@ -247,10 +258,6 @@ void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createTailCallEliminationPass());
}
-void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createConstantPropagationPass());
-}
-
void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createDemoteRegisterToMemoryPass());
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index c93b29617438..afa2d1bc7966 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -13,9 +13,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -32,6 +32,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Scalar.h"
#include <algorithm>
#include <cassert>
@@ -41,15 +42,13 @@ using namespace llvm;
namespace {
-class ScalarizeMaskedMemIntrin : public FunctionPass {
- const TargetTransformInfo *TTI = nullptr;
- const DataLayout *DL = nullptr;
-
+class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) {
- initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
+ explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
+ initializeScalarizeMaskedMemIntrinLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -61,21 +60,28 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
}
-
-private:
- bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
- bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
};
} // end anonymous namespace
-char ScalarizeMaskedMemIntrin::ID = 0;
+static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
+ const TargetTransformInfo &TTI, const DataLayout &DL);
+static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
+ const TargetTransformInfo &TTI,
+ const DataLayout &DL);
+
+char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
-INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE,
- "Scalarize unsupported masked memory intrinsics", false, false)
+INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
+ "Scalarize unsupported masked memory intrinsics", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
+ "Scalarize unsupported masked memory intrinsics", false,
+ false)
-FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
- return new ScalarizeMaskedMemIntrin();
+FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
+ return new ScalarizeMaskedMemIntrinLegacyPass();
}
static bool isConstantIntVector(Value *Mask) {
@@ -622,18 +628,29 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
Value *VResult = PassThru;
// Take a shortcut if the mask is a vector of constants.
+ // Create a build_vector pattern, with loads/undefs as necessary and then
+ // shuffle blend with the pass through value.
if (isConstantIntVector(Mask)) {
unsigned MemIndex = 0;
+ VResult = UndefValue::get(VecType);
+ SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem);
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
- "Load" + Twine(Idx));
- VResult =
- Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
- ++MemIndex;
+ Value *InsertElt;
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
+ InsertElt = UndefValue::get(EltTy);
+ ShuffleMask[Idx] = Idx + VectorWidth;
+ } else {
+ Value *NewPtr =
+ Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+ InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
+ "Load" + Twine(Idx));
+ ShuffleMask[Idx] = Idx;
+ ++MemIndex;
+ }
+ VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
+ "Res" + Twine(Idx));
}
+ VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
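The constant-mask path above loads only the enabled lanes from consecutive memory, inserts them into a fresh vector, and then blends with the pass-through operand via a two-source shuffle whose mask picks lane Idx from the loaded vector or lane Idx + VectorWidth from the pass-through. A standalone simulation with plain integer vectors and an invented example mask:

#include <cassert>
#include <vector>

// Simulate the constant-mask expand-load rewrite: enabled lanes consume the
// next consecutive element from memory, disabled lanes keep the pass-through
// value.  The two-source shuffle mask encodes that choice per lane.
int main() {
  std::vector<int> Memory   = {7, 8, 9};        // Consecutive elements to expand.
  std::vector<int> PassThru = {0, 0, 0, 0};
  std::vector<bool> Mask    = {true, false, true, true};
  unsigned VectorWidth = 4, MemIndex = 0;

  std::vector<int> Loaded(VectorWidth, -1);     // -1 models an undef lane.
  std::vector<int> ShuffleMask(VectorWidth);
  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    if (Mask[Idx]) {
      Loaded[Idx] = Memory[MemIndex++];          // Next consecutive load.
      ShuffleMask[Idx] = int(Idx);               // Take the loaded element.
    } else {
      ShuffleMask[Idx] = int(Idx + VectorWidth); // Take the pass-through element.
    }
  }

  std::vector<int> Result(VectorWidth);
  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx)
    Result[Idx] = ShuffleMask[Idx] < int(VectorWidth)
                      ? Loaded[ShuffleMask[Idx]]
                      : PassThru[ShuffleMask[Idx] - int(VectorWidth)];

  assert((Result == std::vector<int>{7, 0, 8, 9}));
}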
@@ -811,19 +828,16 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
ModifiedDT = true;
}
-bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
+static bool runImpl(Function &F, const TargetTransformInfo &TTI) {
bool EverMadeChange = false;
-
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DL = &F.getParent()->getDataLayout();
-
bool MadeChange = true;
+ auto &DL = F.getParent()->getDataLayout();
while (MadeChange) {
MadeChange = false;
for (Function::iterator I = F.begin(); I != F.end();) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
+ MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL);
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
@@ -832,17 +846,33 @@ bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
EverMadeChange |= MadeChange;
}
-
return EverMadeChange;
}
-bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
+bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return runImpl(F, TTI);
+}
+
+PreservedAnalyses
+ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!runImpl(F, TTI))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<TargetIRAnalysis>();
+ return PA;
+}
+
+static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
+ const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
bool MadeChange = false;
BasicBlock::iterator CurInstIterator = BB.begin();
while (CurInstIterator != BB.end()) {
if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
- MadeChange |= optimizeCallInst(CI, ModifiedDT);
+ MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL);
if (ModifiedDT)
return true;
}
@@ -850,23 +880,30 @@ bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
return MadeChange;
}
-bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
- bool &ModifiedDT) {
+static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
+ const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
+ // The scalarization code below does not work for scalable vectors.
+ if (isa<ScalableVectorType>(II->getType()) ||
+ any_of(II->arg_operands(),
+ [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
+ return false;
+
switch (II->getIntrinsicID()) {
default:
break;
case Intrinsic::masked_load:
// Scalarize unsupported vector masked load
- if (TTI->isLegalMaskedLoad(
+ if (TTI.isLegalMaskedLoad(
CI->getType(),
cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
return false;
scalarizeMaskedLoad(CI, ModifiedDT);
return true;
case Intrinsic::masked_store:
- if (TTI->isLegalMaskedStore(
+ if (TTI.isLegalMaskedStore(
CI->getArgOperand(0)->getType(),
cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
return false;
@@ -877,8 +914,8 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Type *LoadTy = CI->getType();
Align Alignment =
- DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy);
- if (TTI->isLegalMaskedGather(LoadTy, Alignment))
+ DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy);
+ if (TTI.isLegalMaskedGather(LoadTy, Alignment))
return false;
scalarizeMaskedGather(CI, ModifiedDT);
return true;
@@ -888,19 +925,19 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Type *StoreTy = CI->getArgOperand(0)->getType();
Align Alignment =
- DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy);
- if (TTI->isLegalMaskedScatter(StoreTy, Alignment))
+ DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy);
+ if (TTI.isLegalMaskedScatter(StoreTy, Alignment))
return false;
scalarizeMaskedScatter(CI, ModifiedDT);
return true;
}
case Intrinsic::masked_expandload:
- if (TTI->isLegalMaskedExpandLoad(CI->getType()))
+ if (TTI.isLegalMaskedExpandLoad(CI->getType()))
return false;
scalarizeMaskedExpandLoad(CI, ModifiedDT);
return true;
case Intrinsic::masked_compressstore:
- if (TTI->isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
+ if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
return false;
scalarizeMaskedCompressStore(CI, ModifiedDT);
return true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 851bd79cd6d8..c95984fe198f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -398,7 +398,8 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
continue;
Instruction *Old = cast<Instruction>(V);
- CV[I]->takeName(Old);
+ if (isa<Instruction>(CV[I]))
+ CV[I]->takeName(Old);
Old->replaceAllUsesWith(CV[I]);
PotentiallyDeadInstrs.emplace_back(Old);
}
@@ -732,7 +733,7 @@ bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
auto *MidTy = FixedVectorType::get(SrcVT->getElementType(), FanIn);
unsigned Op0I = 0;
for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
- Value *V = UndefValue::get(MidTy);
+ Value *V = PoisonValue::get(MidTy);
for (unsigned MidI = 0; MidI < FanIn; ++MidI)
V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
BCI.getName() + ".i" + Twine(ResI)
@@ -931,7 +932,7 @@ bool ScalarizerVisitor::finish() {
if (!Op->use_empty()) {
// The value is still needed, so recreate it using a series of
// InsertElements.
- Value *Res = UndefValue::get(Op->getType());
+ Value *Res = PoisonValue::get(Op->getType());
if (auto *Ty = dyn_cast<VectorType>(Op->getType())) {
BasicBlock *BB = Op->getParent();
unsigned Count = cast<FixedVectorType>(Ty)->getNumElements();
@@ -941,13 +942,13 @@ bool ScalarizerVisitor::finish() {
for (unsigned I = 0; I < Count; ++I)
Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
Op->getName() + ".upto" + Twine(I));
+ Res->takeName(Op);
} else {
assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
Res = CV[0];
if (Op == Res)
continue;
}
- Res->takeName(Op);
Op->replaceAllUsesWith(Res);
}
PotentiallyDeadInstrs.emplace_back(Op);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index f1d2e3c1ecfa..f216956406b6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -155,6 +155,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -177,6 +178,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -342,13 +344,14 @@ private:
/// A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
-class SeparateConstOffsetFromGEP : public FunctionPass {
+class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
public:
static char ID;
- SeparateConstOffsetFromGEP(bool LowerGEP = false)
+ SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
: FunctionPass(ID), LowerGEP(LowerGEP) {
- initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
+ initializeSeparateConstOffsetFromGEPLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -360,14 +363,26 @@ public:
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
- bool doInitialization(Module &M) override {
- DL = &M.getDataLayout();
- return false;
- }
-
bool runOnFunction(Function &F) override;
private:
+ bool LowerGEP;
+};
+
+/// Tries to split every GEP in the function into a variadic base and a
+/// constant offset. It works on a whole function because searching for the
+/// constant offset may inspect other basic blocks.
+class SeparateConstOffsetFromGEP {
+public:
+ SeparateConstOffsetFromGEP(
+ DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
+ TargetLibraryInfo *TLI,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
+ : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
+
+ bool run(Function &F);
+
+private:
/// Tries to split the given GEP into a variadic base and a constant offset,
/// and returns true if the splitting succeeds.
bool splitGEP(GetElementPtrInst *GEP);
@@ -450,9 +465,10 @@ private:
const DataLayout *DL = nullptr;
DominatorTree *DT = nullptr;
ScalarEvolution *SE;
-
LoopInfo *LI;
TargetLibraryInfo *TLI;
+ // Retrieved lazily since not always used.
+ function_ref<TargetTransformInfo &(Function &)> GetTTI;
/// Whether to lower a GEP with multiple indices into arithmetic operations or
/// multiple GEPs with a single index.
@@ -464,10 +480,10 @@ private:
} // end anonymous namespace
-char SeparateConstOffsetFromGEP::ID = 0;
+char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(
- SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
+ SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@@ -476,12 +492,12 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
- SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
+ SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
- return new SeparateConstOffsetFromGEP(LowerGEP);
+ return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
}
bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
@@ -886,8 +902,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
// If we created a GEP with constant index, and the base is loop invariant,
// then we swap the first one with it, so LICM can move constant GEP out
// later.
- GetElementPtrInst *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
- GetElementPtrInst *SecondGEP = dyn_cast_or_null<GetElementPtrInst>(ResultPtr);
+ auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
+ auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
swapGEPOperand(FirstGEP, SecondGEP);
@@ -962,8 +978,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
if (!NeedsExtraction)
return Changed;
- TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*GEP->getFunction());
+ TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
// If LowerGEP is disabled, before really splitting the GEP, check whether the
// backend supports the addressing mode we are about to produce. If no, this
@@ -1128,17 +1143,25 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
return true;
}
-bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
+bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+ SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
+ return Impl.run(F);
+}
+bool SeparateConstOffsetFromGEP::run(Function &F) {
if (DisableSeparateConstOffsetFromGEP)
return false;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ DL = &F.getParent()->getDataLayout();
bool Changed = false;
for (BasicBlock &B : F) {
for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
@@ -1345,3 +1368,20 @@ void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
} else
First->setIsInBounds(true);
}
+
+PreservedAnalyses
+SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto *LI = &AM.getResult<LoopAnalysis>(F);
+ auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
+ return AM.getResult<TargetIRAnalysis>(F);
+ };
+ SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
+ if (!Impl.run(F))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
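The hunks above split the old FunctionPass into a thin SeparateConstOffsetFromGEPLegacyPass wrapper and a pass-manager-agnostic implementation class that both the legacy runOnFunction and the new-PM run method construct on the fly. Below is a minimal, illustrative sketch of that wrapper pattern, not the upstream code: DominatorTree, ScalarEvolution, SeparateImpl, runLegacy and runNewPM are stand-in names invented for the example.

#include <functional>
#include <iostream>

struct DominatorTree {};   // stand-ins for the analyses the real pass consumes
struct ScalarEvolution {};

class SeparateImpl {       // hypothetical analogue of SeparateConstOffsetFromGEP
  DominatorTree *DT;
  ScalarEvolution *SE;
  bool LowerGEP;
public:
  SeparateImpl(DominatorTree *DT, ScalarEvolution *SE, bool LowerGEP)
      : DT(DT), SE(SE), LowerGEP(LowerGEP) {}
  bool run() {             // would take a Function & in the real pass
    std::cout << "running transform, LowerGEP=" << LowerGEP << '\n';
    return false;          // report "no change" in this sketch
  }
};

// Legacy-style entry point: fetch analyses eagerly, then delegate.
bool runLegacy(DominatorTree &DT, ScalarEvolution &SE, bool LowerGEP) {
  return SeparateImpl(&DT, &SE, LowerGEP).run();
}

// New-PM-style entry point: same delegation, different analysis source.
bool runNewPM(std::function<DominatorTree &()> GetDT,
              std::function<ScalarEvolution &()> GetSE, bool LowerGEP) {
  return SeparateImpl(&GetDT(), &GetSE(), LowerGEP).run();
}

int main() {
  DominatorTree DT;
  ScalarEvolution SE;
  runLegacy(DT, SE, /*LowerGEP=*/true);
  runNewPM([&]() -> DominatorTree & { return DT; },
           [&]() -> ScalarEvolution & { return SE; }, /*LowerGEP=*/false);
}

The point of the split is that the transform logic no longer reaches into getAnalysis<> itself, so either pass manager can hand it the analyses it needs.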
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 6c6d6ca9cf65..9d3c8d0f3739 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -26,16 +26,18 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
@@ -49,6 +51,7 @@
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
@@ -93,6 +96,11 @@ static cl::opt<bool> UnswitchGuards(
"simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
cl::desc("If enabled, simple loop unswitching will also consider "
"llvm.experimental.guard intrinsics as unswitch candidates."));
+static cl::opt<bool> DropNonTrivialImplicitNullChecks(
+ "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
+ cl::init(false), cl::Hidden,
+ cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
+ "null checks to save time analyzing if we can keep it."));
/// Collect all of the loop invariant input values transitively used by the
/// homogeneous instruction graph from a given root.
@@ -684,11 +692,9 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// successor.
BasicBlock *CommonSuccBB = nullptr;
if (SI.getNumCases() > 0 &&
- std::all_of(std::next(SI.case_begin()), SI.case_end(),
- [&SI](const SwitchInst::CaseHandle &Case) {
- return Case.getCaseSuccessor() ==
- SI.case_begin()->getCaseSuccessor();
- }))
+ all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
+ return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
+ }))
CommonSuccBB = SI.case_begin()->getCaseSuccessor();
if (!DefaultExitBB) {
// If we're not unswitching the default, we need it to match any cases to
@@ -847,12 +853,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
}
- DT.applyUpdates(DTUpdates);
if (MSSAU) {
- MSSAU->applyUpdates(DTUpdates, DT);
+ MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
+ } else {
+ DT.applyUpdates(DTUpdates);
}
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
@@ -1133,9 +1140,22 @@ static BasicBlock *buildClonedLoopBlocks(
// Replace the cloned branch with an unconditional branch to the cloned
// unswitched successor.
auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
- ClonedParentBB->getTerminator()->eraseFromParent();
+ Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
+      // Trivial simplification: if the terminator is a conditional branch or
+      // switch, its condition may become dead once the terminator is erased,
+      // so remember it and clean it up afterwards if it is trivially dead.
+ Value *ClonedConditionToErase = nullptr;
+ if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
+ ClonedConditionToErase = BI->getCondition();
+ else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
+ ClonedConditionToErase = SI->getCondition();
+
+ ClonedTerminator->eraseFromParent();
BranchInst::Create(ClonedSuccBB, ClonedParentBB);
+ if (ClonedConditionToErase)
+ RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
+ MSSAU);
+
// If there are duplicate entries in the PHI nodes because of multiple edges
// to the unswitched successor, we need to nuke all but one as we replaced it
// with a direct branch.
@@ -1194,7 +1214,7 @@ static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
LI.addTopLevelLoop(ClonedRootL);
AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
- if (OrigRootL.empty())
+ if (OrigRootL.isInnermost())
return ClonedRootL;
// If we have a nest, we can quickly clone the entire loop nest using an
@@ -2070,6 +2090,23 @@ static void unswitchNontrivialInvariants(
DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
}
+ // Drop metadata if we may break its semantics by moving this instr into the
+ // split block.
+ if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
+ if (DropNonTrivialImplicitNullChecks)
+ // Do not spend time trying to understand if we can keep it, just drop it
+ // to save compile time.
+ TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
+ else {
+      // It is only legal to preserve make.implicit metadata if we are
+      // guaranteed to reach the implicit null check after following this branch.
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(&L);
+ if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
+ TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
+ }
+ }
+
// The stitching of the branched code back together depends on whether we're
// doing full unswitching or not with the exception that we always want to
// nuke the initial terminator placed in the split block.
@@ -2316,12 +2353,12 @@ static void unswitchNontrivialInvariants(
for (Loop *UpdatedL :
llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
UpdateLoop(*UpdatedL);
- if (!UpdatedL->getParentLoop())
+ if (UpdatedL->isOutermost())
OuterExitL = nullptr;
}
if (IsStillLoop) {
UpdateLoop(L);
- if (!L.getParentLoop())
+ if (L.isOutermost())
OuterExitL = nullptr;
}
@@ -2669,6 +2706,10 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
// (convergent, noduplicate, or cross-basic-block tokens).
// FIXME: We might be able to safely handle some of these in non-duplicated
// regions.
+ TargetTransformInfo::TargetCostKind CostKind =
+ L.getHeader()->getParent()->hasMinSize()
+ ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_SizeAndLatency;
int LoopCost = 0;
for (auto *BB : L.blocks()) {
int Cost = 0;
@@ -2682,7 +2723,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (CB->isConvergent() || CB->cannotDuplicate())
return false;
- Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
+ Cost += TTI.getUserCost(&I, CostKind);
}
assert(Cost >= 0 && "Must not have negative costs!");
LoopCost += Cost;
@@ -2844,7 +2885,6 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
- bool Changed = false;
// Must be in loop simplified form: we need a preheader and dedicated exits.
if (!L.isLoopSimplifyForm())
@@ -2864,6 +2904,10 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (!NonTrivial && !EnableNonTrivialUnswitch)
return false;
+ // Skip non-trivial unswitching for optsize functions.
+ if (L.getHeader()->getParent()->hasOptSize())
+ return false;
+
// For non-trivial unswitching, because it often creates new loops, we rely on
// the pass manager to iterate on the loops rather than trying to immediately
// reach a fixed point. There is no substantial advantage to iterating
@@ -2876,7 +2920,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
return true;
// No other opportunities to unswitch.
- return Changed;
+ return false;
}
PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
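The std::all_of call above is replaced by llvm::all_of over drop_begin(SI.cases()), which drops the explicit iterator bookkeeping while keeping the same meaning: every case after the first must branch to the first case's successor. The following is a small self-contained sketch of that check, using a plain vector of successor labels instead of LLVM's SwitchInst; the names are assumptions for the example.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Hypothetical successor labels, one per switch case.
  std::vector<std::string> CaseSuccs = {"bb.exit", "bb.exit", "bb.exit"};

  // Same shape as the check above: skip the first case, compare the rest
  // against the first case's successor.
  bool AllSame =
      !CaseSuccs.empty() &&
      std::all_of(std::next(CaseSuccs.begin()), CaseSuccs.end(),
                  [&](const std::string &S) { return S == CaseSuccs.front(); });

  // llvm::all_of(llvm::drop_begin(Range), Pred) packages exactly this
  // "skip the first element" pattern behind a range helper.
  std::cout << (AllSame ? "common successor\n" : "successors differ\n");
}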
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 2e459c9a64d4..38e7109ead57 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -25,21 +25,25 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <utility>
using namespace llvm;
@@ -61,6 +65,10 @@ static cl::opt<bool> UserForwardSwitchCond(
"forward-switch-cond", cl::Hidden, cl::init(false),
cl::desc("Forward switch condition to phi ops (default = false)"));
+static cl::opt<bool> UserHoistCommonInsts(
+ "hoist-common-insts", cl::Hidden, cl::init(false),
+ cl::desc("hoist common instructions (default = false)"));
+
static cl::opt<bool> UserSinkCommonInsts(
"sink-common-insts", cl::Hidden, cl::init(false),
cl::desc("Sink common instructions (default = false)"));
@@ -70,14 +78,18 @@ STATISTIC(NumSimpl, "Number of blocks simplified");
/// If we have more than one empty (other than phi node) return blocks,
/// merge them together to promote recursive block merging.
-static bool mergeEmptyReturnBlocks(Function &F) {
+static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) {
bool Changed = false;
+ std::vector<DominatorTree::UpdateType> Updates;
+ SmallVector<BasicBlock *, 8> DeadBlocks;
+
BasicBlock *RetBlock = nullptr;
// Scan all the blocks in the function, looking for empty return blocks.
- for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
- BasicBlock &BB = *BBI++;
+ for (BasicBlock &BB : make_early_inc_range(F)) {
+ if (DTU && DTU->isBBPendingDeletion(&BB))
+ continue;
// Only look at return blocks.
ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
@@ -128,8 +140,18 @@ static bool mergeEmptyReturnBlocks(Function &F) {
if (Ret->getNumOperands() == 0 ||
Ret->getOperand(0) ==
cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
+ // All predecessors of BB should now branch to RetBlock instead.
+ if (DTU) {
+ for (auto *Predecessor : predecessors(&BB)) {
+ // But, iff Predecessor already branches to RetBlock,
+ // don't (re-)add DomTree edge, because it already exists.
+ if (!is_contained(successors(Predecessor), RetBlock))
+ Updates.push_back({DominatorTree::Insert, Predecessor, RetBlock});
+ Updates.push_back({DominatorTree::Delete, Predecessor, &BB});
+ }
+ }
BB.replaceAllUsesWith(RetBlock);
- BB.eraseFromParent();
+ DeadBlocks.emplace_back(&BB);
continue;
}
@@ -153,6 +175,17 @@ static bool mergeEmptyReturnBlocks(Function &F) {
RetBlockPHI->addIncoming(Ret->getOperand(0), &BB);
BB.getTerminator()->eraseFromParent();
BranchInst::Create(RetBlock, &BB);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, &BB, RetBlock});
+ }
+
+ if (DTU) {
+ DTU->applyUpdates(Updates);
+ for (auto *BB : DeadBlocks)
+ DTU->deleteBB(BB);
+ } else {
+ for (auto *BB : DeadBlocks)
+ BB->eraseFromParent();
}
return Changed;
@@ -161,22 +194,36 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
+ DomTreeUpdater *DTU,
const SimplifyCFGOptions &Options) {
bool Changed = false;
bool LocalChange = true;
SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges;
FindFunctionBackedges(F, Edges);
- SmallPtrSet<BasicBlock *, 16> LoopHeaders;
+ SmallPtrSet<BasicBlock *, 16> UniqueLoopHeaders;
for (unsigned i = 0, e = Edges.size(); i != e; ++i)
- LoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));
+ UniqueLoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));
+
+ SmallVector<WeakVH, 16> LoopHeaders(UniqueLoopHeaders.begin(),
+ UniqueLoopHeaders.end());
while (LocalChange) {
LocalChange = false;
// Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (simplifyCFG(&*BBIt++, TTI, Options, &LoopHeaders)) {
+ BasicBlock &BB = *BBIt++;
+ if (DTU) {
+ assert(
+ !DTU->isBBPendingDeletion(&BB) &&
+ "Should not end up trying to simplify blocks marked for removal.");
+ // Make sure that the advanced iterator does not point at the blocks
+ // that are marked for removal, skip over all such blocks.
+ while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt))
+ ++BBIt;
+ }
+ if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) {
LocalChange = true;
++NumSimpl;
}
@@ -186,11 +233,14 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
return Changed;
}
-static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
- const SimplifyCFGOptions &Options) {
- bool EverChanged = removeUnreachableBlocks(F);
- EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TTI, Options);
+static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
+ DominatorTree *DT,
+ const SimplifyCFGOptions &Options) {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+
+ bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr);
+ EverChanged |= mergeEmptyReturnBlocks(F, DT ? &DTU : nullptr);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -200,43 +250,75 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
// iterate between the two optimizations. We structure the code like this to
// avoid rerunning iterativelySimplifyCFG if the second pass of
// removeUnreachableBlocks doesn't do anything.
- if (!removeUnreachableBlocks(F))
+ if (!removeUnreachableBlocks(F, DT ? &DTU : nullptr))
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, Options);
- EverChanged |= removeUnreachableBlocks(F);
+ EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
+ EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr);
} while (EverChanged);
return true;
}
+static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
+ DominatorTree *DT,
+ const SimplifyCFGOptions &Options) {
+ assert((!RequireAndPreserveDomTree ||
+ (DT && DT->verify(DominatorTree::VerificationLevel::Full))) &&
+ "Original domtree is invalid?");
+
+ bool Changed = simplifyFunctionCFGImpl(F, TTI, DT, Options);
+
+ assert((!RequireAndPreserveDomTree ||
+ (DT && DT->verify(DominatorTree::VerificationLevel::Full))) &&
+ "Failed to maintain validity of domtree!");
+
+ return Changed;
+}
+
// Command-line settings override compile-time settings.
-SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) {
- Options.BonusInstThreshold = UserBonusInstThreshold.getNumOccurrences()
- ? UserBonusInstThreshold
- : Opts.BonusInstThreshold;
- Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences()
- ? UserForwardSwitchCond
- : Opts.ForwardSwitchCondToPhi;
- Options.ConvertSwitchToLookupTable = UserSwitchToLookup.getNumOccurrences()
- ? UserSwitchToLookup
- : Opts.ConvertSwitchToLookupTable;
- Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences()
- ? UserKeepLoops
- : Opts.NeedCanonicalLoop;
- Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
- ? UserSinkCommonInsts
- : Opts.SinkCommonInsts;
+static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
+ if (UserBonusInstThreshold.getNumOccurrences())
+ Options.BonusInstThreshold = UserBonusInstThreshold;
+ if (UserForwardSwitchCond.getNumOccurrences())
+ Options.ForwardSwitchCondToPhi = UserForwardSwitchCond;
+ if (UserSwitchToLookup.getNumOccurrences())
+ Options.ConvertSwitchToLookupTable = UserSwitchToLookup;
+ if (UserKeepLoops.getNumOccurrences())
+ Options.NeedCanonicalLoop = UserKeepLoops;
+ if (UserHoistCommonInsts.getNumOccurrences())
+ Options.HoistCommonInsts = UserHoistCommonInsts;
+ if (UserSinkCommonInsts.getNumOccurrences())
+ Options.SinkCommonInsts = UserSinkCommonInsts;
+}
+
+SimplifyCFGPass::SimplifyCFGPass() : Options() {
+ applyCommandLineOverridesToOptions(Options);
+}
+
+SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts)
+ : Options(Opts) {
+ applyCommandLineOverridesToOptions(Options);
}
PreservedAnalyses SimplifyCFGPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
Options.AC = &AM.getResult<AssumptionAnalysis>(F);
- if (!simplifyFunctionCFG(F, TTI, Options))
+ DominatorTree *DT = nullptr;
+ if (RequireAndPreserveDomTree)
+ DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ if (F.hasFnAttribute(Attribute::OptForFuzzing)) {
+ Options.setSimplifyCondBranch(false).setFoldTwoEntryPHINode(false);
+ } else {
+ Options.setSimplifyCondBranch(true).setFoldTwoEntryPHINode(true);
+ }
+ if (!simplifyFunctionCFG(F, TTI, DT, Options))
return PreservedAnalyses::all();
PreservedAnalyses PA;
+ if (RequireAndPreserveDomTree)
+ PA.preserve<DominatorTreeAnalysis>();
PA.preserve<GlobalsAA>();
return PA;
}
@@ -247,33 +329,14 @@ struct CFGSimplifyPass : public FunctionPass {
SimplifyCFGOptions Options;
std::function<bool(const Function &)> PredicateFtor;
- CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false,
- bool ConvertSwitch = false, bool KeepLoops = true,
- bool SinkCommon = false,
+ CFGSimplifyPass(SimplifyCFGOptions Options_ = SimplifyCFGOptions(),
std::function<bool(const Function &)> Ftor = nullptr)
- : FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
+ : FunctionPass(ID), Options(Options_), PredicateFtor(std::move(Ftor)) {
initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
// Check for command-line overrides of options for debug/customization.
- Options.BonusInstThreshold = UserBonusInstThreshold.getNumOccurrences()
- ? UserBonusInstThreshold
- : Threshold;
-
- Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences()
- ? UserForwardSwitchCond
- : ForwardSwitchCond;
-
- Options.ConvertSwitchToLookupTable = UserSwitchToLookup.getNumOccurrences()
- ? UserSwitchToLookup
- : ConvertSwitch;
-
- Options.NeedCanonicalLoop =
- UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops;
-
- Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
- ? UserSinkCommonInsts
- : SinkCommon;
+ applyCommandLineOverridesToOptions(Options);
}
bool runOnFunction(Function &F) override {
@@ -281,6 +344,9 @@ struct CFGSimplifyPass : public FunctionPass {
return false;
Options.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DominatorTree *DT = nullptr;
+ if (RequireAndPreserveDomTree)
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
if (F.hasFnAttribute(Attribute::OptForFuzzing)) {
Options.setSimplifyCondBranch(false)
.setFoldTwoEntryPHINode(false);
@@ -290,11 +356,15 @@ struct CFGSimplifyPass : public FunctionPass {
}
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- return simplifyFunctionCFG(F, TTI, Options);
+ return simplifyFunctionCFG(F, TTI, DT, Options);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
+ if (RequireAndPreserveDomTree)
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (RequireAndPreserveDomTree)
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
};
@@ -305,15 +375,13 @@ INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
false)
// Public interface to the CFGSimplification pass
FunctionPass *
-llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond,
- bool ConvertSwitch, bool KeepLoops,
- bool SinkCommon,
+llvm::createCFGSimplificationPass(SimplifyCFGOptions Options,
std::function<bool(const Function &)> Ftor) {
- return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch,
- KeepLoops, SinkCommon, std::move(Ftor));
+ return new CFGSimplifyPass(Options, std::move(Ftor));
}
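The constructors above collapse into a shared applyCommandLineOverridesToOptions helper: a command-line flag overrides the compile-time SimplifyCFGOptions value only when the user actually passed it, which cl::opt reports through getNumOccurrences(). Below is a minimal sketch of that rule with hypothetical option names and std::optional standing in for "flag was given"; it is not the LLVM cl::opt machinery.

#include <iostream>
#include <optional>

struct Options {
  unsigned BonusInstThreshold = 1;   // builder-provided defaults
  bool HoistCommonInsts = false;
};

// std::optional stands in for "the flag appeared on the command line".
void applyOverrides(Options &Opts, std::optional<unsigned> UserBonus,
                    std::optional<bool> UserHoist) {
  if (UserBonus)                     // only touch what the user explicitly set
    Opts.BonusInstThreshold = *UserBonus;
  if (UserHoist)
    Opts.HoistCommonInsts = *UserHoist;
}

int main() {
  Options Opts;
  // User passed only the hoisting flag; the threshold keeps its default.
  applyOverrides(Opts, std::nullopt, true);
  std::cout << Opts.BonusInstThreshold << ' ' << Opts.HoistCommonInsts << '\n';
}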
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp
index 48f289c8f17d..89cfbe384be4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -32,31 +32,6 @@ using namespace llvm;
STATISTIC(NumSunk, "Number of instructions sunk");
STATISTIC(NumSinkIter, "Number of sinking iterations");
-/// AllUsesDominatedByBlock - Return true if all uses of the specified value
-/// occur in blocks dominated by the specified block.
-static bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB,
- DominatorTree &DT) {
- // Ignoring debug uses is necessary so debug info doesn't affect the code.
- // This may leave a referencing dbg_value in the original block, before
- // the definition of the vreg. Dwarf generator handles this although the
- // user might not get the right info at runtime.
- for (Use &U : Inst->uses()) {
- // Determine the block of the use.
- Instruction *UseInst = cast<Instruction>(U.getUser());
- BasicBlock *UseBlock = UseInst->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
- // PHI nodes use the operand in the predecessor block, not the block with
- // the PHI.
- unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo());
- UseBlock = PN->getIncomingBlock(Num);
- }
- // Check that it dominates.
- if (!DT.dominates(BB, UseBlock))
- return false;
- }
- return true;
-}
-
static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA,
SmallPtrSetImpl<Instruction *> &Stores) {
@@ -97,11 +72,6 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo,
assert(Inst && "Instruction to be sunk is null");
assert(SuccToSinkTo && "Candidate sink target is null");
- // It is not possible to sink an instruction into its own block. This can
- // happen with loops.
- if (Inst->getParent() == SuccToSinkTo)
- return false;
-
// It's never legal to sink an instruction into a block which terminates in an
// EH-pad.
if (SuccToSinkTo->getTerminator()->isExceptionalTerminator())
@@ -129,9 +99,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo,
return false;
}
- // Finally, check that all the uses of the instruction are actually
- // dominated by the candidate
- return AllUsesDominatedByBlock(Inst, SuccToSinkTo, DT);
+ return true;
}
/// SinkInstruction - Determine whether it is safe to sink the specified machine
@@ -162,25 +130,37 @@ static bool SinkInstruction(Instruction *Inst,
// decide.
BasicBlock *SuccToSinkTo = nullptr;
- // Instructions can only be sunk if all their uses are in blocks
- // dominated by one of the successors.
- // Look at all the dominated blocks and see if we can sink it in one.
- DomTreeNode *DTN = DT.getNode(Inst->getParent());
- for (auto I = DTN->begin(), E = DTN->end(); I != E && SuccToSinkTo == nullptr;
- ++I) {
- BasicBlock *Candidate = (*I)->getBlock();
- // A node always immediate-dominates its children on the dominator
- // tree.
- if (IsAcceptableTarget(Inst, Candidate, DT, LI))
- SuccToSinkTo = Candidate;
+ // Find the nearest common dominator of all users as the candidate.
+ BasicBlock *BB = Inst->getParent();
+ for (Use &U : Inst->uses()) {
+ Instruction *UseInst = cast<Instruction>(U.getUser());
+ BasicBlock *UseBlock = UseInst->getParent();
+ // Don't worry about dead users.
+ if (!DT.isReachableFromEntry(UseBlock))
+ continue;
+ if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo());
+ UseBlock = PN->getIncomingBlock(Num);
+ }
+ if (SuccToSinkTo)
+ SuccToSinkTo = DT.findNearestCommonDominator(SuccToSinkTo, UseBlock);
+ else
+ SuccToSinkTo = UseBlock;
+ // The current basic block needs to dominate the candidate.
+ if (!DT.dominates(BB, SuccToSinkTo))
+ return false;
}
- // If no suitable postdominator was found, look at all the successors and
- // decide which one we should sink to, if any.
- for (succ_iterator I = succ_begin(Inst->getParent()),
- E = succ_end(Inst->getParent()); I != E && !SuccToSinkTo; ++I) {
- if (IsAcceptableTarget(Inst, *I, DT, LI))
- SuccToSinkTo = *I;
+ if (SuccToSinkTo) {
+ // The nearest common dominator may be in a parent loop of BB, which may not
+ // be beneficial. Find an ancestor.
+ while (SuccToSinkTo != BB &&
+ !IsAcceptableTarget(Inst, SuccToSinkTo, DT, LI))
+ SuccToSinkTo = DT.getNode(SuccToSinkTo)->getIDom()->getBlock();
+ if (SuccToSinkTo == BB)
+ SuccToSinkTo = nullptr;
}
// If we couldn't find a block to sink to, ignore this instruction.
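The rewritten SinkInstruction above no longer scans dominated blocks and successors separately; it computes the nearest common dominator of every user's block and then walks up the dominator tree until it finds an acceptable target. The sketch below shows that idea on a toy immediate-dominator table; the block names and the depth-based nearestCommonDominator helper are assumptions for the example, not the llvm::DominatorTree API.

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Immediate-dominator table for a small CFG; "entry" dominates everything.
std::map<std::string, std::string> IDom = {
    {"entry", ""}, {"a", "entry"}, {"b", "a"}, {"c", "a"}, {"d", "a"}};

int depth(const std::string &B) {
  int D = 0;
  for (std::string X = B; !IDom[X].empty(); X = IDom[X]) ++D;
  return D;
}

std::string nearestCommonDominator(std::string X, std::string Y) {
  while (depth(X) > depth(Y)) X = IDom[X];
  while (depth(Y) > depth(X)) Y = IDom[Y];
  while (X != Y) { X = IDom[X]; Y = IDom[Y]; }
  return X;
}

int main() {
  // Blocks containing the users of the candidate instruction.
  std::vector<std::string> UseBlocks = {"b", "c", "d"};
  std::string Target = UseBlocks.front();
  for (const auto &UB : UseBlocks)
    Target = nearestCommonDominator(Target, UB);
  std::cout << "initial sink target: " << Target << '\n'; // prints "a"
}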
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index 8258b92a716d..9b18c945d950 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -756,13 +756,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
// For each PHI node in this block, check whether there are immediate folding
// opportunities from speculation, and whether that speculation will be
 // valid. This determines the set of safe PHIs to speculate.
- PNs.erase(llvm::remove_if(PNs,
- [&](PHINode *PN) {
- return !isSafeAndProfitableToSpeculateAroundPHI(
- *PN, CostSavingsMap, PotentialSpecSet,
- UnsafeSet, DT, TTI);
- }),
- PNs.end());
+ llvm::erase_if(PNs, [&](PHINode *PN) {
+ return !isSafeAndProfitableToSpeculateAroundPHI(
+ *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI);
+ });
// If no PHIs were profitable, skip.
if (PNs.empty()) {
LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n");
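The change above folds the erase(remove_if(...), end()) call into llvm::erase_if, which packages the erase-remove idiom in one call. A small sketch, using std::vector and C++20 std::erase_if rather than SmallVector, showing that the two spellings drop the same elements:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> PNs = {1, 2, 3, 4, 5, 6};
  auto NotProfitable = [](int N) { return N % 2 == 0; };

  // Classic erase-remove idiom (what the old code spelled out).
  std::vector<int> A = PNs;
  A.erase(std::remove_if(A.begin(), A.end(), NotProfitable), A.end());

  // C++20 std::erase_if (and llvm::erase_if) wrap the same idiom in one call.
  std::vector<int> B = PNs;
  std::erase_if(B, NotProfitable);

  std::cout << (A == B ? "identical results\n" : "mismatch\n");
}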
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index f82a2936c762..c78185f2a6ad 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -245,6 +245,13 @@ static unsigned ComputeSpeculationCost(const Instruction *I,
case Instruction::FNeg:
case Instruction::ICmp:
case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::Freeze:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ case Instruction::InsertValue:
return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
default:
@@ -274,7 +281,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
for (const Value *V : U->operand_values()) {
if (const Instruction *I = dyn_cast<Instruction>(V)) {
- if (NotHoisted.count(I) > 0)
+ if (NotHoisted.contains(I))
return false;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 9f82b1263ebd..577992ccb5f4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -55,6 +55,7 @@
// - When (i' - i) is constant but i and i' are not, we could still perform
// SLSR.
+#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -95,8 +96,39 @@ static const unsigned UnknownAddressSpace =
namespace {
-class StraightLineStrengthReduce : public FunctionPass {
+class StraightLineStrengthReduceLegacyPass : public FunctionPass {
+ const DataLayout *DL = nullptr;
+
public:
+ static char ID;
+
+ StraightLineStrengthReduceLegacyPass() : FunctionPass(ID) {
+ initializeStraightLineStrengthReduceLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ // We do not modify the shape of the CFG.
+ AU.setPreservesCFG();
+ }
+
+ bool doInitialization(Module &M) override {
+ DL = &M.getDataLayout();
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+
+class StraightLineStrengthReduce {
+public:
+ StraightLineStrengthReduce(const DataLayout *DL, DominatorTree *DT,
+ ScalarEvolution *SE, TargetTransformInfo *TTI)
+ : DL(DL), DT(DT), SE(SE), TTI(TTI) {}
+
// SLSR candidate. Such a candidate must be in one of the forms described in
// the header comments.
struct Candidate {
@@ -144,26 +176,7 @@ public:
Candidate *Basis = nullptr;
};
- static char ID;
-
- StraightLineStrengthReduce() : FunctionPass(ID) {
- initializeStraightLineStrengthReducePass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- // We do not modify the shape of the CFG.
- AU.setPreservesCFG();
- }
-
- bool doInitialization(Module &M) override {
- DL = &M.getDataLayout();
- return false;
- }
-
- bool runOnFunction(Function &F) override;
+ bool runOnFunction(Function &F);
private:
// Returns true if Basis is a basis for C, i.e., Basis dominates C and they
@@ -243,18 +256,18 @@ private:
} // end anonymous namespace
-char StraightLineStrengthReduce::ID = 0;
+char StraightLineStrengthReduceLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
+INITIALIZE_PASS_BEGIN(StraightLineStrengthReduceLegacyPass, "slsr",
"Straight line strength reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
+INITIALIZE_PASS_END(StraightLineStrengthReduceLegacyPass, "slsr",
"Straight line strength reduction", false, false)
FunctionPass *llvm::createStraightLineStrengthReducePass() {
- return new StraightLineStrengthReduce();
+ return new StraightLineStrengthReduceLegacyPass();
}
bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
@@ -272,9 +285,7 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
static bool isGEPFoldable(GetElementPtrInst *GEP,
const TargetTransformInfo *TTI) {
- SmallVector<const Value*, 4> Indices;
- for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
- Indices.push_back(*I);
+ SmallVector<const Value *, 4> Indices(GEP->indices());
return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
Indices) == TargetTransformInfo::TCC_Free;
}
@@ -704,13 +715,17 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
UnlinkedInstructions.push_back(C.Ins);
}
-bool StraightLineStrengthReduce::runOnFunction(Function &F) {
+bool StraightLineStrengthReduceLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ return StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F);
+}
+
+bool StraightLineStrengthReduce::runOnFunction(Function &F) {
// Traverse the dominator tree in the depth-first order. This order makes sure
// all bases of a candidate are in Candidates when we process it.
for (const auto Node : depth_first(DT))
@@ -740,3 +755,25 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
UnlinkedInstructions.clear();
return Ret;
}
+
+namespace llvm {
+
+PreservedAnalyses
+StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
+ const DataLayout *DL = &F.getParent()->getDataLayout();
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
+
+ if (!StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<TargetIRAnalysis>();
+ return PA;
+}
+
+} // namespace llvm
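For reference, the rewrite this pass performs (independent of the legacy/new pass-manager split above) replaces a candidate of the form B + i * S with a cheap add from a dominating basis of the same form. A minimal arithmetic sketch, with plain integers standing in for IR values:

#include <cassert>

int main() {
  int B = 100, S = 8;

  // Original straight-line code: two independent multiplies.
  int X = B + 7 * S;          // basis candidate
  int Y = B + 9 * S;          // candidate to be rewritten

  // Strength-reduced form: Y derived from X, with the multiply replaced by an
  // add of (9 - 7) * S, a compile-time constant times the stride.
  int Y2 = X + (9 - 7) * S;

  assert(Y == Y2);
  return 0;
}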
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index c20e57b02c1a..3e15cad5f3f3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
@@ -28,6 +29,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -55,7 +57,7 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "structurizecfg"
// The name for newly created blocks.
-static const char *const FlowBlockName = "Flow";
+const char FlowBlockName[] = "Flow";
namespace {
@@ -233,9 +235,8 @@ public:
/// while the true side continues the general flow. So the loop condition
/// consist of a network of PHI nodes where the true incoming values expresses
/// breaks and the false values expresses continue states.
-class StructurizeCFG : public RegionPass {
- bool SkipUniformRegions;
+class StructurizeCFG {
Type *Boolean;
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
@@ -244,7 +245,7 @@ class StructurizeCFG : public RegionPass {
Function *Func;
Region *ParentRegion;
- LegacyDivergenceAnalysis *DA;
+ LegacyDivergenceAnalysis *DA = nullptr;
DominatorTree *DT;
SmallVector<RegionNode *, 8> Order;
@@ -309,19 +310,35 @@ class StructurizeCFG : public RegionPass {
void rebuildSSA();
public:
+ void init(Region *R);
+ bool run(Region *R, DominatorTree *DT);
+ bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA);
+};
+
+class StructurizeCFGLegacyPass : public RegionPass {
+ bool SkipUniformRegions;
+
+public:
static char ID;
- explicit StructurizeCFG(bool SkipUniformRegions_ = false)
- : RegionPass(ID),
- SkipUniformRegions(SkipUniformRegions_) {
+ explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
+ : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
if (ForceSkipUniformRegions.getNumOccurrences())
SkipUniformRegions = ForceSkipUniformRegions.getValue();
- initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
+ initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
}
- bool doInitialization(Region *R, RGPassManager &RGM) override;
-
- bool runOnRegion(Region *R, RGPassManager &RGM) override;
+ bool runOnRegion(Region *R, RGPassManager &RGM) override {
+ StructurizeCFG SCFG;
+ SCFG.init(R);
+ if (SkipUniformRegions) {
+ LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ if (SCFG.makeUniformRegion(R, DA))
+ return false;
+ }
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return SCFG.run(R, DT);
+ }
StringRef getPassName() const override { return "Structurize control flow"; }
@@ -338,28 +355,16 @@ public:
} // end anonymous namespace
-char StructurizeCFG::ID = 0;
+char StructurizeCFGLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
- false, false)
+INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
+ "Structurize the CFG", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
-INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
- false, false)
-
-/// Initialize the types and constants used in the pass
-bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
- LLVMContext &Context = R->getEntry()->getContext();
-
- Boolean = Type::getInt1Ty(Context);
- BoolTrue = ConstantInt::getTrue(Context);
- BoolFalse = ConstantInt::getFalse(Context);
- BoolUndef = UndefValue::get(Boolean);
-
- return false;
-}
+INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
+ "Structurize the CFG", false, false)
/// Build up the general order of nodes, by performing a topological sort of the
/// parent region's nodes, while ensuring that there is no outer cycle node
@@ -1003,48 +1008,62 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
}
-/// Run the transformation for each region found
-bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
+void StructurizeCFG::init(Region *R) {
+ LLVMContext &Context = R->getEntry()->getContext();
+
+ Boolean = Type::getInt1Ty(Context);
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+
+ this->DA = nullptr;
+}
+
+bool StructurizeCFG::makeUniformRegion(Region *R,
+ LegacyDivergenceAnalysis *DA) {
if (R->isTopLevelRegion())
return false;
- DA = nullptr;
-
- if (SkipUniformRegions) {
- // TODO: We could probably be smarter here with how we handle sub-regions.
- // We currently rely on the fact that metadata is set by earlier invocations
- // of the pass on sub-regions, and that this metadata doesn't get lost --
- // but we shouldn't rely on metadata for correctness!
- unsigned UniformMDKindID =
- R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
- DA = &getAnalysis<LegacyDivergenceAnalysis>();
-
- if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
- LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
- << '\n');
-
- // Mark all direct child block terminators as having been treated as
- // uniform. To account for a possible future in which non-uniform
- // sub-regions are treated more cleverly, indirect children are not
- // marked as uniform.
- MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
- for (RegionNode *E : R->elements()) {
- if (E->isSubRegion())
- continue;
-
- if (Instruction *Term = E->getEntry()->getTerminator())
- Term->setMetadata(UniformMDKindID, MD);
- }
+ this->DA = DA;
+ // TODO: We could probably be smarter here with how we handle sub-regions.
+ // We currently rely on the fact that metadata is set by earlier invocations
+ // of the pass on sub-regions, and that this metadata doesn't get lost --
+ // but we shouldn't rely on metadata for correctness!
+ unsigned UniformMDKindID =
+ R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
+
+ if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
+ LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
+ << '\n');
+
+ // Mark all direct child block terminators as having been treated as
+ // uniform. To account for a possible future in which non-uniform
+ // sub-regions are treated more cleverly, indirect children are not
+ // marked as uniform.
+ MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
+ for (RegionNode *E : R->elements()) {
+ if (E->isSubRegion())
+ continue;
- return false;
+ if (Instruction *Term = E->getEntry()->getTerminator())
+ Term->setMetadata(UniformMDKindID, MD);
}
+
+ return true;
}
+ return false;
+}
+
+/// Run the transformation for each region found
+bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
+ if (R->isTopLevelRegion())
+ return false;
+
+ this->DT = DT;
Func = R->getEntry()->getParent();
ParentRegion = R;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
orderNodes();
collectInfos();
createFlow();
@@ -1069,5 +1088,33 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
}
Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
- return new StructurizeCFG(SkipUniformRegions);
+ return new StructurizeCFGLegacyPass(SkipUniformRegions);
+}
+
+static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
+ Regions.push_back(&R);
+ for (const auto &E : R)
+ addRegionIntoQueue(*E, Regions);
+}
+
+PreservedAnalyses StructurizeCFGPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+
+ bool Changed = false;
+ DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto &RI = AM.getResult<RegionInfoAnalysis>(F);
+ std::vector<Region *> Regions;
+ addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
+ while (!Regions.empty()) {
+ Region *R = Regions.back();
+ StructurizeCFG SCFG;
+ SCFG.init(R);
+ Changed |= SCFG.run(R, DT);
+ Regions.pop_back();
+ }
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
}
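The new-PM StructurizeCFGPass::run above gathers every region of the function into a worklist with addRegionIntoQueue and then processes the vector from the back, so nested regions are structurized before the regions that contain them. A toy sketch of that traversal order, using an invented Region struct rather than llvm::Region:

#include <iostream>
#include <string>
#include <vector>

struct Region {
  std::string Name;
  std::vector<Region> Children;
};

void addRegionIntoQueue(const Region &R, std::vector<const Region *> &Queue) {
  Queue.push_back(&R);                 // parent first...
  for (const Region &Child : R.Children)
    addRegionIntoQueue(Child, Queue);  // ...then every nested region
}

int main() {
  Region Top{"top", {Region{"outer", {Region{"inner", {}}}}}};
  std::vector<const Region *> Queue;
  addRegionIntoQueue(Top, Queue);
  while (!Queue.empty()) {             // back-to-front: inner, outer, top
    std::cout << Queue.back()->Name << '\n';
    Queue.pop_back();
  }
}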
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 5bb1d54d7d12..9e7cccc88412 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -92,7 +92,10 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
/// Scan the specified function for alloca instructions.
/// If it contains any dynamic allocas, returns false.
static bool canTRE(Function &F) {
- // Because of PR962, we don't TRE dynamic allocas.
+ // FIXME: The code generator produces really bad code when an 'escaping
+ // alloca' is changed from being a static alloca to being a dynamic alloca.
+ // Until this is resolved, disable this transformation if that would ever
+ // happen. This bug is PR962.
return llvm::all_of(instructions(F), [](Instruction &I) {
auto *AI = dyn_cast<AllocaInst>(&I);
return !AI || AI->isStaticAlloca();
@@ -237,7 +240,11 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls,
Escaped = ESCAPED;
CallInst *CI = dyn_cast<CallInst>(&I);
- if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I))
+ // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is
+ // considered accessing memory and will be marked as a tail call if we
+ // don't bail out here.
+ if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I) ||
+ isa<PseudoProbeInst>(&I))
continue;
bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles();
@@ -279,7 +286,7 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls,
}
}
- for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
+ for (auto *SuccBB : successors(BB)) {
auto &State = Visited[SuccBB];
if (State < Escaped) {
State = Escaped;
@@ -419,7 +426,7 @@ class TailRecursionEliminator {
DomTreeUpdater &DTU)
: F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
- CallInst *findTRECandidate(Instruction *TI,
+ CallInst *findTRECandidate(BasicBlock *BB,
bool CannotTailCallElimCallsMarkedTail);
void createTailRecurseLoopHeader(CallInst *CI);
@@ -428,14 +435,10 @@ class TailRecursionEliminator {
bool eliminateCall(CallInst *CI);
- bool foldReturnAndProcessPred(ReturnInst *Ret,
- bool CannotTailCallElimCallsMarkedTail);
-
- bool processReturningBlock(ReturnInst *Ret,
- bool CannotTailCallElimCallsMarkedTail);
-
void cleanupAndFinalize();
+ bool processBlock(BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail);
+
public:
static bool eliminate(Function &F, const TargetTransformInfo *TTI,
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
@@ -444,8 +447,8 @@ public:
} // namespace
CallInst *TailRecursionEliminator::findTRECandidate(
- Instruction *TI, bool CannotTailCallElimCallsMarkedTail) {
- BasicBlock *BB = TI->getParent();
+ BasicBlock *BB, bool CannotTailCallElimCallsMarkedTail) {
+ Instruction *TI = BB->getTerminator();
if (&BB->front() == TI) // Make sure there is something before the terminator.
return nullptr;
@@ -672,63 +675,6 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
return true;
}
-bool TailRecursionEliminator::foldReturnAndProcessPred(
- ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) {
- BasicBlock *BB = Ret->getParent();
-
- bool Change = false;
-
- // Make sure this block is a trivial return block.
- assert(BB->getFirstNonPHIOrDbg() == Ret &&
- "Trying to fold non-trivial return block");
-
- // If the return block contains nothing but the return and PHI's,
- // there might be an opportunity to duplicate the return in its
- // predecessors and perform TRE there. Look for predecessors that end
- // in unconditional branch and recursive call(s).
- SmallVector<BranchInst*, 8> UncondBranchPreds;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *Pred = *PI;
- Instruction *PTI = Pred->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
- if (BI->isUnconditional())
- UncondBranchPreds.push_back(BI);
- }
-
- while (!UncondBranchPreds.empty()) {
- BranchInst *BI = UncondBranchPreds.pop_back_val();
- BasicBlock *Pred = BI->getParent();
- if (CallInst *CI =
- findTRECandidate(BI, CannotTailCallElimCallsMarkedTail)) {
- LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
- << "INTO UNCOND BRANCH PRED: " << *Pred);
- FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU);
-
- // Cleanup: if all predecessors of BB have been eliminated by
- // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
- // because the ret instruction in there is still using a value which
- // eliminateRecursiveTailCall will attempt to remove.
- if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
- DTU.deleteBB(BB);
-
- eliminateCall(CI);
- ++NumRetDuped;
- Change = true;
- }
- }
-
- return Change;
-}
-
-bool TailRecursionEliminator::processReturningBlock(
- ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) {
- CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
- if (!CI)
- return false;
-
- return eliminateCall(CI);
-}
-
void TailRecursionEliminator::cleanupAndFinalize() {
// If we eliminated any tail recursions, it's possible that we inserted some
// silly PHI nodes which just merge an initial value (the incoming operand)
@@ -801,6 +747,50 @@ void TailRecursionEliminator::cleanupAndFinalize() {
}
}
+bool TailRecursionEliminator::processBlock(
+ BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail) {
+ Instruction *TI = BB.getTerminator();
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional())
+ return false;
+
+ BasicBlock *Succ = BI->getSuccessor(0);
+ ReturnInst *Ret = dyn_cast<ReturnInst>(Succ->getFirstNonPHIOrDbg(true));
+
+ if (!Ret)
+ return false;
+
+ CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail);
+
+ if (!CI)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ
+ << "INTO UNCOND BRANCH PRED: " << BB);
+ FoldReturnIntoUncondBranch(Ret, Succ, &BB, &DTU);
+ ++NumRetDuped;
+
+ // If all predecessors of Succ have been eliminated by
+ // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
+ // because the ret instruction in there is still using a value which
+ // eliminateCall will attempt to remove. This block can only contain
+ // instructions that can't have uses, therefore it is safe to remove.
+ if (pred_empty(Succ))
+ DTU.deleteBB(Succ);
+
+ eliminateCall(CI);
+ return true;
+ } else if (isa<ReturnInst>(TI)) {
+ CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail);
+
+ if (CI)
+ return eliminateCall(CI);
+ }
+
+ return false;
+}
+
bool TailRecursionEliminator::eliminate(Function &F,
const TargetTransformInfo *TTI,
AliasAnalysis *AA,
@@ -825,23 +815,11 @@ bool TailRecursionEliminator::eliminate(Function &F,
// TRE would deallocate variable sized allocas, TRE doesn't).
bool CanTRETailMarkedCall = canTRE(F);
+ // Change any tail recursive calls to loops.
TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU);
- // Change any tail recursive calls to loops.
- //
- // FIXME: The code generator produces really bad code when an 'escaping
- // alloca' is changed from being a static alloca to being a dynamic alloca.
- // Until this is resolved, disable this transformation if that would ever
- // happen. This bug is PR962.
- for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
- BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB.
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
- bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall);
- MadeChange |= Change;
- }
- }
+ for (BasicBlock &BB : F)
+ MadeChange |= TRE.processBlock(BB, !CanTRETailMarkedCall);
TRE.cleanupAndFinalize();
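processBlock above, like the helpers it replaces, ultimately turns a self tail call into a branch back to a loop header created by createTailRecurseLoopHeader. As a rough source-level illustration of the effect (the function names below are invented for the example and this is not the IR the pass produces), a tail-recursive accumulator becomes a plain loop:

#include <cassert>

// Tail-recursive form: the recursive call is the last thing the function does.
long sumToRecursive(long N, long Acc) {
  if (N == 0)
    return Acc;
  return sumToRecursive(N - 1, Acc + N);   // tail call
}

// What TRE effectively produces: the arguments become loop-carried values.
long sumToLoop(long N, long Acc) {
  while (true) {
    if (N == 0)
      return Acc;
    long NextN = N - 1, NextAcc = Acc + N; // next-iteration ("PHI") values
    N = NextN;
    Acc = NextAcc;
  }
}

int main() {
  assert(sumToRecursive(10, 0) == sumToLoop(10, 0));
  return 0;
}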
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 7c81e6352dec..80a7d3a43ad6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -48,12 +48,12 @@ static void warnAboutLeftoverTransformations(Loop *L,
if (hasVectorizeTransformation(L) == TM_ForcedByUser) {
LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n");
- Optional<int> VectorizeWidth =
- getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+ Optional<ElementCount> VectorizeWidth =
+ getOptionalElementCountLoopAttribute(L);
Optional<int> InterleaveCount =
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
- if (VectorizeWidth.getValueOr(0) != 1)
+ if (!VectorizeWidth || VectorizeWidth->isVector())
ORE->emit(
DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
"FailedRequestedVectorization",
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index 84a66e1e96d2..ccdcf7cbce38 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -17,9 +17,6 @@
#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IRBuilder.h"
-
-#include <iostream>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index 7ff73fcdada7..3daff3b4430b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -52,6 +52,7 @@ namespace {
bool isUsefullToPreserve(Attribute::AttrKind Kind) {
switch (Kind) {
case Attribute::NonNull:
+ case Attribute::NoUndef:
case Attribute::Alignment:
case Attribute::Dereferenceable:
case Attribute::DereferenceableOrNull:
@@ -69,7 +70,7 @@ RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
default:
return RK;
case Attribute::NonNull:
- RK.WasOn = GetUnderlyingObject(RK.WasOn, M->getDataLayout());
+ RK.WasOn = getUnderlyingObject(RK.WasOn);
return RK;
case Attribute::Alignment: {
Value *V = RK.WasOn->stripInBoundsOffsets([&](const Value *Strip) {
@@ -145,7 +146,7 @@ struct AssumeBuilderState {
if (!RK.WasOn)
return true;
if (RK.WasOn->getType()->isPointerTy()) {
- Value *UnderlyingPtr = GetUnderlyingObject(RK.WasOn, M->getDataLayout());
+ Value *UnderlyingPtr = getUnderlyingObject(RK.WasOn);
if (isa<AllocaInst>(UnderlyingPtr) || isa<GlobalValue>(UnderlyingPtr))
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 085d91031cf9..6bcd42c4c6d8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -105,7 +105,7 @@ void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
if (DTU)
- DTU->applyUpdatesPermissive(Updates);
+ DTU->applyUpdates(Updates);
for (BasicBlock *BB : BBs)
if (DTU)
@@ -136,9 +136,10 @@ bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
return !DeadBlocks.empty();
}
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
+bool llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
MemoryDependenceResults *MemDep) {
- if (!isa<PHINode>(BB->begin())) return;
+ if (!isa<PHINode>(BB->begin()))
+ return false;
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
if (PN->getIncomingValue(0) != PN)
@@ -151,6 +152,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
PN->eraseFromParent();
}
+ return true;
}
bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI,
@@ -228,19 +230,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// These dominator edges will be redirected from Pred.
std::vector<DominatorTree::UpdateType> Updates;
if (DTU) {
- Updates.reserve(1 + (2 * succ_size(BB)));
+ SmallSetVector<BasicBlock *, 2> UniqueSuccessors(succ_begin(BB),
+ succ_end(BB));
+ Updates.reserve(1 + (2 * UniqueSuccessors.size()));
// Add insert edges first. Experimentally, for the particular case of two
// blocks that can be merged, with a single successor and single predecessor
// respectively, it is beneficial to have all insert updates first. Deleting
// edges first may lead to unreachable blocks, followed by inserting edges
// making the blocks reachable again. Such DT updates lead to high compile
// times. We add inserts before deletes here to reduce compile time.
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
// This successor of BB may already have PredBB as a predecessor.
- if (llvm::find(successors(PredBB), *I) == succ_end(PredBB))
- Updates.push_back({DominatorTree::Insert, PredBB, *I});
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
- Updates.push_back({DominatorTree::Delete, BB, *I});
+ if (!llvm::is_contained(successors(PredBB), UniqueSuccessor))
+ Updates.push_back({DominatorTree::Insert, PredBB, UniqueSuccessor});
+ for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
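The comment in the hunk above explains why insert updates are queued before delete updates: deleting first can make blocks transiently unreachable and then reachable again, which is expensive for the dominator tree. A standalone sketch of that ordering for a caller's own batch; the block names Pred/BB/Succ and the helper are hypothetical, not from the patch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// Records the CFG change Pred -> BB -> Succ becoming Pred -> Succ, queueing
// the insertion first so Succ never looks unreachable to the updater.
// (Assumes the IR branches have already been rewired to match these edges.)
static void redirectAroundBB(BasicBlock *Pred, BasicBlock *BB,
                             BasicBlock *Succ, DomTreeUpdater &DTU) {
  SmallVector<DominatorTree::UpdateType, 4> Updates;
  Updates.push_back({DominatorTree::Insert, Pred, Succ});
  Updates.push_back({DominatorTree::Delete, Pred, BB});
  Updates.push_back({DominatorTree::Delete, BB, Succ});
  DTU.applyUpdates(Updates);
}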
@@ -285,11 +289,6 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// Add unreachable to now empty BB.
new UnreachableInst(BB->getContext(), BB);
- // Eliminate duplicate/redundant dbg.values. This seems to be a good place to
- // do that since we might end up with redundant dbg.values describing the
- // entry PHI node post-splice.
- RemoveRedundantDbgInstrs(PredBB);
-
// Inherit the predecessor's name if it exists.

if (!PredBB->hasName())
PredBB->takeName(BB);
@@ -306,7 +305,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
isa<UnreachableInst>(BB->getTerminator()) &&
"The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
- DTU->applyUpdatesPermissive(Updates);
+ DTU->applyUpdates(Updates);
DTU->deleteBB(BB);
} else {
BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
@@ -498,14 +497,16 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
}
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
- LoopInfo *LI, MemorySSAUpdater *MSSAU) {
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ const Twine &BBName) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
// If this is a critical edge, let SplitCriticalEdge do it.
Instruction *LatchTerm = BB->getTerminator();
if (SplitCriticalEdge(
LatchTerm, SuccNum,
- CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()))
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA(),
+ BBName))
return LatchTerm->getSuccessor(SuccNum);
// If the edge isn't critical, then BB has a single successor or Succ has a
@@ -515,14 +516,15 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
- return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU);
+ return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU, BBName,
+ /*Before=*/true);
}
// Otherwise, if BB has a single successor, split it at the bottom of the
// block.
assert(BB->getTerminator()->getNumSuccessors() == 1 &&
"Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU);
+ return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName);
}
unsigned
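SplitEdge (and SplitCriticalEdge in BreakCriticalEdges.cpp further down) now accepts an optional name for the inserted block. A hedged usage sketch, assuming a DominatorTree is already available; the helper and the name "ls.split" are made up:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Splits the BB -> Succ edge and names the new block, instead of relying on
// the default "<pred>.<succ>_crit_edge" / "<block>.split" naming.
static BasicBlock *splitNamedEdge(BasicBlock *BB, BasicBlock *Succ,
                                  DominatorTree &DT) {
  return SplitEdge(BB, Succ, &DT, /*LI=*/nullptr, /*MSSAU=*/nullptr,
                   "ls.split");
}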
@@ -540,9 +542,16 @@ llvm::SplitAllCriticalEdges(Function &F,
return NumBroken;
}
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU, const Twine &BBName) {
+static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
+ DomTreeUpdater *DTU, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ const Twine &BBName, bool Before) {
+ if (Before) {
+ DomTreeUpdater LocalDTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ return splitBlockBefore(Old, SplitPt,
+ DTU ? DTU : (DT ? &LocalDTU : nullptr), LI, MSSAU,
+ BBName);
+ }
BasicBlock::iterator SplitIt = SplitPt->getIterator();
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
@@ -556,7 +565,20 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
if (Loop *L = LI->getLoopFor(Old))
L->addBasicBlockToLoop(New, *LI);
- if (DT)
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ // Old dominates New. New node dominates all other nodes dominated by Old.
+ SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New),
+ succ_end(New));
+ Updates.push_back({DominatorTree::Insert, Old, New});
+ Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size());
+ for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) {
+ Updates.push_back({DominatorTree::Insert, New, UniqueSuccessorOfOld});
+ Updates.push_back({DominatorTree::Delete, Old, UniqueSuccessorOfOld});
+ }
+
+ DTU->applyUpdates(Updates);
+ } else if (DT)
// Old dominates New. New node dominates all other nodes dominated by Old.
if (DomTreeNode *OldNode = DT->getNode(Old)) {
std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
@@ -574,14 +596,94 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
return New;
}
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, const Twine &BBName,
+ bool Before) {
+ return SplitBlockImpl(Old, SplitPt, /*DTU=*/nullptr, DT, LI, MSSAU, BBName,
+ Before);
+}
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, const Twine &BBName,
+ bool Before) {
+ return SplitBlockImpl(Old, SplitPt, DTU, /*DT=*/nullptr, LI, MSSAU, BBName,
+ Before);
+}
+
+BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ const Twine &BBName) {
+
+ BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
+ ++SplitIt;
+ std::string Name = BBName.str();
+ BasicBlock *New = Old->splitBasicBlock(
+ SplitIt, Name.empty() ? Old->getName() + ".split" : Name,
+ /* Before=*/true);
+
+ // The new block lives in whichever loop the old one did. This preserves
+ // LCSSA as well, because we force the split point to be after any PHI nodes.
+ if (LI)
+ if (Loop *L = LI->getLoopFor(Old))
+ L->addBasicBlockToLoop(New, *LI);
+
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
+ // New dominates Old. The predecessor nodes of the Old node dominate
+ // New node.
+ SmallSetVector<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New),
+ pred_end(New));
+ DTUpdates.push_back({DominatorTree::Insert, New, Old});
+ DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size());
+ for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) {
+ DTUpdates.push_back({DominatorTree::Insert, UniquePredecessorOfOld, New});
+ DTUpdates.push_back({DominatorTree::Delete, UniquePredecessorOfOld, Old});
+ }
+
+ DTU->applyUpdates(DTUpdates);
+
+ // Move MemoryAccesses still tracked in Old, but part of New now.
+ // Update accesses in successor blocks accordingly.
+ if (MSSAU) {
+ MSSAU->applyUpdates(DTUpdates, DTU->getDomTree());
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
+ }
+ return New;
+}
+
/// Update DominatorTree, LoopInfo, and LCSSA analysis information.
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU,
+ DomTreeUpdater *DTU, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
bool PreserveLCSSA, bool &HasLoopExit) {
// Update dominator tree if available.
- if (DT) {
+ if (DTU) {
+ // Recalculation of DomTree is needed when updating a forward DomTree and
+ // the Entry BB is replaced.
+ if (NewBB == &NewBB->getParent()->getEntryBlock() && DTU->hasDomTree()) {
+ // The entry block was removed and there is no external interface for
+ // the dominator tree to be notified of this change. In this corner-case
+ // we recalculate the entire tree.
+ DTU->recalculate(*NewBB->getParent());
+ } else {
+ // Split block expects NewBB to have a non-empty set of predecessors.
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ SmallSetVector<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end());
+ Updates.push_back({DominatorTree::Insert, NewBB, OldBB});
+ Updates.reserve(Updates.size() + 2 * UniquePreds.size());
+ for (auto *UniquePred : UniquePreds) {
+ Updates.push_back({DominatorTree::Insert, UniquePred, NewBB});
+ Updates.push_back({DominatorTree::Delete, UniquePred, OldBB});
+ }
+ DTU->applyUpdates(Updates);
+ }
+ } else if (DT) {
if (OldBB == DT->getRootNode()->getBlock()) {
assert(NewBB == &NewBB->getParent()->getEntryBlock());
DT->setNewRoot(NewBB);
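The new splitBlockBefore path and the DomTreeUpdater-based SplitBlock overload introduced above can be driven as follows. This is only a sketch assuming the caller owns a DominatorTree; the helper and block name are hypothetical:

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Splits BB in front of SplitPt; the dominator tree is kept current lazily
// through the DomTreeUpdater rather than being patched by hand.
static BasicBlock *splitBeforeWithDTU(BasicBlock *BB, Instruction *SplitPt,
                                      DominatorTree &DT) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  BasicBlock *New = SplitBlock(BB, SplitPt, &DTU, /*LI=*/nullptr,
                               /*MSSAU=*/nullptr, "pre.split",
                               /*Before=*/true);
  DTU.flush(); // apply the queued updates before DTU goes out of scope
  return New;
}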
@@ -599,6 +701,8 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
if (!LI)
return;
+ if (DTU && DTU->hasDomTree())
+ DT = &DTU->getDomTree();
assert(DT && "DT should be available to update LoopInfo!");
Loop *L = LI->getLoopFor(OldBB);
@@ -732,11 +836,17 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix, DominatorTree *DT,
- LoopInfo *LI, MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
+static void SplitLandingPadPredecessorsImpl(
+ BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1,
+ const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
+ DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
+
+static BasicBlock *
+SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, DomTreeUpdater *DTU,
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
// Do not attempt to split that which cannot be split.
if (!BB->canSplitPredecessors())
return nullptr;
@@ -747,8 +857,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
SmallVector<BasicBlock*, 2> NewBBs;
std::string NewName = std::string(Suffix) + ".split-lp";
- SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT,
- LI, MSSAU, PreserveLCSSA);
+ SplitLandingPadPredecessorsImpl(BB, Preds, Suffix, NewName.c_str(), NewBBs,
+ DTU, DT, LI, MSSAU, PreserveLCSSA);
return NewBBs[0];
}
@@ -758,12 +868,22 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+ Loop *L = nullptr;
+ BasicBlock *OldLatch = nullptr;
// Splitting the predecessors of a loop header creates a preheader block.
- if (LI && LI->isLoopHeader(BB))
+ if (LI && LI->isLoopHeader(BB)) {
+ L = LI->getLoopFor(BB);
// Using the loop start line number prevents debuggers stepping into the
// loop body for this instruction.
- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
- else
+ BI->setDebugLoc(L->getStartLoc());
+
+ // If BB is the header of the Loop, it is possible that the loop is
+ // modified, such that the current latch does not remain the latch of the
+ // loop. If that is the case, the loop metadata from the current latch needs
+ // to be applied to the new latch.
+ OldLatch = L->getLoopLatch();
+ } else
BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
// Move the edges from Preds to point to NewBB instead of BB.
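The latch comment above is acted on later in this function (see the OldLatch/NewLatch hunk below): when splitting a header's predecessors changes which block is the latch, the llvm.loop metadata has to follow it. The core of that move, as an isolated sketch with a hypothetical helper name:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Transfers the loop metadata from the old latch's terminator to the new
// latch's terminator, leaving none behind on the old one.
static void moveLoopMetadata(BasicBlock *OldLatch, BasicBlock *NewLatch) {
  if (!OldLatch || !NewLatch || OldLatch == NewLatch)
    return;
  MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop");
  NewLatch->getTerminator()->setMetadata("llvm.loop", MD);
  OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr);
}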
@@ -790,7 +910,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCSSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, MSSAU, PreserveLCSSA,
+ UpdateAnalysisInformation(BB, NewBB, Preds, DTU, DT, LI, MSSAU, PreserveLCSSA,
HasLoopExit);
if (!Preds.empty()) {
@@ -798,16 +918,41 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit);
}
+ if (OldLatch) {
+ BasicBlock *NewLatch = L->getLoopLatch();
+ if (NewLatch != OldLatch) {
+ MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop");
+ NewLatch->getTerminator()->setMetadata("llvm.loop", MD);
+ OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr);
+ }
+ }
+
return NewBB;
}
-void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix1, const char *Suffix2,
- SmallVectorImpl<BasicBlock *> &NewBBs,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitBlockPredecessorsImpl(BB, Preds, Suffix, /*DTU=*/nullptr, DT, LI,
+ MSSAU, PreserveLCSSA);
+}
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitBlockPredecessorsImpl(BB, Preds, Suffix, DTU,
+ /*DT=*/nullptr, LI, MSSAU, PreserveLCSSA);
+}
+
+static void SplitLandingPadPredecessorsImpl(
+ BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1,
+ const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
+ DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
// Create a new basic block for OrigBB's predecessors listed in Preds. Insert
@@ -832,8 +977,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
bool HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, MSSAU, PreserveLCSSA,
- HasLoopExit);
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DTU, DT, LI, MSSAU,
+ PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB1.
UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit);
@@ -868,7 +1013,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
// Update DominatorTree, LoopInfo, and LCSSA analysis information.
HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, MSSAU,
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DTU, DT, LI, MSSAU,
PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB2.
@@ -905,6 +1050,29 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
}
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix1, const char *Suffix2,
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitLandingPadPredecessorsImpl(
+ OrigBB, Preds, Suffix1, Suffix2, NewBBs,
+ /*DTU=*/nullptr, DT, LI, MSSAU, PreserveLCSSA);
+}
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix1, const char *Suffix2,
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitLandingPadPredecessorsImpl(OrigBB, Preds, Suffix1, Suffix2,
+ NewBBs, DTU, /*DT=*/nullptr, LI, MSSAU,
+ PreserveLCSSA);
+}
+
ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
BasicBlock *Pred,
DomTreeUpdater *DTU) {
@@ -964,14 +1132,24 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
return cast<ReturnInst>(NewRet);
}
-Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
- Instruction *SplitBefore,
- bool Unreachable,
- MDNode *BranchWeights,
- DominatorTree *DT, LoopInfo *LI,
- BasicBlock *ThenBlock) {
+static Instruction *
+SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
+ bool Unreachable, MDNode *BranchWeights,
+ DomTreeUpdater *DTU, DominatorTree *DT,
+ LoopInfo *LI, BasicBlock *ThenBlock) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
+ if (DTU) {
+ SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail),
+ succ_end(Tail));
+ Updates.push_back({DominatorTree::Insert, Head, Tail});
+ Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size());
+ for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) {
+ Updates.push_back({DominatorTree::Insert, Tail, UniqueSuccessorOfHead});
+ Updates.push_back({DominatorTree::Delete, Head, UniqueSuccessorOfHead});
+ }
+ }
Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
Instruction *CheckTerm;
@@ -980,17 +1158,24 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
if (Unreachable)
CheckTerm = new UnreachableInst(C, ThenBlock);
- else
+ else {
CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, ThenBlock, Tail});
+ }
CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
} else
CheckTerm = ThenBlock->getTerminator();
BranchInst *HeadNewTerm =
- BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
+ BranchInst::Create(/*ifTrue*/ ThenBlock, /*ifFalse*/ Tail, Cond);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, Head, ThenBlock});
HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
- if (DT) {
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ else if (DT) {
if (DomTreeNode *OldNode = DT->getNode(Head)) {
std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
@@ -1016,6 +1201,27 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
return CheckTerm;
}
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights,
+ DominatorTree *DT, LoopInfo *LI,
+ BasicBlock *ThenBlock) {
+ return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
+ BranchWeights,
+ /*DTU=*/nullptr, DT, LI, ThenBlock);
+}
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ BasicBlock *ThenBlock) {
+ return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
+ BranchWeights, DTU, /*DT=*/nullptr, LI,
+ ThenBlock);
+}
+
void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
Instruction **ThenTerm,
Instruction **ElseTerm,
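SplitBlockAndInsertIfThen gains a DomTreeUpdater-based overload above. A hedged caller sketch, assuming the condition and insertion point already exist; the helper name is made up:

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Inserts "if (Cond) { ... }" before SplitBefore and returns the terminator
// of the new then-block, keeping the dominator tree current through DTU
// instead of a raw DominatorTree.
static Instruction *insertGuardedBlock(Value *Cond, Instruction *SplitBefore,
                                       DomTreeUpdater &DTU) {
  return SplitBlockAndInsertIfThen(Cond, SplitBefore, /*Unreachable=*/false,
                                   /*BranchWeights=*/nullptr, &DTU,
                                   /*LI=*/nullptr, /*ThenBlock=*/nullptr);
}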
@@ -1326,11 +1532,11 @@ BasicBlock *llvm::CreateControlFlowHub(
SmallVector<DominatorTree::UpdateType, 16> Updates;
if (DTU) {
for (auto In : Incoming) {
+ Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock});
for (auto Succ : successors(In)) {
if (Outgoing.count(Succ))
Updates.push_back({DominatorTree::Delete, In, Succ});
}
- Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock});
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 39fb504cf7b7..939a1a3a868d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -134,9 +134,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
}
}
-BasicBlock *
-llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
- const CriticalEdgeSplittingOptions &Options) {
+BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
+ const CriticalEdgeSplittingOptions &Options,
+ const Twine &BBName) {
if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
return nullptr;
@@ -158,22 +158,21 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
SmallVector<BasicBlock *, 4> LoopPreds;
// Check if extra modifications will be required to preserve loop-simplify
// form after splitting. If it would require splitting blocks with IndirectBr
- // terminators, bail out if preserving loop-simplify form is requested.
+ // or CallBr terminators, bail out if preserving loop-simplify form is
+ // requested.
if (LI) {
if (Loop *TIL = LI->getLoopFor(TIBB)) {
- // The only that we can break LoopSimplify form by splitting a critical
- // edge is if after the split there exists some edge from TIL to DestBB
- // *and* the only edge into DestBB from outside of TIL is that of
+ // The only way that we can break LoopSimplify form by splitting a
+ // critical edge is if after the split there exists some edge from TIL to
+ // DestBB *and* the only edge into DestBB from outside of TIL is that of
// NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
// is the new exit block and it has no non-loop predecessors. If the
// second isn't true, then DestBB was not in LoopSimplify form prior to
// the split as it had a non-loop predecessor. In both of these cases,
// the predecessor must be directly in TIL, not in a subloop, or again
// LoopSimplify doesn't hold.
- for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E;
- ++I) {
- BasicBlock *P = *I;
+ for (BasicBlock *P : predecessors(DestBB)) {
if (P == TIBB)
continue; // The new block is known.
if (LI->getLoopFor(P) != TIL) {
@@ -186,7 +185,10 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
// Loop-simplify form can be preserved, if we can split all in-loop
// predecessors.
if (any_of(LoopPreds, [](BasicBlock *Pred) {
- return isa<IndirectBrInst>(Pred->getTerminator());
+ const Instruction *T = Pred->getTerminator();
+ if (const auto *CBR = dyn_cast<CallBrInst>(T))
+ return CBR->getDefaultDest() != Pred;
+ return isa<IndirectBrInst>(T);
})) {
if (Options.PreserveLoopSimplify)
return nullptr;
@@ -196,8 +198,13 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
}
// Create a new basic block, linking it into the CFG.
- BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
- TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
+ BasicBlock *NewBB = nullptr;
+ if (BBName.str() != "")
+ NewBB = BasicBlock::Create(TI->getContext(), BBName);
+ else
+ NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." +
+ DestBB->getName() +
+ "_crit_edge");
// Create our unconditional branch.
BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
NewBI->setDebugLoc(TI->getDebugLoc());
@@ -270,7 +277,7 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
SmallVector<DominatorTree::UpdateType, 3> Updates;
Updates.push_back({DominatorTree::Insert, TIBB, NewBB});
Updates.push_back({DominatorTree::Insert, NewBB, DestBB});
- if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
+ if (!llvm::is_contained(successors(TIBB), DestBB))
Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
if (DT)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index c64ad147fdfe..dba5403f272a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -31,14 +31,22 @@ using namespace llvm;
//- Infer Attributes ---------------------------------------------------------//
STATISTIC(NumReadNone, "Number of functions inferred as readnone");
+STATISTIC(NumInaccessibleMemOnly,
+ "Number of functions inferred as inaccessiblememonly");
STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
+STATISTIC(NumInaccessibleMemOrArgMemOnly,
+ "Number of functions inferred as inaccessiblemem_or_argmemonly");
STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
+STATISTIC(NumWriteOnlyArg, "Number of arguments inferred as writeonly");
+STATISTIC(NumSExtArg, "Number of arguments inferred as signext");
STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns");
STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
STATISTIC(NumReturnedArg, "Number of arguments inferred as returned");
+STATISTIC(NumWillReturn, "Number of functions inferred as willreturn");
static bool setDoesNotAccessMemory(Function &F) {
if (F.doesNotAccessMemory())
@@ -48,6 +56,14 @@ static bool setDoesNotAccessMemory(Function &F) {
return true;
}
+static bool setOnlyAccessesInaccessibleMemory(Function &F) {
+ if (F.onlyAccessesInaccessibleMemory())
+ return false;
+ F.setOnlyAccessesInaccessibleMemory();
+ ++NumInaccessibleMemOnly;
+ return true;
+}
+
static bool setOnlyReadsMemory(Function &F) {
if (F.onlyReadsMemory())
return false;
@@ -64,6 +80,14 @@ static bool setOnlyAccessesArgMemory(Function &F) {
return true;
}
+static bool setOnlyAccessesInaccessibleMemOrArgMem(Function &F) {
+ if (F.onlyAccessesInaccessibleMemOrArgMem())
+ return false;
+ F.setOnlyAccessesInaccessibleMemOrArgMem();
+ ++NumInaccessibleMemOrArgMemOnly;
+ return true;
+}
+
static bool setDoesNotThrow(Function &F) {
if (F.doesNotThrow())
return false;
@@ -104,16 +128,48 @@ static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) {
return true;
}
-static bool setRetNonNull(Function &F) {
- assert(F.getReturnType()->isPointerTy() &&
- "nonnull applies only to pointers");
- if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull))
+static bool setOnlyWritesMemory(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::WriteOnly))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::WriteOnly);
+ ++NumWriteOnlyArg;
+ return true;
+}
+
+static bool setSignExtendedArg(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::SExt))
return false;
- F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
- ++NumNonNull;
+ F.addParamAttr(ArgNo, Attribute::SExt);
+ ++NumSExtArg;
return true;
}
+static bool setRetNoUndef(Function &F) {
+ if (!F.getReturnType()->isVoidTy() &&
+ !F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef)) {
+ F.addAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
+ ++NumNoUndef;
+ return true;
+ }
+ return false;
+}
+
+static bool setArgsNoUndef(Function &F) {
+ bool Changed = false;
+ for (unsigned ArgNo = 0; ArgNo < F.arg_size(); ++ArgNo) {
+ if (!F.hasParamAttribute(ArgNo, Attribute::NoUndef)) {
+ F.addParamAttr(ArgNo, Attribute::NoUndef);
+ ++NumNoUndef;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+static bool setRetAndArgsNoUndef(Function &F) {
+ return setRetNoUndef(F) | setArgsNoUndef(F);
+}
+
static bool setReturnedArg(Function &F, unsigned ArgNo) {
if (F.hasParamAttribute(ArgNo, Attribute::Returned))
return false;
@@ -136,6 +192,14 @@ static bool setDoesNotFreeMemory(Function &F) {
return true;
}
+static bool setWillReturn(Function &F) {
+ if (F.hasFnAttribute(Attribute::WillReturn))
+ return false;
+ F.addFnAttr(Attribute::WillReturn);
+ ++NumWillReturn;
+ return true;
+}
+
bool llvm::inferLibFuncAttributes(Module *M, StringRef Name,
const TargetLibraryInfo &TLI) {
Function *F = M->getFunction(Name);
@@ -163,12 +227,15 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_strchr:
case LibFunc_strrchr:
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_strtol:
case LibFunc_strtod:
@@ -178,26 +245,31 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_strtold:
case LibFunc_strtoull:
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_strcpy:
case LibFunc_strncpy:
- Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotAlias(F, 1);
- LLVM_FALLTHROUGH;
case LibFunc_strcat:
case LibFunc_strncat:
+ Changed |= setWillReturn(F);
Changed |= setReturnedArg(F, 0);
LLVM_FALLTHROUGH;
case LibFunc_stpcpy:
case LibFunc_stpncpy:
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
return Changed;
case LibFunc_strxfrm:
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
@@ -206,51 +278,70 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_strspn: // 0,1
case LibFunc_strncmp: // 0,1
case LibFunc_strcspn: // 0,1
- case LibFunc_strcoll: // 0,1
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_strcoll:
case LibFunc_strcasecmp: // 0,1
case LibFunc_strncasecmp: //
+ // Those functions may depend on the locale, which may be accessed through
+ // global memory.
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_strstr:
case LibFunc_strpbrk:
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_strtok:
case LibFunc_strtok_r:
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_scanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_setbuf:
case LibFunc_setvbuf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_strdup:
case LibFunc_strndup:
+ Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_stat:
case LibFunc_statvfs:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_sscanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -258,70 +349,95 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_sprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotAlias(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_snprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotAlias(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
case LibFunc_setitimer:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_system:
// May throw; "system" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_malloc:
+ case LibFunc_vec_malloc:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_memcmp:
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_memchr:
case LibFunc_memrchr:
- Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_modf:
case LibFunc_modff:
case LibFunc_modfl:
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_memcpy:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotAlias(F, 1);
Changed |= setReturnedArg(F, 0);
- Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_memmove:
- Changed |= setReturnedArg(F, 0);
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setReturnedArg(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_mempcpy:
case LibFunc_memccpy:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotAlias(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotAlias(F, 1);
- Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
@@ -329,38 +445,57 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotThrow(F);
return Changed;
case LibFunc_memalign:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
+ Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_mkdir:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_mktime:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_realloc:
+ case LibFunc_vec_realloc:
+ Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+ Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
+ case LibFunc_reallocf:
+ Changed |= setRetNoUndef(F);
+ Changed |= setWillReturn(F);
+ return Changed;
case LibFunc_read:
// May throw; "read" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_rewind:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_rmdir:
case LibFunc_remove:
case LibFunc_realpath:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_rename:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -368,6 +503,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_readlink:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -375,35 +511,52 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_write:
// May throw; "write" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_aligned_alloc:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_bcopy:
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyWritesMemory(F, 1);
+ Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_bcmp:
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_bzero:
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
return Changed;
case LibFunc_calloc:
+ case LibFunc_vec_calloc:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_chmod:
case LibFunc_chown:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
@@ -411,6 +564,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_ctermid:
case LibFunc_clearerr:
case LibFunc_closedir:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
@@ -420,14 +574,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_atoll:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_access:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_fopen:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 0);
@@ -436,13 +593,25 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_fdopen:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_feof:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
case LibFunc_free:
+ case LibFunc_vec_free:
+ Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+ Changed |= setArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
case LibFunc_fseek:
case LibFunc_ftell:
case LibFunc_fgetc:
@@ -456,10 +625,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_flockfile:
case LibFunc_funlockfile:
case LibFunc_ftrylockfile:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_ferror:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F);
@@ -467,26 +638,38 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_fputc:
case LibFunc_fputc_unlocked:
case LibFunc_fstat:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
case LibFunc_frexp:
case LibFunc_frexpf:
case LibFunc_frexpl:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
case LibFunc_fstatvfs:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_fgets:
case LibFunc_fgets_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
case LibFunc_fread:
case LibFunc_fread_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 3);
return Changed;
case LibFunc_fwrite:
case LibFunc_fwrite_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 3);
@@ -494,6 +677,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_fputs:
case LibFunc_fputs_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -501,23 +685,35 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_fscanf:
case LibFunc_fprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_fgetpos:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_getc:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
case LibFunc_getlogin_r:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
case LibFunc_getc_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_getenv:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotCapture(F, 0);
@@ -525,37 +721,45 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_gets:
case LibFunc_getchar:
case LibFunc_getchar_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
return Changed;
case LibFunc_getitimer:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_getpwnam:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_ungetc:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_uname:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_unlink:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_unsetenv:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_utime:
case LibFunc_utimes:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -564,30 +768,36 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_putc:
case LibFunc_putc_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_puts:
case LibFunc_printf:
case LibFunc_perror:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_pread:
// May throw; "pread" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_pwrite:
// May throw; "pwrite" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_putchar:
case LibFunc_putchar_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
return Changed;
case LibFunc_popen:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 0);
@@ -596,15 +806,18 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_pclose:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_vscanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_vsscanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -612,28 +825,35 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_vfscanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_valloc:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_vprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_vfprintf:
case LibFunc_vsprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_vsnprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 2);
@@ -641,20 +861,24 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_open:
// May throw; "open" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_opendir:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_tmpfile:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
return Changed;
case LibFunc_times:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
@@ -666,24 +890,29 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotAccessMemory(F);
return Changed;
case LibFunc_lstat:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_lchown:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_qsort:
// May throw; places call through function pointer.
+ // Cannot give undef pointer/size
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 3);
return Changed;
case LibFunc_dunder_strdup:
case LibFunc_dunder_strndup:
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
@@ -693,14 +922,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_under_IO_getc:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_under_IO_putc:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_dunder_isoc99_scanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
@@ -708,12 +940,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_stat64:
case LibFunc_lstat64:
case LibFunc_statvfs64:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_dunder_isoc99_sscanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -721,6 +955,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_fopen64:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 0);
@@ -730,20 +965,24 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_fseeko64:
case LibFunc_ftello64:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_tmpfile64:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
return Changed;
case LibFunc_fstat64:
case LibFunc_fstatvfs64:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_open64:
// May throw; "open" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
@@ -751,37 +990,166 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
// Currently some platforms have the restrict keyword on the arguments to
// gettimeofday. To be conservative, do not add noalias to gettimeofday's
// arguments.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc_Znwj: // new(unsigned int)
- case LibFunc_Znwm: // new(unsigned long)
- case LibFunc_Znaj: // new[](unsigned int)
- case LibFunc_Znam: // new[](unsigned long)
- case LibFunc_msvc_new_int: // new(unsigned int)
- case LibFunc_msvc_new_longlong: // new(unsigned long long)
- case LibFunc_msvc_new_array_int: // new[](unsigned int)
- case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
- // Operator new always returns a nonnull noalias pointer
- Changed |= setRetNonNull(F);
- Changed |= setRetDoesNotAlias(F);
- return Changed;
// TODO: add LibFunc entries for:
// case LibFunc_memset_pattern4:
// case LibFunc_memset_pattern8:
case LibFunc_memset_pattern16:
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
+ case LibFunc_memset:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyWritesMemory(F, 0);
+ return Changed;
// int __nvvm_reflect(const char *)
case LibFunc_nvvm_reflect:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotAccessMemory(F);
Changed |= setDoesNotThrow(F);
return Changed;
-
+ case LibFunc_ldexp:
+ case LibFunc_ldexpf:
+ case LibFunc_ldexpl:
+ Changed |= setSignExtendedArg(F, 1);
+ Changed |= setWillReturn(F);
+ return Changed;
+ case LibFunc_abs:
+ case LibFunc_acos:
+ case LibFunc_acosf:
+ case LibFunc_acosh:
+ case LibFunc_acoshf:
+ case LibFunc_acoshl:
+ case LibFunc_acosl:
+ case LibFunc_asin:
+ case LibFunc_asinf:
+ case LibFunc_asinh:
+ case LibFunc_asinhf:
+ case LibFunc_asinhl:
+ case LibFunc_asinl:
+ case LibFunc_atan:
+ case LibFunc_atan2:
+ case LibFunc_atan2f:
+ case LibFunc_atan2l:
+ case LibFunc_atanf:
+ case LibFunc_atanh:
+ case LibFunc_atanhf:
+ case LibFunc_atanhl:
+ case LibFunc_atanl:
+ case LibFunc_cbrt:
+ case LibFunc_cbrtf:
+ case LibFunc_cbrtl:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ case LibFunc_cos:
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_coshl:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
+ case LibFunc_cospi:
+ case LibFunc_cospif:
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_expl:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_expm1:
+ case LibFunc_expm1f:
+ case LibFunc_expm1l:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
+ case LibFunc_ffs:
+ case LibFunc_ffsl:
+ case LibFunc_ffsll:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
+ case LibFunc_fls:
+ case LibFunc_flsl:
+ case LibFunc_flsll:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
+ case LibFunc_fmod:
+ case LibFunc_fmodf:
+ case LibFunc_fmodl:
+ case LibFunc_isascii:
+ case LibFunc_isdigit:
+ case LibFunc_labs:
+ case LibFunc_llabs:
+ case LibFunc_log:
+ case LibFunc_log10:
+ case LibFunc_log10f:
+ case LibFunc_log10l:
+ case LibFunc_log1p:
+ case LibFunc_log1pf:
+ case LibFunc_log1pl:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
+ case LibFunc_logb:
+ case LibFunc_logbf:
+ case LibFunc_logbl:
+ case LibFunc_logf:
+ case LibFunc_logl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
+ case LibFunc_pow:
+ case LibFunc_powf:
+ case LibFunc_powl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
+ case LibFunc_sin:
+ case LibFunc_sincospif_stret:
+ case LibFunc_sinf:
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinhl:
+ case LibFunc_sinl:
+ case LibFunc_sinpi:
+ case LibFunc_sinpif:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_strnlen:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanh:
+ case LibFunc_tanhf:
+ case LibFunc_tanhl:
+ case LibFunc_tanl:
+ case LibFunc_toascii:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotFreeMemory(F);
+ Changed |= setWillReturn(F);
+ return Changed;
default:
// FIXME: It'd be really nice to cover all the library functions we're
// aware of here.
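To see the new inferences end to end, a small self-contained program can declare a libc function and run inferLibFuncAttributes over it. This is only an illustration, not part of the patch; the target triple is an assumption and the exact attribute set depends on the target's TargetLibraryInfo.

#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("infer-attrs", Ctx);
  M.setTargetTriple("x86_64-unknown-linux-gnu"); // assumed target
  // Declare: i8* @malloc(i64)
  M.getOrInsertFunction("malloc", Type::getInt8PtrTy(Ctx),
                        Type::getInt64Ty(Ctx));
  TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
  TargetLibraryInfo TLI(TLII);
  Function *Malloc = M.getFunction("malloc");
  bool Changed = inferLibFuncAttributes(*Malloc, TLI);
  // With this change, malloc is expected to pick up inaccessiblememonly,
  // willreturn and a noundef return on typical targets.
  outs() << "changed: " << (Changed ? "yes" : "no") << ", willreturn: "
         << (Malloc->hasFnAttribute(Attribute::WillReturn) ? "yes" : "no")
         << "\n";
  return 0;
}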
@@ -930,6 +1298,15 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
return CI;
}
+Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ return emitLibCall(
+ LibFunc_mempcpy, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+ {Dst, Src, Len}, B, TLI);
+}
+
Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
LLVMContext &Context = B.GetInsertBlock()->getContext();
@@ -969,7 +1346,7 @@ Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
- Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+ llvm::append_range(Args, VariadicArgs);
return emitLibCall(LibFunc_snprintf, B.getInt32Ty(),
{B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()},
Args, B, TLI, /*IsVaArgs=*/true);
@@ -979,7 +1356,7 @@ Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
- Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+ llvm::append_range(Args, VariadicArgs);
return emitLibCall(LibFunc_sprintf, B.getInt32Ty(),
{B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI,
/*IsVaArgs=*/true);
@@ -1087,12 +1464,15 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
StringRef Name, IRBuilderBase &B,
- const AttributeList &Attrs) {
+ const AttributeList &Attrs,
+ const TargetLibraryInfo *TLI = nullptr) {
assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
Module *M = B.GetInsertBlock()->getModule();
FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(),
Op1->getType(), Op2->getType());
+ if (TLI != nullptr)
+ inferLibFuncAttributes(M, Name, *TLI);
CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name);
// The incoming attribute set may have come from a speculatable intrinsic, but
@@ -1128,7 +1508,7 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
StringRef Name = getFloatFnName(TLI, Op1->getType(),
DoubleFn, FloatFn, LongDoubleFn);
- return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
+ return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs, TLI);
}
Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B,
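The BuildLibCalls hunks above do two things: the large switch marks the listed math/bit/string routines as nounwind, nofree and willreturn, and an emitMemPCpy wrapper is added alongside the other emit* helpers. A minimal caller sketch, assuming Dst/Src are i8* values, Len has the target's intptr type, and the wrapper name is hypothetical:

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;

// Hypothetical helper in a transform. Like the other emit* wrappers,
// emitMemPCpy returns nullptr when the target library has no mempcpy.
static Value *emitCopyAndAdvance(Value *Dst, Value *Src, Value *Len,
                                 IRBuilderBase &B, const DataLayout &DL,
                                 const TargetLibraryInfo *TLI) {
  // mempcpy returns a pointer one past the last byte written, which is
  // convenient when chaining several copies.
  return emitMemPCpy(Dst, Src, Len, B, DL, TLI);
}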
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
index 52e859361c59..b2763900e154 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
@@ -96,11 +96,12 @@ void CallGraphUpdater::reanalyzeFunction(Function &Fn) {
}
}
-void CallGraphUpdater::registerOutlinedFunction(Function &NewFn) {
+void CallGraphUpdater::registerOutlinedFunction(Function &OriginalFn,
+ Function &NewFn) {
if (CG)
CG->addToCallGraph(&NewFn);
else if (LCG)
- LCG->addNewFunctionIntoSCC(NewFn, *SCC);
+ LCG->addSplitFunction(OriginalFn, NewFn);
}
void CallGraphUpdater::removeFunction(Function &DeadFn) {
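registerOutlinedFunction now also takes the function the new body was split out of, so the lazy call graph can use addSplitFunction rather than addNewFunctionIntoSCC. A hedged sketch of an updated call site; the surrounding outliner code is made up:

#include "llvm/Transforms/Utils/CallGraphUpdater.h"
using namespace llvm;

// CGU is assumed to be a CallGraphUpdater already initialized for whichever
// call-graph representation (CG or LCG) the pass is running under.
static void recordOutlinedBody(CallGraphUpdater &CGU, Function &ParentFn,
                               Function &OutlinedFn) {
  // The parent is needed so LazyCallGraph::addSplitFunction can place the
  // outlined body relative to ParentFn instead of the current SCC.
  CGU.registerOutlinedFunction(ParentFn, OutlinedFn);
}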
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 5a47c1fd0b6c..bf08bf274737 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -112,9 +112,7 @@ static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst,
Builder.SetInsertPoint(&MergeBlock->front());
PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
- SmallVector<User *, 16> UsersToUpdate;
- for (User *U : OrigInst->users())
- UsersToUpdate.push_back(U);
+ SmallVector<User *, 16> UsersToUpdate(OrigInst->users());
for (User *U : UsersToUpdate)
U->replaceUsesOfWith(OrigInst, Phi);
Phi->addIncoming(OrigInst, OrigInst->getParent());
@@ -165,9 +163,7 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
// Save the users of the calling instruction. These uses will be changed to
// use the bitcast after we create it.
- SmallVector<User *, 16> UsersToUpdate;
- for (User *U : CB.users())
- UsersToUpdate.push_back(U);
+ SmallVector<User *, 16> UsersToUpdate(CB.users());
// Determine an appropriate location to create the bitcast for the return
// value. The location depends on if we have a call or invoke instruction.
@@ -430,10 +426,11 @@ bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
}
}
for (; I < NumArgs; I++) {
- // Vararg functions can have more arguments than paramters.
+ // Vararg functions can have more arguments than parameters.
assert(Callee->isVarArg());
if (CB.paramHasAttr(I, Attribute::StructRet)) {
- *FailureReason = "SRet arg to vararg function";
+ if (FailureReason)
+ *FailureReason = "SRet arg to vararg function";
return false;
}
}
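Two independent cleanups in CallPromotionUtils: the user lists are snapshotted with SmallVector's range constructor instead of a manual push_back loop, and *FailureReason is only written when the caller actually supplied a string. The snapshot idiom in isolation, with V and Repl as placeholders:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/User.h"
using namespace llvm;

// Copy the use list up front so the rewrite below cannot invalidate the
// iteration; this is what the range constructor replaces.
static void rewriteUses(Value &V, Value &Repl) {
  SmallVector<User *, 16> UsersToUpdate(V.users());
  for (User *U : UsersToUpdate)
    U->replaceUsesOfWith(&V, &Repl);
}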
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index 1ae17c64b8f6..1f649fe6c748 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -109,7 +109,7 @@ void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) {
auto *ValueToFr = U.get();
assert(L->contains(UserI->getParent()) &&
"Should not process an instruction that isn't inside the loop");
- if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, UserI, &DT))
+ if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, nullptr, UserI, &DT))
return;
LLVM_DEBUG(dbgs() << "canonfr: inserting freeze:\n");
@@ -176,7 +176,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() {
assert(StepI && "Step instruction should have been found");
// Drop flags from the step instruction.
- if (!isGuaranteedNotToBeUndefOrPoison(StepI, StepI, &DT)) {
+ if (!isGuaranteedNotToBeUndefOrPoison(StepI, nullptr, StepI, &DT)) {
LLVM_DEBUG(dbgs() << "canonfr: drop flags: " << *StepI << "\n");
StepI->dropPoisonGeneratingFlags();
SE.forgetValue(StepI);
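The only change in this file is mechanical: isGuaranteedNotToBeUndefOrPoison grew an AssumptionCache parameter between the value and the context instruction, and both call sites pass nullptr to keep the previous behaviour. The updated signature as used above, shown standalone:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// Value first, then the optional AssumptionCache (nullptr here), then the
// context instruction and dominator tree, exactly as in the hunks above.
static bool stepIsWellDefined(Instruction *StepI, DominatorTree &DT) {
  return isGuaranteedNotToBeUndefOrPoison(StepI, /*AC=*/nullptr, StepI, &DT);
}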
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 788983c15690..6ab061510a60 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -36,6 +37,8 @@
#include <map>
using namespace llvm;
+#define DEBUG_TYPE "clone-function"
+
/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
@@ -137,15 +140,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
MD[SP].reset(SP);
}
- SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- OldFunc->getAllMetadata(MDs);
- for (auto MD : MDs) {
- NewFunc->addMetadata(
- MD.first,
- *MapMetadata(MD.second, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer));
- }
+ // Everything else beyond this point deals with function instructions,
+ // so if we are dealing with a function declaration, we're done.
+ if (OldFunc->isDeclaration())
+ return;
// When we remap instructions, we want to avoid duplicating inlined
// DISubprograms, so record all subprograms we find as we duplicate
@@ -157,7 +155,6 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
- //
for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
BI != BE; ++BI) {
const BasicBlock &BB = *BI;
@@ -196,6 +193,19 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
for (DIType *Type : DIFinder.types())
VMap.MD()[Type].reset(Type);
+ // Duplicate the metadata that is attached to the cloned function.
+ // Subprograms/CUs/types that were already mapped to themselves won't be
+ // duplicated.
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ OldFunc->getAllMetadata(MDs);
+ for (auto MD : MDs) {
+ NewFunc->addMetadata(
+ MD.first,
+ *MapMetadata(MD.second, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer));
+ }
+
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
for (Function::iterator BB =
@@ -426,9 +436,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
CodeInfo->OperandBundleCallSites.push_back(NewInst);
// Recursively clone any reachable successor blocks.
- const Instruction *TI = BB->getTerminator();
- for (const BasicBlock *Succ : successors(TI))
- ToClone.push_back(Succ);
+ append_range(ToClone, successors(BB->getTerminator()));
}
if (CodeInfo) {
@@ -668,8 +676,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Check if this block has become dead during inlining or other
// simplifications. Note that the first block will appear dead, as it has
// not yet been wired up properly.
- if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
- I->getSinglePredecessor() == &*I)) {
+ if (I != Begin && (pred_empty(&*I) || I->getSinglePredecessor() == &*I)) {
BasicBlock *DeadBB = &*I++;
DeleteDeadBlock(DeadBB);
continue;
@@ -877,3 +884,116 @@ BasicBlock *llvm::DuplicateInstructionsInSplitBetween(
return NewBB;
}
+
+void llvm::cloneNoAliasScopes(
+ ArrayRef<MDNode *> NoAliasDeclScopes,
+ DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ StringRef Ext, LLVMContext &Context) {
+ MDBuilder MDB(Context);
+
+ for (auto *ScopeList : NoAliasDeclScopes) {
+ for (auto &MDOperand : ScopeList->operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) {
+ AliasScopeNode SNANode(MD);
+
+ std::string Name;
+ auto ScopeName = SNANode.getName();
+ if (!ScopeName.empty())
+ Name = (Twine(ScopeName) + ":" + Ext).str();
+ else
+ Name = std::string(Ext);
+
+ MDNode *NewScope = MDB.createAnonymousAliasScope(
+ const_cast<MDNode *>(SNANode.getDomain()), Name);
+ ClonedScopes.insert(std::make_pair(MD, NewScope));
+ }
+ }
+ }
+}
+
+void llvm::adaptNoAliasScopes(
+ Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ LLVMContext &Context) {
+ auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * {
+ bool NeedsReplacement = false;
+ SmallVector<Metadata *, 8> NewScopeList;
+ for (auto &MDOp : ScopeList->operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(MDOp)) {
+ if (auto *NewMD = ClonedScopes.lookup(MD)) {
+ NewScopeList.push_back(NewMD);
+ NeedsReplacement = true;
+ continue;
+ }
+ NewScopeList.push_back(MD);
+ }
+ }
+ if (NeedsReplacement)
+ return MDNode::get(Context, NewScopeList);
+ return nullptr;
+ };
+
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
+ if (auto *NewScopeList = CloneScopeList(Decl->getScopeList()))
+ Decl->setScopeList(NewScopeList);
+
+ auto replaceWhenNeeded = [&](unsigned MD_ID) {
+ if (const MDNode *CSNoAlias = I->getMetadata(MD_ID))
+ if (auto *NewScopeList = CloneScopeList(CSNoAlias))
+ I->setMetadata(MD_ID, NewScopeList);
+ };
+ replaceWhenNeeded(LLVMContext::MD_noalias);
+ replaceWhenNeeded(LLVMContext::MD_alias_scope);
+}
+
+void llvm::cloneAndAdaptNoAliasScopes(
+ ArrayRef<MDNode *> NoAliasDeclScopes,
+ ArrayRef<BasicBlock *> NewBlocks, LLVMContext &Context, StringRef Ext) {
+ if (NoAliasDeclScopes.empty())
+ return;
+
+ DenseMap<MDNode *, MDNode *> ClonedScopes;
+ LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
+ << NoAliasDeclScopes.size() << " node(s)\n");
+
+ cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context);
+ // Identify instructions using metadata that needs adaptation
+ for (BasicBlock *NewBlock : NewBlocks)
+ for (Instruction &I : *NewBlock)
+ adaptNoAliasScopes(&I, ClonedScopes, Context);
+}
+
+void llvm::cloneAndAdaptNoAliasScopes(
+ ArrayRef<MDNode *> NoAliasDeclScopes, Instruction *IStart,
+ Instruction *IEnd, LLVMContext &Context, StringRef Ext) {
+ if (NoAliasDeclScopes.empty())
+ return;
+
+ DenseMap<MDNode *, MDNode *> ClonedScopes;
+ LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
+ << NoAliasDeclScopes.size() << " node(s)\n");
+
+ cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context);
+ // Identify instructions using metadata that needs adaptation
+ assert(IStart->getParent() == IEnd->getParent() && "different basic block ?");
+ auto ItStart = IStart->getIterator();
+ auto ItEnd = IEnd->getIterator();
+ ++ItEnd; // IEnd is included, increment ItEnd to get the end of the range
+ for (auto &I : llvm::make_range(ItStart, ItEnd))
+ adaptNoAliasScopes(&I, ClonedScopes, Context);
+}
+
+void llvm::identifyNoAliasScopesToClone(
+ ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes) {
+ for (BasicBlock *BB : BBs)
+ for (Instruction &I : *BB)
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ NoAliasDeclScopes.push_back(Decl->getScopeList());
+}
+
+void llvm::identifyNoAliasScopesToClone(
+ BasicBlock::iterator Start, BasicBlock::iterator End,
+ SmallVectorImpl<MDNode *> &NoAliasDeclScopes) {
+ for (Instruction &I : make_range(Start, End))
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ NoAliasDeclScopes.push_back(Decl->getScopeList());
+}
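The new block at the end of CloneFunction.cpp adds utilities for duplicating llvm.experimental.noalias.scope.decl scopes when code is cloned: identifyNoAliasScopesToClone collects the scope lists and cloneAndAdaptNoAliasScopes clones them under a suffixed name and rewrites the cloned instructions. A hedged end-to-end sketch for a client that has just duplicated some blocks; OrigBlocks, NewBlocks and the "cloned" suffix are illustrative:

#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

// OrigBlocks are the blocks that were copied, NewBlocks their clones, and
// Ctx the LLVMContext of the function being transformed.
static void fixupClonedNoAliasScopes(ArrayRef<BasicBlock *> OrigBlocks,
                                     ArrayRef<BasicBlock *> NewBlocks,
                                     LLVMContext &Ctx) {
  SmallVector<MDNode *, 8> NoAliasDeclScopes;
  identifyNoAliasScopesToClone(OrigBlocks, NoAliasDeclScopes);
  // "cloned" becomes the ":cloned" suffix on every duplicated scope name.
  cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, NewBlocks, Ctx, "cloned");
}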
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
index 2c8c3abb2922..a6327bbf21bc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -117,10 +117,17 @@ std::unique_ptr<Module> llvm::CloneModule(
//
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
+
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ I->getAllMetadata(MDs);
+ for (auto MD : MDs)
+ GV->addMetadata(MD.first,
+ *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
+
if (I->isDeclaration())
continue;
- GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
if (!ShouldCloneDefinition(&*I)) {
// Skip after setting the correct linkage for an external reference.
GV->setLinkage(GlobalValue::ExternalLinkage);
@@ -129,12 +136,6 @@ std::unique_ptr<Module> llvm::CloneModule(
if (I->hasInitializer())
GV->setInitializer(MapValue(I->getInitializer(), VMap));
- SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- I->getAllMetadata(MDs);
- for (auto MD : MDs)
- GV->addMetadata(MD.first,
- *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
-
copyComdat(GV, &*I);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 8cdbb9d35652..390925a03b73 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -535,6 +535,46 @@ void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
continue;
}
+ // Find bitcasts in the outlined region that have lifetime marker users
+ // outside that region. Replace the lifetime marker use with an
+ // outside region bitcast to avoid unnecessary alloca/reload instructions
+ // and extra lifetime markers.
+ SmallVector<Instruction *, 2> LifetimeBitcastUsers;
+ for (User *U : AI->users()) {
+ if (!definedInRegion(Blocks, U))
+ continue;
+
+ if (U->stripInBoundsConstantOffsets() != AI)
+ continue;
+
+ Instruction *Bitcast = cast<Instruction>(U);
+ for (User *BU : Bitcast->users()) {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU);
+ if (!IntrInst)
+ continue;
+
+ if (!IntrInst->isLifetimeStartOrEnd())
+ continue;
+
+ if (definedInRegion(Blocks, IntrInst))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast"
+ << *Bitcast << " in out-of-region lifetime marker "
+ << *IntrInst << "\n");
+ LifetimeBitcastUsers.push_back(IntrInst);
+ }
+ }
+
+ for (Instruction *I : LifetimeBitcastUsers) {
+ Module *M = AIFunc->getParent();
+ LLVMContext &Ctx = M->getContext();
+ auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ CastInst *CastI =
+ CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I);
+ I->replaceUsesOfWith(I->getOperand(1), CastI);
+ }
+
// Follow any bitcasts.
SmallVector<Instruction *, 2> Bitcasts;
SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
@@ -728,8 +768,7 @@ void CodeExtractor::severSplitPHINodesOfExits(
NewBB = BasicBlock::Create(ExitBB->getContext(),
ExitBB->getName() + ".split",
ExitBB->getParent(), ExitBB);
- SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBB),
- pred_end(ExitBB));
+ SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB));
for (BasicBlock *PredBB : Preds)
if (Blocks.count(PredBB))
PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
@@ -895,6 +934,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::WriteOnly:
case Attribute::ZExt:
case Attribute::ImmArg:
+ case Attribute::ByRef:
case Attribute::EndAttrKinds:
case Attribute::EmptyKey:
case Attribute::TombstoneKey:
@@ -902,9 +942,11 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
case Attribute::Cold:
+ case Attribute::Hot:
case Attribute::NoRecurse:
case Attribute::InlineHint:
case Attribute::MinSize:
+ case Attribute::NoCallback:
case Attribute::NoDuplicate:
case Attribute::NoFree:
case Attribute::NoImplicitFloat:
@@ -930,6 +972,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::StrictFP:
case Attribute::UWTable:
case Attribute::NoCfCheck:
+ case Attribute::MustProgress:
+ case Attribute::NoProfile:
break;
}
@@ -1434,7 +1478,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
// function arguments, as the parameters don't correspond to anything at the
// source level.
assert(OldSP->getUnit() && "Missing compile unit for subprogram");
- DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolvedNodes=*/false,
+ DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false,
OldSP->getUnit());
auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition |
@@ -1505,7 +1549,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
// function.
for (Instruction &I : instructions(NewFunc)) {
if (const DebugLoc &DL = I.getDebugLoc())
- I.setDebugLoc(DebugLoc::get(DL.getLine(), DL.getCol(), NewSP));
+ I.setDebugLoc(DILocation::get(Ctx, DL.getLine(), DL.getCol(), NewSP));
// Loop info metadata may contain line locations. Fix them up.
auto updateLoopInfoLoc = [&Ctx,
@@ -1516,7 +1560,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
}
if (!TheCall.getDebugLoc())
- TheCall.setDebugLoc(DebugLoc::get(0, 0, OldSP));
+ TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP));
eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
}
@@ -1739,7 +1783,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
const Function &NewFunc,
AssumptionCache *AC) {
for (auto AssumeVH : AC->assumptions()) {
- CallInst *I = dyn_cast_or_null<CallInst>(AssumeVH);
+ auto *I = dyn_cast_or_null<CallInst>(AssumeVH);
if (!I)
continue;
@@ -1751,12 +1795,12 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
// that were previously in the old function, but that have now been moved
// to the new function.
for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) {
- CallInst *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH);
+ auto *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH);
if (!AffectedCI)
continue;
if (AffectedCI->getFunction() != &OldFunc)
return true;
- auto *AssumedInst = dyn_cast<Instruction>(AffectedCI->getOperand(0));
+ auto *AssumedInst = cast<Instruction>(AffectedCI->getOperand(0));
if (AssumedInst->getFunction() != &OldFunc)
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index 08047dc0f96e..ce982c7403aa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -355,35 +355,32 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
   // Check if there exist instructions which may throw, may synchronize, or may
// never return, from I to InsertPoint.
if (!isSafeToSpeculativelyExecute(&I))
- if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(),
- [](Instruction *I) {
- if (I->mayThrow())
- return true;
-
- const CallBase *CB = dyn_cast<CallBase>(I);
- if (!CB)
- return false;
- if (!CB->hasFnAttr(Attribute::WillReturn))
- return true;
- if (!CB->hasFnAttr(Attribute::NoSync))
- return true;
-
- return false;
- })) {
+ if (llvm::any_of(InstsToCheck, [](Instruction *I) {
+ if (I->mayThrow())
+ return true;
+
+ const CallBase *CB = dyn_cast<CallBase>(I);
+ if (!CB)
+ return false;
+ if (!CB->hasFnAttr(Attribute::WillReturn))
+ return true;
+ if (!CB->hasFnAttr(Attribute::NoSync))
+ return true;
+
+ return false;
+ })) {
return reportInvalidCandidate(I, MayThrowException);
}
// Check if I has any output/flow/anti dependences with instructions from \p
// StartInst to \p EndInst.
- if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(),
- [&DI, &I](Instruction *CurInst) {
- auto DepResult = DI->depends(&I, CurInst, true);
- if (DepResult &&
- (DepResult->isOutput() || DepResult->isFlow() ||
- DepResult->isAnti()))
- return true;
- return false;
- }))
+ if (llvm::any_of(InstsToCheck, [&DI, &I](Instruction *CurInst) {
+ auto DepResult = DI->depends(&I, CurInst, true);
+ if (DepResult && (DepResult->isOutput() || DepResult->isFlow() ||
+ DepResult->isAnti()))
+ return true;
+ return false;
+ }))
return reportInvalidCandidate(I, HasDependences);
return true;
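Both hunks here are mechanical: std::any_of over explicit begin()/end() iterators becomes llvm::any_of over the container, which also lets the lambdas sit at a saner indentation. The idiom on its own, with Insts as a placeholder:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Range-based any_of from STLExtras; behaviour is identical to the
// std::any_of(begin, end, pred) form it replaces.
static bool anyMayThrow(ArrayRef<Instruction *> Insts) {
  return llvm::any_of(Insts, [](Instruction *I) { return I->mayThrow(); });
}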
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
index 8f98d81a3d79..3e4d53c10dc9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassInstrumentation.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -198,6 +199,18 @@ bool llvm::applyDebugifyMetadata(
return true;
}
+static bool applyDebugify(Function &F) {
+ Module &M = *F.getParent();
+ auto FuncIt = F.getIterator();
+ return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
+ "FunctionDebugify: ", /*ApplyToMF=*/nullptr);
+}
+
+static bool applyDebugify(Module &M) {
+ return applyDebugifyMetadata(M, M.functions(),
+ "ModuleDebugify: ", /*ApplyToMF=*/nullptr);
+}
+
bool llvm::stripDebugifyMetadata(Module &M) {
bool Changed = false;
@@ -226,9 +239,7 @@ bool llvm::stripDebugifyMetadata(Module &M) {
NamedMDNode *NMD = M.getModuleFlagsMetadata();
if (!NMD)
return Changed;
- SmallVector<MDNode *, 4> Flags;
- for (MDNode *Flag : NMD->operands())
- Flags.push_back(Flag);
+ SmallVector<MDNode *, 4> Flags(NMD->operands());
NMD->clearOperands();
for (MDNode *Flag : Flags) {
MDString *Key = dyn_cast_or_null<MDString>(Flag->getOperand(1));
@@ -383,10 +394,7 @@ bool checkDebugifyMetadata(Module &M,
/// ModulePass for attaching synthetic debug info to everything, used with the
/// legacy module pass manager.
struct DebugifyModulePass : public ModulePass {
- bool runOnModule(Module &M) override {
- return applyDebugifyMetadata(M, M.functions(),
- "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
- }
+ bool runOnModule(Module &M) override { return applyDebugify(M); }
DebugifyModulePass() : ModulePass(ID) {}
@@ -400,12 +408,7 @@ struct DebugifyModulePass : public ModulePass {
/// FunctionPass for attaching synthetic debug info to instructions within a
/// single function, used with the legacy module pass manager.
struct DebugifyFunctionPass : public FunctionPass {
- bool runOnFunction(Function &F) override {
- Module &M = *F.getParent();
- auto FuncIt = F.getIterator();
- return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
- "FunctionDebugify: ", /*ApplyToMF*/ nullptr);
- }
+ bool runOnFunction(Function &F) override { return applyDebugify(F); }
DebugifyFunctionPass() : FunctionPass(ID) {}
@@ -472,9 +475,32 @@ private:
} // end anonymous namespace
-ModulePass *createDebugifyModulePass() { return new DebugifyModulePass(); }
+void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) {
+ std::error_code EC;
+ raw_fd_ostream OS{Path, EC};
+ if (EC) {
+ errs() << "Could not open file: " << EC.message() << ", " << Path << '\n';
+ return;
+ }
+
+ OS << "Pass Name" << ',' << "# of missing debug values" << ','
+ << "# of missing locations" << ',' << "Missing/Expected value ratio" << ','
+ << "Missing/Expected location ratio" << '\n';
+ for (const auto &Entry : Map) {
+ StringRef Pass = Entry.first;
+ DebugifyStatistics Stats = Entry.second;
+
+ OS << Pass << ',' << Stats.NumDbgValuesMissing << ','
+ << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ','
+ << Stats.getEmptyLocationRatio() << '\n';
+ }
+}
+
+ModulePass *llvm::createDebugifyModulePass() {
+ return new DebugifyModulePass();
+}
-FunctionPass *createDebugifyFunctionPass() {
+FunctionPass *llvm::createDebugifyFunctionPass() {
return new DebugifyFunctionPass();
}
@@ -484,15 +510,15 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
}
-ModulePass *createCheckDebugifyModulePass(bool Strip,
- StringRef NameOfWrappedPass,
- DebugifyStatsMap *StatsMap) {
+ModulePass *llvm::createCheckDebugifyModulePass(bool Strip,
+ StringRef NameOfWrappedPass,
+ DebugifyStatsMap *StatsMap) {
return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap);
}
-FunctionPass *createCheckDebugifyFunctionPass(bool Strip,
- StringRef NameOfWrappedPass,
- DebugifyStatsMap *StatsMap) {
+FunctionPass *
+llvm::createCheckDebugifyFunctionPass(bool Strip, StringRef NameOfWrappedPass,
+ DebugifyStatsMap *StatsMap) {
return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap);
}
@@ -503,6 +529,41 @@ PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
return PreservedAnalyses::all();
}
+static bool isIgnoredPass(StringRef PassID) {
+ return isSpecialPass(PassID, {"PassManager", "PassAdaptor",
+ "AnalysisManagerProxy", "PrintFunctionPass",
+ "PrintModulePass", "BitcodeWriterPass",
+ "ThinLTOBitcodeWriterPass", "VerifierPass"});
+}
+
+void DebugifyEachInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ PIC.registerBeforeNonSkippedPassCallback([](StringRef P, Any IR) {
+ if (isIgnoredPass(P))
+ return;
+ if (any_isa<const Function *>(IR))
+ applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR)));
+ else if (any_isa<const Module *>(IR))
+ applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR)));
+ });
+ PIC.registerAfterPassCallback([this](StringRef P, Any IR,
+ const PreservedAnalyses &PassPA) {
+ if (isIgnoredPass(P))
+ return;
+ if (any_isa<const Function *>(IR)) {
+ auto &F = *const_cast<Function *>(any_cast<const Function *>(IR));
+ Module &M = *F.getParent();
+ auto It = F.getIterator();
+ checkDebugifyMetadata(M, make_range(It, std::next(It)), P,
+ "CheckFunctionDebugify", /*Strip=*/true, &StatsMap);
+ } else if (any_isa<const Module *>(IR)) {
+ auto &M = *const_cast<Module *>(any_cast<const Module *>(IR));
+ checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
+ /*Strip=*/true, &StatsMap);
+ }
+ });
+}
+
char DebugifyModulePass::ID = 0;
static RegisterPass<DebugifyModulePass> DM("debugify",
"Attach debug info to everything");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index f84ff9e5aad1..26f8e21952cc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -83,7 +83,7 @@ static bool runOnFunction(Function &F, bool PostInlining) {
if (!EntryFunc.empty()) {
DebugLoc DL;
if (auto SP = F.getSubprogram())
- DL = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ DL = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP);
insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL);
Changed = true;
@@ -97,19 +97,14 @@ static bool runOnFunction(Function &F, bool PostInlining) {
continue;
// If T is preceded by a musttail call, that's the real terminator.
- Instruction *Prev = T->getPrevNode();
- if (BitCastInst *BCI = dyn_cast_or_null<BitCastInst>(Prev))
- Prev = BCI->getPrevNode();
- if (CallInst *CI = dyn_cast_or_null<CallInst>(Prev)) {
- if (CI->isMustTailCall())
- T = CI;
- }
+ if (CallInst *CI = BB.getTerminatingMustTailCall())
+ T = CI;
DebugLoc DL;
if (DebugLoc TerminatorDL = T->getDebugLoc())
DL = TerminatorDL;
else if (auto SP = F.getSubprogram())
- DL = DebugLoc::get(0, 0, SP);
+ DL = DILocation::get(SP->getContext(), 0, 0, SP);
insertCall(F, ExitFunc, T, DL);
Changed = true;
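Two changes here: the hand-rolled walk past a trailing bitcast is replaced by BasicBlock::getTerminatingMustTailCall(), and the removed DebugLoc::get helper is replaced by DILocation::get, which takes the LLVMContext explicitly. The DILocation pattern shown standalone, with SP a DISubprogram as in the hunk:

#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;

// DILocation implicitly converts to DebugLoc, so call sites only change in
// how the location is constructed.
static DebugLoc entryLocation(DISubprogram *SP) {
  return DILocation::get(SP->getContext(), SP->getScopeLine(), /*Column=*/0,
                         SP);
}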
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index cae9d9ee6d70..accedd5b4ee0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -41,6 +41,8 @@ IRBuilder<> *EscapeEnumerator::Next() {
if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
+ if (CallInst *CI = CurBB->getTerminatingMustTailCall())
+ TI = CI;
Builder.SetInsertPoint(TI);
return &Builder;
}
@@ -54,11 +56,12 @@ IRBuilder<> *EscapeEnumerator::Next() {
return nullptr;
// Find all 'call' instructions that may throw.
+  // We cannot transform calls with a musttail tag.
SmallVector<Instruction *, 16> Calls;
for (BasicBlock &BB : F)
for (Instruction &II : BB)
if (CallInst *CI = dyn_cast<CallInst>(&II))
- if (!CI->doesNotThrow())
+ if (!CI->doesNotThrow() && !CI->isMustTailCall())
Calls.push_back(CI);
if (Calls.empty())
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
index c5dfbf9d92d1..732b00635e29 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -183,11 +183,11 @@ evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL,
std::function<Constant *(Constant *)> Func) {
Constant *Val;
while (!(Val = Func(Ptr))) {
- // If Ty is a struct, we can convert the pointer to the struct
+ // If Ty is a non-opaque struct, we can convert the pointer to the struct
// into a pointer to its first member.
// FIXME: This could be extended to support arrays as well.
Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
- if (!isa<StructType>(Ty))
+ if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isOpaque())
break;
IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32);
@@ -210,11 +210,7 @@ static Constant *getInitializer(Constant *C) {
Constant *Evaluator::ComputeLoadResult(Constant *P) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
- auto findMemLoc = [this](Constant *Ptr) {
- DenseMap<Constant *, Constant *>::const_iterator I =
- MutatedMemory.find(Ptr);
- return I != MutatedMemory.end() ? I->second : nullptr;
- };
+ auto findMemLoc = [this](Constant *Ptr) { return MutatedMemory.lookup(Ptr); };
if (Constant *Val = findMemLoc(P))
return Val;
@@ -551,6 +547,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n");
++CurInst;
continue;
+ } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) {
+ LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n");
+ ++CurInst;
+ continue;
}
LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 460ba9e97fc6..44af95eef67d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -66,6 +66,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/InitializePasses.h"
@@ -104,7 +105,7 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
"Convert irreducible control-flow into natural loops",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
@@ -304,11 +305,9 @@ static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) {
return Changed;
}
-bool FixIrreducible::runOnFunction(Function &F) {
+static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
bool Changed = false;
SmallVector<Loop *, 8> WorkList;
@@ -318,13 +317,10 @@ bool FixIrreducible::runOnFunction(Function &F) {
// Any SCCs reduced are now already in the list of top-level loops, so simply
// add them all to the worklist.
- for (auto L : LI) {
- WorkList.push_back(L);
- }
+ append_range(WorkList, LI);
while (!WorkList.empty()) {
- auto L = WorkList.back();
- WorkList.pop_back();
+ auto L = WorkList.pop_back_val();
LLVM_DEBUG(dbgs() << "visiting loop with header "
<< L->getHeader()->getName() << "\n");
Changed |= makeReducible(LI, DT, *L);
@@ -335,3 +331,21 @@ bool FixIrreducible::runOnFunction(Function &F) {
return Changed;
}
+
+bool FixIrreducible::runOnFunction(Function &F) {
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return FixIrreducibleImpl(F, LI, DT);
+}
+
+PreservedAnalyses FixIrreduciblePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ if (!FixIrreducibleImpl(F, LI, DT))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
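FixIrreducible is restructured so the legacy pass and a new-PM FixIrreduciblePass share FixIrreducibleImpl, with LoopAnalysis and DominatorTreeAnalysis preserved when something changed. Running the new-PM pass directly might look like this sketch; the analysis manager setup is assumed to exist already:

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
using namespace llvm;

// FAM must already have LoopAnalysis and DominatorTreeAnalysis registered,
// e.g. via PassBuilder::registerFunctionAnalyses.
void runFixIrreducible(Function &F, FunctionAnalysisManager &FAM) {
  FunctionPassManager FPM;
  FPM.addPass(FixIrreduciblePass());
  FPM.run(F, FAM);
}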
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 101cb232d8ae..2696557a719f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -124,12 +124,17 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
Type *TyL = LA.getValueAsType();
Type *TyR = RA.getValueAsType();
- if (TyL && TyR)
- return cmpTypes(TyL, TyR);
+ if (TyL && TyR) {
+ if (int Res = cmpTypes(TyL, TyR))
+ return Res;
+ continue;
+ }
// Two pointers, at least one null, so the comparison result is
// independent of the value of a real pointer.
- return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
+ if (int Res = cmpNumbers((uint64_t)TyL, (uint64_t)TyR))
+ return Res;
+ continue;
}
if (LA < RA)
return -1;
@@ -286,6 +291,7 @@ int FunctionComparator::cmpConstants(const Constant *L,
switch (L->getValueID()) {
case Value::UndefValueVal:
+ case Value::PoisonValueVal:
case Value::ConstantTokenNoneVal:
return TypesRes;
case Value::ConstantIntVal: {
@@ -488,12 +494,13 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
case Type::ScalableVectorTyID: {
auto *STyL = cast<VectorType>(TyL);
auto *STyR = cast<VectorType>(TyR);
- if (STyL->getElementCount().Scalable != STyR->getElementCount().Scalable)
- return cmpNumbers(STyL->getElementCount().Scalable,
- STyR->getElementCount().Scalable);
- if (STyL->getElementCount().Min != STyR->getElementCount().Min)
- return cmpNumbers(STyL->getElementCount().Min,
- STyR->getElementCount().Min);
+ if (STyL->getElementCount().isScalable() !=
+ STyR->getElementCount().isScalable())
+ return cmpNumbers(STyL->getElementCount().isScalable(),
+ STyR->getElementCount().isScalable());
+ if (STyL->getElementCount() != STyR->getElementCount())
+ return cmpNumbers(STyL->getElementCount().getKnownMinValue(),
+ STyR->getElementCount().getKnownMinValue());
return cmpTypes(STyL->getElementType(), STyR->getElementType());
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index fe58f0e0fe40..f782396be7b6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -136,7 +136,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
GS.StoredType = GlobalStatus::Stored;
}
}
- } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
+ } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<AddrSpaceCastInst>(I)) {
// Skip over bitcasts and GEPs; we don't care about the type or offset
// of the pointer.
if (analyzeGlobalAux(I, GS, VisitedUsers))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
index 4cfc9358499a..4dbcbf80d3da 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
@@ -30,7 +30,7 @@ static cl::opt<uint32_t> PredicatePassBranchWeight(
void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
CallInst *Guard, bool UseWC) {
OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt));
- SmallVector<Value *, 4> Args(std::next(Guard->arg_begin()), Guard->arg_end());
+ SmallVector<Value *, 4> Args(drop_begin(Guard->args()));
auto *CheckBB = Guard->getParent();
auto *DeoptBlockTerm =
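A one-line cleanup: the explicit iterator pair that skipped the guard's condition operand becomes drop_begin over the argument range. In isolation:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// drop_begin skips the first element of the range, here the guard's
// condition, and the SmallVector range constructor copies the rest.
static SmallVector<Value *, 4> guardDeoptArgs(CallInst *Guard) {
  return SmallVector<Value *, 4>(drop_begin(Guard->args()));
}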
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index 9d8f59d62d6d..a2b72e4e7f03 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -77,7 +78,8 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
if (CI.isNoBuiltin() || !CI.getCalledFunction())
return;
- const std::string ScalarName = std::string(CI.getCalledFunction()->getName());
+ StringRef ScalarName = CI.getCalledFunction()->getName();
+
// Nothing to be done if the TLI thinks the function is not
// vectorizable.
if (!TLI.isFunctionVectorizable(ScalarName))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
index b0b7ca484798..fb271a2118ba 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -79,6 +79,12 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
cl::Hidden,
cl::desc("Convert noalias attributes to metadata during inlining."));
+static cl::opt<bool>
+ UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true),
+ cl::desc("Use the llvm.experimental.noalias.scope.decl "
+ "intrinsic during inlining."));
+
// Disabled by default, because the added alignment assumptions may increase
// compile-time and block optimizations. This option is not suitable for use
// with frontends that emit comprehensive parameter alignment annotations.
@@ -771,145 +777,149 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
UnwindDest->removePredecessor(InvokeBB);
}
-/// When inlining a call site that has !llvm.mem.parallel_loop_access or
-/// llvm.access.group metadata, that metadata should be propagated to all
-/// memory-accessing cloned instructions.
-static void PropagateParallelLoopAccessMetadata(CallBase &CB,
- ValueToValueMapTy &VMap) {
- MDNode *M = CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
- MDNode *CallAccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
- if (!M && !CallAccessGroup)
+/// When inlining a call site that has !llvm.mem.parallel_loop_access,
+/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
+/// be propagated to all memory-accessing cloned instructions.
+static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
+ Function::iterator FEnd) {
+ MDNode *MemParallelLoopAccess =
+ CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+ MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
+ MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope);
+ MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias);
+ if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
return;
- for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
- VMI != VMIE; ++VMI) {
- if (!VMI->second)
- continue;
-
- Instruction *NI = dyn_cast<Instruction>(VMI->second);
- if (!NI)
- continue;
+ for (BasicBlock &BB : make_range(FStart, FEnd)) {
+ for (Instruction &I : BB) {
+ // This metadata is only relevant for instructions that access memory.
+ if (!I.mayReadOrWriteMemory())
+ continue;
- if (M) {
- if (MDNode *PM =
- NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
- M = MDNode::concatenate(PM, M);
- NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
- } else if (NI->mayReadOrWriteMemory()) {
- NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ if (MemParallelLoopAccess) {
+        // TODO: This probably should not overwrite MemParallelLoopAccess.
+ MemParallelLoopAccess = MDNode::concatenate(
+ I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
+ MemParallelLoopAccess);
+ I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
+ MemParallelLoopAccess);
}
- }
- if (NI->mayReadOrWriteMemory()) {
- MDNode *UnitedAccGroups = uniteAccessGroups(
- NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup);
- NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups);
+ if (AccessGroup)
+ I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
+ I.getMetadata(LLVMContext::MD_access_group), AccessGroup));
+
+ if (AliasScope)
+ I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
+ I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));
+
+ if (NoAlias)
+ I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
+ I.getMetadata(LLVMContext::MD_noalias), NoAlias));
}
}
}
-/// When inlining a function that contains noalias scope metadata,
-/// this metadata needs to be cloned so that the inlined blocks
-/// have different "unique scopes" at every call site. Were this not done, then
-/// aliasing scopes from a function inlined into a caller multiple times could
-/// not be differentiated (and this would lead to miscompiles because the
-/// non-aliasing property communicated by the metadata could have
-/// call-site-specific control dependencies).
-static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
- const Function *CalledFunc = CB.getCalledFunction();
+/// Utility for cloning !noalias and !alias.scope metadata. When a code region
+/// using scoped alias metadata is inlined, the aliasing relationships may not
+/// hold between the two version. It is necessary to create a deep clone of the
+/// metadata, putting the two versions in separate scope domains.
+class ScopedAliasMetadataDeepCloner {
+ using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
SetVector<const MDNode *> MD;
-
- // Note: We could only clone the metadata if it is already used in the
- // caller. I'm omitting that check here because it might confuse
- // inter-procedural alias analysis passes. We can revisit this if it becomes
- // an efficiency or overhead problem.
-
- for (const BasicBlock &I : *CalledFunc)
- for (const Instruction &J : I) {
- if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
+ MetadataMap MDMap;
+ void addRecursiveMetadataUses();
+
+public:
+ ScopedAliasMetadataDeepCloner(const Function *F);
+
+ /// Create a new clone of the scoped alias metadata, which will be used by
+ /// subsequent remap() calls.
+ void clone();
+
+ /// Remap instructions in the given range from the original to the cloned
+ /// metadata.
+ void remap(Function::iterator FStart, Function::iterator FEnd);
+};
+
+ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
+ const Function *F) {
+ for (const BasicBlock &BB : *F) {
+ for (const Instruction &I : BB) {
+ if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
MD.insert(M);
- if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
+ if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
MD.insert(M);
- }
- if (MD.empty())
- return;
+ // We also need to clone the metadata in noalias intrinsics.
+ if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ MD.insert(Decl->getScopeList());
+ }
+ }
+ addRecursiveMetadataUses();
+}
- // Walk the existing metadata, adding the complete (perhaps cyclic) chain to
- // the set.
+void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
while (!Queue.empty()) {
const MDNode *M = cast<MDNode>(Queue.pop_back_val());
- for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i)
- if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i)))
- if (MD.insert(M1))
- Queue.push_back(M1);
+ for (const Metadata *Op : M->operands())
+ if (const MDNode *OpMD = dyn_cast<MDNode>(Op))
+ if (MD.insert(OpMD))
+ Queue.push_back(OpMD);
}
+}
+
+void ScopedAliasMetadataDeepCloner::clone() {
+ assert(MDMap.empty() && "clone() already called ?");
- // Now we have a complete set of all metadata in the chains used to specify
- // the noalias scopes and the lists of those scopes.
SmallVector<TempMDTuple, 16> DummyNodes;
- DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
for (const MDNode *I : MD) {
- DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
+ DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), None));
MDMap[I].reset(DummyNodes.back().get());
}
// Create new metadata nodes to replace the dummy nodes, replacing old
// metadata references with either a dummy node or an already-created new
// node.
+ SmallVector<Metadata *, 4> NewOps;
for (const MDNode *I : MD) {
- SmallVector<Metadata *, 4> NewOps;
- for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
- const Metadata *V = I->getOperand(i);
- if (const MDNode *M = dyn_cast<MDNode>(V))
+ for (const Metadata *Op : I->operands()) {
+ if (const MDNode *M = dyn_cast<MDNode>(Op))
NewOps.push_back(MDMap[M]);
else
- NewOps.push_back(const_cast<Metadata *>(V));
+ NewOps.push_back(const_cast<Metadata *>(Op));
}
- MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
+ MDNode *NewM = MDNode::get(I->getContext(), NewOps);
MDTuple *TempM = cast<MDTuple>(MDMap[I]);
assert(TempM->isTemporary() && "Expected temporary node");
TempM->replaceAllUsesWith(NewM);
+ NewOps.clear();
}
+}
- // Now replace the metadata in the new inlined instructions with the
- // repacements from the map.
- for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
- VMI != VMIE; ++VMI) {
- if (!VMI->second)
- continue;
-
- Instruction *NI = dyn_cast<Instruction>(VMI->second);
- if (!NI)
- continue;
-
- if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) {
- MDNode *NewMD = MDMap[M];
- // If the call site also had alias scope metadata (a list of scopes to
- // which instructions inside it might belong), propagate those scopes to
- // the inlined instructions.
- if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_alias_scope))
- NewMD = MDNode::concatenate(NewMD, CSM);
- NI->setMetadata(LLVMContext::MD_alias_scope, NewMD);
- } else if (NI->mayReadOrWriteMemory()) {
- if (MDNode *M = CB.getMetadata(LLVMContext::MD_alias_scope))
- NI->setMetadata(LLVMContext::MD_alias_scope, M);
- }
-
- if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) {
- MDNode *NewMD = MDMap[M];
- // If the call site also had noalias metadata (a list of scopes with
- // which instructions inside it don't alias), propagate those scopes to
- // the inlined instructions.
- if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_noalias))
- NewMD = MDNode::concatenate(NewMD, CSM);
- NI->setMetadata(LLVMContext::MD_noalias, NewMD);
- } else if (NI->mayReadOrWriteMemory()) {
- if (MDNode *M = CB.getMetadata(LLVMContext::MD_noalias))
- NI->setMetadata(LLVMContext::MD_noalias, M);
+void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
+ Function::iterator FEnd) {
+ if (MDMap.empty())
+ return; // Nothing to do.
+
+ for (BasicBlock &BB : make_range(FStart, FEnd)) {
+ for (Instruction &I : BB) {
+ // TODO: The null checks for the MDMap.lookup() results should no longer
+ // be necessary.
+ if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
+ if (MDNode *MNew = MDMap.lookup(M))
+ I.setMetadata(LLVMContext::MD_alias_scope, MNew);
+
+ if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
+ if (MDNode *MNew = MDMap.lookup(M))
+ I.setMetadata(LLVMContext::MD_noalias, MNew);
+
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
+ Decl->setScopeList(MNew);
}
}
}
@@ -967,6 +977,17 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
// property of the callee, but also all control dependencies in the caller.
MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
NewScopes.insert(std::make_pair(A, NewScope));
+
+ if (UseNoAliasIntrinsic) {
+ // Introduce a llvm.experimental.noalias.scope.decl for the noalias
+ // argument.
+ MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope);
+ auto *NoAliasDecl =
+ IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList);
+ // Ignore the result for now. The result will be used when the
+ // llvm.noalias intrinsic is introduced.
+ (void)NoAliasDecl;
+ }
}
// Iterate over all new instructions in the map; for all memory-access
@@ -1037,7 +1058,7 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
SmallSetVector<const Argument *, 4> NAPtrArgs;
for (const Value *V : PtrArgs) {
SmallVector<const Value *, 4> Objects;
- GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr);
+ getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
for (const Value *O : Objects)
ObjSet.insert(O);
@@ -1245,7 +1266,7 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
Function *CalledFunc = CB.getCalledFunction();
for (Argument &Arg : CalledFunc->args()) {
unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0;
- if (Align && !Arg.hasPassPointeeByValueAttr() && !Arg.hasNUses(0)) {
+ if (Align && !Arg.hasPassPointeeByValueCopyAttr() && !Arg.hasNUses(0)) {
if (!DTCalculated) {
DT.recalculate(*CB.getCaller());
DTCalculated = true;
@@ -1448,8 +1469,8 @@ static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
LLVMContext &Ctx,
DenseMap<const MDNode *, MDNode *> &IANodes) {
auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
- return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(),
- IA);
+ return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(),
+ OrigDL.getScope(), IA);
}
/// Update inlined instructions' line numbers to
@@ -1573,8 +1594,7 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
return;
auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
int64_t CallCount =
- std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
- CalleeEntryCount.getCount());
+ std::min(CallSiteCount.getValueOr(0), CalleeEntryCount.getCount());
updateProfileCallee(Callee, -CallCount, &VMap);
}
@@ -1765,6 +1785,14 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Keep a list of pair (dst, src) to emit byval initializations.
SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
+ // When inlining a function that contains noalias scope metadata,
+ // this metadata needs to be cloned so that the inlined blocks
+ // have different "unique scopes" at every call site.
+ // Track the metadata that must be cloned. Do this before other changes to
+ // the function, so that we do not get in trouble when inlining caller ==
+ // callee.
+ ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
+
auto &DL = Caller->getParent()->getDataLayout();
// Calculate the vector of arguments to pass into the function cloner, which
@@ -1855,11 +1883,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
ChildOB.Inputs.size());
- MergedDeoptArgs.insert(MergedDeoptArgs.end(),
- ParentDeopt->Inputs.begin(),
- ParentDeopt->Inputs.end());
- MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
- ChildOB.Inputs.end());
+ llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs);
+ llvm::append_range(MergedDeoptArgs, ChildOB.Inputs);
OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
}
@@ -1885,8 +1910,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
fixupLineNumbers(Caller, FirstNewBlock, &CB,
CalledFunc->getSubprogram() != nullptr);
- // Clone existing noalias metadata if necessary.
- CloneAliasScopeMetadata(CB, VMap);
+ // Now clone the inlined noalias scope metadata.
+ SAMetadataCloner.clone();
+ SAMetadataCloner.remap(FirstNewBlock, Caller->end());
// Add noalias metadata if necessary.
AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR);
@@ -1895,8 +1921,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// function which feed into its return value.
AddReturnAttributes(CB, VMap);
- // Propagate llvm.mem.parallel_loop_access if necessary.
- PropagateParallelLoopAccessMetadata(CB, VMap);
+ // Propagate metadata on the callsite if necessary.
+ PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());
// Register any cloned assumptions.
if (IFI.GetAssumptionCache)
@@ -2061,7 +2087,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
dyn_cast<ConstantInt>(AI->getArraySize())) {
auto &DL = Caller->getParent()->getDataLayout();
Type *AllocaType = AI->getAllocatedType();
- uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+ TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
// Don't add markers for zero-sized allocas.
@@ -2070,9 +2096,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Check that array size doesn't saturate uint64_t and doesn't
// overflow when it's multiplied by type size.
- if (AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
+ if (!AllocaTypeSize.isScalable() &&
+ AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
- AllocaTypeSize) {
+ AllocaTypeSize.getFixedSize()) {
AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
AllocaArraySize * AllocaTypeSize);
}
@@ -2198,10 +2225,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// match the callee's return type, we also need to change the return type of
// the intrinsic.
if (Caller->getReturnType() == CB.getType()) {
- auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) {
+ llvm::erase_if(Returns, [](ReturnInst *RI) {
return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
});
- Returns.erase(NewEnd, Returns.end());
} else {
SmallVector<ReturnInst *, 8> NormalReturns;
Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
@@ -2225,8 +2251,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
auto *CurBB = RI->getParent();
RI->eraseFromParent();
- SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(),
- DeoptCall->arg_end());
+ SmallVector<Value *, 4> CallArgs(DeoptCall->args());
SmallVector<OperandBundleDef, 1> OpBundles;
DeoptCall->getOperandBundlesAsDefs(OpBundles);
@@ -2463,7 +2488,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// If we inlined any musttail calls and the original return is now
// unreachable, delete it. It can only contain a bitcast and ret.
- if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB))
+ if (InlinedMustTailCalls && pred_empty(AfterCallBB))
AfterCallBB->eraseFromParent();
// We should always be able to fold the entry block of the function into the
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 8e339fe46d45..f3499c9c8aed 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -13,43 +13,52 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/InstructionNamer.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
+
using namespace llvm;
namespace {
- struct InstNamer : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- InstNamer() : FunctionPass(ID) {
- initializeInstNamerPass(*PassRegistry::getPassRegistry());
- }
+void nameInstructions(Function &F) {
+ for (auto &Arg : F.args()) {
+ if (!Arg.hasName())
+ Arg.setName("arg");
+ }
- void getAnalysisUsage(AnalysisUsage &Info) const override {
- Info.setPreservesAll();
+ for (BasicBlock &BB : F) {
+ if (!BB.hasName())
+ BB.setName("bb");
+
+ for (Instruction &I : BB) {
+ if (!I.hasName() && !I.getType()->isVoidTy())
+ I.setName("i");
}
+ }
+}
- bool runOnFunction(Function &F) override {
- for (auto &Arg : F.args())
- if (!Arg.hasName())
- Arg.setName("arg");
+struct InstNamer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
- for (BasicBlock &BB : F) {
- if (!BB.hasName())
- BB.setName("bb");
+ void getAnalysisUsage(AnalysisUsage &Info) const override {
+ Info.setPreservesAll();
+ }
- for (Instruction &I : BB)
- if (!I.hasName() && !I.getType()->isVoidTy())
- I.setName("i");
- }
- return true;
- }
- };
+ bool runOnFunction(Function &F) override {
+ nameInstructions(F);
+ return true;
+ }
+};
char InstNamer::ID = 0;
-}
+ } // namespace
INITIALIZE_PASS(InstNamer, "instnamer",
"Assign names to anonymous instructions", false, false)
@@ -61,3 +70,9 @@ char &llvm::InstructionNamerID = InstNamer::ID;
FunctionPass *llvm::createInstructionNamerPass() {
return new InstNamer();
}
+
+PreservedAnalyses InstructionNamerPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ nameInstructions(F);
+ return PreservedAnalyses::all();
+}
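The new InstructionNamerPass::run above follows the usual new-pass-manager shape: shared logic in a free function, plus a thin wrapper that calls it and reports which analyses survive. A minimal sketch of that idiom (ExampleNamerPass is a made-up name, not an LLVM pass):

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

namespace {
// Shared implementation usable from both pass-manager wrappers.
void renameAnonymousArgs(llvm::Function &F) {
  for (llvm::Argument &Arg : F.args())
    if (!Arg.hasName())
      Arg.setName("arg");
}

// New-pass-manager wrapper: run the shared logic and keep all analyses,
// since renaming values does not change the IR structure.
struct ExampleNamerPass : llvm::PassInfoMixin<ExampleNamerPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &) {
    renameAnonymousArgs(F);
    return llvm::PreservedAnalyses::all();
  }
};
} // end anonymous namespace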
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
index b1a1c564d217..7437701f5339 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PredIteratorCache.h"
@@ -77,12 +78,15 @@ static bool isExitBlock(BasicBlock *BB,
/// rewrite the uses.
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
const DominatorTree &DT, const LoopInfo &LI,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE, IRBuilderBase &Builder,
+ SmallVectorImpl<PHINode *> *PHIsToRemove) {
SmallVector<Use *, 16> UsesToRewrite;
- SmallSetVector<PHINode *, 16> PHIsToRemove;
+ SmallSetVector<PHINode *, 16> LocalPHIsToRemove;
PredIteratorCache PredCache;
bool Changed = false;
+ IRBuilderBase::InsertPointGuard InsertPtGuard(Builder);
+
// Cache the Loop ExitBlocks across this loop. We expect to get a lot of
// instructions within the same loops, computing the exit blocks is
// expensive, and we're not mutating the loop structure.
@@ -107,6 +111,10 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
BasicBlock *UserBB = User->getParent();
+
+ // For practical purposes, we consider that the use in a PHI
+ // occurs in the respective predecessor block. For more info,
+ // see the `phi` doc in LangRef and the LCSSA doc.
if (auto *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(U);
@@ -151,12 +159,17 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// If we already inserted something for this BB, don't reprocess it.
if (SSAUpdate.HasValueForBlock(ExitBB))
continue;
-
- PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
- I->getName() + ".lcssa", &ExitBB->front());
+ Builder.SetInsertPoint(&ExitBB->front());
+ PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB),
+ I->getName() + ".lcssa");
// Get the debug location from the original instruction.
PN->setDebugLoc(I->getDebugLoc());
- // Add inputs from inside the loop for this PHI.
+
+ // Add inputs from inside the loop for this PHI. This is valid
+ // because `I` dominates `ExitBB` (checked above). This implies
+ // that every incoming block/edge is dominated by `I` as well,
+      // i.e. we can add uses of `I` in those incoming blocks (equivalently,
+      // along those incoming edges) without violating the SSA dominance
+      // property.
for (BasicBlock *Pred : PredCache.get(ExitBB)) {
PN->addIncoming(I, Pred);
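The IRBuilderBase::InsertPointGuard introduced earlier in this hunk is what makes it safe to re-point the caller's builder at each exit block: the guard restores the original insertion point when it goes out of scope. A small sketch of that pattern (createExamplePhi is hypothetical):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"

// Create a PHI at the top of ExitBB, then hand the caller its builder back
// exactly where it was: the guard restores the insertion point on scope exit.
llvm::PHINode *createExamplePhi(llvm::IRBuilderBase &Builder,
                                llvm::BasicBlock &ExitBB, llvm::Type *Ty,
                                unsigned NumPreds) {
  llvm::IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(&ExitBB.front());
  return Builder.CreatePHI(Ty, NumPreds, "example.lcssa");
}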
@@ -190,15 +203,19 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// Rewrite all uses outside the loop in terms of the new PHIs we just
// inserted.
for (Use *UseToRewrite : UsesToRewrite) {
- // If this use is in an exit block, rewrite to use the newly inserted PHI.
- // This is required for correctness because SSAUpdate doesn't handle uses
- // in the same block. It assumes the PHI we inserted is at the end of the
- // block.
Instruction *User = cast<Instruction>(UseToRewrite->getUser());
BasicBlock *UserBB = User->getParent();
+
+ // For practical purposes, we consider that the use in a PHI
+ // occurs in the respective predecessor block. For more info,
+ // see the `phi` doc in LangRef and the LCSSA doc.
if (auto *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(*UseToRewrite);
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses
+ // in the same block. It assumes the PHI we inserted is at the end of the
+ // block.
if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
UseToRewrite->set(&UserBB->front());
continue;
@@ -248,27 +265,29 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
Worklist.push_back(PostProcessPN);
// Keep track of PHI nodes that we want to remove because they did not have
- // any uses rewritten. If the new PHI is used, store it so that we can
- // try to propagate dbg.value intrinsics to it.
- SmallVector<PHINode *, 2> NeedDbgValues;
+ // any uses rewritten.
for (PHINode *PN : AddedPHIs)
if (PN->use_empty())
- PHIsToRemove.insert(PN);
- else
- NeedDbgValues.push_back(PN);
- insertDebugValuesForPHIs(InstBB, NeedDbgValues);
+ LocalPHIsToRemove.insert(PN);
+
Changed = true;
}
- // Remove PHI nodes that did not have any uses rewritten. We need to redo the
- // use_empty() check here, because even if the PHI node wasn't used when added
- // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is
- // not guaranteed to handle trees/cycles of PHI nodes that only are used by
- // each other. Such situations has only been noticed when the input IR
- // contains unreachable code, and leaving some extra redundant PHI nodes in
- // such situations is considered a minor problem.
- for (PHINode *PN : PHIsToRemove)
- if (PN->use_empty())
- PN->eraseFromParent();
+
+ // Remove PHI nodes that did not have any uses rewritten or add them to
+ // PHIsToRemove, so the caller can remove them after some additional cleanup.
+ // We need to redo the use_empty() check here, because even if the PHI node
+ // wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be
+ // using it. This cleanup is not guaranteed to handle trees/cycles of PHI
+  // nodes that are only used by each other. Such situations have only been
+ // noticed when the input IR contains unreachable code, and leaving some extra
+ // redundant PHI nodes in such situations is considered a minor problem.
+ if (PHIsToRemove) {
+ PHIsToRemove->append(LocalPHIsToRemove.begin(), LocalPHIsToRemove.end());
+ } else {
+ for (PHINode *PN : LocalPHIsToRemove)
+ if (PN->use_empty())
+ PN->eraseFromParent();
+ }
return Changed;
}
@@ -276,12 +295,9 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
static void computeBlocksDominatingExits(
Loop &L, const DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
- SmallVector<BasicBlock *, 8> BBWorklist;
-
// We start from the exit blocks, as every block trivially dominates itself
// (not strictly).
- for (BasicBlock *BB : ExitBlocks)
- BBWorklist.push_back(BB);
+ SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks);
while (!BBWorklist.empty()) {
BasicBlock *BB = BBWorklist.pop_back_val();
@@ -369,7 +385,9 @@ bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
Worklist.push_back(&I);
}
}
- Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE);
+
+ IRBuilder<> Builder(L.getHeader()->getContext());
+ Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder);
// If we modified the code, remove any caches about the loop from SCEV to
// avoid dangling entries.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index da40c342af3a..ae26058c210c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -91,6 +91,24 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "local"
STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
+STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd");
+
+static cl::opt<bool> PHICSEDebugHash(
+ "phicse-debug-hash",
+#ifdef EXPENSIVE_CHECKS
+ cl::init(true),
+#else
+ cl::init(false),
+#endif
+ cl::Hidden,
+ cl::desc("Perform extra assertion checking to verify that PHINodes's hash "
+ "function is well-behaved w.r.t. its isEqual predicate"));
+
+static cl::opt<unsigned> PHICSENumPHISmallSize(
+ "phicse-num-phi-smallsize", cl::init(32), cl::Hidden,
+ cl::desc(
+ "When the basic block contains not more than this number of PHI nodes, "
+ "perform a (faster!) exhaustive search instead of set-driven one."));
// Max recursion depth for collectBitParts used when detecting bswap and
// bitreverse idioms
@@ -116,27 +134,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Branch - See if we are conditional jumping on constant
if (auto *BI = dyn_cast<BranchInst>(T)) {
if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+
BasicBlock *Dest1 = BI->getSuccessor(0);
BasicBlock *Dest2 = BI->getSuccessor(1);
- if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
- // Are we branching on constant?
- // YES. Change to unconditional branch...
- BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
- BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
-
- // Let the basic block know that we are letting go of it. Based on this,
- // it will adjust it's PHI nodes.
- OldDest->removePredecessor(BB);
-
- // Replace the conditional branch with an unconditional one.
- Builder.CreateBr(Destination);
- BI->eraseFromParent();
- if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, OldDest}});
- return true;
- }
-
if (Dest2 == Dest1) { // Conditional branch to same location?
// This branch matches something like this:
// br bool %cond, label %Dest, label %Dest
@@ -154,6 +155,25 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
return true;
}
+
+ if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // Are we branching on constant?
+ // YES. Change to unconditional branch...
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+ // Let the basic block know that we are letting go of it. Based on this,
+      // it will adjust its PHI nodes.
+ OldDest->removePredecessor(BB);
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Destination);
+ BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, OldDest}});
+ return true;
+ }
+
return false;
}
@@ -170,6 +190,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
TheOnlyDest = SI->case_begin()->getCaseSuccessor();
}
+ bool Changed = false;
+
// Figure out which case it goes to.
for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
// Found case matching a constant operand?
@@ -208,9 +230,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
DefaultDest->removePredecessor(ParentBB);
i = SI->removeCase(i);
e = SI->case_end();
- if (DTU)
- DTU->applyUpdatesPermissive(
- {{DominatorTree::Delete, ParentBB, DefaultDest}});
+ Changed = true;
continue;
}
@@ -236,19 +256,19 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Insert the new branch.
Builder.CreateBr(TheOnlyDest);
BasicBlock *BB = SI->getParent();
- std::vector <DominatorTree::UpdateType> Updates;
- if (DTU)
- Updates.reserve(SI->getNumSuccessors() - 1);
+
+ SmallSetVector<BasicBlock *, 8> RemovedSuccessors;
// Remove entries from PHI nodes which we no longer branch to...
+ BasicBlock *SuccToKeep = TheOnlyDest;
for (BasicBlock *Succ : successors(SI)) {
+ if (DTU && Succ != TheOnlyDest)
+ RemovedSuccessors.insert(Succ);
// Found case matching a constant operand?
- if (Succ == TheOnlyDest) {
- TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
+ if (Succ == SuccToKeep) {
+ SuccToKeep = nullptr; // Don't modify the first branch to TheOnlyDest
} else {
Succ->removePredecessor(BB);
- if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Succ});
}
}
@@ -257,8 +277,13 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SI->eraseFromParent();
if (DeleteDeadConditions)
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
- if (DTU)
- DTU->applyUpdatesPermissive(Updates);
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(RemovedSuccessors.size());
+ for (auto *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
return true;
}
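This hunk shows the DominatorTree-update discipline the patch applies throughout Local.cpp: deduplicate the removed successors in a SmallSetVector, then submit one batch of strict Delete updates via applyUpdates instead of applyUpdatesPermissive. A sketch of just that bookkeeping, with the actual CFG surgery (terminator replacement, removePredecessor) elided; deleteSuccessorEdges is a hypothetical helper:

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"

// Collect each successor of BB once, then report the deleted edges to the
// DomTreeUpdater in a single batch. (In real code the CFG edges are removed
// before the updates are applied, so the strict API's invariants hold.)
void deleteSuccessorEdges(llvm::BasicBlock *BB, llvm::DomTreeUpdater *DTU) {
  llvm::SmallSetVector<llvm::BasicBlock *, 8> UniqueSuccessors;
  for (llvm::BasicBlock *Succ : successors(BB))
    UniqueSuccessors.insert(Succ);
  if (!DTU)
    return;
  llvm::SmallVector<llvm::DominatorTree::UpdateType, 8> Updates;
  Updates.reserve(UniqueSuccessors.size());
  for (llvm::BasicBlock *Succ : UniqueSuccessors)
    Updates.push_back({llvm::DominatorTree::Delete, BB, Succ});
  DTU->applyUpdates(Updates);
}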
@@ -296,7 +321,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SI->eraseFromParent();
return true;
}
- return false;
+ return Changed;
}
if (auto *IBI = dyn_cast<IndirectBrInst>(T)) {
@@ -304,22 +329,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
if (auto *BA =
dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
BasicBlock *TheOnlyDest = BA->getBasicBlock();
- std::vector <DominatorTree::UpdateType> Updates;
- if (DTU)
- Updates.reserve(IBI->getNumDestinations() - 1);
+ SmallSetVector<BasicBlock *, 8> RemovedSuccessors;
// Insert the new branch.
Builder.CreateBr(TheOnlyDest);
+ BasicBlock *SuccToKeep = TheOnlyDest;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
- if (IBI->getDestination(i) == TheOnlyDest) {
- TheOnlyDest = nullptr;
+ BasicBlock *DestBB = IBI->getDestination(i);
+ if (DTU && DestBB != TheOnlyDest)
+ RemovedSuccessors.insert(DestBB);
+ if (IBI->getDestination(i) == SuccToKeep) {
+ SuccToKeep = nullptr;
} else {
- BasicBlock *ParentBB = IBI->getParent();
- BasicBlock *DestBB = IBI->getDestination(i);
- DestBB->removePredecessor(ParentBB);
- if (DTU)
- Updates.push_back({DominatorTree::Delete, ParentBB, DestBB});
+ DestBB->removePredecessor(BB);
}
}
Value *Address = IBI->getAddress();
@@ -336,13 +359,18 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// If we didn't find our destination in the IBI successor list, then we
// have undefined behavior. Replace the unconditional branch with an
// 'unreachable' instruction.
- if (TheOnlyDest) {
+ if (SuccToKeep) {
BB->getTerminator()->eraseFromParent();
new UnreachableInst(BB->getContext(), BB);
}
- if (DTU)
- DTU->applyUpdatesPermissive(Updates);
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(RemovedSuccessors.size());
+ for (auto *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
return true;
}
}
@@ -392,6 +420,9 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return true;
}
+ if (!I->willReturn())
+ return false;
+
if (!I->mayHaveSideEffects())
return true;
@@ -453,21 +484,24 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
/// trivially dead, delete them too, recursively. Return true if any
/// instructions were deleted.
bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
- Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
+ Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU,
+ std::function<void(Value *)> AboutToDeleteCallback) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !isInstructionTriviallyDead(I, TLI))
return false;
SmallVector<WeakTrackingVH, 16> DeadInsts;
DeadInsts.push_back(I);
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU,
+ AboutToDeleteCallback);
return true;
}
bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU,
+ std::function<void(Value *)> AboutToDeleteCallback) {
unsigned S = 0, E = DeadInsts.size(), Alive = 0;
for (; S != E; ++S) {
auto *I = cast<Instruction>(DeadInsts[S]);
@@ -478,13 +512,15 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
}
if (Alive == E)
return false;
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU,
+ AboutToDeleteCallback);
return true;
}
void llvm::RecursivelyDeleteTriviallyDeadInstructions(
SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU,
+ std::function<void(Value *)> AboutToDeleteCallback) {
// Process the dead instruction list until empty.
while (!DeadInsts.empty()) {
Value *V = DeadInsts.pop_back_val();
@@ -498,6 +534,9 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(
// Don't lose the debug info while deleting the instructions.
salvageDebugInfo(*I);
+ if (AboutToDeleteCallback)
+ AboutToDeleteCallback(I);
+
// Null out all of the instruction's operands to see if any operand becomes
// dead as we go.
for (Use &OpU : I->operands()) {
@@ -675,34 +714,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
// Control Flow Graph Restructuring.
//
-void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DomTreeUpdater *DTU) {
- // This only adjusts blocks with PHI nodes.
- if (!isa<PHINode>(BB->begin()))
- return;
-
- // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
- // them down. This will leave us with single entry phi nodes and other phis
- // that can be removed.
- BB->removePredecessor(Pred, true);
-
- WeakTrackingVH PhiIt = &BB->front();
- while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
- PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
- Value *OldPhiIt = PhiIt;
-
- if (!recursivelySimplifyInstruction(PN))
- continue;
-
- // If recursive simplification ended up deleting the next PHI node we would
- // iterate to, then our iterator is invalid, restart scanning from the top
- // of the block.
- if (PhiIt != OldPhiIt) PhiIt = &BB->front();
- }
- if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}});
-}
-
void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
DomTreeUpdater *DTU) {
@@ -727,13 +738,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
SmallVector<DominatorTree::UpdateType, 32> Updates;
if (DTU) {
- Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) {
- Updates.push_back({DominatorTree::Delete, *I, PredBB});
// This predecessor of PredBB may already have DestBB as a successor.
- if (llvm::find(successors(*I), DestBB) == succ_end(*I))
+ if (!llvm::is_contained(successors(*I), DestBB))
Updates.push_back({DominatorTree::Insert, *I, DestBB});
+ Updates.push_back({DominatorTree::Delete, *I, PredBB});
}
+ Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
}
// Zap anything that took the address of DestBB. Not doing this will give the
@@ -907,6 +918,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN,
/// \param IncomingValues A map from block to value.
static void replaceUndefValuesInPhi(PHINode *PN,
const IncomingValueMap &IncomingValues) {
+ SmallVector<unsigned> TrueUndefOps;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
@@ -914,10 +926,31 @@ static void replaceUndefValuesInPhi(PHINode *PN,
BasicBlock *BB = PN->getIncomingBlock(i);
IncomingValueMap::const_iterator It = IncomingValues.find(BB);
- if (It == IncomingValues.end()) continue;
+ // Keep track of undef/poison incoming values. Those must match, so we fix
+ // them up below if needed.
+ // Note: this is conservatively correct, but we could try harder and group
+ // the undef values per incoming basic block.
+ if (It == IncomingValues.end()) {
+ TrueUndefOps.push_back(i);
+ continue;
+ }
+
+ // There is a defined value for this incoming block, so map this undef
+ // incoming value to the defined value.
PN->setIncomingValue(i, It->second);
}
+
+ // If there are both undef and poison values incoming, then convert those
+ // values to undef. It is invalid to have different values for the same
+ // incoming block.
+ unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) {
+ return isa<PoisonValue>(PN->getIncomingValue(i));
+ });
+ if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) {
+ for (unsigned i : TrueUndefOps)
+ PN->setIncomingValue(i, UndefValue::get(PN->getType()));
+ }
}
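The fix-up above can be read as a small rule: among the incoming values with no defined replacement, a mix of undef and poison is demoted to all-undef, so identical incoming blocks cannot end up carrying different values. A standalone plain-C++ model of that rule (illustrative only):

#include <vector>

enum class Incoming { Defined, Undef, Poison };

// If the non-defined incoming values mix undef and poison, turn the poison
// ones into undef; if they are all poison (or all undef) nothing changes.
void normalizeTrueUndefs(std::vector<Incoming> &Ops) {
  unsigned TrueUndef = 0, Poison = 0;
  for (Incoming I : Ops) {
    if (I == Incoming::Defined)
      continue;
    ++TrueUndef;
    if (I == Incoming::Poison)
      ++Poison;
  }
  if (Poison != 0 && Poison != TrueUndef)
    for (Incoming &I : Ops)
      if (I == Incoming::Poison)
        I = Incoming::Undef;
}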
/// Replace a value flowing from a block to a phi with
@@ -1038,14 +1071,16 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
SmallVector<DominatorTree::UpdateType, 32> Updates;
if (DTU) {
- Updates.push_back({DominatorTree::Delete, BB, Succ});
// All predecessors of BB will be moved to Succ.
- for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
- Updates.push_back({DominatorTree::Delete, *I, BB});
+ SmallSetVector<BasicBlock *, 8> Predecessors(pred_begin(BB), pred_end(BB));
+ Updates.reserve(Updates.size() + 2 * Predecessors.size());
+ for (auto *Predecessor : Predecessors) {
// This predecessor of BB may already have Succ as a successor.
- if (llvm::find(successors(*I), Succ) == succ_end(*I))
- Updates.push_back({DominatorTree::Insert, *I, Succ});
+ if (!llvm::is_contained(successors(Predecessor), Succ))
+ Updates.push_back({DominatorTree::Insert, Predecessor, Succ});
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
}
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
if (isa<PHINode>(Succ->begin())) {
@@ -1101,7 +1136,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
"applying corresponding DTU updates.");
if (DTU) {
- DTU->applyUpdatesPermissive(Updates);
+ DTU->applyUpdates(Updates);
DTU->deleteBB(BB);
} else {
BB->eraseFromParent(); // Delete the old basic block.
@@ -1109,7 +1144,39 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
return true;
}
-bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) {
+ // This implementation doesn't currently consider undef operands
+ // specially. Theoretically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ bool Changed = false;
+
+ // Examine each PHI.
+ // Note that increment of I must *NOT* be in the iteration_expression, since
+ // we don't want to immediately advance when we restart from the beginning.
+ for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I);) {
+ ++I;
+ // Is there an identical PHI node in this basic block?
+    // Note that we only look at the PHIs following PN (the upper triangle of
+    // the pairwise comparison); the lower-triangle pairs were already checked.
+ for (auto J = I; PHINode *DuplicatePN = dyn_cast<PHINode>(J); ++J) {
+ if (!DuplicatePN->isIdenticalToWhenDefined(PN))
+ continue;
+ // A duplicate. Replace this PHI with the base PHI.
+ ++NumPHICSEs;
+ DuplicatePN->replaceAllUsesWith(PN);
+ DuplicatePN->eraseFromParent();
+ Changed = true;
+
+ // The RAUW can change PHIs that we already visited.
+ I = BB->begin();
+ break; // Start over from the beginning.
+ }
+ }
+ return Changed;
+}
+
+static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) {
// This implementation doesn't currently consider undef operands
// specially. Theoretically, two phis which are identical except for
// one having an undef where the other doesn't could be collapsed.
@@ -1123,7 +1190,13 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
return DenseMapInfo<PHINode *>::getTombstoneKey();
}
- static unsigned getHashValue(PHINode *PN) {
+ static bool isSentinel(PHINode *PN) {
+ return PN == getEmptyKey() || PN == getTombstoneKey();
+ }
+
+ // WARNING: this logic must be kept in sync with
+ // Instruction::isIdenticalToWhenDefined()!
+ static unsigned getHashValueImpl(PHINode *PN) {
// Compute a hash value on the operands. Instcombine will likely have
// sorted them, which helps expose duplicates, but we have to check all
// the operands to be safe in case instcombine hasn't run.
@@ -1132,16 +1205,37 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
hash_combine_range(PN->block_begin(), PN->block_end())));
}
- static bool isEqual(PHINode *LHS, PHINode *RHS) {
- if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
- RHS == getEmptyKey() || RHS == getTombstoneKey())
+ static unsigned getHashValue(PHINode *PN) {
+#ifndef NDEBUG
+ // If -phicse-debug-hash was specified, return a constant -- this
+ // will force all hashing to collide, so we'll exhaustively search
+ // the table for a match, and the assertion in isEqual will fire if
+ // there's a bug causing equal keys to hash differently.
+ if (PHICSEDebugHash)
+ return 0;
+#endif
+ return getHashValueImpl(PN);
+ }
+
+ static bool isEqualImpl(PHINode *LHS, PHINode *RHS) {
+ if (isSentinel(LHS) || isSentinel(RHS))
return LHS == RHS;
return LHS->isIdenticalTo(RHS);
}
+
+ static bool isEqual(PHINode *LHS, PHINode *RHS) {
+ // These comparisons are nontrivial, so assert that equality implies
+ // hash equality (DenseMap demands this as an invariant).
+ bool Result = isEqualImpl(LHS, RHS);
+ assert(!Result || (isSentinel(LHS) && LHS == RHS) ||
+ getHashValueImpl(LHS) == getHashValueImpl(RHS));
+ return Result;
+ }
};
// Set of unique PHINodes.
DenseSet<PHINode *, PHIDenseMapInfo> PHISet;
+ PHISet.reserve(4 * PHICSENumPHISmallSize);
// Examine each PHI.
bool Changed = false;
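The getHashValue/getHashValueImpl split above enforces the standard hashed-container invariant: keys that compare equal must hash equally, and forcing the hash to a constant (-phicse-debug-hash) routes every lookup through isEqual so a violation trips the assertion. A standalone analogue using std::unordered_set (Key, KeyHash and KeyEq are illustrative names):

#include <cassert>
#include <cstddef>
#include <functional>
#include <unordered_set>

struct Key { int A, B; };

struct KeyHash {
  static bool DebugHash; // when true, force all keys to collide
  static std::size_t hashImpl(const Key &K) {
    return std::hash<int>()(K.A) ^ (std::hash<int>()(K.B) << 1);
  }
  std::size_t operator()(const Key &K) const {
    return DebugHash ? 0 : hashImpl(K);
  }
};
bool KeyHash::DebugHash = false;

struct KeyEq {
  bool operator()(const Key &L, const Key &R) const {
    bool Equal = L.A == R.A && L.B == R.B;
    // Equality must imply equal (real) hashes, or the container misbehaves.
    assert((!Equal || KeyHash::hashImpl(L) == KeyHash::hashImpl(R)) &&
           "equal keys must hash equally");
    return Equal;
  }
};

using KeySet = std::unordered_set<Key, KeyHash, KeyEq>;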
@@ -1149,6 +1243,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
auto Inserted = PHISet.insert(PN);
if (!Inserted.second) {
// A duplicate. Replace this PHI with its duplicate.
+ ++NumPHICSEs;
PN->replaceAllUsesWith(*Inserted.first);
PN->eraseFromParent();
Changed = true;
@@ -1163,54 +1258,63 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
return Changed;
}
-/// enforceKnownAlignment - If the specified pointer points to an object that
-/// we control, modify the object's alignment to PrefAlign. This isn't
-/// often possible though. If alignment is important, a more reliable approach
-/// is to simply align all global variables and allocation instructions to
-/// their preferred alignment from the beginning.
-static Align enforceKnownAlignment(Value *V, Align Alignment, Align PrefAlign,
- const DataLayout &DL) {
- assert(PrefAlign > Alignment);
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+ if (
+#ifndef NDEBUG
+ !PHICSEDebugHash &&
+#endif
+ hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize))
+ return EliminateDuplicatePHINodesNaiveImpl(BB);
+ return EliminateDuplicatePHINodesSetBasedImpl(BB);
+}
+/// If the specified pointer points to an object that we control, try to modify
+/// the object's alignment to PrefAlign. Returns a minimum known alignment of
+/// the value after the operation, which may be lower than PrefAlign.
+///
+/// Increasing value alignment isn't often possible though. If alignment is
+/// important, a more reliable approach is to simply align all global variables
+/// and allocation instructions to their preferred alignment from the beginning.
+static Align tryEnforceAlignment(Value *V, Align PrefAlign,
+ const DataLayout &DL) {
V = V->stripPointerCasts();
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
- // TODO: ideally, computeKnownBits ought to have used
- // AllocaInst::getAlignment() in its computation already, making
- // the below max redundant. But, as it turns out,
- // stripPointerCasts recurses through infinite layers of bitcasts,
- // while computeKnownBits is not allowed to traverse more than 6
- // levels.
- Alignment = std::max(AI->getAlign(), Alignment);
- if (PrefAlign <= Alignment)
- return Alignment;
+ // TODO: Ideally, this function would not be called if PrefAlign is smaller
+ // than the current alignment, as the known bits calculation should have
+ // already taken it into account. However, this is not always the case,
+ // as computeKnownBits() has a depth limit, while stripPointerCasts()
+ // doesn't.
+ Align CurrentAlign = AI->getAlign();
+ if (PrefAlign <= CurrentAlign)
+ return CurrentAlign;
// If the preferred alignment is greater than the natural stack alignment
// then don't round up. This avoids dynamic stack realignment.
if (DL.exceedsNaturalStackAlignment(PrefAlign))
- return Alignment;
+ return CurrentAlign;
AI->setAlignment(PrefAlign);
return PrefAlign;
}
if (auto *GO = dyn_cast<GlobalObject>(V)) {
// TODO: as above, this shouldn't be necessary.
- Alignment = max(GO->getAlign(), Alignment);
- if (PrefAlign <= Alignment)
- return Alignment;
+ Align CurrentAlign = GO->getPointerAlignment(DL);
+ if (PrefAlign <= CurrentAlign)
+ return CurrentAlign;
// If there is a large requested alignment and we can, bump up the alignment
// of the global. If the memory we set aside for the global may not be the
// memory used by the final program then it is impossible for us to reliably
// enforce the preferred alignment.
if (!GO->canIncreaseAlignment())
- return Alignment;
+ return CurrentAlign;
GO->setAlignment(PrefAlign);
return PrefAlign;
}
- return Alignment;
+ return Align(1);
}
Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign,
@@ -1232,7 +1336,7 @@ Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign,
Align Alignment = Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
if (PrefAlign && *PrefAlign > Alignment)
- Alignment = enforceKnownAlignment(V, Alignment, *PrefAlign, DL);
+ Alignment = std::max(Alignment, tryEnforceAlignment(V, *PrefAlign, DL));
// We don't need to make any adjustment.
return Alignment;
@@ -1270,16 +1374,22 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
/// least n bits.
static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
const DataLayout &DL = DII->getModule()->getDataLayout();
- uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy);
- if (auto FragmentSize = DII->getFragmentSizeInBits())
- return ValueSize >= *FragmentSize;
+ TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy);
+ if (Optional<uint64_t> FragmentSize = DII->getFragmentSizeInBits()) {
+ assert(!ValueSize.isScalable() &&
+ "Fragments don't work on scalable types.");
+ return ValueSize.getFixedSize() >= *FragmentSize;
+ }
// We can't always calculate the size of the DI variable (e.g. if it is a
// VLA). Try to use the size of the alloca that the dbg intrinsic describes
  // instead.
if (DII->isAddressOfVariable())
if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation()))
- if (auto FragmentSize = AI->getAllocationSizeInBits(DL))
- return ValueSize >= *FragmentSize;
+ if (Optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) {
+ assert(ValueSize.isScalable() == FragmentSize->isScalable() &&
+ "Both sizes should agree on the scalable flag.");
+ return TypeSize::isKnownGE(ValueSize, *FragmentSize);
+ }
// Could not determine size of variable. Conservatively return false.
return false;
}
@@ -1294,7 +1404,7 @@ static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) {
MDNode *Scope = DeclareLoc.getScope();
DILocation *InlinedAt = DeclareLoc.getInlinedAt();
// Produce an unknown location with the correct scope / inlinedAt fields.
- return DebugLoc::get(0, 0, Scope, InlinedAt);
+ return DILocation::get(DII->getContext(), 0, 0, Scope, InlinedAt);
}
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
@@ -1911,8 +2021,10 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
return false;
}
-unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
+std::pair<unsigned, unsigned>
+llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
unsigned NumDeadInst = 0;
+ unsigned NumDeadDbgInst = 0;
// Delete the instructions backwards, as it has a reduced likelihood of
// having to update as many def-use and use-def chains.
Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
@@ -1925,30 +2037,31 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
EndInst = Inst;
continue;
}
- if (!isa<DbgInfoIntrinsic>(Inst))
+ if (isa<DbgInfoIntrinsic>(Inst))
+ ++NumDeadDbgInst;
+ else
++NumDeadInst;
Inst->eraseFromParent();
}
- return NumDeadInst;
+ return {NumDeadInst, NumDeadDbgInst};
}
unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
bool PreserveLCSSA, DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU) {
BasicBlock *BB = I->getParent();
- std::vector <DominatorTree::UpdateType> Updates;
if (MSSAU)
MSSAU->changeToUnreachable(I);
+ SmallSetVector<BasicBlock *, 8> UniqueSuccessors;
+
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
- if (DTU)
- Updates.reserve(BB->getTerminator()->getNumSuccessors());
for (BasicBlock *Successor : successors(BB)) {
Successor->removePredecessor(BB, PreserveLCSSA);
if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Successor});
+ UniqueSuccessors.insert(Successor);
}
// Insert a call to llvm.trap right before this. This turns the undefined
// behavior into a hard fail instead of falling through into random code.
@@ -1970,13 +2083,18 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
BB->getInstList().erase(BBI++);
++NumInstrsRemoved;
}
- if (DTU)
- DTU->applyUpdatesPermissive(Updates);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ Updates.reserve(UniqueSuccessors.size());
+ for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
+ DTU->applyUpdates(Updates);
+ }
return NumInstrsRemoved;
}
CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) {
- SmallVector<Value *, 8> Args(II->arg_begin(), II->arg_end());
+ SmallVector<Value *, 8> Args(II->args());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
CallInst *NewCall = CallInst::Create(II->getFunctionType(),
@@ -2017,7 +2135,7 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
UnwindDestBB->removePredecessor(BB);
II->eraseFromParent();
if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDestBB}});
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
}
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
@@ -2033,7 +2151,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
BB->getInstList().pop_back();
// Create the new invoke instruction.
- SmallVector<Value *, 8> InvokeArgs(CI->arg_begin(), CI->arg_end());
+ SmallVector<Value *, 8> InvokeArgs(CI->args());
SmallVector<OperandBundleDef, 1> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
@@ -2164,8 +2282,7 @@ static bool markAliveBlocks(Function &F,
UnwindDestBB->removePredecessor(II->getParent());
II->eraseFromParent();
if (DTU)
- DTU->applyUpdatesPermissive(
- {{DominatorTree::Delete, BB, UnwindDestBB}});
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
} else
changeToCall(II, DTU);
Changed = true;
@@ -2194,6 +2311,7 @@ static bool markAliveBlocks(Function &F,
}
};
+ SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases;
// Set of unique CatchPads.
SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4,
CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>>
@@ -2203,14 +2321,22 @@ static bool markAliveBlocks(Function &F,
E = CatchSwitch->handler_end();
I != E; ++I) {
BasicBlock *HandlerBB = *I;
+ ++NumPerSuccessorCases[HandlerBB];
auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI());
if (!HandlerSet.insert({CatchPad, Empty}).second) {
+ --NumPerSuccessorCases[HandlerBB];
CatchSwitch->removeHandler(I);
--I;
--E;
Changed = true;
}
}
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, BB, I.first});
+ if (DTU)
+ DTU->applyUpdates(Updates);
}
Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU);
@@ -2254,7 +2380,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
TI->replaceAllUsesWith(NewTI);
TI->eraseFromParent();
if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDest}});
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDest}});
}
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
@@ -2270,28 +2396,39 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
return Changed;
assert(Reachable.size() < F.size());
- NumRemoved += F.size() - Reachable.size();
- SmallSetVector<BasicBlock *, 8> DeadBlockSet;
+ // Are there any blocks left to actually delete?
+ SmallSetVector<BasicBlock *, 8> BlocksToRemove;
for (BasicBlock &BB : F) {
// Skip reachable basic blocks
if (Reachable.count(&BB))
continue;
- DeadBlockSet.insert(&BB);
+ // Skip already-deleted blocks
+ if (DTU && DTU->isBBPendingDeletion(&BB))
+ continue;
+ BlocksToRemove.insert(&BB);
}
+ if (BlocksToRemove.empty())
+ return Changed;
+
+ Changed = true;
+ NumRemoved += BlocksToRemove.size();
+
if (MSSAU)
- MSSAU->removeBlocks(DeadBlockSet);
+ MSSAU->removeBlocks(BlocksToRemove);
- // Loop over all of the basic blocks that are not reachable, dropping all of
+ // Loop over all of the basic blocks that are up for removal, dropping all of
// their internal references. Update DTU if available.
std::vector<DominatorTree::UpdateType> Updates;
- for (auto *BB : DeadBlockSet) {
+ for (auto *BB : BlocksToRemove) {
+ SmallSetVector<BasicBlock *, 8> UniqueSuccessors;
for (BasicBlock *Successor : successors(BB)) {
- if (!DeadBlockSet.count(Successor))
+ // Only remove references to BB in reachable successors of BB.
+ if (Reachable.count(Successor))
Successor->removePredecessor(BB);
if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Successor});
+ UniqueSuccessors.insert(Successor);
}
BB->dropAllReferences();
if (DTU) {
@@ -2305,27 +2442,22 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
new UnreachableInst(BB->getContext(), BB);
assert(succ_empty(BB) && "The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
+ Updates.reserve(Updates.size() + UniqueSuccessors.size());
+ for (auto *UniqueSuccessor : UniqueSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
}
}
if (DTU) {
- DTU->applyUpdatesPermissive(Updates);
- bool Deleted = false;
- for (auto *BB : DeadBlockSet) {
- if (DTU->isBBPendingDeletion(BB))
- --NumRemoved;
- else
- Deleted = true;
+ DTU->applyUpdates(Updates);
+ for (auto *BB : BlocksToRemove)
DTU->deleteBB(BB);
- }
- if (!Deleted)
- return false;
} else {
- for (auto *BB : DeadBlockSet)
+ for (auto *BB : BlocksToRemove)
BB->eraseFromParent();
}
- return true;
+ return Changed;
}
void llvm::combineMetadata(Instruction *K, const Instruction *J,
@@ -2570,10 +2702,13 @@ bool llvm::callsGCLeafFunction(const CallBase *Call,
if (F->hasFnAttribute("gc-leaf-function"))
return true;
- if (auto IID = F->getIntrinsicID())
+ if (auto IID = F->getIntrinsicID()) {
// Most LLVM intrinsics do not take safepoints.
return IID != Intrinsic::experimental_gc_statepoint &&
- IID != Intrinsic::experimental_deoptimize;
+ IID != Intrinsic::experimental_deoptimize &&
+ IID != Intrinsic::memcpy_element_unordered_atomic &&
+ IID != Intrinsic::memmove_element_unordered_atomic;
+ }
}
// Lib calls can be materialized by some passes, and won't be
@@ -2701,7 +2836,7 @@ struct BitPart {
/// Analyze the specified subexpression and see if it is capable of providing
/// pieces of a bswap or bitreverse. The subexpression provides a potential
-/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
+/// piece of a bswap or bitreverse if it can be proved that each non-zero bit in
/// the output of the expression came from a corresponding bit in some other
/// value. This function is recursive, and the end result is a mapping of
/// bitnumber to bitnumber. It is the caller's responsibility to validate that
@@ -2713,6 +2848,10 @@ struct BitPart {
/// BitPart is returned with Provider set to %X and Provenance[24-31] set to
/// [0-7].
///
+/// For vector types, all analysis is performed at the per-element level. No
+/// cross-element analysis is supported (shuffle/insertion/reduction), and all
+/// constant masks must be splatted across all elements.
+///
/// To avoid revisiting values, the BitPart results are memoized into the
/// provided map. To avoid unnecessary copying of BitParts, BitParts are
/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
@@ -2730,7 +2869,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return I->second;
auto &Result = BPS[V] = None;
- auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ auto BitWidth = V->getType()->getScalarSizeInBits();
// Prevent stack overflow by limiting the recursion depth
if (Depth == BitPartRecursionMaxDepth) {
@@ -2738,13 +2877,16 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return Result;
}
- if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ Value *X, *Y;
+ const APInt *C;
+
// If this is an or instruction, it may be an inner node of the bswap.
- if (I->getOpcode() == Instruction::Or) {
- auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
- auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
+ if (match(V, m_Or(m_Value(X), m_Value(Y)))) {
+ const auto &A =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ const auto &B =
+ collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
if (!A || !B)
return Result;
@@ -2753,31 +2895,31 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return Result;
Result = BitPart(A->Provider, BitWidth);
- for (unsigned i = 0; i < A->Provenance.size(); ++i) {
- if (A->Provenance[i] != BitPart::Unset &&
- B->Provenance[i] != BitPart::Unset &&
- A->Provenance[i] != B->Provenance[i])
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) {
+ if (A->Provenance[BitIdx] != BitPart::Unset &&
+ B->Provenance[BitIdx] != BitPart::Unset &&
+ A->Provenance[BitIdx] != B->Provenance[BitIdx])
return Result = None;
- if (A->Provenance[i] == BitPart::Unset)
- Result->Provenance[i] = B->Provenance[i];
+ if (A->Provenance[BitIdx] == BitPart::Unset)
+ Result->Provenance[BitIdx] = B->Provenance[BitIdx];
else
- Result->Provenance[i] = A->Provenance[i];
+ Result->Provenance[BitIdx] = A->Provenance[BitIdx];
}
return Result;
}
// If this is a logical shift by a constant, recurse then shift the result.
- if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
- unsigned BitShift =
- cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ if (match(V, m_LogicalShift(m_Value(X), m_APInt(C)))) {
+ const APInt &BitShift = *C;
+
// Ensure the shift amount is defined.
- if (BitShift > BitWidth)
+ if (BitShift.uge(BitWidth))
return Result;
- auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
+ const auto &Res =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
Result = Res;
@@ -2785,11 +2927,11 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// Perform the "shift" on BitProvenance.
auto &P = Result->Provenance;
if (I->getOpcode() == Instruction::Shl) {
- P.erase(std::prev(P.end(), BitShift), P.end());
- P.insert(P.begin(), BitShift, BitPart::Unset);
+ P.erase(std::prev(P.end(), BitShift.getZExtValue()), P.end());
+ P.insert(P.begin(), BitShift.getZExtValue(), BitPart::Unset);
} else {
- P.erase(P.begin(), std::next(P.begin(), BitShift));
- P.insert(P.end(), BitShift, BitPart::Unset);
+ P.erase(P.begin(), std::next(P.begin(), BitShift.getZExtValue()));
+ P.insert(P.end(), BitShift.getZExtValue(), BitPart::Unset);
}
return Result;
@@ -2797,44 +2939,102 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// If this is a logical 'and' with a mask that clears bits, recurse then
// unset the appropriate bits.
- if (I->getOpcode() == Instruction::And &&
- isa<ConstantInt>(I->getOperand(1))) {
- APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
- const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+ if (match(V, m_And(m_Value(X), m_APInt(C)))) {
+ const APInt &AndMask = *C;
// Check that the mask allows a multiple of 8 bits for a bswap, for an
// early exit.
unsigned NumMaskedBits = AndMask.countPopulation();
- if (!MatchBitReversals && NumMaskedBits % 8 != 0)
+ if (!MatchBitReversals && (NumMaskedBits % 8) != 0)
return Result;
- auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
+ const auto &Res =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
Result = Res;
- for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1)
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
// If the AndMask is zero for this bit, clear the bit.
- if ((AndMask & Bit) == 0)
- Result->Provenance[i] = BitPart::Unset;
+ if (AndMask[BitIdx] == 0)
+ Result->Provenance[BitIdx] = BitPart::Unset;
return Result;
}
// If this is a zext instruction zero extend the result.
- if (I->getOpcode() == Instruction::ZExt) {
- auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
+ if (match(V, m_ZExt(m_Value(X)))) {
+ const auto &Res =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ if (!Res)
+ return Result;
+
+ Result = BitPart(Res->Provider, BitWidth);
+ auto NarrowBitWidth = X->getType()->getScalarSizeInBits();
+ for (unsigned BitIdx = 0; BitIdx < NarrowBitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = Res->Provenance[BitIdx];
+ for (unsigned BitIdx = NarrowBitWidth; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = BitPart::Unset;
+ return Result;
+ }
+
+    // BITREVERSE - most likely due to us previously matching a partial
+ // bitreverse.
+ if (match(V, m_BitReverse(m_Value(X)))) {
+ const auto &Res =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ if (!Res)
+ return Result;
+
+ Result = BitPart(Res->Provider, BitWidth);
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[(BitWidth - 1) - BitIdx] = Res->Provenance[BitIdx];
+ return Result;
+ }
+
+    // BSWAP - most likely due to us previously matching a partial bswap.
+ if (match(V, m_BSwap(m_Value(X)))) {
+ const auto &Res =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
+ unsigned ByteWidth = BitWidth / 8;
Result = BitPart(Res->Provider, BitWidth);
- auto NarrowBitWidth =
- cast<IntegerType>(cast<ZExtInst>(I)->getSrcTy())->getBitWidth();
- for (unsigned i = 0; i < NarrowBitWidth; ++i)
- Result->Provenance[i] = Res->Provenance[i];
- for (unsigned i = NarrowBitWidth; i < BitWidth; ++i)
- Result->Provenance[i] = BitPart::Unset;
+ for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) {
+ unsigned ByteBitOfs = ByteIdx * 8;
+ for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx)
+ Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] =
+ Res->Provenance[ByteBitOfs + BitIdx];
+ }
+ return Result;
+ }
+
+ // Funnel 'double' shifts take 3 operands, 2 inputs and the shift
+ // amount (modulo).
+ // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ if (match(V, m_FShl(m_Value(X), m_Value(Y), m_APInt(C))) ||
+ match(V, m_FShr(m_Value(X), m_Value(Y), m_APInt(C)))) {
+ // We can treat fshr as a fshl by flipping the modulo amount.
+ unsigned ModAmt = C->urem(BitWidth);
+ if (cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fshr)
+ ModAmt = BitWidth - ModAmt;
+
+ const auto &LHS =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ const auto &RHS =
+ collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+
+ // Check we have both sources and they are from the same provider.
+ if (!LHS || !RHS || !LHS->Provider || LHS->Provider != RHS->Provider)
+ return Result;
+
+ unsigned StartBitRHS = BitWidth - ModAmt;
+ Result = BitPart(LHS->Provider, BitWidth);
+ for (unsigned BitIdx = 0; BitIdx < StartBitRHS; ++BitIdx)
+ Result->Provenance[BitIdx + ModAmt] = LHS->Provenance[BitIdx];
+ for (unsigned BitIdx = 0; BitIdx < ModAmt; ++BitIdx)
+ Result->Provenance[BitIdx] = RHS->Provenance[BitIdx + StartBitRHS];
return Result;
}
}
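The fshl/fshr identities quoted in the comment can be checked in isolation; note that Z % BW == 0 must be special-cased in C++, since shifting a 32-bit value by 32 is undefined. A standalone plain-C++ check:

#include <cassert>
#include <cstdint>

// fshl(X,Y,Z): high word X, low word Y, rotated left by Z % BW.
uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  unsigned S = Z % 32;
  return S ? (X << S) | (Y >> (32 - S)) : X;
}
// fshr(X,Y,Z): same concatenation, rotated right by Z % BW.
uint32_t fshr32(uint32_t X, uint32_t Y, uint32_t Z) {
  unsigned S = Z % 32;
  return S ? (X << (32 - S)) | (Y >> S) : Y;
}

int main() {
  assert(fshl32(0x12345678u, 0x9ABCDEF0u, 8) == 0x3456789Au);
  assert(fshr32(0x12345678u, 0x9ABCDEF0u, 8) == 0x789ABCDEu);
  return 0;
}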
@@ -2842,8 +3042,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
// the input value to the bswap/bitreverse.
Result = BitPart(V, BitWidth);
- for (unsigned i = 0; i < BitWidth; ++i)
- Result->Provenance[i] = i;
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = BitIdx;
return Result;
}
@@ -2870,65 +3070,92 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
return false;
if (!MatchBSwaps && !MatchBitReversals)
return false;
- IntegerType *ITy = dyn_cast<IntegerType>(I->getType());
- if (!ITy || ITy->getBitWidth() > 128)
- return false; // Can't do vectors or integers > 128 bits.
- unsigned BW = ITy->getBitWidth();
-
- unsigned DemandedBW = BW;
- IntegerType *DemandedTy = ITy;
- if (I->hasOneUse()) {
- if (TruncInst *Trunc = dyn_cast<TruncInst>(I->user_back())) {
- DemandedTy = cast<IntegerType>(Trunc->getType());
- DemandedBW = DemandedTy->getBitWidth();
- }
- }
+ Type *ITy = I->getType();
+ if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128)
+    return false; // Can't do integers or vector elements > 128 bits.
+
+ Type *DemandedTy = ITy;
+ if (I->hasOneUse())
+ if (auto *Trunc = dyn_cast<TruncInst>(I->user_back()))
+ DemandedTy = Trunc->getType();
// Try to find all the pieces corresponding to the bswap.
std::map<Value *, Optional<BitPart>> BPS;
auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0);
if (!Res)
return false;
- auto &BitProvenance = Res->Provenance;
+ ArrayRef<int8_t> BitProvenance = Res->Provenance;
+ assert(all_of(BitProvenance,
+ [](int8_t I) { return I == BitPart::Unset || 0 <= I; }) &&
+ "Illegal bit provenance index");
+
+ // If the upper bits are zero, then attempt to perform as a truncated op.
+ if (BitProvenance.back() == BitPart::Unset) {
+ while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset)
+ BitProvenance = BitProvenance.drop_back();
+ if (BitProvenance.empty())
+ return false; // TODO - handle null value?
+ DemandedTy = Type::getIntNTy(I->getContext(), BitProvenance.size());
+ if (auto *IVecTy = dyn_cast<VectorType>(ITy))
+ DemandedTy = VectorType::get(DemandedTy, IVecTy);
+ }
+
+ // Check BitProvenance hasn't found a source larger than the result type.
+ unsigned DemandedBW = DemandedTy->getScalarSizeInBits();
+ if (DemandedBW > ITy->getScalarSizeInBits())
+ return false;
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
// only byteswap values with an even number of bytes.
- bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true;
- for (unsigned i = 0; i < DemandedBW; ++i) {
- OKForBSwap &=
- bitTransformIsCorrectForBSwap(BitProvenance[i], i, DemandedBW);
- OKForBitReverse &=
- bitTransformIsCorrectForBitReverse(BitProvenance[i], i, DemandedBW);
+ APInt DemandedMask = APInt::getAllOnesValue(DemandedBW);
+ bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0;
+ bool OKForBitReverse = MatchBitReversals;
+ for (unsigned BitIdx = 0;
+ (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) {
+ if (BitProvenance[BitIdx] == BitPart::Unset) {
+ DemandedMask.clearBit(BitIdx);
+ continue;
+ }
+ OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx,
+ DemandedBW);
+ OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx],
+ BitIdx, DemandedBW);
}
Intrinsic::ID Intrin;
- if (OKForBSwap && MatchBSwaps)
+ if (OKForBSwap)
Intrin = Intrinsic::bswap;
- else if (OKForBitReverse && MatchBitReversals)
+ else if (OKForBitReverse)
Intrin = Intrinsic::bitreverse;
else
return false;
- if (ITy != DemandedTy) {
- Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy);
- Value *Provider = Res->Provider;
- IntegerType *ProviderTy = cast<IntegerType>(Provider->getType());
- // We may need to truncate the provider.
- if (DemandedTy != ProviderTy) {
- auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy,
- "trunc", I);
- InsertedInsts.push_back(Trunc);
- Provider = Trunc;
- }
- auto *CI = CallInst::Create(F, Provider, "rev", I);
- InsertedInsts.push_back(CI);
- auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I);
+ Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy);
+ Value *Provider = Res->Provider;
+
+ // We may need to truncate the provider.
+ if (DemandedTy != Provider->getType()) {
+ auto *Trunc =
+ CastInst::CreateIntegerCast(Provider, DemandedTy, false, "trunc", I);
+ InsertedInsts.push_back(Trunc);
+ Provider = Trunc;
+ }
+
+ Instruction *Result = CallInst::Create(F, Provider, "rev", I);
+ InsertedInsts.push_back(Result);
+
+ if (!DemandedMask.isAllOnesValue()) {
+ auto *Mask = ConstantInt::get(DemandedTy, DemandedMask);
+ Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I);
+ InsertedInsts.push_back(Result);
+ }
+
+ // We may need to zeroextend back to the result type.
+ if (ITy != Result->getType()) {
+ auto *ExtInst = CastInst::CreateIntegerCast(Result, ITy, false, "zext", I);
InsertedInsts.push_back(ExtInst);
- return true;
}
- Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy);
- InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I));
return true;
}
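For context, this is the kind of shift/mask/or expansion the routine recognizes: collectBitParts() proves each output bit comes from the byte-mirrored input bit, and the rewrite above then emits a single bswap (optionally truncated, masked and zero-extended). A standalone 32-bit example in plain C++:

#include <cassert>
#include <cstdint>

// Open-coded byte swap: each byte is masked out and moved to its mirrored
// position. This is the pattern that would be collapsed into llvm.bswap.i32.
uint32_t bswapByHand(uint32_t V) {
  return ((V & 0x000000FFu) << 24) | ((V & 0x0000FF00u) << 8) |
         ((V & 0x00FF0000u) >> 8) | ((V & 0xFF000000u) >> 24);
}

int main() {
  assert(bswapByHand(0x11223344u) == 0x44332211u);
  return 0;
}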
@@ -3020,44 +3247,6 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
}
}
-using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>;
-AllocaInst *llvm::findAllocaForValue(Value *V,
- AllocaForValueMapTy &AllocaForValue) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
- return AI;
- // See if we've already calculated (or started to calculate) alloca for a
- // given value.
- AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
- if (I != AllocaForValue.end())
- return I->second;
- // Store 0 while we're calculating alloca for value V to avoid
- // infinite recursion if the value references itself.
- AllocaForValue[V] = nullptr;
- AllocaInst *Res = nullptr;
- if (CastInst *CI = dyn_cast<CastInst>(V))
- Res = findAllocaForValue(CI->getOperand(0), AllocaForValue);
- else if (PHINode *PN = dyn_cast<PHINode>(V)) {
- for (Value *IncValue : PN->incoming_values()) {
- // Allow self-referencing phi-nodes.
- if (IncValue == PN)
- continue;
- AllocaInst *IncValueAI = findAllocaForValue(IncValue, AllocaForValue);
- // AI for incoming values should exist and should all be equal.
- if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
- return nullptr;
- Res = IncValueAI;
- }
- } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) {
- Res = findAllocaForValue(EP->getPointerOperand(), AllocaForValue);
- } else {
- LLVM_DEBUG(dbgs() << "Alloca search cancelled on unknown instruction: "
- << *V << "\n");
- }
- if (Res)
- AllocaForValue[V] = Res;
- return Res;
-}
-
Value *llvm::invertCondition(Value *Condition) {
// First: Check if it's a constant
if (Constant *C = dyn_cast<Constant>(Condition))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
index c653aacbee6c..befacb591762 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -1,4 +1,4 @@
-//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
+//===- LoopPeel.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,12 +6,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements some loop unrolling utilities for peeling loops
-// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for
-// unrolling loops with compile-time constant trip counts.
-//
+// Loop Peeling Utilities.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
@@ -49,10 +47,24 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-#define DEBUG_TYPE "loop-unroll"
+#define DEBUG_TYPE "loop-peel"
STATISTIC(NumPeeled, "Number of loops peeled");
+static cl::opt<unsigned> UnrollPeelCount(
+ "unroll-peel-count", cl::Hidden,
+ cl::desc("Set the unroll peeling count, for testing purposes"));
+
+static cl::opt<bool>
+ UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
+ cl::desc("Allows loops to be peeled when the dynamic "
+ "trip count is known to be low."));
+
+static cl::opt<bool>
+ UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
+ cl::init(false), cl::Hidden,
+ cl::desc("Allows loop nests to be peeled."));
+
static cl::opt<unsigned> UnrollPeelMaxCount(
"unroll-peel-max-count", cl::init(7), cl::Hidden,
cl::desc("Max average trip count which will cause loop peeling."));
@@ -103,7 +115,12 @@ bool llvm::canPeel(Loop *L) {
// This can be an indication of two different things:
// 1) The loop is not rotated.
// 2) The loop contains irreducible control flow that involves the latch.
- if (L->getLoopLatch() != L->getExitingBlock())
+ const BasicBlock *Latch = L->getLoopLatch();
+ if (Latch != L->getExitingBlock())
+ return false;
+
+ // Peeling is only supported if the latch is a branch.
+ if (!isa<BranchInst>(Latch->getTerminator()))
return false;
return true;
@@ -215,11 +232,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// consider AddRecs of the loop we are trying to peel.
if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
continue;
- bool Increasing;
if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
- !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
+ !SE.getMonotonicPredicateType(LeftAR, Pred))
continue;
- (void)Increasing;
// Check if extending the current DesiredPeelCount lets us evaluate Pred
// or !Pred in the loop body statically.
@@ -278,9 +293,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
- TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP,
- unsigned &TripCount, ScalarEvolution &SE) {
+ unsigned &TripCount, ScalarEvolution &SE,
+ unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -292,7 +307,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// Only try to peel innermost loops by default.
// The constraint can be relaxed by the target in TTI.getUnrollingPreferences
// or by the flag -unroll-allow-loop-nests-peeling.
- if (!PP.AllowLoopNestsPeeling && !L->empty())
+ if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
return;
// If the user provided a peel count, use that.
@@ -322,7 +337,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// maximum number of iterations among these values, thus turning all those
// Phis into invariants.
// First, check that we can peel at least one iteration.
- if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
+ if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) {
// Store the pre-calculated values here.
SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
// Now go through all Phis to calculate the number of iterations they
@@ -342,7 +357,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// Pay respect to limitations implied by loop size and the max peel count.
unsigned MaxPeelCount = UnrollPeelMaxCount;
- MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+ MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
DesiredPeelCount = std::max(DesiredPeelCount,
countToEliminateCompares(*L, MaxPeelCount, SE));
@@ -385,7 +400,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (*PeelCount) {
if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
- (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
+ (LoopSize * (*PeelCount + 1) <= Threshold)) {
LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
<< " iterations.\n");
PP.PeelCount = *PeelCount;
@@ -396,7 +411,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
<< "\n");
- LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
+ LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
}
}
}
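
A minimal arithmetic sketch of the size budget used in the hunks above (standalone C++; the function and its parameters are stand-ins, not LLVM API). Peeling N iterations leaves N + 1 copies of the body, so the affordable count is bounded by Threshold / LoopSize - 1 and by the UnrollPeelMaxCount-style cap, and peeling is not considered at all unless two copies fit:

    #include <algorithm>
    #include <cstdio>

    unsigned maxAffordablePeelCount(unsigned LoopSize, unsigned Threshold,
                                    unsigned MaxPeelCount) {
      // Mirrors "2 * LoopSize <= Threshold": one peeled copy plus the loop.
      if (LoopSize == 0 || 2 * LoopSize > Threshold || MaxPeelCount == 0)
        return 0;
      // Mirrors "LoopSize * (PeelCount + 1) <= Threshold".
      return std::min(MaxPeelCount, Threshold / LoopSize - 1);
    }

    int main() {
      // A 20-instruction body with a 150-instruction budget and a cap of 7.
      std::printf("%u\n", maxAffordablePeelCount(20, 150, 7)); // prints 6
    }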
@@ -491,10 +506,10 @@ static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
/// instructions in the last peeled-off iteration.
static void cloneLoopBlocks(
Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
- SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges,
+ SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
- LoopInfo *LI) {
+ LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
BasicBlock *PreHeader = L->getLoopPreheader();
@@ -530,6 +545,15 @@ static void cloneLoopBlocks(
}
}
+ {
+ // Identify what other metadata depends on the cloned version. After
+ // cloning, replace the metadata with the corrected version for both
+ // memory instructions and noalias intrinsics.
+ std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
+ cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+ Header->getContext(), Ext);
+ }
+
// Recursively create the new Loop objects for nested loops, if any,
// to preserve LoopInfo.
for (Loop *ChildLoop : *L) {
@@ -599,6 +623,40 @@ static void cloneLoopBlocks(
LVMap[KV.first] = KV.second;
}
+TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(
+ Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ Optional<bool> UserAllowPeeling,
+ Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) {
+ TargetTransformInfo::PeelingPreferences PP;
+
+ // Set the default values.
+ PP.PeelCount = 0;
+ PP.AllowPeeling = true;
+ PP.AllowLoopNestsPeeling = false;
+ PP.PeelProfiledIterations = true;
+
+ // Get the target specific values.
+ TTI.getPeelingPreferences(L, SE, PP);
+
+ // User specified values using cl::opt.
+ if (UnrollingSpecficValues) {
+ if (UnrollPeelCount.getNumOccurrences() > 0)
+ PP.PeelCount = UnrollPeelCount;
+ if (UnrollAllowPeeling.getNumOccurrences() > 0)
+ PP.AllowPeeling = UnrollAllowPeeling;
+ if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
+ PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
+ }
+
+ // User specified values provided as arguments.
+ if (UserAllowPeeling.hasValue())
+ PP.AllowPeeling = *UserAllowPeeling;
+ if (UserAllowProfileBasedPeeling.hasValue())
+ PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
+
+ return PP;
+}
+
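
A hedged sketch of the precedence that gatherPeelingPreferences above appears to implement, using plain C++ stand-ins (Prefs and the parameter names are illustrative, not the LLVM types): built-in defaults first, then the target hook, then the unroll command-line flags when requested, and finally explicit per-call arguments win:

    #include <optional>

    struct Prefs {                      // stand-in for PeelingPreferences
      unsigned PeelCount = 0;
      bool AllowPeeling = true;
      bool AllowLoopNestsPeeling = false;
      bool PeelProfiledIterations = true;
    };

    Prefs gatherPrefs(void (*applyTargetPrefs)(Prefs &),     // like TTI.getPeelingPreferences
                      std::optional<unsigned> FlagPeelCount, // like -unroll-peel-count
                      std::optional<bool> FlagAllowPeeling,  // like -unroll-allow-peeling
                      bool UseUnrollFlags,
                      std::optional<bool> UserAllowPeeling) {
      Prefs PP;                         // 1. defaults
      applyTargetPrefs(PP);             // 2. target-specific values
      if (UseUnrollFlags) {             // 3. cl::opt overrides, if requested
        if (FlagPeelCount)
          PP.PeelCount = *FlagPeelCount;
        if (FlagAllowPeeling)
          PP.AllowPeeling = *FlagAllowPeeling;
      }
      if (UserAllowPeeling)             // 4. explicit arguments take priority
        PP.AllowPeeling = *UserAllowPeeling;
      return PP;
    }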
/// Peel off the first \p PeelCount iterations of loop \p L.
///
/// Note that this does not peel them off as a single straight-line block.
@@ -609,8 +667,8 @@ static void cloneLoopBlocks(
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, bool PreserveLCSSA) {
+ ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveLCSSA) {
assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
@@ -720,13 +778,19 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
uint64_t ExitWeight = 0, FallThroughWeight = 0;
initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
+ // Identify what noalias metadata is inside the loop: if it is inside the
+ // loop, the associated metadata must be cloned for each iteration.
+ SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
+ identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
// For each peeled-off iteration, make a copy of the loop.
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
SmallVector<BasicBlock *, 8> NewBlocks;
ValueToValueMapTy VMap;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
- LoopBlocks, VMap, LVMap, DT, LI);
+ LoopBlocks, VMap, LVMap, DT, LI,
+ LoopLocalNoAliasDeclScopes);
// Remap to use values from the current iteration instead of the
// previous one.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 8804bba975b6..b678efdc8d88 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -12,7 +12,6 @@
#include "llvm/Transforms/Utils/LoopRotationUtils.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -36,6 +35,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -44,6 +44,8 @@ using namespace llvm;
#define DEBUG_TYPE "loop-rotate"
+STATISTIC(NumNotRotatedDueToHeaderSize,
+ "Number of loops not rotated due to the header size");
STATISTIC(NumRotated, "Number of loops rotated");
static cl::opt<bool>
@@ -64,15 +66,17 @@ class LoopRotate {
const SimplifyQuery &SQ;
bool RotationOnly;
bool IsUtilMode;
+ bool PrepareForLTO;
public:
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
+ const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
+ bool PrepareForLTO)
: MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
- IsUtilMode(IsUtilMode) {}
+ IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
bool processLoop(Loop *L);
private:
@@ -300,7 +304,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
CodeMetrics::collectEphemeralValues(L, AC, EphValues);
CodeMetrics Metrics;
- Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO);
if (Metrics.notDuplicatable) {
LLVM_DEBUG(
dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
@@ -320,8 +324,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
<< " instructions, which is more than the threshold ("
<< MaxHeaderSize << " instructions): ";
L->dump());
+ ++NumNotRotatedDueToHeaderSize;
return Rotated;
}
+
+ // When preparing for LTO, avoid rotating loops with calls that could be
+ // inlined during the LTO stage.
+ if (PrepareForLTO && Metrics.NumInlineCandidates > 0)
+ return Rotated;
}
// Now, this loop is suitable for rotation.
@@ -391,6 +401,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
break;
}
+ // Remember the local noalias scope declarations in the header. After the
+ // rotation, they must be duplicated and the scope must be cloned. This
+ // avoids unwanted interaction across iterations.
+ SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions;
+ for (Instruction &I : *OrigHeader)
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ NoAliasDeclInstructions.push_back(Decl);
+
while (I != E) {
Instruction *Inst = &*I++;
@@ -451,6 +469,69 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
}
}
+ if (!NoAliasDeclInstructions.empty()) {
+ // There are noalias scope declarations:
+ // (general):
+ // Original: OrigPre { OrigHeader NewHeader ... Latch }
+ // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader }
+ //
+ // with D: llvm.experimental.noalias.scope.decl,
+ // U: !noalias or !alias.scope depending on D
+ // ... { D U1 U2 } can transform into:
+ // (0) : ... { D U1 U2 } // no relevant rotation for this part
+ // (1) : ... D' { U1 U2 D } // D is part of OrigHeader
+ // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader
+ //
+ // We now want to transform:
+ // (1) -> : ... D' { D U1 U2 D'' }
+ // (2) -> : ... D' U1' { D U2 D'' U1'' }
+ // D: original llvm.experimental.noalias.scope.decl
+ // D', U1': duplicate with replaced scopes
+ // D'', U1'': different duplicate with replaced scopes
+ // This ensures a safe fallback to 'may_alias' introduced by the rotate,
+ // as U1'' and U1' scopes will not be compatible with respect to the local restrict.
+
+ // Clone the llvm.experimental.noalias.decl again for the NewHeader.
+ Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI());
+ for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) {
+ LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:"
+ << *NAD << "\n");
+ Instruction *NewNAD = NAD->clone();
+ NewNAD->insertBefore(NewHeaderInsertionPoint);
+ }
+
+ // Scopes must now be duplicated, once for OrigHeader and once for
+ // OrigPreHeader'.
+ {
+ auto &Context = NewHeader->getContext();
+
+ SmallVector<MDNode *, 8> NoAliasDeclScopes;
+ for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions)
+ NoAliasDeclScopes.push_back(NAD->getScopeList());
+
+ LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n");
+ cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context,
+ "h.rot");
+ LLVM_DEBUG(OrigHeader->dump());
+
+ // Keep the compile time impact low by only adapting the inserted block
+ // of instructions in the OrigPreHeader. This might result in slightly
+ // more aliasing between these instructions and those that were already
+ // present, but it will be much faster when the original PreHeader is
+ // large.
+ LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n");
+ auto *FirstDecl =
+ cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]);
+ auto *LastInst = &OrigPreheader->back();
+ cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst,
+ Context, "pre.rot");
+ LLVM_DEBUG(OrigPreheader->dump());
+
+ LLVM_DEBUG(dbgs() << " Updated NewHeader:\n");
+ LLVM_DEBUG(NewHeader->dump());
+ }
+ }
+
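
The same two utilities appear in the peeling and unrolling hunks of this diff, so the underlying pattern can be summarised in a small sketch (the wrapper below is hypothetical; identifyNoAliasScopesToClone and cloneAndAdaptNoAliasScopes are the LLVM 12 helpers from llvm/Transforms/Utils/Cloning.h that the diff itself calls): collect the scope lists of the noalias declarations inside the original region, then, after duplicating the region, clone those scopes and rewrite the !alias.scope / !noalias metadata in the copy so the two copies do not share restrict scopes:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    #include <string>

    using namespace llvm;

    // Hypothetical wrapper: after duplicating OrigBlocks into NewBlocks, give
    // the duplicate its own noalias scope domain.
    static void adaptClonedNoAliasScopes(ArrayRef<BasicBlock *> OrigBlocks,
                                         ArrayRef<BasicBlock *> NewBlocks,
                                         LLVMContext &Ctx,
                                         const std::string &Suffix) {
      SmallVector<MDNode *, 6> DeclScopes;
      // Scope lists of llvm.experimental.noalias.scope.decl inside the region.
      identifyNoAliasScopesToClone(OrigBlocks, DeclScopes);
      // Clone the scopes and retarget the metadata in the copied blocks.
      cloneAndAdaptNoAliasScopes(DeclScopes, NewBlocks, Ctx, Suffix);
    }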
// Along with all the other instructions, we just cloned OrigHeader's
// terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
// successors by duplicating their incoming values for OrigHeader.
@@ -496,12 +577,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
- DT->applyUpdates(Updates);
if (MSSAU) {
- MSSAU->applyUpdates(Updates, *DT);
+ MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
+ } else {
+ DT->applyUpdates(Updates);
}
}
@@ -575,7 +657,10 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// connected by an unconditional branch. This is just a cleanup so the
// emitted code isn't too gross in this common case.
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
+ BasicBlock *PredBB = OrigHeader->getUniquePredecessor();
+ bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
+ if (DidMerge)
+ RemoveRedundantDbgInstrs(PredBB);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -739,13 +824,8 @@ bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
const SimplifyQuery &SQ, bool RotationOnly = true,
unsigned Threshold = unsigned(-1),
- bool IsUtilMode = true) {
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ bool IsUtilMode = true, bool PrepareForLTO) {
LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
- IsUtilMode);
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
+ IsUtilMode, PrepareForLTO);
return LR.processLoop(L);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index a8445e94e55a..2e104334ad96 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -163,7 +163,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
/// if it's not already in there. Stop predecessor traversal when we reach
/// StopBlock.
static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
- std::set<BasicBlock*> &Blocks) {
+ SmallPtrSetImpl<BasicBlock *> &Blocks) {
SmallVector<BasicBlock *, 8> Worklist;
Worklist.push_back(InputBB);
do {
@@ -171,10 +171,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
if (Blocks.insert(BB).second && BB != StopBlock)
// If BB is not already processed and it is not a stop block then
// insert its predecessor in the work list
- for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
- BasicBlock *WBB = *I;
- Worklist.push_back(WBB);
- }
+ append_range(Worklist, predecessors(BB));
} while (!Worklist.empty());
}
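
A self-contained model of the worklist traversal above, on a plain adjacency map rather than LLVM basic blocks (types and names below are illustrative): pop a node, and if it has not been seen and is not the stop node, push all of its predecessors:

    #include <map>
    #include <set>
    #include <vector>

    using Graph = std::map<int, std::vector<int>>; // node -> its predecessors

    void addNodeAndPredsToSet(const Graph &Preds, int Input, int Stop,
                              std::set<int> &Blocks) {
      std::vector<int> Worklist{Input};
      do {
        int BB = Worklist.back();
        Worklist.pop_back();
        // Expand only unseen nodes, and never walk past the stop node.
        if (Blocks.insert(BB).second && BB != Stop) {
          auto It = Preds.find(BB);
          if (It != Preds.end())
            Worklist.insert(Worklist.end(), It->second.begin(),
                            It->second.end());
        }
      } while (!Worklist.empty());
    }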
@@ -308,9 +305,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
// Determine which blocks should stay in L and which should be moved out to
// the Outer loop now.
- std::set<BasicBlock*> BlocksInL;
- for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
- BasicBlock *P = *PI;
+ SmallPtrSet<BasicBlock *, 4> BlocksInL;
+ for (BasicBlock *P : predecessors(Header)) {
if (DT->dominates(Header, P))
addBlockAndPredsToSet(P, Header, BlocksInL);
}
@@ -683,7 +679,7 @@ ReprocessLoop:
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
- if (!FoldBranchToCommonDest(BI, MSSAU))
+ if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
continue;
// Success. The block is now dead, so remove it from the loop,
@@ -691,7 +687,7 @@ ReprocessLoop:
LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
<< ExitingBlock->getName() << "\n");
- assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ assert(pred_empty(ExitingBlock));
Changed = true;
LI->removeBlock(ExitingBlock);
@@ -836,8 +832,8 @@ bool LoopSimplify::runOnFunction(Function &F) {
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
// Simplify each loop nest in the function.
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
+ for (auto *L : *LI)
+ Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
#ifndef NDEBUG
if (PreserveLCSSA) {
@@ -866,9 +862,9 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
// Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
// after simplifying the loops. MemorySSA is preserved if it exists.
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ for (auto *L : *LI)
Changed |=
- simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false);
+ simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false);
if (!Changed)
return PreservedAnalyses::all();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 3875c631f839..d4cd57405239 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -59,6 +59,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -108,14 +109,15 @@ UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
/// insert a phi-node, otherwise LCSSA will be broken.
/// The function is just a helper function for llvm::UnrollLoop that returns
/// true if this situation occurs, indicating that LCSSA needs to be fixed.
-static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
+static bool needToInsertPhisForLCSSA(Loop *L,
+ const std::vector<BasicBlock *> &Blocks,
LoopInfo *LI) {
for (BasicBlock *BB : Blocks) {
if (LI->getLoopFor(BB) == L)
continue;
for (Instruction &I : *BB) {
for (Use &U : I.operands()) {
- if (auto Def = dyn_cast<Instruction>(U)) {
+ if (const auto *Def = dyn_cast<Instruction>(U)) {
Loop *DefLoop = LI->getLoopFor(Def->getParent());
if (!DefLoop)
continue;
@@ -286,14 +288,12 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
OptimizationRemarkEmitter *ORE,
bool PreserveLCSSA, Loop **RemainderLoop) {
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) {
+ if (!L->getLoopPreheader()) {
LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
return LoopUnrollResult::Unmodified;
}
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (!LatchBlock) {
+ if (!L->getLoopLatch()) {
LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
return LoopUnrollResult::Unmodified;
}
@@ -304,37 +304,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}
- // The current loop unroll pass can unroll loops that have
- // (1) single latch; and
- // (2a) latch is unconditional; or
- // (2b) latch is conditional and is an exiting block
- // FIXME: The implementation can be extended to work with more complicated
- // cases, e.g. loops with multiple latches.
- BasicBlock *Header = L->getHeader();
- BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-
- // A conditional branch which exits the loop, which can be optimized to an
- // unconditional branch in the unrolled loop in some cases.
- BranchInst *ExitingBI = nullptr;
- bool LatchIsExiting = L->isLoopExiting(LatchBlock);
- if (LatchIsExiting)
- ExitingBI = LatchBI;
- else if (BasicBlock *ExitingBlock = L->getExitingBlock())
- ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
- LLVM_DEBUG(
- dbgs() << "Can't unroll; a conditional latch must exit the loop");
- return LoopUnrollResult::Unmodified;
- }
- LLVM_DEBUG({
- if (ExitingBI)
- dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName()
- << "\n";
- else
- dbgs() << " No single exiting block\n";
- });
-
- if (Header->hasAddressTaken()) {
+ if (L->getHeader()->hasAddressTaken()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
LLVM_DEBUG(
dbgs() << " Won't unroll loop: address of header block is taken.\n");
@@ -363,20 +333,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = ULO.Count == ULO.TripCount;
- SmallVector<BasicBlock *, 4> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
-
- // Go through all exits of L and see if there are any phi-nodes there. We just
- // conservatively assume that they're inserted to preserve LCSSA form, which
- // means that complete unrolling might break this form. We need to either fix
- // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
- // now we just recompute LCSSA for the outer loop, but it should be possible
- // to fix it in-place.
- bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
- any_of(ExitBlocks, [](const BasicBlock *BB) {
- return isa<PHINode>(BB->begin());
- });
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
@@ -401,12 +357,63 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
BasicBlock *ExitingBlock = L->getLoopLatch();
assert(ExitingBlock && "Loop without exiting block?");
assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
- Preheader = L->getLoopPreheader();
ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
}
+ // All these values should be taken only after peeling because they might have
+ // changed.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
+
+ // Go through all exits of L and see if there are any phi-nodes there. We just
+ // conservatively assume that they're inserted to preserve LCSSA form, which
+ // means that complete unrolling might break this form. We need to either fix
+ // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
+ // now we just recompute LCSSA for the outer loop, but it should be possible
+ // to fix it in-place.
+ bool NeedToFixLCSSA =
+ PreserveLCSSA && CompletelyUnroll &&
+ any_of(ExitBlocks,
+ [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
+
+ // The current loop unroll pass can unroll loops that have
+ // (1) single latch; and
+ // (2a) latch is unconditional; or
+ // (2b) latch is conditional and is an exiting block
+ // FIXME: The implementation can be extended to work with more complicated
+ // cases, e.g. loops with multiple latches.
+ BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+ // A conditional branch which exits the loop, which can be optimized to an
+ // unconditional branch in the unrolled loop in some cases.
+ BranchInst *ExitingBI = nullptr;
+ bool LatchIsExiting = L->isLoopExiting(LatchBlock);
+ if (LatchIsExiting)
+ ExitingBI = LatchBI;
+ else if (BasicBlock *ExitingBlock = L->getExitingBlock())
+ ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
+ // If the peeling guard is changed this assert may be relaxed or even
+ // deleted.
+ assert(!Peeled && "Peeling guard changed!");
+ LLVM_DEBUG(
+ dbgs() << "Can't unroll; a conditional latch must exit the loop");
+ return LoopUnrollResult::Unmodified;
+ }
+ LLVM_DEBUG({
+ if (ExitingBI)
+ dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName()
+ << "\n";
+ else
+ dbgs() << " No single exiting block\n";
+ });
+
// Loops containing convergent instructions must have a count that divides
// their TripMultiple.
LLVM_DEBUG(
@@ -583,6 +590,11 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
<< DIL->getFilename() << " Line: " << DIL->getLine());
}
+ // Identify what noalias metadata is inside the loop: if it is inside the
+ // loop, the associated metadata must be cloned for each iteration.
+ SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
+ identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
for (unsigned It = 1; It != ULO.Count; ++It) {
SmallVector<BasicBlock *, 8> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -676,6 +688,15 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
AC->registerAssumption(II);
}
}
+
+ {
+ // Identify what other metadata depends on the cloned version. After
+ // cloning, replace the metadata with the corrected version for both
+ // memory instructions and noalias intrinsics.
+ std::string ext = (Twine("It") + Twine(It)).str();
+ cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+ Header->getContext(), ext);
+ }
}
// Loop over the PHI nodes in the original block, setting incoming values.
@@ -863,9 +884,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
// Dest has been folded into Fold. Update our worklists accordingly.
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
- UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
- UnrolledLoopBlocks.end(), Dest),
- UnrolledLoopBlocks.end());
+ llvm::erase_value(UnrolledLoopBlocks, Dest);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index dd628f3e7e0c..6e32a2b865aa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -148,8 +148,7 @@ static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
}
while (!Worklist.empty()) {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
+ Instruction *I = Worklist.pop_back_val();
if (!Visit(I))
return false;
@@ -459,14 +458,6 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
// finish up connecting the blocks and phi nodes. At this point LastValueMap
// is the last unrolled iterations values.
- // Update Phis in BB from OldBB to point to NewBB
- auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB,
- BasicBlock *NewBB) {
- for (PHINode &Phi : BB->phis()) {
- int I = Phi.getBasicBlockIndex(OldBB);
- Phi.setIncomingBlock(I, NewBB);
- }
- };
// Update Phis in BB from OldBB to point to NewBB and use the latest value
// from LastValueMap
auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
@@ -525,10 +516,10 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
- updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
- ForeBlocksLast.back());
- updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
- SubLoopBlocksLast.back());
+ SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0],
+ ForeBlocksLast.back());
+ SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
+ SubLoopBlocksLast.back());
for (unsigned It = 1; It != Count; It++) {
// Replace the conditional branch of the previous iteration subloop with an
@@ -538,10 +529,10 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
SubTerm->eraseFromParent();
- updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
- ForeBlocksLast.back());
- updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
- SubLoopBlocksLast.back());
+ SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It],
+ ForeBlocksLast.back());
+ SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
+ SubLoopBlocksLast.back());
movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
}
@@ -555,8 +546,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit &&
"Expecting the ContinueOnTrue successor of AftTerm to be LoopExit");
}
- updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
- SubLoopBlocksLast.back());
+ AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
+ SubLoopBlocksLast.back());
for (unsigned It = 1; It != Count; It++) {
// Replace the conditional branch of the previous iteration subloop with an
@@ -566,8 +557,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
BranchInst::Create(AftBlocksFirst[It], AftTerm);
AftTerm->eraseFromParent();
- updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
- SubLoopBlocksLast.back());
+ AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
+ SubLoopBlocksLast.back());
movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 2515b1676cb9..0abf62be156f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -22,11 +22,11 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
@@ -505,6 +505,32 @@ static bool canProfitablyUnrollMultiExitLoop(
// know of kinds of multiexit loops that would benefit from unrolling.
}
+// Assign the maximum possible trip count as the back edge weight for the
+// remainder loop if the original loop comes with a branch weight.
+static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
+ Loop *RemainderLoop,
+ uint64_t UnrollFactor) {
+ uint64_t TrueWeight, FalseWeight;
+ BranchInst *LatchBR =
+ cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
+ if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
+ ? FalseWeight
+ : TrueWeight;
+ assert(UnrollFactor > 1);
+ uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
+ BasicBlock *Header = RemainderLoop->getHeader();
+ BasicBlock *Latch = RemainderLoop->getLoopLatch();
+ auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
+ unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
+ MDBuilder MDB(RemainderLatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+ : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+ }
+}
+
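
The weight chosen by updateLatchBranchWeightsForRemainderLoop above is plain arithmetic on the extracted profile data, roughly as follows (standalone C++, not the MDBuilder calls): the epilogue runs at most UnrollFactor - 1 iterations per entry, so its back edge gets that multiple of the exit weight:

    #include <cassert>
    #include <cstdint>
    #include <utility>

    // Returns {BackEdgeWeight, ExitWeight} for the remainder-loop latch.
    std::pair<uint64_t, uint64_t>
    remainderLatchWeights(uint64_t ExitWeight, uint64_t UnrollFactor) {
      assert(UnrollFactor > 1 && "no remainder loop for an unroll factor of 1");
      // The remainder executes between 0 and UnrollFactor - 1 iterations, so
      // assume the maximum possible trip count for the back edge.
      return {(UnrollFactor - 1) * ExitWeight, ExitWeight};
    }

    // E.g. an unroll factor of 4 with exit weight 1 yields {3, 1}: the back
    // edge is assumed taken up to three times per entry into the remainder.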
/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
///
@@ -788,6 +814,11 @@ bool llvm::UnrollRuntimeLoopRemainder(
InsertTop, InsertBot,
NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
+ // Assign the maximum possible trip count as the back edge weight for the
+ // remainder loop if the original loop comes with a branch weight.
+ if (remainderLoop && !UnrollRemainder)
+ updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
+
// Insert the cloned blocks into the function.
F->getBasicBlockList().splice(InsertBot->getIterator(),
F->getBasicBlockList(),
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 43363736684e..f0f423e9812a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -63,6 +63,7 @@ static cl::opt<bool> ForceReductionIntrinsic(
static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
+static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
@@ -297,10 +298,24 @@ static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
llvm_unreachable("unexpected number of options");
}
-static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
+bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
}
+Optional<ElementCount>
+llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
+ Optional<int> Width =
+ getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
+
+ if (Width.hasValue()) {
+ Optional<int> IsScalable = getOptionalIntLoopAttribute(
+ TheLoop, "llvm.loop.vectorize.scalable.enable");
+ return ElementCount::get(*Width, IsScalable.getValueOr(false));
+ }
+
+ return None;
+}
+
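
A small stand-in for how the two hints combine in getOptionalElementCountLoopAttribute above (plain C++; EC only models llvm::ElementCount and is not the real class): llvm.loop.vectorize.width supplies the minimum lane count, and llvm.loop.vectorize.scalable.enable, defaulting to false, decides whether that count is fixed or a multiple of the scalable register size:

    #include <optional>

    struct EC {               // stand-in for llvm::ElementCount
      int MinLanes;
      bool Scalable;
      bool isScalar() const { return !Scalable && MinLanes == 1; }
      bool isVector() const { return (Scalable && MinLanes != 0) || MinLanes > 1; }
    };

    std::optional<EC> elementCountFromHints(std::optional<int> Width,
                                            std::optional<int> ScalableEnable) {
      if (!Width)
        return std::nullopt;              // no width hint -> no element count
      return EC{*Width, ScalableEnable.value_or(0) != 0};
    }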
llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
StringRef Name) {
const MDOperand *AttrMD =
@@ -334,7 +349,7 @@ Optional<MDNode *> llvm::makeFollowupLoopID(
bool Changed = false;
if (InheritAllAttrs || InheritSomeAttrs) {
- for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) {
+ for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
MDNode *Op = cast<MDNode>(Existing.get());
auto InheritThisAttribute = [InheritSomeAttrs,
@@ -371,7 +386,7 @@ Optional<MDNode *> llvm::makeFollowupLoopID(
continue;
HasAnyFollowup = true;
- for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) {
+ for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
MDs.push_back(Option.get());
Changed = true;
}
@@ -404,6 +419,10 @@ bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
}
+bool llvm::hasMustProgress(const Loop *L) {
+ return getBooleanLoopAttribute(L, LLVMLoopMustProgress);
+}
+
TransformationMode llvm::hasUnrollTransformation(Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
return TM_SuppressedByUser;
@@ -450,14 +469,15 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
if (Enable == false)
return TM_SuppressedByUser;
- Optional<int> VectorizeWidth =
- getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+ Optional<ElementCount> VectorizeWidth =
+ getOptionalElementCountLoopAttribute(L);
Optional<int> InterleaveCount =
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
// 'Forcing' vector width and interleave count to one effectively disables
// this transformation.
- if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
+ if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
+ InterleaveCount == 1)
return TM_SuppressedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
@@ -466,10 +486,10 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
if (Enable == true)
return TM_ForcedByUser;
- if (VectorizeWidth == 1 && InterleaveCount == 1)
+ if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
return TM_Disable;
- if (VectorizeWidth > 1 || InterleaveCount > 1)
+ if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
return TM_Enable;
if (hasDisableAllTransformsHint(L))
@@ -542,10 +562,6 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
if (SE)
SE->forgetLoop(L);
- auto *ExitBlock = L->getUniqueExitBlock();
- assert(ExitBlock && "Should have a unique exit block!");
- assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
-
auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
assert(OldBr && "Preheader must end with a branch");
assert(OldBr->isUnconditional() && "Preheader must have a single successor");
@@ -575,48 +591,63 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
// deleting the backedge of the outer loop). If the outer loop is indeed a
// non-loop, it will be deleted in a future iteration of loop deletion pass.
IRBuilder<> Builder(OldBr);
- Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
- // Remove the old branch. The conditional branch becomes a new terminator.
- OldBr->eraseFromParent();
-
- // Rewrite phis in the exit block to get their inputs from the Preheader
- // instead of the exiting block.
- for (PHINode &P : ExitBlock->phis()) {
- // Set the zero'th element of Phi to be from the preheader and remove all
- // other incoming values. Given the loop has dedicated exits, all other
- // incoming values must be from the exiting blocks.
- int PredIndex = 0;
- P.setIncomingBlock(PredIndex, Preheader);
- // Removes all incoming values from all other exiting blocks (including
- // duplicate values from an exiting block).
- // Nuke all entries except the zero'th entry which is the preheader entry.
- // NOTE! We need to remove Incoming Values in the reverse order as done
- // below, to keep the indices valid for deletion (removeIncomingValues
- // updates getNumIncomingValues and shifts all values down into the operand
- // being deleted).
- for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
- P.removeIncomingValue(e - i, false);
-
- assert((P.getNumIncomingValues() == 1 &&
- P.getIncomingBlock(PredIndex) == Preheader) &&
- "Should have exactly one value and that's from the preheader!");
- }
+ auto *ExitBlock = L->getUniqueExitBlock();
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- if (DT) {
- DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
- if (MSSA) {
- MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, *DT);
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
+ if (ExitBlock) {
+ assert(ExitBlock && "Should have a unique exit block!");
+ assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
+
+ Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
+ // Remove the old branch. The conditional branch becomes a new terminator.
+ OldBr->eraseFromParent();
+
+ // Rewrite phis in the exit block to get their inputs from the Preheader
+ // instead of the exiting block.
+ for (PHINode &P : ExitBlock->phis()) {
+ // Set the zero'th element of Phi to be from the preheader and remove all
+ // other incoming values. Given the loop has dedicated exits, all other
+ // incoming values must be from the exiting blocks.
+ int PredIndex = 0;
+ P.setIncomingBlock(PredIndex, Preheader);
+ // Removes all incoming values from all other exiting blocks (including
+ // duplicate values from an exiting block).
+ // Nuke all entries except the zero'th entry which is the preheader entry.
+ // NOTE! We need to remove Incoming Values in the reverse order as done
+ // below, to keep the indices valid for deletion (removeIncomingValues
+ // updates getNumIncomingValues and shifts all values down into the
+ // operand being deleted).
+ for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
+ P.removeIncomingValue(e - i, false);
+
+ assert((P.getNumIncomingValues() == 1 &&
+ P.getIncomingBlock(PredIndex) == Preheader) &&
+ "Should have exactly one value and that's from the preheader!");
+ }
+
+ if (DT) {
+ DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
+ if (MSSA) {
+ MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
+ *DT);
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
}
- }
- // Disconnect the loop body by branching directly to its exit.
- Builder.SetInsertPoint(Preheader->getTerminator());
- Builder.CreateBr(ExitBlock);
- // Remove the old branch.
- Preheader->getTerminator()->eraseFromParent();
+ // Disconnect the loop body by branching directly to its exit.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ Builder.CreateBr(ExitBlock);
+ // Remove the old branch.
+ Preheader->getTerminator()->eraseFromParent();
+ } else {
+ assert(L->hasNoExitBlocks() &&
+ "Loop should have either zero or one exit blocks.");
+
+ Builder.SetInsertPoint(OldBr);
+ Builder.CreateUnreachable();
+ Preheader->getTerminator()->eraseFromParent();
+ }
if (DT) {
DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
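
The "remove in reverse order" note in the PHI rewrite of the hunk above boils down to keeping indices valid while erasing; a trivial standalone model (std::vector instead of PHI incoming values, names made up):

    #include <cassert>
    #include <vector>

    // Keep only entry 0 (the preheader input), erasing the rest back-to-front
    // so the indices of the entries not yet removed stay valid.
    void keepOnlyFirstIncoming(std::vector<int> &Incoming) {
      assert(!Incoming.empty() && "a phi always has at least one entry");
      for (unsigned i = 0, e = Incoming.size() - 1; i != e; ++i)
        Incoming.erase(Incoming.begin() + (e - i));
      assert(Incoming.size() == 1 && "only the preheader entry should remain");
    }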
@@ -635,54 +666,58 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet;
llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
- // Given LCSSA form is satisfied, we should not have users of instructions
- // within the dead loop outside of the loop. However, LCSSA doesn't take
- // unreachable uses into account. We handle them here.
- // We could do it after drop all references (in this case all users in the
- // loop will be already eliminated and we have less work to do but according
- // to API doc of User::dropAllReferences only valid operation after dropping
- // references, is deletion. So let's substitute all usages of
- // instruction from the loop with undef value of corresponding type first.
- for (auto *Block : L->blocks())
- for (Instruction &I : *Block) {
- auto *Undef = UndefValue::get(I.getType());
- for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) {
- Use &U = *UI;
- ++UI;
- if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
- if (L->contains(Usr->getParent()))
- continue;
- // If we have a DT then we can check that uses outside a loop only in
- // unreachable block.
- if (DT)
- assert(!DT->isReachableFromEntry(U) &&
- "Unexpected user in reachable block");
- U.set(Undef);
+ if (ExitBlock) {
+ // Given LCSSA form is satisfied, we should not have users of instructions
+ // within the dead loop outside of the loop. However, LCSSA doesn't take
+ // unreachable uses into account. We handle them here.
+ // We could do this after dropping all references (in that case all users in
+ // the loop would already be eliminated and we would have less work to do),
+ // but according to the API doc of User::dropAllReferences the only valid
+ // operation after dropping references is deletion. So let's substitute all
+ // uses of instructions from the loop with undef values of the corresponding
+ // type first.
+ for (auto *Block : L->blocks())
+ for (Instruction &I : *Block) {
+ auto *Undef = UndefValue::get(I.getType());
+ for (Value::use_iterator UI = I.use_begin(), E = I.use_end();
+ UI != E;) {
+ Use &U = *UI;
+ ++UI;
+ if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
+ if (L->contains(Usr->getParent()))
+ continue;
+ // If we have a DT then we can check that uses outside a loop only in
+ // unreachable block.
+ if (DT)
+ assert(!DT->isReachableFromEntry(U) &&
+ "Unexpected user in reachable block");
+ U.set(Undef);
+ }
+ auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
+ if (!DVI)
+ continue;
+ auto Key =
+ DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()});
+ if (Key != DeadDebugSet.end())
+ continue;
+ DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()});
+ DeadDebugInst.push_back(DVI);
}
- auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
- if (!DVI)
- continue;
- auto Key = DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()});
- if (Key != DeadDebugSet.end())
- continue;
- DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()});
- DeadDebugInst.push_back(DVI);
- }
- // After the loop has been deleted all the values defined and modified
- // inside the loop are going to be unavailable.
- // Since debug values in the loop have been deleted, inserting an undef
- // dbg.value truncates the range of any dbg.value before the loop where the
- // loop used to be. This is particularly important for constant values.
- DIBuilder DIB(*ExitBlock->getModule());
- Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
- assert(InsertDbgValueBefore &&
- "There should be a non-PHI instruction in exit block, else these "
- "instructions will have no parent.");
- for (auto *DVI : DeadDebugInst)
- DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
- DVI->getVariable(), DVI->getExpression(),
- DVI->getDebugLoc(), InsertDbgValueBefore);
+ // After the loop has been deleted all the values defined and modified
+ // inside the loop are going to be unavailable.
+ // Since debug values in the loop have been deleted, inserting an undef
+ // dbg.value truncates the range of any dbg.value before the loop where the
+ // loop used to be. This is particularly important for constant values.
+ DIBuilder DIB(*ExitBlock->getModule());
+ Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
+ assert(InsertDbgValueBefore &&
+ "There should be a non-PHI instruction in exit block, else these "
+ "instructions will have no parent.");
+ for (auto *DVI : DeadDebugInst)
+ DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
+ DVI->getVariable(), DVI->getExpression(),
+ DVI->getDebugLoc(), InsertDbgValueBefore);
+ }
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
@@ -726,6 +761,51 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
}
}
+static Loop *getOutermostLoop(Loop *L) {
+ while (Loop *Parent = L->getParentLoop())
+ L = Parent;
+ return L;
+}
+
+void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, MemorySSA *MSSA) {
+ auto *Latch = L->getLoopLatch();
+ assert(Latch && "multiple latches not yet supported");
+ auto *Header = L->getHeader();
+ Loop *OutermostLoop = getOutermostLoop(L);
+
+ SE.forgetLoop(L);
+
+ // Note: By splitting the backedge, and then explicitly making it unreachable
+ // we gracefully handle corner cases such as non-bottom tested loops and the
+ // like. We also have the benefit of being able to reuse existing well tested
+ // code. It might be worth special casing the common bottom tested case at
+ // some point to avoid code churn.
+
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+
+ auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false,
+ /*PreserveLCSSA*/true, &DTU, MSSAU.get());
+
+ // Erase (and destroy) this loop instance. Handles relinking sub-loops
+ // and blocks within the loop as needed.
+ LI.erase(L);
+
+ // If the loop we broke had a parent, then changeToUnreachable might have
+ // caused a block to be removed from the parent loop (see loop_nest_lcssa
+ // test case in zero-btc.ll for an example), thus changing the parent's
+ // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
+ // loop which might have had a block removed.
+ if (OutermostLoop != L)
+ formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
+}
+
+
/// Checks if \p L has single exit through latch block except possibly
/// "deoptimizing" exits. Returns branch instruction terminating the loop
/// latch if above check is successful, nullptr otherwise.
@@ -838,30 +918,29 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
return true;
}
-Value *llvm::createMinMaxOp(IRBuilderBase &Builder,
- RecurrenceDescriptor::MinMaxRecurrenceKind RK,
- Value *Left, Value *Right) {
- CmpInst::Predicate P = CmpInst::ICMP_NE;
+Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
+ Value *Right) {
+ CmpInst::Predicate Pred;
switch (RK) {
default:
llvm_unreachable("Unknown min/max recurrence kind");
- case RecurrenceDescriptor::MRK_UIntMin:
- P = CmpInst::ICMP_ULT;
+ case RecurKind::UMin:
+ Pred = CmpInst::ICMP_ULT;
break;
- case RecurrenceDescriptor::MRK_UIntMax:
- P = CmpInst::ICMP_UGT;
+ case RecurKind::UMax:
+ Pred = CmpInst::ICMP_UGT;
break;
- case RecurrenceDescriptor::MRK_SIntMin:
- P = CmpInst::ICMP_SLT;
+ case RecurKind::SMin:
+ Pred = CmpInst::ICMP_SLT;
break;
- case RecurrenceDescriptor::MRK_SIntMax:
- P = CmpInst::ICMP_SGT;
+ case RecurKind::SMax:
+ Pred = CmpInst::ICMP_SGT;
break;
- case RecurrenceDescriptor::MRK_FloatMin:
- P = CmpInst::FCMP_OLT;
+ case RecurKind::FMin:
+ Pred = CmpInst::FCMP_OLT;
break;
- case RecurrenceDescriptor::MRK_FloatMax:
- P = CmpInst::FCMP_OGT;
+ case RecurKind::FMax:
+ Pred = CmpInst::FCMP_OGT;
break;
}
@@ -871,17 +950,15 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder,
FastMathFlags FMF;
FMF.setFast();
Builder.setFastMathFlags(FMF);
- Value *Cmp = Builder.CreateCmp(P, Left, Right, "rdx.minmax.cmp");
+ Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
return Select;
}
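
A scalar model of the compare-plus-select idiom that createMinMaxOp emits (plain C++; the enum is a stand-in for RecurKind, and the FP kinds, which use ordered FCMP_OLT/FCMP_OGT, are left out for brevity): each kind maps to exactly one predicate, and the result is select(cmp(L, R), L, R):

    #include <cstdint>

    enum class Kind { UMin, UMax, SMin, SMax }; // stand-in for RecurKind

    int64_t minMaxOp(Kind K, int64_t Left, int64_t Right) {
      bool Cmp = false;
      switch (K) {
      case Kind::UMin: Cmp = uint64_t(Left) < uint64_t(Right); break; // ICMP_ULT
      case Kind::UMax: Cmp = uint64_t(Left) > uint64_t(Right); break; // ICMP_UGT
      case Kind::SMin: Cmp = Left < Right; break;                     // ICMP_SLT
      case Kind::SMax: Cmp = Left > Right; break;                     // ICMP_SGT
      }
      return Cmp ? Left : Right; // select %rdx.minmax.cmp, %left, %right
    }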
// Helper to generate an ordered reduction.
-Value *
-llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
- unsigned Op,
- RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
- ArrayRef<Value *> RedOps) {
+Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
+ unsigned Op, RecurKind RdxKind,
+ ArrayRef<Value *> RedOps) {
unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
// Extract and apply reduction ops in ascending order:
@@ -895,9 +972,9 @@ llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
"bin.rdx");
} else {
- assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
+ assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
"Invalid min/max");
- Result = createMinMaxOp(Builder, MinMaxKind, Result, Ext);
+ Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
}
if (!RedOps.empty())
@@ -908,10 +985,9 @@ llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
}
// Helper to generate a log2 shuffle reduction.
-Value *
-llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
- RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
- ArrayRef<Value *> RedOps) {
+Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
+ unsigned Op, RecurKind RdxKind,
+ ArrayRef<Value *> RedOps) {
unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each
@@ -928,17 +1004,16 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
// Fill the rest of the mask with undef.
std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
- Value *Shuf = Builder.CreateShuffleVector(
- TmpVec, UndefValue::get(TmpVec->getType()), ShuffleMask, "rdx.shuf");
+ Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
// The builder propagates its fast-math-flags setting.
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
"bin.rdx");
} else {
- assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
+ assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
"Invalid min/max");
- TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
+ TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
}
if (!RedOps.empty())
propagateIRFlags(TmpVec, RedOps);
@@ -952,124 +1027,62 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
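The strategy is easier to see in a standalone scalar model, assuming a power-of-two VF: each round pairs lane i with lane i + half and halves the number of live lanes, so the whole vector is reduced in log2(VF) rounds before lane 0 is extracted. The helper below is an illustration, not LLVM code:

#include <array>
#include <cstddef>

// Reduce a power-of-two number of lanes with log2(N) rounds, mirroring the
// shuffle-then-binop sequence: lanes at and above Half become don't-care
// (the "undef" part of the shuffle mask) once they have been folded in.
template <typename T, std::size_t N, typename Op>
T shuffleReduce(std::array<T, N> Lanes, Op Combine) {
  static_assert(N && (N & (N - 1)) == 0, "VF must be a power of two");
  for (std::size_t Half = N / 2; Half >= 1; Half /= 2)
    for (std::size_t I = 0; I < Half; ++I)
      Lanes[I] = Combine(Lanes[I], Lanes[I + Half]);  // bin.rdx / min-max op
  return Lanes[0];                                    // extractelement ..., 0
}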
-/// Create a simple vector reduction specified by an opcode and some
-/// flags (if generating min/max reductions).
-Value *llvm::createSimpleTargetReduction(
- IRBuilderBase &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
- Value *Src, TargetTransformInfo::ReductionFlags Flags,
- ArrayRef<Value *> RedOps) {
- auto *SrcVTy = cast<VectorType>(Src->getType());
-
- std::function<Value *()> BuildFunc;
- using RD = RecurrenceDescriptor;
- RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
-
- switch (Opcode) {
- case Instruction::Add:
- BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
- break;
- case Instruction::Mul:
- BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
- break;
- case Instruction::And:
- BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
- break;
- case Instruction::Or:
- BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
- break;
- case Instruction::Xor:
- BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
- break;
- case Instruction::FAdd:
- BuildFunc = [&]() {
- auto Rdx = Builder.CreateFAddReduce(
- Constant::getNullValue(SrcVTy->getElementType()), Src);
- return Rdx;
- };
- break;
- case Instruction::FMul:
- BuildFunc = [&]() {
- Type *Ty = SrcVTy->getElementType();
- auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src);
- return Rdx;
- };
- break;
- case Instruction::ICmp:
- if (Flags.IsMaxOp) {
- MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
- BuildFunc = [&]() {
- return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
- };
- } else {
- MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
- BuildFunc = [&]() {
- return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
- };
- }
- break;
- case Instruction::FCmp:
- if (Flags.IsMaxOp) {
- MinMaxKind = RD::MRK_FloatMax;
- BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
- } else {
- MinMaxKind = RD::MRK_FloatMin;
- BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
- }
- break;
+Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
+ const TargetTransformInfo *TTI,
+ Value *Src, RecurKind RdxKind,
+ ArrayRef<Value *> RedOps) {
+ unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
+ TargetTransformInfo::ReductionFlags RdxFlags;
+ RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax ||
+ RdxKind == RecurKind::FMax;
+ RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
+ if (!ForceReductionIntrinsic &&
+ !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags))
+ return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps);
+
+ auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
+ switch (RdxKind) {
+ case RecurKind::Add:
+ return Builder.CreateAddReduce(Src);
+ case RecurKind::Mul:
+ return Builder.CreateMulReduce(Src);
+ case RecurKind::And:
+ return Builder.CreateAndReduce(Src);
+ case RecurKind::Or:
+ return Builder.CreateOrReduce(Src);
+ case RecurKind::Xor:
+ return Builder.CreateXorReduce(Src);
+ case RecurKind::FAdd:
+ return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
+ Src);
+ case RecurKind::FMul:
+ return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
+ case RecurKind::SMax:
+ return Builder.CreateIntMaxReduce(Src, true);
+ case RecurKind::SMin:
+ return Builder.CreateIntMinReduce(Src, true);
+ case RecurKind::UMax:
+ return Builder.CreateIntMaxReduce(Src, false);
+ case RecurKind::UMin:
+ return Builder.CreateIntMinReduce(Src, false);
+ case RecurKind::FMax:
+ return Builder.CreateFPMaxReduce(Src);
+ case RecurKind::FMin:
+ return Builder.CreateFPMinReduce(Src);
default:
llvm_unreachable("Unhandled opcode");
- break;
}
- if (ForceReductionIntrinsic ||
- TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
- return BuildFunc();
- return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
}
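One detail above worth calling out: the FAdd case now seeds the reduction with negative zero (ConstantFP::getNegativeZero) rather than the old null value, i.e. +0.0. Under IEEE-754 default rounding, -0.0 + x == x for every x, including x == -0.0, so -0.0 is the true additive identity, while a +0.0 seed would turn an all-negative-zero reduction into +0.0. A small standalone check in plain C++, not LLVM code:

#include <cassert>
#include <cmath>

int main() {
  double X = -0.0;
  assert(std::signbit(-0.0 + X));   // -0.0 seed: the sum stays -0.0
  assert(!std::signbit(+0.0 + X));  // +0.0 seed: the sum becomes +0.0
  return 0;
}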
-/// Create a vector reduction using a given recurrence descriptor.
Value *llvm::createTargetReduction(IRBuilderBase &B,
const TargetTransformInfo *TTI,
- RecurrenceDescriptor &Desc, Value *Src,
- bool NoNaN) {
+ RecurrenceDescriptor &Desc, Value *Src) {
// TODO: Support in-order reductions based on the recurrence descriptor.
- using RD = RecurrenceDescriptor;
- RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
- TargetTransformInfo::ReductionFlags Flags;
- Flags.NoNaN = NoNaN;
-
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
IRBuilderBase::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(Desc.getFastMathFlags());
-
- switch (RecKind) {
- case RD::RK_FloatAdd:
- return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
- case RD::RK_FloatMult:
- return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
- case RD::RK_IntegerAdd:
- return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
- case RD::RK_IntegerMult:
- return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
- case RD::RK_IntegerAnd:
- return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
- case RD::RK_IntegerOr:
- return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
- case RD::RK_IntegerXor:
- return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
- case RD::RK_IntegerMinMax: {
- RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
- Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
- Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
- return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
- }
- case RD::RK_FloatMinMax: {
- Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
- return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
- }
- default:
- llvm_unreachable("Unhandled RecKind");
- }
+ return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
}
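FastMathFlagGuard here is the usual RAII save/restore idiom: the builder temporarily carries the recurrence descriptor's fast-math flags and gets its previous flags back when the guard is destroyed, on every exit path. A generic standalone sketch of the pattern, using hypothetical types rather than the LLVM classes:

struct Flags { bool AllowReassoc = false, NoNaNs = false; };
struct Builder { Flags FMF; };

class FlagGuard {
  Builder &B;
  Flags Saved;
public:
  explicit FlagGuard(Builder &B) : B(B), Saved(B.FMF) {}
  ~FlagGuard() { B.FMF = Saved; }   // restored even on early return
};

void withFastMath(Builder &B, Flags Desired) {
  FlagGuard Guard(B);   // remember the caller's flags
  B.FMF = Desired;      // apply the descriptor's flags
  // ... emit reduction ops here ...
}                       // caller's flags restored here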
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
@@ -1145,7 +1158,7 @@ static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) {
// producing an expression involving multiple pointers. Until then, we must
// bail out here.
//
- // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
+ // Retrieve the pointer operand of the GEP. Don't use getUnderlyingObject
// because it understands lcssa phis while SCEV does not.
Value *FromPtr = FromVal;
Value *ToPtr = ToVal;
@@ -1162,7 +1175,7 @@ static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) {
// SCEV may have rewritten an expression that produces the GEP's pointer
// operand. That's ok as long as the pointer operand has the same base
- // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
+ // pointer. Unlike getUnderlyingObject(), getPointerBase() will find the
// base of a recurrence. This handles the case in which SCEV expansion
// converts a pointer type recurrence into a nonrecurrent pointer base
// indexed by an integer recurrence.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 16bd08c704ee..599bd1feb2bc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -16,8 +16,12 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -32,29 +36,22 @@ static cl::opt<bool>
cl::desc("Add no-alias annotation for instructions that "
"are disambiguated by memchecks"));
-LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
- DominatorTree *DT, ScalarEvolution *SE,
- bool UseLAIChecks)
- : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT),
+LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI,
+ ArrayRef<RuntimePointerCheck> Checks, Loop *L,
+ LoopInfo *LI, DominatorTree *DT,
+ ScalarEvolution *SE)
+ : VersionedLoop(L), NonVersionedLoop(nullptr),
+ AliasChecks(Checks.begin(), Checks.end()),
+ Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT),
SE(SE) {
- assert(L->getExitBlock() && "No single exit block");
- assert(L->isLoopSimplifyForm() && "Loop is not in loop-simplify form");
- if (UseLAIChecks) {
- setAliasChecks(LAI.getRuntimePointerChecking()->getChecks());
- setSCEVChecks(LAI.getPSE().getUnionPredicate());
- }
-}
-
-void LoopVersioning::setAliasChecks(ArrayRef<RuntimePointerCheck> Checks) {
- AliasChecks = {Checks.begin(), Checks.end()};
-}
-
-void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) {
- Preds = std::move(Check);
+ assert(L->getUniqueExitBlock() && "No single exit block");
}
void LoopVersioning::versionLoop(
const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
+ assert(VersionedLoop->isLoopSimplifyForm() &&
+ "Loop is not in loop-simplify form");
+
Instruction *FirstCheckInst;
Instruction *MemRuntimeCheck;
Value *SCEVRuntimeCheck;
@@ -67,11 +64,10 @@ void LoopVersioning::versionLoop(
addRuntimeChecks(RuntimeCheckBB->getTerminator(), VersionedLoop,
AliasChecks, RtPtrChecking.getSE());
- const SCEVUnionPredicate &Pred = LAI.getPSE().getUnionPredicate();
SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
"scev.check");
SCEVRuntimeCheck =
- Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator());
+ Exp.expandCodeForPredicate(&Preds, RuntimeCheckBB->getTerminator());
auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck);
// Discard the SCEV runtime check if it is always true.
@@ -122,6 +118,11 @@ void LoopVersioning::versionLoop(
// Adds the necessary PHI nodes for the versioned loops based on the
// loop-defined values used outside of the loop.
addPHINodes(DefsUsedOutside);
+ formDedicatedExitBlocks(NonVersionedLoop, DT, LI, nullptr, true);
+ formDedicatedExitBlocks(VersionedLoop, DT, LI, nullptr, true);
+ assert(NonVersionedLoop->isLoopSimplifyForm() &&
+ VersionedLoop->isLoopSimplifyForm() &&
+ "The versioned loops should be in simplify form.");
}
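For context, the transform being driven here produces roughly the following shape, shown as a hand-written C++ analogue rather than the IR the pass actually emits: a runtime overlap check guards a copy of the loop that may assume no aliasing, with the original loop kept as the fallback. The formDedicatedExitBlocks calls added above then restore loop-simplify form for both copies.

#include <cstdint>

void saxpy(float *A, const float *B, float S, unsigned N) {
  // Stand-in for the emitted runtime pointer checks: do [A,A+N) and [B,B+N)
  // overlap?
  auto AB = reinterpret_cast<std::uintptr_t>(A);
  auto BB = reinterpret_cast<std::uintptr_t>(B);
  bool MayAlias =
      AB < BB + N * sizeof(float) && BB < AB + N * sizeof(float);
  if (!MayAlias) {
    for (unsigned I = 0; I < N; ++I)  // VersionedLoop: no-alias assumed,
      A[I] += S * B[I];               // annotated, free to vectorize
  } else {
    for (unsigned I = 0; I < N; ++I)  // NonVersionedLoop: unmodified fallback
      A[I] += S * B[I];
  }
}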
void LoopVersioning::addPHINodes(
@@ -253,47 +254,59 @@ void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,
}
namespace {
+bool runImpl(LoopInfo *LI, function_ref<const LoopAccessInfo &(Loop &)> GetLAA,
+ DominatorTree *DT, ScalarEvolution *SE) {
+ // Build up a worklist of inner-loops to version. This is necessary as the
+ // act of versioning a loop creates new loops and can invalidate iterators
+ // across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->isInnermost())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ if (!L->isLoopSimplifyForm() || !L->isRotatedForm() ||
+ !L->getExitingBlock())
+ continue;
+ const LoopAccessInfo &LAI = GetLAA(*L);
+ if (!LAI.hasConvergentOp() &&
+ (LAI.getNumRuntimePointerChecks() ||
+ !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
+ LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L,
+ LI, DT, SE);
+ LVer.versionLoop();
+ LVer.annotateLoopWithNoAlias();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
/// Also expose this as a pass. Currently this is only used for
/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
/// array accesses from the loop.
-class LoopVersioningPass : public FunctionPass {
+class LoopVersioningLegacyPass : public FunctionPass {
public:
- LoopVersioningPass() : FunctionPass(ID) {
- initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry());
+ LoopVersioningLegacyPass() : FunctionPass(ID) {
+ initializeLoopVersioningLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
+ auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & {
+ return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(&L);
+ };
+
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- // Build up a worklist of inner-loops to version. This is necessary as the
- // act of versioning a loop creates new loops and can invalidate iterators
- // across the loops.
- SmallVector<Loop *, 8> Worklist;
-
- for (Loop *TopLevelLoop : *LI)
- for (Loop *L : depth_first(TopLevelLoop))
- // We only handle inner-most loops.
- if (L->empty())
- Worklist.push_back(L);
-
- // Now walk the identified inner loops.
- bool Changed = false;
- for (Loop *L : Worklist) {
- const LoopAccessInfo &LAI = LAA->getInfo(L);
- if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() &&
- (LAI.getNumRuntimePointerChecks() ||
- !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
- LoopVersioning LVer(LAI, L, LI, DT, SE);
- LVer.versionLoop();
- LVer.annotateLoopWithNoAlias();
- Changed = true;
- }
- }
-
- return Changed;
+ return runImpl(LI, GetLAA, DT, SE);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -312,18 +325,45 @@ public:
#define LVER_OPTION "loop-versioning"
#define DEBUG_TYPE LVER_OPTION
-char LoopVersioningPass::ID;
+char LoopVersioningLegacyPass::ID;
static const char LVer_name[] = "Loop Versioning";
-INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+INITIALIZE_PASS_BEGIN(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false,
+ false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+INITIALIZE_PASS_END(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false,
+ false)
namespace llvm {
-FunctionPass *createLoopVersioningPass() {
- return new LoopVersioningPass();
+FunctionPass *createLoopVersioningLegacyPass() {
+ return new LoopVersioningLegacyPass();
}
+
+PreservedAnalyses LoopVersioningPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ MemorySSA *MSSA = EnableMSSALoopDependency
+ ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
+ : nullptr;
+
+ auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
+ auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & {
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, MSSA};
+ return LAM.getResult<LoopAccessAnalysis>(L, AR);
+ };
+
+ if (runImpl(&LI, GetLAA, &DT, &SE))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
}
+} // namespace llvm
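The shape of this change is the common legacy/new-PM split: the worklist logic moved into a free runImpl that takes a callback for fetching the per-loop LoopAccessInfo, and both pass classes are now thin adapters around it. A reduced standalone sketch of that pattern, with hypothetical placeholder types rather than the LLVM ones:

#include <functional>

struct Loop {};
struct LoopAccessInfo {};

// Shared transformation logic, parameterized over the analysis getter that
// each pass manager knows how to provide.
bool runImpl(std::function<const LoopAccessInfo &(Loop &)> GetLAA, Loop &L) {
  const LoopAccessInfo &LAI = GetLAA(L);  // supplied by either pass manager
  (void)LAI;
  return false;                           // placeholder: no transformation here
}

// Thin adapter, analogous to the legacy pass: it only wires up the callback.
struct LegacyPassModel {
  LoopAccessInfo LAI;
  bool run(Loop &L) {
    return runImpl([&](Loop &) -> const LoopAccessInfo & { return LAI; }, L);
  }
};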
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index 0b225e8abc4e..fe0ff5899d8f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -48,7 +48,7 @@ static bool runImpl(Function &F) {
bool Changed = false;
for (BasicBlock &BB : F)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
- SmallVector<Value *, 16> CallArgs(II->arg_begin(), II->arg_end());
+ SmallVector<Value *, 16> CallArgs(II->args());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
// Insert a normal call instruction...
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 34e836d9660f..ec8d7a7074cd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -26,6 +27,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -55,9 +57,9 @@ namespace {
} // end anonymous namespace
+namespace {
// Return true iff R is covered by Ranges.
-static bool IsInRanges(const IntRange &R,
- const std::vector<IntRange> &Ranges) {
+bool IsInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) {
// Note: Ranges must be sorted, non-overlapping and non-adjacent.
// Find the first range whose High field is >= R.High,
@@ -68,120 +70,34 @@ static bool IsInRanges(const IntRange &R,
return I != Ranges.end() && I->Low <= R.Low;
}
-namespace {
-
- /// Replace all SwitchInst instructions with chained branch instructions.
- class LowerSwitch : public FunctionPass {
- public:
- // Pass identification, replacement for typeid
- static char ID;
-
- LowerSwitch() : FunctionPass(ID) {
- initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LazyValueInfoWrapperPass>();
- }
-
- struct CaseRange {
- ConstantInt* Low;
- ConstantInt* High;
- BasicBlock* BB;
-
- CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
- : Low(low), High(high), BB(bb) {}
- };
-
- using CaseVector = std::vector<CaseRange>;
- using CaseItr = std::vector<CaseRange>::iterator;
-
- private:
- void processSwitchInst(SwitchInst *SI,
- SmallPtrSetImpl<BasicBlock *> &DeleteList,
- AssumptionCache *AC, LazyValueInfo *LVI);
-
- BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
- ConstantInt *LowerBound, ConstantInt *UpperBound,
- Value *Val, BasicBlock *Predecessor,
- BasicBlock *OrigBlock, BasicBlock *Default,
- const std::vector<IntRange> &UnreachableRanges);
- BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val,
- ConstantInt *LowerBound, ConstantInt *UpperBound,
- BasicBlock *OrigBlock, BasicBlock *Default);
- unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
- };
-
- /// The comparison function for sorting the switch case values in the vector.
- /// WARNING: Case ranges should be disjoint!
- struct CaseCmp {
- bool operator()(const LowerSwitch::CaseRange& C1,
- const LowerSwitch::CaseRange& C2) {
- const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
- const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
- return CI1->getValue().slt(CI2->getValue());
- }
- };
-
-} // end anonymous namespace
-
-char LowerSwitch::ID = 0;
-
-// Publicly exposed interface to pass...
-char &llvm::LowerSwitchID = LowerSwitch::ID;
-
-INITIALIZE_PASS_BEGIN(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
-INITIALIZE_PASS_END(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false)
-
-// createLowerSwitchPass - Interface to this file...
-FunctionPass *llvm::createLowerSwitchPass() {
- return new LowerSwitch();
-}
-
-bool LowerSwitch::runOnFunction(Function &F) {
- LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
- auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
- AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
-
- bool Changed = false;
- SmallPtrSet<BasicBlock*, 8> DeleteList;
-
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks
-
- // If the block is a dead Default block that will be deleted later, don't
- // waste time processing it.
- if (DeleteList.count(Cur))
- continue;
-
- if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
- Changed = true;
- processSwitchInst(SI, DeleteList, AC, LVI);
- }
- }
-
- for (BasicBlock* BB: DeleteList) {
- LVI->eraseBlock(BB);
- DeleteDeadBlock(BB);
+struct CaseRange {
+ ConstantInt *Low;
+ ConstantInt *High;
+ BasicBlock *BB;
+
+ CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
+ : Low(low), High(high), BB(bb) {}
+};
+
+using CaseVector = std::vector<CaseRange>;
+using CaseItr = std::vector<CaseRange>::iterator;
+
+/// The comparison function for sorting the switch case values in the vector.
+/// WARNING: Case ranges should be disjoint!
+struct CaseCmp {
+ bool operator()(const CaseRange &C1, const CaseRange &C2) {
+ const ConstantInt *CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt *CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
}
-
- return Changed;
-}
+};
/// Used for debugging purposes.
LLVM_ATTRIBUTE_USED
-static raw_ostream &operator<<(raw_ostream &O,
- const LowerSwitch::CaseVector &C) {
+raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) {
O << "[";
- for (LowerSwitch::CaseVector::const_iterator B = C.begin(), E = C.end();
- B != E;) {
+ for (CaseVector::const_iterator B = C.begin(), E = C.end(); B != E;) {
O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]";
if (++B != E)
O << ", ";
@@ -200,9 +116,9 @@ static raw_ostream &operator<<(raw_ostream &O,
/// 2) Removed if subsequent incoming values now share the same case, i.e.,
/// multiple outcome edges are condensed into one. This is necessary to keep the
/// number of phi values equal to the number of branches to SuccBB.
-static void
-fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
- const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
+void FixPhis(
+ BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
+ const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
for (BasicBlock::iterator I = SuccBB->begin(),
IE = SuccBB->getFirstNonPHI()->getIterator();
I != IE; ++I) {
@@ -233,17 +149,80 @@ fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
+/// Create a new leaf block for the binary lookup tree. It checks if the
+/// switch's value == the case's value. If not, then it jumps to the default
+/// branch. At this point in the tree, the value can't be another valid case
+/// value, so the jump to the "default" branch is warranted.
+BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound,
+ ConstantInt *UpperBound, BasicBlock *OrigBlock,
+ BasicBlock *Default) {
+ Function *F = OrigBlock->getParent();
+ BasicBlock *NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
+ F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
+
+ // Emit comparison
+ ICmpInst *Comp = nullptr;
+ if (Leaf.Low == Leaf.High) {
+ // Make the seteq instruction...
+ Comp =
+ new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, Leaf.Low, "SwitchLeaf");
+ } else {
+ // Make range comparison
+ if (Leaf.Low == LowerBound) {
+ // Val >= Min && Val <= Hi --> Val <= Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else if (Leaf.High == UpperBound) {
+ // Val <= Max && Val >= Lo --> Val >= Lo
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
+ "SwitchLeaf");
+ } else if (Leaf.Low->isZero()) {
+ // Val >= 0 && Val <= Hi --> Val <=u Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else {
+ // Emit V-Lo <=u Hi-Lo
+ Constant *NegLo = ConstantExpr::getNeg(Leaf.Low);
+ Instruction *Add = BinaryOperator::CreateAdd(
+ Val, NegLo, Val->getName() + ".off", NewLeaf);
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+ "SwitchLeaf");
+ }
+ }
+
+ // Make the conditional branch...
+ BasicBlock *Succ = Leaf.BB;
+ BranchInst::Create(Succ, Default, Comp, NewLeaf);
+
+ // If there were any PHI nodes in this successor, rewrite one entry
+ // from OrigBlock to come from NewLeaf.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Remove all but one incoming entries from the cluster
+ uint64_t Range = Leaf.High->getSExtValue() - Leaf.Low->getSExtValue();
+ for (uint64_t j = 0; j < Range; ++j) {
+ PN->removeIncomingValue(OrigBlock);
+ }
+
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+ }
+
+ return NewLeaf;
+}
+
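The last branch above is the classic single-compare range test: for Lo <= Hi, "Lo <= V && V <= Hi" is equivalent to the unsigned comparison "(V - Lo) <=u (Hi - Lo)", because subtracting Lo rebases the range at zero and any out-of-range value wraps around to something large. A standalone check of the equivalence:

#include <cassert>
#include <cstdint>

static bool inRangeTwoCmps(int32_t V, int32_t Lo, int32_t Hi) {
  return Lo <= V && V <= Hi;
}
static bool inRangeOneCmp(int32_t V, int32_t Lo, int32_t Hi) {
  // Unsigned subtraction wraps, so everything below Lo lands above Hi - Lo.
  return uint32_t(V) - uint32_t(Lo) <= uint32_t(Hi) - uint32_t(Lo);
}
int main() {
  for (int32_t V : {-7, -1, 0, 3, 9, 10, 11})
    assert(inRangeTwoCmps(V, -1, 10) == inRangeOneCmp(V, -1, 10));
  return 0;
}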
/// Convert the switch statement into a binary lookup of the case values.
/// The function recursively builds this tree. LowerBound and UpperBound are
/// used to keep track of the bounds for Val that have already been checked by
/// a block emitted by one of the previous calls to switchConvert in the call
/// stack.
-BasicBlock *
-LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
- ConstantInt *UpperBound, Value *Val,
- BasicBlock *Predecessor, BasicBlock *OrigBlock,
- BasicBlock *Default,
- const std::vector<IntRange> &UnreachableRanges) {
+BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
+ ConstantInt *UpperBound, Value *Val,
+ BasicBlock *Predecessor, BasicBlock *OrigBlock,
+ BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges) {
assert(LowerBound && UpperBound && "Bounds must be initialized");
unsigned Size = End - Begin;
@@ -255,10 +234,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
if (Begin->Low == LowerBound && Begin->High == UpperBound) {
unsigned NumMergedCases = 0;
NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue();
- fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
+ FixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
return Begin->BB;
}
- return newLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
+ return NewLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
Default);
}
@@ -305,12 +284,12 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
Val, Pivot.Low, "Pivot");
- BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
- NewUpperBound, Val, NewNode, OrigBlock,
- Default, UnreachableRanges);
- BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
- UpperBound, Val, NewNode, OrigBlock,
- Default, UnreachableRanges);
+ BasicBlock *LBranch =
+ SwitchConvert(LHS.begin(), LHS.end(), LowerBound, NewUpperBound, Val,
+ NewNode, OrigBlock, Default, UnreachableRanges);
+ BasicBlock *RBranch =
+ SwitchConvert(RHS.begin(), RHS.end(), NewLowerBound, UpperBound, Val,
+ NewNode, OrigBlock, Default, UnreachableRanges);
F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode);
NewNode->getInstList().push_back(Comp);
@@ -319,78 +298,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
return NewNode;
}
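Seen end to end, the lowering builds a balanced binary search over the sorted, disjoint case ranges. The standalone model below captures the control flow it emits, with interior nodes comparing against the pivot's low value (the "Pivot" slt above) and leaves doing the final range test from NewLeafBlock; it assumes End > Begin, as the real code does:

#include <cstddef>
#include <vector>

struct CaseRangeModel { int Lo, Hi, Dest; };

int dispatch(const std::vector<CaseRangeModel> &Cases, std::size_t Begin,
             std::size_t End, int Val, int DefaultDest) {
  if (End - Begin == 1) {
    const CaseRangeModel &R = Cases[Begin];          // leaf block
    return (R.Lo <= Val && Val <= R.Hi) ? R.Dest : DefaultDest;
  }
  std::size_t Mid = Begin + (End - Begin) / 2;       // pivot case
  if (Val < Cases[Mid].Lo)                           // interior node compare
    return dispatch(Cases, Begin, Mid, Val, DefaultDest);
  return dispatch(Cases, Mid, End, Val, DefaultDest);
}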
-/// Create a new leaf block for the binary lookup tree. It checks if the
-/// switch's value == the case's value. If not, then it jumps to the default
-/// branch. At this point in the tree, the value can't be another valid case
-/// value, so the jump to the "default" branch is warranted.
-BasicBlock *LowerSwitch::newLeafBlock(CaseRange &Leaf, Value *Val,
- ConstantInt *LowerBound,
- ConstantInt *UpperBound,
- BasicBlock *OrigBlock,
- BasicBlock *Default) {
- Function* F = OrigBlock->getParent();
- BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
- F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
-
- // Emit comparison
- ICmpInst* Comp = nullptr;
- if (Leaf.Low == Leaf.High) {
- // Make the seteq instruction...
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
- Leaf.Low, "SwitchLeaf");
- } else {
- // Make range comparison
- if (Leaf.Low == LowerBound) {
- // Val >= Min && Val <= Hi --> Val <= Hi
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
- "SwitchLeaf");
- } else if (Leaf.High == UpperBound) {
- // Val <= Max && Val >= Lo --> Val >= Lo
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
- "SwitchLeaf");
- } else if (Leaf.Low->isZero()) {
- // Val >= 0 && Val <= Hi --> Val <=u Hi
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
- "SwitchLeaf");
- } else {
- // Emit V-Lo <=u Hi-Lo
- Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
- Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
- Val->getName()+".off",
- NewLeaf);
- Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
- "SwitchLeaf");
- }
- }
-
- // Make the conditional branch...
- BasicBlock* Succ = Leaf.BB;
- BranchInst::Create(Succ, Default, Comp, NewLeaf);
-
- // If there were any PHI nodes in this successor, rewrite one entry
- // from OrigBlock to come from NewLeaf.
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode* PN = cast<PHINode>(I);
- // Remove all but one incoming entries from the cluster
- uint64_t Range = Leaf.High->getSExtValue() -
- Leaf.Low->getSExtValue();
- for (uint64_t j = 0; j < Range; ++j) {
- PN->removeIncomingValue(OrigBlock);
- }
-
- int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
- assert(BlockIdx != -1 && "Switch didn't go to this successor??");
- PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
- }
-
- return NewLeaf;
-}
-
/// Transform simple list of \p SI's cases into list of CaseRange's \p Cases.
/// \post \p Cases wouldn't contain references to \p SI's default BB.
/// \returns Number of \p SI's cases that do not reference \p SI's default BB.
-unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+unsigned Clusterify(CaseVector &Cases, SwitchInst *SI) {
unsigned NumSimpleCases = 0;
// Start with "simple" cases
@@ -431,9 +342,9 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
/// Replace the specified switch instruction with a sequence of chained if-then
/// insts in a balanced binary search.
-void LowerSwitch::processSwitchInst(SwitchInst *SI,
- SmallPtrSetImpl<BasicBlock *> &DeleteList,
- AssumptionCache *AC, LazyValueInfo *LVI) {
+void ProcessSwitchInst(SwitchInst *SI,
+ SmallPtrSetImpl<BasicBlock *> &DeleteList,
+ AssumptionCache *AC, LazyValueInfo *LVI) {
BasicBlock *OrigBlock = SI->getParent();
Function *F = OrigBlock->getParent();
Value *Val = SI->getCondition(); // The value we are switching on...
@@ -458,7 +369,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
if (Cases.empty()) {
BranchInst::Create(Default, OrigBlock);
// Remove all the references from Default's PHIs to OrigBlock, but one.
- fixPhis(Default, OrigBlock, OrigBlock);
+ FixPhis(Default, OrigBlock, OrigBlock);
SI->eraseFromParent();
return;
}
@@ -489,7 +400,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
// TODO Shouldn't this create a signed range?
ConstantRange KnownBitsRange =
ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
- const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI);
+ const ConstantRange LVIRange = LVI->getConstantRange(Val, SI);
ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange);
// We delegate removal of unreachable non-default cases to other passes. In
// the unlikely event that some of them survived, we just conservatively
@@ -563,10 +474,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
// cases.
assert(MaxPop > 0 && PopSucc);
Default = PopSucc;
- Cases.erase(
- llvm::remove_if(
- Cases, [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }),
- Cases.end());
+ llvm::erase_if(Cases,
+ [PopSucc](const CaseRange &R) { return R.BB == PopSucc; });
// If there are no cases left, just branch.
if (Cases.empty()) {
@@ -592,12 +501,12 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
BranchInst::Create(Default, NewDefault);
BasicBlock *SwitchBlock =
- switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
+ SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
OrigBlock, OrigBlock, NewDefault, UnreachableRanges);
// If there are entries in any PHI nodes for the default edge, make sure
// to update them as well.
- fixPhis(Default, OrigBlock, NewDefault);
+ FixPhis(Default, OrigBlock, NewDefault);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
@@ -607,6 +516,84 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
OrigBlock->getInstList().erase(SI);
// If the Default block has no more predecessors just add it to DeleteList.
- if (pred_begin(OldDefault) == pred_end(OldDefault))
+ if (pred_empty(OldDefault))
DeleteList.insert(OldDefault);
}
+
+bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) {
+ bool Changed = false;
+ SmallPtrSet<BasicBlock *, 8> DeleteList;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *Cur =
+ &*I++; // Advance over block so we don't traverse new blocks
+
+ // If the block is a dead Default block that will be deleted later, don't
+ // waste time processing it.
+ if (DeleteList.count(Cur))
+ continue;
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+ Changed = true;
+ ProcessSwitchInst(SI, DeleteList, AC, LVI);
+ }
+ }
+
+ for (BasicBlock *BB : DeleteList) {
+ LVI->eraseBlock(BB);
+ DeleteDeadBlock(BB);
+ }
+
+ return Changed;
+}
+
+/// Replace all SwitchInst instructions with chained branch instructions.
+class LowerSwitchLegacyPass : public FunctionPass {
+public:
+ // Pass identification, replacement for typeid
+ static char ID;
+
+ LowerSwitchLegacyPass() : FunctionPass(ID) {
+ initializeLowerSwitchLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LazyValueInfoWrapperPass>();
+ }
+};
+
+} // end anonymous namespace
+
+char LowerSwitchLegacyPass::ID = 0;
+
+// Publicly exposed interface to pass...
+char &llvm::LowerSwitchID = LowerSwitchLegacyPass::ID;
+
+INITIALIZE_PASS_BEGIN(LowerSwitchLegacyPass, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(LowerSwitchLegacyPass, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+
+// createLowerSwitchPass - Interface to this file...
+FunctionPass *llvm::createLowerSwitchPass() {
+ return new LowerSwitchLegacyPass();
+}
+
+bool LowerSwitchLegacyPass::runOnFunction(Function &F) {
+ LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
+ AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
+ return LowerSwitch(F, LVI, AC);
+}
+
+PreservedAnalyses LowerSwitchPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
+ AssumptionCache *AC = AM.getCachedResult<AssumptionAnalysis>(F);
+ return LowerSwitch(F, LVI, AC) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp
new file mode 100644
index 000000000000..6a137630deeb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -0,0 +1,104 @@
+//===- MatrixUtils.cpp - Utilities to lower matrix intrinsics ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for generating tiled loops for matrix operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MatrixUtils.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Type.h"
+
+using namespace llvm;
+
+BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
+ Value *Bound, Value *Step, StringRef Name,
+ IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L,
+ LoopInfo &LI) {
+ LLVMContext &Ctx = Preheader->getContext();
+ BasicBlock *Header = BasicBlock::Create(
+ Preheader->getContext(), Name + ".header", Preheader->getParent(), Exit);
+ BasicBlock *Body = BasicBlock::Create(Header->getContext(), Name + ".body",
+ Header->getParent(), Exit);
+ BasicBlock *Latch = BasicBlock::Create(Header->getContext(), Name + ".latch",
+ Header->getParent(), Exit);
+
+ Type *I32Ty = Type::getInt64Ty(Ctx);
+ BranchInst::Create(Body, Header);
+ BranchInst::Create(Latch, Body);
+ PHINode *IV =
+ PHINode::Create(I32Ty, 2, Name + ".iv", Header->getTerminator());
+ IV->addIncoming(ConstantInt::get(I32Ty, 0), Preheader);
+
+ B.SetInsertPoint(Latch);
+ Value *Inc = B.CreateAdd(IV, Step, Name + ".step");
+ Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond");
+ BranchInst::Create(Header, Exit, Cond, Latch);
+ IV->addIncoming(Inc, Latch);
+
+ BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
+ BasicBlock *Tmp = PreheaderBr->getSuccessor(0);
+ PreheaderBr->setSuccessor(0, Header);
+ DTU.applyUpdatesPermissive({
+ {DominatorTree::Delete, Preheader, Tmp},
+ {DominatorTree::Insert, Header, Body},
+ {DominatorTree::Insert, Body, Latch},
+ {DominatorTree::Insert, Latch, Header},
+ {DominatorTree::Insert, Latch, Exit},
+ {DominatorTree::Insert, Preheader, Header},
+ });
+
+ L->addBasicBlockToLoop(Header, LI);
+ L->addBasicBlockToLoop(Body, LI);
+ L->addBasicBlockToLoop(Latch, LI);
+ return Body;
+}
+
+// Creates the following loop nest skeleton:
+// for C = 0; C < NumColumns; C += TileSize
+// for R = 0; R < NumRows; R += TileSize
+// for K = 0; K < Inner ; K += TileSize
+BasicBlock *TileInfo::CreateTiledLoops(BasicBlock *Start, BasicBlock *End,
+ IRBuilderBase &B, DomTreeUpdater &DTU,
+ LoopInfo &LI) {
+ Loop *ColLoop = LI.AllocateLoop();
+ Loop *RowLoop = LI.AllocateLoop();
+ Loop *InnerLoop = LI.AllocateLoop();
+ RowLoop->addChildLoop(InnerLoop);
+ ColLoop->addChildLoop(RowLoop);
+ if (Loop *ParentL = LI.getLoopFor(Start))
+ ParentL->addChildLoop(ColLoop);
+ else
+ LI.addTopLevelLoop(ColLoop);
+
+ BasicBlock *ColBody =
+ CreateLoop(Start, End, B.getInt64(NumColumns), B.getInt64(TileSize),
+ "cols", B, DTU, ColLoop, LI);
+ BasicBlock *ColLatch = ColBody->getSingleSuccessor();
+ BasicBlock *RowBody =
+ CreateLoop(ColBody, ColLatch, B.getInt64(NumRows), B.getInt64(TileSize),
+ "rows", B, DTU, RowLoop, LI);
+ RowLoopLatch = RowBody->getSingleSuccessor();
+
+ BasicBlock *InnerBody =
+ CreateLoop(RowBody, RowLoopLatch, B.getInt64(NumInner),
+ B.getInt64(TileSize), "inner", B, DTU, InnerLoop, LI);
+ InnerLoopLatch = InnerBody->getSingleSuccessor();
+ ColumnLoopHeader = ColBody->getSinglePredecessor();
+ RowLoopHeader = RowBody->getSinglePredecessor();
+ InnerLoopHeader = InnerBody->getSinglePredecessor();
+ CurrentRow = &*RowLoopHeader->begin();
+ CurrentCol = &*ColumnLoopHeader->begin();
+ CurrentK = &*InnerLoopHeader->begin();
+
+ return InnerBody;
+}
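A source-level analogue of the skeleton this returns, assuming the bounds are multiples of TileSize (the emitted latches use an inequality test against the bound); the innermost body is where the caller later materializes the per-tile matrix code:

void tiledSkeleton(unsigned NumColumns, unsigned NumRows, unsigned NumInner,
                   unsigned TileSize) {
  for (unsigned C = 0; C < NumColumns; C += TileSize)     // "cols" loop
    for (unsigned R = 0; R < NumRows; R += TileSize)      // "rows" loop
      for (unsigned K = 0; K < NumInner; K += TileSize) { // "inner" loop
        // tile body: process the TileSize x TileSize block at (R, C, K)
      }
}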
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 7f961dbaf4b4..e350320e7569 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/MetaRenamer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
@@ -25,6 +26,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/InitializePasses.h"
@@ -40,123 +42,125 @@ static const char *const metaNames[] = {
};
namespace {
+// This PRNG is from the ISO C spec. It is intentionally simple and
+// unsuitable for cryptographic use. We're just looking for enough
+// variety to surprise and delight users.
+struct PRNG {
+ unsigned long next;
+
+ void srand(unsigned int seed) { next = seed; }
+
+ int rand() {
+ next = next * 1103515245 + 12345;
+ return (unsigned int)(next / 65536) % 32768;
+ }
+};
- // This PRNG is from the ISO C spec. It is intentionally simple and
- // unsuitable for cryptographic use. We're just looking for enough
- // variety to surprise and delight users.
- struct PRNG {
- unsigned long next;
+struct Renamer {
+ Renamer(unsigned int seed) { prng.srand(seed); }
- void srand(unsigned int seed) {
- next = seed;
- }
+ const char *newName() {
+ return metaNames[prng.rand() % array_lengthof(metaNames)];
+ }
- int rand() {
- next = next * 1103515245 + 12345;
- return (unsigned int)(next / 65536) % 32768;
- }
- };
+ PRNG prng;
+};
- struct Renamer {
- Renamer(unsigned int seed) {
- prng.srand(seed);
- }
+void MetaRename(Function &F) {
+ for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI)
+ if (!AI->getType()->isVoidTy())
+ AI->setName("arg");
- const char *newName() {
- return metaNames[prng.rand() % array_lengthof(metaNames)];
- }
+ for (auto &BB : F) {
+ BB.setName("bb");
- PRNG prng;
- };
+ for (auto &I : BB)
+ if (!I.getType()->isVoidTy())
+ I.setName("tmp");
+ }
+}
- struct MetaRenamer : public ModulePass {
- // Pass identification, replacement for typeid
- static char ID;
-
- MetaRenamer() : ModulePass(ID) {
- initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.setPreservesAll();
- }
-
- bool runOnModule(Module &M) override {
- // Seed our PRNG with simple additive sum of ModuleID. We're looking to
- // simply avoid always having the same function names, and we need to
- // remain deterministic.
- unsigned int randSeed = 0;
- for (auto C : M.getModuleIdentifier())
- randSeed += C;
-
- Renamer renamer(randSeed);
-
- // Rename all aliases
- for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) {
- StringRef Name = AI->getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
- continue;
-
- AI->setName("alias");
- }
-
- // Rename all global variables
- for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
- StringRef Name = GI->getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
- continue;
-
- GI->setName("global");
- }
-
- // Rename all struct types
- TypeFinder StructTypes;
- StructTypes.run(M, true);
- for (StructType *STy : StructTypes) {
- if (STy->isLiteral() || STy->getName().empty()) continue;
-
- SmallString<128> NameStorage;
- STy->setName((Twine("struct.") +
- renamer.newName()).toStringRef(NameStorage));
- }
-
- // Rename all functions
- for (auto &F : M) {
- StringRef Name = F.getName();
- LibFunc Tmp;
- // Leave library functions alone because their presence or absence could
- // affect the behavior of other passes.
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F).getLibFunc(
- F, Tmp))
- continue;
-
- // Leave @main alone. The output of -metarenamer might be passed to
- // lli for execution and the latter needs a main entry point.
- if (Name != "main")
- F.setName(renamer.newName());
-
- runOnFunction(F);
- }
- return true;
- }
-
- bool runOnFunction(Function &F) {
- for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI)
- if (!AI->getType()->isVoidTy())
- AI->setName("arg");
-
- for (auto &BB : F) {
- BB.setName("bb");
-
- for (auto &I : BB)
- if (!I.getType()->isVoidTy())
- I.setName("tmp");
- }
- return true;
- }
- };
+void MetaRename(Module &M,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
+ // Seed our PRNG with simple additive sum of ModuleID. We're looking to
+ // simply avoid always having the same function names, and we need to
+ // remain deterministic.
+ unsigned int randSeed = 0;
+ for (auto C : M.getModuleIdentifier())
+ randSeed += C;
+
+ Renamer renamer(randSeed);
+
+ // Rename all aliases
+ for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) {
+ StringRef Name = AI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
+ AI->setName("alias");
+ }
+
+ // Rename all global variables
+ for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
+ StringRef Name = GI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
+ GI->setName("global");
+ }
+
+ // Rename all struct types
+ TypeFinder StructTypes;
+ StructTypes.run(M, true);
+ for (StructType *STy : StructTypes) {
+ if (STy->isLiteral() || STy->getName().empty())
+ continue;
+
+ SmallString<128> NameStorage;
+ STy->setName(
+ (Twine("struct.") + renamer.newName()).toStringRef(NameStorage));
+ }
+
+ // Rename all functions
+ for (auto &F : M) {
+ StringRef Name = F.getName();
+ LibFunc Tmp;
+ // Leave library functions alone because their presence or absence could
+ // affect the behavior of other passes.
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ GetTLI(F).getLibFunc(F, Tmp))
+ continue;
+
+ // Leave @main alone. The output of -metarenamer might be passed to
+ // lli for execution and the latter needs a main entry point.
+ if (Name != "main")
+ F.setName(renamer.newName());
+
+ MetaRename(F);
+ }
+}
+
+struct MetaRenamer : public ModulePass {
+ // Pass identification, replacement for typeid
+ static char ID;
+
+ MetaRenamer() : ModulePass(ID) {
+ initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnModule(Module &M) override {
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ MetaRename(M, GetTLI);
+ return true;
+ }
+};
} // end anonymous namespace
@@ -175,3 +179,14 @@ INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
ModulePass *llvm::createMetaRenamerPass() {
return new MetaRenamer();
}
+
+PreservedAnalyses MetaRenamerPass::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ MetaRename(M, GetTLI);
+
+ return PreservedAnalyses::all();
+}
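The renaming is deliberately deterministic: the PRNG is seeded with the byte sum of the module identifier, so re-running the renamer on the same module reproduces the same names, while different modules usually diverge. A standalone copy of the seeding step, kept separate from the LLVM types:

#include <string>

unsigned seedFromModuleId(const std::string &ModuleId) {
  unsigned Seed = 0;
  for (unsigned char C : ModuleId)  // simple additive byte sum, as above
    Seed += C;
  return Seed;
}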
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp
deleted file mode 100644
index a16ca1fb8efa..000000000000
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp
+++ /dev/null
@@ -1,178 +0,0 @@
-//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This contains code to emit warnings for potentially incorrect usage of the
-// llvm.expect intrinsic. This utility extracts the threshold values from
-// metadata associated with the instrumented Branch or Switch instruction. The
-// threshold values are then used to determine if a warning should be emmited.
-//
-// MisExpect metadata is generated when llvm.expect intrinsics are lowered see
-// LowerExpectIntrinsic.cpp
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/MisExpect.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/BranchProbability.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormatVariadic.h"
-#include <cstdint>
-#include <functional>
-#include <numeric>
-
-#define DEBUG_TYPE "misexpect"
-
-using namespace llvm;
-using namespace misexpect;
-
-namespace llvm {
-
-// Command line option to enable/disable the warning when profile data suggests
-// a mismatch with the use of the llvm.expect intrinsic
-static cl::opt<bool> PGOWarnMisExpect(
- "pgo-warn-misexpect", cl::init(false), cl::Hidden,
- cl::desc("Use this option to turn on/off "
- "warnings about incorrect usage of llvm.expect intrinsics."));
-
-} // namespace llvm
-
-namespace {
-
-Instruction *getOprndOrInst(Instruction *I) {
- assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
- Instruction *Ret = nullptr;
- if (auto *B = dyn_cast<BranchInst>(I)) {
- Ret = dyn_cast<Instruction>(B->getCondition());
- }
- // TODO: Find a way to resolve condition location for switches
- // Using the condition of the switch seems to often resolve to an earlier
- // point in the program, i.e. the calculation of the switch condition, rather
- // than the switches location in the source code. Thus, we should use the
- // instruction to get source code locations rather than the condition to
- // improve diagnostic output, such as the caret. If the same problem exists
- // for branch instructions, then we should remove this function and directly
- // use the instruction
- //
- // else if (auto S = dyn_cast<SwitchInst>(I)) {
- // Ret = I;
- //}
- return Ret ? Ret : I;
-}
-
-void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
- uint64_t ProfCount, uint64_t TotalCount) {
- double PercentageCorrect = (double)ProfCount / TotalCount;
- auto PerString =
- formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
- auto RemStr = formatv(
- "Potential performance regression from use of the llvm.expect intrinsic: "
- "Annotation was correct on {0} of profiled executions.",
- PerString);
- Twine Msg(PerString);
- Instruction *Cond = getOprndOrInst(I);
- if (PGOWarnMisExpect)
- Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg));
- OptimizationRemarkEmitter ORE(I->getParent()->getParent());
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
-}
-
-} // namespace
-
-namespace llvm {
-namespace misexpect {
-
-void verifyMisExpect(Instruction *I, const SmallVector<uint32_t, 4> &Weights,
- LLVMContext &Ctx) {
- if (auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect)) {
- auto *MisExpectDataName = dyn_cast<MDString>(MisExpectData->getOperand(0));
- if (MisExpectDataName &&
- MisExpectDataName->getString().equals("misexpect")) {
- LLVM_DEBUG(llvm::dbgs() << "------------------\n");
- LLVM_DEBUG(llvm::dbgs()
- << "Function: " << I->getFunction()->getName() << "\n");
- LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n");
- LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) {
- llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n";
- });
-
- // extract values from misexpect metadata
- const auto *IndexCint =
- mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(1));
- const auto *LikelyCInt =
- mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(2));
- const auto *UnlikelyCInt =
- mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(3));
-
- if (!IndexCint || !LikelyCInt || !UnlikelyCInt)
- return;
-
- const uint64_t Index = IndexCint->getZExtValue();
- const uint64_t LikelyBranchWeight = LikelyCInt->getZExtValue();
- const uint64_t UnlikelyBranchWeight = UnlikelyCInt->getZExtValue();
- const uint64_t ProfileCount = Weights[Index];
- const uint64_t CaseTotal = std::accumulate(
- Weights.begin(), Weights.end(), (uint64_t)0, std::plus<uint64_t>());
- const uint64_t NumUnlikelyTargets = Weights.size() - 1;
-
- const uint64_t TotalBranchWeight =
- LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
-
- const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight,
- TotalBranchWeight);
- uint64_t ScaledThreshold = LikelyThreshold.scale(CaseTotal);
-
- LLVM_DEBUG(llvm::dbgs()
- << "Unlikely Targets: " << NumUnlikelyTargets << ":\n");
- LLVM_DEBUG(llvm::dbgs() << "Profile Count: " << ProfileCount << ":\n");
- LLVM_DEBUG(llvm::dbgs()
- << "Scaled Threshold: " << ScaledThreshold << ":\n");
- LLVM_DEBUG(llvm::dbgs() << "------------------\n");
- if (ProfileCount < ScaledThreshold)
- emitMisexpectDiagnostic(I, Ctx, ProfileCount, CaseTotal);
- }
- }
-}
-
-void checkFrontendInstrumentation(Instruction &I) {
- if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) {
- unsigned NOps = MD->getNumOperands();
-
- // Only emit misexpect diagnostics if at least 2 branch weights are present.
- // Less than 2 branch weights means that the profiling metadata is:
- // 1) incorrect/corrupted
- // 2) not branch weight metadata
- // 3) completely deterministic
- // In these cases we should not emit any diagnostic related to misexpect.
- if (NOps < 3)
- return;
-
- // Operand 0 is a string tag "branch_weights"
- if (MDString *Tag = cast<MDString>(MD->getOperand(0))) {
- if (Tag->getString().equals("branch_weights")) {
- SmallVector<uint32_t, 4> RealWeights(NOps - 1);
- for (unsigned i = 1; i < NOps; i++) {
- ConstantInt *Value =
- mdconst::dyn_extract<ConstantInt>(MD->getOperand(i));
- RealWeights[i - 1] = Value->getZExtValue();
- }
- verifyMisExpect(&I, RealWeights, I.getContext());
- }
- }
- }
-}
-
-} // namespace misexpect
-} // namespace llvm
-#undef DEBUG_TYPE
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 99b64a7462f6..3312a6f9459b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -53,6 +53,10 @@ static cl::opt<bool> VerifyPredicateInfo(
DEBUG_COUNTER(RenameCounter, "predicateinfo-rename",
"Controls which variables are renamed with predicateinfo");
+// Maximum number of conditions considered for renaming for each branch/assume.
+// This limits renaming of deep and/or chains.
+static const unsigned MaxCondsPerBranch = 8;
+
namespace {
// Given a predicate info that is a type of branching terminator, get the
// branching block.
@@ -367,6 +371,13 @@ void PredicateInfoBuilder::convertUsesToDFSOrdered(
}
}
+bool shouldRename(Value *V) {
+ // Only want real values, not constants. Additionally, operands with one use
+ // are only being used in the comparison, which means they will not be useful
+ // for us to consider for predicateinfo.
+ return (isa<Instruction>(V) || isa<Argument>(V)) && !V->hasOneUse();
+}
+
// Collect relevant operations from Comparison that we may want to insert copies
// for.
void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
@@ -374,15 +385,9 @@ void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
auto *Op1 = Comparison->getOperand(1);
if (Op0 == Op1)
return;
- CmpOperands.push_back(Comparison);
- // Only want real values, not constants. Additionally, operands with one use
- // are only being used in the comparison, which means they will not be useful
- // for us to consider for predicateinfo.
- //
- if ((isa<Instruction>(Op0) || isa<Argument>(Op0)) && !Op0->hasOneUse())
- CmpOperands.push_back(Op0);
- if ((isa<Instruction>(Op1) || isa<Argument>(Op1)) && !Op1->hasOneUse())
- CmpOperands.push_back(Op1);
+
+ CmpOperands.push_back(Op0);
+ CmpOperands.push_back(Op1);
}
// Add Op, PB to the list of value infos for Op, and mark Op to be renamed.
@@ -400,38 +405,32 @@ void PredicateInfoBuilder::addInfoFor(SmallVectorImpl<Value *> &OpsToRename,
void PredicateInfoBuilder::processAssume(
IntrinsicInst *II, BasicBlock *AssumeBB,
SmallVectorImpl<Value *> &OpsToRename) {
- // See if we have a comparison we support
- SmallVector<Value *, 8> CmpOperands;
- SmallVector<Value *, 2> ConditionsToProcess;
- CmpInst::Predicate Pred;
- Value *Operand = II->getOperand(0);
- if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()),
- m_Cmp(Pred, m_Value(), m_Value()))
- .match(II->getOperand(0))) {
- ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(0));
- ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(1));
- ConditionsToProcess.push_back(Operand);
- } else if (isa<CmpInst>(Operand)) {
-
- ConditionsToProcess.push_back(Operand);
- }
- for (auto Cond : ConditionsToProcess) {
- if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
- collectCmpOps(Cmp, CmpOperands);
- // Now add our copy infos for our operands
- for (auto *Op : CmpOperands) {
- auto *PA = new PredicateAssume(Op, II, Cmp);
- addInfoFor(OpsToRename, Op, PA);
+ SmallVector<Value *, 4> Worklist;
+ SmallPtrSet<Value *, 4> Visited;
+ Worklist.push_back(II->getOperand(0));
+ while (!Worklist.empty()) {
+ Value *Cond = Worklist.pop_back_val();
+ if (!Visited.insert(Cond).second)
+ continue;
+ if (Visited.size() > MaxCondsPerBranch)
+ break;
+
+ Value *Op0, *Op1;
+ if (match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
+ Worklist.push_back(Op1);
+ Worklist.push_back(Op0);
+ }
+
+ SmallVector<Value *, 4> Values;
+ Values.push_back(Cond);
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond))
+ collectCmpOps(Cmp, Values);
+
+ for (Value *V : Values) {
+ if (shouldRename(V)) {
+ auto *PA = new PredicateAssume(V, II, Cond);
+ addInfoFor(OpsToRename, V, PA);
}
- CmpOperands.clear();
- } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
- // Otherwise, it should be an AND.
- assert(BinOp->getOpcode() == Instruction::And &&
- "Should have been an AND");
- auto *PA = new PredicateAssume(BinOp, II, BinOp);
- addInfoFor(OpsToRename, BinOp, PA);
- } else {
- llvm_unreachable("Unknown type of condition");
}
}
}
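
The rewritten processAssume above replaces the pattern-matched special cases with a bounded worklist walk over chains of logical ands, capped at MaxCondsPerBranch visited conditions. A minimal standalone sketch of that traversal shape in plain C++ (the Expr node and Process callback are stand-ins, not the LLVM Value hierarchy):

#include <cstddef>
#include <unordered_set>
#include <vector>

// Hypothetical condition node: either a leaf or a logical AND of two others.
struct Expr {
  bool IsAnd = false;
  Expr *LHS = nullptr, *RHS = nullptr;
};

// Visit at most MaxConds distinct conditions, splitting ANDs as we go.
// Process stands in for the per-condition work (collecting cmp operands and
// recording predicate info).
template <typename Fn>
void walkConditions(Expr *Root, size_t MaxConds, Fn Process) {
  std::vector<Expr *> Worklist{Root};
  std::unordered_set<Expr *> Visited;
  while (!Worklist.empty()) {
    Expr *Cond = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(Cond).second)
      continue;                 // already handled this condition
    if (Visited.size() > MaxConds)
      break;                    // give up on very deep and-chains
    if (Cond->IsAnd) {          // decompose a && b, left operand first
      Worklist.push_back(Cond->RHS);
      Worklist.push_back(Cond->LHS);
    }
    Process(Cond);
  }
}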
@@ -443,68 +442,46 @@ void PredicateInfoBuilder::processBranch(
SmallVectorImpl<Value *> &OpsToRename) {
BasicBlock *FirstBB = BI->getSuccessor(0);
BasicBlock *SecondBB = BI->getSuccessor(1);
- SmallVector<BasicBlock *, 2> SuccsToProcess;
- SuccsToProcess.push_back(FirstBB);
- SuccsToProcess.push_back(SecondBB);
- SmallVector<Value *, 2> ConditionsToProcess;
-
- auto InsertHelper = [&](Value *Op, bool isAnd, bool isOr, Value *Cond) {
- for (auto *Succ : SuccsToProcess) {
- // Don't try to insert on a self-edge. This is mainly because we will
- // eliminate during renaming anyway.
- if (Succ == BranchBB)
- continue;
- bool TakenEdge = (Succ == FirstBB);
- // For and, only insert on the true edge
- // For or, only insert on the false edge
- if ((isAnd && !TakenEdge) || (isOr && TakenEdge))
+
+ for (BasicBlock *Succ : {FirstBB, SecondBB}) {
+ bool TakenEdge = Succ == FirstBB;
+ // Don't try to insert on a self-edge. This is mainly because we will
+ // eliminate it during renaming anyway.
+ if (Succ == BranchBB)
+ continue;
+
+ SmallVector<Value *, 4> Worklist;
+ SmallPtrSet<Value *, 4> Visited;
+ Worklist.push_back(BI->getCondition());
+ while (!Worklist.empty()) {
+ Value *Cond = Worklist.pop_back_val();
+ if (!Visited.insert(Cond).second)
continue;
- PredicateBase *PB =
- new PredicateBranch(Op, BranchBB, Succ, Cond, TakenEdge);
- addInfoFor(OpsToRename, Op, PB);
- if (!Succ->getSinglePredecessor())
- EdgeUsesOnly.insert({BranchBB, Succ});
- }
- };
+ if (Visited.size() > MaxCondsPerBranch)
+ break;
+
+ Value *Op0, *Op1;
+ if (TakenEdge ? match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))
+ : match(Cond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
+ Worklist.push_back(Op1);
+ Worklist.push_back(Op0);
+ }
- // Match combinations of conditions.
- CmpInst::Predicate Pred;
- bool isAnd = false;
- bool isOr = false;
- SmallVector<Value *, 8> CmpOperands;
- if (match(BI->getCondition(), m_And(m_Cmp(Pred, m_Value(), m_Value()),
- m_Cmp(Pred, m_Value(), m_Value()))) ||
- match(BI->getCondition(), m_Or(m_Cmp(Pred, m_Value(), m_Value()),
- m_Cmp(Pred, m_Value(), m_Value())))) {
- auto *BinOp = cast<BinaryOperator>(BI->getCondition());
- if (BinOp->getOpcode() == Instruction::And)
- isAnd = true;
- else if (BinOp->getOpcode() == Instruction::Or)
- isOr = true;
- ConditionsToProcess.push_back(BinOp->getOperand(0));
- ConditionsToProcess.push_back(BinOp->getOperand(1));
- ConditionsToProcess.push_back(BI->getCondition());
- } else if (isa<CmpInst>(BI->getCondition())) {
- ConditionsToProcess.push_back(BI->getCondition());
- }
- for (auto Cond : ConditionsToProcess) {
- if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
- collectCmpOps(Cmp, CmpOperands);
- // Now add our copy infos for our operands
- for (auto *Op : CmpOperands)
- InsertHelper(Op, isAnd, isOr, Cmp);
- } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
- // This must be an AND or an OR.
- assert((BinOp->getOpcode() == Instruction::And ||
- BinOp->getOpcode() == Instruction::Or) &&
- "Should have been an AND or an OR");
- // The actual value of the binop is not subject to the same restrictions
- // as the comparison. It's either true or false on the true/false branch.
- InsertHelper(BinOp, false, false, BinOp);
- } else {
- llvm_unreachable("Unknown type of condition");
+ SmallVector<Value *, 4> Values;
+ Values.push_back(Cond);
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond))
+ collectCmpOps(Cmp, Values);
+
+ for (Value *V : Values) {
+ if (shouldRename(V)) {
+ PredicateBase *PB =
+ new PredicateBranch(V, BranchBB, Succ, Cond, TakenEdge);
+ addInfoFor(OpsToRename, V, PB);
+ if (!Succ->getSinglePredecessor())
+ EdgeUsesOnly.insert({BranchBB, Succ});
+ }
+ }
}
- CmpOperands.clear();
}
}
// Process a block terminating switch, and place relevant operations to be
@@ -822,6 +799,56 @@ PredicateInfo::~PredicateInfo() {
}
}
+Optional<PredicateConstraint> PredicateBase::getConstraint() const {
+ switch (Type) {
+ case PT_Assume:
+ case PT_Branch: {
+ bool TrueEdge = true;
+ if (auto *PBranch = dyn_cast<PredicateBranch>(this))
+ TrueEdge = PBranch->TrueEdge;
+
+ if (Condition == RenamedOp) {
+ return {{CmpInst::ICMP_EQ,
+ TrueEdge ? ConstantInt::getTrue(Condition->getType())
+ : ConstantInt::getFalse(Condition->getType())}};
+ }
+
+ CmpInst *Cmp = dyn_cast<CmpInst>(Condition);
+ if (!Cmp) {
+ // TODO: Make this an assertion once RenamedOp is fully accurate.
+ return None;
+ }
+
+ CmpInst::Predicate Pred;
+ Value *OtherOp;
+ if (Cmp->getOperand(0) == RenamedOp) {
+ Pred = Cmp->getPredicate();
+ OtherOp = Cmp->getOperand(1);
+ } else if (Cmp->getOperand(1) == RenamedOp) {
+ Pred = Cmp->getSwappedPredicate();
+ OtherOp = Cmp->getOperand(0);
+ } else {
+ // TODO: Make this an assertion once RenamedOp is fully accurate.
+ return None;
+ }
+
+ // Invert predicate along false edge.
+ if (!TrueEdge)
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ return {{Pred, OtherOp}};
+ }
+ case PT_Switch:
+ if (Condition != RenamedOp) {
+ // TODO: Make this an assertion once RenamedOp is fully accurate.
+ return None;
+ }
+
+ return {{CmpInst::ICMP_EQ, cast<PredicateSwitch>(this)->CaseValue}};
+ }
+ llvm_unreachable("Unknown predicate type");
+}
+
void PredicateInfo::verifyPredicateInfo() const {}
char PredicateInfoPrinterLegacyPass::ID = 0;
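
The new PredicateBase::getConstraint above normalizes a comparison into "RenamedOp pred OtherOp" form: keep the predicate when the renamed value is operand 0, swap it when it is operand 1, and invert it along the false edge. A small standalone illustration of that normalization over toy value ids (the enum and helpers are assumptions for the sketch, not CmpInst):

#include <optional>
#include <utility>

// Toy predicate set standing in for CmpInst::Predicate.
enum class Pred { SLT, SGT, SLE, SGE, EQ, NE };

static Pred swapped(Pred P) {   // a P b  is equivalent to  b swapped(P) a
  switch (P) {
  case Pred::SLT: return Pred::SGT;
  case Pred::SGT: return Pred::SLT;
  case Pred::SLE: return Pred::SGE;
  case Pred::SGE: return Pred::SLE;
  default:        return P;     // EQ/NE are symmetric
  }
}

static Pred inverse(Pred P) {   // negation, applied along the false edge
  switch (P) {
  case Pred::SLT: return Pred::SGE;
  case Pred::SGE: return Pred::SLT;
  case Pred::SGT: return Pred::SLE;
  case Pred::SLE: return Pred::SGT;
  case Pred::EQ:  return Pred::NE;
  case Pred::NE:  return Pred::EQ;
  }
  return P;
}

struct Cmp { Pred P; int Op0, Op1; };   // toy comparison over value ids

// Constraint "Renamed P Other" implied for `Renamed` by taking the true or
// false edge of a branch on comparison C, mirroring the normalization above.
static std::optional<std::pair<Pred, int>>
constraintFor(const Cmp &C, int Renamed, bool TrueEdge) {
  Pred P;
  int Other;
  if (C.Op0 == Renamed)      { P = C.P;          Other = C.Op1; }
  else if (C.Op1 == Renamed) { P = swapped(C.P); Other = C.Op0; }
  else return std::nullopt;   // comparison does not mention the renamed value
  if (!TrueEdge)
    P = inverse(P);
  return std::make_pair(P, Other);
}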
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index c7e9c919ec47..86bbb6a889e6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -62,10 +62,6 @@ STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
- // FIXME: If the memory unit is of pointer or integer type, we can permit
- // assignments to subsections of the memory unit.
- unsigned AS = AI->getType()->getAddressSpace();
-
// Only allow direct and non-volatile loads and stores...
for (const User *U : AI->users()) {
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
@@ -81,19 +77,18 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
if (SI->isVolatile())
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (!II->isLifetimeStartOrEnd())
+ if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
- return false;
- if (!onlyUsedByLifetimeMarkers(BCI))
+ if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
return false;
} else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
- if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
- return false;
if (!GEPI->hasAllZeroIndices())
return false;
- if (!onlyUsedByLifetimeMarkers(GEPI))
+ if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI))
+ return false;
+ } else if (const AddrSpaceCastInst *ASCI = dyn_cast<AddrSpaceCastInst>(U)) {
+ if (!onlyUsedByLifetimeMarkers(ASCI))
return false;
} else {
return false;
@@ -106,6 +101,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
namespace {
struct AllocaInfo {
+ using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>;
+
SmallVector<BasicBlock *, 32> DefiningBlocks;
SmallVector<BasicBlock *, 32> UsingBlocks;
@@ -113,7 +110,7 @@ struct AllocaInfo {
BasicBlock *OnlyBlock;
bool OnlyUsedInOneBlock;
- TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares;
+ DbgUserVec DbgUsers;
void clear() {
DefiningBlocks.clear();
@@ -121,7 +118,7 @@ struct AllocaInfo {
OnlyStore = nullptr;
OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
- DbgDeclares.clear();
+ DbgUsers.clear();
}
/// Scan the uses of the specified alloca, filling in the AllocaInfo used
@@ -132,8 +129,8 @@ struct AllocaInfo {
// As we scan the uses of the alloca instruction, keep track of stores,
// and decide whether all of the loads and stores to the alloca are within
// the same basic block.
- for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
- Instruction *User = cast<Instruction>(*UI++);
+ for (User *U : AI->users()) {
+ Instruction *User = cast<Instruction>(U);
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Remember the basic blocks which define new values for the alloca
@@ -154,7 +151,7 @@ struct AllocaInfo {
}
}
- DbgDeclares = FindDbgAddrUses(AI);
+ findDbgUsers(DbgUsers, AI);
}
};
@@ -252,7 +249,7 @@ struct PromoteMem2Reg {
/// For each alloca, we keep track of the dbg.declare intrinsic that
/// describes it, if any, so that we can convert it to a dbg.value
/// intrinsic if the alloca gets promoted.
- SmallVector<TinyPtrVector<DbgVariableIntrinsic *>, 8> AllocaDbgDeclares;
+ SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers;
/// The set of basic blocks the renamer has already visited.
SmallPtrSet<BasicBlock *, 16> Visited;
@@ -312,23 +309,37 @@ static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
AC->registerAssumption(CI);
}
-static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+static void removeIntrinsicUsers(AllocaInst *AI) {
// Knowing that this alloca is promotable, we know that it's safe to kill all
// instructions except for load and store.
- for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
- Instruction *I = cast<Instruction>(*UI);
+ for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) {
+ Instruction *I = cast<Instruction>(UI->getUser());
+ Use &U = *UI;
++UI;
if (isa<LoadInst>(I) || isa<StoreInst>(I))
continue;
+ // Drop the use of AI in droppable instructions.
+ if (I->isDroppable()) {
+ I->dropDroppableUse(U);
+ continue;
+ }
+
if (!I->getType()->isVoidTy()) {
// The only users of this bitcast/GEP instruction are lifetime intrinsics.
// Follow the use/def chain to erase them now instead of leaving it for
// dead code elimination later.
- for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
- Instruction *Inst = cast<Instruction>(*UUI);
+ for (auto UUI = I->use_begin(), UUE = I->use_end(); UUI != UUE;) {
+ Instruction *Inst = cast<Instruction>(UUI->getUser());
+ Use &UU = *UUI;
++UUI;
+
+ // Drop the use of I in droppable instructions.
+ if (Inst->isDroppable()) {
+ Inst->dropDroppableUse(UU);
+ continue;
+ }
Inst->eraseFromParent();
}
 }
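
Both loops in removeIntrinsicUsers above advance the use iterator before dropping or erasing the current user, since the mutation unlinks the node being visited. A minimal sketch of that erase-while-iterating idiom over a std::list (shouldDrop is a hypothetical stand-in for the droppable/lifetime check):

#include <list>

template <typename T, typename Fn>
void eraseMatching(std::list<T> &Uses, Fn shouldDrop) {
  for (auto It = Uses.begin(), End = Uses.end(); It != End;) {
    auto Cur = It++;          // step past the element before mutating
    if (shouldDrop(*Cur))
      Uses.erase(Cur);        // safe: It no longer points at Cur
  }
}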
@@ -355,8 +366,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Clear out UsingBlocks. We will reconstruct it here if needed.
Info.UsingBlocks.clear();
- for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
- Instruction *UserInst = cast<Instruction>(*UI++);
+ for (User *U : make_early_inc_range(AI->users())) {
+ Instruction *UserInst = cast<Instruction>(U);
if (UserInst == OnlyStore)
continue;
LoadInst *LI = cast<LoadInst>(UserInst);
@@ -412,10 +423,14 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Record debuginfo for the store and remove the declaration's
// debuginfo.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
- DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
- ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
- DII->eraseFromParent();
+ for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
+ if (DII->isAddressOfVariable()) {
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
+ ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
+ DII->eraseFromParent();
+ } else if (DII->getExpression()->startsWithDeref()) {
+ DII->eraseFromParent();
+ }
}
// Remove the (now dead) store and alloca.
Info.OnlyStore->eraseFromParent();
@@ -465,8 +480,8 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
- for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+ for (User *U : make_early_inc_range(AI->users())) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
if (!LI)
continue;
@@ -510,9 +525,11 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
while (!AI->use_empty()) {
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
- DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
- ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
+ if (DII->isAddressOfVariable()) {
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
+ ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ }
}
SI->eraseFromParent();
LBI.deleteValue(SI);
@@ -521,8 +538,9 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
AI->eraseFromParent();
// The alloca's debuginfo can be removed as well.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares)
- DII->eraseFromParent();
+ for (DbgVariableIntrinsic *DII : Info.DbgUsers)
+ if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
+ DII->eraseFromParent();
++NumLocalPromoted;
return true;
@@ -531,7 +549,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
void PromoteMem2Reg::run() {
Function &F = *DT.getRoot()->getParent();
- AllocaDbgDeclares.resize(Allocas.size());
+ AllocaDbgUsers.resize(Allocas.size());
AllocaInfo Info;
LargeBlockInfo LBI;
@@ -544,7 +562,7 @@ void PromoteMem2Reg::run() {
assert(AI->getParent()->getParent() == &F &&
"All allocas should be in the same function, which is same as DF!");
- removeLifetimeIntrinsicUsers(AI);
+ removeIntrinsicUsers(AI);
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
@@ -589,8 +607,8 @@ void PromoteMem2Reg::run() {
}
// Remember the dbg.declare intrinsic describing this alloca, if any.
- if (!Info.DbgDeclares.empty())
- AllocaDbgDeclares[AllocaNum] = Info.DbgDeclares;
+ if (!Info.DbgUsers.empty())
+ AllocaDbgUsers[AllocaNum] = Info.DbgUsers;
// Keep the reverse mapping of the 'Allocas' array for the rename pass.
AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
@@ -663,9 +681,11 @@ void PromoteMem2Reg::run() {
}
// Remove alloca's dbg.declare intrinsics from the function.
- for (auto &Declares : AllocaDbgDeclares)
- for (auto *DII : Declares)
- DII->eraseFromParent();
+ for (auto &DbgUsers : AllocaDbgUsers) {
+ for (auto *DII : DbgUsers)
+ if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
+ DII->eraseFromParent();
+ }
// Loop over all of the PHI nodes and see if there are any that we can get
// rid of because they merge all of the same incoming values. This can
@@ -720,7 +740,7 @@ void PromoteMem2Reg::run() {
continue;
// Get the preds for BB.
- SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ SmallVector<BasicBlock *, 16> Preds(predecessors(BB));
// Ok, now we know that all of the PHI nodes are missing entries for some
// basic blocks. Start by sorting the incoming predecessors for efficient
@@ -887,7 +907,7 @@ NextIteration:
// operands so far. Remember this count.
unsigned NewPHINumOperands = APN->getNumOperands();
- unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
+ unsigned NumEdges = llvm::count(successors(Pred), BB);
assert(NumEdges && "Must be at least one edge from Pred to BB!");
// Add entries for all the phis.
@@ -905,8 +925,9 @@ NextIteration:
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
- for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
- ConvertDebugDeclareToDebugValue(DII, APN, DIB);
+ for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo])
+ if (DII->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DII, APN, DIB);
// Get the next phi node.
++PNI;
@@ -965,8 +986,9 @@ NextIteration:
// Record debuginfo for the store before removing it.
IncomingLocs[AllocaNo] = SI->getDebugLoc();
- for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[ai->second])
- ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second])
+ if (DII->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DII, SI, DIB);
BB->getInstList().erase(SI);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 57df2334c750..c210d1c46077 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -64,8 +64,7 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
}
Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const {
- AvailableValsTy::iterator AVI = getAvailableVals(AV).find(BB);
- return (AVI != getAvailableVals(AV).end()) ? AVI->second : nullptr;
+ return getAvailableVals(AV).lookup(BB);
}
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
@@ -254,12 +253,10 @@ public:
// We can get our predecessor info by walking the pred_iterator list,
// but it is relatively slow. If we already have PHI nodes in this
// block, walk one of them to get the predecessor list instead.
- if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- Preds->append(SomePhi->block_begin(), SomePhi->block_end());
- } else {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Preds->push_back(*PI);
- }
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin()))
+ append_range(*Preds, SomePhi->blocks());
+ else
+ append_range(*Preds, predecessors(BB));
}
/// GetUndefVal - Get an undefined value of the same type as the value
@@ -283,12 +280,6 @@ public:
PHI->addIncoming(Val, Pred);
}
- /// InstrIsPHI - Check if an instruction is a PHI.
- ///
- static PHINode *InstrIsPHI(Instruction *I) {
- return dyn_cast<PHINode>(I);
- }
-
/// ValueIsPHI - Check if a value is a PHI.
static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
return dyn_cast<PHINode>(Val);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 71b48482f26a..6dbfb0b61fea 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -38,8 +39,7 @@ cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
using namespace PatternMatch;
/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
-/// reusing an existing cast if a suitable one exists, moving an existing
-/// cast if a suitable one exists but isn't in the right place, or
+/// reusing an existing cast if a suitable one (= dominating IP) exists, or
/// creating a new one.
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
Instruction::CastOps Op,
@@ -58,40 +58,38 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
Instruction *Ret = nullptr;
// Check to see if there is already a cast!
- for (User *U : V->users())
- if (U->getType() == Ty)
- if (CastInst *CI = dyn_cast<CastInst>(U))
- if (CI->getOpcode() == Op) {
- // If the cast isn't where we want it, create a new cast at IP.
- // Likewise, do not reuse a cast at BIP because it must dominate
- // instructions that might be inserted before BIP.
- if (BasicBlock::iterator(CI) != IP || BIP == IP) {
- // Create a new cast, and leave the old cast in place in case
- // it is being used as an insert point.
- Ret = CastInst::Create(Op, V, Ty, "", &*IP);
- Ret->takeName(CI);
- CI->replaceAllUsesWith(Ret);
- break;
- }
- Ret = CI;
- break;
- }
+ for (User *U : V->users()) {
+ if (U->getType() != Ty)
+ continue;
+ CastInst *CI = dyn_cast<CastInst>(U);
+ if (!CI || CI->getOpcode() != Op)
+ continue;
+
+ // Found a suitable cast that is at IP or comes before IP. Use it. Note that
+ // the cast must also properly dominate the Builder's insertion point.
+ if (IP->getParent() == CI->getParent() && &*BIP != CI &&
+ (&*IP == CI || CI->comesBefore(&*IP))) {
+ Ret = CI;
+ break;
+ }
+ }
// Create a new cast.
- if (!Ret)
+ if (!Ret) {
Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);
+ rememberInstruction(Ret);
+ }
// We assert at the end of the function since IP might point to an
// instruction with different dominance properties than a cast
// (an invoke for example) and not dominate BIP (but the cast does).
assert(SE.DT.dominates(Ret, &*BIP));
- rememberInstruction(Ret);
return Ret;
}
-static BasicBlock::iterator findInsertPointAfter(Instruction *I,
- BasicBlock *MustDominate) {
+BasicBlock::iterator
+SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) {
BasicBlock::iterator IP = ++I->getIterator();
if (auto *II = dyn_cast<InvokeInst>(I))
IP = II->getNormalDest()->begin();
@@ -102,11 +100,17 @@ static BasicBlock::iterator findInsertPointAfter(Instruction *I,
if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
++IP;
} else if (isa<CatchSwitchInst>(IP)) {
- IP = MustDominate->getFirstInsertionPt();
+ IP = MustDominate->getParent()->getFirstInsertionPt();
} else {
assert(!IP->isEHPad() && "unexpected eh pad!");
}
+ // Adjust insert point to be after instructions inserted by the expander, so
+ // we can re-use already inserted instructions. Avoid skipping past the
+ // original \p MustDominate, in case it is an inserted instruction.
+ while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
+ ++IP;
+
return IP;
}
@@ -122,6 +126,22 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
"InsertNoopCastOfTo cannot change sizes!");
+ // inttoptr only works for integral pointers. For non-integral pointers, we
+ // can create a GEP on i8* null with the integral value as index. Note that
+ // it is safe to use GEP of null instead of inttoptr here, because only
+ // expressions already based on a GEP of null should be converted to pointers
+ // during expansion.
+ if (Op == Instruction::IntToPtr) {
+ auto *PtrTy = cast<PointerType>(Ty);
+ if (DL.isNonIntegralPointerType(PtrTy)) {
+ auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
+ assert(DL.getTypeAllocSize(Int8PtrTy->getElementType()) == 1 &&
+ "alloc size of i8 must by 1 byte for the GEP to be correct");
+ auto *GEP = Builder.CreateGEP(
+ Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep");
+ return Builder.CreateBitCast(GEP, Ty);
+ }
+ }
// Short-circuit unnecessary bitcasts.
if (Op == Instruction::BitCast) {
if (V->getType() == Ty)
@@ -166,7 +186,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
// Cast the instruction immediately after the instruction.
Instruction *I = cast<Instruction>(V);
- BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
+ BasicBlock::iterator IP = findInsertPointAfter(I, &*Builder.GetInsertPoint());
return ReuseOrCreateCast(I, Ty, Op, IP);
}
@@ -238,7 +258,6 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
BO->setHasNoUnsignedWrap();
if (Flags & SCEV::FlagNSW)
BO->setHasNoSignedWrap();
- rememberInstruction(BO);
return BO;
}
@@ -290,7 +309,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor))
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
if (!C->getAPInt().srem(FC->getAPInt())) {
- SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ SmallVector<const SCEV *, 4> NewMulOps(M->operands());
NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
S = SE.getMulExpr(NewMulOps);
return true;
@@ -462,9 +481,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// we didn't find any operands that could be factored, tentatively
// assume that element zero was selected (since the zero offset
// would obviously be folded away).
- Value *Scaled = ScaledOps.empty() ?
- Constant::getNullValue(Ty) :
- expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
+ Value *Scaled =
+ ScaledOps.empty()
+ ? Constant::getNullValue(Ty)
+ : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false);
GepIndices.push_back(Scaled);
// Collect struct field index operands.
@@ -523,7 +543,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
// Expand the operands for a plain byte offset.
- Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
+ Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty, false);
// Fold a GEP with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(V))
@@ -564,10 +584,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
// Emit a GEP.
- Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
- rememberInstruction(GEP);
-
- return GEP;
+ return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
}
{
@@ -598,7 +615,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Casted = InsertNoopCastOfTo(Casted, PTy);
Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
Ops.push_back(SE.getUnknown(GEP));
- rememberInstruction(GEP);
}
return expand(SE.getAddExpr(Ops));
@@ -748,14 +764,14 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
} else if (Op->isNonConstantNegative()) {
// Instead of doing a negate and add, just do a subtract.
- Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+ Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty, false);
Sum = InsertNoopCastOfTo(Sum, Ty);
Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
/*IsSafeToHoist*/ true);
++I;
} else {
// A simple add.
- Value *W = expandCodeFor(Op, Ty);
+ Value *W = expandCodeForImpl(Op, Ty, false);
Sum = InsertNoopCastOfTo(Sum, Ty);
// Canonicalize a constant to the RHS.
if (isa<Constant>(Sum)) std::swap(Sum, W);
@@ -807,7 +823,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
// that are needed into the result.
- Value *P = expandCodeFor(I->second, Ty);
+ Value *P = expandCodeForImpl(I->second, Ty, false);
Value *Result = nullptr;
if (Exponent & 1)
Result = P;
@@ -866,7 +882,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *LHS = expandCodeFor(S->getLHS(), Ty);
+ Value *LHS = expandCodeForImpl(S->getLHS(), Ty, false);
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
const APInt &RHS = SC->getAPInt();
if (RHS.isPowerOf2())
@@ -875,7 +891,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
}
- Value *RHS = expandCodeFor(S->getRHS(), Ty);
+ Value *RHS = expandCodeForImpl(S->getRHS(), Ty, false);
return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
/*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}
@@ -895,7 +911,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
Base = A->getOperand(A->getNumOperands()-1);
- SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
+ SmallVector<const SCEV *, 8> NewAddOps(A->operands());
NewAddOps.back() = Rest;
Rest = SE.getAddExpr(NewAddOps);
ExposePointerBase(Base, Rest, SE);
@@ -1073,15 +1089,12 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
GEPPtrTy->getAddressSpace());
IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
- if (IncV->getType() != PN->getType()) {
+ if (IncV->getType() != PN->getType())
IncV = Builder.CreateBitCast(IncV, PN->getType());
- rememberInstruction(IncV);
- }
} else {
IncV = useSubtract ?
Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
- rememberInstruction(IncV);
}
return IncV;
}
@@ -1193,6 +1206,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (!SE.isSCEVable(PN.getType()))
continue;
+ // We should not look for an incomplete PHI. Getting SCEV for an incomplete
+ // PHI has no meaning at all.
+ if (!PN.isComplete()) {
+ DEBUG_WITH_TYPE(
+ DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
+ continue;
+ }
+
const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
if (!PhiSCEV)
continue;
@@ -1253,6 +1274,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
InsertedValues.insert(AddRecPhiMatch);
// Remember the increment.
rememberInstruction(IncV);
+ // Those values were not actually inserted but re-used.
+ ReusedValues.insert(AddRecPhiMatch);
+ ReusedValues.insert(IncV);
return AddRecPhiMatch;
}
}
@@ -1273,8 +1297,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Expand code for the start value into the loop preheader.
assert(L->getLoopPreheader() &&
"Can't expand add recurrences without a loop preheader!");
- Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
- L->getLoopPreheader()->getTerminator());
+ Value *StartV =
+ expandCodeForImpl(Normalized->getStart(), ExpandTy,
+ L->getLoopPreheader()->getTerminator(), false);
// StartV must have been inserted into L's preheader to dominate the new
// phi.
@@ -1292,7 +1317,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
// Expand the step somewhere that dominates the loop header.
- Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
+ Value *StepV = expandCodeForImpl(
+ Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false);
// The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
// we actually do emit an addition. It does not apply if we emit a
@@ -1306,7 +1332,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
Twine(IVName) + ".iv");
- rememberInstruction(PN);
// Create the step instructions and populate the PHI.
for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
@@ -1415,6 +1440,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
assert(LatchBlock && "PostInc mode requires a unique loop latch!");
Result = PN->getIncomingValueForBlock(LatchBlock);
+ // We might be introducing a new use of the post-inc IV that is not poison
+ // safe, in which case we should drop poison generating flags. Only keep
+ // those flags for which SCEV has proven that they always hold.
+ if (isa<OverflowingBinaryOperator>(Result)) {
+ auto *I = cast<Instruction>(Result);
+ if (!S->hasNoUnsignedWrap())
+ I->setHasNoUnsignedWrap(false);
+ if (!S->hasNoSignedWrap())
+ I->setHasNoSignedWrap(false);
+ }
+
// For an expansion to use the postinc form, the client must call
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
// or dominated by IVIncInsertPos.
@@ -1438,7 +1474,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
{
// Expand the step somewhere that dominates the loop header.
SCEVInsertPointGuard Guard(Builder, this);
- StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
+ StepV = expandCodeForImpl(
+ Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false);
}
Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
}
@@ -1452,16 +1489,13 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
if (ResTy != SE.getEffectiveSCEVType(ResTy))
Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
// Truncate the result.
- if (TruncTy != Result->getType()) {
+ if (TruncTy != Result->getType())
Result = Builder.CreateTrunc(Result, TruncTy);
- rememberInstruction(Result);
- }
+
// Invert the result.
- if (InvertStep) {
- Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
- Result);
- rememberInstruction(Result);
- }
+ if (InvertStep)
+ Result = Builder.CreateSub(
+ expandCodeForImpl(Normalized->getStart(), TruncTy, false), Result);
}
// Re-apply any non-loop-dominating scale.
@@ -1469,24 +1503,22 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateMul(Result,
- expandCodeFor(PostLoopScale, IntTy));
- rememberInstruction(Result);
+ expandCodeForImpl(PostLoopScale, IntTy, false));
}
// Re-apply any non-loop-dominating offset.
if (PostLoopOffset) {
if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
if (Result->getType()->isIntegerTy()) {
- Value *Base = expandCodeFor(PostLoopOffset, ExpandTy);
+ Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy, false);
Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
} else {
Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
}
} else {
Result = InsertNoopCastOfTo(Result, IntTy);
- Result = Builder.CreateAdd(Result,
- expandCodeFor(PostLoopOffset, IntTy));
- rememberInstruction(Result);
+ Result = Builder.CreateAdd(
+ Result, expandCodeForImpl(PostLoopOffset, IntTy, false));
}
}
@@ -1527,15 +1559,15 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
S->getNoWrapFlags(SCEV::FlagNW)));
BasicBlock::iterator NewInsertPt =
- findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
- V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
- &*NewInsertPt);
+ findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
+ V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
+ &*NewInsertPt, false);
return V;
}
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
- SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
+ SmallVector<const SCEV *, 4> NewOps(S->operands());
NewOps[0] = SE.getConstant(Ty, 0);
const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
S->getNoWrapFlags(SCEV::FlagNW));
@@ -1642,31 +1674,34 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
return expand(T);
}
+Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
+ Value *V =
+ expandCodeForImpl(S->getOperand(), S->getOperand()->getType(), false);
+ return Builder.CreatePtrToInt(V, S->getType());
+}
+
Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeFor(S->getOperand(),
- SE.getEffectiveSCEVType(S->getOperand()->getType()));
- Value *I = Builder.CreateTrunc(V, Ty);
- rememberInstruction(I);
- return I;
+ Value *V = expandCodeForImpl(
+ S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()),
+ false);
+ return Builder.CreateTrunc(V, Ty);
}
Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeFor(S->getOperand(),
- SE.getEffectiveSCEVType(S->getOperand()->getType()));
- Value *I = Builder.CreateZExt(V, Ty);
- rememberInstruction(I);
- return I;
+ Value *V = expandCodeForImpl(
+ S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()),
+ false);
+ return Builder.CreateZExt(V, Ty);
}
Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeFor(S->getOperand(),
- SE.getEffectiveSCEVType(S->getOperand()->getType()));
- Value *I = Builder.CreateSExt(V, Ty);
- rememberInstruction(I);
- return I;
+ Value *V = expandCodeForImpl(
+ S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()),
+ false);
+ return Builder.CreateSExt(V, Ty);
}
Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
@@ -1680,11 +1715,9 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
- Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
- rememberInstruction(ICmp);
Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
- rememberInstruction(Sel);
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1705,11 +1738,9 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
- Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
- rememberInstruction(ICmp);
Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
- rememberInstruction(Sel);
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1730,11 +1761,9 @@ Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
- Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
- rememberInstruction(ICmp);
Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
- rememberInstruction(Sel);
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1755,11 +1784,9 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
- Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
- rememberInstruction(ICmp);
Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
- rememberInstruction(Sel);
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1769,15 +1796,45 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
return LHS;
}
-Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
- Instruction *IP) {
+Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
+ Instruction *IP, bool Root) {
setInsertPoint(IP);
- return expandCodeFor(SH, Ty);
+ Value *V = expandCodeForImpl(SH, Ty, Root);
+ return V;
}
-Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
+Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
// Expand the code for this SCEV.
Value *V = expand(SH);
+
+ if (PreserveLCSSA) {
+ if (auto *Inst = dyn_cast<Instruction>(V)) {
+ // Create a temporary instruction at the current insertion point, so we
+ // can hand it off to the helper to create LCSSA PHIs if required for the
+ // new use.
+ // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
+ // would accept an insertion point and return an LCSSA phi for that
+ // insertion point, so there is no need to insert & remove the temporary
+ // instruction.
+ Instruction *Tmp;
+ if (Inst->getType()->isIntegerTy())
+ Tmp =
+ cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user"));
+ else {
+ assert(Inst->getType()->isPointerTy());
+ Tmp = cast<Instruction>(
+ Builder.CreateGEP(Inst, Builder.getInt32(1), "tmp.lcssa.user"));
+ }
+ V = fixupLCSSAFormFor(Tmp, 0);
+
+ // Clean up temporary instruction.
+ InsertedValues.erase(Tmp);
+ InsertedPostIncValues.erase(Tmp);
+ Tmp->eraseFromParent();
+ }
+ }
+
+ InsertedExpressions[std::make_pair(SH, &*Builder.GetInsertPoint())] = V;
if (Ty) {
assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
"non-trivial casts should be done with the SCEVs directly!");
@@ -1861,20 +1918,17 @@ Value *SCEVExpander::expand(const SCEV *S) {
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
InsertPt = &*L->getHeader()->getFirstInsertionPt();
+
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
(isInsertedInstruction(InsertPt) ||
- isa<DbgInfoIntrinsic>(InsertPt)))
+ isa<DbgInfoIntrinsic>(InsertPt))) {
InsertPt = &*std::next(InsertPt->getIterator());
+ }
break;
}
}
}
- // IndVarSimplify sometimes sets the insertion point at the block start, even
- // when there are PHIs at that point. We must correct for this.
- if (isa<PHINode>(*InsertPt))
- InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();
-
// Check to see if we already expanded this here.
auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
if (I != InsertedExpressions.end())
@@ -1922,32 +1976,25 @@ Value *SCEVExpander::expand(const SCEV *S) {
}
void SCEVExpander::rememberInstruction(Value *I) {
- if (!PostIncLoops.empty())
- InsertedPostIncValues.insert(I);
- else
- InsertedValues.insert(I);
-}
-
-/// getOrInsertCanonicalInductionVariable - This method returns the
-/// canonical induction variable of the specified type for the specified
-/// loop (inserting one if there is none). A canonical induction variable
-/// starts at zero and steps by one on each iteration.
-PHINode *
-SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
- Type *Ty) {
- assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
-
- // Build a SCEV for {0,+,1}<L>.
- // Conservatively use FlagAnyWrap for now.
- const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
- SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
+ auto DoInsert = [this](Value *V) {
+ if (!PostIncLoops.empty())
+ InsertedPostIncValues.insert(V);
+ else
+ InsertedValues.insert(V);
+ };
+ DoInsert(I);
- // Emit code for it.
- SCEVInsertPointGuard Guard(Builder, this);
- PHINode *V =
- cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front()));
+ if (!PreserveLCSSA)
+ return;
- return V;
+ if (auto *Inst = dyn_cast<Instruction>(I)) {
+ // A new instruction has been added, which might introduce new uses outside
+ // a defining loop. Fix LCSSA form for each operand of the new instruction,
+ // if required.
+ for (unsigned OpIdx = 0, OpEnd = Inst->getNumOperands(); OpIdx != OpEnd;
+ OpIdx++)
+ fixupLCSSAFormFor(Inst, OpIdx);
+ }
}
/// replaceCongruentIVs - Check for congruent phis in this loop header and
@@ -1970,8 +2017,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// Put pointers at the back and make sure pointer < pointer = false.
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
- return RHS->getType()->getPrimitiveSizeInBits() <
- LHS->getType()->getPrimitiveSizeInBits();
+ return RHS->getType()->getPrimitiveSizeInBits().getFixedSize() <
+ LHS->getType()->getPrimitiveSizeInBits().getFixedSize();
});
unsigned NumElim = 0;
@@ -2079,6 +2126,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
}
DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: "
<< *Phi << '\n');
+ DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Original iv: "
+ << *OrigPhiRef << '\n');
++NumElim;
Value *NewIV = OrigPhiRef;
if (OrigPhiRef->getType() != Phi->getType()) {
@@ -2092,15 +2141,6 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
-Value *SCEVExpander::getExactExistingExpansion(const SCEV *S,
- const Instruction *At, Loop *L) {
- Optional<ScalarEvolution::ValueOffsetPair> VO =
- getRelatedExistingExpansion(S, At, L);
- if (VO && VO.getValue().second == nullptr)
- return VO.getValue().first;
- return nullptr;
-}
-
Optional<ScalarEvolution::ValueOffsetPair>
SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
Loop *L) {
@@ -2139,15 +2179,156 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
return None;
}
+template<typename T> static int costAndCollectOperands(
+ const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
+ TargetTransformInfo::TargetCostKind CostKind,
+ SmallVectorImpl<SCEVOperand> &Worklist) {
+
+ const T *S = cast<T>(WorkItem.S);
+ int Cost = 0;
+ // Object to help map SCEV operands to expanded IR instructions.
+ struct OperationIndices {
+ OperationIndices(unsigned Opc, size_t min, size_t max) :
+ Opcode(Opc), MinIdx(min), MaxIdx(max) { }
+ unsigned Opcode;
+ size_t MinIdx;
+ size_t MaxIdx;
+ };
+
+ // Collect the operations of all the instructions that will be needed to
+ // expand the SCEVExpr. This is so that when we come to cost the operands,
+ // we know what the generated user(s) will be.
+ SmallVector<OperationIndices, 2> Operations;
+
+ auto CastCost = [&](unsigned Opcode) {
+ Operations.emplace_back(Opcode, 0, 0);
+ return TTI.getCastInstrCost(Opcode, S->getType(),
+ S->getOperand(0)->getType(),
+ TTI::CastContextHint::None, CostKind);
+ };
+
+ auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
+ unsigned MinIdx = 0, unsigned MaxIdx = 1) {
+ Operations.emplace_back(Opcode, MinIdx, MaxIdx);
+ return NumRequired *
+ TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
+ };
+
+ auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired,
+ unsigned MinIdx, unsigned MaxIdx) {
+ Operations.emplace_back(Opcode, MinIdx, MaxIdx);
+ Type *OpType = S->getOperand(0)->getType();
+ return NumRequired * TTI.getCmpSelInstrCost(
+ Opcode, OpType, CmpInst::makeCmpResultType(OpType),
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ };
+
+ switch (S->getSCEVType()) {
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ case scUnknown:
+ case scConstant:
+ return 0;
+ case scPtrToInt:
+ Cost = CastCost(Instruction::PtrToInt);
+ break;
+ case scTruncate:
+ Cost = CastCost(Instruction::Trunc);
+ break;
+ case scZeroExtend:
+ Cost = CastCost(Instruction::ZExt);
+ break;
+ case scSignExtend:
+ Cost = CastCost(Instruction::SExt);
+ break;
+ case scUDivExpr: {
+ unsigned Opcode = Instruction::UDiv;
+ if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
+ if (SC->getAPInt().isPowerOf2())
+ Opcode = Instruction::LShr;
+ Cost = ArithCost(Opcode, 1);
+ break;
+ }
+ case scAddExpr:
+ Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
+ break;
+ case scMulExpr:
+ // TODO: this is a very pessimistic cost modelling for Mul,
+ // because of Bin Pow algorithm actually used by the expander,
+ // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
+ Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
+ break;
+ case scSMaxExpr:
+ case scUMaxExpr:
+ case scSMinExpr:
+ case scUMinExpr: {
+ Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
+ Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
+ break;
+ }
+ case scAddRecExpr: {
+ // In this polynomial, we may have some zero operands, and we shouldn't
+ // really charge for those. So how many non-zero coefficients are there?
+ int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
+ return !Op->isZero();
+ });
+
+ assert(NumTerms >= 1 && "Polynomial should have at least one term.");
+ assert(!(*std::prev(S->operands().end()))->isZero() &&
+ "Last operand should not be zero");
+
+ // Ignoring constant term (operand 0), how many of the coefficients are u> 1?
+ int NumNonZeroDegreeNonOneTerms =
+ llvm::count_if(S->operands(), [](const SCEV *Op) {
+ auto *SConst = dyn_cast<SCEVConstant>(Op);
+ return !SConst || SConst->getAPInt().ugt(1);
+ });
+
+ // Much like with normal add expr, the polynomial will require
+ // one less addition than the number of its terms.
+ int AddCost = ArithCost(Instruction::Add, NumTerms - 1,
+ /*MinIdx*/1, /*MaxIdx*/1);
+ // Here, *each* one of those will require a multiplication.
+ int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
+ Cost = AddCost + MulCost;
+
+ // What is the degree of this polynomial?
+ int PolyDegree = S->getNumOperands() - 1;
+ assert(PolyDegree >= 1 && "Should be at least affine.");
+
+ // The final term will be:
+ // Op_{PolyDegree} * x ^ {PolyDegree}
+ // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
+ // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
+ // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
+ // FIXME: this is conservatively correct, but might be overly pessimistic.
+ Cost += MulCost * (PolyDegree - 1);
+ break;
+ }
+ }
+
+ for (auto &CostOp : Operations) {
+ for (auto SCEVOp : enumerate(S->operands())) {
+ // Clamp the index to account for multiple IR operations being chained.
+ size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
+ size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
+ Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
+ }
+ }
+ return Cost;
+}
+
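
For the scAddRecExpr case above, the modelled cost is one add per extra non-zero term, one multiply per higher-degree coefficient other than 0 or 1, and PolyDegree-1 multiplies to build the powers of x. A rough standalone sketch of that arithmetic over constant coefficients (AddCost and MulCost are hypothetical placeholders for the TTI per-instruction costs):

#include <cstdint>
#include <vector>

static int addRecExpansionCost(const std::vector<int64_t> &Coeffs,
                               int AddCost, int MulCost) {
  int NumTerms = 0;
  for (int64_t C : Coeffs)
    if (C != 0)
      ++NumTerms;                    // each extra non-zero term needs an add
  int NumHigherCoeffMuls = 0;
  for (size_t I = 1; I < Coeffs.size(); ++I)
    if (Coeffs[I] != 0 && Coeffs[I] != 1)
      ++NumHigherCoeffMuls;          // coefficients other than 0/1 pay a mul
  int PolyDegree = static_cast<int>(Coeffs.size()) - 1;
  return AddCost * (NumTerms - 1) +  // fold the terms together
         MulCost * NumHigherCoeffMuls +
         MulCost * (PolyDegree - 1); // build x^2 .. x^PolyDegree
}

// Example: {3,+,1,+,2} with AddCost = MulCost = 1 costs 2 + 1 + 1 = 4.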
bool SCEVExpander::isHighCostExpansionHelper(
- const SCEV *S, Loop *L, const Instruction &At, int &BudgetRemaining,
- const TargetTransformInfo &TTI, SmallPtrSetImpl<const SCEV *> &Processed,
- SmallVectorImpl<const SCEV *> &Worklist) {
+ const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
+ int &BudgetRemaining, const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const SCEV *> &Processed,
+ SmallVectorImpl<SCEVOperand> &Worklist) {
if (BudgetRemaining < 0)
return true; // Already run out of budget, give up.
+ const SCEV *S = WorkItem.S;
// Was the cost of expansion of this expression already accounted for?
- if (!Processed.insert(S).second)
+ if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
return false; // We have already accounted for this expression.
// If we can find an existing value for this scev available at the point "At"
@@ -2155,52 +2336,37 @@ bool SCEVExpander::isHighCostExpansionHelper(
if (getRelatedExistingExpansion(S, &At, L))
return false; // Consider the expression to be free.
- switch (S->getSCEVType()) {
- case scUnknown:
- case scConstant:
- return false; // Assume to be zero-cost.
- }
-
TargetTransformInfo::TargetCostKind CostKind =
- TargetTransformInfo::TCK_RecipThroughput;
+ L->getHeader()->getParent()->hasMinSize()
+ ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_RecipThroughput;
- if (auto *CastExpr = dyn_cast<SCEVCastExpr>(S)) {
- unsigned Opcode;
- switch (S->getSCEVType()) {
- case scTruncate:
- Opcode = Instruction::Trunc;
- break;
- case scZeroExtend:
- Opcode = Instruction::ZExt;
- break;
- case scSignExtend:
- Opcode = Instruction::SExt;
- break;
- default:
- llvm_unreachable("There are no other cast types.");
- }
- const SCEV *Op = CastExpr->getOperand();
- BudgetRemaining -= TTI.getCastInstrCost(Opcode, /*Dst=*/S->getType(),
- /*Src=*/Op->getType(), CostKind);
- Worklist.emplace_back(Op);
+ switch (S->getSCEVType()) {
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ case scUnknown:
+ // Assume to be zero-cost.
+ return false;
+ case scConstant: {
+ // Only evaluate the costs of constants when optimizing for size.
+ if (CostKind != TargetTransformInfo::TCK_CodeSize)
+ return false;
+ const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
+ Type *Ty = S->getType();
+ BudgetRemaining -= TTI.getIntImmCostInst(
+ WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
+ return BudgetRemaining < 0;
+ }
+ case scTruncate:
+ case scPtrToInt:
+ case scZeroExtend:
+ case scSignExtend: {
+ int Cost =
+ costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
+ BudgetRemaining -= Cost;
return false; // Will answer upon next entry into this function.
}
-
- if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
- // If the divisor is a power of two count this as a logical right-shift.
- if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) {
- if (SC->getAPInt().isPowerOf2()) {
- BudgetRemaining -=
- TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(),
- CostKind);
- // Note that we don't count the cost of RHS, because it is a constant,
- // and we consider those to be free. But if that changes, we would need
- // to log2() it first before calling isHighCostExpansionHelper().
- Worklist.emplace_back(UDivExpr->getLHS());
- return false; // Will answer upon next entry into this function.
- }
- }
-
+ case scUDivExpr: {
// UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
// HowManyLessThans produced to compute a precise expression, rather than a
// UDiv from the user's code. If we can't find a UDiv in the code with some
@@ -2213,117 +2379,36 @@ bool SCEVExpander::isHighCostExpansionHelper(
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
return false; // Consider it to be free.
+ int Cost =
+ costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
// Need to count the cost of this UDiv.
- BudgetRemaining -=
- TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(),
- CostKind);
- Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()});
+ BudgetRemaining -= Cost;
return false; // Will answer upon next entry into this function.
}
-
- if (const auto *NAry = dyn_cast<SCEVAddRecExpr>(S)) {
- Type *OpType = NAry->getType();
-
- assert(NAry->getNumOperands() >= 2 &&
- "Polynomial should be at least linear");
-
- int AddCost =
- TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
- int MulCost =
- TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
-
- // In this polynominal, we may have some zero operands, and we shouldn't
- // really charge for those. So how many non-zero coeffients are there?
- int NumTerms = llvm::count_if(NAry->operands(),
- [](const SCEV *S) { return !S->isZero(); });
- assert(NumTerms >= 1 && "Polynominal should have at least one term.");
- assert(!(*std::prev(NAry->operands().end()))->isZero() &&
- "Last operand should not be zero");
-
- // Much like with normal add expr, the polynominal will require
- // one less addition than the number of it's terms.
- BudgetRemaining -= AddCost * (NumTerms - 1);
- if (BudgetRemaining < 0)
- return true;
-
- // Ignoring constant term (operand 0), how many of the coeffients are u> 1?
- int NumNonZeroDegreeNonOneTerms =
- llvm::count_if(make_range(std::next(NAry->op_begin()), NAry->op_end()),
- [](const SCEV *S) {
- auto *SConst = dyn_cast<SCEVConstant>(S);
- return !SConst || SConst->getAPInt().ugt(1);
- });
- // Here, *each* one of those will require a multiplication.
- BudgetRemaining -= MulCost * NumNonZeroDegreeNonOneTerms;
- if (BudgetRemaining < 0)
- return true;
-
- // What is the degree of this polynominal?
- int PolyDegree = NAry->getNumOperands() - 1;
- assert(PolyDegree >= 1 && "Should be at least affine.");
-
- // The final term will be:
- // Op_{PolyDegree} * x ^ {PolyDegree}
- // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
- // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
- // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
- // FIXME: this is conservatively correct, but might be overly pessimistic.
- BudgetRemaining -= MulCost * (PolyDegree - 1);
- if (BudgetRemaining < 0)
- return true;
-
- // And finally, the operands themselves should fit within the budget.
- Worklist.insert(Worklist.end(), NAry->operands().begin(),
- NAry->operands().end());
- return false; // So far so good, though ops may be too costly?
- }
-
- if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
- Type *OpType = NAry->getType();
-
- int PairCost;
- switch (S->getSCEVType()) {
- case scAddExpr:
- PairCost =
- TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
- break;
- case scMulExpr:
- // TODO: this is a very pessimistic cost modelling for Mul,
- // because of Bin Pow algorithm actually used by the expander,
- // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
- PairCost =
- TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
- break;
- case scSMaxExpr:
- case scUMaxExpr:
- case scSMinExpr:
- case scUMinExpr:
- PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType,
- CmpInst::makeCmpResultType(OpType),
- CostKind) +
- TTI.getCmpSelInstrCost(Instruction::Select, OpType,
- CmpInst::makeCmpResultType(OpType),
- CostKind);
- break;
- default:
- llvm_unreachable("There are no other variants here.");
- }
-
- assert(NAry->getNumOperands() > 1 &&
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr: {
+ assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
"Nary expr should have more than 1 operand.");
// The simple nary expr will require one less op (or pair of ops)
// than the number of its terms.
- BudgetRemaining -= PairCost * (NAry->getNumOperands() - 1);
- if (BudgetRemaining < 0)
- return true;
-
- // And finally, the operands themselves should fit within the budget.
- Worklist.insert(Worklist.end(), NAry->operands().begin(),
- NAry->operands().end());
- return false; // So far so good, though ops may be too costly?
+ int Cost =
+ costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
+ BudgetRemaining -= Cost;
+ return BudgetRemaining < 0;
}
-
- llvm_unreachable("No other scev expressions possible.");
+ case scAddRecExpr: {
+ assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
+ "Polynomial should be at least linear");
+ BudgetRemaining -= costAndCollectOperands<SCEVAddRecExpr>(
+ WorkItem, TTI, CostKind, Worklist);
+ return BudgetRemaining < 0;
+ }
+ }
+ llvm_unreachable("Unknown SCEV kind!");
}
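
Editor's note: the rewritten isHighCostExpansionHelper above charges every visited SCEV node against one shared budget (BudgetRemaining) and reports "too costly" the moment the budget goes negative, pushing operands onto a worklist instead of recursing. The following is a minimal standalone C++ sketch of that worklist-plus-budget pattern; the Node type, its Cost field, and isHighCost are illustrative assumptions, not LLVM API.

#include <cstdio>
#include <vector>

struct Node {                 // stand-in for a SCEV expression node
  int Cost;                   // cost charged when this node is visited
  std::vector<Node *> Ops;    // operands still to be costed
};

// Returns true as soon as the accumulated cost exceeds Budget, mirroring the
// "BudgetRemaining < 0" early exits in the code above.
static bool isHighCost(Node *Root, int Budget) {
  std::vector<Node *> Worklist{Root};
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    Budget -= N->Cost;        // charge this node
    if (Budget < 0)
      return true;            // too expensive, stop immediately
    for (Node *Op : N->Ops)   // operands must also fit into the same budget
      Worklist.push_back(Op);
  }
  return false;               // everything fit within the budget
}

int main() {
  Node A{1, {}}, B{2, {}}, Mul{3, {&A, &B}};
  std::printf("%d\n", isHighCost(&Mul, 4)); // prints 1: total cost 6 > 4
}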
Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
@@ -2344,8 +2429,10 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
Instruction *IP) {
- Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
- Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+ Value *Expr0 =
+ expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP, false);
+ Value *Expr1 =
+ expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP, false);
Builder.SetInsertPoint(IP);
auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
@@ -2361,7 +2448,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
const SCEV *ExitCount =
SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
- assert(ExitCount != SE.getCouldNotCompute() && "Invalid loop count");
+ assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
const SCEV *Step = AR->getStepRecurrence(SE);
const SCEV *Start = AR->getStart();
@@ -2377,15 +2464,16 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
Builder.SetInsertPoint(Loc);
- Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
+ Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc, false);
IntegerType *Ty =
IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
- Value *StepValue = expandCodeFor(Step, Ty, Loc);
- Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
- Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
+ Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false);
+ Value *NegStepValue =
+ expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false);
+ Value *StartValue = expandCodeForImpl(Start, ARExpandTy, Loc, false);
ConstantInt *Zero =
ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2445,8 +2533,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
}
- EndCheck = Builder.CreateOr(EndCheck, OfMul);
- return EndCheck;
+ return Builder.CreateOr(EndCheck, OfMul);
}
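
Editor's note: generateOverflowCheck above materialises |Step| * ExitCount and then tests whether adding (or subtracting) that product to Start wraps, OR-ing the multiply-overflow flag into the final check. Below is a hedged standalone sketch of the same arithmetic using GCC/Clang overflow builtins on a fixed 32-bit width; the real code operates on arbitrary-width APInts and emits IR, so this only illustrates the check itself.

#include <cstdint>
#include <cstdio>

// Returns true if Start + Step * TripCount wraps a uint32_t, the analogue of
// the OfMul / EndCheck disjunction built by generateOverflowCheck.
static bool addRecOverflows(uint32_t Start, uint32_t Step, uint32_t TripCount) {
  uint32_t MulResult, EndValue;
  bool OfMul = __builtin_mul_overflow(Step, TripCount, &MulResult);
  bool OfAdd = __builtin_add_overflow(Start, MulResult, &EndValue);
  return OfMul || OfAdd;      // any wrap invalidates the predicate
}

int main() {
  std::printf("%d\n", addRecOverflows(10, 4, 5));                   // 0
  std::printf("%d\n", addRecOverflows(4000000000u, 2, 200000000u)); // 1
}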
Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
@@ -2489,6 +2576,34 @@ Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
return Check;
}
+Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) {
+ assert(PreserveLCSSA);
+ SmallVector<Instruction *, 1> ToUpdate;
+
+ auto *OpV = User->getOperand(OpIdx);
+ auto *OpI = dyn_cast<Instruction>(OpV);
+ if (!OpI)
+ return OpV;
+
+ Loop *DefLoop = SE.LI.getLoopFor(OpI->getParent());
+ Loop *UseLoop = SE.LI.getLoopFor(User->getParent());
+ if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
+ return OpV;
+
+ ToUpdate.push_back(OpI);
+ SmallVector<PHINode *, 16> PHIsToRemove;
+ formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove);
+ for (PHINode *PN : PHIsToRemove) {
+ if (!PN->use_empty())
+ continue;
+ InsertedValues.erase(PN);
+ InsertedPostIncValues.erase(PN);
+ PN->eraseFromParent();
+ }
+
+ return User->getOperand(OpIdx);
+}
+
namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
@@ -2566,4 +2681,40 @@ bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
}
return false;
}
+
+SCEVExpanderCleaner::~SCEVExpanderCleaner() {
+ // Result is used, nothing to remove.
+ if (ResultUsed)
+ return;
+
+ auto InsertedInstructions = Expander.getAllInsertedInstructions();
+#ifndef NDEBUG
+ SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
+ InsertedInstructions.end());
+ (void)InsertedSet;
+#endif
+ // Remove sets with value handles.
+ Expander.clear();
+
+ // Sort so that earlier instructions do not dominate later instructions.
+ stable_sort(InsertedInstructions, [this](Instruction *A, Instruction *B) {
+ return DT.dominates(B, A);
+ });
+ // Remove all inserted instructions.
+ for (Instruction *I : InsertedInstructions) {
+
+#ifndef NDEBUG
+ assert(all_of(I->users(),
+ [&InsertedSet](Value *U) {
+ return InsertedSet.contains(cast<Instruction>(U));
+ }) &&
+ "removed instruction should only be used by instructions inserted "
+ "during expansion");
+#endif
+ assert(!I->getType()->isVoidTy() &&
+ "inserted instruction should have non-void types");
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+}
}
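
Editor's note: SCEVExpanderCleaner's destructor sorts the recorded instructions so that users are erased before the values they use (no instruction may dominate a later one in the erase order), then replaces remaining uses with undef and deletes them. A small standalone sketch of that ordering idea for straight-line code, where descending creation order gives the same users-first guarantee; the Inst type is an assumption for illustration only.

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// Toy "instruction": remembers the order in which it was created.
struct Inst {
  std::string Name;
  unsigned CreationIdx;
};

int main() {
  // Expansion recorded these temporaries in creation order, so every value
  // was created before any of its users.
  std::vector<Inst> Inserted = {{"%start", 0}, {"%mul", 1}, {"%cmp", 2}};
  // Erase users before definitions: sorting by descending creation index is
  // the straight-line analogue of the dominance-based sort above.
  std::stable_sort(Inserted.begin(), Inserted.end(),
                   [](const Inst &A, const Inst &B) {
                     return A.CreationIdx > B.CreationIdx;
                   });
  for (const Inst &I : Inserted)
    std::printf("erasing %s\n", I.Name.c_str()); // %cmp, %mul, %start
}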
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b450d71c996c..de9560df9785 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -13,8 +13,11 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -58,6 +61,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -67,6 +71,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -85,6 +90,12 @@ using namespace PatternMatch;
#define DEBUG_TYPE "simplifycfg"
+cl::opt<bool> llvm::RequireAndPreserveDomTree(
+ "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
+ "into preserving DomTree,"));
+
// Chosen as 2 so as to be cheap, but still to have enough power to fold
// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
// To catch this, we need to fold a compare and a select, hence '2' being the
@@ -105,6 +116,10 @@ static cl::opt<bool> DupRet(
cl::desc("Duplicate return instructions into unconditional branches"));
static cl::opt<bool>
+ HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
+ cl::desc("Hoist common instructions up to the parent block"));
+
+static cl::opt<bool>
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
@@ -138,6 +153,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
cl::desc("Max size of a block which is still considered "
"small enough to thread through"));
+// Two is chosen to allow one negation and a logical combine.
+static cl::opt<unsigned>
+ BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
+ cl::init(2),
+ cl::desc("Maximum cost of combining conditions when "
+ "folding branches"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -147,9 +169,22 @@ STATISTIC(
NumLookupTablesHoles,
"Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
-STATISTIC(NumSinkCommons,
+STATISTIC(NumFoldValueComparisonIntoPredecessors,
+ "Number of value comparisons folded into predecessor basic blocks");
+STATISTIC(NumFoldBranchToCommonDest,
+ "Number of branches folded into predecessor basic block");
+STATISTIC(
+ NumHoistCommonCode,
+ "Number of common instruction 'blocks' hoisted up to the begin block");
+STATISTIC(NumHoistCommonInstrs,
+ "Number of common instructions hoisted up to the begin block");
+STATISTIC(NumSinkCommonCode,
+ "Number of common instruction 'blocks' sunk down to the end block");
+STATISTIC(NumSinkCommonInstrs,
"Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
+STATISTIC(NumInvokes,
+ "Number of invokes with empty resume blocks simplified into calls");
namespace {
@@ -182,8 +217,9 @@ struct ValueEqualityComparisonCase {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
+ DomTreeUpdater *DTU;
const DataLayout &DL;
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
+ ArrayRef<WeakVH> LoopHeaders;
const SimplifyCFGOptions &Options;
bool Resimplify;
@@ -193,6 +229,9 @@ class SimplifyCFGOpt {
bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
BasicBlock *Pred,
IRBuilder<> &Builder);
+ bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
+ Instruction *PTI,
+ IRBuilder<> &Builder);
bool FoldValueComparisonIntoPredecessors(Instruction *TI,
IRBuilder<> &Builder);
@@ -225,13 +264,18 @@ class SimplifyCFGOpt {
bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
public:
- SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
+ const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
const SimplifyCFGOptions &Opts)
- : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {}
+ : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
+ assert((!DTU || !DTU->hasPostDomTree()) &&
+ "SimplifyCFG is not yet capable of maintaining validity of a "
+ "PostDomTree, so don't ask for it.");
+ }
- bool run(BasicBlock *BB);
bool simplifyOnce(BasicBlock *BB);
+ bool simplifyOnceImpl(BasicBlock *BB);
+ bool run(BasicBlock *BB);
// Helper to set Resimplify and return change indication.
bool requestResimplify() {
@@ -273,46 +317,6 @@ SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
return !Fail;
}
-/// Return true if it is safe and profitable to merge these two terminator
-/// instructions together, where SI1 is an unconditional branch. PhiNodes will
-/// store all PHI nodes in common successors.
-static bool
-isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
- Instruction *Cond,
- SmallVectorImpl<PHINode *> &PhiNodes) {
- if (SI1 == SI2)
- return false; // Can't merge with self!
- assert(SI1->isUnconditional() && SI2->isConditional());
-
- // We fold the unconditional branch if we can easily update all PHI nodes in
- // common successors:
- // 1> We have a constant incoming value for the conditional branch;
- // 2> We have "Cond" as the incoming value for the unconditional branch;
- // 3> SI2->getCondition() and Cond have same operands.
- CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition());
- if (!Ci2)
- return false;
- if (!(Cond->getOperand(0) == Ci2->getOperand(0) &&
- Cond->getOperand(1) == Ci2->getOperand(1)) &&
- !(Cond->getOperand(0) == Ci2->getOperand(1) &&
- Cond->getOperand(1) == Ci2->getOperand(0)))
- return false;
-
- BasicBlock *SI1BB = SI1->getParent();
- BasicBlock *SI2BB = SI2->getParent();
- SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
- for (BasicBlock *Succ : successors(SI2BB))
- if (SI1Succs.count(Succ))
- for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
- PHINode *PN = cast<PHINode>(BBI);
- if (PN->getIncomingValueForBlock(SI1BB) != Cond ||
- !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB)))
- return false;
- PhiNodes.push_back(PN);
- }
- return true;
-}
-
/// Update PHI nodes in Succ to indicate that there will now be entries in it
/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
/// will be the same as those coming in from ExistPred, an existing predecessor
@@ -651,7 +655,7 @@ private:
/// vector.
/// One "Extra" case is allowed to differ from the other.
void gather(Value *V) {
- bool isEQ = (cast<Instruction>(V)->getOpcode() == Instruction::Or);
+ bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
// Keep a stack (SmallVector for efficiency) for depth-first traversal
SmallVector<Value *, 8> DFT;
@@ -666,11 +670,14 @@ private:
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If it is a || (or && depending on isEQ), process the operands.
- if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) {
- if (Visited.insert(I->getOperand(1)).second)
- DFT.push_back(I->getOperand(1));
- if (Visited.insert(I->getOperand(0)).second)
- DFT.push_back(I->getOperand(0));
+ Value *Op0, *Op1;
+ if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
+ : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
+ if (Visited.insert(Op1).second)
+ DFT.push_back(Op1);
+ if (Visited.insert(Op0).second)
+ DFT.push_back(Op0);
+
continue;
}
@@ -765,7 +772,7 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
static void
EliminateBlockCases(BasicBlock *BB,
std::vector<ValueEqualityComparisonCase> &Cases) {
- Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
+ llvm::erase_value(Cases, BB);
}
/// Return true if there are any keys in C1 that exist in C2 as well.
@@ -875,13 +882,18 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
(void)NI;
// Remove PHI node entries for the dead edge.
- ThisCases[0].Dest->removePredecessor(TI->getParent());
+ ThisCases[0].Dest->removePredecessor(PredDef);
LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI << "Leaving: " << *NI
<< "\n");
EraseTerminatorAndDCECond(TI);
+
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
+
return true;
}
@@ -894,13 +906,25 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
+ SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases;
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
+ auto *Successor = i->getCaseSuccessor();
+ ++NumPerSuccessorCases[Successor];
if (DeadCases.count(i->getCaseValue())) {
- i->getCaseSuccessor()->removePredecessor(TI->getParent());
+ Successor->removePredecessor(PredDef);
SI.removeCase(i);
+ --NumPerSuccessorCases[Successor];
}
}
+
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, PredDef, I.first});
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
}
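
Editor's note: the NumPerSuccessorCases bookkeeping above exists because the DomTree edge Pred -> Successor may only be deleted once the last switch case targeting that successor has been removed. A hedged sketch of that counting idiom with plain STL containers standing in for the LLVM types:

#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Case value -> successor name, with cases 1 and 3 known to be dead.
  std::vector<std::pair<int, std::string>> Cases = {
      {0, "bb.a"}, {1, "bb.b"}, {2, "bb.a"}, {3, "bb.c"}};
  std::map<std::string, int> NumPerSuccessorCases;
  for (auto &C : Cases)
    ++NumPerSuccessorCases[C.second];      // count every edge to a successor
  for (auto &C : Cases)
    if (C.first == 1 || C.first == 3)      // "DeadCases"
      --NumPerSuccessorCases[C.second];    // this case gets removed
  // Only successors whose count dropped to zero lose their CFG edge.
  for (auto &KV : NumPerSuccessorCases)
    if (KV.second == 0)
      std::printf("delete edge pred -> %s\n", KV.first.c_str()); // bb.b, bb.c
}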
@@ -930,12 +954,16 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
if (!TheRealDest)
TheRealDest = ThisDef;
+ SmallSetVector<BasicBlock *, 2> RemovedSuccs;
+
// Remove PHI node entries for dead edges.
BasicBlock *CheckEdge = TheRealDest;
for (BasicBlock *Succ : successors(TIBB))
- if (Succ != CheckEdge)
+ if (Succ != CheckEdge) {
+ if (Succ != TheRealDest)
+ RemovedSuccs.insert(Succ);
Succ->removePredecessor(TIBB);
- else
+ } else
CheckEdge = nullptr;
// Insert the new branch.
@@ -947,6 +975,13 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
<< "\n");
EraseTerminatorAndDCECond(TI);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.reserve(RemovedSuccs.size());
+ for (auto *RemovedSucc : RemovedSuccs)
+ Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
+ DTU->applyUpdates(Updates);
+ }
return true;
}
@@ -1014,219 +1049,300 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) {
}
}
-/// The specified terminator is a value equality comparison instruction
-/// (either a switch or a branch on "X == c").
-/// See if any of the predecessors of the terminator block are value comparisons
-/// on the same value. If so, and if safe to do so, fold them together.
-bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
- IRBuilder<> &Builder) {
- BasicBlock *BB = TI->getParent();
- Value *CV = isValueEqualityComparison(TI); // CondVal
- assert(CV && "Not a comparison?");
- bool Changed = false;
+static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
+ BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
+ Instruction *PTI = PredBlock->getTerminator();
- SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
- while (!Preds.empty()) {
- BasicBlock *Pred = Preds.pop_back_val();
+ // If we have bonus instructions, clone them into the predecessor block.
+ // Note that there may be multiple predecessor blocks, so we cannot move
+ // bonus instructions to a predecessor block.
+ for (Instruction &BonusInst : *BB) {
+ if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator())
+ continue;
- // See if the predecessor is a comparison with the same value.
- Instruction *PTI = Pred->getTerminator();
- Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+ Instruction *NewBonusInst = BonusInst.clone();
- if (PCV == CV && TI != PTI) {
- SmallSetVector<BasicBlock*, 4> FailBlocks;
- if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
- for (auto *Succ : FailBlocks) {
- if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split"))
- return false;
- }
- }
+ if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
+ // Unless the instruction has the same !dbg location as the original
+ // branch, drop it. When we fold the bonus instructions we want to make
+ // sure we reset their debug locations in order to avoid stepping on
+ // dead code caused by folding dead branches.
+ NewBonusInst->setDebugLoc(DebugLoc());
+ }
- // Figure out which 'cases' to copy from SI to PSI.
- std::vector<ValueEqualityComparisonCase> BBCases;
- BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
-
- std::vector<ValueEqualityComparisonCase> PredCases;
- BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
-
- // Based on whether the default edge from PTI goes to BB or not, fill in
- // PredCases and PredDefault with the new switch cases we would like to
- // build.
- SmallVector<BasicBlock *, 8> NewSuccessors;
-
- // Update the branch weight metadata along the way
- SmallVector<uint64_t, 8> Weights;
- bool PredHasWeights = HasBranchWeights(PTI);
- bool SuccHasWeights = HasBranchWeights(TI);
-
- if (PredHasWeights) {
- GetBranchWeights(PTI, Weights);
- // branch-weight metadata is inconsistent here.
- if (Weights.size() != 1 + PredCases.size())
- PredHasWeights = SuccHasWeights = false;
- } else if (SuccHasWeights)
- // If there are no predecessor weights but there are successor weights,
- // populate Weights with 1, which will later be scaled to the sum of
- // successor's weights
- Weights.assign(1 + PredCases.size(), 1);
-
- SmallVector<uint64_t, 8> SuccWeights;
- if (SuccHasWeights) {
- GetBranchWeights(TI, SuccWeights);
- // branch-weight metadata is inconsistent here.
- if (SuccWeights.size() != 1 + BBCases.size())
- PredHasWeights = SuccHasWeights = false;
- } else if (PredHasWeights)
- SuccWeights.assign(1 + BBCases.size(), 1);
-
- if (PredDefault == BB) {
- // If this is the default destination from PTI, only the edges in TI
- // that don't occur in PTI, or that branch to BB will be activated.
- std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- if (PredCases[i].Dest != BB)
- PTIHandled.insert(PredCases[i].Value);
- else {
- // The default destination is BB, we don't need explicit targets.
- std::swap(PredCases[i], PredCases.back());
-
- if (PredHasWeights || SuccHasWeights) {
- // Increase weight for the default case.
- Weights[0] += Weights[i + 1];
- std::swap(Weights[i + 1], Weights.back());
- Weights.pop_back();
- }
+ RemapInstruction(NewBonusInst, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ VMap[&BonusInst] = NewBonusInst;
+
+ // If we moved a load, we can no longer claim any knowledge about
+ // its potential value. The previous information might have been valid
+ // only given the branch precondition.
+ // For an analogous reason, we must also drop all the metadata whose
+ // semantics we don't understand. We *can* preserve !annotation, because
+ // it is tied to the instruction itself, not the value or position.
+ NewBonusInst->dropUnknownNonDebugMetadata(LLVMContext::MD_annotation);
+
+ PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst);
+ NewBonusInst->takeName(&BonusInst);
+ BonusInst.setName(NewBonusInst->getName() + ".old");
+
+ // Update (liveout) uses of bonus instructions,
+ // now that the bonus instruction has been cloned into predecessor.
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(BonusInst.getType(),
+ (NewBonusInst->getName() + ".merge").str());
+ SSAUpdate.AddAvailableValue(BB, &BonusInst);
+ SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst);
+ for (Use &U : make_early_inc_range(BonusInst.uses()))
+ SSAUpdate.RewriteUseAfterInsertions(U);
+ }
+}
- PredCases.pop_back();
- --i;
- --e;
- }
+bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
+ Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ BasicBlock *Pred = PTI->getParent();
+
+ std::vector<DominatorTree::UpdateType> Updates;
+
+ // Figure out which 'cases' to copy from SI to PSI.
+ std::vector<ValueEqualityComparisonCase> BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
- // Reconstruct the new switch statement we will be building.
- if (PredDefault != BBDefault) {
- PredDefault->removePredecessor(Pred);
- PredDefault = BBDefault;
- NewSuccessors.push_back(BBDefault);
+ std::vector<ValueEqualityComparisonCase> PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
+
+ // Update the branch weight metadata along the way
+ SmallVector<uint64_t, 8> Weights;
+ bool PredHasWeights = HasBranchWeights(PTI);
+ bool SuccHasWeights = HasBranchWeights(TI);
+
+ if (PredHasWeights) {
+ GetBranchWeights(PTI, Weights);
+ // branch-weight metadata is inconsistent here.
+ if (Weights.size() != 1 + PredCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (SuccHasWeights)
+ // If there are no predecessor weights but there are successor weights,
+ // populate Weights with 1, which will later be scaled to the sum of
+ // successor's weights
+ Weights.assign(1 + PredCases.size(), 1);
+
+ SmallVector<uint64_t, 8> SuccWeights;
+ if (SuccHasWeights) {
+ GetBranchWeights(TI, SuccWeights);
+ // branch-weight metadata is inconsistent here.
+ if (SuccWeights.size() != 1 + BBCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (PredHasWeights)
+ SuccWeights.assign(1 + BBCases.size(), 1);
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest != BB)
+ PTIHandled.insert(PredCases[i].Value);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Increase weight for the default case.
+ Weights[0] += Weights[i + 1];
+ std::swap(Weights[i + 1], Weights.back());
+ Weights.pop_back();
}
- unsigned CasesFromPred = Weights.size();
- uint64_t ValidTotalSuccWeight = 0;
- for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
- if (!PTIHandled.count(BBCases[i].Value) &&
- BBCases[i].Dest != BBDefault) {
- PredCases.push_back(BBCases[i]);
- NewSuccessors.push_back(BBCases[i].Dest);
- if (SuccHasWeights || PredHasWeights) {
- // The default weight is at index 0, so weight for the ith case
- // should be at index i+1. Scale the cases from successor by
- // PredDefaultWeight (Weights[0]).
- Weights.push_back(Weights[0] * SuccWeights[i + 1]);
- ValidTotalSuccWeight += SuccWeights[i + 1];
- }
- }
+ PredCases.pop_back();
+ --i;
+ --e;
+ }
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ if (PredDefault != BB)
+ Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
+ PredDefault = BBDefault;
+ ++NewSuccessors[BBDefault];
+ }
+
+ unsigned CasesFromPred = Weights.size();
+ uint64_t ValidTotalSuccWeight = 0;
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ ++NewSuccessors[BBCases[i].Dest];
if (SuccHasWeights || PredHasWeights) {
- ValidTotalSuccWeight += SuccWeights[0];
- // Scale the cases from predecessor by ValidTotalSuccWeight.
- for (unsigned i = 1; i < CasesFromPred; ++i)
- Weights[i] *= ValidTotalSuccWeight;
- // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
- Weights[0] *= SuccWeights[0];
+ // The default weight is at index 0, so weight for the ith case
+ // should be at index i+1. Scale the cases from successor by
+ // PredDefaultWeight (Weights[0]).
+ Weights.push_back(Weights[0] * SuccWeights[i + 1]);
+ ValidTotalSuccWeight += SuccWeights[i + 1];
}
- } else {
- // If this is not the default destination from PSI, only the edges
- // in SI that occur in PSI with a destination of BB will be
- // activated.
- std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
- std::map<ConstantInt *, uint64_t> WeightsForHandled;
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- if (PredCases[i].Dest == BB) {
- PTIHandled.insert(PredCases[i].Value);
-
- if (PredHasWeights || SuccHasWeights) {
- WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
- std::swap(Weights[i + 1], Weights.back());
- Weights.pop_back();
- }
-
- std::swap(PredCases[i], PredCases.back());
- PredCases.pop_back();
- --i;
- --e;
- }
+ }
- // Okay, now we know which constants were sent to BB from the
- // predecessor. Figure out where they will all go now.
- for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
- if (PTIHandled.count(BBCases[i].Value)) {
- // If this is one we are capable of getting...
- if (PredHasWeights || SuccHasWeights)
- Weights.push_back(WeightsForHandled[BBCases[i].Value]);
- PredCases.push_back(BBCases[i]);
- NewSuccessors.push_back(BBCases[i].Dest);
- PTIHandled.erase(
- BBCases[i].Value); // This constant is taken care of
- }
+ if (SuccHasWeights || PredHasWeights) {
+ ValidTotalSuccWeight += SuccWeights[0];
+ // Scale the cases from predecessor by ValidTotalSuccWeight.
+ for (unsigned i = 1; i < CasesFromPred; ++i)
+ Weights[i] *= ValidTotalSuccWeight;
+ // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
+ Weights[0] *= SuccWeights[0];
+ }
+ } else {
+ // If this is not the default destination from PSI, only the edges
+ // in SI that occur in PSI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt *, uint64_t> WeightsForHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest == BB) {
+ PTIHandled.insert(PredCases[i].Value);
- // If there are any constants vectored to BB that TI doesn't handle,
- // they must go to the default destination of TI.
- for (ConstantInt *I : PTIHandled) {
- if (PredHasWeights || SuccHasWeights)
- Weights.push_back(WeightsForHandled[I]);
- PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
- NewSuccessors.push_back(BBDefault);
+ if (PredHasWeights || SuccHasWeights) {
+ WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
+ std::swap(Weights[i + 1], Weights.back());
+ Weights.pop_back();
}
+
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i;
+ --e;
}
- // Okay, at this point, we know which new successor Pred will get. Make
- // sure we update the number of entries in the PHI nodes for these
- // successors.
- for (BasicBlock *NewSuccessor : NewSuccessors)
- AddPredecessorToBlock(NewSuccessor, Pred, BB);
-
- Builder.SetInsertPoint(PTI);
- // Convert pointer to int before we switch.
- if (CV->getType()->isPointerTy()) {
- CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
- "magicptr");
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].Value)) {
+ // If this is one we are capable of getting...
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[BBCases[i].Value]);
+ PredCases.push_back(BBCases[i]);
+ ++NewSuccessors[BBCases[i].Dest];
+ PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
}
- // Now that the successors are updated, create the new Switch instruction.
- SwitchInst *NewSI =
- Builder.CreateSwitch(CV, PredDefault, PredCases.size());
- NewSI->setDebugLoc(PTI->getDebugLoc());
- for (ValueEqualityComparisonCase &V : PredCases)
- NewSI->addCase(V.Value, V.Dest);
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (ConstantInt *I : PTIHandled) {
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[I]);
+ PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
+ ++NewSuccessors[BBDefault];
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
+ NewSuccessors) {
+ for (auto I : seq(0, NewSuccessor.second)) {
+ (void)I;
+ AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
+ }
+ if (!is_contained(successors(Pred), NewSuccessor.first))
+ Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
+ }
+
+ Builder.SetInsertPoint(PTI);
+ // Convert pointer to int before we switch.
+ if (CV->getType()->isPointerTy()) {
+ CV =
+ Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
+ }
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
+ NewSI->setDebugLoc(PTI->getDebugLoc());
+ for (ValueEqualityComparisonCase &V : PredCases)
+ NewSI->addCase(V.Value, V.Dest);
- if (PredHasWeights || SuccHasWeights) {
- // Halve the weights if any of them cannot fit in an uint32_t
- FitWeights(Weights);
+ if (PredHasWeights || SuccHasWeights) {
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(Weights);
- SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- setBranchWeights(NewSI, MDWeights);
+ setBranchWeights(NewSI, MDWeights);
+ }
+
+ EraseTerminatorAndDCECond(PTI);
+
+ // Okay, last check. If BB is still a successor of PSI, then we must
+ // have an infinite loop case. If so, add an infinitely looping block
+ // to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = nullptr;
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (!InfLoopBlock) {
+ // Insert it at the end of the function, because it's either code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock =
+ BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
}
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
- EraseTerminatorAndDCECond(PTI);
-
- // Okay, last check. If BB is still a successor of PSI, then we must
- // have an infinite loop case. If so, add an infinitely looping block
- // to handle the case to preserve the behavior of the code.
- BasicBlock *InfLoopBlock = nullptr;
- for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
- if (NewSI->getSuccessor(i) == BB) {
- if (!InfLoopBlock) {
- // Insert it at the end of the function, because it's either code,
- // or it won't matter if it's hot. :)
- InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop",
- BB->getParent());
- BranchInst::Create(InfLoopBlock, InfLoopBlock);
- }
- NewSI->setSuccessor(i, InfLoopBlock);
- }
+ if (InfLoopBlock)
+ Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
- Changed = true;
+ Updates.push_back({DominatorTree::Delete, Pred, BB});
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ ++NumFoldValueComparisonIntoPredecessors;
+ return true;
+}
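
Editor's note: PerformValueComparisonIntoPredecessorFolding has to merge two independent sets of branch-weight metadata. In the branch where the predecessor's default edge goes to BB, a case copied from the successor is scaled by the predecessor's default weight, the predecessor's own weights are scaled by the successor total (its default by the successor default), and FitWeights then halves everything until each weight fits a uint32_t. A hedged arithmetic sketch of that scaling, assuming one predecessor case and one copied successor case; plain vectors stand in for the metadata.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Index 0 is the default weight, later entries are per-case weights.
  std::vector<uint64_t> PredWeights = {8, 6}; // predecessor: default, one case
  std::vector<uint64_t> SuccWeights = {3, 5}; // successor:   default, one case

  std::vector<uint64_t> Merged = PredWeights;
  // A case copied from the successor is only reachable through the
  // predecessor's default edge, so scale it by that default weight.
  Merged.push_back(PredWeights[0] * SuccWeights[1]);
  // Scale the predecessor's case weights by the total successor weight and
  // its default by the successor default, putting both sets on one scale.
  uint64_t ValidTotalSuccWeight = SuccWeights[0] + SuccWeights[1];
  Merged[1] *= ValidTotalSuccWeight;
  Merged[0] *= SuccWeights[0];

  // FitWeights: halve everything until every weight fits into a uint32_t.
  auto TooBig = [&Merged] {
    for (uint64_t W : Merged)
      if (W > UINT32_MAX)
        return true;
    return false;
  };
  while (TooBig())
    for (uint64_t &W : Merged)
      W /= 2;

  for (uint64_t W : Merged)
    std::printf("%llu ", (unsigned long long)W); // 24 48 40
  std::printf("\n");
}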
+
+/// The specified terminator is a value equality comparison instruction
+/// (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together.
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+
+ bool Changed = false;
+
+ SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+ Instruction *PTI = Pred->getTerminator();
+
+ // Don't try to fold into itself.
+ if (Pred == BB)
+ continue;
+
+ // See if the predecessor is a comparison with the same value.
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+ if (PCV != CV)
+ continue;
+
+ SmallSetVector<BasicBlock *, 4> FailBlocks;
+ if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
+ for (auto *Succ : FailBlocks) {
+ if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
+ return false;
+ }
}
+
+ PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
+ Changed = true;
}
return Changed;
}
@@ -1248,7 +1364,7 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
return true;
}
-static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
/// in the two blocks up into the branch block. The caller of this function
@@ -1285,6 +1401,12 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
BasicBlock *BIParent = BI->getParent();
bool Changed = false;
+
+ auto _ = make_scope_exit([&]() {
+ if (Changed)
+ ++NumHoistCommonCode;
+ });
+
do {
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
@@ -1353,6 +1475,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
I2->eraseFromParent();
Changed = true;
}
+ ++NumHoistCommonInstrs;
I1 = &*BB1_Itr++;
I2 = &*BB2_Itr++;
@@ -1407,6 +1530,8 @@ HoistTerminator:
I2->replaceAllUsesWith(NT);
NT->takeName(I1);
}
+ Changed = true;
+ ++NumHoistCommonInstrs;
// Ensure terminator gets a debug location, even an unknown one, in case
// it involves inlinable calls.
@@ -1448,12 +1573,20 @@ HoistTerminator:
}
}
+ SmallVector<DominatorTree::UpdateType, 4> Updates;
+
// Update any PHI nodes in our new successors.
- for (BasicBlock *Succ : successors(BB1))
+ for (BasicBlock *Succ : successors(BB1)) {
AddPredecessorToBlock(Succ, BIParent, BB1);
+ Updates.push_back({DominatorTree::Insert, BIParent, Succ});
+ }
+ for (BasicBlock *Succ : successors(BI))
+ Updates.push_back({DominatorTree::Delete, BIParent, Succ});
EraseTerminatorAndDCECond(BI);
- return true;
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ return Changed;
}
// Check lifetime markers.
@@ -1495,6 +1628,11 @@ static bool canSinkInstructions(
I->getType()->isTokenTy())
return false;
+ // Do not try to sink an instruction in an infinite loop - it can cause
+ // this algorithm to infinite loop.
+ if (I->getParent()->getSingleSuccessor() == I->getParent())
+ return false;
+
// Conservatively return false if I is an inline-asm instruction. Sinking
// and merging inline-asm instructions can potentially create arguments
// that cannot satisfy the inline-asm constraints.
@@ -1581,13 +1719,13 @@ static bool canSinkInstructions(
return true;
}
-// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
+// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
- // canSinkLastInstruction returning true guarantees that every block has at
+ // canSinkInstructions returning true guarantees that every block has at
// least one non-terminator instruction.
SmallVector<Instruction*,4> Insts;
for (auto *BB : Blocks) {
@@ -1600,9 +1738,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
}
// The only checking we need to do now is that all users of all instructions
- // are the same PHI node. canSinkLastInstruction should have checked this but
- // it is slightly over-aggressive - it gets confused by commutative instructions
- // so double-check it here.
+ // are the same PHI node. canSinkInstructions should have checked this but
+ // it is slightly over-aggressive - it gets confused by commutative
+ // instructions so double-check it here.
Instruction *I0 = Insts.front();
if (!I0->user_empty()) {
auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
@@ -1613,11 +1751,11 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
return false;
}
- // We don't need to do any more checking here; canSinkLastInstruction should
+ // We don't need to do any more checking here; canSinkInstructions should
// have done it all for us.
SmallVector<Value*, 4> NewOperands;
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
- // This check is different to that in canSinkLastInstruction. There, we
+ // This check is different to that in canSinkInstructions. There, we
// cared about the global view once simplifycfg (and instcombine) have
// completed - it takes into account PHIs that become trivially
// simplifiable. However here we need a more local view; if an operand
@@ -1744,7 +1882,8 @@ namespace {
/// true, sink any common code from the predecessors to BB.
/// We also allow one predecessor to end with conditional branch (but no more
/// than one).
-static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
+static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
+ DomTreeUpdater *DTU) {
// We support two situations:
// (1) all incoming arcs are unconditional
// (2) one incoming arc is conditional
@@ -1800,7 +1939,6 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
if (UnconditionalPreds.size() < 2)
return false;
- bool Changed = false;
// We take a two-step approach to tail sinking. First we scan from the end of
// each block upwards in lockstep. If the n'th instruction from the end of each
// block can be sunk, those instructions are added to ValuesToSink and we
@@ -1820,6 +1958,12 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
--LRI;
}
+ // If no instructions can be sunk, early-return.
+ if (ScanIdx == 0)
+ return false;
+
+ bool Changed = false;
+
auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
unsigned NumPHIdValues = 0;
for (auto *I : *LRI)
@@ -1834,7 +1978,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
return NumPHIInsts <= 1;
};
- if (ScanIdx > 0 && Cond) {
+ if (Cond) {
// Check if we would actually sink anything first! This mutates the CFG and
// adds an extra block. The goal in doing this is to allow instructions that
// couldn't be sunk before to be sunk - obviously, speculatable instructions
@@ -1857,7 +2001,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
// We have a conditional edge and we're going to sink some instructions.
// Insert a new block postdominating all blocks we're going to sink from.
- if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split"))
+ if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
// Edges couldn't be split.
return false;
Changed = true;
@@ -1875,7 +2019,8 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
// sink presuming a later value will also be sunk, but stop half way through
// and never actually sink it which means we produce more PHIs than intended.
// This is unlikely in practice though.
- for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
+ unsigned SinkIdx = 0;
+ for (; SinkIdx != ScanIdx; ++SinkIdx) {
LLVM_DEBUG(dbgs() << "SINK: Sink: "
<< *UnconditionalPreds[0]->getTerminator()->getPrevNode()
<< "\n");
@@ -1890,11 +2035,18 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
break;
}
- if (!sinkLastInstruction(UnconditionalPreds))
- return Changed;
- NumSinkCommons++;
+ if (!sinkLastInstruction(UnconditionalPreds)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "SINK: stopping here, failed to actually sink instruction!\n");
+ break;
+ }
+
+ NumSinkCommonInstrs++;
Changed = true;
}
+ if (SinkIdx != 0)
+ ++NumSinkCommonCode;
return Changed;
}
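
Editor's note: the first phase of SinkCommonCodeFromPredecessors scans every predecessor block from its terminator upwards in lockstep and records how many trailing instructions are identical in all of them (ScanIdx). Conceptually this is a longest-common-suffix computation; below is a hedged standalone sketch with strings standing in for instructions. The real code additionally checks operands and users via canSinkInstructions, which this sketch deliberately omits.

#include <cstdio>
#include <string>
#include <vector>

// Returns how many trailing "instructions" are identical in every block, the
// analogue of ScanIdx computed by the lockstep reverse iterator above.
static unsigned
commonSuffixLength(const std::vector<std::vector<std::string>> &Blocks) {
  unsigned Scan = 0;
  while (true) {
    const std::vector<std::string> &First = Blocks.front();
    if (Scan >= First.size())
      break;
    const std::string &Cand = First[First.size() - 1 - Scan];
    bool AllMatch = true;
    for (const auto &B : Blocks)
      if (Scan >= B.size() || B[B.size() - 1 - Scan] != Cand) {
        AllMatch = false;
        break;
      }
    if (!AllMatch)
      break;
    ++Scan; // this row of instructions could be sunk into the successor
  }
  return Scan;
}

int main() {
  std::vector<std::vector<std::string>> Preds = {
      {"load a", "add", "store x"},
      {"mul", "add", "store x"}};
  std::printf("%u\n", commonSuffixLength(Preds)); // 2: "add" and "store x"
}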
@@ -1938,7 +2090,9 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
// Look for a store to the same pointer in BrBB.
unsigned MaxNumInstToLookAt = 9;
- for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) {
+ // Skip pseudo probe intrinsic calls which are not really killing any memory
+ // accesses.
+ for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
if (!MaxNumInstToLookAt)
break;
--MaxNumInstToLookAt;
@@ -1959,6 +2113,65 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
return nullptr;
}
+/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
+/// converted to selects.
+static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
+ BasicBlock *EndBB,
+ unsigned &SpeculatedInstructions,
+ int &BudgetRemaining,
+ const TargetTransformInfo &TTI) {
+ TargetTransformInfo::TargetCostKind CostKind =
+ BB->getParent()->hasMinSize()
+ ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_SizeAndLatency;
+
+ bool HaveRewritablePHIs = false;
+ for (PHINode &PN : EndBB->phis()) {
+ Value *OrigV = PN.getIncomingValueForBlock(BB);
+ Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
+
+ // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
+ // Skip PHIs which are trivial.
+ if (ThenV == OrigV)
+ continue;
+
+ BudgetRemaining -=
+ TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+ // Don't convert to selects if we could remove undefined behavior instead.
+ if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
+ passingValueIsAlwaysUndefined(ThenV, &PN))
+ return false;
+
+ HaveRewritablePHIs = true;
+ ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
+ ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
+ if (!OrigCE && !ThenCE)
+ continue; // Known safe and cheap.
+
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
+ return false;
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
+ unsigned MaxCost =
+ 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ if (OrigCost + ThenCost > MaxCost)
+ return false;
+
+ // Account for the cost of an unfolded ConstantExpr which could end up
+ // getting expanded into Instructions.
+ // FIXME: This doesn't account for how many operations are combined in the
+ // constant expression.
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
+ return false;
+ }
+
+ return HaveRewritablePHIs;
+}
+
/// Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
@@ -2005,6 +2218,8 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
BasicBlock *BB = BI->getParent();
BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
+ int BudgetRemaining =
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
// If ThenBB is actually on the false edge of the conditional branch, remember
// to swap the select operands later.
@@ -2037,6 +2252,14 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
continue;
}
+ // Skip pseudo probes. The consequence is we lose track of the branch
+ // probability for ThenBB, which is fine since the optimization here takes
+ // place regardless of the branch probability.
+ if (isa<PseudoProbeInst>(I)) {
+ SpeculatedDbgIntrinsics.push_back(I);
+ continue;
+ }
+
// Only speculatively execute a single instruction (not counting the
// terminator) for now.
++SpeculatedInstructions;
@@ -2082,50 +2305,13 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
return false;
}
- // Check that the PHI nodes can be converted to selects.
- bool HaveRewritablePHIs = false;
- for (PHINode &PN : EndBB->phis()) {
- Value *OrigV = PN.getIncomingValueForBlock(BB);
- Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
-
- // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
- // Skip PHIs which are trivial.
- if (ThenV == OrigV)
- continue;
-
- // Don't convert to selects if we could remove undefined behavior instead.
- if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
- passingValueIsAlwaysUndefined(ThenV, &PN))
- return false;
-
- HaveRewritablePHIs = true;
- ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
- ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
- if (!OrigCE && !ThenCE)
- continue; // Known safe and cheap.
-
- if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
- (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
- return false;
- unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
- unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
- unsigned MaxCost =
- 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
- if (OrigCost + ThenCost > MaxCost)
- return false;
-
- // Account for the cost of an unfolded ConstantExpr which could end up
- // getting expanded into Instructions.
- // FIXME: This doesn't account for how many operations are combined in the
- // constant expression.
- ++SpeculatedInstructions;
- if (SpeculatedInstructions > 1)
- return false;
- }
-
- // If there are no PHIs to process, bail early. This helps ensure idempotence
- // as well.
- if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
+ // Check that we can insert the selects and that it's not too expensive to do
+ // so.
+ bool Convert = SpeculatedStore != nullptr;
+ Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
+ SpeculatedInstructions,
+ BudgetRemaining, TTI);
+ if (!Convert || BudgetRemaining < 0)
return false;
// If we get here, we can hoist the instruction and if-convert.
@@ -2199,6 +2385,12 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
for (Instruction &I : BB->instructionsWithoutDebug()) {
if (Size > MaxSmallBlockSize)
return false; // Don't clone large BB's.
+
+ // Can't fold blocks that contain noduplicate or convergent calls.
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->cannotDuplicate() || CI->isConvergent())
+ return false;
+
// We will delete Phis while threading, so Phis should not be accounted in
// block's size
if (!isa<PHINode>(I))
@@ -2221,8 +2413,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// If we have a conditional branch on a PHI node value that is defined in the
/// same block as the branch and if any PHI entries are constants, thread edges
/// corresponding to that entry to be branches to their ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
- AssumptionCache *AC) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
+ const DataLayout &DL, AssumptionCache *AC) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -2240,13 +2432,6 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
if (!BlockIsSimpleEnoughToThreadThrough(BB))
return false;
- // Can't fold blocks that contain noduplicate or convergent calls.
- if (any_of(*BB, [](const Instruction &I) {
- const CallInst *CI = dyn_cast<CallInst>(&I);
- return CI && (CI->cannotDuplicate() || CI->isConvergent());
- }))
- return false;
-
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -2265,6 +2450,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
if (isa<IndirectBrInst>(PredBB->getTerminator()))
continue;
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+
// The dest block might have PHI nodes, other predecessors and other
// difficult cases. Instead of being smart about this, just insert a new
// block that jumps to the destination block, effectively splitting
@@ -2273,6 +2460,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
RealDest->getParent(), RealDest);
BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
+ Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
// Update PHI nodes.
@@ -2331,8 +2519,14 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
PredBBTI->setSuccessor(i, EdgeBB);
}
+ Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB});
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
// Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, DL, AC) || true;
+ return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true;
}
return false;
@@ -2341,7 +2535,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
- const DataLayout &DL) {
+ DomTreeUpdater *DTU, const DataLayout &DL) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -2374,11 +2568,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
int BudgetRemaining =
TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ bool Changed = false;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
+ Changed = true;
continue;
}
@@ -2386,7 +2582,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
BudgetRemaining, TTI) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
BudgetRemaining, TTI))
- return false;
+ return Changed;
}
// If we folded the first phi, PN dangles at this point. Refresh it. If
@@ -2413,7 +2609,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
isa<BinaryOperator>(IfCond)) &&
!CanHoistNotFromBothValues(PN->getIncomingValue(0),
PN->getIncomingValue(1)))
- return false;
+ return Changed;
// If all PHI nodes are promotable, check to make sure that all instructions
// in the predecessor blocks can be promoted as well. If not, we won't be able
@@ -2427,11 +2623,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
} else {
DomBlock = *pred_begin(IfBlock1);
for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I)
- if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
+ if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) &&
+ !isa<PseudoProbeInst>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
// the xform is not worth it.
- return false;
+ return Changed;
}
}
@@ -2440,11 +2637,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
} else {
DomBlock = *pred_begin(IfBlock2);
for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I)
- if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
+ if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) &&
+ !isa<PseudoProbeInst>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
// the xform is not worth it.
- return false;
+ return Changed;
}
}
assert(DomBlock && "Failed to find root DomBlock");
@@ -2487,7 +2685,18 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
Instruction *OldTI = DomBlock->getTerminator();
Builder.SetInsertPoint(OldTI);
Builder.CreateBr(BB);
+
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, DomBlock, BB});
+ for (auto *Successor : successors(DomBlock))
+ Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
+ }
+
OldTI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
return true;
}
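
At the source level, the if-conversion performed by FoldTwoEntryPHINode corresponds to flattening a cheap if/else diamond into a select once both arms fit within the folding budget checked above. A minimal C++ analogy (illustrative only; the function names are hypothetical and the pass itself operates on LLVM IR):

// Before: the two-entry PHI is the merge point of an if/else diamond.
int pick_before(bool c, int a, int b) {
  int v;
  if (c)
    v = a + 1; // "then" side: cheap, speculatable work
  else
    v = b + 1; // "else" side: cheap, speculatable work
  return v;    // v plays the role of the two-entry PHI node
}

// After: both sides execute unconditionally and the PHI becomes a select.
int pick_after(bool c, int a, int b) {
  int t = a + 1;
  int f = b + 1;
  return c ? t : f; // select replaces the control flow
}
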
@@ -2496,9 +2705,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
/// introducing a select if the return values disagree.
bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
IRBuilder<> &Builder) {
+ auto *BB = BI->getParent();
assert(BI->isConditional() && "Must be a conditional branch");
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
+ // NOTE: destinations may match, this could be a degenerate uncond branch.
ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
@@ -2515,10 +2726,17 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
// there is no return value for this function, just change the
// branch into a return.
if (FalseRet->getNumOperands() == 0) {
- TrueSucc->removePredecessor(BI->getParent());
- FalseSucc->removePredecessor(BI->getParent());
+ TrueSucc->removePredecessor(BB);
+ FalseSucc->removePredecessor(BB);
Builder.CreateRetVoid();
EraseTerminatorAndDCECond(BI);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Delete, BB, TrueSucc});
+ if (TrueSucc != FalseSucc)
+ Updates.push_back({DominatorTree::Delete, BB, FalseSucc});
+ DTU->applyUpdates(Updates);
+ }
return true;
}
@@ -2530,10 +2748,10 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
// Unwrap any PHI nodes in the return blocks.
if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
if (TVPN->getParent() == TrueSucc)
- TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
+ TrueValue = TVPN->getIncomingValueForBlock(BB);
if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
if (FVPN->getParent() == FalseSucc)
- FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
+ FalseValue = FVPN->getIncomingValueForBlock(BB);
// In order for this transformation to be safe, we must be able to
// unconditionally execute both operands to the return. This is
@@ -2549,8 +2767,8 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
// Okay, we collected all the mapped values and checked them for sanity, and
// decided to really do this transformation. First, update the CFG.
- TrueSucc->removePredecessor(BI->getParent());
- FalseSucc->removePredecessor(BI->getParent());
+ TrueSucc->removePredecessor(BB);
+ FalseSucc->removePredecessor(BB);
// Insert select instructions where needed.
Value *BrCond = BI->getCondition();
@@ -2575,27 +2793,17 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
<< *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc);
EraseTerminatorAndDCECond(BI);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Delete, BB, TrueSucc});
+ if (TrueSucc != FalseSucc)
+ Updates.push_back({DominatorTree::Delete, BB, FalseSucc});
+ DTU->applyUpdates(Updates);
+ }
return true;
}
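
The return-value case handled here has a straightforward source-level counterpart: a conditional branch to two returns collapses into a single return of a select (or, for void functions, into a plain return, as in the FalseRet->getNumOperands() == 0 path above). Illustrative C++ only; these functions are hypothetical:

// Before: conditional branch to two return blocks.
int ret_before(bool c, int a, int b) {
  if (c)
    return a;
  return b;
}

// After: a single return of a select over the two values.
int ret_after(bool c, int a, int b) {
  return c ? a : b;
}
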
-/// Return true if the given instruction is available
-/// in its predecessor block. If yes, the instruction will be removed.
-static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) {
- if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
- return false;
- for (Instruction &I : *PB) {
- Instruction *PBI = &I;
- // Check whether Inst and PBI generate the same value.
- if (Inst->isIdenticalTo(PBI)) {
- Inst->replaceAllUsesWith(PBI);
- Inst->eraseFromParent();
- return true;
- }
- }
- return false;
-}
-
/// Return true if either PBI or BI has branch weight available, and store
/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
/// not have branch weight, use 1:1 as its weight.
@@ -2619,63 +2827,174 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
}
}
+// Determine if the two branches share a common destination, and deduce the
+// glue (binary operator) we need to use to join the branches' conditions
+// to arrive at the common destination.
+static Optional<std::pair<Instruction::BinaryOps, bool>>
+CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) {
+ assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
+ "Both blocks must end with a conditional branches.");
+ assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
+ "PredBB must be a predecessor of BB.");
+
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0))
+ return {{Instruction::Or, false}};
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
+ return {{Instruction::And, false}};
+ else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
+ return {{Instruction::And, true}};
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
+ return {{Instruction::Or, true}};
+ return None;
+}
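
The four cases above form a small decision table: which successor is shared determines whether the two conditions are joined with or/and, and whether the predecessor's condition must be inverted first. A standalone sketch of that table, using plain ints in place of basic blocks (a simplified model, not the patch's code):

#include <optional>
#include <utility>

enum class Glue { Or, And };

// P0/P1 are the predecessor branch's successors, B0/B1 the current branch's.
// Returns the operator joining the two conditions and whether the predecessor
// condition must be inverted, or nullopt if no destination is shared.
std::optional<std::pair<Glue, bool>> shareCommonDest(int P0, int P1, int B0,
                                                     int B1) {
  if (P0 == B0)
    return std::make_pair(Glue::Or, false);  // both jump to B0 when true
  if (P1 == B1)
    return std::make_pair(Glue::And, false); // both jump to B1 when false
  if (P0 == B1)
    return std::make_pair(Glue::And, true);  // invert predecessor, then and
  if (P1 == B0)
    return std::make_pair(Glue::Or, true);   // invert predecessor, then or
  return std::nullopt;
}
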
+
+static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
+ DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU) {
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *PredBlock = PBI->getParent();
+
+ // Determine if the two branches share a common destination.
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond;
+ std::tie(Opc, InvertPredCond) =
+ *CheckIfCondBranchesShareCommonDestination(BI, PBI);
+
+ LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+
+ IRBuilder<> Builder(PBI);
+ // The builder is used to create instructions to eliminate the branch in BB.
+ // If BB's terminator has !annotation metadata, add it to the new
+ // instructions.
+ Builder.CollectMetadataToCopy(BB->getTerminator(),
+ {LLVMContext::MD_annotation});
+
+ // If we need to invert the condition in the pred block to match, do so now.
+ if (InvertPredCond) {
+ Value *NewCond = PBI->getCondition();
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else {
+ NewCond =
+ Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
+ }
+
+ PBI->setCondition(NewCond);
+ PBI->swapSuccessors();
+ }
+
+ BasicBlock *UniqueSucc =
+ PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
+
+ // Before cloning instructions, notify the successor basic block that it
+ // is about to have a new predecessor. This will update PHI nodes,
+ // which will allow us to update live-out uses of bonus instructions.
+ AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
+
+ // Try to update branch weights.
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
+ SuccTrueWeight, SuccFalseWeight)) {
+ SmallVector<uint64_t, 8> NewWeights;
+
+ if (PBI->getSuccessor(0) == BB) {
+ // PBI: br i1 %x, BB, FalseDest
+ // BI: br i1 %y, UniqueSucc, FalseDest
+ // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // TrueWeight for PBI * FalseWeight for BI.
+ // We assume that total weights of a BranchInst can fit into 32 bits.
+ // Therefore, we will not have overflow using 64-bit arithmetic.
+ NewWeights.push_back(PredFalseWeight *
+ (SuccFalseWeight + SuccTrueWeight) +
+ PredTrueWeight * SuccFalseWeight);
+ } else {
+ // PBI: br i1 %x, TrueDest, BB
+ // BI: br i1 %y, TrueDest, UniqueSucc
+ // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // FalseWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
+ PredFalseWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ }
+
+ // Halve the weights if any of them cannot fit in a uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
+ setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
+
+ // TODO: If BB is reachable from all paths through PredBlock, then we
+ // could replace PBI's branch probabilities with BI's.
+ } else
+ PBI->setMetadata(LLVMContext::MD_prof, nullptr);
+
+ // Now, update the CFG.
+ PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
+
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
+ {DominatorTree::Delete, PredBlock, BB}});
+
+ // If BI was a loop latch, it may have had associated loop metadata.
+ // We need to copy it to the new latch, that is, PBI.
+ if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
+ PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
+
+ ValueToValueMapTy VMap; // maps original values to cloned values
+ CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
+
+ // Now that the Cond was cloned into the predecessor basic block,
+ // or/and the two conditions together.
+ Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp(
+ Opc, PBI->getCondition(), VMap[BI->getCondition()], "or.cond"));
+ PBI->setCondition(NewCond);
+
+ // Copy any debug value intrinsics into the end of PredBlock.
+ for (Instruction &I : *BB) {
+ if (isa<DbgInfoIntrinsic>(I)) {
+ Instruction *NewI = I.clone();
+ RemapInstruction(NewI, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ NewI->insertBefore(PBI);
+ }
+ }
+
+ ++NumFoldBranchToCommonDest;
+ return true;
+}
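
The branch-weight arithmetic above is easiest to verify with concrete numbers. A small standalone check for the first case (PBI and BI share their false destination), mirroring the formulas in this hunk; the function name is a placeholder, not an LLVM API:

#include <cassert>
#include <cstdint>

// New weights for the fused branch when control reaches UniqueSucc only if
// both conditions are true (PBI: br %x, BB, FalseDest; BI: br %y, UniqueSucc,
// FalseDest).
void foldedWeights(uint64_t PredTrue, uint64_t PredFalse, uint64_t SuccTrue,
                   uint64_t SuccFalse, uint64_t &NewTrue, uint64_t &NewFalse) {
  NewTrue = PredTrue * SuccTrue;
  NewFalse = PredFalse * (SuccFalse + SuccTrue) + PredTrue * SuccFalse;
}

int main() {
  uint64_t NewTrue = 0, NewFalse = 0;
  // Predecessor taken 3:1, successor branch taken 1:1.
  foldedWeights(3, 1, 1, 1, NewTrue, NewFalse);
  // 3 of the 8 weighted paths take both branches; the other 5 fall through.
  assert(NewTrue == 3 && NewFalse == 5);
  return 0;
}
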
+
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU,
+ const TargetTransformInfo *TTI,
unsigned BonusInstThreshold) {
+ // If this block ends with an unconditional branch,
+ // let SpeculativelyExecuteBB() deal with it.
+ if (!BI->isConditional())
+ return false;
+
BasicBlock *BB = BI->getParent();
const unsigned PredCount = pred_size(BB);
bool Changed = false;
- Instruction *Cond = nullptr;
- if (BI->isConditional())
- Cond = dyn_cast<Instruction>(BI->getCondition());
- else {
- // For unconditional branch, check for a simple CFG pattern, where
- // BB has a single predecessor and BB's successor is also its predecessor's
- // successor. If such pattern exists, check for CSE between BB and its
- // predecessor.
- if (BasicBlock *PB = BB->getSinglePredecessor())
- if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
- if (PBI->isConditional() &&
- (BI->getSuccessor(0) == PBI->getSuccessor(0) ||
- BI->getSuccessor(0) == PBI->getSuccessor(1))) {
- for (auto I = BB->instructionsWithoutDebug().begin(),
- E = BB->instructionsWithoutDebug().end();
- I != E;) {
- Instruction *Curr = &*I++;
- if (isa<CmpInst>(Curr)) {
- Cond = Curr;
- break;
- }
- // Quit if we can't remove this instruction.
- if (!tryCSEWithPredecessor(Curr, PB))
- return Changed;
- Changed = true;
- }
- }
+ TargetTransformInfo::TargetCostKind CostKind =
+ BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_SizeAndLatency;
- if (!Cond)
- return Changed;
- }
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
return Changed;
- // Make sure the instruction after the condition is the cond branch.
- BasicBlock::iterator CondIt = ++Cond->getIterator();
-
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(CondIt))
- ++CondIt;
-
- if (&*CondIt != BI)
- return Changed;
-
// Only allow this transformation if computing the condition doesn't involve
// too many instructions and these involved instructions can be executed
// unconditionally. We denote all involved instructions except the condition
@@ -2683,19 +3002,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
// number of the bonus instructions we'll need to create when cloning into
// each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
- for (auto I = BB->begin(); Cond != &*I; ++I) {
- // Ignore dbg intrinsics.
- if (isa<DbgInfoIntrinsic>(I))
+ for (Instruction &I : *BB) {
+ // Don't check the branch condition comparison itself.
+ if (&I == Cond)
continue;
- if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I))
- return Changed;
- // I has only one use and can be executed unconditionally.
- Instruction *User = dyn_cast<Instruction>(I->user_back());
- if (User == nullptr || User->getParent() != BB)
+ // Ignore dbg intrinsics, and the terminator.
+ if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
+ continue;
+ // I must be safe to execute unconditionally.
+ if (!isSafeToSpeculativelyExecute(&I))
return Changed;
- // I is used in the same BB. Since BI uses Cond and doesn't have more slots
- // to use any other instruction, User must be an instruction between next(I)
- // and Cond.
// Account for the cost of duplicating this instruction into each
// predecessor.
@@ -2715,9 +3031,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
return Changed;
// Finally, don't infinitely unroll conditional loops.
- BasicBlock *TrueDest = BI->getSuccessor(0);
- BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
- if (TrueDest == BB || FalseDest == BB)
+ if (is_contained(successors(BB), BB))
return Changed;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
@@ -2727,222 +3041,31 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
// Check that we have two conditional branches. If there is a PHI node in
// the common successor, verify that the same value flows in from both
// blocks.
- SmallVector<PHINode *, 4> PHIs;
- if (!PBI || PBI->isUnconditional() ||
- (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) ||
- (!BI->isConditional() &&
- !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
+ if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
continue;
// Determine if the two branches share a common destination.
- Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
- bool InvertPredCond = false;
-
- if (BI->isConditional()) {
- if (PBI->getSuccessor(0) == TrueDest) {
- Opc = Instruction::Or;
- } else if (PBI->getSuccessor(1) == FalseDest) {
- Opc = Instruction::And;
- } else if (PBI->getSuccessor(0) == FalseDest) {
- Opc = Instruction::And;
- InvertPredCond = true;
- } else if (PBI->getSuccessor(1) == TrueDest) {
- Opc = Instruction::Or;
- InvertPredCond = true;
- } else {
- continue;
- }
- } else {
- if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest)
- continue;
- }
-
- LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
- Changed = true;
-
- IRBuilder<> Builder(PBI);
-
- // If we need to invert the condition in the pred block to match, do so now.
- if (InvertPredCond) {
- Value *NewCond = PBI->getCondition();
-
- if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
- CmpInst *CI = cast<CmpInst>(NewCond);
- CI->setPredicate(CI->getInversePredicate());
- } else {
- NewCond =
- Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
- }
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond;
+ if (auto Recipe = CheckIfCondBranchesShareCommonDestination(BI, PBI))
+ std::tie(Opc, InvertPredCond) = *Recipe;
+ else
+ continue;
- PBI->setCondition(NewCond);
- PBI->swapSuccessors();
- }
+ // Check the cost of inserting the necessary logic before performing the
+ // transformation.
+ if (TTI) {
+ Type *Ty = BI->getCondition()->getType();
+ unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
+ if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
+ !isa<CmpInst>(PBI->getCondition())))
+ Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
- // If we have bonus instructions, clone them into the predecessor block.
- // Note that there may be multiple predecessor blocks, so we cannot move
- // bonus instructions to a predecessor block.
- ValueToValueMapTy VMap; // maps original values to cloned values
- // We already make sure Cond is the last instruction before BI. Therefore,
- // all instructions before Cond other than DbgInfoIntrinsic are bonus
- // instructions.
- for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) {
- if (isa<DbgInfoIntrinsic>(BonusInst))
+ if (Cost > BranchFoldThreshold)
continue;
- Instruction *NewBonusInst = BonusInst->clone();
-
- // When we fold the bonus instructions we want to make sure we
- // reset their debug locations in order to avoid stepping on dead
- // code caused by folding dead branches.
- NewBonusInst->setDebugLoc(DebugLoc());
-
- RemapInstruction(NewBonusInst, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- VMap[&*BonusInst] = NewBonusInst;
-
- // If we moved a load, we cannot any longer claim any knowledge about
- // its potential value. The previous information might have been valid
- // only given the branch precondition.
- // For an analogous reason, we must also drop all the metadata whose
- // semantics we don't understand.
- NewBonusInst->dropUnknownNonDebugMetadata();
-
- PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
- NewBonusInst->takeName(&*BonusInst);
- BonusInst->setName(BonusInst->getName() + ".old");
}
- // Clone Cond into the predecessor basic block, and or/and the
- // two conditions together.
- Instruction *CondInPred = Cond->clone();
-
- // Reset the condition debug location to avoid jumping on dead code
- // as the result of folding dead branches.
- CondInPred->setDebugLoc(DebugLoc());
-
- RemapInstruction(CondInPred, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- PredBlock->getInstList().insert(PBI->getIterator(), CondInPred);
- CondInPred->takeName(Cond);
- Cond->setName(CondInPred->getName() + ".old");
-
- if (BI->isConditional()) {
- Instruction *NewCond = cast<Instruction>(
- Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond"));
- PBI->setCondition(NewCond);
-
- uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
- bool HasWeights =
- extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
- SuccTrueWeight, SuccFalseWeight);
- SmallVector<uint64_t, 8> NewWeights;
-
- if (PBI->getSuccessor(0) == BB) {
- if (HasWeights) {
- // PBI: br i1 %x, BB, FalseDest
- // BI: br i1 %y, TrueDest, FalseDest
- // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
- NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
- // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
- // TrueWeight for PBI * FalseWeight for BI.
- // We assume that total weights of a BranchInst can fit into 32 bits.
- // Therefore, we will not have overflow using 64-bit arithmetic.
- NewWeights.push_back(PredFalseWeight *
- (SuccFalseWeight + SuccTrueWeight) +
- PredTrueWeight * SuccFalseWeight);
- }
- AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU);
- PBI->setSuccessor(0, TrueDest);
- }
- if (PBI->getSuccessor(1) == BB) {
- if (HasWeights) {
- // PBI: br i1 %x, TrueDest, BB
- // BI: br i1 %y, TrueDest, FalseDest
- // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
- // FalseWeight for PBI * TrueWeight for BI.
- NewWeights.push_back(PredTrueWeight *
- (SuccFalseWeight + SuccTrueWeight) +
- PredFalseWeight * SuccTrueWeight);
- // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
- NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
- }
- AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU);
- PBI->setSuccessor(1, FalseDest);
- }
- if (NewWeights.size() == 2) {
- // Halve the weights if any of them cannot fit in an uint32_t
- FitWeights(NewWeights);
-
- SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),
- NewWeights.end());
- setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
- } else
- PBI->setMetadata(LLVMContext::MD_prof, nullptr);
- } else {
- // Update PHI nodes in the common successors.
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
- ConstantInt *PBI_C = cast<ConstantInt>(
- PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
- assert(PBI_C->getType()->isIntegerTy(1));
- Instruction *MergedCond = nullptr;
- if (PBI->getSuccessor(0) == TrueDest) {
- // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
- // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
- // is false: !PBI_Cond and BI_Value
- Instruction *NotCond = cast<Instruction>(
- Builder.CreateNot(PBI->getCondition(), "not.cond"));
- MergedCond = cast<Instruction>(
- Builder.CreateBinOp(Instruction::And, NotCond, CondInPred,
- "and.cond"));
- if (PBI_C->isOne())
- MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::Or, PBI->getCondition(), MergedCond, "or.cond"));
- } else {
- // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
- // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
- // is false: PBI_Cond and BI_Value
- MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::And, PBI->getCondition(), CondInPred, "and.cond"));
- if (PBI_C->isOne()) {
- Instruction *NotCond = cast<Instruction>(
- Builder.CreateNot(PBI->getCondition(), "not.cond"));
- MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::Or, NotCond, MergedCond, "or.cond"));
- }
- }
- // Update PHI Node.
- PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond);
- }
-
- // PBI is changed to branch to TrueDest below. Remove itself from
- // potential phis from all other successors.
- if (MSSAU)
- MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest);
-
- // Change PBI from Conditional to Unconditional.
- BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
- EraseTerminatorAndDCECond(PBI, MSSAU);
- PBI = New_PBI;
- }
-
- // If BI was a loop latch, it may have had associated loop metadata.
- // We need to copy it to the new latch, that is, PBI.
- if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
- PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
-
- // TODO: If BB is reachable from all paths through PredBlock, then we
- // could replace PBI's branch probabilities with BI's.
-
- // Copy any debug value intrinsics into the end of PredBlock.
- for (Instruction &I : *BB) {
- if (isa<DbgInfoIntrinsic>(I)) {
- Instruction *NewI = I.clone();
- RemapInstruction(NewI, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- NewI->insertBefore(PBI);
- }
- }
-
- return Changed;
+ return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU);
}
return Changed;
}
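
The new TTI gate above charges one binary operator per fold, plus a xor when the predecessor condition must be inverted and cannot simply have its compare predicate flipped in place. A tiny sketch of that gate with placeholder cost inputs (the real code queries TargetTransformInfo and the BranchFoldThreshold option):

// Returns true when the fold's added instructions stay within the threshold.
bool foldIsCheapEnough(unsigned BinOpCost, unsigned XorCost, bool NeedInvert,
                       bool CondIsSingleUseCmp, unsigned Threshold) {
  unsigned Cost = BinOpCost;
  if (NeedInvert && !CondIsSingleUseCmp)
    Cost += XorCost; // an explicit 'not' instruction has to be emitted
  return Cost <= Threshold;
}
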
@@ -3015,12 +3138,10 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
return PHI;
}
-static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
- BasicBlock *QTB, BasicBlock *QFB,
- BasicBlock *PostBB, Value *Address,
- bool InvertPCond, bool InvertQCond,
- const DataLayout &DL,
- const TargetTransformInfo &TTI) {
+static bool mergeConditionalStoreToAddress(
+ BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
+ BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
+ DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
// For every pointer, there must be exactly two stores, one coming from
// PTB or PFB, and the other from QTB or QFB. We don't support more than one
// store (to any address) in PTB,PFB or QTB,QFB.
@@ -3095,7 +3216,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
return true;
};
- const SmallVector<StoreInst *, 2> FreeStores = {PStore, QStore};
+ const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
if (!MergeCondStoresAggressively &&
(!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
!IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
@@ -3109,8 +3230,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
// If QTB does not exist, then QFB's only predecessor has a conditional
// branch to QFB and PostBB.
BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
- BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred},
- "condstore.split");
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
if (!NewBB)
return false;
PostBB = NewBB;
@@ -3139,8 +3260,9 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
QPred = QB.CreateNot(QPred);
Value *CombinedPred = QB.CreateOr(PPred, QPred);
- auto *T =
- SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false);
+ auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(),
+ /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DTU);
QB.SetInsertPoint(T);
StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
AAMDNodes AAMD;
@@ -3160,7 +3282,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
}
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
- const DataLayout &DL,
+ DomTreeUpdater *DTU, const DataLayout &DL,
const TargetTransformInfo &TTI) {
// The intention here is to find diamonds or triangles (see below) where each
// conditional block contains a store to the same address. Both of these
@@ -3262,16 +3384,17 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
bool Changed = false;
for (auto *Address : CommonAddresses)
- Changed |= mergeConditionalStoreToAddress(
- PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI);
+ Changed |=
+ mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
+ InvertPCond, InvertQCond, DTU, DL, TTI);
return Changed;
}
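
The store-merging helpers above look for diamonds or triangles whose arms store to the same address; their combined effect can be pictured at the source level roughly as follows (illustrative C++ only, assuming any code between the two ifs touches neither the address nor the predicates):

// Before: two independent conditional stores to the same address.
void stores_before(bool p, bool q, int *a, int x, int y) {
  if (p)
    *a = x;
  if (q)
    *a = y;
}

// After: one store guarded by the combined predicate, with a select choosing
// the value; the later store wins when both predicates are true.
void stores_after(bool p, bool q, int *a, int x, int y) {
  if (p || q)
    *a = q ? y : x;
}
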
-
/// If the previous block ended with a widenable branch, determine if reusing
/// the target block is profitable and legal. This will have the effect of
/// "widening" PBI, but doesn't require us to reason about hosting safety.
-static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
+ DomTreeUpdater *DTU) {
// TODO: This can be generalized in two important ways:
// 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
// values from the PBI edge.
@@ -3294,15 +3417,25 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
NoSideEffects(*BI->getParent())) {
- BI->getSuccessor(1)->removePredecessor(BI->getParent());
+ auto *OldSuccessor = BI->getSuccessor(1);
+ OldSuccessor->removePredecessor(BI->getParent());
BI->setSuccessor(1, IfFalseBB);
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
+ {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
return true;
}
if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
NoSideEffects(*BI->getParent())) {
- BI->getSuccessor(0)->removePredecessor(BI->getParent());
+ auto *OldSuccessor = BI->getSuccessor(0);
+ OldSuccessor->removePredecessor(BI->getParent());
BI->setSuccessor(0, IfFalseBB);
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
+ {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
return true;
}
return false;
@@ -3313,6 +3446,7 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
+ DomTreeUpdater *DTU,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
assert(PBI->isConditional() && BI->isConditional());
@@ -3366,7 +3500,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// If the previous block ended with a widenable branch, determine if reusing
// the target block is profitable and legal. This will have the effect of
// "widening" PBI, but doesn't require us to reason about hosting safety.
- if (tryWidenCondBranchToCondBranch(PBI, BI))
+ if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
return true;
if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
@@ -3376,7 +3510,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// If both branches are conditional and both contain stores to the same
// address, remove the stores from the conditionals and create a conditional
// merged store at the end.
- if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI))
+ if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
return true;
// If this is a conditional branch in an empty block, and if any
@@ -3419,6 +3553,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// case, it would be unsafe to hoist the operation into a select instruction.
BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+ BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
unsigned NumPhis = 0;
for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
++II, ++NumPhis) {
@@ -3444,6 +3579,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
<< "AND: " << *BI->getParent());
+ SmallVector<DominatorTree::UpdateType, 5> Updates;
+
// If OtherDest *is* BB, then BB is a basic block with a single conditional
// branch in it, where one edge (OtherDest) goes back to itself but the other
// exits. We don't *know* that the program avoids the infinite loop
@@ -3457,6 +3594,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
BasicBlock *InfLoopBlock =
BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
OtherDest = InfLoopBlock;
}
@@ -3483,6 +3621,12 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
PBI->setSuccessor(0, CommonDest);
PBI->setSuccessor(1, OtherDest);
+ Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
+ Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
// Update branch weight for PBI.
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
@@ -3562,6 +3706,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
BasicBlock *FalseBB,
uint32_t TrueWeight,
uint32_t FalseWeight) {
+ auto *BB = OldTerm->getParent();
// Remove any superfluous successor edges from the CFG.
// First, figure out which successors to preserve.
// If TrueBB and FalseBB are equal, only try to preserve one copy of that
@@ -3569,6 +3714,8 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
BasicBlock *KeepEdge1 = TrueBB;
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
+ SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
+
// Then remove the rest.
for (BasicBlock *Succ : successors(OldTerm)) {
// Make sure only to keep exactly one copy of each edge.
@@ -3576,9 +3723,13 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
KeepEdge1 = nullptr;
else if (Succ == KeepEdge2)
KeepEdge2 = nullptr;
- else
- Succ->removePredecessor(OldTerm->getParent(),
+ else {
+ Succ->removePredecessor(BB,
/*KeepOneInputPHIs=*/true);
+
+ if (Succ != TrueBB && Succ != FalseBB)
+ RemovedSuccessors.insert(Succ);
+ }
}
IRBuilder<> Builder(OldTerm);
@@ -3586,11 +3737,11 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
// Insert an appropriate new terminator.
if (!KeepEdge1 && !KeepEdge2) {
- if (TrueBB == FalseBB)
+ if (TrueBB == FalseBB) {
// We were only looking for one successor, and it was present.
// Create an unconditional branch to it.
Builder.CreateBr(TrueBB);
- else {
+ } else {
// We found both of the successors we were looking for.
// Create a conditional branch sharing the condition of the select.
BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
@@ -3605,15 +3756,25 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
// One of the selected values was a successor, but the other wasn't.
// Insert an unconditional branch to the one that was found;
// the edge to the one that wasn't must be unreachable.
- if (!KeepEdge1)
+ if (!KeepEdge1) {
// Only TrueBB was found.
Builder.CreateBr(TrueBB);
- else
+ } else {
// Only FalseBB was found.
Builder.CreateBr(FalseBB);
+ }
}
EraseTerminatorAndDCECond(OldTerm);
+
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.reserve(RemovedSuccessors.size());
+ for (auto *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
+
return true;
}
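
The pattern used here, and throughout this patch, is to record CFG edge changes while the terminator is being rewritten, deduplicate removed successors (a terminator may list the same block more than once), and hand the whole batch to the DomTreeUpdater in a single applyUpdates call. A simplified standalone model of that bookkeeping, with ints standing in for basic blocks:

#include <cstdio>
#include <set>
#include <vector>

enum class Kind { Insert, Delete };
struct Update { Kind K; int From, To; };

// Stand-in for DomTreeUpdater::applyUpdates: consumes one batch of updates.
void applyUpdates(const std::vector<Update> &Updates) {
  for (const Update &U : Updates)
    std::printf("%s %d -> %d\n", U.K == Kind::Insert ? "insert" : "delete",
                U.From, U.To);
}

int main() {
  const int BB = 0, KeepSucc = 1;
  std::set<int> RemovedSuccessors;  // plays the role of SmallSetVector
  for (int Succ : {1, 2, 2, 3})     // successor list with a duplicate entry
    if (Succ != KeepSucc)
      RemovedSuccessors.insert(Succ);
  std::vector<Update> Updates;
  for (int Succ : RemovedSuccessors)
    Updates.push_back({Kind::Delete, BB, Succ});
  applyUpdates(Updates);            // one call after the IR mutation is done
  return 0;
}
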
@@ -3768,6 +3929,8 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(DefaultCst);
ICI->eraseFromParent();
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+
// Okay, the switch goes to this block on a default value. Add an edge from
// the switch to the merge point on the compared value.
BasicBlock *NewBB =
@@ -3781,13 +3944,17 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
SIW.setSuccessorWeight(0, *NewW);
}
SIW.addCase(Cst, NewBB, NewW);
+ Updates.push_back({DominatorTree::Insert, Pred, NewBB});
}
// NewBB branches to the phi block, add the uncond branch and the phi entry.
Builder.SetInsertPoint(NewBB);
Builder.SetCurrentDebugLocation(SI->getDebugLoc());
Builder.CreateBr(SuccBlock);
+ Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
PHIUse->addIncoming(NewCst, NewBB);
+ if (DTU)
+ DTU->applyUpdates(Updates);
return true;
}
@@ -3821,7 +3988,7 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
if (UsedICmps <= 1)
return false;
- bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or);
+ bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
// There might be duplicate constants in the list, which the switch
// instruction can't handle, remove them now.
@@ -3853,12 +4020,15 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
<< " cases into SWITCH. BB is:\n"
<< *BB);
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+
// If there are any extra values that couldn't be folded into the switch
// then we evaluate them with an explicit branch first. Split the block
// right before the condbr to handle it.
if (ExtraCase) {
- BasicBlock *NewBB =
- BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
+ BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
+ /*MSSAU=*/nullptr, "switch.early.test");
+
// Remove the uncond branch added to the old block.
Instruction *OldTI = BB->getTerminator();
Builder.SetInsertPoint(OldTI);
@@ -3870,6 +4040,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
OldTI->eraseFromParent();
+ Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
+
// If there are PHI nodes in EdgeBB, then we need to add a new entry to them
// for the edge we just added.
AddPredecessorToBlock(EdgeBB, BB, NewBB);
@@ -3905,6 +4077,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
// Erase the old branch instruction.
EraseTerminatorAndDCECond(BI);
+ if (DTU)
+ DTU->applyUpdates(Updates);
LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
return true;
@@ -3921,17 +4095,36 @@ bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
}
+// Check if cleanup block is empty
+static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
+ for (Instruction &I : R) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II)
+ return false;
+
+ Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::dbg_label:
+ case Intrinsic::lifetime_end:
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
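
isCleanupBlockEmpty treats a block as empty when every instruction in the given range is one of a small whitelist of harmless intrinsics. A simplified standalone model of that filter, using strings in place of intrinsic IDs:

#include <set>
#include <string>
#include <vector>

// Returns true when every instruction name in R is on the harmless whitelist,
// mirroring the dbg_* / lifetime_end cases accepted above.
bool cleanupRangeIsEmpty(const std::vector<std::string> &R) {
  static const std::set<std::string> Harmless = {
      "dbg.declare", "dbg.value", "dbg.label", "lifetime.end"};
  for (const std::string &I : R)
    if (!Harmless.count(I))
      return false;
  return true;
}
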
+
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
- // Check that there are no other instructions except for debug intrinsics
- // between the phi of landing pads (RI->getValue()) and resume instruction.
- BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(),
- E = RI->getIterator();
- while (++I != E)
- if (!isa<DbgInfoIntrinsic>(I))
- return false;
+ // Check that there are no other instructions except for debug and lifetime
+ // intrinsics between the phis and the resume instruction.
+ if (!isCleanupBlockEmpty(
+ make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
+ return false;
SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
auto *PhiLPInst = cast<PHINode>(RI->getValue());
@@ -3952,17 +4145,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
if (IncomingValue != LandingPad)
continue;
- bool isTrivial = true;
-
- I = IncomingBB->getFirstNonPHI()->getIterator();
- E = IncomingBB->getTerminator()->getIterator();
- while (++I != E)
- if (!isa<DbgInfoIntrinsic>(I)) {
- isTrivial = false;
- break;
- }
-
- if (isTrivial)
+ if (isCleanupBlockEmpty(
+ make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
TrivialUnwindBlocks.insert(IncomingBB);
}
@@ -3981,7 +4165,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
PI != PE;) {
BasicBlock *Pred = *PI++;
- removeUnwindEdge(Pred);
+ removeUnwindEdge(Pred, DTU);
+ ++NumInvokes;
}
// In each SimplifyCFG run, only the current processed block can be erased.
@@ -3991,37 +4176,21 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
// predecessors.
TrivialBB->getTerminator()->eraseFromParent();
new UnreachableInst(RI->getContext(), TrivialBB);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
}
// Delete the resume block if all its predecessors have been removed.
- if (pred_empty(BB))
- BB->eraseFromParent();
+ if (pred_empty(BB)) {
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
+ }
return !TrivialUnwindBlocks.empty();
}
-// Check if cleanup block is empty
-static bool isCleanupBlockEmpty(Instruction *Inst, Instruction *RI) {
- BasicBlock::iterator I = Inst->getIterator(), E = RI->getIterator();
- while (++I != E) {
- auto *II = dyn_cast<IntrinsicInst>(I);
- if (!II)
- return false;
-
- Intrinsic::ID IntrinsicID = II->getIntrinsicID();
- switch (IntrinsicID) {
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::dbg_label:
- case Intrinsic::lifetime_end:
- break;
- default:
- return false;
- }
- }
- return true;
-}
-
// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
@@ -4030,23 +4199,26 @@ bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
"Resume must unwind the exception that caused control to here");
// Check that there are no other instructions except for debug intrinsics.
- if (!isCleanupBlockEmpty(LPInst, RI))
+ if (!isCleanupBlockEmpty(
+ make_range<Instruction *>(LPInst->getNextNode(), RI)))
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
BasicBlock *Pred = *PI++;
- removeUnwindEdge(Pred);
+ removeUnwindEdge(Pred, DTU);
+ ++NumInvokes;
}
// The landingpad is now unreachable. Zap it.
- if (LoopHeaders)
- LoopHeaders->erase(BB);
- BB->eraseFromParent();
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
return true;
}
-static bool removeEmptyCleanup(CleanupReturnInst *RI) {
+static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
// If this is a trivial cleanup pad that executes no instructions, it can be
// eliminated. If the cleanup pad continues to the caller, any predecessor
// that is an EH pad will be updated to continue to the caller and any
@@ -4067,7 +4239,8 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {
return false;
// Check that there are no other instructions except for benign intrinsics.
- if (!isCleanupBlockEmpty(CPInst, RI))
+ if (!isCleanupBlockEmpty(
+ make_range<Instruction *>(CPInst->getNextNode(), RI)))
return false;
// If the cleanup return we are simplifying unwinds to the caller, this will
@@ -4152,19 +4325,32 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {
}
}
+ std::vector<DominatorTree::UpdateType> Updates;
+
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
// The iterator must be updated here because we are removing this pred.
BasicBlock *PredBB = *PI++;
if (UnwindDest == nullptr) {
- removeUnwindEdge(PredBB);
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ removeUnwindEdge(PredBB, DTU);
+ ++NumInvokes;
} else {
Instruction *TI = PredBB->getTerminator();
TI->replaceUsesOfWith(BB, UnwindDest);
+ Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
}
- // The cleanup pad is now unreachable. Zap it.
- BB->eraseFromParent();
+ if (DTU) {
+ DTU->applyUpdates(Updates);
+ DTU->deleteBB(BB);
+ } else
+ // The cleanup pad is now unreachable. Zap it.
+ BB->eraseFromParent();
+
return true;
}
@@ -4211,7 +4397,7 @@ bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
if (mergeCleanupPad(RI))
return true;
- if (removeEmptyCleanup(RI))
+ if (removeEmptyCleanup(RI, DTU))
return true;
return false;
@@ -4242,15 +4428,16 @@ bool SimplifyCFGOpt::simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *Pred = UncondBranchPreds.pop_back_val();
LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
<< "INTO UNCOND BRANCH PRED: " << *Pred);
- (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
+ (void)FoldReturnIntoUncondBranch(RI, BB, Pred, DTU);
}
// If we eliminated all predecessors of the block, delete the block now.
if (pred_empty(BB)) {
// We know there are no successors, so just nuke the block.
- if (LoopHeaders)
- LoopHeaders->erase(BB);
- BB->eraseFromParent();
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
}
return true;
@@ -4330,18 +4517,26 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
if (&BB->front() != UI)
return Changed;
- SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
+ std::vector<DominatorTree::UpdateType> Updates;
+
+ SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- Instruction *TI = Preds[i]->getTerminator();
+ auto *Predecessor = Preds[i];
+ Instruction *TI = Predecessor->getTerminator();
IRBuilder<> Builder(TI);
if (auto *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isUnconditional()) {
- assert(BI->getSuccessor(0) == BB && "Incorrect CFG");
+ // We could either have a proper unconditional branch,
+ // or a degenerate conditional branch with matching destinations.
+ if (all_of(BI->successors(),
+ [BB](auto *Successor) { return Successor == BB; })) {
new UnreachableInst(TI->getContext(), TI);
TI->eraseFromParent();
Changed = true;
} else {
+ assert(BI->isConditional() && "Can't get here with an uncond branch.");
Value* Cond = BI->getCondition();
+ assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
+ "The destinations are guaranteed to be different here.");
if (BI->getSuccessor(0) == BB) {
Builder.CreateAssumption(Builder.CreateNot(Cond));
Builder.CreateBr(BI->getSuccessor(1));
@@ -4353,6 +4548,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
EraseTerminatorAndDCECond(BI);
Changed = true;
}
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
} else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
SwitchInstProfUpdateWrapper SU(*SI);
for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
@@ -4365,14 +4561,23 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
e = SU->case_end();
Changed = true;
}
+ // Note that the default destination can't be removed!
+ if (SI->getDefaultDest() != BB)
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
} else if (auto *II = dyn_cast<InvokeInst>(TI)) {
if (II->getUnwindDest() == BB) {
- removeUnwindEdge(TI->getParent());
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ removeUnwindEdge(TI->getParent(), DTU);
Changed = true;
}
} else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
if (CSI->getUnwindDest() == BB) {
- removeUnwindEdge(TI->getParent());
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ removeUnwindEdge(TI->getParent(), DTU);
Changed = true;
continue;
}
@@ -4387,35 +4592,53 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
Changed = true;
}
}
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
if (CSI->getNumHandlers() == 0) {
- BasicBlock *CatchSwitchBB = CSI->getParent();
if (CSI->hasUnwindDest()) {
- // Redirect preds to the unwind dest
- CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest());
+ // Redirect all predecessors of the block containing CatchSwitchInst
+ // to instead branch to the CatchSwitchInst's unwind destination.
+ for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
+ Updates.push_back({DominatorTree::Insert, PredecessorOfPredecessor,
+ CSI->getUnwindDest()});
+ Updates.push_back(
+ {DominatorTree::Delete, PredecessorOfPredecessor, Predecessor});
+ }
+ Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
} else {
// Rewrite all preds to unwind to caller (or from invoke to call).
- SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB));
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
for (BasicBlock *EHPred : EHPreds)
- removeUnwindEdge(EHPred);
+ removeUnwindEdge(EHPred, DTU);
}
// The catchswitch is no longer reachable.
new UnreachableInst(CSI->getContext(), CSI);
CSI->eraseFromParent();
Changed = true;
}
- } else if (isa<CleanupReturnInst>(TI)) {
+ } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
+ (void)CRI;
+ assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
+ "Expected to always have an unwind to BB.");
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
new UnreachableInst(TI->getContext(), TI);
TI->eraseFromParent();
Changed = true;
}
}
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
// If this block is now dead, remove it.
if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
- if (LoopHeaders)
- LoopHeaders->erase(BB);
- BB->eraseFromParent();
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
return true;
}
@@ -4433,15 +4656,26 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
return true;
}
-static void createUnreachableSwitchDefault(SwitchInst *Switch) {
+static void createUnreachableSwitchDefault(SwitchInst *Switch,
+ DomTreeUpdater *DTU) {
LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
- BasicBlock *NewDefaultBlock =
- SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), "");
+ auto *BB = Switch->getParent();
+ BasicBlock *NewDefaultBlock = SplitBlockPredecessors(
+ Switch->getDefaultDest(), Switch->getParent(), "", DTU);
+ auto *OrigDefaultBlock = Switch->getDefaultDest();
Switch->setDefaultDest(&*NewDefaultBlock);
- SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front());
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, &*NewDefaultBlock},
+ {DominatorTree::Delete, BB, OrigDefaultBlock}});
+ SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU);
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ for (auto *Successor : successors(NewDefaultBlock))
+ Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor});
auto *NewTerminator = NewDefaultBlock->getTerminator();
new UnreachableInst(Switch->getContext(), NewTerminator);
EraseTerminatorAndDCECond(NewTerminator);
+ if (DTU)
+ DTU->applyUpdates(Updates);
}
/// Turn a switch with two reachable destinations into an integer range
@@ -4453,6 +4687,8 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
bool HasDefault =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ auto *BB = SI->getParent();
+
// Partition the cases into two sets with different destinations.
BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
BasicBlock *DestB = nullptr;
@@ -4556,17 +4792,23 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
// Clean up the default block - it may have phis or other instructions before
// the unreachable terminator.
if (!HasDefault)
- createUnreachableSwitchDefault(SI);
+ createUnreachableSwitchDefault(SI, DTU);
+
+ auto *UnreachableDefault = SI->getDefaultDest();
// Drop the switch.
SI->eraseFromParent();
+ if (!HasDefault && DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
+
return true;
}
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
-static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
+static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
+ AssumptionCache *AC,
const DataLayout &DL) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
@@ -4580,11 +4822,15 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
// Gather dead cases.
SmallVector<ConstantInt *, 8> DeadCases;
+ SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases;
for (auto &Case : SI->cases()) {
+ auto *Successor = Case.getCaseSuccessor();
+ ++NumPerSuccessorCases[Successor];
const APInt &CaseVal = Case.getCaseValue()->getValue();
if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
DeadCases.push_back(Case.getCaseValue());
+ --NumPerSuccessorCases[Successor];
LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
<< " is dead.\n");
}
@@ -4602,7 +4848,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
SI->getNumCases() == (1ULL << NumUnknownBits)) {
- createUnreachableSwitchDefault(SI);
+ createUnreachableSwitchDefault(SI, DTU);
return true;
}
@@ -4619,6 +4865,13 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
SIW.removeCase(CaseI);
}
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first});
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
return true;
}
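
The dead-case test above relies on known bits of the switch condition: a case value is unreachable if it has a 1 bit where the condition is known to be 0, or a 0 bit where the condition is known to be 1. A standalone model with plain masks (it omits the signed-bit-width check the real code also performs):

#include <cassert>
#include <cstdint>

bool caseIsDead(uint64_t KnownZero, uint64_t KnownOne, uint64_t CaseVal) {
  return (KnownZero & CaseVal) != 0   // case sets a bit known to be zero
      || (KnownOne & ~CaseVal) != 0;  // case clears a bit known to be one
}

int main() {
  // Suppose the condition is known to be even: bit 0 is known zero.
  const uint64_t KnownZero = 0x1, KnownOne = 0x0;
  assert(caseIsDead(KnownZero, KnownOne, 3));  // odd case value is dead
  assert(!caseIsDead(KnownZero, KnownOne, 4)); // even case value may be live
  return 0;
}
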
@@ -4974,30 +5227,41 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
// a select, fixing up PHI nodes and basic blocks.
static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
Value *SelectValue,
- IRBuilder<> &Builder) {
+ IRBuilder<> &Builder,
+ DomTreeUpdater *DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+
BasicBlock *SelectBB = SI->getParent();
+ BasicBlock *DestBB = PHI->getParent();
+
+ if (!is_contained(predecessors(DestBB), SelectBB))
+ Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
+ Builder.CreateBr(DestBB);
+
+ // Remove the switch.
+
while (PHI->getBasicBlockIndex(SelectBB) >= 0)
PHI->removeIncomingValue(SelectBB);
PHI->addIncoming(SelectValue, SelectBB);
- Builder.CreateBr(PHI->getParent());
-
- // Remove the switch.
for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
BasicBlock *Succ = SI->getSuccessor(i);
- if (Succ == PHI->getParent())
+ if (Succ == DestBB)
continue;
Succ->removePredecessor(SelectBB);
+ Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
}
SI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates(Updates);
}
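
The switch-to-select conversion that this helper finishes (see switchToSelect below) applies when the switch only feeds two distinct constants into a PHI in a common successor. Its source-level effect looks roughly like this (illustrative C++, hypothetical functions):

// Before: the switch only chooses between two constant results.
int two_results_before(int x) {
  int r;
  switch (x) {
  case 0:
  case 2:
    r = 10;
    break;
  default:
    r = 20;
    break;
  }
  return r;
}

// After: the PHI of {10, 20} becomes a select over a combined condition.
int two_results_after(int x) {
  return (x == 0 || x == 2) ? 10 : 20;
}
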
/// If the switch is only used to initialize one or more
/// phi nodes in a common successor block with only two different
/// constant values, replace the switch with select.
static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout &DL,
+ DomTreeUpdater *DTU, const DataLayout &DL,
const TargetTransformInfo &TTI) {
Value *const Cond = SI->getCondition();
PHINode *PHI = nullptr;
@@ -5017,7 +5281,7 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
Value *SelectValue =
ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
if (SelectValue) {
- RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder);
+ RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU);
return true;
}
// The switch couldn't be converted into a select.
@@ -5402,11 +5666,12 @@ static void reuseTableCompare(
/// successor block with different constant values, replace the switch with
/// lookup tables.
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout &DL,
+ DomTreeUpdater *DTU, const DataLayout &DL,
const TargetTransformInfo &TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
- Function *Fn = SI->getParent()->getParent();
+ BasicBlock *BB = SI->getParent();
+ Function *Fn = BB->getParent();
// Only build lookup table when we have a target that supports it or the
// attribute is not set.
if (!TTI.shouldBuildLookupTables() ||
@@ -5500,6 +5765,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
return false;
+ std::vector<DominatorTree::UpdateType> Updates;
+
// Create the BB that does the lookups.
Module &Mod = *CommonDest->getParent()->getParent();
BasicBlock *LookupBB = BasicBlock::Create(
@@ -5532,6 +5799,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
+ Updates.push_back({DominatorTree::Insert, BB, LookupBB});
// Note: We call removePredecessor later since we need to be able to get the
// PHI value for the default case in case we're using a bit mask.
} else {
@@ -5539,6 +5807,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
RangeCheckBranch =
Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ Updates.push_back({DominatorTree::Insert, BB, LookupBB});
}
// Populate the BB that does the lookups.
@@ -5576,16 +5845,18 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
Value *LoBit = Builder.CreateTrunc(
Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
-
+ Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
+ Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
Builder.SetInsertPoint(LookupBB);
- AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent());
+ AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
}
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
// We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
// do not delete PHINodes here.
- SI->getDefaultDest()->removePredecessor(SI->getParent(),
+ SI->getDefaultDest()->removePredecessor(BB,
/*KeepOneInputPHIs=*/true);
+ Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
}
bool ReturnedEarly = false;
@@ -5622,19 +5893,29 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
PHI->addIncoming(Result, LookupBB);
}
- if (!ReturnedEarly)
+ if (!ReturnedEarly) {
Builder.CreateBr(CommonDest);
+ Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
+ }
// Remove the switch.
+ SmallSetVector<BasicBlock *, 8> RemovedSuccessors;
for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
BasicBlock *Succ = SI->getSuccessor(i);
if (Succ == SI->getDefaultDest())
continue;
- Succ->removePredecessor(SI->getParent());
+ Succ->removePredecessor(BB);
+ RemovedSuccessors.insert(Succ);
}
SI->eraseFromParent();
+ if (DTU) {
+ for (BasicBlock *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
+
++NumLookupTables;
if (NeedMask)
++NumLookupTablesHoles;
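
When SwitchToLookupTable succeeds, the switch is replaced by a range check (the RangeCheckBranch above) followed by a load from a constant table. A rough source-level analogy (illustrative C++ only; the table contents and names are made up):

// Before: a dense switch whose cases only produce constant results.
int table_before(int i) {
  switch (i) {
  case 0: return 31;
  case 1: return 28;
  case 2: return 31;
  case 3: return 30;
  default: return 0;
  }
}

// After: a bounds check plus an indexed load from a constant table.
int table_after(int i) {
  static const int Table[4] = {31, 28, 31, 30};
  if (static_cast<unsigned>(i) < 4u) // the emitted range check
    return Table[i];
  return 0;                          // original default result
}
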
@@ -5770,10 +6051,10 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
return requestResimplify();
// Remove unreachable cases.
- if (eliminateDeadSwitchCases(SI, Options.AC, DL))
+ if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
return requestResimplify();
- if (switchToSelect(SI, Builder, DL, TTI))
+ if (switchToSelect(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
@@ -5785,7 +6066,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// CVP. Therefore, only apply this transformation during late stages of the
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
- SwitchToLookupTable(SI, Builder, DL, TTI))
+ SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (ReduceSwitchRange(SI, Builder, DL, TTI))
@@ -5800,9 +6081,12 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
// Eliminate redundant destinations.
SmallPtrSet<Value *, 8> Succs;
+ SmallSetVector<BasicBlock *, 8> RemovedSuccs;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
BasicBlock *Dest = IBI->getDestination(i);
if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
+ if (!Dest->hasAddressTaken())
+ RemovedSuccs.insert(Dest);
Dest->removePredecessor(BB);
IBI->removeDestination(i);
--i;
@@ -5811,6 +6095,14 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
}
}
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(RemovedSuccs.size());
+ for (auto *RemovedSucc : RemovedSuccs)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
+ DTU->applyUpdates(Updates);
+ }
+
if (IBI->getNumDestinations() == 0) {
// If the indirectbr has no successors, change it to unreachable.
new UnreachableInst(IBI->getContext(), IBI);
@@ -5854,7 +6146,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
/// block when the inputs in the phi are the same for the two blocks being
/// merged. In some cases, this could result in removal of the PHI entirely.
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
- BasicBlock *BB) {
+ BasicBlock *BB, DomTreeUpdater *DTU) {
auto Succ = BB->getUniqueSuccessor();
assert(Succ);
// If there's a phi in the successor block, we'd likely have to introduce
@@ -5875,6 +6167,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
if (!BI2 || !BI2->isIdenticalTo(BI))
continue;
+ std::vector<DominatorTree::UpdateType> Updates;
+
// We've found an identical block. Update our predecessors to take that
// path instead and make ourselves dead.
SmallPtrSet<BasicBlock *, 16> Preds;
@@ -5884,6 +6178,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
"unexpected successor");
II->setUnwindDest(OtherPred);
+ Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
+ Updates.push_back({DominatorTree::Delete, Pred, BB});
}
// The debug info in OtherPred doesn't cover the merged control flow that
@@ -5899,11 +6195,14 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
Succs.insert(succ_begin(BB), succ_end(BB));
for (BasicBlock *Succ : Succs) {
Succ->removePredecessor(BB);
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
IRBuilder<> Builder(BI);
Builder.CreateUnreachable();
BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates(Updates);
return true;
}
return false;
@@ -5928,11 +6227,11 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
// backedge, so we can eliminate BB.
bool NeedCanonicalLoop =
Options.NeedCanonicalLoop &&
- (LoopHeaders && BB->hasNPredecessorsOrMore(2) &&
- (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
+ (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
+ (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
- !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
return true;
// If the only instruction in the block is a seteq/setne comparison against a
@@ -5951,7 +6250,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
- if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB))
+ if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
return true;
}
@@ -5959,7 +6258,8 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
+ if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ Options.BonusInstThreshold))
return requestResimplify();
return false;
}
@@ -6022,7 +6322,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
+ if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ Options.BonusInstThreshold))
return requestResimplify();
// We have a conditional branch to two blocks that are only reachable
@@ -6031,8 +6332,9 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistThenElseCodeToIf(BI, TTI))
- return requestResimplify();
+ if (HoistCommon && Options.HoistCommonInsts)
+ if (HoistThenElseCodeToIf(BI, TTI))
+ return requestResimplify();
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
@@ -6056,14 +6358,14 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// through this block if any PHI node entries are constants.
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI, DL, Options.AC))
+ if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC))
return requestResimplify();
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI))
+ if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
return requestResimplify();
// Look for diamond patterns.
@@ -6071,14 +6373,14 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (mergeConditionalStores(PBI, BI, DL, TTI))
+ if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
return requestResimplify();
return false;
}
/// Check if passing a value to an instruction will cause undefined behavior.
-static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
Constant *C = dyn_cast<Constant>(V);
if (!C)
return false;
@@ -6101,12 +6403,15 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
// Look through GEPs. A load from a GEP derived from NULL is still undefined
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
- if (GEP->getPointerOperand() == I)
- return passingValueIsAlwaysUndefined(V, GEP);
+ if (GEP->getPointerOperand() == I) {
+ if (!GEP->isInBounds() || !GEP->hasAllZeroIndices())
+ PtrValueMayBeModified = true;
+ return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
+ }
// Look through bitcasts.
if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
- return passingValueIsAlwaysUndefined(V, BC);
+ return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
// Load from null is undefined.
if (LoadInst *LI = dyn_cast<LoadInst>(Use))
@@ -6121,24 +6426,51 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
SI->getPointerAddressSpace())) &&
SI->getPointerOperand() == I;
- // A call to null is undefined.
- if (auto *CB = dyn_cast<CallBase>(Use))
- return !NullPointerIsDefined(CB->getFunction()) &&
- CB->getCalledOperand() == I;
+ if (auto *CB = dyn_cast<CallBase>(Use)) {
+ if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
+ return false;
+ // A call to null is undefined.
+ if (CB->getCalledOperand() == I)
+ return true;
+
+ if (C->isNullValue()) {
+ for (const llvm::Use &Arg : CB->args())
+ if (Arg == I) {
+ unsigned ArgIdx = CB->getArgOperandNo(&Arg);
+ if (CB->paramHasAttr(ArgIdx, Attribute::NonNull) &&
+ CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) {
+ // Passing null to a nonnull+noundef argument is undefined.
+ return !PtrValueMayBeModified;
+ }
+ }
+ } else if (isa<UndefValue>(C)) {
+ // Passing undef to a noundef argument is undefined.
+ for (const llvm::Use &Arg : CB->args())
+ if (Arg == I) {
+ unsigned ArgIdx = CB->getArgOperandNo(&Arg);
+ if (CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) {
+ // Passing undef to a noundef argument is undefined.
+ return true;
+ }
+ }
+ }
+ }
}
return false;
}
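
The rewritten CallBase handling above adds two attribute-driven cases to the old "a call to null is undefined" rule: a known-null argument passed to a parameter marked both nonnull and noundef, and an undef argument passed to a noundef parameter. As a loose source-level illustration only (assuming a compiler that honours the GCC/Clang nonnull attribute; the exact IR attributes it attaches are not shown here), this is the kind of source that gives the optimizer that licence:

    #include <cstdio>

    // The callee promises its pointer parameter is never null; passing a
    // literal null is therefore undefined behaviour, and the optimizer may
    // treat any path that would do so as unreachable, which is what
    // removeUndefIntroducingPredecessor below exploits.
    static void consume(int *p) __attribute__((nonnull));
    static void consume(int *p) { std::printf("%d\n", *p); }

    int main(int argc, char **) {
      int x = 42;
      // If argc > 1, the value feeding the call would be null, which is
      // undefined under the nonnull contract, so that branch can be removed.
      int *p = (argc > 1) ? nullptr : &x;
      consume(p);
      return 0;
    }
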
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
-static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
+ DomTreeUpdater *DTU) {
for (PHINode &PHI : BB->phis())
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
- Instruction *T = PHI.getIncomingBlock(i)->getTerminator();
+ BasicBlock *Predecessor = PHI.getIncomingBlock(i);
+ Instruction *T = Predecessor->getTerminator();
IRBuilder<> Builder(T);
if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
- BB->removePredecessor(PHI.getIncomingBlock(i));
+ BB->removePredecessor(Predecessor);
// Turn unconditional branches into unreachables and remove the dead
// destination from conditional branches.
if (BI->isUnconditional())
@@ -6147,6 +6479,8 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
: BI->getSuccessor(0));
BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
return true;
}
// TODO: SwitchInst.
@@ -6155,7 +6489,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
return false;
}
-bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
+bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
bool Changed = false;
assert(BB && BB->getParent() && "Block not embedded in function!");
@@ -6166,28 +6500,29 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
BB->getSinglePredecessor() == BB) {
LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
- DeleteDeadBlock(BB);
+ DeleteDeadBlock(BB, DTU);
return true;
}
// Check to see if we can constant propagate this terminator instruction
// away...
- Changed |= ConstantFoldTerminator(BB, true);
+ Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
+ /*TLI=*/nullptr, DTU);
// Check for and eliminate duplicate PHI nodes in this block.
Changed |= EliminateDuplicatePHINodes(BB);
// Check for and remove branches that will always cause undefined behavior.
- Changed |= removeUndefIntroducingPredecessor(BB);
+ Changed |= removeUndefIntroducingPredecessor(BB, DTU);
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
- if (MergeBlockIntoPredecessor(BB))
+ if (MergeBlockIntoPredecessor(BB, DTU))
return true;
if (SinkCommon && Options.SinkCommonInsts)
- Changed |= SinkCommonCodeFromPredecessors(BB);
+ Changed |= SinkCommonCodeFromPredecessors(BB, DTU);
IRBuilder<> Builder(BB);
@@ -6196,7 +6531,7 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
// eliminate it, do so now.
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
+ Changed |= FoldTwoEntryPHINode(PN, TTI, DTU, DL);
}
Instruction *Terminator = BB->getTerminator();
@@ -6228,7 +6563,23 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
return Changed;
}
+bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
+ bool Changed = simplifyOnceImpl(BB);
+
+ assert((!RequireAndPreserveDomTree ||
+ (DTU &&
+ DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
+ "Failed to maintain validity of domtree!");
+
+ return Changed;
+}
+
bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ assert((!RequireAndPreserveDomTree ||
+ (DTU &&
+ DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
+ "Original domtree is invalid?");
+
bool Changed = false;
// Repeatedly simplify BB as long as resimplification is requested.
@@ -6244,9 +6595,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
}
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- const SimplifyCFGOptions &Options,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
- return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders,
- Options)
+ DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
+ ArrayRef<WeakVH> LoopHeaders) {
+ return SimplifyCFGOpt(TTI, RequireAndPreserveDomTree ? DTU : nullptr,
+ BB->getModule()->getDataLayout(), LoopHeaders, Options)
.run(BB);
}
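
Stepping back, the common thread in this file's changes is that every CFG rewrite now reports the inserted and deleted edges to an optional DomTreeUpdater instead of leaving the dominator tree to be recomputed. Below is a minimal standalone sketch of that batch-update idiom against the LLVM C++ API of roughly this vintage; it is a toy program written for illustration (the function and block names are invented), not code taken from the patch.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/DomTreeUpdater.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("dtu-demo", Ctx);
      Function *F = Function::Create(
          FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
          Function::ExternalLinkage, "f", M);
      BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
      BasicBlock *Taken = BasicBlock::Create(Ctx, "taken", F);
      BasicBlock *Dead = BasicBlock::Create(Ctx, "dead", F);
      IRBuilder<> B(Entry);
      B.CreateCondBr(B.getTrue(), Taken, Dead);
      IRBuilder<>(Taken).CreateRetVoid();
      IRBuilder<>(Dead).CreateRetVoid();

      DominatorTree DT(*F);
      DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);

      // Fold the constant conditional branch, then report the removed edge.
      // Updates are collected first and applied in one batch, mirroring the
      // Updates vectors threaded through SimplifyCFG above.
      Entry->getTerminator()->eraseFromParent();
      IRBuilder<>(Entry).CreateBr(Taken);
      Dead->removePredecessor(Entry);
      SmallVector<DominatorTree::UpdateType, 1> Updates;
      Updates.push_back({DominatorTree::Delete, Entry, Dead});
      DTU.applyUpdates(Updates);
      DTU.flush(); // apply any pending updates now
      assert(DT.verify(DominatorTree::VerificationLevel::Full));
      return 0;
    }
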
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index d3d0c3341908..290c04a7ad10 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -191,15 +191,15 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
- ICmpInst::Predicate InvariantPredicate;
- const SCEV *InvariantLHS, *InvariantRHS;
-
auto *PN = dyn_cast<PHINode>(IVOperand);
if (!PN)
return false;
- if (!SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate,
- InvariantLHS, InvariantRHS))
+ auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L);
+ if (!LIP)
return false;
+ ICmpInst::Predicate InvariantPredicate = LIP->Pred;
+ const SCEV *InvariantLHS = LIP->LHS;
+ const SCEV *InvariantRHS = LIP->RHS;
// Rewrite the comparison to a loop invariant comparison if it can be done
// cheaply, where cheaply means "we don't need to emit any new
@@ -477,6 +477,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
if (WO->use_empty())
WO->eraseFromParent();
+ Changed = true;
return true;
}
@@ -967,3 +968,1122 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
}
} // namespace llvm
+
+//===----------------------------------------------------------------------===//
+// Widen Induction Variables - Extend the width of an IV to cover its
+// widest uses.
+//===----------------------------------------------------------------------===//
+
+class WidenIV {
+ // Parameters
+ PHINode *OrigPhi;
+ Type *WideType;
+
+ // Context
+ LoopInfo *LI;
+ Loop *L;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+
+ // Does the module have any calls to the llvm.experimental.guard intrinsic
+ // at all? If not, we can avoid scanning instructions looking for guards.
+ bool HasGuards;
+
+ bool UsePostIncrementRanges;
+
+ // Statistics
+ unsigned NumElimExt = 0;
+ unsigned NumWidened = 0;
+
+ // Result
+ PHINode *WidePhi = nullptr;
+ Instruction *WideInc = nullptr;
+ const SCEV *WideIncExpr = nullptr;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts;
+
+ SmallPtrSet<Instruction *,16> Widened;
+
+ enum ExtendKind { ZeroExtended, SignExtended, Unknown };
+
+ // A map tracking the kind of extension used to widen each narrow IV
+ // and narrow IV user.
+ // Key: pointer to a narrow IV or IV user.
+ // Value: the kind of extension used to widen this Instruction.
+ DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
+
+ using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
+
+ // A map with control-dependent ranges for post increment IV uses. The key is
+ // a pair of IV def and a use of this def denoting the context. The value is
+ // a ConstantRange representing possible values of the def at the given
+ // context.
+ DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
+
+ Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
+ Instruction *UseI) {
+ DefUserPair Key(Def, UseI);
+ auto It = PostIncRangeInfos.find(Key);
+ return It == PostIncRangeInfos.end()
+ ? Optional<ConstantRange>(None)
+ : Optional<ConstantRange>(It->second);
+ }
+
+ void calculatePostIncRanges(PHINode *OrigPhi);
+ void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
+
+ void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
+ DefUserPair Key(Def, UseI);
+ auto It = PostIncRangeInfos.find(Key);
+ if (It == PostIncRangeInfos.end())
+ PostIncRangeInfos.insert({Key, R});
+ else
+ It->second = R.intersectWith(It->second);
+ }
+
+public:
+ /// Record a link in the Narrow IV def-use chain along with the WideIV that
+ /// computes the same value as the Narrow IV def. This avoids caching Use*
+ /// pointers.
+ struct NarrowIVDefUse {
+ Instruction *NarrowDef = nullptr;
+ Instruction *NarrowUse = nullptr;
+ Instruction *WideDef = nullptr;
+
+ // True if the narrow def is never negative. Tracking this information lets
+ // us use a sign extension instead of a zero extension or vice versa, when
+ // profitable and legal.
+ bool NeverNegative = false;
+
+ NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
+ bool NeverNegative)
+ : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
+ NeverNegative(NeverNegative) {}
+ };
+
+ WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
+ DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
+ bool HasGuards, bool UsePostIncrementRanges = true);
+
+ PHINode *createWideIV(SCEVExpander &Rewriter);
+
+ unsigned getNumElimExt() { return NumElimExt; };
+ unsigned getNumWidened() { return NumWidened; };
+
+protected:
+ Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use);
+
+ Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
+ Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR);
+ Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
+
+ ExtendKind getExtendKind(Instruction *I);
+
+ using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
+
+ WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
+
+ WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
+
+ const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ unsigned OpCode) const;
+
+ Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
+
+ bool widenLoopCompare(NarrowIVDefUse DU);
+ bool widenWithVariantUse(NarrowIVDefUse DU);
+
+ void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
+
+private:
+ SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
+};
+
+
+/// Determine the insertion point for this user. By default, insert immediately
+/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
+/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
+/// common dominator for the incoming blocks. A nullptr can be returned if no
+/// viable location is found: it may happen if User is a PHI and Def only comes
+/// to this PHI from unreachable blocks.
+static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
+ DominatorTree *DT, LoopInfo *LI) {
+ PHINode *PHI = dyn_cast<PHINode>(User);
+ if (!PHI)
+ return User;
+
+ Instruction *InsertPt = nullptr;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ if (PHI->getIncomingValue(i) != Def)
+ continue;
+
+ BasicBlock *InsertBB = PHI->getIncomingBlock(i);
+
+ if (!DT->isReachableFromEntry(InsertBB))
+ continue;
+
+ if (!InsertPt) {
+ InsertPt = InsertBB->getTerminator();
+ continue;
+ }
+ InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
+ InsertPt = InsertBB->getTerminator();
+ }
+
+ // If we have skipped all inputs, it means that Def only comes to Phi from
+ // unreachable blocks.
+ if (!InsertPt)
+ return nullptr;
+
+ auto *DefI = dyn_cast<Instruction>(Def);
+ if (!DefI)
+ return InsertPt;
+
+ assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
+
+ auto *L = LI->getLoopFor(DefI->getParent());
+ assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
+
+ for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
+ if (LI->getLoopFor(DTN->getBlock()) == L)
+ return DTN->getBlock()->getTerminator();
+
+ llvm_unreachable("DefI dominates InsertPt!");
+}
+
+WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
+ DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
+ bool HasGuards, bool UsePostIncrementRanges)
+ : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
+ L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
+ HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
+ DeadInsts(DI) {
+ assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
+ ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
+}
+
+Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
+ bool IsSigned, Instruction *Use) {
+ // Set the debug location and conservative insertion point.
+ IRBuilder<> Builder(Use);
+ // Hoist the insertion point into loop preheaders as far as possible.
+ for (const Loop *L = LI->getLoopFor(Use->getParent());
+ L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
+ L = L->getParentLoop())
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+
+ return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
+ Builder.CreateZExt(NarrowOper, WideType);
+}
+
+/// Instantiate a wide operation to replace a narrow operation. This only needs
+/// to handle operations that can evaluate to SCEVAddRec. It can safely return
+/// 0 for any operation we decide not to clone.
+Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
+ unsigned Opcode = DU.NarrowUse->getOpcode();
+ switch (Opcode) {
+ default:
+ return nullptr;
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::Sub:
+ return cloneArithmeticIVUser(DU, WideAR);
+
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return cloneBitwiseIVUser(DU);
+ }
+}
+
+Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
+ // about the narrow operand yet so must insert a [sz]ext. It is probably loop
+ // invariant and will be folded or hoisted. If it actually comes from a
+ // widened IV, it should be removed during a future call to widenIVUse.
+ bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ IsSigned, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ IsSigned, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
+}
+
+Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
+
+ // We're trying to find X such that
+ //
+ // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
+ //
+ // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
+ // and check using SCEV if any of them are correct.
+
+ // Returns true if extending NonIVNarrowDef according to `SignExt` is a
+ // correct solution to X.
+ auto GuessNonIVOperand = [&](bool SignExt) {
+ const SCEV *WideLHS;
+ const SCEV *WideRHS;
+
+ auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
+ if (SignExt)
+ return SE->getSignExtendExpr(S, Ty);
+ return SE->getZeroExtendExpr(S, Ty);
+ };
+
+ if (IVOpIdx == 0) {
+ WideLHS = SE->getSCEV(WideDef);
+ const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
+ WideRHS = GetExtend(NarrowRHS, WideType);
+ } else {
+ const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
+ WideLHS = GetExtend(NarrowLHS, WideType);
+ WideRHS = SE->getSCEV(WideDef);
+ }
+
+ // WideUse is "WideDef `op.wide` X" as described in the comment.
+ const SCEV *WideUse =
+ getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode());
+
+ return WideUse == WideAR;
+ };
+
+ bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
+ if (!GuessNonIVOperand(SignExtend)) {
+ SignExtend = !SignExtend;
+ if (!GuessNonIVOperand(SignExtend))
+ return nullptr;
+ }
+
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ SignExtend, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ SignExtend, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
+}
+
+WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
+ auto It = ExtendKindMap.find(I);
+ assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
+ return It->second;
+}
+
+const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ unsigned OpCode) const {
+ switch (OpCode) {
+ case Instruction::Add:
+ return SE->getAddExpr(LHS, RHS);
+ case Instruction::Sub:
+ return SE->getMinusSCEV(LHS, RHS);
+ case Instruction::Mul:
+ return SE->getMulExpr(LHS, RHS);
+ case Instruction::UDiv:
+ return SE->getUDivExpr(LHS, RHS);
+ default:
+ llvm_unreachable("Unsupported opcode.");
+ };
+}
+
+/// No-wrap operations can transfer sign extension of their result to their
+/// operands. Generate the SCEV value for the widened operation without
+/// actually modifying the IR yet. If the expression after extending the
+/// operands is an AddRec for this loop, return the AddRec and the kind of
+/// extension used.
+WidenIV::WidenedRecTy
+WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
+ // Handle the common case of add<nsw/nuw>
+ const unsigned OpCode = DU.NarrowUse->getOpcode();
+ // Only Add/Sub/Mul instructions are supported so far.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
+ return {nullptr, Unknown};
+
+ // One operand (NarrowDef) has already been extended to WideDef. Now determine
+ // if extending the other will lead to a recurrence.
+ const unsigned ExtendOperIdx =
+ DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+ assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
+
+ const SCEV *ExtendOperExpr = nullptr;
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(DU.NarrowUse);
+ ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
+ if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else
+ return {nullptr, Unknown};
+
+ // When creating this SCEV expr, don't apply the current operation's NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
+ const SCEV *lhs = SE->getSCEV(DU.WideDef);
+ const SCEV *rhs = ExtendOperExpr;
+
+ // Let's swap operands to the initial order for the case of non-commutative
+ // operations, like SUB. See PR21014.
+ if (ExtendOperIdx == 0)
+ std::swap(lhs, rhs);
+ const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
+
+ if (!AddRec || AddRec->getLoop() != L)
+ return {nullptr, Unknown};
+
+ return {AddRec, ExtKind};
+}
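
The nsw/nuw check above is what licenses pushing the extension through the arithmetic: for a no-wrap operation, extending the result equals extending the operands first. A tiny arithmetic sanity check in plain C++ (illustrative only, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      // "add nsw" case: the 32-bit add does not wrap, so sign-extending the
      // result equals adding the sign-extended operands.
      int32_t a = 100000, b = 23;
      assert((int64_t)(a + b) == (int64_t)a + (int64_t)b);

      // Without the no-wrap guarantee the equivalence breaks: an unsigned add
      // that wraps in 32 bits no longer matches the 64-bit add of zero-extended
      // operands, which is why the code bails out when the flags are absent.
      uint32_t c = UINT32_MAX, d = 1;
      uint64_t zextOfSum = (uint64_t)(uint32_t)(c + d); // wraps to 0
      uint64_t sumOfZext = (uint64_t)c + (uint64_t)d;   // 4294967296
      assert(zextOfSum != sumOfZext);
      return 0;
    }
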
+
+/// Is this instruction potentially interesting for further simplification after
+/// widening its type? In other words, can the extend be safely hoisted out of
+/// the loop, with SCEV reducing the value to a recurrence on the same loop? If
+/// so, return the extended recurrence and the kind of extension used. Otherwise
+/// return {nullptr, Unknown}.
+WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
+ if (!SE->isSCEVable(DU.NarrowUse->getType()))
+ return {nullptr, Unknown};
+
+ const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
+ SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+ // index. So don't follow this use.
+ return {nullptr, Unknown};
+ }
+
+ const SCEV *WideExpr;
+ ExtendKind ExtKind;
+ if (DU.NeverNegative) {
+ WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+ if (isa<SCEVAddRecExpr>(WideExpr))
+ ExtKind = SignExtended;
+ else {
+ WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+ ExtKind = ZeroExtended;
+ }
+ } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
+ WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+ ExtKind = SignExtended;
+ } else {
+ WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+ ExtKind = ZeroExtended;
+ }
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return {nullptr, Unknown};
+ return {AddRec, ExtKind};
+}
+
+/// This IV user cannot be widened. Replace this use of the original narrow IV
+/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
+static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT,
+ LoopInfo *LI) {
+ auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+ if (!InsertPt)
+ return;
+ LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
+ << *DU.NarrowUse << "\n");
+ IRBuilder<> Builder(InsertPt);
+ Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
+}
+
+/// If the narrow use is a compare instruction, then widen the compare
+/// (and possibly the other operand). The extend operation is hoisted into the
+/// loop preheader as far as possible.
+bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
+ if (!Cmp)
+ return false;
+
+ // We can legally widen the comparison in the following two cases:
+ //
+ // - The signedness of the IV extension and comparison match
+ //
+ // - The narrow IV is always positive (and thus its sign extension is equal
+ // to its zero extension). For instance, let's say we're zero extending
+ // %narrow for the following use
+ //
+ // icmp slt i32 %narrow, %val ... (A)
+ //
+ // and %narrow is always positive. Then
+ //
+ // (A) == icmp slt i32 sext(%narrow), sext(%val)
+ // == icmp slt i32 zext(%narrow), sext(%val)
+ bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
+ if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
+ return false;
+
+ Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
+ unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
+
+ // Widen the compare instruction.
+ auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+ if (!InsertPt)
+ return false;
+ IRBuilder<> Builder(InsertPt);
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+
+ // Widen the other operand of the compare, if necessary.
+ if (CastWidth < IVWidth) {
+ Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
+ DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
+ }
+ return true;
+}
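
widenLoopCompare relies on the second bullet above: for a value known to be non-negative, sign- and zero-extension agree, so a signed compare keeps its result after widening both sides. A quick standalone check (plain C++, purely illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t narrow = 1234;                      // known non-negative
      int64_t asSExt = (int64_t)narrow;           // sign extend
      int64_t asZExt = (int64_t)(uint32_t)narrow; // zero extend
      assert(asSExt == asZExt);

      // icmp slt %narrow, %val gives the same answer after widening both
      // operands, which is what lets the compare itself be widened instead of
      // truncating the wide IV back down.
      int32_t val = -5;
      assert((narrow < val) == (asSExt < (int64_t)val));
      return 0;
    }
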
+
+// The widenIVUse avoids generating a trunc by evaluating the use as an AddRec;
+// this will not work when:
+// 1) SCEV traces back to an instruction inside the loop that SCEV can not
+//    expand, e.g. add %indvar, (load %addr)
+// 2) SCEV finds a loop variant, e.g. add %indvar, %loopvariant
+// When SCEV fails to avoid a trunc, we can still try an instruction-combining
+// approach to prove that the trunc is not required. This can be further
+// extended with other instruction-combining checks, but for now we handle the
+// following case (sub can be "add" or "mul", and "nsw + sext" can be "nuw + zext")
+//
+// Src:
+// %c = sub nsw %b, %indvar
+// %d = sext %c to i64
+// Dst:
+// %indvar.ext1 = sext %indvar to i64
+// %m = sext %b to i64
+// %d = sub nsw i64 %m, %indvar.ext1
+// Therefore, as long as the result of the add/sub/mul is extended to the wide
+// type, no trunc is required regardless of how %b is generated. This pattern is
+// common when calculating addresses on 64-bit architectures.
+bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ // Handle the common case of add<nsw/nuw>
+ const unsigned OpCode = NarrowUse->getOpcode();
+ // Only Add/Sub/Mul instructions are supported.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
+ return false;
+
+ // The operand that is not defined by NarrowDef of DU. Let's call it the
+ // other operand.
+ assert((NarrowUse->getOperand(0) == NarrowDef ||
+ NarrowUse->getOperand(1) == NarrowDef) &&
+ "bad DU");
+
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(NarrowUse);
+ ExtendKind ExtKind = getExtendKind(NarrowDef);
+ bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap();
+ bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
+ auto AnotherOpExtKind = ExtKind;
+
+ // Check that all uses are either:
+ // - narrow def (in case of we are widening the IV increment);
+ // - single-input LCSSA Phis;
+ // - comparison of the chosen type;
+ // - extend of the chosen type (raison d'etre).
+ SmallVector<Instruction *, 4> ExtUsers;
+ SmallVector<PHINode *, 4> LCSSAPhiUsers;
+ SmallVector<ICmpInst *, 4> ICmpUsers;
+ for (Use &U : NarrowUse->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ if (User == NarrowDef)
+ continue;
+ if (!L->contains(User)) {
+ auto *LCSSAPhi = cast<PHINode>(User);
+ // Make sure there is only 1 input, so that we don't have to split
+ // critical edges.
+ if (LCSSAPhi->getNumOperands() != 1)
+ return false;
+ LCSSAPhiUsers.push_back(LCSSAPhi);
+ continue;
+ }
+ if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
+ auto Pred = ICmp->getPredicate();
+ // We have 3 types of predicates: signed, unsigned and equality
+ // predicates. For equality, it's legal to widen icmp for either sign or
+ // zero extend. For sign extend, we can also do so for signed predicates;
+ // likewise, for zero extend we can widen icmp for unsigned predicates.
+ if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred))
+ return false;
+ if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred))
+ return false;
+ ICmpUsers.push_back(ICmp);
+ continue;
+ }
+ if (ExtKind == SignExtended)
+ User = dyn_cast<SExtInst>(User);
+ else
+ User = dyn_cast<ZExtInst>(User);
+ if (!User || User->getType() != WideType)
+ return false;
+ ExtUsers.push_back(User);
+ }
+ if (ExtUsers.empty()) {
+ DeadInsts.emplace_back(NarrowUse);
+ return true;
+ }
+
+ // We'll prove some facts that should be true in the context of ext users. If
+ // there are no users, we are done now. If there are some, pick their common
+ // dominator as the context.
+ Instruction *Context = nullptr;
+ for (auto *Ext : ExtUsers) {
+ if (!Context || DT->dominates(Ext, Context))
+ Context = Ext;
+ else if (!DT->dominates(Context, Ext))
+ // For users that don't have dominance relation, use common dominator.
+ Context =
+ DT->findNearestCommonDominator(Context->getParent(), Ext->getParent())
+ ->getTerminator();
+ }
+ assert(Context && "Context not found?");
+
+ if (!CanSignExtend && !CanZeroExtend) {
+ // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
+ // will most likely not see it. Let's try to prove it.
+ if (OpCode != Instruction::Add)
+ return false;
+ if (ExtKind != ZeroExtended)
+ return false;
+ const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
+ const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
+ // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
+ if (NarrowUse->getOperand(0) != NarrowDef)
+ return false;
+ if (!SE->isKnownNegative(RHS))
+ return false;
+ bool ProvedSubNUW = SE->isKnownPredicateAt(
+ ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), Context);
+ if (!ProvedSubNUW)
+ return false;
+ // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
+ // neg(zext(neg(op))), which is basically sext(op).
+ AnotherOpExtKind = SignExtended;
+ }
+
+ // Verify that the defining operand is an AddRec.
+ const SCEV *Op1 = SE->getSCEV(WideDef);
+ const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
+ if (!AddRecOp1 || AddRecOp1->getLoop() != L)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ // Generate the widened use instruction.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ AnotherOpExtKind, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ AnotherOpExtKind, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ ExtendKindMap[NarrowUse] = ExtKind;
+
+ for (Instruction *User : ExtUsers) {
+ assert(User->getType() == WideType && "Checked before!");
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
+ << *WideBO << "\n");
+ ++NumElimExt;
+ User->replaceAllUsesWith(WideBO);
+ DeadInsts.emplace_back(User);
+ }
+
+ for (PHINode *User : LCSSAPhiUsers) {
+ assert(User->getNumOperands() == 1 && "Checked before!");
+ Builder.SetInsertPoint(User);
+ auto *WidePN =
+ Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
+ BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
+ assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
+ "Not a LCSSA Phi?");
+ WidePN->addIncoming(WideBO, LoopExitingBlock);
+ Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt());
+ auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
+ User->replaceAllUsesWith(TruncPN);
+ DeadInsts.emplace_back(User);
+ }
+
+ for (ICmpInst *User : ICmpUsers) {
+ Builder.SetInsertPoint(User);
+ auto ExtendedOp = [&](Value * V)->Value * {
+ if (V == NarrowUse)
+ return WideBO;
+ if (ExtKind == ZeroExtended)
+ return Builder.CreateZExt(V, WideBO->getType());
+ else
+ return Builder.CreateSExt(V, WideBO->getType());
+ };
+ auto Pred = User->getPredicate();
+ auto *LHS = ExtendedOp(User->getOperand(0));
+ auto *RHS = ExtendedOp(User->getOperand(1));
+ auto *WideCmp =
+ Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
+ User->replaceAllUsesWith(WideCmp);
+ DeadInsts.emplace_back(User);
+ }
+
+ return true;
+}
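
The Src/Dst pattern in the comment above, written out at the source level: a 32-bit value derived from the IV feeds a 64-bit address computation, and after widening the subtraction is done directly in 64 bits so the sext disappears. The snippet below only illustrates that equivalence (names invented); it is not output of the pass.

    #include <cassert>
    #include <cstdint>

    static int64_t sumBefore(const int64_t *base, int32_t b, int32_t n) {
      int64_t sum = 0;
      for (int32_t i = 0; i < n; ++i) {
        int32_t c = b - i;          // %c = sub nsw %b, %indvar
        sum += base[(int64_t)c];    // %d = sext %c to i64, then address math
      }
      return sum;
    }

    static int64_t sumAfter(const int64_t *base, int32_t b, int32_t n) {
      int64_t wideB = b;            // %m = sext %b to i64
      int64_t sum = 0;
      for (int64_t i = 0; i < n; ++i)   // widened IV, no trunc needed
        sum += base[wideB - i];         // %d = sub nsw i64 %m, %indvar.ext1
      return sum;
    }

    int main() {
      int64_t data[16];
      for (int i = 0; i < 16; ++i)
        data[i] = i * i;
      assert(sumBefore(data, 10, 8) == sumAfter(data, 10, 8));
      return 0;
    }
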
+
+/// Determine whether an individual user of the narrow IV can be widened. If so,
+/// return the wide clone of the user.
+Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) {
+ assert(ExtendKindMap.count(DU.NarrowDef) &&
+ "Should already know the kind of extension used to widen NarrowDef");
+
+ // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+ if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
+ if (LI->getLoopFor(UsePhi->getParent()) != L) {
+ // For LCSSA phis, sink the truncate outside the loop.
+ // After SimplifyCFG most loop exit targets have a single predecessor.
+ // Otherwise fall back to a truncate within the loop.
+ if (UsePhi->getNumOperands() != 1)
+ truncateIVUse(DU, DT, LI);
+ else {
+ // Widening the PHI requires us to insert a trunc. The logical place
+ // for this trunc is in the same BB as the PHI. This is not possible if
+ // the BB is terminated by a catchswitch.
+ if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
+ return nullptr;
+
+ PHINode *WidePhi =
+ PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
+ UsePhi);
+ WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
+ IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
+ Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
+ UsePhi->replaceAllUsesWith(Trunc);
+ DeadInsts.emplace_back(UsePhi);
+ LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
+ << *WidePhi << "\n");
+ }
+ return nullptr;
+ }
+ }
+
+ // This narrow use can be widened by a sext if it's non-negative or its narrow
+ // def was widened by a sext. Same for zext.
+ auto canWidenBySExt = [&]() {
+ return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
+ };
+ auto canWidenByZExt = [&]() {
+ return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
+ };
+
+ // Our raison d'etre! Eliminate sign and zero extension.
+ if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
+ (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
+ Value *NewDef = DU.WideDef;
+ if (DU.NarrowUse->getType() != WideType) {
+ unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ if (CastWidth < IVWidth) {
+ // The cast isn't as wide as the IV, so insert a Trunc.
+ IRBuilder<> Builder(DU.NarrowUse);
+ NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
+ }
+ else {
+ // A wider extend was hidden behind a narrower one. This may induce
+ // another round of IV widening in which the intermediate IV becomes
+ // dead. It should be very rare.
+ LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
+ << " not wide enough to subsume " << *DU.NarrowUse
+ << "\n");
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+ NewDef = DU.NarrowUse;
+ }
+ }
+ if (NewDef != DU.NarrowUse) {
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
+ << " replaced by " << *DU.WideDef << "\n");
+ ++NumElimExt;
+ DU.NarrowUse->replaceAllUsesWith(NewDef);
+ DeadInsts.emplace_back(DU.NarrowUse);
+ }
+ // Now that the extend is gone, we want to expose its uses for potential
+ // further simplification. We don't need to directly inform SimplifyIVUsers
+ // of the new users, because their parent IV will be processed later as a
+ // new loop phi. If we preserved IVUsers analysis, we would also want to
+ // push the uses of WideDef here.
+
+ // No further widening is needed. The deceased [sz]ext had done it for us.
+ return nullptr;
+ }
+
+ // Does this user itself evaluate to a recurrence after widening?
+ WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
+ if (!WideAddRec.first)
+ WideAddRec = getWideRecurrence(DU);
+
+ assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
+ if (!WideAddRec.first) {
+ // If use is a loop condition, try to promote the condition instead of
+ // truncating the IV first.
+ if (widenLoopCompare(DU))
+ return nullptr;
+
+ // We are about to generate a truncate instruction that may hurt
+ // performance because the scalar evolution expression computed earlier
+ // in WideAddRec.first does not indicate a polynomial induction expression.
+ // In that case, look at the operands of the use instruction to determine
+ // if we can still widen the use instead of truncating its operand.
+ if (widenWithVariantUse(DU))
+ return nullptr;
+
+ // This user does not evaluate to a recurrence after widening, so don't
+ // follow it. Instead insert a Trunc to kill off the original use,
+ // eventually isolating the original narrow IV so it can be removed.
+ truncateIVUse(DU, DT, LI);
+ return nullptr;
+ }
+ // Assume block terminators cannot evaluate to a recurrence. We can't
+ // insert a Trunc after a terminator if there happens to be a critical edge.
+ assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
+ "SCEV is not expected to evaluate a block terminator");
+
+ // Reuse the IV increment that SCEVExpander created as long as it dominates
+ // NarrowUse.
+ Instruction *WideUse = nullptr;
+ if (WideAddRec.first == WideIncExpr &&
+ Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
+ WideUse = WideInc;
+ else {
+ WideUse = cloneIVUser(DU, WideAddRec.first);
+ if (!WideUse)
+ return nullptr;
+ }
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
+ // evaluates to the same expression as the extended narrow use, but doesn't
+ // absolutely guarantee it. Hence the following failsafe check. In rare cases
+ // where it fails, we simply throw away the newly created wide use.
+ if (WideAddRec.first != SE->getSCEV(WideUse)) {
+ LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
+ << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
+ << "\n");
+ DeadInsts.emplace_back(WideUse);
+ return nullptr;
+ }
+
+ // If we reached this point, then we are going to replace
+ // DU.NarrowUse with WideUse. Reattach any debug uses then.
+ replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
+
+ ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
+ // Returning WideUse pushes it on the worklist.
+ return WideUse;
+}
+
+/// Add eligible users of NarrowDef to NarrowIVUsers.
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
+ bool NonNegativeDef =
+ SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
+ SE->getZero(NarrowSCEV->getType()));
+ for (User *U : NarrowDef->users()) {
+ Instruction *NarrowUser = cast<Instruction>(U);
+
+ // Handle data flow merges and bizarre phi cycles.
+ if (!Widened.insert(NarrowUser).second)
+ continue;
+
+ bool NonNegativeUse = false;
+ if (!NonNegativeDef) {
+ // We might have control-dependent range information for this context.
+ if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
+ NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
+ }
+
+ NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
+ NonNegativeDef || NonNegativeUse);
+ }
+}
+
+/// Process a single induction variable. First use the SCEVExpander to create a
+/// wide induction variable that evaluates to the same recurrence as the
+/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
+/// def-use chain. After widenIVUse has processed all interesting IV users, the
+/// narrow IV will be isolated for removal by DeleteDeadPHIs.
+///
+/// It would be simpler to delete uses as they are processed, but we must avoid
+/// invalidating SCEV expressions.
+PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
+ // Is this phi an induction variable?
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
+ if (!AddRec)
+ return nullptr;
+
+ // Widen the induction variable expression.
+ const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
+ ? SE->getSignExtendExpr(AddRec, WideType)
+ : SE->getZeroExtendExpr(AddRec, WideType);
+
+ assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
+ "Expect the new IV expression to preserve its type");
+
+ // Can the IV be extended outside the loop without overflow?
+ AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return nullptr;
+
+ // An AddRec must have loop-invariant operands. Since this AddRec is
+ // materialized by a loop header phi, the expression cannot have any post-loop
+ // operands, so they must dominate the loop header.
+ assert(
+ SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
+ SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
+ "Loop header phi recurrence inputs do not dominate the loop");
+
+ // Iterate over IV uses (including transitive ones) looking for IV increments
+ // of the form 'add nsw %iv, <const>'. For each increment and each use of
+ // the increment, calculate control-dependent range information based on
+ // dominating conditions inside of the loop (e.g. a range check inside of the
+ // loop). Calculated ranges are stored in PostIncRangeInfos map.
+ //
+ // Control-dependent range information is later used to prove that a narrow
+ // definition is not negative (see pushNarrowIVUsers). It's difficult to do
+ // this on demand because when pushNarrowIVUsers needs this information some
+ // of the dominating conditions might be already widened.
+ if (UsePostIncrementRanges)
+ calculatePostIncRanges(OrigPhi);
+
+ // The rewriter provides a value for the desired IV expression. This may
+ // either find an existing phi or materialize a new one. Either way, we
+ // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
+ // of the phi-SCC dominates the loop entry.
+ Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
+ // If the expanded wide value is not a phi node, for example a cast node such
+ // as bitcast, inttoptr or ptrtoint, just skip it for now.
+ if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
+ // If the cast node is an inserted instruction without any users, remove it
+ // so that the pass doesn't touch the function, since we cannot widen the
+ // phi.
+ if (ExpandInst->hasNUses(0) &&
+ Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
+ DeadInsts.emplace_back(ExpandInst);
+ return nullptr;
+ }
+
+ // Remembering the WideIV increment generated by SCEVExpander allows
+ // widenIVUse to reuse it when widening the narrow IV's increment. We don't
+ // employ a general reuse mechanism because the call above is the only call to
+ // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ WideInc =
+ cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+ WideIncExpr = SE->getSCEV(WideInc);
+ // Propagate the debug location associated with the original loop increment
+ // to the new (widened) increment.
+ auto *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ WideInc->setDebugLoc(OrigInc->getDebugLoc());
+ }
+
+ LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
+ ++NumWidened;
+
+ // Traverse the def-use chain using a worklist starting at the original IV.
+ assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
+
+ Widened.insert(OrigPhi);
+ pushNarrowIVUsers(OrigPhi, WidePhi);
+
+ while (!NarrowIVUsers.empty()) {
+ WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
+
+ // Process a def-use edge. This may replace the use, so don't hold a
+ // use_iterator across it.
+ Instruction *WideUse = widenIVUse(DU, Rewriter);
+
+ // Follow all def-use edges from the previous narrow use.
+ if (WideUse)
+ pushNarrowIVUsers(DU.NarrowUse, WideUse);
+
+ // widenIVUse may have removed the def-use edge.
+ if (DU.NarrowDef->use_empty())
+ DeadInsts.emplace_back(DU.NarrowDef);
+ }
+
+ // Attach any debug information to the new PHI.
+ replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
+
+ return WidePhi;
+}
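
createWideIV drives everything with a small worklist of (narrow def, narrow use) pairs rather than recursing over the def-use graph, so phi cycles and data-flow merges are handled once. Below is a stripped-down sketch of just that traversal shape, using an invented Node type instead of LLVM's Use lists; it is a schematic, not the pass itself.

    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    struct Node { int id; std::vector<Node *> users; };

    static void traverse(Node &origPhi) {
      std::set<Node *> visited{&origPhi};
      std::vector<std::pair<Node *, Node *>> worklist; // (narrow def, narrow use)

      auto pushUsers = [&](Node *def) {
        for (Node *use : def->users)
          if (visited.insert(use).second)   // handle merges and phi cycles once
            worklist.push_back({def, use});
      };

      pushUsers(&origPhi);
      while (!worklist.empty()) {
        auto [def, use] = worklist.back();
        worklist.pop_back();
        std::printf("widen use %d of def %d\n", use->id, def->id);
        bool producedWideUse = true;        // stand-in for widenIVUse succeeding
        if (producedWideUse)
          pushUsers(use);                   // follow def-use edges onward
      }
    }

    int main() {
      Node a{0, {}}, b{1, {}}, c{2, {}};
      a.users = {&b, &c};
      b.users = {&c};
      traverse(a);
      return 0;
    }
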
+
+/// Calculates control-dependent range for the given def at the given context
+/// by looking at dominating conditions inside of the loop
+void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
+ Instruction *NarrowUser) {
+ using namespace llvm::PatternMatch;
+
+ Value *NarrowDefLHS;
+ const APInt *NarrowDefRHS;
+ if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
+ m_APInt(NarrowDefRHS))) ||
+ !NarrowDefRHS->isNonNegative())
+ return;
+
+ auto UpdateRangeFromCondition = [&] (Value *Condition,
+ bool TrueDest) {
+ CmpInst::Predicate Pred;
+ Value *CmpRHS;
+ if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
+ m_Value(CmpRHS))))
+ return;
+
+ CmpInst::Predicate P =
+ TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
+
+ auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
+ auto CmpConstrainedLHSRange =
+ ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
+ auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
+ *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
+
+ updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
+ };
+
+ auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
+ if (!HasGuards)
+ return;
+
+ for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
+ Ctx->getParent()->rend())) {
+ Value *C = nullptr;
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
+ UpdateRangeFromCondition(C, /*TrueDest=*/true);
+ }
+ };
+
+ UpdateRangeFromGuards(NarrowUser);
+
+ BasicBlock *NarrowUserBB = NarrowUser->getParent();
+ // If NarrowUserBB is statically unreachable, asking dominator queries may
+ // yield surprising results (e.g. the block may not have a dom tree node).
+ if (!DT->isReachableFromEntry(NarrowUserBB))
+ return;
+
+ for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
+ L->contains(DTB->getBlock());
+ DTB = DTB->getIDom()) {
+ auto *BB = DTB->getBlock();
+ auto *TI = BB->getTerminator();
+ UpdateRangeFromGuards(TI);
+
+ auto *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isConditional())
+ continue;
+
+ auto *TrueSuccessor = BI->getSuccessor(0);
+ auto *FalseSuccessor = BI->getSuccessor(1);
+
+ auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
+ return BBE.isSingleEdge() &&
+ DT->dominates(BBE, NarrowUser->getParent());
+ };
+
+ if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
+ UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
+
+ if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
+ UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
+ }
+}
+
+/// Calculates PostIncRangeInfos map for the given IV
+void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
+ SmallPtrSet<Instruction *, 16> Visited;
+ SmallVector<Instruction *, 6> Worklist;
+ Worklist.push_back(OrigPhi);
+ Visited.insert(OrigPhi);
+
+ while (!Worklist.empty()) {
+ Instruction *NarrowDef = Worklist.pop_back_val();
+
+ for (Use &U : NarrowDef->uses()) {
+ auto *NarrowUser = cast<Instruction>(U.getUser());
+
+ // Don't go looking outside the current loop.
+ auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
+ if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
+ continue;
+
+ if (!Visited.insert(NarrowUser).second)
+ continue;
+
+ Worklist.push_back(NarrowUser);
+
+ calculatePostIncRange(NarrowDef, NarrowUser);
+ }
+ }
+}
+
+PHINode *llvm::createWideIV(const WideIVInfo &WI,
+ LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter,
+ DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
+ unsigned &NumElimExt, unsigned &NumWidened,
+ bool HasGuards, bool UsePostIncrementRanges) {
+ WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges);
+ PHINode *WidePHI = Widener.createWideIV(Rewriter);
+ NumElimExt = Widener.getNumElimExt();
+ NumWidened = Widener.getNumWidened();
+ return WidePHI;
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index cfcc3454a210..f9a9dd237b6c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -20,7 +20,6 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/CaptureTracking.h"
@@ -542,6 +541,8 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
B.CreateMemCpy(Dst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return Dst;
}
@@ -569,6 +570,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
// copy for us. Make a memcpy to copy the nul byte with align = 1.
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return DstEnd;
}
@@ -609,15 +612,27 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
return Dst;
}
- // Let strncpy handle the zero padding
- if (Len > SrcLen + 1)
- return nullptr;
+ // strncpy(a, "a", 4) - > memcpy(a, "a\0\0\0", 4)
+ if (Len > SrcLen + 1) {
+ if (Len <= 128) {
+ StringRef Str;
+ if (!getConstantStringInfo(Src, Str))
+ return nullptr;
+ std::string SrcStr = Str.str();
+ SrcStr.resize(Len, '\0');
+ Src = B.CreateGlobalString(SrcStr, "str");
+ } else {
+ return nullptr;
+ }
+ }
Type *PT = Callee->getFunctionType()->getParamType(0);
// strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(PT), Len));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return Dst;
}
@@ -684,8 +699,6 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
Offset);
}
}
-
- return nullptr;
}
// strlen(x?"foo":"bars") --> x ? 3 : 4
@@ -1095,6 +1108,8 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1),
CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
@@ -1143,7 +1158,12 @@ Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
// mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
CallInst *NewCI =
B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N);
+ // Propagate attributes, but memcpy has no return value, so make sure that
+ // any return attributes are compliant.
+ // TODO: Attach return value attributes to the 1st operand to preserve them?
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
}
@@ -1157,6 +1177,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1),
CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
@@ -1217,6 +1239,8 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
@@ -1629,6 +1653,14 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc()))
return nullptr;
+ // If we have a pow() library call (accesses memory) and we can't guarantee
+ // that the base is not an infinity, give up:
+ // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting
+ // errno), but sqrt(-Inf) is required by various standards to set errno.
+ if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() &&
+ !isKnownNeverInfinity(Base, TLI))
+ return nullptr;
+
Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI);
if (!Sqrt)
return nullptr;
@@ -1715,7 +1747,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
// pow(x, n) -> x * x * x * ...
const APFloat *ExpoF;
- if (AllowApprox && match(Expo, m_APFloat(ExpoF))) {
+ if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
+ !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
// We limit to a max of 7 multiplications, thus the maximum exponent is 32.
// If the exponent is an integer+0.5 we generate a call to sqrt and an
// additional fmul.
@@ -1741,6 +1774,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
Pow->doesNotAccessMemory(), M, B, TLI);
+ if (!Sqrt)
+ return nullptr;
}
// We will memoize intermediate products of the Addition Chain.
@@ -2164,7 +2199,7 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
// It's only worthwhile if both sinpi and cospi are actually used.
- if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
+ if (SinCalls.empty() || CosCalls.empty())
return nullptr;
Value *Sin, *Cos, *SinCos;
@@ -2190,7 +2225,7 @@ void LibCallSimplifier::classifyArgUse(
SmallVectorImpl<CallInst *> &SinCosCalls) {
CallInst *CI = dyn_cast<CallInst>(Val);
- if (!CI)
+ if (!CI || CI->use_empty())
return;
// Don't consider calls in other functions.
@@ -2487,6 +2522,30 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
+ if (CI->use_empty())
+ // sprintf(dest, "%s", str) -> strcpy(dest, str)
+ return emitStrCpy(CI->getArgOperand(0), CI->getArgOperand(2), B, TLI);
+
+ uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
+ if (SrcLen) {
+ B.CreateMemCpy(
+ CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen));
+ // Returns total number of characters written without null-character.
+ return ConstantInt::get(CI->getType(), SrcLen - 1);
+ } else if (Value *V = emitStpCpy(CI->getArgOperand(0), CI->getArgOperand(2),
+ B, TLI)) {
+ // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
+ Value *PtrDiff = B.CreatePtrDiff(V, CI->getArgOperand(0));
+ return B.CreateIntCast(PtrDiff, CI->getType(), false);
+ }
+
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+ PGSOQueryType::IRPass);
+ if (OptForSize)
+ return nullptr;
+
Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
if (!Len)
return nullptr;
@@ -3219,6 +3278,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
@@ -3231,6 +3292,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
@@ -3245,11 +3308,29 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
CI->getArgOperand(2), Align(1));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
}
+Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
+ IRBuilderBase &B) {
+ const DataLayout &DL = CI->getModule()->getDataLayout();
+ if (isFortifiedCallFoldable(CI, 3, 2))
+ if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, DL, TLI)) {
+ CallInst *NewCI = cast<CallInst>(Call);
+ NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(
+ AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
+ return NewCI;
+ }
+ return nullptr;
+}
+
Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
IRBuilderBase &B,
LibFunc Func) {
@@ -3330,7 +3411,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) {
- SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end());
+ SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5));
return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4), VariadicArgs, B, TLI);
}
@@ -3341,7 +3422,7 @@ Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 2, None, None, 1)) {
- SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end());
+ SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4));
return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs,
B, TLI);
}
@@ -3439,6 +3520,8 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
switch (Func) {
case LibFunc_memcpy_chk:
return optimizeMemCpyChk(CI, Builder);
+ case LibFunc_mempcpy_chk:
+ return optimizeMemPCpyChk(CI, Builder);
case LibFunc_memmove_chk:
return optimizeMemMoveChk(CI, Builder);
case LibFunc_memset_chk:
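
The change repeated across optimizeStrCpy, optimizeStpCpy, optimizeMemCpy and the *_chk folds above is one two-step pattern: copy the original call's attributes onto the replacement memory intrinsic, then drop any return-slot attributes that are incompatible with the new return type. A minimal standalone sketch of that pattern, using only APIs visible in the hunks above (the helper name is illustrative, not part of the patch):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"

// Copy attributes from the simplified libcall to its replacement, then strip
// return attributes that no longer type-check (e.g. when the replacement is a
// void-returning llvm.memcpy).
static void propagateCallAttrs(llvm::CallBase *Old, llvm::CallBase *New) {
  New->setAttributes(Old->getAttributes());
  New->removeAttributes(llvm::AttributeList::ReturnIndex,
                        llvm::AttributeFuncs::typeIncompatible(New->getType()));
}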
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
index e257c5a015f5..beeb60698f04 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -43,11 +43,6 @@ cl::opt<bool> PGSOColdCodeOnlyForPartialSamplePGO(
cl::desc("Apply the profile guided size optimizations only "
"to cold code under partial-profile sample PGO."));
-cl::opt<bool> PGSOIRPassOrTestOnly(
- "pgso-ir-pass-or-test-only", cl::Hidden, cl::init(false),
- cl::desc("Apply the profile guided size optimizations only"
- "to the IR passes or tests."));
-
cl::opt<bool> ForcePGSO(
"force-pgso", cl::Hidden, cl::init(false),
cl::desc("Force the (profiled-guided) size optimizations. "));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
index b559811d120b..1fa574f04c37 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -13,6 +13,7 @@
// present.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/StripGCRelocates.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -24,22 +25,7 @@
using namespace llvm;
-namespace {
-struct StripGCRelocates : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- StripGCRelocates() : FunctionPass(ID) {
- initializeStripGCRelocatesPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &Info) const override {}
-
- bool runOnFunction(Function &F) override;
-
-};
-char StripGCRelocates::ID = 0;
-}
-
-bool StripGCRelocates::runOnFunction(Function &F) {
+static bool stripGCRelocates(Function &F) {
// Nothing to do for declarations.
if (F.isDeclaration())
return false;
@@ -71,6 +57,32 @@ bool StripGCRelocates::runOnFunction(Function &F) {
return !GCRelocates.empty();
}
-INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates",
+PreservedAnalyses StripGCRelocates::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (!stripGCRelocates(F))
+ return PreservedAnalyses::all();
+
+ // Removing gc.relocate preserves the CFG, but most other analyses probably
+ // need to re-run.
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+namespace {
+struct StripGCRelocatesLegacy : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ StripGCRelocatesLegacy() : FunctionPass(ID) {
+ initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const override {}
+
+ bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); }
+};
+char StripGCRelocatesLegacy::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates",
"Strip gc.relocates inserted through RewriteStatepointsForGC",
true, false)
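
The run method added above uses the common new-PM idiom for a CFG-preserving transform: report everything preserved when nothing changed, otherwise preserve only the CFG analyses. A generic sketch of that idiom, separate from the pass itself (the function name here is hypothetical):

#include "llvm/IR/PassManager.h"

// Mirror of the PreservedAnalyses logic in StripGCRelocates::run above.
llvm::PreservedAnalyses reportChanges(bool Changed) {
  if (!Changed)
    return llvm::PreservedAnalyses::all();
  llvm::PreservedAnalyses PA;
  PA.preserveSet<llvm::CFGAnalyses>();
  return PA;
}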
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index 21cbbfb140b6..10fda4df51ba 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -17,10 +18,11 @@ namespace {
/// This pass strips all debug info that is not related to line tables.
/// The result will be the same as if the program were compiled with
/// -gline-tables-only.
-struct StripNonLineTableDebugInfo : public ModulePass {
+struct StripNonLineTableDebugLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- StripNonLineTableDebugInfo() : ModulePass(ID) {
- initializeStripNonLineTableDebugInfoPass(*PassRegistry::getPassRegistry());
+ StripNonLineTableDebugLegacyPass() : ModulePass(ID) {
+ initializeStripNonLineTableDebugLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -33,10 +35,17 @@ struct StripNonLineTableDebugInfo : public ModulePass {
};
}
-char StripNonLineTableDebugInfo::ID = 0;
-INITIALIZE_PASS(StripNonLineTableDebugInfo, "strip-nonlinetable-debuginfo",
+char StripNonLineTableDebugLegacyPass::ID = 0;
+INITIALIZE_PASS(StripNonLineTableDebugLegacyPass,
+ "strip-nonlinetable-debuginfo",
"Strip all debug info except linetables", false, false)
-ModulePass *llvm::createStripNonLineTableDebugInfoPass() {
- return new StripNonLineTableDebugInfo();
+ModulePass *llvm::createStripNonLineTableDebugLegacyPass() {
+ return new StripNonLineTableDebugLegacyPass();
+}
+
+PreservedAnalyses
+StripNonLineTableDebugInfoPass::run(Module &M, ModuleAnalysisManager &AM) {
+ llvm::stripNonLineTableDebugInfo(M);
+ return PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 9af39d9a0dd1..3631733713ab 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -6,10 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass is used to ensure that functions have at most one return
-// instruction in them. Additionally, it keeps track of which node is the new
-// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode
-// method will return a null pointer.
+// This pass is used to ensure that functions have at most one return and one
+// unreachable instruction in them.
//
//===----------------------------------------------------------------------===//
@@ -22,73 +20,66 @@
#include "llvm/Transforms/Utils.h"
using namespace llvm;
-char UnifyFunctionExitNodes::ID = 0;
+char UnifyFunctionExitNodesLegacyPass::ID = 0;
-UnifyFunctionExitNodes::UnifyFunctionExitNodes() : FunctionPass(ID) {
- initializeUnifyFunctionExitNodesPass(*PassRegistry::getPassRegistry());
+UnifyFunctionExitNodesLegacyPass::UnifyFunctionExitNodesLegacyPass()
+ : FunctionPass(ID) {
+ initializeUnifyFunctionExitNodesLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
-INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+INITIALIZE_PASS(UnifyFunctionExitNodesLegacyPass, "mergereturn",
"Unify function exit nodes", false, false)
Pass *llvm::createUnifyFunctionExitNodesPass() {
- return new UnifyFunctionExitNodes();
+ return new UnifyFunctionExitNodesLegacyPass();
}
-void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
// This is a cluster of orthogonal Transforms
AU.addPreservedID(LowerSwitchID);
}
-// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new
-// BasicBlock, and converting all returns to unconditional branches to this
-// new basic block. The singular exit node is returned.
-//
-// If there are no return stmts in the Function, a null pointer is returned.
-//
-bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
- // Loop over all of the blocks in a function, tracking all of the blocks that
- // return.
- //
- std::vector<BasicBlock*> ReturningBlocks;
- std::vector<BasicBlock*> UnreachableBlocks;
+namespace {
+
+bool unifyUnreachableBlocks(Function &F) {
+ std::vector<BasicBlock *> UnreachableBlocks;
+
for (BasicBlock &I : F)
- if (isa<ReturnInst>(I.getTerminator()))
- ReturningBlocks.push_back(&I);
- else if (isa<UnreachableInst>(I.getTerminator()))
+ if (isa<UnreachableInst>(I.getTerminator()))
UnreachableBlocks.push_back(&I);
- // Then unreachable blocks.
- if (UnreachableBlocks.empty()) {
- UnreachableBlock = nullptr;
- } else if (UnreachableBlocks.size() == 1) {
- UnreachableBlock = UnreachableBlocks.front();
- } else {
- UnreachableBlock = BasicBlock::Create(F.getContext(),
- "UnifiedUnreachableBlock", &F);
- new UnreachableInst(F.getContext(), UnreachableBlock);
-
- for (BasicBlock *BB : UnreachableBlocks) {
- BB->getInstList().pop_back(); // Remove the unreachable inst.
- BranchInst::Create(UnreachableBlock, BB);
- }
+ if (UnreachableBlocks.size() <= 1)
+ return false;
+
+ BasicBlock *UnreachableBlock =
+ BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
+
+ for (BasicBlock *BB : UnreachableBlocks) {
+ BB->getInstList().pop_back(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
}
- // Now handle return blocks.
- if (ReturningBlocks.empty()) {
- ReturnBlock = nullptr;
- return false; // No blocks return
- } else if (ReturningBlocks.size() == 1) {
- ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+ return true;
+}
+
+bool unifyReturnBlocks(Function &F) {
+ std::vector<BasicBlock *> ReturningBlocks;
+
+ for (BasicBlock &I : F)
+ if (isa<ReturnInst>(I.getTerminator()))
+ ReturningBlocks.push_back(&I);
+
+ if (ReturningBlocks.size() <= 1)
return false;
- }
- // Otherwise, we need to insert a new basic block into the function, add a PHI
- // nodes (if the function returns values), and convert all of the return
- // instructions into unconditional branches.
- //
+ // Insert a new basic block into the function, add PHI nodes (if the function
+ // returns values), and convert all of the return instructions into
+ // unconditional branches.
BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
"UnifiedReturnBlock", &F);
@@ -105,7 +96,6 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
// Loop over all of the blocks, replacing the return instruction with an
// unconditional branch.
- //
for (BasicBlock *BB : ReturningBlocks) {
// Add an incoming element to the PHI node for every return instruction that
// is merging into this new block...
@@ -115,6 +105,25 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
BB->getInstList().pop_back(); // Remove the return insn
BranchInst::Create(NewRetBlock, BB);
}
- ReturnBlock = NewRetBlock;
+
return true;
}
+} // namespace
+
+// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting
+// all returns to unconditional branches to this new basic block. Also, unify
+// all unreachable blocks.
+bool UnifyFunctionExitNodesLegacyPass::runOnFunction(Function &F) {
+ bool Changed = false;
+ Changed |= unifyUnreachableBlocks(F);
+ Changed |= unifyReturnBlocks(F);
+ return Changed;
+}
+
+PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = false;
+ Changed |= unifyUnreachableBlocks(F);
+ Changed |= unifyReturnBlocks(F);
+ return Changed ? PreservedAnalyses() : PreservedAnalyses::all();
+}
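
The file above follows the same legacy-pass/new-pass split used throughout this merge: the transformation body moves into free functions (unifyUnreachableBlocks, unifyReturnBlocks) and gains a new-PM wrapper. A short usage sketch, assuming the pass is scheduled from a FunctionPassManager (the surrounding pipeline function is hypothetical):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"

// Schedule the new-PM wrapper added above; after it runs, each function has at
// most one return block and at most one unreachable block ("mergereturn").
void addMergeReturn(llvm::FunctionPassManager &FPM) {
  FPM.addPass(llvm::UnifyFunctionExitNodesPass());
}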
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index b10deee3907c..0b718ed6136e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -16,6 +16,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/UnifyLoopExits.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/InitializePasses.h"
@@ -27,10 +29,10 @@
using namespace llvm;
namespace {
-struct UnifyLoopExits : public FunctionPass {
+struct UnifyLoopExitsLegacyPass : public FunctionPass {
static char ID;
- UnifyLoopExits() : FunctionPass(ID) {
- initializeUnifyLoopExitsPass(*PassRegistry::getPassRegistry());
+ UnifyLoopExitsLegacyPass() : FunctionPass(ID) {
+ initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -46,17 +48,19 @@ struct UnifyLoopExits : public FunctionPass {
};
} // namespace
-char UnifyLoopExits::ID = 0;
+char UnifyLoopExitsLegacyPass::ID = 0;
-FunctionPass *llvm::createUnifyLoopExitsPass() { return new UnifyLoopExits(); }
+FunctionPass *llvm::createUnifyLoopExitsPass() {
+ return new UnifyLoopExitsLegacyPass();
+}
-INITIALIZE_PASS_BEGIN(UnifyLoopExits, "unify-loop-exits",
+INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
"Fixup each natural loop to have a single exit block",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(UnifyLoopExits, "unify-loop-exits",
+INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
"Fixup each natural loop to have a single exit block",
false /* Only looks at CFG */, false /* Analysis Pass */)
@@ -80,7 +84,7 @@ static void restoreSSA(const DominatorTree &DT, const Loop *L,
const SetVector<BasicBlock *> &Incoming,
BasicBlock *LoopExitBlock) {
using InstVector = SmallVector<Instruction *, 8>;
- using IIMap = DenseMap<Instruction *, InstVector>;
+ using IIMap = MapVector<Instruction *, InstVector>;
IIMap ExternalUsers;
for (auto BB : L->blocks()) {
for (auto &I : *BB) {
@@ -203,11 +207,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
return true;
}
-bool UnifyLoopExits::runOnFunction(Function &F) {
- LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
- << "\n");
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+static bool runImpl(LoopInfo &LI, DominatorTree &DT) {
bool Changed = false;
auto Loops = LI.getLoopsInPreorder();
@@ -218,3 +218,28 @@ bool UnifyLoopExits::runOnFunction(Function &F) {
}
return Changed;
}
+
+bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
+ LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
+ << "\n");
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ return runImpl(LI, DT);
+}
+
+namespace llvm {
+
+PreservedAnalyses UnifyLoopExitsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+ if (!runImpl(LI, DT))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp
index 5b58548e54dc..c57cec6be676 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp
@@ -13,8 +13,11 @@
#include "llvm/Transforms/Utils/UniqueInternalLinkageNames.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MD5.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -27,13 +30,31 @@ static bool uniqueifyInternalLinkageNames(Module &M) {
Md5.final(R);
SmallString<32> Str;
llvm::MD5::stringifyResult(R, Str);
- std::string ModuleNameHash = (Twine(".") + Twine(Str)).str();
+ // Convert the MD5 hash to decimal. Demangler suffixes can either contain
+ // numbers or characters, but not both.
+ APInt IntHash = APInt(128, Str.str(), 16);
+ // Prepend "__uniq" before the hash for tools like profilers to understand that
+ // this symbol is of internal linkage type.
+ std::string ModuleNameHash = (Twine(".__uniq.") + Twine(IntHash.toString(10, false))).str();
bool Changed = false;
+ MDBuilder MDB(M.getContext());
// Append the module hash to all internal linkage functions.
for (auto &F : M) {
if (F.hasInternalLinkage()) {
F.setName(F.getName() + ModuleNameHash);
+ F.addFnAttr("sample-profile-suffix-elision-policy", "selected");
+ // Replace linkage names in the debug metadata.
+ if (DISubprogram *SP = F.getSubprogram()) {
+ if (SP->getRawLinkageName()) {
+ auto *Name = MDB.createString(F.getName());
+ SP->replaceRawLinkageName(Name);
+ if (DISubprogram *SPDecl = SP->getDeclaration()) {
+ if (SPDecl->getRawLinkageName())
+ SPDecl->replaceRawLinkageName(Name);
+ }
+ }
+ }
Changed = true;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
index ce98a739bea8..73c0532f3fd5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
@@ -34,17 +34,17 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeLibCallsShrinkWrapLegacyPassPass(Registry);
initializeLoopSimplifyPass(Registry);
initializeLowerInvokeLegacyPassPass(Registry);
- initializeLowerSwitchPass(Registry);
+ initializeLowerSwitchLegacyPassPass(Registry);
initializeNameAnonGlobalLegacyPassPass(Registry);
initializePromoteLegacyPassPass(Registry);
- initializeStripNonLineTableDebugInfoPass(Registry);
- initializeUnifyFunctionExitNodesPass(Registry);
+ initializeStripNonLineTableDebugLegacyPassPass(Registry);
+ initializeUnifyFunctionExitNodesLegacyPassPass(Registry);
initializeMetaRenamerPass(Registry);
- initializeStripGCRelocatesPass(Registry);
+ initializeStripGCRelocatesLegacyPass(Registry);
initializePredicateInfoPrinterLegacyPassPass(Registry);
initializeInjectTLIMappingsLegacyPass(Registry);
initializeFixIrreduciblePass(Registry);
- initializeUnifyLoopExitsPass(Registry);
+ initializeUnifyLoopExitsLegacyPassPass(Registry);
initializeUniqueInternalLinkageNamesLegacyPassPass(Registry);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 6ff08cd28712..61cd8595a73b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -17,6 +17,7 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
const DataLayout &DL) {
Type *StoredTy = StoredVal->getType();
+
if (StoredTy == LoadTy)
return true;
@@ -36,17 +37,29 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedSize())
return false;
+ bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType());
// Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
- DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+ if (StoredNI != LoadNI) {
// As a special case, allow coercion of memset used to initialize
// an array w/null. Despite non-integral pointers not generally having a
// specific bit pattern, we do assume null is zero.
if (auto *CI = dyn_cast<Constant>(StoredVal))
return CI->isNullValue();
return false;
+ } else if (StoredNI && LoadNI &&
+ StoredTy->getPointerAddressSpace() !=
+ LoadTy->getPointerAddressSpace()) {
+ return false;
}
-
+
+
+ // The implementation below uses inttoptr for vectors of unequal size; we
+ // can't allow this for non-integral pointers. We could teach it to extract
+ // exact subvectors if desired.
+ if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize())
+ return false;
+
return true;
}
@@ -223,14 +236,8 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
return -1;
- // Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
- DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
- // Allow casts of zero values to null as a special case
- auto *CI = dyn_cast<Constant>(StoredVal);
- if (!CI || !CI->isNullValue())
- return -1;
- }
+ if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
+ return -1;
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =
@@ -333,9 +340,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
- // Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) !=
- DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+ if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
return -1;
Value *DepPtr = DepLI->getPointerOperand();
@@ -393,7 +398,7 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (!Src)
return -1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src));
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return -1;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
index f1b3fe8e2fa9..930e0b7ee01a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -167,12 +167,9 @@ public:
void flush();
private:
- void mapGlobalInitializer(GlobalVariable &GV, Constant &Init);
void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers);
- void mapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target);
- void remapFunction(Function &F, ValueToValueMapTy &VM);
ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; }
ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; }
@@ -822,11 +819,15 @@ void Mapper::flush() {
break;
case WorklistEntry::MapAppendingVar: {
unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers;
+ // The mapAppendingVariable call can change AppendingInits if the initializer
+ // for the variable depends on another appending global; because of that, the
+ // inits need to be extracted and updated before the call.
+ SmallVector<Constant *, 8> NewInits(
+ drop_begin(AppendingInits, PrefixSize));
+ AppendingInits.resize(PrefixSize);
mapAppendingVariable(*E.Data.AppendingGV.GV,
E.Data.AppendingGV.InitPrefix,
- E.AppendingGVIsOldCtorDtor,
- makeArrayRef(AppendingInits).slice(PrefixSize));
- AppendingInits.resize(PrefixSize);
+ E.AppendingGVIsOldCtorDtor, makeArrayRef(NewInits));
break;
}
case WorklistEntry::MapGlobalIndirectSymbol:
@@ -900,14 +901,13 @@ void Mapper::remapInstruction(Instruction *I) {
LLVMContext &C = CB->getContext();
AttributeList Attrs = CB->getAttributes();
for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
- if (Attrs.hasAttribute(i, Attribute::ByVal)) {
- Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
- if (!Ty)
- continue;
-
- Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
- Attrs = Attrs.addAttribute(
- C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty)));
+ for (Attribute::AttrKind TypedAttr :
+ {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) {
+ if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
+ Attrs = Attrs.replaceAttributeType(C, i, TypedAttr,
+ TypeMapper->remapType(Ty));
+ break;
+ }
}
}
CB->setAttributes(Attrs);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 9b81afbb4b6c..6ec5590d76ba 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -666,6 +666,10 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
cast<IntrinsicInst>(&I)->getIntrinsicID() ==
Intrinsic::sideeffect) {
// Ignore llvm.sideeffect calls.
+ } else if (isa<IntrinsicInst>(&I) &&
+ cast<IntrinsicInst>(&I)->getIntrinsicID() ==
+ Intrinsic::pseudoprobe) {
+ // Ignore llvm.pseudoprobe calls.
} else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I
<< '\n');
@@ -762,8 +766,8 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
return Chain.slice(0, ChainIdx);
}
-static ChainID getChainID(const Value *Ptr, const DataLayout &DL) {
- const Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
+static ChainID getChainID(const Value *Ptr) {
+ const Value *ObjPtr = getUnderlyingObject(Ptr);
if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
// The select's themselves are distinct instructions even if they share the
// same condition and evaluate to consecutive pointers for true and false
@@ -830,7 +834,7 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
continue;
// Save the load locations.
- const ChainID ID = getChainID(Ptr, DL);
+ const ChainID ID = getChainID(Ptr);
LoadRefs[ID].push_back(LI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isSimple())
@@ -876,7 +880,7 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
continue;
// Save store location.
- const ChainID ID = getChainID(Ptr, DL);
+ const ChainID ID = getChainID(Ptr);
StoreRefs[ID].push_back(SI);
}
}
@@ -1027,8 +1031,8 @@ bool Vectorizer::vectorizeStoreChain(
unsigned EltSzInBytes = Sz / 8;
unsigned SzInBytes = EltSzInBytes * ChainSize;
- VectorType *VecTy;
- VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
+ FixedVectorType *VecTy;
+ auto *VecStoreTy = dyn_cast<FixedVectorType>(StoreTy);
if (VecStoreTy)
VecTy = FixedVectorType::get(StoreTy->getScalarType(),
Chain.size() * VecStoreTy->getNumElements());
@@ -1180,7 +1184,7 @@ bool Vectorizer::vectorizeLoadChain(
unsigned EltSzInBytes = Sz / 8;
unsigned SzInBytes = EltSzInBytes * ChainSize;
VectorType *VecTy;
- VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
+ auto *VecLoadTy = dyn_cast<FixedVectorType>(LoadTy);
if (VecLoadTy)
VecTy = FixedVectorType::get(LoadTy->getScalarType(),
Chain.size() * VecLoadTy->getNumElements());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 23613775d896..b8c21a0e1cd3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -13,13 +13,16 @@
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
+
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
using namespace llvm;
@@ -63,6 +66,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
return (Val <= 1);
case HK_ISVECTORIZED:
case HK_PREDICATE:
+ case HK_SCALABLE:
return (Val == 0 || Val == 1);
}
return false;
@@ -75,7 +79,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
- Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
+ Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
+ Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
@@ -88,7 +93,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
// If the vectorization width and interleaving count are both 1 then
// consider the loop to have been already vectorized because there's
// nothing more that we can do.
- IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
+ IsVectorized.Value =
+ getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1;
LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
<< "LV: Interleaving disabled by the pass manager\n");
}
@@ -161,7 +167,7 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
if (Force.Value == LoopVectorizeHints::FK_Enabled) {
R << " (Force=" << NV("Force", true);
if (Width.Value != 0)
- R << ", Vector Width=" << NV("VectorWidth", Width.Value);
+ R << ", Vector Width=" << NV("VectorWidth", getWidth());
if (Interleave.Value != 0)
R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
R << ")";
@@ -172,11 +178,11 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
}
const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
- if (getWidth() == 1)
+ if (getWidth() == ElementCount::getFixed(1))
return LV_NAME;
if (getForce() == LoopVectorizeHints::FK_Disabled)
return LV_NAME;
- if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
+ if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
return LV_NAME;
return OptimizationRemarkAnalysis::AlwaysPrint;
}
@@ -227,7 +233,8 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
return;
unsigned Val = C->getZExtValue();
- Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
+ Hint *Hints[] = {&Width, &Interleave, &Force,
+ &IsVectorized, &Predicate, &Scalable};
for (auto H : Hints) {
if (Name == H->Name) {
if (H->validate(Val))
@@ -412,7 +419,11 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
const ValueToValueMap &Strides =
getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
- bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
+ Function *F = TheLoop->getHeader()->getParent();
+ bool OptForSize = F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
+ PGSOQueryType::IRPass);
+ bool CanAddPredicate = !OptForSize;
int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
if (Stride == 1 || Stride == -1)
return Stride;
@@ -424,7 +435,7 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
}
bool LoopVectorizationLegality::canVectorizeOuterLoop() {
- assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
+ assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
// Store the result and return it at the end instead of exiting early, in case
// allowExtraAnalysis is used to report multiple reasons for not vectorizing.
bool Result = true;
@@ -768,7 +779,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// supported on the target.
if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
// Arbitrarily try a vector of 2 elements.
- auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
+ auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
assert(VecTy && "did not find vectorized version of stored type");
if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
reportVectorizationFailure(
@@ -783,7 +794,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
// For nontemporal loads, check that a nontemporal vector version is
// supported on the target (arbitrarily try a vector of 2 elements).
- auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
+ auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
assert(VecTy && "did not find vectorized version of load type");
if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
reportVectorizationFailure(
@@ -912,9 +923,9 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
}
bool LoopVectorizationLegality::blockCanBePredicated(
- BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
- const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
-
+ BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
+ SmallPtrSetImpl<const Instruction *> &MaskedOp,
+ SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
for (Instruction &I : *BB) {
// Check that we don't have a constant expression that can trap as operand.
for (Value *Operand : I.operands()) {
@@ -930,17 +941,19 @@ bool LoopVectorizationLegality::blockCanBePredicated(
continue;
}
+ // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
+ // TODO: there might be cases that it should block the vectorization. Let's
+ // ignore those for now.
+ if (isa<NoAliasScopeDeclInst>(&I))
+ continue;
+
// We might be able to hoist the load.
if (I.mayReadFromMemory()) {
auto *LI = dyn_cast<LoadInst>(&I);
if (!LI)
return false;
if (!SafePtrs.count(LI->getPointerOperand())) {
- // !llvm.mem.parallel_loop_access implies if-conversion safety.
- // Otherwise, record that the load needs (real or emulated) masking
- // and let the cost model decide.
- if (!IsAnnotatedParallel || PreserveGuards)
- MaskedOp.insert(LI);
+ MaskedOp.insert(LI);
continue;
}
}
@@ -999,7 +1012,7 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
ScalarEvolution &SE = *PSE.getSE();
for (Instruction &I : *BB) {
LoadInst *LI = dyn_cast<LoadInst>(&I);
- if (LI && !mustSuppressSpeculation(*LI) &&
+ if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
SafePointers.insert(LI->getPointerOperand());
}
@@ -1019,7 +1032,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
- if (!blockCanBePredicated(BB, SafePointers)) {
+ if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
+ ConditionalAssumes)) {
reportVectorizationFailure(
"Control flow cannot be substituted for a select",
"control flow cannot be substituted for a select",
@@ -1044,7 +1058,7 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// Helper function to canVectorizeLoopNestCFG.
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
bool UseVPlanNativePath) {
- assert((UseVPlanNativePath || Lp->empty()) &&
+ assert((UseVPlanNativePath || Lp->isInnermost()) &&
"VPlan-native path is not enabled.");
// TODO: ORE should be improved to show more accurate information when an
@@ -1080,22 +1094,14 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
return false;
}
- // We must have a single exiting block.
- if (!Lp->getExitingBlock()) {
- reportVectorizationFailure("The loop must have an exiting block",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop);
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- // We only handle bottom-tested loops, i.e. loop in which the condition is
- // checked at the end of each iteration. With that we can assume that all
- // instructions in the loop are executed the same number of times.
- if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
- reportVectorizationFailure("The exiting block is not the loop latch",
+ // We currently must have a single "exit block" after the loop. Note that
+ // multiple "exiting blocks" inside the loop are allowed, provided they all
+ // reach the single exit block.
+ // TODO: This restriction can be relaxed in the near future, it's here solely
+ // to allow separation of changes for review. We need to generalize the phi
+ // update logic in a number of places.
+ if (!Lp->getUniqueExitBlock()) {
+ reportVectorizationFailure("The loop must have a unique exit block",
"loop control flow is not understood by vectorizer",
"CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
@@ -1103,7 +1109,6 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
else
return false;
}
-
return Result;
}
@@ -1154,7 +1159,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
// Specific checks for outer loops. We skip the remaining legal checks at this
// point because they don't support outer loops.
- if (!TheLoop->empty()) {
+ if (!TheLoop->isInnermost()) {
assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
if (!canVectorizeOuterLoop()) {
@@ -1171,7 +1176,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return Result;
}
- assert(TheLoop->empty() && "Inner loop expected.");
+ assert(TheLoop->isInnermost() && "Inner loop expected.");
// Check if we can if-convert non-single-bb loops.
unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
@@ -1246,10 +1251,10 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
Instruction *UI = cast<Instruction>(U);
if (TheLoop->contains(UI))
continue;
- reportVectorizationFailure(
- "Cannot fold tail by masking, loop has an outside user for",
- "Cannot fold tail by masking in the presence of live outs.",
- "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Cannot fold tail by masking, loop has an outside user for "
+ << *UI << "\n");
return false;
}
}
@@ -1257,20 +1262,25 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
// The list of pointers that we can safely read and write to remains empty.
SmallPtrSet<Value *, 8> SafePointers;
+ SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
+ SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
+
// Check and mark all blocks for predication, including those that ordinarily
// do not need predication such as the header block.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
- reportVectorizationFailure(
- "Cannot fold tail by masking as required",
- "control flow cannot be substituted for a select",
- "NoCFGForSelect", ORE, TheLoop,
- BB->getTerminator());
+ if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
+ TmpConditionalAssumes)) {
+ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
return false;
}
}
LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
+
+ MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
+ ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
+ TmpConditionalAssumes.end());
+
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 8dd06983cd84..19797e6f7858 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -34,6 +34,7 @@ namespace llvm {
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class PredicatedScalarEvolution;
+class VPRecipeBuilder;
/// VPlan-based builder utility analogous to IRBuilder.
class VPBuilder {
@@ -141,6 +142,10 @@ public:
return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
}
+ VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) {
+ return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal});
+ }
+
//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//
@@ -171,16 +176,22 @@ public:
/// Information about vectorization costs
struct VectorizationFactor {
// Vector width with best cost
- unsigned Width;
+ ElementCount Width;
// Cost of the loop with that width
unsigned Cost;
// Width 1 means no vectorization, cost 0 means uncomputed cost.
- static VectorizationFactor Disabled() { return {1, 0}; }
+ static VectorizationFactor Disabled() {
+ return {ElementCount::getFixed(1), 0};
+ }
bool operator==(const VectorizationFactor &rhs) const {
return Width == rhs.Width && Cost == rhs.Cost;
}
+
+ bool operator!=(const VectorizationFactor &rhs) const {
+ return !(*this == rhs);
+ }
};
/// Planner drives the vectorization process after having passed
@@ -226,7 +237,10 @@ class LoopVectorizationPlanner {
/// A builder used to construct the current plan.
VPBuilder Builder;
- unsigned BestVF = 0;
+ /// The best number of elements of the vector types used in the
+ /// transformed loop. BestVF = None means that vectorization is
+ /// disabled.
+ Optional<ElementCount> BestVF = None;
unsigned BestUF = 0;
public:
@@ -241,14 +255,14 @@ public:
/// Plan how to best vectorize, return the best VF and its cost, or None if
/// vectorization and interleaving should be avoided up front.
- Optional<VectorizationFactor> plan(unsigned UserVF, unsigned UserIC);
+ Optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
/// Use the VPlan-native path to plan how to best vectorize, return the best
/// VF and its cost.
- VectorizationFactor planInVPlanNativePath(unsigned UserVF);
+ VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
/// Finalize the best decision and dispose of all other VPlans.
- void setBestPlan(unsigned VF, unsigned UF);
+ void setBestPlan(ElementCount VF, unsigned UF);
/// Generate the IR code for the body of the vectorized loop according to the
/// best selected VPlan.
@@ -259,11 +273,21 @@ public:
O << *Plan;
}
+ /// Look through the existing plans and return true if we have one with all
+ /// the vectorization factors in question.
+ bool hasPlanWithVFs(const ArrayRef<ElementCount> VFs) const {
+ return any_of(VPlans, [&](const VPlanPtr &Plan) {
+ return all_of(VFs, [&](const ElementCount &VF) {
+ return Plan->hasVF(VF);
+ });
+ });
+ }
+
/// Test a \p Predicate on a \p Range of VF's. Return the value of applying
/// \p Predicate on Range.Start, possibly decreasing Range.End such that the
/// returned value holds for the entire \p Range.
static bool
- getDecisionAndClampRange(const std::function<bool(unsigned)> &Predicate,
+ getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
VFRange &Range);
protected:
@@ -275,7 +299,7 @@ protected:
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
/// legal to vectorize the loop.
- void buildVPlans(unsigned MinVF, unsigned MaxVF);
+ void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
private:
/// Build a VPlan according to the information gathered by Legal. \return a
@@ -286,14 +310,20 @@ private:
/// Build a VPlan using VPRecipes according to the information gather by
/// Legal. This method is only used for the legacy inner loop vectorizer.
VPlanPtr buildVPlanWithVPRecipes(
- VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
- SmallPtrSetImpl<Instruction *> &DeadInstructions,
+ VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
const DenseMap<Instruction *, Instruction *> &SinkAfter);
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
/// legal to vectorize the loop. This method creates VPlans using VPRecipes.
- void buildVPlansWithVPRecipes(unsigned MinVF, unsigned MaxVF);
+ void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
+
+ /// Adjust the recipes for any inloop reductions. The chain of instructions
+ /// leading from the loop exit instr to the phi need to be converted to
+ /// reductions, with one operand being vector and the other being the scalar
+ /// reduction chain.
+ void adjustRecipesForInLoopReductions(VPlanPtr &Plan,
+ VPRecipeBuilder &RecipeBuilder);
};
} // namespace llvm
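
In the header above, VectorizationFactor::Width changes from unsigned to ElementCount, so width checks now go through the ElementCount API instead of plain integer comparisons. A small illustrative sketch (the function name is hypothetical):

#include "llvm/Support/TypeSize.h"

// With ElementCount, "no vectorization" is the fixed width 1; a scalable width
// such as <vscale x 1> is a distinct value and compares unequal to it.
bool isVectorizationDisabled(llvm::ElementCount Width) {
  return Width == llvm::ElementCount::getFixed(1);
}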
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 35af8e425778..b456a97aa4ec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -130,6 +130,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -157,18 +158,37 @@ using namespace llvm;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
+#ifndef NDEBUG
+const char VerboseDebug[] = DEBUG_TYPE "-verbose";
+#endif
+
/// @{
/// Metadata attribute names
-static const char *const LLVMLoopVectorizeFollowupAll =
- "llvm.loop.vectorize.followup_all";
-static const char *const LLVMLoopVectorizeFollowupVectorized =
+const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
+const char LLVMLoopVectorizeFollowupVectorized[] =
"llvm.loop.vectorize.followup_vectorized";
-static const char *const LLVMLoopVectorizeFollowupEpilogue =
+const char LLVMLoopVectorizeFollowupEpilogue[] =
"llvm.loop.vectorize.followup_epilogue";
/// @}
STATISTIC(LoopsVectorized, "Number of loops vectorized");
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
+STATISTIC(LoopsEpilogueVectorized, "Number of epilogues vectorized");
+
+static cl::opt<bool> EnableEpilogueVectorization(
+ "enable-epilogue-vectorization", cl::init(true), cl::Hidden,
+ cl::desc("Enable vectorization of epilogue loops."));
+
+static cl::opt<unsigned> EpilogueVectorizationForceVF(
+ "epilogue-vectorization-force-VF", cl::init(1), cl::Hidden,
+ cl::desc("When epilogue vectorization is enabled, and a value greater than "
+ "1 is specified, forces the given VF for all applicable epilogue "
+ "loops."));
+
+static cl::opt<unsigned> EpilogueVectorizationMinVF(
+ "epilogue-vectorization-minimum-VF", cl::init(16), cl::Hidden,
+ cl::desc("Only loops with vectorization factor equal to or larger than "
+ "the specified value are considered for epilogue vectorization."));
/// Loops with a known constant trip count below this number are vectorized only
/// if no scalar iteration overheads are incurred.
@@ -178,13 +198,36 @@ static cl::opt<unsigned> TinyTripCountVectorThreshold(
"value are vectorized only if no scalar iteration overheads "
"are incurred."));
-// Indicates that an epilogue is undesired, predication is preferred.
-// This means that the vectorizer will try to fold the loop-tail (epilogue)
-// into the loop and predicate the loop body accordingly.
-static cl::opt<bool> PreferPredicateOverEpilog(
- "prefer-predicate-over-epilog", cl::init(false), cl::Hidden,
- cl::desc("Indicate that an epilogue is undesired, predication should be "
- "used instead."));
+// The option prefer-predicate-over-epilogue indicates that an epilogue is
+// undesired and that predication is preferred; the possible values are listed
+// below. I.e., the
+// vectorizer will try to fold the tail-loop (epilogue) into the vector body
+// and predicate the instructions accordingly. If tail-folding fails, there are
+// different fallback strategies depending on these values:
+namespace PreferPredicateTy {
+ enum Option {
+ ScalarEpilogue = 0,
+ PredicateElseScalarEpilogue,
+ PredicateOrDontVectorize
+ };
+} // namespace PreferPredicateTy
+
+static cl::opt<PreferPredicateTy::Option> PreferPredicateOverEpilogue(
+ "prefer-predicate-over-epilogue",
+ cl::init(PreferPredicateTy::ScalarEpilogue),
+ cl::Hidden,
+ cl::desc("Tail-folding and predication preferences over creating a scalar "
+ "epilogue loop."),
+ cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue,
+ "scalar-epilogue",
+ "Don't tail-predicate loops, create scalar epilogue"),
+ clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue,
+ "predicate-else-scalar-epilogue",
+ "prefer tail-folding, create scalar epilogue if tail "
+ "folding fails."),
+ clEnumValN(PreferPredicateTy::PredicateOrDontVectorize,
+ "predicate-dont-vectorize",
+ "prefers tail-folding, don't attempt vectorization if "
+ "tail-folding fails.")));
static cl::opt<bool> MaximizeBandwidth(
"vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
@@ -196,7 +239,7 @@ static cl::opt<bool> EnableInterleavedMemAccesses(
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
/// An interleave-group may need masking if it resides in a block that needs
-/// predication, or in order to mask away gaps.
+/// predication, or in order to mask away gaps.
static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
"enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
@@ -230,6 +273,12 @@ static cl::opt<unsigned> ForceTargetInstructionCost(
"an instruction to a single constant value. Mostly "
"useful for getting consistent testing."));
+static cl::opt<bool> ForceTargetSupportsScalableVectors(
+ "force-target-supports-scalable-vectors", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Pretend that scalable vectors are supported, even if the target does "
+ "not support them. This flag should only be used for testing."));
+
static cl::opt<unsigned> SmallLoopCost(
"small-loop-cost", cl::init(20), cl::Hidden,
cl::desc(
@@ -247,6 +296,12 @@ static cl::opt<bool> EnableLoadStoreRuntimeInterleave(
cl::desc(
"Enable runtime interleaving until load/store ports are saturated"));
+/// Interleave small loops with scalar reductions.
+static cl::opt<bool> InterleaveSmallLoopScalarReduction(
+ "interleave-small-loop-scalar-reduction", cl::init(false), cl::Hidden,
+ cl::desc("Enable interleaving for loops with small iteration counts that "
+ "contain scalar reductions to expose ILP."));
+
/// The number of stores in a loop that are allowed to need predication.
static cl::opt<unsigned> NumberOfStoresToPredicate(
"vectorize-num-stores-pred", cl::init(1), cl::Hidden,
@@ -265,6 +320,17 @@ static cl::opt<unsigned> MaxNestedScalarReductionIC(
cl::desc("The maximum interleave count to use when interleaving a scalar "
"reduction in a nested loop."));
+static cl::opt<bool>
+ PreferInLoopReductions("prefer-inloop-reductions", cl::init(false),
+ cl::Hidden,
+ cl::desc("Prefer in-loop vector reductions, "
+ "overriding the targets preference."));
+
+static cl::opt<bool> PreferPredicatedReductionSelect(
+ "prefer-predicated-reduction-select", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Prefer predicating a reduction operation over an after loop select."));
+
cl::opt<bool> EnableVPlanNativePath(
"enable-vplan-native-path", cl::init(false), cl::Hidden,
cl::desc("Enable VPlan-native vectorization path with "
@@ -306,17 +372,11 @@ static Type *getMemInstValueType(Value *I) {
/// A helper function that returns true if the given type is irregular. The
/// type is irregular if its allocated size doesn't equal the store size of an
-/// element of the corresponding vector type at the given vectorization factor.
-static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) {
- // Determine if an array of VF elements of type Ty is "bitcast compatible"
- // with a <VF x Ty> vector.
- if (VF > 1) {
- auto *VectorTy = FixedVectorType::get(Ty, VF);
- return VF * DL.getTypeAllocSize(Ty) != DL.getTypeStoreSize(VectorTy);
- }
-
- // If the vectorization factor is one, we just check if an array of type Ty
- // requires padding between elements.
+/// element of the corresponding vector type.
+static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
+ // Determine if an array of N elements of type Ty is "bitcast compatible"
+ // with a <N x Ty> vector.
+ // This is only true if there is no padding between the array elements.
return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
}
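// [Editor's illustration, not part of the upstream patch] A minimal sketch of
// the "irregular type" check above, using only DataLayout queries that appear
// in this file; the layout string is an arbitrary example.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

static void irregularTypeExample() {
  llvm::LLVMContext Ctx;
  llvm::DataLayout DL("e-m:e-i64:64-n32:64");
  llvm::Type *I1 = llvm::Type::getInt1Ty(Ctx);
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  // i1 is irregular: allocated as a whole byte (8 bits) but only 1 bit wide,
  // so an array of i1 is not bitcast-compatible with <N x i1>.
  bool I1Irregular = DL.getTypeAllocSizeInBits(I1) != DL.getTypeSizeInBits(I1);
  // i32 is regular: its 32-bit allocation size equals its 32-bit type size.
  bool I32Irregular = DL.getTypeAllocSizeInBits(I32) != DL.getTypeSizeInBits(I32);
  (void)I1Irregular;  // true
  (void)I32Irregular; // false
}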
@@ -393,29 +453,42 @@ public:
LoopInfo *LI, DominatorTree *DT,
const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, unsigned VecWidth,
+ OptimizationRemarkEmitter *ORE, ElementCount VecWidth,
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
- LoopVectorizationCostModel *CM)
+ LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI)
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()),
- VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM) {}
+ VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM),
+ BFI(BFI), PSI(PSI) {
+ // Query this against the original loop and save it here because the profile
+ // of the original loop header may change as the transformation happens.
+ OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize(
+ OrigLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
+ }
+
virtual ~InnerLoopVectorizer() = default;
- /// Create a new empty loop. Unlink the old loop and connect the new one.
- /// Return the pre-header block of the new loop.
- BasicBlock *createVectorizedLoopSkeleton();
+ /// Create a new empty loop that will contain vectorized instructions later
+ /// on, while the old loop will be used as the scalar remainder. Control flow
+ /// is generated around the vectorized (and scalar epilogue) loops consisting
+ /// of various checks and bypasses. Return the pre-header block of the new
+ /// loop.
+ /// In the case of epilogue vectorization, this function is overridden to
+ /// handle the more complex control flow around the loops.
+ virtual BasicBlock *createVectorizedLoopSkeleton();
/// Widen a single instruction within the innermost loop.
- void widenInstruction(Instruction &I, VPUser &Operands,
+ void widenInstruction(Instruction &I, VPValue *Def, VPUser &Operands,
VPTransformState &State);
/// Widen a single call instruction within the innermost loop.
- void widenCallInstruction(CallInst &I, VPUser &ArgOperands,
+ void widenCallInstruction(CallInst &I, VPValue *Def, VPUser &ArgOperands,
VPTransformState &State);
/// Widen a single select instruction within the innermost loop.
- void widenSelectInstruction(SelectInst &I, VPUser &Operands,
+ void widenSelectInstruction(SelectInst &I, VPValue *VPDef, VPUser &Operands,
bool InvariantCond, VPTransformState &State);
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
@@ -431,14 +504,15 @@ public:
/// Vectorize a single GetElementPtrInst based on information gathered and
/// decisions taken during planning.
- void widenGEP(GetElementPtrInst *GEP, VPUser &Indices, unsigned UF,
- unsigned VF, bool IsPtrLoopInvariant,
+ void widenGEP(GetElementPtrInst *GEP, VPValue *VPDef, VPUser &Indices,
+ unsigned UF, ElementCount VF, bool IsPtrLoopInvariant,
SmallBitVector &IsIndexLoopInvariant, VPTransformState &State);
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
- void widenPHIInstruction(Instruction *PN, unsigned UF, unsigned VF);
+ void widenPHIInstruction(Instruction *PN, RecurrenceDescriptor *RdxDesc,
+ Value *StartV, unsigned UF, ElementCount VF);
/// A helper function to scalarize a single Instruction in the innermost loop.
/// Generates a sequence of scalar instances for each lane between \p MinLane
@@ -452,7 +526,8 @@ public:
/// Widen an integer or floating-point induction variable \p IV. If \p Trunc
/// is provided, the integer induction variable will first be truncated to
/// the corresponding type.
- void widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc = nullptr);
+ void widenIntOrFpInduction(PHINode *IV, Value *Start,
+ TruncInst *Trunc = nullptr);
/// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a
/// vector or scalar value on-demand if one is not yet available. When
@@ -477,6 +552,10 @@ public:
/// value into a vector.
Value *getOrCreateVectorValue(Value *V, unsigned Part);
+ void setVectorValue(Value *Scalar, unsigned Part, Value *Vector) {
+ VectorLoopValueMap.setVectorValue(Scalar, Part, Vector);
+ }
+
/// Return a value in the new loop corresponding to \p V from the original
/// loop at unroll and vector indices \p Instance. If the value has been
/// vectorized but not scalarized, the necessary extractelement instruction
@@ -491,7 +570,9 @@ public:
/// BlockInMask is non-null. Use \p State to translate given VPValues to IR
/// values in the vectorized loop.
void vectorizeInterleaveGroup(const InterleaveGroup<Instruction> *Group,
+ ArrayRef<VPValue *> VPDefs,
VPTransformState &State, VPValue *Addr,
+ ArrayRef<VPValue *> StoredValues,
VPValue *BlockInMask = nullptr);
/// Vectorize Load and Store instructions with the base address given in \p
@@ -499,8 +580,8 @@ public:
/// non-null. Use \p State to translate given VPValues to IR values in the
/// vectorized loop.
void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
- VPValue *Addr, VPValue *StoredValue,
- VPValue *BlockInMask);
+ VPValue *Def, VPValue *Addr,
+ VPValue *StoredValue, VPValue *BlockInMask);
/// Set the debug location in the builder using the debug location in
/// the instruction.
@@ -544,10 +625,11 @@ protected:
/// Clear NSW/NUW flags from reduction instructions if necessary.
void clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc);
- /// The Loop exit block may have single value PHI nodes with some
- /// incoming value. While vectorizing we only handled real values
- /// that were defined inside the loop and we should have one value for
- /// each predecessor of its parent basic block. See PR14725.
+ /// Fixup the LCSSA phi nodes in the unique exit block. This simply
+ /// means we need to add the appropriate incoming value from the middle
+ /// block as exiting edges from the scalar epilogue loop (if present) are
+ /// already in place, and we exit the vector loop exclusively to the middle
+ /// block.
void fixLCSSAPHIs();
/// Iteratively sink the scalarized operands of a predicated instruction into
@@ -586,7 +668,8 @@ protected:
/// truncate instruction, instead of widening the original IV, we widen a
/// version of the IV truncated to \p EntryVal's type.
void createVectorIntOrFpInductionPHI(const InductionDescriptor &II,
- Value *Step, Instruction *EntryVal);
+ Value *Step, Value *Start,
+ Instruction *EntryVal);
/// Returns true if an instruction \p I should be scalarized instead of
/// vectorized for the chosen vectorization factor.
@@ -654,6 +737,28 @@ protected:
const DataLayout &DL,
const InductionDescriptor &ID) const;
+ /// Emit basic blocks (prefixed with \p Prefix) for the iteration check,
+ /// vector loop preheader, middle block and scalar preheader. Also
+ /// allocate a loop object for the new vector loop and return it.
+ Loop *createVectorLoopSkeleton(StringRef Prefix);
+
+ /// Create new phi nodes for the induction variables to resume iteration count
+ /// in the scalar epilogue, from where the vectorized loop left off (given by
+ /// \p VectorTripCount).
+ /// In cases where the loop skeleton is more complicated (e.g. epilogue
+ /// vectorization) and the resume values can come from an additional bypass
+ /// block, the \p AdditionalBypass pair provides information about the bypass
+ /// block and the end value on the edge from bypass to this loop.
+ void createInductionResumeValues(
+ Loop *L, Value *VectorTripCount,
+ std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
+
+ /// Complete the loop skeleton by adding debug MDs, creating appropriate
+ /// conditional branches in the middle block, preparing the builder and
+ /// running the verifier. Take in the vector loop \p L as argument, and return
+ /// the preheader of the completed vector loop.
+ BasicBlock *completeLoopSkeleton(Loop *L, MDNode *OrigLoopID);
+
/// Add additional metadata to \p To that was not present on \p Orig.
///
/// Currently this is used to add the noalias annotations based on the
@@ -672,6 +777,11 @@ protected:
/// vector of instructions.
void addMetadata(ArrayRef<Value *> To, Instruction *From);
+ /// Allow subclasses to override and print debug traces before/after vplan
+ /// execution, when trace information is requested.
+ virtual void printDebugTracesAtStart(){};
+ virtual void printDebugTracesAtEnd(){};
+
/// The original loop.
Loop *OrigLoop;
@@ -710,7 +820,7 @@ protected:
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
- unsigned VF;
+ ElementCount VF;
/// The vectorization unroll factor to use. Each scalar is vectorized to this
/// many different vector instructions.
@@ -730,7 +840,8 @@ protected:
/// Middle Block between the vector and the scalar.
BasicBlock *LoopMiddleBlock;
- /// The ExitBlock of the scalar loop.
+ /// The (unique) ExitBlock of the scalar loop. Note that
+ /// there can be multiple exiting edges reaching this block.
BasicBlock *LoopExitBlock;
/// The vector loop body.
@@ -779,6 +890,14 @@ protected:
// Vector of original scalar PHIs whose corresponding widened PHIs need to be
// fixed up at the end of vector code generation.
SmallVector<PHINode *, 8> OrigPHIsToFix;
+
+ /// BFI and PSI are used to check for profile guided size optimizations.
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
+
+ // Whether this loop should be optimized for size based on profile guided size
+ // optimizations.
+ bool OptForSizeBasedOnProfile;
};
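// [Editor's illustration, not part of the upstream patch] Throughout this
// change the plain 'unsigned VF' is replaced by ElementCount
// (llvm/Support/TypeSize.h), which pairs a known-minimum lane count with a
// scalable flag. A minimal sketch of the calls this patch relies on:
#include "llvm/Support/TypeSize.h"

static void elementCountExample() {
  llvm::ElementCount FixedVF = llvm::ElementCount::getFixed(4);   // <4 x Ty>
  llvm::ElementCount ScalVF = llvm::ElementCount::getScalable(4); // <vscale x 4 x Ty>
  (void)FixedVF.isVector();         // true: more than one lane
  (void)FixedVF.getKnownMinValue(); // 4
  (void)ScalVF.isScalable();        // true: lane count is a multiple of vscale
  (void)llvm::ElementCount::getFixed(1).isScalar(); // true
}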
class InnerLoopUnroller : public InnerLoopVectorizer {
@@ -789,9 +908,11 @@ public:
const TargetTransformInfo *TTI, AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, unsigned UnrollFactor,
LoopVectorizationLegality *LVL,
- LoopVectorizationCostModel *CM)
- : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1,
- UnrollFactor, LVL, CM) {}
+ LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI)
+ : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
+ ElementCount::getFixed(1), UnrollFactor, LVL, CM,
+ BFI, PSI) {}
private:
Value *getBroadcastInstrs(Value *V) override;
@@ -801,6 +922,128 @@ private:
Value *reverseVector(Value *Vec) override;
};
+/// Encapsulate information regarding vectorization of a loop and its epilogue.
+/// This information is meant to be updated and used across two stages of
+/// epilogue vectorization.
+struct EpilogueLoopVectorizationInfo {
+ ElementCount MainLoopVF = ElementCount::getFixed(0);
+ unsigned MainLoopUF = 0;
+ ElementCount EpilogueVF = ElementCount::getFixed(0);
+ unsigned EpilogueUF = 0;
+ BasicBlock *MainLoopIterationCountCheck = nullptr;
+ BasicBlock *EpilogueIterationCountCheck = nullptr;
+ BasicBlock *SCEVSafetyCheck = nullptr;
+ BasicBlock *MemSafetyCheck = nullptr;
+ Value *TripCount = nullptr;
+ Value *VectorTripCount = nullptr;
+
+ EpilogueLoopVectorizationInfo(unsigned MVF, unsigned MUF, unsigned EVF,
+ unsigned EUF)
+ : MainLoopVF(ElementCount::getFixed(MVF)), MainLoopUF(MUF),
+ EpilogueVF(ElementCount::getFixed(EVF)), EpilogueUF(EUF) {
+ assert(EUF == 1 &&
+ "A high UF for the epilogue loop is likely not beneficial.");
+ }
+};
+
+/// An extension of the inner loop vectorizer that creates a skeleton for a
+/// vectorized loop that has its epilogue (residual) also vectorized.
+/// The idea is to run the vplan on a given loop twice, firstly to setup the
+/// skeleton and vectorize the main loop, and secondly to complete the skeleton
+/// from the first step and vectorize the epilogue. This is achieved by
+/// deriving two concrete strategy classes from this base class and invoking
+/// them in succession from the loop vectorizer planner.
+class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
+public:
+ InnerLoopAndEpilogueVectorizer(
+ Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
+ LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+ : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
+ EPI.MainLoopVF, EPI.MainLoopUF, LVL, CM, BFI, PSI),
+ EPI(EPI) {}
+
+ // Override this function to handle the more complex control flow around the
+ // three loops.
+ BasicBlock *createVectorizedLoopSkeleton() final override {
+ return createEpilogueVectorizedLoopSkeleton();
+ }
+
+ /// The interface for creating a vectorized skeleton using one of two
+ /// different strategies, each corresponding to one execution of the vplan
+ /// as described above.
+ virtual BasicBlock *createEpilogueVectorizedLoopSkeleton() = 0;
+
+ /// Holds and updates state information required to vectorize the main loop
+ /// and its epilogue in two separate passes. This setup helps us avoid
+ /// regenerating and recomputing runtime safety checks. It also helps us to
+ /// shorten the iteration-count-check path length for the cases where the
+ /// iteration count of the loop is so small that the main vector loop is
+ /// completely skipped.
+ EpilogueLoopVectorizationInfo &EPI;
+};
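// [Editor's illustration, not part of the upstream patch] Pseudo-code outline
// of the two-pass flow described above; identifiers such as LVP, DT and the
// elided constructor arguments stand for whatever the planner has in scope
// and are assumptions of this sketch, not an exact API:
//
//   EpilogueLoopVectorizationInfo EPI(/*MVF=*/8, /*MUF=*/2, /*EVF=*/4, /*EUF=*/1);
//   EpilogueVectorizerMainLoop MainILV(/*loop, analyses,*/ EPI, /*...*/);
//   LVP.executePlan(MainILV, DT);   // pass 1: vectorize the main loop
//   EpilogueVectorizerEpilogueLoop EpiILV(/*loop, analyses,*/ EPI, /*...*/);
//   LVP.executePlan(EpiILV, DT);    // pass 2: vectorize the epilogue
//
// EPI carries the trip counts and iteration-count-check blocks created in
// pass 1 so that pass 2 does not have to regenerate the runtime checks.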
+
+/// A specialized derived class of inner loop vectorizer that performs
+/// vectorization of *main* loops in the process of vectorizing loops and their
+/// epilogues.
+class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
+public:
+ EpilogueVectorizerMainLoop(
+ Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
+ LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+ : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
+ EPI, LVL, CM, BFI, PSI) {}
+ /// Implements the interface for creating a vectorized skeleton using the
+ /// *main loop* strategy (i.e. the first pass of vplan execution).
+ BasicBlock *createEpilogueVectorizedLoopSkeleton() final override;
+
+protected:
+ /// Emits an iteration count bypass check once for the main loop (when \p
+ /// ForEpilogue is false) and once for the epilogue loop (when \p
+ /// ForEpilogue is true).
+ BasicBlock *emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass,
+ bool ForEpilogue);
+ void printDebugTracesAtStart() override;
+ void printDebugTracesAtEnd() override;
+};
+
+/// A specialized derived class of inner loop vectorizer that performs
+/// vectorization of *epilogue* loops in the process of vectorizing loops and
+/// their epilogues.
+class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
+public:
+ EpilogueVectorizerEpilogueLoop(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE,
+ EpilogueLoopVectorizationInfo &EPI,
+ LoopVectorizationLegality *LVL,
+ llvm::LoopVectorizationCostModel *CM,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+ : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
+ EPI, LVL, CM, BFI, PSI) {}
+ /// Implements the interface for creating a vectorized skeleton using the
+ /// *epilogue loop* strategy (i.e. the second pass of vplan execution).
+ BasicBlock *createEpilogueVectorizedLoopSkeleton() final override;
+
+protected:
+ /// Emits an iteration count bypass check after the main vector loop has
+ /// finished to see if there are any iterations left to execute by either
+ /// the vector epilogue or the scalar epilogue.
+ BasicBlock *emitMinimumVectorEpilogueIterCountCheck(Loop *L,
+ BasicBlock *Bypass,
+ BasicBlock *Insert);
+ void printDebugTracesAtStart() override;
+ void printDebugTracesAtEnd() override;
+};
} // end namespace llvm
/// Look for a meaningful debug location on the instruction or its
@@ -827,7 +1070,9 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr)
const DILocation *DIL = Inst->getDebugLoc();
if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
!isa<DbgInfoIntrinsic>(Inst)) {
- auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF);
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ auto NewDIL =
+ DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
if (NewDIL)
B.SetCurrentDebugLocation(NewDIL.getValue());
else
@@ -881,6 +1126,15 @@ static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName,
return R;
}
+/// Return a value for Step multiplied by VF.
+static Value *createStepForVF(IRBuilder<> &B, Constant *Step, ElementCount VF) {
+ assert(isa<ConstantInt>(Step) && "Expected an integer step");
+ Constant *StepVal = ConstantInt::get(
+ Step->getType(),
+ cast<ConstantInt>(Step)->getSExtValue() * VF.getKnownMinValue());
+ return VF.isScalable() ? B.CreateVScale(StepVal) : StepVal;
+}
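// [Editor's illustration, not part of the upstream patch] For a fixed VF the
// helper above folds to a constant, while for a scalable VF it produces a
// runtime value based on llvm.vscale, roughly:
//   Step = i64 2, VF = 4          -> i64 8
//   Step = i64 2, VF = vscale x 4 -> vscale * 8 (via Builder.CreateVScale)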
+
namespace llvm {
void reportVectorizationFailure(const StringRef DebugMsg,
@@ -952,7 +1206,10 @@ enum ScalarEpilogueLowering {
CM_ScalarEpilogueNotAllowedLowTripLoop,
// Loop hint predicate indicating an epilogue is undesired.
- CM_ScalarEpilogueNotNeededUsePredicate
+ CM_ScalarEpilogueNotNeededUsePredicate,
+
+ // Directive indicating we must either tail fold or not vectorize
+ CM_ScalarEpilogueNotAllowedUsePredicate
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -979,7 +1236,7 @@ public:
/// \return An upper bound for the vectorization factor, or None if
/// vectorization and interleaving should be avoided up front.
- Optional<unsigned> computeMaxVF(unsigned UserVF, unsigned UserIC);
+ Optional<ElementCount> computeMaxVF(ElementCount UserVF, unsigned UserIC);
/// \return True if runtime checks are required for vectorization, and false
/// otherwise.
@@ -989,10 +1246,13 @@ public:
/// This method checks every power of two up to MaxVF. If UserVF is not ZERO
/// then this vectorization factor will be selected if vectorization is
/// possible.
- VectorizationFactor selectVectorizationFactor(unsigned MaxVF);
+ VectorizationFactor selectVectorizationFactor(ElementCount MaxVF);
+ VectorizationFactor
+ selectEpilogueVectorizationFactor(const ElementCount MaxVF,
+ const LoopVectorizationPlanner &LVP);
/// Setup cost-based decisions for user vectorization factor.
- void selectUserVectorizationFactor(unsigned UserVF) {
+ void selectUserVectorizationFactor(ElementCount UserVF) {
collectUniformsAndScalars(UserVF);
collectInstsToScalarize(UserVF);
}
@@ -1006,7 +1266,7 @@ public:
/// If interleave count has been specified by metadata it will be returned.
/// Otherwise, the interleave count is computed and returned. VF and LoopCost
/// are the selected vectorization factor and the cost of the selected VF.
- unsigned selectInterleaveCount(unsigned VF, unsigned LoopCost);
+ unsigned selectInterleaveCount(ElementCount VF, unsigned LoopCost);
/// Memory access instruction may be vectorized in more than one way.
/// Form of instruction after vectorization depends on cost.
@@ -1015,7 +1275,7 @@ public:
/// the lists of loop-uniform and loop-scalar instructions.
/// The calculated cost is saved with widening decision in order to
/// avoid redundant calculations.
- void setCostBasedWideningDecision(unsigned VF);
+ void setCostBasedWideningDecision(ElementCount VF);
/// A struct that represents some properties of the register usage
/// of a loop.
@@ -1030,11 +1290,16 @@ public:
/// \return Returns information about the register usages of the loop for the
/// given vectorization factors.
- SmallVector<RegisterUsage, 8> calculateRegisterUsage(ArrayRef<unsigned> VFs);
+ SmallVector<RegisterUsage, 8>
+ calculateRegisterUsage(ArrayRef<ElementCount> VFs);
/// Collect values we want to ignore in the cost model.
void collectValuesToIgnore();
+ /// Split reductions into those that happen in the loop, and those that happen
+ /// outside. In-loop reductions are collected into InLoopReductionChains.
+ void collectInLoopReductions();
+
/// \returns The smallest bitwidth each instruction can be represented with.
/// The vector equivalents of these instructions should be truncated to this
/// type.
@@ -1044,8 +1309,9 @@ public:
/// \returns True if it is more profitable to scalarize instruction \p I for
/// vectorization factor \p VF.
- bool isProfitableToScalarize(Instruction *I, unsigned VF) const {
- assert(VF > 1 && "Profitable to scalarize relevant only for VF > 1.");
+ bool isProfitableToScalarize(Instruction *I, ElementCount VF) const {
+ assert(VF.isVector() &&
+ "Profitable to scalarize relevant only for VF > 1.");
// Cost model is not run in the VPlan-native path - return conservative
// result until this changes.
@@ -1059,8 +1325,8 @@ public:
}
/// Returns true if \p I is known to be uniform after vectorization.
- bool isUniformAfterVectorization(Instruction *I, unsigned VF) const {
- if (VF == 1)
+ bool isUniformAfterVectorization(Instruction *I, ElementCount VF) const {
+ if (VF.isScalar())
return true;
// Cost model is not run in the VPlan-native path - return conservative
@@ -1075,8 +1341,8 @@ public:
}
/// Returns true if \p I is known to be scalar after vectorization.
- bool isScalarAfterVectorization(Instruction *I, unsigned VF) const {
- if (VF == 1)
+ bool isScalarAfterVectorization(Instruction *I, ElementCount VF) const {
+ if (VF.isScalar())
return true;
// Cost model is not run in the VPlan-native path - return conservative
@@ -1092,8 +1358,8 @@ public:
/// \returns True if instruction \p I can be truncated to a smaller bitwidth
/// for vectorization factor \p VF.
- bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
- return VF > 1 && MinBWs.find(I) != MinBWs.end() &&
+ bool canTruncateToMinimalBitwidth(Instruction *I, ElementCount VF) const {
+ return VF.isVector() && MinBWs.find(I) != MinBWs.end() &&
!isProfitableToScalarize(I, VF) &&
!isScalarAfterVectorization(I, VF);
}
@@ -1110,17 +1376,18 @@ public:
/// Save vectorization decision \p W and \p Cost taken by the cost model for
/// instruction \p I and vector width \p VF.
- void setWideningDecision(Instruction *I, unsigned VF, InstWidening W,
- unsigned Cost) {
- assert(VF >= 2 && "Expected VF >=2");
+ void setWideningDecision(Instruction *I, ElementCount VF, InstWidening W,
+ InstructionCost Cost) {
+ assert(VF.isVector() && "Expected VF >=2");
WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
}
/// Save vectorization decision \p W and \p Cost taken by the cost model for
/// interleaving group \p Grp and vector width \p VF.
- void setWideningDecision(const InterleaveGroup<Instruction> *Grp, unsigned VF,
- InstWidening W, unsigned Cost) {
- assert(VF >= 2 && "Expected VF >=2");
+ void setWideningDecision(const InterleaveGroup<Instruction> *Grp,
+ ElementCount VF, InstWidening W,
+ InstructionCost Cost) {
+ assert(VF.isVector() && "Expected VF >=2");
/// Broadcast this decision to all instructions inside the group.
/// But the cost will be assigned to one instruction only.
for (unsigned i = 0; i < Grp->getFactor(); ++i) {
@@ -1136,15 +1403,14 @@ public:
/// Return the cost model decision for the given instruction \p I and vector
/// width \p VF. Return CM_Unknown if this instruction did not pass
/// through the cost modeling.
- InstWidening getWideningDecision(Instruction *I, unsigned VF) {
- assert(VF >= 2 && "Expected VF >=2");
-
+ InstWidening getWideningDecision(Instruction *I, ElementCount VF) {
+ assert(VF.isVector() && "Expected VF to be a vector VF");
// Cost model is not run in the VPlan-native path - return conservative
// result until this changes.
if (EnableVPlanNativePath)
return CM_GatherScatter;
- std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
+ std::pair<Instruction *, ElementCount> InstOnVF = std::make_pair(I, VF);
auto Itr = WideningDecisions.find(InstOnVF);
if (Itr == WideningDecisions.end())
return CM_Unknown;
@@ -1153,9 +1419,9 @@ public:
/// Return the vectorization cost for the given instruction \p I and vector
/// width \p VF.
- unsigned getWideningCost(Instruction *I, unsigned VF) {
- assert(VF >= 2 && "Expected VF >=2");
- std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
+ InstructionCost getWideningCost(Instruction *I, ElementCount VF) {
+ assert(VF.isVector() && "Expected VF >=2");
+ std::pair<Instruction *, ElementCount> InstOnVF = std::make_pair(I, VF);
assert(WideningDecisions.find(InstOnVF) != WideningDecisions.end() &&
"The cost is not calculated");
return WideningDecisions[InstOnVF].second;
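// [Editor's illustration, not part of the upstream patch] Widening costs are
// now carried as InstructionCost (llvm/Support/InstructionCost.h, included
// above) instead of unsigned, so an "unable to cost" state can propagate.
// A minimal sketch, assuming the LLVM 12 interface of that class:
#include "llvm/Support/InstructionCost.h"

static void instructionCostExample() {
  llvm::InstructionCost C = 4;   // a valid, known cost
  C += 2;                        // arithmetic behaves like an integer
  llvm::InstructionCost Invalid = llvm::InstructionCost::getInvalid();
  (void)C.isValid();             // true
  (void)Invalid.isValid();       // false
  (void)*C.getValue();           // 6; getValue() is empty for invalid costs
}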
@@ -1164,7 +1430,7 @@ public:
/// Return True if instruction \p I is an optimizable truncate whose operand
/// is an induction variable. Such a truncate will be removed by adding a new
/// induction variable with the destination type.
- bool isOptimizableIVTruncate(Instruction *I, unsigned VF) {
+ bool isOptimizableIVTruncate(Instruction *I, ElementCount VF) {
// If the instruction is not a truncate, return false.
auto *Trunc = dyn_cast<TruncInst>(I);
if (!Trunc)
@@ -1189,14 +1455,14 @@ public:
/// Collects the instructions to scalarize for each predicated instruction in
/// the loop.
- void collectInstsToScalarize(unsigned VF);
+ void collectInstsToScalarize(ElementCount VF);
/// Collect Uniform and Scalar values for the given \p VF.
/// The sets depend on CM decision for Load/Store instructions
/// that may be vectorized as interleave, gather-scatter or scalarized.
- void collectUniformsAndScalars(unsigned VF) {
+ void collectUniformsAndScalars(ElementCount VF) {
// Do the analysis once.
- if (VF == 1 || Uniforms.find(VF) != Uniforms.end())
+ if (VF.isScalar() || Uniforms.find(VF) != Uniforms.end())
return;
setCostBasedWideningDecision(VF);
collectLoopUniforms(VF);
@@ -1247,7 +1513,8 @@ public:
/// instructions that may divide by zero.
/// If a non-zero VF has been calculated, we check if I will be scalarized
/// predication for that VF.
- bool isScalarWithPredication(Instruction *I, unsigned VF = 1);
+ bool isScalarWithPredication(Instruction *I,
+ ElementCount VF = ElementCount::getFixed(1));
// Returns true if \p I is an instruction that will be predicated either
// through scalar predication or masked load/store or masked gather/scatter.
@@ -1264,12 +1531,16 @@ public:
/// Returns true if \p I is a memory instruction with consecutive memory
/// access that can be widened.
- bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1);
+ bool
+ memoryInstructionCanBeWidened(Instruction *I,
+ ElementCount VF = ElementCount::getFixed(1));
/// Returns true if \p I is a memory instruction in an interleaved-group
/// of memory accesses that can be vectorized with wide vector loads/stores
/// and shuffles.
- bool interleavedAccessCanBeWidened(Instruction *I, unsigned VF = 1);
+ bool
+ interleavedAccessCanBeWidened(Instruction *I,
+ ElementCount VF = ElementCount::getFixed(1));
/// Check if \p Instr belongs to any interleaved access group.
bool isAccessInterleaved(Instruction *Instr) {
@@ -1282,11 +1553,16 @@ public:
return InterleaveInfo.getInterleaveGroup(Instr);
}
- /// Returns true if an interleaved group requires a scalar iteration
- /// to handle accesses with gaps, and there is nothing preventing us from
- /// creating a scalar epilogue.
+ /// Returns true if we're required to use a scalar epilogue for at least
+ /// the final iteration of the original loop.
bool requiresScalarEpilogue() const {
- return isScalarEpilogueAllowed() && InterleaveInfo.requiresScalarEpilogue();
+ if (!isScalarEpilogueAllowed())
+ return false;
+ // If we might exit from anywhere but the latch, we must run the exiting
+ // iteration in scalar form.
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch())
+ return true;
+ return InterleaveInfo.requiresScalarEpilogue();
}
/// Returns true if a scalar epilogue is not allowed due to optsize or a
@@ -1302,17 +1578,34 @@ public:
return foldTailByMasking() || Legal->blockNeedsPredication(BB);
}
+ /// A SmallMapVector to store the InLoop reduction op chains, mapping phi
+ /// nodes to the chain of instructions representing the reductions. Uses a
+ /// MapVector to ensure deterministic iteration order.
+ using ReductionChainMap =
+ SmallMapVector<PHINode *, SmallVector<Instruction *, 4>, 4>;
+
+ /// Return the chain of instructions representing an inloop reduction.
+ const ReductionChainMap &getInLoopReductionChains() const {
+ return InLoopReductionChains;
+ }
+
+ /// Returns true if the Phi is part of an inloop reduction.
+ bool isInLoopReduction(PHINode *Phi) const {
+ return InLoopReductionChains.count(Phi);
+ }
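// [Editor's illustration, not part of the upstream patch] For a loop such as
//   for (i = 0; i < n; ++i) sum += a[i];
// the chain recorded for the reduction phi 'sum' is the add feeding the phi.
// An in-loop reduction keeps a scalar accumulator and reduces each vector of
// loaded values inside the loop body (roughly an llvm.vector.reduce.add per
// iteration), instead of carrying a vector accumulator and performing one
// horizontal reduction after the loop.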
+
/// Estimate cost of an intrinsic call instruction CI if it were vectorized
/// with factor VF. Return the cost of the instruction, including
/// scalarization overhead if it's needed.
- unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF);
+ InstructionCost getVectorIntrinsicCost(CallInst *CI, ElementCount VF);
/// Estimate cost of a call instruction CI if it were vectorized with factor
/// VF. Return the cost of the instruction, including scalarization overhead
/// if it's needed. The flag NeedToScalarize shows if the call needs to be
/// scalarized -
/// i.e. either vector version isn't available, or is too expensive.
- unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize);
+ InstructionCost getVectorCallCost(CallInst *CI, ElementCount VF,
+ bool &NeedToScalarize);
/// Invalidates decisions already taken by the cost model.
void invalidateCostModelingDecisions() {
@@ -1327,7 +1620,8 @@ private:
/// \return An upper bound for the vectorization factor, a power-of-2 larger
/// than zero. One is returned if vectorization should best be avoided due
/// to cost.
- unsigned computeFeasibleMaxVF(unsigned ConstTripCount);
+ ElementCount computeFeasibleMaxVF(unsigned ConstTripCount,
+ ElementCount UserVF);
/// The vectorization cost is a combination of the cost itself and a boolean
/// indicating whether any of the contributing operations will actually
@@ -1336,47 +1630,54 @@ private:
/// is
/// false, then all operations will be scalarized (i.e. no vectorization has
/// actually taken place).
- using VectorizationCostTy = std::pair<unsigned, bool>;
+ using VectorizationCostTy = std::pair<InstructionCost, bool>;
/// Returns the expected execution cost. The unit of the cost does
/// not matter because we use the 'cost' units to compare different
/// vector widths. The cost that is returned is *not* normalized by
/// the factor width.
- VectorizationCostTy expectedCost(unsigned VF);
+ VectorizationCostTy expectedCost(ElementCount VF);
/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
- VectorizationCostTy getInstructionCost(Instruction *I, unsigned VF);
+ VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
/// The cost-computation logic from getInstructionCost which provides
/// the vector type as an output parameter.
- unsigned getInstructionCost(Instruction *I, unsigned VF, Type *&VectorTy);
+ InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
+ Type *&VectorTy);
+
+ /// Return the cost of instructions in an inloop reduction pattern, if I is
+ /// part of that pattern.
+ InstructionCost getReductionPatternCost(Instruction *I, ElementCount VF,
+ Type *VectorTy,
+ TTI::TargetCostKind CostKind);
/// Calculate vectorization cost of memory instruction \p I.
- unsigned getMemoryInstructionCost(Instruction *I, unsigned VF);
+ InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF);
/// The cost computation for scalarized memory instruction.
- unsigned getMemInstScalarizationCost(Instruction *I, unsigned VF);
+ InstructionCost getMemInstScalarizationCost(Instruction *I, ElementCount VF);
/// The cost computation for interleaving group of memory instructions.
- unsigned getInterleaveGroupCost(Instruction *I, unsigned VF);
+ InstructionCost getInterleaveGroupCost(Instruction *I, ElementCount VF);
/// The cost computation for Gather/Scatter instruction.
- unsigned getGatherScatterCost(Instruction *I, unsigned VF);
+ InstructionCost getGatherScatterCost(Instruction *I, ElementCount VF);
/// The cost computation for widening instruction \p I with consecutive
/// memory access.
- unsigned getConsecutiveMemOpCost(Instruction *I, unsigned VF);
+ InstructionCost getConsecutiveMemOpCost(Instruction *I, ElementCount VF);
/// The cost calculation for Load/Store instruction \p I with uniform pointer -
/// Load: scalar load + broadcast.
/// Store: scalar store + (loop invariant value stored? 0 : extract of last
/// element)
- unsigned getUniformMemOpCost(Instruction *I, unsigned VF);
+ InstructionCost getUniformMemOpCost(Instruction *I, ElementCount VF);
/// Estimate the overhead of scalarizing an instruction. This is a
/// convenience wrapper for the type-based getScalarizationOverhead API.
- unsigned getScalarizationOverhead(Instruction *I, unsigned VF);
+ InstructionCost getScalarizationOverhead(Instruction *I, ElementCount VF);
/// Returns whether the instruction is a load or store and will be emitted
/// as a vector operation.
@@ -1394,7 +1695,7 @@ private:
/// A type representing the costs for instructions if they were to be
/// scalarized rather than vectorized. The entries are Instruction-Cost
/// pairs.
- using ScalarCostsTy = DenseMap<Instruction *, unsigned>;
+ using ScalarCostsTy = DenseMap<Instruction *, InstructionCost>;
/// A set containing all BasicBlocks that are known to present after
/// vectorization as a predicated block.
@@ -1416,19 +1717,30 @@ private:
/// presence of a cost for an instruction in the mapping indicates that the
/// instruction will be scalarized when vectorizing with the associated
/// vectorization factor. The entries are VF-ScalarCostTy pairs.
- DenseMap<unsigned, ScalarCostsTy> InstsToScalarize;
+ DenseMap<ElementCount, ScalarCostsTy> InstsToScalarize;
/// Holds the instructions known to be uniform after vectorization.
/// The data is collected per VF.
- DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> Uniforms;
+ DenseMap<ElementCount, SmallPtrSet<Instruction *, 4>> Uniforms;
/// Holds the instructions known to be scalar after vectorization.
/// The data is collected per VF.
- DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> Scalars;
+ DenseMap<ElementCount, SmallPtrSet<Instruction *, 4>> Scalars;
/// Holds the instructions (address computations) that are forced to be
/// scalarized.
- DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> ForcedScalars;
+ DenseMap<ElementCount, SmallPtrSet<Instruction *, 4>> ForcedScalars;
+
+ /// PHINodes of the reductions that should be expanded in-loop along with
+ /// their associated chains of reduction operations, in program order from top
+ /// (PHI) to bottom
+ ReductionChainMap InLoopReductionChains;
+
+ /// A Map of inloop reduction operations and their immediate chain operand.
+ /// FIXME: This can be removed once reductions can be costed correctly in
+ /// vplan. This was added to allow quick lookup to the inloop operations,
+ /// without having to loop through InLoopReductionChains.
+ DenseMap<Instruction *, Instruction *> InLoopReductionImmediateChains;
/// Returns the expected difference in cost from scalarizing the expression
/// feeding a predicated instruction \p PredInst. The instructions to
@@ -1436,7 +1748,7 @@ private:
/// non-negative return value implies the expression will be scalarized.
/// Currently, only single-use chains are considered for scalarization.
int computePredInstDiscount(Instruction *PredInst, ScalarCostsTy &ScalarCosts,
- unsigned VF);
+ ElementCount VF);
/// Collect the instructions that are uniform after vectorization. An
/// instruction is uniform if we represent it with a single scalar value in
@@ -1447,27 +1759,28 @@ private:
/// scalarized instruction will be represented by VF scalar values in the
/// vectorized loop, each corresponding to an iteration of the original
/// scalar loop.
- void collectLoopUniforms(unsigned VF);
+ void collectLoopUniforms(ElementCount VF);
/// Collect the instructions that are scalar after vectorization. An
/// instruction is scalar if it is known to be uniform or will be scalarized
/// during vectorization. Non-uniform scalarized instructions will be
/// represented by VF values in the vectorized loop, each corresponding to an
/// iteration of the original scalar loop.
- void collectLoopScalars(unsigned VF);
+ void collectLoopScalars(ElementCount VF);
/// Keeps cost model vectorization decision and cost for instructions.
/// Right now it is used for memory instructions only.
- using DecisionList = DenseMap<std::pair<Instruction *, unsigned>,
- std::pair<InstWidening, unsigned>>;
+ using DecisionList = DenseMap<std::pair<Instruction *, ElementCount>,
+ std::pair<InstWidening, InstructionCost>>;
DecisionList WideningDecisions;
/// Returns true if \p V is expected to be vectorized and it needs to be
/// extracted.
- bool needsExtract(Value *V, unsigned VF) const {
+ bool needsExtract(Value *V, ElementCount VF) const {
Instruction *I = dyn_cast<Instruction>(V);
- if (VF == 1 || !I || !TheLoop->contains(I) || TheLoop->isLoopInvariant(I))
+ if (VF.isScalar() || !I || !TheLoop->contains(I) ||
+ TheLoop->isLoopInvariant(I))
return false;
// Assume we can vectorize V (and hence we need extraction) if the
@@ -1482,11 +1795,21 @@ private:
/// Returns a range containing only operands needing to be extracted.
SmallVector<Value *, 4> filterExtractingOperands(Instruction::op_range Ops,
- unsigned VF) {
+ ElementCount VF) {
return SmallVector<Value *, 4>(make_filter_range(
Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
}
+ /// Determines if we have the infrastructure to vectorize loop \p L and its
+ /// epilogue, assuming the main loop is vectorized by \p VF.
+ bool isCandidateForEpilogueVectorization(const Loop &L,
+ const ElementCount VF) const;
+
+ /// Returns true if epilogue vectorization is considered profitable, and
+ /// false otherwise.
+ /// \p VF is the vectorization factor chosen for the original loop.
+ bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
+
public:
/// The loop that we evaluate.
Loop *TheLoop;
@@ -1529,6 +1852,9 @@ public:
/// Values to ignore in the cost model when VF > 1.
SmallPtrSet<const Value *, 16> VecValuesToIgnore;
+
+ /// Profitable vector factors.
+ SmallVector<VectorizationFactor, 8> ProfitableVFs;
};
} // end namespace llvm
@@ -1549,7 +1875,7 @@ public:
// representation for pragma 'omp simd' is introduced.
static bool isExplicitVecOuterLoop(Loop *OuterLp,
OptimizationRemarkEmitter *ORE) {
- assert(!OuterLp->empty() && "This is not an outer loop");
+ assert(!OuterLp->isInnermost() && "This is not an outer loop");
LoopVectorizeHints Hints(OuterLp, true /*DisableInterleaving*/, *ORE);
// Only outer loops with an explicit vectorization hint are supported.
@@ -1582,7 +1908,7 @@ static void collectSupportedLoops(Loop &L, LoopInfo *LI,
// now, only collect outer loops that have explicit vectorization hints. If we
// are stress testing the VPlan H-CFG construction, we collect the outermost
// loop of every loop nest.
- if (L.empty() || VPlanBuildStressTest ||
+ if (L.isInnermost() || VPlanBuildStressTest ||
(EnableVPlanNativePath && isExplicitVecOuterLoop(&L, ORE))) {
LoopBlocksRPO RPOT(&L);
RPOT.perform(LI);
@@ -1696,10 +2022,10 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
}
void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
- const InductionDescriptor &II, Value *Step, Instruction *EntryVal) {
+ const InductionDescriptor &II, Value *Step, Value *Start,
+ Instruction *EntryVal) {
assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
"Expected either an induction phi-node or a truncate of it!");
- Value *Start = II.getStartValue();
// Construct the initial value of the vector IV in the vector loop preheader
auto CurrIP = Builder.saveIP();
@@ -1729,7 +2055,8 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
// Multiply the vectorization factor by the step using integer or
// floating-point arithmetic as appropriate.
- Value *ConstVF = getSignedIntOrFpConstant(Step->getType(), VF);
+ Value *ConstVF =
+ getSignedIntOrFpConstant(Step->getType(), VF.getKnownMinValue());
Value *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, Step, ConstVF));
// Create a vector splat to use in the induction update.
@@ -1737,10 +2064,10 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
// FIXME: If the step is non-constant, we create the vector splat with
// IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
// handle a constant vector splat.
- Value *SplatVF =
- isa<Constant>(Mul)
- ? ConstantVector::getSplat({VF, false}, cast<Constant>(Mul))
- : Builder.CreateVectorSplat(VF, Mul);
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ Value *SplatVF = isa<Constant>(Mul)
+ ? ConstantVector::getSplat(VF, cast<Constant>(Mul))
+ : Builder.CreateVectorSplat(VF, Mul);
Builder.restoreIP(CurrIP);
// We may need to add the step a number of times, depending on the unroll
@@ -1816,7 +2143,8 @@ void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
VectorLoopValueMap.setVectorValue(CastInst, Part, VectorLoopVal);
}
-void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
+void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
+ TruncInst *Trunc) {
assert((IV->getType()->isIntegerTy() || IV != OldInduction) &&
"Primary induction variable must have an integer type");
@@ -1874,8 +2202,10 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) {
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
for (unsigned Part = 0; Part < UF; ++Part) {
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
Value *EntryPart =
- getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
+ getStepVector(Broadcasted, VF.getKnownMinValue() * Part, Step,
+ ID.getInductionOpcode());
VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
if (Trunc)
addMetadata(EntryPart, Trunc);
@@ -1885,7 +2215,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
// Now do the actual transformations, and start with creating the step value.
Value *Step = CreateStepValue(ID.getStep());
- if (VF <= 1) {
+ if (VF.isZero() || VF.isScalar()) {
Value *ScalarIV = CreateScalarIV(Step);
CreateSplatIV(ScalarIV, Step);
return;
@@ -1896,7 +2226,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
// least one user in the loop that is not widened.
auto NeedsScalarIV = needsScalarInduction(EntryVal);
if (!NeedsScalarIV) {
- createVectorIntOrFpInductionPHI(ID, Step, EntryVal);
+ createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal);
return;
}
@@ -1904,7 +2234,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
// create the phi node, we will splat the scalar induction variable in each
// loop iteration.
if (!shouldScalarizeInstruction(EntryVal)) {
- createVectorIntOrFpInductionPHI(ID, Step, EntryVal);
+ createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal);
Value *ScalarIV = CreateScalarIV(Step);
// Create scalar steps that can be used by instructions we will later
// scalarize. Note that the addition of the scalar steps will not increase
@@ -1926,7 +2256,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
Instruction::BinaryOps BinOp) {
// Create and check the types.
- auto *ValVTy = cast<VectorType>(Val->getType());
+ auto *ValVTy = cast<FixedVectorType>(Val->getType());
int VLen = ValVTy->getNumElements();
Type *STy = Val->getType()->getScalarType();
@@ -1983,8 +2313,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
Instruction *EntryVal,
const InductionDescriptor &ID) {
// We shouldn't have to build scalar steps if we aren't vectorizing.
- assert(VF > 1 && "VF should be greater than one");
-
+ assert(VF.isVector() && "VF should be greater than one");
// Get the value type and ensure it and the step have the same integer type.
Type *ScalarIVTy = ScalarIV->getType()->getScalarType();
assert(ScalarIVTy == Step->getType() &&
@@ -2006,12 +2335,27 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
// iteration. If EntryVal is uniform, we only need to generate the first
// lane. Otherwise, we generate all VF values.
unsigned Lanes =
- Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), VF) ? 1
- : VF;
+ Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), VF)
+ ? 1
+ : VF.getKnownMinValue();
+ assert((!VF.isScalable() || Lanes == 1) &&
+ "Should never scalarize a scalable vector");
// Compute the scalar steps and save the results in VectorLoopValueMap.
for (unsigned Part = 0; Part < UF; ++Part) {
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
- auto *StartIdx = getSignedIntOrFpConstant(ScalarIVTy, VF * Part + Lane);
+ auto *IntStepTy = IntegerType::get(ScalarIVTy->getContext(),
+ ScalarIVTy->getScalarSizeInBits());
+ Value *StartIdx =
+ createStepForVF(Builder, ConstantInt::get(IntStepTy, Part), VF);
+ if (ScalarIVTy->isFloatingPointTy())
+ StartIdx = Builder.CreateSIToFP(StartIdx, ScalarIVTy);
+ StartIdx = addFastMathFlag(Builder.CreateBinOp(
+ AddOp, StartIdx, getSignedIntOrFpConstant(ScalarIVTy, Lane)));
+ // The step returned by `createStepForVF` is a runtime-evaluated value
+ // when VF is scalable. Otherwise, it should be folded into a Constant.
+ assert((VF.isScalable() || isa<Constant>(StartIdx)) &&
+ "Expected StartIdx to be folded to a constant when VF is not "
+ "scalable");
auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
@@ -2045,7 +2389,7 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
// If we aren't vectorizing, we can just copy the scalar map values over to
// the vector map.
- if (VF == 1) {
+ if (VF.isScalar()) {
VectorLoopValueMap.setVectorValue(V, Part, ScalarValue);
return ScalarValue;
}
@@ -2054,7 +2398,11 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
// is known to be uniform after vectorization, this corresponds to lane zero
// of the Part unroll iteration. Otherwise, the last instruction is the one
// we created for the last vector lane of the Part unroll iteration.
- unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) ? 0 : VF - 1;
+ unsigned LastLane = Cost->isUniformAfterVectorization(I, VF)
+ ? 0
+ : VF.getKnownMinValue() - 1;
+ assert((!VF.isScalable() || LastLane == 0) &&
+ "Scalable vectorization can't lead to any scalarized values.");
auto *LastInst = cast<Instruction>(
VectorLoopValueMap.getScalarValue(V, {Part, LastLane}));
@@ -2075,10 +2423,11 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
VectorValue = getBroadcastInstrs(ScalarValue);
VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
} else {
- // Initialize packing with insertelements to start from undef.
- Value *Undef = UndefValue::get(FixedVectorType::get(V->getType(), VF));
- VectorLoopValueMap.setVectorValue(V, Part, Undef);
- for (unsigned Lane = 0; Lane < VF; ++Lane)
+ // Initialize packing with insertelements to start from poison.
+ assert(!VF.isScalable() && "VF is assumed to be non scalable.");
+ Value *Poison = PoisonValue::get(VectorType::get(V->getType(), VF));
+ VectorLoopValueMap.setVectorValue(V, Part, Poison);
+ for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
packScalarIntoVectorValue(V, {Part, Lane});
VectorValue = VectorLoopValueMap.getVectorValue(V, Part);
}
@@ -2117,7 +2466,7 @@ InnerLoopVectorizer::getOrCreateScalarValue(Value *V,
// extractelement instruction.
auto *U = getOrCreateVectorValue(V, Instance.Part);
if (!U->getType()->isVectorTy()) {
- assert(VF == 1 && "Value not scalarized has non-vector type");
+ assert(VF.isScalar() && "Value not scalarized has non-vector type");
return U;
}
@@ -2142,12 +2491,12 @@ void InnerLoopVectorizer::packScalarIntoVectorValue(
Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
assert(Vec->getType()->isVectorTy() && "Invalid type");
+ assert(!VF.isScalable() && "Cannot reverse scalable vectors");
SmallVector<int, 8> ShuffleMask;
- for (unsigned i = 0; i < VF; ++i)
- ShuffleMask.push_back(VF - i - 1);
+ for (unsigned i = 0; i < VF.getKnownMinValue(); ++i)
+ ShuffleMask.push_back(VF.getKnownMinValue() - i - 1);
- return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
- ShuffleMask, "reverse");
+ return Builder.CreateShuffleVector(Vec, ShuffleMask, "reverse");
}
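// [Editor's illustration, not part of the upstream patch] For VF = 4 the mask
// built above is <3, 2, 1, 0>, so reverseVector(<a, b, c, d>) yields
// <d, c, b, a>; the new assert records that this is not yet supported for
// scalable vectors.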
// Return whether we allow using masked interleave-groups (for dealing with
@@ -2172,9 +2521,9 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
// }
// To:
// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
-// %R.vec = shuffle %wide.vec, undef, <0, 3, 6, 9> ; R elements
-// %G.vec = shuffle %wide.vec, undef, <1, 4, 7, 10> ; G elements
-// %B.vec = shuffle %wide.vec, undef, <2, 5, 8, 11> ; B elements
+// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
+// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
+// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
//
// Or translate following interleaved store group (factor = 3):
// for (i = 0; i < N; i+=3) {
@@ -2185,20 +2534,22 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
// }
// To:
// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
-// %B_U.vec = shuffle %B.vec, undef, <0, 1, 2, 3, u, u, u, u>
+// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
void InnerLoopVectorizer::vectorizeInterleaveGroup(
- const InterleaveGroup<Instruction> *Group, VPTransformState &State,
- VPValue *Addr, VPValue *BlockInMask) {
+ const InterleaveGroup<Instruction> *Group, ArrayRef<VPValue *> VPDefs,
+ VPTransformState &State, VPValue *Addr, ArrayRef<VPValue *> StoredValues,
+ VPValue *BlockInMask) {
Instruction *Instr = Group->getInsertPos();
const DataLayout &DL = Instr->getModule()->getDataLayout();
// Prepare for the vector type of the interleaved load/store.
Type *ScalarTy = getMemInstValueType(Instr);
unsigned InterleaveFactor = Group->getFactor();
- auto *VecTy = FixedVectorType::get(ScalarTy, InterleaveFactor * VF);
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ auto *VecTy = VectorType::get(ScalarTy, VF * InterleaveFactor);
// Prepare for the new pointers.
SmallVector<Value *, 2> AddrParts;
@@ -2214,8 +2565,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
// pointer operand of the interleaved access is supposed to be uniform. For
// uniform instructions, we're only required to generate a value for the
// first vector lane in each unroll iteration.
+ assert(!VF.isScalable() &&
+ "scalable vector reverse operation is not implemented");
if (Group->isReverse())
- Index += (VF - 1) * Group->getFactor();
+ Index += (VF.getKnownMinValue() - 1) * Group->getFactor();
for (unsigned Part = 0; Part < UF; Part++) {
Value *AddrPart = State.get(Addr, {Part, 0});
@@ -2246,11 +2599,12 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
}
setDebugLocFromInst(Builder, Instr);
- Value *UndefVec = UndefValue::get(VecTy);
+ Value *PoisonVec = PoisonValue::get(VecTy);
Value *MaskForGaps = nullptr;
if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) {
- MaskForGaps = createBitMaskForGaps(Builder, VF, *Group);
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ MaskForGaps = createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group);
assert(MaskForGaps && "Mask for Gaps is required but it is null");
}
@@ -2266,10 +2620,11 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
Value *GroupMask = MaskForGaps;
if (BlockInMask) {
Value *BlockInMaskPart = State.get(BlockInMask, Part);
- auto *Undefs = UndefValue::get(BlockInMaskPart->getType());
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
Value *ShuffledMask = Builder.CreateShuffleVector(
- BlockInMaskPart, Undefs,
- createReplicatedMask(InterleaveFactor, VF), "interleaved.mask");
+ BlockInMaskPart,
+ createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
+ "interleaved.mask");
GroupMask = MaskForGaps
? Builder.CreateBinOp(Instruction::And, ShuffledMask,
MaskForGaps)
@@ -2277,7 +2632,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
}
NewLoad =
Builder.CreateMaskedLoad(AddrParts[Part], Group->getAlign(),
- GroupMask, UndefVec, "wide.masked.vec");
+ GroupMask, PoisonVec, "wide.masked.vec");
}
else
NewLoad = Builder.CreateAlignedLoad(VecTy, AddrParts[Part],
@@ -2288,6 +2643,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
// For each member in the group, shuffle out the appropriate data from the
// wide loads.
+ unsigned J = 0;
for (unsigned I = 0; I < InterleaveFactor; ++I) {
Instruction *Member = Group->getMember(I);
@@ -2295,28 +2651,33 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
if (!Member)
continue;
- auto StrideMask = createStrideMask(I, InterleaveFactor, VF);
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ auto StrideMask =
+ createStrideMask(I, InterleaveFactor, VF.getKnownMinValue());
for (unsigned Part = 0; Part < UF; Part++) {
Value *StridedVec = Builder.CreateShuffleVector(
- NewLoads[Part], UndefVec, StrideMask, "strided.vec");
+ NewLoads[Part], StrideMask, "strided.vec");
// If this member has different type, cast the result type.
if (Member->getType() != ScalarTy) {
- VectorType *OtherVTy = FixedVectorType::get(Member->getType(), VF);
+ assert(!VF.isScalable() && "VF is assumed to be non scalable.");
+ VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
}
if (Group->isReverse())
StridedVec = reverseVector(StridedVec);
- VectorLoopValueMap.setVectorValue(Member, Part, StridedVec);
+ State.set(VPDefs[J], Member, StridedVec, Part);
}
+ ++J;
}
return;
}
// The sub vector type for current instruction.
- auto *SubVT = FixedVectorType::get(ScalarTy, VF);
+ assert(!VF.isScalable() && "VF is assumed to be non scalable.");
+ auto *SubVT = VectorType::get(ScalarTy, VF);
// Vectorize the interleaved store group.
for (unsigned Part = 0; Part < UF; Part++) {
@@ -2324,11 +2685,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
SmallVector<Value *, 4> StoredVecs;
for (unsigned i = 0; i < InterleaveFactor; i++) {
// Interleaved store group doesn't allow a gap, so each index has a member
- Instruction *Member = Group->getMember(i);
- assert(Member && "Fail to get a member from an interleaved store group");
+      assert(Group->getMember(i) &&
+             "Fail to get a member from an interleaved store group");
+
+ Value *StoredVec = State.get(StoredValues[i], Part);
- Value *StoredVec = getOrCreateVectorValue(
- cast<StoreInst>(Member)->getValueOperand(), Part);
if (Group->isReverse())
StoredVec = reverseVector(StoredVec);
@@ -2344,16 +2704,17 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
Value *WideVec = concatenateVectors(Builder, StoredVecs);
// Interleave the elements in the wide vector.
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
Value *IVec = Builder.CreateShuffleVector(
- WideVec, UndefVec, createInterleaveMask(VF, InterleaveFactor),
+ WideVec, createInterleaveMask(VF.getKnownMinValue(), InterleaveFactor),
"interleaved.vec");
Instruction *NewStoreInstr;
if (BlockInMask) {
Value *BlockInMaskPart = State.get(BlockInMask, Part);
- auto *Undefs = UndefValue::get(BlockInMaskPart->getType());
Value *ShuffledMask = Builder.CreateShuffleVector(
- BlockInMaskPart, Undefs, createReplicatedMask(InterleaveFactor, VF),
+ BlockInMaskPart,
+ createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
"interleaved.mask");
NewStoreInstr = Builder.CreateMaskedStore(
IVec, AddrParts[Part], Group->getAlign(), ShuffledMask);
@@ -2366,11 +2727,9 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
}
}
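// Editor's illustrative sketch (not part of the upstream diff):
// vectorizeInterleaveGroup above builds three kinds of shuffle masks. The
// standalone functions below model what those masks contain for the R,G,B
// example in the comment block (VF = 4, factor = 3); they reflect my reading
// of createStrideMask, createReplicatedMask and createInterleaveMask and are
// not the LLVM implementations themselves.
#include <vector>

// Member I of a factor-F group over VF lanes: {I, I+F, I+2F, ...}.
// E.g. the R elements: strideMask(0, 3, 4) == {0, 3, 6, 9}.
std::vector<int> strideMask(unsigned Start, unsigned Stride, unsigned VF) {
  std::vector<int> M;
  for (unsigned i = 0; i < VF; ++i)
    M.push_back(Start + i * Stride);
  return M;
}

// Each block-mask lane repeated Factor times, used for the group mask:
// replicatedMask(3, 4) == {0,0,0, 1,1,1, 2,2,2, 3,3,3}.
std::vector<int> replicatedMask(unsigned Factor, unsigned VF) {
  std::vector<int> M;
  for (unsigned i = 0; i < VF; ++i)
    for (unsigned j = 0; j < Factor; ++j)
      M.push_back(i);
  return M;
}

// Interleave NumVecs concatenated member vectors of VF lanes each:
// interleaveMask(4, 3) == {0,4,8, 1,5,9, 2,6,10, 3,7,11}.
std::vector<int> interleaveMask(unsigned VF, unsigned NumVecs) {
  std::vector<int> M;
  for (unsigned i = 0; i < VF; ++i)
    for (unsigned j = 0; j < NumVecs; ++j)
      M.push_back(j * VF + i);
  return M;
}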
-void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
- VPTransformState &State,
- VPValue *Addr,
- VPValue *StoredValue,
- VPValue *BlockInMask) {
+void InnerLoopVectorizer::vectorizeMemoryInstruction(
+ Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr,
+ VPValue *StoredValue, VPValue *BlockInMask) {
// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(Instr);
StoreInst *SI = dyn_cast<StoreInst>(Instr);
@@ -2387,7 +2746,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
"CM decision is not to widen the memory instruction");
Type *ScalarDataTy = getMemInstValueType(Instr);
- auto *DataTy = FixedVectorType::get(ScalarDataTy, VF);
+
+ auto *DataTy = VectorType::get(ScalarDataTy, VF);
const Align Alignment = getLoadStoreAlignment(Instr);
// Determine if the pointer operand of the access is either consecutive or
@@ -2419,19 +2779,23 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
InBounds = gep->isInBounds();
if (Reverse) {
+ assert(!VF.isScalable() &&
+ "Reversing vectors is not yet supported for scalable vectors.");
+
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
- PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(ScalarDataTy, Ptr, Builder.getInt32(-Part * VF)));
+ PartPtr = cast<GetElementPtrInst>(Builder.CreateGEP(
+ ScalarDataTy, Ptr, Builder.getInt32(-Part * VF.getKnownMinValue())));
PartPtr->setIsInBounds(InBounds);
- PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(ScalarDataTy, PartPtr, Builder.getInt32(1 - VF)));
+ PartPtr = cast<GetElementPtrInst>(Builder.CreateGEP(
+ ScalarDataTy, PartPtr, Builder.getInt32(1 - VF.getKnownMinValue())));
PartPtr->setIsInBounds(InBounds);
if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
} else {
+ Value *Increment = createStepForVF(Builder, Builder.getInt32(Part), VF);
PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(ScalarDataTy, Ptr, Builder.getInt32(Part * VF)));
+ Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
PartPtr->setIsInBounds(InBounds);
}
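// Editor's illustrative sketch (not part of the upstream diff): for a
// reversed consecutive access the code above offsets the pointer first by
// -Part*VF and then by 1-VF, so each part's wide load/store starts at the
// lowest address it touches. A standalone model of the element offsets
// covered by each unrolled part, assuming a fixed VF:
#include <utility>

// Returns the {first, last} element offset, relative to the scalar pointer,
// touched by unroll part `Part`. E.g. VF = 4: part 0 -> {-3, 0},
// part 1 -> {-7, -4}.
std::pair<int, int> reversedPartRange(unsigned Part, unsigned VF) {
  int Last = -static_cast<int>(Part * VF);       // GEP(Ptr, -Part * VF)
  int First = Last + 1 - static_cast<int>(VF);   // GEP(PartPtr, 1 - VF)
  return {First, Last};
}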
@@ -2486,7 +2850,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
if (isMaskRequired)
NewLI = Builder.CreateMaskedLoad(
- VecPtr, Alignment, BlockInMaskParts[Part], UndefValue::get(DataTy),
+ VecPtr, Alignment, BlockInMaskParts[Part], PoisonValue::get(DataTy),
"wide.masked.load");
else
NewLI =
@@ -2497,7 +2861,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
if (Reverse)
NewLI = reverseVector(NewLI);
}
- VectorLoopValueMap.setVectorValue(Instr, Part, NewLI);
+
+ State.set(Def, Instr, NewLI, Part);
}
}
@@ -2507,6 +2872,12 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPUser &User,
VPTransformState &State) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
+ // llvm.experimental.noalias.scope.decl intrinsics must only be duplicated for
+ // the first lane and part.
+ if (isa<NoAliasScopeDeclInst>(Instr))
+ if (Instance.Lane != 0 || Instance.Part != 0)
+ return;
+
setDebugLocFromInst(Builder, Instr);
// Does this instruction return a value ?
@@ -2519,7 +2890,12 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPUser &User,
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
for (unsigned op = 0, e = User.getNumOperands(); op != e; ++op) {
- auto *NewOp = State.get(User.getOperand(op), Instance);
+ auto *Operand = dyn_cast<Instruction>(Instr->getOperand(op));
+ auto InputInstance = Instance;
+ if (!Operand || !OrigLoop->contains(Operand) ||
+ (Cost->isUniformAfterVectorization(Operand, State.VF)))
+ InputInstance.Lane = 0;
+ auto *NewOp = State.get(User.getOperand(op), InputInstance);
Cloned->setOperand(op, NewOp);
}
addNewMetadata(Cloned, Instr);
@@ -2527,7 +2903,9 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPUser &User,
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
- // Add the cloned scalar to the scalar map entry.
+  // TODO: Set result for VPValue of VPReplicateRecipe. This requires
+ // representing scalar values in VPTransformState. Add the cloned scalar to
+ // the scalar map entry.
VectorLoopValueMap.setScalarValue(Instr, Instance, Cloned);
// If we just cloned a new assumption, add it the assumption cache.
@@ -2564,7 +2942,7 @@ PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start,
Induction->addIncoming(Next, Latch);
// Create the compare.
Value *ICmp = Builder.CreateICmpEQ(Next, End);
- Builder.CreateCondBr(ICmp, L->getExitBlock(), Header);
+ Builder.CreateCondBr(ICmp, L->getUniqueExitBlock(), Header);
// Now we have two terminators. Remove the old one from the block.
Latch->getTerminator()->eraseFromParent();
@@ -2581,7 +2959,7 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
// Find the loop boundaries.
ScalarEvolution *SE = PSE.getSE();
const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
- assert(BackedgeTakenCount != SE->getCouldNotCompute() &&
+ assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
"Invalid loop count");
Type *IdxTy = Legal->getWidestInductionType();
@@ -2627,7 +3005,8 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
Type *Ty = TC->getType();
- Constant *Step = ConstantInt::get(Ty, VF * UF);
+ // This is where we can make the step a runtime constant.
+ Value *Step = createStepForVF(Builder, ConstantInt::get(Ty, UF), VF);
// If the tail is to be folded by masking, round the number of iterations N
// up to a multiple of Step instead of rounding down. This is done by first
@@ -2636,9 +3015,12 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
// that it starts at zero and its Step is a power of two; the loop will then
// exit, with the last early-exit vector comparison also producing all-true.
if (Cost->foldTailByMasking()) {
- assert(isPowerOf2_32(VF * UF) &&
+ assert(isPowerOf2_32(VF.getKnownMinValue() * UF) &&
"VF*UF must be a power of 2 when folding tail by masking");
- TC = Builder.CreateAdd(TC, ConstantInt::get(Ty, VF * UF - 1), "n.rnd.up");
+ assert(!VF.isScalable() &&
+ "Tail folding not yet supported for scalable vectors");
+ TC = Builder.CreateAdd(
+ TC, ConstantInt::get(Ty, VF.getKnownMinValue() * UF - 1), "n.rnd.up");
}
// Now we need to generate the expression for the part of the loop that the
@@ -2648,14 +3030,18 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
// unroll factor (number of SIMD instructions).
Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
- // If there is a non-reversed interleaved group that may speculatively access
- // memory out-of-bounds, we need to ensure that there will be at least one
- // iteration of the scalar epilogue loop. Thus, if the step evenly divides
+ // There are two cases where we need to ensure (at least) the last iteration
+ // runs in the scalar remainder loop. Thus, if the step evenly divides
// the trip count, we set the remainder to be equal to the step. If the step
// does not evenly divide the trip count, no adjustment is necessary since
// there will already be scalar iterations. Note that the minimum iterations
- // check ensures that N >= Step.
- if (VF > 1 && Cost->requiresScalarEpilogue()) {
+ // check ensures that N >= Step. The cases are:
+ // 1) If there is a non-reversed interleaved group that may speculatively
+ // access memory out-of-bounds.
+ // 2) If any instruction may follow a conditionally taken exit. That is, if
+ // the loop contains multiple exiting blocks, or a single exiting block
+ // which is not the latch.
+ if (VF.isVector() && Cost->requiresScalarEpilogue()) {
auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
R = Builder.CreateSelect(IsZero, Step, R);
}
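// Editor's illustrative sketch (not part of the upstream diff): a standalone
// model of the trip-count arithmetic above for a fixed VF, where
// createStepForVF reduces to the constant VF*UF. The final `TC - R`
// subtraction happens just after this hunk in the surrounding function, so
// including it here is an assumption about code not shown.
unsigned vectorTripCount(unsigned TC, unsigned VF, unsigned UF, bool FoldTail,
                         bool RequiresScalarEpilogue) {
  unsigned Step = VF * UF;
  if (FoldTail)                       // round N up to a multiple of Step
    TC += Step - 1;
  unsigned R = TC % Step;             // n.mod.vf
  if (VF > 1 && RequiresScalarEpilogue && R == 0)
    R = Step;                         // keep at least one scalar iteration
  return TC - R;
  // E.g. TC=10, VF=4, UF=1, no folding, epilogue required -> 8 (2 left over);
  //      TC=8 with the same settings -> 4, leaving a full 4-iteration epilogue.
}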
@@ -2668,17 +3054,18 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
const DataLayout &DL) {
// Verify that V is a vector type with same number of elements as DstVTy.
- unsigned VF = DstVTy->getNumElements();
- VectorType *SrcVecTy = cast<VectorType>(V->getType());
+ auto *DstFVTy = cast<FixedVectorType>(DstVTy);
+ unsigned VF = DstFVTy->getNumElements();
+ auto *SrcVecTy = cast<FixedVectorType>(V->getType());
assert((VF == SrcVecTy->getNumElements()) && "Vector dimensions do not match");
Type *SrcElemTy = SrcVecTy->getElementType();
- Type *DstElemTy = DstVTy->getElementType();
+ Type *DstElemTy = DstFVTy->getElementType();
assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
"Vector elements must have same size");
// Do a direct cast if element types are castable.
if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
- return Builder.CreateBitOrPointerCast(V, DstVTy);
+ return Builder.CreateBitOrPointerCast(V, DstFVTy);
}
// V cannot be directly casted to desired vector type.
// May happen when V is a floating point vector but DstVTy is a vector of
@@ -2692,7 +3079,7 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
auto *VecIntTy = FixedVectorType::get(IntTy, VF);
Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
- return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
+ return Builder.CreateBitOrPointerCast(CastVal, DstFVTy);
}
void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
@@ -2713,11 +3100,11 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
// If tail is to be folded, vector loop takes care of all iterations.
Value *CheckMinIters = Builder.getFalse();
- if (!Cost->foldTailByMasking())
- CheckMinIters = Builder.CreateICmp(
- P, Count, ConstantInt::get(Count->getType(), VF * UF),
- "min.iters.check");
-
+ if (!Cost->foldTailByMasking()) {
+ Value *Step =
+ createStepForVF(Builder, ConstantInt::get(Count->getType(), UF), VF);
+ CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
+ }
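// Editor's illustrative sketch (not part of the upstream diff): the reworked
// check above compares the scalar trip count against createStepForVF(UF, VF),
// which for a fixed VF is just VF*UF. The predicate P is chosen earlier in the
// function and is not visible in this hunk, so using "<" below is an
// assumption about the common case:
bool skipVectorLoop(unsigned Count, unsigned VF, unsigned UF, bool FoldTail) {
  if (FoldTail)                // with tail folding the vector loop handles all
    return false;              // iterations, so never bypass it here
  return Count < VF * UF;      // min.iters.check -> branch to the scalar loop
}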
// Create new preheader for vector loop.
LoopVectorPreHeader =
SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(), DT, LI, nullptr,
@@ -2754,7 +3141,9 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
if (C->isZero())
return;
- assert(!SCEVCheckBlock->getParent()->hasOptSize() &&
+ assert(!(SCEVCheckBlock->getParent()->hasOptSize() ||
+ (OptForSizeBasedOnProfile &&
+ Cost->Hints->getForce() != LoopVectorizeHints::FK_Enabled)) &&
"Cannot SCEV check stride or overflow when optimizing for size");
SCEVCheckBlock->setName("vector.scevcheck");
@@ -2792,15 +3181,8 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
const auto &RtPtrChecking = *LAI->getRuntimePointerChecking();
if (!RtPtrChecking.Need)
return;
- Instruction *FirstCheckInst;
- Instruction *MemRuntimeCheck;
- std::tie(FirstCheckInst, MemRuntimeCheck) =
- addRuntimeChecks(MemCheckBlock->getTerminator(), OrigLoop,
- RtPtrChecking.getChecks(), RtPtrChecking.getSE());
- assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking "
- "claimed checks are required");
- if (MemCheckBlock->getParent()->hasOptSize()) {
+ if (MemCheckBlock->getParent()->hasOptSize() || OptForSizeBasedOnProfile) {
assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
"Cannot emit memory checks when optimizing for size, unless forced "
"to vectorize.");
@@ -2820,22 +3202,33 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
SplitBlock(MemCheckBlock, MemCheckBlock->getTerminator(), DT, LI, nullptr,
"vector.ph");
+ auto *CondBranch = cast<BranchInst>(
+ Builder.CreateCondBr(Builder.getTrue(), Bypass, LoopVectorPreHeader));
+ ReplaceInstWithInst(MemCheckBlock->getTerminator(), CondBranch);
+ LoopBypassBlocks.push_back(MemCheckBlock);
+ AddedSafetyChecks = true;
+
// Update dominator only if this is first RT check.
if (LoopBypassBlocks.empty()) {
DT->changeImmediateDominator(Bypass, MemCheckBlock);
DT->changeImmediateDominator(LoopExitBlock, MemCheckBlock);
}
- ReplaceInstWithInst(
- MemCheckBlock->getTerminator(),
- BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheck));
- LoopBypassBlocks.push_back(MemCheckBlock);
- AddedSafetyChecks = true;
+ Instruction *FirstCheckInst;
+ Instruction *MemRuntimeCheck;
+ std::tie(FirstCheckInst, MemRuntimeCheck) =
+ addRuntimeChecks(MemCheckBlock->getTerminator(), OrigLoop,
+ RtPtrChecking.getChecks(), RtPtrChecking.getSE());
+ assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking "
+ "claimed checks are required");
+ CondBranch->setCondition(MemRuntimeCheck);
// We currently don't use LoopVersioning for the actual loop cloning but we
// still use it to add the noalias metadata.
- LVer = std::make_unique<LoopVersioning>(*Legal->getLAI(), OrigLoop, LI, DT,
- PSE.getSE());
+ LVer = std::make_unique<LoopVersioning>(
+ *Legal->getLAI(),
+ Legal->getLAI()->getRuntimePointerChecking()->getChecks(), OrigLoop, LI,
+ DT, PSE.getSE());
LVer->prepareNoAliasMetadata();
}
@@ -2939,74 +3332,35 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
llvm_unreachable("invalid enum");
}
-BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
- /*
- In this function we generate a new loop. The new loop will contain
- the vectorized instructions while the old loop will continue to run the
- scalar remainder.
-
- [ ] <-- loop iteration number check.
- / |
- / v
- | [ ] <-- vector loop bypass (may consist of multiple blocks).
- | / |
- | / v
- || [ ] <-- vector pre header.
- |/ |
- | v
- | [ ] \
- | [ ]_| <-- vector loop.
- | |
- | v
- | -[ ] <--- middle-block.
- | / |
- | / v
- -|- >[ ] <--- new preheader.
- | |
- | v
- | [ ] \
- | [ ]_| <-- old scalar loop to handle remainder.
- \ |
- \ v
- >[ ] <-- exit block.
- ...
- */
-
- MDNode *OrigLoopID = OrigLoop->getLoopID();
-
- // Some loops have a single integer induction variable, while other loops
- // don't. One example is c++ iterators that often have multiple pointer
- // induction variables. In the code below we also support a case where we
- // don't have a single induction variable.
- //
- // We try to obtain an induction variable from the original loop as hard
- // as possible. However if we don't find one that:
- // - is an integer
- // - counts from zero, stepping by one
- // - is the size of the widest induction variable type
- // then we create a new one.
- OldInduction = Legal->getPrimaryInduction();
- Type *IdxTy = Legal->getWidestInductionType();
-
- // Split the single block loop into the two loop structure described above.
+Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
LoopScalarBody = OrigLoop->getHeader();
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
- LoopExitBlock = OrigLoop->getExitBlock();
+ LoopExitBlock = OrigLoop->getUniqueExitBlock();
assert(LoopExitBlock && "Must have an exit block");
assert(LoopVectorPreHeader && "Invalid loop structure");
LoopMiddleBlock =
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
- LI, nullptr, "middle.block");
+ LI, nullptr, Twine(Prefix) + "middle.block");
LoopScalarPreHeader =
SplitBlock(LoopMiddleBlock, LoopMiddleBlock->getTerminator(), DT, LI,
- nullptr, "scalar.ph");
+ nullptr, Twine(Prefix) + "scalar.ph");
+
+ // Set up branch from middle block to the exit and scalar preheader blocks.
+ // completeLoopSkeleton will update the condition to use an iteration check,
+ // if required to decide whether to execute the remainder.
+ BranchInst *BrInst =
+ BranchInst::Create(LoopExitBlock, LoopScalarPreHeader, Builder.getTrue());
+ auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator();
+ BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
+ ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
+
// We intentionally don't let SplitBlock to update LoopInfo since
// LoopVectorBody should belong to another loop than LoopVectorPreHeader.
// LoopVectorBody is explicitly added to the correct place few lines later.
LoopVectorBody =
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
- nullptr, nullptr, "vector.body");
+ nullptr, nullptr, Twine(Prefix) + "vector.body");
// Update dominator for loop exit.
DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
@@ -3023,37 +3377,16 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
LI->addTopLevelLoop(Lp);
}
Lp->addBasicBlockToLoop(LoopVectorBody, *LI);
+ return Lp;
+}
- // Find the loop boundaries.
- Value *Count = getOrCreateTripCount(Lp);
-
- Value *StartIdx = ConstantInt::get(IdxTy, 0);
-
- // Now, compare the new count to zero. If it is zero skip the vector loop and
- // jump to the scalar loop. This check also covers the case where the
- // backedge-taken count is uint##_max: adding one to it will overflow leading
- // to an incorrect trip count of zero. In this (rare) case we will also jump
- // to the scalar loop.
- emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader);
-
- // Generate the code to check any assumptions that we've made for SCEV
- // expressions.
- emitSCEVChecks(Lp, LoopScalarPreHeader);
-
- // Generate the code that checks in runtime if arrays overlap. We put the
- // checks into a separate block to make the more common case of few elements
- // faster.
- emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
-
- // Generate the induction variable.
- // The loop step is equal to the vectorization factor (num of SIMD elements)
- // times the unroll factor (num of SIMD instructions).
- Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
- Constant *Step = ConstantInt::get(IdxTy, VF * UF);
- Induction =
- createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
- getDebugLocFromInstOrOperands(OldInduction));
-
+void InnerLoopVectorizer::createInductionResumeValues(
+ Loop *L, Value *VectorTripCount,
+ std::pair<BasicBlock *, Value *> AdditionalBypass) {
+ assert(VectorTripCount && L && "Expected valid arguments");
+ assert(((AdditionalBypass.first && AdditionalBypass.second) ||
+ (!AdditionalBypass.first && !AdditionalBypass.second)) &&
+ "Inconsistent information about additional bypass.");
// We are going to resume the execution of the scalar loop.
// Go over all of the induction variables that we found and fix the
// PHIs that are left in the scalar version of the loop.
@@ -3061,10 +3394,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// iteration in the vectorized loop.
// If we come from a bypass edge then we need to start from the original
// start value.
-
- // This variable saves the new starting index for the scalar loop. It is used
- // to test if there are any tail iterations left once the vector loop has
- // completed.
for (auto &InductionEntry : Legal->getInductionVars()) {
PHINode *OrigPhi = InductionEntry.first;
InductionDescriptor II = InductionEntry.second;
@@ -3076,20 +3405,32 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// Copy original phi DL over to the new one.
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
Value *&EndValue = IVEndValues[OrigPhi];
+ Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
if (OrigPhi == OldInduction) {
// We know what the end value is.
- EndValue = CountRoundDown;
+ EndValue = VectorTripCount;
} else {
- IRBuilder<> B(Lp->getLoopPreheader()->getTerminator());
+ IRBuilder<> B(L->getLoopPreheader()->getTerminator());
Type *StepType = II.getStep()->getType();
Instruction::CastOps CastOp =
- CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
- Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd");
+ CastInst::getCastOpcode(VectorTripCount, true, StepType, true);
+ Value *CRD = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.crd");
const DataLayout &DL = LoopScalarBody->getModule()->getDataLayout();
EndValue = emitTransformedIndex(B, CRD, PSE.getSE(), DL, II);
EndValue->setName("ind.end");
- }
+ // Compute the end value for the additional bypass (if applicable).
+ if (AdditionalBypass.first) {
+ B.SetInsertPoint(&(*AdditionalBypass.first->getFirstInsertionPt()));
+ CastOp = CastInst::getCastOpcode(AdditionalBypass.second, true,
+ StepType, true);
+ CRD =
+ B.CreateCast(CastOp, AdditionalBypass.second, StepType, "cast.crd");
+ EndValueFromAdditionalBypass =
+ emitTransformedIndex(B, CRD, PSE.getSE(), DL, II);
+ EndValueFromAdditionalBypass->setName("ind.end");
+ }
+ }
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
@@ -3099,42 +3440,44 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// value.
for (BasicBlock *BB : LoopBypassBlocks)
BCResumeVal->addIncoming(II.getStartValue(), BB);
+
+ if (AdditionalBypass.first)
+ BCResumeVal->setIncomingValueForBlock(AdditionalBypass.first,
+ EndValueFromAdditionalBypass);
+
OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
}
+}
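// Editor's illustrative sketch (not part of the upstream diff):
// createInductionResumeValues above computes, for each induction variable,
// the value it reaches after the vector loop so the scalar remainder can
// resume from it. For the primary IV that is the vector trip count itself;
// other affine inductions transform it through their start and step. A
// standalone model of that end value for the simple affine case
// (emitTransformedIndex also handles pointer and cast cases not shown here):
long long inductionEndValue(long long Start, long long Step,
                            unsigned VectorTripCount) {
  // E.g. Start = 100, Step = -2, VectorTripCount = 8 -> resume at 84.
  return Start + static_cast<long long>(VectorTripCount) * Step;
}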
+
+BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L,
+ MDNode *OrigLoopID) {
+ assert(L && "Expected valid loop.");
+
+ // The trip counts should be cached by now.
+ Value *Count = getOrCreateTripCount(L);
+ Value *VectorTripCount = getOrCreateVectorTripCount(L);
- // We need the OrigLoop (scalar loop part) latch terminator to help
- // produce correct debug info for the middle block BB instructions.
- // The legality check stage guarantees that the loop will have a single
- // latch.
- assert(isa<BranchInst>(OrigLoop->getLoopLatch()->getTerminator()) &&
- "Scalar loop latch terminator isn't a branch");
- BranchInst *ScalarLatchBr =
- cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
+ auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator();
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
// If (N - N%VF) == N, then we *don't* need to run the remainder.
// If tail is to be folded, we know we don't need to run the remainder.
- Value *CmpN = Builder.getTrue();
if (!Cost->foldTailByMasking()) {
- CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
- CountRoundDown, "cmp.n",
- LoopMiddleBlock->getTerminator());
+ Instruction *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
+ Count, VectorTripCount, "cmp.n",
+ LoopMiddleBlock->getTerminator());
- // Here we use the same DebugLoc as the scalar loop latch branch instead
+ // Here we use the same DebugLoc as the scalar loop latch terminator instead
// of the corresponding compare because they may have ended up with
// different line numbers and we want to avoid awkward line stepping while
// debugging. Eg. if the compare has got a line number inside the loop.
- cast<Instruction>(CmpN)->setDebugLoc(ScalarLatchBr->getDebugLoc());
+ CmpN->setDebugLoc(ScalarLatchTerm->getDebugLoc());
+ cast<BranchInst>(LoopMiddleBlock->getTerminator())->setCondition(CmpN);
}
- BranchInst *BrInst =
- BranchInst::Create(LoopExitBlock, LoopScalarPreHeader, CmpN);
- BrInst->setDebugLoc(ScalarLatchBr->getDebugLoc());
- ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
-
// Get ready to start creating new instructions into the vectorized body.
- assert(LoopVectorPreHeader == Lp->getLoopPreheader() &&
+ assert(LoopVectorPreHeader == L->getLoopPreheader() &&
"Inconsistent vector loop preheader");
Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt());
@@ -3142,7 +3485,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupVectorized});
if (VectorizedLoopID.hasValue()) {
- Lp->setLoopID(VectorizedLoopID.getValue());
+ L->setLoopID(VectorizedLoopID.getValue());
// Do not setAlreadyVectorized if loop attributes have been defined
// explicitly.
@@ -3152,9 +3495,9 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// Keep all loop hints from the original loop on the vector loop (we'll
// replace the vectorizer-specific hints below).
if (MDNode *LID = OrigLoop->getLoopID())
- Lp->setLoopID(LID);
+ L->setLoopID(LID);
- LoopVectorizeHints Hints(Lp, true, *ORE);
+ LoopVectorizeHints Hints(L, true, *ORE);
Hints.setAlreadyVectorized();
#ifdef EXPENSIVE_CHECKS
@@ -3165,6 +3508,91 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
return LoopVectorPreHeader;
}
+BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
+ /*
+ In this function we generate a new loop. The new loop will contain
+ the vectorized instructions while the old loop will continue to run the
+ scalar remainder.
+
+ [ ] <-- loop iteration number check.
+ / |
+ / v
+ | [ ] <-- vector loop bypass (may consist of multiple blocks).
+ | / |
+ | / v
+ || [ ] <-- vector pre header.
+ |/ |
+ | v
+ | [ ] \
+ | [ ]_| <-- vector loop.
+ | |
+ | v
+ | -[ ] <--- middle-block.
+ | / |
+ | / v
+ -|- >[ ] <--- new preheader.
+ | |
+ | v
+ | [ ] \
+ | [ ]_| <-- old scalar loop to handle remainder.
+ \ |
+ \ v
+ >[ ] <-- exit block.
+ ...
+ */
+
+ // Get the metadata of the original loop before it gets modified.
+ MDNode *OrigLoopID = OrigLoop->getLoopID();
+
+ // Create an empty vector loop, and prepare basic blocks for the runtime
+ // checks.
+ Loop *Lp = createVectorLoopSkeleton("");
+
+ // Now, compare the new count to zero. If it is zero skip the vector loop and
+ // jump to the scalar loop. This check also covers the case where the
+ // backedge-taken count is uint##_max: adding one to it will overflow leading
+ // to an incorrect trip count of zero. In this (rare) case we will also jump
+ // to the scalar loop.
+ emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader);
+
+ // Generate the code to check any assumptions that we've made for SCEV
+ // expressions.
+ emitSCEVChecks(Lp, LoopScalarPreHeader);
+
+ // Generate the code that checks in runtime if arrays overlap. We put the
+ // checks into a separate block to make the more common case of few elements
+ // faster.
+ emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
+
+ // Some loops have a single integer induction variable, while other loops
+  // don't. One example is C++ iterators that often have multiple pointer
+ // induction variables. In the code below we also support a case where we
+ // don't have a single induction variable.
+ //
+ // We try to obtain an induction variable from the original loop as hard
+ // as possible. However if we don't find one that:
+ // - is an integer
+ // - counts from zero, stepping by one
+ // - is the size of the widest induction variable type
+ // then we create a new one.
+ OldInduction = Legal->getPrimaryInduction();
+ Type *IdxTy = Legal->getWidestInductionType();
+ Value *StartIdx = ConstantInt::get(IdxTy, 0);
+ // The loop step is equal to the vectorization factor (num of SIMD elements)
+ // times the unroll factor (num of SIMD instructions).
+ Builder.SetInsertPoint(&*Lp->getHeader()->getFirstInsertionPt());
+ Value *Step = createStepForVF(Builder, ConstantInt::get(IdxTy, UF), VF);
+ Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
+ Induction =
+ createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
+ getDebugLocFromInstOrOperands(OldInduction));
+
+ // Emit phis for the new starting index of the scalar loop.
+ createInductionResumeValues(Lp, CountRoundDown);
+
+ return completeLoopSkeleton(Lp, OrigLoopID);
+}
+
// Fix up external users of the induction variable. At this point, we are
// in LCSSA form, with all external PHIs that use the IV having one input value,
// coming from the remainder loop. We need those PHIs to also have a correct
@@ -3178,7 +3606,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
// value (the value that feeds into the phi from the loop latch).
// We allow both, but they, obviously, have different values.
- assert(OrigLoop->getExitBlock() && "Expected a single exit block");
+ assert(OrigLoop->getUniqueExitBlock() && "Expected a single exit block");
DenseMap<Value *, Value *> MissingVals;
@@ -3284,9 +3712,10 @@ static void cse(BasicBlock *BB) {
}
}
-unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
- unsigned VF,
- bool &NeedToScalarize) {
+InstructionCost
+LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, ElementCount VF,
+ bool &NeedToScalarize) {
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
Function *F = CI->getCalledFunction();
Type *ScalarRetTy = CI->getType();
SmallVector<Type *, 4> Tys, ScalarTys;
@@ -3297,9 +3726,9 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
// to be vectors, so we need to extract individual elements from there,
// execute VF scalar calls, and then gather the result into the vector return
// value.
- unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys,
- TTI::TCK_RecipThroughput);
- if (VF == 1)
+ InstructionCost ScalarCallCost =
+ TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys, TTI::TCK_RecipThroughput);
+ if (VF.isScalar())
return ScalarCallCost;
// Compute corresponding vector type for return value and arguments.
@@ -3309,31 +3738,33 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
// Compute costs of unpacking argument values for the scalar calls and
// packing the return values to a vector.
- unsigned ScalarizationCost = getScalarizationOverhead(CI, VF);
+ InstructionCost ScalarizationCost = getScalarizationOverhead(CI, VF);
- unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
+ InstructionCost Cost =
+ ScalarCallCost * VF.getKnownMinValue() + ScalarizationCost;
// If we can't emit a vector call for this function, then the currently found
// cost is the cost we need to return.
NeedToScalarize = true;
- VFShape Shape = VFShape::get(*CI, {VF, false}, false /*HasGlobalPred*/);
+ VFShape Shape = VFShape::get(*CI, VF, false /*HasGlobalPred*/);
Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
if (!TLI || CI->isNoBuiltin() || !VecFunc)
return Cost;
// If the corresponding vector cost is cheaper, return its cost.
- unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys,
- TTI::TCK_RecipThroughput);
+ InstructionCost VectorCallCost =
+ TTI.getCallInstrCost(nullptr, RetTy, Tys, TTI::TCK_RecipThroughput);
if (VectorCallCost < Cost) {
NeedToScalarize = false;
- return VectorCallCost;
+ Cost = VectorCallCost;
}
return Cost;
}
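// Editor's illustrative sketch (not part of the upstream diff): the cost
// comparison in getVectorCallCost above, modelled with plain integers standing
// in for the InstructionCost values returned by TTI:
#include <utility>

// Returns {cost, needToScalarize}.
std::pair<unsigned, bool> vectorCallCost(unsigned ScalarCallCost, unsigned VF,
                                         unsigned ScalarizationCost,
                                         bool HasVectorVariant,
                                         unsigned VecCallCost) {
  // Default: scalarize the call VF times and pack/unpack the operands.
  unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
  if (HasVectorVariant && VecCallCost < Cost)
    return {VecCallCost, false};      // a real vector call is cheaper
  return {Cost, true};
  // E.g. ScalarCallCost=10, VF=4, ScalarizationCost=6, VecCallCost=18
  //      -> {18, false}: prefer the vectorized function.
}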
-unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
- unsigned VF) {
+InstructionCost
+LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
+ ElementCount VF) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
assert(ID && "Expected intrinsic call!");
@@ -3373,7 +3804,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
Type *ScalarTruncatedTy =
IntegerType::get(OriginalTy->getContext(), KV.second);
auto *TruncatedTy = FixedVectorType::get(
- ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getNumElements());
+ ScalarTruncatedTy,
+ cast<FixedVectorType>(OriginalTy)->getNumElements());
if (TruncatedTy == OriginalTy)
continue;
@@ -3423,13 +3855,13 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
break;
}
} else if (auto *SI = dyn_cast<ShuffleVectorInst>(I)) {
- auto Elements0 =
- cast<VectorType>(SI->getOperand(0)->getType())->getNumElements();
+ auto Elements0 = cast<FixedVectorType>(SI->getOperand(0)->getType())
+ ->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(
SI->getOperand(0),
FixedVectorType::get(ScalarTruncatedTy, Elements0));
- auto Elements1 =
- cast<VectorType>(SI->getOperand(1)->getType())->getNumElements();
+ auto Elements1 = cast<FixedVectorType>(SI->getOperand(1)->getType())
+ ->getNumElements();
auto *O1 = B.CreateZExtOrTrunc(
SI->getOperand(1),
FixedVectorType::get(ScalarTruncatedTy, Elements1));
@@ -3439,16 +3871,16 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
// Don't do anything with the operands, just extend the result.
continue;
} else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
- auto Elements =
- cast<VectorType>(IE->getOperand(0)->getType())->getNumElements();
+ auto Elements = cast<FixedVectorType>(IE->getOperand(0)->getType())
+ ->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(
IE->getOperand(0),
FixedVectorType::get(ScalarTruncatedTy, Elements));
auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);
NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));
} else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
- auto Elements =
- cast<VectorType>(EE->getOperand(0)->getType())->getNumElements();
+ auto Elements = cast<FixedVectorType>(EE->getOperand(0)->getType())
+ ->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(
EE->getOperand(0),
FixedVectorType::get(ScalarTruncatedTy, Elements));
@@ -3490,7 +3922,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
void InnerLoopVectorizer::fixVectorizedLoop() {
// Insert truncates and extends for any truncated instructions as hints to
// InstCombine.
- if (VF > 1)
+ if (VF.isVector())
truncateToMinimalBitwidths();
// Fix widened non-induction PHIs by setting up the PHI operands.
@@ -3531,9 +3963,13 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
// profile is not inherently precise anyway. Note also possible bypass of
// vector code caused by legality checks is ignored, assigning all the weight
// to the vector loop, optimistically.
- setProfileInfoAfterUnrolling(LI->getLoopFor(LoopScalarBody),
- LI->getLoopFor(LoopVectorBody),
- LI->getLoopFor(LoopScalarBody), VF * UF);
+ //
+  // For scalable vectorization we can't know at compile time how many
+  // iterations of the loop are handled in one vector iteration, so instead
+  // assume a pessimistic vscale of '1'.
+ setProfileInfoAfterUnrolling(
+ LI->getLoopFor(LoopScalarBody), LI->getLoopFor(LoopVectorBody),
+ LI->getLoopFor(LoopScalarBody), VF.getKnownMinValue() * UF);
}
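// Editor's illustrative sketch (not part of the upstream diff): the profile
// update above treats one vector iteration as VF*UF scalar iterations, and
// for scalable VFs pessimistically assumes vscale = 1. Roughly:
unsigned expectedVectorIterations(unsigned OrigTripCount, unsigned KnownMinVF,
                                  unsigned UF) {
  // E.g. ~1000 profiled scalar iterations, VF = 4, UF = 2 -> ~125 vector
  // iterations attributed to the vector body.
  return OrigTripCount / (KnownMinVF * UF);
}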
void InnerLoopVectorizer::fixCrossIterationPHIs() {
@@ -3612,11 +4048,12 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// Create a vector from the initial value.
auto *VectorInit = ScalarInit;
- if (VF > 1) {
+ if (VF.isVector()) {
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+ assert(!VF.isScalable() && "VF is assumed to be non scalable.");
VectorInit = Builder.CreateInsertElement(
- UndefValue::get(FixedVectorType::get(VectorInit->getType(), VF)),
- VectorInit, Builder.getInt32(VF - 1), "vector.recur.init");
+ PoisonValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit,
+ Builder.getInt32(VF.getKnownMinValue() - 1), "vector.recur.init");
}
// We constructed a temporary phi node in the first phase of vectorization.
@@ -3657,10 +4094,11 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// We will construct a vector for the recurrence by combining the values for
// the current and previous iterations. This is the required shuffle mask.
- SmallVector<int, 8> ShuffleMask(VF);
- ShuffleMask[0] = VF - 1;
- for (unsigned I = 1; I < VF; ++I)
- ShuffleMask[I] = I + VF - 1;
+ assert(!VF.isScalable());
+ SmallVector<int, 8> ShuffleMask(VF.getKnownMinValue());
+ ShuffleMask[0] = VF.getKnownMinValue() - 1;
+ for (unsigned I = 1; I < VF.getKnownMinValue(); ++I)
+ ShuffleMask[I] = I + VF.getKnownMinValue() - 1;
// The vector from which to take the initial value for the current iteration
// (actual or unrolled). Initially, this is the vector phi node.
@@ -3670,9 +4108,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
for (unsigned Part = 0; Part < UF; ++Part) {
Value *PreviousPart = getOrCreateVectorValue(Previous, Part);
Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part);
- auto *Shuffle = VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart,
- ShuffleMask)
- : Incoming;
+ auto *Shuffle =
+ VF.isVector()
+ ? Builder.CreateShuffleVector(Incoming, PreviousPart, ShuffleMask)
+ : Incoming;
PhiPart->replaceAllUsesWith(Shuffle);
cast<Instruction>(PhiPart)->eraseFromParent();
VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle);
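// Editor's illustrative sketch (not part of the upstream diff): the recurrence
// shuffle above combines the previous iteration's vector (first shuffle
// operand) with the current one (second operand), taking the last lane of the
// former followed by the first VF-1 lanes of the latter. A standalone model of
// that mask, assuming a fixed VF:
#include <vector>

std::vector<int> recurrenceMask(unsigned VF) {
  std::vector<int> Mask(VF);
  Mask[0] = VF - 1;            // last lane of the first (previous) operand
  for (unsigned I = 1; I < VF; ++I)
    Mask[I] = I + VF - 1;      // lanes 0..VF-2 of the second (current) operand
  return Mask;                 // e.g. VF = 4 -> {3, 4, 5, 6}
}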
@@ -3685,10 +4124,11 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// Extract the last vector element in the middle block. This will be the
// initial value for the recurrence when jumping to the scalar loop.
auto *ExtractForScalar = Incoming;
- if (VF > 1) {
+ if (VF.isVector()) {
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
ExtractForScalar = Builder.CreateExtractElement(
- ExtractForScalar, Builder.getInt32(VF - 1), "vector.recur.extract");
+ ExtractForScalar, Builder.getInt32(VF.getKnownMinValue() - 1),
+ "vector.recur.extract");
}
// Extract the second last element in the middle block if the
// Phi is used outside the loop. We need to extract the phi itself
@@ -3696,9 +4136,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// will be the value when jumping to the exit block from the LoopMiddleBlock,
// when the scalar loop is not run at all.
Value *ExtractForPhiUsedOutsideLoop = nullptr;
- if (VF > 1)
+ if (VF.isVector())
ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement(
- Incoming, Builder.getInt32(VF - 2), "vector.recur.extract.for.phi");
+ Incoming, Builder.getInt32(VF.getKnownMinValue() - 2),
+ "vector.recur.extract.for.phi");
// When loop is unrolled without vectorizing, initialize
// ExtractForPhiUsedOutsideLoop with the value just prior to unrolled value of
// `Incoming`. This is analogous to the vectorized case above: extracting the
@@ -3722,69 +4163,31 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// vector recurrence we extracted in the middle block. Since the loop is in
// LCSSA form, we just need to find all the phi nodes for the original scalar
// recurrence in the exit block, and then add an edge for the middle block.
- for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
- if (LCSSAPhi.getIncomingValue(0) == Phi) {
+ // Note that LCSSA does not imply single entry when the original scalar loop
+ // had multiple exiting edges (as we always run the last iteration in the
+ // scalar epilogue); in that case, the exiting path through middle will be
+ // dynamically dead and the value picked for the phi doesn't matter.
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis())
+ if (any_of(LCSSAPhi.incoming_values(),
+ [Phi](Value *V) { return V == Phi; }))
LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
- }
- }
}
void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
- Constant *Zero = Builder.getInt32(0);
-
// Get its reduction variable descriptor.
assert(Legal->isReductionVariable(Phi) &&
"Unable to find the reduction variable");
RecurrenceDescriptor RdxDesc = Legal->getReductionVars()[Phi];
- RecurrenceDescriptor::RecurrenceKind RK = RdxDesc.getRecurrenceKind();
+ RecurKind RK = RdxDesc.getRecurrenceKind();
TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
- RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
- RdxDesc.getMinMaxRecurrenceKind();
setDebugLocFromInst(Builder, ReductionStartValue);
-
- // We need to generate a reduction vector from the incoming scalar.
- // To do so, we need to generate the 'identity' vector and override
- // one of the elements with the incoming scalar reduction. We need
- // to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+ bool IsInLoopReductionPhi = Cost->isInLoopReduction(Phi);
// This is the vector-clone of the value that leaves the loop.
Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();
- // Find the reduction identity variable. Zero for addition, or, xor,
- // one for multiplication, -1 for And.
- Value *Identity;
- Value *VectorStart;
- if (RK == RecurrenceDescriptor::RK_IntegerMinMax ||
- RK == RecurrenceDescriptor::RK_FloatMinMax) {
- // MinMax reduction have the start value as their identify.
- if (VF == 1) {
- VectorStart = Identity = ReductionStartValue;
- } else {
- VectorStart = Identity =
- Builder.CreateVectorSplat(VF, ReductionStartValue, "minmax.ident");
- }
- } else {
- // Handle other reduction kinds:
- Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
- RK, VecTy->getScalarType());
- if (VF == 1) {
- Identity = Iden;
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- VectorStart = ReductionStartValue;
- } else {
- Identity = ConstantVector::getSplat({VF, false}, Iden);
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- VectorStart =
- Builder.CreateInsertElement(Identity, ReductionStartValue, Zero);
- }
- }
-
// Wrap flags are in general invalid after vectorization, clear them.
clearReductionWrapFlags(RdxDesc);
@@ -3798,10 +4201,6 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
for (unsigned Part = 0; Part < UF; ++Part) {
Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part);
Value *Val = getOrCreateVectorValue(LoopVal, Part);
- // Make sure to add the reduction start value only to the
- // first unroll part.
- Value *StartVal = (Part == 0) ? VectorStart : Identity;
- cast<PHINode>(VecRdxPhi)->addIncoming(StartVal, LoopVectorPreHeader);
cast<PHINode>(VecRdxPhi)
->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
}
@@ -3816,8 +4215,9 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// If tail is folded by masking, the vector value to leave the loop should be
// a Select choosing between the vectorized LoopExitInst and vectorized Phi,
- // instead of the former.
- if (Cost->foldTailByMasking()) {
+ // instead of the former. For an inloop reduction the reduction will already
+ // be predicated, and does not need to be handled here.
+ if (Cost->foldTailByMasking() && !IsInLoopReductionPhi) {
for (unsigned Part = 0; Part < UF; ++Part) {
Value *VecLoopExitInst =
VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
@@ -3831,14 +4231,31 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
}
assert(Sel && "Reduction exit feeds no select");
VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, Sel);
+
+ // If the target can create a predicated operator for the reduction at no
+ // extra cost in the loop (for example a predicated vadd), it can be
+ // cheaper for the select to remain in the loop than be sunk out of it,
+ // and so use the select value for the phi instead of the old
+ // LoopExitValue.
+ RecurrenceDescriptor RdxDesc = Legal->getReductionVars()[Phi];
+ if (PreferPredicatedReductionSelect ||
+ TTI->preferPredicatedReductionSelect(
+ RdxDesc.getOpcode(), Phi->getType(),
+ TargetTransformInfo::ReductionFlags())) {
+ auto *VecRdxPhi = cast<PHINode>(getOrCreateVectorValue(Phi, Part));
+ VecRdxPhi->setIncomingValueForBlock(
+ LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel);
+ }
}
}
// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
- if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {
- Type *RdxVecTy = FixedVectorType::get(RdxDesc.getRecurrenceType(), VF);
+ if (VF.isVector() && Phi->getType() != RdxDesc.getRecurrenceType()) {
+ assert(!IsInLoopReductionPhi && "Unexpected truncated inloop reduction!");
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
Builder.SetInsertPoint(
LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator());
VectorParts RdxParts(UF);
@@ -3865,7 +4282,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0);
- unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
+ unsigned Op = RecurrenceDescriptor::getOpcode(RK);
// The middle block terminator has already been assigned a DebugLoc here (the
// OrigLoop's single latch terminator). We want the whole middle block to
@@ -3884,14 +4301,14 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
ReducedPartRdx, "bin.rdx"),
RdxDesc.getFastMathFlags());
else
- ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
- RdxPart);
+ ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
}
- if (VF > 1) {
- bool NoNaN = Legal->hasFunNoNaNAttr();
+ // Create the reduction after the loop. Note that inloop reductions create the
+ // target reduction in the loop using a Reduction recipe.
+ if (VF.isVector() && !IsInLoopReductionPhi) {
ReducedPartRdx =
- createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, NoNaN);
+ createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx);
// If the reduction can be performed in a smaller type, we need to extend
// the reduction to the wider type before we branch to the original loop.
if (Phi->getType() != RdxDesc.getRecurrenceType())
@@ -3911,21 +4328,17 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
- // We know that the loop is in LCSSA form. We need to update the
- // PHI nodes in the exit blocks.
- for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
- // All PHINodes need to have a single entry edge, or two if
- // we already fixed them.
- assert(LCSSAPhi.getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
- // We found a reduction value exit-PHI. Update it with the
- // incoming bypass edge.
- if (LCSSAPhi.getIncomingValue(0) == LoopExitInst)
+ // We know that the loop is in LCSSA form. We need to update the PHI nodes
+ // in the exit blocks. See comment on analogous loop in
+  // fixFirstOrderRecurrence for a more complete explanation of the logic.
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis())
+ if (any_of(LCSSAPhi.incoming_values(),
+ [LoopExitInst](Value *V) { return V == LoopExitInst; }))
LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock);
- } // end of the LCSSA phi scan.
- // Fix the scalar loop reduction variable with the incoming reduction sum
- // from the vector body and from the backedge value.
+ // Fix the scalar loop reduction variable with the incoming reduction sum
+ // from the vector body and from the backedge value.
int IncomingEdgeBlockIdx =
Phi->getBasicBlockIndex(OrigLoop->getLoopLatch());
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
@@ -3937,9 +4350,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
void InnerLoopVectorizer::clearReductionWrapFlags(
RecurrenceDescriptor &RdxDesc) {
- RecurrenceDescriptor::RecurrenceKind RK = RdxDesc.getRecurrenceKind();
- if (RK != RecurrenceDescriptor::RK_IntegerAdd &&
- RK != RecurrenceDescriptor::RK_IntegerMult)
+ RecurKind RK = RdxDesc.getRecurrenceKind();
+ if (RK != RecurKind::Add && RK != RecurKind::Mul)
return;
Instruction *LoopExitInstr = RdxDesc.getLoopExitInstr();
@@ -3968,22 +4380,27 @@ void InnerLoopVectorizer::clearReductionWrapFlags(
void InnerLoopVectorizer::fixLCSSAPHIs() {
for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
- if (LCSSAPhi.getNumIncomingValues() == 1) {
- auto *IncomingValue = LCSSAPhi.getIncomingValue(0);
- // Non-instruction incoming values will have only one value.
- unsigned LastLane = 0;
- if (isa<Instruction>(IncomingValue))
- LastLane = Cost->isUniformAfterVectorization(
- cast<Instruction>(IncomingValue), VF)
- ? 0
- : VF - 1;
- // Can be a loop invariant incoming value or the last scalar value to be
- // extracted from the vectorized loop.
- Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
- Value *lastIncomingValue =
- getOrCreateScalarValue(IncomingValue, { UF - 1, LastLane });
- LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock);
- }
+ if (LCSSAPhi.getBasicBlockIndex(LoopMiddleBlock) != -1)
+ // Some phis were already hand updated by the reduction and recurrence
+ // code above, leave them alone.
+ continue;
+
+ auto *IncomingValue = LCSSAPhi.getIncomingValue(0);
+ // Non-instruction incoming values will have only one value.
+ unsigned LastLane = 0;
+ if (isa<Instruction>(IncomingValue))
+ LastLane = Cost->isUniformAfterVectorization(
+ cast<Instruction>(IncomingValue), VF)
+ ? 0
+ : VF.getKnownMinValue() - 1;
+ assert((!VF.isScalable() || LastLane == 0) &&
+           "scalable vectors don't support non-uniform scalars yet");
+ // Can be a loop invariant incoming value or the last scalar value to be
+ // extracted from the vectorized loop.
+ Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
+ Value *lastIncomingValue =
+ getOrCreateScalarValue(IncomingValue, { UF - 1, LastLane });
+ LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock);
}
}
@@ -4087,9 +4504,9 @@ void InnerLoopVectorizer::fixNonInductionPHIs() {
}
}
-void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPUser &Operands,
- unsigned UF, unsigned VF,
- bool IsPtrLoopInvariant,
+void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPValue *VPDef,
+ VPUser &Operands, unsigned UF,
+ ElementCount VF, bool IsPtrLoopInvariant,
SmallBitVector &IsIndexLoopInvariant,
VPTransformState &State) {
// Construct a vector GEP by widening the operands of the scalar GEP as
@@ -4098,7 +4515,7 @@ void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPUser &Operands,
// is vector-typed. Thus, to keep the representation compact, we only use
// vector-typed operands for loop-varying values.
- if (VF > 1 && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
+ if (VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
// If we are vectorizing, but the GEP has only loop-invariant operands,
// the GEP we build (by only using vector-typed operands for
// loop-varying values) would be a scalar pointer. Thus, to ensure we
@@ -4114,7 +4531,7 @@ void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPUser &Operands,
auto *Clone = Builder.Insert(GEP->clone());
for (unsigned Part = 0; Part < UF; ++Part) {
Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
- VectorLoopValueMap.setVectorValue(GEP, Part, EntryPart);
+ State.set(VPDef, GEP, EntryPart, Part);
addMetadata(EntryPart, GEP);
}
} else {
@@ -4149,16 +4566,19 @@ void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPUser &Operands,
? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
Indices)
: Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
- assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
+ assert((VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
- VectorLoopValueMap.setVectorValue(GEP, Part, NewGEP);
+ State.set(VPDef, GEP, NewGEP, Part);
addMetadata(NewGEP, GEP);
}
}
}
-void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
- unsigned VF) {
+void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
+ RecurrenceDescriptor *RdxDesc,
+ Value *StartV, unsigned UF,
+ ElementCount VF) {
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
PHINode *P = cast<PHINode>(PN);
if (EnableVPlanNativePath) {
// Currently we enter here in the VPlan-native path for non-induction
@@ -4166,7 +4586,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
// Create a vector phi with no operands - the vector phi operands will be
// set at the end of vector code generation.
Type *VecTy =
- (VF == 1) ? PN->getType() : FixedVectorType::get(PN->getType(), VF);
+ (VF.isScalar()) ? PN->getType() : VectorType::get(PN->getType(), VF);
Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
VectorLoopValueMap.setVectorValue(P, 0, VecPhi);
OrigPHIsToFix.push_back(P);
@@ -4181,18 +4601,60 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
// this value when we vectorize all of the instructions that use the PHI.
- if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) {
+ if (RdxDesc || Legal->isFirstOrderRecurrence(P)) {
+ Value *Iden = nullptr;
+ bool ScalarPHI =
+ (VF.isScalar()) || Cost->isInLoopReduction(cast<PHINode>(PN));
+ Type *VecTy =
+ ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), VF);
+
+ if (RdxDesc) {
+ assert(Legal->isReductionVariable(P) && StartV &&
+ "RdxDesc should only be set for reduction variables; in that case "
+ "a StartV is also required");
+ RecurKind RK = RdxDesc->getRecurrenceKind();
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) {
+      // MinMax reductions have the start value as their identity.
+ if (ScalarPHI) {
+ Iden = StartV;
+ } else {
+ IRBuilderBase::InsertPointGuard IPBuilder(Builder);
+ Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+ StartV = Iden = Builder.CreateVectorSplat(VF, StartV, "minmax.ident");
+ }
+ } else {
+ Constant *IdenC = RecurrenceDescriptor::getRecurrenceIdentity(
+ RK, VecTy->getScalarType());
+ Iden = IdenC;
+
+ if (!ScalarPHI) {
+ Iden = ConstantVector::getSplat(VF, IdenC);
+ IRBuilderBase::InsertPointGuard IPBuilder(Builder);
+ Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+ Constant *Zero = Builder.getInt32(0);
+ StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
+ }
+ }
+ }
+
for (unsigned Part = 0; Part < UF; ++Part) {
// This is phase one of vectorizing PHIs.
- Type *VecTy =
- (VF == 1) ? PN->getType() : FixedVectorType::get(PN->getType(), VF);
Value *EntryPart = PHINode::Create(
VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
VectorLoopValueMap.setVectorValue(P, Part, EntryPart);
+ if (StartV) {
+ // Make sure to add the reduction start value only to the
+ // first unroll part.
+ Value *StartVal = (Part == 0) ? StartV : Iden;
+ cast<PHINode>(EntryPart)->addIncoming(StartVal, LoopVectorPreHeader);
+ }
}
return;
}
+ assert(!Legal->isReductionVariable(P) &&
+ "reductions should be handled above");
+
setDebugLocFromInst(Builder, P);
// This PHINode must be an induction variable.
@@ -4213,26 +4675,74 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
case InductionDescriptor::IK_PtrInduction: {
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
- // This is the normalized GEP that starts counting at zero.
- Value *PtrInd = Induction;
- PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType());
- // Determine the number of scalars we need to generate for each unroll
- // iteration. If the instruction is uniform, we only need to generate the
- // first lane. Otherwise, we generate all VF values.
- unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF;
- // These are the scalar results. Notice that we don't generate vector GEPs
- // because scalar GEPs result in better code.
- for (unsigned Part = 0; Part < UF; ++Part) {
- for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
- Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF);
- Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
- Value *SclrGep =
- emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II);
- SclrGep->setName("next.gep");
- VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep);
+
+ if (Cost->isScalarAfterVectorization(P, VF)) {
+ // This is the normalized GEP that starts counting at zero.
+ Value *PtrInd =
+ Builder.CreateSExtOrTrunc(Induction, II.getStep()->getType());
+ // Determine the number of scalars we need to generate for each unroll
+ // iteration. If the instruction is uniform, we only need to generate the
+ // first lane. Otherwise, we generate all VF values.
+ unsigned Lanes =
+ Cost->isUniformAfterVectorization(P, VF) ? 1 : VF.getKnownMinValue();
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
+ Constant *Idx = ConstantInt::get(PtrInd->getType(),
+ Lane + Part * VF.getKnownMinValue());
+ Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
+ Value *SclrGep =
+ emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II);
+ SclrGep->setName("next.gep");
+ VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep);
+ }
}
+ return;
+ }
+ assert(isa<SCEVConstant>(II.getStep()) &&
+ "Induction step not a SCEV constant!");
+ Type *PhiType = II.getStep()->getType();
+
+ // Build a pointer phi
+ Value *ScalarStartValue = II.getStartValue();
+ Type *ScStValueType = ScalarStartValue->getType();
+ PHINode *NewPointerPhi =
+ PHINode::Create(ScStValueType, 2, "pointer.phi", Induction);
+ NewPointerPhi->addIncoming(ScalarStartValue, LoopVectorPreHeader);
+
+ // A pointer induction, performed by using a gep
+ BasicBlock *LoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch();
+ Instruction *InductionLoc = LoopLatch->getTerminator();
+ const SCEV *ScalarStep = II.getStep();
+ SCEVExpander Exp(*PSE.getSE(), DL, "induction");
+ Value *ScalarStepValue =
+ Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc);
+ Value *InductionGEP = GetElementPtrInst::Create(
+ ScStValueType->getPointerElementType(), NewPointerPhi,
+ Builder.CreateMul(
+ ScalarStepValue,
+ ConstantInt::get(PhiType, VF.getKnownMinValue() * UF)),
+ "ptr.ind", InductionLoc);
+ NewPointerPhi->addIncoming(InductionGEP, LoopLatch);
+
+ // Create UF many actual address geps that use the pointer
+ // phi as base and a vectorized version of the step value
+ // (<step*0, ..., step*N>) as offset.
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Constant *, 8> Indices;
+ // Create a vector of consecutive numbers from zero to VF.
+ for (unsigned i = 0; i < VF.getKnownMinValue(); ++i)
+ Indices.push_back(
+ ConstantInt::get(PhiType, i + Part * VF.getKnownMinValue()));
+ Constant *StartOffset = ConstantVector::get(Indices);
+
+ Value *GEP = Builder.CreateGEP(
+ ScStValueType->getPointerElementType(), NewPointerPhi,
+ Builder.CreateMul(
+ StartOffset,
+ Builder.CreateVectorSplat(VF.getKnownMinValue(), ScalarStepValue),
+ "vector.gep"));
+ VectorLoopValueMap.setVectorValue(P, Part, GEP);
}
- return;
}
}
}
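For the pointer-induction path added above, a hedged sketch of the per-part
offsets it feeds into the "vector.gep" instructions (the helper below is
illustrative only; in the real code the offsets are IR constants multiplied by
a splat of the scalar step):

#include <cstdint>
#include <vector>

// With VF = 4, UF = 2 and scalar step S, Part 0 gets {0, S, 2S, 3S} and
// Part 1 gets {4S, 5S, 6S, 7S}; the pointer.phi itself advances by
// S * VF * UF per vector iteration via the "ptr.ind" GEP.
std::vector<int64_t> pointerInductionOffsets(unsigned VF, unsigned Part,
                                             int64_t Step) {
  std::vector<int64_t> Offsets;
  for (unsigned I = 0; I < VF; ++I)
    Offsets.push_back(static_cast<int64_t>(I + Part * VF) * Step);
  return Offsets;
}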
@@ -4255,7 +4765,8 @@ static bool mayDivideByZero(Instruction &I) {
return !CInt || CInt->isZero();
}
-void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User,
+void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def,
+ VPUser &User,
VPTransformState &State) {
switch (I.getOpcode()) {
case Instruction::Call:
@@ -4297,7 +4808,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User,
VecOp->copyIRFlags(&I);
// Use this vector value for all users of the original instruction.
- VectorLoopValueMap.setVectorValue(&I, Part, V);
+ State.set(Def, &I, V, Part);
addMetadata(V, &I);
}
@@ -4321,7 +4832,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User,
} else {
C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
}
- VectorLoopValueMap.setVectorValue(&I, Part, C);
+ State.set(Def, &I, C, Part);
addMetadata(C, &I);
}
@@ -4345,12 +4856,12 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User,
/// Vectorize casts.
Type *DestTy =
- (VF == 1) ? CI->getType() : FixedVectorType::get(CI->getType(), VF);
+ (VF.isScalar()) ? CI->getType() : VectorType::get(CI->getType(), VF);
for (unsigned Part = 0; Part < UF; ++Part) {
Value *A = State.get(User.getOperand(0), Part);
Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
- VectorLoopValueMap.setVectorValue(&I, Part, Cast);
+ State.set(Def, &I, Cast, Part);
addMetadata(Cast, &I);
}
break;
@@ -4362,7 +4873,8 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User,
} // end of switch.
}
-void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
+void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
+ VPUser &ArgOperands,
VPTransformState &State) {
assert(!isa<DbgInfoIntrinsic>(I) &&
"DbgInfoIntrinsic should have been dropped during VPlan construction");
@@ -4373,7 +4885,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
SmallVector<Type *, 4> Tys;
for (Value *ArgOperand : CI->arg_operands())
- Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
+ Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue()));
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
@@ -4381,11 +4893,13 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
// version of the instruction.
// Is it beneficial to perform intrinsic call compared to lib call?
bool NeedToScalarize = false;
- unsigned CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize);
- bool UseVectorIntrinsic =
- ID && Cost->getVectorIntrinsicCost(CI, VF) <= CallCost;
+ InstructionCost CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize);
+ InstructionCost IntrinsicCost = ID ? Cost->getVectorIntrinsicCost(CI, VF) : 0;
+ bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost;
assert((UseVectorIntrinsic || !NeedToScalarize) &&
"Instruction should be scalarized elsewhere.");
+ assert(IntrinsicCost.isValid() && CallCost.isValid() &&
+ "Cannot have invalid costs while widening");
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
@@ -4404,15 +4918,15 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
if (UseVectorIntrinsic) {
// Use vector version of the intrinsic.
Type *TysForDecl[] = {CI->getType()};
- if (VF > 1)
- TysForDecl[0] =
- FixedVectorType::get(CI->getType()->getScalarType(), VF);
+ if (VF.isVector()) {
+ assert(!VF.isScalable() && "VF is assumed to be non scalable.");
+ TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ }
VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
assert(VectorF && "Can't retrieve vector intrinsic.");
} else {
// Use vector version of the function call.
- const VFShape Shape =
- VFShape::get(*CI, {VF, false} /*EC*/, false /*HasGlobalPred*/);
+ const VFShape Shape = VFShape::get(*CI, VF, false /*HasGlobalPred*/);
#ifndef NDEBUG
assert(VFDatabase(*CI).getVectorizedFunction(Shape) != nullptr &&
"Can't create vector function.");
@@ -4426,12 +4940,12 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
if (isa<FPMathOperator>(V))
V->copyFastMathFlags(CI);
- VectorLoopValueMap.setVectorValue(&I, Part, V);
+ State.set(Def, &I, V, Part);
addMetadata(V, &I);
}
}
-void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I,
+void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I, VPValue *VPDef,
VPUser &Operands,
bool InvariantCond,
VPTransformState &State) {
@@ -4450,16 +4964,16 @@ void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I,
Value *Op0 = State.get(Operands.getOperand(1), Part);
Value *Op1 = State.get(Operands.getOperand(2), Part);
Value *Sel = Builder.CreateSelect(Cond, Op0, Op1);
- VectorLoopValueMap.setVectorValue(&I, Part, Sel);
+ State.set(VPDef, &I, Sel, Part);
addMetadata(Sel, &I);
}
}
-void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
+void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
// this check. Collecting Scalars for VF=1 does not make any sense.
- assert(VF >= 2 && Scalars.find(VF) == Scalars.end() &&
+ assert(VF.isVector() && Scalars.find(VF) == Scalars.end() &&
"This function should not be visited twice for the same VF");
SmallSetVector<Instruction *, 8> Worklist;
@@ -4468,6 +4982,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// accesses that will remain scalar.
SmallSetVector<Instruction *, 8> ScalarPtrs;
SmallPtrSet<Instruction *, 8> PossibleNonScalarPtrs;
+ auto *Latch = TheLoop->getLoopLatch();
// A helper that returns true if the use of Ptr by MemAccess will be scalar.
// The pointer operands of loads and stores will be scalar as long as the
@@ -4493,11 +5008,33 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
!TheLoop->isLoopInvariant(V);
};
- // A helper that evaluates a memory access's use of a pointer. If the use
- // will be a scalar use, and the pointer is only used by memory accesses, we
- // place the pointer in ScalarPtrs. Otherwise, the pointer is placed in
- // PossibleNonScalarPtrs.
+ auto isScalarPtrInduction = [&](Instruction *MemAccess, Value *Ptr) {
+ if (!isa<PHINode>(Ptr) ||
+ !Legal->getInductionVars().count(cast<PHINode>(Ptr)))
+ return false;
+ auto &Induction = Legal->getInductionVars()[cast<PHINode>(Ptr)];
+ if (Induction.getKind() != InductionDescriptor::IK_PtrInduction)
+ return false;
+ return isScalarUse(MemAccess, Ptr);
+ };
+
+ // A helper that evaluates a memory access's use of a pointer. If the
+  // pointer is actually the pointer induction of a loop, it is inserted
+  // into the Worklist. If the use will be a scalar use, and the
+ // pointer is only used by memory accesses, we place the pointer in
+ // ScalarPtrs. Otherwise, the pointer is placed in PossibleNonScalarPtrs.
auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) {
+ if (isScalarPtrInduction(MemAccess, Ptr)) {
+ Worklist.insert(cast<Instruction>(Ptr));
+ Instruction *Update = cast<Instruction>(
+ cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
+ Worklist.insert(Update);
+ LLVM_DEBUG(dbgs() << "LV: Found new scalar instruction: " << *Ptr
+ << "\n");
+ LLVM_DEBUG(dbgs() << "LV: Found new scalar instruction: " << *Update
+ << "\n");
+ return;
+ }
// We only care about bitcast and getelementptr instructions contained in
// the loop.
if (!isLoopVaryingBitCastOrGEP(Ptr))
@@ -4521,10 +5058,9 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
};
  // We seed the scalars analysis with two classes of instructions: (1)
- // instructions marked uniform-after-vectorization, (2) bitcast and
- // getelementptr instructions used by memory accesses requiring a scalar use,
- // and (3) pointer induction variables and their update instructions (we
- // currently only scalarize these).
+ // instructions marked uniform-after-vectorization and (2) bitcast,
+ // getelementptr and (pointer) phi instructions used by memory accesses
+ // requiring a scalar use.
//
// (1) Add to the worklist all instructions that have been identified as
// uniform-after-vectorization.
@@ -4550,24 +5086,6 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
Worklist.insert(I);
}
- // (3) Add to the worklist all pointer induction variables and their update
- // instructions.
- //
- // TODO: Once we are able to vectorize pointer induction variables we should
- // no longer insert them into the worklist here.
- auto *Latch = TheLoop->getLoopLatch();
- for (auto &Induction : Legal->getInductionVars()) {
- auto *Ind = Induction.first;
- auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
- if (Induction.second.getKind() != InductionDescriptor::IK_PtrInduction)
- continue;
- Worklist.insert(Ind);
- Worklist.insert(IndUpdate);
- LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n");
- LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate
- << "\n");
- }
-
// Insert the forced scalars.
// FIXME: Currently widenPHIInstruction() often creates a dead vector
// induction variable when the PHI user is scalarized.
@@ -4603,14 +5121,6 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
auto *Ind = Induction.first;
auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
- // We already considered pointer induction variables, so there's no reason
- // to look at their users again.
- //
- // TODO: Once we are able to vectorize pointer induction variables we
- // should no longer skip over them here.
- if (Induction.second.getKind() == InductionDescriptor::IK_PtrInduction)
- continue;
-
// If tail-folding is applied, the primary induction variable will be used
// to feed a vector compare.
if (Ind == Legal->getPrimaryInduction() && foldTailByMasking())
@@ -4646,7 +5156,8 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
Scalars[VF].insert(Worklist.begin(), Worklist.end());
}
-bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigned VF) {
+bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I,
+ ElementCount VF) {
if (!blockNeedsPredication(I->getParent()))
return false;
switch(I->getOpcode()) {
@@ -4660,7 +5171,7 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
auto *Ty = getMemInstValueType(I);
// We have already decided how to vectorize this instruction, get that
// result.
- if (VF > 1) {
+ if (VF.isVector()) {
InstWidening WideningDecision = getWideningDecision(I, VF);
assert(WideningDecision != CM_Unknown &&
"Widening decision should be ready at this moment");
@@ -4681,8 +5192,8 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
return false;
}
-bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
- unsigned VF) {
+bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
+ Instruction *I, ElementCount VF) {
assert(isAccessInterleaved(I) && "Expecting interleaved access.");
assert(getWideningDecision(I, VF) == CM_Unknown &&
"Decision should not be set yet.");
@@ -4693,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
auto *ScalarTy = getMemInstValueType(I);
- if (hasIrregularType(ScalarTy, DL, VF))
+ if (hasIrregularType(ScalarTy, DL))
return false;
// Check if masking is required.
@@ -4718,8 +5229,8 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
: TTI.isLegalMaskedStore(Ty, Alignment);
}
-bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
- unsigned VF) {
+bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
+ Instruction *I, ElementCount VF) {
// Get and ensure we have a valid memory instruction.
LoadInst *LI = dyn_cast<LoadInst>(I);
StoreInst *SI = dyn_cast<StoreInst>(I);
@@ -4740,19 +5251,19 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
- if (hasIrregularType(ScalarTy, DL, VF))
+ if (hasIrregularType(ScalarTy, DL))
return false;
return true;
}
-void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
+void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// We should not collect Uniforms more than once per VF. Right now,
// this function is called from collectUniformsAndScalars(), which
// already does this check. Collecting Uniforms for VF=1 does not make any
// sense.
- assert(VF >= 2 && Uniforms.find(VF) == Uniforms.end() &&
+ assert(VF.isVector() && Uniforms.find(VF) == Uniforms.end() &&
"This function should not be visited twice for the same VF");
// Visit the list of Uniforms. If we'll not find any uniform value, we'll
@@ -4778,6 +5289,11 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
// replicating region where only a single instance out of VF should be formed.
// TODO: optimize such seldom cases if found important, see PR40816.
auto addToWorklistIfAllowed = [&](Instruction *I) -> void {
+ if (isOutOfScope(I)) {
+ LLVM_DEBUG(dbgs() << "LV: Found not uniform due to scope: "
+ << *I << "\n");
+ return;
+ }
if (isScalarWithPredication(I, VF)) {
LLVM_DEBUG(dbgs() << "LV: Found not uniform being ScalarWithPredication: "
<< *I << "\n");
@@ -4794,65 +5310,71 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
addToWorklistIfAllowed(Cmp);
- // Holds consecutive and consecutive-like pointers. Consecutive-like pointers
- // are pointers that are treated like consecutive pointers during
- // vectorization. The pointer operands of interleaved accesses are an
- // example.
- SmallSetVector<Instruction *, 8> ConsecutiveLikePtrs;
-
- // Holds pointer operands of instructions that are possibly non-uniform.
- SmallPtrSet<Instruction *, 8> PossibleNonUniformPtrs;
-
- auto isUniformDecision = [&](Instruction *I, unsigned VF) {
+ auto isUniformDecision = [&](Instruction *I, ElementCount VF) {
InstWidening WideningDecision = getWideningDecision(I, VF);
assert(WideningDecision != CM_Unknown &&
"Widening decision should be ready at this moment");
+ // A uniform memory op is itself uniform. We exclude uniform stores
+ // here as they demand the last lane, not the first one.
+ if (isa<LoadInst>(I) && Legal->isUniformMemOp(*I)) {
+ assert(WideningDecision == CM_Scalarize);
+ return true;
+ }
+
return (WideningDecision == CM_Widen ||
WideningDecision == CM_Widen_Reverse ||
WideningDecision == CM_Interleave);
};
- // Iterate over the instructions in the loop, and collect all
- // consecutive-like pointer operands in ConsecutiveLikePtrs. If it's possible
- // that a consecutive-like pointer operand will be scalarized, we collect it
- // in PossibleNonUniformPtrs instead. We use two sets here because a single
- // getelementptr instruction can be used by both vectorized and scalarized
- // memory instructions. For example, if a loop loads and stores from the same
- // location, but the store is conditional, the store will be scalarized, and
- // the getelementptr won't remain uniform.
+
+
+ // Returns true if Ptr is the pointer operand of a memory access instruction
+ // I, and I is known to not require scalarization.
+ auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool {
+ return getLoadStorePointerOperand(I) == Ptr && isUniformDecision(I, VF);
+ };
+
+ // Holds a list of values which are known to have at least one uniform use.
+ // Note that there may be other uses which aren't uniform. A "uniform use"
+ // here is something which only demands lane 0 of the unrolled iterations;
+  // it does not imply that all lanes produce the same value (i.e. this is not
+  // the usual meaning of uniform).
+ SmallPtrSet<Value *, 8> HasUniformUse;
+
+ // Scan the loop for instructions which are either a) known to have only
+ // lane 0 demanded or b) are uses which demand only lane 0 of their operand.
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
// If there's no pointer operand, there's nothing to do.
- auto *Ptr = dyn_cast_or_null<Instruction>(getLoadStorePointerOperand(&I));
+ auto *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
continue;
- // True if all users of Ptr are memory accesses that have Ptr as their
- // pointer operand.
- auto UsersAreMemAccesses =
- llvm::all_of(Ptr->users(), [&](User *U) -> bool {
- return getLoadStorePointerOperand(U) == Ptr;
- });
-
- // Ensure the memory instruction will not be scalarized or used by
- // gather/scatter, making its pointer operand non-uniform. If the pointer
- // operand is used by any instruction other than a memory access, we
- // conservatively assume the pointer operand may be non-uniform.
- if (!UsersAreMemAccesses || !isUniformDecision(&I, VF))
- PossibleNonUniformPtrs.insert(Ptr);
-
- // If the memory instruction will be vectorized and its pointer operand
- // is consecutive-like, or interleaving - the pointer operand should
- // remain uniform.
- else
- ConsecutiveLikePtrs.insert(Ptr);
+ // A uniform memory op is itself uniform. We exclude uniform stores
+ // here as they demand the last lane, not the first one.
+ if (isa<LoadInst>(I) && Legal->isUniformMemOp(I))
+ addToWorklistIfAllowed(&I);
+
+ if (isUniformDecision(&I, VF)) {
+ assert(isVectorizedMemAccessUse(&I, Ptr) && "consistency check");
+ HasUniformUse.insert(Ptr);
+ }
}
- // Add to the Worklist all consecutive and consecutive-like pointers that
- // aren't also identified as possibly non-uniform.
- for (auto *V : ConsecutiveLikePtrs)
- if (!PossibleNonUniformPtrs.count(V))
- addToWorklistIfAllowed(V);
+ // Add to the worklist any operands which have *only* uniform (e.g. lane 0
+ // demanding) users. Since loops are assumed to be in LCSSA form, this
+ // disallows uses outside the loop as well.
+ for (auto *V : HasUniformUse) {
+ if (isOutOfScope(V))
+ continue;
+ auto *I = cast<Instruction>(V);
+ auto UsersAreMemAccesses =
+ llvm::all_of(I->users(), [&](User *U) -> bool {
+ return isVectorizedMemAccessUse(cast<Instruction>(U), V);
+ });
+ if (UsersAreMemAccesses)
+ addToWorklistIfAllowed(I);
+ }
// Expand Worklist in topological order: whenever a new instruction
  // is added, its users should already be inside Worklist. This ensures
@@ -4875,20 +5397,12 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
auto *OI = cast<Instruction>(OV);
if (llvm::all_of(OI->users(), [&](User *U) -> bool {
auto *J = cast<Instruction>(U);
- return Worklist.count(J) ||
- (OI == getLoadStorePointerOperand(J) &&
- isUniformDecision(J, VF));
+ return Worklist.count(J) || isVectorizedMemAccessUse(J, OI);
}))
addToWorklistIfAllowed(OI);
}
}
- // Returns true if Ptr is the pointer operand of a memory access instruction
- // I, and I is known to not require scalarization.
- auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool {
- return getLoadStorePointerOperand(I) == Ptr && isUniformDecision(I, VF);
- };
-
// For an instruction to be added into Worklist above, all its users inside
// the loop should also be in Worklist. However, this condition cannot be
// true for phi nodes that form a cyclic dependence. We must process phi
@@ -4961,8 +5475,8 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
return false;
}
-Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(unsigned UserVF,
- unsigned UserIC) {
+Optional<ElementCount>
+LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
// TODO: It may by useful to do since it's still likely to be dynamically
// uniform if the target can skip.
@@ -4984,7 +5498,9 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(unsigned UserVF,
switch (ScalarEpilogueStatus) {
case CM_ScalarEpilogueAllowed:
- return UserVF ? UserVF : computeFeasibleMaxVF(TC);
+ return computeFeasibleMaxVF(TC, UserVF);
+ case CM_ScalarEpilogueNotAllowedUsePredicate:
+ LLVM_FALLTHROUGH;
case CM_ScalarEpilogueNotNeededUsePredicate:
LLVM_DEBUG(
dbgs() << "LV: vector predicate hint/switch found.\n"
@@ -5005,9 +5521,26 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(unsigned UserVF,
// for size.
if (runtimeChecksRequired())
return None;
+
break;
}
+  // The only loops we can vectorize without a scalar epilogue are loops with
+ // a bottom-test and a single exiting block. We'd have to handle the fact
+ // that not every instruction executes on the last iteration. This will
+ // require a lane mask which varies through the vector loop body. (TODO)
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+ // If there was a tail-folding hint/switch, but we can't fold the tail by
+    // masking, fall back to a vectorization with a scalar epilogue.
+ if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
+ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
+ "scalar epilogue instead.\n");
+ ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
+ return computeFeasibleMaxVF(TC, UserVF);
+ }
+ return None;
+ }
+
// Now try the tail folding
// Invalidate interleave groups that require an epilogue if we can't mask
@@ -5020,10 +5553,22 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(unsigned UserVF,
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
}
- unsigned MaxVF = UserVF ? UserVF : computeFeasibleMaxVF(TC);
- assert((UserVF || isPowerOf2_32(MaxVF)) && "MaxVF must be a power of 2");
- unsigned MaxVFtimesIC = UserIC ? MaxVF * UserIC : MaxVF;
- if (TC > 0 && TC % MaxVFtimesIC == 0) {
+ ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
+ assert(!MaxVF.isScalable() &&
+ "Scalable vectors do not yet support tail folding");
+ assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
+ "MaxVF must be a power of 2");
+ unsigned MaxVFtimesIC =
+ UserIC ? MaxVF.getFixedValue() * UserIC : MaxVF.getFixedValue();
+ // Avoid tail folding if the trip count is known to be a multiple of any VF we
+ // chose.
+ ScalarEvolution *SE = PSE.getSE();
+ const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
+ const SCEV *ExitCount = SE->getAddExpr(
+ BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
+ const SCEV *Rem = SE->getURemExpr(
+ ExitCount, SE->getConstant(BackedgeTakenCount->getType(), MaxVFtimesIC));
+ if (Rem->isZero()) {
// Accept MaxVF if we do not have a tail.
LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
return MaxVF;
@@ -5038,6 +5583,20 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(unsigned UserVF,
return MaxVF;
}
+ // If there was a tail-folding hint/switch, but we can't fold the tail by
+  // masking, fall back to a vectorization with a scalar epilogue.
+ if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
+ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
+ "scalar epilogue instead.\n");
+ ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
+ return MaxVF;
+ }
+
+ if (ScalarEpilogueStatus == CM_ScalarEpilogueNotAllowedUsePredicate) {
+ LLVM_DEBUG(dbgs() << "LV: Can't fold tail by masking: don't vectorize\n");
+ return None;
+ }
+
if (TC == 0) {
reportVectorizationFailure(
"Unable to calculate the loop count due to complex control flow",
@@ -5055,8 +5614,33 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(unsigned UserVF,
return None;
}
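The SCEV-based divisibility test above, restated for a known constant trip
count (an illustrative helper, not taken from the patch): tail folding can be
skipped exactly when the trip count is a multiple of MaxVF * IC.

// e.g. TripCount = 24, MaxVF = 8, UserIC = 3 -> 24 % 24 == 0 -> no tail.
bool tripCountNeedsTailFolding(unsigned TripCount, unsigned MaxVF,
                               unsigned UserIC) {
  unsigned MaxVFtimesIC = UserIC ? MaxVF * UserIC : MaxVF;
  return TripCount % MaxVFtimesIC != 0;
}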
-unsigned
-LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
+ElementCount
+LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
+ ElementCount UserVF) {
+ bool IgnoreScalableUserVF = UserVF.isScalable() &&
+ !TTI.supportsScalableVectors() &&
+ !ForceTargetSupportsScalableVectors;
+ if (IgnoreScalableUserVF) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Ignoring VF=" << UserVF
+ << " because target does not support scalable vectors.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreScalableUserVF",
+ TheLoop->getStartLoc(),
+ TheLoop->getHeader())
+ << "Ignoring VF=" << ore::NV("UserVF", UserVF)
+ << " because target does not support scalable vectors.";
+ });
+ }
+
+ // Beyond this point two scenarios are handled. If UserVF isn't specified
+ // then a suitable VF is chosen. If UserVF is specified and there are
+ // dependencies, check if it's legal. However, if a UserVF is specified and
+ // there are no dependencies, then there's nothing to do.
+ if (UserVF.isNonZero() && !IgnoreScalableUserVF &&
+ Legal->isSafeForAnyVectorWidth())
+ return UserVF;
+
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
unsigned SmallestType, WidestType;
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
@@ -5066,9 +5650,62 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
// It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
  // the memory access that is most restrictive (involved in the smallest
// dependence distance).
- unsigned MaxSafeRegisterWidth = Legal->getMaxSafeRegisterWidth();
+ unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits();
+
+ // If the user vectorization factor is legally unsafe, clamp it to a safe
+ // value. Otherwise, return as is.
+ if (UserVF.isNonZero() && !IgnoreScalableUserVF) {
+ unsigned MaxSafeElements =
+ PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType);
+ ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements);
+
+ if (UserVF.isScalable()) {
+ Optional<unsigned> MaxVScale = TTI.getMaxVScale();
+
+ // Scale VF by vscale before checking if it's safe.
+ MaxSafeVF = ElementCount::getScalable(
+ MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
+
+ if (MaxSafeVF.isZero()) {
+ // The dependence distance is too small to use scalable vectors,
+        // fall back on fixed-width vectorization.
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Max legal vector width too small, scalable vectorization "
+ "unfeasible. Using fixed-width vectorization instead.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible",
+ TheLoop->getStartLoc(),
+ TheLoop->getHeader())
+ << "Max legal vector width too small, scalable vectorization "
+ << "unfeasible. Using fixed-width vectorization instead.";
+ });
+ return computeFeasibleMaxVF(
+ ConstTripCount, ElementCount::getFixed(UserVF.getKnownMinValue()));
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "LV: The max safe VF is: " << MaxSafeVF << ".\n");
+
+ if (ElementCount::isKnownLE(UserVF, MaxSafeVF))
+ return UserVF;
- WidestRegister = std::min(WidestRegister, MaxSafeRegisterWidth);
+ LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
+ << " is unsafe, clamping to max safe VF=" << MaxSafeVF
+ << ".\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
+ TheLoop->getStartLoc(),
+ TheLoop->getHeader())
+ << "User-specified vectorization factor "
+ << ore::NV("UserVectorizationFactor", UserVF)
+ << " is unsafe, clamping to maximum safe vectorization factor "
+ << ore::NV("VectorizationFactor", MaxSafeVF);
+ });
+ return MaxSafeVF;
+ }
+
+ WidestRegister = std::min(WidestRegister, MaxSafeVectorWidthInBits);
// Ensure MaxVF is a power of 2; the dependence distance bound may not be.
// Note that both WidestRegister and WidestType may not be a powers of 2.
@@ -5079,12 +5716,13 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: "
<< WidestRegister << " bits.\n");
- assert(MaxVectorSize <= 256 && "Did not expect to pack so many elements"
- " into one vector!");
+ assert(MaxVectorSize <= WidestRegister &&
+ "Did not expect to pack so many elements"
+ " into one vector!");
if (MaxVectorSize == 0) {
LLVM_DEBUG(dbgs() << "LV: The target has no vector registers.\n");
MaxVectorSize = 1;
- return MaxVectorSize;
+ return ElementCount::getFixed(MaxVectorSize);
} else if (ConstTripCount && ConstTripCount < MaxVectorSize &&
isPowerOf2_32(ConstTripCount)) {
// We need to clamp the VF to be the ConstTripCount. There is no point in
@@ -5092,7 +5730,7 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: "
<< ConstTripCount << "\n");
MaxVectorSize = ConstTripCount;
- return MaxVectorSize;
+ return ElementCount::getFixed(MaxVectorSize);
}
unsigned MaxVF = MaxVectorSize;
@@ -5100,10 +5738,10 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
(MaximizeBandwidth && isScalarEpilogueAllowed())) {
// Collect all viable vectorization factors larger than the default MaxVF
// (i.e. MaxVectorSize).
- SmallVector<unsigned, 8> VFs;
+ SmallVector<ElementCount, 8> VFs;
unsigned NewMaxVectorSize = WidestRegister / SmallestType;
for (unsigned VS = MaxVectorSize * 2; VS <= NewMaxVectorSize; VS *= 2)
- VFs.push_back(VS);
+ VFs.push_back(ElementCount::getFixed(VS));
// For each VF calculate its register usage.
auto RUs = calculateRegisterUsage(VFs);
@@ -5118,7 +5756,7 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
Selected = false;
}
if (Selected) {
- MaxVF = VFs[i];
+ MaxVF = VFs[i].getKnownMinValue();
break;
}
}
@@ -5130,30 +5768,39 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
}
}
}
- return MaxVF;
+ return ElementCount::getFixed(MaxVF);
}
VectorizationFactor
-LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) {
- float Cost = expectedCost(1).first;
- const float ScalarCost = Cost;
+LoopVectorizationCostModel::selectVectorizationFactor(ElementCount MaxVF) {
+ // FIXME: This can be fixed for scalable vectors later, because at this stage
+ // the LoopVectorizer will only consider vectorizing a loop with scalable
+ // vectors when the loop has a hint to enable vectorization for a given VF.
+ assert(!MaxVF.isScalable() && "scalable vectors not yet supported");
+
+ InstructionCost ExpectedCost = expectedCost(ElementCount::getFixed(1)).first;
+ LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
+ assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop");
+
unsigned Width = 1;
- LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n");
+ const float ScalarCost = *ExpectedCost.getValue();
+ float Cost = ScalarCost;
bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
- if (ForceVectorization && MaxVF > 1) {
+ if (ForceVectorization && MaxVF.isVector()) {
// Ignore scalar width, because the user explicitly wants vectorization.
// Initialize cost to max so that VF = 2 is, at least, chosen during cost
// evaluation.
Cost = std::numeric_limits<float>::max();
}
- for (unsigned i = 2; i <= MaxVF; i *= 2) {
+ for (unsigned i = 2; i <= MaxVF.getFixedValue(); i *= 2) {
// Notice that the vector loop needs to be executed less times, so
// we need to divide the cost of the vector loops by the width of
// the vector elements.
- VectorizationCostTy C = expectedCost(i);
- float VectorCost = C.first / (float)i;
+ VectorizationCostTy C = expectedCost(ElementCount::getFixed(i));
+ assert(C.first.isValid() && "Unexpected invalid cost for vector loop");
+ float VectorCost = *C.first.getValue() / (float)i;
LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i
<< " costs: " << (int)VectorCost << ".\n");
if (!C.second && !ForceVectorization) {
@@ -5162,6 +5809,13 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) {
<< " because it will not generate any vector instructions.\n");
continue;
}
+
+ // If profitable add it to ProfitableVF list.
+ if (VectorCost < ScalarCost) {
+ ProfitableVFs.push_back(VectorizationFactor(
+ {ElementCount::getFixed(i), (unsigned)VectorCost}));
+ }
+
if (VectorCost < Cost) {
Cost = VectorCost;
Width = i;
@@ -5180,10 +5834,131 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) {
<< "LV: Vectorization seems to be not beneficial, "
<< "but was forced by a user.\n");
LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n");
- VectorizationFactor Factor = {Width, (unsigned)(Width * Cost)};
+ VectorizationFactor Factor = {ElementCount::getFixed(Width),
+ (unsigned)(Width * Cost)};
return Factor;
}
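A minimal sketch of the width-selection rule above on plain floats
(illustrative only; the real code works with InstructionCost and also records
ProfitableVFs for later epilogue vectorization): each candidate width is
scored by its expected loop cost divided by the width, and the lowest per-lane
cost wins, with the scalar loop as the baseline.

#include <utility>
#include <vector>

unsigned pickBestWidth(float ScalarCost,
                       const std::vector<std::pair<unsigned, float>> &VFCosts) {
  unsigned Width = 1;
  float Best = ScalarCost;
  for (const auto &VC : VFCosts) {
    float PerLane = VC.second / static_cast<float>(VC.first);
    if (PerLane < Best) {
      Best = PerLane;
      Width = VC.first;
    }
  }
  return Width;
}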
+bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
+ const Loop &L, ElementCount VF) const {
+ // Cross iteration phis such as reductions need special handling and are
+ // currently unsupported.
+ if (any_of(L.getHeader()->phis(), [&](PHINode &Phi) {
+ return Legal->isFirstOrderRecurrence(&Phi) ||
+ Legal->isReductionVariable(&Phi);
+ }))
+ return false;
+
+ // Phis with uses outside of the loop require special handling and are
+ // currently unsupported.
+ for (auto &Entry : Legal->getInductionVars()) {
+ // Look for uses of the value of the induction at the last iteration.
+ Value *PostInc = Entry.first->getIncomingValueForBlock(L.getLoopLatch());
+ for (User *U : PostInc->users())
+ if (!L.contains(cast<Instruction>(U)))
+ return false;
+    // Look for uses of the penultimate value of the induction.
+ for (User *U : Entry.first->users())
+ if (!L.contains(cast<Instruction>(U)))
+ return false;
+ }
+
+ // Induction variables that are widened require special handling that is
+ // currently not supported.
+ if (any_of(Legal->getInductionVars(), [&](auto &Entry) {
+ return !(this->isScalarAfterVectorization(Entry.first, VF) ||
+ this->isProfitableToScalarize(Entry.first, VF));
+ }))
+ return false;
+
+ return true;
+}
+
+bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
+ const ElementCount VF) const {
+ // FIXME: We need a much better cost-model to take different parameters such
+ // as register pressure, code size increase and cost of extra branches into
+ // account. For now we apply a very crude heuristic and only consider loops
+ // with vectorization factors larger than a certain value.
+ // We also consider epilogue vectorization unprofitable for targets that don't
+  // consider interleaving beneficial (e.g. MVE).
+ if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1)
+ return false;
+ if (VF.getFixedValue() >= EpilogueVectorizationMinVF)
+ return true;
+ return false;
+}
+
+VectorizationFactor
+LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
+ const ElementCount MainLoopVF, const LoopVectorizationPlanner &LVP) {
+ VectorizationFactor Result = VectorizationFactor::Disabled();
+ if (!EnableEpilogueVectorization) {
+ LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is disabled.\n";);
+ return Result;
+ }
+
+ if (!isScalarEpilogueAllowed()) {
+ LLVM_DEBUG(
+ dbgs() << "LEV: Unable to vectorize epilogue because no epilogue is "
+ "allowed.\n";);
+ return Result;
+ }
+
+ // FIXME: This can be fixed for scalable vectors later, because at this stage
+ // the LoopVectorizer will only consider vectorizing a loop with scalable
+ // vectors when the loop has a hint to enable vectorization for a given VF.
+ if (MainLoopVF.isScalable()) {
+ LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization for scalable vectors not "
+ "yet supported.\n");
+ return Result;
+ }
+
+ // Not really a cost consideration, but check for unsupported cases here to
+ // simplify the logic.
+ if (!isCandidateForEpilogueVectorization(*TheLoop, MainLoopVF)) {
+ LLVM_DEBUG(
+ dbgs() << "LEV: Unable to vectorize epilogue because the loop is "
+ "not a supported candidate.\n";);
+ return Result;
+ }
+
+ if (EpilogueVectorizationForceVF > 1) {
+ LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";);
+ if (LVP.hasPlanWithVFs(
+ {MainLoopVF, ElementCount::getFixed(EpilogueVectorizationForceVF)}))
+ return {ElementCount::getFixed(EpilogueVectorizationForceVF), 0};
+ else {
+ LLVM_DEBUG(
+ dbgs()
+ << "LEV: Epilogue vectorization forced factor is not viable.\n";);
+ return Result;
+ }
+ }
+
+ if (TheLoop->getHeader()->getParent()->hasOptSize() ||
+ TheLoop->getHeader()->getParent()->hasMinSize()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LEV: Epilogue vectorization skipped due to opt for size.\n";);
+ return Result;
+ }
+
+ if (!isEpilogueVectorizationProfitable(MainLoopVF))
+ return Result;
+
+ for (auto &NextVF : ProfitableVFs)
+ if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+ (Result.Width.getFixedValue() == 1 || NextVF.Cost < Result.Cost) &&
+ LVP.hasPlanWithVFs({MainLoopVF, NextVF.Width}))
+ Result = NextVF;
+
+ if (Result != VectorizationFactor::Disabled())
+ LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = "
+ << Result.Width.getFixedValue() << "\n";);
+ return Result;
+}
+
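The final loop of selectEpilogueVectorizationFactor reduces to the following
pick (a simplified sketch with assumed types; the real code additionally
requires that a VPlan exists for the {MainLoopVF, NextVF} pair):

#include <vector>

struct VFCandidate {
  unsigned Width;
  unsigned Cost;
};

VFCandidate pickEpilogueVF(unsigned MainLoopVF,
                           const std::vector<VFCandidate> &ProfitableVFs) {
  VFCandidate Result{1, 0}; // Width 1 stands for "disabled".
  for (const auto &NextVF : ProfitableVFs)
    if (NextVF.Width < MainLoopVF &&
        (Result.Width == 1 || NextVF.Cost < Result.Cost))
      Result = NextVF;
  return Result;
}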
std::pair<unsigned, unsigned>
LoopVectorizationCostModel::getSmallestAndWidestTypes() {
unsigned MinWidth = -1U;
@@ -5210,6 +5985,11 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
if (!Legal->isReductionVariable(PN))
continue;
RecurrenceDescriptor RdxDesc = Legal->getReductionVars()[PN];
+ if (PreferInLoopReductions ||
+ TTI.preferInLoopReduction(RdxDesc.getOpcode(),
+ RdxDesc.getRecurrenceType(),
+ TargetTransformInfo::ReductionFlags()))
+ continue;
T = RdxDesc.getRecurrenceType();
}
@@ -5240,7 +6020,7 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
return {MinWidth, MaxWidth};
}
-unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
+unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
unsigned LoopCost) {
// -- The interleave heuristics --
// We interleave the loop in order to expose ILP and reduce the loop overhead.
@@ -5263,10 +6043,15 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
if (Legal->getMaxSafeDepDistBytes() != -1U)
return 1;
- // Do not interleave loops with a relatively small known or estimated trip
- // count.
auto BestKnownTC = getSmallBestKnownTC(*PSE.getSE(), TheLoop);
- if (BestKnownTC && *BestKnownTC < TinyTripCountInterleaveThreshold)
+ const bool HasReductions = !Legal->getReductionVars().empty();
+ // Do not interleave loops with a relatively small known or estimated trip
+ // count. But we will interleave when InterleaveSmallLoopScalarReduction is
+  // enabled, and the code has scalar reductions (HasReductions && VF == 1),
+ // because with the above conditions interleaving can expose ILP and break
+ // cross iteration dependences for reductions.
+ if (BestKnownTC && (*BestKnownTC < TinyTripCountInterleaveThreshold) &&
+ !(InterleaveSmallLoopScalarReduction && HasReductions && VF.isScalar()))
return 1;
RegisterUsage R = calculateRegisterUsage({VF})[0];
@@ -5294,7 +6079,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
<< " registers of "
<< TTI.getRegisterClassName(pair.first) << " register class\n");
- if (VF == 1) {
+ if (VF.isScalar()) {
if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
TargetNumRegisters = ForceTargetNumScalarRegs;
} else {
@@ -5318,10 +6103,11 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
}
// Clamp the interleave ranges to reasonable counts.
- unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
+ unsigned MaxInterleaveCount =
+ TTI.getMaxInterleaveFactor(VF.getKnownMinValue());
// Check if the user has overridden the max.
- if (VF == 1) {
+ if (VF.isScalar()) {
if (ForceTargetMaxScalarInterleaveFactor.getNumOccurrences() > 0)
MaxInterleaveCount = ForceTargetMaxScalarInterleaveFactor;
} else {
@@ -5330,28 +6116,47 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
}
// If trip count is known or estimated compile time constant, limit the
- // interleave count to be less than the trip count divided by VF.
+ // interleave count to be less than the trip count divided by VF, provided it
+ // is at least 1.
+ //
+ // For scalable vectors we can't know if interleaving is beneficial. It may
+ // not be beneficial for small loops if none of the lanes in the second vector
+  // iteration is enabled. However, for larger loops, there is likely to be a
+ // similar benefit as for fixed-width vectors. For now, we choose to leave
+ // the InterleaveCount as if vscale is '1', although if some information about
+ // the vector is known (e.g. min vector size), we can make a better decision.
if (BestKnownTC) {
- MaxInterleaveCount = std::min(*BestKnownTC / VF, MaxInterleaveCount);
+ MaxInterleaveCount =
+ std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount);
+ // Make sure MaxInterleaveCount is greater than 0.
+ MaxInterleaveCount = std::max(1u, MaxInterleaveCount);
}
- // If we did not calculate the cost for VF (because the user selected the VF)
- // then we calculate the cost of VF here.
- if (LoopCost == 0)
- LoopCost = expectedCost(VF).first;
-
- assert(LoopCost && "Non-zero loop cost expected");
+ assert(MaxInterleaveCount > 0 &&
+ "Maximum interleave count must be greater than 0");
// Clamp the calculated IC to be between the 1 and the max interleave count
// that the target and trip count allows.
if (IC > MaxInterleaveCount)
IC = MaxInterleaveCount;
- else if (IC < 1)
- IC = 1;
+ else
+ // Make sure IC is greater than 0.
+ IC = std::max(1u, IC);
+
+ assert(IC > 0 && "Interleave count must be greater than 0.");
+
+ // If we did not calculate the cost for VF (because the user selected the VF)
+ // then we calculate the cost of VF here.
+ if (LoopCost == 0) {
+ assert(expectedCost(VF).first.isValid() && "Expected a valid cost");
+ LoopCost = *expectedCost(VF).first.getValue();
+ }
+
+ assert(LoopCost && "Non-zero loop cost expected");
// Interleave if we vectorized this loop and there is a reduction that could
// benefit from interleaving.
- if (VF > 1 && !Legal->getReductionVars().empty()) {
+ if (VF.isVector() && HasReductions) {
LLVM_DEBUG(dbgs() << "LV: Interleaving because of reductions.\n");
return IC;
}
@@ -5359,11 +6164,15 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
// Note that if we've already vectorized the loop we will have done the
// runtime check and so interleaving won't require further checks.
bool InterleavingRequiresRuntimePointerCheck =
- (VF == 1 && Legal->getRuntimePointerChecking()->Need);
+ (VF.isScalar() && Legal->getRuntimePointerChecking()->Need);
// We want to interleave small loops in order to reduce the loop overhead and
// potentially expose ILP opportunities.
- LLVM_DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n');
+ LLVM_DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n'
+ << "LV: IC is " << IC << '\n'
+ << "LV: VF is " << VF << '\n');
+ const bool AggressivelyInterleaveReductions =
+ TTI.enableAggressiveInterleaving(HasReductions);
if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) {
// We assume that the cost overhead is 1 and we use the cost model
// to estimate the cost of the loop and interleave until the cost of the
@@ -5382,7 +6191,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
// by this point), we can increase the critical path length if the loop
// we're interleaving is inside another loop. Limit, by default to 2, so the
// critical path only gets increased by one reduction operation.
- if (!Legal->getReductionVars().empty() && TheLoop->getLoopDepth() > 1) {
+ if (HasReductions && TheLoop->getLoopDepth() > 1) {
unsigned F = static_cast<unsigned>(MaxNestedScalarReductionIC);
SmallIC = std::min(SmallIC, F);
StoresIC = std::min(StoresIC, F);
@@ -5396,14 +6205,23 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
return std::max(StoresIC, LoadsIC);
}
- LLVM_DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
- return SmallIC;
+ // If there are scalar reductions and TTI has enabled aggressive
+ // interleaving for reductions, we will interleave to expose ILP.
+ if (InterleaveSmallLoopScalarReduction && VF.isScalar() &&
+ AggressivelyInterleaveReductions) {
+ LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
+      // Interleave no less than SmallIC, but not as aggressively as the normal
+      // IC, to handle the rare situation where resources are too limited.
+ return std::max(IC / 2, SmallIC);
+ } else {
+ LLVM_DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
+ return SmallIC;
+ }
}
// Interleave if this is a large loop (small loops are already dealt with by
// this point) that could benefit from interleaving.
- bool HasReductions = !Legal->getReductionVars().empty();
- if (TTI.enableAggressiveInterleaving(HasReductions)) {
+ if (AggressivelyInterleaveReductions) {
LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
return IC;
}
@@ -5413,7 +6231,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
}
SmallVector<LoopVectorizationCostModel::RegisterUsage, 8>
-LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
+LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
// This function calculates the register usage by measuring the highest number
// of values that are alive at a single location. Obviously, this is a very
// rough estimation. We scan the loop in a topological order in order and
@@ -5485,26 +6303,17 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
TransposeEnds[Interval.second].push_back(Interval.first);
SmallPtrSet<Instruction *, 8> OpenIntervals;
-
- // Get the size of the widest register.
- unsigned MaxSafeDepDist = -1U;
- if (Legal->getMaxSafeDepDistBytes() != -1U)
- MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
- unsigned WidestRegister =
- std::min(TTI.getRegisterBitWidth(true), MaxSafeDepDist);
- const DataLayout &DL = TheFunction->getParent()->getDataLayout();
-
SmallVector<RegisterUsage, 8> RUs(VFs.size());
SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
// A lambda that gets the register usage for the given type and VF.
- auto GetRegUsage = [&DL, WidestRegister](Type *Ty, unsigned VF) {
- if (Ty->isTokenTy())
+ const auto &TTICapture = TTI;
+ auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) {
+ if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty))
return 0U;
- unsigned TypeSize = DL.getTypeSizeInBits(Ty->getScalarType());
- return std::max<unsigned>(1, VF * TypeSize / WidestRegister);
+ return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
};
for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {
@@ -5528,7 +6337,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
// Count the number of live intervals.
SmallMapVector<unsigned, unsigned, 4> RegUsage;
- if (VFs[j] == 1) {
+ if (VFs[j].isScalar()) {
for (auto Inst : OpenIntervals) {
unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
if (RegUsage.find(ClassID) == RegUsage.end())
@@ -5557,7 +6366,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
}
}
}
-
+
for (auto& pair : RegUsage) {
if (MaxUsages[j].find(pair.first) != MaxUsages[j].end())
MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second);
@@ -5575,10 +6384,12 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
SmallMapVector<unsigned, unsigned, 4> Invariant;
-
+
for (auto Inst : LoopInvariants) {
- unsigned Usage = VFs[i] == 1 ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
- unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType());
+ unsigned Usage =
+ VFs[i].isScalar() ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
+ unsigned ClassID =
+ TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType());
if (Invariant.find(ClassID) == Invariant.end())
Invariant[ClassID] = Usage;
else
@@ -5626,12 +6437,13 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){
NumPredStores > NumberOfStoresToPredicate);
}
-void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
+void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
// If we aren't vectorizing the loop, or if we've already collected the
// instructions to scalarize, there's nothing to do. Collection may already
// have occurred if we have a user-selected VF and are now computing the
// expected cost for interleaving.
- if (VF < 2 || InstsToScalarize.find(VF) != InstsToScalarize.end())
+ if (VF.isScalar() || VF.isZero() ||
+ InstsToScalarize.find(VF) != InstsToScalarize.end())
return;
// Initialize a mapping for VF in InstsToScalalarize. If we find that it's
@@ -5660,14 +6472,13 @@ void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
}
int LoopVectorizationCostModel::computePredInstDiscount(
- Instruction *PredInst, DenseMap<Instruction *, unsigned> &ScalarCosts,
- unsigned VF) {
+ Instruction *PredInst, ScalarCostsTy &ScalarCosts, ElementCount VF) {
assert(!isUniformAfterVectorization(PredInst, VF) &&
"Instruction marked uniform-after-vectorization will be predicated");
// Initialize the discount to zero, meaning that the scalar version and the
// vector version cost the same.
- int Discount = 0;
+ InstructionCost Discount = 0;
// Holds instructions to analyze. The instructions we visit are mapped in
// ScalarCosts. Those instructions are the ones that would be scalarized if
@@ -5722,22 +6533,27 @@ int LoopVectorizationCostModel::computePredInstDiscount(
// Compute the cost of the vector instruction. Note that this cost already
// includes the scalarization overhead of the predicated instruction.
- unsigned VectorCost = getInstructionCost(I, VF).first;
+ InstructionCost VectorCost = getInstructionCost(I, VF).first;
// Compute the cost of the scalarized instruction. This cost is the cost of
// the instruction as if it wasn't if-converted and instead remained in the
// predicated block. We will scale this cost by block probability after
// computing the scalarization overhead.
- unsigned ScalarCost = VF * getInstructionCost(I, 1).first;
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ InstructionCost ScalarCost =
+ VF.getKnownMinValue() *
+ getInstructionCost(I, ElementCount::getFixed(1)).first;
// Compute the scalarization overhead of needed insertelement instructions
// and phi nodes.
if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(ToVectorTy(I->getType(), VF)),
- APInt::getAllOnesValue(VF), true, false);
- ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI,
- TTI::TCK_RecipThroughput);
+ APInt::getAllOnesValue(VF.getKnownMinValue()), true, false);
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ ScalarCost +=
+ VF.getKnownMinValue() *
+ TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
}
// Compute the scalarization overhead of needed extractelement
@@ -5750,10 +6566,12 @@ int LoopVectorizationCostModel::computePredInstDiscount(
"Instruction has non-scalar type");
if (canBeScalarized(J))
Worklist.push_back(J);
- else if (needsExtract(J, VF))
+ else if (needsExtract(J, VF)) {
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(ToVectorTy(J->getType(), VF)),
- APInt::getAllOnesValue(VF), false, true);
+ APInt::getAllOnesValue(VF.getKnownMinValue()), false, true);
+ }
}
// Scale the total scalar cost by block probability.
@@ -5765,11 +6583,11 @@ int LoopVectorizationCostModel::computePredInstDiscount(
ScalarCosts[I] = ScalarCost;
}
- return Discount;
+ return *Discount.getValue();
}
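
As a rough worked example of the discount computed above (all numbers invented): with VF = 4, a per-lane scalar cost of 3, a vector cost of 8, and assuming the predicated block is taken about half the time (reciprocal block probability 2), the scaled scalar estimate is 4 * 3 / 2 = 6, so the discount is 8 - 6 = 2. A non-negative discount means the scalar, predicated form is estimated to be no more expensive than the if-converted vector form.
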
LoopVectorizationCostModel::VectorizationCostTy
-LoopVectorizationCostModel::expectedCost(unsigned VF) {
+LoopVectorizationCostModel::expectedCost(ElementCount VF) {
VectorizationCostTy Cost;
// For each block.
@@ -5779,14 +6597,15 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
// For each instruction in the old loop.
for (Instruction &I : BB->instructionsWithoutDebug()) {
// Skip ignored values.
- if (ValuesToIgnore.count(&I) || (VF > 1 && VecValuesToIgnore.count(&I)))
+ if (ValuesToIgnore.count(&I) ||
+ (VF.isVector() && VecValuesToIgnore.count(&I)))
continue;
VectorizationCostTy C = getInstructionCost(&I, VF);
// Check if we should override the cost.
if (ForceTargetInstructionCost.getNumOccurrences() > 0)
- C.first = ForceTargetInstructionCost;
+ C.first = InstructionCost(ForceTargetInstructionCost);
BlockCost.first += C.first;
BlockCost.second |= C.second;
@@ -5799,9 +6618,10 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
// if-converted. This means that the block's instructions (aside from
// stores and instructions that may divide by zero) will now be
// unconditionally executed. For the scalar case, we may not always execute
- // the predicated block. Thus, scale the block's cost by the probability of
- // executing it.
- if (VF == 1 && blockNeedsPredication(BB))
+ // the predicated block, if it is an if-else block. Thus, scale the block's
+ // cost by the probability of executing it. blockNeedsPredication from
+ // Legal is used so as to not include all blocks in tail folded loops.
+ if (VF.isScalar() && Legal->blockNeedsPredication(BB))
BlockCost.first /= getReciprocalPredBlockProb();
Cost.first += BlockCost.first;
@@ -5846,9 +6666,12 @@ static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
Legal->hasStride(I->getOperand(1));
}
-unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
- unsigned VF) {
- assert(VF > 1 && "Scalarization cost of instruction implies vectorization.");
+InstructionCost
+LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
+ ElementCount VF) {
+ assert(VF.isVector() &&
+ "Scalarization cost of instruction implies vectorization.");
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
Type *ValTy = getMemInstValueType(I);
auto SE = PSE.getSE();
@@ -5861,14 +6684,15 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop);
// Get the cost of the scalar memory instruction and address computation.
- unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
+ InstructionCost Cost =
+ VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
// Don't pass *I here, since it is scalar but will actually be part of a
// vectorized loop where the user of it is a vectorized instruction.
const Align Alignment = getLoadStoreAlignment(I);
- Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
- Alignment, AS,
- TTI::TCK_RecipThroughput);
+ Cost += VF.getKnownMinValue() *
+ TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment,
+ AS, TTI::TCK_RecipThroughput);
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
@@ -5889,8 +6713,9 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
return Cost;
}
-unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
- unsigned VF) {
+InstructionCost
+LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
+ ElementCount VF) {
Type *ValTy = getMemInstValueType(I);
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
Value *Ptr = getLoadStorePointerOperand(I);
@@ -5901,7 +6726,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
"Stride should be 1 or -1 for consecutive memory access");
const Align Alignment = getLoadStoreAlignment(I);
- unsigned Cost = 0;
+ InstructionCost Cost = 0;
if (Legal->isMaskRequired(I))
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
CostKind);
@@ -5915,8 +6740,11 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
return Cost;
}
-unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
- unsigned VF) {
+InstructionCost
+LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
+ ElementCount VF) {
+ assert(Legal->isUniformMemOp(*I));
+
Type *ValTy = getMemInstValueType(I);
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
const Align Alignment = getLoadStoreAlignment(I);
@@ -5937,11 +6765,12 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
(isLoopInvariantStoreValue
? 0
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
- VF - 1));
+ VF.getKnownMinValue() - 1));
}
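
In other words, a uniform store above is costed as a scalar access plus, for a non-loop-invariant stored value, one extract of the last vector lane; a toy mirror of that shape (hypothetical helper, not an LLVM API):

  // Stores of a loop-invariant value need no extract; otherwise lane VF - 1
  // of the vectorized value is extracted and stored once per vector iteration.
  long uniformStoreCost(long ScalarStoreCost, long ExtractLastLaneCost,
                        bool StoredValueIsLoopInvariant) {
    return ScalarStoreCost +
           (StoredValueIsLoopInvariant ? 0 : ExtractLastLaneCost);
  }
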
-unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
- unsigned VF) {
+InstructionCost
+LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
+ ElementCount VF) {
Type *ValTy = getMemInstValueType(I);
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
const Align Alignment = getLoadStoreAlignment(I);
@@ -5953,8 +6782,9 @@ unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
TargetTransformInfo::TCK_RecipThroughput, I);
}
-unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
- unsigned VF) {
+InstructionCost
+LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
+ ElementCount VF) {
Type *ValTy = getMemInstValueType(I);
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
unsigned AS = getLoadStoreAddressSpace(I);
@@ -5963,7 +6793,8 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
assert(Group && "Fail to get an interleaved access group.");
unsigned InterleaveFactor = Group->getFactor();
- auto *WideVecTy = FixedVectorType::get(ValTy, VF * InterleaveFactor);
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
// Holds the indices of existing members in an interleaved load group.
// An interleaved store group doesn't need this as it doesn't allow gaps.
@@ -5977,7 +6808,7 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
// Calculate the cost of the whole interleaved group.
bool UseMaskForGaps =
Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed();
- unsigned Cost = TTI.getInterleavedMemoryOpCost(
+ InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
I->getOpcode(), WideVecTy, Group->getFactor(), Indices, Group->getAlign(),
AS, TTI::TCK_RecipThroughput, Legal->isMaskRequired(I), UseMaskForGaps);
@@ -5991,11 +6822,122 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
return Cost;
}
-unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
- unsigned VF) {
+InstructionCost LoopVectorizationCostModel::getReductionPatternCost(
+ Instruction *I, ElementCount VF, Type *Ty, TTI::TargetCostKind CostKind) {
+ // Early exit for no inloop reductions
+ if (InLoopReductionChains.empty() || VF.isScalar() || !isa<VectorType>(Ty))
+ return InstructionCost::getInvalid();
+ auto *VectorTy = cast<VectorType>(Ty);
+
+ // We are looking for one of the following patterns, and for the minimal acceptable cost of it:
+ // reduce(mul(ext(A), ext(B))) or
+ // reduce(mul(A, B)) or
+ // reduce(ext(A)) or
+ // reduce(A).
+ // The basic idea is that we walk down the tree to do that, finding the root
+ // reduction instruction in InLoopReductionImmediateChains. From there we find
+ // the pattern of mul/ext and test the cost of the entire pattern vs the cost
+ // of the components. If the reduction cost is lower, then we return it for
+ // the reduction instruction and 0 for the other instructions in the pattern.
+ // If it is not, we return an invalid cost specifying that the original cost
+ // method should be used.
+ Instruction *RetI = I;
+ if ((RetI->getOpcode() == Instruction::SExt ||
+ RetI->getOpcode() == Instruction::ZExt)) {
+ if (!RetI->hasOneUser())
+ return InstructionCost::getInvalid();
+ RetI = RetI->user_back();
+ }
+ if (RetI->getOpcode() == Instruction::Mul &&
+ RetI->user_back()->getOpcode() == Instruction::Add) {
+ if (!RetI->hasOneUser())
+ return InstructionCost::getInvalid();
+ RetI = RetI->user_back();
+ }
+
+ // Test if the found instruction is a reduction, and if not return an invalid
+ // cost, telling the parent to use the original cost modelling.
+ if (!InLoopReductionImmediateChains.count(RetI))
+ return InstructionCost::getInvalid();
+
+ // Find the reduction this chain is a part of and calculate the basic cost of
+ // the reduction on its own.
+ Instruction *LastChain = InLoopReductionImmediateChains[RetI];
+ Instruction *ReductionPhi = LastChain;
+ while (!isa<PHINode>(ReductionPhi))
+ ReductionPhi = InLoopReductionImmediateChains[ReductionPhi];
+
+ RecurrenceDescriptor RdxDesc =
+ Legal->getReductionVars()[cast<PHINode>(ReductionPhi)];
+ unsigned BaseCost = TTI.getArithmeticReductionCost(RdxDesc.getOpcode(),
+ VectorTy, false, CostKind);
+
+ // Get the operand that was not the reduction chain and match it to one of the
+ // patterns, returning the better cost if it is found.
+ Instruction *RedOp = RetI->getOperand(1) == LastChain
+ ? dyn_cast<Instruction>(RetI->getOperand(0))
+ : dyn_cast<Instruction>(RetI->getOperand(1));
+
+ VectorTy = VectorType::get(I->getOperand(0)->getType(), VectorTy);
+
+ if (RedOp && (isa<SExtInst>(RedOp) || isa<ZExtInst>(RedOp)) &&
+ !TheLoop->isLoopInvariant(RedOp)) {
+ bool IsUnsigned = isa<ZExtInst>(RedOp);
+ auto *ExtType = VectorType::get(RedOp->getOperand(0)->getType(), VectorTy);
+ InstructionCost RedCost = TTI.getExtendedAddReductionCost(
+ /*IsMLA=*/false, IsUnsigned, RdxDesc.getRecurrenceType(), ExtType,
+ CostKind);
+
+ unsigned ExtCost =
+ TTI.getCastInstrCost(RedOp->getOpcode(), VectorTy, ExtType,
+ TTI::CastContextHint::None, CostKind, RedOp);
+ if (RedCost.isValid() && RedCost < BaseCost + ExtCost)
+ return I == RetI ? *RedCost.getValue() : 0;
+ } else if (RedOp && RedOp->getOpcode() == Instruction::Mul) {
+ Instruction *Mul = RedOp;
+ Instruction *Op0 = dyn_cast<Instruction>(Mul->getOperand(0));
+ Instruction *Op1 = dyn_cast<Instruction>(Mul->getOperand(1));
+ if (Op0 && Op1 && (isa<SExtInst>(Op0) || isa<ZExtInst>(Op0)) &&
+ Op0->getOpcode() == Op1->getOpcode() &&
+ Op0->getOperand(0)->getType() == Op1->getOperand(0)->getType() &&
+ !TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1)) {
+ bool IsUnsigned = isa<ZExtInst>(Op0);
+ auto *ExtType = VectorType::get(Op0->getOperand(0)->getType(), VectorTy);
+ // reduce(mul(ext, ext))
+ unsigned ExtCost =
+ TTI.getCastInstrCost(Op0->getOpcode(), VectorTy, ExtType,
+ TTI::CastContextHint::None, CostKind, Op0);
+ unsigned MulCost =
+ TTI.getArithmeticInstrCost(Mul->getOpcode(), VectorTy, CostKind);
+
+ InstructionCost RedCost = TTI.getExtendedAddReductionCost(
+ /*IsMLA=*/true, IsUnsigned, RdxDesc.getRecurrenceType(), ExtType,
+ CostKind);
+
+ if (RedCost.isValid() && RedCost < ExtCost * 2 + MulCost + BaseCost)
+ return I == RetI ? *RedCost.getValue() : 0;
+ } else {
+ unsigned MulCost =
+ TTI.getArithmeticInstrCost(Mul->getOpcode(), VectorTy, CostKind);
+
+ InstructionCost RedCost = TTI.getExtendedAddReductionCost(
+ /*IsMLA=*/true, true, RdxDesc.getRecurrenceType(), VectorTy,
+ CostKind);
+
+ if (RedCost.isValid() && RedCost < MulCost + BaseCost)
+ return I == RetI ? *RedCost.getValue() : 0;
+ }
+ }
+
+ return I == RetI ? BaseCost : InstructionCost::getInvalid();
+}
+
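
For orientation, the reduce(mul(ext(A), ext(B))) shape matched above is what a widening dot-product style source loop lowers to; a minimal sketch of such a loop (illustrative only, not taken from this patch):

  // Sum accumulates sext(A[i]) * sext(B[i]): an add reduction fed by a
  // widening multiply, i.e. the reduce(mul(ext, ext)) case costed above.
  int dotProduct(const short *A, const short *B, int N) {
    int Sum = 0;
    for (int I = 0; I < N; ++I)
      Sum += int(A[I]) * int(B[I]);
    return Sum;
  }
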
+InstructionCost
+LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
+ ElementCount VF) {
// Calculate scalar cost only. Vectorization cost should be ready at this
// moment.
- if (VF == 1) {
+ if (VF.isScalar()) {
Type *ValTy = getMemInstValueType(I);
const Align Alignment = getLoadStoreAlignment(I);
unsigned AS = getLoadStoreAddressSpace(I);
@@ -6008,43 +6950,52 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
}
LoopVectorizationCostModel::VectorizationCostTy
-LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
+LoopVectorizationCostModel::getInstructionCost(Instruction *I,
+ ElementCount VF) {
// If we know that this instruction will remain uniform, check the cost of
// the scalar version.
if (isUniformAfterVectorization(I, VF))
- VF = 1;
+ VF = ElementCount::getFixed(1);
- if (VF > 1 && isProfitableToScalarize(I, VF))
+ if (VF.isVector() && isProfitableToScalarize(I, VF))
return VectorizationCostTy(InstsToScalarize[VF][I], false);
// Forced scalars do not have any scalarization overhead.
auto ForcedScalar = ForcedScalars.find(VF);
- if (VF > 1 && ForcedScalar != ForcedScalars.end()) {
+ if (VF.isVector() && ForcedScalar != ForcedScalars.end()) {
auto InstSet = ForcedScalar->second;
if (InstSet.count(I))
- return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false);
+ return VectorizationCostTy(
+ (getInstructionCost(I, ElementCount::getFixed(1)).first *
+ VF.getKnownMinValue()),
+ false);
}
Type *VectorTy;
- unsigned C = getInstructionCost(I, VF, VectorTy);
+ InstructionCost C = getInstructionCost(I, VF, VectorTy);
bool TypeNotScalarized =
- VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF;
+ VF.isVector() && VectorTy->isVectorTy() &&
+ TTI.getNumberOfParts(VectorTy) < VF.getKnownMinValue();
return VectorizationCostTy(C, TypeNotScalarized);
}
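
The InstructionCost values threaded through these hooks carry a validity state in addition to a number; a minimal sketch of how a caller consumes that distinction, assuming the llvm/Support/InstructionCost.h API used by this patch:

  #include "llvm/Support/InstructionCost.h"
  using llvm::InstructionCost;

  int pickCost(InstructionCost Special, int GenericCost) {
    // An invalid cost means "this special-case costing does not apply", so
    // callers such as the reduction-pattern hook fall back to the generic
    // path; getValue() is only meaningful for a valid cost.
    if (!Special.isValid())
      return GenericCost;
    return *Special.getValue();
  }
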
-unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
- unsigned VF) {
+InstructionCost
+LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
+ ElementCount VF) {
- if (VF == 1)
+ assert(!VF.isScalable() &&
+ "cannot compute scalarization overhead for scalable vectorization");
+ if (VF.isScalar())
return 0;
- unsigned Cost = 0;
+ InstructionCost Cost = 0;
Type *RetTy = ToVectorTy(I->getType(), VF);
if (!RetTy->isVoidTy() &&
(!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
Cost += TTI.getScalarizationOverhead(
- cast<VectorType>(RetTy), APInt::getAllOnesValue(VF), true, false);
+ cast<VectorType>(RetTy), APInt::getAllOnesValue(VF.getKnownMinValue()),
+ true, false);
// Some targets keep addresses scalar.
if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
@@ -6061,11 +7012,11 @@ unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
// Skip operands that do not require extraction/scalarization and do not incur
// any overhead.
return Cost + TTI.getOperandsScalarizationOverhead(
- filterExtractingOperands(Ops, VF), VF);
+ filterExtractingOperands(Ops, VF), VF.getKnownMinValue());
}
-void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
- if (VF == 1)
+void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
+ if (VF.isScalar())
return;
NumPredStores = 0;
for (BasicBlock *BB : TheLoop->blocks()) {
@@ -6082,23 +7033,19 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
if (isa<StoreInst>(&I) && isScalarWithPredication(&I))
NumPredStores++;
- if (Legal->isUniform(Ptr) &&
- // Conditional loads and stores should be scalarized and predicated.
- // isScalarWithPredication cannot be used here since masked
- // gather/scatters are not considered scalar with predication.
- !Legal->blockNeedsPredication(I.getParent())) {
+ if (Legal->isUniformMemOp(I)) {
// TODO: Avoid replicating loads and stores instead of
// relying on instcombine to remove them.
// Load: Scalar load + broadcast
// Store: Scalar store + isLoopInvariantStoreValue ? 0 : extract
- unsigned Cost = getUniformMemOpCost(&I, VF);
+ InstructionCost Cost = getUniformMemOpCost(&I, VF);
setWideningDecision(&I, VF, CM_Scalarize, Cost);
continue;
}
// We assume that widening is the best solution when possible.
if (memoryInstructionCanBeWidened(&I, VF)) {
- unsigned Cost = getConsecutiveMemOpCost(&I, VF);
+ InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);
int ConsecutiveStride =
Legal->isConsecutivePtr(getLoadStorePointerOperand(&I));
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
@@ -6110,7 +7057,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
}
// Choose between Interleaving, Gather/Scatter or Scalarization.
- unsigned InterleaveCost = std::numeric_limits<unsigned>::max();
+ InstructionCost InterleaveCost = std::numeric_limits<int>::max();
unsigned NumAccesses = 1;
if (isAccessInterleaved(&I)) {
auto Group = getInterleavedAccessGroup(&I);
@@ -6125,17 +7072,17 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
InterleaveCost = getInterleaveGroupCost(&I, VF);
}
- unsigned GatherScatterCost =
+ InstructionCost GatherScatterCost =
isLegalGatherOrScatter(&I)
? getGatherScatterCost(&I, VF) * NumAccesses
- : std::numeric_limits<unsigned>::max();
+ : std::numeric_limits<int>::max();
- unsigned ScalarizationCost =
+ InstructionCost ScalarizationCost =
getMemInstScalarizationCost(&I, VF) * NumAccesses;
// Choose better solution for the current VF,
// write down this decision and use it during vectorization.
- unsigned Cost;
+ InstructionCost Cost;
InstWidening Decision;
if (InterleaveCost <= GatherScatterCost &&
InterleaveCost < ScalarizationCost) {
@@ -6179,8 +7126,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
// Add all instructions used to generate the addresses.
SmallVector<Instruction *, 4> Worklist;
- for (auto *I : AddrDefs)
- Worklist.push_back(I);
+ append_range(Worklist, AddrDefs);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
for (auto &Op : I->operands())
@@ -6199,14 +7145,18 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
InstWidening Decision = getWideningDecision(I, VF);
if (Decision == CM_Widen || Decision == CM_Widen_Reverse)
// Scalarize a widened load of address.
- setWideningDecision(I, VF, CM_Scalarize,
- (VF * getMemoryInstructionCost(I, 1)));
+ setWideningDecision(
+ I, VF, CM_Scalarize,
+ (VF.getKnownMinValue() *
+ getMemoryInstructionCost(I, ElementCount::getFixed(1))));
else if (auto Group = getInterleavedAccessGroup(I)) {
// Scalarize an interleave group of address loads.
for (unsigned I = 0; I < Group->getFactor(); ++I) {
if (Instruction *Member = Group->getMember(I))
- setWideningDecision(Member, VF, CM_Scalarize,
- (VF * getMemoryInstructionCost(Member, 1)));
+ setWideningDecision(
+ Member, VF, CM_Scalarize,
+ (VF.getKnownMinValue() *
+ getMemoryInstructionCost(Member, ElementCount::getFixed(1))));
}
}
} else
@@ -6216,9 +7166,9 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
}
}
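
The memory-widening choice made earlier in this function reduces to picking the cheapest of the three costs; a condensed sketch, where the interleave test mirrors the comparison shown above and the remaining tie-breaking order is an assumption for illustration:

  enum class MemDecision { Interleave, GatherScatter, Scalarize };

  MemDecision pickMemWidening(long InterleaveCost, long GatherScatterCost,
                              long ScalarizationCost) {
    if (InterleaveCost <= GatherScatterCost &&
        InterleaveCost < ScalarizationCost)
      return MemDecision::Interleave;
    if (GatherScatterCost < ScalarizationCost) // assumed ordering
      return MemDecision::GatherScatter;
    return MemDecision::Scalarize;
  }
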
-unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
- unsigned VF,
- Type *&VectorTy) {
+InstructionCost
+LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
+ Type *&VectorTy) {
Type *RetTy = I->getType();
if (canTruncateToMinimalBitwidth(I, VF))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
@@ -6240,19 +7190,22 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// blocks requires also an extract of its vector compare i1 element.
bool ScalarPredicatedBB = false;
BranchInst *BI = cast<BranchInst>(I);
- if (VF > 1 && BI->isConditional() &&
+ if (VF.isVector() && BI->isConditional() &&
(PredicatedBBsAfterVectorization.count(BI->getSuccessor(0)) ||
PredicatedBBsAfterVectorization.count(BI->getSuccessor(1))))
ScalarPredicatedBB = true;
if (ScalarPredicatedBB) {
// Return cost for branches around scalarized and predicated blocks.
+ assert(!VF.isScalable() && "scalable vectors not yet supported.");
auto *Vec_i1Ty =
- FixedVectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
- return (TTI.getScalarizationOverhead(Vec_i1Ty, APInt::getAllOnesValue(VF),
- false, true) +
- (TTI.getCFInstrCost(Instruction::Br, CostKind) * VF));
- } else if (I->getParent() == TheLoop->getLoopLatch() || VF == 1)
+ VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
+ return (TTI.getScalarizationOverhead(
+ Vec_i1Ty, APInt::getAllOnesValue(VF.getKnownMinValue()),
+ false, true) +
+ (TTI.getCFInstrCost(Instruction::Br, CostKind) *
+ VF.getKnownMinValue()));
+ } else if (I->getParent() == TheLoop->getLoopLatch() || VF.isScalar())
// The back-edge branch will remain, as will all scalar branches.
return TTI.getCFInstrCost(Instruction::Br, CostKind);
else
@@ -6267,20 +7220,20 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// First-order recurrences are replaced by vector shuffles inside the loop.
// NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
- if (VF > 1 && Legal->isFirstOrderRecurrence(Phi))
- return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- cast<VectorType>(VectorTy), VF - 1,
- FixedVectorType::get(RetTy, 1));
+ if (VF.isVector() && Legal->isFirstOrderRecurrence(Phi))
+ return TTI.getShuffleCost(
+ TargetTransformInfo::SK_ExtractSubvector, cast<VectorType>(VectorTy),
+ VF.getKnownMinValue() - 1, FixedVectorType::get(RetTy, 1));
// Phi nodes in non-header blocks (not inductions, reductions, etc.) are
// converted into select instructions. We require N - 1 selects per phi
// node, where N is the number of incoming values.
- if (VF > 1 && Phi->getParent() != TheLoop->getHeader())
+ if (VF.isVector() && Phi->getParent() != TheLoop->getHeader())
return (Phi->getNumIncomingValues() - 1) *
TTI.getCmpSelInstrCost(
Instruction::Select, ToVectorTy(Phi->getType(), VF),
ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF),
- CostKind);
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
return TTI.getCFInstrCost(Instruction::PHI, CostKind);
}
@@ -6292,17 +7245,19 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// vector lane. Get the scalarization cost and scale this amount by the
// probability of executing the predicated block. If the instruction is not
// predicated, we fall through to the next case.
- if (VF > 1 && isScalarWithPredication(I)) {
- unsigned Cost = 0;
+ if (VF.isVector() && isScalarWithPredication(I)) {
+ InstructionCost Cost = 0;
// These instructions have a non-void type, so account for the phi nodes
// that we will create. This cost is likely to be zero. The phi node
// cost, if any, should be scaled by the block probability because it
// models a copy at the end of each predicated block.
- Cost += VF * TTI.getCFInstrCost(Instruction::PHI, CostKind);
+ Cost += VF.getKnownMinValue() *
+ TTI.getCFInstrCost(Instruction::PHI, CostKind);
// The cost of the non-predicated instruction.
- Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind);
+ Cost += VF.getKnownMinValue() *
+ TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind);
// The cost of insertelement and extractelement instructions needed for
// scalarization.
@@ -6331,6 +7286,13 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// Since we will replace the stride by 1 the multiplication should go away.
if (I->getOpcode() == Instruction::Mul && isStrideMul(I, Legal))
return 0;
+
+ // Detect reduction patterns
+ InstructionCost RedCost;
+ if ((RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind))
+ .isValid())
+ return RedCost;
+
// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this are shifts on x86.
Value *Op2 = I->getOperand(1);
@@ -6341,14 +7303,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Op2VK = TargetTransformInfo::OK_UniformValue;
SmallVector<const Value *, 4> Operands(I->operand_values());
- unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+ unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
return N * TTI.getArithmeticInstrCost(
I->getOpcode(), VectorTy, CostKind,
TargetTransformInfo::OK_AnyValue,
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
}
case Instruction::FNeg: {
- unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+ assert(!VF.isScalable() && "VF is assumed to be non scalable.");
+ unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
return N * TTI.getArithmeticInstrCost(
I->getOpcode(), VectorTy, CostKind,
TargetTransformInfo::OK_AnyValue,
@@ -6362,10 +7325,9 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType();
if (!ScalarCond)
- CondTy = FixedVectorType::get(CondTy, VF);
-
+ CondTy = VectorType::get(CondTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
- CostKind, I);
+ CmpInst::BAD_ICMP_PREDICATE, CostKind, I);
}
case Instruction::ICmp:
case Instruction::FCmp: {
@@ -6374,18 +7336,18 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
VectorTy = ToVectorTy(ValTy, VF);
- return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, CostKind,
- I);
+ return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind, I);
}
case Instruction::Store:
case Instruction::Load: {
- unsigned Width = VF;
- if (Width > 1) {
+ ElementCount Width = VF;
+ if (Width.isVector()) {
InstWidening Decision = getWideningDecision(I, Width);
assert(Decision != CM_Unknown &&
"CM decision should be taken at this point");
if (Decision == CM_Scalarize)
- Width = 1;
+ Width = ElementCount::getFixed(1);
}
VectorTy = ToVectorTy(getMemInstValueType(I), Width);
return getMemoryInstructionCost(I, VF);
@@ -6402,15 +7364,62 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
+ // Computes the CastContextHint from a Load/Store instruction.
+ auto ComputeCCH = [&](Instruction *I) -> TTI::CastContextHint {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ "Expected a load or a store!");
+
+ if (VF.isScalar() || !TheLoop->contains(I))
+ return TTI::CastContextHint::Normal;
+
+ switch (getWideningDecision(I, VF)) {
+ case LoopVectorizationCostModel::CM_GatherScatter:
+ return TTI::CastContextHint::GatherScatter;
+ case LoopVectorizationCostModel::CM_Interleave:
+ return TTI::CastContextHint::Interleave;
+ case LoopVectorizationCostModel::CM_Scalarize:
+ case LoopVectorizationCostModel::CM_Widen:
+ return Legal->isMaskRequired(I) ? TTI::CastContextHint::Masked
+ : TTI::CastContextHint::Normal;
+ case LoopVectorizationCostModel::CM_Widen_Reverse:
+ return TTI::CastContextHint::Reversed;
+ case LoopVectorizationCostModel::CM_Unknown:
+ llvm_unreachable("Instr did not go through cost modelling?");
+ }
+
+ llvm_unreachable("Unhandled case!");
+ };
+
+ unsigned Opcode = I->getOpcode();
+ TTI::CastContextHint CCH = TTI::CastContextHint::None;
+ // For Trunc, the context is the only user, which must be a StoreInst.
+ if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {
+ if (I->hasOneUse())
+ if (StoreInst *Store = dyn_cast<StoreInst>(*I->user_begin()))
+ CCH = ComputeCCH(Store);
+ }
+ // For Z/Sext, the context is the operand, which must be a LoadInst.
+ else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
+ Opcode == Instruction::FPExt) {
+ if (LoadInst *Load = dyn_cast<LoadInst>(I->getOperand(0)))
+ CCH = ComputeCCH(Load);
+ }
+
// We optimize the truncation of induction variables having constant
// integer steps. The cost of these truncations is the same as the scalar
// operation.
if (isOptimizableIVTruncate(I, VF)) {
auto *Trunc = cast<TruncInst>(I);
return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(),
- Trunc->getSrcTy(), CostKind, Trunc);
+ Trunc->getSrcTy(), CCH, CostKind, Trunc);
}
+ // Detect reduction patterns
+ InstructionCost RedCost;
+ if ((RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind))
+ .isValid())
+ return RedCost;
+
Type *SrcScalarTy = I->getOperand(0)->getType();
Type *SrcVecTy =
VectorTy->isVectorTy() ? ToVectorTy(SrcScalarTy, VF) : SrcScalarTy;
@@ -6421,35 +7430,39 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
//
// Calculate the modified src and dest types.
Type *MinVecTy = VectorTy;
- if (I->getOpcode() == Instruction::Trunc) {
+ if (Opcode == Instruction::Trunc) {
SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy);
VectorTy =
largestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
- } else if (I->getOpcode() == Instruction::ZExt ||
- I->getOpcode() == Instruction::SExt) {
+ } else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy);
VectorTy =
smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
}
}
- unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
- return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy,
- CostKind, I);
+ assert(!VF.isScalable() && "VF is assumed to be non scalable");
+ unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
+ return N *
+ TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
}
case Instruction::Call: {
bool NeedToScalarize;
CallInst *CI = cast<CallInst>(I);
- unsigned CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
- if (getVectorIntrinsicIDForCall(CI, TLI))
- return std::min(CallCost, getVectorIntrinsicCost(CI, VF));
+ InstructionCost CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
+ if (getVectorIntrinsicIDForCall(CI, TLI)) {
+ InstructionCost IntrinsicCost = getVectorIntrinsicCost(CI, VF);
+ return std::min(CallCost, IntrinsicCost);
+ }
return CallCost;
}
+ case Instruction::ExtractValue:
+ return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput);
default:
// The cost of executing VF copies of the scalar instruction. This opcode
// is unknown. Assume that it is the same as 'mul'.
- return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
- CostKind) +
+ return VF.getKnownMinValue() * TTI.getArithmeticInstrCost(
+ Instruction::Mul, VectorTy, CostKind) +
getScalarizationOverhead(I, VF);
} // end of switch.
}
@@ -6502,7 +7515,7 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
// detection.
for (auto &Reduction : Legal->getReductionVars()) {
RecurrenceDescriptor &RedDes = Reduction.second;
- SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
+ const SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
VecValuesToIgnore.insert(Casts.begin(), Casts.end());
}
// Ignore type-casting instructions we identified during induction
@@ -6514,6 +7527,43 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
}
}
+void LoopVectorizationCostModel::collectInLoopReductions() {
+ for (auto &Reduction : Legal->getReductionVars()) {
+ PHINode *Phi = Reduction.first;
+ RecurrenceDescriptor &RdxDesc = Reduction.second;
+
+ // We don't collect reductions that are type promoted (yet).
+ if (RdxDesc.getRecurrenceType() != Phi->getType())
+ continue;
+
+ // If the target would prefer this reduction to happen "in-loop", then we
+ // want to record it as such.
+ unsigned Opcode = RdxDesc.getOpcode();
+ if (!PreferInLoopReductions &&
+ !TTI.preferInLoopReduction(Opcode, Phi->getType(),
+ TargetTransformInfo::ReductionFlags()))
+ continue;
+
+ // Check that we can correctly put the reductions into the loop, by
+ // finding the chain of operations that leads from the phi to the loop
+ // exit value.
+ SmallVector<Instruction *, 4> ReductionOperations =
+ RdxDesc.getReductionOpChain(Phi, TheLoop);
+ bool InLoop = !ReductionOperations.empty();
+ if (InLoop) {
+ InLoopReductionChains[Phi] = ReductionOperations;
+ // Add the elements to InLoopReductionImmediateChains for cost modelling.
+ Instruction *LastChain = Phi;
+ for (auto *I : ReductionOperations) {
+ InLoopReductionImmediateChains[I] = LastChain;
+ LastChain = I;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "LV: Using " << (InLoop ? "inloop" : "out of loop")
+ << " reduction for phi: " << *Phi << "\n");
+ }
+}
+
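
A toy illustration of the two reduction shapes this chooses between, for a loop of the form "for (i) Sum += A[i]" (hypothetical helper names, not LLVM APIs):

  #include <array>
  #include <numeric>

  using Vec4 = std::array<int, 4>;

  static int horizontalAdd(const Vec4 &V) {
    return std::accumulate(V.begin(), V.end(), 0);
  }

  // Out-of-loop (default): lane-wise adds in the loop, one reduce afterwards.
  int outOfLoopReduction(const Vec4 *Chunks, int NumChunks) {
    Vec4 Acc{};
    for (int I = 0; I < NumChunks; ++I)
      for (int L = 0; L < 4; ++L)
        Acc[L] += Chunks[I][L];
    return horizontalAdd(Acc);
  }

  // In-loop: reduce every iteration into a scalar accumulator, which some
  // targets with add-across-lanes instructions prefer.
  int inLoopReduction(const Vec4 *Chunks, int NumChunks) {
    int Sum = 0;
    for (int I = 0; I < NumChunks; ++I)
      Sum += horizontalAdd(Chunks[I]);
    return Sum;
  }
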
// TODO: we could return a pair of values that specify the max VF and
// min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of
// `buildVPlans(VF, VF)`. We cannot do it because VPLAN at the moment
@@ -6527,37 +7577,40 @@ static unsigned determineVPlanVF(const unsigned WidestVectorRegBits,
}
VectorizationFactor
-LoopVectorizationPlanner::planInVPlanNativePath(unsigned UserVF) {
- unsigned VF = UserVF;
+LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
+ assert(!UserVF.isScalable() && "scalable vectors not yet supported");
+ ElementCount VF = UserVF;
// Outer loop handling: They may require CFG and instruction level
// transformations before even evaluating whether vectorization is profitable.
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
// the vectorization pipeline.
- if (!OrigLoop->empty()) {
+ if (!OrigLoop->isInnermost()) {
// If the user doesn't provide a vectorization factor, determine a
// reasonable one.
- if (!UserVF) {
- VF = determineVPlanVF(TTI->getRegisterBitWidth(true /* Vector*/), CM);
+ if (UserVF.isZero()) {
+ VF = ElementCount::getFixed(
+ determineVPlanVF(TTI->getRegisterBitWidth(true /* Vector*/), CM));
LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
// Make sure we have a VF > 1 for stress testing.
- if (VPlanBuildStressTest && VF < 2) {
+ if (VPlanBuildStressTest && (VF.isScalar() || VF.isZero())) {
LLVM_DEBUG(dbgs() << "LV: VPlan stress testing: "
<< "overriding computed VF.\n");
- VF = 4;
+ VF = ElementCount::getFixed(4);
}
}
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
- assert(isPowerOf2_32(VF) && "VF needs to be a power of two");
- LLVM_DEBUG(dbgs() << "LV: Using " << (UserVF ? "user " : "") << "VF " << VF
- << " to build VPlans.\n");
+ assert(isPowerOf2_32(VF.getKnownMinValue()) &&
+ "VF needs to be a power of two");
+ LLVM_DEBUG(dbgs() << "LV: Using " << (!UserVF.isZero() ? "user " : "")
+ << "VF " << VF << " to build VPlans.\n");
buildVPlans(VF, VF);
// For VPlan build stress testing, we bail out after VPlan construction.
if (VPlanBuildStressTest)
return VectorizationFactor::Disabled();
- return {VF, 0};
+ return {VF, 0 /*Cost*/};
}
LLVM_DEBUG(
@@ -6566,10 +7619,10 @@ LoopVectorizationPlanner::planInVPlanNativePath(unsigned UserVF) {
return VectorizationFactor::Disabled();
}
-Optional<VectorizationFactor> LoopVectorizationPlanner::plan(unsigned UserVF,
- unsigned UserIC) {
- assert(OrigLoop->empty() && "Inner loop expected.");
- Optional<unsigned> MaybeMaxVF = CM.computeMaxVF(UserVF, UserIC);
+Optional<VectorizationFactor>
+LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
+ assert(OrigLoop->isInnermost() && "Inner loop expected.");
+ Optional<ElementCount> MaybeMaxVF = CM.computeMaxVF(UserVF, UserIC);
if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved.
return None;
@@ -6587,40 +7640,55 @@ Optional<VectorizationFactor> LoopVectorizationPlanner::plan(unsigned UserVF,
CM.invalidateCostModelingDecisions();
}
- if (UserVF) {
- LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
- assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
+ ElementCount MaxVF = MaybeMaxVF.getValue();
+ assert(MaxVF.isNonZero() && "MaxVF is zero.");
+
+ bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxVF);
+ if (!UserVF.isZero() &&
+ (UserVFIsLegal || (UserVF.isScalable() && MaxVF.isScalable()))) {
+ // FIXME: MaxVF is temporarily used in place of UserVF for illegal scalable
+ // VFs here, this should be reverted to only use legal UserVFs once the
+ // loop below supports scalable VFs.
+ ElementCount VF = UserVFIsLegal ? UserVF : MaxVF;
+ LLVM_DEBUG(dbgs() << "LV: Using " << (UserVFIsLegal ? "user" : "max")
+ << " VF " << VF << ".\n");
+ assert(isPowerOf2_32(VF.getKnownMinValue()) &&
+ "VF needs to be a power of two");
// Collect the instructions (and their associated costs) that will be more
// profitable to scalarize.
- CM.selectUserVectorizationFactor(UserVF);
- buildVPlansWithVPRecipes(UserVF, UserVF);
+ CM.selectUserVectorizationFactor(VF);
+ CM.collectInLoopReductions();
+ buildVPlansWithVPRecipes(VF, VF);
LLVM_DEBUG(printPlans(dbgs()));
- return {{UserVF, 0}};
+ return {{VF, 0}};
}
- unsigned MaxVF = MaybeMaxVF.getValue();
- assert(MaxVF != 0 && "MaxVF is zero.");
+ assert(!MaxVF.isScalable() &&
+ "Scalable vectors not yet supported beyond this point");
- for (unsigned VF = 1; VF <= MaxVF; VF *= 2) {
+ for (ElementCount VF = ElementCount::getFixed(1);
+ ElementCount::isKnownLE(VF, MaxVF); VF *= 2) {
// Collect Uniform and Scalar instructions after vectorization with VF.
CM.collectUniformsAndScalars(VF);
// Collect the instructions (and their associated costs) that will be more
// profitable to scalarize.
- if (VF > 1)
+ if (VF.isVector())
CM.collectInstsToScalarize(VF);
}
- buildVPlansWithVPRecipes(1, MaxVF);
+ CM.collectInLoopReductions();
+
+ buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxVF);
LLVM_DEBUG(printPlans(dbgs()));
- if (MaxVF == 1)
+ if (MaxVF.isScalar())
return VectorizationFactor::Disabled();
// Select the optimal vectorization factor.
return CM.selectVectorizationFactor(MaxVF);
}
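
A sketch of the fixed-VF candidate walk performed above, which only visits powers of two up to MaxVF (for example, MaxVF = 8 yields 1, 2, 4, 8):

  #include <vector>

  std::vector<unsigned> candidateVFs(unsigned MaxVF) {
    std::vector<unsigned> VFs;
    for (unsigned VF = 1; VF <= MaxVF; VF *= 2) // powers of two only
      VFs.push_back(VF);
    return VFs;
  }
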
-void LoopVectorizationPlanner::setBestPlan(unsigned VF, unsigned UF) {
+void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) {
LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF
<< '\n');
BestVF = VF;
@@ -6639,13 +7707,23 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
// 1. Create a new empty loop. Unlink the old loop and connect the new one.
VPCallbackILV CallbackILV(ILV);
- VPTransformState State{BestVF, BestUF, LI,
- DT, ILV.Builder, ILV.VectorLoopValueMap,
- &ILV, CallbackILV};
+ assert(BestVF.hasValue() && "Vectorization Factor is missing");
+
+ VPTransformState State{*BestVF,
+ BestUF,
+ OrigLoop,
+ LI,
+ DT,
+ ILV.Builder,
+ ILV.VectorLoopValueMap,
+ &ILV,
+ CallbackILV};
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
State.TripCount = ILV.getOrCreateTripCount(nullptr);
State.CanonicalIV = ILV.Induction;
+ ILV.printDebugTracesAtStart();
+
//===------------------------------------------------===//
//
// Notice: any optimization or new instruction that go
@@ -6661,25 +7739,48 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
ILV.fixVectorizedLoop();
+
+ ILV.printDebugTracesAtEnd();
}
void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
- BasicBlock *Latch = OrigLoop->getLoopLatch();
- // We create new control-flow for the vectorized loop, so the original
- // condition will be dead after vectorization if it's only used by the
- // branch.
- auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
- if (Cmp && Cmp->hasOneUse())
- DeadInstructions.insert(Cmp);
+ // We create new control-flow for the vectorized loop, so the original exit
+ // conditions will be dead after vectorization if they are only used by the
+ // terminator.
+ SmallVector<BasicBlock*> ExitingBlocks;
+ OrigLoop->getExitingBlocks(ExitingBlocks);
+ for (auto *BB : ExitingBlocks) {
+ auto *Cmp = dyn_cast<Instruction>(BB->getTerminator()->getOperand(0));
+ if (!Cmp || !Cmp->hasOneUse())
+ continue;
+
+ // TODO: we should introduce a getUniqueExitingBlocks on Loop
+ if (!DeadInstructions.insert(Cmp).second)
+ continue;
+
+ // An operand of the icmp is often a dead trunc, used by IndUpdate.
+ // TODO: can recurse through operands in general
+ for (Value *Op : Cmp->operands()) {
+ if (isa<TruncInst>(Op) && Op->hasOneUse())
+ DeadInstructions.insert(cast<Instruction>(Op));
+ }
+ }
// We create new "steps" for induction variable updates to which the original
// induction variables map. An original update instruction will be dead if
// all its users except the induction variable are dead.
+ auto *Latch = OrigLoop->getLoopLatch();
for (auto &Induction : Legal->getInductionVars()) {
PHINode *Ind = Induction.first;
auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
+
+ // If the tail is to be folded by masking, the primary induction variable,
+ // if exists, isn't dead: it will be used for masking. Don't kill it.
+ if (CM.foldTailByMasking() && IndUpdate == Legal->getPrimaryInduction())
+ continue;
+
if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
return U == Ind || DeadInstructions.count(cast<Instruction>(U));
}))
@@ -6754,12 +7855,284 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
}
}
+//===--------------------------------------------------------------------===//
+// EpilogueVectorizerMainLoop
+//===--------------------------------------------------------------------===//
+
+/// This function is partially responsible for generating the control flow
+/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
+BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
+ MDNode *OrigLoopID = OrigLoop->getLoopID();
+ Loop *Lp = createVectorLoopSkeleton("");
+
+ // Generate the code to check the minimum iteration count of the vector
+ // epilogue (see below).
+ EPI.EpilogueIterationCountCheck =
+ emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader, true);
+ EPI.EpilogueIterationCountCheck->setName("iter.check");
+
+ // Generate the code to check any assumptions that we've made for SCEV
+ // expressions.
+ BasicBlock *SavedPreHeader = LoopVectorPreHeader;
+ emitSCEVChecks(Lp, LoopScalarPreHeader);
+
+ // If a safety check was generated, save it.
+ if (SavedPreHeader != LoopVectorPreHeader)
+ EPI.SCEVSafetyCheck = SavedPreHeader;
+
+ // Generate the code that checks at runtime if arrays overlap. We put the
+ // checks into a separate block to make the more common case of few elements
+ // faster.
+ SavedPreHeader = LoopVectorPreHeader;
+ emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
+
+ // If a safety check was generated, save/overwrite it.
+ if (SavedPreHeader != LoopVectorPreHeader)
+ EPI.MemSafetyCheck = SavedPreHeader;
+
+ // Generate the iteration count check for the main loop, *after* the check
+ // for the epilogue loop, so that the path-length is shorter for the case
+ // that goes directly through the vector epilogue. The longer-path length for
+ // the main loop is compensated for by the gain from vectorizing the larger
+ // trip count. Note: the branch will get updated later on when we vectorize
+ // the epilogue.
+ EPI.MainLoopIterationCountCheck =
+ emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader, false);
+
+ // Generate the induction variable.
+ OldInduction = Legal->getPrimaryInduction();
+ Type *IdxTy = Legal->getWidestInductionType();
+ Value *StartIdx = ConstantInt::get(IdxTy, 0);
+ Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF);
+ Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
+ EPI.VectorTripCount = CountRoundDown;
+ Induction =
+ createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
+ getDebugLocFromInstOrOperands(OldInduction));
+
+ // Skip induction resume value creation here because they will be created in
+ // the second pass. If we created them here, they wouldn't be used anyway,
+ // because the vplan in the second pass still contains the inductions from the
+ // original loop.
+
+ return completeLoopSkeleton(Lp, OrigLoopID);
+}
+
+void EpilogueVectorizerMainLoop::printDebugTracesAtStart() {
+ LLVM_DEBUG({
+ dbgs() << "Create Skeleton for epilogue vectorized loop (first pass)\n"
+ << "Main Loop VF:" << EPI.MainLoopVF.getKnownMinValue()
+ << ", Main Loop UF:" << EPI.MainLoopUF
+ << ", Epilogue Loop VF:" << EPI.EpilogueVF.getKnownMinValue()
+ << ", Epilogue Loop UF:" << EPI.EpilogueUF << "\n";
+ });
+}
+
+void EpilogueVectorizerMainLoop::printDebugTracesAtEnd() {
+ DEBUG_WITH_TYPE(VerboseDebug, {
+ dbgs() << "intermediate fn:\n" << *Induction->getFunction() << "\n";
+ });
+}
+
+BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
+ Loop *L, BasicBlock *Bypass, bool ForEpilogue) {
+ assert(L && "Expected valid Loop.");
+ assert(Bypass && "Expected valid bypass basic block.");
+ unsigned VFactor =
+ ForEpilogue ? EPI.EpilogueVF.getKnownMinValue() : VF.getKnownMinValue();
+ unsigned UFactor = ForEpilogue ? EPI.EpilogueUF : UF;
+ Value *Count = getOrCreateTripCount(L);
+ // Reuse existing vector loop preheader for TC checks.
+ // Note that new preheader block is generated for vector loop.
+ BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
+ IRBuilder<> Builder(TCCheckBlock->getTerminator());
+
+ // Generate code to check if the loop's trip count is less than VF * UF of the
+ // main vector loop.
+ auto P =
+ Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
+
+ Value *CheckMinIters = Builder.CreateICmp(
+ P, Count, ConstantInt::get(Count->getType(), VFactor * UFactor),
+ "min.iters.check");
+
+ if (!ForEpilogue)
+ TCCheckBlock->setName("vector.main.loop.iter.check");
+
+ // Create new preheader for vector loop.
+ LoopVectorPreHeader = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(),
+ DT, LI, nullptr, "vector.ph");
+
+ if (ForEpilogue) {
+ assert(DT->properlyDominates(DT->getNode(TCCheckBlock),
+ DT->getNode(Bypass)->getIDom()) &&
+ "TC check is expected to dominate Bypass");
+
+ // Update dominator for Bypass & LoopExit.
+ DT->changeImmediateDominator(Bypass, TCCheckBlock);
+ DT->changeImmediateDominator(LoopExitBlock, TCCheckBlock);
+
+ LoopBypassBlocks.push_back(TCCheckBlock);
+
+ // Save the trip count so we don't have to regenerate it in the
+ // vec.epilog.iter.check. This is safe to do because the trip count
+ // generated here dominates the vector epilog iter check.
+ EPI.TripCount = Count;
+ }
+
+ ReplaceInstWithInst(
+ TCCheckBlock->getTerminator(),
+ BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters));
+
+ return TCCheckBlock;
+}
+
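
A condensed sketch of the minimum-iteration guard emitted above, with the ULE versus ULT choice tied to whether a scalar epilogue is required (fixed-width VF assumed, simplified helper):

  #include <cstdint>

  // Branch to the bypass block when the trip count cannot fill one pass of
  // the vector loop being entered; ULE is used when a scalar epilogue must
  // always run, ULT otherwise, matching the predicate chosen above.
  bool bypassVectorLoop(uint64_t TripCount, unsigned VF, unsigned UF,
                        bool RequiresScalarEpilogue) {
    uint64_t MinIters = uint64_t(VF) * UF;
    return RequiresScalarEpilogue ? TripCount <= MinIters
                                  : TripCount < MinIters;
  }
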
+//===--------------------------------------------------------------------===//
+// EpilogueVectorizerEpilogueLoop
+//===--------------------------------------------------------------------===//
+
+/// This function is partially responsible for generating the control flow
+/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
+BasicBlock *
+EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
+ MDNode *OrigLoopID = OrigLoop->getLoopID();
+ Loop *Lp = createVectorLoopSkeleton("vec.epilog.");
+
+ // Now, compare the remaining count and, if there aren't enough iterations to
+ // execute the vectorized epilogue, skip to the scalar part.
+ BasicBlock *VecEpilogueIterationCountCheck = LoopVectorPreHeader;
+ VecEpilogueIterationCountCheck->setName("vec.epilog.iter.check");
+ LoopVectorPreHeader =
+ SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
+ LI, nullptr, "vec.epilog.ph");
+ emitMinimumVectorEpilogueIterCountCheck(Lp, LoopScalarPreHeader,
+ VecEpilogueIterationCountCheck);
+
+ // Adjust the control flow taking the state info from the main loop
+ // vectorization into account.
+ assert(EPI.MainLoopIterationCountCheck && EPI.EpilogueIterationCountCheck &&
+ "expected this to be saved from the previous pass.");
+ EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith(
+ VecEpilogueIterationCountCheck, LoopVectorPreHeader);
+
+ DT->changeImmediateDominator(LoopVectorPreHeader,
+ EPI.MainLoopIterationCountCheck);
+
+ EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
+ VecEpilogueIterationCountCheck, LoopScalarPreHeader);
+
+ if (EPI.SCEVSafetyCheck)
+ EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
+ VecEpilogueIterationCountCheck, LoopScalarPreHeader);
+ if (EPI.MemSafetyCheck)
+ EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
+ VecEpilogueIterationCountCheck, LoopScalarPreHeader);
+
+ DT->changeImmediateDominator(
+ VecEpilogueIterationCountCheck,
+ VecEpilogueIterationCountCheck->getSinglePredecessor());
+
+ DT->changeImmediateDominator(LoopScalarPreHeader,
+ EPI.EpilogueIterationCountCheck);
+ DT->changeImmediateDominator(LoopExitBlock, EPI.EpilogueIterationCountCheck);
+
+ // Keep track of bypass blocks, as they feed start values to the induction
+ // phis in the scalar loop preheader.
+ if (EPI.SCEVSafetyCheck)
+ LoopBypassBlocks.push_back(EPI.SCEVSafetyCheck);
+ if (EPI.MemSafetyCheck)
+ LoopBypassBlocks.push_back(EPI.MemSafetyCheck);
+ LoopBypassBlocks.push_back(EPI.EpilogueIterationCountCheck);
+
+ // Generate a resume induction for the vector epilogue and put it in the
+ // vector epilogue preheader.
+ Type *IdxTy = Legal->getWidestInductionType();
+ PHINode *EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val",
+ LoopVectorPreHeader->getFirstNonPHI());
+ EPResumeVal->addIncoming(EPI.VectorTripCount, VecEpilogueIterationCountCheck);
+ EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0),
+ EPI.MainLoopIterationCountCheck);
+
+ // Generate the induction variable.
+ OldInduction = Legal->getPrimaryInduction();
+ Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
+ Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF);
+ Value *StartIdx = EPResumeVal;
+ Induction =
+ createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
+ getDebugLocFromInstOrOperands(OldInduction));
+
+ // Generate induction resume values. These variables save the new starting
+ // indexes for the scalar loop. They are used to test if there are any tail
+ // iterations left once the vector loop has completed.
+ // Note that when the vectorized epilogue is skipped due to the iteration
+ // count check, the resume value for the induction variable comes from
+ // the trip count of the main vector loop, hence passing the AdditionalBypass
+ // argument.
+ createInductionResumeValues(Lp, CountRoundDown,
+ {VecEpilogueIterationCountCheck,
+ EPI.VectorTripCount} /* AdditionalBypass */);
+
+ AddRuntimeUnrollDisableMetaData(Lp);
+ return completeLoopSkeleton(Lp, OrigLoopID);
+}
+
+BasicBlock *
+EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
+ Loop *L, BasicBlock *Bypass, BasicBlock *Insert) {
+
+ assert(EPI.TripCount &&
+ "Expected trip count to have been safed in the first pass.");
+ assert(
+ (!isa<Instruction>(EPI.TripCount) ||
+ DT->dominates(cast<Instruction>(EPI.TripCount)->getParent(), Insert)) &&
+ "saved trip count does not dominate insertion point.");
+ Value *TC = EPI.TripCount;
+ IRBuilder<> Builder(Insert->getTerminator());
+ Value *Count = Builder.CreateSub(TC, EPI.VectorTripCount, "n.vec.remaining");
+
+ // Generate code to check if the loop's trip count is less than VF * UF of the
+ // vector epilogue loop.
+ auto P =
+ Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
+
+ Value *CheckMinIters = Builder.CreateICmp(
+ P, Count,
+ ConstantInt::get(Count->getType(),
+ EPI.EpilogueVF.getKnownMinValue() * EPI.EpilogueUF),
+ "min.epilog.iters.check");
+
+ ReplaceInstWithInst(
+ Insert->getTerminator(),
+ BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters));
+
+ LoopBypassBlocks.push_back(Insert);
+ return Insert;
+}
+
+void EpilogueVectorizerEpilogueLoop::printDebugTracesAtStart() {
+ LLVM_DEBUG({
+ dbgs() << "Create Skeleton for epilogue vectorized loop (second pass)\n"
+ << "Main Loop VF:" << EPI.MainLoopVF.getKnownMinValue()
+ << ", Main Loop UF:" << EPI.MainLoopUF
+ << ", Epilogue Loop VF:" << EPI.EpilogueVF.getKnownMinValue()
+ << ", Epilogue Loop UF:" << EPI.EpilogueUF << "\n";
+ });
+}
+
+void EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd() {
+ DEBUG_WITH_TYPE(VerboseDebug, {
+ dbgs() << "final fn:\n" << *Induction->getFunction() << "\n";
+ });
+}
+
bool LoopVectorizationPlanner::getDecisionAndClampRange(
- const std::function<bool(unsigned)> &Predicate, VFRange &Range) {
- assert(Range.End > Range.Start && "Trying to test an empty VF range.");
+ const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
+ assert(!Range.isEmpty() && "Trying to test an empty VF range.");
bool PredicateAtRangeStart = Predicate(Range.Start);
- for (unsigned TmpVF = Range.Start * 2; TmpVF < Range.End; TmpVF *= 2)
+ for (ElementCount TmpVF = Range.Start * 2;
+ ElementCount::isKnownLT(TmpVF, Range.End); TmpVF *= 2)
if (Predicate(TmpVF) != PredicateAtRangeStart) {
Range.End = TmpVF;
break;
@@ -6773,9 +8146,11 @@ bool LoopVectorizationPlanner::getDecisionAndClampRange(
/// of VF's starting at a given VF and extending it as much as possible. Each
/// vectorization decision can potentially shorten this sub-range during
/// buildVPlan().
-void LoopVectorizationPlanner::buildVPlans(unsigned MinVF, unsigned MaxVF) {
- for (unsigned VF = MinVF; VF < MaxVF + 1;) {
- VFRange SubRange = {VF, MaxVF + 1};
+void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
+ ElementCount MaxVF) {
+ auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
+ for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
+ VFRange SubRange = {VF, MaxVFPlusOne};
VPlans.push_back(buildVPlan(SubRange));
VF = SubRange.End;
}
@@ -6800,14 +8175,27 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
if (!BI->isConditional() || BI->getSuccessor(0) == BI->getSuccessor(1))
return EdgeMaskCache[Edge] = SrcMask;
- VPValue *EdgeMask = Plan->getVPValue(BI->getCondition());
+ // If source is an exiting block, we know the exit edge is dynamically dead
+ // in the vector loop, and thus we don't need to restrict the mask. Avoid
+ // adding uses of an otherwise potentially dead instruction.
+ if (OrigLoop->isLoopExiting(Src))
+ return EdgeMaskCache[Edge] = SrcMask;
+
+ VPValue *EdgeMask = Plan->getOrAddVPValue(BI->getCondition());
assert(EdgeMask && "No Edge Mask found for condition");
if (BI->getSuccessor(0) != Dst)
EdgeMask = Builder.createNot(EdgeMask);
- if (SrcMask) // Otherwise block in-mask is all-one, no need to AND.
- EdgeMask = Builder.createAnd(EdgeMask, SrcMask);
+ if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
+ // The condition is 'SrcMask && EdgeMask', which is equivalent to
+ // 'select i1 SrcMask, i1 EdgeMask, i1 false'.
+ // The select version does not introduce new UB if SrcMask is false and
+ // EdgeMask is poison. Using 'and' here introduces undefined behavior.
+ VPValue *False = Plan->getOrAddVPValue(
+ ConstantInt::getFalse(BI->getCondition()->getType()));
+ EdgeMask = Builder.createSelect(SrcMask, EdgeMask, False);
+ }
return EdgeMaskCache[Edge] = EdgeMask;
}
@@ -6828,23 +8216,34 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
if (!CM.blockNeedsPredication(BB))
return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
+ // Create the block in mask as the first non-phi instruction in the block.
+ VPBuilder::InsertPointGuard Guard(Builder);
+ auto NewInsertionPoint = Builder.getInsertBlock()->getFirstNonPhi();
+ Builder.setInsertPoint(Builder.getInsertBlock(), NewInsertionPoint);
+
// Introduce the early-exit compare IV <= BTC to form header block mask.
// This is used instead of IV < TC because TC may wrap, unlike BTC.
// Start by constructing the desired canonical IV.
VPValue *IV = nullptr;
if (Legal->getPrimaryInduction())
- IV = Plan->getVPValue(Legal->getPrimaryInduction());
+ IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction());
else {
auto IVRecipe = new VPWidenCanonicalIVRecipe();
- Builder.getInsertBlock()->appendRecipe(IVRecipe);
+ Builder.getInsertBlock()->insert(IVRecipe, NewInsertionPoint);
IV = IVRecipe->getVPValue();
}
VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
bool TailFolded = !CM.isScalarEpilogueAllowed();
- if (TailFolded && CM.TTI.emitGetActiveLaneMask())
- BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, BTC});
- else
+
+ if (TailFolded && CM.TTI.emitGetActiveLaneMask()) {
+ // While ActiveLaneMask is a binary op that consumes the loop tripcount
+ // as a second argument, we only pass the IV here and extract the
+ // tripcount from the transform state where codegen of the VP instructions
+ // happens.
+ BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV});
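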
+ } else {
BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC});
+ }
return BlockMaskCache[BB] = BlockMask;
}
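
The IV <= BTC form matters because the trip count TC = BTC + 1 can wrap to 0 in the IV's type, which would make an IV < TC header mask all-false. A small standalone illustration with an 8-bit counter (types and values chosen only for the example):

    #include <cassert>
    #include <cstdint>

    int main() {
      // A loop that executes 256 times with an 8-bit induction variable:
      // the backedge-taken count fits (255), the trip count wraps to 0.
      uint8_t BTC = 255;
      uint8_t TC = static_cast<uint8_t>(BTC + 1);  // wraps to 0

      for (unsigned I = 0; I < 256; ++I) {
        uint8_t IV = static_cast<uint8_t>(I);
        bool MaskLE = IV <= BTC;  // header mask used above: true for every lane
        bool MaskLT = IV < TC;    // would be all-false once TC has wrapped
        assert(MaskLE);
        assert(!MaskLT);
      }
    }
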
@@ -6865,14 +8264,13 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
return BlockMaskCache[BB] = BlockMask;
}
-VPWidenMemoryInstructionRecipe *
-VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
- VPlanPtr &Plan) {
+VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
+ VPlanPtr &Plan) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
"Must be called with either a load or store");
- auto willWiden = [&](unsigned VF) -> bool {
- if (VF == 1)
+ auto willWiden = [&](ElementCount VF) -> bool {
+ if (VF.isScalar())
return false;
LoopVectorizationCostModel::InstWidening Decision =
CM.getWideningDecision(I, VF);
@@ -6903,20 +8301,22 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
}
VPWidenIntOrFpInductionRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi) const {
+VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi, VPlan &Plan) const {
// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
InductionDescriptor II = Legal->getInductionVars().lookup(Phi);
if (II.getKind() == InductionDescriptor::IK_IntInduction ||
- II.getKind() == InductionDescriptor::IK_FpInduction)
- return new VPWidenIntOrFpInductionRecipe(Phi);
+ II.getKind() == InductionDescriptor::IK_FpInduction) {
+ VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start);
+ }
return nullptr;
}
VPWidenIntOrFpInductionRecipe *
-VPRecipeBuilder::tryToOptimizeInductionTruncate(TruncInst *I,
- VFRange &Range) const {
+VPRecipeBuilder::tryToOptimizeInductionTruncate(TruncInst *I, VFRange &Range,
+ VPlan &Plan) const {
// Optimize the special case where the source is a constant integer
// induction variable. Notice that we can only optimize the 'trunc' case
// because (a) FP conversions lose precision, (b) sext/zext may wrap, and
@@ -6925,15 +8325,21 @@ VPRecipeBuilder::tryToOptimizeInductionTruncate(TruncInst *I,
// Determine whether \p K is a truncation based on an induction variable that
// can be optimized.
auto isOptimizableIVTruncate =
- [&](Instruction *K) -> std::function<bool(unsigned)> {
- return
- [=](unsigned VF) -> bool { return CM.isOptimizableIVTruncate(K, VF); };
+ [&](Instruction *K) -> std::function<bool(ElementCount)> {
+ return [=](ElementCount VF) -> bool {
+ return CM.isOptimizableIVTruncate(K, VF);
+ };
};
if (LoopVectorizationPlanner::getDecisionAndClampRange(
- isOptimizableIVTruncate(I), Range))
+ isOptimizableIVTruncate(I), Range)) {
+
+ InductionDescriptor II =
+ Legal->getInductionVars().lookup(cast<PHINode>(I->getOperand(0)));
+ VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
return new VPWidenIntOrFpInductionRecipe(cast<PHINode>(I->getOperand(0)),
- I);
+ Start, I);
+ }
return nullptr;
}
@@ -6962,7 +8368,9 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, VFRange &Range,
VPlan &Plan) const {
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
- [this, CI](unsigned VF) { return CM.isScalarWithPredication(CI, VF); },
+ [this, CI](ElementCount VF) {
+ return CM.isScalarWithPredication(CI, VF);
+ },
Range);
if (IsPredicated)
@@ -6970,19 +8378,23 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, VFRange &Range,
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
- ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect))
+ ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect ||
+ ID == Intrinsic::pseudoprobe ||
+ ID == Intrinsic::experimental_noalias_scope_decl))
return nullptr;
- auto willWiden = [&](unsigned VF) -> bool {
+ auto willWiden = [&](ElementCount VF) -> bool {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// The following case may be scalarized depending on the VF.
// The flag shows whether we use Intrinsic or a usual Call for vectorized
// version of the instruction.
// Is it beneficial to perform intrinsic call compared to lib call?
bool NeedToScalarize = false;
- unsigned CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize);
- bool UseVectorIntrinsic =
- ID && CM.getVectorIntrinsicCost(CI, VF) <= CallCost;
+ InstructionCost CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize);
+ InstructionCost IntrinsicCost = ID ? CM.getVectorIntrinsicCost(CI, VF) : 0;
+ bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost;
+ assert(IntrinsicCost.isValid() && CallCost.isValid() &&
+ "Cannot have invalid costs while widening");
return UseVectorIntrinsic || !NeedToScalarize;
};
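
The decision above now compares InstructionCost values and asserts that both are valid before doing so, since an unknown cost must not silently decide between the intrinsic and the library call. A simplified standalone stand-in for that comparison (an optional cost modelling validity; InstructionCost itself is not an std::optional, and the numbers are invented):

    #include <cassert>
    #include <optional>

    // A stand-in for InstructionCost: nullopt means "invalid/unknown".
    using Cost = std::optional<unsigned>;

    // Mirrors the shape of the willWiden lambda above.
    static bool willWiden(Cost CallCost, Cost IntrinsicCost, bool HasIntrinsicID,
                          bool NeedToScalarize) {
      assert(CallCost && (!HasIntrinsicID || IntrinsicCost) &&
             "cannot compare unknown costs");
      bool UseVectorIntrinsic = HasIntrinsicID && *IntrinsicCost <= *CallCost;
      return UseVectorIntrinsic || !NeedToScalarize;
    }

    int main() {
      // Intrinsic is cheaper than a vector libcall: widen via the intrinsic.
      assert(willWiden(/*CallCost=*/8, /*IntrinsicCost=*/4, true, true));
      // No intrinsic and the call would have to be scalarized: do not widen.
      assert(!willWiden(/*CallCost=*/8, std::nullopt, false, true));
    }
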
@@ -6997,7 +8409,7 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
!isa<StoreInst>(I) && "Instruction should have been handled earlier");
// Instruction should be widened, unless it is scalar after vectorization,
// scalarization is profitable or it is predicated.
- auto WillScalarize = [this, I](unsigned VF) -> bool {
+ auto WillScalarize = [this, I](ElementCount VF) -> bool {
return CM.isScalarAfterVectorization(I, VF) ||
CM.isProfitableToScalarize(I, VF) ||
CM.isScalarWithPredication(I, VF);
@@ -7060,15 +8472,17 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
DenseMap<Instruction *, VPReplicateRecipe *> &PredInst2Recipe,
VPlanPtr &Plan) {
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](unsigned VF) { return CM.isUniformAfterVectorization(I, VF); },
+ [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
+ [&](ElementCount VF) { return CM.isScalarWithPredication(I, VF); },
+ Range);
auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
IsUniform, IsPredicated);
setRecipe(I, Recipe);
+ Plan->addVPValue(I, Recipe);
// Find if I uses a predicated instruction. If so, it will use its scalar
// value. Avoid hoisting the insert-element which packs the scalar value into
@@ -7110,8 +8524,9 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
assert(Instr->getParent() && "Predicated instruction not in any basic block");
auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);
- auto *PHIRecipe =
- Instr->getType()->isVoidTy() ? nullptr : new VPPredInstPHIRecipe(Instr);
+ auto *PHIRecipe = Instr->getType()->isVoidTy()
+ ? nullptr
+ : new VPPredInstPHIRecipe(Plan->getOrAddVPValue(Instr));
auto *Exit = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe);
VPRegionBlock *Region = new VPRegionBlock(Entry, Exit, RegionName, true);
@@ -7139,13 +8554,21 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
if (auto Phi = dyn_cast<PHINode>(Instr)) {
if (Phi->getParent() != OrigLoop->getHeader())
return tryToBlend(Phi, Plan);
- if ((Recipe = tryToOptimizeInductionPHI(Phi)))
+ if ((Recipe = tryToOptimizeInductionPHI(Phi, *Plan)))
return Recipe;
+
+ if (Legal->isReductionVariable(Phi)) {
+ RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi];
+ VPValue *StartV =
+ Plan->getOrAddVPValue(RdxDesc.getRecurrenceStartValue());
+ return new VPWidenPHIRecipe(Phi, RdxDesc, *StartV);
+ }
+
return new VPWidenPHIRecipe(Phi);
}
- if (isa<TruncInst>(Instr) &&
- (Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Range)))
+ if (isa<TruncInst>(Instr) && (Recipe = tryToOptimizeInductionTruncate(
+ cast<TruncInst>(Instr), Range, *Plan)))
return Recipe;
if (!shouldWiden(Instr, Range))
@@ -7165,35 +8588,9 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
return tryToWiden(Instr, *Plan);
}
-void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
- unsigned MaxVF) {
- assert(OrigLoop->empty() && "Inner loop expected.");
-
- // Collect conditions feeding internal conditional branches; they need to be
- // represented in VPlan for it to model masking.
- SmallPtrSet<Value *, 1> NeedDef;
-
- auto *Latch = OrigLoop->getLoopLatch();
- for (BasicBlock *BB : OrigLoop->blocks()) {
- if (BB == Latch)
- continue;
- BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
- if (Branch && Branch->isConditional())
- NeedDef.insert(Branch->getCondition());
- }
-
- // If the tail is to be folded by masking, the primary induction variable, if
- // exists needs to be represented in VPlan for it to model early-exit masking.
- // Also, both the Phi and the live-out instruction of each reduction are
- // required in order to introduce a select between them in VPlan.
- if (CM.foldTailByMasking()) {
- if (Legal->getPrimaryInduction())
- NeedDef.insert(Legal->getPrimaryInduction());
- for (auto &Reduction : Legal->getReductionVars()) {
- NeedDef.insert(Reduction.first);
- NeedDef.insert(Reduction.second.getLoopExitInstr());
- }
- }
+void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
+ ElementCount MaxVF) {
+ assert(OrigLoop->isInnermost() && "Inner loop expected.");
// Collect instructions from the original loop that will become trivially dead
// in the vectorized loop. We don't need to vectorize these instructions. For
@@ -7216,17 +8613,17 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
for (Instruction *I : DeadInstructions)
SinkAfter.erase(I);
- for (unsigned VF = MinVF; VF < MaxVF + 1;) {
- VFRange SubRange = {VF, MaxVF + 1};
- VPlans.push_back(buildVPlanWithVPRecipes(SubRange, NeedDef,
- DeadInstructions, SinkAfter));
+ auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
+ for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
+ VFRange SubRange = {VF, MaxVFPlusOne};
+ VPlans.push_back(
+ buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter));
VF = SubRange.End;
}
}
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
- VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
- SmallPtrSetImpl<Instruction *> &DeadInstructions,
+ VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
const DenseMap<Instruction *, Instruction *> &SinkAfter) {
// Hold a mapping from predicated instructions to their recipes, in order to
@@ -7249,14 +8646,28 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RecipeBuilder.recordRecipeOf(Entry.first);
RecipeBuilder.recordRecipeOf(Entry.second);
}
+ for (auto &Reduction : CM.getInLoopReductionChains()) {
+ PHINode *Phi = Reduction.first;
+ RecurKind Kind = Legal->getReductionVars()[Phi].getRecurrenceKind();
+ const SmallVector<Instruction *, 4> &ReductionOperations = Reduction.second;
+
+ RecipeBuilder.recordRecipeOf(Phi);
+ for (auto &R : ReductionOperations) {
+ RecipeBuilder.recordRecipeOf(R);
+ // For min/max reductions, where we have a pair of icmp/select, we also
+ // need to record the ICmp recipe, so it can be removed later.
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
+ RecipeBuilder.recordRecipeOf(cast<Instruction>(R->getOperand(0)));
+ }
+ }
// For each interleave group which is relevant for this (possibly trimmed)
// Range, add it to the set of groups to be later applied to the VPlan and add
// placeholders for its members' Recipes which we'll be replacing with a
// single VPInterleaveRecipe.
for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
- auto applyIG = [IG, this](unsigned VF) -> bool {
- return (VF >= 2 && // Query is illegal for VF == 1
+ auto applyIG = [IG, this](ElementCount VF) -> bool {
+ return (VF.isVector() && // Query is illegal for VF == 1
CM.getWideningDecision(IG->getInsertPos(), VF) ==
LoopVectorizationCostModel::CM_Interleave);
};
@@ -7278,10 +8689,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
Plan->setEntry(VPBB);
- // Represent values that will have defs inside VPlan.
- for (Value *V : NeedDef)
- Plan->addVPValue(V);
-
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
LoopBlocksDFS DFS(OrigLoop);
@@ -7308,6 +8715,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
if (auto Recipe =
RecipeBuilder.tryToCreateWidenRecipe(Instr, Range, Plan)) {
+ for (auto *Def : Recipe->definedValues()) {
+ auto *UV = Def->getUnderlyingValue();
+ Plan->addVPValue(UV, Def);
+ }
+
RecipeBuilder.setRecipe(Instr, Recipe);
VPBB->appendRecipe(Recipe);
continue;
@@ -7343,6 +8755,18 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
for (auto &Entry : SinkAfter) {
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
+ // If the target is in a replication region, make sure to move Sink to the
+ // block after it, not into the replication region itself.
+ if (auto *Region =
+ dyn_cast_or_null<VPRegionBlock>(Target->getParent()->getParent())) {
+ if (Region->isReplicator()) {
+ assert(Region->getNumSuccessors() == 1 && "Expected SESE region!");
+ VPBasicBlock *NextBlock =
+ cast<VPBasicBlock>(Region->getSuccessors().front());
+ Sink->moveBefore(*NextBlock, NextBlock->getFirstNonPhi());
+ continue;
+ }
+ }
Sink->moveAfter(Target);
}
@@ -7352,33 +8776,52 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
for (auto IG : InterleaveGroups) {
auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
RecipeBuilder.getRecipe(IG->getInsertPos()));
- (new VPInterleaveRecipe(IG, Recipe->getAddr(), Recipe->getMask()))
- ->insertBefore(Recipe);
+ SmallVector<VPValue *, 4> StoredValues;
+ for (unsigned i = 0; i < IG->getFactor(); ++i)
+ if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i)))
+ StoredValues.push_back(Plan->getOrAddVPValue(SI->getOperand(0)));
+ auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues,
+ Recipe->getMask());
+ VPIG->insertBefore(Recipe);
+ unsigned J = 0;
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *Member = IG->getMember(i)) {
+ if (!Member->getType()->isVoidTy()) {
+ VPValue *OriginalV = Plan->getVPValue(Member);
+ Plan->removeVPValueFor(Member);
+ Plan->addVPValue(Member, VPIG->getVPValue(J));
+ OriginalV->replaceAllUsesWith(VPIG->getVPValue(J));
+ J++;
+ }
RecipeBuilder.getRecipe(Member)->eraseFromParent();
}
}
+ // Adjust the recipes for any inloop reductions.
+ if (Range.Start.isVector())
+ adjustRecipesForInLoopReductions(Plan, RecipeBuilder);
+
// Finally, if tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the end of the latch.
- if (CM.foldTailByMasking()) {
+ if (CM.foldTailByMasking() && !Legal->getReductionVars().empty()) {
Builder.setInsertPoint(VPBB);
auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
for (auto &Reduction : Legal->getReductionVars()) {
- VPValue *Phi = Plan->getVPValue(Reduction.first);
- VPValue *Red = Plan->getVPValue(Reduction.second.getLoopExitInstr());
+ if (CM.isInLoopReduction(Reduction.first))
+ continue;
+ VPValue *Phi = Plan->getOrAddVPValue(Reduction.first);
+ VPValue *Red = Plan->getOrAddVPValue(Reduction.second.getLoopExitInstr());
Builder.createNaryOp(Instruction::Select, {Cond, Red, Phi});
}
}
std::string PlanName;
raw_string_ostream RSO(PlanName);
- unsigned VF = Range.Start;
+ ElementCount VF = Range.Start;
Plan->addVF(VF);
RSO << "Initial VPlan for VF={" << VF;
- for (VF *= 2; VF < Range.End; VF *= 2) {
+ for (VF *= 2; ElementCount::isKnownLT(VF, Range.End); VF *= 2) {
Plan->addVF(VF);
RSO << "," << VF;
}
@@ -7394,7 +8837,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// transformations before even evaluating whether vectorization is profitable.
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
// the vectorization pipeline.
- assert(!OrigLoop->empty());
+ assert(!OrigLoop->isInnermost());
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
// Create new empty VPlan
@@ -7404,7 +8847,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();
- for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
+ for (ElementCount VF = Range.Start; ElementCount::isKnownLT(VF, Range.End);
+ VF *= 2)
Plan->addVF(VF);
if (EnableVPlanPredication) {
@@ -7422,6 +8866,67 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
return Plan;
}
+// Adjust the recipes for any inloop reductions. The chain of instructions
+// leading from the loop exit instr to the phi needs to be converted to
+// reductions, with one operand being vector and the other being the scalar
+// reduction chain.
+void LoopVectorizationPlanner::adjustRecipesForInLoopReductions(
+ VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder) {
+ for (auto &Reduction : CM.getInLoopReductionChains()) {
+ PHINode *Phi = Reduction.first;
+ RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi];
+ const SmallVector<Instruction *, 4> &ReductionOperations = Reduction.second;
+
+ // ReductionOperations are ordered top-down from the phi's use to the
+ // LoopExitValue. We keep track of the previous item (the Chain) to tell
+ // which of the two operands will remain scalar and which will be reduced.
+ // For minmax the chain will be the select instructions.
+ Instruction *Chain = Phi;
+ for (Instruction *R : ReductionOperations) {
+ VPRecipeBase *WidenRecipe = RecipeBuilder.getRecipe(R);
+ RecurKind Kind = RdxDesc.getRecurrenceKind();
+
+ VPValue *ChainOp = Plan->getVPValue(Chain);
+ unsigned FirstOpId;
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
+ assert(isa<VPWidenSelectRecipe>(WidenRecipe) &&
+ "Expected to replace a VPWidenSelectSC");
+ FirstOpId = 1;
+ } else {
+ assert(isa<VPWidenRecipe>(WidenRecipe) &&
+ "Expected to replace a VPWidenSC");
+ FirstOpId = 0;
+ }
+ unsigned VecOpId =
+ R->getOperand(FirstOpId) == Chain ? FirstOpId + 1 : FirstOpId;
+ VPValue *VecOp = Plan->getVPValue(R->getOperand(VecOpId));
+
+ auto *CondOp = CM.foldTailByMasking()
+ ? RecipeBuilder.createBlockInMask(R->getParent(), Plan)
+ : nullptr;
+ VPReductionRecipe *RedRecipe = new VPReductionRecipe(
+ &RdxDesc, R, ChainOp, VecOp, CondOp, Legal->hasFunNoNaNAttr(), TTI);
+ WidenRecipe->getVPValue()->replaceAllUsesWith(RedRecipe);
+ Plan->removeVPValueFor(R);
+ Plan->addVPValue(R, RedRecipe);
+ WidenRecipe->getParent()->insert(RedRecipe, WidenRecipe->getIterator());
+ WidenRecipe->getVPValue()->replaceAllUsesWith(RedRecipe);
+ WidenRecipe->eraseFromParent();
+
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
+ VPRecipeBase *CompareRecipe =
+ RecipeBuilder.getRecipe(cast<Instruction>(R->getOperand(0)));
+ assert(isa<VPWidenRecipe>(CompareRecipe) &&
+ "Expected to replace a VPWidenSC");
+ assert(cast<VPWidenRecipe>(CompareRecipe)->getNumUsers() == 0 &&
+ "Expected no remaining users");
+ CompareRecipe->eraseFromParent();
+ }
+ Chain = R;
+ }
+ }
+}
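
The effect of this rewrite is that each widened operation of the reduction chain is replaced by a recipe that reduces its vector operand to a scalar and folds it into a running scalar chain, instead of keeping a vector accumulator that is reduced once after the loop. A standalone sketch of that in-loop shape for an integer add reduction (plain arrays and a fixed VF stand in for the VPlan recipes):

    #include <cassert>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    // Reduce one "vector" operand to a scalar and fold it into the scalar chain,
    // which is what a VPReductionRecipe does per unrolled part.
    static int64_t reduceIntoChain(int64_t Chain, const std::vector<int64_t> &VecOp) {
      int64_t Partial = std::accumulate(VecOp.begin(), VecOp.end(), int64_t{0});
      return Chain + Partial;  // the chain stays scalar across iterations
    }

    int main() {
      std::vector<int64_t> Data(37);
      std::iota(Data.begin(), Data.end(), 1);  // 1..37, sum = 703

      const size_t VF = 4;
      int64_t Chain = 0;                       // reduction phi start value
      size_t I = 0;
      for (; I + VF <= Data.size(); I += VF) {
        std::vector<int64_t> VecOp(Data.begin() + I, Data.begin() + I + VF);
        Chain = reduceIntoChain(Chain, VecOp);
      }
      for (; I < Data.size(); ++I)             // scalar epilogue
        Chain += Data[I];

      assert(Chain == 703);
    }
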
+
Value* LoopVectorizationPlanner::VPCallbackILV::
getOrCreateVectorValues(Value *V, unsigned Part) {
return ILV.getOrCreateVectorValue(V, Part);
@@ -7449,29 +8954,35 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
}
void VPWidenCallRecipe::execute(VPTransformState &State) {
- State.ILV->widenCallInstruction(Ingredient, User, State);
+ State.ILV->widenCallInstruction(*cast<CallInst>(getUnderlyingInstr()), this,
+ *this, State);
}
void VPWidenSelectRecipe::execute(VPTransformState &State) {
- State.ILV->widenSelectInstruction(Ingredient, User, InvariantCond, State);
+ State.ILV->widenSelectInstruction(*cast<SelectInst>(getUnderlyingInstr()),
+ this, *this, InvariantCond, State);
}
void VPWidenRecipe::execute(VPTransformState &State) {
- State.ILV->widenInstruction(Ingredient, User, State);
+ State.ILV->widenInstruction(*getUnderlyingInstr(), this, *this, State);
}
void VPWidenGEPRecipe::execute(VPTransformState &State) {
- State.ILV->widenGEP(GEP, User, State.UF, State.VF, IsPtrLoopInvariant,
+ State.ILV->widenGEP(cast<GetElementPtrInst>(getUnderlyingInstr()), this,
+ *this, State.UF, State.VF, IsPtrLoopInvariant,
IsIndexLoopInvariant, State);
}
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Int or FP induction being replicated.");
- State.ILV->widenIntOrFpInduction(IV, Trunc);
+ State.ILV->widenIntOrFpInduction(IV, getStartValue()->getLiveInIRValue(),
+ Trunc);
}
void VPWidenPHIRecipe::execute(VPTransformState &State) {
- State.ILV->widenPHIInstruction(Phi, State.UF, State.VF);
+ Value *StartV =
+ getStartValue() ? getStartValue()->getLiveInIRValue() : nullptr;
+ State.ILV->widenPHIInstruction(Phi, RdxDesc, StartV, State.UF, State.VF);
}
void VPBlendRecipe::execute(VPTransformState &State) {
@@ -7515,22 +9026,59 @@ void VPBlendRecipe::execute(VPTransformState &State) {
void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Interleave group being replicated.");
- State.ILV->vectorizeInterleaveGroup(IG, State, getAddr(), getMask());
+ State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(),
+ getStoredValues(), getMask());
+}
+
+void VPReductionRecipe::execute(VPTransformState &State) {
+ assert(!State.Instance && "Reduction being replicated.");
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ RecurKind Kind = RdxDesc->getRecurrenceKind();
+ Value *NewVecOp = State.get(getVecOp(), Part);
+ if (VPValue *Cond = getCondOp()) {
+ Value *NewCond = State.get(Cond, Part);
+ VectorType *VecTy = cast<VectorType>(NewVecOp->getType());
+ Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
+ Kind, VecTy->getElementType());
+ Constant *IdenVec =
+ ConstantVector::getSplat(VecTy->getElementCount(), Iden);
+ Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
+ NewVecOp = Select;
+ }
+ Value *NewRed =
+ createTargetReduction(State.Builder, TTI, *RdxDesc, NewVecOp);
+ Value *PrevInChain = State.get(getChainOp(), Part);
+ Value *NextInChain;
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
+ NextInChain =
+ createMinMaxOp(State.Builder, RdxDesc->getRecurrenceKind(),
+ NewRed, PrevInChain);
+ } else {
+ NextInChain = State.Builder.CreateBinOp(
+ (Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), NewRed,
+ PrevInChain);
+ }
+ State.set(this, getUnderlyingInstr(), NextInChain, Part);
+ }
}
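
When a condition mask is present, the recipe above selects the reduction identity into the masked-off lanes before reducing, so those lanes cannot change the result. A standalone sketch of that select-identity-then-reduce step for an add reduction (identity 0; the lane values are made up):

    #include <cassert>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    // Replace masked-off lanes with the reduction identity, then reduce.
    static int64_t maskedAddReduce(const std::vector<int64_t> &VecOp,
                                   const std::vector<bool> &Cond) {
      const int64_t Identity = 0;  // identity element for integer add
      std::vector<int64_t> Selected(VecOp.size());
      for (size_t I = 0; I < VecOp.size(); ++I)
        Selected[I] = Cond[I] ? VecOp[I] : Identity;  // select %cond, %v, %iden
      return std::accumulate(Selected.begin(), Selected.end(), int64_t{0});
    }

    int main() {
      // Only lanes 0 and 2 are active; lanes 1 and 3 must not affect the sum.
      assert(maskedAddReduce({10, 100, 1, 1000}, {true, false, true, false}) == 11);
    }
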
void VPReplicateRecipe::execute(VPTransformState &State) {
if (State.Instance) { // Generate a single instance.
- State.ILV->scalarizeInstruction(Ingredient, User, *State.Instance,
- IsPredicated, State);
+ assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
+ State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this,
+ *State.Instance, IsPredicated, State);
// Insert scalar instance packing it into a vector.
- if (AlsoPack && State.VF > 1) {
- // If we're constructing lane 0, initialize to start from undef.
+ if (AlsoPack && State.VF.isVector()) {
+ // If we're constructing lane 0, initialize to start from poison.
if (State.Instance->Lane == 0) {
- Value *Undef = UndefValue::get(
- FixedVectorType::get(Ingredient->getType(), State.VF));
- State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef);
+ assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
+ Value *Poison = PoisonValue::get(
+ VectorType::get(getUnderlyingValue()->getType(), State.VF));
+ State.ValueMap.setVectorValue(getUnderlyingInstr(),
+ State.Instance->Part, Poison);
}
- State.ILV->packScalarIntoVectorValue(Ingredient, *State.Instance);
+ State.ILV->packScalarIntoVectorValue(getUnderlyingInstr(),
+ *State.Instance);
}
return;
}
@@ -7538,10 +9086,12 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
// Generate scalar instances for all VF lanes of all UF parts, unless the
// instruction is uniform, in which case generate only the first lane for each
// of the UF parts.
- unsigned EndLane = IsUniform ? 1 : State.VF;
+ unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue();
+ assert((!State.VF.isScalable() || IsUniform) &&
+ "Can't scalarize a scalable vector");
for (unsigned Part = 0; Part < State.UF; ++Part)
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
- State.ILV->scalarizeInstruction(Ingredient, User, {Part, Lane},
+ State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, {Part, Lane},
IsPredicated, State);
}
@@ -7573,8 +9123,8 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
assert(State.Instance && "Predicated instruction PHI works per instance.");
- Instruction *ScalarPredInst = cast<Instruction>(
- State.ValueMap.getScalarValue(PredInst, *State.Instance));
+ Instruction *ScalarPredInst =
+ cast<Instruction>(State.get(getOperand(0), *State.Instance));
BasicBlock *PredicatedBB = ScalarPredInst->getParent();
BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
assert(PredicatingBB && "Predicated block has no single predecessor.");
@@ -7586,6 +9136,8 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
// also do that packing, thereby "hoisting" the insert-element sequence.
// Otherwise, a phi node for the scalar value is needed.
unsigned Part = State.Instance->Part;
+ Instruction *PredInst =
+ cast<Instruction>(getOperand(0)->getUnderlyingValue());
if (State.ValueMap.hasVectorValue(PredInst, Part)) {
Value *VectorValue = State.ValueMap.getVectorValue(PredInst, Part);
InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
@@ -7596,16 +9148,17 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
} else {
Type *PredInstType = PredInst->getType();
PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
- Phi->addIncoming(UndefValue::get(ScalarPredInst->getType()), PredicatingBB);
+ Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), PredicatingBB);
Phi->addIncoming(ScalarPredInst, PredicatedBB);
State.ValueMap.resetScalarValue(PredInst, *State.Instance, Phi);
}
}
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
- VPValue *StoredValue = isa<StoreInst>(Instr) ? getStoredValue() : nullptr;
- State.ILV->vectorizeMemoryInstruction(&Instr, State, getAddr(), StoredValue,
- getMask());
+ VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
+ State.ILV->vectorizeMemoryInstruction(&Ingredient, State,
+ StoredValue ? nullptr : getVPValue(),
+ getAddr(), StoredValue, getMask());
}
// Determine how to lower the scalar epilogue, which depends on 1) optimising
@@ -7617,35 +9170,53 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
LoopVectorizationLegality &LVL) {
- bool OptSize =
- F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
- PGSOQueryType::IRPass);
// 1) OptSize takes precedence over all other options, i.e. if this is set,
// don't look at hints or options, and don't request a scalar epilogue.
- if (OptSize)
+ // (For PGSO, as shouldOptimizeForSize isn't currently accessible from
+ // LoopAccessInfo (due to code dependency and not being able to reliably get
+ // PSI/BFI from a loop analysis under NPM), we cannot suppress the collection
+ // of strides in LoopAccessInfo::analyzeLoop() and vectorize without
+ // versioning when the vectorization is forced, unlike hasOptSize. So revert
+ // back to the old way and vectorize with versioning when forced. See D81345.)
+ if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
+ PGSOQueryType::IRPass) &&
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled))
return CM_ScalarEpilogueNotAllowedOptSize;
- bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() &&
- !PreferPredicateOverEpilog;
+ // 2) If set, obey the directives
+ if (PreferPredicateOverEpilogue.getNumOccurrences()) {
+ switch (PreferPredicateOverEpilogue) {
+ case PreferPredicateTy::ScalarEpilogue:
+ return CM_ScalarEpilogueAllowed;
+ case PreferPredicateTy::PredicateElseScalarEpilogue:
+ return CM_ScalarEpilogueNotNeededUsePredicate;
+ case PreferPredicateTy::PredicateOrDontVectorize:
+ return CM_ScalarEpilogueNotAllowedUsePredicate;
+ };
+ }
- // 2) Next, if disabling predication is requested on the command line, honour
- // this and request a scalar epilogue.
- if (PredicateOptDisabled)
+ // 3) If set, obey the hints
+ switch (Hints.getPredicate()) {
+ case LoopVectorizeHints::FK_Enabled:
+ return CM_ScalarEpilogueNotNeededUsePredicate;
+ case LoopVectorizeHints::FK_Disabled:
return CM_ScalarEpilogueAllowed;
+ };
- // 3) and 4) look if enabling predication is requested on the command line,
- // with a loop hint, or if the TTI hook indicates this is profitable, request
- // predication .
- if (PreferPredicateOverEpilog ||
- Hints.getPredicate() == LoopVectorizeHints::FK_Enabled ||
- (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT,
- LVL.getLAI()) &&
- Hints.getPredicate() != LoopVectorizeHints::FK_Disabled))
+ // 4) if the TTI hook indicates this is profitable, request predication.
+ if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT,
+ LVL.getLAI()))
return CM_ScalarEpilogueNotNeededUsePredicate;
return CM_ScalarEpilogueAllowed;
}
+void VPTransformState::set(VPValue *Def, Value *IRDef, Value *V,
+ unsigned Part) {
+ set(Def, V, Part);
+ ILV->setVectorValue(IRDef, Part, V);
+}
+
// Process the loop in the VPlan-native vectorization path. This path builds
// VPlan upfront in the vectorization pipeline, which allows to apply
// VPlan-to-VPlan transformations from the very beginning without modifying the
@@ -7657,7 +9228,7 @@ static bool processLoopInVPlanNativePath(
OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints) {
- if (PSE.getBackedgeTakenCount() == PSE.getSE()->getCouldNotCompute()) {
+ if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
LLVM_DEBUG(dbgs() << "LV: cannot compute the outer-loop trip count\n");
return false;
}
@@ -7676,7 +9247,7 @@ static bool processLoopInVPlanNativePath(
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE);
// Get user vectorization factor.
- const unsigned UserVF = Hints.getWidth();
+ ElementCount UserVF = Hints.getWidth();
// Plan how to best vectorize, return the best VF and its cost.
const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF);
@@ -7691,7 +9262,7 @@ static bool processLoopInVPlanNativePath(
LVP.setBestPlan(VF.Width, 1);
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
- &CM);
+ &CM, BFI, PSI);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
LVP.executePlan(LB, DT);
@@ -7710,7 +9281,7 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts)
!EnableLoopVectorization) {}
bool LoopVectorizePass::processLoop(Loop *L) {
- assert((EnableVPlanNativePath || L->empty()) &&
+ assert((EnableVPlanNativePath || L->isInnermost()) &&
"VPlan-native path is not enabled. Only process inner loops.");
#ifndef NDEBUG
@@ -7755,7 +9326,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check if it is legal to vectorize the loop.
LoopVectorizationRequirements Requirements(*ORE);
LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE,
- &Requirements, &Hints, DB, AC);
+ &Requirements, &Hints, DB, AC, BFI, PSI);
if (!LVL.canVectorize(EnableVPlanNativePath)) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
Hints.emitRemarkWithHints();
@@ -7772,11 +9343,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// even evaluating whether vectorization is profitable. Since we cannot modify
// the incoming IR, we need to build VPlan upfront in the vectorization
// pipeline.
- if (!L->empty())
+ if (!L->isInnermost())
return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
ORE, BFI, PSI, Hints);
- assert(L->empty() && "Inner loop expected.");
+ assert(L->isInnermost() && "Inner loop expected.");
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
// count by optimizing for size, to minimize overheads.
@@ -7841,7 +9412,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE);
// Get user vectorization factor and interleave count.
- unsigned UserVF = Hints.getWidth();
+ ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
// Plan how to best vectorize, return the best VF and its cost.
@@ -7866,7 +9437,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- if (VF.Width == 1) {
+ if (VF.Width.isScalar()) {
LLVM_DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
VecDiagMsg = std::make_pair(
"VectorizationNotBeneficial",
@@ -7955,8 +9526,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
assert(IC > 1 && "interleave count should not be 1 or 0");
// If we decided that it is not legal to vectorize the loop, then
// interleave it.
- InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
- &CM);
+ InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM,
+ BFI, PSI);
LVP.executePlan(Unroller, DT);
ORE->emit([&]() {
@@ -7967,16 +9538,51 @@ bool LoopVectorizePass::processLoop(Loop *L) {
});
} else {
// If we decided that it is *legal* to vectorize the loop, then do it.
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
- &LVL, &CM);
- LVP.executePlan(LB, DT);
- ++LoopsVectorized;
- // Add metadata to disable runtime unrolling a scalar loop when there are
- // no runtime checks about strides and memory. A scalar loop that is
- // rarely used is not worth unrolling.
- if (!LB.areSafetyChecksAdded())
- DisableRuntimeUnroll = true;
+ // Consider vectorizing the epilogue too if it's profitable.
+ VectorizationFactor EpilogueVF =
+ CM.selectEpilogueVectorizationFactor(VF.Width, LVP);
+ if (EpilogueVF.Width.isVector()) {
+
+ // The first pass vectorizes the main loop and creates a scalar epilogue
+ // to be vectorized by executing the plan (potentially with a different
+ // factor) again shortly afterwards.
+ EpilogueLoopVectorizationInfo EPI(VF.Width.getKnownMinValue(), IC,
+ EpilogueVF.Width.getKnownMinValue(), 1);
+ EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI,
+ &LVL, &CM, BFI, PSI);
+
+ LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF);
+ LVP.executePlan(MainILV, DT);
+ ++LoopsVectorized;
+
+ simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
+ formLCSSARecursively(*L, *DT, LI, SE);
+
+ // Second pass vectorizes the epilogue and adjusts the control flow
+ // edges from the first pass.
+ LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF);
+ EPI.MainLoopVF = EPI.EpilogueVF;
+ EPI.MainLoopUF = EPI.EpilogueUF;
+ EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
+ ORE, EPI, &LVL, &CM, BFI, PSI);
+ LVP.executePlan(EpilogILV, DT);
+ ++LoopsEpilogueVectorized;
+
+ if (!MainILV.areSafetyChecksAdded())
+ DisableRuntimeUnroll = true;
+ } else {
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
+ &LVL, &CM, BFI, PSI);
+ LVP.executePlan(LB, DT);
+ ++LoopsVectorized;
+
+ // Add metadata to disable runtime unrolling a scalar loop when there are
+ // no runtime checks about strides and memory. A scalar loop that is
+ // rarely used is not worth unrolling.
+ if (!LB.areSafetyChecksAdded())
+ DisableRuntimeUnroll = true;
+ }
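
The new branch runs the vectorizer twice: the main loop at the selected VF, then the remaining iterations at a smaller epilogue VF, with a scalar loop for whatever is left. A standalone sketch of the resulting split of the iteration space (plain loops stand in for the two generated vector loops; the VFs are made up):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      std::vector<int64_t> A(23, 1);
      int64_t Sum = 0;
      size_t I = 0, N = A.size();

      const size_t MainVF = 8, EpilogueVF = 4;

      for (; I + MainVF <= N; I += MainVF)          // main vector loop (VF = 8)
        for (size_t L = 0; L < MainVF; ++L)
          Sum += A[I + L];

      for (; I + EpilogueVF <= N; I += EpilogueVF)  // vectorized epilogue (VF = 4)
        for (size_t L = 0; L < EpilogueVF; ++L)
          Sum += A[I + L];

      for (; I < N; ++I)                            // scalar remainder
        Sum += A[I];

      assert(Sum == 23);  // 16 + 4 + 3 iterations cover all 23 elements
    }
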
// Report the vectorization decision.
ORE->emit([&]() {
@@ -8090,7 +9696,8 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, MSSA};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f950d0d4eb2b..0b630197911a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17,11 +17,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
@@ -29,14 +26,16 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -47,7 +46,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -66,7 +64,6 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
-#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -83,6 +80,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -130,6 +128,10 @@ static cl::opt<int>
MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
+static cl::opt<unsigned>
+MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
+ cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
+
static cl::opt<int>
MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden,
cl::desc("Maximum depth of the lookup for consecutive stores."));
@@ -204,12 +206,12 @@ static bool allSameBlock(ArrayRef<Value *> VL) {
if (!I0)
return false;
BasicBlock *BB = I0->getParent();
- for (int i = 1, e = VL.size(); i < e; i++) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- if (!I)
+ for (int I = 1, E = VL.size(); I < E; I++) {
+ auto *II = dyn_cast<Instruction>(VL[I]);
+ if (!II)
return false;
- if (BB != I->getParent())
+ if (BB != II->getParent())
return false;
}
return true;
@@ -234,11 +236,16 @@ static bool isSplat(ArrayRef<Value *> VL) {
return true;
}
-/// \returns True if \p I is commutative, handles CmpInst as well as Instruction.
+/// \returns True if \p I is commutative, handles CmpInst and BinaryOperator.
static bool isCommutative(Instruction *I) {
- if (auto *IC = dyn_cast<CmpInst>(I))
- return IC->isCommutative();
- return I->isCommutative();
+ if (auto *Cmp = dyn_cast<CmpInst>(I))
+ return Cmp->isCommutative();
+ if (auto *BO = dyn_cast<BinaryOperator>(I))
+ return BO->isCommutative();
+ // TODO: This should check for generic Instruction::isCommutative(), but
+ // we need to confirm that the caller code correctly handles Intrinsics
+ // (which, for example, may not have exactly two operands).
+ return false;
}
/// Checks if the vector of instructions can be represented as a shuffle, like:
@@ -250,7 +257,7 @@ static bool isCommutative(Instruction *I) {
/// %x3x3 = mul i8 %x3, %x3
/// %y1y1 = mul i8 %y1, %y1
/// %y2y2 = mul i8 %y2, %y2
-/// %ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
+/// %ins1 = insertelement <4 x i8> poison, i8 %x0x0, i32 0
/// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
/// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
/// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
@@ -265,13 +272,13 @@ static bool isCommutative(Instruction *I) {
/// %x3 = extractelement <4 x i8> %x, i32 3
/// %y1 = extractelement <4 x i8> %y, i32 1
/// %y2 = extractelement <4 x i8> %y, i32 2
-/// %1 = insertelement <4 x i8> undef, i8 %x0, i32 0
+/// %1 = insertelement <4 x i8> poison, i8 %x0, i32 0
/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
/// %5 = mul <4 x i8> %4, %4
/// %6 = extractelement <4 x i8> %5, i32 0
-/// %ins1 = insertelement <4 x i8> undef, i8 %6, i32 0
+/// %ins1 = insertelement <4 x i8> poison, i8 %6, i32 0
/// %7 = extractelement <4 x i8> %5, i32 1
/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
/// %8 = extractelement <4 x i8> %5, i32 2
@@ -285,7 +292,8 @@ static bool isCommutative(Instruction *I) {
static Optional<TargetTransformInfo::ShuffleKind>
isShuffle(ArrayRef<Value *> VL) {
auto *EI0 = cast<ExtractElementInst>(VL[0]);
- unsigned Size = EI0->getVectorOperandType()->getNumElements();
+ unsigned Size =
+ cast<FixedVectorType>(EI0->getVectorOperandType())->getNumElements();
Value *Vec1 = nullptr;
Value *Vec2 = nullptr;
enum ShuffleMode { Unknown, Select, Permute };
@@ -294,7 +302,7 @@ isShuffle(ArrayRef<Value *> VL) {
auto *EI = cast<ExtractElementInst>(VL[I]);
auto *Vec = EI->getVectorOperand();
// All vector operands must have the same number of vector elements.
- if (cast<VectorType>(Vec->getType())->getNumElements() != Size)
+ if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
return None;
auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
if (!Idx)
@@ -303,7 +311,7 @@ isShuffle(ArrayRef<Value *> VL) {
if (Idx->getValue().uge(Size))
continue;
unsigned IntIdx = Idx->getValue().getZExtValue();
- // We can extractelement from undef vector.
+ // We can extractelement from undef or poison vector.
if (isa<UndefValue>(Vec))
continue;
// For correct shuffling we have to have at most 2 different vector operands
@@ -500,7 +508,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
}
/// \returns the AA location that is being access by the instruction.
-static MemoryLocation getLocation(Instruction *I, AliasAnalysis *AA) {
+static MemoryLocation getLocation(Instruction *I, AAResults *AA) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return MemoryLocation::get(SI);
if (LoadInst *LI = dyn_cast<LoadInst>(I))
@@ -521,6 +529,15 @@ static bool isSimple(Instruction *I) {
namespace llvm {
+static void inversePermutation(ArrayRef<unsigned> Indices,
+ SmallVectorImpl<int> &Mask) {
+ Mask.clear();
+ const unsigned E = Indices.size();
+ Mask.resize(E, E + 1);
+ for (unsigned I = 0; I < E; ++I)
+ Mask[Indices[I]] = I;
+}
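
inversePermutation builds the mask that undoes a given ordering: Mask[Indices[I]] = I. A tiny standalone check of that relation (std::vector in place of the ADT types):

    #include <cassert>
    #include <vector>

    // Mirror of the helper above: Mask[Indices[I]] = I.
    static void inversePermutation(const std::vector<unsigned> &Indices,
                                   std::vector<int> &Mask) {
      const unsigned E = Indices.size();
      Mask.assign(E, E + 1);  // E + 1 marks "not written yet"
      for (unsigned I = 0; I < E; ++I)
        Mask[Indices[I]] = I;
    }

    int main() {
      // Element 0 went to position 2, 1 to 0, 2 to 1; the inverse is {1, 2, 0}.
      std::vector<int> Mask;
      inversePermutation({2, 0, 1}, Mask);
      assert((Mask == std::vector<int>{1, 2, 0}));
    }
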
+
namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
@@ -535,9 +552,10 @@ public:
using StoreList = SmallVector<StoreInst *, 8>;
using ExtraValueToDebugLocsMap =
MapVector<Value *, SmallVector<Instruction *, 2>>;
+ using OrdersType = SmallVector<unsigned, 4>;
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
- TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
+ TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li,
DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
const DataLayout *DL, OptimizationRemarkEmitter *ORE)
: F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC),
@@ -571,11 +589,11 @@ public:
/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
- int getSpillCost() const;
+ InstructionCost getSpillCost() const;
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
- int getTreeCost();
+ InstructionCost getTreeCost();
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
/// the purpose of scheduling and extraction in the \p UserIgnoreLst.
@@ -612,6 +630,14 @@ public:
/// \returns The best order of instructions for vectorization.
Optional<ArrayRef<unsigned>> bestOrder() const {
+ assert(llvm::all_of(
+ NumOpsWantToKeepOrder,
+ [this](const decltype(NumOpsWantToKeepOrder)::value_type &D) {
+ return D.getFirst().size() ==
+ VectorizableTree[0]->Scalars.size();
+ }) &&
+ "All orders must have the same size as number of instructions in "
+ "tree node.");
auto I = std::max_element(
NumOpsWantToKeepOrder.begin(), NumOpsWantToKeepOrder.end(),
[](const decltype(NumOpsWantToKeepOrder)::value_type &D1,
@@ -625,6 +651,81 @@ public:
return makeArrayRef(I->getFirst());
}
+ /// Builds the correct order for root instructions.
+ /// If some leaves have the same instructions to be vectorized, we may
+ /// incorrectly evaluate the best order for the root node (it is built for the
+ /// vector of instructions without repeated instructions and, thus, has fewer
+ /// elements than the root node). This function builds the correct order for
+ /// the root node.
+ /// For example, if the root node is \<a+b, a+c, a+d, f+e\>, then the leaves
+ /// are \<a, a, a, f\> and \<b, c, d, e\>. When we try to vectorize the first
+ /// leaf, it will be shrunk to \<a, f\>. If instructions in this leaf should
+ /// be reordered, the best order will be \<1, 0\>. We need to extend this
+ /// order for the root node. For the root node this order should look like
+ /// \<3, 0, 1, 2\>. This function extends the order for the reused
+ /// instructions.
+ void findRootOrder(OrdersType &Order) {
+ // If the leaf has the same number of instructions to vectorize as the root
+ // - order must be set already.
+ unsigned RootSize = VectorizableTree[0]->Scalars.size();
+ if (Order.size() == RootSize)
+ return;
+ SmallVector<unsigned, 4> RealOrder(Order.size());
+ std::swap(Order, RealOrder);
+ SmallVector<int, 4> Mask;
+ inversePermutation(RealOrder, Mask);
+ Order.assign(Mask.begin(), Mask.end());
+ // The leaf has fewer instructions - we need to find the true order of
+ // the root.
+ // Scan the nodes starting from the leaf back to the root.
+ const TreeEntry *PNode = VectorizableTree.back().get();
+ SmallVector<const TreeEntry *, 4> Nodes(1, PNode);
+ SmallPtrSet<const TreeEntry *, 4> Visited;
+ while (!Nodes.empty() && Order.size() != RootSize) {
+ const TreeEntry *PNode = Nodes.pop_back_val();
+ if (!Visited.insert(PNode).second)
+ continue;
+ const TreeEntry &Node = *PNode;
+ for (const EdgeInfo &EI : Node.UserTreeIndices)
+ if (EI.UserTE)
+ Nodes.push_back(EI.UserTE);
+ if (Node.ReuseShuffleIndices.empty())
+ continue;
+ // Build the order for the parent node.
+ OrdersType NewOrder(Node.ReuseShuffleIndices.size(), RootSize);
+ SmallVector<unsigned, 4> OrderCounter(Order.size(), 0);
+ // The order extension algorithm is:
+ // 1. Count, for each order value, how many reused instructions map to it.
+ // 2. The index in the new order is the total count of reused instructions
+ // with a smaller order value, plus how many instructions with the same
+ // order value have already been placed.
+ // 3. The value stored at that index is just the position of the
+ // instruction in the original (root) vector of instructions.
+ for (unsigned I : Node.ReuseShuffleIndices)
+ ++OrderCounter[Order[I]];
+ SmallVector<unsigned, 4> CurrentCounter(Order.size(), 0);
+ for (unsigned I = 0, E = Node.ReuseShuffleIndices.size(); I < E; ++I) {
+ unsigned ReusedIdx = Node.ReuseShuffleIndices[I];
+ unsigned OrderIdx = Order[ReusedIdx];
+ unsigned NewIdx = 0;
+ for (unsigned J = 0; J < OrderIdx; ++J)
+ NewIdx += OrderCounter[J];
+ NewIdx += CurrentCounter[OrderIdx];
+ ++CurrentCounter[OrderIdx];
+ assert(NewOrder[NewIdx] == RootSize &&
+ "The order index should not be written already.");
+ NewOrder[NewIdx] = I;
+ }
+ std::swap(Order, NewOrder);
+ }
+ assert(Order.size() == RootSize &&
+ "Root node is expected or the size of the order must be the same as "
+ "the number of elements in the root node.");
+ assert(llvm::all_of(Order,
+ [RootSize](unsigned Val) { return Val != RootSize; }) &&
+ "All indices must be initialized");
+ }
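
The counting step can be checked against the example in the comment above: a shrunk leaf order of <1, 0> with ReuseShuffleIndices <0, 0, 0, 1> must extend to the root order <3, 0, 1, 2>. A standalone re-implementation of just that step, for a single reuse node and without the tree walk (std containers; not the LLVM code itself):

    #include <cassert>
    #include <vector>

    using Order = std::vector<unsigned>;

    // Extend a leaf order to the root with the same counting scheme as
    // findRootOrder: invert the leaf order, count reused instructions per
    // order value, then place each root element after all elements that
    // have a smaller order value.
    static Order extendToRoot(Order LeafOrder, const Order &Reuse,
                              unsigned RootSize) {
      Order Inv(LeafOrder.size());
      for (unsigned I = 0; I < LeafOrder.size(); ++I)
        Inv[LeafOrder[I]] = I;                      // inversePermutation step

      Order NewOrder(Reuse.size(), RootSize);
      Order OrderCounter(Inv.size(), 0), CurrentCounter(Inv.size(), 0);
      for (unsigned I : Reuse)
        ++OrderCounter[Inv[I]];
      for (unsigned I = 0; I < Reuse.size(); ++I) {
        unsigned OrderIdx = Inv[Reuse[I]];
        unsigned NewIdx = CurrentCounter[OrderIdx]++;
        for (unsigned J = 0; J < OrderIdx; ++J)
          NewIdx += OrderCounter[J];
        NewOrder[NewIdx] = I;
      }
      return NewOrder;
    }

    int main() {
      // Root <a+b, a+c, a+d, f+e>; the first leaf shrinks to <a, f> and its
      // best order is <1, 0>, so the extended root order must be <3, 0, 1, 2>.
      assert((extendToRoot({1, 0}, {0, 0, 0, 1}, 4) == Order{3, 0, 1, 2}));
    }
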
+
/// \return The vector element size in bits to use when vectorizing the
/// expression tree ending at \p V. If V is a store, the size is the width of
/// the stored value. Otherwise, the size is the width of the largest loaded
@@ -646,6 +747,12 @@ public:
return MinVecRegSize;
}
+ unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
+ unsigned MaxVF = MaxVFOption.getNumOccurrences() ?
+ MaxVFOption : TTI->getMaximumVF(ElemWidth, Opcode);
+ return MaxVF ? MaxVF : UINT_MAX;
+ }
+
/// Check if homogeneous aggregate is isomorphic to some VectorType.
/// Accepts homogeneous multidimensional aggregate of scalars/vectors like
/// {[4 x i16], [4 x i16]}, { <2 x float>, <2 x float> },
@@ -665,7 +772,7 @@ public:
/// effectively impossible for the backend to undo.
/// TODO: If load combining is allowed in the IR optimizer, this analysis
/// may not be necessary.
- bool isLoadCombineReductionCandidate(unsigned ReductionOpcode) const;
+ bool isLoadCombineReductionCandidate(RecurKind RdxKind) const;
/// Assume that a vector of stores of bitwise-or/shifted/zexted loaded values
/// can be load combined in the backend. Load combining may not be allowed in
@@ -880,6 +987,14 @@ public:
std::array<std::pair<Value *, int>, 2> Values = {{LHS, RHS}};
for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) {
Value *V = Values[Idx].first;
+ if (isa<Constant>(V)) {
+ // Since this is a function pass, it doesn't make semantic sense to
+ // walk the users of a subclass of Constant. The users could be in
+ // another function, or even another module that happens to be in
+ // the same LLVMContext.
+ continue;
+ }
+
// Calculate the absolute lane, using the minimum relative lane of LHS
// and RHS as base and Idx as the offset.
int Ln = std::min(LHS.second, RHS.second) + Idx;
@@ -1388,7 +1503,7 @@ private:
bool areAllUsersVectorized(Instruction *I) const;
/// \returns the cost of the vectorizable entry.
- int getEntryCost(TreeEntry *E);
+ InstructionCost getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
@@ -1410,20 +1525,21 @@ private:
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.
- int getGatherCost(VectorType *Ty,
- const DenseSet<unsigned> &ShuffledIndices) const;
+ InstructionCost
+ getGatherCost(FixedVectorType *Ty,
+ const DenseSet<unsigned> &ShuffledIndices) const;
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
- int getGatherCost(ArrayRef<Value *> VL) const;
+ InstructionCost getGatherCost(ArrayRef<Value *> VL) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
void setInsertPointAfterBundle(TreeEntry *E);
/// \returns a vector from a collection of scalars in \p VL.
- Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
+ Value *gather(ArrayRef<Value *> VL);
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.
@@ -1457,15 +1573,17 @@ private:
/// The Scalars are vectorized into this value. It is initialized to Null.
Value *VectorizedValue = nullptr;
- /// Do we need to gather this sequence ?
- enum EntryState { Vectorize, NeedToGather };
+ /// Do we need to gather this sequence or vectorize it
+ /// (either with vector instruction or with scatter/gather
+ /// intrinsics for store/load)?
+ enum EntryState { Vectorize, ScatterVectorize, NeedToGather };
EntryState State;
/// Does this sequence require some shuffling?
SmallVector<int, 4> ReuseShuffleIndices;
/// Does this entry require reordering?
- ArrayRef<unsigned> ReorderIndices;
+ SmallVector<unsigned, 4> ReorderIndices;
/// Points back to the VectorizableTree.
///
@@ -1606,6 +1724,9 @@ private:
case Vectorize:
dbgs() << "Vectorize\n";
break;
+ case ScatterVectorize:
+ dbgs() << "ScatterVectorize\n";
+ break;
case NeedToGather:
dbgs() << "NeedToGather\n";
break;
@@ -1627,7 +1748,7 @@ private:
dbgs() << "NULL\n";
dbgs() << "ReuseShuffleIndices: ";
if (ReuseShuffleIndices.empty())
- dbgs() << "Emtpy";
+ dbgs() << "Empty";
else
for (unsigned ReuseIdx : ReuseShuffleIndices)
dbgs() << ReuseIdx << ", ";
@@ -1644,26 +1765,55 @@ private:
#endif
};
+#ifndef NDEBUG
+ void dumpTreeCosts(TreeEntry *E, InstructionCost ReuseShuffleCost,
+ InstructionCost VecCost,
+ InstructionCost ScalarCost) const {
+ dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump();
+ dbgs() << "SLP: Costs:\n";
+ dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n";
+ dbgs() << "SLP: VectorCost = " << VecCost << "\n";
+ dbgs() << "SLP: ScalarCost = " << ScalarCost << "\n";
+ dbgs() << "SLP: ReuseShuffleCost + VecCost - ScalarCost = " <<
+ ReuseShuffleCost + VecCost - ScalarCost << "\n";
+ }
+#endif
+
/// Create a new VectorizableTree entry.
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, Optional<ScheduleData *> Bundle,
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
ArrayRef<unsigned> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
- bool Vectorized = (bool)Bundle;
+ TreeEntry::EntryState EntryState =
+ Bundle ? TreeEntry::Vectorize : TreeEntry::NeedToGather;
+ return newTreeEntry(VL, EntryState, Bundle, S, UserTreeIdx,
+ ReuseShuffleIndices, ReorderIndices);
+ }
+
+ TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
+ TreeEntry::EntryState EntryState,
+ Optional<ScheduleData *> Bundle,
+ const InstructionsState &S,
+ const EdgeInfo &UserTreeIdx,
+ ArrayRef<unsigned> ReuseShuffleIndices = None,
+ ArrayRef<unsigned> ReorderIndices = None) {
+ assert(((!Bundle && EntryState == TreeEntry::NeedToGather) ||
+ (Bundle && EntryState != TreeEntry::NeedToGather)) &&
+ "Need to vectorize gather entry?");
VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
TreeEntry *Last = VectorizableTree.back().get();
Last->Idx = VectorizableTree.size() - 1;
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
- Last->State = Vectorized ? TreeEntry::Vectorize : TreeEntry::NeedToGather;
+ Last->State = EntryState;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
ReuseShuffleIndices.end());
- Last->ReorderIndices = ReorderIndices;
+ Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
Last->setOperations(S);
- if (Vectorized) {
- for (int i = 0, e = VL.size(); i != e; ++i) {
- assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
- ScalarToTreeEntry[VL[i]] = Last;
+ if (Last->State != TreeEntry::NeedToGather) {
+ for (Value *V : VL) {
+ assert(!getTreeEntry(V) && "Scalar already in tree!");
+ ScalarToTreeEntry[V] = Last;
}
// Update the scheduler bundle to point to this TreeEntry.
unsigned Lane = 0;
@@ -1699,18 +1849,10 @@ private:
}
#endif
- TreeEntry *getTreeEntry(Value *V) {
- auto I = ScalarToTreeEntry.find(V);
- if (I != ScalarToTreeEntry.end())
- return I->second;
- return nullptr;
- }
+ TreeEntry *getTreeEntry(Value *V) { return ScalarToTreeEntry.lookup(V); }
const TreeEntry *getTreeEntry(Value *V) const {
- auto I = ScalarToTreeEntry.find(V);
- if (I != ScalarToTreeEntry.end())
- return I->second;
- return nullptr;
+ return ScalarToTreeEntry.lookup(V);
}
/// Maps a specific scalar to its tree entry.
@@ -2195,7 +2337,6 @@ private:
/// List of users to ignore during scheduling and that don't need extracting.
ArrayRef<Value *> UserIgnoreList;
- using OrdersType = SmallVector<unsigned, 4>;
/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of
/// sorted SmallVectors of unsigned.
struct OrdersTypeDenseMapInfo {
@@ -2233,7 +2374,7 @@ private:
ScalarEvolution *SE;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
- AliasAnalysis *AA;
+ AAResults *AA;
LoopInfo *LI;
DominatorTree *DT;
AssumptionCache *AC;
@@ -2332,9 +2473,9 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
}
for (auto V : Entry->Scalars) {
OS << *V;
- if (std::any_of(
- R->ExternalUses.begin(), R->ExternalUses.end(),
- [&](const BoUpSLP::ExternalUser &EU) { return EU.Scalar == V; }))
+ if (llvm::any_of(R->ExternalUses, [&](const BoUpSLP::ExternalUser &EU) {
+ return EU.Scalar == V;
+ }))
OS << " <extract>";
OS << "\n";
}
@@ -2366,13 +2507,17 @@ BoUpSLP::~BoUpSLP() {
"trying to erase instruction with users.");
Pair.getFirst()->eraseFromParent();
}
+#ifdef EXPENSIVE_CHECKS
+ // If we could guarantee that this call is not extremely slow, we could
+ // remove the ifdef limitation (see PR47712).
assert(!verifyFunction(*F, &dbgs()));
+#endif
}
void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
for (auto *V : AV) {
if (auto *I = dyn_cast<Instruction>(V))
- eraseInstruction(I, /*ReplaceWithUndef=*/true);
+ eraseInstruction(I, /*ReplaceOpsWithUndef=*/true);
};
}
@@ -2597,11 +2742,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
auto *PH = cast<PHINode>(VL0);
// Check for terminator values (e.g. invoke).
- for (unsigned j = 0; j < VL.size(); ++j)
- for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
+ for (Value *V : VL)
+ for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
Instruction *Term = dyn_cast<Instruction>(
- cast<PHINode>(VL[j])->getIncomingValueForBlock(
- PH->getIncomingBlock(i)));
+ cast<PHINode>(V)->getIncomingValueForBlock(
+ PH->getIncomingBlock(I)));
if (Term && Term->isTerminator()) {
LLVM_DEBUG(dbgs()
<< "SLP: Need to swizzle PHINodes (terminator use).\n");
@@ -2618,13 +2763,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Keeps the reordered operands to avoid code duplication.
SmallVector<ValueList, 2> OperandsVec;
- for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
+ for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
ValueList Operands;
// Prepare the operand vector.
- for (Value *j : VL)
- Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
- PH->getIncomingBlock(i)));
- TE->setOperand(i, Operands);
+ for (Value *V : VL)
+ Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(
+ PH->getIncomingBlock(I)));
+ TE->setOperand(I, Operands);
OperandsVec.push_back(Operands);
}
for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx)
@@ -2657,12 +2802,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
});
// Insert new order with initial value 0, if it does not exist,
// otherwise return the iterator to the existing one.
- auto StoredCurrentOrderAndNum =
- NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
- ++StoredCurrentOrderAndNum->getSecond();
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies,
- StoredCurrentOrderAndNum->getFirst());
+ ReuseShuffleIndicies, CurrentOrder);
+ findRootOrder(CurrentOrder);
+ ++NumOpsWantToKeepOrder[CurrentOrder];
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTree_rec() towards the operands.
ValueList Op0;
@@ -2739,16 +2882,23 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
} else {
// Need to reorder.
- auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
- ++I->getSecond();
TreeEntry *TE =
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies, I->getFirst());
+ ReuseShuffleIndicies, CurrentOrder);
TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
+ findRootOrder(CurrentOrder);
+ ++NumOpsWantToKeepOrder[CurrentOrder];
}
return;
}
+ // Vectorizing non-consecutive loads with `llvm.masked.gather`.
+ TreeEntry *TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
+ UserTreeIdx, ReuseShuffleIndicies);
+ TE->setOperandsInOrder();
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
+ return;
}
LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
@@ -2883,8 +3033,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
- for (Value *j : VL)
- Operands.push_back(cast<Instruction>(j)->getOperand(i));
+ for (Value *V : VL)
+ Operands.push_back(cast<Instruction>(V)->getOperand(i));
buildTree_rec(Operands, Depth + 1, {TE, i});
}
@@ -2952,6 +3102,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::Store: {
// Check if the stores are consecutive or if we need to swizzle them.
llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();
+ // Avoid types that are padded when being allocated as scalars, while
+ // being packed together in a vector (such as i1).
+ if (DL->getTypeSizeInBits(ScalarTy) !=
+ DL->getTypeAllocSizeInBits(ScalarTy)) {
+ BS.cancelScheduling(VL, VL0);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ LLVM_DEBUG(dbgs() << "SLP: Gathering stores of non-packed type.\n");
+ return;
+ }
// Make sure all stores in the bundle are simple - we can't vectorize
// atomic or volatile stores.
SmallVector<Value *, 4> PointerOps(VL.size());
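The padded-type bail-out added above compares a type's value width with the width it occupies in memory when allocated as a scalar; when they differ (as for i1, one bit of value in a whole byte), the stores are not vectorized. A minimal standalone check mirroring that test — the widths are supplied by hand here rather than coming from a DataLayout:

#include <cassert>
#include <cstdint>

// Mirrors the DL->getTypeSizeInBits() != DL->getTypeAllocSizeInBits() test:
// a scalar type qualifies for store vectorization only when its in-register
// bit width equals its in-memory allocation width.
bool isPackedScalarType(uint64_t TypeSizeInBits, uint64_t TypeAllocSizeInBits) {
  return TypeSizeInBits == TypeAllocSizeInBits;
}

int main() {
  // i1: 1 bit of value, allocated as a whole byte -> rejected.
  assert(!isPackedScalarType(/*TypeSizeInBits=*/1, /*TypeAllocSizeInBits=*/8));
  // i32: value width and allocation width agree -> fine.
  assert(isPackedScalarType(/*TypeSizeInBits=*/32, /*TypeAllocSizeInBits=*/32));
  return 0;
}
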
@@ -3001,15 +3161,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
buildTree_rec(Operands, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
} else {
- // Need to reorder.
- auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
- ++(I->getSecond());
TreeEntry *TE =
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies, I->getFirst());
+ ReuseShuffleIndicies, CurrentOrder);
TE->setOperandsInOrder();
buildTree_rec(Operands, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
+ findRootOrder(CurrentOrder);
+ ++NumOpsWantToKeepOrder[CurrentOrder];
}
return;
}
@@ -3028,7 +3187,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
VFShape Shape = VFShape::get(
- *CI, {static_cast<unsigned int>(VL.size()), false /*Scalable*/},
+ *CI, ElementCount::getFixed(static_cast<unsigned int>(VL.size())),
false /*HasGlobalPred*/);
Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
@@ -3165,7 +3324,7 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
N *= AT->getNumElements();
EltTy = AT->getElementType();
} else {
- auto *VT = cast<VectorType>(EltTy);
+ auto *VT = cast<FixedVectorType>(EltTy);
N *= VT->getNumElements();
EltTy = VT->getElementType();
}
@@ -3203,7 +3362,7 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
if (!LI || !LI->isSimple() || !LI->hasNUses(VL.size()))
return false;
} else {
- NElts = cast<VectorType>(Vec->getType())->getNumElements();
+ NElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
}
if (NElts != VL.size())
@@ -3247,27 +3406,26 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
}
bool BoUpSLP::areAllUsersVectorized(Instruction *I) const {
- return I->hasOneUse() ||
- std::all_of(I->user_begin(), I->user_end(), [this](User *U) {
+ return I->hasOneUse() || llvm::all_of(I->users(), [this](User *U) {
return ScalarToTreeEntry.count(U) > 0;
});
}
-static std::pair<unsigned, unsigned>
-getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI,
- TargetLibraryInfo *TLI) {
+static std::pair<InstructionCost, InstructionCost>
+getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
+ TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
- IntrinsicCostAttributes CostAttrs(ID, *CI, VecTy->getNumElements());
- int IntrinsicCost =
+ IntrinsicCostAttributes CostAttrs(ID, *CI, VecTy->getElementCount());
+ auto IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
- auto Shape =
- VFShape::get(*CI, {static_cast<unsigned>(VecTy->getNumElements()), false},
- false /*HasGlobalPred*/);
+ auto Shape = VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
+ VecTy->getNumElements())),
+ false /*HasGlobalPred*/);
Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
- int LibCost = IntrinsicCost;
+ auto LibCost = IntrinsicCost;
if (!CI->isNoBuiltin() && VecFunc) {
// Calculate the cost of the vector library call.
SmallVector<Type *, 4> VecTys;
@@ -3282,7 +3440,7 @@ getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI,
return {IntrinsicCost, LibCost};
}
-int BoUpSLP::getEntryCost(TreeEntry *E) {
+InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
ArrayRef<Value*> VL = E->Scalars;
Type *ScalarTy = VL[0]->getType();
@@ -3301,7 +3459,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
- int ReuseShuffleCost = 0;
+ InstructionCost ReuseShuffleCost = 0;
if (NeedToShuffleReuses) {
ReuseShuffleCost =
TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
@@ -3317,7 +3475,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
allSameType(VL) && allSameBlock(VL)) {
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
if (ShuffleKind.hasValue()) {
- int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
+ InstructionCost Cost =
+ TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
for (auto *V : VL) {
// If all users of instruction are going to be vectorized and this
// instruction itself is not going to be vectorized, consider this
@@ -3336,7 +3495,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
return ReuseShuffleCost + getGatherCost(VL);
}
- assert(E->State == TreeEntry::Vectorize && "Unhandled state");
+ assert((E->State == TreeEntry::Vectorize ||
+ E->State == TreeEntry::ScatterVectorize) &&
+ "Unhandled state");
assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
Instruction *VL0 = E->getMainOp();
unsigned ShuffleOrOp =
@@ -3375,37 +3536,37 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, Idx);
}
}
- int DeadCost = ReuseShuffleCost;
+ InstructionCost DeadCost = ReuseShuffleCost;
if (!E->ReorderIndices.empty()) {
// TODO: Merge this shuffle with the ReuseShuffleCost.
DeadCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
}
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- Instruction *E = cast<Instruction>(VL[i]);
+ for (unsigned I = 0, E = VL.size(); I < E; ++I) {
+ Instruction *EI = cast<Instruction>(VL[I]);
// If all users are going to be vectorized, instruction can be
// considered as dead.
// The same, if have only one user, it will be vectorized for sure.
- if (areAllUsersVectorized(E)) {
+ if (areAllUsersVectorized(EI)) {
// Take credit for instruction that will become dead.
- if (E->hasOneUse()) {
- Instruction *Ext = E->user_back();
+ if (EI->hasOneUse()) {
+ Instruction *Ext = EI->user_back();
if ((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
all_of(Ext->users(),
[](User *U) { return isa<GetElementPtrInst>(U); })) {
// Use getExtractWithExtendCost() to calculate the cost of
// extractelement/ext pair.
DeadCost -= TTI->getExtractWithExtendCost(
- Ext->getOpcode(), Ext->getType(), VecTy, i);
+ Ext->getOpcode(), Ext->getType(), VecTy, I);
// Add back the cost of s|zext which is subtracted separately.
DeadCost += TTI->getCastInstrCost(
- Ext->getOpcode(), Ext->getType(), E->getType(), CostKind,
- Ext);
+ Ext->getOpcode(), Ext->getType(), EI->getType(),
+ TTI::getCastContextHint(Ext), CostKind, Ext);
continue;
}
}
DeadCost -=
- TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
+ TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, I);
}
}
return DeadCost;
@@ -3423,40 +3584,78 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
case Instruction::FPTrunc:
case Instruction::BitCast: {
Type *SrcTy = VL0->getOperand(0)->getType();
- int ScalarEltCost =
- TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, CostKind,
- VL0);
+ InstructionCost ScalarEltCost =
+ TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
+ TTI::getCastContextHint(VL0), CostKind, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
// Calculate the cost of this instruction.
- int ScalarCost = VL.size() * ScalarEltCost;
+ InstructionCost ScalarCost = VL.size() * ScalarEltCost;
auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
- int VecCost = 0;
+ InstructionCost VecCost = 0;
// Check if the values are candidates to demote.
if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
- VecCost = ReuseShuffleCost +
- TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy,
- CostKind, VL0);
+ VecCost =
+ ReuseShuffleCost +
+ TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy,
+ TTI::getCastContextHint(VL0), CostKind, VL0);
}
+ LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
return VecCost - ScalarCost;
}
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select: {
// Calculate the cost of this instruction.
- int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
- Builder.getInt1Ty(),
- CostKind, VL0);
+ InstructionCost ScalarEltCost =
+ TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
+ CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
- int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
- int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
- CostKind, VL0);
+ InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
+
+ // Check if all entries in VL are either compares or selects with compares
+ // as condition that have the same predicates.
+ CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE;
+ bool First = true;
+ for (auto *V : VL) {
+ CmpInst::Predicate CurrentPred;
+ auto MatchCmp = m_Cmp(CurrentPred, m_Value(), m_Value());
+ if ((!match(V, m_Select(MatchCmp, m_Value(), m_Value())) &&
+ !match(V, MatchCmp)) ||
+ (!First && VecPred != CurrentPred)) {
+ VecPred = CmpInst::BAD_ICMP_PREDICATE;
+ break;
+ }
+ First = false;
+ VecPred = CurrentPred;
+ }
+
+ InstructionCost VecCost = TTI->getCmpSelInstrCost(
+ E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0);
+ // Check if it is possible and profitable to use min/max for selects in
+ // VL.
+ //
+ auto IntrinsicAndUse = canConvertToMinOrMaxIntrinsic(VL);
+ if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
+ IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
+ {VecTy, VecTy});
+ InstructionCost IntrinsicCost =
+ TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
+ // If the selects are the only uses of the compares, they will be dead
+ // and we can adjust the cost by removing their cost.
+ if (IntrinsicAndUse.second)
+ IntrinsicCost -=
+ TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, MaskTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ VecCost = std::min(VecCost, IntrinsicCost);
+ }
+ LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
return ReuseShuffleCost + VecCost - ScalarCost;
}
case Instruction::FNeg:
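The cost adjustment above prices a matching compare-plus-select sequence against the equivalent min/max intrinsic and keeps whichever the target reports as cheaper. As a scalar illustration of the pattern being matched (plain C++, not the cost model itself), both forms compute the same value:

#include <algorithm>
#include <cassert>
#include <cstdint>

// The select pattern the cost code looks for, and the min/max form it is
// compared against; the vector cost model simply picks the cheaper lowering.
int32_t selectForm(int32_t A, int32_t B) { return A < B ? A : B; } // icmp + select
int32_t intrinsicForm(int32_t A, int32_t B) { return std::min(A, B); } // analogous to smin

int main() {
  assert(selectForm(3, 7) == intrinsicForm(3, 7));
  assert(selectForm(-5, 2) == intrinsicForm(-5, 2));
  return 0;
}
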
@@ -3516,16 +3715,17 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
SmallVector<const Value *, 4> Operands(VL0->operand_values());
- int ScalarEltCost = TTI->getArithmeticInstrCost(
- E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
- Operands, VL0);
+ InstructionCost ScalarEltCost =
+ TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
+ Op2VK, Op1VP, Op2VP, Operands, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
- int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
- int VecCost = TTI->getArithmeticInstrCost(
- E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
- Operands, VL0);
+ InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
+ InstructionCost VecCost =
+ TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK,
+ Op2VK, Op1VP, Op2VP, Operands, VL0);
+ LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
return ReuseShuffleCost + VecCost - ScalarCost;
}
case Instruction::GetElementPtr: {
@@ -3534,36 +3734,42 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_UniformConstantValue;
- int ScalarEltCost =
- TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, CostKind,
- Op1VK, Op2VK);
+ InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
+ Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
- int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
- int VecCost =
- TTI->getArithmeticInstrCost(Instruction::Add, VecTy, CostKind,
- Op1VK, Op2VK);
+ InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
+ InstructionCost VecCost = TTI->getArithmeticInstrCost(
+ Instruction::Add, VecTy, CostKind, Op1VK, Op2VK);
+ LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
return ReuseShuffleCost + VecCost - ScalarCost;
}
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
Align alignment = cast<LoadInst>(VL0)->getAlign();
- int ScalarEltCost =
- TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0,
- CostKind, VL0);
+ InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
+ Instruction::Load, ScalarTy, alignment, 0, CostKind, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
- int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
- int VecLdCost =
- TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0,
- CostKind, VL0);
+ InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
+ InstructionCost VecLdCost;
+ if (E->State == TreeEntry::Vectorize) {
+ VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0,
+ CostKind, VL0);
+ } else {
+ assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");
+ VecLdCost = TTI->getGatherScatterOpCost(
+ Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
+ /*VariableMask=*/false, alignment, CostKind, VL0);
+ }
if (!E->ReorderIndices.empty()) {
// TODO: Merge this shuffle with the ReuseShuffleCost.
VecLdCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
}
+ LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecLdCost, ScalarLdCost));
return ReuseShuffleCost + VecLdCost - ScalarLdCost;
}
case Instruction::Store: {
@@ -3572,19 +3778,19 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
auto *SI =
cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
Align Alignment = SI->getAlign();
- int ScalarEltCost =
- TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
- CostKind, VL0);
+ InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
+ Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0);
if (NeedToShuffleReuses)
ReuseShuffleCost = -(ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
- int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
- int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
- VecTy, Alignment, 0, CostKind, VL0);
+ InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
+ InstructionCost VecStCost = TTI->getMemoryOpCost(
+ Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
if (IsReorder) {
// TODO: Merge this shuffle with the ReuseShuffleCost.
VecStCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
}
+ LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecStCost, ScalarStCost));
return ReuseShuffleCost + VecStCost - ScalarStCost;
}
case Instruction::Call: {
@@ -3592,15 +3798,17 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
- IntrinsicCostAttributes CostAttrs(ID, *CI, 1, 1);
- int ScalarEltCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
+ IntrinsicCostAttributes CostAttrs(ID, *CI, ElementCount::getFixed(1), 1);
+ InstructionCost ScalarEltCost =
+ TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
- int ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
+ InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
- int VecCallCost = std::min(VecCallCosts.first, VecCallCosts.second);
+ InstructionCost VecCallCost =
+ std::min(VecCallCosts.first, VecCallCosts.second);
LLVM_DEBUG(dbgs() << "SLP: Call cost " << VecCallCost - ScalarCallCost
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
@@ -3615,7 +3823,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
(Instruction::isCast(E->getOpcode()) &&
Instruction::isCast(E->getAltOpcode()))) &&
"Invalid Shuffle Vector Operand");
- int ScalarCost = 0;
+ InstructionCost ScalarCost = 0;
if (NeedToShuffleReuses) {
for (unsigned Idx : E->ReuseShuffleIndices) {
Instruction *I = cast<Instruction>(VL[Idx]);
@@ -3633,7 +3841,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
// VecCost is equal to sum of the cost of creating 2 vectors
// and the cost of creating shuffle.
- int VecCost = 0;
+ InstructionCost VecCost = 0;
if (Instruction::isBinaryOp(E->getOpcode())) {
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
@@ -3644,11 +3852,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
- CostKind);
+ TTI::CastContextHint::None, CostKind);
VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
- CostKind);
+ TTI::CastContextHint::None, CostKind);
}
VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
+ LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
return ReuseShuffleCost + VecCost - ScalarCost;
}
default:
@@ -3686,11 +3895,13 @@ static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
TargetTransformInfo *TTI) {
// Look past the root to find a source value. Arbitrarily follow the
// path through operand 0 of any 'or'. Also, peek through optional
- // shift-left-by-constant.
+ // shift-left-by-multiple-of-8-bits.
Value *ZextLoad = Root;
+ const APInt *ShAmtC;
while (!isa<ConstantExpr>(ZextLoad) &&
(match(ZextLoad, m_Or(m_Value(), m_Value())) ||
- match(ZextLoad, m_Shl(m_Value(), m_Constant()))))
+ (match(ZextLoad, m_Shl(m_Value(), m_APInt(ShAmtC))) &&
+ ShAmtC->urem(8) == 0)))
ZextLoad = cast<BinaryOperator>(ZextLoad)->getOperand(0);
// Check if the input is an extended load of the required or/shift expression.
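The tightened walk above only looks through shift-left amounts that are whole bytes, since load combining can only fold such shifts back into a wider load. A standalone example of the byte-assembly pattern it is meant to recognize (illustrative only; assumes a little-endian layout):

#include <cassert>
#include <cstdint>

// The kind of expression the walk peels: an 'or' of zero-extended narrow
// loads, each shifted left by a whole number of bytes (0, 8, 16, 24). Shift
// amounts that are not byte multiples no longer qualify as load-combine
// candidates.
uint32_t combineBytes(const uint8_t *P) {
  return static_cast<uint32_t>(P[0]) |
         (static_cast<uint32_t>(P[1]) << 8) |
         (static_cast<uint32_t>(P[2]) << 16) |
         (static_cast<uint32_t>(P[3]) << 24);
}

int main() {
  const uint8_t Bytes[4] = {0x78, 0x56, 0x34, 0x12};
  assert(combineBytes(Bytes) == 0x12345678u); // equivalent to one i32 load
  return 0;
}
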
@@ -3714,8 +3925,8 @@ static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
return true;
}
-bool BoUpSLP::isLoadCombineReductionCandidate(unsigned RdxOpcode) const {
- if (RdxOpcode != Instruction::Or)
+bool BoUpSLP::isLoadCombineReductionCandidate(RecurKind RdxKind) const {
+ if (RdxKind != RecurKind::Or)
return false;
unsigned NumElts = VectorizableTree[0]->Scalars.size();
@@ -3756,22 +3967,35 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
return true;
}
-int BoUpSLP::getSpillCost() const {
+InstructionCost BoUpSLP::getSpillCost() const {
// Walk from the bottom of the tree to the top, tracking which values are
// live. When we see a call instruction that is not part of our tree,
// query TTI to see if there is a cost to keeping values live over it
// (for example, if spills and fills are required).
unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
- int Cost = 0;
+ InstructionCost Cost = 0;
SmallPtrSet<Instruction*, 4> LiveValues;
Instruction *PrevInst = nullptr;
+ // The entries in VectorizableTree are not necessarily ordered by their
+ // position in basic blocks. Collect them and order them by dominance so later
+ // instructions are guaranteed to be visited first. For instructions in
+ // different basic blocks, we only scan to the beginning of the block, so
+ // their order does not matter, as long as all instructions in a basic block
+ // are grouped together. Using dominance ensures a deterministic order.
+ SmallVector<Instruction *, 16> OrderedScalars;
for (const auto &TEPtr : VectorizableTree) {
Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
if (!Inst)
continue;
+ OrderedScalars.push_back(Inst);
+ }
+ llvm::stable_sort(OrderedScalars, [this](Instruction *A, Instruction *B) {
+ return DT->dominates(B, A);
+ });
+ for (Instruction *Inst : OrderedScalars) {
if (!PrevInst) {
PrevInst = Inst;
continue;
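The sort introduced above orders the collected scalars by dominance so that, within a block, later instructions are visited first, and stable_sort keeps the result deterministic. A simplified single-block model of that comparator — instruction positions stand in for real dominator-tree queries, and the names here are hypothetical:

#include <algorithm>
#include <cassert>
#include <vector>

// For a single basic block, "B dominates A" reduces to "B appears before A",
// so the comparator below (the analogue of DT->dominates(B, A)) sorts later
// instructions to the front, which is the order the spill-cost walk needs.
struct Inst { int PositionInBlock; };

void orderForSpillScan(std::vector<Inst> &Scalars) {
  std::stable_sort(Scalars.begin(), Scalars.end(),
                   [](const Inst &A, const Inst &B) {
                     return B.PositionInBlock < A.PositionInBlock;
                   });
}

int main() {
  std::vector<Inst> Scalars = {{2}, {7}, {4}};
  orderForSpillScan(Scalars);
  assert(Scalars[0].PositionInBlock == 7 && Scalars[2].PositionInBlock == 2);
  return 0;
}
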
@@ -3825,8 +4049,8 @@ int BoUpSLP::getSpillCost() const {
return Cost;
}
-int BoUpSLP::getTreeCost() {
- int Cost = 0;
+InstructionCost BoUpSLP::getTreeCost() {
+ InstructionCost Cost = 0;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
<< VectorizableTree.size() << ".\n");
@@ -3856,15 +4080,16 @@ int BoUpSLP::getTreeCost() {
}))
continue;
- int C = getEntryCost(&TE);
+ InstructionCost C = getEntryCost(&TE);
+ Cost += C;
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for bundle that starts with " << *TE.Scalars[0]
- << ".\n");
- Cost += C;
+ << ".\n"
+ << "SLP: Current total cost = " << Cost << "\n");
}
SmallPtrSet<Value *, 16> ExtractCostCalculated;
- int ExtractCost = 0;
+ InstructionCost ExtractCost = 0;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
if (!ExtractCostCalculated.insert(EU.Scalar).second)
@@ -3894,39 +4119,42 @@ int BoUpSLP::getTreeCost() {
}
}
- int SpillCost = getSpillCost();
+ InstructionCost SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
- std::string Str;
+#ifndef NDEBUG
+ SmallString<256> Str;
{
- raw_string_ostream OS(Str);
+ raw_svector_ostream OS(Str);
OS << "SLP: Spill Cost = " << SpillCost << ".\n"
<< "SLP: Extract Cost = " << ExtractCost << ".\n"
<< "SLP: Total Cost = " << Cost << ".\n";
}
LLVM_DEBUG(dbgs() << Str);
-
if (ViewSLPTree)
ViewGraph(this, "SLP" + F->getName(), false, Str);
+#endif
return Cost;
}
-int BoUpSLP::getGatherCost(VectorType *Ty,
- const DenseSet<unsigned> &ShuffledIndices) const {
+InstructionCost
+BoUpSLP::getGatherCost(FixedVectorType *Ty,
+ const DenseSet<unsigned> &ShuffledIndices) const {
unsigned NumElts = Ty->getNumElements();
APInt DemandedElts = APInt::getNullValue(NumElts);
- for (unsigned i = 0; i < NumElts; ++i)
- if (!ShuffledIndices.count(i))
- DemandedElts.setBit(i);
- int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
- /*Extract*/ false);
+ for (unsigned I = 0; I < NumElts; ++I)
+ if (!ShuffledIndices.count(I))
+ DemandedElts.setBit(I);
+ InstructionCost Cost =
+ TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
+ /*Extract*/ false);
if (!ShuffledIndices.empty())
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
return Cost;
}
-int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
+InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// Find the type of the operands in VL.
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
@@ -3968,11 +4196,10 @@ void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) {
// should be in this block.
auto *Front = E->getMainOp();
auto *BB = Front->getParent();
- assert(llvm::all_of(make_range(E->Scalars.begin(), E->Scalars.end()),
- [=](Value *V) -> bool {
- auto *I = cast<Instruction>(V);
- return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
- }));
+ assert(llvm::all_of(E->Scalars, [=](Value *V) -> bool {
+ auto *I = cast<Instruction>(V);
+ return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
+ }));
// The last instruction in the bundle in program order.
Instruction *LastInst = nullptr;
@@ -4025,34 +4252,30 @@ void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) {
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
-Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
- Value *Vec = UndefValue::get(Ty);
- // Generate the 'InsertElement' instruction.
- for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
- Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
- if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {
- GatherSeq.insert(Insrt);
- CSEBlocks.insert(Insrt->getParent());
-
- // Add to our 'need-to-extract' list.
- if (TreeEntry *E = getTreeEntry(VL[i])) {
- // Find which lane we need to extract.
- int FoundLane = -1;
- for (unsigned Lane = 0, LE = E->Scalars.size(); Lane != LE; ++Lane) {
- // Is this the lane of the scalar that we are looking for ?
- if (E->Scalars[Lane] == VL[i]) {
- FoundLane = Lane;
- break;
- }
- }
- assert(FoundLane >= 0 && "Could not find the correct lane");
- if (!E->ReuseShuffleIndices.empty()) {
- FoundLane =
- std::distance(E->ReuseShuffleIndices.begin(),
- llvm::find(E->ReuseShuffleIndices, FoundLane));
- }
- ExternalUses.push_back(ExternalUser(VL[i], Insrt, FoundLane));
+Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
+ Value *Val0 =
+ isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
+ FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size());
+ Value *Vec = PoisonValue::get(VecTy);
+ unsigned InsIndex = 0;
+ for (Value *Val : VL) {
+ Vec = Builder.CreateInsertElement(Vec, Val, Builder.getInt32(InsIndex++));
+ auto *InsElt = dyn_cast<InsertElementInst>(Vec);
+ if (!InsElt)
+ continue;
+ GatherSeq.insert(InsElt);
+ CSEBlocks.insert(InsElt->getParent());
+ // Add to our 'need-to-extract' list.
+ if (TreeEntry *Entry = getTreeEntry(Val)) {
+ // Find which lane we need to extract.
+ unsigned FoundLane = std::distance(Entry->Scalars.begin(),
+ find(Entry->Scalars, Val));
+ assert(FoundLane < Entry->Scalars.size() && "Couldn't find extract lane");
+ if (!Entry->ReuseShuffleIndices.empty()) {
+ FoundLane = std::distance(Entry->ReuseShuffleIndices.begin(),
+ find(Entry->ReuseShuffleIndices, FoundLane));
}
+ ExternalUses.push_back(ExternalUser(Val, InsElt, FoundLane));
}
}
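gather() above records, for each inserted value that already has a tree entry, which lane a future extract must read, remapping that lane through ReuseShuffleIndices when the entry reuses lanes. A small standalone sketch of just that lane lookup — plain ints stand in for scalars, and the function name is hypothetical:

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

// Find the extract lane for a value held by a tree entry: its position in the
// entry's scalars, remapped through the reuse-shuffle mask if one is present.
unsigned findExtractLane(const std::vector<int> &EntryScalars,
                         const std::vector<unsigned> &ReuseShuffleIndices,
                         int Val) {
  unsigned FoundLane = std::distance(
      EntryScalars.begin(),
      std::find(EntryScalars.begin(), EntryScalars.end(), Val));
  assert(FoundLane < EntryScalars.size() && "Couldn't find extract lane");
  if (!ReuseShuffleIndices.empty())
    FoundLane = std::distance(
        ReuseShuffleIndices.begin(),
        std::find(ReuseShuffleIndices.begin(), ReuseShuffleIndices.end(),
                  FoundLane));
  return FoundLane;
}

int main() {
  // Entry holds scalars {10, 20, 30}; its lanes are reused as {2, 0, 1}.
  assert(findExtractLane({10, 20, 30}, {2, 0, 1}, 30) == 0);
  assert(findExtractLane({10, 20, 30}, {}, 20) == 1);
  return 0;
}
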
@@ -4076,8 +4299,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
for (int Idx : E->ReuseShuffleIndices)
if (UsedIdxs.insert(Idx).second)
UniqueIdxs.emplace_back(Idx);
- V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
- UniqueIdxs);
+ V = Builder.CreateShuffleVector(V, UniqueIdxs);
}
}
return V;
@@ -4085,10 +4307,6 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
}
}
- Type *ScalarTy = S.OpValue->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
- ScalarTy = SI->getValueOperand()->getType();
-
// Check that every instruction appears once in this bundle.
SmallVector<int, 4> ReuseShuffleIndicies;
SmallVector<Value *, 4> UniqueValues;
@@ -4108,27 +4326,16 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
else
VL = UniqueValues;
}
- auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
- Value *V = Gather(VL, VecTy);
+ Value *Vec = gather(VL);
if (!ReuseShuffleIndicies.empty()) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- ReuseShuffleIndicies, "shuffle");
- if (auto *I = dyn_cast<Instruction>(V)) {
+ Vec = Builder.CreateShuffleVector(Vec, ReuseShuffleIndicies, "shuffle");
+ if (auto *I = dyn_cast<Instruction>(Vec)) {
GatherSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
}
- return V;
-}
-
-static void inversePermutation(ArrayRef<unsigned> Indices,
- SmallVectorImpl<int> &Mask) {
- Mask.clear();
- const unsigned E = Indices.size();
- Mask.resize(E);
- for (unsigned I = 0; I < E; ++I)
- Mask[Indices[I]] = I;
+ return Vec;
}
Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
@@ -4139,32 +4346,31 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return E->VectorizedValue;
}
- Instruction *VL0 = E->getMainOp();
- Type *ScalarTy = VL0->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
- ScalarTy = SI->getValueOperand()->getType();
- auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
-
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
-
if (E->State == TreeEntry::NeedToGather) {
setInsertPointAfterBundle(E);
- auto *V = Gather(E->Scalars, VecTy);
+ Value *Vec = gather(E->Scalars);
if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- if (auto *I = dyn_cast<Instruction>(V)) {
+ Vec = Builder.CreateShuffleVector(Vec, E->ReuseShuffleIndices, "shuffle");
+ if (auto *I = dyn_cast<Instruction>(Vec)) {
GatherSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
}
- E->VectorizedValue = V;
- return V;
+ E->VectorizedValue = Vec;
+ return Vec;
}
- assert(E->State == TreeEntry::Vectorize && "Unhandled state");
+ assert((E->State == TreeEntry::Vectorize ||
+ E->State == TreeEntry::ScatterVectorize) &&
+ "Unhandled state");
unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
+ Instruction *VL0 = E->getMainOp();
+ Type *ScalarTy = VL0->getType();
+ if (auto *Store = dyn_cast<StoreInst>(VL0))
+ ScalarTy = Store->getValueOperand()->getType();
+ auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
switch (ShuffleOrOp) {
case Instruction::PHI: {
auto *PH = cast<PHINode>(VL0);
@@ -4172,10 +4378,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
Value *V = NewPhi;
- if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- }
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+
E->VectorizedValue = V;
// PHINodes may have multiple entries from the same block. We want to
@@ -4208,37 +4413,33 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
SmallVector<int, 4> Mask;
inversePermutation(E->ReorderIndices, Mask);
Builder.SetInsertPoint(VL0);
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), Mask,
- "reorder_shuffle");
+ V = Builder.CreateShuffleVector(V, Mask, "reorder_shuffle");
}
if (NeedToShuffleReuses) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
if (E->ReorderIndices.empty())
Builder.SetInsertPoint(VL0);
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
}
E->VectorizedValue = V;
return V;
}
case Instruction::ExtractValue: {
- LoadInst *LI = cast<LoadInst>(E->getSingleOperand(0));
+ auto *LI = cast<LoadInst>(E->getSingleOperand(0));
Builder.SetInsertPoint(LI);
- PointerType *PtrTy =
- PointerType::get(VecTy, LI->getPointerAddressSpace());
+ auto *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
Value *NewV = propagateMetadata(V, E->Scalars);
if (!E->ReorderIndices.empty()) {
SmallVector<int, 4> Mask;
inversePermutation(E->ReorderIndices, Mask);
- NewV = Builder.CreateShuffleVector(NewV, UndefValue::get(VecTy), Mask,
- "reorder_shuffle");
+ NewV = Builder.CreateShuffleVector(NewV, Mask, "reorder_shuffle");
}
if (NeedToShuffleReuses) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
- NewV = Builder.CreateShuffleVector(NewV, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
+ NewV = Builder.CreateShuffleVector(NewV, E->ReuseShuffleIndices,
+ "shuffle");
}
E->VectorizedValue = NewV;
return NewV;
@@ -4266,10 +4467,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
auto *CI = cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
- if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- }
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
@@ -4289,10 +4489,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V = Builder.CreateCmp(P0, L, R);
propagateIRFlags(V, E->Scalars, VL0);
- if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- }
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
@@ -4310,10 +4509,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = Builder.CreateSelect(Cond, True, False);
- if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- }
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
@@ -4334,10 +4532,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- }
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4378,10 +4575,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- }
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4396,30 +4592,40 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
setInsertPointAfterBundle(E);
LoadInst *LI = cast<LoadInst>(VL0);
+ Instruction *NewLI;
unsigned AS = LI->getPointerAddressSpace();
+ Value *PO = LI->getPointerOperand();
+ if (E->State == TreeEntry::Vectorize) {
- Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
- VecTy->getPointerTo(AS));
+ Value *VecPtr = Builder.CreateBitCast(PO, VecTy->getPointerTo(AS));
- // The pointer operand uses an in-tree scalar so we add the new BitCast to
- // ExternalUses list to make sure that an extract will be generated in the
- // future.
- Value *PO = LI->getPointerOperand();
- if (getTreeEntry(PO))
- ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
+ // The pointer operand uses an in-tree scalar so we add the new BitCast
+ // to ExternalUses list to make sure that an extract will be generated
+ // in the future.
+ if (getTreeEntry(PO))
+ ExternalUses.emplace_back(PO, cast<User>(VecPtr), 0);
+
+ NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
+ } else {
+ assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
+ Value *VecPtr = vectorizeTree(E->getOperand(0));
+ // Use the minimum alignment of the gathered loads.
+ Align CommonAlignment = LI->getAlign();
+ for (Value *V : E->Scalars)
+ CommonAlignment =
+ commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
+ NewLI = Builder.CreateMaskedGather(VecPtr, CommonAlignment);
+ }
+ Value *V = propagateMetadata(NewLI, E->Scalars);
- LI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
- Value *V = propagateMetadata(LI, E->Scalars);
if (IsReorder) {
SmallVector<int, 4> Mask;
inversePermutation(E->ReorderIndices, Mask);
- V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
- Mask, "reorder_shuffle");
+ V = Builder.CreateShuffleVector(V, Mask, "reorder_shuffle");
}
if (NeedToShuffleReuses) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
}
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4437,9 +4643,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (IsReorder) {
SmallVector<int, 4> Mask(E->ReorderIndices.begin(),
E->ReorderIndices.end());
- VecValue = Builder.CreateShuffleVector(
- VecValue, UndefValue::get(VecValue->getType()), Mask,
- "reorder_shuffle");
+ VecValue = Builder.CreateShuffleVector(VecValue, Mask, "reorder_shuf");
}
Value *ScalarPtr = SI->getPointerOperand();
Value *VecPtr = Builder.CreateBitCast(
@@ -4454,10 +4658,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
Value *V = propagateMetadata(ST, E->Scalars);
- if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- }
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
@@ -4494,10 +4697,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- }
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4537,9 +4739,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Function *CF;
if (!UseIntrinsic) {
- VFShape Shape = VFShape::get(
- *CI, {static_cast<unsigned>(VecTy->getNumElements()), false},
- false /*HasGlobalPred*/);
+ VFShape Shape =
+ VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
+ VecTy->getNumElements())),
+ false /*HasGlobalPred*/);
CF = VFDatabase(*CI).getVectorizedFunction(Shape);
} else {
Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
@@ -4557,10 +4760,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
propagateIRFlags(V, E->Scalars, VL0);
- if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- }
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
@@ -4625,10 +4827,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- if (NeedToShuffleReuses) {
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
- E->ReuseShuffleIndices, "shuffle");
- }
+ if (NeedToShuffleReuses)
+ V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4693,7 +4894,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
continue;
TreeEntry *E = getTreeEntry(Scalar);
assert(E && "Invalid scalar");
- assert(E->State == TreeEntry::Vectorize && "Extracting from a gather list");
+ assert(E->State != TreeEntry::NeedToGather &&
+ "Extracting from a gather list");
Value *Vec = E->VectorizedValue;
assert(Vec && "Can't find vectorizable value");
@@ -4851,7 +5053,8 @@ void BoUpSLP::optimizeGatherSequence() {
// instructions into different buckets based on the insert lane.
SmallVector<Instruction *, 16> Visited;
for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) {
- assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
+ assert(*I &&
+ (I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
"Worklist not sorted properly!");
BasicBlock *BB = (*I)->getBlock();
// For all instructions in blocks containing gather sequences:
@@ -4961,8 +5164,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
// cancelScheduling).
while (!Bundle->isReady() && !ReadyInsts.empty()) {
- ScheduleData *pickedSD = ReadyInsts.back();
- ReadyInsts.pop_back();
+ ScheduleData *pickedSD = ReadyInsts.pop_back_val();
if (pickedSD->isSchedulingEntity() && pickedSD->isReady()) {
schedule(pickedSD, ReadyInsts);
@@ -5106,7 +5308,9 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
if (I->mayReadOrWriteMemory() &&
(!isa<IntrinsicInst>(I) ||
- cast<IntrinsicInst>(I)->getIntrinsicID() != Intrinsic::sideeffect)) {
+ (cast<IntrinsicInst>(I)->getIntrinsicID() != Intrinsic::sideeffect &&
+ cast<IntrinsicInst>(I)->getIntrinsicID() !=
+ Intrinsic::pseudoprobe))) {
// Update the linked list of memory accessing instructions.
if (CurrentLoadStore) {
CurrentLoadStore->NextLoadStore = SD;
@@ -5133,8 +5337,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
WorkList.push_back(SD);
while (!WorkList.empty()) {
- ScheduleData *SD = WorkList.back();
- WorkList.pop_back();
+ ScheduleData *SD = WorkList.pop_back_val();
ScheduleData *BundleMember = SD;
while (BundleMember) {
@@ -5331,10 +5534,15 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
}
unsigned BoUpSLP::getVectorElementSize(Value *V) {
- // If V is a store, just return the width of the stored value without
- // traversing the expression tree. This is the common case.
- if (auto *Store = dyn_cast<StoreInst>(V))
- return DL->getTypeSizeInBits(Store->getValueOperand()->getType());
+ // If V is a store, just return the width of the stored value (or value
+ // truncated just before storing) without traversing the expression tree.
+ // This is the common case.
+ if (auto *Store = dyn_cast<StoreInst>(V)) {
+ if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
+ return DL->getTypeSizeInBits(Trunc->getSrcTy());
+ else
+ return DL->getTypeSizeInBits(Store->getValueOperand()->getType());
+ }
auto E = InstrElementSize.find(V);
if (E != InstrElementSize.end())
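The change above makes the element size of a store look through a preceding truncate, so a store of `trunc iWide to iNarrow` is sized by the wider source type when choosing the vectorization factor. A tiny standalone model of that decision (the widths are passed in by hand; illustrative only):

#include <cassert>

// For `store (trunc iWide %x to iNarrow)` the element size used for picking
// the VF is the width of the truncate's source, not the narrower stored type.
unsigned storedElementSizeInBits(unsigned StoredTypeBits,
                                 bool ValueComesFromTrunc,
                                 unsigned TruncSourceBits) {
  return ValueComesFromTrunc ? TruncSourceBits : StoredTypeBits;
}

int main() {
  // store i32 (trunc i64 %x) -> treat the element as 64 bits wide.
  assert(storedElementSizeInBits(32, /*ValueComesFromTrunc=*/true, 64) == 64);
  // plain store i32 %y -> 32 bits.
  assert(storedElementSizeInBits(32, /*ValueComesFromTrunc=*/false, 0) == 32);
  return 0;
}
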
@@ -5683,7 +5891,7 @@ PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &A
bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
TargetTransformInfo *TTI_,
- TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
+ TargetLibraryInfo *TLI_, AAResults *AA_,
LoopInfo *LI_, DominatorTree *DT_,
AssumptionCache *AC_, DemandedBits *DB_,
OptimizationRemarkEmitter *ORE_) {
@@ -5783,11 +5991,11 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
R.computeMinimumValueSizes();
- int Cost = R.getTreeCost();
+ InstructionCost Cost = R.getTreeCost();
- LLVM_DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF =" << VF << "\n");
if (Cost < -SLPCostThreshold) {
- LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost = " << Cost << "\n");
using namespace ore;
@@ -5860,7 +6068,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
// If a vector register can't hold 1 element, we are done.
unsigned MaxVecRegSize = R.getMaxVecRegSize();
- unsigned EltSize = R.getVectorElementSize(Stores[0]);
+ unsigned EltSize = R.getVectorElementSize(Operands[0]);
if (MaxVecRegSize % EltSize != 0)
continue;
@@ -5911,7 +6119,7 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
continue;
if (!isValidElementType(SI->getValueOperand()->getType()))
continue;
- Stores[GetUnderlyingObject(SI->getPointerOperand(), *DL)].push_back(SI);
+ Stores[getUnderlyingObject(SI->getPointerOperand())].push_back(SI);
}
// Ignore getelementptr instructions that have more than one index, a
@@ -5975,6 +6183,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
unsigned Sz = R.getVectorElementSize(I0);
unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+ MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
if (MaxVF < 2) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
@@ -5986,7 +6195,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
bool Changed = false;
bool CandidateFound = false;
- int MinCost = SLPCostThreshold;
+ InstructionCost MinCost = SLPCostThreshold.getValue();
bool CompensateUseCost =
!InsertUses.empty() && llvm::all_of(InsertUses, [](const Value *V) {
@@ -6042,7 +6251,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
continue;
R.computeMinimumValueSizes();
- int Cost = R.getTreeCost();
+ InstructionCost Cost = R.getTreeCost();
CandidateFound = true;
if (CompensateUseCost) {
// TODO: Use TTI's getScalarizationOverhead for sequence of inserts
@@ -6052,7 +6261,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// part should also switch to same interface.
// For example, the following case is projected code after SLP:
// %4 = extractelement <4 x i64> %3, i32 0
- // %v0 = insertelement <4 x i64> undef, i64 %4, i32 0
+ // %v0 = insertelement <4 x i64> poison, i64 %4, i32 0
// %5 = extractelement <4 x i64> %3, i32 1
// %v1 = insertelement <4 x i64> %v0, i64 %5, i32 1
// %6 = extractelement <4 x i64> %3, i32 2
@@ -6072,7 +6281,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// Switching to the TTI interface might help a bit.
// Alternative solution could be pattern-match to detect a no-op or
// shuffle.
- unsigned UserCost = 0;
+ InstructionCost UserCost = 0;
for (unsigned Lane = 0; Lane < OpsWidth; Lane++) {
auto *IE = cast<InsertElementInst>(InsertUses[I + Lane]);
if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
@@ -6163,50 +6372,20 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
return false;
}
-/// Generate a shuffle mask to be used in a reduction tree.
-///
-/// \param VecLen The length of the vector to be reduced.
-/// \param NumEltsToRdx The number of elements that should be reduced in the
-/// vector.
-/// \param IsPairwise Whether the reduction is a pairwise or splitting
-/// reduction. A pairwise reduction will generate a mask of
-/// <0,2,...> or <1,3,..> while a splitting reduction will generate
-/// <2,3, undef,undef> for a vector of 4 and NumElts = 2.
-/// \param IsLeft True will generate a mask of even elements, odd otherwise.
-static SmallVector<int, 32> createRdxShuffleMask(unsigned VecLen,
- unsigned NumEltsToRdx,
- bool IsPairwise, bool IsLeft) {
- assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask");
-
- SmallVector<int, 32> ShuffleMask(VecLen, -1);
-
- if (IsPairwise)
- // Build a mask of 0, 2, ... (left) or 1, 3, ... (right).
- for (unsigned i = 0; i != NumEltsToRdx; ++i)
- ShuffleMask[i] = 2 * i + !IsLeft;
- else
- // Move the upper half of the vector to the lower half.
- for (unsigned i = 0; i != NumEltsToRdx; ++i)
- ShuffleMask[i] = NumEltsToRdx + i;
-
- return ShuffleMask;
-}
-
namespace {
/// Model horizontal reductions.
///
-/// A horizontal reduction is a tree of reduction operations (currently add and
-/// fadd) that has operations that can be put into a vector as its leaf.
-/// For example, this tree:
+/// A horizontal reduction is a tree of reduction instructions that has values
+/// that can be put into a vector as its leaves. For example:
///
/// mul mul mul mul
/// \ / \ /
/// + +
/// \ /
/// +
-/// This tree has "mul" as its reduced values and "+" as its reduction
-/// operations. A reduction might be feeding into a store or a binary operation
+/// This tree has "mul" as its leaf values and "+" as its reduction
+/// instructions. A reduction can feed into a store or a binary operation
/// feeding a phi.
/// ...
/// \ /
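For reference, the reduction shape sketched in the comment above written out as scalar code (illustrative only): the multiplies are the leaf values that end up in a vector, and the chain of adds is the reduction the vectorizer replaces.

#include <cassert>

// Scalar form of the tree in the comment: "mul" leaves feeding a tree of "+"
// reduction instructions.
int horizontalAddOfProducts(int A0, int B0, int A1, int B1,
                            int A2, int B2, int A3, int B3) {
  int M0 = A0 * B0, M1 = A1 * B1, M2 = A2 * B2, M3 = A3 * B3; // leaves
  return (M0 + M1) + (M2 + M3);                               // reduction tree
}

int main() {
  assert(horizontalAddOfProducts(1, 2, 3, 4, 5, 6, 7, 8) == 2 + 12 + 30 + 56);
  return 0;
}
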
@@ -6224,333 +6403,30 @@ namespace {
class HorizontalReduction {
using ReductionOpsType = SmallVector<Value *, 16>;
using ReductionOpsListType = SmallVector<ReductionOpsType, 2>;
- ReductionOpsListType ReductionOps;
+ ReductionOpsListType ReductionOps;
SmallVector<Value *, 32> ReducedVals;
// Use map vector to make stable output.
MapVector<Instruction *, Value *> ExtraArgs;
-
- /// Kind of the reduction data.
- enum ReductionKind {
- RK_None, /// Not a reduction.
- RK_Arithmetic, /// Binary reduction data.
- RK_Min, /// Minimum reduction data.
- RK_UMin, /// Unsigned minimum reduction data.
- RK_Max, /// Maximum reduction data.
- RK_UMax, /// Unsigned maximum reduction data.
- };
-
- /// Contains info about operation, like its opcode, left and right operands.
- class OperationData {
- /// Opcode of the instruction.
- unsigned Opcode = 0;
-
- /// Left operand of the reduction operation.
- Value *LHS = nullptr;
-
- /// Right operand of the reduction operation.
- Value *RHS = nullptr;
-
- /// Kind of the reduction operation.
- ReductionKind Kind = RK_None;
-
- /// True if float point min/max reduction has no NaNs.
- bool NoNaN = false;
-
- /// Checks if the reduction operation can be vectorized.
- bool isVectorizable() const {
- return LHS && RHS &&
- // We currently only support add/mul/logical && min/max reductions.
- ((Kind == RK_Arithmetic &&
- (Opcode == Instruction::Add || Opcode == Instruction::FAdd ||
- Opcode == Instruction::Mul || Opcode == Instruction::FMul ||
- Opcode == Instruction::And || Opcode == Instruction::Or ||
- Opcode == Instruction::Xor)) ||
- ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
- (Kind == RK_Min || Kind == RK_Max)) ||
- (Opcode == Instruction::ICmp &&
- (Kind == RK_UMin || Kind == RK_UMax)));
- }
-
- /// Creates reduction operation with the current opcode.
- Value *createOp(IRBuilder<> &Builder, const Twine &Name) const {
- assert(isVectorizable() &&
- "Expected add|fadd or min/max reduction operation.");
- Value *Cmp = nullptr;
- switch (Kind) {
- case RK_Arithmetic:
- return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS,
- Name);
- case RK_Min:
- Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS)
- : Builder.CreateFCmpOLT(LHS, RHS);
- return Builder.CreateSelect(Cmp, LHS, RHS, Name);
- case RK_Max:
- Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS)
- : Builder.CreateFCmpOGT(LHS, RHS);
- return Builder.CreateSelect(Cmp, LHS, RHS, Name);
- case RK_UMin:
- assert(Opcode == Instruction::ICmp && "Expected integer types.");
- Cmp = Builder.CreateICmpULT(LHS, RHS);
- return Builder.CreateSelect(Cmp, LHS, RHS, Name);
- case RK_UMax:
- assert(Opcode == Instruction::ICmp && "Expected integer types.");
- Cmp = Builder.CreateICmpUGT(LHS, RHS);
- return Builder.CreateSelect(Cmp, LHS, RHS, Name);
- case RK_None:
- break;
- }
- llvm_unreachable("Unknown reduction operation.");
- }
-
- public:
- explicit OperationData() = default;
-
- /// Construction for reduced values. They are identified by opcode only and
- /// don't have associated LHS/RHS values.
- explicit OperationData(Value *V) {
- if (auto *I = dyn_cast<Instruction>(V))
- Opcode = I->getOpcode();
- }
-
- /// Constructor for reduction operations with opcode and its left and
- /// right operands.
- OperationData(unsigned Opcode, Value *LHS, Value *RHS, ReductionKind Kind,
- bool NoNaN = false)
- : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind), NoNaN(NoNaN) {
- assert(Kind != RK_None && "One of the reduction operations is expected.");
- }
-
- explicit operator bool() const { return Opcode; }
-
- /// Return true if this operation is any kind of minimum or maximum.
- bool isMinMax() const {
- switch (Kind) {
- case RK_Arithmetic:
- return false;
- case RK_Min:
- case RK_Max:
- case RK_UMin:
- case RK_UMax:
- return true;
- case RK_None:
- break;
- }
- llvm_unreachable("Reduction kind is not set");
- }
-
- /// Get the index of the first operand.
- unsigned getFirstOperandIndex() const {
- assert(!!*this && "The opcode is not set.");
- // We allow calling this before 'Kind' is set, so handle that specially.
- if (Kind == RK_None)
- return 0;
- return isMinMax() ? 1 : 0;
- }
-
- /// Total number of operands in the reduction operation.
- unsigned getNumberOfOperands() const {
- assert(Kind != RK_None && !!*this && LHS && RHS &&
- "Expected reduction operation.");
- return isMinMax() ? 3 : 2;
- }
-
- /// Checks if the operation has the same parent as \p P.
- bool hasSameParent(Instruction *I, Value *P, bool IsRedOp) const {
- assert(Kind != RK_None && !!*this && LHS && RHS &&
- "Expected reduction operation.");
- if (!IsRedOp)
- return I->getParent() == P;
- if (isMinMax()) {
- // SelectInst must be used twice while the condition op must have single
- // use only.
- auto *Cmp = cast<Instruction>(cast<SelectInst>(I)->getCondition());
- return I->getParent() == P && Cmp && Cmp->getParent() == P;
- }
- // Arithmetic reduction operation must be used once only.
- return I->getParent() == P;
- }
-
- /// Expected number of uses for reduction operations/reduced values.
- bool hasRequiredNumberOfUses(Instruction *I, bool IsReductionOp) const {
- assert(Kind != RK_None && !!*this && LHS && RHS &&
- "Expected reduction operation.");
- if (isMinMax())
- return I->hasNUses(2) &&
- (!IsReductionOp ||
- cast<SelectInst>(I)->getCondition()->hasOneUse());
- return I->hasOneUse();
- }
-
- /// Initializes the list of reduction operations.
- void initReductionOps(ReductionOpsListType &ReductionOps) {
- assert(Kind != RK_None && !!*this && LHS && RHS &&
- "Expected reduction operation.");
- if (isMinMax())
- ReductionOps.assign(2, ReductionOpsType());
- else
- ReductionOps.assign(1, ReductionOpsType());
- }
-
- /// Add all reduction operations for the reduction instruction \p I.
- void addReductionOps(Instruction *I, ReductionOpsListType &ReductionOps) {
- assert(Kind != RK_None && !!*this && LHS && RHS &&
- "Expected reduction operation.");
- if (isMinMax()) {
- ReductionOps[0].emplace_back(cast<SelectInst>(I)->getCondition());
- ReductionOps[1].emplace_back(I);
- } else {
- ReductionOps[0].emplace_back(I);
- }
- }
-
- /// Checks if instruction is associative and can be vectorized.
- bool isAssociative(Instruction *I) const {
- assert(Kind != RK_None && *this && LHS && RHS &&
- "Expected reduction operation.");
- switch (Kind) {
- case RK_Arithmetic:
- return I->isAssociative();
- case RK_Min:
- case RK_Max:
- return Opcode == Instruction::ICmp ||
- cast<Instruction>(I->getOperand(0))->isFast();
- case RK_UMin:
- case RK_UMax:
- assert(Opcode == Instruction::ICmp &&
- "Only integer compare operation is expected.");
- return true;
- case RK_None:
- break;
- }
- llvm_unreachable("Reduction kind is not set");
- }
-
- /// Checks if the reduction operation can be vectorized.
- bool isVectorizable(Instruction *I) const {
- return isVectorizable() && isAssociative(I);
- }
-
- /// Checks if two operation data are both a reduction op or both a reduced
- /// value.
- bool operator==(const OperationData &OD) const {
- assert(((Kind != OD.Kind) || ((!LHS == !OD.LHS) && (!RHS == !OD.RHS))) &&
- "One of the comparing operations is incorrect.");
- return this == &OD || (Kind == OD.Kind && Opcode == OD.Opcode);
- }
- bool operator!=(const OperationData &OD) const { return !(*this == OD); }
- void clear() {
- Opcode = 0;
- LHS = nullptr;
- RHS = nullptr;
- Kind = RK_None;
- NoNaN = false;
- }
-
- /// Get the opcode of the reduction operation.
- unsigned getOpcode() const {
- assert(isVectorizable() && "Expected vectorizable operation.");
- return Opcode;
- }
-
- /// Get kind of reduction data.
- ReductionKind getKind() const { return Kind; }
- Value *getLHS() const { return LHS; }
- Value *getRHS() const { return RHS; }
- Type *getConditionType() const {
- return isMinMax() ? CmpInst::makeCmpResultType(LHS->getType()) : nullptr;
- }
-
- /// Creates reduction operation with the current opcode with the IR flags
- /// from \p ReductionOps.
- Value *createOp(IRBuilder<> &Builder, const Twine &Name,
- const ReductionOpsListType &ReductionOps) const {
- assert(isVectorizable() &&
- "Expected add|fadd or min/max reduction operation.");
- auto *Op = createOp(Builder, Name);
- switch (Kind) {
- case RK_Arithmetic:
- propagateIRFlags(Op, ReductionOps[0]);
- return Op;
- case RK_Min:
- case RK_Max:
- case RK_UMin:
- case RK_UMax:
- if (auto *SI = dyn_cast<SelectInst>(Op))
- propagateIRFlags(SI->getCondition(), ReductionOps[0]);
- propagateIRFlags(Op, ReductionOps[1]);
- return Op;
- case RK_None:
- break;
- }
- llvm_unreachable("Unknown reduction operation.");
- }
- /// Creates reduction operation with the current opcode with the IR flags
- /// from \p I.
- Value *createOp(IRBuilder<> &Builder, const Twine &Name,
- Instruction *I) const {
- assert(isVectorizable() &&
- "Expected add|fadd or min/max reduction operation.");
- auto *Op = createOp(Builder, Name);
- switch (Kind) {
- case RK_Arithmetic:
- propagateIRFlags(Op, I);
- return Op;
- case RK_Min:
- case RK_Max:
- case RK_UMin:
- case RK_UMax:
- if (auto *SI = dyn_cast<SelectInst>(Op)) {
- propagateIRFlags(SI->getCondition(),
- cast<SelectInst>(I)->getCondition());
- }
- propagateIRFlags(Op, I);
- return Op;
- case RK_None:
- break;
- }
- llvm_unreachable("Unknown reduction operation.");
- }
-
- TargetTransformInfo::ReductionFlags getFlags() const {
- TargetTransformInfo::ReductionFlags Flags;
- Flags.NoNaN = NoNaN;
- switch (Kind) {
- case RK_Arithmetic:
- break;
- case RK_Min:
- Flags.IsSigned = Opcode == Instruction::ICmp;
- Flags.IsMaxOp = false;
- break;
- case RK_Max:
- Flags.IsSigned = Opcode == Instruction::ICmp;
- Flags.IsMaxOp = true;
- break;
- case RK_UMin:
- Flags.IsSigned = false;
- Flags.IsMaxOp = false;
- break;
- case RK_UMax:
- Flags.IsSigned = false;
- Flags.IsMaxOp = true;
- break;
- case RK_None:
- llvm_unreachable("Reduction kind is not set");
- }
- return Flags;
- }
- };
-
WeakTrackingVH ReductionRoot;
+ /// The type of reduction operation.
+ RecurKind RdxKind;
- /// The operation data of the reduction operation.
- OperationData ReductionData;
+ /// Checks if instruction is associative and can be vectorized.
+ static bool isVectorizable(RecurKind Kind, Instruction *I) {
+ if (Kind == RecurKind::None)
+ return false;
+ if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind))
+ return true;
- /// The operation data of the values we perform a reduction on.
- OperationData ReducedValueData;
+ if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
+ // FP min/max are associative except for NaN and -0.0. We do not
+ // have to rule out -0.0 here because the intrinsic semantics do not
+ // specify a fixed result for it.
+ return I->getFastMathFlags().noNaNs();
+ }
- /// Should we model this reduction as a pairwise reduction tree or a tree that
- /// splits the vector in halves and adds those halves.
- bool IsPairwiseReduction = false;
+ return I->isAssociative();
+ }
/// Checks if the ParentStackElem.first should be marked as a reduction
/// operation with an extra argument or as extra argument itself.
@@ -6564,7 +6440,8 @@ class HorizontalReduction {
// in this case.
// Do not perform analysis of remaining operands of ParentStackElem.first
// instruction, this whole instruction is an extra argument.
- ParentStackElem.second = ParentStackElem.first->getNumOperands();
+ RecurKind ParentRdxKind = getRdxKind(ParentStackElem.first);
+ ParentStackElem.second = getNumberOfOperands(ParentRdxKind);
} else {
// We ran into something like:
// ParentStackElem.first += ... + ExtraArg + ...
@@ -6572,110 +6449,238 @@ class HorizontalReduction {
}
}
- static OperationData getOperationData(Value *V) {
- if (!V)
- return OperationData();
-
- Value *LHS;
- Value *RHS;
- if (m_BinOp(m_Value(LHS), m_Value(RHS)).match(V)) {
- return OperationData(cast<BinaryOperator>(V)->getOpcode(), LHS, RHS,
- RK_Arithmetic);
- }
- if (auto *Select = dyn_cast<SelectInst>(V)) {
- // Look for a min/max pattern.
- if (m_UMin(m_Value(LHS), m_Value(RHS)).match(Select)) {
- return OperationData(Instruction::ICmp, LHS, RHS, RK_UMin);
- } else if (m_SMin(m_Value(LHS), m_Value(RHS)).match(Select)) {
- return OperationData(Instruction::ICmp, LHS, RHS, RK_Min);
- } else if (m_OrdFMin(m_Value(LHS), m_Value(RHS)).match(Select) ||
- m_UnordFMin(m_Value(LHS), m_Value(RHS)).match(Select)) {
- return OperationData(
- Instruction::FCmp, LHS, RHS, RK_Min,
- cast<Instruction>(Select->getCondition())->hasNoNaNs());
- } else if (m_UMax(m_Value(LHS), m_Value(RHS)).match(Select)) {
- return OperationData(Instruction::ICmp, LHS, RHS, RK_UMax);
- } else if (m_SMax(m_Value(LHS), m_Value(RHS)).match(Select)) {
- return OperationData(Instruction::ICmp, LHS, RHS, RK_Max);
- } else if (m_OrdFMax(m_Value(LHS), m_Value(RHS)).match(Select) ||
- m_UnordFMax(m_Value(LHS), m_Value(RHS)).match(Select)) {
- return OperationData(
- Instruction::FCmp, LHS, RHS, RK_Max,
- cast<Instruction>(Select->getCondition())->hasNoNaNs());
- } else {
- // Try harder: look for min/max pattern based on instructions producing
- // same values such as: select ((cmp Inst1, Inst2), Inst1, Inst2).
- // During the intermediate stages of SLP, it's very common to have
- // pattern like this (since optimizeGatherSequence is run only once
- // at the end):
- // %1 = extractelement <2 x i32> %a, i32 0
- // %2 = extractelement <2 x i32> %a, i32 1
- // %cond = icmp sgt i32 %1, %2
- // %3 = extractelement <2 x i32> %a, i32 0
- // %4 = extractelement <2 x i32> %a, i32 1
- // %select = select i1 %cond, i32 %3, i32 %4
- CmpInst::Predicate Pred;
- Instruction *L1;
- Instruction *L2;
-
- LHS = Select->getTrueValue();
- RHS = Select->getFalseValue();
- Value *Cond = Select->getCondition();
-
- // TODO: Support inverse predicates.
- if (match(Cond, m_Cmp(Pred, m_Specific(LHS), m_Instruction(L2)))) {
- if (!isa<ExtractElementInst>(RHS) ||
- !L2->isIdenticalTo(cast<Instruction>(RHS)))
- return OperationData(V);
- } else if (match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Specific(RHS)))) {
- if (!isa<ExtractElementInst>(LHS) ||
- !L1->isIdenticalTo(cast<Instruction>(LHS)))
- return OperationData(V);
- } else {
- if (!isa<ExtractElementInst>(LHS) || !isa<ExtractElementInst>(RHS))
- return OperationData(V);
- if (!match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2))) ||
- !L1->isIdenticalTo(cast<Instruction>(LHS)) ||
- !L2->isIdenticalTo(cast<Instruction>(RHS)))
- return OperationData(V);
- }
- switch (Pred) {
- default:
- return OperationData(V);
-
- case CmpInst::ICMP_ULT:
- case CmpInst::ICMP_ULE:
- return OperationData(Instruction::ICmp, LHS, RHS, RK_UMin);
-
- case CmpInst::ICMP_SLT:
- case CmpInst::ICMP_SLE:
- return OperationData(Instruction::ICmp, LHS, RHS, RK_Min);
-
- case CmpInst::FCMP_OLT:
- case CmpInst::FCMP_OLE:
- case CmpInst::FCMP_ULT:
- case CmpInst::FCMP_ULE:
- return OperationData(Instruction::FCmp, LHS, RHS, RK_Min,
- cast<Instruction>(Cond)->hasNoNaNs());
-
- case CmpInst::ICMP_UGT:
- case CmpInst::ICMP_UGE:
- return OperationData(Instruction::ICmp, LHS, RHS, RK_UMax);
-
- case CmpInst::ICMP_SGT:
- case CmpInst::ICMP_SGE:
- return OperationData(Instruction::ICmp, LHS, RHS, RK_Max);
-
- case CmpInst::FCMP_OGT:
- case CmpInst::FCMP_OGE:
- case CmpInst::FCMP_UGT:
- case CmpInst::FCMP_UGE:
- return OperationData(Instruction::FCmp, LHS, RHS, RK_Max,
- cast<Instruction>(Cond)->hasNoNaNs());
- }
+ /// Creates reduction operation with the current opcode.
+ static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS,
+ Value *RHS, const Twine &Name) {
+ unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
+ switch (Kind) {
+ case RecurKind::Add:
+ case RecurKind::Mul:
+ case RecurKind::Or:
+ case RecurKind::And:
+ case RecurKind::Xor:
+ case RecurKind::FAdd:
+ case RecurKind::FMul:
+ return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
+ Name);
+ case RecurKind::FMax:
+ return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS);
+ case RecurKind::FMin:
+ return Builder.CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS);
+
+ case RecurKind::SMax: {
+ Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name);
+ return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+ }
+ case RecurKind::SMin: {
+ Value *Cmp = Builder.CreateICmpSLT(LHS, RHS, Name);
+ return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+ }
+ case RecurKind::UMax: {
+ Value *Cmp = Builder.CreateICmpUGT(LHS, RHS, Name);
+ return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+ }
+ case RecurKind::UMin: {
+ Value *Cmp = Builder.CreateICmpULT(LHS, RHS, Name);
+ return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+ }
+ default:
+ llvm_unreachable("Unknown reduction operation.");
+ }
+ }
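
The switch above maps each RecurKind to one of three shapes: a plain binary operator, a maxnum/minnum intrinsic call, or an icmp + select idiom for the integer min/max kinds. As a rough stand-alone model of the per-element combine (an illustrative sketch only; the enum and helper names below are invented and nothing here calls into LLVM):

#include <cmath>
#include <cstdint>

// Hypothetical scalar model of one reduction step; the real createOp()
// emits the equivalent IR instead of doing host arithmetic.
enum class RK { Add, Mul, And, Or, Xor, SMax, SMin, UMax, UMin, FMax, FMin };

int64_t combineInt(RK K, int64_t A, int64_t B) {
  switch (K) {
  case RK::Add:  return A + B;
  case RK::Mul:  return A * B;
  case RK::And:  return A & B;
  case RK::Or:   return A | B;
  case RK::Xor:  return A ^ B;
  case RK::SMax: return A > B ? A : B;                     // icmp sgt + select
  case RK::SMin: return A < B ? A : B;                     // icmp slt + select
  case RK::UMax: return (uint64_t)A > (uint64_t)B ? A : B; // icmp ugt + select
  case RK::UMin: return (uint64_t)A < (uint64_t)B ? A : B; // icmp ult + select
  default:       return 0;                                 // FP kinds handled below
  }
}

double combineFP(RK K, double A, double B) {
  // maxnum/minnum semantics: if exactly one operand is NaN, return the other.
  return K == RK::FMax ? std::fmax(A, B) : std::fmin(A, B);
}
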
+
+ /// Creates reduction operation with the current opcode with the IR flags
+ /// from \p ReductionOps.
+ static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
+ Value *RHS, const Twine &Name,
+ const ReductionOpsListType &ReductionOps) {
+ Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name);
+ if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
+ if (auto *Sel = dyn_cast<SelectInst>(Op))
+ propagateIRFlags(Sel->getCondition(), ReductionOps[0]);
+ propagateIRFlags(Op, ReductionOps[1]);
+ return Op;
+ }
+ propagateIRFlags(Op, ReductionOps[0]);
+ return Op;
+ }
+ /// Creates reduction operation with the current opcode with the IR flags
+ /// from \p I.
+ static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
+ Value *RHS, const Twine &Name, Instruction *I) {
+ Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name);
+ if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
+ if (auto *Sel = dyn_cast<SelectInst>(Op)) {
+ propagateIRFlags(Sel->getCondition(),
+ cast<SelectInst>(I)->getCondition());
}
}
- return OperationData(V);
+ propagateIRFlags(Op, I);
+ return Op;
+ }
+
+ static RecurKind getRdxKind(Instruction *I) {
+ assert(I && "Expected instruction for reduction matching");
+ TargetTransformInfo::ReductionFlags RdxFlags;
+ if (match(I, m_Add(m_Value(), m_Value())))
+ return RecurKind::Add;
+ if (match(I, m_Mul(m_Value(), m_Value())))
+ return RecurKind::Mul;
+ if (match(I, m_And(m_Value(), m_Value())))
+ return RecurKind::And;
+ if (match(I, m_Or(m_Value(), m_Value())))
+ return RecurKind::Or;
+ if (match(I, m_Xor(m_Value(), m_Value())))
+ return RecurKind::Xor;
+ if (match(I, m_FAdd(m_Value(), m_Value())))
+ return RecurKind::FAdd;
+ if (match(I, m_FMul(m_Value(), m_Value())))
+ return RecurKind::FMul;
+
+ if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
+ return RecurKind::FMax;
+ if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
+ return RecurKind::FMin;
+
+ if (match(I, m_SMax(m_Value(), m_Value())))
+ return RecurKind::SMax;
+ if (match(I, m_SMin(m_Value(), m_Value())))
+ return RecurKind::SMin;
+ if (match(I, m_UMax(m_Value(), m_Value())))
+ return RecurKind::UMax;
+ if (match(I, m_UMin(m_Value(), m_Value())))
+ return RecurKind::UMin;
+
+ if (auto *Select = dyn_cast<SelectInst>(I)) {
+ // Try harder: look for min/max pattern based on instructions producing
+ // same values such as: select ((cmp Inst1, Inst2), Inst1, Inst2).
+ // During the intermediate stages of SLP, it's very common to have a
+ // pattern like this (since optimizeGatherSequence is run only once
+ // at the end):
+ // %1 = extractelement <2 x i32> %a, i32 0
+ // %2 = extractelement <2 x i32> %a, i32 1
+ // %cond = icmp sgt i32 %1, %2
+ // %3 = extractelement <2 x i32> %a, i32 0
+ // %4 = extractelement <2 x i32> %a, i32 1
+ // %select = select i1 %cond, i32 %3, i32 %4
+ CmpInst::Predicate Pred;
+ Instruction *L1;
+ Instruction *L2;
+
+ Value *LHS = Select->getTrueValue();
+ Value *RHS = Select->getFalseValue();
+ Value *Cond = Select->getCondition();
+
+ // TODO: Support inverse predicates.
+ if (match(Cond, m_Cmp(Pred, m_Specific(LHS), m_Instruction(L2)))) {
+ if (!isa<ExtractElementInst>(RHS) ||
+ !L2->isIdenticalTo(cast<Instruction>(RHS)))
+ return RecurKind::None;
+ } else if (match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Specific(RHS)))) {
+ if (!isa<ExtractElementInst>(LHS) ||
+ !L1->isIdenticalTo(cast<Instruction>(LHS)))
+ return RecurKind::None;
+ } else {
+ if (!isa<ExtractElementInst>(LHS) || !isa<ExtractElementInst>(RHS))
+ return RecurKind::None;
+ if (!match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2))) ||
+ !L1->isIdenticalTo(cast<Instruction>(LHS)) ||
+ !L2->isIdenticalTo(cast<Instruction>(RHS)))
+ return RecurKind::None;
+ }
+
+ TargetTransformInfo::ReductionFlags RdxFlags;
+ switch (Pred) {
+ default:
+ return RecurKind::None;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SGE:
+ return RecurKind::SMax;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return RecurKind::SMin;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ return RecurKind::UMax;
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ return RecurKind::UMin;
+ }
+ }
+ return RecurKind::None;
+ }
+
+ /// Return true if this operation is a cmp+select idiom.
+ static bool isCmpSel(RecurKind Kind) {
+ return RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind);
+ }
+
+ /// Get the index of the first operand.
+ static unsigned getFirstOperandIndex(RecurKind Kind) {
+ // We allow calling this before 'Kind' is set, so handle that specially.
+ if (Kind == RecurKind::None)
+ return 0;
+ return isCmpSel(Kind) ? 1 : 0;
+ }
+
+ /// Total number of operands in the reduction operation.
+ static unsigned getNumberOfOperands(RecurKind Kind) {
+ return isCmpSel(Kind) ? 3 : 2;
+ }
+
+ /// Checks if the instruction is in basic block \p BB.
+ /// For a min/max reduction check that both compare and select are in \p BB.
+ static bool hasSameParent(RecurKind Kind, Instruction *I, BasicBlock *BB,
+ bool IsRedOp) {
+ if (IsRedOp && isCmpSel(Kind)) {
+ auto *Cmp = cast<Instruction>(cast<SelectInst>(I)->getCondition());
+ return I->getParent() == BB && Cmp && Cmp->getParent() == BB;
+ }
+ return I->getParent() == BB;
+ }
+
+ /// Expected number of uses for reduction operations/reduced values.
+ static bool hasRequiredNumberOfUses(RecurKind Kind, Instruction *I,
+ bool IsReductionOp) {
+ // The SelectInst must be used twice, while the condition op must have a
+ // single use only.
+ if (isCmpSel(Kind))
+ return I->hasNUses(2) &&
+ (!IsReductionOp ||
+ cast<SelectInst>(I)->getCondition()->hasOneUse());
+
+ // Arithmetic reduction operation must be used once only.
+ return I->hasOneUse();
+ }
+
+ /// Initializes the list of reduction operations.
+ void initReductionOps(RecurKind Kind) {
+ if (isCmpSel(Kind))
+ ReductionOps.assign(2, ReductionOpsType());
+ else
+ ReductionOps.assign(1, ReductionOpsType());
+ }
+
+ /// Add all reduction operations for the reduction instruction \p I.
+ void addReductionOps(RecurKind Kind, Instruction *I) {
+ assert(Kind != RecurKind::None && "Expected reduction operation.");
+ if (isCmpSel(Kind)) {
+ ReductionOps[0].emplace_back(cast<SelectInst>(I)->getCondition());
+ ReductionOps[1].emplace_back(I);
+ } else {
+ ReductionOps[0].emplace_back(I);
+ }
+ }
+
+ static Value *getLHS(RecurKind Kind, Instruction *I) {
+ if (Kind == RecurKind::None)
+ return nullptr;
+ return I->getOperand(getFirstOperandIndex(Kind));
+ }
+ static Value *getRHS(RecurKind Kind, Instruction *I) {
+ if (Kind == RecurKind::None)
+ return nullptr;
+ return I->getOperand(getFirstOperandIndex(Kind) + 1);
}
public:
@@ -6684,50 +6689,59 @@ public:
/// Try to find a reduction tree.
bool matchAssociativeReduction(PHINode *Phi, Instruction *B) {
assert((!Phi || is_contained(Phi->operands(), B)) &&
- "Thi phi needs to use the binary operator");
+ "Phi needs to use the binary operator");
- ReductionData = getOperationData(B);
+ RdxKind = getRdxKind(B);
// We could have an initial reduction that is not an add.
// r *= v1 + v2 + v3 + v4
// In such a case start looking for a tree rooted in the first '+'.
if (Phi) {
- if (ReductionData.getLHS() == Phi) {
+ if (getLHS(RdxKind, B) == Phi) {
Phi = nullptr;
- B = dyn_cast<Instruction>(ReductionData.getRHS());
- ReductionData = getOperationData(B);
- } else if (ReductionData.getRHS() == Phi) {
+ B = dyn_cast<Instruction>(getRHS(RdxKind, B));
+ if (!B)
+ return false;
+ RdxKind = getRdxKind(B);
+ } else if (getRHS(RdxKind, B) == Phi) {
Phi = nullptr;
- B = dyn_cast<Instruction>(ReductionData.getLHS());
- ReductionData = getOperationData(B);
+ B = dyn_cast<Instruction>(getLHS(RdxKind, B));
+ if (!B)
+ return false;
+ RdxKind = getRdxKind(B);
}
}
- if (!ReductionData.isVectorizable(B))
+ if (!isVectorizable(RdxKind, B))
return false;
+ // Analyze "regular" integer/FP types for reductions - no target-specific
+ // types or pointers.
Type *Ty = B->getType();
- if (!isValidElementType(Ty))
- return false;
- if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy())
+ if (!isValidElementType(Ty) || Ty->isPointerTy())
return false;
- ReducedValueData.clear();
ReductionRoot = B;
+ // The opcode for leaf values that we perform a reduction on.
+ // For example: load(x) + load(y) + load(z) + fptoui(w)
+ // The leaf opcode for 'w' does not match, so we don't include it as a
+ // potential candidate for the reduction.
+ unsigned LeafOpcode = 0;
+
// Post order traverse the reduction tree starting at B. We only handle true
// trees containing only binary operators.
SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
- Stack.push_back(std::make_pair(B, ReductionData.getFirstOperandIndex()));
- ReductionData.initReductionOps(ReductionOps);
+ Stack.push_back(std::make_pair(B, getFirstOperandIndex(RdxKind)));
+ initReductionOps(RdxKind);
while (!Stack.empty()) {
Instruction *TreeN = Stack.back().first;
- unsigned EdgeToVist = Stack.back().second++;
- OperationData OpData = getOperationData(TreeN);
- bool IsReducedValue = OpData != ReductionData;
+ unsigned EdgeToVisit = Stack.back().second++;
+ const RecurKind TreeRdxKind = getRdxKind(TreeN);
+ bool IsReducedValue = TreeRdxKind != RdxKind;
- // Postorder vist.
- if (IsReducedValue || EdgeToVist == OpData.getNumberOfOperands()) {
+ // Postorder visit.
+ if (IsReducedValue || EdgeToVisit == getNumberOfOperands(TreeRdxKind)) {
if (IsReducedValue)
ReducedVals.push_back(TreeN);
else {
@@ -6745,7 +6759,7 @@ public:
markExtraArg(Stack[Stack.size() - 2], TreeN);
ExtraArgs.erase(TreeN);
} else
- ReductionData.addReductionOps(TreeN, ReductionOps);
+ addReductionOps(RdxKind, TreeN);
}
// Retract.
Stack.pop_back();
@@ -6753,91 +6767,72 @@ public:
}
// Visit left or right.
- Value *NextV = TreeN->getOperand(EdgeToVist);
- if (NextV != Phi) {
- auto *I = dyn_cast<Instruction>(NextV);
- OpData = getOperationData(I);
- // Continue analysis if the next operand is a reduction operation or
- // (possibly) a reduced value. If the reduced value opcode is not set,
- // the first met operation != reduction operation is considered as the
- // reduced value class.
- if (I && (!ReducedValueData || OpData == ReducedValueData ||
- OpData == ReductionData)) {
- const bool IsReductionOperation = OpData == ReductionData;
- // Only handle trees in the current basic block.
- if (!ReductionData.hasSameParent(I, B->getParent(),
- IsReductionOperation)) {
- // I is an extra argument for TreeN (its parent operation).
- markExtraArg(Stack.back(), I);
- continue;
- }
-
- // Each tree node needs to have minimal number of users except for the
- // ultimate reduction.
- if (!ReductionData.hasRequiredNumberOfUses(I,
- OpData == ReductionData) &&
- I != B) {
+ Value *EdgeVal = TreeN->getOperand(EdgeToVisit);
+ auto *I = dyn_cast<Instruction>(EdgeVal);
+ if (!I) {
+ // Edge value is not a reduction instruction or a leaf instruction.
+ // (It may be a constant, function argument, or something else.)
+ markExtraArg(Stack.back(), EdgeVal);
+ continue;
+ }
+ RecurKind EdgeRdxKind = getRdxKind(I);
+ // Continue analysis if the next operand is a reduction operation or
+ // (possibly) a leaf value. If the leaf value opcode is not set,
+ // the first met operation != reduction operation is considered as the
+ // leaf opcode.
+ // Only handle trees in the current basic block.
+ // Each tree node needs to have minimal number of users except for the
+ // ultimate reduction.
+ const bool IsRdxInst = EdgeRdxKind == RdxKind;
+ if (I != Phi && I != B &&
+ hasSameParent(RdxKind, I, B->getParent(), IsRdxInst) &&
+ hasRequiredNumberOfUses(RdxKind, I, IsRdxInst) &&
+ (!LeafOpcode || LeafOpcode == I->getOpcode() || IsRdxInst)) {
+ if (IsRdxInst) {
+ // We need to be able to reassociate the reduction operations.
+ if (!isVectorizable(EdgeRdxKind, I)) {
// I is an extra argument for TreeN (its parent operation).
markExtraArg(Stack.back(), I);
continue;
}
-
- if (IsReductionOperation) {
- // We need to be able to reassociate the reduction operations.
- if (!OpData.isAssociative(I)) {
- // I is an extra argument for TreeN (its parent operation).
- markExtraArg(Stack.back(), I);
- continue;
- }
- } else if (ReducedValueData &&
- ReducedValueData != OpData) {
- // Make sure that the opcodes of the operations that we are going to
- // reduce match.
- // I is an extra argument for TreeN (its parent operation).
- markExtraArg(Stack.back(), I);
- continue;
- } else if (!ReducedValueData)
- ReducedValueData = OpData;
-
- Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex()));
- continue;
+ } else if (!LeafOpcode) {
+ LeafOpcode = I->getOpcode();
}
+ Stack.push_back(std::make_pair(I, getFirstOperandIndex(EdgeRdxKind)));
+ continue;
}
- // NextV is an extra argument for TreeN (its parent operation).
- markExtraArg(Stack.back(), NextV);
+ // I is an extra argument for TreeN (its parent operation).
+ markExtraArg(Stack.back(), I);
}
return true;
}
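
matchAssociativeReduction walks the reduction tree iteratively: the stack holds (instruction, next-operand-index) pairs, each visit bumps the index, and a node is retracted once all of its operands have been seen. The stand-alone sketch below mirrors that traversal shape on a plain binary tree (illustrative only; the Node type and field names are invented, and leaf classification is simplified compared to the real leaf-opcode, parent and use-count checks):

#include <utility>
#include <vector>

struct Node {
  int Value = 0;
  Node *Ops[2] = {nullptr, nullptr}; // leaves have no operands
};

// Collect the leaves of a binary expression tree in postorder, mimicking the
// (TreeN, EdgeToVisit) stack used by matchAssociativeReduction.
std::vector<int> collectLeaves(Node *Root) {
  std::vector<int> Leaves;
  std::vector<std::pair<Node *, unsigned>> Stack;
  Stack.push_back({Root, 0u});
  while (!Stack.empty()) {
    Node *N = Stack.back().first;
    unsigned Edge = Stack.back().second++; // visit each operand edge once
    if (Edge == 2) {                       // all edges done: retract
      Stack.pop_back();
      continue;
    }
    Node *Op = N->Ops[Edge];
    if (!Op)
      continue;
    if (!Op->Ops[0] && !Op->Ops[1])
      Leaves.push_back(Op->Value);         // reduced (leaf) value
    else
      Stack.push_back({Op, 0u});           // interior reduction op: descend
  }
  return Leaves;
}
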
- /// Attempt to vectorize the tree found by
- /// matchAssociativeReduction.
+ /// Attempt to vectorize the tree found by matchAssociativeReduction.
bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
- if (ReducedVals.empty())
- return false;
-
- // If there is a sufficient number of reduction values, reduce
- // to a nearby power-of-2. Can safely generate oversized
+ // If there are a sufficient number of reduction values, reduce
+ // to a nearby power-of-2. We can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.
unsigned NumReducedVals = ReducedVals.size();
if (NumReducedVals < 4)
return false;
- unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
-
- Value *VectorizedTree = nullptr;
+ // Intersect the fast-math-flags from all reduction operations.
+ FastMathFlags RdxFMF;
+ RdxFMF.set();
+ for (ReductionOpsType &RdxOp : ReductionOps) {
+ for (Value *RdxVal : RdxOp) {
+ if (auto *FPMO = dyn_cast<FPMathOperator>(RdxVal))
+ RdxFMF &= FPMO->getFastMathFlags();
+ }
+ }
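
This fast-math-flags handling replaces the old unconditional 'fast' setting: RdxFMF starts with every flag set and is intersected with the flags of each reduction operation, so the emitted vector reduction is only as relaxed as the strictest scalar op. A minimal stand-alone model of that intersection (a plain bitmask stands in for llvm::FastMathFlags; names are illustrative):

#include <cstdint>
#include <vector>

// Toy stand-in for llvm::FastMathFlags: each bit is one flag (nnan, ninf, ...).
using FMFBits = uint8_t;
constexpr FMFBits AllFlags = 0x7F;

// Start from "all flags set" and AND in each reduction op's flags, exactly
// like RdxFMF.set() followed by RdxFMF &= FPMO->getFastMathFlags().
FMFBits intersectFMF(const std::vector<FMFBits> &OpFlags) {
  FMFBits Common = AllFlags;
  for (FMFBits F : OpFlags)
    Common &= F;
  return Common; // flags that are safe to put on the emitted reduction
}
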
- // FIXME: Fast-math-flags should be set based on the instructions in the
- // reduction (not all of 'fast' are required).
IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
- FastMathFlags Unsafe;
- Unsafe.setFast();
- Builder.setFastMathFlags(Unsafe);
- unsigned i = 0;
+ Builder.setFastMathFlags(RdxFMF);
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
- // The same extra argument may be used several time, so log each attempt
+ // The same extra argument may be used several times, so log each attempt
// to use it.
- for (auto &Pair : ExtraArgs) {
+ for (const std::pair<Instruction *, Value *> &Pair : ExtraArgs) {
assert(Pair.first && "DebugLoc must be set.");
ExternallyUsedValues[Pair.second].push_back(Pair.first);
}
@@ -6857,14 +6852,48 @@ public:
// so set it as externally used to prevent it from being deleted.
ExternallyUsedValues[ReductionRoot];
SmallVector<Value *, 16> IgnoreList;
- for (auto &V : ReductionOps)
- IgnoreList.append(V.begin(), V.end());
+ for (ReductionOpsType &RdxOp : ReductionOps)
+ IgnoreList.append(RdxOp.begin(), RdxOp.end());
+
+ unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
+ if (NumReducedVals > ReduxWidth) {
+ // In the loop below, we are building a tree based on a window of
+ // 'ReduxWidth' values.
+ // If the operands of those values have common traits (compare predicate,
+ // constant operand, etc), then we want to group those together to
+ // minimize the cost of the reduction.
+
+ // TODO: This should be extended to count common operands for
+ // compares and binops.
+
+ // Step 1: Count the number of times each compare predicate occurs.
+ SmallDenseMap<unsigned, unsigned> PredCountMap;
+ for (Value *RdxVal : ReducedVals) {
+ CmpInst::Predicate Pred;
+ if (match(RdxVal, m_Cmp(Pred, m_Value(), m_Value())))
+ ++PredCountMap[Pred];
+ }
+ // Step 2: Sort the values so the most common predicates come first.
+ stable_sort(ReducedVals, [&PredCountMap](Value *A, Value *B) {
+ CmpInst::Predicate PredA, PredB;
+ if (match(A, m_Cmp(PredA, m_Value(), m_Value())) &&
+ match(B, m_Cmp(PredB, m_Value(), m_Value()))) {
+ return PredCountMap[PredA] > PredCountMap[PredB];
+ }
+ return false;
+ });
+ }
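
The predicate grouping above is a frequency sort: count how often each compare predicate occurs among the reduced values, then stable-sort so the most common predicate comes first while program order is preserved within each group. A self-contained sketch of the same idea over simple string tags (illustrative only, no LLVM types):

#include <algorithm>
#include <map>
#include <string>
#include <vector>

// Reorder values so the most frequent "predicate" tag comes first, mirroring
// the PredCountMap + stable_sort step above.
void groupByMostCommonTag(std::vector<std::string> &Vals) {
  std::map<std::string, unsigned> Count;
  for (const std::string &V : Vals)
    ++Count[V];
  std::stable_sort(Vals.begin(), Vals.end(),
                   [&Count](const std::string &A, const std::string &B) {
                     return Count.at(A) > Count.at(B); // stable inside a group
                   });
}

// For the input {"slt","sgt","slt","eq","slt","sgt"} the result keeps all
// "slt" values first, then the "sgt" ones, then "eq", with the original
// order preserved inside each group.
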
+
+ Value *VectorizedTree = nullptr;
+ unsigned i = 0;
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
- auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
+ ArrayRef<Value *> VL(&ReducedVals[i], ReduxWidth);
V.buildTree(VL, ExternallyUsedValues, IgnoreList);
Optional<ArrayRef<unsigned>> Order = V.bestOrder();
- // TODO: Handle orders of size less than number of elements in the vector.
- if (Order && Order->size() == VL.size()) {
+ if (Order) {
+ assert(Order->size() == VL.size() &&
+ "Order size must be the same as number of vectorized "
+ "instructions.");
// TODO: reorder tree nodes without tree rebuilding.
SmallVector<Value *, 4> ReorderedOps(VL.size());
llvm::transform(*Order, ReorderedOps.begin(),
@@ -6873,60 +6902,66 @@ public:
}
if (V.isTreeTinyAndNotFullyVectorizable())
break;
- if (V.isLoadCombineReductionCandidate(ReductionData.getOpcode()))
+ if (V.isLoadCombineReductionCandidate(RdxKind))
break;
V.computeMinimumValueSizes();
// Estimate cost.
- int TreeCost = V.getTreeCost();
- int ReductionCost = getReductionCost(TTI, ReducedVals[i], ReduxWidth);
- int Cost = TreeCost + ReductionCost;
+ InstructionCost TreeCost = V.getTreeCost();
+ InstructionCost ReductionCost =
+ getReductionCost(TTI, ReducedVals[i], ReduxWidth);
+ InstructionCost Cost = TreeCost + ReductionCost;
+ if (!Cost.isValid()) {
+ LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
+ return false;
+ }
if (Cost >= -SLPCostThreshold) {
- V.getORE()->emit([&]() {
- return OptimizationRemarkMissed(
- SV_NAME, "HorSLPNotBeneficial", cast<Instruction>(VL[0]))
- << "Vectorizing horizontal reduction is possible"
- << "but not beneficial with cost "
- << ore::NV("Cost", Cost) << " and threshold "
- << ore::NV("Threshold", -SLPCostThreshold);
- });
- break;
+ V.getORE()->emit([&]() {
+ return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial",
+ cast<Instruction>(VL[0]))
+ << "Vectorizing horizontal reduction is possible"
+ << "but not beneficial with cost " << ore::NV("Cost", Cost)
+ << " and threshold "
+ << ore::NV("Threshold", -SLPCostThreshold);
+ });
+ break;
}
LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
<< Cost << ". (HorRdx)\n");
V.getORE()->emit([&]() {
- return OptimizationRemark(
- SV_NAME, "VectorizedHorizontalReduction", cast<Instruction>(VL[0]))
- << "Vectorized horizontal reduction with cost "
- << ore::NV("Cost", Cost) << " and with tree size "
- << ore::NV("TreeSize", V.getTreeSize());
+ return OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction",
+ cast<Instruction>(VL[0]))
+ << "Vectorized horizontal reduction with cost "
+ << ore::NV("Cost", Cost) << " and with tree size "
+ << ore::NV("TreeSize", V.getTreeSize());
});
// Vectorize a tree.
DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
- // Emit a reduction. For min/max, the root is a select, but the insertion
+ // Emit a reduction. If the root is a select (min/max idiom), the insert
// point is the compare condition of that select.
Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
- if (ReductionData.isMinMax())
+ if (isCmpSel(RdxKind))
Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst));
else
Builder.SetInsertPoint(RdxRootInst);
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
- if (VectorizedTree) {
- Builder.SetCurrentDebugLocation(Loc);
- OperationData VectReductionData(ReductionData.getOpcode(),
- VectorizedTree, ReducedSubTree,
- ReductionData.getKind());
- VectorizedTree =
- VectReductionData.createOp(Builder, "op.rdx", ReductionOps);
- } else
+
+ if (!VectorizedTree) {
+ // Initialize the final value in the reduction.
VectorizedTree = ReducedSubTree;
+ } else {
+ // Update the final value in the reduction.
+ Builder.SetCurrentDebugLocation(Loc);
+ VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
+ ReducedSubTree, "op.rdx", ReductionOps);
+ }
i += ReduxWidth;
ReduxWidth = PowerOf2Floor(NumReducedVals - i);
}
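
The loop above consumes ReducedVals in power-of-two windows: it vectorizes the widest power-of-two prefix of what remains, advances past it, and recomputes the next width, stopping once the width would drop to 2; whatever is left is folded into the result by the scalar tail loop that follows. A quick stand-alone trace of the window sizes (PowerOf2Floor is reimplemented locally; the value 13 is just an assumed example):

#include <cstdio>

// Largest power of two <= N (N > 0), like llvm::PowerOf2Floor.
static unsigned powerOf2Floor(unsigned N) {
  unsigned P = 1;
  while (P * 2 <= N)
    P *= 2;
  return P;
}

int main() {
  unsigned NumReducedVals = 13; // hypothetical reduction with 13 leaves
  unsigned ReduxWidth = powerOf2Floor(NumReducedVals); // 8
  unsigned i = 0;
  while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
    std::printf("vectorize values [%u, %u)\n", i, i + ReduxWidth);
    i += ReduxWidth;
    ReduxWidth = powerOf2Floor(NumReducedVals - i);
  }
  // Prints [0, 8) and [8, 12); value 12 is reduced in scalar form afterwards.
  std::printf("%u value(s) handled by the scalar tail loop\n",
              NumReducedVals - i);
  return 0;
}
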
@@ -6936,19 +6971,15 @@ public:
for (; i < NumReducedVals; ++i) {
auto *I = cast<Instruction>(ReducedVals[i]);
Builder.SetCurrentDebugLocation(I->getDebugLoc());
- OperationData VectReductionData(ReductionData.getOpcode(),
- VectorizedTree, I,
- ReductionData.getKind());
- VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps);
+ VectorizedTree =
+ createOp(Builder, RdxKind, VectorizedTree, I, "", ReductionOps);
}
for (auto &Pair : ExternallyUsedValues) {
// Add each externally used value to the final reduction.
for (auto *I : Pair.second) {
Builder.SetCurrentDebugLocation(I->getDebugLoc());
- OperationData VectReductionData(ReductionData.getOpcode(),
- VectorizedTree, Pair.first,
- ReductionData.getKind());
- VectorizedTree = VectReductionData.createOp(Builder, "op.extra", I);
+ VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
+ Pair.first, "op.extra", I);
}
}
@@ -6956,7 +6987,7 @@ public:
// select, we also have to RAUW for the compare instruction feeding the
// reduction root. That's because the original compare may have extra uses
// besides the final select of the reduction.
- if (ReductionData.isMinMax()) {
+ if (isCmpSel(RdxKind)) {
if (auto *VecSelect = dyn_cast<SelectInst>(VectorizedTree)) {
Instruction *ScalarCmp =
getCmpForMinMaxReduction(cast<Instruction>(ReductionRoot));
@@ -6972,77 +7003,68 @@ public:
return VectorizedTree != nullptr;
}
- unsigned numReductionValues() const {
- return ReducedVals.size();
- }
+ unsigned numReductionValues() const { return ReducedVals.size(); }
private:
/// Calculate the cost of a reduction.
- int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal,
- unsigned ReduxWidth) {
+ InstructionCost getReductionCost(TargetTransformInfo *TTI,
+ Value *FirstReducedVal,
+ unsigned ReduxWidth) {
Type *ScalarTy = FirstReducedVal->getType();
- auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth);
-
- int PairwiseRdxCost;
- int SplittingRdxCost;
- switch (ReductionData.getKind()) {
- case RK_Arithmetic:
- PairwiseRdxCost =
- TTI->getArithmeticReductionCost(ReductionData.getOpcode(), VecTy,
- /*IsPairwiseForm=*/true);
- SplittingRdxCost =
- TTI->getArithmeticReductionCost(ReductionData.getOpcode(), VecTy,
- /*IsPairwiseForm=*/false);
- break;
- case RK_Min:
- case RK_Max:
- case RK_UMin:
- case RK_UMax: {
- auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VecTy));
- bool IsUnsigned = ReductionData.getKind() == RK_UMin ||
- ReductionData.getKind() == RK_UMax;
- PairwiseRdxCost =
- TTI->getMinMaxReductionCost(VecTy, VecCondTy,
- /*IsPairwiseForm=*/true, IsUnsigned);
- SplittingRdxCost =
- TTI->getMinMaxReductionCost(VecTy, VecCondTy,
- /*IsPairwiseForm=*/false, IsUnsigned);
+ FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth);
+ InstructionCost VectorCost, ScalarCost;
+ switch (RdxKind) {
+ case RecurKind::Add:
+ case RecurKind::Mul:
+ case RecurKind::Or:
+ case RecurKind::And:
+ case RecurKind::Xor:
+ case RecurKind::FAdd:
+ case RecurKind::FMul: {
+ unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RdxKind);
+ VectorCost = TTI->getArithmeticReductionCost(RdxOpcode, VectorTy,
+ /*IsPairwiseForm=*/false);
+ ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
break;
}
- case RK_None:
- llvm_unreachable("Expected arithmetic or min/max reduction operation");
- }
-
- IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost;
- int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
-
- int ScalarReduxCost = 0;
- switch (ReductionData.getKind()) {
- case RK_Arithmetic:
- ScalarReduxCost =
- TTI->getArithmeticInstrCost(ReductionData.getOpcode(), ScalarTy);
+ case RecurKind::FMax:
+ case RecurKind::FMin: {
+ auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
+ VectorCost =
+ TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
+ /*pairwise=*/false, /*unsigned=*/false);
+ ScalarCost =
+ TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy) +
+ TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
+ CmpInst::makeCmpResultType(ScalarTy));
break;
- case RK_Min:
- case RK_Max:
- case RK_UMin:
- case RK_UMax:
- ScalarReduxCost =
- TTI->getCmpSelInstrCost(ReductionData.getOpcode(), ScalarTy) +
+ }
+ case RecurKind::SMax:
+ case RecurKind::SMin:
+ case RecurKind::UMax:
+ case RecurKind::UMin: {
+ auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
+ bool IsUnsigned =
+ RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin;
+ VectorCost =
+ TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
+ /*IsPairwiseForm=*/false, IsUnsigned);
+ ScalarCost =
+ TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy) +
TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
CmpInst::makeCmpResultType(ScalarTy));
break;
- case RK_None:
+ }
+ default:
llvm_unreachable("Expected arithmetic or min/max reduction operation");
}
- ScalarReduxCost *= (ReduxWidth - 1);
- LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
+ // Scalar cost is repeated for N-1 elements.
+ ScalarCost *= (ReduxWidth - 1);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VectorCost - ScalarCost
<< " for reduction that starts with " << *FirstReducedVal
- << " (It is a "
- << (IsPairwiseReduction ? "pairwise" : "splitting")
- << " reduction)\n");
-
- return VecReduxCost - ScalarReduxCost;
+ << " (It is a splitting reduction)\n");
+ return VectorCost - ScalarCost;
}
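
getReductionCost returns a delta: the vector reduction cost minus the cost of the ReduxWidth - 1 scalar operations it replaces. The caller adds the SLP tree cost and only vectorizes when the total beats -SLPCostThreshold. A tiny stand-alone sketch of that decision with made-up cost numbers:

#include <cstdio>

int main() {
  // Hypothetical costs in TTI units, only to show how the pieces combine.
  int TreeCost = -6;        // building the vector operands is itself profitable
  int VectorCost = 4;       // one vector reduction
  int ScalarCost = 1;       // one scalar binop
  unsigned ReduxWidth = 8;
  int SLPCostThreshold = 0; // mirrors the -slp-threshold option

  int ReductionCost = VectorCost - ScalarCost * int(ReduxWidth - 1); // 4 - 7 = -3
  int Cost = TreeCost + ReductionCost;                               // -9
  bool Vectorize = Cost < -SLPCostThreshold; // tryToReduce bails out otherwise
  std::printf("total cost %d -> %s\n", Cost,
              Vectorize ? "vectorize" : "not beneficial");
  return 0;
}
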
/// Emit a horizontal reduction of the vectorized value.
@@ -7052,92 +7074,142 @@ private:
assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
- if (!IsPairwiseReduction) {
- // FIXME: The builder should use an FMF guard. It should not be hard-coded
- // to 'fast'.
- assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF");
- return createSimpleTargetReduction(
- Builder, TTI, ReductionData.getOpcode(), VectorizedValue,
- ReductionData.getFlags(), ReductionOps.back());
- }
+ return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind,
+ ReductionOps.back());
+ }
+};
- Value *TmpVec = VectorizedValue;
- for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
- auto LeftMask = createRdxShuffleMask(ReduxWidth, i, true, true);
- auto RightMask = createRdxShuffleMask(ReduxWidth, i, true, false);
+} // end anonymous namespace
+
+static Optional<unsigned> getAggregateSize(Instruction *InsertInst) {
+ if (auto *IE = dyn_cast<InsertElementInst>(InsertInst))
+ return cast<FixedVectorType>(IE->getType())->getNumElements();
- Value *LeftShuf = Builder.CreateShuffleVector(
- TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l");
- Value *RightShuf = Builder.CreateShuffleVector(
- TmpVec, UndefValue::get(TmpVec->getType()), (RightMask),
- "rdx.shuf.r");
- OperationData VectReductionData(ReductionData.getOpcode(), LeftShuf,
- RightShuf, ReductionData.getKind());
- TmpVec = VectReductionData.createOp(Builder, "op.rdx", ReductionOps);
+ unsigned AggregateSize = 1;
+ auto *IV = cast<InsertValueInst>(InsertInst);
+ Type *CurrentType = IV->getType();
+ do {
+ if (auto *ST = dyn_cast<StructType>(CurrentType)) {
+ for (auto *Elt : ST->elements())
+ if (Elt != ST->getElementType(0)) // check homogeneity
+ return None;
+ AggregateSize *= ST->getNumElements();
+ CurrentType = ST->getElementType(0);
+ } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
+ AggregateSize *= AT->getNumElements();
+ CurrentType = AT->getElementType();
+ } else if (auto *VT = dyn_cast<FixedVectorType>(CurrentType)) {
+ AggregateSize *= VT->getNumElements();
+ return AggregateSize;
+ } else if (CurrentType->isSingleValueType()) {
+ return AggregateSize;
+ } else {
+ return None;
}
+ } while (true);
+}
- // The result is in the first element of the vector.
- return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+static Optional<unsigned> getOperandIndex(Instruction *InsertInst,
+ unsigned OperandOffset) {
+ unsigned OperandIndex = OperandOffset;
+ if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
+ if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
+ auto *VT = cast<FixedVectorType>(IE->getType());
+ OperandIndex *= VT->getNumElements();
+ OperandIndex += CI->getZExtValue();
+ return OperandIndex;
+ }
+ return None;
}
-};
-} // end anonymous namespace
+ auto *IV = cast<InsertValueInst>(InsertInst);
+ Type *CurrentType = IV->getType();
+ for (unsigned int Index : IV->indices()) {
+ if (auto *ST = dyn_cast<StructType>(CurrentType)) {
+ OperandIndex *= ST->getNumElements();
+ CurrentType = ST->getElementType(Index);
+ } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
+ OperandIndex *= AT->getNumElements();
+ CurrentType = AT->getElementType();
+ } else {
+ return None;
+ }
+ OperandIndex += Index;
+ }
+ return OperandIndex;
+}
+
+static bool findBuildAggregate_rec(Instruction *LastInsertInst,
+ TargetTransformInfo *TTI,
+ SmallVectorImpl<Value *> &BuildVectorOpds,
+ SmallVectorImpl<Value *> &InsertElts,
+ unsigned OperandOffset) {
+ do {
+ Value *InsertedOperand = LastInsertInst->getOperand(1);
+ Optional<unsigned> OperandIndex =
+ getOperandIndex(LastInsertInst, OperandOffset);
+ if (!OperandIndex)
+ return false;
+ if (isa<InsertElementInst>(InsertedOperand) ||
+ isa<InsertValueInst>(InsertedOperand)) {
+ if (!findBuildAggregate_rec(cast<Instruction>(InsertedOperand), TTI,
+ BuildVectorOpds, InsertElts, *OperandIndex))
+ return false;
+ } else {
+ BuildVectorOpds[*OperandIndex] = InsertedOperand;
+ InsertElts[*OperandIndex] = LastInsertInst;
+ }
+ if (isa<UndefValue>(LastInsertInst->getOperand(0)))
+ return true;
+ LastInsertInst = dyn_cast<Instruction>(LastInsertInst->getOperand(0));
+ } while (LastInsertInst != nullptr &&
+ (isa<InsertValueInst>(LastInsertInst) ||
+ isa<InsertElementInst>(LastInsertInst)) &&
+ LastInsertInst->hasOneUse());
+ return false;
+}
/// Recognize construction of vectors like
-/// %ra = insertelement <4 x float> undef, float %s0, i32 0
+/// %ra = insertelement <4 x float> poison, float %s0, i32 0
/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
/// starting from the last insertelement or insertvalue instruction.
///
-/// Also recognize aggregates like {<2 x float>, <2 x float>},
+/// Also recognize homogeneous aggregates like {<2 x float>, <2 x float>},
/// {{float, float}, {float, float}}, [2 x {float, float}] and so on.
/// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
///
/// Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
///
/// \return true if it matches.
-static bool findBuildAggregate(Value *LastInsertInst, TargetTransformInfo *TTI,
+static bool findBuildAggregate(Instruction *LastInsertInst,
+ TargetTransformInfo *TTI,
SmallVectorImpl<Value *> &BuildVectorOpds,
SmallVectorImpl<Value *> &InsertElts) {
+
assert((isa<InsertElementInst>(LastInsertInst) ||
isa<InsertValueInst>(LastInsertInst)) &&
"Expected insertelement or insertvalue instruction!");
- do {
- Value *InsertedOperand;
- auto *IE = dyn_cast<InsertElementInst>(LastInsertInst);
- if (IE) {
- InsertedOperand = IE->getOperand(1);
- LastInsertInst = IE->getOperand(0);
- } else {
- auto *IV = cast<InsertValueInst>(LastInsertInst);
- InsertedOperand = IV->getInsertedValueOperand();
- LastInsertInst = IV->getAggregateOperand();
- }
- if (isa<InsertElementInst>(InsertedOperand) ||
- isa<InsertValueInst>(InsertedOperand)) {
- SmallVector<Value *, 8> TmpBuildVectorOpds;
- SmallVector<Value *, 8> TmpInsertElts;
- if (!findBuildAggregate(InsertedOperand, TTI, TmpBuildVectorOpds,
- TmpInsertElts))
- return false;
- BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(),
- TmpBuildVectorOpds.rend());
- InsertElts.append(TmpInsertElts.rbegin(), TmpInsertElts.rend());
- } else {
- BuildVectorOpds.push_back(InsertedOperand);
- InsertElts.push_back(IE);
- }
- if (isa<UndefValue>(LastInsertInst))
- break;
- if ((!isa<InsertValueInst>(LastInsertInst) &&
- !isa<InsertElementInst>(LastInsertInst)) ||
- !LastInsertInst->hasOneUse())
- return false;
- } while (true);
- std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
- std::reverse(InsertElts.begin(), InsertElts.end());
- return true;
+
+ assert((BuildVectorOpds.empty() && InsertElts.empty()) &&
+ "Expected empty result vectors!");
+
+ Optional<unsigned> AggregateSize = getAggregateSize(LastInsertInst);
+ if (!AggregateSize)
+ return false;
+ BuildVectorOpds.resize(*AggregateSize);
+ InsertElts.resize(*AggregateSize);
+
+ if (findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts,
+ 0)) {
+ llvm::erase_value(BuildVectorOpds, nullptr);
+ llvm::erase_value(InsertElts, nullptr);
+ if (BuildVectorOpds.size() >= 2)
+ return true;
+ }
+
+ return false;
}
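
getAggregateSize and getOperandIndex flatten a nested homogeneous aggregate into one linear index space: the total size is the product of the element counts per level, and each insert position is folded in as index = index * elements(level) + idx. A small stand-alone sketch of that arithmetic for an assumed [2 x <2 x float>] aggregate (illustrative only):

#include <cstdio>
#include <vector>

// Flatten a nested index into a homogeneous aggregate, mirroring
// getOperandIndex: at each level, index = index * elements(level) + idx.
unsigned flattenIndex(const std::vector<unsigned> &NumEltsPerLevel,
                      const std::vector<unsigned> &Indices) {
  unsigned Flat = 0;
  for (size_t I = 0; I < Indices.size(); ++I)
    Flat = Flat * NumEltsPerLevel[I] + Indices[I];
  return Flat;
}

int main() {
  // Assumed aggregate: [2 x <2 x float>] -> 2 * 2 = 4 scalar slots in total.
  // insertvalue at array index 1 followed by insertelement at vector lane 0:
  unsigned Flat = flattenIndex({2, 2}, {1, 0});
  std::printf("flattened slot = %u of 4\n", Flat); // prints 2
  return 0;
}
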
static bool PhiTypeSorterFunc(Value *V, Value *V2) {
@@ -7195,6 +7267,16 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
return nullptr;
}
+static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
+ if (match(I, m_BinOp(m_Value(V0), m_Value(V1))))
+ return true;
+ if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(V0), m_Value(V1))))
+ return true;
+ if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(V0), m_Value(V1))))
+ return true;
+ return false;
+}
+
/// Attempt to reduce a horizontal reduction.
/// If it is legal to match a horizontal reduction feeding the phi node \a P
/// with reduction operators \a Root (or one of its operands) in a basic block
@@ -7234,9 +7316,10 @@ static bool tryToVectorizeHorReductionOrInstOperands(
Instruction *Inst;
unsigned Level;
std::tie(Inst, Level) = Stack.pop_back_val();
- auto *BI = dyn_cast<BinaryOperator>(Inst);
- auto *SI = dyn_cast<SelectInst>(Inst);
- if (BI || SI) {
+ Value *B0, *B1;
+ bool IsBinop = matchRdxBop(Inst, B0, B1);
+ bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
+ if (IsBinop || IsSelect) {
HorizontalReduction HorRdx;
if (HorRdx.matchAssociativeReduction(P, Inst)) {
if (HorRdx.tryToReduce(R, TTI)) {
@@ -7247,10 +7330,10 @@ static bool tryToVectorizeHorReductionOrInstOperands(
continue;
}
}
- if (P && BI) {
- Inst = dyn_cast<Instruction>(BI->getOperand(0));
+ if (P && IsBinop) {
+ Inst = dyn_cast<Instruction>(B0);
if (Inst == P)
- Inst = dyn_cast<Instruction>(BI->getOperand(1));
+ Inst = dyn_cast<Instruction>(B1);
if (!Inst) {
// Set P to nullptr to avoid re-analysis of phi node in
// matchAssociativeReduction function unless this is the root node.
@@ -7283,9 +7366,7 @@ static bool tryToVectorizeHorReductionOrInstOperands(
bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
BasicBlock *BB, BoUpSLP &R,
TargetTransformInfo *TTI) {
- if (!V)
- return false;
- auto *I = dyn_cast<Instruction>(V);
+ auto *I = dyn_cast_or_null<Instruction>(V);
if (!I)
return false;
@@ -7307,8 +7388,7 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
SmallVector<Value *, 16> BuildVectorOpds;
SmallVector<Value *, 16> BuildVectorInsts;
- if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts) ||
- BuildVectorOpds.size() < 2)
+ if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts))
return false;
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
@@ -7323,7 +7403,6 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
SmallVector<Value *, 16> BuildVectorInsts;
SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
- BuildVectorOpds.size() < 2 ||
(llvm::all_of(BuildVectorOpds,
[](Value *V) { return isa<ExtractElementInst>(V); }) &&
isShuffle(BuildVectorOpds)))
@@ -7369,7 +7448,6 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
SmallPtrSet<Value *, 16> VisitedInstrs;
- unsigned MaxVecRegSize = R.getMaxVecRegSize();
bool HaveVectorizedPhiNodes = true;
while (HaveVectorizedPhiNodes) {
@@ -7396,27 +7474,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Look for the next elements with the same type.
SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
- Type *EltTy = (*IncIt)->getType();
-
- assert(EltTy->isSized() &&
- "Instructions should all be sized at this point");
- TypeSize EltTS = DL->getTypeSizeInBits(EltTy);
- if (EltTS.isScalable()) {
- // For now, just ignore vectorizing scalable types.
- ++IncIt;
- continue;
- }
-
- unsigned EltSize = EltTS.getFixedSize();
- unsigned MaxNumElts = MaxVecRegSize / EltSize;
- if (MaxNumElts < 2) {
- ++IncIt;
- continue;
- }
-
while (SameTypeIt != E &&
- (*SameTypeIt)->getType() == EltTy &&
- (SameTypeIt - IncIt) < MaxNumElts) {
+ (*SameTypeIt)->getType() == (*IncIt)->getType()) {
VisitedInstrs.insert(*SameTypeIt);
++SameTypeIt;
}
@@ -7448,12 +7507,17 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
SmallVector<Instruction *, 8> PostProcessInstructions;
SmallDenseSet<Instruction *, 4> KeyNodes;
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // Skip instructions with a scalable type. The number of elements is
+ // unknown at compile time for scalable types.
+ if (isa<ScalableVectorType>(it->getType()))
+ continue;
+
// Skip instructions marked for the deletion.
if (R.isDeleted(&*it))
continue;
// We may go through BB multiple times so skip the one we have checked.
if (!VisitedInstrs.insert(&*it).second) {
- if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
+ if (it->use_empty() && KeyNodes.contains(&*it) &&
vectorizeSimpleInstructions(PostProcessInstructions, BB, R)) {
// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.
@@ -7470,16 +7534,29 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Try to vectorize reductions that use PHINodes.
if (PHINode *P = dyn_cast<PHINode>(it)) {
// Check that the PHI is a reduction PHI.
- if (P->getNumIncomingValues() != 2)
- return Changed;
+ if (P->getNumIncomingValues() == 2) {
+ // Try to match and vectorize a horizontal reduction.
+ if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
+ TTI)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+ }
+ // Try to vectorize the incoming values of the PHI, to catch reductions
+ // that feed into PHIs.
+ for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; I++) {
+ // Skip if the incoming block is the current BB for now. Also, bypass
+ // unreachable IR for efficiency and to avoid crashing.
+ // TODO: Collect the skipped incoming values and try to vectorize them
+ // after processing BB.
+ if (BB == P->getIncomingBlock(I) ||
+ !DT->isReachableFromEntry(P->getIncomingBlock(I)))
+ continue;
- // Try to match and vectorize a horizontal reduction.
- if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
- TTI)) {
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
+ Changed |= vectorizeRootInstruction(nullptr, P->getIncomingValue(I),
+ P->getIncomingBlock(I), R, TTI);
}
continue;
}
@@ -7543,7 +7620,7 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
unsigned MaxElts = MaxVecRegSize / EltSize;
for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += MaxElts) {
auto Len = std::min<unsigned>(BE - BI, MaxElts);
- auto GEPList = makeArrayRef(&Entry.second[BI], Len);
+ ArrayRef<GetElementPtrInst *> GEPList(&Entry.second[BI], Len);
// Initialize a set a candidate getelementptrs. Note that we use a
// SetVector here to preserve program order. If the index computations
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 6f055ca80ff2..873701676067 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -61,17 +61,19 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p I should widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
- VPWidenMemoryInstructionRecipe *
- tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan);
+ VPRecipeBase *tryToWidenMemory(Instruction *I, VFRange &Range,
+ VPlanPtr &Plan);
/// Check if an induction recipe should be constructed for \I. If so build and
/// return it. If not, return null.
- VPWidenIntOrFpInductionRecipe *tryToOptimizeInductionPHI(PHINode *Phi) const;
+ VPWidenIntOrFpInductionRecipe *tryToOptimizeInductionPHI(PHINode *Phi,
+ VPlan &Plan) const;
/// Optimize the special case where the operand of \p I is a constant integer
/// induction variable.
VPWidenIntOrFpInductionRecipe *
- tryToOptimizeInductionTruncate(TruncInst *I, VFRange &Range) const;
+ tryToOptimizeInductionTruncate(TruncInst *I, VFRange &Range,
+ VPlan &Plan) const;
/// Handle non-loop phi nodes. Currently all such phi nodes are turned into
/// a sequence of select instructions as the vectorizer currently performs
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f5f28a3bffa1..b26399e0ae58 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -20,8 +20,10 @@
#include "VPlanDominatorTree.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -56,13 +58,69 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
return OS;
}
+VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
+ : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
+ if (Def)
+ Def->addDefinedValue(this);
+}
+
+VPValue::~VPValue() {
+ assert(Users.empty() && "trying to delete a VPValue with remaining users");
+ if (Def)
+ Def->removeDefinedValue(this);
+}
+
void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
- if (const VPInstruction *Instr = dyn_cast<VPInstruction>(this))
- Instr->print(OS, SlotTracker);
+ if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
+ R->print(OS, "", SlotTracker);
else
printAsOperand(OS, SlotTracker);
}
+void VPValue::dump() const {
+ const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
+ VPSlotTracker SlotTracker(
+ (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
+ print(dbgs(), SlotTracker);
+ dbgs() << "\n";
+}
+
+void VPDef::dump() const {
+ const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
+ VPSlotTracker SlotTracker(
+ (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
+ print(dbgs(), "", SlotTracker);
+ dbgs() << "\n";
+}
+
+VPUser *VPRecipeBase::toVPUser() {
+ if (auto *U = dyn_cast<VPInstruction>(this))
+ return U;
+ if (auto *U = dyn_cast<VPWidenRecipe>(this))
+ return U;
+ if (auto *U = dyn_cast<VPWidenCallRecipe>(this))
+ return U;
+ if (auto *U = dyn_cast<VPWidenSelectRecipe>(this))
+ return U;
+ if (auto *U = dyn_cast<VPWidenGEPRecipe>(this))
+ return U;
+ if (auto *U = dyn_cast<VPBlendRecipe>(this))
+ return U;
+ if (auto *U = dyn_cast<VPInterleaveRecipe>(this))
+ return U;
+ if (auto *U = dyn_cast<VPReplicateRecipe>(this))
+ return U;
+ if (auto *U = dyn_cast<VPBranchOnMaskRecipe>(this))
+ return U;
+ if (auto *U = dyn_cast<VPWidenMemoryInstructionRecipe>(this))
+ return U;
+ if (auto *U = dyn_cast<VPReductionRecipe>(this))
+ return U;
+ if (auto *U = dyn_cast<VPPredInstPHIRecipe>(this))
+ return U;
+ return nullptr;
+}
+
// Get the top-most entry block of \p Start. This is the entry block of the
// containing VPlan. This function is templated to support both const and non-const blocks
template <typename T> static T *getPlanEntry(T *Start) {
@@ -142,14 +200,43 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
}
void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
- SmallVector<VPBlockBase *, 8> Blocks;
- for (VPBlockBase *Block : depth_first(Entry))
- Blocks.push_back(Block);
+ SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));
for (VPBlockBase *Block : Blocks)
delete Block;
}
+VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
+ iterator It = begin();
+ while (It != end() && (isa<VPWidenPHIRecipe>(&*It) ||
+ isa<VPWidenIntOrFpInductionRecipe>(&*It) ||
+ isa<VPPredInstPHIRecipe>(&*It) ||
+ isa<VPWidenCanonicalIVRecipe>(&*It)))
+ It++;
+ return It;
+}
+
+Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
+ if (!Def->getDef() && OrigLoop->isLoopInvariant(Def->getLiveInIRValue()))
+ return Def->getLiveInIRValue();
+
+ if (hasScalarValue(Def, Instance))
+ return Data.PerPartScalars[Def][Instance.Part][Instance.Lane];
+
+ if (hasVectorValue(Def, Instance.Part)) {
+ assert(Data.PerPartOutput.count(Def));
+ auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
+ if (!VecPart->getType()->isVectorTy()) {
+ assert(Instance.Lane == 0 && "cannot get lane > 0 for scalar");
+ return VecPart;
+ }
+ // TODO: Cache created scalar values.
+ return Builder.CreateExtractElement(VecPart,
+ Builder.getInt32(Instance.Lane));
+ }
+ return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance);
+}
+
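For illustration, the lookup order implemented by the new VPTransformState::get above can be sketched with plain standard-library containers. StateSketch and its members are invented names for this sketch only, not part of the patch; the query falls through four tiers: loop-invariant live-ins, cached per-lane scalars, a lane extracted from the per-part vector, and finally the legacy callback.

#include <map>
#include <vector>

// Simplified stand-in for the lookup order in VPTransformState::get. Ints
// replace VPValue* / Value*; negative keys model loop-invariant live-ins.
struct StateSketch {
  std::map<int, std::vector<std::vector<int>>> PerPartScalars; // [def][part][lane]
  std::map<int, std::vector<int>> PerPartVectors;              // [def][part]

  bool isLiveIn(int Def) const { return Def < 0; } // stand-in predicate
  int fallback(int Def, unsigned Part, unsigned Lane) const {
    return Def * 1000 + static_cast<int>(Part * 10 + Lane); // "legacy value map"
  }

  int get(int Def, unsigned Part, unsigned Lane) const {
    if (isLiveIn(Def))
      return Def;                                      // 1. live-in IR value
    auto S = PerPartScalars.find(Def);
    if (S != PerPartScalars.end() && Part < S->second.size() &&
        Lane < S->second[Part].size())
      return S->second[Part][Lane];                    // 2. cached scalar
    auto V = PerPartVectors.find(Def);
    if (V != PerPartVectors.end() && Part < V->second.size())
      return V->second[Part] + static_cast<int>(Lane); // 3. "extractelement"
    return fallback(Def, Part, Lane);                  // 4. delegate
  }
};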
BasicBlock *
VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
// BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
@@ -267,6 +354,24 @@ void VPBasicBlock::execute(VPTransformState *State) {
LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
}
+void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
+ for (VPRecipeBase &R : Recipes) {
+ for (auto *Def : R.definedValues())
+ Def->replaceAllUsesWith(NewValue);
+
+ if (auto *User = R.toVPUser())
+ for (unsigned I = 0, E = User->getNumOperands(); I != E; I++)
+ User->setOperand(I, NewValue);
+ }
+}
+
+void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
+ for (VPBlockBase *Block : depth_first(Entry))
+ // Drop all references in VPBasicBlocks and replace all uses with
+ // DummyValue.
+ Block->dropAllReferences(NewValue);
+}
+
void VPRegionBlock::execute(VPTransformState *State) {
ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
@@ -300,7 +405,9 @@ void VPRegionBlock::execute(VPTransformState *State) {
for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
State->Instance->Part = Part;
- for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) {
+ assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+ for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
+ ++Lane) {
State->Instance->Lane = Lane;
// Visit the VPBlocks connected to \p this, starting from it.
for (VPBlockBase *Block : RPOT) {
@@ -346,6 +453,14 @@ void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
insertAfter(InsertPos);
}
+void VPRecipeBase::moveBefore(VPBasicBlock &BB,
+ iplist<VPRecipeBase>::iterator I) {
+ assert(I == BB.end() || I->getParent() == &BB);
+ removeFromParent();
+ Parent = &BB;
+ BB.getRecipeList().insert(I, this);
+}
+
void VPInstruction::generateInstruction(VPTransformState &State,
unsigned Part) {
IRBuilder<> &Builder = State.Builder;
@@ -383,14 +498,14 @@ void VPInstruction::generateInstruction(VPTransformState &State,
case VPInstruction::ActiveLaneMask: {
// Get first lane of vector induction variable.
Value *VIVElem0 = State.get(getOperand(0), {Part, 0});
- // Get first lane of backedge-taken-count.
- Value *ScalarBTC = State.get(getOperand(1), {Part, 0});
+ // Get the original loop tripcount.
+ Value *ScalarTC = State.TripCount;
auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
- auto *PredTy = FixedVectorType::get(Int1Ty, State.VF);
+ auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.getKnownMinValue());
Instruction *Call = Builder.CreateIntrinsic(
- Intrinsic::get_active_lane_mask, {PredTy, ScalarBTC->getType()},
- {VIVElem0, ScalarBTC}, nullptr, "active.lane.mask");
+ Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
+ {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
State.set(this, Call, Part);
break;
}
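The change above feeds the original trip count rather than the backedge-taken count into llvm.get.active.lane.mask. That intrinsic yields an i1 vector whose lane I is set while the induction value plus I is still below the bound (unsigned compare); the scalar model below is a hedged standalone sketch with invented names, not the intrinsic itself.

#include <cstdint>
#include <vector>

// Scalar model of the active-lane-mask semantics: lane I is active while
// Base + I < TripCount, using unsigned comparison.
std::vector<bool> activeLaneMask(uint64_t Base, uint64_t TripCount, unsigned VF) {
  std::vector<bool> Mask(VF);
  for (unsigned I = 0; I < VF; ++I)
    Mask[I] = Base + I < TripCount;
  return Mask;
}
// Example: activeLaneMask(/*Base=*/12, /*TripCount=*/14, /*VF=*/4) -> {1,1,0,0}.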
@@ -405,18 +520,15 @@ void VPInstruction::execute(VPTransformState &State) {
generateInstruction(State, Part);
}
-void VPInstruction::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << "\"EMIT ";
- print(O, SlotTracker);
-}
-
-void VPInstruction::print(raw_ostream &O) const {
+void VPInstruction::dump() const {
VPSlotTracker SlotTracker(getParent()->getPlan());
- print(O, SlotTracker);
+ print(dbgs(), "", SlotTracker);
}
-void VPInstruction::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
+void VPInstruction::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << "EMIT ";
+
if (hasResult()) {
printAsOperand(O, SlotTracker);
O << " = ";
@@ -461,7 +573,7 @@ void VPlan::execute(VPTransformState *State) {
"trip.count.minus.1");
auto VF = State->VF;
Value *VTCMO =
- VF == 1 ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
+ VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part)
State->set(BackedgeTakenCount, VTCMO, Part);
}
@@ -666,7 +778,7 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
// Dump the block predicate.
const VPValue *Pred = BasicBlock->getPredicate();
if (Pred) {
- OS << " +\n" << Indent << " \"BlockPredicate: ";
+ OS << " +\n" << Indent << " \"BlockPredicate: \"";
if (const VPInstruction *PredI = dyn_cast<VPInstruction>(Pred)) {
PredI->printAsOperand(OS, SlotTracker);
OS << " (" << DOT::EscapeString(PredI->getParent()->getName())
@@ -676,7 +788,7 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
}
for (const VPRecipeBase &Recipe : *BasicBlock) {
- OS << " +\n" << Indent;
+ OS << " +\n" << Indent << "\"";
Recipe.print(OS, Indent, SlotTracker);
OS << "\\l\"";
}
@@ -715,7 +827,7 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
dumpEdges(Region);
}
-void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) {
+void VPlanPrinter::printAsIngredient(raw_ostream &O, const Value *V) {
std::string IngredientString;
raw_string_ostream RSO(IngredientString);
if (auto *Inst = dyn_cast<Instruction>(V)) {
@@ -738,24 +850,45 @@ void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) {
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"WIDEN-CALL " << VPlanIngredient(&Ingredient);
+ O << "WIDEN-CALL ";
+
+ auto *CI = cast<CallInst>(getUnderlyingInstr());
+ if (CI->getType()->isVoidTy())
+ O << "void ";
+ else {
+ printAsOperand(O, SlotTracker);
+ O << " = ";
+ }
+
+ O << "call @" << CI->getCalledFunction()->getName() << "(";
+ printOperands(O, SlotTracker);
+ O << ")";
}
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"WIDEN-SELECT" << VPlanIngredient(&Ingredient)
- << (InvariantCond ? " (condition is loop invariant)" : "");
+ O << "WIDEN-SELECT ";
+ printAsOperand(O, SlotTracker);
+ O << " = select ";
+ getOperand(0)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(1)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(2)->printAsOperand(O, SlotTracker);
+ O << (InvariantCond ? " (condition is loop invariant)" : "");
}
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"WIDEN\\l\"";
- O << "\" " << VPlanIngredient(&Ingredient);
+ O << "WIDEN ";
+ printAsOperand(O, SlotTracker);
+ O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
+ printOperands(O, SlotTracker);
}
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"WIDEN-INDUCTION";
+ O << "WIDEN-INDUCTION";
if (Trunc) {
O << "\\l\"";
O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
@@ -766,23 +899,26 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"WIDEN-GEP ";
+ O << "WIDEN-GEP ";
O << (IsPtrLoopInvariant ? "Inv" : "Var");
size_t IndicesNumber = IsIndexLoopInvariant.size();
for (size_t I = 0; I < IndicesNumber; ++I)
O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
- O << "\\l\"";
- O << " +\n" << Indent << "\" " << VPlanIngredient(GEP);
+
+ O << " ";
+ printAsOperand(O, SlotTracker);
+ O << " = getelementptr ";
+ printOperands(O, SlotTracker);
}
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"WIDEN-PHI " << VPlanIngredient(Phi);
+ O << "WIDEN-PHI " << VPlanIngredient(Phi);
}
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"BLEND ";
+ O << "BLEND ";
Phi->printAsOperand(O, false);
O << " =";
if (getNumIncomingValues() == 1) {
@@ -800,46 +936,75 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
}
}
+void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << "REDUCE ";
+ printAsOperand(O, SlotTracker);
+ O << " = ";
+ getChainOp()->printAsOperand(O, SlotTracker);
+ O << " + reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode())
+ << " (";
+ getVecOp()->printAsOperand(O, SlotTracker);
+ if (getCondOp()) {
+ O << ", ";
+ getCondOp()->printAsOperand(O, SlotTracker);
+ }
+ O << ")";
+}
+
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"" << (IsUniform ? "CLONE " : "REPLICATE ")
- << VPlanIngredient(Ingredient);
+ O << (IsUniform ? "CLONE " : "REPLICATE ");
+
+ if (!getUnderlyingInstr()->getType()->isVoidTy()) {
+ printAsOperand(O, SlotTracker);
+ O << " = ";
+ }
+ O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
+ printOperands(O, SlotTracker);
+
if (AlsoPack)
O << " (S->V)";
}
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst);
+ O << "PHI-PREDICATED-INSTRUCTION ";
+ printOperands(O, SlotTracker);
}
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"WIDEN " << VPlanIngredient(&Instr);
- O << ", ";
- getAddr()->printAsOperand(O, SlotTracker);
- VPValue *Mask = getMask();
- if (Mask) {
- O << ", ";
- Mask->printAsOperand(O, SlotTracker);
+ O << "WIDEN ";
+
+ if (!isStore()) {
+ getVPValue()->printAsOperand(O, SlotTracker);
+ O << " = ";
}
+ O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
+
+ printOperands(O, SlotTracker);
}
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
Value *CanonicalIV = State.CanonicalIV;
Type *STy = CanonicalIV->getType();
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
- auto VF = State.VF;
- Value *VStart = VF == 1
+ ElementCount VF = State.VF;
+ assert(!VF.isScalable() && "the code following assumes non-scalable ECs");
+ Value *VStart = VF.isScalar()
? CanonicalIV
- : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
+ : Builder.CreateVectorSplat(VF.getKnownMinValue(),
+ CanonicalIV, "broadcast");
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
SmallVector<Constant *, 8> Indices;
- for (unsigned Lane = 0; Lane < VF; ++Lane)
- Indices.push_back(ConstantInt::get(STy, Part * VF + Lane));
+ for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
+ Indices.push_back(
+ ConstantInt::get(STy, Part * VF.getKnownMinValue() + Lane));
// If VF == 1, there is only one iteration in the loop above, thus the
// element pushed back into Indices is ConstantInt::get(STy, Part)
- Constant *VStep = VF == 1 ? Indices.back() : ConstantVector::get(Indices);
+ Constant *VStep =
+ VF.isScalar() ? Indices.back() : ConstantVector::get(Indices);
// Add the consecutive indices to the vector value.
Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
State.set(getVPValue(), CanonicalVectorIV, Part);
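As a worked example of the widening above (assuming a fixed VF of 4 and UF of 2), part 0 adds the step vector <0,1,2,3> to the broadcast canonical IV and part 1 adds <4,5,6,7>. A small standalone sketch of the index computation, with invented names:

#include <cstdint>
#include <vector>

// Lane L of part P receives the constant P * VF + L, which is added to the
// broadcast scalar canonical IV to form the widened IV for that part.
std::vector<uint64_t> canonicalIVStep(unsigned Part, unsigned VF) {
  std::vector<uint64_t> Indices(VF);
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    Indices[Lane] = static_cast<uint64_t>(Part) * VF + Lane;
  return Indices;
}
// canonicalIVStep(0, 4) -> {0,1,2,3}; canonicalIVStep(1, 4) -> {4,5,6,7}.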
@@ -848,7 +1013,7 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << "\"EMIT ";
+ O << "EMIT ";
getVPValue()->printAsOperand(O, SlotTracker);
O << " = WIDEN-CANONICAL-INDUCTION";
}
@@ -856,10 +1021,18 @@ void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
void VPValue::replaceAllUsesWith(VPValue *New) {
- for (VPUser *User : users())
+ for (unsigned J = 0; J < getNumUsers();) {
+ VPUser *User = Users[J];
+ unsigned NumUsers = getNumUsers();
for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
if (User->getOperand(I) == this)
User->setOperand(I, New);
+ // If a user got removed after updating the current user, the next user to
+ // update will be moved to the current position, so we only need to
+ // increment the index if the number of users did not change.
+ if (NumUsers == getNumUsers())
+ J++;
+ }
}
void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
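The loop in replaceAllUsesWith above must tolerate users disappearing while it iterates: setOperand can drop the current user, which moves the next user into the current slot. A simplified standalone sketch of that pattern over a plain vector of users (names invented for the sketch):

#include <vector>

// Each "user" is a vector of operand slots (ints stand in for VPValue*).
// The loop re-reads the user count instead of caching an end iterator, and
// only advances when no user was removed underneath it.
void replaceAllUsesSketch(std::vector<std::vector<int>> &Users, int Old, int New) {
  for (size_t J = 0; J < Users.size();) {
    size_t NumUsers = Users.size();
    for (int &Op : Users[J])
      if (Op == Old)
        Op = New;
    if (NumUsers == Users.size()) // unchanged: the next user is at J + 1
      ++J;
  }
}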
@@ -877,6 +1050,12 @@ void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
OS << "vp<%" << Tracker.getSlot(this) << ">";
}
+void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
+ interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
+ Op->printAsOperand(O, SlotTracker);
+ });
+}
+
void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
Old2NewTy &Old2New,
InterleavedAccessInfo &IAI) {
@@ -925,13 +1104,6 @@ VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
void VPSlotTracker::assignSlot(const VPValue *V) {
assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
- const Value *UV = V->getUnderlyingValue();
- if (UV)
- return;
- const auto *VPI = dyn_cast<VPInstruction>(V);
- if (VPI && !VPI->hasResult())
- return;
-
Slots[V] = NextSlot++;
}
@@ -950,10 +1122,8 @@ void VPSlotTracker::assignSlots(const VPRegionBlock *Region) {
void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) {
for (const VPRecipeBase &Recipe : *VPBB) {
- if (const auto *VPI = dyn_cast<VPInstruction>(&Recipe))
- assignSlot(VPI);
- else if (const auto *VPIV = dyn_cast<VPWidenCanonicalIVRecipe>(&Recipe))
- assignSlot(VPIV->getVPValue());
+ for (VPValue *Def : Recipe.definedValues())
+ assignSlot(Def);
}
}
@@ -962,10 +1132,6 @@ void VPSlotTracker::assignSlots(const VPlan &Plan) {
for (const VPValue *V : Plan.VPExternalDefs)
assignSlot(V);
- for (auto &E : Plan.Value2VPValue)
- if (!isa<VPInstruction>(E.second))
- assignSlot(E.second);
-
for (const VPValue *V : Plan.VPCBVs)
assignSlot(V);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
index f07c94e7a3c7..2cce127cd4ce 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -51,13 +51,12 @@ namespace llvm {
class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
-template <class T> class InterleaveGroup;
class LoopInfo;
class raw_ostream;
+class RecurrenceDescriptor;
class Value;
class VPBasicBlock;
class VPRegionBlock;
-class VPSlotTracker;
class VPlan;
class VPlanSlp;
@@ -66,10 +65,22 @@ class VPlanSlp;
/// [1, 9) = {1, 2, 4, 8}
struct VFRange {
// A power of 2.
- const unsigned Start;
+ const ElementCount Start;
// Need not be a power of 2. If End <= Start, the range is empty.
- unsigned End;
+ ElementCount End;
+
+ bool isEmpty() const {
+ return End.getKnownMinValue() <= Start.getKnownMinValue();
+ }
+
+ VFRange(const ElementCount &Start, const ElementCount &End)
+ : Start(Start), End(End) {
+ assert(Start.isScalable() == End.isScalable() &&
+ "Both Start and End should have the same scalable flag");
+ assert(isPowerOf2_32(Start.getKnownMinValue()) &&
+ "Expected Start to be a power of 2");
+ }
};
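VFRange above is a half-open interval of power-of-two vectorization factors, now expressed with ElementCount endpoints that must agree on the scalable flag. Ignoring scalability and working on the known minimum values only, the enumeration such a range describes can be sketched as follows (standalone, invented name):

#include <vector>

// Enumerate the power-of-two factors in the half-open range [Start, End),
// e.g. [1, 9) -> {1, 2, 4, 8}. Start is assumed to be a power of two (>= 1).
std::vector<unsigned> expandVFRange(unsigned Start, unsigned End) {
  std::vector<unsigned> Factors;
  for (unsigned VF = Start; VF < End; VF *= 2)
    Factors.push_back(VF);
  return Factors;
}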
using VPlanPtr = std::unique_ptr<VPlan>;
@@ -114,7 +125,7 @@ private:
/// The vectorization factor. Each entry in the scalar map contains UF x VF
/// scalar values.
- unsigned VF;
+ ElementCount VF;
/// The vector and scalar map storage. We use std::map and not DenseMap
/// because insertions to DenseMap invalidate its iterators.
@@ -125,7 +136,7 @@ private:
public:
/// Construct an empty map with the given unroll and vectorization factors.
- VectorizerValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}
+ VectorizerValueMap(unsigned UF, ElementCount VF) : UF(UF), VF(VF) {}
/// \return True if the map has any vector entry for \p Key.
bool hasAnyVectorValue(Value *Key) const {
@@ -150,12 +161,14 @@ public:
/// \return True if the map has a scalar entry for \p Key and \p Instance.
bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
assert(Instance.Part < UF && "Queried Scalar Part is too large.");
- assert(Instance.Lane < VF && "Queried Scalar Lane is too large.");
+ assert(Instance.Lane < VF.getKnownMinValue() &&
+ "Queried Scalar Lane is too large.");
+
if (!hasAnyScalarValue(Key))
return false;
const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
- assert(Entry[Instance.Part].size() == VF &&
+ assert(Entry[Instance.Part].size() == VF.getKnownMinValue() &&
"ScalarParts has wrong dimensions.");
return Entry[Instance.Part][Instance.Lane] != nullptr;
}
@@ -194,7 +207,7 @@ public:
// TODO: Consider storing uniform values only per-part, as they occupy
// lane 0 only, keeping the other VF-1 redundant entries null.
for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part].resize(VF, nullptr);
+ Entry[Part].resize(VF.getKnownMinValue(), nullptr);
ScalarMapStorage[Key] = Entry;
}
ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
@@ -233,14 +246,15 @@ struct VPCallback {
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
- VPTransformState(unsigned VF, unsigned UF, LoopInfo *LI, DominatorTree *DT,
- IRBuilder<> &Builder, VectorizerValueMap &ValueMap,
- InnerLoopVectorizer *ILV, VPCallback &Callback)
- : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
- ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}
+ VPTransformState(ElementCount VF, unsigned UF, Loop *OrigLoop, LoopInfo *LI,
+ DominatorTree *DT, IRBuilder<> &Builder,
+ VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV,
+ VPCallback &Callback)
+ : VF(VF), UF(UF), Instance(), OrigLoop(OrigLoop), LI(LI), DT(DT),
+ Builder(Builder), ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}
/// The chosen Vectorization and Unroll Factors of the loop being vectorized.
- unsigned VF;
+ ElementCount VF;
unsigned UF;
/// Hold the indices to generate specific scalar instructions. Null indicates
@@ -255,6 +269,9 @@ struct VPTransformState {
typedef SmallVector<Value *, 2> PerPartValuesTy;
DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
+
+ using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
+ DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
} Data;
/// Get the generated Value for a given VPValue and a given Part. Note that
@@ -271,20 +288,21 @@ struct VPTransformState {
}
/// Get the generated Value for a given VPValue and given Part and Lane.
- Value *get(VPValue *Def, const VPIteration &Instance) {
- // If the Def is managed directly by VPTransformState, extract the lane from
- // the relevant part. Note that currently only VPInstructions and external
- // defs are managed by VPTransformState. Other Defs are still created by ILV
- // and managed in its ValueMap. For those this method currently just
- // delegates the call to ILV below.
- if (Data.PerPartOutput.count(Def)) {
- auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
- // TODO: Cache created scalar values.
- return Builder.CreateExtractElement(VecPart,
- Builder.getInt32(Instance.Lane));
- }
+ Value *get(VPValue *Def, const VPIteration &Instance);
+
+ bool hasVectorValue(VPValue *Def, unsigned Part) {
+ auto I = Data.PerPartOutput.find(Def);
+ return I != Data.PerPartOutput.end() && Part < I->second.size() &&
+ I->second[Part];
+ }
- return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance);
+ bool hasScalarValue(VPValue *Def, VPIteration Instance) {
+ auto I = Data.PerPartScalars.find(Def);
+ if (I == Data.PerPartScalars.end())
+ return false;
+ return Instance.Part < I->second.size() &&
+ Instance.Lane < I->second[Instance.Part].size() &&
+ I->second[Instance.Part][Instance.Lane];
}
/// Set the generated Value for a given VPValue and a given Part.
@@ -295,6 +313,18 @@ struct VPTransformState {
}
Data.PerPartOutput[Def][Part] = V;
}
+ void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part);
+
+ void set(VPValue *Def, Value *V, const VPIteration &Instance) {
+ auto Iter = Data.PerPartScalars.insert({Def, {}});
+ auto &PerPartVec = Iter.first->second;
+ while (PerPartVec.size() <= Instance.Part)
+ PerPartVec.emplace_back();
+ auto &Scalars = PerPartVec[Instance.Part];
+ while (Scalars.size() <= Instance.Lane)
+ Scalars.push_back(nullptr);
+ Scalars[Instance.Lane] = V;
+ }
/// Hold state information used when constructing the CFG of the output IR,
/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
@@ -321,6 +351,9 @@ struct VPTransformState {
CFGState() = default;
} CFG;
+ /// Hold a pointer to the original loop.
+ Loop *OrigLoop;
+
/// Hold a pointer to LoopInfo to register new basic blocks in the loop.
LoopInfo *LI;
@@ -394,14 +427,14 @@ class VPBlockBase {
/// Remove \p Predecessor from the predecessors of this block.
void removePredecessor(VPBlockBase *Predecessor) {
- auto Pos = std::find(Predecessors.begin(), Predecessors.end(), Predecessor);
+ auto Pos = find(Predecessors, Predecessor);
assert(Pos && "Predecessor does not exist");
Predecessors.erase(Pos);
}
/// Remove \p Successor from the successors of this block.
void removeSuccessor(VPBlockBase *Successor) {
- auto Pos = std::find(Successors.begin(), Successors.end(), Successor);
+ auto Pos = find(Successors, Successor);
assert(Pos && "Successor does not exist");
Successors.erase(Pos);
}
@@ -594,49 +627,30 @@ public:
// hoisted into a VPBlockBase.
return true;
}
+
+ /// Replace all operands of VPUsers in the block with \p NewValue and also
+ /// replace all uses of VPValues defined in the block with NewValue.
+ virtual void dropAllReferences(VPValue *NewValue) = 0;
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
-/// instructions.
-class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
+/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
+/// and is responsible for deleting its defined values. Single-value
+/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
+/// VPRecipeBase before VPValue.
+class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
+ public VPDef {
friend VPBasicBlock;
friend class VPBlockUtils;
- const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
/// Each VPRecipe belongs to a single VPBasicBlock.
VPBasicBlock *Parent = nullptr;
public:
- /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
- /// that is actually instantiated. Values of this enumeration are kept in the
- /// SubclassID field of the VPRecipeBase objects. They are used for concrete
- /// type identification.
- using VPRecipeTy = enum {
- VPBlendSC,
- VPBranchOnMaskSC,
- VPInstructionSC,
- VPInterleaveSC,
- VPPredInstPHISC,
- VPReplicateSC,
- VPWidenCallSC,
- VPWidenCanonicalIVSC,
- VPWidenGEPSC,
- VPWidenIntOrFpInductionSC,
- VPWidenMemoryInstructionSC,
- VPWidenPHISC,
- VPWidenSC,
- VPWidenSelectSC
- };
-
- VPRecipeBase(const unsigned char SC) : SubclassID(SC) {}
+ VPRecipeBase(const unsigned char SC) : VPDef(SC) {}
virtual ~VPRecipeBase() = default;
- /// \return an ID for the concrete type of this object.
- /// This is used to implement the classof checks. This should not be used
- /// for any other purpose, as the values may change as LLVM evolves.
- unsigned getVPRecipeID() const { return SubclassID; }
-
/// \return the VPBasicBlock which this VPRecipe belongs to.
VPBasicBlock *getParent() { return Parent; }
const VPBasicBlock *getParent() const { return Parent; }
@@ -645,10 +659,6 @@ public:
/// this VPRecipe, thereby "executing" the VPlan.
virtual void execute(struct VPTransformState &State) = 0;
- /// Each recipe prints itself.
- virtual void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const = 0;
-
/// Insert an unlinked recipe into a basic block immediately before
/// the specified recipe.
void insertBefore(VPRecipeBase *InsertPos);
@@ -661,6 +671,11 @@ public:
/// the VPBasicBlock that MovePos lives in, right after MovePos.
void moveAfter(VPRecipeBase *MovePos);
+ /// Unlink this recipe and insert into BB before I.
+ ///
+ /// \pre I is a valid iterator into BB.
+ void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
+
/// This method unlinks 'this' from the containing basic block, but does not
/// delete it.
void removeFromParent();
@@ -669,13 +684,46 @@ public:
///
/// \returns an iterator pointing to the element after the erased one
iplist<VPRecipeBase>::iterator eraseFromParent();
+
+ /// Returns a pointer to a VPUser if the recipe inherits from VPUser, or
+ /// nullptr otherwise.
+ VPUser *toVPUser();
+
+ /// Returns the underlying instruction if the recipe is a VPValue, or nullptr
+ /// otherwise.
+ Instruction *getUnderlyingInstr() {
+ return cast<Instruction>(getVPValue()->getUnderlyingValue());
+ }
+ const Instruction *getUnderlyingInstr() const {
+ return cast<Instruction>(getVPValue()->getUnderlyingValue());
+ }
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPDef *D) {
+ // All VPDefs are also VPRecipeBases.
+ return true;
+ }
};
+inline bool VPUser::classof(const VPDef *Def) {
+ return Def->getVPDefID() == VPRecipeBase::VPInstructionSC ||
+ Def->getVPDefID() == VPRecipeBase::VPWidenSC ||
+ Def->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
+ Def->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
+ Def->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
+ Def->getVPDefID() == VPRecipeBase::VPBlendSC ||
+ Def->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
+ Def->getVPDefID() == VPRecipeBase::VPReplicateSC ||
+ Def->getVPDefID() == VPRecipeBase::VPReductionSC ||
+ Def->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC ||
+ Def->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC;
+}
+
/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
-class VPInstruction : public VPUser, public VPRecipeBase {
+class VPInstruction : public VPRecipeBase, public VPUser, public VPValue {
friend class VPlanSlp;
public:
@@ -697,23 +745,26 @@ private:
void generateInstruction(VPTransformState &State, unsigned Part);
protected:
- Instruction *getUnderlyingInstr() {
- return cast_or_null<Instruction>(getUnderlyingValue());
- }
-
void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
public:
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
- : VPUser(VPValue::VPInstructionSC, Operands),
- VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {}
+ : VPRecipeBase(VPRecipeBase::VPInstructionSC), VPUser(Operands),
+ VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {}
+
+ VPInstruction(unsigned Opcode, ArrayRef<VPInstruction *> Operands)
+ : VPRecipeBase(VPRecipeBase::VPInstructionSC), VPUser({}),
+ VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {
+ for (auto *I : Operands)
+ addOperand(I->getVPValue());
+ }
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
: VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPValue *V) {
- return V->getVPValueID() == VPValue::VPInstructionSC;
+ return V->getVPValueID() == VPValue::VPVInstructionSC;
}
VPInstruction *clone() const {
@@ -722,8 +773,8 @@ public:
}
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *R) {
- return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
+ static inline bool classof(const VPDef *R) {
+ return R->getVPDefID() == VPRecipeBase::VPInstructionSC;
}
unsigned getOpcode() const { return Opcode; }
@@ -733,13 +784,12 @@ public:
/// provided.
void execute(VPTransformState &State) override;
- /// Print the Recipe.
+ /// Print the VPInstruction to \p O.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
- /// Print the VPInstruction.
- void print(raw_ostream &O) const;
- void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
+ /// Print the VPInstruction to dbgs() (for debugging).
+ void dump() const;
/// Return true if this instruction may modify memory.
bool mayWriteToMemory() const {
@@ -773,23 +823,21 @@ public:
/// VPWidenRecipe is a recipe for producing a copy of vector type its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
-class VPWidenRecipe : public VPRecipeBase {
- /// Hold the instruction to be widened.
- Instruction &Ingredient;
-
- /// Hold VPValues for the operands of the ingredient.
- VPUser User;
-
+class VPWidenRecipe : public VPRecipeBase, public VPValue, public VPUser {
public:
template <typename IterT>
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
- : VPRecipeBase(VPWidenSC), Ingredient(I), User(Operands) {}
+ : VPRecipeBase(VPRecipeBase::VPWidenSC),
+ VPValue(VPValue::VPVWidenSC, &I, this), VPUser(Operands) {}
~VPWidenRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPWidenSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPWidenSC;
+ }
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPVWidenSC;
}
/// Produce widened copies of all Ingredients.
@@ -801,23 +849,19 @@ public:
};
/// A recipe for widening Call instructions.
-class VPWidenCallRecipe : public VPRecipeBase {
- /// Hold the call to be widened.
- CallInst &Ingredient;
-
- /// Hold VPValues for the arguments of the call.
- VPUser User;
+class VPWidenCallRecipe : public VPRecipeBase, public VPUser, public VPValue {
public:
template <typename IterT>
VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments)
- : VPRecipeBase(VPWidenCallSC), Ingredient(I), User(CallArguments) {}
+ : VPRecipeBase(VPRecipeBase::VPWidenCallSC), VPUser(CallArguments),
+ VPValue(VPValue::VPVWidenCallSC, &I, this) {}
~VPWidenCallRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPWidenCallSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPWidenCallSC;
}
/// Produce a widened version of the call instruction.
@@ -829,13 +873,7 @@ public:
};
/// A recipe for widening select instructions.
-class VPWidenSelectRecipe : public VPRecipeBase {
-private:
- /// Hold the select to be widened.
- SelectInst &Ingredient;
-
- /// Hold VPValues for the operands of the select.
- VPUser User;
+class VPWidenSelectRecipe : public VPRecipeBase, public VPUser, public VPValue {
/// Is the condition of the select loop invariant?
bool InvariantCond;
@@ -844,14 +882,15 @@ public:
template <typename IterT>
VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands,
bool InvariantCond)
- : VPRecipeBase(VPWidenSelectSC), Ingredient(I), User(Operands),
+ : VPRecipeBase(VPRecipeBase::VPWidenSelectSC), VPUser(Operands),
+ VPValue(VPValue::VPVWidenSelectSC, &I, this),
InvariantCond(InvariantCond) {}
~VPWidenSelectRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPWidenSelectSC;
}
/// Produce a widened version of the select instruction.
@@ -863,20 +902,24 @@ public:
};
/// A recipe for handling GEP instructions.
-class VPWidenGEPRecipe : public VPRecipeBase {
- GetElementPtrInst *GEP;
-
- /// Hold VPValues for the base and indices of the GEP.
- VPUser User;
-
+class VPWidenGEPRecipe : public VPRecipeBase,
+ public VPUser,
+ public VPValue {
bool IsPtrLoopInvariant;
SmallBitVector IsIndexLoopInvariant;
public:
template <typename IterT>
+ VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
+ : VPRecipeBase(VPRecipeBase::VPWidenGEPSC), VPUser(Operands),
+ VPValue(VPValue::VPVWidenGEPSC, GEP, this),
+ IsIndexLoopInvariant(GEP->getNumIndices(), false) {}
+
+ template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands,
Loop *OrigLoop)
- : VPRecipeBase(VPWidenGEPSC), GEP(GEP), User(Operands),
+ : VPRecipeBase(VPRecipeBase::VPWidenGEPSC), VPUser(Operands),
+ VPValue(VPValue::VPVWidenGEPSC, GEP, this),
IsIndexLoopInvariant(GEP->getNumIndices(), false) {
IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
for (auto Index : enumerate(GEP->indices()))
@@ -886,8 +929,8 @@ public:
~VPWidenGEPRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPWidenGEPSC;
}
/// Generate the gep nodes.
@@ -900,18 +943,25 @@ public:
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector and scalar values.
-class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
+class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPUser {
PHINode *IV;
TruncInst *Trunc;
public:
- VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr)
- : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {}
+ VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
+ TruncInst *Trunc = nullptr)
+ : VPRecipeBase(VPWidenIntOrFpInductionSC), VPUser({Start}), IV(IV),
+ Trunc(Trunc) {
+ if (Trunc)
+ new VPValue(Trunc, this);
+ else
+ new VPValue(IV, this);
+ }
~VPWidenIntOrFpInductionRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC;
}
/// Generate the vectorized and scalarized versions of the phi node as
@@ -921,19 +971,38 @@ public:
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+
+ /// Returns the start value of the induction.
+ VPValue *getStartValue() { return getOperand(0); }
};
/// A recipe for handling all phi nodes except for integer and FP inductions.
-class VPWidenPHIRecipe : public VPRecipeBase {
+/// For reduction PHIs, RdxDesc must point to the corresponding recurrence
+/// descriptor and the start value is the first operand of the recipe.
+class VPWidenPHIRecipe : public VPRecipeBase, public VPUser {
PHINode *Phi;
+ /// Descriptor for a reduction PHI.
+ RecurrenceDescriptor *RdxDesc = nullptr;
+
public:
- VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {}
+ /// Create a new VPWidenPHIRecipe for the reduction \p Phi described by \p
+ /// RdxDesc.
+ VPWidenPHIRecipe(PHINode *Phi, RecurrenceDescriptor &RdxDesc, VPValue &Start)
+ : VPWidenPHIRecipe(Phi) {
+ this->RdxDesc = &RdxDesc;
+ addOperand(&Start);
+ }
+
+ /// Create a VPWidenPHIRecipe for \p Phi
+ VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {
+ new VPValue(Phi, this);
+ }
~VPWidenPHIRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPWidenPHISC;
}
/// Generate the phi/select nodes.
@@ -942,21 +1011,25 @@ public:
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+
+ /// Returns the start value of the phi, if it is a reduction.
+ VPValue *getStartValue() {
+ return getNumOperands() == 0 ? nullptr : getOperand(0);
+ }
};
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
-class VPBlendRecipe : public VPRecipeBase {
+class VPBlendRecipe : public VPRecipeBase, public VPUser {
PHINode *Phi;
+public:
/// The blend operation is a User of the incoming values and of their
/// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
/// might be incoming with a full mask for which there is no VPValue.
- VPUser User;
-
-public:
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
- : VPRecipeBase(VPBlendSC), Phi(Phi), User(Operands) {
+ : VPRecipeBase(VPBlendSC), VPUser(Operands), Phi(Phi) {
+ new VPValue(Phi, this);
assert(Operands.size() > 0 &&
((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
"Expected either a single incoming value or a positive even number "
@@ -964,23 +1037,19 @@ public:
}
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPBlendSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPBlendSC;
}
/// Return the number of incoming values, taking into account that a single
/// incoming value has no mask.
- unsigned getNumIncomingValues() const {
- return (User.getNumOperands() + 1) / 2;
- }
+ unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
/// Return incoming value number \p Idx.
- VPValue *getIncomingValue(unsigned Idx) const {
- return User.getOperand(Idx * 2);
- }
+ VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }
/// Return mask number \p Idx.
- VPValue *getMask(unsigned Idx) const { return User.getOperand(Idx * 2 + 1); }
+ VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }
/// Generate the phi/select nodes.
void execute(VPTransformState &State) override;
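With the blend recipe now holding its operands directly as a VPUser, the [I0, M0, I1, M1, ...] layout reduces the accessors above to simple index arithmetic: incoming value Idx sits at operand 2*Idx and its mask at 2*Idx+1, and a single incoming value carries no mask. A tiny standalone illustration with invented names:

#include <cassert>
#include <vector>

// Operand layout of a blend: [I0, M0, I1, M1, ...], or just [I0] when the
// single incoming value needs no mask.
struct BlendLayoutSketch {
  std::vector<int> Operands; // ints stand in for VPValue*

  unsigned numIncoming() const { return (Operands.size() + 1) / 2; }
  int incoming(unsigned Idx) const { return Operands[Idx * 2]; }
  int mask(unsigned Idx) const {
    assert(Operands.size() > 1 && "single incoming value carries no mask");
    return Operands[Idx * 2 + 1];
  }
};
// {A} -> one incoming, no mask; {A, MA, B, MB} -> two incomings with masks.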
@@ -991,35 +1060,58 @@ public:
};
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
-/// or stores into one wide load/store and shuffles.
-class VPInterleaveRecipe : public VPRecipeBase {
+/// or stores into one wide load/store and shuffles. The first operand of a
+/// VPInterleave recipe is the address, followed by the stored values, followed
+/// by an optional mask.
+class VPInterleaveRecipe : public VPRecipeBase, public VPUser {
const InterleaveGroup<Instruction> *IG;
- VPUser User;
+
+ bool HasMask = false;
public:
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
- VPValue *Mask)
- : VPRecipeBase(VPInterleaveSC), IG(IG), User({Addr}) {
- if (Mask)
- User.addOperand(Mask);
+ ArrayRef<VPValue *> StoredValues, VPValue *Mask)
+ : VPRecipeBase(VPInterleaveSC), VPUser(Addr), IG(IG) {
+ for (unsigned i = 0; i < IG->getFactor(); ++i)
+ if (Instruction *I = IG->getMember(i)) {
+ if (I->getType()->isVoidTy())
+ continue;
+ new VPValue(I, this);
+ }
+
+ for (auto *SV : StoredValues)
+ addOperand(SV);
+ if (Mask) {
+ HasMask = true;
+ addOperand(Mask);
+ }
}
~VPInterleaveRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPInterleaveSC;
}
/// Return the address accessed by this recipe.
VPValue *getAddr() const {
- return User.getOperand(0); // Address is the 1st, mandatory operand.
+ return getOperand(0); // Address is the 1st, mandatory operand.
}
/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.
VPValue *getMask() const {
// Mask is optional and therefore the last, currently 2nd operand.
- return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
+ return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
+ }
+
+ /// Return the VPValues stored by this interleave group. If it is a load
+ /// interleave group, return an empty ArrayRef.
+ ArrayRef<VPValue *> getStoredValues() const {
+ // The first operand is the address, followed by the stored values, followed
+ // by an optional mask.
+ return ArrayRef<VPValue *>(op_begin(), getNumOperands())
+ .slice(1, getNumOperands() - (HasMask ? 2 : 1));
}
/// Generate the wide load or store, and shuffles.
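The slice in getStoredValues above skips the leading address operand and, when HasMask is set, the trailing mask. The same arithmetic as a standalone worked example (invented names, std::vector instead of ArrayRef):

#include <cassert>
#include <vector>

// Operands of an interleave store group: address first, then the stored
// values, then an optional mask. Return only the stored values.
std::vector<int> storedValuesSketch(const std::vector<int> &Operands, bool HasMask) {
  size_t Skipped = HasMask ? 2 : 1; // the address plus, possibly, the mask
  assert(Operands.size() >= Skipped && "operand list too short");
  size_t NumStored = Operands.size() - Skipped;
  return std::vector<int>(Operands.begin() + 1, Operands.begin() + 1 + NumStored);
}
// {Addr, S0, S1, Mask} with a mask -> {S0, S1}; {Addr, S0, S1} -> {S0, S1}.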
@@ -1032,17 +1124,61 @@ public:
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
};
+/// A recipe to represent inloop reduction operations, performing a reduction on
+/// a vector operand into a scalar value, and adding the result to a chain.
+/// The Operands are {ChainOp, VecOp, [Condition]}.
+class VPReductionRecipe : public VPRecipeBase, public VPUser, public VPValue {
+ /// The recurrence descriptor for the reduction in question.
+ RecurrenceDescriptor *RdxDesc;
+ /// Fast math flags to use for the resulting reduction operation.
+ bool NoNaN;
+ /// Pointer to the TTI, needed to create the target reduction
+ const TargetTransformInfo *TTI;
+
+public:
+ VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp,
+ VPValue *VecOp, VPValue *CondOp, bool NoNaN,
+ const TargetTransformInfo *TTI)
+ : VPRecipeBase(VPRecipeBase::VPReductionSC), VPUser({ChainOp, VecOp}),
+ VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), NoNaN(NoNaN),
+ TTI(TTI) {
+ if (CondOp)
+ addOperand(CondOp);
+ }
+
+ ~VPReductionRecipe() override = default;
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPVReductionSC;
+ }
+
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPReductionSC;
+ }
+
+ /// Generate the reduction in the loop
+ void execute(VPTransformState &State) override;
+
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+
+ /// The VPValue of the scalar Chain being accumulated.
+ VPValue *getChainOp() const { return getOperand(0); }
+ /// The VPValue of the vector value to be reduced.
+ VPValue *getVecOp() const { return getOperand(1); }
+ /// The VPValue of the condition for the block.
+ VPValue *getCondOp() const {
+ return getNumOperands() > 2 ? getOperand(2) : nullptr;
+ }
+};
+
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform, only one copy, per lane zero, will be generated.
-class VPReplicateRecipe : public VPRecipeBase {
- /// The instruction being replicated.
- Instruction *Ingredient;
-
- /// Hold VPValues for the operands of the ingredient.
- VPUser User;
-
+class VPReplicateRecipe : public VPRecipeBase, public VPUser, public VPValue {
/// Indicator if only a single replica per lane is needed.
bool IsUniform;
@@ -1056,8 +1192,9 @@ public:
template <typename IterT>
VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
bool IsUniform, bool IsPredicated = false)
- : VPRecipeBase(VPReplicateSC), Ingredient(I), User(Operands),
- IsUniform(IsUniform), IsPredicated(IsPredicated) {
+ : VPRecipeBase(VPReplicateSC), VPUser(Operands),
+ VPValue(VPVReplicateSC, I, this), IsUniform(IsUniform),
+ IsPredicated(IsPredicated) {
// Retain the previous behavior of predicateInstructions(), where an
// insert-element of a predicated instruction got hoisted into the
// predicated basic block iff it was its only user. This is achieved by
@@ -1069,8 +1206,12 @@ public:
~VPReplicateRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPReplicateSC;
+ }
+
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPVReplicateSC;
}
/// Generate replicas of the desired Ingredient. Replicas will be generated
@@ -1083,21 +1224,21 @@ public:
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+
+ bool isUniform() const { return IsUniform; }
};
/// A recipe for generating conditional branches on the bits of a mask.
-class VPBranchOnMaskRecipe : public VPRecipeBase {
- VPUser User;
-
+class VPBranchOnMaskRecipe : public VPRecipeBase, public VPUser {
public:
VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) {
if (BlockInMask) // nullptr means all-one mask.
- User.addOperand(BlockInMask);
+ addOperand(BlockInMask);
}
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC;
}
/// Generate the extraction of the appropriate bit from the block mask and the
@@ -1109,7 +1250,7 @@ public:
VPSlotTracker &SlotTracker) const override {
O << " +\n" << Indent << "\"BRANCH-ON-MASK ";
if (VPValue *Mask = getMask())
- Mask->print(O, SlotTracker);
+ Mask->printAsOperand(O, SlotTracker);
else
O << " All-One";
O << "\\l\"";
@@ -1118,9 +1259,9 @@ public:
/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.
VPValue *getMask() const {
- assert(User.getNumOperands() <= 1 && "should have either 0 or 1 operands");
+ assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
// Mask is optional.
- return User.getNumOperands() == 1 ? User.getOperand(0) : nullptr;
+ return getNumOperands() == 1 ? getOperand(0) : nullptr;
}
};
@@ -1129,19 +1270,20 @@ public:
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
-class VPPredInstPHIRecipe : public VPRecipeBase {
- Instruction *PredInst;
+class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser {
public:
/// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
/// nodes after merging back from a Branch-on-Mask.
- VPPredInstPHIRecipe(Instruction *PredInst)
- : VPRecipeBase(VPPredInstPHISC), PredInst(PredInst) {}
+ VPPredInstPHIRecipe(VPValue *PredV)
+ : VPRecipeBase(VPPredInstPHISC), VPUser(PredV) {
+ new VPValue(PredV->getUnderlyingValue(), this);
+ }
~VPPredInstPHIRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPPredInstPHISC;
}
/// Generates phi nodes for live-outs as needed to retain SSA form.
@@ -1158,56 +1300,59 @@ public:
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
-class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
- Instruction &Instr;
- VPUser User;
+class VPWidenMemoryInstructionRecipe : public VPRecipeBase,
+ public VPUser {
+ Instruction &Ingredient;
void setMask(VPValue *Mask) {
if (!Mask)
return;
- User.addOperand(Mask);
+ addOperand(Mask);
}
bool isMasked() const {
- return (isa<LoadInst>(Instr) && User.getNumOperands() == 2) ||
- (isa<StoreInst>(Instr) && User.getNumOperands() == 3);
+ return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
}
public:
VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
- : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Load), User({Addr}) {
+ : VPRecipeBase(VPWidenMemoryInstructionSC), VPUser({Addr}),
+ Ingredient(Load) {
+ new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
setMask(Mask);
}
VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
VPValue *StoredValue, VPValue *Mask)
- : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Store),
- User({Addr, StoredValue}) {
+ : VPRecipeBase(VPWidenMemoryInstructionSC), VPUser({Addr, StoredValue}),
+ Ingredient(Store) {
setMask(Mask);
}
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC;
}
/// Return the address accessed by this recipe.
VPValue *getAddr() const {
- return User.getOperand(0); // Address is the 1st, mandatory operand.
+ return getOperand(0); // Address is the 1st, mandatory operand.
}
/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.
VPValue *getMask() const {
// Mask is optional and therefore the last operand.
- return isMasked() ? User.getOperand(User.getNumOperands() - 1) : nullptr;
+ return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
}
+ /// Returns true if this recipe is a store.
+ bool isStore() const { return isa<StoreInst>(Ingredient); }
+
/// Return the value stored by this recipe.
VPValue *getStoredValue() const {
- assert(isa<StoreInst>(Instr) &&
- "Stored value only available for store instructions");
- return User.getOperand(1); // Stored value is the 2nd, mandatory operand.
+ assert(isStore() && "Stored value only available for store instructions");
+ return getOperand(1); // Stored value is the 2nd, mandatory operand.
}
/// Generate the wide load/store.
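After this rewrite the memory recipe's operand list is {Addr} or {Addr, Mask} for loads and {Addr, StoredValue} or {Addr, StoredValue, Mask} for stores, so the mask, when present, is always last. A compact standalone sketch of the isMasked/getMask/getStoredValue logic (invented names):

#include <cassert>
#include <vector>

// Loads carry {Addr, (Mask)}; stores carry {Addr, StoredValue, (Mask)}.
struct MemRecipeSketch {
  bool IsStore;
  std::vector<int> Operands; // ints stand in for VPValue*

  bool isMasked() const {
    return IsStore ? Operands.size() == 3 : Operands.size() == 2;
  }
  const int *mask() const { return isMasked() ? &Operands.back() : nullptr; }
  int storedValue() const {
    assert(IsStore && "stored value only exists for stores");
    return Operands[1];
  }
};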
@@ -1220,21 +1365,16 @@ public:
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPRecipeBase {
- /// A VPValue representing the canonical vector IV.
- VPValue Val;
-
public:
- VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) {}
- ~VPWidenCanonicalIVRecipe() override = default;
+ VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) {
+ new VPValue(nullptr, this);
+ }
- /// Return the VPValue representing the canonical vector induction variable of
- /// the vector loop.
- const VPValue *getVPValue() const { return &Val; }
- VPValue *getVPValue() { return &Val; }
+ ~VPWidenCanonicalIVRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPRecipeBase *V) {
- return V->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
}
/// Generate a canonical vector induction variable of the vector loop, with
@@ -1321,6 +1461,11 @@ public:
/// this VPBasicBlock, thereby "executing" the VPlan.
void execute(struct VPTransformState *State) override;
+ /// Return the position of the first non-phi node recipe in the block.
+ iterator getFirstNonPhi();
+
+ void dropAllReferences(VPValue *NewValue) override;
+
private:
/// Create an IR BasicBlock to hold the output instructions generated by this
/// VPBasicBlock, and return it. Update the CFGState accordingly.
@@ -1361,8 +1506,11 @@ public:
IsReplicator(IsReplicator) {}
~VPRegionBlock() override {
- if (Entry)
+ if (Entry) {
+ VPValue DummyValue;
+ Entry->dropAllReferences(&DummyValue);
deleteCFG(Entry);
+ }
}
/// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -1407,6 +1555,8 @@ public:
/// The method which generates the output IR instructions that correspond to
/// this VPRegionBlock, thereby "executing" the VPlan.
void execute(struct VPTransformState *State) override;
+
+ void dropAllReferences(VPValue *NewValue) override;
};
//===----------------------------------------------------------------------===//
@@ -1544,7 +1694,7 @@ class VPlan {
VPBlockBase *Entry;
/// Holds the VFs applicable to this VPlan.
- SmallSet<unsigned, 2> VFs;
+ SmallSetVector<ElementCount, 2> VFs;
/// Holds the name of the VPlan, for printing.
std::string Name;
@@ -1564,6 +1714,10 @@ class VPlan {
/// VPlan.
Value2VPValueTy Value2VPValue;
+ /// Contains all VPValues that have been allocated by addVPValue directly and
+ /// need to be freed when the plan's destructor is called.
+ SmallVector<VPValue *, 16> VPValuesToFree;
+
/// Holds the VPLoopInfo analysis for this VPlan.
VPLoopInfo VPLInfo;
@@ -1577,10 +1731,15 @@ public:
}
~VPlan() {
- if (Entry)
+ if (Entry) {
+ VPValue DummyValue;
+ for (VPBlockBase *Block : depth_first(Entry))
+ Block->dropAllReferences(&DummyValue);
+
VPBlockBase::deleteCFG(Entry);
- for (auto &MapEntry : Value2VPValue)
- delete MapEntry.second;
+ }
+ for (VPValue *VPV : VPValuesToFree)
+ delete VPV;
if (BackedgeTakenCount)
delete BackedgeTakenCount;
for (VPValue *Def : VPExternalDefs)
@@ -1608,9 +1767,9 @@ public:
return BackedgeTakenCount;
}
- void addVF(unsigned VF) { VFs.insert(VF); }
+ void addVF(ElementCount VF) { VFs.insert(VF); }
- bool hasVF(unsigned VF) { return VFs.count(VF); }
+ bool hasVF(ElementCount VF) { return VFs.count(VF); }
const std::string &getName() const { return Name; }
@@ -1630,7 +1789,15 @@ public:
void addVPValue(Value *V) {
assert(V && "Trying to add a null Value to VPlan");
assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
- Value2VPValue[V] = new VPValue(V);
+ VPValue *VPV = new VPValue(V);
+ Value2VPValue[V] = VPV;
+ VPValuesToFree.push_back(VPV);
+ }
+
+ void addVPValue(Value *V, VPValue *VPV) {
+ assert(V && "Trying to add a null Value to VPlan");
+ assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
+ Value2VPValue[V] = VPV;
}
VPValue *getVPValue(Value *V) {
@@ -1646,6 +1813,8 @@ public:
return getVPValue(V);
}
+ void removeVPValueFor(Value *V) { Value2VPValue.erase(V); }
+
/// Return the VPLoopInfo analysis for this VPlan.
VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
@@ -1723,13 +1892,13 @@ private:
void dump();
- static void printAsIngredient(raw_ostream &O, Value *V);
+ static void printAsIngredient(raw_ostream &O, const Value *V);
};
struct VPlanIngredient {
- Value *V;
+ const Value *V;
- VPlanIngredient(Value *V) : V(V) {}
+ VPlanIngredient(const Value *V) : V(V) {}
};
inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
@@ -1879,9 +2048,7 @@ public:
/// \returns nullptr if doesn't have such group.
InterleaveGroup<VPInstruction> *
getInterleaveGroup(VPInstruction *Instr) const {
- if (InterleaveGroupMap.count(Instr))
- return InterleaveGroupMap.find(Instr)->second;
- return nullptr;
+ return InterleaveGroupMap.lookup(Instr);
}
};
@@ -1965,10 +2132,7 @@ class VPlanSlp {
public:
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
- ~VPlanSlp() {
- for (auto &KV : BundleToCombined)
- delete KV.second;
- }
+ ~VPlanSlp() = default;
/// Tries to build an SLP tree rooted at \p Operands and returns a
/// VPInstruction combining \p Operands, if they can be combined.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 7a80f3ff80a5..ac3b3505dc34 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -191,7 +191,7 @@ void VPlanPredicator::predicateRegionRec(VPRegionBlock *Region) {
// Generate edge predicates and append them to the block predicate. RPO is
// necessary since the predecessor blocks' block predicate needs to be set
// before the current block's block predicate can be computed.
- for (VPBlockBase *Block : make_range(RPOT.begin(), RPOT.end())) {
+ for (VPBlockBase *Block : RPOT) {
// TODO: Handle nested regions once we start generating the same.
assert(!isa<VPRegionBlock>(Block) && "Nested region not expected");
createOrPropagatePredicates(Block, Region);
@@ -208,7 +208,7 @@ void VPlanPredicator::linearizeRegionRec(VPRegionBlock *Region) {
ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
VPBlockBase *PrevBlock = nullptr;
- for (VPBlockBase *CurrBlock : make_range(RPOT.begin(), RPOT.end())) {
+ for (VPBlockBase *CurrBlock : RPOT) {
// TODO: Handle nested regions once we start generating the same.
assert(!isa<VPRegionBlock>(CurrBlock) && "Nested region not expected");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
index 9019ed15ec5f..6f21bf44291a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -124,7 +124,7 @@ bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
for (auto &I : *Parent) {
auto *VPI = cast<VPInstruction>(&I);
if (VPI->getOpcode() == Instruction::Load &&
- std::find(Operands.begin(), Operands.end(), VPI) != Operands.end())
+ llvm::is_contained(Operands, VPI))
LoadsSeen++;
if (LoadsSeen == Operands.size())
@@ -161,7 +161,8 @@ static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
unsigned OperandIndex) {
SmallVector<VPValue *, 4> Operands;
for (VPValue *V : Values) {
- auto *U = cast<VPUser>(V);
+ // Currently we only support VPInstructions.
+ auto *U = cast<VPInstruction>(V);
Operands.push_back(U->getOperand(OperandIndex));
}
return Operands;
@@ -222,18 +223,20 @@ static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
/// Traverses and compares operands of V1 and V2 to MaxLevel.
static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
VPInterleavedAccessInfo &IAI) {
- if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
+ auto *I1 = dyn_cast<VPInstruction>(V1);
+ auto *I2 = dyn_cast<VPInstruction>(V2);
+ // Currently we only support VPInstructions.
+ if (!I1 || !I2)
return 0;
if (MaxLevel == 0)
- return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1),
- cast<VPInstruction>(V2), IAI);
+ return (unsigned)areConsecutiveOrMatch(I1, I2, IAI);
unsigned Score = 0;
- for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I)
- for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
- Score += getLAScore(cast<VPUser>(V1)->getOperand(I),
- cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
+ for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I)
+ for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
+ Score +=
+ getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI);
return Score;
}
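The rewritten getLAScore above recurses over every operand pair down to MaxLevel. A compact standalone sketch of that look-ahead scoring shape, with a stubbed leaf comparison; Node, leafMatch and lookAheadScore are toy stand-ins, not the VPlan API:

#include <vector>

// Minimal stand-in for a value that has operands.
struct Node {
  std::vector<Node *> Ops;
};

// Placeholder for the leaf-level predicate (areConsecutiveOrMatch in VPlanSLP).
static bool leafMatch(const Node *A, const Node *B) { return A == B; }

// Pairwise look-ahead score: at depth 0 compare the leaves, otherwise sum the
// scores of every operand pair one level further down.
static unsigned lookAheadScore(const Node *A, const Node *B, unsigned MaxLevel) {
  if (!A || !B)
    return 0;
  if (MaxLevel == 0)
    return leafMatch(A, B) ? 1 : 0;
  unsigned Score = 0;
  for (const Node *OpA : A->Ops)
    for (const Node *OpB : B->Ops)
      Score += lookAheadScore(OpA, OpB, MaxLevel - 1);
  return Score;
}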
@@ -463,8 +466,8 @@ VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
auto *VPI = new VPInstruction(Opcode, CombinedOperands);
VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
- LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs());
- cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " "
+ << *cast<VPInstruction>(Values[0]) << "\n");
addCombined(Values, VPI);
return VPI;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3a4872a72122..1a54603faf22 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -48,6 +48,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
VPInstruction *VPInst = cast<VPInstruction>(Ingredient);
Instruction *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
if (DeadInstructions.count(Inst)) {
+ VPValue DummyValue;
+ VPInst->replaceAllUsesWith(&DummyValue);
Ingredient->eraseFromParent();
continue;
}
@@ -66,7 +68,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
InductionDescriptor II = Inductions.lookup(Phi);
if (II.getKind() == InductionDescriptor::IK_IntInduction ||
II.getKind() == InductionDescriptor::IK_FpInduction) {
- NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi);
+ VPValue *Start = Plan->getOrAddVPValue(II.getStartValue());
+ NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start);
} else
NewRecipe = new VPWidenPHIRecipe(Phi);
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
@@ -77,6 +80,11 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands()));
NewRecipe->insertBefore(Ingredient);
+ if (NewRecipe->getNumDefinedValues() == 1)
+ VPInst->replaceAllUsesWith(NewRecipe->getVPValue());
+ else
+ assert(NewRecipe->getNumDefinedValues() == 0 &&
+ "Only recpies with zero or one defined values expected");
Ingredient->eraseFromParent();
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
index f73505d0279a..ed572ca36627 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -10,9 +10,9 @@
/// This file contains the declarations of the entities induced by Vectorization
/// Plans, e.g. the instructions the VPlan intends to generate if executed.
/// VPlan models the following entities:
-/// VPValue
-/// |-- VPUser
-/// | |-- VPInstruction
+/// VPValue VPUser VPDef
+/// | |
+/// VPInstruction
/// These are documented in docs/VectorizationPlan.rst.
///
//===----------------------------------------------------------------------===//
@@ -21,7 +21,9 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VALUE_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/iterator_range.h"
namespace llvm {
@@ -29,8 +31,11 @@ namespace llvm {
// Forward declarations.
class raw_ostream;
class Value;
+class VPDef;
class VPSlotTracker;
class VPUser;
+class VPRecipeBase;
+class VPWidenMemoryInstructionRecipe;
// This is the base class of the VPlan Def/Use graph, used for modeling the data
// flow into, within and out of the VPlan. VPValues can stand for live-ins
@@ -38,10 +43,14 @@ class VPUser;
// and live-outs which the VPlan will need to fix accordingly.
class VPValue {
friend class VPBuilder;
+ friend class VPDef;
+ friend class VPInstruction;
friend struct VPlanTransforms;
friend class VPBasicBlock;
friend class VPInterleavedAccessInfo;
friend class VPSlotTracker;
+ friend class VPRecipeBase;
+ friend class VPWidenMemoryInstructionRecipe;
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@@ -51,8 +60,11 @@ protected:
// Hold the underlying Value, if any, attached to this VPValue.
Value *UnderlyingVal;
- VPValue(const unsigned char SC, Value *UV = nullptr)
- : SubclassID(SC), UnderlyingVal(UV) {}
+ /// Pointer to the VPDef that defines this VPValue. If it is nullptr, the
+ /// VPValue is not defined by any recipe modeled in VPlan.
+ VPDef *Def;
+
+ VPValue(const unsigned char SC, Value *UV = nullptr, VPDef *Def = nullptr);
// DESIGN PRINCIPLE: Access to the underlying IR must be strictly limited to
// the front-end and back-end of VPlan so that the middle-end is as
@@ -61,10 +73,6 @@ protected:
// for multiple underlying IRs (Polly?) by providing a new VPlan front-end,
// back-end and analysis information for the new IR.
- /// Return the underlying Value attached to this VPValue.
- Value *getUnderlyingValue() { return UnderlyingVal; }
- const Value *getUnderlyingValue() const { return UnderlyingVal; }
-
// Set \p Val as the underlying Value of this VPValue.
void setUnderlyingValue(Value *Val) {
assert(!UnderlyingVal && "Underlying Value is already set.");
@@ -72,16 +80,33 @@ protected:
}
public:
+ /// Return the underlying Value attached to this VPValue.
+ Value *getUnderlyingValue() { return UnderlyingVal; }
+ const Value *getUnderlyingValue() const { return UnderlyingVal; }
+
/// An enumeration for keeping track of the concrete subclass of VPValue that
/// are actually instantiated. Values of this enumeration are kept in the
/// SubclassID field of the VPValue objects. They are used for concrete
/// type identification.
- enum { VPValueSC, VPUserSC, VPInstructionSC };
-
- VPValue(Value *UV = nullptr) : VPValue(VPValueSC, UV) {}
+ enum {
+ VPValueSC,
+ VPVInstructionSC,
+ VPVMemoryInstructionSC,
+ VPVReductionSC,
+ VPVReplicateSC,
+ VPVWidenSC,
+ VPVWidenCallSC,
+ VPVWidenGEPSC,
+ VPVWidenSelectSC,
+ };
+
+ VPValue(Value *UV = nullptr, VPDef *Def = nullptr)
+ : VPValue(VPValueSC, UV, Def) {}
VPValue(const VPValue &) = delete;
VPValue &operator=(const VPValue &) = delete;
+ virtual ~VPValue();
+
/// \return an ID for the concrete type of this object.
/// This is used to implement the classof checks. This should not be used
/// for any other purpose, as the values may change as LLVM evolves.
@@ -90,9 +115,28 @@ public:
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const;
void print(raw_ostream &OS, VPSlotTracker &Tracker) const;
+ /// Dump the value to stderr (for debugging).
+ void dump() const;
+
unsigned getNumUsers() const { return Users.size(); }
void addUser(VPUser &User) { Users.push_back(&User); }
+ /// Remove a single \p User from the list of users.
+ void removeUser(VPUser &User) {
+ bool Found = false;
+ // The same user can be added multiple times, e.g. because the same VPValue
+ // is used twice by the same VPUser. Remove a single one.
+ erase_if(Users, [&User, &Found](VPUser *Other) {
+ if (Found)
+ return false;
+ if (Other == &User) {
+ Found = true;
+ return true;
+ }
+ return false;
+ });
+ }
+
typedef SmallVectorImpl<VPUser *>::iterator user_iterator;
typedef SmallVectorImpl<VPUser *>::const_iterator const_user_iterator;
typedef iterator_range<user_iterator> user_range;
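The removeUser helper added above deliberately erases only the first matching entry, because the same VPUser can legitimately appear several times in the use list (once per operand that references the value). The same single-occurrence removal can be sketched with plain standard containers; this is a simplified model, not the VPlan classes themselves:

#include <algorithm>
#include <vector>

struct User {};

// Remove exactly one occurrence of U from Users, even if U appears repeatedly
// (e.g. the same user references the value through two operands).
static void removeSingleUser(std::vector<User *> &Users, User &U) {
  auto It = std::find(Users.begin(), Users.end(), &U);
  if (It != Users.end())
    Users.erase(It);
}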
@@ -120,6 +164,17 @@ public:
}
void replaceAllUsesWith(VPValue *New);
+
+ VPDef *getDef() { return Def; }
+
+ /// Returns the underlying IR value, if this VPValue is defined outside the
+ /// scope of VPlan. Returns nullptr if the VPValue is defined by a VPDef
+ /// inside a VPlan.
+ Value *getLiveInIRValue() {
+ assert(!getDef() &&
+ "VPValue is not a live-in; it is defined by a VPDef inside a VPlan");
+ return getUnderlyingValue();
+ }
};
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
@@ -129,34 +184,32 @@ raw_ostream &operator<<(raw_ostream &OS, const VPValue &V);
/// This class augments VPValue with operands which provide the inverse def-use
/// edges from VPValue's users to their defs.
-class VPUser : public VPValue {
+class VPUser {
SmallVector<VPValue *, 2> Operands;
protected:
- VPUser(const unsigned char SC) : VPValue(SC) {}
- VPUser(const unsigned char SC, ArrayRef<VPValue *> Operands) : VPValue(SC) {
+ /// Print the operands to \p O.
+ void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
+
+public:
+ VPUser() {}
+ VPUser(ArrayRef<VPValue *> Operands) {
for (VPValue *Operand : Operands)
addOperand(Operand);
}
-public:
- VPUser() : VPValue(VPValue::VPUserSC) {}
- VPUser(ArrayRef<VPValue *> Operands) : VPUser(VPValue::VPUserSC, Operands) {}
VPUser(std::initializer_list<VPValue *> Operands)
: VPUser(ArrayRef<VPValue *>(Operands)) {}
- template <typename IterT>
- VPUser(iterator_range<IterT> Operands) : VPValue(VPValue::VPUserSC) {
+ template <typename IterT> VPUser(iterator_range<IterT> Operands) {
for (VPValue *Operand : Operands)
addOperand(Operand);
}
VPUser(const VPUser &) = delete;
VPUser &operator=(const VPUser &) = delete;
-
- /// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPValue *V) {
- return V->getVPValueID() >= VPUserSC &&
- V->getVPValueID() <= VPInstructionSC;
+ virtual ~VPUser() {
+ for (VPValue *Op : operands())
+ Op->removeUser(*this);
}
void addOperand(VPValue *Operand) {
@@ -170,7 +223,11 @@ public:
return Operands[N];
}
- void setOperand(unsigned I, VPValue *New) { Operands[I] = New; }
+ void setOperand(unsigned I, VPValue *New) {
+ Operands[I]->removeUser(*this);
+ Operands[I] = New;
+ New->addUser(*this);
+ }
typedef SmallVectorImpl<VPValue *>::iterator operand_iterator;
typedef SmallVectorImpl<VPValue *>::const_iterator const_operand_iterator;
@@ -185,7 +242,110 @@ public:
const_operand_range operands() const {
return const_operand_range(op_begin(), op_end());
}
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPDef *Recipe);
};
+
+/// This class augments a recipe with a set of VPValues defined by the recipe.
+/// It allows recipes to define zero, one or multiple VPValues. A VPDef owns
+/// the VPValues it defines and is responsible for deleting its defined values.
+/// Single-value VPDefs that also inherit from VPValue must make sure to inherit
+/// from VPDef before VPValue.
+class VPDef {
+ friend class VPValue;
+
+ /// Subclass identifier (for isa/dyn_cast).
+ const unsigned char SubclassID;
+
+ /// The VPValues defined by this VPDef.
+ TinyPtrVector<VPValue *> DefinedValues;
+
+ /// Add \p V as a defined value by this VPDef.
+ void addDefinedValue(VPValue *V) {
+ assert(V->getDef() == this &&
+ "can only add VPValue already linked with this VPDef");
+ DefinedValues.push_back(V);
+ }
+
+ /// Remove \p V from the values defined by this VPDef. \p V must be a defined
+ /// value of this VPDef.
+ void removeDefinedValue(VPValue *V) {
+ assert(V->getDef() == this &&
+ "can only remove VPValue linked with this VPDef");
+ assert(is_contained(DefinedValues, V) &&
+ "VPValue to remove must be in DefinedValues");
+ erase_value(DefinedValues, V);
+ V->Def = nullptr;
+ }
+
+public:
+ /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
+ /// that is actually instantiated. Values of this enumeration are kept in the
+ /// SubclassID field of the VPRecipeBase objects. They are used for concrete
+ /// type identification.
+ using VPRecipeTy = enum {
+ VPBlendSC,
+ VPBranchOnMaskSC,
+ VPInstructionSC,
+ VPInterleaveSC,
+ VPPredInstPHISC,
+ VPReductionSC,
+ VPReplicateSC,
+ VPWidenCallSC,
+ VPWidenCanonicalIVSC,
+ VPWidenGEPSC,
+ VPWidenIntOrFpInductionSC,
+ VPWidenMemoryInstructionSC,
+ VPWidenPHISC,
+ VPWidenSC,
+ VPWidenSelectSC
+ };
+
+ VPDef(const unsigned char SC) : SubclassID(SC) {}
+
+ virtual ~VPDef() {
+ for (VPValue *D : make_early_inc_range(DefinedValues)) {
+ assert(D->Def == this &&
+ "all defined VPValues should point to the containing VPDef");
+ assert(D->getNumUsers() == 0 &&
+ "all defined VPValues should have no more users");
+ D->Def = nullptr;
+ delete D;
+ }
+ }
+
+ /// Returns the VPValue with index \p I defined by the VPDef.
+ VPValue *getVPValue(unsigned I = 0) {
+ assert(DefinedValues[I] && "defined value must be non-null");
+ return DefinedValues[I];
+ }
+ const VPValue *getVPValue(unsigned I = 0) const {
+ assert(DefinedValues[I] && "defined value must be non-null");
+ return DefinedValues[I];
+ }
+
+ /// Returns an ArrayRef of the values defined by the VPDef.
+ ArrayRef<VPValue *> definedValues() { return DefinedValues; }
+ /// Returns an ArrayRef of the values defined by the VPDef.
+ ArrayRef<VPValue *> definedValues() const { return DefinedValues; }
+
+ /// Returns the number of values defined by the VPDef.
+ unsigned getNumDefinedValues() const { return DefinedValues.size(); }
+
+ /// \return an ID for the concrete type of this object.
+ /// This is used to implement the classof checks. This should not be used
+ /// for any other purpose, as the values may change as LLVM evolves.
+ unsigned getVPDefID() const { return SubclassID; }
+
+ /// Dump the VPDef to stderr (for debugging).
+ void dump() const;
+
+ /// Each concrete VPDef prints itself.
+ virtual void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const = 0;
+};
+
class VPlan;
class VPBasicBlock;
class VPRegionBlock;
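The VPDef destructor above both clears each defined value's back-pointer and deletes the value, so no dangling Def pointer can survive the owner. A toy model of that ownership contract, under simplified names (Def/Val are placeholders, not the actual recipe hierarchy):

#include <cassert>
#include <vector>

struct Def; // forward declaration

struct Val {
  Def *Definer = nullptr; // back-pointer to the defining Def, if any
};

struct Def {
  std::vector<Val *> Defined; // values owned by this def

  Def() = default;
  Def(const Def &) = delete;
  Def &operator=(const Def &) = delete;

  Val *addValue() {
    Val *V = new Val();
    V->Definer = this;
    Defined.push_back(V);
    return V;
  }

  ~Def() {
    for (Val *V : Defined) {
      assert(V->Definer == this && "defined value must point back to its def");
      V->Definer = nullptr; // detach before deleting, mirroring ~VPDef()
      delete V;
    }
  }
};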
@@ -205,7 +365,7 @@ class VPSlotTracker {
void assignSlots(const VPlan &Plan);
public:
- VPSlotTracker(const VPlan *Plan) {
+ VPSlotTracker(const VPlan *Plan = nullptr) {
if (Plan)
assignSlots(*Plan);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index b384c94121e9..6eec8d14de4a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -65,9 +65,7 @@ static void verifyBlocksInRegion(const VPRegionBlock *Region) {
for (const VPBlockBase *Succ : Successors) {
// There must be a bi-directional link between block and successor.
const auto &SuccPreds = Succ->getPredecessors();
- assert(std::find(SuccPreds.begin(), SuccPreds.end(), VPB) !=
- SuccPreds.end() &&
- "Missing predecessor link.");
+ assert(llvm::is_contained(SuccPreds, VPB) && "Missing predecessor link.");
(void)SuccPreds;
}
@@ -86,9 +84,7 @@ static void verifyBlocksInRegion(const VPRegionBlock *Region) {
// There must be a bi-directional link between block and predecessor.
const auto &PredSuccs = Pred->getSuccessors();
- assert(std::find(PredSuccs.begin(), PredSuccs.end(), VPB) !=
- PredSuccs.end() &&
- "Missing successor link.");
+ assert(llvm::is_contained(PredSuccs, VPB) && "Missing successor link.");
(void)PredSuccs;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 64b41bf9cefa..787f146bdddc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -33,6 +34,7 @@ using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "vector-combine"
+STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
@@ -65,6 +67,7 @@ private:
const TargetTransformInfo &TTI;
const DominatorTree &DT;
+ bool vectorizeLoadInsert(Instruction &I);
ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
unsigned PreferredExtractIndex) const;
@@ -88,6 +91,138 @@ static void replaceValue(Value &Old, Value &New) {
New.takeName(&Old);
}
+bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
+ // Match insert into fixed vector of scalar value.
+ // TODO: Handle non-zero insert index.
+ auto *Ty = dyn_cast<FixedVectorType>(I.getType());
+ Value *Scalar;
+ if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
+ !Scalar->hasOneUse())
+ return false;
+
+ // Optionally match an extract from another vector.
+ Value *X;
+ bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
+ if (!HasExtract)
+ X = Scalar;
+
+ // Match source value as load of scalar or vector.
+ // Do not vectorize scalar load (widening) if atomic/volatile or under
+ // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
+ // or create data races non-existent in the source.
+ auto *Load = dyn_cast<LoadInst>(X);
+ if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
+ Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
+ mustSuppressSpeculation(*Load))
+ return false;
+
+ const DataLayout &DL = I.getModule()->getDataLayout();
+ Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
+ assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
+
+ // If original AS != Load's AS, we can't bitcast the original pointer and have
+ // to use Load's operand instead. Ideally we would want to strip pointer casts
+ // without changing AS, but there's no API to do that ATM.
+ unsigned AS = Load->getPointerAddressSpace();
+ if (AS != SrcPtr->getType()->getPointerAddressSpace())
+ SrcPtr = Load->getPointerOperand();
+
+ // We are potentially transforming byte-sized (8-bit) memory accesses, so make
+ // sure we have all of our type-based constraints in place for this target.
+ Type *ScalarTy = Scalar->getType();
+ uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
+ unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
+ if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
+ ScalarSize % 8 != 0)
+ return false;
+
+ // Check safety of replacing the scalar load with a larger vector load.
+ // We use minimal alignment (maximum flexibility) because we only care about
+ // the dereferenceable region. When calculating cost and creating a new op,
+ // we may use a larger value based on alignment attributes.
+ unsigned MinVecNumElts = MinVectorSize / ScalarSize;
+ auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
+ unsigned OffsetEltIndex = 0;
+ Align Alignment = Load->getAlign();
+ if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
+ // It is not safe to load directly from the pointer, but we can still peek
+ // through gep offsets and check if it is safe to load from a base address with
+ // updated alignment. If it is, we can shuffle the element(s) into place
+ // after loading.
+ unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
+ APInt Offset(OffsetBitWidth, 0);
+ SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ // We want to shuffle the result down from a high element of a vector, so
+ // the offset must be positive.
+ if (Offset.isNegative())
+ return false;
+
+ // The offset must be a multiple of the scalar element to shuffle cleanly
+ // in the element's size.
+ uint64_t ScalarSizeInBytes = ScalarSize / 8;
+ if (Offset.urem(ScalarSizeInBytes) != 0)
+ return false;
+
+ // If we load MinVecNumElts, will our target element still be loaded?
+ OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
+ if (OffsetEltIndex >= MinVecNumElts)
+ return false;
+
+ if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
+ return false;
+
+ // Update alignment with offset value. Note that the offset could be negated
+ // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
+ // negation does not change the result of the alignment calculation.
+ Alignment = commonAlignment(Alignment, Offset.getZExtValue());
+ }
+
+ // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
+ // Use the greater of the alignment on the load or its source pointer.
+ Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
+ Type *LoadTy = Load->getType();
+ InstructionCost OldCost =
+ TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
+ APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
+ OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
+ /* Insert */ true, HasExtract);
+
+ // New pattern: load VecPtr
+ InstructionCost NewCost =
+ TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
+ // Optionally, we are shuffling the loaded vector element(s) into place.
+ if (OffsetEltIndex)
+ NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy);
+
+ // We can aggressively convert to the vector form because the backend can
+ // invert this transform if it does not result in a performance win.
+ if (OldCost < NewCost || !NewCost.isValid())
+ return false;
+
+ // It is safe and potentially profitable to load a vector directly:
+ // inselt undef, load Scalar, 0 --> load VecPtr
+ IRBuilder<> Builder(Load);
+ Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
+ Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
+
+ // Set everything but element 0 to undef to prevent poison from propagating
+ // from the extra loaded memory. This will also optionally shrink/grow the
+ // vector from the loaded size to the output size.
+ // We assume this operation has no cost in codegen if there was no offset.
+ // Note that we could use freeze to avoid poison problems, but then we might
+ // still need a shuffle to change the vector size.
+ unsigned OutputNumElts = Ty->getNumElements();
+ SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
+ assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
+ Mask[0] = OffsetEltIndex;
+ VecLd = Builder.CreateShuffleVector(VecLd, Mask);
+
+ replaceValue(I, *VecLd);
+ ++NumVecLoad;
+ return true;
+}
+
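The offset handling in vectorizeLoadInsert boils down to a little integer arithmetic: the byte offset accumulated through GEPs must divide evenly by the scalar size and must land inside the widened vector load. A standalone sketch of just that check, with the surrounding safety and cost queries omitted; the function name is illustrative:

#include <cstdint>
#include <optional>

// Returns the element index to shuffle from, or std::nullopt if the offset
// cannot be expressed as a whole, in-bounds element of the widened load.
static std::optional<unsigned> offsetToElementIndex(uint64_t OffsetBytes,
                                                    unsigned ScalarSizeBits,
                                                    unsigned MinVecNumElts) {
  const uint64_t ScalarSizeBytes = ScalarSizeBits / 8;
  if (ScalarSizeBytes == 0 || OffsetBytes % ScalarSizeBytes != 0)
    return std::nullopt;            // offset must shuffle cleanly by elements
  const uint64_t EltIndex = OffsetBytes / ScalarSizeBytes;
  if (EltIndex >= MinVecNumElts)
    return std::nullopt;            // target element would fall past the load
  return static_cast<unsigned>(EltIndex);
}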
/// Determine which, if any, of the inputs should be replaced by a shuffle
/// followed by extract from a different index.
ExtractElementInst *VectorCombine::getShuffleExtract(
@@ -106,8 +241,14 @@ ExtractElementInst *VectorCombine::getShuffleExtract(
Type *VecTy = Ext0->getVectorOperand()->getType();
assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
- int Cost0 = TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
- int Cost1 = TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
+ InstructionCost Cost0 =
+ TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
+ InstructionCost Cost1 =
+ TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
+
+ // If both costs are invalid, no shuffle is needed.
+ if (!Cost0.isValid() && !Cost1.isValid())
+ return nullptr;
// We are extracting from 2 different indexes, so one operand must be shuffled
// before performing a vector operation and/or extract. The more expensive
@@ -143,7 +284,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
"Expected constant extract indexes");
Type *ScalarTy = Ext0->getType();
auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
- int ScalarOpCost, VectorOpCost;
+ InstructionCost ScalarOpCost, VectorOpCost;
// Get cost estimates for scalar and vector versions of the operation.
bool IsBinOp = Instruction::isBinaryOp(Opcode);
@@ -164,9 +305,9 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
- int Extract0Cost =
+ InstructionCost Extract0Cost =
TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
- int Extract1Cost =
+ InstructionCost Extract1Cost =
TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
// A more expensive extract will always be replaced by a splat shuffle.
@@ -176,11 +317,11 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
// TODO: Evaluate whether that always results in lowest cost. Alternatively,
// check the cost of creating a broadcast shuffle and shuffling both
// operands to element 0.
- int CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
+ InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
// Extra uses of the extracts mean that we include those costs in the
// vector total because those instructions will not be eliminated.
- int OldCost, NewCost;
+ InstructionCost OldCost, NewCost;
if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
// Handle a special case. If the 2 extracts are identical, adjust the
// formulas to account for that. The extra use charge allows for either the
@@ -231,8 +372,7 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
auto *VecTy = cast<FixedVectorType>(Vec->getType());
SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
ShufMask[NewIndex] = OldIndex;
- Value *Undef = UndefValue::get(VecTy);
- return Builder.CreateShuffleVector(Vec, Undef, ShufMask, "shift");
+ return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}
/// Given an extract element instruction with constant index operand, shuffle
@@ -366,17 +506,23 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
return false;
- // Disallow non-vector casts and length-changing shuffles.
+ // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
+ // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
+ // mask for scalable type is a splat or not.
+ // 2) Disallow non-vector casts and length-changing shuffles.
// TODO: We could allow any shuffle.
- auto *DestTy = dyn_cast<VectorType>(I.getType());
- auto *SrcTy = cast<VectorType>(V->getType());
- if (!DestTy || I.getOperand(0)->getType() != SrcTy)
+ auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
+ auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
+ if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
return false;
// The new shuffle must not cost more than the old shuffle. The bitcast is
// moved ahead of the shuffle, so assume that it has the same cost as before.
- if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
+ InstructionCost DestCost =
+ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy);
+ InstructionCost SrcCost =
+ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy);
+ if (DestCost > SrcCost || !DestCost.isValid())
return false;
unsigned DestNumElts = DestTy->getNumElements();
@@ -399,8 +545,7 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
// bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
++NumShufOfBitcast;
Value *CastV = Builder.CreateBitCast(V, DestTy);
- Value *Shuf =
- Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask);
+ Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
replaceValue(I, *Shuf);
return true;
}
@@ -467,7 +612,7 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
"Unexpected types for insert element into binop or cmp");
unsigned Opcode = I.getOpcode();
- int ScalarOpCost, VectorOpCost;
+ InstructionCost ScalarOpCost, VectorOpCost;
if (IsCmp) {
ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
@@ -478,16 +623,16 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
- int InsertCost =
+ InstructionCost InsertCost =
TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
- int OldCost = (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) +
- VectorOpCost;
- int NewCost = ScalarOpCost + InsertCost +
- (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
- (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
+ InstructionCost OldCost =
+ (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
+ InstructionCost NewCost = ScalarOpCost + InsertCost +
+ (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
+ (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
// We want to scalarize unless the vector variant actually has lower cost.
- if (OldCost < NewCost)
+ if (OldCost < NewCost || !NewCost.isValid())
return false;
// vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
@@ -567,7 +712,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
if (!VecTy)
return false;
- int OldCost = TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
+ InstructionCost OldCost =
+ TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
@@ -578,7 +724,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
- int NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
+ InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
NewCost +=
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy);
NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
@@ -587,7 +733,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
// Aggressively form vector ops if the cost is equal because the transform
// may enable further optimization.
// Codegen can reverse this transform (scalarize) if it was not profitable.
- if (OldCost < NewCost)
+ if (OldCost < NewCost || !NewCost.isValid())
return false;
// Create a vector constant from the 2 scalar constants.
@@ -612,6 +758,10 @@ bool VectorCombine::run() {
if (DisableVectorCombine)
return false;
+ // Don't attempt vectorization if the target does not support vectors.
+ if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
+ return false;
+
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
@@ -625,6 +775,7 @@ bool VectorCombine::run() {
if (isa<DbgInfoIntrinsic>(I))
continue;
Builder.SetInsertPoint(&I);
+ MadeChange |= vectorizeLoadInsert(I);
MadeChange |= foldExtractExtract(I);
MadeChange |= foldBitcastShuf(I);
MadeChange |= scalarizeBinopOrCmp(I);
diff --git a/contrib/llvm-project/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp b/contrib/llvm-project/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
index 031a963cd3b0..6af7bc699d05 100644
--- a/contrib/llvm-project/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
+++ b/contrib/llvm-project/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
@@ -16,7 +16,7 @@
#include <map>
-#if LLVM_LIBXML2_ENABLED
+#if LLVM_ENABLE_LIBXML2
#include <libxml/xmlreader.h>
#endif
@@ -41,7 +41,7 @@ public:
private:
static void errorCallback(void *Ctx, const char *Format, ...);
Error getParseError();
-#if LLVM_LIBXML2_ENABLED
+#if LLVM_ENABLE_LIBXML2
xmlDocPtr CombinedDoc = nullptr;
std::vector<xmlDocPtr> MergedDocs;
@@ -56,7 +56,7 @@ private:
bool ParseErrorOccurred = false;
};
-#if LLVM_LIBXML2_ENABLED
+#if LLVM_ENABLE_LIBXML2
static constexpr std::pair<StringLiteral, StringLiteral> MtNsHrefsPrefixes[] = {
{"urn:schemas-microsoft-com:asm.v1", "ms_asmv1"},
diff --git a/contrib/llvm-project/llvm/lib/XRay/InstrumentationMap.cpp b/contrib/llvm-project/llvm/lib/XRay/InstrumentationMap.cpp
index de0a9e60a511..e6534e5a7be7 100644
--- a/contrib/llvm-project/llvm/lib/XRay/InstrumentationMap.cpp
+++ b/contrib/llvm-project/llvm/lib/XRay/InstrumentationMap.cpp
@@ -95,42 +95,46 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
if (ObjFile.getBinary()->isELF()) {
uint32_t RelativeRelocation = [](object::ObjectFile *ObjFile) {
if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(ObjFile))
- return ELFObj->getELFFile()->getRelativeRelocationType();
+ return ELFObj->getELFFile().getRelativeRelocationType();
else if (const auto *ELFObj =
dyn_cast<object::ELF32BEObjectFile>(ObjFile))
- return ELFObj->getELFFile()->getRelativeRelocationType();
+ return ELFObj->getELFFile().getRelativeRelocationType();
else if (const auto *ELFObj =
dyn_cast<object::ELF64LEObjectFile>(ObjFile))
- return ELFObj->getELFFile()->getRelativeRelocationType();
+ return ELFObj->getELFFile().getRelativeRelocationType();
else if (const auto *ELFObj =
dyn_cast<object::ELF64BEObjectFile>(ObjFile))
- return ELFObj->getELFFile()->getRelativeRelocationType();
+ return ELFObj->getELFFile().getRelativeRelocationType();
else
return static_cast<uint32_t>(0);
}(ObjFile.getBinary());
- bool (*SupportsRelocation)(uint64_t);
+ object::SupportsRelocation Supports;
object::RelocationResolver Resolver;
- std::tie(SupportsRelocation, Resolver) =
+ std::tie(Supports, Resolver) =
object::getRelocationResolver(*ObjFile.getBinary());
for (const object::SectionRef &Section : Sections) {
for (const object::RelocationRef &Reloc : Section.relocations()) {
if (ObjFile.getBinary()->getArch() == Triple::arm) {
- if (SupportsRelocation && SupportsRelocation(Reloc.getType())) {
+ if (Supports && Supports(Reloc.getType())) {
Expected<uint64_t> ValueOrErr = Reloc.getSymbol()->getValue();
if (!ValueOrErr)
return ValueOrErr.takeError();
- Relocs.insert({Reloc.getOffset(), Resolver(Reloc, *ValueOrErr, 0)});
+ Relocs.insert(
+ {Reloc.getOffset(),
+ object::resolveRelocation(Resolver, Reloc, *ValueOrErr, 0)});
}
- } else if (SupportsRelocation && SupportsRelocation(Reloc.getType())) {
+ } else if (Supports && Supports(Reloc.getType())) {
auto AddendOrErr = object::ELFRelocationRef(Reloc).getAddend();
auto A = AddendOrErr ? *AddendOrErr : 0;
Expected<uint64_t> ValueOrErr = Reloc.getSymbol()->getValue();
if (!ValueOrErr)
// TODO: Test this error.
return ValueOrErr.takeError();
- Relocs.insert({Reloc.getOffset(), Resolver(Reloc, *ValueOrErr, A)});
+ Relocs.insert(
+ {Reloc.getOffset(),
+ object::resolveRelocation(Resolver, Reloc, *ValueOrErr, A)});
} else if (Reloc.getType() == RelativeRelocation) {
if (auto AddendOrErr = object::ELFRelocationRef(Reloc).getAddend())
Relocs.insert({Reloc.getOffset(), *AddendOrErr});
diff --git a/contrib/llvm-project/llvm/tools/bugpoint/CrashDebugger.cpp b/contrib/llvm-project/llvm/tools/bugpoint/CrashDebugger.cpp
index 1a39ff654f05..d06bca9a1258 100644
--- a/contrib/llvm-project/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/contrib/llvm-project/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -1229,7 +1229,7 @@ static Error DebugACrash(BugDriver &BD, BugTester TestFn) {
unsigned NewSize = 0;
for (std::string &Name : FunctionNames) {
Function *Fn = BD.getProgram().getFunction(Name);
- assert(Fn && "Could not find funcion?");
+ assert(Fn && "Could not find function?");
std::vector<Attribute> Attrs;
for (Attribute A : Fn->getAttributes().getFnAttributes())
diff --git a/contrib/llvm-project/llvm/tools/bugpoint/ExecutionDriver.cpp b/contrib/llvm-project/llvm/tools/bugpoint/ExecutionDriver.cpp
index 4c83a9598976..f06f378962d9 100644
--- a/contrib/llvm-project/llvm/tools/bugpoint/ExecutionDriver.cpp
+++ b/contrib/llvm-project/llvm/tools/bugpoint/ExecutionDriver.cpp
@@ -33,7 +33,6 @@ enum OutputType {
RunJIT,
RunLLC,
RunLLCIA,
- LLC_Safe,
CompileCustom,
Custom
};
@@ -53,7 +52,6 @@ cl::opt<OutputType> InterpreterSel(
clEnumValN(RunLLC, "run-llc", "Compile with LLC"),
clEnumValN(RunLLCIA, "run-llc-ia",
"Compile with LLC with integrated assembler"),
- clEnumValN(LLC_Safe, "llc-safe", "Use LLC for all"),
clEnumValN(CompileCustom, "compile-custom",
"Use -compile-command to define a command to "
"compile the bitcode. Useful to avoid linking."),
@@ -182,7 +180,6 @@ Error BugDriver::initializeExecutionEnvironment() {
break;
case RunLLC:
case RunLLCIA:
- case LLC_Safe:
Interpreter = AbstractInterpreter::createLLC(
getToolName(), Message, CCBinary, &ToolArgv, &CCToolArgv,
InterpreterSel == RunLLCIA);
@@ -212,21 +209,12 @@ Error BugDriver::initializeExecutionEnvironment() {
switch (SafeInterpreterSel) {
case AutoPick:
// In "llc-safe" mode, default to using LLC as the "safe" backend.
- if (!SafeInterpreter && InterpreterSel == LLC_Safe) {
+ if (InterpreterSel == RunLLC) {
SafeInterpreterSel = RunLLC;
SafeToolArgs.push_back("--relocation-model=pic");
SafeInterpreter = AbstractInterpreter::createLLC(
Path.c_str(), Message, CCBinary, &SafeToolArgs, &CCToolArgv);
- }
-
- if (!SafeInterpreter && InterpreterSel != RunLLC &&
- InterpreterSel != RunJIT) {
- SafeInterpreterSel = RunLLC;
- SafeToolArgs.push_back("--relocation-model=pic");
- SafeInterpreter = AbstractInterpreter::createLLC(
- Path.c_str(), Message, CCBinary, &SafeToolArgs, &CCToolArgv);
- }
- if (!SafeInterpreter) {
+ } else if (InterpreterSel != CompileCustom) {
SafeInterpreterSel = AutoPick;
Message = "Sorry, I can't automatically select a safe interpreter!\n";
}
@@ -247,7 +235,7 @@ Error BugDriver::initializeExecutionEnvironment() {
"\"safe\" backend right now!\n";
break;
}
- if (!SafeInterpreter) {
+ if (!SafeInterpreter && InterpreterSel != CompileCustom) {
outs() << Message << "\nExiting.\n";
exit(1);
}
diff --git a/contrib/llvm-project/llvm/tools/bugpoint/ExtractFunction.cpp b/contrib/llvm-project/llvm/tools/bugpoint/ExtractFunction.cpp
index d9047acd30e1..7a75cb90edc5 100644
--- a/contrib/llvm-project/llvm/tools/bugpoint/ExtractFunction.cpp
+++ b/contrib/llvm-project/llvm/tools/bugpoint/ExtractFunction.cpp
@@ -386,7 +386,7 @@ BugDriver::extractMappedBlocksFromModule(const std::vector<BasicBlock *> &BBs,
for (Function &F : *M)
for (BasicBlock &BB : F)
// Check if this block is going to be extracted.
- if (std::find(BBs.begin(), BBs.end(), &BB) == BBs.end())
+ if (!llvm::is_contained(BBs, &BB))
BlocksToExtract.push_back(&BB);
raw_fd_ostream OS(Temp->FD, /*shouldClose*/ false);
diff --git a/contrib/llvm-project/llvm/tools/bugpoint/OptimizerDriver.cpp b/contrib/llvm-project/llvm/tools/bugpoint/OptimizerDriver.cpp
index 25a970bd6878..ca78735202fc 100644
--- a/contrib/llvm-project/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/contrib/llvm-project/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -205,6 +205,9 @@ bool BugDriver::runPasses(Module &Program,
for (unsigned i = 0, e = OptArgs.size(); i != e; ++i)
Args.push_back(OptArgs[i]);
+ // Pin to legacy PM since bugpoint has lots of infra and hacks revolving
+ // around the legacy PM.
+ Args.push_back("-enable-new-pm=0");
Args.push_back("-disable-symbolication");
Args.push_back("-o");
Args.push_back(OutputFilename);
diff --git a/contrib/llvm-project/llvm/tools/bugpoint/ToolRunner.cpp b/contrib/llvm-project/llvm/tools/bugpoint/ToolRunner.cpp
index d880aca044d1..c4ea1dad122b 100644
--- a/contrib/llvm-project/llvm/tools/bugpoint/ToolRunner.cpp
+++ b/contrib/llvm-project/llvm/tools/bugpoint/ToolRunner.cpp
@@ -495,7 +495,7 @@ Expected<int> LLC::ExecuteProgram(const std::string &Bitcode,
return std::move(E);
std::vector<std::string> CCArgs(ArgsForCC);
- CCArgs.insert(CCArgs.end(), SharedLibs.begin(), SharedLibs.end());
+ llvm::append_range(CCArgs, SharedLibs);
// Assuming LLC worked, compile the result with CC and run it.
return cc->ExecuteProgram(OutputAsmFile, Args, *FileKind, InputFile,
diff --git a/contrib/llvm-project/llvm/tools/llc/llc.cpp b/contrib/llvm-project/llvm/tools/llc/llc.cpp
index 95f2963ecbd6..48f0adf7c726 100644
--- a/contrib/llvm-project/llvm/tools/llc/llc.cpp
+++ b/contrib/llvm-project/llvm/tools/llc/llc.cpp
@@ -37,6 +37,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Pass.h"
+#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
@@ -81,6 +82,15 @@ TimeCompilations("time-compilations", cl::Hidden, cl::init(1u),
cl::value_desc("N"),
cl::desc("Repeat compilation N times for timing"));
+static cl::opt<std::string>
+ BinutilsVersion("binutils-version", cl::Hidden,
+ cl::desc("Produced object files can use all ELF features "
+ "supported by this binutils version and newer."
+ "If -no-integrated-as is specified, the generated "
+ "assembly will consider GNU as support."
+ "'none' means that all ELF features can be used, "
+ "regardless of binutils support"));
+
static cl::opt<bool>
NoIntegratedAssembler("no-integrated-as", cl::Hidden,
cl::desc("Disable integrated assembler"));
@@ -142,11 +152,13 @@ static cl::opt<bool> RemarksWithHotness(
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
-static cl::opt<unsigned>
- RemarksHotnessThreshold("pass-remarks-hotness-threshold",
- cl::desc("Minimum profile count required for "
- "an optimization remark to be output"),
- cl::Hidden);
+static cl::opt<Optional<uint64_t>, false, remarks::HotnessThresholdParser>
+ RemarksHotnessThreshold(
+ "pass-remarks-hotness-threshold",
+ cl::desc("Minimum profile count required for "
+ "an optimization remark to be output. "
+ "Use 'auto' to apply the threshold from profile summary."),
+ cl::value_desc("N or 'auto'"), cl::init(0), cl::Hidden);
static cl::opt<std::string>
RemarksFilename("pass-remarks-output",
@@ -188,6 +200,25 @@ static cl::opt<RunPassOption, true, cl::parser<std::string>> RunPass(
static int compileModule(char **, LLVMContext &);
+LLVM_ATTRIBUTE_NORETURN static void reportError(Twine Msg,
+ StringRef Filename = "") {
+ SmallString<256> Prefix;
+ if (!Filename.empty()) {
+ if (Filename == "-")
+ Filename = "<stdin>";
+ ("'" + Twine(Filename) + "': ").toStringRef(Prefix);
+ }
+ WithColor::error(errs(), "llc") << Prefix << Msg << "\n";
+ exit(1);
+}
+
+LLVM_ATTRIBUTE_NORETURN static void reportError(Error Err, StringRef Filename) {
+ assert(Err);
+ handleAllErrors(createFileError(Filename, std::move(Err)),
+ [&](const ErrorInfoBase &EI) { reportError(EI.message()); });
+ llvm_unreachable("reportError() should not return");
+}
+
static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName,
Triple::OSType OS,
const char *ProgName) {
@@ -224,7 +255,7 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName,
OutputFilename += ".o";
break;
case CGFT_Null:
- OutputFilename += ".null";
+ OutputFilename = "-";
break;
}
}
@@ -248,7 +279,7 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName,
OpenFlags |= sys::fs::OF_Text;
auto FDOut = std::make_unique<ToolOutputFile>(OutputFilename, EC, OpenFlags);
if (EC) {
- WithColor::error() << EC.message() << '\n';
+ reportError(EC.message());
return nullptr;
}
@@ -316,7 +347,7 @@ int main(int argc, char **argv) {
initializeConstantHoistingLegacyPassPass(*Registry);
initializeScalarOpts(*Registry);
initializeVectorization(*Registry);
- initializeScalarizeMaskedMemIntrinPass(*Registry);
+ initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
initializeExpandReductionsPass(*Registry);
initializeHardwareLoopsPass(*Registry);
initializeTransformUtils(*Registry);
@@ -341,18 +372,12 @@ int main(int argc, char **argv) {
setupLLVMOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
RemarksFormat, RemarksWithHotness,
RemarksHotnessThreshold);
- if (Error E = RemarksFileOrErr.takeError()) {
- WithColor::error(errs(), argv[0]) << toString(std::move(E)) << '\n';
- return 1;
- }
+ if (Error E = RemarksFileOrErr.takeError())
+ reportError(std::move(E), RemarksFilename);
std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr);
- if (InputLanguage != "" && InputLanguage != "ir" &&
- InputLanguage != "mir") {
- WithColor::error(errs(), argv[0])
- << "input language must be '', 'IR' or 'MIR'\n";
- return 1;
- }
+ if (InputLanguage != "" && InputLanguage != "ir" && InputLanguage != "mir")
+ reportError("input language must be '', 'IR' or 'MIR'");
// Compile the module TimeCompilations times to give better compile time
// metrics.
@@ -424,14 +449,32 @@ static int compileModule(char **argv, LLVMContext &Context) {
case '3': OLvl = CodeGenOpt::Aggressive; break;
}
- TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
- Options.DisableIntegratedAS = NoIntegratedAssembler;
- Options.MCOptions.ShowMCEncoding = ShowMCEncoding;
- Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory;
- Options.MCOptions.AsmVerbose = AsmVerbose;
- Options.MCOptions.PreserveAsmComments = PreserveComments;
- Options.MCOptions.IASSearchPaths = IncludeDirs;
- Options.MCOptions.SplitDwarfFile = SplitDwarfFile;
+ // Parse 'none' or '$major.$minor'. Disallow -binutils-version=0 because we
+ // use that to indicate the MC default.
+ if (!BinutilsVersion.empty() && BinutilsVersion != "none") {
+ StringRef V = BinutilsVersion.getValue();
+ unsigned Num;
+ if (V.consumeInteger(10, Num) || Num == 0 ||
+ !(V.empty() ||
+ (V.consume_front(".") && !V.consumeInteger(10, Num) && V.empty()))) {
+ WithColor::error(errs(), argv[0])
+ << "invalid -binutils-version, accepting 'none' or major.minor\n";
+ return 1;
+ }
+ }
+ TargetOptions Options;
+ auto InitializeOptions = [&](const Triple &TheTriple) {
+ Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
+ Options.BinutilsVersion =
+ TargetMachine::parseBinutilsVersion(BinutilsVersion);
+ Options.DisableIntegratedAS = NoIntegratedAssembler;
+ Options.MCOptions.ShowMCEncoding = ShowMCEncoding;
+ Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory;
+ Options.MCOptions.AsmVerbose = AsmVerbose;
+ Options.MCOptions.PreserveAsmComments = PreserveComments;
+ Options.MCOptions.IASSearchPaths = IncludeDirs;
+ Options.MCOptions.SplitDwarfFile = SplitDwarfFile;
+ };
Optional<Reloc::Model> RM = codegen::getExplicitRelocModel();
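The -binutils-version check earlier in this hunk accepts 'none' or a '$major' / '$major.$minor' pair and rejects a major of 0 (which is reserved for the MC default). The same acceptance rule, written as a small standalone validator for illustration; it uses std::string instead of StringRef and the function name is hypothetical:

#include <cctype>
#include <string>

// Accepts "" (unset), "none", "N", or "N.M" with N > 0; rejects everything else.
static bool isValidBinutilsVersion(const std::string &S) {
  if (S.empty() || S == "none")
    return true;
  std::size_t I = 0;
  auto readNumber = [&](unsigned long &Out) {
    std::size_t Start = I;
    while (I < S.size() && std::isdigit(static_cast<unsigned char>(S[I])))
      ++I;
    if (I == Start)
      return false;
    Out = std::stoul(S.substr(Start, I - Start));
    return true;
  };
  unsigned long Major = 0, Minor = 0;
  if (!readNumber(Major) || Major == 0)
    return false;
  if (I == S.size())
    return true;                             // "N"
  if (S[I] != '.')
    return false;
  ++I;
  return readNumber(Minor) && I == S.size(); // "N.M"
}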
@@ -460,12 +503,11 @@ static int compileModule(char **argv, LLVMContext &Context) {
// On AIX, setting the relocation model to anything other than PIC is
// considered a user error.
- if (TheTriple.isOSAIX() && RM.hasValue() && *RM != Reloc::PIC_) {
- WithColor::error(errs(), argv[0])
- << "invalid relocation model, AIX only supports PIC.\n";
- exit(1);
- }
+ if (TheTriple.isOSAIX() && RM.hasValue() && *RM != Reloc::PIC_)
+ reportError("invalid relocation model, AIX only supports PIC",
+ InputFilename);
+ InitializeOptions(TheTriple);
Target = std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM,
codegen::getExplicitCodeModel(), OLvl));
@@ -510,6 +552,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
return 1;
}
+ InitializeOptions(TheTriple);
Target = std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM,
codegen::getExplicitCodeModel(), OLvl));
@@ -535,10 +578,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
std::error_code EC;
DwoOut = std::make_unique<ToolOutputFile>(SplitDwarfOutputFile, EC,
sys::fs::OF_None);
- if (EC) {
- WithColor::error(errs(), argv[0]) << EC.message() << '\n';
- return 1;
- }
+ if (EC)
+ reportError(EC.message(), SplitDwarfOutputFile);
}
// Build up all of the passes that we want to do to the module.
@@ -554,12 +595,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
// Verify module immediately to catch problems before doInitialization() is
// called on any passes.
- if (!NoVerify && verifyModule(*M, &errs())) {
- std::string Prefix =
- (Twine(argv[0]) + Twine(": ") + Twine(InputFilename)).str();
- WithColor::error(errs(), Prefix) << "input module is broken!\n";
- return 1;
- }
+ if (!NoVerify && verifyModule(*M, &errs()))
+ reportError("input module cannot be verified", InputFilename);
// Override function attributes based on CPUStr, FeaturesStr, and command line
// flags.
@@ -618,10 +655,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
} else if (Target->addPassesToEmitFile(
PM, *OS, DwoOut ? &DwoOut->os() : nullptr,
codegen::getFileType(), NoVerify, MMIWP)) {
- WithColor::warning(errs(), argv[0])
- << "target does not support generation of this"
- << " file type!\n";
- return 1;
+ reportError("target does not support generation of this file type");
}
const_cast<TargetLoweringObjectFile *>(LLVMTM.getObjFileLowering())
diff --git a/contrib/llvm-project/llvm/tools/lli/ChildTarget/ChildTarget.cpp b/contrib/llvm-project/llvm/tools/lli/ChildTarget/ChildTarget.cpp
index 77b1d47a9466..5772baca1d09 100644
--- a/contrib/llvm-project/llvm/tools/lli/ChildTarget/ChildTarget.cpp
+++ b/contrib/llvm-project/llvm/tools/lli/ChildTarget/ChildTarget.cpp
@@ -1,5 +1,6 @@
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
+#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Process.h"
@@ -53,8 +54,9 @@ int main(int argc, char *argv[]) {
RTDyldMemoryManager::deregisterEHFramesInProcess(Addr, Size);
};
- FDRawChannel Channel(InFD, OutFD);
- typedef remote::OrcRemoteTargetServer<FDRawChannel, HostOrcArch> JITServer;
+ shared::FDRawByteChannel Channel(InFD, OutFD);
+ typedef remote::OrcRemoteTargetServer<shared::FDRawByteChannel, HostOrcArch>
+ JITServer;
JITServer Server(Channel, SymbolLookup, RegisterEHFrames, DeregisterEHFrames);
while (!Server.receivedTerminate())
diff --git a/contrib/llvm-project/llvm/tools/lli/RemoteJITUtils.h b/contrib/llvm-project/llvm/tools/lli/RemoteJITUtils.h
index cc93294af0cf..cc8d034f62a5 100644
--- a/contrib/llvm-project/llvm/tools/lli/RemoteJITUtils.h
+++ b/contrib/llvm-project/llvm/tools/lli/RemoteJITUtils.h
@@ -13,7 +13,7 @@
#ifndef LLVM_TOOLS_LLI_REMOTEJITUTILS_H
#define LLVM_TOOLS_LLI_REMOTEJITUTILS_H
-#include "llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h"
+#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include <mutex>
@@ -23,55 +23,8 @@
#include <io.h>
#endif
-/// RPC channel that reads from and writes from file descriptors.
-class FDRawChannel final : public llvm::orc::rpc::RawByteChannel {
-public:
- FDRawChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {}
-
- llvm::Error readBytes(char *Dst, unsigned Size) override {
- assert(Dst && "Attempt to read into null.");
- ssize_t Completed = 0;
- while (Completed < static_cast<ssize_t>(Size)) {
- ssize_t Read = ::read(InFD, Dst + Completed, Size - Completed);
- if (Read <= 0) {
- auto ErrNo = errno;
- if (ErrNo == EAGAIN || ErrNo == EINTR)
- continue;
- else
- return llvm::errorCodeToError(
- std::error_code(errno, std::generic_category()));
- }
- Completed += Read;
- }
- return llvm::Error::success();
- }
-
- llvm::Error appendBytes(const char *Src, unsigned Size) override {
- assert(Src && "Attempt to append from null.");
- ssize_t Completed = 0;
- while (Completed < static_cast<ssize_t>(Size)) {
- ssize_t Written = ::write(OutFD, Src + Completed, Size - Completed);
- if (Written < 0) {
- auto ErrNo = errno;
- if (ErrNo == EAGAIN || ErrNo == EINTR)
- continue;
- else
- return llvm::errorCodeToError(
- std::error_code(errno, std::generic_category()));
- }
- Completed += Written;
- }
- return llvm::Error::success();
- }
-
- llvm::Error send() override { return llvm::Error::success(); }
-
-private:
- int InFD, OutFD;
-};
-
// launch the remote process (see lli.cpp) and return a channel to it.
-std::unique_ptr<FDRawChannel> launchRemote();
+std::unique_ptr<llvm::orc::shared::FDRawByteChannel> launchRemote();
namespace llvm {
@@ -146,6 +99,27 @@ private:
std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr;
std::shared_ptr<LegacyJITSymbolResolver> Resolver;
};
+
+template <typename RemoteT>
+class RemoteResolver : public LegacyJITSymbolResolver {
+public:
+
+ RemoteResolver(RemoteT &R) : R(R) {}
+
+ JITSymbol findSymbol(const std::string &Name) override {
+ if (auto Addr = R.getSymbolAddress(Name))
+ return JITSymbol(*Addr, JITSymbolFlags::Exported);
+ else
+ return Addr.takeError();
+ }
+
+ JITSymbol findSymbolInLogicalDylib(const std::string &Name) override {
+ return nullptr;
+ }
+
+public:
+ RemoteT &R;
+};
}
#endif
diff --git a/contrib/llvm-project/llvm/tools/lli/lli.cpp b/contrib/llvm-project/llvm/tools/lli/lli.cpp
index 981e0812d45e..70c838126946 100644
--- a/contrib/llvm-project/llvm/tools/lli/lli.cpp
+++ b/contrib/llvm-project/llvm/tools/lli/lli.cpp
@@ -31,7 +31,7 @@
#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
-#include "llvm/ExecutionEngine/OrcMCJITReplacement.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
@@ -76,7 +76,7 @@ static codegen::RegisterCodeGenFlags CGF;
namespace {
- enum class JITKind { MCJIT, OrcMCJITReplacement, OrcLazy };
+ enum class JITKind { MCJIT, OrcLazy };
cl::opt<std::string>
InputFile(cl::desc("<input bitcode>"), cl::Positional, cl::init("-"));
@@ -92,9 +92,6 @@ namespace {
"jit-kind", cl::desc("Choose underlying JIT kind."),
cl::init(JITKind::MCJIT),
cl::values(clEnumValN(JITKind::MCJIT, "mcjit", "MCJIT"),
- clEnumValN(JITKind::OrcMCJITReplacement, "orc-mcjit",
- "Orc-based MCJIT replacement "
- "(deprecated)"),
clEnumValN(JITKind::OrcLazy, "orc-lazy",
"Orc-based lazy JIT.")));
@@ -449,8 +446,6 @@ int main(int argc, char **argv, char * const *envp) {
builder.setEngineKind(ForceInterpreter
? EngineKind::Interpreter
: EngineKind::JIT);
- builder.setUseOrcMCJITReplacement(AcknowledgeORCv1Deprecation,
- UseJITKind == JITKind::OrcMCJITReplacement);
// If we are supposed to override the target triple, do so now.
if (!TargetTriple.empty())
@@ -476,7 +471,8 @@ int main(int argc, char **argv, char * const *envp) {
builder.setOptLevel(getOptLevel());
- TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
+ TargetOptions Options =
+ codegen::InitTargetOptionsFromCodeGenFlags(Triple(TargetTriple));
if (codegen::getFloatABIForCalls() != FloatABI::Default)
Options.FloatABIType = codegen::getFloatABIForCalls();
@@ -674,7 +670,7 @@ int main(int argc, char **argv, char * const *envp) {
// MCJIT itself. FIXME.
// Launch the remote process and get a channel to it.

- std::unique_ptr<FDRawChannel> C = launchRemote();
+ std::unique_ptr<orc::shared::FDRawByteChannel> C = launchRemote();
if (!C) {
WithColor::error(errs(), argv[0]) << "failed to launch remote JIT.\n";
exit(1);
@@ -695,14 +691,7 @@ int main(int argc, char **argv, char * const *envp) {
// Forward MCJIT's symbol resolution calls to the remote.
static_cast<ForwardingMemoryManager *>(RTDyldMM)->setResolver(
- orc::createLambdaResolver(
- AcknowledgeORCv1Deprecation,
- [](const std::string &Name) { return nullptr; },
- [&](const std::string &Name) {
- if (auto Addr = ExitOnErr(R->getSymbolAddress(Name)))
- return JITSymbol(Addr, JITSymbolFlags::Exported);
- return JITSymbol(nullptr);
- }));
+ std::make_unique<RemoteResolver<MyRemote>>(*R));
// Grab the target address of the JIT'd main function on the remote and call
// it.
@@ -1026,7 +1015,7 @@ void disallowOrcOptions() {
}
}
-std::unique_ptr<FDRawChannel> launchRemote() {
+std::unique_ptr<orc::shared::FDRawByteChannel> launchRemote() {
#ifndef LLVM_ON_UNIX
llvm_unreachable("launchRemote not supported on non-Unix platforms");
#else
@@ -1076,6 +1065,7 @@ std::unique_ptr<FDRawChannel> launchRemote() {
close(PipeFD[1][1]);
// Return an RPC channel connected to our end of the pipes.
- return std::make_unique<FDRawChannel>(PipeFD[1][0], PipeFD[0][1]);
+ return std::make_unique<orc::shared::FDRawByteChannel>(PipeFD[1][0],
+ PipeFD[0][1]);
#endif
}
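
launchRemote, as the hunk above shows, builds two anonymous pipes, forks the child executor, and wraps the parent's read end of one pipe and write end of the other in an FDRawByteChannel. A minimal POSIX sketch of that two-pipe wiring, under the assumption that a simple struct (ParentChannel, a hypothetical placeholder) stands in for the channel type; error handling and the exec of the real child binary are omitted.

#include <sys/types.h>
#include <unistd.h>
#include <cstdio>

// Hypothetical placeholder for orc::shared::FDRawByteChannel: it just
// remembers which descriptors to read from and write to.
struct ParentChannel {
  int InFD, OutFD;
};

int main() {
  int PipeFD[2][2]; // PipeFD[0]: parent -> child, PipeFD[1]: child -> parent
  if (pipe(PipeFD[0]) != 0 || pipe(PipeFD[1]) != 0)
    return 1;

  pid_t Child = fork();
  if (Child == 0) {
    // Child: read from PipeFD[0][0], write to PipeFD[1][1]. In lli the two
    // descriptor numbers are passed to the child executor on its command line.
    close(PipeFD[0][1]);
    close(PipeFD[1][0]);
    char Msg[6] = {0};
    read(PipeFD[0][0], Msg, 5);
    write(PipeFD[1][1], "pong", 4);
    _exit(0);
  }

  // Parent: close the child's ends, keep "read from child" and "write to child",
  // mirroring FDRawByteChannel(PipeFD[1][0], PipeFD[0][1]) above.
  close(PipeFD[0][0]);
  close(PipeFD[1][1]);
  ParentChannel C{PipeFD[1][0], PipeFD[0][1]};

  write(C.OutFD, "ping", 4);
  char Buf[5] = {0};
  read(C.InFD, Buf, 4);
  std::printf("child said: %s\n", Buf); // child said: pong
  return 0;
}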
diff --git a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp
index d699d4323f0a..4c26c8cad3fa 100644
--- a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -125,7 +125,7 @@ MODIFIERS:
[V] - display the version and exit
)";
-void printHelpMessage() {
+static void printHelpMessage() {
if (Stem.contains_lower("ranlib"))
outs() << RanlibHelp;
else if (Stem.contains_lower("ar"))
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp
index b3c895b44a6d..baa968820b63 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -88,6 +88,12 @@ private:
ArrayRef<ExpansionRecord> Expansions,
const CoverageMapping &Coverage);
+ /// Create source views for the branches of the view.
+ void attachBranchSubViews(SourceCoverageView &View, StringRef SourceName,
+ ArrayRef<CountedRegion> Branches,
+ const MemoryBuffer &File,
+ CoverageData &CoverageInfo);
+
/// Create the source view of a particular function.
std::unique_ptr<SourceCoverageView>
createFunctionView(const FunctionRecord &Function,
@@ -130,6 +136,9 @@ private:
CoverageFiltersMatchAll Filters;
CoverageFilters IgnoreFilenameFilters;
+ /// True if InputSourceFiles are provided.
+ bool HadSourceFiles = false;
+
/// The path to the indexed profile.
std::string PGOFilename;
@@ -194,6 +203,7 @@ void CodeCoverageTool::addCollectedPath(const std::string &Path) {
sys::path::remove_dots(EffectivePath, /*remove_dot_dots=*/true);
if (!IgnoreFilenameFilters.matchesFilename(EffectivePath))
SourceFiles.emplace_back(EffectivePath.str());
+ HadSourceFiles = !SourceFiles.empty();
}
void CodeCoverageTool::collectPaths(const std::string &Path) {
@@ -264,15 +274,45 @@ void CodeCoverageTool::attachExpansionSubViews(
if (!SourceBuffer)
continue;
+ auto SubViewBranches = ExpansionCoverage.getBranches();
auto SubViewExpansions = ExpansionCoverage.getExpansions();
auto SubView =
SourceCoverageView::create(Expansion.Function.Name, SourceBuffer.get(),
ViewOpts, std::move(ExpansionCoverage));
attachExpansionSubViews(*SubView, SubViewExpansions, Coverage);
+ attachBranchSubViews(*SubView, Expansion.Function.Name, SubViewBranches,
+ SourceBuffer.get(), ExpansionCoverage);
View.addExpansion(Expansion.Region, std::move(SubView));
}
}
+void CodeCoverageTool::attachBranchSubViews(SourceCoverageView &View,
+ StringRef SourceName,
+ ArrayRef<CountedRegion> Branches,
+ const MemoryBuffer &File,
+ CoverageData &CoverageInfo) {
+ if (!ViewOpts.ShowBranchCounts && !ViewOpts.ShowBranchPercents)
+ return;
+
+ const auto *NextBranch = Branches.begin();
+ const auto *EndBranch = Branches.end();
+
+ // Group branches that have the same line number into the same subview.
+ while (NextBranch != EndBranch) {
+ std::vector<CountedRegion> ViewBranches;
+ unsigned CurrentLine = NextBranch->LineStart;
+
+ while (NextBranch != EndBranch && CurrentLine == NextBranch->LineStart)
+ ViewBranches.push_back(*NextBranch++);
+
+ if (!ViewBranches.empty()) {
+ auto SubView = SourceCoverageView::create(SourceName, File, ViewOpts,
+ std::move(CoverageInfo));
+ View.addBranch(CurrentLine, ViewBranches, std::move(SubView));
+ }
+ }
+}
+
std::unique_ptr<SourceCoverageView>
CodeCoverageTool::createFunctionView(const FunctionRecord &Function,
const CoverageMapping &Coverage) {
@@ -283,11 +323,14 @@ CodeCoverageTool::createFunctionView(const FunctionRecord &Function,
if (!SourceBuffer)
return nullptr;
+ auto Branches = FunctionCoverage.getBranches();
auto Expansions = FunctionCoverage.getExpansions();
auto View = SourceCoverageView::create(DC.demangle(Function.Name),
SourceBuffer.get(), ViewOpts,
std::move(FunctionCoverage));
attachExpansionSubViews(*View, Expansions, Coverage);
+ attachBranchSubViews(*View, DC.demangle(Function.Name), Branches,
+ SourceBuffer.get(), FunctionCoverage);
return View;
}
@@ -302,10 +345,13 @@ CodeCoverageTool::createSourceFileView(StringRef SourceFile,
if (FileCoverage.empty())
return nullptr;
+ auto Branches = FileCoverage.getBranches();
auto Expansions = FileCoverage.getExpansions();
auto View = SourceCoverageView::create(SourceFile, SourceBuffer.get(),
ViewOpts, std::move(FileCoverage));
attachExpansionSubViews(*View, Expansions, Coverage);
+ attachBranchSubViews(*View, SourceFile, Branches, SourceBuffer.get(),
+ FileCoverage);
if (!ViewOpts.ShowFunctionInstantiations)
return View;
@@ -322,9 +368,12 @@ CodeCoverageTool::createSourceFileView(StringRef SourceFile,
if (Function->ExecutionCount > 0) {
auto SubViewCoverage = Coverage.getCoverageForFunction(*Function);
auto SubViewExpansions = SubViewCoverage.getExpansions();
+ auto SubViewBranches = SubViewCoverage.getBranches();
SubView = SourceCoverageView::create(
Funcname, SourceBuffer.get(), ViewOpts, std::move(SubViewCoverage));
attachExpansionSubViews(*SubView, SubViewExpansions, Coverage);
+ attachBranchSubViews(*SubView, SourceFile, SubViewBranches,
+ SourceBuffer.get(), SubViewCoverage);
}
unsigned FileID = Function->CountedRegions.front().FileID;
@@ -395,6 +444,7 @@ void CodeCoverageTool::remapPathNames(const CoverageMapping &Coverage) {
return "";
SmallString<128> NativePath;
sys::path::native(Path, NativePath);
+ sys::path::remove_dots(NativePath, true);
if (!sys::path::is_separator(NativePath.back()))
NativePath += sys::path::get_separator();
return NativePath.c_str();
@@ -406,6 +456,7 @@ void CodeCoverageTool::remapPathNames(const CoverageMapping &Coverage) {
for (StringRef Filename : Coverage.getUniqueSourceFiles()) {
SmallString<128> NativeFilename;
sys::path::native(Filename, NativeFilename);
+ sys::path::remove_dots(NativeFilename, true);
if (NativeFilename.startswith(RemapFrom)) {
RemappedFilenames[Filename] =
RemapTo + NativeFilename.substr(RemapFrom.size()).str();
@@ -430,16 +481,11 @@ void CodeCoverageTool::remapPathNames(const CoverageMapping &Coverage) {
void CodeCoverageTool::removeUnmappedInputs(const CoverageMapping &Coverage) {
std::vector<StringRef> CoveredFiles = Coverage.getUniqueSourceFiles();
- auto UncoveredFilesIt = SourceFiles.end();
// The user may have specified source files which aren't in the coverage
// mapping. Filter these files away.
- UncoveredFilesIt = std::remove_if(
- SourceFiles.begin(), SourceFiles.end(), [&](const std::string &SF) {
- return !std::binary_search(CoveredFiles.begin(), CoveredFiles.end(),
- SF);
- });
-
- SourceFiles.erase(UncoveredFilesIt, SourceFiles.end());
+ llvm::erase_if(SourceFiles, [&](const std::string &SF) {
+ return !std::binary_search(CoveredFiles.begin(), CoveredFiles.end(), SF);
+ });
}
void CodeCoverageTool::demangleSymbols(const CoverageMapping &Coverage) {
@@ -544,8 +590,11 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
cl::Positional, cl::desc("Covered executable or object file."));
cl::list<std::string> CovFilenames(
- "object", cl::desc("Coverage executable or object file"), cl::ZeroOrMore,
- cl::CommaSeparated);
+ "object", cl::desc("Coverage executable or object file"), cl::ZeroOrMore);
+
+ cl::opt<bool> DebugDumpCollectedObjects(
+ "dump-collected-objects", cl::Optional, cl::Hidden,
+ cl::desc("Show the collected coverage object files"));
cl::list<std::string> InputSourceFiles(
cl::Positional, cl::desc("<Source files>"), cl::ZeroOrMore);
@@ -641,6 +690,11 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
cl::desc("Show region statistics in summary table"),
cl::init(true));
+ cl::opt<bool> BranchSummary(
+ "show-branch-summary", cl::Optional,
+ cl::desc("Show branch condition statistics in summary table"),
+ cl::init(true));
+
cl::opt<bool> InstantiationSummary(
"show-instantiation-summary", cl::Optional,
cl::desc("Show instantiation statistics in summary table"));
@@ -668,6 +722,12 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
::exit(1);
}
+ if (DebugDumpCollectedObjects) {
+ for (StringRef OF : ObjectFilenames)
+ outs() << OF << '\n';
+ ::exit(0);
+ }
+
ViewOpts.Format = Format;
switch (ViewOpts.Format) {
case CoverageViewOptions::OutputFormat::Text:
@@ -778,6 +838,7 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
::exit(0);
}
+ ViewOpts.ShowBranchSummary = BranchSummary;
ViewOpts.ShowRegionSummary = RegionSummary;
ViewOpts.ShowInstantiationSummary = InstantiationSummary;
ViewOpts.ExportSummaryOnly = SummaryOnly;
@@ -812,6 +873,15 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
cl::desc("Show the execution counts for each region"),
cl::cat(ViewCategory));
+ cl::opt<CoverageViewOptions::BranchOutputType> ShowBranches(
+ "show-branches", cl::Optional,
+ cl::desc("Show coverage for branch conditions"), cl::cat(ViewCategory),
+ cl::values(clEnumValN(CoverageViewOptions::BranchOutputType::Count,
+ "count", "Show True/False counts"),
+ clEnumValN(CoverageViewOptions::BranchOutputType::Percent,
+ "percent", "Show True/False percent")),
+ cl::init(CoverageViewOptions::BranchOutputType::Off));
+
cl::opt<bool> ShowBestLineRegionsCounts(
"show-line-counts-or-regions", cl::Optional,
cl::desc("Show the execution counts for each line, or the execution "
@@ -855,6 +925,10 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
!ShowRegions || ShowBestLineRegionsCounts;
ViewOpts.ShowRegionMarkers = ShowRegions || ShowBestLineRegionsCounts;
ViewOpts.ShowExpandedRegions = ShowExpansions;
+ ViewOpts.ShowBranchCounts =
+ ShowBranches == CoverageViewOptions::BranchOutputType::Count;
+ ViewOpts.ShowBranchPercents =
+ ShowBranches == CoverageViewOptions::BranchOutputType::Percent;
ViewOpts.ShowFunctionInstantiations = ShowInstantiations;
ViewOpts.ShowOutputDirectory = ShowOutputDirectory;
ViewOpts.TabSize = TabSize;
@@ -886,7 +960,7 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
auto Printer = CoveragePrinter::create(ViewOpts);
- if (SourceFiles.empty())
+ if (SourceFiles.empty() && !HadSourceFiles)
// Get the source files from the function coverage mapping.
for (StringRef Filename : Coverage->getUniqueSourceFiles()) {
if (!IgnoreFilenameFilters.matchesFilename(Filename))
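
attachBranchSubViews walks a branch list that is already ordered by line and groups consecutive regions sharing the same LineStart into one subview. A standalone sketch of that two-cursor grouping over a plain struct (BranchRec is an invented stand-in for coverage::CountedRegion):

#include <iostream>
#include <vector>

// Invented stand-in for CountedRegion: just a line plus two counters.
struct BranchRec {
  unsigned LineStart;
  unsigned TrueCount, FalseCount;
};

int main() {
  // Already ordered by line, as the coverage data hands it to the tool.
  std::vector<BranchRec> Branches = {
      {10, 3, 0}, {10, 1, 1}, {12, 0, 0}, {15, 7, 2}};

  auto Next = Branches.begin(), End = Branches.end();
  while (Next != End) {
    unsigned CurrentLine = Next->LineStart;

    // Collect every branch that starts on the same source line.
    std::vector<BranchRec> Group;
    while (Next != End && Next->LineStart == CurrentLine)
      Group.push_back(*Next++);

    // One "subview" per line; here we just report its size.
    std::cout << "line " << CurrentLine << ": " << Group.size()
              << " branch(es)\n";
  }
  // Prints: line 10: 2 branch(es), line 12: 1 branch(es), line 15: 1 branch(es)
}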
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterJson.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterJson.cpp
index c8bb1aa5b6ea..d1446f379f00 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterJson.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterJson.cpp
@@ -18,6 +18,8 @@
// -- Export: dict => Json representation of one CoverageMapping
// -- Files: array => List of objects describing coverage for files
// -- File: dict => Coverage for a single file
+// -- Branches: array => List of Branches in the file
+// -- Branch: dict => Describes a branch of the file with counters
// -- Segments: array => List of Segments contained in the file
// -- Segment: dict => Describes a segment of the file with a counter
// -- Expansions: array => List of expansion records
@@ -25,10 +27,13 @@
// -- CountedRegion: dict => The region to be expanded
// -- TargetRegions: array => List of Regions in the expansion
// -- CountedRegion: dict => Single Region in the expansion
+// -- Branches: array => List of Branches in the expansion
+// -- Branch: dict => Describes a branch in expansion and counters
// -- Summary: dict => Object summarizing the coverage for this file
// -- LineCoverage: dict => Object summarizing line coverage
// -- FunctionCoverage: dict => Object summarizing function coverage
// -- RegionCoverage: dict => Object summarizing region coverage
+// -- BranchCoverage: dict => Object summarizing branch coverage
// -- Functions: array => List of objects describing coverage for functions
// -- Function: dict => Coverage info for a single function
// -- Filenames: array => List of filenames that the function relates to
@@ -37,6 +42,7 @@
// -- FunctionCoverage: dict => Object summarizing function coverage
// -- InstantiationCoverage: dict => Object summarizing inst. coverage
// -- RegionCoverage: dict => Object summarizing region coverage
+// -- BranchCoverage: dict => Object summarizing branch coverage
//
//===----------------------------------------------------------------------===//
@@ -84,6 +90,14 @@ json::Array renderRegion(const coverage::CountedRegion &Region) {
int64_t(Region.Kind)});
}
+json::Array renderBranch(const coverage::CountedRegion &Region) {
+ return json::Array(
+ {Region.LineStart, Region.ColumnStart, Region.LineEnd, Region.ColumnEnd,
+ clamp_uint64_to_int64(Region.ExecutionCount),
+ clamp_uint64_to_int64(Region.FalseExecutionCount), Region.FileID,
+ Region.ExpandedFileID, int64_t(Region.Kind)});
+}
+
json::Array renderRegions(ArrayRef<coverage::CountedRegion> Regions) {
json::Array RegionArray;
for (const auto &Region : Regions)
@@ -91,13 +105,49 @@ json::Array renderRegions(ArrayRef<coverage::CountedRegion> Regions) {
return RegionArray;
}
-json::Object renderExpansion(const coverage::ExpansionRecord &Expansion) {
+json::Array renderBranchRegions(ArrayRef<coverage::CountedRegion> Regions) {
+ json::Array RegionArray;
+ for (const auto &Region : Regions)
+ if (!Region.Folded)
+ RegionArray.push_back(renderBranch(Region));
+ return RegionArray;
+}
+
+std::vector<llvm::coverage::CountedRegion>
+collectNestedBranches(const coverage::CoverageMapping &Coverage,
+ ArrayRef<llvm::coverage::ExpansionRecord> Expansions) {
+ std::vector<llvm::coverage::CountedRegion> Branches;
+ for (const auto &Expansion : Expansions) {
+ auto ExpansionCoverage = Coverage.getCoverageForExpansion(Expansion);
+
+ // Recursively collect branches from nested expansions.
+ auto NestedExpansions = ExpansionCoverage.getExpansions();
+ auto NestedExBranches = collectNestedBranches(Coverage, NestedExpansions);
+ Branches.insert(Branches.end(), NestedExBranches.begin(),
+ NestedExBranches.end());
+
+ // Add branches from this level of expansion.
+ auto ExBranches = ExpansionCoverage.getBranches();
+ for (auto B : ExBranches)
+ if (B.FileID == Expansion.FileID)
+ Branches.push_back(B);
+ }
+
+ return Branches;
+}
+
+json::Object renderExpansion(const coverage::CoverageMapping &Coverage,
+ const coverage::ExpansionRecord &Expansion) {
+ std::vector<llvm::coverage::ExpansionRecord> Expansions = {Expansion};
return json::Object(
{{"filenames", json::Array(Expansion.Function.Filenames)},
// Mark the beginning and end of this expansion in the source file.
{"source_region", renderRegion(Expansion.Region)},
// Enumerate the coverage information for the expansion.
- {"target_regions", renderRegions(Expansion.Function.CountedRegions)}});
+ {"target_regions", renderRegions(Expansion.Function.CountedRegions)},
+ // Enumerate the branch coverage information for the expansion.
+ {"branches",
+ renderBranchRegions(collectNestedBranches(Coverage, Expansions))}});
}
json::Object renderSummary(const FileCoverageSummary &Summary) {
@@ -123,14 +173,22 @@ json::Object renderSummary(const FileCoverageSummary &Summary) {
{"covered", int64_t(Summary.RegionCoverage.getCovered())},
{"notcovered", int64_t(Summary.RegionCoverage.getNumRegions() -
Summary.RegionCoverage.getCovered())},
- {"percent", Summary.RegionCoverage.getPercentCovered()}})}});
+ {"percent", Summary.RegionCoverage.getPercentCovered()}})},
+ {"branches",
+ json::Object(
+ {{"count", int64_t(Summary.BranchCoverage.getNumBranches())},
+ {"covered", int64_t(Summary.BranchCoverage.getCovered())},
+ {"notcovered", int64_t(Summary.BranchCoverage.getNumBranches() -
+ Summary.BranchCoverage.getCovered())},
+ {"percent", Summary.BranchCoverage.getPercentCovered()}})}});
}
-json::Array renderFileExpansions(const coverage::CoverageData &FileCoverage,
+json::Array renderFileExpansions(const coverage::CoverageMapping &Coverage,
+ const coverage::CoverageData &FileCoverage,
const FileCoverageSummary &FileReport) {
json::Array ExpansionArray;
for (const auto &Expansion : FileCoverage.getExpansions())
- ExpansionArray.push_back(renderExpansion(Expansion));
+ ExpansionArray.push_back(renderExpansion(Coverage, Expansion));
return ExpansionArray;
}
@@ -142,6 +200,14 @@ json::Array renderFileSegments(const coverage::CoverageData &FileCoverage,
return SegmentArray;
}
+json::Array renderFileBranches(const coverage::CoverageData &FileCoverage,
+ const FileCoverageSummary &FileReport) {
+ json::Array BranchArray;
+ for (const auto &Branch : FileCoverage.getBranches())
+ BranchArray.push_back(renderBranch(Branch));
+ return BranchArray;
+}
+
json::Object renderFile(const coverage::CoverageMapping &Coverage,
const std::string &Filename,
const FileCoverageSummary &FileReport,
@@ -151,8 +217,10 @@ json::Object renderFile(const coverage::CoverageMapping &Coverage,
// Calculate and render detailed coverage information for given file.
auto FileCoverage = Coverage.getCoverageForFile(Filename);
File["segments"] = renderFileSegments(FileCoverage, FileReport);
+ File["branches"] = renderFileBranches(FileCoverage, FileReport);
if (!Options.SkipExpansions) {
- File["expansions"] = renderFileExpansions(FileCoverage, FileReport);
+ File["expansions"] =
+ renderFileExpansions(Coverage, FileCoverage, FileReport);
}
}
File["summary"] = renderSummary(FileReport);
@@ -197,6 +265,7 @@ json::Array renderFunctions(
json::Object({{"name", F.Name},
{"count", clamp_uint64_to_int64(F.ExecutionCount)},
{"regions", renderRegions(F.CountedRegions)},
+ {"branches", renderBranchRegions(F.CountedBranchRegions)},
{"filenames", json::Array(F.Filenames)}}));
return FunctionArray;
}
@@ -218,16 +287,15 @@ void CoverageExporterJson::renderRoot(ArrayRef<std::string> SourceFiles) {
SourceFiles, Options);
auto Files = renderFiles(Coverage, SourceFiles, FileReports, Options);
// Sort files in order of their names.
- std::sort(Files.begin(), Files.end(),
- [](const json::Value &A, const json::Value &B) {
- const json::Object *ObjA = A.getAsObject();
- const json::Object *ObjB = B.getAsObject();
- assert(ObjA != nullptr && "Value A was not an Object");
- assert(ObjB != nullptr && "Value B was not an Object");
- const StringRef FilenameA = ObjA->getString("filename").getValue();
- const StringRef FilenameB = ObjB->getString("filename").getValue();
- return FilenameA.compare(FilenameB) < 0;
- });
+ llvm::sort(Files, [](const json::Value &A, const json::Value &B) {
+ const json::Object *ObjA = A.getAsObject();
+ const json::Object *ObjB = B.getAsObject();
+ assert(ObjA != nullptr && "Value A was not an Object");
+ assert(ObjB != nullptr && "Value B was not an Object");
+ const StringRef FilenameA = ObjA->getString("filename").getValue();
+ const StringRef FilenameB = ObjB->getString("filename").getValue();
+ return FilenameA.compare(FilenameB) < 0;
+ });
auto Export = json::Object(
{{"files", std::move(Files)}, {"totals", renderSummary(Totals)}});
// Skip functions-level information if necessary.
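
renderBranch above serialises each branch region as a flat nine-element array: start and end line/column, the true and false execution counts, the two file IDs, and the numeric region kind. A small sketch of that ordering using plain integers instead of llvm::json; Branch and renderBranchLike are invented names, and the sample values (including Kind) are arbitrary.

#include <cstdint>
#include <iostream>
#include <vector>

// Invented stand-in for the fields renderBranch reads off a CountedRegion.
struct Branch {
  unsigned LineStart = 42, ColumnStart = 7, LineEnd = 42, ColumnEnd = 20;
  uint64_t ExecutionCount = 13;     // times the condition was true
  uint64_t FalseExecutionCount = 2; // times the condition was false
  unsigned FileID = 0, ExpandedFileID = 0;
  int Kind = 4;                     // numeric region kind; arbitrary value here
};

// Same element order as the JSON "branches" arrays produced by renderBranch.
std::vector<int64_t> renderBranchLike(const Branch &R) {
  return {R.LineStart, R.ColumnStart, R.LineEnd, R.ColumnEnd,
          int64_t(R.ExecutionCount), int64_t(R.FalseExecutionCount),
          R.FileID, R.ExpandedFileID, R.Kind};
}

int main() {
  for (int64_t V : renderBranchLike(Branch{}))
    std::cout << V << ' ';
  std::cout << '\n'; // 42 7 42 20 13 2 0 0 4
}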
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
index a6b3c6607030..99ca037e7b5e 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
@@ -26,8 +26,12 @@
// - "FNH:<number of functions hit>"
// - for each instrumented line:
// - "DA:<line number>,<execution count>[,<checksum>]
+// - for each branch:
+// - "BRDA:<line number>,<branch pair id>,<branch id>,<count>"
+// - "BRF:<number of branches found>"
+// - "BRH:<number of branches hit>"
// - "LH:<number of lines with non-zero execution count>"
-// - "LF:<nubmer of instrumented lines>"
+// - "LF:<number of instrumented lines>"
// - "end_of_record"
//
// If the user is exporting summary information only, then the FN, FNDA, and DA
@@ -71,11 +75,102 @@ void renderLineExecutionCounts(raw_ostream &OS,
}
}
+std::vector<llvm::coverage::CountedRegion>
+collectNestedBranches(const coverage::CoverageMapping &Coverage,
+ ArrayRef<llvm::coverage::ExpansionRecord> Expansions,
+ int ViewDepth = 0, int SrcLine = 0) {
+ std::vector<llvm::coverage::CountedRegion> Branches;
+ for (const auto &Expansion : Expansions) {
+ auto ExpansionCoverage = Coverage.getCoverageForExpansion(Expansion);
+
+ // If we're at the top level, set the corresponding source line.
+ if (ViewDepth == 0)
+ SrcLine = Expansion.Region.LineStart;
+
+ // Recursively collect branches from nested expansions.
+ auto NestedExpansions = ExpansionCoverage.getExpansions();
+ auto NestedExBranches = collectNestedBranches(Coverage, NestedExpansions,
+ ViewDepth + 1, SrcLine);
+ Branches.insert(Branches.end(), NestedExBranches.begin(),
+ NestedExBranches.end());
+
+ // Add branches from this level of expansion.
+ auto ExBranches = ExpansionCoverage.getBranches();
+ for (auto B : ExBranches)
+ if (B.FileID == Expansion.FileID) {
+ B.LineStart = SrcLine;
+ Branches.push_back(B);
+ }
+ }
+
+ return Branches;
+}
+
+bool sortLine(llvm::coverage::CountedRegion I,
+ llvm::coverage::CountedRegion J) {
+ return (I.LineStart < J.LineStart) ||
+ ((I.LineStart == J.LineStart) && (I.ColumnStart < J.ColumnStart));
+}
+
+void renderBranchExecutionCounts(raw_ostream &OS,
+ const coverage::CoverageMapping &Coverage,
+ const coverage::CoverageData &FileCoverage) {
+ std::vector<llvm::coverage::CountedRegion> Branches =
+ FileCoverage.getBranches();
+
+ // Recursively collect branches for all file expansions.
+ std::vector<llvm::coverage::CountedRegion> ExBranches =
+ collectNestedBranches(Coverage, FileCoverage.getExpansions());
+
+ // Append Expansion Branches to Source Branches.
+ Branches.insert(Branches.end(), ExBranches.begin(), ExBranches.end());
+
+ // Sort branches based on line number to ensure branches corresponding to the
+ // same source line are counted together.
+ llvm::sort(Branches, sortLine);
+
+ auto NextBranch = Branches.begin();
+ auto EndBranch = Branches.end();
+
+ // Branches with the same source line are enumerated individually
+ // (BranchIndex) as well as based on True/False pairs (PairIndex).
+ while (NextBranch != EndBranch) {
+ unsigned CurrentLine = NextBranch->LineStart;
+ unsigned PairIndex = 0;
+ unsigned BranchIndex = 0;
+
+ while (NextBranch != EndBranch && CurrentLine == NextBranch->LineStart) {
+ if (!NextBranch->Folded) {
+ unsigned BC1 = NextBranch->ExecutionCount;
+ unsigned BC2 = NextBranch->FalseExecutionCount;
+ bool BranchNotExecuted = (BC1 == 0 && BC2 == 0);
+
+ for (int I = 0; I < 2; I++, BranchIndex++) {
+ OS << "BRDA:" << CurrentLine << ',' << PairIndex << ','
+ << BranchIndex;
+ if (BranchNotExecuted)
+ OS << ',' << '-' << '\n';
+ else
+ OS << ',' << (I == 0 ? BC1 : BC2) << '\n';
+ }
+
+ PairIndex++;
+ }
+ NextBranch++;
+ }
+ }
+}
+
void renderLineSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
OS << "LF:" << Summary.LineCoverage.getNumLines() << '\n'
<< "LH:" << Summary.LineCoverage.getCovered() << '\n';
}
+void renderBranchSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
+ OS << "BRF:" << Summary.BranchCoverage.getNumBranches() << '\n'
+ << "BFH:" << Summary.BranchCoverage.getCovered() << '\n';
+}
+
void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
const std::string &Filename,
const FileCoverageSummary &FileReport, bool ExportSummaryOnly,
@@ -91,7 +186,9 @@ void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
// Calculate and render detailed coverage information for given file.
auto FileCoverage = Coverage.getCoverageForFile(Filename);
renderLineExecutionCounts(OS, FileCoverage);
+ renderBranchExecutionCounts(OS, Coverage, FileCoverage);
}
+ renderBranchSummary(OS, FileReport);
renderLineSummary(OS, FileReport);
OS << "end_of_record\n";
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageReport.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageReport.cpp
index 8509710032d1..2c08f5309494 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageReport.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageReport.cpp
@@ -86,9 +86,9 @@ Column column(StringRef Str, unsigned Width, const T &Value) {
}
// Specify the default column widths.
-size_t FileReportColumns[] = {25, 12, 18, 10, 12, 18, 10,
- 16, 16, 10, 12, 18, 10};
-size_t FunctionReportColumns[] = {25, 10, 8, 8, 10, 8, 8};
+size_t FileReportColumns[] = {25, 12, 18, 10, 12, 18, 10, 16,
+ 16, 10, 12, 18, 10, 12, 18, 10};
+size_t FunctionReportColumns[] = {25, 10, 8, 8, 10, 8, 8, 10, 8, 8};
/// Adjust column widths to fit long file paths and function names.
void adjustColumnWidths(ArrayRef<StringRef> Files,
@@ -239,6 +239,23 @@ void CoverageReport::render(const FileCoverageSummary &File,
<< '%';
else
OS << column("-", FileReportColumns[12], Column::RightAlignment);
+
+ if (Options.ShowBranchSummary) {
+ OS << format("%*u", FileReportColumns[13],
+ (unsigned)File.BranchCoverage.getNumBranches());
+ Options.colored_ostream(OS, LineCoverageColor)
+ << format("%*u", FileReportColumns[14],
+ (unsigned)(File.BranchCoverage.getNumBranches() -
+ File.BranchCoverage.getCovered()));
+ if (File.BranchCoverage.getNumBranches())
+ Options.colored_ostream(OS, LineCoverageColor)
+ << format("%*.2f", FileReportColumns[15] - 1,
+ File.BranchCoverage.getPercentCovered())
+ << '%';
+ else
+ OS << column("-", FileReportColumns[15], Column::RightAlignment);
+ }
+
OS << "\n";
}
@@ -273,6 +290,19 @@ void CoverageReport::render(const FunctionCoverageSummary &Function,
<< format("%*.2f", FunctionReportColumns[6] - 1,
Function.LineCoverage.getPercentCovered())
<< '%';
+ if (Options.ShowBranchSummary) {
+ OS << format("%*u", FunctionReportColumns[7],
+ (unsigned)Function.BranchCoverage.getNumBranches());
+ Options.colored_ostream(OS, LineCoverageColor)
+ << format("%*u", FunctionReportColumns[8],
+ (unsigned)(Function.BranchCoverage.getNumBranches() -
+ Function.BranchCoverage.getCovered()));
+ Options.colored_ostream(
+ OS, determineCoveragePercentageColor(Function.BranchCoverage))
+ << format("%*.2f", FunctionReportColumns[9] - 1,
+ Function.BranchCoverage.getPercentCovered())
+ << '%';
+ }
OS << "\n";
}
@@ -301,6 +331,10 @@ void CoverageReport::renderFunctionReports(ArrayRef<std::string> Files,
<< column("Lines", FunctionReportColumns[4], Column::RightAlignment)
<< column("Miss", FunctionReportColumns[5], Column::RightAlignment)
<< column("Cover", FunctionReportColumns[6], Column::RightAlignment);
+ if (Options.ShowBranchSummary)
+ OS << column("Branches", FunctionReportColumns[7], Column::RightAlignment)
+ << column("Miss", FunctionReportColumns[8], Column::RightAlignment)
+ << column("Cover", FunctionReportColumns[9], Column::RightAlignment);
OS << "\n";
renderDivider(FunctionReportColumns, OS);
OS << "\n";
@@ -310,6 +344,7 @@ void CoverageReport::renderFunctionReports(ArrayRef<std::string> Files,
++Totals.ExecutionCount;
Totals.RegionCoverage += Function.RegionCoverage;
Totals.LineCoverage += Function.LineCoverage;
+ Totals.BranchCoverage += Function.BranchCoverage;
render(Function, DC, OS);
}
if (Totals.ExecutionCount) {
@@ -420,7 +455,13 @@ void CoverageReport::renderFileReports(
<< column("Executed", FileReportColumns[9], Column::RightAlignment);
OS << column("Lines", FileReportColumns[10], Column::RightAlignment)
<< column("Missed Lines", FileReportColumns[11], Column::RightAlignment)
- << column("Cover", FileReportColumns[12], Column::RightAlignment) << "\n";
+ << column("Cover", FileReportColumns[12], Column::RightAlignment);
+ if (Options.ShowBranchSummary)
+ OS << column("Branches", FileReportColumns[13], Column::RightAlignment)
+ << column("Missed Branches", FileReportColumns[14],
+ Column::RightAlignment)
+ << column("Cover", FileReportColumns[15], Column::RightAlignment);
+ OS << "\n";
renderDivider(FileReportColumns, OS);
OS << "\n";
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp
index 929529c27b6e..10e059adeb7d 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp
@@ -16,6 +16,34 @@
using namespace llvm;
using namespace coverage;
+static void sumBranches(size_t &NumBranches, size_t &CoveredBranches,
+ const ArrayRef<CountedRegion> &Branches) {
+ for (const auto &BR : Branches) {
+ // Skip folded branches.
+ if (BR.Folded)
+ continue;
+
+ // "True" Condition Branches.
+ ++NumBranches;
+ if (BR.ExecutionCount > 0)
+ ++CoveredBranches;
+ // "False" Condition Branches.
+ ++NumBranches;
+ if (BR.FalseExecutionCount > 0)
+ ++CoveredBranches;
+ }
+}
+
+static void sumBranchExpansions(size_t &NumBranches, size_t &CoveredBranches,
+ const CoverageMapping &CM,
+ ArrayRef<ExpansionRecord> Expansions) {
+ for (const auto &Expansion : Expansions) {
+ auto CE = CM.getCoverageForExpansion(Expansion);
+ sumBranches(NumBranches, CoveredBranches, CE.getBranches());
+ sumBranchExpansions(NumBranches, CoveredBranches, CM, CE.getExpansions());
+ }
+}
+
FunctionCoverageSummary
FunctionCoverageSummary::get(const CoverageMapping &CM,
const coverage::FunctionRecord &Function) {
@@ -40,10 +68,16 @@ FunctionCoverageSummary::get(const CoverageMapping &CM,
++CoveredLines;
}
+ // Compute the branch coverage, including branches from expansions.
+ size_t NumBranches = 0, CoveredBranches = 0;
+ sumBranches(NumBranches, CoveredBranches, CD.getBranches());
+ sumBranchExpansions(NumBranches, CoveredBranches, CM, CD.getExpansions());
+
return FunctionCoverageSummary(
Function.Name, Function.ExecutionCount,
RegionCoverageInfo(CoveredRegions, NumCodeRegions),
- LineCoverageInfo(CoveredLines, NumLines));
+ LineCoverageInfo(CoveredLines, NumLines),
+ BranchCoverageInfo(CoveredBranches, NumBranches));
}
FunctionCoverageSummary
@@ -62,9 +96,11 @@ FunctionCoverageSummary::get(const InstantiationGroup &Group,
Summary.ExecutionCount = Group.getTotalExecutionCount();
Summary.RegionCoverage = Summaries[0].RegionCoverage;
Summary.LineCoverage = Summaries[0].LineCoverage;
+ Summary.BranchCoverage = Summaries[0].BranchCoverage;
for (const auto &FCS : Summaries.drop_front()) {
Summary.RegionCoverage.merge(FCS.RegionCoverage);
Summary.LineCoverage.merge(FCS.LineCoverage);
+ Summary.BranchCoverage.merge(FCS.BranchCoverage);
}
return Summary;
}
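
sumBranches counts each un-folded branch region twice, once for its "true" side and once for its "false" side, and a side is covered only when its count is non-zero. A worked standalone version of that accounting; BranchRec and the sample counts are invented.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

struct BranchRec {              // invented stand-in for CountedRegion
  uint64_t TrueCount, FalseCount;
  bool Folded;
};

void sumBranches(std::size_t &NumBranches, std::size_t &CoveredBranches,
                 const std::vector<BranchRec> &Branches) {
  for (const BranchRec &BR : Branches) {
    if (BR.Folded)              // folded branches are skipped entirely
      continue;
    ++NumBranches;              // "true" side
    if (BR.TrueCount > 0)
      ++CoveredBranches;
    ++NumBranches;              // "false" side
    if (BR.FalseCount > 0)
      ++CoveredBranches;
  }
}

int main() {
  // Three regions: fully exercised, only the true side taken, folded.
  std::vector<BranchRec> Branches = {
      {5, 2, false}, {7, 0, false}, {0, 0, true}};
  std::size_t Num = 0, Covered = 0;
  sumBranches(Num, Covered, Branches);
  std::cout << Covered << "/" << Num << " branches covered\n"; // 3/4
}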
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.h b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.h
index 97beacb26d07..62e7cad1012b 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.h
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.h
@@ -101,6 +101,47 @@ public:
}
};
+/// Provides information about branches coverage for a function/file.
+class BranchCoverageInfo {
+ /// The number of branches that were executed at least once.
+ size_t Covered;
+
+ /// The total number of branches in a function/file.
+ size_t NumBranches;
+
+public:
+ BranchCoverageInfo() : Covered(0), NumBranches(0) {}
+
+ BranchCoverageInfo(size_t Covered, size_t NumBranches)
+ : Covered(Covered), NumBranches(NumBranches) {
+ assert(Covered <= NumBranches && "Covered branches over-counted");
+ }
+
+ BranchCoverageInfo &operator+=(const BranchCoverageInfo &RHS) {
+ Covered += RHS.Covered;
+ NumBranches += RHS.NumBranches;
+ return *this;
+ }
+
+ void merge(const BranchCoverageInfo &RHS) {
+ Covered = std::max(Covered, RHS.Covered);
+ NumBranches = std::max(NumBranches, RHS.NumBranches);
+ }
+
+ size_t getCovered() const { return Covered; }
+
+ size_t getNumBranches() const { return NumBranches; }
+
+ bool isFullyCovered() const { return Covered == NumBranches; }
+
+ double getPercentCovered() const {
+ assert(Covered <= NumBranches && "Covered branches over-counted");
+ if (NumBranches == 0)
+ return 0.0;
+ return double(Covered) / double(NumBranches) * 100.0;
+ }
+};
+
/// Provides information about function coverage for a file.
class FunctionCoverageInfo {
/// The number of functions that were executed.
@@ -147,15 +188,19 @@ struct FunctionCoverageSummary {
uint64_t ExecutionCount;
RegionCoverageInfo RegionCoverage;
LineCoverageInfo LineCoverage;
+ BranchCoverageInfo BranchCoverage;
FunctionCoverageSummary(const std::string &Name)
- : Name(Name), ExecutionCount(0), RegionCoverage(), LineCoverage() {}
+ : Name(Name), ExecutionCount(0), RegionCoverage(), LineCoverage(),
+ BranchCoverage() {}
FunctionCoverageSummary(const std::string &Name, uint64_t ExecutionCount,
const RegionCoverageInfo &RegionCoverage,
- const LineCoverageInfo &LineCoverage)
+ const LineCoverageInfo &LineCoverage,
+ const BranchCoverageInfo &BranchCoverage)
: Name(Name), ExecutionCount(ExecutionCount),
- RegionCoverage(RegionCoverage), LineCoverage(LineCoverage) {}
+ RegionCoverage(RegionCoverage), LineCoverage(LineCoverage),
+ BranchCoverage(BranchCoverage) {}
/// Compute the code coverage summary for the given function coverage
/// mapping record.
@@ -174,6 +219,7 @@ struct FileCoverageSummary {
StringRef Name;
RegionCoverageInfo RegionCoverage;
LineCoverageInfo LineCoverage;
+ BranchCoverageInfo BranchCoverage;
FunctionCoverageInfo FunctionCoverage;
FunctionCoverageInfo InstantiationCoverage;
@@ -185,6 +231,7 @@ struct FileCoverageSummary {
RegionCoverage += RHS.RegionCoverage;
LineCoverage += RHS.LineCoverage;
FunctionCoverage += RHS.FunctionCoverage;
+ BranchCoverage += RHS.BranchCoverage;
InstantiationCoverage += RHS.InstantiationCoverage;
return *this;
}
@@ -192,6 +239,7 @@ struct FileCoverageSummary {
void addFunction(const FunctionCoverageSummary &Function) {
RegionCoverage += Function.RegionCoverage;
LineCoverage += Function.LineCoverage;
+ BranchCoverage += Function.BranchCoverage;
FunctionCoverage.addFunction(/*Covered=*/Function.ExecutionCount > 0);
}
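
Note the two combining operations on BranchCoverageInfo above: operator+= sums counts when rolling functions up into a file or report total, while merge() takes the maximum of each field, which is how an instantiation group folds several template instantiations of the same source into one row. A tiny worked example as a standalone re-statement (BranchInfo is an invented name, not the LLVM class):

#include <algorithm>
#include <cstddef>
#include <iostream>

struct BranchInfo {
  std::size_t Covered, NumBranches;

  BranchInfo &operator+=(const BranchInfo &RHS) { // accumulate across functions
    Covered += RHS.Covered;
    NumBranches += RHS.NumBranches;
    return *this;
  }
  void merge(const BranchInfo &RHS) {             // fold instantiations
    Covered = std::max(Covered, RHS.Covered);
    NumBranches = std::max(NumBranches, RHS.NumBranches);
  }
};

int main() {
  BranchInfo A{3, 4}, B{1, 4};

  BranchInfo Sum = A;     // two different functions in one file: 4/8
  Sum += B;
  BranchInfo Grouped = A; // two instantiations of one template: 3/4
  Grouped.merge(B);

  std::cout << Sum.Covered << "/" << Sum.NumBranches << " vs "
            << Grouped.Covered << "/" << Grouped.NumBranches << "\n"; // 4/8 vs 3/4
}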
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageViewOptions.h b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageViewOptions.h
index dde0c692ab05..eee4ba74e165 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageViewOptions.h
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageViewOptions.h
@@ -23,20 +23,26 @@ struct CoverageViewOptions {
Lcov
};
+ enum class BranchOutputType { Count, Percent, Off };
+
bool Debug;
bool Colors;
bool ShowLineNumbers;
bool ShowLineStats;
bool ShowRegionMarkers;
+ bool ShowBranchCounts;
+ bool ShowBranchPercents;
bool ShowExpandedRegions;
bool ShowFunctionInstantiations;
bool ShowFullFilenames;
+ bool ShowBranchSummary;
bool ShowRegionSummary;
bool ShowInstantiationSummary;
bool ExportSummaryOnly;
bool SkipExpansions;
bool SkipFunctions;
OutputFormat Format;
+ BranchOutputType ShowBranches;
std::string ShowOutputDirectory;
std::vector<std::string> DemanglerOpts;
uint32_t TabSize;
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.cpp
index cd7395a1a87d..aca58a07f250 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.cpp
@@ -132,7 +132,8 @@ bool SourceCoverageView::shouldRenderRegionMarkers(
}
bool SourceCoverageView::hasSubViews() const {
- return !ExpansionSubViews.empty() || !InstantiationSubViews.empty();
+ return !ExpansionSubViews.empty() || !InstantiationSubViews.empty() ||
+ !BranchSubViews.empty();
}
std::unique_ptr<SourceCoverageView>
@@ -167,6 +168,12 @@ void SourceCoverageView::addExpansion(
ExpansionSubViews.emplace_back(Region, std::move(View));
}
+void SourceCoverageView::addBranch(unsigned Line,
+ ArrayRef<CountedRegion> Regions,
+ std::unique_ptr<SourceCoverageView> View) {
+ BranchSubViews.emplace_back(Line, Regions, std::move(View));
+}
+
void SourceCoverageView::addInstantiation(
StringRef FunctionName, unsigned Line,
std::unique_ptr<SourceCoverageView> View) {
@@ -187,14 +194,17 @@ void SourceCoverageView::print(raw_ostream &OS, bool WholeFile,
renderTableHeader(OS, (ViewDepth > 0) ? 0 : getFirstUncoveredLineNo(),
ViewDepth);
- // We need the expansions and instantiations sorted so we can go through them
- // while we iterate lines.
+ // We need the expansions, instantiations, and branches sorted so we can go
+ // through them while we iterate lines.
llvm::stable_sort(ExpansionSubViews);
llvm::stable_sort(InstantiationSubViews);
+ llvm::stable_sort(BranchSubViews);
auto NextESV = ExpansionSubViews.begin();
auto EndESV = ExpansionSubViews.end();
auto NextISV = InstantiationSubViews.begin();
auto EndISV = InstantiationSubViews.end();
+ auto NextBRV = BranchSubViews.begin();
+ auto EndBRV = BranchSubViews.end();
// Get the coverage information for the file.
auto StartSegment = CoverageInfo.begin();
@@ -234,7 +244,7 @@ void SourceCoverageView::print(raw_ostream &OS, bool WholeFile,
if (shouldRenderRegionMarkers(*LCI))
renderRegionMarkers(OS, *LCI, ViewDepth);
- // Show the expansions and instantiations for this line.
+ // Show the expansions, instantiations, and branches for this line.
bool RenderedSubView = false;
for (; NextESV != EndESV && NextESV->getLine() == LI.line_number();
++NextESV) {
@@ -257,6 +267,11 @@ void SourceCoverageView::print(raw_ostream &OS, bool WholeFile,
renderInstantiationView(OS, *NextISV, ViewDepth + 1);
RenderedSubView = true;
}
+ for (; NextBRV != EndBRV && NextBRV->Line == LI.line_number(); ++NextBRV) {
+ renderViewDivider(OS, ViewDepth + 1);
+ renderBranchView(OS, *NextBRV, ViewDepth + 1);
+ RenderedSubView = true;
+ }
if (RenderedSubView)
renderViewDivider(OS, ViewDepth + 1);
renderLineSuffix(OS, ViewDepth);
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.h b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.h
index 9ae928443651..5a9fcdd15761 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.h
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.h
@@ -67,6 +67,23 @@ struct InstantiationView {
}
};
+/// A view that represents one or more branch regions on a given source line.
+struct BranchView {
+ std::vector<CountedRegion> Regions;
+ std::unique_ptr<SourceCoverageView> View;
+ unsigned Line;
+
+ BranchView(unsigned Line, ArrayRef<CountedRegion> Regions,
+ std::unique_ptr<SourceCoverageView> View)
+ : Regions(Regions), View(std::move(View)), Line(Line) {}
+
+ unsigned getLine() const { return Line; }
+
+ friend bool operator<(const BranchView &LHS, const BranchView &RHS) {
+ return LHS.Line < RHS.Line;
+ }
+};
+
/// A file manager that handles format-aware file creation.
class CoveragePrinter {
public:
@@ -140,6 +157,9 @@ class SourceCoverageView {
/// A container for all expansions (e.g macros) in the source on display.
std::vector<ExpansionView> ExpansionSubViews;
+ /// A container for all branches in the source on display.
+ std::vector<BranchView> BranchSubViews;
+
/// A container for all instantiations (e.g template functions) in the source
/// on display.
std::vector<InstantiationView> InstantiationSubViews;
@@ -209,6 +229,10 @@ protected:
virtual void renderInstantiationView(raw_ostream &OS, InstantiationView &ISV,
unsigned ViewDepth) = 0;
+ /// Render a branch view and any nested views.
+ virtual void renderBranchView(raw_ostream &OS, BranchView &BRV,
+ unsigned ViewDepth) = 0;
+
/// Render \p Title, a project title if one is available, and the
/// created time.
virtual void renderTitle(raw_ostream &OS, StringRef CellText) = 0;
@@ -255,6 +279,10 @@ public:
void addInstantiation(StringRef FunctionName, unsigned Line,
std::unique_ptr<SourceCoverageView> View);
+ /// Add a branch subview to this view.
+ void addBranch(unsigned Line, ArrayRef<CountedRegion> Regions,
+ std::unique_ptr<SourceCoverageView> View);
+
/// Print the code coverage information for a specific portion of a
/// source file to the output stream.
void print(raw_ostream &OS, bool WholeFile, bool ShowSourceName,
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
index 9d10def0a211..7ab1d6608ccf 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
@@ -248,7 +248,7 @@ const char *ReportTitleTag = "h2";
const char *CreatedTimeTag = "h4";
std::string getPathToStyle(StringRef ViewPath) {
- std::string PathToStyle = "";
+ std::string PathToStyle;
std::string PathSep = std::string(sys::path::get_separator());
unsigned NumSeps = ViewPath.count(PathSep);
for (unsigned I = 0, E = NumSeps; I < E; ++I)
@@ -313,6 +313,8 @@ static void emitColumnLabelsForIndex(raw_ostream &OS,
Columns.emplace_back(tag("td", "Line Coverage", "column-entry-bold"));
if (Opts.ShowRegionSummary)
Columns.emplace_back(tag("td", "Region Coverage", "column-entry-bold"));
+ if (Opts.ShowBranchSummary)
+ Columns.emplace_back(tag("td", "Branch Coverage", "column-entry-bold"));
OS << tag("tr", join(Columns.begin(), Columns.end(), ""));
}
@@ -378,6 +380,10 @@ void CoveragePrinterHTML::emitFileSummary(raw_ostream &OS, StringRef SF,
AddCoverageTripleToColumn(FCS.RegionCoverage.getCovered(),
FCS.RegionCoverage.getNumRegions(),
FCS.RegionCoverage.getPercentCovered());
+ if (Opts.ShowBranchSummary)
+ AddCoverageTripleToColumn(FCS.BranchCoverage.getCovered(),
+ FCS.BranchCoverage.getNumBranches(),
+ FCS.BranchCoverage.getPercentCovered());
if (IsTotals)
OS << tag("tr", join(Columns.begin(), Columns.end(), ""), "light-row-bold");
@@ -611,7 +617,7 @@ void SourceCoverageViewHTML::renderLine(raw_ostream &OS, LineRef L,
void SourceCoverageViewHTML::renderLineCoverageColumn(
raw_ostream &OS, const LineCoverageStats &Line) {
- std::string Count = "";
+ std::string Count;
if (Line.isMapped())
Count = tag("pre", formatCount(Line.getExecutionCount()));
std::string CoverageClass =
@@ -650,6 +656,72 @@ void SourceCoverageViewHTML::renderExpansionView(raw_ostream &OS,
OS << EndExpansionDiv;
}
+void SourceCoverageViewHTML::renderBranchView(raw_ostream &OS, BranchView &BRV,
+ unsigned ViewDepth) {
+ // Render the child subview.
+ if (getOptions().Debug)
+ errs() << "Branch at line " << BRV.getLine() << '\n';
+
+ OS << BeginExpansionDiv;
+ OS << BeginPre;
+ for (const auto &R : BRV.Regions) {
+ // Calculate TruePercent and False Percent.
+ double TruePercent = 0.0;
+ double FalsePercent = 0.0;
+ unsigned Total = R.ExecutionCount + R.FalseExecutionCount;
+
+ if (!getOptions().ShowBranchCounts && Total != 0) {
+ TruePercent = ((double)(R.ExecutionCount) / (double)Total) * 100.0;
+ FalsePercent = ((double)(R.FalseExecutionCount) / (double)Total) * 100.0;
+ }
+
+ // Display Line + Column.
+ std::string LineNoStr = utostr(uint64_t(R.LineStart));
+ std::string ColNoStr = utostr(uint64_t(R.ColumnStart));
+ std::string TargetName = "L" + LineNoStr;
+
+ OS << " Branch (";
+ OS << tag("span",
+ a("#" + TargetName, tag("span", LineNoStr + ":" + ColNoStr),
+ TargetName),
+ "line-number") +
+ "): [";
+
+ if (R.Folded) {
+ OS << "Folded - Ignored]\n";
+ continue;
+ }
+
+ // Display TrueCount or TruePercent.
+ std::string TrueColor = R.ExecutionCount ? "None" : "red";
+ std::string TrueCovClass =
+ (R.ExecutionCount > 0) ? "covered-line" : "uncovered-line";
+
+ OS << tag("span", "True", TrueColor);
+ OS << ": ";
+ if (getOptions().ShowBranchCounts)
+ OS << tag("span", formatCount(R.ExecutionCount), TrueCovClass) << ", ";
+ else
+ OS << format("%0.2f", TruePercent) << "%, ";
+
+ // Display FalseCount or FalsePercent.
+ std::string FalseColor = R.FalseExecutionCount ? "None" : "red";
+ std::string FalseCovClass =
+ (R.FalseExecutionCount > 0) ? "covered-line" : "uncovered-line";
+
+ OS << tag("span", "False", FalseColor);
+ OS << ": ";
+ if (getOptions().ShowBranchCounts)
+ OS << tag("span", formatCount(R.FalseExecutionCount), FalseCovClass);
+ else
+ OS << format("%0.2f", FalsePercent) << "%";
+
+ OS << "]\n";
+ }
+ OS << EndPre;
+ OS << EndExpansionDiv;
+}
+
void SourceCoverageViewHTML::renderInstantiationView(raw_ostream &OS,
InstantiationView &ISV,
unsigned ViewDepth) {
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.h b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.h
index 9834040008a6..7d94675f4b0b 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.h
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.h
@@ -68,6 +68,9 @@ class SourceCoverageViewHTML : public SourceCoverageView {
void renderExpansionView(raw_ostream &OS, ExpansionView &ESV,
unsigned ViewDepth) override;
+ void renderBranchView(raw_ostream &OS, BranchView &BRV,
+ unsigned ViewDepth) override;
+
void renderInstantiationView(raw_ostream &OS, InstantiationView &ISV,
unsigned ViewDepth) override;
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.cpp
index fcabee2ee69d..948414a4f995 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.cpp
@@ -10,11 +10,12 @@
///
//===----------------------------------------------------------------------===//
-#include "CoverageReport.h"
#include "SourceCoverageViewText.h"
+#include "CoverageReport.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Format.h"
using namespace llvm;
@@ -222,6 +223,53 @@ void SourceCoverageViewText::renderExpansionView(raw_ostream &OS,
/*ShowTitle=*/false, ViewDepth + 1);
}
+void SourceCoverageViewText::renderBranchView(raw_ostream &OS, BranchView &BRV,
+ unsigned ViewDepth) {
+ // Render the child subview.
+ if (getOptions().Debug)
+ errs() << "Branch at line " << BRV.getLine() << '\n';
+
+ for (const auto &R : BRV.Regions) {
+ double TruePercent = 0.0;
+ double FalsePercent = 0.0;
+ unsigned Total = R.ExecutionCount + R.FalseExecutionCount;
+
+ if (!getOptions().ShowBranchCounts && Total != 0) {
+ TruePercent = ((double)(R.ExecutionCount) / (double)Total) * 100.0;
+ FalsePercent = ((double)(R.FalseExecutionCount) / (double)Total) * 100.0;
+ }
+
+ renderLinePrefix(OS, ViewDepth);
+ OS << " Branch (" << R.LineStart << ":" << R.ColumnStart << "): [";
+
+ if (R.Folded) {
+ OS << "Folded - Ignored]\n";
+ continue;
+ }
+
+ colored_ostream(OS, raw_ostream::RED,
+ getOptions().Colors && !R.ExecutionCount,
+ /*Bold=*/false, /*BG=*/true)
+ << "True";
+
+ if (getOptions().ShowBranchCounts)
+ OS << ": " << formatCount(R.ExecutionCount) << ", ";
+ else
+ OS << ": " << format("%0.2f", TruePercent) << "%, ";
+
+ colored_ostream(OS, raw_ostream::RED,
+ getOptions().Colors && !R.FalseExecutionCount,
+ /*Bold=*/false, /*BG=*/true)
+ << "False";
+
+ if (getOptions().ShowBranchCounts)
+ OS << ": " << formatCount(R.FalseExecutionCount);
+ else
+ OS << ": " << format("%0.2f", FalsePercent) << "%";
+ OS << "]\n";
+ }
+}
+
void SourceCoverageViewText::renderInstantiationView(raw_ostream &OS,
InstantiationView &ISV,
unsigned ViewDepth) {
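
Both the text and HTML renderers derive TruePercent and FalsePercent from the two execution counts only when percentages were requested and the branch executed at all (Total != 0). Worked numbers as a minimal sketch, with invented counts:

#include <cstdint>
#include <cstdio>

int main() {
  // Invented counts for one branch: condition true 13 times, false 2 times.
  uint64_t TrueCount = 13, FalseCount = 2;
  uint64_t Total = TrueCount + FalseCount;

  double TruePercent = 0.0, FalsePercent = 0.0;
  if (Total != 0) { // avoid dividing by zero for never-executed branches
    TruePercent = double(TrueCount) / double(Total) * 100.0;
    FalsePercent = double(FalseCount) / double(Total) * 100.0;
  }
  std::printf("True: %0.2f%%, False: %0.2f%%\n", TruePercent, FalsePercent);
  // True: 86.67%, False: 13.33%
}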
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.h b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.h
index c8c4632c3b9d..b2be06039f95 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.h
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.h
@@ -59,6 +59,9 @@ class SourceCoverageViewText : public SourceCoverageView {
void renderExpansionView(raw_ostream &OS, ExpansionView &ESV,
unsigned ViewDepth) override;
+ void renderBranchView(raw_ostream &OS, BranchView &BRV,
+ unsigned ViewDepth) override;
+
void renderInstantiationView(raw_ostream &OS, InstantiationView &ISV,
unsigned ViewDepth) override;
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/gcov.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/gcov.cpp
index 7a1dbbfe9338..d42e7cd3b551 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/gcov.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/gcov.cpp
@@ -43,8 +43,10 @@ static void reportCoverage(StringRef SourceFile, StringRef ObjectDir,
: InputGCDA;
GCOVFile GF;
+ // Open .gcno and .gcda without requiring a NUL terminator. The concurrent
+ // modification may nullify the NUL terminator condition.
ErrorOr<std::unique_ptr<MemoryBuffer>> GCNO_Buff =
- MemoryBuffer::getFileOrSTDIN(GCNO);
+ MemoryBuffer::getFileOrSTDIN(GCNO, -1, /*RequiresNullTerminator=*/false);
if (std::error_code EC = GCNO_Buff.getError()) {
errs() << GCNO << ": " << EC.message() << "\n";
return;
@@ -56,7 +58,7 @@ static void reportCoverage(StringRef SourceFile, StringRef ObjectDir,
}
ErrorOr<std::unique_ptr<MemoryBuffer>> GCDA_Buff =
- MemoryBuffer::getFileOrSTDIN(GCDA);
+ MemoryBuffer::getFileOrSTDIN(GCDA, -1, /*RequiresNullTerminator=*/false);
if (std::error_code EC = GCDA_Buff.getError()) {
if (EC != errc::no_such_file_or_directory) {
errs() << GCDA << ": " << EC.message() << "\n";
@@ -75,9 +77,7 @@ static void reportCoverage(StringRef SourceFile, StringRef ObjectDir,
if (DumpGCOV)
GF.print(errs());
- FileInfo FI(Options);
- GF.collectLineCounts(FI);
- FI.print(llvm::outs(), SourceFile, GCNO, GCDA, GF);
+ gcovOneInput(Options, SourceFile, GCNO, GCDA, GF);
}
int gcovMain(int argc, const char *argv[]) {
@@ -115,6 +115,11 @@ int gcovMain(int argc, const char *argv[]) {
cl::Grouping, cl::NotHidden,
cl::aliasopt(Intermediate));
+ cl::opt<bool> Demangle("demangled-names", cl::init(false),
+ cl::desc("Demangle function names"));
+ cl::alias DemangleA("m", cl::desc("Alias for --demangled-names"),
+ cl::Grouping, cl::NotHidden, cl::aliasopt(Demangle));
+
cl::opt<bool> NoOutput("n", cl::Grouping, cl::init(false),
cl::desc("Do not output any .gcov files"));
cl::alias NoOutputA("no-output", cl::aliasopt(NoOutput));
@@ -129,6 +134,14 @@ int gcovMain(int argc, const char *argv[]) {
cl::desc("Preserve path components"));
cl::alias PreservePathsA("preserve-paths", cl::aliasopt(PreservePaths));
+ cl::opt<bool> RelativeOnly(
+ "r", cl::Grouping,
+ cl::desc("Only dump files with relative paths or absolute paths with the "
+ "prefix specified by -s"));
+ cl::alias RelativeOnlyA("relative-only", cl::aliasopt(RelativeOnly));
+ cl::opt<std::string> SourcePrefix("s", cl::desc("Source prefix to elide"));
+ cl::alias SourcePrefixA("source-prefix", cl::aliasopt(SourcePrefix));
+
cl::opt<bool> UseStdout("t", cl::Grouping, cl::init(false),
cl::desc("Print to stdout"));
cl::alias UseStdoutA("stdout", cl::aliasopt(UseStdout));
@@ -155,7 +168,8 @@ int gcovMain(int argc, const char *argv[]) {
GCOV::Options Options(AllBlocks, BranchProb, BranchCount, FuncSummary,
PreservePaths, UncondBranch, Intermediate, LongNames,
- NoOutput, UseStdout, HashFilenames);
+ Demangle, NoOutput, RelativeOnly, UseStdout,
+ HashFilenames, SourcePrefix);
for (const auto &SourceFile : SourceFiles)
reportCoverage(SourceFile, ObjectDir, InputGCNO, InputGCDA, DumpGCOV,
diff --git a/contrib/llvm-project/llvm/tools/llvm-diff/DifferenceEngine.cpp b/contrib/llvm-project/llvm/tools/llvm-diff/DifferenceEngine.cpp
index 2cf1afbc6af5..64c0dc61e806 100644
--- a/contrib/llvm-project/llvm/tools/llvm-diff/DifferenceEngine.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-diff/DifferenceEngine.cpp
@@ -67,7 +67,7 @@ public:
unsigned NewSize = Storage.size() - 1;
if (NewSize) {
// Move the slot at the end to the beginning.
- if (is_trivially_copyable<T>::value)
+ if (std::is_trivially_copyable<T>::value)
Storage[0] = Storage[NewSize];
else
std::swap(Storage[0], Storage[NewSize]);
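
The one-character fix above just qualifies is_trivially_copyable with std::; the surrounding code uses the trait to pick a plain assignment for trivially copyable payloads and a swap otherwise. A minimal sketch of the same dispatch (moveLastToFront is an invented helper, not the llvm-diff code):

#include <cstddef>
#include <iostream>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

// Move the last element into slot 0: cheap assignment when T is trivially
// copyable, swap otherwise, mirroring the heap maintenance above.
template <typename T> void moveLastToFront(std::vector<T> &Storage) {
  const std::size_t Last = Storage.size() - 1;
  if (std::is_trivially_copyable<T>::value)
    Storage[0] = Storage[Last];
  else
    std::swap(Storage[0], Storage[Last]);
  Storage.pop_back();
}

int main() {
  std::vector<int> Ints = {1, 2, 3};          // trivially copyable: assignment
  std::vector<std::string> Strs = {"a", "b"}; // not: swap
  moveLastToFront(Ints);
  moveLastToFront(Strs);
  std::cout << Ints[0] << ' ' << Strs[0] << '\n'; // 3 b
}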
diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfdump/Statistics.cpp b/contrib/llvm-project/llvm/tools/llvm-dwarfdump/Statistics.cpp
index 18b4c40c4d75..82da06eab1d6 100644
--- a/contrib/llvm-project/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -71,6 +71,8 @@ struct PerFunctionStats {
/// Holds accumulated global statistics about DIEs.
struct GlobalStats {
/// Total number of PC range bytes covered by DW_AT_locations.
+ unsigned TotalBytesCovered = 0;
+ /// Total number of parent DIE PC range bytes covered by DW_AT_Locations.
unsigned ScopeBytesCovered = 0;
/// Total number of PC range bytes in each variable's enclosing scope.
unsigned ScopeBytes = 0;
@@ -143,20 +145,20 @@ struct LocationStats {
} // namespace
/// Collect debug location statistics for one DIE.
-static void collectLocStats(uint64_t BytesCovered, uint64_t BytesInScope,
+static void collectLocStats(uint64_t ScopeBytesCovered, uint64_t BytesInScope,
std::vector<unsigned> &VarParamLocStats,
std::vector<unsigned> &ParamLocStats,
std::vector<unsigned> &LocalVarLocStats,
bool IsParam, bool IsLocalVar) {
- auto getCoverageBucket = [BytesCovered, BytesInScope]() -> unsigned {
+ auto getCoverageBucket = [ScopeBytesCovered, BytesInScope]() -> unsigned {
// No debug location at all for the variable.
- if (BytesCovered == 0)
+ if (ScopeBytesCovered == 0)
return 0;
// Fully covered variable within its scope.
- if (BytesCovered >= BytesInScope)
+ if (ScopeBytesCovered >= BytesInScope)
return NumOfCoverageCategories - 1;
// Get covered range (e.g. 20%-29%).
- unsigned LocBucket = 100 * (double)BytesCovered / BytesInScope;
+ unsigned LocBucket = 100 * (double)ScopeBytesCovered / BytesInScope;
LocBucket /= 10;
return LocBucket + 1;
};
@@ -198,6 +200,15 @@ static std::string constructDieID(DWARFDie Die,
return ID.str();
}
+/// Return the number of bytes in the overlap of ranges A and B.
+static uint64_t calculateOverlap(DWARFAddressRange A, DWARFAddressRange B) {
+ uint64_t Lower = std::max(A.LowPC, B.LowPC);
+ uint64_t Upper = std::min(A.HighPC, B.HighPC);
+ if (Lower >= Upper)
+ return 0;
+ return Upper - Lower;
+}
+
/// Collect debug info quality metrics for one DIE.
static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
std::string VarPrefix, uint64_t BytesInScope,
@@ -208,7 +219,8 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
bool HasLoc = false;
bool HasSrcLoc = false;
bool HasType = false;
- uint64_t BytesCovered = 0;
+ uint64_t TotalBytesCovered = 0;
+ uint64_t ScopeBytesCovered = 0;
uint64_t BytesEntryValuesCovered = 0;
auto &FnStats = FnStatMap[FnPrefix];
bool IsParam = Die.getTag() == dwarf::DW_TAG_formal_parameter;
@@ -261,7 +273,8 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
if (Die.find(dwarf::DW_AT_const_value)) {
// This catches constant members *and* variables.
HasLoc = true;
- BytesCovered = BytesInScope;
+ ScopeBytesCovered = BytesInScope;
+ TotalBytesCovered = BytesInScope;
} else {
// Handle variables and function arguments.
Expected<std::vector<DWARFLocationExpression>> Loc =
@@ -275,13 +288,27 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
*Loc, [](const DWARFLocationExpression &L) { return !L.Range; });
if (Default != Loc->end()) {
// Assume the entire range is covered by a single location.
- BytesCovered = BytesInScope;
+ ScopeBytesCovered = BytesInScope;
+ TotalBytesCovered = BytesInScope;
} else {
+ // The caller has already checked this Expected result, so it cannot fail.
+ auto ScopeRanges = cantFail(Die.getParent().getAddressRanges());
for (auto Entry : *Loc) {
- uint64_t BytesEntryCovered = Entry.Range->HighPC - Entry.Range->LowPC;
- BytesCovered += BytesEntryCovered;
+ TotalBytesCovered += Entry.Range->HighPC - Entry.Range->LowPC;
+ uint64_t ScopeBytesCoveredByEntry = 0;
+ // Calculate how many bytes of the parent scope this entry covers.
+ // FIXME: In section 2.6.2 of the DWARFv5 spec it says that "The
+ // address ranges defined by the bounded location descriptions of a
+ // location list may overlap". So in theory a variable can have
+ // multiple simultaneous locations, which would make this calculation
+ // misleading because we will count the overlapped areas
+ // twice. However, clang does not currently emit DWARF like this.
+ for (DWARFAddressRange R : ScopeRanges) {
+ ScopeBytesCoveredByEntry += calculateOverlap(*Entry.Range, R);
+ }
+ ScopeBytesCovered += ScopeBytesCoveredByEntry;
if (IsEntryValue(Entry.Expr))
- BytesEntryValuesCovered += BytesEntryCovered;
+ BytesEntryValuesCovered += ScopeBytesCoveredByEntry;
}
}
}
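For intuition, the clamped-intersection arithmetic behind calculateOverlap() and the ScopeBytesCovered accumulation above reduces to the following standalone sketch (plain structs stand in for DWARFAddressRange; the values are illustrative only):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Range { uint64_t LowPC, HighPC; }; // half-open [LowPC, HighPC)

// Same clamp-and-subtract as calculateOverlap() above.
static uint64_t overlapBytes(Range A, Range B) {
  uint64_t Lower = std::max(A.LowPC, B.LowPC);
  uint64_t Upper = std::min(A.HighPC, B.HighPC);
  return Lower < Upper ? Upper - Lower : 0;
}

int main() {
  // One location-list entry measured against a scope made of two ranges.
  Range Entry{0x10, 0x40};
  std::vector<Range> Scope{{0x00, 0x20}, {0x30, 0x50}};
  uint64_t Covered = 0;
  for (Range R : Scope)
    Covered += overlapBytes(Entry, R); // 16 + 16 bytes
  return Covered == 32 ? 0 : 1;
}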
@@ -295,11 +322,11 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
else if (IsLocalVar)
LocStats.NumVar++;
- collectLocStats(BytesCovered, BytesInScope, LocStats.VarParamLocStats,
+ collectLocStats(ScopeBytesCovered, BytesInScope, LocStats.VarParamLocStats,
LocStats.ParamLocStats, LocStats.LocalVarLocStats, IsParam,
IsLocalVar);
// Non debug entry values coverage statistics.
- collectLocStats(BytesCovered - BytesEntryValuesCovered, BytesInScope,
+ collectLocStats(ScopeBytesCovered - BytesEntryValuesCovered, BytesInScope,
LocStats.VarParamNonEntryValLocStats,
LocStats.ParamNonEntryValLocStats,
LocStats.LocalVarNonEntryValLocStats, IsParam, IsLocalVar);
@@ -313,19 +340,17 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
std::string VarID = constructDieID(Die, VarPrefix);
FnStats.VarsInFunction.insert(VarID);
+ GlobalStats.TotalBytesCovered += TotalBytesCovered;
if (BytesInScope) {
- // Turns out we have a lot of ranges that extend past the lexical scope.
- GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered);
+ GlobalStats.ScopeBytesCovered += ScopeBytesCovered;
GlobalStats.ScopeBytes += BytesInScope;
GlobalStats.ScopeEntryValueBytesCovered += BytesEntryValuesCovered;
if (IsParam) {
- GlobalStats.ParamScopeBytesCovered +=
- std::min(BytesInScope, BytesCovered);
+ GlobalStats.ParamScopeBytesCovered += ScopeBytesCovered;
GlobalStats.ParamScopeBytes += BytesInScope;
GlobalStats.ParamScopeEntryValueBytesCovered += BytesEntryValuesCovered;
} else if (IsLocalVar) {
- GlobalStats.LocalVarScopeBytesCovered +=
- std::min(BytesInScope, BytesCovered);
+ GlobalStats.LocalVarScopeBytesCovered += ScopeBytesCovered;
GlobalStats.LocalVarScopeBytes += BytesInScope;
GlobalStats.LocalVarScopeEntryValueBytesCovered +=
BytesEntryValuesCovered;
@@ -466,49 +491,54 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
}
}
-/// Print machine-readable output.
-/// The machine-readable format is single-line JSON output.
+/// Print human-readable output.
/// \{
-static void printDatum(raw_ostream &OS, const char *Key, json::Value Value) {
- OS << ",\"" << Key << "\":" << Value;
+static void printDatum(json::OStream &J, const char *Key, json::Value Value) {
+ J.attribute(Key, Value);
LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
}
-static void printLocationStats(raw_ostream &OS, const char *Key,
+static void printLocationStats(json::OStream &J, const char *Key,
std::vector<unsigned> &LocationStats) {
- OS << ",\"" << Key << " with 0% of parent scope covered by DW_AT_location\":"
- << LocationStats[0];
+ J.attribute(
+ (Twine(Key) + " with 0% of parent scope covered by DW_AT_location").str(),
+ LocationStats[0]);
LLVM_DEBUG(
llvm::dbgs() << Key
<< " with 0% of parent scope covered by DW_AT_location: \\"
<< LocationStats[0] << '\n');
- OS << ",\"" << Key
- << " with (0%,10%) of parent scope covered by DW_AT_location\":"
- << LocationStats[1];
+ J.attribute(
+ (Twine(Key) + " with (0%,10%) of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[1]);
LLVM_DEBUG(llvm::dbgs()
<< Key
<< " with (0%,10%) of parent scope covered by DW_AT_location: "
<< LocationStats[1] << '\n');
for (unsigned i = 2; i < NumOfCoverageCategories - 1; ++i) {
- OS << ",\"" << Key << " with [" << (i - 1) * 10 << "%," << i * 10
- << "%) of parent scope covered by DW_AT_location\":" << LocationStats[i];
+ J.attribute((Twine(Key) + " with [" + Twine((i - 1) * 10) + "%," +
+ Twine(i * 10) + "%) of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[i]);
LLVM_DEBUG(llvm::dbgs()
<< Key << " with [" << (i - 1) * 10 << "%," << i * 10
<< "%) of parent scope covered by DW_AT_location: "
<< LocationStats[i]);
}
- OS << ",\"" << Key
- << " with 100% of parent scope covered by DW_AT_location\":"
- << LocationStats[NumOfCoverageCategories - 1];
+ J.attribute(
+ (Twine(Key) + " with 100% of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[NumOfCoverageCategories - 1]);
LLVM_DEBUG(
llvm::dbgs() << Key
<< " with 100% of parent scope covered by DW_AT_location: "
<< LocationStats[NumOfCoverageCategories - 1]);
}
-static void printSectionSizes(raw_ostream &OS, const SectionSizes &Sizes) {
+static void printSectionSizes(json::OStream &J, const SectionSizes &Sizes) {
for (const auto &DebugSec : Sizes.DebugSectionSizes)
- OS << ",\"#bytes in " << DebugSec.getKey() << "\":" << DebugSec.getValue();
+ J.attribute((Twine("#bytes in ") + DebugSec.getKey()).str(),
+ int64_t(DebugSec.getValue()));
}
/// \}
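The printers above now drive llvm::json::OStream rather than concatenating JSON text by hand; stripped down to a standalone sketch with made-up keys, the pattern is:

#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  json::OStream J(outs(), 2); // pretty-print with a two-space indent
  J.objectBegin();
  J.attribute("version", 6);
  J.attribute("#functions", 42); // any json::Value works as the value
  J.objectEnd();
  outs() << '\n';
  return 0;
}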
@@ -540,7 +570,7 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
/// The version number should be increased every time the algorithm is changed
/// (including bug fixes). New metrics may be added without increasing the
/// version.
- unsigned Version = 5;
+ unsigned Version = 6;
unsigned VarParamTotal = 0;
unsigned VarParamUnique = 0;
unsigned VarParamWithLoc = 0;
@@ -587,101 +617,105 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
// Print summary.
OS.SetBufferSize(1024);
- OS << "{\"version\":" << Version;
+ json::OStream J(OS, 2);
+ J.objectBegin();
+ J.attribute("version", Version);
LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n";
llvm::dbgs() << "---------------------------------\n");
- printDatum(OS, "file", Filename.str());
- printDatum(OS, "format", FormatName);
+ printDatum(J, "file", Filename.str());
+ printDatum(J, "format", FormatName);
- printDatum(OS, "#functions", NumFunctions);
- printDatum(OS, "#functions with location", NumFuncsWithSrcLoc);
- printDatum(OS, "#inlined functions", NumInlinedFunctions);
- printDatum(OS, "#inlined functions with abstract origins",
- NumAbstractOrigins);
+ printDatum(J, "#functions", NumFunctions);
+ printDatum(J, "#functions with location", NumFuncsWithSrcLoc);
+ printDatum(J, "#inlined functions", NumInlinedFunctions);
+ printDatum(J, "#inlined functions with abstract origins", NumAbstractOrigins);
// This includes local variables and formal parameters.
- printDatum(OS, "#unique source variables", VarParamUnique);
- printDatum(OS, "#source variables", VarParamTotal);
- printDatum(OS, "#source variables with location", VarParamWithLoc);
+ printDatum(J, "#unique source variables", VarParamUnique);
+ printDatum(J, "#source variables", VarParamTotal);
+ printDatum(J, "#source variables with location", VarParamWithLoc);
- printDatum(OS, "#call site entries", GlobalStats.CallSiteEntries);
- printDatum(OS, "#call site DIEs", GlobalStats.CallSiteDIEs);
- printDatum(OS, "#call site parameter DIEs", GlobalStats.CallSiteParamDIEs);
+ printDatum(J, "#call site entries", GlobalStats.CallSiteEntries);
+ printDatum(J, "#call site DIEs", GlobalStats.CallSiteDIEs);
+ printDatum(J, "#call site parameter DIEs", GlobalStats.CallSiteParamDIEs);
- printDatum(OS, "sum_all_variables(#bytes in parent scope)",
+ printDatum(J, "sum_all_variables(#bytes in parent scope)",
GlobalStats.ScopeBytes);
- printDatum(OS,
+ printDatum(J,
+ "sum_all_variables(#bytes in any scope covered by DW_AT_location)",
+ GlobalStats.TotalBytesCovered);
+ printDatum(J,
"sum_all_variables(#bytes in parent scope covered by "
"DW_AT_location)",
GlobalStats.ScopeBytesCovered);
- printDatum(OS,
+ printDatum(J,
"sum_all_variables(#bytes in parent scope covered by "
"DW_OP_entry_value)",
GlobalStats.ScopeEntryValueBytesCovered);
- printDatum(OS, "sum_all_params(#bytes in parent scope)",
+ printDatum(J, "sum_all_params(#bytes in parent scope)",
GlobalStats.ParamScopeBytes);
- printDatum(
- OS,
- "sum_all_params(#bytes in parent scope covered by DW_AT_location)",
- GlobalStats.ParamScopeBytesCovered);
- printDatum(OS,
+ printDatum(J,
+ "sum_all_params(#bytes in parent scope covered by DW_AT_location)",
+ GlobalStats.ParamScopeBytesCovered);
+ printDatum(J,
"sum_all_params(#bytes in parent scope covered by "
"DW_OP_entry_value)",
GlobalStats.ParamScopeEntryValueBytesCovered);
- printDatum(OS, "sum_all_local_vars(#bytes in parent scope)",
+ printDatum(J, "sum_all_local_vars(#bytes in parent scope)",
GlobalStats.LocalVarScopeBytes);
- printDatum(OS,
+ printDatum(J,
"sum_all_local_vars(#bytes in parent scope covered by "
"DW_AT_location)",
GlobalStats.LocalVarScopeBytesCovered);
- printDatum(OS,
+ printDatum(J,
"sum_all_local_vars(#bytes in parent scope covered by "
"DW_OP_entry_value)",
GlobalStats.LocalVarScopeEntryValueBytesCovered);
- printDatum(OS, "#bytes witin functions", GlobalStats.FunctionSize);
- printDatum(OS, "#bytes witin inlined functions",
+ printDatum(J, "#bytes within functions", GlobalStats.FunctionSize);
+ printDatum(J, "#bytes within inlined functions",
GlobalStats.InlineFunctionSize);
// Print the summary for formal parameters.
- printDatum(OS, "#params", ParamTotal);
- printDatum(OS, "#params with source location", ParamWithSrcLoc);
- printDatum(OS, "#params with type", ParamWithType);
- printDatum(OS, "#params with binary location", ParamWithLoc);
+ printDatum(J, "#params", ParamTotal);
+ printDatum(J, "#params with source location", ParamWithSrcLoc);
+ printDatum(J, "#params with type", ParamWithType);
+ printDatum(J, "#params with binary location", ParamWithLoc);
// Print the summary for local variables.
- printDatum(OS, "#local vars", LocalVarTotal);
- printDatum(OS, "#local vars with source location", LocalVarWithSrcLoc);
- printDatum(OS, "#local vars with type", LocalVarWithType);
- printDatum(OS, "#local vars with binary location", LocalVarWithLoc);
+ printDatum(J, "#local vars", LocalVarTotal);
+ printDatum(J, "#local vars with source location", LocalVarWithSrcLoc);
+ printDatum(J, "#local vars with type", LocalVarWithType);
+ printDatum(J, "#local vars with binary location", LocalVarWithLoc);
// Print the debug section sizes.
- printSectionSizes(OS, Sizes);
+ printSectionSizes(J, Sizes);
// Print the location statistics for variables (includes local variables
// and formal parameters).
- printDatum(OS, "#variables processed by location statistics",
+ printDatum(J, "#variables processed by location statistics",
LocStats.NumVarParam);
- printLocationStats(OS, "#variables", LocStats.VarParamLocStats);
- printLocationStats(OS, "#variables - entry values",
+ printLocationStats(J, "#variables", LocStats.VarParamLocStats);
+ printLocationStats(J, "#variables - entry values",
LocStats.VarParamNonEntryValLocStats);
// Print the location statistics for formal parameters.
- printDatum(OS, "#params processed by location statistics", LocStats.NumParam);
- printLocationStats(OS, "#params", LocStats.ParamLocStats);
- printLocationStats(OS, "#params - entry values",
+ printDatum(J, "#params processed by location statistics", LocStats.NumParam);
+ printLocationStats(J, "#params", LocStats.ParamLocStats);
+ printLocationStats(J, "#params - entry values",
LocStats.ParamNonEntryValLocStats);
// Print the location statistics for local variables.
- printDatum(OS, "#local vars processed by location statistics",
+ printDatum(J, "#local vars processed by location statistics",
LocStats.NumVar);
- printLocationStats(OS, "#local vars", LocStats.LocalVarLocStats);
- printLocationStats(OS, "#local vars - entry values",
+ printLocationStats(J, "#local vars", LocStats.LocalVarLocStats);
+ printLocationStats(J, "#local vars - entry values",
LocStats.LocalVarNonEntryValLocStats);
- OS << "}\n";
+ J.objectEnd();
+ OS << '\n';
LLVM_DEBUG(
llvm::dbgs() << "Total Availability: "
<< (int)std::round((VarParamWithLoc * 100.0) / VarParamTotal)
diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index d8fa4f9953dc..4322f125a84d 100644
--- a/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -624,7 +624,7 @@ int main(int argc, char **argv) {
if (Diff && Verbose) {
WithColor::error() << "incompatible arguments: specifying both -diff and "
"-verbose is currently not supported";
- return 0;
+ return 1;
}
std::error_code EC;
@@ -670,7 +670,7 @@ int main(int argc, char **argv) {
std::vector<std::string> Objects;
for (const auto &F : InputFilenames) {
auto Objs = expandBundle(F);
- Objects.insert(Objects.end(), Objs.begin(), Objs.end());
+ llvm::append_range(Objects, Objs);
}
bool Success = true;
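llvm::append_range is simply a terser spelling of the insert-at-end idiom it replaces here; a self-contained example:

#include "llvm/ADT/STLExtras.h"
#include <vector>
using namespace llvm;

int main() {
  std::vector<int> Objects = {1, 2};
  std::vector<int> Objs = {3, 4};
  // Equivalent to Objects.insert(Objects.end(), Objs.begin(), Objs.end()).
  append_range(Objects, Objs);
  return Objects.size() == 4 ? 0 : 1;
}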
diff --git a/contrib/llvm-project/llvm/tools/llvm-dwp/llvm-dwp.cpp b/contrib/llvm-project/llvm/tools/llvm-dwp/llvm-dwp.cpp
index d5ebe5ab0a57..d495bd3d4cab 100644
--- a/contrib/llvm-project/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -377,7 +377,8 @@ writeIndex(MCStreamer &Out, MCSection *Section,
&DWARFUnitIndex::Entry::SectionContribution::Length);
}
-std::string buildDWODescription(StringRef Name, StringRef DWPName, StringRef DWOName) {
+static std::string buildDWODescription(StringRef Name, StringRef DWPName,
+ StringRef DWOName) {
std::string Text = "\'";
Text += Name;
Text += '\'';
@@ -525,8 +526,8 @@ getDWOFilenames(StringRef ExecFilename) {
std::string DWOCompDir =
dwarf::toString(Die.find(dwarf::DW_AT_comp_dir), "");
if (!DWOCompDir.empty()) {
- SmallString<16> DWOPath;
- sys::path::append(DWOPath, DWOCompDir, DWOName);
+ SmallString<16> DWOPath(std::move(DWOName));
+ sys::fs::make_absolute(DWOCompDir, DWOPath);
DWOPaths.emplace_back(DWOPath.data(), DWOPath.size());
} else {
DWOPaths.push_back(std::move(DWOName));
@@ -695,6 +696,14 @@ static int error(const Twine &Error, const Twine &Context) {
return 1;
}
+static Expected<Triple> readTargetTriple(StringRef FileName) {
+ auto ErrOrObj = object::ObjectFile::createObjectFile(FileName);
+ if (!ErrOrObj)
+ return ErrOrObj.takeError();
+
+ return ErrOrObj->getBinary()->makeTriple();
+}
+
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
@@ -705,17 +714,36 @@ int main(int argc, char **argv) {
llvm::InitializeAllTargets();
llvm::InitializeAllAsmPrinters();
+ std::vector<std::string> DWOFilenames = InputFiles;
+ for (const auto &ExecFilename : ExecFilenames) {
+ auto DWOs = getDWOFilenames(ExecFilename);
+ if (!DWOs) {
+ logAllUnhandledErrors(DWOs.takeError(), WithColor::error());
+ return 1;
+ }
+ DWOFilenames.insert(DWOFilenames.end(),
+ std::make_move_iterator(DWOs->begin()),
+ std::make_move_iterator(DWOs->end()));
+ }
+
+ if (DWOFilenames.empty())
+ return 0;
+
std::string ErrorStr;
StringRef Context = "dwarf streamer init";
- Triple TheTriple("x86_64-linux-gnu");
+ auto ErrOrTriple = readTargetTriple(DWOFilenames.front());
+ if (!ErrOrTriple) {
+ logAllUnhandledErrors(ErrOrTriple.takeError(), WithColor::error());
+ return 1;
+ }
// Get the target.
const Target *TheTarget =
- TargetRegistry::lookupTarget("", TheTriple, ErrorStr);
+ TargetRegistry::lookupTarget("", *ErrOrTriple, ErrorStr);
if (!TheTarget)
return error(ErrorStr, Context);
- std::string TripleName = TheTriple.getTriple();
+ std::string TripleName = ErrOrTriple->getTriple();
// Create all the MC Objects.
std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
@@ -730,7 +758,7 @@ int main(int argc, char **argv) {
MCObjectFileInfo MOFI;
MCContext MC(MAI.get(), MRI.get(), &MOFI);
- MOFI.InitMCObjectFileInfo(TheTriple, /*PIC*/ false, MC);
+ MOFI.InitMCObjectFileInfo(*ErrOrTriple, /*PIC*/ false, MC);
std::unique_ptr<MCSubtargetInfo> MSTI(
TheTarget->createMCSubtargetInfo(TripleName, "", ""));
@@ -765,25 +793,13 @@ int main(int argc, char **argv) {
}
std::unique_ptr<MCStreamer> MS(TheTarget->createMCObjectStreamer(
- TheTriple, MC, std::unique_ptr<MCAsmBackend>(MAB),
+ *ErrOrTriple, MC, std::unique_ptr<MCAsmBackend>(MAB),
MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(MCE), *MSTI,
MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ false));
if (!MS)
return error("no object streamer for target " + TripleName, Context);
- std::vector<std::string> DWOFilenames = InputFiles;
- for (const auto &ExecFilename : ExecFilenames) {
- auto DWOs = getDWOFilenames(ExecFilename);
- if (!DWOs) {
- logAllUnhandledErrors(DWOs.takeError(), WithColor::error());
- return 1;
- }
- DWOFilenames.insert(DWOFilenames.end(),
- std::make_move_iterator(DWOs->begin()),
- std::make_move_iterator(DWOs->end()));
- }
-
if (auto Err = write(*MS, DWOFilenames)) {
logAllUnhandledErrors(std::move(Err), WithColor::error());
return 1;
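With this reordering llvm-dwp derives the target triple from its first DWO input through readTargetTriple() instead of hard-coding x86_64-linux-gnu. A standalone sketch of that lookup (a hypothetical helper tool, not part of the patch):

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main(int argc, char **argv) {
  InitLLVM X(argc, argv);
  if (argc < 2)
    return 1;
  // Same call chain as readTargetTriple() above.
  auto ErrOrObj = object::ObjectFile::createObjectFile(argv[1]);
  if (!ErrOrObj) {
    logAllUnhandledErrors(ErrOrObj.takeError(), WithColor::error());
    return 1;
  }
  outs() << ErrOrObj->getBinary()->makeTriple().getTriple() << '\n';
  return 0;
}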
diff --git a/contrib/llvm-project/llvm/tools/llvm-link/llvm-link.cpp b/contrib/llvm-project/llvm/tools/llvm-link/llvm-link.cpp
index 7141bd1ca7a1..eed49c438335 100644
--- a/contrib/llvm-project/llvm/tools/llvm-link/llvm-link.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-link/llvm-link.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/Archive.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/AutoUpgrade.h"
@@ -24,6 +24,7 @@
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
+#include "llvm/Object/Archive.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
@@ -115,17 +116,18 @@ static ExitOnError ExitOnErr;
// link path for the specified file to try to find it...
//
static std::unique_ptr<Module> loadFile(const char *argv0,
- const std::string &FN,
+ std::unique_ptr<MemoryBuffer> Buffer,
LLVMContext &Context,
bool MaterializeMetadata = true) {
SMDiagnostic Err;
if (Verbose)
- errs() << "Loading '" << FN << "'\n";
+ errs() << "Loading '" << Buffer->getBufferIdentifier() << "'\n";
std::unique_ptr<Module> Result;
if (DisableLazyLoad)
- Result = parseIRFile(FN, Err, Context);
+ Result = parseIR(*Buffer, Err, Context);
else
- Result = getLazyIRFileModule(FN, Err, Context, !MaterializeMetadata);
+ Result =
+ getLazyIRModule(std::move(Buffer), Err, Context, !MaterializeMetadata);
if (!Result) {
Err.print(argv0, errs());
@@ -141,20 +143,17 @@ static std::unique_ptr<Module> loadFile(const char *argv0,
}
static std::unique_ptr<Module> loadArFile(const char *Argv0,
- const std::string &ArchiveName,
- LLVMContext &Context, Linker &L,
- unsigned OrigFlags,
- unsigned ApplicableFlags) {
+ std::unique_ptr<MemoryBuffer> Buffer,
+ LLVMContext &Context) {
std::unique_ptr<Module> Result(new Module("ArchiveModule", Context));
+ StringRef ArchiveName = Buffer->getBufferIdentifier();
if (Verbose)
errs() << "Reading library archive file '" << ArchiveName
<< "' to memory\n";
- ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
- MemoryBuffer::getFile(ArchiveName, -1, false);
- ExitOnErr(errorCodeToError(Buf.getError()));
Error Err = Error::success();
- object::Archive Archive(Buf.get()->getMemBufferRef(), Err);
+ object::Archive Archive(*Buffer, Err);
ExitOnErr(std::move(Err));
+ Linker L(*Result);
for (const object::Archive::Child &C : Archive.children(Err)) {
Expected<StringRef> Ename = C.getName();
if (Error E = Ename.takeError()) {
@@ -163,7 +162,7 @@ static std::unique_ptr<Module> loadArFile(const char *Argv0,
<< " failed to read name of archive member"
<< ArchiveName << "'\n";
return nullptr;
- };
+ }
std::string ChildName = Ename.get().str();
if (Verbose)
errs() << "Parsing member '" << ChildName
@@ -188,7 +187,12 @@ static std::unique_ptr<Module> loadArFile(const char *Argv0,
return nullptr;
}
- std::unique_ptr<Module> M = parseIR(MemBuf.get(), ParseErr, Context);
+ std::unique_ptr<Module> M;
+ if (DisableLazyLoad)
+ M = parseIR(MemBuf.get(), ParseErr, Context);
+ else
+ M = getLazyIRModule(MemoryBuffer::getMemBuffer(MemBuf.get(), false),
+ ParseErr, Context);
if (!M.get()) {
errs() << Argv0 << ": ";
@@ -199,9 +203,8 @@ static std::unique_ptr<Module> loadArFile(const char *Argv0,
}
if (Verbose)
errs() << "Linking member '" << ChildName << "' of archive library.\n";
- if (L.linkModules(*Result, std::move(M), ApplicableFlags))
+ if (L.linkInModule(std::move(M)))
return nullptr;
- ApplicableFlags = OrigFlags;
} // end for each child
ExitOnErr(std::move(Err));
return Result;
@@ -287,7 +290,9 @@ static bool importFunctions(const char *argv0, Module &DestModule) {
auto ModuleLoader = [&DestModule](const char *argv0,
const std::string &Identifier) {
- return loadFile(argv0, Identifier, DestModule.getContext(), false);
+ std::unique_ptr<MemoryBuffer> Buffer =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Identifier)));
+ return loadFile(argv0, std::move(Buffer), DestModule.getContext(), false);
};
ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
@@ -349,10 +354,13 @@ static bool linkFiles(const char *argv0, LLVMContext &Context, Linker &L,
// Similar to some flags, internalization doesn't apply to the first file.
bool InternalizeLinkedSymbols = false;
for (const auto &File : Files) {
+ std::unique_ptr<MemoryBuffer> Buffer =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(File)));
+
std::unique_ptr<Module> M =
- (llvm::sys::path::extension(File) == ".a")
- ? loadArFile(argv0, File, Context, L, Flags, ApplicableFlags)
- : loadFile(argv0, File, Context);
+ identify_magic(Buffer->getBuffer()) == file_magic::archive
+ ? loadArFile(argv0, std::move(Buffer), Context)
+ : loadFile(argv0, std::move(Buffer), Context);
if (!M.get()) {
errs() << argv0 << ": ";
WithColor::error() << " loading file '" << File << "'\n";
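The archive check in linkFiles() now sniffs the buffer's magic instead of trusting a ".a" extension; in isolation the test looks roughly like this (hypothetical standalone checker):

#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main(int argc, char **argv) {
  if (argc < 2)
    return 1;
  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
      MemoryBuffer::getFileOrSTDIN(argv[1]);
  if (std::error_code EC = BufOrErr.getError()) {
    errs() << argv[1] << ": " << EC.message() << '\n';
    return 1;
  }
  // True for ar archives, including thin archives.
  bool IsArchive =
      identify_magic((*BufOrErr)->getBuffer()) == file_magic::archive;
  outs() << argv[1] << (IsArchive ? ": archive\n" : ": not an archive\n");
  return 0;
}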
@@ -454,7 +462,8 @@ int main(int argc, char **argv) {
errs() << "Here's the assembly:\n" << *Composite;
std::error_code EC;
- ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None);
+ ToolOutputFile Out(OutputFilename, EC,
+ OutputAssembly ? sys::fs::OF_Text : sys::fs::OF_None);
if (EC) {
WithColor::error() << EC.message() << '\n';
return 1;
diff --git a/contrib/llvm-project/llvm/tools/llvm-lto/llvm-lto.cpp b/contrib/llvm-project/llvm/tools/llvm-lto/llvm-lto.cpp
index 0bd9078f2d8c..1745b7449714 100644
--- a/contrib/llvm-project/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -78,17 +78,6 @@ static cl::opt<bool> DisableVerify(
"disable-verify", cl::init(false),
cl::desc("Do not run the verifier during the optimization pipeline"));
-static cl::opt<bool> DisableInline("disable-inlining", cl::init(false),
- cl::desc("Do not run the inliner pass"));
-
-static cl::opt<bool>
- DisableGVNLoadPRE("disable-gvn-loadpre", cl::init(false),
- cl::desc("Do not run the GVN load PRE pass"));
-
-static cl::opt<bool> DisableLTOVectorization(
- "disable-lto-vectorization", cl::init(false),
- cl::desc("Do not run loop or slp vectorization during LTO"));
-
static cl::opt<bool> EnableFreestanding(
"lto-freestanding", cl::init(false),
cl::desc("Enable Freestanding (disable builtins / TLI) during LTO"));
@@ -181,6 +170,10 @@ static cl::opt<std::string> ThinLTOGeneratedObjectsDir(
cl::desc("Save ThinLTO generated object files using filenames created in "
"the given directory."));
+static cl::opt<bool> SaveLinkedModuleFile(
+ "save-linked-module", cl::init(false),
+ cl::desc("Write linked LTO module to file before optimize"));
+
static cl::opt<bool>
SaveModuleFile("save-merged-module", cl::init(false),
cl::desc("Write merged LTO module to file before CodeGen"));
@@ -333,7 +326,7 @@ getLocalLTOModule(StringRef Path, std::unique_ptr<MemoryBuffer> &Buffer,
}
/// Print some statistics on the index for each input files.
-void printIndexStats() {
+static void printIndexStats() {
for (auto &Filename : InputFilenames) {
ExitOnError ExitOnErr("llvm-lto: error loading file '" + Filename + "': ");
std::unique_ptr<ModuleSummaryIndex> Index =
@@ -414,7 +407,7 @@ static void printMachOCPUOnly() {
LLVMContext Context;
Context.setDiagnosticHandler(std::make_unique<LLVMLTODiagnosticHandler>(),
true);
- TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple());
for (auto &Filename : InputFilenames) {
ErrorOr<std::unique_ptr<LTOModule>> ModuleOrErr =
LTOModule::createFromFile(Context, Filename, Options);
@@ -461,7 +454,7 @@ static void createCombinedModuleSummaryIndex() {
static void getThinLTOOldAndNewPrefix(std::string &OldPrefix,
std::string &NewPrefix) {
assert(ThinLTOPrefixReplace.empty() ||
- ThinLTOPrefixReplace.find(";") != StringRef::npos);
+ ThinLTOPrefixReplace.find(';') != StringRef::npos);
StringRef PrefixReplace = ThinLTOPrefixReplace;
std::pair<StringRef, StringRef> Split = PrefixReplace.split(";");
OldPrefix = Split.first.str();
@@ -903,7 +896,7 @@ int main(int argc, char **argv) {
InitializeAllAsmParsers();
// set up the TargetOptions for the machine
- TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple());
if (ListSymbolsOnly) {
listSymbols(Options);
@@ -960,6 +953,7 @@ int main(int argc, char **argv) {
true);
LTOCodeGenerator CodeGen(Context);
+ CodeGen.setDisableVerify(DisableVerify);
if (UseDiagnosticHandler)
CodeGen.setDiagnosticHandler(handleDiagnostics, nullptr);
@@ -1018,19 +1012,22 @@ int main(int argc, char **argv) {
CodeGen.setCpu(codegen::getMCPU().c_str());
CodeGen.setOptLevel(OptLevel - '0');
-
- auto MAttrs = codegen::getMAttrs();
- if (!MAttrs.empty()) {
- std::string attrs = join(MAttrs, ",");
- CodeGen.setAttr(attrs);
- }
+ CodeGen.setAttrs(codegen::getMAttrs());
if (auto FT = codegen::getExplicitFileType())
CodeGen.setFileType(FT.getValue());
if (!OutputFilename.empty()) {
- if (!CodeGen.optimize(DisableVerify, DisableInline, DisableGVNLoadPRE,
- DisableLTOVectorization)) {
+ if (SaveLinkedModuleFile) {
+ std::string ModuleFilename = OutputFilename;
+ ModuleFilename += ".linked.bc";
+ std::string ErrMsg;
+
+ if (!CodeGen.writeMergedModules(ModuleFilename))
+ error("writing linked module failed.");
+ }
+
+ if (!CodeGen.optimize()) {
// Diagnostic messages should have been printed by the handler.
error("error optimizing the code");
}
@@ -1071,8 +1068,7 @@ int main(int argc, char **argv) {
error(": -save-merged-module must be specified with -o");
const char *OutputName = nullptr;
- if (!CodeGen.compile_to_file(&OutputName, DisableVerify, DisableInline,
- DisableGVNLoadPRE, DisableLTOVectorization))
+ if (!CodeGen.compile_to_file(&OutputName))
error("error compiling the code");
// Diagnostic messages should have been printed by the handler.
diff --git a/contrib/llvm-project/llvm/tools/llvm-lto2/llvm-lto2.cpp b/contrib/llvm-project/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 9dd1f13bd3c3..ca4278fafb89 100644
--- a/contrib/llvm-project/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -17,10 +17,12 @@
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/LTO/Caching.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Passes/PassPlugin.h"
+#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
@@ -103,6 +105,14 @@ static cl::opt<bool> RemarksWithHotness(
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
+cl::opt<Optional<uint64_t>, false, remarks::HotnessThresholdParser>
+ RemarksHotnessThreshold(
+ "pass-remarks-hotness-threshold",
+ cl::desc("Minimum profile count required for an "
+ "optimization remark to be output."
+ " Use 'auto' to apply the threshold from profile summary."),
+ cl::value_desc("uint or 'auto'"), cl::init(0), cl::Hidden);
+
static cl::opt<std::string>
RemarksFilename("pass-remarks-output",
cl::desc("Output filename for pass remarks"),
@@ -135,7 +145,7 @@ static cl::opt<bool>
static cl::opt<bool>
UseNewPM("use-new-pm",
cl::desc("Run LTO passes using the new pass manager"),
- cl::init(false), cl::Hidden);
+ cl::init(LLVM_ENABLE_NEW_PASS_MANAGER), cl::Hidden);
static cl::opt<bool>
DebugPassManager("debug-pass-manager", cl::init(false), cl::Hidden,
@@ -148,6 +158,11 @@ static cl::list<std::string>
PassPlugins("load-pass-plugin",
cl::desc("Load passes from plugin library"));
+static cl::opt<bool> EnableFreestanding(
+ "lto-freestanding",
+ cl::desc("Enable Freestanding (disable builtins / TLI) during LTO"),
+ cl::init(false), cl::Hidden);
+
static void check(Error E, std::string Msg) {
if (!E)
return;
@@ -230,7 +245,7 @@ static int run(int argc, char **argv) {
};
Conf.CPU = codegen::getMCPU();
- Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags();
+ Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple());
Conf.MAttrs = codegen::getMAttrs();
if (auto RM = codegen::getExplicitRelocModel())
Conf.RelocModel = RM.getValue();
@@ -246,6 +261,7 @@ static int run(int argc, char **argv) {
Conf.RemarksFilename = RemarksFilename;
Conf.RemarksPasses = RemarksPasses;
Conf.RemarksWithHotness = RemarksWithHotness;
+ Conf.RemarksHotnessThreshold = RemarksHotnessThreshold;
Conf.RemarksFormat = RemarksFormat;
Conf.SampleProfile = SamplePGOFile;
@@ -258,6 +274,7 @@ static int run(int argc, char **argv) {
Conf.OptLevel = OptLevel - '0';
Conf.UseNewPM = UseNewPM;
+ Conf.Freestanding = EnableFreestanding;
for (auto &PluginFN : PassPlugins)
Conf.PassPlugins.push_back(PluginFN);
switch (CGOptLevel) {
diff --git a/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp b/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp
index 66b55abc4898..f3a66187dd0a 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -169,6 +169,10 @@ static cl::opt<bool> LexMasmIntegers(
"masm-integers",
cl::desc("Enable binary and hex masm integers (0b110 and 0ABCh)"));
+static cl::opt<bool> LexMasmHexFloats(
+ "masm-hexfloats",
+ cl::desc("Enable MASM-style hex float initializers (3F800000r)"));
+
static cl::opt<bool> NoExecStack("no-exec-stack",
cl::desc("File doesn't need an exec stack"));
@@ -300,6 +304,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget,
Parser->setShowParsedOperands(ShowInstOperands);
Parser->setTargetParser(*TAP);
Parser->getLexer().setLexMasmIntegers(LexMasmIntegers);
+ Parser->getLexer().setLexMasmHexFloats(LexMasmHexFloats);
int Res = Parser->Run(NoInitialTextSection);
@@ -464,8 +469,11 @@ int main(int argc, char **argv) {
std::unique_ptr<MCStreamer> Str;
std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
+ assert(MCII && "Unable to create instruction info!");
+
std::unique_ptr<MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
+ assert(STI && "Unable to create subtarget info!");
MCInstPrinter *IP = nullptr;
if (FileType == OFT_AssemblyFile) {
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/CodeRegion.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/CodeRegion.cpp
index e05517c1ac95..7662538e3b68 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/CodeRegion.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/CodeRegion.cpp
@@ -63,7 +63,6 @@ void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
ActiveRegions[Description] = Regions.size();
Regions.emplace_back(std::make_unique<CodeRegion>(Description, Loc));
- return;
}
void CodeRegions::endRegion(StringRef Description, SMLoc Loc) {
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/PipelinePrinter.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/PipelinePrinter.cpp
index 90d468075996..e7dfbfdce26d 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/PipelinePrinter.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/PipelinePrinter.cpp
@@ -19,7 +19,7 @@ namespace mca {
void PipelinePrinter::printReport(llvm::raw_ostream &OS) const {
for (const auto &V : Views)
- V->printView(OS);
+ V->printView(OutputKind, OS);
}
} // namespace mca.
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/PipelinePrinter.h b/contrib/llvm-project/llvm/tools/llvm-mca/PipelinePrinter.h
index 004309cd7b8e..ae18140d32b7 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/PipelinePrinter.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/PipelinePrinter.h
@@ -36,9 +36,11 @@ namespace mca {
class PipelinePrinter {
Pipeline &P;
llvm::SmallVector<std::unique_ptr<View>, 8> Views;
+ View::OutputKind OutputKind;
public:
- PipelinePrinter(Pipeline &pipeline) : P(pipeline) {}
+ PipelinePrinter(Pipeline &pipeline, View::OutputKind OutputKind)
+ : P(pipeline), OutputKind(OutputKind) {}
void addView(std::unique_ptr<View> V) {
P.addEventListener(V.get());
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index 99deed6eae97..38a8e2ef9c53 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -16,7 +16,6 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
namespace llvm {
namespace mca {
@@ -284,21 +283,13 @@ void DependencyGraph::getCriticalSequence(
}
}
-static void printInstruction(formatted_raw_ostream &FOS,
- const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
- const MCInst &MCI,
- bool UseDifferentColor = false) {
- std::string Instruction;
- raw_string_ostream InstrStream(Instruction);
-
+void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
+ const MCInst &MCI,
+ bool UseDifferentColor) const {
FOS.PadToColumn(14);
-
- MCIP.printInst(&MCI, 0, "", STI, InstrStream);
- InstrStream.flush();
-
if (UseDifferentColor)
FOS.changeColor(raw_ostream::CYAN, true, false);
- FOS << StringRef(Instruction).ltrim();
+ FOS << printInstructionString(MCI);
if (UseDifferentColor)
FOS.resetColor();
}
@@ -316,6 +307,7 @@ void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
OS << "\nCritical sequence based on the simulation:\n\n";
const DependencyEdge &FirstEdge = *Seq[0];
+ ArrayRef<llvm::MCInst> Source = getSource();
unsigned FromIID = FirstEdge.FromIID % Source.size();
unsigned ToIID = FirstEdge.ToIID % Source.size();
bool IsLoopCarried = FromIID >= ToIID;
@@ -331,17 +323,17 @@ void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
unsigned CurrentIID = 0;
if (IsLoopCarried) {
FOS << "\n +----< " << FromIID << ".";
- printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors);
+ printInstruction(FOS, Source[FromIID], HasColors);
FOS << "\n |\n | < loop carried > \n |";
} else {
while (CurrentIID < FromIID) {
FOS << "\n " << CurrentIID << ".";
- printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+ printInstruction(FOS, Source[CurrentIID]);
CurrentIID++;
}
FOS << "\n +----< " << CurrentIID << ".";
- printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
+ printInstruction(FOS, Source[CurrentIID], HasColors);
CurrentIID++;
}
@@ -351,17 +343,17 @@ void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
while (CurrentIID < LastIID) {
FOS << "\n | " << CurrentIID << ".";
- printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+ printInstruction(FOS, Source[CurrentIID]);
CurrentIID++;
}
if (CurrentIID == ToIID) {
FOS << "\n +----> " << ToIID << ".";
- printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
+ printInstruction(FOS, Source[CurrentIID], HasColors);
} else {
FOS << "\n |\n | < loop carried > \n |"
<< "\n +----> " << ToIID << ".";
- printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors);
+ printInstruction(FOS, Source[ToIID], HasColors);
}
FOS.PadToColumn(58);
@@ -373,7 +365,7 @@ void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
FOS << "## REGISTER dependency: ";
if (HasColors)
FOS.changeColor(raw_ostream::MAGENTA, true, false);
- MCIP.printRegName(FOS, Dep.ResourceOrRegID);
+ getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
} else if (Dep.Type == DependencyEdge::DT_MEMORY) {
FOS << "## MEMORY dependency.";
} else {
@@ -397,7 +389,7 @@ void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
while (CurrentIID < Source.size()) {
FOS << "\n " << CurrentIID << ".";
- printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+ printInstruction(FOS, Source[CurrentIID]);
CurrentIID++;
}
@@ -451,8 +443,8 @@ void DependencyGraph::addDependency(unsigned From, unsigned To,
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
MCInstPrinter &Printer,
ArrayRef<MCInst> S, unsigned NumIter)
- : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
- Source(S), Iterations(NumIter), TotalCycles(0),
+ : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
+ DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
PressureIncreasedBecauseOfResources(false),
PressureIncreasedBecauseOfRegisterDependencies(false),
PressureIncreasedBecauseOfMemoryDependencies(false),
@@ -461,7 +453,7 @@ BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
unsigned RegID, unsigned Cost) {
bool IsLoopCarried = From >= To;
- unsigned SourceSize = Source.size();
+ unsigned SourceSize = getSource().size();
if (IsLoopCarried) {
DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
@@ -473,7 +465,7 @@ void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
unsigned Cost) {
bool IsLoopCarried = From >= To;
- unsigned SourceSize = Source.size();
+ unsigned SourceSize = getSource().size();
if (IsLoopCarried) {
DG.addMemoryDep(From, To + SourceSize, Cost);
DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
@@ -485,7 +477,7 @@ void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
uint64_t Mask, unsigned Cost) {
bool IsLoopCarried = From >= To;
- unsigned SourceSize = Source.size();
+ unsigned SourceSize = getSource().size();
if (IsLoopCarried) {
DG.addResourceDep(From, To + SourceSize, Mask, Cost);
DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
@@ -508,6 +500,7 @@ void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
if (Event.Type != HWInstructionEvent::Issued)
return;
+ ArrayRef<llvm::MCInst> Source = getSource();
const Instruction &IS = *Event.IR.getInstruction();
unsigned To = IID % Source.size();
@@ -617,7 +610,7 @@ void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
if (BPI.PressureIncreaseCycles) {
ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
- const MCSchedModel &SM = STI.getSchedModel();
+ const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
unsigned ResourceCycles = Distribution[I];
if (ResourceCycles) {
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
index 9e3bd5978f09..427937d9e3d7 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -80,12 +80,13 @@
#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
#define LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
-#include "Views/View.h"
+#include "Views/InstructionView.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -282,13 +283,10 @@ public:
};
/// A view that collects and prints a few performance numbers.
-class BottleneckAnalysis : public View {
- const MCSubtargetInfo &STI;
- MCInstPrinter &MCIP;
+class BottleneckAnalysis : public InstructionView {
PressureTracker Tracker;
DependencyGraph DG;
- ArrayRef<MCInst> Source;
unsigned Iterations;
unsigned TotalCycles;
@@ -317,6 +315,9 @@ class BottleneckAnalysis : public View {
void addMemoryDep(unsigned From, unsigned To, unsigned Cy);
void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy);
+ void printInstruction(formatted_raw_ostream &FOS, const MCInst &MCI,
+ bool UseDifferentColor = false) const;
+
// Prints a bottleneck message to OS.
void printBottleneckHints(raw_ostream &OS) const;
void printCriticalSequence(raw_ostream &OS) const;
@@ -331,6 +332,8 @@ public:
void onEvent(const HWInstructionEvent &Event) override;
void printView(raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "BottleneckAnalysis"; }
+ json::Value toJSON() const override { return "not implemented"; }
#ifndef NDEBUG
void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); }
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.h
index 07c0f5a4c68f..8d999fb0acfe 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.h
@@ -78,6 +78,7 @@ public:
printDispatchStalls(OS);
printDispatchHistogram(OS);
}
+ StringRef getNameAsString() const override { return "DispatchStatistics"; }
};
} // namespace mca
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
index fbe9d9021554..2248f63fe7e9 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -13,6 +13,7 @@
#include "Views/InstructionInfoView.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/JSON.h"
namespace llvm {
namespace mca {
@@ -20,10 +21,13 @@ namespace mca {
void InstructionInfoView::printView(raw_ostream &OS) const {
std::string Buffer;
raw_string_ostream TempStream(Buffer);
- const MCSchedModel &SM = STI.getSchedModel();
- std::string Instruction;
- raw_string_ostream InstrStream(Instruction);
+ ArrayRef<llvm::MCInst> Source = getSource();
+ if (!Source.size())
+ return;
+
+ IIVDVec IIVD(Source.size());
+ collectData(IIVD);
TempStream << "\n\nInstruction Info:\n";
TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n"
@@ -36,40 +40,22 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n";
}
- for (unsigned I = 0, E = Source.size(); I < E; ++I) {
- const MCInst &Inst = Source[I];
- const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
-
- // Obtain the scheduling class information from the instruction.
- unsigned SchedClassID = MCDesc.getSchedClass();
- unsigned CPUID = SM.getProcessorID();
-
- // Try to solve variant scheduling classes.
- while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
- SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &Inst, CPUID);
+ for (const auto &I : enumerate(zip(IIVD, Source))) {
+ const InstructionInfoViewData &IIVDEntry = std::get<0>(I.value());
- const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
- unsigned NumMicroOpcodes = SCDesc.NumMicroOps;
- unsigned Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
- // Add extra latency due to delays in the forwarding data paths.
- Latency += MCSchedModel::getForwardingDelayCycles(
- STI.getReadAdvanceEntries(SCDesc));
- Optional<double> RThroughput =
- MCSchedModel::getReciprocalThroughput(STI, SCDesc);
-
- TempStream << ' ' << NumMicroOpcodes << " ";
- if (NumMicroOpcodes < 10)
+ TempStream << ' ' << IIVDEntry.NumMicroOpcodes << " ";
+ if (IIVDEntry.NumMicroOpcodes < 10)
TempStream << " ";
- else if (NumMicroOpcodes < 100)
+ else if (IIVDEntry.NumMicroOpcodes < 100)
TempStream << ' ';
- TempStream << Latency << " ";
- if (Latency < 10)
+ TempStream << IIVDEntry.Latency << " ";
+ if (IIVDEntry.Latency < 10)
TempStream << " ";
- else if (Latency < 100)
+ else if (IIVDEntry.Latency < 100)
TempStream << ' ';
- if (RThroughput.hasValue()) {
- double RT = RThroughput.getValue();
+ if (IIVDEntry.RThroughput.hasValue()) {
+ double RT = IIVDEntry.RThroughput.getValue();
TempStream << format("%.2f", RT) << ' ';
if (RT < 10.0)
TempStream << " ";
@@ -78,12 +64,12 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
} else {
TempStream << " - ";
}
- TempStream << (MCDesc.mayLoad() ? " * " : " ");
- TempStream << (MCDesc.mayStore() ? " * " : " ");
- TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " ");
+ TempStream << (IIVDEntry.mayLoad ? " * " : " ");
+ TempStream << (IIVDEntry.mayStore ? " * " : " ");
+ TempStream << (IIVDEntry.hasUnmodeledSideEffects ? " U " : " ");
if (PrintEncodings) {
- StringRef Encoding(CE.getEncoding(I));
+ StringRef Encoding(CE.getEncoding(I.index()));
unsigned EncodingSize = Encoding.size();
TempStream << " " << EncodingSize
<< (EncodingSize < 10 ? " " : " ");
@@ -95,18 +81,73 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
FOS.flush();
}
- MCIP.printInst(&Inst, 0, "", STI, InstrStream);
- InstrStream.flush();
-
- // Consume any tabs or spaces at the beginning of the string.
- StringRef Str(Instruction);
- Str = Str.ltrim();
- TempStream << Str << '\n';
- Instruction = "";
+ const MCInst &Inst = std::get<1>(I.value());
+ TempStream << printInstructionString(Inst) << '\n';
}
TempStream.flush();
OS << Buffer;
}
+
+void InstructionInfoView::collectData(
+ MutableArrayRef<InstructionInfoViewData> IIVD) const {
+ const llvm::MCSubtargetInfo &STI = getSubTargetInfo();
+ const MCSchedModel &SM = STI.getSchedModel();
+ for (const auto &I : zip(getSource(), IIVD)) {
+ const MCInst &Inst = std::get<0>(I);
+ InstructionInfoViewData &IIVDEntry = std::get<1>(I);
+ const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
+
+ // Obtain the scheduling class information from the instruction.
+ unsigned SchedClassID = MCDesc.getSchedClass();
+ unsigned CPUID = SM.getProcessorID();
+
+ // Try to solve variant scheduling classes.
+ while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
+ SchedClassID =
+ STI.resolveVariantSchedClass(SchedClassID, &Inst, &MCII, CPUID);
+
+ const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+ IIVDEntry.NumMicroOpcodes = SCDesc.NumMicroOps;
+ IIVDEntry.Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+ // Add extra latency due to delays in the forwarding data paths.
+ IIVDEntry.Latency += MCSchedModel::getForwardingDelayCycles(
+ STI.getReadAdvanceEntries(SCDesc));
+ IIVDEntry.RThroughput = MCSchedModel::getReciprocalThroughput(STI, SCDesc);
+ IIVDEntry.mayLoad = MCDesc.mayLoad();
+ IIVDEntry.mayStore = MCDesc.mayStore();
+ IIVDEntry.hasUnmodeledSideEffects = MCDesc.hasUnmodeledSideEffects();
+ }
+}
+
+// Construct a JSON object from a single InstructionInfoViewData object.
+json::Object
+InstructionInfoView::toJSON(const InstructionInfoViewData &IIVD) const {
+ json::Object JO({{"NumMicroOpcodes", IIVD.NumMicroOpcodes},
+ {"Latency", IIVD.Latency},
+ {"mayLoad", IIVD.mayLoad},
+ {"mayStore", IIVD.mayStore},
+ {"hasUnmodeledSideEffects", IIVD.hasUnmodeledSideEffects}});
+ JO.try_emplace("RThroughput", IIVD.RThroughput.getValueOr(0.0));
+ return JO;
+}
+
+json::Value InstructionInfoView::toJSON() const {
+ ArrayRef<llvm::MCInst> Source = getSource();
+ if (!Source.size())
+ return json::Value(0);
+
+ IIVDVec IIVD(Source.size());
+ collectData(IIVD);
+
+ json::Array InstInfo;
+ for (const auto &I : enumerate(IIVD)) {
+ const InstructionInfoViewData &IIVDEntry = I.value();
+ json::Object JO = toJSON(IIVDEntry);
+ JO.try_emplace("Instruction", (unsigned)I.index());
+ InstInfo.push_back(std::move(JO));
+ }
+ return json::Value(std::move(InstInfo));
+}
} // namespace mca.
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.h
index 0e948304119f..5d52164e2d50 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.h
@@ -34,8 +34,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
-#include "Views/View.h"
+#include "Views/InstructionView.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -49,23 +50,36 @@ namespace llvm {
namespace mca {
/// A view that prints out generic instruction information.
-class InstructionInfoView : public View {
- const llvm::MCSubtargetInfo &STI;
+class InstructionInfoView : public InstructionView {
const llvm::MCInstrInfo &MCII;
CodeEmitter &CE;
bool PrintEncodings;
- llvm::ArrayRef<llvm::MCInst> Source;
- llvm::MCInstPrinter &MCIP;
+
+ struct InstructionInfoViewData {
+ unsigned NumMicroOpcodes = 0;
+ unsigned Latency = 0;
+ Optional<double> RThroughput = 0.0;
+ bool mayLoad = false;
+ bool mayStore = false;
+ bool hasUnmodeledSideEffects = false;
+ };
+ using IIVDVec = SmallVector<InstructionInfoViewData, 16>;
+
+ /// Place the data into the array of InstructionInfoViewData IIVD.
+ void collectData(MutableArrayRef<InstructionInfoViewData> IIVD) const;
public:
InstructionInfoView(const llvm::MCSubtargetInfo &ST,
const llvm::MCInstrInfo &II, CodeEmitter &C,
bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S,
llvm::MCInstPrinter &IP)
- : STI(ST), MCII(II), CE(C), PrintEncodings(ShouldPrintEncodings),
- Source(S), MCIP(IP) {}
+ : InstructionView(ST, IP, S), MCII(II), CE(C),
+ PrintEncodings(ShouldPrintEncodings) {}
void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "InstructionInfoView"; }
+ json::Value toJSON() const override;
+ json::Object toJSON(const InstructionInfoViewData &IIVD) const;
};
} // namespace mca
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.cpp
new file mode 100644
index 000000000000..7f7a5b7cdbbb
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.cpp
@@ -0,0 +1,60 @@
+//===----------------------- InstructionView.cpp ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the member functions of the class InstructionView.
+///
+//===----------------------------------------------------------------------===//
+
+#include <sstream>
+#include "Views/InstructionView.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+namespace mca {
+
+StringRef InstructionView::printInstructionString(const llvm::MCInst &MCI) const {
+ InstructionString = "";
+ MCIP.printInst(&MCI, 0, "", STI, InstrStream);
+ InstrStream.flush();
+ // Remove any tabs or spaces at the beginning of the instruction.
+ return StringRef(InstructionString).ltrim();
+}
+
+json::Value InstructionView::toJSON() const {
+ json::Object JO;
+ json::Array SourceInfo;
+ for (const auto &MCI : getSource()) {
+ StringRef Instruction = printInstructionString(MCI);
+ SourceInfo.push_back(Instruction.str());
+ }
+ JO.try_emplace("Instructions", std::move(SourceInfo));
+
+ json::Array Resources;
+ const MCSchedModel &SM = STI.getSchedModel();
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ unsigned NumUnits = ProcResource.NumUnits;
+ // Skip groups and invalid resources with zero units.
+ if (ProcResource.SubUnitsIdxBegin || !NumUnits)
+ continue;
+ for (unsigned J = 0; J < NumUnits; ++J) {
+ std::stringstream ResNameStream;
+ ResNameStream << ProcResource.Name;
+ if (NumUnits > 1)
+ ResNameStream << "." << J;
+ Resources.push_back(ResNameStream.str());
+ }
+ }
+ JO.try_emplace("Resources", json::Object({{"CPUName", MCPU}, {"Resources", std::move(Resources)}}));
+
+ return JO;
+}
+} // namespace mca
+} // namespace llvm
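printInstructionString() reuses a single mutable string/stream pair, so each returned StringRef is valid only until the next call. The same buffer-reuse idiom, stripped of the MCA types, looks like:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
using namespace llvm;

class LinePrinter {
  mutable std::string Buffer;
  mutable raw_string_ostream Stream;

public:
  LinePrinter() : Stream(Buffer) {}

  // Returned StringRef is only valid until the next call, exactly like
  // InstructionView::printInstructionString().
  StringRef format(int Value) const {
    Buffer = "";
    Stream << "  value = " << Value;
    Stream.flush();
    return StringRef(Buffer).ltrim();
  }
};

int main() {
  LinePrinter P;
  outs() << P.format(7) << '\n';
  outs() << P.format(42) << '\n'; // previous result is overwritten
  return 0;
}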
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.h
new file mode 100644
index 000000000000..2a260b97d8fb
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.h
@@ -0,0 +1,67 @@
+//===----------------------- InstructionView.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the main interface for Views that examine and reference
+/// a sequence of machine instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H
+
+#include "Views/View.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace mca {
+
+// The base class for views that deal with individual machine instructions.
+class InstructionView : public View {
+ const llvm::MCSubtargetInfo &STI;
+ llvm::MCInstPrinter &MCIP;
+ llvm::ArrayRef<llvm::MCInst> Source;
+ StringRef MCPU;
+
+ mutable std::string InstructionString;
+ mutable raw_string_ostream InstrStream;
+
+public:
+ void printView(llvm::raw_ostream &) const override {}
+ InstructionView(const llvm::MCSubtargetInfo &STI,
+ llvm::MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S,
+ StringRef MCPU = StringRef())
+ : STI(STI), MCIP(Printer), Source(S), MCPU(MCPU),
+ InstrStream(InstructionString) {}
+
+ virtual ~InstructionView() = default;
+
+ StringRef getNameAsString() const override {
+ return "Instructions and CPU resources";
+ }
+ // Return a reference to a string representing a given machine instruction.
+ // The result should be used or copied before the next call to
+ // printInstructionString() as it will overwrite the previous result.
+ StringRef printInstructionString(const llvm::MCInst &MCI) const;
+ const llvm::MCSubtargetInfo &getSubTargetInfo() const { return STI; }
+
+ llvm::MCInstPrinter &getInstPrinter() const { return MCIP; }
+ llvm::ArrayRef<llvm::MCInst> getSource() const { return Source; }
+ json::Value toJSON() const override;
+ virtual void printViewJSON(llvm::raw_ostream &OS) override {
+ json::Value JV = toJSON();
+ OS << formatv("{0:2}", JV) << "\n";
+ }
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
index a2273dd48b22..cf384dbfe337 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
@@ -73,6 +73,9 @@ public:
void onCycleEnd() override;
void onEvent(const HWInstructionEvent &Event) override;
void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override {
+ return "RegisterFileStatistics";
+ }
};
} // namespace mca
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
index bdb9dc21247b..77b3ba0b7c8d 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
@@ -21,10 +21,10 @@ namespace mca {
ResourcePressureView::ResourcePressureView(const llvm::MCSubtargetInfo &sti,
MCInstPrinter &Printer,
ArrayRef<MCInst> S)
- : STI(sti), MCIP(Printer), Source(S), LastInstructionIdx(0) {
+ : InstructionView(sti, Printer, S), LastInstructionIdx(0) {
// Populate the map of resource descriptors.
unsigned R2VIndex = 0;
- const MCSchedModel &SM = STI.getSchedModel();
+ const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
unsigned NumUnits = ProcResource.NumUnits;
@@ -37,7 +37,7 @@ ResourcePressureView::ResourcePressureView(const llvm::MCSubtargetInfo &sti,
}
NumResourceUnits = R2VIndex;
- ResourceUsage.resize(NumResourceUnits * (Source.size() + 1));
+ ResourceUsage.resize(NumResourceUnits * (getSource().size() + 1));
std::fill(ResourceUsage.begin(), ResourceUsage.end(), 0.0);
}
@@ -52,6 +52,7 @@ void ResourcePressureView::onEvent(const HWInstructionEvent &Event) {
return;
const auto &IssueEvent = static_cast<const HWInstructionIssuedEvent &>(Event);
+ ArrayRef<llvm::MCInst> Source = getSource();
const unsigned SourceIdx = Event.IR.getSourceIndex() % Source.size();
for (const std::pair<ResourceRef, ResourceCycles> &Use :
IssueEvent.UsedResources) {
@@ -105,7 +106,7 @@ void ResourcePressureView::printResourcePressurePerIter(raw_ostream &OS) const {
formatted_raw_ostream FOS(TempStream);
FOS << "\n\nResources:\n";
- const MCSchedModel &SM = STI.getSchedModel();
+ const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds();
I < E; ++I) {
const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
@@ -132,6 +133,7 @@ void ResourcePressureView::printResourcePressurePerIter(raw_ostream &OS) const {
FOS << '\n';
FOS.flush();
+ ArrayRef<llvm::MCInst> Source = getSource();
const unsigned Executions = LastInstructionIdx / Source.size() + 1;
for (unsigned I = 0, E = NumResourceUnits; I < E; ++I) {
double Usage = ResourceUsage[I + Source.size() * E];
@@ -148,13 +150,11 @@ void ResourcePressureView::printResourcePressurePerInst(raw_ostream &OS) const {
formatted_raw_ostream FOS(TempStream);
FOS << "\n\nResource pressure by instruction:\n";
- printColumnNames(FOS, STI.getSchedModel());
+ printColumnNames(FOS, getSubTargetInfo().getSchedModel());
FOS << "Instructions:\n";
- std::string Instruction;
- raw_string_ostream InstrStream(Instruction);
-
unsigned InstrIndex = 0;
+ ArrayRef<llvm::MCInst> Source = getSource();
const unsigned Executions = LastInstructionIdx / Source.size() + 1;
for (const MCInst &MCI : Source) {
unsigned BaseEltIdx = InstrIndex * NumResourceUnits;
@@ -163,16 +163,7 @@ void ResourcePressureView::printResourcePressurePerInst(raw_ostream &OS) const {
printResourcePressure(FOS, Usage / Executions, (J + 1) * 7);
}
- MCIP.printInst(&MCI, 0, "", STI, InstrStream);
- InstrStream.flush();
- StringRef Str(Instruction);
-
- // Remove any tabs or spaces at the beginning of the instruction.
- Str = Str.ltrim();
-
- FOS << Str << '\n';
- Instruction = "";
-
+ FOS << printInstructionString(MCI) << '\n';
FOS.flush();
OS << Buffer;
Buffer = "";
@@ -180,5 +171,30 @@ void ResourcePressureView::printResourcePressurePerInst(raw_ostream &OS) const {
++InstrIndex;
}
}
+
+json::Value ResourcePressureView::toJSON() const {
+ // We're dumping the instructions and the ResourceUsage array.
+ json::Array ResourcePressureInfo;
+
+ // The ResourceUsage matrix is sparse, so we only consider
+ // non-zero values.
+ ArrayRef<llvm::MCInst> Source = getSource();
+ const unsigned Executions = LastInstructionIdx / Source.size() + 1;
+ for (const auto &R : enumerate(ResourceUsage)) {
+ const ResourceCycles &RU = R.value();
+ if (RU.getNumerator() == 0)
+ continue;
+ unsigned InstructionIndex = R.index() / NumResourceUnits;
+ unsigned ResourceIndex = R.index() % NumResourceUnits;
+ double Usage = RU / Executions;
+ ResourcePressureInfo.push_back(
+ json::Object({{"InstructionIndex", InstructionIndex},
+ {"ResourceIndex", ResourceIndex},
+ {"ResourceUsage", Usage}}));
+ }
+
+ json::Object JO({{"ResourcePressureInfo", std::move(ResourcePressureInfo)}});
+ return JO;
+}
} // namespace mca
} // namespace llvm
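
The toJSON() method above walks a flat vector: ResourceUsage is sized NumResourceUnits * (Source.size() + 1), and the (instruction, resource) coordinates are recovered with integer division and remainder. A standalone sketch of that index decomposition, with made-up sizes:

#include <cstdio>

int main() {
  // Assumed values for illustration only.
  const unsigned NumResourceUnits = 4; // columns of the flattened matrix
  const unsigned LinearIndex = 10;     // position of one ResourceUsage entry

  // Same decomposition as ResourcePressureView::toJSON().
  unsigned InstructionIndex = LinearIndex / NumResourceUnits; // row: 2
  unsigned ResourceIndex = LinearIndex % NumResourceUnits;    // column: 2

  std::printf("index %u -> instruction %u, resource %u\n", LinearIndex,
              InstructionIndex, ResourceIndex);
  return 0;
}
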
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.h
index 0fa0b9a36aa3..c3993a08c170 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.h
@@ -57,22 +57,20 @@
#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
#define LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
-#include "Views/View.h"
+#include "Views/InstructionView.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/JSON.h"
namespace llvm {
namespace mca {
/// This class collects resource pressure statistics and it is able to print
/// out all the collected information as a table to an output stream.
-class ResourcePressureView : public View {
- const llvm::MCSubtargetInfo &STI;
- llvm::MCInstPrinter &MCIP;
- llvm::ArrayRef<llvm::MCInst> Source;
+class ResourcePressureView : public InstructionView {
unsigned LastInstructionIdx;
// Map to quickly obtain the ResourceUsage column index from a processor
@@ -96,6 +94,8 @@ public:
printResourcePressurePerIter(OS);
printResourcePressurePerInst(OS);
}
+ StringRef getNameAsString() const override { return "ResourcePressureView"; }
+ json::Value toJSON() const override;
};
} // namespace mca
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
index 1a4d3dec5c56..662a223662e6 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
@@ -52,6 +52,9 @@ public:
void onEvent(const HWInstructionEvent &Event) override;
void onCycleEnd() override;
void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override {
+ return "RetireControlUnitStatistics";
+ }
};
} // namespace mca
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
index 32711b4483b4..734046c3112f 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
@@ -88,6 +88,7 @@ public:
llvm::ArrayRef<unsigned> Buffers) override;
void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "SchedulerStatistics"; }
};
} // namespace mca
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp
index f0e75f7b13ae..c0fe3b5193a7 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -63,32 +63,52 @@ void SummaryView::onEvent(const HWInstructionEvent &Event) {
}
void SummaryView::printView(raw_ostream &OS) const {
- unsigned Instructions = Source.size();
- unsigned Iterations = (LastInstructionIdx / Instructions) + 1;
- unsigned TotalInstructions = Instructions * Iterations;
- unsigned TotalUOps = NumMicroOps * Iterations;
- double IPC = (double)TotalInstructions / TotalCycles;
- double UOpsPerCycle = (double)TotalUOps / TotalCycles;
- double BlockRThroughput = computeBlockRThroughput(
- SM, DispatchWidth, NumMicroOps, ProcResourceUsage);
-
std::string Buffer;
raw_string_ostream TempStream(Buffer);
- TempStream << "Iterations: " << Iterations;
- TempStream << "\nInstructions: " << TotalInstructions;
- TempStream << "\nTotal Cycles: " << TotalCycles;
- TempStream << "\nTotal uOps: " << TotalUOps << '\n';
- TempStream << "\nDispatch Width: " << DispatchWidth;
+ DisplayValues DV;
+
+ collectData(DV);
+ TempStream << "Iterations: " << DV.Iterations;
+ TempStream << "\nInstructions: " << DV.TotalInstructions;
+ TempStream << "\nTotal Cycles: " << DV.TotalCycles;
+ TempStream << "\nTotal uOps: " << DV.TotalUOps << '\n';
+ TempStream << "\nDispatch Width: " << DV.DispatchWidth;
TempStream << "\nuOps Per Cycle: "
- << format("%.2f", floor((UOpsPerCycle * 100) + 0.5) / 100);
+ << format("%.2f", floor((DV.UOpsPerCycle * 100) + 0.5) / 100);
TempStream << "\nIPC: "
- << format("%.2f", floor((IPC * 100) + 0.5) / 100);
+ << format("%.2f", floor((DV.IPC * 100) + 0.5) / 100);
TempStream << "\nBlock RThroughput: "
- << format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10)
+ << format("%.1f", floor((DV.BlockRThroughput * 10) + 0.5) / 10)
<< '\n';
TempStream.flush();
OS << Buffer;
}
+void SummaryView::collectData(DisplayValues &DV) const {
+ DV.Instructions = Source.size();
+ DV.Iterations = (LastInstructionIdx / DV.Instructions) + 1;
+ DV.TotalInstructions = DV.Instructions * DV.Iterations;
+ DV.TotalCycles = TotalCycles;
+ DV.DispatchWidth = DispatchWidth;
+ DV.TotalUOps = NumMicroOps * DV.Iterations;
+ DV.UOpsPerCycle = (double)DV.TotalUOps / TotalCycles;
+ DV.IPC = (double)DV.TotalInstructions / TotalCycles;
+ DV.BlockRThroughput = computeBlockRThroughput(SM, DispatchWidth, NumMicroOps,
+ ProcResourceUsage);
+}
+
+json::Value SummaryView::toJSON() const {
+ DisplayValues DV;
+ collectData(DV);
+ json::Object JO({{"Iterations", DV.Iterations},
+ {"Instructions", DV.TotalInstructions},
+ {"TotalCycles", DV.TotalCycles},
+ {"TotaluOps", DV.TotalUOps},
+ {"DispatchWidth", DV.DispatchWidth},
+ {"uOpsPerCycle", DV.UOpsPerCycle},
+ {"IPC", DV.IPC},
+ {"BlockRThroughput", DV.BlockRThroughput}});
+ return JO;
+}
} // namespace mca.
} // namespace llvm
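
collectData() now centralizes the arithmetic shared by the text report and toJSON(); the readable output additionally rounds to two decimals via floor(x * 100 + 0.5) / 100. A self-contained sketch of that computation with invented counts (not taken from any real run):

#include <cmath>
#include <cstdio>

int main() {
  // Invented inputs standing in for SummaryView's counters.
  const unsigned Instructions = 4, Iterations = 100, NumMicroOps = 6;
  const unsigned TotalCycles = 123;

  unsigned TotalInstructions = Instructions * Iterations; // 400
  unsigned TotalUOps = NumMicroOps * Iterations;          // 600
  double IPC = static_cast<double>(TotalInstructions) / TotalCycles;
  double UOpsPerCycle = static_cast<double>(TotalUOps) / TotalCycles;

  // Same rounding as the text report: two decimals, rounding half up.
  auto Round2 = [](double V) { return std::floor(V * 100 + 0.5) / 100; };
  std::printf("IPC: %.2f\nuOps Per Cycle: %.2f\n", Round2(IPC),
              Round2(UOpsPerCycle)); // IPC: 3.25, uOps Per Cycle: 4.88
  return 0;
}
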
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.h
index 9be31b7d51bd..2622e869ef23 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.h
@@ -46,6 +46,18 @@ class SummaryView : public View {
// The total number of micro opcodes contributed by a block of instructions.
unsigned NumMicroOps;
+ struct DisplayValues {
+ unsigned Instructions;
+ unsigned Iterations;
+ unsigned TotalInstructions;
+ unsigned TotalCycles;
+ unsigned DispatchWidth;
+ unsigned TotalUOps;
+ double IPC;
+ double UOpsPerCycle;
+ double BlockRThroughput;
+ };
+
// For each processor resource, this vector stores the cumulative number of
// resource cycles consumed by the analyzed code block.
llvm::SmallVector<unsigned, 8> ProcResourceUsage;
@@ -65,6 +77,9 @@ class SummaryView : public View {
// - Total Resource Cycles / #Units (for every resource consumed).
double getBlockRThroughput() const;
+ /// Compute the data we want to print out in the object DV.
+ void collectData(DisplayValues &DV) const;
+
public:
SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef<llvm::MCInst> S,
unsigned Width);
@@ -72,8 +87,9 @@ public:
void onCycleEnd() override { ++TotalCycles; }
void onEvent(const HWInstructionEvent &Event) override;
void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "SummaryView"; }
+ json::Value toJSON() const override;
};
-
} // namespace mca
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp
index cf5b48e811b8..c8b481bc7ce6 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -20,10 +20,10 @@ namespace mca {
TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
unsigned Cycles)
- : STI(sti), MCIP(Printer), Source(S), CurrentCycle(0),
+ : InstructionView(sti, Printer, S), CurrentCycle(0),
MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0), WaitTime(S.size()),
UsedBuffer(S.size()) {
- unsigned NumInstructions = Source.size();
+ unsigned NumInstructions = getSource().size();
assert(Iterations && "Invalid number of iterations specified!");
NumInstructions *= Iterations;
Timeline.resize(NumInstructions);
@@ -40,10 +40,10 @@ TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
void TimelineView::onReservedBuffers(const InstRef &IR,
ArrayRef<unsigned> Buffers) {
- if (IR.getSourceIndex() >= Source.size())
+ if (IR.getSourceIndex() >= getSource().size())
return;
- const MCSchedModel &SM = STI.getSchedModel();
+ const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
std::pair<unsigned, int> BufferInfo = {0, -1};
for (const unsigned Buffer : Buffers) {
const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer);
@@ -70,7 +70,7 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) {
// Update the WaitTime entry which corresponds to this Index.
assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!");
unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched);
- WaitTimeEntry &WTEntry = WaitTime[Index % Source.size()];
+ WaitTimeEntry &WTEntry = WaitTime[Index % getSource().size()];
WTEntry.CyclesSpentInSchedulerQueue +=
TVEntry.CycleIssued - CycleDispatched;
assert(CycleDispatched <= TVEntry.CycleReady &&
@@ -133,7 +133,7 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
const WaitTimeEntry &Entry,
unsigned SourceIndex,
unsigned Executions) const {
- bool PrintingTotals = SourceIndex == Source.size();
+ bool PrintingTotals = SourceIndex == getSource().size();
unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions;
if (!PrintingTotals)
@@ -164,7 +164,8 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
OS.PadToColumn(27);
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
- CumulativeExecutions, STI.getSchedModel().MicroOpBufferSize);
+ CumulativeExecutions,
+ getSubTargetInfo().getSchedModel().MicroOpBufferSize);
OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
if (OS.has_colors())
@@ -181,33 +182,19 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
"[3]: Average time elapsed from WB until retire stage\n\n"
" [0] [1] [2] [3]\n";
OS << Header;
-
- // Use a different string stream for printing instructions.
- std::string Instruction;
- raw_string_ostream InstrStream(Instruction);
-
formatted_raw_ostream FOS(OS);
- unsigned Executions = Timeline.size() / Source.size();
+ unsigned Executions = Timeline.size() / getSource().size();
unsigned IID = 0;
- for (const MCInst &Inst : Source) {
+ for (const MCInst &Inst : getSource()) {
printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions);
- // Append the instruction info at the end of the line.
- MCIP.printInst(&Inst, 0, "", STI, InstrStream);
- InstrStream.flush();
-
- // Consume any tabs or spaces at the beginning of the string.
- StringRef Str(Instruction);
- Str = Str.ltrim();
- FOS << " " << Str << '\n';
+ FOS << " " << printInstructionString(Inst) << '\n';
FOS.flush();
- Instruction = "";
-
++IID;
}
// If the timeline contains more than one instruction,
// let's also print global averages.
- if (Source.size() != 1) {
+ if (getSource().size() != 1) {
WaitTimeEntry TotalWaitTime = std::accumulate(
WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0},
[](const WaitTimeEntry &A, const WaitTimeEntry &B) {
@@ -220,7 +207,7 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions);
FOS << " "
<< "<total>" << '\n';
- InstrStream.flush();
+ FOS.flush();
}
}
@@ -292,11 +279,8 @@ void TimelineView::printTimeline(raw_ostream &OS) const {
printTimelineHeader(FOS, LastCycle);
FOS.flush();
- // Use a different string stream for the instruction.
- std::string Instruction;
- raw_string_ostream InstrStream(Instruction);
-
unsigned IID = 0;
+ ArrayRef<llvm::MCInst> Source = getSource();
const unsigned Iterations = Timeline.size() / Source.size();
for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) {
for (const MCInst &Inst : Source) {
@@ -306,20 +290,26 @@ void TimelineView::printTimeline(raw_ostream &OS) const {
unsigned SourceIndex = IID % Source.size();
printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
- // Append the instruction info at the end of the line.
- MCIP.printInst(&Inst, 0, "", STI, InstrStream);
- InstrStream.flush();
-
- // Consume any tabs or spaces at the beginning of the string.
- StringRef Str(Instruction);
- Str = Str.ltrim();
- FOS << " " << Str << '\n';
+ FOS << " " << printInstructionString(Inst) << '\n';
FOS.flush();
- Instruction = "";
++IID;
}
}
}
+
+json::Value TimelineView::toJSON() const {
+ json::Array TimelineInfo;
+
+ for (const TimelineViewEntry &TLE : Timeline) {
+ TimelineInfo.push_back(
+ json::Object({{"CycleDispatched", TLE.CycleDispatched},
+ {"CycleReady", TLE.CycleReady},
+ {"CycleIssued", TLE.CycleIssued},
+ {"CycleExecuted", TLE.CycleExecuted},
+ {"CycleRetired", TLE.CycleRetired}}));
+ }
+ return json::Object({{"TimelineInfo", std::move(TimelineInfo)}});
+}
} // namespace mca
} // namespace llvm
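
The serialization above relies on llvm::json::Object, llvm::json::Array and the formatv style "{0:2}" for pretty-printing with a two-space indent. A standalone sketch of the same pattern, with made-up cycle numbers standing in for one TimelineViewEntry:

#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  json::Array TimelineInfo;
  // One fabricated entry; the real view pushes one object per entry.
  TimelineInfo.push_back(json::Object({{"CycleDispatched", 0},
                                       {"CycleReady", 1},
                                       {"CycleIssued", 2},
                                       {"CycleExecuted", 5},
                                       {"CycleRetired", 6}}));
  json::Value Root = json::Object({{"TimelineInfo", std::move(TimelineInfo)}});
  outs() << formatv("{0:2}", Root) << "\n"; // two-space indented JSON
  return 0;
}
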
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.h
index 9bec3b87db45..81f2b0335081 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.h
@@ -100,12 +100,13 @@
#ifndef LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
#define LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
-#include "Views/View.h"
+#include "Views/InstructionView.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -118,11 +119,7 @@ namespace mca {
/// a TimelineViewEntry object. TimelineViewEntry objects are then used
/// to print the timeline information, as well as the "average wait times"
/// for every instruction in the input assembly sequence.
-class TimelineView : public View {
- const llvm::MCSubtargetInfo &STI;
- llvm::MCInstPrinter &MCIP;
- llvm::ArrayRef<llvm::MCInst> Source;
-
+class TimelineView : public InstructionView {
unsigned CurrentCycle;
unsigned MaxCycle;
unsigned LastCycle;
@@ -182,6 +179,8 @@ public:
printTimeline(OS);
printAverageWaitTimes(OS);
}
+ StringRef getNameAsString() const override { return "TimelineView"; }
+ json::Value toJSON() const override;
};
} // namespace mca
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/View.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/View.cpp
index 8e5c34d2d5c2..09d08d3ae007 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/View.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/View.cpp
@@ -12,10 +12,13 @@
//===----------------------------------------------------------------------===//
#include "Views/View.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
namespace mca {
void View::anchor() {}
+
} // namespace mca
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/View.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/View.h
index 3b52511b4d29..85464bfda662 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/View.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/View.h
@@ -15,16 +15,34 @@
#ifndef LLVM_TOOLS_LLVM_MCA_VIEW_H
#define LLVM_TOOLS_LLVM_MCA_VIEW_H
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MCA/HWEventListener.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/JSON.h"
namespace llvm {
namespace mca {
class View : public HWEventListener {
public:
+ enum OutputKind { OK_READABLE, OK_JSON };
+
+ void printView(OutputKind OutputKind, llvm::raw_ostream &OS) {
+ if (OutputKind == OK_JSON)
+ printViewJSON(OS);
+ else
+ printView(OS);
+ }
+
virtual void printView(llvm::raw_ostream &OS) const = 0;
+ virtual void printViewJSON(llvm::raw_ostream &OS) {
+ json::Object JO;
+ JO.try_emplace(getNameAsString().str(), toJSON());
+ OS << formatv("{0:2}", json::Value(std::move(JO))) << "\n";
+ }
virtual ~View() = default;
+ virtual StringRef getNameAsString() const = 0;
+ virtual json::Value toJSON() const { return "not implemented"; }
void anchor() override;
};
} // namespace mca
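
With these hooks, a view only needs getNameAsString() and toJSON() to take part in JSON output; the default printViewJSON() wraps the value under the view's name, and printView(OutputKind, OS) picks the path. A hypothetical minimal view relying on those defaults (CycleCountView is not part of the patch):

#include "Views/View.h"

namespace llvm {
namespace mca {

// Hypothetical view: counts simulated cycles and reports the total.
class CycleCountView : public View {
  unsigned TotalCycles = 0;

public:
  void onCycleEnd() override { ++TotalCycles; }

  void printView(raw_ostream &OS) const override {
    OS << "Total Cycles: " << TotalCycles << '\n';
  }
  StringRef getNameAsString() const override { return "CycleCountView"; }
  json::Value toJSON() const override {
    return json::Object({{"TotalCycles", TotalCycles}});
  }
};

} // namespace mca
} // namespace llvm
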
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp
index 9f3bf41ff3f8..13a2c6363579 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -96,6 +96,11 @@ static cl::opt<std::string>
cl::desc("Additional target features."),
cl::cat(ToolOptions));
+static cl::opt<bool>
+ PrintJson("json",
+ cl::desc("Print the output in json format"),
+ cl::cat(ToolOptions), cl::init(false));
+
static cl::opt<int>
OutputAsmVariant("output-asm-variant",
cl::desc("Syntax variant to use for output printing"),
@@ -325,11 +330,12 @@ int main(int argc, char **argv) {
// Apply overrides to llvm-mca specific options.
processViewOptions();
- if (!MCPU.compare("native"))
+ if (MCPU == "native")
MCPU = std::string(llvm::sys::getHostCPUName());
std::unique_ptr<MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, MATTR));
+ assert(STI && "Unable to create subtarget info!");
if (!STI->isCPUStringValid(MCPU))
return 1;
@@ -373,6 +379,7 @@ int main(int argc, char **argv) {
std::unique_ptr<buffer_ostream> BOS;
std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
+ assert(MCII && "Unable to create instruction info!");
std::unique_ptr<MCInstrAnalysis> MCIA(
TheTarget->createMCInstrAnalysis(MCII.get()));
@@ -443,9 +450,11 @@ int main(int argc, char **argv) {
std::unique_ptr<MCCodeEmitter> MCE(
TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
+ assert(MCE && "Unable to create code emitter!");
std::unique_ptr<MCAsmBackend> MAB(TheTarget->createMCAsmBackend(
*STI, *MRI, mc::InitMCTargetOptionsFromFlags()));
+ assert(MAB && "Unable to create asm backend!");
for (const std::unique_ptr<mca::CodeRegion> &Region : Regions) {
// Skip empty code regions.
@@ -497,7 +506,7 @@ int main(int argc, char **argv) {
auto P = std::make_unique<mca::Pipeline>();
P->appendStage(std::make_unique<mca::EntryStage>(S));
P->appendStage(std::make_unique<mca::InstructionTables>(SM));
- mca::PipelinePrinter Printer(*P);
+ mca::PipelinePrinter Printer(*P, mca::View::OK_READABLE);
// Create the views for this pipeline, execute, and emit a report.
if (PrintInstructionInfoView) {
@@ -516,7 +525,14 @@ int main(int argc, char **argv) {
// Create a basic pipeline simulating an out-of-order backend.
auto P = MCA.createDefaultPipeline(PO, S);
- mca::PipelinePrinter Printer(*P);
+ mca::PipelinePrinter Printer(*P, PrintJson ? mca::View::OK_JSON
+ : mca::View::OK_READABLE);
+
+ // When we output JSON, we add a view that contains the instructions
+ // and CPU resource information.
+ if (PrintJson)
+ Printer.addView(
+ std::make_unique<mca::InstructionView>(*STI, *IP, Insts, MCPU));
if (PrintSummaryView)
Printer.addView(
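
The new -json switch follows the usual llvm::cl pattern: a static cl::opt<bool> that is read after cl::ParseCommandLineOptions and used to pick View::OK_JSON or View::OK_READABLE. Reduced to a standalone mini-tool (the option category is omitted and the overview string is invented):

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static cl::opt<bool> PrintJson("json",
                               cl::desc("Print the output in json format"),
                               cl::init(false));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv, "json flag sketch\n");
  // llvm-mca feeds the flag into PipelinePrinter's output kind.
  outs() << (PrintJson ? "JSON output selected\n"
                       : "readable output selected\n");
  return 0;
}
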
diff --git a/contrib/llvm-project/llvm/tools/llvm-nm/llvm-nm.cpp b/contrib/llvm-project/llvm/tools/llvm-nm/llvm-nm.cpp
index ecd1e21e15bf..c678108807c5 100644
--- a/contrib/llvm-project/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -295,7 +295,7 @@ struct NMSymbol {
uint64_t Address;
uint64_t Size;
char TypeChar;
- StringRef Name;
+ std::string Name;
StringRef SectionName;
StringRef TypeName;
BasicSymbolRef Sym;
@@ -309,25 +309,27 @@ struct NMSymbol {
uint8_t NType;
uint8_t NSect;
uint16_t NDesc;
- StringRef IndirectName;
+ std::string IndirectName;
};
} // anonymous namespace
static bool compareSymbolAddress(const NMSymbol &A, const NMSymbol &B) {
bool ADefined;
// Symbol flags have been checked in the caller.
- uint32_t AFlags = cantFail(A.Sym.getFlags());
- if (A.Sym.getRawDataRefImpl().p)
+ if (A.Sym.getRawDataRefImpl().p) {
+ uint32_t AFlags = cantFail(A.Sym.getFlags());
ADefined = !(AFlags & SymbolRef::SF_Undefined);
- else
+ } else {
ADefined = A.TypeChar != 'U';
+ }
bool BDefined;
// Symbol flags have been checked in the caller.
- uint32_t BFlags = cantFail(B.Sym.getFlags());
- if (B.Sym.getRawDataRefImpl().p)
+ if (B.Sym.getRawDataRefImpl().p) {
+ uint32_t BFlags = cantFail(B.Sym.getFlags());
BDefined = !(BFlags & SymbolRef::SF_Undefined);
- else
+ } else {
BDefined = B.TypeChar != 'U';
+ }
return std::make_tuple(ADefined, A.Address, A.Name, A.Size) <
std::make_tuple(BDefined, B.Address, B.Name, B.Size);
}
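
NMSymbol::Name and IndirectName become std::string because symbol names are now assembled per symbol (and may have a version suffix appended) instead of pointing into one long '\0'-separated buffer. A tiny sketch, independent of llvm-nm, of the lifetime hazard that owning storage avoids:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

// Returning the StringRef itself would dangle once Buffer goes away;
// converting to an owning std::string (as NMSymbol::Name now is) is safe.
static std::string makeOwnedName() {
  std::string Buffer = "_main";   // stand-in for a temporary name buffer
  llvm::StringRef View = Buffer;  // non-owning view into Buffer
  return View.str();              // copy into owned storage before Buffer dies
}

int main() {
  llvm::outs() << makeOwnedName() << "\n";
  return 0;
}
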
@@ -813,7 +815,7 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
for (const NMSymbol &S : SymbolList) {
uint32_t SymFlags;
- std::string Name = S.Name.str();
+ std::string Name = S.Name;
MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
if (Demangle) {
if (Optional<std::string> Opt = demangle(S.Name, MachO))
@@ -1093,9 +1095,8 @@ static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I) {
}
static bool isObject(SymbolicFile &Obj, basic_symbol_iterator I) {
- return !dyn_cast<ELFObjectFileBase>(&Obj)
- ? false
- : elf_symbol_iterator(I)->getELFType() == ELF::STT_OBJECT;
+ return isa<ELFObjectFileBase>(&Obj) &&
+ elf_symbol_iterator(I)->getELFType() == ELF::STT_OBJECT;
}
// For ELF object files, Set TypeName to the symbol typename, to be printed
@@ -1218,10 +1219,557 @@ static unsigned getNsectInMachO(MachOObjectFile &Obj, BasicSymbolRef Sym) {
return (STE.n_type & MachO::N_TYPE) == MachO::N_SECT ? STE.n_sect : 0;
}
+static void dumpSymbolsFromDLInfoMachO(MachOObjectFile &MachO) {
+ size_t I = SymbolList.size();
+ std::string ExportsNameBuffer;
+ raw_string_ostream EOS(ExportsNameBuffer);
+ std::string BindsNameBuffer;
+ raw_string_ostream BOS(BindsNameBuffer);
+ std::string LazysNameBuffer;
+ raw_string_ostream LOS(LazysNameBuffer);
+ std::string WeaksNameBuffer;
+ raw_string_ostream WOS(WeaksNameBuffer);
+ std::string FunctionStartsNameBuffer;
+ raw_string_ostream FOS(FunctionStartsNameBuffer);
+
+ MachO::mach_header H;
+ MachO::mach_header_64 H_64;
+ uint32_t HFlags = 0;
+ if (MachO.is64Bit()) {
+ H_64 = MachO.MachOObjectFile::getHeader64();
+ HFlags = H_64.flags;
+ } else {
+ H = MachO.MachOObjectFile::getHeader();
+ HFlags = H.flags;
+ }
+ uint64_t BaseSegmentAddress = 0;
+ for (const auto &Command : MachO.load_commands()) {
+ if (Command.C.cmd == MachO::LC_SEGMENT) {
+ MachO::segment_command Seg = MachO.getSegmentLoadCommand(Command);
+ if (Seg.fileoff == 0 && Seg.filesize != 0) {
+ BaseSegmentAddress = Seg.vmaddr;
+ break;
+ }
+ } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
+ MachO::segment_command_64 Seg = MachO.getSegment64LoadCommand(Command);
+ if (Seg.fileoff == 0 && Seg.filesize != 0) {
+ BaseSegmentAddress = Seg.vmaddr;
+ break;
+ }
+ }
+ }
+ if (DyldInfoOnly || AddDyldInfo ||
+ HFlags & MachO::MH_NLIST_OUTOFSYNC_WITH_DYLDINFO) {
+ unsigned ExportsAdded = 0;
+ Error Err = Error::success();
+ for (const llvm::object::ExportEntry &Entry : MachO.exports(Err)) {
+ bool found = false;
+ bool ReExport = false;
+ if (!DyldInfoOnly) {
+ for (const NMSymbol &S : SymbolList)
+ if (S.Address == Entry.address() + BaseSegmentAddress &&
+ S.Name == Entry.name()) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ NMSymbol S = {};
+ S.Address = Entry.address() + BaseSegmentAddress;
+ S.Size = 0;
+ S.TypeChar = '\0';
+ S.Name = Entry.name().str();
+ // There is no symbol in the nlist symbol table for this so we set
+ // Sym effectively to null and the rest of the code in here must test for
+ // it and not do things like Sym.getFlags() for it.
+ S.Sym = BasicSymbolRef();
+ S.SymFlags = SymbolRef::SF_Global;
+ S.Section = SectionRef();
+ S.NType = 0;
+ S.NSect = 0;
+ S.NDesc = 0;
+
+ uint64_t EFlags = Entry.flags();
+ bool Abs = ((EFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) ==
+ MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE);
+ bool Resolver = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER);
+ ReExport = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT);
+ bool WeakDef = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
+ if (WeakDef)
+ S.NDesc |= MachO::N_WEAK_DEF;
+ if (Abs) {
+ S.NType = MachO::N_EXT | MachO::N_ABS;
+ S.TypeChar = 'A';
+ } else if (ReExport) {
+ S.NType = MachO::N_EXT | MachO::N_INDR;
+ S.TypeChar = 'I';
+ } else {
+ S.NType = MachO::N_EXT | MachO::N_SECT;
+ if (Resolver) {
+ S.Address = Entry.other() + BaseSegmentAddress;
+ if ((S.Address & 1) != 0 && !MachO.is64Bit() &&
+ H.cputype == MachO::CPU_TYPE_ARM) {
+ S.Address &= ~1LL;
+ S.NDesc |= MachO::N_ARM_THUMB_DEF;
+ }
+ } else {
+ S.Address = Entry.address() + BaseSegmentAddress;
+ }
+ StringRef SegmentName = StringRef();
+ StringRef SectionName = StringRef();
+ for (const SectionRef &Section : MachO.sections()) {
+ S.NSect++;
+
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ SectionName = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
+ SegmentName =
+ MachO.getSectionFinalSegmentName(Section.getRawDataRefImpl());
+ if (S.Address >= Section.getAddress() &&
+ S.Address < Section.getAddress() + Section.getSize()) {
+ S.Section = Section;
+ break;
+ } else if (Entry.name() == "__mh_execute_header" &&
+ SegmentName == "__TEXT" && SectionName == "__text") {
+ S.Section = Section;
+ S.NDesc |= MachO::REFERENCED_DYNAMICALLY;
+ break;
+ }
+ }
+ if (SegmentName == "__TEXT" && SectionName == "__text")
+ S.TypeChar = 'T';
+ else if (SegmentName == "__DATA" && SectionName == "__data")
+ S.TypeChar = 'D';
+ else if (SegmentName == "__DATA" && SectionName == "__bss")
+ S.TypeChar = 'B';
+ else
+ S.TypeChar = 'S';
+ }
+ SymbolList.push_back(S);
+
+ EOS << Entry.name();
+ EOS << '\0';
+ ExportsAdded++;
+
+ // For ReExports there are two more things to do: first, add the
+ // indirect name and second, create the undefined symbol using the
+ // referenced dynamic library.
+ if (ReExport) {
+
+ // Add the indirect name.
+ if (Entry.otherName().empty())
+ EOS << Entry.name();
+ else
+ EOS << Entry.otherName();
+ EOS << '\0';
+
+ // Now create the undefined symbol using the referenced dynamic
+ // library.
+ NMSymbol U = {};
+ U.Address = 0;
+ U.Size = 0;
+ U.TypeChar = 'U';
+ if (Entry.otherName().empty())
+ U.Name = Entry.name().str();
+ else
+ U.Name = Entry.otherName().str();
+ // Again there is no symbol in the nlist symbol table for this so
+ // we set Sym effectively to null and the rest of the code in here must
+ // test for it and not do things like Sym.getFlags() for it.
+ U.Sym = BasicSymbolRef();
+ U.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
+ U.Section = SectionRef();
+ U.NType = MachO::N_EXT | MachO::N_UNDF;
+ U.NSect = 0;
+ U.NDesc = 0;
+ // The library ordinal for this undefined symbol is in the export
+ // trie Entry.other().
+ MachO::SET_LIBRARY_ORDINAL(U.NDesc, Entry.other());
+ SymbolList.push_back(U);
+
+ // Finally add the undefined symbol's name.
+ if (Entry.otherName().empty())
+ EOS << Entry.name();
+ else
+ EOS << Entry.otherName();
+ EOS << '\0';
+ ExportsAdded++;
+ }
+ }
+ }
+ if (Err)
+ error(std::move(Err), MachO.getFileName());
+ // Set the symbol names and indirect names for the added symbols.
+ if (ExportsAdded) {
+ EOS.flush();
+ const char *Q = ExportsNameBuffer.c_str();
+ for (unsigned K = 0; K < ExportsAdded; K++) {
+ SymbolList[I].Name = Q;
+ Q += strlen(Q) + 1;
+ if (SymbolList[I].TypeChar == 'I') {
+ SymbolList[I].IndirectName = Q;
+ Q += strlen(Q) + 1;
+ }
+ I++;
+ }
+ }
+
+ // Add the undefined symbols from the bind entries.
+ unsigned BindsAdded = 0;
+ Error BErr = Error::success();
+ StringRef LastSymbolName = StringRef();
+ for (const llvm::object::MachOBindEntry &Entry : MachO.bindTable(BErr)) {
+ bool found = false;
+ if (LastSymbolName == Entry.symbolName())
+ found = true;
+ else if (!DyldInfoOnly) {
+ for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
+ if (SymbolList[J].Name == Entry.symbolName())
+ found = true;
+ }
+ }
+ if (!found) {
+ LastSymbolName = Entry.symbolName();
+ NMSymbol B = {};
+ B.Address = 0;
+ B.Size = 0;
+ B.TypeChar = 'U';
+ // There is no symbol in the nlist symbol table for this so we set
+ // Sym effectively to null and the rest of the code in here must test for
+ // it and not do things like Sym.getFlags() for it.
+ B.Sym = BasicSymbolRef();
+ B.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
+ B.NType = MachO::N_EXT | MachO::N_UNDF;
+ B.NSect = 0;
+ B.NDesc = 0;
+ MachO::SET_LIBRARY_ORDINAL(B.NDesc, Entry.ordinal());
+ B.Name = Entry.symbolName().str();
+ SymbolList.push_back(B);
+ BOS << Entry.symbolName();
+ BOS << '\0';
+ BindsAdded++;
+ }
+ }
+ if (BErr)
+ error(std::move(BErr), MachO.getFileName());
+ // Set the symbol names and indirect names for the added symbols.
+ if (BindsAdded) {
+ BOS.flush();
+ const char *Q = BindsNameBuffer.c_str();
+ for (unsigned K = 0; K < BindsAdded; K++) {
+ SymbolList[I].Name = Q;
+ Q += strlen(Q) + 1;
+ if (SymbolList[I].TypeChar == 'I') {
+ SymbolList[I].IndirectName = Q;
+ Q += strlen(Q) + 1;
+ }
+ I++;
+ }
+ }
+
+ // Add the undefined symbols from the lazy bind entries.
+ unsigned LazysAdded = 0;
+ Error LErr = Error::success();
+ LastSymbolName = StringRef();
+ for (const llvm::object::MachOBindEntry &Entry :
+ MachO.lazyBindTable(LErr)) {
+ bool found = false;
+ if (LastSymbolName == Entry.symbolName())
+ found = true;
+ else {
+ // Here we must check to see if this symbol is already in the
+ // SymbolList, as it might already have been added above via a
+ // non-lazy (bind) entry.
+ for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
+ if (SymbolList[J].Name == Entry.symbolName())
+ found = true;
+ }
+ }
+ if (!found) {
+ LastSymbolName = Entry.symbolName();
+ NMSymbol L = {};
+ L.Name = Entry.symbolName().str();
+ L.Address = 0;
+ L.Size = 0;
+ L.TypeChar = 'U';
+ // There is no symbol in the nlist symbol table for this so we set
+ // Sym effectively to null and the rest of the code in here must test for
+ // it and not do things like Sym.getFlags() for it.
+ L.Sym = BasicSymbolRef();
+ L.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
+ L.NType = MachO::N_EXT | MachO::N_UNDF;
+ L.NSect = 0;
+ // The REFERENCE_FLAG_UNDEFINED_LAZY is no longer used but here it
+ // makes sense since we are creating this from a lazy bind entry.
+ L.NDesc = MachO::REFERENCE_FLAG_UNDEFINED_LAZY;
+ MachO::SET_LIBRARY_ORDINAL(L.NDesc, Entry.ordinal());
+ SymbolList.push_back(L);
+ LOS << Entry.symbolName();
+ LOS << '\0';
+ LazysAdded++;
+ }
+ }
+ if (LErr)
+ error(std::move(LErr), MachO.getFileName());
+ // Set the symbol names and indirect names for the added symbols.
+ if (LazysAdded) {
+ LOS.flush();
+ const char *Q = LazysNameBuffer.c_str();
+ for (unsigned K = 0; K < LazysAdded; K++) {
+ SymbolList[I].Name = Q;
+ Q += strlen(Q) + 1;
+ if (SymbolList[I].TypeChar == 'I') {
+ SymbolList[I].IndirectName = Q;
+ Q += strlen(Q) + 1;
+ }
+ I++;
+ }
+ }
+
+ // Add the undefined symbols from the weak bind entries which are not
+ // strong symbols.
+ unsigned WeaksAdded = 0;
+ Error WErr = Error::success();
+ LastSymbolName = StringRef();
+ for (const llvm::object::MachOBindEntry &Entry :
+ MachO.weakBindTable(WErr)) {
+ bool found = false;
+ unsigned J = 0;
+ if (LastSymbolName == Entry.symbolName() ||
+ Entry.flags() & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) {
+ found = true;
+ } else {
+ for (J = 0; J < SymbolList.size() && !found; ++J) {
+ if (SymbolList[J].Name == Entry.symbolName()) {
+ found = true;
+ break;
+ }
+ }
+ }
+ if (!found) {
+ LastSymbolName = Entry.symbolName();
+ NMSymbol W = {};
+ W.Name = Entry.symbolName().str();
+ W.Address = 0;
+ W.Size = 0;
+ W.TypeChar = 'U';
+ // There is no symbol in the nlist symbol table for this so we set
+ // Sym effectively to null and the rest of the code in here must test for
+ // it and not do things like Sym.getFlags() for it.
+ W.Sym = BasicSymbolRef();
+ W.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
+ W.NType = MachO::N_EXT | MachO::N_UNDF;
+ W.NSect = 0;
+ // Odd that we are using N_WEAK_DEF on an undefined symbol but that is
+ // what is created in this case by the linker when there are real
+ // symbols in the nlist structs.
+ W.NDesc = MachO::N_WEAK_DEF;
+ SymbolList.push_back(W);
+ WOS << Entry.symbolName();
+ WOS << '\0';
+ WeaksAdded++;
+ } else {
+ // This is the case where the symbol was previously found, and it could
+ // have been added from a bind or lazy bind symbol. If so, and it is not
+ // a definition, also mark it as weak.
+ if (SymbolList[J].TypeChar == 'U')
+ // See comment above about N_WEAK_DEF.
+ SymbolList[J].NDesc |= MachO::N_WEAK_DEF;
+ }
+ }
+ if (WErr)
+ error(std::move(WErr), MachO.getFileName());
+ // Set the symbol names and indirect names for the added symbols.
+ if (WeaksAdded) {
+ WOS.flush();
+ const char *Q = WeaksNameBuffer.c_str();
+ for (unsigned K = 0; K < WeaksAdded; K++) {
+ SymbolList[I].Name = Q;
+ Q += strlen(Q) + 1;
+ if (SymbolList[I].TypeChar == 'I') {
+ SymbolList[I].IndirectName = Q;
+ Q += strlen(Q) + 1;
+ }
+ I++;
+ }
+ }
+
+ // Try adding symbols from the function starts table and LC_MAIN entry
+ // point.
+ SmallVector<uint64_t, 8> FoundFns;
+ uint64_t lc_main_offset = UINT64_MAX;
+ for (const auto &Command : MachO.load_commands()) {
+ if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) {
+ // We found a function starts segment, parse the addresses for
+ // consumption.
+ MachO::linkedit_data_command LLC =
+ MachO.getLinkeditDataLoadCommand(Command);
+
+ MachO.ReadULEB128s(LLC.dataoff, FoundFns);
+ } else if (Command.C.cmd == MachO::LC_MAIN) {
+ MachO::entry_point_command LCmain = MachO.getEntryPointCommand(Command);
+ lc_main_offset = LCmain.entryoff;
+ }
+ }
+ // See if these addresses are already in the symbol table.
+ unsigned FunctionStartsAdded = 0;
+ for (uint64_t f = 0; f < FoundFns.size(); f++) {
+ bool found = false;
+ for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
+ if (SymbolList[J].Address == FoundFns[f] + BaseSegmentAddress)
+ found = true;
+ }
+ // If this address is not already in the symbol table, fake up an
+ // nlist for it.
+ if (!found) {
+ NMSymbol F = {};
+ F.Name = "<redacted function X>";
+ F.Address = FoundFns[f] + BaseSegmentAddress;
+ F.Size = 0;
+ // There is no symbol in the nlist symbol table for this so we set
+ // Sym effectively to null and the rest of the code in here must test for
+ // it and not do things like Sym.getFlags() for it.
+ F.Sym = BasicSymbolRef();
+ F.SymFlags = 0;
+ F.NType = MachO::N_SECT;
+ F.NSect = 0;
+ StringRef SegmentName = StringRef();
+ StringRef SectionName = StringRef();
+ for (const SectionRef &Section : MachO.sections()) {
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ SectionName = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
+ SegmentName =
+ MachO.getSectionFinalSegmentName(Section.getRawDataRefImpl());
+ F.NSect++;
+ if (F.Address >= Section.getAddress() &&
+ F.Address < Section.getAddress() + Section.getSize()) {
+ F.Section = Section;
+ break;
+ }
+ }
+ if (SegmentName == "__TEXT" && SectionName == "__text")
+ F.TypeChar = 't';
+ else if (SegmentName == "__DATA" && SectionName == "__data")
+ F.TypeChar = 'd';
+ else if (SegmentName == "__DATA" && SectionName == "__bss")
+ F.TypeChar = 'b';
+ else
+ F.TypeChar = 's';
+ F.NDesc = 0;
+ SymbolList.push_back(F);
+ if (FoundFns[f] == lc_main_offset)
+ FOS << "<redacted LC_MAIN>";
+ else
+ FOS << "<redacted function " << f << ">";
+ FOS << '\0';
+ FunctionStartsAdded++;
+ }
+ }
+ if (FunctionStartsAdded) {
+ FOS.flush();
+ const char *Q = FunctionStartsNameBuffer.c_str();
+ for (unsigned K = 0; K < FunctionStartsAdded; K++) {
+ SymbolList[I].Name = Q;
+ Q += strlen(Q) + 1;
+ if (SymbolList[I].TypeChar == 'I') {
+ SymbolList[I].IndirectName = Q;
+ Q += strlen(Q) + 1;
+ }
+ I++;
+ }
+ }
+ }
+}
+
+namespace {
+struct SymbolVersion {
+ std::string Name;
+ bool IsDefault;
+};
+} // namespace
+
+template <class ELFT>
+static Expected<std::vector<SymbolVersion>>
+readSymbolVersionsELF(const ELFFile<ELFT> &Obj, StringRef FileName,
+ ELFObjectFileBase::elf_symbol_iterator_range Symbols) {
+ using Elf_Shdr = typename ELFT::Shdr;
+
+ // We called sections() earlier, so can't fail here.
+ typename ELFT::ShdrRange SectionsOrErr = cantFail(Obj.sections());
+ const Elf_Shdr *SymVerSec = nullptr;
+ const Elf_Shdr *SymVerNeedSec = nullptr;
+ const Elf_Shdr *SymVerDefSec = nullptr;
+ for (const Elf_Shdr &Sec : SectionsOrErr) {
+ if (Sec.sh_type == ELF::SHT_GNU_versym)
+ SymVerSec = &Sec;
+ else if (Sec.sh_type == ELF::SHT_GNU_verdef)
+ SymVerDefSec = &Sec;
+ else if (Sec.sh_type == ELF::SHT_GNU_verneed)
+ SymVerNeedSec = &Sec;
+ }
+
+ if (!SymVerSec)
+ return std::vector<SymbolVersion>{};
+
+ Expected<SmallVector<Optional<VersionEntry>, 0>> MapOrErr =
+ Obj.loadVersionMap(SymVerNeedSec, SymVerDefSec);
+ if (!MapOrErr)
+ return MapOrErr.takeError();
+
+ std::vector<SymbolVersion> Ret;
+ size_t I = 0;
+ for (auto It = Symbols.begin(), E = Symbols.end(); It != E; ++It) {
+ ++I;
+ Expected<const typename ELFT::Versym *> VerEntryOrErr =
+ Obj.template getEntry<typename ELFT::Versym>(*SymVerSec, I);
+ if (!VerEntryOrErr)
+ return createError("unable to read an entry with index " + Twine(I) +
+ " from " + describe(Obj, *SymVerSec) + ": " +
+ toString(VerEntryOrErr.takeError()));
+
+ Expected<uint32_t> FlagsOrErr = It->getFlags();
+ if (!FlagsOrErr)
+ return createError("unable to read flags for symbol with index " +
+ Twine(I) + ": " + toString(FlagsOrErr.takeError()));
+
+ bool IsDefault;
+ Expected<StringRef> VerOrErr = Obj.getSymbolVersionByIndex(
+ (*VerEntryOrErr)->vs_index, IsDefault, *MapOrErr,
+ (*FlagsOrErr) & SymbolRef::SF_Undefined);
+ if (!VerOrErr)
+ return createError("unable to get a version for entry " + Twine(I) +
+ " of " + describe(Obj, *SymVerSec) + ": " +
+ toString(VerOrErr.takeError()));
+
+ Ret.push_back({(*VerOrErr).str(), IsDefault});
+ }
+
+ return Ret;
+}
+
+static Expected<std::vector<SymbolVersion>>
+readSymbolVersionsELF(const ELFObjectFileBase &Obj,
+ ELFObjectFileBase::elf_symbol_iterator_range Symbols) {
+ if (const auto *ELF = dyn_cast<ELF32LEObjectFile>(&Obj))
+ return readSymbolVersionsELF(ELF->getELFFile(), Obj.getFileName(), Symbols);
+ else if (const auto *ELF = dyn_cast<ELF32BEObjectFile>(&Obj))
+ return readSymbolVersionsELF(ELF->getELFFile(), Obj.getFileName(), Symbols);
+ else if (const auto *ELF = dyn_cast<ELF64LEObjectFile>(&Obj))
+ return readSymbolVersionsELF(ELF->getELFFile(), Obj.getFileName(), Symbols);
+ return readSymbolVersionsELF(cast<ELF64BEObjectFile>(&Obj)->getELFFile(),
+ Obj.getFileName(), Symbols);
+}
+
static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
StringRef ArchiveName = {},
StringRef ArchitectureName = {}) {
auto Symbols = Obj.symbols();
+ std::vector<SymbolVersion> SymbolVersions;
if (DynamicSyms) {
const auto *E = dyn_cast<ELFObjectFileBase>(&Obj);
if (!E) {
@@ -1229,9 +1777,16 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
return;
}
Symbols = E->getDynamicSymbolIterators();
+
+ if (Expected<std::vector<SymbolVersion>> VersionsOrErr =
+ readSymbolVersionsELF(*E, Symbols))
+ SymbolVersions = std::move(*VersionsOrErr);
+ else
+ WithColor::warning(errs(), ToolName)
+ << "unable to read symbol versions: "
+ << toString(VersionsOrErr.takeError()) << "\n";
}
- std::string NameBuffer;
- raw_string_ostream OS(NameBuffer);
+
// If a "-s segname sectname" option was specified and this is a Mach-O
// file get the section number for that section in this object file.
unsigned int Nsect = 0;
@@ -1243,7 +1798,9 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
return;
}
if (!(MachO && DyldInfoOnly)) {
+ size_t I = -1;
for (BasicSymbolRef Sym : Symbols) {
+ ++I;
Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
if (!SymFlagsOrErr) {
error(SymFlagsOrErr.takeError(), Obj.getFileName());
@@ -1274,6 +1831,8 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
S.TypeName = getNMTypeName(Obj, Sym);
S.TypeChar = getNMSectionTagAndName(Obj, Sym, S.SectionName);
+
+ raw_string_ostream OS(S.Name);
if (Error E = Sym.printName(OS)) {
if (MachO) {
OS << "bad string index";
@@ -1281,499 +1840,23 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
} else
error(std::move(E), Obj.getFileName());
}
- OS << '\0';
+ if (!SymbolVersions.empty() && !SymbolVersions[I].Name.empty())
+ S.Name +=
+ (SymbolVersions[I].IsDefault ? "@@" : "@") + SymbolVersions[I].Name;
+
S.Sym = Sym;
SymbolList.push_back(S);
}
}
- OS.flush();
- const char *P = NameBuffer.c_str();
- unsigned I;
- for (I = 0; I < SymbolList.size(); ++I) {
- SymbolList[I].Name = P;
- P += strlen(P) + 1;
- }
-
// If this is a Mach-O file where the nlist symbol table is out of sync
// with the dyld export trie then look through exports and fake up symbols
// for the ones that are missing (also done with the -add-dyldinfo flag).
// This is needed if strip(1) -T is run on a binary containing swift
// language symbols for example. The option -only-dyldinfo will fake up
// all symbols from the dyld export trie as well as the bind info.
- std::string ExportsNameBuffer;
- raw_string_ostream EOS(ExportsNameBuffer);
- std::string BindsNameBuffer;
- raw_string_ostream BOS(BindsNameBuffer);
- std::string LazysNameBuffer;
- raw_string_ostream LOS(LazysNameBuffer);
- std::string WeaksNameBuffer;
- raw_string_ostream WOS(WeaksNameBuffer);
- std::string FunctionStartsNameBuffer;
- raw_string_ostream FOS(FunctionStartsNameBuffer);
- if (MachO && !NoDyldInfo) {
- MachO::mach_header H;
- MachO::mach_header_64 H_64;
- uint32_t HFlags = 0;
- if (MachO->is64Bit()) {
- H_64 = MachO->MachOObjectFile::getHeader64();
- HFlags = H_64.flags;
- } else {
- H = MachO->MachOObjectFile::getHeader();
- HFlags = H.flags;
- }
- uint64_t BaseSegmentAddress = 0;
- for (const auto &Command : MachO->load_commands()) {
- if (Command.C.cmd == MachO::LC_SEGMENT) {
- MachO::segment_command Seg = MachO->getSegmentLoadCommand(Command);
- if (Seg.fileoff == 0 && Seg.filesize != 0) {
- BaseSegmentAddress = Seg.vmaddr;
- break;
- }
- } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
- MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Command);
- if (Seg.fileoff == 0 && Seg.filesize != 0) {
- BaseSegmentAddress = Seg.vmaddr;
- break;
- }
- }
- }
- if (DyldInfoOnly || AddDyldInfo ||
- HFlags & MachO::MH_NLIST_OUTOFSYNC_WITH_DYLDINFO) {
- unsigned ExportsAdded = 0;
- Error Err = Error::success();
- for (const llvm::object::ExportEntry &Entry : MachO->exports(Err)) {
- bool found = false;
- bool ReExport = false;
- if (!DyldInfoOnly) {
- for (const NMSymbol &S : SymbolList)
- if (S.Address == Entry.address() + BaseSegmentAddress &&
- S.Name == Entry.name()) {
- found = true;
- break;
- }
- }
- if (!found) {
- NMSymbol S = {};
- S.Address = Entry.address() + BaseSegmentAddress;
- S.Size = 0;
- S.TypeChar = '\0';
- S.Name = Entry.name();
- // There is no symbol in the nlist symbol table for this so we set
- // Sym effectivly to null and the rest of code in here must test for
- // it and not do things like Sym.getFlags() for it.
- S.Sym = BasicSymbolRef();
- S.SymFlags = SymbolRef::SF_Global;
- S.Section = SectionRef();
- S.NType = 0;
- S.NSect = 0;
- S.NDesc = 0;
- S.IndirectName = StringRef();
-
- uint64_t EFlags = Entry.flags();
- bool Abs = ((EFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) ==
- MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE);
- bool Resolver = (EFlags &
- MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER);
- ReExport = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT);
- bool WeakDef = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
- if (WeakDef)
- S.NDesc |= MachO::N_WEAK_DEF;
- if (Abs) {
- S.NType = MachO::N_EXT | MachO::N_ABS;
- S.TypeChar = 'A';
- } else if (ReExport) {
- S.NType = MachO::N_EXT | MachO::N_INDR;
- S.TypeChar = 'I';
- } else {
- S.NType = MachO::N_EXT | MachO::N_SECT;
- if (Resolver) {
- S.Address = Entry.other() + BaseSegmentAddress;
- if ((S.Address & 1) != 0 &&
- !MachO->is64Bit() && H.cputype == MachO::CPU_TYPE_ARM){
- S.Address &= ~1LL;
- S.NDesc |= MachO::N_ARM_THUMB_DEF;
- }
- } else {
- S.Address = Entry.address() + BaseSegmentAddress;
- }
- StringRef SegmentName = StringRef();
- StringRef SectionName = StringRef();
- for (const SectionRef &Section : MachO->sections()) {
- S.NSect++;
-
- if (Expected<StringRef> NameOrErr = Section.getName())
- SectionName = *NameOrErr;
- else
- consumeError(NameOrErr.takeError());
-
- SegmentName = MachO->getSectionFinalSegmentName(
- Section.getRawDataRefImpl());
- if (S.Address >= Section.getAddress() &&
- S.Address < Section.getAddress() + Section.getSize()) {
- S.Section = Section;
- break;
- } else if (Entry.name() == "__mh_execute_header" &&
- SegmentName == "__TEXT" && SectionName == "__text") {
- S.Section = Section;
- S.NDesc |= MachO::REFERENCED_DYNAMICALLY;
- break;
- }
- }
- if (SegmentName == "__TEXT" && SectionName == "__text")
- S.TypeChar = 'T';
- else if (SegmentName == "__DATA" && SectionName == "__data")
- S.TypeChar = 'D';
- else if (SegmentName == "__DATA" && SectionName == "__bss")
- S.TypeChar = 'B';
- else
- S.TypeChar = 'S';
- }
- SymbolList.push_back(S);
-
- EOS << Entry.name();
- EOS << '\0';
- ExportsAdded++;
-
- // For ReExports there are a two more things to do, first add the
- // indirect name and second create the undefined symbol using the
- // referened dynamic library.
- if (ReExport) {
-
- // Add the indirect name.
- if (Entry.otherName().empty())
- EOS << Entry.name();
- else
- EOS << Entry.otherName();
- EOS << '\0';
-
- // Now create the undefined symbol using the referened dynamic
- // library.
- NMSymbol U = {};
- U.Address = 0;
- U.Size = 0;
- U.TypeChar = 'U';
- if (Entry.otherName().empty())
- U.Name = Entry.name();
- else
- U.Name = Entry.otherName();
- // Again there is no symbol in the nlist symbol table for this so
- // we set Sym effectivly to null and the rest of code in here must
- // test for it and not do things like Sym.getFlags() for it.
- U.Sym = BasicSymbolRef();
- U.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
- U.Section = SectionRef();
- U.NType = MachO::N_EXT | MachO::N_UNDF;
- U.NSect = 0;
- U.NDesc = 0;
- // The library ordinal for this undefined symbol is in the export
- // trie Entry.other().
- MachO::SET_LIBRARY_ORDINAL(U.NDesc, Entry.other());
- U.IndirectName = StringRef();
- SymbolList.push_back(U);
-
- // Finally add the undefined symbol's name.
- if (Entry.otherName().empty())
- EOS << Entry.name();
- else
- EOS << Entry.otherName();
- EOS << '\0';
- ExportsAdded++;
- }
- }
- }
- if (Err)
- error(std::move(Err), MachO->getFileName());
- // Set the symbol names and indirect names for the added symbols.
- if (ExportsAdded) {
- EOS.flush();
- const char *Q = ExportsNameBuffer.c_str();
- for (unsigned K = 0; K < ExportsAdded; K++) {
- SymbolList[I].Name = Q;
- Q += strlen(Q) + 1;
- if (SymbolList[I].TypeChar == 'I') {
- SymbolList[I].IndirectName = Q;
- Q += strlen(Q) + 1;
- }
- I++;
- }
- }
-
- // Add the undefined symbols from the bind entries.
- unsigned BindsAdded = 0;
- Error BErr = Error::success();
- StringRef LastSymbolName = StringRef();
- for (const llvm::object::MachOBindEntry &Entry : MachO->bindTable(BErr)) {
- bool found = false;
- if (LastSymbolName == Entry.symbolName())
- found = true;
- else if(!DyldInfoOnly) {
- for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
- if (SymbolList[J].Name == Entry.symbolName())
- found = true;
- }
- }
- if (!found) {
- LastSymbolName = Entry.symbolName();
- NMSymbol B = {};
- B.Address = 0;
- B.Size = 0;
- B.TypeChar = 'U';
- // There is no symbol in the nlist symbol table for this so we set
- // Sym effectivly to null and the rest of code in here must test for
- // it and not do things like Sym.getFlags() for it.
- B.Sym = BasicSymbolRef();
- B.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
- B.NType = MachO::N_EXT | MachO::N_UNDF;
- B.NSect = 0;
- B.NDesc = 0;
- MachO::SET_LIBRARY_ORDINAL(B.NDesc, Entry.ordinal());
- B.IndirectName = StringRef();
- B.Name = Entry.symbolName();
- SymbolList.push_back(B);
- BOS << Entry.symbolName();
- BOS << '\0';
- BindsAdded++;
- }
- }
- if (BErr)
- error(std::move(BErr), MachO->getFileName());
- // Set the symbol names and indirect names for the added symbols.
- if (BindsAdded) {
- BOS.flush();
- const char *Q = BindsNameBuffer.c_str();
- for (unsigned K = 0; K < BindsAdded; K++) {
- SymbolList[I].Name = Q;
- Q += strlen(Q) + 1;
- if (SymbolList[I].TypeChar == 'I') {
- SymbolList[I].IndirectName = Q;
- Q += strlen(Q) + 1;
- }
- I++;
- }
- }
-
- // Add the undefined symbols from the lazy bind entries.
- unsigned LazysAdded = 0;
- Error LErr = Error::success();
- LastSymbolName = StringRef();
- for (const llvm::object::MachOBindEntry &Entry :
- MachO->lazyBindTable(LErr)) {
- bool found = false;
- if (LastSymbolName == Entry.symbolName())
- found = true;
- else {
- // Here we must check to see it this symbol is already in the
- // SymbolList as it might have already have been added above via a
- // non-lazy (bind) entry.
- for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
- if (SymbolList[J].Name == Entry.symbolName())
- found = true;
- }
- }
- if (!found) {
- LastSymbolName = Entry.symbolName();
- NMSymbol L = {};
- L.Name = Entry.symbolName();
- L.Address = 0;
- L.Size = 0;
- L.TypeChar = 'U';
- // There is no symbol in the nlist symbol table for this so we set
- // Sym effectivly to null and the rest of code in here must test for
- // it and not do things like Sym.getFlags() for it.
- L.Sym = BasicSymbolRef();
- L.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
- L.NType = MachO::N_EXT | MachO::N_UNDF;
- L.NSect = 0;
- // The REFERENCE_FLAG_UNDEFINED_LAZY is no longer used but here it
- // makes sence since we are creating this from a lazy bind entry.
- L.NDesc = MachO::REFERENCE_FLAG_UNDEFINED_LAZY;
- MachO::SET_LIBRARY_ORDINAL(L.NDesc, Entry.ordinal());
- L.IndirectName = StringRef();
- SymbolList.push_back(L);
- LOS << Entry.symbolName();
- LOS << '\0';
- LazysAdded++;
- }
- }
- if (LErr)
- error(std::move(LErr), MachO->getFileName());
- // Set the symbol names and indirect names for the added symbols.
- if (LazysAdded) {
- LOS.flush();
- const char *Q = LazysNameBuffer.c_str();
- for (unsigned K = 0; K < LazysAdded; K++) {
- SymbolList[I].Name = Q;
- Q += strlen(Q) + 1;
- if (SymbolList[I].TypeChar == 'I') {
- SymbolList[I].IndirectName = Q;
- Q += strlen(Q) + 1;
- }
- I++;
- }
- }
-
- // Add the undefineds symbol from the weak bind entries which are not
- // strong symbols.
- unsigned WeaksAdded = 0;
- Error WErr = Error::success();
- LastSymbolName = StringRef();
- for (const llvm::object::MachOBindEntry &Entry :
- MachO->weakBindTable(WErr)) {
- bool found = false;
- unsigned J = 0;
- if (LastSymbolName == Entry.symbolName() ||
- Entry.flags() & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) {
- found = true;
- } else {
- for (J = 0; J < SymbolList.size() && !found; ++J) {
- if (SymbolList[J].Name == Entry.symbolName()) {
- found = true;
- break;
- }
- }
- }
- if (!found) {
- LastSymbolName = Entry.symbolName();
- NMSymbol W;
- memset(&W, '\0', sizeof(NMSymbol));
- W.Name = Entry.symbolName();
- W.Address = 0;
- W.Size = 0;
- W.TypeChar = 'U';
- // There is no symbol in the nlist symbol table for this so we set
- // Sym effectivly to null and the rest of code in here must test for
- // it and not do things like Sym.getFlags() for it.
- W.Sym = BasicSymbolRef();
- W.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
- W.NType = MachO::N_EXT | MachO::N_UNDF;
- W.NSect = 0;
- // Odd that we are using N_WEAK_DEF on an undefined symbol but that is
- // what is created in this case by the linker when there are real
- // symbols in the nlist structs.
- W.NDesc = MachO::N_WEAK_DEF;
- W.IndirectName = StringRef();
- SymbolList.push_back(W);
- WOS << Entry.symbolName();
- WOS << '\0';
- WeaksAdded++;
- } else {
- // This is the case the symbol was previously been found and it could
- // have been added from a bind or lazy bind symbol. If so and not
- // a definition also mark it as weak.
- if (SymbolList[J].TypeChar == 'U')
- // See comment above about N_WEAK_DEF.
- SymbolList[J].NDesc |= MachO::N_WEAK_DEF;
- }
- }
- if (WErr)
- error(std::move(WErr), MachO->getFileName());
- // Set the symbol names and indirect names for the added symbols.
- if (WeaksAdded) {
- WOS.flush();
- const char *Q = WeaksNameBuffer.c_str();
- for (unsigned K = 0; K < WeaksAdded; K++) {
- SymbolList[I].Name = Q;
- Q += strlen(Q) + 1;
- if (SymbolList[I].TypeChar == 'I') {
- SymbolList[I].IndirectName = Q;
- Q += strlen(Q) + 1;
- }
- I++;
- }
- }
-
- // Try adding symbols from the function starts table and the LC_MAIN
- // entry point.
- SmallVector<uint64_t, 8> FoundFns;
- uint64_t lc_main_offset = UINT64_MAX;
- for (const auto &Command : MachO->load_commands()) {
- if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) {
- // We found a function starts segment, parse the addresses for
- // consumption.
- MachO::linkedit_data_command LLC =
- MachO->getLinkeditDataLoadCommand(Command);
-
- MachO->ReadULEB128s(LLC.dataoff, FoundFns);
- } else if (Command.C.cmd == MachO::LC_MAIN) {
- MachO::entry_point_command LCmain =
- MachO->getEntryPointCommand(Command);
- lc_main_offset = LCmain.entryoff;
- }
- }
- // See if these addresses are already in the symbol table.
- unsigned FunctionStartsAdded = 0;
- for (uint64_t f = 0; f < FoundFns.size(); f++) {
- bool found = false;
- for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
- if (SymbolList[J].Address == FoundFns[f] + BaseSegmentAddress)
- found = true;
- }
- // If this address is not already in the symbol table, fake up an
- // nlist for it.
- if (!found) {
- NMSymbol F = {};
- F.Name = "<redacted function X>";
- F.Address = FoundFns[f] + BaseSegmentAddress;
- F.Size = 0;
- // There is no symbol in the nlist symbol table for this so we set
- // Sym effectively to null and the rest of the code in here must test for
- // it and not do things like Sym.getFlags() for it.
- F.Sym = BasicSymbolRef();
- F.SymFlags = 0;
- F.NType = MachO::N_SECT;
- F.NSect = 0;
- StringRef SegmentName = StringRef();
- StringRef SectionName = StringRef();
- for (const SectionRef &Section : MachO->sections()) {
- if (Expected<StringRef> NameOrErr = Section.getName())
- SectionName = *NameOrErr;
- else
- consumeError(NameOrErr.takeError());
-
- SegmentName = MachO->getSectionFinalSegmentName(
- Section.getRawDataRefImpl());
- F.NSect++;
- if (F.Address >= Section.getAddress() &&
- F.Address < Section.getAddress() + Section.getSize()) {
- F.Section = Section;
- break;
- }
- }
- if (SegmentName == "__TEXT" && SectionName == "__text")
- F.TypeChar = 't';
- else if (SegmentName == "__DATA" && SectionName == "__data")
- F.TypeChar = 'd';
- else if (SegmentName == "__DATA" && SectionName == "__bss")
- F.TypeChar = 'b';
- else
- F.TypeChar = 's';
- F.NDesc = 0;
- F.IndirectName = StringRef();
- SymbolList.push_back(F);
- if (FoundFns[f] == lc_main_offset)
- FOS << "<redacted LC_MAIN>";
- else
- FOS << "<redacted function " << f << ">";
- FOS << '\0';
- FunctionStartsAdded++;
- }
- }
- if (FunctionStartsAdded) {
- FOS.flush();
- const char *Q = FunctionStartsNameBuffer.c_str();
- for (unsigned K = 0; K < FunctionStartsAdded; K++) {
- SymbolList[I].Name = Q;
- Q += strlen(Q) + 1;
- if (SymbolList[I].TypeChar == 'I') {
- SymbolList[I].IndirectName = Q;
- Q += strlen(Q) + 1;
- }
- I++;
- }
- }
- }
- }
+ if (MachO && !NoDyldInfo)
+ dumpSymbolsFromDLInfoMachO(*MachO);
CurrentFilename = Obj.getFileName();
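The whole block removed above (lazy binds, weak binds, function starts, LC_MAIN) is folded into the new dumpSymbolsFromDLInfoMachO() helper rather than deleted outright. Each of those walks relies on the deferred-error idiom from llvm/Object/MachO.h that is visible in the removed lines: the table accessor takes an Error out-parameter which only becomes meaningful once iteration has finished. A minimal sketch of that idiom, using the tool's local error() reporter exactly as the removed code did (the loop body is a placeholder, not the helper's real contents):

    Error Err = Error::success();
    for (const object::MachOBindEntry &Entry : MachO->weakBindTable(Err)) {
      // Record Entry.symbolName(), Entry.ordinal(), Entry.flags(), ...
    }
    // Err must only be inspected after the loop has driven the iterator to
    // completion; checking it earlier would report a partial state.
    if (Err)
      error(std::move(Err), MachO->getFileName());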
@@ -1810,9 +1893,7 @@ static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) {
&McpuDefault, &ArchFlag);
}
const std::string ArchFlagName(ArchFlag);
- if (none_of(ArchFlags, [&](const std::string &Name) {
- return Name == ArchFlagName;
- })) {
+ if (!llvm::is_contained(ArchFlags, ArchFlagName)) {
error("No architecture specified", Filename);
return false;
}
@@ -2114,8 +2195,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
for (const TapiUniversal::ObjectForArch &I : TU->objects()) {
StringRef ArchName = I.getArchFlagName();
const bool ShowArch =
- ArchFlags.empty() ||
- any_of(ArchFlags, [&](StringRef Name) { return Name == ArchName; });
+ ArchFlags.empty() || llvm::is_contained(ArchFlags, ArchName);
if (!ShowArch)
continue;
if (!AddInlinedInfo && !I.isTopLevelLib())
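Both hunks above are the same cleanup: a hand-written predicate passed to none_of/any_of becomes a single llvm::is_contained() call from llvm/ADT/STLExtras.h. A small, self-contained illustration of the equivalence (the container and value are made up for the example):

    #include "llvm/ADT/STLExtras.h"
    #include <cassert>
    #include <string>
    #include <vector>

    void demo() {
      std::vector<std::string> ArchFlags = {"x86_64", "arm64"};
      std::string ArchName = "arm64";

      // Old spelling: write the equality check out as a lambda.
      bool Old = llvm::any_of(
          ArchFlags, [&](const std::string &Name) { return Name == ArchName; });
      // New spelling: is_contained performs the same find-by-equality.
      bool New = llvm::is_contained(ArchFlags, ArchName);
      assert(Old == New);
    }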
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/BitcodeStripOpts.td b/contrib/llvm-project/llvm/tools/llvm-objcopy/BitcodeStripOpts.td
new file mode 100644
index 000000000000..cc178164b03c
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/BitcodeStripOpts.td
@@ -0,0 +1,24 @@
+//===-- BitcodeStripOpts.td - llvm-bitcode-strip options ---------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the command line options of llvm-bitcode-strip.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Option/OptParser.td"
+
+def help : Flag<["--"], "help">;
+
+def h : Flag<["-"], "h">, Alias<help>;
+
+def version : Flag<["--"], "version">,
+ HelpText<"Print the version and exit.">;
+
+def V : Flag<["-"], "V">,
+ Alias<version>,
+ HelpText<"Alias for --version">;
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
index 43ec2b1fa82f..b5de8a45a80f 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -12,7 +12,6 @@
#include "Object.h"
#include "Reader.h"
#include "Writer.h"
-#include "llvm-objcopy.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
@@ -40,11 +39,12 @@ static uint64_t getNextRVA(const Object &Obj) {
Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1);
}
-static std::vector<uint8_t> createGnuDebugLinkSectionContents(StringRef File) {
+static Expected<std::vector<uint8_t>>
+createGnuDebugLinkSectionContents(StringRef File) {
ErrorOr<std::unique_ptr<MemoryBuffer>> LinkTargetOrErr =
MemoryBuffer::getFile(File);
if (!LinkTargetOrErr)
- error("'" + File + "': " + LinkTargetOrErr.getError().message());
+ return createFileError(File, LinkTargetOrErr.getError());
auto LinkTarget = std::move(*LinkTargetOrErr);
uint32_t CRC32 = llvm::crc32(arrayRefFromStringRef(LinkTarget->getBuffer()));
@@ -81,12 +81,17 @@ static void addSection(Object &Obj, StringRef Name, ArrayRef<uint8_t> Contents,
Obj.addSections(Sec);
}
-static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) {
- std::vector<uint8_t> Contents =
+static Error addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) {
+ Expected<std::vector<uint8_t>> Contents =
createGnuDebugLinkSectionContents(DebugLinkFile);
- addSection(Obj, ".gnu_debuglink", Contents,
+ if (!Contents)
+ return Contents.takeError();
+
+ addSection(Obj, ".gnu_debuglink", *Contents,
IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
IMAGE_SCN_MEM_DISCARDABLE);
+
+ return Error::success();
}
static void setSectionFlags(Section &Sec, SectionFlag AllFlags) {
@@ -174,8 +179,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
Sym.Name = I->getValue();
}
- // Actually do removals of symbols.
- Obj.removeSymbols([&](const Symbol &Sym) {
+ auto ToRemove = [&](const Symbol &Sym) -> Expected<bool> {
// For StripAll, all relocations have been stripped and we remove all
// symbols.
if (Config.StripAll || Config.StripAllGNU)
@@ -184,11 +188,10 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
if (Config.SymbolsToRemove.matches(Sym.Name)) {
// Explicitly removing a referenced symbol is an error.
if (Sym.Referenced)
- reportError(Config.OutputFilename,
- createStringError(llvm::errc::invalid_argument,
- "not stripping symbol '%s' because it is "
- "named in a relocation",
- Sym.Name.str().c_str()));
+ return createStringError(
+ llvm::errc::invalid_argument,
+ "'" + Config.OutputFilename + "': not stripping symbol '" +
+ Sym.Name.str() + "' because it is named in a relocation");
return true;
}
@@ -213,7 +216,11 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
}
return false;
- });
+ };
+
+ // Actually do removals of symbols.
+ if (Error Err = Obj.removeSymbols(ToRemove))
+ return Err;
if (!Config.SetSectionFlags.empty())
for (Section &Sec : Obj.getMutableSections()) {
@@ -239,7 +246,8 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
}
if (!Config.AddGnuDebugLink.empty())
- addGnuDebugLink(Obj, Config.AddGnuDebugLink);
+ if (Error E = addGnuDebugLink(Obj, Config.AddGnuDebugLink))
+ return E;
if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() ||
Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
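The COFFObjcopy.cpp hunks all apply one recipe: functions that previously reported through error()/reportError() and never returned now hand back Expected<T> or Error, and each caller forwards the failure with takeError(). A hedged, generic sketch of that recipe (readFileBytes and consumeFile are hypothetical names, not functions from this patch):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/StringExtras.h" // arrayRefFromStringRef
    #include "llvm/Support/Error.h"
    #include "llvm/Support/ErrorOr.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <memory>
    #include <vector>

    using namespace llvm;

    static Expected<std::vector<uint8_t>> readFileBytes(StringRef Path) {
      ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
          MemoryBuffer::getFile(Path);
      if (!BufOrErr)
        // Wrap the std::error_code with the file name for the caller.
        return createFileError(Path, BufOrErr.getError());
      ArrayRef<uint8_t> Data = arrayRefFromStringRef((*BufOrErr)->getBuffer());
      return std::vector<uint8_t>(Data.begin(), Data.end());
    }

    static Error consumeFile(StringRef Path) {
      Expected<std::vector<uint8_t>> Contents = readFileBytes(Path);
      if (!Contents)
        return Contents.takeError(); // propagate instead of exiting in place
      // ... use *Contents ...
      return Error::success();
    }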
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.cpp
index b07532c1dc39..1c17b8408ee7 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.cpp
@@ -31,18 +31,23 @@ void Object::updateSymbols() {
}
const Symbol *Object::findSymbol(size_t UniqueId) const {
- auto It = SymbolMap.find(UniqueId);
- if (It == SymbolMap.end())
- return nullptr;
- return It->second;
+ return SymbolMap.lookup(UniqueId);
}
-void Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
- Symbols.erase(
- std::remove_if(std::begin(Symbols), std::end(Symbols),
- [ToRemove](const Symbol &Sym) { return ToRemove(Sym); }),
- std::end(Symbols));
+Error Object::removeSymbols(
+ function_ref<Expected<bool>(const Symbol &)> ToRemove) {
+ Error Errs = Error::success();
+ llvm::erase_if(Symbols, [ToRemove, &Errs](const Symbol &Sym) {
+ Expected<bool> ShouldRemove = ToRemove(Sym);
+ if (!ShouldRemove) {
+ Errs = joinErrors(std::move(Errs), ShouldRemove.takeError());
+ return false;
+ }
+ return *ShouldRemove;
+ });
+
updateSymbols();
+ return Errs;
}
Error Object::markSymbols() {
@@ -78,44 +83,34 @@ void Object::updateSections() {
}
const Section *Object::findSection(ssize_t UniqueId) const {
- auto It = SectionMap.find(UniqueId);
- if (It == SectionMap.end())
- return nullptr;
- return It->second;
+ return SectionMap.lookup(UniqueId);
}
void Object::removeSections(function_ref<bool(const Section &)> ToRemove) {
DenseSet<ssize_t> AssociatedSections;
auto RemoveAssociated = [&AssociatedSections](const Section &Sec) {
- return AssociatedSections.count(Sec.UniqueId) == 1;
+ return AssociatedSections.contains(Sec.UniqueId);
};
do {
DenseSet<ssize_t> RemovedSections;
- Sections.erase(
- std::remove_if(std::begin(Sections), std::end(Sections),
- [ToRemove, &RemovedSections](const Section &Sec) {
- bool Remove = ToRemove(Sec);
- if (Remove)
- RemovedSections.insert(Sec.UniqueId);
- return Remove;
- }),
- std::end(Sections));
+ llvm::erase_if(Sections, [ToRemove, &RemovedSections](const Section &Sec) {
+ bool Remove = ToRemove(Sec);
+ if (Remove)
+ RemovedSections.insert(Sec.UniqueId);
+ return Remove;
+ });
// Remove all symbols referring to the removed sections.
AssociatedSections.clear();
- Symbols.erase(
- std::remove_if(
- std::begin(Symbols), std::end(Symbols),
- [&RemovedSections, &AssociatedSections](const Symbol &Sym) {
- // If there are sections that are associative to a removed
- // section,
- // remove those as well as nothing will include them (and we can't
- // leave them dangling).
- if (RemovedSections.count(Sym.AssociativeComdatTargetSectionId) ==
- 1)
- AssociatedSections.insert(Sym.TargetSectionId);
- return RemovedSections.count(Sym.TargetSectionId) == 1;
- }),
- std::end(Symbols));
+ llvm::erase_if(
+ Symbols, [&RemovedSections, &AssociatedSections](const Symbol &Sym) {
+ // If there are sections that are associative to a removed section,
+ // remove those as well, as nothing will include them (and we can't
+ // leave them dangling).
+ if (RemovedSections.count(Sym.AssociativeComdatTargetSectionId) == 1)
+ AssociatedSections.insert(Sym.TargetSectionId);
+ return RemovedSections.contains(Sym.TargetSectionId);
+ });
ToRemove = RemoveAssociated;
} while (!AssociatedSections.empty());
updateSections();
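The rewritten removal loops above combine two LLVM helpers: llvm::erase_if, which wraps the erase(remove_if(...)) dance, and joinErrors, which lets a fallible predicate accumulate failures while the sweep keeps going. A stripped-down sketch of that combination, assuming a trivial Item record rather than the real Symbol class:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Support/Error.h"
    #include <vector>

    using namespace llvm;

    struct Item {
      int Id;
    };

    static Error removeItems(std::vector<Item> &Items,
                             function_ref<Expected<bool>(const Item &)> ToRemove) {
      Error Errs = Error::success();
      erase_if(Items, [&](const Item &I) {
        Expected<bool> ShouldRemove = ToRemove(I);
        if (!ShouldRemove) {
          // Keep the element, remember the failure, and keep scanning.
          Errs = joinErrors(std::move(Errs), ShouldRemove.takeError());
          return false;
        }
        return *ShouldRemove;
      });
      return Errs; // Error::success() when every callback succeeded
    }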
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.h
index 78f8da00b8cd..0e854b58cbdb 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.h
@@ -26,7 +26,7 @@ namespace coff {
struct Relocation {
Relocation() = default;
- Relocation(const object::coff_relocation& R) : Reloc(R) {}
+ Relocation(const object::coff_relocation &R) : Reloc(R) {}
object::coff_relocation Reloc;
size_t Target = 0;
@@ -116,7 +116,7 @@ struct Object {
const Symbol *findSymbol(size_t UniqueId) const;
void addSymbols(ArrayRef<Symbol> NewSymbols);
- void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
+ Error removeSymbols(function_ref<Expected<bool>(const Symbol &)> ToRemove);
// Set the Referenced field on all Symbols, based on relocations in
// all sections.
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.cpp
index 1fde54dd290a..ba74759a34c2 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.cpp
@@ -101,6 +101,43 @@ public:
InstallNameToolOptTable() : OptTable(InstallNameToolInfoTable) {}
};
+enum BitcodeStripID {
+ BITCODE_STRIP_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ BITCODE_STRIP_##ID,
+#include "BitcodeStripOpts.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const BITCODE_STRIP_##NAME[] = VALUE;
+#include "BitcodeStripOpts.inc"
+#undef PREFIX
+
+static const opt::OptTable::Info BitcodeStripInfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ {BITCODE_STRIP_##PREFIX, \
+ NAME, \
+ HELPTEXT, \
+ METAVAR, \
+ BITCODE_STRIP_##ID, \
+ opt::Option::KIND##Class, \
+ PARAM, \
+ FLAGS, \
+ BITCODE_STRIP_##GROUP, \
+ BITCODE_STRIP_##ALIAS, \
+ ALIASARGS, \
+ VALUES},
+#include "BitcodeStripOpts.inc"
+#undef OPTION
+};
+
+class BitcodeStripOptTable : public opt::OptTable {
+public:
+ BitcodeStripOptTable() : OptTable(BitcodeStripInfoTable) {}
+};
+
enum StripID {
STRIP_INVALID = 0, // This is not an option ID.
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
@@ -395,7 +432,7 @@ template <class T> static ErrorOr<T> getAsInteger(StringRef Val) {
namespace {
-enum class ToolType { Objcopy, Strip, InstallNameTool };
+enum class ToolType { Objcopy, Strip, InstallNameTool, BitcodeStrip };
} // anonymous namespace
@@ -415,6 +452,10 @@ static void printHelp(const opt::OptTable &OptTable, raw_ostream &OS,
ToolName = "llvm-install-name-tool";
HelpText = " [options] input";
break;
+ case ToolType::BitcodeStrip:
+ ToolName = "llvm-bitcode-strip";
+ HelpText = " [options] input";
+ break;
}
OptTable.PrintHelp(OS, (ToolName + HelpText).str().c_str(),
(ToolName + " tool").str().c_str());
@@ -854,6 +895,9 @@ parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_add_rpath))
Config.RPathToAdd.push_back(Arg->getValue());
+ for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_prepend_rpath))
+ Config.RPathToPrepend.push_back(Arg->getValue());
+
for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_delete_rpath)) {
StringRef RPath = Arg->getValue();
@@ -861,7 +905,12 @@ parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
if (is_contained(Config.RPathToAdd, RPath))
return createStringError(
errc::invalid_argument,
- "cannot specify both -add_rpath %s and -delete_rpath %s",
+ "cannot specify both -add_rpath '%s' and -delete_rpath '%s'",
+ RPath.str().c_str(), RPath.str().c_str());
+ if (is_contained(Config.RPathToPrepend, RPath))
+ return createStringError(
+ errc::invalid_argument,
+ "cannot specify both -prepend_rpath '%s' and -delete_rpath '%s'",
RPath.str().c_str(), RPath.str().c_str());
Config.RPathsToRemove.insert(RPath);
@@ -881,33 +930,46 @@ parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
});
if (It1 != Config.RPathsToUpdate.end())
return createStringError(errc::invalid_argument,
- "cannot specify both -rpath " + It1->getFirst() +
- " " + It1->getSecond() + " and -rpath " +
- Old + " " + New);
+ "cannot specify both -rpath '" +
+ It1->getFirst() + "' '" + It1->getSecond() +
+ "' and -rpath '" + Old + "' '" + New + "'");
// Cannot specify the same rpath under both -delete_rpath and -rpath
auto It2 = find_if(Config.RPathsToRemove, Match);
if (It2 != Config.RPathsToRemove.end())
return createStringError(errc::invalid_argument,
- "cannot specify both -delete_rpath " + *It2 +
- " and -rpath " + Old + " " + New);
+ "cannot specify both -delete_rpath '" + *It2 +
+ "' and -rpath '" + Old + "' '" + New + "'");
// Cannot specify the same rpath under both -add_rpath and -rpath
auto It3 = find_if(Config.RPathToAdd, Match);
if (It3 != Config.RPathToAdd.end())
return createStringError(errc::invalid_argument,
- "cannot specify both -add_rpath " + *It3 +
- " and -rpath " + Old + " " + New);
+ "cannot specify both -add_rpath '" + *It3 +
+ "' and -rpath '" + Old + "' '" + New + "'");
+
+ // Cannot specify the same rpath under both -prepend_rpath and -rpath.
+ auto It4 = find_if(Config.RPathToPrepend, Match);
+ if (It4 != Config.RPathToPrepend.end())
+ return createStringError(errc::invalid_argument,
+ "cannot specify both -prepend_rpath '" + *It4 +
+ "' and -rpath '" + Old + "' '" + New + "'");
Config.RPathsToUpdate.insert({Old, New});
}
- if (auto *Arg = InputArgs.getLastArg(INSTALL_NAME_TOOL_id))
+ if (auto *Arg = InputArgs.getLastArg(INSTALL_NAME_TOOL_id)) {
Config.SharedLibId = Arg->getValue();
+ if (Config.SharedLibId->empty())
+ return createStringError(errc::invalid_argument,
+ "cannot specify an empty id");
+ }
- for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_change)) {
+ for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_change))
Config.InstallNamesToUpdate.insert({Arg->getValue(0), Arg->getValue(1)});
- }
+
+ Config.RemoveAllRpaths =
+ InputArgs.hasArg(INSTALL_NAME_TOOL_delete_all_rpaths);
SmallVector<StringRef, 2> Positional;
for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_UNKNOWN))
@@ -928,6 +990,50 @@ parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
return std::move(DC);
}
+Expected<DriverConfig>
+parseBitcodeStripOptions(ArrayRef<const char *> ArgsArr) {
+ DriverConfig DC;
+ CopyConfig Config;
+ BitcodeStripOptTable T;
+ unsigned MissingArgumentIndex, MissingArgumentCount;
+ opt::InputArgList InputArgs =
+ T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
+
+ if (InputArgs.size() == 0) {
+ printHelp(T, errs(), ToolType::BitcodeStrip);
+ exit(1);
+ }
+
+ if (InputArgs.hasArg(BITCODE_STRIP_help)) {
+ printHelp(T, outs(), ToolType::BitcodeStrip);
+ exit(0);
+ }
+
+ if (InputArgs.hasArg(BITCODE_STRIP_version)) {
+ outs() << "llvm-bitcode-strip, compatible with cctools "
+ "bitcode_strip\n";
+ cl::PrintVersionMessage();
+ exit(0);
+ }
+
+ for (auto *Arg : InputArgs.filtered(BITCODE_STRIP_UNKNOWN))
+ return createStringError(errc::invalid_argument, "unknown argument '%s'",
+ Arg->getAsString(InputArgs).c_str());
+
+ SmallVector<StringRef, 2> Positional;
+ for (auto *Arg : InputArgs.filtered(BITCODE_STRIP_INPUT))
+ Positional.push_back(Arg->getValue());
+ if (Positional.size() > 1)
+ return createStringError(errc::invalid_argument,
+ "llvm-bitcode-strip expects a single input file");
+ assert(!Positional.empty());
+ Config.InputFilename = Positional[0];
+ Config.OutputFilename = Positional[0];
+
+ DC.CopyConfigs.push_back(std::move(Config));
+ return std::move(DC);
+}
+
// ParseStripOptions returns the config and sets the input arguments. If a
// help flag is set then ParseStripOptions will print the help message and
// exit.
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.h
index 1341dd674c7b..07eac9d2bb1b 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.h
@@ -178,6 +178,7 @@ struct CopyConfig {
std::vector<StringRef> DumpSection;
std::vector<StringRef> SymbolsToAdd;
std::vector<StringRef> RPathToAdd;
+ std::vector<StringRef> RPathToPrepend;
DenseMap<StringRef, StringRef> RPathsToUpdate;
DenseMap<StringRef, StringRef> InstallNamesToUpdate;
DenseSet<StringRef> RPathsToRemove;
@@ -230,6 +231,9 @@ struct CopyConfig {
bool StripUnneeded = false;
bool Weaken = false;
bool DecompressDebugSections = false;
+ // install-name-tool's --delete_all_rpaths
+ bool RemoveAllRpaths = false;
+
DebugCompressionType CompressionType = DebugCompressionType::None;
// parseELFConfig performs ELF-specific command-line parsing. Fills `ELF` on
@@ -267,6 +271,11 @@ parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
Expected<DriverConfig>
parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr);
+// ParseBitcodeStripOptions returns the config and sets the input arguments.
+// If a help flag is set then ParseBitcodeStripOptions will print the help
+// message and exit.
+Expected<DriverConfig> parseBitcodeStripOptions(ArrayRef<const char *> ArgsArr);
+
// ParseStripOptions returns the config and sets the input arguments. If a
// help flag is set then ParseStripOptions will print the help message and
// exit. ErrorCallback is used to handle recoverable errors. An Error returned
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index 66953f9ef0d5..c53a34bc46a3 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -10,7 +10,6 @@
#include "Buffer.h"
#include "CopyConfig.h"
#include "Object.h"
-#include "llvm-objcopy.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
@@ -185,28 +184,28 @@ findBuildID(const CopyConfig &Config, const object::ELFFile<ELFT> &In) {
return createFileError(Config.InputFilename, std::move(Err));
}
- return createFileError(
- Config.InputFilename,
- createStringError(llvm::errc::invalid_argument,
- "could not find build ID"));
+ return createFileError(Config.InputFilename,
+ createStringError(llvm::errc::invalid_argument,
+ "could not find build ID"));
}
static Expected<ArrayRef<uint8_t>>
findBuildID(const CopyConfig &Config, const object::ELFObjectFileBase &In) {
if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(&In))
- return findBuildID(Config, *O->getELFFile());
+ return findBuildID(Config, O->getELFFile());
else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(&In))
- return findBuildID(Config, *O->getELFFile());
+ return findBuildID(Config, O->getELFFile());
else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(&In))
- return findBuildID(Config, *O->getELFFile());
+ return findBuildID(Config, O->getELFFile());
else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(&In))
- return findBuildID(Config, *O->getELFFile());
+ return findBuildID(Config, O->getELFFile());
llvm_unreachable("Bad file format");
}
template <class... Ts>
-static Error makeStringError(std::error_code EC, const Twine &Msg, Ts &&... Args) {
+static Error makeStringError(std::error_code EC, const Twine &Msg,
+ Ts &&... Args) {
std::string FullMsg = (EC.message() + ": " + Msg).str();
return createStringError(EC, FullMsg.c_str(), std::forward<Ts>(Args)...);
}
@@ -266,19 +265,23 @@ static Error linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink,
static Error splitDWOToFile(const CopyConfig &Config, const Reader &Reader,
StringRef File, ElfType OutputElfType) {
- auto DWOFile = Reader.create(false);
+ Expected<std::unique_ptr<Object>> DWOFile = Reader.create(false);
+ if (!DWOFile)
+ return DWOFile.takeError();
+
auto OnlyKeepDWOPred = [&DWOFile](const SectionBase &Sec) {
- return onlyKeepDWOPred(*DWOFile, Sec);
+ return onlyKeepDWOPred(**DWOFile, Sec);
};
- if (Error E = DWOFile->removeSections(Config.AllowBrokenLinks,
- OnlyKeepDWOPred))
+ if (Error E =
+ (*DWOFile)->removeSections(Config.AllowBrokenLinks, OnlyKeepDWOPred))
return E;
if (Config.OutputArch) {
- DWOFile->Machine = Config.OutputArch.getValue().EMachine;
- DWOFile->OSABI = Config.OutputArch.getValue().OSABI;
+ (*DWOFile)->Machine = Config.OutputArch.getValue().EMachine;
+ (*DWOFile)->OSABI = Config.OutputArch.getValue().OSABI;
}
FileBuffer FB(File);
- auto Writer = createWriter(Config, *DWOFile, FB, OutputElfType);
+ std::unique_ptr<Writer> Writer =
+ createWriter(Config, **DWOFile, FB, OutputElfType);
if (Error E = Writer->finalize())
return E;
return Writer->write();
@@ -313,22 +316,27 @@ static bool isCompressable(const SectionBase &Sec) {
StringRef(Sec.Name).startswith(".debug");
}
-static void replaceDebugSections(
+static Error replaceDebugSections(
Object &Obj, SectionPred &RemovePred,
- function_ref<bool(const SectionBase &)> shouldReplace,
- function_ref<SectionBase *(const SectionBase *)> addSection) {
+ function_ref<bool(const SectionBase &)> ShouldReplace,
+ function_ref<Expected<SectionBase *>(const SectionBase *)> AddSection) {
// Build a list of the debug sections we are going to replace.
- // We can't call `addSection` while iterating over sections,
+ // We can't call `AddSection` while iterating over sections,
// because it would mutate the sections array.
SmallVector<SectionBase *, 13> ToReplace;
for (auto &Sec : Obj.sections())
- if (shouldReplace(Sec))
+ if (ShouldReplace(Sec))
ToReplace.push_back(&Sec);
// Build a mapping from original section to a new one.
DenseMap<SectionBase *, SectionBase *> FromTo;
- for (SectionBase *S : ToReplace)
- FromTo[S] = addSection(S);
+ for (SectionBase *S : ToReplace) {
+ Expected<SectionBase *> NewSection = AddSection(S);
+ if (!NewSection)
+ return NewSection.takeError();
+
+ FromTo[S] = *NewSection;
+ }
// Now we want to update the target sections of relocation
// sections. Also we will update the relocations themselves
@@ -336,9 +344,11 @@ static void replaceDebugSections(
for (auto &Sec : Obj.sections())
Sec.replaceSectionReferences(FromTo);
- RemovePred = [shouldReplace, RemovePred](const SectionBase &Sec) {
- return shouldReplace(Sec) || RemovePred(Sec);
+ RemovePred = [ShouldReplace, RemovePred](const SectionBase &Sec) {
+ return ShouldReplace(Sec) || RemovePred(Sec);
};
+
+ return Error::success();
}
static bool isUnneededSymbol(const Symbol &Sym) {
@@ -577,20 +587,28 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) {
};
}
- if (Config.CompressionType != DebugCompressionType::None)
- replaceDebugSections(Obj, RemovePred, isCompressable,
- [&Config, &Obj](const SectionBase *S) {
- return &Obj.addSection<CompressedSection>(
- *S, Config.CompressionType);
- });
- else if (Config.DecompressDebugSections)
- replaceDebugSections(
- Obj, RemovePred,
- [](const SectionBase &S) { return isa<CompressedSection>(&S); },
- [&Obj](const SectionBase *S) {
- auto CS = cast<CompressedSection>(S);
- return &Obj.addSection<DecompressedSection>(*CS);
- });
+ if (Config.CompressionType != DebugCompressionType::None) {
+ if (Error Err = replaceDebugSections(
+ Obj, RemovePred, isCompressable,
+ [&Config, &Obj](const SectionBase *S) -> Expected<SectionBase *> {
+ Expected<CompressedSection> NewSection =
+ CompressedSection::create(*S, Config.CompressionType);
+ if (!NewSection)
+ return NewSection.takeError();
+
+ return &Obj.addSection<CompressedSection>(std::move(*NewSection));
+ }))
+ return Err;
+ } else if (Config.DecompressDebugSections) {
+ if (Error Err = replaceDebugSections(
+ Obj, RemovePred,
+ [](const SectionBase &S) { return isa<CompressedSection>(&S); },
+ [&Obj](const SectionBase *S) {
+ const CompressedSection *CS = cast<CompressedSection>(S);
+ return &Obj.addSection<DecompressedSection>(*CS);
+ }))
+ return Err;
+ }
return Obj.removeSections(Config.AllowBrokenLinks, RemovePred);
}
@@ -701,16 +719,6 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj,
}
}
- if (!Config.SetSectionFlags.empty()) {
- for (auto &Sec : Obj.sections()) {
- const auto Iter = Config.SetSectionFlags.find(Sec.Name);
- if (Iter != Config.SetSectionFlags.end()) {
- const SectionFlagsUpdate &SFU = Iter->second;
- setSectionFlagsAndType(Sec, SFU.NewFlags);
- }
- }
- }
-
if (Config.OnlyKeepDebug)
for (auto &Sec : Obj.sections())
if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE)
@@ -740,9 +748,9 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj,
// If the symbol table was previously removed, we need to create a new one
// before adding new symbols.
- if (!Obj.SymbolTable && !Config.ELF->SymbolsToAdd.empty()) {
- Obj.addNewSymbolTable();
- }
+ if (!Obj.SymbolTable && !Config.ELF->SymbolsToAdd.empty())
+ if (Error E = Obj.addNewSymbolTable())
+ return E;
for (const NewSymbolInfo &SI : Config.ELF->SymbolsToAdd) {
SectionBase *Sec = Obj.findSection(SI.SectionName);
@@ -752,6 +760,17 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj,
Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0);
}
+ // --set-section-flags works with sections added by --add-section.
+ if (!Config.SetSectionFlags.empty()) {
+ for (auto &Sec : Obj.sections()) {
+ const auto Iter = Config.SetSectionFlags.find(Sec.Name);
+ if (Iter != Config.SetSectionFlags.end()) {
+ const SectionFlagsUpdate &SFU = Iter->second;
+ setSectionFlagsAndType(Sec, SFU.NewFlags);
+ }
+ }
+ }
+
if (Config.EntryExpr)
Obj.Entry = Config.EntryExpr(Obj.Entry);
return Error::success();
@@ -769,12 +788,15 @@ static Error writeOutput(const CopyConfig &Config, Object &Obj, Buffer &Out,
Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In,
Buffer &Out) {
IHexReader Reader(&In);
- std::unique_ptr<Object> Obj = Reader.create(true);
+ Expected<std::unique_ptr<Object>> Obj = Reader.create(true);
+ if (!Obj)
+ return Obj.takeError();
+
const ElfType OutputElfType =
- getOutputElfType(Config.OutputArch.getValueOr(MachineInfo()));
- if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
+ getOutputElfType(Config.OutputArch.getValueOr(MachineInfo()));
+ if (Error E = handleArgs(Config, **Obj, Reader, OutputElfType))
return E;
- return writeOutput(Config, *Obj, Out, OutputElfType);
+ return writeOutput(Config, **Obj, Out, OutputElfType);
}
Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
@@ -782,21 +804,26 @@ Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
uint8_t NewSymbolVisibility =
Config.ELF->NewSymbolVisibility.getValueOr((uint8_t)ELF::STV_DEFAULT);
BinaryReader Reader(&In, NewSymbolVisibility);
- std::unique_ptr<Object> Obj = Reader.create(true);
+ Expected<std::unique_ptr<Object>> Obj = Reader.create(true);
+ if (!Obj)
+ return Obj.takeError();
// Prefer OutputArch (-O<format>) if set, otherwise fallback to BinaryArch
// (-B<arch>).
const ElfType OutputElfType =
getOutputElfType(Config.OutputArch.getValueOr(MachineInfo()));
- if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
+ if (Error E = handleArgs(Config, **Obj, Reader, OutputElfType))
return E;
- return writeOutput(Config, *Obj, Out, OutputElfType);
+ return writeOutput(Config, **Obj, Out, OutputElfType);
}
Error executeObjcopyOnBinary(const CopyConfig &Config,
object::ELFObjectFileBase &In, Buffer &Out) {
ELFReader Reader(&In, Config.ExtractPartition);
- std::unique_ptr<Object> Obj = Reader.create(!Config.SymbolsToAdd.empty());
+ Expected<std::unique_ptr<Object>> Obj =
+ Reader.create(!Config.SymbolsToAdd.empty());
+ if (!Obj)
+ return Obj.takeError();
// Prefer OutputArch (-O<format>) if set, otherwise infer it from the input.
const ElfType OutputElfType =
Config.OutputArch ? getOutputElfType(Config.OutputArch.getValue())
@@ -822,10 +849,10 @@ Error executeObjcopyOnBinary(const CopyConfig &Config,
Config.BuildIdLinkInput.getValue(), BuildIdBytes))
return E;
- if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
+ if (Error E = handleArgs(Config, **Obj, Reader, OutputElfType))
return createFileError(Config.InputFilename, std::move(E));
- if (Error E = writeOutput(Config, *Obj, Out, OutputElfType))
+ if (Error E = writeOutput(Config, **Obj, Out, OutputElfType))
return createFileError(Config.InputFilename, std::move(E));
if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkOutput)
if (Error E =
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp
index e15fb24f4c42..0ff82f951b62 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "Object.h"
-#include "llvm-objcopy.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -15,6 +14,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
@@ -51,16 +51,15 @@ template <class ELFT> void ELFWriter<ELFT>::writePhdr(const Segment &Seg) {
}
Error SectionBase::removeSectionReferences(
- bool AllowBrokenLinks,
- function_ref<bool(const SectionBase *)> ToRemove) {
+ bool, function_ref<bool(const SectionBase *)>) {
return Error::success();
}
-Error SectionBase::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
+Error SectionBase::removeSymbols(function_ref<bool(const Symbol &)>) {
return Error::success();
}
-void SectionBase::initialize(SectionTableRef SecTable) {}
+Error SectionBase::initialize(SectionTableRef) { return Error::success(); }
void SectionBase::finalize() {}
void SectionBase::markSymbols() {}
void SectionBase::replaceSectionReferences(
@@ -82,72 +81,98 @@ template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) {
Shdr.sh_entsize = Sec.EntrySize;
}
-template <class ELFT> void ELFSectionSizer<ELFT>::visit(Section &Sec) {}
+template <class ELFT> Error ELFSectionSizer<ELFT>::visit(Section &) {
+ return Error::success();
+}
-template <class ELFT>
-void ELFSectionSizer<ELFT>::visit(OwnedDataSection &Sec) {}
+template <class ELFT> Error ELFSectionSizer<ELFT>::visit(OwnedDataSection &) {
+ return Error::success();
+}
-template <class ELFT>
-void ELFSectionSizer<ELFT>::visit(StringTableSection &Sec) {}
+template <class ELFT> Error ELFSectionSizer<ELFT>::visit(StringTableSection &) {
+ return Error::success();
+}
template <class ELFT>
-void ELFSectionSizer<ELFT>::visit(DynamicRelocationSection &Sec) {}
+Error ELFSectionSizer<ELFT>::visit(DynamicRelocationSection &) {
+ return Error::success();
+}
template <class ELFT>
-void ELFSectionSizer<ELFT>::visit(SymbolTableSection &Sec) {
+Error ELFSectionSizer<ELFT>::visit(SymbolTableSection &Sec) {
Sec.EntrySize = sizeof(Elf_Sym);
Sec.Size = Sec.Symbols.size() * Sec.EntrySize;
// Align to the largest field in Elf_Sym.
Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
+ return Error::success();
}
template <class ELFT>
-void ELFSectionSizer<ELFT>::visit(RelocationSection &Sec) {
+Error ELFSectionSizer<ELFT>::visit(RelocationSection &Sec) {
Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela);
Sec.Size = Sec.Relocations.size() * Sec.EntrySize;
// Align to the largest field in Elf_Rel(a).
Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
+ return Error::success();
}
template <class ELFT>
-void ELFSectionSizer<ELFT>::visit(GnuDebugLinkSection &Sec) {}
+Error ELFSectionSizer<ELFT>::visit(GnuDebugLinkSection &) {
+ return Error::success();
+}
-template <class ELFT> void ELFSectionSizer<ELFT>::visit(GroupSection &Sec) {
+template <class ELFT> Error ELFSectionSizer<ELFT>::visit(GroupSection &Sec) {
Sec.Size = sizeof(Elf_Word) + Sec.GroupMembers.size() * sizeof(Elf_Word);
+ return Error::success();
}
template <class ELFT>
-void ELFSectionSizer<ELFT>::visit(SectionIndexSection &Sec) {}
+Error ELFSectionSizer<ELFT>::visit(SectionIndexSection &) {
+ return Error::success();
+}
-template <class ELFT>
-void ELFSectionSizer<ELFT>::visit(CompressedSection &Sec) {}
+template <class ELFT> Error ELFSectionSizer<ELFT>::visit(CompressedSection &) {
+ return Error::success();
+}
template <class ELFT>
-void ELFSectionSizer<ELFT>::visit(DecompressedSection &Sec) {}
+Error ELFSectionSizer<ELFT>::visit(DecompressedSection &) {
+ return Error::success();
+}
-void BinarySectionWriter::visit(const SectionIndexSection &Sec) {
- error("cannot write symbol section index table '" + Sec.Name + "' ");
+Error BinarySectionWriter::visit(const SectionIndexSection &Sec) {
+ return createStringError(errc::operation_not_permitted,
+ "cannot write symbol section index table '" +
+ Sec.Name + "' ");
}
-void BinarySectionWriter::visit(const SymbolTableSection &Sec) {
- error("cannot write symbol table '" + Sec.Name + "' out to binary");
+Error BinarySectionWriter::visit(const SymbolTableSection &Sec) {
+ return createStringError(errc::operation_not_permitted,
+ "cannot write symbol table '" + Sec.Name +
+ "' out to binary");
}
-void BinarySectionWriter::visit(const RelocationSection &Sec) {
- error("cannot write relocation section '" + Sec.Name + "' out to binary");
+Error BinarySectionWriter::visit(const RelocationSection &Sec) {
+ return createStringError(errc::operation_not_permitted,
+ "cannot write relocation section '" + Sec.Name +
+ "' out to binary");
}
-void BinarySectionWriter::visit(const GnuDebugLinkSection &Sec) {
- error("cannot write '" + Sec.Name + "' out to binary");
+Error BinarySectionWriter::visit(const GnuDebugLinkSection &Sec) {
+ return createStringError(errc::operation_not_permitted,
+ "cannot write '" + Sec.Name + "' out to binary");
}
-void BinarySectionWriter::visit(const GroupSection &Sec) {
- error("cannot write '" + Sec.Name + "' out to binary");
+Error BinarySectionWriter::visit(const GroupSection &Sec) {
+ return createStringError(errc::operation_not_permitted,
+ "cannot write '" + Sec.Name + "' out to binary");
}
-void SectionWriter::visit(const Section &Sec) {
+Error SectionWriter::visit(const Section &Sec) {
if (Sec.Type != SHT_NOBITS)
llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
+
+ return Error::success();
}
static bool addressOverflows32bit(uint64_t Addr) {
@@ -352,30 +377,34 @@ uint64_t IHexSectionWriterBase::writeBaseAddr(uint64_t Addr) {
return Base;
}
-void IHexSectionWriterBase::writeData(uint8_t Type, uint16_t Addr,
+void IHexSectionWriterBase::writeData(uint8_t, uint16_t,
ArrayRef<uint8_t> Data) {
Offset += IHexRecord::getLineLength(Data.size());
}
-void IHexSectionWriterBase::visit(const Section &Sec) {
+Error IHexSectionWriterBase::visit(const Section &Sec) {
writeSection(&Sec, Sec.Contents);
+ return Error::success();
}
-void IHexSectionWriterBase::visit(const OwnedDataSection &Sec) {
+Error IHexSectionWriterBase::visit(const OwnedDataSection &Sec) {
writeSection(&Sec, Sec.Data);
+ return Error::success();
}
-void IHexSectionWriterBase::visit(const StringTableSection &Sec) {
+Error IHexSectionWriterBase::visit(const StringTableSection &Sec) {
// Check that sizer has already done its work
assert(Sec.Size == Sec.StrTabBuilder.getSize());
// We are free to pass an invalid pointer to writeSection as long
// as we don't actually write any data. The real writer class has
// to override this method.
writeSection(&Sec, {nullptr, static_cast<size_t>(Sec.Size)});
+ return Error::success();
}
-void IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) {
+Error IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) {
writeSection(&Sec, Sec.Contents);
+ return Error::success();
}
void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr,
@@ -385,19 +414,25 @@ void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr,
Offset += HexData.size();
}
-void IHexSectionWriter::visit(const StringTableSection &Sec) {
+Error IHexSectionWriter::visit(const StringTableSection &Sec) {
assert(Sec.Size == Sec.StrTabBuilder.getSize());
std::vector<uint8_t> Data(Sec.Size);
Sec.StrTabBuilder.write(Data.data());
writeSection(&Sec, Data);
+ return Error::success();
}
-void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
+Error Section::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
+}
-void Section::accept(MutableSectionVisitor &Visitor) { Visitor.visit(*this); }
+Error Section::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
+}
-void SectionWriter::visit(const OwnedDataSection &Sec) {
+Error SectionWriter::visit(const OwnedDataSection &Sec) {
llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset);
+ return Error::success();
}
static constexpr std::array<uint8_t, 4> ZlibGnuMagic = {{'Z', 'L', 'I', 'B'}};
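Most of the accept()/visit() churn in ELF/Object.cpp is one mechanical transformation: SectionVisitor::visit() now returns Error instead of calling a fatal error() helper, and every accept() simply forwards that result. A toy rendering of the double dispatch, assuming a much-simplified class hierarchy (ToySection and ToyBinaryWriter are illustrative names only):

    #include "llvm/Support/Errc.h"
    #include "llvm/Support/Error.h"
    #include <string>

    using namespace llvm;

    struct ToySection;

    struct ToyVisitor {
      virtual ~ToyVisitor() = default;
      virtual Error visit(const ToySection &Sec) = 0;
    };

    struct ToySection {
      std::string Name;
      // accept() forwards the visitor's Error; the caller decides whether to
      // report it, join it with others, or propagate it further up.
      Error accept(ToyVisitor &V) const { return V.visit(*this); }
    };

    struct ToyBinaryWriter : ToyVisitor {
      Error visit(const ToySection &Sec) override {
        return createStringError(errc::operation_not_permitted,
                                 "cannot write section '" + Sec.Name +
                                     "' out to binary");
      }
    };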
@@ -424,7 +459,7 @@ getDecompressedSizeAndAlignment(ArrayRef<uint8_t> Data) {
}
template <class ELFT>
-void ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
+Error ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
const size_t DataOffset = isDataGnuCompressed(Sec.OriginalData)
? (ZlibGnuMagic.size() + sizeof(Sec.Size))
: sizeof(Elf_Chdr_Impl<ELFT>);
@@ -434,32 +469,37 @@ void ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
Sec.OriginalData.size() - DataOffset);
SmallVector<char, 128> DecompressedContent;
- if (Error E = zlib::uncompress(CompressedContent, DecompressedContent,
- static_cast<size_t>(Sec.Size)))
- reportError(Sec.Name, std::move(E));
+ if (Error Err = zlib::uncompress(CompressedContent, DecompressedContent,
+ static_cast<size_t>(Sec.Size)))
+ return createStringError(errc::invalid_argument,
+ "'" + Sec.Name + "': " + toString(std::move(Err)));
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
std::copy(DecompressedContent.begin(), DecompressedContent.end(), Buf);
+
+ return Error::success();
}
-void BinarySectionWriter::visit(const DecompressedSection &Sec) {
- error("cannot write compressed section '" + Sec.Name + "' ");
+Error BinarySectionWriter::visit(const DecompressedSection &Sec) {
+ return createStringError(errc::operation_not_permitted,
+ "cannot write compressed section '" + Sec.Name +
+ "' ");
}
-void DecompressedSection::accept(SectionVisitor &Visitor) const {
- Visitor.visit(*this);
+Error DecompressedSection::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
}
-void DecompressedSection::accept(MutableSectionVisitor &Visitor) {
- Visitor.visit(*this);
+Error DecompressedSection::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
}
-void OwnedDataSection::accept(SectionVisitor &Visitor) const {
- Visitor.visit(*this);
+Error OwnedDataSection::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
}
-void OwnedDataSection::accept(MutableSectionVisitor &Visitor) {
- Visitor.visit(*this);
+Error OwnedDataSection::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
}
void OwnedDataSection::appendHexData(StringRef HexData) {
@@ -471,16 +511,18 @@ void OwnedDataSection::appendHexData(StringRef HexData) {
Size = Data.size();
}
-void BinarySectionWriter::visit(const CompressedSection &Sec) {
- error("cannot write compressed section '" + Sec.Name + "' ");
+Error BinarySectionWriter::visit(const CompressedSection &Sec) {
+ return createStringError(errc::operation_not_permitted,
+ "cannot write compressed section '" + Sec.Name +
+ "' ");
}
template <class ELFT>
-void ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
+Error ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
if (Sec.CompressionType == DebugCompressionType::None) {
std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
- return;
+ return Error::success();
}
if (Sec.CompressionType == DebugCompressionType::GNU) {
@@ -501,17 +543,42 @@ void ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
}
std::copy(Sec.CompressedData.begin(), Sec.CompressedData.end(), Buf);
+ return Error::success();
+}
+
+Expected<CompressedSection>
+CompressedSection::create(const SectionBase &Sec,
+ DebugCompressionType CompressionType) {
+ Error Err = Error::success();
+ CompressedSection Section(Sec, CompressionType, Err);
+
+ if (Err)
+ return std::move(Err);
+
+ return Section;
+}
+Expected<CompressedSection>
+CompressedSection::create(ArrayRef<uint8_t> CompressedData,
+ uint64_t DecompressedSize,
+ uint64_t DecompressedAlign) {
+ return CompressedSection(CompressedData, DecompressedSize, DecompressedAlign);
}
CompressedSection::CompressedSection(const SectionBase &Sec,
- DebugCompressionType CompressionType)
+ DebugCompressionType CompressionType,
+ Error &OutErr)
: SectionBase(Sec), CompressionType(CompressionType),
DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) {
- if (Error E = zlib::compress(
+ ErrorAsOutParameter EAO(&OutErr);
+
+ if (Error Err = zlib::compress(
StringRef(reinterpret_cast<const char *>(OriginalData.data()),
OriginalData.size()),
- CompressedData))
- reportError(Name, std::move(E));
+ CompressedData)) {
+ OutErr = createStringError(llvm::errc::invalid_argument,
+ "'" + Name + "': " + toString(std::move(Err)));
+ return;
+ }
size_t ChdrSize;
if (CompressionType == DebugCompressionType::GNU) {
@@ -537,12 +604,12 @@ CompressedSection::CompressedSection(ArrayRef<uint8_t> CompressedData,
OriginalData = CompressedData;
}
-void CompressedSection::accept(SectionVisitor &Visitor) const {
- Visitor.visit(*this);
+Error CompressedSection::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
}
-void CompressedSection::accept(MutableSectionVisitor &Visitor) {
- Visitor.visit(*this);
+Error CompressedSection::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
}
void StringTableSection::addString(StringRef Name) { StrTabBuilder.add(Name); }
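CompressedSection now uses the standard LLVM answer to "constructors cannot return an Error": the fallible constructor takes an Error & out-parameter (guarded by ErrorAsOutParameter so it counts as checked), and a static create() converts that into an Expected<T> for callers. A stripped-down sketch of the same shape with a hypothetical Widget type:

    #include "llvm/Support/Errc.h"
    #include "llvm/Support/Error.h"

    using namespace llvm;

    class Widget {
      // Private fallible constructor: failures land in OutErr.
      Widget(int Size, Error &OutErr) {
        ErrorAsOutParameter EAO(&OutErr); // mark OutErr as intentionally set here
        if (Size < 0)
          OutErr = createStringError(errc::invalid_argument,
                                     "size must be non-negative");
      }

    public:
      static Expected<Widget> create(int Size) {
        Error Err = Error::success();
        Widget W(Size, Err);
        if (Err)
          return std::move(Err); // Expected<Widget> carrying the failure
        return W;                // Widget is trivially copyable here
      }
    };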
@@ -556,42 +623,51 @@ void StringTableSection::prepareForLayout() {
Size = StrTabBuilder.getSize();
}
-void SectionWriter::visit(const StringTableSection &Sec) {
+Error SectionWriter::visit(const StringTableSection &Sec) {
Sec.StrTabBuilder.write(Out.getBufferStart() + Sec.Offset);
+ return Error::success();
}
-void StringTableSection::accept(SectionVisitor &Visitor) const {
- Visitor.visit(*this);
+Error StringTableSection::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
}
-void StringTableSection::accept(MutableSectionVisitor &Visitor) {
- Visitor.visit(*this);
+Error StringTableSection::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
}
template <class ELFT>
-void ELFSectionWriter<ELFT>::visit(const SectionIndexSection &Sec) {
+Error ELFSectionWriter<ELFT>::visit(const SectionIndexSection &Sec) {
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
llvm::copy(Sec.Indexes, reinterpret_cast<Elf_Word *>(Buf));
+ return Error::success();
}
-void SectionIndexSection::initialize(SectionTableRef SecTable) {
+Error SectionIndexSection::initialize(SectionTableRef SecTable) {
Size = 0;
- setSymTab(SecTable.getSectionOfType<SymbolTableSection>(
- Link,
- "Link field value " + Twine(Link) + " in section " + Name + " is invalid",
- "Link field value " + Twine(Link) + " in section " + Name +
- " is not a symbol table"));
+ Expected<SymbolTableSection *> Sec =
+ SecTable.getSectionOfType<SymbolTableSection>(
+ Link,
+ "Link field value " + Twine(Link) + " in section " + Name +
+ " is invalid",
+ "Link field value " + Twine(Link) + " in section " + Name +
+ " is not a symbol table");
+ if (!Sec)
+ return Sec.takeError();
+
+ setSymTab(*Sec);
Symbols->setShndxTable(this);
+ return Error::success();
}
void SectionIndexSection::finalize() { Link = Symbols->Index; }
-void SectionIndexSection::accept(SectionVisitor &Visitor) const {
- Visitor.visit(*this);
+Error SectionIndexSection::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
}
-void SectionIndexSection::accept(MutableSectionVisitor &Visitor) {
- Visitor.visit(*this);
+Error SectionIndexSection::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
}
static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) {
@@ -674,8 +750,7 @@ void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
}
Error SymbolTableSection::removeSectionReferences(
- bool AllowBrokenLinks,
- function_ref<bool(const SectionBase *)> ToRemove) {
+ bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) {
if (ToRemove(SectionIndexTable))
SectionIndexTable = nullptr;
if (ToRemove(SymbolNames)) {
@@ -718,14 +793,20 @@ void SymbolTableSection::replaceSectionReferences(
Sym->DefinedIn = To;
}
-void SymbolTableSection::initialize(SectionTableRef SecTable) {
+Error SymbolTableSection::initialize(SectionTableRef SecTable) {
Size = 0;
- setStrTab(SecTable.getSectionOfType<StringTableSection>(
- Link,
- "Symbol table has link index of " + Twine(Link) +
- " which is not a valid index",
- "Symbol table has link index of " + Twine(Link) +
- " which is not a string table"));
+ Expected<StringTableSection *> Sec =
+ SecTable.getSectionOfType<StringTableSection>(
+ Link,
+ "Symbol table has link index of " + Twine(Link) +
+ " which is not a valid index",
+ "Symbol table has link index of " + Twine(Link) +
+ " which is not a string table");
+ if (!Sec)
+ return Sec.takeError();
+
+ setStrTab(*Sec);
+ return Error::success();
}
void SymbolTableSection::finalize() {
@@ -770,19 +851,25 @@ void SymbolTableSection::fillShndxTable() {
}
}
-const Symbol *SymbolTableSection::getSymbolByIndex(uint32_t Index) const {
+Expected<const Symbol *>
+SymbolTableSection::getSymbolByIndex(uint32_t Index) const {
if (Symbols.size() <= Index)
- error("invalid symbol index: " + Twine(Index));
+ return createStringError(errc::invalid_argument,
+ "invalid symbol index: " + Twine(Index));
return Symbols[Index].get();
}
-Symbol *SymbolTableSection::getSymbolByIndex(uint32_t Index) {
- return const_cast<Symbol *>(
- static_cast<const SymbolTableSection *>(this)->getSymbolByIndex(Index));
+Expected<Symbol *> SymbolTableSection::getSymbolByIndex(uint32_t Index) {
+ Expected<const Symbol *> Sym =
+ static_cast<const SymbolTableSection *>(this)->getSymbolByIndex(Index);
+ if (!Sym)
+ return Sym.takeError();
+
+ return const_cast<Symbol *>(*Sym);
}
template <class ELFT>
-void ELFSectionWriter<ELFT>::visit(const SymbolTableSection &Sec) {
+Error ELFSectionWriter<ELFT>::visit(const SymbolTableSection &Sec) {
Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Out.getBufferStart() + Sec.Offset);
// Loop through the symbols, setting each entry of the symbol table.
for (const std::unique_ptr<Symbol> &Symbol : Sec.Symbols) {
@@ -795,19 +882,19 @@ void ELFSectionWriter<ELFT>::visit(const SymbolTableSection &Sec) {
Sym->st_shndx = Symbol->getShndx();
++Sym;
}
+ return Error::success();
}
-void SymbolTableSection::accept(SectionVisitor &Visitor) const {
- Visitor.visit(*this);
+Error SymbolTableSection::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
}
-void SymbolTableSection::accept(MutableSectionVisitor &Visitor) {
- Visitor.visit(*this);
+Error SymbolTableSection::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
}
Error RelocationSection::removeSectionReferences(
- bool AllowBrokenLinks,
- function_ref<bool(const SectionBase *)> ToRemove) {
+ bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) {
if (ToRemove(Symbols)) {
if (!AllowBrokenLinks)
return createStringError(
@@ -834,22 +921,33 @@ Error RelocationSection::removeSectionReferences(
}
template <class SymTabType>
-void RelocSectionWithSymtabBase<SymTabType>::initialize(
+Error RelocSectionWithSymtabBase<SymTabType>::initialize(
SectionTableRef SecTable) {
- if (Link != SHN_UNDEF)
- setSymTab(SecTable.getSectionOfType<SymTabType>(
+ if (Link != SHN_UNDEF) {
+ Expected<SymTabType *> Sec = SecTable.getSectionOfType<SymTabType>(
Link,
"Link field value " + Twine(Link) + " in section " + Name +
" is invalid",
"Link field value " + Twine(Link) + " in section " + Name +
- " is not a symbol table"));
+ " is not a symbol table");
+ if (!Sec)
+ return Sec.takeError();
- if (Info != SHN_UNDEF)
- setSection(SecTable.getSection(Info, "Info field value " + Twine(Info) +
- " in section " + Name +
- " is invalid"));
- else
+ setSymTab(*Sec);
+ }
+
+ if (Info != SHN_UNDEF) {
+ Expected<SectionBase *> Sec =
+ SecTable.getSection(Info, "Info field value " + Twine(Info) +
+ " in section " + Name + " is invalid");
+ if (!Sec)
+ return Sec.takeError();
+
+ setSection(*Sec);
+ } else
setSection(nullptr);
+
+ return Error::success();
}
template <class SymTabType>
@@ -861,7 +959,7 @@ void RelocSectionWithSymtabBase<SymTabType>::finalize() {
}
template <class ELFT>
-static void setAddend(Elf_Rel_Impl<ELFT, false> &Rel, uint64_t Addend) {}
+static void setAddend(Elf_Rel_Impl<ELFT, false> &, uint64_t) {}
template <class ELFT>
static void setAddend(Elf_Rel_Impl<ELFT, true> &Rela, uint64_t Addend) {
@@ -880,20 +978,21 @@ static void writeRel(const RelRange &Relocations, T *Buf) {
}
template <class ELFT>
-void ELFSectionWriter<ELFT>::visit(const RelocationSection &Sec) {
+Error ELFSectionWriter<ELFT>::visit(const RelocationSection &Sec) {
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
if (Sec.Type == SHT_REL)
writeRel(Sec.Relocations, reinterpret_cast<Elf_Rel *>(Buf));
else
writeRel(Sec.Relocations, reinterpret_cast<Elf_Rela *>(Buf));
+ return Error::success();
}
-void RelocationSection::accept(SectionVisitor &Visitor) const {
- Visitor.visit(*this);
+Error RelocationSection::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
}
-void RelocationSection::accept(MutableSectionVisitor &Visitor) {
- Visitor.visit(*this);
+Error RelocationSection::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
}
Error RelocationSection::removeSymbols(
@@ -920,16 +1019,17 @@ void RelocationSection::replaceSectionReferences(
SecToApplyRel = To;
}
-void SectionWriter::visit(const DynamicRelocationSection &Sec) {
+Error SectionWriter::visit(const DynamicRelocationSection &Sec) {
llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
+ return Error::success();
}
-void DynamicRelocationSection::accept(SectionVisitor &Visitor) const {
- Visitor.visit(*this);
+Error DynamicRelocationSection::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
}
-void DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) {
- Visitor.visit(*this);
+Error DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
}
Error DynamicRelocationSection::removeSectionReferences(
@@ -1015,14 +1115,22 @@ void GroupSection::onRemove() {
Sec->Flags &= ~SHF_GROUP;
}
-void Section::initialize(SectionTableRef SecTable) {
+Error Section::initialize(SectionTableRef SecTable) {
if (Link == ELF::SHN_UNDEF)
- return;
- LinkSection =
+ return Error::success();
+
+ Expected<SectionBase *> Sec =
SecTable.getSection(Link, "Link field value " + Twine(Link) +
" in section " + Name + " is invalid");
+ if (!Sec)
+ return Sec.takeError();
+
+ LinkSection = *Sec;
+
if (LinkSection->Type == ELF::SHT_SYMTAB)
LinkSection = nullptr;
+
+ return Error::success();
}
void Section::finalize() { this->Link = LinkSection ? LinkSection->Index : 0; }
@@ -1051,37 +1159,39 @@ GnuDebugLinkSection::GnuDebugLinkSection(StringRef File,
}
template <class ELFT>
-void ELFSectionWriter<ELFT>::visit(const GnuDebugLinkSection &Sec) {
+Error ELFSectionWriter<ELFT>::visit(const GnuDebugLinkSection &Sec) {
unsigned char *Buf = Out.getBufferStart() + Sec.Offset;
Elf_Word *CRC =
reinterpret_cast<Elf_Word *>(Buf + Sec.Size - sizeof(Elf_Word));
*CRC = Sec.CRC32;
llvm::copy(Sec.FileName, Buf);
+ return Error::success();
}
-void GnuDebugLinkSection::accept(SectionVisitor &Visitor) const {
- Visitor.visit(*this);
+Error GnuDebugLinkSection::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
}
-void GnuDebugLinkSection::accept(MutableSectionVisitor &Visitor) {
- Visitor.visit(*this);
+Error GnuDebugLinkSection::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
}
template <class ELFT>
-void ELFSectionWriter<ELFT>::visit(const GroupSection &Sec) {
+Error ELFSectionWriter<ELFT>::visit(const GroupSection &Sec) {
ELF::Elf32_Word *Buf =
reinterpret_cast<ELF::Elf32_Word *>(Out.getBufferStart() + Sec.Offset);
*Buf++ = Sec.FlagWord;
for (SectionBase *S : Sec.GroupMembers)
support::endian::write32<ELFT::TargetEndianness>(Buf++, S->Index);
+ return Error::success();
}
-void GroupSection::accept(SectionVisitor &Visitor) const {
- Visitor.visit(*this);
+Error GroupSection::accept(SectionVisitor &Visitor) const {
+ return Visitor.visit(*this);
}
-void GroupSection::accept(MutableSectionVisitor &Visitor) {
- Visitor.visit(*this);
+Error GroupSection::accept(MutableSectionVisitor &Visitor) {
+ return Visitor.visit(*this);
}
// Returns true IFF a section is wholly inside the range of a segment
@@ -1161,9 +1271,12 @@ SymbolTableSection *BasicELFBuilder::addSymTab(StringTableSection *StrTab) {
return &SymTab;
}
-void BasicELFBuilder::initSections() {
+Error BasicELFBuilder::initSections() {
for (SectionBase &Sec : Obj->sections())
- Sec.initialize(Obj->sections());
+ if (Error Err = Sec.initialize(Obj->sections()))
+ return Err;
+
+ return Error::success();
}
void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
@@ -1190,12 +1303,13 @@ void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
0);
}
-std::unique_ptr<Object> BinaryELFBuilder::build() {
+Expected<std::unique_ptr<Object>> BinaryELFBuilder::build() {
initFileHeader();
initHeaderSegment();
SymbolTableSection *SymTab = addSymTab(addStrTab());
- initSections();
+ if (Error Err = initSections())
+ return std::move(Err);
addData(SymTab);
return std::move(Obj);
@@ -1246,12 +1360,13 @@ void IHexELFBuilder::addDataSections() {
}
}
-std::unique_ptr<Object> IHexELFBuilder::build() {
+Expected<std::unique_ptr<Object>> IHexELFBuilder::build() {
initFileHeader();
initHeaderSegment();
StringTableSection *StrTab = addStrTab();
addSymTab(StrTab);
- initSections();
+ if (Error Err = initSections())
+ return std::move(Err);
addDataSections();
return std::move(Obj);
@@ -1273,27 +1388,37 @@ template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) {
}
}
-template <class ELFT> void ELFBuilder<ELFT>::findEhdrOffset() {
+template <class ELFT> Error ELFBuilder<ELFT>::findEhdrOffset() {
if (!ExtractPartition)
- return;
+ return Error::success();
for (const SectionBase &Sec : Obj.sections()) {
if (Sec.Type == SHT_LLVM_PART_EHDR && Sec.Name == *ExtractPartition) {
EhdrOffset = Sec.Offset;
- return;
+ return Error::success();
}
}
- error("could not find partition named '" + *ExtractPartition + "'");
+ return createStringError(errc::invalid_argument,
+ "could not find partition named '" +
+ *ExtractPartition + "'");
}
template <class ELFT>
-void ELFBuilder<ELFT>::readProgramHeaders(const ELFFile<ELFT> &HeadersFile) {
+Error ELFBuilder<ELFT>::readProgramHeaders(const ELFFile<ELFT> &HeadersFile) {
uint32_t Index = 0;
- for (const auto &Phdr : unwrapOrError(HeadersFile.program_headers())) {
+
+ Expected<typename ELFFile<ELFT>::Elf_Phdr_Range> Headers =
+ HeadersFile.program_headers();
+ if (!Headers)
+ return Headers.takeError();
+
+ for (const typename ELFFile<ELFT>::Elf_Phdr &Phdr : *Headers) {
if (Phdr.p_offset + Phdr.p_filesz > HeadersFile.getBufSize())
- error("program header with offset 0x" + Twine::utohexstr(Phdr.p_offset) +
- " and file size 0x" + Twine::utohexstr(Phdr.p_filesz) +
- " goes past the end of the file");
+ return createStringError(
+ errc::invalid_argument,
+ "program header with offset 0x" + Twine::utohexstr(Phdr.p_offset) +
+ " and file size 0x" + Twine::utohexstr(Phdr.p_filesz) +
+ " goes past the end of the file");
ArrayRef<uint8_t> Data{HeadersFile.base() + Phdr.p_offset,
(size_t)Phdr.p_filesz};
@@ -1320,7 +1445,7 @@ void ELFBuilder<ELFT>::readProgramHeaders(const ELFFile<ELFT> &HeadersFile) {
ElfHdr.Index = Index++;
ElfHdr.OriginalOffset = ElfHdr.Offset = EhdrOffset;
- const auto &Ehdr = *HeadersFile.getHeader();
+ const typename ELFT::Ehdr &Ehdr = HeadersFile.getHeader();
auto &PrHdr = Obj.ProgramHdrSegment;
PrHdr.Type = PT_PHDR;
PrHdr.Flags = 0;
@@ -1341,13 +1466,16 @@ void ELFBuilder<ELFT>::readProgramHeaders(const ELFFile<ELFT> &HeadersFile) {
setParentSegment(Child);
setParentSegment(ElfHdr);
setParentSegment(PrHdr);
+
+ return Error::success();
}
template <class ELFT>
-void ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) {
+Error ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) {
if (GroupSec->Align % sizeof(ELF::Elf32_Word) != 0)
- error("invalid alignment " + Twine(GroupSec->Align) + " of group section '" +
- GroupSec->Name + "'");
+ return createStringError(errc::invalid_argument,
+ "invalid alignment " + Twine(GroupSec->Align) +
+ " of group section '" + GroupSec->Name + "'");
SectionTableRef SecTable = Obj.sections();
if (GroupSec->Link != SHN_UNDEF) {
auto SymTab = SecTable.template getSectionOfType<SymbolTableSection>(
@@ -1356,16 +1484,23 @@ void ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) {
GroupSec->Name + "' is invalid",
"link field value '" + Twine(GroupSec->Link) + "' in section '" +
GroupSec->Name + "' is not a symbol table");
- Symbol *Sym = SymTab->getSymbolByIndex(GroupSec->Info);
+ if (!SymTab)
+ return SymTab.takeError();
+
+ Expected<Symbol *> Sym = (*SymTab)->getSymbolByIndex(GroupSec->Info);
if (!Sym)
- error("info field value '" + Twine(GroupSec->Info) + "' in section '" +
- GroupSec->Name + "' is not a valid symbol index");
- GroupSec->setSymTab(SymTab);
- GroupSec->setSymbol(Sym);
+ return createStringError(errc::invalid_argument,
+ "info field value '" + Twine(GroupSec->Info) +
+ "' in section '" + GroupSec->Name +
+ "' is not a valid symbol index");
+ GroupSec->setSymTab(*SymTab);
+ GroupSec->setSymbol(*Sym);
}
if (GroupSec->Contents.size() % sizeof(ELF::Elf32_Word) ||
GroupSec->Contents.empty())
- error("the content of the section " + GroupSec->Name + " is malformed");
+ return createStringError(errc::invalid_argument,
+ "the content of the section " + GroupSec->Name +
+ " is malformed");
const ELF::Elf32_Word *Word =
reinterpret_cast<const ELF::Elf32_Word *>(GroupSec->Contents.data());
const ELF::Elf32_Word *End =
@@ -1373,61 +1508,103 @@ void ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) {
GroupSec->setFlagWord(*Word++);
for (; Word != End; ++Word) {
uint32_t Index = support::endian::read32<ELFT::TargetEndianness>(Word);
- GroupSec->addMember(SecTable.getSection(
+ Expected<SectionBase *> Sec = SecTable.getSection(
Index, "group member index " + Twine(Index) + " in section '" +
- GroupSec->Name + "' is invalid"));
+ GroupSec->Name + "' is invalid");
+ if (!Sec)
+ return Sec.takeError();
+
+ GroupSec->addMember(*Sec);
}
+
+ return Error::success();
}
template <class ELFT>
-void ELFBuilder<ELFT>::initSymbolTable(SymbolTableSection *SymTab) {
- const Elf_Shdr &Shdr = *unwrapOrError(ElfFile.getSection(SymTab->Index));
- StringRef StrTabData = unwrapOrError(ElfFile.getStringTableForSymtab(Shdr));
+Error ELFBuilder<ELFT>::initSymbolTable(SymbolTableSection *SymTab) {
+ Expected<const Elf_Shdr *> Shdr = ElfFile.getSection(SymTab->Index);
+ if (!Shdr)
+ return Shdr.takeError();
+
+ Expected<StringRef> StrTabData = ElfFile.getStringTableForSymtab(**Shdr);
+ if (!StrTabData)
+ return StrTabData.takeError();
+
ArrayRef<Elf_Word> ShndxData;
- auto Symbols = unwrapOrError(ElfFile.symbols(&Shdr));
- for (const auto &Sym : Symbols) {
+ Expected<typename ELFFile<ELFT>::Elf_Sym_Range> Symbols =
+ ElfFile.symbols(*Shdr);
+ if (!Symbols)
+ return Symbols.takeError();
+
+ for (const typename ELFFile<ELFT>::Elf_Sym &Sym : *Symbols) {
SectionBase *DefSection = nullptr;
- StringRef Name = unwrapOrError(Sym.getName(StrTabData));
+
+ Expected<StringRef> Name = Sym.getName(*StrTabData);
+ if (!Name)
+ return Name.takeError();
if (Sym.st_shndx == SHN_XINDEX) {
if (SymTab->getShndxTable() == nullptr)
- error("symbol '" + Name +
- "' has index SHN_XINDEX but no SHT_SYMTAB_SHNDX section exists");
+ return createStringError(errc::invalid_argument,
+ "symbol '" + *Name +
+ "' has index SHN_XINDEX but no "
+ "SHT_SYMTAB_SHNDX section exists");
if (ShndxData.data() == nullptr) {
- const Elf_Shdr &ShndxSec =
- *unwrapOrError(ElfFile.getSection(SymTab->getShndxTable()->Index));
- ShndxData = unwrapOrError(
- ElfFile.template getSectionContentsAsArray<Elf_Word>(&ShndxSec));
- if (ShndxData.size() != Symbols.size())
- error("symbol section index table does not have the same number of "
- "entries as the symbol table");
+ Expected<const Elf_Shdr *> ShndxSec =
+ ElfFile.getSection(SymTab->getShndxTable()->Index);
+ if (!ShndxSec)
+ return ShndxSec.takeError();
+
+ Expected<ArrayRef<Elf_Word>> Data =
+ ElfFile.template getSectionContentsAsArray<Elf_Word>(**ShndxSec);
+ if (!Data)
+ return Data.takeError();
+
+ ShndxData = *Data;
+ if (ShndxData.size() != Symbols->size())
+ return createStringError(
+ errc::invalid_argument,
+ "symbol section index table does not have the same number of "
+ "entries as the symbol table");
}
- Elf_Word Index = ShndxData[&Sym - Symbols.begin()];
- DefSection = Obj.sections().getSection(
+ Elf_Word Index = ShndxData[&Sym - Symbols->begin()];
+ Expected<SectionBase *> Sec = Obj.sections().getSection(
Index,
- "symbol '" + Name + "' has invalid section index " + Twine(Index));
+ "symbol '" + *Name + "' has invalid section index " + Twine(Index));
+ if (!Sec)
+ return Sec.takeError();
+
+ DefSection = *Sec;
} else if (Sym.st_shndx >= SHN_LORESERVE) {
if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) {
- error(
- "symbol '" + Name +
- "' has unsupported value greater than or equal to SHN_LORESERVE: " +
- Twine(Sym.st_shndx));
+ return createStringError(
+ errc::invalid_argument,
+ "symbol '" + *Name +
+ "' has unsupported value greater than or equal "
+ "to SHN_LORESERVE: " +
+ Twine(Sym.st_shndx));
}
} else if (Sym.st_shndx != SHN_UNDEF) {
- DefSection = Obj.sections().getSection(
- Sym.st_shndx, "symbol '" + Name +
+ Expected<SectionBase *> Sec = Obj.sections().getSection(
+ Sym.st_shndx, "symbol '" + *Name +
"' is defined has invalid section index " +
Twine(Sym.st_shndx));
+ if (!Sec)
+ return Sec.takeError();
+
+ DefSection = *Sec;
}
- SymTab->addSymbol(Name, Sym.getBinding(), Sym.getType(), DefSection,
+ SymTab->addSymbol(*Name, Sym.getBinding(), Sym.getType(), DefSection,
Sym.getValue(), Sym.st_other, Sym.st_shndx, Sym.st_size);
}
+
+ return Error::success();
}
template <class ELFT>
-static void getAddend(uint64_t &ToSet, const Elf_Rel_Impl<ELFT, false> &Rel) {}
+static void getAddend(uint64_t &, const Elf_Rel_Impl<ELFT, false> &) {}
template <class ELFT>
static void getAddend(uint64_t &ToSet, const Elf_Rel_Impl<ELFT, true> &Rela) {
@@ -1435,8 +1612,8 @@ static void getAddend(uint64_t &ToSet, const Elf_Rel_Impl<ELFT, true> &Rela) {
}
template <class T>
-static void initRelocations(RelocationSection *Relocs,
- SymbolTableSection *SymbolTable, T RelRange) {
+static Error initRelocations(RelocationSection *Relocs,
+ SymbolTableSection *SymbolTable, T RelRange) {
for (const auto &Rel : RelRange) {
Relocation ToAdd;
ToAdd.Offset = Rel.r_offset;
@@ -1445,39 +1622,54 @@ static void initRelocations(RelocationSection *Relocs,
if (uint32_t Sym = Rel.getSymbol(false)) {
if (!SymbolTable)
- error("'" + Relocs->Name +
- "': relocation references symbol with index " + Twine(Sym) +
- ", but there is no symbol table");
- ToAdd.RelocSymbol = SymbolTable->getSymbolByIndex(Sym);
+ return createStringError(
+ errc::invalid_argument,
+ "'" + Relocs->Name + "': relocation references symbol with index " +
+ Twine(Sym) + ", but there is no symbol table");
+ Expected<Symbol *> SymByIndex = SymbolTable->getSymbolByIndex(Sym);
+ if (!SymByIndex)
+ return SymByIndex.takeError();
+
+ ToAdd.RelocSymbol = *SymByIndex;
}
Relocs->addRelocation(ToAdd);
}
+
+ return Error::success();
}
-SectionBase *SectionTableRef::getSection(uint32_t Index, Twine ErrMsg) {
+Expected<SectionBase *> SectionTableRef::getSection(uint32_t Index,
+ Twine ErrMsg) {
if (Index == SHN_UNDEF || Index > Sections.size())
- error(ErrMsg);
+ return createStringError(errc::invalid_argument, ErrMsg);
return Sections[Index - 1].get();
}
template <class T>
-T *SectionTableRef::getSectionOfType(uint32_t Index, Twine IndexErrMsg,
- Twine TypeErrMsg) {
- if (T *Sec = dyn_cast<T>(getSection(Index, IndexErrMsg)))
+Expected<T *> SectionTableRef::getSectionOfType(uint32_t Index,
+ Twine IndexErrMsg,
+ Twine TypeErrMsg) {
+ Expected<SectionBase *> BaseSec = getSection(Index, IndexErrMsg);
+ if (!BaseSec)
+ return BaseSec.takeError();
+
+ if (T *Sec = dyn_cast<T>(*BaseSec))
return Sec;
- error(TypeErrMsg);
+
+ return createStringError(errc::invalid_argument, TypeErrMsg);
}
template <class ELFT>
-SectionBase &ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
- ArrayRef<uint8_t> Data;
+Expected<SectionBase &> ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
switch (Shdr.sh_type) {
case SHT_REL:
case SHT_RELA:
if (Shdr.sh_flags & SHF_ALLOC) {
- Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
- return Obj.addSection<DynamicRelocationSection>(Data);
+ if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+ return Obj.addSection<DynamicRelocationSection>(*Data);
+ else
+ return Data.takeError();
}
return Obj.addSection<RelocationSection>();
case SHT_STRTAB:
@@ -1485,25 +1677,35 @@ SectionBase &ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
// mean altering the memory image. There are no special link types or
// anything so we can just use a Section.
if (Shdr.sh_flags & SHF_ALLOC) {
- Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
- return Obj.addSection<Section>(Data);
+ if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+ return Obj.addSection<Section>(*Data);
+ else
+ return Data.takeError();
}
return Obj.addSection<StringTableSection>();
case SHT_HASH:
case SHT_GNU_HASH:
// Hash tables should refer to SHT_DYNSYM which we're not going to change.
// Because of this we don't need to mess with the hash tables either.
- Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
- return Obj.addSection<Section>(Data);
+ if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+ return Obj.addSection<Section>(*Data);
+ else
+ return Data.takeError();
case SHT_GROUP:
- Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
- return Obj.addSection<GroupSection>(Data);
+ if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+ return Obj.addSection<GroupSection>(*Data);
+ else
+ return Data.takeError();
case SHT_DYNSYM:
- Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
- return Obj.addSection<DynamicSymbolTableSection>(Data);
+ if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+ return Obj.addSection<DynamicSymbolTableSection>(*Data);
+ else
+ return Data.takeError();
case SHT_DYNAMIC:
- Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
- return Obj.addSection<DynamicSection>(Data);
+ if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+ return Obj.addSection<DynamicSection>(*Data);
+ else
+ return Data.takeError();
case SHT_SYMTAB: {
auto &SymTab = Obj.addSection<SymbolTableSection>();
Obj.SymbolTable = &SymTab;
@@ -1515,114 +1717,176 @@ SectionBase &ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
return ShndxSection;
}
case SHT_NOBITS:
- return Obj.addSection<Section>(Data);
+ return Obj.addSection<Section>(ArrayRef<uint8_t>());
default: {
- Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
+ Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr);
+ if (!Data)
+ return Data.takeError();
+
+ Expected<StringRef> Name = ElfFile.getSectionName(Shdr);
+ if (!Name)
+ return Name.takeError();
- StringRef Name = unwrapOrError(ElfFile.getSectionName(&Shdr));
- if (Name.startswith(".zdebug") || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) {
+ if (Name->startswith(".zdebug") || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) {
uint64_t DecompressedSize, DecompressedAlign;
std::tie(DecompressedSize, DecompressedAlign) =
- getDecompressedSizeAndAlignment<ELFT>(Data);
- return Obj.addSection<CompressedSection>(Data, DecompressedSize,
- DecompressedAlign);
+ getDecompressedSizeAndAlignment<ELFT>(*Data);
+ Expected<CompressedSection> NewSection =
+ CompressedSection::create(*Data, DecompressedSize, DecompressedAlign);
+ if (!NewSection)
+ return NewSection.takeError();
+
+ return Obj.addSection<CompressedSection>(std::move(*NewSection));
}
- return Obj.addSection<Section>(Data);
+ return Obj.addSection<Section>(*Data);
}
}
}
-template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
+template <class ELFT> Error ELFBuilder<ELFT>::readSectionHeaders() {
uint32_t Index = 0;
- for (const auto &Shdr : unwrapOrError(ElfFile.sections())) {
+ Expected<typename ELFFile<ELFT>::Elf_Shdr_Range> Sections =
+ ElfFile.sections();
+ if (!Sections)
+ return Sections.takeError();
+
+ for (const typename ELFFile<ELFT>::Elf_Shdr &Shdr : *Sections) {
if (Index == 0) {
++Index;
continue;
}
- auto &Sec = makeSection(Shdr);
- Sec.Name = std::string(unwrapOrError(ElfFile.getSectionName(&Shdr)));
- Sec.Type = Sec.OriginalType = Shdr.sh_type;
- Sec.Flags = Sec.OriginalFlags = Shdr.sh_flags;
- Sec.Addr = Shdr.sh_addr;
- Sec.Offset = Shdr.sh_offset;
- Sec.OriginalOffset = Shdr.sh_offset;
- Sec.Size = Shdr.sh_size;
- Sec.Link = Shdr.sh_link;
- Sec.Info = Shdr.sh_info;
- Sec.Align = Shdr.sh_addralign;
- Sec.EntrySize = Shdr.sh_entsize;
- Sec.Index = Index++;
- Sec.OriginalData =
+ Expected<SectionBase &> Sec = makeSection(Shdr);
+ if (!Sec)
+ return Sec.takeError();
+
+ Expected<StringRef> SecName = ElfFile.getSectionName(Shdr);
+ if (!SecName)
+ return SecName.takeError();
+ Sec->Name = SecName->str();
+ Sec->Type = Sec->OriginalType = Shdr.sh_type;
+ Sec->Flags = Sec->OriginalFlags = Shdr.sh_flags;
+ Sec->Addr = Shdr.sh_addr;
+ Sec->Offset = Shdr.sh_offset;
+ Sec->OriginalOffset = Shdr.sh_offset;
+ Sec->Size = Shdr.sh_size;
+ Sec->Link = Shdr.sh_link;
+ Sec->Info = Shdr.sh_info;
+ Sec->Align = Shdr.sh_addralign;
+ Sec->EntrySize = Shdr.sh_entsize;
+ Sec->Index = Index++;
+ Sec->OriginalIndex = Sec->Index;
+ Sec->OriginalData =
ArrayRef<uint8_t>(ElfFile.base() + Shdr.sh_offset,
(Shdr.sh_type == SHT_NOBITS) ? 0 : Shdr.sh_size);
}
+
+ return Error::success();
}
-template <class ELFT> void ELFBuilder<ELFT>::readSections(bool EnsureSymtab) {
- uint32_t ShstrIndex = ElfFile.getHeader()->e_shstrndx;
- if (ShstrIndex == SHN_XINDEX)
- ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link;
+template <class ELFT> Error ELFBuilder<ELFT>::readSections(bool EnsureSymtab) {
+ uint32_t ShstrIndex = ElfFile.getHeader().e_shstrndx;
+ if (ShstrIndex == SHN_XINDEX) {
+ Expected<const Elf_Shdr *> Sec = ElfFile.getSection(0);
+ if (!Sec)
+ return Sec.takeError();
+
+ ShstrIndex = (*Sec)->sh_link;
+ }
if (ShstrIndex == SHN_UNDEF)
Obj.HadShdrs = false;
- else
- Obj.SectionNames =
+ else {
+ Expected<StringTableSection *> Sec =
Obj.sections().template getSectionOfType<StringTableSection>(
ShstrIndex,
"e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
" is invalid",
"e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
" does not reference a string table");
+ if (!Sec)
+ return Sec.takeError();
+
+ Obj.SectionNames = *Sec;
+ }
// If a section index table exists we'll need to initialize it before we
// initialize the symbol table because the symbol table might need to
// reference it.
if (Obj.SectionIndexTable)
- Obj.SectionIndexTable->initialize(Obj.sections());
+ if (Error Err = Obj.SectionIndexTable->initialize(Obj.sections()))
+ return Err;
// Now that all of the sections have been added we can fill out some extra
// details about symbol tables. We need the symbol table filled out before
// any relocations.
if (Obj.SymbolTable) {
- Obj.SymbolTable->initialize(Obj.sections());
- initSymbolTable(Obj.SymbolTable);
+ if (Error Err = Obj.SymbolTable->initialize(Obj.sections()))
+ return Err;
+ if (Error Err = initSymbolTable(Obj.SymbolTable))
+ return Err;
} else if (EnsureSymtab) {
- Obj.addNewSymbolTable();
+ if (Error Err = Obj.addNewSymbolTable())
+ return Err;
}
// Now that all sections and symbols have been added we can add
// relocations that reference symbols and set the link and info fields for
// relocation sections.
- for (auto &Sec : Obj.sections()) {
+ for (SectionBase &Sec : Obj.sections()) {
if (&Sec == Obj.SymbolTable)
continue;
- Sec.initialize(Obj.sections());
+ if (Error Err = Sec.initialize(Obj.sections()))
+ return Err;
if (auto RelSec = dyn_cast<RelocationSection>(&Sec)) {
- auto Shdr = unwrapOrError(ElfFile.sections()).begin() + RelSec->Index;
- if (RelSec->Type == SHT_REL)
- initRelocations(RelSec, Obj.SymbolTable,
- unwrapOrError(ElfFile.rels(Shdr)));
- else
- initRelocations(RelSec, Obj.SymbolTable,
- unwrapOrError(ElfFile.relas(Shdr)));
+ Expected<typename ELFFile<ELFT>::Elf_Shdr_Range> Sections =
+ ElfFile.sections();
+ if (!Sections)
+ return Sections.takeError();
+
+ const typename ELFFile<ELFT>::Elf_Shdr *Shdr =
+ Sections->begin() + RelSec->Index;
+ if (RelSec->Type == SHT_REL) {
+ Expected<typename ELFFile<ELFT>::Elf_Rel_Range> Rels =
+ ElfFile.rels(*Shdr);
+ if (!Rels)
+ return Rels.takeError();
+
+ if (Error Err = initRelocations(RelSec, Obj.SymbolTable, *Rels))
+ return Err;
+ } else {
+ Expected<typename ELFFile<ELFT>::Elf_Rela_Range> Relas =
+ ElfFile.relas(*Shdr);
+ if (!Relas)
+ return Relas.takeError();
+
+ if (Error Err = initRelocations(RelSec, Obj.SymbolTable, *Relas))
+ return Err;
+ }
} else if (auto GroupSec = dyn_cast<GroupSection>(&Sec)) {
- initGroupSection(GroupSec);
+ if (Error Err = initGroupSection(GroupSec))
+ return Err;
}
}
+
+ return Error::success();
}
-template <class ELFT> void ELFBuilder<ELFT>::build(bool EnsureSymtab) {
- readSectionHeaders();
- findEhdrOffset();
+template <class ELFT> Error ELFBuilder<ELFT>::build(bool EnsureSymtab) {
+ if (Error E = readSectionHeaders())
+ return E;
+ if (Error E = findEhdrOffset())
+ return E;
// The ELFFile whose ELF headers and program headers are copied into the
// output file. Normally the same as ElfFile, but if we're extracting a
// loadable partition it will point to the partition's headers.
- ELFFile<ELFT> HeadersFile = unwrapOrError(ELFFile<ELFT>::create(toStringRef(
- {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset})));
+ Expected<ELFFile<ELFT>> HeadersFile = ELFFile<ELFT>::create(toStringRef(
+ {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset}));
+ if (!HeadersFile)
+ return HeadersFile.takeError();
- auto &Ehdr = *HeadersFile.getHeader();
+ const typename ELFFile<ELFT>::Elf_Ehdr &Ehdr = HeadersFile->getHeader();
Obj.OSABI = Ehdr.e_ident[EI_OSABI];
Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION];
Obj.Type = Ehdr.e_type;
@@ -1631,15 +1895,17 @@ template <class ELFT> void ELFBuilder<ELFT>::build(bool EnsureSymtab) {
Obj.Entry = Ehdr.e_entry;
Obj.Flags = Ehdr.e_flags;
- readSections(EnsureSymtab);
- readProgramHeaders(HeadersFile);
+ if (Error E = readSections(EnsureSymtab))
+ return E;
+ return readProgramHeaders(*HeadersFile);
}
Writer::~Writer() {}
Reader::~Reader() {}
-std::unique_ptr<Object> BinaryReader::create(bool /*EnsureSymtab*/) const {
+Expected<std::unique_ptr<Object>>
+BinaryReader::create(bool /*EnsureSymtab*/) const {
return BinaryELFBuilder(MemBuf, NewSymbolVisibility).build();
}
@@ -1669,31 +1935,39 @@ Expected<std::vector<IHexRecord>> IHexReader::parse() const {
return std::move(Records);
}
-std::unique_ptr<Object> IHexReader::create(bool /*EnsureSymtab*/) const {
- std::vector<IHexRecord> Records = unwrapOrError(parse());
- return IHexELFBuilder(Records).build();
+Expected<std::unique_ptr<Object>>
+IHexReader::create(bool /*EnsureSymtab*/) const {
+ Expected<std::vector<IHexRecord>> Records = parse();
+ if (!Records)
+ return Records.takeError();
+
+ return IHexELFBuilder(*Records).build();
}
-std::unique_ptr<Object> ELFReader::create(bool EnsureSymtab) const {
+Expected<std::unique_ptr<Object>> ELFReader::create(bool EnsureSymtab) const {
auto Obj = std::make_unique<Object>();
if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
ELFBuilder<ELF32LE> Builder(*O, *Obj, ExtractPartition);
- Builder.build(EnsureSymtab);
- return Obj;
+ if (Error Err = Builder.build(EnsureSymtab))
+ return std::move(Err);
+ return std::move(Obj);
} else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
ELFBuilder<ELF64LE> Builder(*O, *Obj, ExtractPartition);
- Builder.build(EnsureSymtab);
- return Obj;
+ if (Error Err = Builder.build(EnsureSymtab))
+ return std::move(Err);
+ return std::move(Obj);
} else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
ELFBuilder<ELF32BE> Builder(*O, *Obj, ExtractPartition);
- Builder.build(EnsureSymtab);
- return Obj;
+ if (Error Err = Builder.build(EnsureSymtab))
+ return std::move(Err);
+ return std::move(Obj);
} else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
ELFBuilder<ELF64BE> Builder(*O, *Obj, ExtractPartition);
- Builder.build(EnsureSymtab);
- return Obj;
+ if (Error Err = Builder.build(EnsureSymtab))
+ return std::move(Err);
+ return std::move(Obj);
}
- error("invalid file type");
+ return createStringError(errc::invalid_argument, "invalid file type");
}
template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
@@ -1787,13 +2061,16 @@ template <class ELFT> void ELFWriter<ELFT>::writeShdrs() {
writeShdr(Sec);
}
-template <class ELFT> void ELFWriter<ELFT>::writeSectionData() {
+template <class ELFT> Error ELFWriter<ELFT>::writeSectionData() {
for (SectionBase &Sec : Obj.sections())
// Segments are responsible for writing their contents, so only write the
// section data if the section is not in a segment. Note that this renders
// sections in segments effectively immutable.
if (Sec.ParentSegment == nullptr)
- Sec.accept(*SecWriter);
+ if (Error Err = Sec.accept(*SecWriter))
+ return Err;
+
+ return Error::success();
}
template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() {
@@ -1820,8 +2097,8 @@ ELFWriter<ELFT>::ELFWriter(Object &Obj, Buffer &Buf, bool WSH,
: Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs),
OnlyKeepDebug(OnlyKeepDebug) {}
-Error Object::removeSections(bool AllowBrokenLinks,
- std::function<bool(const SectionBase &)> ToRemove) {
+Error Object::removeSections(
+ bool AllowBrokenLinks, std::function<bool(const SectionBase &)> ToRemove) {
auto Iter = std::stable_partition(
std::begin(Sections), std::end(Sections), [=](const SecPtr &Sec) {
@@ -1857,8 +2134,8 @@ Error Object::removeSections(bool AllowBrokenLinks,
// a live section critically depends on a section being removed somehow
// (e.g. the removed section is referenced by a relocation).
for (auto &KeepSec : make_range(std::begin(Sections), Iter)) {
- if (Error E = KeepSec->removeSectionReferences(AllowBrokenLinks,
- [&RemoveSections](const SectionBase *Sec) {
+ if (Error E = KeepSec->removeSectionReferences(
+ AllowBrokenLinks, [&RemoveSections](const SectionBase *Sec) {
return RemoveSections.find(Sec) != RemoveSections.end();
}))
return E;
@@ -1880,7 +2157,7 @@ Error Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
return Error::success();
}
-void Object::addNewSymbolTable() {
+Error Object::addNewSymbolTable() {
assert(!SymbolTable && "Object must not has a SymbolTable.");
// Reuse an existing SHT_STRTAB section if it exists.
@@ -1901,10 +2178,13 @@ void Object::addNewSymbolTable() {
SymbolTableSection &SymTab = addSection<SymbolTableSection>();
SymTab.Name = ".symtab";
SymTab.Link = StrTab->Index;
- SymTab.initialize(sections());
+ if (Error Err = SymTab.initialize(sections()))
+ return Err;
SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0);
SymbolTable = &SymTab;
+
+ return Error::success();
}
void Object::sortSections() {
@@ -2025,12 +2305,19 @@ static uint64_t layoutSectionsForOnlyKeepDebug(Object &Obj, uint64_t Off) {
return Off;
}
-// Rewrite p_offset and p_filesz of non-empty non-PT_PHDR segments after
-// sh_offset values have been updated.
+// Rewrite p_offset and p_filesz of non-PT_PHDR segments after sh_offset values
+// have been updated.
static uint64_t layoutSegmentsForOnlyKeepDebug(std::vector<Segment *> &Segments,
uint64_t HdrEnd) {
uint64_t MaxOffset = 0;
for (Segment *Seg : Segments) {
+ // An empty segment contains no section (see sectionWithinSegment). If it
+ // has a parent segment, copy the parent segment's offset field. This works
+ // for empty PT_TLS. We don't handle empty segments without a parent for
+ // now.
+ if (Seg->ParentSegment != nullptr && Seg->MemSize == 0)
+ Seg->Offset = Seg->ParentSegment->Offset;
+
const SectionBase *FirstSec = Seg->firstSection();
if (Seg->Type == PT_PHDR || !FirstSec)
continue;
@@ -2118,7 +2405,8 @@ template <class ELFT> Error ELFWriter<ELFT>::write() {
writeSegmentData();
writeEhdr();
writePhdrs();
- writeSectionData();
+ if (Error E = writeSectionData())
+ return E;
if (WriteSectionHeaders)
writeShdrs();
return Buf.commit();
@@ -2163,8 +2451,8 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
if (Obj.sections().size() >= SHN_LORESERVE) {
SectionTableRef Sections = Obj.sections();
NeedsLargeIndexes =
- std::any_of(Sections.begin() + SHN_LORESERVE, Sections.end(),
- [](const SectionBase &Sec) { return Sec.HasSymbol; });
+ any_of(drop_begin(Sections, SHN_LORESERVE),
+ [](const SectionBase &Sec) { return Sec.HasSymbol; });
// TODO: handle case where only one section needs the large index table but
// only needs it because the large index table hasn't been removed yet.
}
@@ -2207,7 +2495,8 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
auto SecSizer = std::make_unique<ELFSectionSizer<ELFT>>();
for (SectionBase &Sec : Obj.sections()) {
Sec.Index = Index++;
- Sec.accept(*SecSizer);
+ if (Error Err = Sec.accept(*SecSizer))
+ return Err;
}
// The symbol table does not update all other sections on update. For
@@ -2248,7 +2537,9 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
Error BinaryWriter::write() {
for (const SectionBase &Sec : Obj.allocSections())
- Sec.accept(*SecWriter);
+ if (Error Err = Sec.accept(*SecWriter))
+ return Err;
+
return Buf.commit();
}
@@ -2320,7 +2611,8 @@ Error IHexWriter::write() {
IHexSectionWriter Writer(Buf);
// Write sections.
for (const SectionBase *Sec : Sections)
- Sec->accept(Writer);
+ if (Error Err = Sec->accept(Writer))
+ return Err;
uint64_t Offset = Writer.getBufferOffset();
// Write entry point address.
@@ -2336,8 +2628,8 @@ Error IHexWriter::checkSection(const SectionBase &Sec) {
if (addressOverflows32bit(Addr) || addressOverflows32bit(Addr + Sec.Size - 1))
return createStringError(
errc::invalid_argument,
- "Section '%s' address range [0x%llx, 0x%llx] is not 32 bit", Sec.Name.c_str(),
- Addr, Addr + Sec.Size - 1);
+ "Section '%s' address range [0x%llx, 0x%llx] is not 32 bit",
+ Sec.Name.c_str(), Addr, Addr + Sec.Size - 1);
return Error::success();
}
@@ -2374,7 +2666,8 @@ Error IHexWriter::finalize() {
IHexSectionWriterBase LengthCalc(Buf);
for (const SectionBase *Sec : Sections)
- Sec->accept(LengthCalc);
+ if (Error Err = Sec->accept(LengthCalc))
+ return Err;
// We need space to write section records + StartAddress record
// (if start adress is not zero) + EndOfFile record.
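The Object.cpp hunks above all apply one recurring rewrite: functions that used to call error() or unwrapOrError() and terminate now return llvm::Error or llvm::Expected<T>, build their diagnostics with createStringError, and let each caller check and propagate the failure (an Expected-returning caller such as ELFReader::create forwards a failure as std::move(Err), since Expected is constructible from Error). A minimal sketch of that pattern, using illustrative names (parseIndex, run) rather than the llvm-objcopy functions:

#include "llvm/ADT/Twine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Fallible helper: returns a value or a string error, never exits.
static Expected<unsigned> parseIndex(unsigned Index, unsigned Size) {
  if (Index == 0 || Index > Size)
    return createStringError(errc::invalid_argument,
                             "index " + Twine(Index) + " is out of range");
  return Index - 1;
}

// Caller checks the Expected and propagates the error upward.
static Error run(unsigned Index, unsigned Size) {
  Expected<unsigned> Slot = parseIndex(Index, Size);
  if (!Slot)
    return Slot.takeError();
  outs() << "resolved slot " << *Slot << "\n";
  return Error::success();
}

int main() {
  if (Error E = run(5, 3)) {
    errs() << "error: " << toString(std::move(E)) << "\n";
    return 1;
  }
  return 0;
}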
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.h
index ed89e916b838..0205c2d4f398 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.h
@@ -61,10 +61,11 @@ public:
iterator end() const { return iterator(Sections.data() + Sections.size()); }
size_t size() const { return Sections.size(); }
- SectionBase *getSection(uint32_t Index, Twine ErrMsg);
+ Expected<SectionBase *> getSection(uint32_t Index, Twine ErrMsg);
template <class T>
- T *getSectionOfType(uint32_t Index, Twine IndexErrMsg, Twine TypeErrMsg);
+ Expected<T *> getSectionOfType(uint32_t Index, Twine IndexErrMsg,
+ Twine TypeErrMsg);
};
enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE };
@@ -73,34 +74,34 @@ class SectionVisitor {
public:
virtual ~SectionVisitor() = default;
- virtual void visit(const Section &Sec) = 0;
- virtual void visit(const OwnedDataSection &Sec) = 0;
- virtual void visit(const StringTableSection &Sec) = 0;
- virtual void visit(const SymbolTableSection &Sec) = 0;
- virtual void visit(const RelocationSection &Sec) = 0;
- virtual void visit(const DynamicRelocationSection &Sec) = 0;
- virtual void visit(const GnuDebugLinkSection &Sec) = 0;
- virtual void visit(const GroupSection &Sec) = 0;
- virtual void visit(const SectionIndexSection &Sec) = 0;
- virtual void visit(const CompressedSection &Sec) = 0;
- virtual void visit(const DecompressedSection &Sec) = 0;
+ virtual Error visit(const Section &Sec) = 0;
+ virtual Error visit(const OwnedDataSection &Sec) = 0;
+ virtual Error visit(const StringTableSection &Sec) = 0;
+ virtual Error visit(const SymbolTableSection &Sec) = 0;
+ virtual Error visit(const RelocationSection &Sec) = 0;
+ virtual Error visit(const DynamicRelocationSection &Sec) = 0;
+ virtual Error visit(const GnuDebugLinkSection &Sec) = 0;
+ virtual Error visit(const GroupSection &Sec) = 0;
+ virtual Error visit(const SectionIndexSection &Sec) = 0;
+ virtual Error visit(const CompressedSection &Sec) = 0;
+ virtual Error visit(const DecompressedSection &Sec) = 0;
};
class MutableSectionVisitor {
public:
virtual ~MutableSectionVisitor() = default;
- virtual void visit(Section &Sec) = 0;
- virtual void visit(OwnedDataSection &Sec) = 0;
- virtual void visit(StringTableSection &Sec) = 0;
- virtual void visit(SymbolTableSection &Sec) = 0;
- virtual void visit(RelocationSection &Sec) = 0;
- virtual void visit(DynamicRelocationSection &Sec) = 0;
- virtual void visit(GnuDebugLinkSection &Sec) = 0;
- virtual void visit(GroupSection &Sec) = 0;
- virtual void visit(SectionIndexSection &Sec) = 0;
- virtual void visit(CompressedSection &Sec) = 0;
- virtual void visit(DecompressedSection &Sec) = 0;
+ virtual Error visit(Section &Sec) = 0;
+ virtual Error visit(OwnedDataSection &Sec) = 0;
+ virtual Error visit(StringTableSection &Sec) = 0;
+ virtual Error visit(SymbolTableSection &Sec) = 0;
+ virtual Error visit(RelocationSection &Sec) = 0;
+ virtual Error visit(DynamicRelocationSection &Sec) = 0;
+ virtual Error visit(GnuDebugLinkSection &Sec) = 0;
+ virtual Error visit(GroupSection &Sec) = 0;
+ virtual Error visit(SectionIndexSection &Sec) = 0;
+ virtual Error visit(CompressedSection &Sec) = 0;
+ virtual Error visit(DecompressedSection &Sec) = 0;
};
class SectionWriter : public SectionVisitor {
@@ -110,17 +111,17 @@ protected:
public:
virtual ~SectionWriter() = default;
- void visit(const Section &Sec) override;
- void visit(const OwnedDataSection &Sec) override;
- void visit(const StringTableSection &Sec) override;
- void visit(const DynamicRelocationSection &Sec) override;
- virtual void visit(const SymbolTableSection &Sec) override = 0;
- virtual void visit(const RelocationSection &Sec) override = 0;
- virtual void visit(const GnuDebugLinkSection &Sec) override = 0;
- virtual void visit(const GroupSection &Sec) override = 0;
- virtual void visit(const SectionIndexSection &Sec) override = 0;
- virtual void visit(const CompressedSection &Sec) override = 0;
- virtual void visit(const DecompressedSection &Sec) override = 0;
+ Error visit(const Section &Sec) override;
+ Error visit(const OwnedDataSection &Sec) override;
+ Error visit(const StringTableSection &Sec) override;
+ Error visit(const DynamicRelocationSection &Sec) override;
+ virtual Error visit(const SymbolTableSection &Sec) override = 0;
+ virtual Error visit(const RelocationSection &Sec) override = 0;
+ virtual Error visit(const GnuDebugLinkSection &Sec) override = 0;
+ virtual Error visit(const GroupSection &Sec) override = 0;
+ virtual Error visit(const SectionIndexSection &Sec) override = 0;
+ virtual Error visit(const CompressedSection &Sec) override = 0;
+ virtual Error visit(const DecompressedSection &Sec) override = 0;
explicit SectionWriter(Buffer &Buf) : Out(Buf) {}
};
@@ -134,13 +135,13 @@ private:
public:
virtual ~ELFSectionWriter() {}
- void visit(const SymbolTableSection &Sec) override;
- void visit(const RelocationSection &Sec) override;
- void visit(const GnuDebugLinkSection &Sec) override;
- void visit(const GroupSection &Sec) override;
- void visit(const SectionIndexSection &Sec) override;
- void visit(const CompressedSection &Sec) override;
- void visit(const DecompressedSection &Sec) override;
+ Error visit(const SymbolTableSection &Sec) override;
+ Error visit(const RelocationSection &Sec) override;
+ Error visit(const GnuDebugLinkSection &Sec) override;
+ Error visit(const GroupSection &Sec) override;
+ Error visit(const SectionIndexSection &Sec) override;
+ Error visit(const CompressedSection &Sec) override;
+ Error visit(const DecompressedSection &Sec) override;
explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
};
@@ -154,17 +155,17 @@ private:
using Elf_Xword = typename ELFT::Xword;
public:
- void visit(Section &Sec) override;
- void visit(OwnedDataSection &Sec) override;
- void visit(StringTableSection &Sec) override;
- void visit(DynamicRelocationSection &Sec) override;
- void visit(SymbolTableSection &Sec) override;
- void visit(RelocationSection &Sec) override;
- void visit(GnuDebugLinkSection &Sec) override;
- void visit(GroupSection &Sec) override;
- void visit(SectionIndexSection &Sec) override;
- void visit(CompressedSection &Sec) override;
- void visit(DecompressedSection &Sec) override;
+ Error visit(Section &Sec) override;
+ Error visit(OwnedDataSection &Sec) override;
+ Error visit(StringTableSection &Sec) override;
+ Error visit(DynamicRelocationSection &Sec) override;
+ Error visit(SymbolTableSection &Sec) override;
+ Error visit(RelocationSection &Sec) override;
+ Error visit(GnuDebugLinkSection &Sec) override;
+ Error visit(GroupSection &Sec) override;
+ Error visit(SectionIndexSection &Sec) override;
+ Error visit(CompressedSection &Sec) override;
+ Error visit(DecompressedSection &Sec) override;
};
#define MAKE_SEC_WRITER_FRIEND \
@@ -178,13 +179,13 @@ class BinarySectionWriter : public SectionWriter {
public:
virtual ~BinarySectionWriter() {}
- void visit(const SymbolTableSection &Sec) override;
- void visit(const RelocationSection &Sec) override;
- void visit(const GnuDebugLinkSection &Sec) override;
- void visit(const GroupSection &Sec) override;
- void visit(const SectionIndexSection &Sec) override;
- void visit(const CompressedSection &Sec) override;
- void visit(const DecompressedSection &Sec) override;
+ Error visit(const SymbolTableSection &Sec) override;
+ Error visit(const RelocationSection &Sec) override;
+ Error visit(const GnuDebugLinkSection &Sec) override;
+ Error visit(const GroupSection &Sec) override;
+ Error visit(const SectionIndexSection &Sec) override;
+ Error visit(const CompressedSection &Sec) override;
+ Error visit(const DecompressedSection &Sec) override;
explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
};
@@ -285,10 +286,10 @@ public:
explicit IHexSectionWriterBase(Buffer &Buf) : BinarySectionWriter(Buf) {}
uint64_t getBufferOffset() const { return Offset; }
- void visit(const Section &Sec) final;
- void visit(const OwnedDataSection &Sec) final;
- void visit(const StringTableSection &Sec) override;
- void visit(const DynamicRelocationSection &Sec) final;
+ Error visit(const Section &Sec) final;
+ Error visit(const OwnedDataSection &Sec) final;
+ Error visit(const StringTableSection &Sec) override;
+ Error visit(const DynamicRelocationSection &Sec) final;
using BinarySectionWriter::visit;
};
@@ -298,7 +299,7 @@ public:
IHexSectionWriter(Buffer &Buf) : IHexSectionWriterBase(Buf) {}
void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
- void visit(const StringTableSection &Sec) override;
+ Error visit(const StringTableSection &Sec) override;
};
class Writer {
@@ -329,7 +330,7 @@ private:
void writePhdrs();
void writeShdrs();
- void writeSectionData();
+ Error writeSectionData();
void writeSegmentData();
void assignOffsets();
@@ -389,8 +390,8 @@ public:
Segment *ParentSegment = nullptr;
uint64_t HeaderOffset = 0;
uint32_t Index = 0;
- bool HasSymbol = false;
+ uint32_t OriginalIndex = 0;
uint64_t OriginalFlags = 0;
uint64_t OriginalType = ELF::SHT_NULL;
uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();
@@ -406,21 +407,22 @@ public:
uint64_t Size = 0;
uint64_t Type = ELF::SHT_NULL;
ArrayRef<uint8_t> OriginalData;
+ bool HasSymbol = false;
SectionBase() = default;
SectionBase(const SectionBase &) = default;
virtual ~SectionBase() = default;
- virtual void initialize(SectionTableRef SecTable);
+ virtual Error initialize(SectionTableRef SecTable);
virtual void finalize();
// Remove references to these sections. The list of sections must be sorted.
virtual Error
removeSectionReferences(bool AllowBrokenLinks,
function_ref<bool(const SectionBase *)> ToRemove);
virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
- virtual void accept(SectionVisitor &Visitor) const = 0;
- virtual void accept(MutableSectionVisitor &Visitor) = 0;
+ virtual Error accept(SectionVisitor &Visitor) const = 0;
+ virtual Error accept(MutableSectionVisitor &Visitor) = 0;
virtual void markSymbols();
virtual void
replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
@@ -434,9 +436,9 @@ private:
bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const {
// Some sections might have the same address if one of them is empty. To
// fix this we can use the lexicographic ordering on ->Addr and the
- // address of the actully stored section.
+ // original index.
if (Lhs->OriginalOffset == Rhs->OriginalOffset)
- return Lhs < Rhs;
+ return Lhs->OriginalIndex < Rhs->OriginalIndex;
return Lhs->OriginalOffset < Rhs->OriginalOffset;
}
};
@@ -481,11 +483,12 @@ class Section : public SectionBase {
public:
explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}
- void accept(SectionVisitor &Visitor) const override;
- void accept(MutableSectionVisitor &Visitor) override;
- Error removeSectionReferences(bool AllowBrokenLinks,
+ Error accept(SectionVisitor &Visitor) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
+ Error removeSectionReferences(
+ bool AllowBrokenLinks,
function_ref<bool(const SectionBase *)> ToRemove) override;
- void initialize(SectionTableRef SecTable) override;
+ Error initialize(SectionTableRef SecTable) override;
void finalize() override;
};
@@ -513,8 +516,8 @@ public:
}
void appendHexData(StringRef HexData);
- void accept(SectionVisitor &Sec) const override;
- void accept(MutableSectionVisitor &Visitor) override;
+ Error accept(SectionVisitor &Sec) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
};
class CompressedSection : public SectionBase {
@@ -526,21 +529,28 @@ class CompressedSection : public SectionBase {
SmallVector<char, 128> CompressedData;
public:
- CompressedSection(const SectionBase &Sec,
- DebugCompressionType CompressionType);
- CompressedSection(ArrayRef<uint8_t> CompressedData, uint64_t DecompressedSize,
- uint64_t DecompressedAlign);
+ static Expected<CompressedSection>
+ create(const SectionBase &Sec, DebugCompressionType CompressionType);
+ static Expected<CompressedSection> create(ArrayRef<uint8_t> CompressedData,
+ uint64_t DecompressedSize,
+ uint64_t DecompressedAlign);
uint64_t getDecompressedSize() const { return DecompressedSize; }
uint64_t getDecompressedAlign() const { return DecompressedAlign; }
- void accept(SectionVisitor &Visitor) const override;
- void accept(MutableSectionVisitor &Visitor) override;
+ Error accept(SectionVisitor &Visitor) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
static bool classof(const SectionBase *S) {
return (S->OriginalFlags & ELF::SHF_COMPRESSED) ||
(StringRef(S->Name).startswith(".zdebug"));
}
+
+private:
+ CompressedSection(const SectionBase &Sec,
+ DebugCompressionType CompressionType, Error &Err);
+ CompressedSection(ArrayRef<uint8_t> CompressedData, uint64_t DecompressedSize,
+ uint64_t DecompressedAlign);
};
class DecompressedSection : public SectionBase {
@@ -556,8 +566,8 @@ public:
Name = "." + Name.substr(2);
}
- void accept(SectionVisitor &Visitor) const override;
- void accept(MutableSectionVisitor &Visitor) override;
+ Error accept(SectionVisitor &Visitor) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
};
// There are two types of string tables that can exist, dynamic and not dynamic.
@@ -581,8 +591,8 @@ public:
void addString(StringRef Name);
uint32_t findIndex(StringRef Name) const;
void prepareForLayout();
- void accept(SectionVisitor &Visitor) const override;
- void accept(MutableSectionVisitor &Visitor) override;
+ Error accept(SectionVisitor &Visitor) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
static bool classof(const SectionBase *S) {
if (S->OriginalFlags & ELF::SHF_ALLOC)
@@ -639,18 +649,18 @@ public:
virtual ~SectionIndexSection() {}
void addIndex(uint32_t Index) {
assert(Size > 0);
- Indexes.push_back(Index);
+ Indexes.push_back(Index);
}
void reserve(size_t NumSymbols) {
Indexes.reserve(NumSymbols);
Size = NumSymbols * 4;
- }
+ }
void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; }
- void initialize(SectionTableRef SecTable) override;
+ Error initialize(SectionTableRef SecTable) override;
void finalize() override;
- void accept(SectionVisitor &Visitor) const override;
- void accept(MutableSectionVisitor &Visitor) override;
+ Error accept(SectionVisitor &Visitor) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
SectionIndexSection() {
Name = ".symtab_shndx";
@@ -688,16 +698,17 @@ public:
const SectionIndexSection *getShndxTable() const { return SectionIndexTable; }
void fillShndxTable();
const SectionBase *getStrTab() const { return SymbolNames; }
- const Symbol *getSymbolByIndex(uint32_t Index) const;
- Symbol *getSymbolByIndex(uint32_t Index);
+ Expected<const Symbol *> getSymbolByIndex(uint32_t Index) const;
+ Expected<Symbol *> getSymbolByIndex(uint32_t Index);
void updateSymbols(function_ref<void(Symbol &)> Callable);
- Error removeSectionReferences(bool AllowBrokenLinks,
+ Error removeSectionReferences(
+ bool AllowBrokenLinks,
function_ref<bool(const SectionBase *)> ToRemove) override;
- void initialize(SectionTableRef SecTable) override;
+ Error initialize(SectionTableRef SecTable) override;
void finalize() override;
- void accept(SectionVisitor &Visitor) const override;
- void accept(MutableSectionVisitor &Visitor) override;
+ Error accept(SectionVisitor &Visitor) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
void replaceSectionReferences(
const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
@@ -748,7 +759,7 @@ protected:
SymTabType *Symbols = nullptr;
public:
- void initialize(SectionTableRef SecTable) override;
+ Error initialize(SectionTableRef SecTable) override;
void finalize() override;
};
@@ -760,9 +771,10 @@ class RelocationSection
public:
void addRelocation(Relocation Rel) { Relocations.push_back(Rel); }
- void accept(SectionVisitor &Visitor) const override;
- void accept(MutableSectionVisitor &Visitor) override;
- Error removeSectionReferences(bool AllowBrokenLinks,
+ Error accept(SectionVisitor &Visitor) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
+ Error removeSectionReferences(
+ bool AllowBrokenLinks,
function_ref<bool(const SectionBase *)> ToRemove) override;
Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
void markSymbols() override;
@@ -798,8 +810,8 @@ public:
void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; }
void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); }
- void accept(SectionVisitor &) const override;
- void accept(MutableSectionVisitor &Visitor) override;
+ Error accept(SectionVisitor &) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
void finalize() override;
Error removeSectionReferences(
bool AllowBrokenLinks,
@@ -843,8 +855,8 @@ private:
public:
explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
- void accept(SectionVisitor &) const override;
- void accept(MutableSectionVisitor &Visitor) override;
+ Error accept(SectionVisitor &) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
Error removeSectionReferences(
bool AllowBrokenLinks,
function_ref<bool(const SectionBase *)> ToRemove) override;
@@ -868,14 +880,14 @@ private:
public:
// If we add this section from an external source we can use this ctor.
explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC);
- void accept(SectionVisitor &Visitor) const override;
- void accept(MutableSectionVisitor &Visitor) override;
+ Error accept(SectionVisitor &Visitor) const override;
+ Error accept(MutableSectionVisitor &Visitor) override;
};
class Reader {
public:
virtual ~Reader();
- virtual std::unique_ptr<Object> create(bool EnsureSymtab) const = 0;
+ virtual Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const = 0;
};
using object::Binary;
@@ -891,7 +903,7 @@ protected:
void initHeaderSegment();
StringTableSection *addStrTab();
SymbolTableSection *addSymTab(StringTableSection *StrTab);
- void initSections();
+ Error initSections();
public:
BasicELFBuilder() : Obj(std::make_unique<Object>()) {}
@@ -907,7 +919,7 @@ public:
: BasicELFBuilder(), MemBuf(MB),
NewSymbolVisibility(NewSymbolVisibility) {}
- std::unique_ptr<Object> build();
+ Expected<std::unique_ptr<Object>> build();
};
class IHexELFBuilder : public BasicELFBuilder {
@@ -919,7 +931,7 @@ public:
IHexELFBuilder(const std::vector<IHexRecord> &Records)
: BasicELFBuilder(), Records(Records) {}
- std::unique_ptr<Object> build();
+ Expected<std::unique_ptr<Object>> build();
};
template <class ELFT> class ELFBuilder {
@@ -934,21 +946,21 @@ private:
Optional<StringRef> ExtractPartition;
void setParentSegment(Segment &Child);
- void readProgramHeaders(const ELFFile<ELFT> &HeadersFile);
- void initGroupSection(GroupSection *GroupSec);
- void initSymbolTable(SymbolTableSection *SymTab);
- void readSectionHeaders();
- void readSections(bool EnsureSymtab);
- void findEhdrOffset();
- SectionBase &makeSection(const Elf_Shdr &Shdr);
+ Error readProgramHeaders(const ELFFile<ELFT> &HeadersFile);
+ Error initGroupSection(GroupSection *GroupSec);
+ Error initSymbolTable(SymbolTableSection *SymTab);
+ Error readSectionHeaders();
+ Error readSections(bool EnsureSymtab);
+ Error findEhdrOffset();
+ Expected<SectionBase &> makeSection(const Elf_Shdr &Shdr);
public:
ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj,
Optional<StringRef> ExtractPartition)
- : ElfFile(*ElfObj.getELFFile()), Obj(Obj),
+ : ElfFile(ElfObj.getELFFile()), Obj(Obj),
ExtractPartition(ExtractPartition) {}
- void build(bool EnsureSymtab);
+ Error build(bool EnsureSymtab);
};
class BinaryReader : public Reader {
@@ -958,7 +970,7 @@ class BinaryReader : public Reader {
public:
BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility)
: MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {}
- std::unique_ptr<Object> create(bool EnsureSymtab) const override;
+ Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
};
class IHexReader : public Reader {
@@ -980,7 +992,7 @@ class IHexReader : public Reader {
public:
IHexReader(MemoryBuffer *MB) : MemBuf(MB) {}
- std::unique_ptr<Object> create(bool EnsureSymtab) const override;
+ Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
};
class ELFReader : public Reader {
@@ -988,7 +1000,7 @@ class ELFReader : public Reader {
Optional<StringRef> ExtractPartition;
public:
- std::unique_ptr<Object> create(bool EnsureSymtab) const override;
+ Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
explicit ELFReader(Binary *B, Optional<StringRef> ExtractPartition)
: Bin(B), ExtractPartition(ExtractPartition) {}
};
@@ -1072,7 +1084,7 @@ public:
Ptr->Index = Sections.size();
return *Ptr;
}
- void addNewSymbolTable();
+ Error addNewSymbolTable();
Segment &addSegment(ArrayRef<uint8_t> Data) {
Segments.emplace_back(std::make_unique<Segment>(Data));
return *Segments.back();
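The Object.h hunks above surface the same change at the interface level: every SectionVisitor/MutableSectionVisitor visit() and every SectionBase accept() now returns llvm::Error, so a writer can stop a traversal at the first failing section instead of aborting the process. A reduced sketch of that visitor shape, with illustrative names (Node, SizeChecker, walk):

#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"

#include <vector>

struct Node;

struct Visitor {
  virtual ~Visitor() = default;
  virtual llvm::Error visit(const Node &N) = 0;
};

struct Node {
  int Size = 0;
  // Like SectionBase::accept(): forward the visitor's Error to the caller.
  llvm::Error accept(Visitor &V) const { return V.visit(*this); }
};

struct SizeChecker : Visitor {
  llvm::Error visit(const Node &N) override {
    if (N.Size < 0)
      return llvm::createStringError(llvm::errc::invalid_argument,
                                     "negative node size");
    return llvm::Error::success();
  }
};

// Like ELFWriter<ELFT>::writeSectionData() above: bail out on the first failure.
inline llvm::Error walk(const std::vector<Node> &Nodes, Visitor &V) {
  for (const Node &N : Nodes)
    if (llvm::Error Err = N.accept(V))
      return Err;
  return llvm::Error::success();
}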
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/InstallNameToolOpts.td b/contrib/llvm-project/llvm/tools/llvm-objcopy/InstallNameToolOpts.td
index 04ffe62c42fc..88dea84400fb 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/InstallNameToolOpts.td
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/InstallNameToolOpts.td
@@ -18,9 +18,15 @@ def h : Flag<["-"], "h">, Alias<help>;
def add_rpath : Option<["-", "--"], "add_rpath", KIND_SEPARATE>,
HelpText<"Add new rpath">;
+def prepend_rpath : Option<["-", "--"], "prepend_rpath", KIND_SEPARATE>,
+ HelpText<"Add new rpath before other rpaths">;
+
def delete_rpath: Option<["-", "--"], "delete_rpath", KIND_SEPARATE>,
HelpText<"Delete specified rpath">;
+def delete_all_rpaths: Flag<["-", "--"], "delete_all_rpaths">,
+ HelpText<"Delete all rpath directives">;
+
def rpath: MultiArg<["-", "--"], "rpath", 2>,
HelpText<"Change rpath path name">;
@@ -32,3 +38,7 @@ def change: MultiArg<["-", "--"], "change", 2>,
def version : Flag<["--"], "version">,
HelpText<"Print the version and exit.">;
+
+def V : Flag<["-"], "V">,
+ Alias<version>,
+ HelpText<"Alias for --version">;
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
index 256c830a44a4..8e2bf36238ec 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
@@ -15,6 +15,14 @@ namespace llvm {
namespace objcopy {
namespace macho {
+StringTableBuilder::Kind
+MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
+ if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT)
+ return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO;
+ return Is64Bit ? StringTableBuilder::MachO64Linked
+ : StringTableBuilder::MachOLinked;
+}
+
uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
uint32_t Size = 0;
for (const LoadCommand &LC : O.LoadCommands) {
@@ -148,7 +156,7 @@ uint64_t MachOLayoutBuilder::layoutSegments() {
"Section's address cannot be smaller than Segment's one");
uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
if (IsObjectFile) {
- if (Sec->isVirtualSection()) {
+ if (!Sec->hasValidOffset()) {
Sec->Offset = 0;
} else {
uint64_t PaddingSize =
@@ -158,7 +166,7 @@ uint64_t MachOLayoutBuilder::layoutSegments() {
SegFileSize += PaddingSize + Sec->Size;
}
} else {
- if (Sec->isVirtualSection()) {
+ if (!Sec->hasValidOffset()) {
Sec->Offset = 0;
} else {
Sec->Offset = SegOffset + SectOffset;
@@ -252,6 +260,8 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
uint64_t StartOfCodeSignature =
StartOfSymbolStrings + StrTableBuilder.getSize();
+ if (O.CodeSignatureCommandIndex)
+ StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
uint64_t LinkEditSize =
(StartOfCodeSignature + O.CodeSignature.Data.size()) - StartOfLinkEdit;
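The layoutTail() hunk above rounds the start of the code signature up to a 16-byte boundary when O.CodeSignatureCommandIndex is set, i.e. when the object carries a code-signature load command; the 16-byte figure comes straight from the hunk. A small sketch of that alignment step with made-up offsets:

#include "llvm/Support/MathExtras.h"

#include <cstdint>

// Returns where the code-signature payload would begin.
inline uint64_t placeCodeSignature(uint64_t StartOfSymbolStrings,
                                   uint64_t StringTableSize,
                                   bool HasCodeSignature) {
  uint64_t Start = StartOfSymbolStrings + StringTableSize;
  if (HasCodeSignature)
    Start = llvm::alignTo(Start, 16); // Pad so the signature starts 16-byte aligned.
  return Start;
}

// Example: placeCodeSignature(0x1e00, 0x31, true) == 0x1e40, while without a
// code-signature command the value stays at 0x1e31.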
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
index 21cbe56605de..5fe6683e27f3 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
@@ -23,7 +23,7 @@ class MachOLayoutBuilder {
// Points to the __LINKEDIT segment if it exists.
MachO::macho_load_command *LinkEditLoadCommand = nullptr;
- StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
+ StringTableBuilder StrTableBuilder;
uint32_t computeSizeOfCmds() const;
void constructStringTable();
@@ -33,9 +33,13 @@ class MachOLayoutBuilder {
uint64_t layoutRelocations(uint64_t Offset);
Error layoutTail(uint64_t Offset);
+ static StringTableBuilder::Kind getStringTableBuilderKind(const Object &O,
+ bool Is64Bit);
+
public:
MachOLayoutBuilder(Object &O, bool Is64Bit, uint64_t PageSize)
- : O(O), Is64Bit(Is64Bit), PageSize(PageSize) {}
+ : O(O), Is64Bit(Is64Bit), PageSize(PageSize),
+ StrTableBuilder(getStringTableBuilderKind(O, Is64Bit)) {}
// Recomputes and updates fields in the given object such as file offsets.
Error layout();
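In the MachOLayoutBuilder header above, StrTableBuilder loses its hard-coded StringTableBuilder::MachO kind; the kind is now picked by the static getStringTableBuilderKind() helper so the constructor can compute it before the member initializer list runs. The same compute-then-initialize shape in miniature, with illustrative names (Table, Builder, pickKind):

enum class Kind { Object, Linked };

class Table {
public:
  explicit Table(Kind K) : K(K) {} // No default constructor, like StringTableBuilder.
  Kind kind() const { return K; }

private:
  Kind K;
};

class Builder {
  Table Tab; // Must be constructed with a Kind in the init list.

  // Static, so it can run before any member of Builder exists.
  static Kind pickKind(bool IsObjectFile) {
    return IsObjectFile ? Kind::Object : Kind::Linked;
  }

public:
  explicit Builder(bool IsObjectFile) : Tab(pickKind(IsObjectFile)) {}
  Kind tableKind() const { return Tab.kind(); }
};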
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 5ca5b133572b..fef4a0ae5594 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -8,9 +8,13 @@
#include "MachOObjcopy.h"
#include "../CopyConfig.h"
+#include "../llvm-objcopy.h"
#include "MachOReader.h"
#include "MachOWriter.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/MachOUniversalWriter.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
@@ -133,8 +137,14 @@ static Error processLoadCommands(const CopyConfig &Config, Object &Obj) {
DenseSet<StringRef> RPathsToRemove(Config.RPathsToRemove.begin(),
Config.RPathsToRemove.end());
- LoadCommandPred RemovePred = [&RPathsToRemove](const LoadCommand &LC) {
+ LoadCommandPred RemovePred = [&RPathsToRemove,
+ &Config](const LoadCommand &LC) {
if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
+ // When removing all RPaths we don't need to care
+ // about what they contain.
+ if (Config.RemoveAllRpaths)
+ return true;
+
StringRef RPath = getPayloadString(LC);
if (RPathsToRemove.count(RPath)) {
RPathsToRemove.erase(RPath);
@@ -168,26 +178,22 @@ static Error processLoadCommands(const CopyConfig &Config, Object &Obj) {
for (const auto &OldNew : Config.RPathsToUpdate) {
StringRef Old = OldNew.getFirst();
StringRef New = OldNew.getSecond();
- if (RPaths.count(Old) == 0)
+ if (!RPaths.contains(Old))
return createStringError(errc::invalid_argument,
"no LC_RPATH load command with path: " + Old);
- if (RPaths.count(New) != 0)
+ if (RPaths.contains(New))
return createStringError(errc::invalid_argument,
- "rpath " + New +
- " would create a duplicate load command");
+ "rpath '" + New +
+ "' would create a duplicate load command");
}
// Update load commands.
for (LoadCommand &LC : Obj.LoadCommands) {
switch (LC.MachOLoadCommand.load_command_data.cmd) {
case MachO::LC_ID_DYLIB:
- if (Config.SharedLibId) {
- StringRef Id = Config.SharedLibId.getValue();
- if (Id.empty())
- return createStringError(errc::invalid_argument,
- "cannot specify an empty id");
- updateLoadCommandPayloadString<MachO::dylib_command>(LC, Id);
- }
+ if (Config.SharedLibId)
+ updateLoadCommandPayloadString<MachO::dylib_command>(
+ LC, *Config.SharedLibId);
break;
case MachO::LC_RPATH: {
@@ -214,14 +220,30 @@ static Error processLoadCommands(const CopyConfig &Config, Object &Obj) {
// Add new RPaths.
for (StringRef RPath : Config.RPathToAdd) {
- if (RPaths.count(RPath) != 0)
+ if (RPaths.contains(RPath))
+ return createStringError(errc::invalid_argument,
+ "rpath '" + RPath +
+ "' would create a duplicate load command");
+ RPaths.insert(RPath);
+ Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
+ }
+
+ for (StringRef RPath : Config.RPathToPrepend) {
+ if (RPaths.contains(RPath))
return createStringError(errc::invalid_argument,
- "rpath " + RPath +
- " would create a duplicate load command");
+ "rpath '" + RPath +
+ "' would create a duplicate load command");
+
RPaths.insert(RPath);
- Obj.addLoadCommand(buildRPathLoadCommand(RPath));
+ Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
+ buildRPathLoadCommand(RPath));
}
+ // Unlike appending rpaths, the indexes of subsequent load commands must
+ // be recalculated after prepending one.
+ if (!Config.RPathToPrepend.empty())
+ Obj.updateLoadCommandIndexes();
+
return Error::success();
}
@@ -258,27 +280,34 @@ static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) {
StringRef TargetSegName = Pair.first;
Section Sec(TargetSegName, Pair.second);
Sec.Content = Obj.NewSectionsContents.save(Buf->getBuffer());
+ Sec.Size = Sec.Content.size();
// Add the section into an existing segment.
for (LoadCommand &LC : Obj.LoadCommands) {
Optional<StringRef> SegName = LC.getSegmentName();
if (SegName && SegName == TargetSegName) {
+ uint64_t Addr = *LC.getSegmentVMAddr();
+ for (const std::unique_ptr<Section> &S : LC.Sections)
+ Addr = std::max(Addr, S->Addr + S->Size);
LC.Sections.push_back(std::make_unique<Section>(Sec));
+ LC.Sections.back()->Addr = Addr;
return Error::success();
}
}
// There's no segment named TargetSegName. Create a new load command and
// insert a new section into it.
- LoadCommand &NewSegment = Obj.addSegment(TargetSegName);
+ LoadCommand &NewSegment =
+ Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
+ NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
return Error::success();
}
// isValidMachOCannonicalName returns success if Name is a canonical Mach-O name
// ("<segment>,<section>") and the lengths of both the segment and section names
// are valid.
-Error isValidMachOCannonicalName(StringRef Name) {
+static Error isValidMachOCannonicalName(StringRef Name) {
if (Name.count(',') != 1)
return createStringError(errc::invalid_argument,
"invalid section name '%s' (should be formatted "
@@ -311,8 +340,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
Config.ExtractDWO || Config.LocalizeHidden || Config.PreserveDates ||
Config.StripAllGNU || Config.StripDWO || Config.StripNonAlloc ||
Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
- Config.StripNonAlloc || Config.StripSections || Config.StripUnneeded ||
- Config.DiscardMode == DiscardType::Locals ||
+ Config.StripUnneeded || Config.DiscardMode == DiscardType::Locals ||
!Config.SymbolsToAdd.empty() || Config.EntryExpr) {
return createStringError(llvm::errc::invalid_argument,
"option not supported by llvm-objcopy for MachO");
@@ -360,14 +388,11 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
Error executeObjcopyOnBinary(const CopyConfig &Config,
object::MachOObjectFile &In, Buffer &Out) {
MachOReader Reader(In);
- std::unique_ptr<Object> O = Reader.create();
+ Expected<std::unique_ptr<Object>> O = Reader.create();
if (!O)
- return createFileError(
- Config.InputFilename,
- createStringError(object_error::parse_failed,
- "unable to deserialize MachO object"));
+ return createFileError(Config.InputFilename, O.takeError());
- if (Error E = handleArgs(Config, *O))
+ if (Error E = handleArgs(Config, **O))
return createFileError(Config.InputFilename, std::move(E));
// Page size used for alignment of segment sizes in Mach-O executables and
@@ -383,12 +408,81 @@ Error executeObjcopyOnBinary(const CopyConfig &Config,
PageSize = 4096;
}
- MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
+ MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
if (auto E = Writer.finalize())
return E;
return Writer.write();
}
+Error executeObjcopyOnMachOUniversalBinary(CopyConfig &Config,
+ const MachOUniversalBinary &In,
+ Buffer &Out) {
+ SmallVector<OwningBinary<Binary>, 2> Binaries;
+ SmallVector<Slice, 2> Slices;
+ for (const auto &O : In.objects()) {
+ Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
+ if (ArOrErr) {
+ Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
+ createNewArchiveMembers(Config, **ArOrErr);
+ if (!NewArchiveMembersOrErr)
+ return NewArchiveMembersOrErr.takeError();
+ Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
+ writeArchiveToBuffer(*NewArchiveMembersOrErr,
+ (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(),
+ Config.DeterministicArchives,
+ (*ArOrErr)->isThin());
+ if (!OutputBufferOrErr)
+ return OutputBufferOrErr.takeError();
+ Expected<std::unique_ptr<Binary>> BinaryOrErr =
+ object::createBinary(**OutputBufferOrErr);
+ if (!BinaryOrErr)
+ return BinaryOrErr.takeError();
+ Binaries.emplace_back(std::move(*BinaryOrErr),
+ std::move(*OutputBufferOrErr));
+ Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
+ O.getCPUType(), O.getCPUSubType(),
+ O.getArchFlagName(), O.getAlign());
+ continue;
+ }
+ // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
+ // ObjectForArch return an Error in case of a type mismatch. We need to
+ // check each in turn to see what kind of slice this is, so ignore errors
+ // produced along the way.
+ consumeError(ArOrErr.takeError());
+
+ Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
+ if (!ObjOrErr) {
+ consumeError(ObjOrErr.takeError());
+ return createStringError(std::errc::invalid_argument,
+ "slice for '%s' of the universal Mach-O binary "
+ "'%s' is not a Mach-O object or an archive",
+ O.getArchFlagName().c_str(),
+ Config.InputFilename.str().c_str());
+ }
+ std::string ArchFlagName = O.getArchFlagName();
+ MemBuffer MB(ArchFlagName);
+ if (Error E = executeObjcopyOnBinary(Config, **ObjOrErr, MB))
+ return E;
+ std::unique_ptr<WritableMemoryBuffer> OutputBuffer =
+ MB.releaseMemoryBuffer();
+ Expected<std::unique_ptr<Binary>> BinaryOrErr =
+ object::createBinary(*OutputBuffer);
+ if (!BinaryOrErr)
+ return BinaryOrErr.takeError();
+ Binaries.emplace_back(std::move(*BinaryOrErr), std::move(OutputBuffer));
+ Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
+ O.getAlign());
+ }
+ Expected<std::unique_ptr<MemoryBuffer>> B =
+ writeUniversalBinaryToBuffer(Slices);
+ if (!B)
+ return B.takeError();
+ if (Error E = Out.allocate((*B)->getBufferSize()))
+ return E;
+ memcpy(Out.getBufferStart(), (*B)->getBufferStart(), (*B)->getBufferSize());
+ return Out.commit();
+}
+
} // end namespace macho
} // end namespace objcopy
} // end namespace llvm
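Editor's note: the new universal-binary path probes each slice by attempting one accessor after another and deliberately discarding the intermediate errors. A simplified sketch of that control flow with llvm::Expected, where tryAsArchive/tryAsObject are hypothetical stand-ins for ObjectForArch::getAsArchive/getAsObjectFile:
#include "llvm/Support/Error.h"

using namespace llvm;

// Hypothetical probes: each returns an Error when the slice is of a different
// kind, just like the ObjectForArch accessors.
static Expected<int> tryAsArchive() {
  return createStringError(inconvertibleErrorCode(), "not an archive");
}
static Expected<int> tryAsObject() { return 42; }

static Error handleSlice() {
  if (Expected<int> Ar = tryAsArchive()) {
    // ... handle the archive slice ...
    return Error::success();
  } else {
    // Wrong kind: swallow the error and fall through to the next probe.
    consumeError(Ar.takeError());
  }
  Expected<int> Obj = tryAsObject();
  if (!Obj)
    return Obj.takeError(); // genuinely unsupported slice
  // ... handle the Mach-O object slice ...
  return Error::success();
}
In the real function the per-slice outputs are then collected into Slices and reassembled with writeUniversalBinaryToBuffer.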
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
index f34e361db7ea..c3f5391f79b6 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
@@ -24,6 +24,10 @@ class Buffer;
namespace macho {
Error executeObjcopyOnBinary(const CopyConfig &Config,
object::MachOObjectFile &In, Buffer &Out);
+
+Error executeObjcopyOnMachOUniversalBinary(
+ CopyConfig &Config, const object::MachOUniversalBinary &In, Buffer &Out);
+
} // end namespace macho
} // end namespace objcopy
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
index 99bcec7f6b51..548a85bd497e 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -7,10 +7,10 @@
//===----------------------------------------------------------------------===//
#include "MachOReader.h"
-#include "../llvm-objcopy.h"
#include "Object.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/MachO.h"
+#include "llvm/Support/Errc.h"
#include <memory>
namespace llvm {
@@ -35,7 +35,7 @@ Section constructSectionCommon(SectionType Sec, uint32_t Index) {
S.Index = Index;
S.Addr = Sec.addr;
S.Size = Sec.size;
- S.Offset = Sec.offset;
+ S.OriginalOffset = Sec.offset;
S.Align = Sec.align;
S.RelOff = Sec.reloff;
S.NReloc = Sec.nreloc;
@@ -59,9 +59,8 @@ template <> Section constructSection(MachO::section_64 Sec, uint32_t Index) {
return S;
}
-// TODO: get rid of reportError and make MachOReader return Expected<> instead.
template <typename SectionType, typename SegmentType>
-std::vector<std::unique_ptr<Section>>
+Expected<std::vector<std::unique_ptr<Section>>>
extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
const object::MachOObjectFile &MachOObj,
uint32_t &NextSectionIndex) {
@@ -86,14 +85,15 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
Expected<object::SectionRef> SecRef =
MachOObj.getSection(NextSectionIndex++);
if (!SecRef)
- reportError(MachOObj.getFileName(), SecRef.takeError());
+ return SecRef.takeError();
- if (Expected<ArrayRef<uint8_t>> E =
- MachOObj.getSectionContents(SecRef->getRawDataRefImpl()))
- S.Content =
- StringRef(reinterpret_cast<const char *>(E->data()), E->size());
- else
- reportError(MachOObj.getFileName(), E.takeError());
+ Expected<ArrayRef<uint8_t>> Data =
+ MachOObj.getSectionContents(SecRef->getRawDataRefImpl());
+ if (!Data)
+ return Data.takeError();
+
+ S.Content =
+ StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
S.Relocations.reserve(S.NReloc);
for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
@@ -110,10 +110,10 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
assert(S.NReloc == S.Relocations.size() &&
"Incorrect number of relocations");
}
- return Sections;
+ return std::move(Sections);
}
-void MachOReader::readLoadCommands(Object &O) const {
+Error MachOReader::readLoadCommands(Object &O) const {
// For Mach-O, section indices start from 1.
uint32_t NextSectionIndex = 1;
for (auto LoadCmd : MachOObj.load_commands()) {
@@ -123,13 +123,20 @@ void MachOReader::readLoadCommands(Object &O) const {
O.CodeSignatureCommandIndex = O.LoadCommands.size();
break;
case MachO::LC_SEGMENT:
- LC.Sections = extractSections<MachO::section, MachO::segment_command>(
- LoadCmd, MachOObj, NextSectionIndex);
+ if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
+ extractSections<MachO::section, MachO::segment_command>(
+ LoadCmd, MachOObj, NextSectionIndex))
+ LC.Sections = std::move(*Sections);
+ else
+ return Sections.takeError();
break;
case MachO::LC_SEGMENT_64:
- LC.Sections =
- extractSections<MachO::section_64, MachO::segment_command_64>(
- LoadCmd, MachOObj, NextSectionIndex);
+ if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
+ extractSections<MachO::section_64, MachO::segment_command_64>(
+ LoadCmd, MachOObj, NextSectionIndex))
+ LC.Sections = std::move(*Sections);
+ else
+ return Sections.takeError();
break;
case MachO::LC_SYMTAB:
O.SymTabCommandIndex = O.LoadCommands.size();
@@ -177,6 +184,7 @@ void MachOReader::readLoadCommands(Object &O) const {
}
O.LoadCommands.push_back(std::move(LC));
}
+ return Error::success();
}
template <typename nlist_t>
@@ -308,10 +316,11 @@ void MachOReader::readSwiftVersion(Object &O) const {
}
}
-std::unique_ptr<Object> MachOReader::create() const {
+Expected<std::unique_ptr<Object>> MachOReader::create() const {
auto Obj = std::make_unique<Object>();
readHeader(*Obj);
- readLoadCommands(*Obj);
+ if (Error E = readLoadCommands(*Obj))
+ return std::move(E);
readSymbolTable(*Obj);
setSymbolInRelocationInfo(*Obj);
readRebaseInfo(*Obj);
@@ -324,7 +333,7 @@ std::unique_ptr<Object> MachOReader::create() const {
readFunctionStartsData(*Obj);
readIndirectSymbolTable(*Obj);
readSwiftVersion(*Obj);
- return Obj;
+ return std::move(Obj);
}
} // end namespace macho
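Editor's note: the reader conversion above replaces fatal reportError calls with Error/Expected values that bubble up to the caller. A minimal sketch of the resulting shape, using invented names (Widget, readValue, parseWidget) rather than the MachOReader API:
#include "llvm/Support/Error.h"

#include <memory>

using namespace llvm;

struct Widget {
  int Value = 0;
};

// Each step now reports failure as an Error instead of exiting the process.
static Error readValue(Widget &W, bool Ok) {
  if (!Ok)
    return createStringError(inconvertibleErrorCode(), "bad load command");
  W.Value = 1;
  return Error::success();
}

// The factory returns Expected<> so the caller decides how to handle failure.
static Expected<std::unique_ptr<Widget>> parseWidget(bool Ok) {
  auto W = std::make_unique<Widget>();
  if (Error E = readValue(*W, Ok))
    return std::move(E); // propagate, mirroring `return Sections.takeError()`
  return std::move(W);   // move the result into the Expected wrapper
}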
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.h
index 65824b6eb389..b446e02865e5 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.h
@@ -21,14 +21,14 @@ namespace macho {
class Reader {
public:
virtual ~Reader(){};
- virtual std::unique_ptr<Object> create() const = 0;
+ virtual Expected<std::unique_ptr<Object>> create() const = 0;
};
class MachOReader : public Reader {
const object::MachOObjectFile &MachOObj;
void readHeader(Object &O) const;
- void readLoadCommands(Object &O) const;
+ Error readLoadCommands(Object &O) const;
void readSymbolTable(Object &O) const;
void setSymbolInRelocationInfo(Object &O) const;
void readRebaseInfo(Object &O) const;
@@ -46,7 +46,7 @@ class MachOReader : public Reader {
public:
explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {}
- std::unique_ptr<Object> create() const override;
+ Expected<std::unique_ptr<Object>> create() const override;
};
} // end namespace macho
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
index 3c41e73b2b01..56dd08df3b09 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -121,6 +121,14 @@ size_t MachOWriter::totalSize() const {
// Otherwise, use the last section / relocation.
for (const LoadCommand &LC : O.LoadCommands)
for (const std::unique_ptr<Section> &S : LC.Sections) {
+ if (!S->hasValidOffset()) {
+ assert((S->Offset == 0) && "Skipped section's offset must be zero");
+ assert((S->isVirtualSection() || S->Size == 0) &&
+ "Non-zero-fill sections with zero offset must have zero size");
+ continue;
+ }
+ assert((S->Offset != 0) &&
+ "Non-zero-fill section's offset cannot be zero");
Ends.push_back(S->Offset + S->Size);
if (S->RelOff)
Ends.push_back(S->RelOff +
@@ -240,8 +248,12 @@ void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) {
void MachOWriter::writeSections() {
for (const LoadCommand &LC : O.LoadCommands)
for (const std::unique_ptr<Section> &Sec : LC.Sections) {
- if (Sec->isVirtualSection())
+ if (!Sec->hasValidOffset()) {
+ assert((Sec->Offset == 0) && "Skipped section's offset must be zero");
+ assert((Sec->isVirtualSection() || Sec->Size == 0) &&
+ "Non-zero-fill sections with zero offset must have zero size");
continue;
+ }
assert(Sec->Offset && "Section offset can not be zero");
assert((Sec->Size == Sec->Content.size()) && "Incorrect section size");
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.cpp
index de8cb0af108d..cdb97531fb66 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "Object.h"
-#include "../llvm-objcopy.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <unordered_set>
@@ -27,9 +26,7 @@ SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
void SymbolTable::removeSymbols(
function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
- Symbols.erase(
- std::remove_if(std::begin(Symbols), std::end(Symbols), ToRemove),
- std::end(Symbols));
+ llvm::erase_if(Symbols, ToRemove);
}
void Object::updateLoadCommandIndexes() {
@@ -111,28 +108,54 @@ Error Object::removeSections(
return Error::success();
}
-void Object::addLoadCommand(LoadCommand LC) {
- LoadCommands.push_back(std::move(LC));
+uint64_t Object::nextAvailableSegmentAddress() const {
+ uint64_t HeaderSize =
+ is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+ uint64_t Addr = HeaderSize + Header.SizeOfCmds;
+ for (const LoadCommand &LC : LoadCommands) {
+ const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ Addr = std::max(Addr,
+ static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
+ MLC.segment_command_data.vmsize);
+ break;
+ case MachO::LC_SEGMENT_64:
+ Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
+ MLC.segment_command_64_data.vmsize);
+ break;
+ default:
+ continue;
+ }
+ }
+ return Addr;
}
template <typename SegmentType>
-static void constructSegment(SegmentType &Seg,
- llvm::MachO::LoadCommandType CmdType,
- StringRef SegName) {
+static void
+constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
+ StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
memset(&Seg, 0, sizeof(SegmentType));
Seg.cmd = CmdType;
strncpy(Seg.segname, SegName.data(), SegName.size());
+ Seg.maxprot |=
+ (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
+ Seg.initprot |=
+ (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
+ Seg.vmaddr = SegVMAddr;
+ Seg.vmsize = SegVMSize;
}
-LoadCommand &Object::addSegment(StringRef SegName) {
+LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
LoadCommand LC;
+ const uint64_t SegVMAddr = nextAvailableSegmentAddress();
if (is64Bit())
constructSegment(LC.MachOLoadCommand.segment_command_64_data,
- MachO::LC_SEGMENT_64, SegName);
+ MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
else
constructSegment(LC.MachOLoadCommand.segment_command_data,
- MachO::LC_SEGMENT, SegName);
+ MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
LoadCommands.push_back(std::move(LC));
return LoadCommands.back();
@@ -156,6 +179,18 @@ Optional<StringRef> LoadCommand::getSegmentName() const {
}
}
+Optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
+ const MachO::macho_load_command &MLC = MachOLoadCommand;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ return MLC.segment_command_data.vmaddr;
+ case MachO::LC_SEGMENT_64:
+ return MLC.segment_command_64_data.vmaddr;
+ default:
+ return None;
+ }
+}
+
} // end namespace macho
} // end namespace objcopy
} // end namespace llvm
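Editor's note: nextAvailableSegmentAddress picks the first VM address past every existing segment, seeded with the space taken by the header and load commands. A stripped-down standalone sketch with a hypothetical Segment record:
#include <algorithm>
#include <cstdint>
#include <vector>

struct Segment {
  uint64_t VMAddr;
  uint64_t VMSize;
};

// Smallest address not covered by any existing segment, and never lower than
// the bytes already occupied by the Mach-O header plus its load commands.
static uint64_t nextAvailableAddress(const std::vector<Segment> &Segs,
                                     uint64_t HeaderAndCmdsSize) {
  uint64_t Addr = HeaderAndCmdsSize;
  for (const Segment &S : Segs)
    Addr = std::max(Addr, S.VMAddr + S.VMSize);
  return Addr;
}
addSegment then places the new segment at this address with read/write/execute maxprot/initprot, as the constructSegment change shows.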
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.h
index e825d1867b09..0bb4b344b2eb 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.h
@@ -44,6 +44,8 @@ struct Section {
std::string CanonicalName;
uint64_t Addr = 0;
uint64_t Size = 0;
+ // Offset in the input file.
+ Optional<uint32_t> OriginalOffset;
uint32_t Offset = 0;
uint32_t Align = 0;
uint32_t RelOff = 0;
@@ -73,6 +75,10 @@ struct Section {
getType() == MachO::S_GB_ZEROFILL ||
getType() == MachO::S_THREAD_LOCAL_ZEROFILL);
}
+
+ bool hasValidOffset() const {
+ return !(isVirtualSection() || (OriginalOffset && *OriginalOffset == 0));
+ }
};
struct LoadCommand {
@@ -94,6 +100,9 @@ struct LoadCommand {
// Returns the segment name if the load command is a segment command.
Optional<StringRef> getSegmentName() const;
+
+ // Returns the segment vm address if the load command is a segment command.
+ Optional<uint64_t> getSegmentVMAddr() const;
};
// Symbol information. Fields that start with "n_" are the same as those in the
@@ -331,17 +340,17 @@ struct Object {
void updateLoadCommandIndexes();
- void addLoadCommand(LoadCommand LC);
-
/// Creates a new segment load command in the object and returns a reference
/// to the newly created load command. The caller should verify that SegName
/// is not too long (SegName.size() should be less than or equal to 16).
- LoadCommand &addSegment(StringRef SegName);
+ LoadCommand &addSegment(StringRef SegName, uint64_t SegVMSize);
bool is64Bit() const {
return Header.Magic == MachO::MH_MAGIC_64 ||
Header.Magic == MachO::MH_CIGAM_64;
}
+
+ uint64_t nextAvailableSegmentAddress() const;
};
} // end namespace macho
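Editor's note: hasValidOffset distinguishes sections with file-backed contents from zerofill sections and from sections whose recorded input offset was zero. The same predicate expressed against std::optional, with SectionLike as an illustrative stand-in for the patched struct:
#include <cstdint>
#include <optional>

struct SectionLike {
  bool Zerofill = false;                  // plays the role of isVirtualSection()
  std::optional<uint32_t> OriginalOffset; // offset in the input file, if any

  bool hasValidOffset() const {
    // Zerofill sections and sections read with offset 0 carry no data on
    // disk, so their offset must not be used for layout.
    return !(Zerofill || (OriginalOffset && *OriginalOffset == 0));
  }
};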
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index 69b23b6cf975..7fd2acd11e99 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm-objcopy.h"
#include "Buffer.h"
#include "COFF/COFFObjcopy.h"
#include "CopyConfig.h"
@@ -26,6 +25,7 @@
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
@@ -57,42 +57,36 @@ namespace objcopy {
// The name this program was invoked as.
StringRef ToolName;
-LLVM_ATTRIBUTE_NORETURN void error(Twine Message) {
- WithColor::error(errs(), ToolName) << Message << "\n";
- exit(1);
-}
-
-LLVM_ATTRIBUTE_NORETURN void error(Error E) {
- assert(E);
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS);
- OS.flush();
- WithColor::error(errs(), ToolName) << Buf;
- exit(1);
-}
-
-LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, std::error_code EC) {
- assert(EC);
- error(createFileError(File, EC));
-}
-
-LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) {
- assert(E);
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS);
- OS.flush();
- WithColor::error(errs(), ToolName) << "'" << File << "': " << Buf;
- exit(1);
-}
-
ErrorSuccess reportWarning(Error E) {
assert(E);
WithColor::warning(errs(), ToolName) << toString(std::move(E)) << '\n';
return Error::success();
}
+static Expected<DriverConfig> getDriverConfig(ArrayRef<const char *> Args) {
+ StringRef Stem = sys::path::stem(ToolName);
+ auto Is = [=](StringRef Tool) {
+ // We need to recognize the following filenames:
+ //
+ // llvm-objcopy -> objcopy
+ // strip-10.exe -> strip
+ // powerpc64-unknown-freebsd13-objcopy -> objcopy
+ // llvm-install-name-tool -> install-name-tool
+ auto I = Stem.rfind_lower(Tool);
+ return I != StringRef::npos &&
+ (I + Tool.size() == Stem.size() || !isAlnum(Stem[I + Tool.size()]));
+ };
+
+ if (Is("bitcode-strip") || Is("bitcode_strip"))
+ return parseBitcodeStripOptions(Args);
+ else if (Is("strip"))
+ return parseStripOptions(Args, reportWarning);
+ else if (Is("install-name-tool") || Is("install_name_tool"))
+ return parseInstallNameToolOptions(Args);
+ else
+ return parseObjcopyOptions(Args, reportWarning);
+}
+
} // end namespace objcopy
} // end namespace llvm
@@ -175,6 +169,10 @@ static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In,
return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out);
else if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In))
return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out);
+ else if (auto *MachOUniversalBinary =
+ dyn_cast<object::MachOUniversalBinary>(&In))
+ return macho::executeObjcopyOnMachOUniversalBinary(
+ Config, *MachOUniversalBinary, Out);
else if (auto *WasmBinary = dyn_cast<object::WasmObjectFile>(&In))
return objcopy::wasm::executeObjcopyOnBinary(Config, *WasmBinary, Out);
else
@@ -182,7 +180,11 @@ static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In,
"unsupported object file format");
}
-static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
+namespace llvm {
+namespace objcopy {
+
+Expected<std::vector<NewArchiveMember>>
+createNewArchiveMembers(CopyConfig &Config, const Archive &Ar) {
std::vector<NewArchiveMember> NewArchiveMembers;
Error Err = Error::success();
for (const Archive::Child &Child : Ar.children(Err)) {
@@ -197,7 +199,7 @@ static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
MemBuffer MB(ChildNameOrErr.get());
if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MB))
- return E;
+ return std::move(E);
Expected<NewArchiveMember> Member =
NewArchiveMember::getOldMember(Child, Config.DeterministicArchives);
@@ -209,8 +211,19 @@ static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
}
if (Err)
return createFileError(Config.InputFilename, std::move(Err));
+ return std::move(NewArchiveMembers);
+}
- return deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
+} // end namespace objcopy
+} // end namespace llvm
+
+static Error executeObjcopyOnArchive(CopyConfig &Config,
+ const object::Archive &Ar) {
+ Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
+ createNewArchiveMembers(Config, Ar);
+ if (!NewArchiveMembersOrErr)
+ return NewArchiveMembersOrErr.takeError();
+ return deepWriteArchive(Config.OutputFilename, *NewArchiveMembersOrErr,
Ar.hasSymbolTable(), Ar.kind(),
Config.DeterministicArchives, Ar.isThin());
}
@@ -320,32 +333,12 @@ static Error executeObjcopy(CopyConfig &Config) {
namespace {
-enum class ToolType { Objcopy, Strip, InstallNameTool };
-
} // anonymous namespace
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
ToolName = argv[0];
- StringRef Stem = sys::path::stem(ToolName);
- auto Is = [=](StringRef Tool) {
- // We need to recognize the following filenames:
- //
- // llvm-objcopy -> objcopy
- // strip-10.exe -> strip
- // powerpc64-unknown-freebsd13-objcopy -> objcopy
- // llvm-install-name-tool -> install-name-tool
- auto I = Stem.rfind_lower(Tool);
- return I != StringRef::npos &&
- (I + Tool.size() == Stem.size() || !isAlnum(Stem[I + Tool.size()]));
- };
- ToolType Tool = ToolType::Objcopy;
- if (Is("strip"))
- Tool = ToolType::Strip;
- else if (Is("install-name-tool") || Is("install_name_tool"))
- Tool = ToolType::InstallNameTool;
-
// Expand response files.
// TODO: Move these lines, which are copied from lib/Support/CommandLine.cpp,
// into a separate function in the CommandLine library and call that function
@@ -360,11 +353,8 @@ int main(int argc, char **argv) {
NewArgv);
auto Args = makeArrayRef(NewArgv).drop_front();
- Expected<DriverConfig> DriverConfig =
- (Tool == ToolType::Strip) ? parseStripOptions(Args, reportWarning)
- : ((Tool == ToolType::InstallNameTool)
- ? parseInstallNameToolOptions(Args)
- : parseObjcopyOptions(Args, reportWarning));
+ Expected<DriverConfig> DriverConfig = getDriverConfig(Args);
+
if (!DriverConfig) {
logAllUnhandledErrors(DriverConfig.takeError(),
WithColor::error(errs(), ToolName));
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.h
index 18a789ca1f83..97a166769f95 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.h
@@ -9,32 +9,22 @@
#ifndef LLVM_TOOLS_OBJCOPY_OBJCOPY_H
#define LLVM_TOOLS_OBJCOPY_OBJCOPY_H
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string>
namespace llvm {
-namespace objcopy {
-LLVM_ATTRIBUTE_NORETURN extern void error(Twine Message);
-LLVM_ATTRIBUTE_NORETURN extern void error(Error E);
-LLVM_ATTRIBUTE_NORETURN extern void reportError(StringRef File, Error E);
-LLVM_ATTRIBUTE_NORETURN extern void reportError(StringRef File,
- std::error_code EC);
-
-// This is taken from llvm-readobj.
-// [see here](llvm/tools/llvm-readobj/llvm-readobj.h:38)
-template <class T> T unwrapOrError(Expected<T> EO) {
- if (EO)
- return *EO;
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(EO.takeError(), OS);
- OS.flush();
- error(Buf);
-}
+struct NewArchiveMember;
+
+namespace object {
+
+class Archive;
+
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+Expected<std::vector<NewArchiveMember>>
+createNewArchiveMembers(CopyConfig &Config, const object::Archive &Ar);
} // end namespace objcopy
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/wasm/Object.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/wasm/Object.cpp
index 0c416483663f..e7a2956fedca 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/wasm/Object.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/wasm/Object.cpp
@@ -26,9 +26,7 @@ void Object::addSectionWithOwnedContents(
void Object::removeSections(function_ref<bool(const Section &)> ToRemove) {
// TODO: remove reloc sections for the removed section, handle symbols, etc.
- Sections.erase(
- std::remove_if(std::begin(Sections), std::end(Sections), ToRemove),
- std::end(Sections));
+ llvm::erase_if(Sections, ToRemove);
}
} // end namespace wasm
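Editor's note: both removeSymbols and removeSections now call llvm::erase_if instead of spelling out the erase-remove idiom by hand. The two forms are equivalent, as this small std-only comparison suggests (std::erase_if needs C++20; llvm::erase_if lives in llvm/ADT/STLExtras.h):
#include <algorithm>
#include <vector>

int main() {
  std::vector<int> A{1, 2, 3, 4, 5}, B = A;
  auto IsEven = [](int X) { return X % 2 == 0; };

  // The hand-written erase-remove idiom the old code used.
  A.erase(std::remove_if(A.begin(), A.end(), IsEven), A.end());

  // The one-liner the patch switches to; llvm::erase_if(B, IsEven) reads the
  // same way.
  std::erase_if(B, IsEven);

  return A == B ? 0 : 1; // both now hold {1, 3, 5}
}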
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp
index 20781cef2d33..eb0e5635cef9 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp
@@ -12,7 +12,6 @@
#include "Object.h"
#include "Reader.h"
#include "Writer.h"
-#include "llvm-objcopy.h"
#include "llvm/Support/Errc.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp b/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp
index 602bc6388252..1c4d59179cc7 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -25,14 +25,14 @@ using namespace llvm::object;
using namespace llvm::objdump;
template <class ELFT>
-static Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> *Elf) {
- auto DynamicEntriesOrError = Elf->dynamicEntries();
+static Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> &Elf) {
+ auto DynamicEntriesOrError = Elf.dynamicEntries();
if (!DynamicEntriesOrError)
return DynamicEntriesOrError.takeError();
for (const typename ELFT::Dyn &Dyn : *DynamicEntriesOrError) {
if (Dyn.d_tag == ELF::DT_STRTAB) {
- auto MappedAddrOrError = Elf->toMappedAddr(Dyn.getPtr());
+ auto MappedAddrOrError = Elf.toMappedAddr(Dyn.getPtr());
if (!MappedAddrOrError)
consumeError(MappedAddrOrError.takeError());
return StringRef(reinterpret_cast<const char *>(*MappedAddrOrError));
@@ -40,13 +40,13 @@ static Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> *Elf) {
}
// If the dynamic segment is not present, we fall back on the sections.
- auto SectionsOrError = Elf->sections();
+ auto SectionsOrError = Elf.sections();
if (!SectionsOrError)
return SectionsOrError.takeError();
for (const typename ELFT::Shdr &Sec : *SectionsOrError) {
if (Sec.sh_type == ELF::SHT_DYNSYM)
- return Elf->getStringTableForSymtab(Sec);
+ return Elf.getStringTableForSymtab(Sec);
}
return createError("dynamic string table not found");
@@ -56,7 +56,7 @@ template <class ELFT>
static Error getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
const RelocationRef &RelRef,
SmallVectorImpl<char> &Result) {
- const ELFFile<ELFT> &EF = *Obj->getELFFile();
+ const ELFFile<ELFT> &EF = Obj->getELFFile();
DataRefImpl Rel = RelRef.getRawDataRefImpl();
auto SecOrErr = EF.getSection(Rel.d.a);
if (!SecOrErr)
@@ -85,14 +85,19 @@ static Error getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
if (!Undef) {
symbol_iterator SI = RelRef.getSymbol();
- const typename ELFT::Sym *Sym = Obj->getSymbol(SI->getRawDataRefImpl());
- if (Sym->getType() == ELF::STT_SECTION) {
+ Expected<const typename ELFT::Sym *> SymOrErr =
+ Obj->getSymbol(SI->getRawDataRefImpl());
+ // TODO: test this error.
+ if (!SymOrErr)
+ return SymOrErr.takeError();
+
+ if ((*SymOrErr)->getType() == ELF::STT_SECTION) {
Expected<section_iterator> SymSI = SI->getSection();
if (!SymSI)
return SymSI.takeError();
const typename ELFT::Shdr *SymSec =
Obj->getSection((*SymSI)->getRawDataRefImpl());
- auto SecName = EF.getSectionName(SymSec);
+ auto SecName = EF.getSectionName(*SymSec);
if (!SecName)
return SecName.takeError();
Fmt << *SecName;
@@ -133,9 +138,9 @@ Error objdump::getELFRelocationValueString(const ELFObjectFileBase *Obj,
}
template <class ELFT>
-static uint64_t getSectionLMA(const ELFFile<ELFT> *Obj,
+static uint64_t getSectionLMA(const ELFFile<ELFT> &Obj,
const object::ELFSectionRef &Sec) {
- auto PhdrRangeOrErr = Obj->program_headers();
+ auto PhdrRangeOrErr = Obj.program_headers();
if (!PhdrRangeOrErr)
report_fatal_error(toString(PhdrRangeOrErr.takeError()));
@@ -162,14 +167,14 @@ uint64_t objdump::getELFSectionLMA(const object::ELFSectionRef &Sec) {
}
template <class ELFT>
-static void printDynamicSection(const ELFFile<ELFT> *Elf, StringRef Filename) {
+static void printDynamicSection(const ELFFile<ELFT> &Elf, StringRef Filename) {
ArrayRef<typename ELFT::Dyn> DynamicEntries =
- unwrapOrError(Elf->dynamicEntries(), Filename);
+ unwrapOrError(Elf.dynamicEntries(), Filename);
// Find the maximum tag name length to format the value column properly.
size_t MaxLen = 0;
for (const typename ELFT::Dyn &Dyn : DynamicEntries)
- MaxLen = std::max(MaxLen, Elf->getDynamicTagAsString(Dyn.d_tag).size());
+ MaxLen = std::max(MaxLen, Elf.getDynamicTagAsString(Dyn.d_tag).size());
std::string TagFmt = " %-" + std::to_string(MaxLen) + "s ";
outs() << "Dynamic Section:\n";
@@ -177,7 +182,7 @@ static void printDynamicSection(const ELFFile<ELFT> *Elf, StringRef Filename) {
if (Dyn.d_tag == ELF::DT_NULL)
continue;
- std::string Str = Elf->getDynamicTagAsString(Dyn.d_tag);
+ std::string Str = Elf.getDynamicTagAsString(Dyn.d_tag);
outs() << format(TagFmt.c_str(), Str.c_str());
const char *Fmt =
@@ -199,9 +204,9 @@ static void printDynamicSection(const ELFFile<ELFT> *Elf, StringRef Filename) {
}
template <class ELFT>
-static void printProgramHeaders(const ELFFile<ELFT> *Obj, StringRef FileName) {
+static void printProgramHeaders(const ELFFile<ELFT> &Obj, StringRef FileName) {
outs() << "Program Header:\n";
- auto ProgramHeaderOrError = Obj->program_headers();
+ auto ProgramHeaderOrError = Obj.program_headers();
if (!ProgramHeaderOrError) {
reportWarning("unable to read program headers: " +
toString(ProgramHeaderOrError.takeError()),
@@ -328,20 +333,20 @@ static void printSymbolVersionDefinition(const typename ELFT::Shdr &Shdr,
}
template <class ELFT>
-static void printSymbolVersionInfo(const ELFFile<ELFT> *Elf,
+static void printSymbolVersionInfo(const ELFFile<ELFT> &Elf,
StringRef FileName) {
ArrayRef<typename ELFT::Shdr> Sections =
- unwrapOrError(Elf->sections(), FileName);
+ unwrapOrError(Elf.sections(), FileName);
for (const typename ELFT::Shdr &Shdr : Sections) {
if (Shdr.sh_type != ELF::SHT_GNU_verneed &&
Shdr.sh_type != ELF::SHT_GNU_verdef)
continue;
ArrayRef<uint8_t> Contents =
- unwrapOrError(Elf->getSectionContents(&Shdr), FileName);
+ unwrapOrError(Elf.getSectionContents(Shdr), FileName);
const typename ELFT::Shdr *StrTabSec =
- unwrapOrError(Elf->getSection(Shdr.sh_link), FileName);
- StringRef StrTab = unwrapOrError(Elf->getStringTable(StrTabSec), FileName);
+ unwrapOrError(Elf.getSection(Shdr.sh_link), FileName);
+ StringRef StrTab = unwrapOrError(Elf.getStringTable(*StrTabSec), FileName);
if (Shdr.sh_type == ELF::SHT_GNU_verneed)
printSymbolVersionDependency<ELFT>(Contents, StrTab);
diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/MachODump.cpp b/contrib/llvm-project/llvm/tools/llvm-objdump/MachODump.cpp
index 6d46496ecd4e..51212d52c18b 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objdump/MachODump.cpp
@@ -107,70 +107,70 @@ static cl::opt<bool> NoLeadingHeaders("no-leading-headers",
cl::opt<bool> objdump::UniversalHeaders(
"universal-headers",
- cl::desc("Print Mach-O universal headers (requires -macho)"),
+ cl::desc("Print Mach-O universal headers (requires --macho)"),
cl::cat(MachOCat));
static cl::opt<bool> ArchiveMemberOffsets(
"archive-member-offsets",
cl::desc("Print the offset to each archive member for Mach-O archives "
- "(requires -macho and -archive-headers)"),
+ "(requires --macho and --archive-headers)"),
cl::cat(MachOCat));
cl::opt<bool> objdump::IndirectSymbols(
"indirect-symbols",
cl::desc(
- "Print indirect symbol table for Mach-O objects (requires -macho)"),
+ "Print indirect symbol table for Mach-O objects (requires --macho)"),
cl::cat(MachOCat));
cl::opt<bool> objdump::DataInCode(
"data-in-code",
cl::desc(
- "Print the data in code table for Mach-O objects (requires -macho)"),
+ "Print the data in code table for Mach-O objects (requires --macho)"),
cl::cat(MachOCat));
cl::opt<bool>
objdump::LinkOptHints("link-opt-hints",
cl::desc("Print the linker optimization hints for "
- "Mach-O objects (requires -macho)"),
+ "Mach-O objects (requires --macho)"),
cl::cat(MachOCat));
cl::opt<bool>
objdump::InfoPlist("info-plist",
cl::desc("Print the info plist section as strings for "
- "Mach-O objects (requires -macho)"),
+ "Mach-O objects (requires --macho)"),
cl::cat(MachOCat));
cl::opt<bool>
objdump::DylibsUsed("dylibs-used",
cl::desc("Print the shared libraries used for linked "
- "Mach-O files (requires -macho)"),
+ "Mach-O files (requires --macho)"),
cl::cat(MachOCat));
cl::opt<bool> objdump::DylibId("dylib-id",
cl::desc("Print the shared library's id for the "
- "dylib Mach-O file (requires -macho)"),
+ "dylib Mach-O file (requires --macho)"),
cl::cat(MachOCat));
static cl::opt<bool>
NonVerbose("non-verbose",
cl::desc("Print the info for Mach-O objects in non-verbose or "
- "numeric form (requires -macho)"),
+ "numeric form (requires --macho)"),
cl::cat(MachOCat));
cl::opt<bool>
objdump::ObjcMetaData("objc-meta-data",
cl::desc("Print the Objective-C runtime meta data "
- "for Mach-O files (requires -macho)"),
+ "for Mach-O files (requires --macho)"),
cl::cat(MachOCat));
static cl::opt<std::string> DisSymName(
"dis-symname",
- cl::desc("disassemble just this symbol's instructions (requires -macho)"),
+ cl::desc("disassemble just this symbol's instructions (requires --macho)"),
cl::cat(MachOCat));
static cl::opt<bool> NoSymbolicOperands(
"no-symbolic-operands",
- cl::desc("do not symbolic operands when disassembling (requires -macho)"),
+ cl::desc("do not symbolic operands when disassembling (requires --macho)"),
cl::cat(MachOCat));
static cl::list<std::string>
@@ -453,7 +453,8 @@ static void printRelocationTargetName(const MachOObjectFile *O,
bool isExtern = O->getPlainRelocationExternal(RE);
uint64_t Val = O->getPlainRelocationSymbolNum(RE);
- if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND) {
+ if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND &&
+ (O->getArch() == Triple::aarch64 || O->getArch() == Triple::aarch64_be)) {
Fmt << format("0x%0" PRIx64, Val);
return;
}
@@ -1888,9 +1889,7 @@ static bool checkMachOAndArchFlags(ObjectFile *O, StringRef Filename) {
&McpuDefault, &ArchFlag);
}
const std::string ArchFlagName(ArchFlag);
- if (none_of(ArchFlags, [&](const std::string &Name) {
- return Name == ArchFlagName;
- })) {
+ if (!llvm::is_contained(ArchFlags, ArchFlagName)) {
WithColor::error(errs(), "llvm-objdump")
<< Filename << ": no architecture specified.\n";
return false;
@@ -2109,6 +2108,10 @@ static void printCPUType(uint32_t cputype, uint32_t cpusubtype) {
outs() << " cputype CPU_TYPE_ARM64\n";
outs() << " cpusubtype CPU_SUBTYPE_ARM64_ALL\n";
break;
+ case MachO::CPU_SUBTYPE_ARM64_V8:
+ outs() << " cputype CPU_TYPE_ARM64\n";
+ outs() << " cpusubtype CPU_SUBTYPE_ARM64_V8\n";
+ break;
case MachO::CPU_SUBTYPE_ARM64E:
outs() << " cputype CPU_TYPE_ARM64\n";
outs() << " cpusubtype CPU_SUBTYPE_ARM64E\n";
@@ -2392,7 +2395,7 @@ void objdump::parseInputMachO(MachOUniversalBinary *UB) {
ArchFound = true;
Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
I->getAsObjectFile();
- std::string ArchitectureName = "";
+ std::string ArchitectureName;
if (ArchFlags.size() > 1)
ArchitectureName = I->getArchFlagName();
if (ObjOrErr) {
@@ -2508,7 +2511,7 @@ void objdump::parseInputMachO(MachOUniversalBinary *UB) {
E = UB->end_objects();
I != E; ++I) {
Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
- std::string ArchitectureName = "";
+ std::string ArchitectureName;
if (moreThanOneArch)
ArchitectureName = I->getArchFlagName();
if (ObjOrErr) {
@@ -7195,10 +7198,32 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
else
MachOMCPU = MCPU;
+#define CHECK_TARGET_INFO_CREATION(NAME) \
+ do { \
+ if (!NAME) { \
+ WithColor::error(errs(), "llvm-objdump") \
+ << "couldn't initialize disassembler for target " << TripleName \
+ << '\n'; \
+ return; \
+ } \
+ } while (false)
+#define CHECK_THUMB_TARGET_INFO_CREATION(NAME) \
+ do { \
+ if (!NAME) { \
+ WithColor::error(errs(), "llvm-objdump") \
+ << "couldn't initialize disassembler for target " << ThumbTripleName \
+ << '\n'; \
+ return; \
+ } \
+ } while (false)
+
std::unique_ptr<const MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
+ CHECK_TARGET_INFO_CREATION(InstrInfo);
std::unique_ptr<const MCInstrInfo> ThumbInstrInfo;
- if (ThumbTarget)
+ if (ThumbTarget) {
ThumbInstrInfo.reset(ThumbTarget->createMCInstrInfo());
+ CHECK_THUMB_TARGET_INFO_CREATION(ThumbInstrInfo);
+ }
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
@@ -7213,13 +7238,17 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
// Set up disassembler.
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
+ CHECK_TARGET_INFO_CREATION(MRI);
std::unique_ptr<const MCAsmInfo> AsmInfo(
TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
+ CHECK_TARGET_INFO_CREATION(AsmInfo);
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MachOMCPU, FeaturesStr));
+ CHECK_TARGET_INFO_CREATION(STI);
MCContext Ctx(AsmInfo.get(), MRI.get(), nullptr);
std::unique_ptr<MCDisassembler> DisAsm(
TheTarget->createMCDisassembler(*STI, Ctx));
+ CHECK_TARGET_INFO_CREATION(DisAsm);
std::unique_ptr<MCSymbolizer> Symbolizer;
struct DisassembleInfo SymbolizerInfo(nullptr, nullptr, nullptr, false);
std::unique_ptr<MCRelocationInfo> RelInfo(
@@ -7233,6 +7262,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
Triple(TripleName), AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI));
+ CHECK_TARGET_INFO_CREATION(IP);
// Set the display preference for hex vs. decimal immediates.
IP->setPrintImmHex(PrintImmHex);
// Comment stream and backing vector.
@@ -7245,12 +7275,6 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
// comment causing different diffs with the 'C' disassembler library API.
// IP->setCommentStream(CommentStream);
- if (!AsmInfo || !STI || !DisAsm || !IP) {
- WithColor::error(errs(), "llvm-objdump")
- << "couldn't initialize disassembler for target " << TripleName << '\n';
- return;
- }
-
// Set up separate thumb disassembler if needed.
std::unique_ptr<const MCRegisterInfo> ThumbMRI;
std::unique_ptr<const MCAsmInfo> ThumbAsmInfo;
@@ -7263,13 +7287,17 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
std::unique_ptr<MCRelocationInfo> ThumbRelInfo;
if (ThumbTarget) {
ThumbMRI.reset(ThumbTarget->createMCRegInfo(ThumbTripleName));
+ CHECK_THUMB_TARGET_INFO_CREATION(ThumbMRI);
ThumbAsmInfo.reset(
ThumbTarget->createMCAsmInfo(*ThumbMRI, ThumbTripleName, MCOptions));
+ CHECK_THUMB_TARGET_INFO_CREATION(ThumbAsmInfo);
ThumbSTI.reset(
ThumbTarget->createMCSubtargetInfo(ThumbTripleName, MachOMCPU,
FeaturesStr));
+ CHECK_THUMB_TARGET_INFO_CREATION(ThumbSTI);
ThumbCtx.reset(new MCContext(ThumbAsmInfo.get(), ThumbMRI.get(), nullptr));
ThumbDisAsm.reset(ThumbTarget->createMCDisassembler(*ThumbSTI, *ThumbCtx));
+ CHECK_THUMB_TARGET_INFO_CREATION(ThumbDisAsm);
MCContext *PtrThumbCtx = ThumbCtx.get();
ThumbRelInfo.reset(
ThumbTarget->createMCRelocationInfo(ThumbTripleName, *PtrThumbCtx));
@@ -7283,16 +7311,13 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
ThumbIP.reset(ThumbTarget->createMCInstPrinter(
Triple(ThumbTripleName), ThumbAsmPrinterVariant, *ThumbAsmInfo,
*ThumbInstrInfo, *ThumbMRI));
+ CHECK_THUMB_TARGET_INFO_CREATION(ThumbIP);
// Set the display preference for hex vs. decimal immediates.
ThumbIP->setPrintImmHex(PrintImmHex);
}
- if (ThumbTarget && (!ThumbAsmInfo || !ThumbSTI || !ThumbDisAsm || !ThumbIP)) {
- WithColor::error(errs(), "llvm-objdump")
- << "couldn't initialize disassembler for target " << ThumbTripleName
- << '\n';
- return;
- }
+#undef CHECK_TARGET_INFO_CREATION
+#undef CHECK_THUMB_TARGET_INFO_CREATION
MachO::mach_header Header = MachOOF->getHeader();
@@ -8315,6 +8340,9 @@ static void PrintMachHeader(uint32_t magic, uint32_t cputype,
case MachO::CPU_SUBTYPE_ARM64_ALL:
outs() << " ALL";
break;
+ case MachO::CPU_SUBTYPE_ARM64_V8:
+ outs() << " V8";
+ break;
case MachO::CPU_SUBTYPE_ARM64E:
outs() << " E";
break;
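Editor's note: the new CHECK_TARGET_INFO_CREATION helpers wrap their body in do { ... } while (false) so each use expands to a single statement that early-returns on a null pointer. A generic sketch of the idiom with an invented REQUIRE_NONNULL macro:
#include <cstdio>

// do/while(false) makes the macro behave like one statement, so it nests
// safely inside unbraced if/else and still requires the trailing ';'.
#define REQUIRE_NONNULL(PTR, WHAT)                                             \
  do {                                                                         \
    if (!(PTR)) {                                                              \
      std::fprintf(stderr, "couldn't create %s\n", WHAT);                      \
      return;                                                                  \
    }                                                                          \
  } while (false)

static void useTool(const char *Tool) {
  REQUIRE_NONNULL(Tool, "tool name"); // early return, like the patch
  std::printf("using %s\n", Tool);
}

int main() {
  useTool("llvm-objdump");
  useTool(nullptr); // prints the error and returns without crashing
  return 0;
}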
diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 320bbb5d358b..17128e95727f 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
@@ -101,7 +102,7 @@ static cl::alias AllHeadersShort("x", cl::desc("Alias for --all-headers"),
static cl::opt<std::string>
ArchName("arch-name",
cl::desc("Target arch to disassemble for, "
- "see -version for available targets"),
+ "see --version for available targets"),
cl::cat(ObjdumpCat));
cl::opt<bool>
@@ -226,13 +227,13 @@ static cl::alias MachOm("m", cl::desc("Alias for --macho"), cl::NotHidden,
cl::Grouping, cl::aliasopt(MachOOpt));
cl::opt<std::string> objdump::MCPU(
- "mcpu", cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+ "mcpu", cl::desc("Target a specific cpu type (--mcpu=help for details)"),
cl::value_desc("cpu-name"), cl::init(""), cl::cat(ObjdumpCat));
-cl::list<std::string> objdump::MAttrs("mattr", cl::CommaSeparated,
- cl::desc("Target specific attributes"),
- cl::value_desc("a1,+a2,-a3,..."),
- cl::cat(ObjdumpCat));
+cl::list<std::string> objdump::MAttrs(
+ "mattr", cl::CommaSeparated,
+ cl::desc("Target specific attributes (--mattr=help for details)"),
+ cl::value_desc("a1,+a2,-a3,..."), cl::cat(ObjdumpCat));
cl::opt<bool> objdump::NoShowRawInsn(
"no-show-raw-insn",
@@ -274,7 +275,7 @@ static cl::alias PrivateHeadersShort("p",
cl::list<std::string>
objdump::FilterSections("section",
cl::desc("Operate on the specified sections only. "
- "With -macho dump segment,section"),
+ "With --macho dump segment,section"),
cl::cat(ObjdumpCat));
static cl::alias FilterSectionsj("j", cl::desc("Alias for --section"),
cl::NotHidden, cl::Grouping, cl::Prefix,
@@ -303,7 +304,7 @@ static cl::opt<bool> PrintSource(
cl::desc(
"Display source inlined with disassembly. Implies disassemble object"),
cl::cat(ObjdumpCat));
-static cl::alias PrintSourceShort("S", cl::desc("Alias for -source"),
+static cl::alias PrintSourceShort("S", cl::desc("Alias for --source"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(PrintSource));
@@ -321,6 +322,11 @@ static cl::alias SymbolTableShort("t", cl::desc("Alias for --syms"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(SymbolTable));
+static cl::opt<bool> SymbolizeOperands(
+ "symbolize-operands",
+ cl::desc("Symbolize instruction operands when disassembling"),
+ cl::cat(ObjdumpCat));
+
static cl::opt<bool> DynamicSymbolTable(
"dynamic-syms",
cl::desc("Display the contents of the dynamic symbol table"),
@@ -330,11 +336,11 @@ static cl::alias DynamicSymbolTableShort("T",
cl::NotHidden, cl::Grouping,
cl::aliasopt(DynamicSymbolTable));
-cl::opt<std::string> objdump::TripleName(
- "triple",
- cl::desc(
- "Target triple to disassemble for, see -version for available targets"),
- cl::cat(ObjdumpCat));
+cl::opt<std::string>
+ objdump::TripleName("triple",
+ cl::desc("Target triple to disassemble for, see "
+ "--version for available targets"),
+ cl::cat(ObjdumpCat));
cl::opt<bool> objdump::UnwindInfo("unwind-info",
cl::desc("Display unwind information"),
@@ -348,6 +354,10 @@ static cl::opt<bool>
cl::cat(ObjdumpCat));
static cl::alias WideShort("w", cl::Grouping, cl::aliasopt(Wide));
+cl::opt<std::string> objdump::Prefix("prefix",
+ cl::desc("Add prefix to absolute paths"),
+ cl::cat(ObjdumpCat));
+
enum DebugVarsFormat {
DVDisabled,
DVUnicode,
@@ -439,7 +449,7 @@ std::string objdump::getFileNameForError(const object::Archive::Child &C,
return "<file index: " + std::to_string(Index) + ">";
}
-void objdump::reportWarning(Twine Message, StringRef File) {
+void objdump::reportWarning(const Twine &Message, StringRef File) {
// Output order between errs() and outs() matters especially for archive
// files where the output is per member object.
outs().flush();
@@ -448,7 +458,7 @@ void objdump::reportWarning(Twine Message, StringRef File) {
}
LLVM_ATTRIBUTE_NORETURN void objdump::reportError(StringRef File,
- Twine Message) {
+ const Twine &Message) {
outs().flush();
WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n";
exit(1);
@@ -471,11 +481,11 @@ LLVM_ATTRIBUTE_NORETURN void objdump::reportError(Error E, StringRef FileName,
exit(1);
}
-static void reportCmdLineWarning(Twine Message) {
+static void reportCmdLineWarning(const Twine &Message) {
WithColor::warning(errs(), ToolName) << Message << "\n";
}
-LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(Twine Message) {
+LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(const Twine &Message) {
WithColor::error(errs(), ToolName) << Message << "\n";
exit(1);
}
@@ -797,19 +807,19 @@ public:
bool IsASCII = DbgVariables == DVASCII;
switch (C) {
case LineChar::RangeStart:
- return IsASCII ? "^" : u8"\u2548";
+ return IsASCII ? "^" : (const char *)u8"\u2548";
case LineChar::RangeMid:
- return IsASCII ? "|" : u8"\u2503";
+ return IsASCII ? "|" : (const char *)u8"\u2503";
case LineChar::RangeEnd:
- return IsASCII ? "v" : u8"\u253b";
+ return IsASCII ? "v" : (const char *)u8"\u253b";
case LineChar::LabelVert:
- return IsASCII ? "|" : u8"\u2502";
+ return IsASCII ? "|" : (const char *)u8"\u2502";
case LineChar::LabelCornerNew:
- return IsASCII ? "/" : u8"\u250c";
+ return IsASCII ? "/" : (const char *)u8"\u250c";
case LineChar::LabelCornerActive:
- return IsASCII ? "|" : u8"\u2520";
+ return IsASCII ? "|" : (const char *)u8"\u2520";
case LineChar::LabelHoriz:
- return IsASCII ? "-" : u8"\u2500";
+ return IsASCII ? "-" : (const char *)u8"\u2500";
}
llvm_unreachable("Unhandled LineChar enum");
}
@@ -937,8 +947,8 @@ protected:
std::unordered_map<std::string, std::vector<StringRef>> LineCache;
// Keep track of missing sources.
StringSet<> MissingSources;
- // Only emit 'no debug info' warning once.
- bool WarnedNoDebugInfo;
+ // Only emit 'invalid debug info' warning once.
+ bool WarnedInvalidDebugInfo = false;
private:
bool cacheSource(const DILineInfo& LineInfoFile);
@@ -952,8 +962,7 @@ private:
public:
SourcePrinter() = default;
- SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch)
- : Obj(Obj), WarnedNoDebugInfo(false) {
+ SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) {
symbolize::LLVMSymbolizer::Options SymbolizerOpts;
SymbolizerOpts.PrintFunctions =
DILineInfoSpecifier::FunctionNameKind::LinkageName;
@@ -1008,22 +1017,24 @@ void SourcePrinter::printSourceLine(formatted_raw_ostream &OS,
return;
DILineInfo LineInfo = DILineInfo();
- auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address);
+ Expected<DILineInfo> ExpectedLineInfo =
+ Symbolizer->symbolizeCode(*Obj, Address);
std::string ErrorMessage;
- if (!ExpectedLineInfo)
- ErrorMessage = toString(ExpectedLineInfo.takeError());
- else
+ if (ExpectedLineInfo) {
LineInfo = *ExpectedLineInfo;
+ } else if (!WarnedInvalidDebugInfo) {
+ WarnedInvalidDebugInfo = true;
+ // TODO Untested.
+ reportWarning("failed to parse debug information: " +
+ toString(ExpectedLineInfo.takeError()),
+ ObjectFilename);
+ }
- if (LineInfo.FileName == DILineInfo::BadString) {
- if (!WarnedNoDebugInfo) {
- std::string Warning =
- "failed to parse debug information for " + ObjectFilename.str();
- if (!ErrorMessage.empty())
- Warning += ": " + ErrorMessage;
- reportWarning(Warning, ObjectFilename);
- WarnedNoDebugInfo = true;
- }
+ if (!Prefix.empty() && sys::path::is_absolute_gnu(LineInfo.FileName)) {
+ SmallString<128> FilePath;
+ sys::path::append(FilePath, Prefix, LineInfo.FileName);
+
+ LineInfo.FileName = std::string(FilePath);
}
if (PrintLines)
@@ -1324,13 +1335,21 @@ PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
static uint8_t getElfSymbolType(const ObjectFile *Obj, const SymbolRef &Sym) {
assert(Obj->isELF());
if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj))
- return Elf32LEObj->getSymbol(Sym.getRawDataRefImpl())->getType();
+ return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl()),
+ Obj->getFileName())
+ ->getType();
if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj))
- return Elf64LEObj->getSymbol(Sym.getRawDataRefImpl())->getType();
+ return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl()),
+ Obj->getFileName())
+ ->getType();
if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj))
- return Elf32BEObj->getSymbol(Sym.getRawDataRefImpl())->getType();
+ return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl()),
+ Obj->getFileName())
+ ->getType();
if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj))
- return Elf64BEObj->getSymbol(Sym.getRawDataRefImpl())->getType();
+ return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl()),
+ Obj->getFileName())
+ ->getType();
llvm_unreachable("Unsupported binary format");
}
@@ -1346,7 +1365,9 @@ addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj,
// ELFSymbolRef::getAddress() returns size instead of value for common
// symbols which is not desirable for disassembly output. Overriding.
if (SymbolType == ELF::STT_COMMON)
- Address = Obj->getSymbol(Symbol.getRawDataRefImpl())->st_value;
+ Address = unwrapOrError(Obj->getSymbol(Symbol.getRawDataRefImpl()),
+ Obj->getFileName())
+ ->st_value;
StringRef Name = unwrapOrError(Symbol.getName(), Obj->getFileName());
if (Name.empty())
@@ -1394,13 +1415,23 @@ static void addPltEntries(const ObjectFile *Obj,
return;
if (auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj)) {
for (auto PltEntry : ElfObj->getPltAddresses()) {
- SymbolRef Symbol(PltEntry.first, ElfObj);
- uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
-
- StringRef Name = unwrapOrError(Symbol.getName(), Obj->getFileName());
- if (!Name.empty())
- AllSymbols[*Plt].emplace_back(
- PltEntry.second, Saver.save((Name + "@plt").str()), SymbolType);
+ if (PltEntry.first) {
+ SymbolRef Symbol(*PltEntry.first, ElfObj);
+ uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
+ if (Expected<StringRef> NameOrErr = Symbol.getName()) {
+ if (!NameOrErr->empty())
+ AllSymbols[*Plt].emplace_back(
+ PltEntry.second, Saver.save((*NameOrErr + "@plt").str()),
+ SymbolType);
+ continue;
+ } else {
+ // The warning has been reported in disassembleObject().
+ consumeError(NameOrErr.takeError());
+ }
+ }
+ reportWarning("PLT entry at 0x" + Twine::utohexstr(PltEntry.second) +
+ " references an invalid symbol",
+ Obj->getFileName());
}
}
}
@@ -1568,6 +1599,52 @@ static SymbolInfoTy createDummySymbolInfo(const ObjectFile *Obj,
return SymbolInfoTy(Addr, Name, Type);
}
+static void
+collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
+ MCDisassembler *DisAsm, MCInstPrinter *IP,
+ const MCSubtargetInfo *STI, uint64_t SectionAddr,
+ uint64_t Start, uint64_t End,
+ std::unordered_map<uint64_t, std::string> &Labels) {
+ // So far only supports X86.
+ if (!STI->getTargetTriple().isX86())
+ return;
+
+ Labels.clear();
+ unsigned LabelCount = 0;
+ Start += SectionAddr;
+ End += SectionAddr;
+ uint64_t Index = Start;
+ while (Index < End) {
+ // Disassemble a real instruction and record function-local branch labels.
+ MCInst Inst;
+ uint64_t Size;
+ bool Disassembled = DisAsm->getInstruction(
+ Inst, Size, Bytes.slice(Index - SectionAddr), Index, nulls());
+ if (Size == 0)
+ Size = 1;
+
+ if (Disassembled && MIA) {
+ uint64_t Target;
+ bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
+ if (TargetKnown && (Target >= Start && Target < End) &&
+ !Labels.count(Target))
+ Labels[Target] = ("L" + Twine(LabelCount++)).str();
+ }
+
+ Index += Size;
+ }
+}
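A rough sketch of what the label collection above enables, assuming operand symbolization is requested (the SymbolizeOperands option used later in this file, presumably exposed as --symbolize-operands) for a hypothetical x86 function with a backward branch:

    <L0>:
      ...
      jne <L0>

Each in-function branch target gets an L<n> label line, and the branch annotation printed below substitutes that label for the raw target address.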
+
+static StringRef getSegmentName(const MachOObjectFile *MachO,
+ const SectionRef &Section) {
+ if (MachO) {
+ DataRefImpl DR = Section.getRawDataRefImpl();
+ StringRef SegmentName = MachO->getSectionFinalSegmentName(DR);
+ return SegmentName;
+ }
+ return "";
+}
+
static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
MCDisassembler *SecondaryDisAsm,
@@ -1594,8 +1671,12 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
const StringRef FileName = Obj->getFileName();
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj);
for (const SymbolRef &Symbol : Obj->symbols()) {
- StringRef Name = unwrapOrError(Symbol.getName(), FileName);
- if (Name.empty() && !(Obj->isXCOFF() && SymbolDescription))
+ Expected<StringRef> NameOrErr = Symbol.getName();
+ if (!NameOrErr) {
+ reportWarning(toString(NameOrErr.takeError()), FileName);
+ continue;
+ }
+ if (NameOrErr->empty() && !(Obj->isXCOFF() && SymbolDescription))
continue;
if (Obj->isELF() && getElfSymbolType(Obj, Symbol) == ELF::STT_SECTION)
@@ -1672,8 +1753,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
// the output.
StringSet<> FoundDisasmSymbolSet;
for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
- stable_sort(SecSyms.second);
- stable_sort(AbsoluteSymbols);
+ llvm::stable_sort(SecSyms.second);
+ llvm::stable_sort(AbsoluteSymbols);
std::unique_ptr<DWARFContext> DICtx;
LiveVariablePrinter LVP(*Ctx.getRegisterInfo(), *STI);
@@ -1728,12 +1809,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
}
}
- StringRef SegmentName = "";
- if (MachO) {
- DataRefImpl DR = Section.getRawDataRefImpl();
- SegmentName = MachO->getSectionFinalSegmentName(DR);
- }
-
+ StringRef SegmentName = getSegmentName(MachO, Section);
StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName());
// If the section has no symbol at the start, just insert a dummy one.
if (Symbols.empty() || Symbols[0].Addr != 0) {
@@ -1794,23 +1870,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
outs() << SectionName << ":\n";
}
- if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
- if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) {
- // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes)
- Start += 256;
- }
- if (SI == SE - 1 ||
- Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) {
- // cut trailing zeroes at the end of kernel
- // cut up to 256 bytes
- const uint64_t EndAlign = 256;
- const auto Limit = End - (std::min)(EndAlign, End - Start);
- while (End > Limit &&
- *reinterpret_cast<const support::ulittle32_t*>(&Bytes[End - 4]) == 0)
- End -= 4;
- }
- }
-
outs() << '\n';
if (!NoLeadingAddr)
outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
@@ -1880,6 +1939,12 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
!DisassembleAll;
bool DumpARMELFData = false;
formatted_raw_ostream FOS(outs());
+
+ std::unordered_map<uint64_t, std::string> AllLabels;
+ if (SymbolizeOperands)
+ collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI,
+ SectionAddr, Index, End, AllLabels);
+
while (Index < End) {
// ARM and AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the markers introduced to understand what
@@ -1920,6 +1985,11 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
}
}
+      // Print a local label if there is one.
+ auto Iter = AllLabels.find(SectionAddr + Index);
+ if (Iter != AllLabels.end())
+ FOS << "<" << Iter->second << ">:\n";
+
// Disassemble a real instruction or a data when disassemble all is
// provided
MCInst Inst;
@@ -1953,7 +2023,9 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
Inst, SectionAddr + Index, Size)) {
Target = *MaybeTarget;
PrintTarget = true;
- FOS << " # " << Twine::utohexstr(Target);
+ // Do not print real address when symbolizing.
+ if (!SymbolizeOperands)
+ FOS << " # " << Twine::utohexstr(Target);
}
if (PrintTarget) {
// In a relocatable object, the target's section must reside in
@@ -2003,17 +2075,30 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
}
}
+          // Print the label corresponding to the target if there is one.
+ bool LabelAvailable = AllLabels.count(Target);
if (TargetSym != nullptr) {
uint64_t TargetAddress = TargetSym->Addr;
+ uint64_t Disp = Target - TargetAddress;
std::string TargetName = TargetSym->Name.str();
if (Demangle)
TargetName = demangle(TargetName);
- FOS << " <" << TargetName;
- uint64_t Disp = Target - TargetAddress;
- if (Disp)
- FOS << "+0x" << Twine::utohexstr(Disp);
- FOS << '>';
+ FOS << " <";
+ if (!Disp) {
+            // Always print the binary symbol precisely corresponding to
+ // the target address.
+ FOS << TargetName;
+ } else if (!LabelAvailable) {
+            // Always print the binary symbol plus an offset if there's no
+ // local label corresponding to the target address.
+ FOS << TargetName << "+0x" << Twine::utohexstr(Disp);
+ } else {
+ FOS << AllLabels[Target];
+ }
+ FOS << ">";
+ } else if (LabelAvailable) {
+ FOS << " <" << AllLabels[Target] << ">";
}
}
}
@@ -2089,6 +2174,10 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (!AsmInfo)
reportError(Obj->getFileName(),
"no assembly info for target " + TripleName);
+
+ if (MCPU.empty())
+ MCPU = Obj->tryGetCPUName().getValueOr("").str();
+
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
if (!STI)
@@ -2135,6 +2224,8 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
"no instruction printer for target " + TripleName);
IP->setPrintImmHex(PrintImmHex);
IP->setPrintBranchImmAsAddress(true);
+ IP->setSymbolizeOperands(SymbolizeOperands);
+ IP->setMCInstrAnalysis(MIA.get());
PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
SourcePrinter SP(Obj, TheTarget->getName());
@@ -2301,6 +2392,8 @@ void objdump::printSectionHeaders(const ObjectFile *Obj) {
}
void objdump::printSectionContents(const ObjectFile *Obj) {
+ const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj);
+
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
uint64_t BaseAddr = Section.getAddress();
@@ -2308,7 +2401,11 @@ void objdump::printSectionContents(const ObjectFile *Obj) {
if (!Size)
continue;
- outs() << "Contents of section " << Name << ":\n";
+ outs() << "Contents of section ";
+ StringRef SegmentName = getSegmentName(MachO, Section);
+ if (!SegmentName.empty())
+ outs() << SegmentName << ",";
+ outs() << Name << ":\n";
if (Section.isBSS()) {
outs() << format("<skipping contents of bss section at [%04" PRIx64
", %04" PRIx64 ")>\n",
@@ -2466,11 +2563,9 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
} else if (Section == O->section_end()) {
outs() << "*UND*";
} else {
- if (MachO) {
- DataRefImpl DR = Section->getRawDataRefImpl();
- StringRef SegmentName = MachO->getSectionFinalSegmentName(DR);
+ StringRef SegmentName = getSegmentName(MachO, *Section);
+ if (!SegmentName.empty())
outs() << SegmentName << ",";
- }
StringRef SectionName = unwrapOrError(Section->getName(), FileName);
outs() << SectionName;
}
@@ -2882,6 +2977,10 @@ int main(int argc, char **argv) {
if (InputFilenames.empty())
InputFilenames.push_back("a.out");
+ // Removes trailing separators from prefix.
+ while (!Prefix.empty() && sys::path::is_separator(Prefix.back()))
+ Prefix.pop_back();
+
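A minimal illustration of the prefix handling, with hypothetical paths: after the loop above strips trailing separators, an invocation such as

    llvm-objdump -d -l --prefix=/build/sysroot a.out

(assuming the Prefix option declared in llvm-objdump.h is exposed as --prefix) would print a DWARF file name of /usr/src/lib/foo.c as /build/sysroot/usr/src/lib/foo.c, because the source-printer change earlier in this patch prepends Prefix only to absolute file names, guarded by sys::path::is_absolute_gnu().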
if (AllHeaders)
ArchiveHeaders = FileHeaders = PrivateHeaders = Relocations =
SectionHeaders = SymbolTable = true;
diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.h b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.h
index 390fc62d09f8..99bf191a301e 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.h
@@ -18,6 +18,7 @@
namespace llvm {
class StringRef;
+class Twine;
namespace object {
class ELFObjectFileBase;
@@ -39,6 +40,7 @@ extern cl::list<std::string> MAttrs;
extern cl::opt<std::string> MCPU;
extern cl::opt<bool> NoShowRawInsn;
extern cl::opt<bool> NoLeadingAddr;
+extern cl::opt<std::string> Prefix;
extern cl::opt<bool> PrintImmHex;
extern cl::opt<bool> PrivateHeaders;
extern cl::opt<bool> Relocations;
@@ -126,11 +128,11 @@ void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName,
void printSymbol(const object::ObjectFile *O, const object::SymbolRef &Symbol,
StringRef FileName, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic);
-LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Twine Message);
+LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, const Twine &Message);
LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName,
StringRef ArchiveName = "",
StringRef ArchitectureName = "");
-void reportWarning(Twine Message, StringRef File);
+void reportWarning(const Twine &Message, StringRef File);
template <typename T, typename... Ts>
T unwrapOrError(Expected<T> EO, Ts &&... Args) {
diff --git a/contrib/llvm-project/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/contrib/llvm-project/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
index aa185e8a2f22..babdb56a718c 100644
--- a/contrib/llvm-project/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -738,21 +738,17 @@ namespace {
constexpr uint32_t kNoneUdtKind = 0;
constexpr uint32_t kSimpleUdtKind = 1;
constexpr uint32_t kUnknownUdtKind = 2;
-const StringRef NoneLabel("<none type>");
-const StringRef SimpleLabel("<simple type>");
-const StringRef UnknownLabel("<unknown type>");
-
} // namespace
-static StringRef getUdtStatLabel(uint32_t Kind) {
+static std::string getUdtStatLabel(uint32_t Kind) {
if (Kind == kNoneUdtKind)
- return NoneLabel;
+ return "<none type>";
if (Kind == kSimpleUdtKind)
- return SimpleLabel;
+ return "<simple type>";
if (Kind == kUnknownUdtKind)
- return UnknownLabel;
+ return "<unknown type>";
return formatTypeLeafKind(static_cast<TypeLeafKind>(Kind));
}
@@ -760,7 +756,7 @@ static StringRef getUdtStatLabel(uint32_t Kind) {
static uint32_t getLongestTypeLeafName(const StatCollection &Stats) {
size_t L = 0;
for (const auto &Stat : Stats.Individual) {
- StringRef Label = getUdtStatLabel(Stat.first);
+ std::string Label = getUdtStatLabel(Stat.first);
L = std::max(L, Label.size());
}
return static_cast<uint32_t>(L);
@@ -869,7 +865,7 @@ Error DumpOutputStyle::dumpUdtStats() {
P.formatLine("{0}", fmt_repeat('-', TableWidth));
for (const auto &Stat : UdtTargetStats.getStatsSortedBySize()) {
- StringRef Label = getUdtStatLabel(Stat.first);
+ std::string Label = getUdtStatLabel(Stat.first);
P.formatLine("{0} | {1:N} {2:N}",
fmt_align(Label, AlignStyle::Right, FieldWidth),
fmt_align(Stat.second.Count, AlignStyle::Right, CD),
@@ -1911,7 +1907,6 @@ void DumpOutputStyle::dumpSectionHeaders(StringRef Label, DbgHeaderType Type) {
P.getIndentLevel(), Header.Characteristics, 1, ""));
++I;
}
- return;
}
Error DumpOutputStyle::dumpSectionContribs() {
diff --git a/contrib/llvm-project/llvm/tools/llvm-pdbutil/FormatUtil.cpp b/contrib/llvm-project/llvm/tools/llvm-pdbutil/FormatUtil.cpp
index c9ef19609496..b4837398f1d0 100644
--- a/contrib/llvm-project/llvm/tools/llvm-pdbutil/FormatUtil.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-pdbutil/FormatUtil.cpp
@@ -156,16 +156,17 @@ std::string llvm::pdb::formatSymbolKind(SymbolKind K) {
return formatUnknownEnum(K);
}
-StringRef llvm::pdb::formatTypeLeafKind(TypeLeafKind K) {
+std::string llvm::pdb::formatTypeLeafKind(TypeLeafKind K) {
switch (K) {
#define TYPE_RECORD(EnumName, value, name) \
case EnumName: \
return #EnumName;
#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
default:
- llvm_unreachable("Unknown type leaf kind!");
+ return formatv("UNKNOWN RECORD ({0:X})",
+ static_cast<std::underlying_type_t<TypeLeafKind>>(K))
+ .str();
}
- return "";
}
std::string llvm::pdb::formatSegmentOffset(uint16_t Segment, uint32_t Offset) {
diff --git a/contrib/llvm-project/llvm/tools/llvm-pdbutil/FormatUtil.h b/contrib/llvm-project/llvm/tools/llvm-pdbutil/FormatUtil.h
index 1a006844e011..b99ccec215b5 100644
--- a/contrib/llvm-project/llvm/tools/llvm-pdbutil/FormatUtil.h
+++ b/contrib/llvm-project/llvm/tools/llvm-pdbutil/FormatUtil.h
@@ -66,7 +66,7 @@ std::string typesetStringList(uint32_t IndentLevel,
std::string formatChunkKind(codeview::DebugSubsectionKind Kind,
bool Friendly = true);
std::string formatSymbolKind(codeview::SymbolKind K);
-StringRef formatTypeLeafKind(codeview::TypeLeafKind K);
+std::string formatTypeLeafKind(codeview::TypeLeafKind K);
/// Returns the number of digits in the given integer.
inline int NumDigits(uint64_t N) {
@@ -123,7 +123,7 @@ struct EndianAdapter final
explicit EndianAdapter(EndianType &&Item)
: FormatAdapter<EndianType>(std::move(Item)) {}
- void format(llvm::raw_ostream &Stream, StringRef Style) {
+ void format(llvm::raw_ostream &Stream, StringRef Style) override {
format_provider<T>::format(static_cast<T>(this->Item), Stream, Style);
}
};
diff --git a/contrib/llvm-project/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index 7a06140855f8..787785c34b78 100644
--- a/contrib/llvm-project/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -288,7 +288,18 @@ static std::string formatCookieKind(FrameCookieKind Kind) {
}
static std::string formatRegisterId(RegisterId Id, CPUType Cpu) {
- if (Cpu == CPUType::ARM64) {
+ if (Cpu == CPUType::ARMNT) {
+ switch (Id) {
+#define CV_REGISTERS_ARM
+#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_ARM
+
+ default:
+ break;
+ }
+ } else if (Cpu == CPUType::ARM64) {
switch (Id) {
#define CV_REGISTERS_ARM64
#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
diff --git a/contrib/llvm-project/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/contrib/llvm-project/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
index 00092e71c6b4..19f4880ab5eb 100644
--- a/contrib/llvm-project/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -892,9 +892,9 @@ static void dumpBytes(StringRef Path) {
bool opts::pretty::shouldDumpSymLevel(SymLevel Search) {
if (SymTypes.empty())
return true;
- if (llvm::find(SymTypes, Search) != SymTypes.end())
+ if (llvm::is_contained(SymTypes, Search))
return true;
- if (llvm::find(SymTypes, SymLevel::All) != SymTypes.end())
+ if (llvm::is_contained(SymTypes, SymLevel::All))
return true;
return false;
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp b/contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 843f072a61c3..7e53c30c7579 100644
--- a/contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -251,6 +251,7 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
Filename);
return;
}
+ WC->Writer.setInstrEntryBBEnabled(Reader->instrEntryBBEnabled());
for (auto &I : *Reader) {
if (Remapper)
@@ -295,7 +296,9 @@ static void writeInstrProfile(StringRef OutputFilename,
ProfileFormat OutputFormat,
InstrProfWriter &Writer) {
std::error_code EC;
- raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::OF_None);
+ raw_fd_ostream Output(OutputFilename.data(), EC,
+ OutputFormat == PF_Text ? sys::fs::OF_Text
+ : sys::fs::OF_None);
if (EC)
exitWithErrorCode(EC, OutputFilename);
@@ -385,6 +388,172 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
}
+/// The profile entry for a function in instrumentation profile.
+struct InstrProfileEntry {
+ uint64_t MaxCount = 0;
+ float ZeroCounterRatio = 0.0;
+ InstrProfRecord *ProfRecord;
+ InstrProfileEntry(InstrProfRecord *Record);
+ InstrProfileEntry() = default;
+};
+
+InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
+ ProfRecord = Record;
+ uint64_t CntNum = Record->Counts.size();
+ uint64_t ZeroCntNum = 0;
+ for (size_t I = 0; I < CntNum; ++I) {
+ MaxCount = std::max(MaxCount, Record->Counts[I]);
+ ZeroCntNum += !Record->Counts[I];
+ }
+ ZeroCounterRatio = (float)ZeroCntNum / CntNum;
+}
+
+/// Either set all the counters in the instr profile entry \p IFE to -1
+/// in order to drop the profile, or scale up the counters in \p IFE to
+/// be above the hot threshold. We use the ratio of zero counters in the
+/// profile of a function to decide whether the profile is helpful or
+/// harmful for performance, and to choose whether to scale it up or drop it.
+static void updateInstrProfileEntry(InstrProfileEntry &IFE,
+ uint64_t HotInstrThreshold,
+ float ZeroCounterThreshold) {
+ InstrProfRecord *ProfRecord = IFE.ProfRecord;
+ if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
+ // If all or most of the counters of the function are zero, the
+    // profile is unaccountable and should be dropped. Reset all the
+ // counters to be -1 and PGO profile-use will drop the profile.
+ // All counters being -1 also implies that the function is hot so
+ // PGO profile-use will also set the entry count metadata to be
+ // above hot threshold.
+ for (size_t I = 0; I < ProfRecord->Counts.size(); ++I)
+ ProfRecord->Counts[I] = -1;
+ return;
+ }
+
+ // Scale up the MaxCount to be multiple times above hot threshold.
+ const unsigned MultiplyFactor = 3;
+ uint64_t Numerator = HotInstrThreshold * MultiplyFactor;
+ uint64_t Denominator = IFE.MaxCount;
+ ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
+ warn(toString(make_error<InstrProfError>(E)));
+ });
+}
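A worked example of the two branches above, with made-up numbers: if most of a function's counters are zero (ZeroCounterRatio above ZeroCounterThreshold) or MaxCount is 0, every counter is set to -1 and profile-use later drops the body profile while still treating the function as hot. Otherwise, with HotInstrThreshold = 500 and MaxCount = 10, the record is scaled by Numerator/Denominator = (500 * 3) / 10, so each counter is multiplied by 150 and the hottest counter lands at 1500, three times the hot threshold.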
+
+const uint64_t ColdPercentileIdx = 15;
+const uint64_t HotPercentileIdx = 11;
+
+/// Adjust the instr profile in \p WC based on the sample profile in
+/// \p Reader.
+static void
+adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
+ std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
+ unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
+ unsigned InstrProfColdThreshold) {
+ // Function to its entry in instr profile.
+ StringMap<InstrProfileEntry> InstrProfileMap;
+ InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
+ for (auto &PD : WC->Writer.getProfileData()) {
+ // Populate IPBuilder.
+ for (const auto &PDV : PD.getValue()) {
+ InstrProfRecord Record = PDV.second;
+ IPBuilder.addRecord(Record);
+ }
+
+ // If a function has multiple entries in instr profile, skip it.
+ if (PD.getValue().size() != 1)
+ continue;
+
+ // Initialize InstrProfileMap.
+ InstrProfRecord *R = &PD.getValue().begin()->second;
+ InstrProfileMap[PD.getKey()] = InstrProfileEntry(R);
+ }
+
+ ProfileSummary InstrPS = *IPBuilder.getSummary();
+ ProfileSummary SamplePS = Reader->getSummary();
+
+ // Compute cold thresholds for instr profile and sample profile.
+ uint64_t ColdSampleThreshold =
+ ProfileSummaryBuilder::getEntryForPercentile(
+ SamplePS.getDetailedSummary(),
+ ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
+ .MinCount;
+ uint64_t HotInstrThreshold =
+ ProfileSummaryBuilder::getEntryForPercentile(
+ InstrPS.getDetailedSummary(),
+ ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
+ .MinCount;
+ uint64_t ColdInstrThreshold =
+ InstrProfColdThreshold
+ ? InstrProfColdThreshold
+ : ProfileSummaryBuilder::getEntryForPercentile(
+ InstrPS.getDetailedSummary(),
+ ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
+ .MinCount;
+
+  // Find hot/warm functions in the sample profile that are cold in the instr
+  // profile and adjust the profiles of those functions in the instr profile.
+ for (const auto &PD : Reader->getProfiles()) {
+ StringRef FName = PD.getKey();
+ const sampleprof::FunctionSamples &FS = PD.getValue();
+ auto It = InstrProfileMap.find(FName);
+ if (FS.getHeadSamples() > ColdSampleThreshold &&
+ It != InstrProfileMap.end() &&
+ It->second.MaxCount <= ColdInstrThreshold &&
+ FS.getBodySamples().size() >= SupplMinSizeThreshold) {
+ updateInstrProfileEntry(It->second, HotInstrThreshold,
+ ZeroCounterThreshold);
+ }
+ }
+}
+
+/// The main function to supplement an instr profile with a sample profile.
+/// \p Inputs contains the instr profile. \p SampleFilename specifies the
+/// sample profile. \p OutputFilename specifies the output profile name.
+/// \p OutputFormat specifies the output profile format. \p OutputSparse
+/// specifies whether to generate a sparse profile. \p SupplMinSizeThreshold
+/// specifies the minimal size for the functions whose profile will be
+/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
+/// a function contains too many zero counters and whether its profile
+/// should be dropped. \p InstrProfColdThreshold is the user-specified
+/// cold threshold which overrides the cold threshold obtained from the
+/// instr profile summary.
+static void supplementInstrProfile(
+ const WeightedFileVector &Inputs, StringRef SampleFilename,
+ StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse,
+ unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
+ unsigned InstrProfColdThreshold) {
+ if (OutputFilename.compare("-") == 0)
+ exitWithError("Cannot write indexed profdata format to stdout.");
+ if (Inputs.size() != 1)
+ exitWithError("Expect one input to be an instr profile.");
+ if (Inputs[0].Weight != 1)
+    exitWithError("Expect the instr profile to have no weight.");
+
+ StringRef InstrFilename = Inputs[0].Filename;
+
+ // Read sample profile.
+ LLVMContext Context;
+ auto ReaderOrErr =
+ sampleprof::SampleProfileReader::create(SampleFilename.str(), Context);
+ if (std::error_code EC = ReaderOrErr.getError())
+ exitWithErrorCode(EC, SampleFilename);
+ auto Reader = std::move(ReaderOrErr.get());
+ if (std::error_code EC = Reader->read())
+ exitWithErrorCode(EC, SampleFilename);
+
+ // Read instr profile.
+ std::mutex ErrorLock;
+ SmallSet<instrprof_error, 4> WriterErrorCodes;
+ auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
+ WriterErrorCodes);
+ loadInput(Inputs[0], nullptr, WC.get());
+ if (WC->Errors.size() > 0)
+ exitWithError(std::move(WC->Errors[0].first), InstrFilename);
+
+ adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
+ InstrProfColdThreshold);
+ writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
+}
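A hypothetical invocation of this flow, with illustrative file names (the flags are the ones declared below in merge_main):

    llvm-profdata merge -instr -supplement-instr-with-sample=app.sampleprof \
        -o app.profdata app.profraw

Exactly one instr input with weight 1 is accepted, and '-' is rejected as the output because the indexed format is not written to stdout.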
+
/// Make a copy of the given function samples with all symbol names remapped
/// by the provided symbol remapper.
static sampleprof::FunctionSamples
@@ -493,6 +662,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
LLVMContext Context;
sampleprof::ProfileSymbolList WriterList;
+ Optional<bool> ProfileIsProbeBased;
for (const auto &Input : Inputs) {
auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context);
if (std::error_code EC = ReaderOrErr.getError()) {
@@ -513,6 +683,11 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
}
StringMap<FunctionSamples> &Profiles = Reader->getProfiles();
+ if (ProfileIsProbeBased &&
+ ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
+ exitWithError(
+ "cannot merge probe-based profile with non-probe-based profile");
+ ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
for (StringMap<FunctionSamples>::iterator I = Profiles.begin(),
E = Profiles.end();
I != E; ++I) {
@@ -521,7 +696,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
Remapper ? remapSamples(I->second, *Remapper, Result)
: FunctionSamples();
FunctionSamples &Samples = Remapper ? Remapped : I->second;
- StringRef FName = Samples.getName();
+ StringRef FName = Samples.getNameWithContext(true);
MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight));
if (Result != sampleprof_error::success) {
std::error_code EC = make_error_code(Result);
@@ -679,6 +854,28 @@ static int merge_main(int argc, const char *argv[]) {
cl::opt<bool> GenPartialProfile(
"gen-partial-profile", cl::init(false), cl::Hidden,
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
+ cl::opt<std::string> SupplInstrWithSample(
+ "supplement-instr-with-sample", cl::init(""), cl::Hidden,
+      cl::desc("Supplement an instr profile with a sample profile, to correct "
+               "the profile unrepresentativeness issue. The sample "
+               "profile is the input to the flag. Output will be in instr "
+               "format (the flag only works with -instr)"));
+ cl::opt<float> ZeroCounterThreshold(
+ "zero-counter-threshold", cl::init(0.7), cl::Hidden,
+      cl::desc("For a function that is cold in the instr profile but hot in "
+               "the sample profile, if the ratio of the number of zero counters "
+               "to the total number of counters is above the "
+               "threshold, the profile of the function will be regarded as "
+               "being harmful for performance and will be dropped. "));
+ cl::opt<unsigned> SupplMinSizeThreshold(
+ "suppl-min-size-threshold", cl::init(10), cl::Hidden,
+ cl::desc("If the size of a function is smaller than the threshold, "
+               "assume it can be inlined by the PGO early inliner and it won't "
+               "be adjusted based on the sample profile. "));
+ cl::opt<unsigned> InstrProfColdThreshold(
+ "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
+      cl::desc("User-specified cold threshold for the instr profile which will "
+               "override the cold threshold obtained from the profile summary. "));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
@@ -707,6 +904,17 @@ static int merge_main(int argc, const char *argv[]) {
if (!RemappingFile.empty())
Remapper = SymbolRemapper::create(RemappingFile);
+ if (!SupplInstrWithSample.empty()) {
+ if (ProfileKind != instr)
+ exitWithError(
+ "-supplement-instr-with-sample can only work with -instr. ");
+
+ supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename,
+ OutputFormat, OutputSparse, SupplMinSizeThreshold,
+ ZeroCounterThreshold, InstrProfColdThreshold);
+ return 0;
+ }
+
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename,
OutputFormat, OutputSparse, NumThreads, FailureMode);
@@ -745,6 +953,933 @@ static void overlapInstrProfile(const std::string &BaseFilename,
Overlap.dump(OS);
}
+namespace {
+struct SampleOverlapStats {
+ StringRef BaseName;
+ StringRef TestName;
+ // Number of overlap units
+ uint64_t OverlapCount;
+ // Total samples of overlap units
+ uint64_t OverlapSample;
+  // Number of and total samples of units that are only present in the base or
+  // test profile
+ uint64_t BaseUniqueCount;
+ uint64_t BaseUniqueSample;
+ uint64_t TestUniqueCount;
+ uint64_t TestUniqueSample;
+ // Number of units and total samples in base or test profile
+ uint64_t BaseCount;
+ uint64_t BaseSample;
+ uint64_t TestCount;
+ uint64_t TestSample;
+  // Number of and total samples of units that are present in at least one profile
+ uint64_t UnionCount;
+ uint64_t UnionSample;
+ // Weighted similarity
+ double Similarity;
+ // For SampleOverlapStats instances representing functions, weights of the
+ // function in base and test profiles
+ double BaseWeight;
+ double TestWeight;
+
+ SampleOverlapStats()
+ : OverlapCount(0), OverlapSample(0), BaseUniqueCount(0),
+ BaseUniqueSample(0), TestUniqueCount(0), TestUniqueSample(0),
+ BaseCount(0), BaseSample(0), TestCount(0), TestSample(0), UnionCount(0),
+ UnionSample(0), Similarity(0.0), BaseWeight(0.0), TestWeight(0.0) {}
+};
+} // end anonymous namespace
+
+namespace {
+struct FuncSampleStats {
+ uint64_t SampleSum;
+ uint64_t MaxSample;
+ uint64_t HotBlockCount;
+ FuncSampleStats() : SampleSum(0), MaxSample(0), HotBlockCount(0) {}
+ FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
+ uint64_t HotBlockCount)
+ : SampleSum(SampleSum), MaxSample(MaxSample),
+ HotBlockCount(HotBlockCount) {}
+};
+} // end anonymous namespace
+
+namespace {
+enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };
+
+// Class for updating merging steps for two sorted maps. The class should be
+// instantiated with a map iterator type.
+template <class T> class MatchStep {
+public:
+ MatchStep() = delete;
+
+ MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
+ : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
+ SecondEnd(SecondEnd), Status(MS_None) {}
+
+ bool areBothFinished() const {
+ return (FirstIter == FirstEnd && SecondIter == SecondEnd);
+ }
+
+ bool isFirstFinished() const { return FirstIter == FirstEnd; }
+
+ bool isSecondFinished() const { return SecondIter == SecondEnd; }
+
+ /// Advance one step based on the previous match status unless the previous
+ /// status is MS_None. Then update Status based on the comparison between two
+ /// container iterators at the current step. If the previous status is
+ /// MS_None, it means two iterators are at the beginning and no comparison has
+ /// been made, so we simply update Status without advancing the iterators.
+ void updateOneStep();
+
+ T getFirstIter() const { return FirstIter; }
+
+ T getSecondIter() const { return SecondIter; }
+
+ MatchStatus getMatchStatus() const { return Status; }
+
+private:
+ // Current iterator and end iterator of the first container.
+ T FirstIter;
+ T FirstEnd;
+ // Current iterator and end iterator of the second container.
+ T SecondIter;
+ T SecondEnd;
+ // Match status of the current step.
+ MatchStatus Status;
+};
+} // end anonymous namespace
+
+template <class T> void MatchStep<T>::updateOneStep() {
+ switch (Status) {
+ case MS_Match:
+ ++FirstIter;
+ ++SecondIter;
+ break;
+ case MS_FirstUnique:
+ ++FirstIter;
+ break;
+ case MS_SecondUnique:
+ ++SecondIter;
+ break;
+ case MS_None:
+ break;
+ }
+
+ // Update Status according to iterators at the current step.
+ if (areBothFinished())
+ return;
+ if (FirstIter != FirstEnd &&
+ (SecondIter == SecondEnd || FirstIter->first < SecondIter->first))
+ Status = MS_FirstUnique;
+ else if (SecondIter != SecondEnd &&
+ (FirstIter == FirstEnd || SecondIter->first < FirstIter->first))
+ Status = MS_SecondUnique;
+ else
+ Status = MS_Match;
+}
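A short trace of the state machine above, assuming the first map holds keys {1, 3} and the second {2, 3}: the initial updateOneStep() call only compares and yields MS_FirstUnique (key 1); the next call advances the first iterator and yields MS_SecondUnique (key 2); the next advances the second iterator and yields MS_Match (key 3); the final call advances both iterators and areBothFinished() becomes true. The overlap computations below drive exactly this loop.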
+
+// Return the sum of line/block samples, the max line/block sample, and the
+// number of line/block samples above the given threshold in a function
+// including its inlinees.
+static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
+ FuncSampleStats &FuncStats,
+ uint64_t HotThreshold) {
+ for (const auto &L : Func.getBodySamples()) {
+ uint64_t Sample = L.second.getSamples();
+ FuncStats.SampleSum += Sample;
+ FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample);
+ if (Sample >= HotThreshold)
+ ++FuncStats.HotBlockCount;
+ }
+
+ for (const auto &C : Func.getCallsiteSamples()) {
+ for (const auto &F : C.second)
+ getFuncSampleStats(F.second, FuncStats, HotThreshold);
+ }
+}
+
+/// Predicate that determines if a function is hot with a given threshold. We
+/// keep it separate from its callsites for possible extension in the future.
+static bool isFunctionHot(const FuncSampleStats &FuncStats,
+ uint64_t HotThreshold) {
+ // We intentionally compare the maximum sample count in a function with the
+  // HotThreshold to get an approximate determination of hot functions.
+ return (FuncStats.MaxSample >= HotThreshold);
+}
+
+namespace {
+class SampleOverlapAggregator {
+public:
+ SampleOverlapAggregator(const std::string &BaseFilename,
+ const std::string &TestFilename,
+ double LowSimilarityThreshold, double Epsilon,
+ const OverlapFuncFilters &FuncFilter)
+ : BaseFilename(BaseFilename), TestFilename(TestFilename),
+ LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
+ FuncFilter(FuncFilter) {}
+
+ /// Detect 0-sample input profile and report to output stream. This interface
+ /// should be called after loadProfiles().
+ bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
+
+ /// Write out function-level similarity statistics for functions specified by
+ /// options --function, --value-cutoff, and --similarity-cutoff.
+ void dumpFuncSimilarity(raw_fd_ostream &OS) const;
+
+ /// Write out program-level similarity and overlap statistics.
+ void dumpProgramSummary(raw_fd_ostream &OS) const;
+
+ /// Write out hot-function and hot-block statistics for base_profile,
+ /// test_profile, and their overlap. For both cases, the overlap HO is
+ /// calculated as follows:
+ /// Given the number of functions (or blocks) that are hot in both profiles
+ /// HCommon and the number of functions (or blocks) that are hot in at
+ /// least one profile HUnion, HO = HCommon / HUnion.
+ void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
+
+ /// This function tries matching functions in base and test profiles. For each
+ /// pair of matched functions, it aggregates the function-level
+  /// similarity into a profile-level similarity. It also dumps function-level
+ /// similarity information of functions specified by --function,
+ /// --value-cutoff, and --similarity-cutoff options. The program-level
+ /// similarity PS is computed as follows:
+ /// Given function-level similarity FS(A) for all function A, the
+ /// weight of function A in base profile WB(A), and the weight of function
+ /// A in test profile WT(A), compute PS(base_profile, test_profile) =
+ /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
+ /// meaning no-overlap.
+ void computeSampleProfileOverlap(raw_fd_ostream &OS);
+
+ /// Initialize ProfOverlap with the sum of samples in base and test
+ /// profiles. This function also computes and keeps the sum of samples and
+ /// max sample counts of each function in BaseStats and TestStats for later
+ /// use to avoid re-computations.
+ void initializeSampleProfileOverlap();
+
+ /// Load profiles specified by BaseFilename and TestFilename.
+ std::error_code loadProfiles();
+
+private:
+ SampleOverlapStats ProfOverlap;
+ SampleOverlapStats HotFuncOverlap;
+ SampleOverlapStats HotBlockOverlap;
+ std::string BaseFilename;
+ std::string TestFilename;
+ std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
+ std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
+ // BaseStats and TestStats hold FuncSampleStats for each function, with
+ // function name as the key.
+ StringMap<FuncSampleStats> BaseStats;
+ StringMap<FuncSampleStats> TestStats;
+ // Low similarity threshold in floating point number
+ double LowSimilarityThreshold;
+ // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
+ // for tracking hot blocks.
+ uint64_t BaseHotThreshold;
+ uint64_t TestHotThreshold;
+ // A small threshold used to round the results of floating point accumulations
+ // to resolve imprecision.
+ const double Epsilon;
+ std::multimap<double, SampleOverlapStats, std::greater<double>>
+ FuncSimilarityDump;
+ // FuncFilter carries specifications in options --value-cutoff and
+ // --function.
+ OverlapFuncFilters FuncFilter;
+ // Column offsets for printing the function-level details table.
+ static const unsigned int TestWeightCol = 15;
+ static const unsigned int SimilarityCol = 30;
+ static const unsigned int OverlapCol = 43;
+ static const unsigned int BaseUniqueCol = 53;
+ static const unsigned int TestUniqueCol = 67;
+ static const unsigned int BaseSampleCol = 81;
+ static const unsigned int TestSampleCol = 96;
+ static const unsigned int FuncNameCol = 111;
+
+ /// Return a similarity of two line/block sample counters in the same
+ /// function in base and test profiles. The line/block-similarity BS(i) is
+ /// computed as follows:
+  ///   For an offset i, given the sample count at i in base profile BB(i),
+ /// the sample count at i in test profile BT(i), the sum of sample counts
+ /// in this function in base profile SB, and the sum of sample counts in
+ /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
+ /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
+ double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
+ const SampleOverlapStats &FuncOverlap) const;
+
+ void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
+ uint64_t HotBlockCount);
+
+ void getHotFunctions(const StringMap<FuncSampleStats> &ProfStats,
+ StringMap<FuncSampleStats> &HotFunc,
+ uint64_t HotThreshold) const;
+
+ void computeHotFuncOverlap();
+
+ /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
+ /// Difference for two sample units in a matched function according to the
+ /// given match status.
+ void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
+ uint64_t HotBlockCount,
+ SampleOverlapStats &FuncOverlap,
+ double &Difference, MatchStatus Status);
+
+ /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
+ /// Difference for unmatched callees that only present in one profile in a
+ /// matched caller function.
+ void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
+ SampleOverlapStats &FuncOverlap,
+ double &Difference, MatchStatus Status);
+
+ /// This function updates sample overlap statistics of an overlap function in
+ /// base and test profile. It also calculates a function-internal similarity
+ /// FIS as follows:
+ /// For offsets i that have samples in at least one profile in this
+ /// function A, given BS(i) returned by computeBlockSimilarity(), compute
+ /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
+ /// 0.0 meaning no overlap.
+ double computeSampleFunctionInternalOverlap(
+ const sampleprof::FunctionSamples &BaseFunc,
+ const sampleprof::FunctionSamples &TestFunc,
+ SampleOverlapStats &FuncOverlap);
+
+ /// Function-level similarity (FS) is a weighted value over function internal
+ /// similarity (FIS). This function computes a function's FS from its FIS by
+ /// applying the weight.
+ double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
+ uint64_t TestFuncSample) const;
+
+ /// The function-level similarity FS(A) for a function A is computed as
+ /// follows:
+ /// Compute a function-internal similarity FIS(A) by
+ /// computeSampleFunctionInternalOverlap(). Then, with the weight of
+ /// function A in base profile WB(A), and the weight of function A in test
+ /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
+ /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
+ double
+ computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
+ const sampleprof::FunctionSamples *TestFunc,
+ SampleOverlapStats *FuncOverlap,
+ uint64_t BaseFuncSample,
+ uint64_t TestFuncSample);
+
+ /// Profile-level similarity (PS) is a weighted aggregate over function-level
+ /// similarities (FS). This method weights the FS value by the function
+ /// weights in the base and test profiles for the aggregation.
+ double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
+ uint64_t TestFuncSample) const;
+};
+} // end anonymous namespace
+
+bool SampleOverlapAggregator::detectZeroSampleProfile(
+ raw_fd_ostream &OS) const {
+ bool HaveZeroSample = false;
+ if (ProfOverlap.BaseSample == 0) {
+ OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
+ HaveZeroSample = true;
+ }
+ if (ProfOverlap.TestSample == 0) {
+ OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
+ HaveZeroSample = true;
+ }
+ return HaveZeroSample;
+}
+
+double SampleOverlapAggregator::computeBlockSimilarity(
+ uint64_t BaseSample, uint64_t TestSample,
+ const SampleOverlapStats &FuncOverlap) const {
+ double BaseFrac = 0.0;
+ double TestFrac = 0.0;
+ if (FuncOverlap.BaseSample > 0)
+ BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
+ if (FuncOverlap.TestSample > 0)
+ TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
+ return 1.0 - std::fabs(BaseFrac - TestFrac);
+}
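A numeric example of this formula, with made-up counts: if an offset has 30 samples in the base function out of a function total of 100 (BaseFrac = 0.3) and 10 samples in the test function out of a total of 50 (TestFrac = 0.2), the block similarity is 1.0 - |0.3 - 0.2| = 0.9. A zero function total leaves the corresponding fraction at 0.0 rather than dividing by zero.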
+
+void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
+ uint64_t TestSample,
+ uint64_t HotBlockCount) {
+ bool IsBaseHot = (BaseSample >= BaseHotThreshold);
+ bool IsTestHot = (TestSample >= TestHotThreshold);
+ if (!IsBaseHot && !IsTestHot)
+ return;
+
+ HotBlockOverlap.UnionCount += HotBlockCount;
+ if (IsBaseHot)
+ HotBlockOverlap.BaseCount += HotBlockCount;
+ if (IsTestHot)
+ HotBlockOverlap.TestCount += HotBlockCount;
+ if (IsBaseHot && IsTestHot)
+ HotBlockOverlap.OverlapCount += HotBlockCount;
+}
+
+void SampleOverlapAggregator::getHotFunctions(
+ const StringMap<FuncSampleStats> &ProfStats,
+ StringMap<FuncSampleStats> &HotFunc, uint64_t HotThreshold) const {
+ for (const auto &F : ProfStats) {
+ if (isFunctionHot(F.second, HotThreshold))
+ HotFunc.try_emplace(F.first(), F.second);
+ }
+}
+
+void SampleOverlapAggregator::computeHotFuncOverlap() {
+ StringMap<FuncSampleStats> BaseHotFunc;
+ getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
+ HotFuncOverlap.BaseCount = BaseHotFunc.size();
+
+ StringMap<FuncSampleStats> TestHotFunc;
+ getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
+ HotFuncOverlap.TestCount = TestHotFunc.size();
+ HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
+
+ for (const auto &F : BaseHotFunc) {
+ if (TestHotFunc.count(F.first()))
+ ++HotFuncOverlap.OverlapCount;
+ else
+ ++HotFuncOverlap.UnionCount;
+ }
+}
+
+void SampleOverlapAggregator::updateOverlapStatsForFunction(
+ uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
+ SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
+ assert(Status != MS_None &&
+ "Match status should be updated before updating overlap statistics");
+ if (Status == MS_FirstUnique) {
+ TestSample = 0;
+ FuncOverlap.BaseUniqueSample += BaseSample;
+ } else if (Status == MS_SecondUnique) {
+ BaseSample = 0;
+ FuncOverlap.TestUniqueSample += TestSample;
+ } else {
+ ++FuncOverlap.OverlapCount;
+ }
+
+ FuncOverlap.UnionSample += std::max(BaseSample, TestSample);
+ FuncOverlap.OverlapSample += std::min(BaseSample, TestSample);
+ Difference +=
+ 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
+ updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
+}
+
+void SampleOverlapAggregator::updateForUnmatchedCallee(
+ const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
+ double &Difference, MatchStatus Status) {
+ assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
+ "Status must be either of the two unmatched cases");
+ FuncSampleStats FuncStats;
+ if (Status == MS_FirstUnique) {
+ getFuncSampleStats(Func, FuncStats, BaseHotThreshold);
+ updateOverlapStatsForFunction(FuncStats.SampleSum, 0,
+ FuncStats.HotBlockCount, FuncOverlap,
+ Difference, Status);
+ } else {
+ getFuncSampleStats(Func, FuncStats, TestHotThreshold);
+ updateOverlapStatsForFunction(0, FuncStats.SampleSum,
+ FuncStats.HotBlockCount, FuncOverlap,
+ Difference, Status);
+ }
+}
+
+double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
+ const sampleprof::FunctionSamples &BaseFunc,
+ const sampleprof::FunctionSamples &TestFunc,
+ SampleOverlapStats &FuncOverlap) {
+
+ using namespace sampleprof;
+
+ double Difference = 0;
+
+ // Accumulate Difference for regular line/block samples in the function.
+  // We match them with a sort-merge join algorithm because
+ // FunctionSamples::getBodySamples() returns a map of sample counters ordered
+ // by their offsets.
+ MatchStep<BodySampleMap::const_iterator> BlockIterStep(
+ BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
+ TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
+ BlockIterStep.updateOneStep();
+ while (!BlockIterStep.areBothFinished()) {
+ uint64_t BaseSample =
+ BlockIterStep.isFirstFinished()
+ ? 0
+ : BlockIterStep.getFirstIter()->second.getSamples();
+ uint64_t TestSample =
+ BlockIterStep.isSecondFinished()
+ ? 0
+ : BlockIterStep.getSecondIter()->second.getSamples();
+ updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap,
+ Difference, BlockIterStep.getMatchStatus());
+
+ BlockIterStep.updateOneStep();
+ }
+
+ // Accumulate Difference for callsite lines in the function. We match
+  // them with a sort-merge join algorithm because
+ // FunctionSamples::getCallsiteSamples() returns a map of callsite records
+ // ordered by their offsets.
+ MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
+ BaseFunc.getCallsiteSamples().cbegin(),
+ BaseFunc.getCallsiteSamples().cend(),
+ TestFunc.getCallsiteSamples().cbegin(),
+ TestFunc.getCallsiteSamples().cend());
+ CallsiteIterStep.updateOneStep();
+ while (!CallsiteIterStep.areBothFinished()) {
+ MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
+ assert(CallsiteStepStatus != MS_None &&
+ "Match status should be updated before entering loop body");
+
+ if (CallsiteStepStatus != MS_Match) {
+ auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
+ ? CallsiteIterStep.getFirstIter()
+ : CallsiteIterStep.getSecondIter();
+ for (const auto &F : Callsite->second)
+ updateForUnmatchedCallee(F.second, FuncOverlap, Difference,
+ CallsiteStepStatus);
+ } else {
+ // There may be multiple inlinees at the same offset, so we need to try
+        // matching all of them. This match is implemented with a sort-merge
+        // join because callsite records at the same offset are ordered by
+ // function names.
+ MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
+ CallsiteIterStep.getFirstIter()->second.cbegin(),
+ CallsiteIterStep.getFirstIter()->second.cend(),
+ CallsiteIterStep.getSecondIter()->second.cbegin(),
+ CallsiteIterStep.getSecondIter()->second.cend());
+ CalleeIterStep.updateOneStep();
+ while (!CalleeIterStep.areBothFinished()) {
+ MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
+ if (CalleeStepStatus != MS_Match) {
+ auto Callee = (CalleeStepStatus == MS_FirstUnique)
+ ? CalleeIterStep.getFirstIter()
+ : CalleeIterStep.getSecondIter();
+ updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
+ CalleeStepStatus);
+ } else {
+ // An inlined function can contain other inlinees inside, so compute
+ // the Difference recursively.
+ Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
+ CalleeIterStep.getFirstIter()->second,
+ CalleeIterStep.getSecondIter()->second,
+ FuncOverlap);
+ }
+ CalleeIterStep.updateOneStep();
+ }
+ }
+ CallsiteIterStep.updateOneStep();
+ }
+
+ // Difference reflects the total differences of line/block samples in this
+ // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
+ // reflect the similarity between function profiles in [0.0f to 1.0f].
+ return (2.0 - Difference) / 2;
+}
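Continuing the made-up example: if a matched function has three offsets with block similarities 0.9, 1.0, and 0.8 and no unmatched callsites, Difference accumulates (1 - BS) = 0.1 + 0.0 + 0.2 = 0.3 and the function-internal similarity returned here is (2.0 - 0.3) / 2 = 0.85. Because each profile's per-offset fractions sum to at most 1.0, Difference is bounded by 2.0 and the result stays within [0, 1].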
+
+double SampleOverlapAggregator::weightForFuncSimilarity(
+ double FuncInternalSimilarity, uint64_t BaseFuncSample,
+ uint64_t TestFuncSample) const {
+ // Compute the weight as the distance between the function weights in two
+ // profiles.
+ double BaseFrac = 0.0;
+ double TestFrac = 0.0;
+ assert(ProfOverlap.BaseSample > 0 &&
+ "Total samples in base profile should be greater than 0");
+ BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
+ assert(ProfOverlap.TestSample > 0 &&
+ "Total samples in test profile should be greater than 0");
+ TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
+ double WeightDistance = std::fabs(BaseFrac - TestFrac);
+
+ // Take WeightDistance into the similarity.
+ return FuncInternalSimilarity * (1 - WeightDistance);
+}
+
+double
+SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
+ uint64_t BaseFuncSample,
+ uint64_t TestFuncSample) const {
+
+ double BaseFrac = 0.0;
+ double TestFrac = 0.0;
+ assert(ProfOverlap.BaseSample > 0 &&
+ "Total samples in base profile should be greater than 0");
+ BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
+ assert(ProfOverlap.TestSample > 0 &&
+ "Total samples in test profile should be greater than 0");
+ TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
+ return FuncSimilarity * (BaseFrac + TestFrac);
+}
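Carrying the same numbers forward, with assumed profile weights: if the function-internal similarity is 0.85 and the function accounts for 20% of base-profile samples (WB = 0.2) and 10% of test-profile samples (WT = 0.1), weightForFuncSimilarity() gives FS = 0.85 * (1 - |0.2 - 0.1|) = 0.765, and weightByImportance() then contributes FS * (0.2 + 0.1) / 2 ≈ 0.115 to the program-level similarity.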
+
+double SampleOverlapAggregator::computeSampleFunctionOverlap(
+ const sampleprof::FunctionSamples *BaseFunc,
+ const sampleprof::FunctionSamples *TestFunc,
+ SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
+ uint64_t TestFuncSample) {
+  // Default function-internal similarity before weighting, meaning the two
+  // functions have no overlap.
+ const double DefaultFuncInternalSimilarity = 0;
+ double FuncSimilarity;
+ double FuncInternalSimilarity;
+
+ // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
+ // In this case, we use DefaultFuncInternalSimilarity as the function internal
+ // similarity.
+ if (!BaseFunc || !TestFunc) {
+ FuncInternalSimilarity = DefaultFuncInternalSimilarity;
+ } else {
+ assert(FuncOverlap != nullptr &&
+ "FuncOverlap should be provided in this case");
+ FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
+ *BaseFunc, *TestFunc, *FuncOverlap);
+ // Now, FuncInternalSimilarity may be a little less than 0 due to
+ // imprecision of floating point accumulations. Make it zero if the
+ // difference is below Epsilon.
+ FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
+ ? 0
+ : FuncInternalSimilarity;
+ }
+ FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
+ BaseFuncSample, TestFuncSample);
+ return FuncSimilarity;
+}
+
+void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
+ using namespace sampleprof;
+
+ StringMap<const FunctionSamples *> BaseFuncProf;
+ const auto &BaseProfiles = BaseReader->getProfiles();
+ for (const auto &BaseFunc : BaseProfiles) {
+ BaseFuncProf.try_emplace(BaseFunc.second.getName(), &(BaseFunc.second));
+ }
+ ProfOverlap.UnionCount = BaseFuncProf.size();
+
+ const auto &TestProfiles = TestReader->getProfiles();
+ for (const auto &TestFunc : TestProfiles) {
+ SampleOverlapStats FuncOverlap;
+ FuncOverlap.TestName = TestFunc.second.getName();
+ assert(TestStats.count(FuncOverlap.TestName) &&
+ "TestStats should have records for all functions in test profile "
+ "except inlinees");
+ FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
+
+ const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
+ if (Match == BaseFuncProf.end()) {
+ const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
+ ++ProfOverlap.TestUniqueCount;
+ ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
+ FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
+
+ updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount);
+
+ double FuncSimilarity = computeSampleFunctionOverlap(
+ nullptr, nullptr, nullptr, 0, FuncStats.SampleSum);
+ ProfOverlap.Similarity +=
+ weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum);
+
+ ++ProfOverlap.UnionCount;
+ ProfOverlap.UnionSample += FuncStats.SampleSum;
+ } else {
+ ++ProfOverlap.OverlapCount;
+
+ // Two functions match with each other. Compute function-level overlap and
+ // aggregate them into profile-level overlap.
+ FuncOverlap.BaseName = Match->second->getName();
+ assert(BaseStats.count(FuncOverlap.BaseName) &&
+ "BaseStats should have records for all functions in base profile "
+ "except inlinees");
+ FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
+
+ FuncOverlap.Similarity = computeSampleFunctionOverlap(
+ Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample,
+ FuncOverlap.TestSample);
+ ProfOverlap.Similarity +=
+ weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample,
+ FuncOverlap.TestSample);
+ ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
+ ProfOverlap.UnionSample += FuncOverlap.UnionSample;
+
+ // Accumulate the percentage of base unique and test unique samples into
+ // ProfOverlap.
+ ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
+ ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
+
+ // Remove matched base functions for later reporting functions not found
+ // in test profile.
+ BaseFuncProf.erase(Match);
+ }
+
+ // Print function-level similarity information if specified by options.
+ assert(TestStats.count(FuncOverlap.TestName) &&
+ "TestStats should have records for all functions in test profile "
+ "except inlinees");
+ if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
+ (Match != BaseFuncProf.end() &&
+ FuncOverlap.Similarity < LowSimilarityThreshold) ||
+ (Match != BaseFuncProf.end() && !FuncFilter.NameFilter.empty() &&
+ FuncOverlap.BaseName.find(FuncFilter.NameFilter) !=
+ FuncOverlap.BaseName.npos)) {
+ assert(ProfOverlap.BaseSample > 0 &&
+ "Total samples in base profile should be greater than 0");
+ FuncOverlap.BaseWeight =
+ static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
+ assert(ProfOverlap.TestSample > 0 &&
+ "Total samples in test profile should be greater than 0");
+ FuncOverlap.TestWeight =
+ static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
+ FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap);
+ }
+ }
+
+ // Traverse through functions in base profile but not in test profile.
+ for (const auto &F : BaseFuncProf) {
+ assert(BaseStats.count(F.second->getName()) &&
+ "BaseStats should have records for all functions in base profile "
+ "except inlinees");
+ const FuncSampleStats &FuncStats = BaseStats[F.second->getName()];
+ ++ProfOverlap.BaseUniqueCount;
+ ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
+
+ updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount);
+
+ double FuncSimilarity = computeSampleFunctionOverlap(
+ nullptr, nullptr, nullptr, FuncStats.SampleSum, 0);
+ ProfOverlap.Similarity +=
+ weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0);
+
+ ProfOverlap.UnionSample += FuncStats.SampleSum;
+ }
+
+ // Now, ProfSimilarity may be a little greater than 1 due to imprecision
+ // of floating point accumulations. Make it 1.0 if the difference is below
+ // Epsilon.
+ ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon)
+ ? 1
+ : ProfOverlap.Similarity;
+
+ computeHotFuncOverlap();
+}
+
+void SampleOverlapAggregator::initializeSampleProfileOverlap() {
+ const auto &BaseProf = BaseReader->getProfiles();
+ for (const auto &I : BaseProf) {
+ ++ProfOverlap.BaseCount;
+ FuncSampleStats FuncStats;
+ getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
+ ProfOverlap.BaseSample += FuncStats.SampleSum;
+ BaseStats.try_emplace(I.second.getName(), FuncStats);
+ }
+
+ const auto &TestProf = TestReader->getProfiles();
+ for (const auto &I : TestProf) {
+ ++ProfOverlap.TestCount;
+ FuncSampleStats FuncStats;
+ getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
+ ProfOverlap.TestSample += FuncStats.SampleSum;
+ TestStats.try_emplace(I.second.getName(), FuncStats);
+ }
+
+ ProfOverlap.BaseName = StringRef(BaseFilename);
+ ProfOverlap.TestName = StringRef(TestFilename);
+}
+
+void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
+ using namespace sampleprof;
+
+ if (FuncSimilarityDump.empty())
+ return;
+
+ formatted_raw_ostream FOS(OS);
+ FOS << "Function-level details:\n";
+ FOS << "Base weight";
+ FOS.PadToColumn(TestWeightCol);
+ FOS << "Test weight";
+ FOS.PadToColumn(SimilarityCol);
+ FOS << "Similarity";
+ FOS.PadToColumn(OverlapCol);
+ FOS << "Overlap";
+ FOS.PadToColumn(BaseUniqueCol);
+ FOS << "Base unique";
+ FOS.PadToColumn(TestUniqueCol);
+ FOS << "Test unique";
+ FOS.PadToColumn(BaseSampleCol);
+ FOS << "Base samples";
+ FOS.PadToColumn(TestSampleCol);
+ FOS << "Test samples";
+ FOS.PadToColumn(FuncNameCol);
+ FOS << "Function name\n";
+ for (const auto &F : FuncSimilarityDump) {
+ double OverlapPercent =
+ F.second.UnionSample > 0
+ ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
+ : 0;
+ double BaseUniquePercent =
+ F.second.BaseSample > 0
+ ? static_cast<double>(F.second.BaseUniqueSample) /
+ F.second.BaseSample
+ : 0;
+ double TestUniquePercent =
+ F.second.TestSample > 0
+ ? static_cast<double>(F.second.TestUniqueSample) /
+ F.second.TestSample
+ : 0;
+
+ FOS << format("%.2f%%", F.second.BaseWeight * 100);
+ FOS.PadToColumn(TestWeightCol);
+ FOS << format("%.2f%%", F.second.TestWeight * 100);
+ FOS.PadToColumn(SimilarityCol);
+ FOS << format("%.2f%%", F.second.Similarity * 100);
+ FOS.PadToColumn(OverlapCol);
+ FOS << format("%.2f%%", OverlapPercent * 100);
+ FOS.PadToColumn(BaseUniqueCol);
+ FOS << format("%.2f%%", BaseUniquePercent * 100);
+ FOS.PadToColumn(TestUniqueCol);
+ FOS << format("%.2f%%", TestUniquePercent * 100);
+ FOS.PadToColumn(BaseSampleCol);
+ FOS << F.second.BaseSample;
+ FOS.PadToColumn(TestSampleCol);
+ FOS << F.second.TestSample;
+ FOS.PadToColumn(FuncNameCol);
+ FOS << F.second.TestName << "\n";
+ }
+}
+
+void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
+ OS << "Profile overlap infomation for base_profile: " << ProfOverlap.BaseName
+ << " and test_profile: " << ProfOverlap.TestName << "\nProgram level:\n";
+
+ OS << " Whole program profile similarity: "
+ << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n";
+
+ assert(ProfOverlap.UnionSample > 0 &&
+ "Total samples in two profile should be greater than 0");
+ double OverlapPercent =
+ static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
+ assert(ProfOverlap.BaseSample > 0 &&
+ "Total samples in base profile should be greater than 0");
+ double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
+ ProfOverlap.BaseSample;
+ assert(ProfOverlap.TestSample > 0 &&
+ "Total samples in test profile should be greater than 0");
+ double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
+ ProfOverlap.TestSample;
+
+ OS << " Whole program sample overlap: "
+ << format("%.3f%%", OverlapPercent * 100) << "\n";
+ OS << " percentage of samples unique in base profile: "
+ << format("%.3f%%", BaseUniquePercent * 100) << "\n";
+ OS << " percentage of samples unique in test profile: "
+ << format("%.3f%%", TestUniquePercent * 100) << "\n";
+ OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n"
+ << " total samples in test profile: " << ProfOverlap.TestSample << "\n";
+
+ assert(ProfOverlap.UnionCount > 0 &&
+ "There should be at least one function in two input profiles");
+ double FuncOverlapPercent =
+ static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
+ OS << " Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100)
+ << "\n";
+ OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n";
+ OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount
+ << "\n";
+ OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount
+ << "\n";
+}
+
+void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
+ raw_fd_ostream &OS) const {
+ assert(HotFuncOverlap.UnionCount > 0 &&
+ "There should be at least one hot function in two input profiles");
+ OS << " Hot-function overlap: "
+ << format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) /
+ HotFuncOverlap.UnionCount * 100)
+ << "\n";
+ OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
+ OS << " hot functions unique in base profile: "
+ << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
+ OS << " hot functions unique in test profile: "
+ << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
+
+ assert(HotBlockOverlap.UnionCount > 0 &&
+ "There should be at least one hot block in two input profiles");
+ OS << " Hot-block overlap: "
+ << format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) /
+ HotBlockOverlap.UnionCount * 100)
+ << "\n";
+ OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
+ OS << " hot blocks unique in base profile: "
+ << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
+ OS << " hot blocks unique in test profile: "
+ << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
+}
+
+std::error_code SampleOverlapAggregator::loadProfiles() {
+ using namespace sampleprof;
+
+ LLVMContext Context;
+ auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context);
+ if (std::error_code EC = BaseReaderOrErr.getError())
+ exitWithErrorCode(EC, BaseFilename);
+
+ auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context);
+ if (std::error_code EC = TestReaderOrErr.getError())
+ exitWithErrorCode(EC, TestFilename);
+
+ BaseReader = std::move(BaseReaderOrErr.get());
+ TestReader = std::move(TestReaderOrErr.get());
+
+ if (std::error_code EC = BaseReader->read())
+ exitWithErrorCode(EC, BaseFilename);
+ if (std::error_code EC = TestReader->read())
+ exitWithErrorCode(EC, TestFilename);
+ if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
+ exitWithError(
+ "cannot compare probe-based profile with non-probe-based profile");
+
+ // Load BaseHotThreshold and TestHotThreshold from the 99th-percentile
+ // entry of each profile summary.
+ const uint64_t HotCutoff = 990000;
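+ // Detailed-summary cutoffs are expressed in parts per million, so 990000
+ // corresponds to 99%; the matching entry's MinCount becomes the hot
+ // threshold for that profile.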
+ ProfileSummary &BasePS = BaseReader->getSummary();
+ for (const auto &SummaryEntry : BasePS.getDetailedSummary()) {
+ if (SummaryEntry.Cutoff == HotCutoff) {
+ BaseHotThreshold = SummaryEntry.MinCount;
+ break;
+ }
+ }
+
+ ProfileSummary &TestPS = TestReader->getSummary();
+ for (const auto &SummaryEntry : TestPS.getDetailedSummary()) {
+ if (SummaryEntry.Cutoff == HotCutoff) {
+ TestHotThreshold = SummaryEntry.MinCount;
+ break;
+ }
+ }
+ return std::error_code();
+}
+
+void overlapSampleProfile(const std::string &BaseFilename,
+ const std::string &TestFilename,
+ const OverlapFuncFilters &FuncFilter,
+ uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
+ using namespace sampleprof;
+
+ // We use 0.000005 to initialize OverlapAggr.Epsilon because the final
+ // metrics are reported with 2-3 decimal places in percentage form.
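+ // SimilarityCutoff is given as a percentage times 10000 (see the
+ // -similarity-cutoff option), so dividing by 1000000 converts it to a
+ // fraction in [0, 1]; e.g. 990000 becomes 0.99.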
+ SampleOverlapAggregator OverlapAggr(
+ BaseFilename, TestFilename,
+ static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
+ if (std::error_code EC = OverlapAggr.loadProfiles())
+ exitWithErrorCode(EC);
+
+ OverlapAggr.initializeSampleProfileOverlap();
+ if (OverlapAggr.detectZeroSampleProfile(OS))
+ return;
+
+ OverlapAggr.computeSampleProfileOverlap(OS);
+
+ OverlapAggr.dumpProgramSummary(OS);
+ OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
+ OverlapAggr.dumpFuncSimilarity(OS);
+}
+
static int overlap_main(int argc, const char *argv[]) {
cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
cl::desc("<base profile file>"));
@@ -763,6 +1898,15 @@ static int overlap_main(int argc, const char *argv[]) {
cl::opt<std::string> FuncNameFilter(
"function",
cl::desc("Function level overlap information for matching functions"));
+ cl::opt<unsigned long long> SimilarityCutoff(
+ "similarity-cutoff", cl::init(0),
+ cl::desc(
+ "For sample profiles, list function names for overlapped functions "
+ "with similarities below the cutoff (percentage times 10000)."));
+ cl::opt<ProfileKinds> ProfileKind(
+ cl::desc("Profile kind:"), cl::init(instr),
+ cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
+ clEnumVal(sample, "Sample profile")));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n");
std::error_code EC;
@@ -770,9 +1914,14 @@ static int overlap_main(int argc, const char *argv[]) {
if (EC)
exitWithErrorCode(EC, Output);
- overlapInstrProfile(BaseFilename, TestFilename,
- OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS,
- IsCS);
+ if (ProfileKind == instr)
+ overlapInstrProfile(BaseFilename, TestFilename,
+ OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS,
+ IsCS);
+ else
+ overlapSampleProfile(BaseFilename, TestFilename,
+ OverlapFuncFilters{ValueCutoff, FuncNameFilter},
+ SimilarityCutoff, OS);
return 0;
}
@@ -903,6 +2052,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
uint64_t FuncMax = 0;
uint64_t FuncSum = 0;
for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
+ if (Func.Counts[I] == (uint64_t)-1)
+ continue;
FuncMax = std::max(FuncMax, Func.Counts[I]);
FuncSum += Func.Counts[I];
}
@@ -977,8 +2128,11 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (TextFormat)
return 0;
std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
- OS << "Instrumentation level: "
- << (Reader->isIRLevelProfile() ? "IR" : "Front-end") << "\n";
+ bool IsIR = Reader->isIRLevelProfile();
+ OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
+ if (IsIR)
+ OS << " entry_first = " << Reader->instrEntryBBEnabled();
+ OS << "\n";
if (ShowAllFunctions || !ShowFunction.empty())
OS << "Functions shown: " << ShownFunctions << "\n";
OS << "Total functions: " << PS->getNumFunctions() << "\n";
@@ -1062,17 +2216,21 @@ static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
uint64_t HotProfCount, uint64_t TotalProfCount,
const std::string &HotFuncMetric,
raw_fd_ostream &OS) {
- assert(ColumnOffset.size() == ColumnTitle.size());
- assert(ColumnTitle.size() >= 4);
- assert(TotalFuncCount > 0);
+ assert(ColumnOffset.size() == ColumnTitle.size() &&
+ "ColumnOffset and ColumnTitle should have the same size");
+ assert(ColumnTitle.size() >= 4 &&
+ "ColumnTitle should have at least 4 elements");
+ assert(TotalFuncCount > 0 &&
+ "There should be at least one function in the profile");
double TotalProfPercent = 0;
if (TotalProfCount > 0)
- TotalProfPercent = ((double)HotProfCount) / TotalProfCount * 100;
+ TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
formatted_raw_ostream FOS(OS);
FOS << HotFuncCount << " out of " << TotalFuncCount
<< " functions with profile ("
- << format("%.2f%%", (((double)HotFuncCount) / TotalFuncCount * 100))
+ << format("%.2f%%",
+ (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
<< ") are considered hot functions";
if (!HotFuncMetric.empty())
FOS << " (" << HotFuncMetric << ")";
@@ -1096,7 +2254,6 @@ static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
FOS.PadToColumn(ColumnOffset[3]);
FOS << R.FuncName << "\n";
}
- return;
}
static int
@@ -1113,7 +2270,6 @@ showHotFunctionList(const StringMap<sampleprof::FunctionSamples> &Profiles,
break;
}
}
- assert(MinCountThreshold != 0);
// Traverse all functions in the profile and keep only hot functions.
// The following loop also calculates the sum of total samples of all
@@ -1124,18 +2280,16 @@ showHotFunctionList(const StringMap<sampleprof::FunctionSamples> &Profiles,
uint64_t ProfileTotalSample = 0;
uint64_t HotFuncSample = 0;
uint64_t HotFuncCount = 0;
- uint64_t MaxCount = 0;
+
for (const auto &I : Profiles) {
+ FuncSampleStats FuncStats;
const FunctionSamples &FuncProf = I.second;
ProfileTotalSample += FuncProf.getTotalSamples();
- MaxCount = FuncProf.getMaxCountInside();
+ getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold);
- // MinCountThreshold is a block/line threshold computed for a given cutoff.
- // We intentionally compare the maximum sample count in a function with this
- // threshold to get an approximate threshold for hot functions.
- if (MaxCount >= MinCountThreshold) {
+ if (isFunctionHot(FuncStats, MinCountThreshold)) {
HotFunc.emplace(FuncProf.getTotalSamples(),
- std::make_pair(&(I.second), MaxCount));
+ std::make_pair(&(I.second), FuncStats.MaxSample));
HotFuncSample += FuncProf.getTotalSamples();
++HotFuncCount;
}
@@ -1153,9 +2307,9 @@ showHotFunctionList(const StringMap<sampleprof::FunctionSamples> &Profiles,
(ProfileTotalSample > 0)
? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
: 0;
- PrintValues.emplace_back(HotFuncInfo(
- Func.getFuncName(), Func.getTotalSamples(), TotalSamplePercent,
- FuncPair.second.second, Func.getEntrySamples()));
+ PrintValues.emplace_back(
+ HotFuncInfo(Func.getName(), Func.getTotalSamples(), TotalSamplePercent,
+ FuncPair.second.second, Func.getEntrySamples()));
}
dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
Profiles.size(), HotFuncSample, ProfileTotalSample,
@@ -1275,7 +2429,7 @@ static int show_main(int argc, const char *argv[]) {
if (OutputFilename.empty())
OutputFilename = "-";
- if (!Filename.compare(OutputFilename)) {
+ if (Filename == OutputFilename) {
errs() << sys::path::filename(argv[0])
<< ": Input file name cannot be the same as the output file name!\n";
return 1;
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ARMEHABIPrinter.h b/contrib/llvm-project/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
index 2c0912038c31..3d8acbf48fa9 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
@@ -9,7 +9,6 @@
#ifndef LLVM_TOOLS_LLVM_READOBJ_ARMEHABIPRINTER_H
#define LLVM_TOOLS_LLVM_READOBJ_ARMEHABIPRINTER_H
-#include "Error.h"
#include "llvm-readobj.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Object/ELF.h"
@@ -328,7 +327,7 @@ class PrinterContext {
typedef typename ET::Word Elf_Word;
ScopedPrinter &SW;
- const object::ELFFile<ET> *ELF;
+ const object::ELFFile<ET> &ELF;
StringRef FileName;
const Elf_Shdr *Symtab;
ArrayRef<Elf_Word> ShndxTable;
@@ -337,22 +336,23 @@ class PrinterContext {
static uint64_t PREL31(uint32_t Address, uint32_t Place) {
uint64_t Location = Address & 0x7fffffff;
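+ // The encoded offset is a 31-bit signed value; bit 30 is its sign bit, so
+ // sign-extend to 64 bits when that bit is set.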
- if (Location & 0x04000000)
+ if (Location & 0x40000000)
Location |= (uint64_t) ~0x7fffffff;
return Location + Place;
}
- ErrorOr<StringRef> FunctionAtAddress(unsigned Section, uint64_t Address) const;
+ ErrorOr<StringRef> FunctionAtAddress(uint64_t Address,
+ Optional<unsigned> SectionIndex) const;
const Elf_Shdr *FindExceptionTable(unsigned IndexTableIndex,
off_t IndexTableOffset) const;
void PrintIndexTable(unsigned SectionIndex, const Elf_Shdr *IT) const;
- void PrintExceptionTable(const Elf_Shdr *IT, const Elf_Shdr *EHT,
+ void PrintExceptionTable(const Elf_Shdr &EHT,
uint64_t TableEntryOffset) const;
void PrintOpcodes(const uint8_t *Entry, size_t Length, off_t Offset) const;
public:
- PrinterContext(ScopedPrinter &SW, const object::ELFFile<ET> *ELF,
+ PrinterContext(ScopedPrinter &SW, const object::ELFFile<ET> &ELF,
StringRef FileName, const Elf_Shdr *Symtab)
: SW(SW), ELF(ELF), FileName(FileName), Symtab(Symtab) {}
@@ -364,27 +364,31 @@ const size_t PrinterContext<ET>::IndexTableEntrySize = 8;
template <typename ET>
ErrorOr<StringRef>
-PrinterContext<ET>::FunctionAtAddress(unsigned Section,
- uint64_t Address) const {
+PrinterContext<ET>::FunctionAtAddress(uint64_t Address,
+ Optional<unsigned> SectionIndex) const {
if (!Symtab)
- return readobj_error::unknown_symbol;
- auto StrTableOrErr = ELF->getStringTableForSymtab(*Symtab);
+ return inconvertibleErrorCode();
+ auto StrTableOrErr = ELF.getStringTableForSymtab(*Symtab);
if (!StrTableOrErr)
reportError(StrTableOrErr.takeError(), FileName);
StringRef StrTable = *StrTableOrErr;
- for (const Elf_Sym &Sym : unwrapOrError(FileName, ELF->symbols(Symtab)))
- if (Sym.st_shndx == Section && Sym.st_value == Address &&
- Sym.getType() == ELF::STT_FUNC) {
+ for (const Elf_Sym &Sym : unwrapOrError(FileName, ELF.symbols(Symtab))) {
+ if (SectionIndex && *SectionIndex != Sym.st_shndx)
+ continue;
+
+ if (Sym.st_value == Address && Sym.getType() == ELF::STT_FUNC) {
auto NameOrErr = Sym.getName(StrTable);
if (!NameOrErr) {
// TODO: Actually report errors helpfully.
consumeError(NameOrErr.takeError());
- return readobj_error::unknown_symbol;
+ return inconvertibleErrorCode();
}
return *NameOrErr;
}
- return readobj_error::unknown_symbol;
+ }
+
+ return inconvertibleErrorCode();
}
template <typename ET>
@@ -399,16 +403,16 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
/// handling table. Use this symbol to recover the actual exception handling
/// table.
- for (const Elf_Shdr &Sec : unwrapOrError(FileName, ELF->sections())) {
+ for (const Elf_Shdr &Sec : unwrapOrError(FileName, ELF.sections())) {
if (Sec.sh_type != ELF::SHT_REL || Sec.sh_info != IndexSectionIndex)
continue;
- auto SymTabOrErr = ELF->getSection(Sec.sh_link);
+ auto SymTabOrErr = ELF.getSection(Sec.sh_link);
if (!SymTabOrErr)
reportError(SymTabOrErr.takeError(), FileName);
const Elf_Shdr *SymTab = *SymTabOrErr;
- for (const Elf_Rel &R : unwrapOrError(FileName, ELF->rels(&Sec))) {
+ for (const Elf_Rel &R : unwrapOrError(FileName, ELF.rels(Sec))) {
if (R.r_offset != static_cast<unsigned>(IndexTableOffset))
continue;
@@ -418,9 +422,9 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
RelA.r_addend = 0;
const Elf_Sym *Symbol =
- unwrapOrError(FileName, ELF->getRelocationSymbol(&RelA, SymTab));
+ unwrapOrError(FileName, ELF.getRelocationSymbol(RelA, SymTab));
- auto Ret = ELF->getSection(Symbol, SymTab, ShndxTable);
+ auto Ret = ELF.getSection(*Symbol, SymTab, ShndxTable);
if (!Ret)
report_fatal_error(errorToErrorCode(Ret.takeError()).message());
return *Ret;
@@ -430,10 +434,20 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
}
template <typename ET>
-void PrinterContext<ET>::PrintExceptionTable(const Elf_Shdr *IT,
- const Elf_Shdr *EHT,
+static const typename ET::Shdr *
+findSectionContainingAddress(const object::ELFFile<ET> &Obj, StringRef FileName,
+ uint64_t Address) {
+ for (const typename ET::Shdr &Sec : unwrapOrError(FileName, Obj.sections()))
+ if (Address >= Sec.sh_addr && Address < Sec.sh_addr + Sec.sh_size)
+ return &Sec;
+ return nullptr;
+}
+
+template <typename ET>
+void PrinterContext<ET>::PrintExceptionTable(const Elf_Shdr &EHT,
uint64_t TableEntryOffset) const {
- Expected<ArrayRef<uint8_t>> Contents = ELF->getSectionContents(EHT);
+ // TODO: handle failure.
+ Expected<ArrayRef<uint8_t>> Contents = ELF.getSectionContents(EHT);
if (!Contents)
return;
@@ -482,10 +496,14 @@ void PrinterContext<ET>::PrintExceptionTable(const Elf_Shdr *IT,
}
} else {
SW.printString("Model", StringRef("Generic"));
-
- uint64_t Address = PREL31(Word, EHT->sh_addr);
+ const bool IsRelocatable = ELF.getHeader().e_type == ELF::ET_REL;
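+ // In ET_REL objects section addresses are zero, so the PREL31 value is
+ // resolved against the start of the exception table; in linked images it
+ // is resolved relative to the address of the personality word itself.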
+ uint64_t Address = IsRelocatable
+ ? PREL31(Word, EHT.sh_addr)
+ : PREL31(Word, EHT.sh_addr + TableEntryOffset);
SW.printHex("PersonalityRoutineAddress", Address);
- if (ErrorOr<StringRef> Name = FunctionAtAddress(EHT->sh_link, Address))
+ Optional<unsigned> SecIndex =
+ IsRelocatable ? Optional<unsigned>(EHT.sh_link) : None;
+ if (ErrorOr<StringRef> Name = FunctionAtAddress(Address, SecIndex))
SW.printString("PersonalityRoutineName", *Name);
}
}
@@ -500,7 +518,8 @@ void PrinterContext<ET>::PrintOpcodes(const uint8_t *Entry,
template <typename ET>
void PrinterContext<ET>::PrintIndexTable(unsigned SectionIndex,
const Elf_Shdr *IT) const {
- Expected<ArrayRef<uint8_t>> Contents = ELF->getSectionContents(IT);
+ // TODO: handle failure.
+ Expected<ArrayRef<uint8_t>> Contents = ELF.getSectionContents(*IT);
if (!Contents)
return;
@@ -517,6 +536,7 @@ void PrinterContext<ET>::PrintIndexTable(unsigned SectionIndex,
const support::ulittle32_t *Data =
reinterpret_cast<const support::ulittle32_t *>(Contents->data());
const unsigned Entries = IT->sh_size / IndexTableEntrySize;
+ const bool IsRelocatable = ELF.getHeader().e_type == ELF::ET_REL;
ListScope E(SW, "Entries");
for (unsigned Entry = 0; Entry < Entries; ++Entry) {
@@ -532,9 +552,31 @@ void PrinterContext<ET>::PrintIndexTable(unsigned SectionIndex,
continue;
}
- const uint64_t Offset = PREL31(Word0, IT->sh_addr);
- SW.printHex("FunctionAddress", Offset);
- if (ErrorOr<StringRef> Name = FunctionAtAddress(IT->sh_link, Offset))
+ // FIXME: For a relocatable object ideally we might want to:
+ // 1) Find a relocation for the offset of Word0.
+ // 2) Verify this relocation is of an expected type (R_ARM_PREL31) and
+ // verify the symbol index.
+ // 3) Resolve the relocation using its symbol value, addend, etc.
+ // Currently the code assumes that Word0 contains an addend of a
+ // R_ARM_PREL31 REL relocation that references a section symbol. RELA
+ // relocations are not supported; this works because section addresses
+ // are zero in relocatable objects.
+ //
+ // For a non-relocatable object, Word0 contains a place-relative signed
+ // offset to the referenced entity.
+ const uint64_t Address =
+ IsRelocatable
+ ? PREL31(Word0, IT->sh_addr)
+ : PREL31(Word0, IT->sh_addr + Entry * IndexTableEntrySize);
+ SW.printHex("FunctionAddress", Address);
+
+ // In a relocatable output we might have many .ARM.exidx sections linked to
+ // their code sections via the sh_link field. For a non-relocatable ELF file
+ // the sh_link field is not reliable, because we have one .ARM.exidx section
+ // normally, but might have many code sections.
+ Optional<unsigned> SecIndex =
+ IsRelocatable ? Optional<unsigned>(IT->sh_link) : None;
+ if (ErrorOr<StringRef> Name = FunctionAtAddress(Address, SecIndex))
SW.printString("FunctionName", *Name);
if (Word1 == EXIDX_CANTUNWIND) {
@@ -550,18 +592,30 @@ void PrinterContext<ET>::PrintIndexTable(unsigned SectionIndex,
PrintOpcodes(Contents->data() + Entry * IndexTableEntrySize + 4, 3, 1);
} else {
- const Elf_Shdr *EHT =
- FindExceptionTable(SectionIndex, Entry * IndexTableEntrySize + 4);
+ const Elf_Shdr *EHT;
+ uint64_t TableEntryAddress;
+ if (IsRelocatable) {
+ TableEntryAddress = PREL31(Word1, IT->sh_addr);
+ EHT = FindExceptionTable(SectionIndex, Entry * IndexTableEntrySize + 4);
+ } else {
+ TableEntryAddress =
+ PREL31(Word1, IT->sh_addr + Entry * IndexTableEntrySize + 4);
+ EHT = findSectionContainingAddress(ELF, FileName, TableEntryAddress);
+ }
if (EHT)
- if (auto Name = ELF->getSectionName(EHT))
+ // TODO: handle failure.
+ if (Expected<StringRef> Name = ELF.getSectionName(*EHT))
SW.printString("ExceptionHandlingTable", *Name);
- uint64_t TableEntryOffset = PREL31(Word1, IT->sh_addr);
- SW.printHex("TableEntryOffset", TableEntryOffset);
-
- if (EHT)
- PrintExceptionTable(IT, EHT, TableEntryOffset);
+ SW.printHex(IsRelocatable ? "TableEntryOffset" : "TableEntryAddress",
+ TableEntryAddress);
+ if (EHT) {
+ if (IsRelocatable)
+ PrintExceptionTable(*EHT, TableEntryAddress);
+ else
+ PrintExceptionTable(*EHT, TableEntryAddress - EHT->sh_addr);
+ }
}
}
}
@@ -571,12 +625,13 @@ void PrinterContext<ET>::PrintUnwindInformation() const {
DictScope UI(SW, "UnwindInformation");
int SectionIndex = 0;
- for (const Elf_Shdr &Sec : unwrapOrError(FileName, ELF->sections())) {
+ for (const Elf_Shdr &Sec : unwrapOrError(FileName, ELF.sections())) {
if (Sec.sh_type == ELF::SHT_ARM_EXIDX) {
DictScope UIT(SW, "UnwindIndexTable");
SW.printNumber("SectionIndex", SectionIndex);
- if (auto SectionName = ELF->getSectionName(&Sec))
+ // TODO: handle failure.
+ if (Expected<StringRef> SectionName = ELF.getSectionName(Sec))
SW.printString("SectionName", *SectionName);
SW.printHex("SectionOffset", Sec.sh_offset);
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 8f365c5ad082..5995a09514c8 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -62,7 +62,6 @@
// epilogue of the function.
#include "ARMWinEHPrinter.h"
-#include "Error.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ARMWinEH.h"
@@ -168,6 +167,11 @@ const Decoder::RingEntry Decoder::Ring64[] = {
{ 0xff, 0xe3, 1, &Decoder::opcode_nop },
{ 0xff, 0xe4, 1, &Decoder::opcode_end },
{ 0xff, 0xe5, 1, &Decoder::opcode_end_c },
+ { 0xff, 0xe6, 1, &Decoder::opcode_save_next },
+ { 0xff, 0xe8, 1, &Decoder::opcode_trap_frame },
+ { 0xff, 0xe9, 1, &Decoder::opcode_machine_frame },
+ { 0xff, 0xea, 1, &Decoder::opcode_context },
+ { 0xff, 0xec, 1, &Decoder::opcode_clear_unwound_to_call },
};
void Decoder::printRegisters(const std::pair<uint16_t, uint32_t> &RegisterMask) {
@@ -217,7 +221,7 @@ Decoder::getSectionContaining(const COFFObjectFile &COFF, uint64_t VA) {
if (VA >= Address && (VA - Address) <= Size)
return Section;
}
- return readobj_error::unknown_symbol;
+ return inconvertibleErrorCode();
}
ErrorOr<object::SymbolRef> Decoder::getSymbol(const COFFObjectFile &COFF,
@@ -235,7 +239,7 @@ ErrorOr<object::SymbolRef> Decoder::getSymbol(const COFFObjectFile &COFF,
if (*Address == VA)
return Symbol;
}
- return readobj_error::unknown_symbol;
+ return inconvertibleErrorCode();
}
ErrorOr<SymbolRef> Decoder::getRelocatedSymbol(const COFFObjectFile &,
@@ -246,7 +250,7 @@ ErrorOr<SymbolRef> Decoder::getRelocatedSymbol(const COFFObjectFile &,
if (RelocationOffset == Offset)
return *Relocation.getSymbol();
}
- return readobj_error::unknown_symbol;
+ return inconvertibleErrorCode();
}
bool Decoder::opcode_0xxxxxxx(const uint8_t *OC, unsigned &Offset,
@@ -637,7 +641,7 @@ bool Decoder::opcode_save_reg_x(const uint8_t *OC, unsigned &Offset,
Reg += 19;
uint32_t Off = ((OC[Offset + 1] & 0x1F) + 1) << 3;
if (Prologue)
- SW.startLine() << format("0x%02x%02x ; str x%u, [sp, #%u]!\n",
+ SW.startLine() << format("0x%02x%02x ; str x%u, [sp, #-%u]!\n",
OC[Offset], OC[Offset + 1], Reg, Off);
else
SW.startLine() << format("0x%02x%02x ; ldr x%u, [sp], #%u\n",
@@ -703,7 +707,7 @@ bool Decoder::opcode_save_freg(const uint8_t *OC, unsigned &Offset,
Reg >>= 6;
Reg += 8;
uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
- SW.startLine() << format("0x%02x%02x ; %s d%u, [sp, #%u]\n",
+ SW.startLine() << format("0x%02x%02x ; %s d%u, [sp, #%u]\n",
OC[Offset], OC[Offset + 1],
static_cast<const char *>(Prologue ? "str" : "ldr"),
Reg, Off);
@@ -742,7 +746,9 @@ bool Decoder::opcode_alloc_l(const uint8_t *OC, unsigned &Offset,
bool Decoder::opcode_setfp(const uint8_t *OC, unsigned &Offset, unsigned Length,
bool Prologue) {
- SW.startLine() << format("0x%02x ; mov fp, sp\n", OC[Offset]);
+ SW.startLine() << format("0x%02x ; mov %s, %s\n", OC[Offset],
+ static_cast<const char *>(Prologue ? "fp" : "sp"),
+ static_cast<const char *>(Prologue ? "sp" : "fp"));
++Offset;
return false;
}
@@ -750,8 +756,11 @@ bool Decoder::opcode_setfp(const uint8_t *OC, unsigned &Offset, unsigned Length,
bool Decoder::opcode_addfp(const uint8_t *OC, unsigned &Offset, unsigned Length,
bool Prologue) {
unsigned NumBytes = OC[Offset + 1] << 3;
- SW.startLine() << format("0x%02x%02x ; add fp, sp, #%u\n",
- OC[Offset], OC[Offset + 1], NumBytes);
+ SW.startLine() << format(
+ "0x%02x%02x ; %s %s, %s, #%u\n", OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "add" : "sub"),
+ static_cast<const char *>(Prologue ? "fp" : "sp"),
+ static_cast<const char *>(Prologue ? "sp" : "fp"), NumBytes);
Offset += 2;
return false;
}
@@ -777,6 +786,47 @@ bool Decoder::opcode_end_c(const uint8_t *OC, unsigned &Offset, unsigned Length,
return true;
}
+bool Decoder::opcode_save_next(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ if (Prologue)
+ SW.startLine() << format("0x%02x ; save next\n", OC[Offset]);
+ else
+ SW.startLine() << format("0x%02x ; restore next\n",
+ OC[Offset]);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_trap_frame(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ SW.startLine() << format("0x%02x ; trap frame\n", OC[Offset]);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_machine_frame(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ SW.startLine() << format("0x%02x ; machine frame\n",
+ OC[Offset]);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_context(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ SW.startLine() << format("0x%02x ; context\n", OC[Offset]);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_clear_unwound_to_call(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ SW.startLine() << format("0x%02x ; clear unwound to call\n",
+ OC[Offset]);
+ ++Offset;
+ return false;
+}
+
void Decoder::decodeOpcodes(ArrayRef<uint8_t> Opcodes, unsigned Offset,
bool Prologue) {
assert((!Prologue || Offset == 0) && "prologue should always use offset 0");
@@ -1061,6 +1111,145 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
return true;
}
+bool Decoder::dumpPackedARM64Entry(const object::COFFObjectFile &COFF,
+ const SectionRef Section, uint64_t Offset,
+ unsigned Index,
+ const RuntimeFunctionARM64 &RF) {
+ assert((RF.Flag() == RuntimeFunctionFlag::RFF_Packed ||
+ RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment) &&
+ "unpacked entry cannot be treated as a packed entry");
+
+ ErrorOr<SymbolRef> Function = getRelocatedSymbol(COFF, Section, Offset);
+ if (!Function)
+ Function = getSymbol(COFF, RF.BeginAddress, /*FunctionOnly=*/true);
+
+ StringRef FunctionName;
+ uint64_t FunctionAddress;
+ if (Function) {
+ Expected<StringRef> FunctionNameOrErr = Function->getName();
+ if (!FunctionNameOrErr) {
+ std::string Buf;
+ llvm::raw_string_ostream OS(Buf);
+ logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
+ OS.flush();
+ report_fatal_error(Buf);
+ }
+ FunctionName = *FunctionNameOrErr;
+ Expected<uint64_t> FunctionAddressOrErr = Function->getAddress();
+ if (!FunctionAddressOrErr) {
+ std::string Buf;
+ llvm::raw_string_ostream OS(Buf);
+ logAllUnhandledErrors(FunctionAddressOrErr.takeError(), OS);
+ OS.flush();
+ report_fatal_error(Buf);
+ }
+ FunctionAddress = *FunctionAddressOrErr;
+ } else {
+ FunctionAddress = COFF.getPE32PlusHeader()->ImageBase + RF.BeginAddress;
+ }
+
+ SW.printString("Function", formatSymbol(FunctionName, FunctionAddress));
+ SW.printBoolean("Fragment",
+ RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment);
+ SW.printNumber("FunctionLength", RF.FunctionLength());
+ SW.printNumber("RegF", RF.RegF());
+ SW.printNumber("RegI", RF.RegI());
+ SW.printBoolean("HomedParameters", RF.H());
+ SW.printNumber("CR", RF.CR());
+ SW.printNumber("FrameSize", RF.FrameSize() << 4);
+ ListScope PS(SW, "Prologue");
+
+ // Synthesize the equivalent prologue according to the documentation
+ // at https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling,
+ // printed in reverse order compared to the docs, to match how prologues
+ // are printed for the non-packed case.
+ int IntSZ = 8 * RF.RegI();
+ if (RF.CR() == 1)
+ IntSZ += 8;
+ int FpSZ = 8 * RF.RegF();
+ if (RF.RegF())
+ FpSZ += 8;
+ int SavSZ = (IntSZ + FpSZ + 8 * 8 * RF.H() + 0xf) & ~0xf;
+ int LocSZ = (RF.FrameSize() << 4) - SavSZ;
+
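+ // SavSZ is the callee-save area (integer registers, FP registers, lr when
+ // CR == 1, and 64 bytes for homed x0-x7), rounded up to 16 bytes; LocSZ is
+ // the rest of the frame, allocated by the synthesized code below.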
+ if (RF.CR() == 3) {
+ SW.startLine() << "mov x29, sp\n";
+ if (LocSZ <= 512) {
+ SW.startLine() << format("stp x29, lr, [sp, #-%d]!\n", LocSZ);
+ } else {
+ SW.startLine() << "stp x29, lr, [sp, #0]\n";
+ }
+ }
+ if (LocSZ > 4080) {
+ SW.startLine() << format("sub sp, sp, #%d\n", LocSZ - 4080);
+ SW.startLine() << "sub sp, sp, #4080\n";
+ } else if ((RF.CR() != 3 && LocSZ > 0) || LocSZ > 512) {
+ SW.startLine() << format("sub sp, sp, #%d\n", LocSZ);
+ }
+ if (RF.H()) {
+ SW.startLine() << format("stp x6, x7, [sp, #%d]\n", IntSZ + FpSZ + 48);
+ SW.startLine() << format("stp x4, x5, [sp, #%d]\n", IntSZ + FpSZ + 32);
+ SW.startLine() << format("stp x2, x3, [sp, #%d]\n", IntSZ + FpSZ + 16);
+ if (RF.RegI() > 0 || RF.RegF() > 0 || RF.CR() == 1) {
+ SW.startLine() << format("stp x0, x1, [sp, #%d]\n", IntSZ + FpSZ);
+ } else {
+ // This case isn't documented; if neither RegI nor RegF nor CR=1
+ // has decremented the stack pointer by SavSZ, we need to do it here
+ // (as the final stack adjustment of LocSZ excludes SavSZ).
+ SW.startLine() << format("stp x0, x1, [sp, #-%d]!\n", SavSZ);
+ }
+ }
+ int FloatRegs = RF.RegF() > 0 ? RF.RegF() + 1 : 0;
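+ // RegF == 0 means no FP registers were saved; otherwise d8..d(8+RegF) are
+ // saved, i.e. RegF + 1 registers in total.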
+ for (int I = (FloatRegs + 1) / 2 - 1; I >= 0; I--) {
+ if (I == (FloatRegs + 1) / 2 - 1 && FloatRegs % 2 == 1) {
+ // The last register, an odd register without a pair
+ SW.startLine() << format("str d%d, [sp, #%d]\n", 8 + 2 * I,
+ IntSZ + 16 * I);
+ } else if (I == 0 && RF.RegI() == 0 && RF.CR() != 1) {
+ SW.startLine() << format("stp d%d, d%d, [sp, #-%d]!\n", 8 + 2 * I,
+ 8 + 2 * I + 1, SavSZ);
+ } else {
+ SW.startLine() << format("stp d%d, d%d, [sp, #%d]\n", 8 + 2 * I,
+ 8 + 2 * I + 1, IntSZ + 16 * I);
+ }
+ }
+ if (RF.CR() == 1 && (RF.RegI() % 2) == 0) {
+ if (RF.RegI() == 0)
+ SW.startLine() << format("str lr, [sp, #-%d]!\n", SavSZ);
+ else
+ SW.startLine() << format("str lr, [sp, #%d]\n", IntSZ - 8);
+ }
+ for (int I = (RF.RegI() + 1) / 2 - 1; I >= 0; I--) {
+ if (I == (RF.RegI() + 1) / 2 - 1 && RF.RegI() % 2 == 1) {
+ // The last register, an odd register without a pair
+ if (RF.CR() == 1) {
+ if (I == 0) { // If this is the only register pair
+ // CR=1 combined with RegI=1 doesn't map to a documented case;
+ // it doesn't map to any regular unwind info opcode, and the
+ // actual unwinder doesn't support it.
+ SW.startLine() << "INVALID!\n";
+ } else
+ SW.startLine() << format("stp x%d, lr, [sp, #%d]\n", 19 + 2 * I,
+ 16 * I);
+ } else {
+ if (I == 0)
+ SW.startLine() << format("str x%d, [sp, #-%d]!\n", 19 + 2 * I, SavSZ);
+ else
+ SW.startLine() << format("str x%d, [sp, #%d]\n", 19 + 2 * I, 16 * I);
+ }
+ } else if (I == 0) {
+ // The first register pair
+ SW.startLine() << format("stp x19, x20, [sp, #-%d]!\n", SavSZ);
+ } else {
+ SW.startLine() << format("stp x%d, x%d, [sp, #%d]\n", 19 + 2 * I,
+ 19 + 2 * I + 1, 16 * I);
+ }
+ }
+ SW.startLine() << "end\n";
+
+ return true;
+}
+
bool Decoder::dumpProcedureDataEntry(const COFFObjectFile &COFF,
const SectionRef Section, unsigned Index,
ArrayRef<uint8_t> Contents) {
@@ -1073,8 +1262,8 @@ bool Decoder::dumpProcedureDataEntry(const COFFObjectFile &COFF,
if (Entry.Flag() == RuntimeFunctionFlag::RFF_Unpacked)
return dumpUnpackedEntry(COFF, Section, Offset, Index, Entry);
if (isAArch64) {
- SW.startLine() << "Packed unwind data not yet supported for ARM64\n";
- return true;
+ const RuntimeFunctionARM64 EntryARM64(Data);
+ return dumpPackedARM64Entry(COFF, Section, Offset, Index, EntryARM64);
}
return dumpPackedEntry(COFF, Section, Offset, Index, Entry);
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.h b/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
index 5de7062cb1d7..3263841a267b 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
@@ -17,6 +17,7 @@ namespace llvm {
namespace ARM {
namespace WinEH {
class RuntimeFunction;
+class RuntimeFunctionARM64;
class Decoder {
static const size_t PDataEntrySize;
@@ -120,6 +121,14 @@ class Decoder {
bool Prologue);
bool opcode_save_next(const uint8_t *Opcodes, unsigned &Offset,
unsigned Length, bool Prologue);
+ bool opcode_trap_frame(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_machine_frame(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_context(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_clear_unwound_to_call(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
void decodeOpcodes(ArrayRef<uint8_t> Opcodes, unsigned Offset,
bool Prologue);
@@ -146,6 +155,9 @@ class Decoder {
bool dumpPackedEntry(const object::COFFObjectFile &COFF,
const object::SectionRef Section, uint64_t Offset,
unsigned Index, const RuntimeFunction &Entry);
+ bool dumpPackedARM64Entry(const object::COFFObjectFile &COFF,
+ const object::SectionRef Section, uint64_t Offset,
+ unsigned Index, const RuntimeFunctionARM64 &Entry);
bool dumpProcedureDataEntry(const object::COFFObjectFile &COFF,
const object::SectionRef Section, unsigned Entry,
ArrayRef<uint8_t> Contents);
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/COFFDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/COFFDumper.cpp
index 89a904f53ae7..684967f93393 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "ARMWinEHPrinter.h"
-#include "Error.h"
#include "ObjDumper.h"
#include "StackMapPrinter.h"
#include "Win64EHDumper.h"
@@ -60,10 +59,6 @@ using namespace llvm::codeview;
using namespace llvm::support;
using namespace llvm::Win64EH;
-static inline Error createError(const Twine &Err) {
- return make_error<StringError>(Err, object_error::parse_failed);
-}
-
namespace {
struct LoadConfigTables {
@@ -72,6 +67,8 @@ struct LoadConfigTables {
uint32_t GuardFlags = 0;
uint64_t GuardFidTableVA = 0;
uint64_t GuardFidTableCount = 0;
+ uint64_t GuardIatTableVA = 0;
+ uint64_t GuardIatTableCount = 0;
uint64_t GuardLJmpTableVA = 0;
uint64_t GuardLJmpTableCount = 0;
};
@@ -80,7 +77,8 @@ class COFFDumper : public ObjDumper {
public:
friend class COFFObjectDumpDelegate;
COFFDumper(const llvm::object::COFFObjectFile *Obj, ScopedPrinter &Writer)
- : ObjDumper(Writer), Obj(Obj), Writer(Writer), Types(100) {}
+ : ObjDumper(Writer, Obj->getFileName()), Obj(Obj), Writer(Writer),
+ Types(100) {}
void printFileHeaders() override;
void printSectionHeaders() override;
@@ -94,6 +92,7 @@ public:
void printCOFFDirectives() override;
void printCOFFBaseReloc() override;
void printCOFFDebugDirectory() override;
+ void printCOFFTLSDirectory() override;
void printCOFFResources() override;
void printCOFFLoadConfig() override;
void printCodeViewDebugInfo() override;
@@ -121,13 +120,14 @@ private:
void printBaseOfDataField(const pe32plus_header *Hdr);
template <typename T>
void printCOFFLoadConfig(const T *Conf, LoadConfigTables &Tables);
+ template <typename IntTy>
+ void printCOFFTLSDirectory(const coff_tls_directory<IntTy> *TlsTable);
typedef void (*PrintExtraCB)(raw_ostream &, const uint8_t *);
void printRVATable(uint64_t TableVA, uint64_t Count, uint64_t EntrySize,
PrintExtraCB PrintExtra = 0);
void printCodeViewSymbolSection(StringRef SectionName, const SectionRef &Section);
void printCodeViewTypeSection(StringRef SectionName, const SectionRef &Section);
- StringRef getTypeName(TypeIndex Ty);
StringRef getFileNameForFileOffset(uint32_t FileOffset);
void printFileNameForOffset(StringRef Label, uint32_t FileOffset);
void printTypeIndex(StringRef FieldName, TypeIndex TI) {
@@ -243,15 +243,9 @@ private:
namespace llvm {
-std::error_code createCOFFDumper(const object::ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result) {
- const COFFObjectFile *COFFObj = dyn_cast<COFFObjectFile>(Obj);
- if (!COFFObj)
- return readobj_error::unsupported_obj_file_format;
-
- Result.reset(new COFFDumper(COFFObj, Writer));
- return readobj_error::success;
+std::unique_ptr<ObjDumper> createCOFFDumper(const object::COFFObjectFile &Obj,
+ ScopedPrinter &Writer) {
+ return std::make_unique<COFFDumper>(&Obj, Writer);
}
} // namespace llvm
@@ -272,9 +266,9 @@ std::error_code COFFDumper::resolveSymbol(const coff_section *Section,
}
}
if (SymI == Obj->symbol_end())
- return readobj_error::unknown_symbol;
+ return inconvertibleErrorCode();
Sym = *SymI;
- return readobj_error::success;
+ return std::error_code();
}
// Given a section and an offset into this section the function returns the name
@@ -588,7 +582,7 @@ static std::error_code getSymbolAuxData(const COFFObjectFile *Obj,
ArrayRef<uint8_t> AuxData = Obj->getSymbolAuxData(Symbol);
AuxData = AuxData.slice(AuxSymbolIdx * Obj->getSymbolTableEntrySize());
Aux = reinterpret_cast<const T*>(AuxData.data());
- return readobj_error::success;
+ return std::error_code();
}
void COFFDumper::cacheRelocations() {
@@ -815,6 +809,11 @@ void COFFDumper::printCOFFLoadConfig() {
}
}
+ if (Tables.GuardIatTableVA) {
+ ListScope LS(W, "GuardIatTable");
+ printRVATable(Tables.GuardIatTableVA, Tables.GuardIatTableCount, 4);
+ }
+
if (Tables.GuardLJmpTableVA) {
ListScope LS(W, "GuardLJmpTable");
printRVATable(Tables.GuardLJmpTableVA, Tables.GuardLJmpTableCount, 4);
@@ -899,6 +898,9 @@ void COFFDumper::printCOFFLoadConfig(const T *Conf, LoadConfigTables &Tables) {
Conf->GuardRFVerifyStackPointerFunctionPointer);
W.printHex("HotPatchTableOffset", Conf->HotPatchTableOffset);
+ Tables.GuardIatTableVA = Conf->GuardAddressTakenIatEntryTable;
+ Tables.GuardIatTableCount = Conf->GuardAddressTakenIatEntryCount;
+
Tables.GuardLJmpTableVA = Conf->GuardLongJumpTargetTable;
Tables.GuardLJmpTableCount = Conf->GuardLongJumpTargetCount;
}
@@ -2029,3 +2031,27 @@ void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer,
Writer.flush();
}
}
+
+void COFFDumper::printCOFFTLSDirectory() {
+ if (Obj->is64())
+ printCOFFTLSDirectory(Obj->getTLSDirectory64());
+ else
+ printCOFFTLSDirectory(Obj->getTLSDirectory32());
+}
+
+template <typename IntTy>
+void COFFDumper::printCOFFTLSDirectory(
+ const coff_tls_directory<IntTy> *TlsTable) {
+ DictScope D(W, "TLSDirectory");
+ if (!TlsTable)
+ return;
+
+ W.printHex("StartAddressOfRawData", TlsTable->StartAddressOfRawData);
+ W.printHex("EndAddressOfRawData", TlsTable->EndAddressOfRawData);
+ W.printHex("AddressOfIndex", TlsTable->AddressOfIndex);
+ W.printHex("AddressOfCallBacks", TlsTable->AddressOfCallBacks);
+ W.printHex("SizeOfZeroFill", TlsTable->SizeOfZeroFill);
+ W.printFlags("Characteristics", TlsTable->Characteristics,
+ makeArrayRef(ImageSectionCharacteristics),
+ COFF::SectionCharacteristics(COFF::IMAGE_SCN_ALIGN_MASK));
+}
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h b/contrib/llvm-project/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
index 27942224053f..2dfe21684a62 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
@@ -9,38 +9,37 @@
#ifndef LLVM_TOOLS_LLVM_READOBJ_DWARFCFIEHPRINTER_H
#define LLVM_TOOLS_LLVM_READOBJ_DWARFCFIEHPRINTER_H
-#include "Error.h"
#include "llvm-readobj.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/Object/ELF.h"
-#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/Debug.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/type_traits.h"
namespace llvm {
namespace DwarfCFIEH {
-template <typename ELFT>
-class PrinterContext {
+template <typename ELFT> class PrinterContext {
using Elf_Shdr = typename ELFT::Shdr;
using Elf_Phdr = typename ELFT::Phdr;
ScopedPrinter &W;
- const object::ELFObjectFile<ELFT> *ObjF;
+ const object::ELFObjectFile<ELFT> &ObjF;
void printEHFrameHdr(const Elf_Phdr *EHFramePHdr) const;
void printEHFrame(const Elf_Shdr *EHFrameShdr) const;
public:
- PrinterContext(ScopedPrinter &W, const object::ELFObjectFile<ELFT> *ObjF)
+ PrinterContext(ScopedPrinter &W, const object::ELFObjectFile<ELFT> &ObjF)
: W(W), ObjF(ObjF) {}
void printUnwindInformation() const;
@@ -48,11 +47,11 @@ public:
template <class ELFT>
static const typename ELFT::Shdr *
-findSectionByAddress(const object::ELFObjectFile<ELFT> *ObjF, uint64_t Addr) {
+findSectionByAddress(const object::ELFObjectFile<ELFT> &ObjF, uint64_t Addr) {
Expected<typename ELFT::ShdrRange> SectionsOrErr =
- ObjF->getELFFile()->sections();
+ ObjF.getELFFile().sections();
if (!SectionsOrErr)
- reportError(SectionsOrErr.takeError(), ObjF->getFileName());
+ reportError(SectionsOrErr.takeError(), ObjF.getFileName());
for (const typename ELFT::Shdr &Shdr : *SectionsOrErr)
if (Shdr.sh_addr == Addr)
@@ -62,11 +61,11 @@ findSectionByAddress(const object::ELFObjectFile<ELFT> *ObjF, uint64_t Addr) {
template <typename ELFT>
void PrinterContext<ELFT>::printUnwindInformation() const {
- const object::ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ const object::ELFFile<ELFT> &Obj = ObjF.getELFFile();
- Expected<typename ELFT::PhdrRange> PhdrsOrErr = Obj->program_headers();
+ Expected<typename ELFT::PhdrRange> PhdrsOrErr = Obj.program_headers();
if (!PhdrsOrErr)
- reportError(PhdrsOrErr.takeError(), ObjF->getFileName());
+ reportError(PhdrsOrErr.takeError(), ObjF.getFileName());
for (const Elf_Phdr &Phdr : *PhdrsOrErr) {
if (Phdr.p_type != ELF::PT_GNU_EH_FRAME)
@@ -75,20 +74,19 @@ void PrinterContext<ELFT>::printUnwindInformation() const {
if (Phdr.p_memsz != Phdr.p_filesz)
reportError(object::createError(
"p_memsz does not match p_filesz for GNU_EH_FRAME"),
- ObjF->getFileName());
+ ObjF.getFileName());
printEHFrameHdr(&Phdr);
break;
}
- Expected<typename ELFT::ShdrRange> SectionsOrErr =
- ObjF->getELFFile()->sections();
+ Expected<typename ELFT::ShdrRange> SectionsOrErr = Obj.sections();
if (!SectionsOrErr)
- reportError(SectionsOrErr.takeError(), ObjF->getFileName());
+ reportError(SectionsOrErr.takeError(), ObjF.getFileName());
for (const Elf_Shdr &Shdr : *SectionsOrErr) {
- Expected<StringRef> NameOrErr = Obj->getSectionName(&Shdr);
+ Expected<StringRef> NameOrErr = Obj.getSectionName(Shdr);
if (!NameOrErr)
- reportError(NameOrErr.takeError(), ObjF->getFileName());
+ reportError(NameOrErr.takeError(), ObjF.getFileName());
if (*NameOrErr == ".eh_frame")
printEHFrame(&Shdr);
}
@@ -102,18 +100,18 @@ void PrinterContext<ELFT>::printEHFrameHdr(const Elf_Phdr *EHFramePHdr) const {
W.startLine() << format("Offset: 0x%" PRIx64 "\n", (uint64_t)EHFramePHdr->p_offset);
W.startLine() << format("Size: 0x%" PRIx64 "\n", (uint64_t)EHFramePHdr->p_memsz);
- const object::ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ const object::ELFFile<ELFT> &Obj = ObjF.getELFFile();
if (const Elf_Shdr *EHFrameHdr =
findSectionByAddress(ObjF, EHFramePHdr->p_vaddr)) {
- Expected<StringRef> NameOrErr = Obj->getSectionName(EHFrameHdr);
+ Expected<StringRef> NameOrErr = Obj.getSectionName(*EHFrameHdr);
if (!NameOrErr)
- reportError(NameOrErr.takeError(), ObjF->getFileName());
+ reportError(NameOrErr.takeError(), ObjF.getFileName());
W.printString("Corresponding Section", *NameOrErr);
}
- Expected<ArrayRef<uint8_t>> Content = Obj->getSegmentContents(EHFramePHdr);
+ Expected<ArrayRef<uint8_t>> Content = Obj.getSegmentContents(*EHFramePHdr);
if (!Content)
- reportError(Content.takeError(), ObjF->getFileName());
+ reportError(Content.takeError(), ObjF.getFileName());
DataExtractor DE(*Content,
ELFT::TargetEndianness == support::endianness::little,
@@ -127,25 +125,25 @@ void PrinterContext<ELFT>::printEHFrameHdr(const Elf_Phdr *EHFramePHdr) const {
if (Version != 1)
reportError(
object::createError("only version 1 of .eh_frame_hdr is supported"),
- ObjF->getFileName());
+ ObjF.getFileName());
uint64_t EHFramePtrEnc = DE.getU8(&Offset);
W.startLine() << format("eh_frame_ptr_enc: 0x%" PRIx64 "\n", EHFramePtrEnc);
if (EHFramePtrEnc != (dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4))
reportError(object::createError("unexpected encoding eh_frame_ptr_enc"),
- ObjF->getFileName());
+ ObjF.getFileName());
uint64_t FDECountEnc = DE.getU8(&Offset);
W.startLine() << format("fde_count_enc: 0x%" PRIx64 "\n", FDECountEnc);
if (FDECountEnc != dwarf::DW_EH_PE_udata4)
reportError(object::createError("unexpected encoding fde_count_enc"),
- ObjF->getFileName());
+ ObjF.getFileName());
uint64_t TableEnc = DE.getU8(&Offset);
W.startLine() << format("table_enc: 0x%" PRIx64 "\n", TableEnc);
if (TableEnc != (dwarf::DW_EH_PE_datarel | dwarf::DW_EH_PE_sdata4))
reportError(object::createError("unexpected encoding table_enc"),
- ObjF->getFileName());
+ ObjF.getFileName());
auto EHFramePtr = DE.getSigned(&Offset, 4) + EHFrameHdrAddress + 4;
W.startLine() << format("eh_frame_ptr: 0x%" PRIx64 "\n", EHFramePtr);
@@ -156,7 +154,7 @@ void PrinterContext<ELFT>::printEHFrameHdr(const Elf_Phdr *EHFramePHdr) const {
unsigned NumEntries = 0;
uint64_t PrevPC = 0;
while (Offset + 8 <= EHFramePHdr->p_memsz && NumEntries < FDECount) {
- DictScope D(W, std::string("entry ") + std::to_string(NumEntries));
+ DictScope D(W, std::string("entry ") + std::to_string(NumEntries));
auto InitialPC = DE.getSigned(&Offset, 4) + EHFrameHdrAddress;
W.startLine() << format("initial_location: 0x%" PRIx64 "\n", InitialPC);
@@ -165,7 +163,7 @@ void PrinterContext<ELFT>::printEHFrameHdr(const Elf_Phdr *EHFramePHdr) const {
if (InitialPC < PrevPC)
reportError(object::createError("initial_location is out of order"),
- ObjF->getFileName());
+ ObjF.getFileName());
PrevPC = InitialPC;
++NumEntries;
@@ -182,17 +180,20 @@ void PrinterContext<ELFT>::printEHFrame(const Elf_Shdr *EHFrameShdr) const {
W.indent();
Expected<ArrayRef<uint8_t>> DataOrErr =
- ObjF->getELFFile()->getSectionContents(EHFrameShdr);
+ ObjF.getELFFile().getSectionContents(*EHFrameShdr);
if (!DataOrErr)
- reportError(DataOrErr.takeError(), ObjF->getFileName());
+ reportError(DataOrErr.takeError(), ObjF.getFileName());
- DWARFDataExtractor DE(*DataOrErr,
+ // Construct DWARFDataExtractor to handle relocations ("PC Begin" fields).
+ std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(ObjF, nullptr);
+ DWARFDataExtractor DE(DICtx->getDWARFObj(),
+ DICtx->getDWARFObj().getEHFrameSection(),
ELFT::TargetEndianness == support::endianness::little,
ELFT::Is64Bits ? 8 : 4);
- DWARFDebugFrame EHFrame(Triple::ArchType(ObjF->getArch()), /*IsEH=*/true,
+ DWARFDebugFrame EHFrame(Triple::ArchType(ObjF.getArch()), /*IsEH=*/true,
/*EHFrameAddress=*/Address);
if (Error E = EHFrame.parse(DE))
- reportError(std::move(E), ObjF->getFileName());
+ reportError(std::move(E), ObjF.getFileName());
for (const dwarf::FrameEntry &Entry : EHFrame) {
if (const dwarf::CIE *CIE = dyn_cast<dwarf::CIE>(&Entry)) {
@@ -224,7 +225,8 @@ void PrinterContext<ELFT>::printEHFrame(const Elf_Shdr *EHFrameShdr) const {
W.getOStream() << "\n";
W.startLine() << "Program:\n";
W.indent();
- Entry.cfis().dump(W.getOStream(), nullptr, W.getIndentLevel());
+ Entry.cfis().dump(W.getOStream(), DIDumpOptions(), nullptr,
+ W.getIndentLevel());
W.unindent();
W.unindent();
W.getOStream() << "\n";
@@ -232,7 +234,7 @@ void PrinterContext<ELFT>::printEHFrame(const Elf_Shdr *EHFrameShdr) const {
W.unindent();
}
-}
-}
+} // namespace DwarfCFIEH
+} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp
index 15076f1f8933..0f508f8dc0f2 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -13,7 +13,6 @@
#include "ARMEHABIPrinter.h"
#include "DwarfCFIEHPrinter.h"
-#include "Error.h"
#include "ObjDumper.h"
#include "StackMapPrinter.h"
#include "llvm-readobj.h"
@@ -65,7 +64,6 @@
#include <memory>
#include <string>
#include <system_error>
-#include <unordered_set>
#include <vector>
using namespace llvm;
@@ -82,61 +80,37 @@ using namespace ELF;
#define ENUM_ENT_1(enum) \
{ #enum, #enum, ELF::enum }
-#define LLVM_READOBJ_PHDR_ENUM(ns, enum) \
- case ns::enum: \
- return std::string(#enum).substr(3);
-
-#define TYPEDEF_ELF_TYPES(ELFT) \
- using ELFO = ELFFile<ELFT>; \
- using Elf_Addr = typename ELFT::Addr; \
- using Elf_Shdr = typename ELFT::Shdr; \
- using Elf_Sym = typename ELFT::Sym; \
- using Elf_Dyn = typename ELFT::Dyn; \
- using Elf_Dyn_Range = typename ELFT::DynRange; \
- using Elf_Rel = typename ELFT::Rel; \
- using Elf_Rela = typename ELFT::Rela; \
- using Elf_Relr = typename ELFT::Relr; \
- using Elf_Rel_Range = typename ELFT::RelRange; \
- using Elf_Rela_Range = typename ELFT::RelaRange; \
- using Elf_Relr_Range = typename ELFT::RelrRange; \
- using Elf_Phdr = typename ELFT::Phdr; \
- using Elf_Half = typename ELFT::Half; \
- using Elf_Ehdr = typename ELFT::Ehdr; \
- using Elf_Word = typename ELFT::Word; \
- using Elf_Hash = typename ELFT::Hash; \
- using Elf_GnuHash = typename ELFT::GnuHash; \
- using Elf_Note = typename ELFT::Note; \
- using Elf_Sym_Range = typename ELFT::SymRange; \
- using Elf_Versym = typename ELFT::Versym; \
- using Elf_Verneed = typename ELFT::Verneed; \
- using Elf_Vernaux = typename ELFT::Vernaux; \
- using Elf_Verdef = typename ELFT::Verdef; \
- using Elf_Verdaux = typename ELFT::Verdaux; \
- using Elf_CGProfile = typename ELFT::CGProfile; \
- using uintX_t = typename ELFT::uint;
-
namespace {
-template <class ELFT> class DumpStyle;
+template <class ELFT> struct RelSymbol {
+ RelSymbol(const typename ELFT::Sym *S, StringRef N)
+ : Sym(S), Name(N.str()) {}
+ const typename ELFT::Sym *Sym;
+ std::string Name;
+};
/// Represents a contiguous uniform range in the file. We cannot just create a
/// range directly because when creating one of these from the .dynamic table
/// the size, entity size and virtual address are different entries in arbitrary
/// order (DT_REL, DT_RELSZ, DT_RELENT for example).
struct DynRegionInfo {
- DynRegionInfo(StringRef ObjName) : FileName(ObjName) {}
- DynRegionInfo(const void *A, uint64_t S, uint64_t ES, StringRef ObjName)
- : Addr(A), Size(S), EntSize(ES), FileName(ObjName) {}
+ DynRegionInfo(const Binary &Owner, const ObjDumper &D)
+ : Obj(&Owner), Dumper(&D) {}
+ DynRegionInfo(const Binary &Owner, const ObjDumper &D, const uint8_t *A,
+ uint64_t S, uint64_t ES)
+ : Addr(A), Size(S), EntSize(ES), Obj(&Owner), Dumper(&D) {}
/// Address in current address space.
- const void *Addr = nullptr;
+ const uint8_t *Addr = nullptr;
/// Size in bytes of the region.
uint64_t Size = 0;
/// Size of each entity in the region.
uint64_t EntSize = 0;
- /// Name of the file. Used for error reporting.
- StringRef FileName;
+ /// Owner object. Used for error reporting.
+ const Binary *Obj;
+ /// Dumper used for error reporting.
+ const ObjDumper *Dumper;
/// Error prefix. Used for error reporting to provide more information.
std::string Context;
/// Region size name. Used for error reporting.
@@ -149,6 +123,20 @@ struct DynRegionInfo {
const Type *Start = reinterpret_cast<const Type *>(Addr);
if (!Start)
return {Start, Start};
+
+ const uint64_t Offset =
+ Addr - (const uint8_t *)Obj->getMemoryBufferRef().getBufferStart();
+ const uint64_t ObjSize = Obj->getMemoryBufferRef().getBufferSize();
+
+ if (Size > ObjSize - Offset) {
+ Dumper->reportUniqueWarning(
+ "unable to read data at 0x" + Twine::utohexstr(Offset) +
+ " of size 0x" + Twine::utohexstr(Size) + " (" + SizePrintName +
+ "): it goes past the end of the file of size 0x" +
+ Twine::utohexstr(ObjSize));
+ return {Start, Start};
+ }
+
if (EntSize == sizeof(Type) && (Size % EntSize == 0))
return {Start, Start + (Size / EntSize)};
@@ -163,697 +151,415 @@ struct DynRegionInfo {
(" or " + EntSizePrintName + " (0x" + Twine::utohexstr(EntSize) + ")")
.str();
- reportWarning(createError(Msg.c_str()), FileName);
+ Dumper->reportUniqueWarning(Msg);
return {Start, Start};
}
};
-namespace {
-struct VerdAux {
- unsigned Offset;
- std::string Name;
+struct GroupMember {
+ StringRef Name;
+ uint64_t Index;
};
-struct VerDef {
- unsigned Offset;
- unsigned Version;
- unsigned Flags;
- unsigned Ndx;
- unsigned Cnt;
- unsigned Hash;
- std::string Name;
- std::vector<VerdAux> AuxV;
+struct GroupSection {
+ StringRef Name;
+ std::string Signature;
+ uint64_t ShName;
+ uint64_t Index;
+ uint32_t Link;
+ uint32_t Info;
+ uint32_t Type;
+ std::vector<GroupMember> Members;
};
-struct VernAux {
- unsigned Hash;
- unsigned Flags;
- unsigned Other;
- unsigned Offset;
- std::string Name;
-};
+namespace {
-struct VerNeed {
- unsigned Version;
- unsigned Cnt;
- unsigned Offset;
- std::string File;
- std::vector<VernAux> AuxV;
+struct NoteType {
+ uint32_t ID;
+ StringRef Name;
};
} // namespace
+template <class ELFT> class Relocation {
+public:
+ Relocation(const typename ELFT::Rel &R, bool IsMips64EL)
+ : Type(R.getType(IsMips64EL)), Symbol(R.getSymbol(IsMips64EL)),
+ Offset(R.r_offset), Info(R.r_info) {}
+
+ Relocation(const typename ELFT::Rela &R, bool IsMips64EL)
+ : Relocation((const typename ELFT::Rel &)R, IsMips64EL) {
+ Addend = R.r_addend;
+ }
+
+ uint32_t Type;
+ uint32_t Symbol;
+ typename ELFT::uint Offset;
+ typename ELFT::uint Info;
+ Optional<int64_t> Addend;
+};
+
+template <class ELFT> class MipsGOTParser;
+
template <typename ELFT> class ELFDumper : public ObjDumper {
+ LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
+
public:
- ELFDumper(const object::ELFObjectFile<ELFT> *ObjF, ScopedPrinter &Writer);
+ ELFDumper(const object::ELFObjectFile<ELFT> &ObjF, ScopedPrinter &Writer);
- void printFileHeaders() override;
- void printSectionHeaders() override;
- void printRelocations() override;
- void printDependentLibs() override;
- void printDynamicRelocations() override;
- void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override;
- void printHashSymbols() override;
void printUnwindInfo() override;
-
- void printDynamicTable() override;
void printNeededLibraries() override;
- void printProgramHeaders(bool PrintProgramHeaders,
- cl::boolOrDefault PrintSectionMapping) override;
void printHashTable() override;
- void printGnuHashTable(const object::ObjectFile *Obj) override;
+ void printGnuHashTable() override;
void printLoadName() override;
void printVersionInfo() override;
- void printGroupSections() override;
-
void printArchSpecificInfo() override;
-
void printStackMap() const override;
- void printHashHistograms() override;
-
- void printCGProfile() override;
- void printAddrsig() override;
-
- void printNotes() override;
+ const object::ELFObjectFile<ELFT> &getElfObject() const { return ObjF; };
- void printELFLinkerOptions() override;
- void printStackSizes() override;
+ std::string describe(const Elf_Shdr &Sec) const;
- const object::ELFObjectFile<ELFT> *getElfObject() const { return ObjF; };
+ unsigned getHashTableEntSize() const {
+ // EM_S390 and ELF::EM_ALPHA platforms use 8-byte entries in SHT_HASH
+ // sections. This violates the ELF specification.
+ if (Obj.getHeader().e_machine == ELF::EM_S390 ||
+ Obj.getHeader().e_machine == ELF::EM_ALPHA)
+ return 8;
+ return 4;
+ }
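
[Editor's note: as the comment above says, most targets use 4-byte SHT_HASH entries, while S390 and the old Alpha ABI use 8 bytes despite the ELF specification. A minimal illustrative helper follows; the e_machine constants are taken from the ELF gABI / LLVM's ELF.h, everything else is a sketch, not LLVM code.]

    #include <cstdint>

    constexpr uint16_t EM_S390  = 22;      // per the ELF gABI
    constexpr uint16_t EM_ALPHA = 0x9026;  // unofficial Alpha value used by LLVM

    static unsigned hashEntrySize(uint16_t EMachine) {
      // 8-byte words in .hash for S390 and Alpha, 4-byte words elsewhere.
      return (EMachine == EM_S390 || EMachine == EM_ALPHA) ? 8 : 4;
    }
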
-private:
- std::unique_ptr<DumpStyle<ELFT>> ELFDumperStyle;
+ Elf_Dyn_Range dynamic_table() const {
+ // A valid .dynamic section contains an array of entries terminated
+ // with a DT_NULL entry. However, sometimes the section content may
+ // continue past the DT_NULL entry, so to dump the section correctly,
+ // we first find the end of the entries by iterating over them.
+ Elf_Dyn_Range Table = DynamicTable.template getAsArrayRef<Elf_Dyn>();
- TYPEDEF_ELF_TYPES(ELFT)
+ size_t Size = 0;
+ while (Size < Table.size())
+ if (Table[Size++].getTag() == DT_NULL)
+ break;
- DynRegionInfo checkDRI(DynRegionInfo DRI) {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- if (DRI.Addr < Obj->base() ||
- reinterpret_cast<const uint8_t *>(DRI.Addr) + DRI.Size >
- Obj->base() + Obj->getBufSize())
- reportError(errorCodeToError(llvm::object::object_error::parse_failed),
- ObjF->getFileName());
- return DRI;
+ return Table.slice(0, Size);
}
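
[Editor's note: the dynamic_table() scan above stops after the first DT_NULL but keeps that terminating entry in the returned slice. A minimal standalone version of the same loop over a plain array, with a hypothetical Dyn stand-in for Elf_Dyn:]

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    constexpr int64_t DT_NULL_TAG = 0;              // d_tag that ends the table

    struct Dyn { int64_t d_tag; uint64_t d_val; };  // stand-in for Elf_Dyn

    // Number of entries to dump: everything up to and including the first
    // DT_NULL, or the whole table if no terminator is present.
    static size_t dynamicTableSize(const std::vector<Dyn> &Table) {
      size_t Size = 0;
      while (Size < Table.size())
        if (Table[Size++].d_tag == DT_NULL_TAG)
          break;
      return Size;
    }
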
- DynRegionInfo createDRIFrom(const Elf_Phdr *P, uintX_t EntSize) {
- return checkDRI({ObjF->getELFFile()->base() + P->p_offset, P->p_filesz,
- EntSize, ObjF->getFileName()});
+ Elf_Sym_Range dynamic_symbols() const {
+ if (!DynSymRegion)
+ return Elf_Sym_Range();
+ return DynSymRegion->template getAsArrayRef<Elf_Sym>();
}
- DynRegionInfo createDRIFrom(const Elf_Shdr *S) {
- return checkDRI({ObjF->getELFFile()->base() + S->sh_offset, S->sh_size,
- S->sh_entsize, ObjF->getFileName()});
+ const Elf_Shdr *findSectionByName(StringRef Name) const;
+
+ StringRef getDynamicStringTable() const { return DynamicStringTable; }
+
+protected:
+ virtual void printVersionSymbolSection(const Elf_Shdr *Sec) = 0;
+ virtual void printVersionDefinitionSection(const Elf_Shdr *Sec) = 0;
+ virtual void printVersionDependencySection(const Elf_Shdr *Sec) = 0;
+
+ void
+ printDependentLibsHelper(function_ref<void(const Elf_Shdr &)> OnSectionStart,
+ function_ref<void(StringRef, uint64_t)> OnLibEntry);
+
+ virtual void printRelRelaReloc(const Relocation<ELFT> &R,
+ const RelSymbol<ELFT> &RelSym) = 0;
+ virtual void printRelrReloc(const Elf_Relr &R) = 0;
+ virtual void printDynamicRelocHeader(unsigned Type, StringRef Name,
+ const DynRegionInfo &Reg) {}
+ void printReloc(const Relocation<ELFT> &R, unsigned RelIndex,
+ const Elf_Shdr &Sec, const Elf_Shdr *SymTab);
+ void printDynamicReloc(const Relocation<ELFT> &R);
+ void printDynamicRelocationsHelper();
+ void printRelocationsHelper(const Elf_Shdr &Sec);
+ void forEachRelocationDo(
+ const Elf_Shdr &Sec, bool RawRelr,
+ llvm::function_ref<void(const Relocation<ELFT> &, unsigned,
+ const Elf_Shdr &, const Elf_Shdr *)>
+ RelRelaFn,
+ llvm::function_ref<void(const Elf_Relr &)> RelrFn);
+
+ virtual void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset,
+ bool NonVisibilityBitsUsed) const {};
+ virtual void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable,
+ Optional<StringRef> StrTable, bool IsDynamic,
+ bool NonVisibilityBitsUsed) const = 0;
+
+ virtual void printMipsABIFlags() = 0;
+ virtual void printMipsGOT(const MipsGOTParser<ELFT> &Parser) = 0;
+ virtual void printMipsPLT(const MipsGOTParser<ELFT> &Parser) = 0;
+
+ Expected<ArrayRef<Elf_Versym>>
+ getVersionTable(const Elf_Shdr &Sec, ArrayRef<Elf_Sym> *SymTab,
+ StringRef *StrTab, const Elf_Shdr **SymTabSec) const;
+ StringRef getPrintableSectionName(const Elf_Shdr &Sec) const;
+
+ std::vector<GroupSection> getGroups();
+
+ bool printFunctionStackSize(uint64_t SymValue,
+ Optional<const Elf_Shdr *> FunctionSec,
+ const Elf_Shdr &StackSizeSec, DataExtractor Data,
+ uint64_t *Offset);
+ void printStackSize(const Relocation<ELFT> &R, const Elf_Shdr &RelocSec,
+ unsigned Ndx, const Elf_Shdr *SymTab,
+ const Elf_Shdr *FunctionSec, const Elf_Shdr &StackSizeSec,
+ const RelocationResolver &Resolver, DataExtractor Data);
+ virtual void printStackSizeEntry(uint64_t Size, StringRef FuncName) = 0;
+
+ void printRelocatableStackSizes(std::function<void()> PrintHeader);
+ void printNonRelocatableStackSizes(std::function<void()> PrintHeader);
+
+ const object::ELFObjectFile<ELFT> &ObjF;
+ const ELFFile<ELFT> &Obj;
+ StringRef FileName;
+
+ Expected<DynRegionInfo> createDRI(uint64_t Offset, uint64_t Size,
+ uint64_t EntSize) {
+ if (Offset + Size < Offset || Offset + Size > Obj.getBufSize())
+ return createError("offset (0x" + Twine::utohexstr(Offset) +
+ ") + size (0x" + Twine::utohexstr(Size) +
+ ") is greater than the file size (0x" +
+ Twine::utohexstr(Obj.getBufSize()) + ")");
+ return DynRegionInfo(ObjF, *this, Obj.base() + Offset, Size, EntSize);
}
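
[Editor's note: the createDRI check above guards both against a region that extends past the buffer and against 64-bit wraparound of Offset + Size. The predicate in isolation, as an illustrative sketch:]

    #include <cstdint>

    // True when [Offset, Offset + Size) is a valid slice of a BufSize-byte
    // buffer. The first comparison catches uint64_t wraparound of
    // Offset + Size, which would otherwise let the second comparison pass.
    static bool validRegion(uint64_t Offset, uint64_t Size, uint64_t BufSize) {
      return !(Offset + Size < Offset || Offset + Size > BufSize);
    }
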
void printAttributes();
void printMipsReginfo();
void printMipsOptions();
- std::pair<const Elf_Phdr *, const Elf_Shdr *>
- findDynamic(const ELFFile<ELFT> *Obj);
- void loadDynamicTable(const ELFFile<ELFT> *Obj);
- void parseDynamicTable(const ELFFile<ELFT> *Obj);
+ std::pair<const Elf_Phdr *, const Elf_Shdr *> findDynamic();
+ void loadDynamicTable();
+ void parseDynamicTable();
- Expected<StringRef> getSymbolVersion(const Elf_Sym *symb,
+ Expected<StringRef> getSymbolVersion(const Elf_Sym &Sym,
bool &IsDefault) const;
- Error LoadVersionMap() const;
+ Expected<SmallVector<Optional<VersionEntry>, 0> *> getVersionMap() const;
- const object::ELFObjectFile<ELFT> *ObjF;
DynRegionInfo DynRelRegion;
DynRegionInfo DynRelaRegion;
DynRegionInfo DynRelrRegion;
DynRegionInfo DynPLTRelRegion;
Optional<DynRegionInfo> DynSymRegion;
+ DynRegionInfo DynSymTabShndxRegion;
DynRegionInfo DynamicTable;
StringRef DynamicStringTable;
- StringRef SOName = "<Not found>";
const Elf_Hash *HashTable = nullptr;
const Elf_GnuHash *GnuHashTable = nullptr;
const Elf_Shdr *DotSymtabSec = nullptr;
+ const Elf_Shdr *DotDynsymSec = nullptr;
const Elf_Shdr *DotCGProfileSec = nullptr;
const Elf_Shdr *DotAddrsigSec = nullptr;
- StringRef DynSymtabName;
- ArrayRef<Elf_Word> ShndxTable;
+ DenseMap<const Elf_Shdr *, ArrayRef<Elf_Word>> ShndxTables;
+ Optional<uint64_t> SONameOffset;
const Elf_Shdr *SymbolVersionSection = nullptr; // .gnu.version
const Elf_Shdr *SymbolVersionNeedSection = nullptr; // .gnu.version_r
const Elf_Shdr *SymbolVersionDefSection = nullptr; // .gnu.version_d
- struct VersionEntry {
- std::string Name;
- bool IsVerDef;
- };
- mutable SmallVector<Optional<VersionEntry>, 16> VersionMap;
-
- std::unordered_set<std::string> Warnings;
-
-public:
- Elf_Dyn_Range dynamic_table() const {
- // A valid .dynamic section contains an array of entries terminated
- // with a DT_NULL entry. However, sometimes the section content may
- // continue past the DT_NULL entry, so to dump the section correctly,
- // we first find the end of the entries by iterating over them.
- Elf_Dyn_Range Table = DynamicTable.getAsArrayRef<Elf_Dyn>();
-
- size_t Size = 0;
- while (Size < Table.size())
- if (Table[Size++].getTag() == DT_NULL)
- break;
-
- return Table.slice(0, Size);
- }
-
- Optional<DynRegionInfo> getDynSymRegion() const { return DynSymRegion; }
-
- Elf_Sym_Range dynamic_symbols() const {
- if (!DynSymRegion)
- return Elf_Sym_Range();
- return DynSymRegion->getAsArrayRef<Elf_Sym>();
- }
-
- Elf_Rel_Range dyn_rels() const;
- Elf_Rela_Range dyn_relas() const;
- Elf_Relr_Range dyn_relrs() const;
- std::string getFullSymbolName(const Elf_Sym *Symbol,
+ std::string getFullSymbolName(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable,
bool IsDynamic) const;
- Expected<unsigned> getSymbolSectionIndex(const Elf_Sym *Symbol,
- const Elf_Sym *FirstSym) const;
- Expected<StringRef> getSymbolSectionName(const Elf_Sym *Symbol,
+ Expected<unsigned>
+ getSymbolSectionIndex(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable) const;
+ Expected<StringRef> getSymbolSectionName(const Elf_Sym &Symbol,
unsigned SectionIndex) const;
std::string getStaticSymbolName(uint32_t Index) const;
StringRef getDynamicString(uint64_t Value) const;
- Expected<StringRef> getSymbolVersionByIndex(uint32_t VersionSymbolIndex,
- bool &IsDefault) const;
void printSymbolsHelper(bool IsDynamic) const;
std::string getDynamicEntry(uint64_t Type, uint64_t Value) const;
- const Elf_Shdr *getDotSymtabSec() const { return DotSymtabSec; }
- const Elf_Shdr *getDotCGProfileSec() const { return DotCGProfileSec; }
- const Elf_Shdr *getDotAddrsigSec() const { return DotAddrsigSec; }
- ArrayRef<Elf_Word> getShndxTable() const { return ShndxTable; }
- StringRef getDynamicStringTable() const { return DynamicStringTable; }
- const DynRegionInfo &getDynRelRegion() const { return DynRelRegion; }
- const DynRegionInfo &getDynRelaRegion() const { return DynRelaRegion; }
- const DynRegionInfo &getDynRelrRegion() const { return DynRelrRegion; }
- const DynRegionInfo &getDynPLTRelRegion() const { return DynPLTRelRegion; }
- const DynRegionInfo &getDynamicTableRegion() const { return DynamicTable; }
- const Elf_Hash *getHashTable() const { return HashTable; }
- const Elf_GnuHash *getGnuHashTable() const { return GnuHashTable; }
-
- Expected<ArrayRef<Elf_Versym>> getVersionTable(const Elf_Shdr *Sec,
- ArrayRef<Elf_Sym> *SymTab,
- StringRef *StrTab) const;
- Expected<std::vector<VerDef>>
- getVersionDefinitions(const Elf_Shdr *Sec) const;
- Expected<std::vector<VerNeed>>
- getVersionDependencies(const Elf_Shdr *Sec) const;
-
- Expected<std::pair<const Elf_Sym *, std::string>>
- getRelocationTarget(const Elf_Shdr *SymTab, const Elf_Rela &R) const;
-
- std::function<Error(const Twine &Msg)> WarningHandler;
- void reportUniqueWarning(Error Err) const;
+ Expected<RelSymbol<ELFT>> getRelocationTarget(const Relocation<ELFT> &R,
+ const Elf_Shdr *SymTab) const;
+
+ ArrayRef<Elf_Word> getShndxTable(const Elf_Shdr *Symtab) const;
+
+private:
+ mutable SmallVector<Optional<VersionEntry>, 0> VersionMap;
};
template <class ELFT>
-static Expected<StringRef> getLinkAsStrtab(const ELFFile<ELFT> *Obj,
- const typename ELFT::Shdr *Sec,
- unsigned SecNdx) {
- Expected<const typename ELFT::Shdr *> StrTabSecOrErr =
- Obj->getSection(Sec->sh_link);
- if (!StrTabSecOrErr)
- return createError("invalid section linked to " +
- object::getELFSectionTypeName(
- Obj->getHeader()->e_machine, Sec->sh_type) +
- " section with index " + Twine(SecNdx) + ": " +
- toString(StrTabSecOrErr.takeError()));
-
- Expected<StringRef> StrTabOrErr = Obj->getStringTable(*StrTabSecOrErr);
- if (!StrTabOrErr)
- return createError("invalid string table linked to " +
- object::getELFSectionTypeName(
- Obj->getHeader()->e_machine, Sec->sh_type) +
- " section with index " + Twine(SecNdx) + ": " +
- toString(StrTabOrErr.takeError()));
- return *StrTabOrErr;
+std::string ELFDumper<ELFT>::describe(const Elf_Shdr &Sec) const {
+ return ::describe(Obj, Sec);
}
-// Returns the linked symbol table and associated string table for a given section.
+namespace {
+
+template <class ELFT> struct SymtabLink {
+ typename ELFT::SymRange Symbols;
+ StringRef StringTable;
+ const typename ELFT::Shdr *SymTab;
+};
+
+// Returns the linked symbol table, symbols and associated string table for a
+// given section.
template <class ELFT>
-static Expected<std::pair<typename ELFT::SymRange, StringRef>>
-getLinkAsSymtab(const ELFFile<ELFT> *Obj, const typename ELFT::Shdr *Sec,
- unsigned SecNdx, unsigned ExpectedType) {
+Expected<SymtabLink<ELFT>> getLinkAsSymtab(const ELFFile<ELFT> &Obj,
+ const typename ELFT::Shdr &Sec,
+ unsigned ExpectedType) {
Expected<const typename ELFT::Shdr *> SymtabOrErr =
- Obj->getSection(Sec->sh_link);
+ Obj.getSection(Sec.sh_link);
if (!SymtabOrErr)
- return createError("invalid section linked to " +
- object::getELFSectionTypeName(
- Obj->getHeader()->e_machine, Sec->sh_type) +
- " section with index " + Twine(SecNdx) + ": " +
- toString(SymtabOrErr.takeError()));
+ return createError("invalid section linked to " + describe(Obj, Sec) +
+ ": " + toString(SymtabOrErr.takeError()));
if ((*SymtabOrErr)->sh_type != ExpectedType)
return createError(
- "invalid section linked to " +
- object::getELFSectionTypeName(Obj->getHeader()->e_machine,
- Sec->sh_type) +
- " section with index " + Twine(SecNdx) + ": expected " +
- object::getELFSectionTypeName(Obj->getHeader()->e_machine,
- ExpectedType) +
+ "invalid section linked to " + describe(Obj, Sec) + ": expected " +
+ object::getELFSectionTypeName(Obj.getHeader().e_machine, ExpectedType) +
", but got " +
- object::getELFSectionTypeName(Obj->getHeader()->e_machine,
+ object::getELFSectionTypeName(Obj.getHeader().e_machine,
(*SymtabOrErr)->sh_type));
- Expected<StringRef> StrTabOrErr =
- getLinkAsStrtab(Obj, *SymtabOrErr, Sec->sh_link);
+ Expected<StringRef> StrTabOrErr = Obj.getLinkAsStrtab(**SymtabOrErr);
if (!StrTabOrErr)
return createError(
"can't get a string table for the symbol table linked to " +
- object::getELFSectionTypeName(Obj->getHeader()->e_machine,
- Sec->sh_type) +
- " section with index " + Twine(SecNdx) + ": " +
- toString(StrTabOrErr.takeError()));
+ describe(Obj, Sec) + ": " + toString(StrTabOrErr.takeError()));
- Expected<typename ELFT::SymRange> SymsOrErr = Obj->symbols(*SymtabOrErr);
+ Expected<typename ELFT::SymRange> SymsOrErr = Obj.symbols(*SymtabOrErr);
if (!SymsOrErr)
- return createError(
- "unable to read symbols from the symbol table with index " +
- Twine(Sec->sh_link) + ": " + toString(SymsOrErr.takeError()));
+ return createError("unable to read symbols from the " + describe(Obj, Sec) +
+ ": " + toString(SymsOrErr.takeError()));
- return std::make_pair(*SymsOrErr, *StrTabOrErr);
+ return SymtabLink<ELFT>{*SymsOrErr, *StrTabOrErr, *SymtabOrErr};
}
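
[Editor's note: getLinkAsSymtab follows a section's sh_link to its symbol table, checks the expected section type, and then resolves that table's own sh_link to a string table. A compressed standalone outline of the same chain, using hypothetical section records rather than the LLVM types:]

    #include <cstdint>
    #include <optional>
    #include <vector>

    struct Section { uint32_t sh_type; uint32_t sh_link; };

    constexpr uint32_t SHT_DYNSYM = 11;  // expected type for version sections

    // Follows Sec.sh_link twice: section -> symbol table -> string table.
    // Returns the string-table index, or std::nullopt on any inconsistency.
    static std::optional<uint32_t>
    linkedStrTabIndex(const std::vector<Section> &Sections, const Section &Sec,
                      uint32_t ExpectedType = SHT_DYNSYM) {
      if (Sec.sh_link >= Sections.size())
        return std::nullopt;            // "invalid section linked to ..."
      const Section &SymTab = Sections[Sec.sh_link];
      if (SymTab.sh_type != ExpectedType)
        return std::nullopt;            // wrong sh_type
      if (SymTab.sh_link >= Sections.size())
        return std::nullopt;            // "can't get a string table ..."
      return SymTab.sh_link;
    }
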
+} // namespace
+
template <class ELFT>
Expected<ArrayRef<typename ELFT::Versym>>
-ELFDumper<ELFT>::getVersionTable(const Elf_Shdr *Sec, ArrayRef<Elf_Sym> *SymTab,
- StringRef *StrTab) const {
- assert((!SymTab && !StrTab) || (SymTab && StrTab));
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
-
- if (uintptr_t(Obj->base() + Sec->sh_offset) % sizeof(uint16_t) != 0)
- return createError("the SHT_GNU_versym section with index " +
- Twine(SecNdx) + " is misaligned");
+ELFDumper<ELFT>::getVersionTable(const Elf_Shdr &Sec, ArrayRef<Elf_Sym> *SymTab,
+ StringRef *StrTab,
+ const Elf_Shdr **SymTabSec) const {
+ assert((!SymTab && !StrTab && !SymTabSec) || (SymTab && StrTab && SymTabSec));
+ if (reinterpret_cast<uintptr_t>(Obj.base() + Sec.sh_offset) %
+ sizeof(uint16_t) !=
+ 0)
+ return createError("the " + describe(Sec) + " is misaligned");
Expected<ArrayRef<Elf_Versym>> VersionsOrErr =
- Obj->template getSectionContentsAsArray<Elf_Versym>(Sec);
+ Obj.template getSectionContentsAsArray<Elf_Versym>(Sec);
if (!VersionsOrErr)
- return createError(
- "cannot read content of SHT_GNU_versym section with index " +
- Twine(SecNdx) + ": " + toString(VersionsOrErr.takeError()));
+ return createError("cannot read content of " + describe(Sec) + ": " +
+ toString(VersionsOrErr.takeError()));
- Expected<std::pair<ArrayRef<Elf_Sym>, StringRef>> SymTabOrErr =
- getLinkAsSymtab(Obj, Sec, SecNdx, SHT_DYNSYM);
+ Expected<SymtabLink<ELFT>> SymTabOrErr =
+ getLinkAsSymtab(Obj, Sec, SHT_DYNSYM);
if (!SymTabOrErr) {
reportUniqueWarning(SymTabOrErr.takeError());
return *VersionsOrErr;
}
- if (SymTabOrErr->first.size() != VersionsOrErr->size())
- reportUniqueWarning(
- createError("SHT_GNU_versym section with index " + Twine(SecNdx) +
- ": the number of entries (" + Twine(VersionsOrErr->size()) +
- ") does not match the number of symbols (" +
- Twine(SymTabOrErr->first.size()) +
- ") in the symbol table with index " + Twine(Sec->sh_link)));
-
- if (SymTab)
- std::tie(*SymTab, *StrTab) = *SymTabOrErr;
- return *VersionsOrErr;
-}
-
-template <class ELFT>
-Expected<std::vector<VerDef>>
-ELFDumper<ELFT>::getVersionDefinitions(const Elf_Shdr *Sec) const {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
-
- Expected<StringRef> StrTabOrErr = getLinkAsStrtab(Obj, Sec, SecNdx);
- if (!StrTabOrErr)
- return StrTabOrErr.takeError();
-
- Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj->getSectionContents(Sec);
- if (!ContentsOrErr)
- return createError(
- "cannot read content of SHT_GNU_verdef section with index " +
- Twine(SecNdx) + ": " + toString(ContentsOrErr.takeError()));
-
- const uint8_t *Start = ContentsOrErr->data();
- const uint8_t *End = Start + ContentsOrErr->size();
-
- auto ExtractNextAux = [&](const uint8_t *&VerdauxBuf,
- unsigned VerDefNdx) -> Expected<VerdAux> {
- if (VerdauxBuf + sizeof(Elf_Verdaux) > End)
- return createError("invalid SHT_GNU_verdef section with index " +
- Twine(SecNdx) + ": version definition " +
- Twine(VerDefNdx) +
- " refers to an auxiliary entry that goes past the end "
- "of the section");
-
- auto *Verdaux = reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
- VerdauxBuf += Verdaux->vda_next;
-
- VerdAux Aux;
- Aux.Offset = VerdauxBuf - Start;
- if (Verdaux->vda_name <= StrTabOrErr->size())
- Aux.Name = std::string(StrTabOrErr->drop_front(Verdaux->vda_name));
- else
- Aux.Name = "<invalid vda_name: " + to_string(Verdaux->vda_name) + ">";
- return Aux;
- };
-
- std::vector<VerDef> Ret;
- const uint8_t *VerdefBuf = Start;
- for (unsigned I = 1; I <= /*VerDefsNum=*/Sec->sh_info; ++I) {
- if (VerdefBuf + sizeof(Elf_Verdef) > End)
- return createError("invalid SHT_GNU_verdef section with index " +
- Twine(SecNdx) + ": version definition " + Twine(I) +
- " goes past the end of the section");
-
- if (uintptr_t(VerdefBuf) % sizeof(uint32_t) != 0)
- return createError(
- "invalid SHT_GNU_verdef section with index " + Twine(SecNdx) +
- ": found a misaligned version definition entry at offset 0x" +
- Twine::utohexstr(VerdefBuf - Start));
-
- unsigned Version = *reinterpret_cast<const Elf_Half *>(VerdefBuf);
- if (Version != 1)
- return createError("unable to dump SHT_GNU_verdef section with index " +
- Twine(SecNdx) + ": version " + Twine(Version) +
- " is not yet supported");
-
- const Elf_Verdef *D = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
- VerDef &VD = *Ret.emplace(Ret.end());
- VD.Offset = VerdefBuf - Start;
- VD.Version = D->vd_version;
- VD.Flags = D->vd_flags;
- VD.Ndx = D->vd_ndx;
- VD.Cnt = D->vd_cnt;
- VD.Hash = D->vd_hash;
-
- const uint8_t *VerdauxBuf = VerdefBuf + D->vd_aux;
- for (unsigned J = 0; J < D->vd_cnt; ++J) {
- if (uintptr_t(VerdauxBuf) % sizeof(uint32_t) != 0)
- return createError("invalid SHT_GNU_verdef section with index " +
- Twine(SecNdx) +
- ": found a misaligned auxiliary entry at offset 0x" +
- Twine::utohexstr(VerdauxBuf - Start));
-
- Expected<VerdAux> AuxOrErr = ExtractNextAux(VerdauxBuf, I);
- if (!AuxOrErr)
- return AuxOrErr.takeError();
-
- if (J == 0)
- VD.Name = AuxOrErr->Name;
- else
- VD.AuxV.push_back(*AuxOrErr);
- }
-
- VerdefBuf += D->vd_next;
- }
-
- return Ret;
-}
-
-template <class ELFT>
-Expected<std::vector<VerNeed>>
-ELFDumper<ELFT>::getVersionDependencies(const Elf_Shdr *Sec) const {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
-
- StringRef StrTab;
- Expected<StringRef> StrTabOrErr = getLinkAsStrtab(Obj, Sec, SecNdx);
- if (!StrTabOrErr)
- reportUniqueWarning(StrTabOrErr.takeError());
- else
- StrTab = *StrTabOrErr;
-
- Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj->getSectionContents(Sec);
- if (!ContentsOrErr)
- return createError(
- "cannot read content of SHT_GNU_verneed section with index " +
- Twine(SecNdx) + ": " + toString(ContentsOrErr.takeError()));
-
- const uint8_t *Start = ContentsOrErr->data();
- const uint8_t *End = Start + ContentsOrErr->size();
- const uint8_t *VerneedBuf = Start;
-
- std::vector<VerNeed> Ret;
- for (unsigned I = 1; I <= /*VerneedNum=*/Sec->sh_info; ++I) {
- if (VerneedBuf + sizeof(Elf_Verdef) > End)
- return createError("invalid SHT_GNU_verneed section with index " +
- Twine(SecNdx) + ": version dependency " + Twine(I) +
- " goes past the end of the section");
+ if (SymTabOrErr->Symbols.size() != VersionsOrErr->size())
+ reportUniqueWarning(describe(Sec) + ": the number of entries (" +
+ Twine(VersionsOrErr->size()) +
+ ") does not match the number of symbols (" +
+ Twine(SymTabOrErr->Symbols.size()) +
+ ") in the symbol table with index " +
+ Twine(Sec.sh_link));
- if (uintptr_t(VerneedBuf) % sizeof(uint32_t) != 0)
- return createError(
- "invalid SHT_GNU_verneed section with index " + Twine(SecNdx) +
- ": found a misaligned version dependency entry at offset 0x" +
- Twine::utohexstr(VerneedBuf - Start));
-
- unsigned Version = *reinterpret_cast<const Elf_Half *>(VerneedBuf);
- if (Version != 1)
- return createError("unable to dump SHT_GNU_verneed section with index " +
- Twine(SecNdx) + ": version " + Twine(Version) +
- " is not yet supported");
-
- const Elf_Verneed *Verneed =
- reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
-
- VerNeed &VN = *Ret.emplace(Ret.end());
- VN.Version = Verneed->vn_version;
- VN.Cnt = Verneed->vn_cnt;
- VN.Offset = VerneedBuf - Start;
-
- if (Verneed->vn_file < StrTab.size())
- VN.File = std::string(StrTab.drop_front(Verneed->vn_file));
- else
- VN.File = "<corrupt vn_file: " + to_string(Verneed->vn_file) + ">";
-
- const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
- for (unsigned J = 0; J < Verneed->vn_cnt; ++J) {
- if (uintptr_t(VernauxBuf) % sizeof(uint32_t) != 0)
- return createError("invalid SHT_GNU_verneed section with index " +
- Twine(SecNdx) +
- ": found a misaligned auxiliary entry at offset 0x" +
- Twine::utohexstr(VernauxBuf - Start));
-
- if (VernauxBuf + sizeof(Elf_Vernaux) > End)
- return createError(
- "invalid SHT_GNU_verneed section with index " + Twine(SecNdx) +
- ": version dependency " + Twine(I) +
- " refers to an auxiliary entry that goes past the end "
- "of the section");
-
- const Elf_Vernaux *Vernaux =
- reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
-
- VernAux &Aux = *VN.AuxV.emplace(VN.AuxV.end());
- Aux.Hash = Vernaux->vna_hash;
- Aux.Flags = Vernaux->vna_flags;
- Aux.Other = Vernaux->vna_other;
- Aux.Offset = VernauxBuf - Start;
- if (StrTab.size() <= Vernaux->vna_name)
- Aux.Name = "<corrupt>";
- else
- Aux.Name = std::string(StrTab.drop_front(Vernaux->vna_name));
-
- VernauxBuf += Vernaux->vna_next;
- }
- VerneedBuf += Verneed->vn_next;
+ if (SymTab) {
+ *SymTab = SymTabOrErr->Symbols;
+ *StrTab = SymTabOrErr->StringTable;
+ *SymTabSec = SymTabOrErr->SymTab;
}
- return Ret;
+ return *VersionsOrErr;
}
template <class ELFT>
void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic) const {
Optional<StringRef> StrTable;
- StringRef SymtabName;
size_t Entries = 0;
Elf_Sym_Range Syms(nullptr, nullptr);
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ const Elf_Shdr *SymtabSec = IsDynamic ? DotDynsymSec : DotSymtabSec;
+
if (IsDynamic) {
StrTable = DynamicStringTable;
Syms = dynamic_symbols();
- SymtabName = DynSymtabName;
Entries = Syms.size();
- } else {
- if (!DotSymtabSec)
- return;
-
+ } else if (DotSymtabSec) {
if (Expected<StringRef> StrTableOrErr =
- Obj->getStringTableForSymtab(*DotSymtabSec))
+ Obj.getStringTableForSymtab(*DotSymtabSec))
StrTable = *StrTableOrErr;
else
- reportUniqueWarning(createError(
+ reportUniqueWarning(
"unable to get the string table for the SHT_SYMTAB section: " +
- toString(StrTableOrErr.takeError())));
+ toString(StrTableOrErr.takeError()));
- if (Expected<Elf_Sym_Range> SymsOrErr = Obj->symbols(DotSymtabSec))
+ if (Expected<Elf_Sym_Range> SymsOrErr = Obj.symbols(DotSymtabSec))
Syms = *SymsOrErr;
else
reportUniqueWarning(
- createError("unable to read symbols from the SHT_SYMTAB section: " +
- toString(SymsOrErr.takeError())));
-
- if (Expected<StringRef> SymtabNameOrErr =
- Obj->getSectionName(DotSymtabSec)) {
- SymtabName = *SymtabNameOrErr;
- } else {
- reportUniqueWarning(
- createError("unable to get the name of the SHT_SYMTAB section: " +
- toString(SymtabNameOrErr.takeError())));
- SymtabName = "<?>";
- }
-
+ "unable to read symbols from the SHT_SYMTAB section: " +
+ toString(SymsOrErr.takeError()));
Entries = DotSymtabSec->getEntityCount();
}
- if (Syms.begin() == Syms.end())
+ if (Syms.empty())
return;
// The st_other field has 2 logical parts. The first two bits hold the symbol
// visibility (STV_*) and the remainder hold other platform-specific values.
- bool NonVisibilityBitsUsed = llvm::find_if(Syms, [](const Elf_Sym &S) {
- return S.st_other & ~0x3;
- }) != Syms.end();
+ bool NonVisibilityBitsUsed =
+ llvm::any_of(Syms, [](const Elf_Sym &S) { return S.st_other & ~0x3; });
+
+ DataRegion<Elf_Word> ShndxTable =
+ IsDynamic ? DataRegion<Elf_Word>(
+ (const Elf_Word *)this->DynSymTabShndxRegion.Addr,
+ this->getElfObject().getELFFile().end())
+ : DataRegion<Elf_Word>(this->getShndxTable(SymtabSec));
- ELFDumperStyle->printSymtabMessage(Obj, SymtabName, Entries,
- NonVisibilityBitsUsed);
- for (const auto &Sym : Syms)
- ELFDumperStyle->printSymbol(Obj, &Sym, Syms.begin(), StrTable, IsDynamic,
- NonVisibilityBitsUsed);
+ printSymtabMessage(SymtabSec, Entries, NonVisibilityBitsUsed);
+ for (const Elf_Sym &Sym : Syms)
+ printSymbol(Sym, &Sym - Syms.begin(), ShndxTable, StrTable, IsDynamic,
+ NonVisibilityBitsUsed);
}
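
[Editor's note: printSymbolsHelper only widens its output when some symbol uses st_other bits beyond the two visibility bits. A standalone illustration of that split; the 0x3 mask follows the ELF gABI definition of STV_*, the helper names are hypothetical.]

    #include <cstdint>

    // The low two bits of st_other are the STV_* visibility; anything above
    // them is platform-specific (e.g. STO_* values on MIPS or PPC64).
    static unsigned visibility(uint8_t StOther) { return StOther & 0x3; }
    static bool usesNonVisibilityBits(uint8_t StOther) { return StOther & ~0x3; }
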
-template <class ELFT> class MipsGOTParser;
+template <typename ELFT> class GNUELFDumper : public ELFDumper<ELFT> {
+ formatted_raw_ostream &OS;
-template <typename ELFT> class DumpStyle {
public:
- using Elf_Shdr = typename ELFT::Shdr;
- using Elf_Sym = typename ELFT::Sym;
- using Elf_Addr = typename ELFT::Addr;
-
- DumpStyle(ELFDumper<ELFT> *Dumper) : Dumper(Dumper) {
- FileName = this->Dumper->getElfObject()->getFileName();
- }
-
- virtual ~DumpStyle() = default;
-
- virtual void printFileHeaders(const ELFFile<ELFT> *Obj) = 0;
- virtual void printGroupSections(const ELFFile<ELFT> *Obj) = 0;
- virtual void printRelocations(const ELFFile<ELFT> *Obj) = 0;
- virtual void printSectionHeaders(const ELFFile<ELFT> *Obj) = 0;
- virtual void printSymbols(const ELFFile<ELFT> *Obj, bool PrintSymbols,
- bool PrintDynamicSymbols) = 0;
- virtual void printHashSymbols(const ELFFile<ELFT> *Obj) {}
- virtual void printDependentLibs(const ELFFile<ELFT> *Obj) = 0;
- virtual void printDynamic(const ELFFile<ELFT> *Obj) {}
- virtual void printDynamicRelocations(const ELFFile<ELFT> *Obj) = 0;
- virtual void printSymtabMessage(const ELFFile<ELFT> *Obj, StringRef Name,
- size_t Offset, bool NonVisibilityBitsUsed) {}
- virtual void printSymbol(const ELFFile<ELFT> *Obj, const Elf_Sym *Symbol,
- const Elf_Sym *FirstSym,
- Optional<StringRef> StrTable, bool IsDynamic,
- bool NonVisibilityBitsUsed) = 0;
- virtual void printProgramHeaders(const ELFFile<ELFT> *Obj,
- bool PrintProgramHeaders,
- cl::boolOrDefault PrintSectionMapping) = 0;
- virtual void printVersionSymbolSection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) = 0;
- virtual void printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) = 0;
- virtual void printVersionDependencySection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) = 0;
- virtual void printHashHistograms(const ELFFile<ELFT> *Obj) = 0;
- virtual void printCGProfile(const ELFFile<ELFT> *Obj) = 0;
- virtual void printAddrsig(const ELFFile<ELFT> *Obj) = 0;
- virtual void printNotes(const ELFFile<ELFT> *Obj) = 0;
- virtual void printELFLinkerOptions(const ELFFile<ELFT> *Obj) = 0;
- virtual void printStackSizes(const ELFObjectFile<ELFT> *Obj) = 0;
- void printNonRelocatableStackSizes(const ELFObjectFile<ELFT> *Obj,
- std::function<void()> PrintHeader);
- void printRelocatableStackSizes(const ELFObjectFile<ELFT> *Obj,
- std::function<void()> PrintHeader);
- void printFunctionStackSize(const ELFObjectFile<ELFT> *Obj, uint64_t SymValue,
- Optional<SectionRef> FunctionSec,
- const StringRef SectionName, DataExtractor Data,
- uint64_t *Offset);
- void printStackSize(const ELFObjectFile<ELFT> *Obj, RelocationRef Rel,
- SectionRef FunctionSec,
- const StringRef &StackSizeSectionName,
- const RelocationResolver &Resolver, DataExtractor Data);
- virtual void printStackSizeEntry(uint64_t Size, StringRef FuncName) = 0;
- virtual void printMipsGOT(const MipsGOTParser<ELFT> &Parser) = 0;
- virtual void printMipsPLT(const MipsGOTParser<ELFT> &Parser) = 0;
- virtual void printMipsABIFlags(const ELFObjectFile<ELFT> *Obj) = 0;
- const ELFDumper<ELFT> *dumper() const { return Dumper; }
-
-protected:
- void printDependentLibsHelper(
- const ELFFile<ELFT> *Obj,
- function_ref<void(const Elf_Shdr &)> OnSectionStart,
- function_ref<void(StringRef, uint64_t)> OnSectionEntry);
-
- void reportUniqueWarning(Error Err) const;
- StringRef FileName;
-
-private:
- const ELFDumper<ELFT> *Dumper;
-};
+ LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
-template <typename ELFT> class GNUStyle : public DumpStyle<ELFT> {
- formatted_raw_ostream &OS;
+ GNUELFDumper(const object::ELFObjectFile<ELFT> &ObjF, ScopedPrinter &Writer)
+ : ELFDumper<ELFT>(ObjF, Writer),
+ OS(static_cast<formatted_raw_ostream &>(Writer.getOStream())) {
+ assert(&this->W.getOStream() == &llvm::fouts());
+ }
-public:
- TYPEDEF_ELF_TYPES(ELFT)
-
- GNUStyle(ScopedPrinter &W, ELFDumper<ELFT> *Dumper)
- : DumpStyle<ELFT>(Dumper),
- OS(static_cast<formatted_raw_ostream&>(W.getOStream())) {
- assert (&W.getOStream() == &llvm::fouts());
- }
-
- void printFileHeaders(const ELFO *Obj) override;
- void printGroupSections(const ELFFile<ELFT> *Obj) override;
- void printRelocations(const ELFO *Obj) override;
- void printSectionHeaders(const ELFO *Obj) override;
- void printSymbols(const ELFO *Obj, bool PrintSymbols,
- bool PrintDynamicSymbols) override;
- void printHashSymbols(const ELFO *Obj) override;
- void printDependentLibs(const ELFFile<ELFT> *Obj) override;
- void printDynamic(const ELFFile<ELFT> *Obj) override;
- void printDynamicRelocations(const ELFO *Obj) override;
- void printSymtabMessage(const ELFO *Obj, StringRef Name, size_t Offset,
- bool NonVisibilityBitsUsed) override;
- void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders,
+ void printFileHeaders() override;
+ void printGroupSections() override;
+ void printRelocations() override;
+ void printSectionHeaders() override;
+ void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override;
+ void printHashSymbols() override;
+ void printSectionDetails() override;
+ void printDependentLibs() override;
+ void printDynamicTable() override;
+ void printDynamicRelocations() override;
+ void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset,
+ bool NonVisibilityBitsUsed) const override;
+ void printProgramHeaders(bool PrintProgramHeaders,
cl::boolOrDefault PrintSectionMapping) override;
- void printVersionSymbolSection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) override;
- void printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) override;
- void printVersionDependencySection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) override;
- void printHashHistograms(const ELFFile<ELFT> *Obj) override;
- void printCGProfile(const ELFFile<ELFT> *Obj) override;
- void printAddrsig(const ELFFile<ELFT> *Obj) override;
- void printNotes(const ELFFile<ELFT> *Obj) override;
- void printELFLinkerOptions(const ELFFile<ELFT> *Obj) override;
- void printStackSizes(const ELFObjectFile<ELFT> *Obj) override;
- void printStackSizeEntry(uint64_t Size, StringRef FuncName) override;
- void printMipsGOT(const MipsGOTParser<ELFT> &Parser) override;
- void printMipsPLT(const MipsGOTParser<ELFT> &Parser) override;
- void printMipsABIFlags(const ELFObjectFile<ELFT> *Obj) override;
+ void printVersionSymbolSection(const Elf_Shdr *Sec) override;
+ void printVersionDefinitionSection(const Elf_Shdr *Sec) override;
+ void printVersionDependencySection(const Elf_Shdr *Sec) override;
+ void printHashHistograms() override;
+ void printCGProfile() override;
+ void printAddrsig() override;
+ void printNotes() override;
+ void printELFLinkerOptions() override;
+ void printStackSizes() override;
private:
void printHashHistogram(const Elf_Hash &HashTable);
void printGnuHashHistogram(const Elf_GnuHash &GnuHashTable);
-
- void printHashTableSymbols(const ELFO *Obj, const Elf_Hash &HashTable);
- void printGnuHashTableSymbols(const ELFO *Obj,
- const Elf_GnuHash &GnuHashTable);
+ void printHashTableSymbols(const Elf_Hash &HashTable);
+ void printGnuHashTableSymbols(const Elf_GnuHash &GnuHashTable);
struct Field {
std::string Str;
@@ -864,8 +570,8 @@ private:
};
template <typename T, typename TEnum>
- std::string printEnum(T Value, ArrayRef<EnumEntry<TEnum>> EnumValues) {
- for (const auto &EnumItem : EnumValues)
+ std::string printEnum(T Value, ArrayRef<EnumEntry<TEnum>> EnumValues) const {
+ for (const EnumEntry<TEnum> &EnumItem : EnumValues)
if (EnumItem.Value == Value)
return std::string(EnumItem.AltName);
return to_hexString(Value, false);
@@ -874,9 +580,9 @@ private:
template <typename T, typename TEnum>
std::string printFlags(T Value, ArrayRef<EnumEntry<TEnum>> EnumValues,
TEnum EnumMask1 = {}, TEnum EnumMask2 = {},
- TEnum EnumMask3 = {}) {
+ TEnum EnumMask3 = {}) const {
std::string Str;
- for (const auto &Flag : EnumValues) {
+ for (const EnumEntry<TEnum> &Flag : EnumValues) {
if (Flag.Value == 0)
continue;
@@ -898,95 +604,85 @@ private:
return Str;
}
- formatted_raw_ostream &printField(struct Field F) {
+ formatted_raw_ostream &printField(struct Field F) const {
if (F.Column != 0)
OS.PadToColumn(F.Column);
OS << F.Str;
OS.flush();
return OS;
}
- void printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, uint32_t Sym,
- StringRef StrTable, uint32_t Bucket);
- void printRelocHeader(unsigned SType);
- void printRelocation(const ELFO *Obj, unsigned SecIndex,
- const Elf_Shdr *SymTab, const Elf_Rela &R,
- unsigned RelIndex, bool IsRela);
- void printRelocation(const ELFO *Obj, const Elf_Sym *Sym,
- StringRef SymbolName, const Elf_Rela &R, bool IsRela);
- void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
+ void printHashedSymbol(const Elf_Sym *Sym, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable, StringRef StrTable,
+ uint32_t Bucket);
+ void printRelrReloc(const Elf_Relr &R) override;
+ void printRelRelaReloc(const Relocation<ELFT> &R,
+ const RelSymbol<ELFT> &RelSym) override;
+ void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable, bool IsDynamic,
- bool NonVisibilityBitsUsed) override;
- std::string getSymbolSectionNdx(const ELFO *Obj, const Elf_Sym *Symbol,
- const Elf_Sym *FirstSym);
- void printDynamicRelocation(const ELFO *Obj, Elf_Rela R, bool IsRela);
- void printProgramHeaders(const ELFO *Obj);
- void printSectionMapping(const ELFO *Obj);
- void printGNUVersionSectionProlog(const ELFFile<ELFT> *Obj,
- const typename ELFT::Shdr *Sec,
+ bool NonVisibilityBitsUsed) const override;
+ void printDynamicRelocHeader(unsigned Type, StringRef Name,
+ const DynRegionInfo &Reg) override;
+
+ std::string getSymbolSectionNdx(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable) const;
+ void printProgramHeaders() override;
+ void printSectionMapping() override;
+ void printGNUVersionSectionProlog(const typename ELFT::Shdr &Sec,
const Twine &Label, unsigned EntriesNum);
-};
-template <class ELFT>
-void ELFDumper<ELFT>::reportUniqueWarning(Error Err) const {
- handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
- cantFail(WarningHandler(EI.message()),
- "WarningHandler should always return ErrorSuccess");
- });
-}
+ void printStackSizeEntry(uint64_t Size, StringRef FuncName) override;
-template <class ELFT>
-void DumpStyle<ELFT>::reportUniqueWarning(Error Err) const {
- this->dumper()->reportUniqueWarning(std::move(Err));
-}
+ void printMipsGOT(const MipsGOTParser<ELFT> &Parser) override;
+ void printMipsPLT(const MipsGOTParser<ELFT> &Parser) override;
+ void printMipsABIFlags() override;
+};
-template <typename ELFT> class LLVMStyle : public DumpStyle<ELFT> {
+template <typename ELFT> class LLVMELFDumper : public ELFDumper<ELFT> {
public:
- TYPEDEF_ELF_TYPES(ELFT)
-
- LLVMStyle(ScopedPrinter &W, ELFDumper<ELFT> *Dumper)
- : DumpStyle<ELFT>(Dumper), W(W) {}
-
- void printFileHeaders(const ELFO *Obj) override;
- void printGroupSections(const ELFFile<ELFT> *Obj) override;
- void printRelocations(const ELFO *Obj) override;
- void printRelocations(const Elf_Shdr *Sec, const ELFO *Obj);
- void printSectionHeaders(const ELFO *Obj) override;
- void printSymbols(const ELFO *Obj, bool PrintSymbols,
- bool PrintDynamicSymbols) override;
- void printDependentLibs(const ELFFile<ELFT> *Obj) override;
- void printDynamic(const ELFFile<ELFT> *Obj) override;
- void printDynamicRelocations(const ELFO *Obj) override;
- void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders,
+ LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
+
+ LLVMELFDumper(const object::ELFObjectFile<ELFT> &ObjF, ScopedPrinter &Writer)
+ : ELFDumper<ELFT>(ObjF, Writer), W(Writer) {}
+
+ void printFileHeaders() override;
+ void printGroupSections() override;
+ void printRelocations() override;
+ void printSectionHeaders() override;
+ void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override;
+ void printDependentLibs() override;
+ void printDynamicTable() override;
+ void printDynamicRelocations() override;
+ void printProgramHeaders(bool PrintProgramHeaders,
cl::boolOrDefault PrintSectionMapping) override;
- void printVersionSymbolSection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) override;
- void printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) override;
- void printVersionDependencySection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) override;
- void printHashHistograms(const ELFFile<ELFT> *Obj) override;
- void printCGProfile(const ELFFile<ELFT> *Obj) override;
- void printAddrsig(const ELFFile<ELFT> *Obj) override;
- void printNotes(const ELFFile<ELFT> *Obj) override;
- void printELFLinkerOptions(const ELFFile<ELFT> *Obj) override;
- void printStackSizes(const ELFObjectFile<ELFT> *Obj) override;
- void printStackSizeEntry(uint64_t Size, StringRef FuncName) override;
- void printMipsGOT(const MipsGOTParser<ELFT> &Parser) override;
- void printMipsPLT(const MipsGOTParser<ELFT> &Parser) override;
- void printMipsABIFlags(const ELFObjectFile<ELFT> *Obj) override;
+ void printVersionSymbolSection(const Elf_Shdr *Sec) override;
+ void printVersionDefinitionSection(const Elf_Shdr *Sec) override;
+ void printVersionDependencySection(const Elf_Shdr *Sec) override;
+ void printHashHistograms() override;
+ void printCGProfile() override;
+ void printAddrsig() override;
+ void printNotes() override;
+ void printELFLinkerOptions() override;
+ void printStackSizes() override;
private:
- void printRelocation(const ELFO *Obj, unsigned SecIndex, Elf_Rela Rel,
- unsigned RelIndex, const Elf_Shdr *SymTab);
- void printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel);
- void printSymbols(const ELFO *Obj);
- void printDynamicSymbols(const ELFO *Obj);
- void printSymbolSection(const Elf_Sym *Symbol, const Elf_Sym *First);
- void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
+ void printRelrReloc(const Elf_Relr &R) override;
+ void printRelRelaReloc(const Relocation<ELFT> &R,
+ const RelSymbol<ELFT> &RelSym) override;
+
+ void printSymbolSection(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable) const;
+ void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable, bool IsDynamic,
- bool /*NonVisibilityBitsUsed*/) override;
- void printProgramHeaders(const ELFO *Obj);
- void printSectionMapping(const ELFO *Obj) {}
+ bool /*NonVisibilityBitsUsed*/) const override;
+ void printProgramHeaders() override;
+ void printSectionMapping() override {}
+ void printStackSizeEntry(uint64_t Size, StringRef FuncName) override;
+
+ void printMipsGOT(const MipsGOTParser<ELFT> &Parser) override;
+ void printMipsPLT(const MipsGOTParser<ELFT> &Parser) override;
+ void printMipsABIFlags() override;
ScopedPrinter &W;
};
@@ -996,81 +692,53 @@ private:
namespace llvm {
template <class ELFT>
-static std::error_code createELFDumper(const ELFObjectFile<ELFT> *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result) {
- Result.reset(new ELFDumper<ELFT>(Obj, Writer));
- return readobj_error::success;
+static std::unique_ptr<ObjDumper>
+createELFDumper(const ELFObjectFile<ELFT> &Obj, ScopedPrinter &Writer) {
+ if (opts::Output == opts::GNU)
+ return std::make_unique<GNUELFDumper<ELFT>>(Obj, Writer);
+ return std::make_unique<LLVMELFDumper<ELFT>>(Obj, Writer);
}
-std::error_code createELFDumper(const object::ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result) {
+std::unique_ptr<ObjDumper> createELFDumper(const object::ELFObjectFileBase &Obj,
+ ScopedPrinter &Writer) {
// Little-endian 32-bit
- if (const ELF32LEObjectFile *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
- return createELFDumper(ELFObj, Writer, Result);
+ if (const ELF32LEObjectFile *ELFObj = dyn_cast<ELF32LEObjectFile>(&Obj))
+ return createELFDumper(*ELFObj, Writer);
// Big-endian 32-bit
- if (const ELF32BEObjectFile *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
- return createELFDumper(ELFObj, Writer, Result);
+ if (const ELF32BEObjectFile *ELFObj = dyn_cast<ELF32BEObjectFile>(&Obj))
+ return createELFDumper(*ELFObj, Writer);
// Little-endian 64-bit
- if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
- return createELFDumper(ELFObj, Writer, Result);
+ if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(&Obj))
+ return createELFDumper(*ELFObj, Writer);
// Big-endian 64-bit
- if (const ELF64BEObjectFile *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
- return createELFDumper(ELFObj, Writer, Result);
-
- return readobj_error::unsupported_obj_file_format;
+ return createELFDumper(*cast<ELF64BEObjectFile>(&Obj), Writer);
}
} // end namespace llvm
-template <class ELFT> Error ELFDumper<ELFT>::LoadVersionMap() const {
- // If there is no dynamic symtab or version table, there is nothing to do.
- if (!DynSymRegion || !SymbolVersionSection)
- return Error::success();
-
- // Has the VersionMap already been loaded?
- if (!VersionMap.empty())
- return Error::success();
-
- // The first two version indexes are reserved.
- // Index 0 is LOCAL, index 1 is GLOBAL.
- VersionMap.push_back(VersionEntry());
- VersionMap.push_back(VersionEntry());
-
- auto InsertEntry = [this](unsigned N, StringRef Version, bool IsVerdef) {
- if (N >= VersionMap.size())
- VersionMap.resize(N + 1);
- VersionMap[N] = {std::string(Version), IsVerdef};
- };
-
- if (SymbolVersionDefSection) {
- Expected<std::vector<VerDef>> Defs =
- this->getVersionDefinitions(SymbolVersionDefSection);
- if (!Defs)
- return Defs.takeError();
- for (const VerDef &Def : *Defs)
- InsertEntry(Def.Ndx & ELF::VERSYM_VERSION, Def.Name, true);
- }
-
- if (SymbolVersionNeedSection) {
- Expected<std::vector<VerNeed>> Deps =
- this->getVersionDependencies(SymbolVersionNeedSection);
- if (!Deps)
- return Deps.takeError();
- for (const VerNeed &Dep : *Deps)
- for (const VernAux &Aux : Dep.AuxV)
- InsertEntry(Aux.Other & ELF::VERSYM_VERSION, Aux.Name, false);
- }
+template <class ELFT>
+Expected<SmallVector<Optional<VersionEntry>, 0> *>
+ELFDumper<ELFT>::getVersionMap() const {
+ // If the VersionMap has already been loaded or if there is no dynamic symtab
+ // or version table, there is nothing to do.
+ if (!VersionMap.empty() || !DynSymRegion || !SymbolVersionSection)
+ return &VersionMap;
+
+ Expected<SmallVector<Optional<VersionEntry>, 0>> MapOrErr =
+ Obj.loadVersionMap(SymbolVersionNeedSection, SymbolVersionDefSection);
+ if (MapOrErr)
+ VersionMap = *MapOrErr;
+ else
+ return MapOrErr.takeError();
- return Error::success();
+ return &VersionMap;
}
template <typename ELFT>
-Expected<StringRef> ELFDumper<ELFT>::getSymbolVersion(const Elf_Sym *Sym,
+Expected<StringRef> ELFDumper<ELFT>::getSymbolVersion(const Elf_Sym &Sym,
bool &IsDefault) const {
// This is a dynamic symbol. Look in the GNU symbol version table.
if (!SymbolVersionSection) {
@@ -1081,55 +749,69 @@ Expected<StringRef> ELFDumper<ELFT>::getSymbolVersion(const Elf_Sym *Sym,
assert(DynSymRegion && "DynSymRegion has not been initialised");
// Determine the position in the symbol table of this entry.
- size_t EntryIndex = (reinterpret_cast<uintptr_t>(Sym) -
+ size_t EntryIndex = (reinterpret_cast<uintptr_t>(&Sym) -
reinterpret_cast<uintptr_t>(DynSymRegion->Addr)) /
sizeof(Elf_Sym);
// Get the corresponding version index entry.
- if (Expected<const Elf_Versym *> EntryOrErr =
- ObjF->getELFFile()->template getEntry<Elf_Versym>(
- SymbolVersionSection, EntryIndex))
- return this->getSymbolVersionByIndex((*EntryOrErr)->vs_index, IsDefault);
- else
+ Expected<const Elf_Versym *> EntryOrErr =
+ Obj.template getEntry<Elf_Versym>(*SymbolVersionSection, EntryIndex);
+ if (!EntryOrErr)
return EntryOrErr.takeError();
+
+ unsigned Version = (*EntryOrErr)->vs_index;
+ if (Version == VER_NDX_LOCAL || Version == VER_NDX_GLOBAL) {
+ IsDefault = false;
+ return "";
+ }
+
+ Expected<SmallVector<Optional<VersionEntry>, 0> *> MapOrErr =
+ getVersionMap();
+ if (!MapOrErr)
+ return MapOrErr.takeError();
+
+ return Obj.getSymbolVersionByIndex(Version, IsDefault, **MapOrErr,
+ Sym.st_shndx == ELF::SHN_UNDEF);
}
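
[Editor's note: getSymbolVersion maps a dynamic symbol to its slot in .gnu.version and then interprets the 16-bit versym value. A rough standalone sketch of that interpretation step; the constants follow the GNU symbol-versioning ABI, the map lookup is simplified, the helper names are hypothetical, and the undefined-symbol special case handled by the real code is omitted.]

    #include <cstdint>
    #include <string>
    #include <vector>

    constexpr uint16_t VER_NDX_LOCAL  = 0;       // unversioned local symbol
    constexpr uint16_t VER_NDX_GLOBAL = 1;       // unversioned global symbol
    constexpr uint16_t VERSYM_VERSION = 0x7fff;  // low 15 bits: version index
    constexpr uint16_t VERSYM_HIDDEN  = 0x8000;  // high bit: non-default version

    struct VersionName { std::string Name; bool IsVerDef; };

    // Given one .gnu.version entry and a preloaded version map, produce the
    // version string and whether it is the default ("@@") version.
    static std::string interpretVersym(uint16_t Versym,
                                       const std::vector<VersionName> &Map,
                                       bool &IsDefault) {
      uint16_t Index = Versym & VERSYM_VERSION;
      if (Index == VER_NDX_LOCAL || Index == VER_NDX_GLOBAL) {
        IsDefault = false;
        return "";  // no version suffix at all
      }
      if (Index >= Map.size())
        return "<corrupt>";  // index not described by verdef/verneed sections
      // Only version definitions can be the default; the hidden bit demotes them.
      IsDefault = Map[Index].IsVerDef && !(Versym & VERSYM_HIDDEN);
      return Map[Index].Name;
    }
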
template <typename ELFT>
-Expected<std::pair<const typename ELFT::Sym *, std::string>>
-ELFDumper<ELFT>::getRelocationTarget(const Elf_Shdr *SymTab,
- const Elf_Rela &R) const {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- Expected<const Elf_Sym *> SymOrErr = Obj->getRelocationSymbol(&R, SymTab);
+Expected<RelSymbol<ELFT>>
+ELFDumper<ELFT>::getRelocationTarget(const Relocation<ELFT> &R,
+ const Elf_Shdr *SymTab) const {
+ if (R.Symbol == 0)
+ return RelSymbol<ELFT>(nullptr, "");
+
+ Expected<const Elf_Sym *> SymOrErr =
+ Obj.template getEntry<Elf_Sym>(*SymTab, R.Symbol);
if (!SymOrErr)
- return SymOrErr.takeError();
+ return createError("unable to read an entry with index " + Twine(R.Symbol) +
+ " from " + describe(*SymTab) + ": " +
+ toString(SymOrErr.takeError()));
const Elf_Sym *Sym = *SymOrErr;
if (!Sym)
- return std::make_pair(nullptr, "");
-
- // The st_name field of a STT_SECTION is usually 0 (empty string).
- // This code block returns the section name.
- if (Sym->getType() == ELF::STT_SECTION) {
- Expected<const Elf_Shdr *> SecOrErr =
- Obj->getSection(Sym, SymTab, ShndxTable);
- if (!SecOrErr)
- return SecOrErr.takeError();
- // A section symbol describes the section at index 0.
- if (*SecOrErr == nullptr)
- return std::make_pair(Sym, "");
-
- Expected<StringRef> NameOrErr = Obj->getSectionName(*SecOrErr);
- if (!NameOrErr)
- return NameOrErr.takeError();
- return std::make_pair(Sym, NameOrErr->str());
- }
-
- Expected<StringRef> StrTableOrErr = Obj->getStringTableForSymtab(*SymTab);
+ return RelSymbol<ELFT>(nullptr, "");
+
+ Expected<StringRef> StrTableOrErr = Obj.getStringTableForSymtab(*SymTab);
if (!StrTableOrErr)
return StrTableOrErr.takeError();
+ const Elf_Sym *FirstSym =
+ cantFail(Obj.template getEntry<Elf_Sym>(*SymTab, 0));
std::string SymbolName =
- getFullSymbolName(Sym, *StrTableOrErr, SymTab->sh_type == SHT_DYNSYM);
- return std::make_pair(Sym, SymbolName);
+ getFullSymbolName(*Sym, Sym - FirstSym, getShndxTable(SymTab),
+ *StrTableOrErr, SymTab->sh_type == SHT_DYNSYM);
+ return RelSymbol<ELFT>(Sym, SymbolName);
+}
+
+template <typename ELFT>
+ArrayRef<typename ELFT::Word>
+ELFDumper<ELFT>::getShndxTable(const Elf_Shdr *Symtab) const {
+ if (Symtab) {
+ auto It = ShndxTables.find(Symtab);
+ if (It != ShndxTables.end())
+ return It->second;
+ }
+ return {};
}
static std::string maybeDemangle(StringRef Name) {
@@ -1139,19 +821,17 @@ static std::string maybeDemangle(StringRef Name) {
template <typename ELFT>
std::string ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
auto Warn = [&](Error E) -> std::string {
- this->reportUniqueWarning(
- createError("unable to read the name of symbol with index " +
- Twine(Index) + ": " + toString(std::move(E))));
+ reportUniqueWarning("unable to read the name of symbol with index " +
+ Twine(Index) + ": " + toString(std::move(E)));
return "<?>";
};
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
Expected<const typename ELFT::Sym *> SymOrErr =
- Obj->getSymbol(DotSymtabSec, Index);
+ Obj.getSymbol(DotSymtabSec, Index);
if (!SymOrErr)
return Warn(SymOrErr.takeError());
- Expected<StringRef> StrTabOrErr = Obj->getStringTableForSymtab(*DotSymtabSec);
+ Expected<StringRef> StrTabOrErr = Obj.getStringTableForSymtab(*DotSymtabSec);
if (!StrTabOrErr)
return Warn(StrTabOrErr.takeError());
@@ -1162,52 +842,25 @@ std::string ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
}
template <typename ELFT>
-Expected<StringRef>
-ELFDumper<ELFT>::getSymbolVersionByIndex(uint32_t SymbolVersionIndex,
- bool &IsDefault) const {
- size_t VersionIndex = SymbolVersionIndex & VERSYM_VERSION;
-
- // Special markers for unversioned symbols.
- if (VersionIndex == VER_NDX_LOCAL || VersionIndex == VER_NDX_GLOBAL) {
- IsDefault = false;
- return "";
- }
-
- // Lookup this symbol in the version table.
- if (Error E = LoadVersionMap())
- return std::move(E);
- if (VersionIndex >= VersionMap.size() || !VersionMap[VersionIndex])
- return createError("SHT_GNU_versym section refers to a version index " +
- Twine(VersionIndex) + " which is missing");
-
- const VersionEntry &Entry = *VersionMap[VersionIndex];
- if (Entry.IsVerDef)
- IsDefault = !(SymbolVersionIndex & VERSYM_HIDDEN);
- else
- IsDefault = false;
- return Entry.Name.c_str();
-}
-
-template <typename ELFT>
-std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
+std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym &Symbol,
+ unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable,
bool IsDynamic) const {
if (!StrTable)
return "<?>";
std::string SymbolName;
- if (Expected<StringRef> NameOrErr = Symbol->getName(*StrTable)) {
+ if (Expected<StringRef> NameOrErr = Symbol.getName(*StrTable)) {
SymbolName = maybeDemangle(*NameOrErr);
} else {
reportUniqueWarning(NameOrErr.takeError());
return "<?>";
}
- if (SymbolName.empty() && Symbol->getType() == ELF::STT_SECTION) {
- Elf_Sym_Range Syms = unwrapOrError(
- ObjF->getFileName(), ObjF->getELFFile()->symbols(DotSymtabSec));
+ if (SymbolName.empty() && Symbol.getType() == ELF::STT_SECTION) {
Expected<unsigned> SectionIndex =
- getSymbolSectionIndex(Symbol, Syms.begin());
+ getSymbolSectionIndex(Symbol, SymIndex, ShndxTable);
if (!SectionIndex) {
reportUniqueWarning(SectionIndex.takeError());
return "<?>";
@@ -1224,7 +877,7 @@ std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
return SymbolName;
bool IsDefault;
- Expected<StringRef> VersionOrErr = getSymbolVersion(&*Symbol, IsDefault);
+ Expected<StringRef> VersionOrErr = getSymbolVersion(Symbol, IsDefault);
if (!VersionOrErr) {
reportUniqueWarning(VersionOrErr.takeError());
return SymbolName + "@<corrupt>";
@@ -1239,59 +892,55 @@ std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
template <typename ELFT>
Expected<unsigned>
-ELFDumper<ELFT>::getSymbolSectionIndex(const Elf_Sym *Symbol,
- const Elf_Sym *FirstSym) const {
- return Symbol->st_shndx == SHN_XINDEX
- ? object::getExtendedSymbolTableIndex<ELFT>(Symbol, FirstSym,
- ShndxTable)
- : Symbol->st_shndx;
-}
-
-// If the Symbol has a reserved st_shndx other than SHN_XINDEX, return a
-// descriptive interpretation of the st_shndx value. Otherwise, return the name
-// of the section with index SectionIndex. This function assumes that if the
-// Symbol has st_shndx == SHN_XINDEX the SectionIndex will be the value derived
-// from the SHT_SYMTAB_SHNDX section.
+ELFDumper<ELFT>::getSymbolSectionIndex(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable) const {
+ unsigned Ndx = Symbol.st_shndx;
+ if (Ndx == SHN_XINDEX)
+ return object::getExtendedSymbolTableIndex<ELFT>(Symbol, SymIndex,
+ ShndxTable);
+ if (Ndx != SHN_UNDEF && Ndx < SHN_LORESERVE)
+ return Ndx;
+
+ auto CreateErr = [&](const Twine &Name, Optional<unsigned> Offset = None) {
+ std::string Desc;
+ if (Offset)
+ Desc = (Name + "+0x" + Twine::utohexstr(*Offset)).str();
+ else
+ Desc = Name.str();
+ return createError(
+ "unable to get section index for symbol with st_shndx = 0x" +
+ Twine::utohexstr(Ndx) + " (" + Desc + ")");
+ };
+
+ if (Ndx >= ELF::SHN_LOPROC && Ndx <= ELF::SHN_HIPROC)
+ return CreateErr("SHN_LOPROC", Ndx - ELF::SHN_LOPROC);
+ if (Ndx >= ELF::SHN_LOOS && Ndx <= ELF::SHN_HIOS)
+ return CreateErr("SHN_LOOS", Ndx - ELF::SHN_LOOS);
+ if (Ndx == ELF::SHN_UNDEF)
+ return CreateErr("SHN_UNDEF");
+ if (Ndx == ELF::SHN_ABS)
+ return CreateErr("SHN_ABS");
+ if (Ndx == ELF::SHN_COMMON)
+ return CreateErr("SHN_COMMON");
+ return CreateErr("SHN_LORESERVE", Ndx - SHN_LORESERVE);
+}
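
[Editor's note: getSymbolSectionIndex above uses ordinary st_shndx values directly, resolves SHN_XINDEX through the SHT_SYMTAB_SHNDX table, and turns reserved values into diagnostics. A simplified standalone version of that dispatch; the SHN_* constants follow the ELF gABI, and the extended table is just an array here.]

    #include <cstdint>
    #include <optional>
    #include <vector>

    constexpr uint16_t SHN_UNDEF     = 0;
    constexpr uint16_t SHN_LORESERVE = 0xff00;
    constexpr uint16_t SHN_XINDEX    = 0xffff;

    // Returns the real section index for a symbol, or std::nullopt for
    // reserved values that name no section (SHN_UNDEF, SHN_ABS, SHN_COMMON, ...).
    static std::optional<uint32_t>
    resolveSectionIndex(uint16_t StShndx, uint32_t SymIndex,
                        const std::vector<uint32_t> &ShndxTable) {
      if (StShndx == SHN_XINDEX) {
        // Escape value: the true 32-bit index lives in the SHT_SYMTAB_SHNDX
        // section, one word per symbol table entry.
        if (SymIndex < ShndxTable.size())
          return ShndxTable[SymIndex];
        return std::nullopt;  // table missing or too short
      }
      if (StShndx != SHN_UNDEF && StShndx < SHN_LORESERVE)
        return StShndx;       // ordinary index
      return std::nullopt;    // reserved range: caller reports a diagnostic
    }
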
+
template <typename ELFT>
Expected<StringRef>
-ELFDumper<ELFT>::getSymbolSectionName(const Elf_Sym *Symbol,
+ELFDumper<ELFT>::getSymbolSectionName(const Elf_Sym &Symbol,
unsigned SectionIndex) const {
- if (Symbol->isUndefined())
- return "Undefined";
- if (Symbol->isProcessorSpecific())
- return "Processor Specific";
- if (Symbol->isOSSpecific())
- return "Operating System Specific";
- if (Symbol->isAbsolute())
- return "Absolute";
- if (Symbol->isCommon())
- return "Common";
- if (Symbol->isReserved() && Symbol->st_shndx != SHN_XINDEX)
- return "Reserved";
-
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- Expected<const Elf_Shdr *> SecOrErr =
- Obj->getSection(SectionIndex);
+ Expected<const Elf_Shdr *> SecOrErr = Obj.getSection(SectionIndex);
if (!SecOrErr)
return SecOrErr.takeError();
- return Obj->getSectionName(*SecOrErr);
+ return Obj.getSectionName(**SecOrErr);
}
template <class ELFO>
static const typename ELFO::Elf_Shdr *
-findNotEmptySectionByAddress(const ELFO *Obj, StringRef FileName,
+findNotEmptySectionByAddress(const ELFO &Obj, StringRef FileName,
uint64_t Addr) {
- for (const typename ELFO::Elf_Shdr &Shdr : cantFail(Obj->sections()))
- if (Shdr.sh_addr == Addr && Shdr.sh_size > 0)
- return &Shdr;
- return nullptr;
-}
-
-template <class ELFO>
-static const typename ELFO::Elf_Shdr *
-findSectionByName(const ELFO &Obj, StringRef FileName, StringRef Name) {
for (const typename ELFO::Elf_Shdr &Shdr : cantFail(Obj.sections()))
- if (Name == unwrapOrError(FileName, Obj.getSectionName(&Shdr)))
+ if (Shdr.sh_addr == Addr && Shdr.sh_size > 0)
return &Shdr;
return nullptr;
}
@@ -1337,11 +986,6 @@ static const EnumEntry<unsigned> ElfOSABI[] = {
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
};
-static const EnumEntry<unsigned> SymVersionFlags[] = {
- {"Base", "BASE", VER_FLG_BASE},
- {"Weak", "WEAK", VER_FLG_WEAK},
- {"Info", "INFO", VER_FLG_INFO}};
-
static const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
{"AMDGPU_PAL", "AMDGPU - PAL", ELF::ELFOSABI_AMDGPU_PAL},
@@ -1671,9 +1315,8 @@ static std::string getGNUFlags(unsigned EMachine, uint64_t Flags) {
return Str;
}
-static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
- // Check potentially overlapped processor-specific
- // program header type.
+static StringRef segmentTypeToString(unsigned Arch, unsigned Type) {
+ // Check potentially overlapped processor-specific program header type.
switch (Arch) {
case ELF::EM_ARM:
switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX); }
@@ -1682,25 +1325,25 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
case ELF::EM_MIPS_RS3_LE:
switch (Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_REGINFO);
- LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_RTPROC);
- LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_OPTIONS);
- LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_ABIFLAGS);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_RTPROC);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_OPTIONS);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_ABIFLAGS);
}
break;
}
switch (Type) {
- LLVM_READOBJ_ENUM_CASE(ELF, PT_NULL );
- LLVM_READOBJ_ENUM_CASE(ELF, PT_LOAD );
- LLVM_READOBJ_ENUM_CASE(ELF, PT_DYNAMIC);
- LLVM_READOBJ_ENUM_CASE(ELF, PT_INTERP );
- LLVM_READOBJ_ENUM_CASE(ELF, PT_NOTE );
- LLVM_READOBJ_ENUM_CASE(ELF, PT_SHLIB );
- LLVM_READOBJ_ENUM_CASE(ELF, PT_PHDR );
- LLVM_READOBJ_ENUM_CASE(ELF, PT_TLS );
-
- LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_EH_FRAME);
- LLVM_READOBJ_ENUM_CASE(ELF, PT_SUNW_UNWIND);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_NULL);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_LOAD);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_DYNAMIC);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_INTERP);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_NOTE);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_SHLIB);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_PHDR);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_TLS);
+
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_EH_FRAME);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_SUNW_UNWIND);
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_STACK);
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_RELRO);
@@ -1709,50 +1352,27 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE);
LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED);
LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA);
-
default:
return "";
}
}
-static std::string getElfPtType(unsigned Arch, unsigned Type) {
- switch (Type) {
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_NULL)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_LOAD)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_DYNAMIC)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_INTERP)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_NOTE)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_SHLIB)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_PHDR)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_TLS)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_GNU_EH_FRAME)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_SUNW_UNWIND)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_GNU_STACK)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_GNU_RELRO)
- LLVM_READOBJ_PHDR_ENUM(ELF, PT_GNU_PROPERTY)
- default:
- // All machine specific PT_* types
- switch (Arch) {
- case ELF::EM_ARM:
- if (Type == ELF::PT_ARM_EXIDX)
- return "EXIDX";
- break;
- case ELF::EM_MIPS:
- case ELF::EM_MIPS_RS3_LE:
- switch (Type) {
- case PT_MIPS_REGINFO:
- return "REGINFO";
- case PT_MIPS_RTPROC:
- return "RTPROC";
- case PT_MIPS_OPTIONS:
- return "OPTIONS";
- case PT_MIPS_ABIFLAGS:
- return "ABIFLAGS";
- }
- break;
- }
- }
- return std::string("<unknown>: ") + to_string(format_hex(Type, 1));
+static std::string getGNUPtType(unsigned Arch, unsigned Type) {
+ StringRef Seg = segmentTypeToString(Arch, Type);
+ if (Seg.empty())
+ return std::string("<unknown>: ") + to_string(format_hex(Type, 1));
+
+ // E.g. "PT_ARM_EXIDX" -> "EXIDX".
+ if (Seg.startswith("PT_ARM_"))
+ return Seg.drop_front(7).str();
+
+ // E.g. "PT_MIPS_REGINFO" -> "REGINFO".
+ if (Seg.startswith("PT_MIPS_"))
+ return Seg.drop_front(8).str();
+
+ // E.g. "PT_LOAD" -> "LOAD".
+ assert(Seg.startswith("PT_"));
+ return Seg.drop_front(3).str();
}
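The new getGNUPtType() above derives the short GNU-style segment name by stripping the "PT_" prefix, plus the architecture part for processor-specific types. A minimal standalone sketch of that mapping, written against the plain C++ standard library (gnuPtName is a hypothetical helper, not part of llvm-readobj):

#include <cassert>
#include <string>

// Reduce a "PT_*" enum spelling to the short name shown in the GNU output,
// checking the most specific prefixes first, as the function above does.
static std::string gnuPtName(const std::string &EnumName) {
  for (const char *Prefix : {"PT_ARM_", "PT_MIPS_", "PT_"}) {
    std::string P(Prefix);
    if (EnumName.compare(0, P.size(), P) == 0)
      return EnumName.substr(P.size());
  }
  return "<unknown>";
}

int main() {
  assert(gnuPtName("PT_GNU_RELRO") == "GNU_RELRO");
  assert(gnuPtName("PT_ARM_EXIDX") == "EXIDX");
  assert(gnuPtName("PT_MIPS_REGINFO") == "REGINFO");
}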
static const EnumEntry<unsigned> ElfSegmentFlags[] = {
@@ -1827,14 +1447,17 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_TURKS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX601),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX602),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX700),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX701),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX702),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX703),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX704),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX705),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX801),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX802),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX803),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX805),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX810),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX900),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
@@ -1842,10 +1465,14 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX908),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
};
@@ -1871,6 +1498,10 @@ static const EnumEntry<unsigned> ElfMipsSymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_MICROMIPS)
};
+static const EnumEntry<unsigned> ElfAArch64SymOtherFlags[] = {
+ LLVM_READOBJ_ENUM_ENT(ELF, STO_AARCH64_VARIANT_PCS)
+};
+
static const EnumEntry<unsigned> ElfMips16SymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_OPTIONAL),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PLT),
@@ -1898,10 +1529,10 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) {
template <typename ELFT>
std::pair<const typename ELFT::Phdr *, const typename ELFT::Shdr *>
-ELFDumper<ELFT>::findDynamic(const ELFFile<ELFT> *Obj) {
+ELFDumper<ELFT>::findDynamic() {
// Try to locate the PT_DYNAMIC header.
const Elf_Phdr *DynamicPhdr = nullptr;
- if (Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = Obj->program_headers()) {
+ if (Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = Obj.program_headers()) {
for (const Elf_Phdr &Phdr : *PhdrsOrErr) {
if (Phdr.p_type != ELF::PT_DYNAMIC)
continue;
@@ -1909,85 +1540,89 @@ ELFDumper<ELFT>::findDynamic(const ELFFile<ELFT> *Obj) {
break;
}
} else {
- this->reportUniqueWarning(createError(
+ reportUniqueWarning(
"unable to read program headers to locate the PT_DYNAMIC segment: " +
- toString(PhdrsOrErr.takeError())));
+ toString(PhdrsOrErr.takeError()));
}
// Try to locate the .dynamic section in the sections header table.
const Elf_Shdr *DynamicSec = nullptr;
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) {
+ for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
if (Sec.sh_type != ELF::SHT_DYNAMIC)
continue;
DynamicSec = &Sec;
break;
}
- if (DynamicPhdr && DynamicPhdr->p_offset + DynamicPhdr->p_filesz >
- ObjF->getMemoryBufferRef().getBufferSize()) {
- reportWarning(
- createError(
- "PT_DYNAMIC segment offset + size exceeds the size of the file"),
- ObjF->getFileName());
+ if (DynamicPhdr && ((DynamicPhdr->p_offset + DynamicPhdr->p_filesz >
+ ObjF.getMemoryBufferRef().getBufferSize()) ||
+ (DynamicPhdr->p_offset + DynamicPhdr->p_filesz <
+ DynamicPhdr->p_offset))) {
+ reportUniqueWarning(
+ "PT_DYNAMIC segment offset (0x" +
+ Twine::utohexstr(DynamicPhdr->p_offset) + ") + file size (0x" +
+ Twine::utohexstr(DynamicPhdr->p_filesz) +
+ ") exceeds the size of the file (0x" +
+ Twine::utohexstr(ObjF.getMemoryBufferRef().getBufferSize()) + ")");
// Don't use the broken dynamic header.
DynamicPhdr = nullptr;
}
if (DynamicPhdr && DynamicSec) {
- StringRef Name =
- unwrapOrError(ObjF->getFileName(), Obj->getSectionName(DynamicSec));
if (DynamicSec->sh_addr + DynamicSec->sh_size >
DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz ||
DynamicSec->sh_addr < DynamicPhdr->p_vaddr)
- reportWarning(createError("The SHT_DYNAMIC section '" + Name +
- "' is not contained within the "
- "PT_DYNAMIC segment"),
- ObjF->getFileName());
+ reportUniqueWarning(describe(*DynamicSec) +
+ " is not contained within the "
+ "PT_DYNAMIC segment");
if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr)
- reportWarning(createError("The SHT_DYNAMIC section '" + Name +
- "' is not at the start of "
- "PT_DYNAMIC segment"),
- ObjF->getFileName());
+ reportUniqueWarning(describe(*DynamicSec) + " is not at the start of "
+ "PT_DYNAMIC segment");
}
return std::make_pair(DynamicPhdr, DynamicSec);
}
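The rewritten PT_DYNAMIC check above rejects the segment both when p_offset + p_filesz runs past the end of the file and when that unsigned addition wraps around. A small sketch of the same bounds test (dynamicSegmentInBounds is a hypothetical stand-alone helper, shown only to spell out the overflow case):

#include <cstdint>

// True when [Offset, Offset + FileSz) lies inside a buffer of BufferSize
// bytes, treating a wrapped addition as out of bounds.
static bool dynamicSegmentInBounds(uint64_t Offset, uint64_t FileSz,
                                   uint64_t BufferSize) {
  const uint64_t End = Offset + FileSz; // may wrap for bogus p_filesz values
  if (End < Offset)
    return false;                       // overflow: definitely not in bounds
  return End <= BufferSize;
}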
template <typename ELFT>
-void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
+void ELFDumper<ELFT>::loadDynamicTable() {
const Elf_Phdr *DynamicPhdr;
const Elf_Shdr *DynamicSec;
- std::tie(DynamicPhdr, DynamicSec) = findDynamic(Obj);
+ std::tie(DynamicPhdr, DynamicSec) = findDynamic();
if (!DynamicPhdr && !DynamicSec)
return;
- DynRegionInfo FromPhdr(ObjF->getFileName());
+ DynRegionInfo FromPhdr(ObjF, *this);
bool IsPhdrTableValid = false;
if (DynamicPhdr) {
- FromPhdr = createDRIFrom(DynamicPhdr, sizeof(Elf_Dyn));
+ // Use cantFail(), because p_offset/p_filesz fields of a PT_DYNAMIC are
+ // validated in findDynamic() and so createDRI() is not expected to fail.
+ FromPhdr = cantFail(createDRI(DynamicPhdr->p_offset, DynamicPhdr->p_filesz,
+ sizeof(Elf_Dyn)));
FromPhdr.SizePrintName = "PT_DYNAMIC size";
FromPhdr.EntSizePrintName = "";
-
- IsPhdrTableValid = !FromPhdr.getAsArrayRef<Elf_Dyn>().empty();
+ IsPhdrTableValid = !FromPhdr.template getAsArrayRef<Elf_Dyn>().empty();
}
// Locate the dynamic table described in a section header.
// Ignore sh_entsize and use the expected value for entry size explicitly.
// This allows us to dump dynamic sections with a broken sh_entsize
// field.
- DynRegionInfo FromSec(ObjF->getFileName());
+ DynRegionInfo FromSec(ObjF, *this);
bool IsSecTableValid = false;
if (DynamicSec) {
- FromSec =
- checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset,
- DynamicSec->sh_size, sizeof(Elf_Dyn), ObjF->getFileName()});
- FromSec.Context = ("section with index " +
- Twine(DynamicSec - &cantFail(Obj->sections()).front()))
- .str();
- FromSec.EntSizePrintName = "";
-
- IsSecTableValid = !FromSec.getAsArrayRef<Elf_Dyn>().empty();
+ Expected<DynRegionInfo> RegOrErr =
+ createDRI(DynamicSec->sh_offset, DynamicSec->sh_size, sizeof(Elf_Dyn));
+ if (RegOrErr) {
+ FromSec = *RegOrErr;
+ FromSec.Context = describe(*DynamicSec);
+ FromSec.EntSizePrintName = "";
+ IsSecTableValid = !FromSec.template getAsArrayRef<Elf_Dyn>().empty();
+ } else {
+ reportUniqueWarning("unable to read the dynamic table from " +
+ describe(*DynamicSec) + ": " +
+ toString(RegOrErr.takeError()));
+ }
}
// When we only have information from one of the SHT_DYNAMIC section header or
@@ -1995,10 +1630,9 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
if (!DynamicPhdr || !DynamicSec) {
if ((DynamicPhdr && IsPhdrTableValid) || (DynamicSec && IsSecTableValid)) {
DynamicTable = DynamicPhdr ? FromPhdr : FromSec;
- parseDynamicTable(Obj);
+ parseDynamicTable();
} else {
- reportWarning(createError("no valid dynamic table was found"),
- ObjF->getFileName());
+ reportUniqueWarning("no valid dynamic table was found");
}
return;
}
@@ -2008,58 +1642,43 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
// verify that.
if (FromPhdr.Addr != FromSec.Addr)
- reportWarning(createError("SHT_DYNAMIC section header and PT_DYNAMIC "
- "program header disagree about "
- "the location of the dynamic table"),
- ObjF->getFileName());
+ reportUniqueWarning("SHT_DYNAMIC section header and PT_DYNAMIC "
+ "program header disagree about "
+ "the location of the dynamic table");
if (!IsPhdrTableValid && !IsSecTableValid) {
- reportWarning(createError("no valid dynamic table was found"),
- ObjF->getFileName());
+ reportUniqueWarning("no valid dynamic table was found");
return;
}
- // Information in the PT_DYNAMIC program header has priority over the information
- // in a section header.
+ // Information in the PT_DYNAMIC program header has priority over the
+ // information in a section header.
if (IsPhdrTableValid) {
if (!IsSecTableValid)
- reportWarning(
- createError(
- "SHT_DYNAMIC dynamic table is invalid: PT_DYNAMIC will be used"),
- ObjF->getFileName());
+ reportUniqueWarning(
+ "SHT_DYNAMIC dynamic table is invalid: PT_DYNAMIC will be used");
DynamicTable = FromPhdr;
} else {
- reportWarning(
- createError(
- "PT_DYNAMIC dynamic table is invalid: SHT_DYNAMIC will be used"),
- ObjF->getFileName());
+ reportUniqueWarning(
+ "PT_DYNAMIC dynamic table is invalid: SHT_DYNAMIC will be used");
DynamicTable = FromSec;
}
- parseDynamicTable(Obj);
+ parseDynamicTable();
}
template <typename ELFT>
-ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
+ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> &O,
ScopedPrinter &Writer)
- : ObjDumper(Writer), ObjF(ObjF), DynRelRegion(ObjF->getFileName()),
- DynRelaRegion(ObjF->getFileName()), DynRelrRegion(ObjF->getFileName()),
- DynPLTRelRegion(ObjF->getFileName()), DynamicTable(ObjF->getFileName()) {
- // Dumper reports all non-critical errors as warnings.
- // It does not print the same warning more than once.
- WarningHandler = [this](const Twine &Msg) {
- if (Warnings.insert(Msg.str()).second)
- reportWarning(createError(Msg), this->ObjF->getFileName());
- return Error::success();
- };
-
- if (opts::Output == opts::GNU)
- ELFDumperStyle.reset(new GNUStyle<ELFT>(Writer, this));
- else
- ELFDumperStyle.reset(new LLVMStyle<ELFT>(Writer, this));
+ : ObjDumper(Writer, O.getFileName()), ObjF(O), Obj(O.getELFFile()),
+ FileName(O.getFileName()), DynRelRegion(O, *this),
+ DynRelaRegion(O, *this), DynRelrRegion(O, *this),
+ DynPLTRelRegion(O, *this), DynSymTabShndxRegion(O, *this),
+ DynamicTable(O, *this) {
+ if (!O.IsContentValid())
+ return;
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- typename ELFT::ShdrRange Sections = cantFail(Obj->sections());
+ typename ELFT::ShdrRange Sections = cantFail(Obj.sections());
for (const Elf_Shdr &Sec : Sections) {
switch (Sec.sh_type) {
case ELF::SHT_SYMTAB:
@@ -2067,24 +1686,51 @@ ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
DotSymtabSec = &Sec;
break;
case ELF::SHT_DYNSYM:
+ if (!DotDynsymSec)
+ DotDynsymSec = &Sec;
+
if (!DynSymRegion) {
- DynSymRegion = createDRIFrom(&Sec);
- DynSymRegion->Context =
- ("section with index " + Twine(&Sec - &Sections.front())).str();
- // This is only used (if Elf_Shdr present)for naming section in GNU
- // style
- DynSymtabName =
- unwrapOrError(ObjF->getFileName(), Obj->getSectionName(&Sec));
-
- if (Expected<StringRef> E = Obj->getStringTableForSymtab(Sec))
- DynamicStringTable = *E;
- else
- reportWarning(E.takeError(), ObjF->getFileName());
+ Expected<DynRegionInfo> RegOrErr =
+ createDRI(Sec.sh_offset, Sec.sh_size, Sec.sh_entsize);
+ if (RegOrErr) {
+ DynSymRegion = *RegOrErr;
+ DynSymRegion->Context = describe(Sec);
+
+ if (Expected<StringRef> E = Obj.getStringTableForSymtab(Sec))
+ DynamicStringTable = *E;
+ else
+ reportUniqueWarning("unable to get the string table for the " +
+ describe(Sec) + ": " + toString(E.takeError()));
+ } else {
+ reportUniqueWarning("unable to read dynamic symbols from " +
+ describe(Sec) + ": " +
+ toString(RegOrErr.takeError()));
+ }
}
break;
- case ELF::SHT_SYMTAB_SHNDX:
- ShndxTable = unwrapOrError(ObjF->getFileName(), Obj->getSHNDXTable(Sec));
+ case ELF::SHT_SYMTAB_SHNDX: {
+ uint32_t SymtabNdx = Sec.sh_link;
+ if (SymtabNdx >= Sections.size()) {
+ reportUniqueWarning(
+ "unable to get the associated symbol table for " + describe(Sec) +
+ ": sh_link (" + Twine(SymtabNdx) +
+ ") is greater than or equal to the total number of sections (" +
+ Twine(Sections.size()) + ")");
+ continue;
+ }
+
+ if (Expected<ArrayRef<Elf_Word>> ShndxTableOrErr =
+ Obj.getSHNDXTable(Sec)) {
+ if (!ShndxTables.insert({&Sections[SymtabNdx], *ShndxTableOrErr})
+ .second)
+ reportUniqueWarning(
+ "multiple SHT_SYMTAB_SHNDX sections are linked to " +
+ describe(Sec));
+ } else {
+ reportUniqueWarning(ShndxTableOrErr.takeError());
+ }
break;
+ }
case ELF::SHT_GNU_versym:
if (!SymbolVersionSection)
SymbolVersionSection = &Sec;
@@ -2108,25 +1754,24 @@ ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
}
}
- loadDynamicTable(Obj);
+ loadDynamicTable();
}
-template <typename ELFT>
-void ELFDumper<ELFT>::parseDynamicTable(const ELFFile<ELFT> *Obj) {
+template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
auto toMappedAddr = [&](uint64_t Tag, uint64_t VAddr) -> const uint8_t * {
- auto MappedAddrOrError = ObjF->getELFFile()->toMappedAddr(VAddr);
+ auto MappedAddrOrError = Obj.toMappedAddr(VAddr, [&](const Twine &Msg) {
+ this->reportUniqueWarning(Msg);
+ return Error::success();
+ });
if (!MappedAddrOrError) {
- Error Err =
- createError("Unable to parse DT_" + Obj->getDynamicTagAsString(Tag) +
- ": " + llvm::toString(MappedAddrOrError.takeError()));
-
- reportWarning(std::move(Err), ObjF->getFileName());
+ this->reportUniqueWarning("unable to parse DT_" +
+ Obj.getDynamicTagAsString(Tag) + ": " +
+ llvm::toString(MappedAddrOrError.takeError()));
return nullptr;
}
return MappedAddrOrError.get();
};
- uint64_t SONameOffset = 0;
const char *StringTableBegin = nullptr;
uint64_t StringTableSize = 0;
Optional<DynRegionInfo> DynSymFromTable;
@@ -2151,7 +1796,7 @@ void ELFDumper<ELFT>::parseDynamicTable(const ELFFile<ELFT> *Obj) {
// If we can't map the DT_SYMTAB value to an address (e.g. when there are
// no program headers), we ignore its value.
if (const uint8_t *VA = toMappedAddr(Dyn.getTag(), Dyn.getPtr())) {
- DynSymFromTable.emplace(ObjF->getFileName());
+ DynSymFromTable.emplace(ObjF, *this);
DynSymFromTable->Addr = VA;
DynSymFromTable->EntSize = sizeof(Elf_Sym);
DynSymFromTable->EntSizePrintName = "";
@@ -2161,11 +1806,10 @@ void ELFDumper<ELFT>::parseDynamicTable(const ELFFile<ELFT> *Obj) {
case ELF::DT_SYMENT: {
uint64_t Val = Dyn.getVal();
if (Val != sizeof(Elf_Sym))
- reportWarning(createError("DT_SYMENT value of 0x" +
+ this->reportUniqueWarning("DT_SYMENT value of 0x" +
Twine::utohexstr(Val) +
" is not the size of a symbol (0x" +
- Twine::utohexstr(sizeof(Elf_Sym)) + ")"),
- ObjF->getFileName());
+ Twine::utohexstr(sizeof(Elf_Sym)) + ")");
break;
}
case ELF::DT_RELA:
@@ -2217,10 +1861,9 @@ void ELFDumper<ELFT>::parseDynamicTable(const ELFFile<ELFT> *Obj) {
else if (Dyn.getVal() == DT_RELA)
DynPLTRelRegion.EntSize = sizeof(Elf_Rela);
else
- reportError(createError(Twine("unknown DT_PLTREL value of ") +
- Twine((uint64_t)Dyn.getVal())),
- ObjF->getFileName());
- DynPLTRelRegion.EntSizePrintName = "";
+ reportUniqueWarning(Twine("unknown DT_PLTREL value of ") +
+ Twine((uint64_t)Dyn.getVal()));
+ DynPLTRelRegion.EntSizePrintName = "PLTREL entry size";
break;
case ELF::DT_JMPREL:
DynPLTRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
@@ -2229,24 +1872,26 @@ void ELFDumper<ELFT>::parseDynamicTable(const ELFFile<ELFT> *Obj) {
DynPLTRelRegion.Size = Dyn.getVal();
DynPLTRelRegion.SizePrintName = "DT_PLTRELSZ value";
break;
+ case ELF::DT_SYMTAB_SHNDX:
+ DynSymTabShndxRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
+ DynSymTabShndxRegion.EntSize = sizeof(Elf_Word);
+ break;
}
}
if (StringTableBegin) {
- const uint64_t FileSize = ObjF->getELFFile()->getBufSize();
- const uint64_t Offset =
- (const uint8_t *)StringTableBegin - ObjF->getELFFile()->base();
+ const uint64_t FileSize = Obj.getBufSize();
+ const uint64_t Offset = (const uint8_t *)StringTableBegin - Obj.base();
if (StringTableSize > FileSize - Offset)
- reportUniqueWarning(createError(
+ reportUniqueWarning(
"the dynamic string table at 0x" + Twine::utohexstr(Offset) +
" goes past the end of the file (0x" + Twine::utohexstr(FileSize) +
- ") with DT_STRSZ = 0x" + Twine::utohexstr(StringTableSize)));
+ ") with DT_STRSZ = 0x" + Twine::utohexstr(StringTableSize));
else
DynamicStringTable = StringRef(StringTableBegin, StringTableSize);
}
- SOName = getDynamicString(SONameOffset);
-
+ const bool IsHashTableSupported = getHashTableEntSize() == 4;
if (DynSymRegion) {
// Often we find the information about the dynamic symbol table
// location in the SHT_DYNSYM section header. However, the value in
@@ -2262,16 +1907,15 @@ void ELFDumper<ELFT>::parseDynamicTable(const ELFFile<ELFT> *Obj) {
// equal nchain". Check to see if the DT_HASH hash table nchain value
// conflicts with the number of symbols in the dynamic symbol table
// according to the section header.
- if (HashTable) {
+ if (HashTable && IsHashTableSupported) {
if (DynSymRegion->EntSize == 0)
- reportUniqueWarning(
- createError("SHT_DYNSYM section has sh_entsize == 0"));
+ reportUniqueWarning("SHT_DYNSYM section has sh_entsize == 0");
else if (HashTable->nchain != DynSymRegion->Size / DynSymRegion->EntSize)
- reportUniqueWarning(createError(
+ reportUniqueWarning(
"hash table nchain (" + Twine(HashTable->nchain) +
") differs from symbol count derived from SHT_DYNSYM section "
"header (" +
- Twine(DynSymRegion->Size / DynSymRegion->EntSize) + ")"));
+ Twine(DynSymRegion->Size / DynSymRegion->EntSize) + ")");
}
}
@@ -2290,95 +1934,31 @@ void ELFDumper<ELFT>::parseDynamicTable(const ELFFile<ELFT> *Obj) {
// Derive the dynamic symbol table size from the DT_HASH hash table, if
// present.
- if (HashTable && DynSymRegion)
- DynSymRegion->Size = HashTable->nchain * DynSymRegion->EntSize;
-}
-
-template <typename ELFT>
-typename ELFDumper<ELFT>::Elf_Rel_Range ELFDumper<ELFT>::dyn_rels() const {
- return DynRelRegion.getAsArrayRef<Elf_Rel>();
-}
-
-template <typename ELFT>
-typename ELFDumper<ELFT>::Elf_Rela_Range ELFDumper<ELFT>::dyn_relas() const {
- return DynRelaRegion.getAsArrayRef<Elf_Rela>();
-}
-
-template <typename ELFT>
-typename ELFDumper<ELFT>::Elf_Relr_Range ELFDumper<ELFT>::dyn_relrs() const {
- return DynRelrRegion.getAsArrayRef<Elf_Relr>();
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printFileHeaders() {
- ELFDumperStyle->printFileHeaders(ObjF->getELFFile());
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printSectionHeaders() {
- ELFDumperStyle->printSectionHeaders(ObjF->getELFFile());
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printRelocations() {
- ELFDumperStyle->printRelocations(ObjF->getELFFile());
-}
-
-template <class ELFT>
-void ELFDumper<ELFT>::printProgramHeaders(
- bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) {
- ELFDumperStyle->printProgramHeaders(ObjF->getELFFile(), PrintProgramHeaders,
- PrintSectionMapping);
+ if (HashTable && IsHashTableSupported && DynSymRegion) {
+ const uint64_t FileSize = Obj.getBufSize();
+ const uint64_t DerivedSize =
+ (uint64_t)HashTable->nchain * DynSymRegion->EntSize;
+ const uint64_t Offset = (const uint8_t *)DynSymRegion->Addr - Obj.base();
+ if (DerivedSize > FileSize - Offset)
+ reportUniqueWarning(
+ "the size (0x" + Twine::utohexstr(DerivedSize) +
+ ") of the dynamic symbol table at 0x" + Twine::utohexstr(Offset) +
+ ", derived from the hash table, goes past the end of the file (0x" +
+ Twine::utohexstr(FileSize) + ") and will be ignored");
+ else
+ DynSymRegion->Size = HashTable->nchain * DynSymRegion->EntSize;
+ }
}
template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
// Dump version symbol section.
- ELFDumperStyle->printVersionSymbolSection(ObjF->getELFFile(),
- SymbolVersionSection);
+ printVersionSymbolSection(SymbolVersionSection);
// Dump version definition section.
- ELFDumperStyle->printVersionDefinitionSection(ObjF->getELFFile(),
- SymbolVersionDefSection);
+ printVersionDefinitionSection(SymbolVersionDefSection);
// Dump version dependency section.
- ELFDumperStyle->printVersionDependencySection(ObjF->getELFFile(),
- SymbolVersionNeedSection);
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printDependentLibs() {
- ELFDumperStyle->printDependentLibs(ObjF->getELFFile());
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printDynamicRelocations() {
- ELFDumperStyle->printDynamicRelocations(ObjF->getELFFile());
-}
-
-template <class ELFT>
-void ELFDumper<ELFT>::printSymbols(bool PrintSymbols,
- bool PrintDynamicSymbols) {
- ELFDumperStyle->printSymbols(ObjF->getELFFile(), PrintSymbols,
- PrintDynamicSymbols);
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printHashSymbols() {
- ELFDumperStyle->printHashSymbols(ObjF->getELFFile());
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printHashHistograms() {
- ELFDumperStyle->printHashHistograms(ObjF->getELFFile());
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printCGProfile() {
- ELFDumperStyle->printCGProfile(ObjF->getELFFile());
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printNotes() {
- ELFDumperStyle->printNotes(ObjF->getELFFile());
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printELFLinkerOptions() {
- ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile());
-}
-
-template <class ELFT> void ELFDumper<ELFT>::printStackSizes() {
- ELFDumperStyle->printStackSizes(ObjF);
+ printVersionDependencySection(SymbolVersionNeedSection);
}
#define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \
@@ -2445,21 +2025,28 @@ static const EnumEntry<unsigned> ElfDynamicDTMipsFlags[] = {
template <typename T, typename TFlag>
void printFlags(T Value, ArrayRef<EnumEntry<TFlag>> Flags, raw_ostream &OS) {
- using FlagEntry = EnumEntry<TFlag>;
- using FlagVector = SmallVector<FlagEntry, 10>;
- FlagVector SetFlags;
-
- for (const auto &Flag : Flags) {
- if (Flag.Value == 0)
- continue;
-
- if ((Value & Flag.Value) == Flag.Value)
+ SmallVector<EnumEntry<TFlag>, 10> SetFlags;
+ for (const EnumEntry<TFlag> &Flag : Flags)
+ if (Flag.Value != 0 && (Value & Flag.Value) == Flag.Value)
SetFlags.push_back(Flag);
- }
- for (const auto &Flag : SetFlags) {
+ for (const EnumEntry<TFlag> &Flag : SetFlags)
OS << Flag.Name << " ";
+}
+
+template <class ELFT>
+const typename ELFT::Shdr *
+ELFDumper<ELFT>::findSectionByName(StringRef Name) const {
+ for (const Elf_Shdr &Shdr : cantFail(Obj.sections())) {
+ if (Expected<StringRef> NameOrErr = Obj.getSectionName(Shdr)) {
+ if (*NameOrErr == Name)
+ return &Shdr;
+ } else {
+ reportUniqueWarning("unable to read the name of " + describe(Shdr) +
+ ": " + toString(NameOrErr.takeError()));
+ }
}
+ return nullptr;
}
template <class ELFT>
@@ -2483,11 +2070,12 @@ std::string ELFDumper<ELFT>::getDynamicEntry(uint64_t Type,
};
// Handle custom printing of architecture specific tags
- switch (ObjF->getELFFile()->getHeader()->e_machine) {
+ switch (Obj.getHeader().e_machine) {
case EM_AARCH64:
switch (Type) {
case DT_AARCH64_BTI_PLT:
case DT_AARCH64_PAC_PLT:
+ case DT_AARCH64_VARIANT_PCS:
return std::to_string(Value);
default:
break;
@@ -2635,19 +2223,19 @@ std::string ELFDumper<ELFT>::getDynamicEntry(uint64_t Type,
template <class ELFT>
StringRef ELFDumper<ELFT>::getDynamicString(uint64_t Value) const {
if (DynamicStringTable.empty() && !DynamicStringTable.data()) {
- reportUniqueWarning(createError("string table was not found"));
+ reportUniqueWarning("string table was not found");
return "<?>";
}
auto WarnAndReturn = [this](const Twine &Msg, uint64_t Offset) {
- reportUniqueWarning(createError("string table at offset 0x" +
- Twine::utohexstr(Offset) + Msg));
+ reportUniqueWarning("string table at offset 0x" + Twine::utohexstr(Offset) +
+ Msg);
return "<?>";
};
- const uint64_t FileSize = ObjF->getELFFile()->getBufSize();
+ const uint64_t FileSize = Obj.getBufSize();
const uint64_t Offset =
- (const uint8_t *)DynamicStringTable.data() - ObjF->getELFFile()->base();
+ (const uint8_t *)DynamicStringTable.data() - Obj.base();
if (DynamicStringTable.size() > FileSize - Offset)
return WarnAndReturn(" with size 0x" +
Twine::utohexstr(DynamicStringTable.size()) +
@@ -2676,25 +2264,18 @@ template <class ELFT> void ELFDumper<ELFT>::printUnwindInfo() {
Ctx.printUnwindInformation();
}
+// The namespace is needed to fix the compilation with GCC versions older than 7.0.
namespace {
-
template <> void ELFDumper<ELF32LE>::printUnwindInfo() {
- const ELFFile<ELF32LE> *Obj = ObjF->getELFFile();
- const unsigned Machine = Obj->getHeader()->e_machine;
- if (Machine == EM_ARM) {
- ARM::EHABI::PrinterContext<ELF32LE> Ctx(W, Obj, ObjF->getFileName(),
+ if (Obj.getHeader().e_machine == EM_ARM) {
+ ARM::EHABI::PrinterContext<ELF32LE> Ctx(W, Obj, ObjF.getFileName(),
DotSymtabSec);
Ctx.PrintUnwindInformation();
}
DwarfCFIEH::PrinterContext<ELF32LE> Ctx(W, ObjF);
Ctx.printUnwindInformation();
}
-
-} // end anonymous namespace
-
-template <class ELFT> void ELFDumper<ELFT>::printDynamicTable() {
- ELFDumperStyle->printDynamic(ObjF->getELFFile());
-}
+} // namespace
template <class ELFT> void ELFDumper<ELFT>::printNeededLibraries() {
ListScope D(W, "NeededLibraries");
@@ -2711,47 +2292,60 @@ template <class ELFT> void ELFDumper<ELFT>::printNeededLibraries() {
}
template <class ELFT>
-static Error checkHashTable(const ELFFile<ELFT> *Obj,
+static Error checkHashTable(const ELFDumper<ELFT> &Dumper,
const typename ELFT::Hash *H,
bool *IsHeaderValid = nullptr) {
- auto MakeError = [&](uint64_t Off, const Twine &Msg = "") {
- return createError("the hash table at offset 0x" + Twine::utohexstr(Off) +
+ const ELFFile<ELFT> &Obj = Dumper.getElfObject().getELFFile();
+ const uint64_t SecOffset = (const uint8_t *)H - Obj.base();
+ if (Dumper.getHashTableEntSize() == 8) {
+ auto It = llvm::find_if(ElfMachineType, [&](const EnumEntry<unsigned> &E) {
+ return E.Value == Obj.getHeader().e_machine;
+ });
+ if (IsHeaderValid)
+ *IsHeaderValid = false;
+ return createError("the hash table at 0x" + Twine::utohexstr(SecOffset) +
+ " is not supported: it contains non-standard 8 "
+ "byte entries on " +
+ It->AltName + " platform");
+ }
+
+ auto MakeError = [&](const Twine &Msg = "") {
+ return createError("the hash table at offset 0x" +
+ Twine::utohexstr(SecOffset) +
" goes past the end of the file (0x" +
- Twine::utohexstr(Obj->getBufSize()) + ")" + Msg);
+ Twine::utohexstr(Obj.getBufSize()) + ")" + Msg);
};
// Each SHT_HASH section starts from two 32-bit fields: nbucket and nchain.
const unsigned HeaderSize = 2 * sizeof(typename ELFT::Word);
- const uint64_t SecOffset = (const uint8_t *)H - Obj->base();
if (IsHeaderValid)
- *IsHeaderValid = Obj->getBufSize() - SecOffset >= HeaderSize;
+ *IsHeaderValid = Obj.getBufSize() - SecOffset >= HeaderSize;
- if (Obj->getBufSize() - SecOffset < HeaderSize)
- return MakeError(SecOffset);
+ if (Obj.getBufSize() - SecOffset < HeaderSize)
+ return MakeError();
- if (Obj->getBufSize() - SecOffset - HeaderSize <
+ if (Obj.getBufSize() - SecOffset - HeaderSize <
((uint64_t)H->nbucket + H->nchain) * sizeof(typename ELFT::Word))
- return MakeError(SecOffset, ", nbucket = " + Twine(H->nbucket) +
- ", nchain = " + Twine(H->nchain));
+ return MakeError(", nbucket = " + Twine(H->nbucket) +
+ ", nchain = " + Twine(H->nchain));
return Error::success();
}
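checkHashTable() above relies on the fixed SHT_HASH layout: two 32-bit header words (nbucket, nchain) followed by nbucket bucket entries and nchain chain entries of the same word size. A compact sketch of that size requirement (hashTableFits is a hypothetical helper; it assumes the table offset is already known to lie inside the file):

#include <cstdint>

// True when a classic SysV hash table starting at SecOffset fits inside a
// file of FileSize bytes; WordSize is 4 for the standard 32-bit entries.
static bool hashTableFits(uint64_t FileSize, uint64_t SecOffset,
                          uint64_t NBucket, uint64_t NChain,
                          uint64_t WordSize = 4) {
  const uint64_t HeaderSize = 2 * WordSize;          // nbucket + nchain
  if (FileSize - SecOffset < HeaderSize)
    return false;                                    // truncated header
  return FileSize - SecOffset - HeaderSize >=
         (NBucket + NChain) * WordSize;              // bucket + chain arrays
}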
template <class ELFT>
-static Error checkGNUHashTable(const ELFFile<ELFT> *Obj,
+static Error checkGNUHashTable(const ELFFile<ELFT> &Obj,
const typename ELFT::GnuHash *GnuHashTable,
bool *IsHeaderValid = nullptr) {
const uint8_t *TableData = reinterpret_cast<const uint8_t *>(GnuHashTable);
- assert(TableData >= Obj->base() &&
- TableData < Obj->base() + Obj->getBufSize() &&
+ assert(TableData >= Obj.base() && TableData < Obj.base() + Obj.getBufSize() &&
"GnuHashTable must always point to a location inside the file");
- uint64_t TableOffset = TableData - Obj->base();
+ uint64_t TableOffset = TableData - Obj.base();
if (IsHeaderValid)
- *IsHeaderValid = TableOffset + /*Header size:*/ 16 < Obj->getBufSize();
+ *IsHeaderValid = TableOffset + /*Header size:*/ 16 < Obj.getBufSize();
if (TableOffset + 16 + (uint64_t)GnuHashTable->nbuckets * 4 +
(uint64_t)GnuHashTable->maskwords * sizeof(typename ELFT::Off) >=
- Obj->getBufSize())
+ Obj.getBufSize())
return createError("unable to dump the SHT_GNU_HASH "
"section at 0x" +
Twine::utohexstr(TableOffset) +
@@ -2765,7 +2359,7 @@ template <typename ELFT> void ELFDumper<ELFT>::printHashTable() {
return;
bool IsHeaderValid;
- Error Err = checkHashTable(ObjF->getELFFile(), HashTable, &IsHeaderValid);
+ Error Err = checkHashTable(*this, HashTable, &IsHeaderValid);
if (IsHeaderValid) {
W.printNumber("Num Buckets", HashTable->nbucket);
W.printNumber("Num Chains", HashTable->nchain);
@@ -2788,7 +2382,7 @@ getGnuHashTableChains(Optional<DynRegionInfo> DynSymRegion,
return createError("no dynamic symbol table found");
ArrayRef<typename ELFT::Sym> DynSymTable =
- DynSymRegion->getAsArrayRef<typename ELFT::Sym>();
+ DynSymRegion->template getAsArrayRef<typename ELFT::Sym>();
size_t NumSyms = DynSymTable.size();
if (!NumSyms)
return createError("the dynamic symbol table is empty");
@@ -2805,24 +2399,23 @@ getGnuHashTableChains(Optional<DynRegionInfo> DynSymRegion,
// is irrelevant and we should not report a warning.
ArrayRef<typename ELFT::Word> Buckets = GnuHashTable->buckets();
if (!llvm::all_of(Buckets, [](typename ELFT::Word V) { return V == 0; }))
- return createError("the first hashed symbol index (" +
- Twine(GnuHashTable->symndx) +
- ") is larger than the number of dynamic symbols (" +
- Twine(NumSyms) + ")");
+ return createError(
+ "the first hashed symbol index (" + Twine(GnuHashTable->symndx) +
+ ") is greater than or equal to the number of dynamic symbols (" +
+ Twine(NumSyms) + ")");
// There is no way to represent an array of (dynamic symbols count - symndx)
// length.
return ArrayRef<typename ELFT::Word>();
}
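getGnuHashTableChains() depends on the SHT_GNU_HASH convention that only dynamic symbols with index >= symndx are hashed, so the chains ("values") array holds one entry per remaining symbol. A one-line sketch of that relationship (gnuHashChainCount is a hypothetical helper):

#include <cstddef>

// Number of chain/value entries a GNU hash table describes: symbols below
// SymNdx are unhashed, every later dynamic symbol contributes one entry.
static size_t gnuHashChainCount(size_t NumDynSyms, size_t SymNdx) {
  return NumDynSyms > SymNdx ? NumDynSyms - SymNdx : 0;
}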
template <typename ELFT>
-void ELFDumper<ELFT>::printGnuHashTable(const object::ObjectFile *Obj) {
+void ELFDumper<ELFT>::printGnuHashTable() {
DictScope D(W, "GnuHashTable");
if (!GnuHashTable)
return;
bool IsHeaderValid;
- Error Err =
- checkGNUHashTable<ELFT>(ObjF->getELFFile(), GnuHashTable, &IsHeaderValid);
+ Error Err = checkGNUHashTable<ELFT>(Obj, GnuHashTable, &IsHeaderValid);
if (IsHeaderValid) {
W.printNumber("Num Buckets", GnuHashTable->nbuckets);
W.printNumber("First Hashed Symbol Index", GnuHashTable->symndx);
@@ -2844,10 +2437,9 @@ void ELFDumper<ELFT>::printGnuHashTable(const object::ObjectFile *Obj) {
Expected<ArrayRef<Elf_Word>> Chains =
getGnuHashTableChains<ELFT>(DynSymRegion, GnuHashTable);
if (!Chains) {
- reportUniqueWarning(
- createError("unable to dump 'Values' for the SHT_GNU_HASH "
- "section: " +
- toString(Chains.takeError())));
+ reportUniqueWarning("unable to dump 'Values' for the SHT_GNU_HASH "
+ "section: " +
+ toString(Chains.takeError()));
return;
}
@@ -2855,32 +2447,32 @@ void ELFDumper<ELFT>::printGnuHashTable(const object::ObjectFile *Obj) {
}
template <typename ELFT> void ELFDumper<ELFT>::printLoadName() {
+ StringRef SOName = "<Not found>";
+ if (SONameOffset)
+ SOName = getDynamicString(*SONameOffset);
W.printString("LoadName", SOName);
}
template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- switch (Obj->getHeader()->e_machine) {
+ switch (Obj.getHeader().e_machine) {
case EM_ARM:
case EM_RISCV:
printAttributes();
break;
case EM_MIPS: {
- ELFDumperStyle->printMipsABIFlags(ObjF);
+ printMipsABIFlags();
printMipsOptions();
printMipsReginfo();
-
- MipsGOTParser<ELFT> Parser(Obj, ObjF->getFileName(), dynamic_table(),
- dynamic_symbols());
+ MipsGOTParser<ELFT> Parser(*this);
if (Error E = Parser.findGOT(dynamic_table(), dynamic_symbols()))
- reportError(std::move(E), ObjF->getFileName());
+ reportUniqueWarning(std::move(E));
else if (!Parser.isGotEmpty())
- ELFDumperStyle->printMipsGOT(Parser);
+ printMipsGOT(Parser);
if (Error E = Parser.findPLT(dynamic_table()))
- reportError(std::move(E), ObjF->getFileName());
+ reportUniqueWarning(std::move(E));
else if (!Parser.isPltEmpty())
- ELFDumperStyle->printMipsPLT(Parser);
+ printMipsPLT(Parser);
break;
}
default:
@@ -2888,59 +2480,63 @@ template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() {
}
}
-namespace {
-
template <class ELFT> void ELFDumper<ELFT>::printAttributes() {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- if (!Obj->isLE()) {
+ if (!Obj.isLE()) {
W.startLine() << "Attributes not implemented.\n";
return;
}
- const unsigned Machine = Obj->getHeader()->e_machine;
+ const unsigned Machine = Obj.getHeader().e_machine;
assert((Machine == EM_ARM || Machine == EM_RISCV) &&
"Attributes not implemented.");
DictScope BA(W, "BuildAttributes");
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) {
+ for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
if (Sec.sh_type != ELF::SHT_ARM_ATTRIBUTES &&
Sec.sh_type != ELF::SHT_RISCV_ATTRIBUTES)
continue;
- ArrayRef<uint8_t> Contents =
- unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(&Sec));
- if (Contents[0] != ELFAttrs::Format_Version) {
- reportWarning(createError(Twine("unrecognised FormatVersion: 0x") +
- Twine::utohexstr(Contents[0])),
- ObjF->getFileName());
+ ArrayRef<uint8_t> Contents;
+ if (Expected<ArrayRef<uint8_t>> ContentOrErr =
+ Obj.getSectionContents(Sec)) {
+ Contents = *ContentOrErr;
+ if (Contents.empty()) {
+ reportUniqueWarning("the " + describe(Sec) + " is empty");
+ continue;
+ }
+ } else {
+ reportUniqueWarning("unable to read the content of the " + describe(Sec) +
+ ": " + toString(ContentOrErr.takeError()));
continue;
}
+
W.printHex("FormatVersion", Contents[0]);
- if (Contents.size() == 1)
- continue;
- // TODO: Delete the redundant FormatVersion check above.
- if (Machine == EM_ARM) {
- if (Error E = ARMAttributeParser(&W).parse(Contents, support::little))
- reportWarning(std::move(E), ObjF->getFileName());
- } else if (Machine == EM_RISCV) {
- if (Error E = RISCVAttributeParser(&W).parse(Contents, support::little))
- reportWarning(std::move(E), ObjF->getFileName());
- }
+ auto ParseAttributes = [&]() {
+ if (Machine == EM_ARM)
+ return ARMAttributeParser(&W).parse(Contents, support::little);
+ return RISCVAttributeParser(&W).parse(Contents, support::little);
+ };
+
+ if (Error E = ParseAttributes())
+ reportUniqueWarning("unable to dump attributes from the " +
+ describe(Sec) + ": " + toString(std::move(E)));
}
}
+namespace {
+
template <class ELFT> class MipsGOTParser {
public:
- TYPEDEF_ELF_TYPES(ELFT)
- using Entry = typename ELFO::Elf_Addr;
+ LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
+ using Entry = typename ELFT::Addr;
using Entries = ArrayRef<Entry>;
const bool IsStatic;
- const ELFO * const Obj;
+ const ELFFile<ELFT> &Obj;
+ const ELFDumper<ELFT> &Dumper;
- MipsGOTParser(const ELFO *Obj, StringRef FileName, Elf_Dyn_Range DynTable,
- Elf_Sym_Range DynSyms);
+ MipsGOTParser(const ELFDumper<ELFT> &D);
Error findGOT(Elf_Dyn_Range DynTable, Elf_Sym_Range DynSyms);
Error findPLT(Elf_Dyn_Range DynTable);
@@ -2967,6 +2563,7 @@ public:
const Elf_Sym *getPltSym(const Entry *E) const;
StringRef getPltStrTable() const { return PltStrTable; }
+ const Elf_Shdr *getPltSymTable() const { return PltSymTable; }
private:
const Elf_Shdr *GotSec;
@@ -2988,12 +2585,11 @@ private:
} // end anonymous namespace
template <class ELFT>
-MipsGOTParser<ELFT>::MipsGOTParser(const ELFO *Obj, StringRef FileName,
- Elf_Dyn_Range DynTable,
- Elf_Sym_Range DynSyms)
- : IsStatic(DynTable.empty()), Obj(Obj), GotSec(nullptr), LocalNum(0),
- GlobalNum(0), PltSec(nullptr), PltRelSec(nullptr), PltSymTable(nullptr),
- FileName(FileName) {}
+MipsGOTParser<ELFT>::MipsGOTParser(const ELFDumper<ELFT> &D)
+ : IsStatic(D.dynamic_table().empty()), Obj(D.getElfObject().getELFFile()),
+ Dumper(D), GotSec(nullptr), LocalNum(0), GlobalNum(0), PltSec(nullptr),
+ PltRelSec(nullptr), PltSymTable(nullptr),
+ FileName(D.getElfObject().getFileName()) {}
template <class ELFT>
Error MipsGOTParser<ELFT>::findGOT(Elf_Dyn_Range DynTable,
@@ -3004,12 +2600,12 @@ Error MipsGOTParser<ELFT>::findGOT(Elf_Dyn_Range DynTable,
// Find static GOT section.
if (IsStatic) {
- GotSec = findSectionByName(*Obj, FileName, ".got");
+ GotSec = Dumper.findSectionByName(".got");
if (!GotSec)
return Error::success();
ArrayRef<uint8_t> Content =
- unwrapOrError(FileName, Obj->getSectionContents(GotSec));
+ unwrapOrError(FileName, Obj.getSectionContents(*GotSec));
GotEntries = Entries(reinterpret_cast<const Entry *>(Content.data()),
Content.size() / sizeof(Entry));
LocalNum = GotEntries.size();
@@ -3059,7 +2655,7 @@ Error MipsGOTParser<ELFT>::findGOT(Elf_Dyn_Range DynTable,
GlobalNum = DynSymTotal - *DtGotSym;
ArrayRef<uint8_t> Content =
- unwrapOrError(FileName, Obj->getSectionContents(GotSec));
+ unwrapOrError(FileName, Obj.getSectionContents(*GotSec));
GotEntries = Entries(reinterpret_cast<const Entry *>(Content.data()),
Content.size() / sizeof(Entry));
GotDynSyms = DynSyms.drop_front(*DtGotSym);
@@ -3103,7 +2699,7 @@ Error MipsGOTParser<ELFT>::findPLT(Elf_Dyn_Range DynTable) {
Twine::utohexstr(*DtJmpRel));
if (Expected<ArrayRef<uint8_t>> PltContentOrErr =
- Obj->getSectionContents(PltSec))
+ Obj.getSectionContents(*PltSec))
PltEntries =
Entries(reinterpret_cast<const Entry *>(PltContentOrErr->data()),
PltContentOrErr->size() / sizeof(Entry));
@@ -3112,25 +2708,20 @@ Error MipsGOTParser<ELFT>::findPLT(Elf_Dyn_Range DynTable) {
toString(PltContentOrErr.takeError()));
if (Expected<const Elf_Shdr *> PltSymTableOrErr =
- Obj->getSection(PltRelSec->sh_link)) {
+ Obj.getSection(PltRelSec->sh_link))
PltSymTable = *PltSymTableOrErr;
- } else {
- unsigned SecNdx = PltRelSec - &cantFail(Obj->sections()).front();
- return createError("unable to get a symbol table linked to the RELPLT "
- "section with index " +
- Twine(SecNdx) + ": " +
+ else
+ return createError("unable to get a symbol table linked to the " +
+ describe(Obj, *PltRelSec) + ": " +
toString(PltSymTableOrErr.takeError()));
- }
if (Expected<StringRef> StrTabOrErr =
- Obj->getStringTableForSymtab(*PltSymTable)) {
+ Obj.getStringTableForSymtab(*PltSymTable))
PltStrTable = *StrTabOrErr;
- } else {
- unsigned SecNdx = PltSymTable - &cantFail(Obj->sections()).front();
- return createError(
- "unable to get a string table for the symbol table with index " +
- Twine(SecNdx) + ": " + toString(StrTabOrErr.takeError()));
- }
+ else
+ return createError("unable to get a string table for the " +
+ describe(Obj, *PltSymTable) + ": " +
+ toString(StrTabOrErr.takeError()));
return Error::success();
}
@@ -3232,13 +2823,13 @@ const typename MipsGOTParser<ELFT>::Elf_Sym *
MipsGOTParser<ELFT>::getPltSym(const Entry *E) const {
int64_t Offset = std::distance(getPltEntries().data(), E);
if (PltRelSec->sh_type == ELF::SHT_REL) {
- Elf_Rel_Range Rels = unwrapOrError(FileName, Obj->rels(PltRelSec));
+ Elf_Rel_Range Rels = unwrapOrError(FileName, Obj.rels(*PltRelSec));
return unwrapOrError(FileName,
- Obj->getRelocationSymbol(&Rels[Offset], PltSymTable));
+ Obj.getRelocationSymbol(Rels[Offset], PltSymTable));
} else {
- Elf_Rela_Range Rels = unwrapOrError(FileName, Obj->relas(PltRelSec));
+ Elf_Rela_Range Rels = unwrapOrError(FileName, Obj.relas(*PltRelSec));
return unwrapOrError(FileName,
- Obj->getRelocationSymbol(&Rels[Offset], PltSymTable));
+ Obj.getRelocationSymbol(Rels[Offset], PltSymTable));
}
}
@@ -3327,84 +2918,143 @@ static void printMipsReginfoData(ScopedPrinter &W,
}
template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- const Elf_Shdr *Shdr = findSectionByName(*Obj, ObjF->getFileName(), ".reginfo");
- if (!Shdr) {
+ const Elf_Shdr *RegInfoSec = findSectionByName(".reginfo");
+ if (!RegInfoSec) {
W.startLine() << "There is no .reginfo section in the file.\n";
return;
}
- ArrayRef<uint8_t> Sec =
- unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr));
- if (Sec.size() != sizeof(Elf_Mips_RegInfo<ELFT>)) {
- W.startLine() << "The .reginfo section has a wrong size.\n";
+
+ Expected<ArrayRef<uint8_t>> ContentsOrErr =
+ Obj.getSectionContents(*RegInfoSec);
+ if (!ContentsOrErr) {
+ this->reportUniqueWarning(
+ "unable to read the content of the .reginfo section (" +
+ describe(*RegInfoSec) + "): " + toString(ContentsOrErr.takeError()));
+ return;
+ }
+
+ if (ContentsOrErr->size() < sizeof(Elf_Mips_RegInfo<ELFT>)) {
+ this->reportUniqueWarning("the .reginfo section has an invalid size (0x" +
+ Twine::utohexstr(ContentsOrErr->size()) + ")");
return;
}
DictScope GS(W, "MIPS RegInfo");
- auto *Reginfo = reinterpret_cast<const Elf_Mips_RegInfo<ELFT> *>(Sec.data());
- printMipsReginfoData(W, *Reginfo);
+ printMipsReginfoData(W, *reinterpret_cast<const Elf_Mips_RegInfo<ELFT> *>(
+ ContentsOrErr->data()));
+}
+
+template <class ELFT>
+static Expected<const Elf_Mips_Options<ELFT> *>
+readMipsOptions(const uint8_t *SecBegin, ArrayRef<uint8_t> &SecData,
+ bool &IsSupported) {
+ if (SecData.size() < sizeof(Elf_Mips_Options<ELFT>))
+ return createError("the .MIPS.options section has an invalid size (0x" +
+ Twine::utohexstr(SecData.size()) + ")");
+
+ const Elf_Mips_Options<ELFT> *O =
+ reinterpret_cast<const Elf_Mips_Options<ELFT> *>(SecData.data());
+ const uint8_t Size = O->size;
+ if (Size > SecData.size()) {
+ const uint64_t Offset = SecData.data() - SecBegin;
+ const uint64_t SecSize = Offset + SecData.size();
+ return createError("a descriptor of size 0x" + Twine::utohexstr(Size) +
+ " at offset 0x" + Twine::utohexstr(Offset) +
+ " goes past the end of the .MIPS.options "
+ "section of size 0x" +
+ Twine::utohexstr(SecSize));
+ }
+
+ IsSupported = O->kind == ODK_REGINFO;
+ const size_t ExpectedSize =
+ sizeof(Elf_Mips_Options<ELFT>) + sizeof(Elf_Mips_RegInfo<ELFT>);
+
+ if (IsSupported)
+ if (Size < ExpectedSize)
+ return createError(
+ "a .MIPS.options entry of kind " +
+ Twine(getElfMipsOptionsOdkType(O->kind)) +
+ " has an invalid size (0x" + Twine::utohexstr(Size) +
+ "), the expected size is 0x" + Twine::utohexstr(ExpectedSize));
+
+ SecData = SecData.drop_front(Size);
+ return O;
}
template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- const Elf_Shdr *Shdr =
- findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.options");
- if (!Shdr) {
+ const Elf_Shdr *MipsOpts = findSectionByName(".MIPS.options");
+ if (!MipsOpts) {
W.startLine() << "There is no .MIPS.options section in the file.\n";
return;
}
DictScope GS(W, "MIPS Options");
- ArrayRef<uint8_t> Sec =
- unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr));
- while (!Sec.empty()) {
- if (Sec.size() < sizeof(Elf_Mips_Options<ELFT>)) {
- W.startLine() << "The .MIPS.options section has a wrong size.\n";
- return;
- }
- auto *O = reinterpret_cast<const Elf_Mips_Options<ELFT> *>(Sec.data());
- DictScope GS(W, getElfMipsOptionsOdkType(O->kind));
- switch (O->kind) {
- case ODK_REGINFO:
- printMipsReginfoData(W, O->getRegInfo());
- break;
- default:
- W.startLine() << "Unsupported MIPS options tag.\n";
+ ArrayRef<uint8_t> Data =
+ unwrapOrError(ObjF.getFileName(), Obj.getSectionContents(*MipsOpts));
+ const uint8_t *const SecBegin = Data.begin();
+ while (!Data.empty()) {
+ bool IsSupported;
+ Expected<const Elf_Mips_Options<ELFT> *> OptsOrErr =
+ readMipsOptions<ELFT>(SecBegin, Data, IsSupported);
+ if (!OptsOrErr) {
+ reportUniqueWarning(OptsOrErr.takeError());
break;
}
- Sec = Sec.slice(O->size);
- }
-}
-template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- const Elf_Shdr *StackMapSection = nullptr;
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) {
- StringRef Name =
- unwrapOrError(ObjF->getFileName(), Obj->getSectionName(&Sec));
- if (Name == ".llvm_stackmaps") {
- StackMapSection = &Sec;
- break;
+ unsigned Kind = (*OptsOrErr)->kind;
+ const char *Type = getElfMipsOptionsOdkType(Kind);
+ if (!IsSupported) {
+ W.startLine() << "Unsupported MIPS options tag: " << Type << " (" << Kind
+ << ")\n";
+ continue;
}
+
+ DictScope GS(W, Type);
+ if (Kind == ODK_REGINFO)
+ printMipsReginfoData(W, (*OptsOrErr)->getRegInfo());
+ else
+ llvm_unreachable("unexpected .MIPS.options section descriptor kind");
}
+}
+template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
+ const Elf_Shdr *StackMapSection = findSectionByName(".llvm_stackmaps");
if (!StackMapSection)
return;
- ArrayRef<uint8_t> StackMapContentsArray = unwrapOrError(
- ObjF->getFileName(), Obj->getSectionContents(StackMapSection));
+ auto Warn = [&](Error &&E) {
+ this->reportUniqueWarning("unable to read the stack map from " +
+ describe(*StackMapSection) + ": " +
+ toString(std::move(E)));
+ };
- prettyPrintStackMap(
- W, StackMapParser<ELFT::TargetEndianness>(StackMapContentsArray));
-}
+ Expected<ArrayRef<uint8_t>> ContentOrErr =
+ Obj.getSectionContents(*StackMapSection);
+ if (!ContentOrErr) {
+ Warn(ContentOrErr.takeError());
+ return;
+ }
-template <class ELFT> void ELFDumper<ELFT>::printGroupSections() {
- ELFDumperStyle->printGroupSections(ObjF->getELFFile());
+ if (Error E = StackMapParser<ELFT::TargetEndianness>::validateHeader(
+ *ContentOrErr)) {
+ Warn(std::move(E));
+ return;
+ }
+
+ prettyPrintStackMap(W, StackMapParser<ELFT::TargetEndianness>(*ContentOrErr));
}
-template <class ELFT> void ELFDumper<ELFT>::printAddrsig() {
- ELFDumperStyle->printAddrsig(ObjF->getELFFile());
+template <class ELFT>
+void ELFDumper<ELFT>::printReloc(const Relocation<ELFT> &R, unsigned RelIndex,
+ const Elf_Shdr &Sec, const Elf_Shdr *SymTab) {
+ Expected<RelSymbol<ELFT>> Target = getRelocationTarget(R, SymTab);
+ if (!Target)
+ reportUniqueWarning("unable to print relocation " + Twine(RelIndex) +
+ " in " + describe(Sec) + ": " +
+ toString(Target.takeError()));
+ else
+ printRelRelaReloc(R, *Target);
}
static inline void printFields(formatted_raw_ostream &OS, StringRef Str1,
@@ -3417,155 +3067,214 @@ static inline void printFields(formatted_raw_ostream &OS, StringRef Str1,
}
template <class ELFT>
-static std::string getSectionHeadersNumString(const ELFFile<ELFT> *Obj,
+static std::string getSectionHeadersNumString(const ELFFile<ELFT> &Obj,
StringRef FileName) {
- const typename ELFT::Ehdr *ElfHeader = Obj->getHeader();
- if (ElfHeader->e_shnum != 0)
- return to_string(ElfHeader->e_shnum);
+ const typename ELFT::Ehdr &ElfHeader = Obj.getHeader();
+ if (ElfHeader.e_shnum != 0)
+ return to_string(ElfHeader.e_shnum);
+
+ Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
+ if (!ArrOrErr) {
+ // In this case we can ignore an error, because we have already reported a
+ // warning about the broken section header table earlier.
+ consumeError(ArrOrErr.takeError());
+ return "<?>";
+ }
- ArrayRef<typename ELFT::Shdr> Arr = cantFail(Obj->sections());
- if (Arr.empty())
+ if (ArrOrErr->empty())
return "0";
- return "0 (" + to_string(Arr[0].sh_size) + ")";
+ return "0 (" + to_string((*ArrOrErr)[0].sh_size) + ")";
}
template <class ELFT>
-static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> *Obj,
+static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> &Obj,
StringRef FileName) {
- const typename ELFT::Ehdr *ElfHeader = Obj->getHeader();
- if (ElfHeader->e_shstrndx != SHN_XINDEX)
- return to_string(ElfHeader->e_shstrndx);
+ const typename ELFT::Ehdr &ElfHeader = Obj.getHeader();
+ if (ElfHeader.e_shstrndx != SHN_XINDEX)
+ return to_string(ElfHeader.e_shstrndx);
+
+ Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
+ if (!ArrOrErr) {
+ // In this case we can ignore an error, because we have already reported a
+ // warning about the broken section header table earlier.
+ consumeError(ArrOrErr.takeError());
+ return "<?>";
+ }
- ArrayRef<typename ELFT::Shdr> Arr = cantFail(Obj->sections());
- if (Arr.empty())
+ if (ArrOrErr->empty())
return "65535 (corrupt: out of range)";
- return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) +
- ")";
+ return to_string(ElfHeader.e_shstrndx) + " (" +
+ to_string((*ArrOrErr)[0].sh_link) + ")";
+}
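Both helpers above implement the ELF extended-numbering scheme: when e_shnum is zero the real section count is stored in section header 0's sh_size, and when e_shstrndx is SHN_XINDEX (0xffff) the real string-table index is stored in section header 0's sh_link. A minimal sketch of those two rules (stand-alone hypothetical helpers, not the templates above):

#include <cstdint>

static uint64_t realSectionCount(uint16_t EShNum, uint64_t Shdr0ShSize) {
  return EShNum != 0 ? EShNum : Shdr0ShSize; // extended count in sh_size
}

static uint32_t realShStrTabIndex(uint16_t EShStrNdx, uint32_t Shdr0ShLink) {
  const uint16_t XIndex = 0xffff;            // ELF::SHN_XINDEX
  return EShStrNdx != XIndex ? EShStrNdx : Shdr0ShLink; // index in sh_link
}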
+
+static const EnumEntry<unsigned> *getObjectFileEnumEntry(unsigned Type) {
+ auto It = llvm::find_if(ElfObjectFileType, [&](const EnumEntry<unsigned> &E) {
+ return E.Value == Type;
+ });
+ if (It != makeArrayRef(ElfObjectFileType).end())
+ return It;
+ return nullptr;
}
-template <class ELFT> void GNUStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
- const Elf_Ehdr *e = Obj->getHeader();
+template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
+ const Elf_Ehdr &e = this->Obj.getHeader();
OS << "ELF Header:\n";
OS << " Magic: ";
std::string Str;
for (int i = 0; i < ELF::EI_NIDENT; i++)
- OS << format(" %02x", static_cast<int>(e->e_ident[i]));
+ OS << format(" %02x", static_cast<int>(e.e_ident[i]));
OS << "\n";
- Str = printEnum(e->e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass));
+ Str = printEnum(e.e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass));
printFields(OS, "Class:", Str);
- Str = printEnum(e->e_ident[ELF::EI_DATA], makeArrayRef(ElfDataEncoding));
+ Str = printEnum(e.e_ident[ELF::EI_DATA], makeArrayRef(ElfDataEncoding));
printFields(OS, "Data:", Str);
OS.PadToColumn(2u);
OS << "Version:";
OS.PadToColumn(37u);
- OS << to_hexString(e->e_ident[ELF::EI_VERSION]);
- if (e->e_version == ELF::EV_CURRENT)
+ OS << to_hexString(e.e_ident[ELF::EI_VERSION]);
+ if (e.e_version == ELF::EV_CURRENT)
OS << " (current)";
OS << "\n";
- Str = printEnum(e->e_ident[ELF::EI_OSABI], makeArrayRef(ElfOSABI));
+ Str = printEnum(e.e_ident[ELF::EI_OSABI], makeArrayRef(ElfOSABI));
printFields(OS, "OS/ABI:", Str);
printFields(OS,
- "ABI Version:", std::to_string(e->e_ident[ELF::EI_ABIVERSION]));
- Str = printEnum(e->e_type, makeArrayRef(ElfObjectFileType));
+ "ABI Version:", std::to_string(e.e_ident[ELF::EI_ABIVERSION]));
+
+ if (const EnumEntry<unsigned> *E = getObjectFileEnumEntry(e.e_type)) {
+ Str = E->AltName.str();
+ } else {
+ if (e.e_type >= ET_LOPROC)
+ Str = "Processor Specific: (" + to_hexString(e.e_type, false) + ")";
+ else if (e.e_type >= ET_LOOS)
+ Str = "OS Specific: (" + to_hexString(e.e_type, false) + ")";
+ else
+ Str = "<unknown>: " + to_hexString(e.e_type, false);
+ }
printFields(OS, "Type:", Str);
- Str = printEnum(e->e_machine, makeArrayRef(ElfMachineType));
+
+ Str = printEnum(e.e_machine, makeArrayRef(ElfMachineType));
printFields(OS, "Machine:", Str);
- Str = "0x" + to_hexString(e->e_version);
+ Str = "0x" + to_hexString(e.e_version);
printFields(OS, "Version:", Str);
- Str = "0x" + to_hexString(e->e_entry);
+ Str = "0x" + to_hexString(e.e_entry);
printFields(OS, "Entry point address:", Str);
- Str = to_string(e->e_phoff) + " (bytes into file)";
+ Str = to_string(e.e_phoff) + " (bytes into file)";
printFields(OS, "Start of program headers:", Str);
- Str = to_string(e->e_shoff) + " (bytes into file)";
+ Str = to_string(e.e_shoff) + " (bytes into file)";
printFields(OS, "Start of section headers:", Str);
std::string ElfFlags;
- if (e->e_machine == EM_MIPS)
+ if (e.e_machine == EM_MIPS)
ElfFlags =
- printFlags(e->e_flags, makeArrayRef(ElfHeaderMipsFlags),
+ printFlags(e.e_flags, makeArrayRef(ElfHeaderMipsFlags),
unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
unsigned(ELF::EF_MIPS_MACH));
- else if (e->e_machine == EM_RISCV)
- ElfFlags = printFlags(e->e_flags, makeArrayRef(ElfHeaderRISCVFlags));
- Str = "0x" + to_hexString(e->e_flags);
+ else if (e.e_machine == EM_RISCV)
+ ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderRISCVFlags));
+ Str = "0x" + to_hexString(e.e_flags);
if (!ElfFlags.empty())
Str = Str + ", " + ElfFlags;
printFields(OS, "Flags:", Str);
- Str = to_string(e->e_ehsize) + " (bytes)";
+ Str = to_string(e.e_ehsize) + " (bytes)";
printFields(OS, "Size of this header:", Str);
- Str = to_string(e->e_phentsize) + " (bytes)";
+ Str = to_string(e.e_phentsize) + " (bytes)";
printFields(OS, "Size of program headers:", Str);
- Str = to_string(e->e_phnum);
+ Str = to_string(e.e_phnum);
printFields(OS, "Number of program headers:", Str);
- Str = to_string(e->e_shentsize) + " (bytes)";
+ Str = to_string(e.e_shentsize) + " (bytes)";
printFields(OS, "Size of section headers:", Str);
- Str = getSectionHeadersNumString(Obj, this->FileName);
+ Str = getSectionHeadersNumString(this->Obj, this->FileName);
printFields(OS, "Number of section headers:", Str);
- Str = getSectionHeaderTableIndexString(Obj, this->FileName);
+ Str = getSectionHeaderTableIndexString(this->Obj, this->FileName);
printFields(OS, "Section header string table index:", Str);
}
-namespace {
-struct GroupMember {
- StringRef Name;
- uint64_t Index;
-};
+template <class ELFT> std::vector<GroupSection> ELFDumper<ELFT>::getGroups() {
+ auto GetSignature = [&](const Elf_Sym &Sym, unsigned SymNdx,
+ const Elf_Shdr &Symtab) -> StringRef {
+ Expected<StringRef> StrTableOrErr = Obj.getStringTableForSymtab(Symtab);
+ if (!StrTableOrErr) {
+ reportUniqueWarning("unable to get the string table for " +
+ describe(Symtab) + ": " +
+ toString(StrTableOrErr.takeError()));
+ return "<?>";
+ }
-struct GroupSection {
- StringRef Name;
- std::string Signature;
- uint64_t ShName;
- uint64_t Index;
- uint32_t Link;
- uint32_t Info;
- uint32_t Type;
- std::vector<GroupMember> Members;
-};
+ StringRef Strings = *StrTableOrErr;
+ if (Sym.st_name >= Strings.size()) {
+ reportUniqueWarning("unable to get the name of the symbol with index " +
+ Twine(SymNdx) + ": st_name (0x" +
+ Twine::utohexstr(Sym.st_name) +
+ ") is past the end of the string table of size 0x" +
+ Twine::utohexstr(Strings.size()));
+ return "<?>";
+ }
-template <class ELFT>
-std::vector<GroupSection> getGroups(const ELFFile<ELFT> *Obj,
- StringRef FileName) {
- using Elf_Shdr = typename ELFT::Shdr;
- using Elf_Sym = typename ELFT::Sym;
- using Elf_Word = typename ELFT::Word;
+ return StrTableOrErr->data() + Sym.st_name;
+ };
std::vector<GroupSection> Ret;
uint64_t I = 0;
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) {
+ for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
++I;
if (Sec.sh_type != ELF::SHT_GROUP)
continue;
- const Elf_Shdr *Symtab =
- unwrapOrError(FileName, Obj->getSection(Sec.sh_link));
- StringRef StrTable =
- unwrapOrError(FileName, Obj->getStringTableForSymtab(*Symtab));
- const Elf_Sym *Sym = unwrapOrError(
- FileName, Obj->template getEntry<Elf_Sym>(Symtab, Sec.sh_info));
- auto Data = unwrapOrError(
- FileName, Obj->template getSectionContentsAsArray<Elf_Word>(&Sec));
-
- StringRef Name = unwrapOrError(FileName, Obj->getSectionName(&Sec));
- StringRef Signature = StrTable.data() + Sym->st_name;
- Ret.push_back({Name,
+ StringRef Signature = "<?>";
+ if (Expected<const Elf_Shdr *> SymtabOrErr = Obj.getSection(Sec.sh_link)) {
+ if (Expected<const Elf_Sym *> SymOrErr =
+ Obj.template getEntry<Elf_Sym>(**SymtabOrErr, Sec.sh_info))
+ Signature = GetSignature(**SymOrErr, Sec.sh_info, **SymtabOrErr);
+ else
+ reportUniqueWarning("unable to get the signature symbol for " +
+ describe(Sec) + ": " +
+ toString(SymOrErr.takeError()));
+ } else {
+ reportUniqueWarning("unable to get the symbol table for " +
+ describe(Sec) + ": " +
+ toString(SymtabOrErr.takeError()));
+ }
+
+ ArrayRef<Elf_Word> Data;
+ if (Expected<ArrayRef<Elf_Word>> ContentsOrErr =
+ Obj.template getSectionContentsAsArray<Elf_Word>(Sec)) {
+ if (ContentsOrErr->empty())
+ reportUniqueWarning("unable to read the section group flag from the " +
+ describe(Sec) + ": the section is empty");
+ else
+ Data = *ContentsOrErr;
+ } else {
+ reportUniqueWarning("unable to get the content of the " + describe(Sec) +
+ ": " + toString(ContentsOrErr.takeError()));
+ }
+
+ Ret.push_back({getPrintableSectionName(Sec),
maybeDemangle(Signature),
Sec.sh_name,
I - 1,
Sec.sh_link,
Sec.sh_info,
- Data[0],
+ Data.empty() ? Elf_Word(0) : Data[0],
{}});
+ if (Data.empty())
+ continue;
+
std::vector<GroupMember> &GM = Ret.back().Members;
for (uint32_t Ndx : Data.slice(1)) {
- auto Sec = unwrapOrError(FileName, Obj->getSection(Ndx));
- const StringRef Name = unwrapOrError(FileName, Obj->getSectionName(Sec));
- GM.push_back({Name, Ndx});
+ if (Expected<const Elf_Shdr *> SecOrErr = Obj.getSection(Ndx)) {
+ GM.push_back({getPrintableSectionName(**SecOrErr), Ndx});
+ } else {
+ reportUniqueWarning("unable to get the section with index " +
+ Twine(Ndx) + " when dumping the " + describe(Sec) +
+ ": " + toString(SecOrErr.takeError()));
+ GM.push_back({"<?>", Ndx});
+ }
}
}
return Ret;
}
-DenseMap<uint64_t, const GroupSection *>
+static DenseMap<uint64_t, const GroupSection *>
mapSectionsToGroups(ArrayRef<GroupSection> Groups) {
DenseMap<uint64_t, const GroupSection *> Ret;
for (const GroupSection &G : Groups)
@@ -3574,10 +3283,8 @@ mapSectionsToGroups(ArrayRef<GroupSection> Groups) {
return Ret;
}
-} // namespace
-
-template <class ELFT> void GNUStyle<ELFT>::printGroupSections(const ELFO *Obj) {
- std::vector<GroupSection> V = getGroups<ELFT>(Obj, this->FileName);
+template <class ELFT> void GNUELFDumper<ELFT>::printGroupSections() {
+ std::vector<GroupSection> V = this->getGroups();
DenseMap<uint64_t, const GroupSection *> Map = mapSectionsToGroups(V);
for (const GroupSection &G : V) {
OS << "\n"
@@ -3587,15 +3294,13 @@ template <class ELFT> void GNUStyle<ELFT>::printGroupSections(const ELFO *Obj) {
<< " [Index] Name\n";
for (const GroupMember &GM : G.Members) {
const GroupSection *MainGroup = Map[GM.Index];
- if (MainGroup != &G) {
- OS.flush();
- errs() << "Error: section [" << format_decimal(GM.Index, 5)
- << "] in group section [" << format_decimal(G.Index, 5)
- << "] already in group section ["
- << format_decimal(MainGroup->Index, 5) << "]";
- errs().flush();
- continue;
- }
+ if (MainGroup != &G)
+ this->reportUniqueWarning(
+ "section with index " + Twine(GM.Index) +
+ ", included in the group section with index " +
+ Twine(MainGroup->Index) +
+ ", was also found in the group section with index " +
+ Twine(G.Index));
OS << " [" << format_decimal(GM.Index, 5) << "] " << GM.Name << "\n";
}
}
@@ -3605,60 +3310,51 @@ template <class ELFT> void GNUStyle<ELFT>::printGroupSections(const ELFO *Obj) {
}
template <class ELFT>
-void GNUStyle<ELFT>::printRelocation(const ELFO *Obj, unsigned SecIndex,
- const Elf_Shdr *SymTab, const Elf_Rela &R,
- unsigned RelIndex, bool IsRela) {
- Expected<std::pair<const typename ELFT::Sym *, std::string>> Target =
- this->dumper()->getRelocationTarget(SymTab, R);
- if (!Target)
- this->reportUniqueWarning(createError(
- "unable to print relocation " + Twine(RelIndex) + " in section " +
- Twine(SecIndex) + ": " + toString(Target.takeError())));
- else
- printRelocation(Obj, /*Sym=*/Target->first, /*Name=*/Target->second, R,
- IsRela);
+void GNUELFDumper<ELFT>::printRelrReloc(const Elf_Relr &R) {
+ OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 16 : 8)) << "\n";
}
template <class ELFT>
-void GNUStyle<ELFT>::printRelocation(const ELFO *Obj, const Elf_Sym *Sym,
- StringRef SymbolName, const Elf_Rela &R,
- bool IsRela) {
+void GNUELFDumper<ELFT>::printRelRelaReloc(const Relocation<ELFT> &R,
+ const RelSymbol<ELFT> &RelSym) {
// First two fields are bit width dependent. The rest of them are fixed width.
unsigned Bias = ELFT::Is64Bits ? 8 : 0;
Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias};
unsigned Width = ELFT::Is64Bits ? 16 : 8;
- Fields[0].Str = to_string(format_hex_no_prefix(R.r_offset, Width));
- Fields[1].Str = to_string(format_hex_no_prefix(R.r_info, Width));
+ Fields[0].Str = to_string(format_hex_no_prefix(R.Offset, Width));
+ Fields[1].Str = to_string(format_hex_no_prefix(R.Info, Width));
SmallString<32> RelocName;
- Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName);
+ this->Obj.getRelocationTypeName(R.Type, RelocName);
Fields[2].Str = RelocName.c_str();
- if (Sym)
- Fields[3].Str = to_string(format_hex_no_prefix(Sym->getValue(), Width));
+ if (RelSym.Sym)
+ Fields[3].Str =
+ to_string(format_hex_no_prefix(RelSym.Sym->getValue(), Width));
- Fields[4].Str = std::string(SymbolName);
+ Fields[4].Str = std::string(RelSym.Name);
for (const Field &F : Fields)
printField(F);
std::string Addend;
- if (IsRela) {
- int64_t RelAddend = R.r_addend;
- if (!SymbolName.empty()) {
- if (R.r_addend < 0) {
+ if (Optional<int64_t> A = R.Addend) {
+ int64_t RelAddend = *A;
+ if (!RelSym.Name.empty()) {
+ if (RelAddend < 0) {
Addend = " - ";
RelAddend = std::abs(RelAddend);
- } else
+ } else {
Addend = " + ";
+ }
}
-
Addend += to_hexString(RelAddend, false);
}
OS << Addend << "\n";
}
-template <class ELFT> void GNUStyle<ELFT>::printRelocHeader(unsigned SType) {
+template <class ELFT>
+static void printRelocHeaderFields(formatted_raw_ostream &OS, unsigned SType) {
bool IsRela = SType == ELF::SHT_RELA || SType == ELF::SHT_ANDROID_RELA;
bool IsRelr = SType == ELF::SHT_RELR || SType == ELF::SHT_ANDROID_RELR;
if (ELFT::Is64Bits)
@@ -3679,75 +3375,68 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocHeader(unsigned SType) {
OS << "\n";
}
-template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
- bool HasRelocSections = false;
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) {
- if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA &&
- Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_REL &&
- Sec.sh_type != ELF::SHT_ANDROID_RELA &&
- Sec.sh_type != ELF::SHT_ANDROID_RELR)
- continue;
- HasRelocSections = true;
- StringRef Name = unwrapOrError(this->FileName, Obj->getSectionName(&Sec));
- unsigned Entries = Sec.getEntityCount();
- std::vector<Elf_Rela> AndroidRelas;
+template <class ELFT>
+void GNUELFDumper<ELFT>::printDynamicRelocHeader(unsigned Type, StringRef Name,
+ const DynRegionInfo &Reg) {
+ uint64_t Offset = Reg.Addr - this->Obj.base();
+ OS << "\n'" << Name.str().c_str() << "' relocation section at offset 0x"
+ << to_hexString(Offset, false) << " contains " << Reg.Size << " bytes:\n";
+ printRelocHeaderFields<ELFT>(OS, Type);
+}
+
+template <class ELFT>
+static bool isRelocationSec(const typename ELFT::Shdr &Sec) {
+ return Sec.sh_type == ELF::SHT_REL || Sec.sh_type == ELF::SHT_RELA ||
+ Sec.sh_type == ELF::SHT_RELR || Sec.sh_type == ELF::SHT_ANDROID_REL ||
+ Sec.sh_type == ELF::SHT_ANDROID_RELA ||
+ Sec.sh_type == ELF::SHT_ANDROID_RELR;
+}
+
+template <class ELFT> void GNUELFDumper<ELFT>::printRelocations() {
+ auto GetEntriesNum = [&](const Elf_Shdr &Sec) -> Expected<size_t> {
+ // Android's packed relocation section needs to be unpacked first
+ // to get the actual number of entries.
if (Sec.sh_type == ELF::SHT_ANDROID_REL ||
Sec.sh_type == ELF::SHT_ANDROID_RELA) {
- // Android's packed relocation section needs to be unpacked first
- // to get the actual number of entries.
- AndroidRelas = unwrapOrError(this->FileName, Obj->android_relas(&Sec));
- Entries = AndroidRelas.size();
+ Expected<std::vector<typename ELFT::Rela>> RelasOrErr =
+ this->Obj.android_relas(Sec);
+ if (!RelasOrErr)
+ return RelasOrErr.takeError();
+ return RelasOrErr->size();
}
- std::vector<Elf_Rela> RelrRelas;
+
if (!opts::RawRelr && (Sec.sh_type == ELF::SHT_RELR ||
Sec.sh_type == ELF::SHT_ANDROID_RELR)) {
- // .relr.dyn relative relocation section needs to be unpacked first
- // to get the actual number of entries.
- Elf_Relr_Range Relrs = unwrapOrError(this->FileName, Obj->relrs(&Sec));
- RelrRelas = unwrapOrError(this->FileName, Obj->decode_relrs(Relrs));
- Entries = RelrRelas.size();
+ Expected<Elf_Relr_Range> RelrsOrErr = this->Obj.relrs(Sec);
+ if (!RelrsOrErr)
+ return RelrsOrErr.takeError();
+ return this->Obj.decode_relrs(*RelrsOrErr).size();
}
+
+ return Sec.getEntityCount();
+ };
+
+ bool HasRelocSections = false;
+ for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
+ if (!isRelocationSec<ELFT>(Sec))
+ continue;
+ HasRelocSections = true;
+
+ std::string EntriesNum = "<?>";
+ if (Expected<size_t> NumOrErr = GetEntriesNum(Sec))
+ EntriesNum = std::to_string(*NumOrErr);
+ else
+ this->reportUniqueWarning("unable to get the number of relocations in " +
+ this->describe(Sec) + ": " +
+ toString(NumOrErr.takeError()));
+
uintX_t Offset = Sec.sh_offset;
+ StringRef Name = this->getPrintableSectionName(Sec);
OS << "\nRelocation section '" << Name << "' at offset 0x"
- << to_hexString(Offset, false) << " contains " << Entries
+ << to_hexString(Offset, false) << " contains " << EntriesNum
<< " entries:\n";
- printRelocHeader(Sec.sh_type);
- const Elf_Shdr *SymTab =
- unwrapOrError(this->FileName, Obj->getSection(Sec.sh_link));
- unsigned SecNdx = &Sec - &cantFail(Obj->sections()).front();
- unsigned RelNdx = 0;
-
- switch (Sec.sh_type) {
- case ELF::SHT_REL:
- for (const auto &R : unwrapOrError(this->FileName, Obj->rels(&Sec))) {
- Elf_Rela Rela;
- Rela.r_offset = R.r_offset;
- Rela.r_info = R.r_info;
- Rela.r_addend = 0;
- printRelocation(Obj, SecNdx, SymTab, Rela, ++RelNdx, false);
- }
- break;
- case ELF::SHT_RELA:
- for (const auto &R : unwrapOrError(this->FileName, Obj->relas(&Sec)))
- printRelocation(Obj, SecNdx, SymTab, R, ++RelNdx, true);
- break;
- case ELF::SHT_RELR:
- case ELF::SHT_ANDROID_RELR:
- if (opts::RawRelr)
- for (const auto &R : unwrapOrError(this->FileName, Obj->relrs(&Sec)))
- OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 16 : 8))
- << "\n";
- else
- for (const auto &R : RelrRelas)
- printRelocation(Obj, SecNdx, SymTab, R, ++RelNdx, false);
- break;
- case ELF::SHT_ANDROID_REL:
- case ELF::SHT_ANDROID_RELA:
- for (const auto &R : AndroidRelas)
- printRelocation(Obj, SecNdx, SymTab, R, ++RelNdx,
- Sec.sh_type == ELF::SHT_ANDROID_RELA);
- break;
- }
+ printRelocHeaderFields<ELFT>(OS, Sec.sh_type);
+ this->printRelocationsHelper(Sec);
}
if (!HasRelocSections)
OS << "\nThere are no relocations in this file.\n";
@@ -3767,122 +3456,23 @@ static std::string getSectionTypeOffsetString(unsigned Type) {
return "0x" + to_hexString(Type) + ": <unknown>";
}
-static std::string getSectionTypeString(unsigned Arch, unsigned Type) {
- using namespace ELF;
+static std::string getSectionTypeString(unsigned Machine, unsigned Type) {
+ StringRef Name = getELFSectionTypeName(Machine, Type);
- switch (Arch) {
- case EM_ARM:
- switch (Type) {
- case SHT_ARM_EXIDX:
- return "ARM_EXIDX";
- case SHT_ARM_PREEMPTMAP:
- return "ARM_PREEMPTMAP";
- case SHT_ARM_ATTRIBUTES:
- return "ARM_ATTRIBUTES";
- case SHT_ARM_DEBUGOVERLAY:
- return "ARM_DEBUGOVERLAY";
- case SHT_ARM_OVERLAYSECTION:
- return "ARM_OVERLAYSECTION";
- }
- break;
- case EM_X86_64:
- switch (Type) {
- case SHT_X86_64_UNWIND:
- return "X86_64_UNWIND";
- }
- break;
- case EM_MIPS:
- case EM_MIPS_RS3_LE:
- switch (Type) {
- case SHT_MIPS_REGINFO:
- return "MIPS_REGINFO";
- case SHT_MIPS_OPTIONS:
- return "MIPS_OPTIONS";
- case SHT_MIPS_DWARF:
- return "MIPS_DWARF";
- case SHT_MIPS_ABIFLAGS:
- return "MIPS_ABIFLAGS";
- }
- break;
- case EM_RISCV:
- switch (Type) {
- case SHT_RISCV_ATTRIBUTES:
- return "RISCV_ATTRIBUTES";
- }
+ // Handle SHT_GNU_* type names.
+ if (Name.startswith("SHT_GNU_")) {
+ if (Name == "SHT_GNU_HASH")
+ return "GNU_HASH";
+ // E.g. SHT_GNU_verneed -> VERNEED.
+ return Name.drop_front(8).upper();
}
- switch (Type) {
- case SHT_NULL:
- return "NULL";
- case SHT_PROGBITS:
- return "PROGBITS";
- case SHT_SYMTAB:
- return "SYMTAB";
- case SHT_STRTAB:
- return "STRTAB";
- case SHT_RELA:
- return "RELA";
- case SHT_HASH:
- return "HASH";
- case SHT_DYNAMIC:
- return "DYNAMIC";
- case SHT_NOTE:
- return "NOTE";
- case SHT_NOBITS:
- return "NOBITS";
- case SHT_REL:
- return "REL";
- case SHT_SHLIB:
- return "SHLIB";
- case SHT_DYNSYM:
- return "DYNSYM";
- case SHT_INIT_ARRAY:
- return "INIT_ARRAY";
- case SHT_FINI_ARRAY:
- return "FINI_ARRAY";
- case SHT_PREINIT_ARRAY:
- return "PREINIT_ARRAY";
- case SHT_GROUP:
- return "GROUP";
- case SHT_SYMTAB_SHNDX:
+
+ if (Name == "SHT_SYMTAB_SHNDX")
return "SYMTAB SECTION INDICES";
- case SHT_ANDROID_REL:
- return "ANDROID_REL";
- case SHT_ANDROID_RELA:
- return "ANDROID_RELA";
- case SHT_RELR:
- case SHT_ANDROID_RELR:
- return "RELR";
- case SHT_LLVM_ODRTAB:
- return "LLVM_ODRTAB";
- case SHT_LLVM_LINKER_OPTIONS:
- return "LLVM_LINKER_OPTIONS";
- case SHT_LLVM_CALL_GRAPH_PROFILE:
- return "LLVM_CALL_GRAPH_PROFILE";
- case SHT_LLVM_ADDRSIG:
- return "LLVM_ADDRSIG";
- case SHT_LLVM_DEPENDENT_LIBRARIES:
- return "LLVM_DEPENDENT_LIBRARIES";
- case SHT_LLVM_SYMPART:
- return "LLVM_SYMPART";
- case SHT_LLVM_PART_EHDR:
- return "LLVM_PART_EHDR";
- case SHT_LLVM_PART_PHDR:
- return "LLVM_PART_PHDR";
- // FIXME: Parse processor specific GNU attributes
- case SHT_GNU_ATTRIBUTES:
- return "ATTRIBUTES";
- case SHT_GNU_HASH:
- return "GNU_HASH";
- case SHT_GNU_verdef:
- return "VERDEF";
- case SHT_GNU_verneed:
- return "VERNEED";
- case SHT_GNU_versym:
- return "VERSYM";
- default:
- return getSectionTypeOffsetString(Type);
- }
- return "";
+
+ if (Name.startswith("SHT_"))
+ return Name.drop_front(4).str();
+ return getSectionTypeOffsetString(Type);
}
static void printSectionDescription(formatted_raw_ostream &OS,
@@ -3904,26 +3494,25 @@ static void printSectionDescription(formatted_raw_ostream &OS,
OS << "p (processor specific)\n";
}
-template <class ELFT>
-void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
+template <class ELFT> void GNUELFDumper<ELFT>::printSectionHeaders() {
unsigned Bias = ELFT::Is64Bits ? 0 : 8;
- ArrayRef<Elf_Shdr> Sections = cantFail(Obj->sections());
+ ArrayRef<Elf_Shdr> Sections = cantFail(this->Obj.sections());
OS << "There are " << to_string(Sections.size())
<< " section headers, starting at offset "
- << "0x" << to_hexString(Obj->getHeader()->e_shoff, false) << ":\n\n";
+ << "0x" << to_hexString(this->Obj.getHeader().e_shoff, false) << ":\n\n";
OS << "Section Headers:\n";
Field Fields[11] = {
{"[Nr]", 2}, {"Name", 7}, {"Type", 25},
{"Address", 41}, {"Off", 58 - Bias}, {"Size", 65 - Bias},
{"ES", 72 - Bias}, {"Flg", 75 - Bias}, {"Lk", 79 - Bias},
{"Inf", 82 - Bias}, {"Al", 86 - Bias}};
- for (auto &F : Fields)
+ for (const Field &F : Fields)
printField(F);
OS << "\n";
StringRef SecStrTable;
if (Expected<StringRef> SecStrTableOrErr =
- Obj->getSectionStringTable(Sections, this->dumper()->WarningHandler))
+ this->Obj.getSectionStringTable(Sections, this->WarningHandler))
SecStrTable = *SecStrTableOrErr;
else
this->reportUniqueWarning(SecStrTableOrErr.takeError());
@@ -3935,15 +3524,15 @@ void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
Fields[1].Str = "<no-strings>";
else
Fields[1].Str = std::string(unwrapOrError<StringRef>(
- this->FileName, Obj->getSectionName(&Sec, SecStrTable)));
+ this->FileName, this->Obj.getSectionName(Sec, SecStrTable)));
Fields[2].Str =
- getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type);
+ getSectionTypeString(this->Obj.getHeader().e_machine, Sec.sh_type);
Fields[3].Str =
to_string(format_hex_no_prefix(Sec.sh_addr, ELFT::Is64Bits ? 16 : 8));
Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6));
Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6));
Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2));
- Fields[7].Str = getGNUFlags(Obj->getHeader()->e_machine, Sec.sh_flags);
+ Fields[7].Str = getGNUFlags(this->Obj.getHeader().e_machine, Sec.sh_flags);
Fields[8].Str = to_string(Sec.sh_link);
Fields[9].Str = to_string(Sec.sh_info);
Fields[10].Str = to_string(Sec.sh_addralign);
@@ -3963,13 +3552,16 @@ void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
OS << "\n";
++SectionIndex;
}
- printSectionDescription(OS, Obj->getHeader()->e_machine);
+ printSectionDescription(OS, this->Obj.getHeader().e_machine);
}
template <class ELFT>
-void GNUStyle<ELFT>::printSymtabMessage(const ELFO *Obj, StringRef Name,
- size_t Entries,
- bool NonVisibilityBitsUsed) {
+void GNUELFDumper<ELFT>::printSymtabMessage(const Elf_Shdr *Symtab,
+ size_t Entries,
+ bool NonVisibilityBitsUsed) const {
+ StringRef Name;
+ if (Symtab)
+ Name = this->getPrintableSectionName(*Symtab);
if (!Name.empty())
OS << "\nSymbol table '" << Name << "'";
else
@@ -3987,10 +3579,11 @@ void GNUStyle<ELFT>::printSymtabMessage(const ELFO *Obj, StringRef Name,
}
template <class ELFT>
-std::string GNUStyle<ELFT>::getSymbolSectionNdx(const ELFO *Obj,
- const Elf_Sym *Symbol,
- const Elf_Sym *FirstSym) {
- unsigned SectionIndex = Symbol->st_shndx;
+std::string
+GNUELFDumper<ELFT>::getSymbolSectionNdx(const Elf_Sym &Symbol,
+ unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable) const {
+ unsigned SectionIndex = Symbol.st_shndx;
switch (SectionIndex) {
case ELF::SHN_UNDEF:
return "UND";
@@ -3999,11 +3592,11 @@ std::string GNUStyle<ELFT>::getSymbolSectionNdx(const ELFO *Obj,
case ELF::SHN_COMMON:
return "COM";
case ELF::SHN_XINDEX: {
- Expected<uint32_t> IndexOrErr = object::getExtendedSymbolTableIndex<ELFT>(
- Symbol, FirstSym, this->dumper()->getShndxTable());
+ Expected<uint32_t> IndexOrErr =
+ object::getExtendedSymbolTableIndex<ELFT>(Symbol, SymIndex, ShndxTable);
if (!IndexOrErr) {
- assert(Symbol->st_shndx == SHN_XINDEX &&
- "getSymbolSectionIndex should only fail due to an invalid "
+ assert(Symbol.st_shndx == SHN_XINDEX &&
+ "getExtendedSymbolTableIndex should only fail due to an invalid "
"SHT_SYMTAB_SHNDX table/reference");
this->reportUniqueWarning(IndexOrErr.takeError());
return "RSV[0xffff]";
@@ -4031,71 +3624,75 @@ std::string GNUStyle<ELFT>::getSymbolSectionNdx(const ELFO *Obj,
}
template <class ELFT>
-void GNUStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
- const Elf_Sym *FirstSym,
- Optional<StringRef> StrTable, bool IsDynamic,
- bool NonVisibilityBitsUsed) {
- static int Idx = 0;
- static bool Dynamic = true;
-
- // If this function was called with a different value from IsDynamic
- // from last call, happens when we move from dynamic to static symbol
- // table, "Num" field should be reset.
- if (!Dynamic != !IsDynamic) {
- Idx = 0;
- Dynamic = false;
- }
-
+void GNUELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable,
+ Optional<StringRef> StrTable,
+ bool IsDynamic,
+ bool NonVisibilityBitsUsed) const {
unsigned Bias = ELFT::Is64Bits ? 8 : 0;
Field Fields[8] = {0, 8, 17 + Bias, 23 + Bias,
31 + Bias, 38 + Bias, 48 + Bias, 51 + Bias};
- Fields[0].Str = to_string(format_decimal(Idx++, 6)) + ":";
- Fields[1].Str = to_string(
- format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8));
- Fields[2].Str = to_string(format_decimal(Symbol->st_size, 5));
+ Fields[0].Str = to_string(format_decimal(SymIndex, 6)) + ":";
+ Fields[1].Str =
+ to_string(format_hex_no_prefix(Symbol.st_value, ELFT::Is64Bits ? 16 : 8));
+ Fields[2].Str = to_string(format_decimal(Symbol.st_size, 5));
- unsigned char SymbolType = Symbol->getType();
- if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU &&
+ unsigned char SymbolType = Symbol.getType();
+ if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU &&
SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
Fields[3].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
else
Fields[3].Str = printEnum(SymbolType, makeArrayRef(ElfSymbolTypes));
Fields[4].Str =
- printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
+ printEnum(Symbol.getBinding(), makeArrayRef(ElfSymbolBindings));
Fields[5].Str =
- printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities));
- if (Symbol->st_other & ~0x3)
- Fields[5].Str +=
- " [<other: " + to_string(format_hex(Symbol->st_other, 2)) + ">]";
+ printEnum(Symbol.getVisibility(), makeArrayRef(ElfSymbolVisibilities));
+
+ if (Symbol.st_other & ~0x3) {
+ if (this->Obj.getHeader().e_machine == ELF::EM_AARCH64) {
+ uint8_t Other = Symbol.st_other & ~0x3;
+ if (Other & STO_AARCH64_VARIANT_PCS) {
+ Other &= ~STO_AARCH64_VARIANT_PCS;
+ Fields[5].Str += " [VARIANT_PCS";
+ if (Other != 0)
+ Fields[5].Str.append(" | " + to_hexString(Other, false));
+ Fields[5].Str.append("]");
+ }
+ } else {
+ Fields[5].Str +=
+ " [<other: " + to_string(format_hex(Symbol.st_other, 2)) + ">]";
+ }
+ }
Fields[6].Column += NonVisibilityBitsUsed ? 13 : 0;
- Fields[6].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym);
+ Fields[6].Str = getSymbolSectionNdx(Symbol, SymIndex, ShndxTable);
- Fields[7].Str =
- this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic);
- for (auto &Entry : Fields)
+ Fields[7].Str = this->getFullSymbolName(Symbol, SymIndex, ShndxTable,
+ StrTable, IsDynamic);
+ for (const Field &Entry : Fields)
printField(Entry);
OS << "\n";
}
template <class ELFT>
-void GNUStyle<ELFT>::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym,
- uint32_t Sym, StringRef StrTable,
- uint32_t Bucket) {
+void GNUELFDumper<ELFT>::printHashedSymbol(const Elf_Sym *Symbol,
+ unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable,
+ StringRef StrTable,
+ uint32_t Bucket) {
unsigned Bias = ELFT::Is64Bits ? 8 : 0;
Field Fields[9] = {0, 6, 11, 20 + Bias, 25 + Bias,
34 + Bias, 41 + Bias, 49 + Bias, 53 + Bias};
- Fields[0].Str = to_string(format_decimal(Sym, 5));
+ Fields[0].Str = to_string(format_decimal(SymIndex, 5));
Fields[1].Str = to_string(format_decimal(Bucket, 3)) + ":";
- const auto Symbol = FirstSym + Sym;
Fields[2].Str = to_string(
format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8));
Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5));
unsigned char SymbolType = Symbol->getType();
- if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU &&
+ if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU &&
SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
Fields[4].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
else
@@ -4105,30 +3702,29 @@ void GNUStyle<ELFT>::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym,
printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
Fields[6].Str =
printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities));
- Fields[7].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym);
- Fields[8].Str = this->dumper()->getFullSymbolName(Symbol, StrTable, true);
+ Fields[7].Str = getSymbolSectionNdx(*Symbol, SymIndex, ShndxTable);
+ Fields[8].Str =
+ this->getFullSymbolName(*Symbol, SymIndex, ShndxTable, StrTable, true);
- for (auto &Entry : Fields)
+ for (const Field &Entry : Fields)
printField(Entry);
OS << "\n";
}
template <class ELFT>
-void GNUStyle<ELFT>::printSymbols(const ELFO *Obj, bool PrintSymbols,
- bool PrintDynamicSymbols) {
+void GNUELFDumper<ELFT>::printSymbols(bool PrintSymbols,
+ bool PrintDynamicSymbols) {
if (!PrintSymbols && !PrintDynamicSymbols)
return;
// GNU readelf prints both the .dynsym and .symtab with --symbols.
- this->dumper()->printSymbolsHelper(true);
+ this->printSymbolsHelper(true);
if (PrintSymbols)
- this->dumper()->printSymbolsHelper(false);
+ this->printSymbolsHelper(false);
}
template <class ELFT>
-void GNUStyle<ELFT>::printHashTableSymbols(const ELFO *Obj,
- const Elf_Hash &SysVHash) {
- StringRef StringTable = this->dumper()->getDynamicStringTable();
- if (StringTable.empty())
+void GNUELFDumper<ELFT>::printHashTableSymbols(const Elf_Hash &SysVHash) {
+ if (this->DynamicStringTable.empty())
return;
if (ELFT::Is64Bits)
@@ -4137,17 +3733,18 @@ void GNUStyle<ELFT>::printHashTableSymbols(const ELFO *Obj,
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
OS << "\n";
- Elf_Sym_Range DynSyms = this->dumper()->dynamic_symbols();
+ Elf_Sym_Range DynSyms = this->dynamic_symbols();
const Elf_Sym *FirstSym = DynSyms.empty() ? nullptr : &DynSyms[0];
if (!FirstSym) {
- Optional<DynRegionInfo> DynSymRegion = this->dumper()->getDynSymRegion();
this->reportUniqueWarning(
- createError(Twine("unable to print symbols for the .hash table: the "
- "dynamic symbol table ") +
- (DynSymRegion ? "is empty" : "was not found")));
+ Twine("unable to print symbols for the .hash table: the "
+ "dynamic symbol table ") +
+ (this->DynSymRegion ? "is empty" : "was not found"));
return;
}
+ DataRegion<Elf_Word> ShndxTable(
+ (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
auto Buckets = SysVHash.buckets();
auto Chains = SysVHash.chains();
for (uint32_t Buc = 0; Buc < SysVHash.nbucket; Buc++) {
@@ -4159,64 +3756,100 @@ void GNUStyle<ELFT>::printHashTableSymbols(const ELFO *Obj,
break;
if (Visited[Ch]) {
- reportWarning(createError(".hash section is invalid: bucket " +
+ this->reportUniqueWarning(".hash section is invalid: bucket " +
Twine(Ch) +
- ": a cycle was detected in the linked chain"),
- this->FileName);
+ ": a cycle was detected in the linked chain");
break;
}
- printHashedSymbol(Obj, FirstSym, Ch, StringTable, Buc);
+ printHashedSymbol(FirstSym + Ch, Ch, ShndxTable, this->DynamicStringTable,
+ Buc);
Visited[Ch] = true;
}
}
}
template <class ELFT>
-void GNUStyle<ELFT>::printGnuHashTableSymbols(const ELFO *Obj,
- const Elf_GnuHash &GnuHash) {
- StringRef StringTable = this->dumper()->getDynamicStringTable();
- if (StringTable.empty())
+void GNUELFDumper<ELFT>::printGnuHashTableSymbols(const Elf_GnuHash &GnuHash) {
+ if (this->DynamicStringTable.empty())
return;
- Elf_Sym_Range DynSyms = this->dumper()->dynamic_symbols();
+ Elf_Sym_Range DynSyms = this->dynamic_symbols();
const Elf_Sym *FirstSym = DynSyms.empty() ? nullptr : &DynSyms[0];
if (!FirstSym) {
- Optional<DynRegionInfo> DynSymRegion = this->dumper()->getDynSymRegion();
- this->reportUniqueWarning(createError(
+ this->reportUniqueWarning(
Twine("unable to print symbols for the .gnu.hash table: the "
"dynamic symbol table ") +
- (DynSymRegion ? "is empty" : "was not found")));
+ (this->DynSymRegion ? "is empty" : "was not found"));
return;
}
+ auto GetSymbol = [&](uint64_t SymIndex,
+ uint64_t SymsTotal) -> const Elf_Sym * {
+ if (SymIndex >= SymsTotal) {
+ this->reportUniqueWarning(
+ "unable to print hashed symbol with index " + Twine(SymIndex) +
+ ", which is greater than or equal to the number of dynamic symbols "
+ "(" +
+ Twine::utohexstr(SymsTotal) + ")");
+ return nullptr;
+ }
+ return FirstSym + SymIndex;
+ };
+
+ Expected<ArrayRef<Elf_Word>> ValuesOrErr =
+ getGnuHashTableChains<ELFT>(this->DynSymRegion, &GnuHash);
+ ArrayRef<Elf_Word> Values;
+ if (!ValuesOrErr)
+ this->reportUniqueWarning("unable to get hash values for the SHT_GNU_HASH "
+ "section: " +
+ toString(ValuesOrErr.takeError()));
+ else
+ Values = *ValuesOrErr;
+
+ DataRegion<Elf_Word> ShndxTable(
+ (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
ArrayRef<Elf_Word> Buckets = GnuHash.buckets();
for (uint32_t Buc = 0; Buc < GnuHash.nbuckets; Buc++) {
if (Buckets[Buc] == ELF::STN_UNDEF)
continue;
uint32_t Index = Buckets[Buc];
- uint32_t GnuHashable = Index - GnuHash.symndx;
- // Print whole chain
+ // Print whole chain.
while (true) {
- printHashedSymbol(Obj, FirstSym, Index++, StringTable, Buc);
- // Chain ends at symbol with stopper bit
- if ((GnuHash.values(DynSyms.size())[GnuHashable++] & 1) == 1)
+ uint32_t SymIndex = Index++;
+ if (const Elf_Sym *Sym = GetSymbol(SymIndex, DynSyms.size()))
+ printHashedSymbol(Sym, SymIndex, ShndxTable, this->DynamicStringTable,
+ Buc);
+ else
+ break;
+
+ if (SymIndex < GnuHash.symndx) {
+ this->reportUniqueWarning(
+ "unable to read the hash value for symbol with index " +
+ Twine(SymIndex) +
+ ", which is less than the index of the first hashed symbol (" +
+ Twine(GnuHash.symndx) + ")");
+ break;
+ }
+
+ // Chain ends at symbol with stopper bit.
+ if ((Values[SymIndex - GnuHash.symndx] & 1) == 1)
break;
}
}
}
-template <class ELFT> void GNUStyle<ELFT>::printHashSymbols(const ELFO *Obj) {
- if (const Elf_Hash *SysVHash = this->dumper()->getHashTable()) {
+template <class ELFT> void GNUELFDumper<ELFT>::printHashSymbols() {
+ if (this->HashTable) {
OS << "\n Symbol table of .hash for image:\n";
- if (Error E = checkHashTable<ELFT>(Obj, SysVHash))
+ if (Error E = checkHashTable<ELFT>(*this, this->HashTable))
this->reportUniqueWarning(std::move(E));
else
- printHashTableSymbols(Obj, *SysVHash);
+ printHashTableSymbols(*this->HashTable);
}
// Try printing the .gnu.hash table.
- if (const Elf_GnuHash *GnuHash = this->dumper()->getGnuHashTable()) {
+ if (this->GnuHashTable) {
OS << "\n Symbol table of .gnu.hash for image:\n";
if (ELFT::Is64Bits)
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
@@ -4224,10 +3857,119 @@ template <class ELFT> void GNUStyle<ELFT>::printHashSymbols(const ELFO *Obj) {
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
OS << "\n";
- if (Error E = checkGNUHashTable<ELFT>(Obj, GnuHash))
+ if (Error E = checkGNUHashTable<ELFT>(this->Obj, this->GnuHashTable))
this->reportUniqueWarning(std::move(E));
else
- printGnuHashTableSymbols(Obj, *GnuHash);
+ printGnuHashTableSymbols(*this->GnuHashTable);
+ }
+}
+
+template <class ELFT> void GNUELFDumper<ELFT>::printSectionDetails() {
+ ArrayRef<Elf_Shdr> Sections = cantFail(this->Obj.sections());
+ OS << "There are " << to_string(Sections.size())
+ << " section headers, starting at offset "
+ << "0x" << to_hexString(this->Obj.getHeader().e_shoff, false) << ":\n\n";
+
+ OS << "Section Headers:\n";
+
+ auto PrintFields = [&](ArrayRef<Field> V) {
+ for (const Field &F : V)
+ printField(F);
+ OS << "\n";
+ };
+
+ PrintFields({{"[Nr]", 2}, {"Name", 7}});
+
+ constexpr bool Is64 = ELFT::Is64Bits;
+ PrintFields({{"Type", 7},
+ {Is64 ? "Address" : "Addr", 23},
+ {"Off", Is64 ? 40 : 32},
+ {"Size", Is64 ? 47 : 39},
+ {"ES", Is64 ? 54 : 46},
+ {"Lk", Is64 ? 59 : 51},
+ {"Inf", Is64 ? 62 : 54},
+ {"Al", Is64 ? 66 : 57}});
+ PrintFields({{"Flags", 7}});
+
+ StringRef SecStrTable;
+ if (Expected<StringRef> SecStrTableOrErr =
+ this->Obj.getSectionStringTable(Sections, this->WarningHandler))
+ SecStrTable = *SecStrTableOrErr;
+ else
+ this->reportUniqueWarning(SecStrTableOrErr.takeError());
+
+ size_t SectionIndex = 0;
+ const unsigned AddrSize = Is64 ? 16 : 8;
+ for (const Elf_Shdr &S : Sections) {
+ StringRef Name = "<?>";
+ if (Expected<StringRef> NameOrErr =
+ this->Obj.getSectionName(S, SecStrTable))
+ Name = *NameOrErr;
+ else
+ this->reportUniqueWarning(NameOrErr.takeError());
+
+ OS.PadToColumn(2);
+ OS << "[" << right_justify(to_string(SectionIndex), 2) << "]";
+ PrintFields({{Name, 7}});
+ PrintFields(
+ {{getSectionTypeString(this->Obj.getHeader().e_machine, S.sh_type), 7},
+ {to_string(format_hex_no_prefix(S.sh_addr, AddrSize)), 23},
+ {to_string(format_hex_no_prefix(S.sh_offset, 6)), Is64 ? 39 : 32},
+ {to_string(format_hex_no_prefix(S.sh_size, 6)), Is64 ? 47 : 39},
+ {to_string(format_hex_no_prefix(S.sh_entsize, 2)), Is64 ? 54 : 46},
+ {to_string(S.sh_link), Is64 ? 59 : 51},
+ {to_string(S.sh_info), Is64 ? 63 : 55},
+ {to_string(S.sh_addralign), Is64 ? 66 : 58}});
+
+ OS.PadToColumn(7);
+ OS << "[" << to_string(format_hex_no_prefix(S.sh_flags, AddrSize)) << "]: ";
+
+ DenseMap<unsigned, StringRef> FlagToName = {
+ {SHF_WRITE, "WRITE"}, {SHF_ALLOC, "ALLOC"},
+ {SHF_EXECINSTR, "EXEC"}, {SHF_MERGE, "MERGE"},
+ {SHF_STRINGS, "STRINGS"}, {SHF_INFO_LINK, "INFO LINK"},
+ {SHF_LINK_ORDER, "LINK ORDER"}, {SHF_OS_NONCONFORMING, "OS NONCONF"},
+ {SHF_GROUP, "GROUP"}, {SHF_TLS, "TLS"},
+ {SHF_COMPRESSED, "COMPRESSED"}, {SHF_EXCLUDE, "EXCLUDE"}};
+
+ uint64_t Flags = S.sh_flags;
+ uint64_t UnknownFlags = 0;
+ bool NeedsComma = false;
+ while (Flags) {
+ // Take the least significant bit as a flag.
+ uint64_t Flag = Flags & -Flags;
+ Flags -= Flag;
+
+ auto It = FlagToName.find(Flag);
+ if (It != FlagToName.end()) {
+ if (NeedsComma)
+ OS << ", ";
+ NeedsComma = true;
+ OS << It->second;
+ } else {
+ UnknownFlags |= Flag;
+ }
+ }
+
+ auto PrintUnknownFlags = [&](uint64_t Mask, StringRef Name) {
+ uint64_t FlagsToPrint = UnknownFlags & Mask;
+ if (!FlagsToPrint)
+ return;
+
+ if (NeedsComma)
+ OS << ", ";
+ OS << Name << " ("
+ << to_string(format_hex_no_prefix(FlagsToPrint, AddrSize)) << ")";
+ UnknownFlags &= ~Mask;
+ NeedsComma = true;
+ };
+
+ PrintUnknownFlags(SHF_MASKOS, "OS");
+ PrintUnknownFlags(SHF_MASKPROC, "PROC");
+ PrintUnknownFlags(uint64_t(-1), "UNKNOWN");
+
+ OS << "\n";
+ ++SectionIndex;
}
}
@@ -4312,29 +4054,27 @@ static bool checkPTDynamic(const typename ELFT::Phdr &Phdr,
}
template <class ELFT>
-void GNUStyle<ELFT>::printProgramHeaders(
- const ELFO *Obj, bool PrintProgramHeaders,
- cl::boolOrDefault PrintSectionMapping) {
+void GNUELFDumper<ELFT>::printProgramHeaders(
+ bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) {
if (PrintProgramHeaders)
- printProgramHeaders(Obj);
+ printProgramHeaders();
// Display the section mapping along with the program headers, unless
// -section-mapping is explicitly set to false.
if (PrintSectionMapping != cl::BOU_FALSE)
- printSectionMapping(Obj);
+ printSectionMapping();
}
-template <class ELFT>
-void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
+template <class ELFT> void GNUELFDumper<ELFT>::printProgramHeaders() {
unsigned Bias = ELFT::Is64Bits ? 8 : 0;
- const Elf_Ehdr *Header = Obj->getHeader();
+ const Elf_Ehdr &Header = this->Obj.getHeader();
Field Fields[8] = {2, 17, 26, 37 + Bias,
48 + Bias, 56 + Bias, 64 + Bias, 68 + Bias};
OS << "\nElf file type is "
- << printEnum(Header->e_type, makeArrayRef(ElfObjectFileType)) << "\n"
- << "Entry point " << format_hex(Header->e_entry, 3) << "\n"
- << "There are " << Header->e_phnum << " program headers,"
- << " starting at offset " << Header->e_phoff << "\n\n"
+ << printEnum(Header.e_type, makeArrayRef(ElfObjectFileType)) << "\n"
+ << "Entry point " << format_hex(Header.e_entry, 3) << "\n"
+ << "There are " << Header.e_phnum << " program headers,"
+ << " starting at offset " << Header.e_phoff << "\n\n"
<< "Program Headers:\n";
if (ELFT::Is64Bits)
OS << " Type Offset VirtAddr PhysAddr "
@@ -4346,15 +4086,15 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
unsigned Width = ELFT::Is64Bits ? 18 : 10;
unsigned SizeWidth = ELFT::Is64Bits ? 8 : 7;
- Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = Obj->program_headers();
+ Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = this->Obj.program_headers();
if (!PhdrsOrErr) {
- this->reportUniqueWarning(createError("unable to dump program headers: " +
- toString(PhdrsOrErr.takeError())));
+ this->reportUniqueWarning("unable to dump program headers: " +
+ toString(PhdrsOrErr.takeError()));
return;
}
for (const Elf_Phdr &Phdr : *PhdrsOrErr) {
- Fields[0].Str = getElfPtType(Header->e_machine, Phdr.p_type);
+ Fields[0].Str = getGNUPtType(Header.e_machine, Phdr.p_type);
Fields[1].Str = to_string(format_hex(Phdr.p_offset, 8));
Fields[2].Str = to_string(format_hex(Phdr.p_vaddr, Width));
Fields[3].Str = to_string(format_hex(Phdr.p_paddr, Width));
@@ -4362,26 +4102,25 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
Fields[5].Str = to_string(format_hex(Phdr.p_memsz, SizeWidth));
Fields[6].Str = printPhdrFlags(Phdr.p_flags);
Fields[7].Str = to_string(format_hex(Phdr.p_align, 1));
- for (auto Field : Fields)
- printField(Field);
+ for (const Field &F : Fields)
+ printField(F);
if (Phdr.p_type == ELF::PT_INTERP) {
OS << "\n";
auto ReportBadInterp = [&](const Twine &Msg) {
- reportWarning(
- createError("unable to read program interpreter name at offset 0x" +
- Twine::utohexstr(Phdr.p_offset) + ": " + Msg),
- this->FileName);
+ this->reportUniqueWarning(
+ "unable to read program interpreter name at offset 0x" +
+ Twine::utohexstr(Phdr.p_offset) + ": " + Msg);
};
- if (Phdr.p_offset >= Obj->getBufSize()) {
+ if (Phdr.p_offset >= this->Obj.getBufSize()) {
ReportBadInterp("it goes past the end of the file (0x" +
- Twine::utohexstr(Obj->getBufSize()) + ")");
+ Twine::utohexstr(this->Obj.getBufSize()) + ")");
continue;
}
const char *Data =
- reinterpret_cast<const char *>(Obj->base()) + Phdr.p_offset;
- size_t MaxSize = Obj->getBufSize() - Phdr.p_offset;
+ reinterpret_cast<const char *>(this->Obj.base()) + Phdr.p_offset;
+ size_t MaxSize = this->Obj.getBufSize() - Phdr.p_offset;
size_t Len = strnlen(Data, MaxSize);
if (Len == MaxSize) {
ReportBadInterp("it is not null-terminated");
@@ -4395,17 +4134,16 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
}
}
-template <class ELFT>
-void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
+template <class ELFT> void GNUELFDumper<ELFT>::printSectionMapping() {
OS << "\n Section to Segment mapping:\n Segment Sections...\n";
DenseSet<const Elf_Shdr *> BelongsToSegment;
int Phnum = 0;
- Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = Obj->program_headers();
+ Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = this->Obj.program_headers();
if (!PhdrsOrErr) {
- this->reportUniqueWarning(createError(
+ this->reportUniqueWarning(
"can't read program headers to build section to segment mapping: " +
- toString(PhdrsOrErr.takeError())));
+ toString(PhdrsOrErr.takeError()));
return;
}
@@ -4413,7 +4151,7 @@ void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
std::string Sections;
OS << format(" %2.2d ", Phnum++);
// Check if each section is in a segment and then print mapping.
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) {
+ for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
if (Sec.sh_type == ELF::SHT_NULL)
continue;
@@ -4423,7 +4161,7 @@ void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
if (checkTLSSections<ELFT>(Phdr, Sec) && checkOffsets<ELFT>(Phdr, Sec) &&
checkVMA<ELFT>(Phdr, Sec) && checkPTDynamic<ELFT>(Phdr, Sec)) {
Sections +=
- unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() +
+ unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)).str() +
" ";
BelongsToSegment.insert(&Sec);
}
@@ -4434,10 +4172,11 @@ void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
// Display sections that do not belong to a segment.
std::string Sections;
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) {
+ for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
if (BelongsToSegment.find(&Sec) == BelongsToSegment.end())
Sections +=
- unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + ' ';
+ unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)).str() +
+ ' ';
}
if (!Sections.empty()) {
OS << " None " << Sections << '\n';
@@ -4446,41 +4185,44 @@ void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
}
namespace {
-template <class ELFT> struct RelSymbol {
- const typename ELFT::Sym *Sym;
- std::string Name;
-};
template <class ELFT>
-RelSymbol<ELFT> getSymbolForReloc(const ELFFile<ELFT> *Obj, StringRef FileName,
- const ELFDumper<ELFT> *Dumper,
- const typename ELFT::Rela &Reloc) {
- uint32_t SymIndex = Reloc.getSymbol(Obj->isMips64EL());
- auto WarnAndReturn = [&](const typename ELFT::Sym *Sym,
+RelSymbol<ELFT> getSymbolForReloc(const ELFDumper<ELFT> &Dumper,
+ const Relocation<ELFT> &Reloc) {
+ using Elf_Sym = typename ELFT::Sym;
+ auto WarnAndReturn = [&](const Elf_Sym *Sym,
const Twine &Reason) -> RelSymbol<ELFT> {
- reportWarning(
- createError("unable to get name of the dynamic symbol with index " +
- Twine(SymIndex) + ": " + Reason),
- FileName);
+ Dumper.reportUniqueWarning(
+ "unable to get name of the dynamic symbol with index " +
+ Twine(Reloc.Symbol) + ": " + Reason);
return {Sym, "<corrupt>"};
};
- ArrayRef<typename ELFT::Sym> Symbols = Dumper->dynamic_symbols();
- const typename ELFT::Sym *FirstSym = Symbols.begin();
+ ArrayRef<Elf_Sym> Symbols = Dumper.dynamic_symbols();
+ const Elf_Sym *FirstSym = Symbols.begin();
if (!FirstSym)
return WarnAndReturn(nullptr, "no dynamic symbol table found");
// We might have an object without a section header. In this case the size of
// Symbols is zero, because there is no way to know the size of the dynamic
// table. We should allow this case and not print a warning.
- if (!Symbols.empty() && SymIndex >= Symbols.size())
+ if (!Symbols.empty() && Reloc.Symbol >= Symbols.size())
return WarnAndReturn(
nullptr,
"index is greater than or equal to the number of dynamic symbols (" +
Twine(Symbols.size()) + ")");
- const typename ELFT::Sym *Sym = FirstSym + SymIndex;
- Expected<StringRef> ErrOrName = Sym->getName(Dumper->getDynamicStringTable());
+ const ELFFile<ELFT> &Obj = Dumper.getElfObject().getELFFile();
+ const uint64_t FileSize = Obj.getBufSize();
+ const uint64_t SymOffset = ((const uint8_t *)FirstSym - Obj.base()) +
+ (uint64_t)Reloc.Symbol * sizeof(Elf_Sym);
+ if (SymOffset + sizeof(Elf_Sym) > FileSize)
+ return WarnAndReturn(nullptr, "symbol at 0x" + Twine::utohexstr(SymOffset) +
+ " goes past the end of the file (0x" +
+ Twine::utohexstr(FileSize) + ")");
+
+ const Elf_Sym *Sym = FirstSym + Reloc.Symbol;
+ Expected<StringRef> ErrOrName = Sym->getName(Dumper.getDynamicStringTable());
if (!ErrOrName)
return WarnAndReturn(Sym, toString(ErrOrName.takeError()));
@@ -4489,37 +4231,27 @@ RelSymbol<ELFT> getSymbolForReloc(const ELFFile<ELFT> *Obj, StringRef FileName,
} // namespace
template <class ELFT>
-void GNUStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela R,
- bool IsRela) {
- RelSymbol<ELFT> S = getSymbolForReloc(Obj, this->FileName, this->dumper(), R);
- printRelocation(Obj, S.Sym, S.Name, R, IsRela);
-}
-
-template <class ELFT>
-static size_t getMaxDynamicTagSize(const ELFFile<ELFT> *Obj,
+static size_t getMaxDynamicTagSize(const ELFFile<ELFT> &Obj,
typename ELFT::DynRange Tags) {
size_t Max = 0;
for (const typename ELFT::Dyn &Dyn : Tags)
- Max = std::max(Max, Obj->getDynamicTagAsString(Dyn.d_tag).size());
+ Max = std::max(Max, Obj.getDynamicTagAsString(Dyn.d_tag).size());
return Max;
}
-template <class ELFT> void GNUStyle<ELFT>::printDynamic(const ELFO *Obj) {
- Elf_Dyn_Range Table = this->dumper()->dynamic_table();
+template <class ELFT> void GNUELFDumper<ELFT>::printDynamicTable() {
+ Elf_Dyn_Range Table = this->dynamic_table();
if (Table.empty())
return;
- const DynRegionInfo &DynamicTableRegion =
- this->dumper()->getDynamicTableRegion();
-
OS << "Dynamic section at offset "
- << format_hex(reinterpret_cast<const uint8_t *>(DynamicTableRegion.Addr) -
- Obj->base(),
+ << format_hex(reinterpret_cast<const uint8_t *>(this->DynamicTable.Addr) -
+ this->Obj.base(),
1)
<< " contains " << Table.size() << " entries:\n";
// The type name is surrounded with round brackets, hence add 2.
- size_t MaxTagSize = getMaxDynamicTagSize(Obj, Table) + 2;
+ size_t MaxTagSize = getMaxDynamicTagSize(this->Obj, Table) + 2;
// The "Name/Value" column should be indented from the "Type" column by N
// spaces, where N = MaxTagSize - length of "Type" (4) + trailing
// space (1) = 3.
@@ -4530,127 +4262,115 @@ template <class ELFT> void GNUStyle<ELFT>::printDynamic(const ELFO *Obj) {
for (auto Entry : Table) {
uintX_t Tag = Entry.getTag();
std::string Type =
- std::string("(") + Obj->getDynamicTagAsString(Tag).c_str() + ")";
- std::string Value = this->dumper()->getDynamicEntry(Tag, Entry.getVal());
+ std::string("(") + this->Obj.getDynamicTagAsString(Tag).c_str() + ")";
+ std::string Value = this->getDynamicEntry(Tag, Entry.getVal());
OS << " " << format_hex(Tag, ELFT::Is64Bits ? 18 : 10)
<< format(ValueFmt.c_str(), Type.c_str()) << Value << "\n";
}
}
+template <class ELFT> void GNUELFDumper<ELFT>::printDynamicRelocations() {
+ this->printDynamicRelocationsHelper();
+}
+
template <class ELFT>
-void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
- const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion();
- const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion();
- const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion();
- const DynRegionInfo &DynPLTRelRegion = this->dumper()->getDynPLTRelRegion();
- if (DynRelaRegion.Size > 0) {
- OS << "\n'RELA' relocation section at offset "
- << format_hex(reinterpret_cast<const uint8_t *>(DynRelaRegion.Addr) -
- Obj->base(),
- 1)
- << " contains " << DynRelaRegion.Size << " bytes:\n";
- printRelocHeader(ELF::SHT_RELA);
- for (const Elf_Rela &Rela : this->dumper()->dyn_relas())
- printDynamicRelocation(Obj, Rela, true);
- }
- if (DynRelRegion.Size > 0) {
- OS << "\n'REL' relocation section at offset "
- << format_hex(reinterpret_cast<const uint8_t *>(DynRelRegion.Addr) -
- Obj->base(),
- 1)
- << " contains " << DynRelRegion.Size << " bytes:\n";
- printRelocHeader(ELF::SHT_REL);
- for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) {
- Elf_Rela Rela;
- Rela.r_offset = Rel.r_offset;
- Rela.r_info = Rel.r_info;
- Rela.r_addend = 0;
- printDynamicRelocation(Obj, Rela, false);
- }
- }
- if (DynRelrRegion.Size > 0) {
- OS << "\n'RELR' relocation section at offset "
- << format_hex(reinterpret_cast<const uint8_t *>(DynRelrRegion.Addr) -
- Obj->base(),
- 1)
- << " contains " << DynRelrRegion.Size << " bytes:\n";
- printRelocHeader(ELF::SHT_REL);
- Elf_Relr_Range Relrs = this->dumper()->dyn_relrs();
- std::vector<Elf_Rela> RelrRelas =
- unwrapOrError(this->FileName, Obj->decode_relrs(Relrs));
- for (const Elf_Rela &Rela : RelrRelas) {
- printDynamicRelocation(Obj, Rela, false);
- }
- }
- if (DynPLTRelRegion.Size) {
- OS << "\n'PLT' relocation section at offset "
- << format_hex(reinterpret_cast<const uint8_t *>(DynPLTRelRegion.Addr) -
- Obj->base(),
- 1)
- << " contains " << DynPLTRelRegion.Size << " bytes:\n";
-
- if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) {
- printRelocHeader(ELF::SHT_RELA);
- for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef<Elf_Rela>())
- printDynamicRelocation(Obj, Rela, true);
+void ELFDumper<ELFT>::printDynamicReloc(const Relocation<ELFT> &R) {
+ printRelRelaReloc(R, getSymbolForReloc(*this, R));
+}
+
+template <class ELFT>
+void ELFDumper<ELFT>::printRelocationsHelper(const Elf_Shdr &Sec) {
+ this->forEachRelocationDo(
+ Sec, opts::RawRelr,
+ [&](const Relocation<ELFT> &R, unsigned Ndx, const Elf_Shdr &Sec,
+ const Elf_Shdr *SymTab) { printReloc(R, Ndx, Sec, SymTab); },
+ [&](const Elf_Relr &R) { printRelrReloc(R); });
+}
+
+template <class ELFT> void ELFDumper<ELFT>::printDynamicRelocationsHelper() {
+ const bool IsMips64EL = this->Obj.isMips64EL();
+ if (this->DynRelaRegion.Size > 0) {
+ printDynamicRelocHeader(ELF::SHT_RELA, "RELA", this->DynRelaRegion);
+ for (const Elf_Rela &Rela :
+ this->DynRelaRegion.template getAsArrayRef<Elf_Rela>())
+ printDynamicReloc(Relocation<ELFT>(Rela, IsMips64EL));
+ }
+
+ if (this->DynRelRegion.Size > 0) {
+ printDynamicRelocHeader(ELF::SHT_REL, "REL", this->DynRelRegion);
+ for (const Elf_Rel &Rel :
+ this->DynRelRegion.template getAsArrayRef<Elf_Rel>())
+ printDynamicReloc(Relocation<ELFT>(Rel, IsMips64EL));
+ }
+
+ if (this->DynRelrRegion.Size > 0) {
+ printDynamicRelocHeader(ELF::SHT_REL, "RELR", this->DynRelrRegion);
+ Elf_Relr_Range Relrs =
+ this->DynRelrRegion.template getAsArrayRef<Elf_Relr>();
+ for (const Elf_Rel &Rel : Obj.decode_relrs(Relrs))
+ printDynamicReloc(Relocation<ELFT>(Rel, IsMips64EL));
+ }
+
+ if (this->DynPLTRelRegion.Size) {
+ if (this->DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) {
+ printDynamicRelocHeader(ELF::SHT_RELA, "PLT", this->DynPLTRelRegion);
+ for (const Elf_Rela &Rela :
+ this->DynPLTRelRegion.template getAsArrayRef<Elf_Rela>())
+ printDynamicReloc(Relocation<ELFT>(Rela, IsMips64EL));
} else {
- printRelocHeader(ELF::SHT_REL);
- for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef<Elf_Rel>()) {
- Elf_Rela Rela;
- Rela.r_offset = Rel.r_offset;
- Rela.r_info = Rel.r_info;
- Rela.r_addend = 0;
- printDynamicRelocation(Obj, Rela, false);
- }
+ printDynamicRelocHeader(ELF::SHT_REL, "PLT", this->DynPLTRelRegion);
+ for (const Elf_Rel &Rel :
+ this->DynPLTRelRegion.template getAsArrayRef<Elf_Rel>())
+ printDynamicReloc(Relocation<ELFT>(Rel, IsMips64EL));
}
}
}
template <class ELFT>
-void GNUStyle<ELFT>::printGNUVersionSectionProlog(
- const ELFFile<ELFT> *Obj, const typename ELFT::Shdr *Sec,
- const Twine &Label, unsigned EntriesNum) {
- StringRef SecName = unwrapOrError(this->FileName, Obj->getSectionName(Sec));
+void GNUELFDumper<ELFT>::printGNUVersionSectionProlog(
+ const typename ELFT::Shdr &Sec, const Twine &Label, unsigned EntriesNum) {
+ // Don't inline the SecName, because it might report a warning to stderr and
+ // corrupt the output.
+ StringRef SecName = this->getPrintableSectionName(Sec);
OS << Label << " section '" << SecName << "' "
<< "contains " << EntriesNum << " entries:\n";
- unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
- StringRef SymTabName = "<corrupt>";
-
- Expected<const typename ELFT::Shdr *> SymTabOrErr =
- Obj->getSection(Sec->sh_link);
- if (SymTabOrErr)
- SymTabName =
- unwrapOrError(this->FileName, Obj->getSectionName(*SymTabOrErr));
+ StringRef LinkedSecName = "<corrupt>";
+ if (Expected<const typename ELFT::Shdr *> LinkedSecOrErr =
+ this->Obj.getSection(Sec.sh_link))
+ LinkedSecName = this->getPrintableSectionName(**LinkedSecOrErr);
else
- this->reportUniqueWarning(
- createError("invalid section linked to " +
- object::getELFSectionTypeName(Obj->getHeader()->e_machine,
- Sec->sh_type) +
- " section with index " + Twine(SecNdx) + ": " +
- toString(SymTabOrErr.takeError())));
+ this->reportUniqueWarning("invalid section linked to " +
+ this->describe(Sec) + ": " +
+ toString(LinkedSecOrErr.takeError()));
- OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16)
- << " Offset: " << format_hex(Sec->sh_offset, 8)
- << " Link: " << Sec->sh_link << " (" << SymTabName << ")\n";
+ OS << " Addr: " << format_hex_no_prefix(Sec.sh_addr, 16)
+ << " Offset: " << format_hex(Sec.sh_offset, 8)
+ << " Link: " << Sec.sh_link << " (" << LinkedSecName << ")\n";
}
template <class ELFT>
-void GNUStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) {
+void GNUELFDumper<ELFT>::printVersionSymbolSection(const Elf_Shdr *Sec) {
if (!Sec)
return;
- printGNUVersionSectionProlog(Obj, Sec, "Version symbols",
+ printGNUVersionSectionProlog(*Sec, "Version symbols",
Sec->sh_size / sizeof(Elf_Versym));
Expected<ArrayRef<Elf_Versym>> VerTableOrErr =
- this->dumper()->getVersionTable(Sec, /*SymTab=*/nullptr,
- /*StrTab=*/nullptr);
+ this->getVersionTable(*Sec, /*SymTab=*/nullptr,
+ /*StrTab=*/nullptr, /*SymTabSec=*/nullptr);
if (!VerTableOrErr) {
this->reportUniqueWarning(VerTableOrErr.takeError());
return;
}
+ SmallVector<Optional<VersionEntry>, 0> *VersionMap = nullptr;
+ if (Expected<SmallVector<Optional<VersionEntry>, 0> *> MapOrErr =
+ this->getVersionMap())
+ VersionMap = *MapOrErr;
+ else
+ this->reportUniqueWarning(MapOrErr.takeError());
+
ArrayRef<Elf_Versym> VerTable = *VerTableOrErr;
std::vector<StringRef> Versions;
for (size_t I = 0, E = VerTable.size(); I < E; ++I) {
@@ -4660,17 +4380,18 @@ void GNUStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
continue;
}
+ if (!VersionMap) {
+ Versions.emplace_back("<corrupt>");
+ continue;
+ }
+
bool IsDefault;
- Expected<StringRef> NameOrErr =
- this->dumper()->getSymbolVersionByIndex(Ndx, IsDefault);
+ Expected<StringRef> NameOrErr = this->Obj.getSymbolVersionByIndex(
+ Ndx, IsDefault, *VersionMap, /*IsSymHidden=*/None);
if (!NameOrErr) {
- if (!NameOrErr) {
- unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
- this->reportUniqueWarning(createError(
- "unable to get a version for entry " + Twine(I) +
- " of SHT_GNU_versym section with index " + Twine(SecNdx) + ": " +
- toString(NameOrErr.takeError())));
- }
+ this->reportUniqueWarning("unable to get a version for entry " +
+ Twine(I) + " of " + this->describe(*Sec) +
+ ": " + toString(NameOrErr.takeError()));
Versions.emplace_back("<corrupt>");
continue;
}
@@ -4714,14 +4435,13 @@ static std::string versionFlagToString(unsigned Flags) {
}
template <class ELFT>
-void GNUStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) {
+void GNUELFDumper<ELFT>::printVersionDefinitionSection(const Elf_Shdr *Sec) {
if (!Sec)
return;
- printGNUVersionSectionProlog(Obj, Sec, "Version definition", Sec->sh_info);
+ printGNUVersionSectionProlog(*Sec, "Version definition", Sec->sh_info);
- Expected<std::vector<VerDef>> V = this->dumper()->getVersionDefinitions(Sec);
+ Expected<std::vector<VerDef>> V = this->Obj.getVersionDefinitions(*Sec);
if (!V) {
this->reportUniqueWarning(V.takeError());
return;
@@ -4742,16 +4462,15 @@ void GNUStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
}
template <class ELFT>
-void GNUStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) {
+void GNUELFDumper<ELFT>::printVersionDependencySection(const Elf_Shdr *Sec) {
if (!Sec)
return;
unsigned VerneedNum = Sec->sh_info;
- printGNUVersionSectionProlog(Obj, Sec, "Version needs", VerneedNum);
+ printGNUVersionSectionProlog(*Sec, "Version needs", VerneedNum);
Expected<std::vector<VerNeed>> V =
- this->dumper()->getVersionDependencies(Sec);
+ this->Obj.getVersionDependencies(*Sec, this->WarningHandler);
if (!V) {
this->reportUniqueWarning(V.takeError());
return;
@@ -4769,7 +4488,7 @@ void GNUStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
}
template <class ELFT>
-void GNUStyle<ELFT>::printHashHistogram(const Elf_Hash &HashTable) {
+void GNUELFDumper<ELFT>::printHashHistogram(const Elf_Hash &HashTable) {
size_t NBucket = HashTable.nbucket;
size_t NChain = HashTable.nchain;
ArrayRef<Elf_Word> Buckets = HashTable.buckets();
@@ -4791,10 +4510,9 @@ void GNUStyle<ELFT>::printHashHistogram(const Elf_Hash &HashTable) {
if (C == ELF::STN_UNDEF)
break;
if (Visited[C]) {
- reportWarning(createError(".hash section is invalid: bucket " +
+ this->reportUniqueWarning(".hash section is invalid: bucket " +
Twine(C) +
- ": a cycle was detected in the linked chain"),
- this->FileName);
+ ": a cycle was detected in the linked chain");
break;
}
Visited[C] = true;
@@ -4825,13 +4543,13 @@ void GNUStyle<ELFT>::printHashHistogram(const Elf_Hash &HashTable) {
}
template <class ELFT>
-void GNUStyle<ELFT>::printGnuHashHistogram(const Elf_GnuHash &GnuHashTable) {
- Expected<ArrayRef<Elf_Word>> ChainsOrErr = getGnuHashTableChains<ELFT>(
- this->dumper()->getDynSymRegion(), &GnuHashTable);
+void GNUELFDumper<ELFT>::printGnuHashHistogram(
+ const Elf_GnuHash &GnuHashTable) {
+ Expected<ArrayRef<Elf_Word>> ChainsOrErr =
+ getGnuHashTableChains<ELFT>(this->DynSymRegion, &GnuHashTable);
if (!ChainsOrErr) {
- this->reportUniqueWarning(
- createError("unable to print the GNU hash table histogram: " +
- toString(ChainsOrErr.takeError())));
+ this->reportUniqueWarning("unable to print the GNU hash table histogram: " +
+ toString(ChainsOrErr.takeError()));
return;
}
@@ -4883,211 +4601,83 @@ void GNUStyle<ELFT>::printGnuHashHistogram(const Elf_GnuHash &GnuHashTable) {
// dynamic symbol table. The table shows the number of hash buckets for
// different lengths of chains as an absolute number and percentage of the total
// buckets, and the cumulative coverage of symbols for each set of buckets.
-template <class ELFT>
-void GNUStyle<ELFT>::printHashHistograms(const ELFFile<ELFT> *Obj) {
+template <class ELFT> void GNUELFDumper<ELFT>::printHashHistograms() {
// Print histogram for the .hash section.
- if (const Elf_Hash *HashTable = this->dumper()->getHashTable()) {
- if (Error E = checkHashTable<ELFT>(Obj, HashTable))
+ if (this->HashTable) {
+ if (Error E = checkHashTable<ELFT>(*this, this->HashTable))
this->reportUniqueWarning(std::move(E));
else
- printHashHistogram(*HashTable);
+ printHashHistogram(*this->HashTable);
}
// Print histogram for the .gnu.hash section.
- if (const Elf_GnuHash *GnuHashTable = this->dumper()->getGnuHashTable()) {
- if (Error E = checkGNUHashTable<ELFT>(Obj, GnuHashTable))
+ if (this->GnuHashTable) {
+ if (Error E = checkGNUHashTable<ELFT>(this->Obj, this->GnuHashTable))
this->reportUniqueWarning(std::move(E));
else
- printGnuHashHistogram(*GnuHashTable);
+ printGnuHashHistogram(*this->GnuHashTable);
}
}
-template <class ELFT>
-void GNUStyle<ELFT>::printCGProfile(const ELFFile<ELFT> *Obj) {
+template <class ELFT> void GNUELFDumper<ELFT>::printCGProfile() {
OS << "GNUStyle::printCGProfile not implemented\n";
}
-template <class ELFT>
-void GNUStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
- reportError(createError("--addrsig: not implemented"), this->FileName);
-}
-
-static StringRef getGenericNoteTypeName(const uint32_t NT) {
- static const struct {
- uint32_t ID;
- const char *Name;
- } Notes[] = {
- {ELF::NT_VERSION, "NT_VERSION (version)"},
- {ELF::NT_ARCH, "NT_ARCH (architecture)"},
- {ELF::NT_GNU_BUILD_ATTRIBUTE_OPEN, "OPEN"},
- {ELF::NT_GNU_BUILD_ATTRIBUTE_FUNC, "func"},
- };
-
- for (const auto &Note : Notes)
- if (Note.ID == NT)
- return Note.Name;
-
- return "";
-}
-
-static StringRef getCoreNoteTypeName(const uint32_t NT) {
- static const struct {
- uint32_t ID;
- const char *Name;
- } Notes[] = {
- {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"},
- {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"},
- {ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"},
- {ELF::NT_TASKSTRUCT, "NT_TASKSTRUCT (task structure)"},
- {ELF::NT_AUXV, "NT_AUXV (auxiliary vector)"},
- {ELF::NT_PSTATUS, "NT_PSTATUS (pstatus structure)"},
- {ELF::NT_FPREGS, "NT_FPREGS (floating point registers)"},
- {ELF::NT_PSINFO, "NT_PSINFO (psinfo structure)"},
- {ELF::NT_LWPSTATUS, "NT_LWPSTATUS (lwpstatus_t structure)"},
- {ELF::NT_LWPSINFO, "NT_LWPSINFO (lwpsinfo_t structure)"},
- {ELF::NT_WIN32PSTATUS, "NT_WIN32PSTATUS (win32_pstatus structure)"},
-
- {ELF::NT_PPC_VMX, "NT_PPC_VMX (ppc Altivec registers)"},
- {ELF::NT_PPC_VSX, "NT_PPC_VSX (ppc VSX registers)"},
- {ELF::NT_PPC_TAR, "NT_PPC_TAR (ppc TAR register)"},
- {ELF::NT_PPC_PPR, "NT_PPC_PPR (ppc PPR register)"},
- {ELF::NT_PPC_DSCR, "NT_PPC_DSCR (ppc DSCR register)"},
- {ELF::NT_PPC_EBB, "NT_PPC_EBB (ppc EBB registers)"},
- {ELF::NT_PPC_PMU, "NT_PPC_PMU (ppc PMU registers)"},
- {ELF::NT_PPC_TM_CGPR, "NT_PPC_TM_CGPR (ppc checkpointed GPR registers)"},
- {ELF::NT_PPC_TM_CFPR,
- "NT_PPC_TM_CFPR (ppc checkpointed floating point registers)"},
- {ELF::NT_PPC_TM_CVMX,
- "NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)"},
- {ELF::NT_PPC_TM_CVSX, "NT_PPC_TM_CVSX (ppc checkpointed VSX registers)"},
- {ELF::NT_PPC_TM_SPR, "NT_PPC_TM_SPR (ppc TM special purpose registers)"},
- {ELF::NT_PPC_TM_CTAR, "NT_PPC_TM_CTAR (ppc checkpointed TAR register)"},
- {ELF::NT_PPC_TM_CPPR, "NT_PPC_TM_CPPR (ppc checkpointed PPR register)"},
- {ELF::NT_PPC_TM_CDSCR,
- "NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)"},
-
- {ELF::NT_386_TLS, "NT_386_TLS (x86 TLS information)"},
- {ELF::NT_386_IOPERM, "NT_386_IOPERM (x86 I/O permissions)"},
- {ELF::NT_X86_XSTATE, "NT_X86_XSTATE (x86 XSAVE extended state)"},
-
- {ELF::NT_S390_HIGH_GPRS,
- "NT_S390_HIGH_GPRS (s390 upper register halves)"},
- {ELF::NT_S390_TIMER, "NT_S390_TIMER (s390 timer register)"},
- {ELF::NT_S390_TODCMP, "NT_S390_TODCMP (s390 TOD comparator register)"},
- {ELF::NT_S390_TODPREG,
- "NT_S390_TODPREG (s390 TOD programmable register)"},
- {ELF::NT_S390_CTRS, "NT_S390_CTRS (s390 control registers)"},
- {ELF::NT_S390_PREFIX, "NT_S390_PREFIX (s390 prefix register)"},
- {ELF::NT_S390_LAST_BREAK,
- "NT_S390_LAST_BREAK (s390 last breaking event address)"},
- {ELF::NT_S390_SYSTEM_CALL,
- "NT_S390_SYSTEM_CALL (s390 system call restart data)"},
- {ELF::NT_S390_TDB, "NT_S390_TDB (s390 transaction diagnostic block)"},
- {ELF::NT_S390_VXRS_LOW,
- "NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)"},
- {ELF::NT_S390_VXRS_HIGH,
- "NT_S390_VXRS_HIGH (s390 vector registers 16-31)"},
- {ELF::NT_S390_GS_CB, "NT_S390_GS_CB (s390 guarded-storage registers)"},
- {ELF::NT_S390_GS_BC,
- "NT_S390_GS_BC (s390 guarded-storage broadcast control)"},
-
- {ELF::NT_ARM_VFP, "NT_ARM_VFP (arm VFP registers)"},
- {ELF::NT_ARM_TLS, "NT_ARM_TLS (AArch TLS registers)"},
- {ELF::NT_ARM_HW_BREAK,
- "NT_ARM_HW_BREAK (AArch hardware breakpoint registers)"},
- {ELF::NT_ARM_HW_WATCH,
- "NT_ARM_HW_WATCH (AArch hardware watchpoint registers)"},
-
- {ELF::NT_FILE, "NT_FILE (mapped files)"},
- {ELF::NT_PRXFPREG, "NT_PRXFPREG (user_xfpregs structure)"},
- {ELF::NT_SIGINFO, "NT_SIGINFO (siginfo_t data)"},
- };
-
- for (const auto &Note : Notes)
- if (Note.ID == NT)
- return Note.Name;
-
- return "";
-}
-
-static std::string getGNUNoteTypeName(const uint32_t NT) {
- static const struct {
- uint32_t ID;
- const char *Name;
- } Notes[] = {
- {ELF::NT_GNU_ABI_TAG, "NT_GNU_ABI_TAG (ABI version tag)"},
- {ELF::NT_GNU_HWCAP, "NT_GNU_HWCAP (DSO-supplied software HWCAP info)"},
- {ELF::NT_GNU_BUILD_ID, "NT_GNU_BUILD_ID (unique build ID bitstring)"},
- {ELF::NT_GNU_GOLD_VERSION, "NT_GNU_GOLD_VERSION (gold version)"},
- {ELF::NT_GNU_PROPERTY_TYPE_0, "NT_GNU_PROPERTY_TYPE_0 (property note)"},
- };
-
- for (const auto &Note : Notes)
- if (Note.ID == NT)
- return std::string(Note.Name);
-
- std::string string;
- raw_string_ostream OS(string);
- OS << format("Unknown note type (0x%08x)", NT);
- return OS.str();
+static Expected<std::vector<uint64_t>> toULEB128Array(ArrayRef<uint8_t> Data) {
+ std::vector<uint64_t> Ret;
+ const uint8_t *Cur = Data.begin();
+ const uint8_t *End = Data.end();
+ while (Cur != End) {
+ unsigned Size;
+ const char *Err;
+ Ret.push_back(decodeULEB128(Cur, &Size, End, &Err));
+ if (Err)
+ return createError(Err);
+ Cur += Size;
+ }
+ return Ret;
}
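For reference, the data handled by toULEB128Array above is a plain run of ULEB128 values: each byte contributes its low 7 bits, least-significant group first, and a set high bit means another byte follows. A minimal stand-alone sketch (not the LLVM helper, which relies on the bounds- and error-checked decodeULEB128 call shown above), plus a worked example:

#include <cstdint>

// Illustrative ULEB128 decoder; assumes well-formed input and does not guard
// against overflow or running past the buffer, unlike the call above.
static uint64_t decodeOneULEB128(const uint8_t *P, unsigned *Size) {
  uint64_t Value = 0;
  unsigned Shift = 0, N = 0;
  uint8_t Byte;
  do {
    Byte = P[N++];
    Value |= uint64_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  *Size = N;
  return Value;
}

// Example: the bytes 02 81 01 decode to the two values 2 and 129
// (0x81 -> low bits 1, continue; 0x01 -> 1 << 7 = 128; 1 + 128 = 129),
// i.e. the symbol indices {2, 129} in an SHT_LLVM_ADDRSIG payload.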
-static std::string getFreeBSDNoteTypeName(const uint32_t NT) {
- static const struct {
- uint32_t ID;
- const char *Name;
- } Notes[] = {
- {ELF::NT_FREEBSD_THRMISC, "NT_THRMISC (thrmisc structure)"},
- {ELF::NT_FREEBSD_PROCSTAT_PROC, "NT_PROCSTAT_PROC (proc data)"},
- {ELF::NT_FREEBSD_PROCSTAT_FILES, "NT_PROCSTAT_FILES (files data)"},
- {ELF::NT_FREEBSD_PROCSTAT_VMMAP, "NT_PROCSTAT_VMMAP (vmmap data)"},
- {ELF::NT_FREEBSD_PROCSTAT_GROUPS, "NT_PROCSTAT_GROUPS (groups data)"},
- {ELF::NT_FREEBSD_PROCSTAT_UMASK, "NT_PROCSTAT_UMASK (umask data)"},
- {ELF::NT_FREEBSD_PROCSTAT_RLIMIT, "NT_PROCSTAT_RLIMIT (rlimit data)"},
- {ELF::NT_FREEBSD_PROCSTAT_OSREL, "NT_PROCSTAT_OSREL (osreldate data)"},
- {ELF::NT_FREEBSD_PROCSTAT_PSSTRINGS,
- "NT_PROCSTAT_PSSTRINGS (ps_strings data)"},
- {ELF::NT_FREEBSD_PROCSTAT_AUXV, "NT_PROCSTAT_AUXV (auxv data)"},
- };
-
- for (const auto &Note : Notes)
- if (Note.ID == NT)
- return std::string(Note.Name);
+template <class ELFT>
+static Expected<std::vector<uint64_t>>
+decodeAddrsigSection(const ELFFile<ELFT> &Obj, const typename ELFT::Shdr &Sec) {
+ Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj.getSectionContents(Sec);
+ if (!ContentsOrErr)
+ return ContentsOrErr.takeError();
- std::string string;
- raw_string_ostream OS(string);
- OS << format("Unknown note type (0x%08x)", NT);
- return OS.str();
+ if (Expected<std::vector<uint64_t>> SymsOrErr =
+ toULEB128Array(*ContentsOrErr))
+ return *SymsOrErr;
+ else
+ return createError("unable to decode " + describe(Obj, Sec) + ": " +
+ toString(SymsOrErr.takeError()));
}
-static std::string getAMDNoteTypeName(const uint32_t NT) {
- static const struct {
- uint32_t ID;
- const char *Name;
- } Notes[] = {{ELF::NT_AMD_AMDGPU_HSA_METADATA,
- "NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"},
- {ELF::NT_AMD_AMDGPU_ISA, "NT_AMD_AMDGPU_ISA (ISA Version)"},
- {ELF::NT_AMD_AMDGPU_PAL_METADATA,
- "NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"}};
-
- for (const auto &Note : Notes)
- if (Note.ID == NT)
- return std::string(Note.Name);
+template <class ELFT> void GNUELFDumper<ELFT>::printAddrsig() {
+ if (!this->DotAddrsigSec)
+ return;
- std::string string;
- raw_string_ostream OS(string);
- OS << format("Unknown note type (0x%08x)", NT);
- return OS.str();
-}
+ Expected<std::vector<uint64_t>> SymsOrErr =
+ decodeAddrsigSection(this->Obj, *this->DotAddrsigSec);
+ if (!SymsOrErr) {
+ this->reportUniqueWarning(SymsOrErr.takeError());
+ return;
+ }
-static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
- if (NT == ELF::NT_AMDGPU_METADATA)
- return std::string("NT_AMDGPU_METADATA (AMDGPU Metadata)");
+ StringRef Name = this->getPrintableSectionName(*this->DotAddrsigSec);
+ OS << "\nAddress-significant symbols section '" << Name << "'"
+ << " contains " << SymsOrErr->size() << " entries:\n";
+ OS << " Num: Name\n";
- std::string string;
- raw_string_ostream OS(string);
- OS << format("Unknown note type (0x%08x)", NT);
- return OS.str();
+ Field Fields[2] = {0, 8};
+ size_t SymIndex = 0;
+ for (uint64_t Sym : *SymsOrErr) {
+ Fields[0].Str = to_string(format_decimal(++SymIndex, 6)) + ":";
+ Fields[1].Str = this->getStaticSymbolName(Sym);
+ for (const Field &Entry : Fields)
+ printField(Entry);
+ OS << "\n";
+ }
}
template <typename ELFT>
@@ -5277,7 +4867,7 @@ template <typename ELFT> static GNUAbiTag getGNUAbiTag(ArrayRef<uint8_t> Desc) {
static std::string getGNUBuildId(ArrayRef<uint8_t> Desc) {
std::string str;
raw_string_ostream OS(str);
- for (const auto &B : Desc)
+ for (uint8_t B : Desc)
OS << format_hex_no_prefix(B, 2);
return OS.str();
}
@@ -5309,7 +4899,7 @@ static void printGNUNote(raw_ostream &OS, uint32_t NoteType,
break;
case ELF::NT_GNU_PROPERTY_TYPE_0:
OS << " Properties:";
- for (const auto &Property : getGNUPropertyList<ELFT>(Desc))
+ for (const std::string &Property : getGNUPropertyList<ELFT>(Desc))
OS << " " << Property << "\n";
break;
}
@@ -5348,17 +4938,17 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
default:
return {"", ""};
case ELF::NT_AMDGPU_METADATA: {
- auto MsgPackString =
+ StringRef MsgPackString =
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
msgpack::Document MsgPackDoc;
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+ std::string HSAMetadataString;
if (!Verifier.verify(MsgPackDoc.getRoot()))
- return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
+ HSAMetadataString = "Invalid AMDGPU Metadata\n";
- std::string HSAMetadataString;
raw_string_ostream StrOS(HSAMetadataString);
MsgPackDoc.toYAML(StrOS);
@@ -5389,19 +4979,20 @@ static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
const int Bytes = Desc.getAddressSize();
if (!Desc.isValidOffsetForAddress(2))
- return createStringError(object_error::parse_failed,
- "malformed note: header too short");
+ return createError("the note of size 0x" + Twine::utohexstr(Desc.size()) +
+ " is too short, expected at least 0x" +
+ Twine::utohexstr(Bytes * 2));
if (Desc.getData().back() != 0)
- return createStringError(object_error::parse_failed,
- "malformed note: not NUL terminated");
+ return createError("the note is not NUL terminated");
uint64_t DescOffset = 0;
uint64_t FileCount = Desc.getAddress(&DescOffset);
Ret.PageSize = Desc.getAddress(&DescOffset);
if (!Desc.isValidOffsetForAddress(3 * FileCount * Bytes))
- return createStringError(object_error::parse_failed,
- "malformed note: too short for number of files");
+ return createError("unable to read file mappings (found " +
+ Twine(FileCount) + "): the note of size 0x" +
+ Twine::utohexstr(Desc.size()) + " is too short");
uint64_t FilenamesOffset = 0;
DataExtractor Filenames(
@@ -5409,10 +5000,14 @@ static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
Desc.isLittleEndian(), Desc.getAddressSize());
Ret.Mappings.resize(FileCount);
+ size_t I = 0;
for (CoreFileMapping &Mapping : Ret.Mappings) {
+ ++I;
if (!Filenames.isValidOffsetForDataOfSize(FilenamesOffset, 1))
- return createStringError(object_error::parse_failed,
- "malformed note: too few filenames");
+ return createError(
+ "unable to read the file name for the mapping with index " +
+ Twine(I) + ": the note of size 0x" + Twine::utohexstr(Desc.size()) +
+ " is truncated");
Mapping.Start = Desc.getAddress(&DescOffset);
Mapping.End = Desc.getAddress(&DescOffset);
Mapping.Offset = Desc.getAddress(&DescOffset);
@@ -5439,8 +5034,205 @@ static void printCoreNote(raw_ostream &OS, const CoreNote &Note) {
}
}
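For reference, readCoreNote and printCoreNote above operate on the NT_FILE ("mapped files") core-note descriptor; the layout the parser assumes is:

// NT_FILE descriptor layout consumed by readCoreNote() (all words are
// address-sized, in the file's endianness):
//
//   Elf_Addr Count;                                  // number of mappings
//   Elf_Addr PageSize;                               // recorded and printed as-is
//   struct { Elf_Addr Start, End, FileOffset; } M[Count];
//   char Filenames[];                                // Count NUL-terminated
//                                                    // paths, one per mapping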
+static const NoteType GenericNoteTypes[] = {
+ {ELF::NT_VERSION, "NT_VERSION (version)"},
+ {ELF::NT_ARCH, "NT_ARCH (architecture)"},
+ {ELF::NT_GNU_BUILD_ATTRIBUTE_OPEN, "OPEN"},
+ {ELF::NT_GNU_BUILD_ATTRIBUTE_FUNC, "func"},
+};
+
+static const NoteType GNUNoteTypes[] = {
+ {ELF::NT_GNU_ABI_TAG, "NT_GNU_ABI_TAG (ABI version tag)"},
+ {ELF::NT_GNU_HWCAP, "NT_GNU_HWCAP (DSO-supplied software HWCAP info)"},
+ {ELF::NT_GNU_BUILD_ID, "NT_GNU_BUILD_ID (unique build ID bitstring)"},
+ {ELF::NT_GNU_GOLD_VERSION, "NT_GNU_GOLD_VERSION (gold version)"},
+ {ELF::NT_GNU_PROPERTY_TYPE_0, "NT_GNU_PROPERTY_TYPE_0 (property note)"},
+};
+
+static const NoteType FreeBSDNoteTypes[] = {
+ {ELF::NT_FREEBSD_THRMISC, "NT_THRMISC (thrmisc structure)"},
+ {ELF::NT_FREEBSD_PROCSTAT_PROC, "NT_PROCSTAT_PROC (proc data)"},
+ {ELF::NT_FREEBSD_PROCSTAT_FILES, "NT_PROCSTAT_FILES (files data)"},
+ {ELF::NT_FREEBSD_PROCSTAT_VMMAP, "NT_PROCSTAT_VMMAP (vmmap data)"},
+ {ELF::NT_FREEBSD_PROCSTAT_GROUPS, "NT_PROCSTAT_GROUPS (groups data)"},
+ {ELF::NT_FREEBSD_PROCSTAT_UMASK, "NT_PROCSTAT_UMASK (umask data)"},
+ {ELF::NT_FREEBSD_PROCSTAT_RLIMIT, "NT_PROCSTAT_RLIMIT (rlimit data)"},
+ {ELF::NT_FREEBSD_PROCSTAT_OSREL, "NT_PROCSTAT_OSREL (osreldate data)"},
+ {ELF::NT_FREEBSD_PROCSTAT_PSSTRINGS,
+ "NT_PROCSTAT_PSSTRINGS (ps_strings data)"},
+ {ELF::NT_FREEBSD_PROCSTAT_AUXV, "NT_PROCSTAT_AUXV (auxv data)"},
+};
+
+static const NoteType AMDNoteTypes[] = {
+ {ELF::NT_AMD_AMDGPU_HSA_METADATA,
+ "NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"},
+ {ELF::NT_AMD_AMDGPU_ISA, "NT_AMD_AMDGPU_ISA (ISA Version)"},
+ {ELF::NT_AMD_AMDGPU_PAL_METADATA,
+ "NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"},
+};
+
+static const NoteType AMDGPUNoteTypes[] = {
+ {ELF::NT_AMDGPU_METADATA, "NT_AMDGPU_METADATA (AMDGPU Metadata)"},
+};
+
+static const NoteType CoreNoteTypes[] = {
+ {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"},
+ {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"},
+ {ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"},
+ {ELF::NT_TASKSTRUCT, "NT_TASKSTRUCT (task structure)"},
+ {ELF::NT_AUXV, "NT_AUXV (auxiliary vector)"},
+ {ELF::NT_PSTATUS, "NT_PSTATUS (pstatus structure)"},
+ {ELF::NT_FPREGS, "NT_FPREGS (floating point registers)"},
+ {ELF::NT_PSINFO, "NT_PSINFO (psinfo structure)"},
+ {ELF::NT_LWPSTATUS, "NT_LWPSTATUS (lwpstatus_t structure)"},
+ {ELF::NT_LWPSINFO, "NT_LWPSINFO (lwpsinfo_t structure)"},
+ {ELF::NT_WIN32PSTATUS, "NT_WIN32PSTATUS (win32_pstatus structure)"},
+
+ {ELF::NT_PPC_VMX, "NT_PPC_VMX (ppc Altivec registers)"},
+ {ELF::NT_PPC_VSX, "NT_PPC_VSX (ppc VSX registers)"},
+ {ELF::NT_PPC_TAR, "NT_PPC_TAR (ppc TAR register)"},
+ {ELF::NT_PPC_PPR, "NT_PPC_PPR (ppc PPR register)"},
+ {ELF::NT_PPC_DSCR, "NT_PPC_DSCR (ppc DSCR register)"},
+ {ELF::NT_PPC_EBB, "NT_PPC_EBB (ppc EBB registers)"},
+ {ELF::NT_PPC_PMU, "NT_PPC_PMU (ppc PMU registers)"},
+ {ELF::NT_PPC_TM_CGPR, "NT_PPC_TM_CGPR (ppc checkpointed GPR registers)"},
+ {ELF::NT_PPC_TM_CFPR,
+ "NT_PPC_TM_CFPR (ppc checkpointed floating point registers)"},
+ {ELF::NT_PPC_TM_CVMX,
+ "NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)"},
+ {ELF::NT_PPC_TM_CVSX, "NT_PPC_TM_CVSX (ppc checkpointed VSX registers)"},
+ {ELF::NT_PPC_TM_SPR, "NT_PPC_TM_SPR (ppc TM special purpose registers)"},
+ {ELF::NT_PPC_TM_CTAR, "NT_PPC_TM_CTAR (ppc checkpointed TAR register)"},
+ {ELF::NT_PPC_TM_CPPR, "NT_PPC_TM_CPPR (ppc checkpointed PPR register)"},
+ {ELF::NT_PPC_TM_CDSCR, "NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)"},
+
+ {ELF::NT_386_TLS, "NT_386_TLS (x86 TLS information)"},
+ {ELF::NT_386_IOPERM, "NT_386_IOPERM (x86 I/O permissions)"},
+ {ELF::NT_X86_XSTATE, "NT_X86_XSTATE (x86 XSAVE extended state)"},
+
+ {ELF::NT_S390_HIGH_GPRS, "NT_S390_HIGH_GPRS (s390 upper register halves)"},
+ {ELF::NT_S390_TIMER, "NT_S390_TIMER (s390 timer register)"},
+ {ELF::NT_S390_TODCMP, "NT_S390_TODCMP (s390 TOD comparator register)"},
+ {ELF::NT_S390_TODPREG, "NT_S390_TODPREG (s390 TOD programmable register)"},
+ {ELF::NT_S390_CTRS, "NT_S390_CTRS (s390 control registers)"},
+ {ELF::NT_S390_PREFIX, "NT_S390_PREFIX (s390 prefix register)"},
+ {ELF::NT_S390_LAST_BREAK,
+ "NT_S390_LAST_BREAK (s390 last breaking event address)"},
+ {ELF::NT_S390_SYSTEM_CALL,
+ "NT_S390_SYSTEM_CALL (s390 system call restart data)"},
+ {ELF::NT_S390_TDB, "NT_S390_TDB (s390 transaction diagnostic block)"},
+ {ELF::NT_S390_VXRS_LOW,
+ "NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)"},
+ {ELF::NT_S390_VXRS_HIGH, "NT_S390_VXRS_HIGH (s390 vector registers 16-31)"},
+ {ELF::NT_S390_GS_CB, "NT_S390_GS_CB (s390 guarded-storage registers)"},
+ {ELF::NT_S390_GS_BC,
+ "NT_S390_GS_BC (s390 guarded-storage broadcast control)"},
+
+ {ELF::NT_ARM_VFP, "NT_ARM_VFP (arm VFP registers)"},
+ {ELF::NT_ARM_TLS, "NT_ARM_TLS (AArch TLS registers)"},
+ {ELF::NT_ARM_HW_BREAK,
+ "NT_ARM_HW_BREAK (AArch hardware breakpoint registers)"},
+ {ELF::NT_ARM_HW_WATCH,
+ "NT_ARM_HW_WATCH (AArch hardware watchpoint registers)"},
+
+ {ELF::NT_FILE, "NT_FILE (mapped files)"},
+ {ELF::NT_PRXFPREG, "NT_PRXFPREG (user_xfpregs structure)"},
+ {ELF::NT_SIGINFO, "NT_SIGINFO (siginfo_t data)"},
+};
+
+template <class ELFT>
+const StringRef getNoteTypeName(const typename ELFT::Note &Note,
+ unsigned ELFType) {
+ uint32_t Type = Note.getType();
+ auto FindNote = [&](ArrayRef<NoteType> V) -> StringRef {
+ for (const NoteType &N : V)
+ if (N.ID == Type)
+ return N.Name;
+ return "";
+ };
+
+ StringRef Name = Note.getName();
+ if (Name == "GNU")
+ return FindNote(GNUNoteTypes);
+ if (Name == "FreeBSD")
+ return FindNote(FreeBSDNoteTypes);
+ if (Name == "AMD")
+ return FindNote(AMDNoteTypes);
+ if (Name == "AMDGPU")
+ return FindNote(AMDGPUNoteTypes);
+
+ if (ELFType == ELF::ET_CORE)
+ return FindNote(CoreNoteTypes);
+ return FindNote(GenericNoteTypes);
+}
+
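The lookup above boils down to choosing a table by the note's owner string and then doing a linear search by type ID, with a core/generic fallback. A simplified, hypothetical stand-alone version of that dispatch (table contents trimmed, not the patch's code):

#include <cstdint>
#include <iterator>
#include <string>

struct NoteKind { uint32_t ID; const char *Name; };

static const NoteKind GNUKinds[] = {{1, "NT_GNU_ABI_TAG"},
                                    {3, "NT_GNU_BUILD_ID"}};
static const NoteKind CoreKinds[] = {{1, "NT_PRSTATUS"}};
static const NoteKind GenericKinds[] = {{1, "NT_VERSION"}};

static std::string lookupNoteName(const std::string &Owner, uint32_t Type,
                                  bool IsCoreFile) {
  // Pick the table by owner first, then search it by type ID.
  auto Find = [&](const NoteKind *Begin, const NoteKind *End) -> std::string {
    for (const NoteKind *N = Begin; N != End; ++N)
      if (N->ID == Type)
        return N->Name;
    return "";
  };
  if (Owner == "GNU")
    return Find(std::begin(GNUKinds), std::end(GNUKinds));
  if (IsCoreFile)
    return Find(std::begin(CoreKinds), std::end(CoreKinds));
  return Find(std::begin(GenericKinds), std::end(GenericKinds));
}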
template <class ELFT>
-void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
+static void printNotesHelper(
+ const ELFDumper<ELFT> &Dumper,
+ llvm::function_ref<void(Optional<StringRef>, typename ELFT::Off,
+ typename ELFT::Addr)>
+ StartNotesFn,
+ llvm::function_ref<Error(const typename ELFT::Note &)> ProcessNoteFn,
+ llvm::function_ref<void()> FinishNotesFn) {
+ const ELFFile<ELFT> &Obj = Dumper.getElfObject().getELFFile();
+
+ ArrayRef<typename ELFT::Shdr> Sections = cantFail(Obj.sections());
+ if (Obj.getHeader().e_type != ELF::ET_CORE && !Sections.empty()) {
+ for (const typename ELFT::Shdr &S : Sections) {
+ if (S.sh_type != SHT_NOTE)
+ continue;
+ StartNotesFn(expectedToOptional(Obj.getSectionName(S)), S.sh_offset,
+ S.sh_size);
+ Error Err = Error::success();
+ size_t I = 0;
+ for (const typename ELFT::Note Note : Obj.notes(S, Err)) {
+ if (Error E = ProcessNoteFn(Note))
+ Dumper.reportUniqueWarning(
+ "unable to read note with index " + Twine(I) + " from the " +
+ describe(Obj, S) + ": " + toString(std::move(E)));
+ ++I;
+ }
+ if (Err)
+ Dumper.reportUniqueWarning("unable to read notes from the " +
+ describe(Obj, S) + ": " +
+ toString(std::move(Err)));
+ FinishNotesFn();
+ }
+ return;
+ }
+
+ Expected<ArrayRef<typename ELFT::Phdr>> PhdrsOrErr = Obj.program_headers();
+ if (!PhdrsOrErr) {
+ Dumper.reportUniqueWarning(
+ "unable to read program headers to locate the PT_NOTE segment: " +
+ toString(PhdrsOrErr.takeError()));
+ return;
+ }
+
+ for (size_t I = 0, E = (*PhdrsOrErr).size(); I != E; ++I) {
+ const typename ELFT::Phdr &P = (*PhdrsOrErr)[I];
+ if (P.p_type != PT_NOTE)
+ continue;
+ StartNotesFn(/*SecName=*/None, P.p_offset, P.p_filesz);
+ Error Err = Error::success();
+ size_t Index = 0;
+ for (const typename ELFT::Note Note : Obj.notes(P, Err)) {
+ if (Error E = ProcessNoteFn(Note))
+ Dumper.reportUniqueWarning("unable to read note with index " +
+ Twine(Index) +
+ " from the PT_NOTE segment with index " +
+ Twine(I) + ": " + toString(std::move(E)));
+ ++Index;
+ }
+ if (Err)
+ Dumper.reportUniqueWarning(
+ "unable to read notes from the PT_NOTE segment with index " +
+ Twine(I) + ": " + toString(std::move(Err)));
+ FinishNotesFn();
+ }
+}
+
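printNotesHelper above separates where notes live (SHT_NOTE sections for non-core files, PT_NOTE segments otherwise) from how they are rendered; callers only provide three callbacks. A hypothetical caller that merely counts notes, shown to make the callback shapes concrete (it is not part of this patch):

template <class ELFT>
static size_t countNotes(const ELFDumper<ELFT> &Dumper) {
  size_t Count = 0;
  printNotesHelper<ELFT>(
      Dumper,
      /*StartNotesFn=*/
      [](Optional<StringRef>, typename ELFT::Off, typename ELFT::Addr) {},
      /*ProcessNoteFn=*/
      [&](const typename ELFT::Note &) -> Error {
        ++Count;
        return Error::success();
      },
      /*FinishNotesFn=*/[]() {});
  return Count;
}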
+template <class ELFT> void GNUELFDumper<ELFT>::printNotes() {
auto PrintHeader = [&](Optional<StringRef> SecName,
const typename ELFT::Off Offset,
const typename ELFT::Addr Size) {
@@ -5455,7 +5247,7 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
OS << " Owner Data size \tDescription\n";
};
- auto ProcessNote = [&](const Elf_Note &Note) {
+ auto ProcessNote = [&](const Elf_Note &Note) -> Error {
StringRef Name = Note.getName();
ArrayRef<uint8_t> Descriptor = Note.getDesc();
Elf_Word Type = Note.getType();
@@ -5463,23 +5255,13 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
// Print the note owner/type.
OS << " " << left_justify(Name, 20) << ' '
<< format_hex(Descriptor.size(), 10) << '\t';
- if (Name == "GNU") {
- OS << getGNUNoteTypeName(Type) << '\n';
- } else if (Name == "FreeBSD") {
- OS << getFreeBSDNoteTypeName(Type) << '\n';
- } else if (Name == "AMD") {
- OS << getAMDNoteTypeName(Type) << '\n';
- } else if (Name == "AMDGPU") {
- OS << getAMDGPUNoteTypeName(Type) << '\n';
- } else {
- StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE
- ? getCoreNoteTypeName(Type)
- : getGenericNoteTypeName(Type);
- if (!NoteType.empty())
- OS << NoteType << '\n';
- else
- OS << "Unknown note type: (" << format_hex(Type, 10) << ")\n";
- }
+
+ StringRef NoteType =
+ getNoteTypeName<ELFT>(Note, this->Obj.getHeader().e_type);
+ if (!NoteType.empty())
+ OS << NoteType << '\n';
+ else
+ OS << "Unknown note type: (" << format_hex(Type, 10) << ")\n";
// Print the description, or fallback to printing raw bytes for unknown
// owners.
@@ -5498,11 +5280,10 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
DataExtractor DescExtractor(Descriptor,
ELFT::TargetEndianness == support::little,
sizeof(Elf_Addr));
- Expected<CoreNote> Note = readCoreNote(DescExtractor);
- if (Note)
- printCoreNote<ELFT>(OS, *Note);
+ if (Expected<CoreNote> NoteOrErr = readCoreNote(DescExtractor))
+ printCoreNote<ELFT>(OS, *NoteOrErr);
else
- reportWarning(Note.takeError(), this->FileName);
+ return NoteOrErr.takeError();
}
} else if (!Descriptor.empty()) {
OS << " description data:";
@@ -5510,68 +5291,34 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
OS << " " << format("%02x", B);
OS << '\n';
}
+ return Error::success();
};
- ArrayRef<Elf_Shdr> Sections = cantFail(Obj->sections());
- if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) {
- for (const auto &S : Sections) {
- if (S.sh_type != SHT_NOTE)
- continue;
- PrintHeader(expectedToOptional(Obj->getSectionName(&S)), S.sh_offset,
- S.sh_size);
- Error Err = Error::success();
- for (auto Note : Obj->notes(S, Err))
- ProcessNote(Note);
- if (Err)
- reportError(std::move(Err), this->FileName);
- }
- } else {
- Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = Obj->program_headers();
- if (!PhdrsOrErr) {
- this->reportUniqueWarning(createError(
- "unable to read program headers to locate the PT_NOTE segment: " +
- toString(PhdrsOrErr.takeError())));
- return;
- }
-
- for (const Elf_Phdr &P : *PhdrsOrErr) {
- if (P.p_type != PT_NOTE)
- continue;
- PrintHeader(/*SecName=*/None, P.p_offset, P.p_filesz);
- Error Err = Error::success();
- for (auto Note : Obj->notes(P, Err))
- ProcessNote(Note);
- if (Err)
- reportError(std::move(Err), this->FileName);
- }
- }
+ printNotesHelper(*this, PrintHeader, ProcessNote, []() {});
}
-template <class ELFT>
-void GNUStyle<ELFT>::printELFLinkerOptions(const ELFFile<ELFT> *Obj) {
+template <class ELFT> void GNUELFDumper<ELFT>::printELFLinkerOptions() {
OS << "printELFLinkerOptions not implemented!\n";
}
template <class ELFT>
-void DumpStyle<ELFT>::printDependentLibsHelper(
- const ELFFile<ELFT> *Obj,
+void ELFDumper<ELFT>::printDependentLibsHelper(
function_ref<void(const Elf_Shdr &)> OnSectionStart,
function_ref<void(StringRef, uint64_t)> OnLibEntry) {
auto Warn = [this](unsigned SecNdx, StringRef Msg) {
- this->reportUniqueWarning(
- createError("SHT_LLVM_DEPENDENT_LIBRARIES section at index " +
- Twine(SecNdx) + " is broken: " + Msg));
+ this->reportUniqueWarning("SHT_LLVM_DEPENDENT_LIBRARIES section at index " +
+ Twine(SecNdx) + " is broken: " + Msg);
};
unsigned I = -1;
- for (const Elf_Shdr &Shdr : cantFail(Obj->sections())) {
+ for (const Elf_Shdr &Shdr : cantFail(Obj.sections())) {
++I;
if (Shdr.sh_type != ELF::SHT_LLVM_DEPENDENT_LIBRARIES)
continue;
OnSectionStart(Shdr);
- Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj->getSectionContents(&Shdr);
+ Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj.getSectionContents(Shdr);
if (!ContentsOrErr) {
Warn(I, toString(ContentsOrErr.takeError()));
continue;
@@ -5592,7 +5339,93 @@ void DumpStyle<ELFT>::printDependentLibsHelper(
}
template <class ELFT>
-void GNUStyle<ELFT>::printDependentLibs(const ELFFile<ELFT> *Obj) {
+void ELFDumper<ELFT>::forEachRelocationDo(
+ const Elf_Shdr &Sec, bool RawRelr,
+ llvm::function_ref<void(const Relocation<ELFT> &, unsigned,
+ const Elf_Shdr &, const Elf_Shdr *)>
+ RelRelaFn,
+ llvm::function_ref<void(const Elf_Relr &)> RelrFn) {
+ auto Warn = [&](Error &&E,
+ const Twine &Prefix = "unable to read relocations from") {
+ this->reportUniqueWarning(Prefix + " " + describe(Sec) + ": " +
+ toString(std::move(E)));
+ };
+
+ // SHT_RELR/SHT_ANDROID_RELR sections do not have an associated symbol table.
+ // For them we should not treat the value of the sh_link field as an index of
+ // a symbol table.
+ const Elf_Shdr *SymTab;
+ if (Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_RELR) {
+ Expected<const Elf_Shdr *> SymTabOrErr = Obj.getSection(Sec.sh_link);
+ if (!SymTabOrErr) {
+ Warn(SymTabOrErr.takeError(), "unable to locate a symbol table for");
+ return;
+ }
+ SymTab = *SymTabOrErr;
+ }
+
+ unsigned RelNdx = 0;
+ const bool IsMips64EL = this->Obj.isMips64EL();
+ switch (Sec.sh_type) {
+ case ELF::SHT_REL:
+ if (Expected<Elf_Rel_Range> RangeOrErr = Obj.rels(Sec)) {
+ for (const Elf_Rel &R : *RangeOrErr)
+ RelRelaFn(Relocation<ELFT>(R, IsMips64EL), RelNdx++, Sec, SymTab);
+ } else {
+ Warn(RangeOrErr.takeError());
+ }
+ break;
+ case ELF::SHT_RELA:
+ if (Expected<Elf_Rela_Range> RangeOrErr = Obj.relas(Sec)) {
+ for (const Elf_Rela &R : *RangeOrErr)
+ RelRelaFn(Relocation<ELFT>(R, IsMips64EL), RelNdx++, Sec, SymTab);
+ } else {
+ Warn(RangeOrErr.takeError());
+ }
+ break;
+ case ELF::SHT_RELR:
+ case ELF::SHT_ANDROID_RELR: {
+ Expected<Elf_Relr_Range> RangeOrErr = Obj.relrs(Sec);
+ if (!RangeOrErr) {
+ Warn(RangeOrErr.takeError());
+ break;
+ }
+ if (RawRelr) {
+ for (const Elf_Relr &R : *RangeOrErr)
+ RelrFn(R);
+ break;
+ }
+
+ for (const Elf_Rel &R : Obj.decode_relrs(*RangeOrErr))
+ RelRelaFn(Relocation<ELFT>(R, IsMips64EL), RelNdx++, Sec,
+ /*SymTab=*/nullptr);
+ break;
+ }
+ case ELF::SHT_ANDROID_REL:
+ case ELF::SHT_ANDROID_RELA:
+ if (Expected<std::vector<Elf_Rela>> RelasOrErr = Obj.android_relas(Sec)) {
+ for (const Elf_Rela &R : *RelasOrErr)
+ RelRelaFn(Relocation<ELFT>(R, IsMips64EL), RelNdx++, Sec, SymTab);
+ } else {
+ Warn(RelasOrErr.takeError());
+ }
+ break;
+ }
+}
+
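forEachRelocationDo above hides the per-type decoding (SHT_REL, SHT_RELA, SHT_RELR/SHT_ANDROID_RELR, SHT_ANDROID_REL/RELA) behind two callbacks. A hypothetical fragment, assumed to run inside an ELFDumper<ELFT> member with a relocation section Sec in scope, that only counts entries:

// Illustration of the callback contract only; the real callers format output.
size_t NumRelocs = 0, NumRawRelrWords = 0;
this->forEachRelocationDo(
    Sec, /*RawRelr=*/true,
    [&](const Relocation<ELFT> &, unsigned /*RelNdx*/, const Elf_Shdr &,
        const Elf_Shdr * /*SymTab*/) { ++NumRelocs; },
    [&](const Elf_Relr &) { ++NumRawRelrWords; });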
+template <class ELFT>
+StringRef ELFDumper<ELFT>::getPrintableSectionName(const Elf_Shdr &Sec) const {
+ StringRef Name = "<?>";
+ if (Expected<StringRef> SecNameOrErr =
+ Obj.getSectionName(Sec, this->WarningHandler))
+ Name = *SecNameOrErr;
+ else
+ this->reportUniqueWarning("unable to get the name of " + describe(Sec) +
+ ": " + toString(SecNameOrErr.takeError()));
+ return Name;
+}
+
+template <class ELFT> void GNUELFDumper<ELFT>::printDependentLibs() {
bool SectionStarted = false;
struct NameOffset {
StringRef Name;
@@ -5605,7 +5438,7 @@ void GNUStyle<ELFT>::printDependentLibs(const ELFFile<ELFT> *Obj) {
<< format_hex(Current.Offset, 1) << " contains " << SecEntries.size()
<< " entries:\n";
for (NameOffset Entry : SecEntries)
- OS << " [" << format("%6tx", Entry.Offset) << "] " << Entry.Name
+ OS << " [" << format("%6" PRIx64, Entry.Offset) << "] " << Entry.Name
<< "\n";
OS << "\n";
SecEntries.clear();
@@ -5616,104 +5449,94 @@ void GNUStyle<ELFT>::printDependentLibs(const ELFFile<ELFT> *Obj) {
PrintSection();
SectionStarted = true;
Current.Offset = Shdr.sh_offset;
- Expected<StringRef> Name = Obj->getSectionName(&Shdr);
- if (!Name) {
- Current.Name = "<?>";
- this->reportUniqueWarning(
- createError("cannot get section name of "
- "SHT_LLVM_DEPENDENT_LIBRARIES section: " +
- toString(Name.takeError())));
- } else {
- Current.Name = *Name;
- }
+ Current.Name = this->getPrintableSectionName(Shdr);
};
auto OnLibEntry = [&](StringRef Lib, uint64_t Offset) {
SecEntries.push_back(NameOffset{Lib, Offset});
};
- this->printDependentLibsHelper(Obj, OnSectionStart, OnLibEntry);
+ this->printDependentLibsHelper(OnSectionStart, OnLibEntry);
if (SectionStarted)
PrintSection();
}
-// Used for printing section names in places where possible errors can be
-// ignored.
-static StringRef getSectionName(const SectionRef &Sec) {
- Expected<StringRef> NameOrErr = Sec.getName();
- if (NameOrErr)
- return *NameOrErr;
- consumeError(NameOrErr.takeError());
- return "<?>";
-}
+template <class ELFT>
+bool ELFDumper<ELFT>::printFunctionStackSize(
+ uint64_t SymValue, Optional<const Elf_Shdr *> FunctionSec,
+ const Elf_Shdr &StackSizeSec, DataExtractor Data, uint64_t *Offset) {
+ uint32_t FuncSymIndex = 0;
+ if (this->DotSymtabSec) {
+ if (Expected<Elf_Sym_Range> SymsOrError = Obj.symbols(this->DotSymtabSec)) {
+ uint32_t Index = (uint32_t)-1;
+ for (const Elf_Sym &Sym : *SymsOrError) {
+ ++Index;
+
+ if (Sym.st_shndx == ELF::SHN_UNDEF || Sym.getType() != ELF::STT_FUNC)
+ continue;
+
+ if (Expected<uint64_t> SymAddrOrErr =
+ ObjF.toSymbolRef(this->DotSymtabSec, Index).getAddress()) {
+ if (SymValue != *SymAddrOrErr)
+ continue;
+ } else {
+ std::string Name = this->getStaticSymbolName(Index);
+ reportUniqueWarning("unable to get address of symbol '" + Name +
+ "': " + toString(SymAddrOrErr.takeError()));
+ break;
+ }
-// Used for printing symbol names in places where possible errors can be
-// ignored.
-static std::string getSymbolName(const ELFSymbolRef &Sym) {
- Expected<StringRef> NameOrErr = Sym.getName();
- if (NameOrErr)
- return maybeDemangle(*NameOrErr);
- consumeError(NameOrErr.takeError());
- return "<?>";
-}
+ // Check if the symbol is in the right section. FunctionSec == None
+ // means "any section".
+ if (FunctionSec) {
+ if (Expected<const Elf_Shdr *> SecOrErr =
+ Obj.getSection(Sym, this->DotSymtabSec,
+ this->getShndxTable(this->DotSymtabSec))) {
+ if (*FunctionSec != *SecOrErr)
+ continue;
+ } else {
+ std::string Name = this->getStaticSymbolName(Index);
+ // Note: it is impossible to trigger this error currently, it is
+ // untested.
+ reportUniqueWarning("unable to get section of symbol '" + Name +
+ "': " + toString(SecOrErr.takeError()));
+ break;
+ }
+ }
-template <class ELFT>
-void DumpStyle<ELFT>::printFunctionStackSize(const ELFObjectFile<ELFT> *Obj,
- uint64_t SymValue,
- Optional<SectionRef> FunctionSec,
- const StringRef SectionName,
- DataExtractor Data,
- uint64_t *Offset) {
- // This function ignores potentially erroneous input, unless it is directly
- // related to stack size reporting.
- SymbolRef FuncSym;
- for (const ELFSymbolRef &Symbol : Obj->symbols()) {
- Expected<uint64_t> SymAddrOrErr = Symbol.getAddress();
- if (!SymAddrOrErr) {
- consumeError(SymAddrOrErr.takeError());
- continue;
- }
- if (Expected<uint32_t> SymFlags = Symbol.getFlags()) {
- if (*SymFlags & SymbolRef::SF_Undefined)
- continue;
- } else
- consumeError(SymFlags.takeError());
- if (Symbol.getELFType() == ELF::STT_FUNC && *SymAddrOrErr == SymValue) {
- // Check if the symbol is in the right section. FunctionSec == None means
- // "any section".
- if (!FunctionSec || FunctionSec->containsSymbol(Symbol)) {
- FuncSym = Symbol;
+ FuncSymIndex = Index;
break;
}
+ } else {
+ reportUniqueWarning("unable to read the symbol table: " +
+ toString(SymsOrError.takeError()));
}
}
std::string FuncName = "?";
- // A valid SymbolRef has a non-null object file pointer.
- if (FuncSym.BasicSymbolRef::getObject())
- FuncName = getSymbolName(FuncSym);
+ if (!FuncSymIndex)
+ reportUniqueWarning(
+ "could not identify function symbol for stack size entry in " +
+ describe(StackSizeSec));
else
- reportWarning(
- createError("could not identify function symbol for stack size entry"),
- Obj->getFileName());
+ FuncName = this->getStaticSymbolName(FuncSymIndex);
// Extract the size. The expectation is that Offset is pointing to the right
// place, i.e. past the function address.
- uint64_t PrevOffset = *Offset;
- uint64_t StackSize = Data.getULEB128(Offset);
- // getULEB128() does not advance Offset if it is not able to extract a valid
- // integer.
- if (*Offset == PrevOffset)
- reportError(
- createStringError(object_error::parse_failed,
- "could not extract a valid stack size in section %s",
- SectionName.data()),
- Obj->getFileName());
-
+ Error Err = Error::success();
+ uint64_t StackSize = Data.getULEB128(Offset, &Err);
+ if (Err) {
+ reportUniqueWarning("could not extract a valid stack size from " +
+ describe(StackSizeSec) + ": " +
+ toString(std::move(Err)));
+ return false;
+ }
printStackSizeEntry(StackSize, FuncName);
+ return true;
}
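printFunctionStackSize above consumes .stack_sizes records that are nothing more than an address-sized function address followed by a ULEB128 frame size. An illustrative encoding (made-up values, not taken from the patch): a 64-bit little-endian function at 0x401000 with a 144-byte stack size is stored as:

//   [Elf_Addr FunctionAddress][ULEB128 StackSize]
//
//   00 10 40 00 00 00 00 00   ; address 0x0000000000401000, little endian
//   90 01                     ; ULEB128(144): 0x90 = 0x10 | 0x80 (continue),
//                             ; 0x01 contributes 1 << 7 = 128; 16 + 128 = 144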
template <class ELFT>
-void GNUStyle<ELFT>::printStackSizeEntry(uint64_t Size, StringRef FuncName) {
+void GNUELFDumper<ELFT>::printStackSizeEntry(uint64_t Size,
+ StringRef FuncName) {
OS.PadToColumn(2);
OS << format_decimal(Size, 11);
OS.PadToColumn(18);
@@ -5721,108 +5544,98 @@ void GNUStyle<ELFT>::printStackSizeEntry(uint64_t Size, StringRef FuncName) {
}
template <class ELFT>
-void DumpStyle<ELFT>::printStackSize(const ELFObjectFile<ELFT> *Obj,
- RelocationRef Reloc,
- SectionRef FunctionSec,
- const StringRef &StackSizeSectionName,
+void ELFDumper<ELFT>::printStackSize(const Relocation<ELFT> &R,
+ const Elf_Shdr &RelocSec, unsigned Ndx,
+ const Elf_Shdr *SymTab,
+ const Elf_Shdr *FunctionSec,
+ const Elf_Shdr &StackSizeSec,
const RelocationResolver &Resolver,
DataExtractor Data) {
// This function ignores potentially erroneous input, unless it is directly
// related to stack size reporting.
- object::symbol_iterator RelocSym = Reloc.getSymbol();
+ const Elf_Sym *Sym = nullptr;
+ Expected<RelSymbol<ELFT>> TargetOrErr = this->getRelocationTarget(R, SymTab);
+ if (!TargetOrErr)
+ reportUniqueWarning("unable to get the target of relocation with index " +
+ Twine(Ndx) + " in " + describe(RelocSec) + ": " +
+ toString(TargetOrErr.takeError()));
+ else
+ Sym = TargetOrErr->Sym;
+
uint64_t RelocSymValue = 0;
- StringRef FileStr = Obj->getFileName();
- if (RelocSym != Obj->symbol_end()) {
- // Ensure that the relocation symbol is in the function section, i.e. the
- // section where the functions whose stack sizes we are reporting are
- // located.
- auto SectionOrErr = RelocSym->getSection();
+ if (Sym) {
+ Expected<const Elf_Shdr *> SectionOrErr =
+ this->Obj.getSection(*Sym, SymTab, this->getShndxTable(SymTab));
if (!SectionOrErr) {
- reportWarning(
- createError("cannot identify the section for relocation symbol '" +
- getSymbolName(*RelocSym) + "'"),
- FileStr);
- consumeError(SectionOrErr.takeError());
+ reportUniqueWarning(
+ "cannot identify the section for relocation symbol '" +
+ (*TargetOrErr).Name + "': " + toString(SectionOrErr.takeError()));
} else if (*SectionOrErr != FunctionSec) {
- reportWarning(createError("relocation symbol '" +
- getSymbolName(*RelocSym) +
- "' is not in the expected section"),
- FileStr);
+ reportUniqueWarning("relocation symbol '" + (*TargetOrErr).Name +
+ "' is not in the expected section");
// Pretend that the symbol is in the correct section and report its
// stack size anyway.
- FunctionSec = **SectionOrErr;
+ FunctionSec = *SectionOrErr;
}
- Expected<uint64_t> RelocSymValueOrErr = RelocSym->getValue();
- if (RelocSymValueOrErr)
- RelocSymValue = *RelocSymValueOrErr;
- else
- consumeError(RelocSymValueOrErr.takeError());
+ RelocSymValue = Sym->st_value;
}
- uint64_t Offset = Reloc.getOffset();
- if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1))
- reportError(
- createStringError(object_error::parse_failed,
- "found invalid relocation offset into section %s "
- "while trying to extract a stack size entry",
- StackSizeSectionName.data()),
- FileStr);
+ uint64_t Offset = R.Offset;
+ if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) {
+ reportUniqueWarning("found invalid relocation offset (0x" +
+ Twine::utohexstr(Offset) + ") into " +
+ describe(StackSizeSec) +
+ " while trying to extract a stack size entry");
+ return;
+ }
- uint64_t Addend = Data.getAddress(&Offset);
- uint64_t SymValue = Resolver(Reloc, RelocSymValue, Addend);
- this->printFunctionStackSize(Obj, SymValue, FunctionSec, StackSizeSectionName,
- Data, &Offset);
+ uint64_t SymValue =
+ Resolver(R.Type, Offset, RelocSymValue, Data.getAddress(&Offset),
+ R.Addend.getValueOr(0));
+ this->printFunctionStackSize(SymValue, FunctionSec, StackSizeSec, Data,
+ &Offset);
}
template <class ELFT>
-void DumpStyle<ELFT>::printNonRelocatableStackSizes(
- const ELFObjectFile<ELFT> *Obj, std::function<void()> PrintHeader) {
+void ELFDumper<ELFT>::printNonRelocatableStackSizes(
+ std::function<void()> PrintHeader) {
// This function ignores potentially erroneous input, unless it is directly
// related to stack size reporting.
- const ELFFile<ELFT> *EF = Obj->getELFFile();
- StringRef FileStr = Obj->getFileName();
- for (const SectionRef &Sec : Obj->sections()) {
- StringRef SectionName = getSectionName(Sec);
- if (SectionName != ".stack_sizes")
+ for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
+ if (this->getPrintableSectionName(Sec) != ".stack_sizes")
continue;
PrintHeader();
- const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl());
ArrayRef<uint8_t> Contents =
- unwrapOrError(this->FileName, EF->getSectionContents(ElfSec));
- DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr));
+ unwrapOrError(this->FileName, Obj.getSectionContents(Sec));
+ DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr));
uint64_t Offset = 0;
while (Offset < Contents.size()) {
// The function address is followed by a ULEB representing the stack
// size. Check for an extra byte before we try to process the entry.
if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) {
- reportError(
- createStringError(
- object_error::parse_failed,
- "section %s ended while trying to extract a stack size entry",
- SectionName.data()),
- FileStr);
+ reportUniqueWarning(
+ describe(Sec) +
+ " ended while trying to extract a stack size entry");
+ break;
}
uint64_t SymValue = Data.getAddress(&Offset);
- printFunctionStackSize(Obj, SymValue, /*FunctionSec=*/None, SectionName,
- Data, &Offset);
+ if (!printFunctionStackSize(SymValue, /*FunctionSec=*/None, Sec, Data,
+ &Offset))
+ break;
}
}
}
template <class ELFT>
-void DumpStyle<ELFT>::printRelocatableStackSizes(
- const ELFObjectFile<ELFT> *Obj, std::function<void()> PrintHeader) {
- const ELFFile<ELFT> *EF = Obj->getELFFile();
-
+void ELFDumper<ELFT>::printRelocatableStackSizes(
+ std::function<void()> PrintHeader) {
// Build a map between stack size sections and their corresponding relocation
// sections.
- llvm::MapVector<SectionRef, SectionRef> StackSizeRelocMap;
- const SectionRef NullSection{};
-
- for (const SectionRef &Sec : Obj->sections()) {
+ llvm::MapVector<const Elf_Shdr *, const Elf_Shdr *> StackSizeRelocMap;
+ for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
StringRef SectionName;
- if (Expected<StringRef> NameOrErr = Sec.getName())
+ if (Expected<StringRef> NameOrErr = Obj.getSectionName(Sec))
SectionName = *NameOrErr;
else
consumeError(NameOrErr.takeError());
@@ -5830,84 +5643,83 @@ void DumpStyle<ELFT>::printRelocatableStackSizes(
// A stack size section that we haven't encountered yet is mapped to the
// null section until we find its corresponding relocation section.
if (SectionName == ".stack_sizes")
- if (StackSizeRelocMap.count(Sec) == 0) {
- StackSizeRelocMap[Sec] = NullSection;
+ if (StackSizeRelocMap
+ .insert(std::make_pair(&Sec, (const Elf_Shdr *)nullptr))
+ .second)
continue;
- }
// Check relocation sections if they are relocating contents of a
// stack sizes section.
- const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl());
- uint32_t SectionType = ElfSec->sh_type;
- if (SectionType != ELF::SHT_RELA && SectionType != ELF::SHT_REL)
+ if (Sec.sh_type != ELF::SHT_RELA && Sec.sh_type != ELF::SHT_REL)
continue;
- Expected<section_iterator> RelSecOrErr = Sec.getRelocatedSection();
- if (!RelSecOrErr)
- reportError(createStringError(object_error::parse_failed,
- "%s: failed to get a relocated section: %s",
- SectionName.data(),
- toString(RelSecOrErr.takeError()).c_str()),
- Obj->getFileName());
-
- const Elf_Shdr *ContentsSec =
- Obj->getSection((*RelSecOrErr)->getRawDataRefImpl());
- Expected<StringRef> ContentsSectionNameOrErr =
- EF->getSectionName(ContentsSec);
- if (!ContentsSectionNameOrErr) {
- consumeError(ContentsSectionNameOrErr.takeError());
+ Expected<const Elf_Shdr *> RelSecOrErr = Obj.getSection(Sec.sh_info);
+ if (!RelSecOrErr) {
+ reportUniqueWarning(describe(Sec) +
+ ": failed to get a relocated section: " +
+ toString(RelSecOrErr.takeError()));
continue;
}
- if (*ContentsSectionNameOrErr != ".stack_sizes")
+
+ const Elf_Shdr *ContentsSec = *RelSecOrErr;
+ if (this->getPrintableSectionName(**RelSecOrErr) != ".stack_sizes")
continue;
+
// Insert a mapping from the stack sizes section to its relocation section.
- StackSizeRelocMap[Obj->toSectionRef(ContentsSec)] = Sec;
+ StackSizeRelocMap[ContentsSec] = &Sec;
}
for (const auto &StackSizeMapEntry : StackSizeRelocMap) {
PrintHeader();
- const SectionRef &StackSizesSec = StackSizeMapEntry.first;
- const SectionRef &RelocSec = StackSizeMapEntry.second;
+ const Elf_Shdr *StackSizesELFSec = StackSizeMapEntry.first;
+ const Elf_Shdr *RelocSec = StackSizeMapEntry.second;
// Warn about stack size sections without a relocation section.
- StringRef StackSizeSectionName = getSectionName(StackSizesSec);
- if (RelocSec == NullSection) {
- reportWarning(createError("section " + StackSizeSectionName +
- " does not have a corresponding "
+ if (!RelocSec) {
+ reportWarning(createError(".stack_sizes (" + describe(*StackSizesELFSec) +
+ ") does not have a corresponding "
"relocation section"),
- Obj->getFileName());
+ FileName);
continue;
}
// A .stack_sizes section header's sh_link field is supposed to point
// to the section that contains the functions whose stack sizes are
// described in it.
- const Elf_Shdr *StackSizesELFSec =
- Obj->getSection(StackSizesSec.getRawDataRefImpl());
- const SectionRef FunctionSec = Obj->toSectionRef(unwrapOrError(
- this->FileName, EF->getSection(StackSizesELFSec->sh_link)));
+ const Elf_Shdr *FunctionSec = unwrapOrError(
+ this->FileName, Obj.getSection(StackSizesELFSec->sh_link));
- bool (*IsSupportedFn)(uint64_t);
+ SupportsRelocation IsSupportedFn;
RelocationResolver Resolver;
- std::tie(IsSupportedFn, Resolver) = getRelocationResolver(*Obj);
- auto Contents = unwrapOrError(this->FileName, StackSizesSec.getContents());
- DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr));
- for (const RelocationRef &Reloc : RelocSec.relocations()) {
- if (!IsSupportedFn || !IsSupportedFn(Reloc.getType()))
- reportError(createStringError(
- object_error::parse_failed,
- "unsupported relocation type in section %s: %s",
- getSectionName(RelocSec).data(),
- EF->getRelocationTypeName(Reloc.getType()).data()),
- Obj->getFileName());
- this->printStackSize(Obj, Reloc, FunctionSec, StackSizeSectionName,
- Resolver, Data);
- }
+ std::tie(IsSupportedFn, Resolver) = getRelocationResolver(this->ObjF);
+ ArrayRef<uint8_t> Contents =
+ unwrapOrError(this->FileName, Obj.getSectionContents(*StackSizesELFSec));
+ DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr));
+
+ forEachRelocationDo(
+ *RelocSec, /*RawRelr=*/false,
+ [&](const Relocation<ELFT> &R, unsigned Ndx, const Elf_Shdr &Sec,
+ const Elf_Shdr *SymTab) {
+ if (!IsSupportedFn || !IsSupportedFn(R.Type)) {
+ reportUniqueWarning(
+ describe(*RelocSec) +
+ " contains an unsupported relocation with index " + Twine(Ndx) +
+ ": " + Obj.getRelocationTypeName(R.Type));
+ return;
+ }
+
+ this->printStackSize(R, *RelocSec, Ndx, SymTab, FunctionSec,
+ *StackSizesELFSec, Resolver, Data);
+ },
+ [](const Elf_Relr &) {
+ llvm_unreachable("can't get here, because we only support "
+ "SHT_REL/SHT_RELA sections");
+ });
}
}
template <class ELFT>
-void GNUStyle<ELFT>::printStackSizes(const ELFObjectFile<ELFT> *Obj) {
+void GNUELFDumper<ELFT>::printStackSizes() {
bool HeaderHasBeenPrinted = false;
auto PrintHeader = [&]() {
if (HeaderHasBeenPrinted)
@@ -5922,14 +5734,14 @@ void GNUStyle<ELFT>::printStackSizes(const ELFObjectFile<ELFT> *Obj) {
// For non-relocatable objects, look directly for sections whose name starts
// with .stack_sizes and process the contents.
- if (Obj->isRelocatableObject())
- this->printRelocatableStackSizes(Obj, PrintHeader);
+ if (this->Obj.getHeader().e_type == ELF::ET_REL)
+ this->printRelocatableStackSizes(PrintHeader);
else
- this->printNonRelocatableStackSizes(Obj, PrintHeader);
+ this->printNonRelocatableStackSizes(PrintHeader);
}
template <class ELFT>
-void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
+void GNUELFDumper<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
size_t Bias = ELFT::Is64Bits ? 8 : 0;
auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) {
OS.PadToColumn(2);
@@ -5977,10 +5789,14 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
<< " Type Ndx Name\n";
else
OS << " Address Access Initial Sym.Val. Type Ndx Name\n";
+
+ DataRegion<Elf_Word> ShndxTable(
+ (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
for (auto &E : Parser.getGlobalEntries()) {
- const Elf_Sym *Sym = Parser.getGotSym(&E);
- std::string SymName = this->dumper()->getFullSymbolName(
- Sym, this->dumper()->getDynamicStringTable(), false);
+ const Elf_Sym &Sym = *Parser.getGotSym(&E);
+ const Elf_Sym &FirstSym = this->dynamic_symbols()[0];
+ std::string SymName = this->getFullSymbolName(
+ Sym, &Sym - &FirstSym, ShndxTable, this->DynamicStringTable, false);
OS.PadToColumn(2);
OS << to_string(format_hex_no_prefix(Parser.getGotAddress(&E), 8 + Bias));
@@ -5989,12 +5805,12 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
OS.PadToColumn(22 + Bias);
OS << to_string(format_hex_no_prefix(E, 8 + Bias));
OS.PadToColumn(31 + 2 * Bias);
- OS << to_string(format_hex_no_prefix(Sym->st_value, 8 + Bias));
+ OS << to_string(format_hex_no_prefix(Sym.st_value, 8 + Bias));
OS.PadToColumn(40 + 3 * Bias);
- OS << printEnum(Sym->getType(), makeArrayRef(ElfSymbolTypes));
+ OS << printEnum(Sym.getType(), makeArrayRef(ElfSymbolTypes));
OS.PadToColumn(48 + 3 * Bias);
- OS << getSymbolSectionNdx(Parser.Obj, Sym,
- this->dumper()->dynamic_symbols().begin());
+ OS << getSymbolSectionNdx(Sym, &Sym - this->dynamic_symbols().begin(),
+ ShndxTable);
OS.PadToColumn(52 + 3 * Bias);
OS << SymName << "\n";
}
@@ -6006,7 +5822,7 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
}
template <class ELFT>
-void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
+void GNUELFDumper<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
size_t Bias = ELFT::Is64Bits ? 8 : 0;
auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) {
OS.PadToColumn(2);
@@ -6029,22 +5845,26 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
OS << "\n";
OS << " Entries:\n";
OS << " Address Initial Sym.Val. Type Ndx Name\n";
+ DataRegion<Elf_Word> ShndxTable(
+ (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
for (auto &E : Parser.getPltEntries()) {
- const Elf_Sym *Sym = Parser.getPltSym(&E);
- std::string SymName = this->dumper()->getFullSymbolName(
- Sym, this->dumper()->getDynamicStringTable(), false);
+ const Elf_Sym &Sym = *Parser.getPltSym(&E);
+ const Elf_Sym &FirstSym = *cantFail(
+ this->Obj.template getEntry<Elf_Sym>(*Parser.getPltSymTable(), 0));
+ std::string SymName = this->getFullSymbolName(
+ Sym, &Sym - &FirstSym, ShndxTable, this->DynamicStringTable, false);
OS.PadToColumn(2);
OS << to_string(format_hex_no_prefix(Parser.getPltAddress(&E), 8 + Bias));
OS.PadToColumn(11 + Bias);
OS << to_string(format_hex_no_prefix(E, 8 + Bias));
OS.PadToColumn(20 + 2 * Bias);
- OS << to_string(format_hex_no_prefix(Sym->st_value, 8 + Bias));
+ OS << to_string(format_hex_no_prefix(Sym.st_value, 8 + Bias));
OS.PadToColumn(29 + 3 * Bias);
- OS << printEnum(Sym->getType(), makeArrayRef(ElfSymbolTypes));
+ OS << printEnum(Sym.getType(), makeArrayRef(ElfSymbolTypes));
OS.PadToColumn(37 + 3 * Bias);
- OS << getSymbolSectionNdx(Parser.Obj, Sym,
- this->dumper()->dynamic_symbols().begin());
+ OS << getSymbolSectionNdx(Sym, &Sym - this->dynamic_symbols().begin(),
+ ShndxTable);
OS.PadToColumn(41 + 3 * Bias);
OS << SymName << "\n";
}
@@ -6052,20 +5872,33 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
}
template <class ELFT>
-void GNUStyle<ELFT>::printMipsABIFlags(const ELFObjectFile<ELFT> *ObjF) {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- const Elf_Shdr *Shdr =
- findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.abiflags");
- if (!Shdr)
- return;
+Expected<const Elf_Mips_ABIFlags<ELFT> *>
+getMipsAbiFlagsSection(const ELFDumper<ELFT> &Dumper) {
+ const typename ELFT::Shdr *Sec = Dumper.findSectionByName(".MIPS.abiflags");
+ if (Sec == nullptr)
+ return nullptr;
+
+ constexpr StringRef ErrPrefix = "unable to read the .MIPS.abiflags section: ";
+ Expected<ArrayRef<uint8_t>> DataOrErr =
+ Dumper.getElfObject().getELFFile().getSectionContents(*Sec);
+ if (!DataOrErr)
+ return createError(ErrPrefix + toString(DataOrErr.takeError()));
- ArrayRef<uint8_t> Sec =
- unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr));
- if (Sec.size() != sizeof(Elf_Mips_ABIFlags<ELFT>))
- reportError(createError(".MIPS.abiflags section has a wrong size"),
- ObjF->getFileName());
+ if (DataOrErr->size() != sizeof(Elf_Mips_ABIFlags<ELFT>))
+ return createError(ErrPrefix + "it has a wrong size (" +
+ Twine(DataOrErr->size()) + ")");
+ return reinterpret_cast<const Elf_Mips_ABIFlags<ELFT> *>(DataOrErr->data());
+}
- auto *Flags = reinterpret_cast<const Elf_Mips_ABIFlags<ELFT> *>(Sec.data());
+template <class ELFT> void GNUELFDumper<ELFT>::printMipsABIFlags() {
+ const Elf_Mips_ABIFlags<ELFT> *Flags = nullptr;
+ if (Expected<const Elf_Mips_ABIFlags<ELFT> *> SecOrErr =
+ getMipsAbiFlagsSection(*this))
+ Flags = *SecOrErr;
+ else
+ this->reportUniqueWarning(SecOrErr.takeError());
+ if (!Flags)
+ return;
OS << "MIPS ABI Flags Version: " << Flags->version << "\n\n";
OS << "ISA: MIPS" << int(Flags->isa_level);
@@ -6090,22 +5923,22 @@ void GNUStyle<ELFT>::printMipsABIFlags(const ELFObjectFile<ELFT> *ObjF) {
OS << "\n";
}
-template <class ELFT> void LLVMStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
- const Elf_Ehdr *E = Obj->getHeader();
+template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
+ const Elf_Ehdr &E = this->Obj.getHeader();
{
DictScope D(W, "ElfHeader");
{
DictScope D(W, "Ident");
- W.printBinary("Magic", makeArrayRef(E->e_ident).slice(ELF::EI_MAG0, 4));
- W.printEnum("Class", E->e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass));
- W.printEnum("DataEncoding", E->e_ident[ELF::EI_DATA],
+ W.printBinary("Magic", makeArrayRef(E.e_ident).slice(ELF::EI_MAG0, 4));
+ W.printEnum("Class", E.e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass));
+ W.printEnum("DataEncoding", E.e_ident[ELF::EI_DATA],
makeArrayRef(ElfDataEncoding));
- W.printNumber("FileVersion", E->e_ident[ELF::EI_VERSION]);
+ W.printNumber("FileVersion", E.e_ident[ELF::EI_VERSION]);
auto OSABI = makeArrayRef(ElfOSABI);
- if (E->e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH &&
- E->e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) {
- switch (E->e_machine) {
+ if (E.e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH &&
+ E.e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) {
+ switch (E.e_machine) {
case ELF::EM_AMDGPU:
OSABI = makeArrayRef(AMDGPUElfOSABI);
break;
@@ -6117,43 +5950,54 @@ template <class ELFT> void LLVMStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
break;
}
}
- W.printEnum("OS/ABI", E->e_ident[ELF::EI_OSABI], OSABI);
- W.printNumber("ABIVersion", E->e_ident[ELF::EI_ABIVERSION]);
- W.printBinary("Unused", makeArrayRef(E->e_ident).slice(ELF::EI_PAD));
+ W.printEnum("OS/ABI", E.e_ident[ELF::EI_OSABI], OSABI);
+ W.printNumber("ABIVersion", E.e_ident[ELF::EI_ABIVERSION]);
+ W.printBinary("Unused", makeArrayRef(E.e_ident).slice(ELF::EI_PAD));
}
- W.printEnum("Type", E->e_type, makeArrayRef(ElfObjectFileType));
- W.printEnum("Machine", E->e_machine, makeArrayRef(ElfMachineType));
- W.printNumber("Version", E->e_version);
- W.printHex("Entry", E->e_entry);
- W.printHex("ProgramHeaderOffset", E->e_phoff);
- W.printHex("SectionHeaderOffset", E->e_shoff);
- if (E->e_machine == EM_MIPS)
- W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderMipsFlags),
+ std::string TypeStr;
+ if (const EnumEntry<unsigned> *Ent = getObjectFileEnumEntry(E.e_type)) {
+ TypeStr = Ent->Name.str();
+ } else {
+ if (E.e_type >= ET_LOPROC)
+ TypeStr = "Processor Specific";
+ else if (E.e_type >= ET_LOOS)
+ TypeStr = "OS Specific";
+ else
+ TypeStr = "Unknown";
+ }
+ W.printString("Type", TypeStr + " (0x" + to_hexString(E.e_type) + ")");
+
+ W.printEnum("Machine", E.e_machine, makeArrayRef(ElfMachineType));
+ W.printNumber("Version", E.e_version);
+ W.printHex("Entry", E.e_entry);
+ W.printHex("ProgramHeaderOffset", E.e_phoff);
+ W.printHex("SectionHeaderOffset", E.e_shoff);
+ if (E.e_machine == EM_MIPS)
+ W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderMipsFlags),
unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
unsigned(ELF::EF_MIPS_MACH));
- else if (E->e_machine == EM_AMDGPU)
- W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderAMDGPUFlags),
+ else if (E.e_machine == EM_AMDGPU)
+ W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAMDGPUFlags),
unsigned(ELF::EF_AMDGPU_MACH));
- else if (E->e_machine == EM_RISCV)
- W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderRISCVFlags));
+ else if (E.e_machine == EM_RISCV)
+ W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderRISCVFlags));
else
- W.printFlags("Flags", E->e_flags);
- W.printNumber("HeaderSize", E->e_ehsize);
- W.printNumber("ProgramHeaderEntrySize", E->e_phentsize);
- W.printNumber("ProgramHeaderCount", E->e_phnum);
- W.printNumber("SectionHeaderEntrySize", E->e_shentsize);
+ W.printFlags("Flags", E.e_flags);
+ W.printNumber("HeaderSize", E.e_ehsize);
+ W.printNumber("ProgramHeaderEntrySize", E.e_phentsize);
+ W.printNumber("ProgramHeaderCount", E.e_phnum);
+ W.printNumber("SectionHeaderEntrySize", E.e_shentsize);
W.printString("SectionHeaderCount",
- getSectionHeadersNumString(Obj, this->FileName));
+ getSectionHeadersNumString(this->Obj, this->FileName));
W.printString("StringTableSectionIndex",
- getSectionHeaderTableIndexString(Obj, this->FileName));
+ getSectionHeaderTableIndexString(this->Obj, this->FileName));
}
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printGroupSections(const ELFO *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printGroupSections() {
DictScope Lists(W, "Groups");
- std::vector<GroupSection> V = getGroups<ELFT>(Obj, this->FileName);
+ std::vector<GroupSection> V = this->getGroups();
DenseMap<uint64_t, const GroupSection *> Map = mapSectionsToGroups(V);
for (const GroupSection &G : V) {
DictScope D(W, "Group");
@@ -6167,15 +6011,13 @@ void LLVMStyle<ELFT>::printGroupSections(const ELFO *Obj) {
ListScope L(W, "Section(s) in group");
for (const GroupMember &GM : G.Members) {
const GroupSection *MainGroup = Map[GM.Index];
- if (MainGroup != &G) {
- W.flush();
- errs() << "Error: " << GM.Name << " (" << GM.Index
- << ") in a group " + G.Name + " (" << G.Index
- << ") is already in a group " + MainGroup->Name + " ("
- << MainGroup->Index << ")\n";
- errs().flush();
- continue;
- }
+ if (MainGroup != &G)
+ this->reportUniqueWarning(
+ "section with index " + Twine(GM.Index) +
+ ", included in the group section with index " +
+ Twine(MainGroup->Index) +
+ ", was also found in the group section with index " +
+ Twine(G.Index));
W.startLine() << GM.Name << " (" << GM.Index << ")\n";
}
}
@@ -6184,129 +6026,66 @@ void LLVMStyle<ELFT>::printGroupSections(const ELFO *Obj) {
W.startLine() << "There are no group sections in the file.\n";
}
-template <class ELFT> void LLVMStyle<ELFT>::printRelocations(const ELFO *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printRelocations() {
ListScope D(W, "Relocations");
- int SectionNumber = -1;
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) {
- ++SectionNumber;
-
- if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA &&
- Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_REL &&
- Sec.sh_type != ELF::SHT_ANDROID_RELA &&
- Sec.sh_type != ELF::SHT_ANDROID_RELR)
+ for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
+ if (!isRelocationSec<ELFT>(Sec))
continue;
- StringRef Name = unwrapOrError(this->FileName, Obj->getSectionName(&Sec));
-
- W.startLine() << "Section (" << SectionNumber << ") " << Name << " {\n";
+ StringRef Name = this->getPrintableSectionName(Sec);
+ unsigned SecNdx = &Sec - &cantFail(this->Obj.sections()).front();
+ W.startLine() << "Section (" << SecNdx << ") " << Name << " {\n";
W.indent();
-
- printRelocations(&Sec, Obj);
-
+ this->printRelocationsHelper(Sec);
W.unindent();
W.startLine() << "}\n";
}
}
template <class ELFT>
-void LLVMStyle<ELFT>::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) {
- const Elf_Shdr *SymTab =
- unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link));
- unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
- unsigned RelNdx = 0;
-
- switch (Sec->sh_type) {
- case ELF::SHT_REL:
- for (const Elf_Rel &R : unwrapOrError(this->FileName, Obj->rels(Sec))) {
- Elf_Rela Rela;
- Rela.r_offset = R.r_offset;
- Rela.r_info = R.r_info;
- Rela.r_addend = 0;
- printRelocation(Obj, SecNdx, Rela, ++RelNdx, SymTab);
- }
- break;
- case ELF::SHT_RELA:
- for (const Elf_Rela &R : unwrapOrError(this->FileName, Obj->relas(Sec)))
- printRelocation(Obj, SecNdx, R, ++RelNdx, SymTab);
- break;
- case ELF::SHT_RELR:
- case ELF::SHT_ANDROID_RELR: {
- Elf_Relr_Range Relrs = unwrapOrError(this->FileName, Obj->relrs(Sec));
- if (opts::RawRelr) {
- for (const Elf_Relr &R : Relrs)
- W.startLine() << W.hex(R) << "\n";
- } else {
- std::vector<Elf_Rela> RelrRelas =
- unwrapOrError(this->FileName, Obj->decode_relrs(Relrs));
- for (const Elf_Rela &R : RelrRelas)
- printRelocation(Obj, SecNdx, R, ++RelNdx, SymTab);
- }
- break;
- }
- case ELF::SHT_ANDROID_REL:
- case ELF::SHT_ANDROID_RELA:
- for (const Elf_Rela &R :
- unwrapOrError(this->FileName, Obj->android_relas(Sec)))
- printRelocation(Obj, SecNdx, R, ++RelNdx, SymTab);
- break;
- }
+void LLVMELFDumper<ELFT>::printRelrReloc(const Elf_Relr &R) {
+ W.startLine() << W.hex(R) << "\n";
}
template <class ELFT>
-void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, unsigned SecIndex,
- Elf_Rela Rel, unsigned RelIndex,
- const Elf_Shdr *SymTab) {
- Expected<std::pair<const typename ELFT::Sym *, std::string>> Target =
- this->dumper()->getRelocationTarget(SymTab, Rel);
- if (!Target) {
- this->reportUniqueWarning(createError(
- "unable to print relocation " + Twine(RelIndex) + " in section " +
- Twine(SecIndex) + ": " + toString(Target.takeError())));
- return;
- }
-
- std::string TargetName = Target->second;
+void LLVMELFDumper<ELFT>::printRelRelaReloc(const Relocation<ELFT> &R,
+ const RelSymbol<ELFT> &RelSym) {
+ StringRef SymbolName = RelSym.Name;
SmallString<32> RelocName;
- Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
+ this->Obj.getRelocationTypeName(R.Type, RelocName);
if (opts::ExpandRelocs) {
DictScope Group(W, "Relocation");
- W.printHex("Offset", Rel.r_offset);
- W.printNumber("Type", RelocName, (int)Rel.getType(Obj->isMips64EL()));
- W.printNumber("Symbol", !TargetName.empty() ? TargetName : "-",
- Rel.getSymbol(Obj->isMips64EL()));
- W.printHex("Addend", Rel.r_addend);
+ W.printHex("Offset", R.Offset);
+ W.printNumber("Type", RelocName, R.Type);
+ W.printNumber("Symbol", !SymbolName.empty() ? SymbolName : "-", R.Symbol);
+ if (R.Addend)
+ W.printHex("Addend", (uintX_t)*R.Addend);
} else {
raw_ostream &OS = W.startLine();
- OS << W.hex(Rel.r_offset) << " " << RelocName << " "
- << (!TargetName.empty() ? TargetName : "-") << " " << W.hex(Rel.r_addend)
- << "\n";
+ OS << W.hex(R.Offset) << " " << RelocName << " "
+ << (!SymbolName.empty() ? SymbolName : "-");
+ if (R.Addend)
+ OS << " " << W.hex((uintX_t)*R.Addend);
+ OS << "\n";
}
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printSectionHeaders() {
ListScope SectionsD(W, "Sections");
int SectionIndex = -1;
std::vector<EnumEntry<unsigned>> FlagsList =
- getSectionFlagsForTarget(Obj->getHeader()->e_machine);
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) {
- StringRef Name = "<?>";
- if (Expected<StringRef> SecNameOrErr =
- Obj->getSectionName(&Sec, this->dumper()->WarningHandler))
- Name = *SecNameOrErr;
- else
- this->reportUniqueWarning(SecNameOrErr.takeError());
-
+ getSectionFlagsForTarget(this->Obj.getHeader().e_machine);
+ for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
DictScope SectionD(W, "Section");
W.printNumber("Index", ++SectionIndex);
- W.printNumber("Name", Name, Sec.sh_name);
- W.printHex(
- "Type",
- object::getELFSectionTypeName(Obj->getHeader()->e_machine, Sec.sh_type),
- Sec.sh_type);
+ W.printNumber("Name", this->getPrintableSectionName(Sec), Sec.sh_name);
+ W.printHex("Type",
+ object::getELFSectionTypeName(this->Obj.getHeader().e_machine,
+ Sec.sh_type),
+ Sec.sh_type);
W.printFlags("Flags", Sec.sh_flags, makeArrayRef(FlagsList));
W.printHex("Address", Sec.sh_addr);
W.printHex("Offset", Sec.sh_offset);
@@ -6318,31 +6097,33 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
if (opts::SectionRelocations) {
ListScope D(W, "Relocations");
- printRelocations(&Sec, Obj);
+ this->printRelocationsHelper(Sec);
}
if (opts::SectionSymbols) {
ListScope D(W, "Symbols");
- const Elf_Shdr *Symtab = this->dumper()->getDotSymtabSec();
- StringRef StrTable =
- unwrapOrError(this->FileName, Obj->getStringTableForSymtab(*Symtab));
-
- for (const Elf_Sym &Sym :
- unwrapOrError(this->FileName, Obj->symbols(Symtab))) {
- const Elf_Shdr *SymSec = unwrapOrError(
+ if (this->DotSymtabSec) {
+ StringRef StrTable = unwrapOrError(
this->FileName,
- Obj->getSection(&Sym, Symtab, this->dumper()->getShndxTable()));
- if (SymSec == &Sec)
- printSymbol(
- Obj, &Sym,
- unwrapOrError(this->FileName, Obj->symbols(Symtab)).begin(),
- StrTable, false, false);
+ this->Obj.getStringTableForSymtab(*this->DotSymtabSec));
+ ArrayRef<Elf_Word> ShndxTable = this->getShndxTable(this->DotSymtabSec);
+
+ typename ELFT::SymRange Symbols = unwrapOrError(
+ this->FileName, this->Obj.symbols(this->DotSymtabSec));
+ for (const Elf_Sym &Sym : Symbols) {
+ const Elf_Shdr *SymSec = unwrapOrError(
+ this->FileName,
+ this->Obj.getSection(Sym, this->DotSymtabSec, ShndxTable));
+ if (SymSec == &Sec)
+ printSymbol(Sym, &Sym - &Symbols[0], ShndxTable, StrTable, false,
+ false);
+ }
}
}
if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) {
ArrayRef<uint8_t> Data =
- unwrapOrError(this->FileName, Obj->getSectionContents(&Sec));
+ unwrapOrError(this->FileName, this->Obj.getSectionContents(Sec));
W.printBinaryBlock(
"SectionData",
StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()));
@@ -6351,12 +6132,34 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
}
template <class ELFT>
-void LLVMStyle<ELFT>::printSymbolSection(const Elf_Sym *Symbol,
- const Elf_Sym *First) {
+void LLVMELFDumper<ELFT>::printSymbolSection(
+ const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable) const {
+ auto GetSectionSpecialType = [&]() -> Optional<StringRef> {
+ if (Symbol.isUndefined())
+ return StringRef("Undefined");
+ if (Symbol.isProcessorSpecific())
+ return StringRef("Processor Specific");
+ if (Symbol.isOSSpecific())
+ return StringRef("Operating System Specific");
+ if (Symbol.isAbsolute())
+ return StringRef("Absolute");
+ if (Symbol.isCommon())
+ return StringRef("Common");
+ if (Symbol.isReserved() && Symbol.st_shndx != SHN_XINDEX)
+ return StringRef("Reserved");
+ return None;
+ };
+
+ if (Optional<StringRef> Type = GetSectionSpecialType()) {
+ W.printHex("Section", *Type, Symbol.st_shndx);
+ return;
+ }
+
Expected<unsigned> SectionIndex =
- this->dumper()->getSymbolSectionIndex(Symbol, First);
+ this->getSymbolSectionIndex(Symbol, SymIndex, ShndxTable);
if (!SectionIndex) {
- assert(Symbol->st_shndx == SHN_XINDEX &&
+ assert(Symbol.st_shndx == SHN_XINDEX &&
"getSymbolSectionIndex should only fail due to an invalid "
"SHT_SYMTAB_SHNDX table/reference");
this->reportUniqueWarning(SectionIndex.takeError());
@@ -6365,11 +6168,11 @@ void LLVMStyle<ELFT>::printSymbolSection(const Elf_Sym *Symbol,
}
Expected<StringRef> SectionName =
- this->dumper()->getSymbolSectionName(Symbol, *SectionIndex);
+ this->getSymbolSectionName(Symbol, *SectionIndex);
if (!SectionName) {
// Don't report an invalid section name if the section headers are missing.
// In such situations, all sections will be "invalid".
- if (!this->dumper()->getElfObject()->sections().empty())
+ if (!this->ObjF.sections().empty())
this->reportUniqueWarning(SectionName.takeError());
else
consumeError(SectionName.takeError());
@@ -6380,36 +6183,37 @@ void LLVMStyle<ELFT>::printSymbolSection(const Elf_Sym *Symbol,
}
template <class ELFT>
-void LLVMStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
- const Elf_Sym *First,
- Optional<StringRef> StrTable, bool IsDynamic,
- bool /*NonVisibilityBitsUsed*/) {
- std::string FullSymbolName =
- this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic);
- unsigned char SymbolType = Symbol->getType();
+void LLVMELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable,
+ Optional<StringRef> StrTable,
+ bool IsDynamic,
+ bool /*NonVisibilityBitsUsed*/) const {
+ std::string FullSymbolName = this->getFullSymbolName(
+ Symbol, SymIndex, ShndxTable, StrTable, IsDynamic);
+ unsigned char SymbolType = Symbol.getType();
DictScope D(W, "Symbol");
- W.printNumber("Name", FullSymbolName, Symbol->st_name);
- W.printHex("Value", Symbol->st_value);
- W.printNumber("Size", Symbol->st_size);
- W.printEnum("Binding", Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
- if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU &&
+ W.printNumber("Name", FullSymbolName, Symbol.st_name);
+ W.printHex("Value", Symbol.st_value);
+ W.printNumber("Size", Symbol.st_size);
+ W.printEnum("Binding", Symbol.getBinding(), makeArrayRef(ElfSymbolBindings));
+ if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU &&
SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
W.printEnum("Type", SymbolType, makeArrayRef(AMDGPUSymbolTypes));
else
W.printEnum("Type", SymbolType, makeArrayRef(ElfSymbolTypes));
- if (Symbol->st_other == 0)
+ if (Symbol.st_other == 0)
    // Usually the st_other flag is zero. Do not pollute the output
    // with the flags enumeration in that case.
W.printNumber("Other", 0);
else {
std::vector<EnumEntry<unsigned>> SymOtherFlags(std::begin(ElfSymOtherFlags),
std::end(ElfSymOtherFlags));
- if (Obj->getHeader()->e_machine == EM_MIPS) {
+ if (this->Obj.getHeader().e_machine == EM_MIPS) {
      // Someone in their infinite wisdom decided to make the STO_MIPS_MIPS16
      // flag overlap with other ST_MIPS_xxx flags, so consider both
      // cases separately.
- if ((Symbol->st_other & STO_MIPS_MIPS16) == STO_MIPS_MIPS16)
+ if ((Symbol.st_other & STO_MIPS_MIPS16) == STO_MIPS_MIPS16)
SymOtherFlags.insert(SymOtherFlags.end(),
std::begin(ElfMips16SymOtherFlags),
std::end(ElfMips16SymOtherFlags));
@@ -6417,40 +6221,37 @@ void LLVMStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
SymOtherFlags.insert(SymOtherFlags.end(),
std::begin(ElfMipsSymOtherFlags),
std::end(ElfMipsSymOtherFlags));
+ } else if (this->Obj.getHeader().e_machine == EM_AARCH64) {
+ SymOtherFlags.insert(SymOtherFlags.end(),
+ std::begin(ElfAArch64SymOtherFlags),
+ std::end(ElfAArch64SymOtherFlags));
}
- W.printFlags("Other", Symbol->st_other, makeArrayRef(SymOtherFlags), 0x3u);
+ W.printFlags("Other", Symbol.st_other, makeArrayRef(SymOtherFlags), 0x3u);
}
- printSymbolSection(Symbol, First);
+ printSymbolSection(Symbol, SymIndex, ShndxTable);
}
template <class ELFT>
-void LLVMStyle<ELFT>::printSymbols(const ELFO *Obj, bool PrintSymbols,
- bool PrintDynamicSymbols) {
- if (PrintSymbols)
- printSymbols(Obj);
- if (PrintDynamicSymbols)
- printDynamicSymbols(Obj);
-}
-
-template <class ELFT> void LLVMStyle<ELFT>::printSymbols(const ELFO *Obj) {
- ListScope Group(W, "Symbols");
- this->dumper()->printSymbolsHelper(false);
-}
-
-template <class ELFT>
-void LLVMStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
- ListScope Group(W, "DynamicSymbols");
- this->dumper()->printSymbolsHelper(true);
+void LLVMELFDumper<ELFT>::printSymbols(bool PrintSymbols,
+ bool PrintDynamicSymbols) {
+ if (PrintSymbols) {
+ ListScope Group(W, "Symbols");
+ this->printSymbolsHelper(false);
+ }
+ if (PrintDynamicSymbols) {
+ ListScope Group(W, "DynamicSymbols");
+ this->printSymbolsHelper(true);
+ }
}
-template <class ELFT> void LLVMStyle<ELFT>::printDynamic(const ELFFile<ELFT> *Obj) {
- Elf_Dyn_Range Table = this->dumper()->dynamic_table();
+template <class ELFT> void LLVMELFDumper<ELFT>::printDynamicTable() {
+ Elf_Dyn_Range Table = this->dynamic_table();
if (Table.empty())
return;
W.startLine() << "DynamicSection [ (" << Table.size() << " entries)\n";
- size_t MaxTagSize = getMaxDynamicTagSize(Obj, Table);
+ size_t MaxTagSize = getMaxDynamicTagSize(this->Obj, Table);
  // The "Name/Value" column should be indented from the "Type" column by N
  // spaces, where N = MaxTagSize - length of "Type" (4) + trailing
  // space (1), i.e. N = MaxTagSize - 3.
@@ -6460,108 +6261,49 @@ template <class ELFT> void LLVMStyle<ELFT>::printDynamic(const ELFFile<ELFT> *Ob
std::string ValueFmt = "%-" + std::to_string(MaxTagSize) + "s ";
for (auto Entry : Table) {
uintX_t Tag = Entry.getTag();
- std::string Value = this->dumper()->getDynamicEntry(Tag, Entry.getVal());
+ std::string Value = this->getDynamicEntry(Tag, Entry.getVal());
W.startLine() << " " << format_hex(Tag, ELFT::Is64Bits ? 18 : 10, true)
<< " "
<< format(ValueFmt.c_str(),
- Obj->getDynamicTagAsString(Tag).c_str())
+ this->Obj.getDynamicTagAsString(Tag).c_str())
<< Value << "\n";
}
W.startLine() << "]\n";
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
- const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion();
- const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion();
- const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion();
- const DynRegionInfo &DynPLTRelRegion = this->dumper()->getDynPLTRelRegion();
-
+template <class ELFT> void LLVMELFDumper<ELFT>::printDynamicRelocations() {
W.startLine() << "Dynamic Relocations {\n";
W.indent();
- if (DynRelaRegion.Size > 0) {
- for (const Elf_Rela &Rela : this->dumper()->dyn_relas())
- printDynamicRelocation(Obj, Rela);
- }
- if (DynRelRegion.Size > 0) {
- for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) {
- Elf_Rela Rela;
- Rela.r_offset = Rel.r_offset;
- Rela.r_info = Rel.r_info;
- Rela.r_addend = 0;
- printDynamicRelocation(Obj, Rela);
- }
- }
-
- if (DynRelrRegion.Size > 0) {
- Elf_Relr_Range Relrs = this->dumper()->dyn_relrs();
- std::vector<Elf_Rela> RelrRelas =
- unwrapOrError(this->FileName, Obj->decode_relrs(Relrs));
- for (const Elf_Rela &Rela : RelrRelas)
- printDynamicRelocation(Obj, Rela);
- }
- if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela))
- for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef<Elf_Rela>())
- printDynamicRelocation(Obj, Rela);
- else
- for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef<Elf_Rel>()) {
- Elf_Rela Rela;
- Rela.r_offset = Rel.r_offset;
- Rela.r_info = Rel.r_info;
- Rela.r_addend = 0;
- printDynamicRelocation(Obj, Rela);
- }
+ this->printDynamicRelocationsHelper();
W.unindent();
W.startLine() << "}\n";
}
template <class ELFT>
-void LLVMStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel) {
- SmallString<32> RelocName;
- Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
- std::string SymbolName =
- getSymbolForReloc(Obj, this->FileName, this->dumper(), Rel).Name;
-
- if (opts::ExpandRelocs) {
- DictScope Group(W, "Relocation");
- W.printHex("Offset", Rel.r_offset);
- W.printNumber("Type", RelocName, (int)Rel.getType(Obj->isMips64EL()));
- W.printString("Symbol", !SymbolName.empty() ? SymbolName : "-");
- W.printHex("Addend", Rel.r_addend);
- } else {
- raw_ostream &OS = W.startLine();
- OS << W.hex(Rel.r_offset) << " " << RelocName << " "
- << (!SymbolName.empty() ? SymbolName : "-") << " " << W.hex(Rel.r_addend)
- << "\n";
- }
-}
-
-template <class ELFT>
-void LLVMStyle<ELFT>::printProgramHeaders(
- const ELFO *Obj, bool PrintProgramHeaders,
- cl::boolOrDefault PrintSectionMapping) {
+void LLVMELFDumper<ELFT>::printProgramHeaders(
+ bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) {
if (PrintProgramHeaders)
- printProgramHeaders(Obj);
+ printProgramHeaders();
if (PrintSectionMapping == cl::BOU_TRUE)
- printSectionMapping(Obj);
+ printSectionMapping();
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printProgramHeaders() {
ListScope L(W, "ProgramHeaders");
- Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = Obj->program_headers();
+ Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = this->Obj.program_headers();
if (!PhdrsOrErr) {
- this->reportUniqueWarning(createError("unable to dump program headers: " +
- toString(PhdrsOrErr.takeError())));
+ this->reportUniqueWarning("unable to dump program headers: " +
+ toString(PhdrsOrErr.takeError()));
return;
}
for (const Elf_Phdr &Phdr : *PhdrsOrErr) {
DictScope P(W, "ProgramHeader");
- W.printHex("Type",
- getElfSegmentType(Obj->getHeader()->e_machine, Phdr.p_type),
- Phdr.p_type);
+ StringRef Type =
+ segmentTypeToString(this->Obj.getHeader().e_machine, Phdr.p_type);
+
+ W.printHex("Type", Type.empty() ? "Unknown" : Type, Phdr.p_type);
W.printHex("Offset", Phdr.p_offset);
W.printHex("VirtualAddress", Phdr.p_vaddr);
W.printHex("PhysicalAddress", Phdr.p_paddr);
@@ -6573,16 +6315,16 @@ void LLVMStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
}
template <class ELFT>
-void LLVMStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) {
+void LLVMELFDumper<ELFT>::printVersionSymbolSection(const Elf_Shdr *Sec) {
ListScope SS(W, "VersionSymbols");
if (!Sec)
return;
StringRef StrTable;
ArrayRef<Elf_Sym> Syms;
+ const Elf_Shdr *SymTabSec;
Expected<ArrayRef<Elf_Versym>> VerTableOrErr =
- this->dumper()->getVersionTable(Sec, &Syms, &StrTable);
+ this->getVersionTable(*Sec, &Syms, &StrTable, &SymTabSec);
if (!VerTableOrErr) {
this->reportUniqueWarning(VerTableOrErr.takeError());
return;
@@ -6591,22 +6333,28 @@ void LLVMStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
if (StrTable.empty() || Syms.empty() || Syms.size() != VerTableOrErr->size())
return;
+ ArrayRef<Elf_Word> ShNdxTable = this->getShndxTable(SymTabSec);
for (size_t I = 0, E = Syms.size(); I < E; ++I) {
DictScope S(W, "Symbol");
W.printNumber("Version", (*VerTableOrErr)[I].vs_index & VERSYM_VERSION);
- W.printString("Name", this->dumper()->getFullSymbolName(
- &Syms[I], StrTable, /*IsDynamic=*/true));
+ W.printString("Name",
+ this->getFullSymbolName(Syms[I], I, ShNdxTable, StrTable,
+ /*IsDynamic=*/true));
}
}
+static const EnumEntry<unsigned> SymVersionFlags[] = {
+ {"Base", "BASE", VER_FLG_BASE},
+ {"Weak", "WEAK", VER_FLG_WEAK},
+ {"Info", "INFO", VER_FLG_INFO}};
+
template <class ELFT>
-void LLVMStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) {
+void LLVMELFDumper<ELFT>::printVersionDefinitionSection(const Elf_Shdr *Sec) {
ListScope SD(W, "VersionDefinitions");
if (!Sec)
return;
- Expected<std::vector<VerDef>> V = this->dumper()->getVersionDefinitions(Sec);
+ Expected<std::vector<VerDef>> V = this->Obj.getVersionDefinitions(*Sec);
if (!V) {
this->reportUniqueWarning(V.takeError());
return;
@@ -6626,14 +6374,13 @@ void LLVMStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
}
template <class ELFT>
-void LLVMStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
- const Elf_Shdr *Sec) {
+void LLVMELFDumper<ELFT>::printVersionDependencySection(const Elf_Shdr *Sec) {
ListScope SD(W, "VersionRequirements");
if (!Sec)
return;
Expected<std::vector<VerNeed>> V =
- this->dumper()->getVersionDependencies(Sec);
+ this->Obj.getVersionDependencies(*Sec, this->WarningHandler);
if (!V) {
this->reportUniqueWarning(V.takeError());
return;
@@ -6656,75 +6403,49 @@ void LLVMStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
}
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printHashHistograms(const ELFFile<ELFT> *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printHashHistograms() {
W.startLine() << "Hash Histogram not implemented!\n";
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printCGProfile(const ELFFile<ELFT> *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printCGProfile() {
ListScope L(W, "CGProfile");
- if (!this->dumper()->getDotCGProfileSec())
+ if (!this->DotCGProfileSec)
return;
Expected<ArrayRef<Elf_CGProfile>> CGProfileOrErr =
- Obj->template getSectionContentsAsArray<Elf_CGProfile>(
- this->dumper()->getDotCGProfileSec());
+ this->Obj.template getSectionContentsAsArray<Elf_CGProfile>(
+ *this->DotCGProfileSec);
if (!CGProfileOrErr) {
this->reportUniqueWarning(
- createError("unable to dump the SHT_LLVM_CALL_GRAPH_PROFILE section: " +
- toString(CGProfileOrErr.takeError())));
+ "unable to dump the SHT_LLVM_CALL_GRAPH_PROFILE section: " +
+ toString(CGProfileOrErr.takeError()));
return;
}
for (const Elf_CGProfile &CGPE : *CGProfileOrErr) {
DictScope D(W, "CGProfileEntry");
- W.printNumber("From", this->dumper()->getStaticSymbolName(CGPE.cgp_from),
+ W.printNumber("From", this->getStaticSymbolName(CGPE.cgp_from),
CGPE.cgp_from);
- W.printNumber("To", this->dumper()->getStaticSymbolName(CGPE.cgp_to),
+ W.printNumber("To", this->getStaticSymbolName(CGPE.cgp_to),
CGPE.cgp_to);
W.printNumber("Weight", CGPE.cgp_weight);
}
}
-static Expected<std::vector<uint64_t>> toULEB128Array(ArrayRef<uint8_t> Data) {
- std::vector<uint64_t> Ret;
- const uint8_t *Cur = Data.begin();
- const uint8_t *End = Data.end();
- while (Cur != End) {
- unsigned Size;
- const char *Err;
- Ret.push_back(decodeULEB128(Cur, &Size, End, &Err));
- if (Err)
- return createError(Err);
- Cur += Size;
- }
- return Ret;
-}
-
-template <class ELFT>
-void LLVMStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printAddrsig() {
ListScope L(W, "Addrsig");
- if (!this->dumper()->getDotAddrsigSec())
+ if (!this->DotAddrsigSec)
return;
- ArrayRef<uint8_t> Contents = unwrapOrError(
- this->FileName,
- Obj->getSectionContents(this->dumper()->getDotAddrsigSec()));
- Expected<std::vector<uint64_t>> V = toULEB128Array(Contents);
- if (!V) {
- reportWarning(V.takeError(), this->FileName);
+
+ Expected<std::vector<uint64_t>> SymsOrErr =
+ decodeAddrsigSection(this->Obj, *this->DotAddrsigSec);
+ if (!SymsOrErr) {
+ this->reportUniqueWarning(SymsOrErr.takeError());
return;
}
- for (uint64_t Sym : *V) {
- Expected<std::string> NameOrErr = this->dumper()->getStaticSymbolName(Sym);
- if (NameOrErr) {
- W.printNumber("Sym", *NameOrErr, Sym);
- continue;
- }
- reportWarning(NameOrErr.takeError(), this->FileName);
- W.printNumber("Sym", "<?>", Sym);
- }
+ for (uint64_t Sym : *SymsOrErr)
+ W.printNumber("Sym", this->getStaticSymbolName(Sym), Sym);
}
template <typename ELFT>
@@ -6752,7 +6473,7 @@ static void printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
break;
case ELF::NT_GNU_PROPERTY_TYPE_0:
ListScope D(W, "Property");
- for (const auto &Property : getGNUPropertyList<ELFT>(Desc))
+ for (const std::string &Property : getGNUPropertyList<ELFT>(Desc))
W.printString(Property);
break;
}
@@ -6769,19 +6490,22 @@ static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) {
}
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printNotes() {
ListScope L(W, "Notes");
- auto PrintHeader = [&](Optional<StringRef> SecName,
- const typename ELFT::Off Offset,
- const typename ELFT::Addr Size) {
+ std::unique_ptr<DictScope> NoteScope;
+ auto StartNotes = [&](Optional<StringRef> SecName,
+ const typename ELFT::Off Offset,
+ const typename ELFT::Addr Size) {
+ NoteScope = std::make_unique<DictScope>(W, "NoteSection");
W.printString("Name", SecName ? *SecName : "<?>");
W.printHex("Offset", Offset);
W.printHex("Size", Size);
};
- auto ProcessNote = [&](const Elf_Note &Note) {
+ auto EndNotes = [&] { NoteScope.reset(); };
+
+ auto ProcessNote = [&](const Elf_Note &Note) -> Error {
DictScope D2(W, "Note");
StringRef Name = Note.getName();
ArrayRef<uint8_t> Descriptor = Note.getDesc();
@@ -6790,24 +6514,14 @@ void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
// Print the note owner/type.
W.printString("Owner", Name);
W.printHex("Data size", Descriptor.size());
- if (Name == "GNU") {
- W.printString("Type", getGNUNoteTypeName(Type));
- } else if (Name == "FreeBSD") {
- W.printString("Type", getFreeBSDNoteTypeName(Type));
- } else if (Name == "AMD") {
- W.printString("Type", getAMDNoteTypeName(Type));
- } else if (Name == "AMDGPU") {
- W.printString("Type", getAMDGPUNoteTypeName(Type));
- } else {
- StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE
- ? getCoreNoteTypeName(Type)
- : getGenericNoteTypeName(Type);
- if (!NoteType.empty())
- W.printString("Type", NoteType);
- else
- W.printString("Type",
- "Unknown (" + to_string(format_hex(Type, 10)) + ")");
- }
+
+ StringRef NoteType =
+ getNoteTypeName<ELFT>(Note, this->Obj.getHeader().e_type);
+ if (!NoteType.empty())
+ W.printString("Type", NoteType);
+ else
+ W.printString("Type",
+ "Unknown (" + to_string(format_hex(Type, 10)) + ")");
    // Print the description, or fall back to printing raw bytes for unknown
    // owners.
@@ -6826,91 +6540,56 @@ void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
DataExtractor DescExtractor(Descriptor,
ELFT::TargetEndianness == support::little,
sizeof(Elf_Addr));
- Expected<CoreNote> Note = readCoreNote(DescExtractor);
- if (Note)
+ if (Expected<CoreNote> Note = readCoreNote(DescExtractor))
printCoreNoteLLVMStyle(*Note, W);
else
- reportWarning(Note.takeError(), this->FileName);
+ return Note.takeError();
}
} else if (!Descriptor.empty()) {
W.printBinaryBlock("Description data", Descriptor);
}
+ return Error::success();
};
- ArrayRef<Elf_Shdr> Sections = cantFail(Obj->sections());
- if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) {
- for (const auto &S : Sections) {
- if (S.sh_type != SHT_NOTE)
- continue;
- DictScope D(W, "NoteSection");
- PrintHeader(expectedToOptional(Obj->getSectionName(&S)), S.sh_offset,
- S.sh_size);
- Error Err = Error::success();
- for (auto Note : Obj->notes(S, Err))
- ProcessNote(Note);
- if (Err)
- reportError(std::move(Err), this->FileName);
- }
- } else {
- Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = Obj->program_headers();
- if (!PhdrsOrErr) {
- this->reportUniqueWarning(createError(
- "unable to read program headers to locate the PT_NOTE segment: " +
- toString(PhdrsOrErr.takeError())));
- return;
- }
-
- for (const Elf_Phdr &P : *PhdrsOrErr) {
- if (P.p_type != PT_NOTE)
- continue;
- DictScope D(W, "NoteSection");
- PrintHeader(/*SecName=*/None, P.p_offset, P.p_filesz);
- Error Err = Error::success();
- for (auto Note : Obj->notes(P, Err))
- ProcessNote(Note);
- if (Err)
- reportError(std::move(Err), this->FileName);
- }
- }
+ printNotesHelper(*this, StartNotes, ProcessNote, EndNotes);
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printELFLinkerOptions(const ELFFile<ELFT> *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printELFLinkerOptions() {
ListScope L(W, "LinkerOptions");
unsigned I = -1;
- for (const Elf_Shdr &Shdr : cantFail(Obj->sections())) {
+ for (const Elf_Shdr &Shdr : cantFail(this->Obj.sections())) {
++I;
if (Shdr.sh_type != ELF::SHT_LLVM_LINKER_OPTIONS)
continue;
- Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj->getSectionContents(&Shdr);
+ Expected<ArrayRef<uint8_t>> ContentsOrErr =
+ this->Obj.getSectionContents(Shdr);
if (!ContentsOrErr) {
- this->reportUniqueWarning(
- createError("unable to read the content of the "
- "SHT_LLVM_LINKER_OPTIONS section: " +
- toString(ContentsOrErr.takeError())));
+ this->reportUniqueWarning("unable to read the content of the "
+ "SHT_LLVM_LINKER_OPTIONS section: " +
+ toString(ContentsOrErr.takeError()));
continue;
}
if (ContentsOrErr->empty())
continue;
if (ContentsOrErr->back() != 0) {
- this->reportUniqueWarning(
- createError("SHT_LLVM_LINKER_OPTIONS section at index " + Twine(I) +
- " is broken: the "
- "content is not null-terminated"));
+ this->reportUniqueWarning("SHT_LLVM_LINKER_OPTIONS section at index " +
+ Twine(I) +
+ " is broken: the "
+ "content is not null-terminated");
continue;
}
SmallVector<StringRef, 16> Strings;
toStringRef(ContentsOrErr->drop_back()).split(Strings, '\0');
if (Strings.size() % 2 != 0) {
- this->reportUniqueWarning(createError(
+ this->reportUniqueWarning(
"SHT_LLVM_LINKER_OPTIONS section at index " + Twine(I) +
" is broken: an incomplete "
"key-value pair was found. The last possible key was: \"" +
- Strings.back() + "\""));
+ Strings.back() + "\"");
continue;
}
@@ -6919,32 +6598,30 @@ void LLVMStyle<ELFT>::printELFLinkerOptions(const ELFFile<ELFT> *Obj) {
}
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printDependentLibs(const ELFFile<ELFT> *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printDependentLibs() {
ListScope L(W, "DependentLibs");
this->printDependentLibsHelper(
- Obj, [](const Elf_Shdr &) {},
+ [](const Elf_Shdr &) {},
[this](StringRef Lib, uint64_t) { W.printString(Lib); });
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printStackSizes(const ELFObjectFile<ELFT> *Obj) {
+template <class ELFT> void LLVMELFDumper<ELFT>::printStackSizes() {
ListScope L(W, "StackSizes");
- if (Obj->isRelocatableObject())
- this->printRelocatableStackSizes(Obj, []() {});
+ if (this->Obj.getHeader().e_type == ELF::ET_REL)
+ this->printRelocatableStackSizes([]() {});
else
- this->printNonRelocatableStackSizes(Obj, []() {});
+ this->printNonRelocatableStackSizes([]() {});
}
template <class ELFT>
-void LLVMStyle<ELFT>::printStackSizeEntry(uint64_t Size, StringRef FuncName) {
+void LLVMELFDumper<ELFT>::printStackSizeEntry(uint64_t Size, StringRef FuncName) {
DictScope D(W, "Entry");
W.printString("Function", FuncName);
W.printHex("Size", Size);
}
template <class ELFT>
-void LLVMStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
+void LLVMELFDumper<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
auto PrintEntry = [&](const Elf_Addr *E) {
W.printHex("Address", Parser.getGotAddress(E));
W.printNumber("Access", Parser.getGotOffset(E));
@@ -6986,14 +6663,18 @@ void LLVMStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
PrintEntry(&E);
- const Elf_Sym *Sym = Parser.getGotSym(&E);
- W.printHex("Value", Sym->st_value);
- W.printEnum("Type", Sym->getType(), makeArrayRef(ElfSymbolTypes));
- printSymbolSection(Sym, this->dumper()->dynamic_symbols().begin());
+ const Elf_Sym &Sym = *Parser.getGotSym(&E);
+ W.printHex("Value", Sym.st_value);
+ W.printEnum("Type", Sym.getType(), makeArrayRef(ElfSymbolTypes));
- std::string SymName = this->dumper()->getFullSymbolName(
- Sym, this->dumper()->getDynamicStringTable(), true);
- W.printNumber("Name", SymName, Sym->st_name);
+ const unsigned SymIndex = &Sym - this->dynamic_symbols().begin();
+ DataRegion<Elf_Word> ShndxTable(
+ (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
+ printSymbolSection(Sym, SymIndex, ShndxTable);
+
+ std::string SymName = this->getFullSymbolName(
+ Sym, SymIndex, ShndxTable, this->DynamicStringTable, true);
+ W.printNumber("Name", SymName, Sym.st_name);
}
}
@@ -7002,7 +6683,7 @@ void LLVMStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
}
template <class ELFT>
-void LLVMStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
+void LLVMELFDumper<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
auto PrintEntry = [&](const Elf_Addr *E) {
W.printHex("Address", Parser.getPltAddress(E));
W.printHex("Initial", *E);
@@ -7026,40 +6707,41 @@ void LLVMStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
}
{
ListScope LS(W, "Entries");
+ DataRegion<Elf_Word> ShndxTable(
+ (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
for (auto &E : Parser.getPltEntries()) {
DictScope D(W, "Entry");
PrintEntry(&E);
- const Elf_Sym *Sym = Parser.getPltSym(&E);
- W.printHex("Value", Sym->st_value);
- W.printEnum("Type", Sym->getType(), makeArrayRef(ElfSymbolTypes));
- printSymbolSection(Sym, this->dumper()->dynamic_symbols().begin());
-
- std::string SymName =
- this->dumper()->getFullSymbolName(Sym, Parser.getPltStrTable(), true);
- W.printNumber("Name", SymName, Sym->st_name);
+ const Elf_Sym &Sym = *Parser.getPltSym(&E);
+ W.printHex("Value", Sym.st_value);
+ W.printEnum("Type", Sym.getType(), makeArrayRef(ElfSymbolTypes));
+ printSymbolSection(Sym, &Sym - this->dynamic_symbols().begin(),
+ ShndxTable);
+
+ const Elf_Sym *FirstSym = cantFail(
+ this->Obj.template getEntry<Elf_Sym>(*Parser.getPltSymTable(), 0));
+ std::string SymName = this->getFullSymbolName(
+ Sym, &Sym - FirstSym, ShndxTable, Parser.getPltStrTable(), true);
+ W.printNumber("Name", SymName, Sym.st_name);
}
}
}
-template <class ELFT>
-void LLVMStyle<ELFT>::printMipsABIFlags(const ELFObjectFile<ELFT> *ObjF) {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- const Elf_Shdr *Shdr =
- findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.abiflags");
- if (!Shdr) {
- W.startLine() << "There is no .MIPS.abiflags section in the file.\n";
- return;
- }
- ArrayRef<uint8_t> Sec =
- unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr));
- if (Sec.size() != sizeof(Elf_Mips_ABIFlags<ELFT>)) {
- W.startLine() << "The .MIPS.abiflags section has a wrong size.\n";
+template <class ELFT> void LLVMELFDumper<ELFT>::printMipsABIFlags() {
+ const Elf_Mips_ABIFlags<ELFT> *Flags;
+ if (Expected<const Elf_Mips_ABIFlags<ELFT> *> SecOrErr =
+ getMipsAbiFlagsSection(*this)) {
+ Flags = *SecOrErr;
+ if (!Flags) {
+ W.startLine() << "There is no .MIPS.abiflags section in the file.\n";
+ return;
+ }
+ } else {
+ this->reportUniqueWarning(SecOrErr.takeError());
return;
}
- auto *Flags = reinterpret_cast<const Elf_Mips_ABIFlags<ELFT> *>(Sec.data());
-
raw_ostream &OS = W.getOStream();
DictScope GS(W, "MIPS ABI Flags");
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/Error.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/Error.cpp
deleted file mode 100644
index 1010f18a58c8..000000000000
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/Error.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===- Error.cpp - system_error extensions for llvm-readobj -----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This defines a new error_category for the llvm-readobj tool.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Error.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-namespace {
-// FIXME: This class is only here to support the transition to llvm::Error. It
-// will be removed once this transition is complete. Clients should prefer to
-// deal with the Error value directly, rather than converting to error_code.
-class _readobj_error_category : public std::error_category {
-public:
- const char* name() const noexcept override;
- std::string message(int ev) const override;
-};
-} // namespace
-
-const char *_readobj_error_category::name() const noexcept {
- return "llvm.readobj";
-}
-
-std::string _readobj_error_category::message(int EV) const {
- switch (static_cast<readobj_error>(EV)) {
- case readobj_error::success: return "Success";
- case readobj_error::file_not_found:
- return "No such file.";
- case readobj_error::unsupported_file_format:
- return "The file was not recognized as a valid object file.";
- case readobj_error::unrecognized_file_format:
- return "Unrecognized file type.";
- case readobj_error::unsupported_obj_file_format:
- return "Unsupported object file format.";
- case readobj_error::unknown_symbol:
- return "Unknown symbol.";
- }
- llvm_unreachable("An enumerator of readobj_error does not have a message "
- "defined.");
-}
-
-namespace llvm {
-const std::error_category &readobj_category() {
- static _readobj_error_category o;
- return o;
-}
-} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/Error.h b/contrib/llvm-project/llvm/tools/llvm-readobj/Error.h
deleted file mode 100644
index f390e1b96f8a..000000000000
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/Error.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===- Error.h - system_error extensions for llvm-readobj -------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This declares a new error_category for the llvm-readobj tool.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_READOBJ_ERROR_H
-#define LLVM_TOOLS_LLVM_READOBJ_ERROR_H
-
-#include <system_error>
-
-namespace llvm {
-const std::error_category &readobj_category();
-
-enum class readobj_error {
- success = 0,
- file_not_found,
- unsupported_file_format,
- unrecognized_file_format,
- unsupported_obj_file_format,
- unknown_symbol
-};
-
-inline std::error_code make_error_code(readobj_error e) {
- return std::error_code(static_cast<int>(e), readobj_category());
-}
-
-} // namespace llvm
-
-namespace std {
-template <> struct is_error_code_enum<llvm::readobj_error> : std::true_type {};
-}
-
-#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/MachODumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/MachODumper.cpp
index 20a60b3df699..c13b1f3bf2a0 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/MachODumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/MachODumper.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "Error.h"
#include "ObjDumper.h"
#include "StackMapPrinter.h"
#include "llvm-readobj.h"
@@ -28,7 +27,7 @@ namespace {
class MachODumper : public ObjDumper {
public:
MachODumper(const MachOObjectFile *Obj, ScopedPrinter &Writer)
- : ObjDumper(Writer), Obj(Obj) {}
+ : ObjDumper(Writer, Obj->getFileName()), Obj(Obj) {}
void printFileHeaders() override;
void printSectionHeaders() override;
@@ -68,15 +67,9 @@ private:
namespace llvm {
-std::error_code createMachODumper(const object::ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result) {
- const MachOObjectFile *MachOObj = dyn_cast<MachOObjectFile>(Obj);
- if (!MachOObj)
- return readobj_error::unsupported_obj_file_format;
-
- Result.reset(new MachODumper(MachOObj, Writer));
- return readobj_error::success;
+std::unique_ptr<ObjDumper> createMachODumper(const object::MachOObjectFile &Obj,
+ ScopedPrinter &Writer) {
+ return std::make_unique<MachODumper>(&Obj, Writer);
}
} // namespace llvm
@@ -161,8 +154,9 @@ static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM[] = {
};
static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM64[] = {
- LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64_ALL),
- LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64E),
+ LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64_ALL),
+ LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64_V8),
+ LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64E),
};
static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesSPARC[] = {
@@ -629,13 +623,20 @@ void MachODumper::printSymbol(const SymbolRef &Symbol) {
getSymbol(Obj, Symbol.getRawDataRefImpl(), MOSymbol);
StringRef SectionName = "";
- Expected<section_iterator> SecIOrErr = Symbol.getSection();
- if (!SecIOrErr)
- reportError(SecIOrErr.takeError(), Obj->getFileName());
-
- section_iterator SecI = *SecIOrErr;
- if (SecI != Obj->section_end())
- SectionName = unwrapOrError(Obj->getFileName(), SecI->getName());
+  // Don't ask a Mach-O STABS symbol for its section unless we know that the
+  // STAB symbol's section field refers to a valid section index. Otherwise,
+  // the lookup may fail when it tries to load a section that does not exist.
+ // TODO: Add a whitelist of STABS symbol types that contain valid section
+ // indices.
+ if (!(MOSymbol.Type & MachO::N_STAB)) {
+ Expected<section_iterator> SecIOrErr = Symbol.getSection();
+ if (!SecIOrErr)
+ reportError(SecIOrErr.takeError(), Obj->getFileName());
+
+ section_iterator SecI = *SecIOrErr;
+ if (SecI != Obj->section_end())
+ SectionName = unwrapOrError(Obj->getFileName(), SecI->getName());
+ }
DictScope D(W, "Symbol");
W.printNumber("Name", SymbolName, MOSymbol.StringIndex);
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp
index ce61f1c53a4d..fc91d81f0784 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "ObjDumper.h"
-#include "Error.h"
#include "llvm-readobj.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
@@ -27,9 +26,25 @@ static inline Error createError(const Twine &Msg) {
return createStringError(object::object_error::parse_failed, Msg);
}
-ObjDumper::ObjDumper(ScopedPrinter &Writer) : W(Writer) {}
+ObjDumper::ObjDumper(ScopedPrinter &Writer, StringRef ObjName) : W(Writer) {
+ // Dumper reports all non-critical errors as warnings.
+ // It does not print the same warning more than once.
+ WarningHandler = [=](const Twine &Msg) {
+ if (Warnings.insert(Msg.str()).second)
+ reportWarning(createError(Msg), ObjName);
+ return Error::success();
+ };
+}
+
+ObjDumper::~ObjDumper() {}
+
+void ObjDumper::reportUniqueWarning(Error Err) const {
+ reportUniqueWarning(toString(std::move(Err)));
+}
-ObjDumper::~ObjDumper() {
+void ObjDumper::reportUniqueWarning(const Twine &Msg) const {
+ cantFail(WarningHandler(Msg),
+ "WarningHandler should always return ErrorSuccess");
}
static void printAsPrintable(raw_ostream &W, const uint8_t *Start, size_t Len) {
@@ -38,7 +53,7 @@ static void printAsPrintable(raw_ostream &W, const uint8_t *Start, size_t Len) {
}
static std::vector<object::SectionRef>
-getSectionRefsByNameOrIndex(const object::ObjectFile *Obj,
+getSectionRefsByNameOrIndex(const object::ObjectFile &Obj,
ArrayRef<std::string> Sections) {
std::vector<object::SectionRef> Ret;
std::map<std::string, bool> SecNames;
@@ -51,9 +66,9 @@ getSectionRefsByNameOrIndex(const object::ObjectFile *Obj,
SecNames.emplace(std::string(Section), false);
}
- SecIndex = Obj->isELF() ? 0 : 1;
- for (object::SectionRef SecRef : Obj->sections()) {
- StringRef SecName = unwrapOrError(Obj->getFileName(), SecRef.getName());
+ SecIndex = Obj.isELF() ? 0 : 1;
+ for (object::SectionRef SecRef : Obj.sections()) {
+ StringRef SecName = unwrapOrError(Obj.getFileName(), SecRef.getName());
auto NameIt = SecNames.find(std::string(SecName));
if (NameIt != SecNames.end())
NameIt->second = true;
@@ -69,24 +84,23 @@ getSectionRefsByNameOrIndex(const object::ObjectFile *Obj,
if (!S.second)
reportWarning(
createError(formatv("could not find section '{0}'", S.first).str()),
- Obj->getFileName());
+ Obj.getFileName());
for (std::pair<unsigned, bool> S : SecIndices)
if (!S.second)
reportWarning(
createError(formatv("could not find section {0}", S.first).str()),
- Obj->getFileName());
+ Obj.getFileName());
return Ret;
}
-void ObjDumper::printSectionsAsString(const object::ObjectFile *Obj,
+void ObjDumper::printSectionsAsString(const object::ObjectFile &Obj,
ArrayRef<std::string> Sections) {
bool First = true;
for (object::SectionRef Section :
getSectionRefsByNameOrIndex(Obj, Sections)) {
- StringRef SectionName =
- unwrapOrError(Obj->getFileName(), Section.getName());
+ StringRef SectionName = unwrapOrError(Obj.getFileName(), Section.getName());
if (!First)
W.startLine() << '\n';
@@ -94,7 +108,7 @@ void ObjDumper::printSectionsAsString(const object::ObjectFile *Obj,
W.startLine() << "String dump of section '" << SectionName << "':\n";
StringRef SectionContent =
- unwrapOrError(Obj->getFileName(), Section.getContents());
+ unwrapOrError(Obj.getFileName(), Section.getContents());
const uint8_t *SecContent = SectionContent.bytes_begin();
const uint8_t *CurrentWord = SecContent;
@@ -115,13 +129,12 @@ void ObjDumper::printSectionsAsString(const object::ObjectFile *Obj,
}
}
-void ObjDumper::printSectionsAsHex(const object::ObjectFile *Obj,
+void ObjDumper::printSectionsAsHex(const object::ObjectFile &Obj,
ArrayRef<std::string> Sections) {
bool First = true;
for (object::SectionRef Section :
getSectionRefsByNameOrIndex(Obj, Sections)) {
- StringRef SectionName =
- unwrapOrError(Obj->getFileName(), Section.getName());
+ StringRef SectionName = unwrapOrError(Obj.getFileName(), Section.getName());
if (!First)
W.startLine() << '\n';
@@ -129,7 +142,7 @@ void ObjDumper::printSectionsAsHex(const object::ObjectFile *Obj,
W.startLine() << "Hex dump of section '" << SectionName << "':\n";
StringRef SectionContent =
- unwrapOrError(Obj->getFileName(), Section.getContents());
+ unwrapOrError(Obj.getFileName(), Section.getContents());
const uint8_t *SecContent = SectionContent.bytes_begin();
const uint8_t *SecEnd = SecContent + SectionContent.size();
@@ -155,8 +168,9 @@ void ObjDumper::printSectionsAsHex(const object::ObjectFile *Obj,
      // Lastly, if we cut in the middle of a row, we add the remaining
      // characters, which is (8 - (k * 2)).
if (i < 4)
- W.startLine() << format("%*c", (4 - i) * 8 + (4 - i) + (8 - (k * 2)),
- ' ');
+ W.startLine() << format("%*c", (4 - i) * 8 + (4 - i), ' ');
+ if (k < 4)
+ W.startLine() << format("%*c", 8 - k * 2, ' ');
TmpSecPtr = SecPtr;
for (i = 0; TmpSecPtr + i < SecEnd && i < 16; ++i)
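For reference, the deduplicated-warning pattern that the new ObjDumper constructor above installs can be sketched in plain C++ as follows; LLVM's Twine/Error plumbing is replaced by std::string, and the class and variable names are illustrative only, not part of the patch:

#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>

// Minimal sketch of the unique-warning handler installed by ObjDumper above:
// the handler remembers every message it has seen and prints only the first
// occurrence of each one.
class UniqueWarner {
public:
  explicit UniqueWarner(std::string ObjName) {
    Handler = [this, ObjName](const std::string &Msg) {
      // insert() returns {iterator, inserted}; print only on first insertion.
      if (Seen.insert(Msg).second)
        std::cerr << "warning: '" << ObjName << "': " << Msg << '\n';
    };
  }
  void reportUniqueWarning(const std::string &Msg) const { Handler(Msg); }

private:
  std::function<void(const std::string &)> Handler;
  std::unordered_set<std::string> Seen;
};

int main() {
  UniqueWarner W("a.out");
  W.reportUniqueWarning("invalid section header"); // printed once
  W.reportUniqueWarning("invalid section header"); // suppressed
}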
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h
index 57477606d6e8..d4e166b504cf 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h
@@ -16,10 +16,14 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
+#include <unordered_set>
+
namespace llvm {
namespace object {
class COFFImportFile;
class ObjectFile;
+class XCOFFObjectFile;
+class ELFObjectFileBase;
}
namespace codeview {
class GlobalTypeTableBuilder;
@@ -30,9 +34,11 @@ class ScopedPrinter;
class ObjDumper {
public:
- ObjDumper(ScopedPrinter &Writer);
+ ObjDumper(ScopedPrinter &Writer, StringRef ObjName);
virtual ~ObjDumper();
+ virtual bool canDumpContent() { return true; }
+
virtual void printFileHeaders() = 0;
virtual void printSectionHeaders() = 0;
virtual void printRelocations() = 0;
@@ -59,7 +65,7 @@ public:
virtual void printNeededLibraries() { }
virtual void printSectionAsHex(StringRef SectionName) {}
virtual void printHashTable() { }
- virtual void printGnuHashTable(const object::ObjectFile *Obj) {}
+ virtual void printGnuHashTable() {}
virtual void printHashSymbols() {}
virtual void printLoadName() {}
virtual void printVersionInfo() {}
@@ -70,7 +76,8 @@ public:
virtual void printNotes() {}
virtual void printELFLinkerOptions() {}
virtual void printStackSizes() {}
- virtual void printArchSpecificInfo() { }
+ virtual void printSectionDetails() {}
+ virtual void printArchSpecificInfo() {}
// Only implemented for PE/COFF.
virtual void printCOFFImports() { }
@@ -78,6 +85,7 @@ public:
virtual void printCOFFDirectives() { }
virtual void printCOFFBaseReloc() { }
virtual void printCOFFDebugDirectory() { }
+ virtual void printCOFFTLSDirectory() {}
virtual void printCOFFResources() {}
virtual void printCOFFLoadConfig() { }
virtual void printCodeViewDebugInfo() { }
@@ -98,11 +106,15 @@ public:
virtual void printStackMap() const = 0;
- void printSectionsAsString(const object::ObjectFile *Obj,
+ void printSectionsAsString(const object::ObjectFile &Obj,
ArrayRef<std::string> Sections);
- void printSectionsAsHex(const object::ObjectFile *Obj,
+ void printSectionsAsHex(const object::ObjectFile &Obj,
ArrayRef<std::string> Sections);
+ std::function<Error(const Twine &Msg)> WarningHandler;
+ void reportUniqueWarning(Error Err) const;
+ void reportUniqueWarning(const Twine &Msg) const;
+
protected:
ScopedPrinter &W;
@@ -111,27 +123,24 @@ private:
virtual void printDynamicSymbols() {}
virtual void printProgramHeaders() {}
virtual void printSectionMapping() {}
+
+ std::unordered_set<std::string> Warnings;
};
-std::error_code createCOFFDumper(const object::ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result);
+std::unique_ptr<ObjDumper> createCOFFDumper(const object::COFFObjectFile &Obj,
+ ScopedPrinter &Writer);
-std::error_code createELFDumper(const object::ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result);
+std::unique_ptr<ObjDumper> createELFDumper(const object::ELFObjectFileBase &Obj,
+ ScopedPrinter &Writer);
-std::error_code createMachODumper(const object::ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result);
+std::unique_ptr<ObjDumper> createMachODumper(const object::MachOObjectFile &Obj,
+ ScopedPrinter &Writer);
-std::error_code createWasmDumper(const object::ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result);
+std::unique_ptr<ObjDumper> createWasmDumper(const object::WasmObjectFile &Obj,
+ ScopedPrinter &Writer);
-std::error_code createXCOFFDumper(const object::ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result);
+std::unique_ptr<ObjDumper> createXCOFFDumper(const object::XCOFFObjectFile &Obj,
+ ScopedPrinter &Writer);
void dumpCOFFImportFile(const object::COFFImportFile *File,
ScopedPrinter &Writer);
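A caller-side sketch of the new factory interface declared above. This is hedged: createDumper() is the static helper added to llvm-readobj.cpp further below and is assumed to be visible here, and the wrapper name dumpFileHeaders is illustrative rather than part of the patch:

#include "ObjDumper.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

// Illustrative only: consume the Expected-based factory instead of the old
// std::error_code out-parameter style.
static void dumpFileHeaders(const llvm::object::ObjectFile &Obj,
                            llvm::ScopedPrinter &Writer) {
  llvm::Expected<std::unique_ptr<llvm::ObjDumper>> DumperOrErr =
      createDumper(Obj, Writer); // assumed visible (see llvm-readobj.cpp below)
  if (!DumperOrErr) {
    llvm::errs() << "error: " << llvm::toString(DumperOrErr.takeError()) << "\n";
    return;
  }
  (*DumperOrErr)->printFileHeaders();
}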
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/WasmDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/WasmDumper.cpp
index a02dbb999826..fb7134d20a85 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/WasmDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/WasmDumper.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "Error.h"
#include "ObjDumper.h"
#include "llvm-readobj.h"
#include "llvm/Object/Wasm.h"
@@ -25,7 +24,7 @@ static const EnumEntry<unsigned> WasmSymbolTypes[] = {
#define ENUM_ENTRY(X) \
{ #X, wasm::WASM_SYMBOL_TYPE_##X }
ENUM_ENTRY(FUNCTION), ENUM_ENTRY(DATA), ENUM_ENTRY(GLOBAL),
- ENUM_ENTRY(SECTION), ENUM_ENTRY(EVENT),
+ ENUM_ENTRY(SECTION), ENUM_ENTRY(EVENT), ENUM_ENTRY(TABLE),
#undef ENUM_ENTRY
};
@@ -58,7 +57,7 @@ static const EnumEntry<unsigned> WasmSymbolFlags[] = {
class WasmDumper : public ObjDumper {
public:
WasmDumper(const WasmObjectFile *Obj, ScopedPrinter &Writer)
- : ObjDumper(Writer), Obj(Obj) {}
+ : ObjDumper(Writer, Obj->getFileName()), Obj(Obj) {}
void printFileHeaders() override;
void printSectionHeaders() override;
@@ -241,14 +240,9 @@ void WasmDumper::printSymbol(const SymbolRef &Sym) {
namespace llvm {
-std::error_code createWasmDumper(const object::ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result) {
- const auto *WasmObj = dyn_cast<WasmObjectFile>(Obj);
- assert(WasmObj && "createWasmDumper called with non-wasm object");
-
- Result.reset(new WasmDumper(WasmObj, Writer));
- return readobj_error::success;
+std::unique_ptr<ObjDumper> createWasmDumper(const object::WasmObjectFile &Obj,
+ ScopedPrinter &Writer) {
+ return std::make_unique<WasmDumper>(&Obj, Writer);
}
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/Win64EHDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/Win64EHDumper.cpp
index 380baae2eeb4..7e84c1bca35d 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/Win64EHDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/Win64EHDumper.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "Win64EHDumper.h"
-#include "Error.h"
#include "llvm-readobj.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -120,10 +119,10 @@ static std::error_code getSymbol(const COFFObjectFile &COFF, uint64_t VA,
return errorToErrorCode(Address.takeError());
if (*Address == VA) {
Sym = Symbol;
- return readobj_error::success;
+ return std::error_code();
}
}
- return readobj_error::unknown_symbol;
+ return inconvertibleErrorCode();
}
static std::string formatSymbol(const Dumper::Context &Ctx,
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/WindowsResourceDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/WindowsResourceDumper.cpp
index a2fb6aac3f93..fb085ecaa76e 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/WindowsResourceDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/WindowsResourceDumper.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "WindowsResourceDumper.h"
-#include "Error.h"
#include "llvm/Object/WindowsResource.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/XCOFFDumper.cpp
index dd62f98d9595..8f0f18cedceb 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/XCOFFDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/XCOFFDumper.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "Error.h"
#include "ObjDumper.h"
#include "llvm-readobj.h"
#include "llvm/Object/XCOFFObjectFile.h"
@@ -25,7 +24,7 @@ class XCOFFDumper : public ObjDumper {
public:
XCOFFDumper(const XCOFFObjectFile &Obj, ScopedPrinter &Writer)
- : ObjDumper(Writer), Obj(Obj) {}
+ : ObjDumper(Writer, Obj.getFileName()), Obj(Obj) {}
void printFileHeaders() override;
void printSectionHeaders() override;
@@ -515,14 +514,8 @@ void XCOFFDumper::printSectionHeaders(ArrayRef<T> Sections) {
}
namespace llvm {
-std::error_code createXCOFFDumper(const object::ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result) {
- const XCOFFObjectFile *XObj = dyn_cast<XCOFFObjectFile>(Obj);
- if (!XObj)
- return readobj_error::unsupported_obj_file_format;
-
- Result.reset(new XCOFFDumper(*XObj, Writer));
- return readobj_error::success;
+std::unique_ptr<ObjDumper>
+createXCOFFDumper(const object::XCOFFObjectFile &XObj, ScopedPrinter &Writer) {
+ return std::make_unique<XCOFFDumper>(XObj, Writer);
}
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp
index b9c6ad2256ae..41cd4414d051 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -19,20 +19,23 @@
//===----------------------------------------------------------------------===//
#include "llvm-readobj.h"
-#include "Error.h"
#include "ObjDumper.h"
#include "WindowsResourceDumper.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFFImportFile.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Wasm.h"
#include "llvm/Object/WindowsResource.h"
+#include "llvm/Object/XCOFFObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/InitLLVM.h"
@@ -63,7 +66,7 @@ namespace opts {
DependentLibraries("dependent-libraries",
cl::desc("Display the dependent libraries section"));
- // --headers -e
+ // --headers, -e
cl::opt<bool>
Headers("headers",
cl::desc("Equivalent to setting: --file-headers, --program-headers, "
@@ -134,6 +137,11 @@ namespace opts {
cl::opt<bool> DynRelocs("dyn-relocations",
cl::desc("Display the dynamic relocation entries in the file"));
+ // --section-details
+ // Also -t in llvm-readelf mode.
+ cl::opt<bool> SectionDetails("section-details",
+ cl::desc("Display the section details"));
+
// --symbols
// Also -s in llvm-readelf mode, or -t in llvm-readobj mode.
cl::opt<bool>
@@ -183,14 +191,18 @@ namespace opts {
cl::aliasopt(ProgramHeaders));
// --string-dump, -p
- cl::list<std::string> StringDump("string-dump", cl::desc("<number|name>"),
- cl::ZeroOrMore);
+ cl::list<std::string> StringDump(
+ "string-dump", cl::value_desc("number|name"),
+ cl::desc("Display the specified section(s) as a list of strings"),
+ cl::ZeroOrMore);
cl::alias StringDumpShort("p", cl::desc("Alias for --string-dump"),
cl::aliasopt(StringDump), cl::Prefix);
// --hex-dump, -x
- cl::list<std::string> HexDump("hex-dump", cl::desc("<number|name>"),
- cl::ZeroOrMore);
+ cl::list<std::string>
+ HexDump("hex-dump", cl::value_desc("number|name"),
+ cl::desc("Display the specified section(s) as hexadecimal bytes"),
+ cl::ZeroOrMore);
cl::alias HexDumpShort("x", cl::desc("Alias for --hex-dump"),
cl::aliasopt(HexDump), cl::Prefix);
@@ -265,6 +277,10 @@ namespace opts {
COFFDebugDirectory("coff-debug-directory",
cl::desc("Display the PE/COFF debug directory"));
+ // --coff-tls-directory
+ cl::opt<bool> COFFTLSDirectory("coff-tls-directory",
+ cl::desc("Display the PE/COFF TLS directory"));
+
// --coff-resources
cl::opt<bool> COFFResources("coff-resources",
cl::desc("Display the PE/COFF .rsrc section"));
@@ -420,55 +436,73 @@ struct ReadObjTypeTableBuilder {
static ReadObjTypeTableBuilder CVTypes;
/// Creates a format-specific object file dumper.
-static std::error_code createDumper(const ObjectFile *Obj,
- ScopedPrinter &Writer,
- std::unique_ptr<ObjDumper> &Result) {
- if (!Obj)
- return readobj_error::unsupported_file_format;
-
- if (Obj->isCOFF())
- return createCOFFDumper(Obj, Writer, Result);
- if (Obj->isELF())
- return createELFDumper(Obj, Writer, Result);
- if (Obj->isMachO())
- return createMachODumper(Obj, Writer, Result);
- if (Obj->isWasm())
- return createWasmDumper(Obj, Writer, Result);
- if (Obj->isXCOFF())
- return createXCOFFDumper(Obj, Writer, Result);
-
- return readobj_error::unsupported_obj_file_format;
+static Expected<std::unique_ptr<ObjDumper>>
+createDumper(const ObjectFile &Obj, ScopedPrinter &Writer) {
+ if (const COFFObjectFile *COFFObj = dyn_cast<COFFObjectFile>(&Obj))
+ return createCOFFDumper(*COFFObj, Writer);
+
+ if (const ELFObjectFileBase *ELFObj = dyn_cast<ELFObjectFileBase>(&Obj))
+ return createELFDumper(*ELFObj, Writer);
+
+ if (const MachOObjectFile *MachOObj = dyn_cast<MachOObjectFile>(&Obj))
+ return createMachODumper(*MachOObj, Writer);
+
+ if (const WasmObjectFile *WasmObj = dyn_cast<WasmObjectFile>(&Obj))
+ return createWasmDumper(*WasmObj, Writer);
+
+ if (const XCOFFObjectFile *XObj = dyn_cast<XCOFFObjectFile>(&Obj))
+ return createXCOFFDumper(*XObj, Writer);
+
+ return createStringError(errc::invalid_argument,
+ "unsupported object file format");
}
/// Dumps the specified object file.
-static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
+static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer,
const Archive *A = nullptr) {
std::string FileStr =
- A ? Twine(A->getFileName() + "(" + Obj->getFileName() + ")").str()
- : Obj->getFileName().str();
+ A ? Twine(A->getFileName() + "(" + Obj.getFileName() + ")").str()
+ : Obj.getFileName().str();
- std::unique_ptr<ObjDumper> Dumper;
- if (std::error_code EC = createDumper(Obj, Writer, Dumper))
- reportError(errorCodeToError(EC), FileStr);
+ std::string ContentErrString;
+ if (Error ContentErr = Obj.initContent())
+ ContentErrString = "unable to continue dumping, the file is corrupt: " +
+ toString(std::move(ContentErr));
+
+ ObjDumper *Dumper;
+ Expected<std::unique_ptr<ObjDumper>> DumperOrErr = createDumper(Obj, Writer);
+ if (!DumperOrErr)
+ reportError(DumperOrErr.takeError(), FileStr);
+ Dumper = (*DumperOrErr).get();
if (opts::Output == opts::LLVM || opts::InputFilenames.size() > 1 || A) {
Writer.startLine() << "\n";
Writer.printString("File", FileStr);
}
if (opts::Output == opts::LLVM) {
- Writer.printString("Format", Obj->getFileFormatName());
- Writer.printString("Arch", Triple::getArchTypeName(
- (llvm::Triple::ArchType)Obj->getArch()));
+ Writer.printString("Format", Obj.getFileFormatName());
+ Writer.printString("Arch", Triple::getArchTypeName(Obj.getArch()));
Writer.printString(
"AddressSize",
- std::string(formatv("{0}bit", 8 * Obj->getBytesInAddress())));
+ std::string(formatv("{0}bit", 8 * Obj.getBytesInAddress())));
Dumper->printLoadName();
}
if (opts::FileHeaders)
Dumper->printFileHeaders();
- if (opts::SectionHeaders)
- Dumper->printSectionHeaders();
+
+ // This is only used for ELF currently. In some cases, when an object is
+ // corrupt (e.g. truncated), we can't dump anything except the file header.
+ if (!ContentErrString.empty())
+ reportError(createError(ContentErrString), FileStr);
+
+ if (opts::SectionDetails || opts::SectionHeaders) {
+ if (opts::Output == opts::GNU && opts::SectionDetails)
+ Dumper->printSectionDetails();
+ else
+ Dumper->printSectionHeaders();
+ }
+
if (opts::HashSymbols)
Dumper->printHashSymbols();
if (opts::ProgramHeaders || opts::SectionMapping == cl::BOU_TRUE)
@@ -492,10 +526,10 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
if (opts::HashTable)
Dumper->printHashTable();
if (opts::GnuHashTable)
- Dumper->printGnuHashTable(Obj);
+ Dumper->printGnuHashTable();
if (opts::VersionInfo)
Dumper->printVersionInfo();
- if (Obj->isELF()) {
+ if (Obj.isELF()) {
if (opts::DependentLibraries)
Dumper->printDependentLibs();
if (opts::ELFLinkerOptions)
@@ -513,7 +547,7 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
if (opts::Notes)
Dumper->printNotes();
}
- if (Obj->isCOFF()) {
+ if (Obj.isCOFF()) {
if (opts::COFFImports)
Dumper->printCOFFImports();
if (opts::COFFExports)
@@ -524,6 +558,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
Dumper->printCOFFBaseReloc();
if (opts::COFFDebugDirectory)
Dumper->printCOFFDebugDirectory();
+ if (opts::COFFTLSDirectory)
+ Dumper->printCOFFTLSDirectory();
if (opts::COFFResources)
Dumper->printCOFFResources();
if (opts::COFFLoadConfig)
@@ -539,7 +575,7 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
CVTypes.GlobalIDTable, CVTypes.GlobalTypeTable,
opts::CodeViewEnableGHash);
}
- if (Obj->isMachO()) {
+ if (Obj.isMachO()) {
if (opts::MachODataInCode)
Dumper->printMachODataInCode();
if (opts::MachOIndirectSymbols)
@@ -569,13 +605,17 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) {
reportError(std::move(E), Arc->getFileName());
continue;
}
- if (ObjectFile *Obj = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
- dumpObject(Obj, Writer, Arc);
- else if (COFFImportFile *Imp = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
+
+ Binary *Bin = ChildOrErr->get();
+ if (ObjectFile *Obj = dyn_cast<ObjectFile>(Bin))
+ dumpObject(*Obj, Writer, Arc);
+ else if (COFFImportFile *Imp = dyn_cast<COFFImportFile>(Bin))
dumpCOFFImportFile(Imp, Writer);
else
- reportError(errorCodeToError(readobj_error::unrecognized_file_format),
- Arc->getFileName());
+ reportWarning(createStringError(errc::invalid_argument,
+ Bin->getFileName() +
+ " has an unsupported file type"),
+ Arc->getFileName());
}
if (Err)
reportError(std::move(Err), Arc->getFileName());
@@ -587,7 +627,7 @@ static void dumpMachOUniversalBinary(const MachOUniversalBinary *UBinary,
for (const MachOUniversalBinary::ObjectForArch &Obj : UBinary->objects()) {
Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = Obj.getAsObjectFile();
if (ObjOrErr)
- dumpObject(&*ObjOrErr.get(), Writer);
+ dumpObject(*ObjOrErr.get(), Writer);
else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError()))
reportError(ObjOrErr.takeError(), UBinary->getFileName());
else if (Expected<std::unique_ptr<Archive>> AOrErr = Obj.getAsArchive())
@@ -607,7 +647,8 @@ static void dumpWindowsResourceFile(WindowsResource *WinRes,
/// Opens \a File and dumps it.
static void dumpInput(StringRef File, ScopedPrinter &Writer) {
// Attempt to open the binary.
- Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(File);
+ Expected<OwningBinary<Binary>> BinaryOrErr =
+ createBinary(File, /*Context=*/nullptr, /*InitContent=*/false);
if (!BinaryOrErr)
reportError(BinaryOrErr.takeError(), File);
Binary &Binary = *BinaryOrErr.get().getBinary();
@@ -618,14 +659,13 @@ static void dumpInput(StringRef File, ScopedPrinter &Writer) {
dyn_cast<MachOUniversalBinary>(&Binary))
dumpMachOUniversalBinary(UBinary, Writer);
else if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary))
- dumpObject(Obj, Writer);
+ dumpObject(*Obj, Writer);
else if (COFFImportFile *Import = dyn_cast<COFFImportFile>(&Binary))
dumpCOFFImportFile(Import, Writer);
else if (WindowsResource *WinRes = dyn_cast<WindowsResource>(&Binary))
dumpWindowsResourceFile(WinRes, Writer);
else
- reportError(errorCodeToError(readobj_error::unrecognized_file_format),
- File);
+ llvm_unreachable("unrecognized file type");
CVTypes.Binaries.push_back(std::move(*BinaryOrErr));
}
@@ -638,8 +678,7 @@ static void registerReadobjAliases() {
cl::aliasopt(opts::SectionHeaders),
cl::NotHidden);
- // Only register -t in llvm-readobj, as readelf reserves it for
- // --section-details (not implemented yet).
+ // llvm-readelf reserves it for --section-details.
static cl::alias SymbolsShort("t", cl::desc("Alias for --symbols"),
cl::aliasopt(opts::Symbols), cl::NotHidden);
@@ -665,6 +704,11 @@ static void registerReadelfAliases() {
cl::aliasopt(opts::Symbols), cl::NotHidden,
cl::Grouping);
+ // -t is here because for readobj it is an alias for --symbols.
+ static cl::alias SectionDetailsShort(
+ "t", cl::desc("Alias for --section-details"),
+ cl::aliasopt(opts::SectionDetails), cl::NotHidden);
+
// Allow all single letter flags to be grouped together.
for (auto &OptEntry : cl::getRegisteredOptions()) {
StringRef ArgName = OptEntry.getKey();
@@ -690,6 +734,11 @@ int main(int argc, const char *argv[]) {
cl::ParseCommandLineOptions(argc, argv, "LLVM Object Reader\n");
+ // Default to printing an error if no filename is specified.
+ if (opts::InputFilenames.empty()) {
+ error("no input files specified");
+ }
+
if (opts::All) {
opts::FileHeaders = true;
opts::ProgramHeaders = true;
@@ -714,10 +763,6 @@ int main(int argc, const char *argv[]) {
opts::SectionHeaders = true;
}
- // Default to stdin if no filename is specified.
- if (opts::InputFilenames.empty())
- opts::InputFilenames.push_back("-");
-
ScopedPrinter Writer(fouts());
for (const std::string &I : opts::InputFilenames)
dumpInput(I, Writer);
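
Note: the llvm-readobj.cpp and XCOFFDumper.cpp hunks above replace the std::error_code out-parameter factories with functions returning Expected<std::unique_ptr<ObjDumper>> and reporting failures via createStringError(). A minimal sketch of that contract, with hypothetical names (DemoDumper, makeDumper) standing in for the tool's own types:

    #include "llvm/Support/Errc.h"
    #include "llvm/Support/Error.h"
    #include <memory>

    // Hypothetical stand-ins; only the error-handling shape mirrors the patch.
    struct DemoDumper { virtual ~DemoDumper() = default; };
    struct DemoELFDumper : DemoDumper {};

    // Success carries the pointer, failure carries an llvm::Error.
    static llvm::Expected<std::unique_ptr<DemoDumper>> makeDumper(bool IsELF) {
      if (IsELF)
        return std::make_unique<DemoELFDumper>();
      return llvm::createStringError(llvm::errc::invalid_argument,
                                     "unsupported object file format");
    }

The caller checks the Expected<> result and forwards takeError() to its error reporter, as dumpObject() does above.
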
diff --git a/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
index be5dbdd1c559..919943129311 100644
--- a/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -764,6 +764,8 @@ static int linkAndVerify() {
ErrorAndExit("Unable to create disassembler!");
std::unique_ptr<MCInstrInfo> MII(TheTarget->createMCInstrInfo());
+ if (!MII)
+ ErrorAndExit("Unable to create target instruction info!");
std::unique_ptr<MCInstPrinter> InstPrinter(
TheTarget->createMCInstPrinter(Triple(TripleName), 0, *MAI, *MII, *MRI));
diff --git a/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp b/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp
index 987270e98c48..4f98a4c0ec10 100644
--- a/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp
@@ -542,9 +542,7 @@ static bool checkMachOAndArchFlags(ObjectFile *O, StringRef Filename) {
H = MachO->MachOObjectFile::getHeader();
T = MachOObjectFile::getArchTriple(H.cputype, H.cpusubtype);
}
- if (none_of(ArchFlags, [&](const std::string &Name) {
- return Name == T.getArchName();
- })) {
+ if (!is_contained(ArchFlags, T.getArchName())) {
error("no architecture specified", Filename);
return false;
}
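
Note: the llvm-size hunk above replaces a none_of() call with a lambda by llvm::is_contained(). A small sketch of the idiom, using a hypothetical hasArchFlag() helper:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include <string>
    #include <vector>

    // is_contained(Range, Value) is shorthand for
    // std::find(Range.begin(), Range.end(), Value) != Range.end().
    static bool hasArchFlag(const std::vector<std::string> &Flags,
                            llvm::StringRef Arch) {
      return llvm::is_contained(Flags, Arch);
    }
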
diff --git a/contrib/llvm-project/llvm/tools/llvm-stress/llvm-stress.cpp b/contrib/llvm-project/llvm/tools/llvm-stress/llvm-stress.cpp
index 22f530dde167..538240d65738 100644
--- a/contrib/llvm-project/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -38,8 +38,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -238,7 +237,7 @@ protected:
return ConstantFP::getAllOnesValue(Tp);
return ConstantFP::getNullValue(Tp);
} else if (Tp->isVectorTy()) {
- VectorType *VTp = cast<VectorType>(Tp);
+ auto *VTp = cast<FixedVectorType>(Tp);
std::vector<Constant*> TempValues;
TempValues.reserve(VTp->getNumElements());
@@ -316,8 +315,7 @@ protected:
Type::getFloatTy(Context),
Type::getDoubleTy(Context)
});
- ScalarTypes.insert(ScalarTypes.end(),
- AdditionalScalarTypes.begin(), AdditionalScalarTypes.end());
+ llvm::append_range(ScalarTypes, AdditionalScalarTypes);
}
return ScalarTypes[getRandom() % ScalarTypes.size()];
@@ -483,10 +481,13 @@ struct ExtractElementModifier: public Modifier {
void Act() override {
Value *Val0 = getRandomVectorValue();
- Value *V = ExtractElementInst::Create(Val0,
- ConstantInt::get(Type::getInt32Ty(BB->getContext()),
- getRandom() % cast<VectorType>(Val0->getType())->getNumElements()),
- "E", BB->getTerminator());
+ Value *V = ExtractElementInst::Create(
+ Val0,
+ ConstantInt::get(
+ Type::getInt32Ty(BB->getContext()),
+ getRandom() %
+ cast<FixedVectorType>(Val0->getType())->getNumElements()),
+ "E", BB->getTerminator());
return PT->push_back(V);
}
};
@@ -499,7 +500,7 @@ struct ShuffModifier: public Modifier {
Value *Val0 = getRandomVectorValue();
Value *Val1 = getRandomValue(Val0->getType());
- unsigned Width = cast<VectorType>(Val0->getType())->getNumElements();
+ unsigned Width = cast<FixedVectorType>(Val0->getType())->getNumElements();
std::vector<Constant*> Idxs;
Type *I32 = Type::getInt32Ty(BB->getContext());
@@ -527,10 +528,13 @@ struct InsertElementModifier: public Modifier {
Value *Val0 = getRandomVectorValue();
Value *Val1 = getRandomValue(Val0->getType()->getScalarType());
- Value *V = InsertElementInst::Create(Val0, Val1,
- ConstantInt::get(Type::getInt32Ty(BB->getContext()),
- getRandom() % cast<VectorType>(Val0->getType())->getNumElements()),
- "I", BB->getTerminator());
+ Value *V = InsertElementInst::Create(
+ Val0, Val1,
+ ConstantInt::get(
+ Type::getInt32Ty(BB->getContext()),
+ getRandom() %
+ cast<FixedVectorType>(Val0->getType())->getNumElements()),
+ "I", BB->getTerminator());
return PT->push_back(V);
}
};
@@ -546,7 +550,7 @@ struct CastModifier: public Modifier {
// Handle vector-to-vector casts.
if (VTy->isVectorTy()) {
- VectorType *VecTy = cast<VectorType>(VTy);
+ auto *VecTy = cast<FixedVectorType>(VTy);
DestTy = pickVectorType(VecTy->getNumElements());
}
@@ -733,10 +737,8 @@ static void IntroduceControlFlow(Function *F, Random &R) {
int main(int argc, char **argv) {
using namespace llvm;
- // Init LLVM, call llvm_shutdown() on exit, parse args, etc.
- PrettyStackTraceProgram X(argc, argv);
+ InitLLVM X(argc, argv);
cl::ParseCommandLineOptions(argc, argv, "llvm codegen stress-tester\n");
- llvm_shutdown_obj Y;
auto M = std::make_unique<Module>("/tmp/autogen.bc", Context);
Function *F = GenEmptyFunction(M.get());
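
Note: besides retargeting the vector casts at FixedVectorType, the llvm-stress hunk above swaps the manual PrettyStackTraceProgram/llvm_shutdown_obj pair for InitLLVM. A minimal sketch of that startup idiom (the overview string is made up):

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/InitLLVM.h"

    int main(int argc, char **argv) {
      // InitLLVM installs the stack trace handlers and calls llvm_shutdown()
      // when it goes out of scope, so neither needs to be spelled out here.
      llvm::InitLLVM X(argc, argv);
      llvm::cl::ParseCommandLineOptions(argc, argv, "demo stress tool\n");
      return 0;
    }
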
diff --git a/contrib/llvm-project/llvm/tools/llvm-symbolizer/Opts.td b/contrib/llvm-project/llvm/tools/llvm-symbolizer/Opts.td
new file mode 100644
index 000000000000..ac23639f130e
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-symbolizer/Opts.td
@@ -0,0 +1,71 @@
+include "llvm/Option/OptParser.td"
+
+multiclass B<string name, string help1, string help2> {
+ def NAME: Flag<["--", "-"], name>, HelpText<help1>;
+ def no_ # NAME: Flag<["--", "-"], "no-" # name>, HelpText<help2>;
+}
+
+multiclass Eq<string name, string help> {
+ def NAME #_EQ : Joined<["--", "-"], name #"=">,
+ HelpText<help>;
+ def : Separate<["--", "-"], name>, Alias<!cast<Joined>(NAME #_EQ)>;
+}
+
+class F<string name, string help>: Flag<["--", "-"], name>, HelpText<help>;
+
+def addresses : F<"addresses", "Show address before line information">;
+defm adjust_vma
+ : Eq<"adjust-vma", "Add specified offset to object file addresses">,
+ MetaVarName<"<offset>">;
+def basenames : Flag<["--"], "basenames">, HelpText<"Strip directory names from paths">;
+defm debug_file_directory : Eq<"debug-file-directory", "Path to directory where to look for debug files">, MetaVarName<"<dir>">;
+defm default_arch : Eq<"default-arch", "Default architecture (for multi-arch objects)">;
+defm demangle : B<"demangle", "Demangle function names", "Don't demangle function names">;
+def functions : F<"functions", "Print function name for a given address">;
+def functions_EQ : Joined<["--"], "functions=">, HelpText<"Print function name for a given address">, Values<"none,short,linkage">;
+def help : F<"help", "Display this help">;
+defm dwp : Eq<"dwp", "Path to DWP file to be used for any split CUs">, MetaVarName<"<file>">;
+defm dsym_hint : Eq<"dsym-hint", "Path to .dSYM bundles to search for debug info for the object files">, MetaVarName<"<dir>">;
+defm fallback_debug_path : Eq<"fallback-debug-path", "Fallback path for debug binaries">, MetaVarName<"<dir>">;
+defm inlines : B<"inlines", "Print all inlined frames for a given address",
+ "Do not print inlined frames">;
+defm obj
+ : Eq<"obj", "Path to object file to be symbolized (if not provided, "
+ "object file should be specified for each input line)">, MetaVarName<"<file>">;
+defm output_style
+ : Eq<"output-style", "Specify print style. Supported styles: LLVM, GNU">,
+ MetaVarName<"style">,
+ Values<"LLVM,GNU">;
+def pretty_print : F<"pretty-print", "Make the output more human friendly">;
+defm print_source_context_lines : Eq<"print-source-context-lines", "Print N lines of source file context">;
+def relative_address : F<"relative-address", "Interpret addresses as addresses relative to the image base">;
+def relativenames : F<"relativenames", "Strip the compilation directory from paths">;
+defm untag_addresses : B<"untag-addresses", "", "Remove memory tags from addresses before symbolization">;
+def use_dia: F<"dia", "Use the DIA library to access symbols (Windows only)">;
+def verbose : F<"verbose", "Print verbose line info">;
+def version : F<"version", "Display the version">;
+
+def : Flag<["-"], "a">, Alias<addresses>, HelpText<"Alias for --addresses">;
+def : F<"print-address", "Alias for --addresses">, Alias<addresses>;
+def : Flag<["-"], "C">, Alias<demangle>, HelpText<"Alias for --demangle">;
+def : Joined<["--"], "exe=">, Alias<obj_EQ>, HelpText<"Alias for --obj">, MetaVarName<"<file>">;
+def : Separate<["--"], "exe">, Alias<obj_EQ>, HelpText<"Alias for --obj">, MetaVarName<"<file>">;
+def : JoinedOrSeparate<["-"], "e">, Alias<obj_EQ>, HelpText<"Alias for --obj">, MetaVarName<"<file>">;
+def : Joined<["-"], "e=">, Alias<obj_EQ>, HelpText<"Alias for --obj">, MetaVarName<"<file>">;
+def : Flag<["-"], "f">, Alias<functions>, HelpText<"Alias for --functions">;
+def : Joined<["-"], "f=">, Alias<functions_EQ>, HelpText<"Alias for --functions=">;
+def : Flag<["-"], "h">, Alias<help>;
+def : Flag<["-"], "i">, Alias<inlines>, HelpText<"Alias for --inlines">;
+def : F<"inlining", "Alias for --inlines">, Alias<inlines>;
+def : Flag<["-"], "p">, Alias<pretty_print>, HelpText<"Alias for --pretty-print">;
+def : Flag<["-"], "s">, Alias<basenames>, HelpText<"Alias for --basenames">;
+def : Flag<["-"], "v">, Alias<version>, HelpText<"Alias for --version">;
+
+// Compatibility aliases for old asan_symbolize.py and sanitizer binaries (before 2020-08).
+def : Flag<["--"], "inlining=true">, Alias<inlines>, HelpText<"Alias for --inlines">;
+def : Flag<["--"], "inlining=false">, Alias<no_inlines>, HelpText<"Alias for --no-inlines">;
+// Compatibility aliases for pprof's symbolizer.
+def : Flag<["-"], "demangle=true">, Alias<demangle>, HelpText<"Alias for --demangle">;
+def : Flag<["-"], "demangle=false">, Alias<no_demangle>, HelpText<"Alias for --no-demangle">;
+// Compatibility no-op options.
+def : Flag<["--"], "use-symbol-table=true">;
diff --git a/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 6a702c64a105..8734c2d74045 100644
--- a/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -14,15 +14,21 @@
//
//===----------------------------------------------------------------------===//
+#include "Opts.inc"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Config/config.h"
#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
#include "llvm/Support/COM.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdio>
@@ -32,144 +38,42 @@
using namespace llvm;
using namespace symbolize;
-static cl::opt<bool>
-ClUseSymbolTable("use-symbol-table", cl::init(true),
- cl::desc("Prefer names in symbol table to names "
- "in debug info"));
-
-static cl::opt<FunctionNameKind> ClPrintFunctions(
- "functions", cl::init(FunctionNameKind::LinkageName),
- cl::desc("Print function name for a given address"), cl::ValueOptional,
- cl::values(clEnumValN(FunctionNameKind::None, "none", "omit function name"),
- clEnumValN(FunctionNameKind::ShortName, "short",
- "print short function name"),
- clEnumValN(FunctionNameKind::LinkageName, "linkage",
- "print function linkage name"),
- // Sentinel value for unspecified value.
- clEnumValN(FunctionNameKind::LinkageName, "", "")));
-static cl::alias ClPrintFunctionsShort("f", cl::desc("Alias for -functions"),
- cl::NotHidden, cl::Grouping,
- cl::aliasopt(ClPrintFunctions));
-
-static cl::opt<bool>
- ClUseRelativeAddress("relative-address", cl::init(false),
- cl::desc("Interpret addresses as relative addresses"),
- cl::ReallyHidden);
-
-static cl::opt<bool> ClUntagAddresses(
- "untag-addresses", cl::init(true),
- cl::desc("Remove memory tags from addresses before symbolization"));
-
-static cl::opt<bool>
- ClPrintInlining("inlining", cl::init(true),
- cl::desc("Print all inlined frames for a given address"));
-static cl::alias
- ClPrintInliningAliasI("i", cl::desc("Alias for -inlining"),
- cl::NotHidden, cl::aliasopt(ClPrintInlining),
- cl::Grouping);
-static cl::alias
- ClPrintInliningAliasInlines("inlines", cl::desc("Alias for -inlining"),
- cl::NotHidden, cl::aliasopt(ClPrintInlining));
-
-static cl::opt<bool> ClBasenames("basenames", cl::init(false),
- cl::desc("Strip directory names from paths"));
-static cl::alias ClBasenamesShort("s", cl::desc("Alias for -basenames"),
- cl::NotHidden, cl::aliasopt(ClBasenames));
-
-static cl::opt<bool>
- ClRelativenames("relativenames", cl::init(false),
- cl::desc("Strip the compilation directory from paths"));
-
-static cl::opt<bool>
-ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names"));
-static cl::alias
-ClDemangleShort("C", cl::desc("Alias for -demangle"),
- cl::NotHidden, cl::aliasopt(ClDemangle), cl::Grouping);
-static cl::opt<bool>
-ClNoDemangle("no-demangle", cl::init(false),
- cl::desc("Don't demangle function names"));
-
-static cl::opt<std::string> ClDefaultArch("default-arch", cl::init(""),
- cl::desc("Default architecture "
- "(for multi-arch objects)"));
-
-static cl::opt<std::string>
-ClBinaryName("obj", cl::init(""),
- cl::desc("Path to object file to be symbolized (if not provided, "
- "object file should be specified for each input line)"));
-static cl::alias
-ClBinaryNameAliasExe("exe", cl::desc("Alias for -obj"),
- cl::NotHidden, cl::aliasopt(ClBinaryName));
-static cl::alias ClBinaryNameAliasE("e", cl::desc("Alias for -obj"),
- cl::NotHidden, cl::Grouping, cl::Prefix,
- cl::aliasopt(ClBinaryName));
-
-static cl::opt<std::string>
- ClDwpName("dwp", cl::init(""),
- cl::desc("Path to DWP file to be use for any split CUs"));
-
-static cl::list<std::string>
-ClDsymHint("dsym-hint", cl::ZeroOrMore,
- cl::desc("Path to .dSYM bundles to search for debug info for the "
- "object files"));
-
-static cl::opt<bool>
-ClPrintAddress("print-address", cl::init(false),
- cl::desc("Show address before line information"));
-static cl::alias
-ClPrintAddressAliasAddresses("addresses", cl::desc("Alias for -print-address"),
- cl::NotHidden, cl::aliasopt(ClPrintAddress));
-static cl::alias
-ClPrintAddressAliasA("a", cl::desc("Alias for -print-address"),
- cl::NotHidden, cl::aliasopt(ClPrintAddress), cl::Grouping);
-
-static cl::opt<bool>
- ClPrettyPrint("pretty-print", cl::init(false),
- cl::desc("Make the output more human friendly"));
-static cl::alias ClPrettyPrintShort("p", cl::desc("Alias for -pretty-print"),
- cl::NotHidden,
- cl::aliasopt(ClPrettyPrint), cl::Grouping);
-
-static cl::opt<int> ClPrintSourceContextLines(
- "print-source-context-lines", cl::init(0),
- cl::desc("Print N number of source file context"));
+namespace {
+enum ID {
+ OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ OPT_##ID,
+#include "Opts.inc"
+#undef OPTION
+};
-static cl::opt<bool> ClVerbose("verbose", cl::init(false),
- cl::desc("Print verbose line info"));
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "Opts.inc"
+#undef PREFIX
+
+static const opt::OptTable::Info InfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ { \
+ PREFIX, NAME, HELPTEXT, \
+ METAVAR, OPT_##ID, opt::Option::KIND##Class, \
+ PARAM, FLAGS, OPT_##GROUP, \
+ OPT_##ALIAS, ALIASARGS, VALUES},
+#include "Opts.inc"
+#undef OPTION
+};
-static cl::opt<uint64_t>
- ClAdjustVMA("adjust-vma", cl::init(0), cl::value_desc("offset"),
- cl::desc("Add specified offset to object file addresses"));
+class SymbolizerOptTable : public opt::OptTable {
+public:
+ SymbolizerOptTable() : OptTable(InfoTable, true) {}
+};
+} // namespace
static cl::list<std::string> ClInputAddresses(cl::Positional,
cl::desc("<input addresses>..."),
cl::ZeroOrMore);
-static cl::opt<std::string>
- ClFallbackDebugPath("fallback-debug-path", cl::init(""),
- cl::desc("Fallback path for debug binaries."));
-
-static cl::list<std::string>
- ClDebugFileDirectory("debug-file-directory", cl::ZeroOrMore,
- cl::value_desc("dir"),
- cl::desc("Path to directory where to look for debug "
- "files."));
-
-static cl::opt<DIPrinter::OutputStyle>
- ClOutputStyle("output-style", cl::init(DIPrinter::OutputStyle::LLVM),
- cl::desc("Specify print style"),
- cl::values(clEnumValN(DIPrinter::OutputStyle::LLVM, "LLVM",
- "LLVM default style"),
- clEnumValN(DIPrinter::OutputStyle::GNU, "GNU",
- "GNU addr2line style")));
-
-static cl::opt<bool>
- ClUseNativePDBReader("use-native-pdb-reader", cl::init(0),
- cl::desc("Use native PDB functionality"));
-
-static cl::extrahelp
- HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
-
template<typename T>
static bool error(Expected<T> &ResOrErr) {
if (ResOrErr)
@@ -185,7 +89,8 @@ enum class Command {
Frame,
};
-static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd,
+static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
+ StringRef InputString, Command &Cmd,
std::string &ModuleName, uint64_t &ModuleOffset) {
const char kDelimiters[] = " \n\r";
ModuleName = "";
@@ -201,7 +106,7 @@ static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd,
}
const char *Pos = InputString.data();
// Skip delimiters and parse input filename (if needed).
- if (ClBinaryName.empty()) {
+ if (BinaryName.empty()) {
Pos += strspn(Pos, kDelimiters);
if (*Pos == '"' || *Pos == '\'') {
char Quote = *Pos;
@@ -217,7 +122,7 @@ static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd,
Pos += NameLength;
}
} else {
- ModuleName = ClBinaryName;
+ ModuleName = BinaryName.str();
}
// Skip delimiters and parse module offset.
Pos += strspn(Pos, kDelimiters);
@@ -230,24 +135,26 @@ static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd,
return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
}
-static void symbolizeInput(bool IsAddr2Line, StringRef InputString,
- LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
+static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
+ bool IsAddr2Line, DIPrinter::OutputStyle OutputStyle,
+ StringRef InputString, LLVMSymbolizer &Symbolizer,
+ DIPrinter &Printer) {
Command Cmd;
std::string ModuleName;
uint64_t Offset = 0;
- if (!parseCommand(IsAddr2Line, StringRef(InputString), Cmd, ModuleName,
- Offset)) {
+ if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
+ StringRef(InputString), Cmd, ModuleName, Offset)) {
outs() << InputString << "\n";
return;
}
- if (ClPrintAddress) {
+ if (Args.hasArg(OPT_addresses)) {
outs() << "0x";
outs().write_hex(Offset);
- StringRef Delimiter = ClPrettyPrint ? ": " : "\n";
+ StringRef Delimiter = Args.hasArg(OPT_pretty_print) ? ": " : "\n";
outs() << Delimiter;
}
- Offset -= ClAdjustVMA;
+ Offset -= AdjustVMA;
if (Cmd == Command::Data) {
auto ResOrErr = Symbolizer.symbolizeData(
ModuleName, {Offset, object::SectionedAddress::UndefSection});
@@ -261,102 +168,182 @@ static void symbolizeInput(bool IsAddr2Line, StringRef InputString,
if (ResOrErr->empty())
outs() << "??\n";
}
- } else if (ClPrintInlining) {
+ } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) {
auto ResOrErr = Symbolizer.symbolizeInlinedCode(
ModuleName, {Offset, object::SectionedAddress::UndefSection});
Printer << (error(ResOrErr) ? DIInliningInfo() : ResOrErr.get());
- } else if (ClOutputStyle == DIPrinter::OutputStyle::GNU) {
- // With ClPrintFunctions == FunctionNameKind::LinkageName (default)
- // and ClUseSymbolTable == true (also default), Symbolizer.symbolizeCode()
+ } else if (OutputStyle == DIPrinter::OutputStyle::GNU) {
+ // With PrintFunctions == FunctionNameKind::LinkageName (default)
+ // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
// may override the name of an inlined function with the name of the topmost
// caller function in the inlining chain. This contradicts the existing
// behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
// the topmost function, which suits our needs better.
auto ResOrErr = Symbolizer.symbolizeInlinedCode(
ModuleName, {Offset, object::SectionedAddress::UndefSection});
- Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0));
+ if (!ResOrErr || ResOrErr->getNumberOfFrames() == 0) {
+ error(ResOrErr);
+ Printer << DILineInfo();
+ } else {
+ Printer << ResOrErr->getFrame(0);
+ }
} else {
auto ResOrErr = Symbolizer.symbolizeCode(
ModuleName, {Offset, object::SectionedAddress::UndefSection});
Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
}
- if (ClOutputStyle == DIPrinter::OutputStyle::LLVM)
+ if (OutputStyle == DIPrinter::OutputStyle::LLVM)
outs() << "\n";
}
-int main(int argc, char **argv) {
- InitLLVM X(argc, argv);
+static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
+ raw_ostream &OS) {
+ const char HelpText[] = " [options] addresses...";
+ Tbl.PrintHelp(OS, (ToolName + HelpText).str().c_str(),
+ ToolName.str().c_str());
+ // TODO Replace this with OptTable API once it adds extrahelp support.
+ OS << "\nPass @FILE as argument to read options from FILE.\n";
+}
- bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
+static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
+ StringSaver &Saver,
+ SymbolizerOptTable &Tbl) {
+ StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
+ Tbl.setGroupedShortOptions(true);
+ // The environment variable specifies initial options which can be overridden
+ // by command line options.
+ Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
+ : "LLVM_SYMBOLIZER_OPTS");
+ bool HasError = false;
+ opt::InputArgList Args =
+ Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
+ errs() << ("error: " + Msg + "\n");
+ HasError = true;
+ });
+ if (HasError)
+ exit(1);
+ if (Args.hasArg(OPT_help)) {
+ printHelp(ToolName, Tbl, outs());
+ exit(0);
+ }
+ if (Args.hasArg(OPT_version)) {
+ outs() << ToolName << '\n';
+ cl::PrintVersionMessage();
+ exit(0);
+ }
- if (IsAddr2Line) {
- ClDemangle.setInitialValue(false);
- ClPrintFunctions.setInitialValue(FunctionNameKind::None);
- ClPrintInlining.setInitialValue(false);
- ClUntagAddresses.setInitialValue(false);
- ClOutputStyle.setInitialValue(DIPrinter::OutputStyle::GNU);
+ return Args;
+}
+
+template <typename T>
+static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
+ if (const opt::Arg *A = Args.getLastArg(ID)) {
+ StringRef V(A->getValue());
+ if (!llvm::to_integer(V, Value, 0)) {
+ errs() << A->getSpelling() +
+ ": expected a non-negative integer, but got '" + V + "'";
+ exit(1);
+ }
+ } else {
+ Value = 0;
}
+}
- llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded);
- cl::ParseCommandLineOptions(
- argc, argv, IsAddr2Line ? "llvm-addr2line\n" : "llvm-symbolizer\n",
- /*Errs=*/nullptr,
- IsAddr2Line ? "LLVM_ADDR2LINE_OPTS" : "LLVM_SYMBOLIZER_OPTS");
+static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
+ bool IsAddr2Line) {
+ if (Args.hasArg(OPT_functions))
+ return FunctionNameKind::LinkageName;
+ if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
+ return StringSwitch<FunctionNameKind>(A->getValue())
+ .Case("none", FunctionNameKind::None)
+ .Case("short", FunctionNameKind::ShortName)
+ .Default(FunctionNameKind::LinkageName);
+ return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
+}
- // If both --demangle and --no-demangle are specified then pick the last one.
- if (ClNoDemangle.getPosition() > ClDemangle.getPosition())
- ClDemangle = !ClNoDemangle;
+int main(int argc, char **argv) {
+ InitLLVM X(argc, argv);
+ sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
+
+ bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
+ BumpPtrAllocator A;
+ StringSaver Saver(A);
+ SymbolizerOptTable Tbl;
+ opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
LLVMSymbolizer::Options Opts;
- Opts.PrintFunctions = ClPrintFunctions;
- Opts.UseSymbolTable = ClUseSymbolTable;
- Opts.Demangle = ClDemangle;
- Opts.RelativeAddresses = ClUseRelativeAddress;
- Opts.UntagAddresses = ClUntagAddresses;
- Opts.DefaultArch = ClDefaultArch;
- Opts.FallbackDebugPath = ClFallbackDebugPath;
- Opts.DWPName = ClDwpName;
- Opts.DebugFileDirectory = ClDebugFileDirectory;
- Opts.UseNativePDBReader = ClUseNativePDBReader;
- Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
- // If both --basenames and --relativenames are specified then pick the last
- // one.
- if (ClBasenames.getPosition() > ClRelativenames.getPosition())
- Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly;
- else if (ClRelativenames)
- Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
+ uint64_t AdjustVMA;
+ unsigned SourceContextLines;
+ parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
+ if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
+ Opts.PathStyle =
+ A->getOption().matches(OPT_basenames)
+ ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
+ : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
+ } else {
+ Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
+ }
+ Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
+ Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
+ Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
+ Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
+ Opts.FallbackDebugPath =
+ Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
+ Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
+ parseIntArg(Args, OPT_print_source_context_lines_EQ, SourceContextLines);
+ Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
+ Opts.UntagAddresses =
+ Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
+ Opts.UseDIA = Args.hasArg(OPT_use_dia);
+#if !defined(LLVM_ENABLE_DIA_SDK)
+ if (Opts.UseDIA) {
+ WithColor::warning() << "DIA not available; using native PDB reader\n";
+ Opts.UseDIA = false;
+ }
+#endif
+ Opts.UseSymbolTable = true;
- for (const auto &hint : ClDsymHint) {
- if (sys::path::extension(hint) == ".dSYM") {
- Opts.DsymHints.push_back(hint);
+ for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
+ StringRef Hint(A->getValue());
+ if (sys::path::extension(Hint) == ".dSYM") {
+ Opts.DsymHints.emplace_back(Hint);
} else {
- errs() << "Warning: invalid dSYM hint: \"" << hint <<
- "\" (must have the '.dSYM' extension).\n";
+ errs() << "Warning: invalid dSYM hint: \"" << Hint
+ << "\" (must have the '.dSYM' extension).\n";
}
}
- LLVMSymbolizer Symbolizer(Opts);
- DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None,
- ClPrettyPrint, ClPrintSourceContextLines, ClVerbose,
- ClOutputStyle);
+ auto OutputStyle =
+ IsAddr2Line ? DIPrinter::OutputStyle::GNU : DIPrinter::OutputStyle::LLVM;
+ if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
+ OutputStyle = strcmp(A->getValue(), "GNU") == 0
+ ? DIPrinter::OutputStyle::GNU
+ : DIPrinter::OutputStyle::LLVM;
+ }
+
+ LLVMSymbolizer Symbolizer(Opts);
+ DIPrinter Printer(outs(), Opts.PrintFunctions != FunctionNameKind::None,
+ Args.hasArg(OPT_pretty_print), SourceContextLines,
+ Args.hasArg(OPT_verbose), OutputStyle);
- if (ClInputAddresses.empty()) {
+ std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
+ if (InputAddresses.empty()) {
const int kMaxInputStringLength = 1024;
char InputString[kMaxInputStringLength];
while (fgets(InputString, sizeof(InputString), stdin)) {
// Strip newline characters.
std::string StrippedInputString(InputString);
- StrippedInputString.erase(
- std::remove_if(StrippedInputString.begin(), StrippedInputString.end(),
- [](char c) { return c == '\r' || c == '\n'; }),
- StrippedInputString.end());
- symbolizeInput(IsAddr2Line, StrippedInputString, Symbolizer, Printer);
+ llvm::erase_if(StrippedInputString,
+ [](char c) { return c == '\r' || c == '\n'; });
+ symbolizeInput(Args, AdjustVMA, IsAddr2Line, OutputStyle,
+ StrippedInputString, Symbolizer, Printer);
outs().flush();
}
} else {
- for (StringRef Address : ClInputAddresses)
- symbolizeInput(IsAddr2Line, Address, Symbolizer, Printer);
+ for (StringRef Address : InputAddresses)
+ symbolizeInput(Args, AdjustVMA, IsAddr2Line, OutputStyle, Address,
+ Symbolizer, Printer);
}
return 0;
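
Note: the llvm-symbolizer rewrite above drops the static cl:: option variables in favour of a tablegen-generated OptTable (Opts.td/Opts.inc) and reads every setting from an opt::InputArgList. A sketch of that access pattern, with hypothetical DEMO_* IDs and a DemoConfig struct standing in for the generated OPT_* enumerators and LLVMSymbolizer::Options:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Option/ArgList.h"
    #include <string>

    // Hypothetical IDs; real tools get OPT_* values emitted into Opts.inc.
    enum DemoOptID : unsigned {
      DEMO_INVALID = 0,
      DEMO_demangle,
      DEMO_no_demangle,
      DEMO_obj_EQ,
    };

    struct DemoConfig {
      bool Demangle = false;
      std::string Obj;
    };

    // Mirrors how main() above derives its options from the parsed arguments.
    static DemoConfig readConfig(const llvm::opt::InputArgList &Args,
                                 bool IsAddr2Line) {
      DemoConfig C;
      // hasFlag(): the last of --demangle/--no-demangle wins, with a
      // mode-dependent default, as in the addr2line handling above.
      C.Demangle = Args.hasFlag(DEMO_demangle, DEMO_no_demangle, !IsAddr2Line);
      C.Obj = Args.getLastArgValue(DEMO_obj_EQ).str();
      return C;
    }
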
diff --git a/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp b/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp
index fcac33b23d4d..bde028a432fe 100644
--- a/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp
@@ -35,6 +35,9 @@ static cl::opt<bool>
cl::sub(Account), cl::init(false));
static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing),
cl::desc("Alias for -keep_going"));
+static cl::opt<bool> AccountRecursiveCallsOnly(
+ "recursive-calls-only", cl::desc("Only count the calls that are recursive"),
+ cl::sub(Account), cl::init(false));
static cl::opt<bool> AccountDeduceSiblingCalls(
"deduce-sibling-calls",
cl::desc("Deduce sibling calls when unrolling function call stacks"),
@@ -126,6 +129,32 @@ template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); }
} // namespace
+using RecursionStatus = LatencyAccountant::FunctionStack::RecursionStatus;
+RecursionStatus &RecursionStatus::operator++() {
+ auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);
+ assert(Depth >= 0 && Depth < std::numeric_limits<decltype(Depth)>::max());
+ ++Depth;
+ Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // ++Storage
+ // Did this function just (maybe indirectly) call itself for the first time?
+ if (!isRecursive() && Depth == 2) // Storage == 2 / Storage s> 1
+ Bitfield::set<RecursionStatus::IsRecursive>(Storage,
+ true); // Storage |= INT_MIN
+ return *this;
+}
+RecursionStatus &RecursionStatus::operator--() {
+ auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);
+ assert(Depth > 0);
+ --Depth;
+ Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // --Storage
+ // Did we leave a function that previously (maybe indirectly) called itself?
+ if (isRecursive() && Depth == 0) // Storage == INT_MIN
+ Bitfield::set<RecursionStatus::IsRecursive>(Storage, false); // Storage = 0
+ return *this;
+}
+bool RecursionStatus::isRecursive() const {
+ return Bitfield::get<RecursionStatus::IsRecursive>(Storage); // Storage s< 0
+}
+
bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
setMinMax(PerThreadMinMaxTSC[Record.TId], Record.TSC);
setMinMax(PerCPUMinMaxTSC[Record.CPU], Record.TSC);
@@ -137,6 +166,8 @@ bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
return false;
auto &ThreadStack = PerThreadFunctionStack[Record.TId];
+ if (RecursiveCallsOnly && !ThreadStack.RecursionDepth)
+ ThreadStack.RecursionDepth.emplace();
switch (Record.Type) {
case RecordTypes::CUSTOM_EVENT:
case RecordTypes::TYPED_EVENT:
@@ -144,18 +175,24 @@ bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
return true;
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG: {
- ThreadStack.emplace_back(Record.FuncId, Record.TSC);
+ ThreadStack.Stack.emplace_back(Record.FuncId, Record.TSC);
+ if (ThreadStack.RecursionDepth)
+ ++(*ThreadStack.RecursionDepth)[Record.FuncId];
break;
}
case RecordTypes::EXIT:
case RecordTypes::TAIL_EXIT: {
- if (ThreadStack.empty())
+ if (ThreadStack.Stack.empty())
return false;
- if (ThreadStack.back().first == Record.FuncId) {
- const auto &Top = ThreadStack.back();
- recordLatency(Top.first, diff(Top.second, Record.TSC));
- ThreadStack.pop_back();
+ if (ThreadStack.Stack.back().first == Record.FuncId) {
+ const auto &Top = ThreadStack.Stack.back();
+ if (!ThreadStack.RecursionDepth ||
+ (*ThreadStack.RecursionDepth)[Top.first].isRecursive())
+ recordLatency(Top.first, diff(Top.second, Record.TSC));
+ if (ThreadStack.RecursionDepth)
+ --(*ThreadStack.RecursionDepth)[Top.first];
+ ThreadStack.Stack.pop_back();
break;
}
@@ -164,11 +201,11 @@ bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
// Look for the parent up the stack.
auto Parent =
- std::find_if(ThreadStack.rbegin(), ThreadStack.rend(),
+ std::find_if(ThreadStack.Stack.rbegin(), ThreadStack.Stack.rend(),
[&](const std::pair<const int32_t, uint64_t> &E) {
return E.first == Record.FuncId;
});
- if (Parent == ThreadStack.rend())
+ if (Parent == ThreadStack.Stack.rend())
return false;
// Account time for this apparently sibling call exit up the stack.
@@ -199,11 +236,17 @@ bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
// complexity to do correctly (need to backtrack, etc.).
//
// FIXME: Potentially implement the more complex deduction algorithm?
- auto I = std::next(Parent).base();
- for (auto &E : make_range(I, ThreadStack.end())) {
- recordLatency(E.first, diff(E.second, Record.TSC));
+ auto R = make_range(std::next(Parent).base(), ThreadStack.Stack.end());
+ for (auto &E : R) {
+ if (!ThreadStack.RecursionDepth ||
+ (*ThreadStack.RecursionDepth)[E.first].isRecursive())
+ recordLatency(E.first, diff(E.second, Record.TSC));
+ }
+ for (auto &Top : reverse(R)) {
+ if (ThreadStack.RecursionDepth)
+ --(*ThreadStack.RecursionDepth)[Top.first];
+ ThreadStack.Stack.pop_back();
}
- ThreadStack.erase(I, ThreadStack.end());
break;
}
}
@@ -226,7 +269,7 @@ struct ResultRow {
std::string Function;
};
-ResultRow getStats(std::vector<uint64_t> &Timings) {
+ResultRow getStats(MutableArrayRef<uint64_t> Timings) {
assert(!Timings.empty());
ResultRow R;
R.Sum = std::accumulate(Timings.begin(), Timings.end(), 0.0);
@@ -240,11 +283,13 @@ ResultRow getStats(std::vector<uint64_t> &Timings) {
R.Median = Timings[MedianOff];
auto Pct90Off = std::floor(Timings.size() * 0.9);
- std::nth_element(Timings.begin(), Timings.begin() + Pct90Off, Timings.end());
+ std::nth_element(Timings.begin(), Timings.begin() + (uint64_t)Pct90Off,
+ Timings.end());
R.Pct90 = Timings[Pct90Off];
auto Pct99Off = std::floor(Timings.size() * 0.99);
- std::nth_element(Timings.begin(), Timings.begin() + Pct99Off, Timings.end());
+ std::nth_element(Timings.begin(), Timings.begin() + (uint64_t)Pct99Off,
+ Timings.end());
R.Pct99 = Timings[Pct99Off];
return R;
}
@@ -423,7 +468,8 @@ static CommandRegistration Unused(&Account, []() -> Error {
symbolize::LLVMSymbolizer Symbolizer;
llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer,
FunctionAddresses);
- xray::LatencyAccountant FCA(FuncIdHelper, AccountDeduceSiblingCalls);
+ xray::LatencyAccountant FCA(FuncIdHelper, AccountRecursiveCallsOnly,
+ AccountDeduceSiblingCalls);
auto TraceOrErr = loadTraceFile(AccountInput);
if (!TraceOrErr)
return joinErrors(
@@ -445,12 +491,12 @@ static CommandRegistration Unused(&Account, []() -> Error {
<< '\n';
for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) {
errs() << "Thread ID: " << ThreadStack.first << "\n";
- if (ThreadStack.second.empty()) {
+ if (ThreadStack.second.Stack.empty()) {
errs() << " (empty stack)\n";
continue;
}
- auto Level = ThreadStack.second.size();
- for (const auto &Entry : llvm::reverse(ThreadStack.second))
+ auto Level = ThreadStack.second.Stack.size();
+ for (const auto &Entry : llvm::reverse(ThreadStack.second.Stack))
errs() << " #" << Level-- << "\t"
<< FuncIdHelper.SymbolOrNumber(Entry.first) << '\n';
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.h b/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.h
index b63ecc59b71a..371a9cc708e9 100644
--- a/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.h
+++ b/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.h
@@ -18,6 +18,7 @@
#include <vector>
#include "func-id-helper.h"
+#include "llvm/ADT/Bitfields.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/XRay/XRayRecord.h"
@@ -27,12 +28,26 @@ namespace xray {
class LatencyAccountant {
public:
- typedef std::map<int32_t, std::vector<uint64_t>> FunctionLatencyMap;
- typedef std::map<uint32_t, std::pair<uint64_t, uint64_t>>
+ typedef llvm::DenseMap<int32_t, llvm::SmallVector<uint64_t, 0>>
+ FunctionLatencyMap;
+ typedef llvm::DenseMap<uint32_t, std::pair<uint64_t, uint64_t>>
PerThreadMinMaxTSCMap;
- typedef std::map<uint8_t, std::pair<uint64_t, uint64_t>> PerCPUMinMaxTSCMap;
- typedef std::vector<std::pair<int32_t, uint64_t>> FunctionStack;
- typedef std::map<uint32_t, FunctionStack> PerThreadFunctionStackMap;
+ typedef llvm::DenseMap<uint8_t, std::pair<uint64_t, uint64_t>>
+ PerCPUMinMaxTSCMap;
+ struct FunctionStack {
+ llvm::SmallVector<std::pair<int32_t, uint64_t>, 32> Stack;
+ class RecursionStatus {
+ uint32_t Storage = 0;
+ using Depth = Bitfield::Element<int32_t, 0, 31>; // Low 31 bits.
+ using IsRecursive = Bitfield::Element<bool, 31, 1>; // Sign bit.
+ public:
+ RecursionStatus &operator++();
+ RecursionStatus &operator--();
+ bool isRecursive() const;
+ };
+ Optional<llvm::DenseMap<int32_t, RecursionStatus>> RecursionDepth;
+ };
+ typedef llvm::DenseMap<uint32_t, FunctionStack> PerThreadFunctionStackMap;
private:
PerThreadFunctionStackMap PerThreadFunctionStack;
@@ -41,6 +56,7 @@ private:
PerCPUMinMaxTSCMap PerCPUMinMaxTSC;
FuncIdConversionHelper &FuncIdHelper;
+ bool RecursiveCallsOnly = false;
bool DeduceSiblingCalls = false;
uint64_t CurrentMaxTSC = 0;
@@ -50,8 +66,9 @@ private:
public:
explicit LatencyAccountant(FuncIdConversionHelper &FuncIdHelper,
- bool DeduceSiblingCalls)
- : FuncIdHelper(FuncIdHelper), DeduceSiblingCalls(DeduceSiblingCalls) {}
+ bool RecursiveCallsOnly, bool DeduceSiblingCalls)
+ : FuncIdHelper(FuncIdHelper), RecursiveCallsOnly(RecursiveCallsOnly),
+ DeduceSiblingCalls(DeduceSiblingCalls) {}
const FunctionLatencyMap &getFunctionLatencies() const {
return FunctionLatencies;
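
Note: the xray-account changes above track per-function recursion in a single uint32_t: a 31-bit depth counter in the low bits plus an is-recursive flag in the sign bit, manipulated through llvm/ADT/Bitfields.h. A compact sketch of the same packing, under a hypothetical DemoRecursion name:

    #include "llvm/ADT/Bitfields.h"
    #include <cstdint>

    struct DemoRecursion {
      uint32_t Storage = 0;
      using Depth = llvm::Bitfield::Element<int32_t, 0, 31>;       // low bits
      using IsRecursive = llvm::Bitfield::Element<bool, 31, 1>;    // sign bit

      void enter() {
        int32_t D = llvm::Bitfield::get<Depth>(Storage) + 1;
        llvm::Bitfield::set<Depth>(Storage, D);
        if (D == 2) // second live activation => the function recursed
          llvm::Bitfield::set<IsRecursive>(Storage, true);
      }
      bool isRecursive() const {
        return llvm::Bitfield::get<IsRecursive>(Storage);
      }
    };

With --recursive-calls-only, accountRecord() above records a latency only when this flag is set for the function being popped.
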
diff --git a/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph.cpp b/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph.cpp
index 522609b938f2..00a1807c07c9 100644
--- a/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph.cpp
@@ -312,8 +312,7 @@ void GraphRenderer::calculateVertexStatistics() {
if (V.first != 0) {
for (auto &E : G.inEdges(V.first)) {
auto &A = E.second;
- TempTimings.insert(TempTimings.end(), A.Timings.begin(),
- A.Timings.end());
+ llvm::append_range(TempTimings, A.Timings);
}
getStats(TempTimings.begin(), TempTimings.end(), G[V.first].S);
updateMaxStats(G[V.first].S, G.GraphVertexMax);
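
Note: the xray-graph hunk above is a pure cleanup: llvm::append_range(C, R) replaces the explicit insert(end, begin, end) call. Sketch with hypothetical names:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    #include <cstdint>

    // append_range(Out, In) expands to Out.insert(Out.end(), In.begin(), In.end()).
    static void appendTimings(llvm::SmallVectorImpl<uint64_t> &Out,
                              const llvm::SmallVectorImpl<uint64_t> &In) {
      llvm::append_range(Out, In);
    }
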
diff --git a/contrib/llvm-project/llvm/tools/llvm-xray/xray-stacks.cpp b/contrib/llvm-project/llvm/tools/llvm-xray/xray-stacks.cpp
index 1e4490289534..d04b998dacca 100644
--- a/contrib/llvm-project/llvm/tools/llvm-xray/xray-stacks.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-xray/xray-stacks.cpp
@@ -252,13 +252,15 @@ private:
/// maintain an index of unique functions, and provide a means of iterating
/// through all the instrumented call stacks which we know about.
+namespace {
struct StackDuration {
llvm::SmallVector<int64_t, 4> TerminalDurations;
llvm::SmallVector<int64_t, 4> IntermediateDurations;
};
+} // namespace
-StackDuration mergeStackDuration(const StackDuration &Left,
- const StackDuration &Right) {
+static StackDuration mergeStackDuration(const StackDuration &Left,
+ const StackDuration &Right) {
StackDuration Data{};
Data.TerminalDurations.reserve(Left.TerminalDurations.size() +
Right.TerminalDurations.size());
@@ -280,7 +282,7 @@ StackDuration mergeStackDuration(const StackDuration &Left,
using StackTrieNode = TrieNode<StackDuration>;
template <AggregationType AggType>
-std::size_t GetValueForStack(const StackTrieNode *Node);
+static std::size_t GetValueForStack(const StackTrieNode *Node);
// When computing total time spent in a stack, we're adding the timings from
// its callees and the timings from when it was a leaf.
@@ -454,8 +456,7 @@ public:
int Level = 0;
OS << formatv("{0,-5} {1,-60} {2,+12} {3,+16}\n", "lvl", "function",
"count", "sum");
- for (auto *F :
- reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) {
+ for (auto *F : reverse(drop_begin(CurrentStack))) {
auto Sum = std::accumulate(F->ExtraData.IntermediateDurations.begin(),
F->ExtraData.IntermediateDurations.end(), 0LL);
auto FuncId = FN.SymbolOrNumber(F->FuncId);
@@ -638,10 +639,8 @@ public:
{
auto E =
std::make_pair(Top, Top->ExtraData.TerminalDurations.size());
- TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(),
- TopStacksByCount.end(), E,
- greater_second),
- E);
+ TopStacksByCount.insert(
+ llvm::lower_bound(TopStacksByCount, E, greater_second), E);
if (TopStacksByCount.size() == 11)
TopStacksByCount.pop_back();
}
@@ -669,9 +668,9 @@ public:
}
};
-std::string CreateErrorMessage(StackTrie::AccountRecordStatus Error,
- const XRayRecord &Record,
- const FuncIdConversionHelper &Converter) {
+static std::string CreateErrorMessage(StackTrie::AccountRecordStatus Error,
+ const XRayRecord &Record,
+ const FuncIdConversionHelper &Converter) {
switch (Error) {
case StackTrie::AccountRecordStatus::ENTRY_NOT_FOUND:
return std::string(
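
Note: in the xray-stacks hunk above, reverse(make_range(begin() + 1, end())) becomes reverse(drop_begin(CurrentStack)), and the file-local helpers become static or move into an anonymous namespace. A small sketch of the range idiom, with a hypothetical sumAboveRoot() helper:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    // reverse(drop_begin(R)) walks a range from its last element down to its
    // second one, i.e. the whole stack except the root entry.
    static long long sumAboveRoot(const std::vector<long long> &Stack) {
      if (Stack.empty())
        return 0;
      long long Sum = 0;
      for (long long V : llvm::reverse(llvm::drop_begin(Stack)))
        Sum += V;
      return Sum;
    }
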
diff --git a/contrib/llvm-project/llvm/tools/opt/NewPMDriver.cpp b/contrib/llvm-project/llvm/tools/opt/NewPMDriver.cpp
index b94c58decdda..401a58fc154a 100644
--- a/contrib/llvm-project/llvm/tools/opt/NewPMDriver.cpp
+++ b/contrib/llvm-project/llvm/tools/opt/NewPMDriver.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Dominators.h"
@@ -29,17 +30,28 @@
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Passes/StandardInstrumentations.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/Debugify.h"
using namespace llvm;
using namespace opt_tool;
+namespace llvm {
+cl::opt<bool> DebugifyEach(
+ "debugify-each",
+ cl::desc("Start each pass with debugify and end it with check-debugify"));
+
+cl::opt<std::string>
+ DebugifyExport("debugify-export",
+ cl::desc("Export per-pass debugify statistics to this file"),
+ cl::value_desc("filename"));
+} // namespace llvm
+
static cl::opt<bool>
DebugPM("debug-pass-manager", cl::Hidden,
cl::desc("Print pass management debugging information"));
@@ -55,7 +67,7 @@ static cl::opt<std::string>
AAPipeline("aa-pipeline",
cl::desc("A textual description of the alias analysis "
"pipeline for handling managed aliasing queries"),
- cl::Hidden);
+ cl::Hidden, cl::init("default"));
/// {{@ These options accept textual pipeline descriptions which will be
/// inserted into default pipelines at the respective extension points
@@ -92,12 +104,17 @@ static cl::opt<std::string> VectorizerStartEPPipeline(
cl::Hidden);
static cl::opt<std::string> PipelineStartEPPipeline(
"passes-ep-pipeline-start",
- cl::desc("A textual description of the function pass pipeline inserted at "
+ cl::desc("A textual description of the module pass pipeline inserted at "
"the PipelineStart extension point into default pipelines"),
cl::Hidden);
+static cl::opt<std::string> PipelineEarlySimplificationEPPipeline(
+ "passes-ep-pipeline-early-simplification",
+ cl::desc("A textual description of the module pass pipeline inserted at "
+ "the EarlySimplification extension point into default pipelines"),
+ cl::Hidden);
static cl::opt<std::string> OptimizerLastEPPipeline(
"passes-ep-optimizer-last",
- cl::desc("A textual description of the function pass pipeline inserted at "
+ cl::desc("A textual description of the module pass pipeline inserted at "
"the OptimizerLast extension point into default pipelines"),
cl::Hidden);
@@ -108,6 +125,7 @@ extern cl::opt<PGOKind> PGOKindFlag;
extern cl::opt<std::string> ProfileFile;
extern cl::opt<CSPGOKind> CSPGOKindFlag;
extern cl::opt<std::string> CSProfileGenFile;
+extern cl::opt<bool> DisableBasicAA;
static cl::opt<std::string>
ProfileRemappingFile("profile-remapping-file",
@@ -116,6 +134,13 @@ static cl::opt<std::string>
static cl::opt<bool> DebugInfoForProfiling(
"new-pm-debug-info-for-profiling", cl::init(false), cl::Hidden,
cl::desc("Emit special debug info to enable PGO profile generation."));
+static cl::opt<bool> PseudoProbeForProfiling(
+ "new-pm-pseudo-probe-for-profiling", cl::init(false), cl::Hidden,
+ cl::desc("Emit pseudo probes to enable PGO profile generation."));
+static cl::opt<bool> UniqueInternalLinkageNames(
+ "new-pm-unique-internal-linkage-names", cl::init(false), cl::Hidden,
+ cl::desc("Uniqueify Internal Linkage Symbol Names by appending the MD5 "
+ "hash of the module path."));
/// @}}
template <typename PassManagerT>
@@ -137,72 +162,63 @@ bool tryParsePipelineText(PassBuilder &PB,
/// If one of the EPPipeline command line options was given, register callbacks
/// for parsing and inserting the given pipeline
-static void registerEPCallbacks(PassBuilder &PB, bool VerifyEachPass,
- bool DebugLogging) {
+static void registerEPCallbacks(PassBuilder &PB) {
if (tryParsePipelineText<FunctionPassManager>(PB, PeepholeEPPipeline))
PB.registerPeepholeEPCallback(
- [&PB, VerifyEachPass, DebugLogging](
- FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
ExitOnError Err("Unable to parse PeepholeEP pipeline: ");
- Err(PB.parsePassPipeline(PM, PeepholeEPPipeline, VerifyEachPass,
- DebugLogging));
+ Err(PB.parsePassPipeline(PM, PeepholeEPPipeline));
});
if (tryParsePipelineText<LoopPassManager>(PB,
LateLoopOptimizationsEPPipeline))
PB.registerLateLoopOptimizationsEPCallback(
- [&PB, VerifyEachPass, DebugLogging](
- LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
ExitOnError Err("Unable to parse LateLoopOptimizationsEP pipeline: ");
- Err(PB.parsePassPipeline(PM, LateLoopOptimizationsEPPipeline,
- VerifyEachPass, DebugLogging));
+ Err(PB.parsePassPipeline(PM, LateLoopOptimizationsEPPipeline));
});
if (tryParsePipelineText<LoopPassManager>(PB, LoopOptimizerEndEPPipeline))
PB.registerLoopOptimizerEndEPCallback(
- [&PB, VerifyEachPass, DebugLogging](
- LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
ExitOnError Err("Unable to parse LoopOptimizerEndEP pipeline: ");
- Err(PB.parsePassPipeline(PM, LoopOptimizerEndEPPipeline,
- VerifyEachPass, DebugLogging));
+ Err(PB.parsePassPipeline(PM, LoopOptimizerEndEPPipeline));
});
if (tryParsePipelineText<FunctionPassManager>(PB,
ScalarOptimizerLateEPPipeline))
PB.registerScalarOptimizerLateEPCallback(
- [&PB, VerifyEachPass, DebugLogging](
- FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
ExitOnError Err("Unable to parse ScalarOptimizerLateEP pipeline: ");
- Err(PB.parsePassPipeline(PM, ScalarOptimizerLateEPPipeline,
- VerifyEachPass, DebugLogging));
+ Err(PB.parsePassPipeline(PM, ScalarOptimizerLateEPPipeline));
});
if (tryParsePipelineText<CGSCCPassManager>(PB, CGSCCOptimizerLateEPPipeline))
PB.registerCGSCCOptimizerLateEPCallback(
- [&PB, VerifyEachPass, DebugLogging](
- CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
ExitOnError Err("Unable to parse CGSCCOptimizerLateEP pipeline: ");
- Err(PB.parsePassPipeline(PM, CGSCCOptimizerLateEPPipeline,
- VerifyEachPass, DebugLogging));
+ Err(PB.parsePassPipeline(PM, CGSCCOptimizerLateEPPipeline));
});
if (tryParsePipelineText<FunctionPassManager>(PB, VectorizerStartEPPipeline))
PB.registerVectorizerStartEPCallback(
- [&PB, VerifyEachPass, DebugLogging](
- FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
ExitOnError Err("Unable to parse VectorizerStartEP pipeline: ");
- Err(PB.parsePassPipeline(PM, VectorizerStartEPPipeline,
- VerifyEachPass, DebugLogging));
+ Err(PB.parsePassPipeline(PM, VectorizerStartEPPipeline));
});
if (tryParsePipelineText<ModulePassManager>(PB, PipelineStartEPPipeline))
PB.registerPipelineStartEPCallback(
- [&PB, VerifyEachPass, DebugLogging](ModulePassManager &PM) {
+ [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) {
ExitOnError Err("Unable to parse PipelineStartEP pipeline: ");
- Err(PB.parsePassPipeline(PM, PipelineStartEPPipeline, VerifyEachPass,
- DebugLogging));
+ Err(PB.parsePassPipeline(PM, PipelineStartEPPipeline));
+ });
+ if (tryParsePipelineText<ModulePassManager>(
+ PB, PipelineEarlySimplificationEPPipeline))
+ PB.registerPipelineEarlySimplificationEPCallback(
+ [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) {
+ ExitOnError Err("Unable to parse EarlySimplification pipeline: ");
+ Err(PB.parsePassPipeline(PM, PipelineEarlySimplificationEPPipeline));
});
if (tryParsePipelineText<FunctionPassManager>(PB, OptimizerLastEPPipeline))
PB.registerOptimizerLastEPCallback(
- [&PB, VerifyEachPass, DebugLogging](ModulePassManager &PM,
- PassBuilder::OptimizationLevel) {
+ [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) {
ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
- Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline, VerifyEachPass,
- DebugLogging));
+ Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline));
});
}
@@ -211,7 +227,8 @@ static void registerEPCallbacks(PassBuilder &PB, bool VerifyEachPass,
#include "llvm/Support/Extension.def"
bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
- ToolOutputFile *Out, ToolOutputFile *ThinLTOLinkOut,
+ TargetLibraryInfoImpl *TLII, ToolOutputFile *Out,
+ ToolOutputFile *ThinLTOLinkOut,
ToolOutputFile *OptRemarkFile,
StringRef PassPipeline, ArrayRef<StringRef> Passes,
OutputKind OK, VerifierKind VK,
@@ -237,6 +254,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
if (DebugInfoForProfiling)
P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction,
true);
+ else if (PseudoProbeForProfiling)
+ P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction,
+ false, true);
else
P = None;
}
@@ -260,8 +280,11 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
}
}
PassInstrumentationCallbacks PIC;
- StandardInstrumentations SI;
+ StandardInstrumentations SI(DebugPM, VerifyEachPass);
SI.registerCallbacks(PIC);
+ DebugifyEachInstrumentation Debugify;
+ if (DebugifyEach)
+ Debugify.registerCallbacks(PIC);
PipelineTuningOptions PTO;
// LoopUnrolling defaults on to true and DisableLoopUnrolling is initialized
@@ -269,8 +292,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
// option has been enabled.
PTO.LoopUnrolling = !DisableLoopUnrolling;
PTO.Coroutines = Coroutines;
- PassBuilder PB(TM, PTO, P, &PIC);
- registerEPCallbacks(PB, VerifyEachPass, DebugPM);
+ PTO.UniqueLinkageNames = UniqueInternalLinkageNames;
+ PassBuilder PB(DebugPM, TM, PTO, P, &PIC);
+ registerEPCallbacks(PB);
// Load requested pass plugins and let them register pass builder callbacks
for (auto &PluginFN : PassPlugins) {
@@ -297,6 +321,25 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
}
return false;
});
+ PB.registerPipelineParsingCallback(
+ [](StringRef Name, ModulePassManager &MPM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+ if (Name == "asan-pipeline") {
+ MPM.addPass(
+ RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(AddressSanitizerPass()));
+ MPM.addPass(ModuleAddressSanitizerPass());
+ return true;
+ } else if (Name == "asan-function-pipeline") {
+ MPM.addPass(
+ RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(AddressSanitizerPass()));
+ return true;
+ }
+ return false;
+ });
#define HANDLE_EXTENSION(Ext) \
get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB);
@@ -305,25 +348,33 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
// Specially handle the alias analysis manager so that we can register
// a custom pipeline of AA passes with it.
AAManager AA;
- if (!AAPipeline.empty()) {
- assert(Passes.empty() &&
- "--aa-pipeline and -foo-pass should not both be specified");
+ if (Passes.empty()) {
if (auto Err = PB.parseAAPipeline(AA, AAPipeline)) {
errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
return false;
}
}
+
+ // For compatibility with the legacy PM AA pipeline.
+ // AAResultsWrapperPass by default provides basic-aa in the legacy PM
+ // unless -disable-basic-aa is specified.
+ // TODO: remove this once tests implicitly requiring basic-aa use -passes= and
+ // -aa-pipeline=basic-aa.
+ if (!Passes.empty() && !DisableBasicAA) {
+ if (auto Err = PB.parseAAPipeline(AA, "basic-aa")) {
+ errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
+ return false;
+ }
+ }
+
// For compatibility with legacy pass manager.
// Alias analyses are not specially specified when using the legacy PM.
- SmallVector<StringRef, 4> NonAAPasses;
for (auto PassName : Passes) {
if (PB.isAAPassName(PassName)) {
if (auto Err = PB.parseAAPipeline(AA, PassName)) {
errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
return false;
}
- } else {
- NonAAPasses.push_back(PassName);
}
}
@@ -334,6 +385,8 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
// Register the AA manager first so that our version is the one used.
FAM.registerPass([&] { return std::move(AA); });
+ // Register our TargetLibraryInfoImpl.
+ FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
// Register all the basic analyses with the managers.
PB.registerModuleAnalyses(MAM);
@@ -351,18 +404,16 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
if (!PassPipeline.empty()) {
assert(Passes.empty() &&
"PassPipeline and Passes should not both contain passes");
- if (auto Err =
- PB.parsePassPipeline(MPM, PassPipeline, VerifyEachPass, DebugPM)) {
+ if (auto Err = PB.parsePassPipeline(MPM, PassPipeline)) {
errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
return false;
}
}
- for (auto PassName : NonAAPasses) {
+ for (auto PassName : Passes) {
std::string ModifiedPassName(PassName.begin(), PassName.end());
if (PB.isAnalysisPassName(PassName))
ModifiedPassName = "require<" + ModifiedPassName + ">";
- if (auto Err = PB.parsePassPipeline(MPM, ModifiedPassName, VerifyEachPass,
- DebugPM)) {
+ if (auto Err = PB.parsePassPipeline(MPM, ModifiedPassName)) {
errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
return false;
}
@@ -407,5 +458,8 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
if (OptRemarkFile)
OptRemarkFile->keep();
+ if (DebugifyEach && !DebugifyExport.empty())
+ exportDebugifyStats(DebugifyExport, Debugify.StatsMap);
+
return true;
}
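The hunk above wires the "asan-pipeline" and "asan-function-pipeline" names into opt through PassBuilder::registerPipelineParsingCallback. As a minimal sketch of that same mechanism (not part of this patch; "my-verify" and registerMyParsingCallback are placeholder names), a tool or pass plugin could expose its own -passes= name like this:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Passes/PassBuilder.h"
using namespace llvm;

// Placeholder example: expose a module pipeline named "my-verify" so it can be
// requested with -passes=my-verify. Returning false lets other parsers try.
static void registerMyParsingCallback(PassBuilder &PB) {
  PB.registerPipelineParsingCallback(
      [](StringRef Name, ModulePassManager &MPM,
         ArrayRef<PassBuilder::PipelineElement>) {
        if (Name == "my-verify") {
          MPM.addPass(VerifierPass());
          return true;
        }
        return false;
      });
}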
diff --git a/contrib/llvm-project/llvm/tools/opt/NewPMDriver.h b/contrib/llvm-project/llvm/tools/opt/NewPMDriver.h
index 7ae273a2c1f4..87a71cec4c53 100644
--- a/contrib/llvm-project/llvm/tools/opt/NewPMDriver.h
+++ b/contrib/llvm-project/llvm/tools/opt/NewPMDriver.h
@@ -21,12 +21,17 @@
#define LLVM_TOOLS_OPT_NEWPMDRIVER_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/CommandLine.h"
namespace llvm {
class StringRef;
class Module;
class TargetMachine;
class ToolOutputFile;
+class TargetLibraryInfoImpl;
+
+extern cl::opt<bool> DebugifyEach;
+extern cl::opt<std::string> DebugifyExport;
namespace opt_tool {
enum OutputKind {
@@ -59,10 +64,10 @@ enum CSPGOKind { NoCSPGO, CSInstrGen, CSInstrUse };
/// ThinLTOLinkOut is only used when OK is OK_OutputThinLTOBitcode, and can be
/// nullptr.
bool runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
- ToolOutputFile *Out, ToolOutputFile *ThinLinkOut,
- ToolOutputFile *OptRemarkFile, StringRef PassPipeline,
- ArrayRef<StringRef> PassInfos, opt_tool::OutputKind OK,
- opt_tool::VerifierKind VK,
+ TargetLibraryInfoImpl *TLII, ToolOutputFile *Out,
+ ToolOutputFile *ThinLinkOut, ToolOutputFile *OptRemarkFile,
+ StringRef PassPipeline, ArrayRef<StringRef> PassInfos,
+ opt_tool::OutputKind OK, opt_tool::VerifierKind VK,
bool ShouldPreserveAssemblyUseListOrder,
bool ShouldPreserveBitcodeUseListOrder,
bool EmitSummaryIndex, bool EmitModuleHash,
diff --git a/contrib/llvm-project/llvm/tools/opt/opt.cpp b/contrib/llvm-project/llvm/tools/opt/opt.cpp
index c250eefb8c43..5cb59f85ccf8 100644
--- a/contrib/llvm-project/llvm/tools/opt/opt.cpp
+++ b/contrib/llvm-project/llvm/tools/opt/opt.cpp
@@ -22,13 +22,11 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/AsmParser/Parser.h"
-#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -40,6 +38,7 @@
#include "llvm/LinkAllIR.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@@ -71,8 +70,10 @@ static codegen::RegisterCodeGenFlags CFG;
static cl::list<const PassInfo*, bool, PassNameParser>
PassList(cl::desc("Optimizations available:"));
-static cl::opt<bool> EnableNewPassManager(
- "enable-new-pm", cl::desc("Enable the new pass manager"), cl::init(false));
+static cl::opt<bool>
+ EnableNewPassManager("enable-new-pm",
+ cl::desc("Enable the new pass manager"),
+ cl::init(LLVM_ENABLE_NEW_PASS_MANAGER));
// This flag specifies a textual description of the optimization pass pipeline
// to run over the module. This flag switches opt to use the new pass manager
@@ -211,16 +212,6 @@ static cl::opt<bool> EnableDebugify(
cl::desc(
"Start the pipeline with debugify and end it with check-debugify"));
-static cl::opt<bool> DebugifyEach(
- "debugify-each",
- cl::desc(
- "Start each pass with debugify and end it with check-debugify"));
-
-static cl::opt<std::string>
- DebugifyExport("debugify-export",
- cl::desc("Export per-pass debugify statistics to this file"),
- cl::value_desc("filename"), cl::init(""));
-
static cl::opt<bool>
PrintBreakpoints("print-breakpoints-for-testing",
cl::desc("Print select breakpoints location for testing"));
@@ -274,11 +265,13 @@ static cl::opt<bool> RemarksWithHotness(
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
-static cl::opt<unsigned>
- RemarksHotnessThreshold("pass-remarks-hotness-threshold",
- cl::desc("Minimum profile count required for "
- "an optimization remark to be output"),
- cl::Hidden);
+static cl::opt<Optional<uint64_t>, false, remarks::HotnessThresholdParser>
+ RemarksHotnessThreshold(
+ "pass-remarks-hotness-threshold",
+ cl::desc("Minimum profile count required for "
+ "an optimization remark to be output. "
+ "Use 'auto' to apply the threshold from profile summary."),
+ cl::value_desc("N or 'auto'"), cl::init(0), cl::Hidden);
static cl::opt<std::string>
RemarksFilename("pass-remarks-output",
@@ -325,48 +318,6 @@ cl::opt<std::string> CSProfileGenFile(
cl::desc("Path to the instrumented context sensitive profile."),
cl::Hidden);
-class OptCustomPassManager : public legacy::PassManager {
- DebugifyStatsMap DIStatsMap;
-
-public:
- using super = legacy::PassManager;
-
- void add(Pass *P) override {
- // Wrap each pass with (-check)-debugify passes if requested, making
- // exceptions for passes which shouldn't see -debugify instrumentation.
- bool WrapWithDebugify = DebugifyEach && !P->getAsImmutablePass() &&
- !isIRPrintingPass(P) && !isBitcodeWriterPass(P);
- if (!WrapWithDebugify) {
- super::add(P);
- return;
- }
-
- // Apply -debugify/-check-debugify before/after each pass and collect
- // debug info loss statistics.
- PassKind Kind = P->getPassKind();
- StringRef Name = P->getPassName();
-
- // TODO: Implement Debugify for LoopPass.
- switch (Kind) {
- case PT_Function:
- super::add(createDebugifyFunctionPass());
- super::add(P);
- super::add(createCheckDebugifyFunctionPass(true, Name, &DIStatsMap));
- break;
- case PT_Module:
- super::add(createDebugifyModulePass());
- super::add(P);
- super::add(createCheckDebugifyModulePass(true, Name, &DIStatsMap));
- break;
- default:
- super::add(P);
- break;
- }
- }
-
- const DebugifyStatsMap &getDebugifyStatsMap() const { return DIStatsMap; }
-};
-
static inline void addPass(legacy::PassManagerBase &PM, Pass *P) {
// Add the pass to the pass manager...
PM.add(P);
@@ -490,28 +441,6 @@ static TargetMachine* GetTargetMachine(Triple TheTriple, StringRef CPUStr,
void initializeExampleIRTransforms(llvm::PassRegistry &Registry);
#endif
-
-void exportDebugifyStats(llvm::StringRef Path, const DebugifyStatsMap &Map) {
- std::error_code EC;
- raw_fd_ostream OS{Path, EC};
- if (EC) {
- errs() << "Could not open file: " << EC.message() << ", " << Path << '\n';
- return;
- }
-
- OS << "Pass Name" << ',' << "# of missing debug values" << ','
- << "# of missing locations" << ',' << "Missing/Expected value ratio" << ','
- << "Missing/Expected location ratio" << '\n';
- for (const auto &Entry : Map) {
- StringRef Pass = Entry.first;
- DebugifyStatistics Stats = Entry.second;
-
- OS << Pass << ',' << Stats.NumDbgValuesMissing << ','
- << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ','
- << Stats.getEmptyLocationRatio() << '\n';
- }
-}
-
struct TimeTracerRAII {
TimeTracerRAII(StringRef ProgramName) {
if (TimeTrace)
@@ -530,6 +459,67 @@ struct TimeTracerRAII {
}
};
+// For use in NPM transition. Currently this contains most codegen-specific
+// passes. Remove passes from here when porting to the NPM.
+// TODO: use a codegen version of PassRegistry.def/PassBuilder::is*Pass() once
+// it exists.
+static bool shouldPinPassToLegacyPM(StringRef Pass) {
+ std::vector<StringRef> PassNameExactToIgnore = {
+ "nvvm-reflect",
+ "nvvm-intr-range",
+ "amdgpu-simplifylib",
+ "amdgpu-usenative",
+ "amdgpu-promote-alloca",
+ "amdgpu-promote-alloca-to-vector",
+ "amdgpu-lower-kernel-attributes",
+ "amdgpu-propagate-attributes-early",
+ "amdgpu-propagate-attributes-late",
+ "amdgpu-unify-metadata",
+ "amdgpu-printf-runtime-binding",
+ "amdgpu-always-inline"};
+ for (const auto &P : PassNameExactToIgnore)
+ if (Pass == P)
+ return false;
+
+ std::vector<StringRef> PassNamePrefix = {
+ "x86-", "xcore-", "wasm-", "systemz-", "ppc-", "nvvm-", "nvptx-",
+ "mips-", "lanai-", "hexagon-", "bpf-", "avr-", "thumb2-", "arm-",
+ "si-", "gcn-", "amdgpu-", "aarch64-", "amdgcn-", "polly-"};
+ std::vector<StringRef> PassNameContain = {"ehprepare"};
+ std::vector<StringRef> PassNameExact = {
+ "safe-stack", "cost-model",
+ "codegenprepare", "interleaved-load-combine",
+ "unreachableblockelim", "verify-safepoint-ir",
+ "divergence", "atomic-expand",
+ "hardware-loops", "type-promotion",
+ "mve-tail-predication", "interleaved-access",
+ "global-merge", "pre-isel-intrinsic-lowering",
+ "expand-reductions", "indirectbr-expand",
+ "generic-to-nvvm", "expandmemcmp",
+ "loop-reduce", "lower-amx-type",
+ "polyhedral-info"};
+ for (const auto &P : PassNamePrefix)
+ if (Pass.startswith(P))
+ return true;
+ for (const auto &P : PassNameContain)
+ if (Pass.contains(P))
+ return true;
+ for (const auto &P : PassNameExact)
+ if (Pass == P)
+ return true;
+ return false;
+}
+
+// For use in NPM transition.
+static bool shouldForceLegacyPM() {
+ for (const auto &P : PassList) {
+ StringRef Arg = P->getPassArgument();
+ if (shouldPinPassToLegacyPM(Arg))
+ return true;
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// main for opt
//
@@ -563,12 +553,12 @@ int main(int argc, char **argv) {
// For codegen passes, only passes that do IR to IR transformation are
// supported.
initializeExpandMemCmpPassPass(Registry);
- initializeScalarizeMaskedMemIntrinPass(Registry);
+ initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeAtomicExpandPass(Registry);
initializeRewriteSymbolsLegacyPassPass(Registry);
initializeWinEHPreparePass(Registry);
- initializeDwarfEHPreparePass(Registry);
+ initializeDwarfEHPrepareLegacyPassPass(Registry);
initializeSafeStackLegacyPassPass(Registry);
initializeSjLjEHPreparePass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
@@ -700,7 +690,8 @@ int main(int argc, char **argv) {
Triple ModuleTriple(M->getTargetTriple());
std::string CPUStr, FeaturesStr;
TargetMachine *Machine = nullptr;
- const TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
+ const TargetOptions Options =
+ codegen::InitTargetOptionsFromCodeGenFlags(ModuleTriple);
if (ModuleTriple.getArch()) {
CPUStr = codegen::getCPUStr();
@@ -729,16 +720,41 @@ int main(int argc, char **argv) {
if (OutputThinLTOBC)
M->addModuleFlag(Module::Error, "EnableSplitLTOUnit", SplitLTOUnit);
- if (EnableNewPassManager || PassPipeline.getNumOccurrences() > 0) {
+ // Add an appropriate TargetLibraryInfo pass for the module's triple.
+ TargetLibraryInfoImpl TLII(ModuleTriple);
+
+ // The -disable-simplify-libcalls flag actually disables all builtin optzns.
+ if (DisableSimplifyLibCalls)
+ TLII.disableAllFunctions();
+ else {
+ // Disable individual builtin functions in TargetLibraryInfo.
+ LibFunc F;
+ for (auto &FuncName : DisableBuiltins)
+ if (TLII.getLibFunc(FuncName, F))
+ TLII.setUnavailable(F);
+ else {
+ errs() << argv[0] << ": cannot disable nonexistent builtin function "
+ << FuncName << '\n';
+ return 1;
+ }
+ }
+
+ // If `-passes=` is specified, use NPM.
+ // If `-enable-new-pm` is specified and there are no codegen passes, use NPM.
+ // e.g. `-enable-new-pm -sroa` will use NPM.
+ // but `-enable-new-pm -codegenprepare` will still revert to legacy PM.
+ if ((EnableNewPassManager && !shouldForceLegacyPM()) ||
+ PassPipeline.getNumOccurrences() > 0) {
+ if (AnalyzeOnly) {
+ errs() << "Cannot specify -analyze under new pass manager\n";
+ return 1;
+ }
if (PassPipeline.getNumOccurrences() > 0 && PassList.size() > 0) {
errs()
- << "Cannot specify passes via both -foo-pass and --passes=foo-pass";
+ << "Cannot specify passes via both -foo-pass and --passes=foo-pass\n";
return 1;
}
SmallVector<StringRef, 4> Passes;
- for (const auto &P : PassList) {
- Passes.push_back(P->getPassArgument());
- }
if (OptLevelO0)
Passes.push_back("default<O0>");
if (OptLevelO1)
@@ -751,6 +767,8 @@ int main(int argc, char **argv) {
Passes.push_back("default<Os>");
if (OptLevelOz)
Passes.push_back("default<Oz>");
+ for (const auto &P : PassList)
+ Passes.push_back(P->getPassArgument());
OutputKind OK = OK_NoOutput;
if (!NoOutput)
OK = OutputAssembly
@@ -766,9 +784,9 @@ int main(int argc, char **argv) {
// The user has asked to use the new pass manager and provided a pipeline
// string. Hand off the rest of the functionality to the new code for that
// layer.
- return runPassPipeline(argv[0], *M, TM.get(), Out.get(), ThinLinkOut.get(),
- RemarksFile.get(), PassPipeline, Passes, OK, VK,
- PreserveAssemblyUseListOrder,
+ return runPassPipeline(argv[0], *M, TM.get(), &TLII, Out.get(),
+ ThinLinkOut.get(), RemarksFile.get(), PassPipeline,
+ Passes, OK, VK, PreserveAssemblyUseListOrder,
PreserveBitcodeUseListOrder, EmitSummaryIndex,
EmitModuleHash, EnableDebugify, Coroutines)
? 0
@@ -776,28 +794,13 @@ int main(int argc, char **argv) {
}
// Create a PassManager to hold and optimize the collection of passes we are
- // about to build.
- OptCustomPassManager Passes;
- bool AddOneTimeDebugifyPasses = EnableDebugify && !DebugifyEach;
+ // about to build. If the -debugify-each option is set, wrap each pass with
+ // the (-check)-debugify passes.
+ DebugifyCustomPassManager Passes;
+ if (DebugifyEach)
+ Passes.enableDebugifyEach();
- // Add an appropriate TargetLibraryInfo pass for the module's triple.
- TargetLibraryInfoImpl TLII(ModuleTriple);
-
- // The -disable-simplify-libcalls flag actually disables all builtin optzns.
- if (DisableSimplifyLibCalls)
- TLII.disableAllFunctions();
- else {
- // Disable individual builtin functions in TargetLibraryInfo.
- LibFunc F;
- for (auto &FuncName : DisableBuiltins)
- if (TLII.getLibFunc(FuncName, F))
- TLII.setUnavailable(F);
- else {
- errs() << argv[0] << ": cannot disable nonexistent builtin function "
- << FuncName << '\n';
- return 1;
- }
- }
+ bool AddOneTimeDebugifyPasses = EnableDebugify && !DebugifyEach;
Passes.add(new TargetLibraryInfoWrapperPass(TLII));
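With this change opt builds the TargetLibraryInfoImpl before it chooses a pass manager, so both the legacy path and runPassPipeline share the same preset, and the new-PM path registers it with the FunctionAnalysisManager. A minimal sketch of that handoff, assuming an invented helper name registerPresetTLI (illustration only, not code from the patch):

#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Build a preset TargetLibraryInfoImpl for a triple, optionally mark one
// builtin unavailable, and register it so TargetLibraryAnalysis returns this
// preset instead of the default for the triple.
static bool registerPresetTLI(FunctionAnalysisManager &FAM, const Triple &TT,
                              StringRef BuiltinToDisable) {
  TargetLibraryInfoImpl TLII(TT);
  if (!BuiltinToDisable.empty()) {
    LibFunc F;
    if (!TLII.getLibFunc(BuiltinToDisable, F))
      return false; // unknown builtin name, mirroring opt's error path
    TLII.setUnavailable(F);
  }
  FAM.registerPass([TLII] { return TargetLibraryAnalysis(TLII); });
  return true;
}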
diff --git a/contrib/llvm-project/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 3d63059dcb8b..9d304910ba4e 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -612,7 +612,7 @@ struct MatchableInfo {
/// operator< - Compare two matchables.
bool operator<(const MatchableInfo &RHS) const {
// The primary comparator is the instruction mnemonic.
- if (int Cmp = Mnemonic.compare(RHS.Mnemonic))
+ if (int Cmp = Mnemonic.compare_lower(RHS.Mnemonic))
return Cmp == -1;
if (AsmOperands.size() != RHS.AsmOperands.size())
@@ -789,9 +789,8 @@ public:
}
bool hasOptionalOperands() const {
- return find_if(Classes, [](const ClassInfo &Class) {
- return Class.IsOptional;
- }) != Classes.end();
+ return any_of(Classes,
+ [](const ClassInfo &Class) { return Class.IsOptional; });
}
};
@@ -1018,7 +1017,7 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info,
case '$': {
if (InTok) {
- addAsmOperand(String.slice(Prev, i), false);
+ addAsmOperand(String.slice(Prev, i), IsIsolatedToken);
InTok = false;
IsIsolatedToken = false;
}
@@ -1113,9 +1112,7 @@ static std::string getEnumNameForToken(StringRef Str) {
case '-': Res += "_MINUS_"; break;
case '#': Res += "_HASH_"; break;
default:
- if ((*it >= 'A' && *it <= 'Z') ||
- (*it >= 'a' && *it <= 'z') ||
- (*it >= '0' && *it <= '9'))
+ if (isAlnum(*it))
Res += *it;
else
Res += "_" + utostr((unsigned) *it) + "_";
@@ -1980,7 +1977,7 @@ emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
}
CvtOS << " unsigned OpIdx;\n";
CvtOS << " Inst.setOpcode(Opcode);\n";
- CvtOS << " for (const uint8_t *p = Converter; *p; p+= 2) {\n";
+ CvtOS << " for (const uint8_t *p = Converter; *p; p += 2) {\n";
if (HasOptionalOperands) {
CvtOS << " OpIdx = *(p + 1) - DefaultsOffset[*(p + 1)];\n";
} else {
@@ -1990,14 +1987,14 @@ emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
CvtOS << " default: llvm_unreachable(\"invalid conversion entry!\");\n";
CvtOS << " case CVT_Reg:\n";
CvtOS << " static_cast<" << TargetOperandClass
- << "&>(*Operands[OpIdx]).addRegOperands(Inst, 1);\n";
+ << " &>(*Operands[OpIdx]).addRegOperands(Inst, 1);\n";
CvtOS << " break;\n";
CvtOS << " case CVT_Tied: {\n";
CvtOS << " assert(OpIdx < (size_t)(std::end(TiedAsmOperandTable) -\n";
- CvtOS << " std::begin(TiedAsmOperandTable)) &&\n";
+ CvtOS << " std::begin(TiedAsmOperandTable)) &&\n";
CvtOS << " \"Tied operand not found\");\n";
CvtOS << " unsigned TiedResOpnd = TiedAsmOperandTable[OpIdx][0];\n";
- CvtOS << " if (TiedResOpnd != (uint8_t) -1)\n";
+ CvtOS << " if (TiedResOpnd != (uint8_t)-1)\n";
CvtOS << " Inst.addOperand(Inst.getOperand(TiedResOpnd));\n";
CvtOS << " break;\n";
CvtOS << " }\n";
@@ -2012,7 +2009,7 @@ emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
<< " assert(Kind < CVT_NUM_SIGNATURES && \"Invalid signature!\");\n"
<< " unsigned NumMCOperands = 0;\n"
<< " const uint8_t *Converter = ConversionTable[Kind];\n"
- << " for (const uint8_t *p = Converter; *p; p+= 2) {\n"
+ << " for (const uint8_t *p = Converter; *p; p += 2) {\n"
<< " switch (*p) {\n"
<< " default: llvm_unreachable(\"invalid conversion entry!\");\n"
<< " case CVT_Reg:\n"
@@ -2129,12 +2126,12 @@ emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
<< OpInfo.MINumOperands << ");\n"
<< " } else {\n"
<< " static_cast<" << TargetOperandClass
- << "&>(*Operands[OpIdx])." << Op.Class->RenderMethod
+ << " &>(*Operands[OpIdx])." << Op.Class->RenderMethod
<< "(Inst, " << OpInfo.MINumOperands << ");\n"
<< " }\n";
} else {
CvtOS << " static_cast<" << TargetOperandClass
- << "&>(*Operands[OpIdx])." << Op.Class->RenderMethod
+ << " &>(*Operands[OpIdx])." << Op.Class->RenderMethod
<< "(Inst, " << OpInfo.MINumOperands << ");\n";
}
CvtOS << " break;\n";
@@ -2390,9 +2387,9 @@ static void emitMatchClassEnumeration(CodeGenTarget &Target,
static void emitOperandMatchErrorDiagStrings(AsmMatcherInfo &Info, raw_ostream &OS) {
// If the target does not use DiagnosticString for any operands, don't emit
// an unused function.
- if (std::all_of(
- Info.Classes.begin(), Info.Classes.end(),
- [](const ClassInfo &CI) { return CI.DiagnosticString.empty(); }))
+ if (llvm::all_of(Info.Classes, [](const ClassInfo &CI) {
+ return CI.DiagnosticString.empty();
+ }))
return;
OS << "static const char *getMatchKindDiag(" << Info.Target.getName()
@@ -2448,7 +2445,7 @@ static void emitValidateOperandClass(AsmMatcherInfo &Info,
OS << "static unsigned validateOperandClass(MCParsedAsmOperand &GOp, "
<< "MatchClassKind Kind) {\n";
OS << " " << Info.Target.getName() << "Operand &Operand = ("
- << Info.Target.getName() << "Operand&)GOp;\n";
+ << Info.Target.getName() << "Operand &)GOp;\n";
// The InvalidMatchClass is not to match any operand.
OS << " if (Kind == InvalidMatchClass)\n";
@@ -2811,7 +2808,7 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info,
Record *AsmVariant = Target.getAsmParserVariant(VC);
int AsmParserVariantNo = AsmVariant->getValueAsInt("Variant");
StringRef AsmParserVariantName = AsmVariant->getValueAsString("Name");
- OS << " case " << AsmParserVariantNo << ":\n";
+ OS << " case " << AsmParserVariantNo << ":\n";
emitMnemonicAliasVariant(OS, Info, Aliases, /*Indent=*/2,
AsmParserVariantName);
OS << " break;\n";
@@ -2880,7 +2877,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
OS << " { ";
// Store a pascal-style length byte in the mnemonic.
- std::string LenMnemonic = char(II.Mnemonic.size()) + II.Mnemonic.str();
+ std::string LenMnemonic = char(II.Mnemonic.size()) + II.Mnemonic.lower();
OS << StringTable.GetOrAddStringOffset(LenMnemonic, false)
<< " /* " << II.Mnemonic << " */, ";
@@ -2978,7 +2975,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
"FeatureBitsets[it->RequiredFeaturesIdx];\n";
OS << " if (!ParseForAllFeatures && (AvailableFeatures & "
"RequiredFeatures) != RequiredFeatures)\n";
- OS << " continue;\n\n";
+ OS << " continue;\n\n";
// Emit check to ensure the operand number matches.
OS << " // check if the operand in question has a custom parser.\n";
@@ -3011,10 +3008,10 @@ static void emitAsmTiedOperandConstraints(CodeGenTarget &Target,
OS << " uint64_t &ErrorInfo) {\n";
OS << " assert(Kind < CVT_NUM_SIGNATURES && \"Invalid signature!\");\n";
OS << " const uint8_t *Converter = ConversionTable[Kind];\n";
- OS << " for (const uint8_t *p = Converter; *p; p+= 2) {\n";
+ OS << " for (const uint8_t *p = Converter; *p; p += 2) {\n";
OS << " switch (*p) {\n";
OS << " case CVT_Tied: {\n";
- OS << " unsigned OpIdx = *(p+1);\n";
+ OS << " unsigned OpIdx = *(p + 1);\n";
OS << " assert(OpIdx < (size_t)(std::end(TiedAsmOperandTable) -\n";
OS << " std::begin(TiedAsmOperandTable)) &&\n";
OS << " \"Tied operand not found\");\n";
@@ -3086,7 +3083,7 @@ static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target,
OS << "\n";
OS << " std::string Res = \", did you mean: \";\n";
OS << " unsigned i = 0;\n";
- OS << " for( ; i < Candidates.size() - 1; i++)\n";
+ OS << " for (; i < Candidates.size() - 1; i++)\n";
OS << " Res += Candidates[i].str() + \", \";\n";
OS << " return Res + Candidates[i].str() + \"?\";\n";
}
@@ -3094,6 +3091,67 @@ static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target,
OS << "\n";
}
+static void emitMnemonicChecker(raw_ostream &OS,
+ CodeGenTarget &Target,
+ unsigned VariantCount,
+ bool HasMnemonicFirst,
+ bool HasMnemonicAliases) {
+ OS << "static bool " << Target.getName()
+ << "CheckMnemonic(StringRef Mnemonic,\n";
+ OS << " "
+ << "const FeatureBitset &AvailableFeatures,\n";
+ OS << " "
+ << "unsigned VariantID) {\n";
+
+ if (!VariantCount) {
+ OS << " return false;\n";
+ } else {
+ if (HasMnemonicAliases) {
+ OS << " // Process all MnemonicAliases to remap the mnemonic.\n";
+ OS << " applyMnemonicAliases(Mnemonic, AvailableFeatures, VariantID);";
+ OS << "\n\n";
+ }
+ OS << " // Find the appropriate table for this asm variant.\n";
+ OS << " const MatchEntry *Start, *End;\n";
+ OS << " switch (VariantID) {\n";
+ OS << " default: llvm_unreachable(\"invalid variant!\");\n";
+ for (unsigned VC = 0; VC != VariantCount; ++VC) {
+ Record *AsmVariant = Target.getAsmParserVariant(VC);
+ int AsmVariantNo = AsmVariant->getValueAsInt("Variant");
+ OS << " case " << AsmVariantNo << ": Start = std::begin(MatchTable" << VC
+ << "); End = std::end(MatchTable" << VC << "); break;\n";
+ }
+ OS << " }\n\n";
+
+ OS << " // Search the table.\n";
+ if (HasMnemonicFirst) {
+ OS << " auto MnemonicRange = "
+ "std::equal_range(Start, End, Mnemonic, LessOpcode());\n\n";
+ } else {
+ OS << " auto MnemonicRange = std::make_pair(Start, End);\n";
+ OS << " unsigned SIndex = Mnemonic.empty() ? 0 : 1;\n";
+ OS << " if (!Mnemonic.empty())\n";
+ OS << " MnemonicRange = "
+ << "std::equal_range(Start, End, Mnemonic.lower(), LessOpcode());\n\n";
+ }
+
+ OS << " if (MnemonicRange.first == MnemonicRange.second)\n";
+ OS << " return false;\n\n";
+
+ OS << " for (const MatchEntry *it = MnemonicRange.first, "
+ << "*ie = MnemonicRange.second;\n";
+ OS << " it != ie; ++it) {\n";
+ OS << " const FeatureBitset &RequiredFeatures =\n";
+ OS << " FeatureBitsets[it->RequiredFeaturesIdx];\n";
+ OS << " if ((AvailableFeatures & RequiredFeatures) == ";
+ OS << "RequiredFeatures)\n";
+ OS << " return true;\n";
+ OS << " }\n";
+ OS << " return false;\n";
+ }
+ OS << "}\n";
+ OS << "\n";
+}
// Emit a function mapping match classes to strings, for debugging.
static void emitMatchClassKindNames(std::forward_list<ClassInfo> &Infos,
@@ -3194,7 +3252,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << "#undef GET_ASSEMBLER_HEADER\n";
OS << " // This should be included into the middle of the declaration of\n";
OS << " // your subclasses implementation of MCTargetAsmParser.\n";
- OS << " FeatureBitset ComputeAvailableFeatures(const FeatureBitset& FB) const;\n";
+ OS << " FeatureBitset ComputeAvailableFeatures(const FeatureBitset &FB) const;\n";
if (HasOptionalOperands) {
OS << " void convertToMCInst(unsigned Kind, MCInst &Inst, "
<< "unsigned Opcode,\n"
@@ -3324,7 +3382,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
HasDeprecation |= MI->HasDeprecation;
// Store a pascal-style length byte in the mnemonic.
- std::string LenMnemonic = char(MI->Mnemonic.size()) + MI->Mnemonic.str();
+ std::string LenMnemonic = char(MI->Mnemonic.size()) + MI->Mnemonic.lower();
MaxMnemonicIndex = std::max(MaxMnemonicIndex,
StringTable.GetOrAddStringOffset(LenMnemonic, false));
}
@@ -3438,7 +3496,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
continue;
// Store a pascal-style length byte in the mnemonic.
- std::string LenMnemonic = char(MI->Mnemonic.size()) + MI->Mnemonic.str();
+ std::string LenMnemonic =
+ char(MI->Mnemonic.size()) + MI->Mnemonic.lower();
OS << " { " << StringTable.GetOrAddStringOffset(LenMnemonic, false)
<< " /* " << MI->Mnemonic << " */, "
<< Target.getInstNamespace() << "::"
@@ -3497,12 +3556,12 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " // Get the instruction mnemonic, which is the first token.\n";
if (HasMnemonicFirst) {
OS << " StringRef Mnemonic = ((" << Target.getName()
- << "Operand&)*Operands[0]).getToken();\n\n";
+ << "Operand &)*Operands[0]).getToken();\n\n";
} else {
OS << " StringRef Mnemonic;\n";
OS << " if (Operands[0]->isToken())\n";
OS << " Mnemonic = ((" << Target.getName()
- << "Operand&)*Operands[0]).getToken();\n\n";
+ << "Operand &)*Operands[0]).getToken();\n\n";
}
if (HasMnemonicAliases) {
@@ -3552,7 +3611,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
}
OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"AsmMatcher: found \" <<\n"
- << " std::distance(MnemonicRange.first, MnemonicRange.second) << \n"
+ << " std::distance(MnemonicRange.first, MnemonicRange.second) <<\n"
<< " \" encodings with mnemonic '\" << Mnemonic << \"'\\n\");\n\n";
OS << " // Return a more specific error code if no mnemonics match.\n";
@@ -3727,10 +3786,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " FeatureBitset NewMissingFeatures = RequiredFeatures & "
"~AvailableFeatures;\n";
OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Missing target features:\";\n";
- OS << " for (unsigned I = 0, E = NewMissingFeatures.size(); I != E; ++I)\n";
- OS << " if (NewMissingFeatures[I])\n";
- OS << " dbgs() << ' ' << I;\n";
- OS << " dbgs() << \"\\n\");\n";
+ OS << " for (unsigned I = 0, E = NewMissingFeatures.size(); I != E; ++I)\n";
+ OS << " if (NewMissingFeatures[I])\n";
+ OS << " dbgs() << ' ' << I;\n";
+ OS << " dbgs() << \"\\n\");\n";
if (ReportMultipleNearMisses) {
OS << " FeaturesNearMiss = NearMissInfo::getMissedFeature(NewMissingFeatures);\n";
} else {
@@ -3865,7 +3924,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " getTargetOptions().MCNoDeprecatedWarn &&\n";
OS << " MII.getDeprecatedInfo(Inst, getSTI(), Info)) {\n";
OS << " SMLoc Loc = ((" << Target.getName()
- << "Operand&)*Operands[0]).getStartLoc();\n";
+ << "Operand &)*Operands[0]).getStartLoc();\n";
OS << " getParser().Warning(Loc, Info, None);\n";
OS << " }\n";
}
@@ -3908,6 +3967,14 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
emitMnemonicSpellChecker(OS, Target, VariantCount);
OS << "#endif // GET_MNEMONIC_SPELL_CHECKER\n\n";
+
+ OS << "\n#ifdef GET_MNEMONIC_CHECKER\n";
+ OS << "#undef GET_MNEMONIC_CHECKER\n\n";
+
+ emitMnemonicChecker(OS, Target, VariantCount,
+ HasMnemonicFirst, HasMnemonicAliases);
+
+ OS << "#endif // GET_MNEMONIC_CHECKER\n\n";
}
namespace llvm {
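The newly emitted checker sits behind a GET_MNEMONIC_CHECKER guard like the other generated AsmMatcher tables. A hedged usage sketch, assuming a placeholder target named Foo and its generated FooGenAsmMatcher.inc (neither is defined by this patch):

#include "llvm/ADT/StringRef.h"
#include "llvm/MC/SubtargetFeature.h"

// In a target's AsmParser source, next to the existing GET_ASSEMBLER_HEADER /
// GET_MATCHER_IMPLEMENTATION includes:
#define GET_MNEMONIC_CHECKER
#include "FooGenAsmMatcher.inc"

// The generated entry point is <Target>CheckMnemonic(); VariantID 0 is the
// default assembler variant.
static bool isKnownFooMnemonic(llvm::StringRef Mnemonic,
                               const llvm::FeatureBitset &Features) {
  return FooCheckMnemonic(Mnemonic, Features, /*VariantID=*/0);
}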
diff --git a/contrib/llvm-project/llvm/utils/TableGen/AsmWriterEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/AsmWriterEmitter.cpp
index d10ea71e97e3..92df204475b9 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -64,9 +64,15 @@ public:
AsmWriterEmitter(RecordKeeper &R);
void run(raw_ostream &o);
-
private:
- void EmitPrintInstruction(raw_ostream &o);
+ void EmitGetMnemonic(
+ raw_ostream &o,
+ std::vector<std::vector<std::string>> &TableDrivenOperandPrinters,
+ unsigned &BitsLeft, unsigned &AsmStrBits);
+ void EmitPrintInstruction(
+ raw_ostream &o,
+ std::vector<std::vector<std::string>> &TableDrivenOperandPrinters,
+ unsigned &BitsLeft, unsigned &AsmStrBits);
void EmitGetRegisterName(raw_ostream &o);
void EmitPrintAliasInstruction(raw_ostream &O);
@@ -212,12 +218,11 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
// Otherwise, scan to see if all of the other instructions in this command
// set share the operand.
- if (std::any_of(Idxs.begin()+1, Idxs.end(),
- [&](unsigned Idx) {
- const AsmWriterInst &OtherInst = Instructions[Idx];
- return OtherInst.Operands.size() == Op ||
- OtherInst.Operands[Op] != FirstInst.Operands[Op];
- }))
+ if (any_of(drop_begin(Idxs), [&](unsigned Idx) {
+ const AsmWriterInst &OtherInst = Instructions[Idx];
+ return OtherInst.Operands.size() == Op ||
+ OtherInst.Operands[Op] != FirstInst.Operands[Op];
+ }))
break;
// Okay, everything in this command set has the same next operand. Add it
@@ -288,22 +293,19 @@ static void UnescapeAliasString(std::string &Str) {
}
}
-/// EmitPrintInstruction - Generate the code for the "printInstruction" method
-/// implementation. Destroys all instances of AsmWriterInst information, by
-/// clearing the Instructions vector.
-void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
+void AsmWriterEmitter::EmitGetMnemonic(
+ raw_ostream &O,
+ std::vector<std::vector<std::string>> &TableDrivenOperandPrinters,
+ unsigned &BitsLeft, unsigned &AsmStrBits) {
Record *AsmWriter = Target.getAsmWriter();
StringRef ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
bool PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
- O << "/// printInstruction - This method is automatically generated by "
+ O << "/// getMnemonic - This method is automatically generated by "
"tablegen\n"
"/// from the instruction set description.\n"
- "void "
- << Target.getName() << ClassName
- << "::printInstruction(const MCInst *MI, uint64_t Address, "
- << (PassSubtarget ? "const MCSubtargetInfo &STI, " : "")
- << "raw_ostream &O) {\n";
+ "std::pair<const char *, uint64_t> "
+ << Target.getName() << ClassName << "::getMnemonic(const MCInst *MI) {\n";
// Build an aggregate string, and build a table of offsets into it.
SequenceToOffsetTable<std::string> StringTable;
@@ -349,13 +351,11 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
}
// Figure out how many bits we used for the string index.
- unsigned AsmStrBits = Log2_32_Ceil(MaxStringIdx+2);
+ AsmStrBits = Log2_32_Ceil(MaxStringIdx + 2);
// To reduce code size, we compactify common instructions into a few bits
// in the opcode-indexed table.
- unsigned BitsLeft = OpcodeInfoBits-AsmStrBits;
-
- std::vector<std::vector<std::string>> TableDrivenOperandPrinters;
+ BitsLeft = OpcodeInfoBits - AsmStrBits;
while (true) {
std::vector<std::string> UniqueOperandCommands;
@@ -435,15 +435,47 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
++Table;
}
- // Emit the initial tab character.
- O << " O << \"\\t\";\n\n";
-
O << " // Emit the opcode for the instruction.\n";
O << BitsString;
+ // Return mnemonic string and bits.
+ O << " return {AsmStrs+(Bits & " << (1 << AsmStrBits) - 1
+ << ")-1, Bits};\n\n";
+
+ O << "}\n";
+}
+
+/// EmitPrintInstruction - Generate the code for the "printInstruction" method
+/// implementation. Destroys all instances of AsmWriterInst information, by
+/// clearing the Instructions vector.
+void AsmWriterEmitter::EmitPrintInstruction(
+ raw_ostream &O,
+ std::vector<std::vector<std::string>> &TableDrivenOperandPrinters,
+ unsigned &BitsLeft, unsigned &AsmStrBits) {
+ const unsigned OpcodeInfoBits = 64;
+ Record *AsmWriter = Target.getAsmWriter();
+ StringRef ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
+ bool PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
+
+ O << "/// printInstruction - This method is automatically generated by "
+ "tablegen\n"
+ "/// from the instruction set description.\n"
+ "void "
+ << Target.getName() << ClassName
+ << "::printInstruction(const MCInst *MI, uint64_t Address, "
+ << (PassSubtarget ? "const MCSubtargetInfo &STI, " : "")
+ << "raw_ostream &O) {\n";
+
+ // Emit the initial tab character.
+ O << " O << \"\\t\";\n\n";
+
// Emit the starting string.
- O << " assert(Bits != 0 && \"Cannot print this instruction.\");\n"
- << " O << AsmStrs+(Bits & " << (1 << AsmStrBits)-1 << ")-1;\n\n";
+ O << " auto MnemonicInfo = getMnemonic(MI);\n\n";
+ O << " O << MnemonicInfo.first;\n\n";
+
+ O << " uint" << ((BitsLeft < (OpcodeInfoBits - 32)) ? 64 : 32)
+ << "_t Bits = MnemonicInfo.second;\n"
+ << " assert(Bits != 0 && \"Cannot print this instruction.\");\n";
// Output the table driven operand information.
BitsLeft = OpcodeInfoBits-AsmStrBits;
@@ -489,10 +521,8 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
}
// Okay, delete instructions with no operand info left.
- auto I = llvm::remove_if(Instructions,
- [](AsmWriterInst &Inst) { return Inst.Operands.empty(); });
- Instructions.erase(I, Instructions.end());
-
+ llvm::erase_if(Instructions,
+ [](AsmWriterInst &Inst) { return Inst.Operands.empty(); });
// Because this is a vector, we want to emit from the end. Reverse all of the
// elements in the vector.
@@ -683,9 +713,7 @@ public:
++Next;
} else {
// $name, just eat the usual suspects.
- while (I != End &&
- ((*I >= 'a' && *I <= 'z') || (*I >= 'A' && *I <= 'Z') ||
- (*I >= '0' && *I <= '9') || *I == '_'))
+ while (I != End && (isAlnum(*I) || *I == '_'))
++I;
Next = I;
}
@@ -1232,13 +1260,10 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
<< " break;\n";
for (unsigned i = 0; i < MCOpPredicates.size(); ++i) {
- Init *MCOpPred = MCOpPredicates[i]->getValueInit("MCOperandPredicate");
- if (CodeInit *SI = dyn_cast<CodeInit>(MCOpPred)) {
- O << " case " << i + 1 << ": {\n"
- << SI->getValue() << "\n"
- << " }\n";
- } else
- llvm_unreachable("Unexpected MCOperandPredicate field!");
+ StringRef MCOpPred = MCOpPredicates[i]->getValueAsString("MCOperandPredicate");
+ O << " case " << i + 1 << ": {\n"
+ << MCOpPred.data() << "\n"
+ << " }\n";
}
O << " }\n"
<< "}\n\n";
@@ -1262,7 +1287,11 @@ AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) {
}
void AsmWriterEmitter::run(raw_ostream &O) {
- EmitPrintInstruction(O);
+ std::vector<std::vector<std::string>> TableDrivenOperandPrinters;
+ unsigned BitsLeft = 0;
+ unsigned AsmStrBits = 0;
+ EmitGetMnemonic(O, TableDrivenOperandPrinters, BitsLeft, AsmStrBits);
+ EmitPrintInstruction(O, TableDrivenOperandPrinters, BitsLeft, AsmStrBits);
EmitGetRegisterName(O);
EmitPrintAliasInstruction(O);
}
diff --git a/contrib/llvm-project/llvm/utils/TableGen/AsmWriterInst.cpp b/contrib/llvm-project/llvm/utils/TableGen/AsmWriterInst.cpp
index 24d29ffc28e5..cf24f79334ca 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/AsmWriterInst.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/AsmWriterInst.cpp
@@ -18,12 +18,7 @@
using namespace llvm;
-static bool isIdentChar(char C) {
- return (C >= 'a' && C <= 'z') ||
- (C >= 'A' && C <= 'Z') ||
- (C >= '0' && C <= '9') ||
- C == '_';
-}
+static bool isIdentChar(char C) { return isAlnum(C) || C == '_'; }
std::string AsmWriterOperand::getCode(bool PassSubtarget) const {
if (OperandType == isLiteralTextOperand) {
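The character-class cleanups above replace open-coded range tests with llvm::isAlnum from StringExtras.h. A self-contained sanity check of the rewritten predicate (illustration only; isIdentCharSketch is an invented name):

#include "llvm/ADT/StringExtras.h"
#include <cassert>

// isAlnum(C) covers exactly the old (a-z || A-Z || 0-9) test, so the
// identifier check reduces to one call plus the underscore case.
static bool isIdentCharSketch(char C) { return llvm::isAlnum(C) || C == '_'; }

int main() {
  assert(isIdentCharSketch('q') && isIdentCharSketch('7') &&
         isIdentCharSketch('_') && !isIdentCharSketch('-'));
  return 0;
}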
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CallingConvEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/CallingConvEmitter.cpp
index a4e993f80ec9..9e997483d21d 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CallingConvEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CallingConvEmitter.cpp
@@ -38,6 +38,7 @@ void CallingConvEmitter::run(raw_ostream &O) {
// Emit prototypes for all of the non-custom CC's so that they can forward ref
// each other.
+ Records.startTimer("Emit prototypes");
for (Record *CC : CCs) {
if (!CC->getValueAsBit("Custom")) {
unsigned Pad = CC->getName().size();
@@ -56,6 +57,7 @@ void CallingConvEmitter::run(raw_ostream &O) {
}
// Emit each non-custom calling convention description in full.
+ Records.startTimer("Emit full descriptions");
for (Record *CC : CCs) {
if (!CC->getValueAsBit("Custom"))
EmitCallingConv(CC, O);
@@ -85,7 +87,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
EmitAction(CCActions->getElementAsRecord(i), 2, O);
}
- O << "\n return true; // CC didn't match.\n";
+ O << "\n return true; // CC didn't match.\n";
O << "}\n";
}
@@ -238,11 +240,11 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << IndentStr << "LocInfo = CCValAssign::FPExt;\n";
} else {
O << IndentStr << "if (ArgFlags.isSExt())\n"
- << IndentStr << IndentStr << "LocInfo = CCValAssign::SExt;\n"
+ << IndentStr << " LocInfo = CCValAssign::SExt;\n"
<< IndentStr << "else if (ArgFlags.isZExt())\n"
- << IndentStr << IndentStr << "LocInfo = CCValAssign::ZExt;\n"
+ << IndentStr << " LocInfo = CCValAssign::ZExt;\n"
<< IndentStr << "else\n"
- << IndentStr << IndentStr << "LocInfo = CCValAssign::AExt;\n";
+ << IndentStr << " LocInfo = CCValAssign::AExt;\n";
}
} else if (Action->isSubClassOf("CCPromoteToUpperBitsInType")) {
Record *DestTy = Action->getValueAsDef("DestTy");
@@ -254,11 +256,11 @@ void CallingConvEmitter::EmitAction(Record *Action,
"point");
} else {
O << IndentStr << "if (ArgFlags.isSExt())\n"
- << IndentStr << IndentStr << "LocInfo = CCValAssign::SExtUpper;\n"
+ << IndentStr << " LocInfo = CCValAssign::SExtUpper;\n"
<< IndentStr << "else if (ArgFlags.isZExt())\n"
- << IndentStr << IndentStr << "LocInfo = CCValAssign::ZExtUpper;\n"
+ << IndentStr << " LocInfo = CCValAssign::ZExtUpper;\n"
<< IndentStr << "else\n"
- << IndentStr << IndentStr << "LocInfo = CCValAssign::AExtUpper;\n";
+ << IndentStr << " LocInfo = CCValAssign::AExtUpper;\n";
}
} else if (Action->isSubClassOf("CCBitConvertToType")) {
Record *DestTy = Action->getValueAsDef("DestTy");
@@ -282,7 +284,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << IndentStr
<< "if (" << Action->getValueAsString("FuncName") << "(ValNo, ValVT, "
<< "LocVT, LocInfo, ArgFlags, State))\n";
- O << IndentStr << IndentStr << "return false;\n";
+ O << IndentStr << " return false;\n";
} else {
errs() << *Action;
PrintFatalError(Action->getLoc(), "Unknown CCAction!");
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp
index 6338d44fb2a7..53bf953b13cf 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -311,7 +311,7 @@ std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *Enc
for (const RecordVal &RV : EncodingDef->getValues()) {
// Ignore fixed fields in the record, we're looking for values like:
// bits<5> RST = { ?, ?, ?, ?, ? };
- if (RV.getPrefix() || RV.getValue()->isComplete())
+ if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
continue;
AddCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp,
@@ -483,7 +483,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< " Inst = Inst.zext(" << BitWidth << ");\n"
<< " if (Scratch.getBitWidth() != " << BitWidth << ")\n"
<< " Scratch = Scratch.zext(" << BitWidth << ");\n"
- << " LoadIntFromMemory(Inst, (uint8_t*)&InstBits[opcode * " << NumWords
+ << " LoadIntFromMemory(Inst, (uint8_t *)&InstBits[opcode * " << NumWords
<< "], " << NumBytes << ");\n"
<< " APInt &Value = Inst;\n"
<< " APInt &op = Scratch;\n"
@@ -643,9 +643,9 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< " report_fatal_error(Msg.str());\n"
<< " }\n"
<< "#else\n"
- << "// Silence unused variable warning on targets that don't use MCII for "
+ << " // Silence unused variable warning on targets that don't use MCII for "
"other purposes (e.g. BPF).\n"
- << "(void)MCII;\n"
+ << " (void)MCII;\n"
<< "#endif // NDEBUG\n";
o << "}\n";
o << "#endif\n";
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 6fdc116721f3..1ca4a68eb155 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -507,9 +507,9 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
// Always treat non-scalable MVTs as smaller than scalable MVTs for the
// purposes of ordering.
auto ASize = std::make_tuple(A.isScalableVector(), A.getScalarSizeInBits(),
- A.getSizeInBits());
+ A.getSizeInBits().getKnownMinSize());
auto BSize = std::make_tuple(B.isScalableVector(), B.getScalarSizeInBits(),
- B.getSizeInBits());
+ B.getSizeInBits().getKnownMinSize());
return ASize < BSize;
};
auto SameKindLE = [](MVT A, MVT B) -> bool {
@@ -520,8 +520,10 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
std::make_tuple(B.isVector(), B.isScalableVector()))
return false;
- return std::make_tuple(A.getScalarSizeInBits(), A.getSizeInBits()) <=
- std::make_tuple(B.getScalarSizeInBits(), B.getSizeInBits());
+ return std::make_tuple(A.getScalarSizeInBits(),
+ A.getSizeInBits().getKnownMinSize()) <=
+ std::make_tuple(B.getScalarSizeInBits(),
+ B.getSizeInBits().getKnownMinSize());
};
for (unsigned M : Modes) {
@@ -728,14 +730,14 @@ bool TypeInfer::EnforceSameSize(TypeSetByHwMode &A, TypeSetByHwMode &B) {
if (B.empty())
Changed |= EnforceAny(B);
- auto NoSize = [](const SmallSet<unsigned,2> &Sizes, MVT T) -> bool {
+ auto NoSize = [](const SmallSet<TypeSize, 2> &Sizes, MVT T) -> bool {
return !Sizes.count(T.getSizeInBits());
};
for (unsigned M : union_modes(A, B)) {
TypeSetByHwMode::SetType &AS = A.get(M);
TypeSetByHwMode::SetType &BS = B.get(M);
- SmallSet<unsigned,2> AN, BN;
+ SmallSet<TypeSize, 2> AN, BN;
for (MVT T : AS)
AN.insert(T.getSizeInBits());
@@ -871,7 +873,7 @@ bool TreePredicateFn::hasPredCode() const {
}
std::string TreePredicateFn::getPredCode() const {
- std::string Code = "";
+ std::string Code;
if (!isLoad() && !isStore() && !isAtomic()) {
Record *MemoryVT = getMemoryVT();
@@ -2388,7 +2390,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
MVT::SimpleValueType VT = P.second.SimpleTy;
if (VT == MVT::iPTR || VT == MVT::iPTRAny)
continue;
- unsigned Size = MVT(VT).getSizeInBits();
+ unsigned Size = MVT(VT).getFixedSizeInBits();
// Make sure that the value is representable for this type.
if (Size >= 32)
continue;
@@ -3086,7 +3088,7 @@ CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R,
VerifyInstructionFlags();
}
-Record *CodeGenDAGPatterns::getSDNodeNamed(const std::string &Name) const {
+Record *CodeGenDAGPatterns::getSDNodeNamed(StringRef Name) const {
Record *N = Records.getDef(Name);
if (!N || !N->isSubClassOf("SDNode"))
PrintFatalError("Error getting SDNode '" + Name + "'!");
@@ -3589,6 +3591,9 @@ static bool hasNullFragReference(DagInit *DI) {
if (Operator->getName() == "null_frag") return true;
// If any of the arguments reference the null fragment, return true.
for (unsigned i = 0, e = DI->getNumArgs(); i != e; ++i) {
+ if (auto Arg = dyn_cast<DefInit>(DI->getArg(i)))
+ if (Arg->getDef()->getName() == "null_frag")
+ return true;
DagInit *Arg = dyn_cast<DagInit>(DI->getArg(i));
if (Arg && hasNullFragReference(Arg))
return true;
@@ -3696,10 +3701,11 @@ void CodeGenDAGPatterns::parseInstructionPattern(
for (unsigned i = 0; i != NumResults; ++i) {
if (i == CGI.Operands.size()) {
const std::string &OpName =
- std::find_if(InstResults.begin(), InstResults.end(),
- [](const std::pair<std::string, TreePatternNodePtr> &P) {
- return P.second;
- })
+ llvm::find_if(
+ InstResults,
+ [](const std::pair<std::string, TreePatternNodePtr> &P) {
+ return P.second;
+ })
->first;
I.error("'" + OpName + "' set but does not appear in operand list!");
@@ -4288,7 +4294,7 @@ void CodeGenDAGPatterns::ExpandHwModeBasedTypes() {
std::vector<Predicate> Preds = P.Predicates;
const std::vector<Predicate> &MC = ModeChecks[Mode];
- Preds.insert(Preds.end(), MC.begin(), MC.end());
+ llvm::append_range(Preds, MC);
PatternsToMatch.emplace_back(P.getSrcRecord(), Preds, std::move(NewSrc),
std::move(NewDst), P.getDstRegs(),
P.getAddedComplexity(), Record::getNewUID(),
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.h b/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.h
index a3b84d76fde9..bc939fe9acc1 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -14,7 +14,6 @@
#ifndef LLVM_UTILS_TABLEGEN_CODEGENDAGPATTERNS_H
#define LLVM_UTILS_TABLEGEN_CODEGENDAGPATTERNS_H
-#include "CodeGenHwModes.h"
#include "CodeGenIntrinsics.h"
#include "CodeGenTarget.h"
#include "SDNodeProperties.h"
@@ -42,7 +41,6 @@ class SDNodeInfo;
class TreePattern;
class TreePatternNode;
class CodeGenDAGPatterns;
-class ComplexPattern;
/// Shared pointer for TreePatternNode.
using TreePatternNodePtr = std::shared_ptr<TreePatternNode>;
@@ -190,7 +188,7 @@ private:
struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
using SetType = MachineValueTypeSet;
- std::vector<unsigned> AddrSpaces;
+ SmallVector<unsigned, 16> AddrSpaces;
TypeSetByHwMode() = default;
TypeSetByHwMode(const TypeSetByHwMode &VTS) = default;
@@ -438,8 +436,6 @@ public:
unsigned getScope() const { return Scope; }
const std::string &getIdentifier() const { return Identifier; }
- std::string getFullName() const;
-
bool operator==(const ScopedName &o) const;
bool operator!=(const ScopedName &o) const;
};
@@ -1180,7 +1176,7 @@ public:
const CodeGenTarget &getTargetInfo() const { return Target; }
const TypeSetByHwMode &getLegalTypes() const { return LegalVTS; }
- Record *getSDNodeNamed(const std::string &Name) const;
+ Record *getSDNodeNamed(StringRef Name) const;
const SDNodeInfo &getSDNodeInfo(Record *R) const {
auto F = SDNodes.find(R);
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.cpp
index 1df5902b081e..960fe08677f7 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -472,7 +472,7 @@ HasOneImplicitDefWithKnownVT(const CodeGenTarget &TargetInfo) const {
/// include text from the specified variant, returning the new string.
std::string CodeGenInstruction::
FlattenAsmStringVariants(StringRef Cur, unsigned Variant) {
- std::string Res = "";
+ std::string Res;
for (;;) {
// Find the start of the next variant string.
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenIntrinsics.h b/contrib/llvm-project/llvm/utils/TableGen/CodeGenIntrinsics.h
index af59c1f3d833..c469f662a42d 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -148,6 +148,7 @@ struct CodeGenIntrinsic {
enum ArgAttrKind {
NoCapture,
NoAlias,
+ NoUndef,
Returned,
ReadOnly,
WriteOnly,
@@ -176,6 +177,13 @@ struct CodeGenIntrinsic {
return Properties & (1 << Prop);
}
+ /// Goes through all IntrProperties that have IsDefault
+ /// value set and sets the property.
+ void setDefaultProperties(Record *R, std::vector<Record *> DefaultProperties);
+
+ /// Helper function to set property \p Name to true;
+ void setProperty(Record *R);
+
/// Returns true if the parameter at \p ParamIdx is a pointer type. Returns
/// false if the parameter is not a pointer, or \p ParamIdx is greater than
/// the size of \p IS.ParamVTs.
@@ -185,7 +193,7 @@ struct CodeGenIntrinsic {
bool isParamImmArg(unsigned ParamIdx) const;
- CodeGenIntrinsic(Record *R);
+ CodeGenIntrinsic(Record *R, std::vector<Record *> DefaultProperties);
};
class CodeGenIntrinsicTable {
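// The enum above gains a NoUndef kind, and per-argument attributes are kept
// as (ArgNo, Kind, Value) entries. A hedged, standalone sketch of that
// bookkeeping; the struct and helper names are hypothetical stand-ins:
#include <cstdint>
#include <vector>

enum SketchArgAttrKind { SketchNoCapture, SketchNoAlias, SketchNoUndef };

struct SketchArgAttr {
  unsigned ArgNo;
  SketchArgAttrKind Kind;
  uint64_t Value; // only meaningful for value-carrying kinds such as Align
};

static void addNoUndef(std::vector<SketchArgAttr> &Attrs, unsigned ArgNo) {
  // Record that argument ArgNo may not be passed an undef/poison value.
  Attrs.push_back({ArgNo, SketchNoUndef, 0});
}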
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenMapTable.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeGenMapTable.cpp
index baca0768b26b..289a20a96f02 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenMapTable.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenMapTable.cpp
@@ -144,25 +144,15 @@ public:
}
}
- std::string getName() const {
- return Name;
- }
+ const std::string &getName() const { return Name; }
- std::string getFilterClass() {
- return FilterClass;
- }
+ const std::string &getFilterClass() const { return FilterClass; }
- ListInit *getRowFields() const {
- return RowFields;
- }
+ ListInit *getRowFields() const { return RowFields; }
- ListInit *getColFields() const {
- return ColFields;
- }
+ ListInit *getColFields() const { return ColFields; }
- ListInit *getKeyCol() const {
- return KeyCol;
- }
+ ListInit *getKeyCol() const { return KeyCol; }
const std::vector<ListInit*> &getValueCols() const {
return ValueCols;
@@ -200,7 +190,7 @@ private:
public:
MapTableEmitter(CodeGenTarget &Target, RecordKeeper &Records, Record *IMRec):
Target(Target), InstrMapDesc(IMRec) {
- const std::string FilterClass = InstrMapDesc.getFilterClass();
+ const std::string &FilterClass = InstrMapDesc.getFilterClass();
InstrDefs = Records.getAllDerivedDefinitions(FilterClass);
}
@@ -384,7 +374,7 @@ unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) {
for (unsigned i = 0; i < TotalNumInstr; i++) {
Record *CurInstr = NumberedInstructions[i]->TheDef;
std::vector<Record*> ColInstrs = MapTable[CurInstr];
- std::string OutStr("");
+ std::string OutStr;
unsigned RelExists = 0;
if (!ColInstrs.empty()) {
for (unsigned j = 0; j < NumCol; j++) {
@@ -422,7 +412,7 @@ void MapTableEmitter::emitBinSearch(raw_ostream &OS, unsigned TableSize) {
OS << " unsigned start = 0;\n";
OS << " unsigned end = " << TableSize << ";\n";
OS << " while (start < end) {\n";
- OS << " mid = start + (end - start)/2;\n";
+ OS << " mid = start + (end - start) / 2;\n";
OS << " if (Opcode == " << InstrMapDesc.getName() << "Table[mid][0]) {\n";
OS << " break;\n";
OS << " }\n";
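// The accessor cleanups above return `const std::string &` instead of a copy.
// A standalone illustration of the same pattern with a hypothetical class:
#include <string>
#include <utility>

class MapRecordSketch {
  std::string Name;
public:
  explicit MapRecordSketch(std::string N) : Name(std::move(N)) {}
  // Returning a const reference avoids copying the string on every call;
  // callers that need ownership can still copy explicitly.
  const std::string &getName() const { return Name; }
};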
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.cpp
index 4584bc7cfae3..f9a7ba6bba80 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.cpp
@@ -196,7 +196,7 @@ void CodeGenRegister::buildObjectGraph(CodeGenRegBank &RegBank) {
}
}
-const StringRef CodeGenRegister::getName() const {
+StringRef CodeGenRegister::getName() const {
assert(TheDef && "no def");
return TheDef->getName();
}
@@ -496,11 +496,10 @@ void CodeGenRegister::computeSecondarySubRegs(CodeGenRegBank &RegBank) {
assert(getSubRegIndex(SubReg) == SubRegIdx && "LeadingSuperRegs correct");
for (CodeGenRegister *SubReg : Cand->ExplicitSubRegs) {
if (CodeGenSubRegIndex *SubRegIdx = getSubRegIndex(SubReg)) {
- if (SubRegIdx->ConcatenationOf.empty()) {
+ if (SubRegIdx->ConcatenationOf.empty())
Parts.push_back(SubRegIdx);
- } else
- for (CodeGenSubRegIndex *SubIdx : SubRegIdx->ConcatenationOf)
- Parts.push_back(SubIdx);
+ else
+ append_range(Parts, SubRegIdx->ConcatenationOf);
} else {
// Sub-register doesn't exist.
Parts.clear();
@@ -743,6 +742,8 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1) {
GeneratePressureSet = R->getValueAsBit("GeneratePressureSet");
std::vector<Record*> TypeList = R->getValueAsListOfDefs("RegTypes");
+ if (TypeList.empty())
+ PrintFatalError(R->getLoc(), "RegTypes list must not be empty!");
for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
Record *Type = TypeList[i];
if (!Type->isSubClassOf("ValueType"))
@@ -751,7 +752,6 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
"' does not derive from the ValueType class!");
VTs.push_back(getValueTypeByHwMode(Type, RegBank.getHwModes()));
}
- assert(!VTs.empty() && "RegisterClass must contain at least one ValueType!");
// Allocation order 0 is the full set. AltOrders provides others.
const SetTheory::RecVec *Elements = RegBank.getSets().expand(R);
@@ -998,6 +998,8 @@ CodeGenRegisterClass::getMatchingSubClassWithSubRegs(
const CodeGenRegisterClass *B) {
// If there are multiple, identical register classes, prefer the original
// register class.
+ if (A == B)
+ return false;
if (A->getMembers().size() == B->getMembers().size())
return A == this;
return A->getMembers().size() > B->getMembers().size();
@@ -1235,8 +1237,7 @@ CodeGenSubRegIndex *CodeGenRegBank::getSubRegIdx(Record *Def) {
const CodeGenSubRegIndex *
CodeGenRegBank::findSubRegIdx(const Record* Def) const {
- auto I = Def2SubRegIdx.find(Def);
- return (I == Def2SubRegIdx.end()) ? nullptr : I->second;
+ return Def2SubRegIdx.lookup(Def);
}
CodeGenRegister *CodeGenRegBank::getReg(Record *Def) {
@@ -2008,7 +2009,7 @@ void CodeGenRegBank::computeRegUnitSets() {
if (RCRegUnits.empty())
continue;
- LLVM_DEBUG(dbgs() << "RC " << RC.getName() << " Units: \n";
+ LLVM_DEBUG(dbgs() << "RC " << RC.getName() << " Units:\n";
for (auto U
: RCRegUnits) printRegUnitName(U);
dbgs() << "\n UnitSetIDs:");
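// findSubRegIdx() above now uses DenseMap::lookup(), which returns a
// value-initialized result (nullptr for pointer values) when the key is
// missing. A minimal example with illustrative key/value types:
#include "llvm/ADT/DenseMap.h"

static const char *lookupName(const llvm::DenseMap<unsigned, const char *> &M,
                              unsigned Key) {
  // Folds the usual find()/end() comparison into a single call.
  return M.lookup(Key);
}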
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.h b/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.h
index 2b200adef312..5228e6518fe5 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.h
@@ -163,7 +163,7 @@ namespace llvm {
CodeGenRegister(Record *R, unsigned Enum);
- const StringRef getName() const;
+ StringRef getName() const;
// Extract more information from TheDef. This is used to build an object
// graph after all CodeGenRegister objects have been created.
@@ -353,7 +353,7 @@ namespace llvm {
unsigned getNumValueTypes() const { return VTs.size(); }
bool hasType(const ValueTypeByHwMode &VT) const {
- return std::find(VTs.begin(), VTs.end(), VT) != VTs.end();
+ return llvm::is_contained(VTs, VT);
}
const ValueTypeByHwMode &getValueTypeNum(unsigned VTNum) const {
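// hasType() above is one of many places this merge swaps std::find()/count()
// for llvm::is_contained(). A self-contained version of the same check; the
// element type is illustrative:
#include "llvm/ADT/STLExtras.h"
#include <vector>

static bool hasValueType(const std::vector<int> &VTs, int VT) {
  // Reads as the membership predicate it is, with no iterator comparison.
  return llvm::is_contained(VTs, VT);
}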
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenSchedule.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeGenSchedule.cpp
index 67583c736cd2..b20eb6eff422 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -86,7 +86,7 @@ struct InstRegexOp : public SetTheory::Operator {
auto Pseudos = Instructions.slice(NumGeneric, NumPseudos);
auto NonPseudos = Instructions.slice(NumGeneric + NumPseudos);
- for (Init *Arg : make_range(Expr->arg_begin(), Expr->arg_end())) {
+ for (Init *Arg : Expr->getArgs()) {
StringInit *SI = dyn_cast<StringInit>(Arg);
if (!SI)
PrintFatalError(Loc, "instregex requires pattern string: " +
@@ -248,8 +248,7 @@ void CodeGenSchedModels::checkSTIPredicates() const {
}
PrintError(R->getLoc(), "STIPredicate " + Name + " multiply declared.");
- PrintNote(It->second->getLoc(), "Previous declaration was here.");
- PrintFatalError(R->getLoc(), "Invalid STIPredicateDecl found.");
+ PrintFatalNote(It->second->getLoc(), "Previous declaration was here.");
}
// Disallow InstructionEquivalenceClasses with an empty instruction list.
@@ -284,7 +283,7 @@ static APInt constructOperandMask(ArrayRef<int64_t> Indices) {
static void
processSTIPredicate(STIPredicateFunction &Fn,
- const DenseMap<Record *, unsigned> &ProcModelMap) {
+ const ProcModelMapTy &ProcModelMap) {
DenseMap<const Record *, unsigned> Opcode2Index;
using OpcodeMapPair = std::pair<const Record *, OpcodeInfo>;
std::vector<OpcodeMapPair> OpcodeMappings;
@@ -454,10 +453,8 @@ void CodeGenSchedModels::checkMCInstPredicates() const {
PrintError(TIIPred->getLoc(),
"TIIPredicate " + Name + " is multiply defined.");
- PrintNote(It->second->getLoc(),
- " Previous definition of " + Name + " was here.");
- PrintFatalError(TIIPred->getLoc(),
- "Found conflicting definitions of TIIPredicate.");
+ PrintFatalNote(It->second->getLoc(),
+ " Previous definition of " + Name + " was here.");
}
}
@@ -953,9 +950,9 @@ void CodeGenSchedModels::collectSchedClasses() {
}
// If ProcIndices contains zero, the class applies to all processors.
LLVM_DEBUG({
- if (!std::count(ProcIndices.begin(), ProcIndices.end(), 0)) {
+ if (!llvm::is_contained(ProcIndices, 0)) {
for (const CodeGenProcModel &PM : ProcModels) {
- if (!std::count(ProcIndices.begin(), ProcIndices.end(), PM.Index))
+ if (!llvm::is_contained(ProcIndices, PM.Index))
dbgs() << "No machine model for " << Inst->TheDef->getName()
<< " on processor " << PM.ModelName << '\n';
}
@@ -1083,13 +1080,14 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) {
if (RWD->getValueAsDef("SchedModel") == RWModelDef &&
RWModelDef->getValueAsBit("FullInstRWOverlapCheck")) {
assert(!InstDefs.empty()); // Checked at function start.
- PrintFatalError
- (InstRWDef->getLoc(),
- "Overlapping InstRW definition for \"" +
- InstDefs.front()->getName() +
- "\" also matches previous \"" +
- RWD->getValue("Instrs")->getValue()->getAsString() +
- "\".");
+ PrintError(
+ InstRWDef->getLoc(),
+ "Overlapping InstRW definition for \"" +
+ InstDefs.front()->getName() +
+ "\" also matches previous \"" +
+ RWD->getValue("Instrs")->getValue()->getAsString() +
+ "\".");
+ PrintFatalNote(RWD->getLoc(), "Previous match was here.");
}
}
LLVM_DEBUG(dbgs() << "InstRW: Reuse SC " << OldSCIdx << ":"
@@ -1118,13 +1116,13 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) {
for (Record *OldRWDef : SchedClasses[OldSCIdx].InstRWs) {
if (OldRWDef->getValueAsDef("SchedModel") == RWModelDef) {
assert(!InstDefs.empty()); // Checked at function start.
- PrintFatalError
- (InstRWDef->getLoc(),
- "Overlapping InstRW definition for \"" +
- InstDefs.front()->getName() +
- "\" also matches previous \"" +
- OldRWDef->getValue("Instrs")->getValue()->getAsString() +
- "\".");
+ PrintError(
+ InstRWDef->getLoc(),
+ "Overlapping InstRW definition for \"" +
+ InstDefs.front()->getName() + "\" also matches previous \"" +
+ OldRWDef->getValue("Instrs")->getValue()->getAsString() +
+ "\".");
+ PrintFatalNote(OldRWDef->getLoc(), "Previous match was here.");
}
assert(OldRWDef != InstRWDef &&
"SchedClass has duplicate InstRW def");
@@ -1210,11 +1208,10 @@ void CodeGenSchedModels::collectProcItinRW() {
// Gather the unsupported features for processor models.
void CodeGenSchedModels::collectProcUnsupportedFeatures() {
- for (CodeGenProcModel &ProcModel : ProcModels) {
- for (Record *Pred : ProcModel.ModelDef->getValueAsListOfDefs("UnsupportedFeatures")) {
- ProcModel.UnsupportedFeaturesDefs.push_back(Pred);
- }
- }
+ for (CodeGenProcModel &ProcModel : ProcModels)
+ append_range(
+ ProcModel.UnsupportedFeaturesDefs,
+ ProcModel.ModelDef->getValueAsListOfDefs("UnsupportedFeatures"));
}
/// Infer new classes from existing classes. In the process, this may create new
@@ -1250,7 +1247,7 @@ void CodeGenSchedModels::inferFromItinClass(Record *ItinClassDef,
bool HasMatch = false;
for (const Record *Rec : PM.ItinRWDefs) {
RecVec Matched = Rec->getValueAsListOfDefs("MatchedItinClasses");
- if (!std::count(Matched.begin(), Matched.end(), ItinClassDef))
+ if (!llvm::is_contained(Matched, ItinClassDef))
continue;
if (HasMatch)
PrintFatalError(Rec->getLoc(), "Duplicate itinerary class "
@@ -1283,6 +1280,7 @@ void CodeGenSchedModels::inferFromInstRWs(unsigned SCIdx) {
findRWs(Rec->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
unsigned PIdx = getProcModel(Rec->getValueAsDef("SchedModel")).Index;
inferFromRW(Writes, Reads, SCIdx, PIdx); // May mutate SchedClasses.
+ SchedClasses[SCIdx].InstRWProcIndices.insert(PIdx);
}
}
@@ -1315,7 +1313,13 @@ struct PredTransition {
SmallVector<PredCheck, 4> PredTerm;
SmallVector<SmallVector<unsigned,4>, 16> WriteSequences;
SmallVector<SmallVector<unsigned,4>, 16> ReadSequences;
- SmallVector<unsigned, 4> ProcIndices;
+ unsigned ProcIndex = 0;
+
+ PredTransition() = default;
+ PredTransition(ArrayRef<PredCheck> PT, unsigned ProcId) {
+ PredTerm.assign(PT.begin(), PT.end());
+ ProcIndex = ProcId;
+ }
};
// Encapsulate a set of partially constructed transitions.
@@ -1328,17 +1332,18 @@ public:
PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
- void substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
+ bool substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
bool IsRead, unsigned StartIdx);
- void substituteVariants(const PredTransition &Trans);
+ bool substituteVariants(const PredTransition &Trans);
#ifndef NDEBUG
void dump() const;
#endif
private:
- bool mutuallyExclusive(Record *PredDef, ArrayRef<PredCheck> Term);
+ bool mutuallyExclusive(Record *PredDef, ArrayRef<Record *> Preds,
+ ArrayRef<PredCheck> Term);
void getIntersectingVariants(
const CodeGenSchedRW &SchedRW, unsigned TransIdx,
std::vector<TransVariant> &IntersectingVariants);
@@ -1357,6 +1362,7 @@ private:
// are always checked in the order they are defined in the .td file. Later
// conditions implicitly negate any prior condition.
bool PredTransitions::mutuallyExclusive(Record *PredDef,
+ ArrayRef<Record *> Preds,
ArrayRef<PredCheck> Term) {
for (const PredCheck &PC: Term) {
if (PC.Predicate == PredDef)
@@ -1367,49 +1373,49 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef,
RecVec Variants = SchedRW.TheDef->getValueAsListOfDefs("Variants");
if (any_of(Variants, [PredDef](const Record *R) {
return R->getValueAsDef("Predicate") == PredDef;
- }))
- return true;
- }
- return false;
-}
-
-static bool hasAliasedVariants(const CodeGenSchedRW &RW,
- CodeGenSchedModels &SchedModels) {
- if (RW.HasVariants)
- return true;
-
- for (Record *Alias : RW.Aliases) {
- const CodeGenSchedRW &AliasRW =
- SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW"));
- if (AliasRW.HasVariants)
+ })) {
+ // To check if PredDef is mutually exclusive with PC we also need to
+ // check that PC.Predicate is exclusive with all predicates from variant
+ // we're expanding. Consider following RW sequence with two variants
+ // (1 & 2), where A, B and C are predicates from corresponding SchedVars:
+ //
+ // 1:A/B - 2:C/B
+ //
+ // Here C is not mutually exclusive with variant (1), because A doesn't
+ // exist in variant (2). This means we have possible transitions from A
+ // to C and from A to B, and fully expanded sequence would look like:
+ //
+ // if (A & C) return ...;
+ // if (A & B) return ...;
+ // if (B) return ...;
+ //
+ // Now let's consider another sequence:
+ //
+ // 1:A/B - 2:A/B
+ //
+ // Here A in variant (2) is mutually exclusive with variant (1), because
+ // A also exists in (2). This means A->B transition is impossible and
+ // expanded sequence would look like:
+ //
+ // if (A) return ...;
+ // if (B) return ...;
+ if (!count(Preds, PC.Predicate))
+ continue;
return true;
- if (AliasRW.IsSequence) {
- IdxVec ExpandedRWs;
- SchedModels.expandRWSequence(AliasRW.Index, ExpandedRWs, AliasRW.IsRead);
- for (unsigned SI : ExpandedRWs) {
- if (hasAliasedVariants(SchedModels.getSchedRW(SI, AliasRW.IsRead),
- SchedModels))
- return true;
- }
}
}
return false;
}
-static bool hasVariant(ArrayRef<PredTransition> Transitions,
- CodeGenSchedModels &SchedModels) {
- for (const PredTransition &PTI : Transitions) {
- for (const SmallVectorImpl<unsigned> &WSI : PTI.WriteSequences)
- for (unsigned WI : WSI)
- if (hasAliasedVariants(SchedModels.getSchedWrite(WI), SchedModels))
- return true;
-
- for (const SmallVectorImpl<unsigned> &RSI : PTI.ReadSequences)
- for (unsigned RI : RSI)
- if (hasAliasedVariants(SchedModels.getSchedRead(RI), SchedModels))
- return true;
+static std::vector<Record *> getAllPredicates(ArrayRef<TransVariant> Variants,
+ unsigned ProcId) {
+ std::vector<Record *> Preds;
+ for (auto &Variant : Variants) {
+ if (!Variant.VarOrSeqDef->isSubClassOf("SchedVar"))
+ continue;
+ Preds.push_back(Variant.VarOrSeqDef->getValueAsDef("Predicate"));
}
- return false;
+ return Preds;
}
// Populate IntersectingVariants with any variants or aliased sequences of the
@@ -1428,12 +1434,14 @@ void PredTransitions::getIntersectingVariants(
Record *ModelDef = SchedRW.TheDef->getValueAsDef("SchedModel");
VarProcIdx = SchedModels.getProcModel(ModelDef).Index;
}
- // Push each variant. Assign TransVecIdx later.
- const RecVec VarDefs = SchedRW.TheDef->getValueAsListOfDefs("Variants");
- for (Record *VarDef : VarDefs)
- Variants.emplace_back(VarDef, SchedRW.Index, VarProcIdx, 0);
- if (VarProcIdx == 0)
- GenericRW = true;
+ if (VarProcIdx == 0 || VarProcIdx == TransVec[TransIdx].ProcIndex) {
+ // Push each variant. Assign TransVecIdx later.
+ const RecVec VarDefs = SchedRW.TheDef->getValueAsListOfDefs("Variants");
+ for (Record *VarDef : VarDefs)
+ Variants.emplace_back(VarDef, SchedRW.Index, VarProcIdx, 0);
+ if (VarProcIdx == 0)
+ GenericRW = true;
+ }
}
for (RecIter AI = SchedRW.Aliases.begin(), AE = SchedRW.Aliases.end();
AI != AE; ++AI) {
@@ -1445,6 +1453,17 @@ void PredTransitions::getIntersectingVariants(
Record *ModelDef = (*AI)->getValueAsDef("SchedModel");
AliasProcIdx = SchedModels.getProcModel(ModelDef).Index;
}
+ if (AliasProcIdx && AliasProcIdx != TransVec[TransIdx].ProcIndex)
+ continue;
+ if (!Variants.empty()) {
+ const CodeGenProcModel &PM =
+ *(SchedModels.procModelBegin() + AliasProcIdx);
+ PrintFatalError((*AI)->getLoc(),
+ "Multiple variants defined for processor " +
+ PM.ModelName +
+ " Ensure only one SchedAlias exists per RW.");
+ }
+
const CodeGenSchedRW &AliasRW =
SchedModels.getSchedRW((*AI)->getValueAsDef("AliasRW"));
@@ -1458,29 +1477,17 @@ void PredTransitions::getIntersectingVariants(
if (AliasProcIdx == 0)
GenericRW = true;
}
+ std::vector<Record *> AllPreds =
+ getAllPredicates(Variants, TransVec[TransIdx].ProcIndex);
for (TransVariant &Variant : Variants) {
// Don't expand variants if the processor models don't intersect.
// A zero processor index means any processor.
- SmallVectorImpl<unsigned> &ProcIndices = TransVec[TransIdx].ProcIndices;
- if (ProcIndices[0] && Variant.ProcIdx) {
- unsigned Cnt = std::count(ProcIndices.begin(), ProcIndices.end(),
- Variant.ProcIdx);
- if (!Cnt)
- continue;
- if (Cnt > 1) {
- const CodeGenProcModel &PM =
- *(SchedModels.procModelBegin() + Variant.ProcIdx);
- PrintFatalError(Variant.VarOrSeqDef->getLoc(),
- "Multiple variants defined for processor " +
- PM.ModelName +
- " Ensure only one SchedAlias exists per RW.");
- }
- }
if (Variant.VarOrSeqDef->isSubClassOf("SchedVar")) {
Record *PredDef = Variant.VarOrSeqDef->getValueAsDef("Predicate");
- if (mutuallyExclusive(PredDef, TransVec[TransIdx].PredTerm))
+ if (mutuallyExclusive(PredDef, AllPreds, TransVec[TransIdx].PredTerm))
continue;
}
+
if (IntersectingVariants.empty()) {
// The first variant builds on the existing transition.
Variant.TransVecIdx = TransIdx;
@@ -1507,9 +1514,6 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
// If this operand transition is reached through a processor-specific alias,
// then the whole transition is specific to this processor.
- if (VInfo.ProcIdx != 0)
- Trans.ProcIndices.assign(1, VInfo.ProcIdx);
-
IdxVec SelectedRWs;
if (VInfo.VarOrSeqDef->isSubClassOf("SchedVar")) {
Record *PredDef = VInfo.VarOrSeqDef->getValueAsDef("Predicate");
@@ -1530,6 +1534,7 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
if (SchedRW.IsVariadic) {
unsigned OperIdx = RWSequences.size()-1;
// Make N-1 copies of this transition's last sequence.
+ RWSequences.reserve(RWSequences.size() + SelectedRWs.size() - 1);
RWSequences.insert(RWSequences.end(), SelectedRWs.size() - 1,
RWSequences[OperIdx]);
// Push each of the N elements of the SelectedRWs onto a copy of the last
@@ -1543,8 +1548,7 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
ExpandedRWs.push_back(*RWI);
else
SchedModels.expandRWSequence(*RWI, ExpandedRWs, IsRead);
- RWSequences[OperIdx].insert(RWSequences[OperIdx].end(),
- ExpandedRWs.begin(), ExpandedRWs.end());
+ llvm::append_range(RWSequences[OperIdx], ExpandedRWs);
}
assert(OperIdx == RWSequences.size() && "missed a sequence");
}
@@ -1560,7 +1564,7 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
else
SchedModels.expandRWSequence(*RWI, ExpandedRWs, IsRead);
}
- Seq.insert(Seq.end(), ExpandedRWs.begin(), ExpandedRWs.end());
+ llvm::append_range(Seq, ExpandedRWs);
}
}
@@ -1568,9 +1572,9 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
// operand. StartIdx is an index into TransVec where partial results
// starts. RWSeq must be applied to all transitions between StartIdx and the end
// of TransVec.
-void PredTransitions::substituteVariantOperand(
- const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
-
+bool PredTransitions::substituteVariantOperand(
+ const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
+ bool Subst = false;
// Visit each original RW within the current sequence.
for (SmallVectorImpl<unsigned>::const_iterator
RWI = RWSeq.begin(), RWE = RWSeq.end(); RWI != RWE; ++RWI) {
@@ -1580,27 +1584,25 @@ void PredTransitions::substituteVariantOperand(
// revisited (TransEnd must be loop invariant).
for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
TransIdx != TransEnd; ++TransIdx) {
- // In the common case, push RW onto the current operand's sequence.
- if (!hasAliasedVariants(SchedRW, SchedModels)) {
- if (IsRead)
- TransVec[TransIdx].ReadSequences.back().push_back(*RWI);
- else
- TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
- continue;
- }
// Distribute this partial PredTransition across intersecting variants.
// This will push a copies of TransVec[TransIdx] on the back of TransVec.
std::vector<TransVariant> IntersectingVariants;
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
// Now expand each variant on top of its copy of the transition.
- for (std::vector<TransVariant>::const_iterator
- IVI = IntersectingVariants.begin(),
- IVE = IntersectingVariants.end();
- IVI != IVE; ++IVI) {
- pushVariant(*IVI, IsRead);
+ for (const TransVariant &IV : IntersectingVariants)
+ pushVariant(IV, IsRead);
+ if (IntersectingVariants.empty()) {
+ if (IsRead)
+ TransVec[TransIdx].ReadSequences.back().push_back(*RWI);
+ else
+ TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
+ continue;
+ } else {
+ Subst = true;
}
}
}
+ return Subst;
}
// For each variant of a Read/Write in Trans, substitute the sequence of
@@ -1609,13 +1611,13 @@ void PredTransitions::substituteVariantOperand(
// predicates should result in linear growth in the total number variants.
//
// This is one step in a breadth-first search of nested variants.
-void PredTransitions::substituteVariants(const PredTransition &Trans) {
+bool PredTransitions::substituteVariants(const PredTransition &Trans) {
// Build up a set of partial results starting at the back of
// PredTransitions. Remember the first new transition.
unsigned StartIdx = TransVec.size();
- TransVec.emplace_back();
- TransVec.back().PredTerm = Trans.PredTerm;
- TransVec.back().ProcIndices = Trans.ProcIndices;
+ bool Subst = false;
+ assert(Trans.ProcIndex != 0);
+ TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndex);
// Visit each original write sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
@@ -1626,7 +1628,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->WriteSequences.emplace_back();
}
- substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
+ Subst |= substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
}
// Visit each original read sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
@@ -1637,10 +1639,37 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->ReadSequences.emplace_back();
}
- substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
+ Subst |= substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
}
+ return Subst;
+}
+
+static void addSequences(CodeGenSchedModels &SchedModels,
+ const SmallVectorImpl<SmallVector<unsigned, 4>> &Seqs,
+ IdxVec &Result, bool IsRead) {
+ for (const auto &S : Seqs)
+ if (!S.empty())
+ Result.push_back(SchedModels.findOrInsertRW(S, IsRead));
}
+#ifndef NDEBUG
+static void dumpRecVec(const RecVec &RV) {
+ for (const Record *R : RV)
+ dbgs() << R->getName() << ", ";
+}
+#endif
+
+static void dumpTransition(const CodeGenSchedModels &SchedModels,
+ const CodeGenSchedClass &FromSC,
+ const CodeGenSchedTransition &SCTrans,
+ const RecVec &Preds) {
+ LLVM_DEBUG(dbgs() << "Adding transition from " << FromSC.Name << "("
+ << FromSC.Index << ") to "
+ << SchedModels.getSchedClass(SCTrans.ToClassIdx).Name << "("
+ << SCTrans.ToClassIdx << ") on pred term: (";
+ dumpRecVec(Preds);
+ dbgs() << ") on processor (" << SCTrans.ProcIndex << ")\n");
+}
// Create a new SchedClass for each variant found by inferFromRW. Pass
static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
unsigned FromClassIdx,
@@ -1649,21 +1678,25 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
// requires creating a new SchedClass.
for (ArrayRef<PredTransition>::iterator
I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) {
- IdxVec OperWritesVariant;
- transform(I->WriteSequences, std::back_inserter(OperWritesVariant),
- [&SchedModels](ArrayRef<unsigned> WS) {
- return SchedModels.findOrInsertRW(WS, /*IsRead=*/false);
- });
- IdxVec OperReadsVariant;
- transform(I->ReadSequences, std::back_inserter(OperReadsVariant),
- [&SchedModels](ArrayRef<unsigned> RS) {
- return SchedModels.findOrInsertRW(RS, /*IsRead=*/true);
- });
+ // Variant expansion (substituteVariants) may create unconditional
+ // transitions. We don't need to build sched classes for them.
+ if (I->PredTerm.empty())
+ continue;
+ IdxVec OperWritesVariant, OperReadsVariant;
+ addSequences(SchedModels, I->WriteSequences, OperWritesVariant, false);
+ addSequences(SchedModels, I->ReadSequences, OperReadsVariant, true);
CodeGenSchedTransition SCTrans;
+
+ // Transition should not contain processor indices already assigned to
+ // InstRWs in this scheduling class.
+ const CodeGenSchedClass &FromSC = SchedModels.getSchedClass(FromClassIdx);
+ if (FromSC.InstRWProcIndices.count(I->ProcIndex))
+ continue;
+ SCTrans.ProcIndex = I->ProcIndex;
SCTrans.ToClassIdx =
- SchedModels.addSchedClass(/*ItinClassDef=*/nullptr, OperWritesVariant,
- OperReadsVariant, I->ProcIndices);
- SCTrans.ProcIndices.assign(I->ProcIndices.begin(), I->ProcIndices.end());
+ SchedModels.addSchedClass(/*ItinClassDef=*/nullptr, OperWritesVariant,
+ OperReadsVariant, I->ProcIndex);
+
// The final PredTerm is unique set of predicates guarding the transition.
RecVec Preds;
transform(I->PredTerm, std::back_inserter(Preds),
@@ -1671,12 +1704,36 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
return P.Predicate;
});
Preds.erase(std::unique(Preds.begin(), Preds.end()), Preds.end());
+ dumpTransition(SchedModels, FromSC, SCTrans, Preds);
SCTrans.PredTerm = std::move(Preds);
SchedModels.getSchedClass(FromClassIdx)
.Transitions.push_back(std::move(SCTrans));
}
}
+std::vector<unsigned> CodeGenSchedModels::getAllProcIndices() const {
+ std::vector<unsigned> ProcIdVec;
+ for (const auto &PM : ProcModelMap)
+ if (PM.second != 0)
+ ProcIdVec.push_back(PM.second);
+ // The order of the keys (Record pointers) of ProcModelMap is not stable.
+ // Sort to stabilize the values.
+ llvm::sort(ProcIdVec);
+ return ProcIdVec;
+}
+
+static std::vector<PredTransition>
+makePerProcessorTransitions(const PredTransition &Trans,
+ ArrayRef<unsigned> ProcIndices) {
+ std::vector<PredTransition> PerCpuTransVec;
+ for (unsigned ProcId : ProcIndices) {
+ assert(ProcId != 0);
+ PerCpuTransVec.push_back(Trans);
+ PerCpuTransVec.back().ProcIndex = ProcId;
+ }
+ return PerCpuTransVec;
+}
+
// Create new SchedClasses for the given ReadWrite list. If any of the
// ReadWrites refers to a SchedVariant, create a new SchedClass for each variant
// of the ReadWrite list, following Aliases if necessary.
@@ -1686,13 +1743,10 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
ArrayRef<unsigned> ProcIndices) {
LLVM_DEBUG(dbgs() << "INFER RW proc("; dumpIdxVec(ProcIndices);
dbgs() << ") ");
-
// Create a seed transition with an empty PredTerm and the expanded sequences
// of SchedWrites for the current SchedClass.
std::vector<PredTransition> LastTransitions;
LastTransitions.emplace_back();
- LastTransitions.back().ProcIndices.append(ProcIndices.begin(),
- ProcIndices.end());
for (unsigned WriteIdx : OperWrites) {
IdxVec WriteSeq;
@@ -1713,18 +1767,21 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
}
LLVM_DEBUG(dbgs() << '\n');
+ LastTransitions = makePerProcessorTransitions(
+ LastTransitions[0], llvm::is_contained(ProcIndices, 0)
+ ? ArrayRef<unsigned>(getAllProcIndices())
+ : ProcIndices);
// Collect all PredTransitions for individual operands.
// Iterate until no variant writes remain.
- while (hasVariant(LastTransitions, *this)) {
+ bool SubstitutedAny;
+ do {
+ SubstitutedAny = false;
PredTransitions Transitions(*this);
for (const PredTransition &Trans : LastTransitions)
- Transitions.substituteVariants(Trans);
+ SubstitutedAny |= Transitions.substituteVariants(Trans);
LLVM_DEBUG(Transitions.dump());
LastTransitions.swap(Transitions.TransVec);
- }
- // If the first transition has no variants, nothing to do.
- if (LastTransitions[0].PredTerm.empty())
- return;
+ } while (SubstitutedAny);
// WARNING: We are about to mutate the SchedClasses vector. Do not refer to
// OperWrites, OperReads, or ProcIndices after calling inferFromTransitions.
@@ -1767,8 +1824,7 @@ void CodeGenSchedModels::verifyProcResourceGroups(CodeGenProcModel &PM) {
OtherUnits.begin(), OtherUnits.end())
!= CheckUnits.end()) {
// CheckUnits and OtherUnits overlap
- OtherUnits.insert(OtherUnits.end(), CheckUnits.begin(),
- CheckUnits.end());
+ llvm::append_range(OtherUnits, CheckUnits);
if (!hasSuperGroup(OtherUnits, PM)) {
PrintFatalError((PM.ProcResourceDefs[i])->getLoc(),
"proc resource group overlaps with "
@@ -1990,7 +2046,7 @@ void CodeGenSchedModels::collectItinProcResources(Record *ItinClassDef) {
for (RecIter II = PM.ItinRWDefs.begin(), IE = PM.ItinRWDefs.end();
II != IE; ++II) {
RecVec Matched = (*II)->getValueAsListOfDefs("MatchedItinClasses");
- if (!std::count(Matched.begin(), Matched.end(), ItinClassDef))
+ if (!llvm::is_contained(Matched, ItinClassDef))
continue;
if (HasMatch)
PrintFatalError((*II)->getLoc(), "Duplicate itinerary class "
@@ -2191,13 +2247,14 @@ void CodeGenSchedClass::dump(const CodeGenSchedModels* SchedModels) const {
dbgs().indent(10);
}
}
- dbgs() << "\n ProcIdx: "; dumpIdxVec(ProcIndices); dbgs() << '\n';
+ dbgs() << "\n ProcIdx: "; dumpIdxVec(ProcIndices);
if (!Transitions.empty()) {
dbgs() << "\n Transitions for Proc ";
for (const CodeGenSchedTransition &Transition : Transitions) {
- dumpIdxVec(Transition.ProcIndices);
+ dbgs() << Transition.ProcIndex << ", ";
}
}
+ dbgs() << '\n';
}
void PredTransitions::dump() const {
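// inferFromRW() above drops the hasVariant() pre-check in favour of a
// do/while loop that re-runs substitution until a pass changes nothing.
// A toy sketch of that fixed-point control flow (names are hypothetical):
#include <vector>

static void expandUntilStable(std::vector<int> &Work,
                              bool (*ExpandOnce)(std::vector<int> &)) {
  bool Changed;
  do {
    // Each pass may append new work items; stop only once a full pass
    // reports that it substituted nothing.
    Changed = ExpandOnce(Work);
  } while (Changed);
}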
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenSchedule.h b/contrib/llvm-project/llvm/utils/TableGen/CodeGenSchedule.h
index c487d142d46c..9020447c940b 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenSchedule.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenSchedule.h
@@ -16,10 +16,12 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
+#include <map>
namespace llvm {
@@ -94,7 +96,7 @@ struct CodeGenSchedRW {
/// Represent a transition between SchedClasses induced by SchedVariant.
struct CodeGenSchedTransition {
unsigned ToClassIdx;
- IdxVec ProcIndices;
+ unsigned ProcIndex;
RecVec PredTerm;
};
@@ -139,6 +141,8 @@ struct CodeGenSchedClass {
// Instructions should be ignored by this class because they have been split
// off to join another inferred class.
RecVec InstRWs;
+ // InstRWs processor indices. Filled in inferFromInstRWs
+ DenseSet<unsigned> InstRWProcIndices;
CodeGenSchedClass(unsigned Index, std::string Name, Record *ItinClassDef)
: Index(Index), Name(std::move(Name)), ItinClassDef(ItinClassDef) {}
@@ -358,8 +362,7 @@ public:
OpcodeGroup(OpcodeGroup &&Other) = default;
void addOpcode(const Record *Opcode) {
- assert(std::find(Opcodes.begin(), Opcodes.end(), Opcode) == Opcodes.end() &&
- "Opcode already in set!");
+ assert(!llvm::is_contained(Opcodes, Opcode) && "Opcode already in set!");
Opcodes.push_back(Opcode);
}
@@ -407,6 +410,8 @@ public:
ArrayRef<OpcodeGroup> getGroups() const { return Groups; }
};
+using ProcModelMapTy = DenseMap<const Record *, unsigned>;
+
/// Top level container for machine model data.
class CodeGenSchedModels {
RecordKeeper &Records;
@@ -419,7 +424,6 @@ class CodeGenSchedModels {
std::vector<CodeGenProcModel> ProcModels;
// Map Processor's MachineModel or ProcItin to a CodeGenProcModel index.
- using ProcModelMapTy = DenseMap<Record*, unsigned>;
ProcModelMapTy ProcModelMap;
// Per-operand SchedReadWrite types.
@@ -441,6 +445,7 @@ class CodeGenSchedModels {
InstClassMapTy InstrClassMap;
std::vector<STIPredicateFunction> STIPredicates;
+ std::vector<unsigned> getAllProcIndices() const;
public:
CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT);
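// Each CodeGenSchedTransition now carries a single ProcIndex, and the
// scheduling class tracks its InstRW processor indices in a DenseSet so
// duplicates collapse automatically. A minimal use of that container:
#include "llvm/ADT/DenseSet.h"

static bool recordProcIndex(llvm::DenseSet<unsigned> &Seen, unsigned ProcIdx) {
  // insert() returns {iterator, inserted}; the bool is true only the first
  // time a given processor index is seen.
  return Seen.insert(ProcIdx).second;
}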
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp
index 891a08ea590e..8f6d212df5ec 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -76,6 +76,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::f128: return "MVT::f128";
case MVT::ppcf128: return "MVT::ppcf128";
case MVT::x86mmx: return "MVT::x86mmx";
+ case MVT::x86amx: return "MVT::x86amx";
case MVT::Glue: return "MVT::Glue";
case MVT::isVoid: return "MVT::isVoid";
case MVT::v1i1: return "MVT::v1i1";
@@ -86,6 +87,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v32i1: return "MVT::v32i1";
case MVT::v64i1: return "MVT::v64i1";
case MVT::v128i1: return "MVT::v128i1";
+ case MVT::v256i1: return "MVT::v256i1";
case MVT::v512i1: return "MVT::v512i1";
case MVT::v1024i1: return "MVT::v1024i1";
case MVT::v1i8: return "MVT::v1i8";
@@ -126,6 +128,9 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v8i64: return "MVT::v8i64";
case MVT::v16i64: return "MVT::v16i64";
case MVT::v32i64: return "MVT::v32i64";
+ case MVT::v64i64: return "MVT::v64i64";
+ case MVT::v128i64: return "MVT::v128i64";
+ case MVT::v256i64: return "MVT::v256i64";
case MVT::v1i128: return "MVT::v1i128";
case MVT::v2f16: return "MVT::v2f16";
case MVT::v3f16: return "MVT::v3f16";
@@ -163,6 +168,9 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v8f64: return "MVT::v8f64";
case MVT::v16f64: return "MVT::v16f64";
case MVT::v32f64: return "MVT::v32f64";
+ case MVT::v64f64: return "MVT::v64f64";
+ case MVT::v128f64: return "MVT::v128f64";
+ case MVT::v256f64: return "MVT::v256f64";
case MVT::nxv1i1: return "MVT::nxv1i1";
case MVT::nxv2i1: return "MVT::nxv2i1";
case MVT::nxv4i1: return "MVT::nxv4i1";
@@ -204,21 +212,22 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::nxv2bf16: return "MVT::nxv2bf16";
case MVT::nxv4bf16: return "MVT::nxv4bf16";
case MVT::nxv8bf16: return "MVT::nxv8bf16";
- case MVT::nxv1f32: return "MVT::nxv1f32";
- case MVT::nxv2f32: return "MVT::nxv2f32";
- case MVT::nxv4f32: return "MVT::nxv4f32";
- case MVT::nxv8f32: return "MVT::nxv8f32";
- case MVT::nxv16f32: return "MVT::nxv16f32";
- case MVT::nxv1f64: return "MVT::nxv1f64";
- case MVT::nxv2f64: return "MVT::nxv2f64";
- case MVT::nxv4f64: return "MVT::nxv4f64";
- case MVT::nxv8f64: return "MVT::nxv8f64";
- case MVT::token: return "MVT::token";
- case MVT::Metadata: return "MVT::Metadata";
- case MVT::iPTR: return "MVT::iPTR";
- case MVT::iPTRAny: return "MVT::iPTRAny";
- case MVT::Untyped: return "MVT::Untyped";
- case MVT::exnref: return "MVT::exnref";
+ case MVT::nxv1f32: return "MVT::nxv1f32";
+ case MVT::nxv2f32: return "MVT::nxv2f32";
+ case MVT::nxv4f32: return "MVT::nxv4f32";
+ case MVT::nxv8f32: return "MVT::nxv8f32";
+ case MVT::nxv16f32: return "MVT::nxv16f32";
+ case MVT::nxv1f64: return "MVT::nxv1f64";
+ case MVT::nxv2f64: return "MVT::nxv2f64";
+ case MVT::nxv4f64: return "MVT::nxv4f64";
+ case MVT::nxv8f64: return "MVT::nxv8f64";
+ case MVT::token: return "MVT::token";
+ case MVT::Metadata: return "MVT::Metadata";
+ case MVT::iPTR: return "MVT::iPTR";
+ case MVT::iPTRAny: return "MVT::iPTRAny";
+ case MVT::Untyped: return "MVT::Untyped";
+ case MVT::funcref: return "MVT::funcref";
+ case MVT::externref: return "MVT::externref";
default: llvm_unreachable("ILLEGAL VALUE TYPE!");
}
}
@@ -251,19 +260,27 @@ CodeGenTarget::CodeGenTarget(RecordKeeper &records)
CodeGenTarget::~CodeGenTarget() {
}
-const StringRef CodeGenTarget::getName() const {
- return TargetRec->getName();
-}
+StringRef CodeGenTarget::getName() const { return TargetRec->getName(); }
+/// getInstNamespace - Find and return the target machine's instruction
+/// namespace. The namespace is cached because it is requested multiple times.
StringRef CodeGenTarget::getInstNamespace() const {
- for (const CodeGenInstruction *Inst : getInstructionsByEnumValue()) {
- // Make sure not to pick up "TargetOpcode" by accidentally getting
- // the namespace off the PHI instruction or something.
- if (Inst->Namespace != "TargetOpcode")
- return Inst->Namespace;
+ if (InstNamespace.empty()) {
+ for (const CodeGenInstruction *Inst : getInstructionsByEnumValue()) {
+ // We are not interested in the "TargetOpcode" namespace.
+ if (Inst->Namespace != "TargetOpcode") {
+ InstNamespace = Inst->Namespace;
+ break;
+ }
+ }
}
- return "";
+ return InstNamespace;
+}
+
+StringRef CodeGenTarget::getRegNamespace() const {
+ auto &RegClasses = RegBank->getRegClasses();
+ return RegClasses.size() > 0 ? RegClasses.front().Namespace : "";
}
Record *CodeGenTarget::getInstructionSet() const {
@@ -324,7 +341,8 @@ CodeGenRegBank &CodeGenTarget::getRegBank() const {
Optional<CodeGenRegisterClass *>
CodeGenTarget::getSuperRegForSubReg(const ValueTypeByHwMode &ValueTy,
CodeGenRegBank &RegBank,
- const CodeGenSubRegIndex *SubIdx) const {
+ const CodeGenSubRegIndex *SubIdx,
+ bool MustBeAllocatable) const {
std::vector<CodeGenRegisterClass *> Candidates;
auto &RegClasses = RegBank.getRegClasses();
@@ -337,10 +355,11 @@ CodeGenTarget::getSuperRegForSubReg(const ValueTypeByHwMode &ValueTy,
continue;
// We have a class. Check if it supports this value type.
- if (llvm::none_of(SubClassWithSubReg->VTs,
- [&ValueTy](const ValueTypeByHwMode &ClassVT) {
- return ClassVT == ValueTy;
- }))
+ if (!llvm::is_contained(SubClassWithSubReg->VTs, ValueTy))
+ continue;
+
+ // If necessary, check that it is allocatable.
+ if (MustBeAllocatable && !SubClassWithSubReg->Allocatable)
continue;
// We have a register class which supports both the value type and
@@ -376,11 +395,7 @@ void CodeGenTarget::ReadRegAltNameIndices() const {
/// getRegisterByName - If there is a register with the specific AsmName,
/// return it.
const CodeGenRegister *CodeGenTarget::getRegisterByName(StringRef Name) const {
- const StringMap<CodeGenRegister*> &Regs = getRegBank().getRegistersByName();
- StringMap<CodeGenRegister*>::const_iterator I = Regs.find(Name);
- if (I == Regs.end())
- return nullptr;
- return I->second;
+ return getRegBank().getRegistersByName().lookup(Name);
}
std::vector<ValueTypeByHwMode> CodeGenTarget::getRegisterVTs(Record *R)
@@ -390,7 +405,7 @@ std::vector<ValueTypeByHwMode> CodeGenTarget::getRegisterVTs(Record *R)
for (const auto &RC : getRegBank().getRegClasses()) {
if (RC.contains(Reg)) {
ArrayRef<ValueTypeByHwMode> InVTs = RC.getValueTypes();
- Result.insert(Result.end(), InVTs.begin(), InVTs.end());
+ llvm::append_range(Result, InVTs);
}
}
@@ -403,7 +418,7 @@ std::vector<ValueTypeByHwMode> CodeGenTarget::getRegisterVTs(Record *R)
void CodeGenTarget::ReadLegalValueTypes() const {
for (const auto &RC : getRegBank().getRegClasses())
- LegalValueTypes.insert(LegalValueTypes.end(), RC.VTs.begin(), RC.VTs.end());
+ llvm::append_range(LegalValueTypes, RC.VTs);
// Remove duplicates.
llvm::sort(LegalValueTypes);
@@ -419,8 +434,6 @@ CodeGenSchedModels &CodeGenTarget::getSchedModels() const {
}
void CodeGenTarget::ReadInstructions() const {
- NamedRegionTimer T("Read Instructions", "Time spent reading instructions",
- "CodeGenTarget", "CodeGenTarget", TimeRegions);
std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
if (Insts.size() <= 2)
PrintFatalError("No 'Instruction' subclasses defined!");
@@ -599,12 +612,19 @@ ComplexPattern::ComplexPattern(Record *R) {
//===----------------------------------------------------------------------===//
CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) {
- std::vector<Record*> Defs = RC.getAllDerivedDefinitions("Intrinsic");
+ std::vector<Record *> IntrProperties =
+ RC.getAllDerivedDefinitions("IntrinsicProperty");
+ std::vector<Record *> DefaultProperties;
+ for (Record *Rec : IntrProperties)
+ if (Rec->getValueAsBit("IsDefault"))
+ DefaultProperties.push_back(Rec);
+
+ std::vector<Record *> Defs = RC.getAllDerivedDefinitions("Intrinsic");
Intrinsics.reserve(Defs.size());
for (unsigned I = 0, e = Defs.size(); I != e; ++I)
- Intrinsics.push_back(CodeGenIntrinsic(Defs[I]));
+ Intrinsics.push_back(CodeGenIntrinsic(Defs[I], DefaultProperties));
llvm::sort(Intrinsics,
[](const CodeGenIntrinsic &LHS, const CodeGenIntrinsic &RHS) {
@@ -620,7 +640,8 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) {
Targets.back().Count = Intrinsics.size() - Targets.back().Offset;
}
-CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
+CodeGenIntrinsic::CodeGenIntrinsic(Record *R,
+ std::vector<Record *> DefaultProperties) {
TheDef = R;
std::string DefName = std::string(R->getName());
ArrayRef<SMLoc> DefLoc = R->getLoc();
@@ -773,70 +794,12 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
assert(Property->isSubClassOf("IntrinsicProperty") &&
"Expected a property!");
- if (Property->getName() == "IntrNoMem")
- ModRef = NoMem;
- else if (Property->getName() == "IntrReadMem")
- ModRef = ModRefBehavior(ModRef & ~MR_Mod);
- else if (Property->getName() == "IntrWriteMem")
- ModRef = ModRefBehavior(ModRef & ~MR_Ref);
- else if (Property->getName() == "IntrArgMemOnly")
- ModRef = ModRefBehavior((ModRef & ~MR_Anywhere) | MR_ArgMem);
- else if (Property->getName() == "IntrInaccessibleMemOnly")
- ModRef = ModRefBehavior((ModRef & ~MR_Anywhere) | MR_InaccessibleMem);
- else if (Property->getName() == "IntrInaccessibleMemOrArgMemOnly")
- ModRef = ModRefBehavior((ModRef & ~MR_Anywhere) | MR_ArgMem |
- MR_InaccessibleMem);
- else if (Property->getName() == "Commutative")
- isCommutative = true;
- else if (Property->getName() == "Throws")
- canThrow = true;
- else if (Property->getName() == "IntrNoDuplicate")
- isNoDuplicate = true;
- else if (Property->getName() == "IntrConvergent")
- isConvergent = true;
- else if (Property->getName() == "IntrNoReturn")
- isNoReturn = true;
- else if (Property->getName() == "IntrNoSync")
- isNoSync = true;
- else if (Property->getName() == "IntrNoFree")
- isNoFree = true;
- else if (Property->getName() == "IntrWillReturn")
- isWillReturn = true;
- else if (Property->getName() == "IntrCold")
- isCold = true;
- else if (Property->getName() == "IntrSpeculatable")
- isSpeculatable = true;
- else if (Property->getName() == "IntrHasSideEffects")
- hasSideEffects = true;
- else if (Property->isSubClassOf("NoCapture")) {
- unsigned ArgNo = Property->getValueAsInt("ArgNo");
- ArgumentAttributes.emplace_back(ArgNo, NoCapture, 0);
- } else if (Property->isSubClassOf("NoAlias")) {
- unsigned ArgNo = Property->getValueAsInt("ArgNo");
- ArgumentAttributes.emplace_back(ArgNo, NoAlias, 0);
- } else if (Property->isSubClassOf("Returned")) {
- unsigned ArgNo = Property->getValueAsInt("ArgNo");
- ArgumentAttributes.emplace_back(ArgNo, Returned, 0);
- } else if (Property->isSubClassOf("ReadOnly")) {
- unsigned ArgNo = Property->getValueAsInt("ArgNo");
- ArgumentAttributes.emplace_back(ArgNo, ReadOnly, 0);
- } else if (Property->isSubClassOf("WriteOnly")) {
- unsigned ArgNo = Property->getValueAsInt("ArgNo");
- ArgumentAttributes.emplace_back(ArgNo, WriteOnly, 0);
- } else if (Property->isSubClassOf("ReadNone")) {
- unsigned ArgNo = Property->getValueAsInt("ArgNo");
- ArgumentAttributes.emplace_back(ArgNo, ReadNone, 0);
- } else if (Property->isSubClassOf("ImmArg")) {
- unsigned ArgNo = Property->getValueAsInt("ArgNo");
- ArgumentAttributes.emplace_back(ArgNo, ImmArg, 0);
- } else if (Property->isSubClassOf("Align")) {
- unsigned ArgNo = Property->getValueAsInt("ArgNo");
- uint64_t Align = Property->getValueAsInt("Align");
- ArgumentAttributes.emplace_back(ArgNo, Alignment, Align);
- } else
- llvm_unreachable("Unknown property!");
+ setProperty(Property);
}
+ // Set default properties to true.
+ setDefaultProperties(R, DefaultProperties);
+
// Also record the SDPatternOperator Properties.
Properties = parseSDPatternOperatorProperties(R);
@@ -844,6 +807,92 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
llvm::sort(ArgumentAttributes);
}
+void CodeGenIntrinsic::setDefaultProperties(
+ Record *R, std::vector<Record *> DefaultProperties) {
+ // opt-out of using default attributes.
+ if (R->getValueAsBit("DisableDefaultAttributes"))
+ return;
+
+ for (Record *Rec : DefaultProperties)
+ setProperty(Rec);
+}
+
+void CodeGenIntrinsic::setProperty(Record *R) {
+ if (R->getName() == "IntrNoMem")
+ ModRef = NoMem;
+ else if (R->getName() == "IntrReadMem") {
+ if (!(ModRef & MR_Ref))
+ PrintFatalError(TheDef->getLoc(),
+ Twine("IntrReadMem cannot be used after IntrNoMem or "
+ "IntrWriteMem. Default is ReadWrite"));
+ ModRef = ModRefBehavior(ModRef & ~MR_Mod);
+ } else if (R->getName() == "IntrWriteMem") {
+ if (!(ModRef & MR_Mod))
+ PrintFatalError(TheDef->getLoc(),
+ Twine("IntrWriteMem cannot be used after IntrNoMem or "
+ "IntrReadMem. Default is ReadWrite"));
+ ModRef = ModRefBehavior(ModRef & ~MR_Ref);
+ } else if (R->getName() == "IntrArgMemOnly")
+ ModRef = ModRefBehavior((ModRef & ~MR_Anywhere) | MR_ArgMem);
+ else if (R->getName() == "IntrInaccessibleMemOnly")
+ ModRef = ModRefBehavior((ModRef & ~MR_Anywhere) | MR_InaccessibleMem);
+ else if (R->getName() == "IntrInaccessibleMemOrArgMemOnly")
+ ModRef = ModRefBehavior((ModRef & ~MR_Anywhere) | MR_ArgMem |
+ MR_InaccessibleMem);
+ else if (R->getName() == "Commutative")
+ isCommutative = true;
+ else if (R->getName() == "Throws")
+ canThrow = true;
+ else if (R->getName() == "IntrNoDuplicate")
+ isNoDuplicate = true;
+ else if (R->getName() == "IntrConvergent")
+ isConvergent = true;
+ else if (R->getName() == "IntrNoReturn")
+ isNoReturn = true;
+ else if (R->getName() == "IntrNoSync")
+ isNoSync = true;
+ else if (R->getName() == "IntrNoFree")
+ isNoFree = true;
+ else if (R->getName() == "IntrWillReturn")
+ isWillReturn = !isNoReturn;
+ else if (R->getName() == "IntrCold")
+ isCold = true;
+ else if (R->getName() == "IntrSpeculatable")
+ isSpeculatable = true;
+ else if (R->getName() == "IntrHasSideEffects")
+ hasSideEffects = true;
+ else if (R->isSubClassOf("NoCapture")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ ArgumentAttributes.emplace_back(ArgNo, NoCapture, 0);
+ } else if (R->isSubClassOf("NoAlias")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ ArgumentAttributes.emplace_back(ArgNo, NoAlias, 0);
+ } else if (R->isSubClassOf("NoUndef")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ ArgumentAttributes.emplace_back(ArgNo, NoUndef, 0);
+ } else if (R->isSubClassOf("Returned")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ ArgumentAttributes.emplace_back(ArgNo, Returned, 0);
+ } else if (R->isSubClassOf("ReadOnly")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ ArgumentAttributes.emplace_back(ArgNo, ReadOnly, 0);
+ } else if (R->isSubClassOf("WriteOnly")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ ArgumentAttributes.emplace_back(ArgNo, WriteOnly, 0);
+ } else if (R->isSubClassOf("ReadNone")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ ArgumentAttributes.emplace_back(ArgNo, ReadNone, 0);
+ } else if (R->isSubClassOf("ImmArg")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ ArgumentAttributes.emplace_back(ArgNo, ImmArg, 0);
+ } else if (R->isSubClassOf("Align")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ uint64_t Align = R->getValueAsInt("Align");
+ ArgumentAttributes.emplace_back(ArgNo, Alignment, Align);
+ } else
+ llvm_unreachable("Unknown property!");
+}
+
bool CodeGenIntrinsic::isParamAPointer(unsigned ParamIdx) const {
if (ParamIdx >= IS.ParamVTs.size())
return false;
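// The refactoring above routes every IntrinsicProperty through setProperty()
// and then applies the records marked IsDefault unless the intrinsic opts
// out. A hedged sketch of that flow; the record type and flag below are
// stand-ins, not the real TableGen classes:
#include <string>
#include <vector>

struct PropertySketch { std::string Name; bool IsDefault = false; };

static void applyProperties(const std::vector<PropertySketch> &Explicit,
                            const std::vector<PropertySketch> &Defaults,
                            bool DisableDefaults,
                            void (*Set)(const PropertySketch &)) {
  for (const PropertySketch &P : Explicit)
    Set(P);              // explicitly listed properties always apply
  if (DisableDefaults)   // mirrors the DisableDefaultAttributes opt-out
    return;
  for (const PropertySketch &P : Defaults)
    Set(P);              // defaults are layered on afterwards
}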
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.h b/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.h
index 6c89f34c50ec..9de9b512f74f 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.h
@@ -60,6 +60,7 @@ class CodeGenTarget {
mutable std::unique_ptr<CodeGenSchedModels> SchedModels;
+ mutable StringRef InstNamespace;
mutable std::vector<const CodeGenInstruction*> InstrsByEnum;
mutable unsigned NumPseudoInstructions = 0;
public:
@@ -67,12 +68,15 @@ public:
~CodeGenTarget();
Record *getTargetRecord() const { return TargetRec; }
- const StringRef getName() const;
+ StringRef getName() const;
/// getInstNamespace - Return the target-specific instruction namespace.
///
StringRef getInstNamespace() const;
+ /// getRegNamespace - Return the target-specific register namespace.
+ StringRef getRegNamespace() const;
+
/// getInstructionSet - Return the InstructionSet object.
///
Record *getInstructionSet() const;
@@ -107,7 +111,8 @@ public:
/// covers \p SubIdx if it exists.
Optional<CodeGenRegisterClass *>
getSuperRegForSubReg(const ValueTypeByHwMode &Ty, CodeGenRegBank &RegBank,
- const CodeGenSubRegIndex *SubIdx) const;
+ const CodeGenSubRegIndex *SubIdx,
+ bool MustBeAllocatable = false) const;
/// getRegisterByName - If there is a register with the specific AsmName,
/// return it.
diff --git a/contrib/llvm-project/llvm/utils/TableGen/DAGISelEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/DAGISelEmitter.cpp
index d8e78ce55c7b..32ed0bf98743 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/DAGISelEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/DAGISelEmitter.cpp
@@ -23,9 +23,10 @@ namespace {
/// DAGISelEmitter - The top-level class which coordinates construction
/// and emission of the instruction selector.
class DAGISelEmitter {
+ RecordKeeper &Records; // Just so we can get at the timing functions.
CodeGenDAGPatterns CGP;
public:
- explicit DAGISelEmitter(RecordKeeper &R) : CGP(R) {}
+ explicit DAGISelEmitter(RecordKeeper &R) : Records(R), CGP(R) {}
void run(raw_ostream &OS);
};
} // End anonymous namespace
@@ -150,6 +151,7 @@ void DAGISelEmitter::run(raw_ostream &OS) {
});
// Add all the patterns to a temporary list so we can sort them.
+ Records.startTimer("Sort patterns");
std::vector<const PatternToMatch*> Patterns;
for (CodeGenDAGPatterns::ptm_iterator I = CGP.ptm_begin(), E = CGP.ptm_end();
I != E; ++I)
@@ -157,11 +159,10 @@ void DAGISelEmitter::run(raw_ostream &OS) {
// We want to process the matches in order of minimal cost. Sort the patterns
// so the least cost one is at the start.
- std::stable_sort(Patterns.begin(), Patterns.end(),
- PatternSortingPredicate(CGP));
-
+ llvm::stable_sort(Patterns, PatternSortingPredicate(CGP));
// Convert each variant of each pattern into a Matcher.
+ Records.startTimer("Convert to matchers");
std::vector<Matcher*> PatternMatchers;
for (unsigned i = 0, e = Patterns.size(); i != e; ++i) {
for (unsigned Variant = 0; ; ++Variant) {
@@ -175,14 +176,19 @@ void DAGISelEmitter::run(raw_ostream &OS) {
std::unique_ptr<Matcher> TheMatcher =
std::make_unique<ScopeMatcher>(PatternMatchers);
+ Records.startTimer("Optimize matchers");
OptimizeMatcher(TheMatcher, CGP);
+
//Matcher->dump();
+
+ Records.startTimer("Emit matcher table");
EmitMatcherTable(TheMatcher.get(), CGP, OS);
}
namespace llvm {
void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS) {
+ RK.startTimer("Parse patterns");
DAGISelEmitter(RK).run(OS);
}
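// The emitter above now brackets each phase with RecordKeeper::startTimer(),
// so TableGen's optional phase timing can attribute time to pattern sorting,
// matcher conversion, optimization and table emission. The calling pattern,
// with hypothetical phase names:
#include "llvm/TableGen/Record.h"

static void runTimedPhases(llvm::RecordKeeper &RK) {
  RK.startTimer("Parse input");  // begin a named phase
  // ... work for the first phase ...
  RK.startTimer("Emit output");  // begin the next named phase
  // ... work for the second phase ...
}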
diff --git a/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcher.h b/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcher.h
index 223513fc8d38..ff9a0cb335d1 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcher.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcher.h
@@ -31,7 +31,7 @@ Matcher *ConvertPatternToMatcher(const PatternToMatch &Pattern,unsigned Variant,
const CodeGenDAGPatterns &CGP);
void OptimizeMatcher(std::unique_ptr<Matcher> &Matcher,
const CodeGenDAGPatterns &CGP);
-void EmitMatcherTable(const Matcher *Matcher, const CodeGenDAGPatterns &CGP,
+void EmitMatcherTable(Matcher *Matcher, const CodeGenDAGPatterns &CGP,
raw_ostream &OS);
@@ -41,6 +41,7 @@ class Matcher {
// The next matcher node that is executed after this one. Null if this is the
// last stage of a match.
std::unique_ptr<Matcher> Next;
+ size_t Size; // Size in bytes of matcher and all its children (if any).
virtual void anchor();
public:
enum KindTy {
@@ -85,7 +86,10 @@ public:
EmitNode, // Create a DAG node
EmitNodeXForm, // Run a SDNodeXForm
CompleteMatch, // Finish a match and update the results.
- MorphNodeTo // Build a node, finish a match and update results.
+ MorphNodeTo, // Build a node, finish a match and update results.
+
+ // Highest enum value; watch out when adding more.
+ HighestKind = MorphNodeTo
};
const KindTy Kind;
@@ -94,6 +98,8 @@ protected:
public:
virtual ~Matcher() {}
+ unsigned getSize() const { return Size; }
+ void setSize(unsigned sz) { Size = sz; }
KindTy getKind() const { return Kind; }
Matcher *getNext() { return Next.get(); }
@@ -700,7 +706,7 @@ public:
const ComplexPattern &getPattern() const { return Pattern; }
unsigned getMatchNumber() const { return MatchNumber; }
- const std::string getName() const { return Name; }
+ std::string getName() const { return Name; }
unsigned getFirstResult() const { return FirstResult; }
static bool classof(const Matcher *N) {
@@ -757,8 +763,8 @@ private:
}
};
-/// CheckImmAllOnesVMatcher - This check if the current node is an build vector
-/// of all ones.
+/// CheckImmAllOnesVMatcher - This checks if the current node is a build_vector
+/// or splat_vector of all ones.
class CheckImmAllOnesVMatcher : public Matcher {
public:
CheckImmAllOnesVMatcher() : Matcher(CheckImmAllOnesV) {}
@@ -773,8 +779,8 @@ private:
bool isContradictoryImpl(const Matcher *M) const override;
};
-/// CheckImmAllZerosVMatcher - This check if the current node is an build vector
-/// of all zeros.
+/// CheckImmAllZerosVMatcher - This checks if the current node is a
+/// build_vector or splat_vector of all zeros.
class CheckImmAllZerosVMatcher : public Matcher {
public:
CheckImmAllZerosVMatcher() : Matcher(CheckImmAllZerosV) {}
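// Matcher above gains a cached Size plus a HighestKind sentinel so the
// emitter can keep a per-opcode histogram. The sentinel pattern, reduced to
// a standalone toy enum (names are illustrative):
#include <array>

enum SketchKind { SketchScope, SketchCheckSame, SketchMorphNodeTo,
                  SketchHighestKind = SketchMorphNodeTo };

static std::array<unsigned, SketchHighestKind + 1> KindCounts{};

static void countKind(SketchKind K) {
  // SketchHighestKind tracks the last enumerator, so the array stays large
  // enough as long as new kinds are added before the sentinel.
  ++KindCounts[K];
}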
diff --git a/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index d9ec14aab8a8..03528a46aea7 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+
using namespace llvm;
enum {
@@ -47,6 +48,8 @@ namespace {
class MatcherTableEmitter {
const CodeGenDAGPatterns &CGP;
+ SmallVector<unsigned, Matcher::HighestKind+1> OpcodeCounts;
+
DenseMap<TreePattern *, unsigned> NodePredicateMap;
std::vector<TreePredicateFn> NodePredicates;
std::vector<TreePredicateFn> NodePredicatesWithOperands;
@@ -79,12 +82,15 @@ class MatcherTableEmitter {
}
public:
- MatcherTableEmitter(const CodeGenDAGPatterns &cgp)
- : CGP(cgp) {}
+ MatcherTableEmitter(const CodeGenDAGPatterns &cgp) : CGP(cgp) {
+ OpcodeCounts.assign(Matcher::HighestKind+1, 0);
+ }
- unsigned EmitMatcherList(const Matcher *N, unsigned Indent,
+ unsigned EmitMatcherList(const Matcher *N, const unsigned Indent,
unsigned StartIdx, raw_ostream &OS);
+ unsigned SizeMatcherList(Matcher *N, raw_ostream &OS);
+
void EmitPredicateFunctions(raw_ostream &OS);
void EmitHistogram(const Matcher *N, raw_ostream &OS);
@@ -95,7 +101,9 @@ private:
void EmitNodePredicatesFunction(const std::vector<TreePredicateFn> &Preds,
StringRef Decl, raw_ostream &OS);
- unsigned EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
+ unsigned SizeMatcher(Matcher *N, raw_ostream &OS);
+
+ unsigned EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
raw_ostream &OS);
unsigned getNodePredicate(TreePredicateFn Pred) {
@@ -165,7 +173,7 @@ static std::string GetPatFromTreePatternNode(const TreePatternNode *N) {
return str;
}
-static unsigned GetVBRSize(unsigned Val) {
+static size_t GetVBRSize(unsigned Val) {
if (Val <= 127) return 1;
unsigned NumBytes = 0;
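
The return-type change above is cosmetic, but GetVBRSize itself is worth a gloss: the matcher table stores sizes in a variable-width form that packs 7 payload bits per byte and marks continuation in the high bit, so values up to 127 take a single byte. A hedged, self-contained sketch of that scheme (VBRSize and EncodeVBR are illustrative helpers, not the emitter's API):

#include <cstdint>
#include <vector>

static size_t VBRSize(unsigned Val) {
  size_t NumBytes = 1;
  for (; Val > 127; Val >>= 7)
    ++NumBytes;                              // one extra byte per additional 7-bit group
  return NumBytes;
}

static void EncodeVBR(unsigned Val, std::vector<uint8_t> &Out) {
  for (; Val > 127; Val >>= 7)
    Out.push_back(uint8_t(Val & 127) | 128); // high bit set: more bytes follow
  Out.push_back(uint8_t(Val));               // final byte, high bit clear
}
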
@@ -219,6 +227,78 @@ static std::string getIncludePath(const Record *R) {
return str;
}
+/// This function traverses the matcher tree and sizes all the nodes
+/// that are children of the three kinds of nodes that have them.
+unsigned MatcherTableEmitter::
+SizeMatcherList(Matcher *N, raw_ostream &OS) {
+ unsigned Size = 0;
+ while (N) {
+ Size += SizeMatcher(N, OS);
+ N = N->getNext();
+ }
+ return Size;
+}
+
+/// This function sizes the children of the three kinds of nodes that
+/// have them. It does so by using special cases for those three
+/// nodes, but sharing the code in EmitMatcher() for the other kinds.
+unsigned MatcherTableEmitter::
+SizeMatcher(Matcher *N, raw_ostream &OS) {
+ unsigned Idx = 0;
+
+ ++OpcodeCounts[N->getKind()];
+ switch (N->getKind()) {
+ // The Scope matcher has its kind, a series of child size + child,
+ // and a trailing zero.
+ case Matcher::Scope: {
+ ScopeMatcher *SM = cast<ScopeMatcher>(N);
+ assert(SM->getNext() == nullptr && "Scope matcher should not have next");
+ unsigned Size = 1; // Count the kind.
+ for (unsigned i = 0, e = SM->getNumChildren(); i != e; ++i) {
+ const size_t ChildSize = SizeMatcherList(SM->getChild(i), OS);
+ assert(ChildSize != 0 && "Matcher cannot have child of size 0");
+ SM->getChild(i)->setSize(ChildSize);
+ Size += GetVBRSize(ChildSize) + ChildSize; // Count VBR and child size.
+ }
+ ++Size; // Count the zero sentinel.
+ return Size;
+ }
+
+ // SwitchOpcode and SwitchType have their kind, a series of child size +
+ // opcode/type + child, and a trailing zero.
+ case Matcher::SwitchOpcode:
+ case Matcher::SwitchType: {
+ unsigned Size = 1; // Count the kind.
+ unsigned NumCases;
+ if (const SwitchOpcodeMatcher *SOM = dyn_cast<SwitchOpcodeMatcher>(N))
+ NumCases = SOM->getNumCases();
+ else
+ NumCases = cast<SwitchTypeMatcher>(N)->getNumCases();
+ for (unsigned i = 0, e = NumCases; i != e; ++i) {
+ Matcher *Child;
+ if (SwitchOpcodeMatcher *SOM = dyn_cast<SwitchOpcodeMatcher>(N)) {
+ Child = SOM->getCaseMatcher(i);
+ Size += 2; // Count the child's opcode.
+ } else {
+ Child = cast<SwitchTypeMatcher>(N)->getCaseMatcher(i);
+ ++Size; // Count the child's type.
+ }
+ const size_t ChildSize = SizeMatcherList(Child, OS);
+ assert(ChildSize != 0 && "Matcher cannot have child of size 0");
+ Child->setSize(ChildSize);
+ Size += GetVBRSize(ChildSize) + ChildSize; // Count VBR and child size.
+ }
+ ++Size; // Count the zero sentinel.
+ return Size;
+ }
+
+ default:
+ // Employ the matcher emitter to size other matchers.
+ return EmitMatcher(N, 0, Idx, OS);
+ }
+ llvm_unreachable("Unreachable");
+}
+
static void BeginEmitFunction(raw_ostream &OS, StringRef RetType,
StringRef Decl, bool AddOverride) {
OS << "#ifdef GET_DAGISEL_DECL\n";
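
Putting the sizing rules above together: a Scope record costs one byte for the kind, then for each child a VBR-encoded child size followed by the child's bytes, and finally a one-byte zero sentinel. A small worked example with made-up child sizes:

// Hypothetical Scope with two children of 5 and 200 table bytes:
//   1 (kind) + 1 (VBR of 5) + 5 + 2 (VBR of 200) + 200 + 1 (sentinel) = 210.
constexpr unsigned VBRBytes(unsigned V) { return V <= 127 ? 1 : 2; } // enough for this example
constexpr unsigned ScopeSize = 1 + VBRBytes(5) + 5 + VBRBytes(200) + 200 + 1;
static_assert(ScopeSize == 210, "matches the hand computation");
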
@@ -250,7 +330,7 @@ void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) {
BeginEmitFunction(OS, "StringRef", "getPatternForIndex(unsigned Index)",
true/*AddOverride*/);
OS << "{\n";
- OS << "static const char * PATTERN_MATCH_TABLE[] = {\n";
+ OS << "static const char *PATTERN_MATCH_TABLE[] = {\n";
for (const auto &It : VecPatterns) {
OS << "\"" << It.first << "\",\n";
@@ -264,7 +344,7 @@ void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) {
BeginEmitFunction(OS, "StringRef", "getIncludePathForIndex(unsigned Index)",
true/*AddOverride*/);
OS << "{\n";
- OS << "static const char * INCLUDE_PATH_TABLE[] = {\n";
+ OS << "static const char *INCLUDE_PATH_TABLE[] = {\n";
for (const auto &It : VecIncludeStrings) {
OS << "\"" << It << "\",\n";
@@ -279,19 +359,16 @@ void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) {
/// EmitMatcher - Emit bytes for the specified matcher and return
/// the number of bytes emitted.
unsigned MatcherTableEmitter::
-EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
+EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
raw_ostream &OS) {
OS.indent(Indent);
switch (N->getKind()) {
case Matcher::Scope: {
const ScopeMatcher *SM = cast<ScopeMatcher>(N);
- assert(SM->getNext() == nullptr && "Shouldn't have next after scope");
-
unsigned StartIdx = CurrentIdx;
// Emit all of the children.
- SmallString<128> TmpBuf;
for (unsigned i = 0, e = SM->getNumChildren(); i != e; ++i) {
if (i == 0) {
OS << "OPC_Scope, ";
@@ -304,34 +381,21 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
OS.indent(Indent);
}
- // We need to encode the child and the offset of the failure code before
- // emitting either of them. Handle this by buffering the output into a
- // string while we get the size. Unfortunately, the offset of the
- // children depends on the VBR size of the child, so for large children we
- // have to iterate a bit.
- unsigned ChildSize = 0;
- unsigned VBRSize = 0;
- do {
- VBRSize = GetVBRSize(ChildSize);
-
- TmpBuf.clear();
- raw_svector_ostream OS(TmpBuf);
- ChildSize = EmitMatcherList(SM->getChild(i), Indent+1,
- CurrentIdx+VBRSize, OS);
- } while (GetVBRSize(ChildSize) != VBRSize);
-
- assert(ChildSize != 0 && "Should not have a zero-sized child!");
-
- CurrentIdx += EmitVBRValue(ChildSize, OS);
+ size_t ChildSize = SM->getChild(i)->getSize();
+ size_t VBRSize = GetVBRSize(ChildSize);
+ EmitVBRValue(ChildSize, OS);
if (!OmitComments) {
- OS << "/*->" << CurrentIdx+ChildSize << "*/";
-
+ OS << "/*->" << CurrentIdx + VBRSize + ChildSize << "*/";
if (i == 0)
OS << " // " << SM->getNumChildren() << " children in Scope";
}
+ OS << '\n';
- OS << '\n' << TmpBuf;
- CurrentIdx += ChildSize;
+ ChildSize = EmitMatcherList(SM->getChild(i), Indent+1,
+ CurrentIdx + VBRSize, OS);
+ assert(ChildSize == SM->getChild(i)->getSize() &&
+ "Emitted child size does not match calculated size");
+ CurrentIdx += VBRSize + ChildSize;
}
// Emit a zero as a sentinel indicating end of 'Scope'.
@@ -450,7 +514,6 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
++CurrentIdx;
// For each case we emit the size, then the opcode, then the matcher.
- SmallString<128> TmpBuf;
for (unsigned i = 0, e = NumCases; i != e; ++i) {
const Matcher *Child;
unsigned IdxSize;
@@ -462,24 +525,6 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
IdxSize = 1; // size of type in table is 1 byte.
}
- // We need to encode the opcode and the offset of the case code before
- // emitting the case code. Handle this by buffering the output into a
- // string while we get the size. Unfortunately, the offset of the
- // children depends on the VBR size of the child, so for large children we
- // have to iterate a bit.
- unsigned ChildSize = 0;
- unsigned VBRSize = 0;
- do {
- VBRSize = GetVBRSize(ChildSize);
-
- TmpBuf.clear();
- raw_svector_ostream OS(TmpBuf);
- ChildSize = EmitMatcherList(Child, Indent+1, CurrentIdx+VBRSize+IdxSize,
- OS);
- } while (GetVBRSize(ChildSize) != VBRSize);
-
- assert(ChildSize != 0 && "Should not have a zero-sized child!");
-
if (i != 0) {
if (!OmitComments)
OS << "/*" << format_decimal(CurrentIdx, IndexWidth) << "*/";
@@ -489,20 +534,19 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
"/*SwitchOpcode*/ " : "/*SwitchType*/ ");
}
- // Emit the VBR.
- CurrentIdx += EmitVBRValue(ChildSize, OS);
-
+ size_t ChildSize = Child->getSize();
+ CurrentIdx += EmitVBRValue(ChildSize, OS) + IdxSize;
if (const SwitchOpcodeMatcher *SOM = dyn_cast<SwitchOpcodeMatcher>(N))
OS << "TARGET_VAL(" << SOM->getCaseOpcode(i).getEnumName() << "),";
else
OS << getEnumName(cast<SwitchTypeMatcher>(N)->getCaseType(i)) << ',';
-
- CurrentIdx += IdxSize;
-
if (!OmitComments)
- OS << "// ->" << CurrentIdx+ChildSize;
+ OS << "// ->" << CurrentIdx + ChildSize;
OS << '\n';
- OS << TmpBuf;
+
+ ChildSize = EmitMatcherList(Child, Indent+1, CurrentIdx, OS);
+ assert(ChildSize == Child->getSize() &&
+ "Emitted child size does not match calculated size");
CurrentIdx += ChildSize;
}
@@ -515,8 +559,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
" // EndSwitchOpcode" : " // EndSwitchType");
OS << '\n';
- ++CurrentIdx;
- return CurrentIdx-StartIdx;
+ return CurrentIdx - StartIdx + 1;
}
case Matcher::CheckType:
@@ -810,9 +853,10 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
llvm_unreachable("Unreachable");
}
-/// EmitMatcherList - Emit the bytes for the specified matcher subtree.
+/// This function traverses the matcher tree and emits all the nodes.
+/// The nodes have already been sized.
unsigned MatcherTableEmitter::
-EmitMatcherList(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
+EmitMatcherList(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
raw_ostream &OS) {
unsigned Size = 0;
while (N) {
@@ -841,12 +885,12 @@ void MatcherTableEmitter::EmitNodePredicatesFunction(
OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n";
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
// Emit the predicate code corresponding to this pattern.
- TreePredicateFn PredFn = Preds[i];
+ const TreePredicateFn PredFn = Preds[i];
assert(!PredFn.isAlwaysTrue() && "No code in this predicate");
- OS << " case " << i << ": { \n";
+ OS << " case " << i << ": {\n";
for (auto *SimilarPred :
- NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()])
+ NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()])
OS << " // " << TreePredicateFn(SimilarPred).getFnName() <<'\n';
OS << PredFn.getCodeToRunOnSDNode() << "\n }\n";
@@ -887,7 +931,7 @@ void MatcherTableEmitter::EmitPredicateFunctions(raw_ostream &OS) {
BeginEmitFunction(OS, "bool",
"CheckComplexPattern(SDNode *Root, SDNode *Parent,\n"
" SDValue N, unsigned PatternNo,\n"
- " SmallVectorImpl<std::pair<SDValue, SDNode*>> &Result)",
+ " SmallVectorImpl<std::pair<SDValue, SDNode *>> &Result)",
true/*AddOverride*/);
OS << "{\n";
OS << " unsigned NextRes = Result.size();\n";
@@ -975,28 +1019,6 @@ void MatcherTableEmitter::EmitPredicateFunctions(raw_ostream &OS) {
}
}
-static void BuildHistogram(const Matcher *M, std::vector<unsigned> &OpcodeFreq){
- for (; M != nullptr; M = M->getNext()) {
- // Count this node.
- if (unsigned(M->getKind()) >= OpcodeFreq.size())
- OpcodeFreq.resize(M->getKind()+1);
- OpcodeFreq[M->getKind()]++;
-
- // Handle recursive nodes.
- if (const ScopeMatcher *SM = dyn_cast<ScopeMatcher>(M)) {
- for (unsigned i = 0, e = SM->getNumChildren(); i != e; ++i)
- BuildHistogram(SM->getChild(i), OpcodeFreq);
- } else if (const SwitchOpcodeMatcher *SOM =
- dyn_cast<SwitchOpcodeMatcher>(M)) {
- for (unsigned i = 0, e = SOM->getNumCases(); i != e; ++i)
- BuildHistogram(SOM->getCaseMatcher(i), OpcodeFreq);
- } else if (const SwitchTypeMatcher *STM = dyn_cast<SwitchTypeMatcher>(M)) {
- for (unsigned i = 0, e = STM->getNumCases(); i != e; ++i)
- BuildHistogram(STM->getCaseMatcher(i), OpcodeFreq);
- }
- }
-}
-
static StringRef getOpcodeString(Matcher::KindTy Kind) {
switch (Kind) {
case Matcher::Scope: return "OPC_Scope"; break;
@@ -1048,20 +1070,17 @@ void MatcherTableEmitter::EmitHistogram(const Matcher *M,
if (OmitComments)
return;
- std::vector<unsigned> OpcodeFreq;
- BuildHistogram(M, OpcodeFreq);
-
OS << " // Opcode Histogram:\n";
- for (unsigned i = 0, e = OpcodeFreq.size(); i != e; ++i) {
+ for (unsigned i = 0, e = OpcodeCounts.size(); i != e; ++i) {
OS << " // #"
<< left_justify(getOpcodeString((Matcher::KindTy)i), HistOpcWidth)
- << " = " << OpcodeFreq[i] << '\n';
+ << " = " << OpcodeCounts[i] << '\n';
}
OS << '\n';
}
-void llvm::EmitMatcherTable(const Matcher *TheMatcher,
+void llvm::EmitMatcherTable(Matcher *TheMatcher,
const CodeGenDAGPatterns &CGP,
raw_ostream &OS) {
OS << "#if defined(GET_DAGISEL_DECL) && defined(GET_DAGISEL_BODY)\n";
@@ -1096,12 +1115,23 @@ void llvm::EmitMatcherTable(const Matcher *TheMatcher,
BeginEmitFunction(OS, "void", "SelectCode(SDNode *N)", false/*AddOverride*/);
MatcherTableEmitter MatcherEmitter(CGP);
+ // First we size all the children of the three kinds of matchers that have
+ // them. This is done by sharing the code in EmitMatcher(), but we don't
+ // want to emit anything, so we turn off comments and use a null stream.
+ bool SaveOmitComments = OmitComments;
+ OmitComments = true;
+ raw_null_ostream NullOS;
+ unsigned TotalSize = MatcherEmitter.SizeMatcherList(TheMatcher, NullOS);
+ OmitComments = SaveOmitComments;
+
+ // Now that the matchers are sized, we can emit the code for them to the
+ // final stream.
OS << "{\n";
OS << " // Some target values are emitted as 2 bytes, TARGET_VAL handles\n";
OS << " // this.\n";
OS << " #define TARGET_VAL(X) X & 255, unsigned(X) >> 8\n";
OS << " static const unsigned char MatcherTable[] = {\n";
- unsigned TotalSize = MatcherEmitter.EmitMatcherList(TheMatcher, 1, 0, OS);
+ TotalSize = MatcherEmitter.EmitMatcherList(TheMatcher, 1, 0, OS);
OS << " 0\n }; // Total Array size is " << (TotalSize+1) << " bytes\n\n";
MatcherEmitter.EmitHistogram(TheMatcher, OS);
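
The driver change above swaps the old buffer-and-retry loop for two passes over the same emission code: the first writes to a discarding stream purely to learn each node's byte count, the second writes the real table and checks the counts still agree. A hedged sketch of that measure-then-emit idea (emitRecord and sizeThenEmit are invented names):

#include "llvm/Support/raw_ostream.h"
#include <cassert>

static unsigned emitRecord(llvm::raw_ostream &OS) {
  OS << "42, "; // stand-in for one table byte plus its comment
  return 1;     // bytes this record occupies in the table
}

unsigned sizeThenEmit(llvm::raw_ostream &Real) {
  llvm::raw_null_ostream NullOS;
  unsigned Size = emitRecord(NullOS);  // pass 1: measure, output discarded
  unsigned Emitted = emitRecord(Real); // pass 2: emit for real
  assert(Emitted == Size && "emission drifted from the sizing pass");
  return Emitted;
}
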
diff --git a/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherGen.cpp
index 123ea3374c74..f7415b87e1c0 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherGen.cpp
@@ -282,7 +282,9 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
// check to ensure that this gets folded into the normal top-level
// OpcodeSwitch.
if (N == Pattern.getSrcPattern()) {
- const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed("build_vector"));
+ MVT VT = N->getSimpleType(0);
+ StringRef Name = VT.isScalableVector() ? "splat_vector" : "build_vector";
+ const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed(Name));
AddMatcher(new CheckOpcodeMatcher(NI));
}
return AddMatcher(new CheckImmAllOnesVMatcher());
@@ -292,7 +294,9 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
// check to ensure that this gets folded into the normal top-level
// OpcodeSwitch.
if (N == Pattern.getSrcPattern()) {
- const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed("build_vector"));
+ MVT VT = N->getSimpleType(0);
+ StringRef Name = VT.isScalableVector() ? "splat_vector" : "build_vector";
+ const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed(Name));
AddMatcher(new CheckOpcodeMatcher(NI));
}
return AddMatcher(new CheckImmAllZerosVMatcher());
@@ -744,7 +748,7 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
}
}
- errs() << "unhandled leaf node: \n";
+ errs() << "unhandled leaf node:\n";
N->dump();
}
diff --git a/contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.cpp
index 7391f6845a4b..781cb0636fa1 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.cpp
@@ -174,7 +174,7 @@ namespace {
struct Action {
Record *R = nullptr;
unsigned I = 0;
- std::string S = nullptr;
+ std::string S;
Action() = default;
Action(Record *R, unsigned I, std::string S) : R(R), I(I), S(S) {}
@@ -346,8 +346,7 @@ Transition::Transition(Record *R, Automaton *Parent) {
} else if (isa<IntRecTy>(SymbolV->getType())) {
Actions.emplace_back(nullptr, R->getValueAsInt(A), "");
Types.emplace_back("unsigned");
- } else if (isa<StringRecTy>(SymbolV->getType()) ||
- isa<CodeRecTy>(SymbolV->getType())) {
+ } else if (isa<StringRecTy>(SymbolV->getType())) {
Actions.emplace_back(nullptr, 0, std::string(R->getValueAsString(A)));
Types.emplace_back("std::string");
} else {
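
The one-line member change above is a genuine bug fix: initializing a std::string from a null const char* is undefined behavior, so dropping the = nullptr initializer lets the member default-construct to an empty string. A minimal sketch of the corrected shape, trimmed to the relevant member:

#include <string>

struct Action {
  std::string S; // default-constructs to "", no null-pointer hazard
  // std::string S = nullptr; // would call string(const char*) with a null pointer: undefined behavior
};

int main() { return Action{}.S.empty() ? 0 : 1; }
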
diff --git a/contrib/llvm-project/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index bc4a084b3224..40d9385e5e9e 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -263,7 +263,7 @@ void DFAPacketizerEmitter::emitForItineraries(
OS << " " << ProcModelStartIdx[Model] << ", // " << Model->ModelName
<< "\n";
}
- OS << ScheduleClasses.size() << "\n};\n\n";
+ OS << " " << ScheduleClasses.size() << "\n};\n\n";
// The type of a state in the nondeterministic automaton we're defining.
using NfaStateTy = uint64_t;
diff --git a/contrib/llvm-project/llvm/utils/TableGen/DirectiveEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/DirectiveEmitter.cpp
index 2061ff1fdd1a..c9daa9b24155 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/DirectiveEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/DirectiveEmitter.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/TableGen/DirectiveEmitter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
@@ -40,23 +40,15 @@ private:
namespace llvm {
-// Get Directive or Clause name formatted by replacing whitespaces with
-// underscores.
-std::string getFormattedName(StringRef Name) {
- std::string N = Name.str();
- std::replace(N.begin(), N.end(), ' ', '_');
- return N;
-}
-
// Generate enum class
void GenerateEnumClass(const std::vector<Record *> &Records, raw_ostream &OS,
- StringRef Enum, StringRef Prefix, StringRef CppNamespace,
- bool MakeEnumAvailableInNamespace) {
+ StringRef Enum, StringRef Prefix,
+ const DirectiveLanguage &DirLang) {
OS << "\n";
OS << "enum class " << Enum << " {\n";
for (const auto &R : Records) {
- const auto Name = R->getValueAsString("name");
- OS << " " << Prefix << getFormattedName(Name) << ",\n";
+ BaseRecord Rec{R};
+ OS << " " << Prefix << Rec.getFormattedName() << ",\n";
}
OS << "};\n";
OS << "\n";
@@ -68,44 +60,130 @@ void GenerateEnumClass(const std::vector<Record *> &Records, raw_ostream &OS,
// At the same time we do not lose the strong type guarantees of the enum
// class, that is we cannot pass an unsigned as Directive without an explicit
// cast.
- if (MakeEnumAvailableInNamespace) {
+ if (DirLang.hasMakeEnumAvailableInNamespace()) {
OS << "\n";
for (const auto &R : Records) {
- const auto FormattedName = getFormattedName(R->getValueAsString("name"));
- OS << "constexpr auto " << Prefix << FormattedName << " = "
- << "llvm::" << CppNamespace << "::" << Enum << "::" << Prefix
- << FormattedName << ";\n";
+ BaseRecord Rec{R};
+ OS << "constexpr auto " << Prefix << Rec.getFormattedName() << " = "
+ << "llvm::" << DirLang.getCppNamespace() << "::" << Enum
+ << "::" << Prefix << Rec.getFormattedName() << ";\n";
}
}
}
+// Generate enums for values that clauses can take.
+// Also generate function declarations for get<Enum>Name(StringRef Str).
+void GenerateEnumClauseVal(const std::vector<Record *> &Records,
+ raw_ostream &OS, const DirectiveLanguage &DirLang,
+ std::string &EnumHelperFuncs) {
+ for (const auto &R : Records) {
+ Clause C{R};
+ const auto &ClauseVals = C.getClauseVals();
+ if (ClauseVals.size() <= 0)
+ continue;
+
+ const auto &EnumName = C.getEnumName();
+ if (EnumName.size() == 0) {
+ PrintError("enumClauseValue field not set in Clause" +
+ C.getFormattedName() + ".");
+ return;
+ }
+
+ OS << "\n";
+ OS << "enum class " << EnumName << " {\n";
+ for (const auto &CV : ClauseVals) {
+ ClauseVal CVal{CV};
+ OS << " " << CV->getName() << "=" << CVal.getValue() << ",\n";
+ }
+ OS << "};\n";
+
+ if (DirLang.hasMakeEnumAvailableInNamespace()) {
+ OS << "\n";
+ for (const auto &CV : ClauseVals) {
+ OS << "constexpr auto " << CV->getName() << " = "
+ << "llvm::" << DirLang.getCppNamespace() << "::" << EnumName
+ << "::" << CV->getName() << ";\n";
+ }
+ EnumHelperFuncs += (llvm::Twine(EnumName) + llvm::Twine(" get") +
+ llvm::Twine(EnumName) + llvm::Twine("(StringRef);\n"))
+ .str();
+
+ EnumHelperFuncs +=
+ (llvm::Twine("llvm::StringRef get") + llvm::Twine(DirLang.getName()) +
+ llvm::Twine(EnumName) + llvm::Twine("Name(") +
+ llvm::Twine(EnumName) + llvm::Twine(");\n"))
+ .str();
+ }
+ }
+}
+
+bool HasDuplicateClauses(const std::vector<Record *> &Clauses,
+ const Directive &Directive,
+ llvm::StringSet<> &CrtClauses) {
+ bool HasError = false;
+ for (const auto &C : Clauses) {
+ VersionedClause VerClause{C};
+ const auto insRes = CrtClauses.insert(VerClause.getClause().getName());
+ if (!insRes.second) {
+ PrintError("Clause " + VerClause.getClause().getRecordName() +
+ " already defined on directive " + Directive.getRecordName());
+ HasError = true;
+ }
+ }
+ return HasError;
+}
+
+// Check for duplicate clauses in lists. Clauses cannot appear twice in the
+// three allowed lists. Also, since required implies allowed, clauses cannot
+// appear in both the allowedClauses and requiredClauses lists.
+bool HasDuplicateClausesInDirectives(const std::vector<Record *> &Directives) {
+ bool HasDuplicate = false;
+ for (const auto &D : Directives) {
+ Directive Dir{D};
+ llvm::StringSet<> Clauses;
+ // Check for duplicates in the three allowed lists.
+ if (HasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) ||
+ HasDuplicateClauses(Dir.getAllowedOnceClauses(), Dir, Clauses) ||
+ HasDuplicateClauses(Dir.getAllowedExclusiveClauses(), Dir, Clauses)) {
+ HasDuplicate = true;
+ }
+ // Check for duplicates between allowedClauses and requiredClauses
+ Clauses.clear();
+ if (HasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) ||
+ HasDuplicateClauses(Dir.getRequiredClauses(), Dir, Clauses)) {
+ HasDuplicate = true;
+ }
+ if (HasDuplicate)
+ PrintFatalError("One or more clauses are defined multiple times on"
+ " directive " +
+ Dir.getRecordName());
+ }
+
+ return HasDuplicate;
+}
+
+// Check consistency of records. Return true if an error has been detected.
+// Return false if the records are valid.
+bool DirectiveLanguage::HasValidityErrors() const {
+ if (getDirectiveLanguages().size() != 1) {
+ PrintFatalError("A single definition of DirectiveLanguage is needed.");
+ return true;
+ }
+
+ return HasDuplicateClausesInDirectives(getDirectives());
+}
+
// Generate the declaration section for the enumeration in the directive
// language
void EmitDirectivesDecl(RecordKeeper &Records, raw_ostream &OS) {
-
- const auto &DirectiveLanguages =
- Records.getAllDerivedDefinitions("DirectiveLanguage");
-
- if (DirectiveLanguages.size() != 1) {
- PrintError("A single definition of DirectiveLanguage is needed.");
+ const auto DirLang = DirectiveLanguage{Records};
+ if (DirLang.HasValidityErrors())
return;
- }
- const auto &DirectiveLanguage = DirectiveLanguages[0];
- StringRef LanguageName = DirectiveLanguage->getValueAsString("name");
- StringRef DirectivePrefix =
- DirectiveLanguage->getValueAsString("directivePrefix");
- StringRef ClausePrefix = DirectiveLanguage->getValueAsString("clausePrefix");
- StringRef CppNamespace = DirectiveLanguage->getValueAsString("cppNamespace");
- bool MakeEnumAvailableInNamespace =
- DirectiveLanguage->getValueAsBit("makeEnumAvailableInNamespace");
- bool EnableBitmaskEnumInNamespace =
- DirectiveLanguage->getValueAsBit("enableBitmaskEnumInNamespace");
-
- OS << "#ifndef LLVM_" << LanguageName << "_INC\n";
- OS << "#define LLVM_" << LanguageName << "_INC\n";
-
- if (EnableBitmaskEnumInNamespace)
+ OS << "#ifndef LLVM_" << DirLang.getName() << "_INC\n";
+ OS << "#define LLVM_" << DirLang.getName() << "_INC\n";
+
+ if (DirLang.hasEnableBitmaskEnumInNamespace())
OS << "\n#include \"llvm/ADT/BitmaskEnum.h\"\n";
OS << "\n";
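
The duplicate-clause validation added above leans on llvm::StringSet: insert() returns a pair whose bool is false when the key was already present, which is all the check needs. A small self-contained sketch of that pattern (hasDuplicates and the name list are invented for illustration):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include <string>
#include <vector>

static bool hasDuplicates(const std::vector<std::string> &Names) {
  llvm::StringSet<> Seen;
  for (const auto &N : Names)
    if (!Seen.insert(N).second) // false: N was already in the set
      return true;
  return false;
}
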
@@ -114,41 +192,48 @@ void EmitDirectivesDecl(RecordKeeper &Records, raw_ostream &OS) {
// Open namespaces defined in the directive language
llvm::SmallVector<StringRef, 2> Namespaces;
- llvm::SplitString(CppNamespace, Namespaces, "::");
+ llvm::SplitString(DirLang.getCppNamespace(), Namespaces, "::");
for (auto Ns : Namespaces)
OS << "namespace " << Ns << " {\n";
- if (EnableBitmaskEnumInNamespace)
+ if (DirLang.hasEnableBitmaskEnumInNamespace())
OS << "\nLLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();\n";
// Emit Directive enumeration
- const auto &Directives = Records.getAllDerivedDefinitions("Directive");
- GenerateEnumClass(Directives, OS, "Directive", DirectivePrefix, CppNamespace,
- MakeEnumAvailableInNamespace);
+ GenerateEnumClass(DirLang.getDirectives(), OS, "Directive",
+ DirLang.getDirectivePrefix(), DirLang);
// Emit Clause enumeration
- const auto &Clauses = Records.getAllDerivedDefinitions("Clause");
- GenerateEnumClass(Clauses, OS, "Clause", ClausePrefix, CppNamespace,
- MakeEnumAvailableInNamespace);
+ GenerateEnumClass(DirLang.getClauses(), OS, "Clause",
+ DirLang.getClausePrefix(), DirLang);
+
+ // Emit ClauseVal enumeration
+ std::string EnumHelperFuncs;
+ GenerateEnumClauseVal(DirLang.getClauses(), OS, DirLang, EnumHelperFuncs);
// Generic function signatures
OS << "\n";
OS << "// Enumeration helper functions\n";
- OS << "Directive get" << LanguageName
+ OS << "Directive get" << DirLang.getName()
<< "DirectiveKind(llvm::StringRef Str);\n";
OS << "\n";
- OS << "llvm::StringRef get" << LanguageName
+ OS << "llvm::StringRef get" << DirLang.getName()
<< "DirectiveName(Directive D);\n";
OS << "\n";
- OS << "Clause get" << LanguageName << "ClauseKind(llvm::StringRef Str);\n";
+ OS << "Clause get" << DirLang.getName()
+ << "ClauseKind(llvm::StringRef Str);\n";
OS << "\n";
- OS << "llvm::StringRef get" << LanguageName << "ClauseName(Clause C);\n";
+ OS << "llvm::StringRef get" << DirLang.getName() << "ClauseName(Clause C);\n";
OS << "\n";
OS << "/// Return true if \\p C is a valid clause for \\p D in version \\p "
<< "Version.\n";
OS << "bool isAllowedClauseForDirective(Directive D, "
<< "Clause C, unsigned Version);\n";
OS << "\n";
+ if (EnumHelperFuncs.length() > 0) {
+ OS << EnumHelperFuncs;
+ OS << "\n";
+ }
// Closing namespaces
for (auto Ns : llvm::reverse(Namespaces))
@@ -156,138 +241,183 @@ void EmitDirectivesDecl(RecordKeeper &Records, raw_ostream &OS) {
OS << "} // namespace llvm\n";
- OS << "#endif // LLVM_" << LanguageName << "_INC\n";
+ OS << "#endif // LLVM_" << DirLang.getName() << "_INC\n";
}
// Generate function implementation for get<Enum>Name(StringRef Str)
void GenerateGetName(const std::vector<Record *> &Records, raw_ostream &OS,
- StringRef Enum, StringRef Prefix, StringRef LanguageName,
- StringRef Namespace) {
+ StringRef Enum, const DirectiveLanguage &DirLang,
+ StringRef Prefix) {
OS << "\n";
- OS << "llvm::StringRef llvm::" << Namespace << "::get" << LanguageName << Enum
- << "Name(" << Enum << " Kind) {\n";
+ OS << "llvm::StringRef llvm::" << DirLang.getCppNamespace() << "::get"
+ << DirLang.getName() << Enum << "Name(" << Enum << " Kind) {\n";
OS << " switch (Kind) {\n";
for (const auto &R : Records) {
- const auto Name = R->getValueAsString("name");
- const auto AlternativeName = R->getValueAsString("alternativeName");
- OS << " case " << Prefix << getFormattedName(Name) << ":\n";
+ BaseRecord Rec{R};
+ OS << " case " << Prefix << Rec.getFormattedName() << ":\n";
OS << " return \"";
- if (AlternativeName.empty())
- OS << Name;
+ if (Rec.getAlternativeName().empty())
+ OS << Rec.getName();
else
- OS << AlternativeName;
+ OS << Rec.getAlternativeName();
OS << "\";\n";
}
OS << " }\n"; // switch
- OS << " llvm_unreachable(\"Invalid " << LanguageName << " " << Enum
+ OS << " llvm_unreachable(\"Invalid " << DirLang.getName() << " " << Enum
<< " kind\");\n";
OS << "}\n";
}
// Generate function implementation for get<Enum>Kind(StringRef Str)
void GenerateGetKind(const std::vector<Record *> &Records, raw_ostream &OS,
- StringRef Enum, StringRef Prefix, StringRef LanguageName,
- StringRef Namespace, bool ImplicitAsUnknown) {
+ StringRef Enum, const DirectiveLanguage &DirLang,
+ StringRef Prefix, bool ImplicitAsUnknown) {
- auto DefaultIt = std::find_if(Records.begin(), Records.end(), [](Record *R) {
- return R->getValueAsBit("isDefault") == true;
- });
+ auto DefaultIt = llvm::find_if(
+ Records, [](Record *R) { return R->getValueAsBit("isDefault") == true; });
if (DefaultIt == Records.end()) {
- PrintError("A least one " + Enum + " must be defined as default.");
+ PrintError("At least one " + Enum + " must be defined as default.");
return;
}
- const auto FormattedDefaultName =
- getFormattedName((*DefaultIt)->getValueAsString("name"));
+ BaseRecord DefaultRec{(*DefaultIt)};
OS << "\n";
- OS << Enum << " llvm::" << Namespace << "::get" << LanguageName << Enum
- << "Kind(llvm::StringRef Str) {\n";
+ OS << Enum << " llvm::" << DirLang.getCppNamespace() << "::get"
+ << DirLang.getName() << Enum << "Kind(llvm::StringRef Str) {\n";
OS << " return llvm::StringSwitch<" << Enum << ">(Str)\n";
for (const auto &R : Records) {
- const auto Name = R->getValueAsString("name");
+ BaseRecord Rec{R};
if (ImplicitAsUnknown && R->getValueAsBit("isImplicit")) {
- OS << " .Case(\"" << Name << "\"," << Prefix << FormattedDefaultName
- << ")\n";
+ OS << " .Case(\"" << Rec.getName() << "\"," << Prefix
+ << DefaultRec.getFormattedName() << ")\n";
} else {
- OS << " .Case(\"" << Name << "\"," << Prefix << getFormattedName(Name)
- << ")\n";
+ OS << " .Case(\"" << Rec.getName() << "\"," << Prefix
+ << Rec.getFormattedName() << ")\n";
}
}
- OS << " .Default(" << Prefix << FormattedDefaultName << ");\n";
+ OS << " .Default(" << Prefix << DefaultRec.getFormattedName() << ");\n";
OS << "}\n";
}
+// Generate function implementation for get<ClauseVal>Kind(StringRef Str)
+void GenerateGetKindClauseVal(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
+ for (const auto &R : DirLang.getClauses()) {
+ Clause C{R};
+ const auto &ClauseVals = C.getClauseVals();
+ if (ClauseVals.size() <= 0)
+ continue;
+
+ auto DefaultIt = llvm::find_if(ClauseVals, [](Record *CV) {
+ return CV->getValueAsBit("isDefault") == true;
+ });
+
+ if (DefaultIt == ClauseVals.end()) {
+ PrintError("At least one val in Clause " + C.getFormattedName() +
+ " must be defined as default.");
+ return;
+ }
+ const auto DefaultName = (*DefaultIt)->getName();
+
+ const auto &EnumName = C.getEnumName();
+ if (EnumName.size() == 0) {
+ PrintError("enumClauseValue field not set in Clause" +
+ C.getFormattedName() + ".");
+ return;
+ }
+
+ OS << "\n";
+ OS << EnumName << " llvm::" << DirLang.getCppNamespace() << "::get"
+ << EnumName << "(llvm::StringRef Str) {\n";
+ OS << " return llvm::StringSwitch<" << EnumName << ">(Str)\n";
+ for (const auto &CV : ClauseVals) {
+ ClauseVal CVal{CV};
+ OS << " .Case(\"" << CVal.getFormattedName() << "\"," << CV->getName()
+ << ")\n";
+ }
+ OS << " .Default(" << DefaultName << ");\n";
+ OS << "}\n";
+
+ OS << "\n";
+ OS << "llvm::StringRef llvm::" << DirLang.getCppNamespace() << "::get"
+ << DirLang.getName() << EnumName
+ << "Name(llvm::" << DirLang.getCppNamespace() << "::" << EnumName
+ << " x) {\n";
+ OS << " switch (x) {\n";
+ for (const auto &CV : ClauseVals) {
+ ClauseVal CVal{CV};
+ OS << " case " << CV->getName() << ":\n";
+ OS << " return \"" << CVal.getFormattedName() << "\";\n";
+ }
+ OS << " }\n"; // switch
+ OS << " llvm_unreachable(\"Invalid " << DirLang.getName() << " "
+ << EnumName << " kind\");\n";
+ OS << "}\n";
+ }
+}
+
void GenerateCaseForVersionedClauses(const std::vector<Record *> &Clauses,
raw_ostream &OS, StringRef DirectiveName,
- StringRef DirectivePrefix,
- StringRef ClausePrefix,
+ const DirectiveLanguage &DirLang,
llvm::StringSet<> &Cases) {
for (const auto &C : Clauses) {
- const auto MinVersion = C->getValueAsInt("minVersion");
- const auto MaxVersion = C->getValueAsInt("maxVersion");
- const auto SpecificClause = C->getValueAsDef("clause");
- const auto ClauseName =
- getFormattedName(SpecificClause->getValueAsString("name"));
-
- if (Cases.find(ClauseName) == Cases.end()) {
- Cases.insert(ClauseName);
- OS << " case " << ClausePrefix << ClauseName << ":\n";
- OS << " return " << MinVersion << " <= Version && " << MaxVersion
- << " >= Version;\n";
+ VersionedClause VerClause{C};
+
+ const auto ClauseFormattedName = VerClause.getClause().getFormattedName();
+
+ if (Cases.find(ClauseFormattedName) == Cases.end()) {
+ Cases.insert(ClauseFormattedName);
+ OS << " case " << DirLang.getClausePrefix() << ClauseFormattedName
+ << ":\n";
+ OS << " return " << VerClause.getMinVersion()
+ << " <= Version && " << VerClause.getMaxVersion() << " >= Version;\n";
}
}
}
// Generate the isAllowedClauseForDirective function implementation.
-void GenerateIsAllowedClause(const std::vector<Record *> &Directives,
- raw_ostream &OS, StringRef LanguageName,
- StringRef DirectivePrefix, StringRef ClausePrefix,
- StringRef CppNamespace) {
+void GenerateIsAllowedClause(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
OS << "\n";
- OS << "bool llvm::" << CppNamespace << "::isAllowedClauseForDirective("
+ OS << "bool llvm::" << DirLang.getCppNamespace()
+ << "::isAllowedClauseForDirective("
<< "Directive D, Clause C, unsigned Version) {\n";
- OS << " assert(unsigned(D) <= llvm::" << CppNamespace
+ OS << " assert(unsigned(D) <= llvm::" << DirLang.getCppNamespace()
<< "::Directive_enumSize);\n";
- OS << " assert(unsigned(C) <= llvm::" << CppNamespace
+ OS << " assert(unsigned(C) <= llvm::" << DirLang.getCppNamespace()
<< "::Clause_enumSize);\n";
OS << " switch (D) {\n";
- for (const auto &D : Directives) {
-
- const auto DirectiveName = D->getValueAsString("name");
- const auto &AllowedClauses = D->getValueAsListOfDefs("allowedClauses");
- const auto &AllowedOnceClauses =
- D->getValueAsListOfDefs("allowedOnceClauses");
- const auto &AllowedExclusiveClauses =
- D->getValueAsListOfDefs("allowedExclusiveClauses");
- const auto &RequiredClauses = D->getValueAsListOfDefs("requiredClauses");
+ for (const auto &D : DirLang.getDirectives()) {
+ Directive Dir{D};
- OS << " case " << DirectivePrefix << getFormattedName(DirectiveName)
+ OS << " case " << DirLang.getDirectivePrefix() << Dir.getFormattedName()
<< ":\n";
- if (AllowedClauses.size() == 0 && AllowedOnceClauses.size() == 0 &&
- AllowedExclusiveClauses.size() == 0 && RequiredClauses.size() == 0) {
+ if (Dir.getAllowedClauses().size() == 0 &&
+ Dir.getAllowedOnceClauses().size() == 0 &&
+ Dir.getAllowedExclusiveClauses().size() == 0 &&
+ Dir.getRequiredClauses().size() == 0) {
OS << " return false;\n";
} else {
OS << " switch (C) {\n";
llvm::StringSet<> Cases;
- GenerateCaseForVersionedClauses(AllowedClauses, OS, DirectiveName,
- DirectivePrefix, ClausePrefix, Cases);
+ GenerateCaseForVersionedClauses(Dir.getAllowedClauses(), OS,
+ Dir.getName(), DirLang, Cases);
- GenerateCaseForVersionedClauses(AllowedOnceClauses, OS, DirectiveName,
- DirectivePrefix, ClausePrefix, Cases);
+ GenerateCaseForVersionedClauses(Dir.getAllowedOnceClauses(), OS,
+ Dir.getName(), DirLang, Cases);
- GenerateCaseForVersionedClauses(AllowedExclusiveClauses, OS,
- DirectiveName, DirectivePrefix,
- ClausePrefix, Cases);
+ GenerateCaseForVersionedClauses(Dir.getAllowedExclusiveClauses(), OS,
+ Dir.getName(), DirLang, Cases);
- GenerateCaseForVersionedClauses(RequiredClauses, OS, DirectiveName,
- DirectivePrefix, ClausePrefix, Cases);
+ GenerateCaseForVersionedClauses(Dir.getRequiredClauses(), OS,
+ Dir.getName(), DirLang, Cases);
OS << " default:\n";
OS << " return false;\n";
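
For reference, the get<Enum>Kind functions this code generates reduce to an llvm::StringSwitch chain that falls back to the record flagged isDefault. A hedged sketch of that generated shape, with invented enum values and spellings:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class Clause { Unknown, If, NoWait };

Clause getClauseKind(llvm::StringRef Str) {
  return llvm::StringSwitch<Clause>(Str)
      .Case("if", Clause::If)
      .Case("nowait", Clause::NoWait)
      .Default(Clause::Unknown); // the record flagged isDefault
}
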
@@ -297,37 +427,32 @@ void GenerateIsAllowedClause(const std::vector<Record *> &Directives,
}
OS << " }\n"; // End of directives switch
- OS << " llvm_unreachable(\"Invalid " << LanguageName
+ OS << " llvm_unreachable(\"Invalid " << DirLang.getName()
<< " Directive kind\");\n";
OS << "}\n"; // End of function isAllowedClauseForDirective
}
// Generate a simple enum set with the given clauses.
void GenerateClauseSet(const std::vector<Record *> &Clauses, raw_ostream &OS,
- StringRef ClauseEnumSetClass, StringRef ClauseSetPrefix,
- StringRef DirectiveName, StringRef DirectivePrefix,
- StringRef ClausePrefix, StringRef CppNamespace) {
+ StringRef ClauseSetPrefix, Directive &Dir,
+ const DirectiveLanguage &DirLang) {
OS << "\n";
- OS << " static " << ClauseEnumSetClass << " " << ClauseSetPrefix
- << DirectivePrefix << getFormattedName(DirectiveName) << " {\n";
+ OS << " static " << DirLang.getClauseEnumSetClass() << " " << ClauseSetPrefix
+ << DirLang.getDirectivePrefix() << Dir.getFormattedName() << " {\n";
for (const auto &C : Clauses) {
- const auto SpecificClause = C->getValueAsDef("clause");
- const auto ClauseName = SpecificClause->getValueAsString("name");
- OS << " llvm::" << CppNamespace << "::Clause::" << ClausePrefix
- << getFormattedName(ClauseName) << ",\n";
+ VersionedClause VerClause{C};
+ OS << " llvm::" << DirLang.getCppNamespace()
+ << "::Clause::" << DirLang.getClausePrefix()
+ << VerClause.getClause().getFormattedName() << ",\n";
}
OS << " };\n";
}
// Generate an enum set for the 4 kinds of clauses linked to a directive.
-void GenerateDirectiveClauseSets(const std::vector<Record *> &Directives,
- raw_ostream &OS, StringRef LanguageName,
- StringRef ClauseEnumSetClass,
- StringRef DirectivePrefix,
- StringRef ClausePrefix,
- StringRef CppNamespace) {
+void GenerateDirectiveClauseSets(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
IfDefScope Scope("GEN_FLANG_DIRECTIVE_CLAUSE_SETS", OS);
@@ -336,35 +461,24 @@ void GenerateDirectiveClauseSets(const std::vector<Record *> &Directives,
// Open namespaces defined in the directive language.
llvm::SmallVector<StringRef, 2> Namespaces;
- llvm::SplitString(CppNamespace, Namespaces, "::");
+ llvm::SplitString(DirLang.getCppNamespace(), Namespaces, "::");
for (auto Ns : Namespaces)
OS << "namespace " << Ns << " {\n";
- for (const auto &D : Directives) {
- const auto DirectiveName = D->getValueAsString("name");
-
- const auto &AllowedClauses = D->getValueAsListOfDefs("allowedClauses");
- const auto &AllowedOnceClauses =
- D->getValueAsListOfDefs("allowedOnceClauses");
- const auto &AllowedExclusiveClauses =
- D->getValueAsListOfDefs("allowedExclusiveClauses");
- const auto &RequiredClauses = D->getValueAsListOfDefs("requiredClauses");
+ for (const auto &D : DirLang.getDirectives()) {
+ Directive Dir{D};
OS << "\n";
- OS << " // Sets for " << DirectiveName << "\n";
-
- GenerateClauseSet(AllowedClauses, OS, ClauseEnumSetClass, "allowedClauses_",
- DirectiveName, DirectivePrefix, ClausePrefix,
- CppNamespace);
- GenerateClauseSet(AllowedOnceClauses, OS, ClauseEnumSetClass,
- "allowedOnceClauses_", DirectiveName, DirectivePrefix,
- ClausePrefix, CppNamespace);
- GenerateClauseSet(AllowedExclusiveClauses, OS, ClauseEnumSetClass,
- "allowedExclusiveClauses_", DirectiveName,
- DirectivePrefix, ClausePrefix, CppNamespace);
- GenerateClauseSet(RequiredClauses, OS, ClauseEnumSetClass,
- "requiredClauses_", DirectiveName, DirectivePrefix,
- ClausePrefix, CppNamespace);
+ OS << " // Sets for " << Dir.getName() << "\n";
+
+ GenerateClauseSet(Dir.getAllowedClauses(), OS, "allowedClauses_", Dir,
+ DirLang);
+ GenerateClauseSet(Dir.getAllowedOnceClauses(), OS, "allowedOnceClauses_",
+ Dir, DirLang);
+ GenerateClauseSet(Dir.getAllowedExclusiveClauses(), OS,
+ "allowedExclusiveClauses_", Dir, DirLang);
+ GenerateClauseSet(Dir.getRequiredClauses(), OS, "requiredClauses_", Dir,
+ DirLang);
}
// Closing namespaces
@@ -377,118 +491,253 @@ void GenerateDirectiveClauseSets(const std::vector<Record *> &Directives,
// Generate a map of directive (key) with DirectiveClauses struct as values.
// The struct holds the 4 sets of enumeration for the 4 kinds of clauses
// allowances (allowed, allowed once, allowed exclusive and required).
-void GenerateDirectiveClauseMap(const std::vector<Record *> &Directives,
- raw_ostream &OS, StringRef LanguageName,
- StringRef ClauseEnumSetClass,
- StringRef DirectivePrefix,
- StringRef ClausePrefix,
- StringRef CppNamespace) {
+void GenerateDirectiveClauseMap(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
IfDefScope Scope("GEN_FLANG_DIRECTIVE_CLAUSE_MAP", OS);
OS << "\n";
- OS << "struct " << LanguageName << "DirectiveClauses {\n";
- OS << " const " << ClauseEnumSetClass << " allowed;\n";
- OS << " const " << ClauseEnumSetClass << " allowedOnce;\n";
- OS << " const " << ClauseEnumSetClass << " allowedExclusive;\n";
- OS << " const " << ClauseEnumSetClass << " requiredOneOf;\n";
- OS << "};\n";
+ OS << "{\n";
- OS << "\n";
-
- OS << "std::unordered_map<llvm::" << CppNamespace << "::Directive, "
- << LanguageName << "DirectiveClauses>\n";
- OS << " directiveClausesTable = {\n";
-
- for (const auto &D : Directives) {
- const auto FormattedDirectiveName =
- getFormattedName(D->getValueAsString("name"));
- OS << " {llvm::" << CppNamespace << "::Directive::" << DirectivePrefix
- << FormattedDirectiveName << ",\n";
+ for (const auto &D : DirLang.getDirectives()) {
+ Directive Dir{D};
+ OS << " {llvm::" << DirLang.getCppNamespace()
+ << "::Directive::" << DirLang.getDirectivePrefix()
+ << Dir.getFormattedName() << ",\n";
OS << " {\n";
- OS << " llvm::" << CppNamespace << "::allowedClauses_"
- << DirectivePrefix << FormattedDirectiveName << ",\n";
- OS << " llvm::" << CppNamespace << "::allowedOnceClauses_"
- << DirectivePrefix << FormattedDirectiveName << ",\n";
- OS << " llvm::" << CppNamespace << "::allowedExclusiveClauses_"
- << DirectivePrefix << FormattedDirectiveName << ",\n";
- OS << " llvm::" << CppNamespace << "::requiredClauses_"
- << DirectivePrefix << FormattedDirectiveName << ",\n";
+ OS << " llvm::" << DirLang.getCppNamespace() << "::allowedClauses_"
+ << DirLang.getDirectivePrefix() << Dir.getFormattedName() << ",\n";
+ OS << " llvm::" << DirLang.getCppNamespace() << "::allowedOnceClauses_"
+ << DirLang.getDirectivePrefix() << Dir.getFormattedName() << ",\n";
+ OS << " llvm::" << DirLang.getCppNamespace()
+ << "::allowedExclusiveClauses_" << DirLang.getDirectivePrefix()
+ << Dir.getFormattedName() << ",\n";
+ OS << " llvm::" << DirLang.getCppNamespace() << "::requiredClauses_"
+ << DirLang.getDirectivePrefix() << Dir.getFormattedName() << ",\n";
OS << " }\n";
OS << " },\n";
}
- OS << "};\n";
+ OS << "}\n";
}
-// Generate the implemenation section for the enumeration in the directive
-// language
-void EmitDirectivesFlangImpl(const std::vector<Record *> &Directives,
- raw_ostream &OS, StringRef LanguageName,
- StringRef ClauseEnumSetClass,
- StringRef DirectivePrefix, StringRef ClausePrefix,
- StringRef CppNamespace) {
+// Generate class entries for Flang clauses in the Flang parse-tree.
+// If the clause has a non-generic class, no entry is generated.
+// If the clause does not hold a value, an EMPTY_CLASS is used.
+// If the clause class is generic then a WRAPPER_CLASS is used. When the value
+// is optional, the value class is wrapped into a std::optional.
+void GenerateFlangClauseParserClass(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
- GenerateDirectiveClauseSets(Directives, OS, LanguageName, ClauseEnumSetClass,
- DirectivePrefix, ClausePrefix, CppNamespace);
+ IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_CLASSES", OS);
+
+ OS << "\n";
- GenerateDirectiveClauseMap(Directives, OS, LanguageName, ClauseEnumSetClass,
- DirectivePrefix, ClausePrefix, CppNamespace);
+ for (const auto &C : DirLang.getClauses()) {
+ Clause Clause{C};
+ if (!Clause.getFlangClass().empty()) {
+ OS << "WRAPPER_CLASS(" << Clause.getFormattedParserClassName() << ", ";
+ if (Clause.isValueOptional() && Clause.isValueList()) {
+ OS << "std::optional<std::list<" << Clause.getFlangClass() << ">>";
+ } else if (Clause.isValueOptional()) {
+ OS << "std::optional<" << Clause.getFlangClass() << ">";
+ } else if (Clause.isValueList()) {
+ OS << "std::list<" << Clause.getFlangClass() << ">";
+ } else {
+ OS << Clause.getFlangClass();
+ }
+ } else {
+ OS << "EMPTY_CLASS(" << Clause.getFormattedParserClassName();
+ }
+ OS << ");\n";
+ }
}
-// Generate the implemenation section for the enumeration in the directive
-// language.
-void EmitDirectivesGen(RecordKeeper &Records, raw_ostream &OS) {
+// Generate a list of the different clause classes for Flang.
+void GenerateFlangClauseParserClassList(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
- const auto &DirectiveLanguages =
- Records.getAllDerivedDefinitions("DirectiveLanguage");
+ IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_CLASSES_LIST", OS);
- if (DirectiveLanguages.size() != 1) {
- PrintError("A single definition of DirectiveLanguage is needed.");
- return;
+ OS << "\n";
+ llvm::interleaveComma(DirLang.getClauses(), OS, [&](Record *C) {
+ Clause Clause{C};
+ OS << Clause.getFormattedParserClassName() << "\n";
+ });
+}
+
+// Generate dump node list for the clauses holding a generic class name.
+void GenerateFlangClauseDump(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
+
+ IfDefScope Scope("GEN_FLANG_DUMP_PARSE_TREE_CLAUSES", OS);
+
+ OS << "\n";
+ for (const auto &C : DirLang.getClauses()) {
+ Clause Clause{C};
+ OS << "NODE(" << DirLang.getFlangClauseBaseClass() << ", "
+ << Clause.getFormattedParserClassName() << ")\n";
}
+}
- const auto &DirectiveLanguage = DirectiveLanguages[0];
- StringRef DirectivePrefix =
- DirectiveLanguage->getValueAsString("directivePrefix");
- StringRef LanguageName = DirectiveLanguage->getValueAsString("name");
- StringRef ClausePrefix = DirectiveLanguage->getValueAsString("clausePrefix");
- StringRef CppNamespace = DirectiveLanguage->getValueAsString("cppNamespace");
- StringRef ClauseEnumSetClass =
- DirectiveLanguage->getValueAsString("clauseEnumSetClass");
+// Generate Unparse functions for clause classes in the Flang parse-tree
+// If the clause is a non-generic class, no entry is generated.
+void GenerateFlangClauseUnparse(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
- const auto &Directives = Records.getAllDerivedDefinitions("Directive");
+ IfDefScope Scope("GEN_FLANG_CLAUSE_UNPARSE", OS);
- EmitDirectivesFlangImpl(Directives, OS, LanguageName, ClauseEnumSetClass,
- DirectivePrefix, ClausePrefix, CppNamespace);
+ OS << "\n";
+
+ for (const auto &C : DirLang.getClauses()) {
+ Clause Clause{C};
+ if (!Clause.getFlangClass().empty()) {
+ if (Clause.isValueOptional() && Clause.getDefaultValue().empty()) {
+ OS << "void Unparse(const " << DirLang.getFlangClauseBaseClass()
+ << "::" << Clause.getFormattedParserClassName() << " &x) {\n";
+ OS << " Word(\"" << Clause.getName().upper() << "\");\n";
+
+ OS << " Walk(\"(\", x.v, \")\");\n";
+ OS << "}\n";
+ } else if (Clause.isValueOptional()) {
+ OS << "void Unparse(const " << DirLang.getFlangClauseBaseClass()
+ << "::" << Clause.getFormattedParserClassName() << " &x) {\n";
+ OS << " Word(\"" << Clause.getName().upper() << "\");\n";
+ OS << " Put(\"(\");\n";
+ OS << " if (x.v.has_value())\n";
+ if (Clause.isValueList())
+ OS << " Walk(x.v, \",\");\n";
+ else
+ OS << " Walk(x.v);\n";
+ OS << " else\n";
+ OS << " Put(\"" << Clause.getDefaultValue() << "\");\n";
+ OS << " Put(\")\");\n";
+ OS << "}\n";
+ } else {
+ OS << "void Unparse(const " << DirLang.getFlangClauseBaseClass()
+ << "::" << Clause.getFormattedParserClassName() << " &x) {\n";
+ OS << " Word(\"" << Clause.getName().upper() << "\");\n";
+ OS << " Put(\"(\");\n";
+ if (Clause.isValueList())
+ OS << " Walk(x.v, \",\");\n";
+ else
+ OS << " Walk(x.v);\n";
+ OS << " Put(\")\");\n";
+ OS << "}\n";
+ }
+ } else {
+ OS << "void Before(const " << DirLang.getFlangClauseBaseClass()
+ << "::" << Clause.getFormattedParserClassName() << " &) { Word(\""
+ << Clause.getName().upper() << "\"); }\n";
+ }
+ }
}
-// Generate the implemenation for the enumeration in the directive
-// language. This code can be included in library.
-void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) {
+// Generate the implementation section for the enumeration in the directive
+// language
+void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
- const auto &DirectiveLanguages =
- Records.getAllDerivedDefinitions("DirectiveLanguage");
+ GenerateDirectiveClauseSets(DirLang, OS);
- if (DirectiveLanguages.size() != 1) {
- PrintError("A single definition of DirectiveLanguage is needed.");
- return;
+ GenerateDirectiveClauseMap(DirLang, OS);
+
+ GenerateFlangClauseParserClass(DirLang, OS);
+
+ GenerateFlangClauseParserClassList(DirLang, OS);
+
+ GenerateFlangClauseDump(DirLang, OS);
+
+ GenerateFlangClauseUnparse(DirLang, OS);
+}
+
+void GenerateClauseClassMacro(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
+ // Generate macro-style information for legacy code in clang
+ IfDefScope Scope("GEN_CLANG_CLAUSE_CLASS", OS);
+
+ OS << "\n";
+
+ OS << "#ifndef CLAUSE\n";
+ OS << "#define CLAUSE(Enum, Str, Implicit)\n";
+ OS << "#endif\n";
+ OS << "#ifndef CLAUSE_CLASS\n";
+ OS << "#define CLAUSE_CLASS(Enum, Str, Class)\n";
+ OS << "#endif\n";
+ OS << "#ifndef CLAUSE_NO_CLASS\n";
+ OS << "#define CLAUSE_NO_CLASS(Enum, Str)\n";
+ OS << "#endif\n";
+ OS << "\n";
+ OS << "#define __CLAUSE(Name, Class) \\\n";
+ OS << " CLAUSE(" << DirLang.getClausePrefix()
+ << "##Name, #Name, /* Implicit */ false) \\\n";
+ OS << " CLAUSE_CLASS(" << DirLang.getClausePrefix()
+ << "##Name, #Name, Class)\n";
+ OS << "#define __CLAUSE_NO_CLASS(Name) \\\n";
+ OS << " CLAUSE(" << DirLang.getClausePrefix()
+ << "##Name, #Name, /* Implicit */ false) \\\n";
+ OS << " CLAUSE_NO_CLASS(" << DirLang.getClausePrefix() << "##Name, #Name)\n";
+ OS << "#define __IMPLICIT_CLAUSE_CLASS(Name, Str, Class) \\\n";
+ OS << " CLAUSE(" << DirLang.getClausePrefix()
+ << "##Name, Str, /* Implicit */ true) \\\n";
+ OS << " CLAUSE_CLASS(" << DirLang.getClausePrefix()
+ << "##Name, Str, Class)\n";
+ OS << "#define __IMPLICIT_CLAUSE_NO_CLASS(Name, Str) \\\n";
+ OS << " CLAUSE(" << DirLang.getClausePrefix()
+ << "##Name, Str, /* Implicit */ true) \\\n";
+ OS << " CLAUSE_NO_CLASS(" << DirLang.getClausePrefix() << "##Name, Str)\n";
+ OS << "\n";
+
+ for (const auto &R : DirLang.getClauses()) {
+ Clause C{R};
+ if (C.getClangClass().empty()) { // NO_CLASS
+ if (C.isImplicit()) {
+ OS << "__IMPLICIT_CLAUSE_NO_CLASS(" << C.getFormattedName() << ", \""
+ << C.getFormattedName() << "\")\n";
+ } else {
+ OS << "__CLAUSE_NO_CLASS(" << C.getFormattedName() << ")\n";
+ }
+ } else { // CLASS
+ if (C.isImplicit()) {
+ OS << "__IMPLICIT_CLAUSE_CLASS(" << C.getFormattedName() << ", \""
+ << C.getFormattedName() << "\", " << C.getClangClass() << ")\n";
+ } else {
+ OS << "__CLAUSE(" << C.getFormattedName() << ", " << C.getClangClass()
+ << ")\n";
+ }
+ }
}
- const auto &DirectiveLanguage = DirectiveLanguages[0];
- StringRef DirectivePrefix =
- DirectiveLanguage->getValueAsString("directivePrefix");
- StringRef LanguageName = DirectiveLanguage->getValueAsString("name");
- StringRef ClausePrefix = DirectiveLanguage->getValueAsString("clausePrefix");
- StringRef CppNamespace = DirectiveLanguage->getValueAsString("cppNamespace");
- const auto &Directives = Records.getAllDerivedDefinitions("Directive");
- const auto &Clauses = Records.getAllDerivedDefinitions("Clause");
+ OS << "\n";
+ OS << "#undef __IMPLICIT_CLAUSE_NO_CLASS\n";
+ OS << "#undef __IMPLICIT_CLAUSE_CLASS\n";
+ OS << "#undef __CLAUSE\n";
+ OS << "#undef CLAUSE_NO_CLASS\n";
+ OS << "#undef CLAUSE_CLASS\n";
+ OS << "#undef CLAUSE\n";
+}
+
+// Generate the implementation section for the enumeration in the directive
+// language.
+void EmitDirectivesGen(RecordKeeper &Records, raw_ostream &OS) {
+ const auto DirLang = DirectiveLanguage{Records};
+ if (DirLang.HasValidityErrors())
+ return;
+
+ EmitDirectivesFlangImpl(DirLang, OS);
- StringRef IncludeHeader =
- DirectiveLanguage->getValueAsString("includeHeader");
+ GenerateClauseClassMacro(DirLang, OS);
+}
- if (!IncludeHeader.empty())
- OS << "#include \"" << IncludeHeader << "\"\n\n";
+// Generate the implementation for the enumeration in the directive
+// language. This code can be included in a library.
+void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) {
+ const auto DirLang = DirectiveLanguage{Records};
+ if (DirLang.HasValidityErrors())
+ return;
+
+ if (!DirLang.getIncludeHeader().empty())
+ OS << "#include \"" << DirLang.getIncludeHeader() << "\"\n\n";
OS << "#include \"llvm/ADT/StringRef.h\"\n";
OS << "#include \"llvm/ADT/StringSwitch.h\"\n";
@@ -496,29 +745,32 @@ void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) {
OS << "\n";
OS << "using namespace llvm;\n";
llvm::SmallVector<StringRef, 2> Namespaces;
- llvm::SplitString(CppNamespace, Namespaces, "::");
+ llvm::SplitString(DirLang.getCppNamespace(), Namespaces, "::");
for (auto Ns : Namespaces)
OS << "using namespace " << Ns << ";\n";
// getDirectiveKind(StringRef Str)
- GenerateGetKind(Directives, OS, "Directive", DirectivePrefix, LanguageName,
- CppNamespace, /*ImplicitAsUnknown=*/false);
+ GenerateGetKind(DirLang.getDirectives(), OS, "Directive", DirLang,
+ DirLang.getDirectivePrefix(), /*ImplicitAsUnknown=*/false);
// getDirectiveName(Directive Kind)
- GenerateGetName(Directives, OS, "Directive", DirectivePrefix, LanguageName,
- CppNamespace);
+ GenerateGetName(DirLang.getDirectives(), OS, "Directive", DirLang,
+ DirLang.getDirectivePrefix());
// getClauseKind(StringRef Str)
- GenerateGetKind(Clauses, OS, "Clause", ClausePrefix, LanguageName,
- CppNamespace, /*ImplicitAsUnknown=*/true);
+ GenerateGetKind(DirLang.getClauses(), OS, "Clause", DirLang,
+ DirLang.getClausePrefix(),
+ /*ImplicitAsUnknown=*/true);
// getClauseName(Clause Kind)
- GenerateGetName(Clauses, OS, "Clause", ClausePrefix, LanguageName,
- CppNamespace);
+ GenerateGetName(DirLang.getClauses(), OS, "Clause", DirLang,
+ DirLang.getClausePrefix());
+
+ // get<ClauseVal>Kind(StringRef Str)
+ GenerateGetKindClauseVal(DirLang, OS);
// isAllowedClauseForDirective(Directive D, Clause C, unsigned Version)
- GenerateIsAllowedClause(Directives, OS, LanguageName, DirectivePrefix,
- ClausePrefix, CppNamespace);
+ GenerateIsAllowedClause(DirLang, OS);
}
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/utils/TableGen/ExegesisEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/ExegesisEmitter.cpp
index 8f784e4a4121..4e532c371691 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/ExegesisEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/ExegesisEmitter.cpp
@@ -144,7 +144,7 @@ void ExegesisEmitter::emitPfmCountersInfo(const Record &Def,
void ExegesisEmitter::emitPfmCounters(raw_ostream &OS) const {
// Emit the counter name table.
- OS << "\nstatic const char* " << Target << "PfmCounterNames[] = {\n";
+ OS << "\nstatic const char *" << Target << "PfmCounterNames[] = {\n";
for (const auto &NameAndIndex : PfmCounterNameTable)
OS << " \"" << NameAndIndex.first << "\", // " << NameAndIndex.second
<< "\n";
diff --git a/contrib/llvm-project/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
index 88d210f7fd39..01b39df055ad 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -226,6 +226,8 @@ typedef std::vector<bit_value_t> insn_t;
namespace {
+static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL;
+
class FilterChooser;
/// Filter - Filter works with FilterChooser to produce the decoding tree for
@@ -279,7 +281,7 @@ protected:
std::vector<EncodingIDAndOpcode> VariableInstructions;
// Map of well-known segment value to its delegate.
- std::map<unsigned, std::unique_ptr<const FilterChooser>> FilterChooserMap;
+ std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
// Number of instructions which fall under FilteredInstructions category.
unsigned NumFiltered;
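
Widening the FilterChooserMap key to uint64_t is the reason a named sentinel helps: once the key is 64-bit, (unsigned)-1 and -1ULL are different keys, so mixing the two spellings would silently miss the entry. A tiny illustration, values only:

#include <cstdint>

static_assert(uint64_t((unsigned)-1) == 0xFFFFFFFFull, "32-bit all-ones zero-extends");
static_assert(uint64_t(-1LL) == 0xFFFFFFFFFFFFFFFFull, "64-bit all-ones");
static_assert(uint64_t((unsigned)-1) != uint64_t(-1LL),
              "a map<uint64_t, ...> keyed with -1ULL is missed by (unsigned)-1");
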
@@ -305,7 +307,7 @@ public:
const FilterChooser &getVariableFC() const {
assert(NumFiltered == 1);
assert(FilterChooserMap.size() == 1);
- return *(FilterChooserMap.find((unsigned)-1)->second);
+ return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second);
}
// Divides the decoding task into sub tasks and delegates them to the
@@ -602,10 +604,9 @@ void Filter::recurse() {
// Delegates to an inferior filter chooser for further processing on this
// group of instructions whose segment values are variable.
- FilterChooserMap.insert(
- std::make_pair(-1U, std::make_unique<FilterChooser>(
- Owner->AllInstructions, VariableInstructions,
- Owner->Operands, BitValueArray, *Owner)));
+ FilterChooserMap.insert(std::make_pair(NO_FIXED_SEGMENTS_SENTINEL,
+ std::make_unique<FilterChooser>(Owner->AllInstructions,
+ VariableInstructions, Owner->Operands, BitValueArray, *Owner)));
}
// No need to recurse for a singleton filtered instruction.
@@ -674,7 +675,7 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
for (auto &Filter : FilterChooserMap) {
// Field value -1 implies a non-empty set of variable instructions.
// See also recurse().
- if (Filter.first == (unsigned)-1) {
+ if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) {
HasFallthrough = true;
// Each scope should always have at least one filter value to check
@@ -721,7 +722,7 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!");
FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1;
FixupScopeList::iterator Dest = Source - 1;
- Dest->insert(Dest->end(), Source->begin(), Source->end());
+ llvm::append_range(*Dest, *Source);
TableInfo.FixupStack.pop_back();
// If there is no fallthrough, then the final filter should get fixed
@@ -941,7 +942,7 @@ emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates,
// The predicate function is just a big switch statement based on the
// input predicate index.
OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, "
- << "const FeatureBitset& Bits) {\n";
+ << "const FeatureBitset &Bits) {\n";
Indentation += 2;
if (!Predicates.empty()) {
OS.indent(Indentation) << "switch (Idx) {\n";
@@ -965,7 +966,7 @@ emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
unsigned Indentation) const {
// The decoder function is just a big switch statement based on the
// input decoder index.
- OS.indent(Indentation) << "template<typename InsnType>\n";
+ OS.indent(Indentation) << "template <typename InsnType>\n";
OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
<< " unsigned Idx, InsnType insn, MCInst &MI,\n";
OS.indent(Indentation) << " uint64_t "
@@ -1192,7 +1193,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
if (!Pred->getValue("AssemblerMatcherPredicate"))
continue;
- if (!dyn_cast<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
+ if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
continue;
const DagInit *D = Pred->getValueAsDag("AssemblerCondDag");
@@ -1886,7 +1887,7 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
// Ignore fixed fields in the record, we're looking for values like:
// bits<5> RST = { ?, ?, ?, ?, ? };
- if (Vals[i].getPrefix() || Vals[i].getValue()->isComplete())
+ if (Vals[i].isNonconcreteOK() || Vals[i].getValue()->isComplete())
continue;
// Determine if Vals[i] actually contributes to the Inst encoding.
@@ -2009,9 +2010,8 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
// For each operand, see if we can figure out where it is encoded.
for (const auto &Op : InOutOperands) {
if (!NumberedInsnOperands[std::string(Op.second)].empty()) {
- InsnOperands.insert(InsnOperands.end(),
- NumberedInsnOperands[std::string(Op.second)].begin(),
- NumberedInsnOperands[std::string(Op.second)].end());
+ llvm::append_range(InsnOperands,
+ NumberedInsnOperands[std::string(Op.second)]);
continue;
}
if (!NumberedInsnOperands[TiedNames[std::string(Op.second)]].empty()) {
@@ -2162,7 +2162,7 @@ static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
<< "// * Support shift (<<, >>) with signed and unsigned integers on the "
"RHS\n"
<< "// * Support put (<<) to raw_ostream&\n"
- << "template<typename InsnType>\n"
+ << "template <typename InsnType>\n"
<< "#if defined(_MSC_VER) && !defined(__clang__)\n"
<< "__declspec(noinline)\n"
<< "#endif\n"
@@ -2182,7 +2182,7 @@ static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
<< " return (insn & fieldMask) >> startBit;\n"
<< "}\n"
<< "\n"
- << "template<typename InsnType>\n"
+ << "template <typename InsnType>\n"
<< "static InsnType fieldFromInstruction(InsnType insn, unsigned "
"startBit,\n"
<< " unsigned numBits, "
@@ -2193,7 +2193,7 @@ static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
<< " return (insn >> startBit) & fieldMask;\n"
<< "}\n"
<< "\n"
- << "template<typename InsnType>\n"
+ << "template <typename InsnType>\n"
<< "static InsnType fieldFromInstruction(InsnType insn, unsigned "
"startBit,\n"
<< " unsigned numBits) {\n"
@@ -2205,14 +2205,14 @@ static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
// emitDecodeInstruction - Emit the templated helper function
// decodeInstruction().
static void emitDecodeInstruction(formatted_raw_ostream &OS) {
- OS << "template<typename InsnType>\n"
+ OS << "template <typename InsnType>\n"
<< "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], "
"MCInst &MI,\n"
<< " InsnType insn, uint64_t "
"Address,\n"
<< " const void *DisAsm,\n"
<< " const MCSubtargetInfo &STI) {\n"
- << " const FeatureBitset& Bits = STI.getFeatureBits();\n"
+ << " const FeatureBitset &Bits = STI.getFeatureBits();\n"
<< "\n"
<< " const uint8_t *Ptr = DecodeTable;\n"
<< " InsnType CurFieldValue = 0;\n"
@@ -2374,7 +2374,7 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
<< " if (Fail)\n"
<< " S = MCDisassembler::SoftFail;\n"
<< " LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? "
- "\"FAIL\\n\":\"PASS\\n\"));\n"
+ "\"FAIL\\n\" : \"PASS\\n\"));\n"
<< " break;\n"
<< " }\n"
<< " case MCD::OPC_Fail: {\n"
@@ -2392,8 +2392,8 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
void FixedLenDecoderEmitter::run(raw_ostream &o) {
formatted_raw_ostream OS(o);
OS << "#include \"llvm/MC/MCInst.h\"\n";
- OS << "#include \"llvm/Support/Debug.h\"\n";
OS << "#include \"llvm/Support/DataTypes.h\"\n";
+ OS << "#include \"llvm/Support/Debug.h\"\n";
OS << "#include \"llvm/Support/LEB128.h\"\n";
OS << "#include \"llvm/Support/raw_ostream.h\"\n";
OS << "#include <assert.h>\n";
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GICombinerEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/GICombinerEmitter.cpp
index e2a670070ae7..ab00cff63998 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/GICombinerEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/GICombinerEmitter.cpp
@@ -150,7 +150,7 @@ protected:
/// A block of arbitrary C++ to finish testing the match.
/// FIXME: This is a temporary measure until we have actual pattern matching
- const CodeInit *MatchingFixupCode = nullptr;
+ const StringInit *MatchingFixupCode = nullptr;
/// The MatchData defined by the match stage and required by the apply stage.
/// This allows the plumbing of arbitrary data from C++ predicates between the
@@ -199,7 +199,7 @@ public:
unsigned allocUID() { return UID++; }
StringRef getName() const { return TheDef.getName(); }
const Record &getDef() const { return TheDef; }
- const CodeInit *getMatchingFixupCode() const { return MatchingFixupCode; }
+ const StringInit *getMatchingFixupCode() const { return MatchingFixupCode; }
size_t getNumRoots() const { return Roots.size(); }
GIMatchDag &getMatchDag() { return MatchDag; }
@@ -346,8 +346,6 @@ void CombineRule::declareMatchData(StringRef PatternSymbol, StringRef Type,
}
bool CombineRule::parseDefs() {
- NamedRegionTimer T("parseDefs", "Time spent parsing the defs", "Rule Parsing",
- "Time spent on rule parsing", TimeRegions);
DagInit *Defs = TheDef.getValueAsDag("Defs");
if (Defs->getOperatorAsDef(TheDef.getLoc())->getName() != "defs") {
@@ -434,9 +432,9 @@ bool CombineRule::parseInstructionMatcher(
}
if (InstrOperand.isDef()) {
- if (find_if(Roots, [&](const RootInfo &X) {
+ if (any_of(Roots, [&](const RootInfo &X) {
return X.getPatternSymbol() == Name;
- }) != Roots.end()) {
+ })) {
N->setMatchRoot();
}
}
@@ -462,9 +460,9 @@ bool CombineRule::parseWipMatchOpcodeMatcher(const CodeGenTarget &Target,
MatchDag.addInstrNode(makeDebugName(*this, Name), insertStrTab(Name),
MatchDag.getContext().makeEmptyOperandList());
- if (find_if(Roots, [&](const RootInfo &X) {
+ if (any_of(Roots, [&](const RootInfo &X) {
return ArgName && X.getPatternSymbol() == ArgName->getValue();
- }) != Roots.end()) {
+ })) {
N->setMatchRoot();
}
@@ -488,8 +486,6 @@ bool CombineRule::parseWipMatchOpcodeMatcher(const CodeGenTarget &Target,
return false;
}
bool CombineRule::parseMatcher(const CodeGenTarget &Target) {
- NamedRegionTimer T("parseMatcher", "Time spent parsing the matcher",
- "Rule Parsing", "Time spent on rule parsing", TimeRegions);
StringMap<std::vector<VarInfo>> NamedEdgeDefs;
StringMap<std::vector<VarInfo>> NamedEdgeUses;
DagInit *Matchers = TheDef.getValueAsDag("Match");
@@ -518,10 +514,10 @@ bool CombineRule::parseMatcher(const CodeGenTarget &Target) {
// Parse arbitrary C++ code we have in lieu of supporting MIR matching
- if (const CodeInit *CodeI = dyn_cast<CodeInit>(Matchers->getArg(I))) {
+ if (const StringInit *StringI = dyn_cast<StringInit>(Matchers->getArg(I))) {
assert(!MatchingFixupCode &&
"Only one block of arbitrary code is currently permitted");
- MatchingFixupCode = CodeI;
+ MatchingFixupCode = StringI;
MatchDag.setHasPostMatchPredicate(true);
continue;
}
@@ -593,6 +589,7 @@ bool CombineRule::parseMatcher(const CodeGenTarget &Target) {
}
class GICombinerEmitter {
+ RecordKeeper &Records;
StringRef Name;
const CodeGenTarget &Target;
Record *Combiner;
@@ -618,7 +615,6 @@ public:
/// response to the generated cl::opt.
void emitNameMatcher(raw_ostream &OS) const;
- void generateDeclarationsCodeForTree(raw_ostream &OS, const GIMatchTree &Tree) const;
void generateCodeForTree(raw_ostream &OS, const GIMatchTree &Tree,
StringRef Indent) const;
};
@@ -626,7 +622,7 @@ public:
GICombinerEmitter::GICombinerEmitter(RecordKeeper &RK,
const CodeGenTarget &Target,
StringRef Name, Record *Combiner)
- : Name(Name), Target(Target), Combiner(Combiner) {}
+ : Records(RK), Name(Name), Target(Target), Combiner(Combiner) {}
void GICombinerEmitter::emitNameMatcher(raw_ostream &OS) const {
std::vector<std::pair<std::string, std::string>> Cases;
@@ -762,7 +758,7 @@ void GICombinerEmitter::generateCodeForTree(raw_ostream &OS,
DagInit *Applyer = RuleDef.getValueAsDag("Apply");
if (Applyer->getOperatorAsDef(RuleDef.getLoc())->getName() !=
"apply") {
- PrintError(RuleDef.getLoc(), "Expected apply operator");
+ PrintError(RuleDef.getLoc(), "Expected 'apply' operator in Apply DAG");
return;
}
@@ -803,16 +799,16 @@ void GICombinerEmitter::generateCodeForTree(raw_ostream &OS,
OS << Indent << " && [&]() {\n"
<< Indent << " "
<< CodeExpander(Rule->getMatchingFixupCode()->getValue(), Expansions,
- Rule->getMatchingFixupCode()->getLoc(), ShowExpansions)
+ RuleDef.getLoc(), ShowExpansions)
<< "\n"
<< Indent << " return true;\n"
<< Indent << " }()";
}
OS << ") {\n" << Indent << " ";
- if (const CodeInit *Code = dyn_cast<CodeInit>(Applyer->getArg(0))) {
+ if (const StringInit *Code = dyn_cast<StringInit>(Applyer->getArg(0))) {
OS << CodeExpander(Code->getAsUnquotedString(), Expansions,
- Code->getLoc(), ShowExpansions)
+ RuleDef.getLoc(), ShowExpansions)
<< "\n"
<< Indent << " return true;\n"
<< Indent << " }\n";
@@ -850,6 +846,7 @@ static void emitAdditionalHelperMethodArguments(raw_ostream &OS,
}
void GICombinerEmitter::run(raw_ostream &OS) {
+ Records.startTimer("Gather rules");
gatherRules(Rules, Combiner->getValueAsListOfDefs("Rules"));
if (StopAfterParse) {
MatchDagCtx.print(errs());
@@ -861,11 +858,8 @@ void GICombinerEmitter::run(raw_ostream &OS) {
PrintFatalError(Combiner->getLoc(), "Failed to parse one or more rules");
LLVM_DEBUG(dbgs() << "Optimizing tree for " << Rules.size() << " rules\n");
std::unique_ptr<GIMatchTree> Tree;
+ Records.startTimer("Optimize combiner");
{
- NamedRegionTimer T("Optimize", "Time spent optimizing the combiner",
- "Code Generation", "Time spent generating code",
- TimeRegions);
-
GIMatchTreeBuilder TreeBuilder(0);
for (const auto &Rule : Rules) {
bool HadARoot = false;
@@ -887,9 +881,7 @@ void GICombinerEmitter::run(raw_ostream &OS) {
return;
}
- NamedRegionTimer T("Emit", "Time spent emitting the combiner",
- "Code Generation", "Time spent generating code",
- TimeRegions);
+ Records.startTimer("Emit combiner");
OS << "#ifdef " << Name.upper() << "_GENCOMBINERHELPER_DEPS\n"
<< "#include \"llvm/ADT/SparseBitVector.h\"\n"
<< "namespace llvm {\n"
@@ -906,7 +898,6 @@ void GICombinerEmitter::run(raw_ostream &OS) {
<< " bool isRuleDisabled(unsigned ID) const;\n"
<< " bool setRuleEnabled(StringRef RuleIdentifier);\n"
<< " bool setRuleDisabled(StringRef RuleIdentifier);\n"
- << "\n"
<< "};\n"
<< "\n"
<< "class " << getClassName();
@@ -914,10 +905,10 @@ void GICombinerEmitter::run(raw_ostream &OS) {
if (!StateClass.empty())
OS << " : public " << StateClass;
OS << " {\n"
- << " const " << getClassName() << "RuleConfig *RuleConfig;\n"
+ << " const " << getClassName() << "RuleConfig *RuleConfig;\n"
<< "\n"
<< "public:\n"
- << " template<typename ... Args>" << getClassName() << "(const "
+ << " template <typename... Args>" << getClassName() << "(const "
<< getClassName() << "RuleConfig &RuleConfig, Args &&... args) : ";
if (!StateClass.empty())
OS << StateClass << "(std::forward<Args>(args)...), ";
@@ -947,9 +938,9 @@ void GICombinerEmitter::run(raw_ostream &OS) {
<< " if (First >= Last)\n"
<< " report_fatal_error(\"Beginning of range should be before "
"end of range\");\n"
- << " return {{ *First, *Last + 1 }};\n"
+ << " return {{*First, *Last + 1}};\n"
<< " } else if (RangePair.first == \"*\") {\n"
- << " return {{ 0, " << Rules.size() << " }};\n"
+ << " return {{0, " << Rules.size() << "}};\n"
<< " } else {\n"
<< " const auto I = getRuleIdxForIdentifier(RangePair.first);\n"
<< " if (!I.hasValue())\n"
@@ -963,7 +954,7 @@ void GICombinerEmitter::run(raw_ostream &OS) {
OS << "bool " << getClassName() << "RuleConfig::setRule"
<< (Enabled ? "Enabled" : "Disabled") << "(StringRef RuleIdentifier) {\n"
<< " auto MaybeRange = getRuleRangeForIdentifier(RuleIdentifier);\n"
- << " if(!MaybeRange.hasValue())\n"
+ << " if (!MaybeRange.hasValue())\n"
<< " return false;\n"
<< " for (auto I = MaybeRange->first; I < MaybeRange->second; ++I)\n"
<< " DisabledRules." << (Enabled ? "reset" : "set") << "(I);\n"
@@ -1026,7 +1017,7 @@ void GICombinerEmitter::run(raw_ostream &OS) {
<< " MachineBasicBlock *MBB = MI.getParent();\n"
<< " MachineFunction *MF = MBB->getParent();\n"
<< " MachineRegisterInfo &MRI = MF->getRegInfo();\n"
- << " SmallVector<MachineInstr *, 8> MIs = { &MI };\n\n"
+ << " SmallVector<MachineInstr *, 8> MIs = {&MI};\n\n"
<< " (void)MBB; (void)MF; (void)MRI; (void)RuleConfig;\n\n";
OS << " // Match data\n";
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
index d59a9b8e3b65..3ebb293f466e 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
@@ -58,21 +58,15 @@ void CodeExpander::emit(raw_ostream &OS) const {
// Warn if we split because no terminator was found.
StringRef EndVar = StartVar.drop_front(2 /* ${ */ + Var.size());
if (EndVar.empty()) {
- size_t LocOffset = StartVar.data() - Code.data();
- PrintWarning(
- Loc.size() > 0 && Loc[0].isValid()
- ? SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset)
- : SMLoc(),
- "Unterminated expansion");
+ PrintWarning(Loc, "Unterminated expansion '${" + Var + "'");
+ PrintNote("Code: [{" + Code + "}]");
}
auto ValueI = Expansions.find(Var);
if (ValueI == Expansions.end()) {
- size_t LocOffset = StartVar.data() - Code.data();
- PrintError(Loc.size() > 0 && Loc[0].isValid()
- ? SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset)
- : SMLoc(),
- "Attempting to expand an undeclared variable " + Var);
+ PrintError(Loc,
+ "Attempt to expand an undeclared variable '" + Var + "'");
+ PrintNote("Code: [{" + Code + "}]");
}
if (ShowExpansions)
OS << "/*$" << Var << "{*/";
@@ -82,11 +76,8 @@ void CodeExpander::emit(raw_ostream &OS) const {
continue;
}
- size_t LocOffset = Current.data() - Code.data();
- PrintWarning(Loc.size() > 0 && Loc[0].isValid()
- ? SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset)
- : SMLoc(),
- "Assuming missing escape character");
+ PrintWarning(Loc, "Assuming missing escape character: \\$");
+ PrintNote("Code: [{" + Code + "}]");
OS << "$";
Current = Current.drop_front(1);
}
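The CodeExpander hunk above drops the pointer-offset SMLoc computation and instead reports the whole code block in a note next to the warning or error. As a rough standalone illustration of what the expander does (expand ${var} and diagnose an unterminated or undeclared expansion), with no TableGen types involved and simplified termination rules:

#include <iostream>
#include <map>
#include <string>

std::string expand(const std::string &Code,
                   const std::map<std::string, std::string> &Vars) {
  std::string Out;
  size_t I = 0;
  while (I < Code.size()) {
    size_t Start = Code.find("${", I);
    if (Start == std::string::npos) {
      Out.append(Code, I, std::string::npos);
      break;
    }
    Out.append(Code, I, Start - I);
    size_t End = Code.find('}', Start + 2);
    if (End == std::string::npos) {
      // Mirrors the "Unterminated expansion" warning plus the code note.
      std::cerr << "warning: Unterminated expansion '" << Code.substr(Start)
                << "'\nnote: Code: [{" << Code << "}]\n";
      Out.append(Code, Start, std::string::npos);
      break;
    }
    std::string Var = Code.substr(Start + 2, End - Start - 2);
    auto It = Vars.find(Var);
    if (It == Vars.end())
      std::cerr << "error: Attempt to expand an undeclared variable '" << Var
                << "'\nnote: Code: [{" << Code << "}]\n";
    else
      Out += It->second;
    I = End + 1;
  }
  return Out;
}

int main() {
  std::cout << expand("return ${dst} + ${src};", {{"dst", "X"}, {"src", "Y"}})
            << "\n"; // prints: return X + Y;
}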
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp
index a3a9b7d8b037..7e037dd03b60 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp
@@ -41,7 +41,7 @@ void GIMatchDag::writeDOTGraph(raw_ostream &OS, StringRef ID) const {
SmallVector<std::pair<unsigned, StringRef>, 8> ToPrint;
for (const auto &Assignment : N->user_assigned_operand_names())
ToPrint.emplace_back(Assignment.first, Assignment.second);
- llvm::sort(ToPrint.begin(), ToPrint.end());
+ llvm::sort(ToPrint);
StringRef Separator = "";
for (const auto &Assignment : ToPrint) {
OS << Separator << "$" << Assignment.second << "=getOperand("
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp
index 218b741be20c..ad9fbea8f881 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp
@@ -27,7 +27,7 @@ void GIMatchDagInstr::print(raw_ostream &OS) const {
SmallVector<std::pair<unsigned, StringRef>, 8> ToPrint;
for (const auto &Assignment : UserAssignedNamesForOperands)
ToPrint.emplace_back(Assignment.first, Assignment.second);
- llvm::sort(ToPrint.begin(), ToPrint.end());
+ llvm::sort(ToPrint);
StringRef Separator = "";
for (const auto &Assignment : ToPrint) {
OS << Separator << "$" << Assignment.second << "=getOperand("
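Both GIMatchDag printers above switch to the range form llvm::sort(ToPrint); the sort exists so user-assigned operand names print in a stable order. A minimal sketch of the same output loop with the standard library:

#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // (operand index, user-assigned name) pairs, gathered in arbitrary order.
  std::vector<std::pair<unsigned, std::string>> ToPrint = {
      {2, "rhs"}, {0, "dst"}, {1, "lhs"}};

  // Equivalent of llvm::sort(ToPrint): order by operand index first so the
  // printed assignments are deterministic.
  std::sort(ToPrint.begin(), ToPrint.end());

  const char *Separator = "";
  for (const auto &Assignment : ToPrint) {
    std::cout << Separator << "$" << Assignment.second << "=getOperand("
              << Assignment.first << ")";
    Separator = ", ";
  }
  std::cout << "\n"; // $dst=getOperand(0), $lhs=getOperand(1), $rhs=getOperand(2)
}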
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
index 96dc4fc94893..d08a83333c30 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
@@ -121,8 +121,7 @@ void GIMatchTreeBuilderLeafInfo::declareInstr(const GIMatchDagInstr *Instr, unsi
Info.bindOperandVariable(VarBinding.second, ID, VarBinding.first);
// Clear the bit indicating we haven't visited this instr.
- const auto &NodeI = std::find(MatchDag.instr_nodes_begin(),
- MatchDag.instr_nodes_end(), Instr);
+ const auto &NodeI = find(MatchDag.instr_nodes(), Instr);
assert(NodeI != MatchDag.instr_nodes_end() && "Instr isn't in this DAG");
unsigned InstrIdx = MatchDag.getInstrNodeIdx(NodeI);
RemainingInstrNodes.reset(InstrIdx);
@@ -266,11 +265,10 @@ void GIMatchTreeBuilder::runStep() {
LLVM_DEBUG(dbgs() << "Leaf contains multiple rules, drop after the first "
"fully tested rule\n");
auto FirstFullyTested =
- std::find_if(Leaves.begin(), Leaves.end(),
- [](const GIMatchTreeBuilderLeafInfo &X) {
- return X.isFullyTraversed() && X.isFullyTested() &&
- !X.getMatchDag().hasPostMatchPredicate();
- });
+ llvm::find_if(Leaves, [](const GIMatchTreeBuilderLeafInfo &X) {
+ return X.isFullyTraversed() && X.isFullyTested() &&
+ !X.getMatchDag().hasPostMatchPredicate();
+ });
if (FirstFullyTested != Leaves.end())
FirstFullyTested++;
@@ -456,8 +454,7 @@ void GIMatchTreeOpcodePartitioner::repartition(
// predicates for one instruction in the same DAG. That should be
// impossible.
assert(AllOpcodes && "Conflicting opcode predicates");
- for (const CodeGenInstruction *Expected : OpcodeP->getInstrs())
- OpcodesForThisPredicate.push_back(Expected);
+ append_range(OpcodesForThisPredicate, OpcodeP->getInstrs());
}
for (const CodeGenInstruction *Expected : OpcodesForThisPredicate) {
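The GIMatchTree.cpp hunks above (and the FixedLenDecoderEmitter ones earlier) replace explicit push_back loops and insert(Dest.end(), Src.begin(), Src.end()) calls with append_range(Dest, Src). A tiny sketch of that helper's shape, assuming nothing beyond the standard library:

#include <cassert>
#include <vector>

// Shape of llvm::append_range: append a whole range to a container.
template <typename Container, typename Range>
void appendRange(Container &Dest, const Range &Src) {
  Dest.insert(Dest.end(), Src.begin(), Src.end());
}

int main() {
  std::vector<int> OpcodesForThisPredicate{1, 2};
  std::vector<int> Instrs{3, 4, 5};

  // Before: a push_back loop (or an explicit insert with iterator pairs);
  // after: one call that names the intent.
  appendRange(OpcodesForThisPredicate, Instrs);
  assert(OpcodesForThisPredicate.size() == 5);
}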
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.h b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
index b86f6454589c..bf41a2e0e234 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
@@ -353,10 +353,7 @@ public:
void declareOperand(unsigned InstrID, unsigned OpIdx);
GIMatchTreeInstrInfo *getInstrInfo(unsigned ID) const {
- auto I = InstrIDToInfo.find(ID);
- if (I != InstrIDToInfo.end())
- return I->second;
- return nullptr;
+ return InstrIDToInfo.lookup(ID);
}
void dump(raw_ostream &OS) const {
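The GIMatchTree.h hunk above collapses a find-then-check into DenseMap::lookup, which returns a value-initialized mapped type (here, a null pointer) when the key is absent. The same shape sketched with std::map:

#include <cassert>
#include <map>

// Sketch of the lookup-with-default idiom; std::map stands in for DenseMap.
template <typename Map>
typename Map::mapped_type lookupOrDefault(const Map &M,
                                          const typename Map::key_type &K) {
  auto It = M.find(K);
  return It != M.end() ? It->second : typename Map::mapped_type();
}

struct GIMatchTreeInstrInfoStub {};

int main() {
  std::map<unsigned, GIMatchTreeInstrInfoStub *> InstrIDToInfo;
  GIMatchTreeInstrInfoStub Info;
  InstrIDToInfo[7] = &Info;
  assert(lookupOrDefault(InstrIDToInfo, 7u) == &Info);
  assert(lookupOrDefault(InstrIDToInfo, 8u) == nullptr); // absent key -> null
}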
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISelEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 4e8dcc52fc20..0a6985a3eac2 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -187,17 +187,21 @@ class InstructionMatcher;
static Optional<LLTCodeGen> MVTToLLT(MVT::SimpleValueType SVT) {
MVT VT(SVT);
- if (VT.isVector() && VT.getVectorNumElements() != 1)
+ if (VT.isScalableVector())
+ return None;
+
+ if (VT.isFixedLengthVector() && VT.getVectorNumElements() != 1)
return LLTCodeGen(
LLT::vector(VT.getVectorNumElements(), VT.getScalarSizeInBits()));
if (VT.isInteger() || VT.isFloatingPoint())
return LLTCodeGen(LLT::scalar(VT.getSizeInBits()));
+
return None;
}
static std::string explainPredicates(const TreePatternNode *N) {
- std::string Explanation = "";
+ std::string Explanation;
StringRef Separator = "";
for (const TreePredicateCall &Call : N->getPredicateCalls()) {
const TreePredicateFn &P = Call.Fn;
@@ -301,8 +305,8 @@ static Error failedImport(const Twine &Reason) {
}
static Error isTrivialOperatorNode(const TreePatternNode *N) {
- std::string Explanation = "";
- std::string Separator = "";
+ std::string Explanation;
+ std::string Separator;
bool HasUnsupportedPredicate = false;
for (const TreePredicateCall &Call : N->getPredicateCalls()) {
@@ -389,6 +393,10 @@ getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) {
return Name;
}
+static std::string getScopedName(unsigned Scope, const std::string &Name) {
+ return ("pred:" + Twine(Scope) + ":" + Name).str();
+}
+
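getScopedName above builds keys of the form "pred:<scope>:<name>"; later hunks use those keys to remember, per predicate call, which argument slot each named operand should be recorded into. A rough sketch of that bookkeeping with plain containers (hypothetical names, not the emitter's real data flow):

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Equivalent of getScopedName: scope the operand name to one predicate call.
static std::string getScopedName(unsigned Scope, const std::string &Name) {
  return "pred:" + std::to_string(Scope) + ":" + Name;
}

int main() {
  // A predicate declared with arguments ($base, $offset), seen in call scope 3:
  // argument order defines the store index each operand is recorded into.
  std::vector<std::string> PredicateArgs = {"base", "offset"};
  unsigned Scope = 3;

  std::map<std::string, unsigned> StoreIdxForName;
  for (unsigned I = 0; I < PredicateArgs.size(); ++I)
    StoreIdxForName[getScopedName(Scope, PredicateArgs[I])] = I;

  // When a named leaf operand is visited, its scoped name selects the slot.
  assert(StoreIdxForName[getScopedName(3, "offset")] == 1);
}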
//===- MatchTable Helpers -------------------------------------------------===//
class MatchTable;
@@ -445,9 +453,8 @@ public:
MatchTableRecord(Optional<unsigned> LabelID_, StringRef EmitStr,
unsigned NumElements, unsigned Flags,
int64_t RawValue = std::numeric_limits<int64_t>::min())
- : LabelID(LabelID_.hasValue() ? LabelID_.getValue() : ~0u),
- EmitStr(EmitStr), NumElements(NumElements), Flags(Flags),
- RawValue(RawValue) {
+ : LabelID(LabelID_.getValueOr(~0u)), EmitStr(EmitStr),
+ NumElements(NumElements), Flags(Flags), RawValue(RawValue) {
assert((!LabelID_.hasValue() || LabelID != ~0u) &&
"This value is reserved for non-labels");
}
@@ -852,6 +859,11 @@ protected:
DefinedComplexPatternSubOperandMap;
/// A map of Symbolic Names to ComplexPattern sub-operands.
DefinedComplexPatternSubOperandMap ComplexSubOperands;
+ /// A map used for the multiple-reference error check of ComplexSubOperand.
+ /// A ComplexSubOperand can't be referenced from multiple different operands;
+ /// however, multiple references from the same operand are allowed, since
+ /// that is how 'same operand checks' are generated.
+ StringMap<std::string> ComplexSubOperandsParentName;
uint64_t RuleID;
static uint64_t NextRuleID;
@@ -917,14 +929,25 @@ public:
void definePhysRegOperand(Record *Reg, OperandMatcher &OM);
Error defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern,
- unsigned RendererID, unsigned SubOperandID) {
- if (ComplexSubOperands.count(SymbolicName))
- return failedImport(
- "Complex suboperand referenced more than once (Operand: " +
- SymbolicName + ")");
+ unsigned RendererID, unsigned SubOperandID,
+ StringRef ParentSymbolicName) {
+ std::string ParentName(ParentSymbolicName);
+ if (ComplexSubOperands.count(SymbolicName)) {
+ const std::string &RecordedParentName =
+ ComplexSubOperandsParentName[SymbolicName];
+ if (RecordedParentName != ParentName)
+ return failedImport("Error: Complex suboperand " + SymbolicName +
+ " referenced by different operands: " +
+ RecordedParentName + " and " + ParentName + ".");
+ // A complex suboperand referenced more than once from the same operand is
+ // used to generate a 'same operand check'. Emitting
+ // GIR_ComplexSubOperandRenderer for such references is already handled.
+ return Error::success();
+ }
ComplexSubOperands[SymbolicName] =
std::make_tuple(ComplexPattern, RendererID, SubOperandID);
+ ComplexSubOperandsParentName[SymbolicName] = ParentName;
return Error::success();
}
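The defineComplexSubOperand change above allows repeated references to a complex sub-operand from the same parent operand (that is how 'same operand' checks are generated) and only rejects references from two different parents. A standalone sketch of that check with illustrative names:

#include <iostream>
#include <map>
#include <optional>
#include <string>

std::map<std::string, std::string> ParentName;

// Returns an error message on conflict, std::nullopt on success.
std::optional<std::string> defineSubOperand(const std::string &Name,
                                            const std::string &Parent) {
  auto [It, Inserted] = ParentName.try_emplace(Name, Parent);
  if (!Inserted && It->second != Parent)
    return "Complex suboperand " + Name + " referenced by different operands: " +
           It->second + " and " + Parent;
  return std::nullopt; // first use, or a repeat from the same parent
}

int main() {
  defineSubOperand("offset", "addr1");
  defineSubOperand("offset", "addr1"); // allowed: same parent operand
  if (auto Err = defineSubOperand("offset", "addr2"))
    std::cout << *Err << "\n"; // rejected: different parent operand
}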
@@ -992,10 +1015,6 @@ protected:
bool Optimized = false;
public:
- /// Construct a new predicate and add it to the matcher.
- template <class Kind, class... Args>
- Optional<Kind *> addPredicate(Args &&... args);
-
typename PredicatesTy::iterator predicates_begin() {
return Predicates.begin();
}
@@ -1089,6 +1108,7 @@ public:
IPM_MemoryVsLLTSize,
IPM_MemoryAddressSpace,
IPM_MemoryAlignment,
+ IPM_VectorSplatImm,
IPM_GenericPredicate,
OPM_SameOperand,
OPM_ComplexPattern,
@@ -1101,6 +1121,7 @@ public:
OPM_PointerToAny,
OPM_RegBank,
OPM_MBB,
+ OPM_RecordNamedOperand,
};
protected:
@@ -1270,10 +1291,15 @@ public:
: OperandPredicateMatcher(OPM_PointerToAny, InsnVarID, OpIdx),
SizeInBits(SizeInBits) {}
- static bool classof(const OperandPredicateMatcher *P) {
+ static bool classof(const PredicateMatcher *P) {
return P->getKind() == OPM_PointerToAny;
}
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ SizeInBits == cast<PointerToAnyOperandMatcher>(&B)->SizeInBits;
+ }
+
void emitPredicateOpcodes(MatchTable &Table,
RuleMatcher &Rule) const override {
Table << MatchTable::Opcode("GIM_CheckPointerToAny")
@@ -1284,6 +1310,40 @@ public:
}
};
+/// Generates code to record a named operand in the RecordedOperands list at
+/// StoreIdx. Predicates with 'let PredicateCodeUsesOperands = 1' receive
+/// RecordedOperands as an argument to the predicate's C++ code once all
+/// operands have been matched.
+class RecordNamedOperandMatcher : public OperandPredicateMatcher {
+protected:
+ unsigned StoreIdx;
+ std::string Name;
+
+public:
+ RecordNamedOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
+ unsigned StoreIdx, StringRef Name)
+ : OperandPredicateMatcher(OPM_RecordNamedOperand, InsnVarID, OpIdx),
+ StoreIdx(StoreIdx), Name(Name) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_RecordNamedOperand;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ StoreIdx == cast<RecordNamedOperandMatcher>(&B)->StoreIdx &&
+ Name == cast<RecordNamedOperandMatcher>(&B)->Name;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override {
+ Table << MatchTable::Opcode("GIM_RecordNamedOperand")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::Comment("StoreIdx") << MatchTable::IntValue(StoreIdx)
+ << MatchTable::Comment("Name : " + Name) << MatchTable::LineBreak;
+ }
+};
+
/// Generates code to check that an operand is a particular target constant.
class ComplexPatternOperandMatcher : public OperandPredicateMatcher {
protected:
@@ -1523,7 +1583,7 @@ public:
AllocatedTemporariesBaseID(AllocatedTemporariesBaseID) {}
bool hasSymbolicName() const { return !SymbolicName.empty(); }
- const StringRef getSymbolicName() const { return SymbolicName; }
+ StringRef getSymbolicName() const { return SymbolicName; }
void setSymbolicName(StringRef Name) {
assert(SymbolicName.empty() && "Operand already has a symbolic name");
SymbolicName = std::string(Name);
@@ -1670,10 +1730,23 @@ PredicateListMatcher<PredicateMatcher>::getNoPredicateComment() const {
/// Generates code to check the opcode of an instruction.
class InstructionOpcodeMatcher : public InstructionPredicateMatcher {
protected:
- const CodeGenInstruction *I;
+ // Allow matching against one or several similar opcodes that share
+ // properties. This is to handle patterns where one SelectionDAG operation
+ // maps to multiple GlobalISel ones (e.g. G_BUILD_VECTOR and
+ // G_BUILD_VECTOR_TRUNC). The first is treated as the canonical opcode.
+ SmallVector<const CodeGenInstruction *, 2> Insts;
static DenseMap<const CodeGenInstruction *, unsigned> OpcodeValues;
+
+ MatchTableRecord getInstValue(const CodeGenInstruction *I) const {
+ const auto VI = OpcodeValues.find(I);
+ if (VI != OpcodeValues.end())
+ return MatchTable::NamedValue(I->Namespace, I->TheDef->getName(),
+ VI->second);
+ return MatchTable::NamedValue(I->Namespace, I->TheDef->getName());
+ }
+
public:
static void initOpcodeValuesMap(const CodeGenTarget &Target) {
OpcodeValues.clear();
@@ -1683,8 +1756,13 @@ public:
OpcodeValues[I] = OpcodeValue++;
}
- InstructionOpcodeMatcher(unsigned InsnVarID, const CodeGenInstruction *I)
- : InstructionPredicateMatcher(IPM_Opcode, InsnVarID), I(I) {}
+ InstructionOpcodeMatcher(unsigned InsnVarID,
+ ArrayRef<const CodeGenInstruction *> I)
+ : InstructionPredicateMatcher(IPM_Opcode, InsnVarID),
+ Insts(I.begin(), I.end()) {
+ assert((Insts.size() == 1 || Insts.size() == 2) &&
+ "unexpected number of opcode alternatives");
+ }
static bool classof(const PredicateMatcher *P) {
return P->getKind() == IPM_Opcode;
@@ -1692,22 +1770,36 @@ public:
bool isIdentical(const PredicateMatcher &B) const override {
return InstructionPredicateMatcher::isIdentical(B) &&
- I == cast<InstructionOpcodeMatcher>(&B)->I;
+ Insts == cast<InstructionOpcodeMatcher>(&B)->Insts;
}
+
+ bool hasValue() const override {
+ return Insts.size() == 1 && OpcodeValues.count(Insts[0]);
+ }
+
+ // TODO: This is used for the SwitchMatcher optimization. We should be able to
+ // return a list of the opcodes to match.
MatchTableRecord getValue() const override {
+ assert(Insts.size() == 1);
+
+ const CodeGenInstruction *I = Insts[0];
const auto VI = OpcodeValues.find(I);
if (VI != OpcodeValues.end())
return MatchTable::NamedValue(I->Namespace, I->TheDef->getName(),
VI->second);
return MatchTable::NamedValue(I->Namespace, I->TheDef->getName());
}
- bool hasValue() const override { return OpcodeValues.count(I); }
void emitPredicateOpcodes(MatchTable &Table,
RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckOpcode") << MatchTable::Comment("MI")
- << MatchTable::IntValue(InsnVarID) << getValue()
- << MatchTable::LineBreak;
+ StringRef CheckType = Insts.size() == 1 ?
+ "GIM_CheckOpcode" : "GIM_CheckOpcodeIsEither";
+ Table << MatchTable::Opcode(CheckType) << MatchTable::Comment("MI")
+ << MatchTable::IntValue(InsnVarID);
+
+ for (const CodeGenInstruction *I : Insts)
+ Table << getInstValue(I);
+ Table << MatchTable::LineBreak;
}
/// Compare the priority of this object and B.
@@ -1725,20 +1817,32 @@ public:
// using instruction frequency information to improve compile time.
if (const InstructionOpcodeMatcher *BO =
dyn_cast<InstructionOpcodeMatcher>(&B))
- return I->TheDef->getName() < BO->I->TheDef->getName();
+ return Insts[0]->TheDef->getName() < BO->Insts[0]->TheDef->getName();
return false;
};
bool isConstantInstruction() const {
- return I->TheDef->getName() == "G_CONSTANT";
+ return Insts.size() == 1 && Insts[0]->TheDef->getName() == "G_CONSTANT";
}
- StringRef getOpcode() const { return I->TheDef->getName(); }
- bool isVariadicNumOperands() const { return I->Operands.isVariadic; }
+ // The first opcode is the canonical opcode, and later are alternatives.
+ StringRef getOpcode() const {
+ return Insts[0]->TheDef->getName();
+ }
+
+ ArrayRef<const CodeGenInstruction *> getAlternativeOpcodes() {
+ return Insts;
+ }
+
+ bool isVariadicNumOperands() const {
+ // If one is variadic, they all should be.
+ return Insts[0]->Operands.isVariadic;
+ }
StringRef getOperandType(unsigned OpIdx) const {
- return I->Operands[OpIdx].OperandType;
+ // Types expected to be uniform for all alternatives.
+ return Insts[0]->Operands[OpIdx].OperandType;
}
};
@@ -2021,6 +2125,42 @@ public:
}
};
+// Matcher for immAllOnesV/immAllZerosV
+class VectorSplatImmPredicateMatcher : public InstructionPredicateMatcher {
+public:
+ enum SplatKind {
+ AllZeros,
+ AllOnes
+ };
+
+private:
+ SplatKind Kind;
+
+public:
+ VectorSplatImmPredicateMatcher(unsigned InsnVarID, SplatKind K)
+ : InstructionPredicateMatcher(IPM_VectorSplatImm, InsnVarID), Kind(K) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_VectorSplatImm;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return InstructionPredicateMatcher::isIdentical(B) &&
+ Kind == static_cast<const VectorSplatImmPredicateMatcher &>(B).Kind;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override {
+ if (Kind == AllOnes)
+ Table << MatchTable::Opcode("GIM_CheckIsBuildVectorAllOnes");
+ else
+ Table << MatchTable::Opcode("GIM_CheckIsBuildVectorAllZeros");
+
+ Table << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID);
+ Table << MatchTable::LineBreak;
+ }
+};
+
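The new VectorSplatImmPredicateMatcher above emits GIM_CheckIsBuildVectorAllOnes or GIM_CheckIsBuildVectorAllZeros for immAllOnesV/immAllZerosV patterns, i.e. a build-vector whose elements are all ones or all zeros. As a scalar-level illustration of the property being checked (not the generated matcher code):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

enum class SplatKind { AllZeros, AllOnes };

// Does every element of the (conceptual) build-vector match the splat kind?
// "All ones" means all bits set, i.e. -1 as a signed element.
static bool isSplatOf(const std::vector<int64_t> &Elts, SplatKind Kind) {
  const int64_t Want = Kind == SplatKind::AllOnes ? -1 : 0;
  return !Elts.empty() &&
         std::all_of(Elts.begin(), Elts.end(),
                     [Want](int64_t E) { return E == Want; });
}

int main() {
  assert(isSplatOf({-1, -1, -1, -1}, SplatKind::AllOnes));
  assert(isSplatOf({0, 0}, SplatKind::AllZeros));
  assert(!isSplatOf({0, 1}, SplatKind::AllZeros));
}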
/// Generates code to check an arbitrary C++ instruction predicate.
class GenericInstructionPredicateMatcher : public InstructionPredicateMatcher {
protected:
@@ -2077,8 +2217,9 @@ protected:
SmallVector<std::pair<Record *, unsigned>, 2> PhysRegInputs;
public:
- InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName)
- : Rule(Rule), SymbolicName(SymbolicName) {
+ InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName,
+ bool NumOpsCheck = true)
+ : Rule(Rule), NumOperandsCheck(NumOpsCheck), SymbolicName(SymbolicName) {
// We create a new instruction matcher.
// Get a new ID for that instruction.
InsnVarID = Rule.implicitlyDefineInsnVar(*this);
@@ -2108,10 +2249,10 @@ public:
}
OperandMatcher &getOperand(unsigned OpIdx) {
- auto I = std::find_if(Operands.begin(), Operands.end(),
- [&OpIdx](const std::unique_ptr<OperandMatcher> &X) {
- return X->getOpIdx() == OpIdx;
- });
+ auto I = llvm::find_if(Operands,
+ [&OpIdx](const std::unique_ptr<OperandMatcher> &X) {
+ return X->getOpIdx() == OpIdx;
+ });
if (I != Operands.end())
return **I;
llvm_unreachable("Failed to lookup operand");
@@ -2267,9 +2408,10 @@ protected:
public:
InstructionOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
- RuleMatcher &Rule, StringRef SymbolicName)
+ RuleMatcher &Rule, StringRef SymbolicName,
+ bool NumOpsCheck = true)
: OperandPredicateMatcher(OPM_Instruction, InsnVarID, OpIdx),
- InsnMatcher(new InstructionMatcher(Rule, SymbolicName)) {}
+ InsnMatcher(new InstructionMatcher(Rule, SymbolicName, NumOpsCheck)) {}
static bool classof(const PredicateMatcher *P) {
return P->getKind() == OPM_Instruction;
@@ -2395,7 +2537,7 @@ public:
return R->getKind() == OR_Copy;
}
- const StringRef getSymbolicName() const { return SymbolicName; }
+ StringRef getSymbolicName() const { return SymbolicName; }
void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName);
@@ -2462,7 +2604,7 @@ public:
return R->getKind() == OR_CopyOrAddZeroReg;
}
- const StringRef getSymbolicName() const { return SymbolicName; }
+ StringRef getSymbolicName() const { return SymbolicName; }
void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName);
@@ -2499,7 +2641,7 @@ public:
return R->getKind() == OR_CopyConstantAsImm;
}
- const StringRef getSymbolicName() const { return SymbolicName; }
+ StringRef getSymbolicName() const { return SymbolicName; }
void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
@@ -2530,7 +2672,7 @@ public:
return R->getKind() == OR_CopyFConstantAsFPImm;
}
- const StringRef getSymbolicName() const { return SymbolicName; }
+ StringRef getSymbolicName() const { return SymbolicName; }
void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
@@ -2564,7 +2706,7 @@ public:
return R->getKind() == OR_CopySubReg;
}
- const StringRef getSymbolicName() const { return SymbolicName; }
+ StringRef getSymbolicName() const { return SymbolicName; }
void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName);
@@ -2587,12 +2729,13 @@ protected:
unsigned InsnID;
const Record *RegisterDef;
bool IsDef;
+ const CodeGenTarget &Target;
public:
- AddRegisterRenderer(unsigned InsnID, const Record *RegisterDef,
- bool IsDef = false)
+ AddRegisterRenderer(unsigned InsnID, const CodeGenTarget &Target,
+ const Record *RegisterDef, bool IsDef = false)
: OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef),
- IsDef(IsDef) {}
+ IsDef(IsDef), Target(Target) {}
static bool classof(const OperandRenderer *R) {
return R->getKind() == OR_Register;
@@ -2600,13 +2743,17 @@ public:
void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
Table << MatchTable::Opcode("GIR_AddRegister")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::NamedValue(
- (RegisterDef->getValue("Namespace")
- ? RegisterDef->getValueAsString("Namespace")
- : ""),
- RegisterDef->getName())
- << MatchTable::Comment("AddRegisterRegFlags");
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID);
+ if (RegisterDef->getName() != "zero_reg") {
+ Table << MatchTable::NamedValue(
+ (RegisterDef->getValue("Namespace")
+ ? RegisterDef->getValueAsString("Namespace")
+ : ""),
+ RegisterDef->getName());
+ } else {
+ Table << MatchTable::NamedValue(Target.getRegNamespace(), "NoRegister");
+ }
+ Table << MatchTable::Comment("AddRegisterRegFlags");
// TODO: This is encoded as a 64-bit element, but only 16 or 32-bits are
// really needed for a physical register reference. We can pack the
@@ -2628,12 +2775,14 @@ protected:
unsigned TempRegID;
const CodeGenSubRegIndex *SubRegIdx;
bool IsDef;
+ bool IsDead;
public:
TempRegRenderer(unsigned InsnID, unsigned TempRegID, bool IsDef = false,
- const CodeGenSubRegIndex *SubReg = nullptr)
+ const CodeGenSubRegIndex *SubReg = nullptr,
+ bool IsDead = false)
: OperandRenderer(OR_Register), InsnID(InsnID), TempRegID(TempRegID),
- SubRegIdx(SubReg), IsDef(IsDef) {}
+ SubRegIdx(SubReg), IsDef(IsDef), IsDead(IsDead) {}
static bool classof(const OperandRenderer *R) {
return R->getKind() == OR_TempRegister;
@@ -2650,9 +2799,13 @@ public:
<< MatchTable::Comment("TempRegID") << MatchTable::IntValue(TempRegID)
<< MatchTable::Comment("TempRegFlags");
- if (IsDef)
- Table << MatchTable::NamedValue("RegState::Define");
- else
+ if (IsDef) {
+ SmallString<32> RegFlags;
+ RegFlags += "RegState::Define";
+ if (IsDead)
+ RegFlags += "|RegState::Dead";
+ Table << MatchTable::NamedValue(RegFlags);
+ } else
Table << MatchTable::IntValue(0);
if (SubRegIdx)
@@ -3365,6 +3518,16 @@ private:
// Rule coverage information.
Optional<CodeGenCoverage> RuleCoverage;
+ /// Variables used to help with collecting named operands for predicates with
+ /// 'let PredicateCodeUsesOperands = 1'. WaitingForNamedOperands is set to the
+ /// number of named operands that the predicate expects. Store locations in
+ /// StoreIdxForName correspond to the order in which operand names appear in
+ /// the predicate's argument list.
+ /// When we visit a named leaf operand and WaitingForNamedOperands is not
+ /// zero, add a matcher that will record the operand and decrease the counter.
+ unsigned WaitingForNamedOperands = 0;
+ StringMap<unsigned> StoreIdxForName;
+
void gatherOpcodeValues();
void gatherTypeIDValues();
void gatherNodeEquivs();
@@ -3394,7 +3557,11 @@ private:
Expected<action_iterator>
createInstructionRenderer(action_iterator InsertPt, RuleMatcher &M,
const TreePatternNode *Dst);
- void importExplicitDefRenderers(BuildMIAction &DstMIBuilder);
+
+ Expected<action_iterator>
+ importExplicitDefRenderers(action_iterator InsertPt, RuleMatcher &M,
+ BuildMIAction &DstMIBuilder,
+ const TreePatternNode *Dst);
Expected<action_iterator>
importExplicitUseRenderers(action_iterator InsertPt, RuleMatcher &M,
@@ -3413,7 +3580,8 @@ private:
void emitCxxPredicateFns(raw_ostream &OS, StringRef CodeFieldName,
StringRef TypeIdentifier, StringRef ArgType,
- StringRef ArgName, StringRef AdditionalDeclarations,
+ StringRef ArgName, StringRef AdditionalArgs,
+ StringRef AdditionalDeclarations,
std::function<bool(const Record *R)> Filter);
void emitImmPredicateFns(raw_ostream &OS, StringRef TypeIdentifier,
StringRef ArgType,
@@ -3455,6 +3623,12 @@ private:
Optional<const CodeGenRegisterClass *>
inferRegClassFromPattern(TreePatternNode *N);
+ // Add builtin predicates.
+ Expected<InstructionMatcher &>
+ addBuiltinPredicates(const Record *SrcGIEquivOrNull,
+ const TreePredicateFn &Predicate,
+ InstructionMatcher &InsnMatcher, bool &HasAddedMatcher);
+
public:
/// Takes a sequence of \p Rules and group them based on the predicates
/// they share. \p MatcherStorage is used as a memory container
@@ -3562,6 +3736,147 @@ GlobalISelEmitter::importRulePredicates(RuleMatcher &M,
return Error::success();
}
+Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
+ const Record *SrcGIEquivOrNull, const TreePredicateFn &Predicate,
+ InstructionMatcher &InsnMatcher, bool &HasAddedMatcher) {
+ if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
+ if (const ListInit *AddrSpaces = Predicate.getAddressSpaces()) {
+ SmallVector<unsigned, 4> ParsedAddrSpaces;
+
+ for (Init *Val : AddrSpaces->getValues()) {
+ IntInit *IntVal = dyn_cast<IntInit>(Val);
+ if (!IntVal)
+ return failedImport("Address space is not an integer");
+ ParsedAddrSpaces.push_back(IntVal->getValue());
+ }
+
+ if (!ParsedAddrSpaces.empty()) {
+ InsnMatcher.addPredicate<MemoryAddressSpacePredicateMatcher>(
+ 0, ParsedAddrSpaces);
+ }
+ }
+
+ int64_t MinAlign = Predicate.getMinAlignment();
+ if (MinAlign > 0)
+ InsnMatcher.addPredicate<MemoryAlignmentPredicateMatcher>(0, MinAlign);
+ }
+
+ // G_LOAD is used for both non-extending and any-extending loads.
+ if (Predicate.isLoad() && Predicate.isNonExtLoad()) {
+ InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+ 0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0);
+ return InsnMatcher;
+ }
+ if (Predicate.isLoad() && Predicate.isAnyExtLoad()) {
+ InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+ 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+ return InsnMatcher;
+ }
+
+ if (Predicate.isStore()) {
+ if (Predicate.isTruncStore()) {
+ // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
+ InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+ 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+ return InsnMatcher;
+ }
+ if (Predicate.isNonTruncStore()) {
+ // We need to check the sizes match here otherwise we could incorrectly
+ // match truncating stores with non-truncating ones.
+ InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+ 0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0);
+ }
+ }
+
+ // No check required. We already did it by swapping the opcode.
+ if (!SrcGIEquivOrNull->isValueUnset("IfSignExtend") &&
+ Predicate.isSignExtLoad())
+ return InsnMatcher;
+
+ // No check required. We already did it by swapping the opcode.
+ if (!SrcGIEquivOrNull->isValueUnset("IfZeroExtend") &&
+ Predicate.isZeroExtLoad())
+ return InsnMatcher;
+
+ // No check required. G_STORE by itself is a non-extending store.
+ if (Predicate.isNonTruncStore())
+ return InsnMatcher;
+
+ if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
+ if (Predicate.getMemoryVT() != nullptr) {
+ Optional<LLTCodeGen> MemTyOrNone =
+ MVTToLLT(getValueType(Predicate.getMemoryVT()));
+
+ if (!MemTyOrNone)
+ return failedImport("MemVT could not be converted to LLT");
+
+ // MMOs work in bytes, so we must take care that unusual types like i1
+ // don't round down.
+ unsigned MemSizeInBits =
+ llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8);
+
+ InsnMatcher.addPredicate<MemorySizePredicateMatcher>(0,
+ MemSizeInBits / 8);
+ return InsnMatcher;
+ }
+ }
+
+ if (Predicate.isLoad() || Predicate.isStore()) {
+ // No check required. A G_LOAD/G_STORE is an unindexed load.
+ if (Predicate.isUnindexed())
+ return InsnMatcher;
+ }
+
+ if (Predicate.isAtomic()) {
+ if (Predicate.isAtomicOrderingMonotonic()) {
+ InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("Monotonic");
+ return InsnMatcher;
+ }
+ if (Predicate.isAtomicOrderingAcquire()) {
+ InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("Acquire");
+ return InsnMatcher;
+ }
+ if (Predicate.isAtomicOrderingRelease()) {
+ InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("Release");
+ return InsnMatcher;
+ }
+ if (Predicate.isAtomicOrderingAcquireRelease()) {
+ InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
+ "AcquireRelease");
+ return InsnMatcher;
+ }
+ if (Predicate.isAtomicOrderingSequentiallyConsistent()) {
+ InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
+ "SequentiallyConsistent");
+ return InsnMatcher;
+ }
+ }
+
+ if (Predicate.isAtomicOrderingAcquireOrStronger()) {
+ InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
+ "Acquire", AtomicOrderingMMOPredicateMatcher::AO_OrStronger);
+ return InsnMatcher;
+ }
+ if (Predicate.isAtomicOrderingWeakerThanAcquire()) {
+ InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
+ "Acquire", AtomicOrderingMMOPredicateMatcher::AO_WeakerThan);
+ return InsnMatcher;
+ }
+
+ if (Predicate.isAtomicOrderingReleaseOrStronger()) {
+ InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
+ "Release", AtomicOrderingMMOPredicateMatcher::AO_OrStronger);
+ return InsnMatcher;
+ }
+ if (Predicate.isAtomicOrderingWeakerThanRelease()) {
+ InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
+ "Release", AtomicOrderingMMOPredicateMatcher::AO_WeakerThan);
+ return InsnMatcher;
+ }
+ HasAddedMatcher = false;
+ return InsnMatcher;
+}
+
Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
const TreePatternNode *Src, unsigned &TempOpIdx) {
@@ -3603,6 +3918,7 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
for (const TreePredicateCall &Call : Src->getPredicateCalls()) {
const TreePredicateFn &Predicate = Call.Fn;
+ bool HasAddedBuiltinMatcher = true;
if (Predicate.isAlwaysTrue())
continue;
@@ -3611,154 +3927,35 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
continue;
}
- // An address space check is needed in all contexts if there is one.
- if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
- if (const ListInit *AddrSpaces = Predicate.getAddressSpaces()) {
- SmallVector<unsigned, 4> ParsedAddrSpaces;
-
- for (Init *Val : AddrSpaces->getValues()) {
- IntInit *IntVal = dyn_cast<IntInit>(Val);
- if (!IntVal)
- return failedImport("Address space is not an integer");
- ParsedAddrSpaces.push_back(IntVal->getValue());
- }
-
- if (!ParsedAddrSpaces.empty()) {
- InsnMatcher.addPredicate<MemoryAddressSpacePredicateMatcher>(
- 0, ParsedAddrSpaces);
- }
- }
-
- int64_t MinAlign = Predicate.getMinAlignment();
- if (MinAlign > 0)
- InsnMatcher.addPredicate<MemoryAlignmentPredicateMatcher>(0, MinAlign);
- }
-
- // G_LOAD is used for both non-extending and any-extending loads.
- if (Predicate.isLoad() && Predicate.isNonExtLoad()) {
- InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
- 0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0);
- continue;
- }
- if (Predicate.isLoad() && Predicate.isAnyExtLoad()) {
- InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
- 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
- continue;
- }
-
- if (Predicate.isStore()) {
- if (Predicate.isTruncStore()) {
- // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
- InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
- 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
- continue;
- }
- if (Predicate.isNonTruncStore()) {
- // We need to check the sizes match here otherwise we could incorrectly
- // match truncating stores with non-truncating ones.
- InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
- 0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0);
- }
- }
-
- // No check required. We already did it by swapping the opcode.
- if (!SrcGIEquivOrNull->isValueUnset("IfSignExtend") &&
- Predicate.isSignExtLoad())
- continue;
-
- // No check required. We already did it by swapping the opcode.
- if (!SrcGIEquivOrNull->isValueUnset("IfZeroExtend") &&
- Predicate.isZeroExtLoad())
- continue;
-
- // No check required. G_STORE by itself is a non-extending store.
- if (Predicate.isNonTruncStore())
- continue;
-
- if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
- if (Predicate.getMemoryVT() != nullptr) {
- Optional<LLTCodeGen> MemTyOrNone =
- MVTToLLT(getValueType(Predicate.getMemoryVT()));
-
- if (!MemTyOrNone)
- return failedImport("MemVT could not be converted to LLT");
-
- // MMO's work in bytes so we must take care of unusual types like i1
- // don't round down.
- unsigned MemSizeInBits =
- llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8);
-
- InsnMatcher.addPredicate<MemorySizePredicateMatcher>(
- 0, MemSizeInBits / 8);
- continue;
- }
- }
-
- if (Predicate.isLoad() || Predicate.isStore()) {
- // No check required. A G_LOAD/G_STORE is an unindexed load.
- if (Predicate.isUnindexed())
- continue;
- }
-
- if (Predicate.isAtomic()) {
- if (Predicate.isAtomicOrderingMonotonic()) {
- InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
- "Monotonic");
- continue;
- }
- if (Predicate.isAtomicOrderingAcquire()) {
- InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("Acquire");
- continue;
- }
- if (Predicate.isAtomicOrderingRelease()) {
- InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("Release");
- continue;
- }
- if (Predicate.isAtomicOrderingAcquireRelease()) {
- InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
- "AcquireRelease");
- continue;
- }
- if (Predicate.isAtomicOrderingSequentiallyConsistent()) {
- InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
- "SequentiallyConsistent");
- continue;
- }
-
- if (Predicate.isAtomicOrderingAcquireOrStronger()) {
- InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
- "Acquire", AtomicOrderingMMOPredicateMatcher::AO_OrStronger);
- continue;
- }
- if (Predicate.isAtomicOrderingWeakerThanAcquire()) {
- InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
- "Acquire", AtomicOrderingMMOPredicateMatcher::AO_WeakerThan);
- continue;
- }
-
- if (Predicate.isAtomicOrderingReleaseOrStronger()) {
- InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
- "Release", AtomicOrderingMMOPredicateMatcher::AO_OrStronger);
- continue;
- }
- if (Predicate.isAtomicOrderingWeakerThanRelease()) {
- InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
- "Release", AtomicOrderingMMOPredicateMatcher::AO_WeakerThan);
- continue;
- }
- }
+ auto InsnMatcherOrError = addBuiltinPredicates(
+ SrcGIEquivOrNull, Predicate, InsnMatcher, HasAddedBuiltinMatcher);
+ if (auto Error = InsnMatcherOrError.takeError())
+ return std::move(Error);
if (Predicate.hasGISelPredicateCode()) {
+ if (Predicate.usesOperands()) {
+ assert(WaitingForNamedOperands == 0 &&
+ "previous predicate didn't find all operands or "
+ "nested predicate that uses operands");
+ TreePattern *TP = Predicate.getOrigPatFragRecord();
+ WaitingForNamedOperands = TP->getNumArgs();
+ for (unsigned i = 0; i < WaitingForNamedOperands; ++i)
+ StoreIdxForName[getScopedName(Call.Scope, TP->getArgName(i))] = i;
+ }
InsnMatcher.addPredicate<GenericInstructionPredicateMatcher>(Predicate);
continue;
}
-
- return failedImport("Src pattern child has predicate (" +
- explainPredicates(Src) + ")");
+ if (!HasAddedBuiltinMatcher) {
+ return failedImport("Src pattern child has predicate (" +
+ explainPredicates(Src) + ")");
+ }
}
+
+ bool IsAtomic = false;
if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsNonAtomic"))
InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("NotAtomic");
else if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsAtomic")) {
+ IsAtomic = true;
InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
"Unordered", AtomicOrderingMMOPredicateMatcher::AO_OrStronger);
}
@@ -3812,6 +4009,27 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
}
}
+ // Hack around an unfortunate mistake in how atomic store (and really
+ // atomicrmw in general) operands were ordered. ISD::STORE used the
+ // <stored value>, <pointer> order. ISD::ATOMIC_STORE used the opposite,
+ // <pointer>, <stored value>. In GlobalISel there's just the one store
+ // opcode, so we need to swap the operands here to get the right type check.
+ if (IsAtomic && SrcGIOrNull->TheDef->getName() == "G_STORE") {
+ assert(NumChildren == 2 && "wrong operands for atomic store");
+
+ TreePatternNode *PtrChild = Src->getChild(0);
+ TreePatternNode *ValueChild = Src->getChild(1);
+
+ if (auto Error = importChildMatcher(Rule, InsnMatcher, PtrChild, true,
+ false, 1, TempOpIdx))
+ return std::move(Error);
+
+ if (auto Error = importChildMatcher(Rule, InsnMatcher, ValueChild, false,
+ false, 0, TempOpIdx))
+ return std::move(Error);
+ return InsnMatcher;
+ }
+
// Match the used operands (i.e. the children of the operator).
bool IsIntrinsic =
SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" ||
@@ -3902,12 +4120,22 @@ Error GlobalISelEmitter::importChildMatcher(
bool OperandIsImmArg, unsigned OpIdx, unsigned &TempOpIdx) {
Record *PhysReg = nullptr;
- StringRef SrcChildName = getSrcChildName(SrcChild, PhysReg);
+ std::string SrcChildName = std::string(getSrcChildName(SrcChild, PhysReg));
+ if (!SrcChild->isLeaf() &&
+ SrcChild->getOperator()->isSubClassOf("ComplexPattern")) {
+ // The "name" of a non-leaf complex pattern (MY_PAT $op1, $op2) is
+ // "MY_PAT:op1:op2" and the ones with same "name" represent same operand.
+ std::string PatternName = std::string(SrcChild->getOperator()->getName());
+ for (unsigned i = 0; i < SrcChild->getNumChildren(); ++i) {
+ PatternName += ":";
+ PatternName += SrcChild->getChild(i)->getName();
+ }
+ SrcChildName = PatternName;
+ }
OperandMatcher &OM =
- PhysReg
- ? InsnMatcher.addPhysRegInput(PhysReg, OpIdx, TempOpIdx)
- : InsnMatcher.addOperand(OpIdx, std::string(SrcChildName), TempOpIdx);
+ PhysReg ? InsnMatcher.addPhysRegInput(PhysReg, OpIdx, TempOpIdx)
+ : InsnMatcher.addOperand(OpIdx, SrcChildName, TempOpIdx);
if (OM.isSameAsAnotherOperand())
return Error::success();
@@ -3954,9 +4182,9 @@ Error GlobalISelEmitter::importChildMatcher(
for (unsigned i = 0, e = SrcChild->getNumChildren(); i != e; ++i) {
auto *SubOperand = SrcChild->getChild(i);
if (!SubOperand->getName().empty()) {
- if (auto Error = Rule.defineComplexSubOperand(SubOperand->getName(),
- SrcChild->getOperator(),
- RendererID, i))
+ if (auto Error = Rule.defineComplexSubOperand(
+ SubOperand->getName(), SrcChild->getOperator(), RendererID, i,
+ SrcChildName))
return Error;
}
}
@@ -4002,6 +4230,13 @@ Error GlobalISelEmitter::importChildMatcher(
if (auto *ChildDefInit = dyn_cast<DefInit>(SrcChild->getLeafValue())) {
auto *ChildRec = ChildDefInit->getDef();
+ if (WaitingForNamedOperands) {
+ auto PA = SrcChild->getNamesAsPredicateArg().begin();
+ std::string Name = getScopedName(PA->getScope(), PA->getIdentifier());
+ OM.addPredicate<RecordNamedOperandMatcher>(StoreIdxForName[Name], Name);
+ --WaitingForNamedOperands;
+ }
+
// Check for register classes.
if (ChildRec->isSubClassOf("RegisterClass") ||
ChildRec->isSubClassOf("RegisterOperand")) {
@@ -4044,6 +4279,40 @@ Error GlobalISelEmitter::importChildMatcher(
if (ChildRec->getName() == "srcvalue")
return Error::success();
+ const bool ImmAllOnesV = ChildRec->getName() == "immAllOnesV";
+ if (ImmAllOnesV || ChildRec->getName() == "immAllZerosV") {
+ auto MaybeInsnOperand = OM.addPredicate<InstructionOperandMatcher>(
+ InsnMatcher.getRuleMatcher(), SrcChild->getName(), false);
+ InstructionOperandMatcher &InsnOperand = **MaybeInsnOperand;
+
+ ValueTypeByHwMode VTy = ChildTypes.front().getValueTypeByHwMode();
+
+ const CodeGenInstruction &BuildVector
+ = Target.getInstruction(RK.getDef("G_BUILD_VECTOR"));
+ const CodeGenInstruction &BuildVectorTrunc
+ = Target.getInstruction(RK.getDef("G_BUILD_VECTOR_TRUNC"));
+
+ // Treat G_BUILD_VECTOR as the canonical opcode, and G_BUILD_VECTOR_TRUNC
+ // as an alternative.
+ InsnOperand.getInsnMatcher().addPredicate<InstructionOpcodeMatcher>(
+ makeArrayRef({&BuildVector, &BuildVectorTrunc}));
+
+ // TODO: Handle both G_BUILD_VECTOR and G_BUILD_VECTOR_TRUNC. We could
+ // theoretically not emit any opcode check, but getOpcodeMatcher currently
+ // has to succeed.
+ OperandMatcher &OM =
+ InsnOperand.getInsnMatcher().addOperand(0, "", TempOpIdx);
+ if (auto Error =
+ OM.addTypeCheckPredicate(VTy, false /* OperandIsAPointer */))
+ return failedImport(toString(std::move(Error)) +
+ " for result of Src pattern operator");
+
+ InsnOperand.getInsnMatcher().addPredicate<VectorSplatImmPredicateMatcher>(
+ ImmAllOnesV ? VectorSplatImmPredicateMatcher::AllOnes
+ : VectorSplatImmPredicateMatcher::AllZeros);
+ return Error::success();
+ }
+
return failedImport(
"Src pattern child def is an unsupported tablegen class");
}
@@ -4155,7 +4424,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
return failedImport("Dst operand has an unsupported type");
if (ChildRec->isSubClassOf("Register")) {
- DstMIBuilder.addRenderer<AddRegisterRenderer>(ChildRec);
+ DstMIBuilder.addRenderer<AddRegisterRenderer>(Target, ChildRec);
return InsertPt;
}
@@ -4215,12 +4484,15 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
&Target.getInstruction(RK.getDef("COPY")));
BuildMIAction &CopyToPhysRegMIBuilder =
*static_cast<BuildMIAction *>(InsertPt->get());
- CopyToPhysRegMIBuilder.addRenderer<AddRegisterRenderer>(PhysInput.first,
+ CopyToPhysRegMIBuilder.addRenderer<AddRegisterRenderer>(Target,
+ PhysInput.first,
true);
CopyToPhysRegMIBuilder.addRenderer<CopyPhysRegRenderer>(PhysInput.first);
}
- importExplicitDefRenderers(DstMIBuilder);
+ if (auto Error = importExplicitDefRenderers(InsertPt, M, DstMIBuilder, Dst)
+ .takeError())
+ return std::move(Error);
if (auto Error = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst)
.takeError())
@@ -4372,13 +4644,39 @@ Expected<action_iterator> GlobalISelEmitter::createInstructionRenderer(
DstI);
}
-void GlobalISelEmitter::importExplicitDefRenderers(
- BuildMIAction &DstMIBuilder) {
+Expected<action_iterator> GlobalISelEmitter::importExplicitDefRenderers(
+ action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
+ const TreePatternNode *Dst) {
const CodeGenInstruction *DstI = DstMIBuilder.getCGI();
- for (unsigned I = 0; I < DstI->Operands.NumDefs; ++I) {
- const CGIOperandList::OperandInfo &DstIOperand = DstI->Operands[I];
- DstMIBuilder.addRenderer<CopyRenderer>(DstIOperand.Name);
+ const unsigned NumDefs = DstI->Operands.NumDefs;
+ if (NumDefs == 0)
+ return InsertPt;
+
+ DstMIBuilder.addRenderer<CopyRenderer>(DstI->Operands[0].Name);
+
+ // Some instructions have multiple defs, but are missing a type entry
+ // (e.g. s_cc_out operands).
+ if (Dst->getExtTypes().size() < NumDefs)
+ return failedImport("unhandled discarded def");
+
+ // Patterns only handle a single result, so any result after the first is an
+ // implicitly dead def.
+ for (unsigned I = 1; I < NumDefs; ++I) {
+ const TypeSetByHwMode &ExtTy = Dst->getExtType(I);
+ if (!ExtTy.isMachineValueType())
+ return failedImport("unsupported typeset");
+
+ auto OpTy = MVTToLLT(ExtTy.getMachineValueType().SimpleTy);
+ if (!OpTy)
+ return failedImport("unsupported type");
+
+ unsigned TempRegID = M.allocateTempRegID();
+ InsertPt =
+ M.insertAction<MakeTempRegisterAction>(InsertPt, *OpTy, TempRegID);
+ DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID, true, nullptr, true);
}
+
+ return InsertPt;
}
Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
@@ -4392,6 +4690,8 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
// EXTRACT_SUBREG needs to use a subregister COPY.
if (Name == "EXTRACT_SUBREG") {
+ if (!Dst->getChild(1)->isLeaf())
+ return failedImport("EXTRACT_SUBREG child #1 is not a leaf");
DefInit *SubRegInit = dyn_cast<DefInit>(Dst->getChild(1)->getLeafValue());
if (!SubRegInit)
return failedImport("EXTRACT_SUBREG child #1 is not a subreg index");
@@ -4578,7 +4878,7 @@ Error GlobalISelEmitter::importDefaultOperandRenderers(
IDMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
} else {
- DstMIBuilder.addRenderer<AddRegisterRenderer>(Def);
+ DstMIBuilder.addRenderer<AddRegisterRenderer>(Target, Def);
}
continue;
}
@@ -4653,6 +4953,17 @@ GlobalISelEmitter::inferRegClassFromPattern(TreePatternNode *N) {
return None;
return getRegClassFromLeaf(RCChild);
}
+ if (InstName == "INSERT_SUBREG") {
+ TreePatternNode *Child0 = N->getChild(0);
+ assert(Child0->getNumTypes() == 1 && "Unexpected number of types!");
+ const TypeSetByHwMode &VTy = Child0->getExtType(0);
+ return inferSuperRegisterClassForNode(VTy, Child0, N->getChild(2));
+ }
+ if (InstName == "EXTRACT_SUBREG") {
+ assert(N->getNumTypes() == 1 && "Unexpected number of types!");
+ const TypeSetByHwMode &VTy = N->getExtType(0);
+ return inferSuperRegisterClass(VTy, N->getChild(1));
+ }
// Handle destination record types that we can safely infer a register class
// from.
@@ -4689,7 +5000,8 @@ GlobalISelEmitter::inferSuperRegisterClass(const TypeSetByHwMode &Ty,
// Use the information we found above to find a minimal register class which
// supports the subregister and type we want.
auto RC =
- Target.getSuperRegForSubReg(Ty.getValueTypeByHwMode(), CGRegs, SubIdx);
+ Target.getSuperRegForSubReg(Ty.getValueTypeByHwMode(), CGRegs, SubIdx,
+ /* MustBeAllocatable */ true);
if (!RC)
return None;
return *RC;
@@ -4777,9 +5089,9 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
if (Dst->isLeaf()) {
Record *RCDef = getInitValueAsRegClass(Dst->getLeafValue());
-
- const CodeGenRegisterClass &RC = Target.getRegisterClass(RCDef);
if (RCDef) {
+ const CodeGenRegisterClass &RC = Target.getRegisterClass(RCDef);
+
// We need to replace the def and all its uses with the specified
// operand. However, we must also insert COPY's wherever needed.
// For now, emit a copy and let the register allocator clean up.
@@ -4814,8 +5126,8 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
auto &DstI = Target.getInstruction(DstOp);
StringRef DstIName = DstI.TheDef->getName();
- if (DstI.Operands.NumDefs != Src->getExtTypes().size())
- return failedImport("Src pattern results and dst MI defs are different (" +
+ if (DstI.Operands.NumDefs < Src->getExtTypes().size())
+ return failedImport("Src pattern result has more defs than dst MI (" +
to_string(Src->getExtTypes().size()) + " def(s) vs " +
to_string(DstI.Operands.NumDefs) + " def(s))");
@@ -5035,7 +5347,8 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
// trouble than it's worth.
void GlobalISelEmitter::emitCxxPredicateFns(
raw_ostream &OS, StringRef CodeFieldName, StringRef TypeIdentifier,
- StringRef ArgType, StringRef ArgName, StringRef AdditionalDeclarations,
+ StringRef ArgType, StringRef ArgName, StringRef AdditionalArgs,
+ StringRef AdditionalDeclarations,
std::function<bool(const Record *R)> Filter) {
std::vector<const Record *> MatchedRecords;
const auto &Defs = RK.getAllDerivedDefinitions("PatFrag");
@@ -5060,7 +5373,7 @@ void GlobalISelEmitter::emitCxxPredicateFns(
OS << "bool " << Target.getName() << "InstructionSelector::test" << ArgName
<< "Predicate_" << TypeIdentifier << "(unsigned PredicateID, " << ArgType << " "
- << ArgName << ") const {\n"
+ << ArgName << AdditionalArgs << ") const {\n"
<< AdditionalDeclarations;
if (!AdditionalDeclarations.empty())
OS << "\n";
@@ -5086,12 +5399,13 @@ void GlobalISelEmitter::emitImmPredicateFns(
raw_ostream &OS, StringRef TypeIdentifier, StringRef ArgType,
std::function<bool(const Record *R)> Filter) {
return emitCxxPredicateFns(OS, "ImmediateCode", TypeIdentifier, ArgType,
- "Imm", "", Filter);
+ "Imm", "", "", Filter);
}
void GlobalISelEmitter::emitMIPredicateFns(raw_ostream &OS) {
return emitCxxPredicateFns(
OS, "GISelPredicateCode", "MI", "const MachineInstr &", "MI",
+ ", const std::array<const MachineOperand *, 3> &Operands",
" const MachineFunction &MF = *MI.getParent()->getParent();\n"
" const MachineRegisterInfo &MRI = MF.getRegInfo();\n"
" (void)MRI;",
@@ -5117,8 +5431,7 @@ std::vector<Matcher *> GlobalISelEmitter::optimizeRules(
// added rules out of it and make sure to re-create the group to properly
// re-initialize it:
if (CurrentGroup->size() < 2)
- for (Matcher *M : CurrentGroup->matchers())
- OptRules.push_back(M);
+ append_range(OptRules, CurrentGroup->matchers());
else {
CurrentGroup->finalize();
OptRules.push_back(CurrentGroup.get());
@@ -5167,15 +5480,13 @@ GlobalISelEmitter::buildMatchTable(MutableArrayRef<RuleMatcher> Rules,
OpcodeOrder[Opcode] = CurrentOrdering++;
}
- std::stable_sort(InputRules.begin(), InputRules.end(),
- [&OpcodeOrder](const Matcher *A, const Matcher *B) {
- auto *L = static_cast<const RuleMatcher *>(A);
- auto *R = static_cast<const RuleMatcher *>(B);
- return std::make_tuple(OpcodeOrder[L->getOpcode()],
- L->getNumOperands()) <
- std::make_tuple(OpcodeOrder[R->getOpcode()],
- R->getNumOperands());
- });
+ llvm::stable_sort(InputRules, [&OpcodeOrder](const Matcher *A,
+ const Matcher *B) {
+ auto *L = static_cast<const RuleMatcher *>(A);
+ auto *R = static_cast<const RuleMatcher *>(B);
+ return std::make_tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) <
+ std::make_tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands());
+ });
for (Matcher *Rule : InputRules)
Rule->optimize();
@@ -5308,7 +5619,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
<< " typedef void(" << Target.getName()
<< "InstructionSelector::*CustomRendererFn)(MachineInstrBuilder &, const "
- "MachineInstr&, int) "
+ "MachineInstr &, int) "
"const;\n"
<< " const ISelInfoTy<PredicateBitset, ComplexMatcherMemFn, "
"CustomRendererFn> "
@@ -5324,7 +5635,8 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
<< " bool testImmPredicate_APFloat(unsigned PredicateID, const APFloat "
"&Imm) const override;\n"
<< " const int64_t *getMatchTable() const override;\n"
- << " bool testMIPredicate_MI(unsigned PredicateID, const MachineInstr &MI) "
+ << " bool testMIPredicate_MI(unsigned PredicateID, const MachineInstr &MI"
+ ", const std::array<const MachineOperand *, 3> &Operands) "
"const override;\n"
<< "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n\n";
@@ -5360,7 +5672,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
OS << "void " << Target.getName() << "InstructionSelector"
"::setupGeneratedPerFunctionState(MachineFunction &MF) {\n"
" AvailableFunctionFeatures = computeAvailableFunctionFeatures("
- "(const " << Target.getName() << "Subtarget*)&MF.getSubtarget(), &MF);\n"
+ "(const " << Target.getName() << "Subtarget *)&MF.getSubtarget(), &MF);\n"
"}\n";
if (Target.getName() == "X86" || Target.getName() == "AArch64") {
@@ -5378,8 +5690,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
// Emit a table containing the LLT objects needed by the matcher and an enum
// for the matcher to reference them with.
std::vector<LLTCodeGen> TypeObjects;
- for (const auto &Ty : KnownTypes)
- TypeObjects.push_back(Ty);
+ append_range(TypeObjects, KnownTypes);
llvm::sort(TypeObjects);
OS << "// LLT Objects.\n"
<< "enum {\n";
@@ -5481,7 +5792,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
<< "enum {\n"
<< " GICR_Invalid,\n";
for (const auto &Record : CustomRendererFns)
- OS << " GICR_" << Record->getValueAsString("RendererFn") << ", \n";
+ OS << " GICR_" << Record->getValueAsString("RendererFn") << ",\n";
OS << "};\n";
OS << Target.getName() << "InstructionSelector::CustomRendererFn\n"
@@ -5774,11 +6085,10 @@ void SwitchMatcher::finalize() {
if (empty())
return;
- std::stable_sort(Matchers.begin(), Matchers.end(),
- [](const Matcher *L, const Matcher *R) {
- return L->getFirstCondition().getValue() <
- R->getFirstCondition().getValue();
- });
+ llvm::stable_sort(Matchers, [](const Matcher *L, const Matcher *R) {
+ return L->getFirstCondition().getValue() <
+ R->getFirstCondition().getValue();
+ });
Condition = Matchers[0]->popFirstCondition();
for (unsigned I = 1, E = Values.size(); I < E; ++I)
Matchers[I]->popFirstCondition();
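The widened testMIPredicate_MI signature above hands the predicate the operands recorded by RecordNamedOperandMatcher, so a target's GISelPredicateCode can inspect them directly. A minimal sketch of such a predicate body follows; the predicate itself, the operand index, and the 32-bit size check are hypothetical, while MRI is real and comes from the generated prologue shown in this hunk:

    // Hypothetical GISelPredicateCode body using the new Operands parameter.
    return Operands[0] && Operands[0]->isReg() &&
           MRI.getType(Operands[0]->getReg()).getSizeInBits() == 32;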
diff --git a/contrib/llvm-project/llvm/utils/TableGen/InstrInfoEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/InstrInfoEmitter.cpp
index f3141735a995..9ff385faec56 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -182,11 +182,10 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
if (Constraint.isNone())
Res += "0";
else if (Constraint.isEarlyClobber())
- Res += "(1 << MCOI::EARLY_CLOBBER)";
+ Res += "MCOI_EARLY_CLOBBER";
else {
assert(Constraint.isTied());
- Res += "((" + utostr(Constraint.getTiedOperand()) +
- " << 16) | (1 << MCOI::TIED_TO))";
+ Res += "MCOI_TIED_TO(" + utostr(Constraint.getTiedOperand()) + ")";
}
Result.push_back(Res);
@@ -280,7 +279,7 @@ void InstrInfoEmitter::emitOperandNameMappings(raw_ostream &OS,
for (const auto &Op : Operands)
OS << " " << Op.first << " = " << Op.second << ",\n";
- OS << "OPERAND_LAST";
+ OS << " OPERAND_LAST";
OS << "\n};\n";
OS << "} // end namespace OpName\n";
OS << "} // end namespace " << Namespace << "\n";
@@ -316,7 +315,7 @@ void InstrInfoEmitter::emitOperandNameMappings(raw_ostream &OS,
OS << " return OperandMap[" << TableIndex++ << "][NamedIdx];\n";
}
- OS << " default: return -1;\n";
+ OS << " default: return -1;\n";
OS << " }\n";
} else {
// There are no operands, so no need to emit anything
@@ -371,7 +370,7 @@ void InstrInfoEmitter::emitOperandTypeMappings(
OS << "namespace " << Namespace << " {\n";
OS << "LLVM_READONLY\n";
OS << "static int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n";
- // TODO: Factor out instructions with same operands to compress the tables.
+ // TODO: Factor out duplicate operand lists to compress the tables.
if (!NumberedInstructions.empty()) {
std::vector<int> OperandOffsets;
std::vector<Record *> OperandRecords;
@@ -385,7 +384,7 @@ void InstrInfoEmitter::emitOperandTypeMappings(
OperandRecords.push_back(Op.Rec);
++CurrentOffset;
} else {
- for (Init *Arg : make_range(MIOI->arg_begin(), MIOI->arg_end())) {
+ for (Init *Arg : MIOI->getArgs()) {
OperandRecords.push_back(cast<DefInit>(Arg)->getDef());
++CurrentOffset;
}
@@ -393,16 +392,26 @@ void InstrInfoEmitter::emitOperandTypeMappings(
}
}
- // Emit the table of offsets for the opcode lookup.
- OS << " const int Offsets[] = {\n";
+ // Emit the table of offsets (indexes) into the operand type table.
+ // Size the unsigned integer offset to save space.
+ assert(OperandRecords.size() <= UINT32_MAX &&
+ "Too many operands for offset table");
+ OS << ((OperandRecords.size() <= UINT16_MAX) ? " const uint16_t"
+ : " const uint32_t");
+ OS << " Offsets[] = {\n";
for (int I = 0, E = OperandOffsets.size(); I != E; ++I)
OS << " " << OperandOffsets[I] << ",\n";
OS << " };\n";
// Add an entry for the end so that we don't need to special case it below.
OperandOffsets.push_back(OperandRecords.size());
+
// Emit the actual operand types in a flat table.
- OS << " const int OpcodeOperandTypes[] = {\n ";
+ // Size the signed integer operand type to save space.
+ assert(EnumVal <= INT16_MAX &&
+ "Too many operand types for operand types table");
+ OS << ((EnumVal <= INT8_MAX) ? " const int8_t" : " const int16_t");
+ OS << " OpcodeOperandTypes[] = {\n ";
for (int I = 0, E = OperandRecords.size(), CurOffset = 1; I != E; ++I) {
// We print each Opcode's operands in its own row.
if (I == OperandOffsets[CurOffset]) {
@@ -532,6 +541,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
unsigned ListNumber = 0;
// Emit all of the instruction's implicit uses and defs.
+ Records.startTimer("Emit uses/defs");
for (const CodeGenInstruction *II : Target.getInstructionsByEnumValue()) {
Record *Inst = II->TheDef;
std::vector<Record*> Uses = Inst->getValueAsListOfDefs("Uses");
@@ -549,10 +559,12 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OperandInfoMapTy OperandInfoIDs;
// Emit all of the operand info records.
+ Records.startTimer("Emit operand info");
EmitOperandInfo(OS, OperandInfoIDs);
// Emit all of the MCInstrDesc records in their ENUM ordering.
//
+ Records.startTimer("Emit InstrDesc records");
OS << "\nextern const MCInstrDesc " << TargetName << "Insts[] = {\n";
ArrayRef<const CodeGenInstruction*> NumberedInstructions =
Target.getInstructionsByEnumValue();
@@ -568,6 +580,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OS << "};\n\n";
// Emit the array of instruction names.
+ Records.startTimer("Emit instruction names");
InstrNames.layout();
InstrNames.emitStringLiteralDef(OS, Twine("extern const char ") + TargetName +
"InstrNameData[]");
@@ -628,6 +641,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
}
// MCInstrInfo initialization routine.
+ Records.startTimer("Emit initialization routine");
OS << "static inline void Init" << TargetName
<< "MCInstrInfo(MCInstrInfo *II) {\n";
OS << " II->InitMCInstrInfo(" << TargetName << "Insts, " << TargetName
@@ -706,10 +720,13 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OS << "#endif // GET_INSTRINFO_CTOR_DTOR\n\n";
+ Records.startTimer("Emit operand name mappings");
emitOperandNameMappings(OS, Target, NumberedInstructions);
+ Records.startTimer("Emit operand type mappings");
emitOperandTypeMappings(OS, Target, NumberedInstructions);
+ Records.startTimer("Emit helper methods");
emitMCIIHelperMethods(OS, TargetName);
}
@@ -862,7 +879,9 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) {
namespace llvm {
void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) {
+ RK.startTimer("Analyze DAG patterns");
InstrInfoEmitter(RK).run(OS);
+ RK.startTimer("Emit map table");
EmitMapTable(RK, OS);
}
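MCOI_EARLY_CLOBBER and MCOI_TIED_TO are defined in LLVM's MC headers rather than in this patch; for the generated operand tables to stay equivalent, they have to expand to the same constraint encoding as the inline expressions they replace. A sketch of that assumed shape, mirroring the removed lines:

    // Assumed definitions; the authoritative ones live next to MCOperandInfo
    // in the MC layer, not in this emitter change.
    #define MCOI_EARLY_CLOBBER (1 << MCOI::EARLY_CLOBBER)
    #define MCOI_TIED_TO(op) (((op) << 16) | (1 << MCOI::TIED_TO))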
diff --git a/contrib/llvm-project/llvm/utils/TableGen/IntrinsicEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 7e4191494149..978d24c8300d 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -246,13 +246,16 @@ enum IIT_Info {
IIT_SUBDIVIDE4_ARG = 45,
IIT_VEC_OF_BITCASTS_TO_INT = 46,
IIT_V128 = 47,
- IIT_BF16 = 48
+ IIT_BF16 = 48,
+ IIT_STRUCT9 = 49,
+ IIT_V256 = 50,
+ IIT_AMX = 51
};
static void EncodeFixedValueType(MVT::SimpleValueType VT,
std::vector<unsigned char> &Sig) {
if (MVT(VT).isInteger()) {
- unsigned BitWidth = MVT(VT).getSizeInBits();
+ unsigned BitWidth = MVT(VT).getFixedSizeInBits();
switch (BitWidth) {
default: PrintFatalError("unhandled integer type width in intrinsic!");
case 1: return Sig.push_back(IIT_I1);
@@ -274,6 +277,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT,
case MVT::token: return Sig.push_back(IIT_TOKEN);
case MVT::Metadata: return Sig.push_back(IIT_METADATA);
case MVT::x86mmx: return Sig.push_back(IIT_MMX);
+ case MVT::x86amx: return Sig.push_back(IIT_AMX);
// MVT::OtherVT is used to mean the empty struct type here.
case MVT::Other: return Sig.push_back(IIT_EMPTYSTRUCT);
// MVT::isVoid is used to represent varargs here.
@@ -384,6 +388,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
case 32: Sig.push_back(IIT_V32); break;
case 64: Sig.push_back(IIT_V64); break;
case 128: Sig.push_back(IIT_V128); break;
+ case 256: Sig.push_back(IIT_V256); break;
case 512: Sig.push_back(IIT_V512); break;
case 1024: Sig.push_back(IIT_V1024); break;
}
@@ -469,6 +474,7 @@ static void ComputeFixedEncoding(const CodeGenIntrinsic &Int,
case 6: TypeSig.push_back(IIT_STRUCT6); break;
case 7: TypeSig.push_back(IIT_STRUCT7); break;
case 8: TypeSig.push_back(IIT_STRUCT8); break;
+ case 9: TypeSig.push_back(IIT_STRUCT9); break;
default: llvm_unreachable("Unhandled case in struct");
}
@@ -632,13 +638,13 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
std::max(maxArgAttrs, unsigned(intrinsic.ArgumentAttributes.size()));
unsigned &N = UniqAttributes[&intrinsic];
if (N) continue;
- assert(AttrNum < 256 && "Too many unique attributes for table!");
N = ++AttrNum;
+ assert(N < 65536 && "Too many unique attributes for table!");
}
// Emit an array of AttributeList. Most intrinsics will have at least one
// entry, for the function itself (index ~1), which is usually nounwind.
- OS << " static const uint8_t IntrinsicsToAttributesMap[] = {\n";
+ OS << " static const uint16_t IntrinsicsToAttributesMap[] = {\n";
for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
const CodeGenIntrinsic &intrinsic = Ints[i];
@@ -687,6 +693,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << "Attribute::NoAlias";
addComma = true;
break;
+ case CodeGenIntrinsic::NoUndef:
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::NoUndef";
+ addComma = true;
+ break;
case CodeGenIntrinsic::Returned:
if (addComma)
OS << ",";
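The new IIT codes above let wider fixed vectors and the x86 AMX tile type appear in intrinsic signatures. As a sketch of the encoding order for a hypothetical 256-element vector of i1 (the element type is only an example), the vector-width code is pushed before the element type, roughly:

    Sig.push_back(IIT_V256);             // new 256-lane vector width code
    EncodeFixedValueType(MVT::i1, Sig);  // element type, encoded as IIT_I1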
diff --git a/contrib/llvm-project/llvm/utils/TableGen/OptParserEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/OptParserEmitter.cpp
index 251533a8d154..8e6c05885e5b 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
-static const std::string getOptionName(const Record &R) {
+static std::string getOptionName(const Record &R) {
// Use the record name unless EnumName is defined.
if (isa<UnsetInit>(R.getValueInit("EnumName")))
return std::string(R.getName());
@@ -35,19 +35,20 @@ static raw_ostream &write_cstring(raw_ostream &OS, llvm::StringRef Str) {
return OS;
}
-static const std::string getOptionSpelling(const Record &R,
- size_t &PrefixLength) {
+static std::string getOptionSpelling(const Record &R, size_t &PrefixLength) {
std::vector<StringRef> Prefixes = R.getValueAsListOfStrings("Prefixes");
StringRef Name = R.getValueAsString("Name");
+
if (Prefixes.empty()) {
PrefixLength = 0;
return Name.str();
}
+
PrefixLength = Prefixes[0].size();
return (Twine(Prefixes[0]) + Twine(Name)).str();
}
-static const std::string getOptionSpelling(const Record &R) {
+static std::string getOptionSpelling(const Record &R) {
size_t PrefixLength;
return getOptionSpelling(R, PrefixLength);
}
@@ -59,75 +60,29 @@ static void emitNameUsingSpelling(raw_ostream &OS, const Record &R) {
OS << "[" << PrefixLength << "]";
}
-class MarshallingKindInfo {
+class MarshallingInfo {
public:
+ static constexpr const char *MacroName = "OPTION_WITH_MARSHALLING";
const Record &R;
- const char *MacroName;
bool ShouldAlwaysEmit;
+ StringRef MacroPrefix;
StringRef KeyPath;
StringRef DefaultValue;
StringRef NormalizedValuesScope;
-
- void emit(raw_ostream &OS) const {
- write_cstring(OS, StringRef(getOptionSpelling(R)));
- OS << ", ";
- OS << ShouldAlwaysEmit;
- OS << ", ";
- OS << KeyPath;
- OS << ", ";
- emitScopedNormalizedValue(OS, DefaultValue);
- OS << ", ";
- emitSpecific(OS);
- }
-
- virtual Optional<StringRef> emitValueTable(raw_ostream &OS) const {
- return None;
- }
-
- virtual ~MarshallingKindInfo() = default;
-
- static std::unique_ptr<MarshallingKindInfo> create(const Record &R);
-
-protected:
- void emitScopedNormalizedValue(raw_ostream &OS,
- StringRef NormalizedValue) const {
- if (!NormalizedValuesScope.empty())
- OS << NormalizedValuesScope << "::";
- OS << NormalizedValue;
- }
-
- virtual void emitSpecific(raw_ostream &OS) const = 0;
- MarshallingKindInfo(const Record &R, const char *MacroName)
- : R(R), MacroName(MacroName) {}
-};
-
-class MarshallingFlagInfo final : public MarshallingKindInfo {
-public:
- bool IsPositive;
-
- void emitSpecific(raw_ostream &OS) const override { OS << IsPositive; }
-
- static std::unique_ptr<MarshallingKindInfo> create(const Record &R) {
- std::unique_ptr<MarshallingFlagInfo> Ret(new MarshallingFlagInfo(R));
- Ret->IsPositive = R.getValueAsBit("IsPositive");
- return Ret;
- }
-
-private:
- MarshallingFlagInfo(const Record &R)
- : MarshallingKindInfo(R, "OPTION_WITH_MARSHALLING_FLAG") {}
-};
-
-class MarshallingStringInfo final : public MarshallingKindInfo {
-public:
- StringRef NormalizerRetTy;
+ StringRef ImpliedCheck;
+ StringRef ImpliedValue;
+ StringRef ShouldParse;
StringRef Normalizer;
StringRef Denormalizer;
+ StringRef ValueMerger;
+ StringRef ValueExtractor;
int TableIndex = -1;
std::vector<StringRef> Values;
std::vector<StringRef> NormalizedValues;
std::string ValueTableName;
+ static size_t NextTableIndex;
+
static constexpr const char *ValueTablePreamble = R"(
struct SimpleEnumValue {
const char *Name;
@@ -143,17 +98,39 @@ struct SimpleEnumValueTable {
static constexpr const char *ValueTablesDecl =
"static const SimpleEnumValueTable SimpleEnumValueTables[] = ";
- void emitSpecific(raw_ostream &OS) const override {
- emitScopedNormalizedValue(OS, NormalizerRetTy);
+ MarshallingInfo(const Record &R) : R(R) {}
+
+ std::string getMacroName() const {
+ return (MacroPrefix + MarshallingInfo::MacroName).str();
+ }
+
+ void emit(raw_ostream &OS) const {
+ write_cstring(OS, StringRef(getOptionSpelling(R)));
+ OS << ", ";
+ OS << ShouldParse;
+ OS << ", ";
+ OS << ShouldAlwaysEmit;
+ OS << ", ";
+ OS << KeyPath;
+ OS << ", ";
+ emitScopedNormalizedValue(OS, DefaultValue);
+ OS << ", ";
+ OS << ImpliedCheck;
+ OS << ", ";
+ emitScopedNormalizedValue(OS, ImpliedValue);
OS << ", ";
OS << Normalizer;
OS << ", ";
OS << Denormalizer;
OS << ", ";
+ OS << ValueMerger;
+ OS << ", ";
+ OS << ValueExtractor;
+ OS << ", ";
OS << TableIndex;
}
- Optional<StringRef> emitValueTable(raw_ostream &OS) const override {
+ Optional<StringRef> emitValueTable(raw_ostream &OS) const {
if (TableIndex == -1)
return {};
OS << "static const SimpleEnumValue " << ValueTableName << "[] = {\n";
@@ -169,73 +146,66 @@ struct SimpleEnumValueTable {
return StringRef(ValueTableName);
}
- static std::unique_ptr<MarshallingKindInfo> create(const Record &R) {
- assert(!isa<UnsetInit>(R.getValueInit("NormalizerRetTy")) &&
- "String options must have a type");
-
- std::unique_ptr<MarshallingStringInfo> Ret(new MarshallingStringInfo(R));
- Ret->NormalizerRetTy = R.getValueAsString("NormalizerRetTy");
-
- Ret->Normalizer = R.getValueAsString("Normalizer");
- Ret->Denormalizer = R.getValueAsString("Denormalizer");
-
- if (!isa<UnsetInit>(R.getValueInit("NormalizedValues"))) {
- assert(!isa<UnsetInit>(R.getValueInit("Values")) &&
- "Cannot provide normalized values for value-less options");
- Ret->TableIndex = NextTableIndex++;
- Ret->NormalizedValues = R.getValueAsListOfStrings("NormalizedValues");
- Ret->Values.reserve(Ret->NormalizedValues.size());
- Ret->ValueTableName = getOptionName(R) + "ValueTable";
-
- StringRef ValuesStr = R.getValueAsString("Values");
- for (;;) {
- size_t Idx = ValuesStr.find(',');
- if (Idx == StringRef::npos)
- break;
- if (Idx > 0)
- Ret->Values.push_back(ValuesStr.slice(0, Idx));
- ValuesStr = ValuesStr.slice(Idx + 1, StringRef::npos);
- }
- if (!ValuesStr.empty())
- Ret->Values.push_back(ValuesStr);
-
- assert(Ret->Values.size() == Ret->NormalizedValues.size() &&
- "The number of normalized values doesn't match the number of "
- "values");
- }
-
- return Ret;
- }
-
private:
- MarshallingStringInfo(const Record &R)
- : MarshallingKindInfo(R, "OPTION_WITH_MARSHALLING_STRING") {}
-
- static size_t NextTableIndex;
+ void emitScopedNormalizedValue(raw_ostream &OS,
+ StringRef NormalizedValue) const {
+ if (!NormalizedValuesScope.empty())
+ OS << NormalizedValuesScope << "::";
+ OS << NormalizedValue;
+ }
};
-size_t MarshallingStringInfo::NextTableIndex = 0;
+size_t MarshallingInfo::NextTableIndex = 0;
-std::unique_ptr<MarshallingKindInfo>
-MarshallingKindInfo::create(const Record &R) {
+static MarshallingInfo createMarshallingInfo(const Record &R) {
assert(!isa<UnsetInit>(R.getValueInit("KeyPath")) &&
!isa<UnsetInit>(R.getValueInit("DefaultValue")) &&
- "Must provide at least a key-path and a default value for emitting "
- "marshalling information");
-
- std::unique_ptr<MarshallingKindInfo> Ret = nullptr;
- StringRef MarshallingKindStr = R.getValueAsString("MarshallingKind");
-
- if (MarshallingKindStr == "flag")
- Ret = MarshallingFlagInfo::create(R);
- else if (MarshallingKindStr == "string")
- Ret = MarshallingStringInfo::create(R);
-
- Ret->ShouldAlwaysEmit = R.getValueAsBit("ShouldAlwaysEmit");
- Ret->KeyPath = R.getValueAsString("KeyPath");
- Ret->DefaultValue = R.getValueAsString("DefaultValue");
- if (!isa<UnsetInit>(R.getValueInit("NormalizedValuesScope")))
- Ret->NormalizedValuesScope = R.getValueAsString("NormalizedValuesScope");
+ !isa<UnsetInit>(R.getValueInit("ValueMerger")) &&
+ "MarshallingInfo must provide a keypath, a default value, and a "
+ "value merger");
+
+ MarshallingInfo Ret(R);
+
+ Ret.ShouldAlwaysEmit = R.getValueAsBit("ShouldAlwaysEmit");
+ Ret.MacroPrefix = R.getValueAsString("MacroPrefix");
+ Ret.KeyPath = R.getValueAsString("KeyPath");
+ Ret.DefaultValue = R.getValueAsString("DefaultValue");
+ Ret.NormalizedValuesScope = R.getValueAsString("NormalizedValuesScope");
+ Ret.ImpliedCheck = R.getValueAsString("ImpliedCheck");
+ Ret.ImpliedValue =
+ R.getValueAsOptionalString("ImpliedValue").getValueOr(Ret.DefaultValue);
+
+ Ret.ShouldParse = R.getValueAsString("ShouldParse");
+ Ret.Normalizer = R.getValueAsString("Normalizer");
+ Ret.Denormalizer = R.getValueAsString("Denormalizer");
+ Ret.ValueMerger = R.getValueAsString("ValueMerger");
+ Ret.ValueExtractor = R.getValueAsString("ValueExtractor");
+
+ if (!isa<UnsetInit>(R.getValueInit("NormalizedValues"))) {
+ assert(!isa<UnsetInit>(R.getValueInit("Values")) &&
+ "Cannot provide normalized values for value-less options");
+ Ret.TableIndex = MarshallingInfo::NextTableIndex++;
+ Ret.NormalizedValues = R.getValueAsListOfStrings("NormalizedValues");
+ Ret.Values.reserve(Ret.NormalizedValues.size());
+ Ret.ValueTableName = getOptionName(R) + "ValueTable";
+
+ StringRef ValuesStr = R.getValueAsString("Values");
+ for (;;) {
+ size_t Idx = ValuesStr.find(',');
+ if (Idx == StringRef::npos)
+ break;
+ if (Idx > 0)
+ Ret.Values.push_back(ValuesStr.slice(0, Idx));
+ ValuesStr = ValuesStr.slice(Idx + 1, StringRef::npos);
+ }
+ if (!ValuesStr.empty())
+ Ret.Values.push_back(ValuesStr);
+
+ assert(Ret.Values.size() == Ret.NormalizedValues.size() &&
+ "The number of normalized values doesn't match the number of "
+ "values");
+ }
+
return Ret;
}
@@ -258,13 +228,12 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
PrefixesT Prefixes;
Prefixes.insert(std::make_pair(PrefixKeyT(), "prefix_0"));
unsigned CurPrefix = 0;
- for (unsigned i = 0, e = Opts.size(); i != e; ++i) {
- const Record &R = *Opts[i];
- std::vector<StringRef> prf = R.getValueAsListOfStrings("Prefixes");
- PrefixKeyT prfkey(prf.begin(), prf.end());
+ for (const Record &R : llvm::make_pointee_range(Opts)) {
+ std::vector<StringRef> RPrefixes = R.getValueAsListOfStrings("Prefixes");
+ PrefixKeyT PrefixKey(RPrefixes.begin(), RPrefixes.end());
unsigned NewPrefix = CurPrefix + 1;
- if (Prefixes.insert(std::make_pair(prfkey, (Twine("prefix_") +
- Twine(NewPrefix)).str())).second)
+ std::string Prefix = (Twine("prefix_") + Twine(NewPrefix)).str();
+ if (Prefixes.insert(std::make_pair(PrefixKey, Prefix)).second)
CurPrefix = NewPrefix;
}
@@ -274,19 +243,16 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << "// Prefixes\n\n";
OS << "#ifdef PREFIX\n";
OS << "#define COMMA ,\n";
- for (PrefixesT::const_iterator I = Prefixes.begin(), E = Prefixes.end();
- I != E; ++I) {
+ for (const auto &Prefix : Prefixes) {
OS << "PREFIX(";
// Prefix name.
- OS << I->second;
+ OS << Prefix.second;
// Prefix values.
OS << ", {";
- for (PrefixKeyT::const_iterator PI = I->first.begin(),
- PE = I->first.end(); PI != PE; ++PI) {
- OS << "\"" << *PI << "\" COMMA ";
- }
+ for (StringRef PrefixKey : Prefix.first)
+ OS << "\"" << PrefixKey << "\" COMMA ";
OS << "nullptr})\n";
}
OS << "#undef COMMA\n";
@@ -295,9 +261,7 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << "/////////\n";
OS << "// Groups\n\n";
OS << "#ifdef OPTION\n";
- for (unsigned i = 0, e = Groups.size(); i != e; ++i) {
- const Record &R = *Groups[i];
-
+ for (const Record &R : llvm::make_pointee_range(Groups)) {
// Start a single option entry.
OS << "OPTION(";
@@ -344,8 +308,8 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
auto WriteOptRecordFields = [&](raw_ostream &OS, const Record &R) {
// The option prefix;
- std::vector<StringRef> prf = R.getValueAsListOfStrings("Prefixes");
- OS << Prefixes[PrefixKeyT(prf.begin(), prf.end())] << ", ";
+ std::vector<StringRef> RPrefixes = R.getValueAsListOfStrings("Prefixes");
+ OS << Prefixes[PrefixKeyT(RPrefixes.begin(), RPrefixes.end())] << ", ";
// The option string.
emitNameUsingSpelling(OS, R);
@@ -382,8 +346,8 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << "nullptr";
} else {
OS << "\"";
- for (size_t i = 0, e = AliasArgs.size(); i != e; ++i)
- OS << AliasArgs[i] << "\\0";
+ for (StringRef AliasArg : AliasArgs)
+ OS << AliasArg << "\\0";
OS << "\"";
}
@@ -427,39 +391,63 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << "nullptr";
};
- std::vector<std::unique_ptr<MarshallingKindInfo>> OptsWithMarshalling;
- for (unsigned I = 0, E = Opts.size(); I != E; ++I) {
- const Record &R = *Opts[I];
+ auto IsMarshallingOption = [](const Record &R) {
+ return !isa<UnsetInit>(R.getValueInit("KeyPath")) &&
+ !R.getValueAsString("KeyPath").empty();
+ };
+ std::vector<const Record *> OptsWithMarshalling;
+ for (const Record &R : llvm::make_pointee_range(Opts)) {
// Start a single option entry.
OS << "OPTION(";
WriteOptRecordFields(OS, R);
OS << ")\n";
- if (!isa<UnsetInit>(R.getValueInit("MarshallingKind")))
- OptsWithMarshalling.push_back(MarshallingKindInfo::create(R));
+ if (IsMarshallingOption(R))
+ OptsWithMarshalling.push_back(&R);
}
OS << "#endif // OPTION\n";
- for (const auto &KindInfo : OptsWithMarshalling) {
- OS << "#ifdef " << KindInfo->MacroName << "\n";
- OS << KindInfo->MacroName << "(";
- WriteOptRecordFields(OS, KindInfo->R);
+ auto CmpMarshallingOpts = [](const Record *const *A, const Record *const *B) {
+ unsigned AID = (*A)->getID();
+ unsigned BID = (*B)->getID();
+
+ if (AID < BID)
+ return -1;
+ if (AID > BID)
+ return 1;
+ return 0;
+ };
+ // The RecordKeeper stores records (options) in lexicographical order, and we
+ // have reordered the options again when generating prefix groups. We need to
+ // restore the original definition order of options with marshalling to honor
+ // the topology of the dependency graph implied by `DefaultAnyOf`.
+ array_pod_sort(OptsWithMarshalling.begin(), OptsWithMarshalling.end(),
+ CmpMarshallingOpts);
+
+ std::vector<MarshallingInfo> MarshallingInfos;
+ for (const auto *R : OptsWithMarshalling)
+ MarshallingInfos.push_back(createMarshallingInfo(*R));
+
+ for (const auto &MI : MarshallingInfos) {
+ OS << "#ifdef " << MI.getMacroName() << "\n";
+ OS << MI.getMacroName() << "(";
+ WriteOptRecordFields(OS, MI.R);
OS << ", ";
- KindInfo->emit(OS);
+ MI.emit(OS);
OS << ")\n";
- OS << "#endif // " << KindInfo->MacroName << "\n";
+ OS << "#endif // " << MI.getMacroName() << "\n";
}
OS << "\n";
OS << "#ifdef SIMPLE_ENUM_VALUE_TABLE";
OS << "\n";
- OS << MarshallingStringInfo::ValueTablePreamble;
+ OS << MarshallingInfo::ValueTablePreamble;
std::vector<StringRef> ValueTableNames;
- for (const auto &KindInfo : OptsWithMarshalling)
- if (auto MaybeValueTableName = KindInfo->emitValueTable(OS))
+ for (const auto &MI : MarshallingInfos)
+ if (auto MaybeValueTableName = MI.emitValueTable(OS))
ValueTableNames.push_back(*MaybeValueTableName);
- OS << MarshallingStringInfo::ValueTablesDecl << "{";
+ OS << MarshallingInfo::ValueTablesDecl << "{";
for (auto ValueTableName : ValueTableNames)
OS << "{" << ValueTableName << ", sizeof(" << ValueTableName
<< ") / sizeof(SimpleEnumValue)"
@@ -475,8 +463,7 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << "#ifdef OPTTABLE_ARG_INIT\n";
OS << "//////////\n";
OS << "// Option Values\n\n";
- for (unsigned I = 0, E = Opts.size(); I != E; ++I) {
- const Record &R = *Opts[I];
+ for (const Record &R : llvm::make_pointee_range(Opts)) {
if (isa<UnsetInit>(R.getValueInit("ValuesCode")))
continue;
OS << "{\n";
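The rows emitted by the marshalling loop above are meant to be consumed with the usual .inc X-macro idiom: define OPTION_WITH_MARSHALLING (or the prefixed variant produced by getMacroName), include the generated file, and undefine the macro again. A minimal, purely hypothetical consumer that only counts those rows, assuming the generated output is saved as Options.inc:

    // Sketch only; real consumers expand each macro field into their own
    // option tables instead of discarding the arguments.
    static const unsigned NumMarshalledOptions = 0
    #define OPTION_WITH_MARSHALLING(...) + 1
    #include "Options.inc"
    #undef OPTION_WITH_MARSHALLING
        ;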
diff --git a/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.cpp b/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.cpp
index 9f7f40db2626..a76640f6d11f 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.cpp
@@ -198,6 +198,18 @@ void PredicateExpander::expandCheckIsImmOperand(raw_ostream &OS, int OpIndex) {
<< "getOperand(" << OpIndex << ").isImm() ";
}
+void PredicateExpander::expandCheckFunctionPredicateWithTII(
+ raw_ostream &OS, StringRef MCInstFn, StringRef MachineInstrFn,
+ StringRef TIIPtr) {
+ if (!shouldExpandForMC()) {
+ OS << (TIIPtr.empty() ? "TII" : TIIPtr) << "->" << MachineInstrFn;
+ OS << (isByRef() ? "(MI)" : "(*MI)");
+ return;
+ }
+
+ OS << MCInstFn << (isByRef() ? "(MI" : "(*MI") << ", MCII)";
+}
+
void PredicateExpander::expandCheckFunctionPredicate(raw_ostream &OS,
StringRef MCInstFn,
StringRef MachineInstrFn) {
@@ -358,10 +370,18 @@ void PredicateExpander::expandPredicate(raw_ostream &OS, const Record *Rec) {
return expandPredicateSequence(OS, Rec->getValueAsListOfDefs("Predicates"),
/* AllOf */ false);
- if (Rec->isSubClassOf("CheckFunctionPredicate"))
+ if (Rec->isSubClassOf("CheckFunctionPredicate")) {
return expandCheckFunctionPredicate(
OS, Rec->getValueAsString("MCInstFnName"),
Rec->getValueAsString("MachineInstrFnName"));
+ }
+
+ if (Rec->isSubClassOf("CheckFunctionPredicateWithTII")) {
+ return expandCheckFunctionPredicateWithTII(
+ OS, Rec->getValueAsString("MCInstFnName"),
+ Rec->getValueAsString("MachineInstrFnName"),
+ Rec->getValueAsString("TIIPtrName"));
+ }
if (Rec->isSubClassOf("CheckNonPortable"))
return expandCheckNonPortable(OS, Rec->getValueAsString("CodeBlock"));
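To see what the new TII-based expansion produces, assume a CheckFunctionPredicateWithTII record whose MCInstFnName and MachineInstrFnName are both isFooImm and whose TIIPtrName is empty; all three values are hypothetical. When not expanding for MC, with MI taken by reference, expandCheckFunctionPredicateWithTII prints

    TII->isFooImm(MI)

while the MC path prints

    isFooImm(MI, MCII)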
diff --git a/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.h b/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.h
index 115a81cf123b..29cca92d902c 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.h
@@ -79,6 +79,9 @@ public:
void expandCheckInvalidRegOperand(raw_ostream &OS, int OpIndex);
void expandCheckFunctionPredicate(raw_ostream &OS, StringRef MCInstFn,
StringRef MachineInstrFn);
+ void expandCheckFunctionPredicateWithTII(raw_ostream &OS, StringRef MCInstFn,
+ StringRef MachineInstrFn,
+ StringRef TIIPtr);
void expandCheckNonPortable(raw_ostream &OS, StringRef CodeBlock);
void expandPredicate(raw_ostream &OS, const Record *Rec);
void expandReturnStatement(raw_ostream &OS, const Record *Rec);
diff --git a/contrib/llvm-project/llvm/utils/TableGen/PseudoLoweringEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
index 3a80d8e5d1c4..e05409db67d0 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -89,11 +89,15 @@ addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Insn,
// problem.
// FIXME: We probably shouldn't ever get a non-zero BaseIdx here.
assert(BaseIdx == 0 && "Named subargument in pseudo expansion?!");
- if (DI->getDef() != Insn.Operands[BaseIdx + i].Rec)
- PrintFatalError(Rec->getLoc(),
- "Pseudo operand type '" + DI->getDef()->getName() +
- "' does not match expansion operand type '" +
- Insn.Operands[BaseIdx + i].Rec->getName() + "'");
+ // FIXME: Are the message operand types backward?
+ if (DI->getDef() != Insn.Operands[BaseIdx + i].Rec) {
+ PrintError(Rec, "In pseudo instruction '" + Rec->getName() +
+ "', operand type '" + DI->getDef()->getName() +
+ "' does not match expansion operand type '" +
+ Insn.Operands[BaseIdx + i].Rec->getName() + "'");
+ PrintFatalNote(DI->getDef(),
+ "Value was assigned at the following location:");
+ }
// Source operand maps to destination operand. The Data element
// will be filled in later, just set the Kind for now. Do it
// for each corresponding MachineInstr operand, not just the first.
@@ -128,23 +132,38 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
LLVM_DEBUG(dbgs() << " Result: " << *Dag << "\n");
DefInit *OpDef = dyn_cast<DefInit>(Dag->getOperator());
- if (!OpDef)
- PrintFatalError(Rec->getLoc(), Rec->getName() +
- " has unexpected operator type!");
+ if (!OpDef) {
+ PrintError(Rec, "In pseudo instruction '" + Rec->getName() +
+ "', result operator is not a record");
+ PrintFatalNote(Rec->getValue("ResultInst"),
+ "Result was assigned at the following location:");
+ }
Record *Operator = OpDef->getDef();
- if (!Operator->isSubClassOf("Instruction"))
- PrintFatalError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
- "' is not an instruction!");
+ if (!Operator->isSubClassOf("Instruction")) {
+ PrintError(Rec, "In pseudo instruction '" + Rec->getName() +
+ "', result operator '" + Operator->getName() +
+ "' is not an instruction");
+ PrintFatalNote(Rec->getValue("ResultInst"),
+ "Result was assigned at the following location:");
+ }
CodeGenInstruction Insn(Operator);
- if (Insn.isCodeGenOnly || Insn.isPseudo)
- PrintFatalError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
- "' cannot be another pseudo instruction!");
+ if (Insn.isCodeGenOnly || Insn.isPseudo) {
+ PrintError(Rec, "In pseudo instruction '" + Rec->getName() +
+ "', result operator '" + Operator->getName() +
+ "' cannot be a pseudo instruction");
+ PrintFatalNote(Rec->getValue("ResultInst"),
+ "Result was assigned at the following location:");
+ }
- if (Insn.Operands.size() != Dag->getNumArgs())
- PrintFatalError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
- "' operand count mismatch");
+ if (Insn.Operands.size() != Dag->getNumArgs()) {
+ PrintError(Rec, "In pseudo instruction '" + Rec->getName() +
+ "', result operator '" + Operator->getName() +
+ "' has the wrong number of operands");
+ PrintFatalNote(Rec->getValue("ResultInst"),
+ "Result was assigned at the following location:");
+ }
unsigned NumMIOperands = 0;
for (unsigned i = 0, e = Insn.Operands.size(); i != e; ++i)
@@ -177,10 +196,13 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
continue;
StringMap<unsigned>::iterator SourceOp =
SourceOperands.find(Dag->getArgNameStr(i));
- if (SourceOp == SourceOperands.end())
- PrintFatalError(Rec->getLoc(),
- "Pseudo output operand '" + Dag->getArgNameStr(i) +
- "' has no matching source operand.");
+ if (SourceOp == SourceOperands.end()) {
+ PrintError(Rec, "In pseudo instruction '" + Rec->getName() +
+ "', output operand '" + Dag->getArgNameStr(i) +
+ "' has no matching source operand");
+ PrintFatalNote(Rec->getValue("ResultInst"),
+ "Value was assigned at the following location:");
+ }
// Map the source operand to the destination operand index for each
// MachineInstr operand.
for (unsigned I = 0, E = Insn.Operands[i].MINumOperands; I != E; ++I)
@@ -204,15 +226,15 @@ void PseudoLoweringEmitter::emitLoweringEmitter(raw_ostream &o) {
if (!Expansions.empty()) {
o << " switch (MI->getOpcode()) {\n"
- << " default: return false;\n";
+ << " default: return false;\n";
for (auto &Expansion : Expansions) {
CodeGenInstruction &Source = Expansion.Source;
CodeGenInstruction &Dest = Expansion.Dest;
- o << " case " << Source.Namespace << "::"
+ o << " case " << Source.Namespace << "::"
<< Source.TheDef->getName() << ": {\n"
- << " MCInst TmpInst;\n"
- << " MCOperand MCOp;\n"
- << " TmpInst.setOpcode(" << Dest.Namespace << "::"
+ << " MCInst TmpInst;\n"
+ << " MCOperand MCOp;\n"
+ << " TmpInst.setOpcode(" << Dest.Namespace << "::"
<< Dest.TheDef->getName() << ");\n";
// Copy the operands from the source instruction.
@@ -221,23 +243,23 @@ void PseudoLoweringEmitter::emitLoweringEmitter(raw_ostream &o) {
// expansion DAG.
unsigned MIOpNo = 0;
for (const auto &DestOperand : Dest.Operands) {
- o << " // Operand: " << DestOperand.Name << "\n";
+ o << " // Operand: " << DestOperand.Name << "\n";
for (unsigned i = 0, e = DestOperand.MINumOperands; i != e; ++i) {
switch (Expansion.OperandMap[MIOpNo + i].Kind) {
case OpData::Operand:
- o << " lowerOperand(MI->getOperand("
+ o << " lowerOperand(MI->getOperand("
<< Source.Operands[Expansion.OperandMap[MIOpNo].Data
.Operand].MIOperandNo + i
<< "), MCOp);\n"
- << " TmpInst.addOperand(MCOp);\n";
+ << " TmpInst.addOperand(MCOp);\n";
break;
case OpData::Imm:
- o << " TmpInst.addOperand(MCOperand::createImm("
+ o << " TmpInst.addOperand(MCOperand::createImm("
<< Expansion.OperandMap[MIOpNo + i].Data.Imm << "));\n";
break;
case OpData::Reg: {
Record *Reg = Expansion.OperandMap[MIOpNo + i].Data.Reg;
- o << " TmpInst.addOperand(MCOperand::createReg(";
+ o << " TmpInst.addOperand(MCOperand::createReg(";
// "zero_reg" is special.
if (Reg->getName() == "zero_reg")
o << "0";
@@ -253,15 +275,15 @@ void PseudoLoweringEmitter::emitLoweringEmitter(raw_ostream &o) {
}
if (Dest.Operands.isVariadic) {
MIOpNo = Source.Operands.size() + 1;
- o << " // variable_ops\n";
- o << " for (unsigned i = " << MIOpNo
+ o << " // variable_ops\n";
+ o << " for (unsigned i = " << MIOpNo
<< ", e = MI->getNumOperands(); i != e; ++i)\n"
- << " if (lowerOperand(MI->getOperand(i), MCOp))\n"
- << " TmpInst.addOperand(MCOp);\n";
+ << " if (lowerOperand(MI->getOperand(i), MCOp))\n"
+ << " TmpInst.addOperand(MCOp);\n";
}
- o << " EmitToStreamer(OutStreamer, TmpInst);\n"
- << " break;\n"
- << " }\n";
+ o << " EmitToStreamer(OutStreamer, TmpInst);\n"
+ << " break;\n"
+ << " }\n";
}
o << " }\n return true;";
} else
@@ -271,24 +293,18 @@ void PseudoLoweringEmitter::emitLoweringEmitter(raw_ostream &o) {
}
void PseudoLoweringEmitter::run(raw_ostream &o) {
- Record *ExpansionClass = Records.getClass("PseudoInstExpansion");
- Record *InstructionClass = Records.getClass("Instruction");
- assert(ExpansionClass && "PseudoInstExpansion class definition missing!");
- assert(InstructionClass && "Instruction class definition missing!");
-
- std::vector<Record*> Insts;
- for (const auto &D : Records.getDefs()) {
- if (D.second->isSubClassOf(ExpansionClass) &&
- D.second->isSubClassOf(InstructionClass))
- Insts.push_back(D.second.get());
- }
+ StringRef Classes[] = {"PseudoInstExpansion", "Instruction"};
+ std::vector<Record *> Insts =
+ Records.getAllDerivedDefinitions(makeArrayRef(Classes));
// Process the pseudo expansion definitions, validating them as we do so.
+ Records.startTimer("Process definitions");
for (unsigned i = 0, e = Insts.size(); i != e; ++i)
evaluateExpansion(Insts[i]);
// Generate expansion code to lower the pseudo to an MCInst of the real
// instruction.
+ Records.startTimer("Emit expansion code");
emitLoweringEmitter(o);
}
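getAllDerivedDefinitions with several class names returns only the records that derive from every listed class, so the call above gathers the pseudo-expansion records that are also instructions, the same set the removed hand-written loop produced:

    // The replaced loop, kept here as a sketch of what the one-line query
    // now does internally.
    std::vector<Record *> Insts;
    for (const auto &D : Records.getDefs())
      if (D.second->isSubClassOf("PseudoInstExpansion") &&
          D.second->isSubClassOf("Instruction"))
        Insts.push_back(D.second.get());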
diff --git a/contrib/llvm-project/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp
index f298e639bf7f..183c8f9494db 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp
@@ -37,11 +37,11 @@
// compressing/uncompressing MCInst instructions, plus
// some helper functions:
//
-// bool compressInst(MCInst& OutInst, const MCInst &MI,
+// bool compressInst(MCInst &OutInst, const MCInst &MI,
// const MCSubtargetInfo &STI,
// MCContext &Context);
//
-// bool uncompressInst(MCInst& OutInst, const MCInst &MI,
+// bool uncompressInst(MCInst &OutInst, const MCInst &MI,
// const MCRegisterInfo &MRI,
// const MCSubtargetInfo &STI);
//
@@ -101,11 +101,12 @@ class RISCVCompressInstEmitter {
IndexedMap<OpData>
DestOperandMap; // Maps operands in the Dest Instruction
// to the corresponding Source instruction operand.
+ bool IsCompressOnly;
CompressPat(CodeGenInstruction &S, CodeGenInstruction &D,
std::vector<Record *> RF, IndexedMap<OpData> &SourceMap,
- IndexedMap<OpData> &DestMap)
+ IndexedMap<OpData> &DestMap, bool IsCompressOnly)
: Source(S), Dest(D), PatReqFeatures(RF), SourceOperandMap(SourceMap),
- DestOperandMap(DestMap) {}
+ DestOperandMap(DestMap), IsCompressOnly(IsCompressOnly) {}
};
enum EmitterType { Compress, Uncompress, CheckCompress };
RecordKeeper &Records;
@@ -138,13 +139,12 @@ public:
} // End anonymous namespace.
bool RISCVCompressInstEmitter::validateRegister(Record *Reg, Record *RegClass) {
- assert(Reg->isSubClassOf("Register") && "Reg record should be a Register\n");
- assert(RegClass->isSubClassOf("RegisterClass") && "RegClass record should be"
- " a RegisterClass\n");
+ assert(Reg->isSubClassOf("Register") && "Reg record should be a Register");
+ assert(RegClass->isSubClassOf("RegisterClass") &&
+ "RegClass record should be a RegisterClass");
const CodeGenRegisterClass &RC = Target.getRegisterClass(RegClass);
const CodeGenRegister *R = Target.getRegisterByName(Reg->getName().lower());
- assert((R != nullptr) &&
- ("Register" + Reg->getName().str() + " not defined!!\n").c_str());
+ assert((R != nullptr) && "Register not defined!!");
return RC.contains(R);
}
@@ -237,9 +237,9 @@ void RISCVCompressInstEmitter::addDagOperandMapping(
if (Inst.Operands[i].Rec->isSubClassOf("RegisterClass"))
PrintFatalError(
Rec->getLoc(),
- ("Error in Dag '" + Dag->getAsString() + "' Found immediate: '" +
- II->getAsString() +
- "' but corresponding instruction operand expected a register!"));
+ "Error in Dag '" + Dag->getAsString() + "' Found immediate: '" +
+ II->getAsString() +
+ "' but corresponding instruction operand expected a register!");
// No pattern validation check possible for values of fixed immediate.
OperandMap[i].Kind = OpData::Imm;
OperandMap[i].Data.Imm = II->getValue();
@@ -285,11 +285,7 @@ static bool verifyDagOpCount(CodeGenInstruction &Inst, DagInit *Dag,
}
static bool validateArgsTypes(Init *Arg1, Init *Arg2) {
- DefInit *Type1 = dyn_cast<DefInit>(Arg1);
- DefInit *Type2 = dyn_cast<DefInit>(Arg2);
- assert(Type1 && ("Arg1 type not found\n"));
- assert(Type2 && ("Arg2 type not found\n"));
- return Type1->getDef() == Type2->getDef();
+ return cast<DefInit>(Arg1)->getDef() == cast<DefInit>(Arg2)->getDef();
}
// Creates a mapping between the operand name in the Dag (e.g. $rs1) and
@@ -471,7 +467,8 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
});
CompressPatterns.push_back(CompressPat(SourceInst, DestInst, PatReqFeatures,
- SourceOperandMap, DestOperandMap));
+ SourceOperandMap, DestOperandMap,
+ Rec->getValueAsBit("isCompressOnly")));
}
static void
@@ -514,50 +511,41 @@ getReqFeatures(std::set<std::pair<bool, StringRef>> &FeaturesSet,
static unsigned getPredicates(DenseMap<const Record *, unsigned> &PredicateMap,
std::vector<const Record *> &Predicates,
Record *Rec, StringRef Name) {
- unsigned Entry = PredicateMap[Rec];
+ unsigned &Entry = PredicateMap[Rec];
if (Entry)
return Entry;
if (!Rec->isValueUnset(Name)) {
Predicates.push_back(Rec);
Entry = Predicates.size();
- PredicateMap[Rec] = Entry;
return Entry;
}
PrintFatalError(Rec->getLoc(), "No " + Name +
- " predicate on this operand at all: '" + Rec->getName().str() + "'");
+ " predicate on this operand at all: '" +
+ Rec->getName() + "'");
return 0;
}
-static void printPredicates(std::vector<const Record *> &Predicates,
+static void printPredicates(const std::vector<const Record *> &Predicates,
StringRef Name, raw_ostream &o) {
for (unsigned i = 0; i < Predicates.size(); ++i) {
- Init *Pred = Predicates[i]->getValueInit(Name);
- if (CodeInit *SI = dyn_cast<CodeInit>(Pred))
- o << " case " << i + 1 << ": {\n"
- << " // " << Predicates[i]->getName().str() << "\n"
- << " " << SI->getValue() << "\n"
- << " }\n";
- else
- llvm_unreachable("Unexpected predicate field!");
+ StringRef Pred = Predicates[i]->getValueAsString(Name);
+ o << " case " << i + 1 << ": {\n"
+ << " // " << Predicates[i]->getName() << "\n"
+ << " " << Pred << "\n"
+ << " }\n";
}
}
-static std::string mergeCondAndCode(raw_string_ostream &CondStream,
- raw_string_ostream &CodeStream) {
- std::string S;
- raw_string_ostream CombinedStream(S);
- CombinedStream.indent(4)
- << "if ("
- << CondStream.str().substr(
- 6, CondStream.str().length() -
- 10) // remove first indentation and last '&&'.
- << ") {\n";
- CombinedStream << CodeStream.str();
+static void mergeCondAndCode(raw_ostream &CombinedStream, StringRef CondStr,
+ StringRef CodeStr) {
+ // Remove first indentation and last '&&'.
+ CondStr = CondStr.drop_front(6).drop_back(4);
+ CombinedStream.indent(4) << "if (" << CondStr << ") {\n";
+ CombinedStream << CodeStr;
CombinedStream.indent(4) << " return true;\n";
CombinedStream.indent(4) << "} // if\n";
- return CombinedStream.str();
}
void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
@@ -568,23 +556,20 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
"'PassSubtarget' is false. SubTargetInfo object is needed "
"for target features.\n");
- std::string Namespace = std::string(Target.getName());
+ StringRef Namespace = Target.getName();
// Sort entries in CompressPatterns to handle instructions that can have more
// than one candidate for compression/uncompression, e.g. ADD can be
// transformed to a C_ADD or a C_MV. When emitting 'uncompress()' function the
// source and destination are flipped and the sort key needs to change
// accordingly.
- llvm::stable_sort(CompressPatterns,
- [EType](const CompressPat &LHS, const CompressPat &RHS) {
- if (EType == EmitterType::Compress ||
- EType == EmitterType::CheckCompress)
- return (LHS.Source.TheDef->getName().str() <
- RHS.Source.TheDef->getName().str());
- else
- return (LHS.Dest.TheDef->getName().str() <
- RHS.Dest.TheDef->getName().str());
- });
+ llvm::stable_sort(CompressPatterns, [EType](const CompressPat &LHS,
+ const CompressPat &RHS) {
+ if (EType == EmitterType::Compress || EType == EmitterType::CheckCompress)
+ return (LHS.Source.TheDef->getName() < RHS.Source.TheDef->getName());
+ else
+ return (LHS.Dest.TheDef->getName() < RHS.Dest.TheDef->getName());
+ });
// A list of MCOperandPredicates for all operands in use, and the reverse map.
std::vector<const Record *> MCOpPredicates;
@@ -610,17 +595,17 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
<< "#undef GEN_CHECK_COMPRESS_INSTR\n\n";
if (EType == EmitterType::Compress) {
- FuncH << "static bool compressInst(MCInst& OutInst,\n";
+ FuncH << "static bool compressInst(MCInst &OutInst,\n";
FuncH.indent(25) << "const MCInst &MI,\n";
FuncH.indent(25) << "const MCSubtargetInfo &STI,\n";
FuncH.indent(25) << "MCContext &Context) {\n";
} else if (EType == EmitterType::Uncompress){
- FuncH << "static bool uncompressInst(MCInst& OutInst,\n";
+ FuncH << "static bool uncompressInst(MCInst &OutInst,\n";
FuncH.indent(27) << "const MCInst &MI,\n";
FuncH.indent(27) << "const MCRegisterInfo &MRI,\n";
FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n";
} else if (EType == EmitterType::CheckCompress) {
- FuncH << "static bool isCompressibleInst(const MachineInstr& MI,\n";
+ FuncH << "static bool isCompressibleInst(const MachineInstr &MI,\n";
FuncH.indent(27) << "const RISCVSubtarget *Subtarget,\n";
FuncH.indent(27) << "const MCRegisterInfo &MRI,\n";
FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n";
@@ -638,10 +623,10 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
return;
}
- std::string CaseString("");
+ std::string CaseString;
raw_string_ostream CaseStream(CaseString);
- std::string PrevOp("");
- std::string CurOp("");
+ StringRef PrevOp;
+ StringRef CurOp;
CaseStream << " switch (MI.getOpcode()) {\n";
CaseStream << " default: return false;\n";
@@ -651,6 +636,9 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
EType == EmitterType::Compress || EType == EmitterType::Uncompress;
for (auto &CompressPat : CompressPatterns) {
+ if (EType == EmitterType::Uncompress && CompressPat.IsCompressOnly)
+ continue;
+
std::string CondString;
std::string CodeString;
raw_string_ostream CondStream(CondString);
@@ -664,10 +652,10 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
IndexedMap<OpData> &DestOperandMap = CompressOrCheck ?
CompressPat.DestOperandMap : CompressPat.SourceOperandMap;
- CurOp = Source.TheDef->getName().str();
+ CurOp = Source.TheDef->getName();
// Check current and previous opcode to decide to continue or end a case.
if (CurOp != PrevOp) {
- if (PrevOp != "")
+ if (!PrevOp.empty())
CaseStream.indent(6) << "break;\n } // case " + PrevOp + "\n";
CaseStream.indent(4) << "case " + Namespace + "::" + CurOp + ": {\n";
}
@@ -688,9 +676,9 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// Emit checks for all required features.
for (auto &Op : FeaturesSet) {
StringRef Not = Op.first ? "!" : "";
- CondStream.indent(6)
- << Not << ("STI.getFeatureBits()[" + Namespace + "::" + Op.second + "]").str() +
- " &&\n";
+ CondStream.indent(6) << Not << "STI.getFeatureBits()[" << Namespace
+ << "::" << Op.second << "]"
+ << " &&\n";
}
// Emit checks for all required feature groups.
@@ -699,8 +687,8 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
for (auto &Op : Set) {
bool isLast = &Op == &*Set.rbegin();
StringRef Not = Op.first ? "!" : "";
- CondStream << Not << ("STI.getFeatureBits()[" + Namespace + "::" + Op.second +
- "]").str();
+ CondStream << Not << "STI.getFeatureBits()[" << Namespace
+ << "::" << Op.second << "]";
if (!isLast)
CondStream << " || ";
}
@@ -713,10 +701,8 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
if (SourceOperandMap[OpNo].TiedOpIdx != -1) {
if (Source.Operands[OpNo].Rec->isSubClassOf("RegisterClass"))
CondStream.indent(6)
- << "(MI.getOperand("
- << std::to_string(OpNo) + ").getReg() == MI.getOperand("
- << std::to_string(SourceOperandMap[OpNo].TiedOpIdx)
- << ").getReg()) &&\n";
+ << "(MI.getOperand(" << OpNo << ").getReg() == MI.getOperand("
+ << SourceOperandMap[OpNo].TiedOpIdx << ").getReg()) &&\n";
else
PrintFatalError("Unexpected tied operand types!\n");
}
@@ -727,27 +713,26 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
break;
case OpData::Imm:
CondStream.indent(6)
- << "(MI.getOperand(" + std::to_string(OpNo) + ").isImm()) &&\n" +
- " (MI.getOperand(" + std::to_string(OpNo) +
- ").getImm() == " +
- std::to_string(SourceOperandMap[OpNo].Data.Imm) + ") &&\n";
+ << "(MI.getOperand(" << OpNo << ").isImm()) &&\n"
+ << " (MI.getOperand(" << OpNo
+ << ").getImm() == " << SourceOperandMap[OpNo].Data.Imm << ") &&\n";
break;
case OpData::Reg: {
Record *Reg = SourceOperandMap[OpNo].Data.Reg;
- CondStream.indent(6) << "(MI.getOperand(" + std::to_string(OpNo) +
- ").getReg() == " + Namespace +
- "::" + Reg->getName().str() + ") &&\n";
+ CondStream.indent(6)
+ << "(MI.getOperand(" << OpNo << ").getReg() == " << Namespace
+ << "::" << Reg->getName() << ") &&\n";
break;
}
}
}
- CodeStream.indent(6) << "// " + Dest.AsmString + "\n";
+ CodeStream.indent(6) << "// " << Dest.AsmString << "\n";
if (CompressOrUncompress)
- CodeStream.indent(6) << "OutInst.setOpcode(" + Namespace +
- "::" + Dest.TheDef->getName().str() + ");\n";
+ CodeStream.indent(6) << "OutInst.setOpcode(" << Namespace
+ << "::" << Dest.TheDef->getName() << ");\n";
OpNo = 0;
for (const auto &DestOperand : Dest.Operands) {
- CodeStream.indent(6) << "// Operand: " + DestOperand.Name + "\n";
+ CodeStream.indent(6) << "// Operand: " << DestOperand.Name << "\n";
switch (DestOperandMap[OpNo].Kind) {
case OpData::Operand: {
unsigned OpIdx = DestOperandMap[OpNo].Data.Operand;
@@ -759,68 +744,67 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// Don't check the register class if this is a tied operand; it was done
// for the operand it's tied to.
if (DestOperand.getTiedRegister() == -1)
- CondStream.indent(6)
- << "(MRI.getRegClass(" + Namespace +
- "::" + DestOperand.Rec->getName().str() +
- "RegClassID).contains(" + "MI.getOperand(" +
- std::to_string(OpIdx) + ").getReg())) &&\n";
+ CondStream.indent(6) << "(MRI.getRegClass(" << Namespace
+ << "::" << DestOperand.Rec->getName()
+ << "RegClassID).contains(MI.getOperand("
+ << OpIdx << ").getReg())) &&\n";
if (CompressOrUncompress)
- CodeStream.indent(6) << "OutInst.addOperand(MI.getOperand(" +
- std::to_string(OpIdx) + "));\n";
+ CodeStream.indent(6)
+ << "OutInst.addOperand(MI.getOperand(" << OpIdx << "));\n";
} else {
// Handling immediate operands.
if (CompressOrUncompress) {
- unsigned Entry = getPredicates(MCOpPredicateMap, MCOpPredicates,
- DestOperand.Rec, StringRef("MCOperandPredicate"));
- CondStream.indent(6) << Namespace + "ValidateMCOperand(" +
- "MI.getOperand(" + std::to_string(OpIdx) +
- "), STI, " + std::to_string(Entry) +
- ") &&\n";
+ unsigned Entry =
+ getPredicates(MCOpPredicateMap, MCOpPredicates, DestOperand.Rec,
+ "MCOperandPredicate");
+ CondStream.indent(6)
+ << Namespace << "ValidateMCOperand("
+ << "MI.getOperand(" << OpIdx << "), STI, " << Entry << ") &&\n";
} else {
- unsigned Entry = getPredicates(ImmLeafPredicateMap, ImmLeafPredicates,
- DestOperand.Rec, StringRef("ImmediateCode"));
- CondStream.indent(6) << "MI.getOperand(" + std::to_string(OpIdx) +
- ").isImm() && \n";
- CondStream.indent(6) << Namespace + "ValidateMachineOperand(" +
- "MI.getOperand(" + std::to_string(OpIdx) +
- "), Subtarget, " + std::to_string(Entry) +
- ") &&\n";
+ unsigned Entry =
+ getPredicates(ImmLeafPredicateMap, ImmLeafPredicates,
+ DestOperand.Rec, "ImmediateCode");
+ CondStream.indent(6)
+ << "MI.getOperand(" << OpIdx << ").isImm() &&\n";
+ CondStream.indent(6) << Namespace << "ValidateMachineOperand("
+ << "MI.getOperand(" << OpIdx
+ << "), Subtarget, " << Entry << ") &&\n";
}
if (CompressOrUncompress)
- CodeStream.indent(6) << "OutInst.addOperand(MI.getOperand(" +
- std::to_string(OpIdx) + "));\n";
+ CodeStream.indent(6)
+ << "OutInst.addOperand(MI.getOperand(" << OpIdx << "));\n";
}
break;
}
case OpData::Imm: {
if (CompressOrUncompress) {
unsigned Entry = getPredicates(MCOpPredicateMap, MCOpPredicates,
- DestOperand.Rec, StringRef("MCOperandPredicate"));
+ DestOperand.Rec, "MCOperandPredicate");
CondStream.indent(6)
- << Namespace + "ValidateMCOperand(" + "MCOperand::createImm(" +
- std::to_string(DestOperandMap[OpNo].Data.Imm) + "), STI, " +
- std::to_string(Entry) + ") &&\n";
+ << Namespace << "ValidateMCOperand("
+ << "MCOperand::createImm(" << DestOperandMap[OpNo].Data.Imm
+ << "), STI, " << Entry << ") &&\n";
} else {
unsigned Entry = getPredicates(ImmLeafPredicateMap, ImmLeafPredicates,
- DestOperand.Rec, StringRef("ImmediateCode"));
+ DestOperand.Rec, "ImmediateCode");
CondStream.indent(6)
- << Namespace + "ValidateMachineOperand(" + "MachineOperand::CreateImm(" +
- std::to_string(DestOperandMap[OpNo].Data.Imm) + "), SubTarget, " +
- std::to_string(Entry) + ") &&\n";
+ << Namespace
+ << "ValidateMachineOperand(MachineOperand::CreateImm("
+ << DestOperandMap[OpNo].Data.Imm << "), SubTarget, " << Entry
+ << ") &&\n";
}
if (CompressOrUncompress)
- CodeStream.indent(6)
- << "OutInst.addOperand(MCOperand::createImm(" +
- std::to_string(DestOperandMap[OpNo].Data.Imm) + "));\n";
+ CodeStream.indent(6) << "OutInst.addOperand(MCOperand::createImm("
+ << DestOperandMap[OpNo].Data.Imm << "));\n";
} break;
case OpData::Reg: {
if (CompressOrUncompress) {
// Fixed register has been validated at pattern validation time.
Record *Reg = DestOperandMap[OpNo].Data.Reg;
- CodeStream.indent(6) << "OutInst.addOperand(MCOperand::createReg(" +
- Namespace + "::" + Reg->getName().str() +
- "));\n";
+ CodeStream.indent(6)
+ << "OutInst.addOperand(MCOperand::createReg(" << Namespace
+ << "::" << Reg->getName() << "));\n";
}
} break;
}
@@ -828,12 +812,12 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
}
if (CompressOrUncompress)
CodeStream.indent(6) << "OutInst.setLoc(MI.getLoc());\n";
- CaseStream << mergeCondAndCode(CondStream, CodeStream);
+ mergeCondAndCode(CaseStream, CondStream.str(), CodeStream.str());
PrevOp = CurOp;
}
Func << CaseStream.str() << "\n";
// Close brace for the last case.
- Func.indent(4) << "} // case " + CurOp + "\n";
+ Func.indent(4) << "} // case " << CurOp << "\n";
Func.indent(2) << "} // switch\n";
Func.indent(2) << "return false;\n}\n";
@@ -858,7 +842,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
<< "ValidateMachineOperand(const MachineOperand &MO,\n"
<< " const RISCVSubtarget *Subtarget,\n"
<< " unsigned PredicateIndex) {\n"
- << " int64_t Imm = MO.getImm(); \n"
+ << " int64_t Imm = MO.getImm();\n"
<< " switch (PredicateIndex) {\n"
<< " default:\n"
<< " llvm_unreachable(\"Unknown ImmLeaf Predicate kind\");\n"
@@ -884,13 +868,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
}
void RISCVCompressInstEmitter::run(raw_ostream &o) {
- Record *CompressClass = Records.getClass("CompressPat");
- assert(CompressClass && "Compress class definition missing!");
- std::vector<Record *> Insts;
- for (const auto &D : Records.getDefs()) {
- if (D.second->isSubClassOf(CompressClass))
- Insts.push_back(D.second.get());
- }
+ std::vector<Record *> Insts = Records.getAllDerivedDefinitions("CompressPat");
// Process the CompressPat definitions, validating them as we do so.
for (unsigned i = 0, e = Insts.size(); i != e; ++i)
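For reference, RecordKeeper::getAllDerivedDefinitions (used in the replacement above) gathers every record deriving from a named class in one call; a minimal stand-alone sketch of that idiom, with a made-up helper name:

#include "llvm/TableGen/Record.h"
#include <vector>

// Hypothetical helper; the API call matches the hunk above.
static std::vector<llvm::Record *> getCompressPats(llvm::RecordKeeper &Records) {
  // One call replaces the manual scan over Records.getDefs() + isSubClassOf().
  return Records.getAllDerivedDefinitions("CompressPat");
}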
diff --git a/contrib/llvm-project/llvm/utils/TableGen/RegisterBankEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/RegisterBankEmitter.cpp
index 586f857b1fb0..0725657150f8 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/RegisterBankEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/RegisterBankEmitter.cpp
@@ -71,10 +71,7 @@ public:
/// Add a register class to the bank without duplicates.
void addRegisterClass(const CodeGenRegisterClass *RC) {
- if (std::find_if(RCs.begin(), RCs.end(),
- [&RC](const CodeGenRegisterClass *X) {
- return X == RC;
- }) != RCs.end())
+ if (llvm::is_contained(RCs, RC))
return;
// FIXME? We really want the register size rather than the spill size
@@ -131,9 +128,12 @@ void RegisterBankEmitter::emitHeader(raw_ostream &OS,
// <Target>RegisterBankInfo.h
OS << "namespace llvm {\n"
<< "namespace " << TargetName << " {\n"
- << "enum {\n";
+ << "enum : unsigned {\n";
+
+ OS << " InvalidRegBankID = ~0u,\n";
+ unsigned ID = 0;
for (const auto &Bank : Banks)
- OS << " " << Bank.getEnumeratorName() << ",\n";
+ OS << " " << Bank.getEnumeratorName() << " = " << ID++ << ",\n";
OS << " NumRegisterBanks,\n"
<< "};\n"
<< "} // end namespace " << TargetName << "\n"
@@ -168,7 +168,7 @@ void RegisterBankEmitter::emitBaseClassDefinition(
/// to the class.
static void visitRegisterBankClasses(
const CodeGenRegBank &RegisterClassHierarchy,
- const CodeGenRegisterClass *RC, const Twine Kind,
+ const CodeGenRegisterClass *RC, const Twine &Kind,
std::function<void(const CodeGenRegisterClass *, StringRef)> VisitFn,
SmallPtrSetImpl<const CodeGenRegisterClass *> &VisitedRCs) {
@@ -182,7 +182,7 @@ static void visitRegisterBankClasses(
for (const auto &PossibleSubclass : RegisterClassHierarchy.getRegClasses()) {
std::string TmpKind =
- (Twine(Kind) + " (" + PossibleSubclass.getName() + ")").str();
+ (Kind + " (" + PossibleSubclass.getName() + ")").str();
// Visit each subclass of an explicitly named class.
if (RC != &PossibleSubclass && RC->hasSubClass(&PossibleSubclass))
@@ -279,6 +279,7 @@ void RegisterBankEmitter::run(raw_ostream &OS) {
StringRef TargetName = Target.getName();
const CodeGenRegBank &RegisterClassHierarchy = Target.getRegBank();
+ Records.startTimer("Analyze records");
std::vector<RegisterBank> Banks;
for (const auto &V : Records.getAllDerivedDefinitions("RegisterBank")) {
SmallPtrSet<const CodeGenRegisterClass *, 8> VisitedRCs;
@@ -300,6 +301,7 @@ void RegisterBankEmitter::run(raw_ostream &OS) {
}
// Warn about ambiguous MIR caused by register bank/class name clashes.
+ Records.startTimer("Warn ambiguous");
for (const auto &Class : RegisterClassHierarchy.getRegClasses()) {
for (const auto &Bank : Banks) {
if (Bank.getName().lower() == StringRef(Class.getName()).lower()) {
@@ -312,6 +314,7 @@ void RegisterBankEmitter::run(raw_ostream &OS) {
}
}
+ Records.startTimer("Emit output");
emitSourceFileHeader("Register Bank Source Fragments", OS);
OS << "#ifdef GET_REGBANK_DECLARATIONS\n"
<< "#undef GET_REGBANK_DECLARATIONS\n";
diff --git a/contrib/llvm-project/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index a615587efdee..dce7594dec2f 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -127,7 +127,7 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS,
OS << " " << Reg.getName() << " = " << Reg.EnumValue << ",\n";
assert(Registers.size() == Registers.back().EnumValue &&
"Register enum value mismatch!");
- OS << " NUM_TARGET_REGS \t// " << Registers.size()+1 << "\n";
+ OS << " NUM_TARGET_REGS // " << Registers.size()+1 << "\n";
OS << "};\n";
if (!Namespace.empty())
OS << "} // end namespace " << Namespace << "\n";
@@ -146,7 +146,7 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS,
for (const auto &RC : RegisterClasses)
OS << " " << RC.getName() << "RegClassID"
<< " = " << RC.EnumValue << ",\n";
- OS << "\n };\n";
+ OS << "\n};\n";
if (!Namespace.empty())
OS << "} // end namespace " << Namespace << "\n\n";
}
@@ -323,7 +323,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
OS << "/// Get the dimensions of register pressure impacted by this "
<< "register class.\n"
<< "/// Returns a -1 terminated array of pressure set IDs\n"
- << "const int* " << ClassName << "::\n"
+ << "const int *" << ClassName << "::\n"
<< "getRegClassPressureSets(const TargetRegisterClass *RC) const {\n";
OS << " static const " << getMinimalTypeForRange(PSetsSeqs.size() - 1, 32)
<< " RCSetStartTable[] = {\n ";
@@ -337,7 +337,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
OS << "/// Get the dimensions of register pressure impacted by this "
<< "register unit.\n"
<< "/// Returns a -1 terminated array of pressure set IDs\n"
- << "const int* " << ClassName << "::\n"
+ << "const int *" << ClassName << "::\n"
<< "getRegUnitPressureSets(unsigned RegUnit) const {\n"
<< " assert(RegUnit < " << RegBank.getNumNativeRegUnits()
<< " && \"invalid register unit\");\n";
@@ -356,11 +356,10 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
using DwarfRegNumsMapPair = std::pair<Record*, std::vector<int64_t>>;
using DwarfRegNumsVecTy = std::vector<DwarfRegNumsMapPair>;
-void finalizeDwarfRegNumsKeys(DwarfRegNumsVecTy &DwarfRegNums) {
+static void finalizeDwarfRegNumsKeys(DwarfRegNumsVecTy &DwarfRegNums) {
// Sort and unique to get a map-like vector. We want the last assignment to
// match previous behaviour.
- std::stable_sort(DwarfRegNums.begin(), DwarfRegNums.end(),
- on_first<LessRecordRegister>());
+ llvm::stable_sort(DwarfRegNums, on_first<LessRecordRegister>());
// Warn about duplicate assignments.
const Record *LastSeenReg = nullptr;
for (const auto &X : DwarfRegNums) {
@@ -462,18 +461,16 @@ void RegisterInfoEmitter::EmitRegMappingTables(
DefInit *DI = cast<DefInit>(V->getValue());
Record *Alias = DI->getDef();
- const auto &AliasIter =
- std::lower_bound(DwarfRegNums.begin(), DwarfRegNums.end(), Alias,
- [](const DwarfRegNumsMapPair &A, const Record *B) {
- return LessRecordRegister()(A.first, B);
- });
+ const auto &AliasIter = llvm::lower_bound(
+ DwarfRegNums, Alias, [](const DwarfRegNumsMapPair &A, const Record *B) {
+ return LessRecordRegister()(A.first, B);
+ });
assert(AliasIter != DwarfRegNums.end() && AliasIter->first == Alias &&
"Expected Alias to be present in map");
- const auto &RegIter =
- std::lower_bound(DwarfRegNums.begin(), DwarfRegNums.end(), Reg,
- [](const DwarfRegNumsMapPair &A, const Record *B) {
- return LessRecordRegister()(A.first, B);
- });
+ const auto &RegIter = llvm::lower_bound(
+ DwarfRegNums, Reg, [](const DwarfRegNumsMapPair &A, const Record *B) {
+ return LessRecordRegister()(A.first, B);
+ });
assert(RegIter != DwarfRegNums.end() && RegIter->first == Reg &&
"Expected Reg to be present in map");
RegIter->second = AliasIter->second;
@@ -960,7 +957,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
const auto &RUMasks = Reg.getRegUnitLaneMasks();
MaskVec &LaneMaskVec = RegUnitLaneMasks[i];
assert(LaneMaskVec.empty());
- LaneMaskVec.insert(LaneMaskVec.begin(), RUMasks.begin(), RUMasks.end());
+ llvm::append_range(LaneMaskVec, RUMasks);
// Terminator mask should not be used inside of the list.
#ifndef NDEBUG
for (LaneBitmask M : LaneMaskVec) {
@@ -1167,7 +1164,7 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
<< " LaneBitmask reverseComposeSubRegIndexLaneMaskImpl"
<< "(unsigned, LaneBitmask) const override;\n"
<< " const TargetRegisterClass *getSubClassWithSubReg"
- << "(const TargetRegisterClass*, unsigned) const override;\n";
+ << "(const TargetRegisterClass *, unsigned) const override;\n";
}
OS << " const RegClassWeight &getRegClassWeight("
<< "const TargetRegisterClass *RC) const override;\n"
@@ -1288,7 +1285,8 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << CGH.getMode(M).Name;
OS << ")\n";
for (const auto &RC : RegisterClasses) {
- assert(RC.EnumValue == EV++ && "Unexpected order of register classes");
+ assert(RC.EnumValue == EV && "Unexpected order of register classes");
+ ++EV;
(void)EV;
const RegSizeInfo &RI = RC.RSI.get(M);
OS << " { " << RI.RegSize << ", " << RI.SpillSize << ", "
@@ -1435,7 +1433,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
}
OS << "\nnamespace {\n";
- OS << " const TargetRegisterClass* const RegisterClasses[] = {\n";
+ OS << " const TargetRegisterClass *const RegisterClasses[] = {\n";
for (const auto &RC : RegisterClasses)
OS << " &" << RC.getQualifiedName() << "RegClass,\n";
OS << " };\n";
@@ -1615,9 +1613,16 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
void RegisterInfoEmitter::run(raw_ostream &OS) {
CodeGenRegBank &RegBank = Target.getRegBank();
+ Records.startTimer("Print enums");
runEnums(OS, Target, RegBank);
+
+ Records.startTimer("Print MC registers");
runMCDesc(OS, Target, RegBank);
+
+ Records.startTimer("Print header fragment");
runTargetHeader(OS, Target, RegBank);
+
+ Records.startTimer("Print target registers");
runTargetDesc(OS, Target, RegBank);
if (RegisterInfoDebug)
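The range-based helpers adopted above (llvm::stable_sort, llvm::lower_bound, llvm::append_range) operate on whole containers rather than iterator pairs; an illustrative sketch with hypothetical data:

#include "llvm/ADT/STLExtras.h"
#include <utility>
#include <vector>

static void rangeHelpersDemo() {
  std::vector<std::pair<int, int>> DwarfNums = {{1, 10}, {3, 30}, {5, 50}};
  // First entry whose key is not less than 3, using a key/value comparator.
  auto It = llvm::lower_bound(
      DwarfNums, 3,
      [](const std::pair<int, int> &A, int B) { return A.first < B; });
  (void)It;

  std::vector<int> LaneMasks;
  std::vector<int> RUMasks = {0x1, 0x2, 0x4};
  // Shorthand for LaneMasks.insert(LaneMasks.end(), RUMasks.begin(), RUMasks.end()).
  llvm::append_range(LaneMasks, RUMasks);
}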
diff --git a/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp
index 326cb4e54edc..912d43b2caa1 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -12,6 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenIntrinsics.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Format.h"
@@ -19,7 +21,6 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "CodeGenIntrinsics.h"
#include <algorithm>
#include <set>
#include <string>
@@ -53,6 +54,7 @@ struct GenericEnum {
struct GenericField {
std::string Name;
RecTy *RecType = nullptr;
+ bool IsCode = false;
bool IsIntrinsic = false;
bool IsInstruction = false;
GenericEnum *Enum = nullptr;
@@ -62,12 +64,14 @@ struct GenericField {
struct SearchIndex {
std::string Name;
+ SMLoc Loc; // Source location of PrimaryKey or Key field definition.
SmallVector<GenericField, 1> Fields;
bool EarlyOut = false;
};
struct GenericTable {
std::string Name;
+ ArrayRef<SMLoc> Locs; // Source locations from the Record instance.
std::string PreprocessorGuard;
std::string CppTypeName;
SmallVector<GenericField, 2> Fields;
@@ -106,15 +110,17 @@ private:
TypeInArgument,
};
- std::string primaryRepresentation(const GenericField &Field, Init *I) {
- if (StringInit *SI = dyn_cast<StringInit>(I))
- return SI->getAsString();
- else if (BitsInit *BI = dyn_cast<BitsInit>(I))
+ std::string primaryRepresentation(SMLoc Loc, const GenericField &Field,
+ Init *I) {
+ if (StringInit *SI = dyn_cast<StringInit>(I)) {
+ if (Field.IsCode || SI->hasCodeFormat())
+ return std::string(SI->getValue());
+ else
+ return SI->getAsString();
+ } else if (BitsInit *BI = dyn_cast<BitsInit>(I))
return "0x" + utohexstr(getAsInt(BI));
else if (BitInit *BI = dyn_cast<BitInit>(I))
return BI->getValue() ? "true" : "false";
- else if (CodeInit *CI = dyn_cast<CodeInit>(I))
- return std::string(CI->getValue());
else if (Field.IsIntrinsic)
return "Intrinsic::" + getIntrinsic(I).EnumName;
else if (Field.IsInstruction)
@@ -122,11 +128,12 @@ private:
else if (Field.Enum) {
auto *Entry = Field.Enum->EntryMap[cast<DefInit>(I)->getDef()];
if (!Entry)
- PrintFatalError(Twine("Entry for field '") + Field.Name + "' is null");
+ PrintFatalError(Loc,
+ Twine("Entry for field '") + Field.Name + "' is null");
return std::string(Entry->first);
}
- PrintFatalError(Twine("invalid field type for field '") + Field.Name +
- "', expected: string, bits, bit or code");
+ PrintFatalError(Loc, Twine("invalid field type for field '") + Field.Name +
+ "'; expected: bit, bits, string, or code");
}
bool isIntrinsic(Init *I) {
@@ -138,17 +145,16 @@ private:
CodeGenIntrinsic &getIntrinsic(Init *I) {
std::unique_ptr<CodeGenIntrinsic> &Intr = Intrinsics[I];
if (!Intr)
- Intr = std::make_unique<CodeGenIntrinsic>(cast<DefInit>(I)->getDef());
+ Intr = std::make_unique<CodeGenIntrinsic>(cast<DefInit>(I)->getDef(),
+ std::vector<Record *>());
return *Intr;
}
bool compareBy(Record *LHS, Record *RHS, const SearchIndex &Index);
- bool isIntegral(Init *I) {
- return isa<BitsInit>(I) || isa<CodeInit>(I) || isIntrinsic(I);
- }
-
- std::string searchableFieldType(const GenericField &Field, TypeContext Ctx) {
+ std::string searchableFieldType(const GenericTable &Table,
+ const SearchIndex &Index,
+ const GenericField &Field, TypeContext Ctx) {
if (isa<StringRecTy>(Field.RecType)) {
if (Ctx == TypeInStaticStruct)
return "const char *";
@@ -165,12 +171,16 @@ private:
return "uint32_t";
if (NumBits <= 64)
return "uint64_t";
- PrintFatalError(Twine("bitfield '") + Field.Name +
- "' too large to search");
+ PrintFatalError(Index.Loc, Twine("In table '") + Table.Name +
+ "' lookup method '" + Index.Name +
+ "', key field '" + Field.Name +
+ "' of type bits is too large");
} else if (Field.Enum || Field.IsIntrinsic || Field.IsInstruction)
return "unsigned";
- PrintFatalError(Twine("Field '") + Field.Name + "' has unknown type '" +
- Field.RecType->getAsString() + "' to search by");
+ PrintFatalError(Index.Loc,
+ Twine("In table '") + Table.Name + "' lookup method '" +
+ Index.Name + "', key field '" + Field.Name +
+ "' has invalid type: " + Field.RecType->getAsString());
}
void emitGenericTable(const GenericTable &Table, raw_ostream &OS);
@@ -183,7 +193,7 @@ private:
bool parseFieldType(GenericField &Field, Init *II);
std::unique_ptr<SearchIndex>
- parseSearchIndex(GenericTable &Table, StringRef Name,
+ parseSearchIndex(GenericTable &Table, const RecordVal *RecVal, StringRef Name,
const std::vector<StringRef> &Key, bool EarlyOut);
void collectEnumEntries(GenericEnum &Enum, StringRef NameField,
StringRef ValueField,
@@ -258,8 +268,8 @@ bool SearchableTableEmitter::compareBy(Record *LHS, Record *RHS,
if (LHSv > RHSv)
return false;
} else {
- std::string LHSs = primaryRepresentation(Field, LHSI);
- std::string RHSs = primaryRepresentation(Field, RHSI);
+ std::string LHSs = primaryRepresentation(Index.Loc, Field, LHSI);
+ std::string RHSs = primaryRepresentation(Index.Loc, Field, RHSI);
if (isa<StringRecTy>(Field.RecType)) {
LHSs = StringRef(LHSs).upper();
@@ -314,7 +324,8 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
} else {
OS << " struct IndexType {\n";
for (const auto &Field : Index.Fields) {
- OS << " " << searchableFieldType(Field, TypeInStaticStruct) << " "
+ OS << " "
+ << searchableFieldType(Table, Index, Field, TypeInStaticStruct) << " "
<< Field.Name << ";\n";
}
OS << " unsigned _index;\n";
@@ -327,11 +338,10 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
for (unsigned i = 0; i < Table.Entries.size(); ++i)
Entries.emplace_back(Table.Entries[i], i);
- std::stable_sort(Entries.begin(), Entries.end(),
- [&](const std::pair<Record *, unsigned> &LHS,
- const std::pair<Record *, unsigned> &RHS) {
- return compareBy(LHS.first, RHS.first, Index);
- });
+ llvm::stable_sort(Entries, [&](const std::pair<Record *, unsigned> &LHS,
+ const std::pair<Record *, unsigned> &RHS) {
+ return compareBy(LHS.first, RHS.first, Index);
+ });
IndexRowsStorage.reserve(Entries.size());
for (const auto &Entry : Entries) {
@@ -344,8 +354,8 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
OS << ", ";
NeedComma = true;
- std::string Repr =
- primaryRepresentation(Field, Entry.first->getValueInit(Field.Name));
+ std::string Repr = primaryRepresentation(
+ Index.Loc, Field, Entry.first->getValueInit(Field.Name));
if (isa<StringRecTy>(Field.RecType))
Repr = StringRef(Repr).upper();
OS << Repr;
@@ -388,10 +398,10 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
if (Index.EarlyOut) {
const GenericField &Field = Index.Fields[0];
- std::string FirstRepr =
- primaryRepresentation(Field, IndexRows[0]->getValueInit(Field.Name));
+ std::string FirstRepr = primaryRepresentation(
+ Index.Loc, Field, IndexRows[0]->getValueInit(Field.Name));
std::string LastRepr = primaryRepresentation(
- Field, IndexRows.back()->getValueInit(Field.Name));
+ Index.Loc, Field, IndexRows.back()->getValueInit(Field.Name));
OS << " if ((" << Field.Name << " < " << FirstRepr << ") ||\n";
OS << " (" << Field.Name << " > " << LastRepr << "))\n";
OS << " return nullptr;\n\n";
@@ -399,11 +409,11 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
OS << " struct KeyType {\n";
for (const auto &Field : Index.Fields) {
- OS << " " << searchableFieldType(Field, TypeInTempStruct) << " "
- << Field.Name << ";\n";
+ OS << " " << searchableFieldType(Table, Index, Field, TypeInTempStruct)
+ << " " << Field.Name << ";\n";
}
OS << " };\n";
- OS << " KeyType Key = { ";
+ OS << " KeyType Key = {";
bool NeedComma = false;
for (const auto &Field : Index.Fields) {
if (NeedComma)
@@ -414,12 +424,14 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
if (isa<StringRecTy>(Field.RecType)) {
OS << ".upper()";
if (IsPrimary)
- PrintFatalError(Twine("Use a secondary index for case-insensitive "
- "comparison of field '") +
- Field.Name + "' in table '" + Table.Name + "'");
+ PrintFatalError(Index.Loc,
+ Twine("In table '") + Table.Name +
+ "', use a secondary lookup method for "
+ "case-insensitive comparison of field '" +
+ Field.Name + "'");
}
}
- OS << " };\n";
+ OS << "};\n";
OS << " auto Table = makeArrayRef(" << IndexName << ");\n";
OS << " auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,\n";
@@ -476,7 +488,8 @@ void SearchableTableEmitter::emitLookupDeclaration(const GenericTable &Table,
OS << ", ";
NeedComma = true;
- OS << searchableFieldType(Field, TypeInArgument) << " " << Field.Name;
+ OS << searchableFieldType(Table, Index, Field, TypeInArgument) << " "
+ << Field.Name;
}
OS << ")";
}
@@ -511,7 +524,8 @@ void SearchableTableEmitter::emitGenericTable(const GenericTable &Table,
OS << ", ";
NeedComma = true;
- OS << primaryRepresentation(Field, Entry->getValueInit(Field.Name));
+ OS << primaryRepresentation(Table.Locs[0], Field,
+ Entry->getValueInit(Field.Name));
}
OS << " }, // " << i << "\n";
@@ -528,40 +542,49 @@ void SearchableTableEmitter::emitGenericTable(const GenericTable &Table,
OS << "#endif\n\n";
}
-bool SearchableTableEmitter::parseFieldType(GenericField &Field, Init *II) {
- if (auto DI = dyn_cast<DefInit>(II)) {
- Record *TypeRec = DI->getDef();
- if (TypeRec->isSubClassOf("GenericEnum")) {
- Field.Enum = EnumMap[TypeRec];
- Field.RecType = RecordRecTy::get(Field.Enum->Class);
+bool SearchableTableEmitter::parseFieldType(GenericField &Field, Init *TypeOf) {
+ if (auto Type = dyn_cast<StringInit>(TypeOf)) {
+ if (Type->getValue() == "code") {
+ Field.IsCode = true;
return true;
+ } else {
+ if (Record *TypeRec = Records.getDef(Type->getValue())) {
+ if (TypeRec->isSubClassOf("GenericEnum")) {
+ Field.Enum = EnumMap[TypeRec];
+ Field.RecType = RecordRecTy::get(Field.Enum->Class);
+ return true;
+ }
+ }
}
}
return false;
}
-std::unique_ptr<SearchIndex>
-SearchableTableEmitter::parseSearchIndex(GenericTable &Table, StringRef Name,
- const std::vector<StringRef> &Key,
- bool EarlyOut) {
+std::unique_ptr<SearchIndex> SearchableTableEmitter::parseSearchIndex(
+ GenericTable &Table, const RecordVal *KeyRecVal, StringRef Name,
+ const std::vector<StringRef> &Key, bool EarlyOut) {
auto Index = std::make_unique<SearchIndex>();
Index->Name = std::string(Name);
+ Index->Loc = KeyRecVal->getLoc();
Index->EarlyOut = EarlyOut;
for (const auto &FieldName : Key) {
const GenericField *Field = Table.getFieldByName(FieldName);
if (!Field)
- PrintFatalError(Twine("Search index '") + Name +
- "' refers to non-existing field '" + FieldName +
- "' in table '" + Table.Name + "'");
+ PrintFatalError(
+ KeyRecVal,
+ Twine("In table '") + Table.Name +
+ "', 'PrimaryKey' or 'Key' refers to nonexistent field '" +
+ FieldName + "'");
+
Index->Fields.push_back(*Field);
}
if (EarlyOut && isa<StringRecTy>(Index->Fields[0].RecType)) {
PrintFatalError(
- "Early-out is not supported for string types (in search index '" +
- Twine(Name) + "'");
+ KeyRecVal, Twine("In lookup method '") + Name + "', early-out is not " +
+ "supported for a first key field of type string");
}
return Index;
@@ -586,11 +609,11 @@ void SearchableTableEmitter::collectEnumEntries(
}
if (ValueField.empty()) {
- std::stable_sort(Enum.Entries.begin(), Enum.Entries.end(),
- [](const std::unique_ptr<GenericEnum::Entry> &LHS,
- const std::unique_ptr<GenericEnum::Entry> &RHS) {
- return LHS->first < RHS->first;
- });
+ llvm::stable_sort(Enum.Entries,
+ [](const std::unique_ptr<GenericEnum::Entry> &LHS,
+ const std::unique_ptr<GenericEnum::Entry> &RHS) {
+ return LHS->first < RHS->first;
+ });
for (size_t i = 0; i < Enum.Entries.size(); ++i)
Enum.Entries[i]->second = i;
@@ -600,31 +623,33 @@ void SearchableTableEmitter::collectEnumEntries(
void SearchableTableEmitter::collectTableEntries(
GenericTable &Table, const std::vector<Record *> &Items) {
if (Items.empty())
- PrintWarning(Twine("Table '") + Table.Name + "' has no items");
+ PrintFatalError(Table.Locs,
+ Twine("Table '") + Table.Name + "' has no entries");
for (auto EntryRec : Items) {
for (auto &Field : Table.Fields) {
auto TI = dyn_cast<TypedInit>(EntryRec->getValueInit(Field.Name));
if (!TI || !TI->isComplete()) {
- PrintFatalError(EntryRec->getLoc(),
- Twine("Record '") + EntryRec->getName() +
- "' in table '" + Table.Name +
- "' is missing field '" + Field.Name + "'");
+ PrintFatalError(EntryRec, Twine("Record '") + EntryRec->getName() +
+ "' for table '" + Table.Name +
+ "' is missing field '" + Field.Name +
+ "'");
}
if (!Field.RecType) {
Field.RecType = TI->getType();
} else {
RecTy *Ty = resolveTypes(Field.RecType, TI->getType());
if (!Ty)
- PrintFatalError(Twine("Field '") + Field.Name + "' of table '" +
- Table.Name + "' has incompatible type: " +
- Field.RecType->getAsString() + " vs. " +
- TI->getType()->getAsString());
+ PrintFatalError(EntryRec->getValue(Field.Name),
+ Twine("Field '") + Field.Name + "' of table '" +
+ Table.Name + "' entry has incompatible type: " +
+ TI->getType()->getAsString() + " vs. " +
+ Field.RecType->getAsString());
Field.RecType = Ty;
}
}
- Table.Entries.push_back(EntryRec);
+ Table.Entries.push_back(EntryRec); // Add record to table's record list.
}
Record *IntrinsicClass = Records.getClass("Intrinsic");
@@ -665,8 +690,9 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
StringRef FilterClass = EnumRec->getValueAsString("FilterClass");
Enum->Class = Records.getClass(FilterClass);
if (!Enum->Class)
- PrintFatalError(EnumRec->getLoc(), Twine("Enum FilterClass '") +
- FilterClass + "' does not exist");
+ PrintFatalError(EnumRec->getValue("FilterClass"),
+ Twine("Enum FilterClass '") + FilterClass +
+ "' does not exist");
collectEnumEntries(*Enum, NameField, ValueField,
Records.getAllDerivedDefinitions(FilterClass));
@@ -677,36 +703,44 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
for (auto TableRec : Records.getAllDerivedDefinitions("GenericTable")) {
auto Table = std::make_unique<GenericTable>();
Table->Name = std::string(TableRec->getName());
+ Table->Locs = TableRec->getLoc();
Table->PreprocessorGuard = std::string(TableRec->getName());
Table->CppTypeName = std::string(TableRec->getValueAsString("CppTypeName"));
std::vector<StringRef> Fields = TableRec->getValueAsListOfStrings("Fields");
for (const auto &FieldName : Fields) {
- Table->Fields.emplace_back(FieldName);
-
- if (auto TypeOfVal = TableRec->getValue(("TypeOf_" + FieldName).str())) {
- if (!parseFieldType(Table->Fields.back(), TypeOfVal->getValue())) {
- PrintFatalError(TableRec->getLoc(),
- Twine("Table '") + Table->Name +
- "' has bad 'TypeOf_" + FieldName +
- "': " + TypeOfVal->getValue()->getAsString());
+ Table->Fields.emplace_back(FieldName); // Construct a GenericField.
+
+ if (auto TypeOfRecordVal = TableRec->getValue(("TypeOf_" + FieldName).str())) {
+ if (!parseFieldType(Table->Fields.back(), TypeOfRecordVal->getValue())) {
+ PrintError(TypeOfRecordVal,
+ Twine("Table '") + Table->Name +
+ "' has invalid 'TypeOf_" + FieldName +
+ "': " + TypeOfRecordVal->getValue()->getAsString());
+ PrintFatalNote("The 'TypeOf_xxx' field must be a string naming a "
+ "GenericEnum record, or \"code\"");
}
}
}
- collectTableEntries(*Table, Records.getAllDerivedDefinitions(
- TableRec->getValueAsString("FilterClass")));
+ StringRef FilterClass = TableRec->getValueAsString("FilterClass");
+ if (!Records.getClass(FilterClass))
+ PrintFatalError(TableRec->getValue("FilterClass"),
+ Twine("Table FilterClass '") +
+ FilterClass + "' does not exist");
+
+ collectTableEntries(*Table, Records.getAllDerivedDefinitions(FilterClass));
if (!TableRec->isValueUnset("PrimaryKey")) {
Table->PrimaryKey =
- parseSearchIndex(*Table, TableRec->getValueAsString("PrimaryKeyName"),
+ parseSearchIndex(*Table, TableRec->getValue("PrimaryKey"),
+ TableRec->getValueAsString("PrimaryKeyName"),
TableRec->getValueAsListOfStrings("PrimaryKey"),
TableRec->getValueAsBit("PrimaryKeyEarlyOut"));
- std::stable_sort(Table->Entries.begin(), Table->Entries.end(),
- [&](Record *LHS, Record *RHS) {
- return compareBy(LHS, RHS, *Table->PrimaryKey);
- });
+ llvm::stable_sort(Table->Entries, [&](Record *LHS, Record *RHS) {
+ return compareBy(LHS, RHS, *Table->PrimaryKey);
+ });
}
TableMap.insert(std::make_pair(TableRec, Table.get()));
@@ -717,15 +751,16 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
Record *TableRec = IndexRec->getValueAsDef("Table");
auto It = TableMap.find(TableRec);
if (It == TableMap.end())
- PrintFatalError(IndexRec->getLoc(),
+ PrintFatalError(IndexRec->getValue("Table"),
Twine("SearchIndex '") + IndexRec->getName() +
- "' refers to non-existing table '" +
+ "' refers to nonexistent table '" +
TableRec->getName());
GenericTable &Table = *It->second;
- Table.Indices.push_back(parseSearchIndex(
- Table, IndexRec->getName(), IndexRec->getValueAsListOfStrings("Key"),
- IndexRec->getValueAsBit("EarlyOut")));
+ Table.Indices.push_back(
+ parseSearchIndex(Table, IndexRec->getValue("Key"), IndexRec->getName(),
+ IndexRec->getValueAsListOfStrings("Key"),
+ IndexRec->getValueAsBit("EarlyOut")));
}
// Translate legacy tables.
@@ -756,6 +791,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
auto Table = std::make_unique<GenericTable>();
Table->Name = (Twine(Class->getName()) + "sList").str();
+ Table->Locs = Class->getLoc();
Table->PreprocessorGuard = Class->getName().upper();
Table->CppTypeName = std::string(Class->getName());
@@ -778,7 +814,8 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
Class->getValueAsListOfStrings("SearchableFields")) {
std::string Name =
(Twine("lookup") + Table->CppTypeName + "By" + Field).str();
- Table->Indices.push_back(parseSearchIndex(*Table, Name, {Field}, false));
+ Table->Indices.push_back(parseSearchIndex(*Table, Class->getValue(Field),
+ Name, {Field}, false));
}
Tables.emplace_back(std::move(Table));
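Likewise, llvm::stable_sort above takes the container plus a comparator directly; a minimal sketch with invented data:

#include "llvm/ADT/STLExtras.h"
#include <string>
#include <vector>

static void sortEntries(std::vector<std::string> &Names) {
  llvm::stable_sort(Names, [](const std::string &L, const std::string &R) {
    return L.size() < R.size(); // equal sizes keep their original order
  });
}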
diff --git a/contrib/llvm-project/llvm/utils/TableGen/SubtargetEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/SubtargetEmitter.cpp
index 68ee839c43ba..7d2b4b929df3 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -114,7 +114,6 @@ class SubtargetEmitter {
SchedClassTables &SchedTables);
void EmitSchedClassTables(SchedClassTables &SchedTables, raw_ostream &OS);
void EmitProcessorModels(raw_ostream &OS);
- void EmitProcessorLookup(raw_ostream &OS);
void EmitSchedModelHelpers(const std::string &ClassName, raw_ostream &OS);
void emitSchedModelHelpersImpl(raw_ostream &OS,
bool OnlyExpandMCInstPredicates = false);
@@ -267,12 +266,15 @@ SubtargetEmitter::CPUKeyValues(raw_ostream &OS,
for (Record *Processor : ProcessorList) {
StringRef Name = Processor->getValueAsString("Name");
RecVec FeatureList = Processor->getValueAsListOfDefs("Features");
+ RecVec TuneFeatureList = Processor->getValueAsListOfDefs("TuneFeatures");
// Emit as { "cpu", "description", 0, { f1 , f2 , ... fn } },
OS << " { "
<< "\"" << Name << "\", ";
printFeatureMask(OS, FeatureList, FeatureMap);
+ OS << ", ";
+ printFeatureMask(OS, TuneFeatureList, FeatureMap);
// Emit the scheduler model pointer.
const std::string &ProcModelName =
@@ -728,10 +730,8 @@ void SubtargetEmitter::EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
unsigned QueueID = 0;
if (ProcModel.LoadQueue) {
const Record *Queue = ProcModel.LoadQueue->getValueAsDef("QueueDescriptor");
- QueueID =
- 1 + std::distance(ProcModel.ProcResourceDefs.begin(),
- std::find(ProcModel.ProcResourceDefs.begin(),
- ProcModel.ProcResourceDefs.end(), Queue));
+ QueueID = 1 + std::distance(ProcModel.ProcResourceDefs.begin(),
+ find(ProcModel.ProcResourceDefs, Queue));
}
OS << " " << QueueID << ", // Resource Descriptor for the Load Queue\n";
@@ -739,10 +739,8 @@ void SubtargetEmitter::EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
if (ProcModel.StoreQueue) {
const Record *Queue =
ProcModel.StoreQueue->getValueAsDef("QueueDescriptor");
- QueueID =
- 1 + std::distance(ProcModel.ProcResourceDefs.begin(),
- std::find(ProcModel.ProcResourceDefs.begin(),
- ProcModel.ProcResourceDefs.end(), Queue));
+ QueueID = 1 + std::distance(ProcModel.ProcResourceDefs.begin(),
+ find(ProcModel.ProcResourceDefs, Queue));
}
OS << " " << QueueID << ", // Resource Descriptor for the Store Queue\n";
}
@@ -1005,8 +1003,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
bool HasVariants = false;
for (const CodeGenSchedTransition &CGT :
make_range(SC.Transitions.begin(), SC.Transitions.end())) {
- if (CGT.ProcIndices[0] == 0 ||
- is_contained(CGT.ProcIndices, ProcModel.Index)) {
+ if (CGT.ProcIndex == ProcModel.Index) {
HasVariants = true;
break;
}
@@ -1219,11 +1216,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
}
else {
SCDesc.WriteLatencyIdx = SchedTables.WriteLatencies.size();
- SchedTables.WriteLatencies.insert(SchedTables.WriteLatencies.end(),
- WriteLatencies.begin(),
- WriteLatencies.end());
- SchedTables.WriterNames.insert(SchedTables.WriterNames.end(),
- WriterNames.begin(), WriterNames.end());
+ llvm::append_range(SchedTables.WriteLatencies, WriteLatencies);
+ llvm::append_range(SchedTables.WriterNames, WriterNames);
}
// ReadAdvanceEntries must remain in operand order.
SCDesc.NumReadAdvanceEntries = ReadAdvanceEntries.size();
@@ -1235,8 +1229,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
SCDesc.ReadAdvanceIdx = RAPos - SchedTables.ReadAdvanceEntries.begin();
else {
SCDesc.ReadAdvanceIdx = SchedTables.ReadAdvanceEntries.size();
- SchedTables.ReadAdvanceEntries.insert(RAPos, ReadAdvanceEntries.begin(),
- ReadAdvanceEntries.end());
+ llvm::append_range(SchedTables.ReadAdvanceEntries, ReadAdvanceEntries);
}
}
}
@@ -1443,20 +1436,20 @@ static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
OS << Buffer;
}
+static bool isTruePredicate(const Record *Rec) {
+ return Rec->isSubClassOf("MCSchedPredicate") &&
+ Rec->getValueAsDef("Pred")->isSubClassOf("MCTrue");
+}
+
static void emitPredicates(const CodeGenSchedTransition &T,
const CodeGenSchedClass &SC, PredicateExpander &PE,
raw_ostream &OS) {
std::string Buffer;
raw_string_ostream SS(Buffer);
- auto IsTruePredicate = [](const Record *Rec) {
- return Rec->isSubClassOf("MCSchedPredicate") &&
- Rec->getValueAsDef("Pred")->isSubClassOf("MCTrue");
- };
-
// If not all predicates are MCTrue, then we need an if-stmt.
unsigned NumNonTruePreds =
- T.PredTerm.size() - count_if(T.PredTerm, IsTruePredicate);
+ T.PredTerm.size() - count_if(T.PredTerm, isTruePredicate);
SS.indent(PE.getIndentLevel() * 2);
@@ -1468,7 +1461,7 @@ static void emitPredicates(const CodeGenSchedTransition &T,
for (const Record *Rec : T.PredTerm) {
// Skip predicates that evaluate to "true".
- if (IsTruePredicate(Rec))
+ if (isTruePredicate(Rec))
continue;
if (FirstNonTruePredicate) {
@@ -1504,7 +1497,8 @@ static void emitPredicates(const CodeGenSchedTransition &T,
// Used by method `SubtargetEmitter::emitSchedModelHelpersImpl()` to generate
// epilogue code for the auto-generated helper.
-void emitSchedModelHelperEpilogue(raw_ostream &OS, bool ShouldReturnZero) {
+static void emitSchedModelHelperEpilogue(raw_ostream &OS,
+ bool ShouldReturnZero) {
if (ShouldReturnZero) {
OS << " // Don't know how to resolve this scheduling class.\n"
<< " return 0;\n";
@@ -1514,15 +1508,15 @@ void emitSchedModelHelperEpilogue(raw_ostream &OS, bool ShouldReturnZero) {
OS << " report_fatal_error(\"Expected a variant SchedClass\");\n";
}
-bool hasMCSchedPredicates(const CodeGenSchedTransition &T) {
+static bool hasMCSchedPredicates(const CodeGenSchedTransition &T) {
return all_of(T.PredTerm, [](const Record *Rec) {
return Rec->isSubClassOf("MCSchedPredicate");
});
}
-void collectVariantClasses(const CodeGenSchedModels &SchedModels,
- IdxVec &VariantClasses,
- bool OnlyExpandMCInstPredicates) {
+static void collectVariantClasses(const CodeGenSchedModels &SchedModels,
+ IdxVec &VariantClasses,
+ bool OnlyExpandMCInstPredicates) {
for (const CodeGenSchedClass &SC : SchedModels.schedClasses()) {
// Ignore non-variant scheduling classes.
if (SC.Transitions.empty())
@@ -1541,19 +1535,24 @@ void collectVariantClasses(const CodeGenSchedModels &SchedModels,
}
}
-void collectProcessorIndices(const CodeGenSchedClass &SC, IdxVec &ProcIndices) {
+static void collectProcessorIndices(const CodeGenSchedClass &SC,
+ IdxVec &ProcIndices) {
// A variant scheduling class may define transitions for multiple
// processors. This function identifies which processors are associated with
// transition rules specified by variant class `SC`.
for (const CodeGenSchedTransition &T : SC.Transitions) {
IdxVec PI;
- std::set_union(T.ProcIndices.begin(), T.ProcIndices.end(),
- ProcIndices.begin(), ProcIndices.end(),
- std::back_inserter(PI));
+ std::set_union(&T.ProcIndex, &T.ProcIndex + 1, ProcIndices.begin(),
+ ProcIndices.end(), std::back_inserter(PI));
ProcIndices.swap(PI);
}
}
+static bool isAlwaysTrue(const CodeGenSchedTransition &T) {
+ return llvm::all_of(T.PredTerm,
+ [](const Record *R) { return isTruePredicate(R); });
+}
+
void SubtargetEmitter::emitSchedModelHelpersImpl(
raw_ostream &OS, bool OnlyExpandMCInstPredicates) {
IdxVec VariantClasses;
@@ -1596,8 +1595,9 @@ void SubtargetEmitter::emitSchedModelHelpersImpl(
}
// Now emit transitions associated with processor PI.
+ const CodeGenSchedTransition *FinalT = nullptr;
for (const CodeGenSchedTransition &T : SC.Transitions) {
- if (PI != 0 && !count(T.ProcIndices, PI))
+ if (PI != 0 && T.ProcIndex != PI)
continue;
// Emit only transitions based on MCSchedPredicate, if it's the case.
@@ -1610,9 +1610,17 @@ void SubtargetEmitter::emitSchedModelHelpersImpl(
if (OnlyExpandMCInstPredicates && !hasMCSchedPredicates(T))
continue;
+ // If transition is folded to 'return X' it should be the last one.
+ if (isAlwaysTrue(T)) {
+ FinalT = &T;
+ continue;
+ }
PE.setIndentLevel(3);
emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS);
}
+ if (FinalT)
+ emitPredicates(*FinalT, SchedModels.getSchedClass(FinalT->ToClassIdx),
+ PE, OS);
OS << " }\n";
@@ -1646,9 +1654,9 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
OS << "unsigned " << ClassName
<< "\n::resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI,"
- << " unsigned CPUID) const {\n"
+ << " const MCInstrInfo *MCII, unsigned CPUID) const {\n"
<< " return " << Target << "_MC"
- << "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID);\n"
+ << "::resolveVariantSchedClassImpl(SchedClass, MI, MCII, CPUID);\n"
<< "} // " << ClassName << "::resolveVariantSchedClass\n\n";
STIPredicateExpander PE(Target);
@@ -1692,17 +1700,19 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS,
<< "// subtarget options.\n"
<< "void llvm::";
OS << Target;
- OS << "Subtarget::ParseSubtargetFeatures(StringRef CPU, StringRef FS) {\n"
+ OS << "Subtarget::ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, "
+ << "StringRef FS) {\n"
<< " LLVM_DEBUG(dbgs() << \"\\nFeatures:\" << FS);\n"
- << " LLVM_DEBUG(dbgs() << \"\\nCPU:\" << CPU << \"\\n\\n\");\n";
+ << " LLVM_DEBUG(dbgs() << \"\\nCPU:\" << CPU);\n"
+ << " LLVM_DEBUG(dbgs() << \"\\nTuneCPU:\" << TuneCPU << \"\\n\\n\");\n";
if (Features.empty()) {
OS << "}\n";
return;
}
- OS << " InitMCProcessorInfo(CPU, FS);\n"
- << " const FeatureBitset& Bits = getFeatureBits();\n";
+ OS << " InitMCProcessorInfo(CPU, TuneCPU, FS);\n"
+ << " const FeatureBitset &Bits = getFeatureBits();\n";
for (Record *R : Features) {
// Next record
@@ -1727,26 +1737,28 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS,
void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) {
OS << "namespace " << Target << "_MC {\n"
<< "unsigned resolveVariantSchedClassImpl(unsigned SchedClass,\n"
- << " const MCInst *MI, unsigned CPUID) {\n";
+ << " const MCInst *MI, const MCInstrInfo *MCII, unsigned CPUID) {\n";
emitSchedModelHelpersImpl(OS, /* OnlyExpandMCPredicates */ true);
OS << "}\n";
OS << "} // end namespace " << Target << "_MC\n\n";
OS << "struct " << Target
<< "GenMCSubtargetInfo : public MCSubtargetInfo {\n";
- OS << " " << Target << "GenMCSubtargetInfo(const Triple &TT, \n"
- << " StringRef CPU, StringRef FS, ArrayRef<SubtargetFeatureKV> PF,\n"
+ OS << " " << Target << "GenMCSubtargetInfo(const Triple &TT,\n"
+ << " StringRef CPU, StringRef TuneCPU, StringRef FS,\n"
+ << " ArrayRef<SubtargetFeatureKV> PF,\n"
<< " ArrayRef<SubtargetSubTypeKV> PD,\n"
<< " const MCWriteProcResEntry *WPR,\n"
<< " const MCWriteLatencyEntry *WL,\n"
<< " const MCReadAdvanceEntry *RA, const InstrStage *IS,\n"
<< " const unsigned *OC, const unsigned *FP) :\n"
- << " MCSubtargetInfo(TT, CPU, FS, PF, PD,\n"
+ << " MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD,\n"
<< " WPR, WL, RA, IS, OC, FP) { }\n\n"
<< " unsigned resolveVariantSchedClass(unsigned SchedClass,\n"
- << " const MCInst *MI, unsigned CPUID) const override {\n"
+ << " const MCInst *MI, const MCInstrInfo *MCII,\n"
+ << " unsigned CPUID) const override {\n"
<< " return " << Target << "_MC"
- << "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID); \n";
+ << "::resolveVariantSchedClassImpl(SchedClass, MI, MCII, CPUID);\n";
OS << " }\n";
if (TGT.getHwModes().getNumModeIds() > 1)
OS << " unsigned getHwMode() const override;\n";
@@ -1817,8 +1829,9 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "\nstatic inline MCSubtargetInfo *create" << Target
<< "MCSubtargetInfoImpl("
- << "const Triple &TT, StringRef CPU, StringRef FS) {\n";
- OS << " return new " << Target << "GenMCSubtargetInfo(TT, CPU, FS, ";
+ << "const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS) {\n";
+ OS << " return new " << Target
+ << "GenMCSubtargetInfo(TT, CPU, TuneCPU, FS, ";
if (NumFeatures)
OS << Target << "FeatureKV, ";
else
@@ -1862,17 +1875,18 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "class DFAPacketizer;\n";
OS << "namespace " << Target << "_MC {\n"
<< "unsigned resolveVariantSchedClassImpl(unsigned SchedClass,"
- << " const MCInst *MI, unsigned CPUID);\n"
+ << " const MCInst *MI, const MCInstrInfo *MCII, unsigned CPUID);\n"
<< "} // end namespace " << Target << "_MC\n\n";
OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n"
<< " explicit " << ClassName << "(const Triple &TT, StringRef CPU, "
- << "StringRef FS);\n"
+ << "StringRef TuneCPU, StringRef FS);\n"
<< "public:\n"
<< " unsigned resolveSchedClass(unsigned SchedClass, "
<< " const MachineInstr *DefMI,"
<< " const TargetSchedModel *SchedModel) const override;\n"
<< " unsigned resolveVariantSchedClass(unsigned SchedClass,"
- << " const MCInst *MI, unsigned CPUID) const override;\n"
+ << " const MCInst *MI, const MCInstrInfo *MCII,"
+ << " unsigned CPUID) const override;\n"
<< " DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID)"
<< " const;\n";
if (TGT.getHwModes().getNumModeIds() > 1)
@@ -1909,8 +1923,8 @@ void SubtargetEmitter::run(raw_ostream &OS) {
}
OS << ClassName << "::" << ClassName << "(const Triple &TT, StringRef CPU, "
- << "StringRef FS)\n"
- << " : TargetSubtargetInfo(TT, CPU, FS, ";
+ << "StringRef TuneCPU, StringRef FS)\n"
+ << " : TargetSubtargetInfo(TT, CPU, TuneCPU, FS, ";
if (NumFeatures)
OS << "makeArrayRef(" << Target << "FeatureKV, " << NumFeatures << "), ";
else
diff --git a/contrib/llvm-project/llvm/utils/TableGen/SubtargetFeatureInfo.cpp b/contrib/llvm-project/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
index 3821f4757464..105ed82c9d02 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
@@ -7,11 +7,10 @@
//===----------------------------------------------------------------------===//
#include "SubtargetFeatureInfo.h"
-
#include "Types.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-
#include <map>
using namespace llvm;
@@ -113,7 +112,7 @@ void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
StringRef TargetName, StringRef ClassName, StringRef FuncName,
SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
OS << "FeatureBitset " << TargetName << ClassName << "::\n"
- << FuncName << "(const FeatureBitset& FB) const {\n";
+ << FuncName << "(const FeatureBitset &FB) const {\n";
OS << " FeatureBitset Features;\n";
for (const auto &SF : SubtargetFeatures) {
const SubtargetFeatureInfo &SFI = SF.second;
diff --git a/contrib/llvm-project/llvm/utils/TableGen/SubtargetFeatureInfo.h b/contrib/llvm-project/llvm/utils/TableGen/SubtargetFeatureInfo.h
index d72f8b93461f..8c8a4487934c 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/SubtargetFeatureInfo.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/SubtargetFeatureInfo.h
@@ -9,17 +9,12 @@
#ifndef LLVM_UTIL_TABLEGEN_SUBTARGETFEATUREINFO_H
#define LLVM_UTIL_TABLEGEN_SUBTARGETFEATUREINFO_H
-#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-
#include <map>
#include <string>
#include <vector>
namespace llvm {
-class Record;
-class RecordKeeper;
-
struct SubtargetFeatureInfo;
using SubtargetFeatureInfoMap = std::map<Record *, SubtargetFeatureInfo, LessRecordByID>;
diff --git a/contrib/llvm-project/llvm/utils/TableGen/TableGen.cpp b/contrib/llvm-project/llvm/utils/TableGen/TableGen.cpp
index 8015a58471ca..6d851da34731 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/TableGen.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/TableGen.cpp
@@ -12,9 +12,7 @@
#include "TableGenBackends.h" // Declares all backends.
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Signals.h"
+#include "llvm/Support/InitLLVM.h"
#include "llvm/TableGen/Main.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
@@ -23,6 +21,8 @@ using namespace llvm;
enum ActionType {
PrintRecords,
+ PrintDetailedRecords,
+ NullBackend,
DumpJSON,
GenEmitter,
GenRegisterInfo,
@@ -60,9 +60,6 @@ enum ActionType {
};
namespace llvm {
-/// Storage for TimeRegionsOpt as a global so that backends aren't required to
-/// include CommandLine.h
-bool TimeRegions = false;
cl::opt<bool> EmitLongStrLiterals(
"long-string-literals",
cl::desc("when emitting large string tables, prefer string literals over "
@@ -77,6 +74,10 @@ cl::opt<ActionType> Action(
cl::values(
clEnumValN(PrintRecords, "print-records",
"Print all records to stdout (default)"),
+ clEnumValN(PrintDetailedRecords, "print-detailed-records",
+ "Print full details of all records to stdout"),
+ clEnumValN(NullBackend, "null-backend",
+ "Do nothing after parsing (useful for timing)"),
clEnumValN(DumpJSON, "dump-json",
"Dump all records as machine-readable JSON"),
clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"),
@@ -144,15 +145,15 @@ cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"),
cl::value_desc("class name"),
cl::cat(PrintEnumsCat));
-cl::opt<bool, true>
- TimeRegionsOpt("time-regions",
- cl::desc("Time regions of tablegens execution"),
- cl::location(TimeRegions));
-
bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
switch (Action) {
case PrintRecords:
- OS << Records; // No argument, dump all contents
+ OS << Records; // No argument, dump all contents
+ break;
+ case PrintDetailedRecords:
+ EmitDetailedRecords(Records, OS);
+ break;
+ case NullBackend: // No backend at all.
break;
case DumpJSON:
EmitJSON(Records, OS);
@@ -278,12 +279,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
}
int main(int argc, char **argv) {
- sys::PrintStackTraceOnErrorSignal(argv[0]);
- PrettyStackTraceProgram X(argc, argv);
+ InitLLVM X(argc, argv);
cl::ParseCommandLineOptions(argc, argv);
- llvm_shutdown_obj Y;
-
return TableGenMain(argv[0], &LLVMTableGenMain);
}
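For context, InitLLVM bundles the stack-trace signal handlers, pretty stack trace, and llvm_shutdown call that the removed lines installed individually; a minimal hypothetical tool using the same pattern:

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"

int main(int argc, char **argv) {
  llvm::InitLLVM X(argc, argv); // stack traces + llvm_shutdown at exit
  llvm::cl::ParseCommandLineOptions(argc, argv, "demo tool\n");
  return 0;
}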
diff --git a/contrib/llvm-project/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index 54aa5a8164f2..7518b262e6e9 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -39,9 +39,15 @@ void emitWebAssemblyDisassemblerTables(
->getValue());
if (Opc == 0xFFFFFFFF)
continue; // No opcode defined.
- assert(Opc <= 0xFFFF);
- auto Prefix = Opc >> 8;
- Opc = Opc & 0xFF;
+ assert(Opc <= 0xFFFFFF);
+ unsigned Prefix;
+ if (Opc <= 0xFFFF) {
+ Prefix = Opc >> 8;
+ Opc = Opc & 0xFF;
+ } else {
+ Prefix = Opc >> 16;
+ Opc = Opc & 0xFFFF;
+ }
auto &CGIP = OpcodeTable[Prefix][Opc];
// All wasm instructions have a StackBased field of type string; we only
// want the instructions for which this is "true".
@@ -133,8 +139,7 @@ void emitWebAssemblyDisassemblerTables(
}
// Store operands if no prior occurrence.
if (OperandStart == OperandTable.size()) {
- OperandTable.insert(OperandTable.end(), CurOperandList.begin(),
- CurOperandList.end());
+ llvm::append_range(OperandTable, CurOperandList);
}
OS << OperandStart;
} else {
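The widened opcode handling above splits opcodes that no longer fit in 16 bits into a prefix byte and a 16-bit sub-opcode; a worked sketch of the same arithmetic, with illustrative values in the comments:

// 0x0123   -> Prefix 0x01, sub-opcode 0x23
// 0xFD0105 -> Prefix 0xFD, sub-opcode 0x0105
static void splitWasmOpcode(unsigned Opc, unsigned &Prefix, unsigned &Sub) {
  if (Opc <= 0xFFFF) {
    Prefix = Opc >> 8;
    Sub = Opc & 0xFF;
  } else {
    Prefix = Opc >> 16;
    Sub = Opc & 0xFFFF;
  }
}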
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 76e4fd9a13ee..331664f875b7 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -763,7 +763,7 @@ void DisassemblerTables::emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2,
}
if (index == 256) {
// If all 256 entries are MODRM_ONEENTRY, omit output.
- assert(MODRM_ONEENTRY == 0);
+ static_assert(MODRM_ONEENTRY == 0, "");
--i2;
o2 << "},\n";
} else {
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 8026c324cd40..85d926215113 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -127,6 +127,15 @@ class X86FoldTablesEmitter {
OS << "0 },\n";
}
+
+ bool operator<(const X86FoldTableEntry &RHS) const {
+ bool LHSpseudo = RegInst->TheDef->getValueAsBit("isPseudo");
+ bool RHSpseudo = RHS.RegInst->TheDef->getValueAsBit("isPseudo");
+ if (LHSpseudo != RHSpseudo)
+ return LHSpseudo;
+
+ return RegInst->TheDef->getName() < RHS.RegInst->TheDef->getName();
+ }
};
typedef std::vector<X86FoldTableEntry> FoldTable;
@@ -225,14 +234,8 @@ static inline unsigned int getRegOperandSize(const Record *RegRec) {
}
// Return the size of the memory operand
-static inline unsigned int
-getMemOperandSize(const Record *MemRec, const bool IntrinsicSensitive = false) {
+static inline unsigned getMemOperandSize(const Record *MemRec) {
if (MemRec->isSubClassOf("Operand")) {
- // Intrinsic memory instructions use ssmem/sdmem.
- if (IntrinsicSensitive &&
- (MemRec->getName() == "sdmem" || MemRec->getName() == "ssmem"))
- return 128;
-
StringRef Name =
MemRec->getValueAsDef("ParserMatchClass")->getValueAsString("Name");
if (Name == "Mem8")
@@ -568,8 +571,6 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec))
addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
}
-
- return;
}
void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
@@ -653,6 +654,14 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
&(Target.getInstruction(MemInstIter)), Entry.Strategy);
}
+ // Sort the tables before printing.
+ llvm::sort(Table2Addr);
+ llvm::sort(Table0);
+ llvm::sort(Table1);
+ llvm::sort(Table2);
+ llvm::sort(Table3);
+ llvm::sort(Table4);
+
// Print all tables.
printTable(Table2Addr, "Table2Addr", OS);
printTable(Table0, "Table0", OS);
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp
index 6a245b5eb425..e4b7c05cfb88 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -54,7 +54,7 @@ static uint8_t byteFromBitsInit(BitsInit &init) {
/// @param rec - The record from which to extract the value.
/// @param name - The name of the field in the record.
/// @return - The field, as translated by byteFromBitsInit().
-static uint8_t byteFromRec(const Record* rec, const std::string &name) {
+static uint8_t byteFromRec(const Record* rec, StringRef name) {
BitsInit* bits = rec->getValueAsBitsInit(name);
return byteFromBitsInit(*bits);
}